summaryrefslogtreecommitdiffstats
path: root/third_party/aom
diff options
context:
space:
mode:
authorMatt A. Tobin <email@mattatobin.com>2020-04-07 23:30:51 -0400
committerwolfbeast <mcwerewolf@wolfbeast.com>2020-04-14 13:26:42 +0200
commit277f2116b6660e9bbe7f5d67524be57eceb49b8b (patch)
tree4595f7cc71418f71b9a97dfaeb03a30aa60f336a /third_party/aom
parentd270404436f6e84ffa3b92af537ac721bf10d66e (diff)
downloadUXP-277f2116b6660e9bbe7f5d67524be57eceb49b8b.tar
UXP-277f2116b6660e9bbe7f5d67524be57eceb49b8b.tar.gz
UXP-277f2116b6660e9bbe7f5d67524be57eceb49b8b.tar.lz
UXP-277f2116b6660e9bbe7f5d67524be57eceb49b8b.tar.xz
UXP-277f2116b6660e9bbe7f5d67524be57eceb49b8b.zip
Move aom source to a sub-directory under media/libaom
There is no damned reason to treat this differently than any other media lib given its license and there never was.
Diffstat (limited to 'third_party/aom')
-rw-r--r--third_party/aom/.clang-format109
-rw-r--r--third_party/aom/.cmake-format.py48
-rw-r--r--third_party/aom/.mailmap34
-rw-r--r--third_party/aom/AUTHORS144
-rw-r--r--third_party/aom/CHANGELOG5
-rw-r--r--third_party/aom/CMakeLists.txt758
-rw-r--r--third_party/aom/LICENSE27
-rw-r--r--third_party/aom/PATENTS108
-rw-r--r--third_party/aom/README.md625
-rw-r--r--third_party/aom/aom/aom.h147
-rw-r--r--third_party/aom/aom/aom_codec.h523
-rw-r--r--third_party/aom/aom/aom_decoder.h364
-rw-r--r--third_party/aom/aom/aom_encoder.h981
-rw-r--r--third_party/aom/aom/aom_frame_buffer.h84
-rw-r--r--third_party/aom/aom/aom_image.h331
-rw-r--r--third_party/aom/aom/aom_integer.h106
-rw-r--r--third_party/aom/aom/aomcx.h1198
-rw-r--r--third_party/aom/aom/aomdx.h302
-rw-r--r--third_party/aom/aom/exports_com32
-rw-r--r--third_party/aom/aom/exports_dec10
-rw-r--r--third_party/aom/aom/exports_enc18
-rw-r--r--third_party/aom/aom/exports_test2
-rw-r--r--third_party/aom/aom/internal/aom_codec_internal.h441
-rw-r--r--third_party/aom/aom/src/aom_codec.c157
-rw-r--r--third_party/aom/aom/src/aom_decoder.c180
-rw-r--r--third_party/aom/aom/src/aom_encoder.c402
-rw-r--r--third_party/aom/aom/src/aom_image.c265
-rw-r--r--third_party/aom/aom/src/aom_integer.c105
-rw-r--r--third_party/aom/aom_dsp/add_noise.c73
-rw-r--r--third_party/aom/aom_dsp/aom_convolve.c238
-rw-r--r--third_party/aom/aom_dsp/aom_dsp.cmake356
-rw-r--r--third_party/aom/aom_dsp/aom_dsp_common.h98
-rw-r--r--third_party/aom/aom_dsp/aom_dsp_rtcd.c18
-rwxr-xr-xthird_party/aom/aom_dsp/aom_dsp_rtcd_defs.pl1575
-rw-r--r--third_party/aom/aom_dsp/aom_filter.h56
-rw-r--r--third_party/aom/aom_dsp/aom_simd.h38
-rw-r--r--third_party/aom/aom_dsp/aom_simd_inline.h21
-rw-r--r--third_party/aom/aom_dsp/arm/blend_a64_mask_neon.c451
-rw-r--r--third_party/aom/aom_dsp/arm/fwd_txfm_neon.c222
-rw-r--r--third_party/aom/aom_dsp/arm/intrapred_neon.c590
-rw-r--r--third_party/aom/aom_dsp/arm/loopfilter_neon.c928
-rw-r--r--third_party/aom/aom_dsp/arm/sad4d_neon.c226
-rw-r--r--third_party/aom/aom_dsp/arm/sad_neon.c224
-rw-r--r--third_party/aom/aom_dsp/arm/subpel_variance_neon.c131
-rw-r--r--third_party/aom/aom_dsp/arm/subtract_neon.c81
-rw-r--r--third_party/aom/aom_dsp/arm/variance_neon.c400
-rw-r--r--third_party/aom/aom_dsp/binary_codes_reader.c123
-rw-r--r--third_party/aom/aom_dsp/binary_codes_reader.h47
-rw-r--r--third_party/aom/aom_dsp/binary_codes_writer.c210
-rw-r--r--third_party/aom/aom_dsp/binary_codes_writer.h68
-rw-r--r--third_party/aom/aom_dsp/bitreader.h160
-rw-r--r--third_party/aom/aom_dsp/bitreader_buffer.c67
-rw-r--r--third_party/aom/aom_dsp/bitreader_buffer.h50
-rw-r--r--third_party/aom/aom_dsp/bitwriter.h89
-rw-r--r--third_party/aom/aom_dsp/bitwriter_buffer.c87
-rw-r--r--third_party/aom/aom_dsp/bitwriter_buffer.h51
-rw-r--r--third_party/aom/aom_dsp/blend.h45
-rw-r--r--third_party/aom/aom_dsp/blend_a64_hmask.c69
-rw-r--r--third_party/aom/aom_dsp/blend_a64_mask.c345
-rw-r--r--third_party/aom/aom_dsp/blend_a64_vmask.c71
-rw-r--r--third_party/aom/aom_dsp/buf_ans.c70
-rw-r--r--third_party/aom/aom_dsp/buf_ans.h136
-rw-r--r--third_party/aom/aom_dsp/daalaboolreader.c47
-rw-r--r--third_party/aom/aom_dsp/daalaboolreader.h160
-rw-r--r--third_party/aom/aom_dsp/daalaboolwriter.c31
-rw-r--r--third_party/aom/aom_dsp/daalaboolwriter.h78
-rw-r--r--third_party/aom/aom_dsp/entcode.c49
-rw-r--r--third_party/aom/aom_dsp/entcode.h40
-rw-r--r--third_party/aom/aom_dsp/entdec.c229
-rw-r--r--third_party/aom/aom_dsp/entdec.h83
-rw-r--r--third_party/aom/aom_dsp/entenc.c423
-rw-r--r--third_party/aom/aom_dsp/entenc.h85
-rw-r--r--third_party/aom/aom_dsp/fastssim.c487
-rw-r--r--third_party/aom/aom_dsp/fft.c219
-rw-r--r--third_party/aom/aom_dsp/fft_common.h1050
-rw-r--r--third_party/aom/aom_dsp/fwd_txfm.c103
-rw-r--r--third_party/aom/aom_dsp/grain_synthesis.c1409
-rw-r--r--third_party/aom/aom_dsp/grain_synthesis.h122
-rw-r--r--third_party/aom/aom_dsp/grain_table.c333
-rw-r--r--third_party/aom/aom_dsp/grain_table.h102
-rw-r--r--third_party/aom/aom_dsp/intrapred.c792
-rw-r--r--third_party/aom/aom_dsp/intrapred_common.h47
-rw-r--r--third_party/aom/aom_dsp/loopfilter.c925
-rw-r--r--third_party/aom/aom_dsp/mips/add_noise_msa.c61
-rw-r--r--third_party/aom/aom_dsp/mips/aom_convolve8_horiz_msa.c694
-rw-r--r--third_party/aom/aom_dsp/mips/aom_convolve8_vert_msa.c701
-rw-r--r--third_party/aom/aom_dsp/mips/aom_convolve_copy_msa.c248
-rw-r--r--third_party/aom/aom_dsp/mips/aom_convolve_msa.h79
-rw-r--r--third_party/aom/aom_dsp/mips/common_dspr2.c31
-rw-r--r--third_party/aom/aom_dsp/mips/common_dspr2.h51
-rw-r--r--third_party/aom/aom_dsp/mips/convolve2_dspr2.c1031
-rw-r--r--third_party/aom/aom_dsp/mips/convolve2_horiz_dspr2.c681
-rw-r--r--third_party/aom/aom_dsp/mips/convolve2_vert_dspr2.c237
-rw-r--r--third_party/aom/aom_dsp/mips/convolve8_dspr2.c222
-rw-r--r--third_party/aom/aom_dsp/mips/convolve8_horiz_dspr2.c879
-rw-r--r--third_party/aom/aom_dsp/mips/convolve8_vert_dspr2.c361
-rw-r--r--third_party/aom/aom_dsp/mips/convolve_common_dspr2.h48
-rw-r--r--third_party/aom/aom_dsp/mips/intrapred16_dspr2.c327
-rw-r--r--third_party/aom/aom_dsp/mips/intrapred4_dspr2.c82
-rw-r--r--third_party/aom/aom_dsp/mips/intrapred8_dspr2.c150
-rw-r--r--third_party/aom/aom_dsp/mips/intrapred_msa.c550
-rw-r--r--third_party/aom/aom_dsp/mips/loopfilter_16_msa.c1488
-rw-r--r--third_party/aom/aom_dsp/mips/loopfilter_4_msa.c147
-rw-r--r--third_party/aom/aom_dsp/mips/loopfilter_8_msa.c333
-rw-r--r--third_party/aom/aom_dsp/mips/loopfilter_filters_dspr2.c328
-rw-r--r--third_party/aom/aom_dsp/mips/loopfilter_filters_dspr2.h736
-rw-r--r--third_party/aom/aom_dsp/mips/loopfilter_macros_dspr2.h437
-rw-r--r--third_party/aom/aom_dsp/mips/loopfilter_masks_dspr2.h357
-rw-r--r--third_party/aom/aom_dsp/mips/loopfilter_mb_dspr2.c590
-rw-r--r--third_party/aom/aom_dsp/mips/loopfilter_mb_horiz_dspr2.c734
-rw-r--r--third_party/aom/aom_dsp/mips/loopfilter_mb_vert_dspr2.c758
-rw-r--r--third_party/aom/aom_dsp/mips/loopfilter_msa.h251
-rw-r--r--third_party/aom/aom_dsp/mips/macros_msa.h2058
-rw-r--r--third_party/aom/aom_dsp/mips/sad_msa.c800
-rw-r--r--third_party/aom/aom_dsp/mips/sub_pixel_variance_msa.c1792
-rw-r--r--third_party/aom/aom_dsp/mips/subtract_msa.c266
-rw-r--r--third_party/aom/aom_dsp/mips/variance_msa.c633
-rw-r--r--third_party/aom/aom_dsp/noise_model.c1648
-rw-r--r--third_party/aom/aom_dsp/noise_model.h323
-rw-r--r--third_party/aom/aom_dsp/noise_util.c221
-rw-r--r--third_party/aom/aom_dsp/noise_util.h68
-rw-r--r--third_party/aom/aom_dsp/postproc.h26
-rw-r--r--third_party/aom/aom_dsp/prob.h671
-rw-r--r--third_party/aom/aom_dsp/psnr.c381
-rw-r--r--third_party/aom/aom_dsp/psnr.h79
-rw-r--r--third_party/aom/aom_dsp/psnrhvs.c272
-rw-r--r--third_party/aom/aom_dsp/quantize.c206
-rw-r--r--third_party/aom/aom_dsp/quantize.h59
-rw-r--r--third_party/aom/aom_dsp/sad.c304
-rw-r--r--third_party/aom/aom_dsp/sad_av1.c248
-rw-r--r--third_party/aom/aom_dsp/simd/v128_intrinsics.h344
-rw-r--r--third_party/aom/aom_dsp/simd/v128_intrinsics_arm.h958
-rw-r--r--third_party/aom/aom_dsp/simd/v128_intrinsics_c.h888
-rw-r--r--third_party/aom/aom_dsp/simd/v128_intrinsics_x86.h656
-rw-r--r--third_party/aom/aom_dsp/simd/v256_intrinsics.h376
-rw-r--r--third_party/aom/aom_dsp/simd/v256_intrinsics_arm.h17
-rw-r--r--third_party/aom/aom_dsp/simd/v256_intrinsics_c.h953
-rw-r--r--third_party/aom/aom_dsp/simd/v256_intrinsics_v128.h873
-rw-r--r--third_party/aom/aom_dsp/simd/v256_intrinsics_x86.h750
-rw-r--r--third_party/aom/aom_dsp/simd/v64_intrinsics.h232
-rw-r--r--third_party/aom/aom_dsp/simd/v64_intrinsics_arm.h680
-rw-r--r--third_party/aom/aom_dsp/simd/v64_intrinsics_c.h968
-rw-r--r--third_party/aom/aom_dsp/simd/v64_intrinsics_x86.h491
-rw-r--r--third_party/aom/aom_dsp/sse.c52
-rw-r--r--third_party/aom/aom_dsp/ssim.c439
-rw-r--r--third_party/aom/aom_dsp/ssim.h87
-rw-r--r--third_party/aom/aom_dsp/subtract.c53
-rw-r--r--third_party/aom/aom_dsp/sum_squares.c40
-rw-r--r--third_party/aom/aom_dsp/txfm_common.h91
-rw-r--r--third_party/aom/aom_dsp/variance.c1579
-rw-r--r--third_party/aom/aom_dsp/variance.h130
-rw-r--r--third_party/aom/aom_dsp/x86/aom_asm_stubs.c89
-rw-r--r--third_party/aom/aom_dsp/x86/aom_convolve_copy_sse2.asm297
-rw-r--r--third_party/aom/aom_dsp/x86/aom_high_subpixel_8t_sse2.asm613
-rw-r--r--third_party/aom/aom_dsp/x86/aom_high_subpixel_bilinear_sse2.asm338
-rw-r--r--third_party/aom/aom_dsp/x86/aom_subpixel_8t_intrin_avx2.c1441
-rw-r--r--third_party/aom/aom_dsp/x86/aom_subpixel_8t_intrin_ssse3.c315
-rw-r--r--third_party/aom/aom_dsp/x86/aom_subpixel_8t_sse2.asm615
-rw-r--r--third_party/aom/aom_dsp/x86/aom_subpixel_8t_ssse3.asm870
-rw-r--r--third_party/aom/aom_dsp/x86/aom_subpixel_bilinear_sse2.asm295
-rw-r--r--third_party/aom/aom_dsp/x86/aom_subpixel_bilinear_ssse3.asm267
-rw-r--r--third_party/aom/aom_dsp/x86/blend_a64_hmask_sse4.c34
-rw-r--r--third_party/aom/aom_dsp/x86/blend_a64_mask_avx2.c900
-rw-r--r--third_party/aom/aom_dsp/x86/blend_a64_mask_sse4.c1109
-rw-r--r--third_party/aom/aom_dsp/x86/blend_a64_vmask_sse4.c283
-rw-r--r--third_party/aom/aom_dsp/x86/blend_mask_sse4.h237
-rw-r--r--third_party/aom/aom_dsp/x86/blend_sse4.h191
-rw-r--r--third_party/aom/aom_dsp/x86/common_avx2.h147
-rw-r--r--third_party/aom/aom_dsp/x86/convolve.h178
-rw-r--r--third_party/aom/aom_dsp/x86/convolve_avx2.h199
-rw-r--r--third_party/aom/aom_dsp/x86/convolve_common_intrin.h31
-rw-r--r--third_party/aom/aom_dsp/x86/convolve_sse2.h121
-rw-r--r--third_party/aom/aom_dsp/x86/convolve_sse4_1.h53
-rw-r--r--third_party/aom/aom_dsp/x86/fft_avx2.c73
-rw-r--r--third_party/aom/aom_dsp/x86/fft_sse2.c166
-rw-r--r--third_party/aom/aom_dsp/x86/fwd_txfm_impl_sse2.h344
-rw-r--r--third_party/aom/aom_dsp/x86/fwd_txfm_sse2.c69
-rw-r--r--third_party/aom/aom_dsp/x86/fwd_txfm_sse2.h155
-rw-r--r--third_party/aom/aom_dsp/x86/fwd_txfm_ssse3_x86_64.asm379
-rw-r--r--third_party/aom/aom_dsp/x86/highbd_convolve_avx2.c998
-rw-r--r--third_party/aom/aom_dsp/x86/highbd_convolve_ssse3.c251
-rw-r--r--third_party/aom/aom_dsp/x86/highbd_intrapred_sse2.c984
-rw-r--r--third_party/aom/aom_dsp/x86/highbd_intrapred_sse2_asm.asm259
-rw-r--r--third_party/aom/aom_dsp/x86/highbd_loopfilter_avx2.c66
-rw-r--r--third_party/aom/aom_dsp/x86/highbd_loopfilter_sse2.c1697
-rw-r--r--third_party/aom/aom_dsp/x86/highbd_quantize_intrin_avx2.c160
-rw-r--r--third_party/aom/aom_dsp/x86/highbd_quantize_intrin_sse2.c148
-rw-r--r--third_party/aom/aom_dsp/x86/highbd_sad4d_sse2.asm296
-rw-r--r--third_party/aom/aom_dsp/x86/highbd_sad_sse2.asm374
-rw-r--r--third_party/aom/aom_dsp/x86/highbd_subpel_variance_impl_sse2.asm1036
-rw-r--r--third_party/aom/aom_dsp/x86/highbd_subtract_sse2.c267
-rw-r--r--third_party/aom/aom_dsp/x86/highbd_variance_avx2.c140
-rw-r--r--third_party/aom/aom_dsp/x86/highbd_variance_impl_sse2.asm318
-rw-r--r--third_party/aom/aom_dsp/x86/highbd_variance_sse2.c868
-rw-r--r--third_party/aom/aom_dsp/x86/highbd_variance_sse4.c216
-rw-r--r--third_party/aom/aom_dsp/x86/intrapred_avx2.c811
-rw-r--r--third_party/aom/aom_dsp/x86/intrapred_sse2.c1430
-rw-r--r--third_party/aom/aom_dsp/x86/intrapred_sse2_asm.asm625
-rw-r--r--third_party/aom/aom_dsp/x86/intrapred_ssse3.c1692
-rw-r--r--third_party/aom/aom_dsp/x86/inv_wht_sse2.asm107
-rw-r--r--third_party/aom/aom_dsp/x86/jnt_sad_ssse3.c238
-rw-r--r--third_party/aom/aom_dsp/x86/jnt_variance_ssse3.c192
-rw-r--r--third_party/aom/aom_dsp/x86/loopfilter_sse2.c2385
-rw-r--r--third_party/aom/aom_dsp/x86/lpf_common_sse2.h215
-rw-r--r--third_party/aom/aom_dsp/x86/masked_sad_intrin_avx2.c389
-rw-r--r--third_party/aom/aom_dsp/x86/masked_sad_intrin_ssse3.c402
-rw-r--r--third_party/aom/aom_dsp/x86/masked_sad_intrin_ssse3.h33
-rw-r--r--third_party/aom/aom_dsp/x86/masked_variance_intrin_ssse3.c1064
-rw-r--r--third_party/aom/aom_dsp/x86/masked_variance_intrin_ssse3.h92
-rw-r--r--third_party/aom/aom_dsp/x86/mem_sse2.h42
-rw-r--r--third_party/aom/aom_dsp/x86/obmc_intrinsic_sse4.h58
-rw-r--r--third_party/aom/aom_dsp/x86/obmc_intrinsic_ssse3.h54
-rw-r--r--third_party/aom/aom_dsp/x86/obmc_sad_avx2.c270
-rw-r--r--third_party/aom/aom_dsp/x86/obmc_sad_sse4.c268
-rw-r--r--third_party/aom/aom_dsp/x86/obmc_variance_avx2.c190
-rw-r--r--third_party/aom/aom_dsp/x86/obmc_variance_sse4.c380
-rw-r--r--third_party/aom/aom_dsp/x86/quantize_avx_x86_64.asm435
-rw-r--r--third_party/aom/aom_dsp/x86/quantize_sse2.c147
-rw-r--r--third_party/aom/aom_dsp/x86/quantize_ssse3_x86_64.asm272
-rw-r--r--third_party/aom/aom_dsp/x86/quantize_x86.h77
-rw-r--r--third_party/aom/aom_dsp/x86/sad4d_avx2.c218
-rw-r--r--third_party/aom/aom_dsp/x86/sad4d_sse2.asm257
-rw-r--r--third_party/aom/aom_dsp/x86/sad_avx2.c189
-rw-r--r--third_party/aom/aom_dsp/x86/sad_highbd_avx2.c1038
-rw-r--r--third_party/aom/aom_dsp/x86/sad_impl_avx2.c234
-rw-r--r--third_party/aom/aom_dsp/x86/sad_sse2.asm353
-rw-r--r--third_party/aom/aom_dsp/x86/sse_avx2.c250
-rw-r--r--third_party/aom/aom_dsp/x86/sse_sse4.c241
-rw-r--r--third_party/aom/aom_dsp/x86/ssim_opt_x86_64.asm222
-rw-r--r--third_party/aom/aom_dsp/x86/subpel_variance_sse2.asm1481
-rw-r--r--third_party/aom/aom_dsp/x86/subtract_avx2.c108
-rw-r--r--third_party/aom/aom_dsp/x86/subtract_sse2.asm146
-rw-r--r--third_party/aom/aom_dsp/x86/sum_squares_avx2.c79
-rw-r--r--third_party/aom/aom_dsp/x86/sum_squares_sse2.c203
-rw-r--r--third_party/aom/aom_dsp/x86/sum_squares_sse2.h22
-rw-r--r--third_party/aom/aom_dsp/x86/synonyms.h114
-rw-r--r--third_party/aom/aom_dsp/x86/synonyms_avx2.h74
-rw-r--r--third_party/aom/aom_dsp/x86/transpose_sse2.h420
-rw-r--r--third_party/aom/aom_dsp/x86/txfm_common_avx2.h199
-rw-r--r--third_party/aom/aom_dsp/x86/txfm_common_sse2.h29
-rw-r--r--third_party/aom/aom_dsp/x86/variance_avx2.c517
-rw-r--r--third_party/aom/aom_dsp/x86/variance_impl_avx2.c517
-rw-r--r--third_party/aom/aom_dsp/x86/variance_impl_ssse3.c129
-rw-r--r--third_party/aom/aom_dsp/x86/variance_sse2.c806
-rw-r--r--third_party/aom/aom_mem/aom_mem.c84
-rw-r--r--third_party/aom/aom_mem/aom_mem.cmake26
-rw-r--r--third_party/aom/aom_mem/aom_mem.h70
-rw-r--r--third_party/aom/aom_mem/include/aom_mem_intrnl.h33
-rw-r--r--third_party/aom/aom_ports/aom_once.h138
-rw-r--r--third_party/aom/aom_ports/aom_ports.cmake81
-rw-r--r--third_party/aom/aom_ports/aom_timer.h111
-rw-r--r--third_party/aom/aom_ports/arm.h41
-rw-r--r--third_party/aom/aom_ports/arm_cpudetect.c150
-rw-r--r--third_party/aom/aom_ports/bitops.h78
-rw-r--r--third_party/aom/aom_ports/emmintrin_compat.h56
-rw-r--r--third_party/aom/aom_ports/emms.asm41
-rw-r--r--third_party/aom/aom_ports/mem.h69
-rw-r--r--third_party/aom/aom_ports/mem_ops.h228
-rw-r--r--third_party/aom/aom_ports/mem_ops_aligned.h173
-rw-r--r--third_party/aom/aom_ports/msvc.h75
-rw-r--r--third_party/aom/aom_ports/ppc.h30
-rw-r--r--third_party/aom/aom_ports/ppc_cpudetect.c82
-rw-r--r--third_party/aom/aom_ports/sanitizer.h38
-rw-r--r--third_party/aom/aom_ports/system_state.h23
-rw-r--r--third_party/aom/aom_ports/x86.h325
-rw-r--r--third_party/aom/aom_ports/x86_abi_support.asm395
-rw-r--r--third_party/aom/aom_scale/aom_scale.cmake38
-rw-r--r--third_party/aom/aom_scale/aom_scale.h23
-rw-r--r--third_party/aom/aom_scale/aom_scale_rtcd.c18
-rw-r--r--third_party/aom/aom_scale/aom_scale_rtcd.pl52
-rw-r--r--third_party/aom/aom_scale/generic/aom_scale.c506
-rw-r--r--third_party/aom/aom_scale/generic/gen_scalers.c201
-rw-r--r--third_party/aom/aom_scale/generic/yv12config.c203
-rw-r--r--third_party/aom/aom_scale/generic/yv12extend.c411
-rw-r--r--third_party/aom/aom_scale/mips/dspr2/yv12extend_dspr2.c142
-rw-r--r--third_party/aom/aom_scale/yv12config.h143
-rw-r--r--third_party/aom/aom_util/aom_thread.c184
-rw-r--r--third_party/aom/aom_util/aom_thread.h430
-rw-r--r--third_party/aom/aom_util/aom_util.cmake28
-rw-r--r--third_party/aom/aom_util/debug_util.c275
-rw-r--r--third_party/aom/aom_util/debug_util.h69
-rw-r--r--third_party/aom/aom_util/endian_inl.h122
-rw-r--r--third_party/aom/apps/aomdec.c1046
-rw-r--r--third_party/aom/apps/aomenc.c2391
-rw-r--r--third_party/aom/apps/aomenc.h62
-rw-r--r--third_party/aom/av1/av1.cmake469
-rw-r--r--third_party/aom/av1/av1_cx_iface.c1908
-rw-r--r--third_party/aom/av1/av1_dx_iface.c1328
-rw-r--r--third_party/aom/av1/av1_iface_common.h136
-rw-r--r--third_party/aom/av1/common/alloccommon.c300
-rw-r--r--third_party/aom/av1/common/alloccommon.h48
-rw-r--r--third_party/aom/av1/common/arm/av1_inv_txfm_neon.c3231
-rw-r--r--third_party/aom/av1/common/arm/av1_inv_txfm_neon.h154
-rw-r--r--third_party/aom/av1/common/arm/av1_txfm_neon.c28
-rw-r--r--third_party/aom/av1/common/arm/blend_a64_hmask_neon.c134
-rw-r--r--third_party/aom/av1/common/arm/blend_a64_vmask_neon.c141
-rw-r--r--third_party/aom/av1/common/arm/cfl_neon.c584
-rw-r--r--third_party/aom/av1/common/arm/convolve_neon.c1455
-rw-r--r--third_party/aom/av1/common/arm/convolve_neon.h228
-rw-r--r--third_party/aom/av1/common/arm/jnt_convolve_neon.c1740
-rw-r--r--third_party/aom/av1/common/arm/mem_neon.h494
-rw-r--r--third_party/aom/av1/common/arm/reconinter_neon.c86
-rw-r--r--third_party/aom/av1/common/arm/selfguided_neon.c1508
-rw-r--r--third_party/aom/av1/common/arm/transpose_neon.h537
-rw-r--r--third_party/aom/av1/common/arm/warp_plane_neon.c714
-rw-r--r--third_party/aom/av1/common/arm/wiener_convolve_neon.c530
-rw-r--r--third_party/aom/av1/common/av1_inv_txfm1d.c1846
-rw-r--r--third_party/aom/av1/common/av1_inv_txfm1d.h61
-rw-r--r--third_party/aom/av1/common/av1_inv_txfm1d_cfg.h47
-rw-r--r--third_party/aom/av1/common/av1_inv_txfm2d.c505
-rw-r--r--third_party/aom/av1/common/av1_loopfilter.c2377
-rw-r--r--third_party/aom/av1/common/av1_loopfilter.h227
-rw-r--r--third_party/aom/av1/common/av1_rtcd.c22
-rwxr-xr-xthird_party/aom/av1/common/av1_rtcd_defs.pl398
-rw-r--r--third_party/aom/av1/common/av1_txfm.c160
-rw-r--r--third_party/aom/av1/common/av1_txfm.h232
-rw-r--r--third_party/aom/av1/common/blockd.c140
-rw-r--r--third_party/aom/av1/common/blockd.h1176
-rw-r--r--third_party/aom/av1/common/cdef.c403
-rw-r--r--third_party/aom/av1/common/cdef.h51
-rw-r--r--third_party/aom/av1/common/cdef_block.c257
-rw-r--r--third_party/aom/av1/common/cdef_block.h59
-rw-r--r--third_party/aom/av1/common/cdef_block_avx2.c14
-rw-r--r--third_party/aom/av1/common/cdef_block_neon.c14
-rw-r--r--third_party/aom/av1/common/cdef_block_simd.h920
-rw-r--r--third_party/aom/av1/common/cdef_block_sse2.c14
-rw-r--r--third_party/aom/av1/common/cdef_block_sse4.c14
-rw-r--r--third_party/aom/av1/common/cdef_block_ssse3.c14
-rw-r--r--third_party/aom/av1/common/cfl.c448
-rw-r--r--third_party/aom/av1/common/cfl.h302
-rw-r--r--third_party/aom/av1/common/common.h63
-rw-r--r--third_party/aom/av1/common/common_data.h446
-rw-r--r--third_party/aom/av1/common/convolve.c1295
-rw-r--r--third_party/aom/av1/common/convolve.h125
-rw-r--r--third_party/aom/av1/common/debugmodes.c107
-rw-r--r--third_party/aom/av1/common/entropy.c178
-rw-r--r--third_party/aom/av1/common/entropy.h181
-rw-r--r--third_party/aom/av1/common/entropymode.c1103
-rw-r--r--third_party/aom/av1/common/entropymode.h212
-rw-r--r--third_party/aom/av1/common/entropymv.c67
-rw-r--r--third_party/aom/av1/common/entropymv.h104
-rw-r--r--third_party/aom/av1/common/enums.h619
-rw-r--r--third_party/aom/av1/common/filter.h214
-rw-r--r--third_party/aom/av1/common/frame_buffers.c91
-rw-r--r--third_party/aom/av1/common/frame_buffers.h60
-rw-r--r--third_party/aom/av1/common/idct.c322
-rw-r--r--third_party/aom/av1/common/idct.h67
-rw-r--r--third_party/aom/av1/common/mv.h301
-rw-r--r--third_party/aom/av1/common/mvref_common.c1523
-rw-r--r--third_party/aom/av1/common/mvref_common.h361
-rw-r--r--third_party/aom/av1/common/obmc.h91
-rw-r--r--third_party/aom/av1/common/obu_util.c147
-rw-r--r--third_party/aom/av1/common/obu_util.h47
-rw-r--r--third_party/aom/av1/common/odintrin.c541
-rw-r--r--third_party/aom/av1/common/odintrin.h96
-rw-r--r--third_party/aom/av1/common/onyxc_int.h1342
-rw-r--r--third_party/aom/av1/common/ppc/cfl_ppc.c152
-rw-r--r--third_party/aom/av1/common/pred_common.c501
-rw-r--r--third_party/aom/av1/common/pred_common.h360
-rw-r--r--third_party/aom/av1/common/quant_common.c13676
-rw-r--r--third_party/aom/av1/common/quant_common.h63
-rw-r--r--third_party/aom/av1/common/reconinter.c1162
-rw-r--r--third_party/aom/av1/common/reconinter.h365
-rw-r--r--third_party/aom/av1/common/reconintra.c1640
-rw-r--r--third_party/aom/av1/common/reconintra.h119
-rw-r--r--third_party/aom/av1/common/resize.c1280
-rw-r--r--third_party/aom/av1/common/resize.h112
-rw-r--r--third_party/aom/av1/common/restoration.c1556
-rw-r--r--third_party/aom/av1/common/restoration.h377
-rw-r--r--third_party/aom/av1/common/scale.c126
-rw-r--r--third_party/aom/av1/common/scale.h67
-rw-r--r--third_party/aom/av1/common/scan.c3735
-rw-r--r--third_party/aom/av1/common/scan.h55
-rw-r--r--third_party/aom/av1/common/seg_common.c84
-rw-r--r--third_party/aom/av1/common/seg_common.h104
-rw-r--r--third_party/aom/av1/common/thread_common.c786
-rw-r--r--third_party/aom/av1/common/thread_common.h119
-rw-r--r--third_party/aom/av1/common/tile_common.c207
-rw-r--r--third_party/aom/av1/common/tile_common.h72
-rw-r--r--third_party/aom/av1/common/timing.c79
-rw-r--r--third_party/aom/av1/common/timing.h59
-rw-r--r--third_party/aom/av1/common/token_cdfs.h3555
-rw-r--r--third_party/aom/av1/common/txb_common.c475
-rw-r--r--third_party/aom/av1/common/txb_common.h424
-rw-r--r--third_party/aom/av1/common/warped_motion.c1148
-rw-r--r--third_party/aom/av1/common/warped_motion.h95
-rw-r--r--third_party/aom/av1/common/x86/av1_convolve_horiz_rs_sse4.c228
-rw-r--r--third_party/aom/av1/common/x86/av1_convolve_scale_sse4.c499
-rw-r--r--third_party/aom/av1/common/x86/av1_highbd_convolve_sse4.c205
-rw-r--r--third_party/aom/av1/common/x86/av1_inv_txfm_avx2.c1945
-rw-r--r--third_party/aom/av1/common/x86/av1_inv_txfm_avx2.h71
-rw-r--r--third_party/aom/av1/common/x86/av1_inv_txfm_ssse3.c2923
-rw-r--r--third_party/aom/av1/common/x86/av1_inv_txfm_ssse3.h232
-rw-r--r--third_party/aom/av1/common/x86/av1_txfm_sse2.h317
-rw-r--r--third_party/aom/av1/common/x86/av1_txfm_sse4.c21
-rw-r--r--third_party/aom/av1/common/x86/av1_txfm_sse4.h72
-rw-r--r--third_party/aom/av1/common/x86/cfl_avx2.c491
-rw-r--r--third_party/aom/av1/common/x86/cfl_simd.h243
-rw-r--r--third_party/aom/av1/common/x86/cfl_sse2.c89
-rw-r--r--third_party/aom/av1/common/x86/cfl_ssse3.c393
-rw-r--r--third_party/aom/av1/common/x86/convolve_2d_avx2.c283
-rw-r--r--third_party/aom/av1/common/x86/convolve_2d_sse2.c472
-rw-r--r--third_party/aom/av1/common/x86/convolve_avx2.c277
-rw-r--r--third_party/aom/av1/common/x86/convolve_sse2.c338
-rw-r--r--third_party/aom/av1/common/x86/filterintra_sse4.c75
-rw-r--r--third_party/aom/av1/common/x86/highbd_convolve_2d_avx2.c326
-rw-r--r--third_party/aom/av1/common/x86/highbd_convolve_2d_sse2.c191
-rw-r--r--third_party/aom/av1/common/x86/highbd_convolve_2d_sse4.c420
-rw-r--r--third_party/aom/av1/common/x86/highbd_convolve_2d_ssse3.c217
-rw-r--r--third_party/aom/av1/common/x86/highbd_inv_txfm_avx2.c1349
-rw-r--r--third_party/aom/av1/common/x86/highbd_inv_txfm_sse4.c5348
-rw-r--r--third_party/aom/av1/common/x86/highbd_jnt_convolve_avx2.c846
-rw-r--r--third_party/aom/av1/common/x86/highbd_jnt_convolve_sse4.c383
-rw-r--r--third_party/aom/av1/common/x86/highbd_txfm_utility_sse4.h125
-rw-r--r--third_party/aom/av1/common/x86/highbd_warp_plane_sse4.c624
-rw-r--r--third_party/aom/av1/common/x86/highbd_wiener_convolve_avx2.c245
-rw-r--r--third_party/aom/av1/common/x86/highbd_wiener_convolve_ssse3.c202
-rw-r--r--third_party/aom/av1/common/x86/intra_edge_sse4.c318
-rw-r--r--third_party/aom/av1/common/x86/jnt_convolve_avx2.c633
-rw-r--r--third_party/aom/av1/common/x86/jnt_convolve_sse2.c385
-rw-r--r--third_party/aom/av1/common/x86/jnt_convolve_ssse3.c232
-rw-r--r--third_party/aom/av1/common/x86/reconinter_avx2.c620
-rw-r--r--third_party/aom/av1/common/x86/reconinter_sse4.c153
-rw-r--r--third_party/aom/av1/common/x86/reconinter_ssse3.c116
-rw-r--r--third_party/aom/av1/common/x86/selfguided_avx2.c724
-rw-r--r--third_party/aom/av1/common/x86/selfguided_sse4.c660
-rw-r--r--third_party/aom/av1/common/x86/warp_plane_sse4.c942
-rw-r--r--third_party/aom/av1/common/x86/wiener_convolve_avx2.c261
-rw-r--r--third_party/aom/av1/common/x86/wiener_convolve_sse2.c199
-rw-r--r--third_party/aom/av1/decoder/accounting.c138
-rw-r--r--third_party/aom/av1/decoder/accounting.h82
-rw-r--r--third_party/aom/av1/decoder/decodeframe.c5567
-rw-r--r--third_party/aom/av1/decoder/decodeframe.h85
-rw-r--r--third_party/aom/av1/decoder/decodemv.c1560
-rw-r--r--third_party/aom/av1/decoder/decodemv.h35
-rw-r--r--third_party/aom/av1/decoder/decoder.c575
-rw-r--r--third_party/aom/av1/decoder/decoder.h317
-rw-r--r--third_party/aom/av1/decoder/decodetxb.c362
-rw-r--r--third_party/aom/av1/decoder/decodetxb.h32
-rw-r--r--third_party/aom/av1/decoder/detokenize.c78
-rw-r--r--third_party/aom/av1/decoder/detokenize.h29
-rw-r--r--third_party/aom/av1/decoder/dthread.c192
-rw-r--r--third_party/aom/av1/decoder/dthread.h82
-rw-r--r--third_party/aom/av1/decoder/inspection.c117
-rw-r--r--third_party/aom/av1/decoder/inspection.h84
-rw-r--r--third_party/aom/av1/decoder/obu.c839
-rw-r--r--third_party/aom/av1/decoder/obu.h31
-rw-r--r--third_party/aom/av1/encoder/aq_complexity.c172
-rw-r--r--third_party/aom/av1/encoder/aq_complexity.h37
-rw-r--r--third_party/aom/av1/encoder/aq_cyclicrefresh.c580
-rw-r--r--third_party/aom/av1/encoder/aq_cyclicrefresh.h98
-rw-r--r--third_party/aom/av1/encoder/aq_variance.c202
-rw-r--r--third_party/aom/av1/encoder/aq_variance.h33
-rw-r--r--third_party/aom/av1/encoder/arm/neon/quantize_neon.c118
-rw-r--r--third_party/aom/av1/encoder/av1_fwd_txfm1d.c1885
-rw-r--r--third_party/aom/av1/encoder/av1_fwd_txfm1d.h49
-rw-r--r--third_party/aom/av1/encoder/av1_fwd_txfm1d_cfg.h19
-rw-r--r--third_party/aom/av1/encoder/av1_fwd_txfm2d.c431
-rw-r--r--third_party/aom/av1/encoder/av1_quantize.c738
-rw-r--r--third_party/aom/av1/encoder/av1_quantize.h148
-rw-r--r--third_party/aom/av1/encoder/bitstream.c3999
-rw-r--r--third_party/aom/av1/encoder/bitstream.h51
-rw-r--r--third_party/aom/av1/encoder/block.h452
-rw-r--r--third_party/aom/av1/encoder/blockiness.c142
-rw-r--r--third_party/aom/av1/encoder/context_tree.c215
-rw-r--r--third_party/aom/av1/encoder/context_tree.h114
-rw-r--r--third_party/aom/av1/encoder/corner_detect.c37
-rw-r--r--third_party/aom/av1/encoder/corner_detect.h22
-rw-r--r--third_party/aom/av1/encoder/corner_match.c191
-rw-r--r--third_party/aom/av1/encoder/corner_match.h33
-rw-r--r--third_party/aom/av1/encoder/cost.c46
-rw-r--r--third_party/aom/av1/encoder/cost.h47
-rw-r--r--third_party/aom/av1/encoder/dwt.c155
-rw-r--r--third_party/aom/av1/encoder/dwt.h25
-rw-r--r--third_party/aom/av1/encoder/encodeframe.c5739
-rw-r--r--third_party/aom/av1/encoder/encodeframe.h47
-rw-r--r--third_party/aom/av1/encoder/encodemb.c649
-rw-r--r--third_party/aom/av1/encoder/encodemb.h96
-rw-r--r--third_party/aom/av1/encoder/encodemv.c239
-rw-r--r--third_party/aom/av1/encoder/encodemv.h55
-rw-r--r--third_party/aom/av1/encoder/encoder.c6437
-rw-r--r--third_party/aom/av1/encoder/encoder.h985
-rw-r--r--third_party/aom/av1/encoder/encodetxb.c2062
-rw-r--r--third_party/aom/av1/encoder/encodetxb.h87
-rw-r--r--third_party/aom/av1/encoder/ethread.c261
-rw-r--r--third_party/aom/av1/encoder/ethread.h37
-rw-r--r--third_party/aom/av1/encoder/extend.c188
-rw-r--r--third_party/aom/av1/encoder/extend.h32
-rw-r--r--third_party/aom/av1/encoder/firstpass.c3480
-rw-r--r--third_party/aom/av1/encoder/firstpass.h208
-rw-r--r--third_party/aom/av1/encoder/global_motion.c298
-rw-r--r--third_party/aom/av1/encoder/global_motion.h64
-rw-r--r--third_party/aom/av1/encoder/grain_test_vectors.h781
-rw-r--r--third_party/aom/av1/encoder/hash.c125
-rw-r--r--third_party/aom/av1/encoder/hash.h52
-rw-r--r--third_party/aom/av1/encoder/hash_motion.c482
-rw-r--r--third_party/aom/av1/encoder/hash_motion.h78
-rw-r--r--third_party/aom/av1/encoder/hybrid_fwd_txfm.c390
-rw-r--r--third_party/aom/av1/encoder/hybrid_fwd_txfm.h31
-rw-r--r--third_party/aom/av1/encoder/k_means_template.h123
-rw-r--r--third_party/aom/av1/encoder/lookahead.c210
-rw-r--r--third_party/aom/av1/encoder/lookahead.h106
-rw-r--r--third_party/aom/av1/encoder/mathutils.h359
-rw-r--r--third_party/aom/av1/encoder/mbgraph.c401
-rw-r--r--third_party/aom/av1/encoder/mbgraph.h41
-rw-r--r--third_party/aom/av1/encoder/mcomp.c2885
-rw-r--r--third_party/aom/av1/encoder/mcomp.h161
-rw-r--r--third_party/aom/av1/encoder/mips/msa/error_msa.c109
-rw-r--r--third_party/aom/av1/encoder/mips/msa/fdct4x4_msa.c46
-rw-r--r--third_party/aom/av1/encoder/mips/msa/temporal_filter_msa.c285
-rw-r--r--third_party/aom/av1/encoder/ml.c73
-rw-r--r--third_party/aom/av1/encoder/ml.h49
-rw-r--r--third_party/aom/av1/encoder/palette.c154
-rw-r--r--third_party/aom/av1/encoder/palette.h96
-rw-r--r--third_party/aom/av1/encoder/partition_model_weights.h2448
-rw-r--r--third_party/aom/av1/encoder/pickcdef.c526
-rw-r--r--third_party/aom/av1/encoder/picklpf.c263
-rw-r--r--third_party/aom/av1/encoder/picklpf.h30
-rw-r--r--third_party/aom/av1/encoder/pickrst.c1362
-rw-r--r--third_party/aom/av1/encoder/pickrst.h46
-rw-r--r--third_party/aom/av1/encoder/pustats.h198
-rw-r--r--third_party/aom/av1/encoder/random.h29
-rw-r--r--third_party/aom/av1/encoder/ransac.c603
-rw-r--r--third_party/aom/av1/encoder/ransac.h35
-rw-r--r--third_party/aom/av1/encoder/rate_distortion_model_params.h591
-rw-r--r--third_party/aom/av1/encoder/ratectrl.c1776
-rw-r--r--third_party/aom/av1/encoder/ratectrl.h295
-rw-r--r--third_party/aom/av1/encoder/rd.c1512
-rw-r--r--third_party/aom/av1/encoder/rd.h464
-rw-r--r--third_party/aom/av1/encoder/rdopt.c12199
-rw-r--r--third_party/aom/av1/encoder/rdopt.h138
-rw-r--r--third_party/aom/av1/encoder/reconinter_enc.c627
-rw-r--r--third_party/aom/av1/encoder/reconinter_enc.h127
-rw-r--r--third_party/aom/av1/encoder/segmentation.c244
-rw-r--r--third_party/aom/av1/encoder/segmentation.h38
-rw-r--r--third_party/aom/av1/encoder/speed_features.c564
-rw-r--r--third_party/aom/av1/encoder/speed_features.h568
-rw-r--r--third_party/aom/av1/encoder/temporal_filter.c602
-rw-r--r--third_party/aom/av1/encoder/temporal_filter.h25
-rw-r--r--third_party/aom/av1/encoder/tokenize.c248
-rw-r--r--third_party/aom/av1/encoder/tokenize.h73
-rw-r--r--third_party/aom/av1/encoder/tx_prune_model_weights.h1944
-rw-r--r--third_party/aom/av1/encoder/wedge_utils.c125
-rw-r--r--third_party/aom/av1/encoder/x86/av1_fwd_txfm1d_sse4.c1217
-rw-r--r--third_party/aom/av1/encoder/x86/av1_fwd_txfm2d_avx2.c2068
-rw-r--r--third_party/aom/av1/encoder/x86/av1_fwd_txfm2d_sse4.c365
-rw-r--r--third_party/aom/av1/encoder/x86/av1_fwd_txfm_avx2.h103
-rw-r--r--third_party/aom/av1/encoder/x86/av1_fwd_txfm_sse2.c2889
-rw-r--r--third_party/aom/av1/encoder/x86/av1_fwd_txfm_sse2.h117
-rw-r--r--third_party/aom/av1/encoder/x86/av1_highbd_quantize_avx2.c137
-rw-r--r--third_party/aom/av1/encoder/x86/av1_highbd_quantize_sse4.c195
-rw-r--r--third_party/aom/av1/encoder/x86/av1_quantize_avx2.c330
-rw-r--r--third_party/aom/av1/encoder/x86/av1_quantize_sse2.c189
-rw-r--r--third_party/aom/av1/encoder/x86/av1_quantize_ssse3_x86_64.asm204
-rw-r--r--third_party/aom/av1/encoder/x86/av1_ssim_opt_x86_64.asm222
-rw-r--r--third_party/aom/av1/encoder/x86/av1_txfm1d_sse4.h142
-rw-r--r--third_party/aom/av1/encoder/x86/corner_match_sse4.c103
-rw-r--r--third_party/aom/av1/encoder/x86/dct_sse2.asm82
-rw-r--r--third_party/aom/av1/encoder/x86/encodetxb_avx2.c130
-rw-r--r--third_party/aom/av1/encoder/x86/encodetxb_sse2.c505
-rw-r--r--third_party/aom/av1/encoder/x86/encodetxb_sse4.c92
-rw-r--r--third_party/aom/av1/encoder/x86/error_intrin_avx2.c88
-rw-r--r--third_party/aom/av1/encoder/x86/error_sse2.asm79
-rw-r--r--third_party/aom/av1/encoder/x86/hash_sse42.c51
-rw-r--r--third_party/aom/av1/encoder/x86/highbd_block_error_intrin_sse2.c72
-rw-r--r--third_party/aom/av1/encoder/x86/highbd_fwd_txfm_sse4.c1783
-rw-r--r--third_party/aom/av1/encoder/x86/pickrst_avx2.c403
-rw-r--r--third_party/aom/av1/encoder/x86/pickrst_sse4.c389
-rw-r--r--third_party/aom/av1/encoder/x86/temporal_filter_apply_sse2.asm217
-rw-r--r--third_party/aom/av1/encoder/x86/wedge_utils_avx2.c215
-rw-r--r--third_party/aom/av1/encoder/x86/wedge_utils_sse2.c254
-rw-r--r--third_party/aom/av1/exports_com2
-rw-r--r--third_party/aom/av1/exports_dec3
-rw-r--r--third_party/aom/av1/exports_enc2
-rw-r--r--third_party/aom/av1/exports_test2
-rw-r--r--third_party/aom/build/cmake/aom_config.c.template13
-rw-r--r--third_party/aom/build/cmake/aom_config_defaults.cmake196
-rw-r--r--third_party/aom/build/cmake/aom_configure.cmake377
-rw-r--r--third_party/aom/build/cmake/aom_experiment_deps.cmake32
-rw-r--r--third_party/aom/build/cmake/aom_optimization.cmake212
-rw-r--r--third_party/aom/build/cmake/compiler_flags.cmake373
-rw-r--r--third_party/aom/build/cmake/compiler_tests.cmake175
-rw-r--r--third_party/aom/build/cmake/cpu.cmake93
-rw-r--r--third_party/aom/build/cmake/dist.cmake64
-rw-r--r--third_party/aom/build/cmake/exports.cmake65
-rw-r--r--third_party/aom/build/cmake/exports_sources.cmake32
-rw-r--r--third_party/aom/build/cmake/generate_aom_config_templates.cmake101
-rw-r--r--third_party/aom/build/cmake/generate_exports.cmake66
-rw-r--r--third_party/aom/build/cmake/ios-Info.plist37
-rwxr-xr-xthird_party/aom/build/cmake/iosbuild.sh384
-rw-r--r--third_party/aom/build/cmake/msvc_runtime.cmake37
-rw-r--r--third_party/aom/build/cmake/pkg_config.cmake58
-rwxr-xr-xthird_party/aom/build/cmake/rtcd.pl467
-rw-r--r--third_party/aom/build/cmake/sanitizers.cmake38
-rw-r--r--third_party/aom/build/cmake/toolchains/arm-ios-common.cmake26
-rw-r--r--third_party/aom/build/cmake/toolchains/arm64-ios.cmake23
-rw-r--r--third_party/aom/build/cmake/toolchains/arm64-linux-gcc.cmake36
-rw-r--r--third_party/aom/build/cmake/toolchains/arm64-mingw-gcc.cmake32
-rw-r--r--third_party/aom/build/cmake/toolchains/armv7-ios.cmake31
-rw-r--r--third_party/aom/build/cmake/toolchains/armv7-linux-gcc.cmake43
-rw-r--r--third_party/aom/build/cmake/toolchains/armv7-mingw-gcc.cmake32
-rw-r--r--third_party/aom/build/cmake/toolchains/armv7s-ios.cmake31
-rw-r--r--third_party/aom/build/cmake/toolchains/ios-simulator-common.cmake23
-rw-r--r--third_party/aom/build/cmake/toolchains/mips32-linux-gcc.cmake77
-rw-r--r--third_party/aom/build/cmake/toolchains/mips64-linux-gcc.cmake54
-rw-r--r--third_party/aom/build/cmake/toolchains/ppc-linux-gcc.cmake29
-rw-r--r--third_party/aom/build/cmake/toolchains/x86-ios-simulator.cmake28
-rw-r--r--third_party/aom/build/cmake/toolchains/x86-linux.cmake19
-rw-r--r--third_party/aom/build/cmake/toolchains/x86-macos.cmake18
-rw-r--r--third_party/aom/build/cmake/toolchains/x86-mingw-gcc.cmake31
-rw-r--r--third_party/aom/build/cmake/toolchains/x86_64-ios-simulator.cmake25
-rw-r--r--third_party/aom/build/cmake/toolchains/x86_64-mingw-gcc.cmake29
-rw-r--r--third_party/aom/build/cmake/util.cmake171
-rw-r--r--third_party/aom/build/cmake/version.cmake57
-rwxr-xr-xthird_party/aom/build/cmake/version.pl112
-rw-r--r--third_party/aom/codereview.settings4
-rw-r--r--third_party/aom/common/args.c297
-rw-r--r--third_party/aom/common/args.h68
-rw-r--r--third_party/aom/common/av1_config.c511
-rw-r--r--third_party/aom/common/av1_config.h86
-rw-r--r--third_party/aom/common/ivfdec.c110
-rw-r--r--third_party/aom/common/ivfdec.h30
-rw-r--r--third_party/aom/common/ivfenc.c52
-rw-r--r--third_party/aom/common/ivfenc.h34
-rw-r--r--third_party/aom/common/md5_utils.c249
-rw-r--r--third_party/aom/common/md5_utils.h49
-rw-r--r--third_party/aom/common/obudec.c448
-rw-r--r--third_party/aom/common/obudec.h48
-rw-r--r--third_party/aom/common/rawenc.c44
-rw-r--r--third_party/aom/common/rawenc.h32
-rw-r--r--third_party/aom/common/tools_common.c425
-rw-r--r--third_party/aom/common/tools_common.h164
-rw-r--r--third_party/aom/common/video_common.h25
-rw-r--r--third_party/aom/common/video_reader.c123
-rw-r--r--third_party/aom/common/video_reader.h57
-rw-r--r--third_party/aom/common/video_writer.c77
-rw-r--r--third_party/aom/common/video_writer.h45
-rw-r--r--third_party/aom/common/warnings.c97
-rw-r--r--third_party/aom/common/warnings.h34
-rw-r--r--third_party/aom/common/webmdec.cc229
-rw-r--r--third_party/aom/common/webmdec.h71
-rw-r--r--third_party/aom/common/webmenc.cc96
-rw-r--r--third_party/aom/common/webmenc.h56
-rw-r--r--third_party/aom/common/y4menc.c103
-rw-r--r--third_party/aom/common/y4menc.h39
-rw-r--r--third_party/aom/common/y4minput.c1142
-rw-r--r--third_party/aom/common/y4minput.h69
-rw-r--r--third_party/aom/docs.cmake251
-rw-r--r--third_party/aom/examples/analyzer.cc723
-rw-r--r--third_party/aom/examples/aom_cx_set_ref.c385
-rw-r--r--third_party/aom/examples/decode_to_md5.c131
-rw-r--r--third_party/aom/examples/decode_with_drops.c146
-rw-r--r--third_party/aom/examples/encoder_util.c136
-rw-r--r--third_party/aom/examples/encoder_util.h33
-rw-r--r--third_party/aom/examples/inspect.c763
-rw-r--r--third_party/aom/examples/lightfield_bitstream_parsing.c348
-rw-r--r--third_party/aom/examples/lightfield_decoder.c208
-rw-r--r--third_party/aom/examples/lightfield_encoder.c499
-rw-r--r--third_party/aom/examples/lightfield_tile_list_decoder.c161
-rw-r--r--third_party/aom/examples/lossless_encoder.c138
-rw-r--r--third_party/aom/examples/noise_model.c431
-rw-r--r--third_party/aom/examples/resize_util.c124
-rw-r--r--third_party/aom/examples/scalable_decoder.c185
-rw-r--r--third_party/aom/examples/scalable_encoder.c289
-rw-r--r--third_party/aom/examples/set_maps.c208
-rw-r--r--third_party/aom/examples/simple_decoder.c146
-rw-r--r--third_party/aom/examples/simple_encoder.c249
-rw-r--r--third_party/aom/examples/twopass_encoder.c250
-rw-r--r--third_party/aom/keywords.dox51
-rw-r--r--third_party/aom/libs.doxy_template1260
-rw-r--r--third_party/aom/mainpage.dox52
-rw-r--r--third_party/aom/stats/aomstats.c106
-rw-r--r--third_party/aom/stats/aomstats.h44
-rw-r--r--third_party/aom/stats/rate_hist.c271
-rw-r--r--third_party/aom/stats/rate_hist.h41
-rw-r--r--third_party/aom/test/accounting_test.cc75
-rw-r--r--third_party/aom/test/acm_random.h84
-rw-r--r--third_party/aom/test/active_map_test.cc103
-rw-r--r--third_party/aom/test/altref_test.cc97
-rw-r--r--third_party/aom/test/aom_integer_test.cc177
-rwxr-xr-xthird_party/aom/test/aomcx_set_ref.sh58
-rwxr-xr-xthird_party/aom/test/aomdec.sh147
-rwxr-xr-xthird_party/aom/test/aomenc.sh269
-rw-r--r--third_party/aom/test/aq_segment_test.cc108
-rw-r--r--third_party/aom/test/arf_freq_test.cc223
-rw-r--r--third_party/aom/test/av1_config_test.cc164
-rw-r--r--third_party/aom/test/av1_convolve_2d_test.cc249
-rw-r--r--third_party/aom/test/av1_convolve_2d_test_util.cc705
-rw-r--r--third_party/aom/test/av1_convolve_2d_test_util.h117
-rw-r--r--third_party/aom/test/av1_convolve_scale_test.cc529
-rw-r--r--third_party/aom/test/av1_encoder_parms_get_to_decoder.cc158
-rw-r--r--third_party/aom/test/av1_ext_tile_test.cc215
-rw-r--r--third_party/aom/test/av1_fwd_txfm1d_test.cc105
-rw-r--r--third_party/aom/test/av1_fwd_txfm2d_test.cc511
-rw-r--r--third_party/aom/test/av1_highbd_iht_test.cc315
-rw-r--r--third_party/aom/test/av1_horz_only_frame_superres_test.cc362
-rw-r--r--third_party/aom/test/av1_inv_txfm1d_test.cc157
-rw-r--r--third_party/aom/test/av1_inv_txfm2d_test.cc378
-rw-r--r--third_party/aom/test/av1_quantize_test.cc239
-rw-r--r--third_party/aom/test/av1_round_shift_array_test.cc129
-rw-r--r--third_party/aom/test/av1_txfm_test.cc371
-rw-r--r--third_party/aom/test/av1_txfm_test.h135
-rw-r--r--third_party/aom/test/av1_wedge_utils_test.cc390
-rwxr-xr-xthird_party/aom/test/best_encode.sh103
-rw-r--r--third_party/aom/test/binary_codes_test.cc83
-rw-r--r--third_party/aom/test/blend_a64_mask_1d_test.cc339
-rw-r--r--third_party/aom/test/blend_a64_mask_test.cc583
-rw-r--r--third_party/aom/test/blockd_test.cc122
-rw-r--r--third_party/aom/test/boolcoder_test.cc173
-rw-r--r--third_party/aom/test/borders_test.cc85
-rw-r--r--third_party/aom/test/cdef_test.cc425
-rw-r--r--third_party/aom/test/cfl_test.cc567
-rw-r--r--third_party/aom/test/clear_system_state.h31
-rw-r--r--third_party/aom/test/codec_factory.h170
-rw-r--r--third_party/aom/test/coding_path_sync.cc205
-rw-r--r--third_party/aom/test/comp_avg_pred_test.cc72
-rw-r--r--third_party/aom/test/comp_avg_pred_test.h555
-rw-r--r--third_party/aom/test/comp_mask_variance_test.cc574
-rw-r--r--third_party/aom/test/convolve_round_test.cc183
-rw-r--r--third_party/aom/test/convolve_test.cc856
-rw-r--r--third_party/aom/test/corner_match_test.cc100
-rw-r--r--third_party/aom/test/cpu_speed_test.cc180
-rw-r--r--third_party/aom/test/datarate_test.cc255
-rw-r--r--third_party/aom/test/decode_api_test.cc55
-rw-r--r--third_party/aom/test/decode_multithreaded_test.cc185
-rw-r--r--third_party/aom/test/decode_perf_test.cc246
-rw-r--r--third_party/aom/test/decode_test_driver.cc114
-rw-r--r--third_party/aom/test/decode_test_driver.h165
-rwxr-xr-xthird_party/aom/test/decode_to_md5.sh77
-rwxr-xr-xthird_party/aom/test/decode_with_drops.sh68
-rw-r--r--third_party/aom/test/divu_small_test.cc41
-rw-r--r--third_party/aom/test/dr_prediction_test.cc369
-rwxr-xr-xthird_party/aom/test/dump_obu.sh70
-rw-r--r--third_party/aom/test/ec_test.cc159
-rw-r--r--third_party/aom/test/encode_api_test.cc73
-rw-r--r--third_party/aom/test/encode_perf_test.cc188
-rw-r--r--third_party/aom/test/encode_test_driver.cc288
-rw-r--r--third_party/aom/test/encode_test_driver.h249
-rw-r--r--third_party/aom/test/encodetxb_test.cc262
-rw-r--r--third_party/aom/test/end_to_end_test.cc199
-rw-r--r--third_party/aom/test/error_block_test.cc171
-rw-r--r--third_party/aom/test/error_resilience_test.cc438
-rw-r--r--third_party/aom/test/ethread_test.cc273
-rwxr-xr-xthird_party/aom/test/examples.sh29
-rw-r--r--third_party/aom/test/external_frame_buffer_test.cc512
-rw-r--r--third_party/aom/test/fft_test.cc256
-rw-r--r--third_party/aom/test/film_grain_table_test.cc250
-rw-r--r--third_party/aom/test/filterintra_test.cc134
-rw-r--r--third_party/aom/test/frame_size_tests.cc78
-rw-r--r--third_party/aom/test/function_equivalence_test.h69
-rw-r--r--third_party/aom/test/fwht4x4_test.cc98
-rwxr-xr-xthird_party/aom/test/gviz_api.py1087
-rw-r--r--third_party/aom/test/hash_test.cc133
-rw-r--r--third_party/aom/test/hbd_metrics_test.cc239
-rw-r--r--third_party/aom/test/hiprec_convolve_test.cc62
-rw-r--r--third_party/aom/test/hiprec_convolve_test_util.cc331
-rw-r--r--third_party/aom/test/hiprec_convolve_test_util.h93
-rw-r--r--third_party/aom/test/horz_superres_test.cc322
-rw-r--r--third_party/aom/test/i420_video_source.h34
-rw-r--r--third_party/aom/test/intra_edge_test.cc337
-rw-r--r--third_party/aom/test/intrabc_test.cc168
-rw-r--r--third_party/aom/test/intrapred_test.cc266
-rw-r--r--third_party/aom/test/invalid_file_test.cc122
-rw-r--r--third_party/aom/test/ivf_video_source.h114
-rwxr-xr-xthird_party/aom/test/lightfield_test.sh98
-rw-r--r--third_party/aom/test/log2_test.cc50
-rw-r--r--third_party/aom/test/lossless_test.cc126
-rw-r--r--third_party/aom/test/lpf_test.cc627
-rw-r--r--third_party/aom/test/masked_sad_test.cc342
-rw-r--r--third_party/aom/test/masked_variance_test.cc459
-rw-r--r--third_party/aom/test/md5_helper.h76
-rw-r--r--third_party/aom/test/metrics_template.html422
-rw-r--r--third_party/aom/test/monochrome_test.cc130
-rw-r--r--third_party/aom/test/motion_vector_test.cc105
-rw-r--r--third_party/aom/test/noise_model_test.cc1343
-rw-r--r--third_party/aom/test/obmc_sad_test.cc237
-rw-r--r--third_party/aom/test/obmc_variance_test.cc345
-rw-r--r--third_party/aom/test/onyxc_int_test.cc22
-rw-r--r--third_party/aom/test/pickrst_test.cc187
-rw-r--r--third_party/aom/test/qm_test.cc81
-rw-r--r--third_party/aom/test/quantize_func_test.cc425
-rw-r--r--third_party/aom/test/reconinter_test.cc258
-rw-r--r--third_party/aom/test/register_state_check.h148
-rw-r--r--third_party/aom/test/resize_test.cc642
-rwxr-xr-xthird_party/aom/test/run_encodes.sh39
-rw-r--r--third_party/aom/test/sad_test.cc1528
-rw-r--r--third_party/aom/test/scalability_test.cc81
-rw-r--r--third_party/aom/test/scan_test.cc133
-rw-r--r--third_party/aom/test/segment_binarization_sync.cc61
-rw-r--r--third_party/aom/test/selfguided_filter_test.cc410
-rwxr-xr-xthird_party/aom/test/set_maps.sh52
-rw-r--r--third_party/aom/test/simd_avx2_test.cc15
-rw-r--r--third_party/aom/test/simd_cmp_avx2.cc15
-rw-r--r--third_party/aom/test/simd_cmp_impl.h2171
-rw-r--r--third_party/aom/test/simd_cmp_neon.cc17
-rw-r--r--third_party/aom/test/simd_cmp_sse2.cc18
-rw-r--r--third_party/aom/test/simd_cmp_sse4.cc18
-rw-r--r--third_party/aom/test/simd_cmp_ssse3.cc18
-rw-r--r--third_party/aom/test/simd_impl.h1141
-rw-r--r--third_party/aom/test/simd_neon_test.cc17
-rw-r--r--third_party/aom/test/simd_sse2_test.cc18
-rw-r--r--third_party/aom/test/simd_sse4_test.cc18
-rw-r--r--third_party/aom/test/simd_ssse3_test.cc18
-rwxr-xr-xthird_party/aom/test/simple_decoder.sh58
-rwxr-xr-xthird_party/aom/test/simple_encoder.sh53
-rw-r--r--third_party/aom/test/subtract_test.cc249
-rw-r--r--third_party/aom/test/sum_squares_test.cc228
-rw-r--r--third_party/aom/test/superframe_test.cc109
-rw-r--r--third_party/aom/test/test-data.sha1507
-rw-r--r--third_party/aom/test/test.cmake438
-rw-r--r--third_party/aom/test/test_data_download_worker.cmake46
-rw-r--r--third_party/aom/test/test_data_util.cmake598
-rw-r--r--third_party/aom/test/test_intra_pred_speed.cc1464
-rw-r--r--third_party/aom/test/test_libaom.cc74
-rw-r--r--third_party/aom/test/test_runner.cmake28
-rw-r--r--third_party/aom/test/test_vector_test.cc172
-rw-r--r--third_party/aom/test/test_vectors.cc140
-rw-r--r--third_party/aom/test/test_vectors.h26
-rw-r--r--third_party/aom/test/tile_independence_test.cc173
-rwxr-xr-xthird_party/aom/test/tools_common.sh477
-rw-r--r--third_party/aom/test/transform_test_base.h342
-rwxr-xr-xthird_party/aom/test/twopass_encoder.sh54
-rw-r--r--third_party/aom/test/util.h53
-rw-r--r--third_party/aom/test/variance_test.cc2064
-rw-r--r--third_party/aom/test/video_source.h259
-rwxr-xr-xthird_party/aom/test/visual_metrics.py466
-rw-r--r--third_party/aom/test/warp_filter_test.cc56
-rw-r--r--third_party/aom/test/warp_filter_test_util.cc480
-rw-r--r--third_party/aom/test/warp_filter_test_util.h103
-rw-r--r--third_party/aom/test/webm_video_source.h96
-rw-r--r--third_party/aom/test/wiener_test.cc280
-rw-r--r--third_party/aom/test/y4m_test.cc180
-rw-r--r--third_party/aom/test/y4m_video_source.h123
-rw-r--r--third_party/aom/test/yuv_video_source.h123
-rw-r--r--third_party/aom/third_party/fastfeat/LICENSE30
-rw-r--r--third_party/aom/third_party/fastfeat/README.libvpx39
-rw-r--r--third_party/aom/third_party/fastfeat/fast.c22
-rw-r--r--third_party/aom/third_party/fastfeat/fast.h20
-rw-r--r--third_party/aom/third_party/fastfeat/fast_9.c5911
-rw-r--r--third_party/aom/third_party/fastfeat/nonmax.c121
-rw-r--r--third_party/aom/third_party/googletest/README.libaom26
-rw-r--r--third_party/aom/third_party/googletest/gtest.mk1
-rw-r--r--third_party/aom/third_party/googletest/src/googletest/CHANGES157
-rw-r--r--third_party/aom/third_party/googletest/src/googletest/CMakeLists.txt286
-rw-r--r--third_party/aom/third_party/googletest/src/googletest/CONTRIBUTORS37
-rw-r--r--third_party/aom/third_party/googletest/src/googletest/LICENSE28
-rw-r--r--third_party/aom/third_party/googletest/src/googletest/README.md280
-rw-r--r--third_party/aom/third_party/googletest/src/googletest/cmake/internal_utils.cmake254
-rw-r--r--third_party/aom/third_party/googletest/src/googletest/include/gtest/gtest-death-test.h294
-rw-r--r--third_party/aom/third_party/googletest/src/googletest/include/gtest/gtest-message.h250
-rw-r--r--third_party/aom/third_party/googletest/src/googletest/include/gtest/gtest-param-test.h1444
-rw-r--r--third_party/aom/third_party/googletest/src/googletest/include/gtest/gtest-param-test.h.pump510
-rw-r--r--third_party/aom/third_party/googletest/src/googletest/include/gtest/gtest-printers.h993
-rw-r--r--third_party/aom/third_party/googletest/src/googletest/include/gtest/gtest-spi.h232
-rw-r--r--third_party/aom/third_party/googletest/src/googletest/include/gtest/gtest-test-part.h179
-rw-r--r--third_party/aom/third_party/googletest/src/googletest/include/gtest/gtest-typed-test.h263
-rw-r--r--third_party/aom/third_party/googletest/src/googletest/include/gtest/gtest.h2236
-rw-r--r--third_party/aom/third_party/googletest/src/googletest/include/gtest/gtest_pred_impl.h358
-rw-r--r--third_party/aom/third_party/googletest/src/googletest/include/gtest/gtest_prod.h58
-rw-r--r--third_party/aom/third_party/googletest/src/googletest/include/gtest/internal/custom/gtest-port.h69
-rw-r--r--third_party/aom/third_party/googletest/src/googletest/include/gtest/internal/custom/gtest-printers.h42
-rw-r--r--third_party/aom/third_party/googletest/src/googletest/include/gtest/internal/custom/gtest.h41
-rw-r--r--third_party/aom/third_party/googletest/src/googletest/include/gtest/internal/gtest-death-test-internal.h319
-rw-r--r--third_party/aom/third_party/googletest/src/googletest/include/gtest/internal/gtest-filepath.h206
-rw-r--r--third_party/aom/third_party/googletest/src/googletest/include/gtest/internal/gtest-internal.h1238
-rw-r--r--third_party/aom/third_party/googletest/src/googletest/include/gtest/internal/gtest-linked_ptr.h243
-rw-r--r--third_party/aom/third_party/googletest/src/googletest/include/gtest/internal/gtest-param-util-generated.h5146
-rw-r--r--third_party/aom/third_party/googletest/src/googletest/include/gtest/internal/gtest-param-util-generated.h.pump286
-rw-r--r--third_party/aom/third_party/googletest/src/googletest/include/gtest/internal/gtest-param-util.h731
-rw-r--r--third_party/aom/third_party/googletest/src/googletest/include/gtest/internal/gtest-port-arch.h93
-rw-r--r--third_party/aom/third_party/googletest/src/googletest/include/gtest/internal/gtest-port.h2567
-rw-r--r--third_party/aom/third_party/googletest/src/googletest/include/gtest/internal/gtest-string.h167
-rw-r--r--third_party/aom/third_party/googletest/src/googletest/include/gtest/internal/gtest-tuple.h1020
-rw-r--r--third_party/aom/third_party/googletest/src/googletest/include/gtest/internal/gtest-tuple.h.pump347
-rw-r--r--third_party/aom/third_party/googletest/src/googletest/include/gtest/internal/gtest-type-util.h3331
-rw-r--r--third_party/aom/third_party/googletest/src/googletest/include/gtest/internal/gtest-type-util.h.pump297
-rw-r--r--third_party/aom/third_party/googletest/src/googletest/src/gtest-all.cc48
-rw-r--r--third_party/aom/third_party/googletest/src/googletest/src/gtest-death-test.cc1342
-rw-r--r--third_party/aom/third_party/googletest/src/googletest/src/gtest-filepath.cc387
-rw-r--r--third_party/aom/third_party/googletest/src/googletest/src/gtest-internal-inl.h1183
-rw-r--r--third_party/aom/third_party/googletest/src/googletest/src/gtest-port.cc1259
-rw-r--r--third_party/aom/third_party/googletest/src/googletest/src/gtest-printers.cc373
-rw-r--r--third_party/aom/third_party/googletest/src/googletest/src/gtest-test-part.cc110
-rw-r--r--third_party/aom/third_party/googletest/src/googletest/src/gtest-typed-test.cc118
-rw-r--r--third_party/aom/third_party/googletest/src/googletest/src/gtest.cc5389
-rw-r--r--third_party/aom/third_party/googletest/src/googletest/src/gtest_main.cc38
-rw-r--r--third_party/aom/third_party/libwebm/AUTHORS.TXT4
-rw-r--r--third_party/aom/third_party/libwebm/Android.mk17
-rw-r--r--third_party/aom/third_party/libwebm/LICENSE.TXT30
-rw-r--r--third_party/aom/third_party/libwebm/PATENTS.TXT23
-rw-r--r--third_party/aom/third_party/libwebm/README.libaom22
-rw-r--r--third_party/aom/third_party/libwebm/common/file_util.cc93
-rw-r--r--third_party/aom/third_party/libwebm/common/file_util.h44
-rw-r--r--third_party/aom/third_party/libwebm/common/hdr_util.cc220
-rw-r--r--third_party/aom/third_party/libwebm/common/hdr_util.h71
-rw-r--r--third_party/aom/third_party/libwebm/common/webmids.h192
-rw-r--r--third_party/aom/third_party/libwebm/mkvmuxer/mkvmuxer.cc4194
-rw-r--r--third_party/aom/third_party/libwebm/mkvmuxer/mkvmuxer.h1922
-rw-r--r--third_party/aom/third_party/libwebm/mkvmuxer/mkvmuxertypes.h28
-rw-r--r--third_party/aom/third_party/libwebm/mkvmuxer/mkvmuxerutil.cc744
-rw-r--r--third_party/aom/third_party/libwebm/mkvmuxer/mkvmuxerutil.h112
-rw-r--r--third_party/aom/third_party/libwebm/mkvmuxer/mkvwriter.cc90
-rw-r--r--third_party/aom/third_party/libwebm/mkvmuxer/mkvwriter.h51
-rw-r--r--third_party/aom/third_party/libwebm/mkvparser/mkvparser.cc8049
-rw-r--r--third_party/aom/third_party/libwebm/mkvparser/mkvparser.h1145
-rw-r--r--third_party/aom/third_party/libwebm/mkvparser/mkvreader.cc133
-rw-r--r--third_party/aom/third_party/libwebm/mkvparser/mkvreader.h45
-rw-r--r--third_party/aom/third_party/libyuv/README.libaom15
-rw-r--r--third_party/aom/third_party/libyuv/include/libyuv/basic_types.h119
-rw-r--r--third_party/aom/third_party/libyuv/include/libyuv/compare.h79
-rw-r--r--third_party/aom/third_party/libyuv/include/libyuv/convert.h246
-rw-r--r--third_party/aom/third_party/libyuv/include/libyuv/convert_argb.h232
-rw-r--r--third_party/aom/third_party/libyuv/include/libyuv/convert_from.h182
-rw-r--r--third_party/aom/third_party/libyuv/include/libyuv/convert_from_argb.h191
-rw-r--r--third_party/aom/third_party/libyuv/include/libyuv/cpu_id.h82
-rw-r--r--third_party/aom/third_party/libyuv/include/libyuv/mjpeg_decoder.h193
-rw-r--r--third_party/aom/third_party/libyuv/include/libyuv/planar_functions.h454
-rw-r--r--third_party/aom/third_party/libyuv/include/libyuv/rotate.h118
-rw-r--r--third_party/aom/third_party/libyuv/include/libyuv/rotate_argb.h34
-rw-r--r--third_party/aom/third_party/libyuv/include/libyuv/rotate_row.h139
-rw-r--r--third_party/aom/third_party/libyuv/include/libyuv/row.h1857
-rw-r--r--third_party/aom/third_party/libyuv/include/libyuv/scale.h104
-rw-r--r--third_party/aom/third_party/libyuv/include/libyuv/scale_argb.h58
-rw-r--r--third_party/aom/third_party/libyuv/include/libyuv/scale_row.h479
-rw-r--r--third_party/aom/third_party/libyuv/include/libyuv/version.h17
-rw-r--r--third_party/aom/third_party/libyuv/include/libyuv/video_common.h183
-rw-r--r--third_party/aom/third_party/libyuv/source/compare.cc373
-rw-r--r--third_party/aom/third_party/libyuv/source/compare_common.cc42
-rw-r--r--third_party/aom/third_party/libyuv/source/compare_gcc.cc152
-rw-r--r--third_party/aom/third_party/libyuv/source/compare_neon.cc65
-rw-r--r--third_party/aom/third_party/libyuv/source/compare_neon64.cc63
-rw-r--r--third_party/aom/third_party/libyuv/source/compare_win.cc229
-rw-r--r--third_party/aom/third_party/libyuv/source/convert.cc1389
-rw-r--r--third_party/aom/third_party/libyuv/source/convert_argb.cc1155
-rw-r--r--third_party/aom/third_party/libyuv/source/convert_from.cc1348
-rw-r--r--third_party/aom/third_party/libyuv/source/convert_from_argb.cc1301
-rw-r--r--third_party/aom/third_party/libyuv/source/convert_jpeg.cc392
-rw-r--r--third_party/aom/third_party/libyuv/source/convert_to_argb.cc306
-rw-r--r--third_party/aom/third_party/libyuv/source/convert_to_i420.cc339
-rw-r--r--third_party/aom/third_party/libyuv/source/cpu_id.cc307
-rw-r--r--third_party/aom/third_party/libyuv/source/mjpeg_decoder.cc572
-rw-r--r--third_party/aom/third_party/libyuv/source/mjpeg_validate.cc101
-rw-r--r--third_party/aom/third_party/libyuv/source/planar_functions.cc2555
-rw-r--r--third_party/aom/third_party/libyuv/source/rotate.cc496
-rw-r--r--third_party/aom/third_party/libyuv/source/rotate_any.cc55
-rw-r--r--third_party/aom/third_party/libyuv/source/rotate_argb.cc205
-rw-r--r--third_party/aom/third_party/libyuv/source/rotate_common.cc92
-rw-r--r--third_party/aom/third_party/libyuv/source/rotate_gcc.cc493
-rw-r--r--third_party/aom/third_party/libyuv/source/rotate_mips.cc484
-rw-r--r--third_party/aom/third_party/libyuv/source/rotate_neon.cc535
-rw-r--r--third_party/aom/third_party/libyuv/source/rotate_neon64.cc543
-rw-r--r--third_party/aom/third_party/libyuv/source/rotate_win.cc248
-rw-r--r--third_party/aom/third_party/libyuv/source/row_any.cc680
-rw-r--r--third_party/aom/third_party/libyuv/source/row_common.cc2576
-rw-r--r--third_party/aom/third_party/libyuv/source/row_gcc.cc5475
-rw-r--r--third_party/aom/third_party/libyuv/source/row_mips.cc911
-rw-r--r--third_party/aom/third_party/libyuv/source/row_neon.cc3084
-rw-r--r--third_party/aom/third_party/libyuv/source/row_neon64.cc3087
-rw-r--r--third_party/aom/third_party/libyuv/source/row_win.cc6331
-rw-r--r--third_party/aom/third_party/libyuv/source/row_x86.asm146
-rw-r--r--third_party/aom/third_party/libyuv/source/scale.cc1689
-rw-r--r--third_party/aom/third_party/libyuv/source/scale_any.cc200
-rw-r--r--third_party/aom/third_party/libyuv/source/scale_argb.cc853
-rw-r--r--third_party/aom/third_party/libyuv/source/scale_common.cc1137
-rw-r--r--third_party/aom/third_party/libyuv/source/scale_gcc.cc1089
-rw-r--r--third_party/aom/third_party/libyuv/source/scale_mips.cc654
-rw-r--r--third_party/aom/third_party/libyuv/source/scale_neon.cc1037
-rw-r--r--third_party/aom/third_party/libyuv/source/scale_neon64.cc1042
-rw-r--r--third_party/aom/third_party/libyuv/source/scale_win.cc1354
-rw-r--r--third_party/aom/third_party/libyuv/source/video_common.cc64
-rw-r--r--third_party/aom/third_party/libyuv/source/x86inc.asm1136
-rw-r--r--third_party/aom/third_party/vector/LICENSE19
-rw-r--r--third_party/aom/third_party/vector/README.libaom14
-rw-r--r--third_party/aom/third_party/vector/vector.c543
-rw-r--r--third_party/aom/third_party/vector/vector.h159
-rw-r--r--third_party/aom/third_party/x86inc/LICENSE18
-rw-r--r--third_party/aom/third_party/x86inc/README.libaom20
-rw-r--r--third_party/aom/third_party/x86inc/x86inc.asm1649
-rw-r--r--third_party/aom/tools/aggregate_entropy_stats.py39
-rw-r--r--third_party/aom/tools/aom_entropy_optimizer.c758
-rwxr-xr-xthird_party/aom/tools/cpplint.py4756
-rw-r--r--third_party/aom/tools/diff.py132
-rw-r--r--third_party/aom/tools/dump_obu.cc164
-rwxr-xr-xthird_party/aom/tools/gen_authors.sh10
-rwxr-xr-xthird_party/aom/tools/gen_constrained_tokenset.py120
-rw-r--r--third_party/aom/tools/inspect-cli.js39
-rw-r--r--third_party/aom/tools/inspect-post.js1
-rwxr-xr-xthird_party/aom/tools/intersect-diffs.py78
-rwxr-xr-xthird_party/aom/tools/lint-hunks.py146
-rw-r--r--third_party/aom/tools/obu_parser.cc190
-rw-r--r--third_party/aom/tools/obu_parser.h27
-rw-r--r--third_party/aom/tools/txfm_analyzer/txfm_gen_code.cc580
-rw-r--r--third_party/aom/tools/txfm_analyzer/txfm_graph.cc943
-rw-r--r--third_party/aom/tools/txfm_analyzer/txfm_graph.h161
-rwxr-xr-xthird_party/aom/tools/wrap-commit-msg.py72
-rw-r--r--third_party/aom/usage.dox111
-rw-r--r--third_party/aom/usage_cx.dox9
-rw-r--r--third_party/aom/usage_dx.dox57
998 files changed, 0 insertions, 460333 deletions
diff --git a/third_party/aom/.clang-format b/third_party/aom/.clang-format
deleted file mode 100644
index e76a526e4..000000000
--- a/third_party/aom/.clang-format
+++ /dev/null
@@ -1,109 +0,0 @@
----
-Language: Cpp
-# BasedOnStyle: Google
-# Generated with clang-format 5.0.0
-AccessModifierOffset: -1
-AlignAfterOpenBracket: Align
-AlignConsecutiveAssignments: false
-AlignConsecutiveDeclarations: false
-AlignEscapedNewlines: Left
-AlignOperands: true
-AlignTrailingComments: true
-AllowAllParametersOfDeclarationOnNextLine: true
-AllowShortBlocksOnASingleLine: false
-AllowShortCaseLabelsOnASingleLine: true
-AllowShortFunctionsOnASingleLine: All
-AllowShortIfStatementsOnASingleLine: true
-AllowShortLoopsOnASingleLine: true
-AlwaysBreakAfterDefinitionReturnType: None
-AlwaysBreakAfterReturnType: None
-AlwaysBreakBeforeMultilineStrings: true
-AlwaysBreakTemplateDeclarations: true
-BinPackArguments: true
-BinPackParameters: true
-BraceWrapping:
- AfterClass: false
- AfterControlStatement: false
- AfterEnum: false
- AfterFunction: false
- AfterNamespace: false
- AfterObjCDeclaration: false
- AfterStruct: false
- AfterUnion: false
- BeforeCatch: false
- BeforeElse: false
- IndentBraces: false
- SplitEmptyFunction: true
- SplitEmptyRecord: true
- SplitEmptyNamespace: true
-BreakBeforeBinaryOperators: None
-BreakBeforeBraces: Attach
-BreakBeforeInheritanceComma: false
-BreakBeforeTernaryOperators: true
-BreakConstructorInitializersBeforeComma: false
-BreakConstructorInitializers: BeforeColon
-BreakAfterJavaFieldAnnotations: false
-BreakStringLiterals: true
-ColumnLimit: 80
-CommentPragmas: '^ IWYU pragma:'
-CompactNamespaces: false
-ConstructorInitializerAllOnOneLineOrOnePerLine: false
-ConstructorInitializerIndentWidth: 4
-ContinuationIndentWidth: 4
-Cpp11BracedListStyle: false
-DerivePointerAlignment: false
-DisableFormat: false
-ExperimentalAutoDetectBinPacking: false
-FixNamespaceComments: true
-ForEachMacros:
- - foreach
- - Q_FOREACH
- - BOOST_FOREACH
-IncludeCategories:
- - Regex: '^<.*\.h>'
- Priority: 1
- - Regex: '^<.*'
- Priority: 2
- - Regex: '.*'
- Priority: 3
-IncludeIsMainRegex: '([-_](test|unittest))?$'
-IndentCaseLabels: true
-IndentWidth: 2
-IndentWrappedFunctionNames: false
-JavaScriptQuotes: Leave
-JavaScriptWrapImports: true
-KeepEmptyLinesAtTheStartOfBlocks: false
-MacroBlockBegin: ''
-MacroBlockEnd: ''
-MaxEmptyLinesToKeep: 1
-NamespaceIndentation: None
-ObjCBlockIndentWidth: 2
-ObjCSpaceAfterProperty: false
-ObjCSpaceBeforeProtocolList: false
-PenaltyBreakAssignment: 2
-PenaltyBreakBeforeFirstCallParameter: 1
-PenaltyBreakComment: 300
-PenaltyBreakFirstLessLess: 120
-PenaltyBreakString: 1000
-PenaltyExcessCharacter: 1000000
-PenaltyReturnTypeOnItsOwnLine: 200
-PointerAlignment: Right
-ReflowComments: true
-SortIncludes: false
-SortUsingDeclarations: true
-SpaceAfterCStyleCast: false
-SpaceAfterTemplateKeyword: true
-SpaceBeforeAssignmentOperators: true
-SpaceBeforeParens: ControlStatements
-SpaceInEmptyParentheses: false
-SpacesBeforeTrailingComments: 2
-SpacesInAngles: false
-SpacesInContainerLiterals: true
-SpacesInCStyleCastParentheses: false
-SpacesInParentheses: false
-SpacesInSquareBrackets: false
-Standard: Auto
-TabWidth: 8
-UseTab: Never
-...
-
diff --git a/third_party/aom/.cmake-format.py b/third_party/aom/.cmake-format.py
deleted file mode 100644
index aa7354c2a..000000000
--- a/third_party/aom/.cmake-format.py
+++ /dev/null
@@ -1,48 +0,0 @@
-# Generated with cmake-format 0.3.6
-# How wide to allow formatted cmake files
-line_width = 80
-
-# How many spaces to tab for indent
-tab_size = 2
-
-# If arglists are longer than this, break them always. This introduces some
-# interesting effects with complicated 'if' statements. However, we want file
-# lists to look reasonable. Try to strike a balance.
-max_subargs_per_line = 10
-
-# If true, separate flow control names from their parentheses with a space
-separate_ctrl_name_with_space = False
-
-# If true, separate function names from parentheses with a space
-separate_fn_name_with_space = False
-
-# If a statement is wrapped to more than one line, than dangle the closing
-# parenthesis on it's own line
-dangle_parens = False
-
-# What character to use for bulleted lists
-bullet_char = u'*'
-
-# What character to use as punctuation after numerals in an enumerated list
-enum_char = u'.'
-
-# What style line endings to use in the output.
-line_ending = u'unix'
-
-# Format command names consistently as 'lower' or 'upper' case
-command_case = u'lower'
-
-# Specify structure for custom cmake functions
-additional_commands = {
- "foo": {
- "flags": [
- "BAR",
- "BAZ"
- ],
- "kwargs": {
- "HEADERS": "*",
- "DEPENDS": "*",
- "SOURCES": "*"
- }
- }
-}
diff --git a/third_party/aom/.mailmap b/third_party/aom/.mailmap
deleted file mode 100644
index bbe4525b1..000000000
--- a/third_party/aom/.mailmap
+++ /dev/null
@@ -1,34 +0,0 @@
-Adrian Grange <agrange@google.com>
-Aℓex Converse <aconverse@google.com>
-Aℓex Converse <aconverse@google.com> <alex.converse@gmail.com>
-Alexis Ballier <aballier@gentoo.org> <alexis.ballier@gmail.com>
-Alpha Lam <hclam@google.com> <hclam@chromium.org>
-Deb Mukherjee <debargha@google.com>
-Erik Niemeyer <erik.a.niemeyer@intel.com> <erik.a.niemeyer@gmail.com>
-Guillaume Martres <gmartres@google.com> <smarter3@gmail.com>
-Hangyu Kuang <hkuang@google.com>
-Hui Su <huisu@google.com>
-Jacky Chen <jackychen@google.com>
-Jim Bankoski <jimbankoski@google.com>
-Johann Koenig <johannkoenig@google.com>
-Johann Koenig <johannkoenig@google.com> <johann.koenig@duck.com>
-Johann Koenig <johannkoenig@google.com> <johann.koenig@gmail.com>
-John Koleszar <jkoleszar@google.com>
-Joshua Litt <joshualitt@google.com> <joshualitt@chromium.org>
-Marco Paniconi <marpan@google.com>
-Marco Paniconi <marpan@google.com> <marpan@chromium.org>
-Pascal Massimino <pascal.massimino@gmail.com>
-Paul Wilkins <paulwilkins@google.com>
-Ralph Giles <giles@xiph.org> <giles@entropywave.com>
-Ralph Giles <giles@xiph.org> <giles@mozilla.com>
-Ronald S. Bultje <rsbultje@gmail.com> <rbultje@google.com>
-Sami Pietilä <samipietila@google.com>
-Sarah Parker <sarahparker@google.com>
-Tamar Levy <tamar.levy@intel.com>
-Tamar Levy <tamar.levy@intel.com> <levytamar82@gmail.com>
-Tero Rintaluoma <teror@google.com> <tero.rintaluoma@on2.com>
-Timothy B. Terriberry <tterribe@xiph.org> Tim Terriberry <tterriberry@mozilla.com>
-Tom Finegan <tomfinegan@google.com>
-Tom Finegan <tomfinegan@google.com> <tomfinegan@chromium.org>
-Yaowu Xu <yaowu@google.com> <yaowu@xuyaowu.com>
-Yaowu Xu <yaowu@google.com> <yaowu@yaowu-macbookpro.roam.corp.google.com>
diff --git a/third_party/aom/AUTHORS b/third_party/aom/AUTHORS
deleted file mode 100644
index 95c3c8bf2..000000000
--- a/third_party/aom/AUTHORS
+++ /dev/null
@@ -1,144 +0,0 @@
-# This file is automatically generated from the git commit history
-# by tools/gen_authors.sh.
-
-Aaron Watry <awatry@gmail.com>
-Abo Talib Mahfoodh <ab.mahfoodh@gmail.com>
-Adam Xu <adam@xuyaowu.com>
-Adrian Grange <agrange@google.com>
-Aℓex Converse <aconverse@google.com>
-Ahmad Sharif <asharif@google.com>
-Alexander Voronov <avoronov@graphics.cs.msu.ru>
-Alexis Ballier <aballier@gentoo.org>
-Alok Ahuja <waveletcoeff@gmail.com>
-Alpha Lam <hclam@google.com>
-A.Mahfoodh <ab.mahfoodh@gmail.com>
-Ami Fischman <fischman@chromium.org>
-Andoni Morales Alastruey <ylatuya@gmail.com>
-Andres Mejia <mcitadel@gmail.com>
-Andrew Russell <anrussell@google.com>
-Angie Chiang <angiebird@google.com>
-Aron Rosenberg <arosenberg@logitech.com>
-Attila Nagy <attilanagy@google.com>
-Brion Vibber <bvibber@wikimedia.org>
-changjun.yang <changjun.yang@intel.com>
-Charles 'Buck' Krasic <ckrasic@google.com>
-chm <chm@rock-chips.com>
-Christian Duvivier <cduvivier@google.com>
-Daniel Kang <ddkang@google.com>
-Deb Mukherjee <debargha@google.com>
-Dim Temp <dimtemp0@gmail.com>
-Dmitry Kovalev <dkovalev@google.com>
-Dragan Mrdjan <dmrdjan@mips.com>
-Ed Baker <edward.baker@intel.com>
-Ehsan Akhgari <ehsan.akhgari@gmail.com>
-Erik Niemeyer <erik.a.niemeyer@intel.com>
-Fabio Pedretti <fabio.ped@libero.it>
-Frank Galligan <fgalligan@google.com>
-Fredrik Söderquist <fs@opera.com>
-Fritz Koenig <frkoenig@google.com>
-Gaute Strokkenes <gaute.strokkenes@broadcom.com>
-Geza Lore <gezalore@gmail.com>
-Ghislain MARY <ghislainmary2@gmail.com>
-Giuseppe Scrivano <gscrivano@gnu.org>
-Gordana Cmiljanovic <gordana.cmiljanovic@imgtec.com>
-Guillaume Martres <gmartres@google.com>
-Guillermo Ballester Valor <gbvalor@gmail.com>
-Hangyu Kuang <hkuang@google.com>
-Hanno Böck <hanno@hboeck.de>
-Henrik Lundin <hlundin@google.com>
-Hui Su <huisu@google.com>
-Ivan Maltz <ivanmaltz@google.com>
-Jacek Caban <cjacek@gmail.com>
-Jacky Chen <jackychen@google.com>
-James Berry <jamesberry@google.com>
-James Yu <james.yu@linaro.org>
-James Zern <jzern@google.com>
-Jan Gerber <j@mailb.org>
-Jan Kratochvil <jan.kratochvil@redhat.com>
-Janne Salonen <jsalonen@google.com>
-Jean-Marc Valin <jmvalin@jmvalin.ca>
-Jeff Faust <jfaust@google.com>
-Jeff Muizelaar <jmuizelaar@mozilla.com>
-Jeff Petkau <jpet@chromium.org>
-Jia Jia <jia.jia@linaro.org>
-Jian Zhou <zhoujian@google.com>
-Jim Bankoski <jimbankoski@google.com>
-Jingning Han <jingning@google.com>
-Joey Parrish <joeyparrish@google.com>
-Johann Koenig <johannkoenig@chromium.org>
-Johann Koenig <johannkoenig@google.com>
-John Koleszar <jkoleszar@google.com>
-Johnny Klonaris <google@jawknee.com>
-John Stark <jhnstrk@gmail.com>
-Joshua Bleecher Snyder <josh@treelinelabs.com>
-Joshua Litt <joshualitt@google.com>
-Julia Robson <juliamrobson@gmail.com>
-Justin Clift <justin@salasaga.org>
-Justin Lebar <justin.lebar@gmail.com>
-KO Myung-Hun <komh@chollian.net>
-Lawrence Velázquez <larryv@macports.org>
-Lou Quillio <louquillio@google.com>
-Luca Barbato <lu_zero@gentoo.org>
-Makoto Kato <makoto.kt@gmail.com>
-Mans Rullgard <mans@mansr.com>
-Marco Paniconi <marpan@google.com>
-Mark Mentovai <mark@chromium.org>
-Martin Ettl <ettl.martin78@googlemail.com>
-Martin Storsjo <martin@martin.st>
-Matthew Heaney <matthewjheaney@chromium.org>
-Michael Kohler <michaelkohler@live.com>
-Mike Frysinger <vapier@chromium.org>
-Mike Hommey <mhommey@mozilla.com>
-Mikhal Shemer <mikhal@google.com>
-Minghai Shang <minghai@google.com>
-Morton Jonuschat <yabawock@gmail.com>
-Nathan E. Egge <negge@dgql.org>
-Nico Weber <thakis@chromium.org>
-Parag Salasakar <img.mips1@gmail.com>
-Pascal Massimino <pascal.massimino@gmail.com>
-Patrik Westin <patrik.westin@gmail.com>
-Paul Wilkins <paulwilkins@google.com>
-Pavol Rusnak <stick@gk2.sk>
-Paweł Hajdan <phajdan@google.com>
-Pengchong Jin <pengchong@google.com>
-Peter de Rivaz <peter.derivaz@argondesign.com>
-Peter de Rivaz <peter.derivaz@gmail.com>
-Philip Jägenstedt <philipj@opera.com>
-Priit Laes <plaes@plaes.org>
-Rafael Ávila de Espíndola <rafael.espindola@gmail.com>
-Rafaël Carré <funman@videolan.org>
-Ralph Giles <giles@xiph.org>
-Rob Bradford <rob@linux.intel.com>
-Ronald S. Bultje <rsbultje@gmail.com>
-Rui Ueyama <ruiu@google.com>
-Sami Pietilä <samipietila@google.com>
-Sasi Inguva <isasi@google.com>
-Scott Graham <scottmg@chromium.org>
-Scott LaVarnway <slavarnway@google.com>
-Sean McGovern <gseanmcg@gmail.com>
-Sergey Kolomenkin <kolomenkin@gmail.com>
-Sergey Ulanov <sergeyu@chromium.org>
-Shimon Doodkin <helpmepro1@gmail.com>
-Shunyao Li <shunyaoli@google.com>
-Stefan Holmer <holmer@google.com>
-Steinar Midtskogen <stemidts@cisco.com>
-Suman Sunkara <sunkaras@google.com>
-Taekhyun Kim <takim@nvidia.com>
-Takanori MATSUURA <t.matsuu@gmail.com>
-Tamar Levy <tamar.levy@intel.com>
-Tao Bai <michaelbai@chromium.org>
-Tero Rintaluoma <teror@google.com>
-Thijs Vermeir <thijsvermeir@gmail.com>
-Thomas Daede <tdaede@mozilla.com>
-Thomas Davies <thdavies@cisco.com>
-Thomas <thdavies@cisco.com>
-Tim Kopp <tkopp@google.com>
-Timothy B. Terriberry <tterribe@xiph.org>
-Tom Finegan <tomfinegan@google.com>
-Tristan Matthews <le.businessman@gmail.com>
-Tristan Matthews <tmatth@videolan.org>
-Vignesh Venkatasubramanian <vigneshv@google.com>
-Yaowu Xu <yaowu@google.com>
-Yongzhe Wang <yongzhe@google.com>
-Yunqing Wang <yunqingwang@google.com>
-Zoe Liu <zoeliu@google.com>
diff --git a/third_party/aom/CHANGELOG b/third_party/aom/CHANGELOG
deleted file mode 100644
index d84aa0249..000000000
--- a/third_party/aom/CHANGELOG
+++ /dev/null
@@ -1,5 +0,0 @@
-2018-06-28 v1.0.0
- AOMedia Codec Workgroup Approved version 1.0
-
-2016-04-07 v0.1.0 "AOMedia Codec 1"
- This release is the first Alliance for Open Media codec.
diff --git a/third_party/aom/CMakeLists.txt b/third_party/aom/CMakeLists.txt
deleted file mode 100644
index a58e54f40..000000000
--- a/third_party/aom/CMakeLists.txt
+++ /dev/null
@@ -1,758 +0,0 @@
-#
-# Copyright (c) 2016, Alliance for Open Media. All rights reserved
-#
-# This source code is subject to the terms of the BSD 2 Clause License and the
-# Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License was
-# not distributed with this source code in the LICENSE file, you can obtain it
-# at www.aomedia.org/license/software. If the Alliance for Open Media Patent
-# License 1.0 was not distributed with this source code in the PATENTS file, you
-# can obtain it at www.aomedia.org/license/patent.
-#
-cmake_minimum_required(VERSION 3.5)
-
-if(NOT EMSCRIPTEN)
- if(NOT CMAKE_BUILD_TYPE)
- set(CMAKE_BUILD_TYPE "Release"
- CACHE "Build type: Debug, Release, RelWithDebInfo or MinSizeRel" STRING
- FORCE)
- endif()
-endif()
-
-project(AOM C CXX)
-
-set(AOM_ROOT "${CMAKE_CURRENT_SOURCE_DIR}")
-set(AOM_CONFIG_DIR "${CMAKE_CURRENT_BINARY_DIR}")
-set(INCLUDE_INSTALL_DIR "${CMAKE_INSTALL_PREFIX}/include"
- CACHE PATH "Installation path of includes")
-set(LIB_INSTALL_DIR "${CMAKE_INSTALL_PREFIX}/lib"
- CACHE PATH "Installation path of libraries")
-
-if("${AOM_ROOT}" STREQUAL "${AOM_CONFIG_DIR}")
- message(FATAL_ERROR
- "Building from within the aom source tree is not supported.\n"
- "Hint: Run these commands\n" "$ rm -rf CMakeCache.txt CMakeFiles\n"
- "$ mkdir -p ../aom_build\n" "$ cd ../aom_build\n"
- "And re-run CMake from the aom_build directory.")
-endif()
-
-include("${AOM_ROOT}/build/cmake/aom_configure.cmake")
-include("${AOM_ROOT}/aom_dsp/aom_dsp.cmake")
-include("${AOM_ROOT}/aom_mem/aom_mem.cmake")
-include("${AOM_ROOT}/aom_ports/aom_ports.cmake")
-include("${AOM_ROOT}/aom_scale/aom_scale.cmake")
-include("${AOM_ROOT}/aom_util/aom_util.cmake")
-include("${AOM_ROOT}/av1/av1.cmake")
-include("${AOM_ROOT}/test/test.cmake")
-include("${AOM_ROOT}/build/cmake/sanitizers.cmake")
-include("${AOM_ROOT}/build/cmake/util.cmake")
-
-list(APPEND AOM_RTCD_SOURCES
- "${AOM_CONFIG_DIR}/config/aom_dsp_rtcd.h"
- "${AOM_CONFIG_DIR}/config/aom_scale_rtcd.h"
- "${AOM_CONFIG_DIR}/config/av1_rtcd.h"
- "${AOM_ROOT}/aom_dsp/aom_dsp_rtcd_defs.pl"
- "${AOM_ROOT}/aom_dsp/aom_dsp_rtcd.c"
- "${AOM_ROOT}/aom_scale/aom_scale_rtcd.pl"
- "${AOM_ROOT}/aom_scale/aom_scale_rtcd.c"
- "${AOM_ROOT}/av1/common/av1_rtcd_defs.pl"
- "${AOM_ROOT}/av1/common/av1_rtcd.c"
- "${AOM_ROOT}/build/cmake/rtcd.pl")
-
-list(APPEND AOM_LIBWEBM_SOURCES
- "${AOM_ROOT}/third_party/libwebm/common/hdr_util.cc"
- "${AOM_ROOT}/third_party/libwebm/common/hdr_util.h"
- "${AOM_ROOT}/third_party/libwebm/common/webmids.h"
- "${AOM_ROOT}/third_party/libwebm/mkvmuxer/mkvmuxer.cc"
- "${AOM_ROOT}/third_party/libwebm/mkvmuxer/mkvmuxer.h"
- "${AOM_ROOT}/third_party/libwebm/mkvmuxer/mkvmuxertypes.h"
- "${AOM_ROOT}/third_party/libwebm/mkvmuxer/mkvmuxerutil.cc"
- "${AOM_ROOT}/third_party/libwebm/mkvmuxer/mkvmuxerutil.h"
- "${AOM_ROOT}/third_party/libwebm/mkvmuxer/mkvwriter.cc"
- "${AOM_ROOT}/third_party/libwebm/mkvmuxer/mkvwriter.h"
- "${AOM_ROOT}/third_party/libwebm/mkvparser/mkvparser.cc"
- "${AOM_ROOT}/third_party/libwebm/mkvparser/mkvparser.h"
- "${AOM_ROOT}/third_party/libwebm/mkvparser/mkvreader.cc"
- "${AOM_ROOT}/third_party/libwebm/mkvparser/mkvreader.h")
-
-list(APPEND AOM_LIBYUV_SOURCES
- "${AOM_ROOT}/third_party/libyuv/include/libyuv/basic_types.h"
- "${AOM_ROOT}/third_party/libyuv/include/libyuv/convert.h"
- "${AOM_ROOT}/third_party/libyuv/include/libyuv/convert_argb.h"
- "${AOM_ROOT}/third_party/libyuv/include/libyuv/convert_from.h"
- "${AOM_ROOT}/third_party/libyuv/include/libyuv/cpu_id.h"
- "${AOM_ROOT}/third_party/libyuv/include/libyuv/planar_functions.h"
- "${AOM_ROOT}/third_party/libyuv/include/libyuv/rotate.h"
- "${AOM_ROOT}/third_party/libyuv/include/libyuv/row.h"
- "${AOM_ROOT}/third_party/libyuv/include/libyuv/scale.h"
- "${AOM_ROOT}/third_party/libyuv/include/libyuv/scale_row.h"
- "${AOM_ROOT}/third_party/libyuv/source/cpu_id.cc"
- "${AOM_ROOT}/third_party/libyuv/source/planar_functions.cc"
- "${AOM_ROOT}/third_party/libyuv/source/row_any.cc"
- "${AOM_ROOT}/third_party/libyuv/source/row_common.cc"
- "${AOM_ROOT}/third_party/libyuv/source/row_gcc.cc"
- "${AOM_ROOT}/third_party/libyuv/source/row_mips.cc"
- "${AOM_ROOT}/third_party/libyuv/source/row_neon.cc"
- "${AOM_ROOT}/third_party/libyuv/source/row_neon64.cc"
- "${AOM_ROOT}/third_party/libyuv/source/row_win.cc"
- "${AOM_ROOT}/third_party/libyuv/source/scale.cc"
- "${AOM_ROOT}/third_party/libyuv/source/scale_any.cc"
- "${AOM_ROOT}/third_party/libyuv/source/scale_common.cc"
- "${AOM_ROOT}/third_party/libyuv/source/scale_gcc.cc"
- "${AOM_ROOT}/third_party/libyuv/source/scale_mips.cc"
- "${AOM_ROOT}/third_party/libyuv/source/scale_neon.cc"
- "${AOM_ROOT}/third_party/libyuv/source/scale_neon64.cc"
- "${AOM_ROOT}/third_party/libyuv/source/scale_win.cc")
-
-list(APPEND AOM_SOURCES
- "${AOM_CONFIG_DIR}/config/aom_config.c"
- "${AOM_CONFIG_DIR}/config/aom_config.h"
- "${AOM_ROOT}/aom/aom.h"
- "${AOM_ROOT}/aom/aom_codec.h"
- "${AOM_ROOT}/aom/aom_decoder.h"
- "${AOM_ROOT}/aom/aom_encoder.h"
- "${AOM_ROOT}/aom/aom_frame_buffer.h"
- "${AOM_ROOT}/aom/aom_image.h"
- "${AOM_ROOT}/aom/aom_integer.h"
- "${AOM_ROOT}/aom/aomcx.h"
- "${AOM_ROOT}/aom/aomdx.h"
- "${AOM_ROOT}/aom/internal/aom_codec_internal.h"
- "${AOM_ROOT}/aom/src/aom_codec.c"
- "${AOM_ROOT}/aom/src/aom_decoder.c"
- "${AOM_ROOT}/aom/src/aom_encoder.c"
- "${AOM_ROOT}/aom/src/aom_image.c"
- "${AOM_ROOT}/aom/src/aom_integer.c")
-
-list(APPEND AOM_COMMON_APP_UTIL_SOURCES
- "${AOM_ROOT}/common/args.c"
- "${AOM_ROOT}/common/args.h"
- "${AOM_ROOT}/common/av1_config.c"
- "${AOM_ROOT}/common/av1_config.h"
- "${AOM_ROOT}/common/md5_utils.c"
- "${AOM_ROOT}/common/md5_utils.h"
- "${AOM_ROOT}/common/tools_common.c"
- "${AOM_ROOT}/common/tools_common.h"
- "${AOM_ROOT}/common/video_common.h"
- "${AOM_ROOT}/common/rawenc.c"
- "${AOM_ROOT}/common/rawenc.h"
- "${AOM_ROOT}/common/y4menc.c"
- "${AOM_ROOT}/common/y4menc.h")
-
-list(APPEND AOM_DECODER_APP_UTIL_SOURCES "${AOM_ROOT}/common/ivfdec.c"
- "${AOM_ROOT}/common/ivfdec.h" "${AOM_ROOT}/common/obudec.c"
- "${AOM_ROOT}/common/obudec.h" "${AOM_ROOT}/common/video_reader.c"
- "${AOM_ROOT}/common/video_reader.h")
-
-list(APPEND AOM_ENCODER_APP_UTIL_SOURCES
- "${AOM_ROOT}/common/ivfenc.c"
- "${AOM_ROOT}/common/ivfenc.h"
- "${AOM_ROOT}/common/video_writer.c"
- "${AOM_ROOT}/common/video_writer.h"
- "${AOM_ROOT}/common/warnings.c"
- "${AOM_ROOT}/common/warnings.h"
- "${AOM_ROOT}/common/y4minput.c"
- "${AOM_ROOT}/common/y4minput.h"
- "${AOM_ROOT}/examples/encoder_util.h"
- "${AOM_ROOT}/examples/encoder_util.c")
-
-list(APPEND AOM_ENCODER_STATS_SOURCES "${AOM_ROOT}/stats/aomstats.c"
- "${AOM_ROOT}/stats/aomstats.h" "${AOM_ROOT}/stats/rate_hist.c"
- "${AOM_ROOT}/stats/rate_hist.h")
-
-list(APPEND AOM_PKG_CONFIG_SOURCES "${AOM_CONFIG_DIR}/aom.pc")
-
-list(APPEND AOM_VERSION_SOURCES "${AOM_CONFIG_DIR}/config/aom_version.h")
-
-list(APPEND AOM_WEBM_DECODER_SOURCES "${AOM_ROOT}/common/webmdec.cc"
- "${AOM_ROOT}/common/webmdec.h")
-
-list(APPEND AOM_WEBM_ENCODER_SOURCES "${AOM_ROOT}/common/webmenc.cc"
- "${AOM_ROOT}/common/webmenc.h")
-
-include_directories(${AOM_ROOT} ${AOM_CONFIG_DIR} ${AOM_ROOT}/apps
- ${AOM_ROOT}/common ${AOM_ROOT}/examples ${AOM_ROOT}/stats)
-
-# Targets
-add_library(aom_version ${AOM_VERSION_SOURCES})
-add_dummy_source_file_to_target(aom_version c)
-add_custom_command(OUTPUT "${AOM_CONFIG_DIR}/config/aom_version.h"
- COMMAND ${CMAKE_COMMAND} ARGS
- -DAOM_CONFIG_DIR=${AOM_CONFIG_DIR}
- -DAOM_ROOT=${AOM_ROOT}
- -DGIT_EXECUTABLE=${GIT_EXECUTABLE}
- -DPERL_EXECUTABLE=${PERL_EXECUTABLE} -P
- "${AOM_ROOT}/build/cmake/version.cmake"
- COMMENT "Writing aom_version.h" VERBATIM)
-
-add_custom_target(aom_version_check
- COMMAND ${CMAKE_COMMAND} -DAOM_CONFIG_DIR=${AOM_CONFIG_DIR}
- -DAOM_ROOT=${AOM_ROOT}
- -DGIT_EXECUTABLE=${GIT_EXECUTABLE}
- -DPERL_EXECUTABLE=${PERL_EXECUTABLE} -P
- "${AOM_ROOT}/build/cmake/version.cmake"
- COMMENT "Updating version info if necessary." VERBATIM)
-add_dependencies(aom_version aom_version_check)
-
-if(NOT MSVC)
- add_library(aom_pc ${AOM_PKG_CONFIG_SOURCES})
- add_dummy_source_file_to_target(aom_pc c)
- add_custom_command(OUTPUT "${AOM_CONFIG_DIR}/aom.pc"
- COMMAND ${CMAKE_COMMAND} ARGS
- -DAOM_CONFIG_DIR=${AOM_CONFIG_DIR}
- -DAOM_ROOT=${AOM_ROOT}
- -DCMAKE_INSTALL_PREFIX=${CMAKE_INSTALL_PREFIX}
- -DCMAKE_PROJECT_NAME=${CMAKE_PROJECT_NAME}
- -DCONFIG_MULTITHREAD=${CONFIG_MULTITHREAD}
- -DHAVE_PTHREAD_H=${HAVE_PTHREAD_H} -P
- "${AOM_ROOT}/build/cmake/pkg_config.cmake"
- COMMENT "Writing aom.pc" VERBATIM)
- add_dependencies(aom_pc aom_version)
-endif()
-
-# TODO(tomfinegan): Move rtcd target setup where it belongs for each rtcd
-# source.
-add_rtcd_build_step("${AOM_ROOT}/aom_dsp/aom_dsp_rtcd_defs.pl"
- "${AOM_CONFIG_DIR}/config/aom_dsp_rtcd.h"
- "${AOM_ROOT}/aom_dsp/aom_dsp_rtcd.c" "aom_dsp_rtcd")
-add_rtcd_build_step("${AOM_ROOT}/aom_scale/aom_scale_rtcd.pl"
- "${AOM_CONFIG_DIR}/config/aom_scale_rtcd.h"
- "${AOM_ROOT}/aom_scale/aom_scale_rtcd.c" "aom_scale_rtcd")
-add_rtcd_build_step("${AOM_ROOT}/av1/common/av1_rtcd_defs.pl"
- "${AOM_CONFIG_DIR}/config/av1_rtcd.h"
- "${AOM_ROOT}/av1/common/av1_rtcd.c" "av1_rtcd")
-
-add_library(aom_rtcd OBJECT ${AOM_RTCD_SOURCES})
-add_dependencies(aom_rtcd aom_version)
-
-if(ENABLE_EXAMPLES)
- add_library(aom_encoder_stats OBJECT ${AOM_ENCODER_STATS_SOURCES})
- set(AOM_LIB_TARGETS ${AOM_LIB_TARGETS} aom_encoder_stats)
-endif()
-add_library(aom ${AOM_SOURCES} $<TARGET_OBJECTS:aom_rtcd>)
-
-if(NOT MSVC AND NOT APPLE)
- target_link_libraries(aom ${AOM_LIB_LINK_TYPE} m)
-endif()
-
-# List of object and static library targets.
-set(AOM_LIB_TARGETS ${AOM_LIB_TARGETS} aom_rtcd aom_encoder_stats aom_mem
- aom_scale aom)
-
-# Setup dependencies.
-setup_aom_dsp_targets()
-setup_aom_mem_targets()
-setup_aom_ports_targets()
-setup_aom_util_targets()
-setup_aom_scale_targets()
-setup_av1_targets()
-
-# Make all library targets depend on aom_rtcd to make sure it builds first.
-foreach(aom_lib ${AOM_LIB_TARGETS})
- if(NOT "${aom_lib}" STREQUAL "aom_rtcd")
- add_dependencies(${aom_lib} aom_rtcd)
- endif()
-endforeach()
-
-# Generate C/C++ stub files containing the function usage_exit(). Users of the
-# aom_common_app_util library must define this function. This is a convenience
-# to allow omission of the function from applications that might want to use
-# other pieces of the util support without defining usage_exit().
-file(WRITE "${AOM_GEN_SRC_DIR}/usage_exit.c" "void usage_exit(void) {}")
-file(WRITE "${AOM_GEN_SRC_DIR}/usage_exit.cc"
- "extern \"C\" void usage_exit(void) {}")
-
-#
-# Application and application support targets.
-#
-if(ENABLE_EXAMPLES OR ENABLE_TESTS OR ENABLE_TOOLS)
- add_library(aom_common_app_util OBJECT ${AOM_COMMON_APP_UTIL_SOURCES})
- if(CONFIG_AV1_DECODER)
- add_library(aom_decoder_app_util OBJECT ${AOM_DECODER_APP_UTIL_SOURCES})
- # obudec depends on internal headers that require *rtcd.h
- add_dependencies(aom_decoder_app_util aom_rtcd)
- endif()
- if(CONFIG_AV1_ENCODER)
- add_library(aom_encoder_app_util OBJECT ${AOM_ENCODER_APP_UTIL_SOURCES})
- endif()
-endif()
-
-if((CONFIG_AV1_DECODER OR CONFIG_AV1_ENCODER) AND ENABLE_EXAMPLES)
- add_executable(resize_util "${AOM_ROOT}/examples/resize_util.c"
- $<TARGET_OBJECTS:aom_common_app_util>)
- list(APPEND AOM_APP_TARGETS resize_util)
-endif()
-
-if(CONFIG_AV1_DECODER AND ENABLE_EXAMPLES)
- add_executable(aomdec "${AOM_ROOT}/apps/aomdec.c"
- $<TARGET_OBJECTS:aom_common_app_util>
- $<TARGET_OBJECTS:aom_decoder_app_util>)
- add_executable(decode_to_md5 "${AOM_ROOT}/examples/decode_to_md5.c"
- $<TARGET_OBJECTS:aom_common_app_util>
- $<TARGET_OBJECTS:aom_decoder_app_util>)
- add_executable(decode_with_drops "${AOM_ROOT}/examples/decode_with_drops.c"
- $<TARGET_OBJECTS:aom_common_app_util>
- $<TARGET_OBJECTS:aom_decoder_app_util>)
- add_executable(simple_decoder "${AOM_ROOT}/examples/simple_decoder.c"
- $<TARGET_OBJECTS:aom_common_app_util>
- $<TARGET_OBJECTS:aom_decoder_app_util>)
- add_executable(scalable_decoder "${AOM_ROOT}/examples/scalable_decoder.c"
- $<TARGET_OBJECTS:aom_common_app_util>
- $<TARGET_OBJECTS:aom_decoder_app_util>)
-
- if(CONFIG_ANALYZER)
- add_executable(analyzer "${AOM_ROOT}/examples/analyzer.cc"
- $<TARGET_OBJECTS:aom_common_app_util>
- $<TARGET_OBJECTS:aom_decoder_app_util>)
- target_link_libraries(analyzer ${AOM_LIB_LINK_TYPE} ${wxWidgets_LIBRARIES})
- list(APPEND AOM_APP_TARGETS analyzer)
- list(APPEND AOM_DECODER_EXAMPLE_TARGETS analyzer)
- endif()
-
- if(CONFIG_INSPECTION)
- add_executable(inspect "${AOM_ROOT}/examples/inspect.c"
- $<TARGET_OBJECTS:aom_common_app_util>
- $<TARGET_OBJECTS:aom_decoder_app_util>)
- list(APPEND AOM_DECODER_EXAMPLE_TARGETS inspect)
-
- if(EMSCRIPTEN)
- add_preproc_definition(_POSIX_SOURCE)
- append_link_flag_to_target("inspect" "-s TOTAL_MEMORY=402653184")
- append_link_flag_to_target("inspect" "-s MODULARIZE=1")
- append_link_flag_to_target(
- "inspect" "-s \'EXTRA_EXPORTED_RUNTIME_METHODS=[\"UTF8ToString\"]\'")
- append_link_flag_to_target("inspect"
- "-s EXPORT_NAME=\"\'DecoderModule\'\"")
- append_link_flag_to_target("inspect" "--memory-init-file 0")
-
- if("${CMAKE_BUILD_TYPE}" STREQUAL "")
-
- # Default to -O3 when no build type is specified.
- append_compiler_flag("-O3")
- endif()
-
- em_link_post_js(inspect "${AOM_ROOT}/tools/inspect-post.js")
- endif()
- endif()
-
- # Maintain a list of decoder example targets.
- list(APPEND AOM_DECODER_EXAMPLE_TARGETS aomdec decode_to_md5
- decode_with_drops scalable_decoder simple_decoder)
-
- # Add decoder examples to the app targets list.
- list(APPEND AOM_APP_TARGETS ${AOM_DECODER_EXAMPLE_TARGETS})
-endif()
-
-if(CONFIG_AV1_ENCODER)
- if(ENABLE_EXAMPLES)
- add_executable(aomenc "${AOM_ROOT}/apps/aomenc.c"
- $<TARGET_OBJECTS:aom_common_app_util>
- $<TARGET_OBJECTS:aom_encoder_app_util>
- $<TARGET_OBJECTS:aom_encoder_stats>)
- add_executable(lossless_encoder "${AOM_ROOT}/examples/lossless_encoder.c"
- $<TARGET_OBJECTS:aom_common_app_util>
- $<TARGET_OBJECTS:aom_encoder_app_util>)
- add_executable(set_maps "${AOM_ROOT}/examples/set_maps.c"
- $<TARGET_OBJECTS:aom_common_app_util>
- $<TARGET_OBJECTS:aom_encoder_app_util>)
- add_executable(simple_encoder "${AOM_ROOT}/examples/simple_encoder.c"
- $<TARGET_OBJECTS:aom_common_app_util>
- $<TARGET_OBJECTS:aom_encoder_app_util>)
- add_executable(twopass_encoder "${AOM_ROOT}/examples/twopass_encoder.c"
- $<TARGET_OBJECTS:aom_common_app_util>
- $<TARGET_OBJECTS:aom_encoder_app_util>)
- add_executable(noise_model "${AOM_ROOT}/examples/noise_model.c"
- $<TARGET_OBJECTS:aom_common_app_util>
- $<TARGET_OBJECTS:aom_encoder_app_util>)
- add_executable(scalable_encoder "${AOM_ROOT}/examples/scalable_encoder.c"
- $<TARGET_OBJECTS:aom_common_app_util>
- $<TARGET_OBJECTS:aom_encoder_app_util>)
-
- # Maintain a list of encoder example targets.
- list(APPEND AOM_ENCODER_EXAMPLE_TARGETS aomenc lossless_encoder noise_model
- set_maps simple_encoder scalable_encoder twopass_encoder)
- endif()
-
- if(ENABLE_TOOLS)
- if(CONFIG_ENTROPY_STATS AND NOT BUILD_SHARED_LIBS)
-
- # TODO(tomfinegan): Sort out why a simple link command with
- # aom_entropy_optimizer.c won't work on macos, but dragging in all the
- # helper machinery allows the link to succeed.
- add_executable(aom_entropy_optimizer "${AOM_GEN_SRC_DIR}/usage_exit.c"
- "${AOM_ROOT}/tools/aom_entropy_optimizer.c"
- $<TARGET_OBJECTS:aom_common_app_util>
- $<TARGET_OBJECTS:aom_encoder_app_util>)
-
- # Maintain a list of encoder tool targets.
- list(APPEND AOM_ENCODER_TOOL_TARGETS aom_entropy_optimizer)
- endif()
- endif()
-
- # Add encoder examples and tools to the targets list.
- list(APPEND AOM_APP_TARGETS ${AOM_ENCODER_EXAMPLE_TARGETS}
- ${AOM_ENCODER_TOOL_TARGETS})
-endif()
-
-if(ENABLE_EXAMPLES)
-
- # Maintain a separate variable listing only the examples to facilitate
- # installation of example programs into an examples sub directory of
- # $AOM_DIST_DIR/bin when building the dist target.
- list(APPEND AOM_EXAMPLE_TARGETS ${AOM_DECODER_EXAMPLE_TARGETS}
- ${AOM_ENCODER_EXAMPLE_TARGETS})
-endif()
-
-if(ENABLE_TOOLS)
- if(CONFIG_AV1_DECODER)
- require_cxx_flag_nomsvc("-std=c++11" NO)
- add_executable(dump_obu "${AOM_GEN_SRC_DIR}/usage_exit.cc"
- "${AOM_ROOT}/tools/dump_obu.cc"
- "${AOM_ROOT}/tools/obu_parser.cc"
- "${AOM_ROOT}/tools/obu_parser.h"
- $<TARGET_OBJECTS:aom_common_app_util>
- $<TARGET_OBJECTS:aom_decoder_app_util>)
-
- list(APPEND AOM_TOOL_TARGETS dump_obu)
- list(APPEND AOM_APP_TARGETS dump_obu)
-
- if(NOT MSVC)
- target_compile_options(dump_obu PUBLIC -std=c++11)
- endif()
-
- # Maintain a separate variable listing only the examples to facilitate
- # installation of example programs into an tools sub directory of
- # $AOM_DIST_DIR/bin when building the dist target.
- list(APPEND AOM_TOOL_TARGETS ${AOM_DECODER_TOOL_TARGETS}
- ${AOM_ENCODER_TOOL_TARGETS})
- endif()
-endif()
-
-if(ENABLE_EXAMPLES AND CONFIG_AV1_DECODER AND CONFIG_AV1_ENCODER)
- add_executable(aom_cx_set_ref "${AOM_ROOT}/examples/aom_cx_set_ref.c"
- $<TARGET_OBJECTS:aom_common_app_util>
- $<TARGET_OBJECTS:aom_encoder_app_util>)
- list(APPEND AOM_EXAMPLE_TARGETS aom_cx_set_ref)
- list(APPEND AOM_APP_TARGETS aom_cx_set_ref)
-endif()
-
-if(ENABLE_EXAMPLES AND CONFIG_AV1_ENCODER)
- add_executable(lightfield_encoder "${AOM_ROOT}/examples/lightfield_encoder.c"
- $<TARGET_OBJECTS:aom_common_app_util>
- $<TARGET_OBJECTS:aom_encoder_app_util>)
- list(APPEND AOM_EXAMPLE_TARGETS lightfield_encoder)
- list(APPEND AOM_APP_TARGETS lightfield_encoder)
-endif()
-
-if(ENABLE_EXAMPLES AND CONFIG_AV1_DECODER)
- add_executable(lightfield_tile_list_decoder
- "${AOM_ROOT}/examples/lightfield_tile_list_decoder.c"
- $<TARGET_OBJECTS:aom_common_app_util>
- $<TARGET_OBJECTS:aom_decoder_app_util>)
- list(APPEND AOM_EXAMPLE_TARGETS lightfield_tile_list_decoder)
- list(APPEND AOM_APP_TARGETS lightfield_tile_list_decoder)
-endif()
-
-if(ENABLE_EXAMPLES AND CONFIG_AV1_DECODER)
- add_executable(lightfield_decoder "${AOM_ROOT}/examples/lightfield_decoder.c"
- $<TARGET_OBJECTS:aom_common_app_util>
- $<TARGET_OBJECTS:aom_decoder_app_util>)
- list(APPEND AOM_EXAMPLE_TARGETS lightfield_decoder)
- list(APPEND AOM_APP_TARGETS lightfield_decoder)
-endif()
-
-if(ENABLE_EXAMPLES AND CONFIG_AV1_ENCODER AND CONFIG_AV1_DECODER)
- add_executable(lightfield_bitstream_parsing
- "${AOM_ROOT}/examples/lightfield_bitstream_parsing.c"
- $<TARGET_OBJECTS:aom_common_app_util>
- $<TARGET_OBJECTS:aom_encoder_app_util>
- $<TARGET_OBJECTS:aom_decoder_app_util>)
- list(APPEND AOM_EXAMPLE_TARGETS lightfield_bitstream_parsing)
- list(APPEND AOM_APP_TARGETS lightfield_bitstream_parsing)
-endif()
-
-foreach(aom_app ${AOM_APP_TARGETS})
- target_link_libraries(${aom_app} ${AOM_LIB_LINK_TYPE} aom)
-endforeach()
-
-if(ENABLE_EXAMPLES OR ENABLE_TESTS OR ENABLE_TOOLS)
- if(CONFIG_LIBYUV)
- add_library(yuv OBJECT ${AOM_LIBYUV_SOURCES})
- if(NOT MSVC)
- target_compile_options(yuv PRIVATE -Wno-unused-parameter)
- endif()
- include_directories("${AOM_ROOT}/third_party/libyuv/include")
-
- # Add to existing targets.
- foreach(aom_app ${AOM_APP_TARGETS})
- target_sources(${aom_app} PRIVATE $<TARGET_OBJECTS:yuv>)
- set_property(TARGET ${aom_app} PROPERTY LINKER_LANGUAGE CXX)
- endforeach()
- endif()
-
- if(CONFIG_WEBM_IO)
- require_cxx_flag_nomsvc("-std=c++11" NO)
-
- add_library(webm OBJECT ${AOM_LIBWEBM_SOURCES})
- include_directories("${AOM_ROOT}/third_party/libwebm")
- target_compile_definitions(webm PRIVATE __STDC_CONSTANT_MACROS)
- target_compile_definitions(webm PRIVATE __STDC_LIMIT_MACROS)
-
- if(NOT MSVC)
- target_compile_options(webm PRIVATE -Wno-shadow)
- target_compile_options(webm PUBLIC -std=c++11)
- endif()
-
- # Add to existing targets.
- if(CONFIG_AV1_DECODER)
- target_sources(aom_decoder_app_util PRIVATE ${AOM_WEBM_DECODER_SOURCES})
- endif()
-
- if(CONFIG_AV1_ENCODER)
- target_sources(aom_encoder_app_util PRIVATE ${AOM_WEBM_ENCODER_SOURCES})
- endif()
-
- foreach(aom_app ${AOM_APP_TARGETS})
- target_sources(${aom_app} PRIVATE $<TARGET_OBJECTS:webm>)
- set_property(TARGET ${aom_app} PROPERTY LINKER_LANGUAGE CXX)
- endforeach()
- endif()
-endif()
-
-if(ENABLE_TESTS)
-
- # Create test_libaom target and the targets it depends on.
- setup_aom_test_targets()
-endif()
-
-if(HAVE_PTHREAD_H AND CONFIG_MULTITHREAD)
- find_package(Threads)
- target_link_libraries(aom ${AOM_LIB_LINK_TYPE} Threads::Threads)
-endif()
-
-if(XCODE)
-
- # TODO(tomfinegan): Make sure target has no C++ files before doing this as
- # it's not necessary in that case.
- if(CONFIG_LIBYUV OR CONFIG_WEBM_IO)
-
- # The Xcode generator does not obey LINKER_LANGUAGE. Because of the issue
- # what looks like a C++ file needs to be in any target that Xcode will link
- # when the target contains a C++ dependency. Without this Xcode will try to
- # link with the C linker, which always ends badly when a dependency actually
- # includes C++.
-
- # Note: LINKER_LANGUAGE is explicitly set to C++ for all targets touched
- # here, it really is the Xcode generator's fault, or just a deficiency in
- # Xcode itself.
- foreach(aom_app ${AOM_APP_TARGETS})
- add_dummy_source_file_to_target("${aom_app}" "cc")
- endforeach()
- endif()
-endif()
-
-if(ENABLE_EXAMPLES AND "${CMAKE_GENERATOR}" MATCHES "Makefiles$")
-
- # For historical purposes place the example binaries in the example directory.
- file(MAKE_DIRECTORY "${AOM_CONFIG_DIR}/examples")
-
- foreach(target ${AOM_EXAMPLE_TARGETS})
- if(NOT "${target}" MATCHES "aomdec\|aomenc")
- set_target_properties(${target}
- PROPERTIES RUNTIME_OUTPUT_DIRECTORY
- "${AOM_CONFIG_DIR}/examples")
- endif()
- endforeach()
-
- if(ENABLE_TOOLS AND AOM_TOOL_TARGETS)
-
- # The same expectation is true for tool targets.
- file(MAKE_DIRECTORY "${AOM_CONFIG_DIR}/tools")
- set_target_properties(${AOM_TOOL_TARGETS}
- PROPERTIES RUNTIME_OUTPUT_DIRECTORY
- "${AOM_CONFIG_DIR}/tools")
- endif()
-endif()
-
-if(BUILD_SHARED_LIBS)
- include("${AOM_ROOT}/build/cmake/exports.cmake")
- setup_exports_target()
- set_target_properties(aom PROPERTIES SOVERSION 0)
-endif()
-
-# Handle user supplied compile and link flags last to ensure they're obeyed.
-set_user_flags()
-
-# Aomedia documentation rule.
-if(ENABLE_DOCS)
- include(FindDoxygen)
- if(DOXYGEN_FOUND)
- include("${AOM_ROOT}/docs.cmake")
- setup_documentation_targets()
- else()
- message("--- Cannot find doxygen, ENABLE_DOCS turned off.")
- set(ENABLE_DOCS OFF)
- endif()
-endif()
-
-if(NOT (MSVC OR XCODE))
-
- # Aomedia install rule.
- list(APPEND AOM_INSTALL_INCS "${AOM_ROOT}/aom/aom.h"
- "${AOM_ROOT}/aom/aom_codec.h"
- "${AOM_ROOT}/aom/aom_frame_buffer.h"
- "${AOM_ROOT}/aom/aom_image.h" "${AOM_ROOT}/aom/aom_integer.h"
- "${AOM_ROOT}/aom/aom.h")
-
- if(CONFIG_AV1_DECODER)
- if(ENABLE_EXAMPLES)
- list(APPEND AOM_INSTALL_BINS aomdec)
- endif()
-
- list(APPEND AOM_INSTALL_INCS "${AOM_ROOT}/aom/aom_decoder.h"
- "${AOM_ROOT}/aom/aomdx.h")
- endif()
-
- if(CONFIG_AV1_ENCODER)
- if(ENABLE_EXAMPLES)
- list(APPEND AOM_INSTALL_BINS aomenc)
- endif()
-
- list(APPEND AOM_INSTALL_INCS "${AOM_ROOT}/aom/aomcx.h"
- "${AOM_ROOT}/aom/aom_encoder.h")
- endif()
-
- set(AOM_INSTALL_LIBS aom)
-
- install(FILES ${AOM_INSTALL_INCS} DESTINATION "${INCLUDE_INSTALL_DIR}/aom")
- install(FILES "${AOM_CONFIG_DIR}/aom.pc" DESTINATION
- "${LIB_INSTALL_DIR}/pkgconfig")
- install(TARGETS ${AOM_INSTALL_LIBS} DESTINATION "${LIB_INSTALL_DIR}")
-
- if(ENABLE_EXAMPLES)
- install(TARGETS ${AOM_INSTALL_BINS} DESTINATION
- "${CMAKE_INSTALL_PREFIX}/bin")
- endif()
-endif()
-
-# Aomedia dist rule.
-if(CONFIG_AV1_DECODER AND ENABLE_EXAMPLES)
- list(APPEND AOM_DIST_APPS $<TARGET_FILE:aomdec>)
-endif()
-if(CONFIG_AV1_ENCODER AND ENABLE_EXAMPLES)
- list(APPEND AOM_DIST_APPS $<TARGET_FILE:aomenc>)
-endif()
-
-if(ENABLE_EXAMPLES)
- foreach(example ${AOM_EXAMPLE_TARGETS})
- list(APPEND AOM_DIST_EXAMPLES $<TARGET_FILE:${example}>)
- endforeach()
-endif()
-
-if(ENABLE_TOOLS)
- foreach(tool ${AOM_TOOL_TARGETS})
- list(APPEND AOM_DIST_TOOLS $<TARGET_FILE:${tool}>)
- endforeach()
-endif()
-
-if(NOT AOM_DIST_DIR)
- set(AOM_DIST_DIR "${AOM_CONFIG_DIR}/dist")
-endif()
-
-add_custom_target(dist
- COMMAND ${CMAKE_COMMAND} -DAOM_ROOT=${AOM_ROOT}
- -DAOM_CONFIG_DIR=${AOM_CONFIG_DIR}
- -DAOM_DIST_DIR=${AOM_DIST_DIR}
- -DAOM_DIST_APPS="${AOM_DIST_APPS}"
- -DAOM_DIST_EXAMPLES="${AOM_DIST_EXAMPLES}"
- -DAOM_DIST_TOOLS="${AOM_DIST_TOOLS}"
- -DAOM_DIST_INCLUDES="${AOM_INSTALL_INCS}"
- -DAOM_DIST_LIBS=$<TARGET_FILE:aom>
- -DENABLE_DOCS=${ENABLE_DOCS} -P
- "${AOM_ROOT}/build/cmake/dist.cmake"
- DEPENDS ${AOM_INSTALL_BINS} ${AOM_INSTALL_LIBS}
- ${AOM_INSTALL_INCS} ${AOM_EXAMPLE_TARGETS}
- ${AOM_TOOL_TARGETS})
-
-if(ENABLE_DOCS)
- add_dependencies(dist docs)
-endif()
-
-# Collect all variables containing libaom source files.
-get_cmake_property(all_cmake_vars VARIABLES)
-foreach(var ${all_cmake_vars})
- if("${var}" MATCHES "SOURCES$\|_INTRIN_\|_ASM_" AND NOT "${var}" MATCHES
- "_APP_\|DOXYGEN\|LIBWEBM\|LIBYUV\|_PKG_\|TEST")
- list(APPEND aom_source_vars ${var})
- endif()
-endforeach()
-
-# Libaom_srcs.txt generation.
-set(libaom_srcs_txt_file "${AOM_CONFIG_DIR}/libaom_srcs.txt")
-file(WRITE "${libaom_srcs_txt_file}" "# This file is generated. DO NOT EDIT.\n")
-
-# Static source file list first.
-foreach(aom_source_var ${aom_source_vars})
- foreach(file ${${aom_source_var}})
- if(NOT "${file}" MATCHES "${AOM_CONFIG_DIR}")
- string(REPLACE "${AOM_ROOT}/" "" file "${file}")
- file(APPEND "${libaom_srcs_txt_file}" "${file}\n")
- endif()
- endforeach()
-endforeach()
-
-file(APPEND
- "${libaom_srcs_txt_file}"
- "# Files below this line are generated by the libaom build system.\n")
-foreach(aom_source_var ${aom_source_vars})
- foreach(file ${${aom_source_var}})
- if("${file}" MATCHES "${AOM_CONFIG_DIR}")
- string(REPLACE "${AOM_CONFIG_DIR}/" "" file "${file}")
- file(APPEND "${libaom_srcs_txt_file}" "${file}\n")
- endif()
- endforeach()
-endforeach()
-
-# Libaom_srcs.gni generation.
-set(libaom_srcs_gni_file "${AOM_CONFIG_DIR}/libaom_srcs.gni")
-file(WRITE "${libaom_srcs_gni_file}" "# This file is generated. DO NOT EDIT.\n")
-
-foreach(aom_source_var ${aom_source_vars})
- if("${${aom_source_var}}" MATCHES "${AOM_ROOT}")
- string(TOLOWER ${aom_source_var} aom_source_var_lowercase)
- file(APPEND "${libaom_srcs_gni_file}" "\n${aom_source_var_lowercase} = [\n")
- endif()
-
- foreach(file ${${aom_source_var}})
- if(NOT "${file}" MATCHES "${AOM_CONFIG_DIR}")
- string(REPLACE "${AOM_ROOT}" "//third_party/libaom/source/libaom" file
- "${file}")
- file(APPEND "${libaom_srcs_gni_file}" " \"${file}\",\n")
- endif()
- endforeach()
-
- if("${${aom_source_var}}" MATCHES "${AOM_ROOT}")
- file(APPEND "${libaom_srcs_gni_file}" "]\n")
- endif()
-endforeach()
-
-file(APPEND
- "${libaom_srcs_gni_file}"
- "\n# Files below this line are generated by the libaom build system.\n")
-
-foreach(aom_source_var ${aom_source_vars})
- if("${${aom_source_var}}" MATCHES "${AOM_CONFIG_DIR}")
- string(TOLOWER ${aom_source_var} aom_source_var_lowercase)
- file(APPEND "${libaom_srcs_gni_file}"
- "\n${aom_source_var_lowercase}_gen = [\n")
- endif()
- foreach(file ${${aom_source_var}})
- if(NOT "${file}" MATCHES "${AOM_ROOT}")
- string(REPLACE "${AOM_CONFIG_DIR}" "//third_party/libaom/source/libaom"
- file "${file}")
- file(APPEND "${libaom_srcs_gni_file}" " \"${file}\",\n")
- endif()
- endforeach()
-
- if("${${aom_source_var}}" MATCHES "${AOM_CONFIG_DIR}")
- file(APPEND "${libaom_srcs_gni_file}" "]\n")
- endif()
-endforeach()
diff --git a/third_party/aom/LICENSE b/third_party/aom/LICENSE
deleted file mode 100644
index fc340c376..000000000
--- a/third_party/aom/LICENSE
+++ /dev/null
@@ -1,27 +0,0 @@
-Copyright (c) 2016, Alliance for Open Media. All rights reserved.
-
-Redistribution and use in source and binary forms, with or without
-modification, are permitted provided that the following conditions
-are met:
-
-1. Redistributions of source code must retain the above copyright
- notice, this list of conditions and the following disclaimer.
-
-2. Redistributions in binary form must reproduce the above copyright
- notice, this list of conditions and the following disclaimer in
- the documentation and/or other materials provided with the
- distribution.
-
-THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
-LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
-FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
-COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
-INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
-BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
-LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
-CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
-LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
-ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
-POSSIBILITY OF SUCH DAMAGE.
-
diff --git a/third_party/aom/PATENTS b/third_party/aom/PATENTS
deleted file mode 100644
index 97842e02f..000000000
--- a/third_party/aom/PATENTS
+++ /dev/null
@@ -1,108 +0,0 @@
-Alliance for Open Media Patent License 1.0
-
-1. License Terms.
-
-1.1. Patent License. Subject to the terms and conditions of this License, each
- Licensor, on behalf of itself and successors in interest and assigns,
- grants Licensee a non-sublicensable, perpetual, worldwide, non-exclusive,
- no-charge, royalty-free, irrevocable (except as expressly stated in this
- License) patent license to its Necessary Claims to make, use, sell, offer
- for sale, import or distribute any Implementation.
-
-1.2. Conditions.
-
-1.2.1. Availability. As a condition to the grant of rights to Licensee to make,
- sell, offer for sale, import or distribute an Implementation under
- Section 1.1, Licensee must make its Necessary Claims available under
- this License, and must reproduce this License with any Implementation
- as follows:
-
- a. For distribution in source code, by including this License in the
- root directory of the source code with its Implementation.
-
- b. For distribution in any other form (including binary, object form,
- and/or hardware description code (e.g., HDL, RTL, Gate Level Netlist,
- GDSII, etc.)), by including this License in the documentation, legal
- notices, and/or other written materials provided with the
- Implementation.
-
-1.2.2. Additional Conditions. This license is directly from Licensor to
- Licensee. Licensee acknowledges as a condition of benefiting from it
- that no rights from Licensor are received from suppliers, distributors,
- or otherwise in connection with this License.
-
-1.3. Defensive Termination. If any Licensee, its Affiliates, or its agents
- initiates patent litigation or files, maintains, or voluntarily
- participates in a lawsuit against another entity or any person asserting
- that any Implementation infringes Necessary Claims, any patent licenses
- granted under this License directly to the Licensee are immediately
- terminated as of the date of the initiation of action unless 1) that suit
- was in response to a corresponding suit regarding an Implementation first
- brought against an initiating entity, or 2) that suit was brought to
- enforce the terms of this License (including intervention in a third-party
- action by a Licensee).
-
-1.4. Disclaimers. The Reference Implementation and Specification are provided
- "AS IS" and without warranty. The entire risk as to implementing or
- otherwise using the Reference Implementation or Specification is assumed
- by the implementer and user. Licensor expressly disclaims any warranties
- (express, implied, or otherwise), including implied warranties of
- merchantability, non-infringement, fitness for a particular purpose, or
- title, related to the material. IN NO EVENT WILL LICENSOR BE LIABLE TO
- ANY OTHER PARTY FOR LOST PROFITS OR ANY FORM OF INDIRECT, SPECIAL,
- INCIDENTAL, OR CONSEQUENTIAL DAMAGES OF ANY CHARACTER FROM ANY CAUSES OF
- ACTION OF ANY KIND WITH RESPECT TO THIS LICENSE, WHETHER BASED ON BREACH
- OF CONTRACT, TORT (INCLUDING NEGLIGENCE), OR OTHERWISE, AND WHETHER OR
- NOT THE OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-
-2. Definitions.
-
-2.1. Affiliate. "Affiliate" means an entity that directly or indirectly
- Controls, is Controlled by, or is under common Control of that party.
-
-2.2. Control. "Control" means direct or indirect control of more than 50% of
- the voting power to elect directors of that corporation, or for any other
- entity, the power to direct management of such entity.
-
-2.3. Decoder. "Decoder" means any decoder that conforms fully with all
- non-optional portions of the Specification.
-
-2.4. Encoder. "Encoder" means any encoder that produces a bitstream that can
- be decoded by a Decoder only to the extent it produces such a bitstream.
-
-2.5. Final Deliverable. "Final Deliverable" means the final version of a
- deliverable approved by the Alliance for Open Media as a Final
- Deliverable.
-
-2.6. Implementation. "Implementation" means any implementation, including the
- Reference Implementation, that is an Encoder and/or a Decoder. An
- Implementation also includes components of an Implementation only to the
- extent they are used as part of an Implementation.
-
-2.7. License. "License" means this license.
-
-2.8. Licensee. "Licensee" means any person or entity who exercises patent
- rights granted under this License.
-
-2.9. Licensor. "Licensor" means (i) any Licensee that makes, sells, offers
- for sale, imports or distributes any Implementation, or (ii) a person
- or entity that has a licensing obligation to the Implementation as a
- result of its membership and/or participation in the Alliance for Open
- Media working group that developed the Specification.
-
-2.10. Necessary Claims. "Necessary Claims" means all claims of patents or
- patent applications, (a) that currently or at any time in the future,
- are owned or controlled by the Licensor, and (b) (i) would be an
- Essential Claim as defined by the W3C Policy as of February 5, 2004
- (https://www.w3.org/Consortium/Patent-Policy-20040205/#def-essential)
- as if the Specification was a W3C Recommendation; or (ii) are infringed
- by the Reference Implementation.
-
-2.11. Reference Implementation. "Reference Implementation" means an Encoder
- and/or Decoder released by the Alliance for Open Media as a Final
- Deliverable.
-
-2.12. Specification. "Specification" means the specification designated by
- the Alliance for Open Media as a Final Deliverable for which this
- License was issued.
-
diff --git a/third_party/aom/README.md b/third_party/aom/README.md
deleted file mode 100644
index cab3f9993..000000000
--- a/third_party/aom/README.md
+++ /dev/null
@@ -1,625 +0,0 @@
-# AV1 Codec Library
-
-## Contents
-1. [Building the lib and applications](#building-the-library-and-applications)
- - [Prerequisites](#prerequisites)
- - [Get the code](#get-the-code)
- - [Basics](#basic-build)
- - [Configuration options](#configuration-options)
- - [Dylib builds](#dylib-builds)
- - [Debugging](#debugging)
- - [Cross compiling](#cross-compiling)
- - [Sanitizer support](#sanitizers)
- - [MSVC builds](#microsoft-visual-studio-builds)
- - [Xcode builds](#xcode-builds)
- - [Emscripten builds](#emscripten-builds)
- - [Extra Build Flags](#extra-build-flags)
-2. [Testing the library](#testing-the-av1-codec)
- - [Basics](#testing-basics)
- - [Unit tests](#1_unit-tests)
- - [Example tests](#2_example-tests)
- - [Encoder tests](#3_encoder-tests)
- - [IDE hosted tests](#ide-hosted-tests)
- - [Downloading test data](#downloading-the-test-data)
- - [Adding a new test data file](#adding-a-new-test-data-file)
- - [Additional test data](#additional-test-data)
- - [Sharded testing](#sharded-testing)
- - [Running tests directly](#1_running-test_libaom-directly)
- - [Running tests via CMake](#2_running-the-tests-via-the-cmake-build)
-3. [Coding style](#coding-style)
-4. [Submitting patches](#submitting-patches)
- - [Login cookie](#login-cookie)
- - [Contributor agreement](#contributor-agreement)
- - [Testing your code](#testing-your-code)
- - [Commit message hook](#commit-message-hook)
- - [Upload your change](#upload-your-change)
- - [Incorporating Reviewer Comments](#incorporating-reviewer-comments)
- - [Submitting your change](#submitting-your-change)
- - [Viewing change status](#viewing-the-status-of-uploaded-changes)
-5. [Support](#support)
-6. [Bug reports](#bug-reports)
-
-## Building the library and applications
-
-### Prerequisites
-
- 1. [CMake](https://cmake.org) version 3.5 or higher.
- 2. [Git](https://git-scm.com/).
- 3. [Perl](https://www.perl.org/).
- 4. For x86 targets, [yasm](http://yasm.tortall.net/), which is preferred, or a
- recent version of [nasm](http://www.nasm.us/).
- 5. Building the documentation requires [doxygen](http://doxygen.org).
- 6. Building the unit tests requires [Python](https://www.python.org/).
- 7. Emscripten builds require the portable
- [EMSDK](https://kripken.github.io/emscripten-site/index.html).
-
-### Get the code
-
-The AV1 library source code is stored in the Alliance for Open Media Git
-repository:
-
-~~~
- $ git clone https://aomedia.googlesource.com/aom
- # By default, the above command stores the source in the aom directory:
- $ cd aom
-~~~
-
-### Basic build
-
-CMake replaces the configure step typical of many projects. Running CMake will
-produce configuration and build files for the currently selected CMake
-generator. For most systems the default generator is Unix Makefiles. The basic
-form of a makefile build is the following:
-
-~~~
- $ cmake path/to/aom
- $ make
-~~~
-
-The above will generate a makefile build that produces the AV1 library and
-applications for the current host system after the make step completes
-successfully. The compiler chosen varies by host platform, but a general rule
-applies: On systems where cc and c++ are present in $PATH at the time CMake is
-run the generated build will use cc and c++ by default.
-
-### Configuration options
-
-The AV1 codec library has a great many configuration options. These come in two
-varieties:
-
- 1. Build system configuration options. These have the form `ENABLE_FEATURE`.
- 2. AV1 codec configuration options. These have the form `CONFIG_FEATURE`.
-
-Both types of options are set at the time CMake is run. The following example
-enables ccache and disables the AV1 encoder:
-
-~~~
- $ cmake path/to/aom -DENABLE_CCACHE=1 -DCONFIG_AV1_ENCODER=0
- $ make
-~~~
-
-The available configuration options are too numerous to list here. Build system
-configuration options can be found at the top of the CMakeLists.txt file found
-in the root of the AV1 repository, and AV1 codec configuration options can
-currently be found in the file `build/cmake/aom_config_defaults.cmake`.
-
-### Dylib builds
-
-A dylib (shared object) build of the AV1 codec library can be enabled via the
-CMake built in variable `BUILD_SHARED_LIBS`:
-
-~~~
- $ cmake path/to/aom -DBUILD_SHARED_LIBS=1
- $ make
-~~~
-
-This is currently only supported on non-Windows targets.
-
-### Debugging
-
-Depending on the generator used there are multiple ways of going about
-debugging AV1 components. For single configuration generators like the Unix
-Makefiles generator, setting `CMAKE_BUILD_TYPE` to Debug is sufficient:
-
-~~~
- $ cmake path/to/aom -DCMAKE_BUILD_TYPE=Debug
-~~~
-
-For Xcode, mainly because configuration controls for Xcode builds are buried two
-configuration windows deep and must be set for each subproject within the Xcode
-IDE individually, `CMAKE_CONFIGURATION_TYPES` should be set to Debug:
-
-~~~
- $ cmake path/to/aom -G Xcode -DCMAKE_CONFIGURATION_TYPES=Debug
-~~~
-
-For Visual Studio the in-IDE configuration controls should be used. Simply set
-the IDE project configuration to Debug to allow for stepping through the code.
-
-In addition to the above it can sometimes be useful to debug only C and C++
-code. To disable all assembly code and intrinsics set `AOM_TARGET_CPU` to
-generic at generation time:
-
-~~~
- $ cmake path/to/aom -DAOM_TARGET_CPU=generic
-~~~
-
-### Cross compiling
-
-For the purposes of building the AV1 codec and applications and relative to the
-scope of this guide, all builds for architectures differing from the native host
-architecture will be considered cross compiles. The AV1 CMake build handles
-cross compiling via the use of toolchain files included in the AV1 repository.
-The toolchain files available at the time of this writing are:
-
- - arm64-ios.cmake
- - arm64-linux-gcc.cmake
- - arm64-mingw-gcc.cmake
- - armv7-ios.cmake
- - armv7-linux-gcc.cmake
- - armv7-mingw-gcc.cmake
- - armv7s-ios.cmake
- - mips32-linux-gcc.cmake
- - mips64-linux-gcc.cmake
- - x86-ios-simulator.cmake
- - x86-linux.cmake
- - x86-macos.cmake
- - x86-mingw-gcc.cmake
- - x86\_64-ios-simulator.cmake
- - x86\_64-mingw-gcc.cmake
-
-The following example demonstrates use of the x86-macos.cmake toolchain file on
-a x86\_64 MacOS host:
-
-~~~
- $ cmake path/to/aom \
- -DCMAKE_TOOLCHAIN_FILE=path/to/aom/build/cmake/toolchains/x86-macos.cmake
- $ make
-~~~
-
-To build for an unlisted target creation of a new toolchain file is the best
-solution. The existing toolchain files can be used as a starting point for a new
-toolchain file since each one exposes the basic requirements for toolchain files
-as used in the AV1 codec build.
-
-As a temporary workaround, an unoptimized AV1 configuration that builds only C
-and C++ sources can be produced using the following commands:
-
-~~~
- $ cmake path/to/aom -DAOM_TARGET_CPU=generic
- $ make
-~~~
-
-In addition to the above it's important to note that the toolchain files
-suffixed with gcc behave differently than the others. These toolchain files
-attempt to obey the $CROSS environment variable.
-
-### Sanitizers
-
-Sanitizer integration is built-in to the CMake build system. To enable a
-sanitizer, add `-DSANITIZE=<type>` to the CMake command line. For example, to
-enable address sanitizer:
-
-~~~
- $ cmake path/to/aom -DSANITIZE=address
- $ make
-~~~
-
-Sanitizers available vary by platform, target, and compiler. Consult your
-compiler documentation to determine which, if any, are available.
-
-### Microsoft Visual Studio builds
-
-Building the AV1 codec library in Microsoft Visual Studio is supported. The
-following example demonstrates generating projects and a solution for the
-Microsoft IDE:
-
-~~~
- # This does not require a bash shell; cmd.exe is fine.
- $ cmake path/to/aom -G "Visual Studio 15 2017"
-~~~
-
-### Xcode builds
-
-Building the AV1 codec library in Xcode is supported. The following example
-demonstrates generating an Xcode project:
-
-~~~
- $ cmake path/to/aom -G Xcode
-~~~
-
-### Emscripten builds
-
-Building the AV1 codec library with Emscripten is supported. Typically this is
-used to hook into the AOMAnalyzer GUI application. These instructions focus on
-using the inspector with AOMAnalyzer, but all tools can be built with
-Emscripten.
-
-It is assumed here that you have already downloaded and installed the EMSDK,
-installed and activated at least one toolchain, and setup your environment
-appropriately using the emsdk\_env script.
-
-1. Download [AOMAnalyzer](https://people.xiph.org/~mbebenita/analyzer/).
-
-2. Configure the build:
-
-~~~
- $ cmake path/to/aom \
- -DENABLE_CCACHE=1 \
- -DAOM_TARGET_CPU=generic \
- -DENABLE_DOCS=0 \
- -DENABLE_TESTS=0 \
- -DCONFIG_ACCOUNTING=1 \
- -DCONFIG_INSPECTION=1 \
- -DCONFIG_MULTITHREAD=0 \
- -DCONFIG_RUNTIME_CPU_DETECT=0 \
- -DCONFIG_WEBM_IO=0 \
- -DCMAKE_TOOLCHAIN_FILE=path/to/emsdk-portable/.../Emscripten.cmake
-~~~
-
-3. Build it: run make if that's your generator of choice:
-
-~~~
- $ make inspect
-~~~
-
-4. Run the analyzer:
-
-~~~
- # inspect.js is in the examples sub directory of the directory in which you
- # executed cmake.
- $ path/to/AOMAnalyzer path/to/examples/inspect.js path/to/av1/input/file
-~~~
-
-### Extra build flags
-
-Three variables allow for passing of additional flags to the build system.
-
-- AOM\_EXTRA\_C\_FLAGS
-- AOM\_EXTRA\_CXX\_FLAGS
-- AOM\_EXTRA\_EXE\_LINKER\_FLAGS
-
-The build system attempts to ensure the flags passed through the above variables
-are passed to tools last in order to allow for override of default behavior.
-These flags can be used, for example, to enable asserts in a release build:
-
-~~~
- $ cmake path/to/aom \
- -DCMAKE_BUILD_TYPE=Release \
- -DAOM_EXTRA_C_FLAGS=-UNDEBUG \
- -DAOM_EXTRA_CXX_FLAGS=-UNDEBUG
-~~~
-
-## Testing the AV1 codec
-
-### Testing basics
-
-There are several methods of testing the AV1 codec. All of these methods require
-the presence of the AV1 source code and a working build of the AV1 library and
-applications.
-
-#### 1. Unit tests:
-
-The unit tests can be run at build time:
-
-~~~
- # Before running the make command the LIBAOM_TEST_DATA_PATH environment
- # variable should be set to avoid downloading the test files to the
- # cmake build configuration directory.
- $ cmake path/to/aom
- # Note: The AV1 CMake build creates many test targets. Running make
- # with multiple jobs will speed up the test run significantly.
- $ make runtests
-~~~
-
-#### 2. Example tests:
-
-The example tests require a bash shell and can be run in the following manner:
-
-~~~
- # See the note above about LIBAOM_TEST_DATA_PATH.
- $ cmake path/to/aom
- $ make
- # It's best to build the testdata target using many make jobs.
- # Running it like this will verify and download (if necessary)
- # one at a time, which takes a while.
- $ make testdata
- $ path/to/aom/test/examples.sh --bin-path examples
-~~~
-
-#### 3. Encoder tests:
-
-When making a change to the encoder run encoder tests to confirm that your
-change has a positive or negligible impact on encode quality. When running these
-tests the build configuration should be changed to enable internal encoder
-statistics:
-
-~~~
- $ cmake path/to/aom -DCONFIG_INTERNAL_STATS=1
- $ make
-~~~
-
-The repository contains scripts intended to make running these tests as simple
-as possible. The following example demonstrates creating a set of baseline clips
-for comparison to results produced after making your change to libaom:
-
-~~~
- # This will encode all Y4M files in the current directory using the
- # settings specified to create the encoder baseline statistical data:
- $ cd path/to/test/inputs
- # This command line assumes that run_encodes.sh, its helper script
- # best_encode.sh, and the aomenc you intend to test are all within a
- # directory in your PATH.
- $ run_encodes.sh 200 500 50 baseline
-~~~
-
-After making your change and creating the baseline clips, you'll need to run
-encodes that include your change(s) to confirm that things are working as
-intended:
-
-~~~
- # This will encode all Y4M files in the current directory using the
- # settings specified to create the statistical data for your change:
- $ cd path/to/test/inputs
- # This command line assumes that run_encodes.sh, its helper script
- # best_encode.sh, and the aomenc you intend to test are all within a
- # directory in your PATH.
- $ run_encodes.sh 200 500 50 mytweak
-~~~
-
-After creating both data sets you can use `test/visual_metrics.py` to generate a
-report that can be viewed in a web browser:
-
-~~~
- $ visual_metrics.py metrics_template.html "*stt" baseline mytweak \
- > mytweak.html
-~~~
-
-You can view the report by opening mytweak.html in a web browser.
-
-
-### IDE hosted tests
-
-By default the generated project files created by CMake will not include the
-runtests and testdata rules when generating for IDEs like Microsoft Visual
-Studio and Xcode. This is done to avoid intolerably long build cycles in the
-IDEs-- IDE behavior is to build all targets when selecting the build project
-options in MSVS and Xcode. To enable the test rules in IDEs the
-`ENABLE_IDE_TEST_HOSTING` variable must be enabled at CMake generation time:
-
-~~~
- # This example uses Xcode. To get a list of the generators
- # available, run cmake with the -G argument missing its
- # value.
- $ cmake path/to/aom -DENABLE_IDE_TEST_HOSTING=1 -G Xcode
-~~~
-
-### Downloading the test data
-
-The fastest and easiest way to obtain the test data is to use CMake to generate
-a build using the Unix Makefiles generator, and then to build only the testdata
-rule:
-
-~~~
- $ cmake path/to/aom -G "Unix Makefiles"
- # 28 is used because there are 28 test files as of this writing.
- $ make -j28 testdata
-~~~
-
-The above make command will only download and verify the test data.
-
-### Adding a new test data file
-
-First, add the new test data file to the `aom-test-data` bucket of the
-`aomedia-testing` project on Google Cloud Platform. You may need to ask someone
-with the necessary access permissions to do this for you.
-
-NOTE: When a new test data file is added to the `aom-test-data` bucket, its
-"Public access" is initially "Not public". We need to change its
-"Public access" to "Public" by using the following
-[`gsutil`](https://cloud.google.com/storage/docs/gsutil_install) command:
-~~~
- $ gsutil acl ch -g all:R gs://aom-test-data/test-data-file-name
-~~~
-This command grants the `AllUsers` group READ access to the file named
-"test-data-file-name" in the `aom-test-data` bucket.
-
-Once the new test data file has been added to `aom-test-data`, create a CL to
-add the name of the new test data file to `test/test_data_util.cmake` and add
-the SHA1 checksum of the new test data file to `test/test-data.sha1`. (The SHA1
-checksum of a file can be calculated by running the `sha1sum` command on the
-file.)
-
-### Additional test data
-
-The test data mentioned above is strictly intended for unit testing.
-
-Additional input data for testing the encoder can be obtained from:
-https://media.xiph.org/video/derf/
-
-### Sharded testing
-
-The AV1 codec library unit tests are built upon gtest which supports sharding of
-test jobs. Sharded test runs can be achieved in a couple of ways.
-
-#### 1. Running test\_libaom directly:
-
-~~~
- # Set the environment variable GTEST_TOTAL_SHARDS to control the number of
- # shards.
- $ export GTEST_TOTAL_SHARDS=10
- # (GTEST shard indexing is 0 based).
- $ seq 0 $(( $GTEST_TOTAL_SHARDS - 1 )) \
- | xargs -n 1 -P 0 -I{} env GTEST_SHARD_INDEX={} ./test_libaom
-~~~
-
-To create a test shard for each CPU core available on the current system set
-`GTEST_TOTAL_SHARDS` to the number of CPU cores on your system minus one.
-
-#### 2. Running the tests via the CMake build:
-
-~~~
- # For IDE based builds, ENABLE_IDE_TEST_HOSTING must be enabled. See
- # the IDE hosted tests section above for more information. If the IDE
- # supports building targets concurrently tests will be sharded by default.
-
- # For make and ninja builds the -j parameter controls the number of shards
- # at test run time. This example will run the tests using 10 shards via
- # make.
- $ make -j10 runtests
-~~~
-
-The maximum number of test targets that can run concurrently is determined by
-the number of CPUs on the system where the build is configured as detected by
-CMake. A system with 24 cores can run 24 test shards using a value of 24 with
-the `-j` parameter. When CMake is unable to detect the number of cores 10 shards
-is the default maximum value.
-
-## Coding style
-
-We are using the Google C Coding Style defined by the
-[Google C++ Style Guide](https://google.github.io/styleguide/cppguide.html).
-
-The coding style used by this project is enforced with clang-format using the
-configuration contained in the
-[.clang-format](https://chromium.googlesource.com/webm/aom/+/master/.clang-format)
-file in the root of the repository.
-
-You can download clang-format using your system's package manager, or directly
-from [llvm.org](http://llvm.org/releases/download.html). You can also view the
-[documentation](https://clang.llvm.org/docs/ClangFormat.html) on llvm.org.
-Output from clang-format varies by clang-format version, for best results your
-version should match the one used on Jenkins. You can find the clang-format
-version by reading the comment in the `.clang-format` file linked above.
-
-Before pushing changes for review you can format your code with:
-
-~~~
- # Apply clang-format to modified .c, .h and .cc files
- $ clang-format -i --style=file \
- $(git diff --name-only --diff-filter=ACMR '*.[hc]' '*.cc')
-~~~
-
-Check the .clang-format file for the version used to generate it if there is any
-difference between your local formatting and the review system.
-
-Some Git installations have clang-format integration. Here are some examples:
-
-~~~
- # Apply clang-format to all staged changes:
- $ git clang-format
-
- # Clang format all staged and unstaged changes:
- $ git clang-format -f
-
- # Clang format all staged and unstaged changes interactively:
- $ git clang-format -f -p
-~~~
-
-## Submitting patches
-
-We manage the submission of patches using the
-[Gerrit](https://www.gerritcodereview.com/) code review tool. This tool
-implements a workflow on top of the Git version control system to ensure that
-all changes get peer reviewed and tested prior to their distribution.
-
-### Login cookie
-
-Browse to [AOMedia Git index](https://aomedia.googlesource.com/) and login with
-your account (Gmail credentials, for example). Next, follow the
-`Generate Password` link at the top of the page. You’ll be given
-instructions for creating a cookie to use with our Git repos.
-
-### Contributor agreement
-
-You will be required to execute a
-[contributor agreement](http://aomedia.org/license) to ensure that the AOMedia
-Project has the right to distribute your changes.
-
-### Testing your code
-
-The testing basics are covered in the [testing section](#testing-the-av1-codec)
-above.
-
-In addition to the local tests, many more (e.g. asan, tsan, valgrind) will run
-through Jenkins instances upon upload to gerrit.
-
-### Commit message hook
-
-Gerrit requires that each submission include a unique Change-Id. You can assign
-one manually using git commit --amend, but it’s easier to automate it with the
-commit-msg hook provided by Gerrit.
-
-Copy commit-msg to the `.git/hooks` directory of your local repo. Here's an
-example:
-
-~~~
- $ curl -Lo aom/.git/hooks/commit-msg https://chromium-review.googlesource.com/tools/hooks/commit-msg
-
- # Next, ensure that the downloaded commit-msg script is executable:
- $ chmod u+x aom/.git/hooks/commit-msg
-~~~
-
-See the Gerrit
-[documentation](https://gerrit-review.googlesource.com/Documentation/user-changeid.html)
-for more information.
-
-### Upload your change
-
-The command line to upload your patch looks like this:
-
-~~~
- $ git push https://aomedia-review.googlesource.com/aom HEAD:refs/for/master
-~~~
-
-### Incorporating reviewer comments
-
-If you previously uploaded a change to Gerrit and the Approver has asked for
-changes, follow these steps:
-
-1. Edit the files to make the changes the reviewer has requested.
-2. Recommit your edits using the --amend flag, for example:
-
-~~~
- $ git commit -a --amend
-~~~
-
-3. Use the same git push command as above to upload to Gerrit again for another
- review cycle.
-
-In general, you should not rebase your changes when doing updates in response to
-review. Doing so can make it harder to follow the evolution of your change in
-the diff view.
-
-### Submitting your change
-
-Once your change has been Approved and Verified, you can “submit” it through the
-Gerrit UI. This will usually automatically rebase your change onto the branch
-specified.
-
-Sometimes this can’t be done automatically. If you run into this problem, you
-must rebase your changes manually:
-
-~~~
- $ git fetch
- $ git rebase origin/branchname
-~~~
-
-If there are any conflicts, resolve them as you normally would with Git. When
-you’re done, reupload your change.
-
-### Viewing the status of uploaded changes
-
-To check the status of a change that you uploaded, open
-[Gerrit](https://aomedia-review.googlesource.com/), sign in, and click My >
-Changes.
-
-## Support
-
-This library is an open source project supported by its community. Please
-email aomediacodec@jointdevelopment.kavi.com for help.
-
-## Bug reports
-
-Bug reports can be filed in the Alliance for Open Media
-[issue tracker](https://bugs.chromium.org/p/aomedia/issues/list).
diff --git a/third_party/aom/aom/aom.h b/third_party/aom/aom/aom.h
deleted file mode 100644
index b1cc1ecce..000000000
--- a/third_party/aom/aom/aom.h
+++ /dev/null
@@ -1,147 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-/*!\defgroup aom AOM
- * \ingroup codecs
- * AOM is aom's newest video compression algorithm that uses motion
- * compensated prediction, Discrete Cosine Transform (DCT) coding of the
- * prediction error signal and context dependent entropy coding techniques
- * based on arithmetic principles. It features:
- * - YUV 4:2:0 image format
- * - Macro-block based coding (16x16 luma plus two 8x8 chroma)
- * - 1/4 (1/8) pixel accuracy motion compensated prediction
- * - 4x4 DCT transform
- * - 128 level linear quantizer
- * - In loop deblocking filter
- * - Context-based entropy coding
- *
- * @{
- */
-/*!\file
- * \brief Provides controls common to both the AOM encoder and decoder.
- */
-#ifndef AOM_AOM_AOM_H_
-#define AOM_AOM_AOM_H_
-
-#include "aom/aom_codec.h"
-#include "aom/aom_image.h"
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-/*!\brief Control functions
- *
- * This enum defines the control ids common to the AOM encoder and decoder
- */
-enum aom_com_control_id {
- /* Post processing and debug drawing controls. The first id defined here is 3;
- * values 0-2 are not assigned in this enum. */
- AOM_SET_POSTPROC = 3, /**< set the decoder's post processing settings */
- AOM_SET_DBG_COLOR_REF_FRAME =
- 4, /**< set the reference frames to color for each macroblock */
- AOM_SET_DBG_COLOR_MB_MODES = 5, /**< set which macro block modes to color */
- AOM_SET_DBG_COLOR_B_MODES = 6, /**< set which blocks modes to color */
- AOM_SET_DBG_DISPLAY_MV = 7, /**< set which motion vector modes to draw */
-
- /* TODO(jkoleszar): The encoder incorrectly reuses some of these values (5+)
- * for its control ids. These should be migrated to something like the
- * AOM_DECODER_CTRL_ID_START range next time we're ready to break the ABI.
- */
- AV1_GET_REFERENCE = 128, /**< get a pointer to a reference frame */
- AV1_SET_REFERENCE = 129, /**< write a frame into a reference buffer */
- AV1_COPY_REFERENCE =
- 130, /**< get a copy of reference frame from the decoder */
- AOM_COMMON_CTRL_ID_MAX, /**< implicitly 131 (one past AV1_COPY_REFERENCE) */
-
- AV1_GET_NEW_FRAME_IMAGE = 192, /**< get a pointer to the new frame */
- AV1_COPY_NEW_FRAME_IMAGE =
- 193, /**< copy the new frame to an external buffer */
-
- AOM_DECODER_CTRL_ID_START = 256
-};
-
-/*!\brief post process flags
- *
- * Bit flags for AOM decoder post processing; each value is a distinct bit
- */
-enum aom_postproc_level {
- AOM_NOFILTERING = 0,
- AOM_DEBLOCK = 1 << 0,
- AOM_DEMACROBLOCK = 1 << 1,
- AOM_ADDNOISE = 1 << 2,
- AOM_DEBUG_TXT_FRAME_INFO = 1 << 3, /**< print frame information */
- AOM_DEBUG_TXT_MBLK_MODES =
- 1 << 4, /**< print macro block modes over each macro block */
- AOM_DEBUG_TXT_DC_DIFF = 1 << 5, /**< print dc diff for each macro block */
- AOM_DEBUG_TXT_RATE_INFO = 1 << 6, /**< print video rate info (encoder only) */
- AOM_MFQE = 1 << 10 /**< note: bits 7-9 are not assigned in this enum */
-};
-
-/*!\brief post processing settings
- *
- * This defines a structure that describes the post processing settings. For
- * the best objective measure (using the PSNR metric) set post_proc_flag
- * to AOM_DEBLOCK and deblocking_level to 1.
- */
-
-typedef struct aom_postproc_cfg {
- /*!\brief the types of post processing to be done, should be a combination of
- * "aom_postproc_level" values */
- int post_proc_flag;
- int deblocking_level; /**< the strength of deblocking, valid range [0, 16] */
- int noise_level; /**< the strength of additive noise, valid range [0, 16] */
-} aom_postproc_cfg_t;
-
-/*!\brief AV1 specific reference frame data struct
- *
- * Argument type for AV1_GET_REFERENCE, AV1_SET_REFERENCE and AV1_COPY_REFERENCE.
- */
-typedef struct av1_ref_frame {
- int idx; /**< frame index to get (input) */
- int use_external_ref; /**< Directly use external ref buffer(decoder only) */
- aom_image_t img; /**< img structure to populate (output) */
-} av1_ref_frame_t;
-
-/*!\cond */
-/*!\brief aom decoder control function parameter type
- *
- * Defines the data type that each AOM decoder control function requires
- */
-AOM_CTRL_USE_TYPE(AOM_SET_POSTPROC, aom_postproc_cfg_t *)
-#define AOM_CTRL_AOM_SET_POSTPROC
-AOM_CTRL_USE_TYPE(AOM_SET_DBG_COLOR_REF_FRAME, int)
-#define AOM_CTRL_AOM_SET_DBG_COLOR_REF_FRAME
-AOM_CTRL_USE_TYPE(AOM_SET_DBG_COLOR_MB_MODES, int)
-#define AOM_CTRL_AOM_SET_DBG_COLOR_MB_MODES
-AOM_CTRL_USE_TYPE(AOM_SET_DBG_COLOR_B_MODES, int)
-#define AOM_CTRL_AOM_SET_DBG_COLOR_B_MODES
-AOM_CTRL_USE_TYPE(AOM_SET_DBG_DISPLAY_MV, int)
-#define AOM_CTRL_AOM_SET_DBG_DISPLAY_MV
-AOM_CTRL_USE_TYPE(AV1_GET_REFERENCE, av1_ref_frame_t *)
-#define AOM_CTRL_AV1_GET_REFERENCE
-AOM_CTRL_USE_TYPE(AV1_SET_REFERENCE, av1_ref_frame_t *)
-#define AOM_CTRL_AV1_SET_REFERENCE
-AOM_CTRL_USE_TYPE(AV1_COPY_REFERENCE, av1_ref_frame_t *)
-#define AOM_CTRL_AV1_COPY_REFERENCE
-AOM_CTRL_USE_TYPE(AV1_GET_NEW_FRAME_IMAGE, aom_image_t *)
-#define AOM_CTRL_AV1_GET_NEW_FRAME_IMAGE
-AOM_CTRL_USE_TYPE(AV1_COPY_NEW_FRAME_IMAGE, aom_image_t *)
-#define AOM_CTRL_AV1_COPY_NEW_FRAME_IMAGE
-
-/*!\endcond */
-/*! @} - end defgroup aom */
-
-#ifdef __cplusplus
-} // extern "C"
-#endif
-
-#endif // AOM_AOM_AOM_H_
diff --git a/third_party/aom/aom/aom_codec.h b/third_party/aom/aom/aom_codec.h
deleted file mode 100644
index fc0df5b9e..000000000
--- a/third_party/aom/aom/aom_codec.h
+++ /dev/null
@@ -1,523 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-/*!\defgroup codec Common Algorithm Interface
- * This abstraction allows applications to easily support multiple video
- * formats with minimal code duplication. This section describes the interface
- * common to all codecs (both encoders and decoders).
- * @{
- */
-
-/*!\file
- * \brief Describes the codec algorithm interface to applications.
- *
- * This file describes the interface between an application and a
- * video codec algorithm.
- *
- * An application instantiates a specific codec instance by using
- * aom_codec_init() and a pointer to the algorithm's interface structure:
- * <pre>
- * my_app.c:
- * extern aom_codec_iface_t my_codec;
- * {
- * aom_codec_ctx_t algo;
- * res = aom_codec_init(&algo, &my_codec);
- * }
- * </pre>
- *
- * Once initialized, the instance is managed using other functions from
- * the aom_codec_* family.
- */
-#ifndef AOM_AOM_AOM_CODEC_H_
-#define AOM_AOM_AOM_CODEC_H_
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-#include "aom/aom_image.h"
-#include "aom/aom_integer.h"
-
-/*!\brief Decorator indicating a function is deprecated */
-#ifndef AOM_DEPRECATED
-#if defined(__GNUC__) && __GNUC__
-#define AOM_DEPRECATED __attribute__((deprecated))
-#elif defined(_MSC_VER)
-#define AOM_DEPRECATED
-#else
-#define AOM_DEPRECATED
-#endif
-#endif /* AOM_DEPRECATED */
-
-#ifndef AOM_DECLSPEC_DEPRECATED
-#if defined(__GNUC__) && __GNUC__
-#define AOM_DECLSPEC_DEPRECATED /**< \copydoc #AOM_DEPRECATED */
-#elif defined(_MSC_VER)
-/*!\brief \copydoc #AOM_DEPRECATED */
-#define AOM_DECLSPEC_DEPRECATED __declspec(deprecated)
-#else
-#define AOM_DECLSPEC_DEPRECATED /**< \copydoc #AOM_DEPRECATED */
-#endif
-#endif /* AOM_DECLSPEC_DEPRECATED */
-
-/*!\brief Decorator indicating a function is potentially unused */
-#ifdef AOM_UNUSED
-#elif defined(__GNUC__) || defined(__clang__)
-#define AOM_UNUSED __attribute__((unused))
-#else
-#define AOM_UNUSED
-#endif
-
-/*!\brief Decorator indicating that given struct/union/enum is packed */
-#ifndef ATTRIBUTE_PACKED
-#if defined(__GNUC__) && __GNUC__
-#define ATTRIBUTE_PACKED __attribute__((packed))
-#elif defined(_MSC_VER)
-#define ATTRIBUTE_PACKED
-#else
-#define ATTRIBUTE_PACKED
-#endif
-#endif /* ATTRIBUTE_PACKED */
-
-/*!\brief Current ABI version number
- *
- * \internal
- * If this file is altered in any way that changes the ABI, this value
- * must be bumped. Examples include, but are not limited to, changing
- * types, removing or reassigning enums, adding/removing/rearranging
- * fields to structures
- */
-#define AOM_CODEC_ABI_VERSION (3 + AOM_IMAGE_ABI_VERSION) /**<\hideinitializer*/
-
-/*!\brief Algorithm return codes */
-typedef enum {
- /*!\brief Operation completed without error */
- AOM_CODEC_OK,
-
- /*!\brief Unspecified error */
- AOM_CODEC_ERROR,
-
- /*!\brief Memory operation failed */
- AOM_CODEC_MEM_ERROR,
-
- /*!\brief ABI version mismatch */
- AOM_CODEC_ABI_MISMATCH,
-
- /*!\brief Algorithm does not have required capability */
- AOM_CODEC_INCAPABLE,
-
- /*!\brief The given bitstream is not supported.
- *
- * The bitstream was unable to be parsed at the highest level. The decoder
- * is unable to proceed. This error \ref SHOULD be treated as fatal to the
- * stream. */
- AOM_CODEC_UNSUP_BITSTREAM,
-
- /*!\brief Encoded bitstream uses an unsupported feature
- *
- * The decoder does not implement a feature required by the encoder. This
- * return code should only be used for features that prevent future
- * pictures from being properly decoded. This error \ref MAY be treated as
- * fatal to the stream or \ref MAY be treated as fatal to the current GOP.
- */
- AOM_CODEC_UNSUP_FEATURE,
-
- /*!\brief The coded data for this stream is corrupt or incomplete
- *
- * There was a problem decoding the current frame. This return code
- * should only be used for failures that prevent future pictures from
- * being properly decoded. This error \ref MAY be treated as fatal to the
- * stream or \ref MAY be treated as fatal to the current GOP. If decoding
- * is continued for the current GOP, artifacts may be present.
- */
- AOM_CODEC_CORRUPT_FRAME,
-
- /*!\brief An application-supplied parameter is not valid.
- *
- */
- AOM_CODEC_INVALID_PARAM,
-
- /*!\brief An iterator reached the end of list.
- *
- */
- AOM_CODEC_LIST_END
-
-} aom_codec_err_t;
-
-/*! \brief Codec capabilities bitfield
- *
- * Each codec advertises the capabilities it supports as part of its
- * ::aom_codec_iface_t interface structure. Capabilities are extra interfaces
- * or functionality, and are not required to be supported.
- *
- * The available flags are specified by AOM_CODEC_CAP_* defines.
- */
-typedef long aom_codec_caps_t;
-#define AOM_CODEC_CAP_DECODER 0x1 /**< Is a decoder */
-#define AOM_CODEC_CAP_ENCODER 0x2 /**< Is an encoder */
-
-/*! \brief Initialization-time Feature Enabling
- *
- * Certain codec features must be known at initialization time, to allow for
- * proper memory allocation.
- *
- * The available flags are specified by AOM_CODEC_USE_* defines.
- */
-typedef long aom_codec_flags_t;
-
-/*!\brief Codec interface structure.
- *
- * Contains function pointers and other data private to the codec
- * implementation. This structure is opaque to the application.
- */
-typedef const struct aom_codec_iface aom_codec_iface_t;
-
-/*!\brief Codec private data structure.
- *
- * Contains data private to the codec implementation. This structure is opaque
- * to the application.
- */
-typedef struct aom_codec_priv aom_codec_priv_t;
-
-/*!\brief Iterator
- *
- * Opaque storage used for iterating over lists.
- */
-typedef const void *aom_codec_iter_t;
-
-/*!\brief Codec context structure
- *
- * All codecs \ref MUST support this context structure fully. In general,
- * this data should be considered private to the codec algorithm, and
- * not be manipulated or examined by the calling application. Applications
- * may reference the 'name' member to get a printable description of the
- * algorithm.
- */
-typedef struct aom_codec_ctx {
- const char *name; /**< Printable interface name */
- aom_codec_iface_t *iface; /**< Interface pointers */
- aom_codec_err_t err; /**< Last returned error */
- const char *err_detail; /**< Detailed info, if available */
- aom_codec_flags_t init_flags; /**< Flags passed at init time */
- union {
- /**< Decoder Configuration Pointer */
- const struct aom_codec_dec_cfg *dec;
- /**< Encoder Configuration Pointer */
- const struct aom_codec_enc_cfg *enc;
- const void *raw;
- } config; /**< Configuration pointer aliasing union */
- aom_codec_priv_t *priv; /**< Algorithm private storage */
-} aom_codec_ctx_t;
-
-/*!\brief Bit depth for codec
- *
- * This enumeration determines the bit depth of the codec.
- */
-typedef enum aom_bit_depth {
- AOM_BITS_8 = 8, /**< 8 bits */
- AOM_BITS_10 = 10, /**< 10 bits */
- AOM_BITS_12 = 12, /**< 12 bits */
-} aom_bit_depth_t;
-
-/*!\brief Superblock size selection.
- *
- * Defines the superblock size used for encoding. The superblock size can
- * either be fixed at 64x64 or 128x128 pixels, or it can be dynamically
- * selected by the encoder for each frame.
- */
-typedef enum aom_superblock_size {
- AOM_SUPERBLOCK_SIZE_64X64, /**< Always use 64x64 superblocks. */
- AOM_SUPERBLOCK_SIZE_128X128, /**< Always use 128x128 superblocks. */
- AOM_SUPERBLOCK_SIZE_DYNAMIC /**< Select superblock size dynamically. */
-} aom_superblock_size_t;
-
-/*
- * Library Version Number Interface
- *
- * For example, see the following sample return values:
- * aom_codec_version() (1<<16 | 2<<8 | 3)
- * aom_codec_version_str() "v1.2.3-rc1-16-gec6a1ba"
- * aom_codec_version_extra_str() "rc1-16-gec6a1ba"
- */
-
-/*!\brief Return the version information (as an integer)
- *
- * Returns a packed encoding of the library version number. This will only
- * include
- * the major.minor.patch component of the version number. Note that this encoded
- * value should be accessed through the macros provided, as the encoding may
- * change
- * in the future.
- *
- */
-int aom_codec_version(void);
-
-/*!\brief Return the version major number */
-#define aom_codec_version_major() ((aom_codec_version() >> 16) & 0xff)
-
-/*!\brief Return the version minor number */
-#define aom_codec_version_minor() ((aom_codec_version() >> 8) & 0xff)
-
-/*!\brief Return the version patch number */
-#define aom_codec_version_patch() ((aom_codec_version() >> 0) & 0xff)
-
-/*!\brief Return the version information (as a string)
- *
- * Returns a printable string containing the full library version number. This
- * may
- * contain additional text following the three digit version number, as to
- * indicate
- * release candidates, prerelease versions, etc.
- *
- */
-const char *aom_codec_version_str(void);
-
-/*!\brief Return the version information (as a string)
- *
- * Returns a printable "extra string". This is the component of the string
- * returned
- * by aom_codec_version_str() following the three digit version number.
- *
- */
-const char *aom_codec_version_extra_str(void);
-
-/*!\brief Return the build configuration
- *
- * Returns a printable string containing an encoded version of the build
- * configuration. This may be useful to aom support.
- *
- */
-const char *aom_codec_build_config(void);
-
-/*!\brief Return the name for a given interface
- *
- * Returns a human readable string for name of the given codec interface.
- *
- * \param[in] iface Interface pointer
- *
- */
-const char *aom_codec_iface_name(aom_codec_iface_t *iface);
-
-/*!\brief Convert error number to printable string
- *
- * Returns a human readable string for the last error returned by the
- * algorithm. The returned error will be one line and will not contain
- * any newline characters.
- *
- *
- * \param[in] err Error number.
- *
- */
-const char *aom_codec_err_to_string(aom_codec_err_t err);
-
-/*!\brief Retrieve error synopsis for codec context
- *
- * Returns a human readable string for the last error returned by the
- * algorithm. The returned error will be one line and will not contain
- * any newline characters.
- *
- *
- * \param[in] ctx Pointer to this instance's context.
- *
- */
-const char *aom_codec_error(aom_codec_ctx_t *ctx);
-
-/*!\brief Retrieve detailed error information for codec context
- *
- * Returns a human readable string providing detailed information about
- * the last error.
- *
- * \param[in] ctx Pointer to this instance's context.
- *
- * \retval NULL
- * No detailed information is available.
- */
-const char *aom_codec_error_detail(aom_codec_ctx_t *ctx);
-
-/* REQUIRED FUNCTIONS
- *
- * The following functions are required to be implemented for all codecs.
- * They represent the base case functionality expected of all codecs.
- */
-
-/*!\brief Destroy a codec instance
- *
- * Destroys a codec context, freeing any associated memory buffers.
- *
- * \param[in] ctx Pointer to this instance's context
- *
- * \retval #AOM_CODEC_OK
- * The codec instance was destroyed.
- * \retval #AOM_CODEC_MEM_ERROR
- * Memory allocation failed.
- */
-aom_codec_err_t aom_codec_destroy(aom_codec_ctx_t *ctx);
-
-/*!\brief Get the capabilities of an algorithm.
- *
- * Retrieves the capabilities bitfield from the algorithm's interface.
- *
- * \param[in] iface Pointer to the algorithm interface
- *
- */
-aom_codec_caps_t aom_codec_get_caps(aom_codec_iface_t *iface);
-
-/*!\brief Control algorithm
- *
- * This function is used to exchange algorithm specific data with the codec
- * instance. This can be used to implement features specific to a particular
- * algorithm.
- *
- * This wrapper function dispatches the request to the helper function
- * associated with the given ctrl_id. It tries to call this function
- * transparently, but will return #AOM_CODEC_ERROR if the request could not
- * be dispatched.
- *
- * Note that this function should not be used directly. Call the
- * #aom_codec_control wrapper macro instead.
- *
- * \param[in] ctx Pointer to this instance's context
- * \param[in] ctrl_id Algorithm specific control identifier
- *
- * \retval #AOM_CODEC_OK
- * The control request was processed.
- * \retval #AOM_CODEC_ERROR
- * The control request was not processed.
- * \retval #AOM_CODEC_INVALID_PARAM
- * The data was not valid.
- */
-aom_codec_err_t aom_codec_control_(aom_codec_ctx_t *ctx, int ctrl_id, ...);
-#if defined(AOM_DISABLE_CTRL_TYPECHECKS) && AOM_DISABLE_CTRL_TYPECHECKS
-#define aom_codec_control(ctx, id, data) aom_codec_control_(ctx, id, data)
-#define AOM_CTRL_USE_TYPE(id, typ)
-#define AOM_CTRL_USE_TYPE_DEPRECATED(id, typ)
-#define AOM_CTRL_VOID(id) /* no-op; arity matches the type-checked variant */
-
-#else
-/*!\brief aom_codec_control wrapper macro
- *
- * This macro allows for type safe conversions across the variadic parameter
- * to aom_codec_control_().
- *
- * \internal
- * It works by dispatching the call to the control function through a wrapper
- * function named with the id parameter.
- */
-#define aom_codec_control(ctx, id, data) \
- aom_codec_control_##id(ctx, id, data) /**<\hideinitializer*/
-
-/*!\brief aom_codec_control type definition macro
- *
- * This macro allows for type safe conversions across the variadic parameter
- * to aom_codec_control_(). It defines the type of the argument for a given
- * control identifier.
- *
- * \internal
- * It defines a static function with
- * the correctly typed arguments as a wrapper to the type-unsafe internal
- * function.
- */
-#define AOM_CTRL_USE_TYPE(id, typ) \
- static aom_codec_err_t aom_codec_control_##id(aom_codec_ctx_t *, int, typ) \
- AOM_UNUSED; \
- \
- static aom_codec_err_t aom_codec_control_##id(aom_codec_ctx_t *ctx, \
- int ctrl_id, typ data) { \
- return aom_codec_control_(ctx, ctrl_id, data); \
- } /**<\hideinitializer*/
-
-/*!\brief aom_codec_control deprecated type definition macro
- *
- * Like #AOM_CTRL_USE_TYPE, but indicates that the specified control is
- * deprecated and should not be used. Consult the documentation for your
- * codec for more information.
- *
- * \internal
- * It defines a static function with the correctly typed arguments as a
- * wrapper to the type-unsafe internal function.
- */
-#define AOM_CTRL_USE_TYPE_DEPRECATED(id, typ) \
- AOM_DECLSPEC_DEPRECATED static aom_codec_err_t aom_codec_control_##id( \
- aom_codec_ctx_t *, int, typ) AOM_DEPRECATED AOM_UNUSED; \
- \
- AOM_DECLSPEC_DEPRECATED static aom_codec_err_t aom_codec_control_##id( \
- aom_codec_ctx_t *ctx, int ctrl_id, typ data) { \
- return aom_codec_control_(ctx, ctrl_id, data); \
- } /**<\hideinitializer*/
-
-/*!\brief aom_codec_control void type definition macro
- *
- * This macro allows for type safe conversions across the variadic parameter
- * to aom_codec_control_(). It indicates that a given control identifier takes
- * no argument.
- *
- * \internal
- * It defines a static function without a data argument as a wrapper to the
- * type-unsafe internal function.
- */
-#define AOM_CTRL_VOID(id) \
- static aom_codec_err_t aom_codec_control_##id(aom_codec_ctx_t *, int) \
- AOM_UNUSED; \
- \
- static aom_codec_err_t aom_codec_control_##id(aom_codec_ctx_t *ctx, \
- int ctrl_id) { \
- return aom_codec_control_(ctx, ctrl_id); \
- } /**<\hideinitializer*/
-
-#endif
-
-/*!\brief OBU types. */
-typedef enum ATTRIBUTE_PACKED {
- OBU_SEQUENCE_HEADER = 1,
- OBU_TEMPORAL_DELIMITER = 2,
- OBU_FRAME_HEADER = 3,
- OBU_TILE_GROUP = 4,
- OBU_METADATA = 5,
- OBU_FRAME = 6,
- OBU_REDUNDANT_FRAME_HEADER = 7,
- OBU_TILE_LIST = 8,
- OBU_PADDING = 15,
-} OBU_TYPE;
-
-/*!\brief OBU metadata types. */
-typedef enum {
- OBU_METADATA_TYPE_AOM_RESERVED_0 = 0,
- OBU_METADATA_TYPE_HDR_CLL = 1,
- OBU_METADATA_TYPE_HDR_MDCV = 2,
- OBU_METADATA_TYPE_SCALABILITY = 3,
- OBU_METADATA_TYPE_ITUT_T35 = 4,
- OBU_METADATA_TYPE_TIMECODE = 5,
-} OBU_METADATA_TYPE;
-
-/*!\brief Returns string representation of OBU_TYPE.
- *
- * \param[in] type The OBU_TYPE to convert to string.
- */
-const char *aom_obu_type_to_string(OBU_TYPE type);
-
-/*!\brief Config Options
- *
- * This type allows enumerating and controlling options defined for control
- * via config file at runtime.
- */
-typedef struct cfg_options {
- /*!\brief Reflects if ext_partition should be enabled
- *
- * If this value is non-zero it enables the feature
- */
- unsigned int ext_partition;
-} cfg_options_t;
-
-/*!@} - end defgroup codec*/
-#ifdef __cplusplus
-}
-#endif
-#endif // AOM_AOM_AOM_CODEC_H_
diff --git a/third_party/aom/aom/aom_decoder.h b/third_party/aom/aom/aom_decoder.h
deleted file mode 100644
index 06c2dc5f7..000000000
--- a/third_party/aom/aom/aom_decoder.h
+++ /dev/null
@@ -1,364 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-#ifndef AOM_AOM_AOM_DECODER_H_
-#define AOM_AOM_AOM_DECODER_H_
-
-/*!\defgroup decoder Decoder Algorithm Interface
- * \ingroup codec
- * This abstraction allows applications using this decoder to easily support
- * multiple video formats with minimal code duplication. This section describes
- * the interface common to all decoders.
- * @{
- */
-
-/*!\file
- * \brief Describes the decoder algorithm interface to applications.
- *
- * This file describes the interface between an application and a
- * video decoder algorithm.
- *
- */
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-#include "aom/aom_codec.h"
-#include "aom/aom_frame_buffer.h"
-
-/*!\brief Current ABI version number
- *
- * \internal
- * If this file is altered in any way that changes the ABI, this value
- * must be bumped. Examples include, but are not limited to, changing
- * types, removing or reassigning enums, adding/removing/rearranging
- * fields to structures
- */
-#define AOM_DECODER_ABI_VERSION \
- (3 + AOM_CODEC_ABI_VERSION) /**<\hideinitializer*/
-
-/*! \brief Decoder capabilities bitfield
- *
- * Each decoder advertises the capabilities it supports as part of its
- * ::aom_codec_iface_t interface structure. Capabilities are extra interfaces
- * or functionality, and are not required to be supported by a decoder.
- *
- * The available flags are specified by AOM_CODEC_CAP_* defines.
- */
-#define AOM_CODEC_CAP_PUT_SLICE 0x10000 /**< Will issue put_slice callbacks */
-#define AOM_CODEC_CAP_PUT_FRAME 0x20000 /**< Will issue put_frame callbacks */
-#define AOM_CODEC_CAP_POSTPROC 0x40000 /**< Can postprocess decoded frame */
-/*!\brief Can receive encoded frames one fragment at a time */
-#define AOM_CODEC_CAP_INPUT_FRAGMENTS 0x100000
-
-/*! \brief Initialization-time Feature Enabling
- *
- * Certain codec features must be known at initialization time, to allow for
- * proper memory allocation.
- *
- * The available flags are specified by AOM_CODEC_USE_* defines.
- */
-/*!\brief Can support frame-based multi-threading */
-#define AOM_CODEC_CAP_FRAME_THREADING 0x200000
-/*!\brief Can support external frame buffers */
-#define AOM_CODEC_CAP_EXTERNAL_FRAME_BUFFER 0x400000
-
-#define AOM_CODEC_USE_POSTPROC 0x10000 /**< Postprocess decoded frame */
-/*!\brief The input frame should be passed to the decoder one fragment at a
- * time */
-#define AOM_CODEC_USE_INPUT_FRAGMENTS 0x40000
-
-/*!\brief Stream properties
- *
- * This structure is used to query or set properties of the decoded
- * stream.
- */
-typedef struct aom_codec_stream_info {
- unsigned int w; /**< Width (or 0 for unknown/default) */
- unsigned int h; /**< Height (or 0 for unknown/default) */
- unsigned int is_kf; /**< Current frame is a keyframe */
- unsigned int number_spatial_layers; /**< Number of spatial layers */
- unsigned int number_temporal_layers; /**< Number of temporal layers */
- unsigned int is_annexb; /**< Is Bitstream in Annex-B format */
-} aom_codec_stream_info_t;
-
-/* REQUIRED FUNCTIONS
- *
- * The following functions are required to be implemented for all decoders.
- * They represent the base case functionality expected of all decoders.
- */
-
-/*!\brief Initialization Configurations
- *
- * This structure is used to pass init time configuration options to the
- * decoder.
- */
-typedef struct aom_codec_dec_cfg {
- unsigned int threads; /**< Maximum number of threads to use, default 1 */
- unsigned int w; /**< Width */
- unsigned int h; /**< Height */
- unsigned int allow_lowbitdepth; /**< Allow use of low-bitdepth coding path */
- cfg_options_t cfg; /**< Options defined per config attributes */
-} aom_codec_dec_cfg_t; /**< alias for struct aom_codec_dec_cfg */
-
-/*!\brief Initialize a decoder instance
- *
- * Initializes a decoder context using the given interface. Applications
- * should call the aom_codec_dec_init convenience macro instead of this
- * function directly, to ensure that the ABI version number parameter
- * is properly initialized.
- *
- * If the library was configured with --disable-multithread, this call
- * is not thread safe and should be guarded with a lock if being used
- * in a multithreaded context.
- *
- * \param[in] ctx Pointer to this instance's context.
- * \param[in] iface Pointer to the algorithm interface to use.
- * \param[in] cfg Configuration to use, if known. May be NULL.
- * \param[in] flags Bitfield of AOM_CODEC_USE_* flags
- * \param[in] ver ABI version number. Must be set to
- * AOM_DECODER_ABI_VERSION
- * \retval #AOM_CODEC_OK
- * The decoder algorithm initialized.
- * \retval #AOM_CODEC_MEM_ERROR
- * Memory allocation failed.
- */
-aom_codec_err_t aom_codec_dec_init_ver(aom_codec_ctx_t *ctx,
- aom_codec_iface_t *iface,
- const aom_codec_dec_cfg_t *cfg,
- aom_codec_flags_t flags, int ver);
-
-/*!\brief Convenience macro for aom_codec_dec_init_ver()
- *
- * Ensures the ABI version parameter is properly set.
- */
-#define aom_codec_dec_init(ctx, iface, cfg, flags) \
- aom_codec_dec_init_ver(ctx, iface, cfg, flags, AOM_DECODER_ABI_VERSION)
-
-/*!\brief Parse stream info from a buffer
- *
- * Performs high level parsing of the bitstream. Construction of a decoder
- * context is not necessary. Can be used to determine if the bitstream is
- * of the proper format, and to extract information from the stream.
- *
- * \param[in] iface Pointer to the algorithm interface
- * \param[in] data Pointer to a block of data to parse
- * \param[in] data_sz Size of the data buffer
- * \param[in,out] si Pointer to stream info to update. The is_annexb
- * member \ref MUST be properly initialized. This
- * function sets the rest of the members.
- *
- * \retval #AOM_CODEC_OK
- * Bitstream is parsable and stream information updated.
- * \retval #AOM_CODEC_INVALID_PARAM
- * One of the arguments is invalid, for example a NULL pointer.
- * \retval #AOM_CODEC_UNSUP_BITSTREAM
- * The decoder didn't recognize the coded data, or the
- * buffer was too short.
- */
-aom_codec_err_t aom_codec_peek_stream_info(aom_codec_iface_t *iface,
- const uint8_t *data, size_t data_sz,
- aom_codec_stream_info_t *si);
-
-/*!\brief Return information about the current stream.
- *
- * Returns information about the stream that has been parsed during decoding.
- *
- * \param[in] ctx Pointer to this instance's context
- * \param[in,out] si Pointer to stream info to update.
- *
- * \retval #AOM_CODEC_OK
- * Bitstream is parsable and stream information updated.
- * \retval #AOM_CODEC_INVALID_PARAM
- * One of the arguments is invalid, for example a NULL pointer.
- * \retval #AOM_CODEC_UNSUP_BITSTREAM
- * The decoder couldn't parse the submitted data.
- */
-aom_codec_err_t aom_codec_get_stream_info(aom_codec_ctx_t *ctx,
- aom_codec_stream_info_t *si);
-
-/*!\brief Decode data
- *
- * Processes a buffer of coded data. If the processing results in a new
- * decoded frame becoming available, PUT_SLICE and PUT_FRAME events may be
- * generated, as appropriate. Encoded data \ref MUST be passed in DTS (decode
- * time stamp) order. Frames produced will always be in PTS (presentation
- * time stamp) order.
- * If the decoder is configured with AOM_CODEC_USE_INPUT_FRAGMENTS enabled,
- * data and data_sz can contain a fragment of the encoded frame. Fragment
- * \#n must contain at least partition \#n, but can also contain subsequent
- * partitions (\#n+1 - \#n+i), and if so, fragments \#n+1, .., \#n+i must
- * be empty. When no more data is available, this function should be called
- * with NULL as data and 0 as data_sz. The memory passed to this function
- * must be available until the frame has been decoded.
- *
- * \param[in] ctx Pointer to this instance's context
- * \param[in] data Pointer to this block of new coded data. If
- * NULL, an AOM_CODEC_CB_PUT_FRAME event is posted
- * for the previously decoded frame.
- * \param[in] data_sz Size of the coded data, in bytes.
- * \param[in] user_priv Application specific data to associate with
- * this frame.
- *
- * \return Returns #AOM_CODEC_OK if the coded data was processed completely
- * and future pictures can be decoded without error. Otherwise,
- * see the descriptions of the other error codes in ::aom_codec_err_t
- * for recoverability capabilities.
- */
-aom_codec_err_t aom_codec_decode(aom_codec_ctx_t *ctx, const uint8_t *data,
- size_t data_sz, void *user_priv);
-
-/*!\brief Decoded frames iterator
- *
- * Iterates over a list of the frames available for display. The iterator
- * storage should be initialized to NULL to start the iteration. Iteration is
- * complete when this function returns NULL.
- *
- * The list of available frames becomes valid upon completion of the
- * aom_codec_decode call, and remains valid until the next call to
- * aom_codec_decode.
- *
- * \param[in] ctx Pointer to this instance's context
- * \param[in,out] iter Iterator storage, initialized to NULL
- *
- * \return Returns a pointer to an image, if one is ready for display. Frames
- * produced will always be in PTS (presentation time stamp) order.
- */
-aom_image_t *aom_codec_get_frame(aom_codec_ctx_t *ctx, aom_codec_iter_t *iter);
-
-/*!\defgroup cap_put_frame Frame-Based Decoding Functions
- *
- * The following functions are required to be implemented for all decoders
- * that advertise the AOM_CODEC_CAP_PUT_FRAME capability. Calling these
- * functions
- * for codecs that don't advertise this capability will result in an error
- * code being returned, usually AOM_CODEC_ERROR
- * @{
- */
-
-/*!\brief put frame callback prototype
- *
- * This callback is invoked by the decoder to notify the application of
- * the availability of decoded image data.
- */
-typedef void (*aom_codec_put_frame_cb_fn_t)(void *user_priv,
- const aom_image_t *img);
-
-/*!\brief Register for notification of frame completion.
- *
- * Registers a given function to be called when a decoded frame is
- * available.
- *
- * \param[in] ctx Pointer to this instance's context
- * \param[in] cb Pointer to the callback function
- * \param[in] user_priv User's private data
- *
- * \retval #AOM_CODEC_OK
- * Callback successfully registered.
- * \retval #AOM_CODEC_ERROR
- * Decoder context not initialized, or algorithm not capable of
- * posting frame completion.
- */
-aom_codec_err_t aom_codec_register_put_frame_cb(aom_codec_ctx_t *ctx,
- aom_codec_put_frame_cb_fn_t cb,
- void *user_priv);
-
-/*!@} - end defgroup cap_put_frame */
-
-/*!\defgroup cap_put_slice Slice-Based Decoding Functions
- *
- * The following functions are required to be implemented for all decoders
- * that advertise the AOM_CODEC_CAP_PUT_SLICE capability. Calling these
- * functions
- * for codecs that don't advertise this capability will result in an error
- * code being returned, usually AOM_CODEC_ERROR
- * @{
- */
-
-/*!\brief put slice callback prototype
- *
- * This callback is invoked by the decoder to notify the application of
- * the availability of partially decoded image data.
- */
-typedef void (*aom_codec_put_slice_cb_fn_t)(void *user_priv,
- const aom_image_t *img,
- const aom_image_rect_t *valid,
- const aom_image_rect_t *update);
-
-/*!\brief Register for notification of slice completion.
- *
- * Registers a given function to be called when a decoded slice is
- * available.
- *
- * \param[in] ctx Pointer to this instance's context
- * \param[in] cb Pointer to the callback function
- * \param[in] user_priv User's private data
- *
- * \retval #AOM_CODEC_OK
- * Callback successfully registered.
- * \retval #AOM_CODEC_ERROR
- * Decoder context not initialized, or algorithm not capable of
- * posting slice completion.
- */
-aom_codec_err_t aom_codec_register_put_slice_cb(aom_codec_ctx_t *ctx,
- aom_codec_put_slice_cb_fn_t cb,
- void *user_priv);
-
-/*!@} - end defgroup cap_put_slice*/
-
-/*!\defgroup cap_external_frame_buffer External Frame Buffer Functions
- *
- * The following section is required to be implemented for all decoders
- * that advertise the AOM_CODEC_CAP_EXTERNAL_FRAME_BUFFER capability.
- * Calling this function for codecs that don't advertise this capability
- * will result in an error code being returned, usually AOM_CODEC_ERROR.
- *
- * \note
- * Currently this only works with AV1.
- * @{
- */
-
-/*!\brief Pass in external frame buffers for the decoder to use.
- *
- * Registers functions to be called when libaom needs a frame buffer
- * to decode the current frame and a function to be called when libaom does
- * not internally reference the frame buffer. This set function must
- * be called before the first call to decode or libaom will assume the
- * default behavior of allocating frame buffers internally.
- *
- * \param[in] ctx Pointer to this instance's context
- * \param[in] cb_get Pointer to the get callback function
- * \param[in] cb_release Pointer to the release callback function
- * \param[in] cb_priv Callback's private data
- *
- * \retval #AOM_CODEC_OK
- * External frame buffers will be used by libaom.
- * \retval #AOM_CODEC_INVALID_PARAM
- * One or more of the callbacks were NULL.
- * \retval #AOM_CODEC_ERROR
- * Decoder context not initialized, or algorithm not capable of
- * using external frame buffers.
- *
- * \note
- * When decoding AV1, the application may be required to pass in at least
- * #AOM_MAXIMUM_WORK_BUFFERS external frame
- * buffers.
- */
-aom_codec_err_t aom_codec_set_frame_buffer_functions(
- aom_codec_ctx_t *ctx, aom_get_frame_buffer_cb_fn_t cb_get,
- aom_release_frame_buffer_cb_fn_t cb_release, void *cb_priv);
-
-/*!@} - end defgroup cap_external_frame_buffer */
-
-/*!@} - end defgroup decoder*/
-#ifdef __cplusplus
-}
-#endif
-#endif // AOM_AOM_AOM_DECODER_H_
diff --git a/third_party/aom/aom/aom_encoder.h b/third_party/aom/aom/aom_encoder.h
deleted file mode 100644
index 0894ca9e3..000000000
--- a/third_party/aom/aom/aom_encoder.h
+++ /dev/null
@@ -1,981 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-#ifndef AOM_AOM_AOM_ENCODER_H_
-#define AOM_AOM_AOM_ENCODER_H_
-
-/*!\defgroup encoder Encoder Algorithm Interface
- * \ingroup codec
- * This abstraction allows applications using this encoder to easily support
- * multiple video formats with minimal code duplication. This section describes
- * the interface common to all encoders.
- * @{
- */
-
-/*!\file
- * \brief Describes the encoder algorithm interface to applications.
- *
- * This file describes the interface between an application and a
- * video encoder algorithm.
- *
- */
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-#include "aom/aom_codec.h"
-
-/*!\brief Current ABI version number
- *
- * \internal
- * If this file is altered in any way that changes the ABI, this value
- * must be bumped. Examples include, but are not limited to, changing
- * types, removing or reassigning enums, adding/removing/rearranging
- * fields to structures
- */
-#define AOM_ENCODER_ABI_VERSION \
- (5 + AOM_CODEC_ABI_VERSION) /**<\hideinitializer*/
-
-/*! \brief Encoder capabilities bitfield
- *
- * Each encoder advertises the capabilities it supports as part of its
- * ::aom_codec_iface_t interface structure. Capabilities are extra
- * interfaces or functionality, and are not required to be supported
- * by an encoder.
- *
- * The available flags are specified by AOM_CODEC_CAP_* defines.
- */
-#define AOM_CODEC_CAP_PSNR 0x10000 /**< Can issue PSNR packets */
-
-/*! Can support input images at greater than 8 bitdepth.
- */
-#define AOM_CODEC_CAP_HIGHBITDEPTH 0x40000
-
-/*! \brief Initialization-time Feature Enabling
- *
- * Certain codec features must be known at initialization time, to allow
- * for proper memory allocation.
- *
- * The available flags are specified by AOM_CODEC_USE_* defines.
- */
-#define AOM_CODEC_USE_PSNR 0x10000 /**< Calculate PSNR on each frame */
-/*!\brief Enable encoding of input at greater than 8 bitdepth. */
-#define AOM_CODEC_USE_HIGHBITDEPTH 0x40000 /**< Use high bitdepth */
-
-/*!\brief Generic fixed size buffer structure
- *
- * This structure is able to hold a reference to any fixed size buffer.
- */
-typedef struct aom_fixed_buf {
- void *buf; /**< Pointer to the data */
- size_t sz; /**< Length of the buffer, in chars */
-} aom_fixed_buf_t; /**< alias for struct aom_fixed_buf */
-
-/*!\brief Time Stamp Type
- *
- * An integer, which when multiplied by the stream's time base, provides
- * the absolute time of a sample.
- */
-typedef int64_t aom_codec_pts_t;
-
-/*!\brief Compressed Frame Flags
- *
- * This type represents a bitfield containing information about a compressed
- * frame that may be useful to an application. The most significant 16 bits
- * can be used by an algorithm to provide additional detail, for example to
- * support frame types that are codec specific (MPEG-1 D-frames for example)
- */
-typedef uint32_t aom_codec_frame_flags_t;
-#define AOM_FRAME_IS_KEY 0x1 /**< frame is the start of a GOP */
-/*!\brief frame can be dropped without affecting the stream (no future frame
- * depends on this one) */
-#define AOM_FRAME_IS_DROPPABLE 0x2
-/*!\brief frame should be decoded but will not be shown */
-#define AOM_FRAME_IS_INVISIBLE 0x4
-/*!\brief this is a fragment of the encoded frame */
-#define AOM_FRAME_IS_FRAGMENT 0x8
-
-/*!\brief Error Resilient flags
- *
- * These flags define which error resilient features to enable in the
- * encoder. The flags are specified through the
- * aom_codec_enc_cfg::g_error_resilient variable.
- */
-typedef uint32_t aom_codec_er_flags_t;
-/*!\brief Improve resiliency against losses of whole frames */
-#define AOM_ERROR_RESILIENT_DEFAULT 0x1
-
-/*!\brief Encoder output packet variants
- *
- * This enumeration lists the different kinds of data packets that can be
- * returned by calls to aom_codec_get_cx_data(). Algorithms \ref MAY
- * extend this list to provide additional functionality.
- */
-enum aom_codec_cx_pkt_kind {
- AOM_CODEC_CX_FRAME_PKT, /**< Compressed video frame */
- AOM_CODEC_STATS_PKT, /**< Two-pass statistics for this frame */
- AOM_CODEC_FPMB_STATS_PKT, /**< first pass mb statistics for this frame */
- AOM_CODEC_PSNR_PKT, /**< PSNR statistics for this frame */
- AOM_CODEC_CUSTOM_PKT = 256 /**< Algorithm extensions */
-};
-
-/*!\brief Encoder output packet
- *
- * This structure contains the different kinds of output data the encoder
- * may produce while compressing a frame.
- */
-typedef struct aom_codec_cx_pkt {
- enum aom_codec_cx_pkt_kind kind; /**< packet variant */
- union {
- struct {
- void *buf; /**< compressed data buffer */
- size_t sz; /**< length of compressed data */
- /*!\brief time stamp to show frame (in timebase units) */
- aom_codec_pts_t pts;
- /*!\brief duration to show frame (in timebase units) */
- unsigned long duration;
- aom_codec_frame_flags_t flags; /**< flags for this frame */
- /*!\brief the partition id defines the decoding order of the partitions.
- * Only applicable when "output partition" mode is enabled. First
- * partition has id 0.*/
- int partition_id;
- /*!\brief size of the visible frame in this packet */
- size_t vis_frame_size;
- } frame; /**< data for compressed frame packet */
- aom_fixed_buf_t twopass_stats; /**< data for two-pass packet */
- aom_fixed_buf_t firstpass_mb_stats; /**< first pass mb packet */
- struct aom_psnr_pkt {
- unsigned int samples[4]; /**< Number of samples, total/y/u/v */
- uint64_t sse[4]; /**< sum squared error, total/y/u/v */
- double psnr[4]; /**< PSNR, total/y/u/v */
- } psnr; /**< data for PSNR packet */
- aom_fixed_buf_t raw; /**< data for arbitrary packets */
-
- /* This packet size is fixed to allow codecs to extend this
- * interface without having to manage storage for raw packets,
- * i.e., if it's smaller than 128 bytes, you can store in the
- * packet list directly.
- */
- char pad[128 - sizeof(enum aom_codec_cx_pkt_kind)]; /**< fixed sz */
- } data; /**< packet data */
-} aom_codec_cx_pkt_t; /**< alias for struct aom_codec_cx_pkt */
-
-/*!\brief Rational Number
- *
- * This structure holds a fractional value.
- */
-typedef struct aom_rational {
- int num; /**< fraction numerator */
- int den; /**< fraction denominator */
-} aom_rational_t; /**< alias for struct aom_rational */
-
-/*!\brief Multi-pass Encoding Pass */
-enum aom_enc_pass {
- AOM_RC_ONE_PASS, /**< Single pass mode */
- AOM_RC_FIRST_PASS, /**< First pass of multi-pass mode */
- AOM_RC_LAST_PASS /**< Final pass of multi-pass mode */
-};
-
-/*!\brief Rate control mode */
-enum aom_rc_mode {
- AOM_VBR, /**< Variable Bit Rate (VBR) mode */
- AOM_CBR, /**< Constant Bit Rate (CBR) mode */
- AOM_CQ, /**< Constrained Quality (CQ) mode */
- AOM_Q, /**< Constant Quality (Q) mode */
-};
-
-/*!\brief Keyframe placement mode.
- *
- * This enumeration determines whether keyframes are placed automatically by
- * the encoder or whether this behavior is disabled. Older releases of this
- * SDK were implemented such that AOM_KF_FIXED meant keyframes were disabled.
- * This name is confusing for this behavior, so the new symbols to be used
- * are AOM_KF_AUTO and AOM_KF_DISABLED.
- */
-enum aom_kf_mode {
- AOM_KF_FIXED, /**< deprecated, implies AOM_KF_DISABLED */
- AOM_KF_AUTO, /**< Encoder determines optimal placement automatically */
- AOM_KF_DISABLED = 0 /**< Encoder does not place keyframes. */
-};
-
-/*!\brief Encoded Frame Flags
- *
- * This type indicates a bitfield to be passed to aom_codec_encode(), defining
- * per-frame boolean values. By convention, bits common to all codecs will be
- * named AOM_EFLAG_*, and bits specific to an algorithm will be named
- * /algo/_eflag_*. The lower order 16 bits are reserved for common use.
- */
-typedef long aom_enc_frame_flags_t;
-#define AOM_EFLAG_FORCE_KF (1 << 0) /**< Force this frame to be a keyframe */
-
-/*!\brief Encoder configuration structure
- *
- * This structure contains the encoder settings that have common representations
- * across all codecs. This doesn't imply that all codecs support all features,
- * however.
- */
-typedef struct aom_codec_enc_cfg {
- /*
- * generic settings (g)
- */
-
- /*!\brief Algorithm specific "usage" value
- *
- * Algorithms may define multiple values for usage, which may convey the
- * intent of how the application intends to use the stream. If this value
- * is non-zero, consult the documentation for the codec to determine its
- * meaning.
- */
- unsigned int g_usage;
-
- /*!\brief Maximum number of threads to use
- *
- * For multi-threaded implementations, use no more than this number of
- * threads. The codec may use fewer threads than allowed. The value
- * 0 is equivalent to the value 1.
- */
- unsigned int g_threads;
-
- /*!\brief Bitstream profile to use
- *
- * Some codecs support a notion of multiple bitstream profiles. Typically
- * this maps to a set of features that are turned on or off. Often the
- * profile to use is determined by the features of the intended decoder.
- * Consult the documentation for the codec to determine the valid values
- * for this parameter, or set to zero for a sane default.
- */
- unsigned int g_profile; /**< profile of bitstream to use */
-
- /*!\brief Width of the frame
- *
- * This value identifies the presentation resolution of the frame,
- * in pixels. Note that the frames passed as input to the encoder must
- * have this resolution. Frames will be presented by the decoder in this
- * resolution, independent of any spatial resampling the encoder may do.
- */
- unsigned int g_w;
-
- /*!\brief Height of the frame
- *
- * This value identifies the presentation resolution of the frame,
- * in pixels. Note that the frames passed as input to the encoder must
- * have this resolution. Frames will be presented by the decoder in this
- * resolution, independent of any spatial resampling the encoder may do.
- */
- unsigned int g_h;
-
- /*!\brief Max number of frames to encode
- *
- */
- unsigned int g_limit;
-
- /*!\brief Forced maximum width of the frame
- *
- * If this value is non-zero then it is used to force the maximum frame
- * width written in write_sequence_header().
- */
- unsigned int g_forced_max_frame_width;
-
- /*!\brief Forced maximum height of the frame
- *
- * If this value is non-zero then it is used to force the maximum frame
- * height written in write_sequence_header().
- */
- unsigned int g_forced_max_frame_height;
-
- /*!\brief Bit-depth of the codec
- *
- * This value identifies the bit_depth of the codec.
- * Only certain bit-depths are supported as identified in the
- * aom_bit_depth_t enum.
- */
- aom_bit_depth_t g_bit_depth;
-
- /*!\brief Bit-depth of the input frames
- *
- * This value identifies the bit_depth of the input frames in bits.
- * Note that the frames passed as input to the encoder must have
- * this bit-depth.
- */
- unsigned int g_input_bit_depth;
-
- /*!\brief Stream timebase units
- *
- * Indicates the smallest interval of time, in seconds, used by the stream.
- * For fixed frame rate material, or variable frame rate material where
- * frames are timed at a multiple of a given clock (ex: video capture),
- * the \ref RECOMMENDED method is to set the timebase to the reciprocal
- * of the frame rate (ex: 1001/30000 for 29.970 Hz NTSC). This allows the
- * pts to correspond to the frame number, which can be handy. For
- * re-encoding video from containers with absolute time timestamps, the
- * \ref RECOMMENDED method is to set the timebase to that of the parent
- * container or multimedia framework (ex: 1/1000 for ms, as in FLV).
- */
- struct aom_rational g_timebase;
-
- /*!\brief Enable error resilient modes.
- *
- * The error resilient bitfield indicates to the encoder which features
- * it should enable to take measures for streaming over lossy or noisy
- * links.
- */
- aom_codec_er_flags_t g_error_resilient;
-
- /*!\brief Multi-pass Encoding Mode
- *
- * This value should be set to the current phase for multi-pass encoding.
- * For single pass, set to #AOM_RC_ONE_PASS.
- */
- enum aom_enc_pass g_pass;
-
- /*!\brief Allow lagged encoding
- *
- * If set, this value allows the encoder to consume a number of input
- * frames before producing output frames. This allows the encoder to
- * base decisions for the current frame on future frames. This does
- * increase the latency of the encoding pipeline, so it is not appropriate
- * in all situations (ex: realtime encoding).
- *
- * Note that this is a maximum value -- the encoder may produce frames
- * sooner than the given limit. Set this value to 0 to disable this
- * feature.
- */
- unsigned int g_lag_in_frames;
-
- /*
- * rate control settings (rc)
- */
-
- /*!\brief Temporal resampling configuration, if supported by the codec.
- *
- * Temporal resampling allows the codec to "drop" frames as a strategy to
- * meet its target data rate. This can cause temporal discontinuities in
- * the encoded video, which may appear as stuttering during playback. This
- * trade-off is often acceptable, but for many applications is not. It can
- * be disabled in these cases.
- *
- * Note that not all codecs support this feature. All aom AVx codecs do.
- * For other codecs, consult the documentation for that algorithm.
- *
- * This threshold is described as a percentage of the target data buffer.
- * When the data buffer falls below this percentage of fullness, a
- * dropped frame is indicated. Set the threshold to zero (0) to disable
- * this feature.
- */
- unsigned int rc_dropframe_thresh;
-
- /*!\brief Mode for spatial resampling, if supported by the codec.
- *
- * Spatial resampling allows the codec to compress a lower resolution
- * version of the frame, which is then upscaled by the decoder to the
- * correct presentation resolution. This increases visual quality at
- * low data rates, at the expense of CPU time on the encoder/decoder.
- */
- unsigned int rc_resize_mode;
-
- /*!\brief Frame resize denominator.
- *
- * The denominator for resize to use, assuming 8 as the numerator.
- *
- * Valid denominators are 8 - 16 for now.
- */
- unsigned int rc_resize_denominator;
-
- /*!\brief Keyframe resize denominator.
- *
- * The denominator for resize to use, assuming 8 as the numerator.
- *
- * Valid denominators are 8 - 16 for now.
- */
- unsigned int rc_resize_kf_denominator;
-
- /*!\brief Frame super-resolution scaling mode.
- *
- * Similar to spatial resampling, frame super-resolution integrates
- * upscaling after the encode/decode process. Taking control of upscaling and
- * using restoration filters should allow it to outperform normal resizing.
- *
- * Mode 0 is SUPERRES_NONE, mode 1 is SUPERRES_FIXED, mode 2 is
- * SUPERRES_RANDOM and mode 3 is SUPERRES_QTHRESH.
- */
- unsigned int rc_superres_mode;
-
- /*!\brief Frame super-resolution denominator.
- *
- * The denominator for superres to use. If fixed it will only change if the
- * cumulative scale change over resizing and superres is greater than 1/2;
- * this forces superres to reduce scaling.
- *
- * Valid denominators are 8 to 16.
- *
- * Used only by SUPERRES_FIXED.
- */
- unsigned int rc_superres_denominator;
-
- /*!\brief Keyframe super-resolution denominator.
- *
- * The denominator for superres to use. If fixed it will only change if the
- * cumulative scale change over resizing and superres is greater than 1/2;
- * this forces superres to reduce scaling.
- *
- * Valid denominators are 8 - 16 for now.
- */
- unsigned int rc_superres_kf_denominator;
-
- /*!\brief Frame super-resolution q threshold.
- *
- * The q level threshold after which superres is used.
- * Valid values are 1 to 63.
- *
- * Used only by SUPERRES_QTHRESH
- */
- unsigned int rc_superres_qthresh;
-
- /*!\brief Keyframe super-resolution q threshold.
- *
- * The q level threshold after which superres is used for key frames.
- * Valid values are 1 to 63.
- *
- * Used only by SUPERRES_QTHRESH
- */
- unsigned int rc_superres_kf_qthresh;
-
- /*!\brief Rate control algorithm to use.
- *
- * Indicates whether the end usage of this stream is to be streamed over
- * a bandwidth constrained link, indicating that Constant Bit Rate (CBR)
- * mode should be used, or whether it will be played back on a high
- * bandwidth link, as from a local disk, where higher variations in
- * bitrate are acceptable.
- */
- enum aom_rc_mode rc_end_usage;
-
- /*!\brief Two-pass stats buffer.
- *
- * A buffer containing all of the stats packets produced in the first
- * pass, concatenated.
- */
- aom_fixed_buf_t rc_twopass_stats_in;
-
- /*!\brief first pass mb stats buffer.
- *
- * A buffer containing all of the first pass mb stats packets produced
- * in the first pass, concatenated.
- */
- aom_fixed_buf_t rc_firstpass_mb_stats_in;
-
- /*!\brief Target data rate
- *
- * Target bandwidth to use for this stream, in kilobits per second.
- */
- unsigned int rc_target_bitrate;
-
- /*
- * quantizer settings
- */
-
- /*!\brief Minimum (Best Quality) Quantizer
- *
- * The quantizer is the most direct control over the quality of the
- * encoded image. The range of valid values for the quantizer is codec
- * specific. Consult the documentation for the codec to determine the
- * values to use. To determine the range programmatically, call
- * aom_codec_enc_config_default() with a usage value of 0.
- */
- unsigned int rc_min_quantizer;
-
- /*!\brief Maximum (Worst Quality) Quantizer
- *
- * The quantizer is the most direct control over the quality of the
- * encoded image. The range of valid values for the quantizer is codec
- * specific. Consult the documentation for the codec to determine the
- * values to use. To determine the range programmatically, call
- * aom_codec_enc_config_default() with a usage value of 0.
- */
- unsigned int rc_max_quantizer;
-
- /*
- * bitrate tolerance
- */
-
- /*!\brief Rate control adaptation undershoot control
- *
- * This value, expressed as a percentage of the target bitrate,
- * controls the maximum allowed adaptation speed of the codec.
- * This factor controls the maximum amount of bits that can
- * be subtracted from the target bitrate in order to compensate
- * for prior overshoot.
- *
- * Valid values in the range 0-1000.
- */
- unsigned int rc_undershoot_pct;
-
- /*!\brief Rate control adaptation overshoot control
- *
- * This value, expressed as a percentage of the target bitrate,
- * controls the maximum allowed adaptation speed of the codec.
- * This factor controls the maximum amount of bits that can
- * be added to the target bitrate in order to compensate for
- * prior undershoot.
- *
- * Valid values in the range 0-1000.
- */
- unsigned int rc_overshoot_pct;
-
- /*
- * decoder buffer model parameters
- */
-
- /*!\brief Decoder Buffer Size
- *
- * This value indicates the amount of data that may be buffered by the
- * decoding application. Note that this value is expressed in units of
- * time (milliseconds). For example, a value of 5000 indicates that the
- * client will buffer (at least) 5000ms worth of encoded data. Use the
- * target bitrate (#rc_target_bitrate) to convert to bits/bytes, if
- * necessary.
- */
- unsigned int rc_buf_sz;
-
- /*!\brief Decoder Buffer Initial Size
- *
- * This value indicates the amount of data that will be buffered by the
- * decoding application prior to beginning playback. This value is
- * expressed in units of time (milliseconds). Use the target bitrate
- * (#rc_target_bitrate) to convert to bits/bytes, if necessary.
- */
- unsigned int rc_buf_initial_sz;
-
- /*!\brief Decoder Buffer Optimal Size
- *
- * This value indicates the amount of data that the encoder should try
- * to maintain in the decoder's buffer. This value is expressed in units
- * of time (milliseconds). Use the target bitrate (#rc_target_bitrate)
- * to convert to bits/bytes, if necessary.
- */
- unsigned int rc_buf_optimal_sz;
-
- /*
- * 2 pass rate control parameters
- */
-
- /*!\brief Two-pass mode CBR/VBR bias
- *
- * Bias, expressed on a scale of 0 to 100, for determining target size
- * for the current frame. The value 0 indicates the optimal CBR mode
- * value should be used. The value 100 indicates the optimal VBR mode
- * value should be used. Values in between indicate which way the
- * encoder should "lean."
- */
- unsigned int rc_2pass_vbr_bias_pct;
-
- /*!\brief Two-pass mode per-GOP minimum bitrate
- *
- * This value, expressed as a percentage of the target bitrate, indicates
- * the minimum bitrate to be used for a single GOP (aka "section")
- */
- unsigned int rc_2pass_vbr_minsection_pct;
-
- /*!\brief Two-pass mode per-GOP maximum bitrate
- *
- * This value, expressed as a percentage of the target bitrate, indicates
- * the maximum bitrate to be used for a single GOP (aka "section")
- */
- unsigned int rc_2pass_vbr_maxsection_pct;
-
- /*
- * keyframing settings (kf)
- */
-
- /*!\brief Option to enable forward reference key frame
- *
- */
- int fwd_kf_enabled;
-
- /*!\brief Keyframe placement mode
- *
- * This value indicates whether the encoder should place keyframes at a
- * fixed interval, or determine the optimal placement automatically
- * (as governed by the #kf_min_dist and #kf_max_dist parameters)
- */
- enum aom_kf_mode kf_mode;
-
- /*!\brief Keyframe minimum interval
- *
- * This value, expressed as a number of frames, prevents the encoder from
- * placing a keyframe nearer than kf_min_dist to the previous keyframe. At
- * least kf_min_dist non-keyframes will be coded before the next
- * keyframe. Set kf_min_dist equal to kf_max_dist for a fixed interval.
- */
- unsigned int kf_min_dist;
-
- /*!\brief Keyframe maximum interval
- *
- * This value, expressed as a number of frames, forces the encoder to code
- * a keyframe if one has not been coded in the last kf_max_dist frames.
- * A value of 0 implies all frames will be keyframes. Set kf_min_dist
- * equal to kf_max_dist for a fixed interval.
- */
- unsigned int kf_max_dist;
-
- /*!\brief sframe interval
- *
- * This value, expressed as a number of frames, forces the encoder to code
- * an S-Frame every sframe_dist frames.
- */
- unsigned int sframe_dist;
-
- /*!\brief sframe insertion mode
- *
- * This value must be set to 1 or 2, and tells the encoder how to insert
- * S-Frames. It will only have an effect if sframe_dist != 0.
- *
- * If altref is enabled:
- * - if sframe_mode == 1, the considered frame will be made into an
- * S-Frame only if it is an altref frame
- * - if sframe_mode == 2, the next altref frame will be made into an
- * S-Frame.
- *
- * Otherwise: the considered frame will be made into an S-Frame.
- */
- unsigned int sframe_mode;
-
- /*!\brief Tile coding mode
- *
- * This value indicates the tile coding mode.
- * A value of 0 implies a normal non-large-scale tile coding. A value of 1
- * implies a large-scale tile coding.
- */
- unsigned int large_scale_tile;
-
- /*!\brief Monochrome mode
- *
- * If this is nonzero, the encoder will generate a monochrome stream
- * with no chroma planes.
- */
- unsigned int monochrome;
-
- /*!\brief full_still_picture_hdr
- *
- * If this is nonzero, the encoder will generate a full header even for
- * still picture encoding. If zero, a reduced header is used for still
- * picture. This flag has no effect when a regular video with more than
- * a single frame is encoded.
- */
- unsigned int full_still_picture_hdr;
-
- /*!\brief Bitstream syntax mode
- *
- * This value indicates the bitstream syntax mode.
- * A value of 0 indicates bitstream is saved as Section 5 bitstream. A value
- * of 1 indicates the bitstream is saved in Annex-B format
- */
- unsigned int save_as_annexb;
-
- /*!\brief Number of explicit tile widths specified
- *
- * This value indicates the number of tile widths specified.
- * A value of 0 implies no tile widths are specified.
- * Tile widths are given in the array tile_widths[]
- */
- int tile_width_count;
-
- /*!\brief Number of explicit tile heights specified
- *
- * This value indicates the number of tile heights specified.
- * A value of 0 implies no tile heights are specified.
- * Tile heights are given in the array tile_heights[]
- */
- int tile_height_count;
-
-/*!\brief Maximum number of tile widths in tile widths array
- *
- * This define gives the maximum number of elements in the tile_widths array.
- */
-#define MAX_TILE_WIDTHS 64 // maximum tile width array length
-
- /*!\brief Array of specified tile widths
- *
- * This array specifies tile widths (and may be empty).
- * The number of widths specified is given by tile_width_count
- */
- int tile_widths[MAX_TILE_WIDTHS];
-
-/*!\brief Maximum number of tile heights in tile heights array.
- *
- * This define gives the maximum number of elements in the tile_heights array.
- */
-#define MAX_TILE_HEIGHTS 64 // maximum tile height array length
-
- /*!\brief Array of specified tile heights
- *
- * This array specifies tile heights (and may be empty).
- * The number of heights specified is given by tile_height_count
- */
- int tile_heights[MAX_TILE_HEIGHTS];
-
- /*!\brief Options defined per config file
- *
- */
- cfg_options_t cfg;
-} aom_codec_enc_cfg_t; /**< alias for struct aom_codec_enc_cfg */
-
-/*!\brief Initialize an encoder instance
- *
- * Initializes an encoder context using the given interface. Applications
- * should call the aom_codec_enc_init convenience macro instead of this
- * function directly, to ensure that the ABI version number parameter
- * is properly initialized.
- *
- * If the library was configured with --disable-multithread, this call
- * is not thread safe and should be guarded with a lock if being used
- * in a multithreaded context.
- *
- * \param[in] ctx Pointer to this instance's context.
- * \param[in] iface Pointer to the algorithm interface to use.
- * \param[in] cfg Configuration to use, if known.
- * \param[in] flags Bitfield of AOM_CODEC_USE_* flags
- * \param[in] ver ABI version number. Must be set to
- * AOM_ENCODER_ABI_VERSION
- * \retval #AOM_CODEC_OK
- * The encoder algorithm initialized.
- * \retval #AOM_CODEC_MEM_ERROR
- * Memory allocation failed.
- */
-aom_codec_err_t aom_codec_enc_init_ver(aom_codec_ctx_t *ctx,
- aom_codec_iface_t *iface,
- const aom_codec_enc_cfg_t *cfg,
- aom_codec_flags_t flags, int ver);
-
-/*!\brief Convenience macro for aom_codec_enc_init_ver()
- *
- * Ensures the ABI version parameter is properly set.
- */
-#define aom_codec_enc_init(ctx, iface, cfg, flags) \
- aom_codec_enc_init_ver(ctx, iface, cfg, flags, AOM_ENCODER_ABI_VERSION)
-
-/*!\brief Initialize multi-encoder instance
- *
- * Initializes multi-encoder context using the given interface.
- * Applications should call the aom_codec_enc_init_multi convenience macro
- * instead of this function directly, to ensure that the ABI version number
- * parameter is properly initialized.
- *
- * \param[in] ctx Pointer to this instance's context.
- * \param[in] iface Pointer to the algorithm interface to use.
- * \param[in] cfg Configuration to use, if known.
- * \param[in] num_enc Total number of encoders.
- * \param[in] flags Bitfield of AOM_CODEC_USE_* flags
- * \param[in] dsf Pointer to down-sampling factors.
- * \param[in] ver ABI version number. Must be set to
- * AOM_ENCODER_ABI_VERSION
- * \retval #AOM_CODEC_OK
- * The encoder algorithm initialized.
- * \retval #AOM_CODEC_MEM_ERROR
- * Memory allocation failed.
- */
-aom_codec_err_t aom_codec_enc_init_multi_ver(
- aom_codec_ctx_t *ctx, aom_codec_iface_t *iface, aom_codec_enc_cfg_t *cfg,
- int num_enc, aom_codec_flags_t flags, aom_rational_t *dsf, int ver);
-
-/*!\brief Convenience macro for aom_codec_enc_init_multi_ver()
- *
- * Ensures the ABI version parameter is properly set.
- */
-#define aom_codec_enc_init_multi(ctx, iface, cfg, num_enc, flags, dsf) \
- aom_codec_enc_init_multi_ver(ctx, iface, cfg, num_enc, flags, dsf, \
- AOM_ENCODER_ABI_VERSION)
-
-/*!\brief Get a default configuration
- *
- * Initializes an encoder configuration structure with default values. Supports
- * the notion of "usages" so that an algorithm may offer different default
- * settings depending on the user's intended goal. This function \ref SHOULD
- * be called by all applications to initialize the configuration structure
- * before specializing the configuration with application specific values.
- *
- * \param[in] iface Pointer to the algorithm interface to use.
- * \param[out] cfg Configuration buffer to populate.
- * \param[in] reserved Must be set to 0.
- *
- * \retval #AOM_CODEC_OK
- * The configuration was populated.
- * \retval #AOM_CODEC_INCAPABLE
- * Interface is not an encoder interface.
- * \retval #AOM_CODEC_INVALID_PARAM
- * A parameter was NULL, or the usage value was not recognized.
- */
-aom_codec_err_t aom_codec_enc_config_default(aom_codec_iface_t *iface,
- aom_codec_enc_cfg_t *cfg,
- unsigned int reserved);
-
-/*!\brief Set or change configuration
- *
- * Reconfigures an encoder instance according to the given configuration.
- *
- * \param[in] ctx Pointer to this instance's context
- * \param[in] cfg Configuration buffer to use
- *
- * \retval #AOM_CODEC_OK
- * The configuration was applied.
- * \retval #AOM_CODEC_INCAPABLE
- * Interface is not an encoder interface.
- * \retval #AOM_CODEC_INVALID_PARAM
- * A parameter was NULL, or the usage value was not recognized.
- */
-aom_codec_err_t aom_codec_enc_config_set(aom_codec_ctx_t *ctx,
- const aom_codec_enc_cfg_t *cfg);
-
-/*!\brief Get global stream headers
- *
- * Retrieves a stream level global header packet, if supported by the codec.
- * Calls to this function should be deferred until all configuration information
- * has been passed to libaom. Otherwise the global header data may be
- * invalidated by additional configuration changes.
- *
- * The AV1 implementation of this function returns an OBU. The OBU returned is
- * in Low Overhead Bitstream Format. Specifically, the obu_has_size_field bit is
- * set, and the buffer contains the obu_size field for the returned OBU.
- *
- * \param[in] ctx Pointer to this instance's context
- *
- * \retval NULL
- * Encoder does not support global header, or an error occurred while
- * generating the global header.
- *
- * \retval Non-NULL
- * Pointer to buffer containing global header packet. The caller owns the
- * memory associated with this buffer, and must free the 'buf' member of the
- * aom_fixed_buf_t as well as the aom_fixed_buf_t pointer. Memory returned
- * must be freed via call to free().
- */
-aom_fixed_buf_t *aom_codec_get_global_headers(aom_codec_ctx_t *ctx);
-
-/*!\brief Encode a frame
- *
- * Encodes a video frame at the given "presentation time." The presentation
- * time stamp (PTS) \ref MUST be strictly increasing.
- *
- * When the last frame has been passed to the encoder, this function should
- * continue to be called, with the img parameter set to NULL. This will
- * signal the end-of-stream condition to the encoder and allow it to encode
- * any held buffers. Encoding is complete when aom_codec_encode() is called
- * and aom_codec_get_cx_data() returns no data.
- *
- * \param[in] ctx Pointer to this instance's context
- * \param[in] img Image data to encode, NULL to flush.
- * \param[in] pts Presentation time stamp, in timebase units.
- * \param[in] duration Duration to show frame, in timebase units.
- * \param[in] flags Flags to use for encoding this frame.
- *
- * \retval #AOM_CODEC_OK
- * The encode (or flush) request succeeded.
- * \retval #AOM_CODEC_INCAPABLE
- * Interface is not an encoder interface.
- * \retval #AOM_CODEC_INVALID_PARAM
- * A parameter was NULL, the image format is unsupported, etc.
- */
-aom_codec_err_t aom_codec_encode(aom_codec_ctx_t *ctx, const aom_image_t *img,
- aom_codec_pts_t pts, unsigned long duration,
- aom_enc_frame_flags_t flags);
-
-/*!\brief Set compressed data output buffer
- *
- * Sets the buffer that the codec should output the compressed data
- * into. This call effectively sets the buffer pointer returned in the
- * next AOM_CODEC_CX_FRAME_PKT packet. Subsequent packets will be
- * appended into this buffer. The buffer is preserved across frames,
- * so applications must periodically call this function after flushing
- * the accumulated compressed data to disk or to the network to reset
- * the pointer to the buffer's head.
- *
- * `pad_before` bytes will be skipped before writing the compressed
- * data, and `pad_after` bytes will be appended to the packet. The size
- * of the packet will be the sum of the size of the actual compressed
- * data, pad_before, and pad_after. The padding bytes will be preserved
- * (not overwritten).
- *
- * Note that calling this function does not guarantee that the returned
- * compressed data will be placed into the specified buffer. In the
- * event that the encoded data will not fit into the buffer provided,
- * the returned packet \ref MAY point to an internal buffer, as it would
- * if this call were never used. In this event, the output packet will
- * NOT have any padding, and the application must free space and copy it
- * to the proper place. This is of particular note in configurations
- * that may output multiple packets for a single encoded frame (e.g., lagged
- * encoding) or if the application does not reset the buffer periodically.
- *
- * Applications may restore the default behavior of the codec providing
- * the compressed data buffer by calling this function with a NULL
- * buffer.
- *
- * Applications \ref MUSTNOT call this function during iteration of
- * aom_codec_get_cx_data().
- *
- * \param[in] ctx Pointer to this instance's context
- * \param[in] buf Buffer to store compressed data into
- * \param[in] pad_before Bytes to skip before writing compressed data
- * \param[in] pad_after Bytes to skip after writing compressed data
- *
- * \retval #AOM_CODEC_OK
- * The buffer was set successfully.
- * \retval #AOM_CODEC_INVALID_PARAM
- * A parameter was invalid (e.g. ctx was NULL).
- */
-aom_codec_err_t aom_codec_set_cx_data_buf(aom_codec_ctx_t *ctx,
- const aom_fixed_buf_t *buf,
- unsigned int pad_before,
- unsigned int pad_after);
-
-/*!\brief Encoded data iterator
- *
- * Iterates over a list of data packets to be passed from the encoder to the
- * application. The different kinds of packets available are enumerated in
- * #aom_codec_cx_pkt_kind.
- *
- * #AOM_CODEC_CX_FRAME_PKT packets should be passed to the application's
- * muxer. Multiple compressed frames may be in the list.
- * #AOM_CODEC_STATS_PKT packets should be appended to a global buffer.
- *
- * The application \ref MUST silently ignore any packet kinds that it does
- * not recognize or support.
- *
- * The data buffers returned from this function are only guaranteed to be
- * valid until the application makes another call to any aom_codec_* function.
- *
- * \param[in] ctx Pointer to this instance's context
- * \param[in,out] iter Iterator storage, initialized to NULL
- *
- * \return Returns a pointer to an output data packet (compressed frame data,
- * two-pass statistics, etc.) or NULL to signal end-of-list.
- *
- */
-const aom_codec_cx_pkt_t *aom_codec_get_cx_data(aom_codec_ctx_t *ctx,
- aom_codec_iter_t *iter);
-
-/*!\brief Get Preview Frame
- *
- * Returns an image that can be used as a preview. Shows the image as it would
- * exist at the decompressor. The application \ref MUSTNOT write into this
- * image buffer.
- *
- * \param[in] ctx Pointer to this instance's context
- *
- * \return Returns a pointer to a preview image, or NULL if no image is
- * available.
- *
- */
-const aom_image_t *aom_codec_get_preview_frame(aom_codec_ctx_t *ctx);
-
-/*!@} - end defgroup encoder*/
-#ifdef __cplusplus
-}
-#endif
-#endif // AOM_AOM_AOM_ENCODER_H_
diff --git a/third_party/aom/aom/aom_frame_buffer.h b/third_party/aom/aom/aom_frame_buffer.h
deleted file mode 100644
index fba4322f8..000000000
--- a/third_party/aom/aom/aom_frame_buffer.h
+++ /dev/null
@@ -1,84 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#ifndef AOM_AOM_AOM_FRAME_BUFFER_H_
-#define AOM_AOM_AOM_FRAME_BUFFER_H_
-
-/*!\file
- * \brief Describes the decoder external frame buffer interface.
- */
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-#include "aom/aom_integer.h"
-
-/*!\brief The maximum number of work buffers used by libaom.
- * Support maximum 4 threads to decode video in parallel.
- * Each thread will use one work buffer.
- * TODO(hkuang): Add support to set number of worker threads dynamically.
- */
-#define AOM_MAXIMUM_WORK_BUFFERS 8
-
-/*!\brief The maximum number of reference buffers that an AV1 encoder may use.
- */
-#define AOM_MAXIMUM_REF_BUFFERS 8
-
-/*!\brief External frame buffer
- *
- * This structure holds allocated frame buffers used by the decoder.
- */
-typedef struct aom_codec_frame_buffer {
- uint8_t *data; /**< Pointer to the data buffer */
- size_t size; /**< Size of data in bytes */
- void *priv; /**< Frame's private data */
-} aom_codec_frame_buffer_t;
-
-/*!\brief get frame buffer callback prototype
- *
- * This callback is invoked by the decoder to retrieve data for the frame
- * buffer in order for the decode call to complete. The callback must
- * allocate at least min_size in bytes and assign it to fb->data. The callback
- * must zero out all the data allocated. Then the callback must set fb->size
- * to the allocated size. The application does not need to align the allocated
- * data. The callback is triggered when the decoder needs a frame buffer to
- * decode a compressed image into. This function may be called more than once
- * for every call to aom_codec_decode. The application may set fb->priv to
- * some data which will be passed back in the ximage and the release function
- * call. |fb| is guaranteed to not be NULL. On success the callback must
- * return 0. On any failure the callback must return a value less than 0.
- *
- * \param[in] priv Callback's private data
- * \param[in] min_size Minimum size in bytes needed by the buffer
- * \param[in,out] fb Pointer to aom_codec_frame_buffer_t
- */
-typedef int (*aom_get_frame_buffer_cb_fn_t)(void *priv, size_t min_size,
- aom_codec_frame_buffer_t *fb);
-
-/*!\brief release frame buffer callback prototype
- *
- * This callback is invoked by the decoder when the frame buffer is not
- * referenced by any other buffers. |fb| is guaranteed to not be NULL. On
- * success the callback must return 0. On any failure the callback must return
- * a value less than 0.
- *
- * \param[in] priv Callback's private data
- * \param[in] fb Pointer to aom_codec_frame_buffer_t
- */
-typedef int (*aom_release_frame_buffer_cb_fn_t)(void *priv,
- aom_codec_frame_buffer_t *fb);
-
-#ifdef __cplusplus
-} // extern "C"
-#endif
-
-#endif // AOM_AOM_AOM_FRAME_BUFFER_H_
diff --git a/third_party/aom/aom/aom_image.h b/third_party/aom/aom/aom_image.h
deleted file mode 100644
index a960127f1..000000000
--- a/third_party/aom/aom/aom_image.h
+++ /dev/null
@@ -1,331 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-/*!\file
- * \brief Describes the aom image descriptor and associated operations
- *
- */
-#ifndef AOM_AOM_AOM_IMAGE_H_
-#define AOM_AOM_AOM_IMAGE_H_
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-#include "aom/aom_integer.h"
-
-/*!\brief Current ABI version number
- *
- * \internal
- * If this file is altered in any way that changes the ABI, this value
- * must be bumped. Examples include, but are not limited to, changing
- * types, removing or reassigning enums, adding/removing/rearranging
- * fields to structures
- */
-#define AOM_IMAGE_ABI_VERSION (4) /**<\hideinitializer*/
-
-#define AOM_IMG_FMT_PLANAR 0x100 /**< Image is a planar format. */
-#define AOM_IMG_FMT_UV_FLIP 0x200 /**< V plane precedes U in memory. */
-#define AOM_IMG_FMT_HAS_ALPHA 0x400 /**< Image has an alpha channel. */
-#define AOM_IMG_FMT_HIGHBITDEPTH 0x800 /**< Image uses 16bit framebuffer. */
-
-/*!\brief List of supported image formats */
-typedef enum aom_img_fmt {
- AOM_IMG_FMT_NONE,
- AOM_IMG_FMT_YV12 =
- AOM_IMG_FMT_PLANAR | AOM_IMG_FMT_UV_FLIP | 1, /**< planar YVU */
- AOM_IMG_FMT_I420 = AOM_IMG_FMT_PLANAR | 2,
- AOM_IMG_FMT_AOMYV12 = AOM_IMG_FMT_PLANAR | AOM_IMG_FMT_UV_FLIP |
- 3, /**< planar 4:2:0 format with aom color space */
- AOM_IMG_FMT_AOMI420 = AOM_IMG_FMT_PLANAR | 4,
- AOM_IMG_FMT_I422 = AOM_IMG_FMT_PLANAR | 5,
- AOM_IMG_FMT_I444 = AOM_IMG_FMT_PLANAR | 6,
- AOM_IMG_FMT_444A = AOM_IMG_FMT_PLANAR | AOM_IMG_FMT_HAS_ALPHA | 6,
- AOM_IMG_FMT_I42016 = AOM_IMG_FMT_I420 | AOM_IMG_FMT_HIGHBITDEPTH,
- AOM_IMG_FMT_I42216 = AOM_IMG_FMT_I422 | AOM_IMG_FMT_HIGHBITDEPTH,
- AOM_IMG_FMT_I44416 = AOM_IMG_FMT_I444 | AOM_IMG_FMT_HIGHBITDEPTH,
-} aom_img_fmt_t; /**< alias for enum aom_img_fmt */
-
-/*!\brief List of supported color primaries */
-typedef enum aom_color_primaries {
- AOM_CICP_CP_RESERVED_0 = 0, /**< For future use */
- AOM_CICP_CP_BT_709 = 1, /**< BT.709 */
- AOM_CICP_CP_UNSPECIFIED = 2, /**< Unspecified */
- AOM_CICP_CP_RESERVED_3 = 3, /**< For future use */
- AOM_CICP_CP_BT_470_M = 4, /**< BT.470 System M (historical) */
- AOM_CICP_CP_BT_470_B_G = 5, /**< BT.470 System B, G (historical) */
- AOM_CICP_CP_BT_601 = 6, /**< BT.601 */
- AOM_CICP_CP_SMPTE_240 = 7, /**< SMPTE 240 */
- AOM_CICP_CP_GENERIC_FILM =
- 8, /**< Generic film (color filters using illuminant C) */
- AOM_CICP_CP_BT_2020 = 9, /**< BT.2020, BT.2100 */
- AOM_CICP_CP_XYZ = 10, /**< SMPTE 428 (CIE 1931 XYZ) */
- AOM_CICP_CP_SMPTE_431 = 11, /**< SMPTE RP 431-2 */
- AOM_CICP_CP_SMPTE_432 = 12, /**< SMPTE EG 432-1 */
- AOM_CICP_CP_RESERVED_13 = 13, /**< For future use (values 13 - 21) */
- AOM_CICP_CP_EBU_3213 = 22, /**< EBU Tech. 3213-E */
- AOM_CICP_CP_RESERVED_23 = 23 /**< For future use (values 23 - 255) */
-} aom_color_primaries_t; /**< alias for enum aom_color_primaries */
-
-/*!\brief List of supported transfer functions */
-typedef enum aom_transfer_characteristics {
- AOM_CICP_TC_RESERVED_0 = 0, /**< For future use */
- AOM_CICP_TC_BT_709 = 1, /**< BT.709 */
- AOM_CICP_TC_UNSPECIFIED = 2, /**< Unspecified */
- AOM_CICP_TC_RESERVED_3 = 3, /**< For future use */
- AOM_CICP_TC_BT_470_M = 4, /**< BT.470 System M (historical) */
- AOM_CICP_TC_BT_470_B_G = 5, /**< BT.470 System B, G (historical) */
- AOM_CICP_TC_BT_601 = 6, /**< BT.601 */
- AOM_CICP_TC_SMPTE_240 = 7, /**< SMPTE 240 M */
- AOM_CICP_TC_LINEAR = 8, /**< Linear */
- AOM_CICP_TC_LOG_100 = 9, /**< Logarithmic (100 : 1 range) */
- AOM_CICP_TC_LOG_100_SQRT10 =
- 10, /**< Logarithmic (100 * Sqrt(10) : 1 range) */
- AOM_CICP_TC_IEC_61966 = 11, /**< IEC 61966-2-4 */
- AOM_CICP_TC_BT_1361 = 12, /**< BT.1361 */
- AOM_CICP_TC_SRGB = 13, /**< sRGB or sYCC*/
- AOM_CICP_TC_BT_2020_10_BIT = 14, /**< BT.2020 10-bit systems */
- AOM_CICP_TC_BT_2020_12_BIT = 15, /**< BT.2020 12-bit systems */
- AOM_CICP_TC_SMPTE_2084 = 16, /**< SMPTE ST 2084, ITU BT.2100 PQ */
- AOM_CICP_TC_SMPTE_428 = 17, /**< SMPTE ST 428 */
- AOM_CICP_TC_HLG = 18, /**< BT.2100 HLG, ARIB STD-B67 */
- AOM_CICP_TC_RESERVED_19 = 19 /**< For future use (values 19-255) */
-} aom_transfer_characteristics_t; /**< alias for enum
- * aom_transfer_characteristics */
-
-/*!\brief List of supported matrix coefficients */
-typedef enum aom_matrix_coefficients {
- AOM_CICP_MC_IDENTITY = 0, /**< Identity matrix */
- AOM_CICP_MC_BT_709 = 1, /**< BT.709 */
- AOM_CICP_MC_UNSPECIFIED = 2, /**< Unspecified */
- AOM_CICP_MC_RESERVED_3 = 3, /**< For future use */
- AOM_CICP_MC_FCC = 4, /**< US FCC 73.628 */
- AOM_CICP_MC_BT_470_B_G = 5, /**< BT.470 System B, G (historical) */
- AOM_CICP_MC_BT_601 = 6, /**< BT.601 */
- AOM_CICP_MC_SMPTE_240 = 7, /**< SMPTE 240 M */
- AOM_CICP_MC_SMPTE_YCGCO = 8, /**< YCgCo */
- AOM_CICP_MC_BT_2020_NCL =
- 9, /**< BT.2020 non-constant luminance, BT.2100 YCbCr */
- AOM_CICP_MC_BT_2020_CL = 10, /**< BT.2020 constant luminance */
- AOM_CICP_MC_SMPTE_2085 = 11, /**< SMPTE ST 2085 YDzDx */
- AOM_CICP_MC_CHROMAT_NCL =
- 12, /**< Chromaticity-derived non-constant luminance */
- AOM_CICP_MC_CHROMAT_CL = 13, /**< Chromaticity-derived constant luminance */
- AOM_CICP_MC_ICTCP = 14, /**< BT.2100 ICtCp */
- AOM_CICP_MC_RESERVED_15 = 15 /**< For future use (values 15-255) */
-} aom_matrix_coefficients_t;
-
-/*!\brief List of supported color range */
-typedef enum aom_color_range {
- AOM_CR_STUDIO_RANGE = 0, /**< Y [16..235], UV [16..240] */
- AOM_CR_FULL_RANGE = 1 /**< YUV/RGB [0..255] */
-} aom_color_range_t; /**< alias for enum aom_color_range */
-
-/*!\brief List of chroma sample positions */
-typedef enum aom_chroma_sample_position {
- AOM_CSP_UNKNOWN = 0, /**< Unknown */
- AOM_CSP_VERTICAL = 1, /**< Horizontally co-located with luma(0, 0)*/
- /**< sample, between two vertical samples */
- AOM_CSP_COLOCATED = 2, /**< Co-located with luma(0, 0) sample */
- AOM_CSP_RESERVED = 3 /**< Reserved value */
-} aom_chroma_sample_position_t; /**< alias for enum
- * aom_chroma_sample_position */
-
-/**\brief Image Descriptor */
-typedef struct aom_image {
- aom_img_fmt_t fmt; /**< Image Format */
- aom_color_primaries_t cp; /**< CICP Color Primaries */
- aom_transfer_characteristics_t tc; /**< CICP Transfer Characteristics */
- aom_matrix_coefficients_t mc; /**< CICP Matrix Coefficients */
- int monochrome; /**< Whether image is monochrome */
- aom_chroma_sample_position_t csp; /**< chroma sample position */
- aom_color_range_t range; /**< Color Range */
-
- /* Image storage dimensions */
- unsigned int w; /**< Stored image width */
- unsigned int h; /**< Stored image height */
- unsigned int bit_depth; /**< Stored image bit-depth */
-
- /* Image display dimensions */
- unsigned int d_w; /**< Displayed image width */
- unsigned int d_h; /**< Displayed image height */
-
- /* Image intended rendering dimensions */
- unsigned int r_w; /**< Intended rendering image width */
- unsigned int r_h; /**< Intended rendering image height */
-
- /* Chroma subsampling info */
- unsigned int x_chroma_shift; /**< subsampling order, X */
- unsigned int y_chroma_shift; /**< subsampling order, Y */
-
-/* Image data pointers. */
-#define AOM_PLANE_PACKED 0 /**< To be used for all packed formats */
-#define AOM_PLANE_Y 0 /**< Y (Luminance) plane */
-#define AOM_PLANE_U 1 /**< U (Chroma) plane */
-#define AOM_PLANE_V 2 /**< V (Chroma) plane */
-#define AOM_PLANE_ALPHA 3 /**< A (Transparency) plane */
- unsigned char *planes[4]; /**< pointer to the top left pixel for each plane */
- int stride[4]; /**< stride between rows for each plane */
- size_t sz; /**< data size */
-
- int bps; /**< bits per sample (for packed formats) */
-
- int temporal_id; /**< Temporal layer Id of image */
- int spatial_id; /**< Spatial layer Id of image */
-
- /*!\brief The following member may be set by the application to associate
- * data with this image.
- */
- void *user_priv;
-
- /* The following members should be treated as private. */
- unsigned char *img_data; /**< private */
- int img_data_owner; /**< private */
- int self_allocd; /**< private */
-
- void *fb_priv; /**< Frame buffer data associated with the image. */
-} aom_image_t; /**< alias for struct aom_image */
-
-/**\brief Representation of a rectangle on a surface */
-typedef struct aom_image_rect {
- unsigned int x; /**< leftmost column */
- unsigned int y; /**< topmost row */
- unsigned int w; /**< width */
- unsigned int h; /**< height */
-} aom_image_rect_t; /**< alias for struct aom_image_rect */
-
-/*!\brief Open a descriptor, allocating storage for the underlying image
- *
- * Returns a descriptor for storing an image of the given format. The
- * storage for the descriptor is allocated on the heap.
- *
- * \param[in] img Pointer to storage for descriptor. If this parameter
- * is NULL, the storage for the descriptor will be
- * allocated on the heap.
- * \param[in] fmt Format for the image
- * \param[in] d_w Width of the image
- * \param[in] d_h Height of the image
- * \param[in] align Alignment, in bytes, of the image buffer and
- * each row in the image(stride).
- *
- * \return Returns a pointer to the initialized image descriptor. If the img
- * parameter is non-null, the value of the img parameter will be
- * returned.
- */
-aom_image_t *aom_img_alloc(aom_image_t *img, aom_img_fmt_t fmt,
- unsigned int d_w, unsigned int d_h,
- unsigned int align);
-
-/*!\brief Open a descriptor, using existing storage for the underlying image
- *
- * Returns a descriptor for storing an image of the given format. The
- * storage for descriptor has been allocated elsewhere, and a descriptor is
- * desired to "wrap" that storage.
- *
- * \param[in] img Pointer to storage for descriptor. If this parameter
- * is NULL, the storage for the descriptor will be
- * allocated on the heap.
- * \param[in] fmt Format for the image
- * \param[in] d_w Width of the image
- * \param[in] d_h Height of the image
- * \param[in] align Alignment, in bytes, of each row in the image.
- * \param[in] img_data Storage to use for the image
- *
- * \return Returns a pointer to the initialized image descriptor. If the img
- * parameter is non-null, the value of the img parameter will be
- * returned.
- */
-aom_image_t *aom_img_wrap(aom_image_t *img, aom_img_fmt_t fmt, unsigned int d_w,
- unsigned int d_h, unsigned int align,
- unsigned char *img_data);
-
-/*!\brief Open a descriptor, allocating storage for the underlying image with a
- * border
- *
- * Returns a descriptor for storing an image of the given format and its
- * borders. The storage for the descriptor is allocated on the heap.
- *
- * \param[in] img Pointer to storage for descriptor. If this parameter
- * is NULL, the storage for the descriptor will be
- * allocated on the heap.
- * \param[in] fmt Format for the image
- * \param[in] d_w Width of the image
- * \param[in] d_h Height of the image
- * \param[in] align Alignment, in bytes, of the image buffer and
- * each row in the image(stride).
- * \param[in] size_align Alignment, in bytes, of the image width and height.
- * \param[in] border A border that is padded on four sides of the image.
- *
- * \return Returns a pointer to the initialized image descriptor. If the img
- * parameter is non-null, the value of the img parameter will be
- * returned.
- */
-aom_image_t *aom_img_alloc_with_border(aom_image_t *img, aom_img_fmt_t fmt,
- unsigned int d_w, unsigned int d_h,
- unsigned int align,
- unsigned int size_align,
- unsigned int border);
-
-/*!\brief Set the rectangle identifying the displayed portion of the image
- *
- * Updates the displayed rectangle (aka viewport) on the image surface to
- * match the specified coordinates and size.
- *
- * \param[in] img Image descriptor
- * \param[in] x leftmost column
- * \param[in] y topmost row
- * \param[in] w width
- * \param[in] h height
- * \param[in] border A border that is padded on four sides of the image.
- *
- * \return 0 if the requested rectangle is valid, nonzero otherwise.
- */
-int aom_img_set_rect(aom_image_t *img, unsigned int x, unsigned int y,
- unsigned int w, unsigned int h, unsigned int border);
-
-/*!\brief Flip the image vertically (top for bottom)
- *
- * Adjusts the image descriptor's pointers and strides to make the image
- * be referenced upside-down.
- *
- * \param[in] img Image descriptor
- */
-void aom_img_flip(aom_image_t *img);
-
-/*!\brief Close an image descriptor
- *
- * Frees all allocated storage associated with an image descriptor.
- *
- * \param[in] img Image descriptor
- */
-void aom_img_free(aom_image_t *img);
-
-/*!\brief Get the width of a plane
- *
- * Get the width of a plane of an image
- *
- * \param[in] img Image descriptor
- * \param[in] plane Plane index
- */
-int aom_img_plane_width(const aom_image_t *img, int plane);
-
-/*!\brief Get the height of a plane
- *
- * Get the height of a plane of an image
- *
- * \param[in] img Image descriptor
- * \param[in] plane Plane index
- */
-int aom_img_plane_height(const aom_image_t *img, int plane);
-
-#ifdef __cplusplus
-} // extern "C"
-#endif
-
-#endif // AOM_AOM_AOM_IMAGE_H_
diff --git a/third_party/aom/aom/aom_integer.h b/third_party/aom/aom/aom_integer.h
deleted file mode 100644
index 90263bd4f..000000000
--- a/third_party/aom/aom/aom_integer.h
+++ /dev/null
@@ -1,106 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-#ifndef AOM_AOM_AOM_INTEGER_H_
-#define AOM_AOM_AOM_INTEGER_H_
-
-/* get ptrdiff_t, size_t, wchar_t, NULL */
-#include <stddef.h>
-
-#if defined(_MSC_VER)
-#define AOM_FORCE_INLINE __forceinline
-#define AOM_INLINE __inline
-#else
-#define AOM_FORCE_INLINE __inline__ __attribute__((always_inline))
-// TODO(jbb): Allow a way to force inline off for older compilers.
-#define AOM_INLINE inline
-#endif
-
-#if defined(AOM_EMULATE_INTTYPES)
-typedef signed char int8_t;
-typedef signed short int16_t;
-typedef signed int int32_t;
-
-typedef unsigned char uint8_t;
-typedef unsigned short uint16_t;
-typedef unsigned int uint32_t;
-
-#ifndef _UINTPTR_T_DEFINED
-typedef size_t uintptr_t;
-#endif
-
-#else
-
-/* Most platforms have the C99 standard integer types. */
-
-#if defined(__cplusplus)
-#if !defined(__STDC_FORMAT_MACROS)
-#define __STDC_FORMAT_MACROS
-#endif
-#if !defined(__STDC_LIMIT_MACROS)
-#define __STDC_LIMIT_MACROS
-#endif
-#endif // __cplusplus
-
-#include <stdint.h>
-
-#endif
-
-/* VS2010 defines stdint.h, but not inttypes.h */
-#if defined(_MSC_VER) && _MSC_VER < 1800
-#define PRId64 "I64d"
-#else
-#include <inttypes.h>
-#endif
-
-#if !defined(INT8_MAX)
-#define INT8_MAX 127
-#endif
-
-#if !defined(INT32_MAX)
-#define INT32_MAX 2147483647
-#endif
-
-#if !defined(INT32_MIN)
-#define INT32_MIN (-2147483647 - 1)
-#endif
-
-#define NELEMENTS(x) (int)(sizeof(x) / sizeof(x[0]))
-
-#if defined(__cplusplus)
-extern "C" {
-#endif // __cplusplus
-
-// Returns size of uint64_t when encoded using LEB128.
-size_t aom_uleb_size_in_bytes(uint64_t value);
-
-// Returns 0 on success, -1 on decode failure.
-// On success, 'value' stores the decoded LEB128 value and 'length' stores
-// the number of bytes decoded.
-int aom_uleb_decode(const uint8_t *buffer, size_t available, uint64_t *value,
- size_t *length);
-
-// Encodes LEB128 integer. Returns 0 when successful, and -1 upon failure.
-int aom_uleb_encode(uint64_t value, size_t available, uint8_t *coded_value,
- size_t *coded_size);
-
-// Encodes LEB128 integer to size specified. Returns 0 when successful, and -1
-// upon failure.
-// Note: This will write exactly pad_to_size bytes; if the value cannot be
-// encoded in this many bytes, then this will fail.
-int aom_uleb_encode_fixed_size(uint64_t value, size_t available,
- size_t pad_to_size, uint8_t *coded_value,
- size_t *coded_size);
-
-#if defined(__cplusplus)
-} // extern "C"
-#endif // __cplusplus
-
-#endif // AOM_AOM_AOM_INTEGER_H_
diff --git a/third_party/aom/aom/aomcx.h b/third_party/aom/aom/aomcx.h
deleted file mode 100644
index 013ddf57e..000000000
--- a/third_party/aom/aom/aomcx.h
+++ /dev/null
@@ -1,1198 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-#ifndef AOM_AOM_AOMCX_H_
-#define AOM_AOM_AOMCX_H_
-
-/*!\defgroup aom_encoder AOMedia AOM/AV1 Encoder
- * \ingroup aom
- *
- * @{
- */
-#include "aom/aom.h"
-#include "aom/aom_encoder.h"
-
-/*!\file
- * \brief Provides definitions for using AOM or AV1 encoder algorithm within the
- * aom Codec Interface.
- */
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-/*!\name Algorithm interface for AV1
- *
- * This interface provides the capability to encode raw AV1 streams.
- * @{
- */
-extern aom_codec_iface_t aom_codec_av1_cx_algo;
-extern aom_codec_iface_t *aom_codec_av1_cx(void);
-/*!@} - end algorithm interface member group*/
-
-/*
- * Algorithm Flags
- */
-
-/*!\brief Don't reference the last frame
- *
- * When this flag is set, the encoder will not use the last frame as a
- * predictor. When not set, the encoder will choose whether to use the
- * last frame or not automatically.
- */
-#define AOM_EFLAG_NO_REF_LAST (1 << 16)
-/*!\brief Don't reference the last2 frame
- *
- * When this flag is set, the encoder will not use the last2 frame as a
- * predictor. When not set, the encoder will choose whether to use the
- * last2 frame or not automatically.
- */
-#define AOM_EFLAG_NO_REF_LAST2 (1 << 17)
-/*!\brief Don't reference the last3 frame
- *
- * When this flag is set, the encoder will not use the last3 frame as a
- * predictor. When not set, the encoder will choose whether to use the
- * last3 frame or not automatically.
- */
-#define AOM_EFLAG_NO_REF_LAST3 (1 << 18)
-/*!\brief Don't reference the golden frame
- *
- * When this flag is set, the encoder will not use the golden frame as a
- * predictor. When not set, the encoder will choose whether to use the
- * golden frame or not automatically.
- */
-#define AOM_EFLAG_NO_REF_GF (1 << 19)
-
-/*!\brief Don't reference the alternate reference frame
- *
- * When this flag is set, the encoder will not use the alt ref frame as a
- * predictor. When not set, the encoder will choose whether to use the
- * alt ref frame or not automatically.
- */
-#define AOM_EFLAG_NO_REF_ARF (1 << 20)
-/*!\brief Don't reference the bwd reference frame
- *
- * When this flag is set, the encoder will not use the bwd ref frame as a
- * predictor. When not set, the encoder will choose whether to use the
- * bwd ref frame or not automatically.
- */
-#define AOM_EFLAG_NO_REF_BWD (1 << 21)
-/*!\brief Don't reference the alt2 reference frame
- *
- * When this flag is set, the encoder will not use the alt2 ref frame as a
- * predictor. When not set, the encoder will choose whether to use the
- * alt2 ref frame or not automatically.
- */
-#define AOM_EFLAG_NO_REF_ARF2 (1 << 22)
-
-/*!\brief Don't update the last frame
- *
- * When this flag is set, the encoder will not update the last frame with
- * the contents of the current frame.
- */
-#define AOM_EFLAG_NO_UPD_LAST (1 << 23)
-
-/*!\brief Don't update the golden frame
- *
- * When this flag is set, the encoder will not update the golden frame with
- * the contents of the current frame.
- */
-#define AOM_EFLAG_NO_UPD_GF (1 << 24)
-
-/*!\brief Don't update the alternate reference frame
- *
- * When this flag is set, the encoder will not update the alt ref frame with
- * the contents of the current frame.
- */
-#define AOM_EFLAG_NO_UPD_ARF (1 << 25)
-/*!\brief Disable entropy update
- *
- * When this flag is set, the encoder will not update its internal entropy
- * model based on the entropy of this frame.
- */
-#define AOM_EFLAG_NO_UPD_ENTROPY (1 << 26)
-/*!\brief Disable ref frame mvs
- *
- * When this flag is set, the encoder will not allow frames to
- * be encoded using mfmv.
- */
-#define AOM_EFLAG_NO_REF_FRAME_MVS (1 << 27)
-/*!\brief Enable error resilient frame
- *
- * When this flag is set, the encoder will code frames as error
- * resilient.
- */
-#define AOM_EFLAG_ERROR_RESILIENT (1 << 28)
-/*!\brief Enable s frame mode
- *
- * When this flag is set, the encoder will code frames as an
- * s frame.
- */
-#define AOM_EFLAG_SET_S_FRAME (1 << 29)
-/*!\brief Force primary_ref_frame to PRIMARY_REF_NONE
- *
- * When this flag is set, the encoder will set a frame's primary_ref_frame
- * to PRIMARY_REF_NONE
- */
-#define AOM_EFLAG_SET_PRIMARY_REF_NONE (1 << 30)
-
-/*!\brief AVx encoder control functions
- *
- * This set of macros define the control functions available for AVx
- * encoder interface.
- *
- * \sa #aom_codec_control
- */
-enum aome_enc_control_id {
- /*!\brief Codec control function to set which reference frame encoder can use.
- */
- AOME_USE_REFERENCE = 7,
-
- /*!\brief Codec control function to pass an ROI map to encoder.
- */
- AOME_SET_ROI_MAP = 8,
-
- /*!\brief Codec control function to pass an Active map to encoder.
- */
- AOME_SET_ACTIVEMAP,
-
- /*!\brief Codec control function to set encoder scaling mode.
- */
- AOME_SET_SCALEMODE = 11,
-
- /*!\brief Codec control function to set encoder spatial layer id.
- */
- AOME_SET_SPATIAL_LAYER_ID = 12,
-
- /*!\brief Codec control function to set encoder internal speed settings.
- *
- * Changes in this value influences, among others, the encoder's selection
- * of motion estimation methods. Values greater than 0 will increase encoder
- * speed at the expense of quality.
- *
- * \note Valid range: 0..8
- */
- AOME_SET_CPUUSED = 13,
-
- /*!\brief Codec control function to enable automatic setting and use of
- * alt-ref (arf) frames.
- */
- AOME_SET_ENABLEAUTOALTREF,
-
- /*!\brief Codec control function to set sharpness.
- */
- AOME_SET_SHARPNESS = AOME_SET_ENABLEAUTOALTREF + 2,
-
- /*!\brief Codec control function to set the threshold for MBs treated static.
- */
- AOME_SET_STATIC_THRESHOLD,
-
- /*!\brief Codec control function to get last quantizer chosen by the encoder.
- *
- * Return value uses internal quantizer scale defined by the codec.
- */
- AOME_GET_LAST_QUANTIZER = AOME_SET_STATIC_THRESHOLD + 2,
-
- /*!\brief Codec control function to get last quantizer chosen by the encoder.
- *
- * Return value uses the 0..63 scale as used by the rc_*_quantizer config
- * parameters.
- */
- AOME_GET_LAST_QUANTIZER_64,
-
- /*!\brief Codec control function to set the max no of frames to create arf.
- */
- AOME_SET_ARNR_MAXFRAMES,
-
- /*!\brief Codec control function to set the filter strength for the arf.
- */
- AOME_SET_ARNR_STRENGTH,
-
- /*!\brief Codec control function to set visual tuning.
- */
- AOME_SET_TUNING = AOME_SET_ARNR_STRENGTH + 2,
-
- /*!\brief Codec control function to set constrained quality level.
- *
- * \attention For this value to be used aom_codec_enc_cfg_t::rc_end_usage must be
- * set to #AOM_CQ.
- * \note Valid range: 0..63
- */
- AOME_SET_CQ_LEVEL,
-
- /*!\brief Codec control function to set Max data rate for Intra frames.
- *
- * This value controls additional clamping on the maximum size of a
- * keyframe. It is expressed as a percentage of the average
- * per-frame bitrate, with the special (and default) value 0 meaning
- * unlimited, or no additional clamping beyond the codec's built-in
- * algorithm.
- *
- * For example, to allocate no more than 4.5 frames worth of bitrate
- * to a keyframe, set this to 450.
- */
- AOME_SET_MAX_INTRA_BITRATE_PCT,
-
- /*!\brief Codec control function to set number of spatial layers.
- */
- AOME_SET_NUMBER_SPATIAL_LAYERS,
-
- /*!\brief Codec control function to set max data rate for Inter frames.
- *
- * This value controls additional clamping on the maximum size of an
- * inter frame. It is expressed as a percentage of the average
- * per-frame bitrate, with the special (and default) value 0 meaning
- * unlimited, or no additional clamping beyond the codec's built-in
- * algorithm.
- *
- * For example, to allow no more than 4.5 frames worth of bitrate
- * to an inter frame, set this to 450.
- */
- AV1E_SET_MAX_INTER_BITRATE_PCT = AOME_SET_MAX_INTRA_BITRATE_PCT + 2,
-
- /*!\brief Boost percentage for Golden Frame in CBR mode.
- *
- * This value controls the amount of boost given to Golden Frame in
- * CBR mode. It is expressed as a percentage of the average
- * per-frame bitrate, with the special (and default) value 0 meaning
- * the feature is off, i.e., no golden frame boost in CBR mode and
- * average bitrate target is used.
- *
- * For example, to allow 100% more bits, i.e, 2X, in a golden frame
- * than average frame, set this to 100.
- */
- AV1E_SET_GF_CBR_BOOST_PCT,
-
- /*!\brief Codec control function to set lossless encoding mode.
- *
- * AV1 can operate in lossless encoding mode, in which the bitstream
- * produced will be able to decode and reconstruct a perfect copy of
- * input source. This control function provides a means to switch encoder
- * into lossless coding mode(1) or normal coding mode(0) that may be lossy.
- * 0 = lossy coding mode
- * 1 = lossless coding mode
- *
- * By default, encoder operates in normal coding mode (maybe lossy).
- */
- AV1E_SET_LOSSLESS = AV1E_SET_GF_CBR_BOOST_PCT + 2,
-
- /** control function to enable the row based multi-threading of encoder. A
- * value that is equal to 1 indicates that row based multi-threading is
- * enabled.
- */
- AV1E_SET_ROW_MT,
-
- /*!\brief Codec control function to set number of tile columns.
- *
- * In encoding and decoding, AV1 allows an input image frame be partitioned
- * into separate vertical tile columns, which can be encoded or decoded
- * independently. This enables easy implementation of parallel encoding and
- * decoding. The parameter for this control describes the number of tile
- * columns (in log2 units), which has a valid range of [0, 6]:
- * 0 = 1 tile column
- * 1 = 2 tile columns
- * 2 = 4 tile columns
- * .....
- * n = 2**n tile columns
- * The requested tile columns will be capped by encoder based on image size
- * limitation (The minimum width of a tile column is 256 pixel, the maximum
- * is 4096).
- *
- * By default, the value is 0, i.e. one single column tile for entire image.
- */
- AV1E_SET_TILE_COLUMNS,
-
- /*!\brief Codec control function to set number of tile rows.
- *
- * In encoding and decoding, AV1 allows an input image frame be partitioned
- * into separate horizontal tile rows, which can be encoded or decoded
- * independently. The parameter for this control describes the number of tile
- * rows (in log2 units), which has a valid range of [0, 6]:
- * 0 = 1 tile row
- * 1 = 2 tile rows
- * 2 = 4 tile rows
- * .....
- * n = 2**n tile rows
- *
- * By default, the value is 0, i.e. one single row tile for entire image.
- */
- AV1E_SET_TILE_ROWS,
-
- /*!\brief Codec control function to enable frame parallel decoding feature.
- *
- * AV1 has a bitstream feature to reduce decoding dependency between frames
- * by turning off backward update of probability context used in encoding
- * and decoding. This allows staged parallel processing of more than one
- * video frame in the decoder. This control function provides a means to
- * turn this feature on or off for bitstreams produced by encoder.
- *
- * By default, this feature is off.
- */
- AV1E_SET_FRAME_PARALLEL_DECODING,
-
- /*!\brief Codec control function to enable error_resilient_mode
- *
- * AV1 has a bitstream feature to guarantee parseability of a frame
- * by turning on the error_resilient_decoding mode, even though the
- * reference buffers are unreliable or not received.
- *
- * By default, this feature is off.
- */
- AV1E_SET_ERROR_RESILIENT_MODE,
-
- /*!\brief Codec control function to enable s_frame_mode
- *
- * AV1 has a bitstream feature to designate certain frames as S-frames,
- * from where we can switch to a different stream,
- * even though the reference buffers may not be exactly identical.
- *
- * By default, this feature is off.
- */
- AV1E_SET_S_FRAME_MODE,
-
- /*!\brief Codec control function to set adaptive quantization mode.
- *
- * AV1 has a segment based feature that allows encoder to adaptively change
- * quantization parameter for each segment within a frame to improve the
- * subjective quality. This control makes encoder operate in one of the
- * several AQ_modes supported.
- *
- * By default, encoder operates with AQ_Mode 0(adaptive quantization off).
- */
- AV1E_SET_AQ_MODE,
-
- /*!\brief Codec control function to enable/disable periodic Q boost.
- *
- * One AV1 encoder speed feature is to enable quality boost by lowering
- * frame level Q periodically. This control function provides a means to
- * turn on/off this feature.
- * 0 = off
- * 1 = on
- *
- * By default, the encoder is allowed to use this feature for appropriate
- * encoding modes.
- */
- AV1E_SET_FRAME_PERIODIC_BOOST,
-
- /*!\brief Codec control function to set noise sensitivity.
- *
- * 0: off, 1: On(YOnly)
- */
- AV1E_SET_NOISE_SENSITIVITY,
-
- /*!\brief Codec control function to set content type.
- * \note Valid parameter range:
- * AOM_CONTENT_DEFAULT = Regular video content (Default)
- * AOM_CONTENT_SCREEN = Screen capture content
- */
- AV1E_SET_TUNE_CONTENT,
-
- /*!\brief Codec control function to set CDF update mode.
- *
- * 0: no update 1: update on every frame
- * 2: selectively update
- */
- AV1E_SET_CDF_UPDATE_MODE,
-
- /*!\brief Codec control function to set color space info.
- * \note Valid ranges: 0..23, default is "Unspecified".
- * 0 = For future use
- * 1 = BT.709
- * 2 = Unspecified
- * 3 = For future use
- * 4 = BT.470 System M (historical)
- * 5 = BT.470 System B, G (historical)
- * 6 = BT.601
- * 7 = SMPTE 240
- * 8 = Generic film (color filters using illuminant C)
- * 9 = BT.2020, BT.2100
- * 10 = SMPTE 428 (CIE 1931 XYZ)
- * 11 = SMPTE RP 431-2
- * 12 = SMPTE EG 432-1
- * 13 = For future use (values 13 - 21)
- * 22 = EBU Tech. 3213-E
- * 23 = For future use
- *
- */
- AV1E_SET_COLOR_PRIMARIES,
-
- /*!\brief Codec control function to set transfer function info.
- * \note Valid ranges: 0..19, default is "Unspecified".
- * 0 = For future use
- * 1 = BT.709
- * 2 = Unspecified
- * 3 = For future use
- * 4 = BT.470 System M (historical)
- * 5 = BT.470 System B, G (historical)
- * 6 = BT.601
- * 7 = SMPTE 240 M
- * 8 = Linear
- * 9 = Logarithmic (100 : 1 range)
- * 10 = Logarithmic (100 * Sqrt(10) : 1 range)
- * 11 = IEC 61966-2-4
- * 12 = BT.1361
- * 13 = sRGB or sYCC
- * 14 = BT.2020 10-bit systems
- * 15 = BT.2020 12-bit systems
- * 16 = SMPTE ST 2084, ITU BT.2100 PQ
- * 17 = SMPTE ST 428
- * 18 = BT.2100 HLG, ARIB STD-B67
- * 19 = For future use
- *
- */
- AV1E_SET_TRANSFER_CHARACTERISTICS,
-
- /*!\brief Codec control function to set matrix coefficients info.
- * \note Valid ranges: 0..15, default is "Unspecified".
- * 0 = Identity matrix
- * 1 = BT.709
- * 2 = Unspecified
- * 3 = For future use
- * 4 = US FCC 73.628
- * 5 = BT.470 System B, G (historical)
- * 6 = BT.601
- * 7 = SMPTE 240 M
- * 8 = YCgCo
- * 9 = BT.2020 non-constant luminance, BT.2100 YCbCr
- * 10 = BT.2020 constant luminance
- * 11 = SMPTE ST 2085 YDzDx
- * 12 = Chromaticity-derived non-constant luminance
- * 13 = Chromaticity-derived constant luminance
- * 14 = BT.2100 ICtCp
- * 15 = For future use
- *
- */
- AV1E_SET_MATRIX_COEFFICIENTS,
-
- /*!\brief Codec control function to set chroma 4:2:0 sample position info.
- * \note Valid ranges: 0..3, default is "UNKNOWN".
- * 0 = UNKNOWN,
- * 1 = VERTICAL
- * 2 = COLOCATED
- * 3 = RESERVED
- */
- AV1E_SET_CHROMA_SAMPLE_POSITION,
-
- /*!\brief Codec control function to set minimum interval between GF/ARF frames
- *
- * By default the value is set as 4.
- */
- AV1E_SET_MIN_GF_INTERVAL,
-
- /*!\brief Codec control function to set maximum interval between GF/ARF frames
- *
- * By default the value is set as 16.
- */
- AV1E_SET_MAX_GF_INTERVAL,
-
- /*!\brief Codec control function to get an Active map back from the encoder.
- */
- AV1E_GET_ACTIVEMAP,
-
- /*!\brief Codec control function to set color range bit.
- * \note Valid ranges: 0..1, default is 0
- * 0 = Limited range (16..235 or HBD equivalent)
- * 1 = Full range (0..255 or HBD equivalent)
- */
- AV1E_SET_COLOR_RANGE,
-
- /*!\brief Codec control function to set intended rendering image size.
- *
- * By default, this is identical to the image size in pixels.
- */
- AV1E_SET_RENDER_SIZE,
-
- /*!\brief Codec control function to set target level.
- *
- * 255: off (default); 0: only keep level stats; 10: target for level 1.0;
- * 11: target for level 1.1; ... 62: target for level 6.2
- */
- AV1E_SET_TARGET_LEVEL,
-
- /*!\brief Codec control function to get bitstream level.
- */
- AV1E_GET_LEVEL,
-
- /*!\brief Codec control function to set intended superblock size.
- *
- * By default, the superblock size is determined separately for each
- * frame by the encoder.
- *
- * Experiment: EXT_PARTITION
- */
- AV1E_SET_SUPERBLOCK_SIZE,
-
- /*!\brief Codec control function to enable automatic set and use
- * bwd-pred frames.
- *
- */
- AOME_SET_ENABLEAUTOBWDREF,
-
- /*!\brief Codec control function to encode with CDEF.
- *
- * CDEF is the constrained directional enhancement filter which is an
- * in-loop filter aiming to remove coding artifacts
- * 0 = do not apply CDEF
- * 1 = apply CDEF
- *
- * By default, the encoder applies CDEF.
- *
- * Experiment: AOM_CDEF
- */
- AV1E_SET_ENABLE_CDEF,
-
- /*!\brief Codec control function to encode with Loop Restoration Filter.
- *
- * 0 = do not apply Restoration Filter
- * 1 = apply Restoration Filter
- *
- * By default, the encoder applies Restoration Filter.
- *
- */
- AV1E_SET_ENABLE_RESTORATION,
-
- /*!\brief Codec control function to encode without trellis quantization.
- *
- * 0 = apply trellis quantization
- * 1 = do not apply trellis quantization
- *
- * By default, the encoder applies trellis optimization on quantized
- * coefficients.
- *
- */
- AV1E_SET_DISABLE_TRELLIS_QUANT,
-
- /*!\brief Codec control function to encode with quantisation matrices.
- *
- * AOM can operate with default quantisation matrices dependent on
- * quantisation level and block type.
- * 0 = do not use quantisation matrices
- * 1 = use quantisation matrices
- *
- * By default, the encoder operates without quantisation matrices.
- *
- * Experiment: AOM_QM
- */
-
- AV1E_SET_ENABLE_QM,
-
- /*!\brief Codec control function to set the min quant matrix flatness.
- *
- * AOM can operate with different ranges of quantisation matrices.
- * As quantisation levels increase, the matrices get flatter. This
- * control sets the minimum level of flatness from which the matrices
- * are determined.
- *
- * By default, the encoder sets this minimum at half the available
- * range.
- *
- * Experiment: AOM_QM
- */
- AV1E_SET_QM_MIN,
-
- /*!\brief Codec control function to set the max quant matrix flatness.
- *
- * AOM can operate with different ranges of quantisation matrices.
- * As quantisation levels increase, the matrices get flatter. This
- * control sets the maximum level of flatness possible.
- *
- * By default, the encoder sets this maximum at the top of the
- * available range.
- *
- * Experiment: AOM_QM
- */
- AV1E_SET_QM_MAX,
-
- /*!\brief Codec control function to set the luma (Y) quant matrix flatness.
- *
- * AOM can operate with different ranges of quantisation matrices.
- * As quantisation levels increase, the matrices get flatter. This
- * control sets the flatness for luma (Y).
- *
- * By default, the encoder sets this minimum at half the available
- * range.
- *
- * Experiment: AOM_QM
- */
- AV1E_SET_QM_Y,
-
- /*!\brief Codec control function to set the chroma (U) quant matrix flatness.
- *
- * AOM can operate with different ranges of quantisation matrices.
- * As quantisation levels increase, the matrices get flatter. This
- * control sets the flatness for chroma (U).
- *
- * By default, the encoder sets this minimum at half the available
- * range.
- *
- * Experiment: AOM_QM
- */
- AV1E_SET_QM_U,
-
- /*!\brief Codec control function to set the chroma (V) quant matrix flatness.
- *
- * AOM can operate with different ranges of quantisation matrices.
- * As quantisation levels increase, the matrices get flatter. This
- * control sets the flatness for chroma (V).
- *
- * By default, the encoder sets this minimum at half the available
- * range.
- *
- * Experiment: AOM_QM
- */
- AV1E_SET_QM_V,
-
- /*!\brief Codec control function to encode with dist_8x8.
- *
- * The dist_8x8 is enabled automatically for model tuning parameters that
- * require measuring distortion at the 8x8 level. This control also allows
- * measuring distortion at the 8x8 level for other tuning options
- * (e.g., PSNR), for testing purposes.
- * 0 = do not use dist_8x8
- * 1 = use dist_8x8
- *
- * By default, the encoder does not use dist_8x8
- *
- * Experiment: DIST_8X8
- */
- AV1E_SET_ENABLE_DIST_8X8,
-
- /*!\brief Codec control function to set a maximum number of tile groups.
- *
- * This will set the maximum number of tile groups. This will be
- * overridden if an MTU size is set. The default value is 1.
- *
- * Experiment: TILE_GROUPS
- */
- AV1E_SET_NUM_TG,
-
- /*!\brief Codec control function to set an MTU size for a tile group.
- *
- * This will set the maximum number of bytes in a tile group. This can be
- * exceeded only if a single tile is larger than this amount.
- *
- * By default, the value is 0, in which case a fixed number of tile groups
- * is used.
- *
- * Experiment: TILE_GROUPS
- */
- AV1E_SET_MTU,
-
- /*!\brief Codec control function to set dependent_horz_tiles.
- *
- * In encoding and decoding, AV1 allows enabling dependent horizontal tiles.
- * The parameter for this control describes the value of this flag,
- * which has a valid range [0, 1]:
- * 0 = disable dependent horizontal tile
- * 1 = enable dependent horizontal tile,
- *
- * By default, the value is 0, i.e. disable dependent horizontal tile.
- */
- AV1E_SET_TILE_DEPENDENT_ROWS,
-
- /*!\brief Codec control function to set the number of symbols in an ANS data
- * window.
- *
- * The number of ANS symbols (both boolean and non-booleans alphabets) in an
- * ANS data window is set to 1 << value.
- *
- * \note Valid range: [8, 23]
- *
- * Experiment: ANS
- */
- AV1E_SET_ANS_WINDOW_SIZE_LOG2,
-
- /*!\brief Codec control function to turn on / off dual filter
- * enabling/disabling.
- *
- * This will enable or disable dual filter. The default value is 1
- *
- */
- AV1E_SET_ENABLE_DF,
-
- /*!\brief Codec control function to turn on / off frame order hint for a
- * few tools:
- *
- * joint compound mode
- * motion field motion vector
- * ref frame sign bias
- *
- * The default value is 1.
- *
- */
- AV1E_SET_ENABLE_ORDER_HINT,
-
- /*!\brief Codec control function to turn on / off joint compound mode
- * at sequence level.
- *
- * This will enable or disable joint compound mode. The default value is 1.
- * If AV1E_SET_ENABLE_ORDER_HINT is 0, then this flag is forced to 0.
- *
- */
- AV1E_SET_ENABLE_JNT_COMP,
-
- /*!\brief Codec control function to turn on / off ref frame mvs (mfmv) usage
- * at sequence level.
- *
- * This will enable or disable usage of MFMV. The default value is 1.
- * If AV1E_SET_ENABLE_ORDER_HINT is 0, then this flag is forced to 0.
- *
- */
- AV1E_SET_ENABLE_REF_FRAME_MVS,
-
- /*!\brief Codec control function to set temporal mv prediction
- * enabling/disabling at frame level.
- *
- * This will enable or disable temporal mv prediction. The default value is 1.
- * If AV1E_SET_ENABLE_REF_FRAME_MVS is 0, then this flag is forced to 0.
- *
- */
- AV1E_SET_ALLOW_REF_FRAME_MVS,
-
- /*!\brief Codec control function to turn on / off warped motion usage
- * at sequence level.
- *
- * This will enable or disable usage of warped motion. The default value is 1.
- *
- */
- AV1E_SET_ENABLE_WARPED_MOTION,
-
- /*!\brief Codec control function to turn on / off warped motion usage
- * at frame level.
- *
- * This will enable or disable usage of warped motion. The default value is 1.
- * If AV1E_SET_ENABLE_WARPED_MOTION is 0, then this flag is forced to 0.
- *
- */
- AV1E_SET_ALLOW_WARPED_MOTION,
-
- /*!\brief Codec control function to turn on / off frame superresolution.
- *
- * This will enable or disable frame superresolution. The default value is 1
- * If AV1E_SET_ENABLE_SUPERRES is 0, then this flag is forced to 0.
- */
- AV1E_SET_ENABLE_SUPERRES,
-
- /*!\brief Codec control function to set loop_filter_across_tiles_v_enabled
- * and loop_filter_across_tiles_h_enabled.
- * In encoding and decoding, AV1 allows disabling loop filter across tile
- * boundary. The parameter for this control describes the value of this flag,
- * which has a valid range [0, 1]:
- * 0 = disable loop filter across tile boundary
- * 1 = enable loop filter across tile boundary
- *
- * By default, the value is 1, i.e. enable loop filter across tile boundary.
- *
- * Experiment: LOOPFILTERING_ACROSS_TILES_EXT
- */
- AV1E_SET_TILE_LOOPFILTER_V,
- AV1E_SET_TILE_LOOPFILTER_H,
-
- /*!\brief Codec control function to set loop_filter_across_tiles_enabled.
- *
- * In encoding and decoding, AV1 allows disabling loop filter across tile
- * boundary. The parameter for this control describes the value of this flag,
- * which has a valid range [0, 1]:
- * 0 = disable loop filter across tile boundary
- * 1 = enable loop filter across tile boundary
- *
- * By default, the value is 1, i.e. enable loop filter across tile boundary.
- *
- * Experiment: LOOPFILTERING_ACROSS_TILES
- */
- AV1E_SET_TILE_LOOPFILTER,
-
- /*!\brief Codec control function to set the delta q mode
- *
- * AV1 has a segment based feature that allows encoder to adaptively change
- * quantization parameter for each segment within a frame to improve the
- * subjective quality. the delta q mode is added on top of segment based
- * feature, and allows control per 64x64 q and lf delta.This control makes
- * encoder operate in one of the several DELTA_Q_modes supported.
- *
- * By default, encoder operates with DELTAQ_Mode 0(deltaq signaling off).
- */
- AV1E_SET_DELTAQ_MODE,
-
- /*!\brief Codec control function to set the single tile decoding mode to 0 or
- * 1.
- *
- * 0 means that the single tile decoding is off, and 1 means that the single
- * tile decoding is on.
- *
- * Experiment: EXT_TILE
- */
- AV1E_SET_SINGLE_TILE_DECODING,
-
- /*!\brief Codec control function to enable the extreme motion vector unit test
- * in AV1. Please note that this is only used in motion vector unit test.
- *
- * 0 : off, 1 : MAX_EXTREME_MV, 2 : MIN_EXTREME_MV
- */
- AV1E_ENABLE_MOTION_VECTOR_UNIT_TEST,
-
- /*!\brief Codec control function to signal picture timing info in the
- * bitstream.
- * \note Valid ranges: 0..1, default is "UNKNOWN". 0 = UNKNOWN, 1 = EQUAL
- */
- AV1E_SET_TIMING_INFO_TYPE,
-
- /*!\brief Codec control function to add film grain parameters (one of several
- * preset types) info in the bitstream.
- * \note Valid ranges: 0..16, default is "0". 0 = UNKNOWN,
- * 1..16 = different test vectors for grain
- */
- AV1E_SET_FILM_GRAIN_TEST_VECTOR,
-
- /*!\brief Codec control function to set the path to the film grain parameters
- */
- AV1E_SET_FILM_GRAIN_TABLE,
-
- /*!\brief Sets the noise level */
- AV1E_SET_DENOISE_NOISE_LEVEL,
-
- /*!\brief Sets the denoiser's block size */
- AV1E_SET_DENOISE_BLOCK_SIZE,
-
- /*!\brief Sets the chroma subsampling x value */
- AV1E_SET_CHROMA_SUBSAMPLING_X,
-
- /*!\brief Sets the chroma subsampling y value */
- AV1E_SET_CHROMA_SUBSAMPLING_Y,
-};
-
-/*!\brief aom 1-D scaling mode
- *
- * This set of constants defines the 1-D aom scaling modes.
- */
-typedef enum aom_scaling_mode_1d {
- AOME_NORMAL = 0,    /**< NOTE(review): presumably no scaling -- confirm */
- AOME_FOURFIVE = 1,  /**< NOTE(review): presumably 4/5 scaling -- confirm */
- AOME_THREEFIVE = 2, /**< NOTE(review): presumably 3/5 scaling -- confirm */
- AOME_ONETWO = 3     /**< NOTE(review): presumably 1/2 scaling -- confirm */
-} AOM_SCALING_MODE;
-
-/*!\brief Max number of segments
- *
- * This is the limit on the number of segments allowed within a frame.
- *
- * Currently same as "MAX_SEGMENTS" in AV1, the maximum that AV1 supports.
- * It also sizes the per-segment arrays of aom_roi_map_t.
- */
-#define AOM_MAX_SEGMENTS 8
-
-/*!\brief aom region of interest map
- *
- * This defines the data structure for the region of interest map, the
- * argument type (by pointer) of the AOME_SET_ROI_MAP control.
- *
- * TODO(yaowu): create a unit test for ROI map related APIs
- */
-typedef struct aom_roi_map {
- /*! An id between 0 and 7 for each 8x8 region within a frame. */
- unsigned char *roi_map;
- unsigned int rows; /**< Number of rows. */
- unsigned int cols; /**< Number of columns. */
- int delta_q[AOM_MAX_SEGMENTS]; /**< Quantizer deltas, one per segment. */
- int delta_lf[AOM_MAX_SEGMENTS]; /**< Loop filter deltas, one per segment. */
- /*! Static breakout threshold for each segment. */
- unsigned int static_threshold[AOM_MAX_SEGMENTS];
-} aom_roi_map_t;
-
-/*!\brief aom active region map
- *
- * This defines the data structure for the active region map, the argument
- * type (by pointer) of the AOME_SET_ACTIVEMAP and AV1E_GET_ACTIVEMAP controls.
- */
-
-typedef struct aom_active_map {
- /*!\brief specifies on (1) or off (0) for each 16x16 region within a frame */
- unsigned char *active_map;
- unsigned int rows; /**< number of rows */
- unsigned int cols; /**< number of cols */
-} aom_active_map_t;
-
-/*!\brief aom image scaling mode
- *
- * This defines the data structure for the image scaling mode, the argument
- * type (by pointer) of the AOME_SET_SCALEMODE control.
- */
-typedef struct aom_scaling_mode {
- AOM_SCALING_MODE h_scaling_mode; /**< horizontal scaling mode */
- AOM_SCALING_MODE v_scaling_mode; /**< vertical scaling mode */
-} aom_scaling_mode_t;
-
-/*!\brief AV1 encoder content type (value passed to AV1E_SET_TUNE_CONTENT) */
-typedef enum {
- AOM_CONTENT_DEFAULT, /**< Regular video content (default) */
- AOM_CONTENT_SCREEN,  /**< Screen capture content */
- AOM_CONTENT_INVALID  /**< NOTE(review): presumably an end-of-range sentinel -- confirm */
-} aom_tune_content;
-
-/*!\brief AV1 encoder timing info type signaling (AV1E_SET_TIMING_INFO_TYPE) */
-typedef enum {
- AOM_TIMING_UNSPECIFIED,
- AOM_TIMING_EQUAL,
- AOM_TIMING_DEC_MODEL
-} aom_timing_info_type_t;
-
-/*!\brief Model tuning parameters
- *
- * Changes the encoder to tune for certain types of input material.
- * Passed (as an int) to the AOME_SET_TUNING control.
- */
-typedef enum {
- AOM_TUNE_PSNR,
- AOM_TUNE_SSIM,
- AOM_TUNE_CDEF_DIST,
- AOM_TUNE_DAALA_DIST
-} aom_tune_metric;
-
-/*!\cond */
-/*!\brief Encoder control function parameter types
- *
- * Defines the data types that AOME/AV1E control functions take; each entry's
- * id must match the control's enumerator and its AOM_CTRL_<id> define. Note
- * that additional common controls are defined in aom.h.
- */
-
-AOM_CTRL_USE_TYPE(AOME_USE_REFERENCE, int)
-#define AOM_CTRL_AOME_USE_REFERENCE
-AOM_CTRL_USE_TYPE(AOME_SET_ROI_MAP, aom_roi_map_t *)
-#define AOM_CTRL_AOME_SET_ROI_MAP
-AOM_CTRL_USE_TYPE(AOME_SET_ACTIVEMAP, aom_active_map_t *)
-#define AOM_CTRL_AOME_SET_ACTIVEMAP
-AOM_CTRL_USE_TYPE(AOME_SET_SCALEMODE, aom_scaling_mode_t *)
-#define AOM_CTRL_AOME_SET_SCALEMODE
-
-AOM_CTRL_USE_TYPE(AOME_SET_SPATIAL_LAYER_ID, int)
-#define AOM_CTRL_AOME_SET_SPATIAL_LAYER_ID
-
-AOM_CTRL_USE_TYPE(AOME_SET_CPUUSED, int)
-#define AOM_CTRL_AOME_SET_CPUUSED
-AOM_CTRL_USE_TYPE(AOME_SET_DEVSF, int)
-#define AOM_CTRL_AOME_SET_DEVSF
-AOM_CTRL_USE_TYPE(AOME_SET_ENABLEAUTOALTREF, unsigned int)
-#define AOM_CTRL_AOME_SET_ENABLEAUTOALTREF
-
-AOM_CTRL_USE_TYPE(AOME_SET_ENABLEAUTOBWDREF, unsigned int)
-#define AOM_CTRL_AOME_SET_ENABLEAUTOBWDREF
-
-AOM_CTRL_USE_TYPE(AOME_SET_SHARPNESS, unsigned int)
-#define AOM_CTRL_AOME_SET_SHARPNESS
-AOM_CTRL_USE_TYPE(AOME_SET_STATIC_THRESHOLD, unsigned int)
-#define AOM_CTRL_AOME_SET_STATIC_THRESHOLD
-
-AOM_CTRL_USE_TYPE(AOME_SET_ARNR_MAXFRAMES, unsigned int)
-#define AOM_CTRL_AOME_SET_ARNR_MAXFRAMES
-AOM_CTRL_USE_TYPE(AOME_SET_ARNR_STRENGTH, unsigned int)
-#define AOM_CTRL_AOME_SET_ARNR_STRENGTH
-AOM_CTRL_USE_TYPE(AOME_SET_TUNING, int) /* aom_tune_metric */
-#define AOM_CTRL_AOME_SET_TUNING
-AOM_CTRL_USE_TYPE(AOME_SET_CQ_LEVEL, unsigned int)
-#define AOM_CTRL_AOME_SET_CQ_LEVEL
-
-AOM_CTRL_USE_TYPE(AV1E_SET_ROW_MT, int)
-#define AOM_CTRL_AV1E_SET_ROW_MT
-
-AOM_CTRL_USE_TYPE(AV1E_SET_TILE_COLUMNS, int)
-#define AOM_CTRL_AV1E_SET_TILE_COLUMNS
-AOM_CTRL_USE_TYPE(AV1E_SET_TILE_ROWS, int)
-#define AOM_CTRL_AV1E_SET_TILE_ROWS
-
-AOM_CTRL_USE_TYPE(AV1E_SET_TILE_DEPENDENT_ROWS, int)
-#define AOM_CTRL_AV1E_SET_TILE_DEPENDENT_ROWS
-
-AOM_CTRL_USE_TYPE(AV1E_SET_TILE_LOOPFILTER_V, int)
-#define AOM_CTRL_AV1E_SET_TILE_LOOPFILTER_V
-AOM_CTRL_USE_TYPE(AV1E_SET_TILE_LOOPFILTER_H, int)
-#define AOM_CTRL_AV1E_SET_TILE_LOOPFILTER_H
-AOM_CTRL_USE_TYPE(AV1E_SET_TILE_LOOPFILTER, int)
-#define AOM_CTRL_AV1E_SET_TILE_LOOPFILTER
-
-AOM_CTRL_USE_TYPE(AOME_GET_LAST_QUANTIZER, int *)
-#define AOM_CTRL_AOME_GET_LAST_QUANTIZER
-AOM_CTRL_USE_TYPE(AOME_GET_LAST_QUANTIZER_64, int *)
-#define AOM_CTRL_AOME_GET_LAST_QUANTIZER_64
-
-AOM_CTRL_USE_TYPE(AOME_SET_MAX_INTRA_BITRATE_PCT, unsigned int)
-#define AOM_CTRL_AOME_SET_MAX_INTRA_BITRATE_PCT
-AOM_CTRL_USE_TYPE(AV1E_SET_MAX_INTER_BITRATE_PCT, unsigned int)
-#define AOM_CTRL_AV1E_SET_MAX_INTER_BITRATE_PCT
-#define AOM_CTRL_AOME_SET_MAX_INTER_BITRATE_PCT /* legacy misspelled alias */
-AOM_CTRL_USE_TYPE(AOME_SET_NUMBER_SPATIAL_LAYERS, int)
-#define AOM_CTRL_AOME_SET_NUMBER_SPATIAL_LAYERS
-#define AOME_CTRL_AOME_SET_NUMBER_SPATIAL_LAYERS /* legacy misspelled alias */
-AOM_CTRL_USE_TYPE(AV1E_SET_GF_CBR_BOOST_PCT, unsigned int)
-#define AOM_CTRL_AV1E_SET_GF_CBR_BOOST_PCT
-
-AOM_CTRL_USE_TYPE(AV1E_SET_LOSSLESS, unsigned int)
-#define AOM_CTRL_AV1E_SET_LOSSLESS
-
-AOM_CTRL_USE_TYPE(AV1E_SET_ENABLE_CDEF, unsigned int)
-#define AOM_CTRL_AV1E_SET_ENABLE_CDEF
-
-AOM_CTRL_USE_TYPE(AV1E_SET_ENABLE_RESTORATION, unsigned int)
-#define AOM_CTRL_AV1E_SET_ENABLE_RESTORATION
-
-AOM_CTRL_USE_TYPE(AV1E_SET_DISABLE_TRELLIS_QUANT, unsigned int)
-#define AOM_CTRL_AV1E_SET_DISABLE_TRELLIS_QUANT
-
-AOM_CTRL_USE_TYPE(AV1E_SET_ENABLE_QM, unsigned int)
-#define AOM_CTRL_AV1E_SET_ENABLE_QM
-
-AOM_CTRL_USE_TYPE(AV1E_SET_ENABLE_DIST_8X8, unsigned int)
-#define AOM_CTRL_AV1E_SET_ENABLE_DIST_8X8
-
-AOM_CTRL_USE_TYPE(AV1E_SET_QM_MIN, unsigned int)
-#define AOM_CTRL_AV1E_SET_QM_MIN
-
-AOM_CTRL_USE_TYPE(AV1E_SET_QM_MAX, unsigned int)
-#define AOM_CTRL_AV1E_SET_QM_MAX
-
-AOM_CTRL_USE_TYPE(AV1E_SET_QM_Y, unsigned int)
-#define AOM_CTRL_AV1E_SET_QM_Y
-
-AOM_CTRL_USE_TYPE(AV1E_SET_QM_U, unsigned int)
-#define AOM_CTRL_AV1E_SET_QM_U
-
-AOM_CTRL_USE_TYPE(AV1E_SET_QM_V, unsigned int)
-#define AOM_CTRL_AV1E_SET_QM_V
-
-AOM_CTRL_USE_TYPE(AV1E_SET_NUM_TG, unsigned int)
-#define AOM_CTRL_AV1E_SET_NUM_TG
-AOM_CTRL_USE_TYPE(AV1E_SET_MTU, unsigned int)
-#define AOM_CTRL_AV1E_SET_MTU
-
-AOM_CTRL_USE_TYPE(AV1E_SET_TIMING_INFO_TYPE, aom_timing_info_type_t)
-#define AOM_CTRL_AV1E_SET_TIMING_INFO_TYPE
-
-AOM_CTRL_USE_TYPE(AV1E_SET_ENABLE_DF, unsigned int)
-#define AOM_CTRL_AV1E_SET_ENABLE_DF
-
-AOM_CTRL_USE_TYPE(AV1E_SET_ENABLE_ORDER_HINT, unsigned int)
-#define AOM_CTRL_AV1E_SET_ENABLE_ORDER_HINT
-
-AOM_CTRL_USE_TYPE(AV1E_SET_ENABLE_JNT_COMP, unsigned int)
-#define AOM_CTRL_AV1E_SET_ENABLE_JNT_COMP
-
-AOM_CTRL_USE_TYPE(AV1E_SET_ENABLE_REF_FRAME_MVS, unsigned int)
-#define AOM_CTRL_AV1E_SET_ENABLE_REF_FRAME_MVS
-
-AOM_CTRL_USE_TYPE(AV1E_SET_ALLOW_REF_FRAME_MVS, unsigned int)
-#define AOM_CTRL_AV1E_SET_ALLOW_REF_FRAME_MVS
-
-AOM_CTRL_USE_TYPE(AV1E_SET_ENABLE_WARPED_MOTION, unsigned int)
-#define AOM_CTRL_AV1E_SET_ENABLE_WARPED_MOTION
-
-AOM_CTRL_USE_TYPE(AV1E_SET_ALLOW_WARPED_MOTION, unsigned int)
-#define AOM_CTRL_AV1E_SET_ALLOW_WARPED_MOTION
-
-AOM_CTRL_USE_TYPE(AV1E_SET_ENABLE_SUPERRES, unsigned int)
-#define AOM_CTRL_AV1E_SET_ENABLE_SUPERRES
-
-AOM_CTRL_USE_TYPE(AV1E_SET_FRAME_PARALLEL_DECODING, unsigned int)
-#define AOM_CTRL_AV1E_SET_FRAME_PARALLEL_DECODING
-
-AOM_CTRL_USE_TYPE(AV1E_SET_ERROR_RESILIENT_MODE, unsigned int)
-#define AOM_CTRL_AV1E_SET_ERROR_RESILIENT_MODE
-
-AOM_CTRL_USE_TYPE(AV1E_SET_S_FRAME_MODE, unsigned int)
-#define AOM_CTRL_AV1E_SET_S_FRAME_MODE
-
-AOM_CTRL_USE_TYPE(AV1E_SET_AQ_MODE, unsigned int)
-#define AOM_CTRL_AV1E_SET_AQ_MODE
-
-AOM_CTRL_USE_TYPE(AV1E_SET_DELTAQ_MODE, unsigned int)
-#define AOM_CTRL_AV1E_SET_DELTAQ_MODE
-
-AOM_CTRL_USE_TYPE(AV1E_SET_FRAME_PERIODIC_BOOST, unsigned int)
-#define AOM_CTRL_AV1E_SET_FRAME_PERIODIC_BOOST
-
-AOM_CTRL_USE_TYPE(AV1E_SET_NOISE_SENSITIVITY, unsigned int)
-#define AOM_CTRL_AV1E_SET_NOISE_SENSITIVITY
-
-AOM_CTRL_USE_TYPE(AV1E_SET_TUNE_CONTENT, int) /* aom_tune_content */
-#define AOM_CTRL_AV1E_SET_TUNE_CONTENT
-
-AOM_CTRL_USE_TYPE(AV1E_SET_COLOR_PRIMARIES, int)
-#define AOM_CTRL_AV1E_SET_COLOR_PRIMARIES
-
-AOM_CTRL_USE_TYPE(AV1E_SET_TRANSFER_CHARACTERISTICS, int)
-#define AOM_CTRL_AV1E_SET_TRANSFER_CHARACTERISTICS
-
-AOM_CTRL_USE_TYPE(AV1E_SET_MATRIX_COEFFICIENTS, int)
-#define AOM_CTRL_AV1E_SET_MATRIX_COEFFICIENTS
-
-AOM_CTRL_USE_TYPE(AV1E_SET_CHROMA_SAMPLE_POSITION, int)
-#define AOM_CTRL_AV1E_SET_CHROMA_SAMPLE_POSITION
-
-AOM_CTRL_USE_TYPE(AV1E_SET_MIN_GF_INTERVAL, unsigned int)
-#define AOM_CTRL_AV1E_SET_MIN_GF_INTERVAL
-
-AOM_CTRL_USE_TYPE(AV1E_SET_MAX_GF_INTERVAL, unsigned int)
-#define AOM_CTRL_AV1E_SET_MAX_GF_INTERVAL
-
-AOM_CTRL_USE_TYPE(AV1E_GET_ACTIVEMAP, aom_active_map_t *)
-#define AOM_CTRL_AV1E_GET_ACTIVEMAP
-
-AOM_CTRL_USE_TYPE(AV1E_SET_COLOR_RANGE, int)
-#define AOM_CTRL_AV1E_SET_COLOR_RANGE
-
-AOM_CTRL_USE_TYPE(AV1E_SET_RENDER_SIZE, int *)
-#define AOM_CTRL_AV1E_SET_RENDER_SIZE
-
-AOM_CTRL_USE_TYPE(AV1E_SET_SUPERBLOCK_SIZE, unsigned int)
-#define AOM_CTRL_AV1E_SET_SUPERBLOCK_SIZE
-
-AOM_CTRL_USE_TYPE(AV1E_SET_TARGET_LEVEL, unsigned int)
-#define AOM_CTRL_AV1E_SET_TARGET_LEVEL
-
-AOM_CTRL_USE_TYPE(AV1E_GET_LEVEL, int *)
-#define AOM_CTRL_AV1E_GET_LEVEL
-
-AOM_CTRL_USE_TYPE(AV1E_SET_ANS_WINDOW_SIZE_LOG2, unsigned int)
-#define AOM_CTRL_AV1E_SET_ANS_WINDOW_SIZE_LOG2
-
-AOM_CTRL_USE_TYPE(AV1E_SET_SINGLE_TILE_DECODING, unsigned int)
-#define AOM_CTRL_AV1E_SET_SINGLE_TILE_DECODING
-
-AOM_CTRL_USE_TYPE(AV1E_ENABLE_MOTION_VECTOR_UNIT_TEST, unsigned int)
-#define AOM_CTRL_AV1E_ENABLE_MOTION_VECTOR_UNIT_TEST
-
-AOM_CTRL_USE_TYPE(AV1E_SET_FILM_GRAIN_TEST_VECTOR, unsigned int)
-#define AOM_CTRL_AV1E_SET_FILM_GRAIN_TEST_VECTOR
-
-AOM_CTRL_USE_TYPE(AV1E_SET_FILM_GRAIN_TABLE, const char *)
-#define AOM_CTRL_AV1E_SET_FILM_GRAIN_TABLE
-
-AOM_CTRL_USE_TYPE(AV1E_SET_CDF_UPDATE_MODE, int)
-#define AOM_CTRL_AV1E_SET_CDF_UPDATE_MODE
-
-#ifdef CONFIG_DENOISE
-AOM_CTRL_USE_TYPE(AV1E_SET_DENOISE_NOISE_LEVEL, int)
-#define AOM_CTRL_AV1E_SET_DENOISE_NOISE_LEVEL
-
-AOM_CTRL_USE_TYPE(AV1E_SET_DENOISE_BLOCK_SIZE, unsigned int)
-#define AOM_CTRL_AV1E_SET_DENOISE_BLOCK_SIZE
-#endif
-
-AOM_CTRL_USE_TYPE(AV1E_SET_CHROMA_SUBSAMPLING_X, unsigned int)
-#define AOM_CTRL_AV1E_SET_CHROMA_SUBSAMPLING_X
-
-AOM_CTRL_USE_TYPE(AV1E_SET_CHROMA_SUBSAMPLING_Y, unsigned int)
-#define AOM_CTRL_AV1E_SET_CHROMA_SUBSAMPLING_Y
-
-/*!\endcond */
-/*! @} - end defgroup aom_encoder */
-#ifdef __cplusplus
-} // extern "C"
-#endif
-
-#endif // AOM_AOM_AOMCX_H_
diff --git a/third_party/aom/aom/aomdx.h b/third_party/aom/aom/aomdx.h
deleted file mode 100644
index 765856a1b..000000000
--- a/third_party/aom/aom/aomdx.h
+++ /dev/null
@@ -1,302 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-/*!\defgroup aom_decoder AOMedia AOM/AV1 Decoder
- * \ingroup aom
- *
- * @{
- */
-/*!\file
- * \brief Provides definitions for using AOM or AV1 within the aom Decoder
- * interface.
- */
-#ifndef AOM_AOM_AOMDX_H_
-#define AOM_AOM_AOMDX_H_
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-/* Include controls common to both the encoder and decoder */
-#include "aom/aom.h"
-
-/*!\name Algorithm interface for AV1
- *
- * This interface provides the capability to decode AV1 streams.
- * @{
- */
-extern aom_codec_iface_t aom_codec_av1_dx_algo;
-extern aom_codec_iface_t *aom_codec_av1_dx(void);
-/*!@} - end algorithm interface member group*/
-
-/** Data structure that stores bit accounting for debug
- */
-typedef struct Accounting Accounting;
-
-#ifndef AOM_INSPECTION_H_
-/** Callback that inspects decoder frame data.
- */
-typedef void (*aom_inspect_cb)(void *decoder, void *ctx);
-#endif
-
-/*!\brief Structure to hold inspection callback and context.
- *
- * Defines a structure to hold the inspection callback function and calling
- * context.
- */
-typedef struct aom_inspect_init {
- /*! Inspection callback. */
- aom_inspect_cb inspect_cb;
-
- /*! Inspection context. */
- void *inspect_ctx;
-} aom_inspect_init;
-
-/*!\brief Structure to hold a tile's start address and size in the bitstream.
- *
- * Defines a structure to hold a tile's start address and size in the bitstream.
- */
-typedef struct aom_tile_data {
- /*! Tile data size. */
- size_t coded_tile_data_size;
- /*! Tile's start address. */
- const void *coded_tile_data;
- /*! Extra size information. */
- size_t extra_size;
-} aom_tile_data;
-
-/*!\brief Structure to hold the external reference frame pointer.
- *
- * Define a structure to hold the external reference frame pointer.
- */
-typedef struct av1_ext_ref_frame {
- /*! Start pointer of external references. */
- aom_image_t *img;
- /*! Number of available external references. */
- int num;
-} av1_ext_ref_frame_t;
-
-/*!\enum aom_dec_control_id
- * \brief AOM decoder control functions
- *
- * This set of macros define the control functions available for the AOM
- * decoder interface.
- *
- * \sa #aom_codec_control
- */
-enum aom_dec_control_id {
- /** control function to get info on which reference frames were updated
- * by the last decode
- */
- AOMD_GET_LAST_REF_UPDATES = AOM_DECODER_CTRL_ID_START,
-
- /** check if the indicated frame is corrupted */
- AOMD_GET_FRAME_CORRUPTED,
-
- /** control function to get info on which reference frames were used
- * by the last decode
- */
- AOMD_GET_LAST_REF_USED,
-
- /** control function to get the dimensions that the current frame is decoded
- * at. This may be different to the intended display size for the frame as
- * specified in the wrapper or frame header (see AV1D_GET_DISPLAY_SIZE). */
- AV1D_GET_FRAME_SIZE,
-
- /** control function to get the current frame's intended display dimensions
- * (as specified in the wrapper or frame header). This may be different to
- * the decoded dimensions of this frame (see AV1D_GET_FRAME_SIZE). */
- AV1D_GET_DISPLAY_SIZE,
-
- /** control function to get the bit depth of the stream. */
- AV1D_GET_BIT_DEPTH,
-
- /** control function to get the image format of the stream. */
- AV1D_GET_IMG_FORMAT,
-
- /** control function to get the size of the tile. */
- AV1D_GET_TILE_SIZE,
-
- /** control function to set the byte alignment of the planes in the reference
- * buffers. Valid values are power of 2, from 32 to 1024. A value of 0 sets
- * legacy alignment. I.e. Y plane is aligned to 32 bytes, U plane directly
- * follows Y plane, and V plane directly follows U plane. Default value is 0.
- */
- AV1_SET_BYTE_ALIGNMENT,
-
- /** control function to invert the decoding order to from right to left. The
- * function is used in a test to confirm the decoding independence of tile
- * columns. The function may be used in application where this order
- * of decoding is desired.
- *
- * TODO(yaowu): Rework the unit test that uses this control, and in a future
- * release, this test-only control shall be removed.
- */
- AV1_INVERT_TILE_DECODE_ORDER,
-
- /** control function to set the skip loop filter flag. Valid values are
- * integers. The decoder will skip the loop filter when its value is set to
- * nonzero. If the loop filter is skipped the decoder may accumulate decode
- * artifacts. The default value is 0.
- */
- AV1_SET_SKIP_LOOP_FILTER,
-
- /** control function to retrieve a pointer to the Accounting struct. When
- * compiled without --enable-accounting, this returns AOM_CODEC_INCAPABLE.
- * If called before a frame has been decoded, this returns AOM_CODEC_ERROR.
- * The caller should ensure that AOM_CODEC_OK is returned before attempting
- * to dereference the Accounting pointer.
- */
- AV1_GET_ACCOUNTING,
-
- /** control function to get last decoded frame quantizer. Returned value uses
- * internal quantizer scale defined by the codec.
- */
- AOMD_GET_LAST_QUANTIZER,
-
- /** control function to set the range of tile decoding. A value that is
- * greater and equal to zero indicates only the specific row/column is
- * decoded. A value that is -1 indicates the whole row/column is decoded.
- * A special case is both values are -1 that means the whole frame is
- * decoded.
- */
- AV1_SET_DECODE_TILE_ROW,
- AV1_SET_DECODE_TILE_COL,
- /** control function to set the tile coding mode. A value that is equal to
- * zero indicates the tiles are coded in normal tile mode. A value that is
- * 1 indicates the tiles are coded in large-scale tile mode.
- */
- AV1_SET_TILE_MODE,
- /** control function to get the frame header information of an encoded frame
- * in the bitstream. This provides a way to access a frame's header data.
- */
- AV1D_GET_FRAME_HEADER_INFO,
- /** control function to get the start address and size of a tile in the coded
- * bitstream. This provides a way to access a specific tile's bitstream data.
- */
- AV1D_GET_TILE_DATA,
- /** control function to set the external references' pointers in the decoder.
- * This is used while decoding the tile list OBU in large-scale tile coding
- * mode.
- */
- AV1D_SET_EXT_REF_PTR,
- /** control function to enable the ext-tile software debug and testing code in
- * the decoder.
- */
- AV1D_EXT_TILE_DEBUG,
-
- /** control function to enable the row based multi-threading of decoding. A
- * value that is equal to 1 indicates that row based multi-threading is
- * enabled.
- */
- AV1D_SET_ROW_MT,
-
- /** control function to indicate whether bitstream is in Annex-B format. */
- AV1D_SET_IS_ANNEXB,
-
- /** control function to indicate which operating point to use. A scalable
- * stream may define multiple operating points, each of which defines a
- * set of temporal and spatial layers to be processed. The operating point
- * index may take a value between 0 and operating_points_cnt_minus_1 (which
- * is at most 31).
- */
- AV1D_SET_OPERATING_POINT,
-
- /** control function to indicate whether to output one frame per temporal
- * unit (the default), or one frame per spatial layer.
- * In a scalable stream, each temporal unit corresponds to a single "frame"
- * of video, and within a temporal unit there may be multiple spatial layers
- * with different versions of that frame.
- * For video playback, only the highest-quality version (within the
- * selected operating point) is needed, but for some use cases it is useful
- * to have access to multiple versions of a frame when they are available.
- */
- AV1D_SET_OUTPUT_ALL_LAYERS,
-
- /** control function to set an aom_inspect_cb callback that is invoked each
- * time a frame is decoded. When compiled without --enable-inspection, this
- * returns AOM_CODEC_INCAPABLE.
- */
- AV1_SET_INSPECTION_CALLBACK,
-
- /** control function to set the skip film grain flag. Valid values are
- * integers. The decoder will skip the film grain when its value is set to
- * nonzero. The default value is 0.
- */
- AV1D_SET_SKIP_FILM_GRAIN,
-
- AOM_DECODER_CTRL_ID_MAX,
-};
-
-/*!\cond */
-/*!\brief AOM decoder control function parameter type
- *
- * Defines the data types that AOMD control functions take. Note that
- * additional common controls are defined in aom.h
- *
- */
-
-AOM_CTRL_USE_TYPE(AOMD_GET_LAST_REF_UPDATES, int *)
-#define AOM_CTRL_AOMD_GET_LAST_REF_UPDATES
-AOM_CTRL_USE_TYPE(AOMD_GET_FRAME_CORRUPTED, int *)
-#define AOM_CTRL_AOMD_GET_FRAME_CORRUPTED
-AOM_CTRL_USE_TYPE(AOMD_GET_LAST_REF_USED, int *)
-#define AOM_CTRL_AOMD_GET_LAST_REF_USED
-AOM_CTRL_USE_TYPE(AOMD_GET_LAST_QUANTIZER, int *)
-#define AOM_CTRL_AOMD_GET_LAST_QUANTIZER
-AOM_CTRL_USE_TYPE(AV1D_GET_DISPLAY_SIZE, int *)
-#define AOM_CTRL_AV1D_GET_DISPLAY_SIZE
-AOM_CTRL_USE_TYPE(AV1D_GET_BIT_DEPTH, unsigned int *)
-#define AOM_CTRL_AV1D_GET_BIT_DEPTH
-AOM_CTRL_USE_TYPE(AV1D_GET_IMG_FORMAT, aom_img_fmt_t *)
-#define AOM_CTRL_AV1D_GET_IMG_FORMAT
-AOM_CTRL_USE_TYPE(AV1D_GET_TILE_SIZE, unsigned int *)
-#define AOM_CTRL_AV1D_GET_TILE_SIZE
-AOM_CTRL_USE_TYPE(AV1D_GET_FRAME_SIZE, int *)
-#define AOM_CTRL_AV1D_GET_FRAME_SIZE
-AOM_CTRL_USE_TYPE(AV1_INVERT_TILE_DECODE_ORDER, int)
-#define AOM_CTRL_AV1_INVERT_TILE_DECODE_ORDER
-AOM_CTRL_USE_TYPE(AV1_GET_ACCOUNTING, Accounting **)
-#define AOM_CTRL_AV1_GET_ACCOUNTING
-AOM_CTRL_USE_TYPE(AV1_SET_DECODE_TILE_ROW, int)
-#define AOM_CTRL_AV1_SET_DECODE_TILE_ROW
-AOM_CTRL_USE_TYPE(AV1_SET_DECODE_TILE_COL, int)
-#define AOM_CTRL_AV1_SET_DECODE_TILE_COL
-AOM_CTRL_USE_TYPE(AV1_SET_TILE_MODE, unsigned int)
-#define AOM_CTRL_AV1_SET_TILE_MODE
-AOM_CTRL_USE_TYPE(AV1D_GET_FRAME_HEADER_INFO, aom_tile_data *)
-#define AOM_CTRL_AV1D_GET_FRAME_HEADER_INFO
-AOM_CTRL_USE_TYPE(AV1D_GET_TILE_DATA, aom_tile_data *)
-#define AOM_CTRL_AV1D_GET_TILE_DATA
-AOM_CTRL_USE_TYPE(AV1D_SET_EXT_REF_PTR, av1_ext_ref_frame_t *)
-#define AOM_CTRL_AV1D_SET_EXT_REF_PTR
-AOM_CTRL_USE_TYPE(AV1D_EXT_TILE_DEBUG, unsigned int)
-#define AOM_CTRL_AV1D_EXT_TILE_DEBUG
-AOM_CTRL_USE_TYPE(AV1D_SET_ROW_MT, unsigned int)
-#define AOM_CTRL_AV1D_SET_ROW_MT
-AOM_CTRL_USE_TYPE(AV1D_SET_SKIP_FILM_GRAIN, int)
-#define AOM_CTRL_AV1D_SET_SKIP_FILM_GRAIN
-AOM_CTRL_USE_TYPE(AV1D_SET_IS_ANNEXB, unsigned int)
-#define AOM_CTRL_AV1D_SET_IS_ANNEXB
-AOM_CTRL_USE_TYPE(AV1D_SET_OPERATING_POINT, int)
-#define AOM_CTRL_AV1D_SET_OPERATING_POINT
-AOM_CTRL_USE_TYPE(AV1D_SET_OUTPUT_ALL_LAYERS, int)
-#define AOM_CTRL_AV1D_SET_OUTPUT_ALL_LAYERS
-AOM_CTRL_USE_TYPE(AV1_SET_INSPECTION_CALLBACK, aom_inspect_init *)
-#define AOM_CTRL_AV1_SET_INSPECTION_CALLBACK
-/*!\endcond */
-/*! @} - end defgroup aom_decoder */
-
-#ifdef __cplusplus
-} // extern "C"
-#endif
-
-#endif // AOM_AOM_AOMDX_H_
diff --git a/third_party/aom/aom/exports_com b/third_party/aom/aom/exports_com
deleted file mode 100644
index 2798bd51a..000000000
--- a/third_party/aom/aom/exports_com
+++ /dev/null
@@ -1,32 +0,0 @@
-text aom_codec_build_config
-text aom_codec_control_
-text aom_codec_destroy
-text aom_codec_err_to_string
-text aom_codec_error
-text aom_codec_error_detail
-text aom_codec_get_caps
-text aom_codec_iface_name
-text aom_codec_version
-text aom_codec_version_extra_str
-text aom_codec_version_str
-text aom_img_alloc
-text aom_img_alloc_with_border
-text aom_img_flip
-text aom_img_free
-text aom_img_plane_height
-text aom_img_plane_width
-text aom_img_set_rect
-text aom_img_wrap
-text aom_malloc
-text aom_rb_bytes_read
-text aom_rb_read_bit
-text aom_rb_read_literal
-text aom_rb_read_uvlc
-text aom_uleb_decode
-text aom_uleb_encode
-text aom_uleb_encode_fixed_size
-text aom_uleb_size_in_bytes
-text aom_wb_bytes_written
-text aom_wb_write_bit
-text aom_wb_write_literal
-text aom_wb_write_unsigned_literal
diff --git a/third_party/aom/aom/exports_dec b/third_party/aom/aom/exports_dec
deleted file mode 100644
index d7d1c4f7d..000000000
--- a/third_party/aom/aom/exports_dec
+++ /dev/null
@@ -1,10 +0,0 @@
-text aom_codec_dec_init_ver
-text aom_codec_decode
-text aom_codec_get_frame
-text aom_codec_get_stream_info
-text aom_codec_peek_stream_info
-text aom_codec_register_put_frame_cb
-text aom_codec_register_put_slice_cb
-text aom_codec_set_frame_buffer_functions
-text aom_obu_type_to_string
-text aom_read_obu_header
diff --git a/third_party/aom/aom/exports_enc b/third_party/aom/aom/exports_enc
deleted file mode 100644
index 918d742f0..000000000
--- a/third_party/aom/aom/exports_enc
+++ /dev/null
@@ -1,18 +0,0 @@
-text aom_codec_enc_config_default
-text aom_codec_enc_config_set
-text aom_codec_enc_init_multi_ver
-text aom_codec_enc_init_ver
-text aom_codec_encode
-text aom_codec_get_cx_data
-text aom_codec_get_global_headers
-text aom_codec_get_preview_frame
-text aom_codec_set_cx_data_buf
-text aom_film_grain_table_append
-text aom_film_grain_table_free
-text aom_film_grain_table_write
-text aom_flat_block_finder_init
-text aom_flat_block_finder_run
-text aom_noise_model_init
-text aom_noise_model_get_grain_parameters
-text aom_noise_model_save_latest
-text aom_noise_model_update
diff --git a/third_party/aom/aom/exports_test b/third_party/aom/aom/exports_test
deleted file mode 100644
index 01b864bae..000000000
--- a/third_party/aom/aom/exports_test
+++ /dev/null
@@ -1,2 +0,0 @@
-text aom_dsp_rtcd
-text aom_scale_rtcd
diff --git a/third_party/aom/aom/internal/aom_codec_internal.h b/third_party/aom/aom/internal/aom_codec_internal.h
deleted file mode 100644
index 21c0dc69c..000000000
--- a/third_party/aom/aom/internal/aom_codec_internal.h
+++ /dev/null
@@ -1,441 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-/*!\file
- * \brief Describes the decoder algorithm interface for algorithm
- * implementations.
- *
- * This file defines the private structures and data types that are only
- * relevant to implementing an algorithm, as opposed to using it.
- *
- * To create a decoder algorithm class, an interface structure is put
- * into the global namespace:
- * <pre>
- * my_codec.c:
- * aom_codec_iface_t my_codec = {
- * "My Codec v1.0",
- * AOM_CODEC_ALG_ABI_VERSION,
- * ...
- * };
- * </pre>
- *
- * An application instantiates a specific decoder instance by using
- * aom_codec_init() and a pointer to the algorithm's interface structure:
- * <pre>
- * my_app.c:
- * extern aom_codec_iface_t my_codec;
- * {
- * aom_codec_ctx_t algo;
- * res = aom_codec_init(&algo, &my_codec);
- * }
- * </pre>
- *
- * Once initialized, the instance is managed using other functions from
- * the aom_codec_* family.
- */
-#ifndef AOM_AOM_INTERNAL_AOM_CODEC_INTERNAL_H_
-#define AOM_AOM_INTERNAL_AOM_CODEC_INTERNAL_H_
-#include "../aom_decoder.h"
-#include "../aom_encoder.h"
-#include <stdarg.h>
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-/*!\brief Current ABI version number
- *
- * \internal
- * If this file is altered in any way that changes the ABI, this value
- * must be bumped. Examples include, but are not limited to, changing
- * types, removing or reassigning enums, adding/removing/rearranging
- * fields to structures
- */
-#define AOM_CODEC_INTERNAL_ABI_VERSION (5) /**<\hideinitializer*/
-
-typedef struct aom_codec_alg_priv aom_codec_alg_priv_t;
-typedef struct aom_codec_priv_enc_mr_cfg aom_codec_priv_enc_mr_cfg_t;
-
-/*!\brief init function pointer prototype
- *
- * Performs algorithm-specific initialization of the decoder context. This
- * function is called by the generic aom_codec_init() wrapper function, so
- * plugins implementing this interface may trust the input parameters to be
- * properly initialized.
- *
- * \param[in] ctx Pointer to this instance's context
- * \retval #AOM_CODEC_OK
- * The input stream was recognized and decoder initialized.
- * \retval #AOM_CODEC_MEM_ERROR
- * Memory operation failed.
- */
-typedef aom_codec_err_t (*aom_codec_init_fn_t)(
- aom_codec_ctx_t *ctx, aom_codec_priv_enc_mr_cfg_t *data);
-
-/*!\brief destroy function pointer prototype
- *
- * Performs algorithm-specific destruction of the decoder context. This
- * function is called by the generic aom_codec_destroy() wrapper function,
- * so plugins implementing this interface may trust the input parameters
- * to be properly initialized.
- *
- * \param[in] ctx Pointer to this instance's context
- * \retval #AOM_CODEC_OK
- * The input stream was recognized and decoder initialized.
- * \retval #AOM_CODEC_MEM_ERROR
- * Memory operation failed.
- */
-typedef aom_codec_err_t (*aom_codec_destroy_fn_t)(aom_codec_alg_priv_t *ctx);
-
-/*!\brief parse stream info function pointer prototype
- *
- * Performs high level parsing of the bitstream. This function is called by the
- * generic aom_codec_peek_stream_info() wrapper function, so plugins
- * implementing this interface may trust the input parameters to be properly
- * initialized.
- *
- * \param[in] data Pointer to a block of data to parse
- * \param[in] data_sz Size of the data buffer
- * \param[in,out] si Pointer to stream info to update. The is_annexb
- * member \ref MUST be properly initialized. This
- * function sets the rest of the members.
- *
- * \retval #AOM_CODEC_OK
- * Bitstream is parsable and stream information updated
- */
-typedef aom_codec_err_t (*aom_codec_peek_si_fn_t)(const uint8_t *data,
- size_t data_sz,
- aom_codec_stream_info_t *si);
-
-/*!\brief Return information about the current stream.
- *
- * Returns information about the stream that has been parsed during decoding.
- *
- * \param[in] ctx Pointer to this instance's context
- * \param[in,out] si Pointer to stream info to update
- *
- * \retval #AOM_CODEC_OK
- * Bitstream is parsable and stream information updated
- */
-typedef aom_codec_err_t (*aom_codec_get_si_fn_t)(aom_codec_alg_priv_t *ctx,
- aom_codec_stream_info_t *si);
-
-/*!\brief control function pointer prototype
- *
- * This function is used to exchange algorithm specific data with the decoder
- * instance. This can be used to implement features specific to a particular
- * algorithm.
- *
- * This function is called by the generic aom_codec_control() wrapper
- * function, so plugins implementing this interface may trust the input
- * parameters to be properly initialized. However, this interface does not
- * provide type safety for the exchanged data or assign meanings to the
- * control codes. Those details should be specified in the algorithm's
- * header file. In particular, the ctrl_id parameter is guaranteed to exist
- * in the algorithm's control mapping table, and the data parameter may be NULL.
- *
- *
- * \param[in] ctx Pointer to this instance's context
- * \param[in] ctrl_id Algorithm specific control identifier
- * \param[in,out] data Data to exchange with algorithm instance.
- *
- * \retval #AOM_CODEC_OK
- * The internal state data was deserialized.
- */
-typedef aom_codec_err_t (*aom_codec_control_fn_t)(aom_codec_alg_priv_t *ctx,
- va_list ap);
-
-/*!\brief control function pointer mapping
- *
- * This structure stores the mapping between control identifiers and
- * implementing functions. Each algorithm provides a list of these
- * mappings. This list is searched by the aom_codec_control() wrapper
- * function to determine which function to invoke. The special
- * value {0, NULL} is used to indicate end-of-list, and must be
- * present. The special value {0, <non-null>} can be used as a catch-all
- * mapping. This implies that ctrl_id values chosen by the algorithm
- * \ref MUST be non-zero.
- */
-typedef const struct aom_codec_ctrl_fn_map {
- int ctrl_id;
- aom_codec_control_fn_t fn;
-} aom_codec_ctrl_fn_map_t;
-
-/*!\brief decode data function pointer prototype
- *
- * Processes a buffer of coded data. If the processing results in a new
- * decoded frame becoming available, #AOM_CODEC_CB_PUT_SLICE and
- * #AOM_CODEC_CB_PUT_FRAME events are generated as appropriate. This
- * function is called by the generic aom_codec_decode() wrapper function,
- * so plugins implementing this interface may trust the input parameters
- * to be properly initialized.
- *
- * \param[in] ctx Pointer to this instance's context
- * \param[in] data Pointer to this block of new coded data. If
- * NULL, a #AOM_CODEC_CB_PUT_FRAME event is posted
- * for the previously decoded frame.
- * \param[in] data_sz Size of the coded data, in bytes.
- *
- * \return Returns #AOM_CODEC_OK if the coded data was processed completely
- * and future pictures can be decoded without error. Otherwise,
- * see the descriptions of the other error codes in ::aom_codec_err_t
- * for recoverability capabilities.
- */
-typedef aom_codec_err_t (*aom_codec_decode_fn_t)(aom_codec_alg_priv_t *ctx,
- const uint8_t *data,
- size_t data_sz,
- void *user_priv);
-
-/*!\brief Decoded frames iterator
- *
- * Iterates over a list of the frames available for display. The iterator
- * storage should be initialized to NULL to start the iteration. Iteration is
- * complete when this function returns NULL.
- *
- * The list of available frames becomes valid upon completion of the
- * aom_codec_decode call, and remains valid until the next call to
- * aom_codec_decode.
- *
- * \param[in] ctx Pointer to this instance's context
- * \param[in out] iter Iterator storage, initialized to NULL
- *
- * \return Returns a pointer to an image, if one is ready for display. Frames
- * produced will always be in PTS (presentation time stamp) order.
- */
-typedef aom_image_t *(*aom_codec_get_frame_fn_t)(aom_codec_alg_priv_t *ctx,
- aom_codec_iter_t *iter);
-
-/*!\brief Pass in external frame buffers for the decoder to use.
- *
- * Registers functions to be called when libaom needs a frame buffer
- * to decode the current frame and a function to be called when libaom does
- * not internally reference the frame buffer. This set function must
- * be called before the first call to decode or libaom will assume the
- * default behavior of allocating frame buffers internally.
- *
- * \param[in] ctx Pointer to this instance's context
- * \param[in] cb_get Pointer to the get callback function
- * \param[in] cb_release Pointer to the release callback function
- * \param[in] cb_priv Callback's private data
- *
- * \retval #AOM_CODEC_OK
- * External frame buffers will be used by libaom.
- * \retval #AOM_CODEC_INVALID_PARAM
- * One or more of the callbacks were NULL.
- * \retval #AOM_CODEC_ERROR
- * Decoder context not initialized, or algorithm not capable of
- * using external frame buffers.
- *
- * \note
- * When decoding AV1, the application may be required to pass in at least
- * #AOM_MAXIMUM_WORK_BUFFERS external frame
- * buffers.
- */
-typedef aom_codec_err_t (*aom_codec_set_fb_fn_t)(
- aom_codec_alg_priv_t *ctx, aom_get_frame_buffer_cb_fn_t cb_get,
- aom_release_frame_buffer_cb_fn_t cb_release, void *cb_priv);
-
-typedef aom_codec_err_t (*aom_codec_encode_fn_t)(aom_codec_alg_priv_t *ctx,
- const aom_image_t *img,
- aom_codec_pts_t pts,
- unsigned long duration,
- aom_enc_frame_flags_t flags);
-typedef const aom_codec_cx_pkt_t *(*aom_codec_get_cx_data_fn_t)(
- aom_codec_alg_priv_t *ctx, aom_codec_iter_t *iter);
-
-typedef aom_codec_err_t (*aom_codec_enc_config_set_fn_t)(
- aom_codec_alg_priv_t *ctx, const aom_codec_enc_cfg_t *cfg);
-typedef aom_fixed_buf_t *(*aom_codec_get_global_headers_fn_t)(
- aom_codec_alg_priv_t *ctx);
-
-typedef aom_image_t *(*aom_codec_get_preview_frame_fn_t)(
- aom_codec_alg_priv_t *ctx);
-
-typedef aom_codec_err_t (*aom_codec_enc_mr_get_mem_loc_fn_t)(
- const aom_codec_enc_cfg_t *cfg, void **mem_loc);
-
-/*!\brief usage configuration mapping
- *
- * This structure stores the mapping between usage identifiers and
- * configuration structures. Each algorithm provides a list of these
- * mappings. This list is searched by the aom_codec_enc_config_default()
- * wrapper function to determine which config to return. The special value
- * {-1, {0}} is used to indicate end-of-list, and must be present. At least
- * one mapping must be present, in addition to the end-of-list.
- *
- */
-typedef const struct aom_codec_enc_cfg_map {
- int usage;
- aom_codec_enc_cfg_t cfg;
-} aom_codec_enc_cfg_map_t;
-
-/*!\brief Decoder algorithm interface interface
- *
- * All decoders \ref MUST expose a variable of this type.
- */
-struct aom_codec_iface {
- const char *name; /**< Identification String */
- int abi_version; /**< Implemented ABI version */
- aom_codec_caps_t caps; /**< Decoder capabilities */
- aom_codec_init_fn_t init; /**< \copydoc ::aom_codec_init_fn_t */
- aom_codec_destroy_fn_t destroy; /**< \copydoc ::aom_codec_destroy_fn_t */
- aom_codec_ctrl_fn_map_t *ctrl_maps; /**< \copydoc ::aom_codec_ctrl_fn_map_t */
- struct aom_codec_dec_iface {
- aom_codec_peek_si_fn_t peek_si; /**< \copydoc ::aom_codec_peek_si_fn_t */
- aom_codec_get_si_fn_t get_si; /**< \copydoc ::aom_codec_get_si_fn_t */
- aom_codec_decode_fn_t decode; /**< \copydoc ::aom_codec_decode_fn_t */
- aom_codec_get_frame_fn_t
- get_frame; /**< \copydoc ::aom_codec_get_frame_fn_t */
- aom_codec_set_fb_fn_t set_fb_fn; /**< \copydoc ::aom_codec_set_fb_fn_t */
- } dec;
- struct aom_codec_enc_iface {
- int cfg_map_count;
- aom_codec_enc_cfg_map_t
- *cfg_maps; /**< \copydoc ::aom_codec_enc_cfg_map_t */
- aom_codec_encode_fn_t encode; /**< \copydoc ::aom_codec_encode_fn_t */
- aom_codec_get_cx_data_fn_t
- get_cx_data; /**< \copydoc ::aom_codec_get_cx_data_fn_t */
- aom_codec_enc_config_set_fn_t
- cfg_set; /**< \copydoc ::aom_codec_enc_config_set_fn_t */
- aom_codec_get_global_headers_fn_t
- get_glob_hdrs; /**< \copydoc ::aom_codec_get_global_headers_fn_t */
- aom_codec_get_preview_frame_fn_t
- get_preview; /**< \copydoc ::aom_codec_get_preview_frame_fn_t */
- aom_codec_enc_mr_get_mem_loc_fn_t
- mr_get_mem_loc; /**< \copydoc ::aom_codec_enc_mr_get_mem_loc_fn_t */
- } enc;
-};
-
-/*!\brief Callback function pointer / user data pair storage */
-typedef struct aom_codec_priv_cb_pair {
- union {
- aom_codec_put_frame_cb_fn_t put_frame;
- aom_codec_put_slice_cb_fn_t put_slice;
- } u;
- void *user_priv;
-} aom_codec_priv_cb_pair_t;
-
-/*!\brief Instance private storage
- *
- * This structure is allocated by the algorithm's init function. It can be
- * extended in one of two ways. First, a second, algorithm specific structure
- * can be allocated and the priv member pointed to it. Alternatively, this
- * structure can be made the first member of the algorithm specific structure,
- * and the pointer cast to the proper type.
- */
-struct aom_codec_priv {
- const char *err_detail;
- aom_codec_flags_t init_flags;
- struct {
- aom_codec_priv_cb_pair_t put_frame_cb;
- aom_codec_priv_cb_pair_t put_slice_cb;
- } dec;
- struct {
- aom_fixed_buf_t cx_data_dst_buf;
- unsigned int cx_data_pad_before;
- unsigned int cx_data_pad_after;
- aom_codec_cx_pkt_t cx_data_pkt;
- unsigned int total_encoders;
- } enc;
-};
-
-/*
- * Multi-resolution encoding internal configuration
- */
-struct aom_codec_priv_enc_mr_cfg {
- unsigned int mr_total_resolutions;
- unsigned int mr_encoder_id;
- struct aom_rational mr_down_sampling_factor;
- void *mr_low_res_mode_info;
-};
-
-#undef AOM_CTRL_USE_TYPE
-#define AOM_CTRL_USE_TYPE(id, typ) \
- static AOM_INLINE typ id##__value(va_list args) { return va_arg(args, typ); }
-
-#undef AOM_CTRL_USE_TYPE_DEPRECATED
-#define AOM_CTRL_USE_TYPE_DEPRECATED(id, typ) \
- static AOM_INLINE typ id##__value(va_list args) { return va_arg(args, typ); }
-
-#define CAST(id, arg) id##__value(arg)
-
-/* CODEC_INTERFACE convenience macro
- *
- * By convention, each codec interface is a struct with extern linkage, where
- * the symbol is suffixed with _algo. A getter function is also defined to
- * return a pointer to the struct, since in some cases it's easier to work
- * with text symbols than data symbols (see issue #169). This function has
- * the same name as the struct, less the _algo suffix. The CODEC_INTERFACE
- * macro is provided to define this getter function automatically.
- */
-#define CODEC_INTERFACE(id) \
- aom_codec_iface_t *id(void) { return &id##_algo; } \
- aom_codec_iface_t id##_algo
-
-/* Internal Utility Functions
- *
- * The following functions are intended to be used inside algorithms as
- * utilities for manipulating aom_codec_* data structures.
- */
-struct aom_codec_pkt_list {
- unsigned int cnt;
- unsigned int max;
- struct aom_codec_cx_pkt pkts[1];
-};
-
-#define aom_codec_pkt_list_decl(n) \
- union { \
- struct aom_codec_pkt_list head; \
- struct { \
- struct aom_codec_pkt_list head; \
- struct aom_codec_cx_pkt pkts[n]; \
- } alloc; \
- }
-
-#define aom_codec_pkt_list_init(m) \
- (m)->alloc.head.cnt = 0, \
- (m)->alloc.head.max = sizeof((m)->alloc.pkts) / sizeof((m)->alloc.pkts[0])
-
-int aom_codec_pkt_list_add(struct aom_codec_pkt_list *,
- const struct aom_codec_cx_pkt *);
-
-const aom_codec_cx_pkt_t *aom_codec_pkt_list_get(
- struct aom_codec_pkt_list *list, aom_codec_iter_t *iter);
-
-#include <stdio.h>
-#include <setjmp.h>
-
-struct aom_internal_error_info {
- aom_codec_err_t error_code;
- int has_detail;
- char detail[80];
- int setjmp; // Boolean: whether 'jmp' is valid.
- jmp_buf jmp;
-};
-
-#define CLANG_ANALYZER_NORETURN
-#if defined(__has_feature)
-#if __has_feature(attribute_analyzer_noreturn)
-#undef CLANG_ANALYZER_NORETURN
-#define CLANG_ANALYZER_NORETURN __attribute__((analyzer_noreturn))
-#endif
-#endif
-
-void aom_internal_error(struct aom_internal_error_info *info,
- aom_codec_err_t error, const char *fmt,
- ...) CLANG_ANALYZER_NORETURN;
-
-void aom_merge_corrupted_flag(int *corrupted, int value);
-#ifdef __cplusplus
-} // extern "C"
-#endif
-
-#endif // AOM_AOM_INTERNAL_AOM_CODEC_INTERNAL_H_
diff --git a/third_party/aom/aom/src/aom_codec.c b/third_party/aom/aom/src/aom_codec.c
deleted file mode 100644
index 733bffb25..000000000
--- a/third_party/aom/aom/src/aom_codec.c
+++ /dev/null
@@ -1,157 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-/*!\file
- * \brief Provides the high level interface to wrap decoder algorithms.
- *
- */
-#include <stdarg.h>
-#include <stdlib.h>
-
-#include "config/aom_config.h"
-#include "config/aom_version.h"
-
-#include "aom/aom_integer.h"
-#include "aom/internal/aom_codec_internal.h"
-
-#define SAVE_STATUS(ctx, var) (ctx ? (ctx->err = var) : var)
-
-int aom_codec_version(void) { return VERSION_PACKED; }
-
-const char *aom_codec_version_str(void) { return VERSION_STRING_NOSP; }
-
-const char *aom_codec_version_extra_str(void) { return VERSION_EXTRA; }
-
-const char *aom_codec_iface_name(aom_codec_iface_t *iface) {
- return iface ? iface->name : "<invalid interface>";
-}
-
-const char *aom_codec_err_to_string(aom_codec_err_t err) {
- switch (err) {
- case AOM_CODEC_OK: return "Success";
- case AOM_CODEC_ERROR: return "Unspecified internal error";
- case AOM_CODEC_MEM_ERROR: return "Memory allocation error";
- case AOM_CODEC_ABI_MISMATCH: return "ABI version mismatch";
- case AOM_CODEC_INCAPABLE:
- return "Codec does not implement requested capability";
- case AOM_CODEC_UNSUP_BITSTREAM:
- return "Bitstream not supported by this decoder";
- case AOM_CODEC_UNSUP_FEATURE:
- return "Bitstream required feature not supported by this decoder";
- case AOM_CODEC_CORRUPT_FRAME: return "Corrupt frame detected";
- case AOM_CODEC_INVALID_PARAM: return "Invalid parameter";
- case AOM_CODEC_LIST_END: return "End of iterated list";
- }
-
- return "Unrecognized error code";
-}
-
-const char *aom_codec_error(aom_codec_ctx_t *ctx) {
- return (ctx) ? aom_codec_err_to_string(ctx->err)
- : aom_codec_err_to_string(AOM_CODEC_INVALID_PARAM);
-}
-
-const char *aom_codec_error_detail(aom_codec_ctx_t *ctx) {
- if (ctx && ctx->err)
- return ctx->priv ? ctx->priv->err_detail : ctx->err_detail;
-
- return NULL;
-}
-
-aom_codec_err_t aom_codec_destroy(aom_codec_ctx_t *ctx) {
- aom_codec_err_t res;
-
- if (!ctx)
- res = AOM_CODEC_INVALID_PARAM;
- else if (!ctx->iface || !ctx->priv)
- res = AOM_CODEC_ERROR;
- else {
- ctx->iface->destroy((aom_codec_alg_priv_t *)ctx->priv);
-
- ctx->iface = NULL;
- ctx->name = NULL;
- ctx->priv = NULL;
- res = AOM_CODEC_OK;
- }
-
- return SAVE_STATUS(ctx, res);
-}
-
-aom_codec_caps_t aom_codec_get_caps(aom_codec_iface_t *iface) {
- return (iface) ? iface->caps : 0;
-}
-
-aom_codec_err_t aom_codec_control_(aom_codec_ctx_t *ctx, int ctrl_id, ...) {
- aom_codec_err_t res;
-
- if (!ctx || !ctrl_id)
- res = AOM_CODEC_INVALID_PARAM;
- else if (!ctx->iface || !ctx->priv || !ctx->iface->ctrl_maps)
- res = AOM_CODEC_ERROR;
- else {
- aom_codec_ctrl_fn_map_t *entry;
-
- res = AOM_CODEC_ERROR;
-
- for (entry = ctx->iface->ctrl_maps; entry && entry->fn; entry++) {
- if (!entry->ctrl_id || entry->ctrl_id == ctrl_id) {
- va_list ap;
-
- va_start(ap, ctrl_id);
- res = entry->fn((aom_codec_alg_priv_t *)ctx->priv, ap);
- va_end(ap);
- break;
- }
- }
- }
-
- return SAVE_STATUS(ctx, res);
-}
-
-void aom_internal_error(struct aom_internal_error_info *info,
- aom_codec_err_t error, const char *fmt, ...) {
- va_list ap;
-
- info->error_code = error;
- info->has_detail = 0;
-
- if (fmt) {
- size_t sz = sizeof(info->detail);
-
- info->has_detail = 1;
- va_start(ap, fmt);
- vsnprintf(info->detail, sz - 1, fmt, ap);
- va_end(ap);
- info->detail[sz - 1] = '\0';
- }
-
- if (info->setjmp) longjmp(info->jmp, info->error_code);
-}
-
-void aom_merge_corrupted_flag(int *corrupted, int value) {
- *corrupted |= value;
-}
-
-const char *aom_obu_type_to_string(OBU_TYPE type) {
- switch (type) {
- case OBU_SEQUENCE_HEADER: return "OBU_SEQUENCE_HEADER";
- case OBU_TEMPORAL_DELIMITER: return "OBU_TEMPORAL_DELIMITER";
- case OBU_FRAME_HEADER: return "OBU_FRAME_HEADER";
- case OBU_REDUNDANT_FRAME_HEADER: return "OBU_REDUNDANT_FRAME_HEADER";
- case OBU_FRAME: return "OBU_FRAME";
- case OBU_TILE_GROUP: return "OBU_TILE_GROUP";
- case OBU_METADATA: return "OBU_METADATA";
- case OBU_TILE_LIST: return "OBU_TILE_LIST";
- case OBU_PADDING: return "OBU_PADDING";
- default: break;
- }
- return "<Invalid OBU Type>";
-}
diff --git a/third_party/aom/aom/src/aom_decoder.c b/third_party/aom/aom/src/aom_decoder.c
deleted file mode 100644
index 8c9111faf..000000000
--- a/third_party/aom/aom/src/aom_decoder.c
+++ /dev/null
@@ -1,180 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-/*!\file
- * \brief Provides the high level interface to wrap decoder algorithms.
- *
- */
-#include <string.h>
-#include "aom/internal/aom_codec_internal.h"
-
-#define SAVE_STATUS(ctx, var) (ctx ? (ctx->err = var) : var)
-
-static aom_codec_alg_priv_t *get_alg_priv(aom_codec_ctx_t *ctx) {
- return (aom_codec_alg_priv_t *)ctx->priv;
-}
-
-aom_codec_err_t aom_codec_dec_init_ver(aom_codec_ctx_t *ctx,
- aom_codec_iface_t *iface,
- const aom_codec_dec_cfg_t *cfg,
- aom_codec_flags_t flags, int ver) {
- aom_codec_err_t res;
-
- if (ver != AOM_DECODER_ABI_VERSION)
- res = AOM_CODEC_ABI_MISMATCH;
- else if (!ctx || !iface)
- res = AOM_CODEC_INVALID_PARAM;
- else if (iface->abi_version != AOM_CODEC_INTERNAL_ABI_VERSION)
- res = AOM_CODEC_ABI_MISMATCH;
- else if ((flags & AOM_CODEC_USE_POSTPROC) &&
- !(iface->caps & AOM_CODEC_CAP_POSTPROC))
- res = AOM_CODEC_INCAPABLE;
- else if ((flags & AOM_CODEC_USE_INPUT_FRAGMENTS) &&
- !(iface->caps & AOM_CODEC_CAP_INPUT_FRAGMENTS))
- res = AOM_CODEC_INCAPABLE;
- else if (!(iface->caps & AOM_CODEC_CAP_DECODER))
- res = AOM_CODEC_INCAPABLE;
- else {
- memset(ctx, 0, sizeof(*ctx));
- ctx->iface = iface;
- ctx->name = iface->name;
- ctx->priv = NULL;
- ctx->init_flags = flags;
- ctx->config.dec = cfg;
-
- res = ctx->iface->init(ctx, NULL);
- if (res) {
- ctx->err_detail = ctx->priv ? ctx->priv->err_detail : NULL;
- aom_codec_destroy(ctx);
- }
- }
-
- return SAVE_STATUS(ctx, res);
-}
-
-aom_codec_err_t aom_codec_peek_stream_info(aom_codec_iface_t *iface,
- const uint8_t *data, size_t data_sz,
- aom_codec_stream_info_t *si) {
- aom_codec_err_t res;
-
- if (!iface || !data || !data_sz || !si) {
- res = AOM_CODEC_INVALID_PARAM;
- } else {
- /* Set default/unknown values */
- si->w = 0;
- si->h = 0;
-
- res = iface->dec.peek_si(data, data_sz, si);
- }
-
- return res;
-}
-
-aom_codec_err_t aom_codec_get_stream_info(aom_codec_ctx_t *ctx,
- aom_codec_stream_info_t *si) {
- aom_codec_err_t res;
-
- if (!ctx || !si) {
- res = AOM_CODEC_INVALID_PARAM;
- } else if (!ctx->iface || !ctx->priv) {
- res = AOM_CODEC_ERROR;
- } else {
- /* Set default/unknown values */
- si->w = 0;
- si->h = 0;
-
- res = ctx->iface->dec.get_si(get_alg_priv(ctx), si);
- }
-
- return SAVE_STATUS(ctx, res);
-}
-
-aom_codec_err_t aom_codec_decode(aom_codec_ctx_t *ctx, const uint8_t *data,
- size_t data_sz, void *user_priv) {
- aom_codec_err_t res;
-
- if (!ctx)
- res = AOM_CODEC_INVALID_PARAM;
- else if (!ctx->iface || !ctx->priv)
- res = AOM_CODEC_ERROR;
- else {
- res = ctx->iface->dec.decode(get_alg_priv(ctx), data, data_sz, user_priv);
- }
-
- return SAVE_STATUS(ctx, res);
-}
-
-aom_image_t *aom_codec_get_frame(aom_codec_ctx_t *ctx, aom_codec_iter_t *iter) {
- aom_image_t *img;
-
- if (!ctx || !iter || !ctx->iface || !ctx->priv)
- img = NULL;
- else
- img = ctx->iface->dec.get_frame(get_alg_priv(ctx), iter);
-
- return img;
-}
-
-aom_codec_err_t aom_codec_register_put_frame_cb(aom_codec_ctx_t *ctx,
- aom_codec_put_frame_cb_fn_t cb,
- void *user_priv) {
- aom_codec_err_t res;
-
- if (!ctx || !cb)
- res = AOM_CODEC_INVALID_PARAM;
- else if (!ctx->iface || !ctx->priv ||
- !(ctx->iface->caps & AOM_CODEC_CAP_PUT_FRAME))
- res = AOM_CODEC_ERROR;
- else {
- ctx->priv->dec.put_frame_cb.u.put_frame = cb;
- ctx->priv->dec.put_frame_cb.user_priv = user_priv;
- res = AOM_CODEC_OK;
- }
-
- return SAVE_STATUS(ctx, res);
-}
-
-aom_codec_err_t aom_codec_register_put_slice_cb(aom_codec_ctx_t *ctx,
- aom_codec_put_slice_cb_fn_t cb,
- void *user_priv) {
- aom_codec_err_t res;
-
- if (!ctx || !cb)
- res = AOM_CODEC_INVALID_PARAM;
- else if (!ctx->iface || !ctx->priv ||
- !(ctx->iface->caps & AOM_CODEC_CAP_PUT_SLICE))
- res = AOM_CODEC_ERROR;
- else {
- ctx->priv->dec.put_slice_cb.u.put_slice = cb;
- ctx->priv->dec.put_slice_cb.user_priv = user_priv;
- res = AOM_CODEC_OK;
- }
-
- return SAVE_STATUS(ctx, res);
-}
-
-aom_codec_err_t aom_codec_set_frame_buffer_functions(
- aom_codec_ctx_t *ctx, aom_get_frame_buffer_cb_fn_t cb_get,
- aom_release_frame_buffer_cb_fn_t cb_release, void *cb_priv) {
- aom_codec_err_t res;
-
- if (!ctx || !cb_get || !cb_release) {
- res = AOM_CODEC_INVALID_PARAM;
- } else if (!ctx->iface || !ctx->priv ||
- !(ctx->iface->caps & AOM_CODEC_CAP_EXTERNAL_FRAME_BUFFER)) {
- res = AOM_CODEC_ERROR;
- } else {
- res = ctx->iface->dec.set_fb_fn(get_alg_priv(ctx), cb_get, cb_release,
- cb_priv);
- }
-
- return SAVE_STATUS(ctx, res);
-}
diff --git a/third_party/aom/aom/src/aom_encoder.c b/third_party/aom/aom/src/aom_encoder.c
deleted file mode 100644
index 523f40bbe..000000000
--- a/third_party/aom/aom/src/aom_encoder.c
+++ /dev/null
@@ -1,402 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-/*!\file
- * \brief Provides the high level interface to wrap encoder algorithms.
- *
- */
-#include "config/aom_config.h"
-
-#if HAVE_FEXCEPT
-#ifndef _GNU_SOURCE
-#define _GNU_SOURCE
-#endif
-#include <fenv.h>
-#endif
-
-#include <limits.h>
-#include <string.h>
-#include "aom/internal/aom_codec_internal.h"
-
-#define SAVE_STATUS(ctx, var) (ctx ? (ctx->err = var) : var)
-
-static aom_codec_alg_priv_t *get_alg_priv(aom_codec_ctx_t *ctx) {
- return (aom_codec_alg_priv_t *)ctx->priv;
-}
-
-aom_codec_err_t aom_codec_enc_init_ver(aom_codec_ctx_t *ctx,
- aom_codec_iface_t *iface,
- const aom_codec_enc_cfg_t *cfg,
- aom_codec_flags_t flags, int ver) {
- aom_codec_err_t res;
-
- if (ver != AOM_ENCODER_ABI_VERSION)
- res = AOM_CODEC_ABI_MISMATCH;
- else if (!ctx || !iface || !cfg)
- res = AOM_CODEC_INVALID_PARAM;
- else if (iface->abi_version != AOM_CODEC_INTERNAL_ABI_VERSION)
- res = AOM_CODEC_ABI_MISMATCH;
- else if (!(iface->caps & AOM_CODEC_CAP_ENCODER))
- res = AOM_CODEC_INCAPABLE;
- else if ((flags & AOM_CODEC_USE_PSNR) && !(iface->caps & AOM_CODEC_CAP_PSNR))
- res = AOM_CODEC_INCAPABLE;
- else {
- ctx->iface = iface;
- ctx->name = iface->name;
- ctx->priv = NULL;
- ctx->init_flags = flags;
- ctx->config.enc = cfg;
- res = ctx->iface->init(ctx, NULL);
-
- if (res) {
- ctx->err_detail = ctx->priv ? ctx->priv->err_detail : NULL;
- aom_codec_destroy(ctx);
- }
- }
-
- return SAVE_STATUS(ctx, res);
-}
-
-aom_codec_err_t aom_codec_enc_init_multi_ver(
- aom_codec_ctx_t *ctx, aom_codec_iface_t *iface, aom_codec_enc_cfg_t *cfg,
- int num_enc, aom_codec_flags_t flags, aom_rational_t *dsf, int ver) {
- aom_codec_err_t res = AOM_CODEC_OK;
-
- if (ver != AOM_ENCODER_ABI_VERSION)
- res = AOM_CODEC_ABI_MISMATCH;
- else if (!ctx || !iface || !cfg || (num_enc > 16 || num_enc < 1))
- res = AOM_CODEC_INVALID_PARAM;
- else if (iface->abi_version != AOM_CODEC_INTERNAL_ABI_VERSION)
- res = AOM_CODEC_ABI_MISMATCH;
- else if (!(iface->caps & AOM_CODEC_CAP_ENCODER))
- res = AOM_CODEC_INCAPABLE;
- else if ((flags & AOM_CODEC_USE_PSNR) && !(iface->caps & AOM_CODEC_CAP_PSNR))
- res = AOM_CODEC_INCAPABLE;
- else {
- int i;
- void *mem_loc = NULL;
-
- if (!(res = iface->enc.mr_get_mem_loc(cfg, &mem_loc))) {
- for (i = 0; i < num_enc; i++) {
- aom_codec_priv_enc_mr_cfg_t mr_cfg;
-
- /* Validate down-sampling factor. */
- if (dsf->num < 1 || dsf->num > 4096 || dsf->den < 1 ||
- dsf->den > dsf->num) {
- res = AOM_CODEC_INVALID_PARAM;
- break;
- }
-
- mr_cfg.mr_low_res_mode_info = mem_loc;
- mr_cfg.mr_total_resolutions = num_enc;
- mr_cfg.mr_encoder_id = num_enc - 1 - i;
- mr_cfg.mr_down_sampling_factor.num = dsf->num;
- mr_cfg.mr_down_sampling_factor.den = dsf->den;
-
- /* Force Key-frame synchronization. Namely, encoder at higher
- * resolution always use the same frame_type chosen by the
- * lowest-resolution encoder.
- */
- if (mr_cfg.mr_encoder_id) cfg->kf_mode = AOM_KF_DISABLED;
-
- ctx->iface = iface;
- ctx->name = iface->name;
- ctx->priv = NULL;
- ctx->init_flags = flags;
- ctx->config.enc = cfg;
- res = ctx->iface->init(ctx, &mr_cfg);
-
- if (res) {
- const char *error_detail = ctx->priv ? ctx->priv->err_detail : NULL;
- /* Destroy current ctx */
- ctx->err_detail = error_detail;
- aom_codec_destroy(ctx);
-
- /* Destroy already allocated high-level ctx */
- while (i) {
- ctx--;
- ctx->err_detail = error_detail;
- aom_codec_destroy(ctx);
- i--;
- }
- }
-
- if (res) break;
-
- ctx++;
- cfg++;
- dsf++;
- }
- ctx--;
- }
- }
-
- return SAVE_STATUS(ctx, res);
-}
-
-aom_codec_err_t aom_codec_enc_config_default(aom_codec_iface_t *iface,
- aom_codec_enc_cfg_t *cfg,
- unsigned int usage) {
- aom_codec_err_t res;
- aom_codec_enc_cfg_map_t *map;
- int i;
-
- if (!iface || !cfg || usage > INT_MAX)
- res = AOM_CODEC_INVALID_PARAM;
- else if (!(iface->caps & AOM_CODEC_CAP_ENCODER))
- res = AOM_CODEC_INCAPABLE;
- else {
- res = AOM_CODEC_INVALID_PARAM;
-
- for (i = 0; i < iface->enc.cfg_map_count; ++i) {
- map = iface->enc.cfg_maps + i;
- if (map->usage == (int)usage) {
- *cfg = map->cfg;
- cfg->g_usage = usage;
- res = AOM_CODEC_OK;
- break;
- }
- }
- }
-
- /* default values */
- if (cfg) {
- cfg->cfg.ext_partition = 1;
- }
-
- return res;
-}
-
-#if ARCH_X86 || ARCH_X86_64
-/* On X86, disable the x87 unit's internal 80 bit precision for better
- * consistency with the SSE unit's 64 bit precision.
- */
-#include "aom_ports/x86.h"
-#define FLOATING_POINT_SET_PRECISION \
- unsigned short x87_orig_mode = x87_set_double_precision();
-#define FLOATING_POINT_RESTORE_PRECISION x87_set_control_word(x87_orig_mode);
-#else
-#define FLOATING_POINT_SET_PRECISION
-#define FLOATING_POINT_RESTORE_PRECISION
-#endif // ARCH_X86 || ARCH_X86_64
-
-#if HAVE_FEXCEPT && CONFIG_DEBUG
-#define FLOATING_POINT_SET_EXCEPTIONS \
- const int float_excepts = feenableexcept(FE_DIVBYZERO);
-#define FLOATING_POINT_RESTORE_EXCEPTIONS feenableexcept(float_excepts);
-#else
-#define FLOATING_POINT_SET_EXCEPTIONS
-#define FLOATING_POINT_RESTORE_EXCEPTIONS
-#endif // HAVE_FEXCEPT && CONFIG_DEBUG
-
-/* clang-format off */
-#define FLOATING_POINT_INIT \
- do { \
- FLOATING_POINT_SET_PRECISION \
- FLOATING_POINT_SET_EXCEPTIONS
-
-#define FLOATING_POINT_RESTORE \
- FLOATING_POINT_RESTORE_EXCEPTIONS \
- FLOATING_POINT_RESTORE_PRECISION \
- } while (0);
-/* clang-format on */
-
-aom_codec_err_t aom_codec_encode(aom_codec_ctx_t *ctx, const aom_image_t *img,
- aom_codec_pts_t pts, unsigned long duration,
- aom_enc_frame_flags_t flags) {
- aom_codec_err_t res = AOM_CODEC_OK;
-
- if (!ctx || (img && !duration))
- res = AOM_CODEC_INVALID_PARAM;
- else if (!ctx->iface || !ctx->priv)
- res = AOM_CODEC_ERROR;
- else if (!(ctx->iface->caps & AOM_CODEC_CAP_ENCODER))
- res = AOM_CODEC_INCAPABLE;
- else {
- unsigned int num_enc = ctx->priv->enc.total_encoders;
-
- /* Execute in a normalized floating point environment, if the platform
- * requires it.
- */
- FLOATING_POINT_INIT
-
- if (num_enc == 1)
- res =
- ctx->iface->enc.encode(get_alg_priv(ctx), img, pts, duration, flags);
- else {
- /* Multi-resolution encoding:
- * Encode multi-levels in reverse order. For example,
- * if mr_total_resolutions = 3, first encode level 2,
- * then encode level 1, and finally encode level 0.
- */
- int i;
-
- ctx += num_enc - 1;
- if (img) img += num_enc - 1;
-
- for (i = num_enc - 1; i >= 0; i--) {
- if ((res = ctx->iface->enc.encode(get_alg_priv(ctx), img, pts, duration,
- flags)))
- break;
-
- ctx--;
- if (img) img--;
- }
- ctx++;
- }
-
- FLOATING_POINT_RESTORE
- }
-
- return SAVE_STATUS(ctx, res);
-}
-
-const aom_codec_cx_pkt_t *aom_codec_get_cx_data(aom_codec_ctx_t *ctx,
- aom_codec_iter_t *iter) {
- const aom_codec_cx_pkt_t *pkt = NULL;
-
- if (ctx) {
- if (!iter)
- ctx->err = AOM_CODEC_INVALID_PARAM;
- else if (!ctx->iface || !ctx->priv)
- ctx->err = AOM_CODEC_ERROR;
- else if (!(ctx->iface->caps & AOM_CODEC_CAP_ENCODER))
- ctx->err = AOM_CODEC_INCAPABLE;
- else
- pkt = ctx->iface->enc.get_cx_data(get_alg_priv(ctx), iter);
- }
-
- if (pkt && pkt->kind == AOM_CODEC_CX_FRAME_PKT) {
- // If the application has specified a destination area for the
- // compressed data, and the codec has not placed the data there,
- // and it fits, copy it.
- aom_codec_priv_t *const priv = ctx->priv;
- char *const dst_buf = (char *)priv->enc.cx_data_dst_buf.buf;
-
- if (dst_buf && pkt->data.raw.buf != dst_buf &&
- pkt->data.raw.sz + priv->enc.cx_data_pad_before +
- priv->enc.cx_data_pad_after <=
- priv->enc.cx_data_dst_buf.sz) {
- aom_codec_cx_pkt_t *modified_pkt = &priv->enc.cx_data_pkt;
-
- memcpy(dst_buf + priv->enc.cx_data_pad_before, pkt->data.raw.buf,
- pkt->data.raw.sz);
- *modified_pkt = *pkt;
- modified_pkt->data.raw.buf = dst_buf;
- modified_pkt->data.raw.sz +=
- priv->enc.cx_data_pad_before + priv->enc.cx_data_pad_after;
- pkt = modified_pkt;
- }
-
- if (dst_buf == pkt->data.raw.buf) {
- priv->enc.cx_data_dst_buf.buf = dst_buf + pkt->data.raw.sz;
- priv->enc.cx_data_dst_buf.sz -= pkt->data.raw.sz;
- }
- }
-
- return pkt;
-}
-
-aom_codec_err_t aom_codec_set_cx_data_buf(aom_codec_ctx_t *ctx,
- const aom_fixed_buf_t *buf,
- unsigned int pad_before,
- unsigned int pad_after) {
- if (!ctx || !ctx->priv) return AOM_CODEC_INVALID_PARAM;
-
- if (buf) {
- ctx->priv->enc.cx_data_dst_buf = *buf;
- ctx->priv->enc.cx_data_pad_before = pad_before;
- ctx->priv->enc.cx_data_pad_after = pad_after;
- } else {
- ctx->priv->enc.cx_data_dst_buf.buf = NULL;
- ctx->priv->enc.cx_data_dst_buf.sz = 0;
- ctx->priv->enc.cx_data_pad_before = 0;
- ctx->priv->enc.cx_data_pad_after = 0;
- }
-
- return AOM_CODEC_OK;
-}
-
-const aom_image_t *aom_codec_get_preview_frame(aom_codec_ctx_t *ctx) {
- aom_image_t *img = NULL;
-
- if (ctx) {
- if (!ctx->iface || !ctx->priv)
- ctx->err = AOM_CODEC_ERROR;
- else if (!(ctx->iface->caps & AOM_CODEC_CAP_ENCODER))
- ctx->err = AOM_CODEC_INCAPABLE;
- else if (!ctx->iface->enc.get_preview)
- ctx->err = AOM_CODEC_INCAPABLE;
- else
- img = ctx->iface->enc.get_preview(get_alg_priv(ctx));
- }
-
- return img;
-}
-
-aom_fixed_buf_t *aom_codec_get_global_headers(aom_codec_ctx_t *ctx) {
- aom_fixed_buf_t *buf = NULL;
-
- if (ctx) {
- if (!ctx->iface || !ctx->priv)
- ctx->err = AOM_CODEC_ERROR;
- else if (!(ctx->iface->caps & AOM_CODEC_CAP_ENCODER))
- ctx->err = AOM_CODEC_INCAPABLE;
- else if (!ctx->iface->enc.get_glob_hdrs)
- ctx->err = AOM_CODEC_INCAPABLE;
- else
- buf = ctx->iface->enc.get_glob_hdrs(get_alg_priv(ctx));
- }
-
- return buf;
-}
-
-aom_codec_err_t aom_codec_enc_config_set(aom_codec_ctx_t *ctx,
- const aom_codec_enc_cfg_t *cfg) {
- aom_codec_err_t res;
-
- if (!ctx || !ctx->iface || !ctx->priv || !cfg)
- res = AOM_CODEC_INVALID_PARAM;
- else if (!(ctx->iface->caps & AOM_CODEC_CAP_ENCODER))
- res = AOM_CODEC_INCAPABLE;
- else
- res = ctx->iface->enc.cfg_set(get_alg_priv(ctx), cfg);
-
- return SAVE_STATUS(ctx, res);
-}
-
-int aom_codec_pkt_list_add(struct aom_codec_pkt_list *list,
- const struct aom_codec_cx_pkt *pkt) {
- if (list->cnt < list->max) {
- list->pkts[list->cnt++] = *pkt;
- return 0;
- }
-
- return 1;
-}
-
-const aom_codec_cx_pkt_t *aom_codec_pkt_list_get(
- struct aom_codec_pkt_list *list, aom_codec_iter_t *iter) {
- const aom_codec_cx_pkt_t *pkt;
-
- if (!(*iter)) {
- *iter = list->pkts;
- }
-
- pkt = (const aom_codec_cx_pkt_t *)*iter;
-
- if ((size_t)(pkt - list->pkts) < list->cnt)
- *iter = pkt + 1;
- else
- pkt = NULL;
-
- return pkt;
-}
diff --git a/third_party/aom/aom/src/aom_image.c b/third_party/aom/aom/src/aom_image.c
deleted file mode 100644
index 437f0241e..000000000
--- a/third_party/aom/aom/src/aom_image.c
+++ /dev/null
@@ -1,265 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#include <stdlib.h>
-#include <string.h>
-
-#include "aom/aom_image.h"
-#include "aom/aom_integer.h"
-#include "aom_mem/aom_mem.h"
-
-static INLINE unsigned int align_image_dimension(unsigned int d,
- unsigned int subsampling,
- unsigned int size_align) {
- unsigned int align;
-
- align = (1 << subsampling) - 1;
- align = (size_align - 1 > align) ? (size_align - 1) : align;
- return ((d + align) & ~align);
-}
-
-static aom_image_t *img_alloc_helper(
- aom_image_t *img, aom_img_fmt_t fmt, unsigned int d_w, unsigned int d_h,
- unsigned int buf_align, unsigned int stride_align, unsigned int size_align,
- unsigned char *img_data, unsigned int border) {
- unsigned int h, w, s, xcs, ycs, bps;
- unsigned int stride_in_bytes;
-
- /* Treat align==0 like align==1 */
- if (!buf_align) buf_align = 1;
-
- /* Validate alignment (must be power of 2) */
- if (buf_align & (buf_align - 1)) goto fail;
-
- /* Treat align==0 like align==1 */
- if (!stride_align) stride_align = 1;
-
- /* Validate alignment (must be power of 2) */
- if (stride_align & (stride_align - 1)) goto fail;
-
- /* Treat align==0 like align==1 */
- if (!size_align) size_align = 1;
-
- /* Validate alignment (must be power of 2) */
- if (size_align & (size_align - 1)) goto fail;
-
- /* Get sample size for this format */
- switch (fmt) {
- case AOM_IMG_FMT_I420:
- case AOM_IMG_FMT_YV12:
- case AOM_IMG_FMT_AOMI420:
- case AOM_IMG_FMT_AOMYV12: bps = 12; break;
- case AOM_IMG_FMT_I422:
- case AOM_IMG_FMT_I444: bps = 24; break;
- case AOM_IMG_FMT_I42016: bps = 24; break;
- case AOM_IMG_FMT_I42216:
- case AOM_IMG_FMT_I44416: bps = 48; break;
- default: bps = 16; break;
- }
-
- /* Get chroma shift values for this format */
- switch (fmt) {
- case AOM_IMG_FMT_I420:
- case AOM_IMG_FMT_YV12:
- case AOM_IMG_FMT_AOMI420:
- case AOM_IMG_FMT_AOMYV12:
- case AOM_IMG_FMT_I422:
- case AOM_IMG_FMT_I42016:
- case AOM_IMG_FMT_I42216: xcs = 1; break;
- default: xcs = 0; break;
- }
-
- switch (fmt) {
- case AOM_IMG_FMT_I420:
- case AOM_IMG_FMT_YV12:
- case AOM_IMG_FMT_AOMI420:
- case AOM_IMG_FMT_AOMYV12:
- case AOM_IMG_FMT_I42016: ycs = 1; break;
- default: ycs = 0; break;
- }
-
- /* Calculate storage sizes given the chroma subsampling */
- w = align_image_dimension(d_w, xcs, size_align);
- h = align_image_dimension(d_h, ycs, size_align);
-
- s = (fmt & AOM_IMG_FMT_PLANAR) ? w : bps * w / 8;
- s = (s + 2 * border + stride_align - 1) & ~(stride_align - 1);
- stride_in_bytes = (fmt & AOM_IMG_FMT_HIGHBITDEPTH) ? s * 2 : s;
-
- /* Allocate the new image */
- if (!img) {
- img = (aom_image_t *)calloc(1, sizeof(aom_image_t));
-
- if (!img) goto fail;
-
- img->self_allocd = 1;
- } else {
- memset(img, 0, sizeof(aom_image_t));
- }
-
- img->img_data = img_data;
-
- if (!img_data) {
- const uint64_t alloc_size =
- (fmt & AOM_IMG_FMT_PLANAR)
- ? (uint64_t)(h + 2 * border) * stride_in_bytes * bps / 8
- : (uint64_t)(h + 2 * border) * stride_in_bytes;
-
- if (alloc_size != (size_t)alloc_size) goto fail;
-
- img->img_data = (uint8_t *)aom_memalign(buf_align, (size_t)alloc_size);
- img->img_data_owner = 1;
- }
-
- if (!img->img_data) goto fail;
-
- img->fmt = fmt;
- img->bit_depth = (fmt & AOM_IMG_FMT_HIGHBITDEPTH) ? 16 : 8;
- // aligned width and aligned height
- img->w = w;
- img->h = h;
- img->x_chroma_shift = xcs;
- img->y_chroma_shift = ycs;
- img->bps = bps;
-
- /* Calculate strides */
- img->stride[AOM_PLANE_Y] = img->stride[AOM_PLANE_ALPHA] = stride_in_bytes;
- img->stride[AOM_PLANE_U] = img->stride[AOM_PLANE_V] = stride_in_bytes >> xcs;
-
- /* Default viewport to entire image */
- if (!aom_img_set_rect(img, 0, 0, d_w, d_h, border)) return img;
-
-fail:
- aom_img_free(img);
- return NULL;
-}
-
-aom_image_t *aom_img_alloc(aom_image_t *img, aom_img_fmt_t fmt,
- unsigned int d_w, unsigned int d_h,
- unsigned int align) {
- return img_alloc_helper(img, fmt, d_w, d_h, align, align, 1, NULL, 0);
-}
-
-aom_image_t *aom_img_wrap(aom_image_t *img, aom_img_fmt_t fmt, unsigned int d_w,
- unsigned int d_h, unsigned int stride_align,
- unsigned char *img_data) {
- /* By setting buf_align = 1, we don't change buffer alignment in this
- * function. */
- return img_alloc_helper(img, fmt, d_w, d_h, 1, stride_align, 1, img_data, 0);
-}
-
-aom_image_t *aom_img_alloc_with_border(aom_image_t *img, aom_img_fmt_t fmt,
- unsigned int d_w, unsigned int d_h,
- unsigned int align,
- unsigned int size_align,
- unsigned int border) {
- return img_alloc_helper(img, fmt, d_w, d_h, align, align, size_align, NULL,
- border);
-}
-
-int aom_img_set_rect(aom_image_t *img, unsigned int x, unsigned int y,
- unsigned int w, unsigned int h, unsigned int border) {
- unsigned char *data;
-
- if (x + w <= img->w && y + h <= img->h) {
- img->d_w = w;
- img->d_h = h;
-
- x += border;
- y += border;
-
- /* Calculate plane pointers */
- if (!(img->fmt & AOM_IMG_FMT_PLANAR)) {
- img->planes[AOM_PLANE_PACKED] =
- img->img_data + x * img->bps / 8 + y * img->stride[AOM_PLANE_PACKED];
- } else {
- const int bytes_per_sample =
- (img->fmt & AOM_IMG_FMT_HIGHBITDEPTH) ? 2 : 1;
- data = img->img_data;
-
- if (img->fmt & AOM_IMG_FMT_HAS_ALPHA) {
- img->planes[AOM_PLANE_ALPHA] =
- data + x * bytes_per_sample + y * img->stride[AOM_PLANE_ALPHA];
- data += (img->h + 2 * border) * img->stride[AOM_PLANE_ALPHA];
- }
-
- img->planes[AOM_PLANE_Y] =
- data + x * bytes_per_sample + y * img->stride[AOM_PLANE_Y];
- data += (img->h + 2 * border) * img->stride[AOM_PLANE_Y];
-
- unsigned int uv_border_h = border >> img->y_chroma_shift;
- unsigned int uv_x = x >> img->x_chroma_shift;
- unsigned int uv_y = y >> img->y_chroma_shift;
- if (!(img->fmt & AOM_IMG_FMT_UV_FLIP)) {
- img->planes[AOM_PLANE_U] =
- data + uv_x * bytes_per_sample + uv_y * img->stride[AOM_PLANE_U];
- data += ((img->h >> img->y_chroma_shift) + 2 * uv_border_h) *
- img->stride[AOM_PLANE_U];
- img->planes[AOM_PLANE_V] =
- data + uv_x * bytes_per_sample + uv_y * img->stride[AOM_PLANE_V];
- } else {
- img->planes[AOM_PLANE_V] =
- data + uv_x * bytes_per_sample + uv_y * img->stride[AOM_PLANE_V];
- data += ((img->h >> img->y_chroma_shift) + 2 * uv_border_h) *
- img->stride[AOM_PLANE_V];
- img->planes[AOM_PLANE_U] =
- data + uv_x * bytes_per_sample + uv_y * img->stride[AOM_PLANE_U];
- }
- }
- return 0;
- }
- return -1;
-}
-
-void aom_img_flip(aom_image_t *img) {
- /* Note: In the calculation pointer adjustment calculation, we want the
- * rhs to be promoted to a signed type. Section 6.3.1.8 of the ISO C99
- * standard indicates that if the adjustment parameter is unsigned, the
- * stride parameter will be promoted to unsigned, causing errors when
- * the lhs is a larger type than the rhs.
- */
- img->planes[AOM_PLANE_Y] += (signed)(img->d_h - 1) * img->stride[AOM_PLANE_Y];
- img->stride[AOM_PLANE_Y] = -img->stride[AOM_PLANE_Y];
-
- img->planes[AOM_PLANE_U] += (signed)((img->d_h >> img->y_chroma_shift) - 1) *
- img->stride[AOM_PLANE_U];
- img->stride[AOM_PLANE_U] = -img->stride[AOM_PLANE_U];
-
- img->planes[AOM_PLANE_V] += (signed)((img->d_h >> img->y_chroma_shift) - 1) *
- img->stride[AOM_PLANE_V];
- img->stride[AOM_PLANE_V] = -img->stride[AOM_PLANE_V];
-
- img->planes[AOM_PLANE_ALPHA] +=
- (signed)(img->d_h - 1) * img->stride[AOM_PLANE_ALPHA];
- img->stride[AOM_PLANE_ALPHA] = -img->stride[AOM_PLANE_ALPHA];
-}
-
-void aom_img_free(aom_image_t *img) {
- if (img) {
- if (img->img_data && img->img_data_owner) aom_free(img->img_data);
-
- if (img->self_allocd) free(img);
- }
-}
-
-int aom_img_plane_width(const aom_image_t *img, int plane) {
- if (plane > 0 && img->x_chroma_shift > 0)
- return (img->d_w + 1) >> img->x_chroma_shift;
- else
- return img->d_w;
-}
-
-int aom_img_plane_height(const aom_image_t *img, int plane) {
- if (plane > 0 && img->y_chroma_shift > 0)
- return (img->d_h + 1) >> img->y_chroma_shift;
- else
- return img->d_h;
-}
diff --git a/third_party/aom/aom/src/aom_integer.c b/third_party/aom/aom/src/aom_integer.c
deleted file mode 100644
index 7edfd0de8..000000000
--- a/third_party/aom/aom/src/aom_integer.c
+++ /dev/null
@@ -1,105 +0,0 @@
-/*
- * Copyright (c) 2018, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-#include <assert.h>
-
-#include "aom/aom_integer.h"
-
-static const size_t kMaximumLeb128Size = 8;
-static const uint8_t kLeb128ByteMask = 0x7f; // Binary: 01111111
-
-// Disallow values larger than 32-bits to ensure consistent behavior on 32 and
-// 64 bit targets: value is typically used to determine buffer allocation size
-// when decoded.
-static const uint64_t kMaximumLeb128Value = UINT32_MAX;
-
-size_t aom_uleb_size_in_bytes(uint64_t value) {
- size_t size = 0;
- do {
- ++size;
- } while ((value >>= 7) != 0);
- return size;
-}
-
-int aom_uleb_decode(const uint8_t *buffer, size_t available, uint64_t *value,
- size_t *length) {
- if (buffer && value) {
- *value = 0;
- for (size_t i = 0; i < kMaximumLeb128Size && i < available; ++i) {
- const uint8_t decoded_byte = *(buffer + i) & kLeb128ByteMask;
- *value |= ((uint64_t)decoded_byte) << (i * 7);
- if ((*(buffer + i) >> 7) == 0) {
- if (length) {
- *length = i + 1;
- }
-
- // Fail on values larger than 32-bits to ensure consistent behavior on
- // 32 and 64 bit targets: value is typically used to determine buffer
- // allocation size.
- if (*value > UINT32_MAX) return -1;
-
- return 0;
- }
- }
- }
-
- // If we get here, either the buffer/value pointers were invalid,
- // or we ran over the available space
- return -1;
-}
-
-int aom_uleb_encode(uint64_t value, size_t available, uint8_t *coded_value,
- size_t *coded_size) {
- const size_t leb_size = aom_uleb_size_in_bytes(value);
- if (value > kMaximumLeb128Value || leb_size > kMaximumLeb128Size ||
- leb_size > available || !coded_value || !coded_size) {
- return -1;
- }
-
- for (size_t i = 0; i < leb_size; ++i) {
- uint8_t byte = value & 0x7f;
- value >>= 7;
-
- if (value != 0) byte |= 0x80; // Signal that more bytes follow.
-
- *(coded_value + i) = byte;
- }
-
- *coded_size = leb_size;
- return 0;
-}
-
-int aom_uleb_encode_fixed_size(uint64_t value, size_t available,
- size_t pad_to_size, uint8_t *coded_value,
- size_t *coded_size) {
- if (value > kMaximumLeb128Value || !coded_value || !coded_size ||
- available < pad_to_size || pad_to_size > kMaximumLeb128Size) {
- return -1;
- }
- const uint64_t limit = 1ULL << (7 * pad_to_size);
- if (value >= limit) {
- // Can't encode 'value' within 'pad_to_size' bytes
- return -1;
- }
-
- for (size_t i = 0; i < pad_to_size; ++i) {
- uint8_t byte = value & 0x7f;
- value >>= 7;
-
- if (i < pad_to_size - 1) byte |= 0x80; // Signal that more bytes follow.
-
- *(coded_value + i) = byte;
- }
-
- assert(value == 0);
-
- *coded_size = pad_to_size;
- return 0;
-}
diff --git a/third_party/aom/aom_dsp/add_noise.c b/third_party/aom/aom_dsp/add_noise.c
deleted file mode 100644
index bfb3e7e00..000000000
--- a/third_party/aom/aom_dsp/add_noise.c
+++ /dev/null
@@ -1,73 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#include <math.h>
-#include <stdlib.h>
-
-#include "config/aom_config.h"
-#include "config/aom_dsp_rtcd.h"
-
-#include "aom/aom_integer.h"
-#include "aom_ports/mem.h"
-
-void aom_plane_add_noise_c(uint8_t *start, char *noise, char blackclamp[16],
- char whiteclamp[16], char bothclamp[16],
- unsigned int width, unsigned int height, int pitch) {
- unsigned int i, j;
-
- for (i = 0; i < height; ++i) {
- uint8_t *pos = start + i * pitch;
- char *ref = (char *)(noise + (rand() & 0xff)); // NOLINT
-
- for (j = 0; j < width; ++j) {
- int v = pos[j];
-
- v = clamp(v - blackclamp[0], 0, 255);
- v = clamp(v + bothclamp[0], 0, 255);
- v = clamp(v - whiteclamp[0], 0, 255);
-
- pos[j] = v + ref[j];
- }
- }
-}
-
-static double gaussian(double sigma, double mu, double x) {
- return 1 / (sigma * sqrt(2.0 * 3.14159265)) *
- (exp(-(x - mu) * (x - mu) / (2 * sigma * sigma)));
-}
-
-int aom_setup_noise(double sigma, int size, char *noise) {
- char char_dist[256];
- int next = 0, i, j;
-
- // set up a 256 entry lookup that matches gaussian distribution
- for (i = -32; i < 32; ++i) {
- const int a_i = (int)(0.5 + 256 * gaussian(sigma, 0, i));
- if (a_i) {
- for (j = 0; j < a_i; ++j) {
- char_dist[next + j] = (char)i;
- }
- next = next + j;
- }
- }
-
- // Rounding error - might mean we have less than 256.
- for (; next < 256; ++next) {
- char_dist[next] = 0;
- }
-
- for (i = 0; i < size; ++i) {
- noise[i] = char_dist[rand() & 0xff]; // NOLINT
- }
-
- // Returns the highest non 0 value used in distribution.
- return -char_dist[0];
-}
diff --git a/third_party/aom/aom_dsp/aom_convolve.c b/third_party/aom/aom_dsp/aom_convolve.c
deleted file mode 100644
index 4791826da..000000000
--- a/third_party/aom/aom_dsp/aom_convolve.c
+++ /dev/null
@@ -1,238 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#include <assert.h>
-#include <string.h>
-
-#include "config/aom_config.h"
-#include "config/aom_dsp_rtcd.h"
-
-#include "aom/aom_integer.h"
-#include "aom_dsp/aom_dsp_common.h"
-#include "aom_dsp/aom_filter.h"
-#include "aom_ports/mem.h"
-
-static INLINE int horz_scalar_product(const uint8_t *a, const int16_t *b) {
- int sum = 0;
- for (int k = 0; k < SUBPEL_TAPS; ++k) sum += a[k] * b[k];
- return sum;
-}
-
-static INLINE int vert_scalar_product(const uint8_t *a, ptrdiff_t a_stride,
- const int16_t *b) {
- int sum = 0;
- for (int k = 0; k < SUBPEL_TAPS; ++k) sum += a[k * a_stride] * b[k];
- return sum;
-}
-
-static void convolve_horiz(const uint8_t *src, ptrdiff_t src_stride,
- uint8_t *dst, ptrdiff_t dst_stride,
- const InterpKernel *x_filters, int x0_q4,
- int x_step_q4, int w, int h) {
- src -= SUBPEL_TAPS / 2 - 1;
- for (int y = 0; y < h; ++y) {
- int x_q4 = x0_q4;
- for (int x = 0; x < w; ++x) {
- const uint8_t *const src_x = &src[x_q4 >> SUBPEL_BITS];
- const int16_t *const x_filter = x_filters[x_q4 & SUBPEL_MASK];
- const int sum = horz_scalar_product(src_x, x_filter);
- dst[x] = clip_pixel(ROUND_POWER_OF_TWO(sum, FILTER_BITS));
- x_q4 += x_step_q4;
- }
- src += src_stride;
- dst += dst_stride;
- }
-}
-
-static void convolve_vert(const uint8_t *src, ptrdiff_t src_stride,
- uint8_t *dst, ptrdiff_t dst_stride,
- const InterpKernel *y_filters, int y0_q4,
- int y_step_q4, int w, int h) {
- src -= src_stride * (SUBPEL_TAPS / 2 - 1);
-
- for (int x = 0; x < w; ++x) {
- int y_q4 = y0_q4;
- for (int y = 0; y < h; ++y) {
- const unsigned char *src_y = &src[(y_q4 >> SUBPEL_BITS) * src_stride];
- const int16_t *const y_filter = y_filters[y_q4 & SUBPEL_MASK];
- const int sum = vert_scalar_product(src_y, src_stride, y_filter);
- dst[y * dst_stride] = clip_pixel(ROUND_POWER_OF_TWO(sum, FILTER_BITS));
- y_q4 += y_step_q4;
- }
- ++src;
- ++dst;
- }
-}
-
-static const InterpKernel *get_filter_base(const int16_t *filter) {
- // NOTE: This assumes that the filter table is 256-byte aligned.
- // TODO(agrange) Modify to make independent of table alignment.
- return (const InterpKernel *)(((intptr_t)filter) & ~((intptr_t)0xFF));
-}
-
-static int get_filter_offset(const int16_t *f, const InterpKernel *base) {
- return (int)((const InterpKernel *)(intptr_t)f - base);
-}
-
-void aom_convolve8_horiz_c(const uint8_t *src, ptrdiff_t src_stride,
- uint8_t *dst, ptrdiff_t dst_stride,
- const int16_t *filter_x, int x_step_q4,
- const int16_t *filter_y, int y_step_q4, int w,
- int h) {
- const InterpKernel *const filters_x = get_filter_base(filter_x);
- const int x0_q4 = get_filter_offset(filter_x, filters_x);
-
- (void)filter_y;
- (void)y_step_q4;
-
- convolve_horiz(src, src_stride, dst, dst_stride, filters_x, x0_q4, x_step_q4,
- w, h);
-}
-
-void aom_convolve8_vert_c(const uint8_t *src, ptrdiff_t src_stride,
- uint8_t *dst, ptrdiff_t dst_stride,
- const int16_t *filter_x, int x_step_q4,
- const int16_t *filter_y, int y_step_q4, int w,
- int h) {
- const InterpKernel *const filters_y = get_filter_base(filter_y);
- const int y0_q4 = get_filter_offset(filter_y, filters_y);
-
- (void)filter_x;
- (void)x_step_q4;
-
- convolve_vert(src, src_stride, dst, dst_stride, filters_y, y0_q4, y_step_q4,
- w, h);
-}
-
-void aom_convolve_copy_c(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst,
- ptrdiff_t dst_stride, const int16_t *filter_x,
- int filter_x_stride, const int16_t *filter_y,
- int filter_y_stride, int w, int h) {
- int r;
-
- (void)filter_x;
- (void)filter_x_stride;
- (void)filter_y;
- (void)filter_y_stride;
-
- for (r = h; r > 0; --r) {
- memcpy(dst, src, w);
- src += src_stride;
- dst += dst_stride;
- }
-}
-
-static INLINE int highbd_vert_scalar_product(const uint16_t *a,
- ptrdiff_t a_stride,
- const int16_t *b) {
- int sum = 0;
- for (int k = 0; k < SUBPEL_TAPS; ++k) sum += a[k * a_stride] * b[k];
- return sum;
-}
-
-static INLINE int highbd_horz_scalar_product(const uint16_t *a,
- const int16_t *b) {
- int sum = 0;
- for (int k = 0; k < SUBPEL_TAPS; ++k) sum += a[k] * b[k];
- return sum;
-}
-
-static void highbd_convolve_horiz(const uint8_t *src8, ptrdiff_t src_stride,
- uint8_t *dst8, ptrdiff_t dst_stride,
- const InterpKernel *x_filters, int x0_q4,
- int x_step_q4, int w, int h, int bd) {
- uint16_t *src = CONVERT_TO_SHORTPTR(src8);
- uint16_t *dst = CONVERT_TO_SHORTPTR(dst8);
- src -= SUBPEL_TAPS / 2 - 1;
- for (int y = 0; y < h; ++y) {
- int x_q4 = x0_q4;
- for (int x = 0; x < w; ++x) {
- const uint16_t *const src_x = &src[x_q4 >> SUBPEL_BITS];
- const int16_t *const x_filter = x_filters[x_q4 & SUBPEL_MASK];
- const int sum = highbd_horz_scalar_product(src_x, x_filter);
- dst[x] = clip_pixel_highbd(ROUND_POWER_OF_TWO(sum, FILTER_BITS), bd);
- x_q4 += x_step_q4;
- }
- src += src_stride;
- dst += dst_stride;
- }
-}
-
-static void highbd_convolve_vert(const uint8_t *src8, ptrdiff_t src_stride,
- uint8_t *dst8, ptrdiff_t dst_stride,
- const InterpKernel *y_filters, int y0_q4,
- int y_step_q4, int w, int h, int bd) {
- uint16_t *src = CONVERT_TO_SHORTPTR(src8);
- uint16_t *dst = CONVERT_TO_SHORTPTR(dst8);
- src -= src_stride * (SUBPEL_TAPS / 2 - 1);
- for (int x = 0; x < w; ++x) {
- int y_q4 = y0_q4;
- for (int y = 0; y < h; ++y) {
- const uint16_t *src_y = &src[(y_q4 >> SUBPEL_BITS) * src_stride];
- const int16_t *const y_filter = y_filters[y_q4 & SUBPEL_MASK];
- const int sum = highbd_vert_scalar_product(src_y, src_stride, y_filter);
- dst[y * dst_stride] =
- clip_pixel_highbd(ROUND_POWER_OF_TWO(sum, FILTER_BITS), bd);
- y_q4 += y_step_q4;
- }
- ++src;
- ++dst;
- }
-}
-
-void aom_highbd_convolve8_horiz_c(const uint8_t *src, ptrdiff_t src_stride,
- uint8_t *dst, ptrdiff_t dst_stride,
- const int16_t *filter_x, int x_step_q4,
- const int16_t *filter_y, int y_step_q4, int w,
- int h, int bd) {
- const InterpKernel *const filters_x = get_filter_base(filter_x);
- const int x0_q4 = get_filter_offset(filter_x, filters_x);
- (void)filter_y;
- (void)y_step_q4;
-
- highbd_convolve_horiz(src, src_stride, dst, dst_stride, filters_x, x0_q4,
- x_step_q4, w, h, bd);
-}
-
-void aom_highbd_convolve8_vert_c(const uint8_t *src, ptrdiff_t src_stride,
- uint8_t *dst, ptrdiff_t dst_stride,
- const int16_t *filter_x, int x_step_q4,
- const int16_t *filter_y, int y_step_q4, int w,
- int h, int bd) {
- const InterpKernel *const filters_y = get_filter_base(filter_y);
- const int y0_q4 = get_filter_offset(filter_y, filters_y);
- (void)filter_x;
- (void)x_step_q4;
-
- highbd_convolve_vert(src, src_stride, dst, dst_stride, filters_y, y0_q4,
- y_step_q4, w, h, bd);
-}
-
-void aom_highbd_convolve_copy_c(const uint8_t *src8, ptrdiff_t src_stride,
- uint8_t *dst8, ptrdiff_t dst_stride,
- const int16_t *filter_x, int filter_x_stride,
- const int16_t *filter_y, int filter_y_stride,
- int w, int h, int bd) {
- int r;
- uint16_t *src = CONVERT_TO_SHORTPTR(src8);
- uint16_t *dst = CONVERT_TO_SHORTPTR(dst8);
- (void)filter_x;
- (void)filter_y;
- (void)filter_x_stride;
- (void)filter_y_stride;
- (void)bd;
-
- for (r = h; r > 0; --r) {
- memcpy(dst, src, w * sizeof(uint16_t));
- src += src_stride;
- dst += dst_stride;
- }
-}
diff --git a/third_party/aom/aom_dsp/aom_dsp.cmake b/third_party/aom/aom_dsp/aom_dsp.cmake
deleted file mode 100644
index 11ff73756..000000000
--- a/third_party/aom/aom_dsp/aom_dsp.cmake
+++ /dev/null
@@ -1,356 +0,0 @@
-#
-# Copyright (c) 2017, Alliance for Open Media. All rights reserved
-#
-# This source code is subject to the terms of the BSD 2 Clause License and the
-# Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License was
-# not distributed with this source code in the LICENSE file, you can obtain it
-# at www.aomedia.org/license/software. If the Alliance for Open Media Patent
-# License 1.0 was not distributed with this source code in the PATENTS file, you
-# can obtain it at www.aomedia.org/license/patent.
-#
-if(AOM_AOM_DSP_AOM_DSP_CMAKE_)
- return()
-endif() # AOM_AOM_DSP_AOM_DSP_CMAKE_
-set(AOM_AOM_DSP_AOM_DSP_CMAKE_ 1)
-
-list(APPEND AOM_DSP_COMMON_SOURCES
- "${AOM_ROOT}/aom_dsp/aom_convolve.c"
- "${AOM_ROOT}/aom_dsp/aom_dsp_common.h"
- "${AOM_ROOT}/aom_dsp/aom_filter.h"
- "${AOM_ROOT}/aom_dsp/aom_simd.h"
- "${AOM_ROOT}/aom_dsp/aom_simd_inline.h"
- "${AOM_ROOT}/aom_dsp/bitreader_buffer.c"
- "${AOM_ROOT}/aom_dsp/bitreader_buffer.h"
- "${AOM_ROOT}/aom_dsp/bitwriter_buffer.c"
- "${AOM_ROOT}/aom_dsp/bitwriter_buffer.h"
- "${AOM_ROOT}/aom_dsp/blend.h"
- "${AOM_ROOT}/aom_dsp/blend_a64_hmask.c"
- "${AOM_ROOT}/aom_dsp/blend_a64_mask.c"
- "${AOM_ROOT}/aom_dsp/blend_a64_vmask.c"
- "${AOM_ROOT}/aom_dsp/entcode.c"
- "${AOM_ROOT}/aom_dsp/entcode.h"
- "${AOM_ROOT}/aom_dsp/fft.c"
- "${AOM_ROOT}/aom_dsp/fft_common.h"
- "${AOM_ROOT}/aom_dsp/intrapred.c"
- "${AOM_ROOT}/aom_dsp/intrapred_common.h"
- "${AOM_ROOT}/aom_dsp/loopfilter.c"
- "${AOM_ROOT}/aom_dsp/prob.h"
- "${AOM_ROOT}/aom_dsp/simd/v128_intrinsics.h"
- "${AOM_ROOT}/aom_dsp/simd/v128_intrinsics_c.h"
- "${AOM_ROOT}/aom_dsp/simd/v256_intrinsics.h"
- "${AOM_ROOT}/aom_dsp/simd/v256_intrinsics_c.h"
- "${AOM_ROOT}/aom_dsp/simd/v64_intrinsics.h"
- "${AOM_ROOT}/aom_dsp/simd/v64_intrinsics_c.h"
- "${AOM_ROOT}/aom_dsp/subtract.c"
- "${AOM_ROOT}/aom_dsp/txfm_common.h"
- "${AOM_ROOT}/aom_dsp/x86/convolve_common_intrin.h")
-
-list(APPEND AOM_DSP_COMMON_ASM_SSE2
- "${AOM_ROOT}/aom_dsp/x86/aom_convolve_copy_sse2.asm"
- "${AOM_ROOT}/aom_dsp/x86/aom_high_subpixel_8t_sse2.asm"
- "${AOM_ROOT}/aom_dsp/x86/aom_high_subpixel_bilinear_sse2.asm"
- "${AOM_ROOT}/aom_dsp/x86/aom_subpixel_8t_sse2.asm"
- "${AOM_ROOT}/aom_dsp/x86/aom_subpixel_bilinear_sse2.asm"
- "${AOM_ROOT}/aom_dsp/x86/highbd_intrapred_sse2.asm"
- "${AOM_ROOT}/aom_dsp/x86/intrapred_sse2.asm"
- "${AOM_ROOT}/aom_dsp/x86/inv_wht_sse2.asm")
-
-list(APPEND AOM_DSP_COMMON_INTRIN_SSE2
- "${AOM_ROOT}/aom_dsp/x86/aom_asm_stubs.c"
- "${AOM_ROOT}/aom_dsp/x86/convolve.h"
- "${AOM_ROOT}/aom_dsp/x86/convolve_sse2.h"
- "${AOM_ROOT}/aom_dsp/x86/fft_sse2.c"
- "${AOM_ROOT}/aom_dsp/x86/highbd_intrapred_sse2.c"
- "${AOM_ROOT}/aom_dsp/x86/highbd_loopfilter_sse2.c"
- "${AOM_ROOT}/aom_dsp/x86/intrapred_sse2.c"
- "${AOM_ROOT}/aom_dsp/x86/loopfilter_sse2.c"
- "${AOM_ROOT}/aom_dsp/x86/lpf_common_sse2.h"
- "${AOM_ROOT}/aom_dsp/x86/mem_sse2.h"
- "${AOM_ROOT}/aom_dsp/x86/transpose_sse2.h"
- "${AOM_ROOT}/aom_dsp/x86/txfm_common_sse2.h"
- "${AOM_ROOT}/aom_dsp/x86/sum_squares_sse2.h")
-
-list(APPEND AOM_DSP_COMMON_ASM_SSSE3
- "${AOM_ROOT}/aom_dsp/x86/aom_subpixel_8t_ssse3.asm"
- "${AOM_ROOT}/aom_dsp/x86/aom_subpixel_bilinear_ssse3.asm")
-
-list(APPEND AOM_DSP_COMMON_INTRIN_SSSE3
- "${AOM_ROOT}/aom_dsp/x86/aom_subpixel_8t_intrin_ssse3.c"
- "${AOM_ROOT}/aom_dsp/x86/highbd_convolve_ssse3.c"
- "${AOM_ROOT}/aom_dsp/x86/intrapred_ssse3.c")
-
-list(APPEND AOM_DSP_COMMON_INTRIN_SSE4_1
- "${AOM_ROOT}/aom_dsp/x86/blend_mask_sse4.h"
- "${AOM_ROOT}/aom_dsp/x86/blend_a64_hmask_sse4.c"
- "${AOM_ROOT}/aom_dsp/x86/blend_a64_mask_sse4.c"
- "${AOM_ROOT}/aom_dsp/x86/blend_a64_vmask_sse4.c")
-
-list(APPEND AOM_DSP_COMMON_INTRIN_AVX2
- "${AOM_ROOT}/aom_dsp/x86/aom_subpixel_8t_intrin_avx2.c"
- "${AOM_ROOT}/aom_dsp/x86/common_avx2.h"
- "${AOM_ROOT}/aom_dsp/x86/txfm_common_avx2.h"
- "${AOM_ROOT}/aom_dsp/x86/convolve_avx2.h"
- "${AOM_ROOT}/aom_dsp/x86/fft_avx2.c"
- "${AOM_ROOT}/aom_dsp/x86/highbd_convolve_avx2.c"
- "${AOM_ROOT}/aom_dsp/x86/highbd_loopfilter_avx2.c"
- "${AOM_ROOT}/aom_dsp/x86/intrapred_avx2.c"
- "${AOM_ROOT}/aom_dsp/x86/blend_a64_mask_avx2.c")
-
-list(APPEND AOM_DSP_COMMON_INTRIN_NEON
- "${AOM_ROOT}/aom_dsp/arm/fwd_txfm_neon.c"
- "${AOM_ROOT}/aom_dsp/arm/loopfilter_neon.c"
- "${AOM_ROOT}/aom_dsp/arm/intrapred_neon.c"
- "${AOM_ROOT}/aom_dsp/arm/subtract_neon.c"
- "${AOM_ROOT}/aom_dsp/arm/blend_a64_mask_neon.c")
-
-list(APPEND AOM_DSP_COMMON_INTRIN_DSPR2
- "${AOM_ROOT}/aom_dsp/mips/common_dspr2.c"
- "${AOM_ROOT}/aom_dsp/mips/common_dspr2.h"
- "${AOM_ROOT}/aom_dsp/mips/convolve2_dspr2.c"
- "${AOM_ROOT}/aom_dsp/mips/convolve2_horiz_dspr2.c"
- "${AOM_ROOT}/aom_dsp/mips/convolve2_vert_dspr2.c"
- "${AOM_ROOT}/aom_dsp/mips/convolve8_dspr2.c"
- "${AOM_ROOT}/aom_dsp/mips/convolve8_horiz_dspr2.c"
- "${AOM_ROOT}/aom_dsp/mips/convolve8_vert_dspr2.c"
- "${AOM_ROOT}/aom_dsp/mips/convolve_common_dspr2.h"
- "${AOM_ROOT}/aom_dsp/mips/intrapred16_dspr2.c"
- "${AOM_ROOT}/aom_dsp/mips/intrapred4_dspr2.c"
- "${AOM_ROOT}/aom_dsp/mips/intrapred8_dspr2.c"
- "${AOM_ROOT}/aom_dsp/mips/inv_txfm_dspr2.h")
-
-list(APPEND AOM_DSP_COMMON_INTRIN_MSA
- "${AOM_ROOT}/aom_dsp/mips/aom_convolve8_horiz_msa.c"
- "${AOM_ROOT}/aom_dsp/mips/aom_convolve8_vert_msa.c"
- "${AOM_ROOT}/aom_dsp/mips/aom_convolve_copy_msa.c"
- "${AOM_ROOT}/aom_dsp/mips/aom_convolve_msa.h"
- "${AOM_ROOT}/aom_dsp/mips/intrapred_msa.c"
- "${AOM_ROOT}/aom_dsp/mips/macros_msa.h")
-
-if(CONFIG_AV1_DECODER)
- list(APPEND AOM_DSP_DECODER_SOURCES
- "${AOM_ROOT}/aom_dsp/binary_codes_reader.c"
- "${AOM_ROOT}/aom_dsp/binary_codes_reader.h"
- "${AOM_ROOT}/aom_dsp/bitreader.h"
- "${AOM_ROOT}/aom_dsp/daalaboolreader.c"
- "${AOM_ROOT}/aom_dsp/daalaboolreader.h"
- "${AOM_ROOT}/aom_dsp/entdec.c" "${AOM_ROOT}/aom_dsp/entdec.h"
- "${AOM_ROOT}/aom_dsp/grain_synthesis.c"
- "${AOM_ROOT}/aom_dsp/grain_synthesis.h")
-endif()
-
-if(CONFIG_AV1_ENCODER)
- list(APPEND AOM_DSP_ENCODER_SOURCES
- "${AOM_ROOT}/aom_dsp/binary_codes_writer.c"
- "${AOM_ROOT}/aom_dsp/binary_codes_writer.h"
- "${AOM_ROOT}/aom_dsp/bitwriter.h"
- "${AOM_ROOT}/aom_dsp/daalaboolwriter.c"
- "${AOM_ROOT}/aom_dsp/daalaboolwriter.h"
- "${AOM_ROOT}/aom_dsp/entenc.c"
- "${AOM_ROOT}/aom_dsp/entenc.h"
- "${AOM_ROOT}/aom_dsp/fwd_txfm.c"
- "${AOM_ROOT}/aom_dsp/grain_table.c"
- "${AOM_ROOT}/aom_dsp/grain_table.h"
- "${AOM_ROOT}/aom_dsp/noise_model.c"
- "${AOM_ROOT}/aom_dsp/noise_model.h"
- "${AOM_ROOT}/aom_dsp/noise_util.c"
- "${AOM_ROOT}/aom_dsp/noise_util.h"
- "${AOM_ROOT}/aom_dsp/psnr.c"
- "${AOM_ROOT}/aom_dsp/psnr.h"
- "${AOM_ROOT}/aom_dsp/quantize.c"
- "${AOM_ROOT}/aom_dsp/quantize.h"
- "${AOM_ROOT}/aom_dsp/sad.c"
- "${AOM_ROOT}/aom_dsp/sse.c"
- "${AOM_ROOT}/aom_dsp/sad_av1.c"
- "${AOM_ROOT}/aom_dsp/sum_squares.c"
- "${AOM_ROOT}/aom_dsp/variance.c"
- "${AOM_ROOT}/aom_dsp/variance.h")
-
- list(APPEND AOM_DSP_ENCODER_ASM_SSE2
- "${AOM_ROOT}/aom_dsp/x86/highbd_sad4d_sse2.asm"
- "${AOM_ROOT}/aom_dsp/x86/highbd_sad_sse2.asm"
- "${AOM_ROOT}/aom_dsp/x86/highbd_subpel_variance_impl_sse2.asm"
- "${AOM_ROOT}/aom_dsp/x86/highbd_variance_impl_sse2.asm"
- "${AOM_ROOT}/aom_dsp/x86/sad4d_sse2.asm"
- "${AOM_ROOT}/aom_dsp/x86/sad_sse2.asm"
- "${AOM_ROOT}/aom_dsp/x86/subpel_variance_sse2.asm"
- "${AOM_ROOT}/aom_dsp/x86/subtract_sse2.asm")
-
- list(APPEND AOM_DSP_ENCODER_INTRIN_SSE2
- "${AOM_ROOT}/aom_dsp/x86/fwd_txfm_impl_sse2.h"
- "${AOM_ROOT}/aom_dsp/x86/fwd_txfm_sse2.c"
- "${AOM_ROOT}/aom_dsp/x86/fwd_txfm_sse2.h"
- "${AOM_ROOT}/aom_dsp/x86/highbd_quantize_intrin_sse2.c"
- "${AOM_ROOT}/aom_dsp/x86/highbd_subtract_sse2.c"
- "${AOM_ROOT}/aom_dsp/x86/highbd_variance_sse2.c"
- "${AOM_ROOT}/aom_dsp/x86/quantize_sse2.c"
- "${AOM_ROOT}/aom_dsp/x86/quantize_x86.h"
- "${AOM_ROOT}/aom_dsp/x86/sum_squares_sse2.c"
- "${AOM_ROOT}/aom_dsp/x86/variance_sse2.c")
-
- list(APPEND AOM_DSP_ENCODER_ASM_SSSE3_X86_64
- "${AOM_ROOT}/aom_dsp/x86/fwd_txfm_ssse3_x86_64.asm"
- "${AOM_ROOT}/aom_dsp/x86/ssim_opt_x86_64.asm")
-
- list(APPEND AOM_DSP_ENCODER_INTRIN_AVX2
- "${AOM_ROOT}/aom_dsp/x86/masked_sad_intrin_avx2.c"
- "${AOM_ROOT}/aom_dsp/x86/subtract_avx2.c"
- "${AOM_ROOT}/aom_dsp/x86/highbd_quantize_intrin_avx2.c"
- "${AOM_ROOT}/aom_dsp/x86/sad4d_avx2.c"
- "${AOM_ROOT}/aom_dsp/x86/sad_avx2.c"
- "${AOM_ROOT}/aom_dsp/x86/sad_highbd_avx2.c"
- "${AOM_ROOT}/aom_dsp/x86/sad_impl_avx2.c"
- "${AOM_ROOT}/aom_dsp/x86/variance_avx2.c"
- "${AOM_ROOT}/aom_dsp/x86/highbd_variance_avx2.c"
- "${AOM_ROOT}/aom_dsp/x86/sse_avx2.c"
- "${AOM_ROOT}/aom_dsp/x86/variance_impl_avx2.c"
- "${AOM_ROOT}/aom_dsp/x86/obmc_sad_avx2.c"
- "${AOM_ROOT}/aom_dsp/x86/obmc_variance_avx2.c"
- "${AOM_ROOT}/aom_dsp/x86/sum_squares_avx2.c")
-
- list(APPEND AOM_DSP_ENCODER_ASM_SSSE3_X86_64
- "${AOM_ROOT}/aom_dsp/x86/quantize_ssse3_x86_64.asm")
-
- list(APPEND AOM_DSP_ENCODER_AVX_ASM_X86_64
- "${AOM_ROOT}/aom_dsp/x86/quantize_avx_x86_64.asm")
-
- list(APPEND AOM_DSP_ENCODER_INTRIN_SSSE3
- "${AOM_ROOT}/aom_dsp/x86/masked_sad_intrin_ssse3.h"
- "${AOM_ROOT}/aom_dsp/x86/masked_sad_intrin_ssse3.c"
- "${AOM_ROOT}/aom_dsp/x86/masked_variance_intrin_ssse3.h"
- "${AOM_ROOT}/aom_dsp/x86/masked_variance_intrin_ssse3.c"
- "${AOM_ROOT}/aom_dsp/x86/variance_impl_ssse3.c"
- "${AOM_ROOT}/aom_dsp/x86/jnt_variance_ssse3.c"
- "${AOM_ROOT}/aom_dsp/x86/jnt_sad_ssse3.c")
-
- list(APPEND AOM_DSP_ENCODER_INTRIN_SSE4_1
- "${AOM_ROOT}/aom_dsp/x86/highbd_variance_sse4.c"
- "${AOM_ROOT}/aom_dsp/x86/sse_sse4.c"
- "${AOM_ROOT}/aom_dsp/x86/obmc_sad_sse4.c"
- "${AOM_ROOT}/aom_dsp/x86/obmc_variance_sse4.c")
-
- list(APPEND AOM_DSP_ENCODER_INTRIN_NEON
- "${AOM_ROOT}/aom_dsp/arm/sad4d_neon.c"
- "${AOM_ROOT}/aom_dsp/arm/sad_neon.c"
- "${AOM_ROOT}/aom_dsp/arm/subpel_variance_neon.c"
- "${AOM_ROOT}/aom_dsp/arm/variance_neon.c")
-
- list(APPEND AOM_DSP_ENCODER_INTRIN_MSA "${AOM_ROOT}/aom_dsp/mips/sad_msa.c"
- "${AOM_ROOT}/aom_dsp/mips/subtract_msa.c"
- "${AOM_ROOT}/aom_dsp/mips/variance_msa.c"
- "${AOM_ROOT}/aom_dsp/mips/sub_pixel_variance_msa.c")
-
- if(CONFIG_INTERNAL_STATS)
- list(APPEND AOM_DSP_ENCODER_SOURCES "${AOM_ROOT}/aom_dsp/fastssim.c"
- "${AOM_ROOT}/aom_dsp/psnrhvs.c" "${AOM_ROOT}/aom_dsp/ssim.c"
- "${AOM_ROOT}/aom_dsp/ssim.h")
- endif()
-endif()
-
-# Creates aom_dsp build targets. Must not be called until after libaom target
-# has been created.
-function(setup_aom_dsp_targets)
- add_library(aom_dsp_common OBJECT ${AOM_DSP_COMMON_SOURCES})
- list(APPEND AOM_LIB_TARGETS aom_dsp_common)
- create_dummy_source_file("aom_av1" "c" "dummy_source_file")
- add_library(aom_dsp OBJECT "${dummy_source_file}")
- target_sources(aom PRIVATE $<TARGET_OBJECTS:aom_dsp_common>)
- list(APPEND AOM_LIB_TARGETS aom_dsp)
-
- # Not all generators support libraries consisting only of object files. Add a
- # dummy source file to the aom_dsp target.
- add_dummy_source_file_to_target("aom_dsp" "c")
-
- if(CONFIG_AV1_DECODER)
- add_library(aom_dsp_decoder OBJECT ${AOM_DSP_DECODER_SOURCES})
- list(APPEND AOM_LIB_TARGETS aom_dsp_decoder)
- target_sources(aom PRIVATE $<TARGET_OBJECTS:aom_dsp_decoder>)
- endif()
-
- if(CONFIG_AV1_ENCODER)
- add_library(aom_dsp_encoder OBJECT ${AOM_DSP_ENCODER_SOURCES})
- list(APPEND AOM_LIB_TARGETS aom_dsp_encoder)
- target_sources(aom PRIVATE $<TARGET_OBJECTS:aom_dsp_encoder>)
- endif()
-
- if(HAVE_SSE2)
- add_asm_library("aom_dsp_common_sse2" "AOM_DSP_COMMON_ASM_SSE2" "aom")
- add_intrinsics_object_library("-msse2" "sse2" "aom_dsp_common"
- "AOM_DSP_COMMON_INTRIN_SSE2" "aom")
-
- if(CONFIG_AV1_ENCODER)
- add_asm_library("aom_dsp_encoder_sse2" "AOM_DSP_ENCODER_ASM_SSE2" "aom")
- add_intrinsics_object_library("-msse2" "sse2" "aom_dsp_encoder"
- "AOM_DSP_ENCODER_INTRIN_SSE2" "aom")
- endif()
- endif()
-
- if(HAVE_SSSE3)
- add_asm_library("aom_dsp_common_ssse3" "AOM_DSP_COMMON_ASM_SSSE3" "aom")
- add_intrinsics_object_library("-mssse3" "ssse3" "aom_dsp_common"
- "AOM_DSP_COMMON_INTRIN_SSSE3" "aom")
-
- if(CONFIG_AV1_ENCODER)
- if("${AOM_TARGET_CPU}" STREQUAL "x86_64")
- list(APPEND AOM_DSP_ENCODER_ASM_SSSE3
- ${AOM_DSP_ENCODER_ASM_SSSE3_X86_64})
- endif()
- add_asm_library("aom_dsp_encoder_ssse3" "AOM_DSP_ENCODER_ASM_SSSE3" "aom")
- add_intrinsics_object_library("-mssse3" "ssse3" "aom_dsp_encoder"
- "AOM_DSP_ENCODER_INTRIN_SSSE3" "aom")
- endif()
- endif()
-
- if(HAVE_SSE4_1)
- add_intrinsics_object_library("-msse4.1" "sse4_1" "aom_dsp_common"
- "AOM_DSP_COMMON_INTRIN_SSE4_1" "aom")
- if(CONFIG_AV1_ENCODER)
- add_intrinsics_object_library("-msse4.1" "sse4_1" "aom_dsp_encoder"
- "AOM_DSP_ENCODER_INTRIN_SSE4_1" "aom")
- endif()
- endif()
-
- if(HAVE_AVX AND "${AOM_TARGET_CPU}" STREQUAL "x86_64")
- if(CONFIG_AV1_ENCODER)
- add_asm_library("aom_dsp_encoder_avx" "AOM_DSP_ENCODER_AVX_ASM_X86_64"
- "aom")
- endif()
- endif()
-
- if(HAVE_AVX2)
- add_intrinsics_object_library("-mavx2" "avx2" "aom_dsp_common"
- "AOM_DSP_COMMON_INTRIN_AVX2" "aom")
- if(CONFIG_AV1_ENCODER)
- add_intrinsics_object_library("-mavx2" "avx2" "aom_dsp_encoder"
- "AOM_DSP_ENCODER_INTRIN_AVX2" "aom")
- endif()
- endif()
-
- if(HAVE_NEON)
- add_intrinsics_object_library("${AOM_NEON_INTRIN_FLAG}" "neon"
- "aom_dsp_common" "AOM_DSP_COMMON_INTRIN_NEON"
- "aom")
- if(CONFIG_AV1_ENCODER)
- add_intrinsics_object_library("${AOM_NEON_INTRIN_FLAG}" "neon"
- "aom_dsp_encoder"
- "AOM_DSP_ENCODER_INTRIN_NEON" "aom")
- endif()
- endif()
-
- if(HAVE_DSPR2)
- add_intrinsics_object_library("" "dspr2" "aom_dsp_common"
- "AOM_DSP_COMMON_INTRIN_DSPR2" "aom")
- endif()
-
- if(HAVE_MSA)
- add_intrinsics_object_library("" "msa" "aom_dsp_common"
- "AOM_DSP_COMMON_INTRIN_MSA" "aom")
- if(CONFIG_AV1_ENCODER)
- add_intrinsics_object_library("" "msa" "aom_dsp_encoder"
- "AOM_DSP_ENCODER_INTRIN_MSA" "aom")
- endif()
- endif()
-
- # Pass the new lib targets up to the parent scope instance of
- # $AOM_LIB_TARGETS.
- set(AOM_LIB_TARGETS ${AOM_LIB_TARGETS} PARENT_SCOPE)
-endfunction()
diff --git a/third_party/aom/aom_dsp/aom_dsp_common.h b/third_party/aom/aom_dsp/aom_dsp_common.h
deleted file mode 100644
index a185b23c8..000000000
--- a/third_party/aom/aom_dsp/aom_dsp_common.h
+++ /dev/null
@@ -1,98 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#ifndef AOM_AOM_DSP_AOM_DSP_COMMON_H_
-#define AOM_AOM_DSP_AOM_DSP_COMMON_H_
-
-#include "config/aom_config.h"
-
-#include "aom/aom_integer.h"
-#include "aom_ports/mem.h"
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-#ifndef MAX_SB_SIZE
-#define MAX_SB_SIZE 128
-#endif // ndef MAX_SB_SIZE
-
-#define AOMMIN(x, y) (((x) < (y)) ? (x) : (y))
-#define AOMMAX(x, y) (((x) > (y)) ? (x) : (y))
-
-#define IMPLIES(a, b) (!(a) || (b)) // Logical 'a implies b' (or 'a -> b')
-
-#define IS_POWER_OF_TWO(x) (((x) & ((x)-1)) == 0)
-
-/* Left shifting a negative value became undefined behavior in C99 (downgraded
- from merely implementation-defined in C89). This should still compile to the
- correct thing on any two's-complement machine, but avoid ubsan warnings.*/
-#define AOM_SIGNED_SHL(x, shift) ((x) * (((x)*0 + 1) << (shift)))
-
-// These can be used to give a hint about branch outcomes.
-// This can have an effect, even if your target processor has a
-// good branch predictor, as these hints can affect basic block
-// ordering by the compiler.
-#ifdef __GNUC__
-#define LIKELY(v) __builtin_expect(v, 1)
-#define UNLIKELY(v) __builtin_expect(v, 0)
-#else
-#define LIKELY(v) (v)
-#define UNLIKELY(v) (v)
-#endif
-
-typedef uint8_t qm_val_t;
-#define AOM_QM_BITS 5
-
-// Note:
-// tran_low_t is the datatype used for final transform coefficients.
-// tran_high_t is the datatype used for intermediate transform stages.
-typedef int64_t tran_high_t;
-typedef int32_t tran_low_t;
-
-static INLINE uint8_t clip_pixel(int val) {
- return (val > 255) ? 255 : (val < 0) ? 0 : val;
-}
-
-static INLINE int clamp(int value, int low, int high) {
- return value < low ? low : (value > high ? high : value);
-}
-
-static INLINE int64_t clamp64(int64_t value, int64_t low, int64_t high) {
- return value < low ? low : (value > high ? high : value);
-}
-
-static INLINE double fclamp(double value, double low, double high) {
- return value < low ? low : (value > high ? high : value);
-}
-
-static INLINE uint16_t clip_pixel_highbd(int val, int bd) {
- switch (bd) {
- case 8:
- default: return (uint16_t)clamp(val, 0, 255);
- case 10: return (uint16_t)clamp(val, 0, 1023);
- case 12: return (uint16_t)clamp(val, 0, 4095);
- }
-}
-
-// The result of this branchless code is equivalent to (value < 0 ? 0 : value)
-// or max(0, value) and might be faster in some cases.
-// Care should be taken since the behavior of right shifting signed type
-// negative value is undefined by C standards and implementation defined,
-static INLINE unsigned int negative_to_zero(int value) {
- return value & ~(value >> (sizeof(value) * 8 - 1));
-}
-
-#ifdef __cplusplus
-} // extern "C"
-#endif
-
-#endif // AOM_AOM_DSP_AOM_DSP_COMMON_H_
diff --git a/third_party/aom/aom_dsp/aom_dsp_rtcd.c b/third_party/aom/aom_dsp/aom_dsp_rtcd.c
deleted file mode 100644
index 1514bd64e..000000000
--- a/third_party/aom/aom_dsp/aom_dsp_rtcd.c
+++ /dev/null
@@ -1,18 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-#include "config/aom_config.h"
-
-#define RTCD_C
-#include "config/aom_dsp_rtcd.h"
-
-#include "aom_ports/aom_once.h"
-
-void aom_dsp_rtcd() { aom_once(setup_rtcd_internal); }
diff --git a/third_party/aom/aom_dsp/aom_dsp_rtcd_defs.pl b/third_party/aom/aom_dsp/aom_dsp_rtcd_defs.pl
deleted file mode 100755
index 8e8a480fe..000000000
--- a/third_party/aom/aom_dsp/aom_dsp_rtcd_defs.pl
+++ /dev/null
@@ -1,1575 +0,0 @@
-##
-## Copyright (c) 2017, Alliance for Open Media. All rights reserved
-##
-## This source code is subject to the terms of the BSD 2 Clause License and
-## the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
-## was not distributed with this source code in the LICENSE file, you can
-## obtain it at www.aomedia.org/license/software. If the Alliance for Open
-## Media Patent License 1.0 was not distributed with this source code in the
-## PATENTS file, you can obtain it at www.aomedia.org/license/patent.
-##
-sub aom_dsp_forward_decls() {
-print <<EOF
-/*
- * DSP
- */
-
-#include "aom/aom_integer.h"
-#include "aom_dsp/aom_dsp_common.h"
-#include "av1/common/enums.h"
-#include "av1/common/blockd.h"
-
-EOF
-}
-forward_decls qw/aom_dsp_forward_decls/;
-
-# optimizations which depend on multiple features
-$avx2_ssse3 = '';
-if ((aom_config("HAVE_AVX2") eq "yes") && (aom_config("HAVE_SSSE3") eq "yes")) {
- $avx2_ssse3 = 'avx2';
-}
-
-# functions that are 64 bit only.
-$mmx_x86_64 = $sse2_x86_64 = $ssse3_x86_64 = $avx_x86_64 = $avx2_x86_64 = '';
-if ($opts{arch} eq "x86_64") {
- $mmx_x86_64 = 'mmx';
- $sse2_x86_64 = 'sse2';
- $ssse3_x86_64 = 'ssse3';
- $avx_x86_64 = 'avx';
- $avx2_x86_64 = 'avx2';
-}
-
-@block_widths = (4, 8, 16, 32, 64, 128);
-
-@block_sizes = ();
-foreach $w (@block_widths) {
- foreach $h (@block_widths) {
- push @block_sizes, [$w, $h] if ($w <= 2*$h && $h <= 2*$w) ;
- }
-}
-push @block_sizes, [4, 16];
-push @block_sizes, [16, 4];
-push @block_sizes, [8, 32];
-push @block_sizes, [32, 8];
-push @block_sizes, [16, 64];
-push @block_sizes, [64, 16];
-
-@tx_dims = (2, 4, 8, 16, 32, 64);
-@tx_sizes = ();
-foreach $w (@tx_dims) {
- push @tx_sizes, [$w, $w];
- foreach $h (@tx_dims) {
- push @tx_sizes, [$w, $h] if ($w >=4 && $h >=4 && ($w == 2*$h || $h == 2*$w));
- push @tx_sizes, [$w, $h] if ($w >=4 && $h >=4 && ($w == 4*$h || $h == 4*$w));
- }
-}
-
-@pred_names = qw/dc dc_top dc_left dc_128 v h paeth smooth smooth_v smooth_h/;
-
-#
-# Intra prediction
-#
-
-foreach (@tx_sizes) {
- ($w, $h) = @$_;
- foreach $pred_name (@pred_names) {
- add_proto "void", "aom_${pred_name}_predictor_${w}x${h}",
- "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
- add_proto "void", "aom_highbd_${pred_name}_predictor_${w}x${h}",
- "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd";
- }
-}
-
-specialize qw/aom_dc_top_predictor_4x4 msa neon sse2/;
-specialize qw/aom_dc_top_predictor_4x8 sse2/;
-specialize qw/aom_dc_top_predictor_4x16 sse2/;
-specialize qw/aom_dc_top_predictor_8x4 sse2/;
-specialize qw/aom_dc_top_predictor_8x8 neon msa sse2/;
-specialize qw/aom_dc_top_predictor_8x16 sse2/;
-specialize qw/aom_dc_top_predictor_8x32 sse2/;
-specialize qw/aom_dc_top_predictor_16x4 sse2/;
-specialize qw/aom_dc_top_predictor_16x8 sse2/;
-specialize qw/aom_dc_top_predictor_16x16 neon msa sse2/;
-specialize qw/aom_dc_top_predictor_16x32 sse2/;
-specialize qw/aom_dc_top_predictor_16x64 sse2/;
-specialize qw/aom_dc_top_predictor_32x8 sse2/;
-specialize qw/aom_dc_top_predictor_32x16 sse2 avx2/;
-specialize qw/aom_dc_top_predictor_32x32 msa neon sse2 avx2/;
-specialize qw/aom_dc_top_predictor_32x64 sse2 avx2/;
-specialize qw/aom_dc_top_predictor_64x64 sse2 avx2/;
-specialize qw/aom_dc_top_predictor_64x32 sse2 avx2/;
-specialize qw/aom_dc_top_predictor_64x16 sse2 avx2/;
-specialize qw/aom_dc_left_predictor_4x4 msa neon sse2/;
-specialize qw/aom_dc_left_predictor_4x8 sse2/;
-specialize qw/aom_dc_left_predictor_4x16 sse2/;
-specialize qw/aom_dc_left_predictor_8x4 sse2/;
-specialize qw/aom_dc_left_predictor_8x8 neon msa sse2/;
-specialize qw/aom_dc_left_predictor_8x16 sse2/;
-specialize qw/aom_dc_left_predictor_8x32 sse2/;
-specialize qw/aom_dc_left_predictor_16x4 sse2/;
-specialize qw/aom_dc_left_predictor_16x8 sse2/;
-specialize qw/aom_dc_left_predictor_16x16 neon msa sse2/;
-specialize qw/aom_dc_left_predictor_16x32 sse2/;
-specialize qw/aom_dc_left_predictor_16x64 sse2/;
-specialize qw/aom_dc_left_predictor_32x8 sse2/;
-specialize qw/aom_dc_left_predictor_32x16 sse2 avx2/;
-specialize qw/aom_dc_left_predictor_32x32 msa neon sse2 avx2/;
-specialize qw/aom_dc_left_predictor_32x64 sse2 avx2/;
-specialize qw/aom_dc_left_predictor_64x64 sse2 avx2/;
-specialize qw/aom_dc_left_predictor_64x32 sse2 avx2/;
-specialize qw/aom_dc_left_predictor_64x16 sse2 avx2/;
-specialize qw/aom_dc_128_predictor_4x4 msa neon sse2/;
-specialize qw/aom_dc_128_predictor_4x8 sse2/;
-specialize qw/aom_dc_128_predictor_4x16 sse2/;
-specialize qw/aom_dc_128_predictor_8x4 sse2/;
-specialize qw/aom_dc_128_predictor_8x8 neon msa sse2/;
-specialize qw/aom_dc_128_predictor_8x16 sse2/;
-specialize qw/aom_dc_128_predictor_8x32 sse2/;
-specialize qw/aom_dc_128_predictor_16x4 sse2/;
-specialize qw/aom_dc_128_predictor_16x8 sse2/;
-specialize qw/aom_dc_128_predictor_16x16 neon msa sse2/;
-specialize qw/aom_dc_128_predictor_16x32 sse2/;
-specialize qw/aom_dc_128_predictor_16x64 sse2/;
-specialize qw/aom_dc_128_predictor_32x8 sse2/;
-specialize qw/aom_dc_128_predictor_32x16 sse2 avx2/;
-specialize qw/aom_dc_128_predictor_32x32 msa neon sse2 avx2/;
-specialize qw/aom_dc_128_predictor_32x64 sse2 avx2/;
-specialize qw/aom_dc_128_predictor_64x64 sse2 avx2/;
-specialize qw/aom_dc_128_predictor_64x32 sse2 avx2/;
-specialize qw/aom_dc_128_predictor_64x16 sse2 avx2/;
-specialize qw/aom_v_predictor_4x4 neon msa sse2/;
-specialize qw/aom_v_predictor_4x8 sse2/;
-specialize qw/aom_v_predictor_4x16 sse2/;
-specialize qw/aom_v_predictor_8x4 sse2/;
-specialize qw/aom_v_predictor_8x8 neon msa sse2/;
-specialize qw/aom_v_predictor_8x16 sse2/;
-specialize qw/aom_v_predictor_8x32 sse2/;
-specialize qw/aom_v_predictor_16x4 sse2/;
-specialize qw/aom_v_predictor_16x8 sse2/;
-specialize qw/aom_v_predictor_16x16 neon msa sse2/;
-specialize qw/aom_v_predictor_16x32 sse2/;
-specialize qw/aom_v_predictor_16x64 sse2/;
-specialize qw/aom_v_predictor_32x8 sse2/;
-specialize qw/aom_v_predictor_32x16 sse2 avx2/;
-specialize qw/aom_v_predictor_32x32 neon msa sse2 avx2/;
-specialize qw/aom_v_predictor_32x64 sse2 avx2/;
-specialize qw/aom_v_predictor_64x64 sse2 avx2/;
-specialize qw/aom_v_predictor_64x32 sse2 avx2/;
-specialize qw/aom_v_predictor_64x16 sse2 avx2/;
-specialize qw/aom_h_predictor_4x8 sse2/;
-specialize qw/aom_h_predictor_4x16 sse2/;
-specialize qw/aom_h_predictor_4x4 neon dspr2 msa sse2/;
-specialize qw/aom_h_predictor_8x4 sse2/;
-specialize qw/aom_h_predictor_8x8 neon dspr2 msa sse2/;
-specialize qw/aom_h_predictor_8x16 sse2/;
-specialize qw/aom_h_predictor_8x32 sse2/;
-specialize qw/aom_h_predictor_16x4 sse2/;
-specialize qw/aom_h_predictor_16x8 sse2/;
-specialize qw/aom_h_predictor_16x16 neon dspr2 msa sse2/;
-specialize qw/aom_h_predictor_16x32 sse2/;
-specialize qw/aom_h_predictor_16x64 sse2/;
-specialize qw/aom_h_predictor_32x8 sse2/;
-specialize qw/aom_h_predictor_32x16 sse2/;
-specialize qw/aom_h_predictor_32x32 neon msa sse2 avx2/;
-specialize qw/aom_h_predictor_32x64 sse2/;
-specialize qw/aom_h_predictor_64x64 sse2/;
-specialize qw/aom_h_predictor_64x32 sse2/;
-specialize qw/aom_h_predictor_64x16 sse2/;
-specialize qw/aom_paeth_predictor_4x4 ssse3/;
-specialize qw/aom_paeth_predictor_4x8 ssse3/;
-specialize qw/aom_paeth_predictor_4x16 ssse3/;
-specialize qw/aom_paeth_predictor_8x4 ssse3/;
-specialize qw/aom_paeth_predictor_8x8 ssse3/;
-specialize qw/aom_paeth_predictor_8x16 ssse3/;
-specialize qw/aom_paeth_predictor_8x32 ssse3/;
-specialize qw/aom_paeth_predictor_16x4 ssse3/;
-specialize qw/aom_paeth_predictor_16x8 ssse3 avx2/;
-specialize qw/aom_paeth_predictor_16x16 ssse3 avx2/;
-specialize qw/aom_paeth_predictor_16x32 ssse3 avx2/;
-specialize qw/aom_paeth_predictor_16x64 ssse3 avx2/;
-specialize qw/aom_paeth_predictor_32x8 ssse3/;
-specialize qw/aom_paeth_predictor_32x16 ssse3 avx2/;
-specialize qw/aom_paeth_predictor_32x32 ssse3 avx2/;
-specialize qw/aom_paeth_predictor_32x64 ssse3 avx2/;
-specialize qw/aom_paeth_predictor_64x32 ssse3 avx2/;
-specialize qw/aom_paeth_predictor_64x64 ssse3 avx2/;
-specialize qw/aom_paeth_predictor_64x16 ssse3 avx2/;
-specialize qw/aom_paeth_predictor_16x8 ssse3/;
-specialize qw/aom_paeth_predictor_16x16 ssse3/;
-specialize qw/aom_paeth_predictor_16x32 ssse3/;
-specialize qw/aom_paeth_predictor_32x16 ssse3/;
-specialize qw/aom_paeth_predictor_32x32 ssse3/;
-specialize qw/aom_smooth_predictor_4x4 ssse3/;
-specialize qw/aom_smooth_predictor_4x8 ssse3/;
-specialize qw/aom_smooth_predictor_4x16 ssse3/;
-specialize qw/aom_smooth_predictor_8x4 ssse3/;
-specialize qw/aom_smooth_predictor_8x8 ssse3/;
-specialize qw/aom_smooth_predictor_8x16 ssse3/;
-specialize qw/aom_smooth_predictor_8x32 ssse3/;
-specialize qw/aom_smooth_predictor_16x4 ssse3/;
-specialize qw/aom_smooth_predictor_16x8 ssse3/;
-specialize qw/aom_smooth_predictor_16x16 ssse3/;
-specialize qw/aom_smooth_predictor_16x32 ssse3/;
-specialize qw/aom_smooth_predictor_16x64 ssse3/;
-specialize qw/aom_smooth_predictor_32x8 ssse3/;
-specialize qw/aom_smooth_predictor_32x16 ssse3/;
-specialize qw/aom_smooth_predictor_32x32 ssse3/;
-specialize qw/aom_smooth_predictor_32x64 ssse3/;
-specialize qw/aom_smooth_predictor_64x64 ssse3/;
-specialize qw/aom_smooth_predictor_64x32 ssse3/;
-specialize qw/aom_smooth_predictor_64x16 ssse3/;
-
-specialize qw/aom_smooth_v_predictor_4x4 ssse3/;
-specialize qw/aom_smooth_v_predictor_4x8 ssse3/;
-specialize qw/aom_smooth_v_predictor_4x16 ssse3/;
-specialize qw/aom_smooth_v_predictor_8x4 ssse3/;
-specialize qw/aom_smooth_v_predictor_8x8 ssse3/;
-specialize qw/aom_smooth_v_predictor_8x16 ssse3/;
-specialize qw/aom_smooth_v_predictor_8x32 ssse3/;
-specialize qw/aom_smooth_v_predictor_16x4 ssse3/;
-specialize qw/aom_smooth_v_predictor_16x8 ssse3/;
-specialize qw/aom_smooth_v_predictor_16x16 ssse3/;
-specialize qw/aom_smooth_v_predictor_16x32 ssse3/;
-specialize qw/aom_smooth_v_predictor_16x64 ssse3/;
-specialize qw/aom_smooth_v_predictor_32x8 ssse3/;
-specialize qw/aom_smooth_v_predictor_32x16 ssse3/;
-specialize qw/aom_smooth_v_predictor_32x32 ssse3/;
-specialize qw/aom_smooth_v_predictor_32x64 ssse3/;
-specialize qw/aom_smooth_v_predictor_64x64 ssse3/;
-specialize qw/aom_smooth_v_predictor_64x32 ssse3/;
-specialize qw/aom_smooth_v_predictor_64x16 ssse3/;
-
-specialize qw/aom_smooth_h_predictor_4x4 ssse3/;
-specialize qw/aom_smooth_h_predictor_4x8 ssse3/;
-specialize qw/aom_smooth_h_predictor_4x16 ssse3/;
-specialize qw/aom_smooth_h_predictor_8x4 ssse3/;
-specialize qw/aom_smooth_h_predictor_8x8 ssse3/;
-specialize qw/aom_smooth_h_predictor_8x16 ssse3/;
-specialize qw/aom_smooth_h_predictor_8x32 ssse3/;
-specialize qw/aom_smooth_h_predictor_16x4 ssse3/;
-specialize qw/aom_smooth_h_predictor_16x8 ssse3/;
-specialize qw/aom_smooth_h_predictor_16x16 ssse3/;
-specialize qw/aom_smooth_h_predictor_16x32 ssse3/;
-specialize qw/aom_smooth_h_predictor_16x64 ssse3/;
-specialize qw/aom_smooth_h_predictor_32x8 ssse3/;
-specialize qw/aom_smooth_h_predictor_32x16 ssse3/;
-specialize qw/aom_smooth_h_predictor_32x32 ssse3/;
-specialize qw/aom_smooth_h_predictor_32x64 ssse3/;
-specialize qw/aom_smooth_h_predictor_64x64 ssse3/;
-specialize qw/aom_smooth_h_predictor_64x32 ssse3/;
-specialize qw/aom_smooth_h_predictor_64x16 ssse3/;
-
-# TODO(yunqingwang): optimize rectangular DC_PRED to replace division
-# by multiply and shift.
-specialize qw/aom_dc_predictor_4x4 dspr2 msa neon sse2/;
-specialize qw/aom_dc_predictor_4x8 sse2/;
-specialize qw/aom_dc_predictor_4x16 sse2/;
-specialize qw/aom_dc_predictor_8x4 sse2/;
-specialize qw/aom_dc_predictor_8x8 dspr2 neon msa sse2/;
-specialize qw/aom_dc_predictor_8x16 sse2/;
-specialize qw/aom_dc_predictor_8x32 sse2/;
-specialize qw/aom_dc_predictor_16x4 sse2/;
-specialize qw/aom_dc_predictor_16x8 sse2/;
-specialize qw/aom_dc_predictor_16x16 dspr2 neon msa sse2/;
-specialize qw/aom_dc_predictor_16x32 sse2/;
-specialize qw/aom_dc_predictor_16x64 sse2/;
-specialize qw/aom_dc_predictor_32x8 sse2/;
-specialize qw/aom_dc_predictor_32x16 sse2 avx2/;
-specialize qw/aom_dc_predictor_32x32 msa neon sse2 avx2/;
-specialize qw/aom_dc_predictor_32x64 sse2 avx2/;
-specialize qw/aom_dc_predictor_64x64 sse2 avx2/;
-specialize qw/aom_dc_predictor_64x32 sse2 avx2/;
-specialize qw/aom_dc_predictor_64x16 sse2 avx2/;
-
- specialize qw/aom_highbd_v_predictor_4x4 sse2/;
- specialize qw/aom_highbd_v_predictor_4x8 sse2/;
- specialize qw/aom_highbd_v_predictor_8x4 sse2/;
- specialize qw/aom_highbd_v_predictor_8x8 sse2/;
- specialize qw/aom_highbd_v_predictor_8x16 sse2/;
- specialize qw/aom_highbd_v_predictor_16x8 sse2/;
- specialize qw/aom_highbd_v_predictor_16x16 sse2/;
- specialize qw/aom_highbd_v_predictor_16x32 sse2/;
- specialize qw/aom_highbd_v_predictor_32x16 sse2/;
- specialize qw/aom_highbd_v_predictor_32x32 sse2/;
-
- # TODO(yunqingwang): optimize rectangular DC_PRED to replace division
- # by multiply and shift.
- specialize qw/aom_highbd_dc_predictor_4x4 sse2 neon/;
- specialize qw/aom_highbd_dc_predictor_4x8 sse2/;
- specialize qw/aom_highbd_dc_predictor_8x4 sse2/;;
- specialize qw/aom_highbd_dc_predictor_8x8 sse2 neon/;;
- specialize qw/aom_highbd_dc_predictor_8x16 sse2/;;
- specialize qw/aom_highbd_dc_predictor_16x8 sse2/;
- specialize qw/aom_highbd_dc_predictor_16x16 sse2 neon/;
- specialize qw/aom_highbd_dc_predictor_16x32 sse2/;
- specialize qw/aom_highbd_dc_predictor_32x16 sse2/;
- specialize qw/aom_highbd_dc_predictor_32x32 sse2 neon/;
- specialize qw/aom_highbd_dc_predictor_64x64 neon/;
-
- specialize qw/aom_highbd_h_predictor_4x4 sse2/;
- specialize qw/aom_highbd_h_predictor_4x8 sse2/;
- specialize qw/aom_highbd_h_predictor_8x4 sse2/;
- specialize qw/aom_highbd_h_predictor_8x8 sse2/;
- specialize qw/aom_highbd_h_predictor_8x16 sse2/;
- specialize qw/aom_highbd_h_predictor_16x8 sse2/;
- specialize qw/aom_highbd_h_predictor_16x16 sse2/;
- specialize qw/aom_highbd_h_predictor_16x32 sse2/;
- specialize qw/aom_highbd_h_predictor_32x16 sse2/;
- specialize qw/aom_highbd_h_predictor_32x32 sse2/;
- specialize qw/aom_highbd_dc_left_predictor_4x4 sse2/;
- specialize qw/aom_highbd_dc_top_predictor_4x4 sse2/;
- specialize qw/aom_highbd_dc_128_predictor_4x4 sse2/;
- specialize qw/aom_highbd_dc_left_predictor_4x8 sse2/;
- specialize qw/aom_highbd_dc_top_predictor_4x8 sse2/;
- specialize qw/aom_highbd_dc_128_predictor_4x8 sse2/;
- specialize qw/aom_highbd_dc_left_predictor_8x4 sse2/;
- specialize qw/aom_highbd_dc_top_predictor_8x4 sse2/;
- specialize qw/aom_highbd_dc_128_predictor_8x4 sse2/;
- specialize qw/aom_highbd_dc_left_predictor_8x8 sse2/;
- specialize qw/aom_highbd_dc_top_predictor_8x8 sse2/;
- specialize qw/aom_highbd_dc_128_predictor_8x8 sse2/;
- specialize qw/aom_highbd_dc_left_predictor_8x16 sse2/;
- specialize qw/aom_highbd_dc_top_predictor_8x16 sse2/;
- specialize qw/aom_highbd_dc_128_predictor_8x16 sse2/;
- specialize qw/aom_highbd_dc_left_predictor_16x8 sse2/;
- specialize qw/aom_highbd_dc_top_predictor_16x8 sse2/;
- specialize qw/aom_highbd_dc_128_predictor_16x8 sse2/;
- specialize qw/aom_highbd_dc_left_predictor_16x16 sse2/;
- specialize qw/aom_highbd_dc_top_predictor_16x16 sse2/;
- specialize qw/aom_highbd_dc_128_predictor_16x16 sse2/;
- specialize qw/aom_highbd_dc_left_predictor_16x32 sse2/;
- specialize qw/aom_highbd_dc_top_predictor_16x32 sse2/;
- specialize qw/aom_highbd_dc_128_predictor_16x32 sse2/;
- specialize qw/aom_highbd_dc_left_predictor_32x16 sse2/;
- specialize qw/aom_highbd_dc_top_predictor_32x16 sse2/;
- specialize qw/aom_highbd_dc_128_predictor_32x16 sse2/;
- specialize qw/aom_highbd_dc_left_predictor_32x32 sse2/;
- specialize qw/aom_highbd_dc_top_predictor_32x32 sse2/;
- specialize qw/aom_highbd_dc_128_predictor_32x32 sse2/;
-
-#
-# Sub Pixel Filters
-#
-add_proto qw/void aom_convolve_copy/, "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h";
-add_proto qw/void aom_convolve8_horiz/, "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h";
-add_proto qw/void aom_convolve8_vert/, "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h";
-
-specialize qw/aom_convolve_copy sse2 /;
-specialize qw/aom_convolve8_horiz sse2 ssse3/, "$avx2_ssse3";
-specialize qw/aom_convolve8_vert sse2 ssse3/, "$avx2_ssse3";
-
-add_proto qw/void aom_highbd_convolve_copy/, "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h, int bps";
-specialize qw/aom_highbd_convolve_copy sse2 avx2/;
-
-add_proto qw/void aom_highbd_convolve8_horiz/, "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h, int bps";
-specialize qw/aom_highbd_convolve8_horiz avx2/, "$sse2_x86_64";
-
-add_proto qw/void aom_highbd_convolve8_vert/, "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h, int bps";
-specialize qw/aom_highbd_convolve8_vert avx2/, "$sse2_x86_64";
-
-#
-# Loopfilter
-#
-add_proto qw/void aom_lpf_vertical_14/, "uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh";
-specialize qw/aom_lpf_vertical_14 sse2 neon/;
-
-add_proto qw/void aom_lpf_vertical_14_dual/, "uint8_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1";
-specialize qw/aom_lpf_vertical_14_dual sse2/;
-
-add_proto qw/void aom_lpf_vertical_6/, "uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh";
-specialize qw/aom_lpf_vertical_6 sse2 neon/;
-
-add_proto qw/void aom_lpf_vertical_8/, "uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh";
-specialize qw/aom_lpf_vertical_8 sse2 neon/;
-
-add_proto qw/void aom_lpf_vertical_8_dual/, "uint8_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1";
-specialize qw/aom_lpf_vertical_8_dual sse2/;
-
-add_proto qw/void aom_lpf_vertical_4/, "uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh";
-specialize qw/aom_lpf_vertical_4 sse2 neon/;
-
-add_proto qw/void aom_lpf_vertical_4_dual/, "uint8_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1";
-specialize qw/aom_lpf_vertical_4_dual sse2/;
-
-add_proto qw/void aom_lpf_horizontal_14/, "uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh";
-specialize qw/aom_lpf_horizontal_14 sse2 neon/;
-
-add_proto qw/void aom_lpf_horizontal_14_dual/, "uint8_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1";
-specialize qw/aom_lpf_horizontal_14_dual sse2/;
-
-add_proto qw/void aom_lpf_horizontal_6/, "uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh";
-specialize qw/aom_lpf_horizontal_6 sse2 neon/;
-
-add_proto qw/void aom_lpf_horizontal_6_dual/, "uint8_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1";
-specialize qw/aom_lpf_horizontal_6_dual sse2/;
-
-add_proto qw/void aom_lpf_horizontal_8/, "uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh";
-specialize qw/aom_lpf_horizontal_8 sse2 neon/;
-
-add_proto qw/void aom_lpf_horizontal_8_dual/, "uint8_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1";
-specialize qw/aom_lpf_horizontal_8_dual sse2/;
-
-add_proto qw/void aom_lpf_horizontal_4/, "uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh";
-specialize qw/aom_lpf_horizontal_4 sse2 neon/;
-
-add_proto qw/void aom_lpf_horizontal_4_dual/, "uint8_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1";
-specialize qw/aom_lpf_horizontal_4_dual sse2/;
-
-add_proto qw/void aom_highbd_lpf_vertical_14/, "uint16_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int bd";
-specialize qw/aom_highbd_lpf_vertical_14 sse2/;
-
-add_proto qw/void aom_highbd_lpf_vertical_14_dual/, "uint16_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1, int bd";
-specialize qw/aom_highbd_lpf_vertical_14_dual sse2 avx2/;
-
-add_proto qw/void aom_highbd_lpf_vertical_8/, "uint16_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int bd";
-specialize qw/aom_highbd_lpf_vertical_8 sse2/;
-
-add_proto qw/void aom_highbd_lpf_vertical_6/, "uint16_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int bd";
-specialize qw/aom_highbd_lpf_vertical_6 sse2/;
-
-add_proto qw/void aom_lpf_vertical_6_dual/, "uint8_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1";
-specialize qw/aom_lpf_vertical_6_dual sse2/;
-
-add_proto qw/void aom_highbd_lpf_vertical_6_dual/, "uint16_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1, int bd";
-specialize qw/aom_highbd_lpf_vertical_6_dual sse2/;
-
-add_proto qw/void aom_highbd_lpf_vertical_8_dual/, "uint16_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1, int bd";
-specialize qw/aom_highbd_lpf_vertical_8_dual sse2 avx2/;
-
-add_proto qw/void aom_highbd_lpf_vertical_4/, "uint16_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int bd";
-specialize qw/aom_highbd_lpf_vertical_4 sse2/;
-
-add_proto qw/void aom_highbd_lpf_vertical_4_dual/, "uint16_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1, int bd";
-specialize qw/aom_highbd_lpf_vertical_4_dual sse2 avx2/;
-
-add_proto qw/void aom_highbd_lpf_horizontal_14/, "uint16_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int bd";
-specialize qw/aom_highbd_lpf_horizontal_14 sse2/;
-
-add_proto qw/void aom_highbd_lpf_horizontal_14_dual/, "uint16_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limt1, const uint8_t *thresh1,int bd";
-specialize qw/aom_highbd_lpf_horizontal_14_dual sse2 avx2/;
-
-add_proto qw/void aom_highbd_lpf_horizontal_6/, "uint16_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int bd";
-specialize qw/aom_highbd_lpf_horizontal_6 sse2/;
-
-add_proto qw/void aom_highbd_lpf_horizontal_6_dual/, "uint16_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1, int bd";
-specialize qw/aom_highbd_lpf_horizontal_6_dual sse2/;
-
-add_proto qw/void aom_highbd_lpf_horizontal_8/, "uint16_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int bd";
-specialize qw/aom_highbd_lpf_horizontal_8 sse2/;
-
-add_proto qw/void aom_highbd_lpf_horizontal_8_dual/, "uint16_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1, int bd";
-specialize qw/aom_highbd_lpf_horizontal_8_dual sse2 avx2/;
-
-add_proto qw/void aom_highbd_lpf_horizontal_4/, "uint16_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int bd";
-specialize qw/aom_highbd_lpf_horizontal_4 sse2/;
-
-add_proto qw/void aom_highbd_lpf_horizontal_4_dual/, "uint16_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1, int bd";
-specialize qw/aom_highbd_lpf_horizontal_4_dual sse2 avx2/;
-
-# Helper functions.
-add_proto qw/void av1_round_shift_array/, "int32_t *arr, int size, int bit";
-specialize "av1_round_shift_array", qw/sse4_1 neon/;
-
-#
-# Encoder functions.
-#
-
-#
-# Forward transform
-#
-if (aom_config("CONFIG_AV1_ENCODER") eq "yes"){
- add_proto qw/void aom_fdct8x8/, "const int16_t *input, tran_low_t *output, int stride";
- specialize qw/aom_fdct8x8 sse2/, "$ssse3_x86_64";
-
- # High bit depth
- add_proto qw/void aom_highbd_fdct8x8/, "const int16_t *input, tran_low_t *output, int stride";
- specialize qw/aom_highbd_fdct8x8 sse2/;
-
- # FFT/IFFT (float) only used for denoising (and noise power spectral density estimation)
- add_proto qw/void aom_fft2x2_float/, "const float *input, float *temp, float *output";
-
- add_proto qw/void aom_fft4x4_float/, "const float *input, float *temp, float *output";
- specialize qw/aom_fft4x4_float sse2/;
-
- add_proto qw/void aom_fft8x8_float/, "const float *input, float *temp, float *output";
- specialize qw/aom_fft8x8_float avx2 sse2/;
-
- add_proto qw/void aom_fft16x16_float/, "const float *input, float *temp, float *output";
- specialize qw/aom_fft16x16_float avx2 sse2/;
-
- add_proto qw/void aom_fft32x32_float/, "const float *input, float *temp, float *output";
- specialize qw/aom_fft32x32_float avx2 sse2/;
-
- add_proto qw/void aom_ifft2x2_float/, "const float *input, float *temp, float *output";
-
- add_proto qw/void aom_ifft4x4_float/, "const float *input, float *temp, float *output";
- specialize qw/aom_ifft4x4_float sse2/;
-
- add_proto qw/void aom_ifft8x8_float/, "const float *input, float *temp, float *output";
- specialize qw/aom_ifft8x8_float avx2 sse2/;
-
- add_proto qw/void aom_ifft16x16_float/, "const float *input, float *temp, float *output";
- specialize qw/aom_ifft16x16_float avx2 sse2/;
-
- add_proto qw/void aom_ifft32x32_float/, "const float *input, float *temp, float *output";
- specialize qw/aom_ifft32x32_float avx2 sse2/;
-} # CONFIG_AV1_ENCODER
-
-#
-# Quantization
-#
-if (aom_config("CONFIG_AV1_ENCODER") eq "yes") {
- add_proto qw/void aom_quantize_b/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan";
- specialize qw/aom_quantize_b sse2/, "$ssse3_x86_64", "$avx_x86_64";
-
- add_proto qw/void aom_quantize_b_32x32/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan";
- specialize qw/aom_quantize_b_32x32/, "$ssse3_x86_64", "$avx_x86_64";
-
- add_proto qw/void aom_quantize_b_64x64/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan";
-} # CONFIG_AV1_ENCODER
-
-if (aom_config("CONFIG_AV1_ENCODER") eq "yes") {
- add_proto qw/void aom_highbd_quantize_b/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan";
- specialize qw/aom_highbd_quantize_b sse2 avx2/;
-
- add_proto qw/void aom_highbd_quantize_b_32x32/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan";
- specialize qw/aom_highbd_quantize_b_32x32 sse2/;
-
- add_proto qw/void aom_highbd_quantize_b_64x64/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan";
-
-} # CONFIG_AV1_ENCODER
-
-#
-# Alpha blending with mask
-#
-add_proto qw/void aom_lowbd_blend_a64_d16_mask/, "uint8_t *dst, uint32_t dst_stride, const CONV_BUF_TYPE *src0, uint32_t src0_stride, const CONV_BUF_TYPE *src1, uint32_t src1_stride, const uint8_t *mask, uint32_t mask_stride, int w, int h, int subx, int suby, ConvolveParams *conv_params";
-specialize qw/aom_lowbd_blend_a64_d16_mask sse4_1 avx2 neon/;
-add_proto qw/void aom_highbd_blend_a64_d16_mask/, "uint8_t *dst, uint32_t dst_stride, const CONV_BUF_TYPE *src0, uint32_t src0_stride, const CONV_BUF_TYPE *src1, uint32_t src1_stride, const uint8_t *mask, uint32_t mask_stride, int w, int h, int subx, int suby, ConvolveParams *conv_params, const int bd";
-add_proto qw/void aom_blend_a64_mask/, "uint8_t *dst, uint32_t dst_stride, const uint8_t *src0, uint32_t src0_stride, const uint8_t *src1, uint32_t src1_stride, const uint8_t *mask, uint32_t mask_stride, int w, int h, int subx, int suby";
-add_proto qw/void aom_blend_a64_hmask/, "uint8_t *dst, uint32_t dst_stride, const uint8_t *src0, uint32_t src0_stride, const uint8_t *src1, uint32_t src1_stride, const uint8_t *mask, int w, int h";
-add_proto qw/void aom_blend_a64_vmask/, "uint8_t *dst, uint32_t dst_stride, const uint8_t *src0, uint32_t src0_stride, const uint8_t *src1, uint32_t src1_stride, const uint8_t *mask, int w, int h";
-specialize "aom_blend_a64_mask", qw/sse4_1 avx2/;
-specialize "aom_blend_a64_hmask", qw/sse4_1 neon/;
-specialize "aom_blend_a64_vmask", qw/sse4_1 neon/;
-
-add_proto qw/void aom_highbd_blend_a64_mask/, "uint8_t *dst, uint32_t dst_stride, const uint8_t *src0, uint32_t src0_stride, const uint8_t *src1, uint32_t src1_stride, const uint8_t *mask, uint32_t mask_stride, int w, int h, int subx, int suby, int bd";
-add_proto qw/void aom_highbd_blend_a64_hmask/, "uint8_t *dst, uint32_t dst_stride, const uint8_t *src0, uint32_t src0_stride, const uint8_t *src1, uint32_t src1_stride, const uint8_t *mask, int w, int h, int bd";
-add_proto qw/void aom_highbd_blend_a64_vmask/, "uint8_t *dst, uint32_t dst_stride, const uint8_t *src0, uint32_t src0_stride, const uint8_t *src1, uint32_t src1_stride, const uint8_t *mask, int w, int h, int bd";
-specialize "aom_highbd_blend_a64_mask", qw/sse4_1/;
-specialize "aom_highbd_blend_a64_hmask", qw/sse4_1/;
-specialize "aom_highbd_blend_a64_vmask", qw/sse4_1/;
-
-if (aom_config("CONFIG_AV1_ENCODER") eq "yes") {
- #
- # Block subtraction
- #
- add_proto qw/void aom_subtract_block/, "int rows, int cols, int16_t *diff_ptr, ptrdiff_t diff_stride, const uint8_t *src_ptr, ptrdiff_t src_stride, const uint8_t *pred_ptr, ptrdiff_t pred_stride";
- specialize qw/aom_subtract_block neon msa sse2 avx2/;
-
- add_proto qw/void aom_highbd_subtract_block/, "int rows, int cols, int16_t *diff_ptr, ptrdiff_t diff_stride, const uint8_t *src_ptr, ptrdiff_t src_stride, const uint8_t *pred_ptr, ptrdiff_t pred_stride, int bd";
- specialize qw/aom_highbd_subtract_block sse2/;
-
- add_proto qw/int64_t/, "aom_sse", "const uint8_t *a, int a_stride, const uint8_t *b,int b_stride, int width, int height";
- specialize qw/aom_sse sse4_1 avx2/;
-
- add_proto qw/int64_t/, "aom_highbd_sse", "const uint8_t *a8, int a_stride, const uint8_t *b8,int b_stride, int width, int height";
- specialize qw/aom_highbd_sse sse4_1 avx2/;
-
- if (aom_config("CONFIG_AV1_ENCODER") eq "yes") {
- #
- # Sum of Squares
- #
- add_proto qw/uint64_t aom_sum_squares_2d_i16/, "const int16_t *src, int stride, int width, int height";
- specialize qw/aom_sum_squares_2d_i16 sse2 avx2/;
-
- add_proto qw/uint64_t aom_sum_squares_i16/, "const int16_t *src, uint32_t N";
- specialize qw/aom_sum_squares_i16 sse2/;
-
- }
-
-
- #
- # Single block SAD / Single block Avg SAD
- #
- foreach (@block_sizes) {
- ($w, $h) = @$_;
- add_proto qw/unsigned int/, "aom_sad${w}x${h}", "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride";
- add_proto qw/unsigned int/, "aom_sad${w}x${h}_avg", "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred";
- add_proto qw/unsigned int/, "aom_jnt_sad${w}x${h}_avg", "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred, const JNT_COMP_PARAMS *jcp_param";
- }
-
- specialize qw/aom_sad128x128 avx2 sse2/;
- specialize qw/aom_sad128x64 avx2 sse2/;
- specialize qw/aom_sad64x128 avx2 sse2/;
- specialize qw/aom_sad64x64 avx2 neon msa sse2/;
- specialize qw/aom_sad64x32 avx2 msa sse2/;
- specialize qw/aom_sad32x64 avx2 msa sse2/;
- specialize qw/aom_sad32x32 avx2 neon msa sse2/;
- specialize qw/aom_sad32x16 avx2 msa sse2/;
- specialize qw/aom_sad16x32 msa sse2/;
- specialize qw/aom_sad16x16 neon msa sse2/;
- specialize qw/aom_sad16x8 neon msa sse2/;
- specialize qw/aom_sad8x16 neon msa sse2/;
- specialize qw/aom_sad8x8 neon msa sse2/;
- specialize qw/aom_sad8x4 msa sse2/;
- specialize qw/aom_sad4x8 msa sse2/;
- specialize qw/aom_sad4x4 neon msa sse2/;
-
- specialize qw/aom_sad128x128_avg avx2 sse2/;
- specialize qw/aom_sad128x64_avg avx2 sse2/;
- specialize qw/aom_sad64x128_avg avx2 sse2/;
- specialize qw/aom_sad64x64_avg avx2 msa sse2/;
- specialize qw/aom_sad64x32_avg avx2 msa sse2/;
- specialize qw/aom_sad32x64_avg avx2 msa sse2/;
- specialize qw/aom_sad32x32_avg avx2 msa sse2/;
- specialize qw/aom_sad32x16_avg avx2 msa sse2/;
- specialize qw/aom_sad16x32_avg msa sse2/;
- specialize qw/aom_sad16x16_avg msa sse2/;
- specialize qw/aom_sad16x8_avg msa sse2/;
- specialize qw/aom_sad8x16_avg msa sse2/;
- specialize qw/aom_sad8x8_avg msa sse2/;
- specialize qw/aom_sad8x4_avg msa sse2/;
- specialize qw/aom_sad4x8_avg msa sse2/;
- specialize qw/aom_sad4x4_avg msa sse2/;
-
- specialize qw/aom_sad4x16 sse2/;
- specialize qw/aom_sad16x4 sse2/;
- specialize qw/aom_sad8x32 sse2/;
- specialize qw/aom_sad32x8 sse2/;
- specialize qw/aom_sad16x64 sse2/;
- specialize qw/aom_sad64x16 sse2/;
-
- specialize qw/aom_sad4x16_avg sse2/;
- specialize qw/aom_sad16x4_avg sse2/;
- specialize qw/aom_sad8x32_avg sse2/;
- specialize qw/aom_sad32x8_avg sse2/;
- specialize qw/aom_sad16x64_avg sse2/;
- specialize qw/aom_sad64x16_avg sse2/;
-
- specialize qw/aom_jnt_sad128x128_avg ssse3/;
- specialize qw/aom_jnt_sad128x64_avg ssse3/;
- specialize qw/aom_jnt_sad64x128_avg ssse3/;
- specialize qw/aom_jnt_sad64x64_avg ssse3/;
- specialize qw/aom_jnt_sad64x32_avg ssse3/;
- specialize qw/aom_jnt_sad32x64_avg ssse3/;
- specialize qw/aom_jnt_sad32x32_avg ssse3/;
- specialize qw/aom_jnt_sad32x16_avg ssse3/;
- specialize qw/aom_jnt_sad16x32_avg ssse3/;
- specialize qw/aom_jnt_sad16x16_avg ssse3/;
- specialize qw/aom_jnt_sad16x8_avg ssse3/;
- specialize qw/aom_jnt_sad8x16_avg ssse3/;
- specialize qw/aom_jnt_sad8x8_avg ssse3/;
- specialize qw/aom_jnt_sad8x4_avg ssse3/;
- specialize qw/aom_jnt_sad4x8_avg ssse3/;
- specialize qw/aom_jnt_sad4x4_avg ssse3/;
-
- specialize qw/aom_jnt_sad4x16_avg ssse3/;
- specialize qw/aom_jnt_sad16x4_avg ssse3/;
- specialize qw/aom_jnt_sad8x32_avg ssse3/;
- specialize qw/aom_jnt_sad32x8_avg ssse3/;
- specialize qw/aom_jnt_sad16x64_avg ssse3/;
- specialize qw/aom_jnt_sad64x16_avg ssse3/;
-
- add_proto qw/unsigned int/, "aom_sad4xh", "const uint8_t *a, int a_stride, const uint8_t *b, int b_stride, int width, int height";
- add_proto qw/unsigned int/, "aom_sad8xh", "const uint8_t *a, int a_stride, const uint8_t *b, int b_stride, int width, int height";
- add_proto qw/unsigned int/, "aom_sad16xh", "const uint8_t *a, int a_stride, const uint8_t *b, int b_stride, int width, int height";
- add_proto qw/unsigned int/, "aom_sad32xh", "const uint8_t *a, int a_stride, const uint8_t *b, int b_stride, int width, int height";
- add_proto qw/unsigned int/, "aom_sad64xh", "const uint8_t *a, int a_stride, const uint8_t *b, int b_stride, int width, int height";
- add_proto qw/unsigned int/, "aom_sad128xh", "const uint8_t *a, int a_stride, const uint8_t *b, int b_stride, int width, int height";
-
- specialize qw/aom_sad4xh sse2/;
- specialize qw/aom_sad8xh sse2/;
- specialize qw/aom_sad16xh sse2/;
- specialize qw/aom_sad32xh sse2/;
- specialize qw/aom_sad64xh sse2/;
- specialize qw/aom_sad128xh sse2/;
-
-
- foreach (@block_sizes) {
- ($w, $h) = @$_;
- add_proto qw/unsigned int/, "aom_highbd_sad${w}x${h}", "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride";
- add_proto qw/unsigned int/, "aom_highbd_sad${w}x${h}_avg", "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred";
- if ($w != 128 && $h != 128 && $w != 4) {
- specialize "aom_highbd_sad${w}x${h}", qw/sse2/;
- specialize "aom_highbd_sad${w}x${h}_avg", qw/sse2/;
- }
- add_proto qw/unsigned int/, "aom_highbd_jnt_sad${w}x${h}_avg", "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred, const JNT_COMP_PARAMS* jcp_param";
- }
- specialize qw/aom_highbd_sad128x128 avx2/;
- specialize qw/aom_highbd_sad128x64 avx2/;
- specialize qw/aom_highbd_sad64x128 avx2/;
- specialize qw/aom_highbd_sad64x64 avx2 sse2/;
- specialize qw/aom_highbd_sad64x32 avx2 sse2/;
- specialize qw/aom_highbd_sad32x64 avx2 sse2/;
- specialize qw/aom_highbd_sad32x32 avx2 sse2/;
- specialize qw/aom_highbd_sad32x16 avx2 sse2/;
- specialize qw/aom_highbd_sad16x32 avx2 sse2/;
- specialize qw/aom_highbd_sad16x16 avx2 sse2/;
- specialize qw/aom_highbd_sad16x8 avx2 sse2/;
- specialize qw/aom_highbd_sad8x4 sse2/;
-
- specialize qw/aom_highbd_sad128x128_avg avx2/;
- specialize qw/aom_highbd_sad128x64_avg avx2/;
- specialize qw/aom_highbd_sad64x128_avg avx2/;
- specialize qw/aom_highbd_sad64x64_avg avx2 sse2/;
- specialize qw/aom_highbd_sad64x32_avg avx2 sse2/;
- specialize qw/aom_highbd_sad32x64_avg avx2 sse2/;
- specialize qw/aom_highbd_sad32x32_avg avx2 sse2/;
- specialize qw/aom_highbd_sad32x16_avg avx2 sse2/;
- specialize qw/aom_highbd_sad16x32_avg avx2 sse2/;
- specialize qw/aom_highbd_sad16x16_avg avx2 sse2/;
- specialize qw/aom_highbd_sad16x8_avg avx2 sse2/;
- specialize qw/aom_highbd_sad8x4_avg sse2/;
-
- specialize qw/aom_highbd_sad16x4 sse2/;
- specialize qw/aom_highbd_sad8x32 sse2/;
- specialize qw/aom_highbd_sad32x8 sse2/;
- specialize qw/aom_highbd_sad16x64 sse2/;
- specialize qw/aom_highbd_sad64x16 sse2/;
-
- specialize qw/aom_highbd_sad16x4_avg sse2/;
- specialize qw/aom_highbd_sad8x32_avg sse2/;
- specialize qw/aom_highbd_sad32x8_avg sse2/;
- specialize qw/aom_highbd_sad16x64_avg sse2/;
- specialize qw/aom_highbd_sad64x16_avg sse2/;
-
- #
- # Masked SAD
- #
- foreach (@block_sizes) {
- ($w, $h) = @$_;
- add_proto qw/unsigned int/, "aom_masked_sad${w}x${h}", "const uint8_t *src, int src_stride, const uint8_t *ref, int ref_stride, const uint8_t *second_pred, const uint8_t *msk, int msk_stride, int invert_mask";
- specialize "aom_masked_sad${w}x${h}", qw/ssse3 avx2/;
- }
-
-
- foreach (@block_sizes) {
- ($w, $h) = @$_;
- add_proto qw/unsigned int/, "aom_highbd_masked_sad${w}x${h}", "const uint8_t *src8, int src_stride, const uint8_t *ref8, int ref_stride, const uint8_t *second_pred8, const uint8_t *msk, int msk_stride, int invert_mask";
- specialize "aom_highbd_masked_sad${w}x${h}", qw/ssse3 avx2/;
- }
-
-
- #
- # OBMC SAD
- #
- foreach (@block_sizes) {
- ($w, $h) = @$_;
- add_proto qw/unsigned int/, "aom_obmc_sad${w}x${h}", "const uint8_t *pre, int pre_stride, const int32_t *wsrc, const int32_t *mask";
- if (! (($w == 128 && $h == 32) || ($w == 32 && $h == 128))) {
- specialize "aom_obmc_sad${w}x${h}", qw/sse4_1 avx2/;
- }
- }
-
-
- foreach (@block_sizes) {
- ($w, $h) = @$_;
- add_proto qw/unsigned int/, "aom_highbd_obmc_sad${w}x${h}", "const uint8_t *pre, int pre_stride, const int32_t *wsrc, const int32_t *mask";
- if (! (($w == 128 && $h == 32) || ($w == 32 && $h == 128))) {
- specialize "aom_highbd_obmc_sad${w}x${h}", qw/sse4_1 avx2/;
- }
- }
-
-
- #
- # Multi-block SAD, comparing a reference to N independent blocks
- #
- foreach (@block_sizes) {
- ($w, $h) = @$_;
- add_proto qw/void/, "aom_sad${w}x${h}x4d", "const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_ptr[], int ref_stride, uint32_t *sad_array";
- }
-
- specialize qw/aom_sad128x128x4d avx2 sse2/;
- specialize qw/aom_sad128x64x4d avx2 sse2/;
- specialize qw/aom_sad64x128x4d avx2 sse2/;
- specialize qw/aom_sad64x64x4d avx2 neon msa sse2/;
- specialize qw/aom_sad64x32x4d avx2 msa sse2/;
- specialize qw/aom_sad32x64x4d avx2 msa sse2/;
- specialize qw/aom_sad32x32x4d avx2 neon msa sse2/;
- specialize qw/aom_sad32x16x4d msa sse2/;
- specialize qw/aom_sad16x32x4d msa sse2/;
- specialize qw/aom_sad16x16x4d neon msa sse2/;
- specialize qw/aom_sad16x8x4d msa sse2/;
- specialize qw/aom_sad8x16x4d msa sse2/;
- specialize qw/aom_sad8x8x4d msa sse2/;
- specialize qw/aom_sad8x4x4d msa sse2/;
- specialize qw/aom_sad4x8x4d msa sse2/;
- specialize qw/aom_sad4x4x4d msa sse2/;
-
- specialize qw/aom_sad4x16x4d sse2/;
- specialize qw/aom_sad16x4x4d sse2/;
- specialize qw/aom_sad8x32x4d sse2/;
- specialize qw/aom_sad32x8x4d sse2/;
- specialize qw/aom_sad16x64x4d sse2/;
- specialize qw/aom_sad64x16x4d sse2/;
-
- #
- # Multi-block SAD, comparing a reference to N independent blocks
- #
- foreach (@block_sizes) {
- ($w, $h) = @$_;
- add_proto qw/void/, "aom_highbd_sad${w}x${h}x4d", "const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_ptr[], int ref_stride, uint32_t *sad_array";
- if ($w != 128 && $h != 128) {
- specialize "aom_highbd_sad${w}x${h}x4d", qw/sse2/;
- }
- }
- specialize qw/aom_highbd_sad128x128x4d avx2/;
- specialize qw/aom_highbd_sad128x64x4d avx2/;
- specialize qw/aom_highbd_sad64x128x4d avx2/;
- specialize qw/aom_highbd_sad64x64x4d sse2 avx2/;
- specialize qw/aom_highbd_sad64x32x4d sse2 avx2/;
- specialize qw/aom_highbd_sad32x64x4d sse2 avx2/;
- specialize qw/aom_highbd_sad32x32x4d sse2 avx2/;
- specialize qw/aom_highbd_sad32x16x4d sse2 avx2/;
- specialize qw/aom_highbd_sad16x32x4d sse2 avx2/;
- specialize qw/aom_highbd_sad16x16x4d sse2 avx2/;
- specialize qw/aom_highbd_sad16x8x4d sse2 avx2/;
- specialize qw/aom_highbd_sad8x16x4d sse2/;
- specialize qw/aom_highbd_sad8x8x4d sse2/;
- specialize qw/aom_highbd_sad8x4x4d sse2/;
- specialize qw/aom_highbd_sad4x8x4d sse2/;
- specialize qw/aom_highbd_sad4x4x4d sse2/;
-
- specialize qw/aom_highbd_sad4x16x4d sse2/;
- specialize qw/aom_highbd_sad16x4x4d sse2/;
- specialize qw/aom_highbd_sad8x32x4d sse2/;
- specialize qw/aom_highbd_sad32x8x4d sse2/;
- specialize qw/aom_highbd_sad16x64x4d sse2/;
- specialize qw/aom_highbd_sad64x16x4d sse2/;
-
- #
- # Structured Similarity (SSIM)
- #
- if (aom_config("CONFIG_INTERNAL_STATS") eq "yes") {
- add_proto qw/void aom_ssim_parms_8x8/, "const uint8_t *s, int sp, const uint8_t *r, int rp, uint32_t *sum_s, uint32_t *sum_r, uint32_t *sum_sq_s, uint32_t *sum_sq_r, uint32_t *sum_sxr";
- specialize qw/aom_ssim_parms_8x8/, "$sse2_x86_64";
-
- add_proto qw/void aom_ssim_parms_16x16/, "const uint8_t *s, int sp, const uint8_t *r, int rp, uint32_t *sum_s, uint32_t *sum_r, uint32_t *sum_sq_s, uint32_t *sum_sq_r, uint32_t *sum_sxr";
- specialize qw/aom_ssim_parms_16x16/, "$sse2_x86_64";
-
- add_proto qw/void aom_highbd_ssim_parms_8x8/, "const uint16_t *s, int sp, const uint16_t *r, int rp, uint32_t *sum_s, uint32_t *sum_r, uint32_t *sum_sq_s, uint32_t *sum_sq_r, uint32_t *sum_sxr";
-
- }
-} # CONFIG_AV1_ENCODER
-
-if (aom_config("CONFIG_AV1_ENCODER") eq "yes") {
-
- #
- # Specialty Variance
- #
- add_proto qw/void aom_get16x16var/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, int *sum";
-
- add_proto qw/void aom_get8x8var/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, int *sum";
-
- specialize qw/aom_get16x16var neon msa/;
- specialize qw/aom_get8x8var neon msa/;
-
-
- add_proto qw/unsigned int aom_mse16x16/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse";
- add_proto qw/unsigned int aom_mse16x8/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse";
- add_proto qw/unsigned int aom_mse8x16/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse";
- add_proto qw/unsigned int aom_mse8x8/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse";
-
- specialize qw/aom_mse16x16 sse2 avx2 neon msa/;
- specialize qw/aom_mse16x8 sse2 msa/;
- specialize qw/aom_mse8x16 sse2 msa/;
- specialize qw/aom_mse8x8 sse2 msa/;
-
- foreach $bd (8, 10, 12) {
- add_proto qw/void/, "aom_highbd_${bd}_get16x16var", "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, int *sum";
- add_proto qw/void/, "aom_highbd_${bd}_get8x8var", "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, int *sum";
-
- add_proto qw/unsigned int/, "aom_highbd_${bd}_mse16x16", "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse";
- add_proto qw/unsigned int/, "aom_highbd_${bd}_mse16x8", "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse";
- add_proto qw/unsigned int/, "aom_highbd_${bd}_mse8x16", "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse";
- add_proto qw/unsigned int/, "aom_highbd_${bd}_mse8x8", "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse";
-
- specialize "aom_highbd_${bd}_mse16x16", qw/sse2/;
- specialize "aom_highbd_${bd}_mse8x8", qw/sse2/;
- }
-
-
- #
- #
- #
- add_proto qw/void aom_upsampled_pred/, "MACROBLOCKD *xd, const struct AV1Common *const cm, int mi_row, int mi_col,
- const MV *const mv, uint8_t *comp_pred, int width, int height, int subpel_x_q3,
- int subpel_y_q3, const uint8_t *ref, int ref_stride, int subpel_search";
- specialize qw/aom_upsampled_pred sse2/;
-
- add_proto qw/void aom_comp_avg_upsampled_pred/, "MACROBLOCKD *xd, const struct AV1Common *const cm, int mi_row, int mi_col,
- const MV *const mv, uint8_t *comp_pred, const uint8_t *pred, int width,
- int height, int subpel_x_q3, int subpel_y_q3, const uint8_t *ref,
- int ref_stride, int subpel_search";
- specialize qw/aom_comp_avg_upsampled_pred sse2/;
-
- add_proto qw/void aom_jnt_comp_avg_upsampled_pred/, "MACROBLOCKD *xd, const struct AV1Common *const cm, int mi_row, int mi_col,
- const MV *const mv, uint8_t *comp_pred, const uint8_t *pred, int width,
- int height, int subpel_x_q3, int subpel_y_q3, const uint8_t *ref,
- int ref_stride, const JNT_COMP_PARAMS *jcp_param, int subpel_search";
- specialize qw/aom_jnt_comp_avg_upsampled_pred ssse3/;
-
- add_proto qw/void aom_comp_mask_upsampled_pred/, "MACROBLOCKD *xd, const struct AV1Common *const cm, int mi_row, int mi_col,
- const MV *const mv, uint8_t *comp_pred, const uint8_t *pred, int width,
- int height, int subpel_x_q3, int subpel_y_q3, const uint8_t *ref,
- int ref_stride, const uint8_t *mask, int mask_stride, int invert_mask,
- int subpel_search";
- specialize qw/aom_comp_mask_upsampled_pred sse2/;
-
-
- add_proto qw/void aom_highbd_upsampled_pred/, "MACROBLOCKD *xd, const struct AV1Common *const cm, int mi_row, int mi_col,
- const MV *const mv, uint8_t *comp_pred8, int width, int height, int subpel_x_q3,
- int subpel_y_q3, const uint8_t *ref8, int ref_stride, int bd, int subpel_search";
- specialize qw/aom_highbd_upsampled_pred sse2/;
-
- add_proto qw/void aom_highbd_comp_avg_upsampled_pred/, "MACROBLOCKD *xd, const struct AV1Common *const cm, int mi_row, int mi_col,
- const MV *const mv, uint8_t *comp_pred8, const uint8_t *pred8, int width,
- int height, int subpel_x_q3, int subpel_y_q3, const uint8_t *ref8, int ref_stride, int bd, int subpel_search";
- specialize qw/aom_highbd_comp_avg_upsampled_pred sse2/;
-
- add_proto qw/void aom_highbd_jnt_comp_avg_upsampled_pred/, "MACROBLOCKD *xd, const struct AV1Common *const cm, int mi_row, int mi_col,
- const MV *const mv, uint8_t *comp_pred8, const uint8_t *pred8, int width,
- int height, int subpel_x_q3, int subpel_y_q3, const uint8_t *ref8,
- int ref_stride, int bd, const JNT_COMP_PARAMS *jcp_param, int subpel_search";
- specialize qw/aom_highbd_jnt_comp_avg_upsampled_pred sse2/;
-
-
- #
- #
- #
- add_proto qw/unsigned int aom_get_mb_ss/, "const int16_t *";
- add_proto qw/unsigned int aom_get4x4sse_cs/, "const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride";
-
- specialize qw/aom_get_mb_ss sse2 msa/;
- specialize qw/aom_get4x4sse_cs neon msa/;
-
- #
- # Variance / Subpixel Variance / Subpixel Avg Variance
- #
- add_proto qw/unsigned int/, "aom_variance2x2", "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
-
- add_proto qw/unsigned int/, "aom_variance2x4", "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
-
- add_proto qw/unsigned int/, "aom_variance4x2", "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
-
- foreach (@block_sizes) {
- ($w, $h) = @$_;
- add_proto qw/unsigned int/, "aom_variance${w}x${h}", "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
- add_proto qw/uint32_t/, "aom_sub_pixel_variance${w}x${h}", "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse";
- add_proto qw/uint32_t/, "aom_sub_pixel_avg_variance${w}x${h}", "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred";
- add_proto qw/uint32_t/, "aom_jnt_sub_pixel_avg_variance${w}x${h}", "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred, const JNT_COMP_PARAMS *jcp_param";
- }
- specialize qw/aom_variance128x128 sse2 avx2 /;
- specialize qw/aom_variance128x64 sse2 avx2 /;
- specialize qw/aom_variance64x128 sse2 avx2 /;
- specialize qw/aom_variance64x64 sse2 avx2 neon msa/;
- specialize qw/aom_variance64x32 sse2 avx2 neon msa/;
- specialize qw/aom_variance32x64 sse2 avx2 neon msa/;
- specialize qw/aom_variance32x32 sse2 avx2 neon msa/;
- specialize qw/aom_variance32x16 sse2 avx2 msa/;
- specialize qw/aom_variance16x32 sse2 avx2 msa/;
- specialize qw/aom_variance16x16 sse2 avx2 neon msa/;
- specialize qw/aom_variance16x8 sse2 avx2 neon msa/;
- specialize qw/aom_variance8x16 sse2 neon msa/;
- specialize qw/aom_variance8x8 sse2 neon msa/;
- specialize qw/aom_variance8x4 sse2 msa/;
- specialize qw/aom_variance4x8 sse2 msa/;
- specialize qw/aom_variance4x4 sse2 msa/;
-
- specialize qw/aom_sub_pixel_variance128x128 avx2 sse2 ssse3/;
- specialize qw/aom_sub_pixel_variance128x64 avx2 sse2 ssse3/;
- specialize qw/aom_sub_pixel_variance64x128 avx2 sse2 ssse3/;
- specialize qw/aom_sub_pixel_variance64x64 avx2 neon msa sse2 ssse3/;
- specialize qw/aom_sub_pixel_variance64x32 avx2 msa sse2 ssse3/;
- specialize qw/aom_sub_pixel_variance32x64 avx2 msa sse2 ssse3/;
- specialize qw/aom_sub_pixel_variance32x32 avx2 neon msa sse2 ssse3/;
- specialize qw/aom_sub_pixel_variance32x16 avx2 msa sse2 ssse3/;
- specialize qw/aom_sub_pixel_variance16x32 msa sse2 ssse3/;
- specialize qw/aom_sub_pixel_variance16x16 neon msa sse2 ssse3/;
- specialize qw/aom_sub_pixel_variance16x8 msa sse2 ssse3/;
- specialize qw/aom_sub_pixel_variance8x16 msa sse2 ssse3/;
- specialize qw/aom_sub_pixel_variance8x8 neon msa sse2 ssse3/;
- specialize qw/aom_sub_pixel_variance8x4 msa sse2 ssse3/;
- specialize qw/aom_sub_pixel_variance4x8 msa sse2 ssse3/;
- specialize qw/aom_sub_pixel_variance4x4 msa sse2 ssse3/;
-
- specialize qw/aom_sub_pixel_avg_variance128x128 avx2 sse2 ssse3/;
- specialize qw/aom_sub_pixel_avg_variance128x64 avx2 sse2 ssse3/;
- specialize qw/aom_sub_pixel_avg_variance64x128 avx2 sse2 ssse3/;
- specialize qw/aom_sub_pixel_avg_variance64x64 avx2 msa sse2 ssse3/;
- specialize qw/aom_sub_pixel_avg_variance64x32 avx2 msa sse2 ssse3/;
- specialize qw/aom_sub_pixel_avg_variance32x64 avx2 msa sse2 ssse3/;
- specialize qw/aom_sub_pixel_avg_variance32x32 avx2 msa sse2 ssse3/;
- specialize qw/aom_sub_pixel_avg_variance32x16 avx2 msa sse2 ssse3/;
- specialize qw/aom_sub_pixel_avg_variance16x32 msa sse2 ssse3/;
- specialize qw/aom_sub_pixel_avg_variance16x16 msa sse2 ssse3/;
- specialize qw/aom_sub_pixel_avg_variance16x8 msa sse2 ssse3/;
- specialize qw/aom_sub_pixel_avg_variance8x16 msa sse2 ssse3/;
- specialize qw/aom_sub_pixel_avg_variance8x8 msa sse2 ssse3/;
- specialize qw/aom_sub_pixel_avg_variance8x4 msa sse2 ssse3/;
- specialize qw/aom_sub_pixel_avg_variance4x8 msa sse2 ssse3/;
- specialize qw/aom_sub_pixel_avg_variance4x4 msa sse2 ssse3/;
-
- specialize qw/aom_variance4x16 sse2/;
- specialize qw/aom_variance16x4 sse2 avx2/;
- specialize qw/aom_variance8x32 sse2/;
- specialize qw/aom_variance32x8 sse2 avx2/;
- specialize qw/aom_variance16x64 sse2 avx2/;
- specialize qw/aom_variance64x16 sse2 avx2/;
- specialize qw/aom_sub_pixel_variance4x16 sse2 ssse3/;
- specialize qw/aom_sub_pixel_variance16x4 sse2 ssse3/;
- specialize qw/aom_sub_pixel_variance8x32 sse2 ssse3/;
- specialize qw/aom_sub_pixel_variance32x8 sse2 ssse3/;
- specialize qw/aom_sub_pixel_variance16x64 sse2 ssse3/;
- specialize qw/aom_sub_pixel_variance64x16 sse2 ssse3/;
- specialize qw/aom_sub_pixel_avg_variance4x16 sse2 ssse3/;
- specialize qw/aom_sub_pixel_avg_variance16x4 sse2 ssse3/;
- specialize qw/aom_sub_pixel_avg_variance8x32 sse2 ssse3/;
- specialize qw/aom_sub_pixel_avg_variance32x8 sse2 ssse3/;
- specialize qw/aom_sub_pixel_avg_variance16x64 sse2 ssse3/;
- specialize qw/aom_sub_pixel_avg_variance64x16 sse2 ssse3/;
-
- specialize qw/aom_jnt_sub_pixel_avg_variance64x64 ssse3/;
- specialize qw/aom_jnt_sub_pixel_avg_variance64x32 ssse3/;
- specialize qw/aom_jnt_sub_pixel_avg_variance32x64 ssse3/;
- specialize qw/aom_jnt_sub_pixel_avg_variance32x32 ssse3/;
- specialize qw/aom_jnt_sub_pixel_avg_variance32x16 ssse3/;
- specialize qw/aom_jnt_sub_pixel_avg_variance16x32 ssse3/;
- specialize qw/aom_jnt_sub_pixel_avg_variance16x16 ssse3/;
- specialize qw/aom_jnt_sub_pixel_avg_variance16x8 ssse3/;
- specialize qw/aom_jnt_sub_pixel_avg_variance8x16 ssse3/;
- specialize qw/aom_jnt_sub_pixel_avg_variance8x8 ssse3/;
- specialize qw/aom_jnt_sub_pixel_avg_variance8x4 ssse3/;
- specialize qw/aom_jnt_sub_pixel_avg_variance4x8 ssse3/;
- specialize qw/aom_jnt_sub_pixel_avg_variance4x4 ssse3/;
-
- specialize qw/aom_jnt_sub_pixel_avg_variance4x16 ssse3/;
- specialize qw/aom_jnt_sub_pixel_avg_variance16x4 ssse3/;
- specialize qw/aom_jnt_sub_pixel_avg_variance8x32 ssse3/;
- specialize qw/aom_jnt_sub_pixel_avg_variance32x8 ssse3/;
- specialize qw/aom_jnt_sub_pixel_avg_variance16x64 ssse3/;
- specialize qw/aom_jnt_sub_pixel_avg_variance64x16 ssse3/;
-
- specialize qw/aom_jnt_sub_pixel_avg_variance128x128 ssse3/;
- specialize qw/aom_jnt_sub_pixel_avg_variance128x64 ssse3/;
- specialize qw/aom_jnt_sub_pixel_avg_variance64x128 ssse3/;
-
-
- foreach $bd (8, 10, 12) {
- add_proto qw/unsigned int/, "aom_highbd_${bd}_variance2x2", "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
-
- add_proto qw/unsigned int/, "aom_highbd_${bd}_variance2x4", "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
-
- add_proto qw/unsigned int/, "aom_highbd_${bd}_variance4x2", "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
-
- foreach (@block_sizes) {
- ($w, $h) = @$_;
- add_proto qw/unsigned int/, "aom_highbd_${bd}_variance${w}x${h}", "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse";
- add_proto qw/uint32_t/, "aom_highbd_${bd}_sub_pixel_variance${w}x${h}", "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse";
- add_proto qw/uint32_t/, "aom_highbd_${bd}_sub_pixel_avg_variance${w}x${h}", "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred";
- if ($w != 128 && $h != 128 && $w != 4 && $h != 4) {
- specialize "aom_highbd_${bd}_variance${w}x${h}", "sse2";
- }
- # TODO(david.barker): When ext-partition-types is enabled, we currently
- # don't have vectorized 4x16 highbd variance functions
- if ($w == 4 && $h == 4) {
- specialize "aom_highbd_${bd}_variance${w}x${h}", "sse4_1";
- }
- if ($w != 128 && $h != 128 && $w != 4) {
- specialize "aom_highbd_${bd}_sub_pixel_variance${w}x${h}", qw/sse2/;
- specialize "aom_highbd_${bd}_sub_pixel_avg_variance${w}x${h}", qw/sse2/;
- }
- if ($w == 4 && $h == 4) {
- specialize "aom_highbd_${bd}_sub_pixel_variance${w}x${h}", "sse4_1";
- specialize "aom_highbd_${bd}_sub_pixel_avg_variance${w}x${h}", "sse4_1";
- }
-
- add_proto qw/uint32_t/, "aom_highbd_${bd}_jnt_sub_pixel_avg_variance${w}x${h}", "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred, const JNT_COMP_PARAMS* jcp_param";
- }
- }
-
- #
- # Masked Variance / Masked Subpixel Variance
- #
- foreach (@block_sizes) {
- ($w, $h) = @$_;
- add_proto qw/unsigned int/, "aom_masked_sub_pixel_variance${w}x${h}", "const uint8_t *src, int src_stride, int xoffset, int yoffset, const uint8_t *ref, int ref_stride, const uint8_t *second_pred, const uint8_t *msk, int msk_stride, int invert_mask, unsigned int *sse";
- specialize "aom_masked_sub_pixel_variance${w}x${h}", qw/ssse3/;
- }
-
-
- foreach $bd ("_8_", "_10_", "_12_") {
- foreach (@block_sizes) {
- ($w, $h) = @$_;
- add_proto qw/unsigned int/, "aom_highbd${bd}masked_sub_pixel_variance${w}x${h}", "const uint8_t *src, int src_stride, int xoffset, int yoffset, const uint8_t *ref, int ref_stride, const uint8_t *second_pred, const uint8_t *msk, int msk_stride, int invert_mask, unsigned int *sse";
- specialize "aom_highbd${bd}masked_sub_pixel_variance${w}x${h}", qw/ssse3/;
- }
- }
-
-
- #
- # OBMC Variance / OBMC Subpixel Variance
- #
- foreach (@block_sizes) {
- ($w, $h) = @$_;
- add_proto qw/unsigned int/, "aom_obmc_variance${w}x${h}", "const uint8_t *pre, int pre_stride, const int32_t *wsrc, const int32_t *mask, unsigned int *sse";
- add_proto qw/unsigned int/, "aom_obmc_sub_pixel_variance${w}x${h}", "const uint8_t *pre, int pre_stride, int xoffset, int yoffset, const int32_t *wsrc, const int32_t *mask, unsigned int *sse";
- specialize "aom_obmc_variance${w}x${h}", qw/sse4_1 avx2/;
- specialize "aom_obmc_sub_pixel_variance${w}x${h}", q/sse4_1/;
- }
-
-
- foreach $bd ("_", "_10_", "_12_") {
- foreach (@block_sizes) {
- ($w, $h) = @$_;
- add_proto qw/unsigned int/, "aom_highbd${bd}obmc_variance${w}x${h}", "const uint8_t *pre, int pre_stride, const int32_t *wsrc, const int32_t *mask, unsigned int *sse";
- add_proto qw/unsigned int/, "aom_highbd${bd}obmc_sub_pixel_variance${w}x${h}", "const uint8_t *pre, int pre_stride, int xoffset, int yoffset, const int32_t *wsrc, const int32_t *mask, unsigned int *sse";
- specialize "aom_highbd${bd}obmc_variance${w}x${h}", qw/sse4_1/;
- }
- }
-
-
- add_proto qw/uint32_t aom_sub_pixel_avg_variance64x64/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred";
- specialize qw/aom_sub_pixel_avg_variance64x64 avx2 msa sse2 ssse3/;
-
- add_proto qw/uint32_t aom_sub_pixel_avg_variance64x32/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred";
- specialize qw/aom_sub_pixel_avg_variance64x32 msa sse2 ssse3/;
-
- add_proto qw/uint32_t aom_sub_pixel_avg_variance32x64/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred";
- specialize qw/aom_sub_pixel_avg_variance32x64 msa sse2 ssse3/;
-
- add_proto qw/uint32_t aom_sub_pixel_avg_variance32x32/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred";
- specialize qw/aom_sub_pixel_avg_variance32x32 avx2 msa sse2 ssse3/;
-
- add_proto qw/uint32_t aom_sub_pixel_avg_variance32x16/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred";
- specialize qw/aom_sub_pixel_avg_variance32x16 msa sse2 ssse3/;
-
- add_proto qw/uint32_t aom_sub_pixel_avg_variance16x32/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred";
- specialize qw/aom_sub_pixel_avg_variance16x32 msa sse2 ssse3/;
-
- add_proto qw/uint32_t aom_sub_pixel_avg_variance16x16/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred";
- specialize qw/aom_sub_pixel_avg_variance16x16 msa sse2 ssse3/;
-
- add_proto qw/uint32_t aom_sub_pixel_avg_variance16x8/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred";
- specialize qw/aom_sub_pixel_avg_variance16x8 msa sse2 ssse3/;
-
- add_proto qw/uint32_t aom_sub_pixel_avg_variance8x16/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred";
- specialize qw/aom_sub_pixel_avg_variance8x16 msa sse2 ssse3/;
-
- add_proto qw/uint32_t aom_sub_pixel_avg_variance8x8/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred";
- specialize qw/aom_sub_pixel_avg_variance8x8 msa sse2 ssse3/;
-
- add_proto qw/uint32_t aom_sub_pixel_avg_variance8x4/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred";
- specialize qw/aom_sub_pixel_avg_variance8x4 msa sse2 ssse3/;
-
- add_proto qw/uint32_t aom_sub_pixel_avg_variance4x8/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred";
- specialize qw/aom_sub_pixel_avg_variance4x8 msa sse2 ssse3/;
-
- add_proto qw/uint32_t aom_sub_pixel_avg_variance4x4/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred";
- specialize qw/aom_sub_pixel_avg_variance4x4 msa sse2 ssse3/;
-
- #
- # Comp Avg
- #
- add_proto qw/void aom_comp_avg_pred/, "uint8_t *comp_pred, const uint8_t *pred, int width, int height, const uint8_t *ref, int ref_stride";
-
- add_proto qw/void aom_jnt_comp_avg_pred/, "uint8_t *comp_pred, const uint8_t *pred, int width, int height, const uint8_t *ref, int ref_stride, const JNT_COMP_PARAMS *jcp_param";
- specialize qw/aom_jnt_comp_avg_pred ssse3/;
-
- add_proto qw/unsigned int aom_highbd_12_variance128x128/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
- specialize qw/aom_highbd_12_variance128x128 sse2/;
-
- add_proto qw/unsigned int aom_highbd_12_variance128x64/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
- specialize qw/aom_highbd_12_variance128x64 sse2/;
-
- add_proto qw/unsigned int aom_highbd_12_variance64x128/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
- specialize qw/aom_highbd_12_variance64x128 sse2/;
-
- add_proto qw/unsigned int aom_highbd_12_variance64x64/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
- specialize qw/aom_highbd_12_variance64x64 sse2/;
-
- add_proto qw/unsigned int aom_highbd_12_variance64x32/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
- specialize qw/aom_highbd_12_variance64x32 sse2/;
-
- add_proto qw/unsigned int aom_highbd_12_variance32x64/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
- specialize qw/aom_highbd_12_variance32x64 sse2/;
-
- add_proto qw/unsigned int aom_highbd_12_variance32x32/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
- specialize qw/aom_highbd_12_variance32x32 sse2/;
-
- add_proto qw/unsigned int aom_highbd_12_variance32x16/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
- specialize qw/aom_highbd_12_variance32x16 sse2/;
-
- add_proto qw/unsigned int aom_highbd_12_variance16x32/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
- specialize qw/aom_highbd_12_variance16x32 sse2/;
-
- add_proto qw/unsigned int aom_highbd_12_variance16x16/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
- specialize qw/aom_highbd_12_variance16x16 sse2/;
-
- add_proto qw/unsigned int aom_highbd_12_variance16x8/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
- specialize qw/aom_highbd_12_variance16x8 sse2/;
-
- add_proto qw/unsigned int aom_highbd_12_variance8x16/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
- specialize qw/aom_highbd_12_variance8x16 sse2/;
-
- add_proto qw/unsigned int aom_highbd_12_variance8x8/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
- specialize qw/aom_highbd_12_variance8x8 sse2/;
-
- add_proto qw/unsigned int aom_highbd_12_variance8x4/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
- add_proto qw/unsigned int aom_highbd_12_variance4x8/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
- add_proto qw/unsigned int aom_highbd_12_variance4x4/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
-
- add_proto qw/unsigned int aom_highbd_10_variance128x128/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
- specialize qw/aom_highbd_10_variance128x128 sse2 avx2/;
-
- add_proto qw/unsigned int aom_highbd_10_variance128x64/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
- specialize qw/aom_highbd_10_variance128x64 sse2 avx2/;
-
- add_proto qw/unsigned int aom_highbd_10_variance64x128/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
- specialize qw/aom_highbd_10_variance64x128 sse2 avx2/;
-
- add_proto qw/unsigned int aom_highbd_10_variance64x64/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
- specialize qw/aom_highbd_10_variance64x64 sse2 avx2/;
-
- add_proto qw/unsigned int aom_highbd_10_variance64x32/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
- specialize qw/aom_highbd_10_variance64x32 sse2 avx2/;
-
- add_proto qw/unsigned int aom_highbd_10_variance32x64/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
- specialize qw/aom_highbd_10_variance32x64 sse2 avx2/;
-
- add_proto qw/unsigned int aom_highbd_10_variance32x32/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
- specialize qw/aom_highbd_10_variance32x32 sse2 avx2/;
-
- add_proto qw/unsigned int aom_highbd_10_variance32x16/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
- specialize qw/aom_highbd_10_variance32x16 sse2 avx2/;
-
- add_proto qw/unsigned int aom_highbd_10_variance16x32/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
- specialize qw/aom_highbd_10_variance16x32 sse2 avx2/;
-
- add_proto qw/unsigned int aom_highbd_10_variance16x16/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
- specialize qw/aom_highbd_10_variance16x16 sse2 avx2/;
-
- add_proto qw/unsigned int aom_highbd_10_variance16x8/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
- specialize qw/aom_highbd_10_variance16x8 sse2 avx2/;
-
- add_proto qw/unsigned int aom_highbd_10_variance8x16/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
- specialize qw/aom_highbd_10_variance8x16 sse2 avx2/;
-
- add_proto qw/unsigned int aom_highbd_10_variance8x8/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
- specialize qw/aom_highbd_10_variance8x8 sse2 avx2/;
-
- add_proto qw/unsigned int aom_highbd_10_variance8x4/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
- add_proto qw/unsigned int aom_highbd_10_variance4x8/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
- add_proto qw/unsigned int aom_highbd_10_variance4x4/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
-
- add_proto qw/unsigned int aom_highbd_8_variance128x128/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
- specialize qw/aom_highbd_8_variance128x128 sse2/;
-
- add_proto qw/unsigned int aom_highbd_8_variance128x64/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
- specialize qw/aom_highbd_8_variance128x64 sse2/;
-
- add_proto qw/unsigned int aom_highbd_8_variance64x128/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
- specialize qw/aom_highbd_8_variance64x128 sse2/;
-
- add_proto qw/unsigned int aom_highbd_8_variance64x64/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
- specialize qw/aom_highbd_8_variance64x64 sse2/;
-
- add_proto qw/unsigned int aom_highbd_8_variance64x32/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
- specialize qw/aom_highbd_8_variance64x32 sse2/;
-
- add_proto qw/unsigned int aom_highbd_8_variance32x64/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
- specialize qw/aom_highbd_8_variance32x64 sse2/;
-
- add_proto qw/unsigned int aom_highbd_8_variance32x32/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
- specialize qw/aom_highbd_8_variance32x32 sse2/;
-
- add_proto qw/unsigned int aom_highbd_8_variance32x16/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
- specialize qw/aom_highbd_8_variance32x16 sse2/;
-
- add_proto qw/unsigned int aom_highbd_8_variance16x32/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
- specialize qw/aom_highbd_8_variance16x32 sse2/;
-
- add_proto qw/unsigned int aom_highbd_8_variance16x16/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
- specialize qw/aom_highbd_8_variance16x16 sse2/;
-
- add_proto qw/unsigned int aom_highbd_8_variance16x8/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
- specialize qw/aom_highbd_8_variance16x8 sse2/;
-
- add_proto qw/unsigned int aom_highbd_8_variance8x16/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
- specialize qw/aom_highbd_8_variance8x16 sse2/;
-
- add_proto qw/unsigned int aom_highbd_8_variance8x8/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
- specialize qw/aom_highbd_8_variance8x8 sse2/;
-
- add_proto qw/unsigned int aom_highbd_8_variance8x4/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
- add_proto qw/unsigned int aom_highbd_8_variance4x8/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
- add_proto qw/unsigned int aom_highbd_8_variance4x4/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
-
- add_proto qw/void aom_highbd_8_get16x16var/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, int *sum";
- add_proto qw/void aom_highbd_8_get8x8var/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, int *sum";
-
- add_proto qw/void aom_highbd_10_get16x16var/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, int *sum";
- add_proto qw/void aom_highbd_10_get8x8var/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, int *sum";
-
- add_proto qw/void aom_highbd_12_get16x16var/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, int *sum";
- add_proto qw/void aom_highbd_12_get8x8var/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, int *sum";
-
- add_proto qw/unsigned int aom_highbd_8_mse16x16/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse";
- specialize qw/aom_highbd_8_mse16x16 sse2/;
-
- add_proto qw/unsigned int aom_highbd_8_mse16x8/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse";
- add_proto qw/unsigned int aom_highbd_8_mse8x16/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse";
- add_proto qw/unsigned int aom_highbd_8_mse8x8/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse";
- specialize qw/aom_highbd_8_mse8x8 sse2/;
-
- add_proto qw/unsigned int aom_highbd_10_mse16x16/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse";
- specialize qw/aom_highbd_10_mse16x16 sse2/;
-
- add_proto qw/unsigned int aom_highbd_10_mse16x8/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse";
- add_proto qw/unsigned int aom_highbd_10_mse8x16/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse";
- add_proto qw/unsigned int aom_highbd_10_mse8x8/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse";
- specialize qw/aom_highbd_10_mse8x8 sse2/;
-
- add_proto qw/unsigned int aom_highbd_12_mse16x16/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse";
- specialize qw/aom_highbd_12_mse16x16 sse2/;
-
- add_proto qw/unsigned int aom_highbd_12_mse16x8/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse";
- add_proto qw/unsigned int aom_highbd_12_mse8x16/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse";
- add_proto qw/unsigned int aom_highbd_12_mse8x8/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse";
- specialize qw/aom_highbd_12_mse8x8 sse2/;
-
- add_proto qw/void aom_highbd_comp_avg_pred/, "uint8_t *comp_pred8, const uint8_t *pred8, int width, int height, const uint8_t *ref8, int ref_stride";
-
- add_proto qw/void aom_highbd_jnt_comp_avg_pred/, "uint8_t *comp_pred8, const uint8_t *pred8, int width, int height, const uint8_t *ref8, int ref_stride, const JNT_COMP_PARAMS *jcp_param";
- specialize qw/aom_highbd_jnt_comp_avg_pred sse2/;
-
- #
- # Subpixel Variance
- #
- add_proto qw/uint32_t aom_highbd_12_sub_pixel_variance64x64/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse";
- specialize qw/aom_highbd_12_sub_pixel_variance64x64 sse2/;
-
- add_proto qw/uint32_t aom_highbd_12_sub_pixel_variance64x32/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse";
- specialize qw/aom_highbd_12_sub_pixel_variance64x32 sse2/;
-
- add_proto qw/uint32_t aom_highbd_12_sub_pixel_variance32x64/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse";
- specialize qw/aom_highbd_12_sub_pixel_variance32x64 sse2/;
-
- add_proto qw/uint32_t aom_highbd_12_sub_pixel_variance32x32/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse";
- specialize qw/aom_highbd_12_sub_pixel_variance32x32 sse2/;
-
- add_proto qw/uint32_t aom_highbd_12_sub_pixel_variance32x16/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse";
- specialize qw/aom_highbd_12_sub_pixel_variance32x16 sse2/;
-
- add_proto qw/uint32_t aom_highbd_12_sub_pixel_variance16x32/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse";
- specialize qw/aom_highbd_12_sub_pixel_variance16x32 sse2/;
-
- add_proto qw/uint32_t aom_highbd_12_sub_pixel_variance16x16/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse";
- specialize qw/aom_highbd_12_sub_pixel_variance16x16 sse2/;
-
- add_proto qw/uint32_t aom_highbd_12_sub_pixel_variance16x8/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse";
- specialize qw/aom_highbd_12_sub_pixel_variance16x8 sse2/;
-
- add_proto qw/uint32_t aom_highbd_12_sub_pixel_variance8x16/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse";
- specialize qw/aom_highbd_12_sub_pixel_variance8x16 sse2/;
-
- add_proto qw/uint32_t aom_highbd_12_sub_pixel_variance8x8/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse";
- specialize qw/aom_highbd_12_sub_pixel_variance8x8 sse2/;
-
- add_proto qw/uint32_t aom_highbd_12_sub_pixel_variance8x4/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse";
- specialize qw/aom_highbd_12_sub_pixel_variance8x4 sse2/;
-
- add_proto qw/uint32_t aom_highbd_12_sub_pixel_variance4x8/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse";
- add_proto qw/uint32_t aom_highbd_12_sub_pixel_variance4x4/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse";
-
- add_proto qw/uint32_t aom_highbd_10_sub_pixel_variance64x64/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse";
- specialize qw/aom_highbd_10_sub_pixel_variance64x64 sse2/;
-
- add_proto qw/uint32_t aom_highbd_10_sub_pixel_variance64x32/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse";
- specialize qw/aom_highbd_10_sub_pixel_variance64x32 sse2/;
-
- add_proto qw/uint32_t aom_highbd_10_sub_pixel_variance32x64/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse";
- specialize qw/aom_highbd_10_sub_pixel_variance32x64 sse2/;
-
- add_proto qw/uint32_t aom_highbd_10_sub_pixel_variance32x32/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse";
- specialize qw/aom_highbd_10_sub_pixel_variance32x32 sse2/;
-
- add_proto qw/uint32_t aom_highbd_10_sub_pixel_variance32x16/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse";
- specialize qw/aom_highbd_10_sub_pixel_variance32x16 sse2/;
-
- add_proto qw/uint32_t aom_highbd_10_sub_pixel_variance16x32/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse";
- specialize qw/aom_highbd_10_sub_pixel_variance16x32 sse2/;
-
- add_proto qw/uint32_t aom_highbd_10_sub_pixel_variance16x16/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse";
- specialize qw/aom_highbd_10_sub_pixel_variance16x16 sse2/;
-
- add_proto qw/uint32_t aom_highbd_10_sub_pixel_variance16x8/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse";
- specialize qw/aom_highbd_10_sub_pixel_variance16x8 sse2/;
-
- add_proto qw/uint32_t aom_highbd_10_sub_pixel_variance8x16/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse";
- specialize qw/aom_highbd_10_sub_pixel_variance8x16 sse2/;
-
- add_proto qw/uint32_t aom_highbd_10_sub_pixel_variance8x8/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse";
- specialize qw/aom_highbd_10_sub_pixel_variance8x8 sse2/;
-
- add_proto qw/uint32_t aom_highbd_10_sub_pixel_variance8x4/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse";
- specialize qw/aom_highbd_10_sub_pixel_variance8x4 sse2/;
-
- add_proto qw/uint32_t aom_highbd_10_sub_pixel_variance4x8/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse";
- add_proto qw/uint32_t aom_highbd_10_sub_pixel_variance4x4/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse";
-
- add_proto qw/uint32_t aom_highbd_8_sub_pixel_variance64x64/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse";
- specialize qw/aom_highbd_8_sub_pixel_variance64x64 sse2/;
-
- add_proto qw/uint32_t aom_highbd_8_sub_pixel_variance64x32/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse";
- specialize qw/aom_highbd_8_sub_pixel_variance64x32 sse2/;
-
- add_proto qw/uint32_t aom_highbd_8_sub_pixel_variance32x64/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse";
- specialize qw/aom_highbd_8_sub_pixel_variance32x64 sse2/;
-
- add_proto qw/uint32_t aom_highbd_8_sub_pixel_variance32x32/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse";
- specialize qw/aom_highbd_8_sub_pixel_variance32x32 sse2/;
-
- add_proto qw/uint32_t aom_highbd_8_sub_pixel_variance32x16/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse";
- specialize qw/aom_highbd_8_sub_pixel_variance32x16 sse2/;
-
- add_proto qw/uint32_t aom_highbd_8_sub_pixel_variance16x32/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse";
- specialize qw/aom_highbd_8_sub_pixel_variance16x32 sse2/;
-
- add_proto qw/uint32_t aom_highbd_8_sub_pixel_variance16x16/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse";
- specialize qw/aom_highbd_8_sub_pixel_variance16x16 sse2/;
-
- add_proto qw/uint32_t aom_highbd_8_sub_pixel_variance16x8/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse";
- specialize qw/aom_highbd_8_sub_pixel_variance16x8 sse2/;
-
- add_proto qw/uint32_t aom_highbd_8_sub_pixel_variance8x16/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse";
- specialize qw/aom_highbd_8_sub_pixel_variance8x16 sse2/;
-
- add_proto qw/uint32_t aom_highbd_8_sub_pixel_variance8x8/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse";
- specialize qw/aom_highbd_8_sub_pixel_variance8x8 sse2/;
-
- add_proto qw/uint32_t aom_highbd_8_sub_pixel_variance8x4/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse";
- specialize qw/aom_highbd_8_sub_pixel_variance8x4 sse2/;
-
- add_proto qw/uint32_t aom_highbd_8_sub_pixel_variance4x8/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse";
- add_proto qw/uint32_t aom_highbd_8_sub_pixel_variance4x4/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse";
-
- add_proto qw/uint32_t aom_highbd_12_sub_pixel_avg_variance64x64/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred";
- specialize qw/aom_highbd_12_sub_pixel_avg_variance64x64 sse2/;
-
- add_proto qw/uint32_t aom_highbd_12_sub_pixel_avg_variance64x32/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred";
- specialize qw/aom_highbd_12_sub_pixel_avg_variance64x32 sse2/;
-
- add_proto qw/uint32_t aom_highbd_12_sub_pixel_avg_variance32x64/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred";
- specialize qw/aom_highbd_12_sub_pixel_avg_variance32x64 sse2/;
-
- add_proto qw/uint32_t aom_highbd_12_sub_pixel_avg_variance32x32/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred";
- specialize qw/aom_highbd_12_sub_pixel_avg_variance32x32 sse2/;
-
- add_proto qw/uint32_t aom_highbd_12_sub_pixel_avg_variance32x16/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred";
- specialize qw/aom_highbd_12_sub_pixel_avg_variance32x16 sse2/;
-
- add_proto qw/uint32_t aom_highbd_12_sub_pixel_avg_variance16x32/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred";
- specialize qw/aom_highbd_12_sub_pixel_avg_variance16x32 sse2/;
-
- add_proto qw/uint32_t aom_highbd_12_sub_pixel_avg_variance16x16/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred";
- specialize qw/aom_highbd_12_sub_pixel_avg_variance16x16 sse2/;
-
- add_proto qw/uint32_t aom_highbd_12_sub_pixel_avg_variance16x8/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred";
- specialize qw/aom_highbd_12_sub_pixel_avg_variance16x8 sse2/;
-
- add_proto qw/uint32_t aom_highbd_12_sub_pixel_avg_variance8x16/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred";
- specialize qw/aom_highbd_12_sub_pixel_avg_variance8x16 sse2/;
-
- add_proto qw/uint32_t aom_highbd_12_sub_pixel_avg_variance8x8/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred";
- specialize qw/aom_highbd_12_sub_pixel_avg_variance8x8 sse2/;
-
- add_proto qw/uint32_t aom_highbd_12_sub_pixel_avg_variance8x4/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred";
- specialize qw/aom_highbd_12_sub_pixel_avg_variance8x4 sse2/;
-
- add_proto qw/uint32_t aom_highbd_12_sub_pixel_avg_variance4x8/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred";
- add_proto qw/uint32_t aom_highbd_12_sub_pixel_avg_variance4x4/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred";
-
- add_proto qw/uint32_t aom_highbd_10_sub_pixel_avg_variance64x64/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred";
- specialize qw/aom_highbd_10_sub_pixel_avg_variance64x64 sse2/;
-
- add_proto qw/uint32_t aom_highbd_10_sub_pixel_avg_variance64x32/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred";
- specialize qw/aom_highbd_10_sub_pixel_avg_variance64x32 sse2/;
-
- add_proto qw/uint32_t aom_highbd_10_sub_pixel_avg_variance32x64/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred";
- specialize qw/aom_highbd_10_sub_pixel_avg_variance32x64 sse2/;
-
- add_proto qw/uint32_t aom_highbd_10_sub_pixel_avg_variance32x32/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred";
- specialize qw/aom_highbd_10_sub_pixel_avg_variance32x32 sse2/;
-
- add_proto qw/uint32_t aom_highbd_10_sub_pixel_avg_variance32x16/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred";
- specialize qw/aom_highbd_10_sub_pixel_avg_variance32x16 sse2/;
-
- add_proto qw/uint32_t aom_highbd_10_sub_pixel_avg_variance16x32/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred";
- specialize qw/aom_highbd_10_sub_pixel_avg_variance16x32 sse2/;
-
- add_proto qw/uint32_t aom_highbd_10_sub_pixel_avg_variance16x16/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred";
- specialize qw/aom_highbd_10_sub_pixel_avg_variance16x16 sse2/;
-
- add_proto qw/uint32_t aom_highbd_10_sub_pixel_avg_variance16x8/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred";
- specialize qw/aom_highbd_10_sub_pixel_avg_variance16x8 sse2/;
-
- add_proto qw/uint32_t aom_highbd_10_sub_pixel_avg_variance8x16/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred";
- specialize qw/aom_highbd_10_sub_pixel_avg_variance8x16 sse2/;
-
- add_proto qw/uint32_t aom_highbd_10_sub_pixel_avg_variance8x8/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred";
- specialize qw/aom_highbd_10_sub_pixel_avg_variance8x8 sse2/;
-
- add_proto qw/uint32_t aom_highbd_10_sub_pixel_avg_variance8x4/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred";
- specialize qw/aom_highbd_10_sub_pixel_avg_variance8x4 sse2/;
-
- add_proto qw/uint32_t aom_highbd_10_sub_pixel_avg_variance4x8/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred";
- add_proto qw/uint32_t aom_highbd_10_sub_pixel_avg_variance4x4/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred";
-
- add_proto qw/uint32_t aom_highbd_8_sub_pixel_avg_variance64x64/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred";
- specialize qw/aom_highbd_8_sub_pixel_avg_variance64x64 sse2/;
-
- add_proto qw/uint32_t aom_highbd_8_sub_pixel_avg_variance64x32/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred";
- specialize qw/aom_highbd_8_sub_pixel_avg_variance64x32 sse2/;
-
- add_proto qw/uint32_t aom_highbd_8_sub_pixel_avg_variance32x64/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred";
- specialize qw/aom_highbd_8_sub_pixel_avg_variance32x64 sse2/;
-
- add_proto qw/uint32_t aom_highbd_8_sub_pixel_avg_variance32x32/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred";
- specialize qw/aom_highbd_8_sub_pixel_avg_variance32x32 sse2/;
-
- add_proto qw/uint32_t aom_highbd_8_sub_pixel_avg_variance32x16/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred";
- specialize qw/aom_highbd_8_sub_pixel_avg_variance32x16 sse2/;
-
- add_proto qw/uint32_t aom_highbd_8_sub_pixel_avg_variance16x32/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred";
- specialize qw/aom_highbd_8_sub_pixel_avg_variance16x32 sse2/;
-
- add_proto qw/uint32_t aom_highbd_8_sub_pixel_avg_variance16x16/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred";
- specialize qw/aom_highbd_8_sub_pixel_avg_variance16x16 sse2/;
-
- add_proto qw/uint32_t aom_highbd_8_sub_pixel_avg_variance16x8/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred";
- specialize qw/aom_highbd_8_sub_pixel_avg_variance16x8 sse2/;
-
- add_proto qw/uint32_t aom_highbd_8_sub_pixel_avg_variance8x16/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred";
- specialize qw/aom_highbd_8_sub_pixel_avg_variance8x16 sse2/;
-
- add_proto qw/uint32_t aom_highbd_8_sub_pixel_avg_variance8x8/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred";
- specialize qw/aom_highbd_8_sub_pixel_avg_variance8x8 sse2/;
-
- add_proto qw/uint32_t aom_highbd_8_sub_pixel_avg_variance8x4/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred";
- specialize qw/aom_highbd_8_sub_pixel_avg_variance8x4 sse2/;
-
- add_proto qw/uint32_t aom_highbd_8_sub_pixel_avg_variance4x8/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred";
- add_proto qw/uint32_t aom_highbd_8_sub_pixel_avg_variance4x4/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred";
-
-
-
- add_proto qw/void aom_comp_mask_pred/, "uint8_t *comp_pred, const uint8_t *pred, int width, int height, const uint8_t *ref, int ref_stride, const uint8_t *mask, int mask_stride, int invert_mask";
- specialize qw/aom_comp_mask_pred ssse3 avx2/;
-
- add_proto qw/void aom_highbd_comp_mask_pred/, "uint8_t *comp_pred, const uint8_t *pred8, int width, int height, const uint8_t *ref8, int ref_stride, const uint8_t *mask, int mask_stride, int invert_mask";
- specialize qw/aom_highbd_comp_mask_pred sse2 avx2/;
-
-} # CONFIG_AV1_ENCODER
-
-1;
diff --git a/third_party/aom/aom_dsp/aom_filter.h b/third_party/aom/aom_dsp/aom_filter.h
deleted file mode 100644
index 00686ac38..000000000
--- a/third_party/aom/aom_dsp/aom_filter.h
+++ /dev/null
@@ -1,56 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#ifndef AOM_AOM_DSP_AOM_FILTER_H_
-#define AOM_AOM_DSP_AOM_FILTER_H_
-
-#include "aom/aom_integer.h"
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-#define FILTER_BITS 7
-
-#define SUBPEL_BITS 4
-#define SUBPEL_MASK ((1 << SUBPEL_BITS) - 1)
-#define SUBPEL_SHIFTS (1 << SUBPEL_BITS)
-#define SUBPEL_TAPS 8
-
-#define SCALE_SUBPEL_BITS 10
-#define SCALE_SUBPEL_SHIFTS (1 << SCALE_SUBPEL_BITS)
-#define SCALE_SUBPEL_MASK (SCALE_SUBPEL_SHIFTS - 1)
-#define SCALE_EXTRA_BITS (SCALE_SUBPEL_BITS - SUBPEL_BITS)
-#define SCALE_EXTRA_OFF ((1 << SCALE_EXTRA_BITS) / 2)
-
-#define RS_SUBPEL_BITS 6
-#define RS_SUBPEL_MASK ((1 << RS_SUBPEL_BITS) - 1)
-#define RS_SCALE_SUBPEL_BITS 14
-#define RS_SCALE_SUBPEL_MASK ((1 << RS_SCALE_SUBPEL_BITS) - 1)
-#define RS_SCALE_EXTRA_BITS (RS_SCALE_SUBPEL_BITS - RS_SUBPEL_BITS)
-#define RS_SCALE_EXTRA_OFF (1 << (RS_SCALE_EXTRA_BITS - 1))
-
-typedef int16_t InterpKernel[SUBPEL_TAPS];
-
-#define BIL_SUBPEL_BITS 3
-#define BIL_SUBPEL_SHIFTS (1 << BIL_SUBPEL_BITS)
-
-// 2 tap bilinear filters
-static const uint8_t bilinear_filters_2t[BIL_SUBPEL_SHIFTS][2] = {
- { 128, 0 }, { 112, 16 }, { 96, 32 }, { 80, 48 },
- { 64, 64 }, { 48, 80 }, { 32, 96 }, { 16, 112 },
-};
-
-#ifdef __cplusplus
-} // extern "C"
-#endif
-
-#endif // AOM_AOM_DSP_AOM_FILTER_H_
diff --git a/third_party/aom/aom_dsp/aom_simd.h b/third_party/aom/aom_dsp/aom_simd.h
deleted file mode 100644
index ab950ca55..000000000
--- a/third_party/aom/aom_dsp/aom_simd.h
+++ /dev/null
@@ -1,38 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#ifndef AOM_AOM_DSP_AOM_SIMD_H_
-#define AOM_AOM_DSP_AOM_SIMD_H_
-
-#include <stdint.h>
-
-#if defined(_WIN32)
-#include <intrin.h>
-#endif
-
-#include "config/aom_config.h"
-
-#include "aom_dsp/aom_simd_inline.h"
-
-#define SIMD_CHECK 1 // Sanity checks in C equivalents
-
-#if HAVE_NEON
-#include "simd/v256_intrinsics_arm.h"
-// VS compiling for 32 bit targets does not support vector types in
-// structs as arguments, which makes the v256 type of the intrinsics
-// hard to support, so optimizations for this target are disabled.
-#elif HAVE_SSE2 && (defined(_WIN64) || !defined(_MSC_VER) || defined(__clang__))
-#include "simd/v256_intrinsics_x86.h"
-#else
-#include "simd/v256_intrinsics.h"
-#endif
-
-#endif // AOM_AOM_DSP_AOM_SIMD_H_
diff --git a/third_party/aom/aom_dsp/aom_simd_inline.h b/third_party/aom/aom_dsp/aom_simd_inline.h
deleted file mode 100644
index eb333f6f6..000000000
--- a/third_party/aom/aom_dsp/aom_simd_inline.h
+++ /dev/null
@@ -1,21 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#ifndef AOM_AOM_DSP_AOM_SIMD_INLINE_H_
-#define AOM_AOM_DSP_AOM_SIMD_INLINE_H_
-
-#include "aom/aom_integer.h"
-
-#ifndef SIMD_INLINE
-#define SIMD_INLINE static AOM_FORCE_INLINE
-#endif
-
-#endif // AOM_AOM_DSP_AOM_SIMD_INLINE_H_
diff --git a/third_party/aom/aom_dsp/arm/blend_a64_mask_neon.c b/third_party/aom/aom_dsp/arm/blend_a64_mask_neon.c
deleted file mode 100644
index e7f08a5fd..000000000
--- a/third_party/aom/aom_dsp/arm/blend_a64_mask_neon.c
+++ /dev/null
@@ -1,451 +0,0 @@
-/*
- * Copyright (c) 2018, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#include <arm_neon.h>
-#include <assert.h>
-
-#include "aom/aom_integer.h"
-#include "aom_dsp/aom_dsp_common.h"
-#include "aom_dsp/blend.h"
-#include "aom_ports/mem.h"
-#include "av1/common/arm/mem_neon.h"
-#include "config/aom_dsp_rtcd.h"
-
-static INLINE void blend8x1(int16x8_t mask, int16x8_t src_0, int16x8_t src_1,
- const int16x8_t v_maxval, int16x8_t *res) {
- int32x4_t im_res_low, im_res_high;
- const int16x8_t max_minus_mask = vsubq_s16(v_maxval, mask);
-
- im_res_low = vmull_s16(vget_low_s16(mask), vget_low_s16(src_0));
- im_res_low =
- vmlal_s16(im_res_low, vget_low_s16(max_minus_mask), vget_low_s16(src_1));
-
- im_res_high = vmull_s16(vget_high_s16(mask), vget_high_s16(src_0));
- im_res_high = vmlal_s16(im_res_high, vget_high_s16(max_minus_mask),
- vget_high_s16(src_1));
-
- *res = vcombine_s16(vshrn_n_s32(im_res_low, AOM_BLEND_A64_ROUND_BITS),
- vshrn_n_s32(im_res_high, AOM_BLEND_A64_ROUND_BITS));
-}
-
-static INLINE void blend_8x4(uint8_t *dst, uint32_t dst_stride,
- const CONV_BUF_TYPE *src0, uint32_t src0_stride,
- const CONV_BUF_TYPE *src1, uint32_t src1_stride,
- int16x8_t mask0, int16x8_t mask1, int16x8_t mask2,
- int16x8_t mask3, const int16x8_t v_maxval,
- const uint16x8_t vec_round_offset,
- const int16x8_t vec_round_bits) {
- int16x8_t src0_0, src0_1, src0_2, src0_3;
- int16x8_t src1_0, src1_1, src1_2, src1_3;
- int16x8_t im_res_0, im_res_1, im_res_2, im_res_3;
-
- load_s16_8x4((int16_t *)src0, (int32_t)src0_stride, &src0_0, &src0_1, &src0_2,
- &src0_3);
- load_s16_8x4((int16_t *)src1, (int32_t)src1_stride, &src1_0, &src1_1, &src1_2,
- &src1_3);
-
- blend8x1(mask0, src0_0, src1_0, v_maxval, &im_res_0);
- blend8x1(mask1, src0_1, src1_1, v_maxval, &im_res_1);
- blend8x1(mask2, src0_2, src1_2, v_maxval, &im_res_2);
- blend8x1(mask3, src0_3, src1_3, v_maxval, &im_res_3);
-
- uint16x8_t im_res1_0 =
- vqsubq_u16(vreinterpretq_u16_s16(im_res_0), vec_round_offset);
- uint16x8_t im_res1_1 =
- vqsubq_u16(vreinterpretq_u16_s16(im_res_1), vec_round_offset);
- uint16x8_t im_res1_2 =
- vqsubq_u16(vreinterpretq_u16_s16(im_res_2), vec_round_offset);
- uint16x8_t im_res1_3 =
- vqsubq_u16(vreinterpretq_u16_s16(im_res_3), vec_round_offset);
-
- im_res_0 = vshlq_s16(vreinterpretq_s16_u16(im_res1_0), vec_round_bits);
- im_res_1 = vshlq_s16(vreinterpretq_s16_u16(im_res1_1), vec_round_bits);
- im_res_2 = vshlq_s16(vreinterpretq_s16_u16(im_res1_2), vec_round_bits);
- im_res_3 = vshlq_s16(vreinterpretq_s16_u16(im_res1_3), vec_round_bits);
-
- vst1_u8((dst + 0 * dst_stride), vqmovun_s16(im_res_0));
- vst1_u8((dst + 1 * dst_stride), vqmovun_s16(im_res_1));
- vst1_u8((dst + 2 * dst_stride), vqmovun_s16(im_res_2));
- vst1_u8((dst + 3 * dst_stride), vqmovun_s16(im_res_3));
-}
-
-static INLINE void blend_4x4(uint8_t *dst, uint32_t dst_stride,
- const CONV_BUF_TYPE *src0, uint32_t src0_stride,
- const CONV_BUF_TYPE *src1, uint32_t src1_stride,
- int16x4_t mask0, int16x4_t mask1, int16x4_t mask2,
- int16x4_t mask3, const int16x8_t v_maxval,
- const uint16x8_t vec_round_offset,
- const int16x8_t vec_round_bits) {
- int16x8_t src0_0, src0_1;
- int16x8_t src1_0, src1_1;
- uint64x2_t tu0 = vdupq_n_u64(0), tu1 = vdupq_n_u64(0), tu2 = vdupq_n_u64(0),
- tu3 = vdupq_n_u64(0);
- int16x8_t mask0_1, mask2_3;
- int16x8_t res0, res1;
-
- load_unaligned_u16_4x4(src0, src0_stride, &tu0, &tu1);
- load_unaligned_u16_4x4(src1, src1_stride, &tu2, &tu3);
-
- src0_0 = vreinterpretq_s16_u64(tu0);
- src0_1 = vreinterpretq_s16_u64(tu1);
-
- src1_0 = vreinterpretq_s16_u64(tu2);
- src1_1 = vreinterpretq_s16_u64(tu3);
-
- mask0_1 = vcombine_s16(mask0, mask1);
- mask2_3 = vcombine_s16(mask2, mask3);
-
- blend8x1(mask0_1, src0_0, src1_0, v_maxval, &res0);
- blend8x1(mask2_3, src0_1, src1_1, v_maxval, &res1);
-
- uint16x8_t im_res_0 =
- vqsubq_u16(vreinterpretq_u16_s16(res0), vec_round_offset);
- uint16x8_t im_res_1 =
- vqsubq_u16(vreinterpretq_u16_s16(res1), vec_round_offset);
-
- src0_0 = vshlq_s16(vreinterpretq_s16_u16(im_res_0), vec_round_bits);
- src0_1 = vshlq_s16(vreinterpretq_s16_u16(im_res_1), vec_round_bits);
-
- uint8x8_t res_0 = vqmovun_s16(src0_0);
- uint8x8_t res_1 = vqmovun_s16(src0_1);
-
- vst1_lane_u32((uint32_t *)(dst + 0 * dst_stride), vreinterpret_u32_u8(res_0),
- 0);
- vst1_lane_u32((uint32_t *)(dst + 1 * dst_stride), vreinterpret_u32_u8(res_0),
- 1);
- vst1_lane_u32((uint32_t *)(dst + 2 * dst_stride), vreinterpret_u32_u8(res_1),
- 0);
- vst1_lane_u32((uint32_t *)(dst + 3 * dst_stride), vreinterpret_u32_u8(res_1),
- 1);
-}
-
-void aom_lowbd_blend_a64_d16_mask_neon(
- uint8_t *dst, uint32_t dst_stride, const CONV_BUF_TYPE *src0,
- uint32_t src0_stride, const CONV_BUF_TYPE *src1, uint32_t src1_stride,
- const uint8_t *mask, uint32_t mask_stride, int w, int h, int subw, int subh,
- ConvolveParams *conv_params) {
- int i = 0;
- const int bd = 8;
- int w_tmp = w;
- const uint8_t *mask_tmp = mask;
- const CONV_BUF_TYPE *src0_tmp = src0;
- const CONV_BUF_TYPE *src1_tmp = src1;
- uint8_t *dst_tmp = dst;
-
- const int offset_bits = bd + 2 * FILTER_BITS - conv_params->round_0;
- const int round_offset = (1 << (offset_bits - conv_params->round_1)) +
- (1 << (offset_bits - conv_params->round_1 - 1));
- const int round_bits =
- 2 * FILTER_BITS - conv_params->round_0 - conv_params->round_1;
-
- assert(IMPLIES((void *)src0 == dst, src0_stride == dst_stride));
- assert(IMPLIES((void *)src1 == dst, src1_stride == dst_stride));
-
- assert(h >= 4);
- assert(w >= 4);
- assert(IS_POWER_OF_TWO(h));
- assert(IS_POWER_OF_TWO(w));
-
- uint8x8_t s0, s1, s2, s3;
- uint32x2_t tu0 = vdup_n_u32(0), tu1 = vdup_n_u32(0), tu2 = vdup_n_u32(0),
- tu3 = vdup_n_u32(0);
- uint8x16_t t0, t1, t2, t3, t4, t5, t6, t7;
- int16x8_t mask0, mask1, mask2, mask3;
- int16x8_t mask4, mask5, mask6, mask7;
- int32x4_t m0_32, m1_32, m2_32, m3_32;
- int32x4_t m4_32, m5_32, m6_32, m7_32;
- uint8x8_t mask0_l, mask1_l, mask2_l, mask3_l;
- uint8x8_t mask4_l, mask5_l, mask6_l, mask7_l;
- int16x4_t mask0_low, mask1_low, mask2_low, mask3_low;
- const uint16x4_t vec_zero = vdup_n_u16(0);
- const uint16_t offset = round_offset - (1 << (round_bits - 1));
- const int16x8_t v_maxval = vdupq_n_s16(AOM_BLEND_A64_MAX_ALPHA);
- const int16x8_t vec_round_bits = vdupq_n_s16(-round_bits);
- const uint16x8_t vec_offset = vdupq_n_u16(offset);
-
- if (subw == 0 && subh == 0) {
- if (w_tmp > 7) {
- do {
- w_tmp = w;
- do {
- load_u8_8x4(mask_tmp, mask_stride, &s0, &s1, &s2, &s3);
-
- mask0 = vmovl_s8(vreinterpret_s8_u8(s0));
- mask1 = vmovl_s8(vreinterpret_s8_u8(s1));
- mask2 = vmovl_s8(vreinterpret_s8_u8(s2));
- mask3 = vmovl_s8(vreinterpret_s8_u8(s3));
-
- blend_8x4(dst_tmp, dst_stride, src0_tmp, src0_stride, src1_tmp,
- src1_stride, mask0, mask1, mask2, mask3, v_maxval,
- vec_offset, vec_round_bits);
-
- w_tmp -= 8;
- mask_tmp += 8;
- dst_tmp += 8;
- src0_tmp += 8;
- src1_tmp += 8;
- } while (w_tmp > 7);
- i += 4;
- mask_tmp += (4 * mask_stride) - w;
- dst_tmp += (4 * dst_stride) - w;
- src0_tmp += (4 * src0_stride) - w;
- src1_tmp += (4 * src1_stride) - w;
- } while (i < h);
- } else {
- do {
- load_unaligned_u8_4x4(mask_tmp, mask_stride, &tu0, &tu1);
-
- mask0 = vreinterpretq_s16_u16(vmovl_u8(vreinterpret_u8_u32(tu0)));
- mask1 = vreinterpretq_s16_u16(vmovl_u8(vreinterpret_u8_u32(tu1)));
-
- mask0_low = vget_low_s16(mask0);
- mask1_low = vget_high_s16(mask0);
- mask2_low = vget_low_s16(mask1);
- mask3_low = vget_high_s16(mask1);
-
- blend_4x4(dst_tmp, dst_stride, src0_tmp, src0_stride, src1_tmp,
- src1_stride, mask0_low, mask1_low, mask2_low, mask3_low,
- v_maxval, vec_offset, vec_round_bits);
-
- i += 4;
- mask_tmp += (4 * mask_stride);
- dst_tmp += (4 * dst_stride);
- src0_tmp += (4 * src0_stride);
- src1_tmp += (4 * src1_stride);
- } while (i < h);
- }
- } else if (subw == 1 && subh == 1) {
- if (w_tmp > 7) {
- do {
- w_tmp = w;
- do {
- load_u8_16x8(mask_tmp, mask_stride, &t0, &t1, &t2, &t3, &t4, &t5, &t6,
- &t7);
-
- mask0 =
- vreinterpretq_s16_u16(vaddl_u8(vget_low_u8(t0), vget_low_u8(t1)));
- mask1 =
- vreinterpretq_s16_u16(vaddl_u8(vget_low_u8(t2), vget_low_u8(t3)));
- mask2 =
- vreinterpretq_s16_u16(vaddl_u8(vget_low_u8(t4), vget_low_u8(t5)));
- mask3 =
- vreinterpretq_s16_u16(vaddl_u8(vget_low_u8(t6), vget_low_u8(t7)));
-
- mask4 = vreinterpretq_s16_u16(
- vaddl_u8(vget_high_u8(t0), vget_high_u8(t1)));
- mask5 = vreinterpretq_s16_u16(
- vaddl_u8(vget_high_u8(t2), vget_high_u8(t3)));
- mask6 = vreinterpretq_s16_u16(
- vaddl_u8(vget_high_u8(t4), vget_high_u8(t5)));
- mask7 = vreinterpretq_s16_u16(
- vaddl_u8(vget_high_u8(t6), vget_high_u8(t7)));
-
- m0_32 = vpaddlq_s16(mask0);
- m1_32 = vpaddlq_s16(mask1);
- m2_32 = vpaddlq_s16(mask2);
- m3_32 = vpaddlq_s16(mask3);
-
- m4_32 = vpaddlq_s16(mask4);
- m5_32 = vpaddlq_s16(mask5);
- m6_32 = vpaddlq_s16(mask6);
- m7_32 = vpaddlq_s16(mask7);
-
- mask0 =
- vcombine_s16(vqrshrn_n_s32(m0_32, 2), vqrshrn_n_s32(m4_32, 2));
- mask1 =
- vcombine_s16(vqrshrn_n_s32(m1_32, 2), vqrshrn_n_s32(m5_32, 2));
- mask2 =
- vcombine_s16(vqrshrn_n_s32(m2_32, 2), vqrshrn_n_s32(m6_32, 2));
- mask3 =
- vcombine_s16(vqrshrn_n_s32(m3_32, 2), vqrshrn_n_s32(m7_32, 2));
-
- blend_8x4(dst_tmp, dst_stride, src0_tmp, src0_stride, src1_tmp,
- src1_stride, mask0, mask1, mask2, mask3, v_maxval,
- vec_offset, vec_round_bits);
-
- w_tmp -= 8;
- mask_tmp += 16;
- dst_tmp += 8;
- src0_tmp += 8;
- src1_tmp += 8;
- } while (w_tmp > 7);
- i += 4;
- mask_tmp += (8 * mask_stride) - (2 * w);
- dst_tmp += (4 * dst_stride) - w;
- src0_tmp += (4 * src0_stride) - w;
- src1_tmp += (4 * src1_stride) - w;
- } while (i < h);
- } else {
- do {
- load_u8_8x8(mask_tmp, mask_stride, &mask0_l, &mask1_l, &mask2_l,
- &mask3_l, &mask4_l, &mask5_l, &mask6_l, &mask7_l);
-
- mask0 = vreinterpretq_s16_u16(vaddl_u8(mask0_l, mask1_l));
- mask1 = vreinterpretq_s16_u16(vaddl_u8(mask2_l, mask3_l));
- mask2 = vreinterpretq_s16_u16(vaddl_u8(mask4_l, mask5_l));
- mask3 = vreinterpretq_s16_u16(vaddl_u8(mask6_l, mask7_l));
-
- m0_32 = vpaddlq_s16(mask0);
- m1_32 = vpaddlq_s16(mask1);
- m2_32 = vpaddlq_s16(mask2);
- m3_32 = vpaddlq_s16(mask3);
-
- mask0_low = vqrshrn_n_s32(m0_32, 2);
- mask1_low = vqrshrn_n_s32(m1_32, 2);
- mask2_low = vqrshrn_n_s32(m2_32, 2);
- mask3_low = vqrshrn_n_s32(m3_32, 2);
-
- blend_4x4(dst_tmp, dst_stride, src0_tmp, src0_stride, src1_tmp,
- src1_stride, mask0_low, mask1_low, mask2_low, mask3_low,
- v_maxval, vec_offset, vec_round_bits);
-
- i += 4;
- mask_tmp += (8 * mask_stride);
- dst_tmp += (4 * dst_stride);
- src0_tmp += (4 * src0_stride);
- src1_tmp += (4 * src1_stride);
- } while (i < h);
- }
- } else if (subw == 1 && subh == 0) {
- if (w_tmp > 7) {
- do {
- w_tmp = w;
- do {
- load_u8_16x4(mask_tmp, mask_stride, &t0, &t1, &t2, &t3);
-
- mask0 = vreinterpretq_s16_u16(vcombine_u16(
- vpaddl_u8(vget_low_u8(t0)), vpaddl_u8(vget_high_u8(t0))));
- mask1 = vreinterpretq_s16_u16(vcombine_u16(
- vpaddl_u8(vget_low_u8(t1)), vpaddl_u8(vget_high_u8(t1))));
- mask2 = vreinterpretq_s16_u16(vcombine_u16(
- vpaddl_u8(vget_low_u8(t2)), vpaddl_u8(vget_high_u8(t2))));
- mask3 = vreinterpretq_s16_u16(vcombine_u16(
- vpaddl_u8(vget_low_u8(t3)), vpaddl_u8(vget_high_u8(t3))));
-
- mask0 = vmovl_s8(vqrshrn_n_s16(mask0, 1));
- mask1 = vmovl_s8(vqrshrn_n_s16(mask1, 1));
- mask2 = vmovl_s8(vqrshrn_n_s16(mask2, 1));
- mask3 = vmovl_s8(vqrshrn_n_s16(mask3, 1));
-
- blend_8x4(dst_tmp, dst_stride, src0_tmp, src0_stride, src1_tmp,
- src1_stride, mask0, mask1, mask2, mask3, v_maxval,
- vec_offset, vec_round_bits);
- w_tmp -= 8;
- mask_tmp += 16;
- dst_tmp += 8;
- src0_tmp += 8;
- src1_tmp += 8;
- } while (w_tmp > 7);
- i += 4;
- mask_tmp += (4 * mask_stride) - (2 * w);
- dst_tmp += (4 * dst_stride) - w;
- src0_tmp += (4 * src0_stride) - w;
- src1_tmp += (4 * src1_stride) - w;
- } while (i < h);
- } else {
- do {
- load_u8_8x4(mask_tmp, mask_stride, &mask0_l, &mask1_l, &mask2_l,
- &mask3_l);
-
- mask0 =
- vreinterpretq_s16_u16(vcombine_u16(vpaddl_u8(mask0_l), vec_zero));
- mask1 =
- vreinterpretq_s16_u16(vcombine_u16(vpaddl_u8(mask1_l), vec_zero));
- mask2 =
- vreinterpretq_s16_u16(vcombine_u16(vpaddl_u8(mask2_l), vec_zero));
- mask3 =
- vreinterpretq_s16_u16(vcombine_u16(vpaddl_u8(mask3_l), vec_zero));
-
- mask0_low = vget_low_s16(vmovl_s8(vqrshrn_n_s16(mask0, 1)));
- mask1_low = vget_low_s16(vmovl_s8(vqrshrn_n_s16(mask1, 1)));
- mask2_low = vget_low_s16(vmovl_s8(vqrshrn_n_s16(mask2, 1)));
- mask3_low = vget_low_s16(vmovl_s8(vqrshrn_n_s16(mask3, 1)));
-
- blend_4x4(dst_tmp, dst_stride, src0_tmp, src0_stride, src1_tmp,
- src1_stride, mask0_low, mask1_low, mask2_low, mask3_low,
- v_maxval, vec_offset, vec_round_bits);
-
- i += 4;
- mask_tmp += (4 * mask_stride);
- dst_tmp += (4 * dst_stride);
- src0_tmp += (4 * src0_stride);
- src1_tmp += (4 * src1_stride);
- } while (i < h);
- }
- } else {
- if (w_tmp > 7) {
- do {
- w_tmp = w;
- do {
- load_u8_8x8(mask_tmp, mask_stride, &mask0_l, &mask1_l, &mask2_l,
- &mask3_l, &mask4_l, &mask5_l, &mask6_l, &mask7_l);
-
- mask0 = vreinterpretq_s16_u16(vaddl_u8(mask0_l, mask1_l));
- mask1 = vreinterpretq_s16_u16(vaddl_u8(mask2_l, mask3_l));
- mask2 = vreinterpretq_s16_u16(vaddl_u8(mask4_l, mask5_l));
- mask3 = vreinterpretq_s16_u16(vaddl_u8(mask6_l, mask7_l));
-
- mask0 = vmovl_s8(vqrshrn_n_s16(mask0, 1));
- mask1 = vmovl_s8(vqrshrn_n_s16(mask1, 1));
- mask2 = vmovl_s8(vqrshrn_n_s16(mask2, 1));
- mask3 = vmovl_s8(vqrshrn_n_s16(mask3, 1));
-
- blend_8x4(dst_tmp, dst_stride, src0_tmp, src0_stride, src1_tmp,
- src1_stride, mask0, mask1, mask2, mask3, v_maxval,
- vec_offset, vec_round_bits);
-
- w_tmp -= 8;
- mask_tmp += 8;
- dst_tmp += 8;
- src0_tmp += 8;
- src1_tmp += 8;
- } while (w_tmp > 7);
- i += 4;
- mask_tmp += (8 * mask_stride) - w;
- dst_tmp += (4 * dst_stride) - w;
- src0_tmp += (4 * src0_stride) - w;
- src1_tmp += (4 * src1_stride) - w;
- } while (i < h);
- } else {
- do {
- load_unaligned_u8_4x4(mask_tmp, 2 * mask_stride, &tu0, &tu1);
- load_unaligned_u8_4x4(mask_tmp + mask_stride, 2 * mask_stride, &tu2,
- &tu3);
-
- s0 = vreinterpret_u8_u32(tu0);
- s1 = vreinterpret_u8_u32(tu1);
- s2 = vreinterpret_u8_u32(tu2);
- s3 = vreinterpret_u8_u32(tu3);
-
- mask0 = vreinterpretq_s16_u16(vaddl_u8(s0, s2));
- mask1 = vreinterpretq_s16_u16(vaddl_u8(s1, s3));
-
- mask0 = vmovl_s8(vqrshrn_n_s16(mask0, 1));
- mask1 = vmovl_s8(vqrshrn_n_s16(mask1, 1));
-
- mask0_low = vget_low_s16(mask0);
- mask1_low = vget_high_s16(mask0);
- mask2_low = vget_low_s16(mask1);
- mask3_low = vget_high_s16(mask1);
-
- blend_4x4(dst_tmp, dst_stride, src0_tmp, src0_stride, src1_tmp,
- src1_stride, mask0_low, mask1_low, mask2_low, mask3_low,
- v_maxval, vec_offset, vec_round_bits);
-
- i += 4;
- mask_tmp += (8 * mask_stride);
- dst_tmp += (4 * dst_stride);
- src0_tmp += (4 * src0_stride);
- src1_tmp += (4 * src1_stride);
- } while (i < h);
- }
- }
-}
diff --git a/third_party/aom/aom_dsp/arm/fwd_txfm_neon.c b/third_party/aom/aom_dsp/arm/fwd_txfm_neon.c
deleted file mode 100644
index e4300c992..000000000
--- a/third_party/aom/aom_dsp/arm/fwd_txfm_neon.c
+++ /dev/null
@@ -1,222 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#include <arm_neon.h>
-
-#include "config/aom_config.h"
-
-#include "aom_dsp/txfm_common.h"
-
-void aom_fdct8x8_neon(const int16_t *input, int16_t *final_output, int stride) {
- int i;
- // stage 1
- int16x8_t input_0 = vshlq_n_s16(vld1q_s16(&input[0 * stride]), 2);
- int16x8_t input_1 = vshlq_n_s16(vld1q_s16(&input[1 * stride]), 2);
- int16x8_t input_2 = vshlq_n_s16(vld1q_s16(&input[2 * stride]), 2);
- int16x8_t input_3 = vshlq_n_s16(vld1q_s16(&input[3 * stride]), 2);
- int16x8_t input_4 = vshlq_n_s16(vld1q_s16(&input[4 * stride]), 2);
- int16x8_t input_5 = vshlq_n_s16(vld1q_s16(&input[5 * stride]), 2);
- int16x8_t input_6 = vshlq_n_s16(vld1q_s16(&input[6 * stride]), 2);
- int16x8_t input_7 = vshlq_n_s16(vld1q_s16(&input[7 * stride]), 2);
- for (i = 0; i < 2; ++i) {
- int16x8_t out_0, out_1, out_2, out_3, out_4, out_5, out_6, out_7;
- const int16x8_t v_s0 = vaddq_s16(input_0, input_7);
- const int16x8_t v_s1 = vaddq_s16(input_1, input_6);
- const int16x8_t v_s2 = vaddq_s16(input_2, input_5);
- const int16x8_t v_s3 = vaddq_s16(input_3, input_4);
- const int16x8_t v_s4 = vsubq_s16(input_3, input_4);
- const int16x8_t v_s5 = vsubq_s16(input_2, input_5);
- const int16x8_t v_s6 = vsubq_s16(input_1, input_6);
- const int16x8_t v_s7 = vsubq_s16(input_0, input_7);
- // fdct4(step, step);
- int16x8_t v_x0 = vaddq_s16(v_s0, v_s3);
- int16x8_t v_x1 = vaddq_s16(v_s1, v_s2);
- int16x8_t v_x2 = vsubq_s16(v_s1, v_s2);
- int16x8_t v_x3 = vsubq_s16(v_s0, v_s3);
- // fdct4(step, step);
- int32x4_t v_t0_lo = vaddl_s16(vget_low_s16(v_x0), vget_low_s16(v_x1));
- int32x4_t v_t0_hi = vaddl_s16(vget_high_s16(v_x0), vget_high_s16(v_x1));
- int32x4_t v_t1_lo = vsubl_s16(vget_low_s16(v_x0), vget_low_s16(v_x1));
- int32x4_t v_t1_hi = vsubl_s16(vget_high_s16(v_x0), vget_high_s16(v_x1));
- int32x4_t v_t2_lo = vmull_n_s16(vget_low_s16(v_x2), (int16_t)cospi_24_64);
- int32x4_t v_t2_hi = vmull_n_s16(vget_high_s16(v_x2), (int16_t)cospi_24_64);
- int32x4_t v_t3_lo = vmull_n_s16(vget_low_s16(v_x3), (int16_t)cospi_24_64);
- int32x4_t v_t3_hi = vmull_n_s16(vget_high_s16(v_x3), (int16_t)cospi_24_64);
- v_t2_lo = vmlal_n_s16(v_t2_lo, vget_low_s16(v_x3), (int16_t)cospi_8_64);
- v_t2_hi = vmlal_n_s16(v_t2_hi, vget_high_s16(v_x3), (int16_t)cospi_8_64);
- v_t3_lo = vmlsl_n_s16(v_t3_lo, vget_low_s16(v_x2), (int16_t)cospi_8_64);
- v_t3_hi = vmlsl_n_s16(v_t3_hi, vget_high_s16(v_x2), (int16_t)cospi_8_64);
- v_t0_lo = vmulq_n_s32(v_t0_lo, (int32_t)cospi_16_64);
- v_t0_hi = vmulq_n_s32(v_t0_hi, (int32_t)cospi_16_64);
- v_t1_lo = vmulq_n_s32(v_t1_lo, (int32_t)cospi_16_64);
- v_t1_hi = vmulq_n_s32(v_t1_hi, (int32_t)cospi_16_64);
- {
- const int16x4_t a = vrshrn_n_s32(v_t0_lo, DCT_CONST_BITS);
- const int16x4_t b = vrshrn_n_s32(v_t0_hi, DCT_CONST_BITS);
- const int16x4_t c = vrshrn_n_s32(v_t1_lo, DCT_CONST_BITS);
- const int16x4_t d = vrshrn_n_s32(v_t1_hi, DCT_CONST_BITS);
- const int16x4_t e = vrshrn_n_s32(v_t2_lo, DCT_CONST_BITS);
- const int16x4_t f = vrshrn_n_s32(v_t2_hi, DCT_CONST_BITS);
- const int16x4_t g = vrshrn_n_s32(v_t3_lo, DCT_CONST_BITS);
- const int16x4_t h = vrshrn_n_s32(v_t3_hi, DCT_CONST_BITS);
- out_0 = vcombine_s16(a, c); // 00 01 02 03 40 41 42 43
- out_2 = vcombine_s16(e, g); // 20 21 22 23 60 61 62 63
- out_4 = vcombine_s16(b, d); // 04 05 06 07 44 45 46 47
- out_6 = vcombine_s16(f, h); // 24 25 26 27 64 65 66 67
- }
- // Stage 2
- v_x0 = vsubq_s16(v_s6, v_s5);
- v_x1 = vaddq_s16(v_s6, v_s5);
- v_t0_lo = vmull_n_s16(vget_low_s16(v_x0), (int16_t)cospi_16_64);
- v_t0_hi = vmull_n_s16(vget_high_s16(v_x0), (int16_t)cospi_16_64);
- v_t1_lo = vmull_n_s16(vget_low_s16(v_x1), (int16_t)cospi_16_64);
- v_t1_hi = vmull_n_s16(vget_high_s16(v_x1), (int16_t)cospi_16_64);
- {
- const int16x4_t a = vrshrn_n_s32(v_t0_lo, DCT_CONST_BITS);
- const int16x4_t b = vrshrn_n_s32(v_t0_hi, DCT_CONST_BITS);
- const int16x4_t c = vrshrn_n_s32(v_t1_lo, DCT_CONST_BITS);
- const int16x4_t d = vrshrn_n_s32(v_t1_hi, DCT_CONST_BITS);
- const int16x8_t ab = vcombine_s16(a, b);
- const int16x8_t cd = vcombine_s16(c, d);
- // Stage 3
- v_x0 = vaddq_s16(v_s4, ab);
- v_x1 = vsubq_s16(v_s4, ab);
- v_x2 = vsubq_s16(v_s7, cd);
- v_x3 = vaddq_s16(v_s7, cd);
- }
- // Stage 4
- v_t0_lo = vmull_n_s16(vget_low_s16(v_x3), (int16_t)cospi_4_64);
- v_t0_hi = vmull_n_s16(vget_high_s16(v_x3), (int16_t)cospi_4_64);
- v_t0_lo = vmlal_n_s16(v_t0_lo, vget_low_s16(v_x0), (int16_t)cospi_28_64);
- v_t0_hi = vmlal_n_s16(v_t0_hi, vget_high_s16(v_x0), (int16_t)cospi_28_64);
- v_t1_lo = vmull_n_s16(vget_low_s16(v_x1), (int16_t)cospi_12_64);
- v_t1_hi = vmull_n_s16(vget_high_s16(v_x1), (int16_t)cospi_12_64);
- v_t1_lo = vmlal_n_s16(v_t1_lo, vget_low_s16(v_x2), (int16_t)cospi_20_64);
- v_t1_hi = vmlal_n_s16(v_t1_hi, vget_high_s16(v_x2), (int16_t)cospi_20_64);
- v_t2_lo = vmull_n_s16(vget_low_s16(v_x2), (int16_t)cospi_12_64);
- v_t2_hi = vmull_n_s16(vget_high_s16(v_x2), (int16_t)cospi_12_64);
- v_t2_lo = vmlsl_n_s16(v_t2_lo, vget_low_s16(v_x1), (int16_t)cospi_20_64);
- v_t2_hi = vmlsl_n_s16(v_t2_hi, vget_high_s16(v_x1), (int16_t)cospi_20_64);
- v_t3_lo = vmull_n_s16(vget_low_s16(v_x3), (int16_t)cospi_28_64);
- v_t3_hi = vmull_n_s16(vget_high_s16(v_x3), (int16_t)cospi_28_64);
- v_t3_lo = vmlsl_n_s16(v_t3_lo, vget_low_s16(v_x0), (int16_t)cospi_4_64);
- v_t3_hi = vmlsl_n_s16(v_t3_hi, vget_high_s16(v_x0), (int16_t)cospi_4_64);
- {
- const int16x4_t a = vrshrn_n_s32(v_t0_lo, DCT_CONST_BITS);
- const int16x4_t b = vrshrn_n_s32(v_t0_hi, DCT_CONST_BITS);
- const int16x4_t c = vrshrn_n_s32(v_t1_lo, DCT_CONST_BITS);
- const int16x4_t d = vrshrn_n_s32(v_t1_hi, DCT_CONST_BITS);
- const int16x4_t e = vrshrn_n_s32(v_t2_lo, DCT_CONST_BITS);
- const int16x4_t f = vrshrn_n_s32(v_t2_hi, DCT_CONST_BITS);
- const int16x4_t g = vrshrn_n_s32(v_t3_lo, DCT_CONST_BITS);
- const int16x4_t h = vrshrn_n_s32(v_t3_hi, DCT_CONST_BITS);
- out_1 = vcombine_s16(a, c); // 10 11 12 13 50 51 52 53
- out_3 = vcombine_s16(e, g); // 30 31 32 33 70 71 72 73
- out_5 = vcombine_s16(b, d); // 14 15 16 17 54 55 56 57
- out_7 = vcombine_s16(f, h); // 34 35 36 37 74 75 76 77
- }
- // transpose 8x8
- {
- // 00 01 02 03 40 41 42 43
- // 10 11 12 13 50 51 52 53
- // 20 21 22 23 60 61 62 63
- // 30 31 32 33 70 71 72 73
- // 04 05 06 07 44 45 46 47
- // 14 15 16 17 54 55 56 57
- // 24 25 26 27 64 65 66 67
- // 34 35 36 37 74 75 76 77
- const int32x4x2_t r02_s32 =
- vtrnq_s32(vreinterpretq_s32_s16(out_0), vreinterpretq_s32_s16(out_2));
- const int32x4x2_t r13_s32 =
- vtrnq_s32(vreinterpretq_s32_s16(out_1), vreinterpretq_s32_s16(out_3));
- const int32x4x2_t r46_s32 =
- vtrnq_s32(vreinterpretq_s32_s16(out_4), vreinterpretq_s32_s16(out_6));
- const int32x4x2_t r57_s32 =
- vtrnq_s32(vreinterpretq_s32_s16(out_5), vreinterpretq_s32_s16(out_7));
- const int16x8x2_t r01_s16 =
- vtrnq_s16(vreinterpretq_s16_s32(r02_s32.val[0]),
- vreinterpretq_s16_s32(r13_s32.val[0]));
- const int16x8x2_t r23_s16 =
- vtrnq_s16(vreinterpretq_s16_s32(r02_s32.val[1]),
- vreinterpretq_s16_s32(r13_s32.val[1]));
- const int16x8x2_t r45_s16 =
- vtrnq_s16(vreinterpretq_s16_s32(r46_s32.val[0]),
- vreinterpretq_s16_s32(r57_s32.val[0]));
- const int16x8x2_t r67_s16 =
- vtrnq_s16(vreinterpretq_s16_s32(r46_s32.val[1]),
- vreinterpretq_s16_s32(r57_s32.val[1]));
- input_0 = r01_s16.val[0];
- input_1 = r01_s16.val[1];
- input_2 = r23_s16.val[0];
- input_3 = r23_s16.val[1];
- input_4 = r45_s16.val[0];
- input_5 = r45_s16.val[1];
- input_6 = r67_s16.val[0];
- input_7 = r67_s16.val[1];
- // 00 10 20 30 40 50 60 70
- // 01 11 21 31 41 51 61 71
- // 02 12 22 32 42 52 62 72
- // 03 13 23 33 43 53 63 73
- // 04 14 24 34 44 54 64 74
- // 05 15 25 35 45 55 65 75
- // 06 16 26 36 46 56 66 76
- // 07 17 27 37 47 57 67 77
- }
- } // for
- {
- // from aom_dct_sse2.c
- // Post-condition (division by two)
- // division of two 16 bits signed numbers using shifts
- // n / 2 = (n - (n >> 15)) >> 1
- const int16x8_t sign_in0 = vshrq_n_s16(input_0, 15);
- const int16x8_t sign_in1 = vshrq_n_s16(input_1, 15);
- const int16x8_t sign_in2 = vshrq_n_s16(input_2, 15);
- const int16x8_t sign_in3 = vshrq_n_s16(input_3, 15);
- const int16x8_t sign_in4 = vshrq_n_s16(input_4, 15);
- const int16x8_t sign_in5 = vshrq_n_s16(input_5, 15);
- const int16x8_t sign_in6 = vshrq_n_s16(input_6, 15);
- const int16x8_t sign_in7 = vshrq_n_s16(input_7, 15);
- input_0 = vhsubq_s16(input_0, sign_in0);
- input_1 = vhsubq_s16(input_1, sign_in1);
- input_2 = vhsubq_s16(input_2, sign_in2);
- input_3 = vhsubq_s16(input_3, sign_in3);
- input_4 = vhsubq_s16(input_4, sign_in4);
- input_5 = vhsubq_s16(input_5, sign_in5);
- input_6 = vhsubq_s16(input_6, sign_in6);
- input_7 = vhsubq_s16(input_7, sign_in7);
- // store results
- vst1q_s16(&final_output[0 * 8], input_0);
- vst1q_s16(&final_output[1 * 8], input_1);
- vst1q_s16(&final_output[2 * 8], input_2);
- vst1q_s16(&final_output[3 * 8], input_3);
- vst1q_s16(&final_output[4 * 8], input_4);
- vst1q_s16(&final_output[5 * 8], input_5);
- vst1q_s16(&final_output[6 * 8], input_6);
- vst1q_s16(&final_output[7 * 8], input_7);
- }
-}
-
-void aom_fdct8x8_1_neon(const int16_t *input, int16_t *output, int stride) {
- int r;
- int16x8_t sum = vld1q_s16(&input[0]);
- for (r = 1; r < 8; ++r) {
- const int16x8_t input_00 = vld1q_s16(&input[r * stride]);
- sum = vaddq_s16(sum, input_00);
- }
- {
- const int32x4_t a = vpaddlq_s16(sum);
- const int64x2_t b = vpaddlq_s32(a);
- const int32x2_t c = vadd_s32(vreinterpret_s32_s64(vget_low_s64(b)),
- vreinterpret_s32_s64(vget_high_s64(b)));
- output[0] = vget_lane_s16(vreinterpret_s16_s32(c), 0);
- output[1] = 0;
- }
-}
diff --git a/third_party/aom/aom_dsp/arm/intrapred_neon.c b/third_party/aom/aom_dsp/arm/intrapred_neon.c
deleted file mode 100644
index c85b1e910..000000000
--- a/third_party/aom/aom_dsp/arm/intrapred_neon.c
+++ /dev/null
@@ -1,590 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#include <arm_neon.h>
-
-#include "config/aom_config.h"
-#include "config/aom_dsp_rtcd.h"
-
-#include "aom/aom_integer.h"
-
-//------------------------------------------------------------------------------
-// DC 4x4
-
-// 'do_above' and 'do_left' facilitate branch removal when inlined.
-static INLINE void dc_4x4(uint8_t *dst, ptrdiff_t stride, const uint8_t *above,
- const uint8_t *left, int do_above, int do_left) {
- uint16x8_t sum_top;
- uint16x8_t sum_left;
- uint8x8_t dc0;
-
- if (do_above) {
- const uint8x8_t A = vld1_u8(above); // top row
- const uint16x4_t p0 = vpaddl_u8(A); // cascading summation of the top
- const uint16x4_t p1 = vpadd_u16(p0, p0);
- sum_top = vcombine_u16(p1, p1);
- }
-
- if (do_left) {
- const uint8x8_t L = vld1_u8(left); // left border
- const uint16x4_t p0 = vpaddl_u8(L); // cascading summation of the left
- const uint16x4_t p1 = vpadd_u16(p0, p0);
- sum_left = vcombine_u16(p1, p1);
- }
-
- if (do_above && do_left) {
- const uint16x8_t sum = vaddq_u16(sum_left, sum_top);
- dc0 = vrshrn_n_u16(sum, 3);
- } else if (do_above) {
- dc0 = vrshrn_n_u16(sum_top, 2);
- } else if (do_left) {
- dc0 = vrshrn_n_u16(sum_left, 2);
- } else {
- dc0 = vdup_n_u8(0x80);
- }
-
- {
- const uint8x8_t dc = vdup_lane_u8(dc0, 0);
- int i;
- for (i = 0; i < 4; ++i) {
- vst1_lane_u32((uint32_t *)(dst + i * stride), vreinterpret_u32_u8(dc), 0);
- }
- }
-}
-
-void aom_dc_predictor_4x4_neon(uint8_t *dst, ptrdiff_t stride,
- const uint8_t *above, const uint8_t *left) {
- dc_4x4(dst, stride, above, left, 1, 1);
-}
-
-void aom_dc_left_predictor_4x4_neon(uint8_t *dst, ptrdiff_t stride,
- const uint8_t *above, const uint8_t *left) {
- (void)above;
- dc_4x4(dst, stride, NULL, left, 0, 1);
-}
-
-void aom_dc_top_predictor_4x4_neon(uint8_t *dst, ptrdiff_t stride,
- const uint8_t *above, const uint8_t *left) {
- (void)left;
- dc_4x4(dst, stride, above, NULL, 1, 0);
-}
-
-void aom_dc_128_predictor_4x4_neon(uint8_t *dst, ptrdiff_t stride,
- const uint8_t *above, const uint8_t *left) {
- (void)above;
- (void)left;
- dc_4x4(dst, stride, NULL, NULL, 0, 0);
-}
-
-//------------------------------------------------------------------------------
-// DC 8x8
-
-// 'do_above' and 'do_left' facilitate branch removal when inlined.
-static INLINE void dc_8x8(uint8_t *dst, ptrdiff_t stride, const uint8_t *above,
- const uint8_t *left, int do_above, int do_left) {
- uint16x8_t sum_top;
- uint16x8_t sum_left;
- uint8x8_t dc0;
-
- if (do_above) {
- const uint8x8_t A = vld1_u8(above); // top row
- const uint16x4_t p0 = vpaddl_u8(A); // cascading summation of the top
- const uint16x4_t p1 = vpadd_u16(p0, p0);
- const uint16x4_t p2 = vpadd_u16(p1, p1);
- sum_top = vcombine_u16(p2, p2);
- }
-
- if (do_left) {
- const uint8x8_t L = vld1_u8(left); // left border
- const uint16x4_t p0 = vpaddl_u8(L); // cascading summation of the left
- const uint16x4_t p1 = vpadd_u16(p0, p0);
- const uint16x4_t p2 = vpadd_u16(p1, p1);
- sum_left = vcombine_u16(p2, p2);
- }
-
- if (do_above && do_left) {
- const uint16x8_t sum = vaddq_u16(sum_left, sum_top);
- dc0 = vrshrn_n_u16(sum, 4);
- } else if (do_above) {
- dc0 = vrshrn_n_u16(sum_top, 3);
- } else if (do_left) {
- dc0 = vrshrn_n_u16(sum_left, 3);
- } else {
- dc0 = vdup_n_u8(0x80);
- }
-
- {
- const uint8x8_t dc = vdup_lane_u8(dc0, 0);
- int i;
- for (i = 0; i < 8; ++i) {
- vst1_u32((uint32_t *)(dst + i * stride), vreinterpret_u32_u8(dc));
- }
- }
-}
-
-void aom_dc_predictor_8x8_neon(uint8_t *dst, ptrdiff_t stride,
- const uint8_t *above, const uint8_t *left) {
- dc_8x8(dst, stride, above, left, 1, 1);
-}
-
-void aom_dc_left_predictor_8x8_neon(uint8_t *dst, ptrdiff_t stride,
- const uint8_t *above, const uint8_t *left) {
- (void)above;
- dc_8x8(dst, stride, NULL, left, 0, 1);
-}
-
-void aom_dc_top_predictor_8x8_neon(uint8_t *dst, ptrdiff_t stride,
- const uint8_t *above, const uint8_t *left) {
- (void)left;
- dc_8x8(dst, stride, above, NULL, 1, 0);
-}
-
-void aom_dc_128_predictor_8x8_neon(uint8_t *dst, ptrdiff_t stride,
- const uint8_t *above, const uint8_t *left) {
- (void)above;
- (void)left;
- dc_8x8(dst, stride, NULL, NULL, 0, 0);
-}
-
-//------------------------------------------------------------------------------
-// DC 16x16
-
-// 'do_above' and 'do_left' facilitate branch removal when inlined.
-static INLINE void dc_16x16(uint8_t *dst, ptrdiff_t stride,
- const uint8_t *above, const uint8_t *left,
- int do_above, int do_left) {
- uint16x8_t sum_top;
- uint16x8_t sum_left;
- uint8x8_t dc0;
-
- if (do_above) {
- const uint8x16_t A = vld1q_u8(above); // top row
- const uint16x8_t p0 = vpaddlq_u8(A); // cascading summation of the top
- const uint16x4_t p1 = vadd_u16(vget_low_u16(p0), vget_high_u16(p0));
- const uint16x4_t p2 = vpadd_u16(p1, p1);
- const uint16x4_t p3 = vpadd_u16(p2, p2);
- sum_top = vcombine_u16(p3, p3);
- }
-
- if (do_left) {
- const uint8x16_t L = vld1q_u8(left); // left row
- const uint16x8_t p0 = vpaddlq_u8(L); // cascading summation of the left
- const uint16x4_t p1 = vadd_u16(vget_low_u16(p0), vget_high_u16(p0));
- const uint16x4_t p2 = vpadd_u16(p1, p1);
- const uint16x4_t p3 = vpadd_u16(p2, p2);
- sum_left = vcombine_u16(p3, p3);
- }
-
- if (do_above && do_left) {
- const uint16x8_t sum = vaddq_u16(sum_left, sum_top);
- dc0 = vrshrn_n_u16(sum, 5);
- } else if (do_above) {
- dc0 = vrshrn_n_u16(sum_top, 4);
- } else if (do_left) {
- dc0 = vrshrn_n_u16(sum_left, 4);
- } else {
- dc0 = vdup_n_u8(0x80);
- }
-
- {
- const uint8x16_t dc = vdupq_lane_u8(dc0, 0);
- int i;
- for (i = 0; i < 16; ++i) {
- vst1q_u8(dst + i * stride, dc);
- }
- }
-}
-
-void aom_dc_predictor_16x16_neon(uint8_t *dst, ptrdiff_t stride,
- const uint8_t *above, const uint8_t *left) {
- dc_16x16(dst, stride, above, left, 1, 1);
-}
-
-void aom_dc_left_predictor_16x16_neon(uint8_t *dst, ptrdiff_t stride,
- const uint8_t *above,
- const uint8_t *left) {
- (void)above;
- dc_16x16(dst, stride, NULL, left, 0, 1);
-}
-
-void aom_dc_top_predictor_16x16_neon(uint8_t *dst, ptrdiff_t stride,
- const uint8_t *above,
- const uint8_t *left) {
- (void)left;
- dc_16x16(dst, stride, above, NULL, 1, 0);
-}
-
-void aom_dc_128_predictor_16x16_neon(uint8_t *dst, ptrdiff_t stride,
- const uint8_t *above,
- const uint8_t *left) {
- (void)above;
- (void)left;
- dc_16x16(dst, stride, NULL, NULL, 0, 0);
-}
-
-//------------------------------------------------------------------------------
-// DC 32x32
-
-// 'do_above' and 'do_left' facilitate branch removal when inlined.
-static INLINE void dc_32x32(uint8_t *dst, ptrdiff_t stride,
- const uint8_t *above, const uint8_t *left,
- int do_above, int do_left) {
- uint16x8_t sum_top;
- uint16x8_t sum_left;
- uint8x8_t dc0;
-
- if (do_above) {
- const uint8x16_t A0 = vld1q_u8(above); // top row
- const uint8x16_t A1 = vld1q_u8(above + 16);
- const uint16x8_t p0 = vpaddlq_u8(A0); // cascading summation of the top
- const uint16x8_t p1 = vpaddlq_u8(A1);
- const uint16x8_t p2 = vaddq_u16(p0, p1);
- const uint16x4_t p3 = vadd_u16(vget_low_u16(p2), vget_high_u16(p2));
- const uint16x4_t p4 = vpadd_u16(p3, p3);
- const uint16x4_t p5 = vpadd_u16(p4, p4);
- sum_top = vcombine_u16(p5, p5);
- }
-
- if (do_left) {
- const uint8x16_t L0 = vld1q_u8(left); // left row
- const uint8x16_t L1 = vld1q_u8(left + 16);
- const uint16x8_t p0 = vpaddlq_u8(L0); // cascading summation of the left
- const uint16x8_t p1 = vpaddlq_u8(L1);
- const uint16x8_t p2 = vaddq_u16(p0, p1);
- const uint16x4_t p3 = vadd_u16(vget_low_u16(p2), vget_high_u16(p2));
- const uint16x4_t p4 = vpadd_u16(p3, p3);
- const uint16x4_t p5 = vpadd_u16(p4, p4);
- sum_left = vcombine_u16(p5, p5);
- }
-
- if (do_above && do_left) {
- const uint16x8_t sum = vaddq_u16(sum_left, sum_top);
- dc0 = vrshrn_n_u16(sum, 6);
- } else if (do_above) {
- dc0 = vrshrn_n_u16(sum_top, 5);
- } else if (do_left) {
- dc0 = vrshrn_n_u16(sum_left, 5);
- } else {
- dc0 = vdup_n_u8(0x80);
- }
-
- {
- const uint8x16_t dc = vdupq_lane_u8(dc0, 0);
- int i;
- for (i = 0; i < 32; ++i) {
- vst1q_u8(dst + i * stride, dc);
- vst1q_u8(dst + i * stride + 16, dc);
- }
- }
-}
-
-void aom_dc_predictor_32x32_neon(uint8_t *dst, ptrdiff_t stride,
- const uint8_t *above, const uint8_t *left) {
- dc_32x32(dst, stride, above, left, 1, 1);
-}
-
-void aom_dc_left_predictor_32x32_neon(uint8_t *dst, ptrdiff_t stride,
- const uint8_t *above,
- const uint8_t *left) {
- (void)above;
- dc_32x32(dst, stride, NULL, left, 0, 1);
-}
-
-void aom_dc_top_predictor_32x32_neon(uint8_t *dst, ptrdiff_t stride,
- const uint8_t *above,
- const uint8_t *left) {
- (void)left;
- dc_32x32(dst, stride, above, NULL, 1, 0);
-}
-
-void aom_dc_128_predictor_32x32_neon(uint8_t *dst, ptrdiff_t stride,
- const uint8_t *above,
- const uint8_t *left) {
- (void)above;
- (void)left;
- dc_32x32(dst, stride, NULL, NULL, 0, 0);
-}
-
-// -----------------------------------------------------------------------------
-
-void aom_d135_predictor_4x4_neon(uint8_t *dst, ptrdiff_t stride,
- const uint8_t *above, const uint8_t *left) {
- const uint8x8_t XABCD_u8 = vld1_u8(above - 1);
- const uint64x1_t XABCD = vreinterpret_u64_u8(XABCD_u8);
- const uint64x1_t ____XABC = vshl_n_u64(XABCD, 32);
- const uint32x2_t zero = vdup_n_u32(0);
- const uint32x2_t IJKL = vld1_lane_u32((const uint32_t *)left, zero, 0);
- const uint8x8_t IJKL_u8 = vreinterpret_u8_u32(IJKL);
- const uint64x1_t LKJI____ = vreinterpret_u64_u8(vrev32_u8(IJKL_u8));
- const uint64x1_t LKJIXABC = vorr_u64(LKJI____, ____XABC);
- const uint8x8_t KJIXABC_ = vreinterpret_u8_u64(vshr_n_u64(LKJIXABC, 8));
- const uint8x8_t JIXABC__ = vreinterpret_u8_u64(vshr_n_u64(LKJIXABC, 16));
- const uint8_t D = vget_lane_u8(XABCD_u8, 4);
- const uint8x8_t JIXABCD_ = vset_lane_u8(D, JIXABC__, 6);
- const uint8x8_t LKJIXABC_u8 = vreinterpret_u8_u64(LKJIXABC);
- const uint8x8_t avg1 = vhadd_u8(JIXABCD_, LKJIXABC_u8);
- const uint8x8_t avg2 = vrhadd_u8(avg1, KJIXABC_);
- const uint64x1_t avg2_u64 = vreinterpret_u64_u8(avg2);
- const uint32x2_t r3 = vreinterpret_u32_u8(avg2);
- const uint32x2_t r2 = vreinterpret_u32_u64(vshr_n_u64(avg2_u64, 8));
- const uint32x2_t r1 = vreinterpret_u32_u64(vshr_n_u64(avg2_u64, 16));
- const uint32x2_t r0 = vreinterpret_u32_u64(vshr_n_u64(avg2_u64, 24));
- vst1_lane_u32((uint32_t *)(dst + 0 * stride), r0, 0);
- vst1_lane_u32((uint32_t *)(dst + 1 * stride), r1, 0);
- vst1_lane_u32((uint32_t *)(dst + 2 * stride), r2, 0);
- vst1_lane_u32((uint32_t *)(dst + 3 * stride), r3, 0);
-}
-
-void aom_v_predictor_4x4_neon(uint8_t *dst, ptrdiff_t stride,
- const uint8_t *above, const uint8_t *left) {
- int i;
- uint32x2_t d0u32 = vdup_n_u32(0);
- (void)left;
-
- d0u32 = vld1_lane_u32((const uint32_t *)above, d0u32, 0);
- for (i = 0; i < 4; i++, dst += stride)
- vst1_lane_u32((uint32_t *)dst, d0u32, 0);
-}
-
-void aom_v_predictor_8x8_neon(uint8_t *dst, ptrdiff_t stride,
- const uint8_t *above, const uint8_t *left) {
- int i;
- uint8x8_t d0u8 = vdup_n_u8(0);
- (void)left;
-
- d0u8 = vld1_u8(above);
- for (i = 0; i < 8; i++, dst += stride) vst1_u8(dst, d0u8);
-}
-
-void aom_v_predictor_16x16_neon(uint8_t *dst, ptrdiff_t stride,
- const uint8_t *above, const uint8_t *left) {
- int i;
- uint8x16_t q0u8 = vdupq_n_u8(0);
- (void)left;
-
- q0u8 = vld1q_u8(above);
- for (i = 0; i < 16; i++, dst += stride) vst1q_u8(dst, q0u8);
-}
-
-void aom_v_predictor_32x32_neon(uint8_t *dst, ptrdiff_t stride,
- const uint8_t *above, const uint8_t *left) {
- int i;
- uint8x16_t q0u8 = vdupq_n_u8(0);
- uint8x16_t q1u8 = vdupq_n_u8(0);
- (void)left;
-
- q0u8 = vld1q_u8(above);
- q1u8 = vld1q_u8(above + 16);
- for (i = 0; i < 32; i++, dst += stride) {
- vst1q_u8(dst, q0u8);
- vst1q_u8(dst + 16, q1u8);
- }
-}
-
-void aom_h_predictor_4x4_neon(uint8_t *dst, ptrdiff_t stride,
- const uint8_t *above, const uint8_t *left) {
- uint8x8_t d0u8 = vdup_n_u8(0);
- uint32x2_t d1u32 = vdup_n_u32(0);
- (void)above;
-
- d1u32 = vld1_lane_u32((const uint32_t *)left, d1u32, 0);
-
- d0u8 = vdup_lane_u8(vreinterpret_u8_u32(d1u32), 0);
- vst1_lane_u32((uint32_t *)dst, vreinterpret_u32_u8(d0u8), 0);
- dst += stride;
- d0u8 = vdup_lane_u8(vreinterpret_u8_u32(d1u32), 1);
- vst1_lane_u32((uint32_t *)dst, vreinterpret_u32_u8(d0u8), 0);
- dst += stride;
- d0u8 = vdup_lane_u8(vreinterpret_u8_u32(d1u32), 2);
- vst1_lane_u32((uint32_t *)dst, vreinterpret_u32_u8(d0u8), 0);
- dst += stride;
- d0u8 = vdup_lane_u8(vreinterpret_u8_u32(d1u32), 3);
- vst1_lane_u32((uint32_t *)dst, vreinterpret_u32_u8(d0u8), 0);
-}
-
-void aom_h_predictor_8x8_neon(uint8_t *dst, ptrdiff_t stride,
- const uint8_t *above, const uint8_t *left) {
- uint8x8_t d0u8 = vdup_n_u8(0);
- uint64x1_t d1u64 = vdup_n_u64(0);
- (void)above;
-
- d1u64 = vld1_u64((const uint64_t *)left);
-
- d0u8 = vdup_lane_u8(vreinterpret_u8_u64(d1u64), 0);
- vst1_u8(dst, d0u8);
- dst += stride;
- d0u8 = vdup_lane_u8(vreinterpret_u8_u64(d1u64), 1);
- vst1_u8(dst, d0u8);
- dst += stride;
- d0u8 = vdup_lane_u8(vreinterpret_u8_u64(d1u64), 2);
- vst1_u8(dst, d0u8);
- dst += stride;
- d0u8 = vdup_lane_u8(vreinterpret_u8_u64(d1u64), 3);
- vst1_u8(dst, d0u8);
- dst += stride;
- d0u8 = vdup_lane_u8(vreinterpret_u8_u64(d1u64), 4);
- vst1_u8(dst, d0u8);
- dst += stride;
- d0u8 = vdup_lane_u8(vreinterpret_u8_u64(d1u64), 5);
- vst1_u8(dst, d0u8);
- dst += stride;
- d0u8 = vdup_lane_u8(vreinterpret_u8_u64(d1u64), 6);
- vst1_u8(dst, d0u8);
- dst += stride;
- d0u8 = vdup_lane_u8(vreinterpret_u8_u64(d1u64), 7);
- vst1_u8(dst, d0u8);
-}
-
-void aom_h_predictor_16x16_neon(uint8_t *dst, ptrdiff_t stride,
- const uint8_t *above, const uint8_t *left) {
- int j;
- uint8x8_t d2u8 = vdup_n_u8(0);
- uint8x16_t q0u8 = vdupq_n_u8(0);
- uint8x16_t q1u8 = vdupq_n_u8(0);
- (void)above;
-
- q1u8 = vld1q_u8(left);
- d2u8 = vget_low_u8(q1u8);
- for (j = 0; j < 2; j++, d2u8 = vget_high_u8(q1u8)) {
- q0u8 = vdupq_lane_u8(d2u8, 0);
- vst1q_u8(dst, q0u8);
- dst += stride;
- q0u8 = vdupq_lane_u8(d2u8, 1);
- vst1q_u8(dst, q0u8);
- dst += stride;
- q0u8 = vdupq_lane_u8(d2u8, 2);
- vst1q_u8(dst, q0u8);
- dst += stride;
- q0u8 = vdupq_lane_u8(d2u8, 3);
- vst1q_u8(dst, q0u8);
- dst += stride;
- q0u8 = vdupq_lane_u8(d2u8, 4);
- vst1q_u8(dst, q0u8);
- dst += stride;
- q0u8 = vdupq_lane_u8(d2u8, 5);
- vst1q_u8(dst, q0u8);
- dst += stride;
- q0u8 = vdupq_lane_u8(d2u8, 6);
- vst1q_u8(dst, q0u8);
- dst += stride;
- q0u8 = vdupq_lane_u8(d2u8, 7);
- vst1q_u8(dst, q0u8);
- dst += stride;
- }
-}
-
-void aom_h_predictor_32x32_neon(uint8_t *dst, ptrdiff_t stride,
- const uint8_t *above, const uint8_t *left) {
- int j, k;
- uint8x8_t d2u8 = vdup_n_u8(0);
- uint8x16_t q0u8 = vdupq_n_u8(0);
- uint8x16_t q1u8 = vdupq_n_u8(0);
- (void)above;
-
- for (k = 0; k < 2; k++, left += 16) {
- q1u8 = vld1q_u8(left);
- d2u8 = vget_low_u8(q1u8);
- for (j = 0; j < 2; j++, d2u8 = vget_high_u8(q1u8)) {
- q0u8 = vdupq_lane_u8(d2u8, 0);
- vst1q_u8(dst, q0u8);
- vst1q_u8(dst + 16, q0u8);
- dst += stride;
- q0u8 = vdupq_lane_u8(d2u8, 1);
- vst1q_u8(dst, q0u8);
- vst1q_u8(dst + 16, q0u8);
- dst += stride;
- q0u8 = vdupq_lane_u8(d2u8, 2);
- vst1q_u8(dst, q0u8);
- vst1q_u8(dst + 16, q0u8);
- dst += stride;
- q0u8 = vdupq_lane_u8(d2u8, 3);
- vst1q_u8(dst, q0u8);
- vst1q_u8(dst + 16, q0u8);
- dst += stride;
- q0u8 = vdupq_lane_u8(d2u8, 4);
- vst1q_u8(dst, q0u8);
- vst1q_u8(dst + 16, q0u8);
- dst += stride;
- q0u8 = vdupq_lane_u8(d2u8, 5);
- vst1q_u8(dst, q0u8);
- vst1q_u8(dst + 16, q0u8);
- dst += stride;
- q0u8 = vdupq_lane_u8(d2u8, 6);
- vst1q_u8(dst, q0u8);
- vst1q_u8(dst + 16, q0u8);
- dst += stride;
- q0u8 = vdupq_lane_u8(d2u8, 7);
- vst1q_u8(dst, q0u8);
- vst1q_u8(dst + 16, q0u8);
- dst += stride;
- }
- }
-}
-
-static INLINE void highbd_dc_predictor(uint16_t *dst, ptrdiff_t stride, int bw,
- const uint16_t *above,
- const uint16_t *left) {
- assert(bw >= 4);
- assert(IS_POWER_OF_TWO(bw));
- int expected_dc, sum = 0;
- const int count = bw * 2;
- uint32x4_t sum_q = vdupq_n_u32(0);
- uint32x2_t sum_d;
- uint16_t *dst_1;
- if (bw >= 8) {
- for (int i = 0; i < bw; i += 8) {
- sum_q = vpadalq_u16(sum_q, vld1q_u16(above));
- sum_q = vpadalq_u16(sum_q, vld1q_u16(left));
- above += 8;
- left += 8;
- }
- sum_d = vadd_u32(vget_low_u32(sum_q), vget_high_u32(sum_q));
- sum = vget_lane_s32(vreinterpret_s32_u64(vpaddl_u32(sum_d)), 0);
- expected_dc = (sum + (count >> 1)) / count;
- const uint16x8_t dc = vdupq_n_u16((uint16_t)expected_dc);
- for (int r = 0; r < bw; r++) {
- dst_1 = dst;
- for (int i = 0; i < bw; i += 8) {
- vst1q_u16(dst_1, dc);
- dst_1 += 8;
- }
- dst += stride;
- }
- } else { // 4x4
- sum_q = vaddl_u16(vld1_u16(above), vld1_u16(left));
- sum_d = vadd_u32(vget_low_u32(sum_q), vget_high_u32(sum_q));
- sum = vget_lane_s32(vreinterpret_s32_u64(vpaddl_u32(sum_d)), 0);
- expected_dc = (sum + (count >> 1)) / count;
- const uint16x4_t dc = vdup_n_u16((uint16_t)expected_dc);
- for (int r = 0; r < bw; r++) {
- vst1_u16(dst, dc);
- dst += stride;
- }
- }
-}
-
-#define intra_pred_highbd_sized_neon(type, width) \
- void aom_highbd_##type##_predictor_##width##x##width##_neon( \
- uint16_t *dst, ptrdiff_t stride, const uint16_t *above, \
- const uint16_t *left, int bd) { \
- (void)bd; \
- highbd_##type##_predictor(dst, stride, width, above, left); \
- }
-
-#define intra_pred_square(type) \
- intra_pred_highbd_sized_neon(type, 4); \
- intra_pred_highbd_sized_neon(type, 8); \
- intra_pred_highbd_sized_neon(type, 16); \
- intra_pred_highbd_sized_neon(type, 32); \
- intra_pred_highbd_sized_neon(type, 64);
-
-intra_pred_square(dc);
-#undef intra_pred_square
diff --git a/third_party/aom/aom_dsp/arm/loopfilter_neon.c b/third_party/aom/aom_dsp/arm/loopfilter_neon.c
deleted file mode 100644
index bdc67626d..000000000
--- a/third_party/aom/aom_dsp/arm/loopfilter_neon.c
+++ /dev/null
@@ -1,928 +0,0 @@
-/*
- * Copyright (c) 2018, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#include <arm_neon.h>
-
-#include "config/aom_dsp_rtcd.h"
-#include "config/aom_config.h"
-
-#include "aom/aom_integer.h"
-#include "av1/common/arm/mem_neon.h"
-#include "av1/common/arm/transpose_neon.h"
-
-static INLINE uint8x8_t lpf_mask(uint8x8_t p3q3, uint8x8_t p2q2, uint8x8_t p1q1,
- uint8x8_t p0q0, const uint8_t blimit,
- const uint8_t limit) {
- // Calculate mask values for four samples
- uint32x2x2_t p0q0_p1q1;
- uint16x8_t temp_16x8;
- uint16x4_t temp0_16x4, temp1_16x4;
- uint8x8_t mask_8x8, temp_8x8;
- const uint8x8_t limit_8x8 = vdup_n_u8(limit);
- const uint16x4_t blimit_16x4 = vdup_n_u16((uint16_t)blimit);
-
- mask_8x8 = vabd_u8(p3q3, p2q2);
- mask_8x8 = vmax_u8(mask_8x8, vabd_u8(p2q2, p1q1));
- mask_8x8 = vmax_u8(mask_8x8, vabd_u8(p1q1, p0q0));
- mask_8x8 = vcle_u8(mask_8x8, limit_8x8);
-
- temp_8x8 = vreinterpret_u8_u32(vrev64_u32(vreinterpret_u32_u8(mask_8x8)));
- mask_8x8 = vand_u8(mask_8x8, temp_8x8);
-
- p0q0_p1q1 = vtrn_u32(vreinterpret_u32_u8(p0q0), vreinterpret_u32_u8(p1q1));
- temp_8x8 = vabd_u8(vreinterpret_u8_u32(p0q0_p1q1.val[0]),
- vreinterpret_u8_u32(p0q0_p1q1.val[1]));
- temp_16x8 = vmovl_u8(temp_8x8);
- temp0_16x4 = vshl_n_u16(vget_low_u16(temp_16x8), 1);
- temp1_16x4 = vshr_n_u16(vget_high_u16(temp_16x8), 1);
- temp0_16x4 = vadd_u16(temp0_16x4, temp1_16x4);
- temp0_16x4 = vcle_u16(temp0_16x4, blimit_16x4);
- temp_8x8 = vmovn_u16(vcombine_u16(temp0_16x4, temp0_16x4));
-
- mask_8x8 = vand_u8(mask_8x8, temp_8x8);
-
- return mask_8x8;
-}
-
-static INLINE uint8x8_t lpf_mask2(uint8x8_t p1q1, uint8x8_t p0q0,
- const uint8_t blimit, const uint8_t limit) {
- uint32x2x2_t p0q0_p1q1;
- uint16x8_t temp_16x8;
- uint16x4_t temp0_16x4, temp1_16x4;
- const uint16x4_t blimit_16x4 = vdup_n_u16(blimit);
- const uint8x8_t limit_8x8 = vdup_n_u8(limit);
- uint8x8_t mask_8x8, temp_8x8;
-
- mask_8x8 = vabd_u8(p1q1, p0q0);
- mask_8x8 = vcle_u8(mask_8x8, limit_8x8);
-
- temp_8x8 = vreinterpret_u8_u32(vrev64_u32(vreinterpret_u32_u8(mask_8x8)));
- mask_8x8 = vand_u8(mask_8x8, temp_8x8);
-
- p0q0_p1q1 = vtrn_u32(vreinterpret_u32_u8(p0q0), vreinterpret_u32_u8(p1q1));
- temp_8x8 = vabd_u8(vreinterpret_u8_u32(p0q0_p1q1.val[0]),
- vreinterpret_u8_u32(p0q0_p1q1.val[1]));
- temp_16x8 = vmovl_u8(temp_8x8);
- temp0_16x4 = vshl_n_u16(vget_low_u16(temp_16x8), 1);
- temp1_16x4 = vshr_n_u16(vget_high_u16(temp_16x8), 1);
- temp0_16x4 = vadd_u16(temp0_16x4, temp1_16x4);
- temp0_16x4 = vcle_u16(temp0_16x4, blimit_16x4);
- temp_8x8 = vmovn_u16(vcombine_u16(temp0_16x4, temp0_16x4));
-
- mask_8x8 = vand_u8(mask_8x8, temp_8x8);
-
- return mask_8x8;
-}
-
-static INLINE uint8x8_t lpf_flat_mask4(uint8x8_t p3q3, uint8x8_t p2q2,
- uint8x8_t p1q1, uint8x8_t p0q0) {
- const uint8x8_t thresh_8x8 = vdup_n_u8(1); // for bd==8 threshold is always 1
- uint8x8_t flat_8x8, temp_8x8;
-
- flat_8x8 = vabd_u8(p1q1, p0q0);
- flat_8x8 = vmax_u8(flat_8x8, vabd_u8(p2q2, p0q0));
- flat_8x8 = vmax_u8(flat_8x8, vabd_u8(p3q3, p0q0));
- flat_8x8 = vcle_u8(flat_8x8, thresh_8x8);
-
- temp_8x8 = vreinterpret_u8_u32(vrev64_u32(vreinterpret_u32_u8(flat_8x8)));
- flat_8x8 = vand_u8(flat_8x8, temp_8x8);
-
- return flat_8x8;
-}
-
-static INLINE uint8x8_t lpf_flat_mask3(uint8x8_t p2q2, uint8x8_t p1q1,
- uint8x8_t p0q0) {
- const uint8x8_t thresh_8x8 = vdup_n_u8(1); // for bd==8 threshold is always 1
- uint8x8_t flat_8x8, temp_8x8;
-
- flat_8x8 = vabd_u8(p1q1, p0q0);
- flat_8x8 = vmax_u8(flat_8x8, vabd_u8(p2q2, p0q0));
- flat_8x8 = vcle_u8(flat_8x8, thresh_8x8);
-
- temp_8x8 = vreinterpret_u8_u32(vrev64_u32(vreinterpret_u32_u8(flat_8x8)));
- flat_8x8 = vand_u8(flat_8x8, temp_8x8);
-
- return flat_8x8;
-}
-
-static INLINE uint8x8_t lpf_mask3_chroma(uint8x8_t p2q2, uint8x8_t p1q1,
- uint8x8_t p0q0, const uint8_t blimit,
- const uint8_t limit) {
- // Calculate mask3 values for four samples
- uint32x2x2_t p0q0_p1q1;
- uint16x8_t temp_16x8;
- uint16x4_t temp0_16x4, temp1_16x4;
- uint8x8_t mask_8x8, temp_8x8;
- const uint8x8_t limit_8x8 = vdup_n_u8(limit);
- const uint16x4_t blimit_16x4 = vdup_n_u16((uint16_t)blimit);
-
- mask_8x8 = vabd_u8(p2q2, p1q1);
- mask_8x8 = vmax_u8(mask_8x8, vabd_u8(p1q1, p0q0));
- mask_8x8 = vcle_u8(mask_8x8, limit_8x8);
-
- temp_8x8 = vreinterpret_u8_u32(vrev64_u32(vreinterpret_u32_u8(mask_8x8)));
- mask_8x8 = vand_u8(mask_8x8, temp_8x8);
-
- p0q0_p1q1 = vtrn_u32(vreinterpret_u32_u8(p0q0), vreinterpret_u32_u8(p1q1));
- temp_8x8 = vabd_u8(vreinterpret_u8_u32(p0q0_p1q1.val[0]),
- vreinterpret_u8_u32(p0q0_p1q1.val[1]));
- temp_16x8 = vmovl_u8(temp_8x8);
- temp0_16x4 = vshl_n_u16(vget_low_u16(temp_16x8), 1);
- temp1_16x4 = vshr_n_u16(vget_high_u16(temp_16x8), 1);
- temp0_16x4 = vadd_u16(temp0_16x4, temp1_16x4);
- temp0_16x4 = vcle_u16(temp0_16x4, blimit_16x4);
- temp_8x8 = vmovn_u16(vcombine_u16(temp0_16x4, temp0_16x4));
-
- mask_8x8 = vand_u8(mask_8x8, temp_8x8);
-
- return mask_8x8;
-}
-
-static void lpf_14_neon(uint8x8_t *p6q6, uint8x8_t *p5q5, uint8x8_t *p4q4,
- uint8x8_t *p3q3, uint8x8_t *p2q2, uint8x8_t *p1q1,
- uint8x8_t *p0q0, const uint8_t blimit,
- const uint8_t limit, const uint8_t thresh) {
- uint16x8_t out;
- uint8x8_t out_f14_pq0, out_f14_pq1, out_f14_pq2, out_f14_pq3, out_f14_pq4,
- out_f14_pq5;
- uint8x8_t out_f7_pq0, out_f7_pq1, out_f7_pq2;
- uint8x8_t out_f4_pq0, out_f4_pq1;
- uint8x8_t mask_8x8, flat_8x8, flat2_8x8;
- uint8x8_t q0p0, q1p1, q2p2;
-
- // Calculate filter masks
- mask_8x8 = lpf_mask(*p3q3, *p2q2, *p1q1, *p0q0, blimit, limit);
- flat_8x8 = lpf_flat_mask4(*p3q3, *p2q2, *p1q1, *p0q0);
- flat2_8x8 = lpf_flat_mask4(*p6q6, *p5q5, *p4q4, *p0q0);
- {
- // filter 4
- int32x2x2_t ps0_qs0, ps1_qs1;
- int16x8_t filter_s16;
- const uint8x8_t thresh_f4 = vdup_n_u8(thresh);
- uint8x8_t temp0_8x8, temp1_8x8;
- int8x8_t ps0_s8, ps1_s8, qs0_s8, qs1_s8, temp_s8;
- int8x8_t op0, oq0, op1, oq1;
- int8x8_t pq_s0, pq_s1;
- int8x8_t filter_s8, filter1_s8, filter2_s8;
- int8x8_t hev_8x8;
- const int8x8_t sign_mask = vdup_n_s8(0x80);
- const int8x8_t val_4 = vdup_n_s8(4);
- const int8x8_t val_3 = vdup_n_s8(3);
-
- pq_s0 = veor_s8(vreinterpret_s8_u8(*p0q0), sign_mask);
- pq_s1 = veor_s8(vreinterpret_s8_u8(*p1q1), sign_mask);
-
- ps0_qs0 = vtrn_s32(vreinterpret_s32_s8(pq_s0), vreinterpret_s32_s8(pq_s0));
- ps1_qs1 = vtrn_s32(vreinterpret_s32_s8(pq_s1), vreinterpret_s32_s8(pq_s1));
- ps0_s8 = vreinterpret_s8_s32(ps0_qs0.val[0]);
- qs0_s8 = vreinterpret_s8_s32(ps0_qs0.val[1]);
- ps1_s8 = vreinterpret_s8_s32(ps1_qs1.val[0]);
- qs1_s8 = vreinterpret_s8_s32(ps1_qs1.val[1]);
-
- // hev_mask
- temp0_8x8 = vcgt_u8(vabd_u8(*p0q0, *p1q1), thresh_f4);
- temp1_8x8 = vreinterpret_u8_u32(vrev64_u32(vreinterpret_u32_u8(temp0_8x8)));
- hev_8x8 = vreinterpret_s8_u8(vorr_u8(temp0_8x8, temp1_8x8));
-
- // add outer taps if we have high edge variance
- filter_s8 = vqsub_s8(ps1_s8, qs1_s8);
- filter_s8 = vand_s8(filter_s8, hev_8x8);
-
- // inner taps
- temp_s8 = vqsub_s8(qs0_s8, ps0_s8);
- filter_s16 = vmovl_s8(filter_s8);
- filter_s16 = vmlal_s8(filter_s16, temp_s8, val_3);
- filter_s8 = vqmovn_s16(filter_s16);
- filter_s8 = vand_s8(filter_s8, vreinterpret_s8_u8(mask_8x8));
-
- filter1_s8 = vqadd_s8(filter_s8, val_4);
- filter2_s8 = vqadd_s8(filter_s8, val_3);
- filter1_s8 = vshr_n_s8(filter1_s8, 3);
- filter2_s8 = vshr_n_s8(filter2_s8, 3);
-
- oq0 = veor_s8(vqsub_s8(qs0_s8, filter1_s8), sign_mask);
- op0 = veor_s8(vqadd_s8(ps0_s8, filter2_s8), sign_mask);
-
- hev_8x8 = vmvn_s8(hev_8x8);
- filter_s8 = vrshr_n_s8(filter1_s8, 1);
- filter_s8 = vand_s8(filter_s8, hev_8x8);
-
- oq1 = veor_s8(vqsub_s8(qs1_s8, filter_s8), sign_mask);
- op1 = veor_s8(vqadd_s8(ps1_s8, filter_s8), sign_mask);
-
- out_f4_pq0 = vreinterpret_u8_s8(vext_s8(op0, oq0, 4));
- out_f4_pq1 = vreinterpret_u8_s8(vext_s8(op1, oq1, 4));
- }
- // reverse p and q
- q0p0 = vreinterpret_u8_u32(vrev64_u32(vreinterpret_u32_u8(*p0q0)));
- q1p1 = vreinterpret_u8_u32(vrev64_u32(vreinterpret_u32_u8(*p1q1)));
- q2p2 = vreinterpret_u8_u32(vrev64_u32(vreinterpret_u32_u8(*p2q2)));
- {
- // filter 8
- uint16x8_t out_pq0, out_pq1, out_pq2;
- out = vaddl_u8(*p3q3, *p2q2);
- out = vaddw_u8(out, *p1q1);
- out = vaddw_u8(out, *p0q0);
-
- out = vaddw_u8(out, q0p0);
- out_pq1 = vaddw_u8(out, *p3q3);
- out_pq2 = vaddw_u8(out_pq1, *p3q3);
- out_pq2 = vaddw_u8(out_pq2, *p2q2);
- out_pq1 = vaddw_u8(out_pq1, *p1q1);
- out_pq1 = vaddw_u8(out_pq1, q1p1);
-
- out_pq0 = vaddw_u8(out, *p0q0);
- out_pq0 = vaddw_u8(out_pq0, q1p1);
- out_pq0 = vaddw_u8(out_pq0, q2p2);
-
- out_f7_pq0 = vrshrn_n_u16(out_pq0, 3);
- out_f7_pq1 = vrshrn_n_u16(out_pq1, 3);
- out_f7_pq2 = vrshrn_n_u16(out_pq2, 3);
- }
- {
- // filter 14
- uint16x8_t out_pq0, out_pq1, out_pq2, out_pq3, out_pq4, out_pq5;
- uint16x8_t p6q6_2, p6q6_temp, qp_sum;
- uint8x8_t qp_rev;
-
- out = vaddw_u8(out, *p4q4);
- out = vaddw_u8(out, *p5q5);
- out = vaddw_u8(out, *p6q6);
-
- out_pq5 = vaddw_u8(out, *p4q4);
- out_pq4 = vaddw_u8(out_pq5, *p3q3);
- out_pq3 = vaddw_u8(out_pq4, *p2q2);
-
- out_pq5 = vaddw_u8(out_pq5, *p5q5);
- out_pq4 = vaddw_u8(out_pq4, *p5q5);
-
- out_pq0 = vaddw_u8(out, *p1q1);
- out_pq1 = vaddw_u8(out_pq0, *p2q2);
- out_pq2 = vaddw_u8(out_pq1, *p3q3);
-
- out_pq0 = vaddw_u8(out_pq0, *p0q0);
- out_pq1 = vaddw_u8(out_pq1, *p0q0);
-
- out_pq1 = vaddw_u8(out_pq1, *p6q6);
- p6q6_2 = vaddl_u8(*p6q6, *p6q6);
- out_pq2 = vaddq_u16(out_pq2, p6q6_2);
- p6q6_temp = vaddw_u8(p6q6_2, *p6q6);
- out_pq3 = vaddq_u16(out_pq3, p6q6_temp);
- p6q6_temp = vaddw_u8(p6q6_temp, *p6q6);
- out_pq4 = vaddq_u16(out_pq4, p6q6_temp);
- p6q6_temp = vaddq_u16(p6q6_temp, p6q6_2);
- out_pq5 = vaddq_u16(out_pq5, p6q6_temp);
-
- out_pq4 = vaddw_u8(out_pq4, q1p1);
-
- qp_sum = vaddl_u8(q2p2, q1p1);
- out_pq3 = vaddq_u16(out_pq3, qp_sum);
-
- qp_rev = vreinterpret_u8_u32(vrev64_u32(vreinterpret_u32_u8(*p3q3)));
- qp_sum = vaddw_u8(qp_sum, qp_rev);
- out_pq2 = vaddq_u16(out_pq2, qp_sum);
-
- qp_rev = vreinterpret_u8_u32(vrev64_u32(vreinterpret_u32_u8(*p4q4)));
- qp_sum = vaddw_u8(qp_sum, qp_rev);
- out_pq1 = vaddq_u16(out_pq1, qp_sum);
-
- qp_rev = vreinterpret_u8_u32(vrev64_u32(vreinterpret_u32_u8(*p5q5)));
- qp_sum = vaddw_u8(qp_sum, qp_rev);
- out_pq0 = vaddq_u16(out_pq0, qp_sum);
-
- out_pq0 = vaddw_u8(out_pq0, q0p0);
-
- out_f14_pq0 = vrshrn_n_u16(out_pq0, 4);
- out_f14_pq1 = vrshrn_n_u16(out_pq1, 4);
- out_f14_pq2 = vrshrn_n_u16(out_pq2, 4);
- out_f14_pq3 = vrshrn_n_u16(out_pq3, 4);
- out_f14_pq4 = vrshrn_n_u16(out_pq4, 4);
- out_f14_pq5 = vrshrn_n_u16(out_pq5, 4);
- }
- {
- uint8x8_t filter4_cond, filter8_cond, filter14_cond;
- filter8_cond = vand_u8(flat_8x8, mask_8x8);
- filter4_cond = vmvn_u8(filter8_cond);
- filter14_cond = vand_u8(filter8_cond, flat2_8x8);
-
- // filter4 outputs
- *p0q0 = vbsl_u8(filter4_cond, out_f4_pq0, *p0q0);
- *p1q1 = vbsl_u8(filter4_cond, out_f4_pq1, *p1q1);
-
- // filter8 outputs
- *p0q0 = vbsl_u8(filter8_cond, out_f7_pq0, *p0q0);
- *p1q1 = vbsl_u8(filter8_cond, out_f7_pq1, *p1q1);
- *p2q2 = vbsl_u8(filter8_cond, out_f7_pq2, *p2q2);
-
- // filter14 outputs
- *p0q0 = vbsl_u8(filter14_cond, out_f14_pq0, *p0q0);
- *p1q1 = vbsl_u8(filter14_cond, out_f14_pq1, *p1q1);
- *p2q2 = vbsl_u8(filter14_cond, out_f14_pq2, *p2q2);
- *p3q3 = vbsl_u8(filter14_cond, out_f14_pq3, *p3q3);
- *p4q4 = vbsl_u8(filter14_cond, out_f14_pq4, *p4q4);
- *p5q5 = vbsl_u8(filter14_cond, out_f14_pq5, *p5q5);
- }
-}
-
-static void lpf_8_neon(uint8x8_t *p3q3, uint8x8_t *p2q2, uint8x8_t *p1q1,
- uint8x8_t *p0q0, const uint8_t blimit,
- const uint8_t limit, const uint8_t thresh) {
- uint16x8_t out;
- uint8x8_t out_f7_pq0, out_f7_pq1, out_f7_pq2;
- uint8x8_t out_f4_pq0, out_f4_pq1;
- uint8x8_t mask_8x8, flat_8x8;
-
- // Calculate filter masks
- mask_8x8 = lpf_mask(*p3q3, *p2q2, *p1q1, *p0q0, blimit, limit);
- flat_8x8 = lpf_flat_mask4(*p3q3, *p2q2, *p1q1, *p0q0);
- {
- // filter 4
- int32x2x2_t ps0_qs0, ps1_qs1;
- int16x8_t filter_s16;
- const uint8x8_t thresh_f4 = vdup_n_u8(thresh);
- uint8x8_t temp0_8x8, temp1_8x8;
- int8x8_t ps0_s8, ps1_s8, qs0_s8, qs1_s8, temp_s8;
- int8x8_t op0, oq0, op1, oq1;
- int8x8_t pq_s0, pq_s1;
- int8x8_t filter_s8, filter1_s8, filter2_s8;
- int8x8_t hev_8x8;
- const int8x8_t sign_mask = vdup_n_s8(0x80);
- const int8x8_t val_4 = vdup_n_s8(4);
- const int8x8_t val_3 = vdup_n_s8(3);
-
- pq_s0 = veor_s8(vreinterpret_s8_u8(*p0q0), sign_mask);
- pq_s1 = veor_s8(vreinterpret_s8_u8(*p1q1), sign_mask);
-
- ps0_qs0 = vtrn_s32(vreinterpret_s32_s8(pq_s0), vreinterpret_s32_s8(pq_s0));
- ps1_qs1 = vtrn_s32(vreinterpret_s32_s8(pq_s1), vreinterpret_s32_s8(pq_s1));
- ps0_s8 = vreinterpret_s8_s32(ps0_qs0.val[0]);
- qs0_s8 = vreinterpret_s8_s32(ps0_qs0.val[1]);
- ps1_s8 = vreinterpret_s8_s32(ps1_qs1.val[0]);
- qs1_s8 = vreinterpret_s8_s32(ps1_qs1.val[1]);
-
- // hev_mask
- temp0_8x8 = vcgt_u8(vabd_u8(*p0q0, *p1q1), thresh_f4);
- temp1_8x8 = vreinterpret_u8_u32(vrev64_u32(vreinterpret_u32_u8(temp0_8x8)));
- hev_8x8 = vreinterpret_s8_u8(vorr_u8(temp0_8x8, temp1_8x8));
-
- // add outer taps if we have high edge variance
- filter_s8 = vqsub_s8(ps1_s8, qs1_s8);
- filter_s8 = vand_s8(filter_s8, hev_8x8);
-
- // inner taps
- temp_s8 = vqsub_s8(qs0_s8, ps0_s8);
- filter_s16 = vmovl_s8(filter_s8);
- filter_s16 = vmlal_s8(filter_s16, temp_s8, val_3);
- filter_s8 = vqmovn_s16(filter_s16);
- filter_s8 = vand_s8(filter_s8, vreinterpret_s8_u8(mask_8x8));
-
- filter1_s8 = vqadd_s8(filter_s8, val_4);
- filter2_s8 = vqadd_s8(filter_s8, val_3);
- filter1_s8 = vshr_n_s8(filter1_s8, 3);
- filter2_s8 = vshr_n_s8(filter2_s8, 3);
-
- oq0 = veor_s8(vqsub_s8(qs0_s8, filter1_s8), sign_mask);
- op0 = veor_s8(vqadd_s8(ps0_s8, filter2_s8), sign_mask);
-
- hev_8x8 = vmvn_s8(hev_8x8);
- filter_s8 = vrshr_n_s8(filter1_s8, 1);
- filter_s8 = vand_s8(filter_s8, hev_8x8);
-
- oq1 = veor_s8(vqsub_s8(qs1_s8, filter_s8), sign_mask);
- op1 = veor_s8(vqadd_s8(ps1_s8, filter_s8), sign_mask);
-
- out_f4_pq0 = vreinterpret_u8_s8(vext_s8(op0, oq0, 4));
- out_f4_pq1 = vreinterpret_u8_s8(vext_s8(op1, oq1, 4));
- }
- {
- // filter 8
- uint16x8_t out_pq0, out_pq1, out_pq2;
- uint8x8_t q0p0, q1p1, q2p2;
-
- out = vaddl_u8(*p3q3, *p2q2);
- out = vaddw_u8(out, *p1q1);
- out = vaddw_u8(out, *p0q0);
-
- // reverse p and q
- q0p0 = vreinterpret_u8_u32(vrev64_u32(vreinterpret_u32_u8(*p0q0)));
- q1p1 = vreinterpret_u8_u32(vrev64_u32(vreinterpret_u32_u8(*p1q1)));
- q2p2 = vreinterpret_u8_u32(vrev64_u32(vreinterpret_u32_u8(*p2q2)));
-
- out = vaddw_u8(out, q0p0);
- out_pq1 = vaddw_u8(out, *p3q3);
- out_pq2 = vaddw_u8(out_pq1, *p3q3);
- out_pq2 = vaddw_u8(out_pq2, *p2q2);
- out_pq1 = vaddw_u8(out_pq1, *p1q1);
- out_pq1 = vaddw_u8(out_pq1, q1p1);
-
- out_pq0 = vaddw_u8(out, *p0q0);
- out_pq0 = vaddw_u8(out_pq0, q1p1);
- out_pq0 = vaddw_u8(out_pq0, q2p2);
-
- out_f7_pq0 = vrshrn_n_u16(out_pq0, 3);
- out_f7_pq1 = vrshrn_n_u16(out_pq1, 3);
- out_f7_pq2 = vrshrn_n_u16(out_pq2, 3);
- }
- {
- uint8x8_t filter4_cond, filter8_cond;
- filter8_cond = vand_u8(flat_8x8, mask_8x8);
- filter4_cond = vmvn_u8(filter8_cond);
-
- // filter4 outputs
- *p0q0 = vbsl_u8(filter4_cond, out_f4_pq0, *p0q0);
- *p1q1 = vbsl_u8(filter4_cond, out_f4_pq1, *p1q1);
-
- // filter8 outputs
- *p0q0 = vbsl_u8(filter8_cond, out_f7_pq0, *p0q0);
- *p1q1 = vbsl_u8(filter8_cond, out_f7_pq1, *p1q1);
- *p2q2 = vbsl_u8(filter8_cond, out_f7_pq2, *p2q2);
- }
-}
-
-static void lpf_6_neon(uint8x8_t *p2q2, uint8x8_t *p1q1, uint8x8_t *p0q0,
- const uint8_t blimit, const uint8_t limit,
- const uint8_t thresh) {
- uint16x8_t out;
- uint8x8_t out_f6_pq0, out_f6_pq1;
- uint8x8_t out_f4_pq0, out_f4_pq1;
- uint8x8_t mask_8x8, flat_8x8;
-
- // Calculate filter masks
- mask_8x8 = lpf_mask3_chroma(*p2q2, *p1q1, *p0q0, blimit, limit);
- flat_8x8 = lpf_flat_mask3(*p2q2, *p1q1, *p0q0);
- {
- // filter 4
- int32x2x2_t ps0_qs0, ps1_qs1;
- int16x8_t filter_s16;
- const uint8x8_t thresh_f4 = vdup_n_u8(thresh);
- uint8x8_t temp0_8x8, temp1_8x8;
- int8x8_t ps0_s8, ps1_s8, qs0_s8, qs1_s8, temp_s8;
- int8x8_t op0, oq0, op1, oq1;
- int8x8_t pq_s0, pq_s1;
- int8x8_t filter_s8, filter1_s8, filter2_s8;
- int8x8_t hev_8x8;
- const int8x8_t sign_mask = vdup_n_s8(0x80);
- const int8x8_t val_4 = vdup_n_s8(4);
- const int8x8_t val_3 = vdup_n_s8(3);
-
- pq_s0 = veor_s8(vreinterpret_s8_u8(*p0q0), sign_mask);
- pq_s1 = veor_s8(vreinterpret_s8_u8(*p1q1), sign_mask);
-
- ps0_qs0 = vtrn_s32(vreinterpret_s32_s8(pq_s0), vreinterpret_s32_s8(pq_s0));
- ps1_qs1 = vtrn_s32(vreinterpret_s32_s8(pq_s1), vreinterpret_s32_s8(pq_s1));
- ps0_s8 = vreinterpret_s8_s32(ps0_qs0.val[0]);
- qs0_s8 = vreinterpret_s8_s32(ps0_qs0.val[1]);
- ps1_s8 = vreinterpret_s8_s32(ps1_qs1.val[0]);
- qs1_s8 = vreinterpret_s8_s32(ps1_qs1.val[1]);
-
- // hev_mask
- temp0_8x8 = vcgt_u8(vabd_u8(*p0q0, *p1q1), thresh_f4);
- temp1_8x8 = vreinterpret_u8_u32(vrev64_u32(vreinterpret_u32_u8(temp0_8x8)));
- hev_8x8 = vreinterpret_s8_u8(vorr_u8(temp0_8x8, temp1_8x8));
-
- // add outer taps if we have high edge variance
- filter_s8 = vqsub_s8(ps1_s8, qs1_s8);
- filter_s8 = vand_s8(filter_s8, hev_8x8);
-
- // inner taps
- temp_s8 = vqsub_s8(qs0_s8, ps0_s8);
- filter_s16 = vmovl_s8(filter_s8);
- filter_s16 = vmlal_s8(filter_s16, temp_s8, val_3);
- filter_s8 = vqmovn_s16(filter_s16);
- filter_s8 = vand_s8(filter_s8, vreinterpret_s8_u8(mask_8x8));
-
- filter1_s8 = vqadd_s8(filter_s8, val_4);
- filter2_s8 = vqadd_s8(filter_s8, val_3);
- filter1_s8 = vshr_n_s8(filter1_s8, 3);
- filter2_s8 = vshr_n_s8(filter2_s8, 3);
-
- oq0 = veor_s8(vqsub_s8(qs0_s8, filter1_s8), sign_mask);
- op0 = veor_s8(vqadd_s8(ps0_s8, filter2_s8), sign_mask);
-
- filter_s8 = vrshr_n_s8(filter1_s8, 1);
- filter_s8 = vbic_s8(filter_s8, hev_8x8);
-
- oq1 = veor_s8(vqsub_s8(qs1_s8, filter_s8), sign_mask);
- op1 = veor_s8(vqadd_s8(ps1_s8, filter_s8), sign_mask);
-
- out_f4_pq0 = vreinterpret_u8_s8(vext_s8(op0, oq0, 4));
- out_f4_pq1 = vreinterpret_u8_s8(vext_s8(op1, oq1, 4));
- }
- {
- // filter 6
- uint16x8_t out_pq0, out_pq1;
- uint8x8_t pq_rev;
-
- out = vaddl_u8(*p0q0, *p1q1);
- out = vaddq_u16(out, out);
- out = vaddw_u8(out, *p2q2);
-
- pq_rev = vreinterpret_u8_u32(vrev64_u32(vreinterpret_u32_u8(*p0q0)));
- out = vaddw_u8(out, pq_rev);
-
- out_pq0 = vaddw_u8(out, pq_rev);
- pq_rev = vreinterpret_u8_u32(vrev64_u32(vreinterpret_u32_u8(*p1q1)));
- out_pq0 = vaddw_u8(out_pq0, pq_rev);
-
- out_pq1 = vaddw_u8(out, *p2q2);
- out_pq1 = vaddw_u8(out_pq1, *p2q2);
-
- out_f6_pq0 = vrshrn_n_u16(out_pq0, 3);
- out_f6_pq1 = vrshrn_n_u16(out_pq1, 3);
- }
- {
- uint8x8_t filter4_cond, filter6_cond;
- filter6_cond = vand_u8(flat_8x8, mask_8x8);
- filter4_cond = vmvn_u8(filter6_cond);
-
- // filter4 outputs
- *p0q0 = vbsl_u8(filter4_cond, out_f4_pq0, *p0q0);
- *p1q1 = vbsl_u8(filter4_cond, out_f4_pq1, *p1q1);
-
- // filter6 outputs
- *p0q0 = vbsl_u8(filter6_cond, out_f6_pq0, *p0q0);
- *p1q1 = vbsl_u8(filter6_cond, out_f6_pq1, *p1q1);
- }
-}
-
-static void lpf_4_neon(uint8x8_t *p1q1, uint8x8_t *p0q0, const uint8_t blimit,
- const uint8_t limit, const uint8_t thresh) {
- int32x2x2_t ps0_qs0, ps1_qs1;
- int16x8_t filter_s16;
- const uint8x8_t thresh_f4 = vdup_n_u8(thresh);
- uint8x8_t mask_8x8, temp0_8x8, temp1_8x8;
- int8x8_t ps0_s8, ps1_s8, qs0_s8, qs1_s8, temp_s8;
- int8x8_t op0, oq0, op1, oq1;
- int8x8_t pq_s0, pq_s1;
- int8x8_t filter_s8, filter1_s8, filter2_s8;
- int8x8_t hev_8x8;
- const int8x8_t sign_mask = vdup_n_s8(0x80);
- const int8x8_t val_4 = vdup_n_s8(4);
- const int8x8_t val_3 = vdup_n_s8(3);
-
- // Calculate filter mask
- mask_8x8 = lpf_mask2(*p1q1, *p0q0, blimit, limit);
-
- pq_s0 = veor_s8(vreinterpret_s8_u8(*p0q0), sign_mask);
- pq_s1 = veor_s8(vreinterpret_s8_u8(*p1q1), sign_mask);
-
- ps0_qs0 = vtrn_s32(vreinterpret_s32_s8(pq_s0), vreinterpret_s32_s8(pq_s0));
- ps1_qs1 = vtrn_s32(vreinterpret_s32_s8(pq_s1), vreinterpret_s32_s8(pq_s1));
- ps0_s8 = vreinterpret_s8_s32(ps0_qs0.val[0]);
- qs0_s8 = vreinterpret_s8_s32(ps0_qs0.val[1]);
- ps1_s8 = vreinterpret_s8_s32(ps1_qs1.val[0]);
- qs1_s8 = vreinterpret_s8_s32(ps1_qs1.val[1]);
-
- // hev_mask
- temp0_8x8 = vcgt_u8(vabd_u8(*p0q0, *p1q1), thresh_f4);
- temp1_8x8 = vreinterpret_u8_u32(vrev64_u32(vreinterpret_u32_u8(temp0_8x8)));
- hev_8x8 = vreinterpret_s8_u8(vorr_u8(temp0_8x8, temp1_8x8));
-
- // add outer taps if we have high edge variance
- filter_s8 = vqsub_s8(ps1_s8, qs1_s8);
- filter_s8 = vand_s8(filter_s8, hev_8x8);
-
- // inner taps
- temp_s8 = vqsub_s8(qs0_s8, ps0_s8);
- filter_s16 = vmovl_s8(filter_s8);
- filter_s16 = vmlal_s8(filter_s16, temp_s8, val_3);
- filter_s8 = vqmovn_s16(filter_s16);
- filter_s8 = vand_s8(filter_s8, vreinterpret_s8_u8(mask_8x8));
-
- filter1_s8 = vqadd_s8(filter_s8, val_4);
- filter2_s8 = vqadd_s8(filter_s8, val_3);
- filter1_s8 = vshr_n_s8(filter1_s8, 3);
- filter2_s8 = vshr_n_s8(filter2_s8, 3);
-
- oq0 = veor_s8(vqsub_s8(qs0_s8, filter1_s8), sign_mask);
- op0 = veor_s8(vqadd_s8(ps0_s8, filter2_s8), sign_mask);
-
- filter_s8 = vrshr_n_s8(filter1_s8, 1);
- filter_s8 = vbic_s8(filter_s8, hev_8x8);
-
- oq1 = veor_s8(vqsub_s8(qs1_s8, filter_s8), sign_mask);
- op1 = veor_s8(vqadd_s8(ps1_s8, filter_s8), sign_mask);
-
- *p0q0 = vreinterpret_u8_s8(vext_s8(op0, oq0, 4));
- *p1q1 = vreinterpret_u8_s8(vext_s8(op1, oq1, 4));
-}
-
-void aom_lpf_vertical_14_neon(uint8_t *src, int stride, const uint8_t *blimit,
- const uint8_t *limit, const uint8_t *thresh) {
- uint8x16_t row0, row1, row2, row3;
- uint8x8_t pxp3, p6p2, p5p1, p4p0;
- uint8x8_t q0q4, q1q5, q2q6, q3qy;
- uint32x2x2_t p6q6_p2q2, p5q5_p1q1, p4q4_p0q0, pxqx_p3q3;
- uint32x2_t pq_rev;
- uint8x8_t p0q0, p1q1, p2q2, p3q3, p4q4, p5q5, p6q6;
-
- // row0: x p6 p5 p4 p3 p2 p1 p0 | q0 q1 q2 q3 q4 q5 q6 y
- // row1: x p6 p5 p4 p3 p2 p1 p0 | q0 q1 q2 q3 q4 q5 q6 y
- // row2: x p6 p5 p4 p3 p2 p1 p0 | q0 q1 q2 q3 q4 q5 q6 y
- // row3: x p6 p5 p4 p3 p2 p1 p0 | q0 q1 q2 q3 q4 q5 q6 y
- load_u8_8x16(src - 8, stride, &row0, &row1, &row2, &row3);
-
- pxp3 = vget_low_u8(row0);
- p6p2 = vget_low_u8(row1);
- p5p1 = vget_low_u8(row2);
- p4p0 = vget_low_u8(row3);
- transpose_u8_8x4(&pxp3, &p6p2, &p5p1, &p4p0);
-
- q0q4 = vget_high_u8(row0);
- q1q5 = vget_high_u8(row1);
- q2q6 = vget_high_u8(row2);
- q3qy = vget_high_u8(row3);
- transpose_u8_8x4(&q0q4, &q1q5, &q2q6, &q3qy);
-
- pq_rev = vrev64_u32(vreinterpret_u32_u8(q3qy));
- pxqx_p3q3 = vtrn_u32(vreinterpret_u32_u8(pxp3), pq_rev);
-
- pq_rev = vrev64_u32(vreinterpret_u32_u8(q1q5));
- p5q5_p1q1 = vtrn_u32(vreinterpret_u32_u8(p5p1), pq_rev);
-
- pq_rev = vrev64_u32(vreinterpret_u32_u8(q0q4));
- p4q4_p0q0 = vtrn_u32(vreinterpret_u32_u8(p4p0), pq_rev);
-
- pq_rev = vrev64_u32(vreinterpret_u32_u8(q2q6));
- p6q6_p2q2 = vtrn_u32(vreinterpret_u32_u8(p6p2), pq_rev);
-
- p0q0 = vreinterpret_u8_u32(p4q4_p0q0.val[1]);
- p1q1 = vreinterpret_u8_u32(p5q5_p1q1.val[1]);
- p2q2 = vreinterpret_u8_u32(p6q6_p2q2.val[1]);
- p3q3 = vreinterpret_u8_u32(pxqx_p3q3.val[1]);
- p4q4 = vreinterpret_u8_u32(p4q4_p0q0.val[0]);
- p5q5 = vreinterpret_u8_u32(p5q5_p1q1.val[0]);
- p6q6 = vreinterpret_u8_u32(p6q6_p2q2.val[0]);
-
- lpf_14_neon(&p6q6, &p5q5, &p4q4, &p3q3, &p2q2, &p1q1, &p0q0, *blimit, *limit,
- *thresh);
-
- pxqx_p3q3 = vtrn_u32(pxqx_p3q3.val[0], vreinterpret_u32_u8(p3q3));
- p5q5_p1q1 = vtrn_u32(vreinterpret_u32_u8(p5q5), vreinterpret_u32_u8(p1q1));
- p4q4_p0q0 = vtrn_u32(vreinterpret_u32_u8(p4q4), vreinterpret_u32_u8(p0q0));
- p6q6_p2q2 = vtrn_u32(vreinterpret_u32_u8(p6q6), vreinterpret_u32_u8(p2q2));
-
- pxqx_p3q3.val[1] = vrev64_u32(pxqx_p3q3.val[1]);
- p5q5_p1q1.val[1] = vrev64_u32(p5q5_p1q1.val[1]);
- p4q4_p0q0.val[1] = vrev64_u32(p4q4_p0q0.val[1]);
- p6q6_p2q2.val[1] = vrev64_u32(p6q6_p2q2.val[1]);
-
- q0q4 = vreinterpret_u8_u32(p4q4_p0q0.val[1]);
- q1q5 = vreinterpret_u8_u32(p5q5_p1q1.val[1]);
- q2q6 = vreinterpret_u8_u32(p6q6_p2q2.val[1]);
- q3qy = vreinterpret_u8_u32(pxqx_p3q3.val[1]);
- transpose_u8_8x4(&q0q4, &q1q5, &q2q6, &q3qy);
-
- pxp3 = vreinterpret_u8_u32(pxqx_p3q3.val[0]);
- p6p2 = vreinterpret_u8_u32(p6q6_p2q2.val[0]);
- p5p1 = vreinterpret_u8_u32(p5q5_p1q1.val[0]);
- p4p0 = vreinterpret_u8_u32(p4q4_p0q0.val[0]);
- transpose_u8_8x4(&pxp3, &p6p2, &p5p1, &p4p0);
-
- row0 = vcombine_u8(pxp3, q0q4);
- row1 = vcombine_u8(p6p2, q1q5);
- row2 = vcombine_u8(p5p1, q2q6);
- row3 = vcombine_u8(p4p0, q3qy);
-
- store_u8_8x16(src - 8, stride, row0, row1, row2, row3);
-}
-
-void aom_lpf_vertical_8_neon(uint8_t *src, int stride, const uint8_t *blimit,
- const uint8_t *limit, const uint8_t *thresh) {
- uint32x2x2_t p2q2_p1q1, p3q3_p0q0;
- uint32x2_t pq_rev;
- uint8x8_t p3q0, p2q1, p1q2, p0q3;
- uint8x8_t p0q0, p1q1, p2q2, p3q3;
-
- // row0: p3 p2 p1 p0 | q0 q1 q2 q3
- // row1: p3 p2 p1 p0 | q0 q1 q2 q3
- // row2: p3 p2 p1 p0 | q0 q1 q2 q3
- // row3: p3 p2 p1 p0 | q0 q1 q2 q3
- load_u8_8x4(src - 4, stride, &p3q0, &p2q1, &p1q2, &p0q3);
-
- transpose_u8_8x4(&p3q0, &p2q1, &p1q2, &p0q3);
-
- pq_rev = vrev64_u32(vreinterpret_u32_u8(p0q3));
- p3q3_p0q0 = vtrn_u32(vreinterpret_u32_u8(p3q0), pq_rev);
-
- pq_rev = vrev64_u32(vreinterpret_u32_u8(p1q2));
- p2q2_p1q1 = vtrn_u32(vreinterpret_u32_u8(p2q1), pq_rev);
-
- p0q0 = vreinterpret_u8_u32(vrev64_u32(p3q3_p0q0.val[1]));
- p1q1 = vreinterpret_u8_u32(vrev64_u32(p2q2_p1q1.val[1]));
- p2q2 = vreinterpret_u8_u32(p2q2_p1q1.val[0]);
- p3q3 = vreinterpret_u8_u32(p3q3_p0q0.val[0]);
-
- lpf_8_neon(&p3q3, &p2q2, &p1q1, &p0q0, *blimit, *limit, *thresh);
-
- pq_rev = vrev64_u32(vreinterpret_u32_u8(p0q0));
- p3q3_p0q0 = vtrn_u32(vreinterpret_u32_u8(p3q3), pq_rev);
-
- pq_rev = vrev64_u32(vreinterpret_u32_u8(p1q1));
- p2q2_p1q1 = vtrn_u32(vreinterpret_u32_u8(p2q2), pq_rev);
-
- p0q3 = vreinterpret_u8_u32(vrev64_u32(p3q3_p0q0.val[1]));
- p1q2 = vreinterpret_u8_u32(vrev64_u32(p2q2_p1q1.val[1]));
- p2q1 = vreinterpret_u8_u32(p2q2_p1q1.val[0]);
- p3q0 = vreinterpret_u8_u32(p3q3_p0q0.val[0]);
- transpose_u8_8x4(&p3q0, &p2q1, &p1q2, &p0q3);
-
- store_u8_8x4(src - 4, stride, p3q0, p2q1, p1q2, p0q3);
-}
-
-void aom_lpf_vertical_6_neon(uint8_t *src, int stride, const uint8_t *blimit,
- const uint8_t *limit, const uint8_t *thresh) {
- uint32x2x2_t p2q2_p1q1, pxqy_p0q0;
- uint32x2_t pq_rev;
- uint8x8_t pxq0, p2q1, p1q2, p0qy;
- uint8x8_t p0q0, p1q1, p2q2, pxqy;
-
- // row0: px p2 p1 p0 | q0 q1 q2 qy
- // row1: px p2 p1 p0 | q0 q1 q2 qy
- // row2: px p2 p1 p0 | q0 q1 q2 qy
- // row3: px p2 p1 p0 | q0 q1 q2 qy
- load_u8_8x4(src - 4, stride, &pxq0, &p2q1, &p1q2, &p0qy);
-
- transpose_u8_8x4(&pxq0, &p2q1, &p1q2, &p0qy);
-
- pq_rev = vrev64_u32(vreinterpret_u32_u8(p0qy));
- pxqy_p0q0 = vtrn_u32(vreinterpret_u32_u8(pxq0), pq_rev);
-
- pq_rev = vrev64_u32(vreinterpret_u32_u8(p1q2));
- p2q2_p1q1 = vtrn_u32(vreinterpret_u32_u8(p2q1), pq_rev);
-
- p0q0 = vreinterpret_u8_u32(vrev64_u32(pxqy_p0q0.val[1]));
- p1q1 = vreinterpret_u8_u32(vrev64_u32(p2q2_p1q1.val[1]));
- p2q2 = vreinterpret_u8_u32(p2q2_p1q1.val[0]);
- pxqy = vreinterpret_u8_u32(pxqy_p0q0.val[0]);
-
- lpf_6_neon(&p2q2, &p1q1, &p0q0, *blimit, *limit, *thresh);
-
- pq_rev = vrev64_u32(vreinterpret_u32_u8(p0q0));
- pxqy_p0q0 = vtrn_u32(vreinterpret_u32_u8(pxqy), pq_rev);
-
- pq_rev = vrev64_u32(vreinterpret_u32_u8(p1q1));
- p2q2_p1q1 = vtrn_u32(vreinterpret_u32_u8(p2q2), pq_rev);
-
- p0qy = vreinterpret_u8_u32(vrev64_u32(pxqy_p0q0.val[1]));
- p1q2 = vreinterpret_u8_u32(vrev64_u32(p2q2_p1q1.val[1]));
- p2q1 = vreinterpret_u8_u32(p2q2_p1q1.val[0]);
- pxq0 = vreinterpret_u8_u32(pxqy_p0q0.val[0]);
- transpose_u8_8x4(&pxq0, &p2q1, &p1q2, &p0qy);
-
- store_u8_8x4(src - 4, stride, pxq0, p2q1, p1q2, p0qy);
-}
-
-void aom_lpf_vertical_4_neon(uint8_t *src, int stride, const uint8_t *blimit,
- const uint8_t *limit, const uint8_t *thresh) {
- uint32x2x2_t p1q0_p0q1, p1q1_p0q0, p1p0_q1q0;
- uint32x2_t pq_rev;
- uint8x8_t UNINITIALIZED_IS_SAFE(p1p0), q0q1, p0q0, p1q1;
-
- // row0: p1 p0 | q0 q1
- // row1: p1 p0 | q0 q1
- // row2: p1 p0 | q0 q1
- // row3: p1 p0 | q0 q1
- load_u8_4x1(src - 2, &p1p0, 0);
- load_u8_4x1((src - 2) + 1 * stride, &p1p0, 1);
- load_u8_4x1((src - 2) + 2 * stride, &q0q1, 0);
- load_u8_4x1((src - 2) + 3 * stride, &q0q1, 1);
-
- transpose_u8_4x4(&p1p0, &q0q1);
-
- p1q0_p0q1 = vtrn_u32(vreinterpret_u32_u8(p1p0), vreinterpret_u32_u8(q0q1));
-
- pq_rev = vrev64_u32(p1q0_p0q1.val[1]);
- p1q1_p0q0 = vtrn_u32(p1q0_p0q1.val[0], pq_rev);
-
- p1q1 = vreinterpret_u8_u32(p1q1_p0q0.val[0]);
- p0q0 = vreinterpret_u8_u32(p1q1_p0q0.val[1]);
-
- lpf_4_neon(&p1q1, &p0q0, *blimit, *limit, *thresh);
-
- p1p0_q1q0 = vtrn_u32(vreinterpret_u32_u8(p1q1), vreinterpret_u32_u8(p0q0));
-
- p1p0 = vreinterpret_u8_u32(p1p0_q1q0.val[0]);
- q0q1 = vreinterpret_u8_u32(vrev64_u32(p1p0_q1q0.val[1]));
-
- transpose_u8_4x4(&p1p0, &q0q1);
-
- store_u8_4x1(src - 2, p1p0, 0);
- store_u8_4x1((src - 2) + 1 * stride, q0q1, 0);
- store_u8_4x1((src - 2) + 2 * stride, p1p0, 1);
- store_u8_4x1((src - 2) + 3 * stride, q0q1, 1);
-}
-
-void aom_lpf_horizontal_14_neon(uint8_t *src, int stride, const uint8_t *blimit,
- const uint8_t *limit, const uint8_t *thresh) {
- uint8x8_t p0q0, p1q1, p2q2, p3q3, p4q4, p5q5, UNINITIALIZED_IS_SAFE(p6q6);
-
- load_u8_4x1(src - 7 * stride, &p6q6, 0);
- load_u8_4x1(src - 6 * stride, &p5q5, 0);
- load_u8_4x1(src - 5 * stride, &p4q4, 0);
- load_u8_4x1(src - 4 * stride, &p3q3, 0);
- load_u8_4x1(src - 3 * stride, &p2q2, 0);
- load_u8_4x1(src - 2 * stride, &p1q1, 0);
- load_u8_4x1(src - 1 * stride, &p0q0, 0);
- load_u8_4x1(src + 0 * stride, &p0q0, 1);
- load_u8_4x1(src + 1 * stride, &p1q1, 1);
- load_u8_4x1(src + 2 * stride, &p2q2, 1);
- load_u8_4x1(src + 3 * stride, &p3q3, 1);
- load_u8_4x1(src + 4 * stride, &p4q4, 1);
- load_u8_4x1(src + 5 * stride, &p5q5, 1);
- load_u8_4x1(src + 6 * stride, &p6q6, 1);
-
- lpf_14_neon(&p6q6, &p5q5, &p4q4, &p3q3, &p2q2, &p1q1, &p0q0, *blimit, *limit,
- *thresh);
-
- store_u8_4x1(src - 6 * stride, p5q5, 0);
- store_u8_4x1(src - 5 * stride, p4q4, 0);
- store_u8_4x1(src - 4 * stride, p3q3, 0);
- store_u8_4x1(src - 3 * stride, p2q2, 0);
- store_u8_4x1(src - 2 * stride, p1q1, 0);
- store_u8_4x1(src - 1 * stride, p0q0, 0);
- store_u8_4x1(src + 0 * stride, p0q0, 1);
- store_u8_4x1(src + 1 * stride, p1q1, 1);
- store_u8_4x1(src + 2 * stride, p2q2, 1);
- store_u8_4x1(src + 3 * stride, p3q3, 1);
- store_u8_4x1(src + 4 * stride, p4q4, 1);
- store_u8_4x1(src + 5 * stride, p5q5, 1);
-}
-
-void aom_lpf_horizontal_8_neon(uint8_t *src, int stride, const uint8_t *blimit,
- const uint8_t *limit, const uint8_t *thresh) {
- uint8x8_t p0q0, p1q1, p2q2, p3q3;
-
- p3q3 = vreinterpret_u8_u32(vld1_dup_u32((uint32_t *)(src - 4 * stride)));
- p2q2 = vreinterpret_u8_u32(vld1_dup_u32((uint32_t *)(src - 3 * stride)));
- p1q1 = vreinterpret_u8_u32(vld1_dup_u32((uint32_t *)(src - 2 * stride)));
- p0q0 = vreinterpret_u8_u32(vld1_dup_u32((uint32_t *)(src - 1 * stride)));
- p0q0 = vreinterpret_u8_u32(vld1_lane_u32((uint32_t *)(src + 0 * stride),
- vreinterpret_u32_u8(p0q0), 1));
- p1q1 = vreinterpret_u8_u32(vld1_lane_u32((uint32_t *)(src + 1 * stride),
- vreinterpret_u32_u8(p1q1), 1));
- p2q2 = vreinterpret_u8_u32(vld1_lane_u32((uint32_t *)(src + 2 * stride),
- vreinterpret_u32_u8(p2q2), 1));
- p3q3 = vreinterpret_u8_u32(vld1_lane_u32((uint32_t *)(src + 3 * stride),
- vreinterpret_u32_u8(p3q3), 1));
-
- lpf_8_neon(&p3q3, &p2q2, &p1q1, &p0q0, *blimit, *limit, *thresh);
-
- vst1_lane_u32((uint32_t *)(src - 4 * stride), vreinterpret_u32_u8(p3q3), 0);
- vst1_lane_u32((uint32_t *)(src - 3 * stride), vreinterpret_u32_u8(p2q2), 0);
- vst1_lane_u32((uint32_t *)(src - 2 * stride), vreinterpret_u32_u8(p1q1), 0);
- vst1_lane_u32((uint32_t *)(src - 1 * stride), vreinterpret_u32_u8(p0q0), 0);
- vst1_lane_u32((uint32_t *)(src + 0 * stride), vreinterpret_u32_u8(p0q0), 1);
- vst1_lane_u32((uint32_t *)(src + 1 * stride), vreinterpret_u32_u8(p1q1), 1);
- vst1_lane_u32((uint32_t *)(src + 2 * stride), vreinterpret_u32_u8(p2q2), 1);
- vst1_lane_u32((uint32_t *)(src + 3 * stride), vreinterpret_u32_u8(p3q3), 1);
-}
-
-void aom_lpf_horizontal_6_neon(uint8_t *src, int stride, const uint8_t *blimit,
- const uint8_t *limit, const uint8_t *thresh) {
- uint8x8_t p0q0, p1q1, p2q2;
-
- p2q2 = vreinterpret_u8_u32(vld1_dup_u32((uint32_t *)(src - 3 * stride)));
- p1q1 = vreinterpret_u8_u32(vld1_dup_u32((uint32_t *)(src - 2 * stride)));
- p0q0 = vreinterpret_u8_u32(vld1_dup_u32((uint32_t *)(src - 1 * stride)));
- p0q0 = vreinterpret_u8_u32(vld1_lane_u32((uint32_t *)(src + 0 * stride),
- vreinterpret_u32_u8(p0q0), 1));
- p1q1 = vreinterpret_u8_u32(vld1_lane_u32((uint32_t *)(src + 1 * stride),
- vreinterpret_u32_u8(p1q1), 1));
- p2q2 = vreinterpret_u8_u32(vld1_lane_u32((uint32_t *)(src + 2 * stride),
- vreinterpret_u32_u8(p2q2), 1));
-
- lpf_6_neon(&p2q2, &p1q1, &p0q0, *blimit, *limit, *thresh);
-
- vst1_lane_u32((uint32_t *)(src - 3 * stride), vreinterpret_u32_u8(p2q2), 0);
- vst1_lane_u32((uint32_t *)(src - 2 * stride), vreinterpret_u32_u8(p1q1), 0);
- vst1_lane_u32((uint32_t *)(src - 1 * stride), vreinterpret_u32_u8(p0q0), 0);
- vst1_lane_u32((uint32_t *)(src + 0 * stride), vreinterpret_u32_u8(p0q0), 1);
- vst1_lane_u32((uint32_t *)(src + 1 * stride), vreinterpret_u32_u8(p1q1), 1);
- vst1_lane_u32((uint32_t *)(src + 2 * stride), vreinterpret_u32_u8(p2q2), 1);
-}
-
-void aom_lpf_horizontal_4_neon(uint8_t *src, int stride, const uint8_t *blimit,
- const uint8_t *limit, const uint8_t *thresh) {
- uint8x8_t p0q0, UNINITIALIZED_IS_SAFE(p1q1);
-
- load_u8_4x1(src - 2 * stride, &p1q1, 0);
- load_u8_4x1(src - 1 * stride, &p0q0, 0);
- load_u8_4x1(src + 0 * stride, &p0q0, 1);
- load_u8_4x1(src + 1 * stride, &p1q1, 1);
-
- lpf_4_neon(&p1q1, &p0q0, *blimit, *limit, *thresh);
-
- store_u8_4x1(src - 2 * stride, p1q1, 0);
- store_u8_4x1(src - 1 * stride, p0q0, 0);
- store_u8_4x1(src + 0 * stride, p0q0, 1);
- store_u8_4x1(src + 1 * stride, p1q1, 1);
-}
diff --git a/third_party/aom/aom_dsp/arm/sad4d_neon.c b/third_party/aom/aom_dsp/arm/sad4d_neon.c
deleted file mode 100644
index 606950ab2..000000000
--- a/third_party/aom/aom_dsp/arm/sad4d_neon.c
+++ /dev/null
@@ -1,226 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#include <arm_neon.h>
-
-#include "config/aom_config.h"
-#include "config/aom_dsp_rtcd.h"
-
-#include "aom/aom_integer.h"
-
-static INLINE unsigned int horizontal_long_add_16x8(const uint16x8_t vec_lo,
- const uint16x8_t vec_hi) {
- const uint32x4_t vec_l_lo =
- vaddl_u16(vget_low_u16(vec_lo), vget_high_u16(vec_lo));
- const uint32x4_t vec_l_hi =
- vaddl_u16(vget_low_u16(vec_hi), vget_high_u16(vec_hi));
- const uint32x4_t a = vaddq_u32(vec_l_lo, vec_l_hi);
- const uint64x2_t b = vpaddlq_u32(a);
- const uint32x2_t c = vadd_u32(vreinterpret_u32_u64(vget_low_u64(b)),
- vreinterpret_u32_u64(vget_high_u64(b)));
- return vget_lane_u32(c, 0);
-}
-
-// Calculate the absolute difference of 64 bytes from vec_src_00, vec_src_16,
-// vec_src_32, vec_src_48 and ref. Accumulate partial sums in vec_sum_ref_lo
-// and vec_sum_ref_hi.
-static void sad_neon_64(const uint8x16_t vec_src_00,
- const uint8x16_t vec_src_16,
- const uint8x16_t vec_src_32,
- const uint8x16_t vec_src_48, const uint8_t *ref,
- uint16x8_t *vec_sum_ref_lo,
- uint16x8_t *vec_sum_ref_hi) {
- const uint8x16_t vec_ref_00 = vld1q_u8(ref);
- const uint8x16_t vec_ref_16 = vld1q_u8(ref + 16);
- const uint8x16_t vec_ref_32 = vld1q_u8(ref + 32);
- const uint8x16_t vec_ref_48 = vld1q_u8(ref + 48);
-
- *vec_sum_ref_lo = vabal_u8(*vec_sum_ref_lo, vget_low_u8(vec_src_00),
- vget_low_u8(vec_ref_00));
- *vec_sum_ref_hi = vabal_u8(*vec_sum_ref_hi, vget_high_u8(vec_src_00),
- vget_high_u8(vec_ref_00));
- *vec_sum_ref_lo = vabal_u8(*vec_sum_ref_lo, vget_low_u8(vec_src_16),
- vget_low_u8(vec_ref_16));
- *vec_sum_ref_hi = vabal_u8(*vec_sum_ref_hi, vget_high_u8(vec_src_16),
- vget_high_u8(vec_ref_16));
- *vec_sum_ref_lo = vabal_u8(*vec_sum_ref_lo, vget_low_u8(vec_src_32),
- vget_low_u8(vec_ref_32));
- *vec_sum_ref_hi = vabal_u8(*vec_sum_ref_hi, vget_high_u8(vec_src_32),
- vget_high_u8(vec_ref_32));
- *vec_sum_ref_lo = vabal_u8(*vec_sum_ref_lo, vget_low_u8(vec_src_48),
- vget_low_u8(vec_ref_48));
- *vec_sum_ref_hi = vabal_u8(*vec_sum_ref_hi, vget_high_u8(vec_src_48),
- vget_high_u8(vec_ref_48));
-}
-
-// Calculate the absolute difference of 32 bytes from vec_src_00, vec_src_16,
-// and ref. Accumulate partial sums in vec_sum_ref_lo and vec_sum_ref_hi.
-static void sad_neon_32(const uint8x16_t vec_src_00,
- const uint8x16_t vec_src_16, const uint8_t *ref,
- uint16x8_t *vec_sum_ref_lo,
- uint16x8_t *vec_sum_ref_hi) {
- const uint8x16_t vec_ref_00 = vld1q_u8(ref);
- const uint8x16_t vec_ref_16 = vld1q_u8(ref + 16);
-
- *vec_sum_ref_lo = vabal_u8(*vec_sum_ref_lo, vget_low_u8(vec_src_00),
- vget_low_u8(vec_ref_00));
- *vec_sum_ref_hi = vabal_u8(*vec_sum_ref_hi, vget_high_u8(vec_src_00),
- vget_high_u8(vec_ref_00));
- *vec_sum_ref_lo = vabal_u8(*vec_sum_ref_lo, vget_low_u8(vec_src_16),
- vget_low_u8(vec_ref_16));
- *vec_sum_ref_hi = vabal_u8(*vec_sum_ref_hi, vget_high_u8(vec_src_16),
- vget_high_u8(vec_ref_16));
-}
-
-void aom_sad64x64x4d_neon(const uint8_t *src, int src_stride,
- const uint8_t *const ref[4], int ref_stride,
- uint32_t *res) {
- int i;
- uint16x8_t vec_sum_ref0_lo = vdupq_n_u16(0);
- uint16x8_t vec_sum_ref0_hi = vdupq_n_u16(0);
- uint16x8_t vec_sum_ref1_lo = vdupq_n_u16(0);
- uint16x8_t vec_sum_ref1_hi = vdupq_n_u16(0);
- uint16x8_t vec_sum_ref2_lo = vdupq_n_u16(0);
- uint16x8_t vec_sum_ref2_hi = vdupq_n_u16(0);
- uint16x8_t vec_sum_ref3_lo = vdupq_n_u16(0);
- uint16x8_t vec_sum_ref3_hi = vdupq_n_u16(0);
- const uint8_t *ref0, *ref1, *ref2, *ref3;
- ref0 = ref[0];
- ref1 = ref[1];
- ref2 = ref[2];
- ref3 = ref[3];
-
- for (i = 0; i < 64; ++i) {
- const uint8x16_t vec_src_00 = vld1q_u8(src);
- const uint8x16_t vec_src_16 = vld1q_u8(src + 16);
- const uint8x16_t vec_src_32 = vld1q_u8(src + 32);
- const uint8x16_t vec_src_48 = vld1q_u8(src + 48);
-
- sad_neon_64(vec_src_00, vec_src_16, vec_src_32, vec_src_48, ref0,
- &vec_sum_ref0_lo, &vec_sum_ref0_hi);
- sad_neon_64(vec_src_00, vec_src_16, vec_src_32, vec_src_48, ref1,
- &vec_sum_ref1_lo, &vec_sum_ref1_hi);
- sad_neon_64(vec_src_00, vec_src_16, vec_src_32, vec_src_48, ref2,
- &vec_sum_ref2_lo, &vec_sum_ref2_hi);
- sad_neon_64(vec_src_00, vec_src_16, vec_src_32, vec_src_48, ref3,
- &vec_sum_ref3_lo, &vec_sum_ref3_hi);
-
- src += src_stride;
- ref0 += ref_stride;
- ref1 += ref_stride;
- ref2 += ref_stride;
- ref3 += ref_stride;
- }
-
- res[0] = horizontal_long_add_16x8(vec_sum_ref0_lo, vec_sum_ref0_hi);
- res[1] = horizontal_long_add_16x8(vec_sum_ref1_lo, vec_sum_ref1_hi);
- res[2] = horizontal_long_add_16x8(vec_sum_ref2_lo, vec_sum_ref2_hi);
- res[3] = horizontal_long_add_16x8(vec_sum_ref3_lo, vec_sum_ref3_hi);
-}
-
-void aom_sad32x32x4d_neon(const uint8_t *src, int src_stride,
- const uint8_t *const ref[4], int ref_stride,
- uint32_t *res) {
- int i;
- uint16x8_t vec_sum_ref0_lo = vdupq_n_u16(0);
- uint16x8_t vec_sum_ref0_hi = vdupq_n_u16(0);
- uint16x8_t vec_sum_ref1_lo = vdupq_n_u16(0);
- uint16x8_t vec_sum_ref1_hi = vdupq_n_u16(0);
- uint16x8_t vec_sum_ref2_lo = vdupq_n_u16(0);
- uint16x8_t vec_sum_ref2_hi = vdupq_n_u16(0);
- uint16x8_t vec_sum_ref3_lo = vdupq_n_u16(0);
- uint16x8_t vec_sum_ref3_hi = vdupq_n_u16(0);
- const uint8_t *ref0, *ref1, *ref2, *ref3;
- ref0 = ref[0];
- ref1 = ref[1];
- ref2 = ref[2];
- ref3 = ref[3];
-
- for (i = 0; i < 32; ++i) {
- const uint8x16_t vec_src_00 = vld1q_u8(src);
- const uint8x16_t vec_src_16 = vld1q_u8(src + 16);
-
- sad_neon_32(vec_src_00, vec_src_16, ref0, &vec_sum_ref0_lo,
- &vec_sum_ref0_hi);
- sad_neon_32(vec_src_00, vec_src_16, ref1, &vec_sum_ref1_lo,
- &vec_sum_ref1_hi);
- sad_neon_32(vec_src_00, vec_src_16, ref2, &vec_sum_ref2_lo,
- &vec_sum_ref2_hi);
- sad_neon_32(vec_src_00, vec_src_16, ref3, &vec_sum_ref3_lo,
- &vec_sum_ref3_hi);
-
- src += src_stride;
- ref0 += ref_stride;
- ref1 += ref_stride;
- ref2 += ref_stride;
- ref3 += ref_stride;
- }
-
- res[0] = horizontal_long_add_16x8(vec_sum_ref0_lo, vec_sum_ref0_hi);
- res[1] = horizontal_long_add_16x8(vec_sum_ref1_lo, vec_sum_ref1_hi);
- res[2] = horizontal_long_add_16x8(vec_sum_ref2_lo, vec_sum_ref2_hi);
- res[3] = horizontal_long_add_16x8(vec_sum_ref3_lo, vec_sum_ref3_hi);
-}
-
-void aom_sad16x16x4d_neon(const uint8_t *src, int src_stride,
- const uint8_t *const ref[4], int ref_stride,
- uint32_t *res) {
- int i;
- uint16x8_t vec_sum_ref0_lo = vdupq_n_u16(0);
- uint16x8_t vec_sum_ref0_hi = vdupq_n_u16(0);
- uint16x8_t vec_sum_ref1_lo = vdupq_n_u16(0);
- uint16x8_t vec_sum_ref1_hi = vdupq_n_u16(0);
- uint16x8_t vec_sum_ref2_lo = vdupq_n_u16(0);
- uint16x8_t vec_sum_ref2_hi = vdupq_n_u16(0);
- uint16x8_t vec_sum_ref3_lo = vdupq_n_u16(0);
- uint16x8_t vec_sum_ref3_hi = vdupq_n_u16(0);
- const uint8_t *ref0, *ref1, *ref2, *ref3;
- ref0 = ref[0];
- ref1 = ref[1];
- ref2 = ref[2];
- ref3 = ref[3];
-
- for (i = 0; i < 16; ++i) {
- const uint8x16_t vec_src = vld1q_u8(src);
- const uint8x16_t vec_ref0 = vld1q_u8(ref0);
- const uint8x16_t vec_ref1 = vld1q_u8(ref1);
- const uint8x16_t vec_ref2 = vld1q_u8(ref2);
- const uint8x16_t vec_ref3 = vld1q_u8(ref3);
-
- vec_sum_ref0_lo =
- vabal_u8(vec_sum_ref0_lo, vget_low_u8(vec_src), vget_low_u8(vec_ref0));
- vec_sum_ref0_hi = vabal_u8(vec_sum_ref0_hi, vget_high_u8(vec_src),
- vget_high_u8(vec_ref0));
- vec_sum_ref1_lo =
- vabal_u8(vec_sum_ref1_lo, vget_low_u8(vec_src), vget_low_u8(vec_ref1));
- vec_sum_ref1_hi = vabal_u8(vec_sum_ref1_hi, vget_high_u8(vec_src),
- vget_high_u8(vec_ref1));
- vec_sum_ref2_lo =
- vabal_u8(vec_sum_ref2_lo, vget_low_u8(vec_src), vget_low_u8(vec_ref2));
- vec_sum_ref2_hi = vabal_u8(vec_sum_ref2_hi, vget_high_u8(vec_src),
- vget_high_u8(vec_ref2));
- vec_sum_ref3_lo =
- vabal_u8(vec_sum_ref3_lo, vget_low_u8(vec_src), vget_low_u8(vec_ref3));
- vec_sum_ref3_hi = vabal_u8(vec_sum_ref3_hi, vget_high_u8(vec_src),
- vget_high_u8(vec_ref3));
-
- src += src_stride;
- ref0 += ref_stride;
- ref1 += ref_stride;
- ref2 += ref_stride;
- ref3 += ref_stride;
- }
-
- res[0] = horizontal_long_add_16x8(vec_sum_ref0_lo, vec_sum_ref0_hi);
- res[1] = horizontal_long_add_16x8(vec_sum_ref1_lo, vec_sum_ref1_hi);
- res[2] = horizontal_long_add_16x8(vec_sum_ref2_lo, vec_sum_ref2_hi);
- res[3] = horizontal_long_add_16x8(vec_sum_ref3_lo, vec_sum_ref3_hi);
-}
diff --git a/third_party/aom/aom_dsp/arm/sad_neon.c b/third_party/aom/aom_dsp/arm/sad_neon.c
deleted file mode 100644
index a39de91d6..000000000
--- a/third_party/aom/aom_dsp/arm/sad_neon.c
+++ /dev/null
@@ -1,224 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#include <arm_neon.h>
-
-#include "config/aom_config.h"
-
-#include "aom/aom_integer.h"
-
-unsigned int aom_sad8x16_neon(unsigned char *src_ptr, int src_stride,
- unsigned char *ref_ptr, int ref_stride) {
- uint8x8_t d0, d8;
- uint16x8_t q12;
- uint32x4_t q1;
- uint64x2_t q3;
- uint32x2_t d5;
- int i;
-
- d0 = vld1_u8(src_ptr);
- src_ptr += src_stride;
- d8 = vld1_u8(ref_ptr);
- ref_ptr += ref_stride;
- q12 = vabdl_u8(d0, d8);
-
- for (i = 0; i < 15; i++) {
- d0 = vld1_u8(src_ptr);
- src_ptr += src_stride;
- d8 = vld1_u8(ref_ptr);
- ref_ptr += ref_stride;
- q12 = vabal_u8(q12, d0, d8);
- }
-
- q1 = vpaddlq_u16(q12);
- q3 = vpaddlq_u32(q1);
- d5 = vadd_u32(vreinterpret_u32_u64(vget_low_u64(q3)),
- vreinterpret_u32_u64(vget_high_u64(q3)));
-
- return vget_lane_u32(d5, 0);
-}
-
-unsigned int aom_sad4x4_neon(unsigned char *src_ptr, int src_stride,
- unsigned char *ref_ptr, int ref_stride) {
- uint8x8_t d0, d8;
- uint16x8_t q12;
- uint32x2_t d1;
- uint64x1_t d3;
- int i;
-
- d0 = vld1_u8(src_ptr);
- src_ptr += src_stride;
- d8 = vld1_u8(ref_ptr);
- ref_ptr += ref_stride;
- q12 = vabdl_u8(d0, d8);
-
- for (i = 0; i < 3; i++) {
- d0 = vld1_u8(src_ptr);
- src_ptr += src_stride;
- d8 = vld1_u8(ref_ptr);
- ref_ptr += ref_stride;
- q12 = vabal_u8(q12, d0, d8);
- }
-
- d1 = vpaddl_u16(vget_low_u16(q12));
- d3 = vpaddl_u32(d1);
-
- return vget_lane_u32(vreinterpret_u32_u64(d3), 0);
-}
-
-unsigned int aom_sad16x8_neon(unsigned char *src_ptr, int src_stride,
- unsigned char *ref_ptr, int ref_stride) {
- uint8x16_t q0, q4;
- uint16x8_t q12, q13;
- uint32x4_t q1;
- uint64x2_t q3;
- uint32x2_t d5;
- int i;
-
- q0 = vld1q_u8(src_ptr);
- src_ptr += src_stride;
- q4 = vld1q_u8(ref_ptr);
- ref_ptr += ref_stride;
- q12 = vabdl_u8(vget_low_u8(q0), vget_low_u8(q4));
- q13 = vabdl_u8(vget_high_u8(q0), vget_high_u8(q4));
-
- for (i = 0; i < 7; i++) {
- q0 = vld1q_u8(src_ptr);
- src_ptr += src_stride;
- q4 = vld1q_u8(ref_ptr);
- ref_ptr += ref_stride;
- q12 = vabal_u8(q12, vget_low_u8(q0), vget_low_u8(q4));
- q13 = vabal_u8(q13, vget_high_u8(q0), vget_high_u8(q4));
- }
-
- q12 = vaddq_u16(q12, q13);
- q1 = vpaddlq_u16(q12);
- q3 = vpaddlq_u32(q1);
- d5 = vadd_u32(vreinterpret_u32_u64(vget_low_u64(q3)),
- vreinterpret_u32_u64(vget_high_u64(q3)));
-
- return vget_lane_u32(d5, 0);
-}
-
-static INLINE unsigned int horizontal_long_add_16x8(const uint16x8_t vec_lo,
- const uint16x8_t vec_hi) {
- const uint32x4_t vec_l_lo =
- vaddl_u16(vget_low_u16(vec_lo), vget_high_u16(vec_lo));
- const uint32x4_t vec_l_hi =
- vaddl_u16(vget_low_u16(vec_hi), vget_high_u16(vec_hi));
- const uint32x4_t a = vaddq_u32(vec_l_lo, vec_l_hi);
- const uint64x2_t b = vpaddlq_u32(a);
- const uint32x2_t c = vadd_u32(vreinterpret_u32_u64(vget_low_u64(b)),
- vreinterpret_u32_u64(vget_high_u64(b)));
- return vget_lane_u32(c, 0);
-}
-static INLINE unsigned int horizontal_add_16x8(const uint16x8_t vec_16x8) {
- const uint32x4_t a = vpaddlq_u16(vec_16x8);
- const uint64x2_t b = vpaddlq_u32(a);
- const uint32x2_t c = vadd_u32(vreinterpret_u32_u64(vget_low_u64(b)),
- vreinterpret_u32_u64(vget_high_u64(b)));
- return vget_lane_u32(c, 0);
-}
-
-unsigned int aom_sad64x64_neon(const uint8_t *src, int src_stride,
- const uint8_t *ref, int ref_stride) {
- int i;
- uint16x8_t vec_accum_lo = vdupq_n_u16(0);
- uint16x8_t vec_accum_hi = vdupq_n_u16(0);
- for (i = 0; i < 64; ++i) {
- const uint8x16_t vec_src_00 = vld1q_u8(src);
- const uint8x16_t vec_src_16 = vld1q_u8(src + 16);
- const uint8x16_t vec_src_32 = vld1q_u8(src + 32);
- const uint8x16_t vec_src_48 = vld1q_u8(src + 48);
- const uint8x16_t vec_ref_00 = vld1q_u8(ref);
- const uint8x16_t vec_ref_16 = vld1q_u8(ref + 16);
- const uint8x16_t vec_ref_32 = vld1q_u8(ref + 32);
- const uint8x16_t vec_ref_48 = vld1q_u8(ref + 48);
- src += src_stride;
- ref += ref_stride;
- vec_accum_lo = vabal_u8(vec_accum_lo, vget_low_u8(vec_src_00),
- vget_low_u8(vec_ref_00));
- vec_accum_hi = vabal_u8(vec_accum_hi, vget_high_u8(vec_src_00),
- vget_high_u8(vec_ref_00));
- vec_accum_lo = vabal_u8(vec_accum_lo, vget_low_u8(vec_src_16),
- vget_low_u8(vec_ref_16));
- vec_accum_hi = vabal_u8(vec_accum_hi, vget_high_u8(vec_src_16),
- vget_high_u8(vec_ref_16));
- vec_accum_lo = vabal_u8(vec_accum_lo, vget_low_u8(vec_src_32),
- vget_low_u8(vec_ref_32));
- vec_accum_hi = vabal_u8(vec_accum_hi, vget_high_u8(vec_src_32),
- vget_high_u8(vec_ref_32));
- vec_accum_lo = vabal_u8(vec_accum_lo, vget_low_u8(vec_src_48),
- vget_low_u8(vec_ref_48));
- vec_accum_hi = vabal_u8(vec_accum_hi, vget_high_u8(vec_src_48),
- vget_high_u8(vec_ref_48));
- }
- return horizontal_long_add_16x8(vec_accum_lo, vec_accum_hi);
-}
-
-unsigned int aom_sad32x32_neon(const uint8_t *src, int src_stride,
- const uint8_t *ref, int ref_stride) {
- int i;
- uint16x8_t vec_accum_lo = vdupq_n_u16(0);
- uint16x8_t vec_accum_hi = vdupq_n_u16(0);
-
- for (i = 0; i < 32; ++i) {
- const uint8x16_t vec_src_00 = vld1q_u8(src);
- const uint8x16_t vec_src_16 = vld1q_u8(src + 16);
- const uint8x16_t vec_ref_00 = vld1q_u8(ref);
- const uint8x16_t vec_ref_16 = vld1q_u8(ref + 16);
- src += src_stride;
- ref += ref_stride;
- vec_accum_lo = vabal_u8(vec_accum_lo, vget_low_u8(vec_src_00),
- vget_low_u8(vec_ref_00));
- vec_accum_hi = vabal_u8(vec_accum_hi, vget_high_u8(vec_src_00),
- vget_high_u8(vec_ref_00));
- vec_accum_lo = vabal_u8(vec_accum_lo, vget_low_u8(vec_src_16),
- vget_low_u8(vec_ref_16));
- vec_accum_hi = vabal_u8(vec_accum_hi, vget_high_u8(vec_src_16),
- vget_high_u8(vec_ref_16));
- }
- return horizontal_add_16x8(vaddq_u16(vec_accum_lo, vec_accum_hi));
-}
-
-unsigned int aom_sad16x16_neon(const uint8_t *src, int src_stride,
- const uint8_t *ref, int ref_stride) {
- int i;
- uint16x8_t vec_accum_lo = vdupq_n_u16(0);
- uint16x8_t vec_accum_hi = vdupq_n_u16(0);
-
- for (i = 0; i < 16; ++i) {
- const uint8x16_t vec_src = vld1q_u8(src);
- const uint8x16_t vec_ref = vld1q_u8(ref);
- src += src_stride;
- ref += ref_stride;
- vec_accum_lo =
- vabal_u8(vec_accum_lo, vget_low_u8(vec_src), vget_low_u8(vec_ref));
- vec_accum_hi =
- vabal_u8(vec_accum_hi, vget_high_u8(vec_src), vget_high_u8(vec_ref));
- }
- return horizontal_add_16x8(vaddq_u16(vec_accum_lo, vec_accum_hi));
-}
-
-unsigned int aom_sad8x8_neon(const uint8_t *src, int src_stride,
- const uint8_t *ref, int ref_stride) {
- int i;
- uint16x8_t vec_accum = vdupq_n_u16(0);
-
- for (i = 0; i < 8; ++i) {
- const uint8x8_t vec_src = vld1_u8(src);
- const uint8x8_t vec_ref = vld1_u8(ref);
- src += src_stride;
- ref += ref_stride;
- vec_accum = vabal_u8(vec_accum, vec_src, vec_ref);
- }
- return horizontal_add_16x8(vec_accum);
-}
diff --git a/third_party/aom/aom_dsp/arm/subpel_variance_neon.c b/third_party/aom/aom_dsp/arm/subpel_variance_neon.c
deleted file mode 100644
index cf618eee7..000000000
--- a/third_party/aom/aom_dsp/arm/subpel_variance_neon.c
+++ /dev/null
@@ -1,131 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#include <arm_neon.h>
-
-#include "config/aom_dsp_rtcd.h"
-#include "config/aom_config.h"
-
-#include "aom_ports/mem.h"
-#include "aom/aom_integer.h"
-
-#include "aom_dsp/aom_filter.h"
-#include "aom_dsp/variance.h"
-
-static void var_filter_block2d_bil_w8(const uint8_t *src_ptr,
- uint8_t *output_ptr,
- unsigned int src_pixels_per_line,
- int pixel_step,
- unsigned int output_height,
- unsigned int output_width,
- const uint8_t *filter) {
- const uint8x8_t f0 = vmov_n_u8(filter[0]);
- const uint8x8_t f1 = vmov_n_u8(filter[1]);
- unsigned int i;
- for (i = 0; i < output_height; ++i) {
- const uint8x8_t src_0 = vld1_u8(&src_ptr[0]);
- const uint8x8_t src_1 = vld1_u8(&src_ptr[pixel_step]);
- const uint16x8_t a = vmull_u8(src_0, f0);
- const uint16x8_t b = vmlal_u8(a, src_1, f1);
- const uint8x8_t out = vrshrn_n_u16(b, FILTER_BITS);
- vst1_u8(&output_ptr[0], out);
- // Next row...
- src_ptr += src_pixels_per_line;
- output_ptr += output_width;
- }
-}
-
-static void var_filter_block2d_bil_w16(const uint8_t *src_ptr,
- uint8_t *output_ptr,
- unsigned int src_pixels_per_line,
- int pixel_step,
- unsigned int output_height,
- unsigned int output_width,
- const uint8_t *filter) {
- const uint8x8_t f0 = vmov_n_u8(filter[0]);
- const uint8x8_t f1 = vmov_n_u8(filter[1]);
- unsigned int i, j;
- for (i = 0; i < output_height; ++i) {
- for (j = 0; j < output_width; j += 16) {
- const uint8x16_t src_0 = vld1q_u8(&src_ptr[j]);
- const uint8x16_t src_1 = vld1q_u8(&src_ptr[j + pixel_step]);
- const uint16x8_t a = vmull_u8(vget_low_u8(src_0), f0);
- const uint16x8_t b = vmlal_u8(a, vget_low_u8(src_1), f1);
- const uint8x8_t out_lo = vrshrn_n_u16(b, FILTER_BITS);
- const uint16x8_t c = vmull_u8(vget_high_u8(src_0), f0);
- const uint16x8_t d = vmlal_u8(c, vget_high_u8(src_1), f1);
- const uint8x8_t out_hi = vrshrn_n_u16(d, FILTER_BITS);
- vst1q_u8(&output_ptr[j], vcombine_u8(out_lo, out_hi));
- }
- // Next row...
- src_ptr += src_pixels_per_line;
- output_ptr += output_width;
- }
-}
-
-unsigned int aom_sub_pixel_variance8x8_neon(const uint8_t *src, int src_stride,
- int xoffset, int yoffset,
- const uint8_t *dst, int dst_stride,
- unsigned int *sse) {
- DECLARE_ALIGNED(16, uint8_t, temp2[8 * 8]);
- DECLARE_ALIGNED(16, uint8_t, fdata3[9 * 8]);
-
- var_filter_block2d_bil_w8(src, fdata3, src_stride, 1, 9, 8,
- bilinear_filters_2t[xoffset]);
- var_filter_block2d_bil_w8(fdata3, temp2, 8, 8, 8, 8,
- bilinear_filters_2t[yoffset]);
- return aom_variance8x8_neon(temp2, 8, dst, dst_stride, sse);
-}
-
-unsigned int aom_sub_pixel_variance16x16_neon(const uint8_t *src,
- int src_stride, int xoffset,
- int yoffset, const uint8_t *dst,
- int dst_stride,
- unsigned int *sse) {
- DECLARE_ALIGNED(16, uint8_t, temp2[16 * 16]);
- DECLARE_ALIGNED(16, uint8_t, fdata3[17 * 16]);
-
- var_filter_block2d_bil_w16(src, fdata3, src_stride, 1, 17, 16,
- bilinear_filters_2t[xoffset]);
- var_filter_block2d_bil_w16(fdata3, temp2, 16, 16, 16, 16,
- bilinear_filters_2t[yoffset]);
- return aom_variance16x16_neon(temp2, 16, dst, dst_stride, sse);
-}
-
-unsigned int aom_sub_pixel_variance32x32_neon(const uint8_t *src,
- int src_stride, int xoffset,
- int yoffset, const uint8_t *dst,
- int dst_stride,
- unsigned int *sse) {
- DECLARE_ALIGNED(16, uint8_t, temp2[32 * 32]);
- DECLARE_ALIGNED(16, uint8_t, fdata3[33 * 32]);
-
- var_filter_block2d_bil_w16(src, fdata3, src_stride, 1, 33, 32,
- bilinear_filters_2t[xoffset]);
- var_filter_block2d_bil_w16(fdata3, temp2, 32, 32, 32, 32,
- bilinear_filters_2t[yoffset]);
- return aom_variance32x32_neon(temp2, 32, dst, dst_stride, sse);
-}
-
-unsigned int aom_sub_pixel_variance64x64_neon(const uint8_t *src,
- int src_stride, int xoffset,
- int yoffset, const uint8_t *dst,
- int dst_stride,
- unsigned int *sse) {
- DECLARE_ALIGNED(16, uint8_t, temp2[64 * 64]);
- DECLARE_ALIGNED(16, uint8_t, fdata3[65 * 64]);
-
- var_filter_block2d_bil_w16(src, fdata3, src_stride, 1, 65, 64,
- bilinear_filters_2t[xoffset]);
- var_filter_block2d_bil_w16(fdata3, temp2, 64, 64, 64, 64,
- bilinear_filters_2t[yoffset]);
- return aom_variance64x64_neon(temp2, 64, dst, dst_stride, sse);
-}
diff --git a/third_party/aom/aom_dsp/arm/subtract_neon.c b/third_party/aom/aom_dsp/arm/subtract_neon.c
deleted file mode 100644
index 28f5ace8e..000000000
--- a/third_party/aom/aom_dsp/arm/subtract_neon.c
+++ /dev/null
@@ -1,81 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#include <arm_neon.h>
-
-#include "config/aom_config.h"
-
-#include "aom/aom_integer.h"
-
-void aom_subtract_block_neon(int rows, int cols, int16_t *diff,
- ptrdiff_t diff_stride, const uint8_t *src,
- ptrdiff_t src_stride, const uint8_t *pred,
- ptrdiff_t pred_stride) {
- int r, c;
-
- if (cols > 16) {
- for (r = 0; r < rows; ++r) {
- for (c = 0; c < cols; c += 32) {
- const uint8x16_t v_src_00 = vld1q_u8(&src[c + 0]);
- const uint8x16_t v_src_16 = vld1q_u8(&src[c + 16]);
- const uint8x16_t v_pred_00 = vld1q_u8(&pred[c + 0]);
- const uint8x16_t v_pred_16 = vld1q_u8(&pred[c + 16]);
- const uint16x8_t v_diff_lo_00 =
- vsubl_u8(vget_low_u8(v_src_00), vget_low_u8(v_pred_00));
- const uint16x8_t v_diff_hi_00 =
- vsubl_u8(vget_high_u8(v_src_00), vget_high_u8(v_pred_00));
- const uint16x8_t v_diff_lo_16 =
- vsubl_u8(vget_low_u8(v_src_16), vget_low_u8(v_pred_16));
- const uint16x8_t v_diff_hi_16 =
- vsubl_u8(vget_high_u8(v_src_16), vget_high_u8(v_pred_16));
- vst1q_s16(&diff[c + 0], vreinterpretq_s16_u16(v_diff_lo_00));
- vst1q_s16(&diff[c + 8], vreinterpretq_s16_u16(v_diff_hi_00));
- vst1q_s16(&diff[c + 16], vreinterpretq_s16_u16(v_diff_lo_16));
- vst1q_s16(&diff[c + 24], vreinterpretq_s16_u16(v_diff_hi_16));
- }
- diff += diff_stride;
- pred += pred_stride;
- src += src_stride;
- }
- } else if (cols > 8) {
- for (r = 0; r < rows; ++r) {
- const uint8x16_t v_src = vld1q_u8(&src[0]);
- const uint8x16_t v_pred = vld1q_u8(&pred[0]);
- const uint16x8_t v_diff_lo =
- vsubl_u8(vget_low_u8(v_src), vget_low_u8(v_pred));
- const uint16x8_t v_diff_hi =
- vsubl_u8(vget_high_u8(v_src), vget_high_u8(v_pred));
- vst1q_s16(&diff[0], vreinterpretq_s16_u16(v_diff_lo));
- vst1q_s16(&diff[8], vreinterpretq_s16_u16(v_diff_hi));
- diff += diff_stride;
- pred += pred_stride;
- src += src_stride;
- }
- } else if (cols > 4) {
- for (r = 0; r < rows; ++r) {
- const uint8x8_t v_src = vld1_u8(&src[0]);
- const uint8x8_t v_pred = vld1_u8(&pred[0]);
- const uint16x8_t v_diff = vsubl_u8(v_src, v_pred);
- vst1q_s16(&diff[0], vreinterpretq_s16_u16(v_diff));
- diff += diff_stride;
- pred += pred_stride;
- src += src_stride;
- }
- } else {
- for (r = 0; r < rows; ++r) {
- for (c = 0; c < cols; ++c) diff[c] = src[c] - pred[c];
-
- diff += diff_stride;
- pred += pred_stride;
- src += src_stride;
- }
- }
-}
diff --git a/third_party/aom/aom_dsp/arm/variance_neon.c b/third_party/aom/aom_dsp/arm/variance_neon.c
deleted file mode 100644
index 74385a601..000000000
--- a/third_party/aom/aom_dsp/arm/variance_neon.c
+++ /dev/null
@@ -1,400 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#include <arm_neon.h>
-
-#include "config/aom_dsp_rtcd.h"
-#include "config/aom_config.h"
-
-#include "aom/aom_integer.h"
-#include "aom_ports/mem.h"
-
-static INLINE int horizontal_add_s16x8(const int16x8_t v_16x8) {
- const int32x4_t a = vpaddlq_s16(v_16x8);
- const int64x2_t b = vpaddlq_s32(a);
- const int32x2_t c = vadd_s32(vreinterpret_s32_s64(vget_low_s64(b)),
- vreinterpret_s32_s64(vget_high_s64(b)));
- return vget_lane_s32(c, 0);
-}
-
-static INLINE int horizontal_add_s32x4(const int32x4_t v_32x4) {
- const int64x2_t b = vpaddlq_s32(v_32x4);
- const int32x2_t c = vadd_s32(vreinterpret_s32_s64(vget_low_s64(b)),
- vreinterpret_s32_s64(vget_high_s64(b)));
- return vget_lane_s32(c, 0);
-}
-
-// w * h must be less than 2048 or local variable v_sum may overflow.
-static void variance_neon_w8(const uint8_t *a, int a_stride, const uint8_t *b,
- int b_stride, int w, int h, uint32_t *sse,
- int *sum) {
- int i, j;
- int16x8_t v_sum = vdupq_n_s16(0);
- int32x4_t v_sse_lo = vdupq_n_s32(0);
- int32x4_t v_sse_hi = vdupq_n_s32(0);
-
- for (i = 0; i < h; ++i) {
- for (j = 0; j < w; j += 8) {
- const uint8x8_t v_a = vld1_u8(&a[j]);
- const uint8x8_t v_b = vld1_u8(&b[j]);
- const uint16x8_t v_diff = vsubl_u8(v_a, v_b);
- const int16x8_t sv_diff = vreinterpretq_s16_u16(v_diff);
- v_sum = vaddq_s16(v_sum, sv_diff);
- v_sse_lo =
- vmlal_s16(v_sse_lo, vget_low_s16(sv_diff), vget_low_s16(sv_diff));
- v_sse_hi =
- vmlal_s16(v_sse_hi, vget_high_s16(sv_diff), vget_high_s16(sv_diff));
- }
- a += a_stride;
- b += b_stride;
- }
-
- *sum = horizontal_add_s16x8(v_sum);
- *sse = (unsigned int)horizontal_add_s32x4(vaddq_s32(v_sse_lo, v_sse_hi));
-}
-
-void aom_get8x8var_neon(const uint8_t *a, int a_stride, const uint8_t *b,
- int b_stride, unsigned int *sse, int *sum) {
- variance_neon_w8(a, a_stride, b, b_stride, 8, 8, sse, sum);
-}
-
-void aom_get16x16var_neon(const uint8_t *a, int a_stride, const uint8_t *b,
- int b_stride, unsigned int *sse, int *sum) {
- variance_neon_w8(a, a_stride, b, b_stride, 16, 16, sse, sum);
-}
-
-unsigned int aom_variance8x8_neon(const uint8_t *a, int a_stride,
- const uint8_t *b, int b_stride,
- unsigned int *sse) {
- int sum;
- variance_neon_w8(a, a_stride, b, b_stride, 8, 8, sse, &sum);
- return *sse - ((sum * sum) >> 6);
-}
-
-unsigned int aom_variance16x16_neon(const uint8_t *a, int a_stride,
- const uint8_t *b, int b_stride,
- unsigned int *sse) {
- int sum;
- variance_neon_w8(a, a_stride, b, b_stride, 16, 16, sse, &sum);
- return *sse - (((unsigned int)((int64_t)sum * sum)) >> 8);
-}
-
-unsigned int aom_variance32x32_neon(const uint8_t *a, int a_stride,
- const uint8_t *b, int b_stride,
- unsigned int *sse) {
- int sum;
- variance_neon_w8(a, a_stride, b, b_stride, 32, 32, sse, &sum);
- return *sse - (unsigned int)(((int64_t)sum * sum) >> 10);
-}
-
-unsigned int aom_variance32x64_neon(const uint8_t *a, int a_stride,
- const uint8_t *b, int b_stride,
- unsigned int *sse) {
- int sum1, sum2;
- uint32_t sse1, sse2;
- variance_neon_w8(a, a_stride, b, b_stride, 32, 32, &sse1, &sum1);
- variance_neon_w8(a + (32 * a_stride), a_stride, b + (32 * b_stride), b_stride,
- 32, 32, &sse2, &sum2);
- *sse = sse1 + sse2;
- sum1 += sum2;
- return *sse - (unsigned int)(((int64_t)sum1 * sum1) >> 11);
-}
-
-unsigned int aom_variance64x32_neon(const uint8_t *a, int a_stride,
- const uint8_t *b, int b_stride,
- unsigned int *sse) {
- int sum1, sum2;
- uint32_t sse1, sse2;
- variance_neon_w8(a, a_stride, b, b_stride, 64, 16, &sse1, &sum1);
- variance_neon_w8(a + (16 * a_stride), a_stride, b + (16 * b_stride), b_stride,
- 64, 16, &sse2, &sum2);
- *sse = sse1 + sse2;
- sum1 += sum2;
- return *sse - (unsigned int)(((int64_t)sum1 * sum1) >> 11);
-}
-
-unsigned int aom_variance64x64_neon(const uint8_t *a, int a_stride,
- const uint8_t *b, int b_stride,
- unsigned int *sse) {
- int sum1, sum2;
- uint32_t sse1, sse2;
-
- variance_neon_w8(a, a_stride, b, b_stride, 64, 16, &sse1, &sum1);
- variance_neon_w8(a + (16 * a_stride), a_stride, b + (16 * b_stride), b_stride,
- 64, 16, &sse2, &sum2);
- sse1 += sse2;
- sum1 += sum2;
-
- variance_neon_w8(a + (16 * 2 * a_stride), a_stride, b + (16 * 2 * b_stride),
- b_stride, 64, 16, &sse2, &sum2);
- sse1 += sse2;
- sum1 += sum2;
-
- variance_neon_w8(a + (16 * 3 * a_stride), a_stride, b + (16 * 3 * b_stride),
- b_stride, 64, 16, &sse2, &sum2);
- *sse = sse1 + sse2;
- sum1 += sum2;
- return *sse - (unsigned int)(((int64_t)sum1 * sum1) >> 12);
-}
-
-unsigned int aom_variance16x8_neon(const unsigned char *src_ptr,
- int source_stride,
- const unsigned char *ref_ptr,
- int recon_stride, unsigned int *sse) {
- int i;
- int16x4_t d22s16, d23s16, d24s16, d25s16, d26s16, d27s16, d28s16, d29s16;
- uint32x2_t d0u32, d10u32;
- int64x1_t d0s64, d1s64;
- uint8x16_t q0u8, q1u8, q2u8, q3u8;
- uint16x8_t q11u16, q12u16, q13u16, q14u16;
- int32x4_t q8s32, q9s32, q10s32;
- int64x2_t q0s64, q1s64, q5s64;
-
- q8s32 = vdupq_n_s32(0);
- q9s32 = vdupq_n_s32(0);
- q10s32 = vdupq_n_s32(0);
-
- for (i = 0; i < 4; i++) {
- q0u8 = vld1q_u8(src_ptr);
- src_ptr += source_stride;
- q1u8 = vld1q_u8(src_ptr);
- src_ptr += source_stride;
- __builtin_prefetch(src_ptr);
-
- q2u8 = vld1q_u8(ref_ptr);
- ref_ptr += recon_stride;
- q3u8 = vld1q_u8(ref_ptr);
- ref_ptr += recon_stride;
- __builtin_prefetch(ref_ptr);
-
- q11u16 = vsubl_u8(vget_low_u8(q0u8), vget_low_u8(q2u8));
- q12u16 = vsubl_u8(vget_high_u8(q0u8), vget_high_u8(q2u8));
- q13u16 = vsubl_u8(vget_low_u8(q1u8), vget_low_u8(q3u8));
- q14u16 = vsubl_u8(vget_high_u8(q1u8), vget_high_u8(q3u8));
-
- d22s16 = vreinterpret_s16_u16(vget_low_u16(q11u16));
- d23s16 = vreinterpret_s16_u16(vget_high_u16(q11u16));
- q8s32 = vpadalq_s16(q8s32, vreinterpretq_s16_u16(q11u16));
- q9s32 = vmlal_s16(q9s32, d22s16, d22s16);
- q10s32 = vmlal_s16(q10s32, d23s16, d23s16);
-
- d24s16 = vreinterpret_s16_u16(vget_low_u16(q12u16));
- d25s16 = vreinterpret_s16_u16(vget_high_u16(q12u16));
- q8s32 = vpadalq_s16(q8s32, vreinterpretq_s16_u16(q12u16));
- q9s32 = vmlal_s16(q9s32, d24s16, d24s16);
- q10s32 = vmlal_s16(q10s32, d25s16, d25s16);
-
- d26s16 = vreinterpret_s16_u16(vget_low_u16(q13u16));
- d27s16 = vreinterpret_s16_u16(vget_high_u16(q13u16));
- q8s32 = vpadalq_s16(q8s32, vreinterpretq_s16_u16(q13u16));
- q9s32 = vmlal_s16(q9s32, d26s16, d26s16);
- q10s32 = vmlal_s16(q10s32, d27s16, d27s16);
-
- d28s16 = vreinterpret_s16_u16(vget_low_u16(q14u16));
- d29s16 = vreinterpret_s16_u16(vget_high_u16(q14u16));
- q8s32 = vpadalq_s16(q8s32, vreinterpretq_s16_u16(q14u16));
- q9s32 = vmlal_s16(q9s32, d28s16, d28s16);
- q10s32 = vmlal_s16(q10s32, d29s16, d29s16);
- }
-
- q10s32 = vaddq_s32(q10s32, q9s32);
- q0s64 = vpaddlq_s32(q8s32);
- q1s64 = vpaddlq_s32(q10s32);
-
- d0s64 = vadd_s64(vget_low_s64(q0s64), vget_high_s64(q0s64));
- d1s64 = vadd_s64(vget_low_s64(q1s64), vget_high_s64(q1s64));
-
- q5s64 = vmull_s32(vreinterpret_s32_s64(d0s64), vreinterpret_s32_s64(d0s64));
- vst1_lane_u32((uint32_t *)sse, vreinterpret_u32_s64(d1s64), 0);
-
- d10u32 = vshr_n_u32(vreinterpret_u32_s64(vget_low_s64(q5s64)), 7);
- d0u32 = vsub_u32(vreinterpret_u32_s64(d1s64), d10u32);
-
- return vget_lane_u32(d0u32, 0);
-}
-
-unsigned int aom_variance8x16_neon(const unsigned char *src_ptr,
- int source_stride,
- const unsigned char *ref_ptr,
- int recon_stride, unsigned int *sse) {
- int i;
- uint8x8_t d0u8, d2u8, d4u8, d6u8;
- int16x4_t d22s16, d23s16, d24s16, d25s16;
- uint32x2_t d0u32, d10u32;
- int64x1_t d0s64, d1s64;
- uint16x8_t q11u16, q12u16;
- int32x4_t q8s32, q9s32, q10s32;
- int64x2_t q0s64, q1s64, q5s64;
-
- q8s32 = vdupq_n_s32(0);
- q9s32 = vdupq_n_s32(0);
- q10s32 = vdupq_n_s32(0);
-
- for (i = 0; i < 8; i++) {
- d0u8 = vld1_u8(src_ptr);
- src_ptr += source_stride;
- d2u8 = vld1_u8(src_ptr);
- src_ptr += source_stride;
- __builtin_prefetch(src_ptr);
-
- d4u8 = vld1_u8(ref_ptr);
- ref_ptr += recon_stride;
- d6u8 = vld1_u8(ref_ptr);
- ref_ptr += recon_stride;
- __builtin_prefetch(ref_ptr);
-
- q11u16 = vsubl_u8(d0u8, d4u8);
- q12u16 = vsubl_u8(d2u8, d6u8);
-
- d22s16 = vreinterpret_s16_u16(vget_low_u16(q11u16));
- d23s16 = vreinterpret_s16_u16(vget_high_u16(q11u16));
- q8s32 = vpadalq_s16(q8s32, vreinterpretq_s16_u16(q11u16));
- q9s32 = vmlal_s16(q9s32, d22s16, d22s16);
- q10s32 = vmlal_s16(q10s32, d23s16, d23s16);
-
- d24s16 = vreinterpret_s16_u16(vget_low_u16(q12u16));
- d25s16 = vreinterpret_s16_u16(vget_high_u16(q12u16));
- q8s32 = vpadalq_s16(q8s32, vreinterpretq_s16_u16(q12u16));
- q9s32 = vmlal_s16(q9s32, d24s16, d24s16);
- q10s32 = vmlal_s16(q10s32, d25s16, d25s16);
- }
-
- q10s32 = vaddq_s32(q10s32, q9s32);
- q0s64 = vpaddlq_s32(q8s32);
- q1s64 = vpaddlq_s32(q10s32);
-
- d0s64 = vadd_s64(vget_low_s64(q0s64), vget_high_s64(q0s64));
- d1s64 = vadd_s64(vget_low_s64(q1s64), vget_high_s64(q1s64));
-
- q5s64 = vmull_s32(vreinterpret_s32_s64(d0s64), vreinterpret_s32_s64(d0s64));
- vst1_lane_u32((uint32_t *)sse, vreinterpret_u32_s64(d1s64), 0);
-
- d10u32 = vshr_n_u32(vreinterpret_u32_s64(vget_low_s64(q5s64)), 7);
- d0u32 = vsub_u32(vreinterpret_u32_s64(d1s64), d10u32);
-
- return vget_lane_u32(d0u32, 0);
-}
-
-unsigned int aom_mse16x16_neon(const unsigned char *src_ptr, int source_stride,
- const unsigned char *ref_ptr, int recon_stride,
- unsigned int *sse) {
- int i;
- int16x4_t d22s16, d23s16, d24s16, d25s16, d26s16, d27s16, d28s16, d29s16;
- int64x1_t d0s64;
- uint8x16_t q0u8, q1u8, q2u8, q3u8;
- int32x4_t q7s32, q8s32, q9s32, q10s32;
- uint16x8_t q11u16, q12u16, q13u16, q14u16;
- int64x2_t q1s64;
-
- q7s32 = vdupq_n_s32(0);
- q8s32 = vdupq_n_s32(0);
- q9s32 = vdupq_n_s32(0);
- q10s32 = vdupq_n_s32(0);
-
- for (i = 0; i < 8; i++) { // mse16x16_neon_loop
- q0u8 = vld1q_u8(src_ptr);
- src_ptr += source_stride;
- q1u8 = vld1q_u8(src_ptr);
- src_ptr += source_stride;
- q2u8 = vld1q_u8(ref_ptr);
- ref_ptr += recon_stride;
- q3u8 = vld1q_u8(ref_ptr);
- ref_ptr += recon_stride;
-
- q11u16 = vsubl_u8(vget_low_u8(q0u8), vget_low_u8(q2u8));
- q12u16 = vsubl_u8(vget_high_u8(q0u8), vget_high_u8(q2u8));
- q13u16 = vsubl_u8(vget_low_u8(q1u8), vget_low_u8(q3u8));
- q14u16 = vsubl_u8(vget_high_u8(q1u8), vget_high_u8(q3u8));
-
- d22s16 = vreinterpret_s16_u16(vget_low_u16(q11u16));
- d23s16 = vreinterpret_s16_u16(vget_high_u16(q11u16));
- q7s32 = vmlal_s16(q7s32, d22s16, d22s16);
- q8s32 = vmlal_s16(q8s32, d23s16, d23s16);
-
- d24s16 = vreinterpret_s16_u16(vget_low_u16(q12u16));
- d25s16 = vreinterpret_s16_u16(vget_high_u16(q12u16));
- q9s32 = vmlal_s16(q9s32, d24s16, d24s16);
- q10s32 = vmlal_s16(q10s32, d25s16, d25s16);
-
- d26s16 = vreinterpret_s16_u16(vget_low_u16(q13u16));
- d27s16 = vreinterpret_s16_u16(vget_high_u16(q13u16));
- q7s32 = vmlal_s16(q7s32, d26s16, d26s16);
- q8s32 = vmlal_s16(q8s32, d27s16, d27s16);
-
- d28s16 = vreinterpret_s16_u16(vget_low_u16(q14u16));
- d29s16 = vreinterpret_s16_u16(vget_high_u16(q14u16));
- q9s32 = vmlal_s16(q9s32, d28s16, d28s16);
- q10s32 = vmlal_s16(q10s32, d29s16, d29s16);
- }
-
- q7s32 = vaddq_s32(q7s32, q8s32);
- q9s32 = vaddq_s32(q9s32, q10s32);
- q10s32 = vaddq_s32(q7s32, q9s32);
-
- q1s64 = vpaddlq_s32(q10s32);
- d0s64 = vadd_s64(vget_low_s64(q1s64), vget_high_s64(q1s64));
-
- vst1_lane_u32((uint32_t *)sse, vreinterpret_u32_s64(d0s64), 0);
- return vget_lane_u32(vreinterpret_u32_s64(d0s64), 0);
-}
-
-unsigned int aom_get4x4sse_cs_neon(const unsigned char *src_ptr,
- int source_stride,
- const unsigned char *ref_ptr,
- int recon_stride) {
- int16x4_t d22s16, d24s16, d26s16, d28s16;
- int64x1_t d0s64;
- uint8x8_t d0u8, d1u8, d2u8, d3u8, d4u8, d5u8, d6u8, d7u8;
- int32x4_t q7s32, q8s32, q9s32, q10s32;
- uint16x8_t q11u16, q12u16, q13u16, q14u16;
- int64x2_t q1s64;
-
- d0u8 = vld1_u8(src_ptr);
- src_ptr += source_stride;
- d4u8 = vld1_u8(ref_ptr);
- ref_ptr += recon_stride;
- d1u8 = vld1_u8(src_ptr);
- src_ptr += source_stride;
- d5u8 = vld1_u8(ref_ptr);
- ref_ptr += recon_stride;
- d2u8 = vld1_u8(src_ptr);
- src_ptr += source_stride;
- d6u8 = vld1_u8(ref_ptr);
- ref_ptr += recon_stride;
- d3u8 = vld1_u8(src_ptr);
- src_ptr += source_stride;
- d7u8 = vld1_u8(ref_ptr);
- ref_ptr += recon_stride;
-
- q11u16 = vsubl_u8(d0u8, d4u8);
- q12u16 = vsubl_u8(d1u8, d5u8);
- q13u16 = vsubl_u8(d2u8, d6u8);
- q14u16 = vsubl_u8(d3u8, d7u8);
-
- d22s16 = vget_low_s16(vreinterpretq_s16_u16(q11u16));
- d24s16 = vget_low_s16(vreinterpretq_s16_u16(q12u16));
- d26s16 = vget_low_s16(vreinterpretq_s16_u16(q13u16));
- d28s16 = vget_low_s16(vreinterpretq_s16_u16(q14u16));
-
- q7s32 = vmull_s16(d22s16, d22s16);
- q8s32 = vmull_s16(d24s16, d24s16);
- q9s32 = vmull_s16(d26s16, d26s16);
- q10s32 = vmull_s16(d28s16, d28s16);
-
- q7s32 = vaddq_s32(q7s32, q8s32);
- q9s32 = vaddq_s32(q9s32, q10s32);
- q9s32 = vaddq_s32(q7s32, q9s32);
-
- q1s64 = vpaddlq_s32(q9s32);
- d0s64 = vadd_s64(vget_low_s64(q1s64), vget_high_s64(q1s64));
-
- return vget_lane_u32(vreinterpret_u32_s64(d0s64), 0);
-}
diff --git a/third_party/aom/aom_dsp/binary_codes_reader.c b/third_party/aom/aom_dsp/binary_codes_reader.c
deleted file mode 100644
index 01088010a..000000000
--- a/third_party/aom/aom_dsp/binary_codes_reader.c
+++ /dev/null
@@ -1,123 +0,0 @@
-/*
- * Copyright (c) 2017, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#include "aom_dsp/binary_codes_reader.h"
-
-#include "av1/common/common.h"
-
-// Inverse recenters a non-negative literal v around a reference r
-static uint16_t inv_recenter_nonneg(uint16_t r, uint16_t v) {
- if (v > (r << 1))
- return v;
- else if ((v & 1) == 0)
- return (v >> 1) + r;
- else
- return r - ((v + 1) >> 1);
-}
-
-// Inverse recenters a non-negative literal v in [0, n-1] around a
-// reference r also in [0, n-1]
-static uint16_t inv_recenter_finite_nonneg(uint16_t n, uint16_t r, uint16_t v) {
- if ((r << 1) <= n) {
- return inv_recenter_nonneg(r, v);
- } else {
- return n - 1 - inv_recenter_nonneg(n - 1 - r, v);
- }
-}
-
-uint16_t aom_read_primitive_quniform_(aom_reader *r,
- uint16_t n ACCT_STR_PARAM) {
- if (n <= 1) return 0;
- const int l = get_msb(n) + 1;
- const int m = (1 << l) - n;
- const int v = aom_read_literal(r, l - 1, ACCT_STR_NAME);
- return v < m ? v : (v << 1) - m + aom_read_bit(r, ACCT_STR_NAME);
-}
-
-static uint16_t aom_rb_read_primitive_quniform(struct aom_read_bit_buffer *rb,
- uint16_t n) {
- if (n <= 1) return 0;
- const int l = get_msb(n) + 1;
- const int m = (1 << l) - n;
- const int v = aom_rb_read_literal(rb, l - 1);
- return v < m ? v : (v << 1) - m + aom_rb_read_bit(rb);
-}
-
-// Decode finite subexponential code that for a symbol v in [0, n-1] with
-// parameter k
-uint16_t aom_read_primitive_subexpfin_(aom_reader *r, uint16_t n,
- uint16_t k ACCT_STR_PARAM) {
- int i = 0;
- int mk = 0;
-
- while (1) {
- int b = (i ? k + i - 1 : k);
- int a = (1 << b);
-
- if (n <= mk + 3 * a) {
- return aom_read_primitive_quniform(r, n - mk, ACCT_STR_NAME) + mk;
- }
-
- if (!aom_read_bit(r, ACCT_STR_NAME)) {
- return aom_read_literal(r, b, ACCT_STR_NAME) + mk;
- }
-
- i = i + 1;
- mk += a;
- }
-
- assert(0);
- return 0;
-}
-
-static uint16_t aom_rb_read_primitive_subexpfin(struct aom_read_bit_buffer *rb,
- uint16_t n, uint16_t k) {
- int i = 0;
- int mk = 0;
-
- while (1) {
- int b = (i ? k + i - 1 : k);
- int a = (1 << b);
-
- if (n <= mk + 3 * a) {
- return aom_rb_read_primitive_quniform(rb, n - mk) + mk;
- }
-
- if (!aom_rb_read_bit(rb)) {
- return aom_rb_read_literal(rb, b) + mk;
- }
-
- i = i + 1;
- mk += a;
- }
-
- assert(0);
- return 0;
-}
-
-uint16_t aom_read_primitive_refsubexpfin_(aom_reader *r, uint16_t n, uint16_t k,
- uint16_t ref ACCT_STR_PARAM) {
- return inv_recenter_finite_nonneg(
- n, ref, aom_read_primitive_subexpfin(r, n, k, ACCT_STR_NAME));
-}
-
-static uint16_t aom_rb_read_primitive_refsubexpfin(
- struct aom_read_bit_buffer *rb, uint16_t n, uint16_t k, uint16_t ref) {
- return inv_recenter_finite_nonneg(n, ref,
- aom_rb_read_primitive_subexpfin(rb, n, k));
-}
-
-int16_t aom_rb_read_signed_primitive_refsubexpfin(
- struct aom_read_bit_buffer *rb, uint16_t n, uint16_t k, int16_t ref) {
- ref += n - 1;
- const uint16_t scaled_n = (n << 1) - 1;
- return aom_rb_read_primitive_refsubexpfin(rb, scaled_n, k, ref) - n + 1;
-}
diff --git a/third_party/aom/aom_dsp/binary_codes_reader.h b/third_party/aom/aom_dsp/binary_codes_reader.h
deleted file mode 100644
index 364a67469..000000000
--- a/third_party/aom/aom_dsp/binary_codes_reader.h
+++ /dev/null
@@ -1,47 +0,0 @@
-/*
- * Copyright (c) 2017, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#ifndef AOM_AOM_DSP_BINARY_CODES_READER_H_
-#define AOM_AOM_DSP_BINARY_CODES_READER_H_
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-#include <assert.h>
-
-#include "config/aom_config.h"
-
-#include "aom/aom_integer.h"
-#include "aom_dsp/bitreader.h"
-#include "aom_dsp/bitreader_buffer.h"
-
-#define aom_read_primitive_quniform(r, n, ACCT_STR_NAME) \
- aom_read_primitive_quniform_(r, n ACCT_STR_ARG(ACCT_STR_NAME))
-#define aom_read_primitive_subexpfin(r, n, k, ACCT_STR_NAME) \
- aom_read_primitive_subexpfin_(r, n, k ACCT_STR_ARG(ACCT_STR_NAME))
-#define aom_read_primitive_refsubexpfin(r, n, k, ref, ACCT_STR_NAME) \
- aom_read_primitive_refsubexpfin_(r, n, k, ref ACCT_STR_ARG(ACCT_STR_NAME))
-
-uint16_t aom_read_primitive_quniform_(aom_reader *r, uint16_t n ACCT_STR_PARAM);
-uint16_t aom_read_primitive_subexpfin_(aom_reader *r, uint16_t n,
- uint16_t k ACCT_STR_PARAM);
-uint16_t aom_read_primitive_refsubexpfin_(aom_reader *r, uint16_t n, uint16_t k,
- uint16_t ref ACCT_STR_PARAM);
-
-int16_t aom_rb_read_signed_primitive_refsubexpfin(
- struct aom_read_bit_buffer *rb, uint16_t n, uint16_t k, int16_t ref);
-
-#ifdef __cplusplus
-} // extern "C"
-#endif
-
-#endif // AOM_AOM_DSP_BINARY_CODES_READER_H_
diff --git a/third_party/aom/aom_dsp/binary_codes_writer.c b/third_party/aom/aom_dsp/binary_codes_writer.c
deleted file mode 100644
index ee7a9f567..000000000
--- a/third_party/aom/aom_dsp/binary_codes_writer.c
+++ /dev/null
@@ -1,210 +0,0 @@
-/*
- * Copyright (c) 2017, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#include "aom_dsp/bitwriter.h"
-#include "aom_dsp/binary_codes_writer.h"
-
-#include "av1/common/common.h"
-
-// Recenters a non-negative literal v around a reference r
-static uint16_t recenter_nonneg(uint16_t r, uint16_t v) {
- if (v > (r << 1))
- return v;
- else if (v >= r)
- return ((v - r) << 1);
- else
- return ((r - v) << 1) - 1;
-}
-
-// Recenters a non-negative literal v in [0, n-1] around a
-// reference r also in [0, n-1]
-static uint16_t recenter_finite_nonneg(uint16_t n, uint16_t r, uint16_t v) {
- if ((r << 1) <= n) {
- return recenter_nonneg(r, v);
- } else {
- return recenter_nonneg(n - 1 - r, n - 1 - v);
- }
-}
-
-// Codes a symbol v in [-2^mag_bits, 2^mag_bits].
-// mag_bits is number of bits for magnitude. The alphabet is of size
-// 2 * 2^mag_bits + 1, symmetric around 0, where one bit is used to
-// indicate 0 or non-zero, mag_bits bits are used to indicate magnitide
-// and 1 more bit for the sign if non-zero.
-void aom_write_primitive_symmetric(aom_writer *w, int16_t v,
- unsigned int abs_bits) {
- if (v == 0) {
- aom_write_bit(w, 0);
- } else {
- const int x = abs(v);
- const int s = v < 0;
- aom_write_bit(w, 1);
- aom_write_bit(w, s);
- aom_write_literal(w, x - 1, abs_bits);
- }
-}
-
-int aom_count_primitive_symmetric(int16_t v, unsigned int abs_bits) {
- return (v == 0 ? 1 : abs_bits + 2);
-}
-
-// Encodes a value v in [0, n-1] quasi-uniformly
-void aom_write_primitive_quniform(aom_writer *w, uint16_t n, uint16_t v) {
- if (n <= 1) return;
- const int l = get_msb(n) + 1;
- const int m = (1 << l) - n;
- if (v < m) {
- aom_write_literal(w, v, l - 1);
- } else {
- aom_write_literal(w, m + ((v - m) >> 1), l - 1);
- aom_write_bit(w, (v - m) & 1);
- }
-}
-
-static void aom_wb_write_primitive_quniform(struct aom_write_bit_buffer *wb,
- uint16_t n, uint16_t v) {
- if (n <= 1) return;
- const int l = get_msb(n) + 1;
- const int m = (1 << l) - n;
- if (v < m) {
- aom_wb_write_literal(wb, v, l - 1);
- } else {
- aom_wb_write_literal(wb, m + ((v - m) >> 1), l - 1);
- aom_wb_write_bit(wb, (v - m) & 1);
- }
-}
-
-int aom_count_primitive_quniform(uint16_t n, uint16_t v) {
- if (n <= 1) return 0;
- const int l = get_msb(n) + 1;
- const int m = (1 << l) - n;
- return v < m ? l - 1 : l;
-}
-
-// Finite subexponential code that codes a symbol v in [0, n-1] with parameter k
-void aom_write_primitive_subexpfin(aom_writer *w, uint16_t n, uint16_t k,
- uint16_t v) {
- int i = 0;
- int mk = 0;
- while (1) {
- int b = (i ? k + i - 1 : k);
- int a = (1 << b);
- if (n <= mk + 3 * a) {
- aom_write_primitive_quniform(w, n - mk, v - mk);
- break;
- } else {
- int t = (v >= mk + a);
- aom_write_bit(w, t);
- if (t) {
- i = i + 1;
- mk += a;
- } else {
- aom_write_literal(w, v - mk, b);
- break;
- }
- }
- }
-}
-
-static void aom_wb_write_primitive_subexpfin(struct aom_write_bit_buffer *wb,
- uint16_t n, uint16_t k,
- uint16_t v) {
- int i = 0;
- int mk = 0;
- while (1) {
- int b = (i ? k + i - 1 : k);
- int a = (1 << b);
- if (n <= mk + 3 * a) {
- aom_wb_write_primitive_quniform(wb, n - mk, v - mk);
- break;
- } else {
- int t = (v >= mk + a);
- aom_wb_write_bit(wb, t);
- if (t) {
- i = i + 1;
- mk += a;
- } else {
- aom_wb_write_literal(wb, v - mk, b);
- break;
- }
- }
- }
-}
-
-int aom_count_primitive_subexpfin(uint16_t n, uint16_t k, uint16_t v) {
- int count = 0;
- int i = 0;
- int mk = 0;
- while (1) {
- int b = (i ? k + i - 1 : k);
- int a = (1 << b);
- if (n <= mk + 3 * a) {
- count += aom_count_primitive_quniform(n - mk, v - mk);
- break;
- } else {
- int t = (v >= mk + a);
- count++;
- if (t) {
- i = i + 1;
- mk += a;
- } else {
- count += b;
- break;
- }
- }
- }
- return count;
-}
-
-// Finite subexponential code that codes a symbol v in [0, n-1] with parameter k
-// based on a reference ref also in [0, n-1].
-// Recenters symbol around r first and then uses a finite subexponential code.
-void aom_write_primitive_refsubexpfin(aom_writer *w, uint16_t n, uint16_t k,
- uint16_t ref, uint16_t v) {
- aom_write_primitive_subexpfin(w, n, k, recenter_finite_nonneg(n, ref, v));
-}
-
-static void aom_wb_write_primitive_refsubexpfin(struct aom_write_bit_buffer *wb,
- uint16_t n, uint16_t k,
- uint16_t ref, uint16_t v) {
- aom_wb_write_primitive_subexpfin(wb, n, k, recenter_finite_nonneg(n, ref, v));
-}
-
-void aom_write_signed_primitive_refsubexpfin(aom_writer *w, uint16_t n,
- uint16_t k, int16_t ref,
- int16_t v) {
- ref += n - 1;
- v += n - 1;
- const uint16_t scaled_n = (n << 1) - 1;
- aom_write_primitive_refsubexpfin(w, scaled_n, k, ref, v);
-}
-
-void aom_wb_write_signed_primitive_refsubexpfin(struct aom_write_bit_buffer *wb,
- uint16_t n, uint16_t k,
- int16_t ref, int16_t v) {
- ref += n - 1;
- v += n - 1;
- const uint16_t scaled_n = (n << 1) - 1;
- aom_wb_write_primitive_refsubexpfin(wb, scaled_n, k, ref, v);
-}
-
-int aom_count_primitive_refsubexpfin(uint16_t n, uint16_t k, uint16_t ref,
- uint16_t v) {
- return aom_count_primitive_subexpfin(n, k, recenter_finite_nonneg(n, ref, v));
-}
-
-int aom_count_signed_primitive_refsubexpfin(uint16_t n, uint16_t k, int16_t ref,
- int16_t v) {
- ref += n - 1;
- v += n - 1;
- const uint16_t scaled_n = (n << 1) - 1;
- return aom_count_primitive_refsubexpfin(scaled_n, k, ref, v);
-}
diff --git a/third_party/aom/aom_dsp/binary_codes_writer.h b/third_party/aom/aom_dsp/binary_codes_writer.h
deleted file mode 100644
index c360e0e29..000000000
--- a/third_party/aom/aom_dsp/binary_codes_writer.h
+++ /dev/null
@@ -1,68 +0,0 @@
-/*
- * Copyright (c) 2017, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#ifndef AOM_AOM_DSP_BINARY_CODES_WRITER_H_
-#define AOM_AOM_DSP_BINARY_CODES_WRITER_H_
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-#include <assert.h>
-#include "config/aom_config.h"
-
-#include "aom/aom_integer.h"
-#include "aom_dsp/bitwriter.h"
-#include "aom_dsp/bitwriter_buffer.h"
-
-// Codes a symbol v in [-2^mag_bits, 2^mag_bits]
-// mag_bits is number of bits for magnitude. The alphabet is of size
-// 2 * 2^mag_bits + 1, symmetric around 0, where one bit is used to
-// indicate 0 or non-zero, mag_bits bits are used to indicate magnitide
-// and 1 more bit for the sign if non-zero.
-void aom_write_primitive_symmetric(aom_writer *w, int16_t v,
- unsigned int mag_bits);
-
-// Encodes a value v in [0, n-1] quasi-uniformly
-void aom_write_primitive_quniform(aom_writer *w, uint16_t n, uint16_t v);
-
-// Finite subexponential code that codes a symbol v in [0, n-1] with parameter k
-void aom_write_primitive_subexpfin(aom_writer *w, uint16_t n, uint16_t k,
- uint16_t v);
-
-// Finite subexponential code that codes a symbol v in [0, n-1] with parameter k
-// based on a reference ref also in [0, n-1].
-void aom_write_primitive_refsubexpfin(aom_writer *w, uint16_t n, uint16_t k,
- uint16_t ref, uint16_t v);
-
-// Finite subexponential code that codes a symbol v in [-(n-1), n-1] with
-// parameter k based on a reference ref also in [-(n-1), n-1].
-void aom_write_signed_primitive_refsubexpfin(aom_writer *w, uint16_t n,
- uint16_t k, int16_t ref,
- int16_t v);
-
-void aom_wb_write_signed_primitive_refsubexpfin(struct aom_write_bit_buffer *wb,
- uint16_t n, uint16_t k,
- int16_t ref, int16_t v);
-
-// Functions that counts bits for the above primitives
-int aom_count_primitive_symmetric(int16_t v, unsigned int mag_bits);
-int aom_count_primitive_quniform(uint16_t n, uint16_t v);
-int aom_count_primitive_subexpfin(uint16_t n, uint16_t k, uint16_t v);
-int aom_count_primitive_refsubexpfin(uint16_t n, uint16_t k, uint16_t ref,
- uint16_t v);
-int aom_count_signed_primitive_refsubexpfin(uint16_t n, uint16_t k, int16_t ref,
- int16_t v);
-#ifdef __cplusplus
-} // extern "C"
-#endif
-
-#endif // AOM_AOM_DSP_BINARY_CODES_WRITER_H_
diff --git a/third_party/aom/aom_dsp/bitreader.h b/third_party/aom/aom_dsp/bitreader.h
deleted file mode 100644
index 7c0efcc78..000000000
--- a/third_party/aom/aom_dsp/bitreader.h
+++ /dev/null
@@ -1,160 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#ifndef AOM_AOM_DSP_BITREADER_H_
-#define AOM_AOM_DSP_BITREADER_H_
-
-#include <assert.h>
-#include <limits.h>
-
-#include "config/aom_config.h"
-
-#include "aom/aomdx.h"
-#include "aom/aom_integer.h"
-#include "aom_dsp/daalaboolreader.h"
-#include "aom_dsp/prob.h"
-#include "av1/common/odintrin.h"
-
-#if CONFIG_ACCOUNTING
-#include "av1/decoder/accounting.h"
-#define ACCT_STR_NAME acct_str
-#define ACCT_STR_PARAM , const char *ACCT_STR_NAME
-#define ACCT_STR_ARG(s) , s
-#else
-#define ACCT_STR_PARAM
-#define ACCT_STR_ARG(s)
-#endif
-
-#define aom_read(r, prob, ACCT_STR_NAME) \
- aom_read_(r, prob ACCT_STR_ARG(ACCT_STR_NAME))
-#define aom_read_bit(r, ACCT_STR_NAME) \
- aom_read_bit_(r ACCT_STR_ARG(ACCT_STR_NAME))
-#define aom_read_tree(r, tree, probs, ACCT_STR_NAME) \
- aom_read_tree_(r, tree, probs ACCT_STR_ARG(ACCT_STR_NAME))
-#define aom_read_literal(r, bits, ACCT_STR_NAME) \
- aom_read_literal_(r, bits ACCT_STR_ARG(ACCT_STR_NAME))
-#define aom_read_cdf(r, cdf, nsymbs, ACCT_STR_NAME) \
- aom_read_cdf_(r, cdf, nsymbs ACCT_STR_ARG(ACCT_STR_NAME))
-#define aom_read_symbol(r, cdf, nsymbs, ACCT_STR_NAME) \
- aom_read_symbol_(r, cdf, nsymbs ACCT_STR_ARG(ACCT_STR_NAME))
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-typedef struct daala_reader aom_reader;
-
-static INLINE int aom_reader_init(aom_reader *r, const uint8_t *buffer,
- size_t size) {
- return aom_daala_reader_init(r, buffer, (int)size);
-}
-
-static INLINE const uint8_t *aom_reader_find_begin(aom_reader *r) {
- return aom_daala_reader_find_begin(r);
-}
-
-static INLINE const uint8_t *aom_reader_find_end(aom_reader *r) {
- return aom_daala_reader_find_end(r);
-}
-
-static INLINE int aom_reader_has_error(aom_reader *r) {
- return aom_daala_reader_has_error(r);
-}
-
-// Returns true if the bit reader has tried to decode more data from the buffer
-// than was actually provided.
-static INLINE int aom_reader_has_overflowed(const aom_reader *r) {
- return aom_daala_reader_has_overflowed(r);
-}
-
-// Returns the position in the bit reader in bits.
-static INLINE uint32_t aom_reader_tell(const aom_reader *r) {
- return aom_daala_reader_tell(r);
-}
-
-// Returns the position in the bit reader in 1/8th bits.
-static INLINE uint32_t aom_reader_tell_frac(const aom_reader *r) {
- return aom_daala_reader_tell_frac(r);
-}
-
-#if CONFIG_ACCOUNTING
-static INLINE void aom_process_accounting(const aom_reader *r ACCT_STR_PARAM) {
- if (r->accounting != NULL) {
- uint32_t tell_frac;
- tell_frac = aom_reader_tell_frac(r);
- aom_accounting_record(r->accounting, ACCT_STR_NAME,
- tell_frac - r->accounting->last_tell_frac);
- r->accounting->last_tell_frac = tell_frac;
- }
-}
-
-static INLINE void aom_update_symb_counts(const aom_reader *r, int is_binary) {
- if (r->accounting != NULL) {
- r->accounting->syms.num_multi_syms += !is_binary;
- r->accounting->syms.num_binary_syms += !!is_binary;
- }
-}
-#endif
-
-static INLINE int aom_read_(aom_reader *r, int prob ACCT_STR_PARAM) {
- int ret;
- ret = aom_daala_read(r, prob);
-#if CONFIG_ACCOUNTING
- if (ACCT_STR_NAME) aom_process_accounting(r, ACCT_STR_NAME);
- aom_update_symb_counts(r, 1);
-#endif
- return ret;
-}
-
-static INLINE int aom_read_bit_(aom_reader *r ACCT_STR_PARAM) {
- int ret;
- ret = aom_read(r, 128, NULL); // aom_prob_half
-#if CONFIG_ACCOUNTING
- if (ACCT_STR_NAME) aom_process_accounting(r, ACCT_STR_NAME);
-#endif
- return ret;
-}
-
-static INLINE int aom_read_literal_(aom_reader *r, int bits ACCT_STR_PARAM) {
- int literal = 0, bit;
-
- for (bit = bits - 1; bit >= 0; bit--) literal |= aom_read_bit(r, NULL) << bit;
-#if CONFIG_ACCOUNTING
- if (ACCT_STR_NAME) aom_process_accounting(r, ACCT_STR_NAME);
-#endif
- return literal;
-}
-
-static INLINE int aom_read_cdf_(aom_reader *r, const aom_cdf_prob *cdf,
- int nsymbs ACCT_STR_PARAM) {
- int ret;
- ret = daala_read_symbol(r, cdf, nsymbs);
-
-#if CONFIG_ACCOUNTING
- if (ACCT_STR_NAME) aom_process_accounting(r, ACCT_STR_NAME);
- aom_update_symb_counts(r, (nsymbs == 2));
-#endif
- return ret;
-}
-
-static INLINE int aom_read_symbol_(aom_reader *r, aom_cdf_prob *cdf,
- int nsymbs ACCT_STR_PARAM) {
- int ret;
- ret = aom_read_cdf(r, cdf, nsymbs, ACCT_STR_NAME);
- if (r->allow_update_cdf) update_cdf(cdf, ret, nsymbs);
- return ret;
-}
-
-#ifdef __cplusplus
-} // extern "C"
-#endif
-
-#endif // AOM_AOM_DSP_BITREADER_H_
diff --git a/third_party/aom/aom_dsp/bitreader_buffer.c b/third_party/aom/aom_dsp/bitreader_buffer.c
deleted file mode 100644
index b53211784..000000000
--- a/third_party/aom/aom_dsp/bitreader_buffer.c
+++ /dev/null
@@ -1,67 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#include <assert.h>
-
-#include "config/aom_config.h"
-
-#include "aom_dsp/bitreader_buffer.h"
-
-size_t aom_rb_bytes_read(const struct aom_read_bit_buffer *rb) {
- return (rb->bit_offset + 7) >> 3;
-}
-
-int aom_rb_read_bit(struct aom_read_bit_buffer *rb) {
- const uint32_t off = rb->bit_offset;
- const uint32_t p = off >> 3;
- const int q = 7 - (int)(off & 0x7);
- if (rb->bit_buffer + p < rb->bit_buffer_end) {
- const int bit = (rb->bit_buffer[p] >> q) & 1;
- rb->bit_offset = off + 1;
- return bit;
- } else {
- if (rb->error_handler) rb->error_handler(rb->error_handler_data);
- return 0;
- }
-}
-
-int aom_rb_read_literal(struct aom_read_bit_buffer *rb, int bits) {
- assert(bits <= 31);
- int value = 0, bit;
- for (bit = bits - 1; bit >= 0; bit--) value |= aom_rb_read_bit(rb) << bit;
- return value;
-}
-
-uint32_t aom_rb_read_unsigned_literal(struct aom_read_bit_buffer *rb,
- int bits) {
- assert(bits <= 32);
- uint32_t value = 0;
- int bit;
- for (bit = bits - 1; bit >= 0; bit--)
- value |= (uint32_t)aom_rb_read_bit(rb) << bit;
- return value;
-}
-
-int aom_rb_read_inv_signed_literal(struct aom_read_bit_buffer *rb, int bits) {
- const int nbits = sizeof(unsigned) * 8 - bits - 1;
- const unsigned value = (unsigned)aom_rb_read_literal(rb, bits + 1) << nbits;
- return ((int)value) >> nbits;
-}
-
-uint32_t aom_rb_read_uvlc(struct aom_read_bit_buffer *rb) {
- int leading_zeros = 0;
- while (!aom_rb_read_bit(rb)) ++leading_zeros;
- // Maximum 32 bits.
- if (leading_zeros >= 32) return UINT32_MAX;
- const uint32_t base = (1u << leading_zeros) - 1;
- const uint32_t value = aom_rb_read_literal(rb, leading_zeros);
- return base + value;
-}
diff --git a/third_party/aom/aom_dsp/bitreader_buffer.h b/third_party/aom/aom_dsp/bitreader_buffer.h
deleted file mode 100644
index 725ca1ea2..000000000
--- a/third_party/aom/aom_dsp/bitreader_buffer.h
+++ /dev/null
@@ -1,50 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#ifndef AOM_AOM_DSP_BITREADER_BUFFER_H_
-#define AOM_AOM_DSP_BITREADER_BUFFER_H_
-
-#include <limits.h>
-
-#include "aom/aom_integer.h"
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-typedef void (*aom_rb_error_handler)(void *data);
-
-struct aom_read_bit_buffer {
- const uint8_t *bit_buffer;
- const uint8_t *bit_buffer_end;
- uint32_t bit_offset;
-
- void *error_handler_data;
- aom_rb_error_handler error_handler;
-};
-
-size_t aom_rb_bytes_read(const struct aom_read_bit_buffer *rb);
-
-int aom_rb_read_bit(struct aom_read_bit_buffer *rb);
-
-int aom_rb_read_literal(struct aom_read_bit_buffer *rb, int bits);
-
-uint32_t aom_rb_read_unsigned_literal(struct aom_read_bit_buffer *rb, int bits);
-
-int aom_rb_read_inv_signed_literal(struct aom_read_bit_buffer *rb, int bits);
-
-uint32_t aom_rb_read_uvlc(struct aom_read_bit_buffer *rb);
-
-#ifdef __cplusplus
-} // extern "C"
-#endif
-
-#endif // AOM_AOM_DSP_BITREADER_BUFFER_H_
diff --git a/third_party/aom/aom_dsp/bitwriter.h b/third_party/aom/aom_dsp/bitwriter.h
deleted file mode 100644
index b5ecc2382..000000000
--- a/third_party/aom/aom_dsp/bitwriter.h
+++ /dev/null
@@ -1,89 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#ifndef AOM_AOM_DSP_BITWRITER_H_
-#define AOM_AOM_DSP_BITWRITER_H_
-
-#include <assert.h>
-
-#include "config/aom_config.h"
-
-#include "aom_dsp/daalaboolwriter.h"
-#include "aom_dsp/prob.h"
-
-#if CONFIG_RD_DEBUG
-#include "av1/common/blockd.h"
-#include "av1/encoder/cost.h"
-#endif
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-typedef struct daala_writer aom_writer;
-
-typedef struct TOKEN_STATS {
- int cost;
-#if CONFIG_RD_DEBUG
- int txb_coeff_cost_map[TXB_COEFF_COST_MAP_SIZE][TXB_COEFF_COST_MAP_SIZE];
-#endif
-} TOKEN_STATS;
-
-static INLINE void init_token_stats(TOKEN_STATS *token_stats) {
-#if CONFIG_RD_DEBUG
- int r, c;
- for (r = 0; r < TXB_COEFF_COST_MAP_SIZE; ++r) {
- for (c = 0; c < TXB_COEFF_COST_MAP_SIZE; ++c) {
- token_stats->txb_coeff_cost_map[r][c] = 0;
- }
- }
-#endif
- token_stats->cost = 0;
-}
-
-static INLINE void aom_start_encode(aom_writer *bc, uint8_t *buffer) {
- aom_daala_start_encode(bc, buffer);
-}
-
-static INLINE int aom_stop_encode(aom_writer *bc) {
- return aom_daala_stop_encode(bc);
-}
-
-static INLINE void aom_write(aom_writer *br, int bit, int probability) {
- aom_daala_write(br, bit, probability);
-}
-
-static INLINE void aom_write_bit(aom_writer *w, int bit) {
- aom_write(w, bit, 128); // aom_prob_half
-}
-
-static INLINE void aom_write_literal(aom_writer *w, int data, int bits) {
- int bit;
-
- for (bit = bits - 1; bit >= 0; bit--) aom_write_bit(w, 1 & (data >> bit));
-}
-
-static INLINE void aom_write_cdf(aom_writer *w, int symb,
- const aom_cdf_prob *cdf, int nsymbs) {
- daala_write_symbol(w, symb, cdf, nsymbs);
-}
-
-static INLINE void aom_write_symbol(aom_writer *w, int symb, aom_cdf_prob *cdf,
- int nsymbs) {
- aom_write_cdf(w, symb, cdf, nsymbs);
- if (w->allow_update_cdf) update_cdf(cdf, symb, nsymbs);
-}
-
-#ifdef __cplusplus
-} // extern "C"
-#endif
-
-#endif // AOM_AOM_DSP_BITWRITER_H_
diff --git a/third_party/aom/aom_dsp/bitwriter_buffer.c b/third_party/aom/aom_dsp/bitwriter_buffer.c
deleted file mode 100644
index 596246deb..000000000
--- a/third_party/aom/aom_dsp/bitwriter_buffer.c
+++ /dev/null
@@ -1,87 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#include <assert.h>
-#include <limits.h>
-#include <stdlib.h>
-
-#include "config/aom_config.h"
-
-#include "aom_dsp/bitwriter_buffer.h"
-
-int aom_wb_is_byte_aligned(const struct aom_write_bit_buffer *wb) {
- return (wb->bit_offset % CHAR_BIT == 0);
-}
-
-uint32_t aom_wb_bytes_written(const struct aom_write_bit_buffer *wb) {
- return wb->bit_offset / CHAR_BIT + (wb->bit_offset % CHAR_BIT > 0);
-}
-
-void aom_wb_write_bit(struct aom_write_bit_buffer *wb, int bit) {
- const int off = (int)wb->bit_offset;
- const int p = off / CHAR_BIT;
- const int q = CHAR_BIT - 1 - off % CHAR_BIT;
- if (q == CHAR_BIT - 1) {
- // Zero next char and write bit
- wb->bit_buffer[p] = bit << q;
- } else {
- wb->bit_buffer[p] &= ~(1 << q);
- wb->bit_buffer[p] |= bit << q;
- }
- wb->bit_offset = off + 1;
-}
-
-void aom_wb_overwrite_bit(struct aom_write_bit_buffer *wb, int bit) {
- // Do not zero bytes but overwrite exisiting values
- const int off = (int)wb->bit_offset;
- const int p = off / CHAR_BIT;
- const int q = CHAR_BIT - 1 - off % CHAR_BIT;
- wb->bit_buffer[p] &= ~(1 << q);
- wb->bit_buffer[p] |= bit << q;
- wb->bit_offset = off + 1;
-}
-
-void aom_wb_write_literal(struct aom_write_bit_buffer *wb, int data, int bits) {
- assert(bits <= 31);
- int bit;
- for (bit = bits - 1; bit >= 0; bit--) aom_wb_write_bit(wb, (data >> bit) & 1);
-}
-
-void aom_wb_write_unsigned_literal(struct aom_write_bit_buffer *wb,
- uint32_t data, int bits) {
- assert(bits <= 32);
- int bit;
- for (bit = bits - 1; bit >= 0; bit--) aom_wb_write_bit(wb, (data >> bit) & 1);
-}
-
-void aom_wb_overwrite_literal(struct aom_write_bit_buffer *wb, int data,
- int bits) {
- int bit;
- for (bit = bits - 1; bit >= 0; bit--)
- aom_wb_overwrite_bit(wb, (data >> bit) & 1);
-}
-
-void aom_wb_write_inv_signed_literal(struct aom_write_bit_buffer *wb, int data,
- int bits) {
- aom_wb_write_literal(wb, data, bits + 1);
-}
-
-void aom_wb_write_uvlc(struct aom_write_bit_buffer *wb, uint32_t v) {
- int64_t shift_val = ++v;
- int leading_zeroes = 1;
-
- assert(shift_val > 0);
-
- while (shift_val >>= 1) leading_zeroes += 2;
-
- aom_wb_write_literal(wb, 0, leading_zeroes >> 1);
- aom_wb_write_unsigned_literal(wb, v, (leading_zeroes + 1) >> 1);
-}
diff --git a/third_party/aom/aom_dsp/bitwriter_buffer.h b/third_party/aom/aom_dsp/bitwriter_buffer.h
deleted file mode 100644
index d0311284f..000000000
--- a/third_party/aom/aom_dsp/bitwriter_buffer.h
+++ /dev/null
@@ -1,51 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#ifndef AOM_AOM_DSP_BITWRITER_BUFFER_H_
-#define AOM_AOM_DSP_BITWRITER_BUFFER_H_
-
-#include "aom/aom_integer.h"
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-struct aom_write_bit_buffer {
- uint8_t *bit_buffer;
- uint32_t bit_offset;
-};
-
-int aom_wb_is_byte_aligned(const struct aom_write_bit_buffer *wb);
-
-uint32_t aom_wb_bytes_written(const struct aom_write_bit_buffer *wb);
-
-void aom_wb_write_bit(struct aom_write_bit_buffer *wb, int bit);
-
-void aom_wb_overwrite_bit(struct aom_write_bit_buffer *wb, int bit);
-
-void aom_wb_write_literal(struct aom_write_bit_buffer *wb, int data, int bits);
-
-void aom_wb_write_unsigned_literal(struct aom_write_bit_buffer *wb,
- uint32_t data, int bits);
-
-void aom_wb_overwrite_literal(struct aom_write_bit_buffer *wb, int data,
- int bits);
-
-void aom_wb_write_inv_signed_literal(struct aom_write_bit_buffer *wb, int data,
- int bits);
-
-void aom_wb_write_uvlc(struct aom_write_bit_buffer *wb, uint32_t v);
-
-#ifdef __cplusplus
-} // extern "C"
-#endif
-
-#endif // AOM_AOM_DSP_BITWRITER_BUFFER_H_
diff --git a/third_party/aom/aom_dsp/blend.h b/third_party/aom/aom_dsp/blend.h
deleted file mode 100644
index fd87dc181..000000000
--- a/third_party/aom/aom_dsp/blend.h
+++ /dev/null
@@ -1,45 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#ifndef AOM_AOM_DSP_BLEND_H_
-#define AOM_AOM_DSP_BLEND_H_
-
-#include "aom_ports/mem.h"
-
-// Various blending functions and macros.
-// See also the aom_blend_* functions in aom_dsp_rtcd.h
-
-// Alpha blending with alpha values from the range [0, 64], where 64
-// means use the first input and 0 means use the second input.
-
-#define AOM_BLEND_A64_ROUND_BITS 6
-#define AOM_BLEND_A64_MAX_ALPHA (1 << AOM_BLEND_A64_ROUND_BITS) // 64
-
-#define AOM_BLEND_A64(a, v0, v1) \
- ROUND_POWER_OF_TWO((a) * (v0) + (AOM_BLEND_A64_MAX_ALPHA - (a)) * (v1), \
- AOM_BLEND_A64_ROUND_BITS)
-
-// Alpha blending with alpha values from the range [0, 256], where 256
-// means use the first input and 0 means use the second input.
-#define AOM_BLEND_A256_ROUND_BITS 8
-#define AOM_BLEND_A256_MAX_ALPHA (1 << AOM_BLEND_A256_ROUND_BITS) // 256
-
-#define AOM_BLEND_A256(a, v0, v1) \
- ROUND_POWER_OF_TWO((a) * (v0) + (AOM_BLEND_A256_MAX_ALPHA - (a)) * (v1), \
- AOM_BLEND_A256_ROUND_BITS)
-
-// Blending by averaging.
-#define AOM_BLEND_AVG(v0, v1) ROUND_POWER_OF_TWO((v0) + (v1), 1)
-
-#define DIFF_FACTOR_LOG2 4
-#define DIFF_FACTOR (1 << DIFF_FACTOR_LOG2)
-
-#endif // AOM_AOM_DSP_BLEND_H_
diff --git a/third_party/aom/aom_dsp/blend_a64_hmask.c b/third_party/aom/aom_dsp/blend_a64_hmask.c
deleted file mode 100644
index 0554b43d1..000000000
--- a/third_party/aom/aom_dsp/blend_a64_hmask.c
+++ /dev/null
@@ -1,69 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#include <assert.h>
-
-#include "aom/aom_integer.h"
-#include "aom_ports/mem.h"
-#include "aom_dsp/aom_dsp_common.h"
-#include "aom_dsp/blend.h"
-
-#include "config/aom_dsp_rtcd.h"
-
-void aom_blend_a64_hmask_c(uint8_t *dst, uint32_t dst_stride,
- const uint8_t *src0, uint32_t src0_stride,
- const uint8_t *src1, uint32_t src1_stride,
- const uint8_t *mask, int w, int h) {
- int i, j;
-
- assert(IMPLIES(src0 == dst, src0_stride == dst_stride));
- assert(IMPLIES(src1 == dst, src1_stride == dst_stride));
-
- assert(h >= 1);
- assert(w >= 1);
- assert(IS_POWER_OF_TWO(h));
- assert(IS_POWER_OF_TWO(w));
-
- for (i = 0; i < h; ++i) {
- for (j = 0; j < w; ++j) {
- dst[i * dst_stride + j] = AOM_BLEND_A64(
- mask[j], src0[i * src0_stride + j], src1[i * src1_stride + j]);
- }
- }
-}
-
-void aom_highbd_blend_a64_hmask_c(uint8_t *dst_8, uint32_t dst_stride,
- const uint8_t *src0_8, uint32_t src0_stride,
- const uint8_t *src1_8, uint32_t src1_stride,
- const uint8_t *mask, int w, int h, int bd) {
- int i, j;
- uint16_t *dst = CONVERT_TO_SHORTPTR(dst_8);
- const uint16_t *src0 = CONVERT_TO_SHORTPTR(src0_8);
- const uint16_t *src1 = CONVERT_TO_SHORTPTR(src1_8);
- (void)bd;
-
- assert(IMPLIES(src0 == dst, src0_stride == dst_stride));
- assert(IMPLIES(src1 == dst, src1_stride == dst_stride));
-
- assert(h >= 1);
- assert(w >= 1);
- assert(IS_POWER_OF_TWO(h));
- assert(IS_POWER_OF_TWO(w));
-
- assert(bd == 8 || bd == 10 || bd == 12);
-
- for (i = 0; i < h; ++i) {
- for (j = 0; j < w; ++j) {
- dst[i * dst_stride + j] = AOM_BLEND_A64(
- mask[j], src0[i * src0_stride + j], src1[i * src1_stride + j]);
- }
- }
-}
diff --git a/third_party/aom/aom_dsp/blend_a64_mask.c b/third_party/aom/aom_dsp/blend_a64_mask.c
deleted file mode 100644
index 992cc5c0c..000000000
--- a/third_party/aom/aom_dsp/blend_a64_mask.c
+++ /dev/null
@@ -1,345 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#include <assert.h>
-
-#include "aom/aom_integer.h"
-#include "aom_ports/mem.h"
-#include "aom_dsp/blend.h"
-#include "aom_dsp/aom_dsp_common.h"
-
-#include "config/aom_dsp_rtcd.h"
-
-// Blending with alpha mask. Mask values come from the range [0, 64],
-// as described for AOM_BLEND_A64 in aom_dsp/blend.h. src0 or src1 can
-// be the same as dst, or dst can be different from both sources.
-
-// NOTE(david.barker): The input and output of aom_blend_a64_d32_mask_c() are
-// in a higher intermediate precision, and will later be rounded down to pixel
-// precision.
-// Thus, in order to avoid double-rounding, we want to use normal right shifts
-// within this function, not ROUND_POWER_OF_TWO.
-// This works because of the identity:
-// ROUND_POWER_OF_TWO(x >> y, z) == ROUND_POWER_OF_TWO(x, y+z)
-//
-// In contrast, the output of the non-d32 functions will not be further rounded,
-// so we *should* use ROUND_POWER_OF_TWO there.
-
-void aom_lowbd_blend_a64_d16_mask_c(
- uint8_t *dst, uint32_t dst_stride, const CONV_BUF_TYPE *src0,
- uint32_t src0_stride, const CONV_BUF_TYPE *src1, uint32_t src1_stride,
- const uint8_t *mask, uint32_t mask_stride, int w, int h, int subw, int subh,
- ConvolveParams *conv_params) {
- int i, j;
- const int bd = 8;
- const int offset_bits = bd + 2 * FILTER_BITS - conv_params->round_0;
- const int round_offset = (1 << (offset_bits - conv_params->round_1)) +
- (1 << (offset_bits - conv_params->round_1 - 1));
- const int round_bits =
- 2 * FILTER_BITS - conv_params->round_0 - conv_params->round_1;
-
- assert(IMPLIES((void *)src0 == dst, src0_stride == dst_stride));
- assert(IMPLIES((void *)src1 == dst, src1_stride == dst_stride));
-
- assert(h >= 4);
- assert(w >= 4);
- assert(IS_POWER_OF_TWO(h));
- assert(IS_POWER_OF_TWO(w));
-
- if (subw == 0 && subh == 0) {
- for (i = 0; i < h; ++i) {
- for (j = 0; j < w; ++j) {
- int32_t res;
- const int m = mask[i * mask_stride + j];
- res = ((m * (int32_t)src0[i * src0_stride + j] +
- (AOM_BLEND_A64_MAX_ALPHA - m) *
- (int32_t)src1[i * src1_stride + j]) >>
- AOM_BLEND_A64_ROUND_BITS);
- res -= round_offset;
- dst[i * dst_stride + j] =
- clip_pixel(ROUND_POWER_OF_TWO(res, round_bits));
- }
- }
- } else if (subw == 1 && subh == 1) {
- for (i = 0; i < h; ++i) {
- for (j = 0; j < w; ++j) {
- int32_t res;
- const int m = ROUND_POWER_OF_TWO(
- mask[(2 * i) * mask_stride + (2 * j)] +
- mask[(2 * i + 1) * mask_stride + (2 * j)] +
- mask[(2 * i) * mask_stride + (2 * j + 1)] +
- mask[(2 * i + 1) * mask_stride + (2 * j + 1)],
- 2);
- res = ((m * (int32_t)src0[i * src0_stride + j] +
- (AOM_BLEND_A64_MAX_ALPHA - m) *
- (int32_t)src1[i * src1_stride + j]) >>
- AOM_BLEND_A64_ROUND_BITS);
- res -= round_offset;
- dst[i * dst_stride + j] =
- clip_pixel(ROUND_POWER_OF_TWO(res, round_bits));
- }
- }
- } else if (subw == 1 && subh == 0) {
- for (i = 0; i < h; ++i) {
- for (j = 0; j < w; ++j) {
- int32_t res;
- const int m = AOM_BLEND_AVG(mask[i * mask_stride + (2 * j)],
- mask[i * mask_stride + (2 * j + 1)]);
- res = ((m * (int32_t)src0[i * src0_stride + j] +
- (AOM_BLEND_A64_MAX_ALPHA - m) *
- (int32_t)src1[i * src1_stride + j]) >>
- AOM_BLEND_A64_ROUND_BITS);
- res -= round_offset;
- dst[i * dst_stride + j] =
- clip_pixel(ROUND_POWER_OF_TWO(res, round_bits));
- }
- }
- } else {
- for (i = 0; i < h; ++i) {
- for (j = 0; j < w; ++j) {
- int32_t res;
- const int m = AOM_BLEND_AVG(mask[(2 * i) * mask_stride + j],
- mask[(2 * i + 1) * mask_stride + j]);
- res = ((int32_t)(m * (int32_t)src0[i * src0_stride + j] +
- (AOM_BLEND_A64_MAX_ALPHA - m) *
- (int32_t)src1[i * src1_stride + j]) >>
- AOM_BLEND_A64_ROUND_BITS);
- res -= round_offset;
- dst[i * dst_stride + j] =
- clip_pixel(ROUND_POWER_OF_TWO(res, round_bits));
- }
- }
- }
-}
-
-void aom_highbd_blend_a64_d16_mask_c(
- uint8_t *dst_8, uint32_t dst_stride, const CONV_BUF_TYPE *src0,
- uint32_t src0_stride, const CONV_BUF_TYPE *src1, uint32_t src1_stride,
- const uint8_t *mask, uint32_t mask_stride, int w, int h, int subw, int subh,
- ConvolveParams *conv_params, const int bd) {
- const int offset_bits = bd + 2 * FILTER_BITS - conv_params->round_0;
- const int round_offset = (1 << (offset_bits - conv_params->round_1)) +
- (1 << (offset_bits - conv_params->round_1 - 1));
- const int round_bits =
- 2 * FILTER_BITS - conv_params->round_0 - conv_params->round_1;
- uint16_t *dst = CONVERT_TO_SHORTPTR(dst_8);
-
- assert(IMPLIES(src0 == dst, src0_stride == dst_stride));
- assert(IMPLIES(src1 == dst, src1_stride == dst_stride));
-
- assert(h >= 1);
- assert(w >= 1);
- assert(IS_POWER_OF_TWO(h));
- assert(IS_POWER_OF_TWO(w));
-
- // excerpt from clip_pixel_highbd()
- // set saturation_value to (1 << bd) - 1
- unsigned int saturation_value;
- switch (bd) {
- case 8:
- default: saturation_value = 255; break;
- case 10: saturation_value = 1023; break;
- case 12: saturation_value = 4095; break;
- }
-
- if (subw == 0 && subh == 0) {
- for (int i = 0; i < h; ++i) {
- for (int j = 0; j < w; ++j) {
- int32_t res;
- const int m = mask[j];
- res = ((m * src0[j] + (AOM_BLEND_A64_MAX_ALPHA - m) * src1[j]) >>
- AOM_BLEND_A64_ROUND_BITS);
- res -= round_offset;
- unsigned int v = negative_to_zero(ROUND_POWER_OF_TWO(res, round_bits));
- dst[j] = AOMMIN(v, saturation_value);
- }
- mask += mask_stride;
- src0 += src0_stride;
- src1 += src1_stride;
- dst += dst_stride;
- }
- } else if (subw == 1 && subh == 1) {
- for (int i = 0; i < h; ++i) {
- for (int j = 0; j < w; ++j) {
- int32_t res;
- const int m = ROUND_POWER_OF_TWO(
- mask[2 * j] + mask[mask_stride + 2 * j] + mask[2 * j + 1] +
- mask[mask_stride + 2 * j + 1],
- 2);
- res = (m * src0[j] + (AOM_BLEND_A64_MAX_ALPHA - m) * src1[j]) >>
- AOM_BLEND_A64_ROUND_BITS;
- res -= round_offset;
- unsigned int v = negative_to_zero(ROUND_POWER_OF_TWO(res, round_bits));
- dst[j] = AOMMIN(v, saturation_value);
- }
- mask += 2 * mask_stride;
- src0 += src0_stride;
- src1 += src1_stride;
- dst += dst_stride;
- }
- } else if (subw == 1 && subh == 0) {
- for (int i = 0; i < h; ++i) {
- for (int j = 0; j < w; ++j) {
- int32_t res;
- const int m = AOM_BLEND_AVG(mask[2 * j], mask[2 * j + 1]);
- res = (m * src0[j] + (AOM_BLEND_A64_MAX_ALPHA - m) * src1[j]) >>
- AOM_BLEND_A64_ROUND_BITS;
- res -= round_offset;
- unsigned int v = negative_to_zero(ROUND_POWER_OF_TWO(res, round_bits));
- dst[j] = AOMMIN(v, saturation_value);
- }
- mask += mask_stride;
- src0 += src0_stride;
- src1 += src1_stride;
- dst += dst_stride;
- }
- } else {
- for (int i = 0; i < h; ++i) {
- for (int j = 0; j < w; ++j) {
- int32_t res;
- const int m = AOM_BLEND_AVG(mask[j], mask[mask_stride + j]);
- res = (m * src0[j] + (AOM_BLEND_A64_MAX_ALPHA - m) * src1[j]) >>
- AOM_BLEND_A64_ROUND_BITS;
- res -= round_offset;
- unsigned int v = negative_to_zero(ROUND_POWER_OF_TWO(res, round_bits));
- dst[j] = AOMMIN(v, saturation_value);
- }
- mask += 2 * mask_stride;
- src0 += src0_stride;
- src1 += src1_stride;
- dst += dst_stride;
- }
- }
-}
-
-// Blending with alpha mask. Mask values come from the range [0, 64],
-// as described for AOM_BLEND_A64 in aom_dsp/blend.h. src0 or src1 can
-// be the same as dst, or dst can be different from both sources.
-
-void aom_blend_a64_mask_c(uint8_t *dst, uint32_t dst_stride,
- const uint8_t *src0, uint32_t src0_stride,
- const uint8_t *src1, uint32_t src1_stride,
- const uint8_t *mask, uint32_t mask_stride, int w,
- int h, int subw, int subh) {
- int i, j;
-
- assert(IMPLIES(src0 == dst, src0_stride == dst_stride));
- assert(IMPLIES(src1 == dst, src1_stride == dst_stride));
-
- assert(h >= 1);
- assert(w >= 1);
- assert(IS_POWER_OF_TWO(h));
- assert(IS_POWER_OF_TWO(w));
-
- if (subw == 0 && subh == 0) {
- for (i = 0; i < h; ++i) {
- for (j = 0; j < w; ++j) {
- const int m = mask[i * mask_stride + j];
- dst[i * dst_stride + j] = AOM_BLEND_A64(m, src0[i * src0_stride + j],
- src1[i * src1_stride + j]);
- }
- }
- } else if (subw == 1 && subh == 1) {
- for (i = 0; i < h; ++i) {
- for (j = 0; j < w; ++j) {
- const int m = ROUND_POWER_OF_TWO(
- mask[(2 * i) * mask_stride + (2 * j)] +
- mask[(2 * i + 1) * mask_stride + (2 * j)] +
- mask[(2 * i) * mask_stride + (2 * j + 1)] +
- mask[(2 * i + 1) * mask_stride + (2 * j + 1)],
- 2);
- dst[i * dst_stride + j] = AOM_BLEND_A64(m, src0[i * src0_stride + j],
- src1[i * src1_stride + j]);
- }
- }
- } else if (subw == 1 && subh == 0) {
- for (i = 0; i < h; ++i) {
- for (j = 0; j < w; ++j) {
- const int m = AOM_BLEND_AVG(mask[i * mask_stride + (2 * j)],
- mask[i * mask_stride + (2 * j + 1)]);
- dst[i * dst_stride + j] = AOM_BLEND_A64(m, src0[i * src0_stride + j],
- src1[i * src1_stride + j]);
- }
- }
- } else {
- for (i = 0; i < h; ++i) {
- for (j = 0; j < w; ++j) {
- const int m = AOM_BLEND_AVG(mask[(2 * i) * mask_stride + j],
- mask[(2 * i + 1) * mask_stride + j]);
- dst[i * dst_stride + j] = AOM_BLEND_A64(m, src0[i * src0_stride + j],
- src1[i * src1_stride + j]);
- }
- }
- }
-}
-
-void aom_highbd_blend_a64_mask_c(uint8_t *dst_8, uint32_t dst_stride,
- const uint8_t *src0_8, uint32_t src0_stride,
- const uint8_t *src1_8, uint32_t src1_stride,
- const uint8_t *mask, uint32_t mask_stride,
- int w, int h, int subw, int subh, int bd) {
- int i, j;
- uint16_t *dst = CONVERT_TO_SHORTPTR(dst_8);
- const uint16_t *src0 = CONVERT_TO_SHORTPTR(src0_8);
- const uint16_t *src1 = CONVERT_TO_SHORTPTR(src1_8);
- (void)bd;
-
- assert(IMPLIES(src0 == dst, src0_stride == dst_stride));
- assert(IMPLIES(src1 == dst, src1_stride == dst_stride));
-
- assert(h >= 1);
- assert(w >= 1);
- assert(IS_POWER_OF_TWO(h));
- assert(IS_POWER_OF_TWO(w));
-
- assert(bd == 8 || bd == 10 || bd == 12);
-
- if (subw == 0 && subh == 0) {
- for (i = 0; i < h; ++i) {
- for (j = 0; j < w; ++j) {
- const int m = mask[i * mask_stride + j];
- dst[i * dst_stride + j] = AOM_BLEND_A64(m, src0[i * src0_stride + j],
- src1[i * src1_stride + j]);
- }
- }
- } else if (subw == 1 && subh == 1) {
- for (i = 0; i < h; ++i) {
- for (j = 0; j < w; ++j) {
- const int m = ROUND_POWER_OF_TWO(
- mask[(2 * i) * mask_stride + (2 * j)] +
- mask[(2 * i + 1) * mask_stride + (2 * j)] +
- mask[(2 * i) * mask_stride + (2 * j + 1)] +
- mask[(2 * i + 1) * mask_stride + (2 * j + 1)],
- 2);
- dst[i * dst_stride + j] = AOM_BLEND_A64(m, src0[i * src0_stride + j],
- src1[i * src1_stride + j]);
- }
- }
- } else if (subw == 1 && subh == 0) {
- for (i = 0; i < h; ++i) {
- for (j = 0; j < w; ++j) {
- const int m = AOM_BLEND_AVG(mask[i * mask_stride + (2 * j)],
- mask[i * mask_stride + (2 * j + 1)]);
- dst[i * dst_stride + j] = AOM_BLEND_A64(m, src0[i * src0_stride + j],
- src1[i * src1_stride + j]);
- }
- }
- } else {
- for (i = 0; i < h; ++i) {
- for (j = 0; j < w; ++j) {
- const int m = AOM_BLEND_AVG(mask[(2 * i) * mask_stride + j],
- mask[(2 * i + 1) * mask_stride + j]);
- dst[i * dst_stride + j] = AOM_BLEND_A64(m, src0[i * src0_stride + j],
- src1[i * src1_stride + j]);
- }
- }
- }
-}
diff --git a/third_party/aom/aom_dsp/blend_a64_vmask.c b/third_party/aom/aom_dsp/blend_a64_vmask.c
deleted file mode 100644
index 4f222e17f..000000000
--- a/third_party/aom/aom_dsp/blend_a64_vmask.c
+++ /dev/null
@@ -1,71 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#include <assert.h>
-
-#include "aom/aom_integer.h"
-#include "aom_ports/mem.h"
-#include "aom_dsp/aom_dsp_common.h"
-#include "aom_dsp/blend.h"
-
-#include "config/aom_dsp_rtcd.h"
-
-void aom_blend_a64_vmask_c(uint8_t *dst, uint32_t dst_stride,
- const uint8_t *src0, uint32_t src0_stride,
- const uint8_t *src1, uint32_t src1_stride,
- const uint8_t *mask, int w, int h) {
- int i, j;
-
- assert(IMPLIES(src0 == dst, src0_stride == dst_stride));
- assert(IMPLIES(src1 == dst, src1_stride == dst_stride));
-
- assert(h >= 1);
- assert(w >= 1);
- assert(IS_POWER_OF_TWO(h));
- assert(IS_POWER_OF_TWO(w));
-
- for (i = 0; i < h; ++i) {
- const int m = mask[i];
- for (j = 0; j < w; ++j) {
- dst[i * dst_stride + j] = AOM_BLEND_A64(m, src0[i * src0_stride + j],
- src1[i * src1_stride + j]);
- }
- }
-}
-
-void aom_highbd_blend_a64_vmask_c(uint8_t *dst_8, uint32_t dst_stride,
- const uint8_t *src0_8, uint32_t src0_stride,
- const uint8_t *src1_8, uint32_t src1_stride,
- const uint8_t *mask, int w, int h, int bd) {
- int i, j;
- uint16_t *dst = CONVERT_TO_SHORTPTR(dst_8);
- const uint16_t *src0 = CONVERT_TO_SHORTPTR(src0_8);
- const uint16_t *src1 = CONVERT_TO_SHORTPTR(src1_8);
- (void)bd;
-
- assert(IMPLIES(src0 == dst, src0_stride == dst_stride));
- assert(IMPLIES(src1 == dst, src1_stride == dst_stride));
-
- assert(h >= 1);
- assert(w >= 1);
- assert(IS_POWER_OF_TWO(h));
- assert(IS_POWER_OF_TWO(w));
-
- assert(bd == 8 || bd == 10 || bd == 12);
-
- for (i = 0; i < h; ++i) {
- const int m = mask[i];
- for (j = 0; j < w; ++j) {
- dst[i * dst_stride + j] = AOM_BLEND_A64(m, src0[i * src0_stride + j],
- src1[i * src1_stride + j]);
- }
- }
-}
diff --git a/third_party/aom/aom_dsp/buf_ans.c b/third_party/aom/aom_dsp/buf_ans.c
deleted file mode 100644
index f7703dffc..000000000
--- a/third_party/aom/aom_dsp/buf_ans.c
+++ /dev/null
@@ -1,70 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#include <string.h>
-
-#include "aom_dsp/buf_ans.h"
-#include "aom_mem/aom_mem.h"
-#include "aom/internal/aom_codec_internal.h"
-
-void aom_buf_ans_alloc(struct BufAnsCoder *c,
- struct aom_internal_error_info *error) {
- c->error = error;
- assert(c->size > 1);
- AOM_CHECK_MEM_ERROR(error, c->buf, aom_malloc(c->size * sizeof(*c->buf)));
- // Initialize to overfull to trigger the assert in write.
- c->offset = c->size + 1;
-}
-
-void aom_buf_ans_free(struct BufAnsCoder *c) {
- aom_free(c->buf);
- c->buf = NULL;
- c->size = 0;
-}
-
-#if !ANS_MAX_SYMBOLS
-void aom_buf_ans_grow(struct BufAnsCoder *c) {
- struct buffered_ans_symbol *new_buf = NULL;
- int new_size = c->size * 2;
- AOM_CHECK_MEM_ERROR(c->error, new_buf,
- aom_malloc(new_size * sizeof(*new_buf)));
- memcpy(new_buf, c->buf, c->size * sizeof(*c->buf));
- aom_free(c->buf);
- c->buf = new_buf;
- c->size = new_size;
-}
-#endif
-
-void aom_buf_ans_flush(struct BufAnsCoder *const c) {
- int offset;
-#if ANS_MAX_SYMBOLS
- if (c->offset == 0) return;
-#endif
- assert(c->offset > 0);
- offset = c->offset - 1;
- // Code the first symbol such that it brings the state to the smallest normal
- // state from an initial state that would have been a subnormal/refill state.
- if (c->buf[offset].method == ANS_METHOD_RANS) {
- c->ans.state += c->buf[offset].val_start;
- } else {
- c->ans.state += c->buf[offset].val_start ? c->buf[offset].prob : 0;
- }
- for (offset = offset - 1; offset >= 0; --offset) {
- if (c->buf[offset].method == ANS_METHOD_RANS) {
- rans_write(&c->ans, c->buf[offset].val_start, c->buf[offset].prob);
- } else {
- rabs_write(&c->ans, (uint8_t)c->buf[offset].val_start,
- (AnsP8)c->buf[offset].prob);
- }
- }
- c->offset = 0;
- c->output_bytes += ans_write_end(&c->ans);
-}
diff --git a/third_party/aom/aom_dsp/buf_ans.h b/third_party/aom/aom_dsp/buf_ans.h
deleted file mode 100644
index 985fcdf9e..000000000
--- a/third_party/aom/aom_dsp/buf_ans.h
+++ /dev/null
@@ -1,136 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#ifndef AOM_AOM_DSP_BUF_ANS_H_
-#define AOM_AOM_DSP_BUF_ANS_H_
-// Buffered forward ANS writer.
-// Symbols are written to the writer in forward (decode) order and serialized
-// backwards due to ANS's stack like behavior.
-
-#include <assert.h>
-#include "config/aom_config.h"
-
-#include "aom/aom_integer.h"
-#include "aom_dsp/ans.h"
-#include "aom_dsp/answriter.h"
-
-#ifdef __cplusplus
-extern "C" {
-#endif // __cplusplus
-
-#define ANS_METHOD_RABS 0
-#define ANS_METHOD_RANS 1
-
-struct buffered_ans_symbol {
- unsigned int method : 1; // one of ANS_METHOD_RABS or ANS_METHOD_RANS
- // TODO(aconverse): Should be possible to write this in terms of start for ABS
- unsigned int val_start : RANS_PROB_BITS; // Boolean value for ABS
- // start in symbol cycle for Rans
- unsigned int prob : RANS_PROB_BITS; // Probability of this symbol
-};
-
-struct BufAnsCoder {
- struct aom_internal_error_info *error;
- struct buffered_ans_symbol *buf;
- struct AnsCoder ans;
- int size;
- int offset;
- int output_bytes;
-#if ANS_MAX_SYMBOLS
- int window_size;
-#endif
- int pos; // Dummy variable to store the output buffer after closing
- uint8_t allow_update_cdf;
-};
-
-// Allocate a buffered ANS coder to store size symbols.
-// When ANS_MAX_SYMBOLS is turned on, the size is the fixed size of each ANS
-// partition.
-// When ANS_MAX_SYMBOLS is turned off, size is merely an initial hint and the
-// buffer will grow on demand
-void aom_buf_ans_alloc(struct BufAnsCoder *c,
- struct aom_internal_error_info *error);
-
-void aom_buf_ans_free(struct BufAnsCoder *c);
-
-#if !ANS_MAX_SYMBOLS
-void aom_buf_ans_grow(struct BufAnsCoder *c);
-#endif
-
-void aom_buf_ans_flush(struct BufAnsCoder *const c);
-
-static INLINE void buf_ans_write_init(struct BufAnsCoder *const c,
- uint8_t *const output_buffer) {
- c->offset = 0;
- c->output_bytes = 0;
- ans_write_init(&c->ans, output_buffer);
-}
-
-static INLINE void buf_rabs_write(struct BufAnsCoder *const c, uint8_t val,
- AnsP8 prob) {
- assert(c->offset <= c->size);
-#if !ANS_MAX_SYMBOLS
- if (c->offset == c->size) {
- aom_buf_ans_grow(c);
- }
-#endif
- c->buf[c->offset].method = ANS_METHOD_RABS;
- c->buf[c->offset].val_start = val;
- c->buf[c->offset].prob = prob;
- ++c->offset;
-#if ANS_MAX_SYMBOLS
- if (c->offset == c->size) aom_buf_ans_flush(c);
-#endif
-}
-
-// Buffer one symbol for encoding using rANS.
-// cum_prob: The cumulative probability before this symbol (the offset of
-// the symbol in the symbol cycle)
-// prob: The probability of this symbol (l_s from the paper)
-// RANS_PRECISION takes the place of m from the paper.
-static INLINE void buf_rans_write(struct BufAnsCoder *const c,
- aom_cdf_prob cum_prob, aom_cdf_prob prob) {
- assert(c->offset <= c->size);
-#if !ANS_MAX_SYMBOLS
- if (c->offset == c->size) {
- aom_buf_ans_grow(c);
- }
-#endif
- c->buf[c->offset].method = ANS_METHOD_RANS;
- c->buf[c->offset].val_start = cum_prob;
- c->buf[c->offset].prob = prob;
- ++c->offset;
-#if ANS_MAX_SYMBOLS
- if (c->offset == c->size) aom_buf_ans_flush(c);
-#endif
-}
-
-static INLINE void buf_rabs_write_bit(struct BufAnsCoder *c, int bit) {
- buf_rabs_write(c, bit, 128);
-}
-
-static INLINE void buf_rabs_write_literal(struct BufAnsCoder *c, int literal,
- int bits) {
- int bit;
-
- assert(bits < 31);
- for (bit = bits - 1; bit >= 0; bit--)
- buf_rabs_write_bit(c, 1 & (literal >> bit));
-}
-
-static INLINE int buf_ans_write_end(struct BufAnsCoder *const c) {
- assert(c->offset == 0);
- return c->output_bytes;
-}
-#ifdef __cplusplus
-} // extern "C"
-#endif // __cplusplus
-#endif // AOM_AOM_DSP_BUF_ANS_H_
diff --git a/third_party/aom/aom_dsp/daalaboolreader.c b/third_party/aom/aom_dsp/daalaboolreader.c
deleted file mode 100644
index 6c2259f23..000000000
--- a/third_party/aom/aom_dsp/daalaboolreader.c
+++ /dev/null
@@ -1,47 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#include "aom_dsp/daalaboolreader.h"
-
-int aom_daala_reader_init(daala_reader *r, const uint8_t *buffer, int size) {
- if (size && !buffer) {
- return 1;
- }
- r->buffer_end = buffer + size;
- r->buffer = buffer;
- od_ec_dec_init(&r->ec, buffer, size);
-#if CONFIG_ACCOUNTING
- r->accounting = NULL;
-#endif
- return 0;
-}
-
-const uint8_t *aom_daala_reader_find_begin(daala_reader *r) {
- return r->buffer;
-}
-
-const uint8_t *aom_daala_reader_find_end(daala_reader *r) {
- return r->buffer_end;
-}
-
-uint32_t aom_daala_reader_tell(const daala_reader *r) {
- return od_ec_dec_tell(&r->ec);
-}
-
-uint32_t aom_daala_reader_tell_frac(const daala_reader *r) {
- return od_ec_dec_tell_frac(&r->ec);
-}
-
-int aom_daala_reader_has_overflowed(const daala_reader *r) {
- const uint32_t tell_bits = aom_daala_reader_tell(r);
- const uint32_t tell_bytes = (tell_bits + 7) >> 3;
- return ((ptrdiff_t)tell_bytes > r->buffer_end - r->buffer);
-}
diff --git a/third_party/aom/aom_dsp/daalaboolreader.h b/third_party/aom/aom_dsp/daalaboolreader.h
deleted file mode 100644
index ba78f916d..000000000
--- a/third_party/aom/aom_dsp/daalaboolreader.h
+++ /dev/null
@@ -1,160 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#ifndef AOM_AOM_DSP_DAALABOOLREADER_H_
-#define AOM_AOM_DSP_DAALABOOLREADER_H_
-
-#include "aom/aom_integer.h"
-#include "aom_dsp/entdec.h"
-#include "aom_dsp/prob.h"
-#if CONFIG_ACCOUNTING
-#include "av1/decoder/accounting.h"
-#endif
-#if CONFIG_BITSTREAM_DEBUG
-#include <stdio.h>
-#include "aom_util/debug_util.h"
-#endif // CONFIG_BITSTREAM_DEBUG
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-struct daala_reader {
- const uint8_t *buffer;
- const uint8_t *buffer_end;
- od_ec_dec ec;
-#if CONFIG_ACCOUNTING
- Accounting *accounting;
-#endif
- uint8_t allow_update_cdf;
-};
-
-typedef struct daala_reader daala_reader;
-
-int aom_daala_reader_init(daala_reader *r, const uint8_t *buffer, int size);
-const uint8_t *aom_daala_reader_find_begin(daala_reader *r);
-const uint8_t *aom_daala_reader_find_end(daala_reader *r);
-uint32_t aom_daala_reader_tell(const daala_reader *r);
-uint32_t aom_daala_reader_tell_frac(const daala_reader *r);
-// Returns true if the reader has tried to decode more data from the buffer
-// than was actually provided.
-int aom_daala_reader_has_overflowed(const daala_reader *r);
-
-static INLINE int aom_daala_read(daala_reader *r, int prob) {
- int bit;
- int p = (0x7FFFFF - (prob << 15) + prob) >> 8;
-#if CONFIG_BITSTREAM_DEBUG
-/*{
- const int queue_r = bitstream_queue_get_read();
- const int frame_idx = bitstream_queue_get_frame_read();
- if (frame_idx == 0 && queue_r == 0) {
- fprintf(stderr, "\n *** bitstream queue at frame_idx_r %d queue_r %d\n",
- frame_idx, queue_r);
- }
-}*/
-#endif
-
- bit = od_ec_decode_bool_q15(&r->ec, p);
-
-#if CONFIG_BITSTREAM_DEBUG
- {
- int i;
- int ref_bit, ref_nsymbs;
- aom_cdf_prob ref_cdf[16];
- const int queue_r = bitstream_queue_get_read();
- const int frame_idx = bitstream_queue_get_frame_read();
- bitstream_queue_pop(&ref_bit, ref_cdf, &ref_nsymbs);
- if (ref_nsymbs != 2) {
- fprintf(stderr,
- "\n *** [bit] nsymbs error, frame_idx_r %d nsymbs %d ref_nsymbs "
- "%d queue_r %d\n",
- frame_idx, 2, ref_nsymbs, queue_r);
- assert(0);
- }
- if ((ref_nsymbs != 2) || (ref_cdf[0] != (aom_cdf_prob)p) ||
- (ref_cdf[1] != 32767)) {
- fprintf(stderr,
- "\n *** [bit] cdf error, frame_idx_r %d cdf {%d, %d} ref_cdf {%d",
- frame_idx, p, 32767, ref_cdf[0]);
- for (i = 1; i < ref_nsymbs; ++i) fprintf(stderr, ", %d", ref_cdf[i]);
- fprintf(stderr, "} queue_r %d\n", queue_r);
- assert(0);
- }
- if (bit != ref_bit) {
- fprintf(stderr,
- "\n *** [bit] symb error, frame_idx_r %d symb %d ref_symb %d "
- "queue_r %d\n",
- frame_idx, bit, ref_bit, queue_r);
- assert(0);
- }
- }
-#endif
-
- return bit;
-}
-
-static INLINE int aom_daala_reader_has_error(daala_reader *r) {
- return r->ec.error;
-}
-
-static INLINE int daala_read_symbol(daala_reader *r, const aom_cdf_prob *cdf,
- int nsymbs) {
- int symb;
- assert(cdf != NULL);
- symb = od_ec_decode_cdf_q15(&r->ec, cdf, nsymbs);
-
-#if CONFIG_BITSTREAM_DEBUG
- {
- int i;
- int cdf_error = 0;
- int ref_symb, ref_nsymbs;
- aom_cdf_prob ref_cdf[16];
- const int queue_r = bitstream_queue_get_read();
- const int frame_idx = bitstream_queue_get_frame_read();
- bitstream_queue_pop(&ref_symb, ref_cdf, &ref_nsymbs);
- if (nsymbs != ref_nsymbs) {
- fprintf(stderr,
- "\n *** nsymbs error, frame_idx_r %d nsymbs %d ref_nsymbs %d "
- "queue_r %d\n",
- frame_idx, nsymbs, ref_nsymbs, queue_r);
- cdf_error = 0;
- assert(0);
- } else {
- for (i = 0; i < nsymbs; ++i)
- if (cdf[i] != ref_cdf[i]) cdf_error = 1;
- }
- if (cdf_error) {
- fprintf(stderr, "\n *** cdf error, frame_idx_r %d cdf {%d", frame_idx,
- cdf[0]);
- for (i = 1; i < nsymbs; ++i) fprintf(stderr, ", %d", cdf[i]);
- fprintf(stderr, "} ref_cdf {%d", ref_cdf[0]);
- for (i = 1; i < ref_nsymbs; ++i) fprintf(stderr, ", %d", ref_cdf[i]);
- fprintf(stderr, "} queue_r %d\n", queue_r);
- assert(0);
- }
- if (symb != ref_symb) {
- fprintf(
- stderr,
- "\n *** symb error, frame_idx_r %d symb %d ref_symb %d queue_r %d\n",
- frame_idx, symb, ref_symb, queue_r);
- assert(0);
- }
- }
-#endif
-
- return symb;
-}
-
-#ifdef __cplusplus
-} // extern "C"
-#endif
-
-#endif // AOM_AOM_DSP_DAALABOOLREADER_H_
diff --git a/third_party/aom/aom_dsp/daalaboolwriter.c b/third_party/aom/aom_dsp/daalaboolwriter.c
deleted file mode 100644
index b24ffbf3f..000000000
--- a/third_party/aom/aom_dsp/daalaboolwriter.c
+++ /dev/null
@@ -1,31 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#include <string.h>
-#include "aom_dsp/daalaboolwriter.h"
-
-void aom_daala_start_encode(daala_writer *br, uint8_t *source) {
- br->buffer = source;
- br->pos = 0;
- od_ec_enc_init(&br->ec, 62025);
-}
-
-int aom_daala_stop_encode(daala_writer *br) {
- int nb_bits;
- uint32_t daala_bytes;
- unsigned char *daala_data;
- daala_data = od_ec_enc_done(&br->ec, &daala_bytes);
- nb_bits = od_ec_enc_tell(&br->ec);
- memcpy(br->buffer, daala_data, daala_bytes);
- br->pos = daala_bytes;
- od_ec_enc_clear(&br->ec);
- return nb_bits;
-}
diff --git a/third_party/aom/aom_dsp/daalaboolwriter.h b/third_party/aom/aom_dsp/daalaboolwriter.h
deleted file mode 100644
index 3848877ce..000000000
--- a/third_party/aom/aom_dsp/daalaboolwriter.h
+++ /dev/null
@@ -1,78 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#ifndef AOM_AOM_DSP_DAALABOOLWRITER_H_
-#define AOM_AOM_DSP_DAALABOOLWRITER_H_
-
-#include <stdio.h>
-
-#include "aom_dsp/entenc.h"
-#include "aom_dsp/prob.h"
-#if CONFIG_BITSTREAM_DEBUG
-#include "aom_util/debug_util.h"
-#endif // CONFIG_BITSTREAM_DEBUG
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-struct daala_writer {
- unsigned int pos;
- uint8_t *buffer;
- od_ec_enc ec;
- uint8_t allow_update_cdf;
-};
-
-typedef struct daala_writer daala_writer;
-
-void aom_daala_start_encode(daala_writer *w, uint8_t *buffer);
-int aom_daala_stop_encode(daala_writer *w);
-
-static INLINE void aom_daala_write(daala_writer *w, int bit, int prob) {
- int p = (0x7FFFFF - (prob << 15) + prob) >> 8;
-#if CONFIG_BITSTREAM_DEBUG
- aom_cdf_prob cdf[2] = { (aom_cdf_prob)p, 32767 };
- /*int queue_r = 0;
- int frame_idx_r = 0;
- int queue_w = bitstream_queue_get_write();
- int frame_idx_w = bitstream_queue_get_frame_write();
- if (frame_idx_w == frame_idx_r && queue_w == queue_r) {
- fprintf(stderr, "\n *** bitstream queue at frame_idx_w %d queue_w %d\n",
- frame_idx_w, queue_w);
- }*/
- bitstream_queue_push(bit, cdf, 2);
-#endif
-
- od_ec_encode_bool_q15(&w->ec, bit, p);
-}
-
-static INLINE void daala_write_symbol(daala_writer *w, int symb,
- const aom_cdf_prob *cdf, int nsymbs) {
-#if CONFIG_BITSTREAM_DEBUG
- /*int queue_r = 0;
- int frame_idx_r = 0;
- int queue_w = bitstream_queue_get_write();
- int frame_idx_w = bitstream_queue_get_frame_write();
- if (frame_idx_w == frame_idx_r && queue_w == queue_r) {
- fprintf(stderr, "\n *** bitstream queue at frame_idx_w %d queue_w %d\n",
- frame_idx_w, queue_w);
- }*/
- bitstream_queue_push(symb, cdf, nsymbs);
-#endif
-
- od_ec_encode_cdf_q15(&w->ec, symb, cdf, nsymbs);
-}
-
-#ifdef __cplusplus
-} // extern "C"
-#endif
-
-#endif // AOM_AOM_DSP_DAALABOOLWRITER_H_
diff --git a/third_party/aom/aom_dsp/entcode.c b/third_party/aom/aom_dsp/entcode.c
deleted file mode 100644
index aad96c6fc..000000000
--- a/third_party/aom/aom_dsp/entcode.c
+++ /dev/null
@@ -1,49 +0,0 @@
-/*
- * Copyright (c) 2001-2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#include "aom_dsp/entcode.h"
-
-/*Given the current total integer number of bits used and the current value of
- rng, computes the fraction number of bits used to OD_BITRES precision.
- This is used by od_ec_enc_tell_frac() and od_ec_dec_tell_frac().
- nbits_total: The number of whole bits currently used, i.e., the value
- returned by od_ec_enc_tell() or od_ec_dec_tell().
- rng: The current value of rng from either the encoder or decoder state.
- Return: The number of bits scaled by 2**OD_BITRES.
- This will always be slightly larger than the exact value (e.g., all
- rounding error is in the positive direction).*/
-uint32_t od_ec_tell_frac(uint32_t nbits_total, uint32_t rng) {
-  uint32_t frac;
-  int steps;
-  /*To handle the non-integral number of bits still left in the encoder/decoder
-     state, we compute the worst-case number of bits of val that must be
-     encoded to ensure that the value is inside the range for any possible
-     subsequent bits.
-    The computation here is independent of val itself (the decoder does not
-     even track that value), even though the real number of bits used after
-     od_ec_enc_done() may be 1 smaller if rng is a power of two and the
-     corresponding trailing bits of val are all zeros.
-    If we did try to track that special case, then coding a value with a
-     probability of 1/(1 << n) might sometimes appear to use more than n bits.
-    This may help explain the surprising result that a newly initialized
-     encoder or decoder claims to have used 1 bit.*/
-  frac = 0;
-  /*Each pass squares rng and extracts one more fractional bit, most
-     significant first; OD_BITRES passes give OD_BITRES fractional bits.*/
-  for (steps = 0; steps < OD_BITRES; steps++) {
-    uint32_t top;
-    rng = (rng * rng) >> 15;
-    top = rng >> 16;
-    frac = (frac << 1) | top;
-    rng >>= top;
-  }
-  return (nbits_total << OD_BITRES) - frac;
-}
diff --git a/third_party/aom/aom_dsp/entcode.h b/third_party/aom/aom_dsp/entcode.h
deleted file mode 100644
index 7ba2b1c39..000000000
--- a/third_party/aom/aom_dsp/entcode.h
+++ /dev/null
@@ -1,40 +0,0 @@
-/*
- * Copyright (c) 2001-2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#ifndef AOM_AOM_DSP_ENTCODE_H_
-#define AOM_AOM_DSP_ENTCODE_H_
-
-#include <limits.h>
-#include <stddef.h>
-#include "av1/common/odintrin.h"
-#include "aom_dsp/prob.h"
-
-#define EC_PROB_SHIFT 6
-#define EC_MIN_PROB 4 // must be <= (1<<EC_PROB_SHIFT)/16
-
-/*OPT: od_ec_window must be at least 32 bits, but if you have fast arithmetic
- on a larger type, you can speed up the decoder by using it here.*/
-typedef uint32_t od_ec_window;
-
-#define OD_EC_WINDOW_SIZE ((int)sizeof(od_ec_window) * CHAR_BIT)
-
-/*The resolution of fractional-precision bit usage measurements, i.e.,
- 3 => 1/8th bits.*/
-#define OD_BITRES (3)
-
-#define OD_ICDF AOM_ICDF
-
-/*See entcode.c for further documentation.*/
-
-OD_WARN_UNUSED_RESULT uint32_t od_ec_tell_frac(uint32_t nbits_total,
- uint32_t rng);
-
-#endif // AOM_AOM_DSP_ENTCODE_H_
diff --git a/third_party/aom/aom_dsp/entdec.c b/third_party/aom/aom_dsp/entdec.c
deleted file mode 100644
index d1764c47b..000000000
--- a/third_party/aom/aom_dsp/entdec.c
+++ /dev/null
@@ -1,229 +0,0 @@
-/*
- * Copyright (c) 2001-2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#include <assert.h>
-#include "aom_dsp/entdec.h"
-#include "aom_dsp/prob.h"
-
-/*A range decoder.
- This is an entropy decoder based upon \cite{Mar79}, which is itself a
- rediscovery of the FIFO arithmetic code introduced by \cite{Pas76}.
- It is very similar to arithmetic encoding, except that encoding is done with
- digits in any base, instead of with bits, and so it is faster when using
- larger bases (i.e.: a byte).
- The author claims an average waste of $\frac{1}{2}\log_b(2b)$ bits, where $b$
- is the base, longer than the theoretical optimum, but to my knowledge there
- is no published justification for this claim.
- This only seems true when using near-infinite precision arithmetic so that
- the process is carried out with no rounding errors.
-
- An excellent description of implementation details is available at
- http://www.arturocampos.com/ac_range.html
- A recent work \cite{MNW98} which proposes several changes to arithmetic
- encoding for efficiency actually re-discovers many of the principles
- behind range encoding, and presents a good theoretical analysis of them.
-
- End of stream is handled by writing out the smallest number of bits that
- ensures that the stream will be correctly decoded regardless of the value of
- any subsequent bits.
- od_ec_dec_tell() can be used to determine how many bits were needed to decode
- all the symbols thus far; other data can be packed in the remaining bits of
- the input buffer.
- @PHDTHESIS{Pas76,
- author="Richard Clark Pasco",
- title="Source coding algorithms for fast data compression",
- school="Dept. of Electrical Engineering, Stanford University",
- address="Stanford, CA",
- month=May,
- year=1976,
- URL="http://www.richpasco.org/scaffdc.pdf"
- }
- @INPROCEEDINGS{Mar79,
- author="Martin, G.N.N.",
- title="Range encoding: an algorithm for removing redundancy from a digitised
- message",
- booktitle="Video & Data Recording Conference",
- year=1979,
- address="Southampton",
- month=Jul,
- URL="http://www.compressconsult.com/rangecoder/rngcod.pdf.gz"
- }
- @ARTICLE{MNW98,
- author="Alistair Moffat and Radford Neal and Ian H. Witten",
- title="Arithmetic Coding Revisited",
- journal="{ACM} Transactions on Information Systems",
- year=1998,
- volume=16,
- number=3,
- pages="256--294",
- month=Jul,
- URL="http://researchcommons.waikato.ac.nz/bitstream/handle/10289/78/content.pdf"
- }*/
-
-/*This is meant to be a large, positive constant that can still be efficiently
- loaded as an immediate (on platforms like ARM, for example).
- Even relatively modest values like 100 would work fine.*/
-#define OD_EC_LOTS_OF_BITS (0x4000)
-
-/*The return value of od_ec_dec_tell does not change across an od_ec_dec_refill
- call.*/
-static void od_ec_dec_refill(od_ec_dec *dec) {
- int s;
- od_ec_window dif;
- int16_t cnt;
- const unsigned char *bptr;
- const unsigned char *end;
- /*Work on local copies; dif, cnt and bptr are stored back at the end.*/
- dif = dec->dif;
- cnt = dec->cnt;
- bptr = dec->bptr;
- end = dec->end;
- /*s is the left shift at which the next input byte is merged into dif.*/
- s = OD_EC_WINDOW_SIZE - 9 - (cnt + 15);
- /*Merge whole bytes while the shift is still non-negative and input
-    remains; each byte lowers the shift by 8 and adds 8 to cnt.*/
- for (; s >= 0 && bptr < end; s -= 8, bptr++) {
- assert(s <= OD_EC_WINDOW_SIZE - 8);
- dif ^= (od_ec_window)bptr[0] << s;
- cnt += 8;
- }
- if (bptr >= end) {
- /*Input is exhausted: set cnt to a large constant so no further refill is
-    triggered soon, and add the same jump to tell_offs so that
-    od_ec_dec_tell() (which computes ... - cnt + tell_offs) is unchanged.*/
- dec->tell_offs += OD_EC_LOTS_OF_BITS - cnt;
- cnt = OD_EC_LOTS_OF_BITS;
- }
- dec->dif = dif;
- dec->cnt = cnt;
- dec->bptr = bptr;
-}
-
-/*Takes updated dif and range values, renormalizes them so that
- 32768 <= rng < 65536 (reading more bytes from the stream into dif if
- necessary), and stores them back in the decoder context.
- dif: The new value of dif.
- rng: The new value of the range.
- ret: The value to return.
- Return: ret.
- This allows the compiler to jump to this function via a tail-call.*/
-static int od_ec_dec_normalize(od_ec_dec *dec, od_ec_window dif, unsigned rng,
-                               int ret) {
-  int shift;
-  assert(rng <= 65535U);
-  /*shift is the count of leading zeros of rng viewed as a 16-bit value.*/
-  shift = 16 - OD_ILOG_NZ(rng);
-  dec->rng = rng << shift;
-  /*Adding 1 before the shift and subtracting it afterwards fills the vacated
-     low bits with 1's instead of 0's.*/
-  dec->dif = ((dif + 1) << shift) - 1;
-  dec->cnt -= shift;
-  /*Pull in more input once the bit count has gone negative.*/
-  if (dec->cnt < 0) {
-    od_ec_dec_refill(dec);
-  }
-  return ret;
-}
-
-/*Initializes the decoder.
- buf: The input buffer to use.
- storage: The size of the input buffer, in bytes.
- This function returns nothing; it clears dec->error and performs the first
- refill from buf.*/
-void od_ec_dec_init(od_ec_dec *dec, const unsigned char *buf,
- uint32_t storage) {
- dec->buf = buf;
- dec->tell_offs = 10 - (OD_EC_WINDOW_SIZE - 8);
- /*One past the last readable byte.*/
- dec->end = buf + storage;
- dec->bptr = buf;
- /*All bits below the top bit of the window start out set.*/
- dec->dif = ((od_ec_window)1 << (OD_EC_WINDOW_SIZE - 1)) - 1;
- dec->rng = 0x8000;
- dec->cnt = -15;
- dec->error = 0;
- od_ec_dec_refill(dec);
-}
-
-/*Decode a single binary value.
- f: The probability that the bit is one, scaled by 32768.
- Return: The value decoded (0 or 1).*/
-int od_ec_decode_bool_q15(od_ec_dec *dec, unsigned f) {
-  od_ec_window dif;
-  od_ec_window split_w;
-  unsigned rng;
-  unsigned split;
-  assert(0 < f);
-  assert(f < 32768U);
-  dif = dec->dif;
-  rng = dec->rng;
-  assert(dif >> (OD_EC_WINDOW_SIZE - 16) < rng);
-  assert(32768U <= rng);
-  /*split is the part of the range assigned to the value 1.*/
-  split =
-      ((rng >> 8) * (uint32_t)(f >> EC_PROB_SHIFT) >> (7 - EC_PROB_SHIFT));
-  split += EC_MIN_PROB;
-  /*Align split with the top 16 bits of the window for the comparison.*/
-  split_w = (od_ec_window)split << (OD_EC_WINDOW_SIZE - 16);
-  if (dif >= split_w) {
-    /*Decoded a 0: keep the upper part of the range.*/
-    return od_ec_dec_normalize(dec, dif - split_w, rng - split, 0);
-  }
-  /*Decoded a 1: the new range is split itself.*/
-  return od_ec_dec_normalize(dec, dif, split, 1);
-}
-
-/*Decodes a symbol given an inverse cumulative distribution function (CDF)
- table in Q15.
- icdf: CDF_PROB_TOP minus the CDF, such that symbol s falls in the range
- [s > 0 ? (CDF_PROB_TOP - icdf[s - 1]) : 0, CDF_PROB_TOP - icdf[s]).
- The values must be monotonically non-increasing, and icdf[nsyms - 1]
- must be 0.
- nsyms: The number of symbols in the alphabet.
- This should be at most 16.
- Return: The decoded symbol s.*/
-int od_ec_decode_cdf_q15(od_ec_dec *dec, const uint16_t *icdf, int nsyms) {
- od_ec_window dif;
- unsigned r;
- unsigned c;
- unsigned u;
- unsigned v;
- int ret;
- /*Redundant here: nsyms is also used below to compute N.*/
- (void)nsyms;
- dif = dec->dif;
- r = dec->rng;
- /*Index of the last symbol; scales the per-symbol EC_MIN_PROB floor.*/
- const int N = nsyms - 1;
-
- assert(dif >> (OD_EC_WINDOW_SIZE - 16) < r);
- assert(icdf[nsyms - 1] == OD_ICDF(CDF_PROB_TOP));
- assert(32768U <= r);
- assert(7 - EC_PROB_SHIFT - CDF_SHIFT >= 0);
- /*c is the top 16 bits of the window, compared against scaled boundaries.*/
- c = (unsigned)(dif >> (OD_EC_WINDOW_SIZE - 16));
- v = r;
- ret = -1;
- /*Walk symbols from 0 upward: u keeps the previous boundary and v becomes
-    the boundary for symbol ret. The loop stops at the first symbol whose
-    boundary v is not above c.*/
- do {
- u = v;
- v = ((r >> 8) * (uint32_t)(icdf[++ret] >> EC_PROB_SHIFT) >>
- (7 - EC_PROB_SHIFT - CDF_SHIFT));
- v += EC_MIN_PROB * (N - ret);
- } while (c < v);
- assert(v < u);
- assert(u <= r);
- /*The new range is the width of the chosen interval; dif is rebased to it.*/
- r = u - v;
- dif -= (od_ec_window)v << (OD_EC_WINDOW_SIZE - 16);
- return od_ec_dec_normalize(dec, dif, r, ret);
-}
-
-/*Returns the number of bits "used" by the decoded symbols so far.
- This same number can be computed in either the encoder or the decoder, and is
- suitable for making coding decisions.
- Return: The number of bits.
- This will always be slightly larger than the exact value (e.g., all
- rounding error is in the positive direction).*/
-int od_ec_dec_tell(const od_ec_dec *dec) {
-  /*Bytes consumed from the input buffer so far.*/
-  const ptrdiff_t bytes_read = dec->bptr - dec->buf;
-  /*Convert to bits, remove the bits still buffered in the window, and apply
-     the bookkeeping offset maintained by init/refill.*/
-  return (int)(bytes_read * 8 - dec->cnt + dec->tell_offs);
-}
-
-/*Returns the number of bits "used" by the decoded symbols so far.
- This same number can be computed in either the encoder or the decoder, and is
- suitable for making coding decisions.
- Return: The number of bits scaled by 2**OD_BITRES.
- This will always be slightly larger than the exact value (e.g., all
- rounding error is in the positive direction).*/
-uint32_t od_ec_dec_tell_frac(const od_ec_dec *dec) {
-  /*Whole-bit count first; od_ec_tell_frac() refines it using rng.*/
-  const int whole_bits = od_ec_dec_tell(dec);
-  return od_ec_tell_frac(whole_bits, dec->rng);
-}
diff --git a/third_party/aom/aom_dsp/entdec.h b/third_party/aom/aom_dsp/entdec.h
deleted file mode 100644
index 283bf1831..000000000
--- a/third_party/aom/aom_dsp/entdec.h
+++ /dev/null
@@ -1,83 +0,0 @@
-/*
- * Copyright (c) 2001-2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#ifndef AOM_AOM_DSP_ENTDEC_H_
-#define AOM_AOM_DSP_ENTDEC_H_
-#include <limits.h>
-#include "aom_dsp/entcode.h"
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-typedef struct od_ec_dec od_ec_dec;
-
-#if defined(OD_ACCOUNTING) && OD_ACCOUNTING
-#define OD_ACC_STR , char *acc_str
-#define od_ec_dec_bits(dec, ftb, str) od_ec_dec_bits_(dec, ftb, str)
-#else
-#define OD_ACC_STR
-#define od_ec_dec_bits(dec, ftb, str) od_ec_dec_bits_(dec, ftb)
-#endif
-
-/*The entropy decoder context.*/
-struct od_ec_dec {
- /*The start of the current input buffer.*/
- const unsigned char *buf;
- /*An offset used to keep track of tell after reaching the end of the stream.
- This is constant throughout most of the decoding process, but becomes
- important once we hit the end of the buffer and stop incrementing pointers
- (and instead pretend cnt has lots of bits).*/
- int32_t tell_offs;
- /*The end of the current input buffer.
- od_ec_dec_init() sets this to buf + storage, i.e. one past the last byte.*/
- const unsigned char *end;
- /*The read pointer for the entropy-coded bits.*/
- const unsigned char *bptr;
- /*The difference between the high end of the current range, (low + rng), and
- the coded value, minus 1.
- This stores up to OD_EC_WINDOW_SIZE bits of that difference, but the
- decoder only uses the top 16 bits of the window to decode the next symbol.
- As we shift up during renormalization, if we don't have enough bits left in
- the window to fill the top 16, we'll read in more bits of the coded
- value.*/
- od_ec_window dif;
- /*The number of values in the current range.
- od_ec_dec_init() starts it at 0x8000.*/
- uint16_t rng;
- /*The number of bits of data in the current value.
- od_ec_dec_init() starts it at -15; a refill is triggered when
- renormalization drives it below zero.*/
- int16_t cnt;
- /*Nonzero if an error occurred.
- od_ec_dec_init() clears it.*/
- int error;
-};
-
-/*See entdec.c for further documentation.*/
-
-void od_ec_dec_init(od_ec_dec *dec, const unsigned char *buf, uint32_t storage)
- OD_ARG_NONNULL(1) OD_ARG_NONNULL(2);
-
-OD_WARN_UNUSED_RESULT int od_ec_decode_bool_q15(od_ec_dec *dec, unsigned f)
- OD_ARG_NONNULL(1);
-OD_WARN_UNUSED_RESULT int od_ec_decode_cdf_q15(od_ec_dec *dec,
- const uint16_t *cdf, int nsyms)
- OD_ARG_NONNULL(1) OD_ARG_NONNULL(2);
-
-OD_WARN_UNUSED_RESULT uint32_t od_ec_dec_bits_(od_ec_dec *dec, unsigned ftb)
- OD_ARG_NONNULL(1);
-
-OD_WARN_UNUSED_RESULT int od_ec_dec_tell(const od_ec_dec *dec)
- OD_ARG_NONNULL(1);
-OD_WARN_UNUSED_RESULT uint32_t od_ec_dec_tell_frac(const od_ec_dec *dec)
- OD_ARG_NONNULL(1);
-
-#ifdef __cplusplus
-} // extern "C"
-#endif
-
-#endif // AOM_AOM_DSP_ENTDEC_H_
diff --git a/third_party/aom/aom_dsp/entenc.c b/third_party/aom/aom_dsp/entenc.c
deleted file mode 100644
index a61da263c..000000000
--- a/third_party/aom/aom_dsp/entenc.c
+++ /dev/null
@@ -1,423 +0,0 @@
-/*
- * Copyright (c) 2001-2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#include <stdlib.h>
-#include <string.h>
-#include <math.h>
-#include <assert.h>
-#include "aom_dsp/entenc.h"
-#include "aom_dsp/prob.h"
-
-#if OD_MEASURE_EC_OVERHEAD
-#if !defined(M_LOG2E)
-#define M_LOG2E (1.4426950408889634073599246810019)
-#endif
-#define OD_LOG2(x) (M_LOG2E * log(x))
-#endif // OD_MEASURE_EC_OVERHEAD
-
-/*A range encoder.
- See entdec.c and the references for implementation details \cite{Mar79,MNW98}.
-
- @INPROCEEDINGS{Mar79,
- author="Martin, G.N.N.",
- title="Range encoding: an algorithm for removing redundancy from a digitised
- message",
- booktitle="Video \& Data Recording Conference",
- year=1979,
- address="Southampton",
- month=Jul,
- URL="http://www.compressconsult.com/rangecoder/rngcod.pdf.gz"
- }
- @ARTICLE{MNW98,
- author="Alistair Moffat and Radford Neal and Ian H. Witten",
- title="Arithmetic Coding Revisited",
- journal="{ACM} Transactions on Information Systems",
- year=1998,
- volume=16,
- number=3,
- pages="256--294",
- month=Jul,
- URL="http://researchcommons.waikato.ac.nz/bitstream/handle/10289/78/content.pdf"
- }*/
-
-/*Takes updated low and range values, renormalizes them so that
- 32768 <= rng < 65536 (flushing bytes from low to the pre-carry buffer if
- necessary), and stores them back in the encoder context.
- low: The new value of low.
- rng: The new value of the range.*/
-static void od_ec_enc_normalize(od_ec_enc *enc, od_ec_window low,
- unsigned rng) {
- int d;
- int c;
- int s;
- c = enc->cnt;
- assert(rng <= 65535U);
- // The number of leading zeros in the 16-bit binary representation of rng.
- d = 16 - OD_ILOG_NZ(rng);
- /*s is what cnt would become after shifting by d; non-negative means at
-    least one byte is ready to be moved to the pre-carry buffer.*/
- s = c + d;
- /*TODO: Right now we flush every time we have at least one byte available.
- Instead we should use an od_ec_window and flush right before we're about to
- shift bits off the end of the window.
- For a 32-bit window this is about the same amount of work, but for a 64-bit
- window it should be a fair win.*/
- if (s >= 0) {
- uint16_t *buf;
- uint32_t storage;
- uint32_t offs;
- unsigned m;
- buf = enc->precarry_buf;
- storage = enc->precarry_storage;
- offs = enc->offs;
- /*Up to two entries are written below, so make room for both.*/
- if (offs + 2 > storage) {
- storage = 2 * storage + 2;
- buf = (uint16_t *)realloc(buf, sizeof(*buf) * storage);
- if (buf == NULL) {
- /*Allocation failed: enc->precarry_buf still holds the old block. Flag
-    the error, reset the write offset and return without updating
-    low, rng or cnt.*/
- enc->error = -1;
- enc->offs = 0;
- return;
- }
- enc->precarry_buf = buf;
- enc->precarry_storage = storage;
- }
- c += 16;
- /*m masks the bits of low that stay behind after an entry is emitted.*/
- m = (1 << c) - 1;
- if (s >= 8) {
- /*Two bytes are ready: emit the upper one first.*/
- assert(offs < storage);
- buf[offs++] = (uint16_t)(low >> c);
- low &= m;
- c -= 8;
- m >>= 8;
- }
- assert(offs < storage);
- /*Entries are 16 bits wide so a carry bit can ride along with each byte.*/
- buf[offs++] = (uint16_t)(low >> c);
- s = c + d - 24;
- low &= m;
- enc->offs = offs;
- }
- enc->low = low << d;
- enc->rng = rng << d;
- enc->cnt = s;
-}
-
-/*Initializes the encoder.
- size: The initial size of the buffer, in bytes.*/
-void od_ec_enc_init(od_ec_enc *enc, uint32_t size) {
-  const int want_memory = size > 0;
-  od_ec_enc_reset(enc);
-  /*Output byte buffer.*/
-  enc->storage = size;
-  enc->buf = (unsigned char *)malloc(sizeof(*enc->buf) * size);
-  if (want_memory && enc->buf == NULL) {
-    enc->storage = 0;
-    enc->error = -1;
-  }
-  /*Pre-carry buffer: one 16-bit entry per output byte.*/
-  enc->precarry_storage = size;
-  enc->precarry_buf = (uint16_t *)malloc(sizeof(*enc->precarry_buf) * size);
-  if (want_memory && enc->precarry_buf == NULL) {
-    enc->precarry_storage = 0;
-    enc->error = -1;
-  }
-}
-
-/*Reinitializes the encoder.*/
-void od_ec_enc_reset(od_ec_enc *enc) {
-  /*Start with an empty interval state: low at zero, range at its minimum
-     normalized value.*/
-  enc->low = 0;
-  enc->rng = 0x8000;
-  /*cnt begins at -9 so that it crosses zero once one byte plus one carry
-     bit have been accumulated.*/
-  enc->cnt = -9;
-  enc->offs = 0;
-  enc->error = 0;
-#if OD_MEASURE_EC_OVERHEAD
-  enc->nb_symbols = 0;
-  enc->entropy = 0;
-#endif
-}
-
-/*Frees the buffers used by the encoder.*/
-void od_ec_enc_clear(od_ec_enc *enc) {
-  /*Release both heap buffers. The pointers stored in enc are left as they
-     were, so enc must not be used again without re-initialization.*/
-  uint16_t *const precarry = enc->precarry_buf;
-  unsigned char *const bytes = enc->buf;
-  free(precarry);
-  free(bytes);
-}
-
-/*Encodes a symbol given its frequency in Q15.
-  fl: CDF_PROB_TOP minus the cumulative frequency of all symbols that come
-       before the one to be encoded.
-  fh: CDF_PROB_TOP minus the cumulative frequency of all symbols up to and
-       including the one to be encoded.
-  s: The index of the symbol being encoded.
-  nsyms: The number of symbols in the alphabet.*/
-static void od_ec_encode_q15(od_ec_enc *enc, unsigned fl, unsigned fh, int s,
-                             int nsyms) {
-  od_ec_window l;
-  unsigned r;
-  unsigned u;
-  unsigned v;
-  l = enc->low;
-  r = enc->rng;
-  assert(32768U <= r);
-  assert(fh <= fl);
-  assert(fl <= 32768U);
-  assert(7 - EC_PROB_SHIFT - CDF_SHIFT >= 0);
-  /*Index of the last symbol; scales the per-symbol EC_MIN_PROB floor.*/
-  const int N = nsyms - 1;
-  if (fl < CDF_PROB_TOP) {
-    /*Not the first symbol: u and v are the scaled boundaries on either side
-       of the symbol's interval.*/
-    u = ((r >> 8) * (uint32_t)(fl >> EC_PROB_SHIFT) >>
-         (7 - EC_PROB_SHIFT - CDF_SHIFT)) +
-        EC_MIN_PROB * (N - (s - 1));
-    v = ((r >> 8) * (uint32_t)(fh >> EC_PROB_SHIFT) >>
-         (7 - EC_PROB_SHIFT - CDF_SHIFT)) +
-        EC_MIN_PROB * (N - (s + 0));
-    l += r - u;
-    r = u - v;
-  } else {
-    /*fl == CDF_PROB_TOP: nothing precedes this symbol, so low is unchanged
-       and only the range shrinks.*/
-    r -= ((r >> 8) * (uint32_t)(fh >> EC_PROB_SHIFT) >>
-          (7 - EC_PROB_SHIFT - CDF_SHIFT)) +
-         EC_MIN_PROB * (N - (s + 0));
-  }
-  od_ec_enc_normalize(enc, l, r);
-#if OD_MEASURE_EC_OVERHEAD
-  /*The divisor is cast explicitly: writing "CDF_PROB_TOP." leaves a stray '.'
-     token after the macro name and does not compile.*/
-  enc->entropy -=
-      OD_LOG2((double)(OD_ICDF(fh) - OD_ICDF(fl)) / (double)CDF_PROB_TOP);
-  enc->nb_symbols++;
-#endif
-}
-
-/*Encode a single binary value.
- val: The value to encode (0 or 1).
- f: The probability that the val is one, scaled by 32768.*/
-void od_ec_encode_bool_q15(od_ec_enc *enc, int val, unsigned f) {
-  od_ec_window low;
-  unsigned rng;
-  unsigned split;
-  assert(0 < f);
-  assert(f < 32768U);
-  low = enc->low;
-  rng = enc->rng;
-  assert(32768U <= rng);
-  /*split is the part of the range assigned to the value 1.*/
-  split =
-      ((rng >> 8) * (uint32_t)(f >> EC_PROB_SHIFT) >> (7 - EC_PROB_SHIFT));
-  split += EC_MIN_PROB;
-  if (val) {
-    /*A 1 occupies the top split values of the range.*/
-    low += rng - split;
-    rng = split;
-  } else {
-    /*A 0 keeps low and takes what is left of the range.*/
-    rng -= split;
-  }
-  od_ec_enc_normalize(enc, low, rng);
-#if OD_MEASURE_EC_OVERHEAD
-  enc->entropy -= OD_LOG2((double)(val ? f : (32768 - f)) / 32768.);
-  enc->nb_symbols++;
-#endif
-}
-
-/*Encodes a symbol given a cumulative distribution function (CDF) table in Q15.
- s: The index of the symbol to encode.
- icdf: 32768 minus the CDF, such that symbol s falls in the range
- [s > 0 ? (32768 - icdf[s - 1]) : 0, 32768 - icdf[s]).
- The values must be monotonically decreasing, and icdf[nsyms - 1] must
- be 0.
- nsyms: The number of symbols in the alphabet.
- This should be at most 16.*/
-void od_ec_encode_cdf_q15(od_ec_enc *enc, int s, const uint16_t *icdf,
-                          int nsyms) {
-  unsigned fl;
-  unsigned fh;
-  (void)nsyms;
-  assert(s >= 0);
-  assert(s < nsyms);
-  assert(icdf[nsyms - 1] == OD_ICDF(CDF_PROB_TOP));
-  /*The first symbol has no predecessor, so its upper bound is OD_ICDF(0).*/
-  if (s > 0) {
-    fl = icdf[s - 1];
-  } else {
-    fl = OD_ICDF(0);
-  }
-  fh = icdf[s];
-  od_ec_encode_q15(enc, fl, fh, s, nsyms);
-}
-
-/*Overwrites a few bits at the very start of an existing stream, after they
- have already been encoded.
- This makes it possible to have a few flags up front, where it is easy for
- decoders to access them without parsing the whole stream, even if their
- values are not determined until late in the encoding process, without having
- to buffer all the intermediate symbols in the encoder.
- In order for this to work, at least nbits bits must have already been encoded
- using probabilities that are an exact power of two.
- The encoder can verify the number of encoded bits is sufficient, but cannot
- check this latter condition.
- val: The bits to encode (in the least nbits significant bits).
- They will be decoded in order from most-significant to least.
- nbits: The number of bits to overwrite.
- This must be no more than 8.*/
-void od_ec_enc_patch_initial_bits(od_ec_enc *enc, unsigned val, int nbits) {
- int shift;
- unsigned mask;
- assert(nbits >= 0);
- assert(nbits <= 8);
- assert(val < 1U << nbits);
- /*The patched bits sit at the top of the first byte: shift positions val
-    there and mask covers exactly those nbits bits.*/
- shift = 8 - nbits;
- mask = ((1U << nbits) - 1) << shift;
- if (enc->offs > 0) {
- /*The first byte has been finalized.*/
- enc->precarry_buf[0] =
- (uint16_t)((enc->precarry_buf[0] & ~mask) | val << shift);
- } else if (9 + enc->cnt + (enc->rng == 0x8000) > nbits) {
- /*The first byte has yet to be output.*/
- /*It still lives in low, 16 + cnt bits above the bottom of the window.*/
- enc->low = (enc->low & ~((od_ec_window)mask << (16 + enc->cnt))) |
- (od_ec_window)val << (16 + enc->cnt + shift);
- } else {
- /*The encoder hasn't even encoded nbits of data yet.*/
- enc->error = -1;
- }
-}
-
-#if OD_MEASURE_EC_OVERHEAD
-#include <stdio.h>
-#endif
-
-/*Indicates that there are no more symbols to encode.
- All remaining output bytes are flushed to the output buffer.
- od_ec_enc_reset() should be called before using the encoder again.
- nbytes: Returns the size of the encoded data in the returned buffer.
- It is not written when NULL is returned.
- Return: A pointer to the start of the final buffer, or NULL if there was an
- encoding error.
- The pointer addresses the last *nbytes bytes of enc->buf, not
- necessarily its first byte.*/
-unsigned char *od_ec_enc_done(od_ec_enc *enc, uint32_t *nbytes) {
- unsigned char *out;
- uint32_t storage;
- uint16_t *buf;
- uint32_t offs;
- od_ec_window m;
- od_ec_window e;
- od_ec_window l;
- int c;
- int s;
- if (enc->error) return NULL;
-#if OD_MEASURE_EC_OVERHEAD
- {
- uint32_t tell;
- /* Don't count the 1 bit we lose to raw bits as overhead. */
- tell = od_ec_enc_tell(enc) - 1;
- fprintf(stderr, "overhead: %f%%\n",
- 100 * (tell - enc->entropy) / enc->entropy);
- fprintf(stderr, "efficiency: %f bits/symbol\n",
- (double)tell / enc->nb_symbols);
- }
-#endif
- /*We output the minimum number of bits that ensures that the symbols encoded
- thus far will be decoded correctly regardless of the bits that follow.*/
- l = enc->low;
- c = enc->cnt;
- s = 10;
- m = 0x3FFF;
- /*Round low up to a multiple of m + 1 and set the bit at m + 1.*/
- e = ((l + m) & ~m) | (m + 1);
- s += c;
- offs = enc->offs;
- buf = enc->precarry_buf;
- if (s > 0) {
- unsigned n;
- storage = enc->precarry_storage;
- /*(s + 7) >> 3 is the number of entries the flush loop below will write.*/
- if (offs + ((s + 7) >> 3) > storage) {
- storage = storage * 2 + ((s + 7) >> 3);
- buf = (uint16_t *)realloc(buf, sizeof(*buf) * storage);
- if (buf == NULL) {
- enc->error = -1;
- return NULL;
- }
- enc->precarry_buf = buf;
- enc->precarry_storage = storage;
- }
- n = (1 << (c + 16)) - 1;
- /*Emit one pre-carry entry per iteration until no pending bits remain.*/
- do {
- assert(offs < storage);
- buf[offs++] = (uint16_t)(e >> (c + 16));
- e &= n;
- s -= 8;
- c -= 8;
- n >>= 8;
- } while (s > 0);
- }
- /*Make sure there's enough room for the entropy-coded bits.*/
- out = enc->buf;
- storage = enc->storage;
- c = OD_MAXI((s + 7) >> 3, 0);
- if (offs + c > storage) {
- storage = offs + c;
- out = (unsigned char *)realloc(out, sizeof(*out) * storage);
- if (out == NULL) {
- enc->error = -1;
- return NULL;
- }
- enc->buf = out;
- enc->storage = storage;
- }
- *nbytes = offs;
- /*Perform carry propagation.*/
- assert(offs <= storage);
- /*The finished bytes are placed at the tail end of the byte buffer.*/
- out = out + storage - offs;
- c = 0;
- /*Walk backwards so a carry out of one entry is added to the byte before
-    it.*/
- while (offs > 0) {
- offs--;
- c = buf[offs] + c;
- out[offs] = (unsigned char)c;
- c >>= 8;
- }
- /*Note: Unless there's an allocation error, if you keep encoding into the
- current buffer and call this function again later, everything will work
- just fine (you won't get a new packet out, but you will get a single
- buffer with the new data appended to the old).
- However, this function is O(N) where N is the amount of data coded so far,
- so calling it more than once for a given packet is a bad idea.*/
- return out;
-}
-
-/*Returns the number of bits "used" by the encoded symbols so far.
- This same number can be computed in either the encoder or the decoder, and is
- suitable for making coding decisions.
- Warning: The value returned by this function can decrease compared to an
- earlier call, even after encoding more data, if there is an encoding error
- (i.e., a failure to allocate enough space for the output buffer).
- Return: The number of bits.
- This will always be slightly larger than the exact value (e.g., all
- rounding error is in the positive direction).*/
-int od_ec_enc_tell(const od_ec_enc *enc) {
-  /*Bits already moved to the pre-carry buffer, one byte per entry.*/
-  const uint32_t flushed_bits = enc->offs * 8;
-  /*Adding 10 cancels the -9 bias baked into cnt and reserves one extra bit
-     for terminating the stream.*/
-  return (enc->cnt + 10) + flushed_bits;
-}
-
-/*Returns the number of bits "used" by the encoded symbols so far.
- This same number can be computed in either the encoder or the decoder, and is
- suitable for making coding decisions.
- Warning: The value returned by this function can decrease compared to an
- earlier call, even after encoding more data, if there is an encoding error
- (i.e., a failure to allocate enough space for the output buffer).
- Return: The number of bits scaled by 2**OD_BITRES.
- This will always be slightly larger than the exact value (e.g., all
- rounding error is in the positive direction).*/
-uint32_t od_ec_enc_tell_frac(const od_ec_enc *enc) {
-  /*Whole-bit count first; od_ec_tell_frac() refines it using rng.*/
-  const int whole_bits = od_ec_enc_tell(enc);
-  return od_ec_tell_frac(whole_bits, enc->rng);
-}
-
-/*Saves an entropy coder checkpoint to dst.
- This allows an encoder to reverse a series of entropy coder
- decisions if it decides that the information would have been
- better coded some other way.
- NOTE(review): presumably OD_COPY copies one od_ec_enc struct, so dst would
- share src's buffer pointers rather than own copies -- confirm against the
- macro's definition.*/
-void od_ec_enc_checkpoint(od_ec_enc *dst, const od_ec_enc *src) {
- OD_COPY(dst, src, 1);
-}
-
-/*Restores an entropy coder checkpoint saved by od_ec_enc_checkpoint.
-  This can only be used to restore from checkpoints earlier in the target
-   state's history: you can not switch backwards and forwards or otherwise
-   switch to a state which isn't a causal ancestor of the current state.
-  Restore is also incompatible with patching the initial bits, as the
-   changes will remain in the restored version.
-  dst keeps its own buffers and their sizes; every other field is taken
-   from src.*/
-void od_ec_enc_rollback(od_ec_enc *dst, const od_ec_enc *src) {
-  assert(dst->storage >= src->storage);
-  assert(dst->precarry_storage >= src->precarry_storage);
-  /*Remember dst's buffers, which may have grown since the checkpoint.*/
-  unsigned char *const kept_buf = dst->buf;
-  const uint32_t kept_storage = dst->storage;
-  uint16_t *const kept_precarry = dst->precarry_buf;
-  const uint32_t kept_precarry_storage = dst->precarry_storage;
-  OD_COPY(dst, src, 1);
-  /*Put dst's own buffers back after the copy overwrote the fields.*/
-  dst->precarry_storage = kept_precarry_storage;
-  dst->precarry_buf = kept_precarry;
-  dst->storage = kept_storage;
-  dst->buf = kept_buf;
-}
diff --git a/third_party/aom/aom_dsp/entenc.h b/third_party/aom/aom_dsp/entenc.h
deleted file mode 100644
index 3551d4250..000000000
--- a/third_party/aom/aom_dsp/entenc.h
+++ /dev/null
@@ -1,85 +0,0 @@
-/*
- * Copyright (c) 2001-2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#ifndef AOM_AOM_DSP_ENTENC_H_
-#define AOM_AOM_DSP_ENTENC_H_
-#include <stddef.h>
-#include "aom_dsp/entcode.h"
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-typedef struct od_ec_enc od_ec_enc;
-
-#define OD_MEASURE_EC_OVERHEAD (0)
-
-/*The entropy encoder context.*/
-struct od_ec_enc {
- /*Buffered output.
- This contains only the raw bits until the final call to od_ec_enc_done(),
- where all the arithmetic-coded data gets prepended to it.*/
- unsigned char *buf;
- /*The size of the buffer.*/
- uint32_t storage;
- /*A buffer for output bytes with their associated carry flags.
- Each entry is 16 bits wide; od_ec_enc_done() folds the carries into the
- final bytes.*/
- uint16_t *precarry_buf;
- /*The size of the pre-carry buffer.*/
- uint32_t precarry_storage;
- /*The offset at which the next entropy-coded byte will be written.*/
- uint32_t offs;
- /*The low end of the current range.*/
- od_ec_window low;
- /*The number of values in the current range.
- od_ec_enc_reset() starts it at 0x8000.*/
- uint16_t rng;
- /*The number of bits of data in the current value.
- od_ec_enc_reset() starts it at -9.*/
- int16_t cnt;
- /*Nonzero if an error occurred.
- Set to -1 when a buffer allocation fails; cleared by od_ec_enc_reset().*/
- int error;
-#if OD_MEASURE_EC_OVERHEAD
- /*Running sum of -log2(probability) over the symbols coded so far.*/
- double entropy;
- /*Number of symbols coded so far.*/
- int nb_symbols;
-#endif
-};
-
-/*See entenc.c for further documentation.*/
-
-void od_ec_enc_init(od_ec_enc *enc, uint32_t size) OD_ARG_NONNULL(1);
-void od_ec_enc_reset(od_ec_enc *enc) OD_ARG_NONNULL(1);
-void od_ec_enc_clear(od_ec_enc *enc) OD_ARG_NONNULL(1);
-
-void od_ec_encode_bool_q15(od_ec_enc *enc, int val, unsigned f_q15)
- OD_ARG_NONNULL(1);
-void od_ec_encode_cdf_q15(od_ec_enc *enc, int s, const uint16_t *cdf, int nsyms)
- OD_ARG_NONNULL(1) OD_ARG_NONNULL(3);
-
-void od_ec_enc_bits(od_ec_enc *enc, uint32_t fl, unsigned ftb)
- OD_ARG_NONNULL(1);
-
-void od_ec_enc_patch_initial_bits(od_ec_enc *enc, unsigned val, int nbits)
- OD_ARG_NONNULL(1);
-OD_WARN_UNUSED_RESULT unsigned char *od_ec_enc_done(od_ec_enc *enc,
- uint32_t *nbytes)
- OD_ARG_NONNULL(1) OD_ARG_NONNULL(2);
-
-OD_WARN_UNUSED_RESULT int od_ec_enc_tell(const od_ec_enc *enc)
- OD_ARG_NONNULL(1);
-OD_WARN_UNUSED_RESULT uint32_t od_ec_enc_tell_frac(const od_ec_enc *enc)
- OD_ARG_NONNULL(1);
-
-void od_ec_enc_checkpoint(od_ec_enc *dst, const od_ec_enc *src);
-void od_ec_enc_rollback(od_ec_enc *dst, const od_ec_enc *src);
-
-#ifdef __cplusplus
-} // extern "C"
-#endif
-
-#endif // AOM_AOM_DSP_ENTENC_H_
diff --git a/third_party/aom/aom_dsp/fastssim.c b/third_party/aom/aom_dsp/fastssim.c
deleted file mode 100644
index 3804519b3..000000000
--- a/third_party/aom/aom_dsp/fastssim.c
+++ /dev/null
@@ -1,487 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- *
- * This code was originally written by: Nathan E. Egge, at the Daala
- * project.
- */
-#include <assert.h>
-#include <math.h>
-#include <stdlib.h>
-#include <string.h>
-
-#include "config/aom_config.h"
-#include "config/aom_dsp_rtcd.h"
-
-#include "aom_dsp/ssim.h"
-#include "aom_ports/system_state.h"
-
-typedef struct fs_level fs_level;
-typedef struct fs_ctx fs_ctx;
-
-#define SSIM_C1 (255 * 255 * 0.01 * 0.01)
-#define SSIM_C2 (255 * 255 * 0.03 * 0.03)
-#define SSIM_C1_10 (1023 * 1023 * 0.01 * 0.01)
-#define SSIM_C1_12 (4095 * 4095 * 0.01 * 0.01)
-#define SSIM_C2_10 (1023 * 1023 * 0.03 * 0.03)
-#define SSIM_C2_12 (4095 * 4095 * 0.03 * 0.03)
-
-#define FS_MINI(_a, _b) ((_a) < (_b) ? (_a) : (_b))
-#define FS_MAXI(_a, _b) ((_a) > (_b) ? (_a) : (_b))
-
-struct fs_level {
- uint32_t *im1;
- uint32_t *im2;
- double *ssim;
- int w;
- int h;
-};
-
-struct fs_ctx {
- fs_level *level;
- int nlevels;
- unsigned *col_buf;
-};
-
-static void fs_ctx_init(fs_ctx *_ctx, int _w, int _h, int _nlevels) {
- unsigned char *data;
- size_t data_size;
- int lw;
- int lh;
- int l;
- lw = (_w + 1) >> 1;
- lh = (_h + 1) >> 1;
- data_size =
- _nlevels * sizeof(fs_level) + 2 * (lw + 8) * 8 * sizeof(*_ctx->col_buf);
- for (l = 0; l < _nlevels; l++) {
- size_t im_size;
- size_t level_size;
- im_size = lw * (size_t)lh;
- level_size = 2 * im_size * sizeof(*_ctx->level[l].im1);
- level_size += sizeof(*_ctx->level[l].ssim) - 1;
- level_size /= sizeof(*_ctx->level[l].ssim);
- level_size += im_size;
- level_size *= sizeof(*_ctx->level[l].ssim);
- data_size += level_size;
- lw = (lw + 1) >> 1;
- lh = (lh + 1) >> 1;
- }
- data = (unsigned char *)malloc(data_size);
- _ctx->level = (fs_level *)data;
- _ctx->nlevels = _nlevels;
- data += _nlevels * sizeof(*_ctx->level);
- lw = (_w + 1) >> 1;
- lh = (_h + 1) >> 1;
- for (l = 0; l < _nlevels; l++) {
- size_t im_size;
- size_t level_size;
- _ctx->level[l].w = lw;
- _ctx->level[l].h = lh;
- im_size = lw * (size_t)lh;
- level_size = 2 * im_size * sizeof(*_ctx->level[l].im1);
- level_size += sizeof(*_ctx->level[l].ssim) - 1;
- level_size /= sizeof(*_ctx->level[l].ssim);
- level_size *= sizeof(*_ctx->level[l].ssim);
- _ctx->level[l].im1 = (uint32_t *)data;
- _ctx->level[l].im2 = _ctx->level[l].im1 + im_size;
- data += level_size;
- _ctx->level[l].ssim = (double *)data;
- data += im_size * sizeof(*_ctx->level[l].ssim);
- lw = (lw + 1) >> 1;
- lh = (lh + 1) >> 1;
- }
- _ctx->col_buf = (unsigned *)data;
-}
-
-static void fs_ctx_clear(fs_ctx *_ctx) { free(_ctx->level); }
-
-static void fs_downsample_level(fs_ctx *_ctx, int _l) {
- const uint32_t *src1;
- const uint32_t *src2;
- uint32_t *dst1;
- uint32_t *dst2;
- int w2;
- int h2;
- int w;
- int h;
- int i;
- int j;
- w = _ctx->level[_l].w;
- h = _ctx->level[_l].h;
- dst1 = _ctx->level[_l].im1;
- dst2 = _ctx->level[_l].im2;
- w2 = _ctx->level[_l - 1].w;
- h2 = _ctx->level[_l - 1].h;
- src1 = _ctx->level[_l - 1].im1;
- src2 = _ctx->level[_l - 1].im2;
- for (j = 0; j < h; j++) {
- int j0offs;
- int j1offs;
- j0offs = 2 * j * w2;
- j1offs = FS_MINI(2 * j + 1, h2) * w2;
- for (i = 0; i < w; i++) {
- int i0;
- int i1;
- i0 = 2 * i;
- i1 = FS_MINI(i0 + 1, w2);
- dst1[j * w + i] = src1[j0offs + i0] + src1[j0offs + i1] +
- src1[j1offs + i0] + src1[j1offs + i1];
- dst2[j * w + i] = src2[j0offs + i0] + src2[j0offs + i1] +
- src2[j1offs + i0] + src2[j1offs + i1];
- }
- }
-}
-
-static void fs_downsample_level0(fs_ctx *_ctx, const uint8_t *_src1,
- int _s1ystride, const uint8_t *_src2,
- int _s2ystride, int _w, int _h, uint32_t shift,
- int buf_is_hbd) {
- uint32_t *dst1;
- uint32_t *dst2;
- int w;
- int h;
- int i;
- int j;
- w = _ctx->level[0].w;
- h = _ctx->level[0].h;
- dst1 = _ctx->level[0].im1;
- dst2 = _ctx->level[0].im2;
- for (j = 0; j < h; j++) {
- int j0;
- int j1;
- j0 = 2 * j;
- j1 = FS_MINI(j0 + 1, _h);
- for (i = 0; i < w; i++) {
- int i0;
- int i1;
- i0 = 2 * i;
- i1 = FS_MINI(i0 + 1, _w);
- if (!buf_is_hbd) {
- dst1[j * w + i] =
- _src1[j0 * _s1ystride + i0] + _src1[j0 * _s1ystride + i1] +
- _src1[j1 * _s1ystride + i0] + _src1[j1 * _s1ystride + i1];
- dst2[j * w + i] =
- _src2[j0 * _s2ystride + i0] + _src2[j0 * _s2ystride + i1] +
- _src2[j1 * _s2ystride + i0] + _src2[j1 * _s2ystride + i1];
- } else {
- uint16_t *src1s = CONVERT_TO_SHORTPTR(_src1);
- uint16_t *src2s = CONVERT_TO_SHORTPTR(_src2);
- dst1[j * w + i] = (src1s[j0 * _s1ystride + i0] >> shift) +
- (src1s[j0 * _s1ystride + i1] >> shift) +
- (src1s[j1 * _s1ystride + i0] >> shift) +
- (src1s[j1 * _s1ystride + i1] >> shift);
- dst2[j * w + i] = (src2s[j0 * _s2ystride + i0] >> shift) +
- (src2s[j0 * _s2ystride + i1] >> shift) +
- (src2s[j1 * _s2ystride + i0] >> shift) +
- (src2s[j1 * _s2ystride + i1] >> shift);
- }
- }
- }
-}
-
-static void fs_apply_luminance(fs_ctx *_ctx, int _l, int bit_depth) {
- unsigned *col_sums_x;
- unsigned *col_sums_y;
- uint32_t *im1;
- uint32_t *im2;
- double *ssim;
- double c1;
- int w;
- int h;
- int j0offs;
- int j1offs;
- int i;
- int j;
- double ssim_c1 = SSIM_C1;
-
- if (bit_depth == 10) ssim_c1 = SSIM_C1_10;
- if (bit_depth == 12) ssim_c1 = SSIM_C1_12;
-
- w = _ctx->level[_l].w;
- h = _ctx->level[_l].h;
- col_sums_x = _ctx->col_buf;
- col_sums_y = col_sums_x + w;
- im1 = _ctx->level[_l].im1;
- im2 = _ctx->level[_l].im2;
- for (i = 0; i < w; i++) col_sums_x[i] = 5 * im1[i];
- for (i = 0; i < w; i++) col_sums_y[i] = 5 * im2[i];
- for (j = 1; j < 4; j++) {
- j1offs = FS_MINI(j, h - 1) * w;
- for (i = 0; i < w; i++) col_sums_x[i] += im1[j1offs + i];
- for (i = 0; i < w; i++) col_sums_y[i] += im2[j1offs + i];
- }
- ssim = _ctx->level[_l].ssim;
- c1 = (double)(ssim_c1 * 4096 * (1 << 4 * _l));
- for (j = 0; j < h; j++) {
- unsigned mux;
- unsigned muy;
- int i0;
- int i1;
- mux = 5 * col_sums_x[0];
- muy = 5 * col_sums_y[0];
- for (i = 1; i < 4; i++) {
- i1 = FS_MINI(i, w - 1);
- mux += col_sums_x[i1];
- muy += col_sums_y[i1];
- }
- for (i = 0; i < w; i++) {
- ssim[j * w + i] *= (2 * mux * (double)muy + c1) /
- (mux * (double)mux + muy * (double)muy + c1);
- if (i + 1 < w) {
- i0 = FS_MAXI(0, i - 4);
- i1 = FS_MINI(i + 4, w - 1);
- mux += col_sums_x[i1] - col_sums_x[i0];
- muy += col_sums_x[i1] - col_sums_x[i0];
- }
- }
- if (j + 1 < h) {
- j0offs = FS_MAXI(0, j - 4) * w;
- for (i = 0; i < w; i++) col_sums_x[i] -= im1[j0offs + i];
- for (i = 0; i < w; i++) col_sums_y[i] -= im2[j0offs + i];
- j1offs = FS_MINI(j + 4, h - 1) * w;
- for (i = 0; i < w; i++) col_sums_x[i] += im1[j1offs + i];
- for (i = 0; i < w; i++) col_sums_y[i] += im2[j1offs + i];
- }
- }
-}
-
-#define FS_COL_SET(_col, _joffs, _ioffs) \
- do { \
- unsigned gx; \
- unsigned gy; \
- gx = gx_buf[((j + (_joffs)) & 7) * stride + i + (_ioffs)]; \
- gy = gy_buf[((j + (_joffs)) & 7) * stride + i + (_ioffs)]; \
- col_sums_gx2[(_col)] = gx * (double)gx; \
- col_sums_gy2[(_col)] = gy * (double)gy; \
- col_sums_gxgy[(_col)] = gx * (double)gy; \
- } while (0)
-
-#define FS_COL_ADD(_col, _joffs, _ioffs) \
- do { \
- unsigned gx; \
- unsigned gy; \
- gx = gx_buf[((j + (_joffs)) & 7) * stride + i + (_ioffs)]; \
- gy = gy_buf[((j + (_joffs)) & 7) * stride + i + (_ioffs)]; \
- col_sums_gx2[(_col)] += gx * (double)gx; \
- col_sums_gy2[(_col)] += gy * (double)gy; \
- col_sums_gxgy[(_col)] += gx * (double)gy; \
- } while (0)
-
-#define FS_COL_SUB(_col, _joffs, _ioffs) \
- do { \
- unsigned gx; \
- unsigned gy; \
- gx = gx_buf[((j + (_joffs)) & 7) * stride + i + (_ioffs)]; \
- gy = gy_buf[((j + (_joffs)) & 7) * stride + i + (_ioffs)]; \
- col_sums_gx2[(_col)] -= gx * (double)gx; \
- col_sums_gy2[(_col)] -= gy * (double)gy; \
- col_sums_gxgy[(_col)] -= gx * (double)gy; \
- } while (0)
-
-#define FS_COL_COPY(_col1, _col2) \
- do { \
- col_sums_gx2[(_col1)] = col_sums_gx2[(_col2)]; \
- col_sums_gy2[(_col1)] = col_sums_gy2[(_col2)]; \
- col_sums_gxgy[(_col1)] = col_sums_gxgy[(_col2)]; \
- } while (0)
-
-#define FS_COL_HALVE(_col1, _col2) \
- do { \
- col_sums_gx2[(_col1)] = col_sums_gx2[(_col2)] * 0.5; \
- col_sums_gy2[(_col1)] = col_sums_gy2[(_col2)] * 0.5; \
- col_sums_gxgy[(_col1)] = col_sums_gxgy[(_col2)] * 0.5; \
- } while (0)
-
-#define FS_COL_DOUBLE(_col1, _col2) \
- do { \
- col_sums_gx2[(_col1)] = col_sums_gx2[(_col2)] * 2; \
- col_sums_gy2[(_col1)] = col_sums_gy2[(_col2)] * 2; \
- col_sums_gxgy[(_col1)] = col_sums_gxgy[(_col2)] * 2; \
- } while (0)
-
-static void fs_calc_structure(fs_ctx *_ctx, int _l, int bit_depth) {
- uint32_t *im1;
- uint32_t *im2;
- unsigned *gx_buf;
- unsigned *gy_buf;
- double *ssim;
- double col_sums_gx2[8];
- double col_sums_gy2[8];
- double col_sums_gxgy[8];
- double c2;
- int stride;
- int w;
- int h;
- int i;
- int j;
- double ssim_c2 = SSIM_C2;
- if (bit_depth == 10) ssim_c2 = SSIM_C2_10;
- if (bit_depth == 12) ssim_c2 = SSIM_C2_12;
-
- w = _ctx->level[_l].w;
- h = _ctx->level[_l].h;
- im1 = _ctx->level[_l].im1;
- im2 = _ctx->level[_l].im2;
- ssim = _ctx->level[_l].ssim;
- gx_buf = _ctx->col_buf;
- stride = w + 8;
- gy_buf = gx_buf + 8 * stride;
- memset(gx_buf, 0, 2 * 8 * stride * sizeof(*gx_buf));
- c2 = ssim_c2 * (1 << 4 * _l) * 16 * 104;
- for (j = 0; j < h + 4; j++) {
- if (j < h - 1) {
- for (i = 0; i < w - 1; i++) {
- unsigned g1;
- unsigned g2;
- unsigned gx;
- unsigned gy;
- g1 = abs((int)im1[(j + 1) * w + i + 1] - (int)im1[j * w + i]);
- g2 = abs((int)im1[(j + 1) * w + i] - (int)im1[j * w + i + 1]);
- gx = 4 * FS_MAXI(g1, g2) + FS_MINI(g1, g2);
- g1 = abs((int)im2[(j + 1) * w + i + 1] - (int)im2[j * w + i]);
- g2 = abs((int)im2[(j + 1) * w + i] - (int)im2[j * w + i + 1]);
- gy = 4 * FS_MAXI(g1, g2) + FS_MINI(g1, g2);
- gx_buf[(j & 7) * stride + i + 4] = gx;
- gy_buf[(j & 7) * stride + i + 4] = gy;
- }
- } else {
- memset(gx_buf + (j & 7) * stride, 0, stride * sizeof(*gx_buf));
- memset(gy_buf + (j & 7) * stride, 0, stride * sizeof(*gy_buf));
- }
- if (j >= 4) {
- int k;
- col_sums_gx2[3] = col_sums_gx2[2] = col_sums_gx2[1] = col_sums_gx2[0] = 0;
- col_sums_gy2[3] = col_sums_gy2[2] = col_sums_gy2[1] = col_sums_gy2[0] = 0;
- col_sums_gxgy[3] = col_sums_gxgy[2] = col_sums_gxgy[1] =
- col_sums_gxgy[0] = 0;
- for (i = 4; i < 8; i++) {
- FS_COL_SET(i, -1, 0);
- FS_COL_ADD(i, 0, 0);
- for (k = 1; k < 8 - i; k++) {
- FS_COL_DOUBLE(i, i);
- FS_COL_ADD(i, -k - 1, 0);
- FS_COL_ADD(i, k, 0);
- }
- }
- for (i = 0; i < w; i++) {
- double mugx2;
- double mugy2;
- double mugxgy;
- mugx2 = col_sums_gx2[0];
- for (k = 1; k < 8; k++) mugx2 += col_sums_gx2[k];
- mugy2 = col_sums_gy2[0];
- for (k = 1; k < 8; k++) mugy2 += col_sums_gy2[k];
- mugxgy = col_sums_gxgy[0];
- for (k = 1; k < 8; k++) mugxgy += col_sums_gxgy[k];
- ssim[(j - 4) * w + i] = (2 * mugxgy + c2) / (mugx2 + mugy2 + c2);
- if (i + 1 < w) {
- FS_COL_SET(0, -1, 1);
- FS_COL_ADD(0, 0, 1);
- FS_COL_SUB(2, -3, 2);
- FS_COL_SUB(2, 2, 2);
- FS_COL_HALVE(1, 2);
- FS_COL_SUB(3, -4, 3);
- FS_COL_SUB(3, 3, 3);
- FS_COL_HALVE(2, 3);
- FS_COL_COPY(3, 4);
- FS_COL_DOUBLE(4, 5);
- FS_COL_ADD(4, -4, 5);
- FS_COL_ADD(4, 3, 5);
- FS_COL_DOUBLE(5, 6);
- FS_COL_ADD(5, -3, 6);
- FS_COL_ADD(5, 2, 6);
- FS_COL_DOUBLE(6, 7);
- FS_COL_ADD(6, -2, 7);
- FS_COL_ADD(6, 1, 7);
- FS_COL_SET(7, -1, 8);
- FS_COL_ADD(7, 0, 8);
- }
- }
- }
- }
-}
-
-#define FS_NLEVELS (4)
-
-/*These weights were derived from the default weights found in Wang's original
- Matlab implementation: {0.0448, 0.2856, 0.2363, 0.1333}.
- We drop the finest scale and renormalize the rest to sum to 1.*/
-
-static const double FS_WEIGHTS[FS_NLEVELS] = {
- 0.2989654541015625, 0.3141326904296875, 0.2473602294921875, 0.1395416259765625
-};
-
-static double fs_average(fs_ctx *_ctx, int _l) {
- double *ssim;
- double ret;
- int w;
- int h;
- int i;
- int j;
- w = _ctx->level[_l].w;
- h = _ctx->level[_l].h;
- ssim = _ctx->level[_l].ssim;
- ret = 0;
- for (j = 0; j < h; j++)
- for (i = 0; i < w; i++) ret += ssim[j * w + i];
- return pow(ret / (w * h), FS_WEIGHTS[_l]);
-}
-
-static double convert_ssim_db(double _ssim, double _weight) {
- assert(_weight >= _ssim);
- if ((_weight - _ssim) < 1e-10) return MAX_SSIM_DB;
- return 10 * (log10(_weight) - log10(_weight - _ssim));
-}
-
-static double calc_ssim(const uint8_t *_src, int _systride, const uint8_t *_dst,
- int _dystride, int _w, int _h, uint32_t _bd,
- uint32_t _shift, int buf_is_hbd) {
- fs_ctx ctx;
- double ret;
- int l;
- ret = 1;
- fs_ctx_init(&ctx, _w, _h, FS_NLEVELS);
- fs_downsample_level0(&ctx, _src, _systride, _dst, _dystride, _w, _h, _shift,
- buf_is_hbd);
- for (l = 0; l < FS_NLEVELS - 1; l++) {
- fs_calc_structure(&ctx, l, _bd);
- ret *= fs_average(&ctx, l);
- fs_downsample_level(&ctx, l + 1);
- }
- fs_calc_structure(&ctx, l, _bd);
- fs_apply_luminance(&ctx, l, _bd);
- ret *= fs_average(&ctx, l);
- fs_ctx_clear(&ctx);
- return ret;
-}
-
-double aom_calc_fastssim(const YV12_BUFFER_CONFIG *source,
- const YV12_BUFFER_CONFIG *dest, double *ssim_y,
- double *ssim_u, double *ssim_v, uint32_t bd,
- uint32_t in_bd) {
- double ssimv;
- uint32_t bd_shift = 0;
- aom_clear_system_state();
- assert(bd >= in_bd);
- assert(source->flags == dest->flags);
- int buf_is_hbd = source->flags & YV12_FLAG_HIGHBITDEPTH;
- bd_shift = bd - in_bd;
-
- *ssim_y = calc_ssim(source->y_buffer, source->y_stride, dest->y_buffer,
- dest->y_stride, source->y_crop_width,
- source->y_crop_height, in_bd, bd_shift, buf_is_hbd);
- *ssim_u = calc_ssim(source->u_buffer, source->uv_stride, dest->u_buffer,
- dest->uv_stride, source->uv_crop_width,
- source->uv_crop_height, in_bd, bd_shift, buf_is_hbd);
- *ssim_v = calc_ssim(source->v_buffer, source->uv_stride, dest->v_buffer,
- dest->uv_stride, source->uv_crop_width,
- source->uv_crop_height, in_bd, bd_shift, buf_is_hbd);
- ssimv = (*ssim_y) * .8 + .1 * ((*ssim_u) + (*ssim_v));
- return convert_ssim_db(ssimv, 1.0);
-}
diff --git a/third_party/aom/aom_dsp/fft.c b/third_party/aom/aom_dsp/fft.c
deleted file mode 100644
index 0ba71cfb3..000000000
--- a/third_party/aom/aom_dsp/fft.c
+++ /dev/null
@@ -1,219 +0,0 @@
-/*
- * Copyright (c) 2018, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#include "aom_dsp/aom_dsp_common.h"
-#include "aom_dsp/fft_common.h"
-
-static INLINE void simple_transpose(const float *A, float *B, int n) {
- for (int y = 0; y < n; y++) {
- for (int x = 0; x < n; x++) {
- B[y * n + x] = A[x * n + y];
- }
- }
-}
-
-// The 1d transform is real to complex and packs the complex results in
-// a way to take advantage of conjugate symmetry (e.g., the n/2 + 1 real
-// components, followed by the n/2 - 1 imaginary components). After the
-// transform is done on the rows, the first n/2 + 1 columns are real, and
-// the remaining are the imaginary components. After the transform on the
-// columns, the region of [0, n/2]x[0, n/2] contains the real part of
-// fft of the real columns. The real part of the 2d fft also includes the
-// imaginary part of transformed imaginary columns. This function assembles
-// the correct outputs while putting the real and imaginary components
-// next to each other.
-static INLINE void unpack_2d_output(const float *col_fft, float *output,
- int n) {
- for (int y = 0; y <= n / 2; ++y) {
- const int y2 = y + n / 2;
- const int y_extra = y2 > n / 2 && y2 < n;
-
- for (int x = 0; x <= n / 2; ++x) {
- const int x2 = x + n / 2;
- const int x_extra = x2 > n / 2 && x2 < n;
- output[2 * (y * n + x)] =
- col_fft[y * n + x] - (x_extra && y_extra ? col_fft[y2 * n + x2] : 0);
- output[2 * (y * n + x) + 1] = (y_extra ? col_fft[y2 * n + x] : 0) +
- (x_extra ? col_fft[y * n + x2] : 0);
- if (y_extra) {
- output[2 * ((n - y) * n + x)] =
- col_fft[y * n + x] +
- (x_extra && y_extra ? col_fft[y2 * n + x2] : 0);
- output[2 * ((n - y) * n + x) + 1] =
- -(y_extra ? col_fft[y2 * n + x] : 0) +
- (x_extra ? col_fft[y * n + x2] : 0);
- }
- }
- }
-}
-
-void aom_fft_2d_gen(const float *input, float *temp, float *output, int n,
- aom_fft_1d_func_t tform, aom_fft_transpose_func_t transpose,
- aom_fft_unpack_func_t unpack, int vec_size) {
- for (int x = 0; x < n; x += vec_size) {
- tform(input + x, output + x, n);
- }
- transpose(output, temp, n);
-
- for (int x = 0; x < n; x += vec_size) {
- tform(temp + x, output + x, n);
- }
- transpose(output, temp, n);
-
- unpack(temp, output, n);
-}
-
-static INLINE void store_float(float *output, float input) { *output = input; }
-static INLINE float add_float(float a, float b) { return a + b; }
-static INLINE float sub_float(float a, float b) { return a - b; }
-static INLINE float mul_float(float a, float b) { return a * b; }
-
-GEN_FFT_2(void, float, float, float, *, store_float);
-GEN_FFT_4(void, float, float, float, *, store_float, (float), add_float,
- sub_float);
-GEN_FFT_8(void, float, float, float, *, store_float, (float), add_float,
- sub_float, mul_float);
-GEN_FFT_16(void, float, float, float, *, store_float, (float), add_float,
- sub_float, mul_float);
-GEN_FFT_32(void, float, float, float, *, store_float, (float), add_float,
- sub_float, mul_float);
-
-void aom_fft2x2_float_c(const float *input, float *temp, float *output) {
- aom_fft_2d_gen(input, temp, output, 2, aom_fft1d_2_float, simple_transpose,
- unpack_2d_output, 1);
-}
-
-void aom_fft4x4_float_c(const float *input, float *temp, float *output) {
- aom_fft_2d_gen(input, temp, output, 4, aom_fft1d_4_float, simple_transpose,
- unpack_2d_output, 1);
-}
-
-void aom_fft8x8_float_c(const float *input, float *temp, float *output) {
- aom_fft_2d_gen(input, temp, output, 8, aom_fft1d_8_float, simple_transpose,
- unpack_2d_output, 1);
-}
-
-void aom_fft16x16_float_c(const float *input, float *temp, float *output) {
- aom_fft_2d_gen(input, temp, output, 16, aom_fft1d_16_float, simple_transpose,
- unpack_2d_output, 1);
-}
-
-void aom_fft32x32_float_c(const float *input, float *temp, float *output) {
- aom_fft_2d_gen(input, temp, output, 32, aom_fft1d_32_float, simple_transpose,
- unpack_2d_output, 1);
-}
-
-void aom_ifft_2d_gen(const float *input, float *temp, float *output, int n,
- aom_fft_1d_func_t fft_single, aom_fft_1d_func_t fft_multi,
- aom_fft_1d_func_t ifft_multi,
- aom_fft_transpose_func_t transpose, int vec_size) {
- // Column 0 and n/2 have conjugate symmetry, so we can directly do the ifft
- // and get real outputs.
- for (int y = 0; y <= n / 2; ++y) {
- output[y * n] = input[2 * y * n];
- output[y * n + 1] = input[2 * (y * n + n / 2)];
- }
- for (int y = n / 2 + 1; y < n; ++y) {
- output[y * n] = input[2 * (y - n / 2) * n + 1];
- output[y * n + 1] = input[2 * ((y - n / 2) * n + n / 2) + 1];
- }
-
- for (int i = 0; i < 2; i += vec_size) {
- ifft_multi(output + i, temp + i, n);
- }
-
- // For the other columns, since we don't have a full ifft for complex inputs
- // we have to split them into the real and imaginary counterparts.
- // Pack the real component, then the imaginary components.
- for (int y = 0; y < n; ++y) {
- for (int x = 1; x < n / 2; ++x) {
- output[y * n + (x + 1)] = input[2 * (y * n + x)];
- }
- for (int x = 1; x < n / 2; ++x) {
- output[y * n + (x + n / 2)] = input[2 * (y * n + x) + 1];
- }
- }
- for (int y = 2; y < vec_size; y++) {
- fft_single(output + y, temp + y, n);
- }
- // This is the part that can be sped up with SIMD
- for (int y = AOMMAX(2, vec_size); y < n; y += vec_size) {
- fft_multi(output + y, temp + y, n);
- }
-
- // Put the 0 and n/2 th results in the correct place.
- for (int x = 0; x < n; ++x) {
- output[x] = temp[x * n];
- output[(n / 2) * n + x] = temp[x * n + 1];
- }
- // This rearranges and transposes.
- for (int y = 1; y < n / 2; ++y) {
- // Fill in the real columns
- for (int x = 0; x <= n / 2; ++x) {
- output[x + y * n] =
- temp[(y + 1) + x * n] +
- ((x > 0 && x < n / 2) ? temp[(y + n / 2) + (x + n / 2) * n] : 0);
- }
- for (int x = n / 2 + 1; x < n; ++x) {
- output[x + y * n] = temp[(y + 1) + (n - x) * n] -
- temp[(y + n / 2) + ((n - x) + n / 2) * n];
- }
- // Fill in the imag columns
- for (int x = 0; x <= n / 2; ++x) {
- output[x + (y + n / 2) * n] =
- temp[(y + n / 2) + x * n] -
- ((x > 0 && x < n / 2) ? temp[(y + 1) + (x + n / 2) * n] : 0);
- }
- for (int x = n / 2 + 1; x < n; ++x) {
- output[x + (y + n / 2) * n] = temp[(y + 1) + ((n - x) + n / 2) * n] +
- temp[(y + n / 2) + (n - x) * n];
- }
- }
- for (int y = 0; y < n; y += vec_size) {
- ifft_multi(output + y, temp + y, n);
- }
- transpose(temp, output, n);
-}
-
-GEN_IFFT_2(void, float, float, float, *, store_float);
-GEN_IFFT_4(void, float, float, float, *, store_float, (float), add_float,
- sub_float);
-GEN_IFFT_8(void, float, float, float, *, store_float, (float), add_float,
- sub_float, mul_float);
-GEN_IFFT_16(void, float, float, float, *, store_float, (float), add_float,
- sub_float, mul_float);
-GEN_IFFT_32(void, float, float, float, *, store_float, (float), add_float,
- sub_float, mul_float);
-
-void aom_ifft2x2_float_c(const float *input, float *temp, float *output) {
- aom_ifft_2d_gen(input, temp, output, 2, aom_fft1d_2_float, aom_fft1d_2_float,
- aom_ifft1d_2_float, simple_transpose, 1);
-}
-
-void aom_ifft4x4_float_c(const float *input, float *temp, float *output) {
- aom_ifft_2d_gen(input, temp, output, 4, aom_fft1d_4_float, aom_fft1d_4_float,
- aom_ifft1d_4_float, simple_transpose, 1);
-}
-
-void aom_ifft8x8_float_c(const float *input, float *temp, float *output) {
- aom_ifft_2d_gen(input, temp, output, 8, aom_fft1d_8_float, aom_fft1d_8_float,
- aom_ifft1d_8_float, simple_transpose, 1);
-}
-
-void aom_ifft16x16_float_c(const float *input, float *temp, float *output) {
- aom_ifft_2d_gen(input, temp, output, 16, aom_fft1d_16_float,
- aom_fft1d_16_float, aom_ifft1d_16_float, simple_transpose, 1);
-}
-
-void aom_ifft32x32_float_c(const float *input, float *temp, float *output) {
- aom_ifft_2d_gen(input, temp, output, 32, aom_fft1d_32_float,
- aom_fft1d_32_float, aom_ifft1d_32_float, simple_transpose, 1);
-}
diff --git a/third_party/aom/aom_dsp/fft_common.h b/third_party/aom/aom_dsp/fft_common.h
deleted file mode 100644
index 5137331ae..000000000
--- a/third_party/aom/aom_dsp/fft_common.h
+++ /dev/null
@@ -1,1050 +0,0 @@
-/*
- * Copyright (c) 2018, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#ifndef AOM_AOM_DSP_FFT_COMMON_H_
-#define AOM_AOM_DSP_FFT_COMMON_H_
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-/*!\brief A function pointer for computing 1d fft and ifft.
- *
- * The function will point to an implementation for a specific transform size,
- * and may perform the transforms using vectorized instructions.
- *
- * For a non-vectorized forward transforms of size n, the input and output
- * buffers will be size n. The output takes advantage of conjugate symmetry and
- * packs the results as: [r_0, r_1, ..., r_{n/2}, i_1, ..., i_{n/2-1}], where
- * (r_{j}, i_{j}) is the complex output for index j.
- *
- * An inverse transform will assume that the complex "input" is packed
- * similarly. Its output will be real.
- *
- * Non-vectorized transforms (e.g., on a single row) would use a stride = 1.
- *
- * Vectorized implementations are parallelized along the columns so that the fft
- * can be performed on multiple columns at a time. In such cases the data block
- * for input and output is typically square (n x n) and the stride will
- * correspond to the spacing between rows. At minimum, the input size must be
- * n x simd_vector_length.
- *
- * \param[in] input Input buffer. See above for size restrictions.
- * \param[out] output Output buffer. See above for size restrictions.
- * \param[in] stride The spacing in number of elements between rows
- * (or elements)
- */
-typedef void (*aom_fft_1d_func_t)(const float *input, float *output,
- int stride);
-
-// Declare some of the forward non-vectorized transforms which are used in some
-// of the vectorized implementations
-void aom_fft1d_4_float(const float *input, float *output, int stride);
-void aom_fft1d_8_float(const float *input, float *output, int stride);
-void aom_fft1d_16_float(const float *input, float *output, int stride);
-void aom_fft1d_32_float(const float *input, float *output, int stride);
-
-/**\!brief Function pointer for transposing a matrix of floats.
- *
- * \param[in] input Input buffer (size n x n)
- * \param[out] output Output buffer (size n x n)
- * \param[in] n Extent of one dimension of the square matrix.
- */
-typedef void (*aom_fft_transpose_func_t)(const float *input, float *output,
- int n);
-
-/**\!brief Function pointer for re-arranging intermediate 2d transform results.
- *
- * After re-arrangement, the real and imaginary components will be packed
- * tightly next to each other.
- *
- * \param[in] input Input buffer (size n x n)
- * \param[out] output Output buffer (size 2 x n x n)
- * \param[in] n Extent of one dimension of the square matrix.
- */
-typedef void (*aom_fft_unpack_func_t)(const float *input, float *output, int n);
-
-/*!\brief Performs a 2d fft with the given functions.
- *
- * This generator function allows for multiple different implementations of 2d
- * fft with different vector operations, without having to redefine the main
- * body multiple times.
- *
- * \param[in] input Input buffer to run the transform on (size n x n)
- * \param[out] temp Working buffer for computing the transform (size n x n)
- * \param[out] output Output buffer (size 2 x n x n)
- * \param[in] tform Forward transform function
- * \param[in] transpose Transpose function (for n x n matrix)
- * \param[in] unpack Unpack function used to massage outputs to correct form
- * \param[in] vec_size Vector size (the transform is done vec_size units at
- * a time)
- */
-void aom_fft_2d_gen(const float *input, float *temp, float *output, int n,
- aom_fft_1d_func_t tform, aom_fft_transpose_func_t transpose,
- aom_fft_unpack_func_t unpack, int vec_size);
-
-/*!\brief Perform a 2d inverse fft with the given helper functions
- *
- * \param[in] input Input buffer to run the transform on (size 2 x n x n)
- * \param[out] temp Working buffer for computations (size 2 x n x n)
- * \param[out] output Output buffer (size n x n)
- * \param[in] fft_single Forward transform function (non vectorized)
- * \param[in] fft_multi Forward transform function (vectorized)
- * \param[in] ifft_multi Inverse transform function (vectorized)
- * \param[in] transpose Transpose function (for n x n matrix)
- * \param[in] vec_size Vector size (the transform is done vec_size
- * units at a time)
- */
-void aom_ifft_2d_gen(const float *input, float *temp, float *output, int n,
- aom_fft_1d_func_t fft_single, aom_fft_1d_func_t fft_multi,
- aom_fft_1d_func_t ifft_multi,
- aom_fft_transpose_func_t transpose, int vec_size);
-#ifdef __cplusplus
-}
-#endif
-
-// The macros below define 1D fft/ifft for different data types and for
-// different simd vector intrinsic types.
-
-#define GEN_FFT_2(ret, suffix, T, T_VEC, load, store) \
- ret aom_fft1d_2_##suffix(const T *input, T *output, int stride) { \
- const T_VEC i0 = load(input + 0 * stride); \
- const T_VEC i1 = load(input + 1 * stride); \
- store(output + 0 * stride, i0 + i1); \
- store(output + 1 * stride, i0 - i1); \
- }
-
-#define GEN_FFT_4(ret, suffix, T, T_VEC, load, store, constant, add, sub) \
- ret aom_fft1d_4_##suffix(const T *input, T *output, int stride) { \
- const T_VEC kWeight0 = constant(0.0f); \
- const T_VEC i0 = load(input + 0 * stride); \
- const T_VEC i1 = load(input + 1 * stride); \
- const T_VEC i2 = load(input + 2 * stride); \
- const T_VEC i3 = load(input + 3 * stride); \
- const T_VEC w0 = add(i0, i2); \
- const T_VEC w1 = sub(i0, i2); \
- const T_VEC w2 = add(i1, i3); \
- const T_VEC w3 = sub(i1, i3); \
- store(output + 0 * stride, add(w0, w2)); \
- store(output + 1 * stride, w1); \
- store(output + 2 * stride, sub(w0, w2)); \
- store(output + 3 * stride, sub(kWeight0, w3)); \
- }
-
-#define GEN_FFT_8(ret, suffix, T, T_VEC, load, store, constant, add, sub, mul) \
- ret aom_fft1d_8_##suffix(const T *input, T *output, int stride) { \
- const T_VEC kWeight0 = constant(0.0f); \
- const T_VEC kWeight2 = constant(0.707107f); \
- const T_VEC i0 = load(input + 0 * stride); \
- const T_VEC i1 = load(input + 1 * stride); \
- const T_VEC i2 = load(input + 2 * stride); \
- const T_VEC i3 = load(input + 3 * stride); \
- const T_VEC i4 = load(input + 4 * stride); \
- const T_VEC i5 = load(input + 5 * stride); \
- const T_VEC i6 = load(input + 6 * stride); \
- const T_VEC i7 = load(input + 7 * stride); \
- const T_VEC w0 = add(i0, i4); \
- const T_VEC w1 = sub(i0, i4); \
- const T_VEC w2 = add(i2, i6); \
- const T_VEC w3 = sub(i2, i6); \
- const T_VEC w4 = add(w0, w2); \
- const T_VEC w5 = sub(w0, w2); \
- const T_VEC w7 = add(i1, i5); \
- const T_VEC w8 = sub(i1, i5); \
- const T_VEC w9 = add(i3, i7); \
- const T_VEC w10 = sub(i3, i7); \
- const T_VEC w11 = add(w7, w9); \
- const T_VEC w12 = sub(w7, w9); \
- store(output + 0 * stride, add(w4, w11)); \
- store(output + 1 * stride, add(w1, mul(kWeight2, sub(w8, w10)))); \
- store(output + 2 * stride, w5); \
- store(output + 3 * stride, sub(w1, mul(kWeight2, sub(w8, w10)))); \
- store(output + 4 * stride, sub(w4, w11)); \
- store(output + 5 * stride, \
- sub(sub(kWeight0, w3), mul(kWeight2, add(w10, w8)))); \
- store(output + 6 * stride, sub(kWeight0, w12)); \
- store(output + 7 * stride, sub(w3, mul(kWeight2, add(w10, w8)))); \
- }
-
-#define GEN_FFT_16(ret, suffix, T, T_VEC, load, store, constant, add, sub, \
- mul) /* Defines aom_fft1d_16_<suffix>(): a straight-line 16-point transform built only from the load/store/constant/add/sub/mul operations passed in. ret is pasted in front of the function name; T is the pointed-to element type; T_VEC is the type of every loaded value and intermediate. */ \
- ret aom_fft1d_16_##suffix(const T *input, T *output, int stride) { /* element k is read from input + k * stride and written to output + k * stride */ \
- const T_VEC kWeight0 = constant(0.0f); /* zero; sub(kWeight0, x) is how this code negates x */ \
- const T_VEC kWeight2 = constant(0.707107f); /* ~cos(pi/4) = sqrt(2)/2 */ \
- const T_VEC kWeight3 = constant(0.92388f); /* ~cos(pi/8) */ \
- const T_VEC kWeight4 = constant(0.382683f); /* ~sin(pi/8) */ \
- const T_VEC i0 = load(input + 0 * stride); /* all 16 inputs are loaded before any arithmetic or store */ \
- const T_VEC i1 = load(input + 1 * stride); \
- const T_VEC i2 = load(input + 2 * stride); \
- const T_VEC i3 = load(input + 3 * stride); \
- const T_VEC i4 = load(input + 4 * stride); \
- const T_VEC i5 = load(input + 5 * stride); \
- const T_VEC i6 = load(input + 6 * stride); \
- const T_VEC i7 = load(input + 7 * stride); \
- const T_VEC i8 = load(input + 8 * stride); \
- const T_VEC i9 = load(input + 9 * stride); \
- const T_VEC i10 = load(input + 10 * stride); \
- const T_VEC i11 = load(input + 11 * stride); \
- const T_VEC i12 = load(input + 12 * stride); \
- const T_VEC i13 = load(input + 13 * stride); \
- const T_VEC i14 = load(input + 14 * stride); \
- const T_VEC i15 = load(input + 15 * stride); \
- const T_VEC w0 = add(i0, i8); /* w0..w18: add/sub network over the even-indexed inputs only */ \
- const T_VEC w1 = sub(i0, i8); \
- const T_VEC w2 = add(i4, i12); \
- const T_VEC w3 = sub(i4, i12); \
- const T_VEC w4 = add(w0, w2); \
- const T_VEC w5 = sub(w0, w2); \
- const T_VEC w7 = add(i2, i10); \
- const T_VEC w8 = sub(i2, i10); \
- const T_VEC w9 = add(i6, i14); \
- const T_VEC w10 = sub(i6, i14); \
- const T_VEC w11 = add(w7, w9); \
- const T_VEC w12 = sub(w7, w9); \
- const T_VEC w14 = add(w4, w11); \
- const T_VEC w15 = sub(w4, w11); \
- const T_VEC w16[2] = { add(w1, mul(kWeight2, sub(w8, w10))), /* two-element intermediates; presumably [0] = real part, [1] = imaginary part - TODO confirm */ \
- sub(sub(kWeight0, w3), \
- mul(kWeight2, add(w10, w8))) }; \
- const T_VEC w18[2] = { sub(w1, mul(kWeight2, sub(w8, w10))), \
- sub(w3, mul(kWeight2, add(w10, w8))) }; \
- const T_VEC w19 = add(i1, i9); /* w19..w37: the same network shape as w0..w18, applied to the odd-indexed inputs */ \
- const T_VEC w20 = sub(i1, i9); \
- const T_VEC w21 = add(i5, i13); \
- const T_VEC w22 = sub(i5, i13); \
- const T_VEC w23 = add(w19, w21); \
- const T_VEC w24 = sub(w19, w21); \
- const T_VEC w26 = add(i3, i11); \
- const T_VEC w27 = sub(i3, i11); \
- const T_VEC w28 = add(i7, i15); \
- const T_VEC w29 = sub(i7, i15); \
- const T_VEC w30 = add(w26, w28); \
- const T_VEC w31 = sub(w26, w28); \
- const T_VEC w33 = add(w23, w30); \
- const T_VEC w34 = sub(w23, w30); \
- const T_VEC w35[2] = { add(w20, mul(kWeight2, sub(w27, w29))), \
- sub(sub(kWeight0, w22), \
- mul(kWeight2, add(w29, w27))) }; \
- const T_VEC w37[2] = { sub(w20, mul(kWeight2, sub(w27, w29))), \
- sub(w22, mul(kWeight2, add(w29, w27))) }; \
- store(output + 0 * stride, add(w14, w33)); /* final stage: each output combines even-half and odd-half intermediates. NOTE(review): outputs 0..8 look like real parts and 9..15 like imaginary parts of a real-input DFT - confirm against the declaring header */ \
- store(output + 1 * stride, \
- add(w16[0], add(mul(kWeight3, w35[0]), mul(kWeight4, w35[1])))); \
- store(output + 2 * stride, add(w5, mul(kWeight2, sub(w24, w31)))); \
- store(output + 3 * stride, \
- add(w18[0], add(mul(kWeight4, w37[0]), mul(kWeight3, w37[1])))); \
- store(output + 4 * stride, w15); \
- store(output + 5 * stride, \
- add(w18[0], sub(sub(kWeight0, mul(kWeight4, w37[0])), \
- mul(kWeight3, w37[1])))); \
- store(output + 6 * stride, sub(w5, mul(kWeight2, sub(w24, w31)))); \
- store(output + 7 * stride, \
- add(w16[0], sub(sub(kWeight0, mul(kWeight3, w35[0])), \
- mul(kWeight4, w35[1])))); \
- store(output + 8 * stride, sub(w14, w33)); \
- store(output + 9 * stride, \
- add(w16[1], sub(mul(kWeight3, w35[1]), mul(kWeight4, w35[0])))); \
- store(output + 10 * stride, \
- sub(sub(kWeight0, w12), mul(kWeight2, add(w31, w24)))); \
- store(output + 11 * stride, \
- add(w18[1], sub(mul(kWeight4, w37[1]), mul(kWeight3, w37[0])))); \
- store(output + 12 * stride, sub(kWeight0, w34)); \
- store(output + 13 * stride, \
- sub(sub(kWeight0, w18[1]), \
- sub(mul(kWeight3, w37[0]), mul(kWeight4, w37[1])))); \
- store(output + 14 * stride, sub(w12, mul(kWeight2, add(w31, w24)))); \
- store(output + 15 * stride, \
- sub(sub(kWeight0, w16[1]), \
- sub(mul(kWeight4, w35[0]), mul(kWeight3, w35[1])))); \
- }
-
-#define GEN_FFT_32(ret, suffix, T, T_VEC, load, store, constant, add, sub, \
- mul) /* Defines aom_fft1d_32_<suffix>(): a straight-line 32-point transform built only from the load/store/constant/add/sub/mul operations passed in. ret is pasted in front of the function name; T is the pointed-to element type; T_VEC is the type of every loaded value and intermediate. */ \
- ret aom_fft1d_32_##suffix(const T *input, T *output, int stride) { /* element k is read from input + k * stride and written to output + k * stride */ \
- const T_VEC kWeight0 = constant(0.0f); /* zero; sub(kWeight0, x) is how this code negates x */ \
- const T_VEC kWeight2 = constant(0.707107f); /* ~cos(pi/4) = sqrt(2)/2 */ \
- const T_VEC kWeight3 = constant(0.92388f); /* ~cos(pi/8) */ \
- const T_VEC kWeight4 = constant(0.382683f); /* ~sin(pi/8) */ \
- const T_VEC kWeight5 = constant(0.980785f); /* ~cos(pi/16) */ \
- const T_VEC kWeight6 = constant(0.19509f); /* ~sin(pi/16) */ \
- const T_VEC kWeight7 = constant(0.83147f); /* ~cos(3*pi/16) */ \
- const T_VEC kWeight8 = constant(0.55557f); /* ~sin(3*pi/16) */ \
- const T_VEC i0 = load(input + 0 * stride); /* all 32 inputs are loaded before any arithmetic or store */ \
- const T_VEC i1 = load(input + 1 * stride); \
- const T_VEC i2 = load(input + 2 * stride); \
- const T_VEC i3 = load(input + 3 * stride); \
- const T_VEC i4 = load(input + 4 * stride); \
- const T_VEC i5 = load(input + 5 * stride); \
- const T_VEC i6 = load(input + 6 * stride); \
- const T_VEC i7 = load(input + 7 * stride); \
- const T_VEC i8 = load(input + 8 * stride); \
- const T_VEC i9 = load(input + 9 * stride); \
- const T_VEC i10 = load(input + 10 * stride); \
- const T_VEC i11 = load(input + 11 * stride); \
- const T_VEC i12 = load(input + 12 * stride); \
- const T_VEC i13 = load(input + 13 * stride); \
- const T_VEC i14 = load(input + 14 * stride); \
- const T_VEC i15 = load(input + 15 * stride); \
- const T_VEC i16 = load(input + 16 * stride); \
- const T_VEC i17 = load(input + 17 * stride); \
- const T_VEC i18 = load(input + 18 * stride); \
- const T_VEC i19 = load(input + 19 * stride); \
- const T_VEC i20 = load(input + 20 * stride); \
- const T_VEC i21 = load(input + 21 * stride); \
- const T_VEC i22 = load(input + 22 * stride); \
- const T_VEC i23 = load(input + 23 * stride); \
- const T_VEC i24 = load(input + 24 * stride); \
- const T_VEC i25 = load(input + 25 * stride); \
- const T_VEC i26 = load(input + 26 * stride); \
- const T_VEC i27 = load(input + 27 * stride); \
- const T_VEC i28 = load(input + 28 * stride); \
- const T_VEC i29 = load(input + 29 * stride); \
- const T_VEC i30 = load(input + 30 * stride); \
- const T_VEC i31 = load(input + 31 * stride); \
- const T_VEC w0 = add(i0, i16); /* w0..w46: built from the even-indexed inputs only (w0..w18 from i0,i4,...,i28; w19..w37 from i2,i6,...,i30; w38..w46 combine the two) */ \
- const T_VEC w1 = sub(i0, i16); \
- const T_VEC w2 = add(i8, i24); \
- const T_VEC w3 = sub(i8, i24); \
- const T_VEC w4 = add(w0, w2); \
- const T_VEC w5 = sub(w0, w2); \
- const T_VEC w7 = add(i4, i20); \
- const T_VEC w8 = sub(i4, i20); \
- const T_VEC w9 = add(i12, i28); \
- const T_VEC w10 = sub(i12, i28); \
- const T_VEC w11 = add(w7, w9); \
- const T_VEC w12 = sub(w7, w9); \
- const T_VEC w14 = add(w4, w11); \
- const T_VEC w15 = sub(w4, w11); \
- const T_VEC w16[2] = { add(w1, mul(kWeight2, sub(w8, w10))), /* two-element intermediates; presumably [0] = real part, [1] = imaginary part - TODO confirm */ \
- sub(sub(kWeight0, w3), \
- mul(kWeight2, add(w10, w8))) }; \
- const T_VEC w18[2] = { sub(w1, mul(kWeight2, sub(w8, w10))), \
- sub(w3, mul(kWeight2, add(w10, w8))) }; \
- const T_VEC w19 = add(i2, i18); \
- const T_VEC w20 = sub(i2, i18); \
- const T_VEC w21 = add(i10, i26); \
- const T_VEC w22 = sub(i10, i26); \
- const T_VEC w23 = add(w19, w21); \
- const T_VEC w24 = sub(w19, w21); \
- const T_VEC w26 = add(i6, i22); \
- const T_VEC w27 = sub(i6, i22); \
- const T_VEC w28 = add(i14, i30); \
- const T_VEC w29 = sub(i14, i30); \
- const T_VEC w30 = add(w26, w28); \
- const T_VEC w31 = sub(w26, w28); \
- const T_VEC w33 = add(w23, w30); \
- const T_VEC w34 = sub(w23, w30); \
- const T_VEC w35[2] = { add(w20, mul(kWeight2, sub(w27, w29))), \
- sub(sub(kWeight0, w22), \
- mul(kWeight2, add(w29, w27))) }; \
- const T_VEC w37[2] = { sub(w20, mul(kWeight2, sub(w27, w29))), \
- sub(w22, mul(kWeight2, add(w29, w27))) }; \
- const T_VEC w38 = add(w14, w33); \
- const T_VEC w39 = sub(w14, w33); \
- const T_VEC w40[2] = { \
- add(w16[0], add(mul(kWeight3, w35[0]), mul(kWeight4, w35[1]))), \
- add(w16[1], sub(mul(kWeight3, w35[1]), mul(kWeight4, w35[0]))) \
- }; \
- const T_VEC w41[2] = { add(w5, mul(kWeight2, sub(w24, w31))), \
- sub(sub(kWeight0, w12), \
- mul(kWeight2, add(w31, w24))) }; \
- const T_VEC w42[2] = { \
- add(w18[0], add(mul(kWeight4, w37[0]), mul(kWeight3, w37[1]))), \
- add(w18[1], sub(mul(kWeight4, w37[1]), mul(kWeight3, w37[0]))) \
- }; \
- const T_VEC w44[2] = { \
- add(w18[0], \
- sub(sub(kWeight0, mul(kWeight4, w37[0])), mul(kWeight3, w37[1]))), \
- sub(sub(kWeight0, w18[1]), \
- sub(mul(kWeight3, w37[0]), mul(kWeight4, w37[1]))) \
- }; \
- const T_VEC w45[2] = { sub(w5, mul(kWeight2, sub(w24, w31))), \
- sub(w12, mul(kWeight2, add(w31, w24))) }; \
- const T_VEC w46[2] = { \
- add(w16[0], \
- sub(sub(kWeight0, mul(kWeight3, w35[0])), mul(kWeight4, w35[1]))), \
- sub(sub(kWeight0, w16[1]), \
- sub(mul(kWeight4, w35[0]), mul(kWeight3, w35[1]))) \
- }; \
- const T_VEC w47 = add(i1, i17); /* w47..w93: the same network shape as w0..w46, applied to the odd-indexed inputs */ \
- const T_VEC w48 = sub(i1, i17); \
- const T_VEC w49 = add(i9, i25); \
- const T_VEC w50 = sub(i9, i25); \
- const T_VEC w51 = add(w47, w49); \
- const T_VEC w52 = sub(w47, w49); \
- const T_VEC w54 = add(i5, i21); \
- const T_VEC w55 = sub(i5, i21); \
- const T_VEC w56 = add(i13, i29); \
- const T_VEC w57 = sub(i13, i29); \
- const T_VEC w58 = add(w54, w56); \
- const T_VEC w59 = sub(w54, w56); \
- const T_VEC w61 = add(w51, w58); \
- const T_VEC w62 = sub(w51, w58); \
- const T_VEC w63[2] = { add(w48, mul(kWeight2, sub(w55, w57))), \
- sub(sub(kWeight0, w50), \
- mul(kWeight2, add(w57, w55))) }; \
- const T_VEC w65[2] = { sub(w48, mul(kWeight2, sub(w55, w57))), \
- sub(w50, mul(kWeight2, add(w57, w55))) }; \
- const T_VEC w66 = add(i3, i19); \
- const T_VEC w67 = sub(i3, i19); \
- const T_VEC w68 = add(i11, i27); \
- const T_VEC w69 = sub(i11, i27); \
- const T_VEC w70 = add(w66, w68); \
- const T_VEC w71 = sub(w66, w68); \
- const T_VEC w73 = add(i7, i23); \
- const T_VEC w74 = sub(i7, i23); \
- const T_VEC w75 = add(i15, i31); \
- const T_VEC w76 = sub(i15, i31); \
- const T_VEC w77 = add(w73, w75); \
- const T_VEC w78 = sub(w73, w75); \
- const T_VEC w80 = add(w70, w77); \
- const T_VEC w81 = sub(w70, w77); \
- const T_VEC w82[2] = { add(w67, mul(kWeight2, sub(w74, w76))), \
- sub(sub(kWeight0, w69), \
- mul(kWeight2, add(w76, w74))) }; \
- const T_VEC w84[2] = { sub(w67, mul(kWeight2, sub(w74, w76))), \
- sub(w69, mul(kWeight2, add(w76, w74))) }; \
- const T_VEC w85 = add(w61, w80); \
- const T_VEC w86 = sub(w61, w80); \
- const T_VEC w87[2] = { \
- add(w63[0], add(mul(kWeight3, w82[0]), mul(kWeight4, w82[1]))), \
- add(w63[1], sub(mul(kWeight3, w82[1]), mul(kWeight4, w82[0]))) \
- }; \
- const T_VEC w88[2] = { add(w52, mul(kWeight2, sub(w71, w78))), \
- sub(sub(kWeight0, w59), \
- mul(kWeight2, add(w78, w71))) }; \
- const T_VEC w89[2] = { \
- add(w65[0], add(mul(kWeight4, w84[0]), mul(kWeight3, w84[1]))), \
- add(w65[1], sub(mul(kWeight4, w84[1]), mul(kWeight3, w84[0]))) \
- }; \
- const T_VEC w91[2] = { \
- add(w65[0], \
- sub(sub(kWeight0, mul(kWeight4, w84[0])), mul(kWeight3, w84[1]))), \
- sub(sub(kWeight0, w65[1]), \
- sub(mul(kWeight3, w84[0]), mul(kWeight4, w84[1]))) \
- }; \
- const T_VEC w92[2] = { sub(w52, mul(kWeight2, sub(w71, w78))), \
- sub(w59, mul(kWeight2, add(w78, w71))) }; \
- const T_VEC w93[2] = { \
- add(w63[0], \
- sub(sub(kWeight0, mul(kWeight3, w82[0])), mul(kWeight4, w82[1]))), \
- sub(sub(kWeight0, w63[1]), \
- sub(mul(kWeight4, w82[0]), mul(kWeight3, w82[1]))) \
- }; \
- store(output + 0 * stride, add(w38, w85)); /* final stage: each output combines even-half (w38..w46, w15, w34, w39) and odd-half (w62, w81, w85..w93) intermediates. NOTE(review): outputs 0..16 look like real parts and 17..31 like imaginary parts of a real-input DFT - confirm against the declaring header */ \
- store(output + 1 * stride, \
- add(w40[0], add(mul(kWeight5, w87[0]), mul(kWeight6, w87[1])))); \
- store(output + 2 * stride, \
- add(w41[0], add(mul(kWeight3, w88[0]), mul(kWeight4, w88[1])))); \
- store(output + 3 * stride, \
- add(w42[0], add(mul(kWeight7, w89[0]), mul(kWeight8, w89[1])))); \
- store(output + 4 * stride, add(w15, mul(kWeight2, sub(w62, w81)))); \
- store(output + 5 * stride, \
- add(w44[0], add(mul(kWeight8, w91[0]), mul(kWeight7, w91[1])))); \
- store(output + 6 * stride, \
- add(w45[0], add(mul(kWeight4, w92[0]), mul(kWeight3, w92[1])))); \
- store(output + 7 * stride, \
- add(w46[0], add(mul(kWeight6, w93[0]), mul(kWeight5, w93[1])))); \
- store(output + 8 * stride, w39); \
- store(output + 9 * stride, \
- add(w46[0], sub(sub(kWeight0, mul(kWeight6, w93[0])), \
- mul(kWeight5, w93[1])))); \
- store(output + 10 * stride, \
- add(w45[0], sub(sub(kWeight0, mul(kWeight4, w92[0])), \
- mul(kWeight3, w92[1])))); \
- store(output + 11 * stride, \
- add(w44[0], sub(sub(kWeight0, mul(kWeight8, w91[0])), \
- mul(kWeight7, w91[1])))); \
- store(output + 12 * stride, sub(w15, mul(kWeight2, sub(w62, w81)))); \
- store(output + 13 * stride, \
- add(w42[0], sub(sub(kWeight0, mul(kWeight7, w89[0])), \
- mul(kWeight8, w89[1])))); \
- store(output + 14 * stride, \
- add(w41[0], sub(sub(kWeight0, mul(kWeight3, w88[0])), \
- mul(kWeight4, w88[1])))); \
- store(output + 15 * stride, \
- add(w40[0], sub(sub(kWeight0, mul(kWeight5, w87[0])), \
- mul(kWeight6, w87[1])))); \
- store(output + 16 * stride, sub(w38, w85)); \
- store(output + 17 * stride, \
- add(w40[1], sub(mul(kWeight5, w87[1]), mul(kWeight6, w87[0])))); \
- store(output + 18 * stride, \
- add(w41[1], sub(mul(kWeight3, w88[1]), mul(kWeight4, w88[0])))); \
- store(output + 19 * stride, \
- add(w42[1], sub(mul(kWeight7, w89[1]), mul(kWeight8, w89[0])))); \
- store(output + 20 * stride, \
- sub(sub(kWeight0, w34), mul(kWeight2, add(w81, w62)))); \
- store(output + 21 * stride, \
- add(w44[1], sub(mul(kWeight8, w91[1]), mul(kWeight7, w91[0])))); \
- store(output + 22 * stride, \
- add(w45[1], sub(mul(kWeight4, w92[1]), mul(kWeight3, w92[0])))); \
- store(output + 23 * stride, \
- add(w46[1], sub(mul(kWeight6, w93[1]), mul(kWeight5, w93[0])))); \
- store(output + 24 * stride, sub(kWeight0, w86)); \
- store(output + 25 * stride, \
- sub(sub(kWeight0, w46[1]), \
- sub(mul(kWeight5, w93[0]), mul(kWeight6, w93[1])))); \
- store(output + 26 * stride, \
- sub(sub(kWeight0, w45[1]), \
- sub(mul(kWeight3, w92[0]), mul(kWeight4, w92[1])))); \
- store(output + 27 * stride, \
- sub(sub(kWeight0, w44[1]), \
- sub(mul(kWeight7, w91[0]), mul(kWeight8, w91[1])))); \
- store(output + 28 * stride, sub(w34, mul(kWeight2, add(w81, w62)))); \
- store(output + 29 * stride, \
- sub(sub(kWeight0, w42[1]), \
- sub(mul(kWeight8, w89[0]), mul(kWeight7, w89[1])))); \
- store(output + 30 * stride, \
- sub(sub(kWeight0, w41[1]), \
- sub(mul(kWeight4, w88[0]), mul(kWeight3, w88[1])))); \
- store(output + 31 * stride, \
- sub(sub(kWeight0, w40[1]), \
- sub(mul(kWeight6, w87[0]), mul(kWeight5, w87[1])))); \
- }
-
-#define GEN_IFFT_2(ret, suffix, T, T_VEC, load, store) /* Defines aom_ifft1d_2_<suffix>(): a 2-point sum/difference butterfly. ret is pasted in front of the function name; T is the pointed-to element type; T_VEC is the type returned by load and accepted by store. */ \
- ret aom_ifft1d_2_##suffix(const T *input, T *output, int stride) { /* element k is read from input + k * stride and written to output + k * stride */ \
- const T_VEC i0 = load(input + 0 * stride); \
- const T_VEC i1 = load(input + 1 * stride); \
- store(output + 0 * stride, i0 + i1); /* uses the built-in + and - operators (this macro takes no add/sub parameters), so T_VEC must be a type on which those operators are defined */ \
- store(output + 1 * stride, i0 - i1); /* no scaling factor is applied to either output here */ \
- }
-
-#define GEN_IFFT_4(ret, suffix, T, T_VEC, load, store, constant, add, sub) /* Defines aom_ifft1d_4_<suffix>(): a straight-line 4-point transform using only the supplied load/store/constant/add/sub operations (no multiply is needed at this size). */ \
- ret aom_ifft1d_4_##suffix(const T *input, T *output, int stride) { /* element k is read from input + k * stride and written to output + k * stride */ \
- const T_VEC kWeight0 = constant(0.0f); /* zero; sub(kWeight0, x) is how this code negates x */ \
- const T_VEC i0 = load(input + 0 * stride); /* NOTE(review): the arithmetic below is consistent with i0, i1, i2 holding real parts and i3 an imaginary part of a packed spectrum - confirm against the forward transform's output layout */ \
- const T_VEC i1 = load(input + 1 * stride); \
- const T_VEC i2 = load(input + 2 * stride); \
- const T_VEC i3 = load(input + 3 * stride); \
- const T_VEC w2 = add(i0, i2); \
- const T_VEC w3 = sub(i0, i2); \
- const T_VEC w4[2] = { add(i1, i1), sub(i3, i3) }; /* w4[0] = 2 * i1; w4[1] is computed but never read below */ \
- const T_VEC w5[2] = { sub(i1, i1), sub(sub(kWeight0, i3), i3) }; /* w5[1] = -2 * i3; w5[0] is computed but never read below */ \
- store(output + 0 * stride, add(w2, w4[0])); /* (i0 + i2) + 2 * i1 */ \
- store(output + 1 * stride, add(w3, w5[1])); /* (i0 - i2) - 2 * i3 */ \
- store(output + 2 * stride, sub(w2, w4[0])); /* (i0 + i2) - 2 * i1 */ \
- store(output + 3 * stride, sub(w3, w5[1])); /* (i0 - i2) + 2 * i3; no 1/N scaling is applied to any output here */ \
- }
-
-#define GEN_IFFT_8(ret, suffix, T, T_VEC, load, store, constant, add, sub, \
- mul) /* Defines aom_ifft1d_8_<suffix>(): a straight-line 8-point transform built only from the load/store/constant/add/sub/mul operations passed in. ret is pasted in front of the function name; T is the pointed-to element type; T_VEC is the type of every loaded value and intermediate. */ \
- ret aom_ifft1d_8_##suffix(const T *input, T *output, int stride) { /* element k is read from input + k * stride and written to output + k * stride */ \
- const T_VEC kWeight0 = constant(0.0f); /* zero; sub(kWeight0, x) is how this code negates x */ \
- const T_VEC kWeight2 = constant(0.707107f); /* ~cos(pi/4) = sqrt(2)/2; the only multiplier used in this function */ \
- const T_VEC i0 = load(input + 0 * stride); /* all 8 inputs are loaded before any arithmetic or store */ \
- const T_VEC i1 = load(input + 1 * stride); \
- const T_VEC i2 = load(input + 2 * stride); \
- const T_VEC i3 = load(input + 3 * stride); \
- const T_VEC i4 = load(input + 4 * stride); \
- const T_VEC i5 = load(input + 5 * stride); \
- const T_VEC i6 = load(input + 6 * stride); \
- const T_VEC i7 = load(input + 7 * stride); \
- const T_VEC w6 = add(i0, i4); /* w6..w13: built from i0, i2, i4, i6 only */ \
- const T_VEC w7 = sub(i0, i4); \
- const T_VEC w8[2] = { add(i2, i2), sub(i6, i6) }; /* two-element intermediates; presumably [0] = real part, [1] = imaginary part - TODO confirm */ \
- const T_VEC w9[2] = { sub(i2, i2), sub(sub(kWeight0, i6), i6) }; \
- const T_VEC w10[2] = { add(w6, w8[0]), w8[1] }; \
- const T_VEC w11[2] = { sub(w6, w8[0]), sub(kWeight0, w8[1]) }; \
- const T_VEC w12[2] = { add(w7, w9[1]), sub(kWeight0, w9[0]) }; \
- const T_VEC w13[2] = { sub(w7, w9[1]), w9[0] }; \
- const T_VEC w14[2] = { add(i1, i3), sub(i7, i5) }; /* w14..w21: built from i1, i3, i5, i7 only */ \
- const T_VEC w15[2] = { sub(i1, i3), sub(sub(kWeight0, i5), i7) }; \
- const T_VEC w16[2] = { add(i3, i1), sub(i5, i7) }; \
- const T_VEC w17[2] = { sub(i3, i1), sub(sub(kWeight0, i7), i5) }; \
- const T_VEC w18[2] = { add(w14[0], w16[0]), add(w14[1], w16[1]) }; \
- const T_VEC w19[2] = { sub(w14[0], w16[0]), sub(w14[1], w16[1]) }; \
- const T_VEC w20[2] = { add(w15[0], w17[1]), sub(w15[1], w17[0]) }; \
- const T_VEC w21[2] = { sub(w15[0], w17[1]), add(w15[1], w17[0]) }; \
- store(output + 0 * stride, add(w10[0], w18[0])); /* final stage: each output combines one of w10..w13 (element [0]) with terms from w18..w21; outputs k and k + 4 use the same pair with opposite sign on the second term */ \
- store(output + 1 * stride, \
- add(w12[0], mul(kWeight2, add(w20[0], w20[1])))); \
- store(output + 2 * stride, add(w11[0], w19[1])); \
- store(output + 3 * stride, \
- sub(w13[0], mul(kWeight2, sub(w21[0], w21[1])))); \
- store(output + 4 * stride, sub(w10[0], w18[0])); \
- store(output + 5 * stride, \
- add(w12[0], sub(sub(kWeight0, mul(kWeight2, w20[0])), \
- mul(kWeight2, w20[1])))); \
- store(output + 6 * stride, sub(w11[0], w19[1])); \
- store(output + 7 * stride, \
- add(w13[0], mul(kWeight2, sub(w21[0], w21[1])))); \
- }
-
-#define GEN_IFFT_16(ret, suffix, T, T_VEC, load, store, constant, add, sub, \
- mul) /* Defines aom_ifft1d_16_<suffix>(): a straight-line 16-point transform built only from the load/store/constant/add/sub/mul operations passed in. ret is pasted in front of the function name; T is the pointed-to element type; T_VEC is the type of every loaded value and intermediate. */ \
- ret aom_ifft1d_16_##suffix(const T *input, T *output, int stride) { /* element k is read from input + k * stride and written to output + k * stride */ \
- const T_VEC kWeight0 = constant(0.0f); /* zero; sub(kWeight0, x) is how this code negates x */ \
- const T_VEC kWeight2 = constant(0.707107f); /* ~cos(pi/4) = sqrt(2)/2 */ \
- const T_VEC kWeight3 = constant(0.92388f); /* ~cos(pi/8) */ \
- const T_VEC kWeight4 = constant(0.382683f); /* ~sin(pi/8) */ \
- const T_VEC i0 = load(input + 0 * stride); /* all 16 inputs are loaded before any arithmetic or store */ \
- const T_VEC i1 = load(input + 1 * stride); \
- const T_VEC i2 = load(input + 2 * stride); \
- const T_VEC i3 = load(input + 3 * stride); \
- const T_VEC i4 = load(input + 4 * stride); \
- const T_VEC i5 = load(input + 5 * stride); \
- const T_VEC i6 = load(input + 6 * stride); \
- const T_VEC i7 = load(input + 7 * stride); \
- const T_VEC i8 = load(input + 8 * stride); \
- const T_VEC i9 = load(input + 9 * stride); \
- const T_VEC i10 = load(input + 10 * stride); \
- const T_VEC i11 = load(input + 11 * stride); \
- const T_VEC i12 = load(input + 12 * stride); \
- const T_VEC i13 = load(input + 13 * stride); \
- const T_VEC i14 = load(input + 14 * stride); \
- const T_VEC i15 = load(input + 15 * stride); \
- const T_VEC w14 = add(i0, i8); /* w14..w37: built from the even-indexed inputs only */ \
- const T_VEC w15 = sub(i0, i8); \
- const T_VEC w16[2] = { add(i4, i4), sub(i12, i12) }; /* two-element intermediates; presumably [0] = real part, [1] = imaginary part - TODO confirm */ \
- const T_VEC w17[2] = { sub(i4, i4), sub(sub(kWeight0, i12), i12) }; \
- const T_VEC w18[2] = { add(w14, w16[0]), w16[1] }; \
- const T_VEC w19[2] = { sub(w14, w16[0]), sub(kWeight0, w16[1]) }; \
- const T_VEC w20[2] = { add(w15, w17[1]), sub(kWeight0, w17[0]) }; \
- const T_VEC w21[2] = { sub(w15, w17[1]), w17[0] }; \
- const T_VEC w22[2] = { add(i2, i6), sub(i14, i10) }; \
- const T_VEC w23[2] = { sub(i2, i6), sub(sub(kWeight0, i10), i14) }; \
- const T_VEC w24[2] = { add(i6, i2), sub(i10, i14) }; \
- const T_VEC w25[2] = { sub(i6, i2), sub(sub(kWeight0, i14), i10) }; \
- const T_VEC w26[2] = { add(w22[0], w24[0]), add(w22[1], w24[1]) }; \
- const T_VEC w27[2] = { sub(w22[0], w24[0]), sub(w22[1], w24[1]) }; \
- const T_VEC w28[2] = { add(w23[0], w25[1]), sub(w23[1], w25[0]) }; \
- const T_VEC w29[2] = { sub(w23[0], w25[1]), add(w23[1], w25[0]) }; \
- const T_VEC w30[2] = { add(w18[0], w26[0]), add(w18[1], w26[1]) }; \
- const T_VEC w31[2] = { sub(w18[0], w26[0]), sub(w18[1], w26[1]) }; \
- const T_VEC w32[2] = { add(w20[0], mul(kWeight2, add(w28[0], w28[1]))), \
- add(w20[1], mul(kWeight2, sub(w28[1], w28[0]))) }; \
- const T_VEC w33[2] = { add(w20[0], \
- sub(sub(kWeight0, mul(kWeight2, w28[0])), \
- mul(kWeight2, w28[1]))), \
- add(w20[1], mul(kWeight2, sub(w28[0], w28[1]))) }; \
- const T_VEC w34[2] = { add(w19[0], w27[1]), sub(w19[1], w27[0]) }; \
- const T_VEC w35[2] = { sub(w19[0], w27[1]), add(w19[1], w27[0]) }; \
- const T_VEC w36[2] = { sub(w21[0], mul(kWeight2, sub(w29[0], w29[1]))), \
- sub(w21[1], mul(kWeight2, add(w29[1], w29[0]))) }; \
- const T_VEC w37[2] = { add(w21[0], mul(kWeight2, sub(w29[0], w29[1]))), \
- add(w21[1], mul(kWeight2, add(w29[1], w29[0]))) }; \
- const T_VEC w38[2] = { add(i1, i7), sub(i15, i9) }; /* w38..w61: built from the odd-indexed inputs only */ \
- const T_VEC w39[2] = { sub(i1, i7), sub(sub(kWeight0, i9), i15) }; \
- const T_VEC w40[2] = { add(i5, i3), sub(i11, i13) }; \
- const T_VEC w41[2] = { sub(i5, i3), sub(sub(kWeight0, i13), i11) }; \
- const T_VEC w42[2] = { add(w38[0], w40[0]), add(w38[1], w40[1]) }; \
- const T_VEC w43[2] = { sub(w38[0], w40[0]), sub(w38[1], w40[1]) }; \
- const T_VEC w44[2] = { add(w39[0], w41[1]), sub(w39[1], w41[0]) }; \
- const T_VEC w45[2] = { sub(w39[0], w41[1]), add(w39[1], w41[0]) }; \
- const T_VEC w46[2] = { add(i3, i5), sub(i13, i11) }; \
- const T_VEC w47[2] = { sub(i3, i5), sub(sub(kWeight0, i11), i13) }; \
- const T_VEC w48[2] = { add(i7, i1), sub(i9, i15) }; \
- const T_VEC w49[2] = { sub(i7, i1), sub(sub(kWeight0, i15), i9) }; \
- const T_VEC w50[2] = { add(w46[0], w48[0]), add(w46[1], w48[1]) }; \
- const T_VEC w51[2] = { sub(w46[0], w48[0]), sub(w46[1], w48[1]) }; \
- const T_VEC w52[2] = { add(w47[0], w49[1]), sub(w47[1], w49[0]) }; \
- const T_VEC w53[2] = { sub(w47[0], w49[1]), add(w47[1], w49[0]) }; \
- const T_VEC w54[2] = { add(w42[0], w50[0]), add(w42[1], w50[1]) }; \
- const T_VEC w55[2] = { sub(w42[0], w50[0]), sub(w42[1], w50[1]) }; \
- const T_VEC w56[2] = { add(w44[0], mul(kWeight2, add(w52[0], w52[1]))), \
- add(w44[1], mul(kWeight2, sub(w52[1], w52[0]))) }; \
- const T_VEC w57[2] = { add(w44[0], \
- sub(sub(kWeight0, mul(kWeight2, w52[0])), \
- mul(kWeight2, w52[1]))), \
- add(w44[1], mul(kWeight2, sub(w52[0], w52[1]))) }; \
- const T_VEC w58[2] = { add(w43[0], w51[1]), sub(w43[1], w51[0]) }; \
- const T_VEC w59[2] = { sub(w43[0], w51[1]), add(w43[1], w51[0]) }; \
- const T_VEC w60[2] = { sub(w45[0], mul(kWeight2, sub(w53[0], w53[1]))), \
- sub(w45[1], mul(kWeight2, add(w53[1], w53[0]))) }; \
- const T_VEC w61[2] = { add(w45[0], mul(kWeight2, sub(w53[0], w53[1]))), \
- add(w45[1], mul(kWeight2, add(w53[1], w53[0]))) }; \
- store(output + 0 * stride, add(w30[0], w54[0])); /* final stage: each output takes element [0] of one even-half value (w30..w37) and adds or subtracts a weighted combination of one odd-half pair (w54..w61); outputs k and k + 8 use the same pair with the odd-half term negated */ \
- store(output + 1 * stride, \
- add(w32[0], add(mul(kWeight3, w56[0]), mul(kWeight4, w56[1])))); \
- store(output + 2 * stride, \
- add(w34[0], mul(kWeight2, add(w58[0], w58[1])))); \
- store(output + 3 * stride, \
- add(w36[0], add(mul(kWeight4, w60[0]), mul(kWeight3, w60[1])))); \
- store(output + 4 * stride, add(w31[0], w55[1])); \
- store(output + 5 * stride, \
- sub(w33[0], sub(mul(kWeight4, w57[0]), mul(kWeight3, w57[1])))); \
- store(output + 6 * stride, \
- sub(w35[0], mul(kWeight2, sub(w59[0], w59[1])))); \
- store(output + 7 * stride, \
- sub(w37[0], sub(mul(kWeight3, w61[0]), mul(kWeight4, w61[1])))); \
- store(output + 8 * stride, sub(w30[0], w54[0])); \
- store(output + 9 * stride, \
- add(w32[0], sub(sub(kWeight0, mul(kWeight3, w56[0])), \
- mul(kWeight4, w56[1])))); \
- store(output + 10 * stride, \
- add(w34[0], sub(sub(kWeight0, mul(kWeight2, w58[0])), \
- mul(kWeight2, w58[1])))); \
- store(output + 11 * stride, \
- add(w36[0], sub(sub(kWeight0, mul(kWeight4, w60[0])), \
- mul(kWeight3, w60[1])))); \
- store(output + 12 * stride, sub(w31[0], w55[1])); \
- store(output + 13 * stride, \
- add(w33[0], sub(mul(kWeight4, w57[0]), mul(kWeight3, w57[1])))); \
- store(output + 14 * stride, \
- add(w35[0], mul(kWeight2, sub(w59[0], w59[1])))); \
- store(output + 15 * stride, \
- add(w37[0], sub(mul(kWeight3, w61[0]), mul(kWeight4, w61[1])))); \
- }
-#define GEN_IFFT_32(ret, suffix, T, T_VEC, load, store, constant, add, sub, \
- mul) \
- ret aom_ifft1d_32_##suffix(const T *input, T *output, int stride) { \
- const T_VEC kWeight0 = constant(0.0f); \
- const T_VEC kWeight2 = constant(0.707107f); \
- const T_VEC kWeight3 = constant(0.92388f); \
- const T_VEC kWeight4 = constant(0.382683f); \
- const T_VEC kWeight5 = constant(0.980785f); \
- const T_VEC kWeight6 = constant(0.19509f); \
- const T_VEC kWeight7 = constant(0.83147f); \
- const T_VEC kWeight8 = constant(0.55557f); \
- const T_VEC i0 = load(input + 0 * stride); \
- const T_VEC i1 = load(input + 1 * stride); \
- const T_VEC i2 = load(input + 2 * stride); \
- const T_VEC i3 = load(input + 3 * stride); \
- const T_VEC i4 = load(input + 4 * stride); \
- const T_VEC i5 = load(input + 5 * stride); \
- const T_VEC i6 = load(input + 6 * stride); \
- const T_VEC i7 = load(input + 7 * stride); \
- const T_VEC i8 = load(input + 8 * stride); \
- const T_VEC i9 = load(input + 9 * stride); \
- const T_VEC i10 = load(input + 10 * stride); \
- const T_VEC i11 = load(input + 11 * stride); \
- const T_VEC i12 = load(input + 12 * stride); \
- const T_VEC i13 = load(input + 13 * stride); \
- const T_VEC i14 = load(input + 14 * stride); \
- const T_VEC i15 = load(input + 15 * stride); \
- const T_VEC i16 = load(input + 16 * stride); \
- const T_VEC i17 = load(input + 17 * stride); \
- const T_VEC i18 = load(input + 18 * stride); \
- const T_VEC i19 = load(input + 19 * stride); \
- const T_VEC i20 = load(input + 20 * stride); \
- const T_VEC i21 = load(input + 21 * stride); \
- const T_VEC i22 = load(input + 22 * stride); \
- const T_VEC i23 = load(input + 23 * stride); \
- const T_VEC i24 = load(input + 24 * stride); \
- const T_VEC i25 = load(input + 25 * stride); \
- const T_VEC i26 = load(input + 26 * stride); \
- const T_VEC i27 = load(input + 27 * stride); \
- const T_VEC i28 = load(input + 28 * stride); \
- const T_VEC i29 = load(input + 29 * stride); \
- const T_VEC i30 = load(input + 30 * stride); \
- const T_VEC i31 = load(input + 31 * stride); \
- const T_VEC w30 = add(i0, i16); \
- const T_VEC w31 = sub(i0, i16); \
- const T_VEC w32[2] = { add(i8, i8), sub(i24, i24) }; \
- const T_VEC w33[2] = { sub(i8, i8), sub(sub(kWeight0, i24), i24) }; \
- const T_VEC w34[2] = { add(w30, w32[0]), w32[1] }; \
- const T_VEC w35[2] = { sub(w30, w32[0]), sub(kWeight0, w32[1]) }; \
- const T_VEC w36[2] = { add(w31, w33[1]), sub(kWeight0, w33[0]) }; \
- const T_VEC w37[2] = { sub(w31, w33[1]), w33[0] }; \
- const T_VEC w38[2] = { add(i4, i12), sub(i28, i20) }; \
- const T_VEC w39[2] = { sub(i4, i12), sub(sub(kWeight0, i20), i28) }; \
- const T_VEC w40[2] = { add(i12, i4), sub(i20, i28) }; \
- const T_VEC w41[2] = { sub(i12, i4), sub(sub(kWeight0, i28), i20) }; \
- const T_VEC w42[2] = { add(w38[0], w40[0]), add(w38[1], w40[1]) }; \
- const T_VEC w43[2] = { sub(w38[0], w40[0]), sub(w38[1], w40[1]) }; \
- const T_VEC w44[2] = { add(w39[0], w41[1]), sub(w39[1], w41[0]) }; \
- const T_VEC w45[2] = { sub(w39[0], w41[1]), add(w39[1], w41[0]) }; \
- const T_VEC w46[2] = { add(w34[0], w42[0]), add(w34[1], w42[1]) }; \
- const T_VEC w47[2] = { sub(w34[0], w42[0]), sub(w34[1], w42[1]) }; \
- const T_VEC w48[2] = { add(w36[0], mul(kWeight2, add(w44[0], w44[1]))), \
- add(w36[1], mul(kWeight2, sub(w44[1], w44[0]))) }; \
- const T_VEC w49[2] = { add(w36[0], \
- sub(sub(kWeight0, mul(kWeight2, w44[0])), \
- mul(kWeight2, w44[1]))), \
- add(w36[1], mul(kWeight2, sub(w44[0], w44[1]))) }; \
- const T_VEC w50[2] = { add(w35[0], w43[1]), sub(w35[1], w43[0]) }; \
- const T_VEC w51[2] = { sub(w35[0], w43[1]), add(w35[1], w43[0]) }; \
- const T_VEC w52[2] = { sub(w37[0], mul(kWeight2, sub(w45[0], w45[1]))), \
- sub(w37[1], mul(kWeight2, add(w45[1], w45[0]))) }; \
- const T_VEC w53[2] = { add(w37[0], mul(kWeight2, sub(w45[0], w45[1]))), \
- add(w37[1], mul(kWeight2, add(w45[1], w45[0]))) }; \
- const T_VEC w54[2] = { add(i2, i14), sub(i30, i18) }; \
- const T_VEC w55[2] = { sub(i2, i14), sub(sub(kWeight0, i18), i30) }; \
- const T_VEC w56[2] = { add(i10, i6), sub(i22, i26) }; \
- const T_VEC w57[2] = { sub(i10, i6), sub(sub(kWeight0, i26), i22) }; \
- const T_VEC w58[2] = { add(w54[0], w56[0]), add(w54[1], w56[1]) }; \
- const T_VEC w59[2] = { sub(w54[0], w56[0]), sub(w54[1], w56[1]) }; \
- const T_VEC w60[2] = { add(w55[0], w57[1]), sub(w55[1], w57[0]) }; \
- const T_VEC w61[2] = { sub(w55[0], w57[1]), add(w55[1], w57[0]) }; \
- const T_VEC w62[2] = { add(i6, i10), sub(i26, i22) }; \
- const T_VEC w63[2] = { sub(i6, i10), sub(sub(kWeight0, i22), i26) }; \
- const T_VEC w64[2] = { add(i14, i2), sub(i18, i30) }; \
- const T_VEC w65[2] = { sub(i14, i2), sub(sub(kWeight0, i30), i18) }; \
- const T_VEC w66[2] = { add(w62[0], w64[0]), add(w62[1], w64[1]) }; \
- const T_VEC w67[2] = { sub(w62[0], w64[0]), sub(w62[1], w64[1]) }; \
- const T_VEC w68[2] = { add(w63[0], w65[1]), sub(w63[1], w65[0]) }; \
- const T_VEC w69[2] = { sub(w63[0], w65[1]), add(w63[1], w65[0]) }; \
- const T_VEC w70[2] = { add(w58[0], w66[0]), add(w58[1], w66[1]) }; \
- const T_VEC w71[2] = { sub(w58[0], w66[0]), sub(w58[1], w66[1]) }; \
- const T_VEC w72[2] = { add(w60[0], mul(kWeight2, add(w68[0], w68[1]))), \
- add(w60[1], mul(kWeight2, sub(w68[1], w68[0]))) }; \
- const T_VEC w73[2] = { add(w60[0], \
- sub(sub(kWeight0, mul(kWeight2, w68[0])), \
- mul(kWeight2, w68[1]))), \
- add(w60[1], mul(kWeight2, sub(w68[0], w68[1]))) }; \
- const T_VEC w74[2] = { add(w59[0], w67[1]), sub(w59[1], w67[0]) }; \
- const T_VEC w75[2] = { sub(w59[0], w67[1]), add(w59[1], w67[0]) }; \
- const T_VEC w76[2] = { sub(w61[0], mul(kWeight2, sub(w69[0], w69[1]))), \
- sub(w61[1], mul(kWeight2, add(w69[1], w69[0]))) }; \
- const T_VEC w77[2] = { add(w61[0], mul(kWeight2, sub(w69[0], w69[1]))), \
- add(w61[1], mul(kWeight2, add(w69[1], w69[0]))) }; \
- const T_VEC w78[2] = { add(w46[0], w70[0]), add(w46[1], w70[1]) }; \
- const T_VEC w79[2] = { sub(w46[0], w70[0]), sub(w46[1], w70[1]) }; \
- const T_VEC w80[2] = { \
- add(w48[0], add(mul(kWeight3, w72[0]), mul(kWeight4, w72[1]))), \
- add(w48[1], sub(mul(kWeight3, w72[1]), mul(kWeight4, w72[0]))) \
- }; \
- const T_VEC w81[2] = { \
- add(w48[0], \
- sub(sub(kWeight0, mul(kWeight3, w72[0])), mul(kWeight4, w72[1]))), \
- add(w48[1], sub(mul(kWeight4, w72[0]), mul(kWeight3, w72[1]))) \
- }; \
- const T_VEC w82[2] = { add(w50[0], mul(kWeight2, add(w74[0], w74[1]))), \
- add(w50[1], mul(kWeight2, sub(w74[1], w74[0]))) }; \
- const T_VEC w83[2] = { add(w50[0], \
- sub(sub(kWeight0, mul(kWeight2, w74[0])), \
- mul(kWeight2, w74[1]))), \
- add(w50[1], mul(kWeight2, sub(w74[0], w74[1]))) }; \
- const T_VEC w84[2] = { \
- add(w52[0], add(mul(kWeight4, w76[0]), mul(kWeight3, w76[1]))), \
- add(w52[1], sub(mul(kWeight4, w76[1]), mul(kWeight3, w76[0]))) \
- }; \
- const T_VEC w85[2] = { \
- add(w52[0], \
- sub(sub(kWeight0, mul(kWeight4, w76[0])), mul(kWeight3, w76[1]))), \
- add(w52[1], sub(mul(kWeight3, w76[0]), mul(kWeight4, w76[1]))) \
- }; \
- const T_VEC w86[2] = { add(w47[0], w71[1]), sub(w47[1], w71[0]) }; \
- const T_VEC w87[2] = { sub(w47[0], w71[1]), add(w47[1], w71[0]) }; \
- const T_VEC w88[2] = { \
- sub(w49[0], sub(mul(kWeight4, w73[0]), mul(kWeight3, w73[1]))), \
- add(w49[1], \
- sub(sub(kWeight0, mul(kWeight4, w73[1])), mul(kWeight3, w73[0]))) \
- }; \
- const T_VEC w89[2] = { \
- add(w49[0], sub(mul(kWeight4, w73[0]), mul(kWeight3, w73[1]))), \
- add(w49[1], add(mul(kWeight4, w73[1]), mul(kWeight3, w73[0]))) \
- }; \
- const T_VEC w90[2] = { sub(w51[0], mul(kWeight2, sub(w75[0], w75[1]))), \
- sub(w51[1], mul(kWeight2, add(w75[1], w75[0]))) }; \
- const T_VEC w91[2] = { add(w51[0], mul(kWeight2, sub(w75[0], w75[1]))), \
- add(w51[1], mul(kWeight2, add(w75[1], w75[0]))) }; \
- const T_VEC w92[2] = { \
- sub(w53[0], sub(mul(kWeight3, w77[0]), mul(kWeight4, w77[1]))), \
- add(w53[1], \
- sub(sub(kWeight0, mul(kWeight3, w77[1])), mul(kWeight4, w77[0]))) \
- }; \
- const T_VEC w93[2] = { \
- add(w53[0], sub(mul(kWeight3, w77[0]), mul(kWeight4, w77[1]))), \
- add(w53[1], add(mul(kWeight3, w77[1]), mul(kWeight4, w77[0]))) \
- }; \
- const T_VEC w94[2] = { add(i1, i15), sub(i31, i17) }; \
- const T_VEC w95[2] = { sub(i1, i15), sub(sub(kWeight0, i17), i31) }; \
- const T_VEC w96[2] = { add(i9, i7), sub(i23, i25) }; \
- const T_VEC w97[2] = { sub(i9, i7), sub(sub(kWeight0, i25), i23) }; \
- const T_VEC w98[2] = { add(w94[0], w96[0]), add(w94[1], w96[1]) }; \
- const T_VEC w99[2] = { sub(w94[0], w96[0]), sub(w94[1], w96[1]) }; \
- const T_VEC w100[2] = { add(w95[0], w97[1]), sub(w95[1], w97[0]) }; \
- const T_VEC w101[2] = { sub(w95[0], w97[1]), add(w95[1], w97[0]) }; \
- const T_VEC w102[2] = { add(i5, i11), sub(i27, i21) }; \
- const T_VEC w103[2] = { sub(i5, i11), sub(sub(kWeight0, i21), i27) }; \
- const T_VEC w104[2] = { add(i13, i3), sub(i19, i29) }; \
- const T_VEC w105[2] = { sub(i13, i3), sub(sub(kWeight0, i29), i19) }; \
- const T_VEC w106[2] = { add(w102[0], w104[0]), add(w102[1], w104[1]) }; \
- const T_VEC w107[2] = { sub(w102[0], w104[0]), sub(w102[1], w104[1]) }; \
- const T_VEC w108[2] = { add(w103[0], w105[1]), sub(w103[1], w105[0]) }; \
- const T_VEC w109[2] = { sub(w103[0], w105[1]), add(w103[1], w105[0]) }; \
- const T_VEC w110[2] = { add(w98[0], w106[0]), add(w98[1], w106[1]) }; \
- const T_VEC w111[2] = { sub(w98[0], w106[0]), sub(w98[1], w106[1]) }; \
- const T_VEC w112[2] = { \
- add(w100[0], mul(kWeight2, add(w108[0], w108[1]))), \
- add(w100[1], mul(kWeight2, sub(w108[1], w108[0]))) \
- }; \
- const T_VEC w113[2] = { \
- add(w100[0], \
- sub(sub(kWeight0, mul(kWeight2, w108[0])), mul(kWeight2, w108[1]))), \
- add(w100[1], mul(kWeight2, sub(w108[0], w108[1]))) \
- }; \
- const T_VEC w114[2] = { add(w99[0], w107[1]), sub(w99[1], w107[0]) }; \
- const T_VEC w115[2] = { sub(w99[0], w107[1]), add(w99[1], w107[0]) }; \
- const T_VEC w116[2] = { \
- sub(w101[0], mul(kWeight2, sub(w109[0], w109[1]))), \
- sub(w101[1], mul(kWeight2, add(w109[1], w109[0]))) \
- }; \
- const T_VEC w117[2] = { \
- add(w101[0], mul(kWeight2, sub(w109[0], w109[1]))), \
- add(w101[1], mul(kWeight2, add(w109[1], w109[0]))) \
- }; \
- const T_VEC w118[2] = { add(i3, i13), sub(i29, i19) }; \
- const T_VEC w119[2] = { sub(i3, i13), sub(sub(kWeight0, i19), i29) }; \
- const T_VEC w120[2] = { add(i11, i5), sub(i21, i27) }; \
- const T_VEC w121[2] = { sub(i11, i5), sub(sub(kWeight0, i27), i21) }; \
- const T_VEC w122[2] = { add(w118[0], w120[0]), add(w118[1], w120[1]) }; \
- const T_VEC w123[2] = { sub(w118[0], w120[0]), sub(w118[1], w120[1]) }; \
- const T_VEC w124[2] = { add(w119[0], w121[1]), sub(w119[1], w121[0]) }; \
- const T_VEC w125[2] = { sub(w119[0], w121[1]), add(w119[1], w121[0]) }; \
- const T_VEC w126[2] = { add(i7, i9), sub(i25, i23) }; \
- const T_VEC w127[2] = { sub(i7, i9), sub(sub(kWeight0, i23), i25) }; \
- const T_VEC w128[2] = { add(i15, i1), sub(i17, i31) }; \
- const T_VEC w129[2] = { sub(i15, i1), sub(sub(kWeight0, i31), i17) }; \
- const T_VEC w130[2] = { add(w126[0], w128[0]), add(w126[1], w128[1]) }; \
- const T_VEC w131[2] = { sub(w126[0], w128[0]), sub(w126[1], w128[1]) }; \
- const T_VEC w132[2] = { add(w127[0], w129[1]), sub(w127[1], w129[0]) }; \
- const T_VEC w133[2] = { sub(w127[0], w129[1]), add(w127[1], w129[0]) }; \
- const T_VEC w134[2] = { add(w122[0], w130[0]), add(w122[1], w130[1]) }; \
- const T_VEC w135[2] = { sub(w122[0], w130[0]), sub(w122[1], w130[1]) }; \
- const T_VEC w136[2] = { \
- add(w124[0], mul(kWeight2, add(w132[0], w132[1]))), \
- add(w124[1], mul(kWeight2, sub(w132[1], w132[0]))) \
- }; \
- const T_VEC w137[2] = { \
- add(w124[0], \
- sub(sub(kWeight0, mul(kWeight2, w132[0])), mul(kWeight2, w132[1]))), \
- add(w124[1], mul(kWeight2, sub(w132[0], w132[1]))) \
- }; \
- const T_VEC w138[2] = { add(w123[0], w131[1]), sub(w123[1], w131[0]) }; \
- const T_VEC w139[2] = { sub(w123[0], w131[1]), add(w123[1], w131[0]) }; \
- const T_VEC w140[2] = { \
- sub(w125[0], mul(kWeight2, sub(w133[0], w133[1]))), \
- sub(w125[1], mul(kWeight2, add(w133[1], w133[0]))) \
- }; \
- const T_VEC w141[2] = { \
- add(w125[0], mul(kWeight2, sub(w133[0], w133[1]))), \
- add(w125[1], mul(kWeight2, add(w133[1], w133[0]))) \
- }; \
- const T_VEC w142[2] = { add(w110[0], w134[0]), add(w110[1], w134[1]) }; \
- const T_VEC w143[2] = { sub(w110[0], w134[0]), sub(w110[1], w134[1]) }; \
- const T_VEC w144[2] = { \
- add(w112[0], add(mul(kWeight3, w136[0]), mul(kWeight4, w136[1]))), \
- add(w112[1], sub(mul(kWeight3, w136[1]), mul(kWeight4, w136[0]))) \
- }; \
- const T_VEC w145[2] = { \
- add(w112[0], \
- sub(sub(kWeight0, mul(kWeight3, w136[0])), mul(kWeight4, w136[1]))), \
- add(w112[1], sub(mul(kWeight4, w136[0]), mul(kWeight3, w136[1]))) \
- }; \
- const T_VEC w146[2] = { \
- add(w114[0], mul(kWeight2, add(w138[0], w138[1]))), \
- add(w114[1], mul(kWeight2, sub(w138[1], w138[0]))) \
- }; \
- const T_VEC w147[2] = { \
- add(w114[0], \
- sub(sub(kWeight0, mul(kWeight2, w138[0])), mul(kWeight2, w138[1]))), \
- add(w114[1], mul(kWeight2, sub(w138[0], w138[1]))) \
- }; \
- const T_VEC w148[2] = { \
- add(w116[0], add(mul(kWeight4, w140[0]), mul(kWeight3, w140[1]))), \
- add(w116[1], sub(mul(kWeight4, w140[1]), mul(kWeight3, w140[0]))) \
- }; \
- const T_VEC w149[2] = { \
- add(w116[0], \
- sub(sub(kWeight0, mul(kWeight4, w140[0])), mul(kWeight3, w140[1]))), \
- add(w116[1], sub(mul(kWeight3, w140[0]), mul(kWeight4, w140[1]))) \
- }; \
- const T_VEC w150[2] = { add(w111[0], w135[1]), sub(w111[1], w135[0]) }; \
- const T_VEC w151[2] = { sub(w111[0], w135[1]), add(w111[1], w135[0]) }; \
- const T_VEC w152[2] = { \
- sub(w113[0], sub(mul(kWeight4, w137[0]), mul(kWeight3, w137[1]))), \
- add(w113[1], \
- sub(sub(kWeight0, mul(kWeight4, w137[1])), mul(kWeight3, w137[0]))) \
- }; \
- const T_VEC w153[2] = { \
- add(w113[0], sub(mul(kWeight4, w137[0]), mul(kWeight3, w137[1]))), \
- add(w113[1], add(mul(kWeight4, w137[1]), mul(kWeight3, w137[0]))) \
- }; \
- const T_VEC w154[2] = { \
- sub(w115[0], mul(kWeight2, sub(w139[0], w139[1]))), \
- sub(w115[1], mul(kWeight2, add(w139[1], w139[0]))) \
- }; \
- const T_VEC w155[2] = { \
- add(w115[0], mul(kWeight2, sub(w139[0], w139[1]))), \
- add(w115[1], mul(kWeight2, add(w139[1], w139[0]))) \
- }; \
- const T_VEC w156[2] = { \
- sub(w117[0], sub(mul(kWeight3, w141[0]), mul(kWeight4, w141[1]))), \
- add(w117[1], \
- sub(sub(kWeight0, mul(kWeight3, w141[1])), mul(kWeight4, w141[0]))) \
- }; \
- const T_VEC w157[2] = { \
- add(w117[0], sub(mul(kWeight3, w141[0]), mul(kWeight4, w141[1]))), \
- add(w117[1], add(mul(kWeight3, w141[1]), mul(kWeight4, w141[0]))) \
- }; \
- store(output + 0 * stride, add(w78[0], w142[0])); \
- store(output + 1 * stride, \
- add(w80[0], add(mul(kWeight5, w144[0]), mul(kWeight6, w144[1])))); \
- store(output + 2 * stride, \
- add(w82[0], add(mul(kWeight3, w146[0]), mul(kWeight4, w146[1])))); \
- store(output + 3 * stride, \
- add(w84[0], add(mul(kWeight7, w148[0]), mul(kWeight8, w148[1])))); \
- store(output + 4 * stride, \
- add(w86[0], mul(kWeight2, add(w150[0], w150[1])))); \
- store(output + 5 * stride, \
- add(w88[0], add(mul(kWeight8, w152[0]), mul(kWeight7, w152[1])))); \
- store(output + 6 * stride, \
- add(w90[0], add(mul(kWeight4, w154[0]), mul(kWeight3, w154[1])))); \
- store(output + 7 * stride, \
- add(w92[0], add(mul(kWeight6, w156[0]), mul(kWeight5, w156[1])))); \
- store(output + 8 * stride, add(w79[0], w143[1])); \
- store(output + 9 * stride, \
- sub(w81[0], sub(mul(kWeight6, w145[0]), mul(kWeight5, w145[1])))); \
- store(output + 10 * stride, \
- sub(w83[0], sub(mul(kWeight4, w147[0]), mul(kWeight3, w147[1])))); \
- store(output + 11 * stride, \
- sub(w85[0], sub(mul(kWeight8, w149[0]), mul(kWeight7, w149[1])))); \
- store(output + 12 * stride, \
- sub(w87[0], mul(kWeight2, sub(w151[0], w151[1])))); \
- store(output + 13 * stride, \
- sub(w89[0], sub(mul(kWeight7, w153[0]), mul(kWeight8, w153[1])))); \
- store(output + 14 * stride, \
- sub(w91[0], sub(mul(kWeight3, w155[0]), mul(kWeight4, w155[1])))); \
- store(output + 15 * stride, \
- sub(w93[0], sub(mul(kWeight5, w157[0]), mul(kWeight6, w157[1])))); \
- store(output + 16 * stride, sub(w78[0], w142[0])); \
- store(output + 17 * stride, \
- add(w80[0], sub(sub(kWeight0, mul(kWeight5, w144[0])), \
- mul(kWeight6, w144[1])))); \
- store(output + 18 * stride, \
- add(w82[0], sub(sub(kWeight0, mul(kWeight3, w146[0])), \
- mul(kWeight4, w146[1])))); \
- store(output + 19 * stride, \
- add(w84[0], sub(sub(kWeight0, mul(kWeight7, w148[0])), \
- mul(kWeight8, w148[1])))); \
- store(output + 20 * stride, \
- add(w86[0], sub(sub(kWeight0, mul(kWeight2, w150[0])), \
- mul(kWeight2, w150[1])))); \
- store(output + 21 * stride, \
- add(w88[0], sub(sub(kWeight0, mul(kWeight8, w152[0])), \
- mul(kWeight7, w152[1])))); \
- store(output + 22 * stride, \
- add(w90[0], sub(sub(kWeight0, mul(kWeight4, w154[0])), \
- mul(kWeight3, w154[1])))); \
- store(output + 23 * stride, \
- add(w92[0], sub(sub(kWeight0, mul(kWeight6, w156[0])), \
- mul(kWeight5, w156[1])))); \
- store(output + 24 * stride, sub(w79[0], w143[1])); \
- store(output + 25 * stride, \
- add(w81[0], sub(mul(kWeight6, w145[0]), mul(kWeight5, w145[1])))); \
- store(output + 26 * stride, \
- add(w83[0], sub(mul(kWeight4, w147[0]), mul(kWeight3, w147[1])))); \
- store(output + 27 * stride, \
- add(w85[0], sub(mul(kWeight8, w149[0]), mul(kWeight7, w149[1])))); \
- store(output + 28 * stride, \
- add(w87[0], mul(kWeight2, sub(w151[0], w151[1])))); \
- store(output + 29 * stride, \
- add(w89[0], sub(mul(kWeight7, w153[0]), mul(kWeight8, w153[1])))); \
- store(output + 30 * stride, \
- add(w91[0], sub(mul(kWeight3, w155[0]), mul(kWeight4, w155[1])))); \
- store(output + 31 * stride, \
- add(w93[0], sub(mul(kWeight5, w157[0]), mul(kWeight6, w157[1])))); \
- }
-
-#endif // AOM_AOM_DSP_FFT_COMMON_H_
diff --git a/third_party/aom/aom_dsp/fwd_txfm.c b/third_party/aom/aom_dsp/fwd_txfm.c
deleted file mode 100644
index e50f951c1..000000000
--- a/third_party/aom/aom_dsp/fwd_txfm.c
+++ /dev/null
@@ -1,103 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#include <assert.h>
-#include "aom_dsp/txfm_common.h"
-#include "config/aom_dsp_rtcd.h"
-/*
- * Forward 8x8 DCT, plain-C version.
- *
- * The same 8-point butterfly is run in two passes. Pass 0 reads the block
- * from `input` (one column per iteration, rows `stride` elements apart,
- * sums/differences scaled by 4) and writes each result as 8 consecutive
- * values of the local `intermediate` buffer. Pass 1 reads `intermediate`
- * the same way (fixed row pitch of 8) and writes into `final_output`.
- * Finally every one of the 64 coefficients is divided by 2.
- *
- * input:        source samples, read at input[r * stride + c], r, c in 0..7.
- * final_output: receives 64 coefficients.
- * stride:       distance, in elements, between rows of `input`.
- */
-void aom_fdct8x8_c(const int16_t *input, tran_low_t *final_output, int stride) {
- int i, j;
- tran_low_t intermediate[64];  // pass-0 output, consumed by pass 1
- int pass;
- tran_low_t *output = intermediate;
- const tran_low_t *in = NULL;  // assigned after pass 0; pass 0 reads `input`
-
- // Transform columns
- for (pass = 0; pass < 2; ++pass) {
- tran_high_t s0, s1, s2, s3, s4, s5, s6, s7; // canbe16
- tran_high_t t0, t1, t2, t3; // needs32
- tran_high_t x0, x1, x2, x3; // canbe16
-
- for (i = 0; i < 8; i++) {
- // stage 1: sums (s0..s3) and differences (s4..s7) of mirrored rows
- if (pass == 0) {
- s0 = (input[0 * stride] + input[7 * stride]) * 4;
- s1 = (input[1 * stride] + input[6 * stride]) * 4;
- s2 = (input[2 * stride] + input[5 * stride]) * 4;
- s3 = (input[3 * stride] + input[4 * stride]) * 4;
- s4 = (input[3 * stride] - input[4 * stride]) * 4;
- s5 = (input[2 * stride] - input[5 * stride]) * 4;
- s6 = (input[1 * stride] - input[6 * stride]) * 4;
- s7 = (input[0 * stride] - input[7 * stride]) * 4;
- ++input;  // next source column
- } else {
- s0 = in[0 * 8] + in[7 * 8];
- s1 = in[1 * 8] + in[6 * 8];
- s2 = in[2 * 8] + in[5 * 8];
- s3 = in[3 * 8] + in[4 * 8];
- s4 = in[3 * 8] - in[4 * 8];
- s5 = in[2 * 8] - in[5 * 8];
- s6 = in[1 * 8] - in[6 * 8];
- s7 = in[0 * 8] - in[7 * 8];
- ++in;  // next column of the pass-0 result
- }
-
- // fdct4(step, step); produces the even-indexed outputs from s0..s3
- x0 = s0 + s3;
- x1 = s1 + s2;
- x2 = s1 - s2;
- x3 = s0 - s3;
- t0 = (x0 + x1) * cospi_16_64;
- t1 = (x0 - x1) * cospi_16_64;
- t2 = x2 * cospi_24_64 + x3 * cospi_8_64;
- t3 = -x2 * cospi_8_64 + x3 * cospi_24_64;
- output[0] = (tran_low_t)fdct_round_shift(t0);
- output[2] = (tran_low_t)fdct_round_shift(t2);
- output[4] = (tran_low_t)fdct_round_shift(t1);
- output[6] = (tran_low_t)fdct_round_shift(t3);
-
- // Stage 2 (stages 2-4 produce the odd-indexed outputs from s4..s7)
- t0 = (s6 - s5) * cospi_16_64;
- t1 = (s6 + s5) * cospi_16_64;
- t2 = fdct_round_shift(t0);
- t3 = fdct_round_shift(t1);
-
- // Stage 3
- x0 = s4 + t2;
- x1 = s4 - t2;
- x2 = s7 - t3;
- x3 = s7 + t3;
-
- // Stage 4
- t0 = x0 * cospi_28_64 + x3 * cospi_4_64;
- t1 = x1 * cospi_12_64 + x2 * cospi_20_64;
- t2 = x2 * cospi_12_64 + x1 * -cospi_20_64;
- t3 = x3 * cospi_28_64 + x0 * -cospi_4_64;
- output[1] = (tran_low_t)fdct_round_shift(t0);
- output[3] = (tran_low_t)fdct_round_shift(t2);
- output[5] = (tran_low_t)fdct_round_shift(t1);
- output[7] = (tran_low_t)fdct_round_shift(t3);
- output += 8;  // each transformed column is stored as one run of 8 values
- }
- in = intermediate;  // pass 1 consumes what pass 0 produced
- output = final_output;  // and writes straight into the caller's buffer
- }
-
- // Rows: halve every coefficient of the finished 8x8 result
- for (i = 0; i < 8; ++i) {
- for (j = 0; j < 8; ++j) final_output[j + i * 8] /= 2;
- }
-}
-/*
- * Entry point carrying the `highbd` prefix (presumably the high bit-depth
- * path - confirm against the rtcd table). It forwards its arguments
- * unchanged to aom_fdct8x8_c().
- */
-void aom_highbd_fdct8x8_c(const int16_t *input, tran_low_t *final_output,
- int stride) {
- aom_fdct8x8_c(input, final_output, stride);
-}
diff --git a/third_party/aom/aom_dsp/grain_synthesis.c b/third_party/aom/aom_dsp/grain_synthesis.c
deleted file mode 100644
index b96e1c319..000000000
--- a/third_party/aom/aom_dsp/grain_synthesis.c
+++ /dev/null
@@ -1,1409 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-/*!\file
- * \brief Describes film grain parameters and film grain synthesis
- *
- */
-
-#include <stdio.h>
-#include <string.h>
-#include <stdlib.h>
-#include <assert.h>
-#include "aom_dsp/grain_synthesis.h"
-#include "aom_mem/aom_mem.h"
-
-// Samples with a Gaussian distribution in the range [-2048, 2047] (12 bits),
-// with zero mean and a standard deviation of about 512. The samples should be
-// divided by 4 for the 10-bit range and by 16 for the 8-bit range.
-static const int gaussian_sequence[2048] = {
- 56, 568, -180, 172, 124, -84, 172, -64, -900, 24, 820,
- 224, 1248, 996, 272, -8, -916, -388, -732, -104, -188, 800,
- 112, -652, -320, -376, 140, -252, 492, -168, 44, -788, 588,
- -584, 500, -228, 12, 680, 272, -476, 972, -100, 652, 368,
- 432, -196, -720, -192, 1000, -332, 652, -136, -552, -604, -4,
- 192, -220, -136, 1000, -52, 372, -96, -624, 124, -24, 396,
- 540, -12, -104, 640, 464, 244, -208, -84, 368, -528, -740,
- 248, -968, -848, 608, 376, -60, -292, -40, -156, 252, -292,
- 248, 224, -280, 400, -244, 244, -60, 76, -80, 212, 532,
- 340, 128, -36, 824, -352, -60, -264, -96, -612, 416, -704,
- 220, -204, 640, -160, 1220, -408, 900, 336, 20, -336, -96,
- -792, 304, 48, -28, -1232, -1172, -448, 104, -292, -520, 244,
- 60, -948, 0, -708, 268, 108, 356, -548, 488, -344, -136,
- 488, -196, -224, 656, -236, -1128, 60, 4, 140, 276, -676,
- -376, 168, -108, 464, 8, 564, 64, 240, 308, -300, -400,
- -456, -136, 56, 120, -408, -116, 436, 504, -232, 328, 844,
- -164, -84, 784, -168, 232, -224, 348, -376, 128, 568, 96,
- -1244, -288, 276, 848, 832, -360, 656, 464, -384, -332, -356,
- 728, -388, 160, -192, 468, 296, 224, 140, -776, -100, 280,
- 4, 196, 44, -36, -648, 932, 16, 1428, 28, 528, 808,
- 772, 20, 268, 88, -332, -284, 124, -384, -448, 208, -228,
- -1044, -328, 660, 380, -148, -300, 588, 240, 540, 28, 136,
- -88, -436, 256, 296, -1000, 1400, 0, -48, 1056, -136, 264,
- -528, -1108, 632, -484, -592, -344, 796, 124, -668, -768, 388,
- 1296, -232, -188, -200, -288, -4, 308, 100, -168, 256, -500,
- 204, -508, 648, -136, 372, -272, -120, -1004, -552, -548, -384,
- 548, -296, 428, -108, -8, -912, -324, -224, -88, -112, -220,
- -100, 996, -796, 548, 360, -216, 180, 428, -200, -212, 148,
- 96, 148, 284, 216, -412, -320, 120, -300, -384, -604, -572,
- -332, -8, -180, -176, 696, 116, -88, 628, 76, 44, -516,
- 240, -208, -40, 100, -592, 344, -308, -452, -228, 20, 916,
- -1752, -136, -340, -804, 140, 40, 512, 340, 248, 184, -492,
- 896, -156, 932, -628, 328, -688, -448, -616, -752, -100, 560,
- -1020, 180, -800, -64, 76, 576, 1068, 396, 660, 552, -108,
- -28, 320, -628, 312, -92, -92, -472, 268, 16, 560, 516,
- -672, -52, 492, -100, 260, 384, 284, 292, 304, -148, 88,
- -152, 1012, 1064, -228, 164, -376, -684, 592, -392, 156, 196,
- -524, -64, -884, 160, -176, 636, 648, 404, -396, -436, 864,
- 424, -728, 988, -604, 904, -592, 296, -224, 536, -176, -920,
- 436, -48, 1176, -884, 416, -776, -824, -884, 524, -548, -564,
- -68, -164, -96, 692, 364, -692, -1012, -68, 260, -480, 876,
- -1116, 452, -332, -352, 892, -1088, 1220, -676, 12, -292, 244,
- 496, 372, -32, 280, 200, 112, -440, -96, 24, -644, -184,
- 56, -432, 224, -980, 272, -260, 144, -436, 420, 356, 364,
- -528, 76, 172, -744, -368, 404, -752, -416, 684, -688, 72,
- 540, 416, 92, 444, 480, -72, -1416, 164, -1172, -68, 24,
- 424, 264, 1040, 128, -912, -524, -356, 64, 876, -12, 4,
- -88, 532, 272, -524, 320, 276, -508, 940, 24, -400, -120,
- 756, 60, 236, -412, 100, 376, -484, 400, -100, -740, -108,
- -260, 328, -268, 224, -200, -416, 184, -604, -564, -20, 296,
- 60, 892, -888, 60, 164, 68, -760, 216, -296, 904, -336,
- -28, 404, -356, -568, -208, -1480, -512, 296, 328, -360, -164,
- -1560, -776, 1156, -428, 164, -504, -112, 120, -216, -148, -264,
- 308, 32, 64, -72, 72, 116, 176, -64, -272, 460, -536,
- -784, -280, 348, 108, -752, -132, 524, -540, -776, 116, -296,
- -1196, -288, -560, 1040, -472, 116, -848, -1116, 116, 636, 696,
- 284, -176, 1016, 204, -864, -648, -248, 356, 972, -584, -204,
- 264, 880, 528, -24, -184, 116, 448, -144, 828, 524, 212,
- -212, 52, 12, 200, 268, -488, -404, -880, 824, -672, -40,
- 908, -248, 500, 716, -576, 492, -576, 16, 720, -108, 384,
- 124, 344, 280, 576, -500, 252, 104, -308, 196, -188, -8,
- 1268, 296, 1032, -1196, 436, 316, 372, -432, -200, -660, 704,
- -224, 596, -132, 268, 32, -452, 884, 104, -1008, 424, -1348,
- -280, 4, -1168, 368, 476, 696, 300, -8, 24, 180, -592,
- -196, 388, 304, 500, 724, -160, 244, -84, 272, -256, -420,
- 320, 208, -144, -156, 156, 364, 452, 28, 540, 316, 220,
- -644, -248, 464, 72, 360, 32, -388, 496, -680, -48, 208,
- -116, -408, 60, -604, -392, 548, -840, 784, -460, 656, -544,
- -388, -264, 908, -800, -628, -612, -568, 572, -220, 164, 288,
- -16, -308, 308, -112, -636, -760, 280, -668, 432, 364, 240,
- -196, 604, 340, 384, 196, 592, -44, -500, 432, -580, -132,
- 636, -76, 392, 4, -412, 540, 508, 328, -356, -36, 16,
- -220, -64, -248, -60, 24, -192, 368, 1040, 92, -24, -1044,
- -32, 40, 104, 148, 192, -136, -520, 56, -816, -224, 732,
- 392, 356, 212, -80, -424, -1008, -324, 588, -1496, 576, 460,
- -816, -848, 56, -580, -92, -1372, -112, -496, 200, 364, 52,
- -140, 48, -48, -60, 84, 72, 40, 132, -356, -268, -104,
- -284, -404, 732, -520, 164, -304, -540, 120, 328, -76, -460,
- 756, 388, 588, 236, -436, -72, -176, -404, -316, -148, 716,
- -604, 404, -72, -88, -888, -68, 944, 88, -220, -344, 960,
- 472, 460, -232, 704, 120, 832, -228, 692, -508, 132, -476,
- 844, -748, -364, -44, 1116, -1104, -1056, 76, 428, 552, -692,
- 60, 356, 96, -384, -188, -612, -576, 736, 508, 892, 352,
- -1132, 504, -24, -352, 324, 332, -600, -312, 292, 508, -144,
- -8, 484, 48, 284, -260, -240, 256, -100, -292, -204, -44,
- 472, -204, 908, -188, -1000, -256, 92, 1164, -392, 564, 356,
- 652, -28, -884, 256, 484, -192, 760, -176, 376, -524, -452,
- -436, 860, -736, 212, 124, 504, -476, 468, 76, -472, 552,
- -692, -944, -620, 740, -240, 400, 132, 20, 192, -196, 264,
- -668, -1012, -60, 296, -316, -828, 76, -156, 284, -768, -448,
- -832, 148, 248, 652, 616, 1236, 288, -328, -400, -124, 588,
- 220, 520, -696, 1032, 768, -740, -92, -272, 296, 448, -464,
- 412, -200, 392, 440, -200, 264, -152, -260, 320, 1032, 216,
- 320, -8, -64, 156, -1016, 1084, 1172, 536, 484, -432, 132,
- 372, -52, -256, 84, 116, -352, 48, 116, 304, -384, 412,
- 924, -300, 528, 628, 180, 648, 44, -980, -220, 1320, 48,
- 332, 748, 524, -268, -720, 540, -276, 564, -344, -208, -196,
- 436, 896, 88, -392, 132, 80, -964, -288, 568, 56, -48,
- -456, 888, 8, 552, -156, -292, 948, 288, 128, -716, -292,
- 1192, -152, 876, 352, -600, -260, -812, -468, -28, -120, -32,
- -44, 1284, 496, 192, 464, 312, -76, -516, -380, -456, -1012,
- -48, 308, -156, 36, 492, -156, -808, 188, 1652, 68, -120,
- -116, 316, 160, -140, 352, 808, -416, 592, 316, -480, 56,
- 528, -204, -568, 372, -232, 752, -344, 744, -4, 324, -416,
- -600, 768, 268, -248, -88, -132, -420, -432, 80, -288, 404,
- -316, -1216, -588, 520, -108, 92, -320, 368, -480, -216, -92,
- 1688, -300, 180, 1020, -176, 820, -68, -228, -260, 436, -904,
- 20, 40, -508, 440, -736, 312, 332, 204, 760, -372, 728,
- 96, -20, -632, -520, -560, 336, 1076, -64, -532, 776, 584,
- 192, 396, -728, -520, 276, -188, 80, -52, -612, -252, -48,
- 648, 212, -688, 228, -52, -260, 428, -412, -272, -404, 180,
- 816, -796, 48, 152, 484, -88, -216, 988, 696, 188, -528,
- 648, -116, -180, 316, 476, 12, -564, 96, 476, -252, -364,
- -376, -392, 556, -256, -576, 260, -352, 120, -16, -136, -260,
- -492, 72, 556, 660, 580, 616, 772, 436, 424, -32, -324,
- -1268, 416, -324, -80, 920, 160, 228, 724, 32, -516, 64,
- 384, 68, -128, 136, 240, 248, -204, -68, 252, -932, -120,
- -480, -628, -84, 192, 852, -404, -288, -132, 204, 100, 168,
- -68, -196, -868, 460, 1080, 380, -80, 244, 0, 484, -888,
- 64, 184, 352, 600, 460, 164, 604, -196, 320, -64, 588,
- -184, 228, 12, 372, 48, -848, -344, 224, 208, -200, 484,
- 128, -20, 272, -468, -840, 384, 256, -720, -520, -464, -580,
- 112, -120, 644, -356, -208, -608, -528, 704, 560, -424, 392,
- 828, 40, 84, 200, -152, 0, -144, 584, 280, -120, 80,
- -556, -972, -196, -472, 724, 80, 168, -32, 88, 160, -688,
- 0, 160, 356, 372, -776, 740, -128, 676, -248, -480, 4,
- -364, 96, 544, 232, -1032, 956, 236, 356, 20, -40, 300,
- 24, -676, -596, 132, 1120, -104, 532, -1096, 568, 648, 444,
- 508, 380, 188, -376, -604, 1488, 424, 24, 756, -220, -192,
- 716, 120, 920, 688, 168, 44, -460, 568, 284, 1144, 1160,
- 600, 424, 888, 656, -356, -320, 220, 316, -176, -724, -188,
- -816, -628, -348, -228, -380, 1012, -452, -660, 736, 928, 404,
- -696, -72, -268, -892, 128, 184, -344, -780, 360, 336, 400,
- 344, 428, 548, -112, 136, -228, -216, -820, -516, 340, 92,
- -136, 116, -300, 376, -244, 100, -316, -520, -284, -12, 824,
- 164, -548, -180, -128, 116, -924, -828, 268, -368, -580, 620,
- 192, 160, 0, -1676, 1068, 424, -56, -360, 468, -156, 720,
- 288, -528, 556, -364, 548, -148, 504, 316, 152, -648, -620,
- -684, -24, -376, -384, -108, -920, -1032, 768, 180, -264, -508,
- -1268, -260, -60, 300, -240, 988, 724, -376, -576, -212, -736,
- 556, 192, 1092, -620, -880, 376, -56, -4, -216, -32, 836,
- 268, 396, 1332, 864, -600, 100, 56, -412, -92, 356, 180,
- 884, -468, -436, 292, -388, -804, -704, -840, 368, -348, 140,
- -724, 1536, 940, 372, 112, -372, 436, -480, 1136, 296, -32,
- -228, 132, -48, -220, 868, -1016, -60, -1044, -464, 328, 916,
- 244, 12, -736, -296, 360, 468, -376, -108, -92, 788, 368,
- -56, 544, 400, -672, -420, 728, 16, 320, 44, -284, -380,
- -796, 488, 132, 204, -596, -372, 88, -152, -908, -636, -572,
- -624, -116, -692, -200, -56, 276, -88, 484, -324, 948, 864,
- 1000, -456, -184, -276, 292, -296, 156, 676, 320, 160, 908,
- -84, -1236, -288, -116, 260, -372, -644, 732, -756, -96, 84,
- 344, -520, 348, -688, 240, -84, 216, -1044, -136, -676, -396,
- -1500, 960, -40, 176, 168, 1516, 420, -504, -344, -364, -360,
- 1216, -940, -380, -212, 252, -660, -708, 484, -444, -152, 928,
- -120, 1112, 476, -260, 560, -148, -344, 108, -196, 228, -288,
- 504, 560, -328, -88, 288, -1008, 460, -228, 468, -836, -196,
- 76, 388, 232, 412, -1168, -716, -644, 756, -172, -356, -504,
- 116, 432, 528, 48, 476, -168, -608, 448, 160, -532, -272,
- 28, -676, -12, 828, 980, 456, 520, 104, -104, 256, -344,
- -4, -28, -368, -52, -524, -572, -556, -200, 768, 1124, -208,
- -512, 176, 232, 248, -148, -888, 604, -600, -304, 804, -156,
- -212, 488, -192, -804, -256, 368, -360, -916, -328, 228, -240,
- -448, -472, 856, -556, -364, 572, -12, -156, -368, -340, 432,
- 252, -752, -152, 288, 268, -580, -848, -592, 108, -76, 244,
- 312, -716, 592, -80, 436, 360, 4, -248, 160, 516, 584,
- 732, 44, -468, -280, -292, -156, -588, 28, 308, 912, 24,
- 124, 156, 180, -252, 944, -924, -772, -520, -428, -624, 300,
- -212, -1144, 32, -724, 800, -1128, -212, -1288, -848, 180, -416,
- 440, 192, -576, -792, -76, -1080, 80, -532, -352, -132, 380,
- -820, 148, 1112, 128, 164, 456, 700, -924, 144, -668, -384,
- 648, -832, 508, 552, -52, -100, -656, 208, -568, 748, -88,
- 680, 232, 300, 192, -408, -1012, -152, -252, -268, 272, -876,
- -664, -648, -332, -136, 16, 12, 1152, -28, 332, -536, 320,
- -672, -460, -316, 532, -260, 228, -40, 1052, -816, 180, 88,
- -496, -556, -672, -368, 428, 92, 356, 404, -408, 252, 196,
- -176, -556, 792, 268, 32, 372, 40, 96, -332, 328, 120,
- 372, -900, -40, 472, -264, -592, 952, 128, 656, 112, 664,
- -232, 420, 4, -344, -464, 556, 244, -416, -32, 252, 0,
- -412, 188, -696, 508, -476, 324, -1096, 656, -312, 560, 264,
- -136, 304, 160, -64, -580, 248, 336, -720, 560, -348, -288,
- -276, -196, -500, 852, -544, -236, -1128, -992, -776, 116, 56,
- 52, 860, 884, 212, -12, 168, 1020, 512, -552, 924, -148,
- 716, 188, 164, -340, -520, -184, 880, -152, -680, -208, -1156,
- -300, -528, -472, 364, 100, -744, -1056, -32, 540, 280, 144,
- -676, -32, -232, -280, -224, 96, 568, -76, 172, 148, 148,
- 104, 32, -296, -32, 788, -80, 32, -16, 280, 288, 944,
- 428, -484
-};
-
-static const int gauss_bits = 11;  // width of the random index into gaussian_sequence (2^11 = 2048 entries)
-/* Sub-block dimensions; init_arrays() uses the *_size_y values to size the
- * column buffers. Units are presumably samples - TODO confirm. */
-static int luma_subblock_size_y = 32;
-static int luma_subblock_size_x = 32;
-
-static int chroma_subblock_size_y = 16;
-static int chroma_subblock_size_x = 16;
-/* NOTE(review): the four bounds below are not used in the part of the file
- * visible here; by name and value they look like limited-range ("legal")
- * 8-bit sample limits - confirm against the code that clips the output. */
-static const int min_luma_legal_range = 16;
-static const int max_luma_legal_range = 235;
-
-static const int min_chroma_legal_range = 16;
-static const int max_chroma_legal_range = 240;
-/* 256-entry tables, zeroed by init_arrays(); they are filled elsewhere. */
-static int scaling_lut_y[256];
-static int scaling_lut_cb[256];
-static int scaling_lut_cr[256];
-/* grain_min / grain_max are the clamp bounds applied to filtered grain
- * samples; all three values are assigned outside the code shown here. */
-static int grain_center;
-static int grain_min;
-static int grain_max;
-
-static uint16_t random_register = 0; // random number generator register: seeded by init_random_generator(), stepped by get_random_number()
-/*
- * Allocates the working buffers for grain synthesis and builds the tables
- * of auto-regression (AR) tap positions.
- *
- * Steps performed:
- *  - zero scaling_lut_y / scaling_lut_cb / scaling_lut_cr;
- *  - build *pred_pos_luma_p and *pred_pos_chroma_p, arrays of 3-int entries
- *    {row offset, column offset, flag}, one entry per AR tap;
- *  - allocate the line buffers, column buffers and grain blocks and return
- *    them through the remaining int** out-parameters.
- *
- * Everything allocated here is released by dealloc_arrays(), which must be
- * given the same params->ar_coeff_lag and params->num_y_points.
- *
- * NOTE(review): no aom_malloc() result is checked, and the tap tables are
- * written immediately after allocation - confirm how allocation failure is
- * meant to be handled.
- */
-static void init_arrays(const aom_film_grain_t *params, int luma_stride,
- int chroma_stride, int ***pred_pos_luma_p,
- int ***pred_pos_chroma_p, int **luma_grain_block,
- int **cb_grain_block, int **cr_grain_block,
- int **y_line_buf, int **cb_line_buf, int **cr_line_buf,
- int **y_col_buf, int **cb_col_buf, int **cr_col_buf,
- int luma_grain_samples, int chroma_grain_samples,
- int chroma_subsamp_y, int chroma_subsamp_x) {
- memset(scaling_lut_y, 0, sizeof(*scaling_lut_y) * 256);
- memset(scaling_lut_cb, 0, sizeof(*scaling_lut_cb) * 256);
- memset(scaling_lut_cr, 0, sizeof(*scaling_lut_cr) * 256);
-/* Tap count: lag rows of (2 * lag + 1) taps above the current sample plus
- * lag taps to its left, i.e. 2 * lag * (lag + 1). */
- int num_pos_luma = 2 * params->ar_coeff_lag * (params->ar_coeff_lag + 1);
- int num_pos_chroma = num_pos_luma;
- if (params->num_y_points > 0) ++num_pos_chroma;  // room for the flag-1 entry added below
-
- int **pred_pos_luma;
- int **pred_pos_chroma;
-
- pred_pos_luma = (int **)aom_malloc(sizeof(*pred_pos_luma) * num_pos_luma);
-
- for (int row = 0; row < num_pos_luma; row++) {
- pred_pos_luma[row] = (int *)aom_malloc(sizeof(**pred_pos_luma) * 3);
- }
-
- pred_pos_chroma =
- (int **)aom_malloc(sizeof(*pred_pos_chroma) * num_pos_chroma);
-
- for (int row = 0; row < num_pos_chroma; row++) {
- pred_pos_chroma[row] = (int *)aom_malloc(sizeof(**pred_pos_chroma) * 3);
- }
-
- int pos_ar_index = 0;
-/* Rows above the current one: every column offset in [-lag, lag]. */
- for (int row = -params->ar_coeff_lag; row < 0; row++) {
- for (int col = -params->ar_coeff_lag; col < params->ar_coeff_lag + 1;
- col++) {
- pred_pos_luma[pos_ar_index][0] = row;
- pred_pos_luma[pos_ar_index][1] = col;
- pred_pos_luma[pos_ar_index][2] = 0;
-
- pred_pos_chroma[pos_ar_index][0] = row;
- pred_pos_chroma[pos_ar_index][1] = col;
- pred_pos_chroma[pos_ar_index][2] = 0;
- ++pos_ar_index;
- }
- }
-/* Current row: only the column offsets to the left, [-lag, -1]. */
- for (int col = -params->ar_coeff_lag; col < 0; col++) {
- pred_pos_luma[pos_ar_index][0] = 0;
- pred_pos_luma[pos_ar_index][1] = col;
- pred_pos_luma[pos_ar_index][2] = 0;
-
- pred_pos_chroma[pos_ar_index][0] = 0;
- pred_pos_chroma[pos_ar_index][1] = col;
- pred_pos_chroma[pos_ar_index][2] = 0;
-
- ++pos_ar_index;
- }
-/* Extra chroma-only entry {0, 0, 1}: generate_chroma_grain_blocks() handles
- * flag 1 by reading from the luma grain block instead of the chroma one. */
- if (params->num_y_points > 0) {
- pred_pos_chroma[pos_ar_index][0] = 0;
- pred_pos_chroma[pos_ar_index][1] = 0;
- pred_pos_chroma[pos_ar_index][2] = 1;
- }
-
- *pred_pos_luma_p = pred_pos_luma;
- *pred_pos_chroma_p = pred_pos_chroma;
-/* Line buffers: luma_stride * 2 ints for luma, and
- * chroma_stride * (2 >> chroma_subsamp_y) ints for each chroma plane. */
- *y_line_buf = (int *)aom_malloc(sizeof(**y_line_buf) * luma_stride * 2);
- *cb_line_buf = (int *)aom_malloc(sizeof(**cb_line_buf) * chroma_stride *
- (2 >> chroma_subsamp_y));
- *cr_line_buf = (int *)aom_malloc(sizeof(**cr_line_buf) * chroma_stride *
- (2 >> chroma_subsamp_y));
-/* Column buffers: (luma_subblock_size_y + 2) * 2 ints for luma; the chroma
- * versions replace each 2 by (2 >> subsampling shift). */
- *y_col_buf =
- (int *)aom_malloc(sizeof(**y_col_buf) * (luma_subblock_size_y + 2) * 2);
- *cb_col_buf =
- (int *)aom_malloc(sizeof(**cb_col_buf) *
- (chroma_subblock_size_y + (2 >> chroma_subsamp_y)) *
- (2 >> chroma_subsamp_x));
- *cr_col_buf =
- (int *)aom_malloc(sizeof(**cr_col_buf) *
- (chroma_subblock_size_y + (2 >> chroma_subsamp_y)) *
- (2 >> chroma_subsamp_x));
-/* Grain blocks, sized by the caller-supplied sample counts. */
- *luma_grain_block =
- (int *)aom_malloc(sizeof(**luma_grain_block) * luma_grain_samples);
- *cb_grain_block =
- (int *)aom_malloc(sizeof(**cb_grain_block) * chroma_grain_samples);
- *cr_grain_block =
- (int *)aom_malloc(sizeof(**cr_grain_block) * chroma_grain_samples);
-}
-/*
- * Frees everything init_arrays() allocated: each entry of the two tap
- * tables, the tables themselves, the line buffers, the column buffers and
- * the three grain blocks.
- *
- * The table lengths are recomputed from params->ar_coeff_lag and
- * params->num_y_points with the same formula init_arrays() uses, so those
- * fields must not have changed since the allocation. The pointers are not
- * reset after being freed.
- */
-static void dealloc_arrays(const aom_film_grain_t *params, int ***pred_pos_luma,
- int ***pred_pos_chroma, int **luma_grain_block,
- int **cb_grain_block, int **cr_grain_block,
- int **y_line_buf, int **cb_line_buf,
- int **cr_line_buf, int **y_col_buf, int **cb_col_buf,
- int **cr_col_buf) {
- int num_pos_luma = 2 * params->ar_coeff_lag * (params->ar_coeff_lag + 1);
- int num_pos_chroma = num_pos_luma;
- if (params->num_y_points > 0) ++num_pos_chroma;  // matches the extra entry init_arrays() allocates
-
- for (int row = 0; row < num_pos_luma; row++) {
- aom_free((*pred_pos_luma)[row]);
- }
- aom_free(*pred_pos_luma);
-
- for (int row = 0; row < num_pos_chroma; row++) {
- aom_free((*pred_pos_chroma)[row]);
- }
- aom_free((*pred_pos_chroma));
-
- aom_free(*y_line_buf);
-
- aom_free(*cb_line_buf);
-
- aom_free(*cr_line_buf);
-
- aom_free(*y_col_buf);
-
- aom_free(*cb_col_buf);
-
- aom_free(*cr_col_buf);
-
- aom_free(*luma_grain_block);
-
- aom_free(*cb_grain_block);
-
- aom_free(*cr_grain_block);
-}
-
-/*
- * Get a number between 0 and 2^bits - 1.
- *
- * Steps the 16-bit shift register `random_register` once: the new bit 15 is
- * the XOR of the old bits 0, 1, 3 and 12, and the rest shifts right by one.
- * The return value is the top `bits` bits of the updated register.
- *
- * NOTE(review): `bits` is not range-checked; the callers visible in this
- * file pass gauss_bits (11).
- */
-static INLINE int get_random_number(int bits) {
- uint16_t bit;
- bit = ((random_register >> 0) ^ (random_register >> 1) ^
- (random_register >> 3) ^ (random_register >> 12)) &
- 1;
- random_register = (random_register >> 1) | (bit << 15);  // feedback bit enters at the top
- return (random_register >> (16 - bits)) & ((1 << bits) - 1);
-}
-/*
- * Re-seeds `random_register`.
- *
- * The register is first set to `seed` (split into bytes and reassembled
- * unchanged), then mixed with a value derived from luma_line >> 5: the high
- * byte is XORed with (n * 37 + 178) & 255 and the low byte with
- * (n * 173 + 105) & 255, where n = luma_line >> 5.
- */
-static void init_random_generator(int luma_line, uint16_t seed) {
- // same for the picture
-
- uint16_t msb = (seed >> 8) & 255;
- uint16_t lsb = seed & 255;
-
- random_register = (msb << 8) + lsb;
-
- // changes for each row
- int luma_num = luma_line >> 5;  // callers pass e.g. 7 << 5 or 11 << 5, giving 7 or 11
-
- random_register ^= ((luma_num * 37 + 178) & 255) << 8;
- random_register ^= ((luma_num * 173 + 105) & 255);
-}
-
-/*
- * Fills `luma_grain_block` with the luma grain template.
- *
- * Return 0 for success, -1 for failure. NOTE(review): the body below only
- * ever returns 0.
- *
- * If params->num_y_points is 0 the block (luma_block_size_y rows of
- * luma_grain_stride ints) is zeroed and nothing else happens. Otherwise:
- *  1. every sample is drawn from gaussian_sequence using
- *     get_random_number(gauss_bits) and shifted right, with rounding, by
- *     12 - bit_depth + grain_scale_shift;
- *  2. the samples inside the padding margins are filtered in place, in
- *     raster order, with the AR coefficients params->ar_coeffs_y at the
- *     offsets listed in pred_pos_luma, then clamped to
- *     [grain_min, grain_max].
- *
- * The random register is not re-seeded here; the caller is expected to have
- * done so.
- */
-static int generate_luma_grain_block(
- const aom_film_grain_t *params, int **pred_pos_luma, int *luma_grain_block,
- int luma_block_size_y, int luma_block_size_x, int luma_grain_stride,
- int left_pad, int top_pad, int right_pad, int bottom_pad) {
- if (params->num_y_points == 0) {
- memset(luma_grain_block, 0,
- sizeof(*luma_grain_block) * luma_block_size_y * luma_grain_stride);
- return 0;
- }
-
- int bit_depth = params->bit_depth;
- int gauss_sec_shift = 12 - bit_depth + params->grain_scale_shift;  // table samples are 12-bit
-
- int num_pos_luma = 2 * params->ar_coeff_lag * (params->ar_coeff_lag + 1);  // same count init_arrays() uses
- int rounding_offset = (1 << (params->ar_coeff_shift - 1));  // half of the AR divisor, for rounding
-
- for (int i = 0; i < luma_block_size_y; i++)
- for (int j = 0; j < luma_block_size_x; j++)
- luma_grain_block[i * luma_grain_stride + j] =
- (gaussian_sequence[get_random_number(gauss_bits)] +
- ((1 << gauss_sec_shift) >> 1)) >>
- gauss_sec_shift;
-/* In-place AR filter. With the table built by init_arrays() every tap lies
- * above or to the left, so the sum reads samples that were already filtered. */
- for (int i = top_pad; i < luma_block_size_y - bottom_pad; i++)
- for (int j = left_pad; j < luma_block_size_x - right_pad; j++) {
- int wsum = 0;
- for (int pos = 0; pos < num_pos_luma; pos++) {
- wsum = wsum + params->ar_coeffs_y[pos] *
- luma_grain_block[(i + pred_pos_luma[pos][0]) *
- luma_grain_stride +
- j + pred_pos_luma[pos][1]];
- }
- luma_grain_block[i * luma_grain_stride + j] =
- clamp(luma_grain_block[i * luma_grain_stride + j] +
- ((wsum + rounding_offset) >> params->ar_coeff_shift),
- grain_min, grain_max);
- }
- return 0;
-}
-
-// Return 0 for success, -1 for failure
-static int generate_chroma_grain_blocks(
- const aom_film_grain_t *params,
- // int** pred_pos_luma,
- int **pred_pos_chroma, int *luma_grain_block, int *cb_grain_block,
- int *cr_grain_block, int luma_grain_stride, int chroma_block_size_y,
- int chroma_block_size_x, int chroma_grain_stride, int left_pad, int top_pad,
- int right_pad, int bottom_pad, int chroma_subsamp_y, int chroma_subsamp_x) {
- int bit_depth = params->bit_depth;
- int gauss_sec_shift = 12 - bit_depth + params->grain_scale_shift;
-
- int num_pos_chroma = 2 * params->ar_coeff_lag * (params->ar_coeff_lag + 1);
- if (params->num_y_points > 0) ++num_pos_chroma;
- int rounding_offset = (1 << (params->ar_coeff_shift - 1));
- int chroma_grain_block_size = chroma_block_size_y * chroma_grain_stride;
-
- if (params->num_cb_points || params->chroma_scaling_from_luma) {
- init_random_generator(7 << 5, params->random_seed);
-
- for (int i = 0; i < chroma_block_size_y; i++)
- for (int j = 0; j < chroma_block_size_x; j++)
- cb_grain_block[i * chroma_grain_stride + j] =
- (gaussian_sequence[get_random_number(gauss_bits)] +
- ((1 << gauss_sec_shift) >> 1)) >>
- gauss_sec_shift;
- } else {
- memset(cb_grain_block, 0,
- sizeof(*cb_grain_block) * chroma_grain_block_size);
- }
-
- if (params->num_cr_points || params->chroma_scaling_from_luma) {
- init_random_generator(11 << 5, params->random_seed);
-
- for (int i = 0; i < chroma_block_size_y; i++)
- for (int j = 0; j < chroma_block_size_x; j++)
- cr_grain_block[i * chroma_grain_stride + j] =
- (gaussian_sequence[get_random_number(gauss_bits)] +
- ((1 << gauss_sec_shift) >> 1)) >>
- gauss_sec_shift;
- } else {
- memset(cr_grain_block, 0,
- sizeof(*cr_grain_block) * chroma_grain_block_size);
- }
-
- for (int i = top_pad; i < chroma_block_size_y - bottom_pad; i++)
- for (int j = left_pad; j < chroma_block_size_x - right_pad; j++) {
- int wsum_cb = 0;
- int wsum_cr = 0;
- for (int pos = 0; pos < num_pos_chroma; pos++) {
- if (pred_pos_chroma[pos][2] == 0) {
- wsum_cb = wsum_cb + params->ar_coeffs_cb[pos] *
- cb_grain_block[(i + pred_pos_chroma[pos][0]) *
- chroma_grain_stride +
- j + pred_pos_chroma[pos][1]];
- wsum_cr = wsum_cr + params->ar_coeffs_cr[pos] *
- cr_grain_block[(i + pred_pos_chroma[pos][0]) *
- chroma_grain_stride +
- j + pred_pos_chroma[pos][1]];
- } else if (pred_pos_chroma[pos][2] == 1) {
- int av_luma = 0;
- int luma_coord_y = ((i - top_pad) << chroma_subsamp_y) + top_pad;
- int luma_coord_x = ((j - left_pad) << chroma_subsamp_x) + left_pad;
-
- for (int k = luma_coord_y; k < luma_coord_y + chroma_subsamp_y + 1;
- k++)
- for (int l = luma_coord_x; l < luma_coord_x + chroma_subsamp_x + 1;
- l++)
- av_luma += luma_grain_block[k * luma_grain_stride + l];
-
- av_luma =
- (av_luma + ((1 << (chroma_subsamp_y + chroma_subsamp_x)) >> 1)) >>
- (chroma_subsamp_y + chroma_subsamp_x);
-
- wsum_cb = wsum_cb + params->ar_coeffs_cb[pos] * av_luma;
- wsum_cr = wsum_cr + params->ar_coeffs_cr[pos] * av_luma;
- } else {
- fprintf(
- stderr,
- "Grain synthesis: prediction between two chroma components is "
- "not supported!");
- return -1;
- }
- }
- if (params->num_cb_points || params->chroma_scaling_from_luma)
- cb_grain_block[i * chroma_grain_stride + j] =
- clamp(cb_grain_block[i * chroma_grain_stride + j] +
- ((wsum_cb + rounding_offset) >> params->ar_coeff_shift),
- grain_min, grain_max);
- if (params->num_cr_points || params->chroma_scaling_from_luma)
- cr_grain_block[i * chroma_grain_stride + j] =
- clamp(cr_grain_block[i * chroma_grain_stride + j] +
- ((wsum_cr + rounding_offset) >> params->ar_coeff_shift),
- grain_min, grain_max);
- }
- return 0;
-}
-
-static void init_scaling_function(const int scaling_points[][2], int num_points,
- int scaling_lut[]) {
- if (num_points == 0) return;
-
- for (int i = 0; i < scaling_points[0][0]; i++)
- scaling_lut[i] = scaling_points[0][1];
-
- for (int point = 0; point < num_points - 1; point++) {
- int delta_y = scaling_points[point + 1][1] - scaling_points[point][1];
- int delta_x = scaling_points[point + 1][0] - scaling_points[point][0];
-
- int64_t delta = delta_y * ((65536 + (delta_x >> 1)) / delta_x);
-
- for (int x = 0; x < delta_x; x++) {
- scaling_lut[scaling_points[point][0] + x] =
- scaling_points[point][1] + (int)((x * delta + 32768) >> 16);
- }
- }
-
- for (int i = scaling_points[num_points - 1][0]; i < 256; i++)
- scaling_lut[i] = scaling_points[num_points - 1][1];
-}
-
-// function that extracts samples from a LUT (and interpolates intemediate
-// frames for 10- and 12-bit video)
-static int scale_LUT(int *scaling_lut, int index, int bit_depth) {
- int x = index >> (bit_depth - 8);
-
- if (!(bit_depth - 8) || x == 255)
- return scaling_lut[x];
- else
- return scaling_lut[x] + (((scaling_lut[x + 1] - scaling_lut[x]) *
- (index & ((1 << (bit_depth - 8)) - 1)) +
- (1 << (bit_depth - 9))) >>
- (bit_depth - 8));
-}
-
-static void add_noise_to_block(const aom_film_grain_t *params, uint8_t *luma,
- uint8_t *cb, uint8_t *cr, int luma_stride,
- int chroma_stride, int *luma_grain,
- int *cb_grain, int *cr_grain,
- int luma_grain_stride, int chroma_grain_stride,
- int half_luma_height, int half_luma_width,
- int bit_depth, int chroma_subsamp_y,
- int chroma_subsamp_x, int mc_identity) {
- int cb_mult = params->cb_mult - 128; // fixed scale
- int cb_luma_mult = params->cb_luma_mult - 128; // fixed scale
- int cb_offset = params->cb_offset - 256;
-
- int cr_mult = params->cr_mult - 128; // fixed scale
- int cr_luma_mult = params->cr_luma_mult - 128; // fixed scale
- int cr_offset = params->cr_offset - 256;
-
- int rounding_offset = (1 << (params->scaling_shift - 1));
-
- int apply_y = params->num_y_points > 0 ? 1 : 0;
- int apply_cb =
- (params->num_cb_points > 0 || params->chroma_scaling_from_luma) ? 1 : 0;
- int apply_cr =
- (params->num_cr_points > 0 || params->chroma_scaling_from_luma) ? 1 : 0;
-
- if (params->chroma_scaling_from_luma) {
- cb_mult = 0; // fixed scale
- cb_luma_mult = 64; // fixed scale
- cb_offset = 0;
-
- cr_mult = 0; // fixed scale
- cr_luma_mult = 64; // fixed scale
- cr_offset = 0;
- }
-
- int min_luma, max_luma, min_chroma, max_chroma;
-
- if (params->clip_to_restricted_range) {
- min_luma = min_luma_legal_range;
- max_luma = max_luma_legal_range;
-
- if (mc_identity) {
- min_chroma = min_luma_legal_range;
- max_chroma = max_luma_legal_range;
- } else {
- min_chroma = min_chroma_legal_range;
- max_chroma = max_chroma_legal_range;
- }
- } else {
- min_luma = min_chroma = 0;
- max_luma = max_chroma = 255;
- }
-
- for (int i = 0; i < (half_luma_height << (1 - chroma_subsamp_y)); i++) {
- for (int j = 0; j < (half_luma_width << (1 - chroma_subsamp_x)); j++) {
- int average_luma = 0;
- if (chroma_subsamp_x) {
- average_luma = (luma[(i << chroma_subsamp_y) * luma_stride +
- (j << chroma_subsamp_x)] +
- luma[(i << chroma_subsamp_y) * luma_stride +
- (j << chroma_subsamp_x) + 1] +
- 1) >>
- 1;
- } else {
- average_luma = luma[(i << chroma_subsamp_y) * luma_stride + j];
- }
-
- if (apply_cb) {
- cb[i * chroma_stride + j] = clamp(
- cb[i * chroma_stride + j] +
- ((scale_LUT(scaling_lut_cb,
- clamp(((average_luma * cb_luma_mult +
- cb_mult * cb[i * chroma_stride + j]) >>
- 6) +
- cb_offset,
- 0, (256 << (bit_depth - 8)) - 1),
- 8) *
- cb_grain[i * chroma_grain_stride + j] +
- rounding_offset) >>
- params->scaling_shift),
- min_chroma, max_chroma);
- }
-
- if (apply_cr) {
- cr[i * chroma_stride + j] = clamp(
- cr[i * chroma_stride + j] +
- ((scale_LUT(scaling_lut_cr,
- clamp(((average_luma * cr_luma_mult +
- cr_mult * cr[i * chroma_stride + j]) >>
- 6) +
- cr_offset,
- 0, (256 << (bit_depth - 8)) - 1),
- 8) *
- cr_grain[i * chroma_grain_stride + j] +
- rounding_offset) >>
- params->scaling_shift),
- min_chroma, max_chroma);
- }
- }
- }
-
- if (apply_y) {
- for (int i = 0; i < (half_luma_height << 1); i++) {
- for (int j = 0; j < (half_luma_width << 1); j++) {
- luma[i * luma_stride + j] =
- clamp(luma[i * luma_stride + j] +
- ((scale_LUT(scaling_lut_y, luma[i * luma_stride + j], 8) *
- luma_grain[i * luma_grain_stride + j] +
- rounding_offset) >>
- params->scaling_shift),
- min_luma, max_luma);
- }
- }
- }
-}
-
-static void add_noise_to_block_hbd(
- const aom_film_grain_t *params, uint16_t *luma, uint16_t *cb, uint16_t *cr,
- int luma_stride, int chroma_stride, int *luma_grain, int *cb_grain,
- int *cr_grain, int luma_grain_stride, int chroma_grain_stride,
- int half_luma_height, int half_luma_width, int bit_depth,
- int chroma_subsamp_y, int chroma_subsamp_x, int mc_identity) {
- int cb_mult = params->cb_mult - 128; // fixed scale
- int cb_luma_mult = params->cb_luma_mult - 128; // fixed scale
- // offset value depends on the bit depth
- int cb_offset = (params->cb_offset << (bit_depth - 8)) - (1 << bit_depth);
-
- int cr_mult = params->cr_mult - 128; // fixed scale
- int cr_luma_mult = params->cr_luma_mult - 128; // fixed scale
- // offset value depends on the bit depth
- int cr_offset = (params->cr_offset << (bit_depth - 8)) - (1 << bit_depth);
-
- int rounding_offset = (1 << (params->scaling_shift - 1));
-
- int apply_y = params->num_y_points > 0 ? 1 : 0;
- int apply_cb =
- (params->num_cb_points > 0 || params->chroma_scaling_from_luma) > 0 ? 1
- : 0;
- int apply_cr =
- (params->num_cr_points > 0 || params->chroma_scaling_from_luma) > 0 ? 1
- : 0;
-
- if (params->chroma_scaling_from_luma) {
- cb_mult = 0; // fixed scale
- cb_luma_mult = 64; // fixed scale
- cb_offset = 0;
-
- cr_mult = 0; // fixed scale
- cr_luma_mult = 64; // fixed scale
- cr_offset = 0;
- }
-
- int min_luma, max_luma, min_chroma, max_chroma;
-
- if (params->clip_to_restricted_range) {
- min_luma = min_luma_legal_range << (bit_depth - 8);
- max_luma = max_luma_legal_range << (bit_depth - 8);
-
- if (mc_identity) {
- min_chroma = min_luma_legal_range << (bit_depth - 8);
- max_chroma = max_luma_legal_range << (bit_depth - 8);
- } else {
- min_chroma = min_chroma_legal_range << (bit_depth - 8);
- max_chroma = max_chroma_legal_range << (bit_depth - 8);
- }
- } else {
- min_luma = min_chroma = 0;
- max_luma = max_chroma = (256 << (bit_depth - 8)) - 1;
- }
-
- for (int i = 0; i < (half_luma_height << (1 - chroma_subsamp_y)); i++) {
- for (int j = 0; j < (half_luma_width << (1 - chroma_subsamp_x)); j++) {
- int average_luma = 0;
- if (chroma_subsamp_x) {
- average_luma = (luma[(i << chroma_subsamp_y) * luma_stride +
- (j << chroma_subsamp_x)] +
- luma[(i << chroma_subsamp_y) * luma_stride +
- (j << chroma_subsamp_x) + 1] +
- 1) >>
- 1;
- } else {
- average_luma = luma[(i << chroma_subsamp_y) * luma_stride + j];
- }
-
- if (apply_cb) {
- cb[i * chroma_stride + j] = clamp(
- cb[i * chroma_stride + j] +
- ((scale_LUT(scaling_lut_cb,
- clamp(((average_luma * cb_luma_mult +
- cb_mult * cb[i * chroma_stride + j]) >>
- 6) +
- cb_offset,
- 0, (256 << (bit_depth - 8)) - 1),
- bit_depth) *
- cb_grain[i * chroma_grain_stride + j] +
- rounding_offset) >>
- params->scaling_shift),
- min_chroma, max_chroma);
- }
- if (apply_cr) {
- cr[i * chroma_stride + j] = clamp(
- cr[i * chroma_stride + j] +
- ((scale_LUT(scaling_lut_cr,
- clamp(((average_luma * cr_luma_mult +
- cr_mult * cr[i * chroma_stride + j]) >>
- 6) +
- cr_offset,
- 0, (256 << (bit_depth - 8)) - 1),
- bit_depth) *
- cr_grain[i * chroma_grain_stride + j] +
- rounding_offset) >>
- params->scaling_shift),
- min_chroma, max_chroma);
- }
- }
- }
-
- if (apply_y) {
- for (int i = 0; i < (half_luma_height << 1); i++) {
- for (int j = 0; j < (half_luma_width << 1); j++) {
- luma[i * luma_stride + j] =
- clamp(luma[i * luma_stride + j] +
- ((scale_LUT(scaling_lut_y, luma[i * luma_stride + j],
- bit_depth) *
- luma_grain[i * luma_grain_stride + j] +
- rounding_offset) >>
- params->scaling_shift),
- min_luma, max_luma);
- }
- }
- }
-}
-
-static void copy_rect(uint8_t *src, int src_stride, uint8_t *dst,
- int dst_stride, int width, int height,
- int use_high_bit_depth) {
- int hbd_coeff = use_high_bit_depth ? 2 : 1;
- while (height) {
- memcpy(dst, src, width * sizeof(uint8_t) * hbd_coeff);
- src += src_stride;
- dst += dst_stride;
- --height;
- }
- return;
-}
-
-static void copy_area(int *src, int src_stride, int *dst, int dst_stride,
- int width, int height) {
- while (height) {
- memcpy(dst, src, width * sizeof(*src));
- src += src_stride;
- dst += dst_stride;
- --height;
- }
- return;
-}
-
-static void extend_even(uint8_t *dst, int dst_stride, int width, int height,
- int use_high_bit_depth) {
- if ((width & 1) == 0 && (height & 1) == 0) return;
- if (use_high_bit_depth) {
- uint16_t *dst16 = (uint16_t *)dst;
- int dst16_stride = dst_stride / 2;
- if (width & 1) {
- for (int i = 0; i < height; ++i)
- dst16[i * dst16_stride + width] = dst16[i * dst16_stride + width - 1];
- }
- width = (width + 1) & (~1);
- if (height & 1) {
- memcpy(&dst16[height * dst16_stride], &dst16[(height - 1) * dst16_stride],
- sizeof(*dst16) * width);
- }
- } else {
- if (width & 1) {
- for (int i = 0; i < height; ++i)
- dst[i * dst_stride + width] = dst[i * dst_stride + width - 1];
- }
- width = (width + 1) & (~1);
- if (height & 1) {
- memcpy(&dst[height * dst_stride], &dst[(height - 1) * dst_stride],
- sizeof(*dst) * width);
- }
- }
-}
-
-static void ver_boundary_overlap(int *left_block, int left_stride,
- int *right_block, int right_stride,
- int *dst_block, int dst_stride, int width,
- int height) {
- if (width == 1) {
- while (height) {
- *dst_block = clamp((*left_block * 23 + *right_block * 22 + 16) >> 5,
- grain_min, grain_max);
- left_block += left_stride;
- right_block += right_stride;
- dst_block += dst_stride;
- --height;
- }
- return;
- } else if (width == 2) {
- while (height) {
- dst_block[0] = clamp((27 * left_block[0] + 17 * right_block[0] + 16) >> 5,
- grain_min, grain_max);
- dst_block[1] = clamp((17 * left_block[1] + 27 * right_block[1] + 16) >> 5,
- grain_min, grain_max);
- left_block += left_stride;
- right_block += right_stride;
- dst_block += dst_stride;
- --height;
- }
- return;
- }
-}
-
-static void hor_boundary_overlap(int *top_block, int top_stride,
- int *bottom_block, int bottom_stride,
- int *dst_block, int dst_stride, int width,
- int height) {
- if (height == 1) {
- while (width) {
- *dst_block = clamp((*top_block * 23 + *bottom_block * 22 + 16) >> 5,
- grain_min, grain_max);
- ++top_block;
- ++bottom_block;
- ++dst_block;
- --width;
- }
- return;
- } else if (height == 2) {
- while (width) {
- dst_block[0] = clamp((27 * top_block[0] + 17 * bottom_block[0] + 16) >> 5,
- grain_min, grain_max);
- dst_block[dst_stride] = clamp((17 * top_block[top_stride] +
- 27 * bottom_block[bottom_stride] + 16) >>
- 5,
- grain_min, grain_max);
- ++top_block;
- ++bottom_block;
- ++dst_block;
- --width;
- }
- return;
- }
-}
-
-int av1_add_film_grain(const aom_film_grain_t *params, const aom_image_t *src,
- aom_image_t *dst) {
- uint8_t *luma, *cb, *cr;
- int height, width, luma_stride, chroma_stride;
- int use_high_bit_depth = 0;
- int chroma_subsamp_x = 0;
- int chroma_subsamp_y = 0;
- int mc_identity = src->mc == AOM_CICP_MC_IDENTITY ? 1 : 0;
-
- switch (src->fmt) {
- case AOM_IMG_FMT_AOMI420:
- case AOM_IMG_FMT_I420:
- use_high_bit_depth = 0;
- chroma_subsamp_x = 1;
- chroma_subsamp_y = 1;
- break;
- case AOM_IMG_FMT_I42016:
- use_high_bit_depth = 1;
- chroma_subsamp_x = 1;
- chroma_subsamp_y = 1;
- break;
- // case AOM_IMG_FMT_444A:
- case AOM_IMG_FMT_I444:
- use_high_bit_depth = 0;
- chroma_subsamp_x = 0;
- chroma_subsamp_y = 0;
- break;
- case AOM_IMG_FMT_I44416:
- use_high_bit_depth = 1;
- chroma_subsamp_x = 0;
- chroma_subsamp_y = 0;
- break;
- case AOM_IMG_FMT_I422:
- use_high_bit_depth = 0;
- chroma_subsamp_x = 1;
- chroma_subsamp_y = 0;
- break;
- case AOM_IMG_FMT_I42216:
- use_high_bit_depth = 1;
- chroma_subsamp_x = 1;
- chroma_subsamp_y = 0;
- break;
- default: // unknown input format
- fprintf(stderr, "Film grain error: input format is not supported!");
- return -1;
- }
-
- assert(params->bit_depth == src->bit_depth);
-
- dst->fmt = src->fmt;
- dst->bit_depth = src->bit_depth;
-
- dst->r_w = src->r_w;
- dst->r_h = src->r_h;
- dst->d_w = src->d_w;
- dst->d_h = src->d_h;
-
- dst->cp = src->cp;
- dst->tc = src->tc;
- dst->mc = src->mc;
-
- dst->monochrome = src->monochrome;
- dst->csp = src->csp;
- dst->range = src->range;
-
- dst->x_chroma_shift = src->x_chroma_shift;
- dst->y_chroma_shift = src->y_chroma_shift;
-
- dst->temporal_id = src->temporal_id;
- dst->spatial_id = src->spatial_id;
-
- width = src->d_w % 2 ? src->d_w + 1 : src->d_w;
- height = src->d_h % 2 ? src->d_h + 1 : src->d_h;
-
- copy_rect(src->planes[AOM_PLANE_Y], src->stride[AOM_PLANE_Y],
- dst->planes[AOM_PLANE_Y], dst->stride[AOM_PLANE_Y], src->d_w,
- src->d_h, use_high_bit_depth);
- // Note that dst is already assumed to be aligned to even.
- extend_even(dst->planes[AOM_PLANE_Y], dst->stride[AOM_PLANE_Y], src->d_w,
- src->d_h, use_high_bit_depth);
-
- if (!src->monochrome) {
- copy_rect(src->planes[AOM_PLANE_U], src->stride[AOM_PLANE_U],
- dst->planes[AOM_PLANE_U], dst->stride[AOM_PLANE_U],
- width >> chroma_subsamp_x, height >> chroma_subsamp_y,
- use_high_bit_depth);
-
- copy_rect(src->planes[AOM_PLANE_V], src->stride[AOM_PLANE_V],
- dst->planes[AOM_PLANE_V], dst->stride[AOM_PLANE_V],
- width >> chroma_subsamp_x, height >> chroma_subsamp_y,
- use_high_bit_depth);
- }
-
- luma = dst->planes[AOM_PLANE_Y];
- cb = dst->planes[AOM_PLANE_U];
- cr = dst->planes[AOM_PLANE_V];
-
- // luma and chroma strides in samples
- luma_stride = dst->stride[AOM_PLANE_Y] >> use_high_bit_depth;
- chroma_stride = dst->stride[AOM_PLANE_U] >> use_high_bit_depth;
-
- return av1_add_film_grain_run(
- params, luma, cb, cr, height, width, luma_stride, chroma_stride,
- use_high_bit_depth, chroma_subsamp_y, chroma_subsamp_x, mc_identity);
-}
-
-int av1_add_film_grain_run(const aom_film_grain_t *params, uint8_t *luma,
- uint8_t *cb, uint8_t *cr, int height, int width,
- int luma_stride, int chroma_stride,
- int use_high_bit_depth, int chroma_subsamp_y,
- int chroma_subsamp_x, int mc_identity) {
- int **pred_pos_luma;
- int **pred_pos_chroma;
- int *luma_grain_block;
- int *cb_grain_block;
- int *cr_grain_block;
-
- int *y_line_buf;
- int *cb_line_buf;
- int *cr_line_buf;
-
- int *y_col_buf;
- int *cb_col_buf;
- int *cr_col_buf;
-
- random_register = params->random_seed;
-
- int left_pad = 3;
- int right_pad = 3; // padding to offset for AR coefficients
- int top_pad = 3;
- int bottom_pad = 0;
-
- int ar_padding = 3; // maximum lag used for stabilization of AR coefficients
-
- luma_subblock_size_y = 32;
- luma_subblock_size_x = 32;
-
- chroma_subblock_size_y = luma_subblock_size_y >> chroma_subsamp_y;
- chroma_subblock_size_x = luma_subblock_size_x >> chroma_subsamp_x;
-
- // Initial padding is only needed for generation of
- // film grain templates (to stabilize the AR process)
- // Only a 64x64 luma and 32x32 chroma part of a template
- // is used later for adding grain, padding can be discarded
-
- int luma_block_size_y =
- top_pad + 2 * ar_padding + luma_subblock_size_y * 2 + bottom_pad;
- int luma_block_size_x = left_pad + 2 * ar_padding + luma_subblock_size_x * 2 +
- 2 * ar_padding + right_pad;
-
- int chroma_block_size_y = top_pad + (2 >> chroma_subsamp_y) * ar_padding +
- chroma_subblock_size_y * 2 + bottom_pad;
- int chroma_block_size_x = left_pad + (2 >> chroma_subsamp_x) * ar_padding +
- chroma_subblock_size_x * 2 +
- (2 >> chroma_subsamp_x) * ar_padding + right_pad;
-
- int luma_grain_stride = luma_block_size_x;
- int chroma_grain_stride = chroma_block_size_x;
-
- int overlap = params->overlap_flag;
- int bit_depth = params->bit_depth;
-
- grain_center = 128 << (bit_depth - 8);
- grain_min = 0 - grain_center;
- grain_max = (256 << (bit_depth - 8)) - 1 - grain_center;
-
- init_arrays(params, luma_stride, chroma_stride, &pred_pos_luma,
- &pred_pos_chroma, &luma_grain_block, &cb_grain_block,
- &cr_grain_block, &y_line_buf, &cb_line_buf, &cr_line_buf,
- &y_col_buf, &cb_col_buf, &cr_col_buf,
- luma_block_size_y * luma_block_size_x,
- chroma_block_size_y * chroma_block_size_x, chroma_subsamp_y,
- chroma_subsamp_x);
-
- if (generate_luma_grain_block(params, pred_pos_luma, luma_grain_block,
- luma_block_size_y, luma_block_size_x,
- luma_grain_stride, left_pad, top_pad, right_pad,
- bottom_pad))
- return -1;
-
- if (generate_chroma_grain_blocks(
- params,
- // pred_pos_luma,
- pred_pos_chroma, luma_grain_block, cb_grain_block, cr_grain_block,
- luma_grain_stride, chroma_block_size_y, chroma_block_size_x,
- chroma_grain_stride, left_pad, top_pad, right_pad, bottom_pad,
- chroma_subsamp_y, chroma_subsamp_x))
- return -1;
-
- init_scaling_function(params->scaling_points_y, params->num_y_points,
- scaling_lut_y);
-
- if (params->chroma_scaling_from_luma) {
- memcpy(scaling_lut_cb, scaling_lut_y, sizeof(*scaling_lut_y) * 256);
- memcpy(scaling_lut_cr, scaling_lut_y, sizeof(*scaling_lut_y) * 256);
- } else {
- init_scaling_function(params->scaling_points_cb, params->num_cb_points,
- scaling_lut_cb);
- init_scaling_function(params->scaling_points_cr, params->num_cr_points,
- scaling_lut_cr);
- }
- for (int y = 0; y < height / 2; y += (luma_subblock_size_y >> 1)) {
- init_random_generator(y * 2, params->random_seed);
-
- for (int x = 0; x < width / 2; x += (luma_subblock_size_x >> 1)) {
- int offset_y = get_random_number(8);
- int offset_x = (offset_y >> 4) & 15;
- offset_y &= 15;
-
- int luma_offset_y = left_pad + 2 * ar_padding + (offset_y << 1);
- int luma_offset_x = top_pad + 2 * ar_padding + (offset_x << 1);
-
- int chroma_offset_y = top_pad + (2 >> chroma_subsamp_y) * ar_padding +
- offset_y * (2 >> chroma_subsamp_y);
- int chroma_offset_x = left_pad + (2 >> chroma_subsamp_x) * ar_padding +
- offset_x * (2 >> chroma_subsamp_x);
-
- if (overlap && x) {
- ver_boundary_overlap(
- y_col_buf, 2,
- luma_grain_block + luma_offset_y * luma_grain_stride +
- luma_offset_x,
- luma_grain_stride, y_col_buf, 2, 2,
- AOMMIN(luma_subblock_size_y + 2, height - (y << 1)));
-
- ver_boundary_overlap(
- cb_col_buf, 2 >> chroma_subsamp_x,
- cb_grain_block + chroma_offset_y * chroma_grain_stride +
- chroma_offset_x,
- chroma_grain_stride, cb_col_buf, 2 >> chroma_subsamp_x,
- 2 >> chroma_subsamp_x,
- AOMMIN(chroma_subblock_size_y + (2 >> chroma_subsamp_y),
- (height - (y << 1)) >> chroma_subsamp_y));
-
- ver_boundary_overlap(
- cr_col_buf, 2 >> chroma_subsamp_x,
- cr_grain_block + chroma_offset_y * chroma_grain_stride +
- chroma_offset_x,
- chroma_grain_stride, cr_col_buf, 2 >> chroma_subsamp_x,
- 2 >> chroma_subsamp_x,
- AOMMIN(chroma_subblock_size_y + (2 >> chroma_subsamp_y),
- (height - (y << 1)) >> chroma_subsamp_y));
-
- int i = y ? 1 : 0;
-
- if (use_high_bit_depth) {
- add_noise_to_block_hbd(
- params,
- (uint16_t *)luma + ((y + i) << 1) * luma_stride + (x << 1),
- (uint16_t *)cb +
- ((y + i) << (1 - chroma_subsamp_y)) * chroma_stride +
- (x << (1 - chroma_subsamp_x)),
- (uint16_t *)cr +
- ((y + i) << (1 - chroma_subsamp_y)) * chroma_stride +
- (x << (1 - chroma_subsamp_x)),
- luma_stride, chroma_stride, y_col_buf + i * 4,
- cb_col_buf + i * (2 - chroma_subsamp_y) * (2 - chroma_subsamp_x),
- cr_col_buf + i * (2 - chroma_subsamp_y) * (2 - chroma_subsamp_x),
- 2, (2 - chroma_subsamp_x),
- AOMMIN(luma_subblock_size_y >> 1, height / 2 - y) - i, 1,
- bit_depth, chroma_subsamp_y, chroma_subsamp_x, mc_identity);
- } else {
- add_noise_to_block(
- params, luma + ((y + i) << 1) * luma_stride + (x << 1),
- cb + ((y + i) << (1 - chroma_subsamp_y)) * chroma_stride +
- (x << (1 - chroma_subsamp_x)),
- cr + ((y + i) << (1 - chroma_subsamp_y)) * chroma_stride +
- (x << (1 - chroma_subsamp_x)),
- luma_stride, chroma_stride, y_col_buf + i * 4,
- cb_col_buf + i * (2 - chroma_subsamp_y) * (2 - chroma_subsamp_x),
- cr_col_buf + i * (2 - chroma_subsamp_y) * (2 - chroma_subsamp_x),
- 2, (2 - chroma_subsamp_x),
- AOMMIN(luma_subblock_size_y >> 1, height / 2 - y) - i, 1,
- bit_depth, chroma_subsamp_y, chroma_subsamp_x, mc_identity);
- }
- }
-
- if (overlap && y) {
- if (x) {
- hor_boundary_overlap(y_line_buf + (x << 1), luma_stride, y_col_buf, 2,
- y_line_buf + (x << 1), luma_stride, 2, 2);
-
- hor_boundary_overlap(cb_line_buf + x * (2 >> chroma_subsamp_x),
- chroma_stride, cb_col_buf, 2 >> chroma_subsamp_x,
- cb_line_buf + x * (2 >> chroma_subsamp_x),
- chroma_stride, 2 >> chroma_subsamp_x,
- 2 >> chroma_subsamp_y);
-
- hor_boundary_overlap(cr_line_buf + x * (2 >> chroma_subsamp_x),
- chroma_stride, cr_col_buf, 2 >> chroma_subsamp_x,
- cr_line_buf + x * (2 >> chroma_subsamp_x),
- chroma_stride, 2 >> chroma_subsamp_x,
- 2 >> chroma_subsamp_y);
- }
-
- hor_boundary_overlap(
- y_line_buf + ((x ? x + 1 : 0) << 1), luma_stride,
- luma_grain_block + luma_offset_y * luma_grain_stride +
- luma_offset_x + (x ? 2 : 0),
- luma_grain_stride, y_line_buf + ((x ? x + 1 : 0) << 1), luma_stride,
- AOMMIN(luma_subblock_size_x - ((x ? 1 : 0) << 1),
- width - ((x ? x + 1 : 0) << 1)),
- 2);
-
- hor_boundary_overlap(
- cb_line_buf + ((x ? x + 1 : 0) << (1 - chroma_subsamp_x)),
- chroma_stride,
- cb_grain_block + chroma_offset_y * chroma_grain_stride +
- chroma_offset_x + ((x ? 1 : 0) << (1 - chroma_subsamp_x)),
- chroma_grain_stride,
- cb_line_buf + ((x ? x + 1 : 0) << (1 - chroma_subsamp_x)),
- chroma_stride,
- AOMMIN(chroma_subblock_size_x -
- ((x ? 1 : 0) << (1 - chroma_subsamp_x)),
- (width - ((x ? x + 1 : 0) << 1)) >> chroma_subsamp_x),
- 2 >> chroma_subsamp_y);
-
- hor_boundary_overlap(
- cr_line_buf + ((x ? x + 1 : 0) << (1 - chroma_subsamp_x)),
- chroma_stride,
- cr_grain_block + chroma_offset_y * chroma_grain_stride +
- chroma_offset_x + ((x ? 1 : 0) << (1 - chroma_subsamp_x)),
- chroma_grain_stride,
- cr_line_buf + ((x ? x + 1 : 0) << (1 - chroma_subsamp_x)),
- chroma_stride,
- AOMMIN(chroma_subblock_size_x -
- ((x ? 1 : 0) << (1 - chroma_subsamp_x)),
- (width - ((x ? x + 1 : 0) << 1)) >> chroma_subsamp_x),
- 2 >> chroma_subsamp_y);
-
- if (use_high_bit_depth) {
- add_noise_to_block_hbd(
- params, (uint16_t *)luma + (y << 1) * luma_stride + (x << 1),
- (uint16_t *)cb + (y << (1 - chroma_subsamp_y)) * chroma_stride +
- (x << ((1 - chroma_subsamp_x))),
- (uint16_t *)cr + (y << (1 - chroma_subsamp_y)) * chroma_stride +
- (x << ((1 - chroma_subsamp_x))),
- luma_stride, chroma_stride, y_line_buf + (x << 1),
- cb_line_buf + (x << (1 - chroma_subsamp_x)),
- cr_line_buf + (x << (1 - chroma_subsamp_x)), luma_stride,
- chroma_stride, 1,
- AOMMIN(luma_subblock_size_x >> 1, width / 2 - x), bit_depth,
- chroma_subsamp_y, chroma_subsamp_x, mc_identity);
- } else {
- add_noise_to_block(
- params, luma + (y << 1) * luma_stride + (x << 1),
- cb + (y << (1 - chroma_subsamp_y)) * chroma_stride +
- (x << ((1 - chroma_subsamp_x))),
- cr + (y << (1 - chroma_subsamp_y)) * chroma_stride +
- (x << ((1 - chroma_subsamp_x))),
- luma_stride, chroma_stride, y_line_buf + (x << 1),
- cb_line_buf + (x << (1 - chroma_subsamp_x)),
- cr_line_buf + (x << (1 - chroma_subsamp_x)), luma_stride,
- chroma_stride, 1,
- AOMMIN(luma_subblock_size_x >> 1, width / 2 - x), bit_depth,
- chroma_subsamp_y, chroma_subsamp_x, mc_identity);
- }
- }
-
- int i = overlap && y ? 1 : 0;
- int j = overlap && x ? 1 : 0;
-
- if (use_high_bit_depth) {
- add_noise_to_block_hbd(
- params,
- (uint16_t *)luma + ((y + i) << 1) * luma_stride + ((x + j) << 1),
- (uint16_t *)cb +
- ((y + i) << (1 - chroma_subsamp_y)) * chroma_stride +
- ((x + j) << (1 - chroma_subsamp_x)),
- (uint16_t *)cr +
- ((y + i) << (1 - chroma_subsamp_y)) * chroma_stride +
- ((x + j) << (1 - chroma_subsamp_x)),
- luma_stride, chroma_stride,
- luma_grain_block + (luma_offset_y + (i << 1)) * luma_grain_stride +
- luma_offset_x + (j << 1),
- cb_grain_block +
- (chroma_offset_y + (i << (1 - chroma_subsamp_y))) *
- chroma_grain_stride +
- chroma_offset_x + (j << (1 - chroma_subsamp_x)),
- cr_grain_block +
- (chroma_offset_y + (i << (1 - chroma_subsamp_y))) *
- chroma_grain_stride +
- chroma_offset_x + (j << (1 - chroma_subsamp_x)),
- luma_grain_stride, chroma_grain_stride,
- AOMMIN(luma_subblock_size_y >> 1, height / 2 - y) - i,
- AOMMIN(luma_subblock_size_x >> 1, width / 2 - x) - j, bit_depth,
- chroma_subsamp_y, chroma_subsamp_x, mc_identity);
- } else {
- add_noise_to_block(
- params, luma + ((y + i) << 1) * luma_stride + ((x + j) << 1),
- cb + ((y + i) << (1 - chroma_subsamp_y)) * chroma_stride +
- ((x + j) << (1 - chroma_subsamp_x)),
- cr + ((y + i) << (1 - chroma_subsamp_y)) * chroma_stride +
- ((x + j) << (1 - chroma_subsamp_x)),
- luma_stride, chroma_stride,
- luma_grain_block + (luma_offset_y + (i << 1)) * luma_grain_stride +
- luma_offset_x + (j << 1),
- cb_grain_block +
- (chroma_offset_y + (i << (1 - chroma_subsamp_y))) *
- chroma_grain_stride +
- chroma_offset_x + (j << (1 - chroma_subsamp_x)),
- cr_grain_block +
- (chroma_offset_y + (i << (1 - chroma_subsamp_y))) *
- chroma_grain_stride +
- chroma_offset_x + (j << (1 - chroma_subsamp_x)),
- luma_grain_stride, chroma_grain_stride,
- AOMMIN(luma_subblock_size_y >> 1, height / 2 - y) - i,
- AOMMIN(luma_subblock_size_x >> 1, width / 2 - x) - j, bit_depth,
- chroma_subsamp_y, chroma_subsamp_x, mc_identity);
- }
-
- if (overlap) {
- if (x) {
- // Copy overlapped column bufer to line buffer
- copy_area(y_col_buf + (luma_subblock_size_y << 1), 2,
- y_line_buf + (x << 1), luma_stride, 2, 2);
-
- copy_area(
- cb_col_buf + (chroma_subblock_size_y << (1 - chroma_subsamp_x)),
- 2 >> chroma_subsamp_x,
- cb_line_buf + (x << (1 - chroma_subsamp_x)), chroma_stride,
- 2 >> chroma_subsamp_x, 2 >> chroma_subsamp_y);
-
- copy_area(
- cr_col_buf + (chroma_subblock_size_y << (1 - chroma_subsamp_x)),
- 2 >> chroma_subsamp_x,
- cr_line_buf + (x << (1 - chroma_subsamp_x)), chroma_stride,
- 2 >> chroma_subsamp_x, 2 >> chroma_subsamp_y);
- }
-
- // Copy grain to the line buffer for overlap with a bottom block
- copy_area(
- luma_grain_block +
- (luma_offset_y + luma_subblock_size_y) * luma_grain_stride +
- luma_offset_x + ((x ? 2 : 0)),
- luma_grain_stride, y_line_buf + ((x ? x + 1 : 0) << 1), luma_stride,
- AOMMIN(luma_subblock_size_x, width - (x << 1)) - (x ? 2 : 0), 2);
-
- copy_area(cb_grain_block +
- (chroma_offset_y + chroma_subblock_size_y) *
- chroma_grain_stride +
- chroma_offset_x + (x ? 2 >> chroma_subsamp_x : 0),
- chroma_grain_stride,
- cb_line_buf + ((x ? x + 1 : 0) << (1 - chroma_subsamp_x)),
- chroma_stride,
- AOMMIN(chroma_subblock_size_x,
- ((width - (x << 1)) >> chroma_subsamp_x)) -
- (x ? 2 >> chroma_subsamp_x : 0),
- 2 >> chroma_subsamp_y);
-
- copy_area(cr_grain_block +
- (chroma_offset_y + chroma_subblock_size_y) *
- chroma_grain_stride +
- chroma_offset_x + (x ? 2 >> chroma_subsamp_x : 0),
- chroma_grain_stride,
- cr_line_buf + ((x ? x + 1 : 0) << (1 - chroma_subsamp_x)),
- chroma_stride,
- AOMMIN(chroma_subblock_size_x,
- ((width - (x << 1)) >> chroma_subsamp_x)) -
- (x ? 2 >> chroma_subsamp_x : 0),
- 2 >> chroma_subsamp_y);
-
- // Copy grain to the column buffer for overlap with the next block to
- // the right
-
- copy_area(luma_grain_block + luma_offset_y * luma_grain_stride +
- luma_offset_x + luma_subblock_size_x,
- luma_grain_stride, y_col_buf, 2, 2,
- AOMMIN(luma_subblock_size_y + 2, height - (y << 1)));
-
- copy_area(cb_grain_block + chroma_offset_y * chroma_grain_stride +
- chroma_offset_x + chroma_subblock_size_x,
- chroma_grain_stride, cb_col_buf, 2 >> chroma_subsamp_x,
- 2 >> chroma_subsamp_x,
- AOMMIN(chroma_subblock_size_y + (2 >> chroma_subsamp_y),
- (height - (y << 1)) >> chroma_subsamp_y));
-
- copy_area(cr_grain_block + chroma_offset_y * chroma_grain_stride +
- chroma_offset_x + chroma_subblock_size_x,
- chroma_grain_stride, cr_col_buf, 2 >> chroma_subsamp_x,
- 2 >> chroma_subsamp_x,
- AOMMIN(chroma_subblock_size_y + (2 >> chroma_subsamp_y),
- (height - (y << 1)) >> chroma_subsamp_y));
- }
- }
- }
-
- dealloc_arrays(params, &pred_pos_luma, &pred_pos_chroma, &luma_grain_block,
- &cb_grain_block, &cr_grain_block, &y_line_buf, &cb_line_buf,
- &cr_line_buf, &y_col_buf, &cb_col_buf, &cr_col_buf);
- return 0;
-}
diff --git a/third_party/aom/aom_dsp/grain_synthesis.h b/third_party/aom/aom_dsp/grain_synthesis.h
deleted file mode 100644
index 7aee6f6f4..000000000
--- a/third_party/aom/aom_dsp/grain_synthesis.h
+++ /dev/null
@@ -1,122 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-/*!\file
- * \brief Describes film grain parameters and film grain synthesis
- *
- */
-#ifndef AOM_AOM_DSP_GRAIN_SYNTHESIS_H_
-#define AOM_AOM_DSP_GRAIN_SYNTHESIS_H_
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-#include "aom_dsp/aom_dsp_common.h"
-#include "aom/aom_image.h"
-
-/*!\brief Structure containing film grain synthesis parameters for a frame
- *
- * This structure contains input parameters for film grain synthesis
- *
- * The fixed array sizes below are the hard upper limits for the matching
- * count fields; code that fills this structure from external data has to
- * keep the counts within those limits.
- */
-typedef struct {
- int apply_grain;
-
- int update_parameters;
-
- // 8 bit values
- // Each scaling point is a pair: [i][0] and [i][1] (stored as "x y" in the
- // grain table file format).
- int scaling_points_y[14][2];
- int num_y_points; // value: 0..14
-
- // 8 bit values
- int scaling_points_cb[10][2];
- int num_cb_points; // value: 0..10
-
- // 8 bit values
- int scaling_points_cr[10][2];
- int num_cr_points; // value: 0..10
-
- int scaling_shift; // values : 8..11
-
- int ar_coeff_lag; // values: 0..3
-
- // 8 bit values
- // The grain table reader/writer uses 2 * lag * (lag + 1) luma coefficients
- // and one more than that for each chroma plane, i.e. 24 and 25 at lag 3.
- int ar_coeffs_y[24];
- int ar_coeffs_cb[25];
- int ar_coeffs_cr[25];
-
- // Shift value: AR coeffs range
- // 6: [-2, 2)
- // 7: [-1, 1)
- // 8: [-0.5, 0.5)
- // 9: [-0.25, 0.25)
- int ar_coeff_shift; // values : 6..9
-
- int cb_mult; // 8 bits
- int cb_luma_mult; // 8 bits
- int cb_offset; // 9 bits
-
- int cr_mult; // 8 bits
- int cr_luma_mult; // 8 bits
- int cr_offset; // 9 bits
-
- int overlap_flag;
-
- int clip_to_restricted_range;
-
- unsigned int bit_depth; // video bit depth
-
- int chroma_scaling_from_luma;
-
- int grain_scale_shift;
-
- // Unsigned 16-bit seed; use unsigned conversions when scanning/printing it.
- uint16_t random_seed;
-} aom_film_grain_t;
-
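-/*!\brief Add film grain
- *
- * Add film grain to an image
- *
- * Returns 0 for success, -1 for failure
- *
- * \param[in] grain_params Grain parameters
- * \param[in] luma luma plane
- * \param[in] cb cb plane
- * \param[in] cr cr plane
- * \param[in] height luma plane height
- * \param[in] width luma plane width
- * \param[in] luma_stride luma plane stride
- * \param[in] chroma_stride chroma plane stride
- * \param[in] use_high_bit_depth high bit depth flag (TODO: document the
- * expected sample layout)
- * \param[in] chroma_subsamp_y vertical chroma subsampling, used as a right
- * shift on luma-sized quantities
- * \param[in] chroma_subsamp_x horizontal chroma subsampling, used as a right
- * shift on luma-sized quantities
- * \param[in] mc_identity matrix-coefficients identity flag (TODO: document
- * its effect on grain application)
- */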
-int av1_add_film_grain_run(const aom_film_grain_t *grain_params, uint8_t *luma,
- uint8_t *cb, uint8_t *cr, int height, int width,
- int luma_stride, int chroma_stride,
- int use_high_bit_depth, int chroma_subsamp_y,
- int chroma_subsamp_x, int mc_identity);
-
-/*!\brief Add film grain
- *
- * Add film grain to an image
- *
- * Returns 0 for success, -1 for failure
- *
- * \param[in] grain_params Grain parameters
- * \param[in] src Source image
- * \param[out] dst Resulting image with grain
- */
-int av1_add_film_grain(const aom_film_grain_t *grain_params,
- const aom_image_t *src, aom_image_t *dst);
-
-#ifdef __cplusplus
-} // extern "C"
-#endif
-
-#endif // AOM_AOM_DSP_GRAIN_SYNTHESIS_H_
diff --git a/third_party/aom/aom_dsp/grain_table.c b/third_party/aom/aom_dsp/grain_table.c
deleted file mode 100644
index 0d6a73f55..000000000
--- a/third_party/aom/aom_dsp/grain_table.c
+++ /dev/null
@@ -1,333 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-/*!\file
- * \brief This file has the implementation details of the grain table.
- *
- * The file format is an ascii representation for readability and
- * editability. Array parameters are separated from the non-array
- * parameters and prefixed with a few characters to make for easy
- * localization with a parameter set. Each entry is prefixed with "E"
- * and the other parameters are only specified if "update-parms" is
- * non-zero.
- *
- * filmgrn1
- * E <start-time> <end-time> <apply-grain> <random-seed> <update-parms>
- * p <ar_coeff_lag> <ar_coeff_shift> <grain_scale_shift> ...
- * sY <num_y_points> <point_0_x> <point_0_y> ...
- * sCb <num_cb_points> <point_0_x> <point_0_y> ...
- * sCr <num_cr_points> <point_0_x> <point_0_y> ...
- * cY <ar_coeff_y_0> ....
- * cCb <ar_coeff_cb_0> ....
- * cCr <ar_coeff_cr_0> ....
- * E <start-time> ...
- */
-#include <string.h>
-#include <stdio.h>
-#include "aom_dsp/aom_dsp_common.h"
-#include "aom_dsp/grain_table.h"
-#include "aom_mem/aom_mem.h"
-
-// Signature that opens every grain table file. The array holds exactly the 8
-// signature bytes and no terminating NUL, so it must only be used with
-// explicit lengths (it is compared with memcmp(..., 8) and written with
-// fwrite(..., 8, 1, ...)), never as a C string.
-static const char kFileMagic[8] = "filmgrn1";
-
-// Parses one table entry from `file` into `entry`: the "E" header line and,
-// when update_parameters is non-zero, the "p", "sY", "sCb", "sCr", "cY",
-// "cCb" and "cCr" lines that follow it.
-//
-// Failures are reported through aom_internal_error(error_info, ...). Every
-// count that comes from the file (ar_coeff_lag and the three num_*_points
-// fields) is range-checked against the fixed-size arrays of aom_film_grain_t
-// before it is used as a loop bound, so a malformed file cannot cause writes
-// past the end of those arrays.
-static void grain_table_entry_read(FILE *file,
-                                   struct aom_internal_error_info *error_info,
-                                   aom_film_grain_table_entry_t *entry) {
-  aom_film_grain_t *pars = &entry->params;
-  // random_seed is a uint16_t, so it has to be scanned with %hu (not %hd).
-  int num_read =
-      fscanf(file, "E %" PRId64 " %" PRId64 " %d %hu %d\n", &entry->start_time,
-             &entry->end_time, &pars->apply_grain, &pars->random_seed,
-             &pars->update_parameters);
-  if (num_read == 0 && feof(file)) return;
-  if (num_read != 5) {
-    aom_internal_error(error_info, AOM_CODEC_ERROR,
-                       "Unable to read entry header. Read %d != 5", num_read);
-    return;
-  }
-  if (!pars->update_parameters) return;
-
-  num_read = fscanf(file, "p %d %d %d %d %d %d %d %d %d %d %d %d\n",
-                    &pars->ar_coeff_lag, &pars->ar_coeff_shift,
-                    &pars->grain_scale_shift, &pars->scaling_shift,
-                    &pars->chroma_scaling_from_luma, &pars->overlap_flag,
-                    &pars->cb_mult, &pars->cb_luma_mult, &pars->cb_offset,
-                    &pars->cr_mult, &pars->cr_luma_mult, &pars->cr_offset);
-  if (num_read != 12) {
-    aom_internal_error(error_info, AOM_CODEC_ERROR,
-                       "Unable to read entry params. Read %d != 12",
-                       num_read);
-    return;
-  }
-
-  // ar_coeffs_y has 24 slots and ar_coeffs_cb/cr have 25, which is exactly
-  // what a lag of 3 needs: 2 * 3 * (3 + 1) = 24 luma values, one more for
-  // chroma. Reject anything else before it is turned into a loop bound.
-  const int max_ar_coeff_lag = 3;
-  if (pars->ar_coeff_lag < 0 || pars->ar_coeff_lag > max_ar_coeff_lag) {
-    aom_internal_error(error_info, AOM_CODEC_ERROR,
-                       "ar_coeff_lag out of range [0..%d]: %d",
-                       max_ar_coeff_lag, pars->ar_coeff_lag);
-    return;
-  }
-
-  // Capacities are derived from the array declarations themselves.
-  const int max_y_points = (int)(sizeof(pars->scaling_points_y) /
-                                 sizeof(pars->scaling_points_y[0]));
-  const int max_cb_points = (int)(sizeof(pars->scaling_points_cb) /
-                                  sizeof(pars->scaling_points_cb[0]));
-  const int max_cr_points = (int)(sizeof(pars->scaling_points_cr) /
-                                  sizeof(pars->scaling_points_cr[0]));
-
-  // fscanf() returns EOF (-1) on input failure, so compare against the
-  // expected conversion count instead of testing for zero.
-  if (1 != fscanf(file, "\tsY %d ", &pars->num_y_points)) {
-    aom_internal_error(error_info, AOM_CODEC_ERROR,
-                       "Unable to read num y points");
-    return;
-  }
-  if (pars->num_y_points < 0 || pars->num_y_points > max_y_points) {
-    aom_internal_error(error_info, AOM_CODEC_ERROR,
-                       "num_y_points out of range [0..%d]: %d", max_y_points,
-                       pars->num_y_points);
-    return;
-  }
-  for (int i = 0; i < pars->num_y_points; ++i) {
-    if (2 != fscanf(file, "%d %d", &pars->scaling_points_y[i][0],
-                    &pars->scaling_points_y[i][1])) {
-      aom_internal_error(error_info, AOM_CODEC_ERROR,
-                         "Unable to read y scaling points");
-      return;
-    }
-  }
-
-  if (1 != fscanf(file, "\n\tsCb %d", &pars->num_cb_points)) {
-    aom_internal_error(error_info, AOM_CODEC_ERROR,
-                       "Unable to read num cb points");
-    return;
-  }
-  if (pars->num_cb_points < 0 || pars->num_cb_points > max_cb_points) {
-    aom_internal_error(error_info, AOM_CODEC_ERROR,
-                       "num_cb_points out of range [0..%d]: %d", max_cb_points,
-                       pars->num_cb_points);
-    return;
-  }
-  for (int i = 0; i < pars->num_cb_points; ++i) {
-    if (2 != fscanf(file, "%d %d", &pars->scaling_points_cb[i][0],
-                    &pars->scaling_points_cb[i][1])) {
-      aom_internal_error(error_info, AOM_CODEC_ERROR,
-                         "Unable to read cb scaling points");
-      return;
-    }
-  }
-
-  if (1 != fscanf(file, "\n\tsCr %d", &pars->num_cr_points)) {
-    aom_internal_error(error_info, AOM_CODEC_ERROR,
-                       "Unable to read num cr points");
-    return;
-  }
-  if (pars->num_cr_points < 0 || pars->num_cr_points > max_cr_points) {
-    aom_internal_error(error_info, AOM_CODEC_ERROR,
-                       "num_cr_points out of range [0..%d]: %d", max_cr_points,
-                       pars->num_cr_points);
-    return;
-  }
-  for (int i = 0; i < pars->num_cr_points; ++i) {
-    if (2 != fscanf(file, "%d %d", &pars->scaling_points_cr[i][0],
-                    &pars->scaling_points_cr[i][1])) {
-      aom_internal_error(error_info, AOM_CODEC_ERROR,
-                         "Unable to read cr scaling points");
-      return;
-    }
-  }
-
-  // The literal-only fscanf() calls just skip the line tags; a missing tag
-  // is caught by the coefficient reads that follow.
-  fscanf(file, "\n\tcY");
-  const int n = 2 * pars->ar_coeff_lag * (pars->ar_coeff_lag + 1);
-  for (int i = 0; i < n; ++i) {
-    if (1 != fscanf(file, "%d", &pars->ar_coeffs_y[i])) {
-      aom_internal_error(error_info, AOM_CODEC_ERROR,
-                         "Unable to read Y coeffs");
-      return;
-    }
-  }
-  // Chroma planes carry n + 1 coefficients, hence the inclusive bound.
-  fscanf(file, "\n\tcCb");
-  for (int i = 0; i <= n; ++i) {
-    if (1 != fscanf(file, "%d", &pars->ar_coeffs_cb[i])) {
-      aom_internal_error(error_info, AOM_CODEC_ERROR,
-                         "Unable to read Cb coeffs");
-      return;
-    }
-  }
-  fscanf(file, "\n\tcCr");
-  for (int i = 0; i <= n; ++i) {
-    if (1 != fscanf(file, "%d", &pars->ar_coeffs_cr[i])) {
-      aom_internal_error(error_info, AOM_CODEC_ERROR,
-                         "Unable to read Cr coeffs");
-      return;
-    }
-  }
-  fscanf(file, "\n");
-}
-
-// Serializes one table entry to `file` in the text format read back by
-// grain_table_entry_read(): an "E" header line, followed (only when
-// update_parameters is non-zero) by the "p", "sY", "sCb", "sCr", "cY", "cCb"
-// and "cCr" lines.
-void grain_table_entry_write(FILE *file, aom_film_grain_table_entry_t *entry) {
-  const aom_film_grain_t *const p = &entry->params;
-
-  fprintf(file, "E %" PRId64 " %" PRId64 " %d %d %d\n", entry->start_time,
-          entry->end_time, p->apply_grain, p->random_seed,
-          p->update_parameters);
-  if (!p->update_parameters) return;
-
-  fprintf(file, "\tp %d %d %d %d %d %d %d %d %d %d %d %d\n", p->ar_coeff_lag,
-          p->ar_coeff_shift, p->grain_scale_shift, p->scaling_shift,
-          p->chroma_scaling_from_luma, p->overlap_flag, p->cb_mult,
-          p->cb_luma_mult, p->cb_offset, p->cr_mult, p->cr_luma_mult,
-          p->cr_offset);
-
-  // Scaling points, one line per plane.
-  int k;
-  fprintf(file, "\tsY %d ", p->num_y_points);
-  for (k = 0; k != p->num_y_points; ++k) {
-    fprintf(file, " %d %d", p->scaling_points_y[k][0],
-            p->scaling_points_y[k][1]);
-  }
-  fprintf(file, "\n\tsCb %d", p->num_cb_points);
-  for (k = 0; k != p->num_cb_points; ++k) {
-    fprintf(file, " %d %d", p->scaling_points_cb[k][0],
-            p->scaling_points_cb[k][1]);
-  }
-  fprintf(file, "\n\tsCr %d", p->num_cr_points);
-  for (k = 0; k != p->num_cr_points; ++k) {
-    fprintf(file, " %d %d", p->scaling_points_cr[k][0],
-            p->scaling_points_cr[k][1]);
-  }
-
-  // Auto-regression coefficients: chroma planes have one more than luma.
-  const int num_luma_coeffs = 2 * p->ar_coeff_lag * (p->ar_coeff_lag + 1);
-  const int num_chroma_coeffs = num_luma_coeffs + 1;
-  fprintf(file, "\n\tcY");
-  for (k = 0; k < num_luma_coeffs; ++k) {
-    fprintf(file, " %d", p->ar_coeffs_y[k]);
-  }
-  fprintf(file, "\n\tcCb");
-  for (k = 0; k < num_chroma_coeffs; ++k) {
-    fprintf(file, " %d", p->ar_coeffs_cb[k]);
-  }
-  fprintf(file, "\n\tcCr");
-  for (k = 0; k < num_chroma_coeffs; ++k) {
-    fprintf(file, " %d", p->ar_coeffs_cr[k]);
-  }
-  fprintf(file, "\n");
-}
-
-// Appends the mapping [time_stamp, end_time) -> *grain to the table.
-//
-// When the current tail already holds byte-identical parameters, its time
-// range is widened to include the new range instead of adding a node.
-// Otherwise a new zero-initialized node is allocated, linked after the tail
-// and filled in. If that allocation fails the table is left unchanged; the
-// function returns void, so the failure cannot be reported to the caller.
-void aom_film_grain_table_append(aom_film_grain_table_t *t, int64_t time_stamp,
-                                 int64_t end_time,
-                                 const aom_film_grain_t *grain) {
-  if (t->tail && !memcmp(grain, &t->tail->params, sizeof(*grain))) {
-    // Same parameters as the tail: extend its range rather than add a node.
-    t->tail->end_time = AOMMAX(t->tail->end_time, end_time);
-    t->tail->start_time = AOMMIN(t->tail->start_time, time_stamp);
-    return;
-  }
-
-  aom_film_grain_table_entry_t *new_tail = aom_malloc(sizeof(*new_tail));
-  // Without this check a failed allocation was passed straight to memset().
-  if (!new_tail) return;
-  memset(new_tail, 0, sizeof(*new_tail));
-  if (t->tail) t->tail->next = new_tail;
-  if (!t->head) t->head = new_tail;
-  t->tail = new_tail;
-
-  new_tail->start_time = time_stamp;
-  new_tail->end_time = end_time;
-  new_tail->params = *grain;
-}
-
-// Finds the entry whose range [start_time, end_time) contains `time_stamp`.
-//
-// On a hit the entry's parameters are copied to `*grain` (when grain is
-// non-NULL); unless time_stamp is 0, the caller's incoming random_seed is
-// kept instead of the stored one. On a miss `*grain` is left zeroed.
-//
-// When `erase` is non-zero, the span [time_stamp, end_time) is additionally
-// removed from the table: the matching entry is deleted, trimmed at either
-// end, or split in two, and if the span extends past that entry the removal
-// continues into the following entries.
-//
-// Returns 1 when an entry was found, 0 otherwise. 0 is also returned if
-// splitting an entry fails because memory could not be allocated.
-int aom_film_grain_table_lookup(aom_film_grain_table_t *t, int64_t time_stamp,
-                                int64_t end_time, int erase,
-                                aom_film_grain_t *grain) {
-  aom_film_grain_table_entry_t *entry = t->head;
-  aom_film_grain_table_entry_t *prev_entry = NULL;
-  // Same type as aom_film_grain_t::random_seed (it was int16_t before).
-  const uint16_t random_seed = grain ? grain->random_seed : 0;
-  if (grain) memset(grain, 0, sizeof(*grain));
-
-  while (entry) {
-    aom_film_grain_table_entry_t *next = entry->next;
-    if (time_stamp >= entry->start_time && time_stamp < entry->end_time) {
-      if (grain) {
-        *grain = entry->params;
-        if (time_stamp != 0) grain->random_seed = random_seed;
-      }
-      if (!erase) return 1;
-
-      // Remember the original end: `entry` may be freed or truncated below.
-      const int64_t entry_end_time = entry->end_time;
-      if (time_stamp <= entry->start_time && end_time >= entry->end_time) {
-        // The erased span covers the whole entry: unlink and free it.
-        if (t->tail == entry) t->tail = prev_entry;
-        if (prev_entry) {
-          prev_entry->next = entry->next;
-        } else {
-          t->head = entry->next;
-        }
-        aom_free(entry);
-      } else if (time_stamp <= entry->start_time &&
-                 end_time < entry->end_time) {
-        // Erase the front of the entry.
-        entry->start_time = end_time;
-      } else if (time_stamp > entry->start_time &&
-                 end_time >= entry->end_time) {
-        // Erase the back of the entry.
-        entry->end_time = time_stamp;
-      } else {
-        // Erase the middle: split the entry into two.
-        aom_film_grain_table_entry_t *new_entry =
-            aom_malloc(sizeof(*new_entry));
-        if (!new_entry) return 0;
-        new_entry->next = entry->next;
-        new_entry->start_time = end_time;
-        new_entry->end_time = entry->end_time;
-        new_entry->params = entry->params;
-        entry->next = new_entry;
-        entry->end_time = time_stamp;
-        if (t->tail == entry) t->tail = new_entry;
-      }
-      // If segments aren't aligned, delete from the beginning of subsequent
-      // segments. This must use the saved end time: `entry` has either been
-      // freed above (reading it would be a use-after-free) or had its
-      // end_time moved back to time_stamp, which would miss the next segment.
-      if (end_time > entry_end_time) {
-        aom_film_grain_table_lookup(t, entry_end_time, end_time, 1, NULL);
-      }
-      return 1;
-    }
-    prev_entry = entry;
-    entry = next;
-  }
-  return 0;
-}
-
-// Loads a grain table from `filename`, adding one list node per entry read
-// from the file to the end of `t`.
-//
-// The file must start with the 8-byte signature followed by one more byte
-// (expected to be white space). Errors are reported through
-// aom_internal_error(); the function returns error_info->error_code, which is
-// AOM_CODEC_OK when the whole file was read. An entry that failed to parse is
-// still linked into the table, so the caller can release everything with
-// aom_film_grain_table_free().
-aom_codec_err_t aom_film_grain_table_read(
-    aom_film_grain_table_t *t, const char *filename,
-    struct aom_internal_error_info *error_info) {
-  FILE *file = fopen(filename, "rb");
-  if (!file) {
-    aom_internal_error(error_info, AOM_CODEC_ERROR, "Unable to open %s",
-                       filename);
-    return error_info->error_code;
-  }
-  error_info->error_code = AOM_CODEC_OK;
-
-  // Read in one extra character as there should be white space after
-  // the header.
-  char magic[9];
-  if (!fread(magic, 9, 1, file) || memcmp(magic, kFileMagic, 8)) {
-    // Close before reporting so the handle is released even if the error
-    // handler does not return here.
-    // NOTE(review): confirm whether aom_internal_error() can longjmp.
-    fclose(file);
-    aom_internal_error(error_info, AOM_CODEC_ERROR,
-                       "Unable to read (or invalid) file magic");
-    return error_info->error_code;
-  }
-
-  // Start from the current tail so entries are chained after anything the
-  // table already holds (for an empty table this is NULL, as before).
-  aom_film_grain_table_entry_t *prev_entry = t->tail;
-  while (!feof(file)) {
-    aom_film_grain_table_entry_t *entry = aom_malloc(sizeof(*entry));
-    if (!entry) {
-      // The result used to be passed to memset() unchecked.
-      fclose(file);
-      aom_internal_error(error_info, AOM_CODEC_MEM_ERROR,
-                         "Unable to allocate grain table entry");
-      return error_info->error_code;
-    }
-    memset(entry, 0, sizeof(*entry));
-    grain_table_entry_read(file, error_info, entry);
-    entry->next = NULL;
-
-    if (prev_entry) prev_entry->next = entry;
-    if (!t->head) t->head = entry;
-    t->tail = entry;
-    prev_entry = entry;
-
-    if (error_info->error_code != AOM_CODEC_OK) break;
-  }
-
-  fclose(file);
-  return error_info->error_code;
-}
-
-// Writes the whole table to `filename`: the 8-byte signature, a newline, and
-// then every entry in list order. Returns error_info->error_code, which is
-// AOM_CODEC_OK unless the file could not be opened or the signature could
-// not be written.
-aom_codec_err_t aom_film_grain_table_write(
-    const aom_film_grain_table_t *t, const char *filename,
-    struct aom_internal_error_info *error_info) {
-  error_info->error_code = AOM_CODEC_OK;
-
-  FILE *out = fopen(filename, "wb");
-  if (out == NULL) {
-    aom_internal_error(error_info, AOM_CODEC_ERROR, "Unable to open file %s",
-                       filename);
-    return error_info->error_code;
-  }
-
-  if (fwrite(kFileMagic, 8, 1, out) == 0) {
-    aom_internal_error(error_info, AOM_CODEC_ERROR,
-                       "Unable to write file magic");
-    fclose(out);
-    return error_info->error_code;
-  }
-
-  fprintf(out, "\n");
-  for (aom_film_grain_table_entry_t *node = t->head; node != NULL;
-       node = node->next) {
-    grain_table_entry_write(out, node);
-  }
-  fclose(out);
-  return error_info->error_code;
-}
-
-// Releases every node of the table and resets the table itself to all zeros
-// (empty head and tail).
-void aom_film_grain_table_free(aom_film_grain_table_t *t) {
-  for (aom_film_grain_table_entry_t *node = t->head; node;) {
-    // Grab the successor first; `node` is gone after aom_free().
-    aom_film_grain_table_entry_t *const following = node->next;
-    aom_free(node);
-    node = following;
-  }
-  memset(t, 0, sizeof(*t));
-}
diff --git a/third_party/aom/aom_dsp/grain_table.h b/third_party/aom/aom_dsp/grain_table.h
deleted file mode 100644
index a8ac50730..000000000
--- a/third_party/aom/aom_dsp/grain_table.h
+++ /dev/null
@@ -1,102 +0,0 @@
-/*
- * Copyright (c) 2018, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-/*!\file
- * \brief A table mapping from time to corresponding film grain parameters.
- *
- * In order to apply grain synthesis in the decoder, the film grain parameters
- * need to be signalled in the encoder. The film grain parameters are time
- * varying, and for two-pass encoding (and denoiser implementation flexibility)
- * it is common to denoise the video and do parameter estimation before encoding
- * the denoised video.
- *
- * The film grain table is used to provide this flexibility and is used as a
- * parameter that is passed to the encoder.
- *
- * Further, if regraining is to be done in say a single pass mode, or in two
- * pass within the encoder (before frames are added to the lookahead buffer),
- * this data structure can be used to keep track of on-the-fly estimated grain
- * parameters, that are then extracted from the table before the encoded frame
- * is written.
- */
-#ifndef AOM_AOM_DSP_GRAIN_TABLE_H_
-#define AOM_AOM_DSP_GRAIN_TABLE_H_
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-#include "aom_dsp/grain_synthesis.h"
-#include "aom/internal/aom_codec_internal.h"
-
-// One node of the grain table: a set of film grain parameters together with
-// the half-open time range [start_time, end_time) they apply to. Nodes form
-// a singly linked list through `next`.
-typedef struct aom_film_grain_table_entry_t {
- aom_film_grain_t params;
- int64_t start_time;
- int64_t end_time;
- struct aom_film_grain_table_entry_t *next;
-} aom_film_grain_table_entry_t;
-
-// The table itself: first and last node of the entry list. Both pointers are
-// null for an empty table (aom_film_grain_table_free() leaves it that way).
-typedef struct {
- aom_film_grain_table_entry_t *head;
- aom_film_grain_table_entry_t *tail;
-} aom_film_grain_table_t;
-
-/*!\brief Add a mapping from [time_stamp, end_time) to the given grain
- * parameters
- *
- * \param[in,out] table The grain table
- * \param[in] time_stamp The start time stamp
- * \param[in] end_time The end time stamp
- * \param[in] grain The grain parameters
- */
-void aom_film_grain_table_append(aom_film_grain_table_t *table,
- int64_t time_stamp, int64_t end_time,
- const aom_film_grain_t *grain);
-
-/*!\brief Look-up (and optionally erase) the grain parameters for the given time
- *
- * Returns 1 if an entry containing time_stamp was found, 0 otherwise.
- *
- * \param[in,out] t The grain table (modified when erase is non-zero)
- * \param[in] time_stamp The start time stamp
- * \param[in] end_time The end time stamp
- * \param[in] erase Whether the time segment can be deleted
- * \param[out] grain The output grain parameters
- */
-int aom_film_grain_table_lookup(aom_film_grain_table_t *t, int64_t time_stamp,
- int64_t end_time, int erase,
- aom_film_grain_t *grain);
-
-/*!\brief Reads the grain table from a file.
- *
- * \param[out] table The grain table
- * \param[in] filename The file to read from
- * \param[in] error_info Error info for tracking errors
- */
-aom_codec_err_t aom_film_grain_table_read(
- aom_film_grain_table_t *table, const char *filename,
- struct aom_internal_error_info *error_info);
-
-/*!\brief Writes the grain table to a file.
- *
- * \param[in] t The grain table
- * \param[in] filename The file to write to
- * \param[in] error_info Error info for tracking errors
- */
-aom_codec_err_t aom_film_grain_table_write(
- const aom_film_grain_table_t *t, const char *filename,
- struct aom_internal_error_info *error_info);
-
-void aom_film_grain_table_free(aom_film_grain_table_t *t);
-
-#ifdef __cplusplus
-}
-#endif
-
-#endif // AOM_AOM_DSP_GRAIN_TABLE_H_
diff --git a/third_party/aom/aom_dsp/intrapred.c b/third_party/aom/aom_dsp/intrapred.c
deleted file mode 100644
index c6aa6b207..000000000
--- a/third_party/aom/aom_dsp/intrapred.c
+++ /dev/null
@@ -1,792 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#include <assert.h>
-#include <math.h>
-
-#include "config/aom_config.h"
-#include "config/aom_dsp_rtcd.h"
-
-#include "aom_dsp/aom_dsp_common.h"
-#include "aom_dsp/intrapred_common.h"
-#include "aom_mem/aom_mem.h"
-#include "aom_ports/bitops.h"
-
-// Vertical prediction: every one of the bh output rows is a copy of the bw
-// pixels in `above`. `left` is not used.
-static INLINE void v_predictor(uint8_t *dst, ptrdiff_t stride, int bw, int bh,
-                               const uint8_t *above, const uint8_t *left) {
-  (void)left;
-
-  int rows_left = bh;
-  while (rows_left-- > 0) {
-    memcpy(dst, above, bw);
-    dst += stride;
-  }
-}
-
-// Horizontal prediction: output row y is filled with the single value
-// left[y]. `above` is not used.
-static INLINE void h_predictor(uint8_t *dst, ptrdiff_t stride, int bw, int bh,
-                               const uint8_t *above, const uint8_t *left) {
-  (void)above;
-
-  for (int y = 0; y < bh; ++y, dst += stride) {
-    memset(dst, left[y], bw);
-  }
-}
-
-// Absolute difference |a - b|.
-static INLINE int abs_diff(int a, int b) {
-  if (a > b) return a - b;
-  return b - a;
-}
-
-// Paeth predictor for one pixel: computes base = top + left - top_left and
-// returns whichever of left, top and top_left is nearest to it. Ties are
-// resolved in that order (left first, then top).
-static INLINE uint16_t paeth_predictor_single(uint16_t left, uint16_t top,
-                                              uint16_t top_left) {
-  const int base = top + left - top_left;
-  const int dist_left = abs_diff(base, left);
-  const int dist_top = abs_diff(base, top);
-  const int dist_top_left = abs_diff(base, top_left);
-
-  if (dist_left <= dist_top && dist_left <= dist_top_left) return left;
-  if (dist_top <= dist_top_left) return top;
-  return top_left;
-}
-
-// Paeth prediction for a bw x bh block: each pixel is predicted from its
-// left neighbour (left[row]), its top neighbour (above[col]) and the shared
-// top-left pixel above[-1].
-static INLINE void paeth_predictor(uint8_t *dst, ptrdiff_t stride, int bw,
-                                   int bh, const uint8_t *above,
-                                   const uint8_t *left) {
-  const uint8_t corner = above[-1];
-
-  for (int y = 0; y < bh; ++y, dst += stride) {
-    const uint8_t left_px = left[y];
-    for (int x = 0; x < bw; ++x) {
-      dst[x] = (uint8_t)paeth_predictor_single(left_px, above[x], corner);
-    }
-  }
-}
-
-// Some basic checks on weights for smooth predictor.
-#define sm_weights_sanity_checks(weights_w, weights_h, weights_scale, \
- pred_scale) \
- assert(weights_w[0] < weights_scale); \
- assert(weights_h[0] < weights_scale); \
- assert(weights_scale - weights_w[bw - 1] < weights_scale); \
- assert(weights_scale - weights_h[bh - 1] < weights_scale); \
- assert(pred_scale < 31) // ensures no overflow when calculating predictor.
-
-#define divide_round(value, bits) (((value) + (1 << ((bits)-1))) >> (bits))
-
-// Smooth prediction: each pixel is a weighted sum of four values - the pixel
-// above it, the bottom-left pixel (standing in for "below"), the pixel to its
-// left and the top-right pixel (standing in for "right"). Row weights come
-// from sm_weight_arrays + bh and column weights from sm_weight_arrays + bw;
-// the sum is rounded and shifted down by 1 + sm_weight_log2_scale bits.
-static INLINE void smooth_predictor(uint8_t *dst, ptrdiff_t stride, int bw,
-                                    int bh, const uint8_t *above,
-                                    const uint8_t *left) {
-  const uint8_t bottom_left = left[bh - 1];  // stands in for the pixel below
-  const uint8_t top_right = above[bw - 1];   // stands in for the pixel right
-  const uint8_t *const col_weights = sm_weight_arrays + bw;
-  const uint8_t *const row_weights = sm_weight_arrays + bh;
-  // Two weight pairs are summed, each pair adding up to `scale`, hence the
-  // extra bit in the final shift.
-  const int log2_scale = 1 + sm_weight_log2_scale;
-  const uint16_t scale = (1 << sm_weight_log2_scale);
-  sm_weights_sanity_checks(col_weights, row_weights, scale,
-                           log2_scale + sizeof(*dst));
-
-  for (int y = 0; y < bh; ++y, dst += stride) {
-    for (int x = 0; x < bw; ++x) {
-      assert(scale >= row_weights[y] && scale >= col_weights[x]);
-      // Weights are held as 8-bit values, complementary ones included.
-      const uint8_t w_above = row_weights[y];
-      const uint8_t w_below = (uint8_t)(scale - row_weights[y]);
-      const uint8_t w_left = col_weights[x];
-      const uint8_t w_right = (uint8_t)(scale - col_weights[x]);
-
-      uint32_t acc = 0;
-      acc += w_above * above[x];
-      acc += w_below * bottom_left;
-      acc += w_left * left[y];
-      acc += w_right * top_right;
-      dst[x] = divide_round(acc, log2_scale);
-    }
-  }
-}
-
-// Vertical-only smooth prediction: each pixel blends the pixel above it with
-// the bottom-left pixel, using the row weights from sm_weight_arrays + bh.
-// `left` is only read for that bottom-left pixel.
-static INLINE void smooth_v_predictor(uint8_t *dst, ptrdiff_t stride, int bw,
-                                      int bh, const uint8_t *above,
-                                      const uint8_t *left) {
-  const uint8_t bottom_left = left[bh - 1];  // stands in for the pixel below
-  const uint8_t *const row_weights = sm_weight_arrays + bh;
-  // The two weights of a pixel add up to `scale`.
-  const int log2_scale = sm_weight_log2_scale;
-  const uint16_t scale = (1 << sm_weight_log2_scale);
-  sm_weights_sanity_checks(row_weights, row_weights, scale,
-                           log2_scale + sizeof(*dst));
-
-  for (int y = 0; y < bh; ++y, dst += stride) {
-    for (int x = 0; x < bw; ++x) {
-      assert(scale >= row_weights[y]);
-      // Weights are held as 8-bit values, the complementary one included.
-      const uint8_t w_above = row_weights[y];
-      const uint8_t w_below = (uint8_t)(scale - row_weights[y]);
-
-      uint32_t acc = 0;
-      acc += w_above * above[x];
-      acc += w_below * bottom_left;
-      dst[x] = divide_round(acc, log2_scale);
-    }
-  }
-}
-
-// Horizontal-only smooth prediction: each pixel blends the pixel to its left
-// with the top-right pixel, using the column weights from
-// sm_weight_arrays + bw. `above` is only read for that top-right pixel.
-static INLINE void smooth_h_predictor(uint8_t *dst, ptrdiff_t stride, int bw,
-                                      int bh, const uint8_t *above,
-                                      const uint8_t *left) {
-  const uint8_t top_right = above[bw - 1];  // stands in for the pixel right
-  const uint8_t *const col_weights = sm_weight_arrays + bw;
-  // The two weights of a pixel add up to `scale`.
-  const int log2_scale = sm_weight_log2_scale;
-  const uint16_t scale = (1 << sm_weight_log2_scale);
-  sm_weights_sanity_checks(col_weights, col_weights, scale,
-                           log2_scale + sizeof(*dst));
-
-  for (int y = 0; y < bh; ++y, dst += stride) {
-    for (int x = 0; x < bw; ++x) {
-      assert(scale >= col_weights[x]);
-      // Weights are held as 8-bit values, the complementary one included.
-      const uint8_t w_left = col_weights[x];
-      const uint8_t w_right = (uint8_t)(scale - col_weights[x]);
-
-      uint32_t acc = 0;
-      acc += w_left * left[y];
-      acc += w_right * top_right;
-      dst[x] = divide_round(acc, log2_scale);
-    }
-  }
-}
-
-// Fills the bw x bh block with the constant 128. Neither neighbour array is
-// read.
-static INLINE void dc_128_predictor(uint8_t *dst, ptrdiff_t stride, int bw,
-                                    int bh, const uint8_t *above,
-                                    const uint8_t *left) {
-  (void)above;
-  (void)left;
-
-  for (int y = 0; y < bh; ++y, dst += stride) {
-    memset(dst, 128, bw);
-  }
-}
-
-// Fills the block with the rounded average of the bh left-neighbour pixels.
-// `above` is not used.
-static INLINE void dc_left_predictor(uint8_t *dst, ptrdiff_t stride, int bw,
-                                     int bh, const uint8_t *above,
-                                     const uint8_t *left) {
-  (void)above;
-
-  int total = 0;
-  for (int k = 0; k < bh; ++k) total += left[k];
-  // Adding half the divisor rounds to nearest.
-  const int dc = (total + (bh >> 1)) / bh;
-
-  for (int y = 0; y < bh; ++y, dst += stride) {
-    memset(dst, dc, bw);
-  }
-}
-
-// Fills the block with the rounded average of the bw above-neighbour pixels.
-// `left` is not used.
-static INLINE void dc_top_predictor(uint8_t *dst, ptrdiff_t stride, int bw,
-                                    int bh, const uint8_t *above,
-                                    const uint8_t *left) {
-  (void)left;
-
-  int total = 0;
-  for (int k = 0; k < bw; ++k) total += above[k];
-  // Adding half the divisor rounds to nearest.
-  const int dc = (total + (bw >> 1)) / bw;
-
-  for (int y = 0; y < bh; ++y, dst += stride) {
-    memset(dst, dc, bw);
-  }
-}
-
-// Fills the block with the rounded average of all bw above-neighbour and bh
-// left-neighbour pixels.
-static INLINE void dc_predictor(uint8_t *dst, ptrdiff_t stride, int bw, int bh,
-                                const uint8_t *above, const uint8_t *left) {
-  const int num_neighbours = bw + bh;
-  int total = 0;
-  int k;
-
-  for (k = 0; k < bw; ++k) total += above[k];
-  for (k = 0; k < bh; ++k) total += left[k];
-
-  // Adding half the divisor rounds to nearest.
-  const int dc = (total + (num_neighbours >> 1)) / num_neighbours;
-
-  for (int y = 0; y < bh; ++y, dst += stride) {
-    memset(dst, dc, bw);
-  }
-}
-
-// Division replaced by shift, multiply, shift:
-// ((num >> shift1) * multiplier) >> shift2.
-static INLINE int divide_using_multiply_shift(int num, int shift1,
-                                              int multiplier, int shift2) {
-  return ((num >> shift1) * multiplier) >> shift2;
-}
-
- // The constants (multiplier and shifts) for a given block size are obtained
- // as follows:
- // - Let sum_w_h = block width + block height.
- // - Shift 'sum_w_h' right until we reach an odd number. Let the number of
- // shifts for that block size be called 'shift1' (see the parameter in
- // dc_predictor_rect() function), and let the odd number be 'd'. [d has only 2
- // possible values: d = 3 for a 1:2 rect block and d = 5 for a 1:4 rect
- // block].
- // - Find multipliers for (i) dividing by 3, and (ii) dividing by 5,
- // using the "Algorithm 1" in:
- // http://ieeexplore.ieee.org/stamp/stamp.jsp?tp=&arnumber=1467632
- // by ensuring that m + n = 16 (in that algorithm). This ensures that our 2nd
- // shift will be 16, regardless of the block size.
-
- // Note: For low bitdepth, assembly code may be optimized by using smaller
- // constants for smaller block sizes, where the range of the 'sum' is
- // restricted to fewer bits.
-
-// 0x5556 = 21846 = ceil(2^16 / 3): multiply, then shift right by DC_SHIFT2,
-// to divide by 3 (1:2 and 2:1 blocks).
-#define DC_MULTIPLIER_1X2 0x5556
-// 0x3334 = 13108 = ceil(2^16 / 5): same scheme for dividing by 5 (1:4 and
-// 4:1 blocks).
-#define DC_MULTIPLIER_1X4 0x3334
-
-// Second shift of the multiply-shift division; fixed at 16 for all sizes.
-#define DC_SHIFT2 16
-
-// DC prediction for rectangular blocks. The average over the bw + bh
-// neighbour pixels is computed without a division: the rounded sum is shifted
-// right by shift1, multiplied by `multiplier` and shifted right by DC_SHIFT2.
-// The caller supplies the shift1/multiplier pair matching bw + bh.
-static INLINE void dc_predictor_rect(uint8_t *dst, ptrdiff_t stride, int bw,
-                                     int bh, const uint8_t *above,
-                                     const uint8_t *left, int shift1,
-                                     int multiplier) {
-  int total = 0;
-  int k;
-  for (k = 0; k < bw; ++k) total += above[k];
-  for (k = 0; k < bh; ++k) total += left[k];
-
-  // Half the neighbour count is added first so the result rounds to nearest.
-  const int rounded_total = total + ((bw + bh) >> 1);
-  const int expected_dc = divide_using_multiply_shift(rounded_total, shift1,
-                                                      multiplier, DC_SHIFT2);
-  assert(expected_dc < (1 << 8));
-
-  for (int y = 0; y < bh; ++y, dst += stride) {
-    memset(dst, expected_dc, bw);
-  }
-}
-
-#undef DC_SHIFT2
-
-// Rectangular DC predictors. Each wrapper passes dc_predictor_rect() the
-// shift1 that strips the power-of-two factor from bw + bh, and the multiplier
-// for the odd factor that remains (3 -> DC_MULTIPLIER_1X2,
-// 5 -> DC_MULTIPLIER_1X4).
-
-// 4 + 8 = 12 = 3 << 2
-void aom_dc_predictor_4x8_c(uint8_t *dst, ptrdiff_t stride,
- const uint8_t *above, const uint8_t *left) {
- dc_predictor_rect(dst, stride, 4, 8, above, left, 2, DC_MULTIPLIER_1X2);
-}
-
-// 8 + 4 = 12 = 3 << 2
-void aom_dc_predictor_8x4_c(uint8_t *dst, ptrdiff_t stride,
- const uint8_t *above, const uint8_t *left) {
- dc_predictor_rect(dst, stride, 8, 4, above, left, 2, DC_MULTIPLIER_1X2);
-}
-
-// 4 + 16 = 20 = 5 << 2
-void aom_dc_predictor_4x16_c(uint8_t *dst, ptrdiff_t stride,
- const uint8_t *above, const uint8_t *left) {
- dc_predictor_rect(dst, stride, 4, 16, above, left, 2, DC_MULTIPLIER_1X4);
-}
-
-// 16 + 4 = 20 = 5 << 2
-void aom_dc_predictor_16x4_c(uint8_t *dst, ptrdiff_t stride,
- const uint8_t *above, const uint8_t *left) {
- dc_predictor_rect(dst, stride, 16, 4, above, left, 2, DC_MULTIPLIER_1X4);
-}
-
-// 8 + 16 = 24 = 3 << 3
-void aom_dc_predictor_8x16_c(uint8_t *dst, ptrdiff_t stride,
- const uint8_t *above, const uint8_t *left) {
- dc_predictor_rect(dst, stride, 8, 16, above, left, 3, DC_MULTIPLIER_1X2);
-}
-
-// 16 + 8 = 24 = 3 << 3
-void aom_dc_predictor_16x8_c(uint8_t *dst, ptrdiff_t stride,
- const uint8_t *above, const uint8_t *left) {
- dc_predictor_rect(dst, stride, 16, 8, above, left, 3, DC_MULTIPLIER_1X2);
-}
-
-// 8 + 32 = 40 = 5 << 3
-void aom_dc_predictor_8x32_c(uint8_t *dst, ptrdiff_t stride,
- const uint8_t *above, const uint8_t *left) {
- dc_predictor_rect(dst, stride, 8, 32, above, left, 3, DC_MULTIPLIER_1X4);
-}
-
-// 32 + 8 = 40 = 5 << 3
-void aom_dc_predictor_32x8_c(uint8_t *dst, ptrdiff_t stride,
- const uint8_t *above, const uint8_t *left) {
- dc_predictor_rect(dst, stride, 32, 8, above, left, 3, DC_MULTIPLIER_1X4);
-}
-
-// 16 + 32 = 48 = 3 << 4
-void aom_dc_predictor_16x32_c(uint8_t *dst, ptrdiff_t stride,
- const uint8_t *above, const uint8_t *left) {
- dc_predictor_rect(dst, stride, 16, 32, above, left, 4, DC_MULTIPLIER_1X2);
-}
-
-// 32 + 16 = 48 = 3 << 4
-void aom_dc_predictor_32x16_c(uint8_t *dst, ptrdiff_t stride,
- const uint8_t *above, const uint8_t *left) {
- dc_predictor_rect(dst, stride, 32, 16, above, left, 4, DC_MULTIPLIER_1X2);
-}
-
-// 16 + 64 = 80 = 5 << 4
-void aom_dc_predictor_16x64_c(uint8_t *dst, ptrdiff_t stride,
- const uint8_t *above, const uint8_t *left) {
- dc_predictor_rect(dst, stride, 16, 64, above, left, 4, DC_MULTIPLIER_1X4);
-}
-
-// 64 + 16 = 80 = 5 << 4
-void aom_dc_predictor_64x16_c(uint8_t *dst, ptrdiff_t stride,
- const uint8_t *above, const uint8_t *left) {
- dc_predictor_rect(dst, stride, 64, 16, above, left, 4, DC_MULTIPLIER_1X4);
-}
-
-// 32 + 64 = 96 = 3 << 5
-void aom_dc_predictor_32x64_c(uint8_t *dst, ptrdiff_t stride,
- const uint8_t *above, const uint8_t *left) {
- dc_predictor_rect(dst, stride, 32, 64, above, left, 5, DC_MULTIPLIER_1X2);
-}
-
-// 64 + 32 = 96 = 3 << 5
-void aom_dc_predictor_64x32_c(uint8_t *dst, ptrdiff_t stride,
- const uint8_t *above, const uint8_t *left) {
- dc_predictor_rect(dst, stride, 64, 32, above, left, 5, DC_MULTIPLIER_1X2);
-}
-
-#undef DC_MULTIPLIER_1X2
-#undef DC_MULTIPLIER_1X4
-
-// 16-bit vertical prediction: every output row is a copy of the bw samples
-// in `above`. `left` and `bd` are not used.
-static INLINE void highbd_v_predictor(uint16_t *dst, ptrdiff_t stride, int bw,
-                                      int bh, const uint16_t *above,
-                                      const uint16_t *left, int bd) {
-  (void)left;
-  (void)bd;
-
-  int rows_left = bh;
-  while (rows_left-- > 0) {
-    memcpy(dst, above, bw * sizeof(uint16_t));
-    dst += stride;
-  }
-}
-
-// 16-bit horizontal prediction: output row y is filled with left[y] via
-// aom_memset16(). `above` and `bd` are not used.
-static INLINE void highbd_h_predictor(uint16_t *dst, ptrdiff_t stride, int bw,
-                                      int bh, const uint16_t *above,
-                                      const uint16_t *left, int bd) {
-  (void)above;
-  (void)bd;
-
-  for (int y = 0; y < bh; ++y, dst += stride) {
-    aom_memset16(dst, left[y], bw);
-  }
-}
-
-// 16-bit Paeth prediction: each sample is predicted from left[row],
-// above[col] and the shared top-left sample above[-1]. `bd` is not used.
-static INLINE void highbd_paeth_predictor(uint16_t *dst, ptrdiff_t stride,
-                                          int bw, int bh, const uint16_t *above,
-                                          const uint16_t *left, int bd) {
-  (void)bd;
-  const uint16_t corner = above[-1];
-
-  for (int y = 0; y < bh; ++y, dst += stride) {
-    const uint16_t left_px = left[y];
-    for (int x = 0; x < bw; ++x) {
-      dst[x] = paeth_predictor_single(left_px, above[x], corner);
-    }
-  }
-}
-
-// 16-bit smooth prediction: each sample is a weighted sum of the sample
-// above it, the bottom-left sample, the sample to its left and the top-right
-// sample, rounded and shifted down by 1 + sm_weight_log2_scale bits. Row
-// weights come from sm_weight_arrays + bh, column weights from
-// sm_weight_arrays + bw. `bd` is not used.
-static INLINE void highbd_smooth_predictor(uint16_t *dst, ptrdiff_t stride,
-                                           int bw, int bh,
-                                           const uint16_t *above,
-                                           const uint16_t *left, int bd) {
-  (void)bd;
-  const uint16_t bottom_left = left[bh - 1];  // stands in for the pixel below
-  const uint16_t top_right = above[bw - 1];   // stands in for the pixel right
-  const uint8_t *const col_weights = sm_weight_arrays + bw;
-  const uint8_t *const row_weights = sm_weight_arrays + bh;
-  // Two weight pairs are summed, each pair adding up to `scale`, hence the
-  // extra bit in the final shift.
-  const int log2_scale = 1 + sm_weight_log2_scale;
-  const uint16_t scale = (1 << sm_weight_log2_scale);
-  sm_weights_sanity_checks(col_weights, row_weights, scale,
-                           log2_scale + sizeof(*dst));
-
-  for (int y = 0; y < bh; ++y, dst += stride) {
-    for (int x = 0; x < bw; ++x) {
-      assert(scale >= row_weights[y] && scale >= col_weights[x]);
-      // Weights are held as 8-bit values, complementary ones included.
-      const uint8_t w_above = row_weights[y];
-      const uint8_t w_below = (uint8_t)(scale - row_weights[y]);
-      const uint8_t w_left = col_weights[x];
-      const uint8_t w_right = (uint8_t)(scale - col_weights[x]);
-
-      uint32_t acc = 0;
-      acc += w_above * above[x];
-      acc += w_below * bottom_left;
-      acc += w_left * left[y];
-      acc += w_right * top_right;
-      dst[x] = divide_round(acc, log2_scale);
-    }
-  }
-}
-
-// 16-bit vertical-only smooth prediction: each sample blends the sample
-// above it with the bottom-left sample, using the row weights from
-// sm_weight_arrays + bh. `bd` is not used.
-static INLINE void highbd_smooth_v_predictor(uint16_t *dst, ptrdiff_t stride,
-                                             int bw, int bh,
-                                             const uint16_t *above,
-                                             const uint16_t *left, int bd) {
-  (void)bd;
-  const uint16_t bottom_left = left[bh - 1];  // stands in for the pixel below
-  const uint8_t *const row_weights = sm_weight_arrays + bh;
-  // The two weights of a sample add up to `scale`.
-  const int log2_scale = sm_weight_log2_scale;
-  const uint16_t scale = (1 << sm_weight_log2_scale);
-  sm_weights_sanity_checks(row_weights, row_weights, scale,
-                           log2_scale + sizeof(*dst));
-
-  for (int y = 0; y < bh; ++y, dst += stride) {
-    for (int x = 0; x < bw; ++x) {
-      assert(scale >= row_weights[y]);
-      // Weights are held as 8-bit values, the complementary one included.
-      const uint8_t w_above = row_weights[y];
-      const uint8_t w_below = (uint8_t)(scale - row_weights[y]);
-
-      uint32_t acc = 0;
-      acc += w_above * above[x];
-      acc += w_below * bottom_left;
-      dst[x] = divide_round(acc, log2_scale);
-    }
-  }
-}
-
-static INLINE void highbd_smooth_h_predictor(uint16_t *dst, ptrdiff_t stride,
- int bw, int bh,
- const uint16_t *above,
- const uint16_t *left, int bd) {
- (void)bd;
- const uint16_t right_pred = above[bw - 1]; // estimated by top-right pixel
- const uint8_t *const sm_weights = sm_weight_arrays + bw;
- // scale = 2^sm_weight_log2_scale
- const int log2_scale = sm_weight_log2_scale;
- const uint16_t scale = (1 << sm_weight_log2_scale);
- sm_weights_sanity_checks(sm_weights, sm_weights, scale,
- log2_scale + sizeof(*dst));
-
- int r;
- for (r = 0; r < bh; r++) {
- int c;
- for (c = 0; c < bw; ++c) {
- const uint16_t pixels[] = { left[r], right_pred };
- const uint8_t weights[] = { sm_weights[c], scale - sm_weights[c] };
- uint32_t this_pred = 0;
- assert(scale >= sm_weights[c]);
- int i;
- for (i = 0; i < 2; ++i) {
- this_pred += weights[i] * pixels[i];
- }
- dst[c] = divide_round(this_pred, log2_scale);
- }
- dst += stride;
- }
-}
-
-static INLINE void highbd_dc_128_predictor(uint16_t *dst, ptrdiff_t stride,
- int bw, int bh,
- const uint16_t *above,
- const uint16_t *left, int bd) {
- int r;
- (void)above;
- (void)left;
-
- for (r = 0; r < bh; r++) {
- aom_memset16(dst, 128 << (bd - 8), bw);
- dst += stride;
- }
-}
-
-static INLINE void highbd_dc_left_predictor(uint16_t *dst, ptrdiff_t stride,
- int bw, int bh,
- const uint16_t *above,
- const uint16_t *left, int bd) {
- int i, r, expected_dc, sum = 0;
- (void)above;
- (void)bd;
-
- for (i = 0; i < bh; i++) sum += left[i];
- expected_dc = (sum + (bh >> 1)) / bh;
-
- for (r = 0; r < bh; r++) {
- aom_memset16(dst, expected_dc, bw);
- dst += stride;
- }
-}
-
-static INLINE void highbd_dc_top_predictor(uint16_t *dst, ptrdiff_t stride,
- int bw, int bh,
- const uint16_t *above,
- const uint16_t *left, int bd) {
- int i, r, expected_dc, sum = 0;
- (void)left;
- (void)bd;
-
- for (i = 0; i < bw; i++) sum += above[i];
- expected_dc = (sum + (bw >> 1)) / bw;
-
- for (r = 0; r < bh; r++) {
- aom_memset16(dst, expected_dc, bw);
- dst += stride;
- }
-}
-
-static INLINE void highbd_dc_predictor(uint16_t *dst, ptrdiff_t stride, int bw,
- int bh, const uint16_t *above,
- const uint16_t *left, int bd) {
- int i, r, expected_dc, sum = 0;
- const int count = bw + bh;
- (void)bd;
-
- for (i = 0; i < bw; i++) {
- sum += above[i];
- }
- for (i = 0; i < bh; i++) {
- sum += left[i];
- }
-
- expected_dc = (sum + (count >> 1)) / count;
-
- for (r = 0; r < bh; r++) {
- aom_memset16(dst, expected_dc, bw);
- dst += stride;
- }
-}
-
-// Obtained similarly as DC_MULTIPLIER_1X2 and DC_MULTIPLIER_1X4 above, but
-// assume 2nd shift of 17 bits instead of 16.
-// Note: Strictly speaking, 2nd shift needs to be 17 only when:
-// - bit depth == 12, and
-// - bw + bh is divisible by 5 (as opposed to divisible by 3).
-// All other cases can use half the multipliers with a shift of 16 instead.
-// This special optimization can be used when writing assembly code.
-#define HIGHBD_DC_MULTIPLIER_1X2 0xAAAB
-// Note: This constant is odd, but a smaller even constant (0x199a) with the
-// appropriate shift should work for neon in 8/10-bit.
-#define HIGHBD_DC_MULTIPLIER_1X4 0x6667
-
-#define HIGHBD_DC_SHIFT2 17
-
-static INLINE void highbd_dc_predictor_rect(uint16_t *dst, ptrdiff_t stride,
- int bw, int bh,
- const uint16_t *above,
- const uint16_t *left, int bd,
- int shift1, uint32_t multiplier) {
- int sum = 0;
- (void)bd;
-
- for (int i = 0; i < bw; i++) {
- sum += above[i];
- }
- for (int i = 0; i < bh; i++) {
- sum += left[i];
- }
-
- const int expected_dc = divide_using_multiply_shift(
- sum + ((bw + bh) >> 1), shift1, multiplier, HIGHBD_DC_SHIFT2);
- assert(expected_dc < (1 << bd));
-
- for (int r = 0; r < bh; r++) {
- aom_memset16(dst, expected_dc, bw);
- dst += stride;
- }
-}
-
-#undef HIGHBD_DC_SHIFT2
-
-void aom_highbd_dc_predictor_4x8_c(uint16_t *dst, ptrdiff_t stride,
- const uint16_t *above, const uint16_t *left,
- int bd) {
- highbd_dc_predictor_rect(dst, stride, 4, 8, above, left, bd, 2,
- HIGHBD_DC_MULTIPLIER_1X2);
-}
-
-void aom_highbd_dc_predictor_8x4_c(uint16_t *dst, ptrdiff_t stride,
- const uint16_t *above, const uint16_t *left,
- int bd) {
- highbd_dc_predictor_rect(dst, stride, 8, 4, above, left, bd, 2,
- HIGHBD_DC_MULTIPLIER_1X2);
-}
-
-void aom_highbd_dc_predictor_4x16_c(uint16_t *dst, ptrdiff_t stride,
- const uint16_t *above, const uint16_t *left,
- int bd) {
- highbd_dc_predictor_rect(dst, stride, 4, 16, above, left, bd, 2,
- HIGHBD_DC_MULTIPLIER_1X4);
-}
-
-void aom_highbd_dc_predictor_16x4_c(uint16_t *dst, ptrdiff_t stride,
- const uint16_t *above, const uint16_t *left,
- int bd) {
- highbd_dc_predictor_rect(dst, stride, 16, 4, above, left, bd, 2,
- HIGHBD_DC_MULTIPLIER_1X4);
-}
-
-void aom_highbd_dc_predictor_8x16_c(uint16_t *dst, ptrdiff_t stride,
- const uint16_t *above, const uint16_t *left,
- int bd) {
- highbd_dc_predictor_rect(dst, stride, 8, 16, above, left, bd, 3,
- HIGHBD_DC_MULTIPLIER_1X2);
-}
-
-void aom_highbd_dc_predictor_16x8_c(uint16_t *dst, ptrdiff_t stride,
- const uint16_t *above, const uint16_t *left,
- int bd) {
- highbd_dc_predictor_rect(dst, stride, 16, 8, above, left, bd, 3,
- HIGHBD_DC_MULTIPLIER_1X2);
-}
-
-void aom_highbd_dc_predictor_8x32_c(uint16_t *dst, ptrdiff_t stride,
- const uint16_t *above, const uint16_t *left,
- int bd) {
- highbd_dc_predictor_rect(dst, stride, 8, 32, above, left, bd, 3,
- HIGHBD_DC_MULTIPLIER_1X4);
-}
-
-void aom_highbd_dc_predictor_32x8_c(uint16_t *dst, ptrdiff_t stride,
- const uint16_t *above, const uint16_t *left,
- int bd) {
- highbd_dc_predictor_rect(dst, stride, 32, 8, above, left, bd, 3,
- HIGHBD_DC_MULTIPLIER_1X4);
-}
-
-void aom_highbd_dc_predictor_16x32_c(uint16_t *dst, ptrdiff_t stride,
- const uint16_t *above,
- const uint16_t *left, int bd) {
- highbd_dc_predictor_rect(dst, stride, 16, 32, above, left, bd, 4,
- HIGHBD_DC_MULTIPLIER_1X2);
-}
-
-void aom_highbd_dc_predictor_32x16_c(uint16_t *dst, ptrdiff_t stride,
- const uint16_t *above,
- const uint16_t *left, int bd) {
- highbd_dc_predictor_rect(dst, stride, 32, 16, above, left, bd, 4,
- HIGHBD_DC_MULTIPLIER_1X2);
-}
-
-void aom_highbd_dc_predictor_16x64_c(uint16_t *dst, ptrdiff_t stride,
- const uint16_t *above,
- const uint16_t *left, int bd) {
- highbd_dc_predictor_rect(dst, stride, 16, 64, above, left, bd, 4,
- HIGHBD_DC_MULTIPLIER_1X4);
-}
-
-void aom_highbd_dc_predictor_64x16_c(uint16_t *dst, ptrdiff_t stride,
- const uint16_t *above,
- const uint16_t *left, int bd) {
- highbd_dc_predictor_rect(dst, stride, 64, 16, above, left, bd, 4,
- HIGHBD_DC_MULTIPLIER_1X4);
-}
-
-void aom_highbd_dc_predictor_32x64_c(uint16_t *dst, ptrdiff_t stride,
- const uint16_t *above,
- const uint16_t *left, int bd) {
- highbd_dc_predictor_rect(dst, stride, 32, 64, above, left, bd, 5,
- HIGHBD_DC_MULTIPLIER_1X2);
-}
-
-void aom_highbd_dc_predictor_64x32_c(uint16_t *dst, ptrdiff_t stride,
- const uint16_t *above,
- const uint16_t *left, int bd) {
- highbd_dc_predictor_rect(dst, stride, 64, 32, above, left, bd, 5,
- HIGHBD_DC_MULTIPLIER_1X2);
-}
-
-#undef HIGHBD_DC_MULTIPLIER_1X2
-#undef HIGHBD_DC_MULTIPLIER_1X4
-
-// This serves as a wrapper function, so that all the prediction functions
-// can be unified and accessed as a pointer array. Note that the boundary
-// above and left are not necessarily used all the time.
-#define intra_pred_sized(type, width, height) \
- void aom_##type##_predictor_##width##x##height##_c( \
- uint8_t *dst, ptrdiff_t stride, const uint8_t *above, \
- const uint8_t *left) { \
- type##_predictor(dst, stride, width, height, above, left); \
- }
-
-#define intra_pred_highbd_sized(type, width, height) \
- void aom_highbd_##type##_predictor_##width##x##height##_c( \
- uint16_t *dst, ptrdiff_t stride, const uint16_t *above, \
- const uint16_t *left, int bd) { \
- highbd_##type##_predictor(dst, stride, width, height, above, left, bd); \
- }
-
-/* clang-format off */
-#define intra_pred_rectangular(type) \
- intra_pred_sized(type, 4, 8) \
- intra_pred_sized(type, 8, 4) \
- intra_pred_sized(type, 8, 16) \
- intra_pred_sized(type, 16, 8) \
- intra_pred_sized(type, 16, 32) \
- intra_pred_sized(type, 32, 16) \
- intra_pred_sized(type, 32, 64) \
- intra_pred_sized(type, 64, 32) \
- intra_pred_sized(type, 4, 16) \
- intra_pred_sized(type, 16, 4) \
- intra_pred_sized(type, 8, 32) \
- intra_pred_sized(type, 32, 8) \
- intra_pred_sized(type, 16, 64) \
- intra_pred_sized(type, 64, 16) \
- intra_pred_highbd_sized(type, 4, 8) \
- intra_pred_highbd_sized(type, 8, 4) \
- intra_pred_highbd_sized(type, 8, 16) \
- intra_pred_highbd_sized(type, 16, 8) \
- intra_pred_highbd_sized(type, 16, 32) \
- intra_pred_highbd_sized(type, 32, 16) \
- intra_pred_highbd_sized(type, 32, 64) \
- intra_pred_highbd_sized(type, 64, 32) \
- intra_pred_highbd_sized(type, 4, 16) \
- intra_pred_highbd_sized(type, 16, 4) \
- intra_pred_highbd_sized(type, 8, 32) \
- intra_pred_highbd_sized(type, 32, 8) \
- intra_pred_highbd_sized(type, 16, 64) \
- intra_pred_highbd_sized(type, 64, 16)
-#define intra_pred_above_4x4(type) \
- intra_pred_sized(type, 8, 8) \
- intra_pred_sized(type, 16, 16) \
- intra_pred_sized(type, 32, 32) \
- intra_pred_sized(type, 64, 64) \
- intra_pred_highbd_sized(type, 4, 4) \
- intra_pred_highbd_sized(type, 8, 8) \
- intra_pred_highbd_sized(type, 16, 16) \
- intra_pred_highbd_sized(type, 32, 32) \
- intra_pred_highbd_sized(type, 64, 64) \
- intra_pred_rectangular(type)
-#define intra_pred_allsizes(type) \
- intra_pred_sized(type, 4, 4) \
- intra_pred_above_4x4(type)
-#define intra_pred_square(type) \
- intra_pred_sized(type, 4, 4) \
- intra_pred_sized(type, 8, 8) \
- intra_pred_sized(type, 16, 16) \
- intra_pred_sized(type, 32, 32) \
- intra_pred_sized(type, 64, 64) \
- intra_pred_highbd_sized(type, 4, 4) \
- intra_pred_highbd_sized(type, 8, 8) \
- intra_pred_highbd_sized(type, 16, 16) \
- intra_pred_highbd_sized(type, 32, 32) \
- intra_pred_highbd_sized(type, 64, 64)
-
-intra_pred_allsizes(v)
-intra_pred_allsizes(h)
-intra_pred_allsizes(smooth)
-intra_pred_allsizes(smooth_v)
-intra_pred_allsizes(smooth_h)
-intra_pred_allsizes(paeth)
-intra_pred_allsizes(dc_128)
-intra_pred_allsizes(dc_left)
-intra_pred_allsizes(dc_top)
-intra_pred_square(dc)
-/* clang-format on */
-#undef intra_pred_allsizes
diff --git a/third_party/aom/aom_dsp/intrapred_common.h b/third_party/aom/aom_dsp/intrapred_common.h
deleted file mode 100644
index 3ec62a86e..000000000
--- a/third_party/aom/aom_dsp/intrapred_common.h
+++ /dev/null
@@ -1,47 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#ifndef AOM_AOM_DSP_INTRAPRED_COMMON_H_
-#define AOM_AOM_DSP_INTRAPRED_COMMON_H_
-
-#include "config/aom_config.h"
-
-// Weights are quadratic from '1' to '1 / block_size', scaled by
-// 2^sm_weight_log2_scale.
-static const int sm_weight_log2_scale = 8;
-
-// max(block_size_wide[BLOCK_LARGEST], block_size_high[BLOCK_LARGEST])
-#define MAX_BLOCK_DIM 64
-
-/* clang-format off */
-static const uint8_t sm_weight_arrays[2 * MAX_BLOCK_DIM] = {
- // Unused, because we always offset by bs, which is at least 2.
- 0, 0,
- // bs = 2
- 255, 128,
- // bs = 4
- 255, 149, 85, 64,
- // bs = 8
- 255, 197, 146, 105, 73, 50, 37, 32,
- // bs = 16
- 255, 225, 196, 170, 145, 123, 102, 84, 68, 54, 43, 33, 26, 20, 17, 16,
- // bs = 32
- 255, 240, 225, 210, 196, 182, 169, 157, 145, 133, 122, 111, 101, 92, 83, 74,
- 66, 59, 52, 45, 39, 34, 29, 25, 21, 17, 14, 12, 10, 9, 8, 8,
- // bs = 64
- 255, 248, 240, 233, 225, 218, 210, 203, 196, 189, 182, 176, 169, 163, 156,
- 150, 144, 138, 133, 127, 121, 116, 111, 106, 101, 96, 91, 86, 82, 77, 73, 69,
- 65, 61, 57, 54, 50, 47, 44, 41, 38, 35, 32, 29, 27, 25, 22, 20, 18, 16, 15,
- 13, 12, 10, 9, 8, 7, 6, 6, 5, 5, 4, 4, 4,
-};
-/* clang-format on */
-
-#endif // AOM_AOM_DSP_INTRAPRED_COMMON_H_
diff --git a/third_party/aom/aom_dsp/loopfilter.c b/third_party/aom/aom_dsp/loopfilter.c
deleted file mode 100644
index a3f261824..000000000
--- a/third_party/aom/aom_dsp/loopfilter.c
+++ /dev/null
@@ -1,925 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#include <stdlib.h>
-
-#include "config/aom_config.h"
-#include "config/aom_dsp_rtcd.h"
-
-#include "aom_dsp/aom_dsp_common.h"
-#include "aom_ports/mem.h"
-
-static INLINE int8_t signed_char_clamp(int t) {
- return (int8_t)clamp(t, -128, 127);
-}
-
-static INLINE int16_t signed_char_clamp_high(int t, int bd) {
- switch (bd) {
- case 10: return (int16_t)clamp(t, -128 * 4, 128 * 4 - 1);
- case 12: return (int16_t)clamp(t, -128 * 16, 128 * 16 - 1);
- case 8:
- default: return (int16_t)clamp(t, -128, 128 - 1);
- }
-}
-
-// should we apply any filter at all: 11111111 yes, 00000000 no
-static INLINE int8_t filter_mask2(uint8_t limit, uint8_t blimit, uint8_t p1,
- uint8_t p0, uint8_t q0, uint8_t q1) {
- int8_t mask = 0;
- mask |= (abs(p1 - p0) > limit) * -1;
- mask |= (abs(q1 - q0) > limit) * -1;
- mask |= (abs(p0 - q0) * 2 + abs(p1 - q1) / 2 > blimit) * -1;
- return ~mask;
-}
-
-static INLINE int8_t filter_mask(uint8_t limit, uint8_t blimit, uint8_t p3,
- uint8_t p2, uint8_t p1, uint8_t p0, uint8_t q0,
- uint8_t q1, uint8_t q2, uint8_t q3) {
- int8_t mask = 0;
- mask |= (abs(p3 - p2) > limit) * -1;
- mask |= (abs(p2 - p1) > limit) * -1;
- mask |= (abs(p1 - p0) > limit) * -1;
- mask |= (abs(q1 - q0) > limit) * -1;
- mask |= (abs(q2 - q1) > limit) * -1;
- mask |= (abs(q3 - q2) > limit) * -1;
- mask |= (abs(p0 - q0) * 2 + abs(p1 - q1) / 2 > blimit) * -1;
- return ~mask;
-}
-
-static INLINE int8_t filter_mask3_chroma(uint8_t limit, uint8_t blimit,
- uint8_t p2, uint8_t p1, uint8_t p0,
- uint8_t q0, uint8_t q1, uint8_t q2) {
- int8_t mask = 0;
- mask |= (abs(p2 - p1) > limit) * -1;
- mask |= (abs(p1 - p0) > limit) * -1;
- mask |= (abs(q1 - q0) > limit) * -1;
- mask |= (abs(q2 - q1) > limit) * -1;
- mask |= (abs(p0 - q0) * 2 + abs(p1 - q1) / 2 > blimit) * -1;
- return ~mask;
-}
-
-static INLINE int8_t flat_mask3_chroma(uint8_t thresh, uint8_t p2, uint8_t p1,
- uint8_t p0, uint8_t q0, uint8_t q1,
- uint8_t q2) {
- int8_t mask = 0;
- mask |= (abs(p1 - p0) > thresh) * -1;
- mask |= (abs(q1 - q0) > thresh) * -1;
- mask |= (abs(p2 - p0) > thresh) * -1;
- mask |= (abs(q2 - q0) > thresh) * -1;
- return ~mask;
-}
-
-static INLINE int8_t flat_mask4(uint8_t thresh, uint8_t p3, uint8_t p2,
- uint8_t p1, uint8_t p0, uint8_t q0, uint8_t q1,
- uint8_t q2, uint8_t q3) {
- int8_t mask = 0;
- mask |= (abs(p1 - p0) > thresh) * -1;
- mask |= (abs(q1 - q0) > thresh) * -1;
- mask |= (abs(p2 - p0) > thresh) * -1;
- mask |= (abs(q2 - q0) > thresh) * -1;
- mask |= (abs(p3 - p0) > thresh) * -1;
- mask |= (abs(q3 - q0) > thresh) * -1;
- return ~mask;
-}
-
-// is there high edge variance internal edge: 11111111 yes, 00000000 no
-static INLINE int8_t hev_mask(uint8_t thresh, uint8_t p1, uint8_t p0,
- uint8_t q0, uint8_t q1) {
- int8_t hev = 0;
- hev |= (abs(p1 - p0) > thresh) * -1;
- hev |= (abs(q1 - q0) > thresh) * -1;
- return hev;
-}
-
-static INLINE void filter4(int8_t mask, uint8_t thresh, uint8_t *op1,
- uint8_t *op0, uint8_t *oq0, uint8_t *oq1) {
- int8_t filter1, filter2;
-
- const int8_t ps1 = (int8_t)*op1 ^ 0x80;
- const int8_t ps0 = (int8_t)*op0 ^ 0x80;
- const int8_t qs0 = (int8_t)*oq0 ^ 0x80;
- const int8_t qs1 = (int8_t)*oq1 ^ 0x80;
- const uint8_t hev = hev_mask(thresh, *op1, *op0, *oq0, *oq1);
-
- // add outer taps if we have high edge variance
- int8_t filter = signed_char_clamp(ps1 - qs1) & hev;
-
- // inner taps
- filter = signed_char_clamp(filter + 3 * (qs0 - ps0)) & mask;
-
- // save bottom 3 bits so that we round one side +4 and the other +3
- // if it equals 4 we'll set to adjust by -1 to account for the fact
- // we'd round 3 the other way
- filter1 = signed_char_clamp(filter + 4) >> 3;
- filter2 = signed_char_clamp(filter + 3) >> 3;
-
- *oq0 = signed_char_clamp(qs0 - filter1) ^ 0x80;
- *op0 = signed_char_clamp(ps0 + filter2) ^ 0x80;
-
- // outer tap adjustments
- filter = ROUND_POWER_OF_TWO(filter1, 1) & ~hev;
-
- *oq1 = signed_char_clamp(qs1 - filter) ^ 0x80;
- *op1 = signed_char_clamp(ps1 + filter) ^ 0x80;
-}
-
-void aom_lpf_horizontal_4_c(uint8_t *s, int p /* pitch */,
- const uint8_t *blimit, const uint8_t *limit,
- const uint8_t *thresh) {
- int i;
- int count = 4;
-
- // loop filter designed to work using chars so that we can make maximum use
- // of 8 bit simd instructions.
- for (i = 0; i < count; ++i) {
- const uint8_t p1 = s[-2 * p], p0 = s[-p];
- const uint8_t q0 = s[0 * p], q1 = s[1 * p];
- const int8_t mask = filter_mask2(*limit, *blimit, p1, p0, q0, q1);
- filter4(mask, *thresh, s - 2 * p, s - 1 * p, s, s + 1 * p);
- ++s;
- }
-}
-
-void aom_lpf_horizontal_4_dual_c(uint8_t *s, int p, const uint8_t *blimit0,
- const uint8_t *limit0, const uint8_t *thresh0,
- const uint8_t *blimit1, const uint8_t *limit1,
- const uint8_t *thresh1) {
- aom_lpf_horizontal_4_c(s, p, blimit0, limit0, thresh0);
- aom_lpf_horizontal_4_c(s + 4, p, blimit1, limit1, thresh1);
-}
-
-void aom_lpf_vertical_4_c(uint8_t *s, int pitch, const uint8_t *blimit,
- const uint8_t *limit, const uint8_t *thresh) {
- int i;
- int count = 4;
-
- // loop filter designed to work using chars so that we can make maximum use
- // of 8 bit simd instructions.
- for (i = 0; i < count; ++i) {
- const uint8_t p1 = s[-2], p0 = s[-1];
- const uint8_t q0 = s[0], q1 = s[1];
- const int8_t mask = filter_mask2(*limit, *blimit, p1, p0, q0, q1);
- filter4(mask, *thresh, s - 2, s - 1, s, s + 1);
- s += pitch;
- }
-}
-
-void aom_lpf_vertical_4_dual_c(uint8_t *s, int pitch, const uint8_t *blimit0,
- const uint8_t *limit0, const uint8_t *thresh0,
- const uint8_t *blimit1, const uint8_t *limit1,
- const uint8_t *thresh1) {
- aom_lpf_vertical_4_c(s, pitch, blimit0, limit0, thresh0);
- aom_lpf_vertical_4_c(s + 4 * pitch, pitch, blimit1, limit1, thresh1);
-}
-
-static INLINE void filter6(int8_t mask, uint8_t thresh, int8_t flat,
- uint8_t *op2, uint8_t *op1, uint8_t *op0,
- uint8_t *oq0, uint8_t *oq1, uint8_t *oq2) {
- if (flat && mask) {
- const uint8_t p2 = *op2, p1 = *op1, p0 = *op0;
- const uint8_t q0 = *oq0, q1 = *oq1, q2 = *oq2;
-
- // 5-tap filter [1, 2, 2, 2, 1]
- *op1 = ROUND_POWER_OF_TWO(p2 * 3 + p1 * 2 + p0 * 2 + q0, 3);
- *op0 = ROUND_POWER_OF_TWO(p2 + p1 * 2 + p0 * 2 + q0 * 2 + q1, 3);
- *oq0 = ROUND_POWER_OF_TWO(p1 + p0 * 2 + q0 * 2 + q1 * 2 + q2, 3);
- *oq1 = ROUND_POWER_OF_TWO(p0 + q0 * 2 + q1 * 2 + q2 * 3, 3);
- } else {
- filter4(mask, thresh, op1, op0, oq0, oq1);
- }
-}
-
-static INLINE void filter8(int8_t mask, uint8_t thresh, int8_t flat,
- uint8_t *op3, uint8_t *op2, uint8_t *op1,
- uint8_t *op0, uint8_t *oq0, uint8_t *oq1,
- uint8_t *oq2, uint8_t *oq3) {
- if (flat && mask) {
- const uint8_t p3 = *op3, p2 = *op2, p1 = *op1, p0 = *op0;
- const uint8_t q0 = *oq0, q1 = *oq1, q2 = *oq2, q3 = *oq3;
-
- // 7-tap filter [1, 1, 1, 2, 1, 1, 1]
- *op2 = ROUND_POWER_OF_TWO(p3 + p3 + p3 + 2 * p2 + p1 + p0 + q0, 3);
- *op1 = ROUND_POWER_OF_TWO(p3 + p3 + p2 + 2 * p1 + p0 + q0 + q1, 3);
- *op0 = ROUND_POWER_OF_TWO(p3 + p2 + p1 + 2 * p0 + q0 + q1 + q2, 3);
- *oq0 = ROUND_POWER_OF_TWO(p2 + p1 + p0 + 2 * q0 + q1 + q2 + q3, 3);
- *oq1 = ROUND_POWER_OF_TWO(p1 + p0 + q0 + 2 * q1 + q2 + q3 + q3, 3);
- *oq2 = ROUND_POWER_OF_TWO(p0 + q0 + q1 + 2 * q2 + q3 + q3 + q3, 3);
- } else {
- filter4(mask, thresh, op1, op0, oq0, oq1);
- }
-}
-
-void aom_lpf_horizontal_6_c(uint8_t *s, int p, const uint8_t *blimit,
- const uint8_t *limit, const uint8_t *thresh) {
- int i;
- int count = 4;
-
- // loop filter designed to work using chars so that we can make maximum use
- // of 8 bit simd instructions.
- for (i = 0; i < count; ++i) {
- const uint8_t p2 = s[-3 * p], p1 = s[-2 * p], p0 = s[-p];
- const uint8_t q0 = s[0 * p], q1 = s[1 * p], q2 = s[2 * p];
-
- const int8_t mask =
- filter_mask3_chroma(*limit, *blimit, p2, p1, p0, q0, q1, q2);
- const int8_t flat = flat_mask3_chroma(1, p2, p1, p0, q0, q1, q2);
- filter6(mask, *thresh, flat, s - 3 * p, s - 2 * p, s - 1 * p, s, s + 1 * p,
- s + 2 * p);
- ++s;
- }
-}
-
-void aom_lpf_horizontal_6_dual_c(uint8_t *s, int p, const uint8_t *blimit0,
- const uint8_t *limit0, const uint8_t *thresh0,
- const uint8_t *blimit1, const uint8_t *limit1,
- const uint8_t *thresh1) {
- aom_lpf_horizontal_6_c(s, p, blimit0, limit0, thresh0);
- aom_lpf_horizontal_6_c(s + 4, p, blimit1, limit1, thresh1);
-}
-
-void aom_lpf_horizontal_8_c(uint8_t *s, int p, const uint8_t *blimit,
- const uint8_t *limit, const uint8_t *thresh) {
- int i;
- int count = 4;
-
- // loop filter designed to work using chars so that we can make maximum use
- // of 8 bit simd instructions.
- for (i = 0; i < count; ++i) {
- const uint8_t p3 = s[-4 * p], p2 = s[-3 * p], p1 = s[-2 * p], p0 = s[-p];
- const uint8_t q0 = s[0 * p], q1 = s[1 * p], q2 = s[2 * p], q3 = s[3 * p];
-
- const int8_t mask =
- filter_mask(*limit, *blimit, p3, p2, p1, p0, q0, q1, q2, q3);
- const int8_t flat = flat_mask4(1, p3, p2, p1, p0, q0, q1, q2, q3);
- filter8(mask, *thresh, flat, s - 4 * p, s - 3 * p, s - 2 * p, s - 1 * p, s,
- s + 1 * p, s + 2 * p, s + 3 * p);
- ++s;
- }
-}
-
-void aom_lpf_horizontal_8_dual_c(uint8_t *s, int p, const uint8_t *blimit0,
- const uint8_t *limit0, const uint8_t *thresh0,
- const uint8_t *blimit1, const uint8_t *limit1,
- const uint8_t *thresh1) {
- aom_lpf_horizontal_8_c(s, p, blimit0, limit0, thresh0);
- aom_lpf_horizontal_8_c(s + 4, p, blimit1, limit1, thresh1);
-}
-
-void aom_lpf_vertical_6_c(uint8_t *s, int pitch, const uint8_t *blimit,
- const uint8_t *limit, const uint8_t *thresh) {
- int i;
- int count = 4;
-
- for (i = 0; i < count; ++i) {
- const uint8_t p2 = s[-3], p1 = s[-2], p0 = s[-1];
- const uint8_t q0 = s[0], q1 = s[1], q2 = s[2];
- const int8_t mask =
- filter_mask3_chroma(*limit, *blimit, p2, p1, p0, q0, q1, q2);
- const int8_t flat = flat_mask3_chroma(1, p2, p1, p0, q0, q1, q2);
- filter6(mask, *thresh, flat, s - 3, s - 2, s - 1, s, s + 1, s + 2);
- s += pitch;
- }
-}
-
-void aom_lpf_vertical_6_dual_c(uint8_t *s, int pitch, const uint8_t *blimit0,
- const uint8_t *limit0, const uint8_t *thresh0,
- const uint8_t *blimit1, const uint8_t *limit1,
- const uint8_t *thresh1) {
- aom_lpf_vertical_6_c(s, pitch, blimit0, limit0, thresh0);
- aom_lpf_vertical_6_c(s + 4 * pitch, pitch, blimit1, limit1, thresh1);
-}
-
-void aom_lpf_vertical_8_c(uint8_t *s, int pitch, const uint8_t *blimit,
- const uint8_t *limit, const uint8_t *thresh) {
- int i;
- int count = 4;
-
- for (i = 0; i < count; ++i) {
- const uint8_t p3 = s[-4], p2 = s[-3], p1 = s[-2], p0 = s[-1];
- const uint8_t q0 = s[0], q1 = s[1], q2 = s[2], q3 = s[3];
- const int8_t mask =
- filter_mask(*limit, *blimit, p3, p2, p1, p0, q0, q1, q2, q3);
- const int8_t flat = flat_mask4(1, p3, p2, p1, p0, q0, q1, q2, q3);
- filter8(mask, *thresh, flat, s - 4, s - 3, s - 2, s - 1, s, s + 1, s + 2,
- s + 3);
- s += pitch;
- }
-}
-
-void aom_lpf_vertical_8_dual_c(uint8_t *s, int pitch, const uint8_t *blimit0,
- const uint8_t *limit0, const uint8_t *thresh0,
- const uint8_t *blimit1, const uint8_t *limit1,
- const uint8_t *thresh1) {
- aom_lpf_vertical_8_c(s, pitch, blimit0, limit0, thresh0);
- aom_lpf_vertical_8_c(s + 4 * pitch, pitch, blimit1, limit1, thresh1);
-}
-
-static INLINE void filter14(int8_t mask, uint8_t thresh, int8_t flat,
- int8_t flat2, uint8_t *op6, uint8_t *op5,
- uint8_t *op4, uint8_t *op3, uint8_t *op2,
- uint8_t *op1, uint8_t *op0, uint8_t *oq0,
- uint8_t *oq1, uint8_t *oq2, uint8_t *oq3,
- uint8_t *oq4, uint8_t *oq5, uint8_t *oq6) {
- if (flat2 && flat && mask) {
- const uint8_t p6 = *op6, p5 = *op5, p4 = *op4, p3 = *op3, p2 = *op2,
- p1 = *op1, p0 = *op0;
- const uint8_t q0 = *oq0, q1 = *oq1, q2 = *oq2, q3 = *oq3, q4 = *oq4,
- q5 = *oq5, q6 = *oq6;
-
- // 13-tap filter [1, 1, 1, 1, 1, 2, 2, 2, 1, 1, 1, 1, 1]
- *op5 = ROUND_POWER_OF_TWO(p6 * 7 + p5 * 2 + p4 * 2 + p3 + p2 + p1 + p0 + q0,
- 4);
- *op4 = ROUND_POWER_OF_TWO(
- p6 * 5 + p5 * 2 + p4 * 2 + p3 * 2 + p2 + p1 + p0 + q0 + q1, 4);
- *op3 = ROUND_POWER_OF_TWO(
- p6 * 4 + p5 + p4 * 2 + p3 * 2 + p2 * 2 + p1 + p0 + q0 + q1 + q2, 4);
- *op2 = ROUND_POWER_OF_TWO(
- p6 * 3 + p5 + p4 + p3 * 2 + p2 * 2 + p1 * 2 + p0 + q0 + q1 + q2 + q3,
- 4);
- *op1 = ROUND_POWER_OF_TWO(p6 * 2 + p5 + p4 + p3 + p2 * 2 + p1 * 2 + p0 * 2 +
- q0 + q1 + q2 + q3 + q4,
- 4);
- *op0 = ROUND_POWER_OF_TWO(p6 + p5 + p4 + p3 + p2 + p1 * 2 + p0 * 2 +
- q0 * 2 + q1 + q2 + q3 + q4 + q5,
- 4);
- *oq0 = ROUND_POWER_OF_TWO(p5 + p4 + p3 + p2 + p1 + p0 * 2 + q0 * 2 +
- q1 * 2 + q2 + q3 + q4 + q5 + q6,
- 4);
- *oq1 = ROUND_POWER_OF_TWO(p4 + p3 + p2 + p1 + p0 + q0 * 2 + q1 * 2 +
- q2 * 2 + q3 + q4 + q5 + q6 * 2,
- 4);
- *oq2 = ROUND_POWER_OF_TWO(
- p3 + p2 + p1 + p0 + q0 + q1 * 2 + q2 * 2 + q3 * 2 + q4 + q5 + q6 * 3,
- 4);
- *oq3 = ROUND_POWER_OF_TWO(
- p2 + p1 + p0 + q0 + q1 + q2 * 2 + q3 * 2 + q4 * 2 + q5 + q6 * 4, 4);
- *oq4 = ROUND_POWER_OF_TWO(
- p1 + p0 + q0 + q1 + q2 + q3 * 2 + q4 * 2 + q5 * 2 + q6 * 5, 4);
- *oq5 = ROUND_POWER_OF_TWO(p0 + q0 + q1 + q2 + q3 + q4 * 2 + q5 * 2 + q6 * 7,
- 4);
- } else {
- filter8(mask, thresh, flat, op3, op2, op1, op0, oq0, oq1, oq2, oq3);
- }
-}
-
-static void mb_lpf_horizontal_edge_w(uint8_t *s, int p, const uint8_t *blimit,
- const uint8_t *limit,
- const uint8_t *thresh, int count) {
- int i;
- int step = 4;
-
- // loop filter designed to work using chars so that we can make maximum use
- // of 8 bit simd instructions.
- for (i = 0; i < step * count; ++i) {
- const uint8_t p6 = s[-7 * p], p5 = s[-6 * p], p4 = s[-5 * p],
- p3 = s[-4 * p], p2 = s[-3 * p], p1 = s[-2 * p], p0 = s[-p];
- const uint8_t q0 = s[0 * p], q1 = s[1 * p], q2 = s[2 * p], q3 = s[3 * p],
- q4 = s[4 * p], q5 = s[5 * p], q6 = s[6 * p];
- const int8_t mask =
- filter_mask(*limit, *blimit, p3, p2, p1, p0, q0, q1, q2, q3);
- const int8_t flat = flat_mask4(1, p3, p2, p1, p0, q0, q1, q2, q3);
- const int8_t flat2 = flat_mask4(1, p6, p5, p4, p0, q0, q4, q5, q6);
-
- filter14(mask, *thresh, flat, flat2, s - 7 * p, s - 6 * p, s - 5 * p,
- s - 4 * p, s - 3 * p, s - 2 * p, s - 1 * p, s, s + 1 * p,
- s + 2 * p, s + 3 * p, s + 4 * p, s + 5 * p, s + 6 * p);
- ++s;
- }
-}
-
-void aom_lpf_horizontal_14_c(uint8_t *s, int p, const uint8_t *blimit,
- const uint8_t *limit, const uint8_t *thresh) {
- mb_lpf_horizontal_edge_w(s, p, blimit, limit, thresh, 1);
-}
-
-void aom_lpf_horizontal_14_dual_c(uint8_t *s, int p, const uint8_t *blimit0,
- const uint8_t *limit0, const uint8_t *thresh0,
- const uint8_t *blimit1, const uint8_t *limit1,
- const uint8_t *thresh1) {
- mb_lpf_horizontal_edge_w(s, p, blimit0, limit0, thresh0, 1);
- mb_lpf_horizontal_edge_w(s + 4, p, blimit1, limit1, thresh1, 1);
-}
-
-static void mb_lpf_vertical_edge_w(uint8_t *s, int p, const uint8_t *blimit,
- const uint8_t *limit, const uint8_t *thresh,
- int count) {
- int i;
-
- for (i = 0; i < count; ++i) {
- const uint8_t p6 = s[-7], p5 = s[-6], p4 = s[-5], p3 = s[-4], p2 = s[-3],
- p1 = s[-2], p0 = s[-1];
- const uint8_t q0 = s[0], q1 = s[1], q2 = s[2], q3 = s[3], q4 = s[4],
- q5 = s[5], q6 = s[6];
- const int8_t mask =
- filter_mask(*limit, *blimit, p3, p2, p1, p0, q0, q1, q2, q3);
- const int8_t flat = flat_mask4(1, p3, p2, p1, p0, q0, q1, q2, q3);
- const int8_t flat2 = flat_mask4(1, p6, p5, p4, p0, q0, q4, q5, q6);
-
- filter14(mask, *thresh, flat, flat2, s - 7, s - 6, s - 5, s - 4, s - 3,
- s - 2, s - 1, s, s + 1, s + 2, s + 3, s + 4, s + 5, s + 6);
- s += p;
- }
-}
-
-void aom_lpf_vertical_14_c(uint8_t *s, int p, const uint8_t *blimit,
- const uint8_t *limit, const uint8_t *thresh) {
- mb_lpf_vertical_edge_w(s, p, blimit, limit, thresh, 4);
-}
-
-void aom_lpf_vertical_14_dual_c(uint8_t *s, int pitch, const uint8_t *blimit0,
- const uint8_t *limit0, const uint8_t *thresh0,
- const uint8_t *blimit1, const uint8_t *limit1,
- const uint8_t *thresh1) {
- mb_lpf_vertical_edge_w(s, pitch, blimit0, limit0, thresh0, 4);
- mb_lpf_vertical_edge_w(s + 4 * pitch, pitch, blimit1, limit1, thresh1, 4);
-}
-
-// Should we apply any filter at all: 11111111 yes, 00000000 no ?
-static INLINE int8_t highbd_filter_mask2(uint8_t limit, uint8_t blimit,
- uint16_t p1, uint16_t p0, uint16_t q0,
- uint16_t q1, int bd) {
- int8_t mask = 0;
- int16_t limit16 = (uint16_t)limit << (bd - 8);
- int16_t blimit16 = (uint16_t)blimit << (bd - 8);
- mask |= (abs(p1 - p0) > limit16) * -1;
- mask |= (abs(q1 - q0) > limit16) * -1;
- mask |= (abs(p0 - q0) * 2 + abs(p1 - q1) / 2 > blimit16) * -1;
- return ~mask;
-}
-
-// Should we apply any filter at all: 11111111 yes, 00000000 no ?
-static INLINE int8_t highbd_filter_mask(uint8_t limit, uint8_t blimit,
- uint16_t p3, uint16_t p2, uint16_t p1,
- uint16_t p0, uint16_t q0, uint16_t q1,
- uint16_t q2, uint16_t q3, int bd) {
- int8_t mask = 0;
- int16_t limit16 = (uint16_t)limit << (bd - 8);
- int16_t blimit16 = (uint16_t)blimit << (bd - 8);
- mask |= (abs(p3 - p2) > limit16) * -1;
- mask |= (abs(p2 - p1) > limit16) * -1;
- mask |= (abs(p1 - p0) > limit16) * -1;
- mask |= (abs(q1 - q0) > limit16) * -1;
- mask |= (abs(q2 - q1) > limit16) * -1;
- mask |= (abs(q3 - q2) > limit16) * -1;
- mask |= (abs(p0 - q0) * 2 + abs(p1 - q1) / 2 > blimit16) * -1;
- return ~mask;
-}
-
-static INLINE int8_t highbd_filter_mask3_chroma(uint8_t limit, uint8_t blimit,
- uint16_t p2, uint16_t p1,
- uint16_t p0, uint16_t q0,
- uint16_t q1, uint16_t q2,
- int bd) {
- int8_t mask = 0;
- int16_t limit16 = (uint16_t)limit << (bd - 8);
- int16_t blimit16 = (uint16_t)blimit << (bd - 8);
- mask |= (abs(p2 - p1) > limit16) * -1;
- mask |= (abs(p1 - p0) > limit16) * -1;
- mask |= (abs(q1 - q0) > limit16) * -1;
- mask |= (abs(q2 - q1) > limit16) * -1;
- mask |= (abs(p0 - q0) * 2 + abs(p1 - q1) / 2 > blimit16) * -1;
- return ~mask;
-}
-
-static INLINE int8_t highbd_flat_mask3_chroma(uint8_t thresh, uint16_t p2,
- uint16_t p1, uint16_t p0,
- uint16_t q0, uint16_t q1,
- uint16_t q2, int bd) {
- int8_t mask = 0;
- int16_t thresh16 = (uint16_t)thresh << (bd - 8);
- mask |= (abs(p1 - p0) > thresh16) * -1;
- mask |= (abs(q1 - q0) > thresh16) * -1;
- mask |= (abs(p2 - p0) > thresh16) * -1;
- mask |= (abs(q2 - q0) > thresh16) * -1;
- return ~mask;
-}
-
-static INLINE int8_t highbd_flat_mask4(uint8_t thresh, uint16_t p3, uint16_t p2,
- uint16_t p1, uint16_t p0, uint16_t q0,
- uint16_t q1, uint16_t q2, uint16_t q3,
- int bd) {
- int8_t mask = 0;
- int16_t thresh16 = (uint16_t)thresh << (bd - 8);
- mask |= (abs(p1 - p0) > thresh16) * -1;
- mask |= (abs(q1 - q0) > thresh16) * -1;
- mask |= (abs(p2 - p0) > thresh16) * -1;
- mask |= (abs(q2 - q0) > thresh16) * -1;
- mask |= (abs(p3 - p0) > thresh16) * -1;
- mask |= (abs(q3 - q0) > thresh16) * -1;
- return ~mask;
-}
-
-// Is there high edge variance internal edge:
-// 11111111_11111111 yes, 00000000_00000000 no ?
-static INLINE int16_t highbd_hev_mask(uint8_t thresh, uint16_t p1, uint16_t p0,
- uint16_t q0, uint16_t q1, int bd) {
- int16_t hev = 0;
- int16_t thresh16 = (uint16_t)thresh << (bd - 8);
- hev |= (abs(p1 - p0) > thresh16) * -1;
- hev |= (abs(q1 - q0) > thresh16) * -1;
- return hev;
-}
-
-static INLINE void highbd_filter4(int8_t mask, uint8_t thresh, uint16_t *op1,
- uint16_t *op0, uint16_t *oq0, uint16_t *oq1,
- int bd) {
- int16_t filter1, filter2;
- // ^0x80 equivalent to subtracting 0x80 from the values to turn them
- // into -128 to +127 instead of 0 to 255.
- int shift = bd - 8;
- const int16_t ps1 = (int16_t)*op1 - (0x80 << shift);
- const int16_t ps0 = (int16_t)*op0 - (0x80 << shift);
- const int16_t qs0 = (int16_t)*oq0 - (0x80 << shift);
- const int16_t qs1 = (int16_t)*oq1 - (0x80 << shift);
- const uint16_t hev = highbd_hev_mask(thresh, *op1, *op0, *oq0, *oq1, bd);
-
- // Add outer taps if we have high edge variance.
- int16_t filter = signed_char_clamp_high(ps1 - qs1, bd) & hev;
-
- // Inner taps.
- filter = signed_char_clamp_high(filter + 3 * (qs0 - ps0), bd) & mask;
-
- // Save bottom 3 bits so that we round one side +4 and the other +3
- // if it equals 4 we'll set to adjust by -1 to account for the fact
- // we'd round 3 the other way.
- filter1 = signed_char_clamp_high(filter + 4, bd) >> 3;
- filter2 = signed_char_clamp_high(filter + 3, bd) >> 3;
-
- *oq0 = signed_char_clamp_high(qs0 - filter1, bd) + (0x80 << shift);
- *op0 = signed_char_clamp_high(ps0 + filter2, bd) + (0x80 << shift);
-
- // Outer tap adjustments.
- filter = ROUND_POWER_OF_TWO(filter1, 1) & ~hev;
-
- *oq1 = signed_char_clamp_high(qs1 - filter, bd) + (0x80 << shift);
- *op1 = signed_char_clamp_high(ps1 + filter, bd) + (0x80 << shift);
-}
-
-void aom_highbd_lpf_horizontal_4_c(uint16_t *s, int p /* pitch */,
- const uint8_t *blimit, const uint8_t *limit,
- const uint8_t *thresh, int bd) {
- int i;
- int count = 4;
-
- // loop filter designed to work using chars so that we can make maximum use
- // of 8 bit simd instructions.
- for (i = 0; i < count; ++i) {
- const uint16_t p1 = s[-2 * p];
- const uint16_t p0 = s[-p];
- const uint16_t q0 = s[0 * p];
- const uint16_t q1 = s[1 * p];
- const int8_t mask =
- highbd_filter_mask2(*limit, *blimit, p1, p0, q0, q1, bd);
- highbd_filter4(mask, *thresh, s - 2 * p, s - 1 * p, s, s + 1 * p, bd);
- ++s;
- }
-}
-
-void aom_highbd_lpf_horizontal_4_dual_c(
- uint16_t *s, int p, const uint8_t *blimit0, const uint8_t *limit0,
- const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1,
- const uint8_t *thresh1, int bd) {
- aom_highbd_lpf_horizontal_4_c(s, p, blimit0, limit0, thresh0, bd);
- aom_highbd_lpf_horizontal_4_c(s + 4, p, blimit1, limit1, thresh1, bd);
-}
-
-void aom_highbd_lpf_vertical_4_c(uint16_t *s, int pitch, const uint8_t *blimit,
- const uint8_t *limit, const uint8_t *thresh,
- int bd) {
- int i;
- int count = 4;
-
- // loop filter designed to work using chars so that we can make maximum use
- // of 8 bit simd instructions.
- for (i = 0; i < count; ++i) {
- const uint16_t p1 = s[-2], p0 = s[-1];
- const uint16_t q0 = s[0], q1 = s[1];
- const int8_t mask =
- highbd_filter_mask2(*limit, *blimit, p1, p0, q0, q1, bd);
- highbd_filter4(mask, *thresh, s - 2, s - 1, s, s + 1, bd);
- s += pitch;
- }
-}
-
-void aom_highbd_lpf_vertical_4_dual_c(
- uint16_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0,
- const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1,
- const uint8_t *thresh1, int bd) {
- aom_highbd_lpf_vertical_4_c(s, pitch, blimit0, limit0, thresh0, bd);
- aom_highbd_lpf_vertical_4_c(s + 4 * pitch, pitch, blimit1, limit1, thresh1,
- bd);
-}
-
-static INLINE void highbd_filter6(int8_t mask, uint8_t thresh, int8_t flat,
- uint16_t *op2, uint16_t *op1, uint16_t *op0,
- uint16_t *oq0, uint16_t *oq1, uint16_t *oq2,
- int bd) {
- if (flat && mask) {
- const uint16_t p2 = *op2, p1 = *op1, p0 = *op0;
- const uint16_t q0 = *oq0, q1 = *oq1, q2 = *oq2;
-
- // 5-tap filter [1, 2, 2, 2, 1]
- *op1 = ROUND_POWER_OF_TWO(p2 * 3 + p1 * 2 + p0 * 2 + q0, 3);
- *op0 = ROUND_POWER_OF_TWO(p2 + p1 * 2 + p0 * 2 + q0 * 2 + q1, 3);
- *oq0 = ROUND_POWER_OF_TWO(p1 + p0 * 2 + q0 * 2 + q1 * 2 + q2, 3);
- *oq1 = ROUND_POWER_OF_TWO(p0 + q0 * 2 + q1 * 2 + q2 * 3, 3);
- } else {
- highbd_filter4(mask, thresh, op1, op0, oq0, oq1, bd);
- }
-}
-
-static INLINE void highbd_filter8(int8_t mask, uint8_t thresh, int8_t flat,
- uint16_t *op3, uint16_t *op2, uint16_t *op1,
- uint16_t *op0, uint16_t *oq0, uint16_t *oq1,
- uint16_t *oq2, uint16_t *oq3, int bd) {
- if (flat && mask) {
- const uint16_t p3 = *op3, p2 = *op2, p1 = *op1, p0 = *op0;
- const uint16_t q0 = *oq0, q1 = *oq1, q2 = *oq2, q3 = *oq3;
-
- // 7-tap filter [1, 1, 1, 2, 1, 1, 1]
- *op2 = ROUND_POWER_OF_TWO(p3 + p3 + p3 + 2 * p2 + p1 + p0 + q0, 3);
- *op1 = ROUND_POWER_OF_TWO(p3 + p3 + p2 + 2 * p1 + p0 + q0 + q1, 3);
- *op0 = ROUND_POWER_OF_TWO(p3 + p2 + p1 + 2 * p0 + q0 + q1 + q2, 3);
- *oq0 = ROUND_POWER_OF_TWO(p2 + p1 + p0 + 2 * q0 + q1 + q2 + q3, 3);
- *oq1 = ROUND_POWER_OF_TWO(p1 + p0 + q0 + 2 * q1 + q2 + q3 + q3, 3);
- *oq2 = ROUND_POWER_OF_TWO(p0 + q0 + q1 + 2 * q2 + q3 + q3 + q3, 3);
- } else {
- highbd_filter4(mask, thresh, op1, op0, oq0, oq1, bd);
- }
-}
-
-void aom_highbd_lpf_horizontal_8_c(uint16_t *s, int p, const uint8_t *blimit,
- const uint8_t *limit, const uint8_t *thresh,
- int bd) {
- int i;
- int count = 4;
-
- // loop filter designed to work using chars so that we can make maximum use
- // of 8 bit simd instructions.
- for (i = 0; i < count; ++i) {
- const uint16_t p3 = s[-4 * p], p2 = s[-3 * p], p1 = s[-2 * p], p0 = s[-p];
- const uint16_t q0 = s[0 * p], q1 = s[1 * p], q2 = s[2 * p], q3 = s[3 * p];
-
- const int8_t mask =
- highbd_filter_mask(*limit, *blimit, p3, p2, p1, p0, q0, q1, q2, q3, bd);
- const int8_t flat =
- highbd_flat_mask4(1, p3, p2, p1, p0, q0, q1, q2, q3, bd);
- highbd_filter8(mask, *thresh, flat, s - 4 * p, s - 3 * p, s - 2 * p,
- s - 1 * p, s, s + 1 * p, s + 2 * p, s + 3 * p, bd);
- ++s;
- }
-}
-
-void aom_highbd_lpf_horizontal_6_c(uint16_t *s, int p, const uint8_t *blimit,
- const uint8_t *limit, const uint8_t *thresh,
- int bd) {
- int i;
- int count = 4;
-
- // loop filter designed to work using chars so that we can make maximum use
- // of 8 bit simd instructions.
- for (i = 0; i < count; ++i) {
- const uint16_t p2 = s[-3 * p], p1 = s[-2 * p], p0 = s[-p];
- const uint16_t q0 = s[0 * p], q1 = s[1 * p], q2 = s[2 * p];
-
- const int8_t mask =
- highbd_filter_mask3_chroma(*limit, *blimit, p2, p1, p0, q0, q1, q2, bd);
- const int8_t flat = highbd_flat_mask3_chroma(1, p2, p1, p0, q0, q1, q2, bd);
- highbd_filter6(mask, *thresh, flat, s - 3 * p, s - 2 * p, s - 1 * p, s,
- s + 1 * p, s + 2 * p, bd);
- ++s;
- }
-}
-
-void aom_highbd_lpf_horizontal_6_dual_c(
- uint16_t *s, int p, const uint8_t *blimit0, const uint8_t *limit0,
- const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1,
- const uint8_t *thresh1, int bd) {
- aom_highbd_lpf_horizontal_6_c(s, p, blimit0, limit0, thresh0, bd);
- aom_highbd_lpf_horizontal_6_c(s + 4, p, blimit1, limit1, thresh1, bd);
-}
-
-void aom_highbd_lpf_horizontal_8_dual_c(
- uint16_t *s, int p, const uint8_t *blimit0, const uint8_t *limit0,
- const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1,
- const uint8_t *thresh1, int bd) {
- aom_highbd_lpf_horizontal_8_c(s, p, blimit0, limit0, thresh0, bd);
- aom_highbd_lpf_horizontal_8_c(s + 4, p, blimit1, limit1, thresh1, bd);
-}
-
-void aom_highbd_lpf_vertical_6_c(uint16_t *s, int pitch, const uint8_t *blimit,
- const uint8_t *limit, const uint8_t *thresh,
- int bd) {
- int i;
- int count = 4;
-
- for (i = 0; i < count; ++i) {
- const uint16_t p2 = s[-3], p1 = s[-2], p0 = s[-1];
- const uint16_t q0 = s[0], q1 = s[1], q2 = s[2];
- const int8_t mask =
- highbd_filter_mask3_chroma(*limit, *blimit, p2, p1, p0, q0, q1, q2, bd);
- const int8_t flat = highbd_flat_mask3_chroma(1, p2, p1, p0, q0, q1, q2, bd);
- highbd_filter6(mask, *thresh, flat, s - 3, s - 2, s - 1, s, s + 1, s + 2,
- bd);
- s += pitch;
- }
-}
-
-void aom_highbd_lpf_vertical_6_dual_c(
- uint16_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0,
- const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1,
- const uint8_t *thresh1, int bd) {
- aom_highbd_lpf_vertical_6_c(s, pitch, blimit0, limit0, thresh0, bd);
- aom_highbd_lpf_vertical_6_c(s + 4 * pitch, pitch, blimit1, limit1, thresh1,
- bd);
-}
-
-void aom_highbd_lpf_vertical_8_c(uint16_t *s, int pitch, const uint8_t *blimit,
- const uint8_t *limit, const uint8_t *thresh,
- int bd) {
- int i;
- int count = 4;
-
- for (i = 0; i < count; ++i) {
- const uint16_t p3 = s[-4], p2 = s[-3], p1 = s[-2], p0 = s[-1];
- const uint16_t q0 = s[0], q1 = s[1], q2 = s[2], q3 = s[3];
- const int8_t mask =
- highbd_filter_mask(*limit, *blimit, p3, p2, p1, p0, q0, q1, q2, q3, bd);
- const int8_t flat =
- highbd_flat_mask4(1, p3, p2, p1, p0, q0, q1, q2, q3, bd);
- highbd_filter8(mask, *thresh, flat, s - 4, s - 3, s - 2, s - 1, s, s + 1,
- s + 2, s + 3, bd);
- s += pitch;
- }
-}
-
-void aom_highbd_lpf_vertical_8_dual_c(
- uint16_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0,
- const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1,
- const uint8_t *thresh1, int bd) {
- aom_highbd_lpf_vertical_8_c(s, pitch, blimit0, limit0, thresh0, bd);
- aom_highbd_lpf_vertical_8_c(s + 4 * pitch, pitch, blimit1, limit1, thresh1,
- bd);
-}
-
-static INLINE void highbd_filter14(int8_t mask, uint8_t thresh, int8_t flat,
- int8_t flat2, uint16_t *op6, uint16_t *op5,
- uint16_t *op4, uint16_t *op3, uint16_t *op2,
- uint16_t *op1, uint16_t *op0, uint16_t *oq0,
- uint16_t *oq1, uint16_t *oq2, uint16_t *oq3,
- uint16_t *oq4, uint16_t *oq5, uint16_t *oq6,
- int bd) {
- if (flat2 && flat && mask) {
- const uint16_t p6 = *op6;
- const uint16_t p5 = *op5;
- const uint16_t p4 = *op4;
- const uint16_t p3 = *op3;
- const uint16_t p2 = *op2;
- const uint16_t p1 = *op1;
- const uint16_t p0 = *op0;
- const uint16_t q0 = *oq0;
- const uint16_t q1 = *oq1;
- const uint16_t q2 = *oq2;
- const uint16_t q3 = *oq3;
- const uint16_t q4 = *oq4;
- const uint16_t q5 = *oq5;
- const uint16_t q6 = *oq6;
-
- // 13-tap filter [1, 1, 1, 1, 1, 2, 2, 2, 1, 1, 1, 1, 1]
- *op5 = ROUND_POWER_OF_TWO(p6 * 7 + p5 * 2 + p4 * 2 + p3 + p2 + p1 + p0 + q0,
- 4);
- *op4 = ROUND_POWER_OF_TWO(
- p6 * 5 + p5 * 2 + p4 * 2 + p3 * 2 + p2 + p1 + p0 + q0 + q1, 4);
- *op3 = ROUND_POWER_OF_TWO(
- p6 * 4 + p5 + p4 * 2 + p3 * 2 + p2 * 2 + p1 + p0 + q0 + q1 + q2, 4);
- *op2 = ROUND_POWER_OF_TWO(
- p6 * 3 + p5 + p4 + p3 * 2 + p2 * 2 + p1 * 2 + p0 + q0 + q1 + q2 + q3,
- 4);
- *op1 = ROUND_POWER_OF_TWO(p6 * 2 + p5 + p4 + p3 + p2 * 2 + p1 * 2 + p0 * 2 +
- q0 + q1 + q2 + q3 + q4,
- 4);
- *op0 = ROUND_POWER_OF_TWO(p6 + p5 + p4 + p3 + p2 + p1 * 2 + p0 * 2 +
- q0 * 2 + q1 + q2 + q3 + q4 + q5,
- 4);
- *oq0 = ROUND_POWER_OF_TWO(p5 + p4 + p3 + p2 + p1 + p0 * 2 + q0 * 2 +
- q1 * 2 + q2 + q3 + q4 + q5 + q6,
- 4);
- *oq1 = ROUND_POWER_OF_TWO(p4 + p3 + p2 + p1 + p0 + q0 * 2 + q1 * 2 +
- q2 * 2 + q3 + q4 + q5 + q6 * 2,
- 4);
- *oq2 = ROUND_POWER_OF_TWO(
- p3 + p2 + p1 + p0 + q0 + q1 * 2 + q2 * 2 + q3 * 2 + q4 + q5 + q6 * 3,
- 4);
- *oq3 = ROUND_POWER_OF_TWO(
- p2 + p1 + p0 + q0 + q1 + q2 * 2 + q3 * 2 + q4 * 2 + q5 + q6 * 4, 4);
- *oq4 = ROUND_POWER_OF_TWO(
- p1 + p0 + q0 + q1 + q2 + q3 * 2 + q4 * 2 + q5 * 2 + q6 * 5, 4);
- *oq5 = ROUND_POWER_OF_TWO(p0 + q0 + q1 + q2 + q3 + q4 * 2 + q5 * 2 + q6 * 7,
- 4);
- } else {
- highbd_filter8(mask, thresh, flat, op3, op2, op1, op0, oq0, oq1, oq2, oq3,
- bd);
- }
-}
-
-static void highbd_mb_lpf_horizontal_edge_w(uint16_t *s, int p,
- const uint8_t *blimit,
- const uint8_t *limit,
- const uint8_t *thresh, int count,
- int bd) {
- int i;
- int step = 4;
-
- // loop filter designed to work using chars so that we can make maximum use
- // of 8 bit simd instructions.
- for (i = 0; i < step * count; ++i) {
- const uint16_t p3 = s[-4 * p];
- const uint16_t p2 = s[-3 * p];
- const uint16_t p1 = s[-2 * p];
- const uint16_t p0 = s[-p];
- const uint16_t q0 = s[0 * p];
- const uint16_t q1 = s[1 * p];
- const uint16_t q2 = s[2 * p];
- const uint16_t q3 = s[3 * p];
- const int8_t mask =
- highbd_filter_mask(*limit, *blimit, p3, p2, p1, p0, q0, q1, q2, q3, bd);
- const int8_t flat =
- highbd_flat_mask4(1, p3, p2, p1, p0, q0, q1, q2, q3, bd);
-
- const int8_t flat2 =
- highbd_flat_mask4(1, s[-7 * p], s[-6 * p], s[-5 * p], p0, q0, s[4 * p],
- s[5 * p], s[6 * p], bd);
-
- highbd_filter14(mask, *thresh, flat, flat2, s - 7 * p, s - 6 * p, s - 5 * p,
- s - 4 * p, s - 3 * p, s - 2 * p, s - 1 * p, s, s + 1 * p,
- s + 2 * p, s + 3 * p, s + 4 * p, s + 5 * p, s + 6 * p, bd);
- ++s;
- }
-}
-
-void aom_highbd_lpf_horizontal_14_c(uint16_t *s, int p, const uint8_t *blimit,
- const uint8_t *limit, const uint8_t *thresh,
- int bd) {
- highbd_mb_lpf_horizontal_edge_w(s, p, blimit, limit, thresh, 1, bd);
-}
-
-void aom_highbd_lpf_horizontal_14_dual_c(
- uint16_t *s, int p, const uint8_t *blimit0, const uint8_t *limit0,
- const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1,
- const uint8_t *thresh1, int bd) {
- highbd_mb_lpf_horizontal_edge_w(s, p, blimit0, limit0, thresh0, 1, bd);
- highbd_mb_lpf_horizontal_edge_w(s + 4, p, blimit1, limit1, thresh1, 1, bd);
-}
-
-static void highbd_mb_lpf_vertical_edge_w(uint16_t *s, int p,
- const uint8_t *blimit,
- const uint8_t *limit,
- const uint8_t *thresh, int count,
- int bd) {
- int i;
-
- for (i = 0; i < count; ++i) {
- const uint16_t p3 = s[-4];
- const uint16_t p2 = s[-3];
- const uint16_t p1 = s[-2];
- const uint16_t p0 = s[-1];
- const uint16_t q0 = s[0];
- const uint16_t q1 = s[1];
- const uint16_t q2 = s[2];
- const uint16_t q3 = s[3];
- const int8_t mask =
- highbd_filter_mask(*limit, *blimit, p3, p2, p1, p0, q0, q1, q2, q3, bd);
- const int8_t flat =
- highbd_flat_mask4(1, p3, p2, p1, p0, q0, q1, q2, q3, bd);
- const int8_t flat2 =
- highbd_flat_mask4(1, s[-7], s[-6], s[-5], p0, q0, s[4], s[5], s[6], bd);
-
- highbd_filter14(mask, *thresh, flat, flat2, s - 7, s - 6, s - 5, s - 4,
- s - 3, s - 2, s - 1, s, s + 1, s + 2, s + 3, s + 4, s + 5,
- s + 6, bd);
- s += p;
- }
-}
-
-void aom_highbd_lpf_vertical_14_c(uint16_t *s, int p, const uint8_t *blimit,
- const uint8_t *limit, const uint8_t *thresh,
- int bd) {
- highbd_mb_lpf_vertical_edge_w(s, p, blimit, limit, thresh, 4, bd);
-}
-
-void aom_highbd_lpf_vertical_14_dual_c(
- uint16_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0,
- const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1,
- const uint8_t *thresh1, int bd) {
- highbd_mb_lpf_vertical_edge_w(s, pitch, blimit0, limit0, thresh0, 4, bd);
- highbd_mb_lpf_vertical_edge_w(s + 4 * pitch, pitch, blimit1, limit1, thresh1,
- 4, bd);
-}
diff --git a/third_party/aom/aom_dsp/mips/add_noise_msa.c b/third_party/aom/aom_dsp/mips/add_noise_msa.c
deleted file mode 100644
index 96d04cff0..000000000
--- a/third_party/aom/aom_dsp/mips/add_noise_msa.c
+++ /dev/null
@@ -1,61 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#include <stdlib.h>
-
-#include "aom_dsp/mips/macros_msa.h"
-
-void aom_plane_add_noise_msa(uint8_t *start_ptr, char *noise,
- char blackclamp[16], char whiteclamp[16],
- char bothclamp[16], uint32_t width,
- uint32_t height, int32_t pitch) {
- uint32_t i, j;
-
- for (i = 0; i < height / 2; ++i) {
- uint8_t *pos0_ptr = start_ptr + (2 * i) * pitch;
- int8_t *ref0_ptr = (int8_t *)(noise + (rand() & 0xff));
- uint8_t *pos1_ptr = start_ptr + (2 * i + 1) * pitch;
- int8_t *ref1_ptr = (int8_t *)(noise + (rand() & 0xff));
- for (j = width / 16; j--;) {
- v16i8 temp00_s, temp01_s;
- v16u8 temp00, temp01, black_clamp, white_clamp;
- v16u8 pos0, ref0, pos1, ref1;
- v16i8 const127 = __msa_ldi_b(127);
-
- pos0 = LD_UB(pos0_ptr);
- ref0 = LD_UB(ref0_ptr);
- pos1 = LD_UB(pos1_ptr);
- ref1 = LD_UB(ref1_ptr);
- black_clamp = (v16u8)__msa_fill_b(blackclamp[0]);
- white_clamp = (v16u8)__msa_fill_b(whiteclamp[0]);
- temp00 = (pos0 < black_clamp);
- pos0 = __msa_bmnz_v(pos0, black_clamp, temp00);
- temp01 = (pos1 < black_clamp);
- pos1 = __msa_bmnz_v(pos1, black_clamp, temp01);
- XORI_B2_128_UB(pos0, pos1);
- temp00_s = __msa_adds_s_b((v16i8)white_clamp, const127);
- temp00 = (v16u8)(temp00_s < pos0);
- pos0 = (v16u8)__msa_bmnz_v((v16u8)pos0, (v16u8)temp00_s, temp00);
- temp01_s = __msa_adds_s_b((v16i8)white_clamp, const127);
- temp01 = (temp01_s < pos1);
- pos1 = (v16u8)__msa_bmnz_v((v16u8)pos1, (v16u8)temp01_s, temp01);
- XORI_B2_128_UB(pos0, pos1);
- pos0 += ref0;
- ST_UB(pos0, pos0_ptr);
- pos1 += ref1;
- ST_UB(pos1, pos1_ptr);
- pos0_ptr += 16;
- pos1_ptr += 16;
- ref0_ptr += 16;
- ref1_ptr += 16;
- }
- }
-}
diff --git a/third_party/aom/aom_dsp/mips/aom_convolve8_horiz_msa.c b/third_party/aom/aom_dsp/mips/aom_convolve8_horiz_msa.c
deleted file mode 100644
index 363fad308..000000000
--- a/third_party/aom/aom_dsp/mips/aom_convolve8_horiz_msa.c
+++ /dev/null
@@ -1,694 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#include <assert.h>
-
-#include "config/aom_dsp_rtcd.h"
-
-#include "aom_dsp/mips/aom_convolve_msa.h"
-
-static void common_hz_8t_4x4_msa(const uint8_t *src, int32_t src_stride,
- uint8_t *dst, int32_t dst_stride,
- int8_t *filter) {
- v16u8 mask0, mask1, mask2, mask3, out;
- v16i8 src0, src1, src2, src3, filt0, filt1, filt2, filt3;
- v8i16 filt, out0, out1;
-
- mask0 = LD_UB(&mc_filt_mask_arr[16]);
- src -= 3;
-
- /* rearranging filter */
- filt = LD_SH(filter);
- SPLATI_H4_SB(filt, 0, 1, 2, 3, filt0, filt1, filt2, filt3);
-
- mask1 = mask0 + 2;
- mask2 = mask0 + 4;
- mask3 = mask0 + 6;
-
- LD_SB4(src, src_stride, src0, src1, src2, src3);
- XORI_B4_128_SB(src0, src1, src2, src3);
- HORIZ_8TAP_4WID_4VECS_FILT(src0, src1, src2, src3, mask0, mask1, mask2, mask3,
- filt0, filt1, filt2, filt3, out0, out1);
- SRARI_H2_SH(out0, out1, FILTER_BITS);
- SAT_SH2_SH(out0, out1, 7);
- out = PCKEV_XORI128_UB(out0, out1);
- ST4x4_UB(out, out, 0, 1, 2, 3, dst, dst_stride);
-}
-
-static void common_hz_8t_4x8_msa(const uint8_t *src, int32_t src_stride,
- uint8_t *dst, int32_t dst_stride,
- int8_t *filter) {
- v16i8 filt0, filt1, filt2, filt3;
- v16i8 src0, src1, src2, src3;
- v16u8 mask0, mask1, mask2, mask3, out;
- v8i16 filt, out0, out1, out2, out3;
-
- mask0 = LD_UB(&mc_filt_mask_arr[16]);
- src -= 3;
-
- /* rearranging filter */
- filt = LD_SH(filter);
- SPLATI_H4_SB(filt, 0, 1, 2, 3, filt0, filt1, filt2, filt3);
-
- mask1 = mask0 + 2;
- mask2 = mask0 + 4;
- mask3 = mask0 + 6;
-
- LD_SB4(src, src_stride, src0, src1, src2, src3);
- XORI_B4_128_SB(src0, src1, src2, src3);
- src += (4 * src_stride);
- HORIZ_8TAP_4WID_4VECS_FILT(src0, src1, src2, src3, mask0, mask1, mask2, mask3,
- filt0, filt1, filt2, filt3, out0, out1);
- LD_SB4(src, src_stride, src0, src1, src2, src3);
- XORI_B4_128_SB(src0, src1, src2, src3);
- HORIZ_8TAP_4WID_4VECS_FILT(src0, src1, src2, src3, mask0, mask1, mask2, mask3,
- filt0, filt1, filt2, filt3, out2, out3);
- SRARI_H4_SH(out0, out1, out2, out3, FILTER_BITS);
- SAT_SH4_SH(out0, out1, out2, out3, 7);
- out = PCKEV_XORI128_UB(out0, out1);
- ST4x4_UB(out, out, 0, 1, 2, 3, dst, dst_stride);
- dst += (4 * dst_stride);
- out = PCKEV_XORI128_UB(out2, out3);
- ST4x4_UB(out, out, 0, 1, 2, 3, dst, dst_stride);
-}
-
-static void common_hz_8t_4w_msa(const uint8_t *src, int32_t src_stride,
- uint8_t *dst, int32_t dst_stride,
- int8_t *filter, int32_t height) {
- if (4 == height) {
- common_hz_8t_4x4_msa(src, src_stride, dst, dst_stride, filter);
- } else if (8 == height) {
- common_hz_8t_4x8_msa(src, src_stride, dst, dst_stride, filter);
- }
-}
-
-static void common_hz_8t_8x4_msa(const uint8_t *src, int32_t src_stride,
- uint8_t *dst, int32_t dst_stride,
- int8_t *filter) {
- v16i8 src0, src1, src2, src3, filt0, filt1, filt2, filt3;
- v16u8 mask0, mask1, mask2, mask3, tmp0, tmp1;
- v8i16 filt, out0, out1, out2, out3;
-
- mask0 = LD_UB(&mc_filt_mask_arr[0]);
- src -= 3;
-
- /* rearranging filter */
- filt = LD_SH(filter);
- SPLATI_H4_SB(filt, 0, 1, 2, 3, filt0, filt1, filt2, filt3);
-
- mask1 = mask0 + 2;
- mask2 = mask0 + 4;
- mask3 = mask0 + 6;
-
- LD_SB4(src, src_stride, src0, src1, src2, src3);
- XORI_B4_128_SB(src0, src1, src2, src3);
- HORIZ_8TAP_8WID_4VECS_FILT(src0, src1, src2, src3, mask0, mask1, mask2, mask3,
- filt0, filt1, filt2, filt3, out0, out1, out2,
- out3);
- SRARI_H4_SH(out0, out1, out2, out3, FILTER_BITS);
- SAT_SH4_SH(out0, out1, out2, out3, 7);
- tmp0 = PCKEV_XORI128_UB(out0, out1);
- tmp1 = PCKEV_XORI128_UB(out2, out3);
- ST8x4_UB(tmp0, tmp1, dst, dst_stride);
-}
-
-static void common_hz_8t_8x8mult_msa(const uint8_t *src, int32_t src_stride,
- uint8_t *dst, int32_t dst_stride,
- int8_t *filter, int32_t height) {
- uint32_t loop_cnt;
- v16i8 src0, src1, src2, src3, filt0, filt1, filt2, filt3;
- v16u8 mask0, mask1, mask2, mask3, tmp0, tmp1;
- v8i16 filt, out0, out1, out2, out3;
-
- mask0 = LD_UB(&mc_filt_mask_arr[0]);
- src -= 3;
-
- /* rearranging filter */
- filt = LD_SH(filter);
- SPLATI_H4_SB(filt, 0, 1, 2, 3, filt0, filt1, filt2, filt3);
-
- mask1 = mask0 + 2;
- mask2 = mask0 + 4;
- mask3 = mask0 + 6;
-
- for (loop_cnt = (height >> 2); loop_cnt--;) {
- LD_SB4(src, src_stride, src0, src1, src2, src3);
- XORI_B4_128_SB(src0, src1, src2, src3);
- src += (4 * src_stride);
- HORIZ_8TAP_8WID_4VECS_FILT(src0, src1, src2, src3, mask0, mask1, mask2,
- mask3, filt0, filt1, filt2, filt3, out0, out1,
- out2, out3);
- SRARI_H4_SH(out0, out1, out2, out3, FILTER_BITS);
- SAT_SH4_SH(out0, out1, out2, out3, 7);
- tmp0 = PCKEV_XORI128_UB(out0, out1);
- tmp1 = PCKEV_XORI128_UB(out2, out3);
- ST8x4_UB(tmp0, tmp1, dst, dst_stride);
- dst += (4 * dst_stride);
- }
-}
-
-static void common_hz_8t_8w_msa(const uint8_t *src, int32_t src_stride,
- uint8_t *dst, int32_t dst_stride,
- int8_t *filter, int32_t height) {
- if (4 == height) {
- common_hz_8t_8x4_msa(src, src_stride, dst, dst_stride, filter);
- } else {
- common_hz_8t_8x8mult_msa(src, src_stride, dst, dst_stride, filter, height);
- }
-}
-
-static void common_hz_8t_16w_msa(const uint8_t *src, int32_t src_stride,
- uint8_t *dst, int32_t dst_stride,
- int8_t *filter, int32_t height) {
- uint32_t loop_cnt;
- v16i8 src0, src1, src2, src3, filt0, filt1, filt2, filt3;
- v16u8 mask0, mask1, mask2, mask3, out;
- v8i16 filt, out0, out1, out2, out3;
-
- mask0 = LD_UB(&mc_filt_mask_arr[0]);
- src -= 3;
-
- /* rearranging filter */
- filt = LD_SH(filter);
- SPLATI_H4_SB(filt, 0, 1, 2, 3, filt0, filt1, filt2, filt3);
-
- mask1 = mask0 + 2;
- mask2 = mask0 + 4;
- mask3 = mask0 + 6;
-
- for (loop_cnt = (height >> 1); loop_cnt--;) {
- LD_SB2(src, src_stride, src0, src2);
- LD_SB2(src + 8, src_stride, src1, src3);
- XORI_B4_128_SB(src0, src1, src2, src3);
- src += (2 * src_stride);
- HORIZ_8TAP_8WID_4VECS_FILT(src0, src1, src2, src3, mask0, mask1, mask2,
- mask3, filt0, filt1, filt2, filt3, out0, out1,
- out2, out3);
- SRARI_H4_SH(out0, out1, out2, out3, FILTER_BITS);
- SAT_SH4_SH(out0, out1, out2, out3, 7);
- out = PCKEV_XORI128_UB(out0, out1);
- ST_UB(out, dst);
- dst += dst_stride;
- out = PCKEV_XORI128_UB(out2, out3);
- ST_UB(out, dst);
- dst += dst_stride;
- }
-}
-
-static void common_hz_8t_32w_msa(const uint8_t *src, int32_t src_stride,
- uint8_t *dst, int32_t dst_stride,
- int8_t *filter, int32_t height) {
- uint32_t loop_cnt;
- v16i8 src0, src1, src2, src3, filt0, filt1, filt2, filt3;
- v16u8 mask0, mask1, mask2, mask3, out;
- v8i16 filt, out0, out1, out2, out3;
-
- mask0 = LD_UB(&mc_filt_mask_arr[0]);
- src -= 3;
-
- /* rearranging filter */
- filt = LD_SH(filter);
- SPLATI_H4_SB(filt, 0, 1, 2, 3, filt0, filt1, filt2, filt3);
-
- mask1 = mask0 + 2;
- mask2 = mask0 + 4;
- mask3 = mask0 + 6;
-
- for (loop_cnt = (height >> 1); loop_cnt--;) {
- src0 = LD_SB(src);
- src2 = LD_SB(src + 16);
- src3 = LD_SB(src + 24);
- src1 = __msa_sldi_b(src2, src0, 8);
- src += src_stride;
- XORI_B4_128_SB(src0, src1, src2, src3);
- HORIZ_8TAP_8WID_4VECS_FILT(src0, src1, src2, src3, mask0, mask1, mask2,
- mask3, filt0, filt1, filt2, filt3, out0, out1,
- out2, out3);
- SRARI_H4_SH(out0, out1, out2, out3, FILTER_BITS);
- SAT_SH4_SH(out0, out1, out2, out3, 7);
-
- src0 = LD_SB(src);
- src2 = LD_SB(src + 16);
- src3 = LD_SB(src + 24);
- src1 = __msa_sldi_b(src2, src0, 8);
- src += src_stride;
-
- out = PCKEV_XORI128_UB(out0, out1);
- ST_UB(out, dst);
- out = PCKEV_XORI128_UB(out2, out3);
- ST_UB(out, dst + 16);
- dst += dst_stride;
-
- XORI_B4_128_SB(src0, src1, src2, src3);
- HORIZ_8TAP_8WID_4VECS_FILT(src0, src1, src2, src3, mask0, mask1, mask2,
- mask3, filt0, filt1, filt2, filt3, out0, out1,
- out2, out3);
- SRARI_H4_SH(out0, out1, out2, out3, FILTER_BITS);
- SAT_SH4_SH(out0, out1, out2, out3, 7);
- out = PCKEV_XORI128_UB(out0, out1);
- ST_UB(out, dst);
- out = PCKEV_XORI128_UB(out2, out3);
- ST_UB(out, dst + 16);
- dst += dst_stride;
- }
-}
-
-static void common_hz_8t_64w_msa(const uint8_t *src, int32_t src_stride,
- uint8_t *dst, int32_t dst_stride,
- int8_t *filter, int32_t height) {
- int32_t loop_cnt;
- v16i8 src0, src1, src2, src3, filt0, filt1, filt2, filt3;
- v16u8 mask0, mask1, mask2, mask3, out;
- v8i16 filt, out0, out1, out2, out3;
-
- mask0 = LD_UB(&mc_filt_mask_arr[0]);
- src -= 3;
-
- /* rearranging filter */
- filt = LD_SH(filter);
- SPLATI_H4_SB(filt, 0, 1, 2, 3, filt0, filt1, filt2, filt3);
-
- mask1 = mask0 + 2;
- mask2 = mask0 + 4;
- mask3 = mask0 + 6;
-
- for (loop_cnt = height; loop_cnt--;) {
- src0 = LD_SB(src);
- src2 = LD_SB(src + 16);
- src3 = LD_SB(src + 24);
- src1 = __msa_sldi_b(src2, src0, 8);
-
- XORI_B4_128_SB(src0, src1, src2, src3);
- HORIZ_8TAP_8WID_4VECS_FILT(src0, src1, src2, src3, mask0, mask1, mask2,
- mask3, filt0, filt1, filt2, filt3, out0, out1,
- out2, out3);
- SRARI_H4_SH(out0, out1, out2, out3, FILTER_BITS);
- SAT_SH4_SH(out0, out1, out2, out3, 7);
- out = PCKEV_XORI128_UB(out0, out1);
- ST_UB(out, dst);
- out = PCKEV_XORI128_UB(out2, out3);
- ST_UB(out, dst + 16);
-
- src0 = LD_SB(src + 32);
- src2 = LD_SB(src + 48);
- src3 = LD_SB(src + 56);
- src1 = __msa_sldi_b(src2, src0, 8);
- src += src_stride;
-
- XORI_B4_128_SB(src0, src1, src2, src3);
- HORIZ_8TAP_8WID_4VECS_FILT(src0, src1, src2, src3, mask0, mask1, mask2,
- mask3, filt0, filt1, filt2, filt3, out0, out1,
- out2, out3);
- SRARI_H4_SH(out0, out1, out2, out3, FILTER_BITS);
- SAT_SH4_SH(out0, out1, out2, out3, 7);
- out = PCKEV_XORI128_UB(out0, out1);
- ST_UB(out, dst + 32);
- out = PCKEV_XORI128_UB(out2, out3);
- ST_UB(out, dst + 48);
- dst += dst_stride;
- }
-}
-
-static void common_hz_2t_4x4_msa(const uint8_t *src, int32_t src_stride,
- uint8_t *dst, int32_t dst_stride,
- int8_t *filter) {
- v16i8 src0, src1, src2, src3, mask;
- v16u8 filt0, vec0, vec1, res0, res1;
- v8u16 vec2, vec3, filt;
-
- mask = LD_SB(&mc_filt_mask_arr[16]);
-
- /* rearranging filter */
- filt = LD_UH(filter);
- filt0 = (v16u8)__msa_splati_h((v8i16)filt, 0);
-
- LD_SB4(src, src_stride, src0, src1, src2, src3);
- VSHF_B2_UB(src0, src1, src2, src3, mask, mask, vec0, vec1);
- DOTP_UB2_UH(vec0, vec1, filt0, filt0, vec2, vec3);
- SRARI_H2_UH(vec2, vec3, FILTER_BITS);
- PCKEV_B2_UB(vec2, vec2, vec3, vec3, res0, res1);
- ST4x4_UB(res0, res1, 0, 1, 0, 1, dst, dst_stride);
-}
-
-static void common_hz_2t_4x8_msa(const uint8_t *src, int32_t src_stride,
- uint8_t *dst, int32_t dst_stride,
- int8_t *filter) {
- v16u8 vec0, vec1, vec2, vec3, filt0;
- v16i8 src0, src1, src2, src3, src4, src5, src6, src7, mask;
- v16i8 res0, res1, res2, res3;
- v8u16 vec4, vec5, vec6, vec7, filt;
-
- mask = LD_SB(&mc_filt_mask_arr[16]);
-
- /* rearranging filter */
- filt = LD_UH(filter);
- filt0 = (v16u8)__msa_splati_h((v8i16)filt, 0);
-
- LD_SB8(src, src_stride, src0, src1, src2, src3, src4, src5, src6, src7);
- VSHF_B2_UB(src0, src1, src2, src3, mask, mask, vec0, vec1);
- VSHF_B2_UB(src4, src5, src6, src7, mask, mask, vec2, vec3);
- DOTP_UB4_UH(vec0, vec1, vec2, vec3, filt0, filt0, filt0, filt0, vec4, vec5,
- vec6, vec7);
- SRARI_H4_UH(vec4, vec5, vec6, vec7, FILTER_BITS);
- PCKEV_B4_SB(vec4, vec4, vec5, vec5, vec6, vec6, vec7, vec7, res0, res1, res2,
- res3);
- ST4x4_UB(res0, res1, 0, 1, 0, 1, dst, dst_stride);
- dst += (4 * dst_stride);
- ST4x4_UB(res2, res3, 0, 1, 0, 1, dst, dst_stride);
-}
-
-static void common_hz_2t_4w_msa(const uint8_t *src, int32_t src_stride,
- uint8_t *dst, int32_t dst_stride,
- int8_t *filter, int32_t height) {
- if (4 == height) {
- common_hz_2t_4x4_msa(src, src_stride, dst, dst_stride, filter);
- } else if (8 == height) {
- common_hz_2t_4x8_msa(src, src_stride, dst, dst_stride, filter);
- }
-}
-
-static void common_hz_2t_8x4_msa(const uint8_t *src, int32_t src_stride,
- uint8_t *dst, int32_t dst_stride,
- int8_t *filter) {
- v16u8 filt0;
- v16i8 src0, src1, src2, src3, mask;
- v8u16 vec0, vec1, vec2, vec3, filt;
-
- mask = LD_SB(&mc_filt_mask_arr[0]);
-
- /* rearranging filter */
- filt = LD_UH(filter);
- filt0 = (v16u8)__msa_splati_h((v8i16)filt, 0);
-
- LD_SB4(src, src_stride, src0, src1, src2, src3);
- VSHF_B2_UH(src0, src0, src1, src1, mask, mask, vec0, vec1);
- VSHF_B2_UH(src2, src2, src3, src3, mask, mask, vec2, vec3);
- DOTP_UB4_UH(vec0, vec1, vec2, vec3, filt0, filt0, filt0, filt0, vec0, vec1,
- vec2, vec3);
- SRARI_H4_UH(vec0, vec1, vec2, vec3, FILTER_BITS);
- PCKEV_B2_SB(vec1, vec0, vec3, vec2, src0, src1);
- ST8x4_UB(src0, src1, dst, dst_stride);
-}
-
-static void common_hz_2t_8x8mult_msa(const uint8_t *src, int32_t src_stride,
- uint8_t *dst, int32_t dst_stride,
- int8_t *filter, int32_t height) {
- v16u8 filt0;
- v16i8 src0, src1, src2, src3, mask, out0, out1;
- v8u16 vec0, vec1, vec2, vec3, filt;
-
- mask = LD_SB(&mc_filt_mask_arr[0]);
-
- /* rearranging filter */
- filt = LD_UH(filter);
- filt0 = (v16u8)__msa_splati_h((v8i16)filt, 0);
-
- LD_SB4(src, src_stride, src0, src1, src2, src3);
- src += (4 * src_stride);
-
- VSHF_B2_UH(src0, src0, src1, src1, mask, mask, vec0, vec1);
- VSHF_B2_UH(src2, src2, src3, src3, mask, mask, vec2, vec3);
- DOTP_UB4_UH(vec0, vec1, vec2, vec3, filt0, filt0, filt0, filt0, vec0, vec1,
- vec2, vec3);
- SRARI_H4_UH(vec0, vec1, vec2, vec3, FILTER_BITS);
-
- LD_SB4(src, src_stride, src0, src1, src2, src3);
- src += (4 * src_stride);
-
- PCKEV_B2_SB(vec1, vec0, vec3, vec2, out0, out1);
- ST8x4_UB(out0, out1, dst, dst_stride);
- dst += (4 * dst_stride);
-
- VSHF_B2_UH(src0, src0, src1, src1, mask, mask, vec0, vec1);
- VSHF_B2_UH(src2, src2, src3, src3, mask, mask, vec2, vec3);
- DOTP_UB4_UH(vec0, vec1, vec2, vec3, filt0, filt0, filt0, filt0, vec0, vec1,
- vec2, vec3);
- SRARI_H4_UH(vec0, vec1, vec2, vec3, FILTER_BITS);
- PCKEV_B2_SB(vec1, vec0, vec3, vec2, out0, out1);
- ST8x4_UB(out0, out1, dst, dst_stride);
- dst += (4 * dst_stride);
-
- if (16 == height) {
- LD_SB4(src, src_stride, src0, src1, src2, src3);
- src += (4 * src_stride);
-
- VSHF_B2_UH(src0, src0, src1, src1, mask, mask, vec0, vec1);
- VSHF_B2_UH(src2, src2, src3, src3, mask, mask, vec2, vec3);
- DOTP_UB4_UH(vec0, vec1, vec2, vec3, filt0, filt0, filt0, filt0, vec0, vec1,
- vec2, vec3);
- SRARI_H4_UH(vec0, vec1, vec2, vec3, FILTER_BITS);
- LD_SB4(src, src_stride, src0, src1, src2, src3);
- src += (4 * src_stride);
-
- PCKEV_B2_SB(vec1, vec0, vec3, vec2, out0, out1);
- ST8x4_UB(out0, out1, dst, dst_stride);
-
- VSHF_B2_UH(src0, src0, src1, src1, mask, mask, vec0, vec1);
- VSHF_B2_UH(src2, src2, src3, src3, mask, mask, vec2, vec3);
- DOTP_UB4_UH(vec0, vec1, vec2, vec3, filt0, filt0, filt0, filt0, vec0, vec1,
- vec2, vec3);
- SRARI_H4_UH(vec0, vec1, vec2, vec3, FILTER_BITS);
- PCKEV_B2_SB(vec1, vec0, vec3, vec2, out0, out1);
- ST8x4_UB(out0, out1, dst + 4 * dst_stride, dst_stride);
- }
-}
-
-static void common_hz_2t_8w_msa(const uint8_t *src, int32_t src_stride,
- uint8_t *dst, int32_t dst_stride,
- int8_t *filter, int32_t height) {
- if (4 == height) {
- common_hz_2t_8x4_msa(src, src_stride, dst, dst_stride, filter);
- } else {
- common_hz_2t_8x8mult_msa(src, src_stride, dst, dst_stride, filter, height);
- }
-}
-
-static void common_hz_2t_16w_msa(const uint8_t *src, int32_t src_stride,
- uint8_t *dst, int32_t dst_stride,
- int8_t *filter, int32_t height) {
- uint32_t loop_cnt;
- v16i8 src0, src1, src2, src3, src4, src5, src6, src7, mask;
- v16u8 filt0, vec0, vec1, vec2, vec3, vec4, vec5, vec6, vec7;
- v8u16 out0, out1, out2, out3, out4, out5, out6, out7, filt;
-
- mask = LD_SB(&mc_filt_mask_arr[0]);
-
- loop_cnt = (height >> 2) - 1;
-
- /* rearranging filter */
- filt = LD_UH(filter);
- filt0 = (v16u8)__msa_splati_h((v8i16)filt, 0);
-
- LD_SB4(src, src_stride, src0, src2, src4, src6);
- LD_SB4(src + 8, src_stride, src1, src3, src5, src7);
- src += (4 * src_stride);
-
- VSHF_B2_UB(src0, src0, src1, src1, mask, mask, vec0, vec1);
- VSHF_B2_UB(src2, src2, src3, src3, mask, mask, vec2, vec3);
- VSHF_B2_UB(src4, src4, src5, src5, mask, mask, vec4, vec5);
- VSHF_B2_UB(src6, src6, src7, src7, mask, mask, vec6, vec7);
- DOTP_UB4_UH(vec0, vec1, vec2, vec3, filt0, filt0, filt0, filt0, out0, out1,
- out2, out3);
- DOTP_UB4_UH(vec4, vec5, vec6, vec7, filt0, filt0, filt0, filt0, out4, out5,
- out6, out7);
- SRARI_H4_UH(out0, out1, out2, out3, FILTER_BITS);
- SRARI_H4_UH(out4, out5, out6, out7, FILTER_BITS);
- PCKEV_ST_SB(out0, out1, dst);
- dst += dst_stride;
- PCKEV_ST_SB(out2, out3, dst);
- dst += dst_stride;
- PCKEV_ST_SB(out4, out5, dst);
- dst += dst_stride;
- PCKEV_ST_SB(out6, out7, dst);
- dst += dst_stride;
-
- for (; loop_cnt--;) {
- LD_SB4(src, src_stride, src0, src2, src4, src6);
- LD_SB4(src + 8, src_stride, src1, src3, src5, src7);
- src += (4 * src_stride);
-
- VSHF_B2_UB(src0, src0, src1, src1, mask, mask, vec0, vec1);
- VSHF_B2_UB(src2, src2, src3, src3, mask, mask, vec2, vec3);
- VSHF_B2_UB(src4, src4, src5, src5, mask, mask, vec4, vec5);
- VSHF_B2_UB(src6, src6, src7, src7, mask, mask, vec6, vec7);
- DOTP_UB4_UH(vec0, vec1, vec2, vec3, filt0, filt0, filt0, filt0, out0, out1,
- out2, out3);
- DOTP_UB4_UH(vec4, vec5, vec6, vec7, filt0, filt0, filt0, filt0, out4, out5,
- out6, out7);
- SRARI_H4_UH(out0, out1, out2, out3, FILTER_BITS);
- SRARI_H4_UH(out4, out5, out6, out7, FILTER_BITS);
- PCKEV_ST_SB(out0, out1, dst);
- dst += dst_stride;
- PCKEV_ST_SB(out2, out3, dst);
- dst += dst_stride;
- PCKEV_ST_SB(out4, out5, dst);
- dst += dst_stride;
- PCKEV_ST_SB(out6, out7, dst);
- dst += dst_stride;
- }
-}
-
-static void common_hz_2t_32w_msa(const uint8_t *src, int32_t src_stride,
- uint8_t *dst, int32_t dst_stride,
- int8_t *filter, int32_t height) {
- uint32_t loop_cnt;
- v16i8 src0, src1, src2, src3, src4, src5, src6, src7, mask;
- v16u8 filt0, vec0, vec1, vec2, vec3, vec4, vec5, vec6, vec7;
- v8u16 out0, out1, out2, out3, out4, out5, out6, out7, filt;
-
- mask = LD_SB(&mc_filt_mask_arr[0]);
-
- /* rearranging filter */
- filt = LD_UH(filter);
- filt0 = (v16u8)__msa_splati_h((v8i16)filt, 0);
-
- for (loop_cnt = height >> 1; loop_cnt--;) {
- src0 = LD_SB(src);
- src2 = LD_SB(src + 16);
- src3 = LD_SB(src + 24);
- src1 = __msa_sldi_b(src2, src0, 8);
- src += src_stride;
- src4 = LD_SB(src);
- src6 = LD_SB(src + 16);
- src7 = LD_SB(src + 24);
- src5 = __msa_sldi_b(src6, src4, 8);
- src += src_stride;
-
- VSHF_B2_UB(src0, src0, src1, src1, mask, mask, vec0, vec1);
- VSHF_B2_UB(src2, src2, src3, src3, mask, mask, vec2, vec3);
- VSHF_B2_UB(src4, src4, src5, src5, mask, mask, vec4, vec5);
- VSHF_B2_UB(src6, src6, src7, src7, mask, mask, vec6, vec7);
- DOTP_UB4_UH(vec0, vec1, vec2, vec3, filt0, filt0, filt0, filt0, out0, out1,
- out2, out3);
- DOTP_UB4_UH(vec4, vec5, vec6, vec7, filt0, filt0, filt0, filt0, out4, out5,
- out6, out7);
- SRARI_H4_UH(out0, out1, out2, out3, FILTER_BITS);
- SRARI_H4_UH(out4, out5, out6, out7, FILTER_BITS);
- PCKEV_ST_SB(out0, out1, dst);
- PCKEV_ST_SB(out2, out3, dst + 16);
- dst += dst_stride;
- PCKEV_ST_SB(out4, out5, dst);
- PCKEV_ST_SB(out6, out7, dst + 16);
- dst += dst_stride;
- }
-}
-
-static void common_hz_2t_64w_msa(const uint8_t *src, int32_t src_stride,
- uint8_t *dst, int32_t dst_stride,
- int8_t *filter, int32_t height) {
- uint32_t loop_cnt;
- v16i8 src0, src1, src2, src3, src4, src5, src6, src7, mask;
- v16u8 filt0, vec0, vec1, vec2, vec3, vec4, vec5, vec6, vec7;
- v8u16 out0, out1, out2, out3, out4, out5, out6, out7, filt;
-
- mask = LD_SB(&mc_filt_mask_arr[0]);
-
- /* rearranging filter */
- filt = LD_UH(filter);
- filt0 = (v16u8)__msa_splati_h((v8i16)filt, 0);
-
- for (loop_cnt = height; loop_cnt--;) {
- src0 = LD_SB(src);
- src2 = LD_SB(src + 16);
- src4 = LD_SB(src + 32);
- src6 = LD_SB(src + 48);
- src7 = LD_SB(src + 56);
- SLDI_B3_SB(src2, src4, src6, src0, src2, src4, src1, src3, src5, 8);
- src += src_stride;
-
- VSHF_B2_UB(src0, src0, src1, src1, mask, mask, vec0, vec1);
- VSHF_B2_UB(src2, src2, src3, src3, mask, mask, vec2, vec3);
- VSHF_B2_UB(src4, src4, src5, src5, mask, mask, vec4, vec5);
- VSHF_B2_UB(src6, src6, src7, src7, mask, mask, vec6, vec7);
- DOTP_UB4_UH(vec0, vec1, vec2, vec3, filt0, filt0, filt0, filt0, out0, out1,
- out2, out3);
- DOTP_UB4_UH(vec4, vec5, vec6, vec7, filt0, filt0, filt0, filt0, out4, out5,
- out6, out7);
- SRARI_H4_UH(out0, out1, out2, out3, FILTER_BITS);
- SRARI_H4_UH(out4, out5, out6, out7, FILTER_BITS);
- PCKEV_ST_SB(out0, out1, dst);
- PCKEV_ST_SB(out2, out3, dst + 16);
- PCKEV_ST_SB(out4, out5, dst + 32);
- PCKEV_ST_SB(out6, out7, dst + 48);
- dst += dst_stride;
- }
-}
-
-void aom_convolve8_horiz_msa(const uint8_t *src, ptrdiff_t src_stride,
- uint8_t *dst, ptrdiff_t dst_stride,
- const int16_t *filter_x, int x_step_q4,
- const int16_t *filter_y, int y_step_q4, int w,
- int h) {
- int8_t cnt, filt_hor[8];
-
- assert(x_step_q4 == 16);
- assert(((const int32_t *)filter_x)[1] != 0x800000);
-
- for (cnt = 0; cnt < 8; ++cnt) {
- filt_hor[cnt] = filter_x[cnt];
- }
-
- if (((const int32_t *)filter_x)[0] == 0) {
- switch (w) {
- case 4:
- common_hz_2t_4w_msa(src, (int32_t)src_stride, dst, (int32_t)dst_stride,
- &filt_hor[3], h);
- break;
- case 8:
- common_hz_2t_8w_msa(src, (int32_t)src_stride, dst, (int32_t)dst_stride,
- &filt_hor[3], h);
- break;
- case 16:
- common_hz_2t_16w_msa(src, (int32_t)src_stride, dst, (int32_t)dst_stride,
- &filt_hor[3], h);
- break;
- case 32:
- common_hz_2t_32w_msa(src, (int32_t)src_stride, dst, (int32_t)dst_stride,
- &filt_hor[3], h);
- break;
- case 64:
- common_hz_2t_64w_msa(src, (int32_t)src_stride, dst, (int32_t)dst_stride,
- &filt_hor[3], h);
- break;
- default:
- aom_convolve8_horiz_c(src, src_stride, dst, dst_stride, filter_x,
- x_step_q4, filter_y, y_step_q4, w, h);
- break;
- }
- } else {
- switch (w) {
- case 4:
- common_hz_8t_4w_msa(src, (int32_t)src_stride, dst, (int32_t)dst_stride,
- filt_hor, h);
- break;
- case 8:
- common_hz_8t_8w_msa(src, (int32_t)src_stride, dst, (int32_t)dst_stride,
- filt_hor, h);
- break;
- case 16:
- common_hz_8t_16w_msa(src, (int32_t)src_stride, dst, (int32_t)dst_stride,
- filt_hor, h);
- break;
- case 32:
- common_hz_8t_32w_msa(src, (int32_t)src_stride, dst, (int32_t)dst_stride,
- filt_hor, h);
- break;
- case 64:
- common_hz_8t_64w_msa(src, (int32_t)src_stride, dst, (int32_t)dst_stride,
- filt_hor, h);
- break;
- default:
- aom_convolve8_horiz_c(src, src_stride, dst, dst_stride, filter_x,
- x_step_q4, filter_y, y_step_q4, w, h);
- break;
- }
- }
-}
diff --git a/third_party/aom/aom_dsp/mips/aom_convolve8_vert_msa.c b/third_party/aom/aom_dsp/mips/aom_convolve8_vert_msa.c
deleted file mode 100644
index aa962b41f..000000000
--- a/third_party/aom/aom_dsp/mips/aom_convolve8_vert_msa.c
+++ /dev/null
@@ -1,701 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#include <assert.h>
-
-#include "config/aom_dsp_rtcd.h"
-
-#include "aom_dsp/mips/aom_convolve_msa.h"
-
-static void common_vt_8t_4w_msa(const uint8_t *src, int32_t src_stride,
- uint8_t *dst, int32_t dst_stride,
- int8_t *filter, int32_t height) {
- uint32_t loop_cnt;
- v16i8 src0, src1, src2, src3, src4, src5, src6, src7, src8, src9, src10;
- v16i8 src10_r, src32_r, src54_r, src76_r, src98_r, src21_r, src43_r;
- v16i8 src65_r, src87_r, src109_r, src2110, src4332, src6554, src8776;
- v16i8 src10998, filt0, filt1, filt2, filt3;
- v16u8 out;
- v8i16 filt, out10, out32;
-
- src -= (3 * src_stride);
-
- filt = LD_SH(filter);
- SPLATI_H4_SB(filt, 0, 1, 2, 3, filt0, filt1, filt2, filt3);
-
- LD_SB7(src, src_stride, src0, src1, src2, src3, src4, src5, src6);
- src += (7 * src_stride);
-
- ILVR_B4_SB(src1, src0, src3, src2, src5, src4, src2, src1, src10_r, src32_r,
- src54_r, src21_r);
- ILVR_B2_SB(src4, src3, src6, src5, src43_r, src65_r);
- ILVR_D3_SB(src21_r, src10_r, src43_r, src32_r, src65_r, src54_r, src2110,
- src4332, src6554);
- XORI_B3_128_SB(src2110, src4332, src6554);
-
- for (loop_cnt = (height >> 2); loop_cnt--;) {
- LD_SB4(src, src_stride, src7, src8, src9, src10);
- src += (4 * src_stride);
-
- ILVR_B4_SB(src7, src6, src8, src7, src9, src8, src10, src9, src76_r,
- src87_r, src98_r, src109_r);
- ILVR_D2_SB(src87_r, src76_r, src109_r, src98_r, src8776, src10998);
- XORI_B2_128_SB(src8776, src10998);
- out10 = FILT_8TAP_DPADD_S_H(src2110, src4332, src6554, src8776, filt0,
- filt1, filt2, filt3);
- out32 = FILT_8TAP_DPADD_S_H(src4332, src6554, src8776, src10998, filt0,
- filt1, filt2, filt3);
- SRARI_H2_SH(out10, out32, FILTER_BITS);
- SAT_SH2_SH(out10, out32, 7);
- out = PCKEV_XORI128_UB(out10, out32);
- ST4x4_UB(out, out, 0, 1, 2, 3, dst, dst_stride);
- dst += (4 * dst_stride);
-
- src2110 = src6554;
- src4332 = src8776;
- src6554 = src10998;
- src6 = src10;
- }
-}
-
-static void common_vt_8t_8w_msa(const uint8_t *src, int32_t src_stride,
- uint8_t *dst, int32_t dst_stride,
- int8_t *filter, int32_t height) {
- uint32_t loop_cnt;
- v16i8 src0, src1, src2, src3, src4, src5, src6, src7, src8, src9, src10;
- v16i8 src10_r, src32_r, src54_r, src76_r, src98_r, src21_r, src43_r;
- v16i8 src65_r, src87_r, src109_r, filt0, filt1, filt2, filt3;
- v16u8 tmp0, tmp1;
- v8i16 filt, out0_r, out1_r, out2_r, out3_r;
-
- src -= (3 * src_stride);
-
- filt = LD_SH(filter);
- SPLATI_H4_SB(filt, 0, 1, 2, 3, filt0, filt1, filt2, filt3);
-
- LD_SB7(src, src_stride, src0, src1, src2, src3, src4, src5, src6);
- XORI_B7_128_SB(src0, src1, src2, src3, src4, src5, src6);
- src += (7 * src_stride);
- ILVR_B4_SB(src1, src0, src3, src2, src5, src4, src2, src1, src10_r, src32_r,
- src54_r, src21_r);
- ILVR_B2_SB(src4, src3, src6, src5, src43_r, src65_r);
-
- for (loop_cnt = (height >> 2); loop_cnt--;) {
- LD_SB4(src, src_stride, src7, src8, src9, src10);
- XORI_B4_128_SB(src7, src8, src9, src10);
- src += (4 * src_stride);
-
- ILVR_B4_SB(src7, src6, src8, src7, src9, src8, src10, src9, src76_r,
- src87_r, src98_r, src109_r);
- out0_r = FILT_8TAP_DPADD_S_H(src10_r, src32_r, src54_r, src76_r, filt0,
- filt1, filt2, filt3);
- out1_r = FILT_8TAP_DPADD_S_H(src21_r, src43_r, src65_r, src87_r, filt0,
- filt1, filt2, filt3);
- out2_r = FILT_8TAP_DPADD_S_H(src32_r, src54_r, src76_r, src98_r, filt0,
- filt1, filt2, filt3);
- out3_r = FILT_8TAP_DPADD_S_H(src43_r, src65_r, src87_r, src109_r, filt0,
- filt1, filt2, filt3);
- SRARI_H4_SH(out0_r, out1_r, out2_r, out3_r, FILTER_BITS);
- SAT_SH4_SH(out0_r, out1_r, out2_r, out3_r, 7);
- tmp0 = PCKEV_XORI128_UB(out0_r, out1_r);
- tmp1 = PCKEV_XORI128_UB(out2_r, out3_r);
- ST8x4_UB(tmp0, tmp1, dst, dst_stride);
- dst += (4 * dst_stride);
-
- src10_r = src54_r;
- src32_r = src76_r;
- src54_r = src98_r;
- src21_r = src65_r;
- src43_r = src87_r;
- src65_r = src109_r;
- src6 = src10;
- }
-}
-
-static void common_vt_8t_16w_msa(const uint8_t *src, int32_t src_stride,
- uint8_t *dst, int32_t dst_stride,
- int8_t *filter, int32_t height) {
- uint32_t loop_cnt;
- v16i8 src0, src1, src2, src3, src4, src5, src6, src7, src8, src9, src10;
- v16i8 filt0, filt1, filt2, filt3;
- v16i8 src10_r, src32_r, src54_r, src76_r, src98_r, src21_r, src43_r;
- v16i8 src65_r, src87_r, src109_r, src10_l, src32_l, src54_l, src76_l;
- v16i8 src98_l, src21_l, src43_l, src65_l, src87_l, src109_l;
- v16u8 tmp0, tmp1, tmp2, tmp3;
- v8i16 filt, out0_r, out1_r, out2_r, out3_r, out0_l, out1_l, out2_l, out3_l;
-
- src -= (3 * src_stride);
-
- filt = LD_SH(filter);
- SPLATI_H4_SB(filt, 0, 1, 2, 3, filt0, filt1, filt2, filt3);
-
- LD_SB7(src, src_stride, src0, src1, src2, src3, src4, src5, src6);
- XORI_B7_128_SB(src0, src1, src2, src3, src4, src5, src6);
- src += (7 * src_stride);
- ILVR_B4_SB(src1, src0, src3, src2, src5, src4, src2, src1, src10_r, src32_r,
- src54_r, src21_r);
- ILVR_B2_SB(src4, src3, src6, src5, src43_r, src65_r);
- ILVL_B4_SB(src1, src0, src3, src2, src5, src4, src2, src1, src10_l, src32_l,
- src54_l, src21_l);
- ILVL_B2_SB(src4, src3, src6, src5, src43_l, src65_l);
-
- for (loop_cnt = (height >> 2); loop_cnt--;) {
- LD_SB4(src, src_stride, src7, src8, src9, src10);
- XORI_B4_128_SB(src7, src8, src9, src10);
- src += (4 * src_stride);
-
- ILVR_B4_SB(src7, src6, src8, src7, src9, src8, src10, src9, src76_r,
- src87_r, src98_r, src109_r);
- ILVL_B4_SB(src7, src6, src8, src7, src9, src8, src10, src9, src76_l,
- src87_l, src98_l, src109_l);
- out0_r = FILT_8TAP_DPADD_S_H(src10_r, src32_r, src54_r, src76_r, filt0,
- filt1, filt2, filt3);
- out1_r = FILT_8TAP_DPADD_S_H(src21_r, src43_r, src65_r, src87_r, filt0,
- filt1, filt2, filt3);
- out2_r = FILT_8TAP_DPADD_S_H(src32_r, src54_r, src76_r, src98_r, filt0,
- filt1, filt2, filt3);
- out3_r = FILT_8TAP_DPADD_S_H(src43_r, src65_r, src87_r, src109_r, filt0,
- filt1, filt2, filt3);
- out0_l = FILT_8TAP_DPADD_S_H(src10_l, src32_l, src54_l, src76_l, filt0,
- filt1, filt2, filt3);
- out1_l = FILT_8TAP_DPADD_S_H(src21_l, src43_l, src65_l, src87_l, filt0,
- filt1, filt2, filt3);
- out2_l = FILT_8TAP_DPADD_S_H(src32_l, src54_l, src76_l, src98_l, filt0,
- filt1, filt2, filt3);
- out3_l = FILT_8TAP_DPADD_S_H(src43_l, src65_l, src87_l, src109_l, filt0,
- filt1, filt2, filt3);
- SRARI_H4_SH(out0_r, out1_r, out2_r, out3_r, FILTER_BITS);
- SRARI_H4_SH(out0_l, out1_l, out2_l, out3_l, FILTER_BITS);
- SAT_SH4_SH(out0_r, out1_r, out2_r, out3_r, 7);
- SAT_SH4_SH(out0_l, out1_l, out2_l, out3_l, 7);
- PCKEV_B4_UB(out0_l, out0_r, out1_l, out1_r, out2_l, out2_r, out3_l, out3_r,
- tmp0, tmp1, tmp2, tmp3);
- XORI_B4_128_UB(tmp0, tmp1, tmp2, tmp3);
- ST_UB4(tmp0, tmp1, tmp2, tmp3, dst, dst_stride);
- dst += (4 * dst_stride);
-
- src10_r = src54_r;
- src32_r = src76_r;
- src54_r = src98_r;
- src21_r = src65_r;
- src43_r = src87_r;
- src65_r = src109_r;
- src10_l = src54_l;
- src32_l = src76_l;
- src54_l = src98_l;
- src21_l = src65_l;
- src43_l = src87_l;
- src65_l = src109_l;
- src6 = src10;
- }
-}
-
-static void common_vt_8t_16w_mult_msa(const uint8_t *src, int32_t src_stride,
- uint8_t *dst, int32_t dst_stride,
- int8_t *filter, int32_t height,
- int32_t width) {
- const uint8_t *src_tmp;
- uint8_t *dst_tmp;
- uint32_t loop_cnt, cnt;
- v16i8 src0, src1, src2, src3, src4, src5, src6, src7, src8, src9, src10;
- v16i8 filt0, filt1, filt2, filt3;
- v16i8 src10_r, src32_r, src54_r, src76_r, src98_r, src21_r, src43_r;
- v16i8 src65_r, src87_r, src109_r, src10_l, src32_l, src54_l, src76_l;
- v16i8 src98_l, src21_l, src43_l, src65_l, src87_l, src109_l;
- v16u8 tmp0, tmp1, tmp2, tmp3;
- v8i16 filt, out0_r, out1_r, out2_r, out3_r, out0_l, out1_l, out2_l, out3_l;
-
- src -= (3 * src_stride);
-
- filt = LD_SH(filter);
- SPLATI_H4_SB(filt, 0, 1, 2, 3, filt0, filt1, filt2, filt3);
-
- for (cnt = (width >> 4); cnt--;) {
- src_tmp = src;
- dst_tmp = dst;
-
- LD_SB7(src_tmp, src_stride, src0, src1, src2, src3, src4, src5, src6);
- XORI_B7_128_SB(src0, src1, src2, src3, src4, src5, src6);
- src_tmp += (7 * src_stride);
- ILVR_B4_SB(src1, src0, src3, src2, src5, src4, src2, src1, src10_r, src32_r,
- src54_r, src21_r);
- ILVR_B2_SB(src4, src3, src6, src5, src43_r, src65_r);
- ILVL_B4_SB(src1, src0, src3, src2, src5, src4, src2, src1, src10_l, src32_l,
- src54_l, src21_l);
- ILVL_B2_SB(src4, src3, src6, src5, src43_l, src65_l);
-
- for (loop_cnt = (height >> 2); loop_cnt--;) {
- LD_SB4(src_tmp, src_stride, src7, src8, src9, src10);
- XORI_B4_128_SB(src7, src8, src9, src10);
- src_tmp += (4 * src_stride);
- ILVR_B4_SB(src7, src6, src8, src7, src9, src8, src10, src9, src76_r,
- src87_r, src98_r, src109_r);
- ILVL_B4_SB(src7, src6, src8, src7, src9, src8, src10, src9, src76_l,
- src87_l, src98_l, src109_l);
- out0_r = FILT_8TAP_DPADD_S_H(src10_r, src32_r, src54_r, src76_r, filt0,
- filt1, filt2, filt3);
- out1_r = FILT_8TAP_DPADD_S_H(src21_r, src43_r, src65_r, src87_r, filt0,
- filt1, filt2, filt3);
- out2_r = FILT_8TAP_DPADD_S_H(src32_r, src54_r, src76_r, src98_r, filt0,
- filt1, filt2, filt3);
- out3_r = FILT_8TAP_DPADD_S_H(src43_r, src65_r, src87_r, src109_r, filt0,
- filt1, filt2, filt3);
- out0_l = FILT_8TAP_DPADD_S_H(src10_l, src32_l, src54_l, src76_l, filt0,
- filt1, filt2, filt3);
- out1_l = FILT_8TAP_DPADD_S_H(src21_l, src43_l, src65_l, src87_l, filt0,
- filt1, filt2, filt3);
- out2_l = FILT_8TAP_DPADD_S_H(src32_l, src54_l, src76_l, src98_l, filt0,
- filt1, filt2, filt3);
- out3_l = FILT_8TAP_DPADD_S_H(src43_l, src65_l, src87_l, src109_l, filt0,
- filt1, filt2, filt3);
- SRARI_H4_SH(out0_r, out1_r, out2_r, out3_r, FILTER_BITS);
- SRARI_H4_SH(out0_l, out1_l, out2_l, out3_l, FILTER_BITS);
- SAT_SH4_SH(out0_r, out1_r, out2_r, out3_r, 7);
- SAT_SH4_SH(out0_l, out1_l, out2_l, out3_l, 7);
- PCKEV_B4_UB(out0_l, out0_r, out1_l, out1_r, out2_l, out2_r, out3_l,
- out3_r, tmp0, tmp1, tmp2, tmp3);
- XORI_B4_128_UB(tmp0, tmp1, tmp2, tmp3);
- ST_UB4(tmp0, tmp1, tmp2, tmp3, dst_tmp, dst_stride);
- dst_tmp += (4 * dst_stride);
-
- src10_r = src54_r;
- src32_r = src76_r;
- src54_r = src98_r;
- src21_r = src65_r;
- src43_r = src87_r;
- src65_r = src109_r;
- src10_l = src54_l;
- src32_l = src76_l;
- src54_l = src98_l;
- src21_l = src65_l;
- src43_l = src87_l;
- src65_l = src109_l;
- src6 = src10;
- }
-
- src += 16;
- dst += 16;
- }
-}
-
-static void common_vt_8t_32w_msa(const uint8_t *src, int32_t src_stride,
- uint8_t *dst, int32_t dst_stride,
- int8_t *filter, int32_t height) {
- common_vt_8t_16w_mult_msa(src, src_stride, dst, dst_stride, filter, height,
- 32);
-}
-
-static void common_vt_8t_64w_msa(const uint8_t *src, int32_t src_stride,
- uint8_t *dst, int32_t dst_stride,
- int8_t *filter, int32_t height) {
- common_vt_8t_16w_mult_msa(src, src_stride, dst, dst_stride, filter, height,
- 64);
-}
-
-static void common_vt_2t_4x4_msa(const uint8_t *src, int32_t src_stride,
- uint8_t *dst, int32_t dst_stride,
- int8_t *filter) {
- v16i8 src0, src1, src2, src3, src4;
- v16i8 src10_r, src32_r, src21_r, src43_r, src2110, src4332;
- v16u8 filt0;
- v8i16 filt;
- v8u16 tmp0, tmp1;
-
- filt = LD_SH(filter);
- filt0 = (v16u8)__msa_splati_h(filt, 0);
-
- LD_SB5(src, src_stride, src0, src1, src2, src3, src4);
- src += (5 * src_stride);
-
- ILVR_B4_SB(src1, src0, src2, src1, src3, src2, src4, src3, src10_r, src21_r,
- src32_r, src43_r);
- ILVR_D2_SB(src21_r, src10_r, src43_r, src32_r, src2110, src4332);
- DOTP_UB2_UH(src2110, src4332, filt0, filt0, tmp0, tmp1);
- SRARI_H2_UH(tmp0, tmp1, FILTER_BITS);
- src2110 = __msa_pckev_b((v16i8)tmp1, (v16i8)tmp0);
- ST4x4_UB(src2110, src2110, 0, 1, 2, 3, dst, dst_stride);
-}
-
-static void common_vt_2t_4x8_msa(const uint8_t *src, int32_t src_stride,
- uint8_t *dst, int32_t dst_stride,
- int8_t *filter) {
- v16i8 src0, src1, src2, src3, src4, src5, src6, src7, src8;
- v16i8 src10_r, src32_r, src54_r, src76_r, src21_r, src43_r;
- v16i8 src65_r, src87_r, src2110, src4332, src6554, src8776;
- v8u16 tmp0, tmp1, tmp2, tmp3;
- v16u8 filt0;
- v8i16 filt;
-
- filt = LD_SH(filter);
- filt0 = (v16u8)__msa_splati_h(filt, 0);
-
- LD_SB8(src, src_stride, src0, src1, src2, src3, src4, src5, src6, src7);
- src += (8 * src_stride);
-
- src8 = LD_SB(src);
- src += src_stride;
-
- ILVR_B4_SB(src1, src0, src2, src1, src3, src2, src4, src3, src10_r, src21_r,
- src32_r, src43_r);
- ILVR_B4_SB(src5, src4, src6, src5, src7, src6, src8, src7, src54_r, src65_r,
- src76_r, src87_r);
- ILVR_D4_SB(src21_r, src10_r, src43_r, src32_r, src65_r, src54_r, src87_r,
- src76_r, src2110, src4332, src6554, src8776);
- DOTP_UB4_UH(src2110, src4332, src6554, src8776, filt0, filt0, filt0, filt0,
- tmp0, tmp1, tmp2, tmp3);
- SRARI_H4_UH(tmp0, tmp1, tmp2, tmp3, FILTER_BITS);
- PCKEV_B2_SB(tmp1, tmp0, tmp3, tmp2, src2110, src4332);
- ST4x4_UB(src2110, src2110, 0, 1, 2, 3, dst, dst_stride);
- ST4x4_UB(src4332, src4332, 0, 1, 2, 3, dst + 4 * dst_stride, dst_stride);
-}
-
-static void common_vt_2t_4w_msa(const uint8_t *src, int32_t src_stride,
- uint8_t *dst, int32_t dst_stride,
- int8_t *filter, int32_t height) {
- if (4 == height) {
- common_vt_2t_4x4_msa(src, src_stride, dst, dst_stride, filter);
- } else if (8 == height) {
- common_vt_2t_4x8_msa(src, src_stride, dst, dst_stride, filter);
- }
-}
-
-static void common_vt_2t_8x4_msa(const uint8_t *src, int32_t src_stride,
- uint8_t *dst, int32_t dst_stride,
- int8_t *filter) {
- v16u8 src0, src1, src2, src3, src4, vec0, vec1, vec2, vec3, filt0;
- v16i8 out0, out1;
- v8u16 tmp0, tmp1, tmp2, tmp3;
- v8i16 filt;
-
- /* rearranging filter_y */
- filt = LD_SH(filter);
- filt0 = (v16u8)__msa_splati_h(filt, 0);
-
- LD_UB5(src, src_stride, src0, src1, src2, src3, src4);
- ILVR_B2_UB(src1, src0, src2, src1, vec0, vec1);
- ILVR_B2_UB(src3, src2, src4, src3, vec2, vec3);
- DOTP_UB4_UH(vec0, vec1, vec2, vec3, filt0, filt0, filt0, filt0, tmp0, tmp1,
- tmp2, tmp3);
- SRARI_H4_UH(tmp0, tmp1, tmp2, tmp3, FILTER_BITS);
- PCKEV_B2_SB(tmp1, tmp0, tmp3, tmp2, out0, out1);
- ST8x4_UB(out0, out1, dst, dst_stride);
-}
-
-static void common_vt_2t_8x8mult_msa(const uint8_t *src, int32_t src_stride,
- uint8_t *dst, int32_t dst_stride,
- int8_t *filter, int32_t height) {
- uint32_t loop_cnt;
- v16u8 src0, src1, src2, src3, src4, src5, src6, src7, src8;
- v16u8 vec0, vec1, vec2, vec3, vec4, vec5, vec6, vec7, filt0;
- v16i8 out0, out1;
- v8u16 tmp0, tmp1, tmp2, tmp3;
- v8i16 filt;
-
- /* rearranging filter_y */
- filt = LD_SH(filter);
- filt0 = (v16u8)__msa_splati_h(filt, 0);
-
- src0 = LD_UB(src);
- src += src_stride;
-
- for (loop_cnt = (height >> 3); loop_cnt--;) {
- LD_UB8(src, src_stride, src1, src2, src3, src4, src5, src6, src7, src8);
- src += (8 * src_stride);
-
- ILVR_B4_UB(src1, src0, src2, src1, src3, src2, src4, src3, vec0, vec1, vec2,
- vec3);
- ILVR_B4_UB(src5, src4, src6, src5, src7, src6, src8, src7, vec4, vec5, vec6,
- vec7);
- DOTP_UB4_UH(vec0, vec1, vec2, vec3, filt0, filt0, filt0, filt0, tmp0, tmp1,
- tmp2, tmp3);
- SRARI_H4_UH(tmp0, tmp1, tmp2, tmp3, FILTER_BITS);
- PCKEV_B2_SB(tmp1, tmp0, tmp3, tmp2, out0, out1);
- ST8x4_UB(out0, out1, dst, dst_stride);
- dst += (4 * dst_stride);
-
- DOTP_UB4_UH(vec4, vec5, vec6, vec7, filt0, filt0, filt0, filt0, tmp0, tmp1,
- tmp2, tmp3);
- SRARI_H4_UH(tmp0, tmp1, tmp2, tmp3, FILTER_BITS);
- PCKEV_B2_SB(tmp1, tmp0, tmp3, tmp2, out0, out1);
- ST8x4_UB(out0, out1, dst, dst_stride);
- dst += (4 * dst_stride);
-
- src0 = src8;
- }
-}
-
-static void common_vt_2t_8w_msa(const uint8_t *src, int32_t src_stride,
- uint8_t *dst, int32_t dst_stride,
- int8_t *filter, int32_t height) {
- if (4 == height) {
- common_vt_2t_8x4_msa(src, src_stride, dst, dst_stride, filter);
- } else {
- common_vt_2t_8x8mult_msa(src, src_stride, dst, dst_stride, filter, height);
- }
-}
-
-static void common_vt_2t_16w_msa(const uint8_t *src, int32_t src_stride,
- uint8_t *dst, int32_t dst_stride,
- int8_t *filter, int32_t height) {
- uint32_t loop_cnt;
- v16u8 src0, src1, src2, src3, src4;
- v16u8 vec0, vec1, vec2, vec3, vec4, vec5, vec6, vec7, filt0;
- v8u16 tmp0, tmp1, tmp2, tmp3;
- v8i16 filt;
-
- /* rearranging filter_y */
- filt = LD_SH(filter);
- filt0 = (v16u8)__msa_splati_h(filt, 0);
-
- src0 = LD_UB(src);
- src += src_stride;
-
- for (loop_cnt = (height >> 2); loop_cnt--;) {
- LD_UB4(src, src_stride, src1, src2, src3, src4);
- src += (4 * src_stride);
-
- ILVR_B2_UB(src1, src0, src2, src1, vec0, vec2);
- ILVL_B2_UB(src1, src0, src2, src1, vec1, vec3);
- DOTP_UB2_UH(vec0, vec1, filt0, filt0, tmp0, tmp1);
- SRARI_H2_UH(tmp0, tmp1, FILTER_BITS);
- PCKEV_ST_SB(tmp0, tmp1, dst);
- dst += dst_stride;
-
- ILVR_B2_UB(src3, src2, src4, src3, vec4, vec6);
- ILVL_B2_UB(src3, src2, src4, src3, vec5, vec7);
- DOTP_UB2_UH(vec2, vec3, filt0, filt0, tmp2, tmp3);
- SRARI_H2_UH(tmp2, tmp3, FILTER_BITS);
- PCKEV_ST_SB(tmp2, tmp3, dst);
- dst += dst_stride;
-
- DOTP_UB2_UH(vec4, vec5, filt0, filt0, tmp0, tmp1);
- SRARI_H2_UH(tmp0, tmp1, FILTER_BITS);
- PCKEV_ST_SB(tmp0, tmp1, dst);
- dst += dst_stride;
-
- DOTP_UB2_UH(vec6, vec7, filt0, filt0, tmp2, tmp3);
- SRARI_H2_UH(tmp2, tmp3, FILTER_BITS);
- PCKEV_ST_SB(tmp2, tmp3, dst);
- dst += dst_stride;
-
- src0 = src4;
- }
-}
-
-static void common_vt_2t_32w_msa(const uint8_t *src, int32_t src_stride,
- uint8_t *dst, int32_t dst_stride,
- int8_t *filter, int32_t height) {
- uint32_t loop_cnt;
- v16u8 src0, src1, src2, src3, src4, src5, src6, src7, src8, src9;
- v16u8 vec0, vec1, vec2, vec3, vec4, vec5, vec6, vec7, filt0;
- v8u16 tmp0, tmp1, tmp2, tmp3;
- v8i16 filt;
-
- /* rearranging filter_y */
- filt = LD_SH(filter);
- filt0 = (v16u8)__msa_splati_h(filt, 0);
-
- src0 = LD_UB(src);
- src5 = LD_UB(src + 16);
- src += src_stride;
-
- for (loop_cnt = (height >> 2); loop_cnt--;) {
- LD_UB4(src, src_stride, src1, src2, src3, src4);
- ILVR_B2_UB(src1, src0, src2, src1, vec0, vec2);
- ILVL_B2_UB(src1, src0, src2, src1, vec1, vec3);
-
- LD_UB4(src + 16, src_stride, src6, src7, src8, src9);
- src += (4 * src_stride);
-
- DOTP_UB2_UH(vec0, vec1, filt0, filt0, tmp0, tmp1);
- SRARI_H2_UH(tmp0, tmp1, FILTER_BITS);
- PCKEV_ST_SB(tmp0, tmp1, dst);
- DOTP_UB2_UH(vec2, vec3, filt0, filt0, tmp2, tmp3);
- SRARI_H2_UH(tmp2, tmp3, FILTER_BITS);
- PCKEV_ST_SB(tmp2, tmp3, dst + dst_stride);
-
- ILVR_B2_UB(src3, src2, src4, src3, vec4, vec6);
- ILVL_B2_UB(src3, src2, src4, src3, vec5, vec7);
- DOTP_UB2_UH(vec4, vec5, filt0, filt0, tmp0, tmp1);
- SRARI_H2_UH(tmp0, tmp1, FILTER_BITS);
- PCKEV_ST_SB(tmp0, tmp1, dst + 2 * dst_stride);
-
- DOTP_UB2_UH(vec6, vec7, filt0, filt0, tmp2, tmp3);
- SRARI_H2_UH(tmp2, tmp3, FILTER_BITS);
- PCKEV_ST_SB(tmp2, tmp3, dst + 3 * dst_stride);
-
- ILVR_B2_UB(src6, src5, src7, src6, vec0, vec2);
- ILVL_B2_UB(src6, src5, src7, src6, vec1, vec3);
- DOTP_UB2_UH(vec0, vec1, filt0, filt0, tmp0, tmp1);
- SRARI_H2_UH(tmp0, tmp1, FILTER_BITS);
- PCKEV_ST_SB(tmp0, tmp1, dst + 16);
-
- DOTP_UB2_UH(vec2, vec3, filt0, filt0, tmp2, tmp3);
- SRARI_H2_UH(tmp2, tmp3, FILTER_BITS);
- PCKEV_ST_SB(tmp2, tmp3, dst + 16 + dst_stride);
-
- ILVR_B2_UB(src8, src7, src9, src8, vec4, vec6);
- ILVL_B2_UB(src8, src7, src9, src8, vec5, vec7);
- DOTP_UB2_UH(vec4, vec5, filt0, filt0, tmp0, tmp1);
- SRARI_H2_UH(tmp0, tmp1, FILTER_BITS);
- PCKEV_ST_SB(tmp0, tmp1, dst + 16 + 2 * dst_stride);
-
- DOTP_UB2_UH(vec6, vec7, filt0, filt0, tmp2, tmp3);
- SRARI_H2_UH(tmp2, tmp3, FILTER_BITS);
- PCKEV_ST_SB(tmp2, tmp3, dst + 16 + 3 * dst_stride);
- dst += (4 * dst_stride);
-
- src0 = src4;
- src5 = src9;
- }
-}
-
-static void common_vt_2t_64w_msa(const uint8_t *src, int32_t src_stride,
- uint8_t *dst, int32_t dst_stride,
- int8_t *filter, int32_t height) {
- uint32_t loop_cnt;
- v16u8 src0, src1, src2, src3, src4, src5, src6, src7, src8, src9, src10;
- v16u8 src11, vec0, vec1, vec2, vec3, vec4, vec5, vec6, vec7, filt0;
- v8u16 tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7;
- v8i16 filt;
-
- /* rearranging filter_y */
- filt = LD_SH(filter);
- filt0 = (v16u8)__msa_splati_h(filt, 0);
-
- LD_UB4(src, 16, src0, src3, src6, src9);
- src += src_stride;
-
- for (loop_cnt = (height >> 1); loop_cnt--;) {
- LD_UB2(src, src_stride, src1, src2);
- LD_UB2(src + 16, src_stride, src4, src5);
- LD_UB2(src + 32, src_stride, src7, src8);
- LD_UB2(src + 48, src_stride, src10, src11);
- src += (2 * src_stride);
-
- ILVR_B2_UB(src1, src0, src2, src1, vec0, vec2);
- ILVL_B2_UB(src1, src0, src2, src1, vec1, vec3);
- DOTP_UB2_UH(vec0, vec1, filt0, filt0, tmp0, tmp1);
- SRARI_H2_UH(tmp0, tmp1, FILTER_BITS);
- PCKEV_ST_SB(tmp0, tmp1, dst);
-
- DOTP_UB2_UH(vec2, vec3, filt0, filt0, tmp2, tmp3);
- SRARI_H2_UH(tmp2, tmp3, FILTER_BITS);
- PCKEV_ST_SB(tmp2, tmp3, dst + dst_stride);
-
- ILVR_B2_UB(src4, src3, src5, src4, vec4, vec6);
- ILVL_B2_UB(src4, src3, src5, src4, vec5, vec7);
- DOTP_UB2_UH(vec4, vec5, filt0, filt0, tmp4, tmp5);
- SRARI_H2_UH(tmp4, tmp5, FILTER_BITS);
- PCKEV_ST_SB(tmp4, tmp5, dst + 16);
-
- DOTP_UB2_UH(vec6, vec7, filt0, filt0, tmp6, tmp7);
- SRARI_H2_UH(tmp6, tmp7, FILTER_BITS);
- PCKEV_ST_SB(tmp6, tmp7, dst + 16 + dst_stride);
-
- ILVR_B2_UB(src7, src6, src8, src7, vec0, vec2);
- ILVL_B2_UB(src7, src6, src8, src7, vec1, vec3);
- DOTP_UB2_UH(vec0, vec1, filt0, filt0, tmp0, tmp1);
- SRARI_H2_UH(tmp0, tmp1, FILTER_BITS);
- PCKEV_ST_SB(tmp0, tmp1, dst + 32);
-
- DOTP_UB2_UH(vec2, vec3, filt0, filt0, tmp2, tmp3);
- SRARI_H2_UH(tmp2, tmp3, FILTER_BITS);
- PCKEV_ST_SB(tmp2, tmp3, dst + 32 + dst_stride);
-
- ILVR_B2_UB(src10, src9, src11, src10, vec4, vec6);
- ILVL_B2_UB(src10, src9, src11, src10, vec5, vec7);
- DOTP_UB2_UH(vec4, vec5, filt0, filt0, tmp4, tmp5);
- SRARI_H2_UH(tmp4, tmp5, FILTER_BITS);
- PCKEV_ST_SB(tmp4, tmp5, dst + 48);
-
- DOTP_UB2_UH(vec6, vec7, filt0, filt0, tmp6, tmp7);
- SRARI_H2_UH(tmp6, tmp7, FILTER_BITS);
- PCKEV_ST_SB(tmp6, tmp7, dst + 48 + dst_stride);
- dst += (2 * dst_stride);
-
- src0 = src2;
- src3 = src5;
- src6 = src8;
- src9 = src11;
- }
-}
-
-void aom_convolve8_vert_msa(const uint8_t *src, ptrdiff_t src_stride,
- uint8_t *dst, ptrdiff_t dst_stride,
- const int16_t *filter_x, int x_step_q4,
- const int16_t *filter_y, int y_step_q4, int w,
- int h) {
- int8_t cnt, filt_ver[8];
-
- assert(y_step_q4 == 16);
- assert(((const int32_t *)filter_y)[1] != 0x800000);
-
- for (cnt = 8; cnt--;) {
- filt_ver[cnt] = filter_y[cnt];
- }
-
- if (((const int32_t *)filter_y)[0] == 0) {
- switch (w) {
- case 4:
- common_vt_2t_4w_msa(src, (int32_t)src_stride, dst, (int32_t)dst_stride,
- &filt_ver[3], h);
- break;
- case 8:
- common_vt_2t_8w_msa(src, (int32_t)src_stride, dst, (int32_t)dst_stride,
- &filt_ver[3], h);
- break;
- case 16:
- common_vt_2t_16w_msa(src, (int32_t)src_stride, dst, (int32_t)dst_stride,
- &filt_ver[3], h);
- break;
- case 32:
- common_vt_2t_32w_msa(src, (int32_t)src_stride, dst, (int32_t)dst_stride,
- &filt_ver[3], h);
- break;
- case 64:
- common_vt_2t_64w_msa(src, (int32_t)src_stride, dst, (int32_t)dst_stride,
- &filt_ver[3], h);
- break;
- default:
- aom_convolve8_vert_c(src, src_stride, dst, dst_stride, filter_x,
- x_step_q4, filter_y, y_step_q4, w, h);
- break;
- }
- } else {
- switch (w) {
- case 4:
- common_vt_8t_4w_msa(src, (int32_t)src_stride, dst, (int32_t)dst_stride,
- filt_ver, h);
- break;
- case 8:
- common_vt_8t_8w_msa(src, (int32_t)src_stride, dst, (int32_t)dst_stride,
- filt_ver, h);
- break;
- case 16:
- common_vt_8t_16w_msa(src, (int32_t)src_stride, dst, (int32_t)dst_stride,
- filt_ver, h);
- break;
- case 32:
- common_vt_8t_32w_msa(src, (int32_t)src_stride, dst, (int32_t)dst_stride,
- filt_ver, h);
- break;
- case 64:
- common_vt_8t_64w_msa(src, (int32_t)src_stride, dst, (int32_t)dst_stride,
- filt_ver, h);
- break;
- default:
- aom_convolve8_vert_c(src, src_stride, dst, dst_stride, filter_x,
- x_step_q4, filter_y, y_step_q4, w, h);
- break;
- }
- }
-}
diff --git a/third_party/aom/aom_dsp/mips/aom_convolve_copy_msa.c b/third_party/aom/aom_dsp/mips/aom_convolve_copy_msa.c
deleted file mode 100644
index f7f116f4d..000000000
--- a/third_party/aom/aom_dsp/mips/aom_convolve_copy_msa.c
+++ /dev/null
@@ -1,248 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#include <string.h>
-#include "aom_dsp/mips/macros_msa.h"
-
-static void copy_width8_msa(const uint8_t *src, int32_t src_stride,
- uint8_t *dst, int32_t dst_stride, int32_t height) {
- int32_t cnt;
- uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
- v16u8 src0, src1, src2, src3, src4, src5, src6, src7;
-
- if (0 == height % 12) {
- for (cnt = (height / 12); cnt--;) {
- LD_UB8(src, src_stride, src0, src1, src2, src3, src4, src5, src6, src7);
- src += (8 * src_stride);
-
- out0 = __msa_copy_u_d((v2i64)src0, 0);
- out1 = __msa_copy_u_d((v2i64)src1, 0);
- out2 = __msa_copy_u_d((v2i64)src2, 0);
- out3 = __msa_copy_u_d((v2i64)src3, 0);
- out4 = __msa_copy_u_d((v2i64)src4, 0);
- out5 = __msa_copy_u_d((v2i64)src5, 0);
- out6 = __msa_copy_u_d((v2i64)src6, 0);
- out7 = __msa_copy_u_d((v2i64)src7, 0);
-
- SD4(out0, out1, out2, out3, dst, dst_stride);
- dst += (4 * dst_stride);
- SD4(out4, out5, out6, out7, dst, dst_stride);
- dst += (4 * dst_stride);
-
- LD_UB4(src, src_stride, src0, src1, src2, src3);
- src += (4 * src_stride);
-
- out0 = __msa_copy_u_d((v2i64)src0, 0);
- out1 = __msa_copy_u_d((v2i64)src1, 0);
- out2 = __msa_copy_u_d((v2i64)src2, 0);
- out3 = __msa_copy_u_d((v2i64)src3, 0);
- SD4(out0, out1, out2, out3, dst, dst_stride);
- dst += (4 * dst_stride);
- }
- } else if (0 == height % 8) {
- for (cnt = height >> 3; cnt--;) {
- LD_UB8(src, src_stride, src0, src1, src2, src3, src4, src5, src6, src7);
- src += (8 * src_stride);
-
- out0 = __msa_copy_u_d((v2i64)src0, 0);
- out1 = __msa_copy_u_d((v2i64)src1, 0);
- out2 = __msa_copy_u_d((v2i64)src2, 0);
- out3 = __msa_copy_u_d((v2i64)src3, 0);
- out4 = __msa_copy_u_d((v2i64)src4, 0);
- out5 = __msa_copy_u_d((v2i64)src5, 0);
- out6 = __msa_copy_u_d((v2i64)src6, 0);
- out7 = __msa_copy_u_d((v2i64)src7, 0);
-
- SD4(out0, out1, out2, out3, dst, dst_stride);
- dst += (4 * dst_stride);
- SD4(out4, out5, out6, out7, dst, dst_stride);
- dst += (4 * dst_stride);
- }
- } else if (0 == height % 4) {
- for (cnt = (height / 4); cnt--;) {
- LD_UB4(src, src_stride, src0, src1, src2, src3);
- src += (4 * src_stride);
- out0 = __msa_copy_u_d((v2i64)src0, 0);
- out1 = __msa_copy_u_d((v2i64)src1, 0);
- out2 = __msa_copy_u_d((v2i64)src2, 0);
- out3 = __msa_copy_u_d((v2i64)src3, 0);
-
- SD4(out0, out1, out2, out3, dst, dst_stride);
- dst += (4 * dst_stride);
- }
- } else if (0 == height % 2) {
- for (cnt = (height / 2); cnt--;) {
- LD_UB2(src, src_stride, src0, src1);
- src += (2 * src_stride);
- out0 = __msa_copy_u_d((v2i64)src0, 0);
- out1 = __msa_copy_u_d((v2i64)src1, 0);
-
- SD(out0, dst);
- dst += dst_stride;
- SD(out1, dst);
- dst += dst_stride;
- }
- }
-}
-
-static void copy_16multx8mult_msa(const uint8_t *src, int32_t src_stride,
- uint8_t *dst, int32_t dst_stride,
- int32_t height, int32_t width) {
- int32_t cnt, loop_cnt;
- const uint8_t *src_tmp;
- uint8_t *dst_tmp;
- v16u8 src0, src1, src2, src3, src4, src5, src6, src7;
-
- for (cnt = (width >> 4); cnt--;) {
- src_tmp = src;
- dst_tmp = dst;
-
- for (loop_cnt = (height >> 3); loop_cnt--;) {
- LD_UB8(src_tmp, src_stride, src0, src1, src2, src3, src4, src5, src6,
- src7);
- src_tmp += (8 * src_stride);
-
- ST_UB8(src0, src1, src2, src3, src4, src5, src6, src7, dst_tmp,
- dst_stride);
- dst_tmp += (8 * dst_stride);
- }
-
- src += 16;
- dst += 16;
- }
-}
-
-static void copy_width16_msa(const uint8_t *src, int32_t src_stride,
- uint8_t *dst, int32_t dst_stride, int32_t height) {
- int32_t cnt;
- v16u8 src0, src1, src2, src3, src4, src5, src6, src7;
-
- if (0 == height % 12) {
- for (cnt = (height / 12); cnt--;) {
- LD_UB8(src, src_stride, src0, src1, src2, src3, src4, src5, src6, src7);
- src += (8 * src_stride);
- ST_UB8(src0, src1, src2, src3, src4, src5, src6, src7, dst, dst_stride);
- dst += (8 * dst_stride);
-
- LD_UB4(src, src_stride, src0, src1, src2, src3);
- src += (4 * src_stride);
- ST_UB4(src0, src1, src2, src3, dst, dst_stride);
- dst += (4 * dst_stride);
- }
- } else if (0 == height % 8) {
- copy_16multx8mult_msa(src, src_stride, dst, dst_stride, height, 16);
- } else if (0 == height % 4) {
- for (cnt = (height >> 2); cnt--;) {
- LD_UB4(src, src_stride, src0, src1, src2, src3);
- src += (4 * src_stride);
-
- ST_UB4(src0, src1, src2, src3, dst, dst_stride);
- dst += (4 * dst_stride);
- }
- }
-}
-
-static void copy_width32_msa(const uint8_t *src, int32_t src_stride,
- uint8_t *dst, int32_t dst_stride, int32_t height) {
- int32_t cnt;
- v16u8 src0, src1, src2, src3, src4, src5, src6, src7;
-
- if (0 == height % 12) {
- for (cnt = (height / 12); cnt--;) {
- LD_UB4(src, src_stride, src0, src1, src2, src3);
- LD_UB4(src + 16, src_stride, src4, src5, src6, src7);
- src += (4 * src_stride);
- ST_UB4(src0, src1, src2, src3, dst, dst_stride);
- ST_UB4(src4, src5, src6, src7, dst + 16, dst_stride);
- dst += (4 * dst_stride);
-
- LD_UB4(src, src_stride, src0, src1, src2, src3);
- LD_UB4(src + 16, src_stride, src4, src5, src6, src7);
- src += (4 * src_stride);
- ST_UB4(src0, src1, src2, src3, dst, dst_stride);
- ST_UB4(src4, src5, src6, src7, dst + 16, dst_stride);
- dst += (4 * dst_stride);
-
- LD_UB4(src, src_stride, src0, src1, src2, src3);
- LD_UB4(src + 16, src_stride, src4, src5, src6, src7);
- src += (4 * src_stride);
- ST_UB4(src0, src1, src2, src3, dst, dst_stride);
- ST_UB4(src4, src5, src6, src7, dst + 16, dst_stride);
- dst += (4 * dst_stride);
- }
- } else if (0 == height % 8) {
- copy_16multx8mult_msa(src, src_stride, dst, dst_stride, height, 32);
- } else if (0 == height % 4) {
- for (cnt = (height >> 2); cnt--;) {
- LD_UB4(src, src_stride, src0, src1, src2, src3);
- LD_UB4(src + 16, src_stride, src4, src5, src6, src7);
- src += (4 * src_stride);
- ST_UB4(src0, src1, src2, src3, dst, dst_stride);
- ST_UB4(src4, src5, src6, src7, dst + 16, dst_stride);
- dst += (4 * dst_stride);
- }
- }
-}
-
-static void copy_width64_msa(const uint8_t *src, int32_t src_stride,
- uint8_t *dst, int32_t dst_stride, int32_t height) {
- copy_16multx8mult_msa(src, src_stride, dst, dst_stride, height, 64);
-}
-
-void aom_convolve_copy_msa(const uint8_t *src, ptrdiff_t src_stride,
- uint8_t *dst, ptrdiff_t dst_stride,
- const int16_t *filter_x, int32_t filter_x_stride,
- const int16_t *filter_y, int32_t filter_y_stride,
- int32_t w, int32_t h) {
- (void)filter_x;
- (void)filter_y;
- (void)filter_x_stride;
- (void)filter_y_stride;
-
- switch (w) {
- case 4: {
- uint32_t cnt, tmp;
- /* 1 word storage */
- for (cnt = h; cnt--;) {
- tmp = LW(src);
- SW(tmp, dst);
- src += src_stride;
- dst += dst_stride;
- }
- break;
- }
- case 8: {
- copy_width8_msa(src, src_stride, dst, dst_stride, h);
- break;
- }
- case 16: {
- copy_width16_msa(src, src_stride, dst, dst_stride, h);
- break;
- }
- case 32: {
- copy_width32_msa(src, src_stride, dst, dst_stride, h);
- break;
- }
- case 64: {
- copy_width64_msa(src, src_stride, dst, dst_stride, h);
- break;
- }
- default: {
- uint32_t cnt;
- for (cnt = h; cnt--;) {
- memcpy(dst, src, w);
- src += src_stride;
- dst += dst_stride;
- }
- break;
- }
- }
-}
diff --git a/third_party/aom/aom_dsp/mips/aom_convolve_msa.h b/third_party/aom/aom_dsp/mips/aom_convolve_msa.h
deleted file mode 100644
index 852415c20..000000000
--- a/third_party/aom/aom_dsp/mips/aom_convolve_msa.h
+++ /dev/null
@@ -1,79 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#ifndef AOM_AOM_DSP_MIPS_AOM_CONVOLVE_MSA_H_
-#define AOM_AOM_DSP_MIPS_AOM_CONVOLVE_MSA_H_
-
-#include "aom_dsp/mips/macros_msa.h"
-#include "aom_dsp/aom_filter.h"
-
-extern const uint8_t mc_filt_mask_arr[16 * 3];
-
-#define FILT_8TAP_DPADD_S_H(vec0, vec1, vec2, vec3, filt0, filt1, filt2, \
- filt3) \
- ({ \
- v8i16 tmp_dpadd_0, tmp_dpadd_1; \
- \
- tmp_dpadd_0 = __msa_dotp_s_h((v16i8)vec0, (v16i8)filt0); \
- tmp_dpadd_0 = __msa_dpadd_s_h(tmp_dpadd_0, (v16i8)vec1, (v16i8)filt1); \
- tmp_dpadd_1 = __msa_dotp_s_h((v16i8)vec2, (v16i8)filt2); \
- tmp_dpadd_1 = __msa_dpadd_s_h(tmp_dpadd_1, (v16i8)vec3, (v16i8)filt3); \
- tmp_dpadd_0 = __msa_adds_s_h(tmp_dpadd_0, tmp_dpadd_1); \
- \
- tmp_dpadd_0; \
- })
-
-#define HORIZ_8TAP_4WID_4VECS_FILT(src0, src1, src2, src3, mask0, mask1, \
- mask2, mask3, filt0, filt1, filt2, filt3, \
- out0, out1) \
- { \
- v16i8 vec0_m, vec1_m, vec2_m, vec3_m, vec4_m, vec5_m, vec6_m, vec7_m; \
- v8i16 res0_m, res1_m, res2_m, res3_m; \
- \
- VSHF_B2_SB(src0, src1, src2, src3, mask0, mask0, vec0_m, vec1_m); \
- DOTP_SB2_SH(vec0_m, vec1_m, filt0, filt0, res0_m, res1_m); \
- VSHF_B2_SB(src0, src1, src2, src3, mask1, mask1, vec2_m, vec3_m); \
- DPADD_SB2_SH(vec2_m, vec3_m, filt1, filt1, res0_m, res1_m); \
- VSHF_B2_SB(src0, src1, src2, src3, mask2, mask2, vec4_m, vec5_m); \
- DOTP_SB2_SH(vec4_m, vec5_m, filt2, filt2, res2_m, res3_m); \
- VSHF_B2_SB(src0, src1, src2, src3, mask3, mask3, vec6_m, vec7_m); \
- DPADD_SB2_SH(vec6_m, vec7_m, filt3, filt3, res2_m, res3_m); \
- ADDS_SH2_SH(res0_m, res2_m, res1_m, res3_m, out0, out1); \
- }
-
-#define HORIZ_8TAP_8WID_4VECS_FILT(src0, src1, src2, src3, mask0, mask1, \
- mask2, mask3, filt0, filt1, filt2, filt3, \
- out0, out1, out2, out3) \
- { \
- v16i8 vec0_m, vec1_m, vec2_m, vec3_m, vec4_m, vec5_m, vec6_m, vec7_m; \
- v8i16 res0_m, res1_m, res2_m, res3_m, res4_m, res5_m, res6_m, res7_m; \
- \
- VSHF_B2_SB(src0, src0, src1, src1, mask0, mask0, vec0_m, vec1_m); \
- VSHF_B2_SB(src2, src2, src3, src3, mask0, mask0, vec2_m, vec3_m); \
- DOTP_SB4_SH(vec0_m, vec1_m, vec2_m, vec3_m, filt0, filt0, filt0, filt0, \
- res0_m, res1_m, res2_m, res3_m); \
- VSHF_B2_SB(src0, src0, src1, src1, mask2, mask2, vec0_m, vec1_m); \
- VSHF_B2_SB(src2, src2, src3, src3, mask2, mask2, vec2_m, vec3_m); \
- DOTP_SB4_SH(vec0_m, vec1_m, vec2_m, vec3_m, filt2, filt2, filt2, filt2, \
- res4_m, res5_m, res6_m, res7_m); \
- VSHF_B2_SB(src0, src0, src1, src1, mask1, mask1, vec4_m, vec5_m); \
- VSHF_B2_SB(src2, src2, src3, src3, mask1, mask1, vec6_m, vec7_m); \
- DPADD_SB4_SH(vec4_m, vec5_m, vec6_m, vec7_m, filt1, filt1, filt1, filt1, \
- res0_m, res1_m, res2_m, res3_m); \
- VSHF_B2_SB(src0, src0, src1, src1, mask3, mask3, vec4_m, vec5_m); \
- VSHF_B2_SB(src2, src2, src3, src3, mask3, mask3, vec6_m, vec7_m); \
- DPADD_SB4_SH(vec4_m, vec5_m, vec6_m, vec7_m, filt3, filt3, filt3, filt3, \
- res4_m, res5_m, res6_m, res7_m); \
- ADDS_SH4_SH(res0_m, res4_m, res1_m, res5_m, res2_m, res6_m, res3_m, \
- res7_m, out0, out1, out2, out3); \
- }
-
-#endif // AOM_AOM_DSP_MIPS_AOM_CONVOLVE_MSA_H_
diff --git a/third_party/aom/aom_dsp/mips/common_dspr2.c b/third_party/aom/aom_dsp/mips/common_dspr2.c
deleted file mode 100644
index 00ab75dc3..000000000
--- a/third_party/aom/aom_dsp/mips/common_dspr2.c
+++ /dev/null
@@ -1,31 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#include "aom_dsp/mips/common_dspr2.h"
-
-#if HAVE_DSPR2
-uint8_t aom_ff_cropTbl_a[256 + 2 * CROP_WIDTH];
-uint8_t *aom_ff_cropTbl;
-
-void aom_dsputil_static_init(void) {
- int i;
-
- for (i = 0; i < 256; i++) aom_ff_cropTbl_a[i + CROP_WIDTH] = i;
-
- for (i = 0; i < CROP_WIDTH; i++) {
- aom_ff_cropTbl_a[i] = 0;
- aom_ff_cropTbl_a[i + CROP_WIDTH + 256] = 255;
- }
-
- aom_ff_cropTbl = &aom_ff_cropTbl_a[CROP_WIDTH];
-}
-
-#endif
diff --git a/third_party/aom/aom_dsp/mips/common_dspr2.h b/third_party/aom/aom_dsp/mips/common_dspr2.h
deleted file mode 100644
index c42188d62..000000000
--- a/third_party/aom/aom_dsp/mips/common_dspr2.h
+++ /dev/null
@@ -1,51 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#ifndef AOM_AOM_DSP_MIPS_COMMON_DSPR2_H_
-#define AOM_AOM_DSP_MIPS_COMMON_DSPR2_H_
-
-#include <assert.h>
-
-#include "config/aom_config.h"
-
-#include "aom/aom_integer.h"
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-#if HAVE_DSPR2
-#define CROP_WIDTH 512
-
-extern uint8_t *aom_ff_cropTbl; // From "aom_dsp/mips/intrapred4_dspr2.c"
-
-static INLINE void prefetch_load(const unsigned char *src) {
- __asm__ __volatile__("pref 0, 0(%[src]) \n\t" : : [src] "r"(src));
-}
-
-/* prefetch data for store */
-static INLINE void prefetch_store(unsigned char *dst) {
- __asm__ __volatile__("pref 1, 0(%[dst]) \n\t" : : [dst] "r"(dst));
-}
-
-static INLINE void prefetch_load_streamed(const unsigned char *src) {
- __asm__ __volatile__("pref 4, 0(%[src]) \n\t" : : [src] "r"(src));
-}
-
-/* prefetch data for store */
-static INLINE void prefetch_store_streamed(unsigned char *dst) {
- __asm__ __volatile__("pref 5, 0(%[dst]) \n\t" : : [dst] "r"(dst));
-}
-#endif // #if HAVE_DSPR2
-#ifdef __cplusplus
-} // extern "C"
-#endif
-
-#endif // AOM_AOM_DSP_MIPS_COMMON_DSPR2_H_
diff --git a/third_party/aom/aom_dsp/mips/convolve2_dspr2.c b/third_party/aom/aom_dsp/mips/convolve2_dspr2.c
deleted file mode 100644
index 08bf1ab30..000000000
--- a/third_party/aom/aom_dsp/mips/convolve2_dspr2.c
+++ /dev/null
@@ -1,1031 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#include <assert.h>
-#include <stdio.h>
-
-#include "config/aom_dsp_rtcd.h"
-
-#include "aom_dsp/mips/convolve_common_dspr2.h"
-#include "aom_dsp/aom_dsp_common.h"
-#include "aom_dsp/aom_filter.h"
-#include "aom_ports/mem.h"
-
-#if HAVE_DSPR2
-static void convolve_bi_horiz_4_transposed_dspr2(
- const uint8_t *src, int32_t src_stride, uint8_t *dst, int32_t dst_stride,
- const int16_t *filter_x0, int32_t h) {
- int32_t y;
- uint8_t *cm = aom_ff_cropTbl;
- uint8_t *dst_ptr;
- int32_t Temp1, Temp2;
- uint32_t vector4a = 64;
- uint32_t tp1, tp2;
- uint32_t p1, p2;
- const int16_t *filter = &filter_x0[3];
- uint32_t filter45;
-
- filter45 = ((const int32_t *)filter)[0];
-
- for (y = h; y--;) {
- dst_ptr = dst;
- /* prefetch data to cache memory */
- prefetch_load(src + src_stride);
- prefetch_load(src + src_stride + 32);
-
- __asm__ __volatile__(
- "ulw %[tp1], 0(%[src]) \n\t"
- "ulw %[tp2], 4(%[src]) \n\t"
-
- /* even 1. pixel */
- "mtlo %[vector4a], $ac3 \n\t"
- "mthi $zero, $ac3 \n\t"
- "preceu.ph.qbr %[p1], %[tp1] \n\t"
- "preceu.ph.qbl %[p2], %[tp1] \n\t"
- "dpa.w.ph $ac3, %[p1], %[filter45] \n\t"
- "extp %[Temp1], $ac3, 31 \n\t"
-
- /* even 2. pixel */
- "mtlo %[vector4a], $ac2 \n\t"
- "mthi $zero, $ac2 \n\t"
- "balign %[tp2], %[tp1], 3 \n\t"
- "dpa.w.ph $ac2, %[p2], %[filter45] \n\t"
- "extp %[Temp2], $ac2, 31 \n\t"
-
- /* odd 1. pixel */
- "lbux %[tp1], %[Temp1](%[cm]) \n\t"
- "mtlo %[vector4a], $ac3 \n\t"
- "mthi $zero, $ac3 \n\t"
- "preceu.ph.qbr %[p1], %[tp2] \n\t"
- "preceu.ph.qbl %[p2], %[tp2] \n\t"
- "dpa.w.ph $ac3, %[p1], %[filter45] \n\t"
- "extp %[Temp1], $ac3, 31 \n\t"
-
- /* odd 2. pixel */
- "lbux %[tp2], %[Temp2](%[cm]) \n\t"
- "mtlo %[vector4a], $ac2 \n\t"
- "mthi $zero, $ac2 \n\t"
- "dpa.w.ph $ac2, %[p2], %[filter45] \n\t"
- "extp %[Temp2], $ac2, 31 \n\t"
-
- /* clamp */
- "lbux %[p1], %[Temp1](%[cm]) \n\t"
- "lbux %[p2], %[Temp2](%[cm]) \n\t"
-
- /* store bytes */
- "sb %[tp1], 0(%[dst_ptr]) \n\t"
- "addu %[dst_ptr], %[dst_ptr], %[dst_stride] \n\t"
-
- "sb %[p1], 0(%[dst_ptr]) \n\t"
- "addu %[dst_ptr], %[dst_ptr], %[dst_stride] \n\t"
-
- "sb %[tp2], 0(%[dst_ptr]) \n\t"
- "addu %[dst_ptr], %[dst_ptr], %[dst_stride] \n\t"
-
- "sb %[p2], 0(%[dst_ptr]) \n\t"
- "addu %[dst_ptr], %[dst_ptr], %[dst_stride] \n\t"
-
- : [tp1] "=&r"(tp1), [tp2] "=&r"(tp2), [p1] "=&r"(p1), [p2] "=&r"(p2),
- [Temp1] "=&r"(Temp1), [Temp2] "=&r"(Temp2), [dst_ptr] "+r"(dst_ptr)
- : [filter45] "r"(filter45), [vector4a] "r"(vector4a), [cm] "r"(cm),
- [src] "r"(src), [dst_stride] "r"(dst_stride));
-
- /* Next row... */
- src += src_stride;
- dst += 1;
- }
-}
-
-static void convolve_bi_horiz_8_transposed_dspr2(
- const uint8_t *src, int32_t src_stride, uint8_t *dst, int32_t dst_stride,
- const int16_t *filter_x0, int32_t h) {
- int32_t y;
- uint8_t *cm = aom_ff_cropTbl;
- uint8_t *dst_ptr;
- uint32_t vector4a = 64;
- int32_t Temp1, Temp2, Temp3;
- uint32_t tp1, tp2, tp3;
- uint32_t p1, p2, p3, p4;
- uint8_t *odd_dst;
- uint32_t dst_pitch_2 = (dst_stride << 1);
- const int16_t *filter = &filter_x0[3];
- uint32_t filter45;
-
- filter45 = ((const int32_t *)filter)[0];
-
- for (y = h; y--;) {
- /* prefetch data to cache memory */
- prefetch_load(src + src_stride);
- prefetch_load(src + src_stride + 32);
-
- dst_ptr = dst;
- odd_dst = (dst_ptr + dst_stride);
-
- __asm__ __volatile__(
- "ulw %[tp1], 0(%[src]) \n\t"
- "ulw %[tp2], 4(%[src]) \n\t"
-
- /* even 1. pixel */
- "mtlo %[vector4a], $ac3 \n\t"
- "mthi $zero, $ac3 \n\t"
- "mtlo %[vector4a], $ac2 \n\t"
- "mthi $zero, $ac2 \n\t"
- "preceu.ph.qbr %[p1], %[tp1] \n\t"
- "preceu.ph.qbl %[p2], %[tp1] \n\t"
- "preceu.ph.qbr %[p3], %[tp2] \n\t"
- "preceu.ph.qbl %[p4], %[tp2] \n\t"
- "ulw %[tp3], 8(%[src]) \n\t"
- "dpa.w.ph $ac3, %[p1], %[filter45] \n\t"
- "extp %[Temp1], $ac3, 31 \n\t"
-
- /* even 2. pixel */
- "dpa.w.ph $ac2, %[p2], %[filter45] \n\t"
- "extp %[Temp3], $ac2, 31 \n\t"
-
- /* even 3. pixel */
- "lbux %[Temp2], %[Temp1](%[cm]) \n\t"
- "mtlo %[vector4a], $ac1 \n\t"
- "mthi $zero, $ac1 \n\t"
- "balign %[tp3], %[tp2], 3 \n\t"
- "balign %[tp2], %[tp1], 3 \n\t"
- "dpa.w.ph $ac1, %[p3], %[filter45] \n\t"
- "lbux %[tp1], %[Temp3](%[cm]) \n\t"
- "extp %[p3], $ac1, 31 \n\t"
-
- /* even 4. pixel */
- "mtlo %[vector4a], $ac2 \n\t"
- "mthi $zero, $ac2 \n\t"
- "mtlo %[vector4a], $ac3 \n\t"
- "mthi $zero, $ac3 \n\t"
- "sb %[Temp2], 0(%[dst_ptr]) \n\t"
- "addu %[dst_ptr], %[dst_ptr], %[dst_pitch_2] \n\t"
- "sb %[tp1], 0(%[dst_ptr]) \n\t"
- "addu %[dst_ptr], %[dst_ptr], %[dst_pitch_2] \n\t"
-
- "dpa.w.ph $ac2, %[p4], %[filter45] \n\t"
- "extp %[Temp3], $ac2, 31 \n\t"
-
- "lbux %[Temp1], %[p3](%[cm]) "
- "\n\t"
-
- /* odd 1. pixel */
- "mtlo %[vector4a], $ac1 \n\t"
- "mthi $zero, $ac1 \n\t"
- "preceu.ph.qbr %[p1], %[tp2] \n\t"
- "preceu.ph.qbl %[p2], %[tp2] \n\t"
- "preceu.ph.qbr %[p3], %[tp3] \n\t"
- "preceu.ph.qbl %[p4], %[tp3] \n\t"
- "sb %[Temp1], 0(%[dst_ptr]) \n\t"
- "addu %[dst_ptr], %[dst_ptr], %[dst_pitch_2] \n\t"
-
- "dpa.w.ph $ac3, %[p1], %[filter45] \n\t"
- "extp %[Temp2], $ac3, 31 \n\t"
-
- /* odd 2. pixel */
- "lbux %[tp1], %[Temp3](%[cm]) \n\t"
- "mtlo %[vector4a], $ac3 \n\t"
- "mthi $zero, $ac3 \n\t"
- "mtlo %[vector4a], $ac2 \n\t"
- "mthi $zero, $ac2 \n\t"
- "dpa.w.ph $ac1, %[p2], %[filter45] \n\t"
- "sb %[tp1], 0(%[dst_ptr]) \n\t"
- "addu %[dst_ptr], %[dst_ptr], %[dst_pitch_2] \n\t"
- "extp %[Temp3], $ac1, 31 \n\t"
-
- /* odd 3. pixel */
- "lbux %[tp3], %[Temp2](%[cm]) \n\t"
- "dpa.w.ph $ac3, %[p3], %[filter45] \n\t"
- "extp %[Temp2], $ac3, 31 \n\t"
-
- /* odd 4. pixel */
- "sb %[tp3], 0(%[odd_dst]) \n\t"
- "addu %[odd_dst], %[odd_dst], %[dst_pitch_2] \n\t"
- "dpa.w.ph $ac2, %[p4], %[filter45] \n\t"
- "extp %[Temp1], $ac2, 31 \n\t"
-
- /* clamp */
- "lbux %[p4], %[Temp3](%[cm]) \n\t"
- "lbux %[p2], %[Temp2](%[cm]) \n\t"
- "lbux %[p1], %[Temp1](%[cm]) \n\t"
-
- /* store bytes */
- "sb %[p4], 0(%[odd_dst]) \n\t"
- "addu %[odd_dst], %[odd_dst], %[dst_pitch_2] \n\t"
-
- "sb %[p2], 0(%[odd_dst]) \n\t"
- "addu %[odd_dst], %[odd_dst], %[dst_pitch_2] \n\t"
-
- "sb %[p1], 0(%[odd_dst]) \n\t"
-
- : [tp1] "=&r"(tp1), [tp2] "=&r"(tp2), [tp3] "=&r"(tp3), [p1] "=&r"(p1),
- [p2] "=&r"(p2), [p3] "=&r"(p3), [p4] "=&r"(p4), [Temp1] "=&r"(Temp1),
- [Temp2] "=&r"(Temp2), [Temp3] "=&r"(Temp3), [dst_ptr] "+r"(dst_ptr),
- [odd_dst] "+r"(odd_dst)
- : [filter45] "r"(filter45), [vector4a] "r"(vector4a), [cm] "r"(cm),
- [src] "r"(src), [dst_pitch_2] "r"(dst_pitch_2));
-
- /* Next row... */
- src += src_stride;
- dst += 1;
- }
-}
-
-static void convolve_bi_horiz_16_transposed_dspr2(
- const uint8_t *src_ptr, int32_t src_stride, uint8_t *dst_ptr,
- int32_t dst_stride, const int16_t *filter_x0, int32_t h, int32_t count) {
- int32_t c, y;
- const uint8_t *src;
- uint8_t *dst;
- uint8_t *cm = aom_ff_cropTbl;
- uint32_t vector_64 = 64;
- int32_t Temp1, Temp2, Temp3;
- uint32_t qload1, qload2;
- uint32_t p1, p2, p3, p4, p5;
- uint32_t st1, st2, st3;
- uint32_t dst_pitch_2 = (dst_stride << 1);
- uint8_t *odd_dst;
- const int16_t *filter = &filter_x0[3];
- uint32_t filter45;
-
- filter45 = ((const int32_t *)filter)[0];
-
- for (y = h; y--;) {
- /* prefetch data to cache memory */
- prefetch_load(src_ptr + src_stride);
- prefetch_load(src_ptr + src_stride + 32);
-
- src = src_ptr;
- dst = dst_ptr;
-
- odd_dst = (dst + dst_stride);
-
- for (c = 0; c < count; c++) {
- __asm__ __volatile__(
- "ulw %[qload1], 0(%[src]) "
- "\n\t"
- "ulw %[qload2], 4(%[src]) "
- "\n\t"
-
- /* even 1. pixel */
- "mtlo %[vector_64], $ac1 "
- "\n\t" /* even 1 */
- "mthi $zero, $ac1 "
- "\n\t"
- "mtlo %[vector_64], $ac2 "
- "\n\t" /* even 2 */
- "mthi $zero, $ac2 "
- "\n\t"
- "preceu.ph.qbr %[p1], %[qload1] "
- "\n\t"
- "preceu.ph.qbl %[p2], %[qload1] "
- "\n\t"
- "preceu.ph.qbr %[p3], %[qload2] "
- "\n\t"
- "preceu.ph.qbl %[p4], %[qload2] "
- "\n\t"
- "ulw %[qload1], 8(%[src]) "
- "\n\t"
- "dpa.w.ph $ac1, %[p1], %[filter45] "
- "\n\t" /* even 1 */
- "extp %[Temp1], $ac1, 31 "
- "\n\t" /* even 1 */
-
- /* even 2. pixel */
- "mtlo %[vector_64], $ac3 "
- "\n\t" /* even 3 */
- "mthi $zero, $ac3 "
- "\n\t"
- "preceu.ph.qbr %[p1], %[qload1] "
- "\n\t"
- "preceu.ph.qbl %[p5], %[qload1] "
- "\n\t"
- "ulw %[qload2], 12(%[src]) "
- "\n\t"
- "dpa.w.ph $ac2, %[p2], %[filter45] "
- "\n\t" /* even 1 */
- "lbux %[st1], %[Temp1](%[cm]) "
- "\n\t" /* even 1 */
- "extp %[Temp2], $ac2, 31 "
- "\n\t" /* even 1 */
-
- /* even 3. pixel */
- "mtlo %[vector_64], $ac1 "
- "\n\t" /* even 4 */
- "mthi $zero, $ac1 "
- "\n\t"
- "preceu.ph.qbr %[p2], %[qload2] "
- "\n\t"
- "sb %[st1], 0(%[dst]) "
- "\n\t" /* even 1 */
- "addu %[dst], %[dst], %[dst_pitch_2] "
- " \n\t"
- "dpa.w.ph $ac3, %[p3], %[filter45] "
- "\n\t" /* even 3 */
- "extp %[Temp3], $ac3, 31 "
- "\n\t" /* even 3 */
- "lbux %[st2], %[Temp2](%[cm]) "
- "\n\t" /* even 1 */
-
- /* even 4. pixel */
- "mtlo %[vector_64], $ac2 "
- "\n\t" /* even 5 */
- "mthi $zero, $ac2 "
- "\n\t"
- "preceu.ph.qbl %[p3], %[qload2] "
- "\n\t"
- "sb %[st2], 0(%[dst]) "
- "\n\t" /* even 2 */
- "addu %[dst], %[dst], %[dst_pitch_2] "
- "\n\t"
- "dpa.w.ph $ac1, %[p4], %[filter45] "
- "\n\t" /* even 4 */
- "extp %[Temp1], $ac1, 31 "
- "\n\t" /* even 4 */
- "lbux %[st3], %[Temp3](%[cm]) "
- "\n\t" /* even 3 */
-
- /* even 5. pixel */
- "mtlo %[vector_64], $ac3 "
- "\n\t" /* even 6 */
- "mthi $zero, $ac3 "
- "\n\t"
- "sb %[st3], 0(%[dst]) "
- "\n\t" /* even 3 */
- "addu %[dst], %[dst], %[dst_pitch_2] "
- "\n\t"
- "dpa.w.ph $ac2, %[p1], %[filter45] "
- "\n\t" /* even 5 */
- "extp %[Temp2], $ac2, 31 "
- "\n\t" /* even 5 */
- "lbux %[st1], %[Temp1](%[cm]) "
- "\n\t" /* even 4 */
-
- /* even 6. pixel */
- "mtlo %[vector_64], $ac1 "
- "\n\t" /* even 7 */
- "mthi $zero, $ac1 "
- "\n\t"
- "sb %[st1], 0(%[dst]) "
- "\n\t" /* even 4 */
- "addu %[dst], %[dst], %[dst_pitch_2] "
- "\n\t"
- "ulw %[qload1], 20(%[src]) "
- "\n\t"
- "dpa.w.ph $ac3, %[p5], %[filter45] "
- "\n\t" /* even 6 */
- "extp %[Temp3], $ac3, 31 "
- "\n\t" /* even 6 */
- "lbux %[st2], %[Temp2](%[cm]) "
- "\n\t" /* even 5 */
-
- /* even 7. pixel */
- "mtlo %[vector_64], $ac2 "
- "\n\t" /* even 8 */
- "mthi $zero, $ac2 "
- "\n\t"
- "preceu.ph.qbr %[p5], %[qload1] "
- "\n\t"
- "sb %[st2], 0(%[dst]) "
- "\n\t" /* even 5 */
- "addu %[dst], %[dst], %[dst_pitch_2] "
- "\n\t"
- "dpa.w.ph $ac1, %[p2], %[filter45] "
- "\n\t" /* even 7 */
- "extp %[Temp1], $ac1, 31 "
- "\n\t" /* even 7 */
- "lbux %[st3], %[Temp3](%[cm]) "
- "\n\t" /* even 6 */
-
- /* even 8. pixel */
- "mtlo %[vector_64], $ac3 "
- "\n\t" /* odd 1 */
- "mthi $zero, $ac3 "
- "\n\t"
- "dpa.w.ph $ac2, %[p3], %[filter45] "
- "\n\t" /* even 8 */
- "sb %[st3], 0(%[dst]) "
- "\n\t" /* even 6 */
- "addu %[dst], %[dst], %[dst_pitch_2] "
- "\n\t"
- "extp %[Temp2], $ac2, 31 "
- "\n\t" /* even 8 */
- "lbux %[st1], %[Temp1](%[cm]) "
- "\n\t" /* even 7 */
-
- /* ODD pixels */
- "ulw %[qload1], 1(%[src]) "
- "\n\t"
- "ulw %[qload2], 5(%[src]) "
- "\n\t"
-
- /* odd 1. pixel */
- "mtlo %[vector_64], $ac1 "
- "\n\t" /* odd 2 */
- "mthi $zero, $ac1 "
- "\n\t"
- "preceu.ph.qbr %[p1], %[qload1] "
- "\n\t"
- "preceu.ph.qbl %[p2], %[qload1] "
- "\n\t"
- "preceu.ph.qbr %[p3], %[qload2] "
- "\n\t"
- "preceu.ph.qbl %[p4], %[qload2] "
- "\n\t"
- "sb %[st1], 0(%[dst]) "
- "\n\t" /* even 7 */
- "addu %[dst], %[dst], %[dst_pitch_2] "
- "\n\t"
- "ulw %[qload2], 9(%[src]) "
- "\n\t"
- "dpa.w.ph $ac3, %[p1], %[filter45] "
- "\n\t" /* odd 1 */
- "extp %[Temp3], $ac3, 31 "
- "\n\t" /* odd 1 */
- "lbux %[st2], %[Temp2](%[cm]) "
- "\n\t" /* even 8 */
-
- /* odd 2. pixel */
- "mtlo %[vector_64], $ac2 "
- "\n\t" /* odd 3 */
- "mthi $zero, $ac2 "
- "\n\t"
- "preceu.ph.qbr %[p1], %[qload2] "
- "\n\t"
- "preceu.ph.qbl %[p5], %[qload2] "
- "\n\t"
- "sb %[st2], 0(%[dst]) "
- "\n\t" /* even 8 */
- "ulw %[qload1], 13(%[src]) "
- "\n\t"
- "dpa.w.ph $ac1, %[p2], %[filter45] "
- "\n\t" /* odd 2 */
- "extp %[Temp1], $ac1, 31 "
- "\n\t" /* odd 2 */
- "lbux %[st3], %[Temp3](%[cm]) "
- "\n\t" /* odd 1 */
-
- /* odd 3. pixel */
- "mtlo %[vector_64], $ac3 "
- "\n\t" /* odd 4 */
- "mthi $zero, $ac3 "
- "\n\t"
- "preceu.ph.qbr %[p2], %[qload1] "
- "\n\t"
- "sb %[st3], 0(%[odd_dst]) "
- "\n\t" /* odd 1 */
- "addu %[odd_dst], %[odd_dst], %[dst_pitch_2] "
- "\n\t"
- "dpa.w.ph $ac2, %[p3], %[filter45] "
- "\n\t" /* odd 3 */
- "extp %[Temp2], $ac2, 31 "
- "\n\t" /* odd 3 */
- "lbux %[st1], %[Temp1](%[cm]) "
- "\n\t" /* odd 2 */
-
- /* odd 4. pixel */
- "mtlo %[vector_64], $ac1 "
- "\n\t" /* odd 5 */
- "mthi $zero, $ac1 "
- "\n\t"
- "preceu.ph.qbl %[p3], %[qload1] "
- "\n\t"
- "sb %[st1], 0(%[odd_dst]) "
- "\n\t" /* odd 2 */
- "addu %[odd_dst], %[odd_dst], %[dst_pitch_2] "
- "\n\t"
- "dpa.w.ph $ac3, %[p4], %[filter45] "
- "\n\t" /* odd 4 */
- "extp %[Temp3], $ac3, 31 "
- "\n\t" /* odd 4 */
- "lbux %[st2], %[Temp2](%[cm]) "
- "\n\t" /* odd 3 */
-
- /* odd 5. pixel */
- "mtlo %[vector_64], $ac2 "
- "\n\t" /* odd 6 */
- "mthi $zero, $ac2 "
- "\n\t"
- "sb %[st2], 0(%[odd_dst]) "
- "\n\t" /* odd 3 */
- "addu %[odd_dst], %[odd_dst], %[dst_pitch_2] "
- "\n\t"
- "dpa.w.ph $ac1, %[p1], %[filter45] "
- "\n\t" /* odd 5 */
- "extp %[Temp1], $ac1, 31 "
- "\n\t" /* odd 5 */
- "lbux %[st3], %[Temp3](%[cm]) "
- "\n\t" /* odd 4 */
-
- /* odd 6. pixel */
- "mtlo %[vector_64], $ac3 "
- "\n\t" /* odd 7 */
- "mthi $zero, $ac3 "
- "\n\t"
- "sb %[st3], 0(%[odd_dst]) "
- "\n\t" /* odd 4 */
- "addu %[odd_dst], %[odd_dst], %[dst_pitch_2] "
- "\n\t"
- "ulw %[qload1], 21(%[src]) "
- "\n\t"
- "dpa.w.ph $ac2, %[p5], %[filter45] "
- "\n\t" /* odd 6 */
- "extp %[Temp2], $ac2, 31 "
- "\n\t" /* odd 6 */
- "lbux %[st1], %[Temp1](%[cm]) "
- "\n\t" /* odd 5 */
-
- /* odd 7. pixel */
- "mtlo %[vector_64], $ac1 "
- "\n\t" /* odd 8 */
- "mthi $zero, $ac1 "
- "\n\t"
- "preceu.ph.qbr %[p5], %[qload1] "
- "\n\t"
- "sb %[st1], 0(%[odd_dst]) "
- "\n\t" /* odd 5 */
- "addu %[odd_dst], %[odd_dst], %[dst_pitch_2] "
- "\n\t"
- "dpa.w.ph $ac3, %[p2], %[filter45] "
- "\n\t" /* odd 7 */
- "extp %[Temp3], $ac3, 31 "
- "\n\t" /* odd 7 */
-
- /* odd 8. pixel */
- "dpa.w.ph $ac1, %[p3], %[filter45] "
- "\n\t" /* odd 8 */
- "extp %[Temp1], $ac1, 31 "
- "\n\t" /* odd 8 */
-
- "lbux %[st2], %[Temp2](%[cm]) "
- "\n\t" /* odd 6 */
- "lbux %[st3], %[Temp3](%[cm]) "
- "\n\t" /* odd 7 */
- "lbux %[st1], %[Temp1](%[cm]) "
- "\n\t" /* odd 8 */
-
- "sb %[st2], 0(%[odd_dst]) "
- "\n\t" /* odd 6 */
- "addu %[odd_dst], %[odd_dst], %[dst_pitch_2] "
- "\n\t"
-
- "sb %[st3], 0(%[odd_dst]) "
- "\n\t" /* odd 7 */
- "addu %[odd_dst], %[odd_dst], %[dst_pitch_2] "
- "\n\t"
-
- "sb %[st1], 0(%[odd_dst]) "
- "\n\t" /* odd 8 */
-
- : [qload1] "=&r"(qload1), [qload2] "=&r"(qload2), [p5] "=&r"(p5),
- [st1] "=&r"(st1), [st2] "=&r"(st2), [st3] "=&r"(st3),
- [p1] "=&r"(p1), [p2] "=&r"(p2), [p3] "=&r"(p3), [p4] "=&r"(p4),
- [Temp1] "=&r"(Temp1), [Temp2] "=&r"(Temp2), [Temp3] "=&r"(Temp3),
- [dst] "+r"(dst), [odd_dst] "+r"(odd_dst)
- : [filter45] "r"(filter45), [vector_64] "r"(vector_64), [cm] "r"(cm),
- [src] "r"(src), [dst_pitch_2] "r"(dst_pitch_2));
-
- src += 16;
- dst = (dst_ptr + ((c + 1) * 16 * dst_stride));
- odd_dst = (dst + dst_stride);
- }
-
- /* Next row... */
- src_ptr += src_stride;
- dst_ptr += 1;
- }
-}
-
-static void convolve_bi_horiz_64_transposed_dspr2(
- const uint8_t *src_ptr, int32_t src_stride, uint8_t *dst_ptr,
- int32_t dst_stride, const int16_t *filter_x0, int32_t h) { /*
- * Two-tap horizontal filter over h rows of 64 pixels, written transposed:
- * every source row advances dst_ptr by one byte, and output pixel x of a row
- * is stored x * dst_stride bytes after the row's starting dst_ptr.
- * Only filter_x0[3] and filter_x0[4] are read. A row is handled as four
- * groups of 16 pixels; inside a group the eight even pixels are stored
- * through dst and the eight odd pixels through odd_dst, each pointer moving
- * by dst_pitch_2 (two output rows) after a store.
- * The extp instructions depend on the DSPControl value that
- * aom_convolve2_dspr2() writes with wrdsp before calling this function.
- */
- int32_t c, y;
- const uint8_t *src;
- uint8_t *dst;
- uint8_t *cm = aom_ff_cropTbl; /* lookup table: lbux fetches cm[extracted sum] and that byte is stored */
- uint32_t vector_64 = 64; /* value loaded into LO of an accumulator before each dpa.w.ph; presumably the rounding term of ROUND_POWER_OF_TWO(sum, FILTER_BITS) - confirm */
- int32_t Temp1, Temp2, Temp3;
- uint32_t qload1, qload2;
- uint32_t p1, p2, p3, p4, p5;
- uint32_t st1, st2, st3;
- uint32_t dst_pitch_2 = (dst_stride << 1); /* distance between two consecutive even (or two consecutive odd) output pixels */
- uint8_t *odd_dst;
- const int16_t *filter = &filter_x0[3];
- uint32_t filter45;
-
- filter45 = ((const int32_t *)filter)[0]; /* taps [3] and [4] fetched as one 32-bit word for dpa.w.ph. NOTE(review): &filter_x0[3] is 6 bytes past filter_x0, so this word load may be misaligned - confirm the target tolerates it */
-
- for (y = h; y--;) {
- /* prefetch data to cache memory */
- prefetch_load(src_ptr + src_stride);
- prefetch_load(src_ptr + src_stride + 32);
- prefetch_load(src_ptr + src_stride + 64);
-
- src = src_ptr;
- dst = dst_ptr;
-
- odd_dst = (dst + dst_stride);
-
- for (c = 0; c < 4; c++) {
- __asm__ __volatile__(
- "ulw %[qload1], 0(%[src]) "
- "\n\t"
- "ulw %[qload2], 4(%[src]) "
- "\n\t"
-
- /* even 1. pixel */
- "mtlo %[vector_64], $ac1 "
- "\n\t" /* even 1 */
- "mthi $zero, $ac1 "
- "\n\t"
- "mtlo %[vector_64], $ac2 "
- "\n\t" /* even 2 */
- "mthi $zero, $ac2 "
- "\n\t"
- "preceu.ph.qbr %[p1], %[qload1] "
- "\n\t"
- "preceu.ph.qbl %[p2], %[qload1] "
- "\n\t"
- "preceu.ph.qbr %[p3], %[qload2] "
- "\n\t"
- "preceu.ph.qbl %[p4], %[qload2] "
- "\n\t"
- "ulw %[qload1], 8(%[src]) "
- "\n\t"
- "dpa.w.ph $ac1, %[p1], %[filter45] "
- "\n\t" /* even 1 */
- "extp %[Temp1], $ac1, 31 "
- "\n\t" /* even 1 */
-
- /* even 2. pixel */
- "mtlo %[vector_64], $ac3 "
- "\n\t" /* even 3 */
- "mthi $zero, $ac3 "
- "\n\t"
- "preceu.ph.qbr %[p1], %[qload1] "
- "\n\t"
- "preceu.ph.qbl %[p5], %[qload1] "
- "\n\t"
- "ulw %[qload2], 12(%[src]) "
- "\n\t"
- "dpa.w.ph $ac2, %[p2], %[filter45] "
- "\n\t" /* even 2 */
- "lbux %[st1], %[Temp1](%[cm]) "
- "\n\t" /* even 1 */
- "extp %[Temp2], $ac2, 31 "
- "\n\t" /* even 2 */
-
- /* even 3. pixel */
- "mtlo %[vector_64], $ac1 "
- "\n\t" /* even 4 */
- "mthi $zero, $ac1 "
- "\n\t"
- "preceu.ph.qbr %[p2], %[qload2] "
- "\n\t"
- "sb %[st1], 0(%[dst]) "
- "\n\t" /* even 1 */
- "addu %[dst], %[dst], %[dst_pitch_2] "
- " \n\t"
- "dpa.w.ph $ac3, %[p3], %[filter45] "
- "\n\t" /* even 3 */
- "extp %[Temp3], $ac3, 31 "
- "\n\t" /* even 3 */
- "lbux %[st2], %[Temp2](%[cm]) "
- "\n\t" /* even 2 */
-
- /* even 4. pixel */
- "mtlo %[vector_64], $ac2 "
- "\n\t" /* even 5 */
- "mthi $zero, $ac2 "
- "\n\t"
- "preceu.ph.qbl %[p3], %[qload2] "
- "\n\t"
- "sb %[st2], 0(%[dst]) "
- "\n\t" /* even 2 */
- "addu %[dst], %[dst], %[dst_pitch_2] "
- "\n\t"
- "dpa.w.ph $ac1, %[p4], %[filter45] "
- "\n\t" /* even 4 */
- "extp %[Temp1], $ac1, 31 "
- "\n\t" /* even 4 */
- "lbux %[st3], %[Temp3](%[cm]) "
- "\n\t" /* even 3 */
-
- /* even 5. pixel */
- "mtlo %[vector_64], $ac3 "
- "\n\t" /* even 6 */
- "mthi $zero, $ac3 "
- "\n\t"
- "sb %[st3], 0(%[dst]) "
- "\n\t" /* even 3 */
- "addu %[dst], %[dst], %[dst_pitch_2] "
- "\n\t"
- "dpa.w.ph $ac2, %[p1], %[filter45] "
- "\n\t" /* even 5 */
- "extp %[Temp2], $ac2, 31 "
- "\n\t" /* even 5 */
- "lbux %[st1], %[Temp1](%[cm]) "
- "\n\t" /* even 4 */
-
- /* even 6. pixel */
- "mtlo %[vector_64], $ac1 "
- "\n\t" /* even 7 */
- "mthi $zero, $ac1 "
- "\n\t"
- "sb %[st1], 0(%[dst]) "
- "\n\t" /* even 4 */
- "addu %[dst], %[dst], %[dst_pitch_2] "
- "\n\t"
- "ulw %[qload1], 20(%[src]) "
- "\n\t"
- "dpa.w.ph $ac3, %[p5], %[filter45] "
- "\n\t" /* even 6 */
- "extp %[Temp3], $ac3, 31 "
- "\n\t" /* even 6 */
- "lbux %[st2], %[Temp2](%[cm]) "
- "\n\t" /* even 5 */
-
- /* even 7. pixel */
- "mtlo %[vector_64], $ac2 "
- "\n\t" /* even 8 */
- "mthi $zero, $ac2 "
- "\n\t"
- "preceu.ph.qbr %[p5], %[qload1] "
- "\n\t"
- "sb %[st2], 0(%[dst]) "
- "\n\t" /* even 5 */
- "addu %[dst], %[dst], %[dst_pitch_2] "
- "\n\t"
- "dpa.w.ph $ac1, %[p2], %[filter45] "
- "\n\t" /* even 7 */
- "extp %[Temp1], $ac1, 31 "
- "\n\t" /* even 7 */
- "lbux %[st3], %[Temp3](%[cm]) "
- "\n\t" /* even 6 */
-
- /* even 8. pixel */
- "mtlo %[vector_64], $ac3 "
- "\n\t" /* odd 1 */
- "mthi $zero, $ac3 "
- "\n\t"
- "dpa.w.ph $ac2, %[p3], %[filter45] "
- "\n\t" /* even 8 */
- "sb %[st3], 0(%[dst]) "
- "\n\t" /* even 6 */
- "addu %[dst], %[dst], %[dst_pitch_2] "
- "\n\t"
- "extp %[Temp2], $ac2, 31 "
- "\n\t" /* even 8 */
- "lbux %[st1], %[Temp1](%[cm]) "
- "\n\t" /* even 7 */
-
- /* ODD pixels */
- "ulw %[qload1], 1(%[src]) "
- "\n\t"
- "ulw %[qload2], 5(%[src]) "
- "\n\t"
-
- /* odd 1. pixel */
- "mtlo %[vector_64], $ac1 "
- "\n\t" /* odd 2 */
- "mthi $zero, $ac1 "
- "\n\t"
- "preceu.ph.qbr %[p1], %[qload1] "
- "\n\t"
- "preceu.ph.qbl %[p2], %[qload1] "
- "\n\t"
- "preceu.ph.qbr %[p3], %[qload2] "
- "\n\t"
- "preceu.ph.qbl %[p4], %[qload2] "
- "\n\t"
- "sb %[st1], 0(%[dst]) "
- "\n\t" /* even 7 */
- "addu %[dst], %[dst], %[dst_pitch_2] "
- "\n\t"
- "ulw %[qload2], 9(%[src]) "
- "\n\t"
- "dpa.w.ph $ac3, %[p1], %[filter45] "
- "\n\t" /* odd 1 */
- "extp %[Temp3], $ac3, 31 "
- "\n\t" /* odd 1 */
- "lbux %[st2], %[Temp2](%[cm]) "
- "\n\t" /* even 8 */
-
- /* odd 2. pixel */
- "mtlo %[vector_64], $ac2 "
- "\n\t" /* odd 3 */
- "mthi $zero, $ac2 "
- "\n\t"
- "preceu.ph.qbr %[p1], %[qload2] "
- "\n\t"
- "preceu.ph.qbl %[p5], %[qload2] "
- "\n\t"
- "sb %[st2], 0(%[dst]) "
- "\n\t" /* even 8 */
- "ulw %[qload1], 13(%[src]) "
- "\n\t"
- "dpa.w.ph $ac1, %[p2], %[filter45] "
- "\n\t" /* odd 2 */
- "extp %[Temp1], $ac1, 31 "
- "\n\t" /* odd 2 */
- "lbux %[st3], %[Temp3](%[cm]) "
- "\n\t" /* odd 1 */
-
- /* odd 3. pixel */
- "mtlo %[vector_64], $ac3 "
- "\n\t" /* odd 4 */
- "mthi $zero, $ac3 "
- "\n\t"
- "preceu.ph.qbr %[p2], %[qload1] "
- "\n\t"
- "sb %[st3], 0(%[odd_dst]) "
- "\n\t" /* odd 1 */
- "addu %[odd_dst], %[odd_dst], %[dst_pitch_2] "
- "\n\t"
- "dpa.w.ph $ac2, %[p3], %[filter45] "
- "\n\t" /* odd 3 */
- "extp %[Temp2], $ac2, 31 "
- "\n\t" /* odd 3 */
- "lbux %[st1], %[Temp1](%[cm]) "
- "\n\t" /* odd 2 */
-
- /* odd 4. pixel */
- "mtlo %[vector_64], $ac1 "
- "\n\t" /* odd 5 */
- "mthi $zero, $ac1 "
- "\n\t"
- "preceu.ph.qbl %[p3], %[qload1] "
- "\n\t"
- "sb %[st1], 0(%[odd_dst]) "
- "\n\t" /* odd 2 */
- "addu %[odd_dst], %[odd_dst], %[dst_pitch_2] "
- "\n\t"
- "dpa.w.ph $ac3, %[p4], %[filter45] "
- "\n\t" /* odd 4 */
- "extp %[Temp3], $ac3, 31 "
- "\n\t" /* odd 4 */
- "lbux %[st2], %[Temp2](%[cm]) "
- "\n\t" /* odd 3 */
-
- /* odd 5. pixel */
- "mtlo %[vector_64], $ac2 "
- "\n\t" /* odd 6 */
- "mthi $zero, $ac2 "
- "\n\t"
- "sb %[st2], 0(%[odd_dst]) "
- "\n\t" /* odd 3 */
- "addu %[odd_dst], %[odd_dst], %[dst_pitch_2] "
- "\n\t"
- "dpa.w.ph $ac1, %[p1], %[filter45] "
- "\n\t" /* odd 5 */
- "extp %[Temp1], $ac1, 31 "
- "\n\t" /* odd 5 */
- "lbux %[st3], %[Temp3](%[cm]) "
- "\n\t" /* odd 4 */
-
- /* odd 6. pixel */
- "mtlo %[vector_64], $ac3 "
- "\n\t" /* odd 7 */
- "mthi $zero, $ac3 "
- "\n\t"
- "sb %[st3], 0(%[odd_dst]) "
- "\n\t" /* odd 4 */
- "addu %[odd_dst], %[odd_dst], %[dst_pitch_2] "
- "\n\t"
- "ulw %[qload1], 21(%[src]) "
- "\n\t"
- "dpa.w.ph $ac2, %[p5], %[filter45] "
- "\n\t" /* odd 6 */
- "extp %[Temp2], $ac2, 31 "
- "\n\t" /* odd 6 */
- "lbux %[st1], %[Temp1](%[cm]) "
- "\n\t" /* odd 5 */
-
- /* odd 7. pixel */
- "mtlo %[vector_64], $ac1 "
- "\n\t" /* odd 8 */
- "mthi $zero, $ac1 "
- "\n\t"
- "preceu.ph.qbr %[p5], %[qload1] "
- "\n\t"
- "sb %[st1], 0(%[odd_dst]) "
- "\n\t" /* odd 5 */
- "addu %[odd_dst], %[odd_dst], %[dst_pitch_2] "
- "\n\t"
- "dpa.w.ph $ac3, %[p2], %[filter45] "
- "\n\t" /* odd 7 */
- "extp %[Temp3], $ac3, 31 "
- "\n\t" /* odd 7 */
-
- /* odd 8. pixel */
- "dpa.w.ph $ac1, %[p3], %[filter45] "
- "\n\t" /* odd 8 */
- "extp %[Temp1], $ac1, 31 "
- "\n\t" /* odd 8 */
-
- "lbux %[st2], %[Temp2](%[cm]) "
- "\n\t" /* odd 6 */
- "lbux %[st3], %[Temp3](%[cm]) "
- "\n\t" /* odd 7 */
- "lbux %[st1], %[Temp1](%[cm]) "
- "\n\t" /* odd 8 */
-
- "sb %[st2], 0(%[odd_dst]) "
- "\n\t" /* odd 6 */
- "addu %[odd_dst], %[odd_dst], %[dst_pitch_2] "
- "\n\t"
-
- "sb %[st3], 0(%[odd_dst]) "
- "\n\t" /* odd 7 */
- "addu %[odd_dst], %[odd_dst], %[dst_pitch_2] "
- "\n\t"
-
- "sb %[st1], 0(%[odd_dst]) "
- "\n\t" /* odd 8 */
-
- : [qload1] "=&r"(qload1), [qload2] "=&r"(qload2), [p5] "=&r"(p5),
- [st1] "=&r"(st1), [st2] "=&r"(st2), [st3] "=&r"(st3),
- [p1] "=&r"(p1), [p2] "=&r"(p2), [p3] "=&r"(p3), [p4] "=&r"(p4),
- [Temp1] "=&r"(Temp1), [Temp2] "=&r"(Temp2), [Temp3] "=&r"(Temp3),
- [dst] "+r"(dst), [odd_dst] "+r"(odd_dst)
- : [filter45] "r"(filter45), [vector_64] "r"(vector_64), [cm] "r"(cm),
- [src] "r"(src), [dst_pitch_2] "r"(dst_pitch_2));
-
- src += 16; /* next group of 16 source pixels in this row */
- dst = (dst_ptr + ((c + 1) * 16 * dst_stride)); /* the asm moved dst; restart at the first output row of the next group */
- odd_dst = (dst + dst_stride);
- }
-
- /* Next row... */
- src_ptr += src_stride;
- dst_ptr += 1; /* transposed output: the next source row goes one byte to the right */
- }
-}
-
-/*
- * Plain C two-tap horizontal filter with transposed output.
- *
- * For every source row r (0 <= r < h) and column c (0 <= c < w) the sum
- * src[c] * filter[3] + src[c + 1] * filter[4] is rounded with
- * ROUND_POWER_OF_TWO(..., FILTER_BITS), passed through clip_pixel() and
- * written to dst[r + c * dst_stride]. Each row therefore reads w + 1
- * source bytes.
- */
-void convolve_bi_horiz_transposed(const uint8_t *src, ptrdiff_t src_stride,
-                                  uint8_t *dst, ptrdiff_t dst_stride,
-                                  const int16_t *filter, int w, int h) {
-  int row;
-
-  for (row = 0; row < h; ++row) {
-    int col;
-
-    for (col = 0; col < w; ++col) {
-      /* Only the two centre taps take part in this filter. */
-      const int left = src[col] * filter[3];
-      const int right = src[col + 1] * filter[4];
-      int acc = 0;
-
-      acc += left;
-      acc += right;
-
-      /* Column index selects the output row: that is the transpose. */
-      dst[col * dst_stride] = clip_pixel(ROUND_POWER_OF_TWO(acc, FILTER_BITS));
-    }
-
-    /* One source row down, one output column to the right. */
-    src += src_stride;
-    dst += 1;
-  }
-}
-
-/*
- * Two-tap horizontal filter with transposed output, dispatched on width.
- *
- * Writes 38 to the DSPControl field selected by wrdsp mask 1 (the extract
- * position the extp instructions in the width-specific kernels rely on),
- * warms the cache for the first source row, then calls the kernel that
- * matches w:
- *   4, 8      -> dedicated 4- and 8-pixel kernels
- *   16, 32    -> the 16-pixel kernel run w / 16 times per row
- *   64        -> the 64-pixel kernel
- *   otherwise -> the plain C loop convolve_bi_horiz_transposed()
- *
- * src/src_stride: source pixels and row pitch in bytes.
- * dst/dst_stride: destination; source row r, column c is written to
- *                 dst[r + c * dst_stride].
- * filter:         tap array; only filter[3] and filter[4] are used.
- * w, h:           block width and height in pixels.
- */
-void aom_convolve2_dspr2(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst,
-                         ptrdiff_t dst_stride, const int16_t *filter, int w,
-                         int h) {
-  uint32_t pos = 38;
-
-  /* bit position for extract from acc */
-  __asm__ __volatile__("wrdsp %[pos], 1 \n\t"
-                       :
-                       : [pos] "r"(pos));
-
-  /* prefetch data to cache memory */
-  prefetch_load(src);
-  prefetch_load(src + 32);
-
-  switch (w) {
-    case 4:
-      convolve_bi_horiz_4_transposed_dspr2(src, src_stride, dst, dst_stride,
-                                           filter, h);
-      break;
-    case 8:
-      convolve_bi_horiz_8_transposed_dspr2(src, src_stride, dst, dst_stride,
-                                           filter, h);
-      break;
-    case 16:
-    case 32:
-      convolve_bi_horiz_16_transposed_dspr2(src, src_stride, dst, dst_stride,
-                                            filter, h, (w / 16));
-      break;
-    case 64:
-      /* src and src + 32 were already requested above; a 64-pixel row also
-       * touches src + 64 (the second tap of the last pixel), so prefetch
-       * that instead of repeating src + 32. */
-      prefetch_load(src + 64);
-      convolve_bi_horiz_64_transposed_dspr2(src, src_stride, dst, dst_stride,
-                                            filter, h);
-      break;
-    default:
-      convolve_bi_horiz_transposed(src, src_stride, dst, dst_stride, filter, w,
-                                   h);
-      break;
-  }
-}
-#endif
diff --git a/third_party/aom/aom_dsp/mips/convolve2_horiz_dspr2.c b/third_party/aom/aom_dsp/mips/convolve2_horiz_dspr2.c
deleted file mode 100644
index 097da73ca..000000000
--- a/third_party/aom/aom_dsp/mips/convolve2_horiz_dspr2.c
+++ /dev/null
@@ -1,681 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#include <assert.h>
-#include <stdio.h>
-
-#include "config/aom_dsp_rtcd.h"
-
-#include "aom_dsp/mips/convolve_common_dspr2.h"
-#include "aom_dsp/aom_dsp_common.h"
-#include "aom_ports/mem.h"
-
-#if HAVE_DSPR2
-static void convolve_bi_horiz_4_dspr2(const uint8_t *src, int32_t src_stride,
- uint8_t *dst, int32_t dst_stride,
- const int16_t *filter_x0, int32_t h) { /*
- * Two-tap horizontal filter for a block 4 pixels wide and h rows high.
- * Each row loads the words at src + 0 and src + 4, produces four results
- * with dpa.w.ph / extp, maps them through cm with lbux and stores them to
- * dst[0..3]. Only filter_x0[3] and filter_x0[4] are read.
- * The extp instructions depend on the DSPControl value that
- * aom_convolve2_horiz_dspr2() writes with wrdsp before calling this function.
- */
- int32_t y;
- uint8_t *cm = aom_ff_cropTbl; /* lookup table used by the "clamp" lbux loads below */
- int32_t Temp1, Temp2, Temp3, Temp4;
- uint32_t vector4a = 64; /* value loaded into LO of an accumulator before each dpa.w.ph; presumably the rounding term - confirm against FILTER_BITS */
- uint32_t tp1, tp2;
- uint32_t p1, p2;
- const int16_t *filter = &filter_x0[3];
- uint32_t filter45;
-
- filter45 = ((const int32_t *)filter)[0]; /* taps [3] and [4] fetched as one 32-bit word for dpa.w.ph. NOTE(review): &filter_x0[3] is 6 bytes past filter_x0, so this word load may be misaligned - confirm the target tolerates it */
-
- for (y = h; y--;) {
- /* prefetch data to cache memory */
- prefetch_load(src + src_stride);
- prefetch_load(src + src_stride + 32);
- prefetch_store(dst + dst_stride);
-
- __asm__ __volatile__(
- "ulw %[tp1], 0(%[src]) \n\t"
- "ulw %[tp2], 4(%[src]) \n\t"
-
- /* even 1. pixel */
- "mtlo %[vector4a], $ac3 \n\t"
- "mthi $zero, $ac3 \n\t"
- "preceu.ph.qbr %[p1], %[tp1] \n\t"
- "preceu.ph.qbl %[p2], %[tp1] \n\t"
- "dpa.w.ph $ac3, %[p1], %[filter45] \n\t"
- "extp %[Temp1], $ac3, 31 \n\t"
-
- /* even 2. pixel */
- "mtlo %[vector4a], $ac2 \n\t"
- "mthi $zero, $ac2 \n\t"
- "balign %[tp2], %[tp1], 3 \n\t" /* merge tp1 bytes into tp2; tp2 is the input of the odd pixels below */
- "dpa.w.ph $ac2, %[p2], %[filter45] \n\t"
- "extp %[Temp3], $ac2, 31 \n\t"
-
- /* odd 1. pixel */
- "lbux %[tp1], %[Temp1](%[cm]) \n\t" /* tp1 is reused: it now holds the looked-up even 1 result */
- "mtlo %[vector4a], $ac3 \n\t"
- "mthi $zero, $ac3 \n\t"
- "preceu.ph.qbr %[p1], %[tp2] \n\t"
- "preceu.ph.qbl %[p2], %[tp2] \n\t"
- "dpa.w.ph $ac3, %[p1], %[filter45] \n\t"
- "extp %[Temp2], $ac3, 31 \n\t"
-
- /* odd 2. pixel */
- "lbux %[tp2], %[Temp3](%[cm]) \n\t" /* tp2 is reused: it now holds the looked-up even 2 result */
- "mtlo %[vector4a], $ac2 \n\t"
- "mthi $zero, $ac2 \n\t"
- "dpa.w.ph $ac2, %[p2], %[filter45] \n\t"
- "extp %[Temp4], $ac2, 31 \n\t"
-
- /* clamp */
- "lbux %[p1], %[Temp2](%[cm]) \n\t"
- "lbux %[p2], %[Temp4](%[cm]) \n\t"
-
- /* store bytes: even 1, odd 1, even 2, odd 2 go to dst[0], dst[1], dst[2], dst[3] */
- "sb %[tp1], 0(%[dst]) \n\t"
- "sb %[p1], 1(%[dst]) \n\t"
- "sb %[tp2], 2(%[dst]) \n\t"
- "sb %[p2], 3(%[dst]) \n\t"
-
- : [tp1] "=&r"(tp1), [tp2] "=&r"(tp2), [p1] "=&r"(p1), [p2] "=&r"(p2),
- [Temp1] "=&r"(Temp1), [Temp2] "=&r"(Temp2), [Temp3] "=&r"(Temp3),
- [Temp4] "=&r"(Temp4)
- : [filter45] "r"(filter45), [vector4a] "r"(vector4a), [cm] "r"(cm),
- [dst] "r"(dst), [src] "r"(src));
-
- /* Next row... */
- src += src_stride;
- dst += dst_stride;
- }
-}
-
-static void convolve_bi_horiz_8_dspr2(const uint8_t *src, int32_t src_stride,
- uint8_t *dst, int32_t dst_stride,
- const int16_t *filter_x0, int32_t h) { /*
- * Two-tap horizontal filter for a block 8 pixels wide and h rows high.
- * Each row loads the words at src + 0, src + 4 and src + 8, produces eight
- * results with dpa.w.ph / extp, maps them through cm with lbux and stores
- * them to dst[0..7]. Only filter_x0[3] and filter_x0[4] are read.
- * The extp instructions depend on the DSPControl value that
- * aom_convolve2_horiz_dspr2() writes with wrdsp before calling this function.
- */
- int32_t y;
- uint8_t *cm = aom_ff_cropTbl; /* lookup table used by the lbux loads below */
- uint32_t vector4a = 64; /* value loaded into LO of an accumulator before each dpa.w.ph; presumably the rounding term - confirm against FILTER_BITS */
- int32_t Temp1, Temp2, Temp3;
- uint32_t tp1, tp2, tp3;
- uint32_t p1, p2, p3, p4;
- uint32_t st0, st1;
- const int16_t *filter = &filter_x0[3];
- uint32_t filter45;
-
- filter45 = ((const int32_t *)filter)[0]; /* taps [3] and [4] fetched as one 32-bit word for dpa.w.ph. NOTE(review): &filter_x0[3] is 6 bytes past filter_x0, so this word load may be misaligned - confirm the target tolerates it */
-
- for (y = h; y--;) {
- /* prefetch data to cache memory */
- prefetch_load(src + src_stride);
- prefetch_load(src + src_stride + 32);
- prefetch_store(dst + dst_stride);
-
- __asm__ __volatile__(
- "ulw %[tp1], 0(%[src]) \n\t"
- "ulw %[tp2], 4(%[src]) \n\t"
-
- /* even 1. pixel */
- "mtlo %[vector4a], $ac3 \n\t"
- "mthi $zero, $ac3 \n\t"
- "mtlo %[vector4a], $ac2 \n\t"
- "mthi $zero, $ac2 \n\t"
- "preceu.ph.qbr %[p1], %[tp1] \n\t"
- "preceu.ph.qbl %[p2], %[tp1] \n\t"
- "preceu.ph.qbr %[p3], %[tp2] \n\t"
- "preceu.ph.qbl %[p4], %[tp2] \n\t"
- "ulw %[tp3], 8(%[src]) \n\t"
- "dpa.w.ph $ac3, %[p1], %[filter45] \n\t"
- "extp %[Temp1], $ac3, 31 \n\t"
-
- /* even 2. pixel */
- "dpa.w.ph $ac2, %[p2], %[filter45] \n\t"
- "extp %[Temp3], $ac2, 31 \n\t"
-
- /* even 3. pixel */
- "lbux %[st0], %[Temp1](%[cm]) \n\t"
- "mtlo %[vector4a], $ac1 \n\t"
- "mthi $zero, $ac1 \n\t"
- "dpa.w.ph $ac1, %[p3], %[filter45] \n\t"
- "extp %[Temp1], $ac1, 31 \n\t"
-
- /* even 4. pixel */
- "mtlo %[vector4a], $ac2 \n\t"
- "mthi $zero, $ac2 \n\t"
- "mtlo %[vector4a], $ac3 \n\t"
- "mthi $zero, $ac3 \n\t"
- "sb %[st0], 0(%[dst]) \n\t"
- "lbux %[st1], %[Temp3](%[cm]) \n\t"
-
- "balign %[tp3], %[tp2], 3 \n\t" /* tp3 and tp2 are rebuilt here; they are the inputs of the odd pixels below */
- "balign %[tp2], %[tp1], 3 \n\t"
-
- "dpa.w.ph $ac2, %[p4], %[filter45] \n\t"
- "extp %[Temp3], $ac2, 31 \n\t"
-
- "lbux %[st0], %[Temp1](%[cm]) \n\t"
-
- /* odd 1. pixel */
- "mtlo %[vector4a], $ac1 \n\t"
- "mthi $zero, $ac1 \n\t"
- "sb %[st1], 2(%[dst]) \n\t"
- "preceu.ph.qbr %[p1], %[tp2] \n\t"
- "preceu.ph.qbl %[p2], %[tp2] \n\t"
- "preceu.ph.qbr %[p3], %[tp3] \n\t"
- "preceu.ph.qbl %[p4], %[tp3] \n\t"
- "sb %[st0], 4(%[dst]) \n\t"
- "dpa.w.ph $ac3, %[p1], %[filter45] \n\t"
- "extp %[Temp2], $ac3, 31 \n\t"
-
- /* odd 2. pixel */
- "mtlo %[vector4a], $ac3 \n\t"
- "mthi $zero, $ac3 \n\t"
- "mtlo %[vector4a], $ac2 \n\t"
- "mthi $zero, $ac2 \n\t"
- "lbux %[st0], %[Temp3](%[cm]) \n\t"
- "dpa.w.ph $ac1, %[p2], %[filter45] \n\t"
- "extp %[Temp3], $ac1, 31 \n\t"
-
- /* odd 3. pixel */
- "lbux %[st1], %[Temp2](%[cm]) \n\t"
- "dpa.w.ph $ac3, %[p3], %[filter45] \n\t"
- "extp %[Temp2], $ac3, 31 \n\t"
-
- /* odd 4. pixel */
- "sb %[st1], 1(%[dst]) \n\t"
- "sb %[st0], 6(%[dst]) \n\t"
- "dpa.w.ph $ac2, %[p4], %[filter45] \n\t"
- "extp %[Temp1], $ac2, 31 \n\t"
-
- /* clamp */
- "lbux %[p4], %[Temp3](%[cm]) \n\t"
- "lbux %[p2], %[Temp2](%[cm]) \n\t"
- "lbux %[p1], %[Temp1](%[cm]) \n\t"
-
- /* store bytes: the remaining odd results go to dst[3], dst[5], dst[7] */
- "sb %[p4], 3(%[dst]) \n\t"
- "sb %[p2], 5(%[dst]) \n\t"
- "sb %[p1], 7(%[dst]) \n\t"
-
- : [tp1] "=&r"(tp1), [tp2] "=&r"(tp2), [tp3] "=&r"(tp3),
- [st0] "=&r"(st0), [st1] "=&r"(st1), [p1] "=&r"(p1), [p2] "=&r"(p2),
- [p3] "=&r"(p3), [p4] "=&r"(p4), [Temp1] "=&r"(Temp1),
- [Temp2] "=&r"(Temp2), [Temp3] "=&r"(Temp3)
- : [filter45] "r"(filter45), [vector4a] "r"(vector4a), [cm] "r"(cm),
- [dst] "r"(dst), [src] "r"(src));
-
- /* Next row... */
- src += src_stride;
- dst += dst_stride;
- }
-}
-
-static void convolve_bi_horiz_16_dspr2(const uint8_t *src_ptr,
- int32_t src_stride, uint8_t *dst_ptr,
- int32_t dst_stride,
- const int16_t *filter_x0, int32_t h,
- int32_t count) { /*
- * Two-tap horizontal filter for h rows of count * 16 pixels.
- * Every pass of the inner loop filters 16 pixels: the eight even results
- * are stored to dst[0], dst[2], ... dst[14] and the eight odd results to
- * dst[1], dst[3], ... dst[15]; src and dst then move on by 16.
- * Only filter_x0[3] and filter_x0[4] are read.
- * The extp instructions depend on the DSPControl value that
- * aom_convolve2_horiz_dspr2() writes with wrdsp before calling this function.
- */
- int32_t y, c;
- const uint8_t *src;
- uint8_t *dst;
- uint8_t *cm = aom_ff_cropTbl; /* lookup table: lbux fetches cm[extracted sum] and that byte is stored */
- uint32_t vector_64 = 64; /* value loaded into LO of an accumulator before each dpa.w.ph; presumably the rounding term - confirm against FILTER_BITS */
- int32_t Temp1, Temp2, Temp3;
- uint32_t qload1, qload2, qload3;
- uint32_t p1, p2, p3, p4, p5;
- uint32_t st1, st2, st3;
- const int16_t *filter = &filter_x0[3];
- uint32_t filter45;
-
- filter45 = ((const int32_t *)filter)[0]; /* taps [3] and [4] fetched as one 32-bit word for dpa.w.ph. NOTE(review): &filter_x0[3] is 6 bytes past filter_x0, so this word load may be misaligned - confirm the target tolerates it */
-
- for (y = h; y--;) {
- src = src_ptr;
- dst = dst_ptr;
-
- /* prefetch data to cache memory */
- prefetch_load(src_ptr + src_stride);
- prefetch_load(src_ptr + src_stride + 32);
- prefetch_store(dst_ptr + dst_stride);
-
- for (c = 0; c < count; c++) {
- __asm__ __volatile__(
- "ulw %[qload1], 0(%[src]) \n\t"
- "ulw %[qload2], 4(%[src]) \n\t"
-
- /* even 1. pixel */
- "mtlo %[vector_64], $ac1 \n\t" /* even 1 */
- "mthi $zero, $ac1 \n\t"
- "mtlo %[vector_64], $ac2 \n\t" /* even 2 */
- "mthi $zero, $ac2 \n\t"
- "preceu.ph.qbr %[p1], %[qload1] \n\t"
- "preceu.ph.qbl %[p2], %[qload1] \n\t"
- "preceu.ph.qbr %[p3], %[qload2] \n\t"
- "preceu.ph.qbl %[p4], %[qload2] \n\t"
- "ulw %[qload3], 8(%[src]) \n\t"
- "dpa.w.ph $ac1, %[p1], %[filter45] \n\t" /* even 1 */
- "extp %[Temp1], $ac1, 31 \n\t" /* even 1 */
-
- /* even 2. pixel */
- "mtlo %[vector_64], $ac3 \n\t" /* even 3 */
- "mthi $zero, $ac3 \n\t"
- "preceu.ph.qbr %[p1], %[qload3] \n\t"
- "preceu.ph.qbl %[p5], %[qload3] \n\t"
- "ulw %[qload1], 12(%[src]) \n\t"
- "dpa.w.ph $ac2, %[p2], %[filter45] \n\t" /* even 2 */
- "extp %[Temp2], $ac2, 31 \n\t" /* even 2 */
- "lbux %[st1], %[Temp1](%[cm]) \n\t" /* even 1 */
-
- /* even 3. pixel */
- "mtlo %[vector_64], $ac1 \n\t" /* even 4 */
- "mthi $zero, $ac1 \n\t"
- "preceu.ph.qbr %[p2], %[qload1] \n\t"
- "sb %[st1], 0(%[dst]) \n\t" /* even 1 */
- "dpa.w.ph $ac3, %[p3], %[filter45] \n\t" /* even 3 */
- "extp %[Temp3], $ac3, 31 \n\t" /* even 3 */
- "lbux %[st2], %[Temp2](%[cm]) \n\t" /* even 2 */
-
- /* even 4. pixel */
- "mtlo %[vector_64], $ac2 \n\t" /* even 5 */
- "mthi $zero, $ac2 \n\t"
- "preceu.ph.qbl %[p3], %[qload1] \n\t"
- "sb %[st2], 2(%[dst]) \n\t" /* even 2 */
- "dpa.w.ph $ac1, %[p4], %[filter45] \n\t" /* even 4 */
- "extp %[Temp1], $ac1, 31 \n\t" /* even 4 */
- "lbux %[st3], %[Temp3](%[cm]) \n\t" /* even 3 */
-
- /* even 5. pixel */
- "mtlo %[vector_64], $ac3 \n\t" /* even 6 */
- "mthi $zero, $ac3 \n\t"
- "sb %[st3], 4(%[dst]) \n\t" /* even 3 */
- "dpa.w.ph $ac2, %[p1], %[filter45] \n\t" /* even 5 */
- "extp %[Temp2], $ac2, 31 \n\t" /* even 5 */
- "lbux %[st1], %[Temp1](%[cm]) \n\t" /* even 4 */
-
- /* even 6. pixel */
- "mtlo %[vector_64], $ac1 \n\t" /* even 7 */
- "mthi $zero, $ac1 \n\t"
- "sb %[st1], 6(%[dst]) \n\t" /* even 4 */
- "dpa.w.ph $ac3, %[p5], %[filter45] \n\t" /* even 6 */
- "extp %[Temp3], $ac3, 31 \n\t" /* even 6 */
- "lbux %[st2], %[Temp2](%[cm]) \n\t" /* even 5 */
-
- /* even 7. pixel */
- "mtlo %[vector_64], $ac2 \n\t" /* even 8 */
- "mthi $zero, $ac2 \n\t"
- "sb %[st2], 8(%[dst]) \n\t" /* even 5 */
- "dpa.w.ph $ac1, %[p2], %[filter45] \n\t" /* even 7 */
- "extp %[Temp1], $ac1, 31 \n\t" /* even 7 */
- "lbux %[st3], %[Temp3](%[cm]) \n\t" /* even 6 */
-
- /* even 8. pixel */
- "mtlo %[vector_64], $ac3 \n\t" /* odd 1 */
- "mthi $zero, $ac3 \n\t"
- "dpa.w.ph $ac2, %[p3], %[filter45] \n\t" /* even 8 */
- "sb %[st3], 10(%[dst]) \n\t" /* even 6 */
- "extp %[Temp2], $ac2, 31 \n\t" /* even 8 */
- "lbux %[st1], %[Temp1](%[cm]) \n\t" /* even 7 */
-
- /* ODD pixels */
- "ulw %[qload1], 1(%[src]) \n\t"
- "ulw %[qload2], 5(%[src]) \n\t"
-
- /* odd 1. pixel */
- "mtlo %[vector_64], $ac1 \n\t" /* odd 2 */
- "mthi $zero, $ac1 \n\t"
- "preceu.ph.qbr %[p1], %[qload1] \n\t"
- "preceu.ph.qbl %[p2], %[qload1] \n\t"
- "preceu.ph.qbr %[p3], %[qload2] \n\t"
- "preceu.ph.qbl %[p4], %[qload2] \n\t"
- "sb %[st1], 12(%[dst]) \n\t" /* even 7 */
- "ulw %[qload3], 9(%[src]) \n\t"
- "dpa.w.ph $ac3, %[p1], %[filter45] \n\t" /* odd 1 */
- "extp %[Temp3], $ac3, 31 \n\t" /* odd 1 */
- "lbux %[st2], %[Temp2](%[cm]) \n\t" /* even 8 */
-
- /* odd 2. pixel */
- "mtlo %[vector_64], $ac2 \n\t" /* odd 3 */
- "mthi $zero, $ac2 \n\t"
- "preceu.ph.qbr %[p1], %[qload3] \n\t"
- "preceu.ph.qbl %[p5], %[qload3] \n\t"
- "sb %[st2], 14(%[dst]) \n\t" /* even 8 */
- "ulw %[qload1], 13(%[src]) \n\t"
- "dpa.w.ph $ac1, %[p2], %[filter45] \n\t" /* odd 2 */
- "extp %[Temp1], $ac1, 31 \n\t" /* odd 2 */
- "lbux %[st3], %[Temp3](%[cm]) \n\t" /* odd 1 */
-
- /* odd 3. pixel */
- "mtlo %[vector_64], $ac3 \n\t" /* odd 4 */
- "mthi $zero, $ac3 \n\t"
- "preceu.ph.qbr %[p2], %[qload1] \n\t"
- "sb %[st3], 1(%[dst]) \n\t" /* odd 1 */
- "dpa.w.ph $ac2, %[p3], %[filter45] \n\t" /* odd 3 */
- "extp %[Temp2], $ac2, 31 \n\t" /* odd 3 */
- "lbux %[st1], %[Temp1](%[cm]) \n\t" /* odd 2 */
-
- /* odd 4. pixel */
- "mtlo %[vector_64], $ac1 \n\t" /* odd 5 */
- "mthi $zero, $ac1 \n\t"
- "preceu.ph.qbl %[p3], %[qload1] \n\t"
- "sb %[st1], 3(%[dst]) \n\t" /* odd 2 */
- "dpa.w.ph $ac3, %[p4], %[filter45] \n\t" /* odd 4 */
- "extp %[Temp3], $ac3, 31 \n\t" /* odd 4 */
- "lbux %[st2], %[Temp2](%[cm]) \n\t" /* odd 3 */
-
- /* odd 5. pixel */
- "mtlo %[vector_64], $ac2 \n\t" /* odd 6 */
- "mthi $zero, $ac2 \n\t"
- "sb %[st2], 5(%[dst]) \n\t" /* odd 3 */
- "dpa.w.ph $ac1, %[p1], %[filter45] \n\t" /* odd 5 */
- "extp %[Temp1], $ac1, 31 \n\t" /* odd 5 */
- "lbux %[st3], %[Temp3](%[cm]) \n\t" /* odd 4 */
-
- /* odd 6. pixel */
- "mtlo %[vector_64], $ac3 \n\t" /* odd 7 */
- "mthi $zero, $ac3 \n\t"
- "sb %[st3], 7(%[dst]) \n\t" /* odd 4 */
- "dpa.w.ph $ac2, %[p5], %[filter45] \n\t" /* odd 6 */
- "extp %[Temp2], $ac2, 31 \n\t" /* odd 6 */
- "lbux %[st1], %[Temp1](%[cm]) \n\t" /* odd 5 */
-
- /* odd 7. pixel */
- "mtlo %[vector_64], $ac1 \n\t" /* odd 8 */
- "mthi $zero, $ac1 \n\t"
- "sb %[st1], 9(%[dst]) \n\t" /* odd 5 */
- "dpa.w.ph $ac3, %[p2], %[filter45] \n\t" /* odd 7 */
- "extp %[Temp3], $ac3, 31 \n\t" /* odd 7 */
-
- /* odd 8. pixel */
- "dpa.w.ph $ac1, %[p3], %[filter45] \n\t" /* odd 8 */
- "extp %[Temp1], $ac1, 31 \n\t" /* odd 8 */
-
- "lbux %[st2], %[Temp2](%[cm]) \n\t" /* odd 6 */
- "lbux %[st3], %[Temp3](%[cm]) \n\t" /* odd 7 */
- "lbux %[st1], %[Temp1](%[cm]) \n\t" /* odd 8 */
-
- "sb %[st2], 11(%[dst]) \n\t" /* odd 6 */
- "sb %[st3], 13(%[dst]) \n\t" /* odd 7 */
- "sb %[st1], 15(%[dst]) \n\t" /* odd 8 */
-
- : [qload1] "=&r"(qload1), [qload2] "=&r"(qload2),
- [qload3] "=&r"(qload3), [st1] "=&r"(st1), [st2] "=&r"(st2),
- [st3] "=&r"(st3), [p1] "=&r"(p1), [p2] "=&r"(p2), [p3] "=&r"(p3),
- [p4] "=&r"(p4), [p5] "=&r"(p5), [Temp1] "=&r"(Temp1),
- [Temp2] "=&r"(Temp2), [Temp3] "=&r"(Temp3)
- : [filter45] "r"(filter45), [vector_64] "r"(vector_64), [cm] "r"(cm),
- [dst] "r"(dst), [src] "r"(src));
-
- src += 16; /* next group of 16 pixels in the same row */
- dst += 16;
- }
-
- /* Next row... */
- src_ptr += src_stride;
- dst_ptr += dst_stride;
- }
-}
-
-static void convolve_bi_horiz_64_dspr2(const uint8_t *src_ptr,
- int32_t src_stride, uint8_t *dst_ptr,
- int32_t dst_stride,
- const int16_t *filter_x0, int32_t h) { /*
- * Two-tap horizontal filter for h rows of 64 pixels.
- * A row is handled as four passes of 16 pixels: the eight even results go
- * to dst[0], dst[2], ... dst[14] and the eight odd results to dst[1],
- * dst[3], ... dst[15]; src and dst then move on by 16.
- * Only filter_x0[3] and filter_x0[4] are read.
- * The extp instructions depend on the DSPControl value that
- * aom_convolve2_horiz_dspr2() writes with wrdsp before calling this function.
- */
- int32_t y, c;
- const uint8_t *src;
- uint8_t *dst;
- uint8_t *cm = aom_ff_cropTbl; /* lookup table: lbux fetches cm[extracted sum] and that byte is stored */
- uint32_t vector_64 = 64; /* value loaded into LO of an accumulator before each dpa.w.ph; presumably the rounding term - confirm against FILTER_BITS */
- int32_t Temp1, Temp2, Temp3;
- uint32_t qload1, qload2, qload3;
- uint32_t p1, p2, p3, p4, p5;
- uint32_t st1, st2, st3;
- const int16_t *filter = &filter_x0[3];
- uint32_t filter45;
-
- filter45 = ((const int32_t *)filter)[0]; /* taps [3] and [4] fetched as one 32-bit word for dpa.w.ph. NOTE(review): &filter_x0[3] is 6 bytes past filter_x0, so this word load may be misaligned - confirm the target tolerates it */
-
- for (y = h; y--;) {
- src = src_ptr;
- dst = dst_ptr;
-
- /* prefetch data to cache memory */
- prefetch_load(src_ptr + src_stride);
- prefetch_load(src_ptr + src_stride + 32);
- prefetch_load(src_ptr + src_stride + 64);
- prefetch_store(dst_ptr + dst_stride);
- prefetch_store(dst_ptr + dst_stride + 32);
-
- for (c = 0; c < 4; c++) {
- __asm__ __volatile__(
- "ulw %[qload1], 0(%[src]) \n\t"
- "ulw %[qload2], 4(%[src]) \n\t"
-
- /* even 1. pixel */
- "mtlo %[vector_64], $ac1 \n\t" /* even 1 */
- "mthi $zero, $ac1 \n\t"
- "mtlo %[vector_64], $ac2 \n\t" /* even 2 */
- "mthi $zero, $ac2 \n\t"
- "preceu.ph.qbr %[p1], %[qload1] \n\t"
- "preceu.ph.qbl %[p2], %[qload1] \n\t"
- "preceu.ph.qbr %[p3], %[qload2] \n\t"
- "preceu.ph.qbl %[p4], %[qload2] \n\t"
- "ulw %[qload3], 8(%[src]) \n\t"
- "dpa.w.ph $ac1, %[p1], %[filter45] \n\t" /* even 1 */
- "extp %[Temp1], $ac1, 31 \n\t" /* even 1 */
-
- /* even 2. pixel */
- "mtlo %[vector_64], $ac3 \n\t" /* even 3 */
- "mthi $zero, $ac3 \n\t"
- "preceu.ph.qbr %[p1], %[qload3] \n\t"
- "preceu.ph.qbl %[p5], %[qload3] \n\t"
- "ulw %[qload1], 12(%[src]) \n\t"
- "dpa.w.ph $ac2, %[p2], %[filter45] \n\t" /* even 2 */
- "extp %[Temp2], $ac2, 31 \n\t" /* even 2 */
- "lbux %[st1], %[Temp1](%[cm]) \n\t" /* even 1 */
-
- /* even 3. pixel */
- "mtlo %[vector_64], $ac1 \n\t" /* even 4 */
- "mthi $zero, $ac1 \n\t"
- "preceu.ph.qbr %[p2], %[qload1] \n\t"
- "sb %[st1], 0(%[dst]) \n\t" /* even 1 */
- "dpa.w.ph $ac3, %[p3], %[filter45] \n\t" /* even 3 */
- "extp %[Temp3], $ac3, 31 \n\t" /* even 3 */
- "lbux %[st2], %[Temp2](%[cm]) \n\t" /* even 2 */
-
- /* even 4. pixel */
- "mtlo %[vector_64], $ac2 \n\t" /* even 5 */
- "mthi $zero, $ac2 \n\t"
- "preceu.ph.qbl %[p3], %[qload1] \n\t"
- "sb %[st2], 2(%[dst]) \n\t" /* even 2 */
- "dpa.w.ph $ac1, %[p4], %[filter45] \n\t" /* even 4 */
- "extp %[Temp1], $ac1, 31 \n\t" /* even 4 */
- "lbux %[st3], %[Temp3](%[cm]) \n\t" /* even 3 */
-
- /* even 5. pixel */
- "mtlo %[vector_64], $ac3 \n\t" /* even 6 */
- "mthi $zero, $ac3 \n\t"
- "sb %[st3], 4(%[dst]) \n\t" /* even 3 */
- "dpa.w.ph $ac2, %[p1], %[filter45] \n\t" /* even 5 */
- "extp %[Temp2], $ac2, 31 \n\t" /* even 5 */
- "lbux %[st1], %[Temp1](%[cm]) \n\t" /* even 4 */
-
- /* even 6. pixel */
- "mtlo %[vector_64], $ac1 \n\t" /* even 7 */
- "mthi $zero, $ac1 \n\t"
- "sb %[st1], 6(%[dst]) \n\t" /* even 4 */
- "dpa.w.ph $ac3, %[p5], %[filter45] \n\t" /* even 6 */
- "extp %[Temp3], $ac3, 31 \n\t" /* even 6 */
- "lbux %[st2], %[Temp2](%[cm]) \n\t" /* even 5 */
-
- /* even 7. pixel */
- "mtlo %[vector_64], $ac2 \n\t" /* even 8 */
- "mthi $zero, $ac2 \n\t"
- "sb %[st2], 8(%[dst]) \n\t" /* even 5 */
- "dpa.w.ph $ac1, %[p2], %[filter45] \n\t" /* even 7 */
- "extp %[Temp1], $ac1, 31 \n\t" /* even 7 */
- "lbux %[st3], %[Temp3](%[cm]) \n\t" /* even 6 */
-
- /* even 8. pixel */
- "mtlo %[vector_64], $ac3 \n\t" /* odd 1 */
- "mthi $zero, $ac3 \n\t"
- "dpa.w.ph $ac2, %[p3], %[filter45] \n\t" /* even 8 */
- "sb %[st3], 10(%[dst]) \n\t" /* even 6 */
- "extp %[Temp2], $ac2, 31 \n\t" /* even 8 */
- "lbux %[st1], %[Temp1](%[cm]) \n\t" /* even 7 */
-
- /* ODD pixels */
- "ulw %[qload1], 1(%[src]) \n\t"
- "ulw %[qload2], 5(%[src]) \n\t"
-
- /* odd 1. pixel */
- "mtlo %[vector_64], $ac1 \n\t" /* odd 2 */
- "mthi $zero, $ac1 \n\t"
- "preceu.ph.qbr %[p1], %[qload1] \n\t"
- "preceu.ph.qbl %[p2], %[qload1] \n\t"
- "preceu.ph.qbr %[p3], %[qload2] \n\t"
- "preceu.ph.qbl %[p4], %[qload2] \n\t"
- "sb %[st1], 12(%[dst]) \n\t" /* even 7 */
- "ulw %[qload3], 9(%[src]) \n\t"
- "dpa.w.ph $ac3, %[p1], %[filter45] \n\t" /* odd 1 */
- "extp %[Temp3], $ac3, 31 \n\t" /* odd 1 */
- "lbux %[st2], %[Temp2](%[cm]) \n\t" /* even 8 */
-
- /* odd 2. pixel */
- "mtlo %[vector_64], $ac2 \n\t" /* odd 3 */
- "mthi $zero, $ac2 \n\t"
- "preceu.ph.qbr %[p1], %[qload3] \n\t"
- "preceu.ph.qbl %[p5], %[qload3] \n\t"
- "sb %[st2], 14(%[dst]) \n\t" /* even 8 */
- "ulw %[qload1], 13(%[src]) \n\t"
- "dpa.w.ph $ac1, %[p2], %[filter45] \n\t" /* odd 2 */
- "extp %[Temp1], $ac1, 31 \n\t" /* odd 2 */
- "lbux %[st3], %[Temp3](%[cm]) \n\t" /* odd 1 */
-
- /* odd 3. pixel */
- "mtlo %[vector_64], $ac3 \n\t" /* odd 4 */
- "mthi $zero, $ac3 \n\t"
- "preceu.ph.qbr %[p2], %[qload1] \n\t"
- "sb %[st3], 1(%[dst]) \n\t" /* odd 1 */
- "dpa.w.ph $ac2, %[p3], %[filter45] \n\t" /* odd 3 */
- "extp %[Temp2], $ac2, 31 \n\t" /* odd 3 */
- "lbux %[st1], %[Temp1](%[cm]) \n\t" /* odd 2 */
-
- /* odd 4. pixel */
- "mtlo %[vector_64], $ac1 \n\t" /* odd 5 */
- "mthi $zero, $ac1 \n\t"
- "preceu.ph.qbl %[p3], %[qload1] \n\t"
- "sb %[st1], 3(%[dst]) \n\t" /* odd 2 */
- "dpa.w.ph $ac3, %[p4], %[filter45] \n\t" /* odd 4 */
- "extp %[Temp3], $ac3, 31 \n\t" /* odd 4 */
- "lbux %[st2], %[Temp2](%[cm]) \n\t" /* odd 3 */
-
- /* odd 5. pixel */
- "mtlo %[vector_64], $ac2 \n\t" /* odd 6 */
- "mthi $zero, $ac2 \n\t"
- "sb %[st2], 5(%[dst]) \n\t" /* odd 3 */
- "dpa.w.ph $ac1, %[p1], %[filter45] \n\t" /* odd 5 */
- "extp %[Temp1], $ac1, 31 \n\t" /* odd 5 */
- "lbux %[st3], %[Temp3](%[cm]) \n\t" /* odd 4 */
-
- /* odd 6. pixel */
- "mtlo %[vector_64], $ac3 \n\t" /* odd 7 */
- "mthi $zero, $ac3 \n\t"
- "sb %[st3], 7(%[dst]) \n\t" /* odd 4 */
- "dpa.w.ph $ac2, %[p5], %[filter45] \n\t" /* odd 6 */
- "extp %[Temp2], $ac2, 31 \n\t" /* odd 6 */
- "lbux %[st1], %[Temp1](%[cm]) \n\t" /* odd 5 */
-
- /* odd 7. pixel */
- "mtlo %[vector_64], $ac1 \n\t" /* odd 8 */
- "mthi $zero, $ac1 \n\t"
- "sb %[st1], 9(%[dst]) \n\t" /* odd 5 */
- "dpa.w.ph $ac3, %[p2], %[filter45] \n\t" /* odd 7 */
- "extp %[Temp3], $ac3, 31 \n\t" /* odd 7 */
-
- /* odd 8. pixel */
- "dpa.w.ph $ac1, %[p3], %[filter45] \n\t" /* odd 8 */
- "extp %[Temp1], $ac1, 31 \n\t" /* odd 8 */
-
- "lbux %[st2], %[Temp2](%[cm]) \n\t" /* odd 6 */
- "lbux %[st3], %[Temp3](%[cm]) \n\t" /* odd 7 */
- "lbux %[st1], %[Temp1](%[cm]) \n\t" /* odd 8 */
-
- "sb %[st2], 11(%[dst]) \n\t" /* odd 6 */
- "sb %[st3], 13(%[dst]) \n\t" /* odd 7 */
- "sb %[st1], 15(%[dst]) \n\t" /* odd 8 */
-
- : [qload1] "=&r"(qload1), [qload2] "=&r"(qload2),
- [qload3] "=&r"(qload3), [st1] "=&r"(st1), [st2] "=&r"(st2),
- [st3] "=&r"(st3), [p1] "=&r"(p1), [p2] "=&r"(p2), [p3] "=&r"(p3),
- [p4] "=&r"(p4), [p5] "=&r"(p5), [Temp1] "=&r"(Temp1),
- [Temp2] "=&r"(Temp2), [Temp3] "=&r"(Temp3)
- : [filter45] "r"(filter45), [vector_64] "r"(vector_64), [cm] "r"(cm),
- [dst] "r"(dst), [src] "r"(src));
-
- src += 16; /* next group of 16 pixels in the same row */
- dst += 16;
- }
-
- /* Next row... */
- src_ptr += src_stride;
- dst_ptr += dst_stride;
- }
-}
-
-/*
- * Two-tap horizontal convolution entry point for MIPS DSPr2.
- *
- * Asserts that x_step_q4 is 16, writes 38 to the DSPControl field selected
- * by wrdsp mask 1 (the extract position used by extp in the kernels), warms
- * the cache and then hands the block to the kernel matching its width:
- * 4, 8, 16 (also used twice per row for 32) or 64 pixels. Any other width
- * is forwarded unchanged to aom_convolve8_horiz_c().
- *
- * filter_y and y_step_q4 are only passed through to that fallback.
- */
-void aom_convolve2_horiz_dspr2(const uint8_t *src, ptrdiff_t src_stride,
-                               uint8_t *dst, ptrdiff_t dst_stride,
-                               const int16_t *filter_x, int x_step_q4,
-                               const int16_t *filter_y, int y_step_q4, int w,
-                               int h) {
-  uint32_t extract_pos = 38;
-
-  assert(x_step_q4 == 16);
-
-  prefetch_load((const uint8_t *)filter_x);
-
-  /* Program the accumulator extract position before any kernel runs. */
-  __asm__ __volatile__("wrdsp %[pos], 1 \n\t"
-                       :
-                       : [pos] "r"(extract_pos));
-
-  /* Request the first source and destination lines ahead of use. */
-  prefetch_load(src);
-  prefetch_load(src + 32);
-  prefetch_store(dst);
-
-  if (w == 4) {
-    convolve_bi_horiz_4_dspr2(src, (int32_t)src_stride, dst,
-                              (int32_t)dst_stride, filter_x, (int32_t)h);
-  } else if (w == 8) {
-    convolve_bi_horiz_8_dspr2(src, (int32_t)src_stride, dst,
-                              (int32_t)dst_stride, filter_x, (int32_t)h);
-  } else if (w == 16 || w == 32) {
-    /* One 16-pixel pass per row for w == 16, two for w == 32. */
-    convolve_bi_horiz_16_dspr2(src, (int32_t)src_stride, dst,
-                               (int32_t)dst_stride, filter_x, (int32_t)h,
-                               w / 16);
-  } else if (w == 64) {
-    /* Wider rows: also request the far ends of the first lines. */
-    prefetch_load(src + 64);
-    prefetch_store(dst + 32);
-
-    convolve_bi_horiz_64_dspr2(src, (int32_t)src_stride, dst,
-                               (int32_t)dst_stride, filter_x, (int32_t)h);
-  } else {
-    /* No dedicated kernel for this width: use the generic C routine. */
-    aom_convolve8_horiz_c(src, src_stride, dst, dst_stride, filter_x,
-                          x_step_q4, filter_y, y_step_q4, w, h);
-  }
-}
-#endif
diff --git a/third_party/aom/aom_dsp/mips/convolve2_vert_dspr2.c b/third_party/aom/aom_dsp/mips/convolve2_vert_dspr2.c
deleted file mode 100644
index 40abfd89e..000000000
--- a/third_party/aom/aom_dsp/mips/convolve2_vert_dspr2.c
+++ /dev/null
@@ -1,237 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#include <assert.h>
-#include <stdio.h>
-
-#include "config/aom_dsp_rtcd.h"
-
-#include "aom_dsp/mips/convolve_common_dspr2.h"
-#include "aom_dsp/aom_dsp_common.h"
-#include "aom_ports/mem.h"
-
-#if HAVE_DSPR2
-static void convolve_bi_vert_4_dspr2(const uint8_t *src, int32_t src_stride,
- uint8_t *dst, int32_t dst_stride,
- const int16_t *filter_y, int32_t w,
- int32_t h) {
- int32_t x, y;
- const uint8_t *src_ptr;
- uint8_t *dst_ptr;
- uint8_t *cm = aom_ff_cropTbl;
- uint32_t vector4a = 64;
- uint32_t load1, load2;
- uint32_t p1, p2;
- uint32_t scratch1;
- uint32_t store1, store2;
- int32_t Temp1, Temp2;
- const int16_t *filter = &filter_y[3];
- uint32_t filter45;
-
- filter45 = ((const int32_t *)filter)[0];
-
- for (y = h; y--;) {
- /* prefetch data to cache memory */
- prefetch_store(dst + dst_stride);
-
- for (x = 0; x < w; x += 4) {
- src_ptr = src + x;
- dst_ptr = dst + x;
-
- __asm__ __volatile__(
- "ulw %[load1], 0(%[src_ptr]) \n\t"
- "add %[src_ptr], %[src_ptr], %[src_stride] \n\t"
- "ulw %[load2], 0(%[src_ptr]) \n\t"
-
- "mtlo %[vector4a], $ac0 \n\t"
- "mtlo %[vector4a], $ac1 \n\t"
- "mtlo %[vector4a], $ac2 \n\t"
- "mtlo %[vector4a], $ac3 \n\t"
- "mthi $zero, $ac0 \n\t"
- "mthi $zero, $ac1 \n\t"
- "mthi $zero, $ac2 \n\t"
- "mthi $zero, $ac3 \n\t"
-
- "preceu.ph.qbr %[scratch1], %[load1] \n\t"
- "preceu.ph.qbr %[p1], %[load2] \n\t"
-
- "precrq.ph.w %[p2], %[p1], %[scratch1] \n\t" /* pixel 2 */
- "append %[p1], %[scratch1], 16 \n\t" /* pixel 1 */
-
- "dpa.w.ph $ac0, %[p1], %[filter45] \n\t"
- "dpa.w.ph $ac1, %[p2], %[filter45] \n\t"
-
- "preceu.ph.qbl %[scratch1], %[load1] \n\t"
- "preceu.ph.qbl %[p1], %[load2] \n\t"
-
- "precrq.ph.w %[p2], %[p1], %[scratch1] \n\t" /* pixel 2 */
- "append %[p1], %[scratch1], 16 \n\t" /* pixel 1 */
-
- "dpa.w.ph $ac2, %[p1], %[filter45] \n\t"
- "dpa.w.ph $ac3, %[p2], %[filter45] \n\t"
-
- "extp %[Temp1], $ac0, 31 \n\t"
- "extp %[Temp2], $ac1, 31 \n\t"
-
- "lbux %[store1], %[Temp1](%[cm]) \n\t"
- "extp %[Temp1], $ac2, 31 \n\t"
-
- "lbux %[store2], %[Temp2](%[cm]) \n\t"
- "extp %[Temp2], $ac3, 31 \n\t"
-
- "sb %[store1], 0(%[dst_ptr]) \n\t"
- "sb %[store2], 1(%[dst_ptr]) \n\t"
-
- "lbux %[store1], %[Temp1](%[cm]) \n\t"
- "lbux %[store2], %[Temp2](%[cm]) \n\t"
-
- "sb %[store1], 2(%[dst_ptr]) \n\t"
- "sb %[store2], 3(%[dst_ptr]) \n\t"
-
- : [load1] "=&r"(load1), [load2] "=&r"(load2), [p1] "=&r"(p1),
- [p2] "=&r"(p2), [scratch1] "=&r"(scratch1), [Temp1] "=&r"(Temp1),
- [Temp2] "=&r"(Temp2), [store1] "=&r"(store1),
- [store2] "=&r"(store2), [src_ptr] "+r"(src_ptr)
- : [filter45] "r"(filter45), [vector4a] "r"(vector4a),
- [src_stride] "r"(src_stride), [cm] "r"(cm), [dst_ptr] "r"(dst_ptr));
- }
-
- /* Next row... */
- src += src_stride;
- dst += dst_stride;
- }
-}
-
-static void convolve_bi_vert_64_dspr2(const uint8_t *src, int32_t src_stride,
- uint8_t *dst, int32_t dst_stride,
- const int16_t *filter_y, int32_t h) {
- int32_t x, y;
- const uint8_t *src_ptr;
- uint8_t *dst_ptr;
- uint8_t *cm = aom_ff_cropTbl;
- uint32_t vector4a = 64;
- uint32_t load1, load2;
- uint32_t p1, p2;
- uint32_t scratch1;
- uint32_t store1, store2;
- int32_t Temp1, Temp2;
- const int16_t *filter = &filter_y[3];
- uint32_t filter45;
-
- filter45 = ((const int32_t *)filter)[0];
-
- for (y = h; y--;) {
- /* prefetch data to cache memory */
- prefetch_store(dst + dst_stride);
-
- for (x = 0; x < 64; x += 4) {
- src_ptr = src + x;
- dst_ptr = dst + x;
-
- __asm__ __volatile__(
- "ulw %[load1], 0(%[src_ptr]) \n\t"
- "add %[src_ptr], %[src_ptr], %[src_stride] \n\t"
- "ulw %[load2], 0(%[src_ptr]) \n\t"
-
- "mtlo %[vector4a], $ac0 \n\t"
- "mtlo %[vector4a], $ac1 \n\t"
- "mtlo %[vector4a], $ac2 \n\t"
- "mtlo %[vector4a], $ac3 \n\t"
- "mthi $zero, $ac0 \n\t"
- "mthi $zero, $ac1 \n\t"
- "mthi $zero, $ac2 \n\t"
- "mthi $zero, $ac3 \n\t"
-
- "preceu.ph.qbr %[scratch1], %[load1] \n\t"
- "preceu.ph.qbr %[p1], %[load2] \n\t"
-
- "precrq.ph.w %[p2], %[p1], %[scratch1] \n\t" /* pixel 2 */
- "append %[p1], %[scratch1], 16 \n\t" /* pixel 1 */
-
- "dpa.w.ph $ac0, %[p1], %[filter45] \n\t"
- "dpa.w.ph $ac1, %[p2], %[filter45] \n\t"
-
- "preceu.ph.qbl %[scratch1], %[load1] \n\t"
- "preceu.ph.qbl %[p1], %[load2] \n\t"
-
- "precrq.ph.w %[p2], %[p1], %[scratch1] \n\t" /* pixel 2 */
- "append %[p1], %[scratch1], 16 \n\t" /* pixel 1 */
-
- "dpa.w.ph $ac2, %[p1], %[filter45] \n\t"
- "dpa.w.ph $ac3, %[p2], %[filter45] \n\t"
-
- "extp %[Temp1], $ac0, 31 \n\t"
- "extp %[Temp2], $ac1, 31 \n\t"
-
- "lbux %[store1], %[Temp1](%[cm]) \n\t"
- "extp %[Temp1], $ac2, 31 \n\t"
-
- "lbux %[store2], %[Temp2](%[cm]) \n\t"
- "extp %[Temp2], $ac3, 31 \n\t"
-
- "sb %[store1], 0(%[dst_ptr]) \n\t"
- "sb %[store2], 1(%[dst_ptr]) \n\t"
-
- "lbux %[store1], %[Temp1](%[cm]) \n\t"
- "lbux %[store2], %[Temp2](%[cm]) \n\t"
-
- "sb %[store1], 2(%[dst_ptr]) \n\t"
- "sb %[store2], 3(%[dst_ptr]) \n\t"
-
- : [load1] "=&r"(load1), [load2] "=&r"(load2), [p1] "=&r"(p1),
- [p2] "=&r"(p2), [scratch1] "=&r"(scratch1), [Temp1] "=&r"(Temp1),
- [Temp2] "=&r"(Temp2), [store1] "=&r"(store1),
- [store2] "=&r"(store2), [src_ptr] "+r"(src_ptr)
- : [filter45] "r"(filter45), [vector4a] "r"(vector4a),
- [src_stride] "r"(src_stride), [cm] "r"(cm), [dst_ptr] "r"(dst_ptr));
- }
-
- /* Next row... */
- src += src_stride;
- dst += dst_stride;
- }
-}
-
-void aom_convolve2_vert_dspr2(const uint8_t *src, ptrdiff_t src_stride,
- uint8_t *dst, ptrdiff_t dst_stride,
- const int16_t *filter_x, int x_step_q4,
- const int16_t *filter_y, int y_step_q4, int w,
- int h) {
- uint32_t pos = 38;
-
- assert(y_step_q4 == 16);
-
- /* bit positon for extract from acc */
- __asm__ __volatile__("wrdsp %[pos], 1 \n\t"
- :
- : [pos] "r"(pos));
-
- prefetch_store(dst);
-
- switch (w) {
- case 4:
- case 8:
- case 16:
- case 32:
- convolve_bi_vert_4_dspr2(src, src_stride, dst, dst_stride, filter_y, w,
- h);
- break;
- case 64:
- prefetch_store(dst + 32);
- convolve_bi_vert_64_dspr2(src, src_stride, dst, dst_stride, filter_y, h);
- break;
- default:
- aom_convolve8_vert_c(src, src_stride, dst, dst_stride, filter_x,
- x_step_q4, filter_y, y_step_q4, w, h);
- break;
- }
-}
-#endif
diff --git a/third_party/aom/aom_dsp/mips/convolve8_dspr2.c b/third_party/aom/aom_dsp/mips/convolve8_dspr2.c
deleted file mode 100644
index af54b4264..000000000
--- a/third_party/aom/aom_dsp/mips/convolve8_dspr2.c
+++ /dev/null
@@ -1,222 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#include <assert.h>
-#include <stdio.h>
-
-#include "config/aom_dsp_rtcd.h"
-
-#include "aom_dsp/mips/convolve_common_dspr2.h"
-#include "aom_dsp/aom_dsp_common.h"
-#include "aom_dsp/aom_filter.h"
-#include "aom_ports/mem.h"
-
-#if HAVE_DSPR2
-void aom_convolve_copy_dspr2(const uint8_t *src, ptrdiff_t src_stride,
- uint8_t *dst, ptrdiff_t dst_stride,
- const int16_t *filter_x, int filter_x_stride,
- const int16_t *filter_y, int filter_y_stride,
- int w, int h) {
- int x, y;
-
- (void)filter_x;
- (void)filter_x_stride;
- (void)filter_y;
- (void)filter_y_stride;
-
- /* prefetch data to cache memory */
- prefetch_load(src);
- prefetch_load(src + 32);
- prefetch_store(dst);
-
- switch (w) {
- case 4: {
- uint32_t tp1;
-
- /* 1 word storage */
- for (y = h; y--;) {
- prefetch_load(src + src_stride);
- prefetch_load(src + src_stride + 32);
- prefetch_store(dst + dst_stride);
-
- __asm__ __volatile__(
- "ulw %[tp1], (%[src]) \n\t"
- "sw %[tp1], (%[dst]) \n\t" /* store */
-
- : [tp1] "=&r"(tp1)
- : [src] "r"(src), [dst] "r"(dst));
-
- src += src_stride;
- dst += dst_stride;
- }
- } break;
- case 8: {
- uint32_t tp1, tp2;
-
- /* 2 word storage */
- for (y = h; y--;) {
- prefetch_load(src + src_stride);
- prefetch_load(src + src_stride + 32);
- prefetch_store(dst + dst_stride);
-
- __asm__ __volatile__(
- "ulw %[tp1], 0(%[src]) \n\t"
- "ulw %[tp2], 4(%[src]) \n\t"
- "sw %[tp1], 0(%[dst]) \n\t" /* store */
- "sw %[tp2], 4(%[dst]) \n\t" /* store */
-
- : [tp1] "=&r"(tp1), [tp2] "=&r"(tp2)
- : [src] "r"(src), [dst] "r"(dst));
-
- src += src_stride;
- dst += dst_stride;
- }
- } break;
- case 16: {
- uint32_t tp1, tp2, tp3, tp4;
-
- /* 4 word storage */
- for (y = h; y--;) {
- prefetch_load(src + src_stride);
- prefetch_load(src + src_stride + 32);
- prefetch_store(dst + dst_stride);
-
- __asm__ __volatile__(
- "ulw %[tp1], 0(%[src]) \n\t"
- "ulw %[tp2], 4(%[src]) \n\t"
- "ulw %[tp3], 8(%[src]) \n\t"
- "ulw %[tp4], 12(%[src]) \n\t"
-
- "sw %[tp1], 0(%[dst]) \n\t" /* store */
- "sw %[tp2], 4(%[dst]) \n\t" /* store */
- "sw %[tp3], 8(%[dst]) \n\t" /* store */
- "sw %[tp4], 12(%[dst]) \n\t" /* store */
-
- : [tp1] "=&r"(tp1), [tp2] "=&r"(tp2), [tp3] "=&r"(tp3),
- [tp4] "=&r"(tp4)
- : [src] "r"(src), [dst] "r"(dst));
-
- src += src_stride;
- dst += dst_stride;
- }
- } break;
- case 32: {
- uint32_t tp1, tp2, tp3, tp4;
- uint32_t tp5, tp6, tp7, tp8;
-
- /* 8 word storage */
- for (y = h; y--;) {
- prefetch_load(src + src_stride);
- prefetch_load(src + src_stride + 32);
- prefetch_store(dst + dst_stride);
-
- __asm__ __volatile__(
- "ulw %[tp1], 0(%[src]) \n\t"
- "ulw %[tp2], 4(%[src]) \n\t"
- "ulw %[tp3], 8(%[src]) \n\t"
- "ulw %[tp4], 12(%[src]) \n\t"
- "ulw %[tp5], 16(%[src]) \n\t"
- "ulw %[tp6], 20(%[src]) \n\t"
- "ulw %[tp7], 24(%[src]) \n\t"
- "ulw %[tp8], 28(%[src]) \n\t"
-
- "sw %[tp1], 0(%[dst]) \n\t" /* store */
- "sw %[tp2], 4(%[dst]) \n\t" /* store */
- "sw %[tp3], 8(%[dst]) \n\t" /* store */
- "sw %[tp4], 12(%[dst]) \n\t" /* store */
- "sw %[tp5], 16(%[dst]) \n\t" /* store */
- "sw %[tp6], 20(%[dst]) \n\t" /* store */
- "sw %[tp7], 24(%[dst]) \n\t" /* store */
- "sw %[tp8], 28(%[dst]) \n\t" /* store */
-
- : [tp1] "=&r"(tp1), [tp2] "=&r"(tp2), [tp3] "=&r"(tp3),
- [tp4] "=&r"(tp4), [tp5] "=&r"(tp5), [tp6] "=&r"(tp6),
- [tp7] "=&r"(tp7), [tp8] "=&r"(tp8)
- : [src] "r"(src), [dst] "r"(dst));
-
- src += src_stride;
- dst += dst_stride;
- }
- } break;
- case 64: {
- uint32_t tp1, tp2, tp3, tp4;
- uint32_t tp5, tp6, tp7, tp8;
-
- prefetch_load(src + 64);
- prefetch_store(dst + 32);
-
- /* 16 word storage */
- for (y = h; y--;) {
- prefetch_load(src + src_stride);
- prefetch_load(src + src_stride + 32);
- prefetch_load(src + src_stride + 64);
- prefetch_store(dst + dst_stride);
- prefetch_store(dst + dst_stride + 32);
-
- __asm__ __volatile__(
- "ulw %[tp1], 0(%[src]) \n\t"
- "ulw %[tp2], 4(%[src]) \n\t"
- "ulw %[tp3], 8(%[src]) \n\t"
- "ulw %[tp4], 12(%[src]) \n\t"
- "ulw %[tp5], 16(%[src]) \n\t"
- "ulw %[tp6], 20(%[src]) \n\t"
- "ulw %[tp7], 24(%[src]) \n\t"
- "ulw %[tp8], 28(%[src]) \n\t"
-
- "sw %[tp1], 0(%[dst]) \n\t" /* store */
- "sw %[tp2], 4(%[dst]) \n\t" /* store */
- "sw %[tp3], 8(%[dst]) \n\t" /* store */
- "sw %[tp4], 12(%[dst]) \n\t" /* store */
- "sw %[tp5], 16(%[dst]) \n\t" /* store */
- "sw %[tp6], 20(%[dst]) \n\t" /* store */
- "sw %[tp7], 24(%[dst]) \n\t" /* store */
- "sw %[tp8], 28(%[dst]) \n\t" /* store */
-
- "ulw %[tp1], 32(%[src]) \n\t"
- "ulw %[tp2], 36(%[src]) \n\t"
- "ulw %[tp3], 40(%[src]) \n\t"
- "ulw %[tp4], 44(%[src]) \n\t"
- "ulw %[tp5], 48(%[src]) \n\t"
- "ulw %[tp6], 52(%[src]) \n\t"
- "ulw %[tp7], 56(%[src]) \n\t"
- "ulw %[tp8], 60(%[src]) \n\t"
-
- "sw %[tp1], 32(%[dst]) \n\t" /* store */
- "sw %[tp2], 36(%[dst]) \n\t" /* store */
- "sw %[tp3], 40(%[dst]) \n\t" /* store */
- "sw %[tp4], 44(%[dst]) \n\t" /* store */
- "sw %[tp5], 48(%[dst]) \n\t" /* store */
- "sw %[tp6], 52(%[dst]) \n\t" /* store */
- "sw %[tp7], 56(%[dst]) \n\t" /* store */
- "sw %[tp8], 60(%[dst]) \n\t" /* store */
-
- : [tp1] "=&r"(tp1), [tp2] "=&r"(tp2), [tp3] "=&r"(tp3),
- [tp4] "=&r"(tp4), [tp5] "=&r"(tp5), [tp6] "=&r"(tp6),
- [tp7] "=&r"(tp7), [tp8] "=&r"(tp8)
- : [src] "r"(src), [dst] "r"(dst));
-
- src += src_stride;
- dst += dst_stride;
- }
- } break;
- default:
- for (y = h; y--;) {
- for (x = 0; x < w; ++x) {
- dst[x] = src[x];
- }
-
- src += src_stride;
- dst += dst_stride;
- }
- break;
- }
-}
-#endif
diff --git a/third_party/aom/aom_dsp/mips/convolve8_horiz_dspr2.c b/third_party/aom/aom_dsp/mips/convolve8_horiz_dspr2.c
deleted file mode 100644
index f9c6879ab..000000000
--- a/third_party/aom/aom_dsp/mips/convolve8_horiz_dspr2.c
+++ /dev/null
@@ -1,879 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#include <assert.h>
-#include <stdio.h>
-
-#include "config/aom_dsp_rtcd.h"
-
-#include "aom_dsp/mips/convolve_common_dspr2.h"
-#include "aom_dsp/aom_dsp_common.h"
-#include "aom_dsp/aom_filter.h"
-#include "aom_ports/mem.h"
-
-#if HAVE_DSPR2
-static void convolve_horiz_4_dspr2(const uint8_t *src, int32_t src_stride,
- uint8_t *dst, int32_t dst_stride,
- const int16_t *filter_x0, int32_t h) {
- int32_t y;
- uint8_t *cm = aom_ff_cropTbl;
- int32_t vector1b, vector2b, vector3b, vector4b;
- int32_t Temp1, Temp2, Temp3, Temp4;
- uint32_t vector4a = 64;
- uint32_t tp1, tp2;
- uint32_t p1, p2, p3, p4;
- uint32_t n1, n2, n3, n4;
- uint32_t tn1, tn2;
-
- vector1b = ((const int32_t *)filter_x0)[0];
- vector2b = ((const int32_t *)filter_x0)[1];
- vector3b = ((const int32_t *)filter_x0)[2];
- vector4b = ((const int32_t *)filter_x0)[3];
-
- for (y = h; y--;) {
- /* prefetch data to cache memory */
- prefetch_load(src + src_stride);
- prefetch_load(src + src_stride + 32);
- prefetch_store(dst + dst_stride);
-
- __asm__ __volatile__(
- "ulw %[tp1], 0(%[src]) \n\t"
- "ulw %[tp2], 4(%[src]) \n\t"
-
- /* even 1. pixel */
- "mtlo %[vector4a], $ac3 \n\t"
- "mthi $zero, $ac3 \n\t"
- "preceu.ph.qbr %[p1], %[tp1] \n\t"
- "preceu.ph.qbl %[p2], %[tp1] \n\t"
- "preceu.ph.qbr %[p3], %[tp2] \n\t"
- "preceu.ph.qbl %[p4], %[tp2] \n\t"
- "dpa.w.ph $ac3, %[p1], %[vector1b] \n\t"
- "dpa.w.ph $ac3, %[p2], %[vector2b] \n\t"
- "dpa.w.ph $ac3, %[p3], %[vector3b] \n\t"
- "ulw %[tn2], 8(%[src]) \n\t"
- "dpa.w.ph $ac3, %[p4], %[vector4b] \n\t"
- "extp %[Temp1], $ac3, 31 \n\t"
-
- /* even 2. pixel */
- "mtlo %[vector4a], $ac2 \n\t"
- "mthi $zero, $ac2 \n\t"
- "preceu.ph.qbr %[p1], %[tn2] \n\t"
- "balign %[tn1], %[tn2], 3 \n\t"
- "balign %[tn2], %[tp2], 3 \n\t"
- "balign %[tp2], %[tp1], 3 \n\t"
- "dpa.w.ph $ac2, %[p2], %[vector1b] \n\t"
- "dpa.w.ph $ac2, %[p3], %[vector2b] \n\t"
- "dpa.w.ph $ac2, %[p4], %[vector3b] \n\t"
- "dpa.w.ph $ac2, %[p1], %[vector4b] \n\t"
- "extp %[Temp3], $ac2, 31 \n\t"
-
- /* odd 1. pixel */
- "lbux %[tp1], %[Temp1](%[cm]) \n\t"
- "mtlo %[vector4a], $ac3 \n\t"
- "mthi $zero, $ac3 \n\t"
- "preceu.ph.qbr %[n1], %[tp2] \n\t"
- "preceu.ph.qbl %[n2], %[tp2] \n\t"
- "preceu.ph.qbr %[n3], %[tn2] \n\t"
- "preceu.ph.qbl %[n4], %[tn2] \n\t"
- "dpa.w.ph $ac3, %[n1], %[vector1b] \n\t"
- "dpa.w.ph $ac3, %[n2], %[vector2b] \n\t"
- "dpa.w.ph $ac3, %[n3], %[vector3b] \n\t"
- "dpa.w.ph $ac3, %[n4], %[vector4b] \n\t"
- "extp %[Temp2], $ac3, 31 \n\t"
-
- /* odd 2. pixel */
- "lbux %[tp2], %[Temp3](%[cm]) \n\t"
- "mtlo %[vector4a], $ac2 \n\t"
- "mthi $zero, $ac2 \n\t"
- "preceu.ph.qbr %[n1], %[tn1] \n\t"
- "dpa.w.ph $ac2, %[n2], %[vector1b] \n\t"
- "dpa.w.ph $ac2, %[n3], %[vector2b] \n\t"
- "dpa.w.ph $ac2, %[n4], %[vector3b] \n\t"
- "dpa.w.ph $ac2, %[n1], %[vector4b] \n\t"
- "extp %[Temp4], $ac2, 31 \n\t"
-
- /* clamp */
- "lbux %[tn1], %[Temp2](%[cm]) \n\t"
- "lbux %[n2], %[Temp4](%[cm]) \n\t"
-
- /* store bytes */
- "sb %[tp1], 0(%[dst]) \n\t"
- "sb %[tn1], 1(%[dst]) \n\t"
- "sb %[tp2], 2(%[dst]) \n\t"
- "sb %[n2], 3(%[dst]) \n\t"
-
- : [tp1] "=&r"(tp1), [tp2] "=&r"(tp2), [tn1] "=&r"(tn1),
- [tn2] "=&r"(tn2), [p1] "=&r"(p1), [p2] "=&r"(p2), [p3] "=&r"(p3),
- [p4] "=&r"(p4), [n1] "=&r"(n1), [n2] "=&r"(n2), [n3] "=&r"(n3),
- [n4] "=&r"(n4), [Temp1] "=&r"(Temp1), [Temp2] "=&r"(Temp2),
- [Temp3] "=&r"(Temp3), [Temp4] "=&r"(Temp4)
- : [vector1b] "r"(vector1b), [vector2b] "r"(vector2b),
- [vector3b] "r"(vector3b), [vector4b] "r"(vector4b),
- [vector4a] "r"(vector4a), [cm] "r"(cm), [dst] "r"(dst),
- [src] "r"(src));
-
- /* Next row... */
- src += src_stride;
- dst += dst_stride;
- }
-}
-
-static void convolve_horiz_8_dspr2(const uint8_t *src, int32_t src_stride,
- uint8_t *dst, int32_t dst_stride,
- const int16_t *filter_x0, int32_t h) {
- int32_t y;
- uint8_t *cm = aom_ff_cropTbl;
- uint32_t vector4a = 64;
- int32_t vector1b, vector2b, vector3b, vector4b;
- int32_t Temp1, Temp2, Temp3;
- uint32_t tp1, tp2;
- uint32_t p1, p2, p3, p4, n1;
- uint32_t tn1, tn2, tn3;
- uint32_t st0, st1;
-
- vector1b = ((const int32_t *)filter_x0)[0];
- vector2b = ((const int32_t *)filter_x0)[1];
- vector3b = ((const int32_t *)filter_x0)[2];
- vector4b = ((const int32_t *)filter_x0)[3];
-
- for (y = h; y--;) {
- /* prefetch data to cache memory */
- prefetch_load(src + src_stride);
- prefetch_load(src + src_stride + 32);
- prefetch_store(dst + dst_stride);
-
- __asm__ __volatile__(
- "ulw %[tp1], 0(%[src]) \n\t"
- "ulw %[tp2], 4(%[src]) \n\t"
-
- /* even 1. pixel */
- "mtlo %[vector4a], $ac3 \n\t"
- "mthi $zero, $ac3 \n\t"
- "mtlo %[vector4a], $ac2 \n\t"
- "mthi $zero, $ac2 \n\t"
- "preceu.ph.qbr %[p1], %[tp1] \n\t"
- "preceu.ph.qbl %[p2], %[tp1] \n\t"
- "preceu.ph.qbr %[p3], %[tp2] \n\t"
- "preceu.ph.qbl %[p4], %[tp2] \n\t"
- "ulw %[tn2], 8(%[src]) \n\t"
- "dpa.w.ph $ac3, %[p1], %[vector1b] \n\t"
- "dpa.w.ph $ac3, %[p2], %[vector2b] \n\t"
- "dpa.w.ph $ac3, %[p3], %[vector3b] \n\t"
- "dpa.w.ph $ac3, %[p4], %[vector4b] \n\t"
- "extp %[Temp1], $ac3, 31 \n\t"
-
- /* even 2. pixel */
- "preceu.ph.qbr %[p1], %[tn2] \n\t"
- "preceu.ph.qbl %[n1], %[tn2] \n\t"
- "ulw %[tn1], 12(%[src]) \n\t"
- "dpa.w.ph $ac2, %[p2], %[vector1b] \n\t"
- "dpa.w.ph $ac2, %[p3], %[vector2b] \n\t"
- "dpa.w.ph $ac2, %[p4], %[vector3b] \n\t"
- "dpa.w.ph $ac2, %[p1], %[vector4b] \n\t"
- "extp %[Temp3], $ac2, 31 \n\t"
-
- /* even 3. pixel */
- "lbux %[st0], %[Temp1](%[cm]) \n\t"
- "mtlo %[vector4a], $ac1 \n\t"
- "mthi $zero, $ac1 \n\t"
- "preceu.ph.qbr %[p2], %[tn1] \n\t"
- "dpa.w.ph $ac1, %[p3], %[vector1b] \n\t"
- "dpa.w.ph $ac1, %[p4], %[vector2b] \n\t"
- "dpa.w.ph $ac1, %[p1], %[vector3b] \n\t"
- "dpa.w.ph $ac1, %[n1], %[vector4b] \n\t"
- "extp %[Temp1], $ac1, 31 \n\t"
-
- /* even 4. pixel */
- "mtlo %[vector4a], $ac2 \n\t"
- "mthi $zero, $ac2 \n\t"
- "mtlo %[vector4a], $ac3 \n\t"
- "mthi $zero, $ac3 \n\t"
- "sb %[st0], 0(%[dst]) \n\t"
- "lbux %[st1], %[Temp3](%[cm]) \n\t"
-
- "balign %[tn3], %[tn1], 3 \n\t"
- "balign %[tn1], %[tn2], 3 \n\t"
- "balign %[tn2], %[tp2], 3 \n\t"
- "balign %[tp2], %[tp1], 3 \n\t"
-
- "dpa.w.ph $ac2, %[p4], %[vector1b] \n\t"
- "dpa.w.ph $ac2, %[p1], %[vector2b] \n\t"
- "dpa.w.ph $ac2, %[n1], %[vector3b] \n\t"
- "dpa.w.ph $ac2, %[p2], %[vector4b] \n\t"
- "extp %[Temp3], $ac2, 31 \n\t"
-
- "lbux %[st0], %[Temp1](%[cm]) \n\t"
-
- /* odd 1. pixel */
- "mtlo %[vector4a], $ac1 \n\t"
- "mthi $zero, $ac1 \n\t"
- "sb %[st1], 2(%[dst]) \n\t"
- "preceu.ph.qbr %[p1], %[tp2] \n\t"
- "preceu.ph.qbl %[p2], %[tp2] \n\t"
- "preceu.ph.qbr %[p3], %[tn2] \n\t"
- "preceu.ph.qbl %[p4], %[tn2] \n\t"
- "sb %[st0], 4(%[dst]) \n\t"
- "dpa.w.ph $ac3, %[p1], %[vector1b] \n\t"
- "dpa.w.ph $ac3, %[p2], %[vector2b] \n\t"
- "dpa.w.ph $ac3, %[p3], %[vector3b] \n\t"
- "dpa.w.ph $ac3, %[p4], %[vector4b] \n\t"
- "extp %[Temp2], $ac3, 31 \n\t"
-
- /* odd 2. pixel */
- "mtlo %[vector4a], $ac3 \n\t"
- "mthi $zero, $ac3 \n\t"
- "mtlo %[vector4a], $ac2 \n\t"
- "mthi $zero, $ac2 \n\t"
- "preceu.ph.qbr %[p1], %[tn1] \n\t"
- "preceu.ph.qbl %[n1], %[tn1] \n\t"
- "lbux %[st0], %[Temp3](%[cm]) \n\t"
- "dpa.w.ph $ac1, %[p2], %[vector1b] \n\t"
- "dpa.w.ph $ac1, %[p3], %[vector2b] \n\t"
- "dpa.w.ph $ac1, %[p4], %[vector3b] \n\t"
- "dpa.w.ph $ac1, %[p1], %[vector4b] \n\t"
- "extp %[Temp3], $ac1, 31 \n\t"
-
- /* odd 3. pixel */
- "lbux %[st1], %[Temp2](%[cm]) \n\t"
- "preceu.ph.qbr %[p2], %[tn3] \n\t"
- "dpa.w.ph $ac3, %[p3], %[vector1b] \n\t"
- "dpa.w.ph $ac3, %[p4], %[vector2b] \n\t"
- "dpa.w.ph $ac3, %[p1], %[vector3b] \n\t"
- "dpa.w.ph $ac3, %[n1], %[vector4b] \n\t"
- "extp %[Temp2], $ac3, 31 \n\t"
-
- /* odd 4. pixel */
- "sb %[st1], 1(%[dst]) \n\t"
- "sb %[st0], 6(%[dst]) \n\t"
- "dpa.w.ph $ac2, %[p4], %[vector1b] \n\t"
- "dpa.w.ph $ac2, %[p1], %[vector2b] \n\t"
- "dpa.w.ph $ac2, %[n1], %[vector3b] \n\t"
- "dpa.w.ph $ac2, %[p2], %[vector4b] \n\t"
- "extp %[Temp1], $ac2, 31 \n\t"
-
- /* clamp */
- "lbux %[p4], %[Temp3](%[cm]) \n\t"
- "lbux %[p2], %[Temp2](%[cm]) \n\t"
- "lbux %[n1], %[Temp1](%[cm]) \n\t"
-
- /* store bytes */
- "sb %[p4], 3(%[dst]) \n\t"
- "sb %[p2], 5(%[dst]) \n\t"
- "sb %[n1], 7(%[dst]) \n\t"
-
- : [tp1] "=&r"(tp1), [tp2] "=&r"(tp2), [tn1] "=&r"(tn1),
- [tn2] "=&r"(tn2), [tn3] "=&r"(tn3), [st0] "=&r"(st0),
- [st1] "=&r"(st1), [p1] "=&r"(p1), [p2] "=&r"(p2), [p3] "=&r"(p3),
- [p4] "=&r"(p4), [n1] "=&r"(n1), [Temp1] "=&r"(Temp1),
- [Temp2] "=&r"(Temp2), [Temp3] "=&r"(Temp3)
- : [vector1b] "r"(vector1b), [vector2b] "r"(vector2b),
- [vector3b] "r"(vector3b), [vector4b] "r"(vector4b),
- [vector4a] "r"(vector4a), [cm] "r"(cm), [dst] "r"(dst),
- [src] "r"(src));
-
- /* Next row... */
- src += src_stride;
- dst += dst_stride;
- }
-}
-
-static void convolve_horiz_16_dspr2(const uint8_t *src_ptr, int32_t src_stride,
- uint8_t *dst_ptr, int32_t dst_stride,
- const int16_t *filter_x0, int32_t h,
- int32_t count) {
- int32_t y, c;
- const uint8_t *src;
- uint8_t *dst;
- uint8_t *cm = aom_ff_cropTbl;
- uint32_t vector_64 = 64;
- int32_t filter12, filter34, filter56, filter78;
- int32_t Temp1, Temp2, Temp3;
- uint32_t qload1, qload2, qload3;
- uint32_t p1, p2, p3, p4, p5;
- uint32_t st1, st2, st3;
-
- filter12 = ((const int32_t *)filter_x0)[0];
- filter34 = ((const int32_t *)filter_x0)[1];
- filter56 = ((const int32_t *)filter_x0)[2];
- filter78 = ((const int32_t *)filter_x0)[3];
-
- for (y = h; y--;) {
- src = src_ptr;
- dst = dst_ptr;
-
- /* prefetch data to cache memory */
- prefetch_load(src_ptr + src_stride);
- prefetch_load(src_ptr + src_stride + 32);
- prefetch_store(dst_ptr + dst_stride);
-
- for (c = 0; c < count; c++) {
- __asm__ __volatile__(
- "ulw %[qload1], 0(%[src]) \n\t"
- "ulw %[qload2], 4(%[src]) \n\t"
-
- /* even 1. pixel */
- "mtlo %[vector_64], $ac1 \n\t" /* even 1 */
- "mthi $zero, $ac1 \n\t"
- "mtlo %[vector_64], $ac2 \n\t" /* even 2 */
- "mthi $zero, $ac2 \n\t"
- "preceu.ph.qbr %[p1], %[qload1] \n\t"
- "preceu.ph.qbl %[p2], %[qload1] \n\t"
- "preceu.ph.qbr %[p3], %[qload2] \n\t"
- "preceu.ph.qbl %[p4], %[qload2] \n\t"
- "ulw %[qload3], 8(%[src]) \n\t"
- "dpa.w.ph $ac1, %[p1], %[filter12] \n\t" /* even 1 */
- "dpa.w.ph $ac1, %[p2], %[filter34] \n\t" /* even 1 */
- "dpa.w.ph $ac1, %[p3], %[filter56] \n\t" /* even 1 */
- "dpa.w.ph $ac1, %[p4], %[filter78] \n\t" /* even 1 */
- "extp %[Temp1], $ac1, 31 \n\t" /* even 1 */
-
- /* even 2. pixel */
- "mtlo %[vector_64], $ac3 \n\t" /* even 3 */
- "mthi $zero, $ac3 \n\t"
- "preceu.ph.qbr %[p1], %[qload3] \n\t"
- "preceu.ph.qbl %[p5], %[qload3] \n\t"
- "ulw %[qload1], 12(%[src]) \n\t"
- "dpa.w.ph $ac2, %[p2], %[filter12] \n\t" /* even 1 */
- "dpa.w.ph $ac2, %[p3], %[filter34] \n\t" /* even 1 */
- "dpa.w.ph $ac2, %[p4], %[filter56] \n\t" /* even 1 */
- "dpa.w.ph $ac2, %[p1], %[filter78] \n\t" /* even 1 */
- "extp %[Temp2], $ac2, 31 \n\t" /* even 1 */
- "lbux %[st1], %[Temp1](%[cm]) \n\t" /* even 1 */
-
- /* even 3. pixel */
- "mtlo %[vector_64], $ac1 \n\t" /* even 4 */
- "mthi $zero, $ac1 \n\t"
- "preceu.ph.qbr %[p2], %[qload1] \n\t"
- "sb %[st1], 0(%[dst]) \n\t" /* even 1 */
- "dpa.w.ph $ac3, %[p3], %[filter12] \n\t" /* even 3 */
- "dpa.w.ph $ac3, %[p4], %[filter34] \n\t" /* even 3 */
- "dpa.w.ph $ac3, %[p1], %[filter56] \n\t" /* even 3 */
- "dpa.w.ph $ac3, %[p5], %[filter78] \n\t" /* even 3 */
- "extp %[Temp3], $ac3, 31 \n\t" /* even 3 */
- "lbux %[st2], %[Temp2](%[cm]) \n\t" /* even 1 */
-
- /* even 4. pixel */
- "mtlo %[vector_64], $ac2 \n\t" /* even 5 */
- "mthi $zero, $ac2 \n\t"
- "preceu.ph.qbl %[p3], %[qload1] \n\t"
- "sb %[st2], 2(%[dst]) \n\t" /* even 1 */
- "ulw %[qload2], 16(%[src]) \n\t"
- "dpa.w.ph $ac1, %[p4], %[filter12] \n\t" /* even 4 */
- "dpa.w.ph $ac1, %[p1], %[filter34] \n\t" /* even 4 */
- "dpa.w.ph $ac1, %[p5], %[filter56] \n\t" /* even 4 */
- "dpa.w.ph $ac1, %[p2], %[filter78] \n\t" /* even 4 */
- "extp %[Temp1], $ac1, 31 \n\t" /* even 4 */
- "lbux %[st3], %[Temp3](%[cm]) \n\t" /* even 3 */
-
- /* even 5. pixel */
- "mtlo %[vector_64], $ac3 \n\t" /* even 6 */
- "mthi $zero, $ac3 \n\t"
- "preceu.ph.qbr %[p4], %[qload2] \n\t"
- "sb %[st3], 4(%[dst]) \n\t" /* even 3 */
- "dpa.w.ph $ac2, %[p1], %[filter12] \n\t" /* even 5 */
- "dpa.w.ph $ac2, %[p5], %[filter34] \n\t" /* even 5 */
- "dpa.w.ph $ac2, %[p2], %[filter56] \n\t" /* even 5 */
- "dpa.w.ph $ac2, %[p3], %[filter78] \n\t" /* even 5 */
- "extp %[Temp2], $ac2, 31 \n\t" /* even 5 */
- "lbux %[st1], %[Temp1](%[cm]) \n\t" /* even 4 */
-
- /* even 6. pixel */
- "mtlo %[vector_64], $ac1 \n\t" /* even 7 */
- "mthi $zero, $ac1 \n\t"
- "preceu.ph.qbl %[p1], %[qload2] \n\t"
- "sb %[st1], 6(%[dst]) \n\t" /* even 4 */
- "ulw %[qload3], 20(%[src]) \n\t"
- "dpa.w.ph $ac3, %[p5], %[filter12] \n\t" /* even 6 */
- "dpa.w.ph $ac3, %[p2], %[filter34] \n\t" /* even 6 */
- "dpa.w.ph $ac3, %[p3], %[filter56] \n\t" /* even 6 */
- "dpa.w.ph $ac3, %[p4], %[filter78] \n\t" /* even 6 */
- "extp %[Temp3], $ac3, 31 \n\t" /* even 6 */
- "lbux %[st2], %[Temp2](%[cm]) \n\t" /* even 5 */
-
- /* even 7. pixel */
- "mtlo %[vector_64], $ac2 \n\t" /* even 8 */
- "mthi $zero, $ac2 \n\t"
- "preceu.ph.qbr %[p5], %[qload3] \n\t"
- "sb %[st2], 8(%[dst]) \n\t" /* even 5 */
- "dpa.w.ph $ac1, %[p2], %[filter12] \n\t" /* even 7 */
- "dpa.w.ph $ac1, %[p3], %[filter34] \n\t" /* even 7 */
- "dpa.w.ph $ac1, %[p4], %[filter56] \n\t" /* even 7 */
- "dpa.w.ph $ac1, %[p1], %[filter78] \n\t" /* even 7 */
- "extp %[Temp1], $ac1, 31 \n\t" /* even 7 */
- "lbux %[st3], %[Temp3](%[cm]) \n\t" /* even 6 */
-
- /* even 8. pixel */
- "mtlo %[vector_64], $ac3 \n\t" /* odd 1 */
- "mthi $zero, $ac3 \n\t"
- "dpa.w.ph $ac2, %[p3], %[filter12] \n\t" /* even 8 */
- "dpa.w.ph $ac2, %[p4], %[filter34] \n\t" /* even 8 */
- "sb %[st3], 10(%[dst]) \n\t" /* even 6 */
- "dpa.w.ph $ac2, %[p1], %[filter56] \n\t" /* even 8 */
- "dpa.w.ph $ac2, %[p5], %[filter78] \n\t" /* even 8 */
- "extp %[Temp2], $ac2, 31 \n\t" /* even 8 */
- "lbux %[st1], %[Temp1](%[cm]) \n\t" /* even 7 */
-
- /* ODD pixels */
- "ulw %[qload1], 1(%[src]) \n\t"
- "ulw %[qload2], 5(%[src]) \n\t"
-
- /* odd 1. pixel */
- "mtlo %[vector_64], $ac1 \n\t" /* odd 2 */
- "mthi $zero, $ac1 \n\t"
- "preceu.ph.qbr %[p1], %[qload1] \n\t"
- "preceu.ph.qbl %[p2], %[qload1] \n\t"
- "preceu.ph.qbr %[p3], %[qload2] \n\t"
- "preceu.ph.qbl %[p4], %[qload2] \n\t"
- "sb %[st1], 12(%[dst]) \n\t" /* even 7 */
- "ulw %[qload3], 9(%[src]) \n\t"
- "dpa.w.ph $ac3, %[p1], %[filter12] \n\t" /* odd 1 */
- "dpa.w.ph $ac3, %[p2], %[filter34] \n\t" /* odd 1 */
- "dpa.w.ph $ac3, %[p3], %[filter56] \n\t" /* odd 1 */
- "dpa.w.ph $ac3, %[p4], %[filter78] \n\t" /* odd 1 */
- "extp %[Temp3], $ac3, 31 \n\t" /* odd 1 */
- "lbux %[st2], %[Temp2](%[cm]) \n\t" /* even 8 */
-
- /* odd 2. pixel */
- "mtlo %[vector_64], $ac2 \n\t" /* odd 3 */
- "mthi $zero, $ac2 \n\t"
- "preceu.ph.qbr %[p1], %[qload3] \n\t"
- "preceu.ph.qbl %[p5], %[qload3] \n\t"
- "sb %[st2], 14(%[dst]) \n\t" /* even 8 */
- "ulw %[qload1], 13(%[src]) \n\t"
- "dpa.w.ph $ac1, %[p2], %[filter12] \n\t" /* odd 2 */
- "dpa.w.ph $ac1, %[p3], %[filter34] \n\t" /* odd 2 */
- "dpa.w.ph $ac1, %[p4], %[filter56] \n\t" /* odd 2 */
- "dpa.w.ph $ac1, %[p1], %[filter78] \n\t" /* odd 2 */
- "extp %[Temp1], $ac1, 31 \n\t" /* odd 2 */
- "lbux %[st3], %[Temp3](%[cm]) \n\t" /* odd 1 */
-
- /* odd 3. pixel */
- "mtlo %[vector_64], $ac3 \n\t" /* odd 4 */
- "mthi $zero, $ac3 \n\t"
- "preceu.ph.qbr %[p2], %[qload1] \n\t"
- "sb %[st3], 1(%[dst]) \n\t" /* odd 1 */
- "dpa.w.ph $ac2, %[p3], %[filter12] \n\t" /* odd 3 */
- "dpa.w.ph $ac2, %[p4], %[filter34] \n\t" /* odd 3 */
- "dpa.w.ph $ac2, %[p1], %[filter56] \n\t" /* odd 3 */
- "dpa.w.ph $ac2, %[p5], %[filter78] \n\t" /* odd 3 */
- "extp %[Temp2], $ac2, 31 \n\t" /* odd 3 */
- "lbux %[st1], %[Temp1](%[cm]) \n\t" /* odd 2 */
-
- /* odd 4. pixel */
- "mtlo %[vector_64], $ac1 \n\t" /* odd 5 */
- "mthi $zero, $ac1 \n\t"
- "preceu.ph.qbl %[p3], %[qload1] \n\t"
- "sb %[st1], 3(%[dst]) \n\t" /* odd 2 */
- "ulw %[qload2], 17(%[src]) \n\t"
- "dpa.w.ph $ac3, %[p4], %[filter12] \n\t" /* odd 4 */
- "dpa.w.ph $ac3, %[p1], %[filter34] \n\t" /* odd 4 */
- "dpa.w.ph $ac3, %[p5], %[filter56] \n\t" /* odd 4 */
- "dpa.w.ph $ac3, %[p2], %[filter78] \n\t" /* odd 4 */
- "extp %[Temp3], $ac3, 31 \n\t" /* odd 4 */
- "lbux %[st2], %[Temp2](%[cm]) \n\t" /* odd 3 */
-
- /* odd 5. pixel */
- "mtlo %[vector_64], $ac2 \n\t" /* odd 6 */
- "mthi $zero, $ac2 \n\t"
- "preceu.ph.qbr %[p4], %[qload2] \n\t"
- "sb %[st2], 5(%[dst]) \n\t" /* odd 3 */
- "dpa.w.ph $ac1, %[p1], %[filter12] \n\t" /* odd 5 */
- "dpa.w.ph $ac1, %[p5], %[filter34] \n\t" /* odd 5 */
- "dpa.w.ph $ac1, %[p2], %[filter56] \n\t" /* odd 5 */
- "dpa.w.ph $ac1, %[p3], %[filter78] \n\t" /* odd 5 */
- "extp %[Temp1], $ac1, 31 \n\t" /* odd 5 */
- "lbux %[st3], %[Temp3](%[cm]) \n\t" /* odd 4 */
-
- /* odd 6. pixel */
- "mtlo %[vector_64], $ac3 \n\t" /* odd 7 */
- "mthi $zero, $ac3 \n\t"
- "preceu.ph.qbl %[p1], %[qload2] \n\t"
- "sb %[st3], 7(%[dst]) \n\t" /* odd 4 */
- "ulw %[qload3], 21(%[src]) \n\t"
- "dpa.w.ph $ac2, %[p5], %[filter12] \n\t" /* odd 6 */
- "dpa.w.ph $ac2, %[p2], %[filter34] \n\t" /* odd 6 */
- "dpa.w.ph $ac2, %[p3], %[filter56] \n\t" /* odd 6 */
- "dpa.w.ph $ac2, %[p4], %[filter78] \n\t" /* odd 6 */
- "extp %[Temp2], $ac2, 31 \n\t" /* odd 6 */
- "lbux %[st1], %[Temp1](%[cm]) \n\t" /* odd 5 */
-
- /* odd 7. pixel */
- "mtlo %[vector_64], $ac1 \n\t" /* odd 8 */
- "mthi $zero, $ac1 \n\t"
- "preceu.ph.qbr %[p5], %[qload3] \n\t"
- "sb %[st1], 9(%[dst]) \n\t" /* odd 5 */
- "dpa.w.ph $ac3, %[p2], %[filter12] \n\t" /* odd 7 */
- "dpa.w.ph $ac3, %[p3], %[filter34] \n\t" /* odd 7 */
- "dpa.w.ph $ac3, %[p4], %[filter56] \n\t" /* odd 7 */
- "dpa.w.ph $ac3, %[p1], %[filter78] \n\t" /* odd 7 */
- "extp %[Temp3], $ac3, 31 \n\t" /* odd 7 */
-
- /* odd 8. pixel */
- "dpa.w.ph $ac1, %[p3], %[filter12] \n\t" /* odd 8 */
- "dpa.w.ph $ac1, %[p4], %[filter34] \n\t" /* odd 8 */
- "dpa.w.ph $ac1, %[p1], %[filter56] \n\t" /* odd 8 */
- "dpa.w.ph $ac1, %[p5], %[filter78] \n\t" /* odd 8 */
- "extp %[Temp1], $ac1, 31 \n\t" /* odd 8 */
-
- "lbux %[st2], %[Temp2](%[cm]) \n\t" /* odd 6 */
- "lbux %[st3], %[Temp3](%[cm]) \n\t" /* odd 7 */
- "lbux %[st1], %[Temp1](%[cm]) \n\t" /* odd 8 */
-
- "sb %[st2], 11(%[dst]) \n\t" /* odd 6 */
- "sb %[st3], 13(%[dst]) \n\t" /* odd 7 */
- "sb %[st1], 15(%[dst]) \n\t" /* odd 8 */
-
- : [qload1] "=&r"(qload1), [qload2] "=&r"(qload2),
- [qload3] "=&r"(qload3), [st1] "=&r"(st1), [st2] "=&r"(st2),
- [st3] "=&r"(st3), [p1] "=&r"(p1), [p2] "=&r"(p2), [p3] "=&r"(p3),
- [p4] "=&r"(p4), [p5] "=&r"(p5), [Temp1] "=&r"(Temp1),
- [Temp2] "=&r"(Temp2), [Temp3] "=&r"(Temp3)
- : [filter12] "r"(filter12), [filter34] "r"(filter34),
- [filter56] "r"(filter56), [filter78] "r"(filter78),
- [vector_64] "r"(vector_64), [cm] "r"(cm), [dst] "r"(dst),
- [src] "r"(src));
-
- src += 16;
- dst += 16;
- }
-
- /* Next row... */
- src_ptr += src_stride;
- dst_ptr += dst_stride;
- }
-}
-
-static void convolve_horiz_64_dspr2(const uint8_t *src_ptr, int32_t src_stride,
- uint8_t *dst_ptr, int32_t dst_stride,
- const int16_t *filter_x0, int32_t h) {
- int32_t y, c;
- const uint8_t *src;
- uint8_t *dst;
- uint8_t *cm = aom_ff_cropTbl;
- uint32_t vector_64 = 64;
- int32_t filter12, filter34, filter56, filter78;
- int32_t Temp1, Temp2, Temp3;
- uint32_t qload1, qload2, qload3;
- uint32_t p1, p2, p3, p4, p5;
- uint32_t st1, st2, st3;
-
- filter12 = ((const int32_t *)filter_x0)[0];
- filter34 = ((const int32_t *)filter_x0)[1];
- filter56 = ((const int32_t *)filter_x0)[2];
- filter78 = ((const int32_t *)filter_x0)[3];
-
- for (y = h; y--;) {
- src = src_ptr;
- dst = dst_ptr;
-
- /* prefetch data to cache memory */
- prefetch_load(src_ptr + src_stride);
- prefetch_load(src_ptr + src_stride + 32);
- prefetch_load(src_ptr + src_stride + 64);
- prefetch_store(dst_ptr + dst_stride);
- prefetch_store(dst_ptr + dst_stride + 32);
-
- for (c = 0; c < 4; c++) {
- __asm__ __volatile__(
- "ulw %[qload1], 0(%[src]) \n\t"
- "ulw %[qload2], 4(%[src]) \n\t"
-
- /* even 1. pixel */
- "mtlo %[vector_64], $ac1 \n\t" /* even 1 */
- "mthi $zero, $ac1 \n\t"
- "mtlo %[vector_64], $ac2 \n\t" /* even 2 */
- "mthi $zero, $ac2 \n\t"
- "preceu.ph.qbr %[p1], %[qload1] \n\t"
- "preceu.ph.qbl %[p2], %[qload1] \n\t"
- "preceu.ph.qbr %[p3], %[qload2] \n\t"
- "preceu.ph.qbl %[p4], %[qload2] \n\t"
- "ulw %[qload3], 8(%[src]) \n\t"
- "dpa.w.ph $ac1, %[p1], %[filter12] \n\t" /* even 1 */
- "dpa.w.ph $ac1, %[p2], %[filter34] \n\t" /* even 1 */
- "dpa.w.ph $ac1, %[p3], %[filter56] \n\t" /* even 1 */
- "dpa.w.ph $ac1, %[p4], %[filter78] \n\t" /* even 1 */
- "extp %[Temp1], $ac1, 31 \n\t" /* even 1 */
-
- /* even 2. pixel */
- "mtlo %[vector_64], $ac3 \n\t" /* even 3 */
- "mthi $zero, $ac3 \n\t"
- "preceu.ph.qbr %[p1], %[qload3] \n\t"
- "preceu.ph.qbl %[p5], %[qload3] \n\t"
- "ulw %[qload1], 12(%[src]) \n\t"
- "dpa.w.ph $ac2, %[p2], %[filter12] \n\t" /* even 1 */
- "dpa.w.ph $ac2, %[p3], %[filter34] \n\t" /* even 1 */
- "dpa.w.ph $ac2, %[p4], %[filter56] \n\t" /* even 1 */
- "dpa.w.ph $ac2, %[p1], %[filter78] \n\t" /* even 1 */
- "extp %[Temp2], $ac2, 31 \n\t" /* even 1 */
- "lbux %[st1], %[Temp1](%[cm]) \n\t" /* even 1 */
-
- /* even 3. pixel */
- "mtlo %[vector_64], $ac1 \n\t" /* even 4 */
- "mthi $zero, $ac1 \n\t"
- "preceu.ph.qbr %[p2], %[qload1] \n\t"
- "sb %[st1], 0(%[dst]) \n\t" /* even 1 */
- "dpa.w.ph $ac3, %[p3], %[filter12] \n\t" /* even 3 */
- "dpa.w.ph $ac3, %[p4], %[filter34] \n\t" /* even 3 */
- "dpa.w.ph $ac3, %[p1], %[filter56] \n\t" /* even 3 */
- "dpa.w.ph $ac3, %[p5], %[filter78] \n\t" /* even 3 */
- "extp %[Temp3], $ac3, 31 \n\t" /* even 3 */
- "lbux %[st2], %[Temp2](%[cm]) \n\t" /* even 1 */
-
- /* even 4. pixel */
- "mtlo %[vector_64], $ac2 \n\t" /* even 5 */
- "mthi $zero, $ac2 \n\t"
- "preceu.ph.qbl %[p3], %[qload1] \n\t"
- "sb %[st2], 2(%[dst]) \n\t" /* even 1 */
- "ulw %[qload2], 16(%[src]) \n\t"
- "dpa.w.ph $ac1, %[p4], %[filter12] \n\t" /* even 4 */
- "dpa.w.ph $ac1, %[p1], %[filter34] \n\t" /* even 4 */
- "dpa.w.ph $ac1, %[p5], %[filter56] \n\t" /* even 4 */
- "dpa.w.ph $ac1, %[p2], %[filter78] \n\t" /* even 4 */
- "extp %[Temp1], $ac1, 31 \n\t" /* even 4 */
- "lbux %[st3], %[Temp3](%[cm]) \n\t" /* even 3 */
-
- /* even 5. pixel */
- "mtlo %[vector_64], $ac3 \n\t" /* even 6 */
- "mthi $zero, $ac3 \n\t"
- "preceu.ph.qbr %[p4], %[qload2] \n\t"
- "sb %[st3], 4(%[dst]) \n\t" /* even 3 */
- "dpa.w.ph $ac2, %[p1], %[filter12] \n\t" /* even 5 */
- "dpa.w.ph $ac2, %[p5], %[filter34] \n\t" /* even 5 */
- "dpa.w.ph $ac2, %[p2], %[filter56] \n\t" /* even 5 */
- "dpa.w.ph $ac2, %[p3], %[filter78] \n\t" /* even 5 */
- "extp %[Temp2], $ac2, 31 \n\t" /* even 5 */
- "lbux %[st1], %[Temp1](%[cm]) \n\t" /* even 4 */
-
- /* even 6. pixel */
- "mtlo %[vector_64], $ac1 \n\t" /* even 7 */
- "mthi $zero, $ac1 \n\t"
- "preceu.ph.qbl %[p1], %[qload2] \n\t"
- "sb %[st1], 6(%[dst]) \n\t" /* even 4 */
- "ulw %[qload3], 20(%[src]) \n\t"
- "dpa.w.ph $ac3, %[p5], %[filter12] \n\t" /* even 6 */
- "dpa.w.ph $ac3, %[p2], %[filter34] \n\t" /* even 6 */
- "dpa.w.ph $ac3, %[p3], %[filter56] \n\t" /* even 6 */
- "dpa.w.ph $ac3, %[p4], %[filter78] \n\t" /* even 6 */
- "extp %[Temp3], $ac3, 31 \n\t" /* even 6 */
- "lbux %[st2], %[Temp2](%[cm]) \n\t" /* even 5 */
-
- /* even 7. pixel */
- "mtlo %[vector_64], $ac2 \n\t" /* even 8 */
- "mthi $zero, $ac2 \n\t"
- "preceu.ph.qbr %[p5], %[qload3] \n\t"
- "sb %[st2], 8(%[dst]) \n\t" /* even 5 */
- "dpa.w.ph $ac1, %[p2], %[filter12] \n\t" /* even 7 */
- "dpa.w.ph $ac1, %[p3], %[filter34] \n\t" /* even 7 */
- "dpa.w.ph $ac1, %[p4], %[filter56] \n\t" /* even 7 */
- "dpa.w.ph $ac1, %[p1], %[filter78] \n\t" /* even 7 */
- "extp %[Temp1], $ac1, 31 \n\t" /* even 7 */
- "lbux %[st3], %[Temp3](%[cm]) \n\t" /* even 6 */
-
- /* even 8. pixel */
- "mtlo %[vector_64], $ac3 \n\t" /* odd 1 */
- "mthi $zero, $ac3 \n\t"
- "dpa.w.ph $ac2, %[p3], %[filter12] \n\t" /* even 8 */
- "dpa.w.ph $ac2, %[p4], %[filter34] \n\t" /* even 8 */
- "sb %[st3], 10(%[dst]) \n\t" /* even 6 */
- "dpa.w.ph $ac2, %[p1], %[filter56] \n\t" /* even 8 */
- "dpa.w.ph $ac2, %[p5], %[filter78] \n\t" /* even 8 */
- "extp %[Temp2], $ac2, 31 \n\t" /* even 8 */
- "lbux %[st1], %[Temp1](%[cm]) \n\t" /* even 7 */
-
- /* ODD pixels */
- "ulw %[qload1], 1(%[src]) \n\t"
- "ulw %[qload2], 5(%[src]) \n\t"
-
- /* odd 1. pixel */
- "mtlo %[vector_64], $ac1 \n\t" /* odd 2 */
- "mthi $zero, $ac1 \n\t"
- "preceu.ph.qbr %[p1], %[qload1] \n\t"
- "preceu.ph.qbl %[p2], %[qload1] \n\t"
- "preceu.ph.qbr %[p3], %[qload2] \n\t"
- "preceu.ph.qbl %[p4], %[qload2] \n\t"
- "sb %[st1], 12(%[dst]) \n\t" /* even 7 */
- "ulw %[qload3], 9(%[src]) \n\t"
- "dpa.w.ph $ac3, %[p1], %[filter12] \n\t" /* odd 1 */
- "dpa.w.ph $ac3, %[p2], %[filter34] \n\t" /* odd 1 */
- "dpa.w.ph $ac3, %[p3], %[filter56] \n\t" /* odd 1 */
- "dpa.w.ph $ac3, %[p4], %[filter78] \n\t" /* odd 1 */
- "extp %[Temp3], $ac3, 31 \n\t" /* odd 1 */
- "lbux %[st2], %[Temp2](%[cm]) \n\t" /* even 8 */
-
- /* odd 2. pixel */
- "mtlo %[vector_64], $ac2 \n\t" /* odd 3 */
- "mthi $zero, $ac2 \n\t"
- "preceu.ph.qbr %[p1], %[qload3] \n\t"
- "preceu.ph.qbl %[p5], %[qload3] \n\t"
- "sb %[st2], 14(%[dst]) \n\t" /* even 8 */
- "ulw %[qload1], 13(%[src]) \n\t"
- "dpa.w.ph $ac1, %[p2], %[filter12] \n\t" /* odd 2 */
- "dpa.w.ph $ac1, %[p3], %[filter34] \n\t" /* odd 2 */
- "dpa.w.ph $ac1, %[p4], %[filter56] \n\t" /* odd 2 */
- "dpa.w.ph $ac1, %[p1], %[filter78] \n\t" /* odd 2 */
- "extp %[Temp1], $ac1, 31 \n\t" /* odd 2 */
- "lbux %[st3], %[Temp3](%[cm]) \n\t" /* odd 1 */
-
- /* odd 3. pixel */
- "mtlo %[vector_64], $ac3 \n\t" /* odd 4 */
- "mthi $zero, $ac3 \n\t"
- "preceu.ph.qbr %[p2], %[qload1] \n\t"
- "sb %[st3], 1(%[dst]) \n\t" /* odd 1 */
- "dpa.w.ph $ac2, %[p3], %[filter12] \n\t" /* odd 3 */
- "dpa.w.ph $ac2, %[p4], %[filter34] \n\t" /* odd 3 */
- "dpa.w.ph $ac2, %[p1], %[filter56] \n\t" /* odd 3 */
- "dpa.w.ph $ac2, %[p5], %[filter78] \n\t" /* odd 3 */
- "extp %[Temp2], $ac2, 31 \n\t" /* odd 3 */
- "lbux %[st1], %[Temp1](%[cm]) \n\t" /* odd 2 */
-
- /* odd 4. pixel */
- "mtlo %[vector_64], $ac1 \n\t" /* odd 5 */
- "mthi $zero, $ac1 \n\t"
- "preceu.ph.qbl %[p3], %[qload1] \n\t"
- "sb %[st1], 3(%[dst]) \n\t" /* odd 2 */
- "ulw %[qload2], 17(%[src]) \n\t"
- "dpa.w.ph $ac3, %[p4], %[filter12] \n\t" /* odd 4 */
- "dpa.w.ph $ac3, %[p1], %[filter34] \n\t" /* odd 4 */
- "dpa.w.ph $ac3, %[p5], %[filter56] \n\t" /* odd 4 */
- "dpa.w.ph $ac3, %[p2], %[filter78] \n\t" /* odd 4 */
- "extp %[Temp3], $ac3, 31 \n\t" /* odd 4 */
- "lbux %[st2], %[Temp2](%[cm]) \n\t" /* odd 3 */
-
- /* odd 5. pixel */
- "mtlo %[vector_64], $ac2 \n\t" /* odd 6 */
- "mthi $zero, $ac2 \n\t"
- "preceu.ph.qbr %[p4], %[qload2] \n\t"
- "sb %[st2], 5(%[dst]) \n\t" /* odd 3 */
- "dpa.w.ph $ac1, %[p1], %[filter12] \n\t" /* odd 5 */
- "dpa.w.ph $ac1, %[p5], %[filter34] \n\t" /* odd 5 */
- "dpa.w.ph $ac1, %[p2], %[filter56] \n\t" /* odd 5 */
- "dpa.w.ph $ac1, %[p3], %[filter78] \n\t" /* odd 5 */
- "extp %[Temp1], $ac1, 31 \n\t" /* odd 5 */
- "lbux %[st3], %[Temp3](%[cm]) \n\t" /* odd 4 */
-
- /* odd 6. pixel */
- "mtlo %[vector_64], $ac3 \n\t" /* odd 7 */
- "mthi $zero, $ac3 \n\t"
- "preceu.ph.qbl %[p1], %[qload2] \n\t"
- "sb %[st3], 7(%[dst]) \n\t" /* odd 4 */
- "ulw %[qload3], 21(%[src]) \n\t"
- "dpa.w.ph $ac2, %[p5], %[filter12] \n\t" /* odd 6 */
- "dpa.w.ph $ac2, %[p2], %[filter34] \n\t" /* odd 6 */
- "dpa.w.ph $ac2, %[p3], %[filter56] \n\t" /* odd 6 */
- "dpa.w.ph $ac2, %[p4], %[filter78] \n\t" /* odd 6 */
- "extp %[Temp2], $ac2, 31 \n\t" /* odd 6 */
- "lbux %[st1], %[Temp1](%[cm]) \n\t" /* odd 5 */
-
- /* odd 7. pixel */
- "mtlo %[vector_64], $ac1 \n\t" /* odd 8 */
- "mthi $zero, $ac1 \n\t"
- "preceu.ph.qbr %[p5], %[qload3] \n\t"
- "sb %[st1], 9(%[dst]) \n\t" /* odd 5 */
- "dpa.w.ph $ac3, %[p2], %[filter12] \n\t" /* odd 7 */
- "dpa.w.ph $ac3, %[p3], %[filter34] \n\t" /* odd 7 */
- "dpa.w.ph $ac3, %[p4], %[filter56] \n\t" /* odd 7 */
- "dpa.w.ph $ac3, %[p1], %[filter78] \n\t" /* odd 7 */
- "extp %[Temp3], $ac3, 31 \n\t" /* odd 7 */
-
- /* odd 8. pixel */
- "dpa.w.ph $ac1, %[p3], %[filter12] \n\t" /* odd 8 */
- "dpa.w.ph $ac1, %[p4], %[filter34] \n\t" /* odd 8 */
- "dpa.w.ph $ac1, %[p1], %[filter56] \n\t" /* odd 8 */
- "dpa.w.ph $ac1, %[p5], %[filter78] \n\t" /* odd 8 */
- "extp %[Temp1], $ac1, 31 \n\t" /* odd 8 */
-
- "lbux %[st2], %[Temp2](%[cm]) \n\t" /* odd 6 */
- "lbux %[st3], %[Temp3](%[cm]) \n\t" /* odd 7 */
- "lbux %[st1], %[Temp1](%[cm]) \n\t" /* odd 8 */
-
- "sb %[st2], 11(%[dst]) \n\t" /* odd 6 */
- "sb %[st3], 13(%[dst]) \n\t" /* odd 7 */
- "sb %[st1], 15(%[dst]) \n\t" /* odd 8 */
-
- : [qload1] "=&r"(qload1), [qload2] "=&r"(qload2),
- [qload3] "=&r"(qload3), [st1] "=&r"(st1), [st2] "=&r"(st2),
- [st3] "=&r"(st3), [p1] "=&r"(p1), [p2] "=&r"(p2), [p3] "=&r"(p3),
- [p4] "=&r"(p4), [p5] "=&r"(p5), [Temp1] "=&r"(Temp1),
- [Temp2] "=&r"(Temp2), [Temp3] "=&r"(Temp3)
- : [filter12] "r"(filter12), [filter34] "r"(filter34),
- [filter56] "r"(filter56), [filter78] "r"(filter78),
- [vector_64] "r"(vector_64), [cm] "r"(cm), [dst] "r"(dst),
- [src] "r"(src));
-
- src += 16;
- dst += 16;
- }
-
- /* Next row... */
- src_ptr += src_stride;
- dst_ptr += dst_stride;
- }
-}
-
-void aom_convolve8_horiz_dspr2(const uint8_t *src, ptrdiff_t src_stride,
- uint8_t *dst, ptrdiff_t dst_stride,
- const int16_t *filter_x, int x_step_q4,
- const int16_t *filter_y, int y_step_q4, int w,
- int h) {
- assert(x_step_q4 == 16);
- assert(((const int32_t *)filter_x)[1] != 0x800000);
-
- if (((const int32_t *)filter_x)[0] == 0) {
- aom_convolve2_horiz_dspr2(src, src_stride, dst, dst_stride, filter_x,
- x_step_q4, filter_y, y_step_q4, w, h);
- } else {
- uint32_t pos = 38;
-
- prefetch_load((const uint8_t *)filter_x);
- src -= 3;
-
- /* bit positon for extract from acc */
- __asm__ __volatile__("wrdsp %[pos], 1 \n\t"
- :
- : [pos] "r"(pos));
-
- /* prefetch data to cache memory */
- prefetch_load(src);
- prefetch_load(src + 32);
- prefetch_store(dst);
-
- switch (w) {
- case 4:
- convolve_horiz_4_dspr2(src, (int32_t)src_stride, dst,
- (int32_t)dst_stride, filter_x, (int32_t)h);
- break;
- case 8:
- convolve_horiz_8_dspr2(src, (int32_t)src_stride, dst,
- (int32_t)dst_stride, filter_x, (int32_t)h);
- break;
- case 16:
- convolve_horiz_16_dspr2(src, (int32_t)src_stride, dst,
- (int32_t)dst_stride, filter_x, (int32_t)h, 1);
- break;
- case 32:
- convolve_horiz_16_dspr2(src, (int32_t)src_stride, dst,
- (int32_t)dst_stride, filter_x, (int32_t)h, 2);
- break;
- case 64:
- prefetch_load(src + 64);
- prefetch_store(dst + 32);
-
- convolve_horiz_64_dspr2(src, (int32_t)src_stride, dst,
- (int32_t)dst_stride, filter_x, (int32_t)h);
- break;
- default:
- aom_convolve8_horiz_c(src + 3, src_stride, dst, dst_stride, filter_x,
- x_step_q4, filter_y, y_step_q4, w, h);
- break;
- }
- }
-}
-#endif
diff --git a/third_party/aom/aom_dsp/mips/convolve8_vert_dspr2.c b/third_party/aom/aom_dsp/mips/convolve8_vert_dspr2.c
deleted file mode 100644
index 201e66427..000000000
--- a/third_party/aom/aom_dsp/mips/convolve8_vert_dspr2.c
+++ /dev/null
@@ -1,361 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#include <assert.h>
-#include <stdio.h>
-
-#include "config/aom_dsp_rtcd.h"
-
-#include "aom_dsp/mips/convolve_common_dspr2.h"
-#include "aom_dsp/aom_dsp_common.h"
-#include "aom_dsp/aom_filter.h"
-#include "aom_ports/mem.h"
-
-#if HAVE_DSPR2
-static void convolve_vert_4_dspr2(const uint8_t *src, int32_t src_stride,
- uint8_t *dst, int32_t dst_stride,
- const int16_t *filter_y, int32_t w,
- int32_t h) {
- int32_t x, y;
- const uint8_t *src_ptr;
- uint8_t *dst_ptr;
- uint8_t *cm = aom_ff_cropTbl;
- uint32_t vector4a = 64;
- uint32_t load1, load2, load3, load4;
- uint32_t p1, p2;
- uint32_t n1, n2;
- uint32_t scratch1, scratch2;
- uint32_t store1, store2;
- int32_t vector1b, vector2b, vector3b, vector4b;
- int32_t Temp1, Temp2;
-
- vector1b = ((const int32_t *)filter_y)[0];
- vector2b = ((const int32_t *)filter_y)[1];
- vector3b = ((const int32_t *)filter_y)[2];
- vector4b = ((const int32_t *)filter_y)[3];
-
- src -= 3 * src_stride;
-
- for (y = h; y--;) {
- /* prefetch data to cache memory */
- prefetch_store(dst + dst_stride);
-
- for (x = 0; x < w; x += 4) {
- src_ptr = src + x;
- dst_ptr = dst + x;
-
- __asm__ __volatile__(
- "ulw %[load1], 0(%[src_ptr]) \n\t"
- "add %[src_ptr], %[src_ptr], %[src_stride] \n\t"
- "ulw %[load2], 0(%[src_ptr]) \n\t"
- "add %[src_ptr], %[src_ptr], %[src_stride] \n\t"
- "ulw %[load3], 0(%[src_ptr]) \n\t"
- "add %[src_ptr], %[src_ptr], %[src_stride] \n\t"
- "ulw %[load4], 0(%[src_ptr]) \n\t"
-
- "mtlo %[vector4a], $ac0 \n\t"
- "mtlo %[vector4a], $ac1 \n\t"
- "mtlo %[vector4a], $ac2 \n\t"
- "mtlo %[vector4a], $ac3 \n\t"
- "mthi $zero, $ac0 \n\t"
- "mthi $zero, $ac1 \n\t"
- "mthi $zero, $ac2 \n\t"
- "mthi $zero, $ac3 \n\t"
-
- "preceu.ph.qbr %[scratch1], %[load1] \n\t"
- "preceu.ph.qbr %[p1], %[load2] \n\t"
- "precrq.ph.w %[n1], %[p1], %[scratch1] \n\t" /* pixel 2 */
- "append %[p1], %[scratch1], 16 \n\t" /* pixel 1 */
- "preceu.ph.qbr %[scratch2], %[load3] \n\t"
- "preceu.ph.qbr %[p2], %[load4] \n\t"
- "precrq.ph.w %[n2], %[p2], %[scratch2] \n\t" /* pixel 2 */
- "append %[p2], %[scratch2], 16 \n\t" /* pixel 1 */
-
- "dpa.w.ph $ac0, %[p1], %[vector1b] \n\t"
- "dpa.w.ph $ac0, %[p2], %[vector2b] \n\t"
- "dpa.w.ph $ac1, %[n1], %[vector1b] \n\t"
- "dpa.w.ph $ac1, %[n2], %[vector2b] \n\t"
-
- "preceu.ph.qbl %[scratch1], %[load1] \n\t"
- "preceu.ph.qbl %[p1], %[load2] \n\t"
- "precrq.ph.w %[n1], %[p1], %[scratch1] \n\t" /* pixel 2 */
- "append %[p1], %[scratch1], 16 \n\t" /* pixel 1 */
- "preceu.ph.qbl %[scratch2], %[load3] \n\t"
- "preceu.ph.qbl %[p2], %[load4] \n\t"
- "precrq.ph.w %[n2], %[p2], %[scratch2] \n\t" /* pixel 2 */
- "append %[p2], %[scratch2], 16 \n\t" /* pixel 1 */
-
- "dpa.w.ph $ac2, %[p1], %[vector1b] \n\t"
- "dpa.w.ph $ac2, %[p2], %[vector2b] \n\t"
- "dpa.w.ph $ac3, %[n1], %[vector1b] \n\t"
- "dpa.w.ph $ac3, %[n2], %[vector2b] \n\t"
-
- "add %[src_ptr], %[src_ptr], %[src_stride] \n\t"
- "ulw %[load1], 0(%[src_ptr]) \n\t"
- "add %[src_ptr], %[src_ptr], %[src_stride] \n\t"
- "ulw %[load2], 0(%[src_ptr]) \n\t"
- "add %[src_ptr], %[src_ptr], %[src_stride] \n\t"
- "ulw %[load3], 0(%[src_ptr]) \n\t"
- "add %[src_ptr], %[src_ptr], %[src_stride] \n\t"
- "ulw %[load4], 0(%[src_ptr]) \n\t"
-
- "preceu.ph.qbr %[scratch1], %[load1] \n\t"
- "preceu.ph.qbr %[p1], %[load2] \n\t"
- "precrq.ph.w %[n1], %[p1], %[scratch1] \n\t" /* pixel 2 */
- "append %[p1], %[scratch1], 16 \n\t" /* pixel 1 */
- "preceu.ph.qbr %[scratch2], %[load3] \n\t"
- "preceu.ph.qbr %[p2], %[load4] \n\t"
- "precrq.ph.w %[n2], %[p2], %[scratch2] \n\t" /* pixel 2 */
- "append %[p2], %[scratch2], 16 \n\t" /* pixel 1 */
-
- "dpa.w.ph $ac0, %[p1], %[vector3b] \n\t"
- "dpa.w.ph $ac0, %[p2], %[vector4b] \n\t"
- "extp %[Temp1], $ac0, 31 \n\t"
- "dpa.w.ph $ac1, %[n1], %[vector3b] \n\t"
- "dpa.w.ph $ac1, %[n2], %[vector4b] \n\t"
- "extp %[Temp2], $ac1, 31 \n\t"
-
- "preceu.ph.qbl %[scratch1], %[load1] \n\t"
- "preceu.ph.qbl %[p1], %[load2] \n\t"
- "precrq.ph.w %[n1], %[p1], %[scratch1] \n\t" /* pixel 2 */
- "append %[p1], %[scratch1], 16 \n\t" /* pixel 1 */
- "preceu.ph.qbl %[scratch2], %[load3] \n\t"
- "preceu.ph.qbl %[p2], %[load4] \n\t"
- "precrq.ph.w %[n2], %[p2], %[scratch2] \n\t" /* pixel 2 */
- "append %[p2], %[scratch2], 16 \n\t" /* pixel 1 */
-
- "lbux %[store1], %[Temp1](%[cm]) \n\t"
- "dpa.w.ph $ac2, %[p1], %[vector3b] \n\t"
- "dpa.w.ph $ac2, %[p2], %[vector4b] \n\t"
- "extp %[Temp1], $ac2, 31 \n\t"
-
- "lbux %[store2], %[Temp2](%[cm]) \n\t"
- "dpa.w.ph $ac3, %[n1], %[vector3b] \n\t"
- "dpa.w.ph $ac3, %[n2], %[vector4b] \n\t"
- "extp %[Temp2], $ac3, 31 \n\t"
-
- "sb %[store1], 0(%[dst_ptr]) \n\t"
- "sb %[store2], 1(%[dst_ptr]) \n\t"
-
- "lbux %[store1], %[Temp1](%[cm]) \n\t"
- "lbux %[store2], %[Temp2](%[cm]) \n\t"
-
- "sb %[store1], 2(%[dst_ptr]) \n\t"
- "sb %[store2], 3(%[dst_ptr]) \n\t"
-
- : [load1] "=&r"(load1), [load2] "=&r"(load2), [load3] "=&r"(load3),
- [load4] "=&r"(load4), [p1] "=&r"(p1), [p2] "=&r"(p2),
- [n1] "=&r"(n1), [n2] "=&r"(n2), [scratch1] "=&r"(scratch1),
- [scratch2] "=&r"(scratch2), [Temp1] "=&r"(Temp1),
- [Temp2] "=&r"(Temp2), [store1] "=&r"(store1),
- [store2] "=&r"(store2), [src_ptr] "+r"(src_ptr)
- : [vector1b] "r"(vector1b), [vector2b] "r"(vector2b),
- [vector3b] "r"(vector3b), [vector4b] "r"(vector4b),
- [vector4a] "r"(vector4a), [src_stride] "r"(src_stride),
- [cm] "r"(cm), [dst_ptr] "r"(dst_ptr));
- }
-
- /* Next row... */
- src += src_stride;
- dst += dst_stride;
- }
-}
-
-static void convolve_vert_64_dspr2(const uint8_t *src, int32_t src_stride,
- uint8_t *dst, int32_t dst_stride,
- const int16_t *filter_y, int32_t h) {
- int32_t x, y;
- const uint8_t *src_ptr;
- uint8_t *dst_ptr;
- uint8_t *cm = aom_ff_cropTbl;
- uint32_t vector4a = 64;
- uint32_t load1, load2, load3, load4;
- uint32_t p1, p2;
- uint32_t n1, n2;
- uint32_t scratch1, scratch2;
- uint32_t store1, store2;
- int32_t vector1b, vector2b, vector3b, vector4b;
- int32_t Temp1, Temp2;
-
- vector1b = ((const int32_t *)filter_y)[0];
- vector2b = ((const int32_t *)filter_y)[1];
- vector3b = ((const int32_t *)filter_y)[2];
- vector4b = ((const int32_t *)filter_y)[3];
-
- src -= 3 * src_stride;
-
- for (y = h; y--;) {
- /* prefetch data to cache memory */
- prefetch_store(dst + dst_stride);
- prefetch_store(dst + dst_stride + 32);
-
- for (x = 0; x < 64; x += 4) {
- src_ptr = src + x;
- dst_ptr = dst + x;
-
- __asm__ __volatile__(
- "ulw %[load1], 0(%[src_ptr]) \n\t"
- "add %[src_ptr], %[src_ptr], %[src_stride] \n\t"
- "ulw %[load2], 0(%[src_ptr]) \n\t"
- "add %[src_ptr], %[src_ptr], %[src_stride] \n\t"
- "ulw %[load3], 0(%[src_ptr]) \n\t"
- "add %[src_ptr], %[src_ptr], %[src_stride] \n\t"
- "ulw %[load4], 0(%[src_ptr]) \n\t"
-
- "mtlo %[vector4a], $ac0 \n\t"
- "mtlo %[vector4a], $ac1 \n\t"
- "mtlo %[vector4a], $ac2 \n\t"
- "mtlo %[vector4a], $ac3 \n\t"
- "mthi $zero, $ac0 \n\t"
- "mthi $zero, $ac1 \n\t"
- "mthi $zero, $ac2 \n\t"
- "mthi $zero, $ac3 \n\t"
-
- "preceu.ph.qbr %[scratch1], %[load1] \n\t"
- "preceu.ph.qbr %[p1], %[load2] \n\t"
- "precrq.ph.w %[n1], %[p1], %[scratch1] \n\t" /* pixel 2 */
- "append %[p1], %[scratch1], 16 \n\t" /* pixel 1 */
- "preceu.ph.qbr %[scratch2], %[load3] \n\t"
- "preceu.ph.qbr %[p2], %[load4] \n\t"
- "precrq.ph.w %[n2], %[p2], %[scratch2] \n\t" /* pixel 2 */
- "append %[p2], %[scratch2], 16 \n\t" /* pixel 1 */
-
- "dpa.w.ph $ac0, %[p1], %[vector1b] \n\t"
- "dpa.w.ph $ac0, %[p2], %[vector2b] \n\t"
- "dpa.w.ph $ac1, %[n1], %[vector1b] \n\t"
- "dpa.w.ph $ac1, %[n2], %[vector2b] \n\t"
-
- "preceu.ph.qbl %[scratch1], %[load1] \n\t"
- "preceu.ph.qbl %[p1], %[load2] \n\t"
- "precrq.ph.w %[n1], %[p1], %[scratch1] \n\t" /* pixel 2 */
- "append %[p1], %[scratch1], 16 \n\t" /* pixel 1 */
- "preceu.ph.qbl %[scratch2], %[load3] \n\t"
- "preceu.ph.qbl %[p2], %[load4] \n\t"
- "precrq.ph.w %[n2], %[p2], %[scratch2] \n\t" /* pixel 2 */
- "append %[p2], %[scratch2], 16 \n\t" /* pixel 1 */
-
- "dpa.w.ph $ac2, %[p1], %[vector1b] \n\t"
- "dpa.w.ph $ac2, %[p2], %[vector2b] \n\t"
- "dpa.w.ph $ac3, %[n1], %[vector1b] \n\t"
- "dpa.w.ph $ac3, %[n2], %[vector2b] \n\t"
-
- "add %[src_ptr], %[src_ptr], %[src_stride] \n\t"
- "ulw %[load1], 0(%[src_ptr]) \n\t"
- "add %[src_ptr], %[src_ptr], %[src_stride] \n\t"
- "ulw %[load2], 0(%[src_ptr]) \n\t"
- "add %[src_ptr], %[src_ptr], %[src_stride] \n\t"
- "ulw %[load3], 0(%[src_ptr]) \n\t"
- "add %[src_ptr], %[src_ptr], %[src_stride] \n\t"
- "ulw %[load4], 0(%[src_ptr]) \n\t"
-
- "preceu.ph.qbr %[scratch1], %[load1] \n\t"
- "preceu.ph.qbr %[p1], %[load2] \n\t"
- "precrq.ph.w %[n1], %[p1], %[scratch1] \n\t" /* pixel 2 */
- "append %[p1], %[scratch1], 16 \n\t" /* pixel 1 */
- "preceu.ph.qbr %[scratch2], %[load3] \n\t"
- "preceu.ph.qbr %[p2], %[load4] \n\t"
- "precrq.ph.w %[n2], %[p2], %[scratch2] \n\t" /* pixel 2 */
- "append %[p2], %[scratch2], 16 \n\t" /* pixel 1 */
-
- "dpa.w.ph $ac0, %[p1], %[vector3b] \n\t"
- "dpa.w.ph $ac0, %[p2], %[vector4b] \n\t"
- "extp %[Temp1], $ac0, 31 \n\t"
- "dpa.w.ph $ac1, %[n1], %[vector3b] \n\t"
- "dpa.w.ph $ac1, %[n2], %[vector4b] \n\t"
- "extp %[Temp2], $ac1, 31 \n\t"
-
- "preceu.ph.qbl %[scratch1], %[load1] \n\t"
- "preceu.ph.qbl %[p1], %[load2] \n\t"
- "precrq.ph.w %[n1], %[p1], %[scratch1] \n\t" /* pixel 2 */
- "append %[p1], %[scratch1], 16 \n\t" /* pixel 1 */
- "preceu.ph.qbl %[scratch2], %[load3] \n\t"
- "preceu.ph.qbl %[p2], %[load4] \n\t"
- "precrq.ph.w %[n2], %[p2], %[scratch2] \n\t" /* pixel 2 */
- "append %[p2], %[scratch2], 16 \n\t" /* pixel 1 */
-
- "lbux %[store1], %[Temp1](%[cm]) \n\t"
- "dpa.w.ph $ac2, %[p1], %[vector3b] \n\t"
- "dpa.w.ph $ac2, %[p2], %[vector4b] \n\t"
- "extp %[Temp1], $ac2, 31 \n\t"
-
- "lbux %[store2], %[Temp2](%[cm]) \n\t"
- "dpa.w.ph $ac3, %[n1], %[vector3b] \n\t"
- "dpa.w.ph $ac3, %[n2], %[vector4b] \n\t"
- "extp %[Temp2], $ac3, 31 \n\t"
-
- "sb %[store1], 0(%[dst_ptr]) \n\t"
- "sb %[store2], 1(%[dst_ptr]) \n\t"
-
- "lbux %[store1], %[Temp1](%[cm]) \n\t"
- "lbux %[store2], %[Temp2](%[cm]) \n\t"
-
- "sb %[store1], 2(%[dst_ptr]) \n\t"
- "sb %[store2], 3(%[dst_ptr]) \n\t"
-
- : [load1] "=&r"(load1), [load2] "=&r"(load2), [load3] "=&r"(load3),
- [load4] "=&r"(load4), [p1] "=&r"(p1), [p2] "=&r"(p2),
- [n1] "=&r"(n1), [n2] "=&r"(n2), [scratch1] "=&r"(scratch1),
- [scratch2] "=&r"(scratch2), [Temp1] "=&r"(Temp1),
- [Temp2] "=&r"(Temp2), [store1] "=&r"(store1),
- [store2] "=&r"(store2), [src_ptr] "+r"(src_ptr)
- : [vector1b] "r"(vector1b), [vector2b] "r"(vector2b),
- [vector3b] "r"(vector3b), [vector4b] "r"(vector4b),
- [vector4a] "r"(vector4a), [src_stride] "r"(src_stride),
- [cm] "r"(cm), [dst_ptr] "r"(dst_ptr));
- }
-
- /* Next row... */
- src += src_stride;
- dst += dst_stride;
- }
-}
-
-void aom_convolve8_vert_dspr2(const uint8_t *src, ptrdiff_t src_stride,
- uint8_t *dst, ptrdiff_t dst_stride,
- const int16_t *filter_x, int x_step_q4,
- const int16_t *filter_y, int y_step_q4, int w,
- int h) {
- assert(y_step_q4 == 16);
- assert(((const int32_t *)filter_y)[1] != 0x800000);
-
- if (((const int32_t *)filter_y)[0] == 0) {
- aom_convolve2_vert_dspr2(src, src_stride, dst, dst_stride, filter_x,
- x_step_q4, filter_y, y_step_q4, w, h);
- } else {
- uint32_t pos = 38;
-
- /* bit positon for extract from acc */
- __asm__ __volatile__("wrdsp %[pos], 1 \n\t"
- :
- : [pos] "r"(pos));
-
- prefetch_store(dst);
-
- switch (w) {
- case 4:
- case 8:
- case 16:
- case 32:
- convolve_vert_4_dspr2(src, src_stride, dst, dst_stride, filter_y, w, h);
- break;
- case 64:
- prefetch_store(dst + 32);
- convolve_vert_64_dspr2(src, src_stride, dst, dst_stride, filter_y, h);
- break;
- default:
- aom_convolve8_vert_c(src, src_stride, dst, dst_stride, filter_x,
- x_step_q4, filter_y, y_step_q4, w, h);
- break;
- }
- }
-}
-
-#endif
diff --git a/third_party/aom/aom_dsp/mips/convolve_common_dspr2.h b/third_party/aom/aom_dsp/mips/convolve_common_dspr2.h
deleted file mode 100644
index e5d48a884..000000000
--- a/third_party/aom/aom_dsp/mips/convolve_common_dspr2.h
+++ /dev/null
@@ -1,48 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#ifndef AOM_AOM_DSP_MIPS_CONVOLVE_COMMON_DSPR2_H_
-#define AOM_AOM_DSP_MIPS_CONVOLVE_COMMON_DSPR2_H_
-
-#include <assert.h>
-
-#include "config/aom_config.h"
-
-#include "aom/aom_integer.h"
-#include "aom_dsp/mips/common_dspr2.h"
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-#if HAVE_DSPR2
-void aom_convolve2_horiz_dspr2(const uint8_t *src, ptrdiff_t src_stride,
- uint8_t *dst, ptrdiff_t dst_stride,
- const int16_t *filter_x, int x_step_q4,
- const int16_t *filter_y, int y_step_q4, int w,
- int h);
-
-void aom_convolve2_dspr2(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst,
- ptrdiff_t dst_stride, const int16_t *filter, int w,
- int h);
-
-void aom_convolve2_vert_dspr2(const uint8_t *src, ptrdiff_t src_stride,
- uint8_t *dst, ptrdiff_t dst_stride,
- const int16_t *filter_x, int x_step_q4,
- const int16_t *filter_y, int y_step_q4, int w,
- int h);
-
-#endif // #if HAVE_DSPR2
-#ifdef __cplusplus
-} // extern "C"
-#endif
-
-#endif // AOM_AOM_DSP_MIPS_CONVOLVE_COMMON_DSPR2_H_
diff --git a/third_party/aom/aom_dsp/mips/intrapred16_dspr2.c b/third_party/aom/aom_dsp/mips/intrapred16_dspr2.c
deleted file mode 100644
index 7c221ae89..000000000
--- a/third_party/aom/aom_dsp/mips/intrapred16_dspr2.c
+++ /dev/null
@@ -1,327 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#include "aom_dsp/mips/common_dspr2.h"
-
-#if HAVE_DSPR2
-void aom_h_predictor_16x16_dspr2(uint8_t *dst, ptrdiff_t stride,
- const uint8_t *above, const uint8_t *left) {
- int32_t tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7, tmp8;
- int32_t tmp9, tmp10, tmp11, tmp12, tmp13, tmp14, tmp15, tmp16;
-
- (void)above;
-
- __asm__ __volatile__(
- "lb %[tmp1], (%[left]) \n\t"
- "lb %[tmp2], 1(%[left]) \n\t"
- "lb %[tmp3], 2(%[left]) \n\t"
- "lb %[tmp4], 3(%[left]) \n\t"
- "lb %[tmp5], 4(%[left]) \n\t"
- "lb %[tmp6], 5(%[left]) \n\t"
- "lb %[tmp7], 6(%[left]) \n\t"
- "lb %[tmp8], 7(%[left]) \n\t"
- "lb %[tmp9], 8(%[left]) \n\t"
- "lb %[tmp10], 9(%[left]) \n\t"
- "lb %[tmp11], 10(%[left]) \n\t"
- "lb %[tmp12], 11(%[left]) \n\t"
- "lb %[tmp13], 12(%[left]) \n\t"
- "lb %[tmp14], 13(%[left]) \n\t"
- "lb %[tmp15], 14(%[left]) \n\t"
- "lb %[tmp16], 15(%[left]) \n\t"
-
- "replv.qb %[tmp1], %[tmp1] \n\t"
- "replv.qb %[tmp2], %[tmp2] \n\t"
- "replv.qb %[tmp3], %[tmp3] \n\t"
- "replv.qb %[tmp4], %[tmp4] \n\t"
- "replv.qb %[tmp5], %[tmp5] \n\t"
- "replv.qb %[tmp6], %[tmp6] \n\t"
- "replv.qb %[tmp7], %[tmp7] \n\t"
- "replv.qb %[tmp8], %[tmp8] \n\t"
- "replv.qb %[tmp9], %[tmp9] \n\t"
- "replv.qb %[tmp10], %[tmp10] \n\t"
- "replv.qb %[tmp11], %[tmp11] \n\t"
- "replv.qb %[tmp12], %[tmp12] \n\t"
- "replv.qb %[tmp13], %[tmp13] \n\t"
- "replv.qb %[tmp14], %[tmp14] \n\t"
- "replv.qb %[tmp15], %[tmp15] \n\t"
- "replv.qb %[tmp16], %[tmp16] \n\t"
-
- "sw %[tmp1], (%[dst]) \n\t"
- "sw %[tmp1], 4(%[dst]) \n\t"
- "sw %[tmp1], 8(%[dst]) \n\t"
- "sw %[tmp1], 12(%[dst]) \n\t"
-
- "add %[dst], %[dst], %[stride] \n\t"
- "sw %[tmp2], (%[dst]) \n\t"
- "sw %[tmp2], 4(%[dst]) \n\t"
- "sw %[tmp2], 8(%[dst]) \n\t"
- "sw %[tmp2], 12(%[dst]) \n\t"
-
- "add %[dst], %[dst], %[stride] \n\t"
- "sw %[tmp3], (%[dst]) \n\t"
- "sw %[tmp3], 4(%[dst]) \n\t"
- "sw %[tmp3], 8(%[dst]) \n\t"
- "sw %[tmp3], 12(%[dst]) \n\t"
-
- "add %[dst], %[dst], %[stride] \n\t"
- "sw %[tmp4], (%[dst]) \n\t"
- "sw %[tmp4], 4(%[dst]) \n\t"
- "sw %[tmp4], 8(%[dst]) \n\t"
- "sw %[tmp4], 12(%[dst]) \n\t"
-
- "add %[dst], %[dst], %[stride] \n\t"
- "sw %[tmp5], (%[dst]) \n\t"
- "sw %[tmp5], 4(%[dst]) \n\t"
- "sw %[tmp5], 8(%[dst]) \n\t"
- "sw %[tmp5], 12(%[dst]) \n\t"
-
- "add %[dst], %[dst], %[stride] \n\t"
- "sw %[tmp6], (%[dst]) \n\t"
- "sw %[tmp6], 4(%[dst]) \n\t"
- "sw %[tmp6], 8(%[dst]) \n\t"
- "sw %[tmp6], 12(%[dst]) \n\t"
-
- "add %[dst], %[dst], %[stride] \n\t"
- "sw %[tmp7], (%[dst]) \n\t"
- "sw %[tmp7], 4(%[dst]) \n\t"
- "sw %[tmp7], 8(%[dst]) \n\t"
- "sw %[tmp7], 12(%[dst]) \n\t"
-
- "add %[dst], %[dst], %[stride] \n\t"
- "sw %[tmp8], (%[dst]) \n\t"
- "sw %[tmp8], 4(%[dst]) \n\t"
- "sw %[tmp8], 8(%[dst]) \n\t"
- "sw %[tmp8], 12(%[dst]) \n\t"
-
- "add %[dst], %[dst], %[stride] \n\t"
- "sw %[tmp9], (%[dst]) \n\t"
- "sw %[tmp9], 4(%[dst]) \n\t"
- "sw %[tmp9], 8(%[dst]) \n\t"
- "sw %[tmp9], 12(%[dst]) \n\t"
-
- "add %[dst], %[dst], %[stride] \n\t"
- "sw %[tmp10], (%[dst]) \n\t"
- "sw %[tmp10], 4(%[dst]) \n\t"
- "sw %[tmp10], 8(%[dst]) \n\t"
- "sw %[tmp10], 12(%[dst]) \n\t"
-
- "add %[dst], %[dst], %[stride] \n\t"
- "sw %[tmp11], (%[dst]) \n\t"
- "sw %[tmp11], 4(%[dst]) \n\t"
- "sw %[tmp11], 8(%[dst]) \n\t"
- "sw %[tmp11], 12(%[dst]) \n\t"
-
- "add %[dst], %[dst], %[stride] \n\t"
- "sw %[tmp12], (%[dst]) \n\t"
- "sw %[tmp12], 4(%[dst]) \n\t"
- "sw %[tmp12], 8(%[dst]) \n\t"
- "sw %[tmp12], 12(%[dst]) \n\t"
-
- "add %[dst], %[dst], %[stride] \n\t"
- "sw %[tmp13], (%[dst]) \n\t"
- "sw %[tmp13], 4(%[dst]) \n\t"
- "sw %[tmp13], 8(%[dst]) \n\t"
- "sw %[tmp13], 12(%[dst]) \n\t"
-
- "add %[dst], %[dst], %[stride] \n\t"
- "sw %[tmp14], (%[dst]) \n\t"
- "sw %[tmp14], 4(%[dst]) \n\t"
- "sw %[tmp14], 8(%[dst]) \n\t"
- "sw %[tmp14], 12(%[dst]) \n\t"
-
- "add %[dst], %[dst], %[stride] \n\t"
- "sw %[tmp15], (%[dst]) \n\t"
- "sw %[tmp15], 4(%[dst]) \n\t"
- "sw %[tmp15], 8(%[dst]) \n\t"
- "sw %[tmp15], 12(%[dst]) \n\t"
-
- "add %[dst], %[dst], %[stride] \n\t"
- "sw %[tmp16], (%[dst]) \n\t"
- "sw %[tmp16], 4(%[dst]) \n\t"
- "sw %[tmp16], 8(%[dst]) \n\t"
- "sw %[tmp16], 12(%[dst]) \n\t"
-
- : [tmp1] "=&r"(tmp1), [tmp2] "=&r"(tmp2), [tmp3] "=&r"(tmp3),
- [tmp4] "=&r"(tmp4), [tmp5] "=&r"(tmp5), [tmp7] "=&r"(tmp7),
- [tmp6] "=&r"(tmp6), [tmp8] "=&r"(tmp8), [tmp9] "=&r"(tmp9),
- [tmp10] "=&r"(tmp10), [tmp11] "=&r"(tmp11), [tmp12] "=&r"(tmp12),
- [tmp13] "=&r"(tmp13), [tmp14] "=&r"(tmp14), [tmp15] "=&r"(tmp15),
- [tmp16] "=&r"(tmp16)
- : [left] "r"(left), [dst] "r"(dst), [stride] "r"(stride));
-}
-
-void aom_dc_predictor_16x16_dspr2(uint8_t *dst, ptrdiff_t stride,
- const uint8_t *above, const uint8_t *left) {
- int32_t expected_dc;
- int32_t average;
- int32_t tmp, above1, above_l1, above_r1, left1, left_r1, left_l1;
- int32_t above2, left2;
-
- __asm__ __volatile__(
- "lw %[above1], (%[above]) \n\t"
- "lw %[above2], 4(%[above]) \n\t"
- "lw %[left1], (%[left]) \n\t"
- "lw %[left2], 4(%[left]) \n\t"
-
- "preceu.ph.qbl %[above_l1], %[above1] \n\t"
- "preceu.ph.qbr %[above_r1], %[above1] \n\t"
- "preceu.ph.qbl %[left_l1], %[left1] \n\t"
- "preceu.ph.qbr %[left_r1], %[left1] \n\t"
-
- "addu.ph %[average], %[above_r1], %[above_l1] \n\t"
- "addu.ph %[average], %[average], %[left_l1] \n\t"
- "addu.ph %[average], %[average], %[left_r1] \n\t"
-
- "preceu.ph.qbl %[above_l1], %[above2] \n\t"
- "preceu.ph.qbr %[above_r1], %[above2] \n\t"
- "preceu.ph.qbl %[left_l1], %[left2] \n\t"
- "preceu.ph.qbr %[left_r1], %[left2] \n\t"
-
- "addu.ph %[average], %[average], %[above_l1] \n\t"
- "addu.ph %[average], %[average], %[above_r1] \n\t"
- "addu.ph %[average], %[average], %[left_l1] \n\t"
- "addu.ph %[average], %[average], %[left_r1] \n\t"
-
- "lw %[above1], 8(%[above]) \n\t"
- "lw %[above2], 12(%[above]) \n\t"
- "lw %[left1], 8(%[left]) \n\t"
- "lw %[left2], 12(%[left]) \n\t"
-
- "preceu.ph.qbl %[above_l1], %[above1] \n\t"
- "preceu.ph.qbr %[above_r1], %[above1] \n\t"
- "preceu.ph.qbl %[left_l1], %[left1] \n\t"
- "preceu.ph.qbr %[left_r1], %[left1] \n\t"
-
- "addu.ph %[average], %[average], %[above_l1] \n\t"
- "addu.ph %[average], %[average], %[above_r1] \n\t"
- "addu.ph %[average], %[average], %[left_l1] \n\t"
- "addu.ph %[average], %[average], %[left_r1] \n\t"
-
- "preceu.ph.qbl %[above_l1], %[above2] \n\t"
- "preceu.ph.qbr %[above_r1], %[above2] \n\t"
- "preceu.ph.qbl %[left_l1], %[left2] \n\t"
- "preceu.ph.qbr %[left_r1], %[left2] \n\t"
-
- "addu.ph %[average], %[average], %[above_l1] \n\t"
- "addu.ph %[average], %[average], %[above_r1] \n\t"
- "addu.ph %[average], %[average], %[left_l1] \n\t"
- "addu.ph %[average], %[average], %[left_r1] \n\t"
-
- "addiu %[average], %[average], 16 \n\t"
- "srl %[tmp], %[average], 16 \n\t"
- "addu.ph %[average], %[tmp], %[average] \n\t"
- "srl %[expected_dc], %[average], 5 \n\t"
- "replv.qb %[expected_dc], %[expected_dc] \n\t"
-
- "sw %[expected_dc], (%[dst]) \n\t"
- "sw %[expected_dc], 4(%[dst]) \n\t"
- "sw %[expected_dc], 8(%[dst]) \n\t"
- "sw %[expected_dc], 12(%[dst]) \n\t"
-
- "add %[dst], %[dst], %[stride] \n\t"
- "sw %[expected_dc], (%[dst]) \n\t"
- "sw %[expected_dc], 4(%[dst]) \n\t"
- "sw %[expected_dc], 8(%[dst]) \n\t"
- "sw %[expected_dc], 12(%[dst]) \n\t"
-
- "add %[dst], %[dst], %[stride] \n\t"
- "sw %[expected_dc], (%[dst]) \n\t"
- "sw %[expected_dc], 4(%[dst]) \n\t"
- "sw %[expected_dc], 8(%[dst]) \n\t"
- "sw %[expected_dc], 12(%[dst]) \n\t"
-
- "add %[dst], %[dst], %[stride] \n\t"
- "sw %[expected_dc], (%[dst]) \n\t"
- "sw %[expected_dc], 4(%[dst]) \n\t"
- "sw %[expected_dc], 8(%[dst]) \n\t"
- "sw %[expected_dc], 12(%[dst]) \n\t"
-
- "add %[dst], %[dst], %[stride] \n\t"
- "sw %[expected_dc], (%[dst]) \n\t"
- "sw %[expected_dc], 4(%[dst]) \n\t"
- "sw %[expected_dc], 8(%[dst]) \n\t"
- "sw %[expected_dc], 12(%[dst]) \n\t"
-
- "add %[dst], %[dst], %[stride] \n\t"
- "sw %[expected_dc], (%[dst]) \n\t"
- "sw %[expected_dc], 4(%[dst]) \n\t"
- "sw %[expected_dc], 8(%[dst]) \n\t"
- "sw %[expected_dc], 12(%[dst]) \n\t"
-
- "add %[dst], %[dst], %[stride] \n\t"
- "sw %[expected_dc], (%[dst]) \n\t"
- "sw %[expected_dc], 4(%[dst]) \n\t"
- "sw %[expected_dc], 8(%[dst]) \n\t"
- "sw %[expected_dc], 12(%[dst]) \n\t"
-
- "add %[dst], %[dst], %[stride] \n\t"
- "sw %[expected_dc], (%[dst]) \n\t"
- "sw %[expected_dc], 4(%[dst]) \n\t"
- "sw %[expected_dc], 8(%[dst]) \n\t"
- "sw %[expected_dc], 12(%[dst]) \n\t"
-
- "add %[dst], %[dst], %[stride] \n\t"
- "sw %[expected_dc], (%[dst]) \n\t"
- "sw %[expected_dc], 4(%[dst]) \n\t"
- "sw %[expected_dc], 8(%[dst]) \n\t"
- "sw %[expected_dc], 12(%[dst]) \n\t"
-
- "add %[dst], %[dst], %[stride] \n\t"
- "sw %[expected_dc], (%[dst]) \n\t"
- "sw %[expected_dc], 4(%[dst]) \n\t"
- "sw %[expected_dc], 8(%[dst]) \n\t"
- "sw %[expected_dc], 12(%[dst]) \n\t"
-
- "add %[dst], %[dst], %[stride] \n\t"
- "sw %[expected_dc], (%[dst]) \n\t"
- "sw %[expected_dc], 4(%[dst]) \n\t"
- "sw %[expected_dc], 8(%[dst]) \n\t"
- "sw %[expected_dc], 12(%[dst]) \n\t"
-
- "add %[dst], %[dst], %[stride] \n\t"
- "sw %[expected_dc], (%[dst]) \n\t"
- "sw %[expected_dc], 4(%[dst]) \n\t"
- "sw %[expected_dc], 8(%[dst]) \n\t"
- "sw %[expected_dc], 12(%[dst]) \n\t"
-
- "add %[dst], %[dst], %[stride] \n\t"
- "sw %[expected_dc], (%[dst]) \n\t"
- "sw %[expected_dc], 4(%[dst]) \n\t"
- "sw %[expected_dc], 8(%[dst]) \n\t"
- "sw %[expected_dc], 12(%[dst]) \n\t"
-
- "add %[dst], %[dst], %[stride] \n\t"
- "sw %[expected_dc], (%[dst]) \n\t"
- "sw %[expected_dc], 4(%[dst]) \n\t"
- "sw %[expected_dc], 8(%[dst]) \n\t"
- "sw %[expected_dc], 12(%[dst]) \n\t"
-
- "add %[dst], %[dst], %[stride] \n\t"
- "sw %[expected_dc], (%[dst]) \n\t"
- "sw %[expected_dc], 4(%[dst]) \n\t"
- "sw %[expected_dc], 8(%[dst]) \n\t"
- "sw %[expected_dc], 12(%[dst]) \n\t"
-
- "add %[dst], %[dst], %[stride] \n\t"
- "sw %[expected_dc], (%[dst]) \n\t"
- "sw %[expected_dc], 4(%[dst]) \n\t"
- "sw %[expected_dc], 8(%[dst]) \n\t"
- "sw %[expected_dc], 12(%[dst]) \n\t"
-
- : [left1] "=&r"(left1), [above1] "=&r"(above1), [left_l1] "=&r"(left_l1),
- [above_l1] "=&r"(above_l1), [left_r1] "=&r"(left_r1),
- [above_r1] "=&r"(above_r1), [above2] "=&r"(above2),
- [left2] "=&r"(left2), [average] "=&r"(average), [tmp] "=&r"(tmp),
- [expected_dc] "=&r"(expected_dc)
- : [above] "r"(above), [left] "r"(left), [dst] "r"(dst),
- [stride] "r"(stride));
-}
-#endif // #if HAVE_DSPR2
diff --git a/third_party/aom/aom_dsp/mips/intrapred4_dspr2.c b/third_party/aom/aom_dsp/mips/intrapred4_dspr2.c
deleted file mode 100644
index 0a21979c7..000000000
--- a/third_party/aom/aom_dsp/mips/intrapred4_dspr2.c
+++ /dev/null
@@ -1,82 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#include "aom_dsp/mips/common_dspr2.h"
-
-#if HAVE_DSPR2
-void aom_h_predictor_4x4_dspr2(uint8_t *dst, ptrdiff_t stride,
- const uint8_t *above, const uint8_t *left) {
- int32_t tmp1, tmp2, tmp3, tmp4;
- (void)above;
-
- __asm__ __volatile__(
- "lb %[tmp1], (%[left]) \n\t"
- "lb %[tmp2], 1(%[left]) \n\t"
- "lb %[tmp3], 2(%[left]) \n\t"
- "lb %[tmp4], 3(%[left]) \n\t"
- "replv.qb %[tmp1], %[tmp1] \n\t"
- "replv.qb %[tmp2], %[tmp2] \n\t"
- "replv.qb %[tmp3], %[tmp3] \n\t"
- "replv.qb %[tmp4], %[tmp4] \n\t"
- "sw %[tmp1], (%[dst]) \n\t"
- "add %[dst], %[dst], %[stride] \n\t"
- "sw %[tmp2], (%[dst]) \n\t"
- "add %[dst], %[dst], %[stride] \n\t"
- "sw %[tmp3], (%[dst]) \n\t"
- "add %[dst], %[dst], %[stride] \n\t"
- "sw %[tmp4], (%[dst]) \n\t"
-
- : [tmp1] "=&r"(tmp1), [tmp2] "=&r"(tmp2), [tmp3] "=&r"(tmp3),
- [tmp4] "=&r"(tmp4)
- : [left] "r"(left), [dst] "r"(dst), [stride] "r"(stride));
-}
-
-void aom_dc_predictor_4x4_dspr2(uint8_t *dst, ptrdiff_t stride,
- const uint8_t *above, const uint8_t *left) {
- int32_t expected_dc;
- int32_t average;
- int32_t tmp, above_c, above_l, above_r, left_c, left_r, left_l;
-
- __asm__ __volatile__(
- "lw %[above_c], (%[above]) \n\t"
- "lw %[left_c], (%[left]) \n\t"
-
- "preceu.ph.qbl %[above_l], %[above_c] \n\t"
- "preceu.ph.qbr %[above_r], %[above_c] \n\t"
- "preceu.ph.qbl %[left_l], %[left_c] \n\t"
- "preceu.ph.qbr %[left_r], %[left_c] \n\t"
-
- "addu.ph %[average], %[above_r], %[above_l] \n\t"
- "addu.ph %[average], %[average], %[left_l] \n\t"
- "addu.ph %[average], %[average], %[left_r] \n\t"
- "addiu %[average], %[average], 4 \n\t"
- "srl %[tmp], %[average], 16 \n\t"
- "addu.ph %[average], %[tmp], %[average] \n\t"
- "srl %[expected_dc], %[average], 3 \n\t"
- "replv.qb %[expected_dc], %[expected_dc] \n\t"
-
- "sw %[expected_dc], (%[dst]) \n\t"
- "add %[dst], %[dst], %[stride] \n\t"
- "sw %[expected_dc], (%[dst]) \n\t"
- "add %[dst], %[dst], %[stride] \n\t"
- "sw %[expected_dc], (%[dst]) \n\t"
- "add %[dst], %[dst], %[stride] \n\t"
- "sw %[expected_dc], (%[dst]) \n\t"
-
- : [above_c] "=&r"(above_c), [above_l] "=&r"(above_l),
- [above_r] "=&r"(above_r), [left_c] "=&r"(left_c),
- [left_l] "=&r"(left_l), [left_r] "=&r"(left_r),
- [average] "=&r"(average), [tmp] "=&r"(tmp),
- [expected_dc] "=&r"(expected_dc)
- : [above] "r"(above), [left] "r"(left), [dst] "r"(dst),
- [stride] "r"(stride));
-}
-#endif // #if HAVE_DSPR2
diff --git a/third_party/aom/aom_dsp/mips/intrapred8_dspr2.c b/third_party/aom/aom_dsp/mips/intrapred8_dspr2.c
deleted file mode 100644
index d42a77c80..000000000
--- a/third_party/aom/aom_dsp/mips/intrapred8_dspr2.c
+++ /dev/null
@@ -1,150 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#include "aom_dsp/mips/common_dspr2.h"
-
-#if HAVE_DSPR2
-void aom_h_predictor_8x8_dspr2(uint8_t *dst, ptrdiff_t stride,
- const uint8_t *above, const uint8_t *left) {
- int32_t tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7, tmp8;
- (void)above;
-
- __asm__ __volatile__(
- "lb %[tmp1], (%[left]) \n\t"
- "lb %[tmp2], 1(%[left]) \n\t"
- "lb %[tmp3], 2(%[left]) \n\t"
- "lb %[tmp4], 3(%[left]) \n\t"
- "lb %[tmp5], 4(%[left]) \n\t"
- "lb %[tmp6], 5(%[left]) \n\t"
- "lb %[tmp7], 6(%[left]) \n\t"
- "lb %[tmp8], 7(%[left]) \n\t"
-
- "replv.qb %[tmp1], %[tmp1] \n\t"
- "replv.qb %[tmp2], %[tmp2] \n\t"
- "replv.qb %[tmp3], %[tmp3] \n\t"
- "replv.qb %[tmp4], %[tmp4] \n\t"
- "replv.qb %[tmp5], %[tmp5] \n\t"
- "replv.qb %[tmp6], %[tmp6] \n\t"
- "replv.qb %[tmp7], %[tmp7] \n\t"
- "replv.qb %[tmp8], %[tmp8] \n\t"
-
- "sw %[tmp1], (%[dst]) \n\t"
- "sw %[tmp1], 4(%[dst]) \n\t"
- "add %[dst], %[dst], %[stride] \n\t"
- "sw %[tmp2], (%[dst]) \n\t"
- "sw %[tmp2], 4(%[dst]) \n\t"
- "add %[dst], %[dst], %[stride] \n\t"
- "sw %[tmp3], (%[dst]) \n\t"
- "sw %[tmp3], 4(%[dst]) \n\t"
- "add %[dst], %[dst], %[stride] \n\t"
- "sw %[tmp4], (%[dst]) \n\t"
- "sw %[tmp4], 4(%[dst]) \n\t"
- "add %[dst], %[dst], %[stride] \n\t"
- "sw %[tmp5], (%[dst]) \n\t"
- "sw %[tmp5], 4(%[dst]) \n\t"
- "add %[dst], %[dst], %[stride] \n\t"
- "sw %[tmp6], (%[dst]) \n\t"
- "sw %[tmp6], 4(%[dst]) \n\t"
- "add %[dst], %[dst], %[stride] \n\t"
- "sw %[tmp7], (%[dst]) \n\t"
- "sw %[tmp7], 4(%[dst]) \n\t"
- "add %[dst], %[dst], %[stride] \n\t"
- "sw %[tmp8], (%[dst]) \n\t"
- "sw %[tmp8], 4(%[dst]) \n\t"
-
- : [tmp1] "=&r"(tmp1), [tmp2] "=&r"(tmp2), [tmp3] "=&r"(tmp3),
- [tmp4] "=&r"(tmp4), [tmp5] "=&r"(tmp5), [tmp7] "=&r"(tmp7),
- [tmp6] "=&r"(tmp6), [tmp8] "=&r"(tmp8)
- : [left] "r"(left), [dst] "r"(dst), [stride] "r"(stride));
-}
-
-void aom_dc_predictor_8x8_dspr2(uint8_t *dst, ptrdiff_t stride,
- const uint8_t *above, const uint8_t *left) {
- int32_t expected_dc;
- int32_t average;
- int32_t tmp, above1, above_l1, above_r1, left1, left_r1, left_l1;
- int32_t above2, above_l2, above_r2, left2, left_r2, left_l2;
-
- __asm__ __volatile__(
- "lw %[above1], (%[above]) \n\t"
- "lw %[above2], 4(%[above]) \n\t"
- "lw %[left1], (%[left]) \n\t"
- "lw %[left2], 4(%[left]) \n\t"
-
- "preceu.ph.qbl %[above_l1], %[above1] \n\t"
- "preceu.ph.qbr %[above_r1], %[above1] \n\t"
- "preceu.ph.qbl %[left_l1], %[left1] \n\t"
- "preceu.ph.qbr %[left_r1], %[left1] \n\t"
-
- "preceu.ph.qbl %[above_l2], %[above2] \n\t"
- "preceu.ph.qbr %[above_r2], %[above2] \n\t"
- "preceu.ph.qbl %[left_l2], %[left2] \n\t"
- "preceu.ph.qbr %[left_r2], %[left2] \n\t"
-
- "addu.ph %[average], %[above_r1], %[above_l1] \n\t"
- "addu.ph %[average], %[average], %[left_l1] \n\t"
- "addu.ph %[average], %[average], %[left_r1] \n\t"
-
- "addu.ph %[average], %[average], %[above_l2] \n\t"
- "addu.ph %[average], %[average], %[above_r2] \n\t"
- "addu.ph %[average], %[average], %[left_l2] \n\t"
- "addu.ph %[average], %[average], %[left_r2] \n\t"
-
- "addiu %[average], %[average], 8 \n\t"
-
- "srl %[tmp], %[average], 16 \n\t"
- "addu.ph %[average], %[tmp], %[average] \n\t"
- "srl %[expected_dc], %[average], 4 \n\t"
- "replv.qb %[expected_dc], %[expected_dc] \n\t"
-
- "sw %[expected_dc], (%[dst]) \n\t"
- "sw %[expected_dc], 4(%[dst]) \n\t"
-
- "add %[dst], %[dst], %[stride] \n\t"
- "sw %[expected_dc], (%[dst]) \n\t"
- "sw %[expected_dc], 4(%[dst]) \n\t"
-
- "add %[dst], %[dst], %[stride] \n\t"
- "sw %[expected_dc], (%[dst]) \n\t"
- "sw %[expected_dc], 4(%[dst]) \n\t"
-
- "add %[dst], %[dst], %[stride] \n\t"
- "sw %[expected_dc], (%[dst]) \n\t"
- "sw %[expected_dc], 4(%[dst]) \n\t"
-
- "add %[dst], %[dst], %[stride] \n\t"
- "sw %[expected_dc], (%[dst]) \n\t"
- "sw %[expected_dc], 4(%[dst]) \n\t"
-
- "add %[dst], %[dst], %[stride] \n\t"
- "sw %[expected_dc], (%[dst]) \n\t"
- "sw %[expected_dc], 4(%[dst]) \n\t"
-
- "add %[dst], %[dst], %[stride] \n\t"
- "sw %[expected_dc], (%[dst]) \n\t"
- "sw %[expected_dc], 4(%[dst]) \n\t"
-
- "add %[dst], %[dst], %[stride] \n\t"
- "sw %[expected_dc], (%[dst]) \n\t"
- "sw %[expected_dc], 4(%[dst]) \n\t"
-
- : [above1] "=&r"(above1), [above_l1] "=&r"(above_l1),
- [above_r1] "=&r"(above_r1), [left1] "=&r"(left1),
- [left_l1] "=&r"(left_l1), [left_r1] "=&r"(left_r1),
- [above2] "=&r"(above2), [above_l2] "=&r"(above_l2),
- [above_r2] "=&r"(above_r2), [left2] "=&r"(left2),
- [left_l2] "=&r"(left_l2), [left_r2] "=&r"(left_r2),
- [average] "=&r"(average), [tmp] "=&r"(tmp),
- [expected_dc] "=&r"(expected_dc)
- : [above] "r"(above), [left] "r"(left), [dst] "r"(dst),
- [stride] "r"(stride));
-}
-#endif // #if HAVE_DSPR2
diff --git a/third_party/aom/aom_dsp/mips/intrapred_msa.c b/third_party/aom/aom_dsp/mips/intrapred_msa.c
deleted file mode 100644
index 9f25cc1ca..000000000
--- a/third_party/aom/aom_dsp/mips/intrapred_msa.c
+++ /dev/null
@@ -1,550 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#include "config/aom_dsp_rtcd.h"
-
-#include "aom_dsp/mips/macros_msa.h"
-
-#define IPRED_SUBS_UH2_UH(in0, in1, out0, out1) \
- { \
- out0 = __msa_subs_u_h(out0, in0); \
- out1 = __msa_subs_u_h(out1, in1); \
- }
-
-static void intra_predict_vert_4x4_msa(const uint8_t *src, uint8_t *dst,
- int32_t dst_stride) {
- uint32_t src_data;
-
- src_data = LW(src);
-
- SW4(src_data, src_data, src_data, src_data, dst, dst_stride);
-}
-
-static void intra_predict_vert_8x8_msa(const uint8_t *src, uint8_t *dst,
- int32_t dst_stride) {
- uint32_t row;
- uint32_t src_data1, src_data2;
-
- src_data1 = LW(src);
- src_data2 = LW(src + 4);
-
- for (row = 8; row--;) {
- SW(src_data1, dst);
- SW(src_data2, (dst + 4));
- dst += dst_stride;
- }
-}
-
-static void intra_predict_vert_16x16_msa(const uint8_t *src, uint8_t *dst,
- int32_t dst_stride) {
- uint32_t row;
- v16u8 src0;
-
- src0 = LD_UB(src);
-
- for (row = 16; row--;) {
- ST_UB(src0, dst);
- dst += dst_stride;
- }
-}
-
-static void intra_predict_vert_32x32_msa(const uint8_t *src, uint8_t *dst,
- int32_t dst_stride) {
- uint32_t row;
- v16u8 src1, src2;
-
- src1 = LD_UB(src);
- src2 = LD_UB(src + 16);
-
- for (row = 32; row--;) {
- ST_UB2(src1, src2, dst, 16);
- dst += dst_stride;
- }
-}
-
-static void intra_predict_horiz_4x4_msa(const uint8_t *src, uint8_t *dst,
- int32_t dst_stride) {
- uint32_t out0, out1, out2, out3;
-
- out0 = src[0] * 0x01010101;
- out1 = src[1] * 0x01010101;
- out2 = src[2] * 0x01010101;
- out3 = src[3] * 0x01010101;
-
- SW4(out0, out1, out2, out3, dst, dst_stride);
-}
-
-static void intra_predict_horiz_8x8_msa(const uint8_t *src, uint8_t *dst,
- int32_t dst_stride) {
- uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
-
- out0 = src[0] * 0x0101010101010101ull;
- out1 = src[1] * 0x0101010101010101ull;
- out2 = src[2] * 0x0101010101010101ull;
- out3 = src[3] * 0x0101010101010101ull;
- out4 = src[4] * 0x0101010101010101ull;
- out5 = src[5] * 0x0101010101010101ull;
- out6 = src[6] * 0x0101010101010101ull;
- out7 = src[7] * 0x0101010101010101ull;
-
- SD4(out0, out1, out2, out3, dst, dst_stride);
- dst += (4 * dst_stride);
- SD4(out4, out5, out6, out7, dst, dst_stride);
-}
-
-static void intra_predict_horiz_16x16_msa(const uint8_t *src, uint8_t *dst,
- int32_t dst_stride) {
- uint32_t row;
- uint8_t inp0, inp1, inp2, inp3;
- v16u8 src0, src1, src2, src3;
-
- for (row = 4; row--;) {
- inp0 = src[0];
- inp1 = src[1];
- inp2 = src[2];
- inp3 = src[3];
- src += 4;
-
- src0 = (v16u8)__msa_fill_b(inp0);
- src1 = (v16u8)__msa_fill_b(inp1);
- src2 = (v16u8)__msa_fill_b(inp2);
- src3 = (v16u8)__msa_fill_b(inp3);
-
- ST_UB4(src0, src1, src2, src3, dst, dst_stride);
- dst += (4 * dst_stride);
- }
-}
-
-static void intra_predict_horiz_32x32_msa(const uint8_t *src, uint8_t *dst,
- int32_t dst_stride) {
- uint32_t row;
- uint8_t inp0, inp1, inp2, inp3;
- v16u8 src0, src1, src2, src3;
-
- for (row = 8; row--;) {
- inp0 = src[0];
- inp1 = src[1];
- inp2 = src[2];
- inp3 = src[3];
- src += 4;
-
- src0 = (v16u8)__msa_fill_b(inp0);
- src1 = (v16u8)__msa_fill_b(inp1);
- src2 = (v16u8)__msa_fill_b(inp2);
- src3 = (v16u8)__msa_fill_b(inp3);
-
- ST_UB2(src0, src0, dst, 16);
- dst += dst_stride;
- ST_UB2(src1, src1, dst, 16);
- dst += dst_stride;
- ST_UB2(src2, src2, dst, 16);
- dst += dst_stride;
- ST_UB2(src3, src3, dst, 16);
- dst += dst_stride;
- }
-}
-
-static void intra_predict_dc_4x4_msa(const uint8_t *src_top,
- const uint8_t *src_left, uint8_t *dst,
- int32_t dst_stride) {
- uint32_t val0, val1;
- v16i8 store, src = { 0 };
- v8u16 sum_h;
- v4u32 sum_w;
- v2u64 sum_d;
-
- val0 = LW(src_top);
- val1 = LW(src_left);
- INSERT_W2_SB(val0, val1, src);
- sum_h = __msa_hadd_u_h((v16u8)src, (v16u8)src);
- sum_w = __msa_hadd_u_w(sum_h, sum_h);
- sum_d = __msa_hadd_u_d(sum_w, sum_w);
- sum_w = (v4u32)__msa_srari_w((v4i32)sum_d, 3);
- store = __msa_splati_b((v16i8)sum_w, 0);
- val0 = __msa_copy_u_w((v4i32)store, 0);
-
- SW4(val0, val0, val0, val0, dst, dst_stride);
-}
-
-static void intra_predict_dc_tl_4x4_msa(const uint8_t *src, uint8_t *dst,
- int32_t dst_stride) {
- uint32_t val0;
- v16i8 store, data = { 0 };
- v8u16 sum_h;
- v4u32 sum_w;
-
- val0 = LW(src);
- data = (v16i8)__msa_insert_w((v4i32)data, 0, val0);
- sum_h = __msa_hadd_u_h((v16u8)data, (v16u8)data);
- sum_w = __msa_hadd_u_w(sum_h, sum_h);
- sum_w = (v4u32)__msa_srari_w((v4i32)sum_w, 2);
- store = __msa_splati_b((v16i8)sum_w, 0);
- val0 = __msa_copy_u_w((v4i32)store, 0);
-
- SW4(val0, val0, val0, val0, dst, dst_stride);
-}
-
-static void intra_predict_128dc_4x4_msa(uint8_t *dst, int32_t dst_stride) {
- uint32_t out;
- const v16i8 store = __msa_ldi_b(128);
-
- out = __msa_copy_u_w((v4i32)store, 0);
-
- SW4(out, out, out, out, dst, dst_stride);
-}
-
-static void intra_predict_dc_8x8_msa(const uint8_t *src_top,
- const uint8_t *src_left, uint8_t *dst,
- int32_t dst_stride) {
- uint64_t val0, val1;
- v16i8 store;
- v16u8 src = { 0 };
- v8u16 sum_h;
- v4u32 sum_w;
- v2u64 sum_d;
-
- val0 = LD(src_top);
- val1 = LD(src_left);
- INSERT_D2_UB(val0, val1, src);
- sum_h = __msa_hadd_u_h(src, src);
- sum_w = __msa_hadd_u_w(sum_h, sum_h);
- sum_d = __msa_hadd_u_d(sum_w, sum_w);
- sum_w = (v4u32)__msa_pckev_w((v4i32)sum_d, (v4i32)sum_d);
- sum_d = __msa_hadd_u_d(sum_w, sum_w);
- sum_w = (v4u32)__msa_srari_w((v4i32)sum_d, 4);
- store = __msa_splati_b((v16i8)sum_w, 0);
- val0 = __msa_copy_u_d((v2i64)store, 0);
-
- SD4(val0, val0, val0, val0, dst, dst_stride);
- dst += (4 * dst_stride);
- SD4(val0, val0, val0, val0, dst, dst_stride);
-}
-
-static void intra_predict_dc_tl_8x8_msa(const uint8_t *src, uint8_t *dst,
- int32_t dst_stride) {
- uint64_t val0;
- v16i8 store;
- v16u8 data = { 0 };
- v8u16 sum_h;
- v4u32 sum_w;
- v2u64 sum_d;
-
- val0 = LD(src);
- data = (v16u8)__msa_insert_d((v2i64)data, 0, val0);
- sum_h = __msa_hadd_u_h(data, data);
- sum_w = __msa_hadd_u_w(sum_h, sum_h);
- sum_d = __msa_hadd_u_d(sum_w, sum_w);
- sum_w = (v4u32)__msa_srari_w((v4i32)sum_d, 3);
- store = __msa_splati_b((v16i8)sum_w, 0);
- val0 = __msa_copy_u_d((v2i64)store, 0);
-
- SD4(val0, val0, val0, val0, dst, dst_stride);
- dst += (4 * dst_stride);
- SD4(val0, val0, val0, val0, dst, dst_stride);
-}
-
-static void intra_predict_128dc_8x8_msa(uint8_t *dst, int32_t dst_stride) {
- uint64_t out;
- const v16i8 store = __msa_ldi_b(128);
-
- out = __msa_copy_u_d((v2i64)store, 0);
-
- SD4(out, out, out, out, dst, dst_stride);
- dst += (4 * dst_stride);
- SD4(out, out, out, out, dst, dst_stride);
-}
-
-static void intra_predict_dc_16x16_msa(const uint8_t *src_top,
- const uint8_t *src_left, uint8_t *dst,
- int32_t dst_stride) {
- v16u8 top, left, out;
- v8u16 sum_h, sum_top, sum_left;
- v4u32 sum_w;
- v2u64 sum_d;
-
- top = LD_UB(src_top);
- left = LD_UB(src_left);
- HADD_UB2_UH(top, left, sum_top, sum_left);
- sum_h = sum_top + sum_left;
- sum_w = __msa_hadd_u_w(sum_h, sum_h);
- sum_d = __msa_hadd_u_d(sum_w, sum_w);
- sum_w = (v4u32)__msa_pckev_w((v4i32)sum_d, (v4i32)sum_d);
- sum_d = __msa_hadd_u_d(sum_w, sum_w);
- sum_w = (v4u32)__msa_srari_w((v4i32)sum_d, 5);
- out = (v16u8)__msa_splati_b((v16i8)sum_w, 0);
-
- ST_UB8(out, out, out, out, out, out, out, out, dst, dst_stride);
- dst += (8 * dst_stride);
- ST_UB8(out, out, out, out, out, out, out, out, dst, dst_stride);
-}
-
-static void intra_predict_dc_tl_16x16_msa(const uint8_t *src, uint8_t *dst,
- int32_t dst_stride) {
- v16u8 data, out;
- v8u16 sum_h;
- v4u32 sum_w;
- v2u64 sum_d;
-
- data = LD_UB(src);
- sum_h = __msa_hadd_u_h(data, data);
- sum_w = __msa_hadd_u_w(sum_h, sum_h);
- sum_d = __msa_hadd_u_d(sum_w, sum_w);
- sum_w = (v4u32)__msa_pckev_w((v4i32)sum_d, (v4i32)sum_d);
- sum_d = __msa_hadd_u_d(sum_w, sum_w);
- sum_w = (v4u32)__msa_srari_w((v4i32)sum_d, 4);
- out = (v16u8)__msa_splati_b((v16i8)sum_w, 0);
-
- ST_UB8(out, out, out, out, out, out, out, out, dst, dst_stride);
- dst += (8 * dst_stride);
- ST_UB8(out, out, out, out, out, out, out, out, dst, dst_stride);
-}
-
-static void intra_predict_128dc_16x16_msa(uint8_t *dst, int32_t dst_stride) {
- const v16u8 out = (v16u8)__msa_ldi_b(128);
-
- ST_UB8(out, out, out, out, out, out, out, out, dst, dst_stride);
- dst += (8 * dst_stride);
- ST_UB8(out, out, out, out, out, out, out, out, dst, dst_stride);
-}
-
-static void intra_predict_dc_32x32_msa(const uint8_t *src_top,
- const uint8_t *src_left, uint8_t *dst,
- int32_t dst_stride) {
- uint32_t row;
- v16u8 top0, top1, left0, left1, out;
- v8u16 sum_h, sum_top0, sum_top1, sum_left0, sum_left1;
- v4u32 sum_w;
- v2u64 sum_d;
-
- LD_UB2(src_top, 16, top0, top1);
- LD_UB2(src_left, 16, left0, left1);
- HADD_UB2_UH(top0, top1, sum_top0, sum_top1);
- HADD_UB2_UH(left0, left1, sum_left0, sum_left1);
- sum_h = sum_top0 + sum_top1;
- sum_h += sum_left0 + sum_left1;
- sum_w = __msa_hadd_u_w(sum_h, sum_h);
- sum_d = __msa_hadd_u_d(sum_w, sum_w);
- sum_w = (v4u32)__msa_pckev_w((v4i32)sum_d, (v4i32)sum_d);
- sum_d = __msa_hadd_u_d(sum_w, sum_w);
- sum_w = (v4u32)__msa_srari_w((v4i32)sum_d, 6);
- out = (v16u8)__msa_splati_b((v16i8)sum_w, 0);
-
- for (row = 16; row--;) {
- ST_UB2(out, out, dst, 16);
- dst += dst_stride;
- ST_UB2(out, out, dst, 16);
- dst += dst_stride;
- }
-}
-
-static void intra_predict_dc_tl_32x32_msa(const uint8_t *src, uint8_t *dst,
- int32_t dst_stride) {
- uint32_t row;
- v16u8 data0, data1, out;
- v8u16 sum_h, sum_data0, sum_data1;
- v4u32 sum_w;
- v2u64 sum_d;
-
- LD_UB2(src, 16, data0, data1);
- HADD_UB2_UH(data0, data1, sum_data0, sum_data1);
- sum_h = sum_data0 + sum_data1;
- sum_w = __msa_hadd_u_w(sum_h, sum_h);
- sum_d = __msa_hadd_u_d(sum_w, sum_w);
- sum_w = (v4u32)__msa_pckev_w((v4i32)sum_d, (v4i32)sum_d);
- sum_d = __msa_hadd_u_d(sum_w, sum_w);
- sum_w = (v4u32)__msa_srari_w((v4i32)sum_d, 5);
- out = (v16u8)__msa_splati_b((v16i8)sum_w, 0);
-
- for (row = 16; row--;) {
- ST_UB2(out, out, dst, 16);
- dst += dst_stride;
- ST_UB2(out, out, dst, 16);
- dst += dst_stride;
- }
-}
-
-static void intra_predict_128dc_32x32_msa(uint8_t *dst, int32_t dst_stride) {
- uint32_t row;
- const v16u8 out = (v16u8)__msa_ldi_b(128);
-
- for (row = 16; row--;) {
- ST_UB2(out, out, dst, 16);
- dst += dst_stride;
- ST_UB2(out, out, dst, 16);
- dst += dst_stride;
- }
-}
-
-void aom_v_predictor_4x4_msa(uint8_t *dst, ptrdiff_t y_stride,
- const uint8_t *above, const uint8_t *left) {
- (void)left;
-
- intra_predict_vert_4x4_msa(above, dst, y_stride);
-}
-
-void aom_v_predictor_8x8_msa(uint8_t *dst, ptrdiff_t y_stride,
- const uint8_t *above, const uint8_t *left) {
- (void)left;
-
- intra_predict_vert_8x8_msa(above, dst, y_stride);
-}
-
-void aom_v_predictor_16x16_msa(uint8_t *dst, ptrdiff_t y_stride,
- const uint8_t *above, const uint8_t *left) {
- (void)left;
-
- intra_predict_vert_16x16_msa(above, dst, y_stride);
-}
-
-void aom_v_predictor_32x32_msa(uint8_t *dst, ptrdiff_t y_stride,
- const uint8_t *above, const uint8_t *left) {
- (void)left;
-
- intra_predict_vert_32x32_msa(above, dst, y_stride);
-}
-
-void aom_h_predictor_4x4_msa(uint8_t *dst, ptrdiff_t y_stride,
- const uint8_t *above, const uint8_t *left) {
- (void)above;
-
- intra_predict_horiz_4x4_msa(left, dst, y_stride);
-}
-
-void aom_h_predictor_8x8_msa(uint8_t *dst, ptrdiff_t y_stride,
- const uint8_t *above, const uint8_t *left) {
- (void)above;
-
- intra_predict_horiz_8x8_msa(left, dst, y_stride);
-}
-
-void aom_h_predictor_16x16_msa(uint8_t *dst, ptrdiff_t y_stride,
- const uint8_t *above, const uint8_t *left) {
- (void)above;
-
- intra_predict_horiz_16x16_msa(left, dst, y_stride);
-}
-
-void aom_h_predictor_32x32_msa(uint8_t *dst, ptrdiff_t y_stride,
- const uint8_t *above, const uint8_t *left) {
- (void)above;
-
- intra_predict_horiz_32x32_msa(left, dst, y_stride);
-}
-
-void aom_dc_predictor_4x4_msa(uint8_t *dst, ptrdiff_t y_stride,
- const uint8_t *above, const uint8_t *left) {
- intra_predict_dc_4x4_msa(above, left, dst, y_stride);
-}
-
-void aom_dc_predictor_8x8_msa(uint8_t *dst, ptrdiff_t y_stride,
- const uint8_t *above, const uint8_t *left) {
- intra_predict_dc_8x8_msa(above, left, dst, y_stride);
-}
-
-void aom_dc_predictor_16x16_msa(uint8_t *dst, ptrdiff_t y_stride,
- const uint8_t *above, const uint8_t *left) {
- intra_predict_dc_16x16_msa(above, left, dst, y_stride);
-}
-
-void aom_dc_predictor_32x32_msa(uint8_t *dst, ptrdiff_t y_stride,
- const uint8_t *above, const uint8_t *left) {
- intra_predict_dc_32x32_msa(above, left, dst, y_stride);
-}
-
-void aom_dc_top_predictor_4x4_msa(uint8_t *dst, ptrdiff_t y_stride,
- const uint8_t *above, const uint8_t *left) {
- (void)left;
-
- intra_predict_dc_tl_4x4_msa(above, dst, y_stride);
-}
-
-void aom_dc_top_predictor_8x8_msa(uint8_t *dst, ptrdiff_t y_stride,
- const uint8_t *above, const uint8_t *left) {
- (void)left;
-
- intra_predict_dc_tl_8x8_msa(above, dst, y_stride);
-}
-
-void aom_dc_top_predictor_16x16_msa(uint8_t *dst, ptrdiff_t y_stride,
- const uint8_t *above, const uint8_t *left) {
- (void)left;
-
- intra_predict_dc_tl_16x16_msa(above, dst, y_stride);
-}
-
-void aom_dc_top_predictor_32x32_msa(uint8_t *dst, ptrdiff_t y_stride,
- const uint8_t *above, const uint8_t *left) {
- (void)left;
-
- intra_predict_dc_tl_32x32_msa(above, dst, y_stride);
-}
-
-void aom_dc_left_predictor_4x4_msa(uint8_t *dst, ptrdiff_t y_stride,
- const uint8_t *above, const uint8_t *left) {
- (void)above;
-
- intra_predict_dc_tl_4x4_msa(left, dst, y_stride);
-}
-
-void aom_dc_left_predictor_8x8_msa(uint8_t *dst, ptrdiff_t y_stride,
- const uint8_t *above, const uint8_t *left) {
- (void)above;
-
- intra_predict_dc_tl_8x8_msa(left, dst, y_stride);
-}
-
-void aom_dc_left_predictor_16x16_msa(uint8_t *dst, ptrdiff_t y_stride,
- const uint8_t *above,
- const uint8_t *left) {
- (void)above;
-
- intra_predict_dc_tl_16x16_msa(left, dst, y_stride);
-}
-
-void aom_dc_left_predictor_32x32_msa(uint8_t *dst, ptrdiff_t y_stride,
- const uint8_t *above,
- const uint8_t *left) {
- (void)above;
-
- intra_predict_dc_tl_32x32_msa(left, dst, y_stride);
-}
-
-void aom_dc_128_predictor_4x4_msa(uint8_t *dst, ptrdiff_t y_stride,
- const uint8_t *above, const uint8_t *left) {
- (void)above;
- (void)left;
-
- intra_predict_128dc_4x4_msa(dst, y_stride);
-}
-
-void aom_dc_128_predictor_8x8_msa(uint8_t *dst, ptrdiff_t y_stride,
- const uint8_t *above, const uint8_t *left) {
- (void)above;
- (void)left;
-
- intra_predict_128dc_8x8_msa(dst, y_stride);
-}
-
-void aom_dc_128_predictor_16x16_msa(uint8_t *dst, ptrdiff_t y_stride,
- const uint8_t *above, const uint8_t *left) {
- (void)above;
- (void)left;
-
- intra_predict_128dc_16x16_msa(dst, y_stride);
-}
-
-void aom_dc_128_predictor_32x32_msa(uint8_t *dst, ptrdiff_t y_stride,
- const uint8_t *above, const uint8_t *left) {
- (void)above;
- (void)left;
-
- intra_predict_128dc_32x32_msa(dst, y_stride);
-}
diff --git a/third_party/aom/aom_dsp/mips/loopfilter_16_msa.c b/third_party/aom/aom_dsp/mips/loopfilter_16_msa.c
deleted file mode 100644
index 38a10e9b2..000000000
--- a/third_party/aom/aom_dsp/mips/loopfilter_16_msa.c
+++ /dev/null
@@ -1,1488 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#include "aom_ports/mem.h"
-#include "aom_dsp/mips/loopfilter_msa.h"
-
-int32_t aom_hz_lpf_t4_and_t8_16w(uint8_t *src, int32_t pitch, uint8_t *filter48,
- const uint8_t *b_limit_ptr,
- const uint8_t *limit_ptr,
- const uint8_t *thresh_ptr) {
- v16u8 p3, p2, p1, p0, q3, q2, q1, q0;
- v16u8 p2_out, p1_out, p0_out, q0_out, q1_out, q2_out;
- v16u8 flat, mask, hev, thresh, b_limit, limit;
- v8u16 p3_r, p2_r, p1_r, p0_r, q0_r, q1_r, q2_r, q3_r;
- v8u16 p3_l, p2_l, p1_l, p0_l, q0_l, q1_l, q2_l, q3_l;
- v8i16 p2_filt8_r, p1_filt8_r, p0_filt8_r, q0_filt8_r, q1_filt8_r, q2_filt8_r;
- v8i16 p2_filt8_l, p1_filt8_l, p0_filt8_l, q0_filt8_l, q1_filt8_l, q2_filt8_l;
- v16u8 zero = { 0 };
-
- /* load vector elements */
- LD_UB8(src - (4 * pitch), pitch, p3, p2, p1, p0, q0, q1, q2, q3);
-
- thresh = (v16u8)__msa_fill_b(*thresh_ptr);
- b_limit = (v16u8)__msa_fill_b(*b_limit_ptr);
- limit = (v16u8)__msa_fill_b(*limit_ptr);
-
- /* mask and hev */
- LPF_MASK_HEV(p3, p2, p1, p0, q0, q1, q2, q3, limit, b_limit, thresh, hev,
- mask, flat);
- AOM_FLAT4(p3, p2, p0, q0, q2, q3, flat);
- AOM_LPF_FILTER4_4W(p1, p0, q0, q1, mask, hev, p1_out, p0_out, q0_out, q1_out);
-
- if (__msa_test_bz_v(flat)) {
- ST_UB4(p1_out, p0_out, q0_out, q1_out, (src - 2 * pitch), pitch);
-
- return 1;
- } else {
- ILVR_B8_UH(zero, p3, zero, p2, zero, p1, zero, p0, zero, q0, zero, q1, zero,
- q2, zero, q3, p3_r, p2_r, p1_r, p0_r, q0_r, q1_r, q2_r, q3_r);
- AOM_FILTER8(p3_r, p2_r, p1_r, p0_r, q0_r, q1_r, q2_r, q3_r, p2_filt8_r,
- p1_filt8_r, p0_filt8_r, q0_filt8_r, q1_filt8_r, q2_filt8_r);
-
- ILVL_B4_UH(zero, p3, zero, p2, zero, p1, zero, p0, p3_l, p2_l, p1_l, p0_l);
- ILVL_B4_UH(zero, q0, zero, q1, zero, q2, zero, q3, q0_l, q1_l, q2_l, q3_l);
- AOM_FILTER8(p3_l, p2_l, p1_l, p0_l, q0_l, q1_l, q2_l, q3_l, p2_filt8_l,
- p1_filt8_l, p0_filt8_l, q0_filt8_l, q1_filt8_l, q2_filt8_l);
-
- /* convert 16 bit output data into 8 bit */
- PCKEV_B4_SH(p2_filt8_l, p2_filt8_r, p1_filt8_l, p1_filt8_r, p0_filt8_l,
- p0_filt8_r, q0_filt8_l, q0_filt8_r, p2_filt8_r, p1_filt8_r,
- p0_filt8_r, q0_filt8_r);
- PCKEV_B2_SH(q1_filt8_l, q1_filt8_r, q2_filt8_l, q2_filt8_r, q1_filt8_r,
- q2_filt8_r);
-
- /* store pixel values */
- p2_out = __msa_bmnz_v(p2, (v16u8)p2_filt8_r, flat);
- p1_out = __msa_bmnz_v(p1_out, (v16u8)p1_filt8_r, flat);
- p0_out = __msa_bmnz_v(p0_out, (v16u8)p0_filt8_r, flat);
- q0_out = __msa_bmnz_v(q0_out, (v16u8)q0_filt8_r, flat);
- q1_out = __msa_bmnz_v(q1_out, (v16u8)q1_filt8_r, flat);
- q2_out = __msa_bmnz_v(q2, (v16u8)q2_filt8_r, flat);
-
- ST_UB4(p2_out, p1_out, p0_out, q0_out, filter48, 16);
- filter48 += (4 * 16);
- ST_UB2(q1_out, q2_out, filter48, 16);
- filter48 += (2 * 16);
- ST_UB(flat, filter48);
-
- return 0;
- }
-}
-
-void aom_hz_lpf_t16_16w(uint8_t *src, int32_t pitch, uint8_t *filter48) {
- v16u8 flat, flat2, filter8;
- v16i8 zero = { 0 };
- v16u8 p7, p6, p5, p4, p3, p2, p1, p0, q0, q1, q2, q3, q4, q5, q6, q7;
- v8u16 p7_r_in, p6_r_in, p5_r_in, p4_r_in, p3_r_in, p2_r_in, p1_r_in, p0_r_in;
- v8u16 q7_r_in, q6_r_in, q5_r_in, q4_r_in, q3_r_in, q2_r_in, q1_r_in, q0_r_in;
- v8u16 p7_l_in, p6_l_in, p5_l_in, p4_l_in, p3_l_in, p2_l_in, p1_l_in, p0_l_in;
- v8u16 q7_l_in, q6_l_in, q5_l_in, q4_l_in, q3_l_in, q2_l_in, q1_l_in, q0_l_in;
- v8u16 tmp0_r, tmp1_r, tmp0_l, tmp1_l;
- v8i16 l_out, r_out;
-
- flat = LD_UB(filter48 + 96);
-
- LD_UB8((src - 8 * pitch), pitch, p7, p6, p5, p4, p3, p2, p1, p0);
- LD_UB8(src, pitch, q0, q1, q2, q3, q4, q5, q6, q7);
- AOM_FLAT5(p7, p6, p5, p4, p0, q0, q4, q5, q6, q7, flat, flat2);
-
- if (__msa_test_bz_v(flat2)) {
- LD_UB4(filter48, 16, p2, p1, p0, q0);
- LD_UB2(filter48 + 4 * 16, 16, q1, q2);
-
- src -= 3 * pitch;
- ST_UB4(p2, p1, p0, q0, src, pitch);
- src += (4 * pitch);
- ST_UB2(q1, q2, src, pitch);
- } else {
- src -= 7 * pitch;
-
- ILVR_B8_UH(zero, p7, zero, p6, zero, p5, zero, p4, zero, p3, zero, p2, zero,
- p1, zero, p0, p7_r_in, p6_r_in, p5_r_in, p4_r_in, p3_r_in,
- p2_r_in, p1_r_in, p0_r_in);
-
- q0_r_in = (v8u16)__msa_ilvr_b(zero, (v16i8)q0);
-
- tmp0_r = p7_r_in << 3;
- tmp0_r -= p7_r_in;
- tmp0_r += p6_r_in;
- tmp0_r += q0_r_in;
- tmp1_r = p6_r_in + p5_r_in;
- tmp1_r += p4_r_in;
- tmp1_r += p3_r_in;
- tmp1_r += p2_r_in;
- tmp1_r += p1_r_in;
- tmp1_r += p0_r_in;
- tmp1_r += tmp0_r;
- r_out = __msa_srari_h((v8i16)tmp1_r, 4);
-
- ILVL_B4_UH(zero, p7, zero, p6, zero, p5, zero, p4, p7_l_in, p6_l_in,
- p5_l_in, p4_l_in);
- ILVL_B4_UH(zero, p3, zero, p2, zero, p1, zero, p0, p3_l_in, p2_l_in,
- p1_l_in, p0_l_in);
- q0_l_in = (v8u16)__msa_ilvl_b(zero, (v16i8)q0);
-
- tmp0_l = p7_l_in << 3;
- tmp0_l -= p7_l_in;
- tmp0_l += p6_l_in;
- tmp0_l += q0_l_in;
- tmp1_l = p6_l_in + p5_l_in;
- tmp1_l += p4_l_in;
- tmp1_l += p3_l_in;
- tmp1_l += p2_l_in;
- tmp1_l += p1_l_in;
- tmp1_l += p0_l_in;
- tmp1_l += tmp0_l;
- l_out = __msa_srari_h((v8i16)tmp1_l, 4);
-
- r_out = (v8i16)__msa_pckev_b((v16i8)l_out, (v16i8)r_out);
- p6 = __msa_bmnz_v(p6, (v16u8)r_out, flat2);
- ST_UB(p6, src);
- src += pitch;
-
- /* p5 */
- q1_r_in = (v8u16)__msa_ilvr_b(zero, (v16i8)q1);
- tmp0_r = p5_r_in - p6_r_in;
- tmp0_r += q1_r_in;
- tmp0_r -= p7_r_in;
- tmp1_r += tmp0_r;
- r_out = __msa_srari_h((v8i16)tmp1_r, 4);
-
- q1_l_in = (v8u16)__msa_ilvl_b(zero, (v16i8)q1);
- tmp0_l = p5_l_in - p6_l_in;
- tmp0_l += q1_l_in;
- tmp0_l -= p7_l_in;
- tmp1_l += tmp0_l;
- l_out = __msa_srari_h((v8i16)tmp1_l, 4);
-
- r_out = (v8i16)__msa_pckev_b((v16i8)l_out, (v16i8)r_out);
- p5 = __msa_bmnz_v(p5, (v16u8)r_out, flat2);
- ST_UB(p5, src);
- src += pitch;
-
- /* p4 */
- q2_r_in = (v8u16)__msa_ilvr_b(zero, (v16i8)q2);
- tmp0_r = p4_r_in - p5_r_in;
- tmp0_r += q2_r_in;
- tmp0_r -= p7_r_in;
- tmp1_r += tmp0_r;
- r_out = (v8i16)__msa_srari_h((v8i16)tmp1_r, 4);
-
- q2_l_in = (v8u16)__msa_ilvl_b(zero, (v16i8)q2);
- tmp0_l = p4_l_in - p5_l_in;
- tmp0_l += q2_l_in;
- tmp0_l -= p7_l_in;
- tmp1_l += tmp0_l;
- l_out = __msa_srari_h((v8i16)tmp1_l, 4);
-
- r_out = (v8i16)__msa_pckev_b((v16i8)l_out, (v16i8)r_out);
- p4 = __msa_bmnz_v(p4, (v16u8)r_out, flat2);
- ST_UB(p4, src);
- src += pitch;
-
- /* p3 */
- q3_r_in = (v8u16)__msa_ilvr_b(zero, (v16i8)q3);
- tmp0_r = p3_r_in - p4_r_in;
- tmp0_r += q3_r_in;
- tmp0_r -= p7_r_in;
- tmp1_r += tmp0_r;
- r_out = __msa_srari_h((v8i16)tmp1_r, 4);
-
- q3_l_in = (v8u16)__msa_ilvl_b(zero, (v16i8)q3);
- tmp0_l = p3_l_in - p4_l_in;
- tmp0_l += q3_l_in;
- tmp0_l -= p7_l_in;
- tmp1_l += tmp0_l;
- l_out = __msa_srari_h((v8i16)tmp1_l, 4);
-
- r_out = (v8i16)__msa_pckev_b((v16i8)l_out, (v16i8)r_out);
- p3 = __msa_bmnz_v(p3, (v16u8)r_out, flat2);
- ST_UB(p3, src);
- src += pitch;
-
- /* p2 */
- q4_r_in = (v8u16)__msa_ilvr_b(zero, (v16i8)q4);
- filter8 = LD_UB(filter48);
- tmp0_r = p2_r_in - p3_r_in;
- tmp0_r += q4_r_in;
- tmp0_r -= p7_r_in;
- tmp1_r += tmp0_r;
- r_out = __msa_srari_h((v8i16)tmp1_r, 4);
-
- q4_l_in = (v8u16)__msa_ilvl_b(zero, (v16i8)q4);
- tmp0_l = p2_l_in - p3_l_in;
- tmp0_l += q4_l_in;
- tmp0_l -= p7_l_in;
- tmp1_l += tmp0_l;
- l_out = __msa_srari_h((v8i16)tmp1_l, 4);
-
- r_out = (v8i16)__msa_pckev_b((v16i8)l_out, (v16i8)r_out);
- filter8 = __msa_bmnz_v(filter8, (v16u8)r_out, flat2);
- ST_UB(filter8, src);
- src += pitch;
-
- /* p1 */
- q5_r_in = (v8u16)__msa_ilvr_b(zero, (v16i8)q5);
- filter8 = LD_UB(filter48 + 16);
- tmp0_r = p1_r_in - p2_r_in;
- tmp0_r += q5_r_in;
- tmp0_r -= p7_r_in;
- tmp1_r += tmp0_r;
- r_out = __msa_srari_h((v8i16)tmp1_r, 4);
-
- q5_l_in = (v8u16)__msa_ilvl_b(zero, (v16i8)q5);
- tmp0_l = p1_l_in - p2_l_in;
- tmp0_l += q5_l_in;
- tmp0_l -= p7_l_in;
- tmp1_l += tmp0_l;
- l_out = __msa_srari_h((v8i16)tmp1_l, 4);
-
- r_out = (v8i16)__msa_pckev_b((v16i8)l_out, (v16i8)r_out);
- filter8 = __msa_bmnz_v(filter8, (v16u8)r_out, flat2);
- ST_UB(filter8, src);
- src += pitch;
-
- /* p0 */
- q6_r_in = (v8u16)__msa_ilvr_b(zero, (v16i8)q6);
- filter8 = LD_UB(filter48 + 32);
- tmp0_r = p0_r_in - p1_r_in;
- tmp0_r += q6_r_in;
- tmp0_r -= p7_r_in;
- tmp1_r += tmp0_r;
- r_out = __msa_srari_h((v8i16)tmp1_r, 4);
-
- q6_l_in = (v8u16)__msa_ilvl_b(zero, (v16i8)q6);
- tmp0_l = p0_l_in - p1_l_in;
- tmp0_l += q6_l_in;
- tmp0_l -= p7_l_in;
- tmp1_l += tmp0_l;
- l_out = __msa_srari_h((v8i16)tmp1_l, 4);
-
- r_out = (v8i16)__msa_pckev_b((v16i8)l_out, (v16i8)r_out);
- filter8 = __msa_bmnz_v(filter8, (v16u8)r_out, flat2);
- ST_UB(filter8, src);
- src += pitch;
-
- /* q0 */
- q7_r_in = (v8u16)__msa_ilvr_b(zero, (v16i8)q7);
- filter8 = LD_UB(filter48 + 48);
- tmp0_r = q7_r_in - p0_r_in;
- tmp0_r += q0_r_in;
- tmp0_r -= p7_r_in;
- tmp1_r += tmp0_r;
- r_out = __msa_srari_h((v8i16)tmp1_r, 4);
-
- q7_l_in = (v8u16)__msa_ilvl_b(zero, (v16i8)q7);
- tmp0_l = q7_l_in - p0_l_in;
- tmp0_l += q0_l_in;
- tmp0_l -= p7_l_in;
- tmp1_l += tmp0_l;
- l_out = __msa_srari_h((v8i16)tmp1_l, 4);
-
- r_out = (v8i16)__msa_pckev_b((v16i8)l_out, (v16i8)r_out);
- filter8 = __msa_bmnz_v(filter8, (v16u8)r_out, flat2);
- ST_UB(filter8, src);
- src += pitch;
-
- /* q1 */
- filter8 = LD_UB(filter48 + 64);
- tmp0_r = q7_r_in - q0_r_in;
- tmp0_r += q1_r_in;
- tmp0_r -= p6_r_in;
- tmp1_r += tmp0_r;
- r_out = __msa_srari_h((v8i16)tmp1_r, 4);
-
- tmp0_l = q7_l_in - q0_l_in;
- tmp0_l += q1_l_in;
- tmp0_l -= p6_l_in;
- tmp1_l += tmp0_l;
- l_out = __msa_srari_h((v8i16)tmp1_l, 4);
-
- r_out = (v8i16)__msa_pckev_b((v16i8)l_out, (v16i8)r_out);
- filter8 = __msa_bmnz_v(filter8, (v16u8)r_out, flat2);
- ST_UB(filter8, src);
- src += pitch;
-
- /* q2 */
- filter8 = LD_UB(filter48 + 80);
- tmp0_r = q7_r_in - q1_r_in;
- tmp0_r += q2_r_in;
- tmp0_r -= p5_r_in;
- tmp1_r += tmp0_r;
- r_out = __msa_srari_h((v8i16)tmp1_r, 4);
-
- tmp0_l = q7_l_in - q1_l_in;
- tmp0_l += q2_l_in;
- tmp0_l -= p5_l_in;
- tmp1_l += tmp0_l;
- l_out = __msa_srari_h((v8i16)tmp1_l, 4);
-
- r_out = (v8i16)__msa_pckev_b((v16i8)l_out, (v16i8)r_out);
- filter8 = __msa_bmnz_v(filter8, (v16u8)r_out, flat2);
- ST_UB(filter8, src);
- src += pitch;
-
- /* q3 */
- tmp0_r = q7_r_in - q2_r_in;
- tmp0_r += q3_r_in;
- tmp0_r -= p4_r_in;
- tmp1_r += tmp0_r;
- r_out = __msa_srari_h((v8i16)tmp1_r, 4);
-
- tmp0_l = q7_l_in - q2_l_in;
- tmp0_l += q3_l_in;
- tmp0_l -= p4_l_in;
- tmp1_l += tmp0_l;
- l_out = __msa_srari_h((v8i16)tmp1_l, 4);
-
- r_out = (v8i16)__msa_pckev_b((v16i8)l_out, (v16i8)r_out);
- q3 = __msa_bmnz_v(q3, (v16u8)r_out, flat2);
- ST_UB(q3, src);
- src += pitch;
-
- /* q4 */
- tmp0_r = q7_r_in - q3_r_in;
- tmp0_r += q4_r_in;
- tmp0_r -= p3_r_in;
- tmp1_r += tmp0_r;
- r_out = __msa_srari_h((v8i16)tmp1_r, 4);
-
- tmp0_l = q7_l_in - q3_l_in;
- tmp0_l += q4_l_in;
- tmp0_l -= p3_l_in;
- tmp1_l += tmp0_l;
- l_out = __msa_srari_h((v8i16)tmp1_l, 4);
-
- r_out = (v8i16)__msa_pckev_b((v16i8)l_out, (v16i8)r_out);
- q4 = __msa_bmnz_v(q4, (v16u8)r_out, flat2);
- ST_UB(q4, src);
- src += pitch;
-
- /* q5 */
- tmp0_r = q7_r_in - q4_r_in;
- tmp0_r += q5_r_in;
- tmp0_r -= p2_r_in;
- tmp1_r += tmp0_r;
- r_out = __msa_srari_h((v8i16)tmp1_r, 4);
-
- tmp0_l = q7_l_in - q4_l_in;
- tmp0_l += q5_l_in;
- tmp0_l -= p2_l_in;
- tmp1_l += tmp0_l;
- l_out = __msa_srari_h((v8i16)tmp1_l, 4);
-
- r_out = (v8i16)__msa_pckev_b((v16i8)l_out, (v16i8)r_out);
- q5 = __msa_bmnz_v(q5, (v16u8)r_out, flat2);
- ST_UB(q5, src);
- src += pitch;
-
- /* q6 */
- tmp0_r = q7_r_in - q5_r_in;
- tmp0_r += q6_r_in;
- tmp0_r -= p1_r_in;
- tmp1_r += tmp0_r;
- r_out = __msa_srari_h((v8i16)tmp1_r, 4);
-
- tmp0_l = q7_l_in - q5_l_in;
- tmp0_l += q6_l_in;
- tmp0_l -= p1_l_in;
- tmp1_l += tmp0_l;
- l_out = __msa_srari_h((v8i16)tmp1_l, 4);
-
- r_out = (v8i16)__msa_pckev_b((v16i8)l_out, (v16i8)r_out);
- q6 = __msa_bmnz_v(q6, (v16u8)r_out, flat2);
- ST_UB(q6, src);
- }
-}
-
-static void mb_lpf_horizontal_edge_dual(uint8_t *src, int32_t pitch,
- const uint8_t *b_limit_ptr,
- const uint8_t *limit_ptr,
- const uint8_t *thresh_ptr,
- int32_t count) {
- DECLARE_ALIGNED(32, uint8_t, filter48[16 * 8]);
- uint8_t early_exit = 0;
-
- (void)count;
-
- early_exit = aom_hz_lpf_t4_and_t8_16w(src, pitch, &filter48[0], b_limit_ptr,
- limit_ptr, thresh_ptr);
-
- if (0 == early_exit) {
- aom_hz_lpf_t16_16w(src, pitch, filter48);
- }
-}
-
-static void mb_lpf_horizontal_edge(uint8_t *src, int32_t pitch,
- const uint8_t *b_limit_ptr,
- const uint8_t *limit_ptr,
- const uint8_t *thresh_ptr, int32_t count) {
- if (1 == count) {
- uint64_t p2_d, p1_d, p0_d, q0_d, q1_d, q2_d;
- uint64_t dword0, dword1;
- v16u8 flat2, mask, hev, flat, thresh, b_limit, limit;
- v16u8 p3, p2, p1, p0, q3, q2, q1, q0, p7, p6, p5, p4, q4, q5, q6, q7;
- v16u8 p2_out, p1_out, p0_out, q0_out, q1_out, q2_out;
- v16u8 p0_filter16, p1_filter16;
- v8i16 p2_filter8, p1_filter8, p0_filter8;
- v8i16 q0_filter8, q1_filter8, q2_filter8;
- v8u16 p7_r, p6_r, p5_r, p4_r, q7_r, q6_r, q5_r, q4_r;
- v8u16 p3_r, p2_r, p1_r, p0_r, q3_r, q2_r, q1_r, q0_r;
- v16i8 zero = { 0 };
- v8u16 tmp0, tmp1, tmp2;
-
- /* load vector elements */
- LD_UB8((src - 4 * pitch), pitch, p3, p2, p1, p0, q0, q1, q2, q3);
-
- thresh = (v16u8)__msa_fill_b(*thresh_ptr);
- b_limit = (v16u8)__msa_fill_b(*b_limit_ptr);
- limit = (v16u8)__msa_fill_b(*limit_ptr);
-
- LPF_MASK_HEV(p3, p2, p1, p0, q0, q1, q2, q3, limit, b_limit, thresh, hev,
- mask, flat);
- AOM_FLAT4(p3, p2, p0, q0, q2, q3, flat);
- AOM_LPF_FILTER4_8W(p1, p0, q0, q1, mask, hev, p1_out, p0_out, q0_out,
- q1_out);
-
- flat = (v16u8)__msa_ilvr_d((v2i64)zero, (v2i64)flat);
-
- if (__msa_test_bz_v(flat)) {
- p1_d = __msa_copy_u_d((v2i64)p1_out, 0);
- p0_d = __msa_copy_u_d((v2i64)p0_out, 0);
- q0_d = __msa_copy_u_d((v2i64)q0_out, 0);
- q1_d = __msa_copy_u_d((v2i64)q1_out, 0);
- SD4(p1_d, p0_d, q0_d, q1_d, src - 2 * pitch, pitch);
- } else {
- /* convert 8 bit input data into 16 bit */
- ILVR_B8_UH(zero, p3, zero, p2, zero, p1, zero, p0, zero, q0, zero, q1,
- zero, q2, zero, q3, p3_r, p2_r, p1_r, p0_r, q0_r, q1_r, q2_r,
- q3_r);
- AOM_FILTER8(p3_r, p2_r, p1_r, p0_r, q0_r, q1_r, q2_r, q3_r, p2_filter8,
- p1_filter8, p0_filter8, q0_filter8, q1_filter8, q2_filter8);
-
- /* convert 16 bit output data into 8 bit */
- PCKEV_B4_SH(zero, p2_filter8, zero, p1_filter8, zero, p0_filter8, zero,
- q0_filter8, p2_filter8, p1_filter8, p0_filter8, q0_filter8);
- PCKEV_B2_SH(zero, q1_filter8, zero, q2_filter8, q1_filter8, q2_filter8);
-
- /* store pixel values */
- p2_out = __msa_bmnz_v(p2, (v16u8)p2_filter8, flat);
- p1_out = __msa_bmnz_v(p1_out, (v16u8)p1_filter8, flat);
- p0_out = __msa_bmnz_v(p0_out, (v16u8)p0_filter8, flat);
- q0_out = __msa_bmnz_v(q0_out, (v16u8)q0_filter8, flat);
- q1_out = __msa_bmnz_v(q1_out, (v16u8)q1_filter8, flat);
- q2_out = __msa_bmnz_v(q2, (v16u8)q2_filter8, flat);
-
- /* load 16 vector elements */
- LD_UB4((src - 8 * pitch), pitch, p7, p6, p5, p4);
- LD_UB4(src + (4 * pitch), pitch, q4, q5, q6, q7);
-
- AOM_FLAT5(p7, p6, p5, p4, p0, q0, q4, q5, q6, q7, flat, flat2);
-
- if (__msa_test_bz_v(flat2)) {
- p2_d = __msa_copy_u_d((v2i64)p2_out, 0);
- p1_d = __msa_copy_u_d((v2i64)p1_out, 0);
- p0_d = __msa_copy_u_d((v2i64)p0_out, 0);
- q0_d = __msa_copy_u_d((v2i64)q0_out, 0);
- q1_d = __msa_copy_u_d((v2i64)q1_out, 0);
- q2_d = __msa_copy_u_d((v2i64)q2_out, 0);
-
- SD4(p2_d, p1_d, p0_d, q0_d, src - 3 * pitch, pitch);
- SD(q1_d, src + pitch);
- SD(q2_d, src + 2 * pitch);
- } else {
- /* LSB(right) 8 pixel operation */
- ILVR_B8_UH(zero, p7, zero, p6, zero, p5, zero, p4, zero, q4, zero, q5,
- zero, q6, zero, q7, p7_r, p6_r, p5_r, p4_r, q4_r, q5_r, q6_r,
- q7_r);
-
- tmp0 = p7_r << 3;
- tmp0 -= p7_r;
- tmp0 += p6_r;
- tmp0 += q0_r;
-
- src -= 7 * pitch;
-
- /* calculation of p6 and p5 */
- tmp1 = p6_r + p5_r + p4_r + p3_r;
- tmp1 += (p2_r + p1_r + p0_r);
- tmp1 += tmp0;
- p0_filter16 = (v16u8)__msa_srari_h((v8i16)tmp1, 4);
- tmp0 = p5_r - p6_r + q1_r - p7_r;
- tmp1 += tmp0;
- p1_filter16 = (v16u8)__msa_srari_h((v8i16)tmp1, 4);
- PCKEV_B2_UB(zero, p0_filter16, zero, p1_filter16, p0_filter16,
- p1_filter16);
- p0_filter16 = __msa_bmnz_v(p6, p0_filter16, flat2);
- p1_filter16 = __msa_bmnz_v(p5, p1_filter16, flat2);
- dword0 = __msa_copy_u_d((v2i64)p0_filter16, 0);
- dword1 = __msa_copy_u_d((v2i64)p1_filter16, 0);
- SD(dword0, src);
- src += pitch;
- SD(dword1, src);
- src += pitch;
-
- /* calculation of p4 and p3 */
- tmp0 = p4_r - p5_r + q2_r - p7_r;
- tmp2 = p3_r - p4_r + q3_r - p7_r;
- tmp1 += tmp0;
- p0_filter16 = (v16u8)__msa_srari_h((v8i16)tmp1, 4);
- tmp1 += tmp2;
- p1_filter16 = (v16u8)__msa_srari_h((v8i16)tmp1, 4);
- PCKEV_B2_UB(zero, p0_filter16, zero, p1_filter16, p0_filter16,
- p1_filter16);
- p0_filter16 = __msa_bmnz_v(p4, p0_filter16, flat2);
- p1_filter16 = __msa_bmnz_v(p3, p1_filter16, flat2);
- dword0 = __msa_copy_u_d((v2i64)p0_filter16, 0);
- dword1 = __msa_copy_u_d((v2i64)p1_filter16, 0);
- SD(dword0, src);
- src += pitch;
- SD(dword1, src);
- src += pitch;
-
- /* calculation of p2 and p1 */
- tmp0 = p2_r - p3_r + q4_r - p7_r;
- tmp2 = p1_r - p2_r + q5_r - p7_r;
- tmp1 += tmp0;
- p0_filter16 = (v16u8)__msa_srari_h((v8i16)tmp1, 4);
- tmp1 += tmp2;
- p1_filter16 = (v16u8)__msa_srari_h((v8i16)tmp1, 4);
- PCKEV_B2_UB(zero, p0_filter16, zero, p1_filter16, p0_filter16,
- p1_filter16);
- p0_filter16 = __msa_bmnz_v(p2_out, p0_filter16, flat2);
- p1_filter16 = __msa_bmnz_v(p1_out, p1_filter16, flat2);
- dword0 = __msa_copy_u_d((v2i64)p0_filter16, 0);
- dword1 = __msa_copy_u_d((v2i64)p1_filter16, 0);
- SD(dword0, src);
- src += pitch;
- SD(dword1, src);
- src += pitch;
-
- /* calculation of p0 and q0 */
- tmp0 = (p0_r - p1_r) + (q6_r - p7_r);
- tmp2 = (q7_r - p0_r) + (q0_r - p7_r);
- tmp1 += tmp0;
- p0_filter16 = (v16u8)__msa_srari_h((v8i16)tmp1, 4);
- tmp1 += tmp2;
- p1_filter16 = (v16u8)__msa_srari_h((v8i16)tmp1, 4);
- PCKEV_B2_UB(zero, p0_filter16, zero, p1_filter16, p0_filter16,
- p1_filter16);
- p0_filter16 = __msa_bmnz_v(p0_out, p0_filter16, flat2);
- p1_filter16 = __msa_bmnz_v(q0_out, p1_filter16, flat2);
- dword0 = __msa_copy_u_d((v2i64)p0_filter16, 0);
- dword1 = __msa_copy_u_d((v2i64)p1_filter16, 0);
- SD(dword0, src);
- src += pitch;
- SD(dword1, src);
- src += pitch;
-
- /* calculation of q1 and q2 */
- tmp0 = q7_r - q0_r + q1_r - p6_r;
- tmp2 = q7_r - q1_r + q2_r - p5_r;
- tmp1 += tmp0;
- p0_filter16 = (v16u8)__msa_srari_h((v8i16)tmp1, 4);
- tmp1 += tmp2;
- p1_filter16 = (v16u8)__msa_srari_h((v8i16)tmp1, 4);
- PCKEV_B2_UB(zero, p0_filter16, zero, p1_filter16, p0_filter16,
- p1_filter16);
- p0_filter16 = __msa_bmnz_v(q1_out, p0_filter16, flat2);
- p1_filter16 = __msa_bmnz_v(q2_out, p1_filter16, flat2);
- dword0 = __msa_copy_u_d((v2i64)p0_filter16, 0);
- dword1 = __msa_copy_u_d((v2i64)p1_filter16, 0);
- SD(dword0, src);
- src += pitch;
- SD(dword1, src);
- src += pitch;
-
- /* calculation of q3 and q4 */
- tmp0 = (q7_r - q2_r) + (q3_r - p4_r);
- tmp2 = (q7_r - q3_r) + (q4_r - p3_r);
- tmp1 += tmp0;
- p0_filter16 = (v16u8)__msa_srari_h((v8i16)tmp1, 4);
- tmp1 += tmp2;
- p1_filter16 = (v16u8)__msa_srari_h((v8i16)tmp1, 4);
- PCKEV_B2_UB(zero, p0_filter16, zero, p1_filter16, p0_filter16,
- p1_filter16);
- p0_filter16 = __msa_bmnz_v(q3, p0_filter16, flat2);
- p1_filter16 = __msa_bmnz_v(q4, p1_filter16, flat2);
- dword0 = __msa_copy_u_d((v2i64)p0_filter16, 0);
- dword1 = __msa_copy_u_d((v2i64)p1_filter16, 0);
- SD(dword0, src);
- src += pitch;
- SD(dword1, src);
- src += pitch;
-
- /* calculation of q5 and q6 */
- tmp0 = (q7_r - q4_r) + (q5_r - p2_r);
- tmp2 = (q7_r - q5_r) + (q6_r - p1_r);
- tmp1 += tmp0;
- p0_filter16 = (v16u8)__msa_srari_h((v8i16)tmp1, 4);
- tmp1 += tmp2;
- p1_filter16 = (v16u8)__msa_srari_h((v8i16)tmp1, 4);
- PCKEV_B2_UB(zero, p0_filter16, zero, p1_filter16, p0_filter16,
- p1_filter16);
- p0_filter16 = __msa_bmnz_v(q5, p0_filter16, flat2);
- p1_filter16 = __msa_bmnz_v(q6, p1_filter16, flat2);
- dword0 = __msa_copy_u_d((v2i64)p0_filter16, 0);
- dword1 = __msa_copy_u_d((v2i64)p1_filter16, 0);
- SD(dword0, src);
- src += pitch;
- SD(dword1, src);
- }
- }
- } else {
- mb_lpf_horizontal_edge_dual(src, pitch, b_limit_ptr, limit_ptr, thresh_ptr,
- count);
- }
-}
-
-void aom_lpf_horizontal_16_msa(uint8_t *src, int32_t pitch,
- const uint8_t *b_limit_ptr,
- const uint8_t *limit_ptr,
- const uint8_t *thresh_ptr) {
- mb_lpf_horizontal_edge(src, pitch, b_limit_ptr, limit_ptr, thresh_ptr, 1);
-}
-
-void aom_lpf_horizontal_16_dual_msa(uint8_t *src, int32_t pitch,
- const uint8_t *b_limit_ptr,
- const uint8_t *limit_ptr,
- const uint8_t *thresh_ptr) {
- mb_lpf_horizontal_edge(src, pitch, b_limit_ptr, limit_ptr, thresh_ptr, 2);
-}
-
-static void transpose_16x8_to_8x16(uint8_t *input, int32_t in_pitch,
- uint8_t *output, int32_t out_pitch) {
- v16u8 p7_org, p6_org, p5_org, p4_org, p3_org, p2_org, p1_org, p0_org;
- v16i8 tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7;
- v16u8 p7, p6, p5, p4, p3, p2, p1, p0, q0, q1, q2, q3, q4, q5, q6, q7;
-
- LD_UB8(input, in_pitch, p7_org, p6_org, p5_org, p4_org, p3_org, p2_org,
- p1_org, p0_org);
- /* 8x8 transpose */
- TRANSPOSE8x8_UB_UB(p7_org, p6_org, p5_org, p4_org, p3_org, p2_org, p1_org,
- p0_org, p7, p6, p5, p4, p3, p2, p1, p0);
- /* 8x8 transpose */
- ILVL_B4_SB(p5_org, p7_org, p4_org, p6_org, p1_org, p3_org, p0_org, p2_org,
- tmp0, tmp1, tmp2, tmp3);
- ILVR_B2_SB(tmp1, tmp0, tmp3, tmp2, tmp4, tmp6);
- ILVL_B2_SB(tmp1, tmp0, tmp3, tmp2, tmp5, tmp7);
- ILVR_W2_UB(tmp6, tmp4, tmp7, tmp5, q0, q4);
- ILVL_W2_UB(tmp6, tmp4, tmp7, tmp5, q2, q6);
- SLDI_B4_0_UB(q0, q2, q4, q6, q1, q3, q5, q7, 8);
-
- ST_UB8(p7, p6, p5, p4, p3, p2, p1, p0, output, out_pitch);
- output += (8 * out_pitch);
- ST_UB8(q0, q1, q2, q3, q4, q5, q6, q7, output, out_pitch);
-}
-
-static void transpose_8x16_to_16x8(uint8_t *input, int32_t in_pitch,
- uint8_t *output, int32_t out_pitch) {
- v16u8 p7_o, p6_o, p5_o, p4_o, p3_o, p2_o, p1_o, p0_o;
- v16u8 p7, p6, p5, p4, p3, p2, p1, p0, q0, q1, q2, q3, q4, q5, q6, q7;
-
- LD_UB8(input, in_pitch, p7, p6, p5, p4, p3, p2, p1, p0);
- LD_UB8(input + (8 * in_pitch), in_pitch, q0, q1, q2, q3, q4, q5, q6, q7);
- TRANSPOSE16x8_UB_UB(p7, p6, p5, p4, p3, p2, p1, p0, q0, q1, q2, q3, q4, q5,
- q6, q7, p7_o, p6_o, p5_o, p4_o, p3_o, p2_o, p1_o, p0_o);
- ST_UB8(p7_o, p6_o, p5_o, p4_o, p3_o, p2_o, p1_o, p0_o, output, out_pitch);
-}
-
-static void transpose_16x16(uint8_t *input, int32_t in_pitch, uint8_t *output,
- int32_t out_pitch) {
- v16u8 row0, row1, row2, row3, row4, row5, row6, row7;
- v16u8 row8, row9, row10, row11, row12, row13, row14, row15;
- v16u8 p7, p6, p5, p4, p3, p2, p1, p0, q0, q1, q2, q3, q4, q5, q6, q7;
- v8i16 tmp0, tmp1, tmp4, tmp5, tmp6, tmp7;
- v4i32 tmp2, tmp3;
-
- LD_UB8(input, in_pitch, row0, row1, row2, row3, row4, row5, row6, row7);
- input += (8 * in_pitch);
- LD_UB8(input, in_pitch, row8, row9, row10, row11, row12, row13, row14, row15);
-
- TRANSPOSE16x8_UB_UB(row0, row1, row2, row3, row4, row5, row6, row7, row8,
- row9, row10, row11, row12, row13, row14, row15, p7, p6,
- p5, p4, p3, p2, p1, p0);
-
- /* transpose 16x8 matrix into 8x16 */
- /* total 8 intermediate register and 32 instructions */
- q7 = (v16u8)__msa_ilvod_d((v2i64)row8, (v2i64)row0);
- q6 = (v16u8)__msa_ilvod_d((v2i64)row9, (v2i64)row1);
- q5 = (v16u8)__msa_ilvod_d((v2i64)row10, (v2i64)row2);
- q4 = (v16u8)__msa_ilvod_d((v2i64)row11, (v2i64)row3);
- q3 = (v16u8)__msa_ilvod_d((v2i64)row12, (v2i64)row4);
- q2 = (v16u8)__msa_ilvod_d((v2i64)row13, (v2i64)row5);
- q1 = (v16u8)__msa_ilvod_d((v2i64)row14, (v2i64)row6);
- q0 = (v16u8)__msa_ilvod_d((v2i64)row15, (v2i64)row7);
-
- ILVEV_B2_SH(q7, q6, q5, q4, tmp0, tmp1);
- tmp4 = (v8i16)__msa_ilvod_b((v16i8)q6, (v16i8)q7);
- tmp5 = (v8i16)__msa_ilvod_b((v16i8)q4, (v16i8)q5);
-
- ILVEV_B2_UB(q3, q2, q1, q0, q5, q7);
- tmp6 = (v8i16)__msa_ilvod_b((v16i8)q2, (v16i8)q3);
- tmp7 = (v8i16)__msa_ilvod_b((v16i8)q0, (v16i8)q1);
-
- ILVEV_H2_SW(tmp0, tmp1, q5, q7, tmp2, tmp3);
- q0 = (v16u8)__msa_ilvev_w(tmp3, tmp2);
- q4 = (v16u8)__msa_ilvod_w(tmp3, tmp2);
-
- tmp2 = (v4i32)__msa_ilvod_h(tmp1, tmp0);
- tmp3 = (v4i32)__msa_ilvod_h((v8i16)q7, (v8i16)q5);
- q2 = (v16u8)__msa_ilvev_w(tmp3, tmp2);
- q6 = (v16u8)__msa_ilvod_w(tmp3, tmp2);
-
- ILVEV_H2_SW(tmp4, tmp5, tmp6, tmp7, tmp2, tmp3);
- q1 = (v16u8)__msa_ilvev_w(tmp3, tmp2);
- q5 = (v16u8)__msa_ilvod_w(tmp3, tmp2);
-
- tmp2 = (v4i32)__msa_ilvod_h(tmp5, tmp4);
- tmp3 = (v4i32)__msa_ilvod_h(tmp7, tmp6);
- q3 = (v16u8)__msa_ilvev_w(tmp3, tmp2);
- q7 = (v16u8)__msa_ilvod_w(tmp3, tmp2);
-
- ST_UB8(p7, p6, p5, p4, p3, p2, p1, p0, output, out_pitch);
- output += (8 * out_pitch);
- ST_UB8(q0, q1, q2, q3, q4, q5, q6, q7, output, out_pitch);
-}
-
-int32_t aom_vt_lpf_t4_and_t8_8w(uint8_t *src, uint8_t *filter48,
- uint8_t *src_org, int32_t pitch_org,
- const uint8_t *b_limit_ptr,
- const uint8_t *limit_ptr,
- const uint8_t *thresh_ptr) {
- v16u8 p3, p2, p1, p0, q3, q2, q1, q0;
- v16u8 p2_out, p1_out, p0_out, q0_out, q1_out, q2_out;
- v16u8 flat, mask, hev, thresh, b_limit, limit;
- v8u16 p3_r, p2_r, p1_r, p0_r, q0_r, q1_r, q2_r, q3_r;
- v8i16 p2_filt8_r, p1_filt8_r, p0_filt8_r, q0_filt8_r, q1_filt8_r, q2_filt8_r;
- v16i8 zero = { 0 };
- v8i16 vec0, vec1, vec2, vec3;
-
- /* load vector elements */
- LD_UB8(src - (4 * 16), 16, p3, p2, p1, p0, q0, q1, q2, q3);
-
- thresh = (v16u8)__msa_fill_b(*thresh_ptr);
- b_limit = (v16u8)__msa_fill_b(*b_limit_ptr);
- limit = (v16u8)__msa_fill_b(*limit_ptr);
-
- /* mask and hev */
- LPF_MASK_HEV(p3, p2, p1, p0, q0, q1, q2, q3, limit, b_limit, thresh, hev,
- mask, flat);
- /* flat4 */
- AOM_FLAT4(p3, p2, p0, q0, q2, q3, flat);
- /* filter4 */
- AOM_LPF_FILTER4_8W(p1, p0, q0, q1, mask, hev, p1_out, p0_out, q0_out, q1_out);
-
- flat = (v16u8)__msa_ilvr_d((v2i64)zero, (v2i64)flat);
-
- if (__msa_test_bz_v(flat)) {
- ILVR_B2_SH(p0_out, p1_out, q1_out, q0_out, vec0, vec1);
- ILVRL_H2_SH(vec1, vec0, vec2, vec3);
- ST4x8_UB(vec2, vec3, (src_org - 2), pitch_org);
- return 1;
- } else {
- ILVR_B8_UH(zero, p3, zero, p2, zero, p1, zero, p0, zero, q0, zero, q1, zero,
- q2, zero, q3, p3_r, p2_r, p1_r, p0_r, q0_r, q1_r, q2_r, q3_r);
- AOM_FILTER8(p3_r, p2_r, p1_r, p0_r, q0_r, q1_r, q2_r, q3_r, p2_filt8_r,
- p1_filt8_r, p0_filt8_r, q0_filt8_r, q1_filt8_r, q2_filt8_r);
-
- /* convert 16 bit output data into 8 bit */
- p2_r = (v8u16)__msa_pckev_b((v16i8)p2_filt8_r, (v16i8)p2_filt8_r);
- p1_r = (v8u16)__msa_pckev_b((v16i8)p1_filt8_r, (v16i8)p1_filt8_r);
- p0_r = (v8u16)__msa_pckev_b((v16i8)p0_filt8_r, (v16i8)p0_filt8_r);
- q0_r = (v8u16)__msa_pckev_b((v16i8)q0_filt8_r, (v16i8)q0_filt8_r);
- q1_r = (v8u16)__msa_pckev_b((v16i8)q1_filt8_r, (v16i8)q1_filt8_r);
- q2_r = (v8u16)__msa_pckev_b((v16i8)q2_filt8_r, (v16i8)q2_filt8_r);
-
- /* store pixel values */
- p2_out = __msa_bmnz_v(p2, (v16u8)p2_r, flat);
- p1_out = __msa_bmnz_v(p1_out, (v16u8)p1_r, flat);
- p0_out = __msa_bmnz_v(p0_out, (v16u8)p0_r, flat);
- q0_out = __msa_bmnz_v(q0_out, (v16u8)q0_r, flat);
- q1_out = __msa_bmnz_v(q1_out, (v16u8)q1_r, flat);
- q2_out = __msa_bmnz_v(q2, (v16u8)q2_r, flat);
-
- ST_UB4(p2_out, p1_out, p0_out, q0_out, filter48, 16);
- filter48 += (4 * 16);
- ST_UB2(q1_out, q2_out, filter48, 16);
- filter48 += (2 * 16);
- ST_UB(flat, filter48);
-
- return 0;
- }
-}
-
-int32_t aom_vt_lpf_t16_8w(uint8_t *src, uint8_t *src_org, int32_t pitch,
- uint8_t *filter48) {
- v16i8 zero = { 0 };
- v16u8 filter8, flat, flat2;
- v16u8 p7, p6, p5, p4, p3, p2, p1, p0, q0, q1, q2, q3, q4, q5, q6, q7;
- v8u16 p7_r_in, p6_r_in, p5_r_in, p4_r_in, p3_r_in, p2_r_in, p1_r_in, p0_r_in;
- v8u16 q7_r_in, q6_r_in, q5_r_in, q4_r_in, q3_r_in, q2_r_in, q1_r_in, q0_r_in;
- v8u16 tmp0_r, tmp1_r;
- v8i16 r_out;
-
- flat = LD_UB(filter48 + 6 * 16);
-
- LD_UB8((src - 8 * 16), 16, p7, p6, p5, p4, p3, p2, p1, p0);
- LD_UB8(src, 16, q0, q1, q2, q3, q4, q5, q6, q7);
-
- AOM_FLAT5(p7, p6, p5, p4, p0, q0, q4, q5, q6, q7, flat, flat2);
-
- if (__msa_test_bz_v(flat2)) {
- v8i16 vec0, vec1, vec2, vec3, vec4;
-
- LD_UB4(filter48, 16, p2, p1, p0, q0);
- LD_UB2(filter48 + 4 * 16, 16, q1, q2);
-
- ILVR_B2_SH(p1, p2, q0, p0, vec0, vec1);
- ILVRL_H2_SH(vec1, vec0, vec3, vec4);
- vec2 = (v8i16)__msa_ilvr_b((v16i8)q2, (v16i8)q1);
-
- src_org -= 3;
- ST4x4_UB(vec3, vec3, 0, 1, 2, 3, src_org, pitch);
- ST2x4_UB(vec2, 0, (src_org + 4), pitch);
- src_org += (4 * pitch);
- ST4x4_UB(vec4, vec4, 0, 1, 2, 3, src_org, pitch);
- ST2x4_UB(vec2, 4, (src_org + 4), pitch);
-
- return 1;
- } else {
- src -= 7 * 16;
-
- ILVR_B8_UH(zero, p7, zero, p6, zero, p5, zero, p4, zero, p3, zero, p2, zero,
- p1, zero, p0, p7_r_in, p6_r_in, p5_r_in, p4_r_in, p3_r_in,
- p2_r_in, p1_r_in, p0_r_in);
- q0_r_in = (v8u16)__msa_ilvr_b(zero, (v16i8)q0);
-
- tmp0_r = p7_r_in << 3;
- tmp0_r -= p7_r_in;
- tmp0_r += p6_r_in;
- tmp0_r += q0_r_in;
- tmp1_r = p6_r_in + p5_r_in;
- tmp1_r += p4_r_in;
- tmp1_r += p3_r_in;
- tmp1_r += p2_r_in;
- tmp1_r += p1_r_in;
- tmp1_r += p0_r_in;
- tmp1_r += tmp0_r;
-
- r_out = __msa_srari_h((v8i16)tmp1_r, 4);
- r_out = (v8i16)__msa_pckev_b((v16i8)r_out, (v16i8)r_out);
- p6 = __msa_bmnz_v(p6, (v16u8)r_out, flat2);
- ST8x1_UB(p6, src);
- src += 16;
-
- /* p5 */
- q1_r_in = (v8u16)__msa_ilvr_b(zero, (v16i8)q1);
- tmp0_r = p5_r_in - p6_r_in;
- tmp0_r += q1_r_in;
- tmp0_r -= p7_r_in;
- tmp1_r += tmp0_r;
- r_out = __msa_srari_h((v8i16)tmp1_r, 4);
- r_out = (v8i16)__msa_pckev_b((v16i8)r_out, (v16i8)r_out);
- p5 = __msa_bmnz_v(p5, (v16u8)r_out, flat2);
- ST8x1_UB(p5, src);
- src += 16;
-
- /* p4 */
- q2_r_in = (v8u16)__msa_ilvr_b(zero, (v16i8)q2);
- tmp0_r = p4_r_in - p5_r_in;
- tmp0_r += q2_r_in;
- tmp0_r -= p7_r_in;
- tmp1_r += tmp0_r;
- r_out = __msa_srari_h((v8i16)tmp1_r, 4);
- r_out = (v8i16)__msa_pckev_b((v16i8)r_out, (v16i8)r_out);
- p4 = __msa_bmnz_v(p4, (v16u8)r_out, flat2);
- ST8x1_UB(p4, src);
- src += 16;
-
- /* p3 */
- q3_r_in = (v8u16)__msa_ilvr_b(zero, (v16i8)q3);
- tmp0_r = p3_r_in - p4_r_in;
- tmp0_r += q3_r_in;
- tmp0_r -= p7_r_in;
- tmp1_r += tmp0_r;
- r_out = __msa_srari_h((v8i16)tmp1_r, 4);
- r_out = (v8i16)__msa_pckev_b((v16i8)r_out, (v16i8)r_out);
- p3 = __msa_bmnz_v(p3, (v16u8)r_out, flat2);
- ST8x1_UB(p3, src);
- src += 16;
-
- /* p2 */
- q4_r_in = (v8u16)__msa_ilvr_b(zero, (v16i8)q4);
- filter8 = LD_UB(filter48);
- tmp0_r = p2_r_in - p3_r_in;
- tmp0_r += q4_r_in;
- tmp0_r -= p7_r_in;
- tmp1_r += tmp0_r;
- r_out = __msa_srari_h((v8i16)tmp1_r, 4);
- r_out = (v8i16)__msa_pckev_b((v16i8)r_out, (v16i8)r_out);
- filter8 = __msa_bmnz_v(filter8, (v16u8)r_out, flat2);
- ST8x1_UB(filter8, src);
- src += 16;
-
- /* p1 */
- q5_r_in = (v8u16)__msa_ilvr_b(zero, (v16i8)q5);
- filter8 = LD_UB(filter48 + 16);
- tmp0_r = p1_r_in - p2_r_in;
- tmp0_r += q5_r_in;
- tmp0_r -= p7_r_in;
- tmp1_r += tmp0_r;
- r_out = __msa_srari_h((v8i16)tmp1_r, 4);
- r_out = (v8i16)__msa_pckev_b((v16i8)r_out, (v16i8)r_out);
- filter8 = __msa_bmnz_v(filter8, (v16u8)r_out, flat2);
- ST8x1_UB(filter8, src);
- src += 16;
-
- /* p0 */
- q6_r_in = (v8u16)__msa_ilvr_b(zero, (v16i8)q6);
- filter8 = LD_UB(filter48 + 32);
- tmp0_r = p0_r_in - p1_r_in;
- tmp0_r += q6_r_in;
- tmp0_r -= p7_r_in;
- tmp1_r += tmp0_r;
- r_out = __msa_srari_h((v8i16)tmp1_r, 4);
- r_out = (v8i16)__msa_pckev_b((v16i8)r_out, (v16i8)r_out);
- filter8 = __msa_bmnz_v(filter8, (v16u8)r_out, flat2);
- ST8x1_UB(filter8, src);
- src += 16;
-
- /* q0 */
- q7_r_in = (v8u16)__msa_ilvr_b(zero, (v16i8)q7);
- filter8 = LD_UB(filter48 + 48);
- tmp0_r = q7_r_in - p0_r_in;
- tmp0_r += q0_r_in;
- tmp0_r -= p7_r_in;
- tmp1_r += tmp0_r;
- r_out = __msa_srari_h((v8i16)tmp1_r, 4);
- r_out = (v8i16)__msa_pckev_b((v16i8)r_out, (v16i8)r_out);
- filter8 = __msa_bmnz_v(filter8, (v16u8)r_out, flat2);
- ST8x1_UB(filter8, src);
- src += 16;
-
- /* q1 */
- filter8 = LD_UB(filter48 + 64);
- tmp0_r = q7_r_in - q0_r_in;
- tmp0_r += q1_r_in;
- tmp0_r -= p6_r_in;
- tmp1_r += tmp0_r;
- r_out = __msa_srari_h((v8i16)tmp1_r, 4);
- r_out = (v8i16)__msa_pckev_b((v16i8)r_out, (v16i8)r_out);
- filter8 = __msa_bmnz_v(filter8, (v16u8)r_out, flat2);
- ST8x1_UB(filter8, src);
- src += 16;
-
- /* q2 */
- filter8 = LD_UB(filter48 + 80);
- tmp0_r = q7_r_in - q1_r_in;
- tmp0_r += q2_r_in;
- tmp0_r -= p5_r_in;
- tmp1_r += tmp0_r;
- r_out = __msa_srari_h((v8i16)tmp1_r, 4);
- r_out = (v8i16)__msa_pckev_b((v16i8)r_out, (v16i8)r_out);
- filter8 = __msa_bmnz_v(filter8, (v16u8)r_out, flat2);
- ST8x1_UB(filter8, src);
- src += 16;
-
- /* q3 */
- tmp0_r = q7_r_in - q2_r_in;
- tmp0_r += q3_r_in;
- tmp0_r -= p4_r_in;
- tmp1_r += tmp0_r;
- r_out = __msa_srari_h((v8i16)tmp1_r, 4);
- r_out = (v8i16)__msa_pckev_b((v16i8)r_out, (v16i8)r_out);
- q3 = __msa_bmnz_v(q3, (v16u8)r_out, flat2);
- ST8x1_UB(q3, src);
- src += 16;
-
- /* q4 */
- tmp0_r = q7_r_in - q3_r_in;
- tmp0_r += q4_r_in;
- tmp0_r -= p3_r_in;
- tmp1_r += tmp0_r;
- r_out = __msa_srari_h((v8i16)tmp1_r, 4);
- r_out = (v8i16)__msa_pckev_b((v16i8)r_out, (v16i8)r_out);
- q4 = __msa_bmnz_v(q4, (v16u8)r_out, flat2);
- ST8x1_UB(q4, src);
- src += 16;
-
- /* q5 */
- tmp0_r = q7_r_in - q4_r_in;
- tmp0_r += q5_r_in;
- tmp0_r -= p2_r_in;
- tmp1_r += tmp0_r;
- r_out = __msa_srari_h((v8i16)tmp1_r, 4);
- r_out = (v8i16)__msa_pckev_b((v16i8)r_out, (v16i8)r_out);
- q5 = __msa_bmnz_v(q5, (v16u8)r_out, flat2);
- ST8x1_UB(q5, src);
- src += 16;
-
- /* q6 */
- tmp0_r = q7_r_in - q5_r_in;
- tmp0_r += q6_r_in;
- tmp0_r -= p1_r_in;
- tmp1_r += tmp0_r;
- r_out = __msa_srari_h((v8i16)tmp1_r, 4);
- r_out = (v8i16)__msa_pckev_b((v16i8)r_out, (v16i8)r_out);
- q6 = __msa_bmnz_v(q6, (v16u8)r_out, flat2);
- ST8x1_UB(q6, src);
-
- return 0;
- }
-}
-
-void aom_lpf_vertical_16_msa(uint8_t *src, int32_t pitch,
- const uint8_t *b_limit_ptr,
- const uint8_t *limit_ptr,
- const uint8_t *thresh_ptr) {
- uint8_t early_exit = 0;
- DECLARE_ALIGNED(32, uint8_t, transposed_input[16 * 24]);
- uint8_t *filter48 = &transposed_input[16 * 16];
-
- transpose_16x8_to_8x16(src - 8, pitch, transposed_input, 16);
-
- early_exit =
- aom_vt_lpf_t4_and_t8_8w((transposed_input + 16 * 8), &filter48[0], src,
- pitch, b_limit_ptr, limit_ptr, thresh_ptr);
-
- if (0 == early_exit) {
- early_exit = aom_vt_lpf_t16_8w((transposed_input + 16 * 8), src, pitch,
- &filter48[0]);
-
- if (0 == early_exit) {
- transpose_8x16_to_16x8(transposed_input, 16, src - 8, pitch);
- }
- }
-}
-
-int32_t aom_vt_lpf_t4_and_t8_16w(uint8_t *src, uint8_t *filter48,
- uint8_t *src_org, int32_t pitch,
- const uint8_t *b_limit_ptr,
- const uint8_t *limit_ptr,
- const uint8_t *thresh_ptr) {
- v16u8 p3, p2, p1, p0, q3, q2, q1, q0;
- v16u8 p2_out, p1_out, p0_out, q0_out, q1_out, q2_out;
- v16u8 flat, mask, hev, thresh, b_limit, limit;
- v8u16 p3_r, p2_r, p1_r, p0_r, q0_r, q1_r, q2_r, q3_r;
- v8u16 p3_l, p2_l, p1_l, p0_l, q0_l, q1_l, q2_l, q3_l;
- v8i16 p2_filt8_r, p1_filt8_r, p0_filt8_r, q0_filt8_r, q1_filt8_r, q2_filt8_r;
- v8i16 p2_filt8_l, p1_filt8_l, p0_filt8_l, q0_filt8_l, q1_filt8_l, q2_filt8_l;
- v16i8 zero = { 0 };
- v8i16 vec0, vec1, vec2, vec3, vec4, vec5;
-
- /* load vector elements */
- LD_UB8(src - (4 * 16), 16, p3, p2, p1, p0, q0, q1, q2, q3);
-
- thresh = (v16u8)__msa_fill_b(*thresh_ptr);
- b_limit = (v16u8)__msa_fill_b(*b_limit_ptr);
- limit = (v16u8)__msa_fill_b(*limit_ptr);
-
- /* mask and hev */
- LPF_MASK_HEV(p3, p2, p1, p0, q0, q1, q2, q3, limit, b_limit, thresh, hev,
- mask, flat);
- /* flat4 */
- AOM_FLAT4(p3, p2, p0, q0, q2, q3, flat);
- /* filter4 */
- AOM_LPF_FILTER4_4W(p1, p0, q0, q1, mask, hev, p1_out, p0_out, q0_out, q1_out);
-
- if (__msa_test_bz_v(flat)) {
- ILVR_B2_SH(p0_out, p1_out, q1_out, q0_out, vec0, vec1);
- ILVRL_H2_SH(vec1, vec0, vec2, vec3);
- ILVL_B2_SH(p0_out, p1_out, q1_out, q0_out, vec0, vec1);
- ILVRL_H2_SH(vec1, vec0, vec4, vec5);
-
- src_org -= 2;
- ST4x8_UB(vec2, vec3, src_org, pitch);
- src_org += 8 * pitch;
- ST4x8_UB(vec4, vec5, src_org, pitch);
-
- return 1;
- } else {
- ILVR_B8_UH(zero, p3, zero, p2, zero, p1, zero, p0, zero, q0, zero, q1, zero,
- q2, zero, q3, p3_r, p2_r, p1_r, p0_r, q0_r, q1_r, q2_r, q3_r);
- AOM_FILTER8(p3_r, p2_r, p1_r, p0_r, q0_r, q1_r, q2_r, q3_r, p2_filt8_r,
- p1_filt8_r, p0_filt8_r, q0_filt8_r, q1_filt8_r, q2_filt8_r);
- ILVL_B4_UH(zero, p3, zero, p2, zero, p1, zero, p0, p3_l, p2_l, p1_l, p0_l);
- ILVL_B4_UH(zero, q0, zero, q1, zero, q2, zero, q3, q0_l, q1_l, q2_l, q3_l);
- AOM_FILTER8(p3_l, p2_l, p1_l, p0_l, q0_l, q1_l, q2_l, q3_l, p2_filt8_l,
- p1_filt8_l, p0_filt8_l, q0_filt8_l, q1_filt8_l, q2_filt8_l);
-
- /* convert 16 bit output data into 8 bit */
- PCKEV_B4_SH(p2_filt8_l, p2_filt8_r, p1_filt8_l, p1_filt8_r, p0_filt8_l,
- p0_filt8_r, q0_filt8_l, q0_filt8_r, p2_filt8_r, p1_filt8_r,
- p0_filt8_r, q0_filt8_r);
- PCKEV_B2_SH(q1_filt8_l, q1_filt8_r, q2_filt8_l, q2_filt8_r, q1_filt8_r,
- q2_filt8_r);
-
- /* store pixel values */
- p2_out = __msa_bmnz_v(p2, (v16u8)p2_filt8_r, flat);
- p1_out = __msa_bmnz_v(p1_out, (v16u8)p1_filt8_r, flat);
- p0_out = __msa_bmnz_v(p0_out, (v16u8)p0_filt8_r, flat);
- q0_out = __msa_bmnz_v(q0_out, (v16u8)q0_filt8_r, flat);
- q1_out = __msa_bmnz_v(q1_out, (v16u8)q1_filt8_r, flat);
- q2_out = __msa_bmnz_v(q2, (v16u8)q2_filt8_r, flat);
-
- ST_UB4(p2_out, p1_out, p0_out, q0_out, filter48, 16);
- filter48 += (4 * 16);
- ST_UB2(q1_out, q2_out, filter48, 16);
- filter48 += (2 * 16);
- ST_UB(flat, filter48);
-
- return 0;
- }
-}
-
-int32_t aom_vt_lpf_t16_16w(uint8_t *src, uint8_t *src_org, int32_t pitch,
- uint8_t *filter48) {
- v16u8 flat, flat2, filter8;
- v16i8 zero = { 0 };
- v16u8 p7, p6, p5, p4, p3, p2, p1, p0, q0, q1, q2, q3, q4, q5, q6, q7;
- v8u16 p7_r_in, p6_r_in, p5_r_in, p4_r_in, p3_r_in, p2_r_in, p1_r_in, p0_r_in;
- v8u16 q7_r_in, q6_r_in, q5_r_in, q4_r_in, q3_r_in, q2_r_in, q1_r_in, q0_r_in;
- v8u16 p7_l_in, p6_l_in, p5_l_in, p4_l_in, p3_l_in, p2_l_in, p1_l_in, p0_l_in;
- v8u16 q7_l_in, q6_l_in, q5_l_in, q4_l_in, q3_l_in, q2_l_in, q1_l_in, q0_l_in;
- v8u16 tmp0_r, tmp1_r, tmp0_l, tmp1_l;
- v8i16 l_out, r_out;
-
- flat = LD_UB(filter48 + 6 * 16);
-
- LD_UB8((src - 8 * 16), 16, p7, p6, p5, p4, p3, p2, p1, p0);
- LD_UB8(src, 16, q0, q1, q2, q3, q4, q5, q6, q7);
-
- AOM_FLAT5(p7, p6, p5, p4, p0, q0, q4, q5, q6, q7, flat, flat2);
-
- if (__msa_test_bz_v(flat2)) {
- v8i16 vec0, vec1, vec2, vec3, vec4, vec5, vec6, vec7;
-
- LD_UB4(filter48, 16, p2, p1, p0, q0);
- LD_UB2(filter48 + 4 * 16, 16, q1, q2);
-
- ILVR_B2_SH(p1, p2, q0, p0, vec0, vec1);
- ILVRL_H2_SH(vec1, vec0, vec3, vec4);
- ILVL_B2_SH(p1, p2, q0, p0, vec0, vec1);
- ILVRL_H2_SH(vec1, vec0, vec6, vec7);
- ILVRL_B2_SH(q2, q1, vec2, vec5);
-
- src_org -= 3;
- ST4x4_UB(vec3, vec3, 0, 1, 2, 3, src_org, pitch);
- ST2x4_UB(vec2, 0, (src_org + 4), pitch);
- src_org += (4 * pitch);
- ST4x4_UB(vec4, vec4, 0, 1, 2, 3, src_org, pitch);
- ST2x4_UB(vec2, 4, (src_org + 4), pitch);
- src_org += (4 * pitch);
- ST4x4_UB(vec6, vec6, 0, 1, 2, 3, src_org, pitch);
- ST2x4_UB(vec5, 0, (src_org + 4), pitch);
- src_org += (4 * pitch);
- ST4x4_UB(vec7, vec7, 0, 1, 2, 3, src_org, pitch);
- ST2x4_UB(vec5, 4, (src_org + 4), pitch);
-
- return 1;
- } else {
- src -= 7 * 16;
-
- ILVR_B8_UH(zero, p7, zero, p6, zero, p5, zero, p4, zero, p3, zero, p2, zero,
- p1, zero, p0, p7_r_in, p6_r_in, p5_r_in, p4_r_in, p3_r_in,
- p2_r_in, p1_r_in, p0_r_in);
- q0_r_in = (v8u16)__msa_ilvr_b(zero, (v16i8)q0);
-
- tmp0_r = p7_r_in << 3;
- tmp0_r -= p7_r_in;
- tmp0_r += p6_r_in;
- tmp0_r += q0_r_in;
- tmp1_r = p6_r_in + p5_r_in;
- tmp1_r += p4_r_in;
- tmp1_r += p3_r_in;
- tmp1_r += p2_r_in;
- tmp1_r += p1_r_in;
- tmp1_r += p0_r_in;
- tmp1_r += tmp0_r;
- r_out = __msa_srari_h((v8i16)tmp1_r, 4);
-
- ILVL_B4_UH(zero, p7, zero, p6, zero, p5, zero, p4, p7_l_in, p6_l_in,
- p5_l_in, p4_l_in);
- ILVL_B4_UH(zero, p3, zero, p2, zero, p1, zero, p0, p3_l_in, p2_l_in,
- p1_l_in, p0_l_in);
- q0_l_in = (v8u16)__msa_ilvl_b(zero, (v16i8)q0);
-
- tmp0_l = p7_l_in << 3;
- tmp0_l -= p7_l_in;
- tmp0_l += p6_l_in;
- tmp0_l += q0_l_in;
- tmp1_l = p6_l_in + p5_l_in;
- tmp1_l += p4_l_in;
- tmp1_l += p3_l_in;
- tmp1_l += p2_l_in;
- tmp1_l += p1_l_in;
- tmp1_l += p0_l_in;
- tmp1_l += tmp0_l;
- l_out = __msa_srari_h((v8i16)tmp1_l, 4);
-
- r_out = (v8i16)__msa_pckev_b((v16i8)l_out, (v16i8)r_out);
- p6 = __msa_bmnz_v(p6, (v16u8)r_out, flat2);
- ST_UB(p6, src);
- src += 16;
-
- /* p5 */
- q1_r_in = (v8u16)__msa_ilvr_b(zero, (v16i8)q1);
- tmp0_r = p5_r_in - p6_r_in;
- tmp0_r += q1_r_in;
- tmp0_r -= p7_r_in;
- tmp1_r += tmp0_r;
- r_out = __msa_srari_h((v8i16)tmp1_r, 4);
- q1_l_in = (v8u16)__msa_ilvl_b(zero, (v16i8)q1);
- tmp0_l = p5_l_in - p6_l_in;
- tmp0_l += q1_l_in;
- tmp0_l -= p7_l_in;
- tmp1_l += tmp0_l;
- l_out = __msa_srari_h((v8i16)tmp1_l, 4);
- r_out = (v8i16)__msa_pckev_b((v16i8)l_out, (v16i8)r_out);
- p5 = __msa_bmnz_v(p5, (v16u8)r_out, flat2);
- ST_UB(p5, src);
- src += 16;
-
- /* p4 */
- q2_r_in = (v8u16)__msa_ilvr_b(zero, (v16i8)q2);
- tmp0_r = p4_r_in - p5_r_in;
- tmp0_r += q2_r_in;
- tmp0_r -= p7_r_in;
- tmp1_r += tmp0_r;
- r_out = __msa_srari_h((v8i16)tmp1_r, 4);
- q2_l_in = (v8u16)__msa_ilvl_b(zero, (v16i8)q2);
- tmp0_l = p4_l_in - p5_l_in;
- tmp0_l += q2_l_in;
- tmp0_l -= p7_l_in;
- tmp1_l += tmp0_l;
- l_out = __msa_srari_h((v8i16)tmp1_l, 4);
- r_out = (v8i16)__msa_pckev_b((v16i8)l_out, (v16i8)r_out);
- p4 = __msa_bmnz_v(p4, (v16u8)r_out, flat2);
- ST_UB(p4, src);
- src += 16;
-
- /* p3 */
- q3_r_in = (v8u16)__msa_ilvr_b(zero, (v16i8)q3);
- tmp0_r = p3_r_in - p4_r_in;
- tmp0_r += q3_r_in;
- tmp0_r -= p7_r_in;
- tmp1_r += tmp0_r;
- r_out = __msa_srari_h((v8i16)tmp1_r, 4);
- q3_l_in = (v8u16)__msa_ilvl_b(zero, (v16i8)q3);
- tmp0_l = p3_l_in - p4_l_in;
- tmp0_l += q3_l_in;
- tmp0_l -= p7_l_in;
- tmp1_l += tmp0_l;
- l_out = __msa_srari_h((v8i16)tmp1_l, 4);
- r_out = (v8i16)__msa_pckev_b((v16i8)l_out, (v16i8)r_out);
- p3 = __msa_bmnz_v(p3, (v16u8)r_out, flat2);
- ST_UB(p3, src);
- src += 16;
-
- /* p2 */
- q4_r_in = (v8u16)__msa_ilvr_b(zero, (v16i8)q4);
- filter8 = LD_UB(filter48);
- tmp0_r = p2_r_in - p3_r_in;
- tmp0_r += q4_r_in;
- tmp0_r -= p7_r_in;
- tmp1_r += tmp0_r;
- r_out = __msa_srari_h((v8i16)tmp1_r, 4);
- q4_l_in = (v8u16)__msa_ilvl_b(zero, (v16i8)q4);
- tmp0_l = p2_l_in - p3_l_in;
- tmp0_l += q4_l_in;
- tmp0_l -= p7_l_in;
- tmp1_l += tmp0_l;
- l_out = __msa_srari_h((v8i16)tmp1_l, 4);
- r_out = (v8i16)__msa_pckev_b((v16i8)l_out, (v16i8)r_out);
- filter8 = __msa_bmnz_v(filter8, (v16u8)r_out, flat2);
- ST_UB(filter8, src);
- src += 16;
-
- /* p1 */
- q5_r_in = (v8u16)__msa_ilvr_b(zero, (v16i8)q5);
- filter8 = LD_UB(filter48 + 16);
- tmp0_r = p1_r_in - p2_r_in;
- tmp0_r += q5_r_in;
- tmp0_r -= p7_r_in;
- tmp1_r += tmp0_r;
- r_out = __msa_srari_h((v8i16)tmp1_r, 4);
- q5_l_in = (v8u16)__msa_ilvl_b(zero, (v16i8)q5);
- tmp0_l = p1_l_in - p2_l_in;
- tmp0_l += q5_l_in;
- tmp0_l -= p7_l_in;
- tmp1_l += tmp0_l;
- l_out = __msa_srari_h((v8i16)(tmp1_l), 4);
- r_out = (v8i16)__msa_pckev_b((v16i8)l_out, (v16i8)r_out);
- filter8 = __msa_bmnz_v(filter8, (v16u8)r_out, flat2);
- ST_UB(filter8, src);
- src += 16;
-
- /* p0 */
- q6_r_in = (v8u16)__msa_ilvr_b(zero, (v16i8)q6);
- filter8 = LD_UB(filter48 + 32);
- tmp0_r = p0_r_in - p1_r_in;
- tmp0_r += q6_r_in;
- tmp0_r -= p7_r_in;
- tmp1_r += tmp0_r;
- r_out = __msa_srari_h((v8i16)tmp1_r, 4);
- q6_l_in = (v8u16)__msa_ilvl_b(zero, (v16i8)q6);
- tmp0_l = p0_l_in - p1_l_in;
- tmp0_l += q6_l_in;
- tmp0_l -= p7_l_in;
- tmp1_l += tmp0_l;
- l_out = __msa_srari_h((v8i16)tmp1_l, 4);
- r_out = (v8i16)__msa_pckev_b((v16i8)l_out, (v16i8)r_out);
- filter8 = __msa_bmnz_v(filter8, (v16u8)r_out, flat2);
- ST_UB(filter8, src);
- src += 16;
-
- /* q0 */
- q7_r_in = (v8u16)__msa_ilvr_b(zero, (v16i8)q7);
- filter8 = LD_UB(filter48 + 48);
- tmp0_r = q7_r_in - p0_r_in;
- tmp0_r += q0_r_in;
- tmp0_r -= p7_r_in;
- tmp1_r += tmp0_r;
- r_out = __msa_srari_h((v8i16)tmp1_r, 4);
- q7_l_in = (v8u16)__msa_ilvl_b(zero, (v16i8)q7);
- tmp0_l = q7_l_in - p0_l_in;
- tmp0_l += q0_l_in;
- tmp0_l -= p7_l_in;
- tmp1_l += tmp0_l;
- l_out = __msa_srari_h((v8i16)tmp1_l, 4);
- r_out = (v8i16)__msa_pckev_b((v16i8)l_out, (v16i8)r_out);
- filter8 = __msa_bmnz_v(filter8, (v16u8)r_out, flat2);
- ST_UB(filter8, src);
- src += 16;
-
- /* q1 */
- filter8 = LD_UB(filter48 + 64);
- tmp0_r = q7_r_in - q0_r_in;
- tmp0_r += q1_r_in;
- tmp0_r -= p6_r_in;
- tmp1_r += tmp0_r;
- r_out = __msa_srari_h((v8i16)tmp1_r, 4);
- tmp0_l = q7_l_in - q0_l_in;
- tmp0_l += q1_l_in;
- tmp0_l -= p6_l_in;
- tmp1_l += tmp0_l;
- l_out = __msa_srari_h((v8i16)tmp1_l, 4);
- r_out = (v8i16)__msa_pckev_b((v16i8)l_out, (v16i8)r_out);
- filter8 = __msa_bmnz_v(filter8, (v16u8)r_out, flat2);
- ST_UB(filter8, src);
- src += 16;
-
- /* q2 */
- filter8 = LD_UB(filter48 + 80);
- tmp0_r = q7_r_in - q1_r_in;
- tmp0_r += q2_r_in;
- tmp0_r -= p5_r_in;
- tmp1_r += tmp0_r;
- r_out = __msa_srari_h((v8i16)tmp1_r, 4);
- tmp0_l = q7_l_in - q1_l_in;
- tmp0_l += q2_l_in;
- tmp0_l -= p5_l_in;
- tmp1_l += tmp0_l;
- l_out = __msa_srari_h((v8i16)tmp1_l, 4);
- r_out = (v8i16)__msa_pckev_b((v16i8)l_out, (v16i8)r_out);
- filter8 = __msa_bmnz_v(filter8, (v16u8)r_out, flat2);
- ST_UB(filter8, src);
- src += 16;
-
- /* q3 */
- tmp0_r = q7_r_in - q2_r_in;
- tmp0_r += q3_r_in;
- tmp0_r -= p4_r_in;
- tmp1_r += tmp0_r;
- r_out = __msa_srari_h((v8i16)tmp1_r, 4);
- tmp0_l = q7_l_in - q2_l_in;
- tmp0_l += q3_l_in;
- tmp0_l -= p4_l_in;
- tmp1_l += tmp0_l;
- l_out = __msa_srari_h((v8i16)tmp1_l, 4);
- r_out = (v8i16)__msa_pckev_b((v16i8)l_out, (v16i8)r_out);
- q3 = __msa_bmnz_v(q3, (v16u8)r_out, flat2);
- ST_UB(q3, src);
- src += 16;
-
- /* q4 */
- tmp0_r = q7_r_in - q3_r_in;
- tmp0_r += q4_r_in;
- tmp0_r -= p3_r_in;
- tmp1_r += tmp0_r;
- r_out = __msa_srari_h((v8i16)tmp1_r, 4);
- tmp0_l = q7_l_in - q3_l_in;
- tmp0_l += q4_l_in;
- tmp0_l -= p3_l_in;
- tmp1_l += tmp0_l;
- l_out = __msa_srari_h((v8i16)tmp1_l, 4);
- r_out = (v8i16)__msa_pckev_b((v16i8)l_out, (v16i8)r_out);
- q4 = __msa_bmnz_v(q4, (v16u8)r_out, flat2);
- ST_UB(q4, src);
- src += 16;
-
- /* q5 */
- tmp0_r = q7_r_in - q4_r_in;
- tmp0_r += q5_r_in;
- tmp0_r -= p2_r_in;
- tmp1_r += tmp0_r;
- r_out = __msa_srari_h((v8i16)tmp1_r, 4);
- tmp0_l = q7_l_in - q4_l_in;
- tmp0_l += q5_l_in;
- tmp0_l -= p2_l_in;
- tmp1_l += tmp0_l;
- l_out = __msa_srari_h((v8i16)tmp1_l, 4);
- r_out = (v8i16)__msa_pckev_b((v16i8)l_out, (v16i8)r_out);
- q5 = __msa_bmnz_v(q5, (v16u8)r_out, flat2);
- ST_UB(q5, src);
- src += 16;
-
- /* q6 */
- tmp0_r = q7_r_in - q5_r_in;
- tmp0_r += q6_r_in;
- tmp0_r -= p1_r_in;
- tmp1_r += tmp0_r;
- r_out = __msa_srari_h((v8i16)tmp1_r, 4);
- tmp0_l = q7_l_in - q5_l_in;
- tmp0_l += q6_l_in;
- tmp0_l -= p1_l_in;
- tmp1_l += tmp0_l;
- l_out = __msa_srari_h((v8i16)tmp1_l, 4);
- r_out = (v8i16)__msa_pckev_b((v16i8)l_out, (v16i8)r_out);
- q6 = __msa_bmnz_v(q6, (v16u8)r_out, flat2);
- ST_UB(q6, src);
-
- return 0;
- }
-}
-
-void aom_lpf_vertical_16_dual_msa(uint8_t *src, int32_t pitch,
- const uint8_t *b_limit_ptr,
- const uint8_t *limit_ptr,
- const uint8_t *thresh_ptr) {
- uint8_t early_exit = 0;
- DECLARE_ALIGNED(32, uint8_t, transposed_input[16 * 24]);
- uint8_t *filter48 = &transposed_input[16 * 16];
-
- transpose_16x16((src - 8), pitch, &transposed_input[0], 16);
-
- early_exit =
- aom_vt_lpf_t4_and_t8_16w((transposed_input + 16 * 8), &filter48[0], src,
- pitch, b_limit_ptr, limit_ptr, thresh_ptr);
-
- if (0 == early_exit) {
- early_exit = aom_vt_lpf_t16_16w((transposed_input + 16 * 8), src, pitch,
- &filter48[0]);
-
- if (0 == early_exit) {
- transpose_16x16(transposed_input, 16, (src - 8), pitch);
- }
- }
-}
diff --git a/third_party/aom/aom_dsp/mips/loopfilter_4_msa.c b/third_party/aom/aom_dsp/mips/loopfilter_4_msa.c
deleted file mode 100644
index dc0a97764..000000000
--- a/third_party/aom/aom_dsp/mips/loopfilter_4_msa.c
+++ /dev/null
@@ -1,147 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#include "aom_dsp/mips/loopfilter_msa.h"
-
-void aom_lpf_horizontal_4_msa(uint8_t *src, int32_t pitch,
- const uint8_t *b_limit_ptr,
- const uint8_t *limit_ptr,
- const uint8_t *thresh_ptr) {
- uint64_t p1_d, p0_d, q0_d, q1_d;
- v16u8 mask, hev, flat, thresh, b_limit, limit;
- v16u8 p3, p2, p1, p0, q3, q2, q1, q0, p1_out, p0_out, q0_out, q1_out;
-
- /* load vector elements */
- LD_UB8((src - 4 * pitch), pitch, p3, p2, p1, p0, q0, q1, q2, q3);
-
- thresh = (v16u8)__msa_fill_b(*thresh_ptr);
- b_limit = (v16u8)__msa_fill_b(*b_limit_ptr);
- limit = (v16u8)__msa_fill_b(*limit_ptr);
-
- LPF_MASK_HEV(p3, p2, p1, p0, q0, q1, q2, q3, limit, b_limit, thresh, hev,
- mask, flat);
- AOM_LPF_FILTER4_8W(p1, p0, q0, q1, mask, hev, p1_out, p0_out, q0_out, q1_out);
-
- p1_d = __msa_copy_u_d((v2i64)p1_out, 0);
- p0_d = __msa_copy_u_d((v2i64)p0_out, 0);
- q0_d = __msa_copy_u_d((v2i64)q0_out, 0);
- q1_d = __msa_copy_u_d((v2i64)q1_out, 0);
- SD4(p1_d, p0_d, q0_d, q1_d, (src - 2 * pitch), pitch);
-}
-
-void aom_lpf_horizontal_4_dual_msa(uint8_t *src, int32_t pitch,
- const uint8_t *b_limit0_ptr,
- const uint8_t *limit0_ptr,
- const uint8_t *thresh0_ptr,
- const uint8_t *b_limit1_ptr,
- const uint8_t *limit1_ptr,
- const uint8_t *thresh1_ptr) {
- v16u8 mask, hev, flat, thresh0, b_limit0, limit0, thresh1, b_limit1, limit1;
- v16u8 p3, p2, p1, p0, q3, q2, q1, q0;
-
- /* load vector elements */
- LD_UB8((src - 4 * pitch), pitch, p3, p2, p1, p0, q0, q1, q2, q3);
-
- thresh0 = (v16u8)__msa_fill_b(*thresh0_ptr);
- thresh1 = (v16u8)__msa_fill_b(*thresh1_ptr);
- thresh0 = (v16u8)__msa_ilvr_d((v2i64)thresh1, (v2i64)thresh0);
-
- b_limit0 = (v16u8)__msa_fill_b(*b_limit0_ptr);
- b_limit1 = (v16u8)__msa_fill_b(*b_limit1_ptr);
- b_limit0 = (v16u8)__msa_ilvr_d((v2i64)b_limit1, (v2i64)b_limit0);
-
- limit0 = (v16u8)__msa_fill_b(*limit0_ptr);
- limit1 = (v16u8)__msa_fill_b(*limit1_ptr);
- limit0 = (v16u8)__msa_ilvr_d((v2i64)limit1, (v2i64)limit0);
-
- LPF_MASK_HEV(p3, p2, p1, p0, q0, q1, q2, q3, limit0, b_limit0, thresh0, hev,
- mask, flat);
- AOM_LPF_FILTER4_4W(p1, p0, q0, q1, mask, hev, p1, p0, q0, q1);
-
- ST_UB4(p1, p0, q0, q1, (src - 2 * pitch), pitch);
-}
-
-void aom_lpf_vertical_4_msa(uint8_t *src, int32_t pitch,
- const uint8_t *b_limit_ptr,
- const uint8_t *limit_ptr,
- const uint8_t *thresh_ptr) {
- v16u8 mask, hev, flat, limit, thresh, b_limit;
- v16u8 p3, p2, p1, p0, q3, q2, q1, q0;
- v8i16 vec0, vec1, vec2, vec3;
-
- LD_UB8((src - 4), pitch, p3, p2, p1, p0, q0, q1, q2, q3);
-
- thresh = (v16u8)__msa_fill_b(*thresh_ptr);
- b_limit = (v16u8)__msa_fill_b(*b_limit_ptr);
- limit = (v16u8)__msa_fill_b(*limit_ptr);
-
- TRANSPOSE8x8_UB_UB(p3, p2, p1, p0, q0, q1, q2, q3, p3, p2, p1, p0, q0, q1, q2,
- q3);
- LPF_MASK_HEV(p3, p2, p1, p0, q0, q1, q2, q3, limit, b_limit, thresh, hev,
- mask, flat);
- AOM_LPF_FILTER4_8W(p1, p0, q0, q1, mask, hev, p1, p0, q0, q1);
- ILVR_B2_SH(p0, p1, q1, q0, vec0, vec1);
- ILVRL_H2_SH(vec1, vec0, vec2, vec3);
-
- src -= 2;
- ST4x4_UB(vec2, vec2, 0, 1, 2, 3, src, pitch);
- src += 4 * pitch;
- ST4x4_UB(vec3, vec3, 0, 1, 2, 3, src, pitch);
-}
-
-void aom_lpf_vertical_4_dual_msa(uint8_t *src, int32_t pitch,
- const uint8_t *b_limit0_ptr,
- const uint8_t *limit0_ptr,
- const uint8_t *thresh0_ptr,
- const uint8_t *b_limit1_ptr,
- const uint8_t *limit1_ptr,
- const uint8_t *thresh1_ptr) {
- v16u8 mask, hev, flat;
- v16u8 thresh0, b_limit0, limit0, thresh1, b_limit1, limit1;
- v16u8 p3, p2, p1, p0, q3, q2, q1, q0;
- v16u8 row0, row1, row2, row3, row4, row5, row6, row7;
- v16u8 row8, row9, row10, row11, row12, row13, row14, row15;
- v8i16 tmp0, tmp1, tmp2, tmp3, tmp4, tmp5;
-
- LD_UB8(src - 4, pitch, row0, row1, row2, row3, row4, row5, row6, row7);
- LD_UB8(src - 4 + (8 * pitch), pitch, row8, row9, row10, row11, row12, row13,
- row14, row15);
-
- TRANSPOSE16x8_UB_UB(row0, row1, row2, row3, row4, row5, row6, row7, row8,
- row9, row10, row11, row12, row13, row14, row15, p3, p2,
- p1, p0, q0, q1, q2, q3);
-
- thresh0 = (v16u8)__msa_fill_b(*thresh0_ptr);
- thresh1 = (v16u8)__msa_fill_b(*thresh1_ptr);
- thresh0 = (v16u8)__msa_ilvr_d((v2i64)thresh1, (v2i64)thresh0);
-
- b_limit0 = (v16u8)__msa_fill_b(*b_limit0_ptr);
- b_limit1 = (v16u8)__msa_fill_b(*b_limit1_ptr);
- b_limit0 = (v16u8)__msa_ilvr_d((v2i64)b_limit1, (v2i64)b_limit0);
-
- limit0 = (v16u8)__msa_fill_b(*limit0_ptr);
- limit1 = (v16u8)__msa_fill_b(*limit1_ptr);
- limit0 = (v16u8)__msa_ilvr_d((v2i64)limit1, (v2i64)limit0);
-
- LPF_MASK_HEV(p3, p2, p1, p0, q0, q1, q2, q3, limit0, b_limit0, thresh0, hev,
- mask, flat);
- AOM_LPF_FILTER4_4W(p1, p0, q0, q1, mask, hev, p1, p0, q0, q1);
- ILVR_B2_SH(p0, p1, q1, q0, tmp0, tmp1);
- ILVRL_H2_SH(tmp1, tmp0, tmp2, tmp3);
- ILVL_B2_SH(p0, p1, q1, q0, tmp0, tmp1);
- ILVRL_H2_SH(tmp1, tmp0, tmp4, tmp5);
-
- src -= 2;
-
- ST4x8_UB(tmp2, tmp3, src, pitch);
- src += (8 * pitch);
- ST4x8_UB(tmp4, tmp5, src, pitch);
-}
diff --git a/third_party/aom/aom_dsp/mips/loopfilter_8_msa.c b/third_party/aom/aom_dsp/mips/loopfilter_8_msa.c
deleted file mode 100644
index dc203e79c..000000000
--- a/third_party/aom/aom_dsp/mips/loopfilter_8_msa.c
+++ /dev/null
@@ -1,333 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#include "aom_dsp/mips/loopfilter_msa.h"
-
-void aom_lpf_horizontal_8_msa(uint8_t *src, int32_t pitch,
- const uint8_t *b_limit_ptr,
- const uint8_t *limit_ptr,
- const uint8_t *thresh_ptr) {
- uint64_t p2_d, p1_d, p0_d, q0_d, q1_d, q2_d;
- v16u8 mask, hev, flat, thresh, b_limit, limit;
- v16u8 p3, p2, p1, p0, q3, q2, q1, q0;
- v16u8 p2_out, p1_out, p0_out, q0_out, q1_out, q2_out;
- v8i16 p2_filter8, p1_filter8, p0_filter8, q0_filter8, q1_filter8, q2_filter8;
- v8u16 p3_r, p2_r, p1_r, p0_r, q3_r, q2_r, q1_r, q0_r;
- v16i8 zero = { 0 };
-
- /* load vector elements */
- LD_UB8((src - 4 * pitch), pitch, p3, p2, p1, p0, q0, q1, q2, q3);
-
- thresh = (v16u8)__msa_fill_b(*thresh_ptr);
- b_limit = (v16u8)__msa_fill_b(*b_limit_ptr);
- limit = (v16u8)__msa_fill_b(*limit_ptr);
-
- LPF_MASK_HEV(p3, p2, p1, p0, q0, q1, q2, q3, limit, b_limit, thresh, hev,
- mask, flat);
- AOM_FLAT4(p3, p2, p0, q0, q2, q3, flat);
- AOM_LPF_FILTER4_8W(p1, p0, q0, q1, mask, hev, p1_out, p0_out, q0_out, q1_out);
-
- flat = (v16u8)__msa_ilvr_d((v2i64)zero, (v2i64)flat);
-
- if (__msa_test_bz_v(flat)) {
- p1_d = __msa_copy_u_d((v2i64)p1_out, 0);
- p0_d = __msa_copy_u_d((v2i64)p0_out, 0);
- q0_d = __msa_copy_u_d((v2i64)q0_out, 0);
- q1_d = __msa_copy_u_d((v2i64)q1_out, 0);
- SD4(p1_d, p0_d, q0_d, q1_d, (src - 2 * pitch), pitch);
- } else {
- ILVR_B8_UH(zero, p3, zero, p2, zero, p1, zero, p0, zero, q0, zero, q1, zero,
- q2, zero, q3, p3_r, p2_r, p1_r, p0_r, q0_r, q1_r, q2_r, q3_r);
- AOM_FILTER8(p3_r, p2_r, p1_r, p0_r, q0_r, q1_r, q2_r, q3_r, p2_filter8,
- p1_filter8, p0_filter8, q0_filter8, q1_filter8, q2_filter8);
-
- /* convert 16 bit output data into 8 bit */
- PCKEV_B4_SH(zero, p2_filter8, zero, p1_filter8, zero, p0_filter8, zero,
- q0_filter8, p2_filter8, p1_filter8, p0_filter8, q0_filter8);
- PCKEV_B2_SH(zero, q1_filter8, zero, q2_filter8, q1_filter8, q2_filter8);
-
- /* store pixel values */
- p2_out = __msa_bmnz_v(p2, (v16u8)p2_filter8, flat);
- p1_out = __msa_bmnz_v(p1_out, (v16u8)p1_filter8, flat);
- p0_out = __msa_bmnz_v(p0_out, (v16u8)p0_filter8, flat);
- q0_out = __msa_bmnz_v(q0_out, (v16u8)q0_filter8, flat);
- q1_out = __msa_bmnz_v(q1_out, (v16u8)q1_filter8, flat);
- q2_out = __msa_bmnz_v(q2, (v16u8)q2_filter8, flat);
-
- p2_d = __msa_copy_u_d((v2i64)p2_out, 0);
- p1_d = __msa_copy_u_d((v2i64)p1_out, 0);
- p0_d = __msa_copy_u_d((v2i64)p0_out, 0);
- q0_d = __msa_copy_u_d((v2i64)q0_out, 0);
- q1_d = __msa_copy_u_d((v2i64)q1_out, 0);
- q2_d = __msa_copy_u_d((v2i64)q2_out, 0);
-
- src -= 3 * pitch;
-
- SD4(p2_d, p1_d, p0_d, q0_d, src, pitch);
- src += (4 * pitch);
- SD(q1_d, src);
- src += pitch;
- SD(q2_d, src);
- }
-}
-
-void aom_lpf_horizontal_8_dual_msa(
- uint8_t *src, int32_t pitch, const uint8_t *b_limit0, const uint8_t *limit0,
- const uint8_t *thresh0, const uint8_t *b_limit1, const uint8_t *limit1,
- const uint8_t *thresh1) {
- v16u8 p3, p2, p1, p0, q3, q2, q1, q0;
- v16u8 p2_out, p1_out, p0_out, q0_out, q1_out, q2_out;
- v16u8 flat, mask, hev, tmp, thresh, b_limit, limit;
- v8u16 p3_r, p2_r, p1_r, p0_r, q0_r, q1_r, q2_r, q3_r;
- v8u16 p3_l, p2_l, p1_l, p0_l, q0_l, q1_l, q2_l, q3_l;
- v8i16 p2_filt8_r, p1_filt8_r, p0_filt8_r, q0_filt8_r, q1_filt8_r, q2_filt8_r;
- v8i16 p2_filt8_l, p1_filt8_l, p0_filt8_l, q0_filt8_l, q1_filt8_l, q2_filt8_l;
- v16u8 zero = { 0 };
-
- /* load vector elements */
- LD_UB8(src - (4 * pitch), pitch, p3, p2, p1, p0, q0, q1, q2, q3);
-
- thresh = (v16u8)__msa_fill_b(*thresh0);
- tmp = (v16u8)__msa_fill_b(*thresh1);
- thresh = (v16u8)__msa_ilvr_d((v2i64)tmp, (v2i64)thresh);
-
- b_limit = (v16u8)__msa_fill_b(*b_limit0);
- tmp = (v16u8)__msa_fill_b(*b_limit1);
- b_limit = (v16u8)__msa_ilvr_d((v2i64)tmp, (v2i64)b_limit);
-
- limit = (v16u8)__msa_fill_b(*limit0);
- tmp = (v16u8)__msa_fill_b(*limit1);
- limit = (v16u8)__msa_ilvr_d((v2i64)tmp, (v2i64)limit);
-
- /* mask and hev */
- LPF_MASK_HEV(p3, p2, p1, p0, q0, q1, q2, q3, limit, b_limit, thresh, hev,
- mask, flat);
- AOM_FLAT4(p3, p2, p0, q0, q2, q3, flat);
- AOM_LPF_FILTER4_4W(p1, p0, q0, q1, mask, hev, p1_out, p0_out, q0_out, q1_out);
-
- if (__msa_test_bz_v(flat)) {
- ST_UB4(p1_out, p0_out, q0_out, q1_out, (src - 2 * pitch), pitch);
- } else {
- ILVR_B8_UH(zero, p3, zero, p2, zero, p1, zero, p0, zero, q0, zero, q1, zero,
- q2, zero, q3, p3_r, p2_r, p1_r, p0_r, q0_r, q1_r, q2_r, q3_r);
- AOM_FILTER8(p3_r, p2_r, p1_r, p0_r, q0_r, q1_r, q2_r, q3_r, p2_filt8_r,
- p1_filt8_r, p0_filt8_r, q0_filt8_r, q1_filt8_r, q2_filt8_r);
-
- ILVL_B4_UH(zero, p3, zero, p2, zero, p1, zero, p0, p3_l, p2_l, p1_l, p0_l);
- ILVL_B4_UH(zero, q0, zero, q1, zero, q2, zero, q3, q0_l, q1_l, q2_l, q3_l);
- AOM_FILTER8(p3_l, p2_l, p1_l, p0_l, q0_l, q1_l, q2_l, q3_l, p2_filt8_l,
- p1_filt8_l, p0_filt8_l, q0_filt8_l, q1_filt8_l, q2_filt8_l);
-
- /* convert 16 bit output data into 8 bit */
- PCKEV_B4_SH(p2_filt8_l, p2_filt8_r, p1_filt8_l, p1_filt8_r, p0_filt8_l,
- p0_filt8_r, q0_filt8_l, q0_filt8_r, p2_filt8_r, p1_filt8_r,
- p0_filt8_r, q0_filt8_r);
- PCKEV_B2_SH(q1_filt8_l, q1_filt8_r, q2_filt8_l, q2_filt8_r, q1_filt8_r,
- q2_filt8_r);
-
- /* store pixel values */
- p2_out = __msa_bmnz_v(p2, (v16u8)p2_filt8_r, flat);
- p1_out = __msa_bmnz_v(p1_out, (v16u8)p1_filt8_r, flat);
- p0_out = __msa_bmnz_v(p0_out, (v16u8)p0_filt8_r, flat);
- q0_out = __msa_bmnz_v(q0_out, (v16u8)q0_filt8_r, flat);
- q1_out = __msa_bmnz_v(q1_out, (v16u8)q1_filt8_r, flat);
- q2_out = __msa_bmnz_v(q2, (v16u8)q2_filt8_r, flat);
-
- src -= 3 * pitch;
-
- ST_UB4(p2_out, p1_out, p0_out, q0_out, src, pitch);
- src += (4 * pitch);
- ST_UB2(q1_out, q2_out, src, pitch);
- src += (2 * pitch);
- }
-}
-
-void aom_lpf_vertical_8_msa(uint8_t *src, int32_t pitch,
- const uint8_t *b_limit_ptr,
- const uint8_t *limit_ptr,
- const uint8_t *thresh_ptr) {
- v16u8 p3, p2, p1, p0, q3, q2, q1, q0;
- v16u8 p1_out, p0_out, q0_out, q1_out;
- v16u8 flat, mask, hev, thresh, b_limit, limit;
- v8u16 p3_r, p2_r, p1_r, p0_r, q0_r, q1_r, q2_r, q3_r;
- v8i16 p2_filt8_r, p1_filt8_r, p0_filt8_r, q0_filt8_r, q1_filt8_r, q2_filt8_r;
- v16u8 zero = { 0 };
- v8i16 vec0, vec1, vec2, vec3, vec4;
-
- /* load vector elements */
- LD_UB8(src - 4, pitch, p3, p2, p1, p0, q0, q1, q2, q3);
-
- TRANSPOSE8x8_UB_UB(p3, p2, p1, p0, q0, q1, q2, q3, p3, p2, p1, p0, q0, q1, q2,
- q3);
-
- thresh = (v16u8)__msa_fill_b(*thresh_ptr);
- b_limit = (v16u8)__msa_fill_b(*b_limit_ptr);
- limit = (v16u8)__msa_fill_b(*limit_ptr);
-
- /* mask and hev */
- LPF_MASK_HEV(p3, p2, p1, p0, q0, q1, q2, q3, limit, b_limit, thresh, hev,
- mask, flat);
- /* flat4 */
- AOM_FLAT4(p3, p2, p0, q0, q2, q3, flat);
- /* filter4 */
- AOM_LPF_FILTER4_8W(p1, p0, q0, q1, mask, hev, p1_out, p0_out, q0_out, q1_out);
-
- flat = (v16u8)__msa_ilvr_d((v2i64)zero, (v2i64)flat);
-
- if (__msa_test_bz_v(flat)) {
- /* Store 4 pixels p1-_q1 */
- ILVR_B2_SH(p0_out, p1_out, q1_out, q0_out, vec0, vec1);
- ILVRL_H2_SH(vec1, vec0, vec2, vec3);
-
- src -= 2;
- ST4x4_UB(vec2, vec2, 0, 1, 2, 3, src, pitch);
- src += 4 * pitch;
- ST4x4_UB(vec3, vec3, 0, 1, 2, 3, src, pitch);
- } else {
- ILVR_B8_UH(zero, p3, zero, p2, zero, p1, zero, p0, zero, q0, zero, q1, zero,
- q2, zero, q3, p3_r, p2_r, p1_r, p0_r, q0_r, q1_r, q2_r, q3_r);
- AOM_FILTER8(p3_r, p2_r, p1_r, p0_r, q0_r, q1_r, q2_r, q3_r, p2_filt8_r,
- p1_filt8_r, p0_filt8_r, q0_filt8_r, q1_filt8_r, q2_filt8_r);
- /* convert 16 bit output data into 8 bit */
- PCKEV_B4_SH(p2_filt8_r, p2_filt8_r, p1_filt8_r, p1_filt8_r, p0_filt8_r,
- p0_filt8_r, q0_filt8_r, q0_filt8_r, p2_filt8_r, p1_filt8_r,
- p0_filt8_r, q0_filt8_r);
- PCKEV_B2_SH(q1_filt8_r, q1_filt8_r, q2_filt8_r, q2_filt8_r, q1_filt8_r,
- q2_filt8_r);
-
- /* store pixel values */
- p2 = __msa_bmnz_v(p2, (v16u8)p2_filt8_r, flat);
- p1 = __msa_bmnz_v(p1_out, (v16u8)p1_filt8_r, flat);
- p0 = __msa_bmnz_v(p0_out, (v16u8)p0_filt8_r, flat);
- q0 = __msa_bmnz_v(q0_out, (v16u8)q0_filt8_r, flat);
- q1 = __msa_bmnz_v(q1_out, (v16u8)q1_filt8_r, flat);
- q2 = __msa_bmnz_v(q2, (v16u8)q2_filt8_r, flat);
-
- /* Store 6 pixels p2-_q2 */
- ILVR_B2_SH(p1, p2, q0, p0, vec0, vec1);
- ILVRL_H2_SH(vec1, vec0, vec2, vec3);
- vec4 = (v8i16)__msa_ilvr_b((v16i8)q2, (v16i8)q1);
-
- src -= 3;
- ST4x4_UB(vec2, vec2, 0, 1, 2, 3, src, pitch);
- ST2x4_UB(vec4, 0, src + 4, pitch);
- src += (4 * pitch);
- ST4x4_UB(vec3, vec3, 0, 1, 2, 3, src, pitch);
- ST2x4_UB(vec4, 4, src + 4, pitch);
- }
-}
-
-void aom_lpf_vertical_8_dual_msa(uint8_t *src, int32_t pitch,
- const uint8_t *b_limit0, const uint8_t *limit0,
- const uint8_t *thresh0,
- const uint8_t *b_limit1, const uint8_t *limit1,
- const uint8_t *thresh1) {
- uint8_t *temp_src;
- v16u8 p3, p2, p1, p0, q3, q2, q1, q0;
- v16u8 p1_out, p0_out, q0_out, q1_out;
- v16u8 flat, mask, hev, thresh, b_limit, limit;
- v16u8 row4, row5, row6, row7, row12, row13, row14, row15;
- v8u16 p3_r, p2_r, p1_r, p0_r, q0_r, q1_r, q2_r, q3_r;
- v8u16 p3_l, p2_l, p1_l, p0_l, q0_l, q1_l, q2_l, q3_l;
- v8i16 p2_filt8_r, p1_filt8_r, p0_filt8_r, q0_filt8_r, q1_filt8_r, q2_filt8_r;
- v8i16 p2_filt8_l, p1_filt8_l, p0_filt8_l, q0_filt8_l, q1_filt8_l, q2_filt8_l;
- v16u8 zero = { 0 };
- v8i16 vec0, vec1, vec2, vec3, vec4, vec5, vec6, vec7;
-
- temp_src = src - 4;
-
- LD_UB8(temp_src, pitch, p0, p1, p2, p3, row4, row5, row6, row7);
- temp_src += (8 * pitch);
- LD_UB8(temp_src, pitch, q3, q2, q1, q0, row12, row13, row14, row15);
-
- /* transpose 16x8 matrix into 8x16 */
- TRANSPOSE16x8_UB_UB(p0, p1, p2, p3, row4, row5, row6, row7, q3, q2, q1, q0,
- row12, row13, row14, row15, p3, p2, p1, p0, q0, q1, q2,
- q3);
-
- thresh = (v16u8)__msa_fill_b(*thresh0);
- vec0 = (v8i16)__msa_fill_b(*thresh1);
- thresh = (v16u8)__msa_ilvr_d((v2i64)vec0, (v2i64)thresh);
-
- b_limit = (v16u8)__msa_fill_b(*b_limit0);
- vec0 = (v8i16)__msa_fill_b(*b_limit1);
- b_limit = (v16u8)__msa_ilvr_d((v2i64)vec0, (v2i64)b_limit);
-
- limit = (v16u8)__msa_fill_b(*limit0);
- vec0 = (v8i16)__msa_fill_b(*limit1);
- limit = (v16u8)__msa_ilvr_d((v2i64)vec0, (v2i64)limit);
-
- /* mask and hev */
- LPF_MASK_HEV(p3, p2, p1, p0, q0, q1, q2, q3, limit, b_limit, thresh, hev,
- mask, flat);
- /* flat4 */
- AOM_FLAT4(p3, p2, p0, q0, q2, q3, flat);
- /* filter4 */
- AOM_LPF_FILTER4_4W(p1, p0, q0, q1, mask, hev, p1_out, p0_out, q0_out, q1_out);
-
- if (__msa_test_bz_v(flat)) {
- ILVR_B2_SH(p0_out, p1_out, q1_out, q0_out, vec0, vec1);
- ILVRL_H2_SH(vec1, vec0, vec2, vec3);
- ILVL_B2_SH(p0_out, p1_out, q1_out, q0_out, vec0, vec1);
- ILVRL_H2_SH(vec1, vec0, vec4, vec5);
-
- src -= 2;
- ST4x8_UB(vec2, vec3, src, pitch);
- src += 8 * pitch;
- ST4x8_UB(vec4, vec5, src, pitch);
- } else {
- ILVR_B8_UH(zero, p3, zero, p2, zero, p1, zero, p0, zero, q0, zero, q1, zero,
- q2, zero, q3, p3_r, p2_r, p1_r, p0_r, q0_r, q1_r, q2_r, q3_r);
- AOM_FILTER8(p3_r, p2_r, p1_r, p0_r, q0_r, q1_r, q2_r, q3_r, p2_filt8_r,
- p1_filt8_r, p0_filt8_r, q0_filt8_r, q1_filt8_r, q2_filt8_r);
-
- ILVL_B4_UH(zero, p3, zero, p2, zero, p1, zero, p0, p3_l, p2_l, p1_l, p0_l);
- ILVL_B4_UH(zero, q0, zero, q1, zero, q2, zero, q3, q0_l, q1_l, q2_l, q3_l);
-
- /* filter8 */
- AOM_FILTER8(p3_l, p2_l, p1_l, p0_l, q0_l, q1_l, q2_l, q3_l, p2_filt8_l,
- p1_filt8_l, p0_filt8_l, q0_filt8_l, q1_filt8_l, q2_filt8_l);
-
- /* convert 16 bit output data into 8 bit */
- PCKEV_B4_SH(p2_filt8_l, p2_filt8_r, p1_filt8_l, p1_filt8_r, p0_filt8_l,
- p0_filt8_r, q0_filt8_l, q0_filt8_r, p2_filt8_r, p1_filt8_r,
- p0_filt8_r, q0_filt8_r);
- PCKEV_B2_SH(q1_filt8_l, q1_filt8_r, q2_filt8_l, q2_filt8_r, q1_filt8_r,
- q2_filt8_r);
-
- /* store pixel values */
- p2 = __msa_bmnz_v(p2, (v16u8)p2_filt8_r, flat);
- p1 = __msa_bmnz_v(p1_out, (v16u8)p1_filt8_r, flat);
- p0 = __msa_bmnz_v(p0_out, (v16u8)p0_filt8_r, flat);
- q0 = __msa_bmnz_v(q0_out, (v16u8)q0_filt8_r, flat);
- q1 = __msa_bmnz_v(q1_out, (v16u8)q1_filt8_r, flat);
- q2 = __msa_bmnz_v(q2, (v16u8)q2_filt8_r, flat);
-
- ILVR_B2_SH(p1, p2, q0, p0, vec0, vec1);
- ILVRL_H2_SH(vec1, vec0, vec3, vec4);
- ILVL_B2_SH(p1, p2, q0, p0, vec0, vec1);
- ILVRL_H2_SH(vec1, vec0, vec6, vec7);
- ILVRL_B2_SH(q2, q1, vec2, vec5);
-
- src -= 3;
- ST4x4_UB(vec3, vec3, 0, 1, 2, 3, src, pitch);
- ST2x4_UB(vec2, 0, src + 4, pitch);
- src += (4 * pitch);
- ST4x4_UB(vec4, vec4, 0, 1, 2, 3, src, pitch);
- ST2x4_UB(vec2, 4, src + 4, pitch);
- src += (4 * pitch);
- ST4x4_UB(vec6, vec6, 0, 1, 2, 3, src, pitch);
- ST2x4_UB(vec5, 0, src + 4, pitch);
- src += (4 * pitch);
- ST4x4_UB(vec7, vec7, 0, 1, 2, 3, src, pitch);
- ST2x4_UB(vec5, 4, src + 4, pitch);
- }
-}
diff --git a/third_party/aom/aom_dsp/mips/loopfilter_filters_dspr2.c b/third_party/aom/aom_dsp/mips/loopfilter_filters_dspr2.c
deleted file mode 100644
index 8c41278be..000000000
--- a/third_party/aom/aom_dsp/mips/loopfilter_filters_dspr2.c
+++ /dev/null
@@ -1,328 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#include <stdlib.h>
-
-#include "config/aom_dsp_rtcd.h"
-
-#include "aom/aom_integer.h"
-#include "aom_dsp/mips/common_dspr2.h"
-#include "aom_dsp/mips/loopfilter_filters_dspr2.h"
-#include "aom_dsp/mips/loopfilter_macros_dspr2.h"
-#include "aom_dsp/mips/loopfilter_masks_dspr2.h"
-#include "aom_mem/aom_mem.h"
-
-#if HAVE_DSPR2
-void aom_lpf_horizontal_4_dspr2(unsigned char *s, int pitch,
- const uint8_t *blimit, const uint8_t *limit,
- const uint8_t *thresh) {
- uint8_t i;
- uint32_t mask;
- uint32_t hev;
- uint32_t pm1, p0, p1, p2, p3, p4, p5, p6;
- uint8_t *sm1, *s0, *s1, *s2, *s3, *s4, *s5, *s6;
- uint32_t thresh_vec, flimit_vec, limit_vec;
- uint32_t uflimit, ulimit, uthresh;
-
- uflimit = *blimit;
- ulimit = *limit;
- uthresh = *thresh;
-
- /* create quad-byte */
- __asm__ __volatile__(
- "replv.qb %[thresh_vec], %[uthresh] \n\t"
- "replv.qb %[flimit_vec], %[uflimit] \n\t"
- "replv.qb %[limit_vec], %[ulimit] \n\t"
-
- : [thresh_vec] "=&r"(thresh_vec), [flimit_vec] "=&r"(flimit_vec),
- [limit_vec] "=r"(limit_vec)
- : [uthresh] "r"(uthresh), [uflimit] "r"(uflimit), [ulimit] "r"(ulimit));
-
- /* prefetch data for store */
- prefetch_store(s);
-
- /* loop filter designed to work using chars so that we can make maximum use
- of 8 bit simd instructions. */
- for (i = 0; i < 2; i++) {
- sm1 = s - (pitch << 2);
- s0 = sm1 + pitch;
- s1 = s0 + pitch;
- s2 = s - pitch;
- s3 = s;
- s4 = s + pitch;
- s5 = s4 + pitch;
- s6 = s5 + pitch;
-
- __asm__ __volatile__(
- "lw %[p1], (%[s1]) \n\t"
- "lw %[p2], (%[s2]) \n\t"
- "lw %[p3], (%[s3]) \n\t"
- "lw %[p4], (%[s4]) \n\t"
-
- : [p1] "=&r"(p1), [p2] "=&r"(p2), [p3] "=&r"(p3), [p4] "=&r"(p4)
- : [s1] "r"(s1), [s2] "r"(s2), [s3] "r"(s3), [s4] "r"(s4));
-
- /* if (p1 - p4 == 0) and (p2 - p3 == 0)
- mask will be zero and filtering is not needed */
- if (!(((p1 - p4) == 0) && ((p2 - p3) == 0))) {
- __asm__ __volatile__(
- "lw %[pm1], (%[sm1]) \n\t"
- "lw %[p0], (%[s0]) \n\t"
- "lw %[p5], (%[s5]) \n\t"
- "lw %[p6], (%[s6]) \n\t"
-
- : [pm1] "=&r"(pm1), [p0] "=&r"(p0), [p5] "=&r"(p5), [p6] "=&r"(p6)
- : [sm1] "r"(sm1), [s0] "r"(s0), [s5] "r"(s5), [s6] "r"(s6));
-
- filter_hev_mask_dspr2(limit_vec, flimit_vec, p1, p2, pm1, p0, p3, p4, p5,
- p6, thresh_vec, &hev, &mask);
-
- /* if mask == 0 do filtering is not needed */
- if (mask) {
- /* filtering */
- filter_dspr2(mask, hev, &p1, &p2, &p3, &p4);
-
- __asm__ __volatile__(
- "sw %[p1], (%[s1]) \n\t"
- "sw %[p2], (%[s2]) \n\t"
- "sw %[p3], (%[s3]) \n\t"
- "sw %[p4], (%[s4]) \n\t"
-
- :
- : [p1] "r"(p1), [p2] "r"(p2), [p3] "r"(p3), [p4] "r"(p4),
- [s1] "r"(s1), [s2] "r"(s2), [s3] "r"(s3), [s4] "r"(s4));
- }
- }
-
- s = s + 4;
- }
-}
-
-void aom_lpf_vertical_4_dspr2(unsigned char *s, int pitch,
- const uint8_t *blimit, const uint8_t *limit,
- const uint8_t *thresh) {
- uint8_t i;
- uint32_t mask, hev;
- uint32_t pm1, p0, p1, p2, p3, p4, p5, p6;
- uint8_t *s1, *s2, *s3, *s4;
- uint32_t prim1, prim2, sec3, sec4, prim3, prim4;
- uint32_t thresh_vec, flimit_vec, limit_vec;
- uint32_t uflimit, ulimit, uthresh;
-
- uflimit = *blimit;
- ulimit = *limit;
- uthresh = *thresh;
-
- /* create quad-byte */
- __asm__ __volatile__(
- "replv.qb %[thresh_vec], %[uthresh] \n\t"
- "replv.qb %[flimit_vec], %[uflimit] \n\t"
- "replv.qb %[limit_vec], %[ulimit] \n\t"
-
- : [thresh_vec] "=&r"(thresh_vec), [flimit_vec] "=&r"(flimit_vec),
- [limit_vec] "=r"(limit_vec)
- : [uthresh] "r"(uthresh), [uflimit] "r"(uflimit), [ulimit] "r"(ulimit));
-
- /* prefetch data for store */
- prefetch_store(s + pitch);
-
- for (i = 0; i < 2; i++) {
- s1 = s;
- s2 = s + pitch;
- s3 = s2 + pitch;
- s4 = s3 + pitch;
- s = s4 + pitch;
-
- /* load quad-byte vectors
- * memory is 4 byte aligned
- */
- p2 = *((uint32_t *)(s1 - 4));
- p6 = *((uint32_t *)(s1));
- p1 = *((uint32_t *)(s2 - 4));
- p5 = *((uint32_t *)(s2));
- p0 = *((uint32_t *)(s3 - 4));
- p4 = *((uint32_t *)(s3));
- pm1 = *((uint32_t *)(s4 - 4));
- p3 = *((uint32_t *)(s4));
-
- /* transpose pm1, p0, p1, p2 */
- __asm__ __volatile__(
- "precrq.qb.ph %[prim1], %[p2], %[p1] \n\t"
- "precr.qb.ph %[prim2], %[p2], %[p1] \n\t"
- "precrq.qb.ph %[prim3], %[p0], %[pm1] \n\t"
- "precr.qb.ph %[prim4], %[p0], %[pm1] \n\t"
-
- "precrq.qb.ph %[p1], %[prim1], %[prim2] \n\t"
- "precr.qb.ph %[pm1], %[prim1], %[prim2] \n\t"
- "precrq.qb.ph %[sec3], %[prim3], %[prim4] \n\t"
- "precr.qb.ph %[sec4], %[prim3], %[prim4] \n\t"
-
- "precrq.ph.w %[p2], %[p1], %[sec3] \n\t"
- "precrq.ph.w %[p0], %[pm1], %[sec4] \n\t"
- "append %[p1], %[sec3], 16 \n\t"
- "append %[pm1], %[sec4], 16 \n\t"
-
- : [prim1] "=&r"(prim1), [prim2] "=&r"(prim2), [prim3] "=&r"(prim3),
- [prim4] "=&r"(prim4), [p2] "+r"(p2), [p1] "+r"(p1), [p0] "+r"(p0),
- [pm1] "+r"(pm1), [sec3] "=&r"(sec3), [sec4] "=&r"(sec4)
- :);
-
- /* transpose p3, p4, p5, p6 */
- __asm__ __volatile__(
- "precrq.qb.ph %[prim1], %[p6], %[p5] \n\t"
- "precr.qb.ph %[prim2], %[p6], %[p5] \n\t"
- "precrq.qb.ph %[prim3], %[p4], %[p3] \n\t"
- "precr.qb.ph %[prim4], %[p4], %[p3] \n\t"
-
- "precrq.qb.ph %[p5], %[prim1], %[prim2] \n\t"
- "precr.qb.ph %[p3], %[prim1], %[prim2] \n\t"
- "precrq.qb.ph %[sec3], %[prim3], %[prim4] \n\t"
- "precr.qb.ph %[sec4], %[prim3], %[prim4] \n\t"
-
- "precrq.ph.w %[p6], %[p5], %[sec3] \n\t"
- "precrq.ph.w %[p4], %[p3], %[sec4] \n\t"
- "append %[p5], %[sec3], 16 \n\t"
- "append %[p3], %[sec4], 16 \n\t"
-
- : [prim1] "=&r"(prim1), [prim2] "=&r"(prim2), [prim3] "=&r"(prim3),
- [prim4] "=&r"(prim4), [p6] "+r"(p6), [p5] "+r"(p5), [p4] "+r"(p4),
- [p3] "+r"(p3), [sec3] "=&r"(sec3), [sec4] "=&r"(sec4)
- :);
-
- /* if (p1 - p4 == 0) and (p2 - p3 == 0)
- * mask will be zero and filtering is not needed
- */
- if (!(((p1 - p4) == 0) && ((p2 - p3) == 0))) {
- filter_hev_mask_dspr2(limit_vec, flimit_vec, p1, p2, pm1, p0, p3, p4, p5,
- p6, thresh_vec, &hev, &mask);
-
- /* if mask == 0 do filtering is not needed */
- if (mask) {
- /* filtering */
- filter_dspr2(mask, hev, &p1, &p2, &p3, &p4);
-
- /* unpack processed 4x4 neighborhood
- * don't use transpose on output data
- * because memory isn't aligned
- */
- __asm__ __volatile__(
- "sb %[p4], 1(%[s4]) \n\t"
- "sb %[p3], 0(%[s4]) \n\t"
- "sb %[p2], -1(%[s4]) \n\t"
- "sb %[p1], -2(%[s4]) \n\t"
-
- :
- : [p4] "r"(p4), [p3] "r"(p3), [p2] "r"(p2), [p1] "r"(p1),
- [s4] "r"(s4));
-
- __asm__ __volatile__(
- "srl %[p4], %[p4], 8 \n\t"
- "srl %[p3], %[p3], 8 \n\t"
- "srl %[p2], %[p2], 8 \n\t"
- "srl %[p1], %[p1], 8 \n\t"
-
- : [p4] "+r"(p4), [p3] "+r"(p3), [p2] "+r"(p2), [p1] "+r"(p1)
- :);
-
- __asm__ __volatile__(
- "sb %[p4], 1(%[s3]) \n\t"
- "sb %[p3], 0(%[s3]) \n\t"
- "sb %[p2], -1(%[s3]) \n\t"
- "sb %[p1], -2(%[s3]) \n\t"
-
- : [p1] "+r"(p1)
- : [p4] "r"(p4), [p3] "r"(p3), [p2] "r"(p2), [s3] "r"(s3));
-
- __asm__ __volatile__(
- "srl %[p4], %[p4], 8 \n\t"
- "srl %[p3], %[p3], 8 \n\t"
- "srl %[p2], %[p2], 8 \n\t"
- "srl %[p1], %[p1], 8 \n\t"
-
- : [p4] "+r"(p4), [p3] "+r"(p3), [p2] "+r"(p2), [p1] "+r"(p1)
- :);
-
- __asm__ __volatile__(
- "sb %[p4], 1(%[s2]) \n\t"
- "sb %[p3], 0(%[s2]) \n\t"
- "sb %[p2], -1(%[s2]) \n\t"
- "sb %[p1], -2(%[s2]) \n\t"
-
- :
- : [p4] "r"(p4), [p3] "r"(p3), [p2] "r"(p2), [p1] "r"(p1),
- [s2] "r"(s2));
-
- __asm__ __volatile__(
- "srl %[p4], %[p4], 8 \n\t"
- "srl %[p3], %[p3], 8 \n\t"
- "srl %[p2], %[p2], 8 \n\t"
- "srl %[p1], %[p1], 8 \n\t"
-
- : [p4] "+r"(p4), [p3] "+r"(p3), [p2] "+r"(p2), [p1] "+r"(p1)
- :);
-
- __asm__ __volatile__(
- "sb %[p4], 1(%[s1]) \n\t"
- "sb %[p3], 0(%[s1]) \n\t"
- "sb %[p2], -1(%[s1]) \n\t"
- "sb %[p1], -2(%[s1]) \n\t"
-
- :
- : [p4] "r"(p4), [p3] "r"(p3), [p2] "r"(p2), [p1] "r"(p1),
- [s1] "r"(s1));
- }
- }
- }
-}
-
-void aom_lpf_horizontal_4_dual_dspr2(
- uint8_t *s, int p /* pitch */, const uint8_t *blimit0,
- const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1,
- const uint8_t *limit1, const uint8_t *thresh1) {
- aom_lpf_horizontal_4_dspr2(s, p, blimit0, limit0, thresh0);
- aom_lpf_horizontal_4_dspr2(s + 8, p, blimit1, limit1, thresh1);
-}
-
-void aom_lpf_horizontal_8_dual_dspr2(
- uint8_t *s, int p /* pitch */, const uint8_t *blimit0,
- const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1,
- const uint8_t *limit1, const uint8_t *thresh1) {
- aom_lpf_horizontal_8_dspr2(s, p, blimit0, limit0, thresh0);
- aom_lpf_horizontal_8_dspr2(s + 8, p, blimit1, limit1, thresh1);
-}
-
-void aom_lpf_vertical_4_dual_dspr2(uint8_t *s, int p, const uint8_t *blimit0,
- const uint8_t *limit0,
- const uint8_t *thresh0,
- const uint8_t *blimit1,
- const uint8_t *limit1,
- const uint8_t *thresh1) {
- aom_lpf_vertical_4_dspr2(s, p, blimit0, limit0, thresh0);
- aom_lpf_vertical_4_dspr2(s + 8 * p, p, blimit1, limit1, thresh1);
-}
-
-void aom_lpf_vertical_8_dual_dspr2(uint8_t *s, int p, const uint8_t *blimit0,
- const uint8_t *limit0,
- const uint8_t *thresh0,
- const uint8_t *blimit1,
- const uint8_t *limit1,
- const uint8_t *thresh1) {
- aom_lpf_vertical_8_dspr2(s, p, blimit0, limit0, thresh0);
- aom_lpf_vertical_8_dspr2(s + 8 * p, p, blimit1, limit1, thresh1);
-}
-
-void aom_lpf_vertical_16_dual_dspr2(uint8_t *s, int p, const uint8_t *blimit,
- const uint8_t *limit,
- const uint8_t *thresh) {
- aom_lpf_vertical_16_dspr2(s, p, blimit, limit, thresh);
- aom_lpf_vertical_16_dspr2(s + 8 * p, p, blimit, limit, thresh);
-}
-#endif // #if HAVE_DSPR2
diff --git a/third_party/aom/aom_dsp/mips/loopfilter_filters_dspr2.h b/third_party/aom/aom_dsp/mips/loopfilter_filters_dspr2.h
deleted file mode 100644
index 28f0dc35a..000000000
--- a/third_party/aom/aom_dsp/mips/loopfilter_filters_dspr2.h
+++ /dev/null
@@ -1,736 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#ifndef AOM_AOM_DSP_MIPS_LOOPFILTER_FILTERS_DSPR2_H_
-#define AOM_AOM_DSP_MIPS_LOOPFILTER_FILTERS_DSPR2_H_
-
-#include <stdlib.h>
-
-#include "config/aom_dsp_rtcd.h"
-
-#include "aom/aom_integer.h"
-#include "aom_mem/aom_mem.h"
-#include "aom_ports/mem.h"
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-#if HAVE_DSPR2
-/* inputs & outputs are quad-byte vectors */
-static INLINE void filter_dspr2(uint32_t mask, uint32_t hev, uint32_t *ps1,
- uint32_t *ps0, uint32_t *qs0, uint32_t *qs1) {
- int32_t aom_filter_l, aom_filter_r;
- int32_t Filter1_l, Filter1_r, Filter2_l, Filter2_r;
- int32_t subr_r, subr_l;
- uint32_t t1, t2, HWM, t3;
- uint32_t hev_l, hev_r, mask_l, mask_r, invhev_l, invhev_r;
- int32_t vps1, vps0, vqs0, vqs1;
- int32_t vps1_l, vps1_r, vps0_l, vps0_r, vqs0_l, vqs0_r, vqs1_l, vqs1_r;
- uint32_t N128;
-
- N128 = 0x80808080;
- t1 = 0x03000300;
- t2 = 0x04000400;
- t3 = 0x01000100;
- HWM = 0xFF00FF00;
-
- vps0 = (*ps0) ^ N128;
- vps1 = (*ps1) ^ N128;
- vqs0 = (*qs0) ^ N128;
- vqs1 = (*qs1) ^ N128;
-
- /* use halfword pairs instead quad-bytes because of accuracy */
- vps0_l = vps0 & HWM;
- vps0_r = vps0 << 8;
- vps0_r = vps0_r & HWM;
-
- vps1_l = vps1 & HWM;
- vps1_r = vps1 << 8;
- vps1_r = vps1_r & HWM;
-
- vqs0_l = vqs0 & HWM;
- vqs0_r = vqs0 << 8;
- vqs0_r = vqs0_r & HWM;
-
- vqs1_l = vqs1 & HWM;
- vqs1_r = vqs1 << 8;
- vqs1_r = vqs1_r & HWM;
-
- mask_l = mask & HWM;
- mask_r = mask << 8;
- mask_r = mask_r & HWM;
-
- hev_l = hev & HWM;
- hev_r = hev << 8;
- hev_r = hev_r & HWM;
-
- __asm__ __volatile__(
- /* aom_filter = aom_signed_char_clamp(ps1 - qs1); */
- "subq_s.ph %[aom_filter_l], %[vps1_l], %[vqs1_l] \n\t"
- "subq_s.ph %[aom_filter_r], %[vps1_r], %[vqs1_r] \n\t"
-
- /* qs0 - ps0 */
- "subq_s.ph %[subr_l], %[vqs0_l], %[vps0_l] \n\t"
- "subq_s.ph %[subr_r], %[vqs0_r], %[vps0_r] \n\t"
-
- /* aom_filter &= hev; */
- "and %[aom_filter_l], %[aom_filter_l], %[hev_l] \n\t"
- "and %[aom_filter_r], %[aom_filter_r], %[hev_r] \n\t"
-
- /* aom_filter = aom_signed_char_clamp(aom_filter + 3 * (qs0 - ps0)); */
- "addq_s.ph %[aom_filter_l], %[aom_filter_l], %[subr_l] \n\t"
- "addq_s.ph %[aom_filter_r], %[aom_filter_r], %[subr_r] \n\t"
- "xor %[invhev_l], %[hev_l], %[HWM] \n\t"
- "addq_s.ph %[aom_filter_l], %[aom_filter_l], %[subr_l] \n\t"
- "addq_s.ph %[aom_filter_r], %[aom_filter_r], %[subr_r] \n\t"
- "xor %[invhev_r], %[hev_r], %[HWM] \n\t"
- "addq_s.ph %[aom_filter_l], %[aom_filter_l], %[subr_l] \n\t"
- "addq_s.ph %[aom_filter_r], %[aom_filter_r], %[subr_r] \n\t"
-
- /* aom_filter &= mask; */
- "and %[aom_filter_l], %[aom_filter_l], %[mask_l] \n\t"
- "and %[aom_filter_r], %[aom_filter_r], %[mask_r] \n\t"
-
- : [aom_filter_l] "=&r"(aom_filter_l), [aom_filter_r] "=&r"(aom_filter_r),
- [subr_l] "=&r"(subr_l), [subr_r] "=&r"(subr_r),
- [invhev_l] "=&r"(invhev_l), [invhev_r] "=&r"(invhev_r)
- : [vps0_l] "r"(vps0_l), [vps0_r] "r"(vps0_r), [vps1_l] "r"(vps1_l),
- [vps1_r] "r"(vps1_r), [vqs0_l] "r"(vqs0_l), [vqs0_r] "r"(vqs0_r),
- [vqs1_l] "r"(vqs1_l), [vqs1_r] "r"(vqs1_r), [mask_l] "r"(mask_l),
- [mask_r] "r"(mask_r), [hev_l] "r"(hev_l), [hev_r] "r"(hev_r),
- [HWM] "r"(HWM));
-
- /* save bottom 3 bits so that we round one side +4 and the other +3 */
- __asm__ __volatile__(
- /* Filter2 = aom_signed_char_clamp(aom_filter + 3) >>= 3; */
- "addq_s.ph %[Filter1_l], %[aom_filter_l], %[t2] \n\t"
- "addq_s.ph %[Filter1_r], %[aom_filter_r], %[t2] \n\t"
-
- /* Filter1 = aom_signed_char_clamp(aom_filter + 4) >>= 3; */
- "addq_s.ph %[Filter2_l], %[aom_filter_l], %[t1] \n\t"
- "addq_s.ph %[Filter2_r], %[aom_filter_r], %[t1] \n\t"
- "shra.ph %[Filter1_r], %[Filter1_r], 3 \n\t"
- "shra.ph %[Filter1_l], %[Filter1_l], 3 \n\t"
-
- "shra.ph %[Filter2_l], %[Filter2_l], 3 \n\t"
- "shra.ph %[Filter2_r], %[Filter2_r], 3 \n\t"
-
- "and %[Filter1_l], %[Filter1_l], %[HWM] \n\t"
- "and %[Filter1_r], %[Filter1_r], %[HWM] \n\t"
-
- /* vps0 = aom_signed_char_clamp(ps0 + Filter2); */
- "addq_s.ph %[vps0_l], %[vps0_l], %[Filter2_l] \n\t"
- "addq_s.ph %[vps0_r], %[vps0_r], %[Filter2_r] \n\t"
-
- /* vqs0 = aom_signed_char_clamp(qs0 - Filter1); */
- "subq_s.ph %[vqs0_l], %[vqs0_l], %[Filter1_l] \n\t"
- "subq_s.ph %[vqs0_r], %[vqs0_r], %[Filter1_r] \n\t"
-
- : [Filter1_l] "=&r"(Filter1_l), [Filter1_r] "=&r"(Filter1_r),
- [Filter2_l] "=&r"(Filter2_l), [Filter2_r] "=&r"(Filter2_r),
- [vps0_l] "+r"(vps0_l), [vps0_r] "+r"(vps0_r), [vqs0_l] "+r"(vqs0_l),
- [vqs0_r] "+r"(vqs0_r)
- : [t1] "r"(t1), [t2] "r"(t2), [HWM] "r"(HWM),
- [aom_filter_l] "r"(aom_filter_l), [aom_filter_r] "r"(aom_filter_r));
-
- __asm__ __volatile__(
- /* (aom_filter += 1) >>= 1 */
- "addqh.ph %[Filter1_l], %[Filter1_l], %[t3] \n\t"
- "addqh.ph %[Filter1_r], %[Filter1_r], %[t3] \n\t"
-
- /* aom_filter &= ~hev; */
- "and %[Filter1_l], %[Filter1_l], %[invhev_l] \n\t"
- "and %[Filter1_r], %[Filter1_r], %[invhev_r] \n\t"
-
- /* vps1 = aom_signed_char_clamp(ps1 + aom_filter); */
- "addq_s.ph %[vps1_l], %[vps1_l], %[Filter1_l] \n\t"
- "addq_s.ph %[vps1_r], %[vps1_r], %[Filter1_r] \n\t"
-
- /* vqs1 = aom_signed_char_clamp(qs1 - aom_filter); */
- "subq_s.ph %[vqs1_l], %[vqs1_l], %[Filter1_l] \n\t"
- "subq_s.ph %[vqs1_r], %[vqs1_r], %[Filter1_r] \n\t"
-
- : [Filter1_l] "+r"(Filter1_l), [Filter1_r] "+r"(Filter1_r),
- [vps1_l] "+r"(vps1_l), [vps1_r] "+r"(vps1_r), [vqs1_l] "+r"(vqs1_l),
- [vqs1_r] "+r"(vqs1_r)
- : [t3] "r"(t3), [invhev_l] "r"(invhev_l), [invhev_r] "r"(invhev_r));
-
- /* Create quad-bytes from halfword pairs */
- vqs0_l = vqs0_l & HWM;
- vqs1_l = vqs1_l & HWM;
- vps0_l = vps0_l & HWM;
- vps1_l = vps1_l & HWM;
-
- __asm__ __volatile__(
- "shrl.ph %[vqs0_r], %[vqs0_r], 8 \n\t"
- "shrl.ph %[vps0_r], %[vps0_r], 8 \n\t"
- "shrl.ph %[vqs1_r], %[vqs1_r], 8 \n\t"
- "shrl.ph %[vps1_r], %[vps1_r], 8 \n\t"
-
- : [vps1_r] "+r"(vps1_r), [vqs1_r] "+r"(vqs1_r), [vps0_r] "+r"(vps0_r),
- [vqs0_r] "+r"(vqs0_r)
- :);
-
- vqs0 = vqs0_l | vqs0_r;
- vqs1 = vqs1_l | vqs1_r;
- vps0 = vps0_l | vps0_r;
- vps1 = vps1_l | vps1_r;
-
- *ps0 = vps0 ^ N128;
- *ps1 = vps1 ^ N128;
- *qs0 = vqs0 ^ N128;
- *qs1 = vqs1 ^ N128;
-}
-
-static INLINE void filter1_dspr2(uint32_t mask, uint32_t hev, uint32_t ps1,
- uint32_t ps0, uint32_t qs0, uint32_t qs1,
- uint32_t *p1_f0, uint32_t *p0_f0,
- uint32_t *q0_f0, uint32_t *q1_f0) {
- int32_t aom_filter_l, aom_filter_r;
- int32_t Filter1_l, Filter1_r, Filter2_l, Filter2_r;
- int32_t subr_r, subr_l;
- uint32_t t1, t2, HWM, t3;
- uint32_t hev_l, hev_r, mask_l, mask_r, invhev_l, invhev_r;
- int32_t vps1, vps0, vqs0, vqs1;
- int32_t vps1_l, vps1_r, vps0_l, vps0_r, vqs0_l, vqs0_r, vqs1_l, vqs1_r;
- uint32_t N128;
-
- N128 = 0x80808080;
- t1 = 0x03000300;
- t2 = 0x04000400;
- t3 = 0x01000100;
- HWM = 0xFF00FF00;
-
- vps0 = (ps0) ^ N128;
- vps1 = (ps1) ^ N128;
- vqs0 = (qs0) ^ N128;
- vqs1 = (qs1) ^ N128;
-
- /* use halfword pairs instead quad-bytes because of accuracy */
- vps0_l = vps0 & HWM;
- vps0_r = vps0 << 8;
- vps0_r = vps0_r & HWM;
-
- vps1_l = vps1 & HWM;
- vps1_r = vps1 << 8;
- vps1_r = vps1_r & HWM;
-
- vqs0_l = vqs0 & HWM;
- vqs0_r = vqs0 << 8;
- vqs0_r = vqs0_r & HWM;
-
- vqs1_l = vqs1 & HWM;
- vqs1_r = vqs1 << 8;
- vqs1_r = vqs1_r & HWM;
-
- mask_l = mask & HWM;
- mask_r = mask << 8;
- mask_r = mask_r & HWM;
-
- hev_l = hev & HWM;
- hev_r = hev << 8;
- hev_r = hev_r & HWM;
-
- __asm__ __volatile__(
- /* aom_filter = aom_signed_char_clamp(ps1 - qs1); */
- "subq_s.ph %[aom_filter_l], %[vps1_l], %[vqs1_l] \n\t"
- "subq_s.ph %[aom_filter_r], %[vps1_r], %[vqs1_r] \n\t"
-
- /* qs0 - ps0 */
- "subq_s.ph %[subr_l], %[vqs0_l], %[vps0_l] \n\t"
- "subq_s.ph %[subr_r], %[vqs0_r], %[vps0_r] \n\t"
-
- /* aom_filter &= hev; */
- "and %[aom_filter_l], %[aom_filter_l], %[hev_l] \n\t"
- "and %[aom_filter_r], %[aom_filter_r], %[hev_r] \n\t"
-
- /* aom_filter = aom_signed_char_clamp(aom_filter + 3 * (qs0 - ps0)); */
- "addq_s.ph %[aom_filter_l], %[aom_filter_l], %[subr_l] \n\t"
- "addq_s.ph %[aom_filter_r], %[aom_filter_r], %[subr_r] \n\t"
- "xor %[invhev_l], %[hev_l], %[HWM] \n\t"
- "addq_s.ph %[aom_filter_l], %[aom_filter_l], %[subr_l] \n\t"
- "addq_s.ph %[aom_filter_r], %[aom_filter_r], %[subr_r] \n\t"
- "xor %[invhev_r], %[hev_r], %[HWM] \n\t"
- "addq_s.ph %[aom_filter_l], %[aom_filter_l], %[subr_l] \n\t"
- "addq_s.ph %[aom_filter_r], %[aom_filter_r], %[subr_r] \n\t"
-
- /* aom_filter &= mask; */
- "and %[aom_filter_l], %[aom_filter_l], %[mask_l] \n\t"
- "and %[aom_filter_r], %[aom_filter_r], %[mask_r] \n\t"
-
- : [aom_filter_l] "=&r"(aom_filter_l), [aom_filter_r] "=&r"(aom_filter_r),
- [subr_l] "=&r"(subr_l), [subr_r] "=&r"(subr_r),
- [invhev_l] "=&r"(invhev_l), [invhev_r] "=&r"(invhev_r)
- : [vps0_l] "r"(vps0_l), [vps0_r] "r"(vps0_r), [vps1_l] "r"(vps1_l),
- [vps1_r] "r"(vps1_r), [vqs0_l] "r"(vqs0_l), [vqs0_r] "r"(vqs0_r),
- [vqs1_l] "r"(vqs1_l), [vqs1_r] "r"(vqs1_r), [mask_l] "r"(mask_l),
- [mask_r] "r"(mask_r), [hev_l] "r"(hev_l), [hev_r] "r"(hev_r),
- [HWM] "r"(HWM));
-
- /* save bottom 3 bits so that we round one side +4 and the other +3 */
- __asm__ __volatile__(
- /* Filter2 = aom_signed_char_clamp(aom_filter + 3) >>= 3; */
- "addq_s.ph %[Filter1_l], %[aom_filter_l], %[t2] \n\t"
- "addq_s.ph %[Filter1_r], %[aom_filter_r], %[t2] \n\t"
-
- /* Filter1 = aom_signed_char_clamp(aom_filter + 4) >>= 3; */
- "addq_s.ph %[Filter2_l], %[aom_filter_l], %[t1] \n\t"
- "addq_s.ph %[Filter2_r], %[aom_filter_r], %[t1] \n\t"
- "shra.ph %[Filter1_r], %[Filter1_r], 3 \n\t"
- "shra.ph %[Filter1_l], %[Filter1_l], 3 \n\t"
-
- "shra.ph %[Filter2_l], %[Filter2_l], 3 \n\t"
- "shra.ph %[Filter2_r], %[Filter2_r], 3 \n\t"
-
- "and %[Filter1_l], %[Filter1_l], %[HWM] \n\t"
- "and %[Filter1_r], %[Filter1_r], %[HWM] \n\t"
-
- /* vps0 = aom_signed_char_clamp(ps0 + Filter2); */
- "addq_s.ph %[vps0_l], %[vps0_l], %[Filter2_l] \n\t"
- "addq_s.ph %[vps0_r], %[vps0_r], %[Filter2_r] \n\t"
-
- /* vqs0 = aom_signed_char_clamp(qs0 - Filter1); */
- "subq_s.ph %[vqs0_l], %[vqs0_l], %[Filter1_l] \n\t"
- "subq_s.ph %[vqs0_r], %[vqs0_r], %[Filter1_r] \n\t"
-
- : [Filter1_l] "=&r"(Filter1_l), [Filter1_r] "=&r"(Filter1_r),
- [Filter2_l] "=&r"(Filter2_l), [Filter2_r] "=&r"(Filter2_r),
- [vps0_l] "+r"(vps0_l), [vps0_r] "+r"(vps0_r), [vqs0_l] "+r"(vqs0_l),
- [vqs0_r] "+r"(vqs0_r)
- : [t1] "r"(t1), [t2] "r"(t2), [HWM] "r"(HWM),
- [aom_filter_l] "r"(aom_filter_l), [aom_filter_r] "r"(aom_filter_r));
-
- __asm__ __volatile__(
- /* (aom_filter += 1) >>= 1 */
- "addqh.ph %[Filter1_l], %[Filter1_l], %[t3] \n\t"
- "addqh.ph %[Filter1_r], %[Filter1_r], %[t3] \n\t"
-
- /* aom_filter &= ~hev; */
- "and %[Filter1_l], %[Filter1_l], %[invhev_l] \n\t"
- "and %[Filter1_r], %[Filter1_r], %[invhev_r] \n\t"
-
- /* vps1 = aom_signed_char_clamp(ps1 + aom_filter); */
- "addq_s.ph %[vps1_l], %[vps1_l], %[Filter1_l] \n\t"
- "addq_s.ph %[vps1_r], %[vps1_r], %[Filter1_r] \n\t"
-
- /* vqs1 = aom_signed_char_clamp(qs1 - aom_filter); */
- "subq_s.ph %[vqs1_l], %[vqs1_l], %[Filter1_l] \n\t"
- "subq_s.ph %[vqs1_r], %[vqs1_r], %[Filter1_r] \n\t"
-
- : [Filter1_l] "+r"(Filter1_l), [Filter1_r] "+r"(Filter1_r),
- [vps1_l] "+r"(vps1_l), [vps1_r] "+r"(vps1_r), [vqs1_l] "+r"(vqs1_l),
- [vqs1_r] "+r"(vqs1_r)
- : [t3] "r"(t3), [invhev_l] "r"(invhev_l), [invhev_r] "r"(invhev_r));
-
- /* Create quad-bytes from halfword pairs */
- vqs0_l = vqs0_l & HWM;
- vqs1_l = vqs1_l & HWM;
- vps0_l = vps0_l & HWM;
- vps1_l = vps1_l & HWM;
-
- __asm__ __volatile__(
- "shrl.ph %[vqs0_r], %[vqs0_r], 8 \n\t"
- "shrl.ph %[vps0_r], %[vps0_r], 8 \n\t"
- "shrl.ph %[vqs1_r], %[vqs1_r], 8 \n\t"
- "shrl.ph %[vps1_r], %[vps1_r], 8 \n\t"
-
- : [vps1_r] "+r"(vps1_r), [vqs1_r] "+r"(vqs1_r), [vps0_r] "+r"(vps0_r),
- [vqs0_r] "+r"(vqs0_r)
- :);
-
- vqs0 = vqs0_l | vqs0_r;
- vqs1 = vqs1_l | vqs1_r;
- vps0 = vps0_l | vps0_r;
- vps1 = vps1_l | vps1_r;
-
- *p0_f0 = vps0 ^ N128;
- *p1_f0 = vps1 ^ N128;
- *q0_f0 = vqs0 ^ N128;
- *q1_f0 = vqs1 ^ N128;
-}
-
-static INLINE void mbfilter_dspr2(uint32_t *op3, uint32_t *op2, uint32_t *op1,
- uint32_t *op0, uint32_t *oq0, uint32_t *oq1,
- uint32_t *oq2, uint32_t *oq3) {
- /* use a 7 tap filter [1, 1, 1, 2, 1, 1, 1] for flat line */
- const uint32_t p3 = *op3, p2 = *op2, p1 = *op1, p0 = *op0;
- const uint32_t q0 = *oq0, q1 = *oq1, q2 = *oq2, q3 = *oq3;
- uint32_t res_op2, res_op1, res_op0;
- uint32_t res_oq0, res_oq1, res_oq2;
- uint32_t tmp;
- uint32_t add_p210_q012;
- uint32_t u32Four = 0x00040004;
-
- /* *op2 = ROUND_POWER_OF_TWO(p3 + p3 + p3 + p2 + p2 + p1 + p0 + q0, 3) 1 */
- /* *op1 = ROUND_POWER_OF_TWO(p3 + p3 + p2 + p1 + p1 + p0 + q0 + q1, 3) 2 */
- /* *op0 = ROUND_POWER_OF_TWO(p3 + p2 + p1 + p0 + p0 + q0 + q1 + q2, 3) 3 */
- /* *oq0 = ROUND_POWER_OF_TWO(p2 + p1 + p0 + q0 + q0 + q1 + q2 + q3, 3) 4 */
- /* *oq1 = ROUND_POWER_OF_TWO(p1 + p0 + q0 + q1 + q1 + q2 + q3 + q3, 3) 5 */
- /* *oq2 = ROUND_POWER_OF_TWO(p0 + q0 + q1 + q2 + q2 + q3 + q3 + q3, 3) 6 */
-
- __asm__ __volatile__(
- "addu.ph %[add_p210_q012], %[p2], %[p1] \n\t"
- "addu.ph %[add_p210_q012], %[add_p210_q012], %[p0] \n\t"
- "addu.ph %[add_p210_q012], %[add_p210_q012], %[q0] \n\t"
- "addu.ph %[add_p210_q012], %[add_p210_q012], %[q1] \n\t"
- "addu.ph %[add_p210_q012], %[add_p210_q012], %[q2] \n\t"
- "addu.ph %[add_p210_q012], %[add_p210_q012], %[u32Four] \n\t"
-
- "shll.ph %[tmp], %[p3], 1 \n\t"
- "addu.ph %[res_op2], %[tmp], %[p3] \n\t"
- "addu.ph %[res_op1], %[p3], %[p3] \n\t"
- "addu.ph %[res_op2], %[res_op2], %[p2] \n\t"
- "addu.ph %[res_op1], %[res_op1], %[p1] \n\t"
- "addu.ph %[res_op2], %[res_op2], %[add_p210_q012] \n\t"
- "addu.ph %[res_op1], %[res_op1], %[add_p210_q012] \n\t"
- "subu.ph %[res_op2], %[res_op2], %[q1] \n\t"
- "subu.ph %[res_op1], %[res_op1], %[q2] \n\t"
- "subu.ph %[res_op2], %[res_op2], %[q2] \n\t"
- "shrl.ph %[res_op1], %[res_op1], 3 \n\t"
- "shrl.ph %[res_op2], %[res_op2], 3 \n\t"
- "addu.ph %[res_op0], %[p3], %[p0] \n\t"
- "addu.ph %[res_oq0], %[q0], %[q3] \n\t"
- "addu.ph %[res_op0], %[res_op0], %[add_p210_q012] \n\t"
- "addu.ph %[res_oq0], %[res_oq0], %[add_p210_q012] \n\t"
- "addu.ph %[res_oq1], %[q3], %[q3] \n\t"
- "shll.ph %[tmp], %[q3], 1 \n\t"
- "addu.ph %[res_oq1], %[res_oq1], %[q1] \n\t"
- "addu.ph %[res_oq2], %[tmp], %[q3] \n\t"
- "addu.ph %[res_oq1], %[res_oq1], %[add_p210_q012] \n\t"
- "addu.ph %[res_oq2], %[res_oq2], %[add_p210_q012] \n\t"
- "subu.ph %[res_oq1], %[res_oq1], %[p2] \n\t"
- "addu.ph %[res_oq2], %[res_oq2], %[q2] \n\t"
- "shrl.ph %[res_oq1], %[res_oq1], 3 \n\t"
- "subu.ph %[res_oq2], %[res_oq2], %[p2] \n\t"
- "shrl.ph %[res_oq0], %[res_oq0], 3 \n\t"
- "subu.ph %[res_oq2], %[res_oq2], %[p1] \n\t"
- "shrl.ph %[res_op0], %[res_op0], 3 \n\t"
- "shrl.ph %[res_oq2], %[res_oq2], 3 \n\t"
-
- : [add_p210_q012] "=&r"(add_p210_q012), [tmp] "=&r"(tmp),
- [res_op2] "=&r"(res_op2), [res_op1] "=&r"(res_op1),
- [res_op0] "=&r"(res_op0), [res_oq0] "=&r"(res_oq0),
- [res_oq1] "=&r"(res_oq1), [res_oq2] "=&r"(res_oq2)
- : [p0] "r"(p0), [q0] "r"(q0), [p1] "r"(p1), [q1] "r"(q1), [p2] "r"(p2),
- [q2] "r"(q2), [p3] "r"(p3), [q3] "r"(q3), [u32Four] "r"(u32Four));
-
- *op2 = res_op2;
- *op1 = res_op1;
- *op0 = res_op0;
- *oq0 = res_oq0;
- *oq1 = res_oq1;
- *oq2 = res_oq2;
-}
-
-static INLINE void mbfilter1_dspr2(uint32_t p3, uint32_t p2, uint32_t p1,
- uint32_t p0, uint32_t q0, uint32_t q1,
- uint32_t q2, uint32_t q3, uint32_t *op2_f1,
- uint32_t *op1_f1, uint32_t *op0_f1,
- uint32_t *oq0_f1, uint32_t *oq1_f1,
- uint32_t *oq2_f1) {
- /* use a 7 tap filter [1, 1, 1, 2, 1, 1, 1] for flat line */
- uint32_t res_op2, res_op1, res_op0;
- uint32_t res_oq0, res_oq1, res_oq2;
- uint32_t tmp;
- uint32_t add_p210_q012;
- uint32_t u32Four = 0x00040004;
-
- /* *op2 = ROUND_POWER_OF_TWO(p3 + p3 + p3 + p2 + p2 + p1 + p0 + q0, 3) 1 */
- /* *op1 = ROUND_POWER_OF_TWO(p3 + p3 + p2 + p1 + p1 + p0 + q0 + q1, 3) 2 */
- /* *op0 = ROUND_POWER_OF_TWO(p3 + p2 + p1 + p0 + p0 + q0 + q1 + q2, 3) 3 */
- /* *oq0 = ROUND_POWER_OF_TWO(p2 + p1 + p0 + q0 + q0 + q1 + q2 + q3, 3) 4 */
- /* *oq1 = ROUND_POWER_OF_TWO(p1 + p0 + q0 + q1 + q1 + q2 + q3 + q3, 3) 5 */
- /* *oq2 = ROUND_POWER_OF_TWO(p0 + q0 + q1 + q2 + q2 + q3 + q3 + q3, 3) 6 */
-
- __asm__ __volatile__(
- "addu.ph %[add_p210_q012], %[p2], %[p1] \n\t"
- "addu.ph %[add_p210_q012], %[add_p210_q012], %[p0] \n\t"
- "addu.ph %[add_p210_q012], %[add_p210_q012], %[q0] \n\t"
- "addu.ph %[add_p210_q012], %[add_p210_q012], %[q1] \n\t"
- "addu.ph %[add_p210_q012], %[add_p210_q012], %[q2] \n\t"
- "addu.ph %[add_p210_q012], %[add_p210_q012], %[u32Four] \n\t"
-
- "shll.ph %[tmp], %[p3], 1 \n\t"
- "addu.ph %[res_op2], %[tmp], %[p3] \n\t"
- "addu.ph %[res_op1], %[p3], %[p3] \n\t"
- "addu.ph %[res_op2], %[res_op2], %[p2] \n\t"
- "addu.ph %[res_op1], %[res_op1], %[p1] \n\t"
- "addu.ph %[res_op2], %[res_op2], %[add_p210_q012] \n\t"
- "addu.ph %[res_op1], %[res_op1], %[add_p210_q012] \n\t"
- "subu.ph %[res_op2], %[res_op2], %[q1] \n\t"
- "subu.ph %[res_op1], %[res_op1], %[q2] \n\t"
- "subu.ph %[res_op2], %[res_op2], %[q2] \n\t"
- "shrl.ph %[res_op1], %[res_op1], 3 \n\t"
- "shrl.ph %[res_op2], %[res_op2], 3 \n\t"
- "addu.ph %[res_op0], %[p3], %[p0] \n\t"
- "addu.ph %[res_oq0], %[q0], %[q3] \n\t"
- "addu.ph %[res_op0], %[res_op0], %[add_p210_q012] \n\t"
- "addu.ph %[res_oq0], %[res_oq0], %[add_p210_q012] \n\t"
- "addu.ph %[res_oq1], %[q3], %[q3] \n\t"
- "shll.ph %[tmp], %[q3], 1 \n\t"
- "addu.ph %[res_oq1], %[res_oq1], %[q1] \n\t"
- "addu.ph %[res_oq2], %[tmp], %[q3] \n\t"
- "addu.ph %[res_oq1], %[res_oq1], %[add_p210_q012] \n\t"
- "addu.ph %[res_oq2], %[res_oq2], %[add_p210_q012] \n\t"
- "subu.ph %[res_oq1], %[res_oq1], %[p2] \n\t"
- "addu.ph %[res_oq2], %[res_oq2], %[q2] \n\t"
- "shrl.ph %[res_oq1], %[res_oq1], 3 \n\t"
- "subu.ph %[res_oq2], %[res_oq2], %[p2] \n\t"
- "shrl.ph %[res_oq0], %[res_oq0], 3 \n\t"
- "subu.ph %[res_oq2], %[res_oq2], %[p1] \n\t"
- "shrl.ph %[res_op0], %[res_op0], 3 \n\t"
- "shrl.ph %[res_oq2], %[res_oq2], 3 \n\t"
-
- : [add_p210_q012] "=&r"(add_p210_q012), [tmp] "=&r"(tmp),
- [res_op2] "=&r"(res_op2), [res_op1] "=&r"(res_op1),
- [res_op0] "=&r"(res_op0), [res_oq0] "=&r"(res_oq0),
- [res_oq1] "=&r"(res_oq1), [res_oq2] "=&r"(res_oq2)
- : [p0] "r"(p0), [q0] "r"(q0), [p1] "r"(p1), [q1] "r"(q1), [p2] "r"(p2),
- [q2] "r"(q2), [p3] "r"(p3), [q3] "r"(q3), [u32Four] "r"(u32Four));
-
- *op2_f1 = res_op2;
- *op1_f1 = res_op1;
- *op0_f1 = res_op0;
- *oq0_f1 = res_oq0;
- *oq1_f1 = res_oq1;
- *oq2_f1 = res_oq2;
-}
-
-static INLINE void wide_mbfilter_dspr2(
- uint32_t *op7, uint32_t *op6, uint32_t *op5, uint32_t *op4, uint32_t *op3,
- uint32_t *op2, uint32_t *op1, uint32_t *op0, uint32_t *oq0, uint32_t *oq1,
- uint32_t *oq2, uint32_t *oq3, uint32_t *oq4, uint32_t *oq5, uint32_t *oq6,
- uint32_t *oq7) {
- const uint32_t p7 = *op7, p6 = *op6, p5 = *op5, p4 = *op4;
- const uint32_t p3 = *op3, p2 = *op2, p1 = *op1, p0 = *op0;
- const uint32_t q0 = *oq0, q1 = *oq1, q2 = *oq2, q3 = *oq3;
- const uint32_t q4 = *oq4, q5 = *oq5, q6 = *oq6, q7 = *oq7;
- uint32_t res_op6, res_op5, res_op4, res_op3, res_op2, res_op1, res_op0;
- uint32_t res_oq0, res_oq1, res_oq2, res_oq3, res_oq4, res_oq5, res_oq6;
- uint32_t tmp;
- uint32_t add_p6toq6;
- uint32_t u32Eight = 0x00080008;
-
- __asm__ __volatile__(
- /* addition of p6,p5,p4,p3,p2,p1,p0,q0,q1,q2,q3,q4,q5,q6
- which is used most of the time */
- "addu.ph %[add_p6toq6], %[p6], %[p5] \n\t"
- "addu.ph %[add_p6toq6], %[add_p6toq6], %[p4] \n\t"
- "addu.ph %[add_p6toq6], %[add_p6toq6], %[p3] \n\t"
- "addu.ph %[add_p6toq6], %[add_p6toq6], %[p2] \n\t"
- "addu.ph %[add_p6toq6], %[add_p6toq6], %[p1] \n\t"
- "addu.ph %[add_p6toq6], %[add_p6toq6], %[p0] \n\t"
- "addu.ph %[add_p6toq6], %[add_p6toq6], %[q0] \n\t"
- "addu.ph %[add_p6toq6], %[add_p6toq6], %[q1] \n\t"
- "addu.ph %[add_p6toq6], %[add_p6toq6], %[q2] \n\t"
- "addu.ph %[add_p6toq6], %[add_p6toq6], %[q3] \n\t"
- "addu.ph %[add_p6toq6], %[add_p6toq6], %[q4] \n\t"
- "addu.ph %[add_p6toq6], %[add_p6toq6], %[q5] \n\t"
- "addu.ph %[add_p6toq6], %[add_p6toq6], %[q6] \n\t"
- "addu.ph %[add_p6toq6], %[add_p6toq6], %[u32Eight] \n\t"
-
- : [add_p6toq6] "=&r"(add_p6toq6)
- : [p6] "r"(p6), [p5] "r"(p5), [p4] "r"(p4), [p3] "r"(p3), [p2] "r"(p2),
- [p1] "r"(p1), [p0] "r"(p0), [q0] "r"(q0), [q1] "r"(q1), [q2] "r"(q2),
- [q3] "r"(q3), [q4] "r"(q4), [q5] "r"(q5), [q6] "r"(q6),
- [u32Eight] "r"(u32Eight));
-
- __asm__ __volatile__(
- /* *op6 = ROUND_POWER_OF_TWO(p7 * 7 + p6 * 2 + p5 + p4 +
- p3 + p2 + p1 + p0 + q0, 4) */
- "shll.ph %[tmp], %[p7], 3 \n\t"
- "subu.ph %[res_op6], %[tmp], %[p7] \n\t"
- "addu.ph %[res_op6], %[res_op6], %[p6] \n\t"
- "addu.ph %[res_op6], %[res_op6], %[add_p6toq6] \n\t"
- "subu.ph %[res_op6], %[res_op6], %[q1] \n\t"
- "subu.ph %[res_op6], %[res_op6], %[q2] \n\t"
- "subu.ph %[res_op6], %[res_op6], %[q3] \n\t"
- "subu.ph %[res_op6], %[res_op6], %[q4] \n\t"
- "subu.ph %[res_op6], %[res_op6], %[q5] \n\t"
- "subu.ph %[res_op6], %[res_op6], %[q6] \n\t"
- "shrl.ph %[res_op6], %[res_op6], 4 \n\t"
-
- /* *op5 = ROUND_POWER_OF_TWO(p7 * 6 + p6 + p5 * 2 + p4 + p3 +
- p2 + p1 + p0 + q0 + q1, 4) */
- "shll.ph %[tmp], %[p7], 2 \n\t"
- "addu.ph %[res_op5], %[tmp], %[p7] \n\t"
- "addu.ph %[res_op5], %[res_op5], %[p7] \n\t"
- "addu.ph %[res_op5], %[res_op5], %[p5] \n\t"
- "addu.ph %[res_op5], %[res_op5], %[add_p6toq6] \n\t"
- "subu.ph %[res_op5], %[res_op5], %[q2] \n\t"
- "subu.ph %[res_op5], %[res_op5], %[q3] \n\t"
- "subu.ph %[res_op5], %[res_op5], %[q4] \n\t"
- "subu.ph %[res_op5], %[res_op5], %[q5] \n\t"
- "subu.ph %[res_op5], %[res_op5], %[q6] \n\t"
- "shrl.ph %[res_op5], %[res_op5], 4 \n\t"
-
- /* *op4 = ROUND_POWER_OF_TWO(p7 * 5 + p6 + p5 + p4 * 2 + p3 + p2 +
- p1 + p0 + q0 + q1 + q2, 4) */
- "shll.ph %[tmp], %[p7], 2 \n\t"
- "addu.ph %[res_op4], %[tmp], %[p7] \n\t"
- "addu.ph %[res_op4], %[res_op4], %[p4] \n\t"
- "addu.ph %[res_op4], %[res_op4], %[add_p6toq6] \n\t"
- "subu.ph %[res_op4], %[res_op4], %[q3] \n\t"
- "subu.ph %[res_op4], %[res_op4], %[q4] \n\t"
- "subu.ph %[res_op4], %[res_op4], %[q5] \n\t"
- "subu.ph %[res_op4], %[res_op4], %[q6] \n\t"
- "shrl.ph %[res_op4], %[res_op4], 4 \n\t"
-
- /* *op3 = ROUND_POWER_OF_TWO(p7 * 4 + p6 + p5 + p4 + p3 * 2 + p2 +
- p1 + p0 + q0 + q1 + q2 + q3, 4) */
- "shll.ph %[tmp], %[p7], 2 \n\t"
- "addu.ph %[res_op3], %[tmp], %[p3] \n\t"
- "addu.ph %[res_op3], %[res_op3], %[add_p6toq6] \n\t"
- "subu.ph %[res_op3], %[res_op3], %[q4] \n\t"
- "subu.ph %[res_op3], %[res_op3], %[q5] \n\t"
- "subu.ph %[res_op3], %[res_op3], %[q6] \n\t"
- "shrl.ph %[res_op3], %[res_op3], 4 \n\t"
-
- /* *op2 = ROUND_POWER_OF_TWO(p7 * 3 + p6 + p5 + p4 + p3 + p2 * 2 + p1 +
- p0 + q0 + q1 + q2 + q3 + q4, 4) */
- "shll.ph %[tmp], %[p7], 1 \n\t"
- "addu.ph %[res_op2], %[tmp], %[p7] \n\t"
- "addu.ph %[res_op2], %[res_op2], %[p2] \n\t"
- "addu.ph %[res_op2], %[res_op2], %[add_p6toq6] \n\t"
- "subu.ph %[res_op2], %[res_op2], %[q5] \n\t"
- "subu.ph %[res_op2], %[res_op2], %[q6] \n\t"
- "shrl.ph %[res_op2], %[res_op2], 4 \n\t"
-
- /* *op1 = ROUND_POWER_OF_TWO(p7 * 2 + p6 + p5 + p4 + p3 + p2 + p1 * 2 +
- p0 + q0 + q1 + q2 + q3 + q4 + q5, 4); */
- "shll.ph %[tmp], %[p7], 1 \n\t"
- "addu.ph %[res_op1], %[tmp], %[p1] \n\t"
- "addu.ph %[res_op1], %[res_op1], %[add_p6toq6] \n\t"
- "subu.ph %[res_op1], %[res_op1], %[q6] \n\t"
- "shrl.ph %[res_op1], %[res_op1], 4 \n\t"
-
- /* *op0 = ROUND_POWER_OF_TWO(p7 + p6 + p5 + p4 + p3 + p2 + p1 + p0 * 2 +
- q0 + q1 + q2 + q3 + q4 + q5 + q6, 4) */
- "addu.ph %[res_op0], %[p7], %[p0] \n\t"
- "addu.ph %[res_op0], %[res_op0], %[add_p6toq6] \n\t"
- "shrl.ph %[res_op0], %[res_op0], 4 \n\t"
-
- : [res_op6] "=&r"(res_op6), [res_op5] "=&r"(res_op5),
- [res_op4] "=&r"(res_op4), [res_op3] "=&r"(res_op3),
- [res_op2] "=&r"(res_op2), [res_op1] "=&r"(res_op1),
- [res_op0] "=&r"(res_op0), [tmp] "=&r"(tmp)
- : [p7] "r"(p7), [p6] "r"(p6), [p5] "r"(p5), [p4] "r"(p4), [p3] "r"(p3),
- [p2] "r"(p2), [p1] "r"(p1), [p0] "r"(p0), [q2] "r"(q2), [q1] "r"(q1),
- [q3] "r"(q3), [q4] "r"(q4), [q5] "r"(q5), [q6] "r"(q6),
- [add_p6toq6] "r"(add_p6toq6));
-
- *op6 = res_op6;
- *op5 = res_op5;
- *op4 = res_op4;
- *op3 = res_op3;
- *op2 = res_op2;
- *op1 = res_op1;
- *op0 = res_op0;
-
- __asm__ __volatile__(
- /* *oq0 = ROUND_POWER_OF_TWO(p6 + p5 + p4 + p3 + p2 + p1 + p0 + q0 * 2 +
- q1 + q2 + q3 + q4 + q5 + q6 + q7, 4); */
- "addu.ph %[res_oq0], %[q7], %[q0] \n\t"
- "addu.ph %[res_oq0], %[res_oq0], %[add_p6toq6] \n\t"
- "shrl.ph %[res_oq0], %[res_oq0], 4 \n\t"
-
- /* *oq1 = ROUND_POWER_OF_TWO(p5 + p4 + p3 + p2 + p1 + p0 + q0 + q1 * 2 +
- q2 + q3 + q4 + q5 + q6 + q7 * 2, 4) */
- "shll.ph %[tmp], %[q7], 1 \n\t"
- "addu.ph %[res_oq1], %[tmp], %[q1] \n\t"
- "addu.ph %[res_oq1], %[res_oq1], %[add_p6toq6] \n\t"
- "subu.ph %[res_oq1], %[res_oq1], %[p6] \n\t"
- "shrl.ph %[res_oq1], %[res_oq1], 4 \n\t"
-
- /* *oq2 = ROUND_POWER_OF_TWO(p4 + p3 + p2 + p1 + p0 + q0 + q1 + q2 * 2 +
- q3 + q4 + q5 + q6 + q7 * 3, 4) */
- "shll.ph %[tmp], %[q7], 1 \n\t"
- "addu.ph %[res_oq2], %[tmp], %[q7] \n\t"
- "addu.ph %[res_oq2], %[res_oq2], %[q2] \n\t"
- "addu.ph %[res_oq2], %[res_oq2], %[add_p6toq6] \n\t"
- "subu.ph %[res_oq2], %[res_oq2], %[p5] \n\t"
- "subu.ph %[res_oq2], %[res_oq2], %[p6] \n\t"
- "shrl.ph %[res_oq2], %[res_oq2], 4 \n\t"
-
- /* *oq3 = ROUND_POWER_OF_TWO(p3 + p2 + p1 + p0 + q0 + q1 + q2 +
- q3 * 2 + q4 + q5 + q6 + q7 * 4, 4) */
- "shll.ph %[tmp], %[q7], 2 \n\t"
- "addu.ph %[res_oq3], %[tmp], %[q3] \n\t"
- "addu.ph %[res_oq3], %[res_oq3], %[add_p6toq6] \n\t"
- "subu.ph %[res_oq3], %[res_oq3], %[p4] \n\t"
- "subu.ph %[res_oq3], %[res_oq3], %[p5] \n\t"
- "subu.ph %[res_oq3], %[res_oq3], %[p6] \n\t"
- "shrl.ph %[res_oq3], %[res_oq3], 4 \n\t"
-
- /* *oq4 = ROUND_POWER_OF_TWO(p2 + p1 + p0 + q0 + q1 + q2 + q3 +
- q4 * 2 + q5 + q6 + q7 * 5, 4) */
- "shll.ph %[tmp], %[q7], 2 \n\t"
- "addu.ph %[res_oq4], %[tmp], %[q7] \n\t"
- "addu.ph %[res_oq4], %[res_oq4], %[q4] \n\t"
- "addu.ph %[res_oq4], %[res_oq4], %[add_p6toq6] \n\t"
- "subu.ph %[res_oq4], %[res_oq4], %[p3] \n\t"
- "subu.ph %[res_oq4], %[res_oq4], %[p4] \n\t"
- "subu.ph %[res_oq4], %[res_oq4], %[p5] \n\t"
- "subu.ph %[res_oq4], %[res_oq4], %[p6] \n\t"
- "shrl.ph %[res_oq4], %[res_oq4], 4 \n\t"
-
- /* *oq5 = ROUND_POWER_OF_TWO(p1 + p0 + q0 + q1 + q2 + q3 + q4 +
- q5 * 2 + q6 + q7 * 6, 4) */
- "shll.ph %[tmp], %[q7], 2 \n\t"
- "addu.ph %[res_oq5], %[tmp], %[q7] \n\t"
- "addu.ph %[res_oq5], %[res_oq5], %[q7] \n\t"
- "addu.ph %[res_oq5], %[res_oq5], %[q5] \n\t"
- "addu.ph %[res_oq5], %[res_oq5], %[add_p6toq6] \n\t"
- "subu.ph %[res_oq5], %[res_oq5], %[p2] \n\t"
- "subu.ph %[res_oq5], %[res_oq5], %[p3] \n\t"
- "subu.ph %[res_oq5], %[res_oq5], %[p4] \n\t"
- "subu.ph %[res_oq5], %[res_oq5], %[p5] \n\t"
- "subu.ph %[res_oq5], %[res_oq5], %[p6] \n\t"
- "shrl.ph %[res_oq5], %[res_oq5], 4 \n\t"
-
- /* *oq6 = ROUND_POWER_OF_TWO(p0 + q0 + q1 + q2 + q3 +
- q4 + q5 + q6 * 2 + q7 * 7, 4) */
- "shll.ph %[tmp], %[q7], 3 \n\t"
- "subu.ph %[res_oq6], %[tmp], %[q7] \n\t"
- "addu.ph %[res_oq6], %[res_oq6], %[q6] \n\t"
- "addu.ph %[res_oq6], %[res_oq6], %[add_p6toq6] \n\t"
- "subu.ph %[res_oq6], %[res_oq6], %[p1] \n\t"
- "subu.ph %[res_oq6], %[res_oq6], %[p2] \n\t"
- "subu.ph %[res_oq6], %[res_oq6], %[p3] \n\t"
- "subu.ph %[res_oq6], %[res_oq6], %[p4] \n\t"
- "subu.ph %[res_oq6], %[res_oq6], %[p5] \n\t"
- "subu.ph %[res_oq6], %[res_oq6], %[p6] \n\t"
- "shrl.ph %[res_oq6], %[res_oq6], 4 \n\t"
-
- : [res_oq6] "=&r"(res_oq6), [res_oq5] "=&r"(res_oq5),
- [res_oq4] "=&r"(res_oq4), [res_oq3] "=&r"(res_oq3),
- [res_oq2] "=&r"(res_oq2), [res_oq1] "=&r"(res_oq1),
- [res_oq0] "=&r"(res_oq0), [tmp] "=&r"(tmp)
- : [q7] "r"(q7), [q6] "r"(q6), [q5] "r"(q5), [q4] "r"(q4), [q3] "r"(q3),
- [q2] "r"(q2), [q1] "r"(q1), [q0] "r"(q0), [p1] "r"(p1), [p2] "r"(p2),
- [p3] "r"(p3), [p4] "r"(p4), [p5] "r"(p5), [p6] "r"(p6),
- [add_p6toq6] "r"(add_p6toq6));
-
- *oq0 = res_oq0;
- *oq1 = res_oq1;
- *oq2 = res_oq2;
- *oq3 = res_oq3;
- *oq4 = res_oq4;
- *oq5 = res_oq5;
- *oq6 = res_oq6;
-}
-#endif // #if HAVE_DSPR2
-#ifdef __cplusplus
-} // extern "C"
-#endif
-
-#endif // AOM_AOM_DSP_MIPS_LOOPFILTER_FILTERS_DSPR2_H_
diff --git a/third_party/aom/aom_dsp/mips/loopfilter_macros_dspr2.h b/third_party/aom/aom_dsp/mips/loopfilter_macros_dspr2.h
deleted file mode 100644
index 62295d69d..000000000
--- a/third_party/aom/aom_dsp/mips/loopfilter_macros_dspr2.h
+++ /dev/null
@@ -1,437 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#ifndef AOM_AOM_DSP_MIPS_LOOPFILTER_MACROS_DSPR2_H_
-#define AOM_AOM_DSP_MIPS_LOOPFILTER_MACROS_DSPR2_H_
-
-#include <stdlib.h>
-
-#include "config/aom_dsp_rtcd.h"
-
-#include "aom/aom_integer.h"
-#include "aom_mem/aom_mem.h"
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-#if HAVE_DSPR2
-#define STORE_F0() \
- { \
- __asm__ __volatile__( \
- "sb %[q1_f0], 1(%[s4]) \n\t" \
- "sb %[q0_f0], 0(%[s4]) \n\t" \
- "sb %[p0_f0], -1(%[s4]) \n\t" \
- "sb %[p1_f0], -2(%[s4]) \n\t" \
- \
- : \
- : [q1_f0] "r"(q1_f0), [q0_f0] "r"(q0_f0), [p0_f0] "r"(p0_f0), \
- [p1_f0] "r"(p1_f0), [s4] "r"(s4)); \
- \
- __asm__ __volatile__( \
- "srl %[q1_f0], %[q1_f0], 8 \n\t" \
- "srl %[q0_f0], %[q0_f0], 8 \n\t" \
- "srl %[p0_f0], %[p0_f0], 8 \n\t" \
- "srl %[p1_f0], %[p1_f0], 8 \n\t" \
- \
- : [q1_f0] "+r"(q1_f0), [q0_f0] "+r"(q0_f0), [p0_f0] "+r"(p0_f0), \
- [p1_f0] "+r"(p1_f0) \
- :); \
- \
- __asm__ __volatile__( \
- "sb %[q1_f0], 1(%[s3]) \n\t" \
- "sb %[q0_f0], 0(%[s3]) \n\t" \
- "sb %[p0_f0], -1(%[s3]) \n\t" \
- "sb %[p1_f0], -2(%[s3]) \n\t" \
- \
- : [p1_f0] "+r"(p1_f0) \
- : [q1_f0] "r"(q1_f0), [q0_f0] "r"(q0_f0), [s3] "r"(s3), \
- [p0_f0] "r"(p0_f0)); \
- \
- __asm__ __volatile__( \
- "srl %[q1_f0], %[q1_f0], 8 \n\t" \
- "srl %[q0_f0], %[q0_f0], 8 \n\t" \
- "srl %[p0_f0], %[p0_f0], 8 \n\t" \
- "srl %[p1_f0], %[p1_f0], 8 \n\t" \
- \
- : [q1_f0] "+r"(q1_f0), [q0_f0] "+r"(q0_f0), [p0_f0] "+r"(p0_f0), \
- [p1_f0] "+r"(p1_f0) \
- :); \
- \
- __asm__ __volatile__( \
- "sb %[q1_f0], 1(%[s2]) \n\t" \
- "sb %[q0_f0], 0(%[s2]) \n\t" \
- "sb %[p0_f0], -1(%[s2]) \n\t" \
- "sb %[p1_f0], -2(%[s2]) \n\t" \
- \
- : \
- : [q1_f0] "r"(q1_f0), [q0_f0] "r"(q0_f0), [p0_f0] "r"(p0_f0), \
- [p1_f0] "r"(p1_f0), [s2] "r"(s2)); \
- \
- __asm__ __volatile__( \
- "srl %[q1_f0], %[q1_f0], 8 \n\t" \
- "srl %[q0_f0], %[q0_f0], 8 \n\t" \
- "srl %[p0_f0], %[p0_f0], 8 \n\t" \
- "srl %[p1_f0], %[p1_f0], 8 \n\t" \
- \
- : [q1_f0] "+r"(q1_f0), [q0_f0] "+r"(q0_f0), [p0_f0] "+r"(p0_f0), \
- [p1_f0] "+r"(p1_f0) \
- :); \
- \
- __asm__ __volatile__( \
- "sb %[q1_f0], 1(%[s1]) \n\t" \
- "sb %[q0_f0], 0(%[s1]) \n\t" \
- "sb %[p0_f0], -1(%[s1]) \n\t" \
- "sb %[p1_f0], -2(%[s1]) \n\t" \
- \
- : \
- : [q1_f0] "r"(q1_f0), [q0_f0] "r"(q0_f0), [p0_f0] "r"(p0_f0), \
- [p1_f0] "r"(p1_f0), [s1] "r"(s1)); \
- }
-
-#define STORE_F1() \
- { \
- __asm__ __volatile__( \
- "sb %[q2_r], 2(%[s4]) \n\t" \
- "sb %[q1_r], 1(%[s4]) \n\t" \
- "sb %[q0_r], 0(%[s4]) \n\t" \
- "sb %[p0_r], -1(%[s4]) \n\t" \
- "sb %[p1_r], -2(%[s4]) \n\t" \
- "sb %[p2_r], -3(%[s4]) \n\t" \
- \
- : \
- : [q2_r] "r"(q2_r), [q1_r] "r"(q1_r), [q0_r] "r"(q0_r), \
- [p0_r] "r"(p0_r), [p1_r] "r"(p1_r), [p2_r] "r"(p2_r), [s4] "r"(s4)); \
- \
- __asm__ __volatile__( \
- "srl %[q2_r], %[q2_r], 16 \n\t" \
- "srl %[q1_r], %[q1_r], 16 \n\t" \
- "srl %[q0_r], %[q0_r], 16 \n\t" \
- "srl %[p0_r], %[p0_r], 16 \n\t" \
- "srl %[p1_r], %[p1_r], 16 \n\t" \
- "srl %[p2_r], %[p2_r], 16 \n\t" \
- \
- : [q2_r] "+r"(q2_r), [q1_r] "+r"(q1_r), [q0_r] "+r"(q0_r), \
- [p0_r] "+r"(p0_r), [p1_r] "+r"(p1_r), [p2_r] "+r"(p2_r) \
- :); \
- \
- __asm__ __volatile__( \
- "sb %[q2_r], 2(%[s3]) \n\t" \
- "sb %[q1_r], 1(%[s3]) \n\t" \
- "sb %[q0_r], 0(%[s3]) \n\t" \
- "sb %[p0_r], -1(%[s3]) \n\t" \
- "sb %[p1_r], -2(%[s3]) \n\t" \
- "sb %[p2_r], -3(%[s3]) \n\t" \
- \
- : \
- : [q2_r] "r"(q2_r), [q1_r] "r"(q1_r), [q0_r] "r"(q0_r), \
- [p0_r] "r"(p0_r), [p1_r] "r"(p1_r), [p2_r] "r"(p2_r), [s3] "r"(s3)); \
- \
- __asm__ __volatile__( \
- "sb %[q2_l], 2(%[s2]) \n\t" \
- "sb %[q1_l], 1(%[s2]) \n\t" \
- "sb %[q0_l], 0(%[s2]) \n\t" \
- "sb %[p0_l], -1(%[s2]) \n\t" \
- "sb %[p1_l], -2(%[s2]) \n\t" \
- "sb %[p2_l], -3(%[s2]) \n\t" \
- \
- : \
- : [q2_l] "r"(q2_l), [q1_l] "r"(q1_l), [q0_l] "r"(q0_l), \
- [p0_l] "r"(p0_l), [p1_l] "r"(p1_l), [p2_l] "r"(p2_l), [s2] "r"(s2)); \
- \
- __asm__ __volatile__( \
- "srl %[q2_l], %[q2_l], 16 \n\t" \
- "srl %[q1_l], %[q1_l], 16 \n\t" \
- "srl %[q0_l], %[q0_l], 16 \n\t" \
- "srl %[p0_l], %[p0_l], 16 \n\t" \
- "srl %[p1_l], %[p1_l], 16 \n\t" \
- "srl %[p2_l], %[p2_l], 16 \n\t" \
- \
- : [q2_l] "+r"(q2_l), [q1_l] "+r"(q1_l), [q0_l] "+r"(q0_l), \
- [p0_l] "+r"(p0_l), [p1_l] "+r"(p1_l), [p2_l] "+r"(p2_l) \
- :); \
- \
- __asm__ __volatile__( \
- "sb %[q2_l], 2(%[s1]) \n\t" \
- "sb %[q1_l], 1(%[s1]) \n\t" \
- "sb %[q0_l], 0(%[s1]) \n\t" \
- "sb %[p0_l], -1(%[s1]) \n\t" \
- "sb %[p1_l], -2(%[s1]) \n\t" \
- "sb %[p2_l], -3(%[s1]) \n\t" \
- \
- : \
- : [q2_l] "r"(q2_l), [q1_l] "r"(q1_l), [q0_l] "r"(q0_l), \
- [p0_l] "r"(p0_l), [p1_l] "r"(p1_l), [p2_l] "r"(p2_l), [s1] "r"(s1)); \
- }
-
-#define STORE_F2() \
- { \
- __asm__ __volatile__( \
- "sb %[q6_r], 6(%[s4]) \n\t" \
- "sb %[q5_r], 5(%[s4]) \n\t" \
- "sb %[q4_r], 4(%[s4]) \n\t" \
- "sb %[q3_r], 3(%[s4]) \n\t" \
- "sb %[q2_r], 2(%[s4]) \n\t" \
- "sb %[q1_r], 1(%[s4]) \n\t" \
- "sb %[q0_r], 0(%[s4]) \n\t" \
- "sb %[p0_r], -1(%[s4]) \n\t" \
- "sb %[p1_r], -2(%[s4]) \n\t" \
- "sb %[p2_r], -3(%[s4]) \n\t" \
- "sb %[p3_r], -4(%[s4]) \n\t" \
- "sb %[p4_r], -5(%[s4]) \n\t" \
- "sb %[p5_r], -6(%[s4]) \n\t" \
- "sb %[p6_r], -7(%[s4]) \n\t" \
- \
- : \
- : [q6_r] "r"(q6_r), [q5_r] "r"(q5_r), [q4_r] "r"(q4_r), \
- [q3_r] "r"(q3_r), [q2_r] "r"(q2_r), [q1_r] "r"(q1_r), \
- [q0_r] "r"(q0_r), [p0_r] "r"(p0_r), [p1_r] "r"(p1_r), \
- [p2_r] "r"(p2_r), [p3_r] "r"(p3_r), [p4_r] "r"(p4_r), \
- [p5_r] "r"(p5_r), [p6_r] "r"(p6_r), [s4] "r"(s4)); \
- \
- __asm__ __volatile__( \
- "srl %[q6_r], %[q6_r], 16 \n\t" \
- "srl %[q5_r], %[q5_r], 16 \n\t" \
- "srl %[q4_r], %[q4_r], 16 \n\t" \
- "srl %[q3_r], %[q3_r], 16 \n\t" \
- "srl %[q2_r], %[q2_r], 16 \n\t" \
- "srl %[q1_r], %[q1_r], 16 \n\t" \
- "srl %[q0_r], %[q0_r], 16 \n\t" \
- "srl %[p0_r], %[p0_r], 16 \n\t" \
- "srl %[p1_r], %[p1_r], 16 \n\t" \
- "srl %[p2_r], %[p2_r], 16 \n\t" \
- "srl %[p3_r], %[p3_r], 16 \n\t" \
- "srl %[p4_r], %[p4_r], 16 \n\t" \
- "srl %[p5_r], %[p5_r], 16 \n\t" \
- "srl %[p6_r], %[p6_r], 16 \n\t" \
- \
- : [q6_r] "+r"(q6_r), [q5_r] "+r"(q5_r), [q4_r] "+r"(q4_r), \
- [q3_r] "+r"(q3_r), [q2_r] "+r"(q2_r), [q1_r] "+r"(q1_r), \
- [q0_r] "+r"(q0_r), [p0_r] "+r"(p0_r), [p1_r] "+r"(p1_r), \
- [p2_r] "+r"(p2_r), [p3_r] "+r"(p3_r), [p4_r] "+r"(p4_r), \
- [p5_r] "+r"(p5_r), [p6_r] "+r"(p6_r) \
- :); \
- \
- __asm__ __volatile__( \
- "sb %[q6_r], 6(%[s3]) \n\t" \
- "sb %[q5_r], 5(%[s3]) \n\t" \
- "sb %[q4_r], 4(%[s3]) \n\t" \
- "sb %[q3_r], 3(%[s3]) \n\t" \
- "sb %[q2_r], 2(%[s3]) \n\t" \
- "sb %[q1_r], 1(%[s3]) \n\t" \
- "sb %[q0_r], 0(%[s3]) \n\t" \
- "sb %[p0_r], -1(%[s3]) \n\t" \
- "sb %[p1_r], -2(%[s3]) \n\t" \
- "sb %[p2_r], -3(%[s3]) \n\t" \
- "sb %[p3_r], -4(%[s3]) \n\t" \
- "sb %[p4_r], -5(%[s3]) \n\t" \
- "sb %[p5_r], -6(%[s3]) \n\t" \
- "sb %[p6_r], -7(%[s3]) \n\t" \
- \
- : \
- : [q6_r] "r"(q6_r), [q5_r] "r"(q5_r), [q4_r] "r"(q4_r), \
- [q3_r] "r"(q3_r), [q2_r] "r"(q2_r), [q1_r] "r"(q1_r), \
- [q0_r] "r"(q0_r), [p0_r] "r"(p0_r), [p1_r] "r"(p1_r), \
- [p2_r] "r"(p2_r), [p3_r] "r"(p3_r), [p4_r] "r"(p4_r), \
- [p5_r] "r"(p5_r), [p6_r] "r"(p6_r), [s3] "r"(s3)); \
- \
- __asm__ __volatile__( \
- "sb %[q6_l], 6(%[s2]) \n\t" \
- "sb %[q5_l], 5(%[s2]) \n\t" \
- "sb %[q4_l], 4(%[s2]) \n\t" \
- "sb %[q3_l], 3(%[s2]) \n\t" \
- "sb %[q2_l], 2(%[s2]) \n\t" \
- "sb %[q1_l], 1(%[s2]) \n\t" \
- "sb %[q0_l], 0(%[s2]) \n\t" \
- "sb %[p0_l], -1(%[s2]) \n\t" \
- "sb %[p1_l], -2(%[s2]) \n\t" \
- "sb %[p2_l], -3(%[s2]) \n\t" \
- "sb %[p3_l], -4(%[s2]) \n\t" \
- "sb %[p4_l], -5(%[s2]) \n\t" \
- "sb %[p5_l], -6(%[s2]) \n\t" \
- "sb %[p6_l], -7(%[s2]) \n\t" \
- \
- : \
- : [q6_l] "r"(q6_l), [q5_l] "r"(q5_l), [q4_l] "r"(q4_l), \
- [q3_l] "r"(q3_l), [q2_l] "r"(q2_l), [q1_l] "r"(q1_l), \
- [q0_l] "r"(q0_l), [p0_l] "r"(p0_l), [p1_l] "r"(p1_l), \
- [p2_l] "r"(p2_l), [p3_l] "r"(p3_l), [p4_l] "r"(p4_l), \
- [p5_l] "r"(p5_l), [p6_l] "r"(p6_l), [s2] "r"(s2)); \
- \
- __asm__ __volatile__( \
- "srl %[q6_l], %[q6_l], 16 \n\t" \
- "srl %[q5_l], %[q5_l], 16 \n\t" \
- "srl %[q4_l], %[q4_l], 16 \n\t" \
- "srl %[q3_l], %[q3_l], 16 \n\t" \
- "srl %[q2_l], %[q2_l], 16 \n\t" \
- "srl %[q1_l], %[q1_l], 16 \n\t" \
- "srl %[q0_l], %[q0_l], 16 \n\t" \
- "srl %[p0_l], %[p0_l], 16 \n\t" \
- "srl %[p1_l], %[p1_l], 16 \n\t" \
- "srl %[p2_l], %[p2_l], 16 \n\t" \
- "srl %[p3_l], %[p3_l], 16 \n\t" \
- "srl %[p4_l], %[p4_l], 16 \n\t" \
- "srl %[p5_l], %[p5_l], 16 \n\t" \
- "srl %[p6_l], %[p6_l], 16 \n\t" \
- \
- : [q6_l] "+r"(q6_l), [q5_l] "+r"(q5_l), [q4_l] "+r"(q4_l), \
- [q3_l] "+r"(q3_l), [q2_l] "+r"(q2_l), [q1_l] "+r"(q1_l), \
- [q0_l] "+r"(q0_l), [p0_l] "+r"(p0_l), [p1_l] "+r"(p1_l), \
- [p2_l] "+r"(p2_l), [p3_l] "+r"(p3_l), [p4_l] "+r"(p4_l), \
- [p5_l] "+r"(p5_l), [p6_l] "+r"(p6_l) \
- :); \
- \
- __asm__ __volatile__( \
- "sb %[q6_l], 6(%[s1]) \n\t" \
- "sb %[q5_l], 5(%[s1]) \n\t" \
- "sb %[q4_l], 4(%[s1]) \n\t" \
- "sb %[q3_l], 3(%[s1]) \n\t" \
- "sb %[q2_l], 2(%[s1]) \n\t" \
- "sb %[q1_l], 1(%[s1]) \n\t" \
- "sb %[q0_l], 0(%[s1]) \n\t" \
- "sb %[p0_l], -1(%[s1]) \n\t" \
- "sb %[p1_l], -2(%[s1]) \n\t" \
- "sb %[p2_l], -3(%[s1]) \n\t" \
- "sb %[p3_l], -4(%[s1]) \n\t" \
- "sb %[p4_l], -5(%[s1]) \n\t" \
- "sb %[p5_l], -6(%[s1]) \n\t" \
- "sb %[p6_l], -7(%[s1]) \n\t" \
- \
- : \
- : [q6_l] "r"(q6_l), [q5_l] "r"(q5_l), [q4_l] "r"(q4_l), \
- [q3_l] "r"(q3_l), [q2_l] "r"(q2_l), [q1_l] "r"(q1_l), \
- [q0_l] "r"(q0_l), [p0_l] "r"(p0_l), [p1_l] "r"(p1_l), \
- [p2_l] "r"(p2_l), [p3_l] "r"(p3_l), [p4_l] "r"(p4_l), \
- [p5_l] "r"(p5_l), [p6_l] "r"(p6_l), [s1] "r"(s1)); \
- }
-
-#define PACK_LEFT_0TO3() \
- { \
- __asm__ __volatile__( \
- "preceu.ph.qbl %[p3_l], %[p3] \n\t" \
- "preceu.ph.qbl %[p2_l], %[p2] \n\t" \
- "preceu.ph.qbl %[p1_l], %[p1] \n\t" \
- "preceu.ph.qbl %[p0_l], %[p0] \n\t" \
- "preceu.ph.qbl %[q0_l], %[q0] \n\t" \
- "preceu.ph.qbl %[q1_l], %[q1] \n\t" \
- "preceu.ph.qbl %[q2_l], %[q2] \n\t" \
- "preceu.ph.qbl %[q3_l], %[q3] \n\t" \
- \
- : [p3_l] "=&r"(p3_l), [p2_l] "=&r"(p2_l), [p1_l] "=&r"(p1_l), \
- [p0_l] "=&r"(p0_l), [q0_l] "=&r"(q0_l), [q1_l] "=&r"(q1_l), \
- [q2_l] "=&r"(q2_l), [q3_l] "=&r"(q3_l) \
- : [p3] "r"(p3), [p2] "r"(p2), [p1] "r"(p1), [p0] "r"(p0), \
- [q0] "r"(q0), [q1] "r"(q1), [q2] "r"(q2), [q3] "r"(q3)); \
- }
-
-#define PACK_LEFT_4TO7() \
- { \
- __asm__ __volatile__( \
- "preceu.ph.qbl %[p7_l], %[p7] \n\t" \
- "preceu.ph.qbl %[p6_l], %[p6] \n\t" \
- "preceu.ph.qbl %[p5_l], %[p5] \n\t" \
- "preceu.ph.qbl %[p4_l], %[p4] \n\t" \
- "preceu.ph.qbl %[q4_l], %[q4] \n\t" \
- "preceu.ph.qbl %[q5_l], %[q5] \n\t" \
- "preceu.ph.qbl %[q6_l], %[q6] \n\t" \
- "preceu.ph.qbl %[q7_l], %[q7] \n\t" \
- \
- : [p7_l] "=&r"(p7_l), [p6_l] "=&r"(p6_l), [p5_l] "=&r"(p5_l), \
- [p4_l] "=&r"(p4_l), [q4_l] "=&r"(q4_l), [q5_l] "=&r"(q5_l), \
- [q6_l] "=&r"(q6_l), [q7_l] "=&r"(q7_l) \
- : [p7] "r"(p7), [p6] "r"(p6), [p5] "r"(p5), [p4] "r"(p4), \
- [q4] "r"(q4), [q5] "r"(q5), [q6] "r"(q6), [q7] "r"(q7)); \
- }
-
-#define PACK_RIGHT_0TO3() \
- { \
- __asm__ __volatile__( \
- "preceu.ph.qbr %[p3_r], %[p3] \n\t" \
- "preceu.ph.qbr %[p2_r], %[p2] \n\t" \
- "preceu.ph.qbr %[p1_r], %[p1] \n\t" \
- "preceu.ph.qbr %[p0_r], %[p0] \n\t" \
- "preceu.ph.qbr %[q0_r], %[q0] \n\t" \
- "preceu.ph.qbr %[q1_r], %[q1] \n\t" \
- "preceu.ph.qbr %[q2_r], %[q2] \n\t" \
- "preceu.ph.qbr %[q3_r], %[q3] \n\t" \
- \
- : [p3_r] "=&r"(p3_r), [p2_r] "=&r"(p2_r), [p1_r] "=&r"(p1_r), \
- [p0_r] "=&r"(p0_r), [q0_r] "=&r"(q0_r), [q1_r] "=&r"(q1_r), \
- [q2_r] "=&r"(q2_r), [q3_r] "=&r"(q3_r) \
- : [p3] "r"(p3), [p2] "r"(p2), [p1] "r"(p1), [p0] "r"(p0), \
- [q0] "r"(q0), [q1] "r"(q1), [q2] "r"(q2), [q3] "r"(q3)); \
- }
-
-#define PACK_RIGHT_4TO7() \
- { \
- __asm__ __volatile__( \
- "preceu.ph.qbr %[p7_r], %[p7] \n\t" \
- "preceu.ph.qbr %[p6_r], %[p6] \n\t" \
- "preceu.ph.qbr %[p5_r], %[p5] \n\t" \
- "preceu.ph.qbr %[p4_r], %[p4] \n\t" \
- "preceu.ph.qbr %[q4_r], %[q4] \n\t" \
- "preceu.ph.qbr %[q5_r], %[q5] \n\t" \
- "preceu.ph.qbr %[q6_r], %[q6] \n\t" \
- "preceu.ph.qbr %[q7_r], %[q7] \n\t" \
- \
- : [p7_r] "=&r"(p7_r), [p6_r] "=&r"(p6_r), [p5_r] "=&r"(p5_r), \
- [p4_r] "=&r"(p4_r), [q4_r] "=&r"(q4_r), [q5_r] "=&r"(q5_r), \
- [q6_r] "=&r"(q6_r), [q7_r] "=&r"(q7_r) \
- : [p7] "r"(p7), [p6] "r"(p6), [p5] "r"(p5), [p4] "r"(p4), \
- [q4] "r"(q4), [q5] "r"(q5), [q6] "r"(q6), [q7] "r"(q7)); \
- }
-
-#define COMBINE_LEFT_RIGHT_0TO2() \
- { \
- __asm__ __volatile__( \
- "precr.qb.ph %[p2], %[p2_l], %[p2_r] \n\t" \
- "precr.qb.ph %[p1], %[p1_l], %[p1_r] \n\t" \
- "precr.qb.ph %[p0], %[p0_l], %[p0_r] \n\t" \
- "precr.qb.ph %[q0], %[q0_l], %[q0_r] \n\t" \
- "precr.qb.ph %[q1], %[q1_l], %[q1_r] \n\t" \
- "precr.qb.ph %[q2], %[q2_l], %[q2_r] \n\t" \
- \
- : [p2] "=&r"(p2), [p1] "=&r"(p1), [p0] "=&r"(p0), [q0] "=&r"(q0), \
- [q1] "=&r"(q1), [q2] "=&r"(q2) \
- : [p2_l] "r"(p2_l), [p2_r] "r"(p2_r), [p1_l] "r"(p1_l), \
- [p1_r] "r"(p1_r), [p0_l] "r"(p0_l), [p0_r] "r"(p0_r), \
- [q0_l] "r"(q0_l), [q0_r] "r"(q0_r), [q1_l] "r"(q1_l), \
- [q1_r] "r"(q1_r), [q2_l] "r"(q2_l), [q2_r] "r"(q2_r)); \
- }
-
-#define COMBINE_LEFT_RIGHT_3TO6() \
- { \
- __asm__ __volatile__( \
- "precr.qb.ph %[p6], %[p6_l], %[p6_r] \n\t" \
- "precr.qb.ph %[p5], %[p5_l], %[p5_r] \n\t" \
- "precr.qb.ph %[p4], %[p4_l], %[p4_r] \n\t" \
- "precr.qb.ph %[p3], %[p3_l], %[p3_r] \n\t" \
- "precr.qb.ph %[q3], %[q3_l], %[q3_r] \n\t" \
- "precr.qb.ph %[q4], %[q4_l], %[q4_r] \n\t" \
- "precr.qb.ph %[q5], %[q5_l], %[q5_r] \n\t" \
- "precr.qb.ph %[q6], %[q6_l], %[q6_r] \n\t" \
- \
- : [p6] "=&r"(p6), [p5] "=&r"(p5), [p4] "=&r"(p4), [p3] "=&r"(p3), \
- [q3] "=&r"(q3), [q4] "=&r"(q4), [q5] "=&r"(q5), [q6] "=&r"(q6) \
- : [p6_l] "r"(p6_l), [p5_l] "r"(p5_l), [p4_l] "r"(p4_l), \
- [p3_l] "r"(p3_l), [p6_r] "r"(p6_r), [p5_r] "r"(p5_r), \
- [p4_r] "r"(p4_r), [p3_r] "r"(p3_r), [q3_l] "r"(q3_l), \
- [q4_l] "r"(q4_l), [q5_l] "r"(q5_l), [q6_l] "r"(q6_l), \
- [q3_r] "r"(q3_r), [q4_r] "r"(q4_r), [q5_r] "r"(q5_r), \
- [q6_r] "r"(q6_r)); \
- }
-
-#endif // #if HAVE_DSPR2
-#ifdef __cplusplus
-} // extern "C"
-#endif
-
-#endif // AOM_AOM_DSP_MIPS_LOOPFILTER_MACROS_DSPR2_H_
diff --git a/third_party/aom/aom_dsp/mips/loopfilter_masks_dspr2.h b/third_party/aom/aom_dsp/mips/loopfilter_masks_dspr2.h
deleted file mode 100644
index a0f57f386..000000000
--- a/third_party/aom/aom_dsp/mips/loopfilter_masks_dspr2.h
+++ /dev/null
@@ -1,357 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#ifndef AOM_AOM_DSP_MIPS_LOOPFILTER_MASKS_DSPR2_H_
-#define AOM_AOM_DSP_MIPS_LOOPFILTER_MASKS_DSPR2_H_
-
-#include <stdlib.h>
-
-#include "config/aom_dsp_rtcd.h"
-
-#include "aom/aom_integer.h"
-#include "aom_mem/aom_mem.h"
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-#if HAVE_DSPR2
-/* processing 4 pixels at the same time
- * compute hev and mask in the same function */
-static INLINE void filter_hev_mask_dspr2(uint32_t limit, uint32_t flimit,
- uint32_t p1, uint32_t p0, uint32_t p3,
- uint32_t p2, uint32_t q0, uint32_t q1,
- uint32_t q2, uint32_t q3,
- uint32_t thresh, uint32_t *hev,
- uint32_t *mask) {
- uint32_t c, r, r3, r_k;
- uint32_t s1, s2, s3;
- uint32_t ones = 0xFFFFFFFF;
- uint32_t hev1;
-
- __asm__ __volatile__(
- /* mask |= (abs(p3 - p2) > limit) */
- "subu_s.qb %[c], %[p3], %[p2] \n\t"
- "subu_s.qb %[r_k], %[p2], %[p3] \n\t"
- "or %[r_k], %[r_k], %[c] \n\t"
- "cmpgu.lt.qb %[c], %[limit], %[r_k] \n\t"
- "or %[r], $0, %[c] \n\t"
-
- /* mask |= (abs(p2 - p1) > limit) */
- "subu_s.qb %[c], %[p2], %[p1] \n\t"
- "subu_s.qb %[r_k], %[p1], %[p2] \n\t"
- "or %[r_k], %[r_k], %[c] \n\t"
- "cmpgu.lt.qb %[c], %[limit], %[r_k] \n\t"
- "or %[r], %[r], %[c] \n\t"
-
- /* mask |= (abs(p1 - p0) > limit)
- * hev |= (abs(p1 - p0) > thresh)
- */
- "subu_s.qb %[c], %[p1], %[p0] \n\t"
- "subu_s.qb %[r_k], %[p0], %[p1] \n\t"
- "or %[r_k], %[r_k], %[c] \n\t"
- "cmpgu.lt.qb %[c], %[thresh], %[r_k] \n\t"
- "or %[r3], $0, %[c] \n\t"
- "cmpgu.lt.qb %[c], %[limit], %[r_k] \n\t"
- "or %[r], %[r], %[c] \n\t"
-
- /* mask |= (abs(q1 - q0) > limit)
- * hev |= (abs(q1 - q0) > thresh)
- */
- "subu_s.qb %[c], %[q1], %[q0] \n\t"
- "subu_s.qb %[r_k], %[q0], %[q1] \n\t"
- "or %[r_k], %[r_k], %[c] \n\t"
- "cmpgu.lt.qb %[c], %[thresh], %[r_k] \n\t"
- "or %[r3], %[r3], %[c] \n\t"
- "cmpgu.lt.qb %[c], %[limit], %[r_k] \n\t"
- "or %[r], %[r], %[c] \n\t"
-
- /* mask |= (abs(q2 - q1) > limit) */
- "subu_s.qb %[c], %[q2], %[q1] \n\t"
- "subu_s.qb %[r_k], %[q1], %[q2] \n\t"
- "or %[r_k], %[r_k], %[c] \n\t"
- "cmpgu.lt.qb %[c], %[limit], %[r_k] \n\t"
- "or %[r], %[r], %[c] \n\t"
- "sll %[r3], %[r3], 24 \n\t"
-
- /* mask |= (abs(q3 - q2) > limit) */
- "subu_s.qb %[c], %[q3], %[q2] \n\t"
- "subu_s.qb %[r_k], %[q2], %[q3] \n\t"
- "or %[r_k], %[r_k], %[c] \n\t"
- "cmpgu.lt.qb %[c], %[limit], %[r_k] \n\t"
- "or %[r], %[r], %[c] \n\t"
-
- : [c] "=&r"(c), [r_k] "=&r"(r_k), [r] "=&r"(r), [r3] "=&r"(r3)
- : [limit] "r"(limit), [p3] "r"(p3), [p2] "r"(p2), [p1] "r"(p1),
- [p0] "r"(p0), [q1] "r"(q1), [q0] "r"(q0), [q2] "r"(q2), [q3] "r"(q3),
- [thresh] "r"(thresh));
-
- __asm__ __volatile__(
- /* abs(p0 - q0) */
- "subu_s.qb %[c], %[p0], %[q0] \n\t"
- "subu_s.qb %[r_k], %[q0], %[p0] \n\t"
- "wrdsp %[r3] \n\t"
- "or %[s1], %[r_k], %[c] \n\t"
-
- /* abs(p1 - q1) */
- "subu_s.qb %[c], %[p1], %[q1] \n\t"
- "addu_s.qb %[s3], %[s1], %[s1] \n\t"
- "pick.qb %[hev1], %[ones], $0 \n\t"
- "subu_s.qb %[r_k], %[q1], %[p1] \n\t"
- "or %[s2], %[r_k], %[c] \n\t"
-
- /* abs(p0 - q0) * 2 + abs(p1 - q1) / 2 > flimit * 2 + limit */
- "shrl.qb %[s2], %[s2], 1 \n\t"
- "addu_s.qb %[s1], %[s2], %[s3] \n\t"
- "cmpgu.lt.qb %[c], %[flimit], %[s1] \n\t"
- "or %[r], %[r], %[c] \n\t"
- "sll %[r], %[r], 24 \n\t"
-
- "wrdsp %[r] \n\t"
- "pick.qb %[s2], $0, %[ones] \n\t"
-
- : [c] "=&r"(c), [r_k] "=&r"(r_k), [s1] "=&r"(s1), [hev1] "=&r"(hev1),
- [s2] "=&r"(s2), [r] "+r"(r), [s3] "=&r"(s3)
- : [p0] "r"(p0), [q0] "r"(q0), [p1] "r"(p1), [r3] "r"(r3), [q1] "r"(q1),
- [ones] "r"(ones), [flimit] "r"(flimit));
-
- *hev = hev1;
- *mask = s2;
-}
-
-static INLINE void filter_hev_mask_flatmask4_dspr2(
- uint32_t limit, uint32_t flimit, uint32_t thresh, uint32_t p1, uint32_t p0,
- uint32_t p3, uint32_t p2, uint32_t q0, uint32_t q1, uint32_t q2,
- uint32_t q3, uint32_t *hev, uint32_t *mask, uint32_t *flat) {
- uint32_t c, r, r3, r_k, r_flat;
- uint32_t s1, s2, s3;
- uint32_t ones = 0xFFFFFFFF;
- uint32_t flat_thresh = 0x01010101;
- uint32_t hev1;
- uint32_t flat1;
-
- __asm__ __volatile__(
- /* mask |= (abs(p3 - p2) > limit) */
- "subu_s.qb %[c], %[p3], %[p2] \n\t"
- "subu_s.qb %[r_k], %[p2], %[p3] \n\t"
- "or %[r_k], %[r_k], %[c] \n\t"
- "cmpgu.lt.qb %[c], %[limit], %[r_k] \n\t"
- "or %[r], $0, %[c] \n\t"
-
- /* mask |= (abs(p2 - p1) > limit) */
- "subu_s.qb %[c], %[p2], %[p1] \n\t"
- "subu_s.qb %[r_k], %[p1], %[p2] \n\t"
- "or %[r_k], %[r_k], %[c] \n\t"
- "cmpgu.lt.qb %[c], %[limit], %[r_k] \n\t"
- "or %[r], %[r], %[c] \n\t"
-
- /* mask |= (abs(p1 - p0) > limit)
- * hev |= (abs(p1 - p0) > thresh)
- * flat |= (abs(p1 - p0) > thresh)
- */
- "subu_s.qb %[c], %[p1], %[p0] \n\t"
- "subu_s.qb %[r_k], %[p0], %[p1] \n\t"
- "or %[r_k], %[r_k], %[c] \n\t"
- "cmpgu.lt.qb %[c], %[thresh], %[r_k] \n\t"
- "or %[r3], $0, %[c] \n\t"
- "cmpgu.lt.qb %[c], %[limit], %[r_k] \n\t"
- "or %[r], %[r], %[c] \n\t"
- "cmpgu.lt.qb %[c], %[flat_thresh], %[r_k] \n\t"
- "or %[r_flat], $0, %[c] \n\t"
-
- /* mask |= (abs(q1 - q0) > limit)
- * hev |= (abs(q1 - q0) > thresh)
- * flat |= (abs(q1 - q0) > thresh)
- */
- "subu_s.qb %[c], %[q1], %[q0] \n\t"
- "subu_s.qb %[r_k], %[q0], %[q1] \n\t"
- "or %[r_k], %[r_k], %[c] \n\t"
- "cmpgu.lt.qb %[c], %[thresh], %[r_k] \n\t"
- "or %[r3], %[r3], %[c] \n\t"
- "cmpgu.lt.qb %[c], %[limit], %[r_k] \n\t"
- "or %[r], %[r], %[c] \n\t"
- "cmpgu.lt.qb %[c], %[flat_thresh], %[r_k] \n\t"
- "or %[r_flat], %[r_flat], %[c] \n\t"
-
- /* flat |= (abs(p0 - p2) > thresh) */
- "subu_s.qb %[c], %[p0], %[p2] \n\t"
- "subu_s.qb %[r_k], %[p2], %[p0] \n\t"
- "or %[r_k], %[r_k], %[c] \n\t"
- "cmpgu.lt.qb %[c], %[flat_thresh], %[r_k] \n\t"
- "or %[r_flat], %[r_flat], %[c] \n\t"
-
- /* flat |= (abs(q0 - q2) > thresh) */
- "subu_s.qb %[c], %[q0], %[q2] \n\t"
- "subu_s.qb %[r_k], %[q2], %[q0] \n\t"
- "or %[r_k], %[r_k], %[c] \n\t"
- "cmpgu.lt.qb %[c], %[flat_thresh], %[r_k] \n\t"
- "or %[r_flat], %[r_flat], %[c] \n\t"
-
- /* flat |= (abs(p3 - p0) > thresh) */
- "subu_s.qb %[c], %[p3], %[p0] \n\t"
- "subu_s.qb %[r_k], %[p0], %[p3] \n\t"
- "or %[r_k], %[r_k], %[c] \n\t"
- "cmpgu.lt.qb %[c], %[flat_thresh], %[r_k] \n\t"
- "or %[r_flat], %[r_flat], %[c] \n\t"
-
- /* flat |= (abs(q3 - q0) > thresh) */
- "subu_s.qb %[c], %[q3], %[q0] \n\t"
- "subu_s.qb %[r_k], %[q0], %[q3] \n\t"
- "or %[r_k], %[r_k], %[c] \n\t"
- "cmpgu.lt.qb %[c], %[flat_thresh], %[r_k] \n\t"
- "or %[r_flat], %[r_flat], %[c] \n\t"
- "sll %[r_flat], %[r_flat], 24 \n\t"
- /* look at stall here */
- "wrdsp %[r_flat] \n\t"
- "pick.qb %[flat1], $0, %[ones] \n\t"
-
- /* mask |= (abs(q2 - q1) > limit) */
- "subu_s.qb %[c], %[q2], %[q1] \n\t"
- "subu_s.qb %[r_k], %[q1], %[q2] \n\t"
- "or %[r_k], %[r_k], %[c] \n\t"
- "cmpgu.lt.qb %[c], %[limit], %[r_k] \n\t"
- "or %[r], %[r], %[c] \n\t"
- "sll %[r3], %[r3], 24 \n\t"
-
- /* mask |= (abs(q3 - q2) > limit) */
- "subu_s.qb %[c], %[q3], %[q2] \n\t"
- "subu_s.qb %[r_k], %[q2], %[q3] \n\t"
- "or %[r_k], %[r_k], %[c] \n\t"
- "cmpgu.lt.qb %[c], %[limit], %[r_k] \n\t"
- "or %[r], %[r], %[c] \n\t"
-
- : [c] "=&r"(c), [r_k] "=&r"(r_k), [r] "=&r"(r), [r3] "=&r"(r3),
- [r_flat] "=&r"(r_flat), [flat1] "=&r"(flat1)
- : [limit] "r"(limit), [p3] "r"(p3), [p2] "r"(p2), [p1] "r"(p1),
- [p0] "r"(p0), [q1] "r"(q1), [q0] "r"(q0), [q2] "r"(q2), [q3] "r"(q3),
- [thresh] "r"(thresh), [flat_thresh] "r"(flat_thresh), [ones] "r"(ones));
-
- __asm__ __volatile__(
- /* abs(p0 - q0) */
- "subu_s.qb %[c], %[p0], %[q0] \n\t"
- "subu_s.qb %[r_k], %[q0], %[p0] \n\t"
- "wrdsp %[r3] \n\t"
- "or %[s1], %[r_k], %[c] \n\t"
-
- /* abs(p1 - q1) */
- "subu_s.qb %[c], %[p1], %[q1] \n\t"
- "addu_s.qb %[s3], %[s1], %[s1] \n\t"
- "pick.qb %[hev1], %[ones], $0 \n\t"
- "subu_s.qb %[r_k], %[q1], %[p1] \n\t"
- "or %[s2], %[r_k], %[c] \n\t"
-
- /* abs(p0 - q0) * 2 + abs(p1 - q1) / 2 > flimit * 2 + limit */
- "shrl.qb %[s2], %[s2], 1 \n\t"
- "addu_s.qb %[s1], %[s2], %[s3] \n\t"
- "cmpgu.lt.qb %[c], %[flimit], %[s1] \n\t"
- "or %[r], %[r], %[c] \n\t"
- "sll %[r], %[r], 24 \n\t"
-
- "wrdsp %[r] \n\t"
- "pick.qb %[s2], $0, %[ones] \n\t"
-
- : [c] "=&r"(c), [r_k] "=&r"(r_k), [s1] "=&r"(s1), [hev1] "=&r"(hev1),
- [s2] "=&r"(s2), [r] "+r"(r), [s3] "=&r"(s3)
- : [p0] "r"(p0), [q0] "r"(q0), [p1] "r"(p1), [r3] "r"(r3), [q1] "r"(q1),
- [ones] "r"(ones), [flimit] "r"(flimit));
-
- *hev = hev1;
- *mask = s2;
- *flat = flat1;
-}
-
-static INLINE void flatmask5(uint32_t p4, uint32_t p3, uint32_t p2, uint32_t p1,
- uint32_t p0, uint32_t q0, uint32_t q1, uint32_t q2,
- uint32_t q3, uint32_t q4, uint32_t *flat2) {
- uint32_t c, r, r_k, r_flat;
- uint32_t ones = 0xFFFFFFFF;
- uint32_t flat_thresh = 0x01010101;
- uint32_t flat1, flat3;
-
- __asm__ __volatile__(
- /* flat |= (abs(p4 - p0) > thresh) */
- "subu_s.qb %[c], %[p4], %[p0] \n\t"
- "subu_s.qb %[r_k], %[p0], %[p4] \n\t"
- "or %[r_k], %[r_k], %[c] \n\t"
- "cmpgu.lt.qb %[c], %[flat_thresh], %[r_k] \n\t"
- "or %[r], $0, %[c] \n\t"
-
- /* flat |= (abs(q4 - q0) > thresh) */
- "subu_s.qb %[c], %[q4], %[q0] \n\t"
- "subu_s.qb %[r_k], %[q0], %[q4] \n\t"
- "or %[r_k], %[r_k], %[c] \n\t"
- "cmpgu.lt.qb %[c], %[flat_thresh], %[r_k] \n\t"
- "or %[r], %[r], %[c] \n\t"
- "sll %[r], %[r], 24 \n\t"
- "wrdsp %[r] \n\t"
- "pick.qb %[flat3], $0, %[ones] \n\t"
-
- /* flat |= (abs(p1 - p0) > thresh) */
- "subu_s.qb %[c], %[p1], %[p0] \n\t"
- "subu_s.qb %[r_k], %[p0], %[p1] \n\t"
- "or %[r_k], %[r_k], %[c] \n\t"
- "cmpgu.lt.qb %[c], %[flat_thresh], %[r_k] \n\t"
- "or %[r_flat], $0, %[c] \n\t"
-
- /* flat |= (abs(q1 - q0) > thresh) */
- "subu_s.qb %[c], %[q1], %[q0] \n\t"
- "subu_s.qb %[r_k], %[q0], %[q1] \n\t"
- "or %[r_k], %[r_k], %[c] \n\t"
- "cmpgu.lt.qb %[c], %[flat_thresh], %[r_k] \n\t"
- "or %[r_flat], %[r_flat], %[c] \n\t"
-
- /* flat |= (abs(p0 - p2) > thresh) */
- "subu_s.qb %[c], %[p0], %[p2] \n\t"
- "subu_s.qb %[r_k], %[p2], %[p0] \n\t"
- "or %[r_k], %[r_k], %[c] \n\t"
- "cmpgu.lt.qb %[c], %[flat_thresh], %[r_k] \n\t"
- "or %[r_flat], %[r_flat], %[c] \n\t"
-
- /* flat |= (abs(q0 - q2) > thresh) */
- "subu_s.qb %[c], %[q0], %[q2] \n\t"
- "subu_s.qb %[r_k], %[q2], %[q0] \n\t"
- "or %[r_k], %[r_k], %[c] \n\t"
- "cmpgu.lt.qb %[c], %[flat_thresh], %[r_k] \n\t"
- "or %[r_flat], %[r_flat], %[c] \n\t"
-
- /* flat |= (abs(p3 - p0) > thresh) */
- "subu_s.qb %[c], %[p3], %[p0] \n\t"
- "subu_s.qb %[r_k], %[p0], %[p3] \n\t"
- "or %[r_k], %[r_k], %[c] \n\t"
- "cmpgu.lt.qb %[c], %[flat_thresh], %[r_k] \n\t"
- "or %[r_flat], %[r_flat], %[c] \n\t"
-
- /* flat |= (abs(q3 - q0) > thresh) */
- "subu_s.qb %[c], %[q3], %[q0] \n\t"
- "subu_s.qb %[r_k], %[q0], %[q3] \n\t"
- "or %[r_k], %[r_k], %[c] \n\t"
- "cmpgu.lt.qb %[c], %[flat_thresh], %[r_k] \n\t"
- "or %[r_flat], %[r_flat], %[c] \n\t"
- "sll %[r_flat], %[r_flat], 24 \n\t"
- "wrdsp %[r_flat] \n\t"
- "pick.qb %[flat1], $0, %[ones] \n\t"
- /* flat & flatmask4(thresh, p3, p2, p1, p0, q0, q1, q2, q3) */
- "and %[flat1], %[flat3], %[flat1] \n\t"
-
- : [c] "=&r"(c), [r_k] "=&r"(r_k), [r] "=&r"(r), [r_flat] "=&r"(r_flat),
- [flat1] "=&r"(flat1), [flat3] "=&r"(flat3)
- : [p4] "r"(p4), [p3] "r"(p3), [p2] "r"(p2), [p1] "r"(p1), [p0] "r"(p0),
- [q0] "r"(q0), [q1] "r"(q1), [q2] "r"(q2), [q3] "r"(q3), [q4] "r"(q4),
- [flat_thresh] "r"(flat_thresh), [ones] "r"(ones));
-
- *flat2 = flat1;
-}
-#endif // #if HAVE_DSPR2
-#ifdef __cplusplus
-} // extern "C"
-#endif
-
-#endif // AOM_AOM_DSP_MIPS_LOOPFILTER_MASKS_DSPR2_H_
diff --git a/third_party/aom/aom_dsp/mips/loopfilter_mb_dspr2.c b/third_party/aom/aom_dsp/mips/loopfilter_mb_dspr2.c
deleted file mode 100644
index b67ccfe9d..000000000
--- a/third_party/aom/aom_dsp/mips/loopfilter_mb_dspr2.c
+++ /dev/null
@@ -1,590 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#include <stdlib.h>
-
-#include "config/aom_dsp_rtcd.h"
-
-#include "aom/aom_integer.h"
-#include "aom_dsp/mips/common_dspr2.h"
-#include "aom_dsp/mips/loopfilter_filters_dspr2.h"
-#include "aom_dsp/mips/loopfilter_macros_dspr2.h"
-#include "aom_dsp/mips/loopfilter_masks_dspr2.h"
-#include "aom_mem/aom_mem.h"
-
-#if HAVE_DSPR2
-/*
- * 8-tap loop filter across a horizontal edge (MIPS DSPr2 inline assembly).
- *
- * s       Points at the first pixel of the row directly below the edge (q0).
- *         Rows s - 4 * pitch .. s + 3 * pitch are read; at most the three
- *         rows on either side of the edge (p2..q2) are rewritten.
- * pitch   Byte distance between vertically adjacent pixels.
- * blimit, limit, thresh
- *         Each points at one byte; that byte is replicated into all four
- *         lanes of a 32-bit word before being handed to the mask helper.
- *
- * Eight columns are handled as two passes over four adjacent columns; each
- * pass loads one 32-bit word per row, so every lane of p3..q3 is one column.
- * NOTE(review): rows are accessed with lw/sw, so s and pitch are presumably
- * word aligned -- confirm against the callers.
- */
-void aom_lpf_horizontal_8_dspr2(unsigned char *s, int pitch,
-                                const uint8_t *blimit, const uint8_t *limit,
-                                const uint8_t *thresh) {
-  uint32_t mask;
-  uint32_t hev, flat;
-  uint8_t i;
-  uint8_t *sp3, *sp2, *sp1, *sp0, *sq0, *sq1, *sq2, *sq3;
-  uint32_t thresh_vec, flimit_vec, limit_vec;
-  uint32_t uflimit, ulimit, uthresh;
-  uint32_t p1_f0, p0_f0, q0_f0, q1_f0;
-  uint32_t p3, p2, p1, p0, q0, q1, q2, q3;
-  /* The *_l / *_r temporaries are referenced by name from the PACK_* and
-   * COMBINE_* macros (loopfilter_macros_dspr2.h), so they must keep these
-   * exact identifiers. */
-  uint32_t p0_l, p1_l, p2_l, p3_l, q0_l, q1_l, q2_l, q3_l;
-  uint32_t p0_r, p1_r, p2_r, p3_r, q0_r, q1_r, q2_r, q3_r;
-
-  uflimit = *blimit;
-  ulimit = *limit;
-  uthresh = *thresh;
-
-  /* create quad-byte: replicate each 8-bit threshold into all four lanes */
-  __asm__ __volatile__(
-      "replv.qb %[thresh_vec], %[uthresh] \n\t"
-      "replv.qb %[flimit_vec], %[uflimit] \n\t"
-      "replv.qb %[limit_vec], %[ulimit] \n\t"
-
-      : [thresh_vec] "=&r"(thresh_vec), [flimit_vec] "=&r"(flimit_vec),
-        [limit_vec] "=r"(limit_vec)
-      : [uthresh] "r"(uthresh), [uflimit] "r"(uflimit), [ulimit] "r"(ulimit));
-
-  /* prefetch data for store */
-  prefetch_store(s);
-
-  for (i = 0; i < 2; i++) {
-    /* Row pointers for the four rows above (p3..p0) and below (q0..q3). */
-    sp3 = s - (pitch << 2);
-    sp2 = sp3 + pitch;
-    sp1 = sp2 + pitch;
-    sp0 = sp1 + pitch;
-    sq0 = s;
-    sq1 = s + pitch;
-    sq2 = sq1 + pitch;
-    sq3 = sq2 + pitch;
-
-    /* Load four pixels from each row. The "memory" clobber tells the compiler
-     * that this asm reads memory it cannot see through the register operands,
-     * so pending stores to the pixel buffer are completed first. */
-    __asm__ __volatile__(
-        "lw %[p3], (%[sp3]) \n\t"
-        "lw %[p2], (%[sp2]) \n\t"
-        "lw %[p1], (%[sp1]) \n\t"
-        "lw %[p0], (%[sp0]) \n\t"
-        "lw %[q0], (%[sq0]) \n\t"
-        "lw %[q1], (%[sq1]) \n\t"
-        "lw %[q2], (%[sq2]) \n\t"
-        "lw %[q3], (%[sq3]) \n\t"
-
-        : [p3] "=&r"(p3), [p2] "=&r"(p2), [p1] "=&r"(p1), [p0] "=&r"(p0),
-          [q3] "=&r"(q3), [q2] "=&r"(q2), [q1] "=&r"(q1), [q0] "=&r"(q0)
-        : [sp3] "r"(sp3), [sp2] "r"(sp2), [sp1] "r"(sp1), [sp0] "r"(sp0),
-          [sq3] "r"(sq3), [sq2] "r"(sq2), [sq1] "r"(sq1), [sq0] "r"(sq0)
-        : "memory");
-
-    /* Produces the per-lane hev, mask and flat words used below. */
-    filter_hev_mask_flatmask4_dspr2(limit_vec, flimit_vec, thresh_vec, p1, p0,
-                                    p3, p2, q0, q1, q2, q3, &hev, &mask, &flat);
-
-    if ((flat == 0) && (mask != 0)) {
-      /* No lane is flat: only the filter1 result is needed, and it can be
-       * written back as whole words (rows p1..q1). */
-      filter1_dspr2(mask, hev, p1, p0, q0, q1, &p1_f0, &p0_f0, &q0_f0, &q1_f0);
-
-      /* "memory": the stores go through pointers passed as plain registers. */
-      __asm__ __volatile__(
-          "sw %[p1_f0], (%[sp1]) \n\t"
-          "sw %[p0_f0], (%[sp0]) \n\t"
-          "sw %[q0_f0], (%[sq0]) \n\t"
-          "sw %[q1_f0], (%[sq1]) \n\t"
-
-          :
-          : [p1_f0] "r"(p1_f0), [p0_f0] "r"(p0_f0), [q0_f0] "r"(q0_f0),
-            [q1_f0] "r"(q1_f0), [sp1] "r"(sp1), [sp0] "r"(sp0), [sq0] "r"(sq0),
-            [sq1] "r"(sq1)
-          : "memory");
-    } else if ((mask & flat) == 0xFFFFFFFF) {
-      /* Every lane is both masked and flat: run mbfilter on both halves and
-       * write rows p2..q2 back as whole words. */
-      /* left 2 element operation */
-      PACK_LEFT_0TO3()
-      mbfilter_dspr2(&p3_l, &p2_l, &p1_l, &p0_l, &q0_l, &q1_l, &q2_l, &q3_l);
-
-      /* right 2 element operation */
-      PACK_RIGHT_0TO3()
-      mbfilter_dspr2(&p3_r, &p2_r, &p1_r, &p0_r, &q0_r, &q1_r, &q2_r, &q3_r);
-
-      COMBINE_LEFT_RIGHT_0TO2()
-
-      __asm__ __volatile__(
-          "sw %[p2], (%[sp2]) \n\t"
-          "sw %[p1], (%[sp1]) \n\t"
-          "sw %[p0], (%[sp0]) \n\t"
-          "sw %[q0], (%[sq0]) \n\t"
-          "sw %[q1], (%[sq1]) \n\t"
-          "sw %[q2], (%[sq2]) \n\t"
-
-          :
-          : [p2] "r"(p2), [p1] "r"(p1), [p0] "r"(p0), [q0] "r"(q0),
-            [q1] "r"(q1), [q2] "r"(q2), [sp2] "r"(sp2), [sp1] "r"(sp1),
-            [sp0] "r"(sp0), [sq0] "r"(sq0), [sq1] "r"(sq1), [sq2] "r"(sq2)
-          : "memory");
-    } else if ((flat != 0) && (mask != 0)) {
-      /* Mixed case: compute both filter results, then pick per lane. For
-       * each lane (byte offset 0..3 within the row) the mbfilter result is
-       * stored when mask & flat is set for that lane, otherwise the filter1
-       * result is stored when only mask is set; an unmasked lane is left
-       * untouched. Each store writes the low byte of its register, so the
-       * registers are shifted right between lanes: the filter1 words by 8
-       * per lane, the *_r / *_l words by 16 after their first lane. */
-      /* filtering */
-      filter1_dspr2(mask, hev, p1, p0, q0, q1, &p1_f0, &p0_f0, &q0_f0, &q1_f0);
-
-      /* left 2 element operation */
-      PACK_LEFT_0TO3()
-      mbfilter_dspr2(&p3_l, &p2_l, &p1_l, &p0_l, &q0_l, &q1_l, &q2_l, &q3_l);
-
-      /* right 2 element operation */
-      PACK_RIGHT_0TO3()
-      mbfilter_dspr2(&p3_r, &p2_r, &p1_r, &p0_r, &q0_r, &q1_r, &q2_r, &q3_r);
-
-      /* lane 0 (byte offset 0) */
-      if (mask & flat & 0x000000FF) {
-        __asm__ __volatile__(
-            "sb %[p2_r], (%[sp2]) \n\t"
-            "sb %[p1_r], (%[sp1]) \n\t"
-            "sb %[p0_r], (%[sp0]) \n\t"
-            "sb %[q0_r], (%[sq0]) \n\t"
-            "sb %[q1_r], (%[sq1]) \n\t"
-            "sb %[q2_r], (%[sq2]) \n\t"
-
-            :
-            : [p2_r] "r"(p2_r), [p1_r] "r"(p1_r), [p0_r] "r"(p0_r),
-              [q0_r] "r"(q0_r), [q1_r] "r"(q1_r), [q2_r] "r"(q2_r),
-              [sp2] "r"(sp2), [sp1] "r"(sp1), [sp0] "r"(sp0), [sq0] "r"(sq0),
-              [sq1] "r"(sq1), [sq2] "r"(sq2)
-            : "memory");
-      } else if (mask & 0x000000FF) {
-        __asm__ __volatile__(
-            "sb %[p1_f0], (%[sp1]) \n\t"
-            "sb %[p0_f0], (%[sp0]) \n\t"
-            "sb %[q0_f0], (%[sq0]) \n\t"
-            "sb %[q1_f0], (%[sq1]) \n\t"
-
-            :
-            : [p1_f0] "r"(p1_f0), [p0_f0] "r"(p0_f0), [q0_f0] "r"(q0_f0),
-              [q1_f0] "r"(q1_f0), [sp1] "r"(sp1), [sp0] "r"(sp0),
-              [sq0] "r"(sq0), [sq1] "r"(sq1)
-            : "memory");
-      }
-
-      /* Advance to lane 1: next element of the right half, next f0 byte. */
-      __asm__ __volatile__(
-          "srl %[p2_r], %[p2_r], 16 \n\t"
-          "srl %[p1_r], %[p1_r], 16 \n\t"
-          "srl %[p0_r], %[p0_r], 16 \n\t"
-          "srl %[q0_r], %[q0_r], 16 \n\t"
-          "srl %[q1_r], %[q1_r], 16 \n\t"
-          "srl %[q2_r], %[q2_r], 16 \n\t"
-          "srl %[p1_f0], %[p1_f0], 8 \n\t"
-          "srl %[p0_f0], %[p0_f0], 8 \n\t"
-          "srl %[q0_f0], %[q0_f0], 8 \n\t"
-          "srl %[q1_f0], %[q1_f0], 8 \n\t"
-
-          : [p2_r] "+r"(p2_r), [p1_r] "+r"(p1_r), [p0_r] "+r"(p0_r),
-            [q0_r] "+r"(q0_r), [q1_r] "+r"(q1_r), [q2_r] "+r"(q2_r),
-            [p1_f0] "+r"(p1_f0), [p0_f0] "+r"(p0_f0), [q0_f0] "+r"(q0_f0),
-            [q1_f0] "+r"(q1_f0)
-          :);
-
-      /* lane 1 (byte offset 1) */
-      if (mask & flat & 0x0000FF00) {
-        __asm__ __volatile__(
-            "sb %[p2_r], +1(%[sp2]) \n\t"
-            "sb %[p1_r], +1(%[sp1]) \n\t"
-            "sb %[p0_r], +1(%[sp0]) \n\t"
-            "sb %[q0_r], +1(%[sq0]) \n\t"
-            "sb %[q1_r], +1(%[sq1]) \n\t"
-            "sb %[q2_r], +1(%[sq2]) \n\t"
-
-            :
-            : [p2_r] "r"(p2_r), [p1_r] "r"(p1_r), [p0_r] "r"(p0_r),
-              [q0_r] "r"(q0_r), [q1_r] "r"(q1_r), [q2_r] "r"(q2_r),
-              [sp2] "r"(sp2), [sp1] "r"(sp1), [sp0] "r"(sp0), [sq0] "r"(sq0),
-              [sq1] "r"(sq1), [sq2] "r"(sq2)
-            : "memory");
-      } else if (mask & 0x0000FF00) {
-        __asm__ __volatile__(
-            "sb %[p1_f0], +1(%[sp1]) \n\t"
-            "sb %[p0_f0], +1(%[sp0]) \n\t"
-            "sb %[q0_f0], +1(%[sq0]) \n\t"
-            "sb %[q1_f0], +1(%[sq1]) \n\t"
-
-            :
-            : [p1_f0] "r"(p1_f0), [p0_f0] "r"(p0_f0), [q0_f0] "r"(q0_f0),
-              [q1_f0] "r"(q1_f0), [sp1] "r"(sp1), [sp0] "r"(sp0),
-              [sq0] "r"(sq0), [sq1] "r"(sq1)
-            : "memory");
-      }
-
-      /* Advance the f0 words to lane 2. Only the four registers the template
-       * actually shifts are listed as operands. */
-      __asm__ __volatile__(
-          "srl %[p1_f0], %[p1_f0], 8 \n\t"
-          "srl %[p0_f0], %[p0_f0], 8 \n\t"
-          "srl %[q0_f0], %[q0_f0], 8 \n\t"
-          "srl %[q1_f0], %[q1_f0], 8 \n\t"
-
-          : [p1_f0] "+r"(p1_f0), [p0_f0] "+r"(p0_f0), [q0_f0] "+r"(q0_f0),
-            [q1_f0] "+r"(q1_f0)
-          :);
-
-      /* lane 2 (byte offset 2): first element of the left half */
-      if (mask & flat & 0x00FF0000) {
-        __asm__ __volatile__(
-            "sb %[p2_l], +2(%[sp2]) \n\t"
-            "sb %[p1_l], +2(%[sp1]) \n\t"
-            "sb %[p0_l], +2(%[sp0]) \n\t"
-            "sb %[q0_l], +2(%[sq0]) \n\t"
-            "sb %[q1_l], +2(%[sq1]) \n\t"
-            "sb %[q2_l], +2(%[sq2]) \n\t"
-
-            :
-            : [p2_l] "r"(p2_l), [p1_l] "r"(p1_l), [p0_l] "r"(p0_l),
-              [q0_l] "r"(q0_l), [q1_l] "r"(q1_l), [q2_l] "r"(q2_l),
-              [sp2] "r"(sp2), [sp1] "r"(sp1), [sp0] "r"(sp0), [sq0] "r"(sq0),
-              [sq1] "r"(sq1), [sq2] "r"(sq2)
-            : "memory");
-      } else if (mask & 0x00FF0000) {
-        __asm__ __volatile__(
-            "sb %[p1_f0], +2(%[sp1]) \n\t"
-            "sb %[p0_f0], +2(%[sp0]) \n\t"
-            "sb %[q0_f0], +2(%[sq0]) \n\t"
-            "sb %[q1_f0], +2(%[sq1]) \n\t"
-
-            :
-            : [p1_f0] "r"(p1_f0), [p0_f0] "r"(p0_f0), [q0_f0] "r"(q0_f0),
-              [q1_f0] "r"(q1_f0), [sp1] "r"(sp1), [sp0] "r"(sp0),
-              [sq0] "r"(sq0), [sq1] "r"(sq1)
-            : "memory");
-      }
-
-      /* Advance to lane 3: next element of the left half, next f0 byte. */
-      __asm__ __volatile__(
-          "srl %[p2_l], %[p2_l], 16 \n\t"
-          "srl %[p1_l], %[p1_l], 16 \n\t"
-          "srl %[p0_l], %[p0_l], 16 \n\t"
-          "srl %[q0_l], %[q0_l], 16 \n\t"
-          "srl %[q1_l], %[q1_l], 16 \n\t"
-          "srl %[q2_l], %[q2_l], 16 \n\t"
-          "srl %[p1_f0], %[p1_f0], 8 \n\t"
-          "srl %[p0_f0], %[p0_f0], 8 \n\t"
-          "srl %[q0_f0], %[q0_f0], 8 \n\t"
-          "srl %[q1_f0], %[q1_f0], 8 \n\t"
-
-          : [p2_l] "+r"(p2_l), [p1_l] "+r"(p1_l), [p0_l] "+r"(p0_l),
-            [q0_l] "+r"(q0_l), [q1_l] "+r"(q1_l), [q2_l] "+r"(q2_l),
-            [p1_f0] "+r"(p1_f0), [p0_f0] "+r"(p0_f0), [q0_f0] "+r"(q0_f0),
-            [q1_f0] "+r"(q1_f0)
-          :);
-
-      /* lane 3 (byte offset 3) */
-      if (mask & flat & 0xFF000000) {
-        __asm__ __volatile__(
-            "sb %[p2_l], +3(%[sp2]) \n\t"
-            "sb %[p1_l], +3(%[sp1]) \n\t"
-            "sb %[p0_l], +3(%[sp0]) \n\t"
-            "sb %[q0_l], +3(%[sq0]) \n\t"
-            "sb %[q1_l], +3(%[sq1]) \n\t"
-            "sb %[q2_l], +3(%[sq2]) \n\t"
-
-            :
-            : [p2_l] "r"(p2_l), [p1_l] "r"(p1_l), [p0_l] "r"(p0_l),
-              [q0_l] "r"(q0_l), [q1_l] "r"(q1_l), [q2_l] "r"(q2_l),
-              [sp2] "r"(sp2), [sp1] "r"(sp1), [sp0] "r"(sp0), [sq0] "r"(sq0),
-              [sq1] "r"(sq1), [sq2] "r"(sq2)
-            : "memory");
-      } else if (mask & 0xFF000000) {
-        __asm__ __volatile__(
-            "sb %[p1_f0], +3(%[sp1]) \n\t"
-            "sb %[p0_f0], +3(%[sp0]) \n\t"
-            "sb %[q0_f0], +3(%[sq0]) \n\t"
-            "sb %[q1_f0], +3(%[sq1]) \n\t"
-
-            :
-            : [p1_f0] "r"(p1_f0), [p0_f0] "r"(p0_f0), [q0_f0] "r"(q0_f0),
-              [q1_f0] "r"(q1_f0), [sp1] "r"(sp1), [sp0] "r"(sp0),
-              [sq0] "r"(sq0), [sq1] "r"(sq1)
-            : "memory");
-      }
-    }
-
-    /* Next group of four columns. */
-    s = s + 4;
-  }
-}
-
-/*
- * 8-tap loop filter across a vertical edge (MIPS DSPr2 inline assembly).
- *
- * s       Points at the first pixel to the right of the edge in the top row.
- *         For each row the four bytes at s - 4 and the four bytes at s are
- *         read; at most bytes s - 3 .. s + 2 of a row are rewritten.
- * pitch   Byte distance between vertically adjacent pixels.
- * blimit, limit, thresh
- *         Each points at one byte; that byte is replicated into all four
- *         lanes of a 32-bit word before being handed to the mask helper.
- *
- * Eight rows are handled as two passes over four rows. Each pass loads a
- * 4x4 block on either side of the edge and transposes it in registers so
- * that p3..q3 hold one pixel position each, with one row per byte lane.
- * NOTE(review): the rows are read with lw at s - 4 and s, so s and pitch are
- * presumably word aligned -- confirm against the callers.
- */
-void aom_lpf_vertical_8_dspr2(unsigned char *s, int pitch,
-                              const uint8_t *blimit, const uint8_t *limit,
-                              const uint8_t *thresh) {
-  uint8_t i;
-  uint32_t mask, hev, flat;
-  uint8_t *s1, *s2, *s3, *s4;
-  uint32_t prim1, prim2, sec3, sec4, prim3, prim4;
-  uint32_t thresh_vec, flimit_vec, limit_vec;
-  uint32_t uflimit, ulimit, uthresh;
-  uint32_t p3, p2, p1, p0, q3, q2, q1, q0;
-  uint32_t p1_f0, p0_f0, q0_f0, q1_f0;
-  /* The *_l / *_r temporaries are referenced by name from the PACK_* and
-   * STORE_* macros (loopfilter_macros_dspr2.h), so they must keep these
-   * exact identifiers. */
-  uint32_t p0_l, p1_l, p2_l, p3_l, q0_l, q1_l, q2_l, q3_l;
-  uint32_t p0_r, p1_r, p2_r, p3_r, q0_r, q1_r, q2_r, q3_r;
-
-  uflimit = *blimit;
-  ulimit = *limit;
-  uthresh = *thresh;
-
-  /* create quad-byte: replicate each 8-bit threshold into all four lanes */
-  __asm__ __volatile__(
-      "replv.qb %[thresh_vec], %[uthresh] \n\t"
-      "replv.qb %[flimit_vec], %[uflimit] \n\t"
-      "replv.qb %[limit_vec], %[ulimit] \n\t"
-
-      : [thresh_vec] "=&r"(thresh_vec), [flimit_vec] "=&r"(flimit_vec),
-        [limit_vec] "=r"(limit_vec)
-      : [uthresh] "r"(uthresh), [uflimit] "r"(uflimit), [ulimit] "r"(ulimit));
-
-  prefetch_store(s + pitch);
-
-  for (i = 0; i < 2; i++) {
-    /* Four consecutive rows for this pass; s is advanced to the next group
-     * up front, so only s1..s4 are used below. */
-    s1 = s;
-    s2 = s + pitch;
-    s3 = s2 + pitch;
-    s4 = s3 + pitch;
-    s = s4 + pitch;
-
-    /* Load the 4x4 blocks left and right of the edge. The "memory" clobber
-     * tells the compiler that this asm reads memory it cannot see through
-     * the register operands. */
-    __asm__ __volatile__(
-        "lw %[p0], -4(%[s1]) \n\t"
-        "lw %[p1], -4(%[s2]) \n\t"
-        "lw %[p2], -4(%[s3]) \n\t"
-        "lw %[p3], -4(%[s4]) \n\t"
-        "lw %[q3], (%[s1]) \n\t"
-        "lw %[q2], (%[s2]) \n\t"
-        "lw %[q1], (%[s3]) \n\t"
-        "lw %[q0], (%[s4]) \n\t"
-
-        : [p3] "=&r"(p3), [p2] "=&r"(p2), [p1] "=&r"(p1), [p0] "=&r"(p0),
-          [q0] "=&r"(q0), [q1] "=&r"(q1), [q2] "=&r"(q2), [q3] "=&r"(q3)
-        : [s1] "r"(s1), [s2] "r"(s2), [s3] "r"(s3), [s4] "r"(s4)
-        : "memory");
-
-    /* transpose p3, p2, p1, p0
-       original (when loaded from memory)
-       register       -4    -3   -2     -1
-         p0         p0_0  p0_1  p0_2  p0_3
-         p1         p1_0  p1_1  p1_2  p1_3
-         p2         p2_0  p2_1  p2_2  p2_3
-         p3         p3_0  p3_1  p3_2  p3_3
-
-       after transpose
-       register
-         p0         p3_3  p2_3  p1_3  p0_3
-         p1         p3_2  p2_2  p1_2  p0_2
-         p2         p3_1  p2_1  p1_1  p0_1
-         p3         p3_0  p2_0  p1_0  p0_0
-    */
-    __asm__ __volatile__(
-        "precrq.qb.ph %[prim1], %[p0], %[p1] \n\t"
-        "precr.qb.ph %[prim2], %[p0], %[p1] \n\t"
-        "precrq.qb.ph %[prim3], %[p2], %[p3] \n\t"
-        "precr.qb.ph %[prim4], %[p2], %[p3] \n\t"
-
-        "precrq.qb.ph %[p1], %[prim1], %[prim2] \n\t"
-        "precr.qb.ph %[p3], %[prim1], %[prim2] \n\t"
-        "precrq.qb.ph %[sec3], %[prim3], %[prim4] \n\t"
-        "precr.qb.ph %[sec4], %[prim3], %[prim4] \n\t"
-
-        "precrq.ph.w %[p0], %[p1], %[sec3] \n\t"
-        "precrq.ph.w %[p2], %[p3], %[sec4] \n\t"
-        "append %[p1], %[sec3], 16 \n\t"
-        "append %[p3], %[sec4], 16 \n\t"
-
-        : [prim1] "=&r"(prim1), [prim2] "=&r"(prim2), [prim3] "=&r"(prim3),
-          [prim4] "=&r"(prim4), [p0] "+r"(p0), [p1] "+r"(p1), [p2] "+r"(p2),
-          [p3] "+r"(p3), [sec3] "=&r"(sec3), [sec4] "=&r"(sec4)
-        :);
-
-    /* transpose q0, q1, q2, q3
-       original (when loaded from memory)
-       register       +1    +2    +3    +4
-         q3         q3_0  q3_1  q3_2  q3_3
-         q2         q2_0  q2_1  q2_2  q2_3
-         q1         q1_0  q1_1  q1_2  q1_3
-         q0         q0_0  q0_1  q0_2  q0_3
-
-       after transpose
-       register
-         q3         q0_3  q1_3  q2_3  q3_3
-         q2         q0_2  q1_2  q2_2  q3_2
-         q1         q0_1  q1_1  q2_1  q3_1
-         q0         q0_0  q1_0  q2_0  q3_0
-    */
-    __asm__ __volatile__(
-        "precrq.qb.ph %[prim1], %[q3], %[q2] \n\t"
-        "precr.qb.ph %[prim2], %[q3], %[q2] \n\t"
-        "precrq.qb.ph %[prim3], %[q1], %[q0] \n\t"
-        "precr.qb.ph %[prim4], %[q1], %[q0] \n\t"
-
-        "precrq.qb.ph %[q2], %[prim1], %[prim2] \n\t"
-        "precr.qb.ph %[q0], %[prim1], %[prim2] \n\t"
-        "precrq.qb.ph %[sec3], %[prim3], %[prim4] \n\t"
-        "precr.qb.ph %[sec4], %[prim3], %[prim4] \n\t"
-
-        "precrq.ph.w %[q3], %[q2], %[sec3] \n\t"
-        "precrq.ph.w %[q1], %[q0], %[sec4] \n\t"
-        "append %[q2], %[sec3], 16 \n\t"
-        "append %[q0], %[sec4], 16 \n\t"
-
-        : [prim1] "=&r"(prim1), [prim2] "=&r"(prim2), [prim3] "=&r"(prim3),
-          [prim4] "=&r"(prim4), [q3] "+r"(q3), [q2] "+r"(q2), [q1] "+r"(q1),
-          [q0] "+r"(q0), [sec3] "=&r"(sec3), [sec4] "=&r"(sec4)
-        :);
-
-    /* Produces the per-lane hev, mask and flat words used below. */
-    filter_hev_mask_flatmask4_dspr2(limit_vec, flimit_vec, thresh_vec, p1, p0,
-                                    p3, p2, q0, q1, q2, q3, &hev, &mask, &flat);
-
-    if ((flat == 0) && (mask != 0)) {
-      /* No lane is flat: only the filter1 result is needed. */
-      filter1_dspr2(mask, hev, p1, p0, q0, q1, &p1_f0, &p0_f0, &q0_f0, &q1_f0);
-      STORE_F0()
-    } else if ((mask & flat) == 0xFFFFFFFF) {
-      /* Every lane is both masked and flat: mbfilter result for all rows. */
-      /* left 2 element operation */
-      PACK_LEFT_0TO3()
-      mbfilter_dspr2(&p3_l, &p2_l, &p1_l, &p0_l, &q0_l, &q1_l, &q2_l, &q3_l);
-
-      /* right 2 element operation */
-      PACK_RIGHT_0TO3()
-      mbfilter_dspr2(&p3_r, &p2_r, &p1_r, &p0_r, &q0_r, &q1_r, &q2_r, &q3_r);
-
-      STORE_F1()
-    } else if ((flat != 0) && (mask != 0)) {
-      /* Mixed case: compute both filter results, then pick per lane. Lane
-       * 0x000000FF is written to row s4, 0x0000FF00 to s3, 0x00FF0000 to s2
-       * and 0xFF000000 to s1. For each lane the mbfilter result is stored
-       * when mask & flat is set, otherwise the filter1 result when only mask
-       * is set; an unmasked lane leaves its row untouched. Each store writes
-       * the low byte of its register, so the registers are shifted right
-       * between lanes: the filter1 words by 8 per lane, the *_r / *_l words
-       * by 16 after their first lane. */
-      filter1_dspr2(mask, hev, p1, p0, q0, q1, &p1_f0, &p0_f0, &q0_f0, &q1_f0);
-
-      /* left 2 element operation */
-      PACK_LEFT_0TO3()
-      mbfilter_dspr2(&p3_l, &p2_l, &p1_l, &p0_l, &q0_l, &q1_l, &q2_l, &q3_l);
-
-      /* right 2 element operation */
-      PACK_RIGHT_0TO3()
-      mbfilter_dspr2(&p3_r, &p2_r, &p1_r, &p0_r, &q0_r, &q1_r, &q2_r, &q3_r);
-
-      /* lane 0 -> row s4. "memory": the stores go through a pointer that is
-       * passed as a plain register operand. */
-      if (mask & flat & 0x000000FF) {
-        __asm__ __volatile__(
-            "sb %[p2_r], -3(%[s4]) \n\t"
-            "sb %[p1_r], -2(%[s4]) \n\t"
-            "sb %[p0_r], -1(%[s4]) \n\t"
-            "sb %[q0_r], (%[s4]) \n\t"
-            "sb %[q1_r], +1(%[s4]) \n\t"
-            "sb %[q2_r], +2(%[s4]) \n\t"
-
-            :
-            : [p2_r] "r"(p2_r), [p1_r] "r"(p1_r), [p0_r] "r"(p0_r),
-              [q0_r] "r"(q0_r), [q1_r] "r"(q1_r), [q2_r] "r"(q2_r),
-              [s4] "r"(s4)
-            : "memory");
-      } else if (mask & 0x000000FF) {
-        __asm__ __volatile__(
-            "sb %[p1_f0], -2(%[s4]) \n\t"
-            "sb %[p0_f0], -1(%[s4]) \n\t"
-            "sb %[q0_f0], (%[s4]) \n\t"
-            "sb %[q1_f0], +1(%[s4]) \n\t"
-
-            :
-            : [p1_f0] "r"(p1_f0), [p0_f0] "r"(p0_f0), [q0_f0] "r"(q0_f0),
-              [q1_f0] "r"(q1_f0), [s4] "r"(s4)
-            : "memory");
-      }
-
-      /* Advance to lane 1: next element of the right half, next f0 byte. */
-      __asm__ __volatile__(
-          "srl %[p2_r], %[p2_r], 16 \n\t"
-          "srl %[p1_r], %[p1_r], 16 \n\t"
-          "srl %[p0_r], %[p0_r], 16 \n\t"
-          "srl %[q0_r], %[q0_r], 16 \n\t"
-          "srl %[q1_r], %[q1_r], 16 \n\t"
-          "srl %[q2_r], %[q2_r], 16 \n\t"
-          "srl %[p1_f0], %[p1_f0], 8 \n\t"
-          "srl %[p0_f0], %[p0_f0], 8 \n\t"
-          "srl %[q0_f0], %[q0_f0], 8 \n\t"
-          "srl %[q1_f0], %[q1_f0], 8 \n\t"
-
-          : [p2_r] "+r"(p2_r), [p1_r] "+r"(p1_r), [p0_r] "+r"(p0_r),
-            [q0_r] "+r"(q0_r), [q1_r] "+r"(q1_r), [q2_r] "+r"(q2_r),
-            [p1_f0] "+r"(p1_f0), [p0_f0] "+r"(p0_f0), [q0_f0] "+r"(q0_f0),
-            [q1_f0] "+r"(q1_f0)
-          :);
-
-      /* lane 1 -> row s3 */
-      if (mask & flat & 0x0000FF00) {
-        __asm__ __volatile__(
-            "sb %[p2_r], -3(%[s3]) \n\t"
-            "sb %[p1_r], -2(%[s3]) \n\t"
-            "sb %[p0_r], -1(%[s3]) \n\t"
-            "sb %[q0_r], (%[s3]) \n\t"
-            "sb %[q1_r], +1(%[s3]) \n\t"
-            "sb %[q2_r], +2(%[s3]) \n\t"
-
-            :
-            : [p2_r] "r"(p2_r), [p1_r] "r"(p1_r), [p0_r] "r"(p0_r),
-              [q0_r] "r"(q0_r), [q1_r] "r"(q1_r), [q2_r] "r"(q2_r),
-              [s3] "r"(s3)
-            : "memory");
-      } else if (mask & 0x0000FF00) {
-        __asm__ __volatile__(
-            "sb %[p1_f0], -2(%[s3]) \n\t"
-            "sb %[p0_f0], -1(%[s3]) \n\t"
-            "sb %[q0_f0], (%[s3]) \n\t"
-            "sb %[q1_f0], +1(%[s3]) \n\t"
-
-            :
-            : [p1_f0] "r"(p1_f0), [p0_f0] "r"(p0_f0), [q0_f0] "r"(q0_f0),
-              [q1_f0] "r"(q1_f0), [s3] "r"(s3)
-            : "memory");
-      }
-
-      /* Advance the f0 words to lane 2. Only the four registers the template
-       * actually shifts are listed as operands. */
-      __asm__ __volatile__(
-          "srl %[p1_f0], %[p1_f0], 8 \n\t"
-          "srl %[p0_f0], %[p0_f0], 8 \n\t"
-          "srl %[q0_f0], %[q0_f0], 8 \n\t"
-          "srl %[q1_f0], %[q1_f0], 8 \n\t"
-
-          : [p1_f0] "+r"(p1_f0), [p0_f0] "+r"(p0_f0), [q0_f0] "+r"(q0_f0),
-            [q1_f0] "+r"(q1_f0)
-          :);
-
-      /* lane 2 -> row s2: first element of the left half */
-      if (mask & flat & 0x00FF0000) {
-        __asm__ __volatile__(
-            "sb %[p2_l], -3(%[s2]) \n\t"
-            "sb %[p1_l], -2(%[s2]) \n\t"
-            "sb %[p0_l], -1(%[s2]) \n\t"
-            "sb %[q0_l], (%[s2]) \n\t"
-            "sb %[q1_l], +1(%[s2]) \n\t"
-            "sb %[q2_l], +2(%[s2]) \n\t"
-
-            :
-            : [p2_l] "r"(p2_l), [p1_l] "r"(p1_l), [p0_l] "r"(p0_l),
-              [q0_l] "r"(q0_l), [q1_l] "r"(q1_l), [q2_l] "r"(q2_l),
-              [s2] "r"(s2)
-            : "memory");
-      } else if (mask & 0x00FF0000) {
-        __asm__ __volatile__(
-            "sb %[p1_f0], -2(%[s2]) \n\t"
-            "sb %[p0_f0], -1(%[s2]) \n\t"
-            "sb %[q0_f0], (%[s2]) \n\t"
-            "sb %[q1_f0], +1(%[s2]) \n\t"
-
-            :
-            : [p1_f0] "r"(p1_f0), [p0_f0] "r"(p0_f0), [q0_f0] "r"(q0_f0),
-              [q1_f0] "r"(q1_f0), [s2] "r"(s2)
-            : "memory");
-      }
-
-      /* Advance to lane 3: next element of the left half, next f0 byte. */
-      __asm__ __volatile__(
-          "srl %[p2_l], %[p2_l], 16 \n\t"
-          "srl %[p1_l], %[p1_l], 16 \n\t"
-          "srl %[p0_l], %[p0_l], 16 \n\t"
-          "srl %[q0_l], %[q0_l], 16 \n\t"
-          "srl %[q1_l], %[q1_l], 16 \n\t"
-          "srl %[q2_l], %[q2_l], 16 \n\t"
-          "srl %[p1_f0], %[p1_f0], 8 \n\t"
-          "srl %[p0_f0], %[p0_f0], 8 \n\t"
-          "srl %[q0_f0], %[q0_f0], 8 \n\t"
-          "srl %[q1_f0], %[q1_f0], 8 \n\t"
-
-          : [p2_l] "+r"(p2_l), [p1_l] "+r"(p1_l), [p0_l] "+r"(p0_l),
-            [q0_l] "+r"(q0_l), [q1_l] "+r"(q1_l), [q2_l] "+r"(q2_l),
-            [p1_f0] "+r"(p1_f0), [p0_f0] "+r"(p0_f0), [q0_f0] "+r"(q0_f0),
-            [q1_f0] "+r"(q1_f0)
-          :);
-
-      /* lane 3 -> row s1 */
-      if (mask & flat & 0xFF000000) {
-        __asm__ __volatile__(
-            "sb %[p2_l], -3(%[s1]) \n\t"
-            "sb %[p1_l], -2(%[s1]) \n\t"
-            "sb %[p0_l], -1(%[s1]) \n\t"
-            "sb %[q0_l], (%[s1]) \n\t"
-            "sb %[q1_l], +1(%[s1]) \n\t"
-            "sb %[q2_l], +2(%[s1]) \n\t"
-
-            :
-            : [p2_l] "r"(p2_l), [p1_l] "r"(p1_l), [p0_l] "r"(p0_l),
-              [q0_l] "r"(q0_l), [q1_l] "r"(q1_l), [q2_l] "r"(q2_l),
-              [s1] "r"(s1)
-            : "memory");
-      } else if (mask & 0xFF000000) {
-        __asm__ __volatile__(
-            "sb %[p1_f0], -2(%[s1]) \n\t"
-            "sb %[p0_f0], -1(%[s1]) \n\t"
-            "sb %[q0_f0], (%[s1]) \n\t"
-            "sb %[q1_f0], +1(%[s1]) \n\t"
-
-            :
-            : [p1_f0] "r"(p1_f0), [p0_f0] "r"(p0_f0), [q0_f0] "r"(q0_f0),
-              [q1_f0] "r"(q1_f0), [s1] "r"(s1)
-            : "memory");
-      }
-    }
-  }
-}
-#endif // #if HAVE_DSPR2
diff --git a/third_party/aom/aom_dsp/mips/loopfilter_mb_horiz_dspr2.c b/third_party/aom/aom_dsp/mips/loopfilter_mb_horiz_dspr2.c
deleted file mode 100644
index 34733e42e..000000000
--- a/third_party/aom/aom_dsp/mips/loopfilter_mb_horiz_dspr2.c
+++ /dev/null
@@ -1,734 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#include <stdlib.h>
-
-#include "config/aom_dsp_rtcd.h"
-
-#include "aom/aom_integer.h"
-#include "aom_dsp/mips/common_dspr2.h"
-#include "aom_dsp/mips/loopfilter_filters_dspr2.h"
-#include "aom_dsp/mips/loopfilter_macros_dspr2.h"
-#include "aom_dsp/mips/loopfilter_masks_dspr2.h"
-#include "aom_mem/aom_mem.h"
-
-#if HAVE_DSPR2
-static void mb_lpf_horizontal_edge(unsigned char *s, int pitch,
- const uint8_t *blimit, const uint8_t *limit,
- const uint8_t *thresh, int count) {
- uint32_t mask;
- uint32_t hev, flat, flat2;
- uint8_t i;
- uint8_t *sp7, *sp6, *sp5, *sp4, *sp3, *sp2, *sp1, *sp0;
- uint8_t *sq0, *sq1, *sq2, *sq3, *sq4, *sq5, *sq6, *sq7;
- uint32_t thresh_vec, flimit_vec, limit_vec;
- uint32_t uflimit, ulimit, uthresh;
- uint32_t p7, p6, p5, p4, p3, p2, p1, p0, q0, q1, q2, q3, q4, q5, q6, q7;
- uint32_t p1_f0, p0_f0, q0_f0, q1_f0;
- uint32_t p7_l, p6_l, p5_l, p4_l, p3_l, p2_l, p1_l, p0_l;
- uint32_t q0_l, q1_l, q2_l, q3_l, q4_l, q5_l, q6_l, q7_l;
- uint32_t p7_r, p6_r, p5_r, p4_r, p3_r, p2_r, p1_r, p0_r;
- uint32_t q0_r, q1_r, q2_r, q3_r, q4_r, q5_r, q6_r, q7_r;
- uint32_t p2_l_f1, p1_l_f1, p0_l_f1, p2_r_f1, p1_r_f1, p0_r_f1;
- uint32_t q0_l_f1, q1_l_f1, q2_l_f1, q0_r_f1, q1_r_f1, q2_r_f1;
-
- uflimit = *blimit;
- ulimit = *limit;
- uthresh = *thresh;
-
- /* create quad-byte */
- __asm__ __volatile__(
- "replv.qb %[thresh_vec], %[uthresh] \n\t"
- "replv.qb %[flimit_vec], %[uflimit] \n\t"
- "replv.qb %[limit_vec], %[ulimit] \n\t"
-
- : [thresh_vec] "=&r"(thresh_vec), [flimit_vec] "=&r"(flimit_vec),
- [limit_vec] "=r"(limit_vec)
- : [uthresh] "r"(uthresh), [uflimit] "r"(uflimit), [ulimit] "r"(ulimit));
-
- /* prefetch data for store */
- prefetch_store(s);
-
- for (i = 0; i < (2 * count); i++) {
- sp7 = s - (pitch << 3);
- sp6 = sp7 + pitch;
- sp5 = sp6 + pitch;
- sp4 = sp5 + pitch;
- sp3 = sp4 + pitch;
- sp2 = sp3 + pitch;
- sp1 = sp2 + pitch;
- sp0 = sp1 + pitch;
- sq0 = s;
- sq1 = s + pitch;
- sq2 = sq1 + pitch;
- sq3 = sq2 + pitch;
- sq4 = sq3 + pitch;
- sq5 = sq4 + pitch;
- sq6 = sq5 + pitch;
- sq7 = sq6 + pitch;
-
- __asm__ __volatile__(
- "lw %[p7], (%[sp7]) \n\t"
- "lw %[p6], (%[sp6]) \n\t"
- "lw %[p5], (%[sp5]) \n\t"
- "lw %[p4], (%[sp4]) \n\t"
- "lw %[p3], (%[sp3]) \n\t"
- "lw %[p2], (%[sp2]) \n\t"
- "lw %[p1], (%[sp1]) \n\t"
- "lw %[p0], (%[sp0]) \n\t"
-
- : [p3] "=&r"(p3), [p2] "=&r"(p2), [p1] "=&r"(p1), [p0] "=&r"(p0),
- [p7] "=&r"(p7), [p6] "=&r"(p6), [p5] "=&r"(p5), [p4] "=&r"(p4)
- : [sp3] "r"(sp3), [sp2] "r"(sp2), [sp1] "r"(sp1), [sp0] "r"(sp0),
- [sp4] "r"(sp4), [sp5] "r"(sp5), [sp6] "r"(sp6), [sp7] "r"(sp7));
-
- __asm__ __volatile__(
- "lw %[q0], (%[sq0]) \n\t"
- "lw %[q1], (%[sq1]) \n\t"
- "lw %[q2], (%[sq2]) \n\t"
- "lw %[q3], (%[sq3]) \n\t"
- "lw %[q4], (%[sq4]) \n\t"
- "lw %[q5], (%[sq5]) \n\t"
- "lw %[q6], (%[sq6]) \n\t"
- "lw %[q7], (%[sq7]) \n\t"
-
- : [q3] "=&r"(q3), [q2] "=&r"(q2), [q1] "=&r"(q1), [q0] "=&r"(q0),
- [q7] "=&r"(q7), [q6] "=&r"(q6), [q5] "=&r"(q5), [q4] "=&r"(q4)
- : [sq3] "r"(sq3), [sq2] "r"(sq2), [sq1] "r"(sq1), [sq0] "r"(sq0),
- [sq4] "r"(sq4), [sq5] "r"(sq5), [sq6] "r"(sq6), [sq7] "r"(sq7));
-
- filter_hev_mask_flatmask4_dspr2(limit_vec, flimit_vec, thresh_vec, p1, p0,
- p3, p2, q0, q1, q2, q3, &hev, &mask, &flat);
-
- flatmask5(p7, p6, p5, p4, p0, q0, q4, q5, q6, q7, &flat2);
-
- /* f0 */
- if (((flat2 == 0) && (flat == 0) && (mask != 0)) ||
- ((flat2 != 0) && (flat == 0) && (mask != 0))) {
- filter1_dspr2(mask, hev, p1, p0, q0, q1, &p1_f0, &p0_f0, &q0_f0, &q1_f0);
-
- __asm__ __volatile__(
- "sw %[p1_f0], (%[sp1]) \n\t"
- "sw %[p0_f0], (%[sp0]) \n\t"
- "sw %[q0_f0], (%[sq0]) \n\t"
- "sw %[q1_f0], (%[sq1]) \n\t"
-
- :
- : [p1_f0] "r"(p1_f0), [p0_f0] "r"(p0_f0), [q0_f0] "r"(q0_f0),
- [q1_f0] "r"(q1_f0), [sp1] "r"(sp1), [sp0] "r"(sp0), [sq0] "r"(sq0),
- [sq1] "r"(sq1));
- } else if ((flat2 == 0XFFFFFFFF) && (flat == 0xFFFFFFFF) &&
- (mask == 0xFFFFFFFF)) {
- /* f2 */
- PACK_LEFT_0TO3()
- PACK_LEFT_4TO7()
- wide_mbfilter_dspr2(&p7_l, &p6_l, &p5_l, &p4_l, &p3_l, &p2_l, &p1_l,
- &p0_l, &q0_l, &q1_l, &q2_l, &q3_l, &q4_l, &q5_l,
- &q6_l, &q7_l);
-
- PACK_RIGHT_0TO3()
- PACK_RIGHT_4TO7()
- wide_mbfilter_dspr2(&p7_r, &p6_r, &p5_r, &p4_r, &p3_r, &p2_r, &p1_r,
- &p0_r, &q0_r, &q1_r, &q2_r, &q3_r, &q4_r, &q5_r,
- &q6_r, &q7_r);
-
- COMBINE_LEFT_RIGHT_0TO2()
- COMBINE_LEFT_RIGHT_3TO6()
-
- __asm__ __volatile__(
- "sw %[p6], (%[sp6]) \n\t"
- "sw %[p5], (%[sp5]) \n\t"
- "sw %[p4], (%[sp4]) \n\t"
- "sw %[p3], (%[sp3]) \n\t"
- "sw %[p2], (%[sp2]) \n\t"
- "sw %[p1], (%[sp1]) \n\t"
- "sw %[p0], (%[sp0]) \n\t"
-
- :
- : [p6] "r"(p6), [p5] "r"(p5), [p4] "r"(p4), [p3] "r"(p3),
- [p2] "r"(p2), [p1] "r"(p1), [p0] "r"(p0), [sp6] "r"(sp6),
- [sp5] "r"(sp5), [sp4] "r"(sp4), [sp3] "r"(sp3), [sp2] "r"(sp2),
- [sp1] "r"(sp1), [sp0] "r"(sp0));
-
- __asm__ __volatile__(
- "sw %[q6], (%[sq6]) \n\t"
- "sw %[q5], (%[sq5]) \n\t"
- "sw %[q4], (%[sq4]) \n\t"
- "sw %[q3], (%[sq3]) \n\t"
- "sw %[q2], (%[sq2]) \n\t"
- "sw %[q1], (%[sq1]) \n\t"
- "sw %[q0], (%[sq0]) \n\t"
-
- :
- : [q6] "r"(q6), [q5] "r"(q5), [q4] "r"(q4), [q3] "r"(q3),
- [q2] "r"(q2), [q1] "r"(q1), [q0] "r"(q0), [sq6] "r"(sq6),
- [sq5] "r"(sq5), [sq4] "r"(sq4), [sq3] "r"(sq3), [sq2] "r"(sq2),
- [sq1] "r"(sq1), [sq0] "r"(sq0));
- } else if ((flat2 == 0) && (flat == 0xFFFFFFFF) && (mask == 0xFFFFFFFF)) {
- /* f1 */
- /* left 2 element operation */
- PACK_LEFT_0TO3()
- mbfilter_dspr2(&p3_l, &p2_l, &p1_l, &p0_l, &q0_l, &q1_l, &q2_l, &q3_l);
-
- /* right 2 element operation */
- PACK_RIGHT_0TO3()
- mbfilter_dspr2(&p3_r, &p2_r, &p1_r, &p0_r, &q0_r, &q1_r, &q2_r, &q3_r);
-
- COMBINE_LEFT_RIGHT_0TO2()
-
- __asm__ __volatile__(
- "sw %[p2], (%[sp2]) \n\t"
- "sw %[p1], (%[sp1]) \n\t"
- "sw %[p0], (%[sp0]) \n\t"
- "sw %[q0], (%[sq0]) \n\t"
- "sw %[q1], (%[sq1]) \n\t"
- "sw %[q2], (%[sq2]) \n\t"
-
- :
- : [p2] "r"(p2), [p1] "r"(p1), [p0] "r"(p0), [q0] "r"(q0),
- [q1] "r"(q1), [q2] "r"(q2), [sp2] "r"(sp2), [sp1] "r"(sp1),
- [sp0] "r"(sp0), [sq0] "r"(sq0), [sq1] "r"(sq1), [sq2] "r"(sq2));
- } else if ((flat2 == 0) && (flat != 0) && (mask != 0)) {
- /* f0+f1 */
- filter1_dspr2(mask, hev, p1, p0, q0, q1, &p1_f0, &p0_f0, &q0_f0, &q1_f0);
-
- /* left 2 element operation */
- PACK_LEFT_0TO3()
- mbfilter_dspr2(&p3_l, &p2_l, &p1_l, &p0_l, &q0_l, &q1_l, &q2_l, &q3_l);
-
- /* right 2 element operation */
- PACK_RIGHT_0TO3()
- mbfilter_dspr2(&p3_r, &p2_r, &p1_r, &p0_r, &q0_r, &q1_r, &q2_r, &q3_r);
-
- if (mask & flat & 0x000000FF) {
- __asm__ __volatile__(
- "sb %[p2_r], (%[sp2]) \n\t"
- "sb %[p1_r], (%[sp1]) \n\t"
- "sb %[p0_r], (%[sp0]) \n\t"
- "sb %[q0_r], (%[sq0]) \n\t"
- "sb %[q1_r], (%[sq1]) \n\t"
- "sb %[q2_r], (%[sq2]) \n\t"
-
- :
- : [p2_r] "r"(p2_r), [p1_r] "r"(p1_r), [p0_r] "r"(p0_r),
- [q0_r] "r"(q0_r), [q1_r] "r"(q1_r), [q2_r] "r"(q2_r),
- [sp2] "r"(sp2), [sp1] "r"(sp1), [sp0] "r"(sp0), [sq0] "r"(sq0),
- [sq1] "r"(sq1), [sq2] "r"(sq2));
- } else if (mask & 0x000000FF) {
- __asm__ __volatile__(
- "sb %[p1_f0], (%[sp1]) \n\t"
- "sb %[p0_f0], (%[sp0]) \n\t"
- "sb %[q0_f0], (%[sq0]) \n\t"
- "sb %[q1_f0], (%[sq1]) \n\t"
-
- :
- : [p1_f0] "r"(p1_f0), [p0_f0] "r"(p0_f0), [q0_f0] "r"(q0_f0),
- [q1_f0] "r"(q1_f0), [sp1] "r"(sp1), [sp0] "r"(sp0),
- [sq0] "r"(sq0), [sq1] "r"(sq1));
- }
-
- __asm__ __volatile__(
- "srl %[p2_r], %[p2_r], 16 \n\t"
- "srl %[p1_r], %[p1_r], 16 \n\t"
- "srl %[p0_r], %[p0_r], 16 \n\t"
- "srl %[q0_r], %[q0_r], 16 \n\t"
- "srl %[q1_r], %[q1_r], 16 \n\t"
- "srl %[q2_r], %[q2_r], 16 \n\t"
- "srl %[p1_f0], %[p1_f0], 8 \n\t"
- "srl %[p0_f0], %[p0_f0], 8 \n\t"
- "srl %[q0_f0], %[q0_f0], 8 \n\t"
- "srl %[q1_f0], %[q1_f0], 8 \n\t"
-
- : [p2_r] "+r"(p2_r), [p1_r] "+r"(p1_r), [p0_r] "+r"(p0_r),
- [q0_r] "+r"(q0_r), [q1_r] "+r"(q1_r), [q2_r] "+r"(q2_r),
- [p1_f0] "+r"(p1_f0), [p0_f0] "+r"(p0_f0), [q0_f0] "+r"(q0_f0),
- [q1_f0] "+r"(q1_f0)
- :);
-
- if (mask & flat & 0x0000FF00) {
- __asm__ __volatile__(
- "sb %[p2_r], +1(%[sp2]) \n\t"
- "sb %[p1_r], +1(%[sp1]) \n\t"
- "sb %[p0_r], +1(%[sp0]) \n\t"
- "sb %[q0_r], +1(%[sq0]) \n\t"
- "sb %[q1_r], +1(%[sq1]) \n\t"
- "sb %[q2_r], +1(%[sq2]) \n\t"
-
- :
- : [p2_r] "r"(p2_r), [p1_r] "r"(p1_r), [p0_r] "r"(p0_r),
- [q0_r] "r"(q0_r), [q1_r] "r"(q1_r), [q2_r] "r"(q2_r),
- [sp2] "r"(sp2), [sp1] "r"(sp1), [sp0] "r"(sp0), [sq0] "r"(sq0),
- [sq1] "r"(sq1), [sq2] "r"(sq2));
- } else if (mask & 0x0000FF00) {
- __asm__ __volatile__(
- "sb %[p1_f0], +1(%[sp1]) \n\t"
- "sb %[p0_f0], +1(%[sp0]) \n\t"
- "sb %[q0_f0], +1(%[sq0]) \n\t"
- "sb %[q1_f0], +1(%[sq1]) \n\t"
-
- :
- : [p1_f0] "r"(p1_f0), [p0_f0] "r"(p0_f0), [q0_f0] "r"(q0_f0),
- [q1_f0] "r"(q1_f0), [sp1] "r"(sp1), [sp0] "r"(sp0),
- [sq0] "r"(sq0), [sq1] "r"(sq1));
- }
-
- __asm__ __volatile__(
- "srl %[p1_f0], %[p1_f0], 8 \n\t"
- "srl %[p0_f0], %[p0_f0], 8 \n\t"
- "srl %[q0_f0], %[q0_f0], 8 \n\t"
- "srl %[q1_f0], %[q1_f0], 8 \n\t"
-
- : [p1_f0] "+r"(p1_f0), [p0_f0] "+r"(p0_f0), [q0_f0] "+r"(q0_f0),
- [q1_f0] "+r"(q1_f0)
- :);
-
- if (mask & flat & 0x00FF0000) {
- __asm__ __volatile__(
- "sb %[p2_l], +2(%[sp2]) \n\t"
- "sb %[p1_l], +2(%[sp1]) \n\t"
- "sb %[p0_l], +2(%[sp0]) \n\t"
- "sb %[q0_l], +2(%[sq0]) \n\t"
- "sb %[q1_l], +2(%[sq1]) \n\t"
- "sb %[q2_l], +2(%[sq2]) \n\t"
-
- :
- : [p2_l] "r"(p2_l), [p1_l] "r"(p1_l), [p0_l] "r"(p0_l),
- [q0_l] "r"(q0_l), [q1_l] "r"(q1_l), [q2_l] "r"(q2_l),
- [sp2] "r"(sp2), [sp1] "r"(sp1), [sp0] "r"(sp0), [sq0] "r"(sq0),
- [sq1] "r"(sq1), [sq2] "r"(sq2));
- } else if (mask & 0x00FF0000) {
- __asm__ __volatile__(
- "sb %[p1_f0], +2(%[sp1]) \n\t"
- "sb %[p0_f0], +2(%[sp0]) \n\t"
- "sb %[q0_f0], +2(%[sq0]) \n\t"
- "sb %[q1_f0], +2(%[sq1]) \n\t"
-
- :
- : [p1_f0] "r"(p1_f0), [p0_f0] "r"(p0_f0), [q0_f0] "r"(q0_f0),
- [q1_f0] "r"(q1_f0), [sp1] "r"(sp1), [sp0] "r"(sp0),
- [sq0] "r"(sq0), [sq1] "r"(sq1));
- }
-
- __asm__ __volatile__(
- "srl %[p2_l], %[p2_l], 16 \n\t"
- "srl %[p1_l], %[p1_l], 16 \n\t"
- "srl %[p0_l], %[p0_l], 16 \n\t"
- "srl %[q0_l], %[q0_l], 16 \n\t"
- "srl %[q1_l], %[q1_l], 16 \n\t"
- "srl %[q2_l], %[q2_l], 16 \n\t"
- "srl %[p1_f0], %[p1_f0], 8 \n\t"
- "srl %[p0_f0], %[p0_f0], 8 \n\t"
- "srl %[q0_f0], %[q0_f0], 8 \n\t"
- "srl %[q1_f0], %[q1_f0], 8 \n\t"
-
- : [p2_l] "+r"(p2_l), [p1_l] "+r"(p1_l), [p0_l] "+r"(p0_l),
- [q0_l] "+r"(q0_l), [q1_l] "+r"(q1_l), [q2_l] "+r"(q2_l),
- [p1_f0] "+r"(p1_f0), [p0_f0] "+r"(p0_f0), [q0_f0] "+r"(q0_f0),
- [q1_f0] "+r"(q1_f0)
- :);
-
- if (mask & flat & 0xFF000000) {
- __asm__ __volatile__(
- "sb %[p2_l], +3(%[sp2]) \n\t"
- "sb %[p1_l], +3(%[sp1]) \n\t"
- "sb %[p0_l], +3(%[sp0]) \n\t"
- "sb %[q0_l], +3(%[sq0]) \n\t"
- "sb %[q1_l], +3(%[sq1]) \n\t"
- "sb %[q2_l], +3(%[sq2]) \n\t"
-
- :
- : [p2_l] "r"(p2_l), [p1_l] "r"(p1_l), [p0_l] "r"(p0_l),
- [q0_l] "r"(q0_l), [q1_l] "r"(q1_l), [q2_l] "r"(q2_l),
- [sp2] "r"(sp2), [sp1] "r"(sp1), [sp0] "r"(sp0), [sq0] "r"(sq0),
- [sq1] "r"(sq1), [sq2] "r"(sq2));
- } else if (mask & 0xFF000000) {
- __asm__ __volatile__(
- "sb %[p1_f0], +3(%[sp1]) \n\t"
- "sb %[p0_f0], +3(%[sp0]) \n\t"
- "sb %[q0_f0], +3(%[sq0]) \n\t"
- "sb %[q1_f0], +3(%[sq1]) \n\t"
-
- :
- : [p1_f0] "r"(p1_f0), [p0_f0] "r"(p0_f0), [q0_f0] "r"(q0_f0),
- [q1_f0] "r"(q1_f0), [sp1] "r"(sp1), [sp0] "r"(sp0),
- [sq0] "r"(sq0), [sq1] "r"(sq1));
- }
- } else if ((flat2 != 0) && (flat != 0) && (mask != 0)) {
- /* f0 + f1 + f2 */
- /* f0 function */
- filter1_dspr2(mask, hev, p1, p0, q0, q1, &p1_f0, &p0_f0, &q0_f0, &q1_f0);
-
- /* f1 function */
- /* left 2 element operation */
- PACK_LEFT_0TO3()
- mbfilter1_dspr2(p3_l, p2_l, p1_l, p0_l, q0_l, q1_l, q2_l, q3_l, &p2_l_f1,
- &p1_l_f1, &p0_l_f1, &q0_l_f1, &q1_l_f1, &q2_l_f1);
-
- /* right 2 element operation */
- PACK_RIGHT_0TO3()
- mbfilter1_dspr2(p3_r, p2_r, p1_r, p0_r, q0_r, q1_r, q2_r, q3_r, &p2_r_f1,
- &p1_r_f1, &p0_r_f1, &q0_r_f1, &q1_r_f1, &q2_r_f1);
-
- /* f2 function */
- PACK_LEFT_4TO7()
- wide_mbfilter_dspr2(&p7_l, &p6_l, &p5_l, &p4_l, &p3_l, &p2_l, &p1_l,
- &p0_l, &q0_l, &q1_l, &q2_l, &q3_l, &q4_l, &q5_l,
- &q6_l, &q7_l);
-
- PACK_RIGHT_4TO7()
- wide_mbfilter_dspr2(&p7_r, &p6_r, &p5_r, &p4_r, &p3_r, &p2_r, &p1_r,
- &p0_r, &q0_r, &q1_r, &q2_r, &q3_r, &q4_r, &q5_r,
- &q6_r, &q7_r);
-
- if (mask & flat & flat2 & 0x000000FF) {
- __asm__ __volatile__(
- "sb %[p6_r], (%[sp6]) \n\t"
- "sb %[p5_r], (%[sp5]) \n\t"
- "sb %[p4_r], (%[sp4]) \n\t"
- "sb %[p3_r], (%[sp3]) \n\t"
- "sb %[p2_r], (%[sp2]) \n\t"
- "sb %[p1_r], (%[sp1]) \n\t"
- "sb %[p0_r], (%[sp0]) \n\t"
-
- :
- : [p6_r] "r"(p6_r), [p5_r] "r"(p5_r), [p4_r] "r"(p4_r),
- [p3_r] "r"(p3_r), [p2_r] "r"(p2_r), [p1_r] "r"(p1_r),
- [sp6] "r"(sp6), [sp5] "r"(sp5), [sp4] "r"(sp4), [sp3] "r"(sp3),
- [sp2] "r"(sp2), [sp1] "r"(sp1), [p0_r] "r"(p0_r), [sp0] "r"(sp0));
-
- __asm__ __volatile__(
- "sb %[q0_r], (%[sq0]) \n\t"
- "sb %[q1_r], (%[sq1]) \n\t"
- "sb %[q2_r], (%[sq2]) \n\t"
- "sb %[q3_r], (%[sq3]) \n\t"
- "sb %[q4_r], (%[sq4]) \n\t"
- "sb %[q5_r], (%[sq5]) \n\t"
- "sb %[q6_r], (%[sq6]) \n\t"
-
- :
- : [q0_r] "r"(q0_r), [q1_r] "r"(q1_r), [q2_r] "r"(q2_r),
- [q3_r] "r"(q3_r), [q4_r] "r"(q4_r), [q5_r] "r"(q5_r),
- [q6_r] "r"(q6_r), [sq0] "r"(sq0), [sq1] "r"(sq1), [sq2] "r"(sq2),
- [sq3] "r"(sq3), [sq4] "r"(sq4), [sq5] "r"(sq5), [sq6] "r"(sq6));
- } else if (mask & flat & 0x000000FF) {
- __asm__ __volatile__(
- "sb %[p2_r_f1], (%[sp2]) \n\t"
- "sb %[p1_r_f1], (%[sp1]) \n\t"
- "sb %[p0_r_f1], (%[sp0]) \n\t"
- "sb %[q0_r_f1], (%[sq0]) \n\t"
- "sb %[q1_r_f1], (%[sq1]) \n\t"
- "sb %[q2_r_f1], (%[sq2]) \n\t"
-
- :
- : [p2_r_f1] "r"(p2_r_f1), [p1_r_f1] "r"(p1_r_f1),
- [p0_r_f1] "r"(p0_r_f1), [q0_r_f1] "r"(q0_r_f1),
- [q1_r_f1] "r"(q1_r_f1), [q2_r_f1] "r"(q2_r_f1), [sp2] "r"(sp2),
- [sp1] "r"(sp1), [sp0] "r"(sp0), [sq0] "r"(sq0), [sq1] "r"(sq1),
- [sq2] "r"(sq2));
- } else if (mask & 0x000000FF) {
- __asm__ __volatile__(
- "sb %[p1_f0], (%[sp1]) \n\t"
- "sb %[p0_f0], (%[sp0]) \n\t"
- "sb %[q0_f0], (%[sq0]) \n\t"
- "sb %[q1_f0], (%[sq1]) \n\t"
-
- :
- : [p1_f0] "r"(p1_f0), [p0_f0] "r"(p0_f0), [q0_f0] "r"(q0_f0),
- [q1_f0] "r"(q1_f0), [sp1] "r"(sp1), [sp0] "r"(sp0),
- [sq0] "r"(sq0), [sq1] "r"(sq1));
- }
-
- __asm__ __volatile__(
- "srl %[p6_r], %[p6_r], 16 \n\t"
- "srl %[p5_r], %[p5_r], 16 \n\t"
- "srl %[p4_r], %[p4_r], 16 \n\t"
- "srl %[p3_r], %[p3_r], 16 \n\t"
- "srl %[p2_r], %[p2_r], 16 \n\t"
- "srl %[p1_r], %[p1_r], 16 \n\t"
- "srl %[p0_r], %[p0_r], 16 \n\t"
- "srl %[q0_r], %[q0_r], 16 \n\t"
- "srl %[q1_r], %[q1_r], 16 \n\t"
- "srl %[q2_r], %[q2_r], 16 \n\t"
- "srl %[q3_r], %[q3_r], 16 \n\t"
- "srl %[q4_r], %[q4_r], 16 \n\t"
- "srl %[q5_r], %[q5_r], 16 \n\t"
- "srl %[q6_r], %[q6_r], 16 \n\t"
-
- : [q0_r] "+r"(q0_r), [q1_r] "+r"(q1_r), [q2_r] "+r"(q2_r),
- [q3_r] "+r"(q3_r), [q4_r] "+r"(q4_r), [q5_r] "+r"(q5_r),
- [p6_r] "+r"(p6_r), [p5_r] "+r"(p5_r), [p4_r] "+r"(p4_r),
- [p3_r] "+r"(p3_r), [p2_r] "+r"(p2_r), [p1_r] "+r"(p1_r),
- [q6_r] "+r"(q6_r), [p0_r] "+r"(p0_r)
- :);
-
- __asm__ __volatile__(
- "srl %[p2_r_f1], %[p2_r_f1], 16 \n\t"
- "srl %[p1_r_f1], %[p1_r_f1], 16 \n\t"
- "srl %[p0_r_f1], %[p0_r_f1], 16 \n\t"
- "srl %[q0_r_f1], %[q0_r_f1], 16 \n\t"
- "srl %[q1_r_f1], %[q1_r_f1], 16 \n\t"
- "srl %[q2_r_f1], %[q2_r_f1], 16 \n\t"
- "srl %[p1_f0], %[p1_f0], 8 \n\t"
- "srl %[p0_f0], %[p0_f0], 8 \n\t"
- "srl %[q0_f0], %[q0_f0], 8 \n\t"
- "srl %[q1_f0], %[q1_f0], 8 \n\t"
-
- : [p2_r_f1] "+r"(p2_r_f1), [p1_r_f1] "+r"(p1_r_f1),
- [p0_r_f1] "+r"(p0_r_f1), [q0_r_f1] "+r"(q0_r_f1),
- [q1_r_f1] "+r"(q1_r_f1), [q2_r_f1] "+r"(q2_r_f1),
- [p1_f0] "+r"(p1_f0), [p0_f0] "+r"(p0_f0), [q0_f0] "+r"(q0_f0),
- [q1_f0] "+r"(q1_f0)
- :);
-
- if (mask & flat & flat2 & 0x0000FF00) {
- __asm__ __volatile__(
- "sb %[p6_r], +1(%[sp6]) \n\t"
- "sb %[p5_r], +1(%[sp5]) \n\t"
- "sb %[p4_r], +1(%[sp4]) \n\t"
- "sb %[p3_r], +1(%[sp3]) \n\t"
- "sb %[p2_r], +1(%[sp2]) \n\t"
- "sb %[p1_r], +1(%[sp1]) \n\t"
- "sb %[p0_r], +1(%[sp0]) \n\t"
-
- :
- : [p6_r] "r"(p6_r), [p5_r] "r"(p5_r), [p4_r] "r"(p4_r),
- [p3_r] "r"(p3_r), [p2_r] "r"(p2_r), [p1_r] "r"(p1_r),
- [p0_r] "r"(p0_r), [sp6] "r"(sp6), [sp5] "r"(sp5), [sp4] "r"(sp4),
- [sp3] "r"(sp3), [sp2] "r"(sp2), [sp1] "r"(sp1), [sp0] "r"(sp0));
-
- __asm__ __volatile__(
- "sb %[q0_r], +1(%[sq0]) \n\t"
- "sb %[q1_r], +1(%[sq1]) \n\t"
- "sb %[q2_r], +1(%[sq2]) \n\t"
- "sb %[q3_r], +1(%[sq3]) \n\t"
- "sb %[q4_r], +1(%[sq4]) \n\t"
- "sb %[q5_r], +1(%[sq5]) \n\t"
- "sb %[q6_r], +1(%[sq6]) \n\t"
-
- :
- : [q0_r] "r"(q0_r), [q1_r] "r"(q1_r), [q2_r] "r"(q2_r),
- [q3_r] "r"(q3_r), [q4_r] "r"(q4_r), [q5_r] "r"(q5_r),
- [q6_r] "r"(q6_r), [sq0] "r"(sq0), [sq1] "r"(sq1), [sq2] "r"(sq2),
- [sq3] "r"(sq3), [sq4] "r"(sq4), [sq5] "r"(sq5), [sq6] "r"(sq6));
- } else if (mask & flat & 0x0000FF00) {
- __asm__ __volatile__(
- "sb %[p2_r_f1], +1(%[sp2]) \n\t"
- "sb %[p1_r_f1], +1(%[sp1]) \n\t"
- "sb %[p0_r_f1], +1(%[sp0]) \n\t"
- "sb %[q0_r_f1], +1(%[sq0]) \n\t"
- "sb %[q1_r_f1], +1(%[sq1]) \n\t"
- "sb %[q2_r_f1], +1(%[sq2]) \n\t"
-
- :
- : [p2_r_f1] "r"(p2_r_f1), [p1_r_f1] "r"(p1_r_f1),
- [p0_r_f1] "r"(p0_r_f1), [q0_r_f1] "r"(q0_r_f1),
- [q1_r_f1] "r"(q1_r_f1), [q2_r_f1] "r"(q2_r_f1), [sp2] "r"(sp2),
- [sp1] "r"(sp1), [sp0] "r"(sp0), [sq0] "r"(sq0), [sq1] "r"(sq1),
- [sq2] "r"(sq2));
- } else if (mask & 0x0000FF00) {
- __asm__ __volatile__(
- "sb %[p1_f0], +1(%[sp1]) \n\t"
- "sb %[p0_f0], +1(%[sp0]) \n\t"
- "sb %[q0_f0], +1(%[sq0]) \n\t"
- "sb %[q1_f0], +1(%[sq1]) \n\t"
-
- :
- : [p1_f0] "r"(p1_f0), [p0_f0] "r"(p0_f0), [q0_f0] "r"(q0_f0),
- [q1_f0] "r"(q1_f0), [sp1] "r"(sp1), [sp0] "r"(sp0),
- [sq0] "r"(sq0), [sq1] "r"(sq1));
- }
-
- __asm__ __volatile__(
- "srl %[p1_f0], %[p1_f0], 8 \n\t"
- "srl %[p0_f0], %[p0_f0], 8 \n\t"
- "srl %[q0_f0], %[q0_f0], 8 \n\t"
- "srl %[q1_f0], %[q1_f0], 8 \n\t"
-
- : [p1_f0] "+r"(p1_f0), [p0_f0] "+r"(p0_f0), [q0_f0] "+r"(q0_f0),
- [q1_f0] "+r"(q1_f0)
- :);
-
- if (mask & flat & flat2 & 0x00FF0000) {
- __asm__ __volatile__(
- "sb %[p6_l], +2(%[sp6]) \n\t"
- "sb %[p5_l], +2(%[sp5]) \n\t"
- "sb %[p4_l], +2(%[sp4]) \n\t"
- "sb %[p3_l], +2(%[sp3]) \n\t"
- "sb %[p2_l], +2(%[sp2]) \n\t"
- "sb %[p1_l], +2(%[sp1]) \n\t"
- "sb %[p0_l], +2(%[sp0]) \n\t"
-
- :
- : [p6_l] "r"(p6_l), [p5_l] "r"(p5_l), [p4_l] "r"(p4_l),
- [p3_l] "r"(p3_l), [p2_l] "r"(p2_l), [p1_l] "r"(p1_l),
- [p0_l] "r"(p0_l), [sp6] "r"(sp6), [sp5] "r"(sp5), [sp4] "r"(sp4),
- [sp3] "r"(sp3), [sp2] "r"(sp2), [sp1] "r"(sp1), [sp0] "r"(sp0));
-
- __asm__ __volatile__(
- "sb %[q0_l], +2(%[sq0]) \n\t"
- "sb %[q1_l], +2(%[sq1]) \n\t"
- "sb %[q2_l], +2(%[sq2]) \n\t"
- "sb %[q3_l], +2(%[sq3]) \n\t"
- "sb %[q4_l], +2(%[sq4]) \n\t"
- "sb %[q5_l], +2(%[sq5]) \n\t"
- "sb %[q6_l], +2(%[sq6]) \n\t"
-
- :
- : [q0_l] "r"(q0_l), [q1_l] "r"(q1_l), [q2_l] "r"(q2_l),
- [q3_l] "r"(q3_l), [q4_l] "r"(q4_l), [q5_l] "r"(q5_l),
- [q6_l] "r"(q6_l), [sq0] "r"(sq0), [sq1] "r"(sq1), [sq2] "r"(sq2),
- [sq3] "r"(sq3), [sq4] "r"(sq4), [sq5] "r"(sq5), [sq6] "r"(sq6));
- } else if (mask & flat & 0x00FF0000) {
- __asm__ __volatile__(
- "sb %[p2_l_f1], +2(%[sp2]) \n\t"
- "sb %[p1_l_f1], +2(%[sp1]) \n\t"
- "sb %[p0_l_f1], +2(%[sp0]) \n\t"
- "sb %[q0_l_f1], +2(%[sq0]) \n\t"
- "sb %[q1_l_f1], +2(%[sq1]) \n\t"
- "sb %[q2_l_f1], +2(%[sq2]) \n\t"
-
- :
- : [p2_l_f1] "r"(p2_l_f1), [p1_l_f1] "r"(p1_l_f1),
- [p0_l_f1] "r"(p0_l_f1), [q0_l_f1] "r"(q0_l_f1),
- [q1_l_f1] "r"(q1_l_f1), [q2_l_f1] "r"(q2_l_f1), [sp2] "r"(sp2),
- [sp1] "r"(sp1), [sp0] "r"(sp0), [sq0] "r"(sq0), [sq1] "r"(sq1),
- [sq2] "r"(sq2));
- } else if (mask & 0x00FF0000) {
- __asm__ __volatile__(
- "sb %[p1_f0], +2(%[sp1]) \n\t"
- "sb %[p0_f0], +2(%[sp0]) \n\t"
- "sb %[q0_f0], +2(%[sq0]) \n\t"
- "sb %[q1_f0], +2(%[sq1]) \n\t"
-
- :
- : [p1_f0] "r"(p1_f0), [p0_f0] "r"(p0_f0), [q0_f0] "r"(q0_f0),
- [q1_f0] "r"(q1_f0), [sp1] "r"(sp1), [sp0] "r"(sp0),
- [sq0] "r"(sq0), [sq1] "r"(sq1));
- }
-
- __asm__ __volatile__(
- "srl %[p6_l], %[p6_l], 16 \n\t"
- "srl %[p5_l], %[p5_l], 16 \n\t"
- "srl %[p4_l], %[p4_l], 16 \n\t"
- "srl %[p3_l], %[p3_l], 16 \n\t"
- "srl %[p2_l], %[p2_l], 16 \n\t"
- "srl %[p1_l], %[p1_l], 16 \n\t"
- "srl %[p0_l], %[p0_l], 16 \n\t"
- "srl %[q0_l], %[q0_l], 16 \n\t"
- "srl %[q1_l], %[q1_l], 16 \n\t"
- "srl %[q2_l], %[q2_l], 16 \n\t"
- "srl %[q3_l], %[q3_l], 16 \n\t"
- "srl %[q4_l], %[q4_l], 16 \n\t"
- "srl %[q5_l], %[q5_l], 16 \n\t"
- "srl %[q6_l], %[q6_l], 16 \n\t"
-
- : [q0_l] "+r"(q0_l), [q1_l] "+r"(q1_l), [q2_l] "+r"(q2_l),
- [q3_l] "+r"(q3_l), [q4_l] "+r"(q4_l), [q5_l] "+r"(q5_l),
- [q6_l] "+r"(q6_l), [p6_l] "+r"(p6_l), [p5_l] "+r"(p5_l),
- [p4_l] "+r"(p4_l), [p3_l] "+r"(p3_l), [p2_l] "+r"(p2_l),
- [p1_l] "+r"(p1_l), [p0_l] "+r"(p0_l)
- :);
-
- __asm__ __volatile__(
- "srl %[p2_l_f1], %[p2_l_f1], 16 \n\t"
- "srl %[p1_l_f1], %[p1_l_f1], 16 \n\t"
- "srl %[p0_l_f1], %[p0_l_f1], 16 \n\t"
- "srl %[q0_l_f1], %[q0_l_f1], 16 \n\t"
- "srl %[q1_l_f1], %[q1_l_f1], 16 \n\t"
- "srl %[q2_l_f1], %[q2_l_f1], 16 \n\t"
- "srl %[p1_f0], %[p1_f0], 8 \n\t"
- "srl %[p0_f0], %[p0_f0], 8 \n\t"
- "srl %[q0_f0], %[q0_f0], 8 \n\t"
- "srl %[q1_f0], %[q1_f0], 8 \n\t"
-
- : [p2_l_f1] "+r"(p2_l_f1), [p1_l_f1] "+r"(p1_l_f1),
- [p0_l_f1] "+r"(p0_l_f1), [q0_l_f1] "+r"(q0_l_f1),
- [q1_l_f1] "+r"(q1_l_f1), [q2_l_f1] "+r"(q2_l_f1),
- [p1_f0] "+r"(p1_f0), [p0_f0] "+r"(p0_f0), [q0_f0] "+r"(q0_f0),
- [q1_f0] "+r"(q1_f0)
- :);
-
- if (mask & flat & flat2 & 0xFF000000) {
- __asm__ __volatile__(
- "sb %[p6_l], +3(%[sp6]) \n\t"
- "sb %[p5_l], +3(%[sp5]) \n\t"
- "sb %[p4_l], +3(%[sp4]) \n\t"
- "sb %[p3_l], +3(%[sp3]) \n\t"
- "sb %[p2_l], +3(%[sp2]) \n\t"
- "sb %[p1_l], +3(%[sp1]) \n\t"
- "sb %[p0_l], +3(%[sp0]) \n\t"
-
- :
- : [p6_l] "r"(p6_l), [p5_l] "r"(p5_l), [p4_l] "r"(p4_l),
- [p3_l] "r"(p3_l), [p2_l] "r"(p2_l), [p1_l] "r"(p1_l),
- [p0_l] "r"(p0_l), [sp6] "r"(sp6), [sp5] "r"(sp5), [sp4] "r"(sp4),
- [sp3] "r"(sp3), [sp2] "r"(sp2), [sp1] "r"(sp1), [sp0] "r"(sp0));
-
- __asm__ __volatile__(
- "sb %[q0_l], +3(%[sq0]) \n\t"
- "sb %[q1_l], +3(%[sq1]) \n\t"
- "sb %[q2_l], +3(%[sq2]) \n\t"
- "sb %[q3_l], +3(%[sq3]) \n\t"
- "sb %[q4_l], +3(%[sq4]) \n\t"
- "sb %[q5_l], +3(%[sq5]) \n\t"
- "sb %[q6_l], +3(%[sq6]) \n\t"
-
- :
- : [q0_l] "r"(q0_l), [q1_l] "r"(q1_l), [q2_l] "r"(q2_l),
- [q3_l] "r"(q3_l), [q4_l] "r"(q4_l), [q5_l] "r"(q5_l),
- [sq0] "r"(sq0), [sq1] "r"(sq1), [sq2] "r"(sq2), [sq3] "r"(sq3),
- [sq4] "r"(sq4), [sq5] "r"(sq5), [q6_l] "r"(q6_l), [sq6] "r"(sq6));
- } else if (mask & flat & 0xFF000000) {
- __asm__ __volatile__(
- "sb %[p2_l_f1], +3(%[sp2]) \n\t"
- "sb %[p1_l_f1], +3(%[sp1]) \n\t"
- "sb %[p0_l_f1], +3(%[sp0]) \n\t"
- "sb %[q0_l_f1], +3(%[sq0]) \n\t"
- "sb %[q1_l_f1], +3(%[sq1]) \n\t"
- "sb %[q2_l_f1], +3(%[sq2]) \n\t"
-
- :
- : [p2_l_f1] "r"(p2_l_f1), [p1_l_f1] "r"(p1_l_f1),
- [p0_l_f1] "r"(p0_l_f1), [q0_l_f1] "r"(q0_l_f1),
- [q1_l_f1] "r"(q1_l_f1), [q2_l_f1] "r"(q2_l_f1), [sp2] "r"(sp2),
- [sp1] "r"(sp1), [sp0] "r"(sp0), [sq0] "r"(sq0), [sq1] "r"(sq1),
- [sq2] "r"(sq2));
- } else if (mask & 0xFF000000) {
- __asm__ __volatile__(
- "sb %[p1_f0], +3(%[sp1]) \n\t"
- "sb %[p0_f0], +3(%[sp0]) \n\t"
- "sb %[q0_f0], +3(%[sq0]) \n\t"
- "sb %[q1_f0], +3(%[sq1]) \n\t"
-
- :
- : [p1_f0] "r"(p1_f0), [p0_f0] "r"(p0_f0), [q0_f0] "r"(q0_f0),
- [q1_f0] "r"(q1_f0), [sp1] "r"(sp1), [sp0] "r"(sp0),
- [sq0] "r"(sq0), [sq1] "r"(sq1));
- }
- }
-
- s = s + 4;
- }
-}
-
-void aom_lpf_horizontal_16_dspr2(unsigned char *s, int pitch,
- const uint8_t *blimit, const uint8_t *limit,
- const uint8_t *thresh) {
- mb_lpf_horizontal_edge(s, pitch, blimit, limit, thresh, 1);
-}
-
-void aom_lpf_horizontal_16_dual_dspr2(unsigned char *s, int pitch,
- const uint8_t *blimit,
- const uint8_t *limit,
- const uint8_t *thresh) {
- mb_lpf_horizontal_edge(s, pitch, blimit, limit, thresh, 2);
-}
-#endif // #if HAVE_DSPR2
diff --git a/third_party/aom/aom_dsp/mips/loopfilter_mb_vert_dspr2.c b/third_party/aom/aom_dsp/mips/loopfilter_mb_vert_dspr2.c
deleted file mode 100644
index 3d3f1ec97..000000000
--- a/third_party/aom/aom_dsp/mips/loopfilter_mb_vert_dspr2.c
+++ /dev/null
@@ -1,758 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#include <stdlib.h>
-
-#include "config/aom_dsp_rtcd.h"
-
-#include "aom/aom_integer.h"
-#include "aom_dsp/mips/common_dspr2.h"
-#include "aom_dsp/mips/loopfilter_filters_dspr2.h"
-#include "aom_dsp/mips/loopfilter_macros_dspr2.h"
-#include "aom_dsp/mips/loopfilter_masks_dspr2.h"
-#include "aom_mem/aom_mem.h"
-
-#if HAVE_DSPR2
-void aom_lpf_vertical_16_dspr2(uint8_t *s, int pitch, const uint8_t *blimit,
- const uint8_t *limit, const uint8_t *thresh) {
- uint8_t i;
- uint32_t mask, hev, flat, flat2;
- uint8_t *s1, *s2, *s3, *s4;
- uint32_t prim1, prim2, sec3, sec4, prim3, prim4;
- uint32_t thresh_vec, flimit_vec, limit_vec;
- uint32_t uflimit, ulimit, uthresh;
- uint32_t p7, p6, p5, p4, p3, p2, p1, p0, q0, q1, q2, q3, q4, q5, q6, q7;
- uint32_t p1_f0, p0_f0, q0_f0, q1_f0;
- uint32_t p7_l, p6_l, p5_l, p4_l, p3_l, p2_l, p1_l, p0_l;
- uint32_t q0_l, q1_l, q2_l, q3_l, q4_l, q5_l, q6_l, q7_l;
- uint32_t p7_r, p6_r, p5_r, p4_r, p3_r, p2_r, p1_r, p0_r;
- uint32_t q0_r, q1_r, q2_r, q3_r, q4_r, q5_r, q6_r, q7_r;
- uint32_t p2_l_f1, p1_l_f1, p0_l_f1, p2_r_f1, p1_r_f1, p0_r_f1;
- uint32_t q0_l_f1, q1_l_f1, q2_l_f1, q0_r_f1, q1_r_f1, q2_r_f1;
-
- uflimit = *blimit;
- ulimit = *limit;
- uthresh = *thresh;
-
- /* create quad-byte */
- __asm__ __volatile__(
- "replv.qb %[thresh_vec], %[uthresh] \n\t"
- "replv.qb %[flimit_vec], %[uflimit] \n\t"
- "replv.qb %[limit_vec], %[ulimit] \n\t"
-
- : [thresh_vec] "=&r"(thresh_vec), [flimit_vec] "=&r"(flimit_vec),
- [limit_vec] "=r"(limit_vec)
- : [uthresh] "r"(uthresh), [uflimit] "r"(uflimit), [ulimit] "r"(ulimit));
-
- prefetch_store(s + pitch);
-
- for (i = 0; i < 2; i++) {
- s1 = s;
- s2 = s + pitch;
- s3 = s2 + pitch;
- s4 = s3 + pitch;
- s = s4 + pitch;
-
- __asm__ __volatile__(
- "lw %[p0], -4(%[s1]) \n\t"
- "lw %[p1], -4(%[s2]) \n\t"
- "lw %[p2], -4(%[s3]) \n\t"
- "lw %[p3], -4(%[s4]) \n\t"
- "lw %[p4], -8(%[s1]) \n\t"
- "lw %[p5], -8(%[s2]) \n\t"
- "lw %[p6], -8(%[s3]) \n\t"
- "lw %[p7], -8(%[s4]) \n\t"
-
- : [p3] "=&r"(p3), [p2] "=&r"(p2), [p1] "=&r"(p1), [p0] "=&r"(p0),
- [p7] "=&r"(p7), [p6] "=&r"(p6), [p5] "=&r"(p5), [p4] "=&r"(p4)
- : [s1] "r"(s1), [s2] "r"(s2), [s3] "r"(s3), [s4] "r"(s4));
-
- __asm__ __volatile__(
- "lw %[q3], (%[s1]) \n\t"
- "lw %[q2], (%[s2]) \n\t"
- "lw %[q1], (%[s3]) \n\t"
- "lw %[q0], (%[s4]) \n\t"
- "lw %[q7], +4(%[s1]) \n\t"
- "lw %[q6], +4(%[s2]) \n\t"
- "lw %[q5], +4(%[s3]) \n\t"
- "lw %[q4], +4(%[s4]) \n\t"
-
- : [q3] "=&r"(q3), [q2] "=&r"(q2), [q1] "=&r"(q1), [q0] "=&r"(q0),
- [q7] "=&r"(q7), [q6] "=&r"(q6), [q5] "=&r"(q5), [q4] "=&r"(q4)
- : [s1] "r"(s1), [s2] "r"(s2), [s3] "r"(s3), [s4] "r"(s4));
-
- /* transpose p3, p2, p1, p0
- original (when loaded from memory)
- register -4 -3 -2 -1
- p0 p0_0 p0_1 p0_2 p0_3
- p1 p1_0 p1_1 p1_2 p1_3
- p2 p2_0 p2_1 p2_2 p2_3
- p3 p3_0 p3_1 p3_2 p3_3
-
- after transpose
- register
- p0 p3_3 p2_3 p1_3 p0_3
- p1 p3_2 p2_2 p1_2 p0_2
- p2 p3_1 p2_1 p1_1 p0_1
- p3 p3_0 p2_0 p1_0 p0_0
- */
- __asm__ __volatile__(
- "precrq.qb.ph %[prim1], %[p0], %[p1] \n\t"
- "precr.qb.ph %[prim2], %[p0], %[p1] \n\t"
- "precrq.qb.ph %[prim3], %[p2], %[p3] \n\t"
- "precr.qb.ph %[prim4], %[p2], %[p3] \n\t"
-
- "precrq.qb.ph %[p1], %[prim1], %[prim2] \n\t"
- "precr.qb.ph %[p3], %[prim1], %[prim2] \n\t"
- "precrq.qb.ph %[sec3], %[prim3], %[prim4] \n\t"
- "precr.qb.ph %[sec4], %[prim3], %[prim4] \n\t"
-
- "precrq.ph.w %[p0], %[p1], %[sec3] \n\t"
- "precrq.ph.w %[p2], %[p3], %[sec4] \n\t"
- "append %[p1], %[sec3], 16 \n\t"
- "append %[p3], %[sec4], 16 \n\t"
-
- : [prim1] "=&r"(prim1), [prim2] "=&r"(prim2), [prim3] "=&r"(prim3),
- [prim4] "=&r"(prim4), [p0] "+r"(p0), [p1] "+r"(p1), [p2] "+r"(p2),
- [p3] "+r"(p3), [sec3] "=&r"(sec3), [sec4] "=&r"(sec4)
- :);
-
- /* transpose q0, q1, q2, q3
- original (when loaded from memory)
- register +1 +2 +3 +4
- q3 q3_0 q3_1 q3_2 q3_3
- q2 q2_0 q2_1 q2_2 q2_3
- q1 q1_0 q1_1 q1_2 q1_3
- q0 q0_0 q0_1 q0_2 q0_3
-
- after transpose
- register
- q3 q0_3 q1_3 q2_3 q3_3
- q2 q0_2 q1_2 q2_2 q3_2
- q1 q0_1 q1_1 q2_1 q3_1
- q0 q0_0 q1_0 q2_0 q3_0
- */
- __asm__ __volatile__(
- "precrq.qb.ph %[prim1], %[q3], %[q2] \n\t"
- "precr.qb.ph %[prim2], %[q3], %[q2] \n\t"
- "precrq.qb.ph %[prim3], %[q1], %[q0] \n\t"
- "precr.qb.ph %[prim4], %[q1], %[q0] \n\t"
-
- "precrq.qb.ph %[q2], %[prim1], %[prim2] \n\t"
- "precr.qb.ph %[q0], %[prim1], %[prim2] \n\t"
- "precrq.qb.ph %[sec3], %[prim3], %[prim4] \n\t"
- "precr.qb.ph %[sec4], %[prim3], %[prim4] \n\t"
-
- "precrq.ph.w %[q3], %[q2], %[sec3] \n\t"
- "precrq.ph.w %[q1], %[q0], %[sec4] \n\t"
- "append %[q2], %[sec3], 16 \n\t"
- "append %[q0], %[sec4], 16 \n\t"
-
- : [prim1] "=&r"(prim1), [prim2] "=&r"(prim2), [prim3] "=&r"(prim3),
- [prim4] "=&r"(prim4), [q3] "+r"(q3), [q2] "+r"(q2), [q1] "+r"(q1),
- [q0] "+r"(q0), [sec3] "=&r"(sec3), [sec4] "=&r"(sec4)
- :);
-
- /* transpose p7, p6, p5, p4
- original (when loaded from memory)
- register -8 -7 -6 -5
- p4 p4_0 p4_1 p4_2 p4_3
- p5 p5_0 p5_1 p5_2 p5_3
- p6 p6_0 p6_1 p6_2 p6_3
- p7 p7_0 p7_1 p7_2 p7_3
-
- after transpose
- register
- p4 p7_3 p6_3 p5_3 p4_3
- p5 p7_2 p6_2 p5_2 p4_2
- p6 p7_1 p6_1 p5_1 p4_1
- p7 p7_0 p6_0 p5_0 p4_0
- */
- __asm__ __volatile__(
- "precrq.qb.ph %[prim1], %[p4], %[p5] \n\t"
- "precr.qb.ph %[prim2], %[p4], %[p5] \n\t"
- "precrq.qb.ph %[prim3], %[p6], %[p7] \n\t"
- "precr.qb.ph %[prim4], %[p6], %[p7] \n\t"
-
- "precrq.qb.ph %[p5], %[prim1], %[prim2] \n\t"
- "precr.qb.ph %[p7], %[prim1], %[prim2] \n\t"
- "precrq.qb.ph %[sec3], %[prim3], %[prim4] \n\t"
- "precr.qb.ph %[sec4], %[prim3], %[prim4] \n\t"
-
- "precrq.ph.w %[p4], %[p5], %[sec3] \n\t"
- "precrq.ph.w %[p6], %[p7], %[sec4] \n\t"
- "append %[p5], %[sec3], 16 \n\t"
- "append %[p7], %[sec4], 16 \n\t"
-
- : [prim1] "=&r"(prim1), [prim2] "=&r"(prim2), [prim3] "=&r"(prim3),
- [prim4] "=&r"(prim4), [p4] "+r"(p4), [p5] "+r"(p5), [p6] "+r"(p6),
- [p7] "+r"(p7), [sec3] "=&r"(sec3), [sec4] "=&r"(sec4)
- :);
-
- /* transpose q4, q5, q6, q7
- original (when loaded from memory)
- register +5 +6 +7 +8
- q7 q7_0 q7_1 q7_2 q7_3
- q6 q6_0 q6_1 q6_2 q6_3
- q5 q5_0 q5_1 q5_2 q5_3
- q4 q4_0 q4_1 q4_2 q4_3
-
- after transpose
- register
- q7 q4_3 q5_3 q26_3 q7_3
- q6 q4_2 q5_2 q26_2 q7_2
- q5 q4_1 q5_1 q26_1 q7_1
- q4 q4_0 q5_0 q26_0 q7_0
- */
- __asm__ __volatile__(
- "precrq.qb.ph %[prim1], %[q7], %[q6] \n\t"
- "precr.qb.ph %[prim2], %[q7], %[q6] \n\t"
- "precrq.qb.ph %[prim3], %[q5], %[q4] \n\t"
- "precr.qb.ph %[prim4], %[q5], %[q4] \n\t"
-
- "precrq.qb.ph %[q6], %[prim1], %[prim2] \n\t"
- "precr.qb.ph %[q4], %[prim1], %[prim2] \n\t"
- "precrq.qb.ph %[sec3], %[prim3], %[prim4] \n\t"
- "precr.qb.ph %[sec4], %[prim3], %[prim4] \n\t"
-
- "precrq.ph.w %[q7], %[q6], %[sec3] \n\t"
- "precrq.ph.w %[q5], %[q4], %[sec4] \n\t"
- "append %[q6], %[sec3], 16 \n\t"
- "append %[q4], %[sec4], 16 \n\t"
-
- : [prim1] "=&r"(prim1), [prim2] "=&r"(prim2), [prim3] "=&r"(prim3),
- [prim4] "=&r"(prim4), [q7] "+r"(q7), [q6] "+r"(q6), [q5] "+r"(q5),
- [q4] "+r"(q4), [sec3] "=&r"(sec3), [sec4] "=&r"(sec4)
- :);
-
- filter_hev_mask_flatmask4_dspr2(limit_vec, flimit_vec, thresh_vec, p1, p0,
- p3, p2, q0, q1, q2, q3, &hev, &mask, &flat);
-
- flatmask5(p7, p6, p5, p4, p0, q0, q4, q5, q6, q7, &flat2);
-
- /* f0 */
- if (((flat2 == 0) && (flat == 0) && (mask != 0)) ||
- ((flat2 != 0) && (flat == 0) && (mask != 0))) {
- filter1_dspr2(mask, hev, p1, p0, q0, q1, &p1_f0, &p0_f0, &q0_f0, &q1_f0);
- STORE_F0()
- } else if ((flat2 == 0XFFFFFFFF) && (flat == 0xFFFFFFFF) &&
- (mask == 0xFFFFFFFF)) {
- /* f2 */
- PACK_LEFT_0TO3()
- PACK_LEFT_4TO7()
- wide_mbfilter_dspr2(&p7_l, &p6_l, &p5_l, &p4_l, &p3_l, &p2_l, &p1_l,
- &p0_l, &q0_l, &q1_l, &q2_l, &q3_l, &q4_l, &q5_l,
- &q6_l, &q7_l);
-
- PACK_RIGHT_0TO3()
- PACK_RIGHT_4TO7()
- wide_mbfilter_dspr2(&p7_r, &p6_r, &p5_r, &p4_r, &p3_r, &p2_r, &p1_r,
- &p0_r, &q0_r, &q1_r, &q2_r, &q3_r, &q4_r, &q5_r,
- &q6_r, &q7_r);
-
- STORE_F2()
- } else if ((flat2 == 0) && (flat == 0xFFFFFFFF) && (mask == 0xFFFFFFFF)) {
- /* f1 */
- PACK_LEFT_0TO3()
- mbfilter_dspr2(&p3_l, &p2_l, &p1_l, &p0_l, &q0_l, &q1_l, &q2_l, &q3_l);
-
- PACK_RIGHT_0TO3()
- mbfilter_dspr2(&p3_r, &p2_r, &p1_r, &p0_r, &q0_r, &q1_r, &q2_r, &q3_r);
-
- STORE_F1()
- } else if ((flat2 == 0) && (flat != 0) && (mask != 0)) {
- /* f0 + f1 */
- filter1_dspr2(mask, hev, p1, p0, q0, q1, &p1_f0, &p0_f0, &q0_f0, &q1_f0);
-
- /* left 2 element operation */
- PACK_LEFT_0TO3()
- mbfilter_dspr2(&p3_l, &p2_l, &p1_l, &p0_l, &q0_l, &q1_l, &q2_l, &q3_l);
-
- /* right 2 element operation */
- PACK_RIGHT_0TO3()
- mbfilter_dspr2(&p3_r, &p2_r, &p1_r, &p0_r, &q0_r, &q1_r, &q2_r, &q3_r);
-
- if (mask & flat & 0x000000FF) {
- __asm__ __volatile__(
- "sb %[p2_r], -3(%[s4]) \n\t"
- "sb %[p1_r], -2(%[s4]) \n\t"
- "sb %[p0_r], -1(%[s4]) \n\t"
- "sb %[q0_r], (%[s4]) \n\t"
- "sb %[q1_r], +1(%[s4]) \n\t"
- "sb %[q2_r], +2(%[s4]) \n\t"
-
- :
- : [p2_r] "r"(p2_r), [p1_r] "r"(p1_r), [p0_r] "r"(p0_r),
- [q0_r] "r"(q0_r), [q1_r] "r"(q1_r), [q2_r] "r"(q2_r),
- [s4] "r"(s4));
- } else if (mask & 0x000000FF) {
- __asm__ __volatile__(
- "sb %[p1_f0], -2(%[s4]) \n\t"
- "sb %[p0_f0], -1(%[s4]) \n\t"
- "sb %[q0_f0], (%[s4]) \n\t"
- "sb %[q1_f0], +1(%[s4]) \n\t"
-
- :
- : [p1_f0] "r"(p1_f0), [p0_f0] "r"(p0_f0), [q0_f0] "r"(q0_f0),
- [q1_f0] "r"(q1_f0), [s4] "r"(s4));
- }
-
- __asm__ __volatile__(
- "srl %[p2_r], %[p2_r], 16 \n\t"
- "srl %[p1_r], %[p1_r], 16 \n\t"
- "srl %[p0_r], %[p0_r], 16 \n\t"
- "srl %[q0_r], %[q0_r], 16 \n\t"
- "srl %[q1_r], %[q1_r], 16 \n\t"
- "srl %[q2_r], %[q2_r], 16 \n\t"
- "srl %[p1_f0], %[p1_f0], 8 \n\t"
- "srl %[p0_f0], %[p0_f0], 8 \n\t"
- "srl %[q0_f0], %[q0_f0], 8 \n\t"
- "srl %[q1_f0], %[q1_f0], 8 \n\t"
-
- : [p2_r] "+r"(p2_r), [p1_r] "+r"(p1_r), [p0_r] "+r"(p0_r),
- [q0_r] "+r"(q0_r), [q1_r] "+r"(q1_r), [q2_r] "+r"(q2_r),
- [p1_f0] "+r"(p1_f0), [p0_f0] "+r"(p0_f0), [q0_f0] "+r"(q0_f0),
- [q1_f0] "+r"(q1_f0)
- :);
-
- if (mask & flat & 0x0000FF00) {
- __asm__ __volatile__(
- "sb %[p2_r], -3(%[s3]) \n\t"
- "sb %[p1_r], -2(%[s3]) \n\t"
- "sb %[p0_r], -1(%[s3]) \n\t"
- "sb %[q0_r], (%[s3]) \n\t"
- "sb %[q1_r], +1(%[s3]) \n\t"
- "sb %[q2_r], +2(%[s3]) \n\t"
-
- :
- : [p2_r] "r"(p2_r), [p1_r] "r"(p1_r), [p0_r] "r"(p0_r),
- [q0_r] "r"(q0_r), [q1_r] "r"(q1_r), [q2_r] "r"(q2_r),
- [s3] "r"(s3));
- } else if (mask & 0x0000FF00) {
- __asm__ __volatile__(
- "sb %[p1_f0], -2(%[s3]) \n\t"
- "sb %[p0_f0], -1(%[s3]) \n\t"
- "sb %[q0_f0], (%[s3]) \n\t"
- "sb %[q1_f0], +1(%[s3]) \n\t"
-
- :
- : [p1_f0] "r"(p1_f0), [p0_f0] "r"(p0_f0), [q0_f0] "r"(q0_f0),
- [q1_f0] "r"(q1_f0), [s3] "r"(s3));
- }
-
- __asm__ __volatile__(
- "srl %[p1_f0], %[p1_f0], 8 \n\t"
- "srl %[p0_f0], %[p0_f0], 8 \n\t"
- "srl %[q0_f0], %[q0_f0], 8 \n\t"
- "srl %[q1_f0], %[q1_f0], 8 \n\t"
-
- : [p1_f0] "+r"(p1_f0), [p0_f0] "+r"(p0_f0), [q0_f0] "+r"(q0_f0),
- [q1_f0] "+r"(q1_f0)
- :);
-
- if (mask & flat & 0x00FF0000) {
- __asm__ __volatile__(
- "sb %[p2_l], -3(%[s2]) \n\t"
- "sb %[p1_l], -2(%[s2]) \n\t"
- "sb %[p0_l], -1(%[s2]) \n\t"
- "sb %[q0_l], (%[s2]) \n\t"
- "sb %[q1_l], +1(%[s2]) \n\t"
- "sb %[q2_l], +2(%[s2]) \n\t"
-
- :
- : [p2_l] "r"(p2_l), [p1_l] "r"(p1_l), [p0_l] "r"(p0_l),
- [q0_l] "r"(q0_l), [q1_l] "r"(q1_l), [q2_l] "r"(q2_l),
- [s2] "r"(s2));
- } else if (mask & 0x00FF0000) {
- __asm__ __volatile__(
- "sb %[p1_f0], -2(%[s2]) \n\t"
- "sb %[p0_f0], -1(%[s2]) \n\t"
- "sb %[q0_f0], (%[s2]) \n\t"
- "sb %[q1_f0], +1(%[s2]) \n\t"
-
- :
- : [p1_f0] "r"(p1_f0), [p0_f0] "r"(p0_f0), [q0_f0] "r"(q0_f0),
- [q1_f0] "r"(q1_f0), [s2] "r"(s2));
- }
-
- __asm__ __volatile__(
- "srl %[p2_l], %[p2_l], 16 \n\t"
- "srl %[p1_l], %[p1_l], 16 \n\t"
- "srl %[p0_l], %[p0_l], 16 \n\t"
- "srl %[q0_l], %[q0_l], 16 \n\t"
- "srl %[q1_l], %[q1_l], 16 \n\t"
- "srl %[q2_l], %[q2_l], 16 \n\t"
- "srl %[p1_f0], %[p1_f0], 8 \n\t"
- "srl %[p0_f0], %[p0_f0], 8 \n\t"
- "srl %[q0_f0], %[q0_f0], 8 \n\t"
- "srl %[q1_f0], %[q1_f0], 8 \n\t"
-
- : [p2_l] "+r"(p2_l), [p1_l] "+r"(p1_l), [p0_l] "+r"(p0_l),
- [q0_l] "+r"(q0_l), [q1_l] "+r"(q1_l), [q2_l] "+r"(q2_l),
- [p1_f0] "+r"(p1_f0), [p0_f0] "+r"(p0_f0), [q0_f0] "+r"(q0_f0),
- [q1_f0] "+r"(q1_f0)
- :);
-
- if (mask & flat & 0xFF000000) {
- __asm__ __volatile__(
- "sb %[p2_l], -3(%[s1]) \n\t"
- "sb %[p1_l], -2(%[s1]) \n\t"
- "sb %[p0_l], -1(%[s1]) \n\t"
- "sb %[q0_l], (%[s1]) \n\t"
- "sb %[q1_l], +1(%[s1]) \n\t"
- "sb %[q2_l], +2(%[s1]) \n\t"
-
- :
- : [p2_l] "r"(p2_l), [p1_l] "r"(p1_l), [p0_l] "r"(p0_l),
- [q0_l] "r"(q0_l), [q1_l] "r"(q1_l), [q2_l] "r"(q2_l),
- [s1] "r"(s1));
- } else if (mask & 0xFF000000) {
- __asm__ __volatile__(
- "sb %[p1_f0], -2(%[s1]) \n\t"
- "sb %[p0_f0], -1(%[s1]) \n\t"
- "sb %[q0_f0], (%[s1]) \n\t"
- "sb %[q1_f0], +1(%[s1]) \n\t"
-
- :
- : [p1_f0] "r"(p1_f0), [p0_f0] "r"(p0_f0), [q0_f0] "r"(q0_f0),
- [q1_f0] "r"(q1_f0), [s1] "r"(s1));
- }
- } else if ((flat2 != 0) && (flat != 0) && (mask != 0)) {
- /* f0+f1+f2 */
- filter1_dspr2(mask, hev, p1, p0, q0, q1, &p1_f0, &p0_f0, &q0_f0, &q1_f0);
-
- PACK_LEFT_0TO3()
- mbfilter1_dspr2(p3_l, p2_l, p1_l, p0_l, q0_l, q1_l, q2_l, q3_l, &p2_l_f1,
- &p1_l_f1, &p0_l_f1, &q0_l_f1, &q1_l_f1, &q2_l_f1);
-
- PACK_RIGHT_0TO3()
- mbfilter1_dspr2(p3_r, p2_r, p1_r, p0_r, q0_r, q1_r, q2_r, q3_r, &p2_r_f1,
- &p1_r_f1, &p0_r_f1, &q0_r_f1, &q1_r_f1, &q2_r_f1);
-
- PACK_LEFT_4TO7()
- wide_mbfilter_dspr2(&p7_l, &p6_l, &p5_l, &p4_l, &p3_l, &p2_l, &p1_l,
- &p0_l, &q0_l, &q1_l, &q2_l, &q3_l, &q4_l, &q5_l,
- &q6_l, &q7_l);
-
- PACK_RIGHT_4TO7()
- wide_mbfilter_dspr2(&p7_r, &p6_r, &p5_r, &p4_r, &p3_r, &p2_r, &p1_r,
- &p0_r, &q0_r, &q1_r, &q2_r, &q3_r, &q4_r, &q5_r,
- &q6_r, &q7_r);
-
- if (mask & flat & flat2 & 0x000000FF) {
- __asm__ __volatile__(
- "sb %[p6_r], -7(%[s4]) \n\t"
- "sb %[p5_r], -6(%[s4]) \n\t"
- "sb %[p4_r], -5(%[s4]) \n\t"
- "sb %[p3_r], -4(%[s4]) \n\t"
- "sb %[p2_r], -3(%[s4]) \n\t"
- "sb %[p1_r], -2(%[s4]) \n\t"
- "sb %[p0_r], -1(%[s4]) \n\t"
-
- :
- : [p6_r] "r"(p6_r), [p5_r] "r"(p5_r), [p4_r] "r"(p4_r),
- [p3_r] "r"(p3_r), [p2_r] "r"(p2_r), [p1_r] "r"(p1_r),
- [p0_r] "r"(p0_r), [s4] "r"(s4));
-
- __asm__ __volatile__(
- "sb %[q0_r], (%[s4]) \n\t"
- "sb %[q1_r], +1(%[s4]) \n\t"
- "sb %[q2_r], +2(%[s4]) \n\t"
- "sb %[q3_r], +3(%[s4]) \n\t"
- "sb %[q4_r], +4(%[s4]) \n\t"
- "sb %[q5_r], +5(%[s4]) \n\t"
- "sb %[q6_r], +6(%[s4]) \n\t"
-
- :
- : [q0_r] "r"(q0_r), [q1_r] "r"(q1_r), [q2_r] "r"(q2_r),
- [q3_r] "r"(q3_r), [q4_r] "r"(q4_r), [q5_r] "r"(q5_r),
- [q6_r] "r"(q6_r), [s4] "r"(s4));
- } else if (mask & flat & 0x000000FF) {
- __asm__ __volatile__(
- "sb %[p2_r_f1], -3(%[s4]) \n\t"
- "sb %[p1_r_f1], -2(%[s4]) \n\t"
- "sb %[p0_r_f1], -1(%[s4]) \n\t"
- "sb %[q0_r_f1], (%[s4]) \n\t"
- "sb %[q1_r_f1], +1(%[s4]) \n\t"
- "sb %[q2_r_f1], +2(%[s4]) \n\t"
-
- :
- : [p2_r_f1] "r"(p2_r_f1), [p1_r_f1] "r"(p1_r_f1),
- [p0_r_f1] "r"(p0_r_f1), [q0_r_f1] "r"(q0_r_f1),
- [q1_r_f1] "r"(q1_r_f1), [q2_r_f1] "r"(q2_r_f1), [s4] "r"(s4));
- } else if (mask & 0x000000FF) {
- __asm__ __volatile__(
- "sb %[p1_f0], -2(%[s4]) \n\t"
- "sb %[p0_f0], -1(%[s4]) \n\t"
- "sb %[q0_f0], (%[s4]) \n\t"
- "sb %[q1_f0], +1(%[s4]) \n\t"
-
- :
- : [p1_f0] "r"(p1_f0), [p0_f0] "r"(p0_f0), [q0_f0] "r"(q0_f0),
- [q1_f0] "r"(q1_f0), [s4] "r"(s4));
- }
-
- __asm__ __volatile__(
- "srl %[p6_r], %[p6_r], 16 \n\t"
- "srl %[p5_r], %[p5_r], 16 \n\t"
- "srl %[p4_r], %[p4_r], 16 \n\t"
- "srl %[p3_r], %[p3_r], 16 \n\t"
- "srl %[p2_r], %[p2_r], 16 \n\t"
- "srl %[p1_r], %[p1_r], 16 \n\t"
- "srl %[p0_r], %[p0_r], 16 \n\t"
- "srl %[q0_r], %[q0_r], 16 \n\t"
- "srl %[q1_r], %[q1_r], 16 \n\t"
- "srl %[q2_r], %[q2_r], 16 \n\t"
- "srl %[q3_r], %[q3_r], 16 \n\t"
- "srl %[q4_r], %[q4_r], 16 \n\t"
- "srl %[q5_r], %[q5_r], 16 \n\t"
- "srl %[q6_r], %[q6_r], 16 \n\t"
-
- : [q0_r] "+r"(q0_r), [q1_r] "+r"(q1_r), [q2_r] "+r"(q2_r),
- [q3_r] "+r"(q3_r), [q4_r] "+r"(q4_r), [q5_r] "+r"(q5_r),
- [q6_r] "+r"(q6_r), [p6_r] "+r"(p6_r), [p5_r] "+r"(p5_r),
- [p4_r] "+r"(p4_r), [p3_r] "+r"(p3_r), [p2_r] "+r"(p2_r),
- [p1_r] "+r"(p1_r), [p0_r] "+r"(p0_r)
- :);
-
- __asm__ __volatile__(
- "srl %[p2_r_f1], %[p2_r_f1], 16 \n\t"
- "srl %[p1_r_f1], %[p1_r_f1], 16 \n\t"
- "srl %[p0_r_f1], %[p0_r_f1], 16 \n\t"
- "srl %[q0_r_f1], %[q0_r_f1], 16 \n\t"
- "srl %[q1_r_f1], %[q1_r_f1], 16 \n\t"
- "srl %[q2_r_f1], %[q2_r_f1], 16 \n\t"
- "srl %[p1_f0], %[p1_f0], 8 \n\t"
- "srl %[p0_f0], %[p0_f0], 8 \n\t"
- "srl %[q0_f0], %[q0_f0], 8 \n\t"
- "srl %[q1_f0], %[q1_f0], 8 \n\t"
-
- : [p2_r_f1] "+r"(p2_r_f1), [p1_r_f1] "+r"(p1_r_f1),
- [p0_r_f1] "+r"(p0_r_f1), [q0_r_f1] "+r"(q0_r_f1),
- [q1_r_f1] "+r"(q1_r_f1), [q2_r_f1] "+r"(q2_r_f1),
- [p1_f0] "+r"(p1_f0), [p0_f0] "+r"(p0_f0), [q0_f0] "+r"(q0_f0),
- [q1_f0] "+r"(q1_f0)
- :);
-
- if (mask & flat & flat2 & 0x0000FF00) {
- __asm__ __volatile__(
- "sb %[p6_r], -7(%[s3]) \n\t"
- "sb %[p5_r], -6(%[s3]) \n\t"
- "sb %[p4_r], -5(%[s3]) \n\t"
- "sb %[p3_r], -4(%[s3]) \n\t"
- "sb %[p2_r], -3(%[s3]) \n\t"
- "sb %[p1_r], -2(%[s3]) \n\t"
- "sb %[p0_r], -1(%[s3]) \n\t"
-
- :
- : [p6_r] "r"(p6_r), [p5_r] "r"(p5_r), [p4_r] "r"(p4_r),
- [p3_r] "r"(p3_r), [p2_r] "r"(p2_r), [p1_r] "r"(p1_r),
- [p0_r] "r"(p0_r), [s3] "r"(s3));
-
- __asm__ __volatile__(
- "sb %[q0_r], (%[s3]) \n\t"
- "sb %[q1_r], +1(%[s3]) \n\t"
- "sb %[q2_r], +2(%[s3]) \n\t"
- "sb %[q3_r], +3(%[s3]) \n\t"
- "sb %[q4_r], +4(%[s3]) \n\t"
- "sb %[q5_r], +5(%[s3]) \n\t"
- "sb %[q6_r], +6(%[s3]) \n\t"
-
- :
- : [q0_r] "r"(q0_r), [q1_r] "r"(q1_r), [q2_r] "r"(q2_r),
- [q3_r] "r"(q3_r), [q4_r] "r"(q4_r), [q5_r] "r"(q5_r),
- [q6_r] "r"(q6_r), [s3] "r"(s3));
- } else if (mask & flat & 0x0000FF00) {
- __asm__ __volatile__(
- "sb %[p2_r_f1], -3(%[s3]) \n\t"
- "sb %[p1_r_f1], -2(%[s3]) \n\t"
- "sb %[p0_r_f1], -1(%[s3]) \n\t"
- "sb %[q0_r_f1], (%[s3]) \n\t"
- "sb %[q1_r_f1], +1(%[s3]) \n\t"
- "sb %[q2_r_f1], +2(%[s3]) \n\t"
-
- :
- : [p2_r_f1] "r"(p2_r_f1), [p1_r_f1] "r"(p1_r_f1),
- [p0_r_f1] "r"(p0_r_f1), [q0_r_f1] "r"(q0_r_f1),
- [q1_r_f1] "r"(q1_r_f1), [q2_r_f1] "r"(q2_r_f1), [s3] "r"(s3));
- } else if (mask & 0x0000FF00) {
- __asm__ __volatile__(
- "sb %[p1_f0], -2(%[s3]) \n\t"
- "sb %[p0_f0], -1(%[s3]) \n\t"
- "sb %[q0_f0], (%[s3]) \n\t"
- "sb %[q1_f0], +1(%[s3]) \n\t"
-
- :
- : [p1_f0] "r"(p1_f0), [p0_f0] "r"(p0_f0), [q0_f0] "r"(q0_f0),
- [q1_f0] "r"(q1_f0), [s3] "r"(s3));
- }
-
- __asm__ __volatile__(
- "srl %[p1_f0], %[p1_f0], 8 \n\t"
- "srl %[p0_f0], %[p0_f0], 8 \n\t"
- "srl %[q0_f0], %[q0_f0], 8 \n\t"
- "srl %[q1_f0], %[q1_f0], 8 \n\t"
-
- : [p1_f0] "+r"(p1_f0), [p0_f0] "+r"(p0_f0), [q0_f0] "+r"(q0_f0),
- [q1_f0] "+r"(q1_f0)
- :);
-
- if (mask & flat & flat2 & 0x00FF0000) {
- __asm__ __volatile__(
- "sb %[p6_l], -7(%[s2]) \n\t"
- "sb %[p5_l], -6(%[s2]) \n\t"
- "sb %[p4_l], -5(%[s2]) \n\t"
- "sb %[p3_l], -4(%[s2]) \n\t"
- "sb %[p2_l], -3(%[s2]) \n\t"
- "sb %[p1_l], -2(%[s2]) \n\t"
- "sb %[p0_l], -1(%[s2]) \n\t"
-
- :
- : [p6_l] "r"(p6_l), [p5_l] "r"(p5_l), [p4_l] "r"(p4_l),
- [p3_l] "r"(p3_l), [p2_l] "r"(p2_l), [p1_l] "r"(p1_l),
- [p0_l] "r"(p0_l), [s2] "r"(s2));
-
- __asm__ __volatile__(
- "sb %[q0_l], (%[s2]) \n\t"
- "sb %[q1_l], +1(%[s2]) \n\t"
- "sb %[q2_l], +2(%[s2]) \n\t"
- "sb %[q3_l], +3(%[s2]) \n\t"
- "sb %[q4_l], +4(%[s2]) \n\t"
- "sb %[q5_l], +5(%[s2]) \n\t"
- "sb %[q6_l], +6(%[s2]) \n\t"
-
- :
- : [q0_l] "r"(q0_l), [q1_l] "r"(q1_l), [q2_l] "r"(q2_l),
- [q3_l] "r"(q3_l), [q4_l] "r"(q4_l), [q5_l] "r"(q5_l),
- [q6_l] "r"(q6_l), [s2] "r"(s2));
- } else if (mask & flat & 0x00FF0000) {
- __asm__ __volatile__(
- "sb %[p2_l_f1], -3(%[s2]) \n\t"
- "sb %[p1_l_f1], -2(%[s2]) \n\t"
- "sb %[p0_l_f1], -1(%[s2]) \n\t"
- "sb %[q0_l_f1], (%[s2]) \n\t"
- "sb %[q1_l_f1], +1(%[s2]) \n\t"
- "sb %[q2_l_f1], +2(%[s2]) \n\t"
-
- :
- : [p2_l_f1] "r"(p2_l_f1), [p1_l_f1] "r"(p1_l_f1),
- [p0_l_f1] "r"(p0_l_f1), [q0_l_f1] "r"(q0_l_f1),
- [q1_l_f1] "r"(q1_l_f1), [q2_l_f1] "r"(q2_l_f1), [s2] "r"(s2));
- } else if (mask & 0x00FF0000) {
- __asm__ __volatile__(
- "sb %[p1_f0], -2(%[s2]) \n\t"
- "sb %[p0_f0], -1(%[s2]) \n\t"
- "sb %[q0_f0], (%[s2]) \n\t"
- "sb %[q1_f0], +1(%[s2]) \n\t"
-
- :
- : [p1_f0] "r"(p1_f0), [p0_f0] "r"(p0_f0), [q0_f0] "r"(q0_f0),
- [q1_f0] "r"(q1_f0), [s2] "r"(s2));
- }
-
- __asm__ __volatile__(
- "srl %[p6_l], %[p6_l], 16 \n\t"
- "srl %[p5_l], %[p5_l], 16 \n\t"
- "srl %[p4_l], %[p4_l], 16 \n\t"
- "srl %[p3_l], %[p3_l], 16 \n\t"
- "srl %[p2_l], %[p2_l], 16 \n\t"
- "srl %[p1_l], %[p1_l], 16 \n\t"
- "srl %[p0_l], %[p0_l], 16 \n\t"
- "srl %[q0_l], %[q0_l], 16 \n\t"
- "srl %[q1_l], %[q1_l], 16 \n\t"
- "srl %[q2_l], %[q2_l], 16 \n\t"
- "srl %[q3_l], %[q3_l], 16 \n\t"
- "srl %[q4_l], %[q4_l], 16 \n\t"
- "srl %[q5_l], %[q5_l], 16 \n\t"
- "srl %[q6_l], %[q6_l], 16 \n\t"
-
- : [q0_l] "+r"(q0_l), [q1_l] "+r"(q1_l), [q2_l] "+r"(q2_l),
- [q3_l] "+r"(q3_l), [q4_l] "+r"(q4_l), [q5_l] "+r"(q5_l),
- [q6_l] "+r"(q6_l), [p6_l] "+r"(p6_l), [p5_l] "+r"(p5_l),
- [p4_l] "+r"(p4_l), [p3_l] "+r"(p3_l), [p2_l] "+r"(p2_l),
- [p1_l] "+r"(p1_l), [p0_l] "+r"(p0_l)
- :);
-
- __asm__ __volatile__(
- "srl %[p2_l_f1], %[p2_l_f1], 16 \n\t"
- "srl %[p1_l_f1], %[p1_l_f1], 16 \n\t"
- "srl %[p0_l_f1], %[p0_l_f1], 16 \n\t"
- "srl %[q0_l_f1], %[q0_l_f1], 16 \n\t"
- "srl %[q1_l_f1], %[q1_l_f1], 16 \n\t"
- "srl %[q2_l_f1], %[q2_l_f1], 16 \n\t"
- "srl %[p1_f0], %[p1_f0], 8 \n\t"
- "srl %[p0_f0], %[p0_f0], 8 \n\t"
- "srl %[q0_f0], %[q0_f0], 8 \n\t"
- "srl %[q1_f0], %[q1_f0], 8 \n\t"
-
- : [p2_l_f1] "+r"(p2_l_f1), [p1_l_f1] "+r"(p1_l_f1),
- [p0_l_f1] "+r"(p0_l_f1), [q0_l_f1] "+r"(q0_l_f1),
- [q1_l_f1] "+r"(q1_l_f1), [q2_l_f1] "+r"(q2_l_f1),
- [p1_f0] "+r"(p1_f0), [p0_f0] "+r"(p0_f0), [q0_f0] "+r"(q0_f0),
- [q1_f0] "+r"(q1_f0)
- :);
-
- if (mask & flat & flat2 & 0xFF000000) {
- __asm__ __volatile__(
- "sb %[p6_l], -7(%[s1]) \n\t"
- "sb %[p5_l], -6(%[s1]) \n\t"
- "sb %[p4_l], -5(%[s1]) \n\t"
- "sb %[p3_l], -4(%[s1]) \n\t"
- "sb %[p2_l], -3(%[s1]) \n\t"
- "sb %[p1_l], -2(%[s1]) \n\t"
- "sb %[p0_l], -1(%[s1]) \n\t"
-
- :
- : [p6_l] "r"(p6_l), [p5_l] "r"(p5_l), [p4_l] "r"(p4_l),
- [p3_l] "r"(p3_l), [p2_l] "r"(p2_l), [p1_l] "r"(p1_l),
- [p0_l] "r"(p0_l), [s1] "r"(s1));
-
- __asm__ __volatile__(
- "sb %[q0_l], (%[s1]) \n\t"
- "sb %[q1_l], 1(%[s1]) \n\t"
- "sb %[q2_l], 2(%[s1]) \n\t"
- "sb %[q3_l], 3(%[s1]) \n\t"
- "sb %[q4_l], 4(%[s1]) \n\t"
- "sb %[q5_l], 5(%[s1]) \n\t"
- "sb %[q6_l], 6(%[s1]) \n\t"
-
- :
- : [q0_l] "r"(q0_l), [q1_l] "r"(q1_l), [q2_l] "r"(q2_l),
- [q3_l] "r"(q3_l), [q4_l] "r"(q4_l), [q5_l] "r"(q5_l),
- [q6_l] "r"(q6_l), [s1] "r"(s1));
- } else if (mask & flat & 0xFF000000) {
- __asm__ __volatile__(
- "sb %[p2_l_f1], -3(%[s1]) \n\t"
- "sb %[p1_l_f1], -2(%[s1]) \n\t"
- "sb %[p0_l_f1], -1(%[s1]) \n\t"
- "sb %[q0_l_f1], (%[s1]) \n\t"
- "sb %[q1_l_f1], +1(%[s1]) \n\t"
- "sb %[q2_l_f1], +2(%[s1]) \n\t"
-
- :
- : [p2_l_f1] "r"(p2_l_f1), [p1_l_f1] "r"(p1_l_f1),
- [p0_l_f1] "r"(p0_l_f1), [q0_l_f1] "r"(q0_l_f1),
- [q1_l_f1] "r"(q1_l_f1), [q2_l_f1] "r"(q2_l_f1), [s1] "r"(s1));
- } else if (mask & 0xFF000000) {
- __asm__ __volatile__(
- "sb %[p1_f0], -2(%[s1]) \n\t"
- "sb %[p0_f0], -1(%[s1]) \n\t"
- "sb %[q0_f0], (%[s1]) \n\t"
- "sb %[q1_f0], +1(%[s1]) \n\t"
-
- :
- : [p1_f0] "r"(p1_f0), [p0_f0] "r"(p0_f0), [q0_f0] "r"(q0_f0),
- [q1_f0] "r"(q1_f0), [s1] "r"(s1));
- }
- }
- }
-}
-#endif // #if HAVE_DSPR2
diff --git a/third_party/aom/aom_dsp/mips/loopfilter_msa.h b/third_party/aom/aom_dsp/mips/loopfilter_msa.h
deleted file mode 100644
index 54b0bb4bd..000000000
--- a/third_party/aom/aom_dsp/mips/loopfilter_msa.h
+++ /dev/null
@@ -1,251 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#ifndef AOM_AOM_DSP_MIPS_LOOPFILTER_MSA_H_
-#define AOM_AOM_DSP_MIPS_LOOPFILTER_MSA_H_
-
-#include "aom_dsp/mips/macros_msa.h"
-
-#define AOM_LPF_FILTER4_8W(p1_in, p0_in, q0_in, q1_in, mask_in, hev_in, \
- p1_out, p0_out, q0_out, q1_out) \
- { \
- v16i8 p1_m, p0_m, q0_m, q1_m, q0_sub_p0, filt_sign; \
- v16i8 filt, filt1, filt2, cnst4b, cnst3b; \
- v8i16 q0_sub_p0_r, filt_r, cnst3h; \
- \
- p1_m = (v16i8)__msa_xori_b(p1_in, 0x80); \
- p0_m = (v16i8)__msa_xori_b(p0_in, 0x80); \
- q0_m = (v16i8)__msa_xori_b(q0_in, 0x80); \
- q1_m = (v16i8)__msa_xori_b(q1_in, 0x80); \
- \
- filt = __msa_subs_s_b(p1_m, q1_m); \
- filt = filt & (v16i8)hev_in; \
- q0_sub_p0 = q0_m - p0_m; \
- filt_sign = __msa_clti_s_b(filt, 0); \
- \
- cnst3h = __msa_ldi_h(3); \
- q0_sub_p0_r = (v8i16)__msa_ilvr_b(q0_sub_p0, q0_sub_p0); \
- q0_sub_p0_r = __msa_dotp_s_h((v16i8)q0_sub_p0_r, (v16i8)cnst3h); \
- filt_r = (v8i16)__msa_ilvr_b(filt_sign, filt); \
- filt_r += q0_sub_p0_r; \
- filt_r = __msa_sat_s_h(filt_r, 7); \
- \
- /* combine left and right part */ \
- filt = __msa_pckev_b((v16i8)filt_r, (v16i8)filt_r); \
- \
- filt = filt & (v16i8)mask_in; \
- cnst4b = __msa_ldi_b(4); \
- filt1 = __msa_adds_s_b(filt, cnst4b); \
- filt1 >>= 3; \
- \
- cnst3b = __msa_ldi_b(3); \
- filt2 = __msa_adds_s_b(filt, cnst3b); \
- filt2 >>= 3; \
- \
- q0_m = __msa_subs_s_b(q0_m, filt1); \
- q0_out = __msa_xori_b((v16u8)q0_m, 0x80); \
- p0_m = __msa_adds_s_b(p0_m, filt2); \
- p0_out = __msa_xori_b((v16u8)p0_m, 0x80); \
- \
- filt = __msa_srari_b(filt1, 1); \
- hev_in = __msa_xori_b((v16u8)hev_in, 0xff); \
- filt = filt & (v16i8)hev_in; \
- \
- q1_m = __msa_subs_s_b(q1_m, filt); \
- q1_out = __msa_xori_b((v16u8)q1_m, 0x80); \
- p1_m = __msa_adds_s_b(p1_m, filt); \
- p1_out = __msa_xori_b((v16u8)p1_m, 0x80); \
- }
-
-#define AOM_LPF_FILTER4_4W(p1_in, p0_in, q0_in, q1_in, mask_in, hev_in, \
- p1_out, p0_out, q0_out, q1_out) \
- { \
- v16i8 p1_m, p0_m, q0_m, q1_m, q0_sub_p0, filt_sign; \
- v16i8 filt, filt1, filt2, cnst4b, cnst3b; \
- v8i16 q0_sub_p0_r, q0_sub_p0_l, filt_l, filt_r, cnst3h; \
- \
- p1_m = (v16i8)__msa_xori_b(p1_in, 0x80); \
- p0_m = (v16i8)__msa_xori_b(p0_in, 0x80); \
- q0_m = (v16i8)__msa_xori_b(q0_in, 0x80); \
- q1_m = (v16i8)__msa_xori_b(q1_in, 0x80); \
- \
- filt = __msa_subs_s_b(p1_m, q1_m); \
- \
- filt = filt & (v16i8)hev_in; \
- \
- q0_sub_p0 = q0_m - p0_m; \
- filt_sign = __msa_clti_s_b(filt, 0); \
- \
- cnst3h = __msa_ldi_h(3); \
- q0_sub_p0_r = (v8i16)__msa_ilvr_b(q0_sub_p0, q0_sub_p0); \
- q0_sub_p0_r = __msa_dotp_s_h((v16i8)q0_sub_p0_r, (v16i8)cnst3h); \
- filt_r = (v8i16)__msa_ilvr_b(filt_sign, filt); \
- filt_r += q0_sub_p0_r; \
- filt_r = __msa_sat_s_h(filt_r, 7); \
- \
- q0_sub_p0_l = (v8i16)__msa_ilvl_b(q0_sub_p0, q0_sub_p0); \
- q0_sub_p0_l = __msa_dotp_s_h((v16i8)q0_sub_p0_l, (v16i8)cnst3h); \
- filt_l = (v8i16)__msa_ilvl_b(filt_sign, filt); \
- filt_l += q0_sub_p0_l; \
- filt_l = __msa_sat_s_h(filt_l, 7); \
- \
- filt = __msa_pckev_b((v16i8)filt_l, (v16i8)filt_r); \
- filt = filt & (v16i8)mask_in; \
- \
- cnst4b = __msa_ldi_b(4); \
- filt1 = __msa_adds_s_b(filt, cnst4b); \
- filt1 >>= 3; \
- \
- cnst3b = __msa_ldi_b(3); \
- filt2 = __msa_adds_s_b(filt, cnst3b); \
- filt2 >>= 3; \
- \
- q0_m = __msa_subs_s_b(q0_m, filt1); \
- q0_out = __msa_xori_b((v16u8)q0_m, 0x80); \
- p0_m = __msa_adds_s_b(p0_m, filt2); \
- p0_out = __msa_xori_b((v16u8)p0_m, 0x80); \
- \
- filt = __msa_srari_b(filt1, 1); \
- hev_in = __msa_xori_b((v16u8)hev_in, 0xff); \
- filt = filt & (v16i8)hev_in; \
- \
- q1_m = __msa_subs_s_b(q1_m, filt); \
- q1_out = __msa_xori_b((v16u8)q1_m, 0x80); \
- p1_m = __msa_adds_s_b(p1_m, filt); \
- p1_out = __msa_xori_b((v16u8)p1_m, 0x80); \
- }
-
-#define AOM_FLAT4(p3_in, p2_in, p0_in, q0_in, q2_in, q3_in, flat_out) \
- { \
- v16u8 tmp_flat4, p2_a_sub_p0, q2_a_sub_q0, p3_a_sub_p0, q3_a_sub_q0; \
- v16u8 zero_in = { 0 }; \
- \
- tmp_flat4 = __msa_ori_b(zero_in, 1); \
- p2_a_sub_p0 = __msa_asub_u_b(p2_in, p0_in); \
- q2_a_sub_q0 = __msa_asub_u_b(q2_in, q0_in); \
- p3_a_sub_p0 = __msa_asub_u_b(p3_in, p0_in); \
- q3_a_sub_q0 = __msa_asub_u_b(q3_in, q0_in); \
- \
- p2_a_sub_p0 = __msa_max_u_b(p2_a_sub_p0, q2_a_sub_q0); \
- flat_out = __msa_max_u_b(p2_a_sub_p0, flat_out); \
- p3_a_sub_p0 = __msa_max_u_b(p3_a_sub_p0, q3_a_sub_q0); \
- flat_out = __msa_max_u_b(p3_a_sub_p0, flat_out); \
- \
- flat_out = (tmp_flat4 < (v16u8)flat_out); \
- flat_out = __msa_xori_b(flat_out, 0xff); \
- flat_out = flat_out & (mask); \
- }
-
-#define AOM_FLAT5(p7_in, p6_in, p5_in, p4_in, p0_in, q0_in, q4_in, q5_in, \
- q6_in, q7_in, flat_in, flat2_out) \
- { \
- v16u8 tmp_flat5, zero_in = { 0 }; \
- v16u8 p4_a_sub_p0, q4_a_sub_q0, p5_a_sub_p0, q5_a_sub_q0; \
- v16u8 p6_a_sub_p0, q6_a_sub_q0, p7_a_sub_p0, q7_a_sub_q0; \
- \
- tmp_flat5 = __msa_ori_b(zero_in, 1); \
- p4_a_sub_p0 = __msa_asub_u_b(p4_in, p0_in); \
- q4_a_sub_q0 = __msa_asub_u_b(q4_in, q0_in); \
- p5_a_sub_p0 = __msa_asub_u_b(p5_in, p0_in); \
- q5_a_sub_q0 = __msa_asub_u_b(q5_in, q0_in); \
- p6_a_sub_p0 = __msa_asub_u_b(p6_in, p0_in); \
- q6_a_sub_q0 = __msa_asub_u_b(q6_in, q0_in); \
- p7_a_sub_p0 = __msa_asub_u_b(p7_in, p0_in); \
- q7_a_sub_q0 = __msa_asub_u_b(q7_in, q0_in); \
- \
- p4_a_sub_p0 = __msa_max_u_b(p4_a_sub_p0, q4_a_sub_q0); \
- flat2_out = __msa_max_u_b(p5_a_sub_p0, q5_a_sub_q0); \
- flat2_out = __msa_max_u_b(p4_a_sub_p0, flat2_out); \
- p6_a_sub_p0 = __msa_max_u_b(p6_a_sub_p0, q6_a_sub_q0); \
- flat2_out = __msa_max_u_b(p6_a_sub_p0, flat2_out); \
- p7_a_sub_p0 = __msa_max_u_b(p7_a_sub_p0, q7_a_sub_q0); \
- flat2_out = __msa_max_u_b(p7_a_sub_p0, flat2_out); \
- \
- flat2_out = (tmp_flat5 < (v16u8)flat2_out); \
- flat2_out = __msa_xori_b(flat2_out, 0xff); \
- flat2_out = flat2_out & flat_in; \
- }
-
-#define AOM_FILTER8(p3_in, p2_in, p1_in, p0_in, q0_in, q1_in, q2_in, q3_in, \
- p2_filt8_out, p1_filt8_out, p0_filt8_out, q0_filt8_out, \
- q1_filt8_out, q2_filt8_out) \
- { \
- v8u16 tmp_filt8_0, tmp_filt8_1, tmp_filt8_2; \
- \
- tmp_filt8_2 = p2_in + p1_in + p0_in; \
- tmp_filt8_0 = p3_in << 1; \
- \
- tmp_filt8_0 = tmp_filt8_0 + tmp_filt8_2 + q0_in; \
- tmp_filt8_1 = tmp_filt8_0 + p3_in + p2_in; \
- p2_filt8_out = (v8i16)__msa_srari_h((v8i16)tmp_filt8_1, 3); \
- \
- tmp_filt8_1 = tmp_filt8_0 + p1_in + q1_in; \
- p1_filt8_out = (v8i16)__msa_srari_h((v8i16)tmp_filt8_1, 3); \
- \
- tmp_filt8_1 = q2_in + q1_in + q0_in; \
- tmp_filt8_2 = tmp_filt8_2 + tmp_filt8_1; \
- tmp_filt8_0 = tmp_filt8_2 + (p0_in); \
- tmp_filt8_0 = tmp_filt8_0 + (p3_in); \
- p0_filt8_out = (v8i16)__msa_srari_h((v8i16)tmp_filt8_0, 3); \
- \
- tmp_filt8_0 = q2_in + q3_in; \
- tmp_filt8_0 = p0_in + tmp_filt8_1 + tmp_filt8_0; \
- tmp_filt8_1 = q3_in + q3_in; \
- tmp_filt8_1 = tmp_filt8_1 + tmp_filt8_0; \
- q2_filt8_out = (v8i16)__msa_srari_h((v8i16)tmp_filt8_1, 3); \
- \
- tmp_filt8_0 = tmp_filt8_2 + q3_in; \
- tmp_filt8_1 = tmp_filt8_0 + q0_in; \
- q0_filt8_out = (v8i16)__msa_srari_h((v8i16)tmp_filt8_1, 3); \
- \
- tmp_filt8_1 = tmp_filt8_0 - p2_in; \
- tmp_filt8_0 = q1_in + q3_in; \
- tmp_filt8_1 = tmp_filt8_0 + tmp_filt8_1; \
- q1_filt8_out = (v8i16)__msa_srari_h((v8i16)tmp_filt8_1, 3); \
- }
-
-#define LPF_MASK_HEV(p3_in, p2_in, p1_in, p0_in, q0_in, q1_in, q2_in, q3_in, \
- limit_in, b_limit_in, thresh_in, hev_out, mask_out, \
- flat_out) \
- { \
- v16u8 p3_asub_p2_m, p2_asub_p1_m, p1_asub_p0_m, q1_asub_q0_m; \
- v16u8 p1_asub_q1_m, p0_asub_q0_m, q3_asub_q2_m, q2_asub_q1_m; \
- \
- /* absolute subtraction of pixel values */ \
- p3_asub_p2_m = __msa_asub_u_b(p3_in, p2_in); \
- p2_asub_p1_m = __msa_asub_u_b(p2_in, p1_in); \
- p1_asub_p0_m = __msa_asub_u_b(p1_in, p0_in); \
- q1_asub_q0_m = __msa_asub_u_b(q1_in, q0_in); \
- q2_asub_q1_m = __msa_asub_u_b(q2_in, q1_in); \
- q3_asub_q2_m = __msa_asub_u_b(q3_in, q2_in); \
- p0_asub_q0_m = __msa_asub_u_b(p0_in, q0_in); \
- p1_asub_q1_m = __msa_asub_u_b(p1_in, q1_in); \
- \
- /* calculation of hev */ \
- flat_out = __msa_max_u_b(p1_asub_p0_m, q1_asub_q0_m); \
- hev_out = thresh_in < (v16u8)flat_out; \
- \
- /* calculation of mask */ \
- p0_asub_q0_m = __msa_adds_u_b(p0_asub_q0_m, p0_asub_q0_m); \
- p1_asub_q1_m >>= 1; \
- p0_asub_q0_m = __msa_adds_u_b(p0_asub_q0_m, p1_asub_q1_m); \
- \
- mask_out = b_limit_in < p0_asub_q0_m; \
- mask_out = __msa_max_u_b(flat_out, mask_out); \
- p3_asub_p2_m = __msa_max_u_b(p3_asub_p2_m, p2_asub_p1_m); \
- mask_out = __msa_max_u_b(p3_asub_p2_m, mask_out); \
- q2_asub_q1_m = __msa_max_u_b(q2_asub_q1_m, q3_asub_q2_m); \
- mask_out = __msa_max_u_b(q2_asub_q1_m, mask_out); \
- \
- mask_out = limit_in < (v16u8)mask_out; \
- mask_out = __msa_xori_b(mask_out, 0xff); \
- }
-#endif // AOM_AOM_DSP_MIPS_LOOPFILTER_MSA_H_
diff --git a/third_party/aom/aom_dsp/mips/macros_msa.h b/third_party/aom/aom_dsp/mips/macros_msa.h
deleted file mode 100644
index 9bfc27147..000000000
--- a/third_party/aom/aom_dsp/mips/macros_msa.h
+++ /dev/null
@@ -1,2058 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#ifndef AOM_AOM_DSP_MIPS_MACROS_MSA_H_
-#define AOM_AOM_DSP_MIPS_MACROS_MSA_H_
-
-#include <msa.h>
-
-#include "config/aom_config.h"
-
-#include "aom/aom_integer.h"
-
-#define LD_B(RTYPE, psrc) *((const RTYPE *)(psrc))
-#define LD_UB(...) LD_B(v16u8, __VA_ARGS__)
-#define LD_SB(...) LD_B(v16i8, __VA_ARGS__)
-
-#define LD_H(RTYPE, psrc) *((const RTYPE *)(psrc))
-#define LD_UH(...) LD_H(v8u16, __VA_ARGS__)
-#define LD_SH(...) LD_H(v8i16, __VA_ARGS__)
-
-#define LD_W(RTYPE, psrc) *((const RTYPE *)(psrc))
-#define LD_SW(...) LD_W(v4i32, __VA_ARGS__)
-
-#define ST_B(RTYPE, in, pdst) *((RTYPE *)(pdst)) = (in)
-#define ST_UB(...) ST_B(v16u8, __VA_ARGS__)
-#define ST_SB(...) ST_B(v16i8, __VA_ARGS__)
-
-#define ST_H(RTYPE, in, pdst) *((RTYPE *)(pdst)) = (in)
-#define ST_SH(...) ST_H(v8i16, __VA_ARGS__)
-
-#define ST_W(RTYPE, in, pdst) *((RTYPE *)(pdst)) = (in)
-#define ST_SW(...) ST_W(v4i32, __VA_ARGS__)
-
-#if (__mips_isa_rev >= 6)
-#define LH(psrc) \
- ({ \
- const uint8_t *psrc_m = (const uint8_t *)(psrc); \
- uint16_t val_m; \
- \
- __asm__ __volatile__("lh %[val_m], %[psrc_m] \n\t" \
- \
- : [val_m] "=r"(val_m) \
- : [psrc_m] "m"(*psrc_m)); \
- \
- val_m; \
- })
-
-#define LW(psrc) \
- ({ \
- const uint8_t *psrc_m = (const uint8_t *)(psrc); \
- uint32_t val_m; \
- \
- __asm__ __volatile__("lw %[val_m], %[psrc_m] \n\t" \
- \
- : [val_m] "=r"(val_m) \
- : [psrc_m] "m"(*psrc_m)); \
- \
- val_m; \
- })
-
-#if (__mips == 64)
-#define LD(psrc) \
- ({ \
- const uint8_t *psrc_m = (const uint8_t *)(psrc); \
- uint64_t val_m = 0; \
- \
- __asm__ __volatile__("ld %[val_m], %[psrc_m] \n\t" \
- \
- : [val_m] "=r"(val_m) \
- : [psrc_m] "m"(*psrc_m)); \
- \
- val_m; \
- })
-#else // !(__mips == 64)
-#define LD(psrc) \
- ({ \
- const uint8_t *psrc_m = (const uint8_t *)(psrc); \
- uint32_t val0_m, val1_m; \
- uint64_t val_m = 0; \
- \
- val0_m = LW(psrc_m); \
- val1_m = LW(psrc_m + 4); \
- \
- val_m = (uint64_t)(val1_m); \
- val_m = (uint64_t)((val_m << 32) & 0xFFFFFFFF00000000); \
- val_m = (uint64_t)(val_m | (uint64_t)val0_m); \
- \
- val_m; \
- })
-#endif // (__mips == 64)
-
-#define SH(val, pdst) \
- { \
- uint8_t *pdst_m = (uint8_t *)(pdst); \
- const uint16_t val_m = (val); \
- \
- __asm__ __volatile__("sh %[val_m], %[pdst_m] \n\t" \
- \
- : [pdst_m] "=m"(*pdst_m) \
- : [val_m] "r"(val_m)); \
- }
-
-#define SW(val, pdst) \
- { \
- uint8_t *pdst_m = (uint8_t *)(pdst); \
- const uint32_t val_m = (val); \
- \
- __asm__ __volatile__("sw %[val_m], %[pdst_m] \n\t" \
- \
- : [pdst_m] "=m"(*pdst_m) \
- : [val_m] "r"(val_m)); \
- }
-
-#define SD(val, pdst) \
- { \
- uint8_t *pdst_m = (uint8_t *)(pdst); \
- const uint64_t val_m = (val); \
- \
- __asm__ __volatile__("sd %[val_m], %[pdst_m] \n\t" \
- \
- : [pdst_m] "=m"(*pdst_m) \
- : [val_m] "r"(val_m)); \
- }
-#else // !(__mips_isa_rev >= 6)
-#define LH(psrc) \
- ({ \
- const uint8_t *psrc_m = (const uint8_t *)(psrc); \
- uint16_t val_m; \
- \
- __asm__ __volatile__("ulh %[val_m], %[psrc_m] \n\t" \
- \
- : [val_m] "=r"(val_m) \
- : [psrc_m] "m"(*psrc_m)); \
- \
- val_m; \
- })
-
-#define LW(psrc) \
- ({ \
- const uint8_t *psrc_m = (const uint8_t *)(psrc); \
- uint32_t val_m; \
- \
- __asm__ __volatile__("ulw %[val_m], %[psrc_m] \n\t" \
- \
- : [val_m] "=r"(val_m) \
- : [psrc_m] "m"(*psrc_m)); \
- \
- val_m; \
- })
-
-#if (__mips == 64)
-#define LD(psrc) \
- ({ \
- const uint8_t *psrc_m = (const uint8_t *)(psrc); \
- uint64_t val_m = 0; \
- \
- __asm__ __volatile__("uld %[val_m], %[psrc_m] \n\t" \
- \
- : [val_m] "=r"(val_m) \
- : [psrc_m] "m"(*psrc_m)); \
- \
- val_m; \
- })
-#else // !(__mips == 64)
-#define LD(psrc) \
- ({ \
- const uint8_t *psrc_m1 = (const uint8_t *)(psrc); \
- uint32_t val0_m, val1_m; \
- uint64_t val_m_combined = 0; \
- \
- val0_m = LW(psrc_m1); \
- val1_m = LW(psrc_m1 + 4); \
- \
- val_m_combined = (uint64_t)(val1_m); \
- val_m_combined = (uint64_t)((val_m_combined << 32) & 0xFFFFFFFF00000000); \
- val_m_combined = (uint64_t)(val_m_combined | (uint64_t)val0_m); \
- \
- val_m_combined; \
- })
-#endif // (__mips == 64)
-
-#define SH(val, pdst) \
- { \
- uint8_t *pdst_m = (uint8_t *)(pdst); \
- const uint16_t val_m = (val); \
- \
- __asm__ __volatile__("ush %[val_m], %[pdst_m] \n\t" \
- \
- : [pdst_m] "=m"(*pdst_m) \
- : [val_m] "r"(val_m)); \
- }
-
-#define SW(val, pdst) \
- { \
- uint8_t *pdst_m = (uint8_t *)(pdst); \
- const uint32_t val_m = (val); \
- \
- __asm__ __volatile__("usw %[val_m], %[pdst_m] \n\t" \
- \
- : [pdst_m] "=m"(*pdst_m) \
- : [val_m] "r"(val_m)); \
- }
-
-#define SD(val, pdst) \
- { \
- uint8_t *pdst_m1 = (uint8_t *)(pdst); \
- uint32_t val0_m, val1_m; \
- \
- val0_m = (uint32_t)((val)&0x00000000FFFFFFFF); \
- val1_m = (uint32_t)(((val) >> 32) & 0x00000000FFFFFFFF); \
- \
- SW(val0_m, pdst_m1); \
- SW(val1_m, pdst_m1 + 4); \
- }
-#endif // (__mips_isa_rev >= 6)
-
-/* Description : Load 4 words with stride
- Arguments : Inputs - psrc, stride
- Outputs - out0, out1, out2, out3
- Details : Load word in 'out0' from (psrc)
- Load word in 'out1' from (psrc + stride)
- Load word in 'out2' from (psrc + 2 * stride)
- Load word in 'out3' from (psrc + 3 * stride)
-*/
-#define LW4(psrc, stride, out0, out1, out2, out3) \
- { \
- out0 = LW((psrc)); \
- out1 = LW((psrc) + stride); \
- out2 = LW((psrc) + 2 * stride); \
- out3 = LW((psrc) + 3 * stride); \
- }
-
-/* Description : Load double words with stride
- Arguments : Inputs - psrc, stride
- Outputs - out0, out1
- Details : Load double word in 'out0' from (psrc)
- Load double word in 'out1' from (psrc + stride)
-*/
-#define LD2(psrc, stride, out0, out1) \
- { \
- out0 = LD((psrc)); \
- out1 = LD((psrc) + stride); \
- }
-#define LD4(psrc, stride, out0, out1, out2, out3) \
- { \
- LD2((psrc), stride, out0, out1); \
- LD2((psrc) + 2 * stride, stride, out2, out3); \
- }
-
-/* Description : Store 4 words with stride
- Arguments : Inputs - in0, in1, in2, in3, pdst, stride
- Details : Store word from 'in0' to (pdst)
- Store word from 'in1' to (pdst + stride)
- Store word from 'in2' to (pdst + 2 * stride)
- Store word from 'in3' to (pdst + 3 * stride)
-*/
-#define SW4(in0, in1, in2, in3, pdst, stride) \
- { \
- SW(in0, (pdst)) \
- SW(in1, (pdst) + stride); \
- SW(in2, (pdst) + 2 * stride); \
- SW(in3, (pdst) + 3 * stride); \
- }
-
-/* Description : Store 4 double words with stride
- Arguments : Inputs - in0, in1, in2, in3, pdst, stride
- Details : Store double word from 'in0' to (pdst)
- Store double word from 'in1' to (pdst + stride)
- Store double word from 'in2' to (pdst + 2 * stride)
- Store double word from 'in3' to (pdst + 3 * stride)
-*/
-#define SD4(in0, in1, in2, in3, pdst, stride) \
- { \
- SD(in0, (pdst)) \
- SD(in1, (pdst) + stride); \
- SD(in2, (pdst) + 2 * stride); \
- SD(in3, (pdst) + 3 * stride); \
- }
-
-/* Description : Load vectors with 16 byte elements with stride
- Arguments : Inputs - psrc, stride
- Outputs - out0, out1
- Return Type - as per RTYPE
- Details : Load 16 byte elements in 'out0' from (psrc)
- Load 16 byte elements in 'out1' from (psrc + stride)
-*/
-#define LD_B2(RTYPE, psrc, stride, out0, out1) \
- { \
- out0 = LD_B(RTYPE, (psrc)); \
- out1 = LD_B(RTYPE, (psrc) + stride); \
- }
-#define LD_UB2(...) LD_B2(v16u8, __VA_ARGS__)
-#define LD_SB2(...) LD_B2(v16i8, __VA_ARGS__)
-
-#define LD_B3(RTYPE, psrc, stride, out0, out1, out2) \
- { \
- LD_B2(RTYPE, (psrc), stride, out0, out1); \
- out2 = LD_B(RTYPE, (psrc) + 2 * stride); \
- }
-#define LD_UB3(...) LD_B3(v16u8, __VA_ARGS__)
-
-#define LD_B4(RTYPE, psrc, stride, out0, out1, out2, out3) \
- { \
- LD_B2(RTYPE, (psrc), stride, out0, out1); \
- LD_B2(RTYPE, (psrc) + 2 * stride, stride, out2, out3); \
- }
-#define LD_UB4(...) LD_B4(v16u8, __VA_ARGS__)
-#define LD_SB4(...) LD_B4(v16i8, __VA_ARGS__)
-
-#define LD_B5(RTYPE, psrc, stride, out0, out1, out2, out3, out4) \
- { \
- LD_B4(RTYPE, (psrc), stride, out0, out1, out2, out3); \
- out4 = LD_B(RTYPE, (psrc) + 4 * stride); \
- }
-#define LD_UB5(...) LD_B5(v16u8, __VA_ARGS__)
-#define LD_SB5(...) LD_B5(v16i8, __VA_ARGS__)
-
-#define LD_B7(RTYPE, psrc, stride, out0, out1, out2, out3, out4, out5, out6) \
- { \
- LD_B5(RTYPE, (psrc), stride, out0, out1, out2, out3, out4); \
- LD_B2(RTYPE, (psrc) + 5 * stride, stride, out5, out6); \
- }
-#define LD_SB7(...) LD_B7(v16i8, __VA_ARGS__)
-
-#define LD_B8(RTYPE, psrc, stride, out0, out1, out2, out3, out4, out5, out6, \
- out7) \
- { \
- LD_B4(RTYPE, (psrc), stride, out0, out1, out2, out3); \
- LD_B4(RTYPE, (psrc) + 4 * stride, stride, out4, out5, out6, out7); \
- }
-#define LD_UB8(...) LD_B8(v16u8, __VA_ARGS__)
-#define LD_SB8(...) LD_B8(v16i8, __VA_ARGS__)
-
-/* Description : Load vectors with 8 halfword elements with stride
- Arguments : Inputs - psrc, stride
- Outputs - out0, out1
- Details : Load 8 halfword elements in 'out0' from (psrc)
- Load 8 halfword elements in 'out1' from (psrc + stride)
-*/
-#define LD_H2(RTYPE, psrc, stride, out0, out1) \
- { \
- out0 = LD_H(RTYPE, (psrc)); \
- out1 = LD_H(RTYPE, (psrc) + (stride)); \
- }
-#define LD_SH2(...) LD_H2(v8i16, __VA_ARGS__)
-
-#define LD_H4(RTYPE, psrc, stride, out0, out1, out2, out3) \
- { \
- LD_H2(RTYPE, (psrc), stride, out0, out1); \
- LD_H2(RTYPE, (psrc) + 2 * stride, stride, out2, out3); \
- }
-#define LD_SH4(...) LD_H4(v8i16, __VA_ARGS__)
-
-#define LD_H8(RTYPE, psrc, stride, out0, out1, out2, out3, out4, out5, out6, \
- out7) \
- { \
- LD_H4(RTYPE, (psrc), stride, out0, out1, out2, out3); \
- LD_H4(RTYPE, (psrc) + 4 * stride, stride, out4, out5, out6, out7); \
- }
-#define LD_SH8(...) LD_H8(v8i16, __VA_ARGS__)
-
-#define LD_H16(RTYPE, psrc, stride, out0, out1, out2, out3, out4, out5, out6, \
- out7, out8, out9, out10, out11, out12, out13, out14, out15) \
- { \
- LD_H8(RTYPE, (psrc), stride, out0, out1, out2, out3, out4, out5, out6, \
- out7); \
- LD_H8(RTYPE, (psrc) + 8 * stride, stride, out8, out9, out10, out11, out12, \
- out13, out14, out15); \
- }
-#define LD_SH16(...) LD_H16(v8i16, __VA_ARGS__)
-
-/* Description : Load 4x4 block of signed halfword elements from 1D source
- data into 4 vectors (Each vector with 4 signed halfwords)
- Arguments : Input - psrc
- Outputs - out0, out1, out2, out3
-*/
-#define LD4x4_SH(psrc, out0, out1, out2, out3) \
- { \
- out0 = LD_SH(psrc); \
- out2 = LD_SH(psrc + 8); \
- out1 = (v8i16)__msa_ilvl_d((v2i64)out0, (v2i64)out0); \
- out3 = (v8i16)__msa_ilvl_d((v2i64)out2, (v2i64)out2); \
- }
-
-/* Description : Load 2 vectors of signed word elements with stride
- Arguments : Inputs - psrc, stride
- Outputs - out0, out1
- Return Type - signed word
-*/
-#define LD_SW2(psrc, stride, out0, out1) \
- { \
- out0 = LD_SW((psrc)); \
- out1 = LD_SW((psrc) + stride); \
- }
-
-/* Description : Store vectors of 16 byte elements with stride
- Arguments : Inputs - in0, in1, pdst, stride
- Details : Store 16 byte elements from 'in0' to (pdst)
- Store 16 byte elements from 'in1' to (pdst + stride)
-*/
-#define ST_B2(RTYPE, in0, in1, pdst, stride) \
- { \
- ST_B(RTYPE, in0, (pdst)); \
- ST_B(RTYPE, in1, (pdst) + stride); \
- }
-#define ST_UB2(...) ST_B2(v16u8, __VA_ARGS__)
-
-#define ST_B4(RTYPE, in0, in1, in2, in3, pdst, stride) \
- { \
- ST_B2(RTYPE, in0, in1, (pdst), stride); \
- ST_B2(RTYPE, in2, in3, (pdst) + 2 * stride, stride); \
- }
-#define ST_UB4(...) ST_B4(v16u8, __VA_ARGS__)
-
-#define ST_B8(RTYPE, in0, in1, in2, in3, in4, in5, in6, in7, pdst, stride) \
- { \
- ST_B4(RTYPE, in0, in1, in2, in3, pdst, stride); \
- ST_B4(RTYPE, in4, in5, in6, in7, (pdst) + 4 * stride, stride); \
- }
-#define ST_UB8(...) ST_B8(v16u8, __VA_ARGS__)
-
-/* Description : Store vectors of 8 halfword elements with stride
- Arguments : Inputs - in0, in1, pdst, stride
- Details : Store 8 halfword elements from 'in0' to (pdst)
- Store 8 halfword elements from 'in1' to (pdst + stride)
-*/
-#define ST_H2(RTYPE, in0, in1, pdst, stride) \
- { \
- ST_H(RTYPE, in0, (pdst)); \
- ST_H(RTYPE, in1, (pdst) + stride); \
- }
-#define ST_SH2(...) ST_H2(v8i16, __VA_ARGS__)
-
-#define ST_H4(RTYPE, in0, in1, in2, in3, pdst, stride) \
- { \
- ST_H2(RTYPE, in0, in1, (pdst), stride); \
- ST_H2(RTYPE, in2, in3, (pdst) + 2 * stride, stride); \
- }
-#define ST_SH4(...) ST_H4(v8i16, __VA_ARGS__)
-
-#define ST_H8(RTYPE, in0, in1, in2, in3, in4, in5, in6, in7, pdst, stride) \
- { \
- ST_H4(RTYPE, in0, in1, in2, in3, (pdst), stride); \
- ST_H4(RTYPE, in4, in5, in6, in7, (pdst) + 4 * stride, stride); \
- }
-#define ST_SH8(...) ST_H8(v8i16, __VA_ARGS__)
-
-/* Description : Store vectors of word elements with stride
- Arguments : Inputs - in0, in1, pdst, stride
- Details : Store 4 word elements from 'in0' to (pdst)
- Store 4 word elements from 'in1' to (pdst + stride)
-*/
-#define ST_SW2(in0, in1, pdst, stride) \
- { \
- ST_SW(in0, (pdst)); \
- ST_SW(in1, (pdst) + stride); \
- }
-
-/* Description : Store 2x4 byte block to destination memory from input vector
- Arguments : Inputs - in, stidx, pdst, stride
- Details : Index 'stidx' halfword element from 'in' vector is copied to
- the GP register and stored to (pdst)
- Index 'stidx+1' halfword element from 'in' vector is copied to
- the GP register and stored to (pdst + stride)
- Index 'stidx+2' halfword element from 'in' vector is copied to
- the GP register and stored to (pdst + 2 * stride)
- Index 'stidx+3' halfword element from 'in' vector is copied to
- the GP register and stored to (pdst + 3 * stride)
-*/
-#define ST2x4_UB(in, stidx, pdst, stride) \
- { \
- uint16_t out0_m, out1_m, out2_m, out3_m; \
- uint8_t *pblk_2x4_m = (uint8_t *)(pdst); \
- \
- out0_m = __msa_copy_u_h((v8i16)in, (stidx)); \
- out1_m = __msa_copy_u_h((v8i16)in, (stidx + 1)); \
- out2_m = __msa_copy_u_h((v8i16)in, (stidx + 2)); \
- out3_m = __msa_copy_u_h((v8i16)in, (stidx + 3)); \
- \
- SH(out0_m, pblk_2x4_m); \
- SH(out1_m, pblk_2x4_m + stride); \
- SH(out2_m, pblk_2x4_m + 2 * stride); \
- SH(out3_m, pblk_2x4_m + 3 * stride); \
- }
-
-/* Description : Store 4x2 byte block to destination memory from input vector
- Arguments : Inputs - in, pdst, stride
- Details : Index 0 word element from 'in' vector is copied to the GP
- register and stored to (pdst)
- Index 1 word element from 'in' vector is copied to the GP
- register and stored to (pdst + stride)
-*/
-#define ST4x2_UB(in, pdst, stride) \
- { \
- uint32_t out0_m, out1_m; \
- uint8_t *pblk_4x2_m = (uint8_t *)(pdst); \
- \
- out0_m = __msa_copy_u_w((v4i32)in, 0); \
- out1_m = __msa_copy_u_w((v4i32)in, 1); \
- \
- SW(out0_m, pblk_4x2_m); \
- SW(out1_m, pblk_4x2_m + stride); \
- }
-
-/* Description : Store 4x4 byte block to destination memory from input vector
- Arguments : Inputs - in0, in1, pdst, stride
- Details : 'Idx0' word element from input vector 'in0' is copied to the
- GP register and stored to (pdst)
- 'Idx1' word element from input vector 'in0' is copied to the
- GP register and stored to (pdst + stride)
- 'Idx2' word element from input vector 'in0' is copied to the
- GP register and stored to (pdst + 2 * stride)
- 'Idx3' word element from input vector 'in0' is copied to the
- GP register and stored to (pdst + 3 * stride)
-*/
-#define ST4x4_UB(in0, in1, idx0, idx1, idx2, idx3, pdst, stride) \
- { \
- uint32_t out0_m, out1_m, out2_m, out3_m; \
- uint8_t *pblk_4x4_m = (uint8_t *)(pdst); \
- \
- out0_m = __msa_copy_u_w((v4i32)in0, idx0); \
- out1_m = __msa_copy_u_w((v4i32)in0, idx1); \
- out2_m = __msa_copy_u_w((v4i32)in1, idx2); \
- out3_m = __msa_copy_u_w((v4i32)in1, idx3); \
- \
- SW4(out0_m, out1_m, out2_m, out3_m, pblk_4x4_m, stride); \
- }
-#define ST4x8_UB(in0, in1, pdst, stride) \
- { \
- uint8_t *pblk_4x8 = (uint8_t *)(pdst); \
- \
- ST4x4_UB(in0, in0, 0, 1, 2, 3, pblk_4x8, stride); \
- ST4x4_UB(in1, in1, 0, 1, 2, 3, pblk_4x8 + 4 * stride, stride); \
- }
-
-/* Description : Store 8x1 byte block to destination memory from input vector
- Arguments : Inputs - in, pdst
- Details : Index 0 double word element from 'in' vector is copied to the
- GP register and stored to (pdst)
-*/
-#define ST8x1_UB(in, pdst) \
- { \
- uint64_t out0_m; \
- \
- out0_m = __msa_copy_u_d((v2i64)in, 0); \
- SD(out0_m, pdst); \
- }
-
-/* Description : Store 8x2 byte block to destination memory from input vector
- Arguments : Inputs - in, pdst, stride
- Details : Index 0 double word element from 'in' vector is copied to the
- GP register and stored to (pdst)
- Index 1 double word element from 'in' vector is copied to the
- GP register and stored to (pdst + stride)
-*/
-#define ST8x2_UB(in, pdst, stride) \
- { \
- uint64_t out0_m, out1_m; \
- uint8_t *pblk_8x2_m = (uint8_t *)(pdst); \
- \
- out0_m = __msa_copy_u_d((v2i64)in, 0); \
- out1_m = __msa_copy_u_d((v2i64)in, 1); \
- \
- SD(out0_m, pblk_8x2_m); \
- SD(out1_m, pblk_8x2_m + stride); \
- }
-
-/* Description : Store 8x4 byte block to destination memory from input
- vectors
- Arguments : Inputs - in0, in1, pdst, stride
- Details : Index 0 double word element from 'in0' vector is copied to the
- GP register and stored to (pdst)
- Index 1 double word element from 'in0' vector is copied to the
- GP register and stored to (pdst + stride)
- Index 0 double word element from 'in1' vector is copied to the
- GP register and stored to (pdst + 2 * stride)
- Index 1 double word element from 'in1' vector is copied to the
- GP register and stored to (pdst + 3 * stride)
-*/
-#define ST8x4_UB(in0, in1, pdst, stride) \
- { \
- uint64_t out0_m, out1_m, out2_m, out3_m; \
- uint8_t *pblk_8x4_m = (uint8_t *)(pdst); \
- \
- out0_m = __msa_copy_u_d((v2i64)in0, 0); \
- out1_m = __msa_copy_u_d((v2i64)in0, 1); \
- out2_m = __msa_copy_u_d((v2i64)in1, 0); \
- out3_m = __msa_copy_u_d((v2i64)in1, 1); \
- \
- SD4(out0_m, out1_m, out2_m, out3_m, pblk_8x4_m, stride); \
- }
-
-/* Description : average with rounding (in0 + in1 + 1) / 2.
- Arguments : Inputs - in0, in1, in2, in3,
- Outputs - out0, out1
- Return Type - as per RTYPE
- Details : Each unsigned byte element from 'in0' vector is added with
- each unsigned byte element from 'in1' vector. Then the average
- with rounding is calculated and written to 'out0'
-*/
-#define AVER_UB2(RTYPE, in0, in1, in2, in3, out0, out1) \
- { \
- out0 = (RTYPE)__msa_aver_u_b((v16u8)in0, (v16u8)in1); \
- out1 = (RTYPE)__msa_aver_u_b((v16u8)in2, (v16u8)in3); \
- }
-#define AVER_UB2_UB(...) AVER_UB2(v16u8, __VA_ARGS__)
-
-#define AVER_UB4(RTYPE, in0, in1, in2, in3, in4, in5, in6, in7, out0, out1, \
- out2, out3) \
- { \
- AVER_UB2(RTYPE, in0, in1, in2, in3, out0, out1) \
- AVER_UB2(RTYPE, in4, in5, in6, in7, out2, out3) \
- }
-#define AVER_UB4_UB(...) AVER_UB4(v16u8, __VA_ARGS__)
-
-/* Description : Immediate number of elements to slide with zero
- Arguments : Inputs - in0, in1, slide_val
- Outputs - out0, out1
- Return Type - as per RTYPE
- Details : Byte elements from 'zero_m' vector are slid into 'in0' by
- value specified in the 'slide_val'
-*/
-#define SLDI_B2_0(RTYPE, in0, in1, out0, out1, slide_val) \
- { \
- v16i8 zero_m = { 0 }; \
- out0 = (RTYPE)__msa_sldi_b((v16i8)zero_m, (v16i8)in0, slide_val); \
- out1 = (RTYPE)__msa_sldi_b((v16i8)zero_m, (v16i8)in1, slide_val); \
- }
-#define SLDI_B2_0_SW(...) SLDI_B2_0(v4i32, __VA_ARGS__)
-
-#define SLDI_B4_0(RTYPE, in0, in1, in2, in3, out0, out1, out2, out3, \
- slide_val) \
- { \
- SLDI_B2_0(RTYPE, in0, in1, out0, out1, slide_val); \
- SLDI_B2_0(RTYPE, in2, in3, out2, out3, slide_val); \
- }
-#define SLDI_B4_0_UB(...) SLDI_B4_0(v16u8, __VA_ARGS__)
-
-/* Description : Immediate number of elements to slide
- Arguments : Inputs - in0_0, in0_1, in1_0, in1_1, slide_val
- Outputs - out0, out1
- Return Type - as per RTYPE
- Details : Byte elements from 'in0_0' vector are slid into 'in1_0' by
- value specified in the 'slide_val'
-*/
-#define SLDI_B2(RTYPE, in0_0, in0_1, in1_0, in1_1, out0, out1, slide_val) \
- { \
- out0 = (RTYPE)__msa_sldi_b((v16i8)in0_0, (v16i8)in1_0, slide_val); \
- out1 = (RTYPE)__msa_sldi_b((v16i8)in0_1, (v16i8)in1_1, slide_val); \
- }
-#define SLDI_B2_UB(...) SLDI_B2(v16u8, __VA_ARGS__)
-#define SLDI_B2_SH(...) SLDI_B2(v8i16, __VA_ARGS__)
-
-#define SLDI_B3(RTYPE, in0_0, in0_1, in0_2, in1_0, in1_1, in1_2, out0, out1, \
- out2, slide_val) \
- { \
- SLDI_B2(RTYPE, in0_0, in0_1, in1_0, in1_1, out0, out1, slide_val) \
- out2 = (RTYPE)__msa_sldi_b((v16i8)in0_2, (v16i8)in1_2, slide_val); \
- }
-#define SLDI_B3_SB(...) SLDI_B3(v16i8, __VA_ARGS__)
-#define SLDI_B3_UH(...) SLDI_B3(v8u16, __VA_ARGS__)
-
-/* Description : Shuffle byte vector elements as per mask vector
- Arguments : Inputs - in0, in1, in2, in3, mask0, mask1
- Outputs - out0, out1
- Return Type - as per RTYPE
- Details : Byte elements from 'in0' & 'in1' are copied selectively to
- 'out0' as per control vector 'mask0'
-*/
-#define VSHF_B2(RTYPE, in0, in1, in2, in3, mask0, mask1, out0, out1) \
- { \
- out0 = (RTYPE)__msa_vshf_b((v16i8)mask0, (v16i8)in1, (v16i8)in0); \
- out1 = (RTYPE)__msa_vshf_b((v16i8)mask1, (v16i8)in3, (v16i8)in2); \
- }
-#define VSHF_B2_UB(...) VSHF_B2(v16u8, __VA_ARGS__)
-#define VSHF_B2_SB(...) VSHF_B2(v16i8, __VA_ARGS__)
-#define VSHF_B2_UH(...) VSHF_B2(v8u16, __VA_ARGS__)
-
-#define VSHF_B4(RTYPE, in0, in1, mask0, mask1, mask2, mask3, out0, out1, out2, \
- out3) \
- { \
- VSHF_B2(RTYPE, in0, in1, in0, in1, mask0, mask1, out0, out1); \
- VSHF_B2(RTYPE, in0, in1, in0, in1, mask2, mask3, out2, out3); \
- }
-#define VSHF_B4_SB(...) VSHF_B4(v16i8, __VA_ARGS__)
-#define VSHF_B4_SH(...) VSHF_B4(v8i16, __VA_ARGS__)
-
-/* Description : Dot product of unsigned byte vector elements
- Arguments : Inputs - mult0, mult1, cnst0, cnst1
- Outputs - out0, out1
- Return Type - as per RTYPE
- Details : Unsigned byte elements from 'mult0' are multiplied with
- unsigned byte elements from 'cnst0' producing a result
- twice the size of input i.e. unsigned halfword.
- The multiplication result of adjacent odd-even elements
- are added together and written to the 'out0' vector.
- 'out1' is computed the same way from 'mult1' and 'cnst1'.
-*/
-#define DOTP_UB2(RTYPE, mult0, mult1, cnst0, cnst1, out0, out1) \
- { \
- out0 = (RTYPE)__msa_dotp_u_h((v16u8)mult0, (v16u8)cnst0); \
- out1 = (RTYPE)__msa_dotp_u_h((v16u8)mult1, (v16u8)cnst1); \
- }
-#define DOTP_UB2_UH(...) DOTP_UB2(v8u16, __VA_ARGS__)
-
-/* Four-output variant: applies DOTP_UB2 to (mult0, mult1, cnst0, cnst1) and
- then to (mult2, mult3, cnst2, cnst3), so 'outN' pairs 'multN' with 'cnstN'. */
-#define DOTP_UB4(RTYPE, mult0, mult1, mult2, mult3, cnst0, cnst1, cnst2, \
- cnst3, out0, out1, out2, out3) \
- { \
- DOTP_UB2(RTYPE, mult0, mult1, cnst0, cnst1, out0, out1); \
- DOTP_UB2(RTYPE, mult2, mult3, cnst2, cnst3, out2, out3); \
- }
-#define DOTP_UB4_UH(...) DOTP_UB4(v8u16, __VA_ARGS__)
-
-/* Description : Dot product of signed byte vector elements
- Arguments : Inputs - mult0, mult1, cnst0, cnst1
- Outputs - out0, out1
- Return Type - as per RTYPE
- Details : Signed byte elements from 'mult0' are multiplied with
- signed byte elements from 'cnst0' producing a result
- twice the size of input i.e. signed halfword.
- The multiplication result of adjacent odd-even elements
- are added together and written to the 'out0' vector.
- 'out1' is computed the same way from 'mult1' and 'cnst1'.
-*/
-#define DOTP_SB2(RTYPE, mult0, mult1, cnst0, cnst1, out0, out1) \
- { \
- out0 = (RTYPE)__msa_dotp_s_h((v16i8)mult0, (v16i8)cnst0); \
- out1 = (RTYPE)__msa_dotp_s_h((v16i8)mult1, (v16i8)cnst1); \
- }
-#define DOTP_SB2_SH(...) DOTP_SB2(v8i16, __VA_ARGS__)
-
-/* Four-output variant: applies DOTP_SB2 to (mult0, mult1, cnst0, cnst1) and
- then to (mult2, mult3, cnst2, cnst3), so 'outN' pairs 'multN' with 'cnstN'. */
-#define DOTP_SB4(RTYPE, mult0, mult1, mult2, mult3, cnst0, cnst1, cnst2, \
- cnst3, out0, out1, out2, out3) \
- { \
- DOTP_SB2(RTYPE, mult0, mult1, cnst0, cnst1, out0, out1); \
- DOTP_SB2(RTYPE, mult2, mult3, cnst2, cnst3, out2, out3); \
- }
-#define DOTP_SB4_SH(...) DOTP_SB4(v8i16, __VA_ARGS__)
-
-/* Description : Dot product of signed halfword vector elements
- Arguments : Inputs - mult0, mult1, cnst0, cnst1
- Outputs - out0, out1
- Return Type - as per RTYPE
- Details : Signed halfword elements from 'mult0' are multiplied with
- signed halfword elements from 'cnst0' producing a result
- twice the size of input i.e. signed word.
- The multiplication result of adjacent odd-even elements
- are added together and written to the 'out0' vector.
- 'out1' is computed the same way from 'mult1' and 'cnst1'.
-*/
-#define DOTP_SH2(RTYPE, mult0, mult1, cnst0, cnst1, out0, out1) \
- { \
- out0 = (RTYPE)__msa_dotp_s_w((v8i16)mult0, (v8i16)cnst0); \
- out1 = (RTYPE)__msa_dotp_s_w((v8i16)mult1, (v8i16)cnst1); \
- }
-#define DOTP_SH2_SW(...) DOTP_SH2(v4i32, __VA_ARGS__)
-
-/* Four-output variant: applies DOTP_SH2 to (mult0, mult1, cnst0, cnst1) and
- then to (mult2, mult3, cnst2, cnst3), so 'outN' pairs 'multN' with 'cnstN'. */
-#define DOTP_SH4(RTYPE, mult0, mult1, mult2, mult3, cnst0, cnst1, cnst2, \
- cnst3, out0, out1, out2, out3) \
- { \
- DOTP_SH2(RTYPE, mult0, mult1, cnst0, cnst1, out0, out1); \
- DOTP_SH2(RTYPE, mult2, mult3, cnst2, cnst3, out2, out3); \
- }
-#define DOTP_SH4_SW(...) DOTP_SH4(v4i32, __VA_ARGS__)
-
-/* Description : Dot product of signed word vector elements
- Arguments : Inputs - mult0, mult1, cnst0, cnst1
- Outputs - out0, out1
- Return Type - as per RTYPE
- Details : Signed word elements from 'mult0' are multiplied with
- signed word elements from 'cnst0' producing a result
- twice the size of input i.e. signed double word.
- The multiplication result of adjacent odd-even elements
- are added together and written to the 'out0' vector.
- 'out1' is computed the same way from 'mult1' and 'cnst1'.
-*/
-#define DOTP_SW2(RTYPE, mult0, mult1, cnst0, cnst1, out0, out1) \
- { \
- out0 = (RTYPE)__msa_dotp_s_d((v4i32)mult0, (v4i32)cnst0); \
- out1 = (RTYPE)__msa_dotp_s_d((v4i32)mult1, (v4i32)cnst1); \
- }
-#define DOTP_SW2_SD(...) DOTP_SW2(v2i64, __VA_ARGS__)
-
-/* Description : Dot product & addition of signed byte vector elements
- Arguments : Inputs - mult0, mult1, cnst0, cnst1
- Outputs - out0, out1 (also read: they are the accumulators)
- Return Type - as per RTYPE
- Details : Signed byte elements from 'mult0' are multiplied with
- signed byte elements from 'cnst0' producing a result
- twice the size of input i.e. signed halfword.
- The multiplication result of adjacent odd-even elements
- are added to the existing contents of the 'out0' vector.
- 'out1' is accumulated the same way from 'mult1' and 'cnst1'.
-*/
-#define DPADD_SB2(RTYPE, mult0, mult1, cnst0, cnst1, out0, out1) \
- { \
- out0 = (RTYPE)__msa_dpadd_s_h((v8i16)out0, (v16i8)mult0, (v16i8)cnst0); \
- out1 = (RTYPE)__msa_dpadd_s_h((v8i16)out1, (v16i8)mult1, (v16i8)cnst1); \
- }
-#define DPADD_SB2_SH(...) DPADD_SB2(v8i16, __VA_ARGS__)
-
-/* Four-accumulator variant: applies DPADD_SB2 to (mult0, mult1, cnst0, cnst1)
- and then to (mult2, mult3, cnst2, cnst3), accumulating into 'out0'..'out3'. */
-#define DPADD_SB4(RTYPE, mult0, mult1, mult2, mult3, cnst0, cnst1, cnst2, \
- cnst3, out0, out1, out2, out3) \
- { \
- DPADD_SB2(RTYPE, mult0, mult1, cnst0, cnst1, out0, out1); \
- DPADD_SB2(RTYPE, mult2, mult3, cnst2, cnst3, out2, out3); \
- }
-#define DPADD_SB4_SH(...) DPADD_SB4(v8i16, __VA_ARGS__)
-
-/* Description : Dot product & addition of signed halfword vector elements
- Arguments : Inputs - mult0, mult1, cnst0, cnst1
- Outputs - out0, out1 (also read: they are the accumulators)
- Return Type - as per RTYPE
- Details : Signed halfword elements from 'mult0' are multiplied with
- signed halfword elements from 'cnst0' producing a result
- twice the size of input i.e. signed word.
- The multiplication result of adjacent odd-even elements
- are added to the existing contents of the 'out0' vector.
- 'out1' is accumulated the same way from 'mult1' and 'cnst1'.
-*/
-#define DPADD_SH2(RTYPE, mult0, mult1, cnst0, cnst1, out0, out1) \
- { \
- out0 = (RTYPE)__msa_dpadd_s_w((v4i32)out0, (v8i16)mult0, (v8i16)cnst0); \
- out1 = (RTYPE)__msa_dpadd_s_w((v4i32)out1, (v8i16)mult1, (v8i16)cnst1); \
- }
-#define DPADD_SH2_SW(...) DPADD_SH2(v4i32, __VA_ARGS__)
-
-/* Description : Dot product of signed word vector elements with themselves,
- accumulated into double word vector elements
- Arguments : Inputs - mult0, mult1
- Outputs - out0, out1 (also read: they are the accumulators)
- Return Type - as per RTYPE
- Details : Each signed word element from 'mult0' is multiplied with itself
- producing an intermediate result twice the size of input
- i.e. signed double word
- The multiplication result of adjacent odd-even elements
- are added to the existing contents of the 'out0' vector.
- 'out1' is accumulated the same way from 'mult1'.
-*/
-#define DPADD_SD2(RTYPE, mult0, mult1, out0, out1) \
- { \
- out0 = (RTYPE)__msa_dpadd_s_d((v2i64)out0, (v4i32)mult0, (v4i32)mult0); \
- out1 = (RTYPE)__msa_dpadd_s_d((v2i64)out1, (v4i32)mult1, (v4i32)mult1); \
- }
-#define DPADD_SD2_SD(...) DPADD_SD2(v2i64, __VA_ARGS__)
-
-/* Description : Minimum values between unsigned elements of
- either vector are copied to the output vector
- Arguments : Inputs - in0, in1, min_vec
- Outputs - in place operation
- Return Type - as per RTYPE
- Details : Minimum of unsigned halfword element values from 'in0' and
- 'min_vec' are written to output vector 'in0'. 'in1' is
- handled the same way against the same 'min_vec'.
- 'min_vec' is cast to v8u16 like every other vector operand,
- so callers may pass any 128-bit vector type.
-*/
-#define MIN_UH2(RTYPE, in0, in1, min_vec) \
- { \
- in0 = (RTYPE)__msa_min_u_h((v8u16)in0, (v8u16)min_vec); \
- in1 = (RTYPE)__msa_min_u_h((v8u16)in1, (v8u16)min_vec); \
- }
-#define MIN_UH2_UH(...) MIN_UH2(v8u16, __VA_ARGS__)
-
-/* Four-vector variant: applies MIN_UH2 in place to (in0, in1) and then to
- (in2, in3), all against the same 'min_vec'. */
-#define MIN_UH4(RTYPE, in0, in1, in2, in3, min_vec) \
- { \
- MIN_UH2(RTYPE, in0, in1, min_vec); \
- MIN_UH2(RTYPE, in2, in3, min_vec); \
- }
-#define MIN_UH4_UH(...) MIN_UH4(v8u16, __VA_ARGS__)
-
-/* Description : Clips all signed halfword elements of input vector
- between 0 & 255
- Arguments : Input - in
- Output - out_m (value of the expression)
- Return Type - signed halfword
- Details : Written as a ({ ... }) statement expression, so the macro
- yields 'out_m' as its value: elements are first raised to at
- least 0, then limited to at most 255 ('max_m').
-*/
-#define CLIP_SH_0_255(in) \
- ({ \
- v8i16 max_m = __msa_ldi_h(255); \
- v8i16 out_m; \
- \
- out_m = __msa_maxi_s_h((v8i16)in, 0); \
- out_m = __msa_min_s_h((v8i16)max_m, (v8i16)out_m); \
- out_m; \
- })
-/* In-place variant: clips 'in0' and 'in1', each via CLIP_SH_0_255. */
-#define CLIP_SH2_0_255(in0, in1) \
- { \
- in0 = CLIP_SH_0_255(in0); \
- in1 = CLIP_SH_0_255(in1); \
- }
-/* In-place variant for four vectors, built from two CLIP_SH2_0_255 calls. */
-#define CLIP_SH4_0_255(in0, in1, in2, in3) \
- { \
- CLIP_SH2_0_255(in0, in1); \
- CLIP_SH2_0_255(in2, in3); \
- }
-
-/* Description : Horizontal addition of 4 signed word elements of input vector
- Arguments : Input - in (signed word vector)
- Output - sum_m (i32 sum, value of the expression)
- Return Type - signed word (GP)
- Details : 4 signed word elements of 'in' vector are added together and
- the resulting integer sum is returned.
- Steps: pairwise add into two double words, add the two double
- words, then copy word element 0 of that total into 'sum_m'
- (so the result is truncated to 32 bits).
-*/
-#define HADD_SW_S32(in) \
- ({ \
- v2i64 res0_m, res1_m; \
- int32_t sum_m; \
- \
- res0_m = __msa_hadd_s_d((v4i32)in, (v4i32)in); \
- res1_m = __msa_splati_d(res0_m, 1); \
- res0_m = res0_m + res1_m; \
- sum_m = __msa_copy_s_w((v4i32)res0_m, 0); \
- sum_m; \
- })
-
-/* Description : Horizontal addition of 8 unsigned halfword elements
- Arguments : Inputs - in (unsigned halfword vector)
- Outputs - sum_m (u32 sum, value of the expression)
- Return Type - unsigned word
- Details : 8 unsigned halfword elements of input vector are added
- together and the resulting integer sum is returned.
- Steps: pairwise add halfwords into words, pairwise add words
- into double words, add the two double words, then copy word
- element 0 of that total into 'sum_m'.
-*/
-#define HADD_UH_U32(in) \
- ({ \
- v4u32 res_m; \
- v2u64 res0_m, res1_m; \
- uint32_t sum_m; \
- \
- res_m = __msa_hadd_u_w((v8u16)in, (v8u16)in); \
- res0_m = __msa_hadd_u_d(res_m, res_m); \
- res1_m = (v2u64)__msa_splati_d((v2i64)res0_m, 1); \
- res0_m = res0_m + res1_m; \
- sum_m = __msa_copy_u_w((v4i32)res0_m, 0); \
- sum_m; \
- })
-
-/* Description : Horizontal addition of unsigned byte vector elements
- Arguments : Inputs - in0, in1
- Outputs - out0, out1
- Return Type - as per RTYPE
- Details : Each unsigned odd byte element from 'in0' is added to
- even unsigned byte element from 'in0' (pairwise) and the
- halfword result is written to 'out0'.
- 'out1' is computed the same way from 'in1'.
-*/
-#define HADD_UB2(RTYPE, in0, in1, out0, out1) \
- { \
- out0 = (RTYPE)__msa_hadd_u_h((v16u8)in0, (v16u8)in0); \
- out1 = (RTYPE)__msa_hadd_u_h((v16u8)in1, (v16u8)in1); \
- }
-#define HADD_UB2_UH(...) HADD_UB2(v8u16, __VA_ARGS__)
-
-/* Four-vector variant: applies HADD_UB2 to (in0, in1) and then to (in2, in3),
- writing 'out0'..'out3'. */
-#define HADD_UB4(RTYPE, in0, in1, in2, in3, out0, out1, out2, out3) \
- { \
- HADD_UB2(RTYPE, in0, in1, out0, out1); \
- HADD_UB2(RTYPE, in2, in3, out2, out3); \
- }
-#define HADD_UB4_UH(...) HADD_UB4(v8u16, __VA_ARGS__)
-
-/* Description : Horizontal subtraction of unsigned byte vector elements
- Arguments : Inputs - in0, in1
- Outputs - out0, out1
- Return Type - as per RTYPE
- Details : Odd and even unsigned byte elements of 'in0' are subtracted
- pairwise and the halfword result is written to 'out0'.
- 'out1' is computed the same way from 'in1'.
- NOTE(review): the MSA HSUB_U definition subtracts the even
- element from the odd element (odd - even); an earlier wording
- of this comment stated the opposite direction - confirm
- against the instruction reference before relying on the sign.
-*/
-#define HSUB_UB2(RTYPE, in0, in1, out0, out1) \
- { \
- out0 = (RTYPE)__msa_hsub_u_h((v16u8)in0, (v16u8)in0); \
- out1 = (RTYPE)__msa_hsub_u_h((v16u8)in1, (v16u8)in1); \
- }
-#define HSUB_UB2_SH(...) HSUB_UB2(v8i16, __VA_ARGS__)
-
-/* Description : SAD (Sum of Absolute Difference)
- Arguments : Inputs - in0, in1, ref0, ref1
- Outputs - sad_m (halfword vector, value of the expression)
- Return Type - unsigned halfword
- Details : Absolute difference of all the byte elements from 'in0' with
- 'ref0' is calculated and preserved in 'diff0_m' (and of 'in1'
- with 'ref1' in 'diff1_m'). Then even-odd pairs of each
- difference vector are added together and both sets of 8
- halfword results are accumulated into 'sad_m', which starts
- at zero.
-*/
-#define SAD_UB2_UH(in0, in1, ref0, ref1) \
- ({ \
- v16u8 diff0_m, diff1_m; \
- v8u16 sad_m = { 0 }; \
- \
- diff0_m = __msa_asub_u_b((v16u8)in0, (v16u8)ref0); \
- diff1_m = __msa_asub_u_b((v16u8)in1, (v16u8)ref1); \
- \
- sad_m += __msa_hadd_u_h((v16u8)diff0_m, (v16u8)diff0_m); \
- sad_m += __msa_hadd_u_h((v16u8)diff1_m, (v16u8)diff1_m); \
- \
- sad_m; \
- })
-
-/* Description : Horizontal subtraction of signed halfword vector elements
- Arguments : Inputs - in0, in1
- Outputs - out0, out1
- Return Type - as per RTYPE
- Details : Odd and even signed halfword elements of 'in0' are subtracted
- pairwise and the word result is written to 'out0'.
- 'out1' is computed the same way from 'in1'.
- Despite the 'UH' in the macro name, the inputs are cast to
- v8i16 and the signed intrinsic __msa_hsub_s_w is used.
- NOTE(review): the MSA HSUB_S definition subtracts the even
- element from the odd element (odd - even) - confirm against
- the instruction reference before relying on the sign.
-*/
-#define HSUB_UH2(RTYPE, in0, in1, out0, out1) \
- { \
- out0 = (RTYPE)__msa_hsub_s_w((v8i16)in0, (v8i16)in0); \
- out1 = (RTYPE)__msa_hsub_s_w((v8i16)in1, (v8i16)in1); \
- }
-#define HSUB_UH2_SW(...) HSUB_UH2(v4i32, __VA_ARGS__)
-
-/* Description : Set consecutive elements of a vector to GPR values
- Arguments : Inputs - in0, in1, in2, in3
- Output - out (also read: untouched elements are preserved)
- Return Type - as per RTYPE
- Details : Set element 0 in vector 'out' to value specified in 'in0',
- element 1 to 'in1', and so on for each input supplied.
- INSERT_W2 sets word elements 0-1 of 'out'.
-*/
-#define INSERT_W2(RTYPE, in0, in1, out) \
- { \
- out = (RTYPE)__msa_insert_w((v4i32)out, 0, in0); \
- out = (RTYPE)__msa_insert_w((v4i32)out, 1, in1); \
- }
-#define INSERT_W2_SB(...) INSERT_W2(v16i8, __VA_ARGS__)
-
-/* Sets all four word elements 0-3 of 'out' from 'in0'..'in3'. */
-#define INSERT_W4(RTYPE, in0, in1, in2, in3, out) \
- { \
- out = (RTYPE)__msa_insert_w((v4i32)out, 0, in0); \
- out = (RTYPE)__msa_insert_w((v4i32)out, 1, in1); \
- out = (RTYPE)__msa_insert_w((v4i32)out, 2, in2); \
- out = (RTYPE)__msa_insert_w((v4i32)out, 3, in3); \
- }
-#define INSERT_W4_UB(...) INSERT_W4(v16u8, __VA_ARGS__)
-#define INSERT_W4_SB(...) INSERT_W4(v16i8, __VA_ARGS__)
-
-/* Sets both double word elements 0-1 of 'out' from 'in0' and 'in1'. */
-#define INSERT_D2(RTYPE, in0, in1, out) \
- { \
- out = (RTYPE)__msa_insert_d((v2i64)out, 0, in0); \
- out = (RTYPE)__msa_insert_d((v2i64)out, 1, in1); \
- }
-#define INSERT_D2_UB(...) INSERT_D2(v16u8, __VA_ARGS__)
-#define INSERT_D2_SB(...) INSERT_D2(v16i8, __VA_ARGS__)
-
-/* Description : Interleave even byte elements from vectors
- Arguments : Inputs - in0, in1, in2, in3
- Outputs - out0, out1
- Return Type - as per RTYPE
- Details : Even byte elements of 'in0' and 'in1' are interleaved
- and written to 'out0'. Even byte elements of 'in2' and 'in3'
- are interleaved and written to 'out1'.
- Note the intrinsic receives each pair swapped: (in1, in0).
-*/
-#define ILVEV_B2(RTYPE, in0, in1, in2, in3, out0, out1) \
- { \
- out0 = (RTYPE)__msa_ilvev_b((v16i8)in1, (v16i8)in0); \
- out1 = (RTYPE)__msa_ilvev_b((v16i8)in3, (v16i8)in2); \
- }
-#define ILVEV_B2_UB(...) ILVEV_B2(v16u8, __VA_ARGS__)
-#define ILVEV_B2_SH(...) ILVEV_B2(v8i16, __VA_ARGS__)
-
-/* Description : Interleave even halfword elements from vectors
- Arguments : Inputs - in0, in1, in2, in3
- Outputs - out0, out1
- Return Type - as per RTYPE
- Details : Even halfword elements of 'in0' and 'in1' are interleaved
- and written to 'out0'. Even halfword elements of 'in2' and
- 'in3' are interleaved and written to 'out1'.
- Note the intrinsic receives each pair swapped: (in1, in0).
-*/
-#define ILVEV_H2(RTYPE, in0, in1, in2, in3, out0, out1) \
- { \
- out0 = (RTYPE)__msa_ilvev_h((v8i16)in1, (v8i16)in0); \
- out1 = (RTYPE)__msa_ilvev_h((v8i16)in3, (v8i16)in2); \
- }
-#define ILVEV_H2_UB(...) ILVEV_H2(v16u8, __VA_ARGS__)
-#define ILVEV_H2_SH(...) ILVEV_H2(v8i16, __VA_ARGS__)
-#define ILVEV_H2_SW(...) ILVEV_H2(v4i32, __VA_ARGS__)
-
-/* Description : Interleave even word elements from vectors
- Arguments : Inputs - in0, in1, in2, in3
- Outputs - out0, out1
- Return Type - as per RTYPE
- Details : Even word elements of 'in0' and 'in1' are interleaved
- and written to 'out0'. Even word elements of 'in2' and 'in3'
- are interleaved and written to 'out1'.
- Note the intrinsic receives each pair swapped: (in1, in0).
-*/
-#define ILVEV_W2(RTYPE, in0, in1, in2, in3, out0, out1) \
- { \
- out0 = (RTYPE)__msa_ilvev_w((v4i32)in1, (v4i32)in0); \
- out1 = (RTYPE)__msa_ilvev_w((v4i32)in3, (v4i32)in2); \
- }
-#define ILVEV_W2_SB(...) ILVEV_W2(v16i8, __VA_ARGS__)
-
-/* Description : Interleave even double word elements from vectors
- Arguments : Inputs - in0, in1, in2, in3
- Outputs - out0, out1
- Return Type - as per RTYPE
- Details : Even double word elements of 'in0' and 'in1' are interleaved
- and written to 'out0'. Even double word elements of 'in2' and
- 'in3' are interleaved and written to 'out1'.
- Note the intrinsic receives each pair swapped: (in1, in0).
-*/
-#define ILVEV_D2(RTYPE, in0, in1, in2, in3, out0, out1) \
- { \
- out0 = (RTYPE)__msa_ilvev_d((v2i64)in1, (v2i64)in0); \
- out1 = (RTYPE)__msa_ilvev_d((v2i64)in3, (v2i64)in2); \
- }
-#define ILVEV_D2_UB(...) ILVEV_D2(v16u8, __VA_ARGS__)
-
-/* Description : Interleave left half of byte elements from vectors
- Arguments : Inputs - in0, in1, in2, in3
- Outputs - out0, out1
- Return Type - as per RTYPE
- Details : Left half of byte elements of 'in0' and 'in1' are interleaved
- and written to 'out0'. Left half of byte elements of 'in2'
- and 'in3' are interleaved and written to 'out1'.
-*/
-#define ILVL_B2(RTYPE, in0, in1, in2, in3, out0, out1) \
- { \
- out0 = (RTYPE)__msa_ilvl_b((v16i8)in0, (v16i8)in1); \
- out1 = (RTYPE)__msa_ilvl_b((v16i8)in2, (v16i8)in3); \
- }
-#define ILVL_B2_UB(...) ILVL_B2(v16u8, __VA_ARGS__)
-#define ILVL_B2_SB(...) ILVL_B2(v16i8, __VA_ARGS__)
-#define ILVL_B2_UH(...) ILVL_B2(v8u16, __VA_ARGS__)
-#define ILVL_B2_SH(...) ILVL_B2(v8i16, __VA_ARGS__)
-
-/* Four-output variant: applies ILVL_B2 to (in0..in3) and then to (in4..in7),
- so 'outN' interleaves the pair 'in(2N)' & 'in(2N+1)'. */
-#define ILVL_B4(RTYPE, in0, in1, in2, in3, in4, in5, in6, in7, out0, out1, \
- out2, out3) \
- { \
- ILVL_B2(RTYPE, in0, in1, in2, in3, out0, out1); \
- ILVL_B2(RTYPE, in4, in5, in6, in7, out2, out3); \
- }
-#define ILVL_B4_SB(...) ILVL_B4(v16i8, __VA_ARGS__)
-#define ILVL_B4_SH(...) ILVL_B4(v8i16, __VA_ARGS__)
-#define ILVL_B4_UH(...) ILVL_B4(v8u16, __VA_ARGS__)
-
-/* Description : Interleave left half of halfword elements from vectors
- Arguments : Inputs - in0, in1, in2, in3
- Outputs - out0, out1
- Return Type - as per RTYPE
- Details : Left half of halfword elements of 'in0' and 'in1' are
- interleaved and written to 'out0'. Left half of halfword
- elements of 'in2' and 'in3' are interleaved and written to
- 'out1'.
-*/
-#define ILVL_H2(RTYPE, in0, in1, in2, in3, out0, out1) \
- { \
- out0 = (RTYPE)__msa_ilvl_h((v8i16)in0, (v8i16)in1); \
- out1 = (RTYPE)__msa_ilvl_h((v8i16)in2, (v8i16)in3); \
- }
-#define ILVL_H2_SH(...) ILVL_H2(v8i16, __VA_ARGS__)
-#define ILVL_H2_SW(...) ILVL_H2(v4i32, __VA_ARGS__)
-
-/* Description : Interleave left half of word elements from vectors
- Arguments : Inputs - in0, in1, in2, in3
- Outputs - out0, out1
- Return Type - as per RTYPE
- Details : Left half of word elements of 'in0' and 'in1' are interleaved
- and written to 'out0'. Left half of word elements of 'in2'
- and 'in3' are interleaved and written to 'out1'.
-*/
-#define ILVL_W2(RTYPE, in0, in1, in2, in3, out0, out1) \
- { \
- out0 = (RTYPE)__msa_ilvl_w((v4i32)in0, (v4i32)in1); \
- out1 = (RTYPE)__msa_ilvl_w((v4i32)in2, (v4i32)in3); \
- }
-#define ILVL_W2_UB(...) ILVL_W2(v16u8, __VA_ARGS__)
-#define ILVL_W2_SH(...) ILVL_W2(v8i16, __VA_ARGS__)
-
-/* Description : Interleave right half of byte elements from vectors
- Arguments : Inputs - in0, in1, in2, in3
- Outputs - out0, out1
- Return Type - as per RTYPE
- Details : Right half of byte elements of 'in0' and 'in1' are interleaved
- and written to 'out0'. Right half of byte elements of 'in2'
- and 'in3' are interleaved and written to 'out1'.
-*/
-#define ILVR_B2(RTYPE, in0, in1, in2, in3, out0, out1) \
- { \
- out0 = (RTYPE)__msa_ilvr_b((v16i8)in0, (v16i8)in1); \
- out1 = (RTYPE)__msa_ilvr_b((v16i8)in2, (v16i8)in3); \
- }
-#define ILVR_B2_UB(...) ILVR_B2(v16u8, __VA_ARGS__)
-#define ILVR_B2_SB(...) ILVR_B2(v16i8, __VA_ARGS__)
-#define ILVR_B2_UH(...) ILVR_B2(v8u16, __VA_ARGS__)
-#define ILVR_B2_SH(...) ILVR_B2(v8i16, __VA_ARGS__)
-
-/* Four-output variant: applies ILVR_B2 to (in0..in3) and then to (in4..in7),
- so 'outN' interleaves the pair 'in(2N)' & 'in(2N+1)'. */
-#define ILVR_B4(RTYPE, in0, in1, in2, in3, in4, in5, in6, in7, out0, out1, \
- out2, out3) \
- { \
- ILVR_B2(RTYPE, in0, in1, in2, in3, out0, out1); \
- ILVR_B2(RTYPE, in4, in5, in6, in7, out2, out3); \
- }
-#define ILVR_B4_UB(...) ILVR_B4(v16u8, __VA_ARGS__)
-#define ILVR_B4_SB(...) ILVR_B4(v16i8, __VA_ARGS__)
-#define ILVR_B4_UH(...) ILVR_B4(v8u16, __VA_ARGS__)
-#define ILVR_B4_SH(...) ILVR_B4(v8i16, __VA_ARGS__)
-
-/* Eight-output variant: applies ILVR_B4 to (in0..in7) and then to
- (in8..in15), writing 'out0'..'out7'. */
-#define ILVR_B8(RTYPE, in0, in1, in2, in3, in4, in5, in6, in7, in8, in9, in10, \
- in11, in12, in13, in14, in15, out0, out1, out2, out3, out4, \
- out5, out6, out7) \
- { \
- ILVR_B4(RTYPE, in0, in1, in2, in3, in4, in5, in6, in7, out0, out1, out2, \
- out3); \
- ILVR_B4(RTYPE, in8, in9, in10, in11, in12, in13, in14, in15, out4, out5, \
- out6, out7); \
- }
-#define ILVR_B8_UH(...) ILVR_B8(v8u16, __VA_ARGS__)
-
-/* Description : Interleave right half of halfword elements from vectors
- Arguments : Inputs - in0, in1, in2, in3
- Outputs - out0, out1
- Return Type - as per RTYPE
- Details : Right half of halfword elements of 'in0' and 'in1' are
- interleaved and written to 'out0'. Right half of halfword
- elements of 'in2' and 'in3' are interleaved and written to
- 'out1'.
-*/
-#define ILVR_H2(RTYPE, in0, in1, in2, in3, out0, out1) \
- { \
- out0 = (RTYPE)__msa_ilvr_h((v8i16)in0, (v8i16)in1); \
- out1 = (RTYPE)__msa_ilvr_h((v8i16)in2, (v8i16)in3); \
- }
-#define ILVR_H2_SH(...) ILVR_H2(v8i16, __VA_ARGS__)
-#define ILVR_H2_SW(...) ILVR_H2(v4i32, __VA_ARGS__)
-
-/* Four-output variant: applies ILVR_H2 to (in0..in3) and then to (in4..in7),
- so 'outN' interleaves the pair 'in(2N)' & 'in(2N+1)'. */
-#define ILVR_H4(RTYPE, in0, in1, in2, in3, in4, in5, in6, in7, out0, out1, \
- out2, out3) \
- { \
- ILVR_H2(RTYPE, in0, in1, in2, in3, out0, out1); \
- ILVR_H2(RTYPE, in4, in5, in6, in7, out2, out3); \
- }
-#define ILVR_H4_SH(...) ILVR_H4(v8i16, __VA_ARGS__)
-
-/* Interleave right half of word elements: 'out0' is produced from the pair
- 'in0' & 'in1' and 'out1' from the pair 'in2' & 'in3', each via
- __msa_ilvr_w on the inputs viewed as v4i32. Result type as per RTYPE. */
-#define ILVR_W2(RTYPE, in0, in1, in2, in3, out0, out1) \
- { \
- out0 = (RTYPE)__msa_ilvr_w((v4i32)in0, (v4i32)in1); \
- out1 = (RTYPE)__msa_ilvr_w((v4i32)in2, (v4i32)in3); \
- }
-#define ILVR_W2_UB(...) ILVR_W2(v16u8, __VA_ARGS__)
-#define ILVR_W2_SH(...) ILVR_W2(v8i16, __VA_ARGS__)
-
-/* Four-output variant: applies ILVR_W2 to (in0..in3) and then to (in4..in7),
- so 'outN' interleaves the pair 'in(2N)' & 'in(2N+1)'. */
-#define ILVR_W4(RTYPE, in0, in1, in2, in3, in4, in5, in6, in7, out0, out1, \
- out2, out3) \
- { \
- ILVR_W2(RTYPE, in0, in1, in2, in3, out0, out1); \
- ILVR_W2(RTYPE, in4, in5, in6, in7, out2, out3); \
- }
-#define ILVR_W4_UB(...) ILVR_W4(v16u8, __VA_ARGS__)
-
-/* Description : Interleave right half of double word elements from vectors
- Arguments : Inputs - in0, in1, in2, in3
- Outputs - out0, out1
- Return Type - as per RTYPE
- Details : Right half of double word elements of 'in0' and 'in1' are
- interleaved and written to 'out0'. Right half of double word
- elements of 'in2' and 'in3' are interleaved and written to
- 'out1'.
-*/
-#define ILVR_D2(RTYPE, in0, in1, in2, in3, out0, out1) \
- { \
- out0 = (RTYPE)__msa_ilvr_d((v2i64)(in0), (v2i64)(in1)); \
- out1 = (RTYPE)__msa_ilvr_d((v2i64)(in2), (v2i64)(in3)); \
- }
-#define ILVR_D2_UB(...) ILVR_D2(v16u8, __VA_ARGS__)
-#define ILVR_D2_SB(...) ILVR_D2(v16i8, __VA_ARGS__)
-#define ILVR_D2_SH(...) ILVR_D2(v8i16, __VA_ARGS__)
-
-/* Three-output variant: ILVR_D2 on (in0..in3), plus 'out2' produced from
- the pair 'in4' & 'in5'. */
-#define ILVR_D3(RTYPE, in0, in1, in2, in3, in4, in5, out0, out1, out2) \
- { \
- ILVR_D2(RTYPE, in0, in1, in2, in3, out0, out1); \
- out2 = (RTYPE)__msa_ilvr_d((v2i64)(in4), (v2i64)(in5)); \
- }
-#define ILVR_D3_SB(...) ILVR_D3(v16i8, __VA_ARGS__)
-
-/* Four-output variant: applies ILVR_D2 to (in0..in3) and then to (in4..in7),
- so 'outN' interleaves the pair 'in(2N)' & 'in(2N+1)'. */
-#define ILVR_D4(RTYPE, in0, in1, in2, in3, in4, in5, in6, in7, out0, out1, \
- out2, out3) \
- { \
- ILVR_D2(RTYPE, in0, in1, in2, in3, out0, out1); \
- ILVR_D2(RTYPE, in4, in5, in6, in7, out2, out3); \
- }
-#define ILVR_D4_SB(...) ILVR_D4(v16i8, __VA_ARGS__)
-#define ILVR_D4_UB(...) ILVR_D4(v16u8, __VA_ARGS__)
-
-/* Description : Interleave both left and right half of input vectors
- Arguments : Inputs - in0, in1
- Outputs - out0, out1
- Return Type - as per RTYPE
- Details : Right half of byte elements from 'in0' and 'in1' are
- interleaved and written to 'out0'. Left half of byte elements
- from the same 'in0' and 'in1' are interleaved and written to
- 'out1'.
-*/
-#define ILVRL_B2(RTYPE, in0, in1, out0, out1) \
- { \
- out0 = (RTYPE)__msa_ilvr_b((v16i8)in0, (v16i8)in1); \
- out1 = (RTYPE)__msa_ilvl_b((v16i8)in0, (v16i8)in1); \
- }
-#define ILVRL_B2_UB(...) ILVRL_B2(v16u8, __VA_ARGS__)
-#define ILVRL_B2_SB(...) ILVRL_B2(v16i8, __VA_ARGS__)
-#define ILVRL_B2_UH(...) ILVRL_B2(v8u16, __VA_ARGS__)
-#define ILVRL_B2_SH(...) ILVRL_B2(v8i16, __VA_ARGS__)
-
-/* Halfword form: 'out0' gets the right-half interleave and 'out1' the
- left-half interleave of 'in0' & 'in1' viewed as halfword vectors. */
-#define ILVRL_H2(RTYPE, in0, in1, out0, out1) \
- { \
- out0 = (RTYPE)__msa_ilvr_h((v8i16)in0, (v8i16)in1); \
- out1 = (RTYPE)__msa_ilvl_h((v8i16)in0, (v8i16)in1); \
- }
-#define ILVRL_H2_SH(...) ILVRL_H2(v8i16, __VA_ARGS__)
-#define ILVRL_H2_SW(...) ILVRL_H2(v4i32, __VA_ARGS__)
-
-/* Word form: 'out0' gets the right-half interleave and 'out1' the left-half
- interleave of 'in0' & 'in1' viewed as word vectors. */
-#define ILVRL_W2(RTYPE, in0, in1, out0, out1) \
- { \
- out0 = (RTYPE)__msa_ilvr_w((v4i32)in0, (v4i32)in1); \
- out1 = (RTYPE)__msa_ilvl_w((v4i32)in0, (v4i32)in1); \
- }
-#define ILVRL_W2_UB(...) ILVRL_W2(v16u8, __VA_ARGS__)
-#define ILVRL_W2_SH(...) ILVRL_W2(v8i16, __VA_ARGS__)
-#define ILVRL_W2_SW(...) ILVRL_W2(v4i32, __VA_ARGS__)
-
-/* Description : Saturate the halfword element values to the max
- unsigned value of (sat_val + 1) bits
- The element data width remains unchanged
- Arguments : Inputs - in0, in1, sat_val
- Outputs - in place operation
- Return Type - as per RTYPE
- Details : Each unsigned halfword element from 'in0' is saturated to the
- value generated with (sat_val + 1) bit range.
- The results are written in place. 'in1' is saturated the same
- way with the same 'sat_val'.
-*/
-#define SAT_UH2(RTYPE, in0, in1, sat_val) \
- { \
- in0 = (RTYPE)__msa_sat_u_h((v8u16)in0, sat_val); \
- in1 = (RTYPE)__msa_sat_u_h((v8u16)in1, sat_val); \
- }
-#define SAT_UH2_UH(...) SAT_UH2(v8u16, __VA_ARGS__)
-
-/* Four-vector variant: applies SAT_UH2 in place to (in0, in1) and then to
- (in2, in3), all with the same 'sat_val'. Both nested invocations are
- terminated with ';' to match the other 4-way wrappers in this file. */
-#define SAT_UH4(RTYPE, in0, in1, in2, in3, sat_val) \
- { \
- SAT_UH2(RTYPE, in0, in1, sat_val); \
- SAT_UH2(RTYPE, in2, in3, sat_val); \
- }
-#define SAT_UH4_UH(...) SAT_UH4(v8u16, __VA_ARGS__)
-
-/* Description : Saturate the halfword element values to the signed
- range of (sat_val + 1) bits
- The element data width remains unchanged
- Arguments : Inputs - in0, in1, sat_val
- Outputs - in place operation
- Return Type - as per RTYPE
- Details : Each signed halfword element from 'in0' is saturated to the
- value generated with (sat_val + 1) bit range
- The results are written in place. 'in1' is saturated the same
- way with the same 'sat_val'.
-*/
-#define SAT_SH2(RTYPE, in0, in1, sat_val) \
- { \
- in0 = (RTYPE)__msa_sat_s_h((v8i16)in0, sat_val); \
- in1 = (RTYPE)__msa_sat_s_h((v8i16)in1, sat_val); \
- }
-#define SAT_SH2_SH(...) SAT_SH2(v8i16, __VA_ARGS__)
-
-/* Four-vector variant: applies SAT_SH2 in place to (in0, in1) and then to
- (in2, in3), all with the same 'sat_val'. */
-#define SAT_SH4(RTYPE, in0, in1, in2, in3, sat_val) \
- { \
- SAT_SH2(RTYPE, in0, in1, sat_val); \
- SAT_SH2(RTYPE, in2, in3, sat_val); \
- }
-#define SAT_SH4_SH(...) SAT_SH4(v8i16, __VA_ARGS__)
-
-/* Description : Indexed halfword element values are replicated to all
- elements in output vector
- Arguments : Inputs - in, idx0, idx1
- Outputs - out0, out1
- Return Type - as per RTYPE
- Details : 'idx0' element value from 'in' vector is replicated to all
- elements in 'out0' vector; 'idx1' element value from the same
- 'in' vector is replicated to all elements in 'out1'.
- Valid index range for halfword operation is 0-7
-*/
-#define SPLATI_H2(RTYPE, in, idx0, idx1, out0, out1) \
- { \
- out0 = (RTYPE)__msa_splati_h((v8i16)in, idx0); \
- out1 = (RTYPE)__msa_splati_h((v8i16)in, idx1); \
- }
-#define SPLATI_H2_SH(...) SPLATI_H2(v8i16, __VA_ARGS__)
-
-/* Four-index variant: 'outN' is the splat of element 'idxN' of the single
- source vector 'in', built from two SPLATI_H2 calls. */
-#define SPLATI_H4(RTYPE, in, idx0, idx1, idx2, idx3, out0, out1, out2, out3) \
- { \
- SPLATI_H2(RTYPE, in, idx0, idx1, out0, out1); \
- SPLATI_H2(RTYPE, in, idx2, idx3, out2, out3); \
- }
-#define SPLATI_H4_SB(...) SPLATI_H4(v16i8, __VA_ARGS__)
-#define SPLATI_H4_SH(...) SPLATI_H4(v8i16, __VA_ARGS__)
-
-/* Description : Pack even byte elements of vector pairs
- Arguments : Inputs - in0, in1, in2, in3
- Outputs - out0, out1
- Return Type - as per RTYPE
- Details : Even byte elements of 'in0' are copied to the left half of
- 'out0' & even byte elements of 'in1' are copied to the right
- half of 'out0'. 'out1' is packed the same way from 'in2'
- (left half) and 'in3' (right half).
-*/
-#define PCKEV_B2(RTYPE, in0, in1, in2, in3, out0, out1) \
- { \
- out0 = (RTYPE)__msa_pckev_b((v16i8)in0, (v16i8)in1); \
- out1 = (RTYPE)__msa_pckev_b((v16i8)in2, (v16i8)in3); \
- }
-#define PCKEV_B2_SB(...) PCKEV_B2(v16i8, __VA_ARGS__)
-#define PCKEV_B2_UB(...) PCKEV_B2(v16u8, __VA_ARGS__)
-#define PCKEV_B2_SH(...) PCKEV_B2(v8i16, __VA_ARGS__)
-
-/* Four-output variant: applies PCKEV_B2 to (in0..in3) and then to (in4..in7),
- so 'outN' packs the pair 'in(2N)' & 'in(2N+1)'. */
-#define PCKEV_B4(RTYPE, in0, in1, in2, in3, in4, in5, in6, in7, out0, out1, \
- out2, out3) \
- { \
- PCKEV_B2(RTYPE, in0, in1, in2, in3, out0, out1); \
- PCKEV_B2(RTYPE, in4, in5, in6, in7, out2, out3); \
- }
-#define PCKEV_B4_SB(...) PCKEV_B4(v16i8, __VA_ARGS__)
-#define PCKEV_B4_UB(...) PCKEV_B4(v16u8, __VA_ARGS__)
-#define PCKEV_B4_SH(...) PCKEV_B4(v8i16, __VA_ARGS__)
-
-/* Description : Pack even halfword elements of vector pairs
- Arguments : Inputs - in0, in1, in2, in3
- Outputs - out0, out1
- Return Type - as per RTYPE
- Details : Even halfword elements of 'in0' are copied to the left half of
- 'out0' & even halfword elements of 'in1' are copied to the
- right half of 'out0'. 'out1' is packed the same way from 'in2'
- (left half) and 'in3' (right half).
-*/
-#define PCKEV_H2(RTYPE, in0, in1, in2, in3, out0, out1) \
- { \
- out0 = (RTYPE)__msa_pckev_h((v8i16)in0, (v8i16)in1); \
- out1 = (RTYPE)__msa_pckev_h((v8i16)in2, (v8i16)in3); \
- }
-#define PCKEV_H2_SH(...) PCKEV_H2(v8i16, __VA_ARGS__)
-#define PCKEV_H2_SW(...) PCKEV_H2(v4i32, __VA_ARGS__)
-
-/* Four-output variant: applies PCKEV_H2 to (in0..in3) and then to (in4..in7),
- so 'outN' packs the pair 'in(2N)' & 'in(2N+1)'. */
-#define PCKEV_H4(RTYPE, in0, in1, in2, in3, in4, in5, in6, in7, out0, out1, \
- out2, out3) \
- { \
- PCKEV_H2(RTYPE, in0, in1, in2, in3, out0, out1); \
- PCKEV_H2(RTYPE, in4, in5, in6, in7, out2, out3); \
- }
-#define PCKEV_H4_SH(...) PCKEV_H4(v8i16, __VA_ARGS__)
-
-/* Description : Pack even double word elements of vector pairs
- Arguments : Inputs - in0, in1, in2, in3
- Outputs - out0, out1
- Return Type - as per RTYPE
- Details : Even double word elements of 'in0' are copied to the left
- half of 'out0' & even double word elements of 'in1' are copied
- to the right half of 'out0'. 'out1' is packed the same way
- from 'in2' (left half) and 'in3' (right half).
-*/
-#define PCKEV_D2(RTYPE, in0, in1, in2, in3, out0, out1) \
- { \
- out0 = (RTYPE)__msa_pckev_d((v2i64)in0, (v2i64)in1); \
- out1 = (RTYPE)__msa_pckev_d((v2i64)in2, (v2i64)in3); \
- }
-#define PCKEV_D2_UB(...) PCKEV_D2(v16u8, __VA_ARGS__)
-#define PCKEV_D2_SH(...) PCKEV_D2(v8i16, __VA_ARGS__)
-
-/* Four-output variant: applies PCKEV_D2 to (in0..in3) and then to (in4..in7),
- so 'outN' packs the pair 'in(2N)' & 'in(2N+1)'. */
-#define PCKEV_D4(RTYPE, in0, in1, in2, in3, in4, in5, in6, in7, out0, out1, \
- out2, out3) \
- { \
- PCKEV_D2(RTYPE, in0, in1, in2, in3, out0, out1); \
- PCKEV_D2(RTYPE, in4, in5, in6, in7, out2, out3); \
- }
-#define PCKEV_D4_UB(...) PCKEV_D4(v16u8, __VA_ARGS__)
-
-/* Description : Each byte element is logically xor'ed with immediate 128
- Arguments : Inputs - in0, in1
- Outputs - in place operation
- Return Type - as per RTYPE
- Details : Each unsigned byte element from input vector 'in0' is
- logically xor'ed with 128 and the result is stored in-place.
- 'in1' is processed the same way. XOR with 128 flips the top
- bit of every byte.
-*/
-#define XORI_B2_128(RTYPE, in0, in1) \
- { \
- in0 = (RTYPE)__msa_xori_b((v16u8)in0, 128); \
- in1 = (RTYPE)__msa_xori_b((v16u8)in1, 128); \
- }
-#define XORI_B2_128_UB(...) XORI_B2_128(v16u8, __VA_ARGS__)
-#define XORI_B2_128_SB(...) XORI_B2_128(v16i8, __VA_ARGS__)
-
-/* Three-vector variant: XORI_B2_128 on (in0, in1), plus 'in2' in place. */
-#define XORI_B3_128(RTYPE, in0, in1, in2) \
- { \
- XORI_B2_128(RTYPE, in0, in1); \
- in2 = (RTYPE)__msa_xori_b((v16u8)in2, 128); \
- }
-#define XORI_B3_128_SB(...) XORI_B3_128(v16i8, __VA_ARGS__)
-
-/* Four-vector variant: XORI_B2_128 on (in0, in1) and then on (in2, in3). */
-#define XORI_B4_128(RTYPE, in0, in1, in2, in3) \
- { \
- XORI_B2_128(RTYPE, in0, in1); \
- XORI_B2_128(RTYPE, in2, in3); \
- }
-#define XORI_B4_128_UB(...) XORI_B4_128(v16u8, __VA_ARGS__)
-#define XORI_B4_128_SB(...) XORI_B4_128(v16i8, __VA_ARGS__)
-
-/* Seven-vector variant: XORI_B4_128 on (in0..in3), then XORI_B3_128 on
- (in4..in6). */
-#define XORI_B7_128(RTYPE, in0, in1, in2, in3, in4, in5, in6) \
- { \
- XORI_B4_128(RTYPE, in0, in1, in2, in3); \
- XORI_B3_128(RTYPE, in4, in5, in6); \
- }
-#define XORI_B7_128_SB(...) XORI_B7_128(v16i8, __VA_ARGS__)
-
-/* Description : Average of signed halfword elements -> (a + b) / 2
- Arguments : Inputs - in0, in1, in2, in3, in4, in5, in6, in7
- Outputs - out0, out1, out2, out3
- Return Type - as per RTYPE
- Details : Each signed halfword element from 'in0' is added to each
- signed halfword element of 'in1' with full precision resulting
- in one extra bit in the result. The result is then divided by
- 2 and written to 'out0'. 'out1', 'out2' and 'out3' are the
- averages of the pairs (in2, in3), (in4, in5) and (in6, in7).
-*/
-#define AVE_SH4(RTYPE, in0, in1, in2, in3, in4, in5, in6, in7, out0, out1, \
- out2, out3) \
- { \
- out0 = (RTYPE)__msa_ave_s_h((v8i16)in0, (v8i16)in1); \
- out1 = (RTYPE)__msa_ave_s_h((v8i16)in2, (v8i16)in3); \
- out2 = (RTYPE)__msa_ave_s_h((v8i16)in4, (v8i16)in5); \
- out3 = (RTYPE)__msa_ave_s_h((v8i16)in6, (v8i16)in7); \
- }
-#define AVE_SH4_SH(...) AVE_SH4(v8i16, __VA_ARGS__)
-
-/* Description : Addition of signed halfword elements and signed saturation
- Arguments : Inputs - in0, in1, in2, in3
- Outputs - out0, out1
- Return Type - as per RTYPE
- Details : Signed halfword elements from 'in0' are added to signed
- halfword elements of 'in1'. The result is then signed saturated
- between halfword data type range and written to 'out0'.
- 'out1' is the saturated sum of 'in2' and 'in3'.
-*/
-#define ADDS_SH2(RTYPE, in0, in1, in2, in3, out0, out1) \
- { \
- out0 = (RTYPE)__msa_adds_s_h((v8i16)in0, (v8i16)in1); \
- out1 = (RTYPE)__msa_adds_s_h((v8i16)in2, (v8i16)in3); \
- }
-#define ADDS_SH2_SH(...) ADDS_SH2(v8i16, __VA_ARGS__)
-
-/* Four-output variant: applies ADDS_SH2 to (in0..in3) and then to (in4..in7),
- so 'outN' is the saturated sum of 'in(2N)' and 'in(2N+1)'. */
-#define ADDS_SH4(RTYPE, in0, in1, in2, in3, in4, in5, in6, in7, out0, out1, \
- out2, out3) \
- { \
- ADDS_SH2(RTYPE, in0, in1, in2, in3, out0, out1); \
- ADDS_SH2(RTYPE, in4, in5, in6, in7, out2, out3); \
- }
-#define ADDS_SH4_SH(...) ADDS_SH4(v8i16, __VA_ARGS__)
-
-/* Description : Shift left all elements of vector (generic for all data types)
- Arguments : Inputs - in0, in1, in2, in3, shift
- Outputs - in place operation
- Return Type - as per input vector RTYPE
- Details : Each element of vector 'in0' is left shifted by 'shift' and
- the result is written in-place; 'in1', 'in2' and 'in3' are
- shifted the same way by the same 'shift'.
- Operands are parenthesized so that an expression passed as
- 'shift' (e.g. 'n & 3') is evaluated as a whole before shifting.
-*/
-#define SLLI_4V(in0, in1, in2, in3, shift) \
- { \
- in0 = (in0) << (shift); \
- in1 = (in1) << (shift); \
- in2 = (in2) << (shift); \
- in3 = (in3) << (shift); \
- }
-
-/* Description : Arithmetic shift right all elements of vector
- (generic for all data types)
- Arguments : Inputs - in0, in1, in2, in3, shift
- Outputs - in place operation
- Return Type - as per input vector RTYPE
- Details : Each element of vector 'in0' is right shifted by 'shift' and
- the result is written in-place. 'shift' is a GP variable.
- 'in1', 'in2' and 'in3' are shifted the same way.
- Operands are parenthesized so that an expression passed as
- 'shift' (e.g. 'n & 3') is evaluated as a whole before shifting.
-*/
-#define SRA_4V(in0, in1, in2, in3, shift) \
- { \
- in0 = (in0) >> (shift); \
- in1 = (in1) >> (shift); \
- in2 = (in2) >> (shift); \
- in3 = (in3) >> (shift); \
- }
-
-/* Description : Shift right arithmetic rounded words
- Arguments : Inputs - in0, in1, shift
- Outputs - in place operation
- Return Type - as per RTYPE
- Details : Each element of vector 'in0' is shifted right arithmetically by
- the number of bits in the corresponding element in the vector
- 'shift'. The last discarded bit is added to shifted value for
- rounding and the result is written in-place.
- 'shift' is a vector. 'in1' is shifted the same way by the
- same 'shift' vector.
-*/
-#define SRAR_W2(RTYPE, in0, in1, shift) \
- { \
- in0 = (RTYPE)__msa_srar_w((v4i32)in0, (v4i32)shift); \
- in1 = (RTYPE)__msa_srar_w((v4i32)in1, (v4i32)shift); \
- }
-
-/* Four-vector variant: applies SRAR_W2 in place to (in0, in1) and then to
- (in2, in3), all with the same 'shift' vector. Both nested invocations are
- terminated with ';' to match the other 4-way wrappers in this file. */
-#define SRAR_W4(RTYPE, in0, in1, in2, in3, shift) \
- { \
- SRAR_W2(RTYPE, in0, in1, shift); \
- SRAR_W2(RTYPE, in2, in3, shift); \
- }
-#define SRAR_W4_SW(...) SRAR_W4(v4i32, __VA_ARGS__)
-
-/* Description : Shift right arithmetic rounded (immediate)
- Arguments : Inputs - in0, in1, shift
- Outputs - in place operation
- Return Type - as per RTYPE
- Details : Each element of vector 'in0' is shifted right arithmetically by
- the value in 'shift'. The last discarded bit is added to the
- shifted value for rounding and the result is written in-place.
- 'shift' is an immediate value. 'in1' is shifted the same way.
- The _H forms operate on halfword elements and the _W forms on
- word elements.
-*/
-#define SRARI_H2(RTYPE, in0, in1, shift) \
- { \
- in0 = (RTYPE)__msa_srari_h((v8i16)in0, shift); \
- in1 = (RTYPE)__msa_srari_h((v8i16)in1, shift); \
- }
-#define SRARI_H2_UH(...) SRARI_H2(v8u16, __VA_ARGS__)
-#define SRARI_H2_SH(...) SRARI_H2(v8i16, __VA_ARGS__)
-
-/* Four-vector halfword variant: SRARI_H2 on (in0, in1) and on (in2, in3). */
-#define SRARI_H4(RTYPE, in0, in1, in2, in3, shift) \
- { \
- SRARI_H2(RTYPE, in0, in1, shift); \
- SRARI_H2(RTYPE, in2, in3, shift); \
- }
-#define SRARI_H4_UH(...) SRARI_H4(v8u16, __VA_ARGS__)
-#define SRARI_H4_SH(...) SRARI_H4(v8i16, __VA_ARGS__)
-
-/* Word-element form of SRARI_H2: 'in0' and 'in1' are shifted in place. */
-#define SRARI_W2(RTYPE, in0, in1, shift) \
- { \
- in0 = (RTYPE)__msa_srari_w((v4i32)in0, shift); \
- in1 = (RTYPE)__msa_srari_w((v4i32)in1, shift); \
- }
-#define SRARI_W2_SW(...) SRARI_W2(v4i32, __VA_ARGS__)
-
-/* Four-vector word variant: SRARI_W2 on (in0, in1) and on (in2, in3). */
-#define SRARI_W4(RTYPE, in0, in1, in2, in3, shift) \
- { \
- SRARI_W2(RTYPE, in0, in1, shift); \
- SRARI_W2(RTYPE, in2, in3, shift); \
- }
-#define SRARI_W4_SW(...) SRARI_W4(v4i32, __VA_ARGS__)
-
-/* Description : Logical shift right all elements of vector (immediate)
-   Arguments   : Inputs  - in0, in1, in2, in3, shift
-                 Outputs - out0, out1, out2, out3
-                 Return Type - as per RTYPE
-   Details     : Each halfword element of 'in0'..'in3' is right shifted by
-                 'shift' and the result is written to the matching output
-                 vector 'out0'..'out3' (the inputs are not modified unless
-                 the caller passes the same variable as input and output).
-                 'shift' is an immediate value.
-                 SRLI_H4_SH fixes RTYPE to v8i16.
-*/
-#define SRLI_H4(RTYPE, in0, in1, in2, in3, out0, out1, out2, out3, shift) \
-  { \
-    out0 = (RTYPE)__msa_srli_h((v8i16)(in0), (shift)); \
-    out1 = (RTYPE)__msa_srli_h((v8i16)(in1), (shift)); \
-    out2 = (RTYPE)__msa_srli_h((v8i16)(in2), (shift)); \
-    out3 = (RTYPE)__msa_srli_h((v8i16)(in3), (shift)); \
-  }
-#define SRLI_H4_SH(...) SRLI_H4(v8i16, __VA_ARGS__)
-
-/* Description : Multiplication of pairs of vectors
-   Arguments   : Inputs  - in0, in1, in2, in3 (MUL2)
-                           in0 ... in7 (MUL4)
-                 Outputs - out0, out1 (MUL2); out0 ... out3 (MUL4)
-   Details     : Each element from 'in0' is multiplied with elements from 'in1'
-                 and the result is written to 'out0'; likewise 'in2' * 'in3'
-                 is written to 'out1'. MUL4 is two MUL2 invocations.
-                 Operands are parenthesized so that expression arguments
-                 (e.g. 'a + b') are multiplied as a whole.
-*/
-#define MUL2(in0, in1, in2, in3, out0, out1) \
-  { \
-    out0 = (in0) * (in1); \
-    out1 = (in2) * (in3); \
-  }
-#define MUL4(in0, in1, in2, in3, in4, in5, in6, in7, out0, out1, out2, out3) \
-  { \
-    MUL2(in0, in1, in2, in3, out0, out1); \
-    MUL2(in4, in5, in6, in7, out2, out3); \
-  }
-
-/* Description : Addition of 2 pairs of vectors
-   Arguments   : Inputs  - in0, in1, in2, in3 (ADD2)
-                           in0 ... in7 (ADD4)
-                 Outputs - out0, out1 (ADD2); out0 ... out3 (ADD4)
-   Details     : Each element in 'in0' is added to 'in1' and result is written
-                 to 'out0'; likewise 'in2' + 'in3' is written to 'out1'.
-                 ADD4 is two ADD2 invocations.
-                 Operands are parenthesized so that expression arguments with
-                 lower-precedence operators (shifts, bitwise ops) are added
-                 as a whole.
-*/
-#define ADD2(in0, in1, in2, in3, out0, out1) \
-  { \
-    out0 = (in0) + (in1); \
-    out1 = (in2) + (in3); \
-  }
-#define ADD4(in0, in1, in2, in3, in4, in5, in6, in7, out0, out1, out2, out3) \
-  { \
-    ADD2(in0, in1, in2, in3, out0, out1); \
-    ADD2(in4, in5, in6, in7, out2, out3); \
-  }
-
-/* Description : Subtraction of 2 pairs of vectors
-   Arguments   : Inputs  - in0, in1, in2, in3 (SUB2)
-                           in0 ... in7 (SUB4)
-                 Outputs - out0, out1 (SUB2); out0 ... out3 (SUB4)
-   Details     : Each element in 'in1' is subtracted from 'in0' and result is
-                 written to 'out0'; likewise 'in2' - 'in3' is written to
-                 'out1'. SUB4 is two SUB2 invocations, mirroring ADD4/MUL4.
-                 Operands are parenthesized so that a subtrahend such as
-                 'a - b' is subtracted as a whole.
-*/
-#define SUB2(in0, in1, in2, in3, out0, out1) \
-  { \
-    out0 = (in0) - (in1); \
-    out1 = (in2) - (in3); \
-  }
-#define SUB4(in0, in1, in2, in3, in4, in5, in6, in7, out0, out1, out2, out3) \
-  { \
-    SUB2(in0, in1, in2, in3, out0, out1); \
-    SUB2(in4, in5, in6, in7, out2, out3); \
-  }
-
-/* Description : Sign extend halfword elements from right half of the vector
-   Arguments   : Input  - in (halfword vector)
-                 Output - out (sign extended word vector)
-                 Return Type - signed word
-   Details     : A per-element "less than zero" mask of input vector 'in' is
-                 computed (__msa_clti_s_h) and right-interleaved with 'in'
-                 itself to generate 4 word elements keeping sign intact.
-                 'in' is evaluated twice, so it must not have side effects.
-*/
-#define UNPCK_R_SH_SW(in, out) \
-  { \
-    v8i16 sign_m; \
-    \
-    sign_m = __msa_clti_s_h((v8i16)(in), 0); \
-    out = (v4i32)__msa_ilvr_h(sign_m, (v8i16)(in)); \
-  }
-
-/* Description : Zero extend unsigned byte elements to halfword elements
-   Arguments   : Input   - in (unsigned byte vector)
-                 Outputs - out0, out1 (halfword vectors holding the
-                           zero-extended values)
-                 Return Type - signed halfword
-   Details     : Zero extended right half of vector is returned in 'out0'
-                 Zero extended left half of vector is returned in 'out1'
-                 The extension is performed by passing an all-zero byte
-                 vector together with 'in' to ILVRL_B2_SH.
-*/
-#define UNPCK_UB_SH(in, out0, out1) \
- { \
- v16i8 zero_m = { 0 }; \
- \
- ILVRL_B2_SH(zero_m, in, out0, out1); \
- }
-
-/* Description : Sign extend halfword elements from input vector and return
-                 the result in pair of vectors
-   Arguments   : Input   - in (halfword vector)
-                 Outputs - out0, out1 (sign extended word vectors)
-                 Return Type - signed word
-   Details     : A per-element "less than zero" mask of input vector 'in' is
-                 computed (__msa_clti_s_h) and interleaved right with 'in'
-                 to generate 4 signed word elements in 'out0'.
-                 Then interleaved left with 'in' to
-                 generate 4 signed word elements in 'out1'.
-                 'in' is evaluated twice, so it must not have side effects.
-*/
-#define UNPCK_SH_SW(in, out0, out1) \
-  { \
-    v8i16 tmp_m; \
-    \
-    tmp_m = __msa_clti_s_h((v8i16)(in), 0); \
-    ILVRL_H2_SW(tmp_m, in, out0, out1); \
-  }
-
-/* Description : Butterfly of 4 input vectors
-   Arguments   : Inputs  - in0, in1, in2, in3
-                 Outputs - out0, out1, out2, out3
-   Details     : Butterfly operation:
-                   out0 = in0 + in3,  out1 = in1 + in2,
-                   out2 = in1 - in2,  out3 = in0 - in3.
-                 The sums are stored before the differences are computed,
-                 so out0/out1 must not be the same variables as any input.
-                 Operands are parenthesized so expression arguments keep
-                 their own precedence.
-*/
-#define BUTTERFLY_4(in0, in1, in2, in3, out0, out1, out2, out3) \
-  { \
-    out0 = (in0) + (in3); \
-    out1 = (in1) + (in2); \
-    \
-    out2 = (in1) - (in2); \
-    out3 = (in0) - (in3); \
-  }
-
-/* Description : Butterfly of 8 input vectors
-   Arguments   : Inputs  - in0 ... in7
-                 Outputs - out0 .. out7
-   Details     : Butterfly operation: for k = 0..3,
-                   out[k]     = in[k] + in[7 - k]
-                   out[7 - k] = in[k] - in[7 - k]
-                 The four sums are stored before the differences are
-                 computed, so out0..out3 must not be the same variables as
-                 any input.
-                 Operands are parenthesized so expression arguments keep
-                 their own precedence.
-*/
-#define BUTTERFLY_8(in0, in1, in2, in3, in4, in5, in6, in7, out0, out1, out2, \
-                    out3, out4, out5, out6, out7) \
-  { \
-    out0 = (in0) + (in7); \
-    out1 = (in1) + (in6); \
-    out2 = (in2) + (in5); \
-    out3 = (in3) + (in4); \
-    \
-    out4 = (in3) - (in4); \
-    out5 = (in2) - (in5); \
-    out6 = (in1) - (in6); \
-    out7 = (in0) - (in7); \
-  }
-
-/* Description : Butterfly of 16 input vectors
-   Arguments   : Inputs  - in0 ... in15
-                 Outputs - out0 .. out15
-   Details     : Butterfly operation: for k = 0..7,
-                   out[k]      = in[k] + in[15 - k]
-                   out[15 - k] = in[k] - in[15 - k]
-                 The eight sums are stored before the differences are
-                 computed, so out0..out7 must not be the same variables as
-                 any input.
-                 Operands are parenthesized so expression arguments keep
-                 their own precedence.
-*/
-#define BUTTERFLY_16(in0, in1, in2, in3, in4, in5, in6, in7, in8, in9, in10, \
-                     in11, in12, in13, in14, in15, out0, out1, out2, out3, \
-                     out4, out5, out6, out7, out8, out9, out10, out11, out12, \
-                     out13, out14, out15) \
-  { \
-    out0 = (in0) + (in15); \
-    out1 = (in1) + (in14); \
-    out2 = (in2) + (in13); \
-    out3 = (in3) + (in12); \
-    out4 = (in4) + (in11); \
-    out5 = (in5) + (in10); \
-    out6 = (in6) + (in9); \
-    out7 = (in7) + (in8); \
-    \
-    out8 = (in7) - (in8); \
-    out9 = (in6) - (in9); \
-    out10 = (in5) - (in10); \
-    out11 = (in4) - (in11); \
-    out12 = (in3) - (in12); \
-    out13 = (in2) - (in13); \
-    out14 = (in1) - (in14); \
-    out15 = (in0) - (in15); \
-  }
-
-/* Description : Transpose input 8x8 byte block
-   Arguments   : Inputs  - in0, in1, in2, in3, in4, in5, in6, in7
-                 Outputs - out0, out1, out2, out3, out4, out5, out6, out7
-                 Return Type - as per RTYPE
-   Details     : The inputs are combined with the ILVR_B4_SB, ILVRL_B2_SB
-                 and ILVRL_W2 helpers to produce out0, out2, out4 and out6.
-                 out1, out3, out5 and out7 are then derived from those four
-                 vectors with SLDI_B2_0 using an offset of 8.
-                 TRANSPOSE8x8_UB_UB fixes RTYPE to v16u8.
-                 NOTE(review): presumably only the low 8 bytes of each
-                 input/output vector carry a row of the block - confirm
-                 against the helper macro definitions.
-*/
-#define TRANSPOSE8x8_UB(RTYPE, in0, in1, in2, in3, in4, in5, in6, in7, out0, \
- out1, out2, out3, out4, out5, out6, out7) \
- { \
- v16i8 tmp0_m, tmp1_m, tmp2_m, tmp3_m; \
- v16i8 tmp4_m, tmp5_m, tmp6_m, tmp7_m; \
- \
- ILVR_B4_SB(in2, in0, in3, in1, in6, in4, in7, in5, tmp0_m, tmp1_m, tmp2_m, \
- tmp3_m); \
- ILVRL_B2_SB(tmp1_m, tmp0_m, tmp4_m, tmp5_m); \
- ILVRL_B2_SB(tmp3_m, tmp2_m, tmp6_m, tmp7_m); \
- ILVRL_W2(RTYPE, tmp6_m, tmp4_m, out0, out2); \
- ILVRL_W2(RTYPE, tmp7_m, tmp5_m, out4, out6); \
- SLDI_B2_0(RTYPE, out0, out2, out1, out3, 8); \
- SLDI_B2_0(RTYPE, out4, out6, out5, out7, 8); \
- }
-#define TRANSPOSE8x8_UB_UB(...) TRANSPOSE8x8_UB(v16u8, __VA_ARGS__)
-
-/* Description : Transpose 16x8 block into 8x16 with byte elements in vectors
-   Arguments   : Inputs  - in0, in1, in2, in3, in4, in5, in6, in7,
-                           in8, in9, in10, in11, in12, in13, in14, in15
-                 Outputs - out0, out1, out2, out3, out4, out5, out6, out7
-                 Return Type - unsigned byte
-   Details     : The output variables out0..out7 are used as scratch storage
-                 during the first two stages and only hold their final
-                 values once the macro has finished; they must therefore be
-                 distinct from the inputs.
-                 The final stage derives tmp2_m/tmp3_m once from the odd
-                 halfwords of tmp4_m..tmp7_m (each assignment used to be
-                 written twice, which was redundant).
-*/
-#define TRANSPOSE16x8_UB_UB(in0, in1, in2, in3, in4, in5, in6, in7, in8, in9, \
-                            in10, in11, in12, in13, in14, in15, out0, out1, \
-                            out2, out3, out4, out5, out6, out7) \
-  { \
-    v16u8 tmp0_m, tmp1_m, tmp2_m, tmp3_m; \
-    v16u8 tmp4_m, tmp5_m, tmp6_m, tmp7_m; \
-    \
-    ILVEV_D2_UB(in0, in8, in1, in9, out7, out6); \
-    ILVEV_D2_UB(in2, in10, in3, in11, out5, out4); \
-    ILVEV_D2_UB(in4, in12, in5, in13, out3, out2); \
-    ILVEV_D2_UB(in6, in14, in7, in15, out1, out0); \
-    \
-    tmp0_m = (v16u8)__msa_ilvev_b((v16i8)out6, (v16i8)out7); \
-    tmp4_m = (v16u8)__msa_ilvod_b((v16i8)out6, (v16i8)out7); \
-    tmp1_m = (v16u8)__msa_ilvev_b((v16i8)out4, (v16i8)out5); \
-    tmp5_m = (v16u8)__msa_ilvod_b((v16i8)out4, (v16i8)out5); \
-    out5 = (v16u8)__msa_ilvev_b((v16i8)out2, (v16i8)out3); \
-    tmp6_m = (v16u8)__msa_ilvod_b((v16i8)out2, (v16i8)out3); \
-    out7 = (v16u8)__msa_ilvev_b((v16i8)out0, (v16i8)out1); \
-    tmp7_m = (v16u8)__msa_ilvod_b((v16i8)out0, (v16i8)out1); \
-    \
-    ILVEV_H2_UB(tmp0_m, tmp1_m, out5, out7, tmp2_m, tmp3_m); \
-    out0 = (v16u8)__msa_ilvev_w((v4i32)tmp3_m, (v4i32)tmp2_m); \
-    out4 = (v16u8)__msa_ilvod_w((v4i32)tmp3_m, (v4i32)tmp2_m); \
-    \
-    tmp2_m = (v16u8)__msa_ilvod_h((v8i16)tmp1_m, (v8i16)tmp0_m); \
-    tmp3_m = (v16u8)__msa_ilvod_h((v8i16)out7, (v8i16)out5); \
-    out2 = (v16u8)__msa_ilvev_w((v4i32)tmp3_m, (v4i32)tmp2_m); \
-    out6 = (v16u8)__msa_ilvod_w((v4i32)tmp3_m, (v4i32)tmp2_m); \
-    \
-    ILVEV_H2_UB(tmp4_m, tmp5_m, tmp6_m, tmp7_m, tmp2_m, tmp3_m); \
-    out1 = (v16u8)__msa_ilvev_w((v4i32)tmp3_m, (v4i32)tmp2_m); \
-    out5 = (v16u8)__msa_ilvod_w((v4i32)tmp3_m, (v4i32)tmp2_m); \
-    \
-    tmp2_m = (v16u8)__msa_ilvod_h((v8i16)tmp5_m, (v8i16)tmp4_m); \
-    tmp3_m = (v16u8)__msa_ilvod_h((v8i16)tmp7_m, (v8i16)tmp6_m); \
-    out3 = (v16u8)__msa_ilvev_w((v4i32)tmp3_m, (v4i32)tmp2_m); \
-    out7 = (v16u8)__msa_ilvod_w((v4i32)tmp3_m, (v4i32)tmp2_m); \
-  }
-
-/* Description : Transpose 4x4 block with half word elements in vectors
-   Arguments   : Inputs  - in0, in1, in2, in3
-                 Outputs - out0, out1, out2, out3
-                 Return Type - signed halfword
-   Details     : ILVR_H2_SH combines (in1, in0) and (in3, in2) into two
-                 temporaries, from which ILVRL_W2_SH produces out0 and out2.
-                 out1 is then formed with __msa_ilvl_d from out0 paired with
-                 itself, and out3 from out0 paired with out2.
-                 NOTE(review): presumably only the low four halfwords of
-                 each output hold a transposed row - confirm with callers.
-*/
-#define TRANSPOSE4x4_SH_SH(in0, in1, in2, in3, out0, out1, out2, out3) \
- { \
- v8i16 s0_m, s1_m; \
- \
- ILVR_H2_SH(in1, in0, in3, in2, s0_m, s1_m); \
- ILVRL_W2_SH(s1_m, s0_m, out0, out2); \
- out1 = (v8i16)__msa_ilvl_d((v2i64)out0, (v2i64)out0); \
- out3 = (v8i16)__msa_ilvl_d((v2i64)out0, (v2i64)out2); \
- }
-
-/* Description : Transpose 4x8 block with half word elements in vectors
-   Arguments   : Inputs  - in0, in1, in2, in3, in4, in5, in6, in7
-                 Outputs - out0, out1, out2, out3, out4, out5, out6, out7
-                 Return Type - signed halfword
-   Details     : Only out0..out3 receive transposed data; out4..out7 are
-                 always set to all-zero vectors.
-                 The inputs are right-interleaved pairwise (ILVR_H4_SH),
-                 the results are word-interleaved (ILVRL_W2_SH) and finally
-                 combined with __msa_ilvr_d / __msa_ilvl_d.
-*/
-#define TRANSPOSE4X8_SH_SH(in0, in1, in2, in3, in4, in5, in6, in7, out0, out1, \
- out2, out3, out4, out5, out6, out7) \
- { \
- v8i16 tmp0_m, tmp1_m, tmp2_m, tmp3_m; \
- v8i16 tmp0_n, tmp1_n, tmp2_n, tmp3_n; \
- v8i16 zero_m = { 0 }; \
- \
- ILVR_H4_SH(in1, in0, in3, in2, in5, in4, in7, in6, tmp0_n, tmp1_n, tmp2_n, \
- tmp3_n); \
- ILVRL_W2_SH(tmp1_n, tmp0_n, tmp0_m, tmp2_m); \
- ILVRL_W2_SH(tmp3_n, tmp2_n, tmp1_m, tmp3_m); \
- \
- out0 = (v8i16)__msa_ilvr_d((v2i64)tmp1_m, (v2i64)tmp0_m); \
- out1 = (v8i16)__msa_ilvl_d((v2i64)tmp1_m, (v2i64)tmp0_m); \
- out2 = (v8i16)__msa_ilvr_d((v2i64)tmp3_m, (v2i64)tmp2_m); \
- out3 = (v8i16)__msa_ilvl_d((v2i64)tmp3_m, (v2i64)tmp2_m); \
- \
- out4 = zero_m; \
- out5 = zero_m; \
- out6 = zero_m; \
- out7 = zero_m; \
- }
-
-/* Description : Transpose 8x4 block with half word elements in vectors
-   Arguments   : Inputs  - in0, in1, in2, in3
-                 Outputs - out0, out1, out2, out3
-                 Return Type - signed halfword
-   Details     : The macro takes four input and four output vectors.
-                 (in1, in0) and (in3, in2) are halfword-interleaved, right
-                 halves into tmp0_m/tmp1_m and left halves into
-                 tmp2_m/tmp3_m; the temporaries are then word-interleaved,
-                 right halves into out0/out2 and left halves into out1/out3.
-*/
-#define TRANSPOSE8X4_SH_SH(in0, in1, in2, in3, out0, out1, out2, out3) \
- { \
- v8i16 tmp0_m, tmp1_m, tmp2_m, tmp3_m; \
- \
- ILVR_H2_SH(in1, in0, in3, in2, tmp0_m, tmp1_m); \
- ILVL_H2_SH(in1, in0, in3, in2, tmp2_m, tmp3_m); \
- ILVR_W2_SH(tmp1_m, tmp0_m, tmp3_m, tmp2_m, out0, out2); \
- ILVL_W2_SH(tmp1_m, tmp0_m, tmp3_m, tmp2_m, out1, out3); \
- }
-
-/* Description : Transpose 8x8 block with half word elements in vectors
-   Arguments   : Inputs  - in0, in1, in2, in3, in4, in5, in6, in7
-                 Outputs - out0, out1, out2, out3, out4, out5, out6, out7
-                 Return Type - as per RTYPE
-   Details     : Rows 4..7 are interleaved into tmp0_m..tmp3_m and rows
-                 0..3 into tmp4_m..tmp7_m (s0_m/s1_m are reused as scratch
-                 between the steps). The even outputs (out0, out2, out4,
-                 out6) come from PCKEV_D4 and the odd outputs (out1, out3,
-                 out5, out7) from __msa_pckod_d on the same temporary pairs.
-                 TRANSPOSE8x8_SH_SH fixes RTYPE to v8i16.
-*/
-#define TRANSPOSE8x8_H(RTYPE, in0, in1, in2, in3, in4, in5, in6, in7, out0, \
- out1, out2, out3, out4, out5, out6, out7) \
- { \
- v8i16 s0_m, s1_m; \
- v8i16 tmp0_m, tmp1_m, tmp2_m, tmp3_m; \
- v8i16 tmp4_m, tmp5_m, tmp6_m, tmp7_m; \
- \
- ILVR_H2_SH(in6, in4, in7, in5, s0_m, s1_m); \
- ILVRL_H2_SH(s1_m, s0_m, tmp0_m, tmp1_m); \
- ILVL_H2_SH(in6, in4, in7, in5, s0_m, s1_m); \
- ILVRL_H2_SH(s1_m, s0_m, tmp2_m, tmp3_m); \
- ILVR_H2_SH(in2, in0, in3, in1, s0_m, s1_m); \
- ILVRL_H2_SH(s1_m, s0_m, tmp4_m, tmp5_m); \
- ILVL_H2_SH(in2, in0, in3, in1, s0_m, s1_m); \
- ILVRL_H2_SH(s1_m, s0_m, tmp6_m, tmp7_m); \
- PCKEV_D4(RTYPE, tmp0_m, tmp4_m, tmp1_m, tmp5_m, tmp2_m, tmp6_m, tmp3_m, \
- tmp7_m, out0, out2, out4, out6); \
- out1 = (RTYPE)__msa_pckod_d((v2i64)tmp0_m, (v2i64)tmp4_m); \
- out3 = (RTYPE)__msa_pckod_d((v2i64)tmp1_m, (v2i64)tmp5_m); \
- out5 = (RTYPE)__msa_pckod_d((v2i64)tmp2_m, (v2i64)tmp6_m); \
- out7 = (RTYPE)__msa_pckod_d((v2i64)tmp3_m, (v2i64)tmp7_m); \
- }
-#define TRANSPOSE8x8_SH_SH(...) TRANSPOSE8x8_H(v8i16, __VA_ARGS__)
-
-/* Description : Transpose 4x4 block with word elements in vectors
-   Arguments   : Inputs  - in0, in1, in2, in3
-                 Outputs - out0, out1, out2, out3
-                 Return Type - signed word
-   Details     : (in1, in0) and (in3, in2) are word-interleaved into the
-                 temporaries s0_m..s3_m; the outputs are then built from
-                 doubleword interleaves of (s2_m, s0_m) for out0/out1 and
-                 (s3_m, s1_m) for out2/out3. All inputs are consumed before
-                 any output is written.
-*/
-#define TRANSPOSE4x4_SW_SW(in0, in1, in2, in3, out0, out1, out2, out3) \
- { \
- v4i32 s0_m, s1_m, s2_m, s3_m; \
- \
- ILVRL_W2_SW(in1, in0, s0_m, s1_m); \
- ILVRL_W2_SW(in3, in2, s2_m, s3_m); \
- \
- out0 = (v4i32)__msa_ilvr_d((v2i64)s2_m, (v2i64)s0_m); \
- out1 = (v4i32)__msa_ilvl_d((v2i64)s2_m, (v2i64)s0_m); \
- out2 = (v4i32)__msa_ilvr_d((v2i64)s3_m, (v2i64)s1_m); \
- out3 = (v4i32)__msa_ilvl_d((v2i64)s3_m, (v2i64)s1_m); \
- }
-
-/* Description : Add block 4x4
-   Arguments   : Inputs - in0, in1, in2, in3, pdst, stride
-   Details     : Least significant 4 bytes from each input vector are added to
-                 the destination bytes, clipped between 0-255 and stored.
-                 Steps: the inputs are paired (ILVR_D2_SH), four words are
-                 loaded from 'pdst' with LW4 and inserted into byte vectors,
-                 widened against a zero vector, added to the paired inputs,
-                 clipped (CLIP_SH2_0_255), packed back to bytes and written
-                 with ST4x4_UB to the same 'pdst'/'stride'.
-                 Every statement is terminated with ';' for consistency.
-*/
-#define ADDBLK_ST4x4_UB(in0, in1, in2, in3, pdst, stride) \
-  { \
-    uint32_t src0_m, src1_m, src2_m, src3_m; \
-    v8i16 inp0_m, inp1_m, res0_m, res1_m; \
-    v16i8 dst0_m = { 0 }; \
-    v16i8 dst1_m = { 0 }; \
-    v16i8 zero_m = { 0 }; \
-    \
-    ILVR_D2_SH(in1, in0, in3, in2, inp0_m, inp1_m); \
-    LW4(pdst, stride, src0_m, src1_m, src2_m, src3_m); \
-    INSERT_W2_SB(src0_m, src1_m, dst0_m); \
-    INSERT_W2_SB(src2_m, src3_m, dst1_m); \
-    ILVR_B2_SH(zero_m, dst0_m, zero_m, dst1_m, res0_m, res1_m); \
-    ADD2(res0_m, inp0_m, res1_m, inp1_m, res0_m, res1_m); \
-    CLIP_SH2_0_255(res0_m, res1_m); \
-    PCKEV_B2_SB(res0_m, res0_m, res1_m, res1_m, dst0_m, dst1_m); \
-    ST4x4_UB(dst0_m, dst1_m, 0, 1, 0, 1, pdst, stride); \
-  }
-
-/* Description : Pack even elements of input vectors & xor with 128
-   Arguments   : Inputs - in0, in1
-                 Output - out_m (value of the statement expression)
-                 Return Type - unsigned byte
-   Details     : Signed byte even elements from 'in0' and 'in1' are packed
-                 together in one vector and the resulting vector is xor'ed with
-                 128 to shift the range from signed to unsigned byte.
-                 This is a GNU statement expression: use it as an rvalue,
-                 e.g. 'v = PCKEV_XORI128_UB(a, b);'.
-*/
-#define PCKEV_XORI128_UB(in0, in1) \
-  ({ \
-    v16u8 out_m; \
-    \
-    out_m = (v16u8)__msa_pckev_b((v16i8)(in1), (v16i8)(in0)); \
-    out_m = (v16u8)__msa_xori_b((v16u8)out_m, 128); \
-    out_m; \
-  })
-
-/* Description : Converts inputs to unsigned bytes, interleave, average & store
-                 as 8x4 unsigned byte block
-   Arguments   : Inputs - in0, in1, in2, in3, dst0, dst1, dst2, dst3,
-                          pdst, stride
-   Details     : (in0, in1) and (in2, in3) are each packed and range-shifted
-                 with PCKEV_XORI128_UB. (dst1, dst0) and (dst3, dst2) are
-                 combined with ILVR_D2_UB. The two results are averaged
-                 pairwise with AVER_UB2_UB and written through ST8x4_UB to
-                 'pdst' using 'stride'.
-*/
-#define CONVERT_UB_AVG_ST8x4_UB(in0, in1, in2, in3, dst0, dst1, dst2, dst3, \
- pdst, stride) \
- { \
- v16u8 tmp0_m, tmp1_m, tmp2_m, tmp3_m; \
- \
- tmp0_m = PCKEV_XORI128_UB(in0, in1); \
- tmp1_m = PCKEV_XORI128_UB(in2, in3); \
- ILVR_D2_UB(dst1, dst0, dst3, dst2, tmp2_m, tmp3_m); \
- AVER_UB2_UB(tmp0_m, tmp2_m, tmp1_m, tmp3_m, tmp0_m, tmp1_m); \
- ST8x4_UB(tmp0_m, tmp1_m, pdst, stride); \
- }
-
-/* Description : Pack even byte elements and store byte vector in destination
-                 memory
-   Arguments   : Inputs - in0, in1, pdst
-   Details     : The even byte elements of 'in1' and 'in0' are packed with
-                 __msa_pckev_b and the resulting vector is stored at 'pdst'
-                 through ST_SB.
-*/
-#define PCKEV_ST_SB(in0, in1, pdst) \
-  { \
-    v16i8 tmp_m; \
-    \
-    tmp_m = __msa_pckev_b((v16i8)(in1), (v16i8)(in0)); \
-    ST_SB(tmp_m, (pdst)); \
-  }
-
-/* Description : Horizontal 2 tap filter kernel code
-   Arguments   : Inputs - in0, in1, mask, coeff, shift
-                 Output - v8u16 value of the statement expression
-   Details     : Bytes of 'in1'/'in0' are selected according to 'mask'
-                 (__msa_vshf_b), an unsigned dot product with 'coeff' yields
-                 halfword sums (__msa_dotp_u_h), and the sums are shifted
-                 right by the immediate 'shift' with rounding
-                 (__msa_srari_h).
-                 This is a GNU statement expression: use it as an rvalue.
-*/
-#define HORIZ_2TAP_FILT_UH(in0, in1, mask, coeff, shift) \
-  ({ \
-    v16i8 tmp0_m; \
-    v8u16 tmp1_m; \
-    \
-    tmp0_m = __msa_vshf_b((v16i8)(mask), (v16i8)(in1), (v16i8)(in0)); \
-    tmp1_m = __msa_dotp_u_h((v16u8)tmp0_m, (v16u8)(coeff)); \
-    tmp1_m = (v8u16)__msa_srari_h((v8i16)tmp1_m, (shift)); \
-    \
-    tmp1_m; \
-  })
-#endif // AOM_AOM_DSP_MIPS_MACROS_MSA_H_
diff --git a/third_party/aom/aom_dsp/mips/sad_msa.c b/third_party/aom/aom_dsp/mips/sad_msa.c
deleted file mode 100644
index 58cdd80d9..000000000
--- a/third_party/aom/aom_dsp/mips/sad_msa.c
+++ /dev/null
@@ -1,800 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#include "config/aom_dsp_rtcd.h"
-
-#include "aom_dsp/mips/macros_msa.h"
-
-/* Inserts one word element from each of 'in0'..'in3' into word positions
- * 0..3 of 'out' via __msa_insve_w; 'out' is both read and written.
- * SAD_INSVE_W4_UB fixes RTYPE to v16u8.
- * Arguments are parenthesized so expression arguments are cast as a whole.
- */
-#define SAD_INSVE_W4(RTYPE, in0, in1, in2, in3, out) \
-  { \
-    out = (RTYPE)__msa_insve_w((v4i32)(out), 0, (v4i32)(in0)); \
-    out = (RTYPE)__msa_insve_w((v4i32)(out), 1, (v4i32)(in1)); \
-    out = (RTYPE)__msa_insve_w((v4i32)(out), 2, (v4i32)(in2)); \
-    out = (RTYPE)__msa_insve_w((v4i32)(out), 3, (v4i32)(in3)); \
-  }
-#define SAD_INSVE_W4_UB(...) SAD_INSVE_W4(v16u8, __VA_ARGS__)
-
-/* Sum of absolute differences between a source and a reference block,
- * processed four rows per iteration.
- *
- * Each iteration fetches four 32-bit words from the source and from the
- * reference with LW4, gathers them into one byte vector each, takes the
- * per-byte absolute difference and adds the pairwise-widened result to a
- * v8u16 accumulator. The return value is HADD_UH_U32 of that accumulator.
- *
- * Only (height >> 2) groups are processed; rows left over when height is
- * not a multiple of four are not read.
- */
-static uint32_t sad_4width_msa(const uint8_t *src_ptr, int32_t src_stride,
-                               const uint8_t *ref_ptr, int32_t ref_stride,
-                               int32_t height) {
-  uint32_t s_w0, s_w1, s_w2, s_w3;
-  uint32_t r_w0, r_w1, r_w2, r_w3;
-  v16u8 src_vec = { 0 };
-  v16u8 ref_vec = { 0 };
-  v16u8 abs_diff;
-  v8u16 acc = { 0 };
-  int32_t groups_left = height >> 2;
-
-  while (groups_left--) {
-    LW4(src_ptr, src_stride, s_w0, s_w1, s_w2, s_w3);
-    LW4(ref_ptr, ref_stride, r_w0, r_w1, r_w2, r_w3);
-    src_ptr += 4 * src_stride;
-    ref_ptr += 4 * ref_stride;
-
-    INSERT_W4_UB(s_w0, s_w1, s_w2, s_w3, src_vec);
-    INSERT_W4_UB(r_w0, r_w1, r_w2, r_w3, ref_vec);
-
-    abs_diff = __msa_asub_u_b(src_vec, ref_vec);
-    acc += __msa_hadd_u_h(abs_diff, abs_diff);
-  }
-
-  return HADD_UH_U32(acc);
-}
-
-/* Sum of absolute differences, four rows per iteration.
- *
- * Four source and four reference vectors are loaded per iteration, packed
- * pairwise with PCKEV_D4_UB into two vectors each, and fed to SAD_UB2_UH,
- * whose result is added to a v8u16 accumulator. Returns HADD_UH_U32 of the
- * accumulator. Only (height >> 2) groups are processed.
- */
-static uint32_t sad_8width_msa(const uint8_t *src, int32_t src_stride,
-                               const uint8_t *ref, int32_t ref_stride,
-                               int32_t height) {
-  v16u8 s0, s1, s2, s3;
-  v16u8 r0, r1, r2, r3;
-  v8u16 acc = { 0 };
-  int32_t groups_left = height >> 2;
-
-  while (groups_left--) {
-    LD_UB4(src, src_stride, s0, s1, s2, s3);
-    LD_UB4(ref, ref_stride, r0, r1, r2, r3);
-    src += 4 * src_stride;
-    ref += 4 * ref_stride;
-
-    /* Fold four loaded vectors into two per operand (written back in place). */
-    PCKEV_D4_UB(s1, s0, s3, s2, r1, r0, r3, r2, s0, s1, r0, r1);
-    acc += SAD_UB2_UH(s0, s1, r0, r1);
-  }
-
-  return HADD_UH_U32(acc);
-}
-
-/* Sum of absolute differences, one 16-byte vector per row.
- *
- * The outer loop runs (height >> 2) times and each pass handles two pairs
- * of rows, i.e. four rows in total. Returns HADD_UH_U32 of the v8u16
- * accumulator.
- */
-static uint32_t sad_16width_msa(const uint8_t *src, int32_t src_stride,
-                                const uint8_t *ref, int32_t ref_stride,
-                                int32_t height) {
-  v16u8 s0, s1, r0, r1;
-  v8u16 acc = { 0 };
-  int32_t groups_left = height >> 2;
-  int32_t pair;
-
-  while (groups_left--) {
-    for (pair = 0; pair < 2; ++pair) {
-      LD_UB2(src, src_stride, s0, s1);
-      LD_UB2(ref, ref_stride, r0, r1);
-      src += 2 * src_stride;
-      ref += 2 * ref_stride;
-      acc += SAD_UB2_UH(s0, s1, r0, r1);
-    }
-  }
-
-  return HADD_UH_U32(acc);
-}
-
-/* Sum of absolute differences, two 16-byte vectors per row.
- *
- * Each row is read as two vectors 16 bytes apart. The outer loop runs
- * (height >> 2) times and each pass handles four consecutive rows.
- * Returns HADD_UH_U32 of the v8u16 accumulator.
- */
-static uint32_t sad_32width_msa(const uint8_t *src, int32_t src_stride,
-                                const uint8_t *ref, int32_t ref_stride,
-                                int32_t height) {
-  v16u8 s_lo, s_hi, r_lo, r_hi;
-  v8u16 acc = { 0 };
-  int32_t groups_left = height >> 2;
-  int32_t row;
-
-  while (groups_left--) {
-    for (row = 0; row < 4; ++row) {
-      LD_UB2(src, 16, s_lo, s_hi);
-      LD_UB2(ref, 16, r_lo, r_hi);
-      src += src_stride;
-      ref += ref_stride;
-      acc += SAD_UB2_UH(s_lo, s_hi, r_lo, r_hi);
-    }
-  }
-
-  return HADD_UH_U32(acc);
-}
-
-/* Sum of absolute differences, four 16-byte vectors per row.
- *
- * Each row is read as four vectors 16 bytes apart. The first two vectors
- * feed one v8u16 accumulator and the last two feed a second one. The outer
- * loop runs (height >> 1) times, two rows per pass. The result is the sum
- * of HADD_UH_U32 over both accumulators.
- */
-static uint32_t sad_64width_msa(const uint8_t *src, int32_t src_stride,
-                                const uint8_t *ref, int32_t ref_stride,
-                                int32_t height) {
-  v16u8 s0, s1, s2, s3;
-  v16u8 r0, r1, r2, r3;
-  v8u16 acc_lo = { 0 };
-  v8u16 acc_hi = { 0 };
-  uint32_t total = 0;
-  int32_t pairs_left = height >> 1;
-  int32_t row;
-
-  while (pairs_left--) {
-    for (row = 0; row < 2; ++row) {
-      LD_UB4(src, 16, s0, s1, s2, s3);
-      LD_UB4(ref, 16, r0, r1, r2, r3);
-      src += src_stride;
-      ref += ref_stride;
-      acc_lo += SAD_UB2_UH(s0, s1, r0, r1);
-      acc_hi += SAD_UB2_UH(s2, s3, r2, r3);
-    }
-  }
-
-  total = HADD_UH_U32(acc_lo);
-  total += HADD_UH_U32(acc_hi);
-
-  return total;
-}
-
-/* Computes four sums of absolute differences at once: the same source
- * block against the four reference blocks aref_ptr[0..3], which share
- * 'ref_stride'.
- *
- * Four rows are handled per iteration ((height >> 2) iterations). The
- * source words are loaded and gathered once per iteration and compared
- * against each reference in turn. Result i is written to sad_array[i] as
- * HADD_UH_U32 of the i-th v8u16 accumulator.
- */
-static void sad_4width_x4d_msa(const uint8_t *src_ptr, int32_t src_stride,
-                               const uint8_t *const aref_ptr[],
-                               int32_t ref_stride, int32_t height,
-                               uint32_t *sad_array) {
-  const uint8_t *cand[4];
-  uint32_t s_w0, s_w1, s_w2, s_w3;
-  uint32_t r_w0, r_w1, r_w2, r_w3;
-  v16u8 src_vec = { 0 };
-  v16u8 ref_vec = { 0 };
-  v16u8 abs_diff;
-  v8u16 acc[4] = { { 0 }, { 0 }, { 0 }, { 0 } };
-  int32_t groups_left = height >> 2;
-  int32_t i;
-
-  /* Work on private copies of the four reference pointers. */
-  for (i = 0; i < 4; ++i) {
-    cand[i] = aref_ptr[i];
-  }
-
-  while (groups_left--) {
-    LW4(src_ptr, src_stride, s_w0, s_w1, s_w2, s_w3);
-    INSERT_W4_UB(s_w0, s_w1, s_w2, s_w3, src_vec);
-    src_ptr += 4 * src_stride;
-
-    for (i = 0; i < 4; ++i) {
-      LW4(cand[i], ref_stride, r_w0, r_w1, r_w2, r_w3);
-      INSERT_W4_UB(r_w0, r_w1, r_w2, r_w3, ref_vec);
-      cand[i] += 4 * ref_stride;
-
-      abs_diff = __msa_asub_u_b(src_vec, ref_vec);
-      acc[i] += __msa_hadd_u_h(abs_diff, abs_diff);
-    }
-  }
-
-  for (i = 0; i < 4; ++i) {
-    sad_array[i] = HADD_UH_U32(acc[i]);
-  }
-}
-
-/* Computes four sums of absolute differences at once: the same source
- * block against the four reference blocks aref_ptr[0..3], which share
- * 'ref_stride'.
- *
- * Four rows are handled per iteration ((height >> 2) iterations). The
- * four source vectors are loaded and packed into two once per iteration;
- * each reference is then loaded, packed the same way and compared with
- * SAD_UB2_UH. Result i is written to sad_array[i] as HADD_UH_U32 of the
- * i-th v8u16 accumulator.
- */
-static void sad_8width_x4d_msa(const uint8_t *src_ptr, int32_t src_stride,
-                               const uint8_t *const aref_ptr[],
-                               int32_t ref_stride, int32_t height,
-                               uint32_t *sad_array) {
-  const uint8_t *cand[4];
-  v16u8 s0, s1, s2, s3;
-  v16u8 r0, r1, r2, r3;
-  v8u16 acc[4] = { { 0 }, { 0 }, { 0 }, { 0 } };
-  int32_t groups_left = height >> 2;
-  int32_t i;
-
-  /* Work on private copies of the four reference pointers. */
-  for (i = 0; i < 4; ++i) {
-    cand[i] = aref_ptr[i];
-  }
-
-  while (groups_left--) {
-    LD_UB4(src_ptr, src_stride, s0, s1, s2, s3);
-    src_ptr += 4 * src_stride;
-    PCKEV_D2_UB(s1, s0, s3, s2, s0, s1);
-
-    for (i = 0; i < 4; ++i) {
-      LD_UB4(cand[i], ref_stride, r0, r1, r2, r3);
-      cand[i] += 4 * ref_stride;
-
-      PCKEV_D2_UB(r1, r0, r3, r2, r0, r1);
-      acc[i] += SAD_UB2_UH(s0, s1, r0, r1);
-    }
-  }
-
-  for (i = 0; i < 4; ++i) {
-    sad_array[i] = HADD_UH_U32(acc[i]);
-  }
-}
-
-/* Computes four sums of absolute differences at once: the same source
- * block against the four reference blocks aref_ptr[0..3], which share
- * 'ref_stride'. One 16-byte vector is read per row.
- *
- * The outer loop runs (height >> 1) times and handles two rows per pass.
- * For every row the source vector is compared with the corresponding row
- * of each reference. Result i is written to sad_array[i] as HADD_UH_U32
- * of the i-th v8u16 accumulator.
- */
-static void sad_16width_x4d_msa(const uint8_t *src_ptr, int32_t src_stride,
-                                const uint8_t *const aref_ptr[],
-                                int32_t ref_stride, int32_t height,
-                                uint32_t *sad_array) {
-  const uint8_t *cand[4];
-  v16u8 src_row, ref_row, abs_diff;
-  v8u16 acc[4] = { { 0 }, { 0 }, { 0 }, { 0 } };
-  int32_t pairs_left = height >> 1;
-  int32_t row, i;
-
-  /* Work on private copies of the four reference pointers. */
-  for (i = 0; i < 4; ++i) {
-    cand[i] = aref_ptr[i];
-  }
-
-  while (pairs_left--) {
-    for (row = 0; row < 2; ++row) {
-      src_row = LD_UB(src_ptr);
-      src_ptr += src_stride;
-
-      for (i = 0; i < 4; ++i) {
-        ref_row = LD_UB(cand[i]);
-        cand[i] += ref_stride;
-
-        abs_diff = __msa_asub_u_b(src_row, ref_row);
-        acc[i] += __msa_hadd_u_h(abs_diff, abs_diff);
-      }
-    }
-  }
-
-  for (i = 0; i < 4; ++i) {
-    sad_array[i] = HADD_UH_U32(acc[i]);
-  }
-}
-
-/* Computes four sums of absolute differences at once: the same source
- * block against the four reference blocks aref_ptr[0..3], which share
- * 'ref_stride'. Each row is read as two vectors 16 bytes apart.
- *
- * One row is handled per iteration ('height' iterations). Result i is
- * written to sad_array[i] as HADD_UH_U32 of the i-th v8u16 accumulator.
- */
-static void sad_32width_x4d_msa(const uint8_t *src, int32_t src_stride,
-                                const uint8_t *const aref_ptr[],
-                                int32_t ref_stride, int32_t height,
-                                uint32_t *sad_array) {
-  const uint8_t *cand[4];
-  v16u8 s_lo, s_hi, r_lo, r_hi;
-  v8u16 acc[4] = { { 0 }, { 0 }, { 0 }, { 0 } };
-  int32_t rows_left = height;
-  int32_t i;
-
-  /* Work on private copies of the four reference pointers. */
-  for (i = 0; i < 4; ++i) {
-    cand[i] = aref_ptr[i];
-  }
-
-  while (rows_left--) {
-    LD_UB2(src, 16, s_lo, s_hi);
-    src += src_stride;
-
-    for (i = 0; i < 4; ++i) {
-      LD_UB2(cand[i], 16, r_lo, r_hi);
-      cand[i] += ref_stride;
-      acc[i] += SAD_UB2_UH(s_lo, s_hi, r_lo, r_hi);
-    }
-  }
-
-  for (i = 0; i < 4; ++i) {
-    sad_array[i] = HADD_UH_U32(acc[i]);
-  }
-}
-
-/* Computes four sums of absolute differences at once: the same source
- * block against the four reference blocks aref_ptr[0..3], which share
- * 'ref_stride'. Each row is read as four vectors 16 bytes apart.
- *
- * One row is handled per iteration ('height' iterations). For each
- * reference, the first two vectors of a row feed one v8u16 accumulator
- * and the last two feed a second one; sad_array[i] receives the sum of
- * HADD_UH_U32 over both accumulators of reference i.
- */
-static void sad_64width_x4d_msa(const uint8_t *src, int32_t src_stride,
-                                const uint8_t *const aref_ptr[],
-                                int32_t ref_stride, int32_t height,
-                                uint32_t *sad_array) {
-  const uint8_t *cand[4];
-  v16u8 s0, s1, s2, s3;
-  v16u8 r0, r1, r2, r3;
-  v8u16 acc_lo[4] = { { 0 }, { 0 }, { 0 }, { 0 } };
-  v8u16 acc_hi[4] = { { 0 }, { 0 }, { 0 }, { 0 } };
-  int32_t rows_left = height;
-  int32_t i;
-
-  /* Work on private copies of the four reference pointers. */
-  for (i = 0; i < 4; ++i) {
-    cand[i] = aref_ptr[i];
-  }
-
-  while (rows_left--) {
-    LD_UB4(src, 16, s0, s1, s2, s3);
-    src += src_stride;
-
-    for (i = 0; i < 4; ++i) {
-      LD_UB4(cand[i], 16, r0, r1, r2, r3);
-      cand[i] += ref_stride;
-      acc_lo[i] += SAD_UB2_UH(s0, s1, r0, r1);
-      acc_hi[i] += SAD_UB2_UH(s2, s3, r2, r3);
-    }
-  }
-
-  for (i = 0; i < 4; ++i) {
-    sad_array[i] = HADD_UH_U32(acc_lo[i]);
-    sad_array[i] += HADD_UH_U32(acc_hi[i]);
-  }
-}
-
-/* Sum of absolute differences between the source and the average of the
- * reference with a second predictor, four rows per iteration.
- *
- * Each iteration fetches four 32-bit words from the source and from the
- * reference with LW4 and one 16-byte vector from 'sec_pred' (which then
- * advances by 16 bytes). The reference vector is averaged with the
- * predictor (__msa_aver_u_b) before the absolute difference against the
- * source is accumulated. Returns HADD_UH_U32 of the v8u16 accumulator.
- * Only (height >> 2) groups are processed.
- */
-static uint32_t avgsad_4width_msa(const uint8_t *src_ptr, int32_t src_stride,
-                                  const uint8_t *ref_ptr, int32_t ref_stride,
-                                  int32_t height, const uint8_t *sec_pred) {
-  uint32_t s_w0, s_w1, s_w2, s_w3;
-  uint32_t r_w0, r_w1, r_w2, r_w3;
-  v16u8 src_vec = { 0 };
-  v16u8 ref_vec = { 0 };
-  v16u8 pred_vec, avg_vec, abs_diff;
-  v8u16 acc = { 0 };
-  int32_t groups_left = height >> 2;
-
-  while (groups_left--) {
-    LW4(src_ptr, src_stride, s_w0, s_w1, s_w2, s_w3);
-    LW4(ref_ptr, ref_stride, r_w0, r_w1, r_w2, r_w3);
-    pred_vec = LD_UB(sec_pred);
-    src_ptr += 4 * src_stride;
-    ref_ptr += 4 * ref_stride;
-    sec_pred += 16;
-
-    INSERT_W4_UB(s_w0, s_w1, s_w2, s_w3, src_vec);
-    INSERT_W4_UB(r_w0, r_w1, r_w2, r_w3, ref_vec);
-
-    avg_vec = __msa_aver_u_b(pred_vec, ref_vec);
-    abs_diff = __msa_asub_u_b(src_vec, avg_vec);
-    acc += __msa_hadd_u_h(abs_diff, abs_diff);
-  }
-
-  return HADD_UH_U32(acc);
-}
-
-/* Sum of absolute differences between the source and the average of the
- * reference with a second predictor, four rows per iteration.
- *
- * Each iteration loads four source and four reference vectors plus two
- * 16-byte predictor vectors ('sec_pred' advances by 32 bytes). Source and
- * reference are packed pairwise with PCKEV_D4_UB, the packed reference is
- * averaged with the predictor (AVER_UB2_UB) and SAD_UB2_UH of source vs.
- * average is accumulated. Returns HADD_UH_U32 of the v8u16 accumulator.
- * Only (height >> 2) groups are processed.
- */
-static uint32_t avgsad_8width_msa(const uint8_t *src, int32_t src_stride,
-                                  const uint8_t *ref, int32_t ref_stride,
-                                  int32_t height, const uint8_t *sec_pred) {
-  v16u8 s0, s1, s2, s3;
-  v16u8 r0, r1, r2, r3;
-  v16u8 p0, p1;
-  v16u8 avg0, avg1;
-  v8u16 acc = { 0 };
-  int32_t groups_left = height >> 2;
-
-  while (groups_left--) {
-    LD_UB4(src, src_stride, s0, s1, s2, s3);
-    LD_UB4(ref, ref_stride, r0, r1, r2, r3);
-    LD_UB2(sec_pred, 16, p0, p1);
-    src += 4 * src_stride;
-    ref += 4 * ref_stride;
-    sec_pred += 32;
-
-    PCKEV_D4_UB(s1, s0, s3, s2, r1, r0, r3, r2, s0, s1, r0, r1);
-    AVER_UB2_UB(p0, r0, p1, r1, avg0, avg1);
-    acc += SAD_UB2_UH(s0, s1, avg0, avg1);
-  }
-
-  return HADD_UH_U32(acc);
-}
-
-/* Sum of absolute differences between the source and the average of the
- * reference with a second predictor, one 16-byte vector per row.
- *
- * The outer loop runs (height >> 3) times and each pass handles two
- * groups of four rows, i.e. eight rows in total. Per group, four vectors
- * are read from 'sec_pred' with a 16-byte stride and 'sec_pred' advances
- * by 64 bytes. Returns HADD_UH_U32 of the v8u16 accumulator.
- */
-static uint32_t avgsad_16width_msa(const uint8_t *src, int32_t src_stride,
-                                   const uint8_t *ref, int32_t ref_stride,
-                                   int32_t height, const uint8_t *sec_pred) {
-  v16u8 s0, s1, s2, s3;
-  v16u8 r0, r1, r2, r3;
-  v16u8 p0, p1, p2, p3;
-  v16u8 avg0, avg1;
-  v8u16 acc = { 0 };
-  int32_t octets_left = height >> 3;
-  int32_t group;
-
-  while (octets_left--) {
-    for (group = 0; group < 2; ++group) {
-      LD_UB4(src, src_stride, s0, s1, s2, s3);
-      LD_UB4(ref, ref_stride, r0, r1, r2, r3);
-      LD_UB4(sec_pred, 16, p0, p1, p2, p3);
-      src += 4 * src_stride;
-      ref += 4 * ref_stride;
-      sec_pred += 4 * 16;
-
-      AVER_UB2_UB(p0, r0, p1, r1, avg0, avg1);
-      acc += SAD_UB2_UH(s0, s1, avg0, avg1);
-      AVER_UB2_UB(p2, r2, p3, r3, avg0, avg1);
-      acc += SAD_UB2_UH(s2, s3, avg0, avg1);
-    }
-  }
-
-  return HADD_UH_U32(acc);
-}
-
-/* Sum of absolute differences between the source and the average of the
- * reference with a second predictor, two 16-byte vectors per row.
- *
- * Four rows are handled per iteration ((height >> 2) iterations). For
- * each of source, reference and predictor the vectors at offset 0 and at
- * offset 16 of four consecutive rows are loaded; 'sec_pred' uses a fixed
- * row stride of 32 bytes and advances by 128 bytes per iteration. Each
- * row's reference is averaged with the predictor (AVER_UB2_UB) and
- * SAD_UB2_UH of source vs. average is accumulated. Returns HADD_UH_U32 of
- * the v8u16 accumulator.
- */
-static uint32_t avgsad_32width_msa(const uint8_t *src, int32_t src_stride,
-                                   const uint8_t *ref, int32_t ref_stride,
-                                   int32_t height, const uint8_t *sec_pred) {
-  /* Suffix a = bytes 0..15 of a row, b = bytes 16..31; digit = row index. */
-  v16u8 s0a, s0b, s1a, s1b, s2a, s2b, s3a, s3b;
-  v16u8 r0a, r0b, r1a, r1b, r2a, r2b, r3a, r3b;
-  v16u8 p0a, p0b, p1a, p1b, p2a, p2b, p3a, p3b;
-  v16u8 avg_a, avg_b;
-  v8u16 acc = { 0 };
-  int32_t groups_left = height >> 2;
-
-  while (groups_left--) {
-    LD_UB4(src, src_stride, s0a, s1a, s2a, s3a);
-    LD_UB4(src + 16, src_stride, s0b, s1b, s2b, s3b);
-    src += 4 * src_stride;
-
-    LD_UB4(ref, ref_stride, r0a, r1a, r2a, r3a);
-    LD_UB4(ref + 16, ref_stride, r0b, r1b, r2b, r3b);
-    ref += 4 * ref_stride;
-
-    LD_UB4(sec_pred, 32, p0a, p1a, p2a, p3a);
-    LD_UB4(sec_pred + 16, 32, p0b, p1b, p2b, p3b);
-    sec_pred += 4 * 32;
-
-    AVER_UB2_UB(p0a, r0a, p0b, r0b, avg_a, avg_b);
-    acc += SAD_UB2_UH(s0a, s0b, avg_a, avg_b);
-    AVER_UB2_UB(p1a, r1a, p1b, r1b, avg_a, avg_b);
-    acc += SAD_UB2_UH(s1a, s1b, avg_a, avg_b);
-    AVER_UB2_UB(p2a, r2a, p2b, r2b, avg_a, avg_b);
-    acc += SAD_UB2_UH(s2a, s2b, avg_a, avg_b);
-    AVER_UB2_UB(p3a, r3a, p3b, r3b, avg_a, avg_b);
-    acc += SAD_UB2_UH(s3a, s3b, avg_a, avg_b);
-  }
-
-  return HADD_UH_U32(acc);
-}
-
-static uint32_t avgsad_64width_msa(const uint8_t *src, int32_t src_stride,
- const uint8_t *ref, int32_t ref_stride,
- int32_t height, const uint8_t *sec_pred) {
- int32_t ht_cnt;
- v16u8 src0, src1, src2, src3;
- v16u8 ref0, ref1, ref2, ref3;
- v16u8 comp0, comp1, comp2, comp3;
- v16u8 pred0, pred1, pred2, pred3;
- v8u16 sad0 = { 0 };
- v8u16 sad1 = { 0 };
- v4u32 sad;
-
- for (ht_cnt = (height >> 2); ht_cnt--;) {
- LD_UB4(src, 16, src0, src1, src2, src3);
- src += src_stride;
- LD_UB4(ref, 16, ref0, ref1, ref2, ref3);
- ref += ref_stride;
- LD_UB4(sec_pred, 16, pred0, pred1, pred2, pred3);
- sec_pred += 64;
- AVER_UB4_UB(pred0, ref0, pred1, ref1, pred2, ref2, pred3, ref3, comp0,
- comp1, comp2, comp3);
- sad0 += SAD_UB2_UH(src0, src1, comp0, comp1);
- sad1 += SAD_UB2_UH(src2, src3, comp2, comp3);
-
- LD_UB4(src, 16, src0, src1, src2, src3);
- src += src_stride;
- LD_UB4(ref, 16, ref0, ref1, ref2, ref3);
- ref += ref_stride;
- LD_UB4(sec_pred, 16, pred0, pred1, pred2, pred3);
- sec_pred += 64;
- AVER_UB4_UB(pred0, ref0, pred1, ref1, pred2, ref2, pred3, ref3, comp0,
- comp1, comp2, comp3);
- sad0 += SAD_UB2_UH(src0, src1, comp0, comp1);
- sad1 += SAD_UB2_UH(src2, src3, comp2, comp3);
-
- LD_UB4(src, 16, src0, src1, src2, src3);
- src += src_stride;
- LD_UB4(ref, 16, ref0, ref1, ref2, ref3);
- ref += ref_stride;
- LD_UB4(sec_pred, 16, pred0, pred1, pred2, pred3);
- sec_pred += 64;
- AVER_UB4_UB(pred0, ref0, pred1, ref1, pred2, ref2, pred3, ref3, comp0,
- comp1, comp2, comp3);
- sad0 += SAD_UB2_UH(src0, src1, comp0, comp1);
- sad1 += SAD_UB2_UH(src2, src3, comp2, comp3);
-
- LD_UB4(src, 16, src0, src1, src2, src3);
- src += src_stride;
- LD_UB4(ref, 16, ref0, ref1, ref2, ref3);
- ref += ref_stride;
- LD_UB4(sec_pred, 16, pred0, pred1, pred2, pred3);
- sec_pred += 64;
- AVER_UB4_UB(pred0, ref0, pred1, ref1, pred2, ref2, pred3, ref3, comp0,
- comp1, comp2, comp3);
- sad0 += SAD_UB2_UH(src0, src1, comp0, comp1);
- sad1 += SAD_UB2_UH(src2, src3, comp2, comp3);
- }
-
- sad = __msa_hadd_u_w(sad0, sad0);
- sad += __msa_hadd_u_w(sad1, sad1);
-
- return HADD_SW_S32(sad);
-}
-
-#define AOM_SAD_4xHEIGHT_MSA(height) \
- uint32_t aom_sad4x##height##_msa(const uint8_t *src, int32_t src_stride, \
- const uint8_t *ref, int32_t ref_stride) { \
- return sad_4width_msa(src, src_stride, ref, ref_stride, height); \
- }
-
-#define AOM_SAD_8xHEIGHT_MSA(height) \
- uint32_t aom_sad8x##height##_msa(const uint8_t *src, int32_t src_stride, \
- const uint8_t *ref, int32_t ref_stride) { \
- return sad_8width_msa(src, src_stride, ref, ref_stride, height); \
- }
-
-#define AOM_SAD_16xHEIGHT_MSA(height) \
- uint32_t aom_sad16x##height##_msa(const uint8_t *src, int32_t src_stride, \
- const uint8_t *ref, int32_t ref_stride) { \
- return sad_16width_msa(src, src_stride, ref, ref_stride, height); \
- }
-
-#define AOM_SAD_32xHEIGHT_MSA(height) \
- uint32_t aom_sad32x##height##_msa(const uint8_t *src, int32_t src_stride, \
- const uint8_t *ref, int32_t ref_stride) { \
- return sad_32width_msa(src, src_stride, ref, ref_stride, height); \
- }
-
-#define AOM_SAD_64xHEIGHT_MSA(height) \
- uint32_t aom_sad64x##height##_msa(const uint8_t *src, int32_t src_stride, \
- const uint8_t *ref, int32_t ref_stride) { \
- return sad_64width_msa(src, src_stride, ref, ref_stride, height); \
- }
-
-#define AOM_SAD_4xHEIGHTx4D_MSA(height) \
- void aom_sad4x##height##x4d_msa(const uint8_t *src, int32_t src_stride, \
- const uint8_t *const refs[], \
- int32_t ref_stride, uint32_t *sads) { \
- sad_4width_x4d_msa(src, src_stride, refs, ref_stride, height, sads); \
- }
-
-#define AOM_SAD_8xHEIGHTx4D_MSA(height) \
- void aom_sad8x##height##x4d_msa(const uint8_t *src, int32_t src_stride, \
- const uint8_t *const refs[], \
- int32_t ref_stride, uint32_t *sads) { \
- sad_8width_x4d_msa(src, src_stride, refs, ref_stride, height, sads); \
- }
-
-#define AOM_SAD_16xHEIGHTx4D_MSA(height) \
- void aom_sad16x##height##x4d_msa(const uint8_t *src, int32_t src_stride, \
- const uint8_t *const refs[], \
- int32_t ref_stride, uint32_t *sads) { \
- sad_16width_x4d_msa(src, src_stride, refs, ref_stride, height, sads); \
- }
-
-#define AOM_SAD_32xHEIGHTx4D_MSA(height) \
- void aom_sad32x##height##x4d_msa(const uint8_t *src, int32_t src_stride, \
- const uint8_t *const refs[], \
- int32_t ref_stride, uint32_t *sads) { \
- sad_32width_x4d_msa(src, src_stride, refs, ref_stride, height, sads); \
- }
-
-#define AOM_SAD_64xHEIGHTx4D_MSA(height) \
- void aom_sad64x##height##x4d_msa(const uint8_t *src, int32_t src_stride, \
- const uint8_t *const refs[], \
- int32_t ref_stride, uint32_t *sads) { \
- sad_64width_x4d_msa(src, src_stride, refs, ref_stride, height, sads); \
- }
-
-#define AOM_AVGSAD_4xHEIGHT_MSA(height) \
- uint32_t aom_sad4x##height##_avg_msa(const uint8_t *src, int32_t src_stride, \
- const uint8_t *ref, int32_t ref_stride, \
- const uint8_t *second_pred) { \
- return avgsad_4width_msa(src, src_stride, ref, ref_stride, height, \
- second_pred); \
- }
-
-#define AOM_AVGSAD_8xHEIGHT_MSA(height) \
- uint32_t aom_sad8x##height##_avg_msa(const uint8_t *src, int32_t src_stride, \
- const uint8_t *ref, int32_t ref_stride, \
- const uint8_t *second_pred) { \
- return avgsad_8width_msa(src, src_stride, ref, ref_stride, height, \
- second_pred); \
- }
-
-#define AOM_AVGSAD_16xHEIGHT_MSA(height) \
- uint32_t aom_sad16x##height##_avg_msa( \
- const uint8_t *src, int32_t src_stride, const uint8_t *ref, \
- int32_t ref_stride, const uint8_t *second_pred) { \
- return avgsad_16width_msa(src, src_stride, ref, ref_stride, height, \
- second_pred); \
- }
-
-#define AOM_AVGSAD_32xHEIGHT_MSA(height) \
- uint32_t aom_sad32x##height##_avg_msa( \
- const uint8_t *src, int32_t src_stride, const uint8_t *ref, \
- int32_t ref_stride, const uint8_t *second_pred) { \
- return avgsad_32width_msa(src, src_stride, ref, ref_stride, height, \
- second_pred); \
- }
-
-#define AOM_AVGSAD_64xHEIGHT_MSA(height) \
- uint32_t aom_sad64x##height##_avg_msa( \
- const uint8_t *src, int32_t src_stride, const uint8_t *ref, \
- int32_t ref_stride, const uint8_t *second_pred) { \
- return avgsad_64width_msa(src, src_stride, ref, ref_stride, height, \
- second_pred); \
- }
-
-/* clang-format off */
-// 64x64
-AOM_SAD_64xHEIGHT_MSA(64)
-AOM_SAD_64xHEIGHTx4D_MSA(64)
-AOM_AVGSAD_64xHEIGHT_MSA(64)
-
-// 64x32
-AOM_SAD_64xHEIGHT_MSA(32)
-AOM_SAD_64xHEIGHTx4D_MSA(32)
-AOM_AVGSAD_64xHEIGHT_MSA(32)
-
-// 32x64
-AOM_SAD_32xHEIGHT_MSA(64)
-AOM_SAD_32xHEIGHTx4D_MSA(64)
-AOM_AVGSAD_32xHEIGHT_MSA(64)
-
-// 32x32
-AOM_SAD_32xHEIGHT_MSA(32)
-AOM_SAD_32xHEIGHTx4D_MSA(32)
-AOM_AVGSAD_32xHEIGHT_MSA(32)
-
-// 32x16
-AOM_SAD_32xHEIGHT_MSA(16)
-AOM_SAD_32xHEIGHTx4D_MSA(16)
-AOM_AVGSAD_32xHEIGHT_MSA(16)
-
-// 16x32
-AOM_SAD_16xHEIGHT_MSA(32)
-AOM_SAD_16xHEIGHTx4D_MSA(32)
-AOM_AVGSAD_16xHEIGHT_MSA(32)
-
-// 16x16
-AOM_SAD_16xHEIGHT_MSA(16)
-AOM_SAD_16xHEIGHTx4D_MSA(16)
-AOM_AVGSAD_16xHEIGHT_MSA(16)
-
-// 16x8
-AOM_SAD_16xHEIGHT_MSA(8)
-AOM_SAD_16xHEIGHTx4D_MSA(8)
-AOM_AVGSAD_16xHEIGHT_MSA(8)
-
-// 8x16
-AOM_SAD_8xHEIGHT_MSA(16)
-AOM_SAD_8xHEIGHTx4D_MSA(16)
-AOM_AVGSAD_8xHEIGHT_MSA(16)
-
-// 8x8
-AOM_SAD_8xHEIGHT_MSA(8)
-AOM_SAD_8xHEIGHTx4D_MSA(8)
-AOM_AVGSAD_8xHEIGHT_MSA(8)
-
-// 8x4
-AOM_SAD_8xHEIGHT_MSA(4)
-AOM_SAD_8xHEIGHTx4D_MSA(4)
-AOM_AVGSAD_8xHEIGHT_MSA(4)
-
-// 4x8
-AOM_SAD_4xHEIGHT_MSA(8)
-AOM_SAD_4xHEIGHTx4D_MSA(8)
-AOM_AVGSAD_4xHEIGHT_MSA(8)
-
-// 4x4
-AOM_SAD_4xHEIGHT_MSA(4)
-AOM_SAD_4xHEIGHTx4D_MSA(4)
-AOM_AVGSAD_4xHEIGHT_MSA(4)
- /* clang-format on */
diff --git a/third_party/aom/aom_dsp/mips/sub_pixel_variance_msa.c b/third_party/aom/aom_dsp/mips/sub_pixel_variance_msa.c
deleted file mode 100644
index 810b6efaa..000000000
--- a/third_party/aom/aom_dsp/mips/sub_pixel_variance_msa.c
+++ /dev/null
@@ -1,1792 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#include "config/aom_dsp_rtcd.h"
-
-#include "aom_ports/mem.h"
-#include "aom_dsp/mips/macros_msa.h"
-#include "aom_dsp/aom_filter.h"
-#include "aom_dsp/variance.h"
-
-#define CALC_MSE_AVG_B(src, ref, var, sub) \
- { \
- v16u8 src_l0_m, src_l1_m; \
- v8i16 res_l0_m, res_l1_m; \
- \
- ILVRL_B2_UB(src, ref, src_l0_m, src_l1_m); \
- HSUB_UB2_SH(src_l0_m, src_l1_m, res_l0_m, res_l1_m); \
- DPADD_SH2_SW(res_l0_m, res_l1_m, res_l0_m, res_l1_m, var, var); \
- \
- sub += res_l0_m + res_l1_m; \
- }
-
-#define VARIANCE_WxH(sse, diff, shift) sse - (((uint32_t)diff * diff) >> shift)
-
-#define VARIANCE_LARGE_WxH(sse, diff, shift) \
- sse - (((int64_t)diff * diff) >> shift)
-
-static uint32_t avg_sse_diff_4width_msa(const uint8_t *src_ptr,
- int32_t src_stride,
- const uint8_t *ref_ptr,
- int32_t ref_stride,
- const uint8_t *sec_pred, int32_t height,
- int32_t *diff) {
- int32_t ht_cnt;
- uint32_t src0, src1, src2, src3;
- uint32_t ref0, ref1, ref2, ref3;
- v16u8 pred, src = { 0 };
- v16u8 ref = { 0 };
- v8i16 avg = { 0 };
- v4i32 vec, var = { 0 };
-
- for (ht_cnt = (height >> 2); ht_cnt--;) {
- pred = LD_UB(sec_pred);
- sec_pred += 16;
- LW4(src_ptr, src_stride, src0, src1, src2, src3);
- src_ptr += (4 * src_stride);
- LW4(ref_ptr, ref_stride, ref0, ref1, ref2, ref3);
- ref_ptr += (4 * ref_stride);
-
- INSERT_W4_UB(src0, src1, src2, src3, src);
- INSERT_W4_UB(ref0, ref1, ref2, ref3, ref);
-
- src = __msa_aver_u_b(src, pred);
- CALC_MSE_AVG_B(src, ref, var, avg);
- }
-
- vec = __msa_hadd_s_w(avg, avg);
- *diff = HADD_SW_S32(vec);
-
- return HADD_SW_S32(var);
-}
-
-static uint32_t avg_sse_diff_8width_msa(const uint8_t *src_ptr,
- int32_t src_stride,
- const uint8_t *ref_ptr,
- int32_t ref_stride,
- const uint8_t *sec_pred, int32_t height,
- int32_t *diff) {
- int32_t ht_cnt;
- v16u8 src0, src1, src2, src3;
- v16u8 ref0, ref1, ref2, ref3;
- v16u8 pred0, pred1;
- v8i16 avg = { 0 };
- v4i32 vec, var = { 0 };
-
- for (ht_cnt = (height >> 2); ht_cnt--;) {
- LD_UB2(sec_pred, 16, pred0, pred1);
- sec_pred += 32;
- LD_UB4(src_ptr, src_stride, src0, src1, src2, src3);
- src_ptr += (4 * src_stride);
- LD_UB4(ref_ptr, ref_stride, ref0, ref1, ref2, ref3);
- ref_ptr += (4 * ref_stride);
-
- PCKEV_D4_UB(src1, src0, src3, src2, ref1, ref0, ref3, ref2, src0, src1,
- ref0, ref1);
- AVER_UB2_UB(src0, pred0, src1, pred1, src0, src1);
- CALC_MSE_AVG_B(src0, ref0, var, avg);
- CALC_MSE_AVG_B(src1, ref1, var, avg);
- }
-
- vec = __msa_hadd_s_w(avg, avg);
- *diff = HADD_SW_S32(vec);
-
- return HADD_SW_S32(var);
-}
-
-static uint32_t avg_sse_diff_16width_msa(const uint8_t *src_ptr,
- int32_t src_stride,
- const uint8_t *ref_ptr,
- int32_t ref_stride,
- const uint8_t *sec_pred,
- int32_t height, int32_t *diff) {
- int32_t ht_cnt;
- v16u8 src, ref, pred;
- v8i16 avg = { 0 };
- v4i32 vec, var = { 0 };
-
- for (ht_cnt = (height >> 2); ht_cnt--;) {
- pred = LD_UB(sec_pred);
- sec_pred += 16;
- src = LD_UB(src_ptr);
- src_ptr += src_stride;
- ref = LD_UB(ref_ptr);
- ref_ptr += ref_stride;
- src = __msa_aver_u_b(src, pred);
- CALC_MSE_AVG_B(src, ref, var, avg);
-
- pred = LD_UB(sec_pred);
- sec_pred += 16;
- src = LD_UB(src_ptr);
- src_ptr += src_stride;
- ref = LD_UB(ref_ptr);
- ref_ptr += ref_stride;
- src = __msa_aver_u_b(src, pred);
- CALC_MSE_AVG_B(src, ref, var, avg);
-
- pred = LD_UB(sec_pred);
- sec_pred += 16;
- src = LD_UB(src_ptr);
- src_ptr += src_stride;
- ref = LD_UB(ref_ptr);
- ref_ptr += ref_stride;
- src = __msa_aver_u_b(src, pred);
- CALC_MSE_AVG_B(src, ref, var, avg);
-
- pred = LD_UB(sec_pred);
- sec_pred += 16;
- src = LD_UB(src_ptr);
- src_ptr += src_stride;
- ref = LD_UB(ref_ptr);
- ref_ptr += ref_stride;
- src = __msa_aver_u_b(src, pred);
- CALC_MSE_AVG_B(src, ref, var, avg);
- }
-
- vec = __msa_hadd_s_w(avg, avg);
- *diff = HADD_SW_S32(vec);
-
- return HADD_SW_S32(var);
-}
-
-static uint32_t avg_sse_diff_32width_msa(const uint8_t *src_ptr,
- int32_t src_stride,
- const uint8_t *ref_ptr,
- int32_t ref_stride,
- const uint8_t *sec_pred,
- int32_t height, int32_t *diff) {
- int32_t ht_cnt;
- v16u8 src0, src1, ref0, ref1, pred0, pred1;
- v8i16 avg = { 0 };
- v4i32 vec, var = { 0 };
-
- for (ht_cnt = (height >> 2); ht_cnt--;) {
- LD_UB2(sec_pred, 16, pred0, pred1);
- sec_pred += 32;
- LD_UB2(src_ptr, 16, src0, src1);
- src_ptr += src_stride;
- LD_UB2(ref_ptr, 16, ref0, ref1);
- ref_ptr += ref_stride;
- AVER_UB2_UB(src0, pred0, src1, pred1, src0, src1);
- CALC_MSE_AVG_B(src0, ref0, var, avg);
- CALC_MSE_AVG_B(src1, ref1, var, avg);
-
- LD_UB2(sec_pred, 16, pred0, pred1);
- sec_pred += 32;
- LD_UB2(src_ptr, 16, src0, src1);
- src_ptr += src_stride;
- LD_UB2(ref_ptr, 16, ref0, ref1);
- ref_ptr += ref_stride;
- AVER_UB2_UB(src0, pred0, src1, pred1, src0, src1);
- CALC_MSE_AVG_B(src0, ref0, var, avg);
- CALC_MSE_AVG_B(src1, ref1, var, avg);
-
- LD_UB2(sec_pred, 16, pred0, pred1);
- sec_pred += 32;
- LD_UB2(src_ptr, 16, src0, src1);
- src_ptr += src_stride;
- LD_UB2(ref_ptr, 16, ref0, ref1);
- ref_ptr += ref_stride;
- AVER_UB2_UB(src0, pred0, src1, pred1, src0, src1);
- CALC_MSE_AVG_B(src0, ref0, var, avg);
- CALC_MSE_AVG_B(src1, ref1, var, avg);
-
- LD_UB2(sec_pred, 16, pred0, pred1);
- sec_pred += 32;
- LD_UB2(src_ptr, 16, src0, src1);
- src_ptr += src_stride;
- LD_UB2(ref_ptr, 16, ref0, ref1);
- ref_ptr += ref_stride;
- AVER_UB2_UB(src0, pred0, src1, pred1, src0, src1);
- CALC_MSE_AVG_B(src0, ref0, var, avg);
- CALC_MSE_AVG_B(src1, ref1, var, avg);
- }
-
- vec = __msa_hadd_s_w(avg, avg);
- *diff = HADD_SW_S32(vec);
-
- return HADD_SW_S32(var);
-}
-
-static uint32_t avg_sse_diff_32x64_msa(const uint8_t *src_ptr,
- int32_t src_stride,
- const uint8_t *ref_ptr,
- int32_t ref_stride,
- const uint8_t *sec_pred, int32_t *diff) {
- int32_t ht_cnt;
- v16u8 src0, src1, ref0, ref1, pred0, pred1;
- v8i16 avg0 = { 0 };
- v8i16 avg1 = { 0 };
- v4i32 vec, var = { 0 };
-
- for (ht_cnt = 16; ht_cnt--;) {
- LD_UB2(sec_pred, 16, pred0, pred1);
- sec_pred += 32;
- LD_UB2(src_ptr, 16, src0, src1);
- src_ptr += src_stride;
- LD_UB2(ref_ptr, 16, ref0, ref1);
- ref_ptr += ref_stride;
- AVER_UB2_UB(src0, pred0, src1, pred1, src0, src1);
- CALC_MSE_AVG_B(src0, ref0, var, avg0);
- CALC_MSE_AVG_B(src1, ref1, var, avg1);
-
- LD_UB2(sec_pred, 16, pred0, pred1);
- sec_pred += 32;
- LD_UB2(src_ptr, 16, src0, src1);
- src_ptr += src_stride;
- LD_UB2(ref_ptr, 16, ref0, ref1);
- ref_ptr += ref_stride;
- AVER_UB2_UB(src0, pred0, src1, pred1, src0, src1);
- CALC_MSE_AVG_B(src0, ref0, var, avg0);
- CALC_MSE_AVG_B(src1, ref1, var, avg1);
-
- LD_UB2(sec_pred, 16, pred0, pred1);
- sec_pred += 32;
- LD_UB2(src_ptr, 16, src0, src1);
- src_ptr += src_stride;
- LD_UB2(ref_ptr, 16, ref0, ref1);
- ref_ptr += ref_stride;
- AVER_UB2_UB(src0, pred0, src1, pred1, src0, src1);
- CALC_MSE_AVG_B(src0, ref0, var, avg0);
- CALC_MSE_AVG_B(src1, ref1, var, avg1);
-
- LD_UB2(sec_pred, 16, pred0, pred1);
- sec_pred += 32;
- LD_UB2(src_ptr, 16, src0, src1);
- src_ptr += src_stride;
- LD_UB2(ref_ptr, 16, ref0, ref1);
- ref_ptr += ref_stride;
- AVER_UB2_UB(src0, pred0, src1, pred1, src0, src1);
- CALC_MSE_AVG_B(src0, ref0, var, avg0);
- CALC_MSE_AVG_B(src1, ref1, var, avg1);
- }
-
- vec = __msa_hadd_s_w(avg0, avg0);
- vec += __msa_hadd_s_w(avg1, avg1);
- *diff = HADD_SW_S32(vec);
-
- return HADD_SW_S32(var);
-}
-
-static uint32_t avg_sse_diff_64x32_msa(const uint8_t *src_ptr,
- int32_t src_stride,
- const uint8_t *ref_ptr,
- int32_t ref_stride,
- const uint8_t *sec_pred, int32_t *diff) {
- int32_t ht_cnt;
- v16u8 src0, src1, src2, src3;
- v16u8 ref0, ref1, ref2, ref3;
- v16u8 pred0, pred1, pred2, pred3;
- v8i16 avg0 = { 0 };
- v8i16 avg1 = { 0 };
- v4i32 vec, var = { 0 };
-
- for (ht_cnt = 16; ht_cnt--;) {
- LD_UB4(sec_pred, 16, pred0, pred1, pred2, pred3);
- sec_pred += 64;
- LD_UB4(src_ptr, 16, src0, src1, src2, src3);
- src_ptr += src_stride;
- LD_UB4(ref_ptr, 16, ref0, ref1, ref2, ref3);
- ref_ptr += ref_stride;
- AVER_UB4_UB(src0, pred0, src1, pred1, src2, pred2, src3, pred3, src0, src1,
- src2, src3);
- CALC_MSE_AVG_B(src0, ref0, var, avg0);
- CALC_MSE_AVG_B(src2, ref2, var, avg0);
- CALC_MSE_AVG_B(src1, ref1, var, avg1);
- CALC_MSE_AVG_B(src3, ref3, var, avg1);
-
- LD_UB4(sec_pred, 16, pred0, pred1, pred2, pred3);
- sec_pred += 64;
- LD_UB4(src_ptr, 16, src0, src1, src2, src3);
- src_ptr += src_stride;
- LD_UB4(ref_ptr, 16, ref0, ref1, ref2, ref3);
- ref_ptr += ref_stride;
- AVER_UB4_UB(src0, pred0, src1, pred1, src2, pred2, src3, pred3, src0, src1,
- src2, src3);
- CALC_MSE_AVG_B(src0, ref0, var, avg0);
- CALC_MSE_AVG_B(src2, ref2, var, avg0);
- CALC_MSE_AVG_B(src1, ref1, var, avg1);
- CALC_MSE_AVG_B(src3, ref3, var, avg1);
- }
-
- vec = __msa_hadd_s_w(avg0, avg0);
- vec += __msa_hadd_s_w(avg1, avg1);
-
- *diff = HADD_SW_S32(vec);
-
- return HADD_SW_S32(var);
-}
-
-static uint32_t avg_sse_diff_64x64_msa(const uint8_t *src_ptr,
- int32_t src_stride,
- const uint8_t *ref_ptr,
- int32_t ref_stride,
- const uint8_t *sec_pred, int32_t *diff) {
- int32_t ht_cnt;
- v16u8 src0, src1, src2, src3;
- v16u8 ref0, ref1, ref2, ref3;
- v16u8 pred0, pred1, pred2, pred3;
- v8i16 avg0 = { 0 };
- v8i16 avg1 = { 0 };
- v8i16 avg2 = { 0 };
- v8i16 avg3 = { 0 };
- v4i32 vec, var = { 0 };
-
- for (ht_cnt = 32; ht_cnt--;) {
- LD_UB4(sec_pred, 16, pred0, pred1, pred2, pred3);
- sec_pred += 64;
- LD_UB4(src_ptr, 16, src0, src1, src2, src3);
- src_ptr += src_stride;
- LD_UB4(ref_ptr, 16, ref0, ref1, ref2, ref3);
- ref_ptr += ref_stride;
- AVER_UB4_UB(src0, pred0, src1, pred1, src2, pred2, src3, pred3, src0, src1,
- src2, src3);
- CALC_MSE_AVG_B(src0, ref0, var, avg0);
- CALC_MSE_AVG_B(src1, ref1, var, avg1);
- CALC_MSE_AVG_B(src2, ref2, var, avg2);
- CALC_MSE_AVG_B(src3, ref3, var, avg3);
-
- LD_UB4(sec_pred, 16, pred0, pred1, pred2, pred3);
- sec_pred += 64;
- LD_UB4(src_ptr, 16, src0, src1, src2, src3);
- src_ptr += src_stride;
- LD_UB4(ref_ptr, 16, ref0, ref1, ref2, ref3);
- ref_ptr += ref_stride;
- AVER_UB4_UB(src0, pred0, src1, pred1, src2, pred2, src3, pred3, src0, src1,
- src2, src3);
- CALC_MSE_AVG_B(src0, ref0, var, avg0);
- CALC_MSE_AVG_B(src1, ref1, var, avg1);
- CALC_MSE_AVG_B(src2, ref2, var, avg2);
- CALC_MSE_AVG_B(src3, ref3, var, avg3);
- }
-
- vec = __msa_hadd_s_w(avg0, avg0);
- vec += __msa_hadd_s_w(avg1, avg1);
- vec += __msa_hadd_s_w(avg2, avg2);
- vec += __msa_hadd_s_w(avg3, avg3);
- *diff = HADD_SW_S32(vec);
-
- return HADD_SW_S32(var);
-}
-
-static uint32_t sub_pixel_sse_diff_4width_h_msa(
- const uint8_t *src, int32_t src_stride, const uint8_t *dst,
- int32_t dst_stride, const uint8_t *filter, int32_t height, int32_t *diff) {
- int16_t filtval;
- uint32_t loop_cnt;
- uint32_t ref0, ref1, ref2, ref3;
- v16u8 filt0, ref = { 0 };
- v16i8 src0, src1, src2, src3;
- v16i8 mask = { 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, 8 };
- v8u16 vec0, vec1, vec2, vec3;
- v8i16 avg = { 0 };
- v4i32 vec, var = { 0 };
-
- filtval = LH(filter);
- filt0 = (v16u8)__msa_fill_h(filtval);
-
- for (loop_cnt = (height >> 2); loop_cnt--;) {
- LD_SB4(src, src_stride, src0, src1, src2, src3);
- src += (4 * src_stride);
- LW4(dst, dst_stride, ref0, ref1, ref2, ref3);
- dst += (4 * dst_stride);
- INSERT_W4_UB(ref0, ref1, ref2, ref3, ref);
- VSHF_B2_UH(src0, src0, src1, src1, mask, mask, vec0, vec1);
- VSHF_B2_UH(src2, src2, src3, src3, mask, mask, vec2, vec3);
- DOTP_UB4_UH(vec0, vec1, vec2, vec3, filt0, filt0, filt0, filt0, vec0, vec1,
- vec2, vec3);
- SRARI_H4_UH(vec0, vec1, vec2, vec3, FILTER_BITS);
- PCKEV_B4_SB(vec0, vec0, vec1, vec1, vec2, vec2, vec3, vec3, src0, src1,
- src2, src3);
- ILVEV_W2_SB(src0, src1, src2, src3, src0, src2);
- src0 = (v16i8)__msa_ilvev_d((v2i64)src2, (v2i64)src0);
- CALC_MSE_AVG_B(src0, ref, var, avg);
- }
-
- vec = __msa_hadd_s_w(avg, avg);
- *diff = HADD_SW_S32(vec);
-
- return HADD_SW_S32(var);
-}
-
-static uint32_t sub_pixel_sse_diff_8width_h_msa(
- const uint8_t *src, int32_t src_stride, const uint8_t *dst,
- int32_t dst_stride, const uint8_t *filter, int32_t height, int32_t *diff) {
- int16_t filtval;
- uint32_t loop_cnt;
- v16u8 filt0, out, ref0, ref1, ref2, ref3;
- v16i8 src0, src1, src2, src3;
- v16i8 mask = { 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, 8 };
- v8u16 vec0, vec1, vec2, vec3;
- v8i16 avg = { 0 };
- v4i32 vec, var = { 0 };
-
- filtval = LH(filter);
- filt0 = (v16u8)__msa_fill_h(filtval);
-
- for (loop_cnt = (height >> 2); loop_cnt--;) {
- LD_SB4(src, src_stride, src0, src1, src2, src3);
- src += (4 * src_stride);
- LD_UB4(dst, dst_stride, ref0, ref1, ref2, ref3);
- dst += (4 * dst_stride);
-
- PCKEV_D2_UB(ref1, ref0, ref3, ref2, ref0, ref1);
- VSHF_B2_UH(src0, src0, src1, src1, mask, mask, vec0, vec1);
- VSHF_B2_UH(src2, src2, src3, src3, mask, mask, vec2, vec3);
- DOTP_UB4_UH(vec0, vec1, vec2, vec3, filt0, filt0, filt0, filt0, vec0, vec1,
- vec2, vec3);
- SRARI_H4_UH(vec0, vec1, vec2, vec3, FILTER_BITS);
- PCKEV_B4_SB(vec0, vec0, vec1, vec1, vec2, vec2, vec3, vec3, src0, src1,
- src2, src3);
- out = (v16u8)__msa_ilvev_d((v2i64)src1, (v2i64)src0);
- CALC_MSE_AVG_B(out, ref0, var, avg);
- out = (v16u8)__msa_ilvev_d((v2i64)src3, (v2i64)src2);
- CALC_MSE_AVG_B(out, ref1, var, avg);
- }
-
- vec = __msa_hadd_s_w(avg, avg);
- *diff = HADD_SW_S32(vec);
-
- return HADD_SW_S32(var);
-}
-
-static uint32_t sub_pixel_sse_diff_16width_h_msa(
- const uint8_t *src, int32_t src_stride, const uint8_t *dst,
- int32_t dst_stride, const uint8_t *filter, int32_t height, int32_t *diff) {
- int16_t filtval;
- uint32_t loop_cnt;
- v16i8 src0, src1, src2, src3, src4, src5, src6, src7;
- v16i8 mask = { 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, 8 };
- v16u8 dst0, dst1, dst2, dst3, filt0;
- v8u16 vec0, vec1, vec2, vec3, vec4, vec5, vec6, vec7;
- v8u16 out0, out1, out2, out3, out4, out5, out6, out7;
- v8i16 avg = { 0 };
- v4i32 vec, var = { 0 };
-
- filtval = LH(filter);
- filt0 = (v16u8)__msa_fill_h(filtval);
-
- for (loop_cnt = (height >> 2); loop_cnt--;) {
- LD_SB4(src, src_stride, src0, src2, src4, src6);
- LD_SB4(src + 8, src_stride, src1, src3, src5, src7);
- src += (4 * src_stride);
- LD_UB4(dst, dst_stride, dst0, dst1, dst2, dst3);
- dst += (4 * dst_stride);
-
- VSHF_B2_UH(src0, src0, src1, src1, mask, mask, vec0, vec1);
- VSHF_B2_UH(src2, src2, src3, src3, mask, mask, vec2, vec3);
- VSHF_B2_UH(src4, src4, src5, src5, mask, mask, vec4, vec5);
- VSHF_B2_UH(src6, src6, src7, src7, mask, mask, vec6, vec7);
- DOTP_UB4_UH(vec0, vec1, vec2, vec3, filt0, filt0, filt0, filt0, out0, out1,
- out2, out3);
- DOTP_UB4_UH(vec4, vec5, vec6, vec7, filt0, filt0, filt0, filt0, out4, out5,
- out6, out7);
- SRARI_H4_UH(out0, out1, out2, out3, FILTER_BITS);
- SRARI_H4_UH(out4, out5, out6, out7, FILTER_BITS);
- PCKEV_B4_SB(out1, out0, out3, out2, out5, out4, out7, out6, src0, src1,
- src2, src3);
- CALC_MSE_AVG_B(src0, dst0, var, avg);
- CALC_MSE_AVG_B(src1, dst1, var, avg);
- CALC_MSE_AVG_B(src2, dst2, var, avg);
- CALC_MSE_AVG_B(src3, dst3, var, avg);
- }
-
- vec = __msa_hadd_s_w(avg, avg);
- *diff = HADD_SW_S32(vec);
-
- return HADD_SW_S32(var);
-}
-
-static uint32_t sub_pixel_sse_diff_32width_h_msa(
- const uint8_t *src, int32_t src_stride, const uint8_t *dst,
- int32_t dst_stride, const uint8_t *filter, int32_t height, int32_t *diff) {
- uint32_t loop_cnt, sse = 0;
- int32_t diff0[2];
-
- for (loop_cnt = 0; loop_cnt < 2; ++loop_cnt) {
- sse += sub_pixel_sse_diff_16width_h_msa(src, src_stride, dst, dst_stride,
- filter, height, &diff0[loop_cnt]);
- src += 16;
- dst += 16;
- }
-
- *diff = diff0[0] + diff0[1];
-
- return sse;
-}
-
-static uint32_t sub_pixel_sse_diff_64width_h_msa(
- const uint8_t *src, int32_t src_stride, const uint8_t *dst,
- int32_t dst_stride, const uint8_t *filter, int32_t height, int32_t *diff) {
- uint32_t loop_cnt, sse = 0;
- int32_t diff0[4];
-
- for (loop_cnt = 0; loop_cnt < 4; ++loop_cnt) {
- sse += sub_pixel_sse_diff_16width_h_msa(src, src_stride, dst, dst_stride,
- filter, height, &diff0[loop_cnt]);
- src += 16;
- dst += 16;
- }
-
- *diff = diff0[0] + diff0[1] + diff0[2] + diff0[3];
-
- return sse;
-}
-
-static uint32_t sub_pixel_sse_diff_4width_v_msa(
- const uint8_t *src, int32_t src_stride, const uint8_t *dst,
- int32_t dst_stride, const uint8_t *filter, int32_t height, int32_t *diff) {
- int16_t filtval;
- uint32_t loop_cnt;
- uint32_t ref0, ref1, ref2, ref3;
- v16u8 src0, src1, src2, src3, src4, out;
- v16u8 src10_r, src32_r, src21_r, src43_r;
- v16u8 ref = { 0 };
- v16u8 src2110, src4332;
- v16u8 filt0;
- v8i16 avg = { 0 };
- v4i32 vec, var = { 0 };
- v8u16 tmp0, tmp1;
-
- filtval = LH(filter);
- filt0 = (v16u8)__msa_fill_h(filtval);
-
- src0 = LD_UB(src);
- src += src_stride;
-
- for (loop_cnt = (height >> 2); loop_cnt--;) {
- LD_UB4(src, src_stride, src1, src2, src3, src4);
- src += (4 * src_stride);
- LW4(dst, dst_stride, ref0, ref1, ref2, ref3);
- dst += (4 * dst_stride);
-
- INSERT_W4_UB(ref0, ref1, ref2, ref3, ref);
- ILVR_B4_UB(src1, src0, src2, src1, src3, src2, src4, src3, src10_r, src21_r,
- src32_r, src43_r);
- ILVR_D2_UB(src21_r, src10_r, src43_r, src32_r, src2110, src4332);
- DOTP_UB2_UH(src2110, src4332, filt0, filt0, tmp0, tmp1);
- SRARI_H2_UH(tmp0, tmp1, FILTER_BITS);
- out = (v16u8)__msa_pckev_b((v16i8)tmp1, (v16i8)tmp0);
- CALC_MSE_AVG_B(out, ref, var, avg);
- src0 = src4;
- }
-
- vec = __msa_hadd_s_w(avg, avg);
- *diff = HADD_SW_S32(vec);
-
- return HADD_SW_S32(var);
-}
-
-static uint32_t sub_pixel_sse_diff_8width_v_msa(
- const uint8_t *src, int32_t src_stride, const uint8_t *dst,
- int32_t dst_stride, const uint8_t *filter, int32_t height, int32_t *diff) {
- int16_t filtval;
- uint32_t loop_cnt;
- v16u8 src0, src1, src2, src3, src4;
- v16u8 ref0, ref1, ref2, ref3;
- v8u16 vec0, vec1, vec2, vec3;
- v8u16 tmp0, tmp1, tmp2, tmp3;
- v16u8 filt0;
- v8i16 avg = { 0 };
- v4i32 vec, var = { 0 };
-
- filtval = LH(filter);
- filt0 = (v16u8)__msa_fill_h(filtval);
-
- src0 = LD_UB(src);
- src += src_stride;
-
- for (loop_cnt = (height >> 2); loop_cnt--;) {
- LD_UB4(src, src_stride, src1, src2, src3, src4);
- src += (4 * src_stride);
- LD_UB4(dst, dst_stride, ref0, ref1, ref2, ref3);
- dst += (4 * dst_stride);
-
- PCKEV_D2_UB(ref1, ref0, ref3, ref2, ref0, ref1);
- ILVR_B4_UH(src1, src0, src2, src1, src3, src2, src4, src3, vec0, vec1, vec2,
- vec3);
- DOTP_UB4_UH(vec0, vec1, vec2, vec3, filt0, filt0, filt0, filt0, tmp0, tmp1,
- tmp2, tmp3);
- SRARI_H4_UH(tmp0, tmp1, tmp2, tmp3, FILTER_BITS);
- PCKEV_B2_UB(tmp1, tmp0, tmp3, tmp2, src0, src1);
- CALC_MSE_AVG_B(src0, ref0, var, avg);
- CALC_MSE_AVG_B(src1, ref1, var, avg);
- src0 = src4;
- }
-
- vec = __msa_hadd_s_w(avg, avg);
- *diff = HADD_SW_S32(vec);
-
- return HADD_SW_S32(var);
-}
-
-static uint32_t sub_pixel_sse_diff_16width_v_msa(
- const uint8_t *src, int32_t src_stride, const uint8_t *dst,
- int32_t dst_stride, const uint8_t *filter, int32_t height, int32_t *diff) {
- int16_t filtval;
- uint32_t loop_cnt;
- v16u8 ref0, ref1, ref2, ref3;
- v16u8 src0, src1, src2, src3, src4;
- v16u8 out0, out1, out2, out3;
- v16u8 vec0, vec1, vec2, vec3, vec4, vec5, vec6, vec7;
- v8u16 tmp0, tmp1, tmp2, tmp3;
- v16u8 filt0;
- v8i16 avg = { 0 };
- v4i32 vec, var = { 0 };
-
- filtval = LH(filter);
- filt0 = (v16u8)__msa_fill_h(filtval);
-
- src0 = LD_UB(src);
- src += src_stride;
-
- for (loop_cnt = (height >> 2); loop_cnt--;) {
- LD_UB4(src, src_stride, src1, src2, src3, src4);
- src += (4 * src_stride);
- LD_UB4(dst, dst_stride, ref0, ref1, ref2, ref3);
- dst += (4 * dst_stride);
-
- ILVR_B2_UB(src1, src0, src2, src1, vec0, vec2);
- ILVL_B2_UB(src1, src0, src2, src1, vec1, vec3);
- DOTP_UB2_UH(vec0, vec1, filt0, filt0, tmp0, tmp1);
- SRARI_H2_UH(tmp0, tmp1, FILTER_BITS);
- out0 = (v16u8)__msa_pckev_b((v16i8)tmp1, (v16i8)tmp0);
-
- ILVR_B2_UB(src3, src2, src4, src3, vec4, vec6);
- ILVL_B2_UB(src3, src2, src4, src3, vec5, vec7);
- DOTP_UB2_UH(vec2, vec3, filt0, filt0, tmp2, tmp3);
- SRARI_H2_UH(tmp2, tmp3, FILTER_BITS);
- out1 = (v16u8)__msa_pckev_b((v16i8)tmp3, (v16i8)tmp2);
-
- DOTP_UB2_UH(vec4, vec5, filt0, filt0, tmp0, tmp1);
- SRARI_H2_UH(tmp0, tmp1, FILTER_BITS);
- out2 = (v16u8)__msa_pckev_b((v16i8)tmp1, (v16i8)tmp0);
- DOTP_UB2_UH(vec6, vec7, filt0, filt0, tmp2, tmp3);
- SRARI_H2_UH(tmp2, tmp3, FILTER_BITS);
- out3 = (v16u8)__msa_pckev_b((v16i8)tmp3, (v16i8)tmp2);
-
- src0 = src4;
-
- CALC_MSE_AVG_B(out0, ref0, var, avg);
- CALC_MSE_AVG_B(out1, ref1, var, avg);
- CALC_MSE_AVG_B(out2, ref2, var, avg);
- CALC_MSE_AVG_B(out3, ref3, var, avg);
- }
-
- vec = __msa_hadd_s_w(avg, avg);
- *diff = HADD_SW_S32(vec);
-
- return HADD_SW_S32(var);
-}
-
-static uint32_t sub_pixel_sse_diff_32width_v_msa(
- const uint8_t *src, int32_t src_stride, const uint8_t *dst,
- int32_t dst_stride, const uint8_t *filter, int32_t height, int32_t *diff) {
- uint32_t loop_cnt, sse = 0;
- int32_t diff0[2];
-
- for (loop_cnt = 0; loop_cnt < 2; ++loop_cnt) {
- sse += sub_pixel_sse_diff_16width_v_msa(src, src_stride, dst, dst_stride,
- filter, height, &diff0[loop_cnt]);
- src += 16;
- dst += 16;
- }
-
- *diff = diff0[0] + diff0[1];
-
- return sse;
-}
-
-static uint32_t sub_pixel_sse_diff_64width_v_msa(
- const uint8_t *src, int32_t src_stride, const uint8_t *dst,
- int32_t dst_stride, const uint8_t *filter, int32_t height, int32_t *diff) {
- uint32_t loop_cnt, sse = 0;
- int32_t diff0[4];
-
- for (loop_cnt = 0; loop_cnt < 4; ++loop_cnt) {
- sse += sub_pixel_sse_diff_16width_v_msa(src, src_stride, dst, dst_stride,
- filter, height, &diff0[loop_cnt]);
- src += 16;
- dst += 16;
- }
-
- *diff = diff0[0] + diff0[1] + diff0[2] + diff0[3];
-
- return sse;
-}
-
-static uint32_t sub_pixel_sse_diff_4width_hv_msa(
- const uint8_t *src, int32_t src_stride, const uint8_t *dst,
- int32_t dst_stride, const uint8_t *filter_horiz, const uint8_t *filter_vert,
- int32_t height, int32_t *diff) {
- int16_t filtval;
- uint32_t loop_cnt;
- uint32_t ref0, ref1, ref2, ref3;
- v16u8 src0, src1, src2, src3, src4;
- v16u8 out, ref = { 0 };
- v16u8 filt_vt, filt_hz, vec0, vec1;
- v16u8 mask = { 0, 1, 1, 2, 2, 3, 3, 4, 16, 17, 17, 18, 18, 19, 19, 20 };
- v8u16 hz_out0, hz_out1, hz_out2, hz_out3, hz_out4;
- v8u16 tmp0, tmp1;
- v8i16 avg = { 0 };
- v4i32 vec, var = { 0 };
-
- filtval = LH(filter_horiz);
- filt_hz = (v16u8)__msa_fill_h(filtval);
- filtval = LH(filter_vert);
- filt_vt = (v16u8)__msa_fill_h(filtval);
-
- src0 = LD_UB(src);
- src += src_stride;
-
- for (loop_cnt = (height >> 2); loop_cnt--;) {
- LD_UB4(src, src_stride, src1, src2, src3, src4);
- src += (4 * src_stride);
- LW4(dst, dst_stride, ref0, ref1, ref2, ref3);
- dst += (4 * dst_stride);
- INSERT_W4_UB(ref0, ref1, ref2, ref3, ref);
- hz_out0 = HORIZ_2TAP_FILT_UH(src0, src1, mask, filt_hz, FILTER_BITS);
- hz_out2 = HORIZ_2TAP_FILT_UH(src2, src3, mask, filt_hz, FILTER_BITS);
- hz_out4 = HORIZ_2TAP_FILT_UH(src4, src4, mask, filt_hz, FILTER_BITS);
- hz_out1 = (v8u16)__msa_sldi_b((v16i8)hz_out2, (v16i8)hz_out0, 8);
- hz_out3 = (v8u16)__msa_pckod_d((v2i64)hz_out4, (v2i64)hz_out2);
- ILVEV_B2_UB(hz_out0, hz_out1, hz_out2, hz_out3, vec0, vec1);
- DOTP_UB2_UH(vec0, vec1, filt_vt, filt_vt, tmp0, tmp1);
- SRARI_H2_UH(tmp0, tmp1, FILTER_BITS);
- out = (v16u8)__msa_pckev_b((v16i8)tmp1, (v16i8)tmp0);
- CALC_MSE_AVG_B(out, ref, var, avg);
- src0 = src4;
- }
-
- vec = __msa_hadd_s_w(avg, avg);
- *diff = HADD_SW_S32(vec);
-
- return HADD_SW_S32(var);
-}
-
-static uint32_t sub_pixel_sse_diff_8width_hv_msa(
- const uint8_t *src, int32_t src_stride, const uint8_t *dst,
- int32_t dst_stride, const uint8_t *filter_horiz, const uint8_t *filter_vert,
- int32_t height, int32_t *diff) {
- int16_t filtval;
- uint32_t loop_cnt;
- v16u8 ref0, ref1, ref2, ref3;
- v16u8 src0, src1, src2, src3, src4;
- v16u8 out0, out1;
- v16u8 mask = { 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, 8 };
- v8u16 hz_out0, hz_out1;
- v8u16 tmp0, tmp1, tmp2, tmp3;
- v16u8 filt_vt, filt_hz, vec0;
- v8i16 avg = { 0 };
- v4i32 vec, var = { 0 };
-
- filtval = LH(filter_horiz);
- filt_hz = (v16u8)__msa_fill_h(filtval);
- filtval = LH(filter_vert);
- filt_vt = (v16u8)__msa_fill_h(filtval);
-
- src0 = LD_UB(src);
- src += src_stride;
- hz_out0 = HORIZ_2TAP_FILT_UH(src0, src0, mask, filt_hz, FILTER_BITS);
-
- for (loop_cnt = (height >> 2); loop_cnt--;) {
- LD_UB4(src, src_stride, src1, src2, src3, src4);
- src += (4 * src_stride);
- LD_UB4(dst, dst_stride, ref0, ref1, ref2, ref3);
- dst += (4 * dst_stride);
-
- PCKEV_D2_UB(ref1, ref0, ref3, ref2, ref0, ref1);
- hz_out1 = HORIZ_2TAP_FILT_UH(src1, src1, mask, filt_hz, FILTER_BITS);
- vec0 = (v16u8)__msa_ilvev_b((v16i8)hz_out1, (v16i8)hz_out0);
- tmp0 = __msa_dotp_u_h(vec0, filt_vt);
- hz_out0 = HORIZ_2TAP_FILT_UH(src2, src2, mask, filt_hz, FILTER_BITS);
- vec0 = (v16u8)__msa_ilvev_b((v16i8)hz_out0, (v16i8)hz_out1);
- tmp1 = __msa_dotp_u_h(vec0, filt_vt);
- SRARI_H2_UH(tmp0, tmp1, FILTER_BITS);
- hz_out1 = HORIZ_2TAP_FILT_UH(src3, src3, mask, filt_hz, FILTER_BITS);
- vec0 = (v16u8)__msa_ilvev_b((v16i8)hz_out1, (v16i8)hz_out0);
- tmp2 = __msa_dotp_u_h(vec0, filt_vt);
- hz_out0 = HORIZ_2TAP_FILT_UH(src4, src4, mask, filt_hz, FILTER_BITS);
- vec0 = (v16u8)__msa_ilvev_b((v16i8)hz_out0, (v16i8)hz_out1);
- tmp3 = __msa_dotp_u_h(vec0, filt_vt);
- SRARI_H2_UH(tmp2, tmp3, FILTER_BITS);
- PCKEV_B2_UB(tmp1, tmp0, tmp3, tmp2, out0, out1);
- CALC_MSE_AVG_B(out0, ref0, var, avg);
- CALC_MSE_AVG_B(out1, ref1, var, avg);
- }
-
- vec = __msa_hadd_s_w(avg, avg);
- *diff = HADD_SW_S32(vec);
-
- return HADD_SW_S32(var);
-}
-
-static uint32_t sub_pixel_sse_diff_16width_hv_msa(
- const uint8_t *src, int32_t src_stride, const uint8_t *dst,
- int32_t dst_stride, const uint8_t *filter_horiz, const uint8_t *filter_vert,
- int32_t height, int32_t *diff) {
- int16_t filtval;
- uint32_t loop_cnt;
- v16u8 src0, src1, src2, src3, src4, src5, src6, src7;
- v16u8 ref0, ref1, ref2, ref3;
- v16u8 filt_hz, filt_vt, vec0, vec1;
- v16u8 mask = { 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, 8 };
- v8u16 hz_out0, hz_out1, hz_out2, hz_out3;
- v8u16 tmp0, tmp1;
- v8i16 avg = { 0 };
- v4i32 vec, var = { 0 };
-
- filtval = LH(filter_horiz);
- filt_hz = (v16u8)__msa_fill_h(filtval);
- filtval = LH(filter_vert);
- filt_vt = (v16u8)__msa_fill_h(filtval);
-
- LD_UB2(src, 8, src0, src1);
- src += src_stride;
-
- hz_out0 = HORIZ_2TAP_FILT_UH(src0, src0, mask, filt_hz, FILTER_BITS);
- hz_out2 = HORIZ_2TAP_FILT_UH(src1, src1, mask, filt_hz, FILTER_BITS);
-
- for (loop_cnt = (height >> 2); loop_cnt--;) {
- LD_UB4(src, src_stride, src0, src2, src4, src6);
- LD_UB4(src + 8, src_stride, src1, src3, src5, src7);
- src += (4 * src_stride);
- LD_UB4(dst, dst_stride, ref0, ref1, ref2, ref3);
- dst += (4 * dst_stride);
-
- hz_out1 = HORIZ_2TAP_FILT_UH(src0, src0, mask, filt_hz, FILTER_BITS);
- hz_out3 = HORIZ_2TAP_FILT_UH(src1, src1, mask, filt_hz, FILTER_BITS);
- ILVEV_B2_UB(hz_out0, hz_out1, hz_out2, hz_out3, vec0, vec1);
- DOTP_UB2_UH(vec0, vec1, filt_vt, filt_vt, tmp0, tmp1);
- SRARI_H2_UH(tmp0, tmp1, FILTER_BITS);
- src0 = (v16u8)__msa_pckev_b((v16i8)tmp1, (v16i8)tmp0);
-
- hz_out0 = HORIZ_2TAP_FILT_UH(src2, src2, mask, filt_hz, FILTER_BITS);
- hz_out2 = HORIZ_2TAP_FILT_UH(src3, src3, mask, filt_hz, FILTER_BITS);
- ILVEV_B2_UB(hz_out1, hz_out0, hz_out3, hz_out2, vec0, vec1);
- DOTP_UB2_UH(vec0, vec1, filt_vt, filt_vt, tmp0, tmp1);
- SRARI_H2_UH(tmp0, tmp1, FILTER_BITS);
- src1 = (v16u8)__msa_pckev_b((v16i8)tmp1, (v16i8)tmp0);
-
- hz_out1 = HORIZ_2TAP_FILT_UH(src4, src4, mask, filt_hz, FILTER_BITS);
- hz_out3 = HORIZ_2TAP_FILT_UH(src5, src5, mask, filt_hz, FILTER_BITS);
- ILVEV_B2_UB(hz_out0, hz_out1, hz_out2, hz_out3, vec0, vec1);
- DOTP_UB2_UH(vec0, vec1, filt_vt, filt_vt, tmp0, tmp1);
- SRARI_H2_UH(tmp0, tmp1, FILTER_BITS);
- src2 = (v16u8)__msa_pckev_b((v16i8)tmp1, (v16i8)tmp0);
-
- hz_out0 = HORIZ_2TAP_FILT_UH(src6, src6, mask, filt_hz, FILTER_BITS);
- hz_out2 = HORIZ_2TAP_FILT_UH(src7, src7, mask, filt_hz, FILTER_BITS);
- ILVEV_B2_UB(hz_out1, hz_out0, hz_out3, hz_out2, vec0, vec1);
- DOTP_UB2_UH(vec0, vec1, filt_vt, filt_vt, tmp0, tmp1);
- SRARI_H2_UH(tmp0, tmp1, FILTER_BITS);
- src3 = (v16u8)__msa_pckev_b((v16i8)tmp1, (v16i8)tmp0);
-
- CALC_MSE_AVG_B(src0, ref0, var, avg);
- CALC_MSE_AVG_B(src1, ref1, var, avg);
- CALC_MSE_AVG_B(src2, ref2, var, avg);
- CALC_MSE_AVG_B(src3, ref3, var, avg);
- }
-
- vec = __msa_hadd_s_w(avg, avg);
- *diff = HADD_SW_S32(vec);
-
- return HADD_SW_S32(var);
-}
-
-static uint32_t sub_pixel_sse_diff_32width_hv_msa(
- const uint8_t *src, int32_t src_stride, const uint8_t *dst,
- int32_t dst_stride, const uint8_t *filter_horiz, const uint8_t *filter_vert,
- int32_t height, int32_t *diff) {
- uint32_t loop_cnt, sse = 0;
- int32_t diff0[2];
-
- for (loop_cnt = 0; loop_cnt < 2; ++loop_cnt) {
- sse += sub_pixel_sse_diff_16width_hv_msa(src, src_stride, dst, dst_stride,
- filter_horiz, filter_vert, height,
- &diff0[loop_cnt]);
- src += 16;
- dst += 16;
- }
-
- *diff = diff0[0] + diff0[1];
-
- return sse;
-}
-
-static uint32_t sub_pixel_sse_diff_64width_hv_msa(
- const uint8_t *src, int32_t src_stride, const uint8_t *dst,
- int32_t dst_stride, const uint8_t *filter_horiz, const uint8_t *filter_vert,
- int32_t height, int32_t *diff) {
- uint32_t loop_cnt, sse = 0;
- int32_t diff0[4];
-
- for (loop_cnt = 0; loop_cnt < 4; ++loop_cnt) {
- sse += sub_pixel_sse_diff_16width_hv_msa(src, src_stride, dst, dst_stride,
- filter_horiz, filter_vert, height,
- &diff0[loop_cnt]);
- src += 16;
- dst += 16;
- }
-
- *diff = diff0[0] + diff0[1] + diff0[2] + diff0[3];
-
- return sse;
-}
-
-static uint32_t sub_pixel_avg_sse_diff_4width_h_msa(
- const uint8_t *src, int32_t src_stride, const uint8_t *dst,
- int32_t dst_stride, const uint8_t *sec_pred, const uint8_t *filter,
- int32_t height, int32_t *diff) {
- int16_t filtval;
- uint32_t loop_cnt;
- uint32_t ref0, ref1, ref2, ref3;
- v16u8 out, pred, filt0, ref = { 0 };
- v16i8 src0, src1, src2, src3;
- v16i8 mask = { 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, 8 };
- v8u16 vec0, vec1, vec2, vec3;
- v8i16 avg = { 0 };
- v4i32 vec, var = { 0 };
-
- filtval = LH(filter);
- filt0 = (v16u8)__msa_fill_h(filtval);
-
- for (loop_cnt = (height >> 2); loop_cnt--;) {
- LD_SB4(src, src_stride, src0, src1, src2, src3);
- src += (4 * src_stride);
- pred = LD_UB(sec_pred);
- sec_pred += 16;
- LW4(dst, dst_stride, ref0, ref1, ref2, ref3);
- dst += (4 * dst_stride);
-
- INSERT_W4_UB(ref0, ref1, ref2, ref3, ref);
- VSHF_B2_UH(src0, src0, src1, src1, mask, mask, vec0, vec1);
- VSHF_B2_UH(src2, src2, src3, src3, mask, mask, vec2, vec3);
- DOTP_UB4_UH(vec0, vec1, vec2, vec3, filt0, filt0, filt0, filt0, vec0, vec1,
- vec2, vec3);
- SRARI_H4_UH(vec0, vec1, vec2, vec3, FILTER_BITS);
- PCKEV_B4_SB(vec0, vec0, vec1, vec1, vec2, vec2, vec3, vec3, src0, src1,
- src2, src3);
- ILVEV_W2_SB(src0, src1, src2, src3, src0, src2);
- out = (v16u8)__msa_ilvev_d((v2i64)src2, (v2i64)src0);
- out = __msa_aver_u_b(out, pred);
- CALC_MSE_AVG_B(out, ref, var, avg);
- }
-
- vec = __msa_hadd_s_w(avg, avg);
- *diff = HADD_SW_S32(vec);
-
- return HADD_SW_S32(var);
-}
-
-static uint32_t sub_pixel_avg_sse_diff_8width_h_msa(
- const uint8_t *src, int32_t src_stride, const uint8_t *dst,
- int32_t dst_stride, const uint8_t *sec_pred, const uint8_t *filter,
- int32_t height, int32_t *diff) {
- int16_t filtval;
- uint32_t loop_cnt;
- v16u8 out, pred, filt0;
- v16u8 ref0, ref1, ref2, ref3;
- v16i8 src0, src1, src2, src3;
- v16i8 mask = { 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, 8 };
- v8u16 vec0, vec1, vec2, vec3;
- v8i16 avg = { 0 };
- v4i32 vec, var = { 0 };
-
- filtval = LH(filter);
- filt0 = (v16u8)__msa_fill_h(filtval);
-
- for (loop_cnt = (height >> 2); loop_cnt--;) {
- LD_SB4(src, src_stride, src0, src1, src2, src3);
- src += (4 * src_stride);
- LD_UB4(dst, dst_stride, ref0, ref1, ref2, ref3);
- dst += (4 * dst_stride);
-
- PCKEV_D2_UB(ref1, ref0, ref3, ref2, ref0, ref1);
- VSHF_B2_UH(src0, src0, src1, src1, mask, mask, vec0, vec1);
- VSHF_B2_UH(src2, src2, src3, src3, mask, mask, vec2, vec3);
- DOTP_UB4_UH(vec0, vec1, vec2, vec3, filt0, filt0, filt0, filt0, vec0, vec1,
- vec2, vec3);
- SRARI_H4_UH(vec0, vec1, vec2, vec3, FILTER_BITS);
- PCKEV_B4_SB(vec0, vec0, vec1, vec1, vec2, vec2, vec3, vec3, src0, src1,
- src2, src3);
- out = (v16u8)__msa_ilvev_d((v2i64)src1, (v2i64)src0);
-
- pred = LD_UB(sec_pred);
- sec_pred += 16;
- out = __msa_aver_u_b(out, pred);
- CALC_MSE_AVG_B(out, ref0, var, avg);
- out = (v16u8)__msa_ilvev_d((v2i64)src3, (v2i64)src2);
- pred = LD_UB(sec_pred);
- sec_pred += 16;
- out = __msa_aver_u_b(out, pred);
- CALC_MSE_AVG_B(out, ref1, var, avg);
- }
-
- vec = __msa_hadd_s_w(avg, avg);
- *diff = HADD_SW_S32(vec);
-
- return HADD_SW_S32(var);
-}
-
-static uint32_t subpel_avg_ssediff_16w_h_msa(
- const uint8_t *src, int32_t src_stride, const uint8_t *dst,
- int32_t dst_stride, const uint8_t *sec_pred, const uint8_t *filter,
- int32_t height, int32_t *diff, int32_t width) {
- int16_t filtval;
- uint32_t loop_cnt;
- v16i8 src0, src1, src2, src3, src4, src5, src6, src7;
- v16i8 mask = { 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, 8 };
- v16u8 dst0, dst1, dst2, dst3;
- v16u8 tmp0, tmp1, tmp2, tmp3;
- v16u8 pred0, pred1, pred2, pred3, filt0;
- v8u16 vec0, vec1, vec2, vec3, vec4, vec5, vec6, vec7;
- v8u16 out0, out1, out2, out3, out4, out5, out6, out7;
- v8i16 avg = { 0 };
- v4i32 vec, var = { 0 };
-
- filtval = LH(filter);
- filt0 = (v16u8)__msa_fill_h(filtval);
-
- for (loop_cnt = (height >> 2); loop_cnt--;) {
- LD_SB4(src, src_stride, src0, src2, src4, src6);
- LD_SB4(src + 8, src_stride, src1, src3, src5, src7);
- src += (4 * src_stride);
- LD_UB4(dst, dst_stride, dst0, dst1, dst2, dst3);
- dst += (4 * dst_stride);
- LD_UB4(sec_pred, width, pred0, pred1, pred2, pred3);
- sec_pred += (4 * width);
-
- VSHF_B2_UH(src0, src0, src1, src1, mask, mask, vec0, vec1);
- VSHF_B2_UH(src2, src2, src3, src3, mask, mask, vec2, vec3);
- VSHF_B2_UH(src4, src4, src5, src5, mask, mask, vec4, vec5);
- VSHF_B2_UH(src6, src6, src7, src7, mask, mask, vec6, vec7);
- DOTP_UB4_UH(vec0, vec1, vec2, vec3, filt0, filt0, filt0, filt0, out0, out1,
- out2, out3);
- DOTP_UB4_UH(vec4, vec5, vec6, vec7, filt0, filt0, filt0, filt0, out4, out5,
- out6, out7);
- SRARI_H4_UH(out0, out1, out2, out3, FILTER_BITS);
- SRARI_H4_UH(out4, out5, out6, out7, FILTER_BITS);
- PCKEV_B4_UB(out1, out0, out3, out2, out5, out4, out7, out6, tmp0, tmp1,
- tmp2, tmp3);
- AVER_UB4_UB(tmp0, pred0, tmp1, pred1, tmp2, pred2, tmp3, pred3, tmp0, tmp1,
- tmp2, tmp3);
-
- CALC_MSE_AVG_B(tmp0, dst0, var, avg);
- CALC_MSE_AVG_B(tmp1, dst1, var, avg);
- CALC_MSE_AVG_B(tmp2, dst2, var, avg);
- CALC_MSE_AVG_B(tmp3, dst3, var, avg);
- }
-
- vec = __msa_hadd_s_w(avg, avg);
- *diff = HADD_SW_S32(vec);
-
- return HADD_SW_S32(var);
-}
-
-static uint32_t sub_pixel_avg_sse_diff_16width_h_msa(
- const uint8_t *src, int32_t src_stride, const uint8_t *dst,
- int32_t dst_stride, const uint8_t *sec_pred, const uint8_t *filter,
- int32_t height, int32_t *diff) {
- return subpel_avg_ssediff_16w_h_msa(src, src_stride, dst, dst_stride,
- sec_pred, filter, height, diff, 16);
-}
-
-static uint32_t sub_pixel_avg_sse_diff_32width_h_msa(
- const uint8_t *src, int32_t src_stride, const uint8_t *dst,
- int32_t dst_stride, const uint8_t *sec_pred, const uint8_t *filter,
- int32_t height, int32_t *diff) {
- uint32_t loop_cnt, sse = 0;
- int32_t diff0[2];
-
- for (loop_cnt = 0; loop_cnt < 2; ++loop_cnt) {
- sse +=
- subpel_avg_ssediff_16w_h_msa(src, src_stride, dst, dst_stride, sec_pred,
- filter, height, &diff0[loop_cnt], 32);
- src += 16;
- dst += 16;
- sec_pred += 16;
- }
-
- *diff = diff0[0] + diff0[1];
-
- return sse;
-}
-
-static uint32_t sub_pixel_avg_sse_diff_64width_h_msa(
- const uint8_t *src, int32_t src_stride, const uint8_t *dst,
- int32_t dst_stride, const uint8_t *sec_pred, const uint8_t *filter,
- int32_t height, int32_t *diff) {
- uint32_t loop_cnt, sse = 0;
- int32_t diff0[4];
-
- for (loop_cnt = 0; loop_cnt < 4; ++loop_cnt) {
- sse +=
- subpel_avg_ssediff_16w_h_msa(src, src_stride, dst, dst_stride, sec_pred,
- filter, height, &diff0[loop_cnt], 64);
- src += 16;
- dst += 16;
- sec_pred += 16;
- }
-
- *diff = diff0[0] + diff0[1] + diff0[2] + diff0[3];
-
- return sse;
-}
-
-static uint32_t sub_pixel_avg_sse_diff_4width_v_msa(
- const uint8_t *src, int32_t src_stride, const uint8_t *dst,
- int32_t dst_stride, const uint8_t *sec_pred, const uint8_t *filter,
- int32_t height, int32_t *diff) {
- int16_t filtval;
- uint32_t loop_cnt;
- uint32_t ref0, ref1, ref2, ref3;
- v16u8 src0, src1, src2, src3, src4;
- v16u8 src10_r, src32_r, src21_r, src43_r;
- v16u8 out, pred, ref = { 0 };
- v16u8 src2110, src4332, filt0;
- v8i16 avg = { 0 };
- v4i32 vec, var = { 0 };
- v8u16 tmp0, tmp1;
-
- filtval = LH(filter);
- filt0 = (v16u8)__msa_fill_h(filtval);
-
- src0 = LD_UB(src);
- src += src_stride;
-
- for (loop_cnt = (height >> 2); loop_cnt--;) {
- LD_UB4(src, src_stride, src1, src2, src3, src4);
- src += (4 * src_stride);
- pred = LD_UB(sec_pred);
- sec_pred += 16;
- LW4(dst, dst_stride, ref0, ref1, ref2, ref3);
- dst += (4 * dst_stride);
-
- INSERT_W4_UB(ref0, ref1, ref2, ref3, ref);
- ILVR_B4_UB(src1, src0, src2, src1, src3, src2, src4, src3, src10_r, src21_r,
- src32_r, src43_r);
- ILVR_D2_UB(src21_r, src10_r, src43_r, src32_r, src2110, src4332);
- DOTP_UB2_UH(src2110, src4332, filt0, filt0, tmp0, tmp1);
- SRARI_H2_UH(tmp0, tmp1, FILTER_BITS);
-
- out = (v16u8)__msa_pckev_b((v16i8)tmp1, (v16i8)tmp0);
- out = __msa_aver_u_b(out, pred);
- CALC_MSE_AVG_B(out, ref, var, avg);
- src0 = src4;
- }
-
- vec = __msa_hadd_s_w(avg, avg);
- *diff = HADD_SW_S32(vec);
-
- return HADD_SW_S32(var);
-}
-
-static uint32_t sub_pixel_avg_sse_diff_8width_v_msa(
- const uint8_t *src, int32_t src_stride, const uint8_t *dst,
- int32_t dst_stride, const uint8_t *sec_pred, const uint8_t *filter,
- int32_t height, int32_t *diff) {
- int16_t filtval;
- uint32_t loop_cnt;
- v16u8 src0, src1, src2, src3, src4;
- v16u8 ref0, ref1, ref2, ref3;
- v16u8 pred0, pred1, filt0;
- v8u16 vec0, vec1, vec2, vec3;
- v8u16 tmp0, tmp1, tmp2, tmp3;
- v8i16 avg = { 0 };
- v4i32 vec, var = { 0 };
-
- filtval = LH(filter);
- filt0 = (v16u8)__msa_fill_h(filtval);
-
- src0 = LD_UB(src);
- src += src_stride;
-
- for (loop_cnt = (height >> 2); loop_cnt--;) {
- LD_UB4(src, src_stride, src1, src2, src3, src4);
- src += (4 * src_stride);
- LD_UB2(sec_pred, 16, pred0, pred1);
- sec_pred += 32;
- LD_UB4(dst, dst_stride, ref0, ref1, ref2, ref3);
- dst += (4 * dst_stride);
- PCKEV_D2_UB(ref1, ref0, ref3, ref2, ref0, ref1);
- ILVR_B4_UH(src1, src0, src2, src1, src3, src2, src4, src3, vec0, vec1, vec2,
- vec3);
- DOTP_UB4_UH(vec0, vec1, vec2, vec3, filt0, filt0, filt0, filt0, tmp0, tmp1,
- tmp2, tmp3);
- SRARI_H4_UH(tmp0, tmp1, tmp2, tmp3, FILTER_BITS);
- PCKEV_B2_UB(tmp1, tmp0, tmp3, tmp2, src0, src1);
- AVER_UB2_UB(src0, pred0, src1, pred1, src0, src1);
- CALC_MSE_AVG_B(src0, ref0, var, avg);
- CALC_MSE_AVG_B(src1, ref1, var, avg);
-
- src0 = src4;
- }
-
- vec = __msa_hadd_s_w(avg, avg);
- *diff = HADD_SW_S32(vec);
-
- return HADD_SW_S32(var);
-}
-
-static uint32_t subpel_avg_ssediff_16w_v_msa(
- const uint8_t *src, int32_t src_stride, const uint8_t *dst,
- int32_t dst_stride, const uint8_t *sec_pred, const uint8_t *filter,
- int32_t height, int32_t *diff, int32_t width) {
- int16_t filtval;
- uint32_t loop_cnt;
- v16u8 ref0, ref1, ref2, ref3;
- v16u8 pred0, pred1, pred2, pred3;
- v16u8 src0, src1, src2, src3, src4;
- v16u8 out0, out1, out2, out3, filt0;
- v8u16 vec0, vec1, vec2, vec3, vec4, vec5, vec6, vec7;
- v8u16 tmp0, tmp1, tmp2, tmp3;
- v8i16 avg = { 0 };
- v4i32 vec, var = { 0 };
-
- filtval = LH(filter);
- filt0 = (v16u8)__msa_fill_h(filtval);
-
- src0 = LD_UB(src);
- src += src_stride;
-
- for (loop_cnt = (height >> 2); loop_cnt--;) {
- LD_UB4(src, src_stride, src1, src2, src3, src4);
- src += (4 * src_stride);
- LD_UB4(sec_pred, width, pred0, pred1, pred2, pred3);
- sec_pred += (4 * width);
-
- ILVR_B2_UH(src1, src0, src2, src1, vec0, vec2);
- ILVL_B2_UH(src1, src0, src2, src1, vec1, vec3);
- DOTP_UB2_UH(vec0, vec1, filt0, filt0, tmp0, tmp1);
- SRARI_H2_UH(tmp0, tmp1, FILTER_BITS);
- out0 = (v16u8)__msa_pckev_b((v16i8)tmp1, (v16i8)tmp0);
-
- ILVR_B2_UH(src3, src2, src4, src3, vec4, vec6);
- ILVL_B2_UH(src3, src2, src4, src3, vec5, vec7);
- DOTP_UB2_UH(vec2, vec3, filt0, filt0, tmp2, tmp3);
- SRARI_H2_UH(tmp2, tmp3, FILTER_BITS);
- out1 = (v16u8)__msa_pckev_b((v16i8)tmp3, (v16i8)tmp2);
-
- DOTP_UB2_UH(vec4, vec5, filt0, filt0, tmp0, tmp1);
- SRARI_H2_UH(tmp0, tmp1, FILTER_BITS);
- out2 = (v16u8)__msa_pckev_b((v16i8)tmp1, (v16i8)tmp0);
-
- DOTP_UB2_UH(vec6, vec7, filt0, filt0, tmp2, tmp3);
- SRARI_H2_UH(tmp2, tmp3, FILTER_BITS);
- out3 = (v16u8)__msa_pckev_b((v16i8)tmp3, (v16i8)tmp2);
-
- src0 = src4;
- LD_UB4(dst, dst_stride, ref0, ref1, ref2, ref3);
- dst += (4 * dst_stride);
-
- AVER_UB4_UB(out0, pred0, out1, pred1, out2, pred2, out3, pred3, out0, out1,
- out2, out3);
-
- CALC_MSE_AVG_B(out0, ref0, var, avg);
- CALC_MSE_AVG_B(out1, ref1, var, avg);
- CALC_MSE_AVG_B(out2, ref2, var, avg);
- CALC_MSE_AVG_B(out3, ref3, var, avg);
- }
-
- vec = __msa_hadd_s_w(avg, avg);
- *diff = HADD_SW_S32(vec);
-
- return HADD_SW_S32(var);
-}
-
-static uint32_t sub_pixel_avg_sse_diff_16width_v_msa(
- const uint8_t *src, int32_t src_stride, const uint8_t *dst,
- int32_t dst_stride, const uint8_t *sec_pred, const uint8_t *filter,
- int32_t height, int32_t *diff) {
- return subpel_avg_ssediff_16w_v_msa(src, src_stride, dst, dst_stride,
- sec_pred, filter, height, diff, 16);
-}
-
-static uint32_t sub_pixel_avg_sse_diff_32width_v_msa(
- const uint8_t *src, int32_t src_stride, const uint8_t *dst,
- int32_t dst_stride, const uint8_t *sec_pred, const uint8_t *filter,
- int32_t height, int32_t *diff) {
- uint32_t loop_cnt, sse = 0;
- int32_t diff0[2];
-
- for (loop_cnt = 0; loop_cnt < 2; ++loop_cnt) {
- sse +=
- subpel_avg_ssediff_16w_v_msa(src, src_stride, dst, dst_stride, sec_pred,
- filter, height, &diff0[loop_cnt], 32);
- src += 16;
- dst += 16;
- sec_pred += 16;
- }
-
- *diff = diff0[0] + diff0[1];
-
- return sse;
-}
-
-static uint32_t sub_pixel_avg_sse_diff_64width_v_msa(
- const uint8_t *src, int32_t src_stride, const uint8_t *dst,
- int32_t dst_stride, const uint8_t *sec_pred, const uint8_t *filter,
- int32_t height, int32_t *diff) {
- uint32_t loop_cnt, sse = 0;
- int32_t diff0[4];
-
- for (loop_cnt = 0; loop_cnt < 4; ++loop_cnt) {
- sse +=
- subpel_avg_ssediff_16w_v_msa(src, src_stride, dst, dst_stride, sec_pred,
- filter, height, &diff0[loop_cnt], 64);
- src += 16;
- dst += 16;
- sec_pred += 16;
- }
-
- *diff = diff0[0] + diff0[1] + diff0[2] + diff0[3];
-
- return sse;
-}
-
-static uint32_t sub_pixel_avg_sse_diff_4width_hv_msa(
- const uint8_t *src, int32_t src_stride, const uint8_t *dst,
- int32_t dst_stride, const uint8_t *sec_pred, const uint8_t *filter_horiz,
- const uint8_t *filter_vert, int32_t height, int32_t *diff) {
- int16_t filtval;
- uint32_t loop_cnt;
- uint32_t ref0, ref1, ref2, ref3;
- v16u8 src0, src1, src2, src3, src4;
- v16u8 mask = { 0, 1, 1, 2, 2, 3, 3, 4, 16, 17, 17, 18, 18, 19, 19, 20 };
- v16u8 filt_hz, filt_vt, vec0, vec1;
- v16u8 out, pred, ref = { 0 };
- v8u16 hz_out0, hz_out1, hz_out2, hz_out3, hz_out4, tmp0, tmp1;
- v8i16 avg = { 0 };
- v4i32 vec, var = { 0 };
-
- filtval = LH(filter_horiz);
- filt_hz = (v16u8)__msa_fill_h(filtval);
- filtval = LH(filter_vert);
- filt_vt = (v16u8)__msa_fill_h(filtval);
-
- src0 = LD_UB(src);
- src += src_stride;
-
- for (loop_cnt = (height >> 2); loop_cnt--;) {
- LD_UB4(src, src_stride, src1, src2, src3, src4);
- src += (4 * src_stride);
- pred = LD_UB(sec_pred);
- sec_pred += 16;
- LW4(dst, dst_stride, ref0, ref1, ref2, ref3);
- dst += (4 * dst_stride);
- INSERT_W4_UB(ref0, ref1, ref2, ref3, ref);
- hz_out0 = HORIZ_2TAP_FILT_UH(src0, src1, mask, filt_hz, FILTER_BITS);
- hz_out2 = HORIZ_2TAP_FILT_UH(src2, src3, mask, filt_hz, FILTER_BITS);
- hz_out4 = HORIZ_2TAP_FILT_UH(src4, src4, mask, filt_hz, FILTER_BITS);
- hz_out1 = (v8u16)__msa_sldi_b((v16i8)hz_out2, (v16i8)hz_out0, 8);
- hz_out3 = (v8u16)__msa_pckod_d((v2i64)hz_out4, (v2i64)hz_out2);
- ILVEV_B2_UB(hz_out0, hz_out1, hz_out2, hz_out3, vec0, vec1);
- DOTP_UB2_UH(vec0, vec1, filt_vt, filt_vt, tmp0, tmp1);
- SRARI_H2_UH(tmp0, tmp1, FILTER_BITS);
- out = (v16u8)__msa_pckev_b((v16i8)tmp1, (v16i8)tmp0);
- out = __msa_aver_u_b(out, pred);
- CALC_MSE_AVG_B(out, ref, var, avg);
- src0 = src4;
- }
-
- vec = __msa_hadd_s_w(avg, avg);
- *diff = HADD_SW_S32(vec);
-
- return HADD_SW_S32(var);
-}
-
-static uint32_t sub_pixel_avg_sse_diff_8width_hv_msa(
- const uint8_t *src, int32_t src_stride, const uint8_t *dst,
- int32_t dst_stride, const uint8_t *sec_pred, const uint8_t *filter_horiz,
- const uint8_t *filter_vert, int32_t height, int32_t *diff) {
- int16_t filtval;
- uint32_t loop_cnt;
- v16u8 ref0, ref1, ref2, ref3;
- v16u8 src0, src1, src2, src3, src4;
- v16u8 pred0, pred1, out0, out1;
- v16u8 filt_hz, filt_vt, vec0;
- v16u8 mask = { 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, 8 };
- v8u16 hz_out0, hz_out1, tmp0, tmp1, tmp2, tmp3;
- v8i16 avg = { 0 };
- v4i32 vec, var = { 0 };
-
- filtval = LH(filter_horiz);
- filt_hz = (v16u8)__msa_fill_h(filtval);
- filtval = LH(filter_vert);
- filt_vt = (v16u8)__msa_fill_h(filtval);
-
- src0 = LD_UB(src);
- src += src_stride;
- hz_out0 = HORIZ_2TAP_FILT_UH(src0, src0, mask, filt_hz, FILTER_BITS);
-
- for (loop_cnt = (height >> 2); loop_cnt--;) {
- LD_UB4(src, src_stride, src1, src2, src3, src4);
- src += (4 * src_stride);
- LD_UB2(sec_pred, 16, pred0, pred1);
- sec_pred += 32;
- LD_UB4(dst, dst_stride, ref0, ref1, ref2, ref3);
- dst += (4 * dst_stride);
-
- PCKEV_D2_UB(ref1, ref0, ref3, ref2, ref0, ref1);
- hz_out1 = HORIZ_2TAP_FILT_UH(src1, src1, mask, filt_hz, FILTER_BITS);
-
- vec0 = (v16u8)__msa_ilvev_b((v16i8)hz_out1, (v16i8)hz_out0);
- tmp0 = __msa_dotp_u_h(vec0, filt_vt);
- hz_out0 = HORIZ_2TAP_FILT_UH(src2, src2, mask, filt_hz, FILTER_BITS);
-
- vec0 = (v16u8)__msa_ilvev_b((v16i8)hz_out0, (v16i8)hz_out1);
- tmp1 = __msa_dotp_u_h(vec0, filt_vt);
- SRARI_H2_UH(tmp0, tmp1, FILTER_BITS);
- hz_out1 = HORIZ_2TAP_FILT_UH(src3, src3, mask, filt_hz, FILTER_BITS);
-
- vec0 = (v16u8)__msa_ilvev_b((v16i8)hz_out1, (v16i8)hz_out0);
- tmp2 = __msa_dotp_u_h(vec0, filt_vt);
- hz_out0 = HORIZ_2TAP_FILT_UH(src4, src4, mask, filt_hz, FILTER_BITS);
-
- vec0 = (v16u8)__msa_ilvev_b((v16i8)hz_out0, (v16i8)hz_out1);
- tmp3 = __msa_dotp_u_h(vec0, filt_vt);
-
- SRARI_H2_UH(tmp2, tmp3, FILTER_BITS);
- PCKEV_B2_UB(tmp1, tmp0, tmp3, tmp2, out0, out1);
- AVER_UB2_UB(out0, pred0, out1, pred1, out0, out1);
-
- CALC_MSE_AVG_B(out0, ref0, var, avg);
- CALC_MSE_AVG_B(out1, ref1, var, avg);
- }
-
- vec = __msa_hadd_s_w(avg, avg);
- *diff = HADD_SW_S32(vec);
-
- return HADD_SW_S32(var);
-}
-
-static uint32_t subpel_avg_ssediff_16w_hv_msa(
- const uint8_t *src, int32_t src_stride, const uint8_t *dst,
- int32_t dst_stride, const uint8_t *sec_pred, const uint8_t *filter_horiz,
- const uint8_t *filter_vert, int32_t height, int32_t *diff, int32_t width) {
- int16_t filtval;
- uint32_t loop_cnt;
- v16u8 src0, src1, src2, src3, src4, src5, src6, src7;
- v16u8 ref0, ref1, ref2, ref3;
- v16u8 pred0, pred1, pred2, pred3;
- v16u8 out0, out1, out2, out3;
- v16u8 filt_hz, filt_vt, vec0, vec1;
- v16u8 mask = { 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, 8 };
- v8u16 hz_out0, hz_out1, hz_out2, hz_out3, tmp0, tmp1;
- v8i16 avg = { 0 };
- v4i32 vec, var = { 0 };
-
- filtval = LH(filter_horiz);
- filt_hz = (v16u8)__msa_fill_h(filtval);
- filtval = LH(filter_vert);
- filt_vt = (v16u8)__msa_fill_h(filtval);
-
- LD_UB2(src, 8, src0, src1);
- src += src_stride;
-
- hz_out0 = HORIZ_2TAP_FILT_UH(src0, src0, mask, filt_hz, FILTER_BITS);
- hz_out2 = HORIZ_2TAP_FILT_UH(src1, src1, mask, filt_hz, FILTER_BITS);
-
- for (loop_cnt = (height >> 2); loop_cnt--;) {
- LD_UB4(src, src_stride, src0, src2, src4, src6);
- LD_UB4(src + 8, src_stride, src1, src3, src5, src7);
- src += (4 * src_stride);
- LD_UB4(sec_pred, width, pred0, pred1, pred2, pred3);
- sec_pred += (4 * width);
-
- hz_out1 = HORIZ_2TAP_FILT_UH(src0, src0, mask, filt_hz, FILTER_BITS);
- hz_out3 = HORIZ_2TAP_FILT_UH(src1, src1, mask, filt_hz, FILTER_BITS);
- ILVEV_B2_UB(hz_out0, hz_out1, hz_out2, hz_out3, vec0, vec1);
- DOTP_UB2_UH(vec0, vec1, filt_vt, filt_vt, tmp0, tmp1);
- SRARI_H2_UH(tmp0, tmp1, FILTER_BITS);
- out0 = (v16u8)__msa_pckev_b((v16i8)tmp1, (v16i8)tmp0);
-
- hz_out0 = HORIZ_2TAP_FILT_UH(src2, src2, mask, filt_hz, FILTER_BITS);
- hz_out2 = HORIZ_2TAP_FILT_UH(src3, src3, mask, filt_hz, FILTER_BITS);
- ILVEV_B2_UB(hz_out1, hz_out0, hz_out3, hz_out2, vec0, vec1);
- DOTP_UB2_UH(vec0, vec1, filt_vt, filt_vt, tmp0, tmp1);
- SRARI_H2_UH(tmp0, tmp1, FILTER_BITS);
- out1 = (v16u8)__msa_pckev_b((v16i8)tmp1, (v16i8)tmp0);
-
- hz_out1 = HORIZ_2TAP_FILT_UH(src4, src4, mask, filt_hz, FILTER_BITS);
- hz_out3 = HORIZ_2TAP_FILT_UH(src5, src5, mask, filt_hz, FILTER_BITS);
- ILVEV_B2_UB(hz_out0, hz_out1, hz_out2, hz_out3, vec0, vec1);
- DOTP_UB2_UH(vec0, vec1, filt_vt, filt_vt, tmp0, tmp1);
- SRARI_H2_UH(tmp0, tmp1, FILTER_BITS);
- out2 = (v16u8)__msa_pckev_b((v16i8)tmp1, (v16i8)tmp0);
-
- hz_out0 = HORIZ_2TAP_FILT_UH(src6, src6, mask, filt_hz, FILTER_BITS);
- hz_out2 = HORIZ_2TAP_FILT_UH(src7, src7, mask, filt_hz, FILTER_BITS);
- ILVEV_B2_UB(hz_out1, hz_out0, hz_out3, hz_out2, vec0, vec1);
- DOTP_UB2_UH(vec0, vec1, filt_vt, filt_vt, tmp0, tmp1);
- SRARI_H2_UH(tmp0, tmp1, FILTER_BITS);
- out3 = (v16u8)__msa_pckev_b((v16i8)tmp1, (v16i8)tmp0);
-
- LD_UB4(dst, dst_stride, ref0, ref1, ref2, ref3);
- dst += (4 * dst_stride);
-
- AVER_UB4_UB(out0, pred0, out1, pred1, out2, pred2, out3, pred3, out0, out1,
- out2, out3);
-
- CALC_MSE_AVG_B(out0, ref0, var, avg);
- CALC_MSE_AVG_B(out1, ref1, var, avg);
- CALC_MSE_AVG_B(out2, ref2, var, avg);
- CALC_MSE_AVG_B(out3, ref3, var, avg);
- }
-
- vec = __msa_hadd_s_w(avg, avg);
- *diff = HADD_SW_S32(vec);
-
- return HADD_SW_S32(var);
-}
-
-static uint32_t sub_pixel_avg_sse_diff_16width_hv_msa(
- const uint8_t *src, int32_t src_stride, const uint8_t *dst,
- int32_t dst_stride, const uint8_t *sec_pred, const uint8_t *filter_horiz,
- const uint8_t *filter_vert, int32_t height, int32_t *diff) {
- return subpel_avg_ssediff_16w_hv_msa(src, src_stride, dst, dst_stride,
- sec_pred, filter_horiz, filter_vert,
- height, diff, 16);
-}
-
-static uint32_t sub_pixel_avg_sse_diff_32width_hv_msa(
- const uint8_t *src, int32_t src_stride, const uint8_t *dst,
- int32_t dst_stride, const uint8_t *sec_pred, const uint8_t *filter_horiz,
- const uint8_t *filter_vert, int32_t height, int32_t *diff) {
- uint32_t loop_cnt, sse = 0;
- int32_t diff0[2];
-
- for (loop_cnt = 0; loop_cnt < 2; ++loop_cnt) {
- sse += subpel_avg_ssediff_16w_hv_msa(src, src_stride, dst, dst_stride,
- sec_pred, filter_horiz, filter_vert,
- height, &diff0[loop_cnt], 32);
- src += 16;
- dst += 16;
- sec_pred += 16;
- }
-
- *diff = diff0[0] + diff0[1];
-
- return sse;
-}
-
-static uint32_t sub_pixel_avg_sse_diff_64width_hv_msa(
- const uint8_t *src, int32_t src_stride, const uint8_t *dst,
- int32_t dst_stride, const uint8_t *sec_pred, const uint8_t *filter_horiz,
- const uint8_t *filter_vert, int32_t height, int32_t *diff) {
- uint32_t loop_cnt, sse = 0;
- int32_t diff0[4];
-
- for (loop_cnt = 0; loop_cnt < 4; ++loop_cnt) {
- sse += subpel_avg_ssediff_16w_hv_msa(src, src_stride, dst, dst_stride,
- sec_pred, filter_horiz, filter_vert,
- height, &diff0[loop_cnt], 64);
- src += 16;
- dst += 16;
- sec_pred += 16;
- }
-
- *diff = diff0[0] + diff0[1] + diff0[2] + diff0[3];
-
- return sse;
-}
-
-#define VARIANCE_4Wx4H(sse, diff) VARIANCE_WxH(sse, diff, 4);
-#define VARIANCE_4Wx8H(sse, diff) VARIANCE_WxH(sse, diff, 5);
-#define VARIANCE_8Wx4H(sse, diff) VARIANCE_WxH(sse, diff, 5);
-#define VARIANCE_8Wx8H(sse, diff) VARIANCE_WxH(sse, diff, 6);
-#define VARIANCE_8Wx16H(sse, diff) VARIANCE_WxH(sse, diff, 7);
-#define VARIANCE_16Wx8H(sse, diff) VARIANCE_WxH(sse, diff, 7);
-#define VARIANCE_16Wx16H(sse, diff) VARIANCE_WxH(sse, diff, 8);
-
-#define VARIANCE_16Wx32H(sse, diff) VARIANCE_LARGE_WxH(sse, diff, 9);
-#define VARIANCE_32Wx16H(sse, diff) VARIANCE_LARGE_WxH(sse, diff, 9);
-#define VARIANCE_32Wx32H(sse, diff) VARIANCE_LARGE_WxH(sse, diff, 10);
-#define VARIANCE_32Wx64H(sse, diff) VARIANCE_LARGE_WxH(sse, diff, 11);
-#define VARIANCE_64Wx32H(sse, diff) VARIANCE_LARGE_WxH(sse, diff, 11);
-#define VARIANCE_64Wx64H(sse, diff) VARIANCE_LARGE_WxH(sse, diff, 12);
-
-#define AOM_SUB_PIXEL_VARIANCE_WDXHT_MSA(wd, ht) \
- uint32_t aom_sub_pixel_variance##wd##x##ht##_msa( \
- const uint8_t *src, int32_t src_stride, int32_t xoffset, \
- int32_t yoffset, const uint8_t *ref, int32_t ref_stride, \
- uint32_t *sse) { \
- int32_t diff; \
- uint32_t var; \
- const uint8_t *h_filter = bilinear_filters_2t[xoffset]; \
- const uint8_t *v_filter = bilinear_filters_2t[yoffset]; \
- \
- if (yoffset) { \
- if (xoffset) { \
- *sse = sub_pixel_sse_diff_##wd##width_hv_msa( \
- src, src_stride, ref, ref_stride, h_filter, v_filter, ht, &diff); \
- } else { \
- *sse = sub_pixel_sse_diff_##wd##width_v_msa( \
- src, src_stride, ref, ref_stride, v_filter, ht, &diff); \
- } \
- \
- var = VARIANCE_##wd##Wx##ht##H(*sse, diff); \
- } else { \
- if (xoffset) { \
- *sse = sub_pixel_sse_diff_##wd##width_h_msa( \
- src, src_stride, ref, ref_stride, h_filter, ht, &diff); \
- \
- var = VARIANCE_##wd##Wx##ht##H(*sse, diff); \
- } else { \
- var = aom_variance##wd##x##ht##_msa(src, src_stride, ref, ref_stride, \
- sse); \
- } \
- } \
- \
- return var; \
- }
-
-/* clang-format off */
-AOM_SUB_PIXEL_VARIANCE_WDXHT_MSA(4, 4)
-AOM_SUB_PIXEL_VARIANCE_WDXHT_MSA(4, 8)
-
-AOM_SUB_PIXEL_VARIANCE_WDXHT_MSA(8, 4)
-AOM_SUB_PIXEL_VARIANCE_WDXHT_MSA(8, 8)
-AOM_SUB_PIXEL_VARIANCE_WDXHT_MSA(8, 16)
-
-AOM_SUB_PIXEL_VARIANCE_WDXHT_MSA(16, 8)
-AOM_SUB_PIXEL_VARIANCE_WDXHT_MSA(16, 16)
-AOM_SUB_PIXEL_VARIANCE_WDXHT_MSA(16, 32)
-
-AOM_SUB_PIXEL_VARIANCE_WDXHT_MSA(32, 16)
-AOM_SUB_PIXEL_VARIANCE_WDXHT_MSA(32, 32)
-AOM_SUB_PIXEL_VARIANCE_WDXHT_MSA(32, 64)
-
-AOM_SUB_PIXEL_VARIANCE_WDXHT_MSA(64, 32)
-AOM_SUB_PIXEL_VARIANCE_WDXHT_MSA(64, 64)
-/* clang-format on */
-
-#define AOM_SUB_PIXEL_AVG_VARIANCE_WDXHT_MSA(wd, ht) \
- uint32_t aom_sub_pixel_avg_variance##wd##x##ht##_msa( \
- const uint8_t *src_ptr, int32_t src_stride, int32_t xoffset, \
- int32_t yoffset, const uint8_t *ref_ptr, int32_t ref_stride, \
- uint32_t *sse, const uint8_t *sec_pred) { \
- int32_t diff; \
- const uint8_t *h_filter = bilinear_filters_2t[xoffset]; \
- const uint8_t *v_filter = bilinear_filters_2t[yoffset]; \
- \
- if (yoffset) { \
- if (xoffset) { \
- *sse = sub_pixel_avg_sse_diff_##wd##width_hv_msa( \
- src_ptr, src_stride, ref_ptr, ref_stride, sec_pred, h_filter, \
- v_filter, ht, &diff); \
- } else { \
- *sse = sub_pixel_avg_sse_diff_##wd##width_v_msa( \
- src_ptr, src_stride, ref_ptr, ref_stride, sec_pred, v_filter, ht, \
- &diff); \
- } \
- } else { \
- if (xoffset) { \
- *sse = sub_pixel_avg_sse_diff_##wd##width_h_msa( \
- src_ptr, src_stride, ref_ptr, ref_stride, sec_pred, h_filter, ht, \
- &diff); \
- } else { \
- *sse = avg_sse_diff_##wd##width_msa(src_ptr, src_stride, ref_ptr, \
- ref_stride, sec_pred, ht, &diff); \
- } \
- } \
- \
- return VARIANCE_##wd##Wx##ht##H(*sse, diff); \
- }
-
-/* clang-format off */
-AOM_SUB_PIXEL_AVG_VARIANCE_WDXHT_MSA(4, 4)
-AOM_SUB_PIXEL_AVG_VARIANCE_WDXHT_MSA(4, 8)
-
-AOM_SUB_PIXEL_AVG_VARIANCE_WDXHT_MSA(8, 4)
-AOM_SUB_PIXEL_AVG_VARIANCE_WDXHT_MSA(8, 8)
-AOM_SUB_PIXEL_AVG_VARIANCE_WDXHT_MSA(8, 16)
-
-AOM_SUB_PIXEL_AVG_VARIANCE_WDXHT_MSA(16, 8)
-AOM_SUB_PIXEL_AVG_VARIANCE_WDXHT_MSA(16, 16)
-AOM_SUB_PIXEL_AVG_VARIANCE_WDXHT_MSA(16, 32)
-
-AOM_SUB_PIXEL_AVG_VARIANCE_WDXHT_MSA(32, 16)
-AOM_SUB_PIXEL_AVG_VARIANCE_WDXHT_MSA(32, 32)
-/* clang-format on */
-
-uint32_t aom_sub_pixel_avg_variance32x64_msa(const uint8_t *src_ptr,
- int32_t src_stride,
- int32_t xoffset, int32_t yoffset,
- const uint8_t *ref_ptr,
- int32_t ref_stride, uint32_t *sse,
- const uint8_t *sec_pred) {
- int32_t diff;
- const uint8_t *h_filter = bilinear_filters_2t[xoffset];
- const uint8_t *v_filter = bilinear_filters_2t[yoffset];
-
- if (yoffset) {
- if (xoffset) {
- *sse = sub_pixel_avg_sse_diff_32width_hv_msa(
- src_ptr, src_stride, ref_ptr, ref_stride, sec_pred, h_filter,
- v_filter, 64, &diff);
- } else {
- *sse = sub_pixel_avg_sse_diff_32width_v_msa(src_ptr, src_stride, ref_ptr,
- ref_stride, sec_pred,
- v_filter, 64, &diff);
- }
- } else {
- if (xoffset) {
- *sse = sub_pixel_avg_sse_diff_32width_h_msa(src_ptr, src_stride, ref_ptr,
- ref_stride, sec_pred,
- h_filter, 64, &diff);
- } else {
- *sse = avg_sse_diff_32x64_msa(src_ptr, src_stride, ref_ptr, ref_stride,
- sec_pred, &diff);
- }
- }
-
- return VARIANCE_32Wx64H(*sse, diff);
-}
-
-#define AOM_SUB_PIXEL_AVG_VARIANCE64XHEIGHT_MSA(ht) \
- uint32_t aom_sub_pixel_avg_variance64x##ht##_msa( \
- const uint8_t *src_ptr, int32_t src_stride, int32_t xoffset, \
- int32_t yoffset, const uint8_t *ref_ptr, int32_t ref_stride, \
- uint32_t *sse, const uint8_t *sec_pred) { \
- int32_t diff; \
- const uint8_t *h_filter = bilinear_filters_2t[xoffset]; \
- const uint8_t *v_filter = bilinear_filters_2t[yoffset]; \
- \
- if (yoffset) { \
- if (xoffset) { \
- *sse = sub_pixel_avg_sse_diff_64width_hv_msa( \
- src_ptr, src_stride, ref_ptr, ref_stride, sec_pred, h_filter, \
- v_filter, ht, &diff); \
- } else { \
- *sse = sub_pixel_avg_sse_diff_64width_v_msa( \
- src_ptr, src_stride, ref_ptr, ref_stride, sec_pred, v_filter, ht, \
- &diff); \
- } \
- } else { \
- if (xoffset) { \
- *sse = sub_pixel_avg_sse_diff_64width_h_msa( \
- src_ptr, src_stride, ref_ptr, ref_stride, sec_pred, h_filter, ht, \
- &diff); \
- } else { \
- *sse = avg_sse_diff_64x##ht##_msa(src_ptr, src_stride, ref_ptr, \
- ref_stride, sec_pred, &diff); \
- } \
- } \
- \
- return VARIANCE_64Wx##ht##H(*sse, diff); \
- }
-
-/* clang-format off */
-AOM_SUB_PIXEL_AVG_VARIANCE64XHEIGHT_MSA(32)
-AOM_SUB_PIXEL_AVG_VARIANCE64XHEIGHT_MSA(64)
-/* clang-format on */
diff --git a/third_party/aom/aom_dsp/mips/subtract_msa.c b/third_party/aom/aom_dsp/mips/subtract_msa.c
deleted file mode 100644
index bfed773ac..000000000
--- a/third_party/aom/aom_dsp/mips/subtract_msa.c
+++ /dev/null
@@ -1,266 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#include "config/aom_dsp_rtcd.h"
-
-#include "aom_dsp/mips/macros_msa.h"
-
-static void sub_blk_4x4_msa(const uint8_t *src_ptr, int32_t src_stride,
- const uint8_t *pred_ptr, int32_t pred_stride,
- int16_t *diff_ptr, int32_t diff_stride) {
- uint32_t src0, src1, src2, src3;
- uint32_t pred0, pred1, pred2, pred3;
- v16i8 src = { 0 };
- v16i8 pred = { 0 };
- v16u8 src_l0, src_l1;
- v8i16 diff0, diff1;
-
- LW4(src_ptr, src_stride, src0, src1, src2, src3);
- LW4(pred_ptr, pred_stride, pred0, pred1, pred2, pred3);
- INSERT_W4_SB(src0, src1, src2, src3, src);
- INSERT_W4_SB(pred0, pred1, pred2, pred3, pred);
- ILVRL_B2_UB(src, pred, src_l0, src_l1);
- HSUB_UB2_SH(src_l0, src_l1, diff0, diff1);
- ST8x4_UB(diff0, diff1, diff_ptr, (2 * diff_stride));
-}
-
-static void sub_blk_8x8_msa(const uint8_t *src_ptr, int32_t src_stride,
- const uint8_t *pred_ptr, int32_t pred_stride,
- int16_t *diff_ptr, int32_t diff_stride) {
- uint32_t loop_cnt;
- uint64_t src0, src1, pred0, pred1;
- v16i8 src = { 0 };
- v16i8 pred = { 0 };
- v16u8 src_l0, src_l1;
- v8i16 diff0, diff1;
-
- for (loop_cnt = 4; loop_cnt--;) {
- LD2(src_ptr, src_stride, src0, src1);
- src_ptr += (2 * src_stride);
- LD2(pred_ptr, pred_stride, pred0, pred1);
- pred_ptr += (2 * pred_stride);
-
- INSERT_D2_SB(src0, src1, src);
- INSERT_D2_SB(pred0, pred1, pred);
- ILVRL_B2_UB(src, pred, src_l0, src_l1);
- HSUB_UB2_SH(src_l0, src_l1, diff0, diff1);
- ST_SH2(diff0, diff1, diff_ptr, diff_stride);
- diff_ptr += (2 * diff_stride);
- }
-}
-
-static void sub_blk_16x16_msa(const uint8_t *src, int32_t src_stride,
- const uint8_t *pred, int32_t pred_stride,
- int16_t *diff, int32_t diff_stride) {
- int8_t count;
- v16i8 src0, src1, src2, src3, src4, src5, src6, src7;
- v16i8 pred0, pred1, pred2, pred3, pred4, pred5, pred6, pred7;
- v16u8 src_l0, src_l1;
- v8i16 diff0, diff1;
-
- for (count = 2; count--;) {
- LD_SB8(src, src_stride, src0, src1, src2, src3, src4, src5, src6, src7);
- src += (8 * src_stride);
-
- LD_SB8(pred, pred_stride, pred0, pred1, pred2, pred3, pred4, pred5, pred6,
- pred7);
- pred += (8 * pred_stride);
-
- ILVRL_B2_UB(src0, pred0, src_l0, src_l1);
- HSUB_UB2_SH(src_l0, src_l1, diff0, diff1);
- ST_SH2(diff0, diff1, diff, 8);
- diff += diff_stride;
-
- ILVRL_B2_UB(src1, pred1, src_l0, src_l1);
- HSUB_UB2_SH(src_l0, src_l1, diff0, diff1);
- ST_SH2(diff0, diff1, diff, 8);
- diff += diff_stride;
-
- ILVRL_B2_UB(src2, pred2, src_l0, src_l1);
- HSUB_UB2_SH(src_l0, src_l1, diff0, diff1);
- ST_SH2(diff0, diff1, diff, 8);
- diff += diff_stride;
-
- ILVRL_B2_UB(src3, pred3, src_l0, src_l1);
- HSUB_UB2_SH(src_l0, src_l1, diff0, diff1);
- ST_SH2(diff0, diff1, diff, 8);
- diff += diff_stride;
-
- ILVRL_B2_UB(src4, pred4, src_l0, src_l1);
- HSUB_UB2_SH(src_l0, src_l1, diff0, diff1);
- ST_SH2(diff0, diff1, diff, 8);
- diff += diff_stride;
-
- ILVRL_B2_UB(src5, pred5, src_l0, src_l1);
- HSUB_UB2_SH(src_l0, src_l1, diff0, diff1);
- ST_SH2(diff0, diff1, diff, 8);
- diff += diff_stride;
-
- ILVRL_B2_UB(src6, pred6, src_l0, src_l1);
- HSUB_UB2_SH(src_l0, src_l1, diff0, diff1);
- ST_SH2(diff0, diff1, diff, 8);
- diff += diff_stride;
-
- ILVRL_B2_UB(src7, pred7, src_l0, src_l1);
- HSUB_UB2_SH(src_l0, src_l1, diff0, diff1);
- ST_SH2(diff0, diff1, diff, 8);
- diff += diff_stride;
- }
-}
-
-static void sub_blk_32x32_msa(const uint8_t *src, int32_t src_stride,
- const uint8_t *pred, int32_t pred_stride,
- int16_t *diff, int32_t diff_stride) {
- uint32_t loop_cnt;
- v16i8 src0, src1, src2, src3, src4, src5, src6, src7;
- v16i8 pred0, pred1, pred2, pred3, pred4, pred5, pred6, pred7;
- v16u8 src_l0, src_l1;
- v8i16 diff0, diff1;
-
- for (loop_cnt = 8; loop_cnt--;) {
- LD_SB2(src, 16, src0, src1);
- src += src_stride;
- LD_SB2(src, 16, src2, src3);
- src += src_stride;
- LD_SB2(src, 16, src4, src5);
- src += src_stride;
- LD_SB2(src, 16, src6, src7);
- src += src_stride;
-
- LD_SB2(pred, 16, pred0, pred1);
- pred += pred_stride;
- LD_SB2(pred, 16, pred2, pred3);
- pred += pred_stride;
- LD_SB2(pred, 16, pred4, pred5);
- pred += pred_stride;
- LD_SB2(pred, 16, pred6, pred7);
- pred += pred_stride;
-
- ILVRL_B2_UB(src0, pred0, src_l0, src_l1);
- HSUB_UB2_SH(src_l0, src_l1, diff0, diff1);
- ST_SH2(diff0, diff1, diff, 8);
- ILVRL_B2_UB(src1, pred1, src_l0, src_l1);
- HSUB_UB2_SH(src_l0, src_l1, diff0, diff1);
- ST_SH2(diff0, diff1, diff + 16, 8);
- diff += diff_stride;
-
- ILVRL_B2_UB(src2, pred2, src_l0, src_l1);
- HSUB_UB2_SH(src_l0, src_l1, diff0, diff1);
- ST_SH2(diff0, diff1, diff, 8);
- ILVRL_B2_UB(src3, pred3, src_l0, src_l1);
- HSUB_UB2_SH(src_l0, src_l1, diff0, diff1);
- ST_SH2(diff0, diff1, diff + 16, 8);
- diff += diff_stride;
-
- ILVRL_B2_UB(src4, pred4, src_l0, src_l1);
- HSUB_UB2_SH(src_l0, src_l1, diff0, diff1);
- ST_SH2(diff0, diff1, diff, 8);
- ILVRL_B2_UB(src5, pred5, src_l0, src_l1);
- HSUB_UB2_SH(src_l0, src_l1, diff0, diff1);
- ST_SH2(diff0, diff1, diff + 16, 8);
- diff += diff_stride;
-
- ILVRL_B2_UB(src6, pred6, src_l0, src_l1);
- HSUB_UB2_SH(src_l0, src_l1, diff0, diff1);
- ST_SH2(diff0, diff1, diff, 8);
- ILVRL_B2_UB(src7, pred7, src_l0, src_l1);
- HSUB_UB2_SH(src_l0, src_l1, diff0, diff1);
- ST_SH2(diff0, diff1, diff + 16, 8);
- diff += diff_stride;
- }
-}
-
-static void sub_blk_64x64_msa(const uint8_t *src, int32_t src_stride,
- const uint8_t *pred, int32_t pred_stride,
- int16_t *diff, int32_t diff_stride) {
- uint32_t loop_cnt;
- v16i8 src0, src1, src2, src3, src4, src5, src6, src7;
- v16i8 pred0, pred1, pred2, pred3, pred4, pred5, pred6, pred7;
- v16u8 src_l0, src_l1;
- v8i16 diff0, diff1;
-
- for (loop_cnt = 32; loop_cnt--;) {
- LD_SB4(src, 16, src0, src1, src2, src3);
- src += src_stride;
- LD_SB4(src, 16, src4, src5, src6, src7);
- src += src_stride;
-
- LD_SB4(pred, 16, pred0, pred1, pred2, pred3);
- pred += pred_stride;
- LD_SB4(pred, 16, pred4, pred5, pred6, pred7);
- pred += pred_stride;
-
- ILVRL_B2_UB(src0, pred0, src_l0, src_l1);
- HSUB_UB2_SH(src_l0, src_l1, diff0, diff1);
- ST_SH2(diff0, diff1, diff, 8);
- ILVRL_B2_UB(src1, pred1, src_l0, src_l1);
- HSUB_UB2_SH(src_l0, src_l1, diff0, diff1);
- ST_SH2(diff0, diff1, diff + 16, 8);
- ILVRL_B2_UB(src2, pred2, src_l0, src_l1);
- HSUB_UB2_SH(src_l0, src_l1, diff0, diff1);
- ST_SH2(diff0, diff1, diff + 32, 8);
- ILVRL_B2_UB(src3, pred3, src_l0, src_l1);
- HSUB_UB2_SH(src_l0, src_l1, diff0, diff1);
- ST_SH2(diff0, diff1, diff + 48, 8);
- diff += diff_stride;
-
- ILVRL_B2_UB(src4, pred4, src_l0, src_l1);
- HSUB_UB2_SH(src_l0, src_l1, diff0, diff1);
- ST_SH2(diff0, diff1, diff, 8);
- ILVRL_B2_UB(src5, pred5, src_l0, src_l1);
- HSUB_UB2_SH(src_l0, src_l1, diff0, diff1);
- ST_SH2(diff0, diff1, diff + 16, 8);
- ILVRL_B2_UB(src6, pred6, src_l0, src_l1);
- HSUB_UB2_SH(src_l0, src_l1, diff0, diff1);
- ST_SH2(diff0, diff1, diff + 32, 8);
- ILVRL_B2_UB(src7, pred7, src_l0, src_l1);
- HSUB_UB2_SH(src_l0, src_l1, diff0, diff1);
- ST_SH2(diff0, diff1, diff + 48, 8);
- diff += diff_stride;
- }
-}
-
-void aom_subtract_block_msa(int32_t rows, int32_t cols, int16_t *diff_ptr,
- ptrdiff_t diff_stride, const uint8_t *src_ptr,
- ptrdiff_t src_stride, const uint8_t *pred_ptr,
- ptrdiff_t pred_stride) {
- if (rows == cols) {
- switch (rows) {
- case 4:
- sub_blk_4x4_msa(src_ptr, src_stride, pred_ptr, pred_stride, diff_ptr,
- diff_stride);
- break;
- case 8:
- sub_blk_8x8_msa(src_ptr, src_stride, pred_ptr, pred_stride, diff_ptr,
- diff_stride);
- break;
- case 16:
- sub_blk_16x16_msa(src_ptr, src_stride, pred_ptr, pred_stride, diff_ptr,
- diff_stride);
- break;
- case 32:
- sub_blk_32x32_msa(src_ptr, src_stride, pred_ptr, pred_stride, diff_ptr,
- diff_stride);
- break;
- case 64:
- sub_blk_64x64_msa(src_ptr, src_stride, pred_ptr, pred_stride, diff_ptr,
- diff_stride);
- break;
- default:
- aom_subtract_block_c(rows, cols, diff_ptr, diff_stride, src_ptr,
- src_stride, pred_ptr, pred_stride);
- break;
- }
- } else {
- aom_subtract_block_c(rows, cols, diff_ptr, diff_stride, src_ptr, src_stride,
- pred_ptr, pred_stride);
- }
-}
diff --git a/third_party/aom/aom_dsp/mips/variance_msa.c b/third_party/aom/aom_dsp/mips/variance_msa.c
deleted file mode 100644
index 065c09ac5..000000000
--- a/third_party/aom/aom_dsp/mips/variance_msa.c
+++ /dev/null
@@ -1,633 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#include "config/aom_dsp_rtcd.h"
-
-#include "aom_dsp/mips/macros_msa.h"
-
-#define CALC_MSE_B(src, ref, var) \
- { \
- v16u8 src_l0_m, src_l1_m; \
- v8i16 res_l0_m, res_l1_m; \
- \
- ILVRL_B2_UB(src, ref, src_l0_m, src_l1_m); \
- HSUB_UB2_SH(src_l0_m, src_l1_m, res_l0_m, res_l1_m); \
- DPADD_SH2_SW(res_l0_m, res_l1_m, res_l0_m, res_l1_m, var, var); \
- }
-
-#define CALC_MSE_AVG_B(src, ref, var, sub) \
- { \
- v16u8 src_l0_m, src_l1_m; \
- v8i16 res_l0_m, res_l1_m; \
- \
- ILVRL_B2_UB(src, ref, src_l0_m, src_l1_m); \
- HSUB_UB2_SH(src_l0_m, src_l1_m, res_l0_m, res_l1_m); \
- DPADD_SH2_SW(res_l0_m, res_l1_m, res_l0_m, res_l1_m, var, var); \
- \
- sub += res_l0_m + res_l1_m; \
- }
-
-#define VARIANCE_WxH(sse, diff, shift) sse - (((uint32_t)diff * diff) >> shift)
-
-#define VARIANCE_LARGE_WxH(sse, diff, shift) \
- sse - (((int64_t)diff * diff) >> shift)
-
-static uint32_t sse_diff_4width_msa(const uint8_t *src_ptr, int32_t src_stride,
- const uint8_t *ref_ptr, int32_t ref_stride,
- int32_t height, int32_t *diff) {
- uint32_t src0, src1, src2, src3;
- uint32_t ref0, ref1, ref2, ref3;
- int32_t ht_cnt;
- v16u8 src = { 0 };
- v16u8 ref = { 0 };
- v8i16 avg = { 0 };
- v4i32 vec, var = { 0 };
-
- for (ht_cnt = (height >> 2); ht_cnt--;) {
- LW4(src_ptr, src_stride, src0, src1, src2, src3);
- src_ptr += (4 * src_stride);
- LW4(ref_ptr, ref_stride, ref0, ref1, ref2, ref3);
- ref_ptr += (4 * ref_stride);
-
- INSERT_W4_UB(src0, src1, src2, src3, src);
- INSERT_W4_UB(ref0, ref1, ref2, ref3, ref);
- CALC_MSE_AVG_B(src, ref, var, avg);
- }
-
- vec = __msa_hadd_s_w(avg, avg);
- *diff = HADD_SW_S32(vec);
-
- return HADD_SW_S32(var);
-}
-
-static uint32_t sse_diff_8width_msa(const uint8_t *src_ptr, int32_t src_stride,
- const uint8_t *ref_ptr, int32_t ref_stride,
- int32_t height, int32_t *diff) {
- int32_t ht_cnt;
- v16u8 src0, src1, src2, src3;
- v16u8 ref0, ref1, ref2, ref3;
- v8i16 avg = { 0 };
- v4i32 vec, var = { 0 };
-
- for (ht_cnt = (height >> 2); ht_cnt--;) {
- LD_UB4(src_ptr, src_stride, src0, src1, src2, src3);
- src_ptr += (4 * src_stride);
- LD_UB4(ref_ptr, ref_stride, ref0, ref1, ref2, ref3);
- ref_ptr += (4 * ref_stride);
-
- PCKEV_D4_UB(src1, src0, src3, src2, ref1, ref0, ref3, ref2, src0, src1,
- ref0, ref1);
- CALC_MSE_AVG_B(src0, ref0, var, avg);
- CALC_MSE_AVG_B(src1, ref1, var, avg);
- }
-
- vec = __msa_hadd_s_w(avg, avg);
- *diff = HADD_SW_S32(vec);
-
- return HADD_SW_S32(var);
-}
-
-static uint32_t sse_diff_16width_msa(const uint8_t *src_ptr, int32_t src_stride,
- const uint8_t *ref_ptr, int32_t ref_stride,
- int32_t height, int32_t *diff) {
- int32_t ht_cnt;
- v16u8 src, ref;
- v8i16 avg = { 0 };
- v4i32 vec, var = { 0 };
-
- for (ht_cnt = (height >> 2); ht_cnt--;) {
- src = LD_UB(src_ptr);
- src_ptr += src_stride;
- ref = LD_UB(ref_ptr);
- ref_ptr += ref_stride;
- CALC_MSE_AVG_B(src, ref, var, avg);
-
- src = LD_UB(src_ptr);
- src_ptr += src_stride;
- ref = LD_UB(ref_ptr);
- ref_ptr += ref_stride;
- CALC_MSE_AVG_B(src, ref, var, avg);
-
- src = LD_UB(src_ptr);
- src_ptr += src_stride;
- ref = LD_UB(ref_ptr);
- ref_ptr += ref_stride;
- CALC_MSE_AVG_B(src, ref, var, avg);
-
- src = LD_UB(src_ptr);
- src_ptr += src_stride;
- ref = LD_UB(ref_ptr);
- ref_ptr += ref_stride;
- CALC_MSE_AVG_B(src, ref, var, avg);
- }
-
- vec = __msa_hadd_s_w(avg, avg);
- *diff = HADD_SW_S32(vec);
-
- return HADD_SW_S32(var);
-}
-
-static uint32_t sse_diff_32width_msa(const uint8_t *src_ptr, int32_t src_stride,
- const uint8_t *ref_ptr, int32_t ref_stride,
- int32_t height, int32_t *diff) {
- int32_t ht_cnt;
- v16u8 src0, src1, ref0, ref1;
- v8i16 avg = { 0 };
- v4i32 vec, var = { 0 };
-
- for (ht_cnt = (height >> 2); ht_cnt--;) {
- LD_UB2(src_ptr, 16, src0, src1);
- src_ptr += src_stride;
- LD_UB2(ref_ptr, 16, ref0, ref1);
- ref_ptr += ref_stride;
- CALC_MSE_AVG_B(src0, ref0, var, avg);
- CALC_MSE_AVG_B(src1, ref1, var, avg);
-
- LD_UB2(src_ptr, 16, src0, src1);
- src_ptr += src_stride;
- LD_UB2(ref_ptr, 16, ref0, ref1);
- ref_ptr += ref_stride;
- CALC_MSE_AVG_B(src0, ref0, var, avg);
- CALC_MSE_AVG_B(src1, ref1, var, avg);
-
- LD_UB2(src_ptr, 16, src0, src1);
- src_ptr += src_stride;
- LD_UB2(ref_ptr, 16, ref0, ref1);
- ref_ptr += ref_stride;
- CALC_MSE_AVG_B(src0, ref0, var, avg);
- CALC_MSE_AVG_B(src1, ref1, var, avg);
-
- LD_UB2(src_ptr, 16, src0, src1);
- src_ptr += src_stride;
- LD_UB2(ref_ptr, 16, ref0, ref1);
- ref_ptr += ref_stride;
- CALC_MSE_AVG_B(src0, ref0, var, avg);
- CALC_MSE_AVG_B(src1, ref1, var, avg);
- }
-
- vec = __msa_hadd_s_w(avg, avg);
- *diff = HADD_SW_S32(vec);
-
- return HADD_SW_S32(var);
-}
-
-static uint32_t sse_diff_32x64_msa(const uint8_t *src_ptr, int32_t src_stride,
- const uint8_t *ref_ptr, int32_t ref_stride,
- int32_t *diff) {
- int32_t ht_cnt;
- v16u8 src0, src1, ref0, ref1;
- v8i16 avg0 = { 0 };
- v8i16 avg1 = { 0 };
- v4i32 vec, var = { 0 };
-
- for (ht_cnt = 16; ht_cnt--;) {
- LD_UB2(src_ptr, 16, src0, src1);
- src_ptr += src_stride;
- LD_UB2(ref_ptr, 16, ref0, ref1);
- ref_ptr += ref_stride;
- CALC_MSE_AVG_B(src0, ref0, var, avg0);
- CALC_MSE_AVG_B(src1, ref1, var, avg1);
-
- LD_UB2(src_ptr, 16, src0, src1);
- src_ptr += src_stride;
- LD_UB2(ref_ptr, 16, ref0, ref1);
- ref_ptr += ref_stride;
- CALC_MSE_AVG_B(src0, ref0, var, avg0);
- CALC_MSE_AVG_B(src1, ref1, var, avg1);
-
- LD_UB2(src_ptr, 16, src0, src1);
- src_ptr += src_stride;
- LD_UB2(ref_ptr, 16, ref0, ref1);
- ref_ptr += ref_stride;
- CALC_MSE_AVG_B(src0, ref0, var, avg0);
- CALC_MSE_AVG_B(src1, ref1, var, avg1);
-
- LD_UB2(src_ptr, 16, src0, src1);
- src_ptr += src_stride;
- LD_UB2(ref_ptr, 16, ref0, ref1);
- ref_ptr += ref_stride;
- CALC_MSE_AVG_B(src0, ref0, var, avg0);
- CALC_MSE_AVG_B(src1, ref1, var, avg1);
- }
-
- vec = __msa_hadd_s_w(avg0, avg0);
- vec += __msa_hadd_s_w(avg1, avg1);
- *diff = HADD_SW_S32(vec);
-
- return HADD_SW_S32(var);
-}
-
-static uint32_t sse_diff_64x32_msa(const uint8_t *src_ptr, int32_t src_stride,
- const uint8_t *ref_ptr, int32_t ref_stride,
- int32_t *diff) {
- int32_t ht_cnt;
- v16u8 src0, src1, src2, src3;
- v16u8 ref0, ref1, ref2, ref3;
- v8i16 avg0 = { 0 };
- v8i16 avg1 = { 0 };
- v4i32 vec, var = { 0 };
-
- for (ht_cnt = 16; ht_cnt--;) {
- LD_UB4(src_ptr, 16, src0, src1, src2, src3);
- src_ptr += src_stride;
- LD_UB4(ref_ptr, 16, ref0, ref1, ref2, ref3);
- ref_ptr += ref_stride;
- CALC_MSE_AVG_B(src0, ref0, var, avg0);
- CALC_MSE_AVG_B(src2, ref2, var, avg0);
- CALC_MSE_AVG_B(src1, ref1, var, avg1);
- CALC_MSE_AVG_B(src3, ref3, var, avg1);
-
- LD_UB4(src_ptr, 16, src0, src1, src2, src3);
- src_ptr += src_stride;
- LD_UB4(ref_ptr, 16, ref0, ref1, ref2, ref3);
- ref_ptr += ref_stride;
- CALC_MSE_AVG_B(src0, ref0, var, avg0);
- CALC_MSE_AVG_B(src2, ref2, var, avg0);
- CALC_MSE_AVG_B(src1, ref1, var, avg1);
- CALC_MSE_AVG_B(src3, ref3, var, avg1);
- }
-
- vec = __msa_hadd_s_w(avg0, avg0);
- vec += __msa_hadd_s_w(avg1, avg1);
- *diff = HADD_SW_S32(vec);
-
- return HADD_SW_S32(var);
-}
-
-static uint32_t sse_diff_64x64_msa(const uint8_t *src_ptr, int32_t src_stride,
- const uint8_t *ref_ptr, int32_t ref_stride,
- int32_t *diff) {
- int32_t ht_cnt;
- v16u8 src0, src1, src2, src3;
- v16u8 ref0, ref1, ref2, ref3;
- v8i16 avg0 = { 0 };
- v8i16 avg1 = { 0 };
- v8i16 avg2 = { 0 };
- v8i16 avg3 = { 0 };
- v4i32 vec, var = { 0 };
-
- for (ht_cnt = 32; ht_cnt--;) {
- LD_UB4(src_ptr, 16, src0, src1, src2, src3);
- src_ptr += src_stride;
- LD_UB4(ref_ptr, 16, ref0, ref1, ref2, ref3);
- ref_ptr += ref_stride;
-
- CALC_MSE_AVG_B(src0, ref0, var, avg0);
- CALC_MSE_AVG_B(src1, ref1, var, avg1);
- CALC_MSE_AVG_B(src2, ref2, var, avg2);
- CALC_MSE_AVG_B(src3, ref3, var, avg3);
- LD_UB4(src_ptr, 16, src0, src1, src2, src3);
- src_ptr += src_stride;
- LD_UB4(ref_ptr, 16, ref0, ref1, ref2, ref3);
- ref_ptr += ref_stride;
- CALC_MSE_AVG_B(src0, ref0, var, avg0);
- CALC_MSE_AVG_B(src1, ref1, var, avg1);
- CALC_MSE_AVG_B(src2, ref2, var, avg2);
- CALC_MSE_AVG_B(src3, ref3, var, avg3);
- }
-
- vec = __msa_hadd_s_w(avg0, avg0);
- vec += __msa_hadd_s_w(avg1, avg1);
- vec += __msa_hadd_s_w(avg2, avg2);
- vec += __msa_hadd_s_w(avg3, avg3);
- *diff = HADD_SW_S32(vec);
-
- return HADD_SW_S32(var);
-}
-
-static uint32_t get_mb_ss_msa(const int16_t *src) {
- uint32_t sum, cnt;
- v8i16 src0, src1, src2, src3;
- v4i32 src0_l, src1_l, src2_l, src3_l;
- v4i32 src0_r, src1_r, src2_r, src3_r;
- v2i64 sq_src_l = { 0 };
- v2i64 sq_src_r = { 0 };
-
- for (cnt = 8; cnt--;) {
- LD_SH4(src, 8, src0, src1, src2, src3);
- src += 4 * 8;
-
- UNPCK_SH_SW(src0, src0_l, src0_r);
- UNPCK_SH_SW(src1, src1_l, src1_r);
- UNPCK_SH_SW(src2, src2_l, src2_r);
- UNPCK_SH_SW(src3, src3_l, src3_r);
-
- DPADD_SD2_SD(src0_l, src0_r, sq_src_l, sq_src_r);
- DPADD_SD2_SD(src1_l, src1_r, sq_src_l, sq_src_r);
- DPADD_SD2_SD(src2_l, src2_r, sq_src_l, sq_src_r);
- DPADD_SD2_SD(src3_l, src3_r, sq_src_l, sq_src_r);
- }
-
- sq_src_l += __msa_splati_d(sq_src_l, 1);
- sq_src_r += __msa_splati_d(sq_src_r, 1);
-
- sum = __msa_copy_s_d(sq_src_l, 0);
- sum += __msa_copy_s_d(sq_src_r, 0);
-
- return sum;
-}
-
-static uint32_t sse_4width_msa(const uint8_t *src_ptr, int32_t src_stride,
- const uint8_t *ref_ptr, int32_t ref_stride,
- int32_t height) {
- int32_t ht_cnt;
- uint32_t src0, src1, src2, src3;
- uint32_t ref0, ref1, ref2, ref3;
- v16u8 src = { 0 };
- v16u8 ref = { 0 };
- v4i32 var = { 0 };
-
- for (ht_cnt = (height >> 2); ht_cnt--;) {
- LW4(src_ptr, src_stride, src0, src1, src2, src3);
- src_ptr += (4 * src_stride);
- LW4(ref_ptr, ref_stride, ref0, ref1, ref2, ref3);
- ref_ptr += (4 * ref_stride);
-
- INSERT_W4_UB(src0, src1, src2, src3, src);
- INSERT_W4_UB(ref0, ref1, ref2, ref3, ref);
- CALC_MSE_B(src, ref, var);
- }
-
- return HADD_SW_S32(var);
-}
-
-static uint32_t sse_8width_msa(const uint8_t *src_ptr, int32_t src_stride,
- const uint8_t *ref_ptr, int32_t ref_stride,
- int32_t height) {
- int32_t ht_cnt;
- v16u8 src0, src1, src2, src3;
- v16u8 ref0, ref1, ref2, ref3;
- v4i32 var = { 0 };
-
- for (ht_cnt = (height >> 2); ht_cnt--;) {
- LD_UB4(src_ptr, src_stride, src0, src1, src2, src3);
- src_ptr += (4 * src_stride);
- LD_UB4(ref_ptr, ref_stride, ref0, ref1, ref2, ref3);
- ref_ptr += (4 * ref_stride);
-
- PCKEV_D4_UB(src1, src0, src3, src2, ref1, ref0, ref3, ref2, src0, src1,
- ref0, ref1);
- CALC_MSE_B(src0, ref0, var);
- CALC_MSE_B(src1, ref1, var);
- }
-
- return HADD_SW_S32(var);
-}
-
-static uint32_t sse_16width_msa(const uint8_t *src_ptr, int32_t src_stride,
- const uint8_t *ref_ptr, int32_t ref_stride,
- int32_t height) {
- int32_t ht_cnt;
- v16u8 src, ref;
- v4i32 var = { 0 };
-
- for (ht_cnt = (height >> 2); ht_cnt--;) {
- src = LD_UB(src_ptr);
- src_ptr += src_stride;
- ref = LD_UB(ref_ptr);
- ref_ptr += ref_stride;
- CALC_MSE_B(src, ref, var);
-
- src = LD_UB(src_ptr);
- src_ptr += src_stride;
- ref = LD_UB(ref_ptr);
- ref_ptr += ref_stride;
- CALC_MSE_B(src, ref, var);
-
- src = LD_UB(src_ptr);
- src_ptr += src_stride;
- ref = LD_UB(ref_ptr);
- ref_ptr += ref_stride;
- CALC_MSE_B(src, ref, var);
-
- src = LD_UB(src_ptr);
- src_ptr += src_stride;
- ref = LD_UB(ref_ptr);
- ref_ptr += ref_stride;
- CALC_MSE_B(src, ref, var);
- }
-
- return HADD_SW_S32(var);
-}
-
-static uint32_t sse_32width_msa(const uint8_t *src_ptr, int32_t src_stride,
- const uint8_t *ref_ptr, int32_t ref_stride,
- int32_t height) {
- int32_t ht_cnt;
- v16u8 src0, src1, ref0, ref1;
- v4i32 var = { 0 };
-
- for (ht_cnt = (height >> 2); ht_cnt--;) {
- LD_UB2(src_ptr, 16, src0, src1);
- src_ptr += src_stride;
- LD_UB2(ref_ptr, 16, ref0, ref1);
- ref_ptr += ref_stride;
- CALC_MSE_B(src0, ref0, var);
- CALC_MSE_B(src1, ref1, var);
-
- LD_UB2(src_ptr, 16, src0, src1);
- src_ptr += src_stride;
- LD_UB2(ref_ptr, 16, ref0, ref1);
- ref_ptr += ref_stride;
- CALC_MSE_B(src0, ref0, var);
- CALC_MSE_B(src1, ref1, var);
-
- LD_UB2(src_ptr, 16, src0, src1);
- src_ptr += src_stride;
- LD_UB2(ref_ptr, 16, ref0, ref1);
- ref_ptr += ref_stride;
- CALC_MSE_B(src0, ref0, var);
- CALC_MSE_B(src1, ref1, var);
-
- LD_UB2(src_ptr, 16, src0, src1);
- src_ptr += src_stride;
- LD_UB2(ref_ptr, 16, ref0, ref1);
- ref_ptr += ref_stride;
- CALC_MSE_B(src0, ref0, var);
- CALC_MSE_B(src1, ref1, var);
- }
-
- return HADD_SW_S32(var);
-}
-
-static uint32_t sse_64width_msa(const uint8_t *src_ptr, int32_t src_stride,
- const uint8_t *ref_ptr, int32_t ref_stride,
- int32_t height) {
- int32_t ht_cnt;
- v16u8 src0, src1, src2, src3;
- v16u8 ref0, ref1, ref2, ref3;
- v4i32 var = { 0 };
-
- for (ht_cnt = height >> 1; ht_cnt--;) {
- LD_UB4(src_ptr, 16, src0, src1, src2, src3);
- src_ptr += src_stride;
- LD_UB4(ref_ptr, 16, ref0, ref1, ref2, ref3);
- ref_ptr += ref_stride;
- CALC_MSE_B(src0, ref0, var);
- CALC_MSE_B(src2, ref2, var);
- CALC_MSE_B(src1, ref1, var);
- CALC_MSE_B(src3, ref3, var);
-
- LD_UB4(src_ptr, 16, src0, src1, src2, src3);
- src_ptr += src_stride;
- LD_UB4(ref_ptr, 16, ref0, ref1, ref2, ref3);
- ref_ptr += ref_stride;
- CALC_MSE_B(src0, ref0, var);
- CALC_MSE_B(src2, ref2, var);
- CALC_MSE_B(src1, ref1, var);
- CALC_MSE_B(src3, ref3, var);
- }
-
- return HADD_SW_S32(var);
-}
-
-uint32_t aom_get4x4sse_cs_msa(const uint8_t *src_ptr, int32_t src_stride,
- const uint8_t *ref_ptr, int32_t ref_stride) {
- uint32_t err = 0;
- uint32_t src0, src1, src2, src3;
- uint32_t ref0, ref1, ref2, ref3;
- v16i8 src = { 0 };
- v16i8 ref = { 0 };
- v16u8 src_vec0, src_vec1;
- v8i16 diff0, diff1;
- v4i32 err0 = { 0 };
- v4i32 err1 = { 0 };
-
- LW4(src_ptr, src_stride, src0, src1, src2, src3);
- LW4(ref_ptr, ref_stride, ref0, ref1, ref2, ref3);
- INSERT_W4_SB(src0, src1, src2, src3, src);
- INSERT_W4_SB(ref0, ref1, ref2, ref3, ref);
- ILVRL_B2_UB(src, ref, src_vec0, src_vec1);
- HSUB_UB2_SH(src_vec0, src_vec1, diff0, diff1);
- DPADD_SH2_SW(diff0, diff1, diff0, diff1, err0, err1);
- err = HADD_SW_S32(err0);
- err += HADD_SW_S32(err1);
-
- return err;
-}
-
-#define VARIANCE_4Wx4H(sse, diff) VARIANCE_WxH(sse, diff, 4);
-#define VARIANCE_4Wx8H(sse, diff) VARIANCE_WxH(sse, diff, 5);
-#define VARIANCE_8Wx4H(sse, diff) VARIANCE_WxH(sse, diff, 5);
-#define VARIANCE_8Wx8H(sse, diff) VARIANCE_WxH(sse, diff, 6);
-#define VARIANCE_8Wx16H(sse, diff) VARIANCE_WxH(sse, diff, 7);
-#define VARIANCE_16Wx8H(sse, diff) VARIANCE_WxH(sse, diff, 7);
-#define VARIANCE_16Wx16H(sse, diff) VARIANCE_WxH(sse, diff, 8);
-
-#define VARIANCE_16Wx32H(sse, diff) VARIANCE_LARGE_WxH(sse, diff, 9);
-#define VARIANCE_32Wx16H(sse, diff) VARIANCE_LARGE_WxH(sse, diff, 9);
-#define VARIANCE_32Wx32H(sse, diff) VARIANCE_LARGE_WxH(sse, diff, 10);
-#define VARIANCE_32Wx64H(sse, diff) VARIANCE_LARGE_WxH(sse, diff, 11);
-#define VARIANCE_64Wx32H(sse, diff) VARIANCE_LARGE_WxH(sse, diff, 11);
-#define VARIANCE_64Wx64H(sse, diff) VARIANCE_LARGE_WxH(sse, diff, 12);
-
-#define AOM_VARIANCE_WDXHT_MSA(wd, ht) \
- uint32_t aom_variance##wd##x##ht##_msa( \
- const uint8_t *src, int32_t src_stride, const uint8_t *ref, \
- int32_t ref_stride, uint32_t *sse) { \
- int32_t diff; \
- \
- *sse = \
- sse_diff_##wd##width_msa(src, src_stride, ref, ref_stride, ht, &diff); \
- \
- return VARIANCE_##wd##Wx##ht##H(*sse, diff); \
- }
-
-/* clang-format off */
-AOM_VARIANCE_WDXHT_MSA(4, 4)
-AOM_VARIANCE_WDXHT_MSA(4, 8)
-
-AOM_VARIANCE_WDXHT_MSA(8, 4)
-AOM_VARIANCE_WDXHT_MSA(8, 8)
-AOM_VARIANCE_WDXHT_MSA(8, 16)
-
-AOM_VARIANCE_WDXHT_MSA(16, 8)
-AOM_VARIANCE_WDXHT_MSA(16, 16)
-AOM_VARIANCE_WDXHT_MSA(16, 32)
-
-AOM_VARIANCE_WDXHT_MSA(32, 16)
-AOM_VARIANCE_WDXHT_MSA(32, 32)
-/* clang-format on */
-
-uint32_t aom_variance32x64_msa(const uint8_t *src, int32_t src_stride,
- const uint8_t *ref, int32_t ref_stride,
- uint32_t *sse) {
- int32_t diff;
-
- *sse = sse_diff_32x64_msa(src, src_stride, ref, ref_stride, &diff);
-
- return VARIANCE_32Wx64H(*sse, diff);
-}
-
-uint32_t aom_variance64x32_msa(const uint8_t *src, int32_t src_stride,
- const uint8_t *ref, int32_t ref_stride,
- uint32_t *sse) {
- int32_t diff;
-
- *sse = sse_diff_64x32_msa(src, src_stride, ref, ref_stride, &diff);
-
- return VARIANCE_64Wx32H(*sse, diff);
-}
-
-uint32_t aom_variance64x64_msa(const uint8_t *src, int32_t src_stride,
- const uint8_t *ref, int32_t ref_stride,
- uint32_t *sse) {
- int32_t diff;
-
- *sse = sse_diff_64x64_msa(src, src_stride, ref, ref_stride, &diff);
-
- return VARIANCE_64Wx64H(*sse, diff);
-}
-
-uint32_t aom_mse8x8_msa(const uint8_t *src, int32_t src_stride,
- const uint8_t *ref, int32_t ref_stride, uint32_t *sse) {
- *sse = sse_8width_msa(src, src_stride, ref, ref_stride, 8);
-
- return *sse;
-}
-
-uint32_t aom_mse8x16_msa(const uint8_t *src, int32_t src_stride,
- const uint8_t *ref, int32_t ref_stride,
- uint32_t *sse) {
- *sse = sse_8width_msa(src, src_stride, ref, ref_stride, 16);
-
- return *sse;
-}
-
-uint32_t aom_mse16x8_msa(const uint8_t *src, int32_t src_stride,
- const uint8_t *ref, int32_t ref_stride,
- uint32_t *sse) {
- *sse = sse_16width_msa(src, src_stride, ref, ref_stride, 8);
-
- return *sse;
-}
-
-uint32_t aom_mse16x16_msa(const uint8_t *src, int32_t src_stride,
- const uint8_t *ref, int32_t ref_stride,
- uint32_t *sse) {
- *sse = sse_16width_msa(src, src_stride, ref, ref_stride, 16);
-
- return *sse;
-}
-
-void aom_get8x8var_msa(const uint8_t *src, int32_t src_stride,
- const uint8_t *ref, int32_t ref_stride, uint32_t *sse,
- int32_t *sum) {
- *sse = sse_diff_8width_msa(src, src_stride, ref, ref_stride, 8, sum);
-}
-
-void aom_get16x16var_msa(const uint8_t *src, int32_t src_stride,
- const uint8_t *ref, int32_t ref_stride, uint32_t *sse,
- int32_t *sum) {
- *sse = sse_diff_16width_msa(src, src_stride, ref, ref_stride, 16, sum);
-}
-
-uint32_t aom_get_mb_ss_msa(const int16_t *src) { return get_mb_ss_msa(src); }
diff --git a/third_party/aom/aom_dsp/noise_model.c b/third_party/aom/aom_dsp/noise_model.c
deleted file mode 100644
index 2faee8506..000000000
--- a/third_party/aom/aom_dsp/noise_model.c
+++ /dev/null
@@ -1,1648 +0,0 @@
-/*
- * Copyright (c) 2017, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#include <math.h>
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-
-#include "aom_dsp/aom_dsp_common.h"
-#include "aom_dsp/noise_model.h"
-#include "aom_dsp/noise_util.h"
-#include "aom_mem/aom_mem.h"
-#include "av1/common/common.h"
-#include "av1/encoder/mathutils.h"
-
-#define kLowPolyNumParams 3
-
-static const int kMaxLag = 4;
-
-// Generates get_block_mean_<suffix>(), which averages the samples of type
-// INT_TYPE inside the block whose top-left corner is (x_o, y_o). The block is
-// clipped to the w x h plane, so blocks that hang over the right or bottom
-// edge are averaged over the in-bounds samples only.
-#define GET_BLOCK_MEAN(INT_TYPE, suffix)                                    \
-  static double get_block_mean_##suffix(const INT_TYPE *data, int w, int h, \
-                                        int stride, int x_o, int y_o,       \
-                                        int block_size) {                   \
-    const int rows = AOMMIN(h - y_o, block_size);                           \
-    const int cols = AOMMIN(w - x_o, block_size);                           \
-    double sum = 0;                                                         \
-    for (int r = 0; r < rows; ++r) {                                        \
-      const int row_offset = (y_o + r) * stride + x_o;                      \
-      for (int c = 0; c < cols; ++c) {                                      \
-        sum += data[row_offset + c];                                        \
-      }                                                                     \
-    }                                                                       \
-    return sum / (cols * rows);                                             \
-  }
-
-GET_BLOCK_MEAN(uint8_t, lowbd);
-GET_BLOCK_MEAN(uint16_t, highbd);
-
-/* Bit-depth dispatcher for the block mean: when |use_highbd| is non-zero the
- * buffer is reinterpreted as uint16_t samples, otherwise it is read as
- * uint8_t samples. */
-static INLINE double get_block_mean(const uint8_t *data, int w, int h,
-                                    int stride, int x_o, int y_o,
-                                    int block_size, int use_highbd) {
-  if (!use_highbd) {
-    return get_block_mean_lowbd(data, w, h, stride, x_o, y_o, block_size);
-  }
-  return get_block_mean_highbd((const uint16_t *)data, w, h, stride, x_o, y_o,
-                               block_size);
-}
-
-// Generates get_noise_var_<suffix>(), which returns the variance of
-// (data - denoised) over the block at (x_o, y_o), clipped to the w x h plane.
-// The variance is computed as E[noise^2] - E[noise]^2.
-#define GET_NOISE_VAR(INT_TYPE, suffix)                                  \
-  static double get_noise_var_##suffix(                                  \
-      const INT_TYPE *data, const INT_TYPE *denoised, int stride, int w, \
-      int h, int x_o, int y_o, int block_size_x, int block_size_y) {     \
-    const int rows = AOMMIN(h - y_o, block_size_y);                      \
-    const int cols = AOMMIN(w - x_o, block_size_x);                      \
-    double sum_sq = 0;                                                   \
-    double sum = 0;                                                      \
-    for (int r = 0; r < rows; ++r) {                                     \
-      const int row_offset = (y_o + r) * stride + x_o;                   \
-      for (int c = 0; c < cols; ++c) {                                   \
-        const double noise =                                             \
-            (double)data[row_offset + c] - denoised[row_offset + c];     \
-        sum += noise;                                                    \
-        sum_sq += noise * noise;                                         \
-      }                                                                  \
-    }                                                                    \
-    sum /= (cols * rows);                                                \
-    return sum_sq / (cols * rows) - sum * sum;                           \
-  }
-
-GET_NOISE_VAR(uint8_t, lowbd);
-GET_NOISE_VAR(uint16_t, highbd);
-
-/* Bit-depth dispatcher for the block noise variance.
- *
- * The plane arguments are named in the order in which they are actually
- * consumed (stride, then width, then height): they are forwarded positionally
- * to get_noise_var_lowbd()/get_noise_var_highbd(), whose third, fourth and
- * fifth parameters are stride, w and h. Only the names changed; the argument
- * positions are the same, so existing call sites are unaffected. */
-static INLINE double get_noise_var(const uint8_t *data, const uint8_t *denoised,
-                                   int stride, int w, int h, int x_o, int y_o,
-                                   int block_size_x, int block_size_y,
-                                   int use_highbd) {
-  if (use_highbd)
-    return get_noise_var_highbd((const uint16_t *)data,
-                                (const uint16_t *)denoised, stride, w, h, x_o,
-                                y_o, block_size_x, block_size_y);
-  return get_noise_var_lowbd(data, denoised, stride, w, h, x_o, y_o,
-                             block_size_x, block_size_y);
-}
-
-/* Zero-fills the n*n matrix A and the n-element vectors b and x of |eqns|.
- * The dimension eqns->n itself is left untouched. */
-static void equation_system_clear(aom_equation_system_t *eqns) {
-  const int dim = eqns->n;
-  memset(eqns->b, 0, sizeof(*eqns->b) * dim);
-  memset(eqns->x, 0, sizeof(*eqns->x) * dim);
-  memset(eqns->A, 0, sizeof(*eqns->A) * dim * dim);
-}
-
-/* Copies A, x and b from |src| to |dst|. The element counts are taken from
- * dst->n, so |src| must hold at least that many entries. */
-static void equation_system_copy(aom_equation_system_t *dst,
-                                 const aom_equation_system_t *src) {
-  const int dim = dst->n;
-  memcpy(dst->b, src->b, sizeof(*dst->b) * dim);
-  memcpy(dst->x, src->x, sizeof(*dst->x) * dim);
-  memcpy(dst->A, src->A, sizeof(*dst->A) * dim * dim);
-}
-
-/* Allocates and zeroes an n x n system. Returns 1 on success. If any of the
- * three allocations fails, a message is printed, whatever was allocated is
- * released, |eqns| is zeroed and 0 is returned. */
-static int equation_system_init(aom_equation_system_t *eqns, int n) {
-  eqns->A = (double *)aom_malloc(sizeof(*eqns->A) * n * n);
-  eqns->b = (double *)aom_malloc(sizeof(*eqns->b) * n);
-  eqns->x = (double *)aom_malloc(sizeof(*eqns->x) * n);
-  eqns->n = n;
-  if (eqns->A && eqns->b && eqns->x) {
-    equation_system_clear(eqns);
-    return 1;
-  }
-
-  fprintf(stderr, "Failed to allocate system of equations of size %d\n", n);
-  aom_free(eqns->A);
-  aom_free(eqns->b);
-  aom_free(eqns->x);
-  memset(eqns, 0, sizeof(*eqns));
-  return 0;
-}
-
-/* Solves the system and writes the solution into eqns->x. linsolve() is given
- * scratch copies of A and b rather than the stored arrays. Returns 1 when
- * linsolve() reports success and 0 on allocation or solver failure. */
-static int equation_system_solve(aom_equation_system_t *eqns) {
-  const int dim = eqns->n;
-  double *rhs = (double *)aom_malloc(sizeof(*rhs) * dim);
-  double *mat = (double *)aom_malloc(sizeof(*mat) * dim * dim);
-  int solved;
-
-  if (mat == NULL || rhs == NULL) {
-    fprintf(stderr, "Unable to allocate temp values of size %dx%d\n", dim, dim);
-    aom_free(rhs);
-    aom_free(mat);
-    return 0;
-  }
-
-  memcpy(mat, eqns->A, sizeof(*eqns->A) * dim * dim);
-  memcpy(rhs, eqns->b, sizeof(*eqns->b) * dim);
-  solved = linsolve(dim, mat, eqns->n, rhs, eqns->x);
-  aom_free(rhs);
-  aom_free(mat);
-
-  return solved != 0;
-}
-
-/* Accumulates |src| into |dest|: A and b are summed element by element over
- * dest->n rows; x is not touched. */
-static void equation_system_add(aom_equation_system_t *dest,
-                                aom_equation_system_t *src) {
-  const int dim = dest->n;
-  for (int row = 0; row < dim; ++row) {
-    double *const dst_row = dest->A + row * dim;
-    const double *const src_row = src->A + row * dim;
-    for (int col = 0; col < dim; ++col) {
-      dst_row[col] += src_row[col];
-    }
-    dest->b[row] += src->b[row];
-  }
-}
-
-/* Releases A, b and x and zeroes the structure. A NULL |eqns| is ignored. */
-static void equation_system_free(aom_equation_system_t *eqns) {
-  if (eqns == NULL) return;
-
-  aom_free(eqns->A);
-  aom_free(eqns->b);
-  aom_free(eqns->x);
-  memset(eqns, 0, sizeof(*eqns));
-}
-
-/* Resets the accumulated statistics of |solver|: the equation system is
- * zeroed and the observation count and running total go back to zero. */
-static void noise_strength_solver_clear(aom_noise_strength_solver_t *solver) {
-  solver->total = 0;
-  solver->num_equations = 0;
-  equation_system_clear(&solver->eqns);
-}
-
-/* Merges the statistics gathered in |src| into |dest|: the equation systems
- * are summed, and so are the observation counts and running totals. */
-static void noise_strength_solver_add(aom_noise_strength_solver_t *dest,
-                                      aom_noise_strength_solver_t *src) {
-  dest->total += src->total;
-  dest->num_equations += src->num_equations;
-  equation_system_add(&dest->eqns, &src->eqns);
-}
-
-// Returns how many coefficients the given lag and shape require:
-// lag * (lag + 1) for the diamond shape and ((2 * lag + 1)^2) / 2 for the
-// square shape. Any other shape value yields 0.
-static int num_coeffs(const aom_noise_model_params_t params) {
-  if (params.shape == AOM_NOISE_SHAPE_DIAMOND) {
-    return params.lag * (params.lag + 1);
-  }
-  if (params.shape == AOM_NOISE_SHAPE_SQUARE) {
-    const int side = 2 * params.lag + 1;
-    return (side * side) / 2;
-  }
-  return 0;
-}
-
-/* Prepares |state| for a model with |n| coefficients: allocates its n x n
- * equation system, resets the gain to 1 and the observation count to 0, and
- * initializes the strength solver with 20 bins. Returns 0 on failure. */
-static int noise_state_init(aom_noise_state_t *state, int n, int bit_depth) {
-  const int kNumBins = 20;
-  const int eqns_ok = equation_system_init(&state->eqns, n);
-  if (!eqns_ok) {
-    fprintf(stderr, "Failed initialization noise state with size %d\n", n);
-    return 0;
-  }
-  state->num_observations = 0;
-  state->ar_gain = 1.0;
-  return aom_noise_strength_solver_init(&state->strength_solver, kNumBins,
-                                        bit_depth);
-}
-
-/* Fallback solution: every coefficient in x is set to zero, except that the
- * last one (the correlation with the luma channel) is solved from the last
- * diagonal entry of A when that entry is not negligibly small. */
-static void set_chroma_coefficient_fallback_soln(aom_equation_system_t *eqns) {
-  const double kTolerance = 1e-6;
-  const int dim = eqns->n;
-  const int last = dim - 1;
-  const double diag = eqns->A[last * dim + last];
-
-  memset(eqns->x, 0, sizeof(*eqns->x) * dim);
-  if (fabs(diag) > kTolerance) {
-    eqns->x[last] = eqns->b[last] / diag;
-  }
-}
-
-/* Allocates a zero-filled lookup table with |num_points| (x, y) entries.
- *
- * Returns 1 on success. Returns 0 when |lut| is NULL, when |num_points| is
- * not positive, or when the allocation fails; in the latter two cases
- * lut->num_points is left at 0, so the table never advertises more points
- * than it owns. */
-int aom_noise_strength_lut_init(aom_noise_strength_lut_t *lut, int num_points) {
-  if (!lut) return 0;
-  // Keep the count consistent with the storage on every failure path.
-  lut->num_points = 0;
-  // A non-positive count would turn into a zero or huge allocation size.
-  if (num_points <= 0) {
-    lut->points = NULL;
-    return 0;
-  }
-  lut->points = (double(*)[2])aom_malloc(num_points * sizeof(*lut->points));
-  if (!lut->points) return 0;
-  lut->num_points = num_points;
-  memset(lut->points, 0, sizeof(*lut->points) * num_points);
-  return 1;
-}
-
-/* Releases the point storage of |lut| and zeroes the structure. A NULL |lut|
- * is ignored. */
-void aom_noise_strength_lut_free(aom_noise_strength_lut_t *lut) {
-  if (lut == NULL) return;
-
-  aom_free(lut->points);
-  memset(lut, 0, sizeof(*lut));
-}
-
-/* Evaluates the piecewise-linear table at |x|. Inside the covered range the
- * two surrounding points are interpolated linearly; outside it the value of
- * the nearest end point is returned (constant extrapolation). */
-double aom_noise_strength_lut_eval(const aom_noise_strength_lut_t *lut,
-                                   double x) {
-  double(*const pts)[2] = lut->points;
-  const int last = lut->num_points - 1;
-
-  // Left of the first point.
-  if (x < pts[0][0]) return pts[0][1];
-
-  for (int i = 0; i < last; ++i) {
-    const double x0 = pts[i][0];
-    const double x1 = pts[i + 1][0];
-    if (!(x >= x0 && x <= x1)) continue;
-    const double a = (x - x0) / (x1 - x0);
-    return pts[i + 1][1] * a + pts[i][1] * (1.0 - a);
-  }
-
-  // Right of the last point.
-  return pts[last][1];
-}
-
-/* Maps an intensity |value| to a fractional bin position in
- * [0, num_bins - 1]. The value is first clamped to the solver's
- * [min_intensity, max_intensity] range. */
-static double noise_strength_solver_get_bin_index(
-    const aom_noise_strength_solver_t *solver, double value) {
-  const double lo = solver->min_intensity;
-  const double hi = solver->max_intensity;
-  const double clamped = fclamp(value, lo, hi);
-  const double range = hi - lo;
-  return (solver->num_bins - 1) * (clamped - lo) / range;
-}
-
-/* Reads the solved strength at intensity |x| by interpolating linearly
- * between the two bins that surround its fractional bin position. */
-static double noise_strength_solver_get_value(
-    const aom_noise_strength_solver_t *solver, double x) {
-  const double pos = noise_strength_solver_get_bin_index(solver, x);
-  const int lo_bin = (int)floor(pos);
-  const int hi_bin = AOMMIN(solver->num_bins - 1, lo_bin + 1);
-  const double frac = pos - lo_bin;
-  const double *const soln = solver->eqns.x;
-  return (1.0 - frac) * soln[lo_bin] + frac * soln[hi_bin];
-}
-
-/* Adds one (block_mean, noise_std) observation to the least-squares system.
- * The observation is split between the two bins surrounding block_mean with
- * weights (1 - a) and a, where a is the fractional bin position. */
-void aom_noise_strength_solver_add_measurement(
-    aom_noise_strength_solver_t *solver, double block_mean, double noise_std) {
-  const double pos = noise_strength_solver_get_bin_index(solver, block_mean);
-  const int dim = solver->num_bins;
-  const int lo_bin = (int)floor(pos);
-  const int hi_bin = AOMMIN(dim - 1, lo_bin + 1);
-  const double w_hi = pos - lo_bin;
-  const double w_lo = 1.0 - w_hi;
-  double *const A = solver->eqns.A;
-  double *const b = solver->eqns.b;
-
-  A[lo_bin * dim + lo_bin] += w_lo * w_lo;
-  A[hi_bin * dim + lo_bin] += w_hi * w_lo;
-  A[hi_bin * dim + hi_bin] += w_hi * w_hi;
-  A[lo_bin * dim + hi_bin] += w_hi * w_lo;
-  b[lo_bin] += w_lo * noise_std;
-  b[hi_bin] += w_hi * noise_std;
-  solver->total += noise_std;
-  solver->num_equations++;
-}
-
-/* Solves for the per-bin noise strength and stores it in solver->eqns.x.
- *
- * Two regularizers are applied: a smoothness term between neighbouring bins,
- * scaled by the number of observations, and a small pull towards the mean
- * observed strength. The smoothness term and the diagonal part of the mean
- * term are applied to a scratch copy of A, so the stored matrix is unchanged.
- * NOTE: the mean term is still added to the stored b vector, as before.
- *
- * Returns 1 on success, 0 if the scratch copy cannot be allocated or the
- * system cannot be solved. */
-int aom_noise_strength_solver_solve(aom_noise_strength_solver_t *solver) {
-  // Add regularization proportional to the number of constraints
-  const int n = solver->num_bins;
-  const double kAlpha = 2.0 * (double)(solver->num_equations) / n;
-  int result = 0;
-  double mean = 0;
-
-  // Work on a copy of A so the caller's matrix is not modified.
-  double *old_A = solver->eqns.A;
-  double *A = (double *)aom_malloc(sizeof(*A) * n * n);
-  if (!A) {
-    fprintf(stderr, "Unable to allocate copy of A\n");
-    return 0;
-  }
-  memcpy(A, old_A, sizeof(*A) * n * n);
-
-  for (int i = 0; i < n; ++i) {
-    const int i_lo = AOMMAX(0, i - 1);
-    const int i_hi = AOMMIN(n - 1, i + 1);
-    A[i * n + i_lo] -= kAlpha;
-    A[i * n + i] += 2 * kAlpha;
-    A[i * n + i_hi] -= kAlpha;
-  }
-
-  // Small regularization to give average noise strength. With no
-  // observations the mean would be 0/0 = NaN, which would be added into b and
-  // stay there for every later solve; use 0 in that case.
-  mean = solver->num_equations > 0 ? solver->total / solver->num_equations : 0;
-  for (int i = 0; i < n; ++i) {
-    A[i * n + i] += 1.0 / 8192.;
-    solver->eqns.b[i] += mean / 8192.;
-  }
-  solver->eqns.A = A;
-  result = equation_system_solve(&solver->eqns);
-  solver->eqns.A = old_A;
-
-  aom_free(A);
-  return result;
-}
-
-/* Initializes |solver| with |num_bins| bins spanning the intensity range
- * [0, 2^bit_depth - 1] and allocates its equation system. Returns 0 when
- * |solver| is NULL or the allocation fails, 1 otherwise. */
-int aom_noise_strength_solver_init(aom_noise_strength_solver_t *solver,
-                                   int num_bins, int bit_depth) {
-  if (solver == NULL) return 0;
-
-  memset(solver, 0, sizeof(*solver));
-  solver->num_equations = 0;
-  solver->total = 0;
-  solver->min_intensity = 0;
-  solver->max_intensity = (1 << bit_depth) - 1;
-  solver->num_bins = num_bins;
-  return equation_system_init(&solver->eqns, num_bins);
-}
-
-/* Releases the equation system owned by |solver|. A NULL |solver| is
- * ignored. */
-void aom_noise_strength_solver_free(aom_noise_strength_solver_t *solver) {
-  if (solver != NULL) {
-    equation_system_free(&solver->eqns);
-  }
-}
-
-/* Returns the intensity at the centre of bin |i|: bin 0 maps to
- * min_intensity and bin num_bins - 1 maps to max_intensity. */
-double aom_noise_strength_solver_get_center(
-    const aom_noise_strength_solver_t *solver, int i) {
-  const int last_bin = solver->num_bins - 1;
-  const double span = solver->max_intensity - solver->min_intensity;
-  return ((double)i) / last_bin * span + solver->min_intensity;
-}
-
-// For each interior lut point i in [start, end), computes the error that
-// removing the point would introduce: the summed absolute difference between
-// the solver output and the straight segment joining points i - 1 and i + 1,
-// sampled at the bin centres that fall in [x_{i - 1}, x_{i + 1}) and scaled
-// by the bin spacing. The two end points of the lut are never evaluated.
-static void update_piecewise_linear_residual(
-    const aom_noise_strength_solver_t *solver,
-    const aom_noise_strength_lut_t *lut, double *residual, int start, int end) {
-  const double dx = 255. / solver->num_bins;
-  const int first = AOMMAX(start, 1);
-  const int limit = AOMMIN(end, lut->num_points - 1);
-
-  for (int i = first; i < limit; ++i) {
-    const double x_lo = lut->points[i - 1][0];
-    const double y_lo = lut->points[i - 1][1];
-    const double x_hi = lut->points[i + 1][0];
-    const double y_hi = lut->points[i + 1][1];
-    const int lo_bin =
-        (int)floor(noise_strength_solver_get_bin_index(solver, x_lo));
-    const int hi_bin =
-        (int)ceil(noise_strength_solver_get_bin_index(solver, x_hi));
-    const int lower = AOMMAX(0, lo_bin);
-    const int upper = AOMMIN(solver->num_bins - 1, hi_bin);
-    double r = 0;
-
-    for (int j = lower; j <= upper; ++j) {
-      const double x = aom_noise_strength_solver_get_center(solver, j);
-      if (x < x_lo || x >= x_hi) continue;
-      const double a = (x - x_lo) / (x_hi - x_lo);
-      const double estimate_y = y_lo * (1.0 - a) + y_hi * a;
-      r += fabs(solver->eqns.x[j] - estimate_y);
-    }
-    residual[i] = r * dx;
-  }
-}
-
-/* Fits a piecewise-linear lookup table to the solved noise strengths.
- *
- * The table starts with one point per solver bin. Interior points are then
- * removed greedily, cheapest first, until the table has at most
- * |max_output_points| points and removing another point would cost more than
- * the tolerance. A negative |max_output_points| means "no limit".
- *
- * Returns 1 on success. Returns 0 if the table or the scratch residual array
- * cannot be allocated; in the latter case the freshly allocated table is
- * released again, so the caller is not left with a half-built lut. */
-int aom_noise_strength_solver_fit_piecewise(
-    const aom_noise_strength_solver_t *solver, int max_output_points,
-    aom_noise_strength_lut_t *lut) {
-  // The tolerance is normalized to give consistent results between
-  // different bit-depths.
-  const double kTolerance = solver->max_intensity * 0.00625 / 255.0;
-  if (!aom_noise_strength_lut_init(lut, solver->num_bins)) {
-    fprintf(stderr, "Failed to init lut\n");
-    return 0;
-  }
-  for (int i = 0; i < solver->num_bins; ++i) {
-    lut->points[i][0] = aom_noise_strength_solver_get_center(solver, i);
-    lut->points[i][1] = solver->eqns.x[i];
-  }
-  if (max_output_points < 0) {
-    max_output_points = solver->num_bins;
-  }
-
-  double *residual =
-      (double *)aom_malloc(solver->num_bins * sizeof(*residual));
-  if (!residual) {
-    // Previously the NULL pointer went straight into memset().
-    fprintf(stderr, "Failed to allocate residual\n");
-    aom_noise_strength_lut_free(lut);
-    return 0;
-  }
-  memset(residual, 0, sizeof(*residual) * solver->num_bins);
-
-  update_piecewise_linear_residual(solver, lut, residual, 0, solver->num_bins);
-
-  // Greedily remove points if there are too many or if it doesn't hurt local
-  // approximation (never remove the end points)
-  while (lut->num_points > 2) {
-    int min_index = 1;
-    for (int j = 1; j < lut->num_points - 1; ++j) {
-      if (residual[j] < residual[min_index]) {
-        min_index = j;
-      }
-    }
-    const double dx =
-        lut->points[min_index + 1][0] - lut->points[min_index - 1][0];
-    const double avg_residual = residual[min_index] / dx;
-    if (lut->num_points <= max_output_points && avg_residual > kTolerance) {
-      break;
-    }
-
-    // Close the gap left by the removed point.
-    const int num_remaining = lut->num_points - min_index - 1;
-    memmove(lut->points + min_index, lut->points + min_index + 1,
-            sizeof(lut->points[0]) * num_remaining);
-    lut->num_points--;
-
-    // Only the neighbours of the removed point need a new residual.
-    update_piecewise_linear_residual(solver, lut, residual, min_index - 1,
-                                     min_index + 1);
-  }
-  aom_free(residual);
-  return 1;
-}
-
-/* Prepares |block_finder| for block_size x block_size blocks.
- *
- * Builds the design matrix A of a planar model (one row [yd, xd, 1] per
- * pixel, with coordinates normalized by half the block size) and the inverse
- * of A^T A, which aom_flat_block_finder_extract_block() uses to fit and
- * remove a plane from each block.
- *
- * Returns 1 on success. Returns 0, leaving |block_finder| untouched, if an
- * allocation fails or if A^T A cannot be inverted. */
-int aom_flat_block_finder_init(aom_flat_block_finder_t *block_finder,
-                               int block_size, int bit_depth, int use_highbd) {
-  const int n = block_size * block_size;
-  aom_equation_system_t eqns;
-  double *AtA_inv = 0;
-  double *A = 0;
-  int x = 0, y = 0, i = 0, j = 0;
-  if (!equation_system_init(&eqns, kLowPolyNumParams)) {
-    fprintf(stderr, "Failed to init equation system for block_size=%d\n",
-            block_size);
-    return 0;
-  }
-
-  AtA_inv = (double *)aom_malloc(kLowPolyNumParams * kLowPolyNumParams *
-                                 sizeof(*AtA_inv));
-  A = (double *)aom_malloc(kLowPolyNumParams * n * sizeof(*A));
-  if (AtA_inv == NULL || A == NULL) {
-    fprintf(stderr, "Failed to alloc A or AtA_inv for block_size=%d\n",
-            block_size);
-    aom_free(AtA_inv);
-    aom_free(A);
-    equation_system_free(&eqns);
-    return 0;
-  }
-
-  // Fill A and accumulate A^T A in eqns.A.
-  for (y = 0; y < block_size; ++y) {
-    const double yd = ((double)y - block_size / 2.) / (block_size / 2.);
-    for (x = 0; x < block_size; ++x) {
-      const double xd = ((double)x - block_size / 2.) / (block_size / 2.);
-      const double coords[3] = { yd, xd, 1 };
-      const int row = y * block_size + x;
-      A[kLowPolyNumParams * row + 0] = yd;
-      A[kLowPolyNumParams * row + 1] = xd;
-      A[kLowPolyNumParams * row + 2] = 1;
-
-      for (i = 0; i < kLowPolyNumParams; ++i) {
-        for (j = 0; j < kLowPolyNumParams; ++j) {
-          eqns.A[kLowPolyNumParams * i + j] += coords[i] * coords[j];
-        }
-      }
-    }
-  }
-
-  // Lazy inverse using existing equation solver: solving against the i-th
-  // unit vector yields the i-th column of the inverse.
-  for (i = 0; i < kLowPolyNumParams; ++i) {
-    memset(eqns.b, 0, sizeof(*eqns.b) * kLowPolyNumParams);
-    eqns.b[i] = 1;
-    if (!equation_system_solve(&eqns)) {
-      // A failed solve used to be ignored, leaving a bogus inverse behind.
-      fprintf(stderr, "Failed to invert AtA for block_size=%d\n", block_size);
-      aom_free(AtA_inv);
-      aom_free(A);
-      equation_system_free(&eqns);
-      return 0;
-    }
-
-    for (j = 0; j < kLowPolyNumParams; ++j) {
-      AtA_inv[j * kLowPolyNumParams + i] = eqns.x[j];
-    }
-  }
-  equation_system_free(&eqns);
-
-  // Publish the result only once everything has succeeded.
-  block_finder->A = A;
-  block_finder->AtA_inv = AtA_inv;
-  block_finder->block_size = block_size;
-  block_finder->normalization = (1 << bit_depth) - 1;
-  block_finder->use_highbd = use_highbd;
-  return 1;
-}
-
-/* Releases the matrices owned by |block_finder| and zeroes the structure.
- * A NULL |block_finder| is ignored. */
-void aom_flat_block_finder_free(aom_flat_block_finder_t *block_finder) {
-  if (block_finder == NULL) return;
-
-  aom_free(block_finder->A);
-  aom_free(block_finder->AtA_inv);
-  memset(block_finder, 0, sizeof(*block_finder));
-}
-
-/* Extracts the block at (offsx, offsy) and removes its planar trend.
- *
- * The samples are copied into |block| as doubles divided by the finder's
- * normalization; coordinates outside the image are clamped to the nearest
- * valid row/column. A least-squares plane is then fitted with the
- * precomputed A and (A^T A)^-1, written to |plane|, and subtracted from
- * |block|. Both output arrays hold block_size * block_size entries. */
-void aom_flat_block_finder_extract_block(
-    const aom_flat_block_finder_t *block_finder, const uint8_t *const data,
-    int w, int h, int stride, int offsx, int offsy, double *plane,
-    double *block) {
-  const int block_size = block_finder->block_size;
-  const int num_pixels = block_size * block_size;
-  const int highbd = block_finder->use_highbd;
-  double plane_coords[kLowPolyNumParams];
-  double AtA_inv_b[kLowPolyNumParams];
-
-  for (int yi = 0; yi < block_size; ++yi) {
-    const int y = clamp(offsy + yi, 0, h - 1);
-    for (int xi = 0; xi < block_size; ++xi) {
-      const int x = clamp(offsx + xi, 0, w - 1);
-      const int pos = y * stride + x;
-      const double sample = highbd ? (double)((const uint16_t *)data)[pos]
-                                   : (double)data[pos];
-      block[yi * block_size + xi] = sample / block_finder->normalization;
-    }
-  }
-
-  // plane = A * (AtA_inv * (block * A))
-  multiply_mat(block, block_finder->A, AtA_inv_b, 1, num_pixels,
-               kLowPolyNumParams);
-  multiply_mat(block_finder->AtA_inv, AtA_inv_b, plane_coords,
-               kLowPolyNumParams, kLowPolyNumParams, 1);
-  multiply_mat(block_finder->A, plane_coords, plane, num_pixels,
-               kLowPolyNumParams, 1);
-
-  for (int k = 0; k < num_pixels; ++k) {
-    block[k] -= plane[k];
-  }
-}
-
-/* A block index paired with its flatness score, used for sorting. */
-typedef struct {
-  int index;
-  float score;
-} index_and_score_t;
-
-/* qsort() comparator that orders index_and_score_t entries by ascending
- * score. The arguments are read through pointers to const, matching the
- * comparator contract, instead of casting the qualifier away. */
-static int compare_scores(const void *a, const void *b) {
-  const index_and_score_t *const lhs = (const index_and_score_t *)a;
-  const index_and_score_t *const rhs = (const index_and_score_t *)b;
-  const float diff = lhs->score - rhs->score;
-  if (diff < 0)
-    return -1;
-  else if (diff > 0)
-    return 1;
-  return 0;
-}
-
-/* Classifies every block_size x block_size block of the image as flat or not.
- *
- * |flat_blocks| receives one byte per block in raster order: 255 when the
- * block passes the hard thresholds, and bit 0 is additionally set for blocks
- * whose sigmoid score is at or above the 90th-percentile score. Returns the
- * number of blocks marked in either way, 0 when the image contains no blocks,
- * or -1 when the scratch buffers cannot be allocated. */
-int aom_flat_block_finder_run(const aom_flat_block_finder_t *block_finder,
-                              const uint8_t *const data, int w, int h,
-                              int stride, uint8_t *flat_blocks) {
-  // The gradient-based features used in this code are based on:
-  //  A. Kokaram, D. Kelly, H. Denman and A. Crawford, "Measuring noise
-  //  correlation for improved video denoising," 2012 19th, ICIP.
-  // The thresholds are more lenient to allow for correct grain modeling
-  // in extreme cases.
-  const int block_size = block_finder->block_size;
-  const int n = block_size * block_size;
-  const double kTraceThreshold = 0.15 / (32 * 32);
-  const double kRatioThreshold = 1.25;
-  const double kNormThreshold = 0.08 / (32 * 32);
-  const double kVarThreshold = 0.005 / (double)n;
-  const int num_blocks_w = (w + block_size - 1) / block_size;
-  const int num_blocks_h = (h + block_size - 1) / block_size;
-  int num_flat = 0;
-  int bx = 0, by = 0;
-  double *plane = NULL;
-  double *block = NULL;
-  index_and_score_t *scores = NULL;
-
-  // With no blocks there is nothing to classify, and the percentile lookup
-  // below would read scores[0] out of bounds.
-  if (num_blocks_w <= 0 || num_blocks_h <= 0) return 0;
-
-  plane = (double *)aom_malloc(n * sizeof(*plane));
-  block = (double *)aom_malloc(n * sizeof(*block));
-  scores = (index_and_score_t *)aom_malloc(num_blocks_w * num_blocks_h *
-                                           sizeof(*scores));
-  if (plane == NULL || block == NULL || scores == NULL) {
-    fprintf(stderr, "Failed to allocate memory for block of size %d\n", n);
-    aom_free(plane);
-    aom_free(block);
-    aom_free(scores);
-    return -1;
-  }
-
-#ifdef NOISE_MODEL_LOG_SCORE
-  fprintf(stderr, "score = [");
-#endif
-  for (by = 0; by < num_blocks_h; ++by) {
-    for (bx = 0; bx < num_blocks_w; ++bx) {
-      // Compute gradient covariance matrix.
-      double Gxx = 0, Gxy = 0, Gyy = 0;
-      double var = 0;
-      double mean = 0;
-      int xi, yi;
-      aom_flat_block_finder_extract_block(block_finder, data, w, h, stride,
-                                          bx * block_size, by * block_size,
-                                          plane, block);
-
-      // Central differences over the interior of the block.
-      for (yi = 1; yi < block_size - 1; ++yi) {
-        for (xi = 1; xi < block_size - 1; ++xi) {
-          const double gx = (block[yi * block_size + xi + 1] -
-                             block[yi * block_size + xi - 1]) /
-                            2;
-          const double gy = (block[yi * block_size + xi + block_size] -
-                             block[yi * block_size + xi - block_size]) /
-                            2;
-          Gxx += gx * gx;
-          Gxy += gx * gy;
-          Gyy += gy * gy;
-
-          mean += block[yi * block_size + xi];
-          var += block[yi * block_size + xi] * block[yi * block_size + xi];
-        }
-      }
-      mean /= (block_size - 2) * (block_size - 2);
-
-      // Normalize gradients by block_size.
-      Gxx /= ((block_size - 2) * (block_size - 2));
-      Gxy /= ((block_size - 2) * (block_size - 2));
-      Gyy /= ((block_size - 2) * (block_size - 2));
-      var = var / ((block_size - 2) * (block_size - 2)) - mean * mean;
-
-      {
-        const double trace = Gxx + Gyy;
-        const double det = Gxx * Gyy - Gxy * Gxy;
-        // The discriminant equals (Gxx - Gyy)^2 + 4 * Gxy^2 and is therefore
-        // never negative in exact arithmetic, but rounding can push it just
-        // below zero. Clamp it so sqrt() cannot return NaN and hand NaN
-        // scores to the sort below.
-        const double disc = AOMMAX(trace * trace - 4 * det, 0.0);
-        const double root = sqrt(disc);
-        const double e1 = (trace + root) / 2.;
-        const double e2 = (trace - root) / 2.;
-        const double norm = e1;  // Spectral norm
-        const double ratio = (e1 / AOMMAX(e2, 1e-6));
-        const int is_flat = (trace < kTraceThreshold) &&
-                            (ratio < kRatioThreshold) &&
-                            (norm < kNormThreshold) && (var > kVarThreshold);
-        // The following weights are used to combine the above features to give
-        // a sigmoid score for flatness. If the input was normalized to [0,100]
-        // the magnitude of these values would be close to 1 (e.g., weights
-        // corresponding to variance would be a factor of 10000x smaller).
-        // The weights are given in the following order:
-        //    [{var}, {ratio}, {trace}, {norm}, offset]
-        // with one of the most discriminative being simply the variance.
-        const double weights[5] = { -6682, -0.2056, 13087, -12434, 2.5694 };
-        const float score =
-            (float)(1.0 / (1 + exp(-(weights[0] * var + weights[1] * ratio +
-                                     weights[2] * trace + weights[3] * norm +
-                                     weights[4]))));
-        flat_blocks[by * num_blocks_w + bx] = is_flat ? 255 : 0;
-        scores[by * num_blocks_w + bx].score = var > kVarThreshold ? score : 0;
-        scores[by * num_blocks_w + bx].index = by * num_blocks_w + bx;
-#ifdef NOISE_MODEL_LOG_SCORE
-        fprintf(stderr, "%g %g %g %g %g %d ", score, var, ratio, trace, norm,
-                is_flat);
-#endif
-        num_flat += is_flat;
-      }
-    }
-#ifdef NOISE_MODEL_LOG_SCORE
-    fprintf(stderr, "\n");
-#endif
-  }
-#ifdef NOISE_MODEL_LOG_SCORE
-  fprintf(stderr, "];\n");
-#endif
-  // Find the top-scored blocks (most likely to be flat) and set the flat blocks
-  // be the union of the thresholded results and the top 10th percentile of the
-  // scored results.
-  qsort(scores, num_blocks_w * num_blocks_h, sizeof(*scores), &compare_scores);
-  const int top_nth_percentile = num_blocks_w * num_blocks_h * 90 / 100;
-  const float score_threshold = scores[top_nth_percentile].score;
-  for (int i = 0; i < num_blocks_w * num_blocks_h; ++i) {
-    if (scores[i].score >= score_threshold) {
-      num_flat += flat_blocks[scores[i].index] == 0;
-      flat_blocks[scores[i].index] |= 1;
-    }
-  }
-  aom_free(block);
-  aom_free(plane);
-  aom_free(scores);
-  return num_flat;
-}
-
-/* Initializes |model| for the given parameters.
- *
- * Validates the lag (1..kMaxLag), allocates the combined and latest noise
- * states for the three channels (channels 1 and 2 get one extra coefficient),
- * and fills model->coords with the (x, y) offsets of the causal neighbourhood
- * selected by params.shape.
- *
- * Returns 1 on success. On any failure the model is released with
- * aom_noise_model_free() where needed and 0 is returned. */
-int aom_noise_model_init(aom_noise_model_t *model,
-                         const aom_noise_model_params_t params) {
-  const int n = num_coeffs(params);
-  const int lag = params.lag;
-  const int bit_depth = params.bit_depth;
-  int x = 0, y = 0, i = 0, c = 0;
-
-  memset(model, 0, sizeof(*model));
-  if (params.lag < 1) {
-    fprintf(stderr, "Invalid noise param: lag = %d must be >= 1\n", params.lag);
-    return 0;
-  }
-  if (params.lag > kMaxLag) {
-    fprintf(stderr, "Invalid noise param: lag = %d must be <= %d\n", params.lag,
-            kMaxLag);
-    return 0;
-  }
-
-  memcpy(&model->params, &params, sizeof(params));
-  for (c = 0; c < 3; ++c) {
-    if (!noise_state_init(&model->combined_state[c], n + (c > 0), bit_depth)) {
-      fprintf(stderr, "Failed to allocate noise state for channel %d\n", c);
-      aom_noise_model_free(model);
-      return 0;
-    }
-    if (!noise_state_init(&model->latest_state[c], n + (c > 0), bit_depth)) {
-      fprintf(stderr, "Failed to allocate noise state for channel %d\n", c);
-      aom_noise_model_free(model);
-      return 0;
-    }
-  }
-  model->n = n;
-  model->coords = (int(*)[2])aom_malloc(sizeof(*model->coords) * n);
-  if (!model->coords) {
-    // The loop below writes through coords, so a failed allocation must stop
-    // here instead of dereferencing NULL.
-    fprintf(stderr, "Failed to allocate noise model coordinates\n");
-    aom_noise_model_free(model);
-    return 0;
-  }
-
-  // Walk the rows above the current pixel, plus the part of the current row
-  // to its left (max_x = -1 when y == 0).
-  for (y = -lag; y <= 0; ++y) {
-    const int max_x = y == 0 ? -1 : lag;
-    for (x = -lag; x <= max_x; ++x) {
-      switch (params.shape) {
-        case AOM_NOISE_SHAPE_DIAMOND:
-          if (abs(x) <= y + lag) {
-            model->coords[i][0] = x;
-            model->coords[i][1] = y;
-            ++i;
-          }
-          break;
-        case AOM_NOISE_SHAPE_SQUARE:
-          model->coords[i][0] = x;
-          model->coords[i][1] = y;
-          ++i;
-          break;
-        default:
-          fprintf(stderr, "Invalid shape\n");
-          aom_noise_model_free(model);
-          return 0;
-      }
-    }
-  }
-  assert(i == n);
-  return 1;
-}
-
-/* Releases everything owned by |model|: the coordinate table and, for each
- * of the three channels, the equation systems of the latest and combined
- * states and of their strength solvers. The structure is zeroed afterwards.
- * A NULL |model| is ignored. */
-void aom_noise_model_free(aom_noise_model_t *model) {
-  if (model == NULL) return;
-
-  aom_free(model->coords);
-  for (int ch = 0; ch < 3; ++ch) {
-    aom_noise_state_t *const latest = &model->latest_state[ch];
-    aom_noise_state_t *const combined = &model->combined_state[ch];
-
-    equation_system_free(&latest->eqns);
-    equation_system_free(&combined->eqns);
-
-    equation_system_free(&latest->strength_solver.eqns);
-    equation_system_free(&combined->strength_solver.eqns);
-  }
-  memset(model, 0, sizeof(*model));
-}
-
-// Generates extract_ar_row_<suffix>(). For the pixel at (x, y) it writes the
-// noise (data - denoised) at each of the num_coords neighbour offsets into
-// buffer[0 .. num_coords - 1] and returns the noise at (x, y) itself. When
-// both alt_data and alt_denoised are given, buffer[num_coords] additionally
-// receives the noise of the alternate plane (e.g., luma) averaged over the
-// (1 << sub_log2[0]) x (1 << sub_log2[1]) samples that correspond to (x, y).
-#define EXTRACT_AR_ROW(INT_TYPE, suffix)                                    \
-  static double extract_ar_row_##suffix(                                    \
-      int(*coords)[2], int num_coords, const INT_TYPE *const data,          \
-      const INT_TYPE *const denoised, int stride, int sub_log2[2],          \
-      const INT_TYPE *const alt_data, const INT_TYPE *const alt_denoised,   \
-      int alt_stride, int x, int y, double *buffer) {                       \
-    for (int k = 0; k < num_coords; ++k) {                                  \
-      const int pos = (y + coords[k][1]) * stride + (x + coords[k][0]);     \
-      buffer[k] = (double)data[pos] - denoised[pos];                        \
-    }                                                                       \
-    const int center = y * stride + x;                                      \
-    const double val = (double)data[center] - denoised[center];             \
-                                                                            \
-    if (!alt_data || !alt_denoised) return val;                             \
-                                                                            \
-    const int sub_w = 1 << sub_log2[0];                                     \
-    const int sub_h = 1 << sub_log2[1];                                     \
-    double avg_data = 0, avg_denoised = 0;                                  \
-    int num_samples = 0;                                                    \
-    for (int dy = 0; dy < sub_h; dy++) {                                    \
-      const int y_up = (y << sub_log2[1]) + dy;                             \
-      for (int dx = 0; dx < sub_w; dx++) {                                  \
-        const int x_up = (x << sub_log2[0]) + dx;                           \
-        avg_data += alt_data[y_up * alt_stride + x_up];                     \
-        avg_denoised += alt_denoised[y_up * alt_stride + x_up];             \
-        num_samples++;                                                      \
-      }                                                                     \
-    }                                                                       \
-    buffer[num_coords] = (avg_data - avg_denoised) / num_samples;           \
-    return val;                                                             \
-  }
-
-EXTRACT_AR_ROW(uint8_t, lowbd);
-EXTRACT_AR_ROW(uint16_t, highbd);
-
-/* Accumulates the auto-regressive normal equations of channel |c| from every
- * pixel of every flat block.
- *
- * For each usable pixel the neighbourhood noise vector is extracted and its
- * outer product (scaled by 1 / normalization^2) is added to A, while its
- * product with the pixel's own noise is added to b of the latest state. A
- * margin of |lag| pixels is skipped on the top, left and right sides of a
- * block unless the neighbouring block on that side is flat too.
- *
- * Returns 1 on success, 0 if the scratch buffer cannot be allocated. */
-static int add_block_observations(
-    aom_noise_model_t *noise_model, int c, const uint8_t *const data,
-    const uint8_t *const denoised, int w, int h, int stride, int sub_log2[2],
-    const uint8_t *const alt_data, const uint8_t *const alt_denoised,
-    int alt_stride, const uint8_t *const flat_blocks, int block_size,
-    int num_blocks_w, int num_blocks_h) {
-  aom_noise_state_t *const state = &noise_model->latest_state[c];
-  const int lag = noise_model->params.lag;
-  const int num_coords = noise_model->n;
-  const double normalization = (1 << noise_model->params.bit_depth) - 1;
-  const double norm_sq = normalization * normalization;
-  // Block and plane dimensions in this channel's (possibly subsampled) units.
-  const int block_w = block_size >> sub_log2[0];
-  const int block_h = block_size >> sub_log2[1];
-  const int plane_w = w >> sub_log2[0];
-  const int plane_h = h >> sub_log2[1];
-  double *const A = state->eqns.A;
-  double *const b = state->eqns.b;
-  double *buffer = (double *)aom_malloc(sizeof(*buffer) * (num_coords + 1));
-  const int dim = state->eqns.n;
-
-  if (!buffer) {
-    fprintf(stderr, "Unable to allocate buffer of size %d\n", num_coords + 1);
-    return 0;
-  }
-
-  for (int by = 0; by < num_blocks_h; ++by) {
-    const int y_o = by * block_h;
-    for (int bx = 0; bx < num_blocks_w; ++bx) {
-      const int x_o = bx * block_w;
-      const int blk = by * num_blocks_w + bx;
-      if (!flat_blocks[blk]) continue;
-
-      const int flat_above = by > 0 && flat_blocks[blk - num_blocks_w];
-      const int flat_left = bx > 0 && flat_blocks[blk - 1];
-      const int flat_right = bx + 1 < num_blocks_w && flat_blocks[blk + 1];
-      const int y_start = flat_above ? 0 : lag;
-      const int x_start = flat_left ? 0 : lag;
-      const int y_end = AOMMIN(plane_h - y_o, block_h);
-      const int x_limit = flat_right ? block_w : (block_w - lag);
-      const int x_end = AOMMIN(plane_w - x_o - lag, x_limit);
-
-      for (int y = y_start; y < y_end; ++y) {
-        for (int x = x_start; x < x_end; ++x) {
-          double val;
-          if (noise_model->params.use_highbd) {
-            val = extract_ar_row_highbd(
-                noise_model->coords, num_coords, (const uint16_t *const)data,
-                (const uint16_t *const)denoised, stride, sub_log2,
-                (const uint16_t *const)alt_data,
-                (const uint16_t *const)alt_denoised, alt_stride, x + x_o,
-                y + y_o, buffer);
-          } else {
-            val = extract_ar_row_lowbd(noise_model->coords, num_coords, data,
-                                       denoised, stride, sub_log2, alt_data,
-                                       alt_denoised, alt_stride, x + x_o,
-                                       y + y_o, buffer);
-          }
-          for (int i = 0; i < dim; ++i) {
-            for (int j = 0; j < dim; ++j) {
-              A[i * dim + j] += (buffer[i] * buffer[j]) / norm_sq;
-            }
-            b[i] += (buffer[i] * val) / norm_sq;
-          }
-          state->num_observations++;
-        }
-      }
-    }
-  }
-  aom_free(buffer);
-  return 1;
-}
-
-/* Feeds one (block mean, noise strength) measurement per flat block into the
- * strength solver of channel |c|.
- *
- * The block mean is taken from |alt_data| when it is given and from |data|
- * otherwise. For chroma channels (c > 0) the part of the noise explained by
- * correlation with luma is removed first, and the result is divided by the
- * channel's AR gain. Blocks with too few in-bounds samples are skipped. */
-static void add_noise_std_observations(
-    aom_noise_model_t *noise_model, int c, const double *coeffs,
-    const uint8_t *const data, const uint8_t *const denoised, int w, int h,
-    int stride, int sub_log2[2], const uint8_t *const alt_data, int alt_stride,
-    const uint8_t *const flat_blocks, int block_size, int num_blocks_w,
-    int num_blocks_h) {
-  const int num_coords = noise_model->n;
-  const int use_highbd = noise_model->params.use_highbd;
-  aom_noise_strength_solver_t *noise_strength_solver =
-      &noise_model->latest_state[c].strength_solver;
-  const aom_noise_strength_solver_t *noise_strength_luma =
-      &noise_model->latest_state[0].strength_solver;
-  const double luma_gain = noise_model->latest_state[0].ar_gain;
-  const double noise_gain = noise_model->latest_state[c].ar_gain;
-  // Block and plane dimensions in this channel's (possibly subsampled) units.
-  const int block_w = block_size >> sub_log2[0];
-  const int block_h = block_size >> sub_log2[1];
-  const int plane_w = w >> sub_log2[0];
-  const int plane_h = h >> sub_log2[1];
-  // Plane used for the block mean.
-  const uint8_t *const mean_src = alt_data ? alt_data : data;
-  const int mean_stride = alt_data ? alt_stride : stride;
-
-  for (int by = 0; by < num_blocks_h; ++by) {
-    const int y_o = by * block_h;
-    for (int bx = 0; bx < num_blocks_w; ++bx) {
-      const int x_o = bx * block_w;
-      if (!flat_blocks[by * num_blocks_w + bx]) continue;
-
-      const int num_samples_h = AOMMIN(plane_h - y_o, block_h);
-      const int num_samples_w = AOMMIN(plane_w - x_o, block_w);
-      // Make sure that we have a reasonable amount of samples to consider the
-      // block
-      if (num_samples_w * num_samples_h <= block_size) continue;
-
-      const double block_mean =
-          get_block_mean(mean_src, w, h, mean_stride, x_o << sub_log2[0],
-                         y_o << sub_log2[1], block_size, use_highbd);
-      const double noise_var =
-          get_noise_var(data, denoised, stride, plane_w, plane_h, x_o, y_o,
-                        block_w, block_h, use_highbd);
-      // We want to remove the part of the noise that came from being
-      // correlated with luma. Note that the noise solver for luma must
-      // have already been run.
-      double luma_strength = 0;
-      double corr = 0;
-      if (c > 0) {
-        luma_strength = luma_gain * noise_strength_solver_get_value(
-                                        noise_strength_luma, block_mean);
-        corr = coeffs[num_coords];
-      }
-      // Chroma noise:
-      //    N(0, noise_var) = N(0, uncorr_var) + corr * N(0, luma_strength^2)
-      // The uncorrelated component:
-      //   uncorr_var = noise_var - (corr * luma_strength)^2
-      // But don't allow fully correlated noise (hence the max), since the
-      // synthesis cannot model it.
-      const double min_var = noise_var / 16;
-      const double uncorr_var = noise_var - pow(corr * luma_strength, 2);
-      const double uncorr_std = sqrt(AOMMAX(min_var, uncorr_var));
-      // After we've removed correlation with luma, undo the gain that will
-      // come from running the IIR filter.
-      const double adjusted_strength = uncorr_std / noise_gain;
-      aom_noise_strength_solver_add_measurement(noise_strength_solver,
-                                                block_mean, adjusted_strength);
-    }
-  }
-}
-
-// Returns 1 when the latest single-frame estimate differs noticeably from the
-// combined (multi-frame) estimate and 0 otherwise. Two tests are applied, and
-// only to channel 0: the normalized cross correlation of the AR coefficients
-// must stay above a threshold, and the weighted mean absolute difference of
-// the solved noise strengths must stay below a bit-depth-scaled threshold.
-static int is_noise_model_different(aom_noise_model_t *const noise_model) {
-  // These thresholds are kind of arbitrary and will likely need further tuning
-  // (or exported as parameters). The threshold on noise strength is a weighted
-  // difference between the noise strength histograms
-  const double kCoeffThreshold = 0.9;
-  const double kStrengthThreshold =
-      0.005 * (1 << (noise_model->params.bit_depth - 8));
-  for (int c = 0; c < 1; ++c) {
-    const double corr =
-        aom_normalized_cross_correlation(noise_model->latest_state[c].eqns.x,
-                                         noise_model->combined_state[c].eqns.x,
-                                         noise_model->combined_state[c].eqns.n);
-    if (corr < kCoeffThreshold) return 1;
-
-    const aom_equation_system_t *latest_eqns =
-        &noise_model->latest_state[c].strength_solver.eqns;
-    const aom_equation_system_t *combined_eqns =
-        &noise_model->combined_state[c].strength_solver.eqns;
-    const int dim = latest_eqns->n;
-    const double dx =
-        1.0 / noise_model->latest_state[c].strength_solver.num_bins;
-    double weighted_diff = 0;
-    double weight_sum = 0;
-
-    for (int col = 0; col < dim; ++col) {
-      // The weight of a bin is the square root of its column sum in A.
-      double column_sum = 0;
-      for (int row = 0; row < dim; ++row) {
-        column_sum += latest_eqns->A[row * dim + col];
-      }
-      const double weight = sqrt(column_sum);
-      weighted_diff +=
-          weight * fabs(latest_eqns->x[col] - combined_eqns->x[col]);
-      weight_sum += weight;
-    }
-    if (weighted_diff * dx / weight_sum > kStrengthThreshold) return 1;
-  }
-  return 0;
-}
-
-static int ar_equation_system_solve(aom_noise_state_t *state, int is_chroma) {
- const int ret = equation_system_solve(&state->eqns);
- state->ar_gain = 1.0;
- if (!ret) return ret;
-
- // Update the AR gain from the equation system as it will be used to fit
- // the noise strength as a function of intensity. In the Yule-Walker
- // equations, the diagonal should be the variance of the correlated noise.
- // In the case of the least squares estimate, there will be some variability
- // in the diagonal. So use the mean of the diagonal as the estimate of
- // overall variance (this works for least squares or Yule-Walker formulation).
- double var = 0;
- const int n = state->eqns.n;
- for (int i = 0; i < (state->eqns.n - is_chroma); ++i) {
- var += state->eqns.A[i * n + i] / state->num_observations;
- }
- var /= (n - is_chroma);
-
- // Keep track of E(Y^2) = <b, x> + E(X^2)
- // In the case that we are using chroma and have an estimate of correlation
- // with luma we adjust that estimate slightly to remove the correlated bits by
- // subtracting out the last column of a scaled by our correlation estimate
- // from b. E(y^2) = <b - A(:, end)*x(end), x>
- double sum_covar = 0;
- for (int i = 0; i < state->eqns.n - is_chroma; ++i) {
- double bi = state->eqns.b[i];
- if (is_chroma) {
- bi -= state->eqns.A[i * n + (n - 1)] * state->eqns.x[n - 1];
- }
- sum_covar += (bi * state->eqns.x[i]) / state->num_observations;
- }
- // Now, get an estimate of the variance of uncorrelated noise signal and use
- // it to determine the gain of the AR filter.
- const double noise_var = AOMMAX(var - sum_covar, 1e-6);
- state->ar_gain = AOMMAX(1, sqrt(AOMMAX(var / noise_var, 1e-6)));
- return ret;
-}
-
-aom_noise_status_t aom_noise_model_update(
- aom_noise_model_t *const noise_model, const uint8_t *const data[3],
- const uint8_t *const denoised[3], int w, int h, int stride[3],
- int chroma_sub_log2[2], const uint8_t *const flat_blocks, int block_size) {
- const int num_blocks_w = (w + block_size - 1) / block_size;
- const int num_blocks_h = (h + block_size - 1) / block_size;
- int y_model_different = 0;
- int num_blocks = 0;
- int i = 0, channel = 0;
-
- if (block_size <= 1) {
- fprintf(stderr, "block_size = %d must be > 1\n", block_size);
- return AOM_NOISE_STATUS_INVALID_ARGUMENT;
- }
-
- if (block_size < noise_model->params.lag * 2 + 1) {
- fprintf(stderr, "block_size = %d must be >= %d\n", block_size,
- noise_model->params.lag * 2 + 1);
- return AOM_NOISE_STATUS_INVALID_ARGUMENT;
- }
-
- // Clear the latest equation system
- for (i = 0; i < 3; ++i) {
- equation_system_clear(&noise_model->latest_state[i].eqns);
- noise_model->latest_state[i].num_observations = 0;
- noise_strength_solver_clear(&noise_model->latest_state[i].strength_solver);
- }
-
- // Check that we have enough flat blocks
- for (i = 0; i < num_blocks_h * num_blocks_w; ++i) {
- if (flat_blocks[i]) {
- num_blocks++;
- }
- }
-
- if (num_blocks <= 1) {
- fprintf(stderr, "Not enough flat blocks to update noise estimate\n");
- return AOM_NOISE_STATUS_INSUFFICIENT_FLAT_BLOCKS;
- }
-
- for (channel = 0; channel < 3; ++channel) {
- int no_subsampling[2] = { 0, 0 };
- const uint8_t *alt_data = channel > 0 ? data[0] : 0;
- const uint8_t *alt_denoised = channel > 0 ? denoised[0] : 0;
- int *sub = channel > 0 ? chroma_sub_log2 : no_subsampling;
- const int is_chroma = channel != 0;
- if (!data[channel] || !denoised[channel]) break;
- if (!add_block_observations(noise_model, channel, data[channel],
- denoised[channel], w, h, stride[channel], sub,
- alt_data, alt_denoised, stride[0], flat_blocks,
- block_size, num_blocks_w, num_blocks_h)) {
- fprintf(stderr, "Adding block observation failed\n");
- return AOM_NOISE_STATUS_INTERNAL_ERROR;
- }
-
- if (!ar_equation_system_solve(&noise_model->latest_state[channel],
- is_chroma)) {
- if (is_chroma) {
- set_chroma_coefficient_fallback_soln(
- &noise_model->latest_state[channel].eqns);
- } else {
- fprintf(stderr, "Solving latest noise equation system failed %d!\n",
- channel);
- return AOM_NOISE_STATUS_INTERNAL_ERROR;
- }
- }
-
- add_noise_std_observations(
- noise_model, channel, noise_model->latest_state[channel].eqns.x,
- data[channel], denoised[channel], w, h, stride[channel], sub, alt_data,
- stride[0], flat_blocks, block_size, num_blocks_w, num_blocks_h);
-
- if (!aom_noise_strength_solver_solve(
- &noise_model->latest_state[channel].strength_solver)) {
- fprintf(stderr, "Solving latest noise strength failed!\n");
- return AOM_NOISE_STATUS_INTERNAL_ERROR;
- }
-
- // Check noise characteristics and return if error.
- if (channel == 0 &&
- noise_model->combined_state[channel].strength_solver.num_equations >
- 0 &&
- is_noise_model_different(noise_model)) {
- y_model_different = 1;
- }
-
- // Don't update the combined stats if the y model is different.
- if (y_model_different) continue;
-
- noise_model->combined_state[channel].num_observations +=
- noise_model->latest_state[channel].num_observations;
- equation_system_add(&noise_model->combined_state[channel].eqns,
- &noise_model->latest_state[channel].eqns);
- if (!ar_equation_system_solve(&noise_model->combined_state[channel],
- is_chroma)) {
- if (is_chroma) {
- set_chroma_coefficient_fallback_soln(
- &noise_model->combined_state[channel].eqns);
- } else {
- fprintf(stderr, "Solving combined noise equation system failed %d!\n",
- channel);
- return AOM_NOISE_STATUS_INTERNAL_ERROR;
- }
- }
-
- noise_strength_solver_add(
- &noise_model->combined_state[channel].strength_solver,
- &noise_model->latest_state[channel].strength_solver);
-
- if (!aom_noise_strength_solver_solve(
- &noise_model->combined_state[channel].strength_solver)) {
- fprintf(stderr, "Solving combined noise strength failed!\n");
- return AOM_NOISE_STATUS_INTERNAL_ERROR;
- }
- }
-
- return y_model_different ? AOM_NOISE_STATUS_DIFFERENT_NOISE_TYPE
- : AOM_NOISE_STATUS_OK;
-}
-
-void aom_noise_model_save_latest(aom_noise_model_t *noise_model) {
- for (int c = 0; c < 3; c++) {
- equation_system_copy(&noise_model->combined_state[c].eqns,
- &noise_model->latest_state[c].eqns);
- equation_system_copy(&noise_model->combined_state[c].strength_solver.eqns,
- &noise_model->latest_state[c].strength_solver.eqns);
- noise_model->combined_state[c].strength_solver.num_equations =
- noise_model->latest_state[c].strength_solver.num_equations;
- noise_model->combined_state[c].num_observations =
- noise_model->latest_state[c].num_observations;
- noise_model->combined_state[c].ar_gain =
- noise_model->latest_state[c].ar_gain;
- }
-}
-
-int aom_noise_model_get_grain_parameters(aom_noise_model_t *const noise_model,
- aom_film_grain_t *film_grain) {
- if (noise_model->params.lag > 3) {
- fprintf(stderr, "params.lag = %d > 3\n", noise_model->params.lag);
- return 0;
- }
- uint16_t random_seed = film_grain->random_seed;
- memset(film_grain, 0, sizeof(*film_grain));
- film_grain->random_seed = random_seed;
-
- film_grain->apply_grain = 1;
- film_grain->update_parameters = 1;
-
- film_grain->ar_coeff_lag = noise_model->params.lag;
-
- // Convert the scaling functions to 8 bit values
- aom_noise_strength_lut_t scaling_points[3];
- aom_noise_strength_solver_fit_piecewise(
- &noise_model->combined_state[0].strength_solver, 14, scaling_points + 0);
- aom_noise_strength_solver_fit_piecewise(
- &noise_model->combined_state[1].strength_solver, 10, scaling_points + 1);
- aom_noise_strength_solver_fit_piecewise(
- &noise_model->combined_state[2].strength_solver, 10, scaling_points + 2);
-
- // Both the domain and the range of the scaling functions in the film_grain
- // are normalized to 8-bit (e.g., they are implicitly scaled during grain
- // synthesis).
- const double strength_divisor = 1 << (noise_model->params.bit_depth - 8);
- double max_scaling_value = 1e-4;
- for (int c = 0; c < 3; ++c) {
- for (int i = 0; i < scaling_points[c].num_points; ++i) {
- scaling_points[c].points[i][0] =
- AOMMIN(255, scaling_points[c].points[i][0] / strength_divisor);
- scaling_points[c].points[i][1] =
- AOMMIN(255, scaling_points[c].points[i][1] / strength_divisor);
- max_scaling_value =
- AOMMAX(scaling_points[c].points[i][1], max_scaling_value);
- }
- }
-
- // Scaling_shift values are in the range [8,11]
- const int max_scaling_value_log2 =
- clamp((int)floor(log2(max_scaling_value) + 1), 2, 5);
- film_grain->scaling_shift = 5 + (8 - max_scaling_value_log2);
-
- const double scale_factor = 1 << (8 - max_scaling_value_log2);
- film_grain->num_y_points = scaling_points[0].num_points;
- film_grain->num_cb_points = scaling_points[1].num_points;
- film_grain->num_cr_points = scaling_points[2].num_points;
-
- int(*film_grain_scaling[3])[2] = {
- film_grain->scaling_points_y,
- film_grain->scaling_points_cb,
- film_grain->scaling_points_cr,
- };
- for (int c = 0; c < 3; c++) {
- for (int i = 0; i < scaling_points[c].num_points; ++i) {
- film_grain_scaling[c][i][0] = (int)(scaling_points[c].points[i][0] + 0.5);
- film_grain_scaling[c][i][1] = clamp(
- (int)(scale_factor * scaling_points[c].points[i][1] + 0.5), 0, 255);
- }
- }
- aom_noise_strength_lut_free(scaling_points + 0);
- aom_noise_strength_lut_free(scaling_points + 1);
- aom_noise_strength_lut_free(scaling_points + 2);
-
- // Convert the ar_coeffs into 8-bit values
- const int n_coeff = noise_model->combined_state[0].eqns.n;
- double max_coeff = 1e-4, min_coeff = -1e-4;
- double y_corr[2] = { 0, 0 };
- double avg_luma_strength = 0;
- for (int c = 0; c < 3; c++) {
- aom_equation_system_t *eqns = &noise_model->combined_state[c].eqns;
- for (int i = 0; i < n_coeff; ++i) {
- max_coeff = AOMMAX(max_coeff, eqns->x[i]);
- min_coeff = AOMMIN(min_coeff, eqns->x[i]);
- }
- // Since the correlation between luma/chroma was computed in an already
- // scaled space, we adjust it in the un-scaled space.
- aom_noise_strength_solver_t *solver =
- &noise_model->combined_state[c].strength_solver;
- // Compute a weighted average of the strength for the channel.
- double average_strength = 0, total_weight = 0;
- for (int i = 0; i < solver->eqns.n; ++i) {
- double w = 0;
- for (int j = 0; j < solver->eqns.n; ++j) {
- w += solver->eqns.A[i * solver->eqns.n + j];
- }
- w = sqrt(w);
- average_strength += solver->eqns.x[i] * w;
- total_weight += w;
- }
- if (total_weight == 0)
- average_strength = 1;
- else
- average_strength /= total_weight;
- if (c == 0) {
- avg_luma_strength = average_strength;
- } else {
- y_corr[c - 1] = avg_luma_strength * eqns->x[n_coeff] / average_strength;
- max_coeff = AOMMAX(max_coeff, y_corr[c - 1]);
- min_coeff = AOMMIN(min_coeff, y_corr[c - 1]);
- }
- }
- // Shift value: AR coeffs range (values 6-9)
- // 6: [-2, 2), 7: [-1, 1), 8: [-0.5, 0.5), 9: [-0.25, 0.25)
- film_grain->ar_coeff_shift =
- clamp(7 - (int)AOMMAX(1 + floor(log2(max_coeff)), ceil(log2(-min_coeff))),
- 6, 9);
- double scale_ar_coeff = 1 << film_grain->ar_coeff_shift;
- int *ar_coeffs[3] = {
- film_grain->ar_coeffs_y,
- film_grain->ar_coeffs_cb,
- film_grain->ar_coeffs_cr,
- };
- for (int c = 0; c < 3; ++c) {
- aom_equation_system_t *eqns = &noise_model->combined_state[c].eqns;
- for (int i = 0; i < n_coeff; ++i) {
- ar_coeffs[c][i] =
- clamp((int)round(scale_ar_coeff * eqns->x[i]), -128, 127);
- }
- if (c > 0) {
- ar_coeffs[c][n_coeff] =
- clamp((int)round(scale_ar_coeff * y_corr[c - 1]), -128, 127);
- }
- }
-
- // At the moment, the noise modeling code assumes that the chroma scaling
- // functions are a function of luma.
- film_grain->cb_mult = 128; // 8 bits
- film_grain->cb_luma_mult = 192; // 8 bits
- film_grain->cb_offset = 256; // 9 bits
-
- film_grain->cr_mult = 128; // 8 bits
- film_grain->cr_luma_mult = 192; // 8 bits
- film_grain->cr_offset = 256; // 9 bits
-
- film_grain->chroma_scaling_from_luma = 0;
- film_grain->grain_scale_shift = 0;
- film_grain->overlap_flag = 1;
- return 1;
-}
-
-static void pointwise_multiply(const float *a, float *b, int n) {
- for (int i = 0; i < n; ++i) {
- b[i] *= a[i];
- }
-}
-
-static float *get_half_cos_window(int block_size) {
- float *window_function =
- (float *)aom_malloc(block_size * block_size * sizeof(*window_function));
- for (int y = 0; y < block_size; ++y) {
- const double cos_yd = cos((.5 + y) * PI / block_size - PI / 2);
- for (int x = 0; x < block_size; ++x) {
- const double cos_xd = cos((.5 + x) * PI / block_size - PI / 2);
- window_function[y * block_size + x] = (float)(cos_yd * cos_xd);
- }
- }
- return window_function;
-}
-
-#define DITHER_AND_QUANTIZE(INT_TYPE, suffix) \
- static void dither_and_quantize_##suffix( \
- float *result, int result_stride, INT_TYPE *denoised, int w, int h, \
- int stride, int chroma_sub_w, int chroma_sub_h, int block_size, \
- float block_normalization) { \
- for (int y = 0; y < (h >> chroma_sub_h); ++y) { \
- for (int x = 0; x < (w >> chroma_sub_w); ++x) { \
- const int result_idx = \
- (y + (block_size >> chroma_sub_h)) * result_stride + x + \
- (block_size >> chroma_sub_w); \
- INT_TYPE new_val = (INT_TYPE)AOMMIN( \
- AOMMAX(result[result_idx] * block_normalization + 0.5f, 0), \
- block_normalization); \
- const float err = \
- -(((float)new_val) / block_normalization - result[result_idx]); \
- denoised[y * stride + x] = new_val; \
- if (x + 1 < (w >> chroma_sub_w)) { \
- result[result_idx + 1] += err * 7.0f / 16.0f; \
- } \
- if (y + 1 < (h >> chroma_sub_h)) { \
- if (x > 0) { \
- result[result_idx + result_stride - 1] += err * 3.0f / 16.0f; \
- } \
- result[result_idx + result_stride] += err * 5.0f / 16.0f; \
- if (x + 1 < (w >> chroma_sub_w)) { \
- result[result_idx + result_stride + 1] += err * 1.0f / 16.0f; \
- } \
- } \
- } \
- } \
- }
-
-DITHER_AND_QUANTIZE(uint8_t, lowbd);
-DITHER_AND_QUANTIZE(uint16_t, highbd);
-
-int aom_wiener_denoise_2d(const uint8_t *const data[3], uint8_t *denoised[3],
- int w, int h, int stride[3], int chroma_sub[2],
- float *noise_psd[3], int block_size, int bit_depth,
- int use_highbd) {
- float *plane = NULL, *block = NULL, *window_full = NULL,
- *window_chroma = NULL;
- double *block_d = NULL, *plane_d = NULL;
- struct aom_noise_tx_t *tx_full = NULL;
- struct aom_noise_tx_t *tx_chroma = NULL;
- const int num_blocks_w = (w + block_size - 1) / block_size;
- const int num_blocks_h = (h + block_size - 1) / block_size;
- const int result_stride = (num_blocks_w + 2) * block_size;
- const int result_height = (num_blocks_h + 2) * block_size;
- float *result = NULL;
- int init_success = 1;
- aom_flat_block_finder_t block_finder_full;
- aom_flat_block_finder_t block_finder_chroma;
- const float kBlockNormalization = (float)((1 << bit_depth) - 1);
- if (chroma_sub[0] != chroma_sub[1]) {
- fprintf(stderr,
- "aom_wiener_denoise_2d doesn't handle different chroma "
- "subsampling");
- return 0;
- }
- init_success &= aom_flat_block_finder_init(&block_finder_full, block_size,
- bit_depth, use_highbd);
- result = (float *)aom_malloc((num_blocks_h + 2) * block_size * result_stride *
- sizeof(*result));
- plane = (float *)aom_malloc(block_size * block_size * sizeof(*plane));
- block =
- (float *)aom_memalign(32, 2 * block_size * block_size * sizeof(*block));
- block_d = (double *)aom_malloc(block_size * block_size * sizeof(*block_d));
- plane_d = (double *)aom_malloc(block_size * block_size * sizeof(*plane_d));
- window_full = get_half_cos_window(block_size);
- tx_full = aom_noise_tx_malloc(block_size);
-
- if (chroma_sub[0] != 0) {
- init_success &= aom_flat_block_finder_init(&block_finder_chroma,
- block_size >> chroma_sub[0],
- bit_depth, use_highbd);
- window_chroma = get_half_cos_window(block_size >> chroma_sub[0]);
- tx_chroma = aom_noise_tx_malloc(block_size >> chroma_sub[0]);
- } else {
- window_chroma = window_full;
- tx_chroma = tx_full;
- }
-
- init_success &= (tx_full != NULL) && (tx_chroma != NULL) && (plane != NULL) &&
- (plane_d != NULL) && (block != NULL) && (block_d != NULL) &&
- (window_full != NULL) && (window_chroma != NULL) &&
- (result != NULL);
- for (int c = init_success ? 0 : 3; c < 3; ++c) {
- float *window_function = c == 0 ? window_full : window_chroma;
- aom_flat_block_finder_t *block_finder = &block_finder_full;
- const int chroma_sub_h = c > 0 ? chroma_sub[1] : 0;
- const int chroma_sub_w = c > 0 ? chroma_sub[0] : 0;
- struct aom_noise_tx_t *tx =
- (c > 0 && chroma_sub[0] > 0) ? tx_chroma : tx_full;
- if (!data[c] || !denoised[c]) continue;
- if (c > 0 && chroma_sub[0] != 0) {
- block_finder = &block_finder_chroma;
- }
- memset(result, 0, sizeof(*result) * result_stride * result_height);
- // Do overlapped block processing (half overlapped). The block rows can
- // easily be done in parallel
- for (int offsy = 0; offsy < (block_size >> chroma_sub_h);
- offsy += (block_size >> chroma_sub_h) / 2) {
- for (int offsx = 0; offsx < (block_size >> chroma_sub_w);
- offsx += (block_size >> chroma_sub_w) / 2) {
- // Pad the boundary when processing each block-set.
- for (int by = -1; by < num_blocks_h; ++by) {
- for (int bx = -1; bx < num_blocks_w; ++bx) {
- const int pixels_per_block =
- (block_size >> chroma_sub_w) * (block_size >> chroma_sub_h);
- aom_flat_block_finder_extract_block(
- block_finder, data[c], w >> chroma_sub_w, h >> chroma_sub_h,
- stride[c], bx * (block_size >> chroma_sub_w) + offsx,
- by * (block_size >> chroma_sub_h) + offsy, plane_d, block_d);
- for (int j = 0; j < pixels_per_block; ++j) {
- block[j] = (float)block_d[j];
- plane[j] = (float)plane_d[j];
- }
- pointwise_multiply(window_function, block, pixels_per_block);
- aom_noise_tx_forward(tx, block);
- aom_noise_tx_filter(tx, noise_psd[c]);
- aom_noise_tx_inverse(tx, block);
-
- // Apply window function to the plane approximation (we will apply
- // it to the sum of plane + block when composing the results).
- pointwise_multiply(window_function, plane, pixels_per_block);
-
- for (int y = 0; y < (block_size >> chroma_sub_h); ++y) {
- const int y_result =
- y + (by + 1) * (block_size >> chroma_sub_h) + offsy;
- for (int x = 0; x < (block_size >> chroma_sub_w); ++x) {
- const int x_result =
- x + (bx + 1) * (block_size >> chroma_sub_w) + offsx;
- result[y_result * result_stride + x_result] +=
- (block[y * (block_size >> chroma_sub_w) + x] +
- plane[y * (block_size >> chroma_sub_w) + x]) *
- window_function[y * (block_size >> chroma_sub_w) + x];
- }
- }
- }
- }
- }
- }
- if (use_highbd) {
- dither_and_quantize_highbd(result, result_stride, (uint16_t *)denoised[c],
- w, h, stride[c], chroma_sub_w, chroma_sub_h,
- block_size, kBlockNormalization);
- } else {
- dither_and_quantize_lowbd(result, result_stride, denoised[c], w, h,
- stride[c], chroma_sub_w, chroma_sub_h,
- block_size, kBlockNormalization);
- }
- }
- aom_free(result);
- aom_free(plane);
- aom_free(block);
- aom_free(plane_d);
- aom_free(block_d);
- aom_free(window_full);
-
- aom_noise_tx_free(tx_full);
-
- aom_flat_block_finder_free(&block_finder_full);
- if (chroma_sub[0] != 0) {
- aom_flat_block_finder_free(&block_finder_chroma);
- aom_free(window_chroma);
- aom_noise_tx_free(tx_chroma);
- }
- return init_success;
-}
-
-struct aom_denoise_and_model_t {
- int block_size;
- int bit_depth;
- float noise_level;
-
- // Size of current denoised buffer and flat_block buffer
- int width;
- int height;
- int y_stride;
- int uv_stride;
- int num_blocks_w;
- int num_blocks_h;
-
- // Buffers for image and noise_psd allocated on the fly
- float *noise_psd[3];
- uint8_t *denoised[3];
- uint8_t *flat_blocks;
-
- aom_flat_block_finder_t flat_block_finder;
- aom_noise_model_t noise_model;
-};
-
-struct aom_denoise_and_model_t *aom_denoise_and_model_alloc(int bit_depth,
- int block_size,
- float noise_level) {
- struct aom_denoise_and_model_t *ctx =
- (struct aom_denoise_and_model_t *)aom_malloc(
- sizeof(struct aom_denoise_and_model_t));
- if (!ctx) {
- fprintf(stderr, "Unable to allocate denoise_and_model struct\n");
- return NULL;
- }
- memset(ctx, 0, sizeof(*ctx));
-
- ctx->block_size = block_size;
- ctx->noise_level = noise_level;
- ctx->bit_depth = bit_depth;
-
- ctx->noise_psd[0] =
- aom_malloc(sizeof(*ctx->noise_psd[0]) * block_size * block_size);
- ctx->noise_psd[1] =
- aom_malloc(sizeof(*ctx->noise_psd[1]) * block_size * block_size);
- ctx->noise_psd[2] =
- aom_malloc(sizeof(*ctx->noise_psd[2]) * block_size * block_size);
- if (!ctx->noise_psd[0] || !ctx->noise_psd[1] || !ctx->noise_psd[2]) {
- fprintf(stderr, "Unable to allocate noise PSD buffers\n");
- aom_denoise_and_model_free(ctx);
- return NULL;
- }
- return ctx;
-}
-
-void aom_denoise_and_model_free(struct aom_denoise_and_model_t *ctx) {
- aom_free(ctx->flat_blocks);
- for (int i = 0; i < 3; ++i) {
- aom_free(ctx->denoised[i]);
- aom_free(ctx->noise_psd[i]);
- }
- aom_noise_model_free(&ctx->noise_model);
- aom_flat_block_finder_free(&ctx->flat_block_finder);
- aom_free(ctx);
-}
-
-static int denoise_and_model_realloc_if_necessary(
- struct aom_denoise_and_model_t *ctx, YV12_BUFFER_CONFIG *sd) {
- if (ctx->width == sd->y_width && ctx->height == sd->y_height &&
- ctx->y_stride == sd->y_stride && ctx->uv_stride == sd->uv_stride)
- return 1;
- const int use_highbd = (sd->flags & YV12_FLAG_HIGHBITDEPTH) != 0;
- const int block_size = ctx->block_size;
-
- ctx->width = sd->y_width;
- ctx->height = sd->y_height;
- ctx->y_stride = sd->y_stride;
- ctx->uv_stride = sd->uv_stride;
-
- for (int i = 0; i < 3; ++i) {
- aom_free(ctx->denoised[i]);
- ctx->denoised[i] = NULL;
- }
- aom_free(ctx->flat_blocks);
- ctx->flat_blocks = NULL;
-
- ctx->denoised[0] = aom_malloc((sd->y_stride * sd->y_height) << use_highbd);
- ctx->denoised[1] = aom_malloc((sd->uv_stride * sd->uv_height) << use_highbd);
- ctx->denoised[2] = aom_malloc((sd->uv_stride * sd->uv_height) << use_highbd);
- if (!ctx->denoised[0] || !ctx->denoised[1] || !ctx->denoised[2]) {
- fprintf(stderr, "Unable to allocate denoise buffers\n");
- return 0;
- }
- ctx->num_blocks_w = (sd->y_width + ctx->block_size - 1) / ctx->block_size;
- ctx->num_blocks_h = (sd->y_height + ctx->block_size - 1) / ctx->block_size;
- ctx->flat_blocks = aom_malloc(ctx->num_blocks_w * ctx->num_blocks_h);
-
- aom_flat_block_finder_free(&ctx->flat_block_finder);
- if (!aom_flat_block_finder_init(&ctx->flat_block_finder, ctx->block_size,
- ctx->bit_depth, use_highbd)) {
- fprintf(stderr, "Unable to init flat block finder\n");
- return 0;
- }
-
- const aom_noise_model_params_t params = { AOM_NOISE_SHAPE_SQUARE, 3,
- ctx->bit_depth, use_highbd };
- aom_noise_model_free(&ctx->noise_model);
- if (!aom_noise_model_init(&ctx->noise_model, params)) {
- fprintf(stderr, "Unable to init noise model\n");
- return 0;
- }
-
- // Simply use a flat PSD (although we could use the flat blocks to estimate
- // PSD) those to estimate an actual noise PSD)
- const float y_noise_level =
- aom_noise_psd_get_default_value(ctx->block_size, ctx->noise_level);
- const float uv_noise_level = aom_noise_psd_get_default_value(
- ctx->block_size >> sd->subsampling_x, ctx->noise_level);
- for (int i = 0; i < block_size * block_size; ++i) {
- ctx->noise_psd[0][i] = y_noise_level;
- ctx->noise_psd[1][i] = ctx->noise_psd[2][i] = uv_noise_level;
- }
- return 1;
-}
-
-int aom_denoise_and_model_run(struct aom_denoise_and_model_t *ctx,
- YV12_BUFFER_CONFIG *sd,
- aom_film_grain_t *film_grain) {
- const int block_size = ctx->block_size;
- const int use_highbd = (sd->flags & YV12_FLAG_HIGHBITDEPTH) != 0;
- uint8_t *raw_data[3] = {
- use_highbd ? (uint8_t *)CONVERT_TO_SHORTPTR(sd->y_buffer) : sd->y_buffer,
- use_highbd ? (uint8_t *)CONVERT_TO_SHORTPTR(sd->u_buffer) : sd->u_buffer,
- use_highbd ? (uint8_t *)CONVERT_TO_SHORTPTR(sd->v_buffer) : sd->v_buffer,
- };
- const uint8_t *const data[3] = { raw_data[0], raw_data[1], raw_data[2] };
- int strides[3] = { sd->y_stride, sd->uv_stride, sd->uv_stride };
- int chroma_sub_log2[2] = { sd->subsampling_x, sd->subsampling_y };
-
- if (!denoise_and_model_realloc_if_necessary(ctx, sd)) {
- fprintf(stderr, "Unable to realloc buffers\n");
- return 0;
- }
-
- aom_flat_block_finder_run(&ctx->flat_block_finder, data[0], sd->y_width,
- sd->y_height, strides[0], ctx->flat_blocks);
-
- if (!aom_wiener_denoise_2d(data, ctx->denoised, sd->y_width, sd->y_height,
- strides, chroma_sub_log2, ctx->noise_psd,
- block_size, ctx->bit_depth, use_highbd)) {
- fprintf(stderr, "Unable to denoise image\n");
- return 0;
- }
-
- const aom_noise_status_t status = aom_noise_model_update(
- &ctx->noise_model, data, (const uint8_t *const *)ctx->denoised,
- sd->y_width, sd->y_height, strides, chroma_sub_log2, ctx->flat_blocks,
- block_size);
- int have_noise_estimate = 0;
- if (status == AOM_NOISE_STATUS_OK) {
- have_noise_estimate = 1;
- } else if (status == AOM_NOISE_STATUS_DIFFERENT_NOISE_TYPE) {
- aom_noise_model_save_latest(&ctx->noise_model);
- have_noise_estimate = 1;
- } else {
- // Unable to update noise model; proceed if we have a previous estimate.
- have_noise_estimate =
- (ctx->noise_model.combined_state[0].strength_solver.num_equations > 0);
- }
-
- film_grain->apply_grain = 0;
- if (have_noise_estimate) {
- if (!aom_noise_model_get_grain_parameters(&ctx->noise_model, film_grain)) {
- fprintf(stderr, "Unable to get grain parameters.\n");
- return 0;
- }
- if (!film_grain->random_seed) {
- film_grain->random_seed = 7391;
- }
- memcpy(raw_data[0], ctx->denoised[0],
- (strides[0] * sd->y_height) << use_highbd);
- memcpy(raw_data[1], ctx->denoised[1],
- (strides[1] * sd->uv_height) << use_highbd);
- memcpy(raw_data[2], ctx->denoised[2],
- (strides[2] * sd->uv_height) << use_highbd);
- }
- return 1;
-}
diff --git a/third_party/aom/aom_dsp/noise_model.h b/third_party/aom/aom_dsp/noise_model.h
deleted file mode 100644
index 049d5be15..000000000
--- a/third_party/aom/aom_dsp/noise_model.h
+++ /dev/null
@@ -1,323 +0,0 @@
-/*
- * Copyright (c) 2017, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#ifndef AOM_AOM_DSP_NOISE_MODEL_H_
-#define AOM_AOM_DSP_NOISE_MODEL_H_
-
-#ifdef __cplusplus
-extern "C" {
-#endif // __cplusplus
-
-#include <stdint.h>
-#include "aom_dsp/grain_synthesis.h"
-#include "aom_scale/yv12config.h"
-
-/*!\brief Wrapper of data required to represent linear system of eqns and soln.
- */
-typedef struct {
- double *A;
- double *b;
- double *x;
- int n;
-} aom_equation_system_t;
-
-/*!\brief Representation of a piecewise linear curve
- *
- * Holds n points as (x, y) pairs, that store the curve.
- */
-typedef struct {
- double (*points)[2];
- int num_points;
-} aom_noise_strength_lut_t;
-
-/*!\brief Init the noise strength lut with the given number of points*/
-int aom_noise_strength_lut_init(aom_noise_strength_lut_t *lut, int num_points);
-
-/*!\brief Frees the noise strength lut. */
-void aom_noise_strength_lut_free(aom_noise_strength_lut_t *lut);
-
-/*!\brief Evaluate the lut at the point x.
- *
- * \param[in] lut The lut data.
- * \param[in] x The coordinate to evaluate the lut.
- */
-double aom_noise_strength_lut_eval(const aom_noise_strength_lut_t *lut,
- double x);
-
-/*!\brief Helper struct to model noise strength as a function of intensity.
- *
- * Internally, this structure holds a representation of a linear system
- * of equations that models noise strength (standard deviation) as a
- * function of intensity. The mapping is initially stored using a
- * piecewise representation with evenly spaced bins that cover the entire
- * domain from [min_intensity, max_intensity]. Each observation (x,y) gives a
- * constraint of the form:
- * y_{i} (1 - a) + y_{i+1} a = y
- * where y_{i} is the value of bin i and x_{i} <= x <= x_{i+1} and
- * a = x/(x_{i+1} - x{i}). The equation system holds the corresponding
- * normal equations.
- *
- * As there may be missing data, the solution is regularized to get a
- * complete set of values for the bins. A reduced representation after
- * solving can be obtained by getting the corresponding noise_strength_lut_t.
- */
-typedef struct {
- aom_equation_system_t eqns;
- double min_intensity;
- double max_intensity;
- int num_bins;
- int num_equations;
- double total;
-} aom_noise_strength_solver_t;
-
-/*!\brief Initializes the noise solver with the given number of bins.
- *
- * Returns 0 if initialization fails.
- *
- * \param[in] solver The noise solver to be initialized.
- * \param[in] num_bins Number of bins to use in the internal representation.
- * \param[in] bit_depth The bit depth used to derive {min,max}_intensity.
- */
-int aom_noise_strength_solver_init(aom_noise_strength_solver_t *solver,
- int num_bins, int bit_depth);
-void aom_noise_strength_solver_free(aom_noise_strength_solver_t *solver);
-
-/*!\brief Gets the x coordinate of bin i.
- *
- * \param[in] i The bin whose coordinate to query.
- */
-double aom_noise_strength_solver_get_center(
- const aom_noise_strength_solver_t *solver, int i);
-
-/*!\brief Add an observation of the block mean intensity to its noise strength.
- *
- * \param[in] block_mean The average block intensity,
- * \param[in] noise_std The observed noise strength.
- */
-void aom_noise_strength_solver_add_measurement(
- aom_noise_strength_solver_t *solver, double block_mean, double noise_std);
-
-/*!\brief Solves the current set of equations for the noise strength. */
-int aom_noise_strength_solver_solve(aom_noise_strength_solver_t *solver);
-
-/*!\brief Fits a reduced piecewise linear lut to the internal solution
- *
- * \param[in] max_num_points The maximum number of output points
- * \param[out] lut The output piecewise linear lut.
- */
-int aom_noise_strength_solver_fit_piecewise(
- const aom_noise_strength_solver_t *solver, int max_num_points,
- aom_noise_strength_lut_t *lut);
-
-/*!\brief Helper for holding precomputed data for finding flat blocks.
- *
- * Internally a block is modeled with a low-order polynomial model. A
- * planar model would be a bunch of equations like:
- * <[y_i x_i 1], [a_1, a_2, a_3]> = b_i
- * for each point in the block. The system matrix A with row i as [y_i x_i 1]
- * is maintained as is the inverse, inv(A'*A), so that the plane parameters
- * can be fit for each block.
- */
-typedef struct {
- double *AtA_inv;
- double *A;
- int num_params; // The number of parameters used for internal low-order model
- int block_size; // The block size the finder was initialized with
- double normalization; // Normalization factor (1 / (2^(bit_depth) - 1))
- int use_highbd; // Whether input data should be interpreted as uint16
-} aom_flat_block_finder_t;
-
-/*!\brief Init the block_finder with the given block size, bit_depth */
-int aom_flat_block_finder_init(aom_flat_block_finder_t *block_finder,
- int block_size, int bit_depth, int use_highbd);
-void aom_flat_block_finder_free(aom_flat_block_finder_t *block_finder);
-
-/*!\brief Helper to extract a block and low order "planar" model. */
-void aom_flat_block_finder_extract_block(
- const aom_flat_block_finder_t *block_finder, const uint8_t *const data,
- int w, int h, int stride, int offsx, int offsy, double *plane,
- double *block);
-
-/*!\brief Runs the flat block finder on the input data.
- *
- * Find flat blocks in the input image data. Returns a map of
- * flat_blocks, where the value of flat_blocks map will be non-zero
- * when a block is determined to be flat. A higher value indicates a bigger
- * confidence in the decision.
- */
-int aom_flat_block_finder_run(const aom_flat_block_finder_t *block_finder,
- const uint8_t *const data, int w, int h,
- int stride, uint8_t *flat_blocks);
-
-// The noise shape indicates the allowed coefficients in the AR model.
-typedef enum {
- AOM_NOISE_SHAPE_DIAMOND = 0,
- AOM_NOISE_SHAPE_SQUARE = 1
-} aom_noise_shape;
-
-// The parameters of the noise model include the shape type, lag, the
-// bit depth of the input images provided, and whether the input images
-// will be using uint16 (or uint8) representation.
-typedef struct {
- aom_noise_shape shape;
- int lag;
- int bit_depth;
- int use_highbd;
-} aom_noise_model_params_t;
-
-/*!\brief State of a noise model estimate for a single channel.
- *
- * This contains a system of equations that can be used to solve
- * for the auto-regressive coefficients as well as a noise strength
- * solver that can be used to model noise strength as a function of
- * intensity.
- */
-typedef struct {
- aom_equation_system_t eqns;
- aom_noise_strength_solver_t strength_solver;
- int num_observations; // The number of observations in the eqn system
- double ar_gain; // The gain of the current AR filter
-} aom_noise_state_t;
-
-/*!\brief Complete model of noise for a planar video
- *
- * This includes a noise model for the latest frame and an aggregated
- * estimate over all previous frames that had similar parameters.
- */
-typedef struct {
- aom_noise_model_params_t params;
- aom_noise_state_t combined_state[3]; // Combined state per channel
- aom_noise_state_t latest_state[3]; // Latest state per channel
- int (*coords)[2]; // Offsets (x,y) of the coefficient samples
- int n; // Number of parameters (size of coords)
- int bit_depth;
-} aom_noise_model_t;
-
-/*!\brief Result of a noise model update. */
-typedef enum {
- AOM_NOISE_STATUS_OK = 0,
- AOM_NOISE_STATUS_INVALID_ARGUMENT,
- AOM_NOISE_STATUS_INSUFFICIENT_FLAT_BLOCKS,
- AOM_NOISE_STATUS_DIFFERENT_NOISE_TYPE,
- AOM_NOISE_STATUS_INTERNAL_ERROR,
-} aom_noise_status_t;
-
-/*!\brief Initializes a noise model with the given parameters.
- *
- * Returns 0 on failure.
- */
-int aom_noise_model_init(aom_noise_model_t *model,
- const aom_noise_model_params_t params);
-void aom_noise_model_free(aom_noise_model_t *model);
-
-/*!\brief Updates the noise model with a new frame observation.
- *
- * Updates the noise model with measurements from the given input frame and a
- * denoised variant of it. Noise is sampled from flat blocks using the flat
- * block map.
- *
- * Returns a noise_status indicating if the update was successful. If the
- * Update was successful, the combined_state is updated with measurements from
- * the provided frame. If status is OK or DIFFERENT_NOISE_TYPE, the latest noise
- * state will be updated with measurements from the provided frame.
- *
- * \param[in,out] noise_model The noise model to be updated
- * \param[in] data Raw frame data
- * \param[in] denoised Denoised frame data.
- * \param[in] w Frame width
- * \param[in] h Frame height
- * \param[in] strides Stride of the planes
- * \param[in] chroma_sub_log2 Chroma subsampling for planes != 0.
- * \param[in] flat_blocks A map to blocks that have been determined flat
- * \param[in] block_size The size of blocks.
- */
-aom_noise_status_t aom_noise_model_update(
- aom_noise_model_t *const noise_model, const uint8_t *const data[3],
- const uint8_t *const denoised[3], int w, int h, int strides[3],
- int chroma_sub_log2[2], const uint8_t *const flat_blocks, int block_size);
-
-/*\brief Save the "latest" estimate into the "combined" estimate.
- *
- * This is meant to be called when the noise modeling detected a change
- * in parameters (or for example, if a user wanted to reset estimation at
- * a shot boundary).
- */
-void aom_noise_model_save_latest(aom_noise_model_t *noise_model);
-
-/*!\brief Converts the noise_model parameters to the corresponding
- * grain_parameters.
- *
- * The noise structs in this file are suitable for estimation (e.g., using
- * floats), but the grain parameters in the bitstream are quantized. This
- * function does the conversion by selecting the correct quantization levels.
- */
-int aom_noise_model_get_grain_parameters(aom_noise_model_t *const noise_model,
- aom_film_grain_t *film_grain);
-
-/*!\brief Perform a Wiener filter denoising in 2D using the provided noise psd.
- *
- * \param[in] data Raw frame data
- * \param[out] denoised Denoised frame data
- * \param[in] w Frame width
- * \param[in] h Frame height
- * \param[in] stride Stride of the planes
- * \param[in] chroma_sub_log2 Chroma subsampling for planes != 0.
- * \param[in] noise_psd The power spectral density of the noise
- * \param[in] block_size The size of blocks
- * \param[in] bit_depth Bit depth of the image
- * \param[in] use_highbd If true, uint8 pointers are interpreted as
- * uint16 and stride is measured in uint16.
- * This must be true when bit_depth >= 10.
- */
-int aom_wiener_denoise_2d(const uint8_t *const data[3], uint8_t *denoised[3],
- int w, int h, int stride[3], int chroma_sub_log2[2],
- float *noise_psd[3], int block_size, int bit_depth,
- int use_highbd);
-
-struct aom_denoise_and_model_t;
-
-/*!\brief Denoise the buffer and model the residual noise.
- *
- * This is meant to be called sequentially on input frames. The input buffer
- * is denoised and the residual noise is modelled. The current noise estimate
- * is populated in film_grain. Returns true on success. The grain.apply_grain
- * parameter will be true when the input buffer was successfully denoised and
- * grain was modelled. Returns false on error.
- *
- * \param[in] ctx Struct allocated with aom_denoise_and_model_alloc
- * that holds some buffers for denoising and the current
- * noise estimate.
- * \param[in/out] buf The raw input buffer to be denoised.
- * \param[out] grain Output film grain parameters
- */
-int aom_denoise_and_model_run(struct aom_denoise_and_model_t *ctx,
- YV12_BUFFER_CONFIG *buf, aom_film_grain_t *grain);
-
-/*!\brief Allocates a context that can be used for denoising and noise modeling.
- *
- * \param[in] bit_depth Bit depth of buffers this will be run on.
- * \param[in] block_size Block size for noise modeling and flat block
- * estimation
- * \param[in] noise_level The noise_level (2.5 for moderate noise, and 5 for
- * higher levels of noise)
- */
-struct aom_denoise_and_model_t *aom_denoise_and_model_alloc(int bit_depth,
- int block_size,
- float noise_level);
-
-/*!\brief Frees the denoise context allocated with aom_denoise_and_model_alloc
- */
-void aom_denoise_and_model_free(struct aom_denoise_and_model_t *denoise_model);
-
-#ifdef __cplusplus
-} // extern "C"
-#endif // __cplusplus
-#endif // AOM_AOM_DSP_NOISE_MODEL_H_
diff --git a/third_party/aom/aom_dsp/noise_util.c b/third_party/aom/aom_dsp/noise_util.c
deleted file mode 100644
index 87e8e9fec..000000000
--- a/third_party/aom/aom_dsp/noise_util.c
+++ /dev/null
@@ -1,221 +0,0 @@
-/*
- * Copyright (c) 2017, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#include <math.h>
-
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-
-#include "aom_dsp/noise_util.h"
-#include "aom_dsp/fft_common.h"
-#include "aom_mem/aom_mem.h"
-#include "config/aom_dsp_rtcd.h"
-
-float aom_noise_psd_get_default_value(int block_size, float factor) {
- return (factor * factor / 10000) * block_size * block_size / 8;
-}
-
-// Internal representation of noise transform. It keeps track of the
-// transformed data and a temporary working buffer to use during the
-// transform.
-struct aom_noise_tx_t {
- float *tx_block;
- float *temp;
- int block_size;
- void (*fft)(const float *, float *, float *);
- void (*ifft)(const float *, float *, float *);
-};
-
-struct aom_noise_tx_t *aom_noise_tx_malloc(int block_size) {
- struct aom_noise_tx_t *noise_tx =
- (struct aom_noise_tx_t *)aom_malloc(sizeof(struct aom_noise_tx_t));
- if (!noise_tx) return NULL;
- memset(noise_tx, 0, sizeof(*noise_tx));
- switch (block_size) {
- case 2:
- noise_tx->fft = aom_fft2x2_float;
- noise_tx->ifft = aom_ifft2x2_float;
- break;
- case 4:
- noise_tx->fft = aom_fft4x4_float;
- noise_tx->ifft = aom_ifft4x4_float;
- break;
- case 8:
- noise_tx->fft = aom_fft8x8_float;
- noise_tx->ifft = aom_ifft8x8_float;
- break;
- case 16:
- noise_tx->fft = aom_fft16x16_float;
- noise_tx->ifft = aom_ifft16x16_float;
- break;
- case 32:
- noise_tx->fft = aom_fft32x32_float;
- noise_tx->ifft = aom_ifft32x32_float;
- break;
- default:
- aom_free(noise_tx);
- fprintf(stderr, "Unsupported block size %d\n", block_size);
- return NULL;
- }
- noise_tx->block_size = block_size;
- noise_tx->tx_block = (float *)aom_memalign(
- 32, 2 * sizeof(*noise_tx->tx_block) * block_size * block_size);
- noise_tx->temp = (float *)aom_memalign(
- 32, 2 * sizeof(*noise_tx->temp) * block_size * block_size);
- if (!noise_tx->tx_block || !noise_tx->temp) {
- aom_noise_tx_free(noise_tx);
- return NULL;
- }
- // Clear the buffers up front. Some outputs of the forward transform are
- // real only (the imaginary component will never be touched)
- memset(noise_tx->tx_block, 0,
- 2 * sizeof(*noise_tx->tx_block) * block_size * block_size);
- memset(noise_tx->temp, 0,
- 2 * sizeof(*noise_tx->temp) * block_size * block_size);
- return noise_tx;
-}
-
-void aom_noise_tx_forward(struct aom_noise_tx_t *noise_tx, const float *data) {
- noise_tx->fft(data, noise_tx->temp, noise_tx->tx_block);
-}
-
-void aom_noise_tx_filter(struct aom_noise_tx_t *noise_tx, const float *psd) {
- const int block_size = noise_tx->block_size;
- const float kBeta = 1.1f;
- const float kEps = 1e-6f;
- for (int y = 0; y < block_size; ++y) {
- for (int x = 0; x < block_size; ++x) {
- int i = y * block_size + x;
- float *c = noise_tx->tx_block + 2 * i;
- const float p = c[0] * c[0] + c[1] * c[1];
- if (p > kBeta * psd[i] && p > 1e-6) {
- noise_tx->tx_block[2 * i + 0] *= (p - psd[i]) / AOMMAX(p, kEps);
- noise_tx->tx_block[2 * i + 1] *= (p - psd[i]) / AOMMAX(p, kEps);
- } else {
- noise_tx->tx_block[2 * i + 0] *= (kBeta - 1.0f) / kBeta;
- noise_tx->tx_block[2 * i + 1] *= (kBeta - 1.0f) / kBeta;
- }
- }
- }
-}
-
-void aom_noise_tx_inverse(struct aom_noise_tx_t *noise_tx, float *data) {
- const int n = noise_tx->block_size * noise_tx->block_size;
- noise_tx->ifft(noise_tx->tx_block, noise_tx->temp, data);
- for (int i = 0; i < n; ++i) {
- data[i] /= n;
- }
-}
-
-void aom_noise_tx_add_energy(const struct aom_noise_tx_t *noise_tx,
- float *psd) {
- const int block_size = noise_tx->block_size;
- for (int yb = 0; yb < block_size; ++yb) {
- for (int xb = 0; xb <= block_size / 2; ++xb) {
- float *c = noise_tx->tx_block + 2 * (yb * block_size + xb);
- psd[yb * block_size + xb] += c[0] * c[0] + c[1] * c[1];
- }
- }
-}
-
-void aom_noise_tx_free(struct aom_noise_tx_t *noise_tx) {
- if (!noise_tx) return;
- aom_free(noise_tx->tx_block);
- aom_free(noise_tx->temp);
- aom_free(noise_tx);
-}
-
-double aom_normalized_cross_correlation(const double *a, const double *b,
- int n) {
- double c = 0;
- double a_len = 0;
- double b_len = 0;
- for (int i = 0; i < n; ++i) {
- a_len += a[i] * a[i];
- b_len += b[i] * b[i];
- c += a[i] * b[i];
- }
- return c / (sqrt(a_len) * sqrt(b_len));
-}
-
-int aom_noise_data_validate(const double *data, int w, int h) {
- const double kVarianceThreshold = 2;
- const double kMeanThreshold = 2;
-
- int x = 0, y = 0;
- int ret_value = 1;
- double var = 0, mean = 0;
- double *mean_x, *mean_y, *var_x, *var_y;
-
- // Check that noise variance is not increasing in x or y
- // and that the data is zero mean.
- mean_x = (double *)aom_malloc(sizeof(*mean_x) * w);
- var_x = (double *)aom_malloc(sizeof(*var_x) * w);
- mean_y = (double *)aom_malloc(sizeof(*mean_x) * h);
- var_y = (double *)aom_malloc(sizeof(*var_y) * h);
-
- memset(mean_x, 0, sizeof(*mean_x) * w);
- memset(var_x, 0, sizeof(*var_x) * w);
- memset(mean_y, 0, sizeof(*mean_y) * h);
- memset(var_y, 0, sizeof(*var_y) * h);
-
- for (y = 0; y < h; ++y) {
- for (x = 0; x < w; ++x) {
- const double d = data[y * w + x];
- var_x[x] += d * d;
- var_y[y] += d * d;
- mean_x[x] += d;
- mean_y[y] += d;
- var += d * d;
- mean += d;
- }
- }
- mean /= (w * h);
- var = var / (w * h) - mean * mean;
-
- for (y = 0; y < h; ++y) {
- mean_y[y] /= h;
- var_y[y] = var_y[y] / h - mean_y[y] * mean_y[y];
- if (fabs(var_y[y] - var) >= kVarianceThreshold) {
- fprintf(stderr, "Variance distance too large %f %f\n", var_y[y], var);
- ret_value = 0;
- break;
- }
- if (fabs(mean_y[y] - mean) >= kMeanThreshold) {
- fprintf(stderr, "Mean distance too large %f %f\n", mean_y[y], mean);
- ret_value = 0;
- break;
- }
- }
-
- for (x = 0; x < w; ++x) {
- mean_x[x] /= w;
- var_x[x] = var_x[x] / w - mean_x[x] * mean_x[x];
- if (fabs(var_x[x] - var) >= kVarianceThreshold) {
- fprintf(stderr, "Variance distance too large %f %f\n", var_x[x], var);
- ret_value = 0;
- break;
- }
- if (fabs(mean_x[x] - mean) >= kMeanThreshold) {
- fprintf(stderr, "Mean distance too large %f %f\n", mean_x[x], mean);
- ret_value = 0;
- break;
- }
- }
-
- aom_free(mean_x);
- aom_free(mean_y);
- aom_free(var_x);
- aom_free(var_y);
-
- return ret_value;
-}
diff --git a/third_party/aom/aom_dsp/noise_util.h b/third_party/aom/aom_dsp/noise_util.h
deleted file mode 100644
index 2284a171a..000000000
--- a/third_party/aom/aom_dsp/noise_util.h
+++ /dev/null
@@ -1,68 +0,0 @@
-/*
- * Copyright (c) 2017, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#ifndef AOM_AOM_DSP_NOISE_UTIL_H_
-#define AOM_AOM_DSP_NOISE_UTIL_H_
-
-#ifdef __cplusplus
-extern "C" {
-#endif // __cplusplus
-
-// aom_noise_tx_t is an abstraction of a transform that is used for denoising.
-// It is meant to be lightweight and does hold the transformed data (as
-// the user should not be manipulating the transformed data directly).
-struct aom_noise_tx_t;
-
-// Allocates and returns a aom_noise_tx_t useful for denoising the given
-// block_size. The resulting aom_noise_tx_t should be free'd with
-// aom_noise_tx_free.
-struct aom_noise_tx_t *aom_noise_tx_malloc(int block_size);
-void aom_noise_tx_free(struct aom_noise_tx_t *aom_noise_tx);
-
-// Transforms the internal data and holds it in the aom_noise_tx's internal
-// buffer. For compatibility with existing SIMD implementations, "data" must
-// be 32-byte aligned.
-void aom_noise_tx_forward(struct aom_noise_tx_t *aom_noise_tx,
- const float *data);
-
-// Filters aom_noise_tx's internal data using the provided noise power spectral
-// density. The PSD must be at least block_size * block_size and should be
-// populated with a constant or via estimates taken from
-// aom_noise_tx_add_energy.
-void aom_noise_tx_filter(struct aom_noise_tx_t *aom_noise_tx, const float *psd);
-
-// Performs an inverse transform using the internal transform data.
-// For compatibility with existing SIMD implementations, "data" must be 32-byte
-// aligned.
-void aom_noise_tx_inverse(struct aom_noise_tx_t *aom_noise_tx, float *data);
-
-// Aggregates the power of the buffered transform data into the psd buffer.
-void aom_noise_tx_add_energy(const struct aom_noise_tx_t *aom_noise_tx,
- float *psd);
-
-// Returns a default value suitable for denosing a transform of the given
-// block_size. The noise "factor" determines the strength of the noise to
-// be removed. A value of about 2.5 can be used for moderate denoising,
-// where a value of 5.0 can be used for a high level of denoising.
-float aom_noise_psd_get_default_value(int block_size, float factor);
-
-// Computes normalized cross correlation of two vectors a and b of length n.
-double aom_normalized_cross_correlation(const double *a, const double *b,
- int n);
-
-// Validates the correlated noise in the data buffer of size (w, h).
-int aom_noise_data_validate(const double *data, int w, int h);
-
-#ifdef __cplusplus
-} // extern "C"
-#endif // __cplusplus
-
-#endif // AOM_AOM_DSP_NOISE_UTIL_H_
diff --git a/third_party/aom/aom_dsp/postproc.h b/third_party/aom/aom_dsp/postproc.h
deleted file mode 100644
index f3d87f264..000000000
--- a/third_party/aom/aom_dsp/postproc.h
+++ /dev/null
@@ -1,26 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#ifndef AOM_AOM_DSP_POSTPROC_H_
-#define AOM_AOM_DSP_POSTPROC_H_
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-// Fills a noise buffer with gaussian noise strength determined by sigma.
-int aom_setup_noise(double sigma, int size, char *noise);
-
-#ifdef __cplusplus
-}
-#endif
-
-#endif // AOM_AOM_DSP_POSTPROC_H_
diff --git a/third_party/aom/aom_dsp/prob.h b/third_party/aom/aom_dsp/prob.h
deleted file mode 100644
index d003a986e..000000000
--- a/third_party/aom/aom_dsp/prob.h
+++ /dev/null
@@ -1,671 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#ifndef AOM_AOM_DSP_PROB_H_
-#define AOM_AOM_DSP_PROB_H_
-
-#include <assert.h>
-#include <stdio.h>
-
-#include "config/aom_config.h"
-
-#include "aom_dsp/aom_dsp_common.h"
-#include "aom_dsp/entcode.h"
-#include "aom_ports/bitops.h"
-#include "aom_ports/mem.h"
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-// TODO(negge): Rename this aom_prob once we remove vpxbool.
-typedef uint16_t aom_cdf_prob;
-
-#define CDF_SIZE(x) ((x) + 1)
-#define CDF_PROB_BITS 15
-#define CDF_PROB_TOP (1 << CDF_PROB_BITS)
-#define CDF_INIT_TOP 32768
-#define CDF_SHIFT (15 - CDF_PROB_BITS)
-/*The value stored in an iCDF is CDF_PROB_TOP minus the actual cumulative
- probability (an "inverse" CDF).
- This function converts from one representation to the other (and is its own
- inverse).*/
-#define AOM_ICDF(x) (CDF_PROB_TOP - (x))
-
-#if CDF_SHIFT == 0
-
-#define AOM_CDF2(a0) AOM_ICDF(a0), AOM_ICDF(CDF_PROB_TOP), 0
-#define AOM_CDF3(a0, a1) AOM_ICDF(a0), AOM_ICDF(a1), AOM_ICDF(CDF_PROB_TOP), 0
-#define AOM_CDF4(a0, a1, a2) \
- AOM_ICDF(a0), AOM_ICDF(a1), AOM_ICDF(a2), AOM_ICDF(CDF_PROB_TOP), 0
-#define AOM_CDF5(a0, a1, a2, a3) \
- AOM_ICDF(a0) \
- , AOM_ICDF(a1), AOM_ICDF(a2), AOM_ICDF(a3), AOM_ICDF(CDF_PROB_TOP), 0
-#define AOM_CDF6(a0, a1, a2, a3, a4) \
- AOM_ICDF(a0) \
- , AOM_ICDF(a1), AOM_ICDF(a2), AOM_ICDF(a3), AOM_ICDF(a4), \
- AOM_ICDF(CDF_PROB_TOP), 0
-#define AOM_CDF7(a0, a1, a2, a3, a4, a5) \
- AOM_ICDF(a0) \
- , AOM_ICDF(a1), AOM_ICDF(a2), AOM_ICDF(a3), AOM_ICDF(a4), AOM_ICDF(a5), \
- AOM_ICDF(CDF_PROB_TOP), 0
-#define AOM_CDF8(a0, a1, a2, a3, a4, a5, a6) \
- AOM_ICDF(a0) \
- , AOM_ICDF(a1), AOM_ICDF(a2), AOM_ICDF(a3), AOM_ICDF(a4), AOM_ICDF(a5), \
- AOM_ICDF(a6), AOM_ICDF(CDF_PROB_TOP), 0
-#define AOM_CDF9(a0, a1, a2, a3, a4, a5, a6, a7) \
- AOM_ICDF(a0) \
- , AOM_ICDF(a1), AOM_ICDF(a2), AOM_ICDF(a3), AOM_ICDF(a4), AOM_ICDF(a5), \
- AOM_ICDF(a6), AOM_ICDF(a7), AOM_ICDF(CDF_PROB_TOP), 0
-#define AOM_CDF10(a0, a1, a2, a3, a4, a5, a6, a7, a8) \
- AOM_ICDF(a0) \
- , AOM_ICDF(a1), AOM_ICDF(a2), AOM_ICDF(a3), AOM_ICDF(a4), AOM_ICDF(a5), \
- AOM_ICDF(a6), AOM_ICDF(a7), AOM_ICDF(a8), AOM_ICDF(CDF_PROB_TOP), 0
-#define AOM_CDF11(a0, a1, a2, a3, a4, a5, a6, a7, a8, a9) \
- AOM_ICDF(a0) \
- , AOM_ICDF(a1), AOM_ICDF(a2), AOM_ICDF(a3), AOM_ICDF(a4), AOM_ICDF(a5), \
- AOM_ICDF(a6), AOM_ICDF(a7), AOM_ICDF(a8), AOM_ICDF(a9), \
- AOM_ICDF(CDF_PROB_TOP), 0
-#define AOM_CDF12(a0, a1, a2, a3, a4, a5, a6, a7, a8, a9, a10) \
- AOM_ICDF(a0) \
- , AOM_ICDF(a1), AOM_ICDF(a2), AOM_ICDF(a3), AOM_ICDF(a4), AOM_ICDF(a5), \
- AOM_ICDF(a6), AOM_ICDF(a7), AOM_ICDF(a8), AOM_ICDF(a9), AOM_ICDF(a10), \
- AOM_ICDF(CDF_PROB_TOP), 0
-#define AOM_CDF13(a0, a1, a2, a3, a4, a5, a6, a7, a8, a9, a10, a11) \
- AOM_ICDF(a0) \
- , AOM_ICDF(a1), AOM_ICDF(a2), AOM_ICDF(a3), AOM_ICDF(a4), AOM_ICDF(a5), \
- AOM_ICDF(a6), AOM_ICDF(a7), AOM_ICDF(a8), AOM_ICDF(a9), AOM_ICDF(a10), \
- AOM_ICDF(a11), AOM_ICDF(CDF_PROB_TOP), 0
-#define AOM_CDF14(a0, a1, a2, a3, a4, a5, a6, a7, a8, a9, a10, a11, a12) \
- AOM_ICDF(a0) \
- , AOM_ICDF(a1), AOM_ICDF(a2), AOM_ICDF(a3), AOM_ICDF(a4), AOM_ICDF(a5), \
- AOM_ICDF(a6), AOM_ICDF(a7), AOM_ICDF(a8), AOM_ICDF(a9), AOM_ICDF(a10), \
- AOM_ICDF(a11), AOM_ICDF(a12), AOM_ICDF(CDF_PROB_TOP), 0
-#define AOM_CDF15(a0, a1, a2, a3, a4, a5, a6, a7, a8, a9, a10, a11, a12, a13) \
- AOM_ICDF(a0) \
- , AOM_ICDF(a1), AOM_ICDF(a2), AOM_ICDF(a3), AOM_ICDF(a4), AOM_ICDF(a5), \
- AOM_ICDF(a6), AOM_ICDF(a7), AOM_ICDF(a8), AOM_ICDF(a9), AOM_ICDF(a10), \
- AOM_ICDF(a11), AOM_ICDF(a12), AOM_ICDF(a13), AOM_ICDF(CDF_PROB_TOP), 0
-#define AOM_CDF16(a0, a1, a2, a3, a4, a5, a6, a7, a8, a9, a10, a11, a12, a13, \
- a14) \
- AOM_ICDF(a0) \
- , AOM_ICDF(a1), AOM_ICDF(a2), AOM_ICDF(a3), AOM_ICDF(a4), AOM_ICDF(a5), \
- AOM_ICDF(a6), AOM_ICDF(a7), AOM_ICDF(a8), AOM_ICDF(a9), AOM_ICDF(a10), \
- AOM_ICDF(a11), AOM_ICDF(a12), AOM_ICDF(a13), AOM_ICDF(a14), \
- AOM_ICDF(CDF_PROB_TOP), 0
-
-#else
-#define AOM_CDF2(a0) \
- AOM_ICDF((((a0)-1) * ((CDF_INIT_TOP >> CDF_SHIFT) - 2) + \
- ((CDF_INIT_TOP - 2) >> 1)) / \
- ((CDF_INIT_TOP - 2)) + \
- 1) \
- , AOM_ICDF(CDF_PROB_TOP), 0
-#define AOM_CDF3(a0, a1) \
- AOM_ICDF((((a0)-1) * ((CDF_INIT_TOP >> CDF_SHIFT) - 3) + \
- ((CDF_INIT_TOP - 3) >> 1)) / \
- ((CDF_INIT_TOP - 3)) + \
- 1) \
- , \
- AOM_ICDF((((a1)-2) * ((CDF_INIT_TOP >> CDF_SHIFT) - 3) + \
- ((CDF_INIT_TOP - 3) >> 1)) / \
- ((CDF_INIT_TOP - 3)) + \
- 2), \
- AOM_ICDF(CDF_PROB_TOP), 0
-#define AOM_CDF4(a0, a1, a2) \
- AOM_ICDF((((a0)-1) * ((CDF_INIT_TOP >> CDF_SHIFT) - 4) + \
- ((CDF_INIT_TOP - 4) >> 1)) / \
- ((CDF_INIT_TOP - 4)) + \
- 1) \
- , \
- AOM_ICDF((((a1)-2) * ((CDF_INIT_TOP >> CDF_SHIFT) - 4) + \
- ((CDF_INIT_TOP - 4) >> 1)) / \
- ((CDF_INIT_TOP - 4)) + \
- 2), \
- AOM_ICDF((((a2)-3) * ((CDF_INIT_TOP >> CDF_SHIFT) - 4) + \
- ((CDF_INIT_TOP - 4) >> 1)) / \
- ((CDF_INIT_TOP - 4)) + \
- 3), \
- AOM_ICDF(CDF_PROB_TOP), 0
-#define AOM_CDF5(a0, a1, a2, a3) \
- AOM_ICDF((((a0)-1) * ((CDF_INIT_TOP >> CDF_SHIFT) - 5) + \
- ((CDF_INIT_TOP - 5) >> 1)) / \
- ((CDF_INIT_TOP - 5)) + \
- 1) \
- , \
- AOM_ICDF((((a1)-2) * ((CDF_INIT_TOP >> CDF_SHIFT) - 5) + \
- ((CDF_INIT_TOP - 5) >> 1)) / \
- ((CDF_INIT_TOP - 5)) + \
- 2), \
- AOM_ICDF((((a2)-3) * ((CDF_INIT_TOP >> CDF_SHIFT) - 5) + \
- ((CDF_INIT_TOP - 5) >> 1)) / \
- ((CDF_INIT_TOP - 5)) + \
- 3), \
- AOM_ICDF((((a3)-4) * ((CDF_INIT_TOP >> CDF_SHIFT) - 5) + \
- ((CDF_INIT_TOP - 5) >> 1)) / \
- ((CDF_INIT_TOP - 5)) + \
- 4), \
- AOM_ICDF(CDF_PROB_TOP), 0
-#define AOM_CDF6(a0, a1, a2, a3, a4) \
- AOM_ICDF((((a0)-1) * ((CDF_INIT_TOP >> CDF_SHIFT) - 6) + \
- ((CDF_INIT_TOP - 6) >> 1)) / \
- ((CDF_INIT_TOP - 6)) + \
- 1) \
- , \
- AOM_ICDF((((a1)-2) * ((CDF_INIT_TOP >> CDF_SHIFT) - 6) + \
- ((CDF_INIT_TOP - 6) >> 1)) / \
- ((CDF_INIT_TOP - 6)) + \
- 2), \
- AOM_ICDF((((a2)-3) * ((CDF_INIT_TOP >> CDF_SHIFT) - 6) + \
- ((CDF_INIT_TOP - 6) >> 1)) / \
- ((CDF_INIT_TOP - 6)) + \
- 3), \
- AOM_ICDF((((a3)-4) * ((CDF_INIT_TOP >> CDF_SHIFT) - 6) + \
- ((CDF_INIT_TOP - 6) >> 1)) / \
- ((CDF_INIT_TOP - 6)) + \
- 4), \
- AOM_ICDF((((a4)-5) * ((CDF_INIT_TOP >> CDF_SHIFT) - 6) + \
- ((CDF_INIT_TOP - 6) >> 1)) / \
- ((CDF_INIT_TOP - 6)) + \
- 5), \
- AOM_ICDF(CDF_PROB_TOP), 0
-#define AOM_CDF7(a0, a1, a2, a3, a4, a5) \
- AOM_ICDF((((a0)-1) * ((CDF_INIT_TOP >> CDF_SHIFT) - 7) + \
- ((CDF_INIT_TOP - 7) >> 1)) / \
- ((CDF_INIT_TOP - 7)) + \
- 1) \
- , \
- AOM_ICDF((((a1)-2) * ((CDF_INIT_TOP >> CDF_SHIFT) - 7) + \
- ((CDF_INIT_TOP - 7) >> 1)) / \
- ((CDF_INIT_TOP - 7)) + \
- 2), \
- AOM_ICDF((((a2)-3) * ((CDF_INIT_TOP >> CDF_SHIFT) - 7) + \
- ((CDF_INIT_TOP - 7) >> 1)) / \
- ((CDF_INIT_TOP - 7)) + \
- 3), \
- AOM_ICDF((((a3)-4) * ((CDF_INIT_TOP >> CDF_SHIFT) - 7) + \
- ((CDF_INIT_TOP - 7) >> 1)) / \
- ((CDF_INIT_TOP - 7)) + \
- 4), \
- AOM_ICDF((((a4)-5) * ((CDF_INIT_TOP >> CDF_SHIFT) - 7) + \
- ((CDF_INIT_TOP - 7) >> 1)) / \
- ((CDF_INIT_TOP - 7)) + \
- 5), \
- AOM_ICDF((((a5)-6) * ((CDF_INIT_TOP >> CDF_SHIFT) - 7) + \
- ((CDF_INIT_TOP - 7) >> 1)) / \
- ((CDF_INIT_TOP - 7)) + \
- 6), \
- AOM_ICDF(CDF_PROB_TOP), 0
-#define AOM_CDF8(a0, a1, a2, a3, a4, a5, a6) \
- AOM_ICDF((((a0)-1) * ((CDF_INIT_TOP >> CDF_SHIFT) - 8) + \
- ((CDF_INIT_TOP - 8) >> 1)) / \
- ((CDF_INIT_TOP - 8)) + \
- 1) \
- , \
- AOM_ICDF((((a1)-2) * ((CDF_INIT_TOP >> CDF_SHIFT) - 8) + \
- ((CDF_INIT_TOP - 8) >> 1)) / \
- ((CDF_INIT_TOP - 8)) + \
- 2), \
- AOM_ICDF((((a2)-3) * ((CDF_INIT_TOP >> CDF_SHIFT) - 8) + \
- ((CDF_INIT_TOP - 8) >> 1)) / \
- ((CDF_INIT_TOP - 8)) + \
- 3), \
- AOM_ICDF((((a3)-4) * ((CDF_INIT_TOP >> CDF_SHIFT) - 8) + \
- ((CDF_INIT_TOP - 8) >> 1)) / \
- ((CDF_INIT_TOP - 8)) + \
- 4), \
- AOM_ICDF((((a4)-5) * ((CDF_INIT_TOP >> CDF_SHIFT) - 8) + \
- ((CDF_INIT_TOP - 8) >> 1)) / \
- ((CDF_INIT_TOP - 8)) + \
- 5), \
- AOM_ICDF((((a5)-6) * ((CDF_INIT_TOP >> CDF_SHIFT) - 8) + \
- ((CDF_INIT_TOP - 8) >> 1)) / \
- ((CDF_INIT_TOP - 8)) + \
- 6), \
- AOM_ICDF((((a6)-7) * ((CDF_INIT_TOP >> CDF_SHIFT) - 8) + \
- ((CDF_INIT_TOP - 8) >> 1)) / \
- ((CDF_INIT_TOP - 8)) + \
- 7), \
- AOM_ICDF(CDF_PROB_TOP), 0
-#define AOM_CDF9(a0, a1, a2, a3, a4, a5, a6, a7) \
- AOM_ICDF((((a0)-1) * ((CDF_INIT_TOP >> CDF_SHIFT) - 9) + \
- ((CDF_INIT_TOP - 9) >> 1)) / \
- ((CDF_INIT_TOP - 9)) + \
- 1) \
- , \
- AOM_ICDF((((a1)-2) * ((CDF_INIT_TOP >> CDF_SHIFT) - 9) + \
- ((CDF_INIT_TOP - 9) >> 1)) / \
- ((CDF_INIT_TOP - 9)) + \
- 2), \
- AOM_ICDF((((a2)-3) * ((CDF_INIT_TOP >> CDF_SHIFT) - 9) + \
- ((CDF_INIT_TOP - 9) >> 1)) / \
- ((CDF_INIT_TOP - 9)) + \
- 3), \
- AOM_ICDF((((a3)-4) * ((CDF_INIT_TOP >> CDF_SHIFT) - 9) + \
- ((CDF_INIT_TOP - 9) >> 1)) / \
- ((CDF_INIT_TOP - 9)) + \
- 4), \
- AOM_ICDF((((a4)-5) * ((CDF_INIT_TOP >> CDF_SHIFT) - 9) + \
- ((CDF_INIT_TOP - 9) >> 1)) / \
- ((CDF_INIT_TOP - 9)) + \
- 5), \
- AOM_ICDF((((a5)-6) * ((CDF_INIT_TOP >> CDF_SHIFT) - 9) + \
- ((CDF_INIT_TOP - 9) >> 1)) / \
- ((CDF_INIT_TOP - 9)) + \
- 6), \
- AOM_ICDF((((a6)-7) * ((CDF_INIT_TOP >> CDF_SHIFT) - 9) + \
- ((CDF_INIT_TOP - 9) >> 1)) / \
- ((CDF_INIT_TOP - 9)) + \
- 7), \
- AOM_ICDF((((a7)-8) * ((CDF_INIT_TOP >> CDF_SHIFT) - 9) + \
- ((CDF_INIT_TOP - 9) >> 1)) / \
- ((CDF_INIT_TOP - 9)) + \
- 8), \
- AOM_ICDF(CDF_PROB_TOP), 0
-#define AOM_CDF10(a0, a1, a2, a3, a4, a5, a6, a7, a8) \
- AOM_ICDF((((a0)-1) * ((CDF_INIT_TOP >> CDF_SHIFT) - 10) + \
- ((CDF_INIT_TOP - 10) >> 1)) / \
- ((CDF_INIT_TOP - 10)) + \
- 1) \
- , \
- AOM_ICDF((((a1)-2) * ((CDF_INIT_TOP >> CDF_SHIFT) - 10) + \
- ((CDF_INIT_TOP - 10) >> 1)) / \
- ((CDF_INIT_TOP - 10)) + \
- 2), \
- AOM_ICDF((((a2)-3) * ((CDF_INIT_TOP >> CDF_SHIFT) - 10) + \
- ((CDF_INIT_TOP - 10) >> 1)) / \
- ((CDF_INIT_TOP - 10)) + \
- 3), \
- AOM_ICDF((((a3)-4) * ((CDF_INIT_TOP >> CDF_SHIFT) - 10) + \
- ((CDF_INIT_TOP - 10) >> 1)) / \
- ((CDF_INIT_TOP - 10)) + \
- 4), \
- AOM_ICDF((((a4)-5) * ((CDF_INIT_TOP >> CDF_SHIFT) - 10) + \
- ((CDF_INIT_TOP - 10) >> 1)) / \
- ((CDF_INIT_TOP - 10)) + \
- 5), \
- AOM_ICDF((((a5)-6) * ((CDF_INIT_TOP >> CDF_SHIFT) - 10) + \
- ((CDF_INIT_TOP - 10) >> 1)) / \
- ((CDF_INIT_TOP - 10)) + \
- 6), \
- AOM_ICDF((((a6)-7) * ((CDF_INIT_TOP >> CDF_SHIFT) - 10) + \
- ((CDF_INIT_TOP - 10) >> 1)) / \
- ((CDF_INIT_TOP - 10)) + \
- 7), \
- AOM_ICDF((((a7)-8) * ((CDF_INIT_TOP >> CDF_SHIFT) - 10) + \
- ((CDF_INIT_TOP - 10) >> 1)) / \
- ((CDF_INIT_TOP - 10)) + \
- 8), \
- AOM_ICDF((((a8)-9) * ((CDF_INIT_TOP >> CDF_SHIFT) - 10) + \
- ((CDF_INIT_TOP - 10) >> 1)) / \
- ((CDF_INIT_TOP - 10)) + \
- 9), \
- AOM_ICDF(CDF_PROB_TOP), 0
-#define AOM_CDF11(a0, a1, a2, a3, a4, a5, a6, a7, a8, a9) \
- AOM_ICDF((((a0)-1) * ((CDF_INIT_TOP >> CDF_SHIFT) - 11) + \
- ((CDF_INIT_TOP - 11) >> 1)) / \
- ((CDF_INIT_TOP - 11)) + \
- 1) \
- , \
- AOM_ICDF((((a1)-2) * ((CDF_INIT_TOP >> CDF_SHIFT) - 11) + \
- ((CDF_INIT_TOP - 11) >> 1)) / \
- ((CDF_INIT_TOP - 11)) + \
- 2), \
- AOM_ICDF((((a2)-3) * ((CDF_INIT_TOP >> CDF_SHIFT) - 11) + \
- ((CDF_INIT_TOP - 11) >> 1)) / \
- ((CDF_INIT_TOP - 11)) + \
- 3), \
- AOM_ICDF((((a3)-4) * ((CDF_INIT_TOP >> CDF_SHIFT) - 11) + \
- ((CDF_INIT_TOP - 11) >> 1)) / \
- ((CDF_INIT_TOP - 11)) + \
- 4), \
- AOM_ICDF((((a4)-5) * ((CDF_INIT_TOP >> CDF_SHIFT) - 11) + \
- ((CDF_INIT_TOP - 11) >> 1)) / \
- ((CDF_INIT_TOP - 11)) + \
- 5), \
- AOM_ICDF((((a5)-6) * ((CDF_INIT_TOP >> CDF_SHIFT) - 11) + \
- ((CDF_INIT_TOP - 11) >> 1)) / \
- ((CDF_INIT_TOP - 11)) + \
- 6), \
- AOM_ICDF((((a6)-7) * ((CDF_INIT_TOP >> CDF_SHIFT) - 11) + \
- ((CDF_INIT_TOP - 11) >> 1)) / \
- ((CDF_INIT_TOP - 11)) + \
- 7), \
- AOM_ICDF((((a7)-8) * ((CDF_INIT_TOP >> CDF_SHIFT) - 11) + \
- ((CDF_INIT_TOP - 11) >> 1)) / \
- ((CDF_INIT_TOP - 11)) + \
- 8), \
- AOM_ICDF((((a8)-9) * ((CDF_INIT_TOP >> CDF_SHIFT) - 11) + \
- ((CDF_INIT_TOP - 11) >> 1)) / \
- ((CDF_INIT_TOP - 11)) + \
- 9), \
- AOM_ICDF((((a9)-10) * ((CDF_INIT_TOP >> CDF_SHIFT) - 11) + \
- ((CDF_INIT_TOP - 11) >> 1)) / \
- ((CDF_INIT_TOP - 11)) + \
- 10), \
- AOM_ICDF(CDF_PROB_TOP), 0
-#define AOM_CDF12(a0, a1, a2, a3, a4, a5, a6, a7, a8, a9, a10) \
- AOM_ICDF((((a0)-1) * ((CDF_INIT_TOP >> CDF_SHIFT) - 12) + \
- ((CDF_INIT_TOP - 12) >> 1)) / \
- ((CDF_INIT_TOP - 12)) + \
- 1) \
- , \
- AOM_ICDF((((a1)-2) * ((CDF_INIT_TOP >> CDF_SHIFT) - 12) + \
- ((CDF_INIT_TOP - 12) >> 1)) / \
- ((CDF_INIT_TOP - 12)) + \
- 2), \
- AOM_ICDF((((a2)-3) * ((CDF_INIT_TOP >> CDF_SHIFT) - 12) + \
- ((CDF_INIT_TOP - 12) >> 1)) / \
- ((CDF_INIT_TOP - 12)) + \
- 3), \
- AOM_ICDF((((a3)-4) * ((CDF_INIT_TOP >> CDF_SHIFT) - 12) + \
- ((CDF_INIT_TOP - 12) >> 1)) / \
- ((CDF_INIT_TOP - 12)) + \
- 4), \
- AOM_ICDF((((a4)-5) * ((CDF_INIT_TOP >> CDF_SHIFT) - 12) + \
- ((CDF_INIT_TOP - 12) >> 1)) / \
- ((CDF_INIT_TOP - 12)) + \
- 5), \
- AOM_ICDF((((a5)-6) * ((CDF_INIT_TOP >> CDF_SHIFT) - 12) + \
- ((CDF_INIT_TOP - 12) >> 1)) / \
- ((CDF_INIT_TOP - 12)) + \
- 6), \
- AOM_ICDF((((a6)-7) * ((CDF_INIT_TOP >> CDF_SHIFT) - 12) + \
- ((CDF_INIT_TOP - 12) >> 1)) / \
- ((CDF_INIT_TOP - 12)) + \
- 7), \
- AOM_ICDF((((a7)-8) * ((CDF_INIT_TOP >> CDF_SHIFT) - 12) + \
- ((CDF_INIT_TOP - 12) >> 1)) / \
- ((CDF_INIT_TOP - 12)) + \
- 8), \
- AOM_ICDF((((a8)-9) * ((CDF_INIT_TOP >> CDF_SHIFT) - 12) + \
- ((CDF_INIT_TOP - 12) >> 1)) / \
- ((CDF_INIT_TOP - 12)) + \
- 9), \
- AOM_ICDF((((a9)-10) * ((CDF_INIT_TOP >> CDF_SHIFT) - 12) + \
- ((CDF_INIT_TOP - 12) >> 1)) / \
- ((CDF_INIT_TOP - 12)) + \
- 10), \
- AOM_ICDF((((a10)-11) * ((CDF_INIT_TOP >> CDF_SHIFT) - 12) + \
- ((CDF_INIT_TOP - 12) >> 1)) / \
- ((CDF_INIT_TOP - 12)) + \
- 11), \
- AOM_ICDF(CDF_PROB_TOP), 0
-#define AOM_CDF13(a0, a1, a2, a3, a4, a5, a6, a7, a8, a9, a10, a11) \
- AOM_ICDF((((a0)-1) * ((CDF_INIT_TOP >> CDF_SHIFT) - 13) + \
- ((CDF_INIT_TOP - 13) >> 1)) / \
- ((CDF_INIT_TOP - 13)) + \
- 1) \
- , \
- AOM_ICDF((((a1)-2) * ((CDF_INIT_TOP >> CDF_SHIFT) - 13) + \
- ((CDF_INIT_TOP - 13) >> 1)) / \
- ((CDF_INIT_TOP - 13)) + \
- 2), \
- AOM_ICDF((((a2)-3) * ((CDF_INIT_TOP >> CDF_SHIFT) - 13) + \
- ((CDF_INIT_TOP - 13) >> 1)) / \
- ((CDF_INIT_TOP - 13)) + \
- 3), \
- AOM_ICDF((((a3)-4) * ((CDF_INIT_TOP >> CDF_SHIFT) - 13) + \
- ((CDF_INIT_TOP - 13) >> 1)) / \
- ((CDF_INIT_TOP - 13)) + \
- 4), \
- AOM_ICDF((((a4)-5) * ((CDF_INIT_TOP >> CDF_SHIFT) - 13) + \
- ((CDF_INIT_TOP - 13) >> 1)) / \
- ((CDF_INIT_TOP - 13)) + \
- 5), \
- AOM_ICDF((((a5)-6) * ((CDF_INIT_TOP >> CDF_SHIFT) - 13) + \
- ((CDF_INIT_TOP - 13) >> 1)) / \
- ((CDF_INIT_TOP - 13)) + \
- 6), \
- AOM_ICDF((((a6)-7) * ((CDF_INIT_TOP >> CDF_SHIFT) - 13) + \
- ((CDF_INIT_TOP - 13) >> 1)) / \
- ((CDF_INIT_TOP - 13)) + \
- 7), \
- AOM_ICDF((((a7)-8) * ((CDF_INIT_TOP >> CDF_SHIFT) - 13) + \
- ((CDF_INIT_TOP - 13) >> 1)) / \
- ((CDF_INIT_TOP - 13)) + \
- 8), \
- AOM_ICDF((((a8)-9) * ((CDF_INIT_TOP >> CDF_SHIFT) - 13) + \
- ((CDF_INIT_TOP - 13) >> 1)) / \
- ((CDF_INIT_TOP - 13)) + \
- 9), \
- AOM_ICDF((((a9)-10) * ((CDF_INIT_TOP >> CDF_SHIFT) - 13) + \
- ((CDF_INIT_TOP - 13) >> 1)) / \
- ((CDF_INIT_TOP - 13)) + \
- 10), \
- AOM_ICDF((((a10)-11) * ((CDF_INIT_TOP >> CDF_SHIFT) - 13) + \
- ((CDF_INIT_TOP - 13) >> 1)) / \
- ((CDF_INIT_TOP - 13)) + \
- 11), \
- AOM_ICDF((((a11)-12) * ((CDF_INIT_TOP >> CDF_SHIFT) - 13) + \
- ((CDF_INIT_TOP - 13) >> 1)) / \
- ((CDF_INIT_TOP - 13)) + \
- 12), \
- AOM_ICDF(CDF_PROB_TOP), 0
-#define AOM_CDF14(a0, a1, a2, a3, a4, a5, a6, a7, a8, a9, a10, a11, a12) \
- AOM_ICDF((((a0)-1) * ((CDF_INIT_TOP >> CDF_SHIFT) - 14) + \
- ((CDF_INIT_TOP - 14) >> 1)) / \
- ((CDF_INIT_TOP - 14)) + \
- 1) \
- , \
- AOM_ICDF((((a1)-2) * ((CDF_INIT_TOP >> CDF_SHIFT) - 14) + \
- ((CDF_INIT_TOP - 14) >> 1)) / \
- ((CDF_INIT_TOP - 14)) + \
- 2), \
- AOM_ICDF((((a2)-3) * ((CDF_INIT_TOP >> CDF_SHIFT) - 14) + \
- ((CDF_INIT_TOP - 14) >> 1)) / \
- ((CDF_INIT_TOP - 14)) + \
- 3), \
- AOM_ICDF((((a3)-4) * ((CDF_INIT_TOP >> CDF_SHIFT) - 14) + \
- ((CDF_INIT_TOP - 14) >> 1)) / \
- ((CDF_INIT_TOP - 14)) + \
- 4), \
- AOM_ICDF((((a4)-5) * ((CDF_INIT_TOP >> CDF_SHIFT) - 14) + \
- ((CDF_INIT_TOP - 14) >> 1)) / \
- ((CDF_INIT_TOP - 14)) + \
- 5), \
- AOM_ICDF((((a5)-6) * ((CDF_INIT_TOP >> CDF_SHIFT) - 14) + \
- ((CDF_INIT_TOP - 14) >> 1)) / \
- ((CDF_INIT_TOP - 14)) + \
- 6), \
- AOM_ICDF((((a6)-7) * ((CDF_INIT_TOP >> CDF_SHIFT) - 14) + \
- ((CDF_INIT_TOP - 14) >> 1)) / \
- ((CDF_INIT_TOP - 14)) + \
- 7), \
- AOM_ICDF((((a7)-8) * ((CDF_INIT_TOP >> CDF_SHIFT) - 14) + \
- ((CDF_INIT_TOP - 14) >> 1)) / \
- ((CDF_INIT_TOP - 14)) + \
- 8), \
- AOM_ICDF((((a8)-9) * ((CDF_INIT_TOP >> CDF_SHIFT) - 14) + \
- ((CDF_INIT_TOP - 14) >> 1)) / \
- ((CDF_INIT_TOP - 14)) + \
- 9), \
- AOM_ICDF((((a9)-10) * ((CDF_INIT_TOP >> CDF_SHIFT) - 14) + \
- ((CDF_INIT_TOP - 14) >> 1)) / \
- ((CDF_INIT_TOP - 14)) + \
- 10), \
- AOM_ICDF((((a10)-11) * ((CDF_INIT_TOP >> CDF_SHIFT) - 14) + \
- ((CDF_INIT_TOP - 14) >> 1)) / \
- ((CDF_INIT_TOP - 14)) + \
- 11), \
- AOM_ICDF((((a11)-12) * ((CDF_INIT_TOP >> CDF_SHIFT) - 14) + \
- ((CDF_INIT_TOP - 14) >> 1)) / \
- ((CDF_INIT_TOP - 14)) + \
- 12), \
- AOM_ICDF((((a12)-13) * ((CDF_INIT_TOP >> CDF_SHIFT) - 14) + \
- ((CDF_INIT_TOP - 14) >> 1)) / \
- ((CDF_INIT_TOP - 14)) + \
- 13), \
- AOM_ICDF(CDF_PROB_TOP), 0
-#define AOM_CDF15(a0, a1, a2, a3, a4, a5, a6, a7, a8, a9, a10, a11, a12, a13) \
- AOM_ICDF((((a0)-1) * ((CDF_INIT_TOP >> CDF_SHIFT) - 15) + \
- ((CDF_INIT_TOP - 15) >> 1)) / \
- ((CDF_INIT_TOP - 15)) + \
- 1) \
- , \
- AOM_ICDF((((a1)-2) * ((CDF_INIT_TOP >> CDF_SHIFT) - 15) + \
- ((CDF_INIT_TOP - 15) >> 1)) / \
- ((CDF_INIT_TOP - 15)) + \
- 2), \
- AOM_ICDF((((a2)-3) * ((CDF_INIT_TOP >> CDF_SHIFT) - 15) + \
- ((CDF_INIT_TOP - 15) >> 1)) / \
- ((CDF_INIT_TOP - 15)) + \
- 3), \
- AOM_ICDF((((a3)-4) * ((CDF_INIT_TOP >> CDF_SHIFT) - 15) + \
- ((CDF_INIT_TOP - 15) >> 1)) / \
- ((CDF_INIT_TOP - 15)) + \
- 4), \
- AOM_ICDF((((a4)-5) * ((CDF_INIT_TOP >> CDF_SHIFT) - 15) + \
- ((CDF_INIT_TOP - 15) >> 1)) / \
- ((CDF_INIT_TOP - 15)) + \
- 5), \
- AOM_ICDF((((a5)-6) * ((CDF_INIT_TOP >> CDF_SHIFT) - 15) + \
- ((CDF_INIT_TOP - 15) >> 1)) / \
- ((CDF_INIT_TOP - 15)) + \
- 6), \
- AOM_ICDF((((a6)-7) * ((CDF_INIT_TOP >> CDF_SHIFT) - 15) + \
- ((CDF_INIT_TOP - 15) >> 1)) / \
- ((CDF_INIT_TOP - 15)) + \
- 7), \
- AOM_ICDF((((a7)-8) * ((CDF_INIT_TOP >> CDF_SHIFT) - 15) + \
- ((CDF_INIT_TOP - 15) >> 1)) / \
- ((CDF_INIT_TOP - 15)) + \
- 8), \
- AOM_ICDF((((a8)-9) * ((CDF_INIT_TOP >> CDF_SHIFT) - 15) + \
- ((CDF_INIT_TOP - 15) >> 1)) / \
- ((CDF_INIT_TOP - 15)) + \
- 9), \
- AOM_ICDF((((a9)-10) * ((CDF_INIT_TOP >> CDF_SHIFT) - 15) + \
- ((CDF_INIT_TOP - 15) >> 1)) / \
- ((CDF_INIT_TOP - 15)) + \
- 10), \
- AOM_ICDF((((a10)-11) * ((CDF_INIT_TOP >> CDF_SHIFT) - 15) + \
- ((CDF_INIT_TOP - 15) >> 1)) / \
- ((CDF_INIT_TOP - 15)) + \
- 11), \
- AOM_ICDF((((a11)-12) * ((CDF_INIT_TOP >> CDF_SHIFT) - 15) + \
- ((CDF_INIT_TOP - 15) >> 1)) / \
- ((CDF_INIT_TOP - 15)) + \
- 12), \
- AOM_ICDF((((a12)-13) * ((CDF_INIT_TOP >> CDF_SHIFT) - 15) + \
- ((CDF_INIT_TOP - 15) >> 1)) / \
- ((CDF_INIT_TOP - 15)) + \
- 13), \
- AOM_ICDF((((a13)-14) * ((CDF_INIT_TOP >> CDF_SHIFT) - 15) + \
- ((CDF_INIT_TOP - 15) >> 1)) / \
- ((CDF_INIT_TOP - 15)) + \
- 14), \
- AOM_ICDF(CDF_PROB_TOP), 0
-#define AOM_CDF16(a0, a1, a2, a3, a4, a5, a6, a7, a8, a9, a10, a11, a12, a13, \
- a14) \
- AOM_ICDF((((a0)-1) * ((CDF_INIT_TOP >> CDF_SHIFT) - 16) + \
- ((CDF_INIT_TOP - 16) >> 1)) / \
- ((CDF_INIT_TOP - 16)) + \
- 1) \
- , \
- AOM_ICDF((((a1)-2) * ((CDF_INIT_TOP >> CDF_SHIFT) - 16) + \
- ((CDF_INIT_TOP - 16) >> 1)) / \
- ((CDF_INIT_TOP - 16)) + \
- 2), \
- AOM_ICDF((((a2)-3) * ((CDF_INIT_TOP >> CDF_SHIFT) - 16) + \
- ((CDF_INIT_TOP - 16) >> 1)) / \
- ((CDF_INIT_TOP - 16)) + \
- 3), \
- AOM_ICDF((((a3)-4) * ((CDF_INIT_TOP >> CDF_SHIFT) - 16) + \
- ((CDF_INIT_TOP - 16) >> 1)) / \
- ((CDF_INIT_TOP - 16)) + \
- 4), \
- AOM_ICDF((((a4)-5) * ((CDF_INIT_TOP >> CDF_SHIFT) - 16) + \
- ((CDF_INIT_TOP - 16) >> 1)) / \
- ((CDF_INIT_TOP - 16)) + \
- 5), \
- AOM_ICDF((((a5)-6) * ((CDF_INIT_TOP >> CDF_SHIFT) - 16) + \
- ((CDF_INIT_TOP - 16) >> 1)) / \
- ((CDF_INIT_TOP - 16)) + \
- 6), \
- AOM_ICDF((((a6)-7) * ((CDF_INIT_TOP >> CDF_SHIFT) - 16) + \
- ((CDF_INIT_TOP - 16) >> 1)) / \
- ((CDF_INIT_TOP - 16)) + \
- 7), \
- AOM_ICDF((((a7)-8) * ((CDF_INIT_TOP >> CDF_SHIFT) - 16) + \
- ((CDF_INIT_TOP - 16) >> 1)) / \
- ((CDF_INIT_TOP - 16)) + \
- 8), \
- AOM_ICDF((((a8)-9) * ((CDF_INIT_TOP >> CDF_SHIFT) - 16) + \
- ((CDF_INIT_TOP - 16) >> 1)) / \
- ((CDF_INIT_TOP - 16)) + \
- 9), \
- AOM_ICDF((((a9)-10) * ((CDF_INIT_TOP >> CDF_SHIFT) - 16) + \
- ((CDF_INIT_TOP - 16) >> 1)) / \
- ((CDF_INIT_TOP - 16)) + \
- 10), \
- AOM_ICDF((((a10)-11) * ((CDF_INIT_TOP >> CDF_SHIFT) - 16) + \
- ((CDF_INIT_TOP - 16) >> 1)) / \
- ((CDF_INIT_TOP - 16)) + \
- 11), \
- AOM_ICDF((((a11)-12) * ((CDF_INIT_TOP >> CDF_SHIFT) - 16) + \
- ((CDF_INIT_TOP - 16) >> 1)) / \
- ((CDF_INIT_TOP - 16)) + \
- 12), \
- AOM_ICDF((((a12)-13) * ((CDF_INIT_TOP >> CDF_SHIFT) - 16) + \
- ((CDF_INIT_TOP - 16) >> 1)) / \
- ((CDF_INIT_TOP - 16)) + \
- 13), \
- AOM_ICDF((((a13)-14) * ((CDF_INIT_TOP >> CDF_SHIFT) - 16) + \
- ((CDF_INIT_TOP - 16) >> 1)) / \
- ((CDF_INIT_TOP - 16)) + \
- 14), \
- AOM_ICDF((((a14)-15) * ((CDF_INIT_TOP >> CDF_SHIFT) - 16) + \
- ((CDF_INIT_TOP - 16) >> 1)) / \
- ((CDF_INIT_TOP - 16)) + \
- 15), \
- AOM_ICDF(CDF_PROB_TOP), 0
-
-#endif
-
-static INLINE uint8_t get_prob(unsigned int num, unsigned int den) {
- assert(den != 0);
- {
- const int p = (int)(((uint64_t)num * 256 + (den >> 1)) / den);
- // (p > 255) ? 255 : (p < 1) ? 1 : p;
- const int clipped_prob = p | ((255 - p) >> 23) | (p == 0);
- return (uint8_t)clipped_prob;
- }
-}
-
-static INLINE void update_cdf(aom_cdf_prob *cdf, int val, int nsymbs) {
- int rate;
- int i, tmp;
-
- static const int nsymbs2speed[17] = { 0, 0, 1, 1, 2, 2, 2, 2, 2,
- 2, 2, 2, 2, 2, 2, 2, 2 };
- assert(nsymbs < 17);
- rate = 3 + (cdf[nsymbs] > 15) + (cdf[nsymbs] > 31) +
- nsymbs2speed[nsymbs]; // + get_msb(nsymbs);
- tmp = AOM_ICDF(0);
-
- // Single loop (faster)
- for (i = 0; i < nsymbs - 1; ++i) {
- tmp = (i == val) ? 0 : tmp;
- if (tmp < cdf[i]) {
- cdf[i] -= ((cdf[i] - tmp) >> rate);
- } else {
- cdf[i] += ((tmp - cdf[i]) >> rate);
- }
- }
- cdf[nsymbs] += (cdf[nsymbs] < 32);
-}
-
-#ifdef __cplusplus
-} // extern "C"
-#endif
-
-#endif // AOM_AOM_DSP_PROB_H_
diff --git a/third_party/aom/aom_dsp/psnr.c b/third_party/aom/aom_dsp/psnr.c
deleted file mode 100644
index 50f376a4a..000000000
--- a/third_party/aom/aom_dsp/psnr.c
+++ /dev/null
@@ -1,381 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#include <assert.h>
-#include <math.h>
-
-#include "config/aom_dsp_rtcd.h"
-
-#include "aom_dsp/psnr.h"
-#include "aom_scale/yv12config.h"
-
-double aom_sse_to_psnr(double samples, double peak, double sse) {
- if (sse > 0.0) {
- const double psnr = 10.0 * log10(samples * peak * peak / sse);
- return psnr > MAX_PSNR ? MAX_PSNR : psnr;
- } else {
- return MAX_PSNR;
- }
-}
-
-/* TODO(yaowu): The block_variance calls the unoptimized versions of variance()
- * and highbd_8_variance(). It should not.
- */
-static void encoder_variance(const uint8_t *a, int a_stride, const uint8_t *b,
- int b_stride, int w, int h, unsigned int *sse,
- int *sum) {
- int i, j;
-
- *sum = 0;
- *sse = 0;
-
- for (i = 0; i < h; i++) {
- for (j = 0; j < w; j++) {
- const int diff = a[j] - b[j];
- *sum += diff;
- *sse += diff * diff;
- }
-
- a += a_stride;
- b += b_stride;
- }
-}
-
-static void encoder_highbd_variance64(const uint8_t *a8, int a_stride,
- const uint8_t *b8, int b_stride, int w,
- int h, uint64_t *sse, int64_t *sum) {
- const uint16_t *a = CONVERT_TO_SHORTPTR(a8);
- const uint16_t *b = CONVERT_TO_SHORTPTR(b8);
- int64_t tsum = 0;
- uint64_t tsse = 0;
- for (int i = 0; i < h; ++i) {
- int32_t lsum = 0;
- for (int j = 0; j < w; ++j) {
- const int diff = a[j] - b[j];
- lsum += diff;
- tsse += (uint32_t)(diff * diff);
- }
- tsum += lsum;
- a += a_stride;
- b += b_stride;
- }
- *sum = tsum;
- *sse = tsse;
-}
-
-static void encoder_highbd_8_variance(const uint8_t *a8, int a_stride,
- const uint8_t *b8, int b_stride, int w,
- int h, unsigned int *sse, int *sum) {
- uint64_t sse_long = 0;
- int64_t sum_long = 0;
- encoder_highbd_variance64(a8, a_stride, b8, b_stride, w, h, &sse_long,
- &sum_long);
- *sse = (unsigned int)sse_long;
- *sum = (int)sum_long;
-}
-
-static int64_t get_sse(const uint8_t *a, int a_stride, const uint8_t *b,
- int b_stride, int width, int height) {
- const int dw = width % 16;
- const int dh = height % 16;
- int64_t total_sse = 0;
- unsigned int sse = 0;
- int sum = 0;
- int x, y;
-
- if (dw > 0) {
- encoder_variance(&a[width - dw], a_stride, &b[width - dw], b_stride, dw,
- height, &sse, &sum);
- total_sse += sse;
- }
-
- if (dh > 0) {
- encoder_variance(&a[(height - dh) * a_stride], a_stride,
- &b[(height - dh) * b_stride], b_stride, width - dw, dh,
- &sse, &sum);
- total_sse += sse;
- }
-
- for (y = 0; y < height / 16; ++y) {
- const uint8_t *pa = a;
- const uint8_t *pb = b;
- for (x = 0; x < width / 16; ++x) {
- aom_mse16x16(pa, a_stride, pb, b_stride, &sse);
- total_sse += sse;
-
- pa += 16;
- pb += 16;
- }
-
- a += 16 * a_stride;
- b += 16 * b_stride;
- }
-
- return total_sse;
-}
-
-static int64_t highbd_get_sse_shift(const uint8_t *a8, int a_stride,
- const uint8_t *b8, int b_stride, int width,
- int height, unsigned int input_shift) {
- const uint16_t *a = CONVERT_TO_SHORTPTR(a8);
- const uint16_t *b = CONVERT_TO_SHORTPTR(b8);
- int64_t total_sse = 0;
- int x, y;
- for (y = 0; y < height; ++y) {
- for (x = 0; x < width; ++x) {
- int64_t diff;
- diff = (a[x] >> input_shift) - (b[x] >> input_shift);
- total_sse += diff * diff;
- }
- a += a_stride;
- b += b_stride;
- }
- return total_sse;
-}
-
-static int64_t highbd_get_sse(const uint8_t *a, int a_stride, const uint8_t *b,
- int b_stride, int width, int height) {
- int64_t total_sse = 0;
- int x, y;
- const int dw = width % 16;
- const int dh = height % 16;
- unsigned int sse = 0;
- int sum = 0;
- if (dw > 0) {
- encoder_highbd_8_variance(&a[width - dw], a_stride, &b[width - dw],
- b_stride, dw, height, &sse, &sum);
- total_sse += sse;
- }
- if (dh > 0) {
- encoder_highbd_8_variance(&a[(height - dh) * a_stride], a_stride,
- &b[(height - dh) * b_stride], b_stride,
- width - dw, dh, &sse, &sum);
- total_sse += sse;
- }
- for (y = 0; y < height / 16; ++y) {
- const uint8_t *pa = a;
- const uint8_t *pb = b;
- for (x = 0; x < width / 16; ++x) {
- aom_highbd_8_mse16x16(pa, a_stride, pb, b_stride, &sse);
- total_sse += sse;
- pa += 16;
- pb += 16;
- }
- a += 16 * a_stride;
- b += 16 * b_stride;
- }
- return total_sse;
-}
-
-int64_t aom_get_y_sse_part(const YV12_BUFFER_CONFIG *a,
- const YV12_BUFFER_CONFIG *b, int hstart, int width,
- int vstart, int height) {
- return get_sse(a->y_buffer + vstart * a->y_stride + hstart, a->y_stride,
- b->y_buffer + vstart * b->y_stride + hstart, b->y_stride,
- width, height);
-}
-
-int64_t aom_get_y_sse(const YV12_BUFFER_CONFIG *a,
- const YV12_BUFFER_CONFIG *b) {
- assert(a->y_crop_width == b->y_crop_width);
- assert(a->y_crop_height == b->y_crop_height);
-
- return get_sse(a->y_buffer, a->y_stride, b->y_buffer, b->y_stride,
- a->y_crop_width, a->y_crop_height);
-}
-
-int64_t aom_get_u_sse_part(const YV12_BUFFER_CONFIG *a,
- const YV12_BUFFER_CONFIG *b, int hstart, int width,
- int vstart, int height) {
- return get_sse(a->u_buffer + vstart * a->uv_stride + hstart, a->uv_stride,
- b->u_buffer + vstart * b->uv_stride + hstart, b->uv_stride,
- width, height);
-}
-
-int64_t aom_get_u_sse(const YV12_BUFFER_CONFIG *a,
- const YV12_BUFFER_CONFIG *b) {
- assert(a->uv_crop_width == b->uv_crop_width);
- assert(a->uv_crop_height == b->uv_crop_height);
-
- return get_sse(a->u_buffer, a->uv_stride, b->u_buffer, b->uv_stride,
- a->uv_crop_width, a->uv_crop_height);
-}
-
-int64_t aom_get_v_sse_part(const YV12_BUFFER_CONFIG *a,
- const YV12_BUFFER_CONFIG *b, int hstart, int width,
- int vstart, int height) {
- return get_sse(a->v_buffer + vstart * a->uv_stride + hstart, a->uv_stride,
- b->v_buffer + vstart * b->uv_stride + hstart, b->uv_stride,
- width, height);
-}
-
-int64_t aom_get_v_sse(const YV12_BUFFER_CONFIG *a,
- const YV12_BUFFER_CONFIG *b) {
- assert(a->uv_crop_width == b->uv_crop_width);
- assert(a->uv_crop_height == b->uv_crop_height);
-
- return get_sse(a->v_buffer, a->uv_stride, b->v_buffer, b->uv_stride,
- a->uv_crop_width, a->uv_crop_height);
-}
-
-int64_t aom_highbd_get_y_sse_part(const YV12_BUFFER_CONFIG *a,
- const YV12_BUFFER_CONFIG *b, int hstart,
- int width, int vstart, int height) {
- return highbd_get_sse(
- a->y_buffer + vstart * a->y_stride + hstart, a->y_stride,
- b->y_buffer + vstart * b->y_stride + hstart, b->y_stride, width, height);
-}
-
-int64_t aom_highbd_get_y_sse(const YV12_BUFFER_CONFIG *a,
- const YV12_BUFFER_CONFIG *b) {
- assert(a->y_crop_width == b->y_crop_width);
- assert(a->y_crop_height == b->y_crop_height);
- assert((a->flags & YV12_FLAG_HIGHBITDEPTH) != 0);
- assert((b->flags & YV12_FLAG_HIGHBITDEPTH) != 0);
-
- return highbd_get_sse(a->y_buffer, a->y_stride, b->y_buffer, b->y_stride,
- a->y_crop_width, a->y_crop_height);
-}
-
-int64_t aom_highbd_get_u_sse_part(const YV12_BUFFER_CONFIG *a,
- const YV12_BUFFER_CONFIG *b, int hstart,
- int width, int vstart, int height) {
- return highbd_get_sse(a->u_buffer + vstart * a->uv_stride + hstart,
- a->uv_stride,
- b->u_buffer + vstart * b->uv_stride + hstart,
- b->uv_stride, width, height);
-}
-
-int64_t aom_highbd_get_u_sse(const YV12_BUFFER_CONFIG *a,
- const YV12_BUFFER_CONFIG *b) {
- assert(a->uv_crop_width == b->uv_crop_width);
- assert(a->uv_crop_height == b->uv_crop_height);
- assert((a->flags & YV12_FLAG_HIGHBITDEPTH) != 0);
- assert((b->flags & YV12_FLAG_HIGHBITDEPTH) != 0);
-
- return highbd_get_sse(a->u_buffer, a->uv_stride, b->u_buffer, b->uv_stride,
- a->uv_crop_width, a->uv_crop_height);
-}
-
-int64_t aom_highbd_get_v_sse_part(const YV12_BUFFER_CONFIG *a,
- const YV12_BUFFER_CONFIG *b, int hstart,
- int width, int vstart, int height) {
- return highbd_get_sse(a->v_buffer + vstart * a->uv_stride + hstart,
- a->uv_stride,
- b->v_buffer + vstart * b->uv_stride + hstart,
- b->uv_stride, width, height);
-}
-
-int64_t aom_highbd_get_v_sse(const YV12_BUFFER_CONFIG *a,
- const YV12_BUFFER_CONFIG *b) {
- assert(a->uv_crop_width == b->uv_crop_width);
- assert(a->uv_crop_height == b->uv_crop_height);
- assert((a->flags & YV12_FLAG_HIGHBITDEPTH) != 0);
- assert((b->flags & YV12_FLAG_HIGHBITDEPTH) != 0);
-
- return highbd_get_sse(a->v_buffer, a->uv_stride, b->v_buffer, b->uv_stride,
- a->uv_crop_width, a->uv_crop_height);
-}
-
-int64_t aom_get_sse_plane(const YV12_BUFFER_CONFIG *a,
- const YV12_BUFFER_CONFIG *b, int plane, int highbd) {
- if (highbd) {
- switch (plane) {
- case 0: return aom_highbd_get_y_sse(a, b);
- case 1: return aom_highbd_get_u_sse(a, b);
- case 2: return aom_highbd_get_v_sse(a, b);
- default: assert(plane >= 0 && plane <= 2); return 0;
- }
- }
- switch (plane) {
- case 0: return aom_get_y_sse(a, b);
- case 1: return aom_get_u_sse(a, b);
- case 2: return aom_get_v_sse(a, b);
- default: assert(plane >= 0 && plane <= 2); return 0;
- }
-}
-
-void aom_calc_highbd_psnr(const YV12_BUFFER_CONFIG *a,
- const YV12_BUFFER_CONFIG *b, PSNR_STATS *psnr,
- uint32_t bit_depth, uint32_t in_bit_depth) {
- const int widths[3] = { a->y_crop_width, a->uv_crop_width, a->uv_crop_width };
- const int heights[3] = { a->y_crop_height, a->uv_crop_height,
- a->uv_crop_height };
- const int a_strides[3] = { a->y_stride, a->uv_stride, a->uv_stride };
- const int b_strides[3] = { b->y_stride, b->uv_stride, b->uv_stride };
- int i;
- uint64_t total_sse = 0;
- uint32_t total_samples = 0;
- const double peak = (double)((1 << in_bit_depth) - 1);
- const unsigned int input_shift = bit_depth - in_bit_depth;
-
- for (i = 0; i < 3; ++i) {
- const int w = widths[i];
- const int h = heights[i];
- const uint32_t samples = w * h;
- uint64_t sse;
- if (a->flags & YV12_FLAG_HIGHBITDEPTH) {
- if (input_shift) {
- sse = highbd_get_sse_shift(a->buffers[i], a_strides[i], b->buffers[i],
- b_strides[i], w, h, input_shift);
- } else {
- sse = highbd_get_sse(a->buffers[i], a_strides[i], b->buffers[i],
- b_strides[i], w, h);
- }
- } else {
- sse = get_sse(a->buffers[i], a_strides[i], b->buffers[i], b_strides[i], w,
- h);
- }
- psnr->sse[1 + i] = sse;
- psnr->samples[1 + i] = samples;
- psnr->psnr[1 + i] = aom_sse_to_psnr(samples, peak, (double)sse);
-
- total_sse += sse;
- total_samples += samples;
- }
-
- psnr->sse[0] = total_sse;
- psnr->samples[0] = total_samples;
- psnr->psnr[0] =
- aom_sse_to_psnr((double)total_samples, peak, (double)total_sse);
-}
-
-void aom_calc_psnr(const YV12_BUFFER_CONFIG *a, const YV12_BUFFER_CONFIG *b,
- PSNR_STATS *psnr) {
- static const double peak = 255.0;
- const int widths[3] = { a->y_crop_width, a->uv_crop_width, a->uv_crop_width };
- const int heights[3] = { a->y_crop_height, a->uv_crop_height,
- a->uv_crop_height };
- const int a_strides[3] = { a->y_stride, a->uv_stride, a->uv_stride };
- const int b_strides[3] = { b->y_stride, b->uv_stride, b->uv_stride };
- int i;
- uint64_t total_sse = 0;
- uint32_t total_samples = 0;
-
- for (i = 0; i < 3; ++i) {
- const int w = widths[i];
- const int h = heights[i];
- const uint32_t samples = w * h;
- const uint64_t sse =
- get_sse(a->buffers[i], a_strides[i], b->buffers[i], b_strides[i], w, h);
- psnr->sse[1 + i] = sse;
- psnr->samples[1 + i] = samples;
- psnr->psnr[1 + i] = aom_sse_to_psnr(samples, peak, (double)sse);
-
- total_sse += sse;
- total_samples += samples;
- }
-
- psnr->sse[0] = total_sse;
- psnr->samples[0] = total_samples;
- psnr->psnr[0] =
- aom_sse_to_psnr((double)total_samples, peak, (double)total_sse);
-}
diff --git a/third_party/aom/aom_dsp/psnr.h b/third_party/aom/aom_dsp/psnr.h
deleted file mode 100644
index 58e4e71ee..000000000
--- a/third_party/aom/aom_dsp/psnr.h
+++ /dev/null
@@ -1,79 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#ifndef AOM_AOM_DSP_PSNR_H_
-#define AOM_AOM_DSP_PSNR_H_
-
-#include "aom_scale/yv12config.h"
-
-#define MAX_PSNR 100.0
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-typedef struct {
- double psnr[4]; // total/y/u/v
- uint64_t sse[4]; // total/y/u/v
- uint32_t samples[4]; // total/y/u/v
-} PSNR_STATS;
-
-/*!\brief Converts SSE to PSNR
- *
- * Converts sum of squared errros (SSE) to peak signal-to-noise ratio (PNSR).
- *
- * \param[in] samples Number of samples
- * \param[in] peak Max sample value
- * \param[in] sse Sum of squared errors
- */
-double aom_sse_to_psnr(double samples, double peak, double sse);
-int64_t aom_get_y_sse_part(const YV12_BUFFER_CONFIG *a,
- const YV12_BUFFER_CONFIG *b, int hstart, int width,
- int vstart, int height);
-int64_t aom_get_y_sse(const YV12_BUFFER_CONFIG *a, const YV12_BUFFER_CONFIG *b);
-int64_t aom_get_u_sse_part(const YV12_BUFFER_CONFIG *a,
- const YV12_BUFFER_CONFIG *b, int hstart, int width,
- int vstart, int height);
-int64_t aom_get_u_sse(const YV12_BUFFER_CONFIG *a, const YV12_BUFFER_CONFIG *b);
-int64_t aom_get_v_sse_part(const YV12_BUFFER_CONFIG *a,
- const YV12_BUFFER_CONFIG *b, int hstart, int width,
- int vstart, int height);
-int64_t aom_get_v_sse(const YV12_BUFFER_CONFIG *a, const YV12_BUFFER_CONFIG *b);
-int64_t aom_get_sse_plane(const YV12_BUFFER_CONFIG *a,
- const YV12_BUFFER_CONFIG *b, int plane, int highbd);
-int64_t aom_highbd_get_y_sse_part(const YV12_BUFFER_CONFIG *a,
- const YV12_BUFFER_CONFIG *b, int hstart,
- int width, int vstart, int height);
-int64_t aom_highbd_get_y_sse(const YV12_BUFFER_CONFIG *a,
- const YV12_BUFFER_CONFIG *b);
-int64_t aom_highbd_get_u_sse_part(const YV12_BUFFER_CONFIG *a,
- const YV12_BUFFER_CONFIG *b, int hstart,
- int width, int vstart, int height);
-int64_t aom_highbd_get_u_sse(const YV12_BUFFER_CONFIG *a,
- const YV12_BUFFER_CONFIG *b);
-int64_t aom_highbd_get_v_sse_part(const YV12_BUFFER_CONFIG *a,
- const YV12_BUFFER_CONFIG *b, int hstart,
- int width, int vstart, int height);
-int64_t aom_highbd_get_v_sse(const YV12_BUFFER_CONFIG *a,
- const YV12_BUFFER_CONFIG *b);
-void aom_calc_highbd_psnr(const YV12_BUFFER_CONFIG *a,
- const YV12_BUFFER_CONFIG *b, PSNR_STATS *psnr,
- unsigned int bit_depth, unsigned int in_bit_depth);
-void aom_calc_psnr(const YV12_BUFFER_CONFIG *a, const YV12_BUFFER_CONFIG *b,
- PSNR_STATS *psnr);
-
-double aom_psnrhvs(const YV12_BUFFER_CONFIG *source,
- const YV12_BUFFER_CONFIG *dest, double *phvs_y,
- double *phvs_u, double *phvs_v, uint32_t bd, uint32_t in_bd);
-#ifdef __cplusplus
-} // extern "C"
-#endif
-#endif // AOM_AOM_DSP_PSNR_H_
diff --git a/third_party/aom/aom_dsp/psnrhvs.c b/third_party/aom/aom_dsp/psnrhvs.c
deleted file mode 100644
index 30fe21d9c..000000000
--- a/third_party/aom/aom_dsp/psnrhvs.c
+++ /dev/null
@@ -1,272 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- *
- * This code was originally written by: Gregory Maxwell, at the Daala
- * project.
- */
-
-#include <assert.h>
-#include <math.h>
-#include <stdio.h>
-#include <stdlib.h>
-
-#include "config/aom_config.h"
-#include "config/aom_dsp_rtcd.h"
-
-#include "aom_dsp/psnr.h"
-#include "aom_dsp/ssim.h"
-#include "aom_ports/system_state.h"
-
-static void od_bin_fdct8x8(tran_low_t *y, int ystride, const int16_t *x,
- int xstride) {
- int i, j;
- (void)xstride;
- aom_fdct8x8(x, y, ystride);
- for (i = 0; i < 8; i++)
- for (j = 0; j < 8; j++)
- *(y + ystride * i + j) = (*(y + ystride * i + j) + 4) >> 3;
-}
-
-static void hbd_od_bin_fdct8x8(tran_low_t *y, int ystride, const int16_t *x,
- int xstride) {
- int i, j;
- (void)xstride;
- aom_highbd_fdct8x8(x, y, ystride);
- for (i = 0; i < 8; i++)
- for (j = 0; j < 8; j++)
- *(y + ystride * i + j) = (*(y + ystride * i + j) + 4) >> 3;
-}
-
-/* Normalized inverse quantization matrix for 8x8 DCT at the point of
- * transparency. This is not the JPEG based matrix from the paper,
- this one gives a slightly higher MOS agreement.*/
-static const double csf_y[8][8] = {
- { 1.6193873005, 2.2901594831, 2.08509755623, 1.48366094411, 1.00227514334,
- 0.678296995242, 0.466224900598, 0.3265091542 },
- { 2.2901594831, 1.94321815382, 2.04793073064, 1.68731108984, 1.2305666963,
- 0.868920337363, 0.61280991668, 0.436405793551 },
- { 2.08509755623, 2.04793073064, 1.34329019223, 1.09205635862, 0.875748795257,
- 0.670882927016, 0.501731932449, 0.372504254596 },
- { 1.48366094411, 1.68731108984, 1.09205635862, 0.772819797575, 0.605636379554,
- 0.48309405692, 0.380429446972, 0.295774038565 },
- { 1.00227514334, 1.2305666963, 0.875748795257, 0.605636379554, 0.448996256676,
- 0.352889268808, 0.283006984131, 0.226951348204 },
- { 0.678296995242, 0.868920337363, 0.670882927016, 0.48309405692,
- 0.352889268808, 0.27032073436, 0.215017739696, 0.17408067321 },
- { 0.466224900598, 0.61280991668, 0.501731932449, 0.380429446972,
- 0.283006984131, 0.215017739696, 0.168869545842, 0.136153931001 },
- { 0.3265091542, 0.436405793551, 0.372504254596, 0.295774038565,
- 0.226951348204, 0.17408067321, 0.136153931001, 0.109083846276 }
-};
-static const double csf_cb420[8][8] = {
- { 1.91113096927, 2.46074210438, 1.18284184739, 1.14982565193, 1.05017074788,
- 0.898018824055, 0.74725392039, 0.615105596242 },
- { 2.46074210438, 1.58529308355, 1.21363250036, 1.38190029285, 1.33100189972,
- 1.17428548929, 0.996404342439, 0.830890433625 },
- { 1.18284184739, 1.21363250036, 0.978712413627, 1.02624506078, 1.03145147362,
- 0.960060382087, 0.849823426169, 0.731221236837 },
- { 1.14982565193, 1.38190029285, 1.02624506078, 0.861317501629, 0.801821139099,
- 0.751437590932, 0.685398513368, 0.608694761374 },
- { 1.05017074788, 1.33100189972, 1.03145147362, 0.801821139099, 0.676555426187,
- 0.605503172737, 0.55002013668, 0.495804539034 },
- { 0.898018824055, 1.17428548929, 0.960060382087, 0.751437590932,
- 0.605503172737, 0.514674450957, 0.454353482512, 0.407050308965 },
- { 0.74725392039, 0.996404342439, 0.849823426169, 0.685398513368,
- 0.55002013668, 0.454353482512, 0.389234902883, 0.342353999733 },
- { 0.615105596242, 0.830890433625, 0.731221236837, 0.608694761374,
- 0.495804539034, 0.407050308965, 0.342353999733, 0.295530605237 }
-};
-static const double csf_cr420[8][8] = {
- { 2.03871978502, 2.62502345193, 1.26180942886, 1.11019789803, 1.01397751469,
- 0.867069376285, 0.721500455585, 0.593906509971 },
- { 2.62502345193, 1.69112867013, 1.17180569821, 1.3342742857, 1.28513006198,
- 1.13381474809, 0.962064122248, 0.802254508198 },
- { 1.26180942886, 1.17180569821, 0.944981930573, 0.990876405848,
- 0.995903384143, 0.926972725286, 0.820534991409, 0.706020324706 },
- { 1.11019789803, 1.3342742857, 0.990876405848, 0.831632933426, 0.77418706195,
- 0.725539939514, 0.661776842059, 0.587716619023 },
- { 1.01397751469, 1.28513006198, 0.995903384143, 0.77418706195, 0.653238524286,
- 0.584635025748, 0.531064164893, 0.478717061273 },
- { 0.867069376285, 1.13381474809, 0.926972725286, 0.725539939514,
- 0.584635025748, 0.496936637883, 0.438694579826, 0.393021669543 },
- { 0.721500455585, 0.962064122248, 0.820534991409, 0.661776842059,
- 0.531064164893, 0.438694579826, 0.375820256136, 0.330555063063 },
- { 0.593906509971, 0.802254508198, 0.706020324706, 0.587716619023,
- 0.478717061273, 0.393021669543, 0.330555063063, 0.285345396658 }
-};
-
-static double convert_score_db(double _score, double _weight, int bit_depth) {
- int16_t pix_max = 255;
- assert(_score * _weight >= 0.0);
- if (bit_depth == 10)
- pix_max = 1023;
- else if (bit_depth == 12)
- pix_max = 4095;
-
- if (_weight * _score < pix_max * pix_max * 1e-10) return MAX_PSNR;
- return 10 * (log10(pix_max * pix_max) - log10(_weight * _score));
-}
-
-static double calc_psnrhvs(const unsigned char *src, int _systride,
- const unsigned char *dst, int _dystride, double _par,
- int _w, int _h, int _step, const double _csf[8][8],
- uint32_t _shift, int buf_is_hbd) {
- double ret;
- const uint8_t *_src8 = src;
- const uint8_t *_dst8 = dst;
- const uint16_t *_src16 = CONVERT_TO_SHORTPTR(src);
- const uint16_t *_dst16 = CONVERT_TO_SHORTPTR(dst);
- DECLARE_ALIGNED(16, int16_t, dct_s[8 * 8]);
- DECLARE_ALIGNED(16, int16_t, dct_d[8 * 8]);
- DECLARE_ALIGNED(16, tran_low_t, dct_s_coef[8 * 8]);
- DECLARE_ALIGNED(16, tran_low_t, dct_d_coef[8 * 8]);
- double mask[8][8];
- int pixels;
- int x;
- int y;
- (void)_par;
- ret = pixels = 0;
- /*In the PSNR-HVS-M paper[1] the authors describe the construction of
- their masking table as "we have used the quantization table for the
- color component Y of JPEG [6] that has been also obtained on the
- basis of CSF. Note that the values in quantization table JPEG have
- been normalized and then squared." Their CSF matrix (from PSNR-HVS)
- was also constructed from the JPEG matrices. I can not find any obvious
- scheme of normalizing to produce their table, but if I multiply their
- CSF by 0.38857 and square the result I get their masking table.
- I have no idea where this constant comes from, but deviating from it
- too greatly hurts MOS agreement.
-
- [1] Nikolay Ponomarenko, Flavia Silvestri, Karen Egiazarian, Marco Carli,
- Jaakko Astola, Vladimir Lukin, "On between-coefficient contrast masking
- of DCT basis functions", CD-ROM Proceedings of the Third
- International Workshop on Video Processing and Quality Metrics for Consumer
- Electronics VPQM-07, Scottsdale, Arizona, USA, 25-26 January, 2007, 4 p.*/
- for (x = 0; x < 8; x++)
- for (y = 0; y < 8; y++)
- mask[x][y] =
- (_csf[x][y] * 0.3885746225901003) * (_csf[x][y] * 0.3885746225901003);
- for (y = 0; y < _h - 7; y += _step) {
- for (x = 0; x < _w - 7; x += _step) {
- int i;
- int j;
- double s_means[4];
- double d_means[4];
- double s_vars[4];
- double d_vars[4];
- double s_gmean = 0;
- double d_gmean = 0;
- double s_gvar = 0;
- double d_gvar = 0;
- double s_mask = 0;
- double d_mask = 0;
- for (i = 0; i < 4; i++)
- s_means[i] = d_means[i] = s_vars[i] = d_vars[i] = 0;
- for (i = 0; i < 8; i++) {
- for (j = 0; j < 8; j++) {
- int sub = ((i & 12) >> 2) + ((j & 12) >> 1);
- if (!buf_is_hbd) {
- dct_s[i * 8 + j] = _src8[(y + i) * _systride + (j + x)];
- dct_d[i * 8 + j] = _dst8[(y + i) * _dystride + (j + x)];
- } else {
- dct_s[i * 8 + j] = _src16[(y + i) * _systride + (j + x)] >> _shift;
- dct_d[i * 8 + j] = _dst16[(y + i) * _dystride + (j + x)] >> _shift;
- }
- s_gmean += dct_s[i * 8 + j];
- d_gmean += dct_d[i * 8 + j];
- s_means[sub] += dct_s[i * 8 + j];
- d_means[sub] += dct_d[i * 8 + j];
- }
- }
- s_gmean /= 64.f;
- d_gmean /= 64.f;
- for (i = 0; i < 4; i++) s_means[i] /= 16.f;
- for (i = 0; i < 4; i++) d_means[i] /= 16.f;
- for (i = 0; i < 8; i++) {
- for (j = 0; j < 8; j++) {
- int sub = ((i & 12) >> 2) + ((j & 12) >> 1);
- s_gvar += (dct_s[i * 8 + j] - s_gmean) * (dct_s[i * 8 + j] - s_gmean);
- d_gvar += (dct_d[i * 8 + j] - d_gmean) * (dct_d[i * 8 + j] - d_gmean);
- s_vars[sub] += (dct_s[i * 8 + j] - s_means[sub]) *
- (dct_s[i * 8 + j] - s_means[sub]);
- d_vars[sub] += (dct_d[i * 8 + j] - d_means[sub]) *
- (dct_d[i * 8 + j] - d_means[sub]);
- }
- }
- s_gvar *= 1 / 63.f * 64;
- d_gvar *= 1 / 63.f * 64;
- for (i = 0; i < 4; i++) s_vars[i] *= 1 / 15.f * 16;
- for (i = 0; i < 4; i++) d_vars[i] *= 1 / 15.f * 16;
- if (s_gvar > 0)
- s_gvar = (s_vars[0] + s_vars[1] + s_vars[2] + s_vars[3]) / s_gvar;
- if (d_gvar > 0)
- d_gvar = (d_vars[0] + d_vars[1] + d_vars[2] + d_vars[3]) / d_gvar;
- if (!buf_is_hbd) {
- od_bin_fdct8x8(dct_s_coef, 8, dct_s, 8);
- od_bin_fdct8x8(dct_d_coef, 8, dct_d, 8);
- } else {
- hbd_od_bin_fdct8x8(dct_s_coef, 8, dct_s, 8);
- hbd_od_bin_fdct8x8(dct_d_coef, 8, dct_d, 8);
- }
- for (i = 0; i < 8; i++)
- for (j = (i == 0); j < 8; j++)
- s_mask += dct_s_coef[i * 8 + j] * dct_s_coef[i * 8 + j] * mask[i][j];
- for (i = 0; i < 8; i++)
- for (j = (i == 0); j < 8; j++)
- d_mask += dct_d_coef[i * 8 + j] * dct_d_coef[i * 8 + j] * mask[i][j];
- s_mask = sqrt(s_mask * s_gvar) / 32.f;
- d_mask = sqrt(d_mask * d_gvar) / 32.f;
- if (d_mask > s_mask) s_mask = d_mask;
- for (i = 0; i < 8; i++) {
- for (j = 0; j < 8; j++) {
- double err;
- err = fabs((double)(dct_s_coef[i * 8 + j] - dct_d_coef[i * 8 + j]));
- if (i != 0 || j != 0)
- err = err < s_mask / mask[i][j] ? 0 : err - s_mask / mask[i][j];
- ret += (err * _csf[i][j]) * (err * _csf[i][j]);
- pixels++;
- }
- }
- }
- }
- if (pixels <= 0) return 0;
- ret /= pixels;
- return ret;
-}
-
-double aom_psnrhvs(const YV12_BUFFER_CONFIG *src, const YV12_BUFFER_CONFIG *dst,
- double *y_psnrhvs, double *u_psnrhvs, double *v_psnrhvs,
- uint32_t bd, uint32_t in_bd) {
- double psnrhvs;
- const double par = 1.0;
- const int step = 7;
- uint32_t bd_shift = 0;
- aom_clear_system_state();
- assert(bd == 8 || bd == 10 || bd == 12);
- assert(bd >= in_bd);
- assert(src->flags == dst->flags);
- const int buf_is_hbd = src->flags & YV12_FLAG_HIGHBITDEPTH;
-
- bd_shift = bd - in_bd;
-
- *y_psnrhvs = calc_psnrhvs(
- src->y_buffer, src->y_stride, dst->y_buffer, dst->y_stride, par,
- src->y_crop_width, src->y_crop_height, step, csf_y, bd_shift, buf_is_hbd);
- *u_psnrhvs =
- calc_psnrhvs(src->u_buffer, src->uv_stride, dst->u_buffer, dst->uv_stride,
- par, src->uv_crop_width, src->uv_crop_height, step,
- csf_cb420, bd_shift, buf_is_hbd);
- *v_psnrhvs =
- calc_psnrhvs(src->v_buffer, src->uv_stride, dst->v_buffer, dst->uv_stride,
- par, src->uv_crop_width, src->uv_crop_height, step,
- csf_cr420, bd_shift, buf_is_hbd);
- psnrhvs = (*y_psnrhvs) * .8 + .1 * ((*u_psnrhvs) + (*v_psnrhvs));
- return convert_score_db(psnrhvs, 1.0, in_bd);
-}
diff --git a/third_party/aom/aom_dsp/quantize.c b/third_party/aom/aom_dsp/quantize.c
deleted file mode 100644
index 62dbd86a9..000000000
--- a/third_party/aom/aom_dsp/quantize.c
+++ /dev/null
@@ -1,206 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#include "aom_dsp/quantize.h"
-#include "aom_mem/aom_mem.h"
-
-void quantize_b_helper_c(const tran_low_t *coeff_ptr, intptr_t n_coeffs,
- const int16_t *zbin_ptr, const int16_t *round_ptr,
- const int16_t *quant_ptr,
- const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr,
- tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr,
- uint16_t *eob_ptr, const int16_t *scan,
- const int16_t *iscan, const qm_val_t *qm_ptr,
- const qm_val_t *iqm_ptr, const int log_scale) {
- const int zbins[2] = { ROUND_POWER_OF_TWO(zbin_ptr[0], log_scale),
- ROUND_POWER_OF_TWO(zbin_ptr[1], log_scale) };
- const int nzbins[2] = { zbins[0] * -1, zbins[1] * -1 };
- int i, non_zero_count = (int)n_coeffs, eob = -1;
- (void)iscan;
-
- memset(qcoeff_ptr, 0, n_coeffs * sizeof(*qcoeff_ptr));
- memset(dqcoeff_ptr, 0, n_coeffs * sizeof(*dqcoeff_ptr));
-
- // Pre-scan pass
- for (i = (int)n_coeffs - 1; i >= 0; i--) {
- const int rc = scan[i];
- const qm_val_t wt = qm_ptr != NULL ? qm_ptr[rc] : (1 << AOM_QM_BITS);
- const int coeff = coeff_ptr[rc] * wt;
-
- if (coeff < (zbins[rc != 0] * (1 << AOM_QM_BITS)) &&
- coeff > (nzbins[rc != 0] * (1 << AOM_QM_BITS)))
- non_zero_count--;
- else
- break;
- }
-
- // Quantization pass: All coefficients with index >= zero_flag are
- // skippable. Note: zero_flag can be zero.
- for (i = 0; i < non_zero_count; i++) {
- const int rc = scan[i];
- const int coeff = coeff_ptr[rc];
- const int coeff_sign = (coeff >> 31);
- const int abs_coeff = (coeff ^ coeff_sign) - coeff_sign;
- int tmp32;
-
- const qm_val_t wt = qm_ptr != NULL ? qm_ptr[rc] : (1 << AOM_QM_BITS);
- if (abs_coeff * wt >= (zbins[rc != 0] << AOM_QM_BITS)) {
- int64_t tmp =
- clamp(abs_coeff + ROUND_POWER_OF_TWO(round_ptr[rc != 0], log_scale),
- INT16_MIN, INT16_MAX);
- tmp *= wt;
- tmp32 = (int)(((((tmp * quant_ptr[rc != 0]) >> 16) + tmp) *
- quant_shift_ptr[rc != 0]) >>
- (16 - log_scale + AOM_QM_BITS)); // quantization
- qcoeff_ptr[rc] = (tmp32 ^ coeff_sign) - coeff_sign;
- const int iwt = iqm_ptr != NULL ? iqm_ptr[rc] : (1 << AOM_QM_BITS);
- const int dequant =
- (dequant_ptr[rc != 0] * iwt + (1 << (AOM_QM_BITS - 1))) >>
- AOM_QM_BITS;
- const tran_low_t abs_dqcoeff = (tmp32 * dequant) >> log_scale;
- dqcoeff_ptr[rc] = (tran_low_t)((abs_dqcoeff ^ coeff_sign) - coeff_sign);
-
- if (tmp32) eob = i;
- }
- }
- *eob_ptr = eob + 1;
-}
-
-void highbd_quantize_b_helper_c(
- const tran_low_t *coeff_ptr, intptr_t n_coeffs, const int16_t *zbin_ptr,
- const int16_t *round_ptr, const int16_t *quant_ptr,
- const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr,
- tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr,
- const int16_t *scan, const int16_t *iscan, const qm_val_t *qm_ptr,
- const qm_val_t *iqm_ptr, const int log_scale) {
- int i, eob = -1;
- const int zbins[2] = { ROUND_POWER_OF_TWO(zbin_ptr[0], log_scale),
- ROUND_POWER_OF_TWO(zbin_ptr[1], log_scale) };
- const int nzbins[2] = { zbins[0] * -1, zbins[1] * -1 };
- int dequant;
- int idx_arr[4096];
- (void)iscan;
- int idx = 0;
-
- memset(qcoeff_ptr, 0, n_coeffs * sizeof(*qcoeff_ptr));
- memset(dqcoeff_ptr, 0, n_coeffs * sizeof(*dqcoeff_ptr));
-
- // Pre-scan pass
- for (i = 0; i < n_coeffs; i++) {
- const int rc = scan[i];
- const qm_val_t wt = qm_ptr != NULL ? qm_ptr[rc] : (1 << AOM_QM_BITS);
- const int coeff = coeff_ptr[rc] * wt;
-
- // If the coefficient is out of the base ZBIN range, keep it for
- // quantization.
- if (coeff >= (zbins[rc != 0] * (1 << AOM_QM_BITS)) ||
- coeff <= (nzbins[rc != 0] * (1 << AOM_QM_BITS)))
- idx_arr[idx++] = i;
- }
-
- // Quantization pass: only process the coefficients selected in
- // pre-scan pass. Note: idx can be zero.
- for (i = 0; i < idx; i++) {
- const int rc = scan[idx_arr[i]];
- const int coeff = coeff_ptr[rc];
- const int coeff_sign = (coeff >> 31);
- const qm_val_t wt = qm_ptr != NULL ? qm_ptr[rc] : (1 << AOM_QM_BITS);
- const qm_val_t iwt = iqm_ptr != NULL ? iqm_ptr[rc] : (1 << AOM_QM_BITS);
- const int abs_coeff = (coeff ^ coeff_sign) - coeff_sign;
- const int64_t tmp1 =
- abs_coeff + ROUND_POWER_OF_TWO(round_ptr[rc != 0], log_scale);
- const int64_t tmpw = tmp1 * wt;
- const int64_t tmp2 = ((tmpw * quant_ptr[rc != 0]) >> 16) + tmpw;
- const int abs_qcoeff = (int)((tmp2 * quant_shift_ptr[rc != 0]) >>
- (16 - log_scale + AOM_QM_BITS));
- qcoeff_ptr[rc] = (tran_low_t)((abs_qcoeff ^ coeff_sign) - coeff_sign);
- dequant =
- (dequant_ptr[rc != 0] * iwt + (1 << (AOM_QM_BITS - 1))) >> AOM_QM_BITS;
- const tran_low_t abs_dqcoeff = (abs_qcoeff * dequant) >> log_scale;
- dqcoeff_ptr[rc] = (tran_low_t)((abs_dqcoeff ^ coeff_sign) - coeff_sign);
- if (abs_qcoeff) eob = idx_arr[i];
- }
- *eob_ptr = eob + 1;
-}
-
-/* These functions should only be called when quantisation matrices
- are not used. */
-void aom_quantize_b_c(const tran_low_t *coeff_ptr, intptr_t n_coeffs,
- const int16_t *zbin_ptr, const int16_t *round_ptr,
- const int16_t *quant_ptr, const int16_t *quant_shift_ptr,
- tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr,
- const int16_t *dequant_ptr, uint16_t *eob_ptr,
- const int16_t *scan, const int16_t *iscan) {
- quantize_b_helper_c(coeff_ptr, n_coeffs, zbin_ptr, round_ptr, quant_ptr,
- quant_shift_ptr, qcoeff_ptr, dqcoeff_ptr, dequant_ptr,
- eob_ptr, scan, iscan, NULL, NULL, 0);
-}
-
-void aom_quantize_b_32x32_c(const tran_low_t *coeff_ptr, intptr_t n_coeffs,
- const int16_t *zbin_ptr, const int16_t *round_ptr,
- const int16_t *quant_ptr,
- const int16_t *quant_shift_ptr,
- tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr,
- const int16_t *dequant_ptr, uint16_t *eob_ptr,
- const int16_t *scan, const int16_t *iscan) {
- quantize_b_helper_c(coeff_ptr, n_coeffs, zbin_ptr, round_ptr, quant_ptr,
- quant_shift_ptr, qcoeff_ptr, dqcoeff_ptr, dequant_ptr,
- eob_ptr, scan, iscan, NULL, NULL, 1);
-}
-
-void aom_quantize_b_64x64_c(const tran_low_t *coeff_ptr, intptr_t n_coeffs,
- const int16_t *zbin_ptr, const int16_t *round_ptr,
- const int16_t *quant_ptr,
- const int16_t *quant_shift_ptr,
- tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr,
- const int16_t *dequant_ptr, uint16_t *eob_ptr,
- const int16_t *scan, const int16_t *iscan) {
- quantize_b_helper_c(coeff_ptr, n_coeffs, zbin_ptr, round_ptr, quant_ptr,
- quant_shift_ptr, qcoeff_ptr, dqcoeff_ptr, dequant_ptr,
- eob_ptr, scan, iscan, NULL, NULL, 2);
-}
-
-void aom_highbd_quantize_b_c(const tran_low_t *coeff_ptr, intptr_t n_coeffs,
- const int16_t *zbin_ptr, const int16_t *round_ptr,
- const int16_t *quant_ptr,
- const int16_t *quant_shift_ptr,
- tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr,
- const int16_t *dequant_ptr, uint16_t *eob_ptr,
- const int16_t *scan, const int16_t *iscan) {
- highbd_quantize_b_helper_c(coeff_ptr, n_coeffs, zbin_ptr, round_ptr,
- quant_ptr, quant_shift_ptr, qcoeff_ptr,
- dqcoeff_ptr, dequant_ptr, eob_ptr, scan, iscan,
- NULL, NULL, 0);
-}
-
-void aom_highbd_quantize_b_32x32_c(
- const tran_low_t *coeff_ptr, intptr_t n_coeffs, const int16_t *zbin_ptr,
- const int16_t *round_ptr, const int16_t *quant_ptr,
- const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr,
- tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr,
- const int16_t *scan, const int16_t *iscan) {
- highbd_quantize_b_helper_c(coeff_ptr, n_coeffs, zbin_ptr, round_ptr,
- quant_ptr, quant_shift_ptr, qcoeff_ptr,
- dqcoeff_ptr, dequant_ptr, eob_ptr, scan, iscan,
- NULL, NULL, 1);
-}
-
-void aom_highbd_quantize_b_64x64_c(
- const tran_low_t *coeff_ptr, intptr_t n_coeffs, const int16_t *zbin_ptr,
- const int16_t *round_ptr, const int16_t *quant_ptr,
- const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr,
- tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr,
- const int16_t *scan, const int16_t *iscan) {
- highbd_quantize_b_helper_c(coeff_ptr, n_coeffs, zbin_ptr, round_ptr,
- quant_ptr, quant_shift_ptr, qcoeff_ptr,
- dqcoeff_ptr, dequant_ptr, eob_ptr, scan, iscan,
- NULL, NULL, 2);
-}
diff --git a/third_party/aom/aom_dsp/quantize.h b/third_party/aom/aom_dsp/quantize.h
deleted file mode 100644
index c55ab234e..000000000
--- a/third_party/aom/aom_dsp/quantize.h
+++ /dev/null
@@ -1,59 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#ifndef AOM_AOM_DSP_QUANTIZE_H_
-#define AOM_AOM_DSP_QUANTIZE_H_
-
-#include "config/aom_config.h"
-
-#include "aom_dsp/aom_dsp_common.h"
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-void quantize_b_helper_c(const tran_low_t *coeff_ptr, intptr_t n_coeffs,
- const int16_t *zbin_ptr, const int16_t *round_ptr,
- const int16_t *quant_ptr,
- const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr,
- tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr,
- uint16_t *eob_ptr, const int16_t *scan,
- const int16_t *iscan, const qm_val_t *qm_ptr,
- const qm_val_t *iqm_ptr, const int log_scale);
-
-void aom_quantize_b_c(const tran_low_t *coeff_ptr, intptr_t n_coeffs,
- const int16_t *zbin_ptr, const int16_t *round_ptr,
- const int16_t *quant_ptr, const int16_t *quant_shift_ptr,
- tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr,
- const int16_t *dequant_ptr, uint16_t *eob_ptr,
- const int16_t *scan, const int16_t *iscan);
-
-void highbd_quantize_b_helper_c(
- const tran_low_t *coeff_ptr, intptr_t n_coeffs, const int16_t *zbin_ptr,
- const int16_t *round_ptr, const int16_t *quant_ptr,
- const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr,
- tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr,
- const int16_t *scan, const int16_t *iscan, const qm_val_t *qm_ptr,
- const qm_val_t *iqm_ptr, const int log_scale);
-
-void aom_highbd_quantize_b_c(const tran_low_t *coeff_ptr, intptr_t n_coeffs,
- const int16_t *zbin_ptr, const int16_t *round_ptr,
- const int16_t *quant_ptr,
- const int16_t *quant_shift_ptr,
- tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr,
- const int16_t *dequant_ptr, uint16_t *eob_ptr,
- const int16_t *scan, const int16_t *iscan);
-
-#ifdef __cplusplus
-} // extern "C"
-#endif
-
-#endif // AOM_AOM_DSP_QUANTIZE_H_
diff --git a/third_party/aom/aom_dsp/sad.c b/third_party/aom/aom_dsp/sad.c
deleted file mode 100644
index 1e24df4a5..000000000
--- a/third_party/aom/aom_dsp/sad.c
+++ /dev/null
@@ -1,304 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#include <stdlib.h>
-
-#include "config/aom_config.h"
-#include "config/aom_dsp_rtcd.h"
-
-#include "aom/aom_integer.h"
-#include "aom_ports/mem.h"
-#include "aom_dsp/blend.h"
-
-/* Sum the difference between every corresponding element of the buffers. */
-static INLINE unsigned int sad(const uint8_t *a, int a_stride, const uint8_t *b,
- int b_stride, int width, int height) {
- int y, x;
- unsigned int sad = 0;
-
- for (y = 0; y < height; y++) {
- for (x = 0; x < width; x++) sad += abs(a[x] - b[x]);
-
- a += a_stride;
- b += b_stride;
- }
- return sad;
-}
-
-#define sadMxh(m) \
- unsigned int aom_sad##m##xh_c(const uint8_t *a, int a_stride, \
- const uint8_t *b, int b_stride, int width, \
- int height) { \
- return sad(a, a_stride, b, b_stride, width, height); \
- }
-
-#define sadMxN(m, n) \
- unsigned int aom_sad##m##x##n##_c(const uint8_t *src, int src_stride, \
- const uint8_t *ref, int ref_stride) { \
- return sad(src, src_stride, ref, ref_stride, m, n); \
- } \
- unsigned int aom_sad##m##x##n##_avg_c(const uint8_t *src, int src_stride, \
- const uint8_t *ref, int ref_stride, \
- const uint8_t *second_pred) { \
- uint8_t comp_pred[m * n]; \
- aom_comp_avg_pred(comp_pred, second_pred, m, n, ref, ref_stride); \
- return sad(src, src_stride, comp_pred, m, m, n); \
- } \
- unsigned int aom_jnt_sad##m##x##n##_avg_c( \
- const uint8_t *src, int src_stride, const uint8_t *ref, int ref_stride, \
- const uint8_t *second_pred, const JNT_COMP_PARAMS *jcp_param) { \
- uint8_t comp_pred[m * n]; \
- aom_jnt_comp_avg_pred_c(comp_pred, second_pred, m, n, ref, ref_stride, \
- jcp_param); \
- return sad(src, src_stride, comp_pred, m, m, n); \
- }
-
-// Calculate sad against 4 reference locations and store each in sad_array
-#define sadMxNx4D(m, n) \
- void aom_sad##m##x##n##x4d_c(const uint8_t *src, int src_stride, \
- const uint8_t *const ref_array[], \
- int ref_stride, uint32_t *sad_array) { \
- int i; \
- for (i = 0; i < 4; ++i) \
- sad_array[i] = \
- aom_sad##m##x##n##_c(src, src_stride, ref_array[i], ref_stride); \
- }
-
-/* clang-format off */
-// 128x128
-sadMxN(128, 128)
-sadMxNx4D(128, 128)
-
-// 128x64
-sadMxN(128, 64)
-sadMxNx4D(128, 64)
-
-// 64x128
-sadMxN(64, 128)
-sadMxNx4D(64, 128)
-
-// 64x64
-sadMxN(64, 64)
-sadMxNx4D(64, 64)
-
-// 64x32
-sadMxN(64, 32)
-sadMxNx4D(64, 32)
-
-// 32x64
-sadMxN(32, 64)
-sadMxNx4D(32, 64)
-
-// 32x32
-sadMxN(32, 32)
-sadMxNx4D(32, 32)
-
-// 32x16
-sadMxN(32, 16)
-sadMxNx4D(32, 16)
-
-// 16x32
-sadMxN(16, 32)
-sadMxNx4D(16, 32)
-
-// 16x16
-sadMxN(16, 16)
-sadMxNx4D(16, 16)
-
-// 16x8
-sadMxN(16, 8)
-sadMxNx4D(16, 8)
-
-// 8x16
-sadMxN(8, 16)
-sadMxNx4D(8, 16)
-
-// 8x8
-sadMxN(8, 8)
-sadMxNx4D(8, 8)
-
-// 8x4
-sadMxN(8, 4)
-sadMxNx4D(8, 4)
-
-// 4x8
-sadMxN(4, 8)
-sadMxNx4D(4, 8)
-
-// 4x4
-sadMxN(4, 4)
-sadMxNx4D(4, 4)
-
-sadMxh(128);
-sadMxh(64);
-sadMxh(32);
-sadMxh(16);
-sadMxh(8);
-sadMxh(4);
-
-sadMxN(4, 16)
-sadMxNx4D(4, 16)
-sadMxN(16, 4)
-sadMxNx4D(16, 4)
-sadMxN(8, 32)
-sadMxNx4D(8, 32)
-sadMxN(32, 8)
-sadMxNx4D(32, 8)
-sadMxN(16, 64)
-sadMxNx4D(16, 64)
-sadMxN(64, 16)
-sadMxNx4D(64, 16)
-
- /* clang-format on */
-
- static INLINE
- unsigned int highbd_sad(const uint8_t *a8, int a_stride, const uint8_t *b8,
- int b_stride, int width, int height) {
- int y, x;
- unsigned int sad = 0;
- const uint16_t *a = CONVERT_TO_SHORTPTR(a8);
- const uint16_t *b = CONVERT_TO_SHORTPTR(b8);
- for (y = 0; y < height; y++) {
- for (x = 0; x < width; x++) sad += abs(a[x] - b[x]);
-
- a += a_stride;
- b += b_stride;
- }
- return sad;
-}
-
-static INLINE unsigned int highbd_sadb(const uint8_t *a8, int a_stride,
- const uint16_t *b, int b_stride,
- int width, int height) {
- int y, x;
- unsigned int sad = 0;
- const uint16_t *a = CONVERT_TO_SHORTPTR(a8);
- for (y = 0; y < height; y++) {
- for (x = 0; x < width; x++) sad += abs(a[x] - b[x]);
-
- a += a_stride;
- b += b_stride;
- }
- return sad;
-}
-
-#define highbd_sadMxN(m, n) \
- unsigned int aom_highbd_sad##m##x##n##_c(const uint8_t *src, int src_stride, \
- const uint8_t *ref, \
- int ref_stride) { \
- return highbd_sad(src, src_stride, ref, ref_stride, m, n); \
- } \
- unsigned int aom_highbd_sad##m##x##n##_avg_c( \
- const uint8_t *src, int src_stride, const uint8_t *ref, int ref_stride, \
- const uint8_t *second_pred) { \
- uint16_t comp_pred[m * n]; \
- aom_highbd_comp_avg_pred(CONVERT_TO_BYTEPTR(comp_pred), second_pred, m, n, \
- ref, ref_stride); \
- return highbd_sadb(src, src_stride, comp_pred, m, m, n); \
- } \
- unsigned int aom_highbd_jnt_sad##m##x##n##_avg_c( \
- const uint8_t *src, int src_stride, const uint8_t *ref, int ref_stride, \
- const uint8_t *second_pred, const JNT_COMP_PARAMS *jcp_param) { \
- uint16_t comp_pred[m * n]; \
- aom_highbd_jnt_comp_avg_pred(CONVERT_TO_BYTEPTR(comp_pred), second_pred, \
- m, n, ref, ref_stride, jcp_param); \
- return highbd_sadb(src, src_stride, comp_pred, m, m, n); \
- }
-
-#define highbd_sadMxNx4D(m, n) \
- void aom_highbd_sad##m##x##n##x4d_c(const uint8_t *src, int src_stride, \
- const uint8_t *const ref_array[], \
- int ref_stride, uint32_t *sad_array) { \
- int i; \
- for (i = 0; i < 4; ++i) { \
- sad_array[i] = aom_highbd_sad##m##x##n##_c(src, src_stride, \
- ref_array[i], ref_stride); \
- } \
- }
-
-/* clang-format off */
-// 128x128
-highbd_sadMxN(128, 128)
-highbd_sadMxNx4D(128, 128)
-
-// 128x64
-highbd_sadMxN(128, 64)
-highbd_sadMxNx4D(128, 64)
-
-// 64x128
-highbd_sadMxN(64, 128)
-highbd_sadMxNx4D(64, 128)
-
-// 64x64
-highbd_sadMxN(64, 64)
-highbd_sadMxNx4D(64, 64)
-
-// 64x32
-highbd_sadMxN(64, 32)
-highbd_sadMxNx4D(64, 32)
-
-// 32x64
-highbd_sadMxN(32, 64)
-highbd_sadMxNx4D(32, 64)
-
-// 32x32
-highbd_sadMxN(32, 32)
-highbd_sadMxNx4D(32, 32)
-
-// 32x16
-highbd_sadMxN(32, 16)
-highbd_sadMxNx4D(32, 16)
-
-// 16x32
-highbd_sadMxN(16, 32)
-highbd_sadMxNx4D(16, 32)
-
-// 16x16
-highbd_sadMxN(16, 16)
-highbd_sadMxNx4D(16, 16)
-
-// 16x8
-highbd_sadMxN(16, 8)
-highbd_sadMxNx4D(16, 8)
-
-// 8x16
-highbd_sadMxN(8, 16)
-highbd_sadMxNx4D(8, 16)
-
-// 8x8
-highbd_sadMxN(8, 8)
-highbd_sadMxNx4D(8, 8)
-
-// 8x4
-highbd_sadMxN(8, 4)
-highbd_sadMxNx4D(8, 4)
-
-// 4x8
-highbd_sadMxN(4, 8)
-highbd_sadMxNx4D(4, 8)
-
-// 4x4
-highbd_sadMxN(4, 4)
-highbd_sadMxNx4D(4, 4)
-
-highbd_sadMxN(4, 16)
-highbd_sadMxNx4D(4, 16)
-highbd_sadMxN(16, 4)
-highbd_sadMxNx4D(16, 4)
-highbd_sadMxN(8, 32)
-highbd_sadMxNx4D(8, 32)
-highbd_sadMxN(32, 8)
-highbd_sadMxNx4D(32, 8)
-highbd_sadMxN(16, 64)
-highbd_sadMxNx4D(16, 64)
-highbd_sadMxN(64, 16)
-highbd_sadMxNx4D(64, 16)
- /* clang-format on */
diff --git a/third_party/aom/aom_dsp/sad_av1.c b/third_party/aom/aom_dsp/sad_av1.c
deleted file mode 100644
index c176001d6..000000000
--- a/third_party/aom/aom_dsp/sad_av1.c
+++ /dev/null
@@ -1,248 +0,0 @@
-/*
- * Copyright (c) 2017, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#include <stdlib.h>
-
-#include "config/aom_config.h"
-#include "config/aom_dsp_rtcd.h"
-
-#include "aom/aom_integer.h"
-#include "aom_ports/mem.h"
-#include "aom_dsp/blend.h"
-
-static INLINE unsigned int masked_sad(const uint8_t *src, int src_stride,
- const uint8_t *a, int a_stride,
- const uint8_t *b, int b_stride,
- const uint8_t *m, int m_stride, int width,
- int height) {
- int y, x;
- unsigned int sad = 0;
- for (y = 0; y < height; y++) {
- for (x = 0; x < width; x++) {
- const int16_t pred = AOM_BLEND_A64(m[x], a[x], b[x]);
- sad += abs(pred - src[x]);
- }
- src += src_stride;
- a += a_stride;
- b += b_stride;
- m += m_stride;
- }
- sad = (sad + 31) >> 6;
- return sad;
-}
-
-#define MASKSADMxN(m, n) \
- unsigned int aom_masked_sad##m##x##n##_c( \
- const uint8_t *src, int src_stride, const uint8_t *ref, int ref_stride, \
- const uint8_t *second_pred, const uint8_t *msk, int msk_stride, \
- int invert_mask) { \
- if (!invert_mask) \
- return masked_sad(src, src_stride, ref, ref_stride, second_pred, m, msk, \
- msk_stride, m, n); \
- else \
- return masked_sad(src, src_stride, second_pred, m, ref, ref_stride, msk, \
- msk_stride, m, n); \
- }
-
-/* clang-format off */
-MASKSADMxN(128, 128)
-MASKSADMxN(128, 64)
-MASKSADMxN(64, 128)
-MASKSADMxN(64, 64)
-MASKSADMxN(64, 32)
-MASKSADMxN(32, 64)
-MASKSADMxN(32, 32)
-MASKSADMxN(32, 16)
-MASKSADMxN(16, 32)
-MASKSADMxN(16, 16)
-MASKSADMxN(16, 8)
-MASKSADMxN(8, 16)
-MASKSADMxN(8, 8)
-MASKSADMxN(8, 4)
-MASKSADMxN(4, 8)
-MASKSADMxN(4, 4)
-MASKSADMxN(4, 16)
-MASKSADMxN(16, 4)
-MASKSADMxN(8, 32)
-MASKSADMxN(32, 8)
-MASKSADMxN(16, 64)
-MASKSADMxN(64, 16)
-
- /* clang-format on */
-
- static INLINE
- unsigned int highbd_masked_sad(const uint8_t *src8, int src_stride,
- const uint8_t *a8, int a_stride,
- const uint8_t *b8, int b_stride,
- const uint8_t *m, int m_stride, int width,
- int height) {
- int y, x;
- unsigned int sad = 0;
- const uint16_t *src = CONVERT_TO_SHORTPTR(src8);
- const uint16_t *a = CONVERT_TO_SHORTPTR(a8);
- const uint16_t *b = CONVERT_TO_SHORTPTR(b8);
-
- for (y = 0; y < height; y++) {
- for (x = 0; x < width; x++) {
- const uint16_t pred = AOM_BLEND_A64(m[x], a[x], b[x]);
- sad += abs(pred - src[x]);
- }
-
- src += src_stride;
- a += a_stride;
- b += b_stride;
- m += m_stride;
- }
- sad = (sad + 31) >> 6;
-
- return sad;
-}
-
-#define HIGHBD_MASKSADMXN(m, n) \
- unsigned int aom_highbd_masked_sad##m##x##n##_c( \
- const uint8_t *src8, int src_stride, const uint8_t *ref8, \
- int ref_stride, const uint8_t *second_pred8, const uint8_t *msk, \
- int msk_stride, int invert_mask) { \
- if (!invert_mask) \
- return highbd_masked_sad(src8, src_stride, ref8, ref_stride, \
- second_pred8, m, msk, msk_stride, m, n); \
- else \
- return highbd_masked_sad(src8, src_stride, second_pred8, m, ref8, \
- ref_stride, msk, msk_stride, m, n); \
- }
-
-HIGHBD_MASKSADMXN(128, 128)
-HIGHBD_MASKSADMXN(128, 64)
-HIGHBD_MASKSADMXN(64, 128)
-HIGHBD_MASKSADMXN(64, 64)
-HIGHBD_MASKSADMXN(64, 32)
-HIGHBD_MASKSADMXN(32, 64)
-HIGHBD_MASKSADMXN(32, 32)
-HIGHBD_MASKSADMXN(32, 16)
-HIGHBD_MASKSADMXN(16, 32)
-HIGHBD_MASKSADMXN(16, 16)
-HIGHBD_MASKSADMXN(16, 8)
-HIGHBD_MASKSADMXN(8, 16)
-HIGHBD_MASKSADMXN(8, 8)
-HIGHBD_MASKSADMXN(8, 4)
-HIGHBD_MASKSADMXN(4, 8)
-HIGHBD_MASKSADMXN(4, 4)
-HIGHBD_MASKSADMXN(4, 16)
-HIGHBD_MASKSADMXN(16, 4)
-HIGHBD_MASKSADMXN(8, 32)
-HIGHBD_MASKSADMXN(32, 8)
-HIGHBD_MASKSADMXN(16, 64)
-HIGHBD_MASKSADMXN(64, 16)
-
-// pre: predictor being evaluated
-// wsrc: target weighted prediction (has been *4096 to keep precision)
-// mask: 2d weights (scaled by 4096)
-static INLINE unsigned int obmc_sad(const uint8_t *pre, int pre_stride,
- const int32_t *wsrc, const int32_t *mask,
- int width, int height) {
- int y, x;
- unsigned int sad = 0;
-
- for (y = 0; y < height; y++) {
- for (x = 0; x < width; x++)
- sad += ROUND_POWER_OF_TWO(abs(wsrc[x] - pre[x] * mask[x]), 12);
-
- pre += pre_stride;
- wsrc += width;
- mask += width;
- }
-
- return sad;
-}
-
-#define OBMCSADMxN(m, n) \
- unsigned int aom_obmc_sad##m##x##n##_c(const uint8_t *ref, int ref_stride, \
- const int32_t *wsrc, \
- const int32_t *mask) { \
- return obmc_sad(ref, ref_stride, wsrc, mask, m, n); \
- }
-
-/* clang-format off */
-OBMCSADMxN(128, 128)
-OBMCSADMxN(128, 64)
-OBMCSADMxN(64, 128)
-OBMCSADMxN(64, 64)
-OBMCSADMxN(64, 32)
-OBMCSADMxN(32, 64)
-OBMCSADMxN(32, 32)
-OBMCSADMxN(32, 16)
-OBMCSADMxN(16, 32)
-OBMCSADMxN(16, 16)
-OBMCSADMxN(16, 8)
-OBMCSADMxN(8, 16)
-OBMCSADMxN(8, 8)
-OBMCSADMxN(8, 4)
-OBMCSADMxN(4, 8)
-OBMCSADMxN(4, 4)
-OBMCSADMxN(4, 16)
-OBMCSADMxN(16, 4)
-OBMCSADMxN(8, 32)
-OBMCSADMxN(32, 8)
-OBMCSADMxN(16, 64)
-OBMCSADMxN(64, 16)
- /* clang-format on */
-
- static INLINE
- unsigned int highbd_obmc_sad(const uint8_t *pre8, int pre_stride,
- const int32_t *wsrc, const int32_t *mask,
- int width, int height) {
- int y, x;
- unsigned int sad = 0;
- const uint16_t *pre = CONVERT_TO_SHORTPTR(pre8);
-
- for (y = 0; y < height; y++) {
- for (x = 0; x < width; x++)
- sad += ROUND_POWER_OF_TWO(abs(wsrc[x] - pre[x] * mask[x]), 12);
-
- pre += pre_stride;
- wsrc += width;
- mask += width;
- }
-
- return sad;
-}
-
-#define HIGHBD_OBMCSADMXN(m, n) \
- unsigned int aom_highbd_obmc_sad##m##x##n##_c( \
- const uint8_t *ref, int ref_stride, const int32_t *wsrc, \
- const int32_t *mask) { \
- return highbd_obmc_sad(ref, ref_stride, wsrc, mask, m, n); \
- }
-
-/* clang-format off */
-HIGHBD_OBMCSADMXN(128, 128)
-HIGHBD_OBMCSADMXN(128, 64)
-HIGHBD_OBMCSADMXN(64, 128)
-HIGHBD_OBMCSADMXN(64, 64)
-HIGHBD_OBMCSADMXN(64, 32)
-HIGHBD_OBMCSADMXN(32, 64)
-HIGHBD_OBMCSADMXN(32, 32)
-HIGHBD_OBMCSADMXN(32, 16)
-HIGHBD_OBMCSADMXN(16, 32)
-HIGHBD_OBMCSADMXN(16, 16)
-HIGHBD_OBMCSADMXN(16, 8)
-HIGHBD_OBMCSADMXN(8, 16)
-HIGHBD_OBMCSADMXN(8, 8)
-HIGHBD_OBMCSADMXN(8, 4)
-HIGHBD_OBMCSADMXN(4, 8)
-HIGHBD_OBMCSADMXN(4, 4)
-HIGHBD_OBMCSADMXN(4, 16)
-HIGHBD_OBMCSADMXN(16, 4)
-HIGHBD_OBMCSADMXN(8, 32)
-HIGHBD_OBMCSADMXN(32, 8)
-HIGHBD_OBMCSADMXN(16, 64)
-HIGHBD_OBMCSADMXN(64, 16)
-/* clang-format on */
diff --git a/third_party/aom/aom_dsp/simd/v128_intrinsics.h b/third_party/aom/aom_dsp/simd/v128_intrinsics.h
deleted file mode 100644
index 01dbb8fd2..000000000
--- a/third_party/aom/aom_dsp/simd/v128_intrinsics.h
+++ /dev/null
@@ -1,344 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#ifndef AOM_AOM_DSP_SIMD_V128_INTRINSICS_H_
-#define AOM_AOM_DSP_SIMD_V128_INTRINSICS_H_
-
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-
-#include "aom_dsp/simd/v128_intrinsics_c.h"
-#include "aom_dsp/simd/v64_intrinsics.h"
-
-/* Fallback to plain, unoptimised C. */
-
-typedef c_v128 v128;
-
-SIMD_INLINE uint32_t v128_low_u32(v128 a) { return c_v128_low_u32(a); }
-SIMD_INLINE v64 v128_low_v64(v128 a) { return c_v128_low_v64(a); }
-SIMD_INLINE v64 v128_high_v64(v128 a) { return c_v128_high_v64(a); }
-SIMD_INLINE v128 v128_from_64(uint64_t hi, uint64_t lo) {
- return c_v128_from_64(hi, lo);
-}
-SIMD_INLINE v128 v128_from_v64(v64 hi, v64 lo) {
- return c_v128_from_v64(hi, lo);
-}
-SIMD_INLINE v128 v128_from_32(uint32_t a, uint32_t b, uint32_t c, uint32_t d) {
- return c_v128_from_32(a, b, c, d);
-}
-
-SIMD_INLINE v128 v128_load_unaligned(const void *p) {
- return c_v128_load_unaligned(p);
-}
-SIMD_INLINE v128 v128_load_aligned(const void *p) {
- return c_v128_load_aligned(p);
-}
-
-SIMD_INLINE void v128_store_unaligned(void *p, v128 a) {
- c_v128_store_unaligned(p, a);
-}
-SIMD_INLINE void v128_store_aligned(void *p, v128 a) {
- c_v128_store_aligned(p, a);
-}
-
-SIMD_INLINE v128 v128_align(v128 a, v128 b, unsigned int c) {
- return c_v128_align(a, b, c);
-}
-
-SIMD_INLINE v128 v128_zero() { return c_v128_zero(); }
-SIMD_INLINE v128 v128_dup_8(uint8_t x) { return c_v128_dup_8(x); }
-SIMD_INLINE v128 v128_dup_16(uint16_t x) { return c_v128_dup_16(x); }
-SIMD_INLINE v128 v128_dup_32(uint32_t x) { return c_v128_dup_32(x); }
-SIMD_INLINE v128 v128_dup_64(uint64_t x) { return c_v128_dup_64(x); }
-
-typedef uint32_t sad128_internal;
-SIMD_INLINE sad128_internal v128_sad_u8_init() { return c_v128_sad_u8_init(); }
-SIMD_INLINE sad128_internal v128_sad_u8(sad128_internal s, v128 a, v128 b) {
- return c_v128_sad_u8(s, a, b);
-}
-SIMD_INLINE uint32_t v128_sad_u8_sum(sad128_internal s) {
- return c_v128_sad_u8_sum(s);
-}
-typedef uint32_t ssd128_internal;
-SIMD_INLINE ssd128_internal v128_ssd_u8_init() { return c_v128_ssd_u8_init(); }
-SIMD_INLINE ssd128_internal v128_ssd_u8(ssd128_internal s, v128 a, v128 b) {
- return c_v128_ssd_u8(s, a, b);
-}
-SIMD_INLINE uint32_t v128_ssd_u8_sum(ssd128_internal s) {
- return c_v128_ssd_u8_sum(s);
-}
-SIMD_INLINE int64_t v128_dotp_su8(v128 a, v128 b) {
- return c_v128_dotp_su8(a, b);
-}
-SIMD_INLINE int64_t v128_dotp_s16(v128 a, v128 b) {
- return c_v128_dotp_s16(a, b);
-}
-SIMD_INLINE int64_t v128_dotp_s32(v128 a, v128 b) {
- return c_v128_dotp_s32(a, b);
-}
-SIMD_INLINE uint64_t v128_hadd_u8(v128 a) { return c_v128_hadd_u8(a); }
-
-SIMD_INLINE v128 v128_or(v128 a, v128 b) { return c_v128_or(a, b); }
-SIMD_INLINE v128 v128_xor(v128 a, v128 b) { return c_v128_xor(a, b); }
-SIMD_INLINE v128 v128_and(v128 a, v128 b) { return c_v128_and(a, b); }
-SIMD_INLINE v128 v128_andn(v128 a, v128 b) { return c_v128_andn(a, b); }
-
-SIMD_INLINE v128 v128_add_8(v128 a, v128 b) { return c_v128_add_8(a, b); }
-SIMD_INLINE v128 v128_add_16(v128 a, v128 b) { return c_v128_add_16(a, b); }
-SIMD_INLINE v128 v128_sadd_u8(v128 a, v128 b) { return c_v128_sadd_u8(a, b); }
-SIMD_INLINE v128 v128_sadd_s8(v128 a, v128 b) { return c_v128_sadd_s8(a, b); }
-SIMD_INLINE v128 v128_sadd_s16(v128 a, v128 b) { return c_v128_sadd_s16(a, b); }
-SIMD_INLINE v128 v128_add_32(v128 a, v128 b) { return c_v128_add_32(a, b); }
-SIMD_INLINE v128 v128_add_64(v128 a, v128 b) { return c_v128_add_64(a, b); }
-SIMD_INLINE v128 v128_padd_u8(v128 a) { return c_v128_padd_u8(a); }
-SIMD_INLINE v128 v128_padd_s16(v128 a) { return c_v128_padd_s16(a); }
-SIMD_INLINE v128 v128_sub_8(v128 a, v128 b) { return c_v128_sub_8(a, b); }
-SIMD_INLINE v128 v128_ssub_u8(v128 a, v128 b) { return c_v128_ssub_u8(a, b); }
-SIMD_INLINE v128 v128_ssub_s8(v128 a, v128 b) { return c_v128_ssub_s8(a, b); }
-SIMD_INLINE v128 v128_sub_16(v128 a, v128 b) { return c_v128_sub_16(a, b); }
-SIMD_INLINE v128 v128_ssub_s16(v128 a, v128 b) { return c_v128_ssub_s16(a, b); }
-SIMD_INLINE v128 v128_ssub_u16(v128 a, v128 b) { return c_v128_ssub_u16(a, b); }
-SIMD_INLINE v128 v128_sub_32(v128 a, v128 b) { return c_v128_sub_32(a, b); }
-SIMD_INLINE v128 v128_sub_64(v128 a, v128 b) { return c_v128_sub_64(a, b); }
-SIMD_INLINE v128 v128_abs_s16(v128 a) { return c_v128_abs_s16(a); }
-SIMD_INLINE v128 v128_abs_s8(v128 a) { return c_v128_abs_s8(a); }
-
-SIMD_INLINE v128 v128_mul_s16(v64 a, v64 b) { return c_v128_mul_s16(a, b); }
-SIMD_INLINE v128 v128_mullo_s16(v128 a, v128 b) {
- return c_v128_mullo_s16(a, b);
-}
-SIMD_INLINE v128 v128_mulhi_s16(v128 a, v128 b) {
- return c_v128_mulhi_s16(a, b);
-}
-SIMD_INLINE v128 v128_mullo_s32(v128 a, v128 b) {
- return c_v128_mullo_s32(a, b);
-}
-SIMD_INLINE v128 v128_madd_s16(v128 a, v128 b) { return c_v128_madd_s16(a, b); }
-SIMD_INLINE v128 v128_madd_us8(v128 a, v128 b) { return c_v128_madd_us8(a, b); }
-
-SIMD_INLINE uint32_t v128_movemask_8(v128 a) { return c_v128_movemask_8(a); }
-SIMD_INLINE v128 v128_blend_8(v128 a, v128 b, v128 c) {
- return c_v128_blend_8(a, b, c);
-}
-
-SIMD_INLINE v128 v128_avg_u8(v128 a, v128 b) { return c_v128_avg_u8(a, b); }
-SIMD_INLINE v128 v128_rdavg_u8(v128 a, v128 b) { return c_v128_rdavg_u8(a, b); }
-SIMD_INLINE v128 v128_rdavg_u16(v128 a, v128 b) {
- return c_v128_rdavg_u16(a, b);
-}
-SIMD_INLINE v128 v128_avg_u16(v128 a, v128 b) { return c_v128_avg_u16(a, b); }
-SIMD_INLINE v128 v128_min_u8(v128 a, v128 b) { return c_v128_min_u8(a, b); }
-SIMD_INLINE v128 v128_max_u8(v128 a, v128 b) { return c_v128_max_u8(a, b); }
-SIMD_INLINE v128 v128_min_s8(v128 a, v128 b) { return c_v128_min_s8(a, b); }
-SIMD_INLINE v128 v128_max_s8(v128 a, v128 b) { return c_v128_max_s8(a, b); }
-SIMD_INLINE v128 v128_min_s16(v128 a, v128 b) { return c_v128_min_s16(a, b); }
-SIMD_INLINE v128 v128_max_s16(v128 a, v128 b) { return c_v128_max_s16(a, b); }
-SIMD_INLINE v128 v128_min_s32(v128 a, v128 b) { return c_v128_min_s32(a, b); }
-SIMD_INLINE v128 v128_max_s32(v128 a, v128 b) { return c_v128_max_s32(a, b); }
-
-SIMD_INLINE v128 v128_ziplo_8(v128 a, v128 b) { return c_v128_ziplo_8(a, b); }
-SIMD_INLINE v128 v128_ziphi_8(v128 a, v128 b) { return c_v128_ziphi_8(a, b); }
-SIMD_INLINE v128 v128_ziplo_16(v128 a, v128 b) { return c_v128_ziplo_16(a, b); }
-SIMD_INLINE v128 v128_ziphi_16(v128 a, v128 b) { return c_v128_ziphi_16(a, b); }
-SIMD_INLINE v128 v128_ziplo_32(v128 a, v128 b) { return c_v128_ziplo_32(a, b); }
-SIMD_INLINE v128 v128_ziphi_32(v128 a, v128 b) { return c_v128_ziphi_32(a, b); }
-SIMD_INLINE v128 v128_ziplo_64(v128 a, v128 b) { return c_v128_ziplo_64(a, b); }
-SIMD_INLINE v128 v128_ziphi_64(v128 a, v128 b) { return c_v128_ziphi_64(a, b); }
-SIMD_INLINE v128 v128_zip_8(v64 a, v64 b) { return c_v128_zip_8(a, b); }
-SIMD_INLINE v128 v128_zip_16(v64 a, v64 b) { return c_v128_zip_16(a, b); }
-SIMD_INLINE v128 v128_zip_32(v64 a, v64 b) { return c_v128_zip_32(a, b); }
-SIMD_INLINE v128 v128_unziplo_8(v128 a, v128 b) {
- return c_v128_unziplo_8(a, b);
-}
-SIMD_INLINE v128 v128_unziphi_8(v128 a, v128 b) {
- return c_v128_unziphi_8(a, b);
-}
-SIMD_INLINE v128 v128_unziplo_16(v128 a, v128 b) {
- return c_v128_unziplo_16(a, b);
-}
-SIMD_INLINE v128 v128_unziphi_16(v128 a, v128 b) {
- return c_v128_unziphi_16(a, b);
-}
-SIMD_INLINE v128 v128_unziplo_32(v128 a, v128 b) {
- return c_v128_unziplo_32(a, b);
-}
-SIMD_INLINE v128 v128_unziphi_32(v128 a, v128 b) {
- return c_v128_unziphi_32(a, b);
-}
-SIMD_INLINE v128 v128_unpack_u8_s16(v64 a) { return c_v128_unpack_u8_s16(a); }
-SIMD_INLINE v128 v128_unpacklo_u8_s16(v128 a) {
- return c_v128_unpacklo_u8_s16(a);
-}
-SIMD_INLINE v128 v128_unpackhi_u8_s16(v128 a) {
- return c_v128_unpackhi_u8_s16(a);
-}
-SIMD_INLINE v128 v128_unpack_s8_s16(v64 a) { return c_v128_unpack_s8_s16(a); }
-SIMD_INLINE v128 v128_unpacklo_s8_s16(v128 a) {
- return c_v128_unpacklo_s8_s16(a);
-}
-SIMD_INLINE v128 v128_unpackhi_s8_s16(v128 a) {
- return c_v128_unpackhi_s8_s16(a);
-}
-SIMD_INLINE v128 v128_pack_s32_s16(v128 a, v128 b) {
- return c_v128_pack_s32_s16(a, b);
-}
-SIMD_INLINE v128 v128_pack_s32_u16(v128 a, v128 b) {
- return c_v128_pack_s32_u16(a, b);
-}
-SIMD_INLINE v128 v128_pack_s16_u8(v128 a, v128 b) {
- return c_v128_pack_s16_u8(a, b);
-}
-SIMD_INLINE v128 v128_pack_s16_s8(v128 a, v128 b) {
- return c_v128_pack_s16_s8(a, b);
-}
-SIMD_INLINE v128 v128_unpack_u16_s32(v64 a) { return c_v128_unpack_u16_s32(a); }
-SIMD_INLINE v128 v128_unpack_s16_s32(v64 a) { return c_v128_unpack_s16_s32(a); }
-SIMD_INLINE v128 v128_unpacklo_u16_s32(v128 a) {
- return c_v128_unpacklo_u16_s32(a);
-}
-SIMD_INLINE v128 v128_unpacklo_s16_s32(v128 a) {
- return c_v128_unpacklo_s16_s32(a);
-}
-SIMD_INLINE v128 v128_unpackhi_u16_s32(v128 a) {
- return c_v128_unpackhi_u16_s32(a);
-}
-SIMD_INLINE v128 v128_unpackhi_s16_s32(v128 a) {
- return c_v128_unpackhi_s16_s32(a);
-}
-SIMD_INLINE v128 v128_shuffle_8(v128 a, v128 pattern) {
- return c_v128_shuffle_8(a, pattern);
-}
-
-SIMD_INLINE v128 v128_cmpgt_s8(v128 a, v128 b) { return c_v128_cmpgt_s8(a, b); }
-SIMD_INLINE v128 v128_cmplt_s8(v128 a, v128 b) { return c_v128_cmplt_s8(a, b); }
-SIMD_INLINE v128 v128_cmpeq_8(v128 a, v128 b) { return c_v128_cmpeq_8(a, b); }
-SIMD_INLINE v128 v128_cmpgt_s16(v128 a, v128 b) {
- return c_v128_cmpgt_s16(a, b);
-}
-SIMD_INLINE v128 v128_cmplt_s16(v128 a, v128 b) {
- return c_v128_cmplt_s16(a, b);
-}
-SIMD_INLINE v128 v128_cmpeq_16(v128 a, v128 b) { return c_v128_cmpeq_16(a, b); }
-
-SIMD_INLINE v128 v128_cmpgt_s32(v128 a, v128 b) {
- return c_v128_cmpgt_s32(a, b);
-}
-SIMD_INLINE v128 v128_cmplt_s32(v128 a, v128 b) {
- return c_v128_cmplt_s32(a, b);
-}
-SIMD_INLINE v128 v128_cmpeq_32(v128 a, v128 b) { return c_v128_cmpeq_32(a, b); }
-
-SIMD_INLINE v128 v128_shl_8(v128 a, unsigned int c) {
- return c_v128_shl_8(a, c);
-}
-SIMD_INLINE v128 v128_shr_u8(v128 a, unsigned int c) {
- return c_v128_shr_u8(a, c);
-}
-SIMD_INLINE v128 v128_shr_s8(v128 a, unsigned int c) {
- return c_v128_shr_s8(a, c);
-}
-SIMD_INLINE v128 v128_shl_16(v128 a, unsigned int c) {
- return c_v128_shl_16(a, c);
-}
-SIMD_INLINE v128 v128_shr_u16(v128 a, unsigned int c) {
- return c_v128_shr_u16(a, c);
-}
-SIMD_INLINE v128 v128_shr_s16(v128 a, unsigned int c) {
- return c_v128_shr_s16(a, c);
-}
-SIMD_INLINE v128 v128_shl_32(v128 a, unsigned int c) {
- return c_v128_shl_32(a, c);
-}
-SIMD_INLINE v128 v128_shr_u32(v128 a, unsigned int c) {
- return c_v128_shr_u32(a, c);
-}
-SIMD_INLINE v128 v128_shr_s32(v128 a, unsigned int c) {
- return c_v128_shr_s32(a, c);
-}
-SIMD_INLINE v128 v128_shl_64(v128 a, unsigned int c) {
- return c_v128_shl_64(a, c);
-}
-SIMD_INLINE v128 v128_shr_u64(v128 a, unsigned int c) {
- return c_v128_shr_u64(a, c);
-}
-SIMD_INLINE v128 v128_shr_s64(v128 a, unsigned int c) {
- return c_v128_shr_s64(a, c);
-}
-
-SIMD_INLINE v128 v128_shr_n_byte(v128 a, unsigned int n) {
- return c_v128_shr_n_byte(a, n);
-}
-SIMD_INLINE v128 v128_shl_n_byte(v128 a, unsigned int n) {
- return c_v128_shl_n_byte(a, n);
-}
-SIMD_INLINE v128 v128_shl_n_8(v128 a, unsigned int n) {
- return c_v128_shl_n_8(a, n);
-}
-SIMD_INLINE v128 v128_shl_n_16(v128 a, unsigned int n) {
- return c_v128_shl_n_16(a, n);
-}
-SIMD_INLINE v128 v128_shl_n_32(v128 a, unsigned int n) {
- return c_v128_shl_n_32(a, n);
-}
-SIMD_INLINE v128 v128_shl_n_64(v128 a, unsigned int n) {
- return c_v128_shl_n_64(a, n);
-}
-SIMD_INLINE v128 v128_shr_n_u8(v128 a, unsigned int n) {
- return c_v128_shr_n_u8(a, n);
-}
-SIMD_INLINE v128 v128_shr_n_u16(v128 a, unsigned int n) {
- return c_v128_shr_n_u16(a, n);
-}
-SIMD_INLINE v128 v128_shr_n_u32(v128 a, unsigned int n) {
- return c_v128_shr_n_u32(a, n);
-}
-SIMD_INLINE v128 v128_shr_n_u64(v128 a, unsigned int n) {
- return c_v128_shr_n_u64(a, n);
-}
-SIMD_INLINE v128 v128_shr_n_s8(v128 a, unsigned int n) {
- return c_v128_shr_n_s8(a, n);
-}
-SIMD_INLINE v128 v128_shr_n_s16(v128 a, unsigned int n) {
- return c_v128_shr_n_s16(a, n);
-}
-SIMD_INLINE v128 v128_shr_n_s32(v128 a, unsigned int n) {
- return c_v128_shr_n_s32(a, n);
-}
-SIMD_INLINE v128 v128_shr_n_s64(v128 a, unsigned int n) {
- return c_v128_shr_n_s64(a, n);
-}
-
-typedef uint32_t sad128_internal_u16;
-SIMD_INLINE sad128_internal_u16 v128_sad_u16_init() {
- return c_v128_sad_u16_init();
-}
-SIMD_INLINE sad128_internal_u16 v128_sad_u16(sad128_internal_u16 s, v128 a,
- v128 b) {
- return c_v128_sad_u16(s, a, b);
-}
-SIMD_INLINE uint32_t v128_sad_u16_sum(sad128_internal_u16 s) {
- return c_v128_sad_u16_sum(s);
-}
-
-typedef uint64_t ssd128_internal_s16;
-SIMD_INLINE ssd128_internal_s16 v128_ssd_s16_init() {
- return c_v128_ssd_s16_init();
-}
-SIMD_INLINE ssd128_internal_s16 v128_ssd_s16(ssd128_internal_s16 s, v128 a,
- v128 b) {
- return c_v128_ssd_s16(s, a, b);
-}
-SIMD_INLINE uint64_t v128_ssd_s16_sum(ssd128_internal_s16 s) {
- return c_v128_ssd_s16_sum(s);
-}
-
-#endif // AOM_AOM_DSP_SIMD_V128_INTRINSICS_H_
diff --git a/third_party/aom/aom_dsp/simd/v128_intrinsics_arm.h b/third_party/aom/aom_dsp/simd/v128_intrinsics_arm.h
deleted file mode 100644
index 3c669d579..000000000
--- a/third_party/aom/aom_dsp/simd/v128_intrinsics_arm.h
+++ /dev/null
@@ -1,958 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#ifndef AOM_AOM_DSP_SIMD_V128_INTRINSICS_ARM_H_
-#define AOM_AOM_DSP_SIMD_V128_INTRINSICS_ARM_H_
-
-#include <arm_neon.h>
-
-#include "aom_dsp/simd/v64_intrinsics_arm.h"
-
-typedef int64x2_t v128;
-
-SIMD_INLINE uint32_t v128_low_u32(v128 a) {
- return v64_low_u32(vget_low_s64(a));
-}
-
-SIMD_INLINE v64 v128_low_v64(v128 a) { return vget_low_s64(a); }
-
-SIMD_INLINE v64 v128_high_v64(v128 a) { return vget_high_s64(a); }
-
-SIMD_INLINE v128 v128_from_v64(v64 a, v64 b) { return vcombine_s64(b, a); }
-
-SIMD_INLINE v128 v128_from_64(uint64_t a, uint64_t b) {
- return vcombine_s64((int64x1_t)b, (int64x1_t)a);
-}
-
-SIMD_INLINE v128 v128_from_32(uint32_t a, uint32_t b, uint32_t c, uint32_t d) {
- return vcombine_s64(v64_from_32(c, d), v64_from_32(a, b));
-}
-
-SIMD_INLINE v128 v128_load_aligned(const void *p) {
- return vreinterpretq_s64_u8(vld1q_u8((const uint8_t *)p));
-}
-
-SIMD_INLINE v128 v128_load_unaligned(const void *p) {
- return v128_load_aligned(p);
-}
-
-SIMD_INLINE void v128_store_aligned(void *p, v128 r) {
- vst1q_u8((uint8_t *)p, vreinterpretq_u8_s64(r));
-}
-
-SIMD_INLINE void v128_store_unaligned(void *p, v128 r) {
- vst1q_u8((uint8_t *)p, vreinterpretq_u8_s64(r));
-}
-
-SIMD_INLINE v128 v128_align(v128 a, v128 b, unsigned int c) {
-// The following functions require an immediate.
-// Some compilers will check this during optimisation, others wont.
-#if defined(__OPTIMIZE__) && __OPTIMIZE__ && !defined(__clang__)
- return c ? vreinterpretq_s64_s8(
- vextq_s8(vreinterpretq_s8_s64(b), vreinterpretq_s8_s64(a), c))
- : b;
-#else
- return c < 8 ? v128_from_v64(v64_align(v128_low_v64(a), v128_high_v64(b), c),
- v64_align(v128_high_v64(b), v128_low_v64(b), c))
- : v128_from_v64(
- v64_align(v128_high_v64(a), v128_low_v64(a), c - 8),
- v64_align(v128_low_v64(a), v128_high_v64(b), c - 8));
-#endif
-}
-
-SIMD_INLINE v128 v128_zero() { return vreinterpretq_s64_u8(vdupq_n_u8(0)); }
-
-SIMD_INLINE v128 v128_ones() { return vreinterpretq_s64_u8(vdupq_n_u8(-1)); }
-
-SIMD_INLINE v128 v128_dup_8(uint8_t x) {
- return vreinterpretq_s64_u8(vdupq_n_u8(x));
-}
-
-SIMD_INLINE v128 v128_dup_16(uint16_t x) {
- return vreinterpretq_s64_u16(vdupq_n_u16(x));
-}
-
-SIMD_INLINE v128 v128_dup_32(uint32_t x) {
- return vreinterpretq_s64_u32(vdupq_n_u32(x));
-}
-
-SIMD_INLINE v128 v128_dup_64(uint64_t x) {
- return vreinterpretq_s64_u64(vdupq_n_u64(x));
-}
-
-SIMD_INLINE int64_t v128_dotp_su8(v128 a, v128 b) {
- int16x8_t t1 = vmulq_s16(
- vmovl_s8(vreinterpret_s8_s64(vget_low_s64(a))),
- vreinterpretq_s16_u16(vmovl_u8(vreinterpret_u8_s64(vget_low_s64(b)))));
- int16x8_t t2 = vmulq_s16(
- vmovl_s8(vreinterpret_s8_s64(vget_high_s64(a))),
- vreinterpretq_s16_u16(vmovl_u8(vreinterpret_u8_s64(vget_high_s64(b)))));
-#if defined(__aarch64__)
- return vaddlvq_s16(t1) + vaddlvq_s16(t2);
-#else
- int64x2_t t = vpaddlq_s32(vaddq_s32(vpaddlq_s16(t1), vpaddlq_s16(t2)));
- return (int64_t)vget_high_s64(t) + (int64_t)vget_low_s64(t);
-#endif
-}
-
-SIMD_INLINE int64_t v128_dotp_s16(v128 a, v128 b) {
- return v64_dotp_s16(vget_high_s64(a), vget_high_s64(b)) +
- v64_dotp_s16(vget_low_s64(a), vget_low_s64(b));
-}
-
-SIMD_INLINE int64_t v128_dotp_s32(v128 a, v128 b) {
- int64x2_t t = vpaddlq_s32(
- vmulq_s32(vreinterpretq_s32_s64(a), vreinterpretq_s32_s64(b)));
- return (int64_t)vget_high_s64(t) + (int64_t)vget_low_s64(t);
-}
-
-SIMD_INLINE uint64_t v128_hadd_u8(v128 x) {
-#if defined(__aarch64__)
- return vaddlvq_u8(vreinterpretq_u8_s64(x));
-#else
- uint64x2_t t = vpaddlq_u32(vpaddlq_u16(vpaddlq_u8(vreinterpretq_u8_s64(x))));
- return vget_lane_s32(
- vreinterpret_s32_u64(vadd_u64(vget_high_u64(t), vget_low_u64(t))), 0);
-#endif
-}
-
-SIMD_INLINE v128 v128_padd_s16(v128 a) {
- return vreinterpretq_s64_s32(vpaddlq_s16(vreinterpretq_s16_s64(a)));
-}
-
-SIMD_INLINE v128 v128_padd_u8(v128 a) {
- return vreinterpretq_s64_u16(vpaddlq_u8(vreinterpretq_u8_s64(a)));
-}
-
-typedef struct {
- sad64_internal hi, lo;
-} sad128_internal;
-
-SIMD_INLINE sad128_internal v128_sad_u8_init() {
- sad128_internal s;
- s.hi = s.lo = vdupq_n_u16(0);
- return s;
-}
-
-/* Implementation dependent return value. Result must be finalised with
- v128_sad_u8_sum().
- The result for more than 32 v128_sad_u8() calls is undefined. */
-SIMD_INLINE sad128_internal v128_sad_u8(sad128_internal s, v128 a, v128 b) {
- sad128_internal r;
- r.hi = v64_sad_u8(s.hi, vget_high_s64(a), vget_high_s64(b));
- r.lo = v64_sad_u8(s.lo, vget_low_s64(a), vget_low_s64(b));
- return r;
-}
-
-SIMD_INLINE uint32_t v128_sad_u8_sum(sad128_internal s) {
-#if defined(__aarch64__)
- return vaddlvq_u16(s.hi) + vaddlvq_u16(s.lo);
-#else
- uint64x2_t t = vpaddlq_u32(vpaddlq_u16(vaddq_u16(s.hi, s.lo)));
- return (uint32_t)(uint64_t)(vget_high_u64(t) + vget_low_u64(t));
-#endif
-}
-
-typedef struct {
- ssd64_internal hi, lo;
-} ssd128_internal;
-
-SIMD_INLINE ssd128_internal v128_ssd_u8_init() {
- ssd128_internal s;
- s.hi = s.lo = v64_ssd_u8_init();
- return s;
-}
-
-/* Implementation dependent return value. Result must be finalised with
- * v128_ssd_u8_sum(). */
-SIMD_INLINE ssd128_internal v128_ssd_u8(ssd128_internal s, v128 a, v128 b) {
- ssd128_internal r;
- r.hi = v64_ssd_u8(s.hi, vget_high_s64(a), vget_high_s64(b));
- r.lo = v64_ssd_u8(s.lo, vget_low_s64(a), vget_low_s64(b));
- return r;
-}
-
-SIMD_INLINE uint32_t v128_ssd_u8_sum(ssd128_internal s) {
- return (uint32_t)(v64_ssd_u8_sum(s.hi) + v64_ssd_u8_sum(s.lo));
-}
-
-SIMD_INLINE v128 v128_or(v128 x, v128 y) { return vorrq_s64(x, y); }
-
-SIMD_INLINE v128 v128_xor(v128 x, v128 y) { return veorq_s64(x, y); }
-
-SIMD_INLINE v128 v128_and(v128 x, v128 y) { return vandq_s64(x, y); }
-
-SIMD_INLINE v128 v128_andn(v128 x, v128 y) { return vbicq_s64(x, y); }
-
-SIMD_INLINE v128 v128_add_8(v128 x, v128 y) {
- return vreinterpretq_s64_u8(
- vaddq_u8(vreinterpretq_u8_s64(x), vreinterpretq_u8_s64(y)));
-}
-
-SIMD_INLINE v128 v128_sadd_u8(v128 x, v128 y) {
- return vreinterpretq_s64_u8(
- vqaddq_u8(vreinterpretq_u8_s64(x), vreinterpretq_u8_s64(y)));
-}
-
-SIMD_INLINE v128 v128_sadd_s8(v128 x, v128 y) {
- return vreinterpretq_s64_s8(
- vqaddq_s8(vreinterpretq_s8_s64(x), vreinterpretq_s8_s64(y)));
-}
-
-SIMD_INLINE v128 v128_add_16(v128 x, v128 y) {
- return vreinterpretq_s64_s16(
- vaddq_s16(vreinterpretq_s16_s64(x), vreinterpretq_s16_s64(y)));
-}
-
-SIMD_INLINE v128 v128_sadd_s16(v128 x, v128 y) {
- return vreinterpretq_s64_s16(
- vqaddq_s16(vreinterpretq_s16_s64(x), vreinterpretq_s16_s64(y)));
-}
-
-SIMD_INLINE v128 v128_add_32(v128 x, v128 y) {
- return vreinterpretq_s64_u32(
- vaddq_u32(vreinterpretq_u32_s64(x), vreinterpretq_u32_s64(y)));
-}
-
-SIMD_INLINE v128 v128_add_64(v128 x, v128 y) {
- return vreinterpretq_s64_u64(
- vaddq_u64(vreinterpretq_u64_s64(x), vreinterpretq_u64_s64(y)));
-}
-
-SIMD_INLINE v128 v128_sub_8(v128 x, v128 y) {
- return vreinterpretq_s64_u8(
- vsubq_u8(vreinterpretq_u8_s64(x), vreinterpretq_u8_s64(y)));
-}
-
-SIMD_INLINE v128 v128_sub_16(v128 x, v128 y) {
- return vreinterpretq_s64_s16(
- vsubq_s16(vreinterpretq_s16_s64(x), vreinterpretq_s16_s64(y)));
-}
-
-SIMD_INLINE v128 v128_ssub_s16(v128 x, v128 y) {
- return vreinterpretq_s64_s16(
- vqsubq_s16(vreinterpretq_s16_s64(x), vreinterpretq_s16_s64(y)));
-}
-
-SIMD_INLINE v128 v128_ssub_u16(v128 x, v128 y) {
- return vreinterpretq_s64_u16(
- vqsubq_u16(vreinterpretq_u16_s64(x), vreinterpretq_u16_s64(y)));
-}
-
-SIMD_INLINE v128 v128_ssub_u8(v128 x, v128 y) {
- return vreinterpretq_s64_u8(
- vqsubq_u8(vreinterpretq_u8_s64(x), vreinterpretq_u8_s64(y)));
-}
-
-SIMD_INLINE v128 v128_ssub_s8(v128 x, v128 y) {
- return vreinterpretq_s64_s8(
- vqsubq_s8(vreinterpretq_s8_s64(x), vreinterpretq_s8_s64(y)));
-}
-
-SIMD_INLINE v128 v128_sub_32(v128 x, v128 y) {
- return vreinterpretq_s64_s32(
- vsubq_s32(vreinterpretq_s32_s64(x), vreinterpretq_s32_s64(y)));
-}
-
-SIMD_INLINE v128 v128_sub_64(v128 x, v128 y) { return vsubq_s64(x, y); }
-
-SIMD_INLINE v128 v128_abs_s16(v128 x) {
- return vreinterpretq_s64_s16(vabsq_s16(vreinterpretq_s16_s64(x)));
-}
-
-SIMD_INLINE v128 v128_abs_s8(v128 x) {
- return vreinterpretq_s64_s8(vabsq_s8(vreinterpretq_s8_s64(x)));
-}
-
-SIMD_INLINE v128 v128_mul_s16(v64 a, v64 b) {
- return vreinterpretq_s64_s32(
- vmull_s16(vreinterpret_s16_s64(a), vreinterpret_s16_s64(b)));
-}
-
-SIMD_INLINE v128 v128_mullo_s16(v128 a, v128 b) {
- return vreinterpretq_s64_s16(
- vmulq_s16(vreinterpretq_s16_s64(a), vreinterpretq_s16_s64(b)));
-}
-
-SIMD_INLINE v128 v128_mulhi_s16(v128 a, v128 b) {
-#if defined(__aarch64__)
- return vreinterpretq_s64_s16(vuzp2q_s16(
- vreinterpretq_s16_s32(vmull_s16(vreinterpret_s16_s64(vget_low_s64(a)),
- vreinterpret_s16_s64(vget_low_s64(b)))),
- vreinterpretq_s16_s32(
- vmull_high_s16(vreinterpretq_s16_s64(a), vreinterpretq_s16_s64(b)))));
-#else
- return v128_from_v64(v64_mulhi_s16(vget_high_s64(a), vget_high_s64(b)),
- v64_mulhi_s16(vget_low_s64(a), vget_low_s64(b)));
-#endif
-}
-
-SIMD_INLINE v128 v128_mullo_s32(v128 a, v128 b) {
- return vreinterpretq_s64_s32(
- vmulq_s32(vreinterpretq_s32_s64(a), vreinterpretq_s32_s64(b)));
-}
-
-SIMD_INLINE v128 v128_madd_s16(v128 a, v128 b) {
-#if defined(__aarch64__)
- int32x4_t t1 = vmull_s16(vreinterpret_s16_s64(vget_low_s64(a)),
- vreinterpret_s16_s64(vget_low_s64(b)));
- int32x4_t t2 =
- vmull_high_s16(vreinterpretq_s16_s64(a), vreinterpretq_s16_s64(b));
- return vreinterpretq_s64_s32(vpaddq_s32(t1, t2));
-#else
- return v128_from_v64(v64_madd_s16(vget_high_s64(a), vget_high_s64(b)),
- v64_madd_s16(vget_low_s64(a), vget_low_s64(b)));
-#endif
-}
-
-SIMD_INLINE v128 v128_madd_us8(v128 a, v128 b) {
-#if defined(__aarch64__)
- int16x8_t t1 = vmulq_s16(
- vreinterpretq_s16_u16(vmovl_u8(vreinterpret_u8_s64(vget_low_s64(a)))),
- vmovl_s8(vreinterpret_s8_s64(vget_low_s64(b))));
- int16x8_t t2 = vmulq_s16(
- vreinterpretq_s16_u16(vmovl_u8(vreinterpret_u8_s64(vget_high_s64(a)))),
- vmovl_s8(vreinterpret_s8_s64(vget_high_s64(b))));
- return vreinterpretq_s64_s16(
- vqaddq_s16(vuzp1q_s16(t1, t2), vuzp2q_s16(t1, t2)));
-#else
- return v128_from_v64(v64_madd_us8(vget_high_s64(a), vget_high_s64(b)),
- v64_madd_us8(vget_low_s64(a), vget_low_s64(b)));
-#endif
-}
-
-SIMD_INLINE v128 v128_avg_u8(v128 x, v128 y) {
- return vreinterpretq_s64_u8(
- vrhaddq_u8(vreinterpretq_u8_s64(x), vreinterpretq_u8_s64(y)));
-}
-
-SIMD_INLINE v128 v128_rdavg_u8(v128 x, v128 y) {
- return vreinterpretq_s64_u8(
- vhaddq_u8(vreinterpretq_u8_s64(x), vreinterpretq_u8_s64(y)));
-}
-
-SIMD_INLINE v128 v128_rdavg_u16(v128 x, v128 y) {
- return vreinterpretq_s64_u16(
- vhaddq_u16(vreinterpretq_u16_s64(x), vreinterpretq_u16_s64(y)));
-}
-
-SIMD_INLINE v128 v128_avg_u16(v128 x, v128 y) {
- return vreinterpretq_s64_u16(
- vrhaddq_u16(vreinterpretq_u16_s64(x), vreinterpretq_u16_s64(y)));
-}
-
-SIMD_INLINE v128 v128_min_u8(v128 x, v128 y) {
- return vreinterpretq_s64_u8(
- vminq_u8(vreinterpretq_u8_s64(x), vreinterpretq_u8_s64(y)));
-}
-
-SIMD_INLINE v128 v128_max_u8(v128 x, v128 y) {
- return vreinterpretq_s64_u8(
- vmaxq_u8(vreinterpretq_u8_s64(x), vreinterpretq_u8_s64(y)));
-}
-
-SIMD_INLINE v128 v128_min_s8(v128 x, v128 y) {
- return vreinterpretq_s64_s8(
- vminq_s8(vreinterpretq_s8_s64(x), vreinterpretq_s8_s64(y)));
-}
-
-SIMD_INLINE uint32_t v128_movemask_8(v128 a) {
- a = vreinterpretq_s64_u8(vcltq_s8(vreinterpretq_s8_s64(a), vdupq_n_s8(0)));
-#if defined(__aarch64__)
- uint8x16_t m =
- vandq_u8(vreinterpretq_u8_s64(a),
- vreinterpretq_u8_u64(vdupq_n_u64(0x8040201008040201ULL)));
- return vaddv_u8(vget_low_u8(m)) + (vaddv_u8(vget_high_u8(m)) << 8);
-#else
- uint64x2_t m = vpaddlq_u32(vpaddlq_u16(vpaddlq_u8(
- vandq_u8(vreinterpretq_u8_s64(a),
- vreinterpretq_u8_u64(vdupq_n_u64(0x8040201008040201ULL))))));
- return v64_low_u32(
- v64_ziplo_8(v128_high_v64((v128)m), v128_low_v64((v128)m)));
-#endif
-}
-
-SIMD_INLINE v128 v128_blend_8(v128 a, v128 b, v128 c) {
- c = vreinterpretq_s64_u8(vcltq_s8(vreinterpretq_s8_s64(c), vdupq_n_s8(0)));
- return v128_or(v128_and(b, c), v128_andn(a, c));
-}
-
-SIMD_INLINE v128 v128_max_s8(v128 x, v128 y) {
- return vreinterpretq_s64_s8(
- vmaxq_s8(vreinterpretq_s8_s64(x), vreinterpretq_s8_s64(y)));
-}
-
-SIMD_INLINE v128 v128_min_s16(v128 x, v128 y) {
- return vreinterpretq_s64_s16(
- vminq_s16(vreinterpretq_s16_s64(x), vreinterpretq_s16_s64(y)));
-}
-
-SIMD_INLINE v128 v128_max_s16(v128 x, v128 y) {
- return vreinterpretq_s64_s16(
- vmaxq_s16(vreinterpretq_s16_s64(x), vreinterpretq_s16_s64(y)));
-}
-
-SIMD_INLINE v128 v128_min_s32(v128 x, v128 y) {
- return vreinterpretq_s64_s32(
- vminq_s32(vreinterpretq_s32_s64(x), vreinterpretq_s32_s64(y)));
-}
-
-SIMD_INLINE v128 v128_max_s32(v128 x, v128 y) {
- return vreinterpretq_s64_s32(
- vmaxq_s32(vreinterpretq_s32_s64(x), vreinterpretq_s32_s64(y)));
-}
-
-SIMD_INLINE v128 v128_ziplo_8(v128 x, v128 y) {
-#if defined(__aarch64__)
- return vreinterpretq_s64_u8(
- vzip1q_u8(vreinterpretq_u8_s64(y), vreinterpretq_u8_s64(x)));
-#else
- uint8x16x2_t r = vzipq_u8(vreinterpretq_u8_s64(y), vreinterpretq_u8_s64(x));
- return vreinterpretq_s64_u8(r.val[0]);
-#endif
-}
-
-SIMD_INLINE v128 v128_ziphi_8(v128 x, v128 y) {
-#if defined(__aarch64__)
- return vreinterpretq_s64_u8(
- vzip2q_u8(vreinterpretq_u8_s64(y), vreinterpretq_u8_s64(x)));
-#else
- uint8x16x2_t r = vzipq_u8(vreinterpretq_u8_s64(y), vreinterpretq_u8_s64(x));
- return vreinterpretq_s64_u8(r.val[1]);
-#endif
-}
-
-SIMD_INLINE v128 v128_zip_8(v64 x, v64 y) {
- uint8x8x2_t r = vzip_u8(vreinterpret_u8_s64(y), vreinterpret_u8_s64(x));
- return vreinterpretq_s64_u8(vcombine_u8(r.val[0], r.val[1]));
-}
-
-SIMD_INLINE v128 v128_ziplo_16(v128 x, v128 y) {
-#if defined(__aarch64__)
- return vreinterpretq_s64_u16(
- vzip1q_u16(vreinterpretq_u16_s64(y), vreinterpretq_u16_s64(x)));
-#else
- int16x8x2_t r = vzipq_s16(vreinterpretq_s16_s64(y), vreinterpretq_s16_s64(x));
- return vreinterpretq_s64_s16(r.val[0]);
-#endif
-}
-
-SIMD_INLINE v128 v128_ziphi_16(v128 x, v128 y) {
-#if defined(__aarch64__)
- return vreinterpretq_s64_u16(
- vzip2q_u16(vreinterpretq_u16_s64(y), vreinterpretq_u16_s64(x)));
-#else
- int16x8x2_t r = vzipq_s16(vreinterpretq_s16_s64(y), vreinterpretq_s16_s64(x));
- return vreinterpretq_s64_s16(r.val[1]);
-#endif
-}
-
-SIMD_INLINE v128 v128_zip_16(v64 x, v64 y) {
- uint16x4x2_t r = vzip_u16(vreinterpret_u16_s64(y), vreinterpret_u16_s64(x));
- return vreinterpretq_s64_u16(vcombine_u16(r.val[0], r.val[1]));
-}
-
-SIMD_INLINE v128 v128_ziplo_32(v128 x, v128 y) {
-#if defined(__aarch64__)
- return vreinterpretq_s64_u32(
- vzip1q_u32(vreinterpretq_u32_s64(y), vreinterpretq_u32_s64(x)));
-#else
- int32x4x2_t r = vzipq_s32(vreinterpretq_s32_s64(y), vreinterpretq_s32_s64(x));
- return vreinterpretq_s64_s32(r.val[0]);
-#endif
-}
-
-SIMD_INLINE v128 v128_ziphi_32(v128 x, v128 y) {
-#if defined(__aarch64__)
- return vreinterpretq_s64_u32(
- vzip2q_u32(vreinterpretq_u32_s64(y), vreinterpretq_u32_s64(x)));
-#else
- int32x4x2_t r = vzipq_s32(vreinterpretq_s32_s64(y), vreinterpretq_s32_s64(x));
- return vreinterpretq_s64_s32(r.val[1]);
-#endif
-}
-
-SIMD_INLINE v128 v128_zip_32(v64 x, v64 y) {
- uint32x2x2_t r = vzip_u32(vreinterpret_u32_s64(y), vreinterpret_u32_s64(x));
- return vreinterpretq_s64_u32(vcombine_u32(r.val[0], r.val[1]));
-}
-
-SIMD_INLINE v128 v128_ziplo_64(v128 a, v128 b) {
- return v128_from_v64(vget_low_s64((int64x2_t)a), vget_low_s64((int64x2_t)b));
-}
-
-SIMD_INLINE v128 v128_ziphi_64(v128 a, v128 b) {
- return v128_from_v64(vget_high_s64((int64x2_t)a),
- vget_high_s64((int64x2_t)b));
-}
-
-SIMD_INLINE v128 v128_unziplo_8(v128 x, v128 y) {
-#if defined(__aarch64__)
- return vreinterpretq_s64_u8(
- vuzp1q_u8(vreinterpretq_u8_s64(y), vreinterpretq_u8_s64(x)));
-#else
- uint8x16x2_t r = vuzpq_u8(vreinterpretq_u8_s64(y), vreinterpretq_u8_s64(x));
- return vreinterpretq_s64_u8(r.val[0]);
-#endif
-}
-
-SIMD_INLINE v128 v128_unziphi_8(v128 x, v128 y) {
-#if defined(__aarch64__)
- return vreinterpretq_s64_u8(
- vuzp2q_u8(vreinterpretq_u8_s64(y), vreinterpretq_u8_s64(x)));
-#else
- uint8x16x2_t r = vuzpq_u8(vreinterpretq_u8_s64(y), vreinterpretq_u8_s64(x));
- return vreinterpretq_s64_u8(r.val[1]);
-#endif
-}
-
-SIMD_INLINE v128 v128_unziplo_16(v128 x, v128 y) {
-#if defined(__aarch64__)
- return vreinterpretq_s64_u16(
- vuzp1q_u16(vreinterpretq_u16_s64(y), vreinterpretq_u16_s64(x)));
-#else
- uint16x8x2_t r =
- vuzpq_u16(vreinterpretq_u16_s64(y), vreinterpretq_u16_s64(x));
- return vreinterpretq_s64_u16(r.val[0]);
-#endif
-}
-
-SIMD_INLINE v128 v128_unziphi_16(v128 x, v128 y) {
-#if defined(__aarch64__)
- return vreinterpretq_s64_u16(
- vuzp2q_u16(vreinterpretq_u16_s64(y), vreinterpretq_u16_s64(x)));
-#else
- uint16x8x2_t r =
- vuzpq_u16(vreinterpretq_u16_s64(y), vreinterpretq_u16_s64(x));
- return vreinterpretq_s64_u16(r.val[1]);
-#endif
-}
-
-SIMD_INLINE v128 v128_unziplo_32(v128 x, v128 y) {
-#if defined(__aarch64__)
- return vreinterpretq_s64_u32(
- vuzp1q_u32(vreinterpretq_u32_s64(y), vreinterpretq_u32_s64(x)));
-#else
- uint32x4x2_t r =
- vuzpq_u32(vreinterpretq_u32_s64(y), vreinterpretq_u32_s64(x));
- return vreinterpretq_s64_u32(r.val[0]);
-#endif
-}
-
-SIMD_INLINE v128 v128_unziphi_32(v128 x, v128 y) {
-#if defined(__aarch64__)
- return vreinterpretq_s64_u32(
- vuzp2q_u32(vreinterpretq_u32_s64(y), vreinterpretq_u32_s64(x)));
-#else
- uint32x4x2_t r =
- vuzpq_u32(vreinterpretq_u32_s64(y), vreinterpretq_u32_s64(x));
- return vreinterpretq_s64_u32(r.val[1]);
-#endif
-}
-
-SIMD_INLINE v128 v128_unpack_u8_s16(v64 a) {
- return vreinterpretq_s64_u16(vmovl_u8(vreinterpret_u8_s64(a)));
-}
-
-SIMD_INLINE v128 v128_unpacklo_u8_s16(v128 a) {
- return vreinterpretq_s64_u16(vmovl_u8(vreinterpret_u8_s64(vget_low_s64(a))));
-}
-
-SIMD_INLINE v128 v128_unpackhi_u8_s16(v128 a) {
- return vreinterpretq_s64_u16(vmovl_u8(vreinterpret_u8_s64(vget_high_s64(a))));
-}
-
-SIMD_INLINE v128 v128_unpack_s8_s16(v64 a) {
- return vreinterpretq_s64_s16(vmovl_s8(vreinterpret_s8_s64(a)));
-}
-
-SIMD_INLINE v128 v128_unpacklo_s8_s16(v128 a) {
- return vreinterpretq_s64_s16(vmovl_s8(vreinterpret_s8_s64(vget_low_s64(a))));
-}
-
-SIMD_INLINE v128 v128_unpackhi_s8_s16(v128 a) {
- return vreinterpretq_s64_s16(vmovl_s8(vreinterpret_s8_s64(vget_high_s64(a))));
-}
-
-SIMD_INLINE v128 v128_pack_s32_s16(v128 a, v128 b) {
- return v128_from_v64(
- vreinterpret_s64_s16(vqmovn_s32(vreinterpretq_s32_s64(a))),
- vreinterpret_s64_s16(vqmovn_s32(vreinterpretq_s32_s64(b))));
-}
-
-SIMD_INLINE v128 v128_pack_s32_u16(v128 a, v128 b) {
- return v128_from_v64(
- vreinterpret_s64_u16(vqmovun_s32(vreinterpretq_s32_s64(a))),
- vreinterpret_s64_u16(vqmovun_s32(vreinterpretq_s32_s64(b))));
-}
-
-SIMD_INLINE v128 v128_pack_s16_u8(v128 a, v128 b) {
- return v128_from_v64(
- vreinterpret_s64_u8(vqmovun_s16(vreinterpretq_s16_s64(a))),
- vreinterpret_s64_u8(vqmovun_s16(vreinterpretq_s16_s64(b))));
-}
-
-SIMD_INLINE v128 v128_pack_s16_s8(v128 a, v128 b) {
- return v128_from_v64(
- vreinterpret_s64_s8(vqmovn_s16(vreinterpretq_s16_s64(a))),
- vreinterpret_s64_s8(vqmovn_s16(vreinterpretq_s16_s64(b))));
-}
-
-SIMD_INLINE v128 v128_unpack_u16_s32(v64 a) {
- return vreinterpretq_s64_u32(vmovl_u16(vreinterpret_u16_s64(a)));
-}
-
-SIMD_INLINE v128 v128_unpack_s16_s32(v64 a) {
- return vreinterpretq_s64_s32(vmovl_s16(vreinterpret_s16_s64(a)));
-}
-
-SIMD_INLINE v128 v128_unpacklo_u16_s32(v128 a) {
- return vreinterpretq_s64_u32(
- vmovl_u16(vreinterpret_u16_s64(vget_low_s64(a))));
-}
-
-SIMD_INLINE v128 v128_unpacklo_s16_s32(v128 a) {
- return vreinterpretq_s64_s32(
- vmovl_s16(vreinterpret_s16_s64(vget_low_s64(a))));
-}
-
-SIMD_INLINE v128 v128_unpackhi_u16_s32(v128 a) {
- return vreinterpretq_s64_u32(
- vmovl_u16(vreinterpret_u16_s64(vget_high_s64(a))));
-}
-
-SIMD_INLINE v128 v128_unpackhi_s16_s32(v128 a) {
- return vreinterpretq_s64_s32(
- vmovl_s16(vreinterpret_s16_s64(vget_high_s64(a))));
-}
-
-SIMD_INLINE v128 v128_shuffle_8(v128 x, v128 pattern) {
-#if defined(__aarch64__)
- return vreinterpretq_s64_u8(
- vqtbl1q_u8(vreinterpretq_u8_s64(x), vreinterpretq_u8_s64(pattern)));
-#else
- uint8x8x2_t p = { { vget_low_u8(vreinterpretq_u8_s64(x)),
- vget_high_u8(vreinterpretq_u8_s64(x)) } };
- return v128_from_64((uint64_t)vreinterpret_s64_u8(vtbl2_u8(
- p, vreinterpret_u8_s64(vget_high_s64(pattern)))),
- (uint64_t)vreinterpret_s64_u8(vtbl2_u8(
- p, vreinterpret_u8_s64(vget_low_s64(pattern)))));
-#endif
-}
-
-SIMD_INLINE v128 v128_cmpgt_s8(v128 x, v128 y) {
- return vreinterpretq_s64_u8(
- vcgtq_s8(vreinterpretq_s8_s64(x), vreinterpretq_s8_s64(y)));
-}
-
-SIMD_INLINE v128 v128_cmplt_s8(v128 x, v128 y) {
- return vreinterpretq_s64_u8(
- vcltq_s8(vreinterpretq_s8_s64(x), vreinterpretq_s8_s64(y)));
-}
-
-SIMD_INLINE v128 v128_cmpeq_8(v128 x, v128 y) {
- return vreinterpretq_s64_u8(
- vceqq_u8(vreinterpretq_u8_s64(x), vreinterpretq_u8_s64(y)));
-}
-
-SIMD_INLINE v128 v128_cmpgt_s16(v128 x, v128 y) {
- return vreinterpretq_s64_u16(
- vcgtq_s16(vreinterpretq_s16_s64(x), vreinterpretq_s16_s64(y)));
-}
-
-SIMD_INLINE v128 v128_cmplt_s16(v128 x, v128 y) {
- return vreinterpretq_s64_u16(
- vcltq_s16(vreinterpretq_s16_s64(x), vreinterpretq_s16_s64(y)));
-}
-
-SIMD_INLINE v128 v128_cmpeq_16(v128 x, v128 y) {
- return vreinterpretq_s64_u16(
- vceqq_s16(vreinterpretq_s16_s64(x), vreinterpretq_s16_s64(y)));
-}
-
-SIMD_INLINE v128 v128_cmpgt_s32(v128 x, v128 y) {
- return vreinterpretq_s64_u32(
- vcgtq_s32(vreinterpretq_s32_s64(x), vreinterpretq_s32_s64(y)));
-}
-
-SIMD_INLINE v128 v128_cmplt_s32(v128 x, v128 y) {
- return vreinterpretq_s64_u32(
- vcltq_s32(vreinterpretq_s32_s64(x), vreinterpretq_s32_s64(y)));
-}
-
-SIMD_INLINE v128 v128_cmpeq_32(v128 x, v128 y) {
- return vreinterpretq_s64_u32(
- vceqq_s32(vreinterpretq_s32_s64(x), vreinterpretq_s32_s64(y)));
-}
-
-SIMD_INLINE v128 v128_shl_8(v128 a, unsigned int c) {
- return (c > 7) ? v128_zero()
- : vreinterpretq_s64_u8(
- vshlq_u8(vreinterpretq_u8_s64(a), vdupq_n_s8(c)));
-}
-
-SIMD_INLINE v128 v128_shr_u8(v128 a, unsigned int c) {
- return (c > 7) ? v128_zero()
- : vreinterpretq_s64_u8(
- vshlq_u8(vreinterpretq_u8_s64(a), vdupq_n_s8(-c)));
-}
-
-SIMD_INLINE v128 v128_shr_s8(v128 a, unsigned int c) {
- return (c > 7) ? v128_ones()
- : vreinterpretq_s64_s8(
- vshlq_s8(vreinterpretq_s8_s64(a), vdupq_n_s8(-c)));
-}
-
-SIMD_INLINE v128 v128_shl_16(v128 a, unsigned int c) {
- return (c > 15) ? v128_zero()
- : vreinterpretq_s64_u16(
- vshlq_u16(vreinterpretq_u16_s64(a), vdupq_n_s16(c)));
-}
-
-SIMD_INLINE v128 v128_shr_u16(v128 a, unsigned int c) {
- return (c > 15) ? v128_zero()
- : vreinterpretq_s64_u16(
- vshlq_u16(vreinterpretq_u16_s64(a), vdupq_n_s16(-c)));
-}
-
-SIMD_INLINE v128 v128_shr_s16(v128 a, unsigned int c) {
- return (c > 15) ? v128_ones()
- : vreinterpretq_s64_s16(
- vshlq_s16(vreinterpretq_s16_s64(a), vdupq_n_s16(-c)));
-}
-
-SIMD_INLINE v128 v128_shl_32(v128 a, unsigned int c) {
- return (c > 31) ? v128_zero()
- : vreinterpretq_s64_u32(
- vshlq_u32(vreinterpretq_u32_s64(a), vdupq_n_s32(c)));
-}
-
-SIMD_INLINE v128 v128_shr_u32(v128 a, unsigned int c) {
- return (c > 31) ? v128_zero()
- : vreinterpretq_s64_u32(
- vshlq_u32(vreinterpretq_u32_s64(a), vdupq_n_s32(-c)));
-}
-
-SIMD_INLINE v128 v128_shr_s32(v128 a, unsigned int c) {
- return (c > 31) ? v128_ones()
- : vreinterpretq_s64_s32(
- vshlq_s32(vreinterpretq_s32_s64(a), vdupq_n_s32(-c)));
-}
-
-SIMD_INLINE v128 v128_shl_64(v128 a, unsigned int c) {
- return (c > 63) ? v128_zero()
- : vreinterpretq_s64_u64(
- vshlq_u64(vreinterpretq_u64_s64(a), vdupq_n_s64(c)));
-}
-
-SIMD_INLINE v128 v128_shr_u64(v128 a, unsigned int c) {
- return (c > 63) ? v128_zero()
- : vreinterpretq_s64_u64(
- vshlq_u64(vreinterpretq_u64_s64(a), vdupq_n_s64(-c)));
-}
-
-SIMD_INLINE v128 v128_shr_s64(v128 a, unsigned int c) {
- return (c > 63) ? v128_ones() : vshlq_s64(a, vdupq_n_s64(-c));
-}
-
-#if defined(__OPTIMIZE__) && __OPTIMIZE__ && !defined(__clang__)
-
-SIMD_INLINE v128 v128_shl_n_byte(v128 a, unsigned int n) {
- return n < 8
- ? v128_from_64(
- (uint64_t)vorr_u64(
- vshl_n_u64(vreinterpret_u64_s64(vget_high_s64(a)),
- n * 8),
- vshr_n_u64(vreinterpret_u64_s64(vget_low_s64(a)),
- (8 - n) * 8)),
- (uint64_t)vshl_n_u64(vreinterpret_u64_s64(vget_low_s64(a)),
- n * 8))
- : (n == 8 ? v128_from_64(
- (uint64_t)vreinterpret_u64_s64(vget_low_s64(a)), 0)
- : v128_from_64((uint64_t)vshl_n_u64(
- vreinterpret_u64_s64(vget_low_s64(a)),
- (n - 8) * 8),
- 0));
-}
-
-SIMD_INLINE v128 v128_shr_n_byte(v128 a, unsigned int n) {
- return n < 8
- ? v128_from_64(
- (uint64_t)vshr_n_u64(vreinterpret_u64_s64(vget_high_s64(a)),
- n * 8),
- (uint64_t)vorr_u64(
- vshr_n_u64(vreinterpret_u64_s64(vget_low_s64(a)), n * 8),
- vshl_n_u64(vreinterpret_u64_s64(vget_high_s64(a)),
- (8 - n) * 8)))
- : (n == 8 ? v128_from_64(0, (uint64_t)vreinterpret_u64_s64(
- vget_high_s64(a)))
- : v128_from_64(
- 0, (uint64_t)vshr_n_u64(
- vreinterpret_u64_s64(vget_high_s64(a)),
- (n - 8) * 8)));
-}
-
-SIMD_INLINE v128 v128_shl_n_8(v128 a, unsigned int c) {
- return vreinterpretq_s64_u8(vshlq_n_u8(vreinterpretq_u8_s64(a), c));
-}
-
-SIMD_INLINE v128 v128_shr_n_u8(v128 a, unsigned int c) {
- return vreinterpretq_s64_u8(vshrq_n_u8(vreinterpretq_u8_s64(a), c));
-}
-
-SIMD_INLINE v128 v128_shr_n_s8(v128 a, unsigned int c) {
- return vreinterpretq_s64_s8(vshrq_n_s8(vreinterpretq_s8_s64(a), c));
-}
-
-SIMD_INLINE v128 v128_shl_n_16(v128 a, unsigned int c) {
- return vreinterpretq_s64_u16(vshlq_n_u16(vreinterpretq_u16_s64(a), c));
-}
-
-SIMD_INLINE v128 v128_shr_n_u16(v128 a, unsigned int c) {
- return vreinterpretq_s64_u16(vshrq_n_u16(vreinterpretq_u16_s64(a), c));
-}
-
-SIMD_INLINE v128 v128_shr_n_s16(v128 a, unsigned int c) {
- return vreinterpretq_s64_s16(vshrq_n_s16(vreinterpretq_s16_s64(a), c));
-}
-
-SIMD_INLINE v128 v128_shl_n_32(v128 a, unsigned int c) {
- return vreinterpretq_s64_u32(vshlq_n_u32(vreinterpretq_u32_s64(a), c));
-}
-
-SIMD_INLINE v128 v128_shr_n_u32(v128 a, unsigned int c) {
- return vreinterpretq_s64_u32(vshrq_n_u32(vreinterpretq_u32_s64(a), c));
-}
-
-SIMD_INLINE v128 v128_shr_n_s32(v128 a, unsigned int c) {
- return vreinterpretq_s64_s32(vshrq_n_s32(vreinterpretq_s32_s64(a), c));
-}
-
-SIMD_INLINE v128 v128_shl_n_64(v128 a, unsigned int c) {
- return vreinterpretq_s64_u64(vshlq_n_u64(vreinterpretq_u64_s64(a), c));
-}
-
-SIMD_INLINE v128 v128_shr_n_u64(v128 a, unsigned int c) {
- return vreinterpretq_s64_u64(vshrq_n_u64(vreinterpretq_u64_s64(a), c));
-}
-
-SIMD_INLINE v128 v128_shr_n_s64(v128 a, unsigned int c) {
- return vshrq_n_s64(a, c);
-}
-
-#else
-
-SIMD_INLINE v128 v128_shl_n_byte(v128 a, unsigned int n) {
- if (n < 8)
- return v128_from_v64(v64_or(v64_shl_n_byte(v128_high_v64(a), n),
- v64_shr_n_byte(v128_low_v64(a), 8 - n)),
- v64_shl_n_byte(v128_low_v64(a), n));
- else
- return v128_from_v64(v64_shl_n_byte(v128_low_v64(a), n - 8), v64_zero());
-}
-
-SIMD_INLINE v128 v128_shr_n_byte(v128 a, unsigned int n) {
- if (n < 8)
- return v128_from_v64(v64_shr_n_byte(v128_high_v64(a), n),
- v64_or(v64_shr_n_byte(v128_low_v64(a), n),
- v64_shl_n_byte(v128_high_v64(a), 8 - n)));
- else
- return v128_from_v64(v64_zero(), v64_shr_n_byte(v128_high_v64(a), n - 8));
-}
-
-SIMD_INLINE v128 v128_shl_n_8(v128 a, unsigned int c) {
- return v128_shl_8(a, c);
-}
-
-SIMD_INLINE v128 v128_shr_n_u8(v128 a, unsigned int c) {
- return v128_shr_u8(a, c);
-}
-
-SIMD_INLINE v128 v128_shr_n_s8(v128 a, unsigned int c) {
- return v128_shr_s8(a, c);
-}
-
-SIMD_INLINE v128 v128_shl_n_16(v128 a, unsigned int c) {
- return v128_shl_16(a, c);
-}
-
-SIMD_INLINE v128 v128_shr_n_u16(v128 a, unsigned int c) {
- return v128_shr_u16(a, c);
-}
-
-SIMD_INLINE v128 v128_shr_n_s16(v128 a, unsigned int c) {
- return v128_shr_s16(a, c);
-}
-
-SIMD_INLINE v128 v128_shl_n_32(v128 a, unsigned int c) {
- return v128_shl_32(a, c);
-}
-
-SIMD_INLINE v128 v128_shr_n_u32(v128 a, unsigned int c) {
- return v128_shr_u32(a, c);
-}
-
-SIMD_INLINE v128 v128_shr_n_s32(v128 a, unsigned int c) {
- return v128_shr_s32(a, c);
-}
-
-SIMD_INLINE v128 v128_shl_n_64(v128 a, unsigned int c) {
- return v128_shl_64(a, c);
-}
-
-SIMD_INLINE v128 v128_shr_n_u64(v128 a, unsigned int c) {
- return v128_shr_u64(a, c);
-}
-
-SIMD_INLINE v128 v128_shr_n_s64(v128 a, unsigned int c) {
- return v128_shr_s64(a, c);
-}
-
-#endif
-
-typedef uint32x4_t sad128_internal_u16;
-
-SIMD_INLINE sad128_internal_u16 v128_sad_u16_init() { return vdupq_n_u32(0); }
-
-/* Implementation dependent return value. Result must be finalised with
- * v128_sad_u16_sum(). */
-SIMD_INLINE sad128_internal_u16 v128_sad_u16(sad128_internal_u16 s, v128 a,
- v128 b) {
- return vaddq_u32(
- s, vpaddlq_u16(vsubq_u16(
- vmaxq_u16(vreinterpretq_u16_s64(a), vreinterpretq_u16_s64(b)),
- vminq_u16(vreinterpretq_u16_s64(a), vreinterpretq_u16_s64(b)))));
-}
-
-SIMD_INLINE uint32_t v128_sad_u16_sum(sad128_internal_u16 s) {
- uint64x2_t t = vpaddlq_u32(s);
- return (uint32_t)(uint64_t)vget_high_u64(t) +
- (uint32_t)(uint64_t)vget_low_u64(t);
-}
-
-typedef v128 ssd128_internal_s16;
-SIMD_INLINE ssd128_internal_s16 v128_ssd_s16_init() { return v128_zero(); }
-
-/* Implementation dependent return value. Result must be finalised with
- * v128_ssd_s16_sum(). */
-SIMD_INLINE ssd128_internal_s16 v128_ssd_s16(ssd128_internal_s16 s, v128 a,
- v128 b) {
- v128 d = v128_sub_16(a, b);
- d = v128_madd_s16(d, d);
- return v128_add_64(
- s, vreinterpretq_s64_u64(vpaddlq_u32(vreinterpretq_u32_s64(d))));
-}
-
-SIMD_INLINE uint64_t v128_ssd_s16_sum(ssd128_internal_s16 s) {
- return v64_u64(v128_low_v64(s)) + v64_u64(v128_high_v64(s));
-}
-
-#endif // AOM_AOM_DSP_SIMD_V128_INTRINSICS_ARM_H_
diff --git a/third_party/aom/aom_dsp/simd/v128_intrinsics_c.h b/third_party/aom/aom_dsp/simd/v128_intrinsics_c.h
deleted file mode 100644
index bbe9a9d28..000000000
--- a/third_party/aom/aom_dsp/simd/v128_intrinsics_c.h
+++ /dev/null
@@ -1,888 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#ifndef AOM_AOM_DSP_SIMD_V128_INTRINSICS_C_H_
-#define AOM_AOM_DSP_SIMD_V128_INTRINSICS_C_H_
-
-#include <stdio.h>
-#include <stdlib.h>
-
-#include "config/aom_config.h"
-
-#include "aom_dsp/simd/v64_intrinsics_c.h"
-
-typedef union {
- uint8_t u8[16];
- uint16_t u16[8];
- uint32_t u32[4];
- uint64_t u64[2];
- int8_t s8[16];
- int16_t s16[8];
- int32_t s32[4];
- int64_t s64[2];
- c_v64 v64[2];
-} c_v128;
-
-SIMD_INLINE uint32_t c_v128_low_u32(c_v128 a) { return a.u32[0]; }
-
-SIMD_INLINE c_v64 c_v128_low_v64(c_v128 a) { return a.v64[0]; }
-
-SIMD_INLINE c_v64 c_v128_high_v64(c_v128 a) { return a.v64[1]; }
-
-SIMD_INLINE c_v128 c_v128_from_64(uint64_t hi, uint64_t lo) {
- c_v128 t;
- t.u64[1] = hi;
- t.u64[0] = lo;
- return t;
-}
-
-SIMD_INLINE c_v128 c_v128_from_v64(c_v64 hi, c_v64 lo) {
- c_v128 t;
- t.v64[1] = hi;
- t.v64[0] = lo;
- return t;
-}
-
-SIMD_INLINE c_v128 c_v128_from_32(uint32_t a, uint32_t b, uint32_t c,
- uint32_t d) {
- c_v128 t;
- t.u32[3] = a;
- t.u32[2] = b;
- t.u32[1] = c;
- t.u32[0] = d;
- return t;
-}
-
-SIMD_INLINE c_v128 c_v128_load_unaligned(const void *p) {
- c_v128 t;
- uint8_t *pp = (uint8_t *)p;
- uint8_t *q = (uint8_t *)&t;
- int c;
- for (c = 0; c < 16; c++) q[c] = pp[c];
- return t;
-}
-
-SIMD_INLINE c_v128 c_v128_load_aligned(const void *p) {
- if (SIMD_CHECK && (uintptr_t)p & 15) {
- fprintf(stderr, "Error: unaligned v128 load at %p\n", p);
- abort();
- }
- return c_v128_load_unaligned(p);
-}
-
-SIMD_INLINE void c_v128_store_unaligned(void *p, c_v128 a) {
- uint8_t *pp = (uint8_t *)p;
- uint8_t *q = (uint8_t *)&a;
- int c;
- for (c = 0; c < 16; c++) pp[c] = q[c];
-}
-
-SIMD_INLINE void c_v128_store_aligned(void *p, c_v128 a) {
- if (SIMD_CHECK && (uintptr_t)p & 15) {
- fprintf(stderr, "Error: unaligned v128 store at %p\n", p);
- abort();
- }
- c_v128_store_unaligned(p, a);
-}
-
-SIMD_INLINE c_v128 c_v128_zero() {
- c_v128 t;
- t.u64[1] = t.u64[0] = 0;
- return t;
-}
-
-SIMD_INLINE c_v128 c_v128_dup_8(uint8_t x) {
- c_v128 t;
- t.v64[1] = t.v64[0] = c_v64_dup_8(x);
- return t;
-}
-
-SIMD_INLINE c_v128 c_v128_dup_16(uint16_t x) {
- c_v128 t;
- t.v64[1] = t.v64[0] = c_v64_dup_16(x);
- return t;
-}
-
-SIMD_INLINE c_v128 c_v128_dup_32(uint32_t x) {
- c_v128 t;
- t.v64[1] = t.v64[0] = c_v64_dup_32(x);
- return t;
-}
-
-SIMD_INLINE c_v128 c_v128_dup_64(uint64_t x) {
- c_v128 t;
- t.u64[1] = t.u64[0] = x;
- return t;
-}
-
-SIMD_INLINE int64_t c_v128_dotp_su8(c_v128 a, c_v128 b) {
- return c_v64_dotp_su8(a.v64[1], b.v64[1]) +
- c_v64_dotp_su8(a.v64[0], b.v64[0]);
-}
-
-SIMD_INLINE int64_t c_v128_dotp_s16(c_v128 a, c_v128 b) {
- return c_v64_dotp_s16(a.v64[1], b.v64[1]) +
- c_v64_dotp_s16(a.v64[0], b.v64[0]);
-}
-
-SIMD_INLINE int64_t c_v128_dotp_s32(c_v128 a, c_v128 b) {
- // 32 bit products, 64 bit sum
- return (int64_t)(int32_t)((int64_t)a.s32[3] * b.s32[3]) +
- (int64_t)(int32_t)((int64_t)a.s32[2] * b.s32[2]) +
- (int64_t)(int32_t)((int64_t)a.s32[1] * b.s32[1]) +
- (int64_t)(int32_t)((int64_t)a.s32[0] * b.s32[0]);
-}
-
-SIMD_INLINE uint64_t c_v128_hadd_u8(c_v128 a) {
- return c_v64_hadd_u8(a.v64[1]) + c_v64_hadd_u8(a.v64[0]);
-}
-
-typedef uint32_t c_sad128_internal;
-
-SIMD_INLINE c_sad128_internal c_v128_sad_u8_init() { return 0; }
-
-/* Implementation dependent return value. Result must be finalised with
- v128_sad_u8_sum().
- The result for more than 32 v128_sad_u8() calls is undefined. */
-SIMD_INLINE c_sad128_internal c_v128_sad_u8(c_sad128_internal s, c_v128 a,
- c_v128 b) {
- int c;
- for (c = 0; c < 16; c++)
- s += a.u8[c] > b.u8[c] ? a.u8[c] - b.u8[c] : b.u8[c] - a.u8[c];
- return s;
-}
-
-SIMD_INLINE uint32_t c_v128_sad_u8_sum(c_sad128_internal s) { return s; }
-
-typedef uint32_t c_ssd128_internal;
-
-SIMD_INLINE c_ssd128_internal c_v128_ssd_u8_init() { return 0; }
-
-/* Implementation dependent return value. Result must be finalised with
- * v128_ssd_u8_sum(). */
-SIMD_INLINE c_ssd128_internal c_v128_ssd_u8(c_ssd128_internal s, c_v128 a,
- c_v128 b) {
- int c;
- for (c = 0; c < 16; c++) s += (a.u8[c] - b.u8[c]) * (a.u8[c] - b.u8[c]);
- return s;
-}
-
-SIMD_INLINE uint32_t c_v128_ssd_u8_sum(c_ssd128_internal s) { return s; }
-
-SIMD_INLINE c_v128 c_v128_or(c_v128 a, c_v128 b) {
- return c_v128_from_v64(c_v64_or(a.v64[1], b.v64[1]),
- c_v64_or(a.v64[0], b.v64[0]));
-}
-
-SIMD_INLINE c_v128 c_v128_xor(c_v128 a, c_v128 b) {
- return c_v128_from_v64(c_v64_xor(a.v64[1], b.v64[1]),
- c_v64_xor(a.v64[0], b.v64[0]));
-}
-
-SIMD_INLINE c_v128 c_v128_and(c_v128 a, c_v128 b) {
- return c_v128_from_v64(c_v64_and(a.v64[1], b.v64[1]),
- c_v64_and(a.v64[0], b.v64[0]));
-}
-
-SIMD_INLINE c_v128 c_v128_andn(c_v128 a, c_v128 b) {
- return c_v128_from_v64(c_v64_andn(a.v64[1], b.v64[1]),
- c_v64_andn(a.v64[0], b.v64[0]));
-}
-
-SIMD_INLINE c_v128 c_v128_add_8(c_v128 a, c_v128 b) {
- return c_v128_from_v64(c_v64_add_8(a.v64[1], b.v64[1]),
- c_v64_add_8(a.v64[0], b.v64[0]));
-}
-
-SIMD_INLINE c_v128 c_v128_add_16(c_v128 a, c_v128 b) {
- return c_v128_from_v64(c_v64_add_16(a.v64[1], b.v64[1]),
- c_v64_add_16(a.v64[0], b.v64[0]));
-}
-
-SIMD_INLINE c_v128 c_v128_sadd_u8(c_v128 a, c_v128 b) {
- return c_v128_from_v64(c_v64_sadd_u8(a.v64[1], b.v64[1]),
- c_v64_sadd_u8(a.v64[0], b.v64[0]));
-}
-
-SIMD_INLINE c_v128 c_v128_sadd_s8(c_v128 a, c_v128 b) {
- return c_v128_from_v64(c_v64_sadd_s8(a.v64[1], b.v64[1]),
- c_v64_sadd_s8(a.v64[0], b.v64[0]));
-}
-
-SIMD_INLINE c_v128 c_v128_sadd_s16(c_v128 a, c_v128 b) {
- return c_v128_from_v64(c_v64_sadd_s16(a.v64[1], b.v64[1]),
- c_v64_sadd_s16(a.v64[0], b.v64[0]));
-}
-
-SIMD_INLINE c_v128 c_v128_add_32(c_v128 a, c_v128 b) {
- return c_v128_from_v64(c_v64_add_32(a.v64[1], b.v64[1]),
- c_v64_add_32(a.v64[0], b.v64[0]));
-}
-
-SIMD_INLINE c_v128 c_v128_add_64(c_v128 a, c_v128 b) {
- // Two complement overflow (silences sanitizers)
- return c_v128_from_64(
- a.v64[1].u64 > ~b.v64[1].u64 ? a.v64[1].u64 - ~b.v64[1].u64 - 1
- : a.v64[1].u64 + b.v64[1].u64,
- a.v64[0].u64 > ~b.v64[0].u64 ? a.v64[0].u64 - ~b.v64[0].u64 - 1
- : a.v64[0].u64 + b.v64[0].u64);
-}
-
-SIMD_INLINE c_v128 c_v128_padd_s16(c_v128 a) {
- c_v128 t;
- t.s32[0] = (int32_t)a.s16[0] + (int32_t)a.s16[1];
- t.s32[1] = (int32_t)a.s16[2] + (int32_t)a.s16[3];
- t.s32[2] = (int32_t)a.s16[4] + (int32_t)a.s16[5];
- t.s32[3] = (int32_t)a.s16[6] + (int32_t)a.s16[7];
- return t;
-}
-
-SIMD_INLINE c_v128 c_v128_padd_u8(c_v128 a) {
- c_v128 t;
- t.u16[0] = (uint16_t)a.u8[0] + (uint16_t)a.u8[1];
- t.u16[1] = (uint16_t)a.u8[2] + (uint16_t)a.u8[3];
- t.u16[2] = (uint16_t)a.u8[4] + (uint16_t)a.u8[5];
- t.u16[3] = (uint16_t)a.u8[6] + (uint16_t)a.u8[7];
- t.u16[4] = (uint16_t)a.u8[8] + (uint16_t)a.u8[9];
- t.u16[5] = (uint16_t)a.u8[10] + (uint16_t)a.u8[11];
- t.u16[6] = (uint16_t)a.u8[12] + (uint16_t)a.u8[13];
- t.u16[7] = (uint16_t)a.u8[14] + (uint16_t)a.u8[15];
- return t;
-}
-
-SIMD_INLINE c_v128 c_v128_sub_8(c_v128 a, c_v128 b) {
- return c_v128_from_v64(c_v64_sub_8(a.v64[1], b.v64[1]),
- c_v64_sub_8(a.v64[0], b.v64[0]));
-}
-
-SIMD_INLINE c_v128 c_v128_ssub_u8(c_v128 a, c_v128 b) {
- return c_v128_from_v64(c_v64_ssub_u8(a.v64[1], b.v64[1]),
- c_v64_ssub_u8(a.v64[0], b.v64[0]));
-}
-
-SIMD_INLINE c_v128 c_v128_ssub_s8(c_v128 a, c_v128 b) {
- return c_v128_from_v64(c_v64_ssub_s8(a.v64[1], b.v64[1]),
- c_v64_ssub_s8(a.v64[0], b.v64[0]));
-}
-
-SIMD_INLINE c_v128 c_v128_sub_16(c_v128 a, c_v128 b) {
- return c_v128_from_v64(c_v64_sub_16(a.v64[1], b.v64[1]),
- c_v64_sub_16(a.v64[0], b.v64[0]));
-}
-
-SIMD_INLINE c_v128 c_v128_ssub_s16(c_v128 a, c_v128 b) {
- return c_v128_from_v64(c_v64_ssub_s16(a.v64[1], b.v64[1]),
- c_v64_ssub_s16(a.v64[0], b.v64[0]));
-}
-
-SIMD_INLINE c_v128 c_v128_ssub_u16(c_v128 a, c_v128 b) {
- return c_v128_from_v64(c_v64_ssub_u16(a.v64[1], b.v64[1]),
- c_v64_ssub_u16(a.v64[0], b.v64[0]));
-}
-
-SIMD_INLINE c_v128 c_v128_sub_32(c_v128 a, c_v128 b) {
- return c_v128_from_v64(c_v64_sub_32(a.v64[1], b.v64[1]),
- c_v64_sub_32(a.v64[0], b.v64[0]));
-}
-
-SIMD_INLINE c_v128 c_v128_sub_64(c_v128 a, c_v128 b) {
- // Two complement underflow (silences sanitizers)
- return c_v128_from_64(
- a.v64[1].u64 < b.v64[1].u64 ? a.v64[1].u64 + ~b.v64[1].u64 + 1
- : a.v64[1].u64 - b.v64[1].u64,
- a.v64[0].u64 < b.v64[0].u64 ? a.v64[0].u64 + ~b.v64[0].u64 + 1
- : a.v64[0].u64 - b.v64[0].u64);
-}
-
-SIMD_INLINE c_v128 c_v128_abs_s16(c_v128 a) {
- return c_v128_from_v64(c_v64_abs_s16(a.v64[1]), c_v64_abs_s16(a.v64[0]));
-}
-
-SIMD_INLINE c_v128 c_v128_abs_s8(c_v128 a) {
- return c_v128_from_v64(c_v64_abs_s8(a.v64[1]), c_v64_abs_s8(a.v64[0]));
-}
-
-SIMD_INLINE c_v128 c_v128_mul_s16(c_v64 a, c_v64 b) {
- c_v64 lo_bits = c_v64_mullo_s16(a, b);
- c_v64 hi_bits = c_v64_mulhi_s16(a, b);
- return c_v128_from_v64(c_v64_ziphi_16(hi_bits, lo_bits),
- c_v64_ziplo_16(hi_bits, lo_bits));
-}
-
-SIMD_INLINE c_v128 c_v128_mullo_s16(c_v128 a, c_v128 b) {
- return c_v128_from_v64(c_v64_mullo_s16(a.v64[1], b.v64[1]),
- c_v64_mullo_s16(a.v64[0], b.v64[0]));
-}
-
-SIMD_INLINE c_v128 c_v128_mulhi_s16(c_v128 a, c_v128 b) {
- return c_v128_from_v64(c_v64_mulhi_s16(a.v64[1], b.v64[1]),
- c_v64_mulhi_s16(a.v64[0], b.v64[0]));
-}
-
-SIMD_INLINE c_v128 c_v128_mullo_s32(c_v128 a, c_v128 b) {
- return c_v128_from_v64(c_v64_mullo_s32(a.v64[1], b.v64[1]),
- c_v64_mullo_s32(a.v64[0], b.v64[0]));
-}
-
-SIMD_INLINE c_v128 c_v128_madd_s16(c_v128 a, c_v128 b) {
- return c_v128_from_v64(c_v64_madd_s16(a.v64[1], b.v64[1]),
- c_v64_madd_s16(a.v64[0], b.v64[0]));
-}
-
-SIMD_INLINE c_v128 c_v128_madd_us8(c_v128 a, c_v128 b) {
- return c_v128_from_v64(c_v64_madd_us8(a.v64[1], b.v64[1]),
- c_v64_madd_us8(a.v64[0], b.v64[0]));
-}
-
-SIMD_INLINE c_v128 c_v128_avg_u8(c_v128 a, c_v128 b) {
- return c_v128_from_v64(c_v64_avg_u8(a.v64[1], b.v64[1]),
- c_v64_avg_u8(a.v64[0], b.v64[0]));
-}
-
-SIMD_INLINE c_v128 c_v128_rdavg_u8(c_v128 a, c_v128 b) {
- return c_v128_from_v64(c_v64_rdavg_u8(a.v64[1], b.v64[1]),
- c_v64_rdavg_u8(a.v64[0], b.v64[0]));
-}
-
-SIMD_INLINE c_v128 c_v128_rdavg_u16(c_v128 a, c_v128 b) {
- return c_v128_from_v64(c_v64_rdavg_u16(a.v64[1], b.v64[1]),
- c_v64_rdavg_u16(a.v64[0], b.v64[0]));
-}
-
-SIMD_INLINE c_v128 c_v128_avg_u16(c_v128 a, c_v128 b) {
- return c_v128_from_v64(c_v64_avg_u16(a.v64[1], b.v64[1]),
- c_v64_avg_u16(a.v64[0], b.v64[0]));
-}
-
-SIMD_INLINE c_v128 c_v128_min_u8(c_v128 a, c_v128 b) {
- return c_v128_from_v64(c_v64_min_u8(a.v64[1], b.v64[1]),
- c_v64_min_u8(a.v64[0], b.v64[0]));
-}
-
-SIMD_INLINE c_v128 c_v128_max_u8(c_v128 a, c_v128 b) {
- return c_v128_from_v64(c_v64_max_u8(a.v64[1], b.v64[1]),
- c_v64_max_u8(a.v64[0], b.v64[0]));
-}
-
-SIMD_INLINE c_v128 c_v128_min_s8(c_v128 a, c_v128 b) {
- return c_v128_from_v64(c_v64_min_s8(a.v64[1], b.v64[1]),
- c_v64_min_s8(a.v64[0], b.v64[0]));
-}
-
-SIMD_INLINE uint32_t c_v128_movemask_8(c_v128 a) {
- return ((a.s8[15] < 0) << 15) | ((a.s8[14] < 0) << 14) |
- ((a.s8[13] < 0) << 13) | ((a.s8[12] < 0) << 12) |
- ((a.s8[11] < 0) << 11) | ((a.s8[10] < 0) << 10) |
- ((a.s8[9] < 0) << 9) | ((a.s8[8] < 0) << 8) | ((a.s8[7] < 0) << 7) |
- ((a.s8[6] < 0) << 6) | ((a.s8[5] < 0) << 5) | ((a.s8[4] < 0) << 4) |
- ((a.s8[3] < 0) << 3) | ((a.s8[2] < 0) << 2) | ((a.s8[1] < 0) << 1) |
- ((a.s8[0] < 0) << 0);
-}
-
-SIMD_INLINE c_v128 c_v128_blend_8(c_v128 a, c_v128 b, c_v128 c) {
- c_v128 t;
- for (int i = 0; i < 16; i++) t.u8[i] = c.s8[i] < 0 ? b.u8[i] : a.u8[i];
- return t;
-}
-
-SIMD_INLINE c_v128 c_v128_max_s8(c_v128 a, c_v128 b) {
- return c_v128_from_v64(c_v64_max_s8(a.v64[1], b.v64[1]),
- c_v64_max_s8(a.v64[0], b.v64[0]));
-}
-
-SIMD_INLINE c_v128 c_v128_min_s16(c_v128 a, c_v128 b) {
- return c_v128_from_v64(c_v64_min_s16(a.v64[1], b.v64[1]),
- c_v64_min_s16(a.v64[0], b.v64[0]));
-}
-
-SIMD_INLINE c_v128 c_v128_max_s16(c_v128 a, c_v128 b) {
- return c_v128_from_v64(c_v64_max_s16(a.v64[1], b.v64[1]),
- c_v64_max_s16(a.v64[0], b.v64[0]));
-}
-
-SIMD_INLINE c_v128 c_v128_max_s32(c_v128 a, c_v128 b) {
- c_v128 t;
- int c;
- for (c = 0; c < 4; c++) t.s32[c] = a.s32[c] > b.s32[c] ? a.s32[c] : b.s32[c];
- return t;
-}
-
-SIMD_INLINE c_v128 c_v128_min_s32(c_v128 a, c_v128 b) {
- c_v128 t;
- int c;
- for (c = 0; c < 4; c++) t.s32[c] = a.s32[c] > b.s32[c] ? b.s32[c] : a.s32[c];
- return t;
-}
-
-SIMD_INLINE c_v128 c_v128_ziplo_8(c_v128 a, c_v128 b) {
- return c_v128_from_v64(c_v64_ziphi_8(a.v64[0], b.v64[0]),
- c_v64_ziplo_8(a.v64[0], b.v64[0]));
-}
-
-SIMD_INLINE c_v128 c_v128_ziphi_8(c_v128 a, c_v128 b) {
- return c_v128_from_v64(c_v64_ziphi_8(a.v64[1], b.v64[1]),
- c_v64_ziplo_8(a.v64[1], b.v64[1]));
-}
-
-SIMD_INLINE c_v128 c_v128_ziplo_16(c_v128 a, c_v128 b) {
- return c_v128_from_v64(c_v64_ziphi_16(a.v64[0], b.v64[0]),
- c_v64_ziplo_16(a.v64[0], b.v64[0]));
-}
-
-SIMD_INLINE c_v128 c_v128_ziphi_16(c_v128 a, c_v128 b) {
- return c_v128_from_v64(c_v64_ziphi_16(a.v64[1], b.v64[1]),
- c_v64_ziplo_16(a.v64[1], b.v64[1]));
-}
-
-SIMD_INLINE c_v128 c_v128_ziplo_32(c_v128 a, c_v128 b) {
- return c_v128_from_v64(c_v64_ziphi_32(a.v64[0], b.v64[0]),
- c_v64_ziplo_32(a.v64[0], b.v64[0]));
-}
-
-SIMD_INLINE c_v128 c_v128_ziphi_32(c_v128 a, c_v128 b) {
- return c_v128_from_v64(c_v64_ziphi_32(a.v64[1], b.v64[1]),
- c_v64_ziplo_32(a.v64[1], b.v64[1]));
-}
-
-SIMD_INLINE c_v128 c_v128_ziplo_64(c_v128 a, c_v128 b) {
- return c_v128_from_v64(a.v64[0], b.v64[0]);
-}
-
-SIMD_INLINE c_v128 c_v128_ziphi_64(c_v128 a, c_v128 b) {
- return c_v128_from_v64(a.v64[1], b.v64[1]);
-}
-
-SIMD_INLINE c_v128 c_v128_zip_8(c_v64 a, c_v64 b) {
- return c_v128_from_v64(c_v64_ziphi_8(a, b), c_v64_ziplo_8(a, b));
-}
-
-SIMD_INLINE c_v128 c_v128_zip_16(c_v64 a, c_v64 b) {
- return c_v128_from_v64(c_v64_ziphi_16(a, b), c_v64_ziplo_16(a, b));
-}
-
-SIMD_INLINE c_v128 c_v128_zip_32(c_v64 a, c_v64 b) {
- return c_v128_from_v64(c_v64_ziphi_32(a, b), c_v64_ziplo_32(a, b));
-}
-
-SIMD_INLINE c_v128 _c_v128_unzip_8(c_v128 a, c_v128 b, int mode) {
- c_v128 t;
- if (mode) {
- t.u8[15] = b.u8[15];
- t.u8[14] = b.u8[13];
- t.u8[13] = b.u8[11];
- t.u8[12] = b.u8[9];
- t.u8[11] = b.u8[7];
- t.u8[10] = b.u8[5];
- t.u8[9] = b.u8[3];
- t.u8[8] = b.u8[1];
- t.u8[7] = a.u8[15];
- t.u8[6] = a.u8[13];
- t.u8[5] = a.u8[11];
- t.u8[4] = a.u8[9];
- t.u8[3] = a.u8[7];
- t.u8[2] = a.u8[5];
- t.u8[1] = a.u8[3];
- t.u8[0] = a.u8[1];
- } else {
- t.u8[15] = a.u8[14];
- t.u8[14] = a.u8[12];
- t.u8[13] = a.u8[10];
- t.u8[12] = a.u8[8];
- t.u8[11] = a.u8[6];
- t.u8[10] = a.u8[4];
- t.u8[9] = a.u8[2];
- t.u8[8] = a.u8[0];
- t.u8[7] = b.u8[14];
- t.u8[6] = b.u8[12];
- t.u8[5] = b.u8[10];
- t.u8[4] = b.u8[8];
- t.u8[3] = b.u8[6];
- t.u8[2] = b.u8[4];
- t.u8[1] = b.u8[2];
- t.u8[0] = b.u8[0];
- }
- return t;
-}
-
-SIMD_INLINE c_v128 c_v128_unziplo_8(c_v128 a, c_v128 b) {
- return CONFIG_BIG_ENDIAN ? _c_v128_unzip_8(a, b, 1)
- : _c_v128_unzip_8(a, b, 0);
-}
-
-SIMD_INLINE c_v128 c_v128_unziphi_8(c_v128 a, c_v128 b) {
- return CONFIG_BIG_ENDIAN ? _c_v128_unzip_8(b, a, 0)
- : _c_v128_unzip_8(b, a, 1);
-}
-
-SIMD_INLINE c_v128 _c_v128_unzip_16(c_v128 a, c_v128 b, int mode) {
- c_v128 t;
- if (mode) {
- t.u16[7] = b.u16[7];
- t.u16[6] = b.u16[5];
- t.u16[5] = b.u16[3];
- t.u16[4] = b.u16[1];
- t.u16[3] = a.u16[7];
- t.u16[2] = a.u16[5];
- t.u16[1] = a.u16[3];
- t.u16[0] = a.u16[1];
- } else {
- t.u16[7] = a.u16[6];
- t.u16[6] = a.u16[4];
- t.u16[5] = a.u16[2];
- t.u16[4] = a.u16[0];
- t.u16[3] = b.u16[6];
- t.u16[2] = b.u16[4];
- t.u16[1] = b.u16[2];
- t.u16[0] = b.u16[0];
- }
- return t;
-}
-
-SIMD_INLINE c_v128 c_v128_unziplo_16(c_v128 a, c_v128 b) {
- return CONFIG_BIG_ENDIAN ? _c_v128_unzip_16(a, b, 1)
- : _c_v128_unzip_16(a, b, 0);
-}
-
-SIMD_INLINE c_v128 c_v128_unziphi_16(c_v128 a, c_v128 b) {
- return CONFIG_BIG_ENDIAN ? _c_v128_unzip_16(b, a, 0)
- : _c_v128_unzip_16(b, a, 1);
-}
-
-SIMD_INLINE c_v128 _c_v128_unzip_32(c_v128 a, c_v128 b, int mode) {
- c_v128 t;
- if (mode) {
- t.u32[3] = b.u32[3];
- t.u32[2] = b.u32[1];
- t.u32[1] = a.u32[3];
- t.u32[0] = a.u32[1];
- } else {
- t.u32[3] = a.u32[2];
- t.u32[2] = a.u32[0];
- t.u32[1] = b.u32[2];
- t.u32[0] = b.u32[0];
- }
- return t;
-}
-
-SIMD_INLINE c_v128 c_v128_unziplo_32(c_v128 a, c_v128 b) {
- return CONFIG_BIG_ENDIAN ? _c_v128_unzip_32(a, b, 1)
- : _c_v128_unzip_32(a, b, 0);
-}
-
-SIMD_INLINE c_v128 c_v128_unziphi_32(c_v128 a, c_v128 b) {
- return CONFIG_BIG_ENDIAN ? _c_v128_unzip_32(b, a, 0)
- : _c_v128_unzip_32(b, a, 1);
-}
-
-SIMD_INLINE c_v128 c_v128_unpack_u8_s16(c_v64 a) {
- return c_v128_from_v64(c_v64_unpackhi_u8_s16(a), c_v64_unpacklo_u8_s16(a));
-}
-
-SIMD_INLINE c_v128 c_v128_unpacklo_u8_s16(c_v128 a) {
- return c_v128_from_v64(c_v64_unpackhi_u8_s16(a.v64[0]),
- c_v64_unpacklo_u8_s16(a.v64[0]));
-}
-
-SIMD_INLINE c_v128 c_v128_unpackhi_u8_s16(c_v128 a) {
- return c_v128_from_v64(c_v64_unpackhi_u8_s16(a.v64[1]),
- c_v64_unpacklo_u8_s16(a.v64[1]));
-}
-
-SIMD_INLINE c_v128 c_v128_unpack_s8_s16(c_v64 a) {
- return c_v128_from_v64(c_v64_unpackhi_s8_s16(a), c_v64_unpacklo_s8_s16(a));
-}
-
-SIMD_INLINE c_v128 c_v128_unpacklo_s8_s16(c_v128 a) {
- return c_v128_from_v64(c_v64_unpackhi_s8_s16(a.v64[0]),
- c_v64_unpacklo_s8_s16(a.v64[0]));
-}
-
-SIMD_INLINE c_v128 c_v128_unpackhi_s8_s16(c_v128 a) {
- return c_v128_from_v64(c_v64_unpackhi_s8_s16(a.v64[1]),
- c_v64_unpacklo_s8_s16(a.v64[1]));
-}
-
-SIMD_INLINE c_v128 c_v128_pack_s32_s16(c_v128 a, c_v128 b) {
- return c_v128_from_v64(c_v64_pack_s32_s16(a.v64[1], a.v64[0]),
- c_v64_pack_s32_s16(b.v64[1], b.v64[0]));
-}
-
-SIMD_INLINE c_v128 c_v128_pack_s32_u16(c_v128 a, c_v128 b) {
- return c_v128_from_v64(c_v64_pack_s32_u16(a.v64[1], a.v64[0]),
- c_v64_pack_s32_u16(b.v64[1], b.v64[0]));
-}
-
-SIMD_INLINE c_v128 c_v128_pack_s16_u8(c_v128 a, c_v128 b) {
- return c_v128_from_v64(c_v64_pack_s16_u8(a.v64[1], a.v64[0]),
- c_v64_pack_s16_u8(b.v64[1], b.v64[0]));
-}
-
-SIMD_INLINE c_v128 c_v128_pack_s16_s8(c_v128 a, c_v128 b) {
- return c_v128_from_v64(c_v64_pack_s16_s8(a.v64[1], a.v64[0]),
- c_v64_pack_s16_s8(b.v64[1], b.v64[0]));
-}
-
-SIMD_INLINE c_v128 c_v128_unpack_u16_s32(c_v64 a) {
- return c_v128_from_v64(c_v64_unpackhi_u16_s32(a), c_v64_unpacklo_u16_s32(a));
-}
-
-SIMD_INLINE c_v128 c_v128_unpack_s16_s32(c_v64 a) {
- return c_v128_from_v64(c_v64_unpackhi_s16_s32(a), c_v64_unpacklo_s16_s32(a));
-}
-
-SIMD_INLINE c_v128 c_v128_unpacklo_u16_s32(c_v128 a) {
- return c_v128_from_v64(c_v64_unpackhi_u16_s32(a.v64[0]),
- c_v64_unpacklo_u16_s32(a.v64[0]));
-}
-
-SIMD_INLINE c_v128 c_v128_unpacklo_s16_s32(c_v128 a) {
- return c_v128_from_v64(c_v64_unpackhi_s16_s32(a.v64[0]),
- c_v64_unpacklo_s16_s32(a.v64[0]));
-}
-
-SIMD_INLINE c_v128 c_v128_unpackhi_u16_s32(c_v128 a) {
- return c_v128_from_v64(c_v64_unpackhi_u16_s32(a.v64[1]),
- c_v64_unpacklo_u16_s32(a.v64[1]));
-}
-
-SIMD_INLINE c_v128 c_v128_unpackhi_s16_s32(c_v128 a) {
- return c_v128_from_v64(c_v64_unpackhi_s16_s32(a.v64[1]),
- c_v64_unpacklo_s16_s32(a.v64[1]));
-}
-
-SIMD_INLINE c_v128 c_v128_shuffle_8(c_v128 a, c_v128 pattern) {
- c_v128 t;
- int c;
- for (c = 0; c < 16; c++)
- t.u8[c] = a.u8[CONFIG_BIG_ENDIAN ? 15 - (pattern.u8[c] & 15)
- : pattern.u8[c] & 15];
-
- return t;
-}
-
-SIMD_INLINE c_v128 c_v128_cmpgt_s8(c_v128 a, c_v128 b) {
- return c_v128_from_v64(c_v64_cmpgt_s8(a.v64[1], b.v64[1]),
- c_v64_cmpgt_s8(a.v64[0], b.v64[0]));
-}
-
-SIMD_INLINE c_v128 c_v128_cmplt_s8(c_v128 a, c_v128 b) {
- return c_v128_from_v64(c_v64_cmplt_s8(a.v64[1], b.v64[1]),
- c_v64_cmplt_s8(a.v64[0], b.v64[0]));
-}
-
-SIMD_INLINE c_v128 c_v128_cmpeq_8(c_v128 a, c_v128 b) {
- return c_v128_from_v64(c_v64_cmpeq_8(a.v64[1], b.v64[1]),
- c_v64_cmpeq_8(a.v64[0], b.v64[0]));
-}
-
-SIMD_INLINE c_v128 c_v128_cmpgt_s16(c_v128 a, c_v128 b) {
- return c_v128_from_v64(c_v64_cmpgt_s16(a.v64[1], b.v64[1]),
- c_v64_cmpgt_s16(a.v64[0], b.v64[0]));
-}
-
-SIMD_INLINE c_v128 c_v128_cmplt_s16(c_v128 a, c_v128 b) {
- return c_v128_from_v64(c_v64_cmplt_s16(a.v64[1], b.v64[1]),
- c_v64_cmplt_s16(a.v64[0], b.v64[0]));
-}
-
-SIMD_INLINE c_v128 c_v128_cmpeq_16(c_v128 a, c_v128 b) {
- return c_v128_from_v64(c_v64_cmpeq_16(a.v64[1], b.v64[1]),
- c_v64_cmpeq_16(a.v64[0], b.v64[0]));
-}
-
-SIMD_INLINE c_v128 c_v128_cmpgt_s32(c_v128 a, c_v128 b) {
- c_v128 t;
- int c;
- for (c = 0; c < 4; c++) t.s32[c] = -(a.s32[c] > b.s32[c]);
- return t;
-}
-
-SIMD_INLINE c_v128 c_v128_cmplt_s32(c_v128 a, c_v128 b) {
- c_v128 t;
- int c;
- for (c = 0; c < 4; c++) t.s32[c] = -(a.s32[c] < b.s32[c]);
- return t;
-}
-
-SIMD_INLINE c_v128 c_v128_cmpeq_32(c_v128 a, c_v128 b) {
- c_v128 t;
- int c;
- for (c = 0; c < 4; c++) t.s32[c] = -(a.s32[c] == b.s32[c]);
- return t;
-}
-
-SIMD_INLINE c_v128 c_v128_shl_n_byte(c_v128 a, const unsigned int n) {
- if (n < 8)
- return c_v128_from_v64(c_v64_or(c_v64_shl_n_byte(a.v64[1], n),
- c_v64_shr_n_byte(a.v64[0], 8 - n)),
- c_v64_shl_n_byte(a.v64[0], n));
- else
- return c_v128_from_v64(c_v64_shl_n_byte(a.v64[0], n - 8), c_v64_zero());
-}
-
-SIMD_INLINE c_v128 c_v128_shr_n_byte(c_v128 a, const unsigned int n) {
- if (n < 8)
- return c_v128_from_v64(c_v64_shr_n_byte(a.v64[1], n),
- c_v64_or(c_v64_shr_n_byte(a.v64[0], n),
- c_v64_shl_n_byte(a.v64[1], 8 - n)));
- else
- return c_v128_from_v64(c_v64_zero(), c_v64_shr_n_byte(a.v64[1], n - 8));
-}
-
-SIMD_INLINE c_v128 c_v128_align(c_v128 a, c_v128 b, const unsigned int c) {
- if (SIMD_CHECK && c > 15) {
- fprintf(stderr, "Error: undefined alignment %d\n", c);
- abort();
- }
- return c ? c_v128_or(c_v128_shr_n_byte(b, c), c_v128_shl_n_byte(a, 16 - c))
- : b;
-}
-
-SIMD_INLINE c_v128 c_v128_shl_8(c_v128 a, const unsigned int c) {
- return c_v128_from_v64(c_v64_shl_8(a.v64[1], c), c_v64_shl_8(a.v64[0], c));
-}
-
-SIMD_INLINE c_v128 c_v128_shr_u8(c_v128 a, const unsigned int c) {
- return c_v128_from_v64(c_v64_shr_u8(a.v64[1], c), c_v64_shr_u8(a.v64[0], c));
-}
-
-SIMD_INLINE c_v128 c_v128_shr_s8(c_v128 a, const unsigned int c) {
- return c_v128_from_v64(c_v64_shr_s8(a.v64[1], c), c_v64_shr_s8(a.v64[0], c));
-}
-
-SIMD_INLINE c_v128 c_v128_shl_16(c_v128 a, const unsigned int c) {
- return c_v128_from_v64(c_v64_shl_16(a.v64[1], c), c_v64_shl_16(a.v64[0], c));
-}
-
-SIMD_INLINE c_v128 c_v128_shr_u16(c_v128 a, const unsigned int c) {
- return c_v128_from_v64(c_v64_shr_u16(a.v64[1], c),
- c_v64_shr_u16(a.v64[0], c));
-}
-
-SIMD_INLINE c_v128 c_v128_shr_s16(c_v128 a, const unsigned int c) {
- return c_v128_from_v64(c_v64_shr_s16(a.v64[1], c),
- c_v64_shr_s16(a.v64[0], c));
-}
-
-SIMD_INLINE c_v128 c_v128_shl_32(c_v128 a, const unsigned int c) {
- return c_v128_from_v64(c_v64_shl_32(a.v64[1], c), c_v64_shl_32(a.v64[0], c));
-}
-
-SIMD_INLINE c_v128 c_v128_shr_u32(c_v128 a, const unsigned int c) {
- return c_v128_from_v64(c_v64_shr_u32(a.v64[1], c),
- c_v64_shr_u32(a.v64[0], c));
-}
-
-SIMD_INLINE c_v128 c_v128_shr_s32(c_v128 a, const unsigned int c) {
- return c_v128_from_v64(c_v64_shr_s32(a.v64[1], c),
- c_v64_shr_s32(a.v64[0], c));
-}
-
-SIMD_INLINE c_v128 c_v128_shl_64(c_v128 a, const unsigned int c) {
- a.v64[1].u64 <<= c;
- a.v64[0].u64 <<= c;
- return c_v128_from_v64(a.v64[1], a.v64[0]);
-}
-
-SIMD_INLINE c_v128 c_v128_shr_u64(c_v128 a, const unsigned int c) {
- a.v64[1].u64 >>= c;
- a.v64[0].u64 >>= c;
- return c_v128_from_v64(a.v64[1], a.v64[0]);
-}
-
-SIMD_INLINE c_v128 c_v128_shr_s64(c_v128 a, const unsigned int c) {
- a.v64[1].s64 >>= c;
- a.v64[0].s64 >>= c;
- return c_v128_from_v64(a.v64[1], a.v64[0]);
-}
-
-SIMD_INLINE c_v128 c_v128_shl_n_8(c_v128 a, const unsigned int n) {
- return c_v128_shl_8(a, n);
-}
-
-SIMD_INLINE c_v128 c_v128_shl_n_16(c_v128 a, const unsigned int n) {
- return c_v128_shl_16(a, n);
-}
-
-SIMD_INLINE c_v128 c_v128_shl_n_32(c_v128 a, const unsigned int n) {
- return c_v128_shl_32(a, n);
-}
-
-SIMD_INLINE c_v128 c_v128_shl_n_64(c_v128 a, const unsigned int n) {
- return c_v128_shl_64(a, n);
-}
-
-SIMD_INLINE c_v128 c_v128_shr_n_u8(c_v128 a, const unsigned int n) {
- return c_v128_shr_u8(a, n);
-}
-
-SIMD_INLINE c_v128 c_v128_shr_n_u16(c_v128 a, const unsigned int n) {
- return c_v128_shr_u16(a, n);
-}
-
-SIMD_INLINE c_v128 c_v128_shr_n_u32(c_v128 a, const unsigned int n) {
- return c_v128_shr_u32(a, n);
-}
-
-SIMD_INLINE c_v128 c_v128_shr_n_u64(c_v128 a, const unsigned int n) {
- return c_v128_shr_u64(a, n);
-}
-
-SIMD_INLINE c_v128 c_v128_shr_n_s8(c_v128 a, const unsigned int n) {
- return c_v128_shr_s8(a, n);
-}
-
-SIMD_INLINE c_v128 c_v128_shr_n_s16(c_v128 a, const unsigned int n) {
- return c_v128_shr_s16(a, n);
-}
-
-SIMD_INLINE c_v128 c_v128_shr_n_s32(c_v128 a, const unsigned int n) {
- return c_v128_shr_s32(a, n);
-}
-
-SIMD_INLINE c_v128 c_v128_shr_n_s64(c_v128 a, const unsigned int n) {
- return c_v128_shr_s64(a, n);
-}
-
-typedef uint32_t c_sad128_internal_u16;
-
-SIMD_INLINE c_sad128_internal_u16 c_v128_sad_u16_init() { return 0; }
-
-/* Implementation dependent return value. Result must be finalised with
- * v128_sad_u16_sum(). */
-SIMD_INLINE c_sad128_internal_u16 c_v128_sad_u16(c_sad128_internal_u16 s,
- c_v128 a, c_v128 b) {
- int c;
- for (c = 0; c < 8; c++)
- s += a.u16[c] > b.u16[c] ? a.u16[c] - b.u16[c] : b.u16[c] - a.u16[c];
- return s;
-}
-
-SIMD_INLINE uint32_t c_v128_sad_u16_sum(c_sad128_internal_u16 s) { return s; }
-
-typedef uint64_t c_ssd128_internal_s16;
-
-SIMD_INLINE c_ssd128_internal_s16 c_v128_ssd_s16_init() { return 0; }
-
-/* Implementation dependent return value. Result must be finalised with
- * v128_ssd_s16_sum(). */
-SIMD_INLINE c_ssd128_internal_s16 c_v128_ssd_s16(c_ssd128_internal_s16 s,
- c_v128 a, c_v128 b) {
- int c;
- for (c = 0; c < 8; c++)
- s += (int32_t)(int16_t)(a.s16[c] - b.s16[c]) *
- (int32_t)(int16_t)(a.s16[c] - b.s16[c]);
- return s;
-}
-
-SIMD_INLINE uint64_t c_v128_ssd_s16_sum(c_ssd128_internal_s16 s) { return s; }
-
-#endif // AOM_AOM_DSP_SIMD_V128_INTRINSICS_C_H_
diff --git a/third_party/aom/aom_dsp/simd/v128_intrinsics_x86.h b/third_party/aom/aom_dsp/simd/v128_intrinsics_x86.h
deleted file mode 100644
index 6c7241ff4..000000000
--- a/third_party/aom/aom_dsp/simd/v128_intrinsics_x86.h
+++ /dev/null
@@ -1,656 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#ifndef AOM_AOM_DSP_SIMD_V128_INTRINSICS_X86_H_
-#define AOM_AOM_DSP_SIMD_V128_INTRINSICS_X86_H_
-
-#include <stdint.h>
-#include "aom_dsp/simd/v64_intrinsics_x86.h"
-
-typedef __m128i v128;
-
-SIMD_INLINE uint32_t v128_low_u32(v128 a) {
- return (uint32_t)_mm_cvtsi128_si32(a);
-}
-
-SIMD_INLINE v64 v128_low_v64(v128 a) {
- return _mm_unpacklo_epi64(a, v64_zero());
-}
-
-SIMD_INLINE v64 v128_high_v64(v128 a) { return _mm_srli_si128(a, 8); }
-
-SIMD_INLINE v128 v128_from_v64(v64 a, v64 b) {
- return _mm_unpacklo_epi64(b, a);
-}
-
-SIMD_INLINE v128 v128_from_64(uint64_t a, uint64_t b) {
- return v128_from_v64(v64_from_64(a), v64_from_64(b));
-}
-
-SIMD_INLINE v128 v128_from_32(uint32_t a, uint32_t b, uint32_t c, uint32_t d) {
- return _mm_set_epi32(a, b, c, d);
-}
-
-SIMD_INLINE v128 v128_load_aligned(const void *p) {
- return _mm_load_si128((__m128i *)p);
-}
-
-SIMD_INLINE v128 v128_load_unaligned(const void *p) {
-#if defined(__SSSE3__)
- return (__m128i)_mm_lddqu_si128((__m128i *)p);
-#else
- return _mm_loadu_si128((__m128i *)p);
-#endif
-}
-
-SIMD_INLINE void v128_store_aligned(void *p, v128 a) {
- _mm_store_si128((__m128i *)p, a);
-}
-
-SIMD_INLINE void v128_store_unaligned(void *p, v128 a) {
- _mm_storeu_si128((__m128i *)p, a);
-}
-
-// The following function requires an immediate.
-// Some compilers will check this during optimisation, others wont.
-#if defined(__OPTIMIZE__) && __OPTIMIZE__ && !defined(__clang__)
-#if defined(__SSSE3__)
-SIMD_INLINE v128 v128_align(v128 a, v128 b, const unsigned int c) {
- return c ? _mm_alignr_epi8(a, b, c) : b;
-}
-#else
-#define v128_align(a, b, c) \
- ((c) ? _mm_or_si128(_mm_srli_si128(b, c), _mm_slli_si128(a, 16 - (c))) : (b))
-#endif
-#else
-#if defined(__SSSE3__)
-#define v128_align(a, b, c) ((c) ? _mm_alignr_epi8(a, b, (uint8_t)(c)) : (b))
-#else
-#define v128_align(a, b, c) \
- ((c) ? _mm_or_si128(_mm_srli_si128(b, c), _mm_slli_si128(a, 16 - (c))) : (b))
-#endif
-#endif
-
-SIMD_INLINE v128 v128_zero() { return _mm_setzero_si128(); }
-
-SIMD_INLINE v128 v128_dup_8(uint8_t x) { return _mm_set1_epi8(x); }
-
-SIMD_INLINE v128 v128_dup_16(uint16_t x) { return _mm_set1_epi16(x); }
-
-SIMD_INLINE v128 v128_dup_32(uint32_t x) { return _mm_set1_epi32(x); }
-
-SIMD_INLINE v128 v128_dup_64(uint64_t x) {
- // _mm_set_pi64x and _mm_cvtsi64x_si64 missing in some compilers
- return _mm_set_epi32(x >> 32, (uint32_t)x, x >> 32, (uint32_t)x);
-}
-
-SIMD_INLINE v128 v128_add_8(v128 a, v128 b) { return _mm_add_epi8(a, b); }
-
-SIMD_INLINE v128 v128_add_16(v128 a, v128 b) { return _mm_add_epi16(a, b); }
-
-SIMD_INLINE v128 v128_sadd_u8(v128 a, v128 b) { return _mm_adds_epu8(a, b); }
-
-SIMD_INLINE v128 v128_sadd_s8(v128 a, v128 b) { return _mm_adds_epi8(a, b); }
-
-SIMD_INLINE v128 v128_sadd_s16(v128 a, v128 b) { return _mm_adds_epi16(a, b); }
-
-SIMD_INLINE v128 v128_add_32(v128 a, v128 b) { return _mm_add_epi32(a, b); }
-
-SIMD_INLINE v128 v128_add_64(v128 a, v128 b) { return _mm_add_epi64(a, b); }
-
-SIMD_INLINE v128 v128_padd_s16(v128 a) {
- return _mm_madd_epi16(a, _mm_set1_epi16(1));
-}
-
-SIMD_INLINE v128 v128_sub_8(v128 a, v128 b) { return _mm_sub_epi8(a, b); }
-
-SIMD_INLINE v128 v128_ssub_u8(v128 a, v128 b) { return _mm_subs_epu8(a, b); }
-
-SIMD_INLINE v128 v128_ssub_s8(v128 a, v128 b) { return _mm_subs_epi8(a, b); }
-
-SIMD_INLINE v128 v128_sub_16(v128 a, v128 b) { return _mm_sub_epi16(a, b); }
-
-SIMD_INLINE v128 v128_ssub_s16(v128 a, v128 b) { return _mm_subs_epi16(a, b); }
-
-SIMD_INLINE v128 v128_ssub_u16(v128 a, v128 b) { return _mm_subs_epu16(a, b); }
-
-SIMD_INLINE v128 v128_sub_32(v128 a, v128 b) { return _mm_sub_epi32(a, b); }
-
-SIMD_INLINE v128 v128_sub_64(v128 a, v128 b) { return _mm_sub_epi64(a, b); }
-
-SIMD_INLINE v128 v128_abs_s16(v128 a) {
-#if defined(__SSSE3__)
- return _mm_abs_epi16(a);
-#else
- return _mm_max_epi16(a, _mm_sub_epi16(_mm_setzero_si128(), a));
-#endif
-}
-
-SIMD_INLINE v128 v128_abs_s8(v128 a) {
-#if defined(__SSSE3__)
- return _mm_abs_epi8(a);
-#else
- v128 sign = _mm_cmplt_epi8(a, _mm_setzero_si128());
- return _mm_xor_si128(sign, _mm_add_epi8(a, sign));
-#endif
-}
-
-SIMD_INLINE v128 v128_ziplo_8(v128 a, v128 b) {
- return _mm_unpacklo_epi8(b, a);
-}
-
-SIMD_INLINE v128 v128_ziphi_8(v128 a, v128 b) {
- return _mm_unpackhi_epi8(b, a);
-}
-
-SIMD_INLINE v128 v128_ziplo_16(v128 a, v128 b) {
- return _mm_unpacklo_epi16(b, a);
-}
-
-SIMD_INLINE v128 v128_ziphi_16(v128 a, v128 b) {
- return _mm_unpackhi_epi16(b, a);
-}
-
-SIMD_INLINE v128 v128_ziplo_32(v128 a, v128 b) {
- return _mm_unpacklo_epi32(b, a);
-}
-
-SIMD_INLINE v128 v128_ziphi_32(v128 a, v128 b) {
- return _mm_unpackhi_epi32(b, a);
-}
-
-SIMD_INLINE v128 v128_ziplo_64(v128 a, v128 b) {
- return _mm_unpacklo_epi64(b, a);
-}
-
-SIMD_INLINE v128 v128_ziphi_64(v128 a, v128 b) {
- return _mm_unpackhi_epi64(b, a);
-}
-
-SIMD_INLINE v128 v128_zip_8(v64 a, v64 b) { return _mm_unpacklo_epi8(b, a); }
-
-SIMD_INLINE v128 v128_zip_16(v64 a, v64 b) { return _mm_unpacklo_epi16(b, a); }
-
-SIMD_INLINE v128 v128_zip_32(v64 a, v64 b) { return _mm_unpacklo_epi32(b, a); }
-
-SIMD_INLINE v128 v128_unziphi_8(v128 a, v128 b) {
- return _mm_packs_epi16(_mm_srai_epi16(b, 8), _mm_srai_epi16(a, 8));
-}
-
-SIMD_INLINE v128 v128_unziplo_8(v128 a, v128 b) {
-#if defined(__SSSE3__)
-#ifdef __x86_64__
- v128 order = _mm_cvtsi64_si128(0x0e0c0a0806040200LL);
-#else
- v128 order = _mm_set_epi32(0, 0, 0x0e0c0a08, 0x06040200);
-#endif
- return _mm_unpacklo_epi64(_mm_shuffle_epi8(b, order),
- _mm_shuffle_epi8(a, order));
-#else
- return v128_unziphi_8(_mm_slli_si128(a, 1), _mm_slli_si128(b, 1));
-#endif
-}
-
-SIMD_INLINE v128 v128_unziphi_16(v128 a, v128 b) {
- return _mm_packs_epi32(_mm_srai_epi32(b, 16), _mm_srai_epi32(a, 16));
-}
-
-SIMD_INLINE v128 v128_unziplo_16(v128 a, v128 b) {
-#if defined(__SSSE3__)
-#ifdef __x86_64__
- v128 order = _mm_cvtsi64_si128(0x0d0c090805040100LL);
-#else
- v128 order = _mm_set_epi32(0, 0, 0x0d0c0908, 0x05040100);
-#endif
- return _mm_unpacklo_epi64(_mm_shuffle_epi8(b, order),
- _mm_shuffle_epi8(a, order));
-#else
- return v128_unziphi_16(_mm_slli_si128(a, 2), _mm_slli_si128(b, 2));
-#endif
-}
-
-SIMD_INLINE v128 v128_unziphi_32(v128 a, v128 b) {
- return _mm_castps_si128(_mm_shuffle_ps(
- _mm_castsi128_ps(b), _mm_castsi128_ps(a), _MM_SHUFFLE(3, 1, 3, 1)));
-}
-
-SIMD_INLINE v128 v128_unziplo_32(v128 a, v128 b) {
- return _mm_castps_si128(_mm_shuffle_ps(
- _mm_castsi128_ps(b), _mm_castsi128_ps(a), _MM_SHUFFLE(2, 0, 2, 0)));
-}
-
-SIMD_INLINE v128 v128_unpack_u8_s16(v64 a) {
- return _mm_unpacklo_epi8(a, _mm_setzero_si128());
-}
-
-SIMD_INLINE v128 v128_unpacklo_u8_s16(v128 a) {
- return _mm_unpacklo_epi8(a, _mm_setzero_si128());
-}
-
-SIMD_INLINE v128 v128_unpackhi_u8_s16(v128 a) {
- return _mm_unpackhi_epi8(a, _mm_setzero_si128());
-}
-
-SIMD_INLINE v128 v128_unpack_s8_s16(v64 a) {
- return _mm_srai_epi16(_mm_unpacklo_epi8(a, a), 8);
-}
-
-SIMD_INLINE v128 v128_unpacklo_s8_s16(v128 a) {
- return _mm_srai_epi16(_mm_unpacklo_epi8(a, a), 8);
-}
-
-SIMD_INLINE v128 v128_unpackhi_s8_s16(v128 a) {
- return _mm_srai_epi16(_mm_unpackhi_epi8(a, a), 8);
-}
-
-SIMD_INLINE v128 v128_pack_s32_s16(v128 a, v128 b) {
- return _mm_packs_epi32(b, a);
-}
-
-SIMD_INLINE v128 v128_pack_s32_u16(v128 a, v128 b) {
-#if defined(__SSE4_1__)
- return _mm_packus_epi32(b, a);
-#else
- return v128_from_v64(v64_pack_s32_u16(v128_high_v64(a), v128_low_v64(a)),
- v64_pack_s32_u16(v128_high_v64(b), v128_low_v64(b)));
-#endif
-}
-
-SIMD_INLINE v128 v128_pack_s16_u8(v128 a, v128 b) {
- return _mm_packus_epi16(b, a);
-}
-
-SIMD_INLINE v128 v128_pack_s16_s8(v128 a, v128 b) {
- return _mm_packs_epi16(b, a);
-}
-
-SIMD_INLINE v128 v128_unpack_u16_s32(v64 a) {
- return _mm_unpacklo_epi16(a, _mm_setzero_si128());
-}
-
-SIMD_INLINE v128 v128_unpack_s16_s32(v64 a) {
- return _mm_srai_epi32(_mm_unpacklo_epi16(a, a), 16);
-}
-
-SIMD_INLINE v128 v128_unpacklo_u16_s32(v128 a) {
- return _mm_unpacklo_epi16(a, _mm_setzero_si128());
-}
-
-SIMD_INLINE v128 v128_unpacklo_s16_s32(v128 a) {
- return _mm_srai_epi32(_mm_unpacklo_epi16(a, a), 16);
-}
-
-SIMD_INLINE v128 v128_unpackhi_u16_s32(v128 a) {
- return _mm_unpackhi_epi16(a, _mm_setzero_si128());
-}
-
-SIMD_INLINE v128 v128_unpackhi_s16_s32(v128 a) {
- return _mm_srai_epi32(_mm_unpackhi_epi16(a, a), 16);
-}
-
-SIMD_INLINE v128 v128_shuffle_8(v128 x, v128 pattern) {
-#if defined(__SSSE3__)
- return _mm_shuffle_epi8(x, pattern);
-#else
- v128 output;
- unsigned char *input = (unsigned char *)&x;
- unsigned char *index = (unsigned char *)&pattern;
- char *selected = (char *)&output;
- int counter;
-
- for (counter = 0; counter < 16; counter++) {
- selected[counter] = input[index[counter] & 15];
- }
-
- return output;
-#endif
-}
-
-SIMD_INLINE int64_t v128_dotp_su8(v128 a, v128 b) {
- v128 t1 = _mm_madd_epi16(v128_unpackhi_s8_s16(a), v128_unpackhi_u8_s16(b));
- v128 t2 = _mm_madd_epi16(v128_unpacklo_s8_s16(a), v128_unpacklo_u8_s16(b));
- v128 t = v128_add_32(t1, t2);
- t = v128_add_32(t, _mm_srli_si128(t, 8));
- t = v128_add_32(t, _mm_srli_si128(t, 4));
- return (int32_t)v128_low_u32(t);
-}
-
-SIMD_INLINE int64_t v128_dotp_s16(v128 a, v128 b) {
- v128 r = _mm_madd_epi16(a, b);
-#if defined(__SSE4_1__) && defined(__x86_64__)
- v128 c = _mm_add_epi64(_mm_cvtepi32_epi64(r),
- _mm_cvtepi32_epi64(_mm_srli_si128(r, 8)));
- return _mm_cvtsi128_si64(_mm_add_epi64(c, _mm_srli_si128(c, 8)));
-#else
- return (int64_t)_mm_cvtsi128_si32(r) +
- (int64_t)_mm_cvtsi128_si32(_mm_srli_si128(r, 4)) +
- (int64_t)_mm_cvtsi128_si32(_mm_srli_si128(r, 8)) +
- (int64_t)_mm_cvtsi128_si32(_mm_srli_si128(r, 12));
-#endif
-}
-
-SIMD_INLINE uint64_t v128_hadd_u8(v128 a) {
- v128 t = _mm_sad_epu8(a, _mm_setzero_si128());
- return v64_low_u32(v128_low_v64(t)) + v64_low_u32(v128_high_v64(t));
-}
-
-typedef v128 sad128_internal;
-
-SIMD_INLINE sad128_internal v128_sad_u8_init() { return _mm_setzero_si128(); }
-
-/* Implementation dependent return value. Result must be finalised with
- v128_sad_sum().
- The result for more than 32 v128_sad_u8() calls is undefined. */
-SIMD_INLINE sad128_internal v128_sad_u8(sad128_internal s, v128 a, v128 b) {
- return _mm_add_epi64(s, _mm_sad_epu8(a, b));
-}
-
-SIMD_INLINE uint32_t v128_sad_u8_sum(sad128_internal s) {
- return v128_low_u32(_mm_add_epi32(s, _mm_unpackhi_epi64(s, s)));
-}
-
-typedef int32_t ssd128_internal;
-
-SIMD_INLINE ssd128_internal v128_ssd_u8_init() { return 0; }
-
-/* Implementation dependent return value. Result must be finalised with
- * v128_ssd_sum(). */
-SIMD_INLINE ssd128_internal v128_ssd_u8(ssd128_internal s, v128 a, v128 b) {
- v128 z = _mm_setzero_si128();
- v128 l = _mm_sub_epi16(_mm_unpacklo_epi8(a, z), _mm_unpacklo_epi8(b, z));
- v128 h = _mm_sub_epi16(_mm_unpackhi_epi8(a, z), _mm_unpackhi_epi8(b, z));
- v128 rl = _mm_madd_epi16(l, l);
- v128 rh = _mm_madd_epi16(h, h);
- v128 r = _mm_add_epi32(rl, rh);
- r = _mm_add_epi32(r, _mm_srli_si128(r, 8));
- r = _mm_add_epi32(r, _mm_srli_si128(r, 4));
- return s + _mm_cvtsi128_si32(r);
-}
-
-SIMD_INLINE int32_t v128_ssd_u8_sum(ssd128_internal s) { return s; }
-
-SIMD_INLINE v128 v128_or(v128 a, v128 b) { return _mm_or_si128(a, b); }
-
-SIMD_INLINE v128 v128_xor(v128 a, v128 b) { return _mm_xor_si128(a, b); }
-
-SIMD_INLINE v128 v128_and(v128 a, v128 b) { return _mm_and_si128(a, b); }
-
-SIMD_INLINE v128 v128_andn(v128 a, v128 b) { return _mm_andnot_si128(b, a); }
-
-SIMD_INLINE v128 v128_mul_s16(v64 a, v64 b) {
- v64 lo_bits = v64_mullo_s16(a, b);
- v64 hi_bits = v64_mulhi_s16(a, b);
- return v128_from_v64(v64_ziphi_16(hi_bits, lo_bits),
- v64_ziplo_16(hi_bits, lo_bits));
-}
-
-SIMD_INLINE v128 v128_mullo_s16(v128 a, v128 b) {
- return _mm_mullo_epi16(a, b);
-}
-
-SIMD_INLINE v128 v128_mulhi_s16(v128 a, v128 b) {
- return _mm_mulhi_epi16(a, b);
-}
-
-SIMD_INLINE v128 v128_mullo_s32(v128 a, v128 b) {
-#if defined(__SSE4_1__)
- return _mm_mullo_epi32(a, b);
-#else
- return _mm_unpacklo_epi32(
- _mm_shuffle_epi32(_mm_mul_epu32(a, b), 8),
- _mm_shuffle_epi32(
- _mm_mul_epu32(_mm_srli_si128(a, 4), _mm_srli_si128(b, 4)), 8));
-#endif
-}
-
-SIMD_INLINE int64_t v128_dotp_s32(v128 a, v128 b) {
- v128 r = v128_mullo_s32(a, b);
- return (int64_t)_mm_cvtsi128_si32(r) +
- (int64_t)_mm_cvtsi128_si32(_mm_srli_si128(r, 4)) +
- (int64_t)_mm_cvtsi128_si32(_mm_srli_si128(r, 8)) +
- (int64_t)_mm_cvtsi128_si32(_mm_srli_si128(r, 12));
-}
-
-SIMD_INLINE v128 v128_madd_s16(v128 a, v128 b) { return _mm_madd_epi16(a, b); }
-
-SIMD_INLINE v128 v128_madd_us8(v128 a, v128 b) {
-#if defined(__SSSE3__)
- return _mm_maddubs_epi16(a, b);
-#else
- return _mm_packs_epi32(
- _mm_madd_epi16(_mm_unpacklo_epi8(a, _mm_setzero_si128()),
- _mm_srai_epi16(_mm_unpacklo_epi8(b, b), 8)),
- _mm_madd_epi16(_mm_unpackhi_epi8(a, _mm_setzero_si128()),
- _mm_srai_epi16(_mm_unpackhi_epi8(b, b), 8)));
-#endif
-}
-
-SIMD_INLINE v128 v128_padd_u8(v128 a) {
- return v128_madd_us8(a, _mm_set1_epi8(1));
-}
-
-SIMD_INLINE v128 v128_avg_u8(v128 a, v128 b) { return _mm_avg_epu8(a, b); }
-
-SIMD_INLINE v128 v128_rdavg_u8(v128 a, v128 b) {
- return _mm_sub_epi8(_mm_avg_epu8(a, b),
- _mm_and_si128(_mm_xor_si128(a, b), v128_dup_8(1)));
-}
-
-SIMD_INLINE v128 v128_rdavg_u16(v128 a, v128 b) {
- return _mm_sub_epi16(_mm_avg_epu16(a, b),
- _mm_and_si128(_mm_xor_si128(a, b), v128_dup_16(1)));
-}
-
-SIMD_INLINE v128 v128_avg_u16(v128 a, v128 b) { return _mm_avg_epu16(a, b); }
-
-SIMD_INLINE v128 v128_min_u8(v128 a, v128 b) { return _mm_min_epu8(a, b); }
-
-SIMD_INLINE v128 v128_max_u8(v128 a, v128 b) { return _mm_max_epu8(a, b); }
-
-SIMD_INLINE v128 v128_min_s8(v128 a, v128 b) {
-#if defined(__SSE4_1__)
- return _mm_min_epi8(a, b);
-#else
- v128 mask = _mm_cmplt_epi8(a, b);
- return _mm_or_si128(_mm_andnot_si128(mask, b), _mm_and_si128(mask, a));
-#endif
-}
-
-SIMD_INLINE uint32_t v128_movemask_8(v128 a) { return _mm_movemask_epi8(a); }
-
-SIMD_INLINE v128 v128_blend_8(v128 a, v128 b, v128 c) {
-#if defined(__SSE4_1__)
- return _mm_blendv_epi8(a, b, c);
-#else
- c = _mm_cmplt_epi8(c, v128_zero());
- return v128_or(v128_and(b, c), v128_andn(a, c));
-#endif
-}
-
-SIMD_INLINE v128 v128_max_s8(v128 a, v128 b) {
-#if defined(__SSE4_1__)
- return _mm_max_epi8(a, b);
-#else
- v128 mask = _mm_cmplt_epi8(b, a);
- return _mm_or_si128(_mm_andnot_si128(mask, b), _mm_and_si128(mask, a));
-#endif
-}
-
-SIMD_INLINE v128 v128_min_s16(v128 a, v128 b) { return _mm_min_epi16(a, b); }
-
-SIMD_INLINE v128 v128_max_s16(v128 a, v128 b) { return _mm_max_epi16(a, b); }
-
-SIMD_INLINE v128 v128_min_s32(v128 a, v128 b) {
-#if defined(__SSE4_1__)
- return _mm_min_epi32(a, b);
-#else
- v128 mask = _mm_cmplt_epi32(a, b);
- return _mm_or_si128(_mm_andnot_si128(mask, b), _mm_and_si128(mask, a));
-#endif
-}
-
-SIMD_INLINE v128 v128_max_s32(v128 a, v128 b) {
-#if defined(__SSE4_1__)
- return _mm_max_epi32(a, b);
-#else
- v128 mask = _mm_cmplt_epi32(b, a);
- return _mm_or_si128(_mm_andnot_si128(mask, b), _mm_and_si128(mask, a));
-#endif
-}
-
-SIMD_INLINE v128 v128_cmpgt_s8(v128 a, v128 b) { return _mm_cmpgt_epi8(a, b); }
-
-SIMD_INLINE v128 v128_cmplt_s8(v128 a, v128 b) { return _mm_cmplt_epi8(a, b); }
-
-SIMD_INLINE v128 v128_cmpeq_8(v128 a, v128 b) { return _mm_cmpeq_epi8(a, b); }
-
-SIMD_INLINE v128 v128_cmpgt_s16(v128 a, v128 b) {
- return _mm_cmpgt_epi16(a, b);
-}
-
-SIMD_INLINE v128 v128_cmplt_s16(v128 a, v128 b) {
- return _mm_cmplt_epi16(a, b);
-}
-
-SIMD_INLINE v128 v128_cmpeq_32(v128 a, v128 b) { return _mm_cmpeq_epi32(a, b); }
-
-SIMD_INLINE v128 v128_cmpgt_s32(v128 a, v128 b) {
- return _mm_cmpgt_epi32(a, b);
-}
-
-SIMD_INLINE v128 v128_cmplt_s32(v128 a, v128 b) {
- return _mm_cmplt_epi32(a, b);
-}
-
-SIMD_INLINE v128 v128_cmpeq_16(v128 a, v128 b) { return _mm_cmpeq_epi16(a, b); }
-
-SIMD_INLINE v128 v128_shl_8(v128 a, unsigned int c) {
- return _mm_and_si128(_mm_set1_epi8((uint8_t)(0xff << c)),
- _mm_sll_epi16(a, _mm_cvtsi32_si128(c)));
-}
-
-SIMD_INLINE v128 v128_shr_u8(v128 a, unsigned int c) {
- return _mm_and_si128(_mm_set1_epi8(0xff >> c),
- _mm_srl_epi16(a, _mm_cvtsi32_si128(c)));
-}
-
-SIMD_INLINE v128 v128_shr_s8(v128 a, unsigned int c) {
- __m128i x = _mm_cvtsi32_si128(c + 8);
- return _mm_packs_epi16(_mm_sra_epi16(_mm_unpacklo_epi8(a, a), x),
- _mm_sra_epi16(_mm_unpackhi_epi8(a, a), x));
-}
-
-SIMD_INLINE v128 v128_shl_16(v128 a, unsigned int c) {
- return _mm_sll_epi16(a, _mm_cvtsi32_si128(c));
-}
-
-SIMD_INLINE v128 v128_shr_u16(v128 a, unsigned int c) {
- return _mm_srl_epi16(a, _mm_cvtsi32_si128(c));
-}
-
-SIMD_INLINE v128 v128_shr_s16(v128 a, unsigned int c) {
- return _mm_sra_epi16(a, _mm_cvtsi32_si128(c));
-}
-
-SIMD_INLINE v128 v128_shl_32(v128 a, unsigned int c) {
- return _mm_sll_epi32(a, _mm_cvtsi32_si128(c));
-}
-
-SIMD_INLINE v128 v128_shr_u32(v128 a, unsigned int c) {
- return _mm_srl_epi32(a, _mm_cvtsi32_si128(c));
-}
-
-SIMD_INLINE v128 v128_shr_s32(v128 a, unsigned int c) {
- return _mm_sra_epi32(a, _mm_cvtsi32_si128(c));
-}
-
-SIMD_INLINE v128 v128_shl_64(v128 a, unsigned int c) {
- return _mm_sll_epi64(a, _mm_cvtsi32_si128(c));
-}
-
-SIMD_INLINE v128 v128_shr_u64(v128 a, unsigned int c) {
- return _mm_srl_epi64(a, _mm_cvtsi32_si128(c));
-}
-
-SIMD_INLINE v128 v128_shr_s64(v128 a, unsigned int c) {
- // _mm_sra_epi64 is missing in gcc?
- return v128_from_64((int64_t)v64_u64(v128_high_v64(a)) >> c,
- (int64_t)v64_u64(v128_low_v64(a)) >> c);
- // return _mm_sra_epi64(a, _mm_cvtsi32_si128(c));
-}
-
-/* These intrinsics require immediate values, so we must use #defines
- to enforce that. */
-#define v128_shl_n_byte(a, c) _mm_slli_si128(a, (c)&127)
-#define v128_shr_n_byte(a, c) _mm_srli_si128(a, (c)&127)
-#define v128_shl_n_8(a, c) \
- _mm_and_si128(_mm_set1_epi8((uint8_t)(0xff << (c))), _mm_slli_epi16(a, c))
-#define v128_shr_n_u8(a, c) \
- _mm_and_si128(_mm_set1_epi8(0xff >> (c)), _mm_srli_epi16(a, c))
-#define v128_shr_n_s8(a, c) \
- _mm_packs_epi16(_mm_srai_epi16(_mm_unpacklo_epi8(a, a), (c) + 8), \
- _mm_srai_epi16(_mm_unpackhi_epi8(a, a), (c) + 8))
-#define v128_shl_n_16(a, c) _mm_slli_epi16(a, c)
-#define v128_shr_n_u16(a, c) _mm_srli_epi16(a, c)
-#define v128_shr_n_s16(a, c) _mm_srai_epi16(a, c)
-#define v128_shl_n_32(a, c) _mm_slli_epi32(a, c)
-#define v128_shr_n_u32(a, c) _mm_srli_epi32(a, c)
-#define v128_shr_n_s32(a, c) _mm_srai_epi32(a, c)
-#define v128_shl_n_64(a, c) _mm_slli_epi64(a, c)
-#define v128_shr_n_u64(a, c) _mm_srli_epi64(a, c)
-#define v128_shr_n_s64(a, c) \
- v128_shr_s64(a, c) // _mm_srai_epi64 missing in gcc?
-
-typedef v128 sad128_internal_u16;
-
-SIMD_INLINE sad128_internal_u16 v128_sad_u16_init() { return v128_zero(); }
-
-/* Implementation dependent return value. Result must be finalised with
- * v128_sad_u16_sum(). */
-SIMD_INLINE sad128_internal_u16 v128_sad_u16(sad128_internal_u16 s, v128 a,
- v128 b) {
-#if defined(__SSE4_1__)
- v128 t = v128_sub_16(_mm_max_epu16(a, b), _mm_min_epu16(a, b));
-#else
- v128 t = v128_cmplt_s16(v128_xor(a, v128_dup_16(32768)),
- v128_xor(b, v128_dup_16(32768)));
- t = v128_sub_16(v128_or(v128_and(b, t), v128_andn(a, t)),
- v128_or(v128_and(a, t), v128_andn(b, t)));
-#endif
- return v128_add_32(
- s, v128_add_32(v128_unpackhi_u16_s32(t), v128_unpacklo_u16_s32(t)));
-}
-
-SIMD_INLINE uint32_t v128_sad_u16_sum(sad128_internal_u16 s) {
- return v128_low_u32(s) + v128_low_u32(v128_shr_n_byte(s, 4)) +
- v128_low_u32(v128_shr_n_byte(s, 8)) +
- v128_low_u32(v128_shr_n_byte(s, 12));
-}
-
-typedef v128 ssd128_internal_s16;
-
-SIMD_INLINE ssd128_internal_s16 v128_ssd_s16_init() { return v128_zero(); }
-
-/* Implementation dependent return value. Result must be finalised with
- * v128_ssd_s16_sum(). */
-SIMD_INLINE ssd128_internal_s16 v128_ssd_s16(ssd128_internal_s16 s, v128 a,
- v128 b) {
- v128 d = v128_sub_16(a, b);
- d = v128_madd_s16(d, d);
- return v128_add_64(s, v128_add_64(_mm_unpackhi_epi32(d, v128_zero()),
- _mm_unpacklo_epi32(d, v128_zero())));
-}
-
-SIMD_INLINE uint64_t v128_ssd_s16_sum(ssd128_internal_s16 s) {
- return v64_u64(v128_low_v64(s)) + v64_u64(v128_high_v64(s));
-}
-
-#endif // AOM_AOM_DSP_SIMD_V128_INTRINSICS_X86_H_
diff --git a/third_party/aom/aom_dsp/simd/v256_intrinsics.h b/third_party/aom/aom_dsp/simd/v256_intrinsics.h
deleted file mode 100644
index cb99d35b7..000000000
--- a/third_party/aom/aom_dsp/simd/v256_intrinsics.h
+++ /dev/null
@@ -1,376 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#ifndef AOM_AOM_DSP_SIMD_V256_INTRINSICS_H_
-#define AOM_AOM_DSP_SIMD_V256_INTRINSICS_H_
-
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-
-#include "aom_dsp/simd/v256_intrinsics_c.h"
-#include "aom_dsp/simd/v128_intrinsics.h"
-#include "aom_dsp/simd/v64_intrinsics.h"
-
-/* Fallback to plain, unoptimised C. */
-
-typedef c_v256 v256;
-
-SIMD_INLINE uint32_t v256_low_u32(v256 a) { return c_v256_low_u32(a); }
-SIMD_INLINE v64 v256_low_v64(v256 a) { return c_v256_low_v64(a); }
-SIMD_INLINE uint64_t v256_low_u64(v256 a) { return c_v256_low_u64(a); }
-SIMD_INLINE v128 v256_low_v128(v256 a) { return c_v256_low_v128(a); }
-SIMD_INLINE v128 v256_high_v128(v256 a) { return c_v256_high_v128(a); }
-SIMD_INLINE v256 v256_from_v128(v128 hi, v128 lo) {
- return c_v256_from_v128(hi, lo);
-}
-SIMD_INLINE v256 v256_from_64(uint64_t a, uint64_t b, uint64_t c, uint64_t d) {
- return c_v256_from_64(a, b, c, d);
-}
-SIMD_INLINE v256 v256_from_v64(v64 a, v64 b, v64 c, v64 d) {
- return c_v256_from_v64(a, b, c, d);
-}
-
-SIMD_INLINE v256 v256_load_unaligned(const void *p) {
- return c_v256_load_unaligned(p);
-}
-SIMD_INLINE v256 v256_load_aligned(const void *p) {
- return c_v256_load_aligned(p);
-}
-
-SIMD_INLINE void v256_store_unaligned(void *p, v256 a) {
- c_v256_store_unaligned(p, a);
-}
-SIMD_INLINE void v256_store_aligned(void *p, v256 a) {
- c_v256_store_aligned(p, a);
-}
-
-SIMD_INLINE v256 v256_align(v256 a, v256 b, unsigned int c) {
- return c_v256_align(a, b, c);
-}
-
-SIMD_INLINE v256 v256_zero() { return c_v256_zero(); }
-SIMD_INLINE v256 v256_dup_8(uint8_t x) { return c_v256_dup_8(x); }
-SIMD_INLINE v256 v256_dup_16(uint16_t x) { return c_v256_dup_16(x); }
-SIMD_INLINE v256 v256_dup_32(uint32_t x) { return c_v256_dup_32(x); }
-SIMD_INLINE v256 v256_dup_64(uint64_t x) { return c_v256_dup_64(x); }
-
-typedef uint32_t sad256_internal;
-SIMD_INLINE sad256_internal v256_sad_u8_init() { return c_v256_sad_u8_init(); }
-SIMD_INLINE sad256_internal v256_sad_u8(sad256_internal s, v256 a, v256 b) {
- return c_v256_sad_u8(s, a, b);
-}
-SIMD_INLINE uint32_t v256_sad_u8_sum(sad256_internal s) {
- return c_v256_sad_u8_sum(s);
-}
-typedef uint32_t ssd256_internal;
-SIMD_INLINE ssd256_internal v256_ssd_u8_init() { return c_v256_ssd_u8_init(); }
-SIMD_INLINE ssd256_internal v256_ssd_u8(ssd256_internal s, v256 a, v256 b) {
- return c_v256_ssd_u8(s, a, b);
-}
-SIMD_INLINE uint32_t v256_ssd_u8_sum(ssd256_internal s) {
- return c_v256_ssd_u8_sum(s);
-}
-
-SIMD_INLINE int64_t v256_dotp_su8(v256 a, v256 b) {
- return c_v256_dotp_su8(a, b);
-}
-SIMD_INLINE int64_t v256_dotp_s16(v256 a, v256 b) {
- return c_v256_dotp_s16(a, b);
-}
-SIMD_INLINE int64_t v256_dotp_s32(v256 a, v256 b) {
- return c_v256_dotp_s32(a, b);
-}
-SIMD_INLINE uint64_t v256_hadd_u8(v256 a) { return c_v256_hadd_u8(a); }
-
-SIMD_INLINE v256 v256_or(v256 a, v256 b) { return c_v256_or(a, b); }
-SIMD_INLINE v256 v256_xor(v256 a, v256 b) { return c_v256_xor(a, b); }
-SIMD_INLINE v256 v256_and(v256 a, v256 b) { return c_v256_and(a, b); }
-SIMD_INLINE v256 v256_andn(v256 a, v256 b) { return c_v256_andn(a, b); }
-
-SIMD_INLINE v256 v256_add_8(v256 a, v256 b) { return c_v256_add_8(a, b); }
-SIMD_INLINE v256 v256_add_16(v256 a, v256 b) { return c_v256_add_16(a, b); }
-SIMD_INLINE v256 v256_sadd_s8(v256 a, v256 b) { return c_v256_sadd_s8(a, b); }
-SIMD_INLINE v256 v256_sadd_u8(v256 a, v256 b) { return c_v256_sadd_u8(a, b); }
-SIMD_INLINE v256 v256_sadd_s16(v256 a, v256 b) { return c_v256_sadd_s16(a, b); }
-SIMD_INLINE v256 v256_add_32(v256 a, v256 b) { return c_v256_add_32(a, b); }
-SIMD_INLINE v256 v256_add_64(v256 a, v256 b) { return c_v256_add_64(a, b); }
-SIMD_INLINE v256 v256_sub_64(v256 a, v256 b) { return c_v256_sub_64(a, b); }
-SIMD_INLINE v256 v256_padd_u8(v256 a) { return c_v256_padd_u8(a); }
-SIMD_INLINE v256 v256_padd_s16(v256 a) { return c_v256_padd_s16(a); }
-SIMD_INLINE v256 v256_sub_8(v256 a, v256 b) { return c_v256_sub_8(a, b); }
-SIMD_INLINE v256 v256_ssub_u8(v256 a, v256 b) { return c_v256_ssub_u8(a, b); }
-SIMD_INLINE v256 v256_ssub_s8(v256 a, v256 b) { return c_v256_ssub_s8(a, b); }
-SIMD_INLINE v256 v256_sub_16(v256 a, v256 b) { return c_v256_sub_16(a, b); }
-SIMD_INLINE v256 v256_ssub_s16(v256 a, v256 b) { return c_v256_ssub_s16(a, b); }
-SIMD_INLINE v256 v256_ssub_u16(v256 a, v256 b) { return c_v256_ssub_u16(a, b); }
-SIMD_INLINE v256 v256_sub_32(v256 a, v256 b) { return c_v256_sub_32(a, b); }
-SIMD_INLINE v256 v256_abs_s16(v256 a) { return c_v256_abs_s16(a); }
-SIMD_INLINE v256 v256_abs_s8(v256 a) { return c_v256_abs_s8(a); }
-
-SIMD_INLINE v256 v256_mul_s16(v128 a, v128 b) { return c_v256_mul_s16(a, b); }
-SIMD_INLINE v256 v256_mullo_s16(v256 a, v256 b) {
- return c_v256_mullo_s16(a, b);
-}
-SIMD_INLINE v256 v256_mulhi_s16(v256 a, v256 b) {
- return c_v256_mulhi_s16(a, b);
-}
-SIMD_INLINE v256 v256_mullo_s32(v256 a, v256 b) {
- return c_v256_mullo_s32(a, b);
-}
-SIMD_INLINE v256 v256_madd_s16(v256 a, v256 b) { return c_v256_madd_s16(a, b); }
-SIMD_INLINE v256 v256_madd_us8(v256 a, v256 b) { return c_v256_madd_us8(a, b); }
-
-SIMD_INLINE uint32_t v256_movemask_8(v256 a) { return c_v256_movemask_8(a); }
-SIMD_INLINE v256 v256_blend_8(v256 a, v256 b, v256 c) {
- return c_v256_blend_8(a, b, c);
-}
-
-SIMD_INLINE v256 v256_avg_u8(v256 a, v256 b) { return c_v256_avg_u8(a, b); }
-SIMD_INLINE v256 v256_rdavg_u8(v256 a, v256 b) { return c_v256_rdavg_u8(a, b); }
-SIMD_INLINE v256 v256_rdavg_u16(v256 a, v256 b) {
- return c_v256_rdavg_u16(a, b);
-}
-SIMD_INLINE v256 v256_avg_u16(v256 a, v256 b) { return c_v256_avg_u16(a, b); }
-SIMD_INLINE v256 v256_min_u8(v256 a, v256 b) { return c_v256_min_u8(a, b); }
-SIMD_INLINE v256 v256_max_u8(v256 a, v256 b) { return c_v256_max_u8(a, b); }
-SIMD_INLINE v256 v256_min_s8(v256 a, v256 b) { return c_v256_min_s8(a, b); }
-SIMD_INLINE v256 v256_max_s8(v256 a, v256 b) { return c_v256_max_s8(a, b); }
-SIMD_INLINE v256 v256_min_s16(v256 a, v256 b) { return c_v256_min_s16(a, b); }
-SIMD_INLINE v256 v256_max_s16(v256 a, v256 b) { return c_v256_max_s16(a, b); }
-SIMD_INLINE v256 v256_min_s32(v256 a, v256 b) { return c_v256_min_s32(a, b); }
-SIMD_INLINE v256 v256_max_s32(v256 a, v256 b) { return c_v256_max_s32(a, b); }
-
-SIMD_INLINE v256 v256_ziplo_8(v256 a, v256 b) { return c_v256_ziplo_8(a, b); }
-SIMD_INLINE v256 v256_ziphi_8(v256 a, v256 b) { return c_v256_ziphi_8(a, b); }
-SIMD_INLINE v256 v256_ziplo_16(v256 a, v256 b) { return c_v256_ziplo_16(a, b); }
-SIMD_INLINE v256 v256_ziphi_16(v256 a, v256 b) { return c_v256_ziphi_16(a, b); }
-SIMD_INLINE v256 v256_ziplo_32(v256 a, v256 b) { return c_v256_ziplo_32(a, b); }
-SIMD_INLINE v256 v256_ziphi_32(v256 a, v256 b) { return c_v256_ziphi_32(a, b); }
-SIMD_INLINE v256 v256_ziplo_64(v256 a, v256 b) { return c_v256_ziplo_64(a, b); }
-SIMD_INLINE v256 v256_ziphi_64(v256 a, v256 b) { return c_v256_ziphi_64(a, b); }
-SIMD_INLINE v256 v256_ziplo_128(v256 a, v256 b) {
- return c_v256_ziplo_128(a, b);
-}
-SIMD_INLINE v256 v256_ziphi_128(v256 a, v256 b) {
- return c_v256_ziphi_128(a, b);
-}
-SIMD_INLINE v256 v256_zip_8(v128 a, v128 b) { return c_v256_zip_8(a, b); }
-SIMD_INLINE v256 v256_zip_16(v128 a, v128 b) { return c_v256_zip_16(a, b); }
-SIMD_INLINE v256 v256_zip_32(v128 a, v128 b) { return c_v256_zip_32(a, b); }
-SIMD_INLINE v256 v256_unziplo_8(v256 a, v256 b) {
- return c_v256_unziplo_8(a, b);
-}
-SIMD_INLINE v256 v256_unziphi_8(v256 a, v256 b) {
- return c_v256_unziphi_8(a, b);
-}
-SIMD_INLINE v256 v256_unziplo_16(v256 a, v256 b) {
- return c_v256_unziplo_16(a, b);
-}
-SIMD_INLINE v256 v256_unziphi_16(v256 a, v256 b) {
- return c_v256_unziphi_16(a, b);
-}
-SIMD_INLINE v256 v256_unziplo_32(v256 a, v256 b) {
- return c_v256_unziplo_32(a, b);
-}
-SIMD_INLINE v256 v256_unziphi_32(v256 a, v256 b) {
- return c_v256_unziphi_32(a, b);
-}
-SIMD_INLINE v256 v256_unziplo_64(v256 a, v256 b) {
- return c_v256_unziplo_64(a, b);
-}
-SIMD_INLINE v256 v256_unziphi_64(v256 a, v256 b) {
- return c_v256_unziphi_64(a, b);
-}
-SIMD_INLINE v256 v256_unpack_u8_s16(v128 a) { return c_v256_unpack_u8_s16(a); }
-SIMD_INLINE v256 v256_unpacklo_u8_s16(v256 a) {
- return c_v256_unpacklo_u8_s16(a);
-}
-SIMD_INLINE v256 v256_unpackhi_u8_s16(v256 a) {
- return c_v256_unpackhi_u8_s16(a);
-}
-SIMD_INLINE v256 v256_unpack_s8_s16(v128 a) { return c_v256_unpack_s8_s16(a); }
-SIMD_INLINE v256 v256_unpacklo_s8_s16(v256 a) {
- return c_v256_unpacklo_s8_s16(a);
-}
-SIMD_INLINE v256 v256_unpackhi_s8_s16(v256 a) {
- return c_v256_unpackhi_s8_s16(a);
-}
-SIMD_INLINE v256 v256_pack_s32_s16(v256 a, v256 b) {
- return c_v256_pack_s32_s16(a, b);
-}
-SIMD_INLINE v256 v256_pack_s32_u16(v256 a, v256 b) {
- return c_v256_pack_s32_u16(a, b);
-}
-SIMD_INLINE v256 v256_pack_s16_u8(v256 a, v256 b) {
- return c_v256_pack_s16_u8(a, b);
-}
-SIMD_INLINE v256 v256_pack_s16_s8(v256 a, v256 b) {
- return c_v256_pack_s16_s8(a, b);
-}
-SIMD_INLINE v256 v256_unpack_u16_s32(v128 a) {
- return c_v256_unpack_u16_s32(a);
-}
-SIMD_INLINE v256 v256_unpack_s16_s32(v128 a) {
- return c_v256_unpack_s16_s32(a);
-}
-SIMD_INLINE v256 v256_unpacklo_u16_s32(v256 a) {
- return c_v256_unpacklo_u16_s32(a);
-}
-SIMD_INLINE v256 v256_unpacklo_s16_s32(v256 a) {
- return c_v256_unpacklo_s16_s32(a);
-}
-SIMD_INLINE v256 v256_unpackhi_u16_s32(v256 a) {
- return c_v256_unpackhi_u16_s32(a);
-}
-SIMD_INLINE v256 v256_unpackhi_s16_s32(v256 a) {
- return c_v256_unpackhi_s16_s32(a);
-}
-SIMD_INLINE v256 v256_shuffle_8(v256 a, v256 pattern) {
- return c_v256_shuffle_8(a, pattern);
-}
-SIMD_INLINE v256 v256_wideshuffle_8(v256 a, v256 b, v256 pattern) {
- return c_v256_wideshuffle_8(a, b, pattern);
-}
-SIMD_INLINE v256 v256_pshuffle_8(v256 a, v256 pattern) {
- return c_v256_pshuffle_8(a, pattern);
-}
-
-SIMD_INLINE v256 v256_cmpgt_s8(v256 a, v256 b) { return c_v256_cmpgt_s8(a, b); }
-SIMD_INLINE v256 v256_cmplt_s8(v256 a, v256 b) { return c_v256_cmplt_s8(a, b); }
-SIMD_INLINE v256 v256_cmpeq_8(v256 a, v256 b) { return c_v256_cmpeq_8(a, b); }
-SIMD_INLINE v256 v256_cmpgt_s16(v256 a, v256 b) {
- return c_v256_cmpgt_s16(a, b);
-}
-SIMD_INLINE v256 v256_cmplt_s16(v256 a, v256 b) {
- return c_v256_cmplt_s16(a, b);
-}
-SIMD_INLINE v256 v256_cmpeq_16(v256 a, v256 b) { return c_v256_cmpeq_16(a, b); }
-SIMD_INLINE v256 v256_cmpeq_32(v256 a, v256 b) { return c_v256_cmpeq_32(a, b); }
-
-SIMD_INLINE v256 v256_cmpgt_s32(v256 a, v256 b) {
- return c_v256_cmpgt_s32(a, b);
-}
-SIMD_INLINE v256 v256_cmplt_s32(v256 a, v256 b) {
- return c_v256_cmplt_s32(a, b);
-}
-SIMD_INLINE v256 v256_shl_8(v256 a, unsigned int c) {
- return c_v256_shl_8(a, c);
-}
-SIMD_INLINE v256 v256_shr_u8(v256 a, unsigned int c) {
- return c_v256_shr_u8(a, c);
-}
-SIMD_INLINE v256 v256_shr_s8(v256 a, unsigned int c) {
- return c_v256_shr_s8(a, c);
-}
-SIMD_INLINE v256 v256_shl_16(v256 a, unsigned int c) {
- return c_v256_shl_16(a, c);
-}
-SIMD_INLINE v256 v256_shr_u16(v256 a, unsigned int c) {
- return c_v256_shr_u16(a, c);
-}
-SIMD_INLINE v256 v256_shr_s16(v256 a, unsigned int c) {
- return c_v256_shr_s16(a, c);
-}
-SIMD_INLINE v256 v256_shl_32(v256 a, unsigned int c) {
- return c_v256_shl_32(a, c);
-}
-SIMD_INLINE v256 v256_shr_u32(v256 a, unsigned int c) {
- return c_v256_shr_u32(a, c);
-}
-SIMD_INLINE v256 v256_shr_s32(v256 a, unsigned int c) {
- return c_v256_shr_s32(a, c);
-}
-SIMD_INLINE v256 v256_shl_64(v256 a, unsigned int c) {
- return c_v256_shl_64(a, c);
-}
-SIMD_INLINE v256 v256_shr_u64(v256 a, unsigned int c) {
- return c_v256_shr_u64(a, c);
-}
-SIMD_INLINE v256 v256_shr_s64(v256 a, unsigned int c) {
- return c_v256_shr_s64(a, c);
-}
-
-SIMD_INLINE v256 v256_shr_n_byte(v256 a, unsigned int n) {
- return c_v256_shr_n_byte(a, n);
-}
-SIMD_INLINE v256 v256_shl_n_byte(v256 a, unsigned int n) {
- return c_v256_shl_n_byte(a, n);
-}
-SIMD_INLINE v256 v256_shl_n_8(v256 a, unsigned int n) {
- return c_v256_shl_n_8(a, n);
-}
-SIMD_INLINE v256 v256_shl_n_16(v256 a, unsigned int n) {
- return c_v256_shl_n_16(a, n);
-}
-SIMD_INLINE v256 v256_shl_n_32(v256 a, unsigned int n) {
- return c_v256_shl_n_32(a, n);
-}
-SIMD_INLINE v256 v256_shl_n_64(v256 a, unsigned int n) {
- return c_v256_shl_n_64(a, n);
-}
-SIMD_INLINE v256 v256_shr_n_u8(v256 a, unsigned int n) {
- return c_v256_shr_n_u8(a, n);
-}
-SIMD_INLINE v256 v256_shr_n_u16(v256 a, unsigned int n) {
- return c_v256_shr_n_u16(a, n);
-}
-SIMD_INLINE v256 v256_shr_n_u32(v256 a, unsigned int n) {
- return c_v256_shr_n_u32(a, n);
-}
-SIMD_INLINE v256 v256_shr_n_u64(v256 a, unsigned int n) {
- return c_v256_shr_n_u64(a, n);
-}
-SIMD_INLINE v256 v256_shr_n_s8(v256 a, unsigned int n) {
- return c_v256_shr_n_s8(a, n);
-}
-SIMD_INLINE v256 v256_shr_n_s16(v256 a, unsigned int n) {
- return c_v256_shr_n_s16(a, n);
-}
-SIMD_INLINE v256 v256_shr_n_s32(v256 a, unsigned int n) {
- return c_v256_shr_n_s32(a, n);
-}
-SIMD_INLINE v256 v256_shr_n_s64(v256 a, unsigned int n) {
- return c_v256_shr_n_s64(a, n);
-}
-
-SIMD_INLINE v256 v256_shr_n_word(v256 a, unsigned int n) {
- return c_v256_shr_n_word(a, n);
-}
-SIMD_INLINE v256 v256_shl_n_word(v256 a, unsigned int n) {
- return c_v256_shl_n_word(a, n);
-}
-
-typedef uint32_t sad256_internal_u16;
-SIMD_INLINE sad256_internal_u16 v256_sad_u16_init() {
- return c_v256_sad_u16_init();
-}
-SIMD_INLINE sad256_internal_u16 v256_sad_u16(sad256_internal_u16 s, v256 a,
- v256 b) {
- return c_v256_sad_u16(s, a, b);
-}
-SIMD_INLINE uint32_t v256_sad_u16_sum(sad256_internal_u16 s) {
- return c_v256_sad_u16_sum(s);
-}
-
-typedef uint64_t ssd256_internal_s16;
-SIMD_INLINE ssd256_internal_s16 v256_ssd_s16_init() {
- return c_v256_ssd_s16_init();
-}
-SIMD_INLINE ssd256_internal_s16 v256_ssd_s16(ssd256_internal_s16 s, v256 a,
- v256 b) {
- return c_v256_ssd_s16(s, a, b);
-}
-SIMD_INLINE uint64_t v256_ssd_s16_sum(ssd256_internal_s16 s) {
- return c_v256_ssd_s16_sum(s);
-}
-
-#endif // AOM_AOM_DSP_SIMD_V256_INTRINSICS_H_
diff --git a/third_party/aom/aom_dsp/simd/v256_intrinsics_arm.h b/third_party/aom/aom_dsp/simd/v256_intrinsics_arm.h
deleted file mode 100644
index bd86ea172..000000000
--- a/third_party/aom/aom_dsp/simd/v256_intrinsics_arm.h
+++ /dev/null
@@ -1,17 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#ifndef AOM_AOM_DSP_SIMD_V256_INTRINSICS_ARM_H_
-#define AOM_AOM_DSP_SIMD_V256_INTRINSICS_ARM_H_
-
-#include "aom_dsp/simd/v256_intrinsics_v128.h"
-
-#endif // AOM_AOM_DSP_SIMD_V256_INTRINSICS_ARM_H_
diff --git a/third_party/aom/aom_dsp/simd/v256_intrinsics_c.h b/third_party/aom/aom_dsp/simd/v256_intrinsics_c.h
deleted file mode 100644
index a1c08e95a..000000000
--- a/third_party/aom/aom_dsp/simd/v256_intrinsics_c.h
+++ /dev/null
@@ -1,953 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#ifndef AOM_AOM_DSP_SIMD_V256_INTRINSICS_C_H_
-#define AOM_AOM_DSP_SIMD_V256_INTRINSICS_C_H_
-
-#include <stdio.h>
-#include <stdlib.h>
-
-#include "config/aom_config.h"
-
-#include "aom_dsp/simd/v128_intrinsics_c.h"
-
-typedef union {
- uint8_t u8[32];
- uint16_t u16[16];
- uint32_t u32[8];
- uint64_t u64[4];
- int8_t s8[32];
- int16_t s16[16];
- int32_t s32[8];
- int64_t s64[4];
- c_v64 v64[4];
- c_v128 v128[2];
-} c_v256;
-
-SIMD_INLINE uint32_t c_v256_low_u32(c_v256 a) { return a.u32[0]; }
-
-SIMD_INLINE c_v64 c_v256_low_v64(c_v256 a) { return a.v64[0]; }
-
-SIMD_INLINE uint64_t c_v256_low_u64(c_v256 a) { return a.u64[0]; }
-
-SIMD_INLINE c_v128 c_v256_low_v128(c_v256 a) { return a.v128[0]; }
-
-SIMD_INLINE c_v128 c_v256_high_v128(c_v256 a) { return a.v128[1]; }
-
-SIMD_INLINE c_v256 c_v256_from_v128(c_v128 hi, c_v128 lo) {
- c_v256 t;
- t.v128[1] = hi;
- t.v128[0] = lo;
- return t;
-}
-
-SIMD_INLINE c_v256 c_v256_from_64(uint64_t a, uint64_t b, uint64_t c,
- uint64_t d) {
- c_v256 t;
- t.u64[3] = a;
- t.u64[2] = b;
- t.u64[1] = c;
- t.u64[0] = d;
- return t;
-}
-
-SIMD_INLINE c_v256 c_v256_from_v64(c_v64 a, c_v64 b, c_v64 c, c_v64 d) {
- c_v256 t;
- t.u64[3] = a.u64;
- t.u64[2] = b.u64;
- t.u64[1] = c.u64;
- t.u64[0] = d.u64;
- return t;
-}
-
-SIMD_INLINE c_v256 c_v256_load_unaligned(const void *p) {
- c_v256 t;
- uint8_t *pp = (uint8_t *)p;
- uint8_t *q = (uint8_t *)&t;
- int c;
- for (c = 0; c < 32; c++) q[c] = pp[c];
- return t;
-}
-
-SIMD_INLINE c_v256 c_v256_load_aligned(const void *p) {
- if (SIMD_CHECK && (uintptr_t)p & 31) {
- fprintf(stderr, "Error: unaligned v256 load at %p\n", p);
- abort();
- }
- return c_v256_load_unaligned(p);
-}
-
-SIMD_INLINE void c_v256_store_unaligned(void *p, c_v256 a) {
- uint8_t *pp = (uint8_t *)p;
- uint8_t *q = (uint8_t *)&a;
- int c;
- for (c = 0; c < 32; c++) pp[c] = q[c];
-}
-
-SIMD_INLINE void c_v256_store_aligned(void *p, c_v256 a) {
- if (SIMD_CHECK && (uintptr_t)p & 31) {
- fprintf(stderr, "Error: unaligned v256 store at %p\n", p);
- abort();
- }
- c_v256_store_unaligned(p, a);
-}
-
-SIMD_INLINE c_v256 c_v256_zero() {
- c_v256 t;
- t.u64[3] = t.u64[2] = t.u64[1] = t.u64[0] = 0;
- return t;
-}
-
-SIMD_INLINE c_v256 c_v256_dup_8(uint8_t x) {
- c_v256 t;
- t.v64[3] = t.v64[2] = t.v64[1] = t.v64[0] = c_v64_dup_8(x);
- return t;
-}
-
-SIMD_INLINE c_v256 c_v256_dup_16(uint16_t x) {
- c_v256 t;
- t.v64[3] = t.v64[2] = t.v64[1] = t.v64[0] = c_v64_dup_16(x);
- return t;
-}
-
-SIMD_INLINE c_v256 c_v256_dup_32(uint32_t x) {
- c_v256 t;
- t.v64[3] = t.v64[2] = t.v64[1] = t.v64[0] = c_v64_dup_32(x);
- return t;
-}
-
-SIMD_INLINE c_v256 c_v256_dup_64(uint64_t x) {
- c_v256 t;
- t.u64[3] = t.u64[2] = t.u64[1] = t.u64[0] = x;
- return t;
-}
-
-SIMD_INLINE int64_t c_v256_dotp_su8(c_v256 a, c_v256 b) {
- return c_v128_dotp_su8(a.v128[1], b.v128[1]) +
- c_v128_dotp_su8(a.v128[0], b.v128[0]);
-}
-
-SIMD_INLINE int64_t c_v256_dotp_s16(c_v256 a, c_v256 b) {
- return c_v128_dotp_s16(a.v128[1], b.v128[1]) +
- c_v128_dotp_s16(a.v128[0], b.v128[0]);
-}
-
-SIMD_INLINE int64_t c_v256_dotp_s32(c_v256 a, c_v256 b) {
- return c_v128_dotp_s32(a.v128[1], b.v128[1]) +
- c_v128_dotp_s32(a.v128[0], b.v128[0]);
-}
-
-SIMD_INLINE uint64_t c_v256_hadd_u8(c_v256 a) {
- return c_v128_hadd_u8(a.v128[1]) + c_v128_hadd_u8(a.v128[0]);
-}
-
-typedef uint32_t c_sad256_internal;
-
-SIMD_INLINE c_sad256_internal c_v256_sad_u8_init() { return 0; }
-
-/* Implementation dependent return value. Result must be finalised with
- v256_sad_u8_sum().
- The result for more than 16 v256_sad_u8() calls is undefined. */
-SIMD_INLINE c_sad256_internal c_v256_sad_u8(c_sad256_internal s, c_v256 a,
- c_v256 b) {
- int c;
- for (c = 0; c < 32; c++)
- s += a.u8[c] > b.u8[c] ? a.u8[c] - b.u8[c] : b.u8[c] - a.u8[c];
- return s;
-}
-
-SIMD_INLINE uint32_t c_v256_sad_u8_sum(c_sad256_internal s) { return s; }
-
-typedef uint32_t c_ssd256_internal;
-
-SIMD_INLINE c_ssd256_internal c_v256_ssd_u8_init() { return 0; }
-
-/* Implementation dependent return value. Result must be finalised with
- * v256_ssd_u8_sum(). */
-SIMD_INLINE c_ssd256_internal c_v256_ssd_u8(c_ssd256_internal s, c_v256 a,
- c_v256 b) {
- int c;
- for (c = 0; c < 32; c++) s += (a.u8[c] - b.u8[c]) * (a.u8[c] - b.u8[c]);
- return s;
-}
-
-SIMD_INLINE uint32_t c_v256_ssd_u8_sum(c_ssd256_internal s) { return s; }
-
-SIMD_INLINE c_v256 c_v256_or(c_v256 a, c_v256 b) {
- return c_v256_from_v128(c_v128_or(a.v128[1], b.v128[1]),
- c_v128_or(a.v128[0], b.v128[0]));
-}
-
-SIMD_INLINE c_v256 c_v256_xor(c_v256 a, c_v256 b) {
- return c_v256_from_v128(c_v128_xor(a.v128[1], b.v128[1]),
- c_v128_xor(a.v128[0], b.v128[0]));
-}
-
-SIMD_INLINE c_v256 c_v256_and(c_v256 a, c_v256 b) {
- return c_v256_from_v128(c_v128_and(a.v128[1], b.v128[1]),
- c_v128_and(a.v128[0], b.v128[0]));
-}
-
-SIMD_INLINE c_v256 c_v256_andn(c_v256 a, c_v256 b) {
- return c_v256_from_v128(c_v128_andn(a.v128[1], b.v128[1]),
- c_v128_andn(a.v128[0], b.v128[0]));
-}
-
-SIMD_INLINE c_v256 c_v256_add_8(c_v256 a, c_v256 b) {
- return c_v256_from_v128(c_v128_add_8(a.v128[1], b.v128[1]),
- c_v128_add_8(a.v128[0], b.v128[0]));
-}
-
-SIMD_INLINE c_v256 c_v256_add_16(c_v256 a, c_v256 b) {
- return c_v256_from_v128(c_v128_add_16(a.v128[1], b.v128[1]),
- c_v128_add_16(a.v128[0], b.v128[0]));
-}
-
-SIMD_INLINE c_v256 c_v256_sadd_s8(c_v256 a, c_v256 b) {
- return c_v256_from_v128(c_v128_sadd_s8(a.v128[1], b.v128[1]),
- c_v128_sadd_s8(a.v128[0], b.v128[0]));
-}
-
-SIMD_INLINE c_v256 c_v256_sadd_u8(c_v256 a, c_v256 b) {
- return c_v256_from_v128(c_v128_sadd_u8(a.v128[1], b.v128[1]),
- c_v128_sadd_u8(a.v128[0], b.v128[0]));
-}
-
-SIMD_INLINE c_v256 c_v256_sadd_s16(c_v256 a, c_v256 b) {
- return c_v256_from_v128(c_v128_sadd_s16(a.v128[1], b.v128[1]),
- c_v128_sadd_s16(a.v128[0], b.v128[0]));
-}
-
-SIMD_INLINE c_v256 c_v256_add_32(c_v256 a, c_v256 b) {
- return c_v256_from_v128(c_v128_add_32(a.v128[1], b.v128[1]),
- c_v128_add_32(a.v128[0], b.v128[0]));
-}
-
-SIMD_INLINE c_v256 c_v256_add_64(c_v256 a, c_v256 b) {
- return c_v256_from_v128(c_v128_add_64(a.v128[1], b.v128[1]),
- c_v128_add_64(a.v128[0], b.v128[0]));
-}
-
-SIMD_INLINE c_v256 c_v256_sub_64(c_v256 a, c_v256 b) {
- return c_v256_from_v128(c_v128_sub_64(a.v128[1], b.v128[1]),
- c_v128_sub_64(a.v128[0], b.v128[0]));
-}
-
-SIMD_INLINE c_v256 c_v256_padd_u8(c_v256 a) {
- c_v256 t;
- for (int i = 0; i < 16; i++)
- t.u16[i] = (uint16_t)a.u8[i * 2] + (uint16_t)a.u8[i * 2 + 1];
- return t;
-}
-
-SIMD_INLINE c_v256 c_v256_padd_s16(c_v256 a) {
- c_v256 t;
- t.s32[0] = (int32_t)a.s16[0] + (int32_t)a.s16[1];
- t.s32[1] = (int32_t)a.s16[2] + (int32_t)a.s16[3];
- t.s32[2] = (int32_t)a.s16[4] + (int32_t)a.s16[5];
- t.s32[3] = (int32_t)a.s16[6] + (int32_t)a.s16[7];
- t.s32[4] = (int32_t)a.s16[8] + (int32_t)a.s16[9];
- t.s32[5] = (int32_t)a.s16[10] + (int32_t)a.s16[11];
- t.s32[6] = (int32_t)a.s16[12] + (int32_t)a.s16[13];
- t.s32[7] = (int32_t)a.s16[14] + (int32_t)a.s16[15];
- return t;
-}
-
-SIMD_INLINE c_v256 c_v256_sub_8(c_v256 a, c_v256 b) {
- return c_v256_from_v128(c_v128_sub_8(a.v128[1], b.v128[1]),
- c_v128_sub_8(a.v128[0], b.v128[0]));
-}
-
-SIMD_INLINE c_v256 c_v256_ssub_u8(c_v256 a, c_v256 b) {
- return c_v256_from_v128(c_v128_ssub_u8(a.v128[1], b.v128[1]),
- c_v128_ssub_u8(a.v128[0], b.v128[0]));
-}
-
-SIMD_INLINE c_v256 c_v256_ssub_s8(c_v256 a, c_v256 b) {
- return c_v256_from_v128(c_v128_ssub_s8(a.v128[1], b.v128[1]),
- c_v128_ssub_s8(a.v128[0], b.v128[0]));
-}
-
-SIMD_INLINE c_v256 c_v256_sub_16(c_v256 a, c_v256 b) {
- return c_v256_from_v128(c_v128_sub_16(a.v128[1], b.v128[1]),
- c_v128_sub_16(a.v128[0], b.v128[0]));
-}
-
-SIMD_INLINE c_v256 c_v256_ssub_s16(c_v256 a, c_v256 b) {
- return c_v256_from_v128(c_v128_ssub_s16(a.v128[1], b.v128[1]),
- c_v128_ssub_s16(a.v128[0], b.v128[0]));
-}
-
-SIMD_INLINE c_v256 c_v256_ssub_u16(c_v256 a, c_v256 b) {
- return c_v256_from_v128(c_v128_ssub_u16(a.v128[1], b.v128[1]),
- c_v128_ssub_u16(a.v128[0], b.v128[0]));
-}
-
-SIMD_INLINE c_v256 c_v256_sub_32(c_v256 a, c_v256 b) {
- return c_v256_from_v128(c_v128_sub_32(a.v128[1], b.v128[1]),
- c_v128_sub_32(a.v128[0], b.v128[0]));
-}
-
-SIMD_INLINE c_v256 c_v256_abs_s16(c_v256 a) {
- return c_v256_from_v128(c_v128_abs_s16(a.v128[1]), c_v128_abs_s16(a.v128[0]));
-}
-
-SIMD_INLINE c_v256 c_v256_abs_s8(c_v256 a) {
- return c_v256_from_v128(c_v128_abs_s8(a.v128[1]), c_v128_abs_s8(a.v128[0]));
-}
-
-SIMD_INLINE c_v256 c_v256_mul_s16(c_v128 a, c_v128 b) {
- c_v128 lo_bits = c_v128_mullo_s16(a, b);
- c_v128 hi_bits = c_v128_mulhi_s16(a, b);
- return c_v256_from_v128(c_v128_ziphi_16(hi_bits, lo_bits),
- c_v128_ziplo_16(hi_bits, lo_bits));
-}
-
-SIMD_INLINE c_v256 c_v256_mullo_s16(c_v256 a, c_v256 b) {
- return c_v256_from_v128(c_v128_mullo_s16(a.v128[1], b.v128[1]),
- c_v128_mullo_s16(a.v128[0], b.v128[0]));
-}
-
-SIMD_INLINE c_v256 c_v256_mulhi_s16(c_v256 a, c_v256 b) {
- return c_v256_from_v128(c_v128_mulhi_s16(a.v128[1], b.v128[1]),
- c_v128_mulhi_s16(a.v128[0], b.v128[0]));
-}
-
-SIMD_INLINE c_v256 c_v256_mullo_s32(c_v256 a, c_v256 b) {
- return c_v256_from_v128(c_v128_mullo_s32(a.v128[1], b.v128[1]),
- c_v128_mullo_s32(a.v128[0], b.v128[0]));
-}
-
-SIMD_INLINE c_v256 c_v256_madd_s16(c_v256 a, c_v256 b) {
- return c_v256_from_v128(c_v128_madd_s16(a.v128[1], b.v128[1]),
- c_v128_madd_s16(a.v128[0], b.v128[0]));
-}
-
-SIMD_INLINE c_v256 c_v256_madd_us8(c_v256 a, c_v256 b) {
- return c_v256_from_v128(c_v128_madd_us8(a.v128[1], b.v128[1]),
- c_v128_madd_us8(a.v128[0], b.v128[0]));
-}
-
-SIMD_INLINE c_v256 c_v256_avg_u8(c_v256 a, c_v256 b) {
- return c_v256_from_v128(c_v128_avg_u8(a.v128[1], b.v128[1]),
- c_v128_avg_u8(a.v128[0], b.v128[0]));
-}
-
-SIMD_INLINE c_v256 c_v256_rdavg_u8(c_v256 a, c_v256 b) {
- return c_v256_from_v128(c_v128_rdavg_u8(a.v128[1], b.v128[1]),
- c_v128_rdavg_u8(a.v128[0], b.v128[0]));
-}
-
-SIMD_INLINE c_v256 c_v256_rdavg_u16(c_v256 a, c_v256 b) {
- return c_v256_from_v128(c_v128_rdavg_u16(a.v128[1], b.v128[1]),
- c_v128_rdavg_u16(a.v128[0], b.v128[0]));
-}
-
-SIMD_INLINE c_v256 c_v256_avg_u16(c_v256 a, c_v256 b) {
- return c_v256_from_v128(c_v128_avg_u16(a.v128[1], b.v128[1]),
- c_v128_avg_u16(a.v128[0], b.v128[0]));
-}
-
-SIMD_INLINE c_v256 c_v256_min_u8(c_v256 a, c_v256 b) {
- return c_v256_from_v128(c_v128_min_u8(a.v128[1], b.v128[1]),
- c_v128_min_u8(a.v128[0], b.v128[0]));
-}
-
-SIMD_INLINE c_v256 c_v256_max_u8(c_v256 a, c_v256 b) {
- return c_v256_from_v128(c_v128_max_u8(a.v128[1], b.v128[1]),
- c_v128_max_u8(a.v128[0], b.v128[0]));
-}
-
-SIMD_INLINE c_v256 c_v256_min_s8(c_v256 a, c_v256 b) {
- return c_v256_from_v128(c_v128_min_s8(a.v128[1], b.v128[1]),
- c_v128_min_s8(a.v128[0], b.v128[0]));
-}
-
-SIMD_INLINE uint32_t c_v256_movemask_8(c_v256 a) {
- return ((a.s8[31] < 0) << 31) | ((a.s8[30] < 0) << 30) |
- ((a.s8[29] < 0) << 29) | ((a.s8[28] < 0) << 28) |
- ((a.s8[27] < 0) << 27) | ((a.s8[26] < 0) << 26) |
- ((a.s8[25] < 0) << 25) | ((a.s8[24] < 0) << 24) |
- ((a.s8[23] < 0) << 23) | ((a.s8[22] < 0) << 22) |
- ((a.s8[21] < 0) << 21) | ((a.s8[20] < 0) << 20) |
- ((a.s8[19] < 0) << 19) | ((a.s8[18] < 0) << 18) |
- ((a.s8[17] < 0) << 17) | ((a.s8[16] < 0) << 16) |
- ((a.s8[15] < 0) << 15) | ((a.s8[14] < 0) << 14) |
- ((a.s8[13] < 0) << 13) | ((a.s8[12] < 0) << 12) |
- ((a.s8[11] < 0) << 11) | ((a.s8[10] < 0) << 10) |
- ((a.s8[9] < 0) << 9) | ((a.s8[8] < 0) << 8) | ((a.s8[7] < 0) << 7) |
- ((a.s8[6] < 0) << 6) | ((a.s8[5] < 0) << 5) | ((a.s8[4] < 0) << 4) |
- ((a.s8[3] < 0) << 3) | ((a.s8[2] < 0) << 2) | ((a.s8[1] < 0) << 1) |
- ((a.s8[0] < 0) << 0);
-}
-
-SIMD_INLINE c_v256 c_v256_blend_8(c_v256 a, c_v256 b, c_v256 c) {
- c_v256 t;
- for (int i = 0; i < 32; i++) t.u8[i] = c.s8[i] < 0 ? b.u8[i] : a.u8[i];
- return t;
-}
-
-SIMD_INLINE c_v256 c_v256_max_s8(c_v256 a, c_v256 b) {
- return c_v256_from_v128(c_v128_max_s8(a.v128[1], b.v128[1]),
- c_v128_max_s8(a.v128[0], b.v128[0]));
-}
-
-SIMD_INLINE c_v256 c_v256_min_s16(c_v256 a, c_v256 b) {
- return c_v256_from_v128(c_v128_min_s16(a.v128[1], b.v128[1]),
- c_v128_min_s16(a.v128[0], b.v128[0]));
-}
-
-SIMD_INLINE c_v256 c_v256_max_s16(c_v256 a, c_v256 b) {
- return c_v256_from_v128(c_v128_max_s16(a.v128[1], b.v128[1]),
- c_v128_max_s16(a.v128[0], b.v128[0]));
-}
-
-SIMD_INLINE c_v256 c_v256_min_s32(c_v256 a, c_v256 b) {
- return c_v256_from_v128(c_v128_min_s32(a.v128[1], b.v128[1]),
- c_v128_min_s32(a.v128[0], b.v128[0]));
-}
-
-SIMD_INLINE c_v256 c_v256_max_s32(c_v256 a, c_v256 b) {
- return c_v256_from_v128(c_v128_max_s32(a.v128[1], b.v128[1]),
- c_v128_max_s32(a.v128[0], b.v128[0]));
-}
-
-SIMD_INLINE c_v256 c_v256_ziplo_8(c_v256 a, c_v256 b) {
- return c_v256_from_v128(c_v128_ziphi_8(a.v128[0], b.v128[0]),
- c_v128_ziplo_8(a.v128[0], b.v128[0]));
-}
-
-SIMD_INLINE c_v256 c_v256_ziphi_8(c_v256 a, c_v256 b) {
- return c_v256_from_v128(c_v128_ziphi_8(a.v128[1], b.v128[1]),
- c_v128_ziplo_8(a.v128[1], b.v128[1]));
-}
-
-SIMD_INLINE c_v256 c_v256_ziplo_16(c_v256 a, c_v256 b) {
- return c_v256_from_v128(c_v128_ziphi_16(a.v128[0], b.v128[0]),
- c_v128_ziplo_16(a.v128[0], b.v128[0]));
-}
-
-SIMD_INLINE c_v256 c_v256_ziphi_16(c_v256 a, c_v256 b) {
- return c_v256_from_v128(c_v128_ziphi_16(a.v128[1], b.v128[1]),
- c_v128_ziplo_16(a.v128[1], b.v128[1]));
-}
-
-SIMD_INLINE c_v256 c_v256_ziplo_32(c_v256 a, c_v256 b) {
- return c_v256_from_v128(c_v128_ziphi_32(a.v128[0], b.v128[0]),
- c_v128_ziplo_32(a.v128[0], b.v128[0]));
-}
-
-SIMD_INLINE c_v256 c_v256_ziphi_32(c_v256 a, c_v256 b) {
- return c_v256_from_v128(c_v128_ziphi_32(a.v128[1], b.v128[1]),
- c_v128_ziplo_32(a.v128[1], b.v128[1]));
-}
-
-SIMD_INLINE c_v256 c_v256_ziplo_64(c_v256 a, c_v256 b) {
- return c_v256_from_v128(c_v128_ziphi_64(a.v128[0], b.v128[0]),
- c_v128_ziplo_64(a.v128[0], b.v128[0]));
-}
-
-SIMD_INLINE c_v256 c_v256_ziphi_64(c_v256 a, c_v256 b) {
- return c_v256_from_v128(c_v128_ziphi_64(a.v128[1], b.v128[1]),
- c_v128_ziplo_64(a.v128[1], b.v128[1]));
-}
-
-SIMD_INLINE c_v256 c_v256_ziplo_128(c_v256 a, c_v256 b) {
- return c_v256_from_v128(a.v128[0], b.v128[0]);
-}
-
-SIMD_INLINE c_v256 c_v256_ziphi_128(c_v256 a, c_v256 b) {
- return c_v256_from_v128(a.v128[1], b.v128[1]);
-}
-
-SIMD_INLINE c_v256 c_v256_zip_8(c_v128 a, c_v128 b) {
- return c_v256_from_v128(c_v128_ziphi_8(a, b), c_v128_ziplo_8(a, b));
-}
-
-SIMD_INLINE c_v256 c_v256_zip_16(c_v128 a, c_v128 b) {
- return c_v256_from_v128(c_v128_ziphi_16(a, b), c_v128_ziplo_16(a, b));
-}
-
-SIMD_INLINE c_v256 c_v256_zip_32(c_v128 a, c_v128 b) {
- return c_v256_from_v128(c_v128_ziphi_32(a, b), c_v128_ziplo_32(a, b));
-}
-
-SIMD_INLINE c_v256 _c_v256_unzip_8(c_v256 a, c_v256 b, int mode) {
- c_v256 t;
- int i;
- if (mode) {
- for (i = 0; i < 16; i++) {
- t.u8[i] = a.u8[i * 2 + 1];
- t.u8[i + 16] = b.u8[i * 2 + 1];
- }
- } else {
- for (i = 0; i < 16; i++) {
- t.u8[i] = b.u8[i * 2];
- t.u8[i + 16] = a.u8[i * 2];
- }
- }
- return t;
-}
-
-SIMD_INLINE c_v256 c_v256_unziplo_8(c_v256 a, c_v256 b) {
- return CONFIG_BIG_ENDIAN ? _c_v256_unzip_8(a, b, 1)
- : _c_v256_unzip_8(a, b, 0);
-}
-
-SIMD_INLINE c_v256 c_v256_unziphi_8(c_v256 a, c_v256 b) {
- return CONFIG_BIG_ENDIAN ? _c_v256_unzip_8(b, a, 0)
- : _c_v256_unzip_8(b, a, 1);
-}
-
-SIMD_INLINE c_v256 _c_v256_unzip_16(c_v256 a, c_v256 b, int mode) {
- c_v256 t;
- int i;
- if (mode) {
- for (i = 0; i < 8; i++) {
- t.u16[i] = a.u16[i * 2 + 1];
- t.u16[i + 8] = b.u16[i * 2 + 1];
- }
- } else {
- for (i = 0; i < 8; i++) {
- t.u16[i] = b.u16[i * 2];
- t.u16[i + 8] = a.u16[i * 2];
- }
- }
- return t;
-}
-
-SIMD_INLINE c_v256 c_v256_unziplo_16(c_v256 a, c_v256 b) {
- return CONFIG_BIG_ENDIAN ? _c_v256_unzip_16(a, b, 1)
- : _c_v256_unzip_16(a, b, 0);
-}
-
-SIMD_INLINE c_v256 c_v256_unziphi_16(c_v256 a, c_v256 b) {
- return CONFIG_BIG_ENDIAN ? _c_v256_unzip_16(b, a, 0)
- : _c_v256_unzip_16(b, a, 1);
-}
-
-SIMD_INLINE c_v256 _c_v256_unzip_32(c_v256 a, c_v256 b, int mode) {
- c_v256 t;
- if (mode) {
- t.u32[7] = b.u32[7];
- t.u32[6] = b.u32[5];
- t.u32[5] = b.u32[3];
- t.u32[4] = b.u32[1];
- t.u32[3] = a.u32[7];
- t.u32[2] = a.u32[5];
- t.u32[1] = a.u32[3];
- t.u32[0] = a.u32[1];
- } else {
- t.u32[7] = a.u32[6];
- t.u32[6] = a.u32[4];
- t.u32[5] = a.u32[2];
- t.u32[4] = a.u32[0];
- t.u32[3] = b.u32[6];
- t.u32[2] = b.u32[4];
- t.u32[1] = b.u32[2];
- t.u32[0] = b.u32[0];
- }
- return t;
-}
-
-SIMD_INLINE c_v256 c_v256_unziplo_32(c_v256 a, c_v256 b) {
- return CONFIG_BIG_ENDIAN ? _c_v256_unzip_32(a, b, 1)
- : _c_v256_unzip_32(a, b, 0);
-}
-
-SIMD_INLINE c_v256 c_v256_unziphi_32(c_v256 a, c_v256 b) {
- return CONFIG_BIG_ENDIAN ? _c_v256_unzip_32(b, a, 0)
- : _c_v256_unzip_32(b, a, 1);
-}
-
-SIMD_INLINE c_v256 _c_v256_unzip_64(c_v256 a, c_v256 b, int mode) {
- c_v256 t;
- if (mode) {
- t.u64[3] = b.u64[3];
- t.u64[2] = b.u64[1];
- t.u64[1] = a.u64[3];
- t.u64[0] = a.u64[1];
- } else {
- t.u64[3] = a.u64[2];
- t.u64[2] = a.u64[0];
- t.u64[1] = b.u64[2];
- t.u64[0] = b.u64[0];
- }
- return t;
-}
-
-SIMD_INLINE c_v256 c_v256_unziplo_64(c_v256 a, c_v256 b) {
- return CONFIG_BIG_ENDIAN ? _c_v256_unzip_64(a, b, 1)
- : _c_v256_unzip_64(a, b, 0);
-}
-
-SIMD_INLINE c_v256 c_v256_unziphi_64(c_v256 a, c_v256 b) {
- return CONFIG_BIG_ENDIAN ? _c_v256_unzip_64(b, a, 0)
- : _c_v256_unzip_64(b, a, 1);
-}
-
-SIMD_INLINE c_v256 c_v256_unpack_u8_s16(c_v128 a) {
- return c_v256_from_v128(c_v128_unpackhi_u8_s16(a), c_v128_unpacklo_u8_s16(a));
-}
-
-SIMD_INLINE c_v256 c_v256_unpacklo_u8_s16(c_v256 a) {
- return c_v256_from_v128(c_v128_unpackhi_u8_s16(a.v128[0]),
- c_v128_unpacklo_u8_s16(a.v128[0]));
-}
-
-SIMD_INLINE c_v256 c_v256_unpackhi_u8_s16(c_v256 a) {
- return c_v256_from_v128(c_v128_unpackhi_u8_s16(a.v128[1]),
- c_v128_unpacklo_u8_s16(a.v128[1]));
-}
-
-SIMD_INLINE c_v256 c_v256_unpack_s8_s16(c_v128 a) {
- return c_v256_from_v128(c_v128_unpackhi_s8_s16(a), c_v128_unpacklo_s8_s16(a));
-}
-
-SIMD_INLINE c_v256 c_v256_unpacklo_s8_s16(c_v256 a) {
- return c_v256_from_v128(c_v128_unpackhi_s8_s16(a.v128[0]),
- c_v128_unpacklo_s8_s16(a.v128[0]));
-}
-
-SIMD_INLINE c_v256 c_v256_unpackhi_s8_s16(c_v256 a) {
- return c_v256_from_v128(c_v128_unpackhi_s8_s16(a.v128[1]),
- c_v128_unpacklo_s8_s16(a.v128[1]));
-}
-
-SIMD_INLINE c_v256 c_v256_pack_s32_s16(c_v256 a, c_v256 b) {
- return c_v256_from_v128(c_v128_pack_s32_s16(a.v128[1], a.v128[0]),
- c_v128_pack_s32_s16(b.v128[1], b.v128[0]));
-}
-
-SIMD_INLINE c_v256 c_v256_pack_s32_u16(c_v256 a, c_v256 b) {
- return c_v256_from_v128(c_v128_pack_s32_u16(a.v128[1], a.v128[0]),
- c_v128_pack_s32_u16(b.v128[1], b.v128[0]));
-}
-
-SIMD_INLINE c_v256 c_v256_pack_s16_u8(c_v256 a, c_v256 b) {
- return c_v256_from_v128(c_v128_pack_s16_u8(a.v128[1], a.v128[0]),
- c_v128_pack_s16_u8(b.v128[1], b.v128[0]));
-}
-
-SIMD_INLINE c_v256 c_v256_pack_s16_s8(c_v256 a, c_v256 b) {
- return c_v256_from_v128(c_v128_pack_s16_s8(a.v128[1], a.v128[0]),
- c_v128_pack_s16_s8(b.v128[1], b.v128[0]));
-}
-
-SIMD_INLINE c_v256 c_v256_unpack_u16_s32(c_v128 a) {
- return c_v256_from_v128(c_v128_unpackhi_u16_s32(a),
- c_v128_unpacklo_u16_s32(a));
-}
-
-SIMD_INLINE c_v256 c_v256_unpack_s16_s32(c_v128 a) {
- return c_v256_from_v128(c_v128_unpackhi_s16_s32(a),
- c_v128_unpacklo_s16_s32(a));
-}
-
-SIMD_INLINE c_v256 c_v256_unpacklo_u16_s32(c_v256 a) {
- return c_v256_from_v128(c_v128_unpackhi_u16_s32(a.v128[0]),
- c_v128_unpacklo_u16_s32(a.v128[0]));
-}
-
-SIMD_INLINE c_v256 c_v256_unpacklo_s16_s32(c_v256 a) {
- return c_v256_from_v128(c_v128_unpackhi_s16_s32(a.v128[0]),
- c_v128_unpacklo_s16_s32(a.v128[0]));
-}
-
-SIMD_INLINE c_v256 c_v256_unpackhi_u16_s32(c_v256 a) {
- return c_v256_from_v128(c_v128_unpackhi_u16_s32(a.v128[1]),
- c_v128_unpacklo_u16_s32(a.v128[1]));
-}
-
-SIMD_INLINE c_v256 c_v256_unpackhi_s16_s32(c_v256 a) {
- return c_v256_from_v128(c_v128_unpackhi_s16_s32(a.v128[1]),
- c_v128_unpacklo_s16_s32(a.v128[1]));
-}
-
-SIMD_INLINE c_v256 c_v256_shuffle_8(c_v256 a, c_v256 pattern) {
- c_v256 t;
- int c;
- for (c = 0; c < 32; c++)
- t.u8[c] = a.u8[CONFIG_BIG_ENDIAN ? 31 - (pattern.u8[c] & 31)
- : pattern.u8[c] & 31];
-
- return t;
-}
-
-SIMD_INLINE c_v256 c_v256_wideshuffle_8(c_v256 a, c_v256 b, c_v256 pattern) {
- c_v256 t;
- int c;
- for (c = 0; c < 32; c++)
- t.u8[c] = (pattern.u8[c] < 32
- ? b.u8
- : a.u8)[CONFIG_BIG_ENDIAN ? 31 - (pattern.u8[c] & 31)
- : pattern.u8[c] & 31];
- return t;
-}
-
-// Pairwise / dual-lane shuffle: shuffle two 128 bit lates.
-SIMD_INLINE c_v256 c_v256_pshuffle_8(c_v256 a, c_v256 pattern) {
- return c_v256_from_v128(
- c_v128_shuffle_8(c_v256_high_v128(a), c_v256_high_v128(pattern)),
- c_v128_shuffle_8(c_v256_low_v128(a), c_v256_low_v128(pattern)));
-}
-
-SIMD_INLINE c_v256 c_v256_cmpgt_s8(c_v256 a, c_v256 b) {
- return c_v256_from_v128(c_v128_cmpgt_s8(a.v128[1], b.v128[1]),
- c_v128_cmpgt_s8(a.v128[0], b.v128[0]));
-}
-
-SIMD_INLINE c_v256 c_v256_cmplt_s8(c_v256 a, c_v256 b) {
- return c_v256_from_v128(c_v128_cmplt_s8(a.v128[1], b.v128[1]),
- c_v128_cmplt_s8(a.v128[0], b.v128[0]));
-}
-
-SIMD_INLINE c_v256 c_v256_cmpeq_8(c_v256 a, c_v256 b) {
- return c_v256_from_v128(c_v128_cmpeq_8(a.v128[1], b.v128[1]),
- c_v128_cmpeq_8(a.v128[0], b.v128[0]));
-}
-
-SIMD_INLINE c_v256 c_v256_cmpgt_s16(c_v256 a, c_v256 b) {
- return c_v256_from_v128(c_v128_cmpgt_s16(a.v128[1], b.v128[1]),
- c_v128_cmpgt_s16(a.v128[0], b.v128[0]));
-}
-
-SIMD_INLINE c_v256 c_v256_cmplt_s16(c_v256 a, c_v256 b) {
- return c_v256_from_v128(c_v128_cmplt_s16(a.v128[1], b.v128[1]),
- c_v128_cmplt_s16(a.v128[0], b.v128[0]));
-}
-
-SIMD_INLINE c_v256 c_v256_cmpeq_16(c_v256 a, c_v256 b) {
- return c_v256_from_v128(c_v128_cmpeq_16(a.v128[1], b.v128[1]),
- c_v128_cmpeq_16(a.v128[0], b.v128[0]));
-}
-
-SIMD_INLINE c_v256 c_v256_cmpgt_s32(c_v256 a, c_v256 b) {
- return c_v256_from_v128(c_v128_cmpgt_s32(a.v128[1], b.v128[1]),
- c_v128_cmpgt_s32(a.v128[0], b.v128[0]));
-}
-
-SIMD_INLINE c_v256 c_v256_cmplt_s32(c_v256 a, c_v256 b) {
- return c_v256_from_v128(c_v128_cmplt_s32(a.v128[1], b.v128[1]),
- c_v128_cmplt_s32(a.v128[0], b.v128[0]));
-}
-
-SIMD_INLINE c_v256 c_v256_cmpeq_32(c_v256 a, c_v256 b) {
- return c_v256_from_v128(c_v128_cmpeq_32(a.v128[1], b.v128[1]),
- c_v128_cmpeq_32(a.v128[0], b.v128[0]));
-}
-
-SIMD_INLINE c_v256 c_v256_shl_n_byte(c_v256 a, unsigned int n) {
- if (n < 16)
- return c_v256_from_v128(c_v128_or(c_v128_shl_n_byte(a.v128[1], n),
- c_v128_shr_n_byte(a.v128[0], 16 - n)),
- c_v128_shl_n_byte(a.v128[0], n));
- else if (n > 16)
- return c_v256_from_v128(c_v128_shl_n_byte(a.v128[0], n - 16),
- c_v128_zero());
- else
- return c_v256_from_v128(c_v256_low_v128(a), c_v128_zero());
-}
-
-SIMD_INLINE c_v256 c_v256_shr_n_byte(c_v256 a, unsigned int n) {
- if (n < 16)
- return c_v256_from_v128(c_v128_shr_n_byte(a.v128[1], n),
- c_v128_or(c_v128_shr_n_byte(a.v128[0], n),
- c_v128_shl_n_byte(a.v128[1], 16 - n)));
- else if (n > 16)
- return c_v256_from_v128(c_v128_zero(),
- c_v128_shr_n_byte(a.v128[1], n - 16));
- else
- return c_v256_from_v128(c_v128_zero(), c_v256_high_v128(a));
-}
-
-SIMD_INLINE c_v256 c_v256_align(c_v256 a, c_v256 b, unsigned int c) {
- if (SIMD_CHECK && c > 31) {
- fprintf(stderr, "Error: undefined alignment %d\n", c);
- abort();
- }
- return c ? c_v256_or(c_v256_shr_n_byte(b, c), c_v256_shl_n_byte(a, 32 - c))
- : b;
-}
-
-SIMD_INLINE c_v256 c_v256_shl_8(c_v256 a, unsigned int c) {
- return c_v256_from_v128(c_v128_shl_8(a.v128[1], c),
- c_v128_shl_8(a.v128[0], c));
-}
-
-SIMD_INLINE c_v256 c_v256_shr_u8(c_v256 a, unsigned int c) {
- return c_v256_from_v128(c_v128_shr_u8(a.v128[1], c),
- c_v128_shr_u8(a.v128[0], c));
-}
-
-SIMD_INLINE c_v256 c_v256_shr_s8(c_v256 a, unsigned int c) {
- return c_v256_from_v128(c_v128_shr_s8(a.v128[1], c),
- c_v128_shr_s8(a.v128[0], c));
-}
-
-SIMD_INLINE c_v256 c_v256_shl_16(c_v256 a, unsigned int c) {
- return c_v256_from_v128(c_v128_shl_16(a.v128[1], c),
- c_v128_shl_16(a.v128[0], c));
-}
-
-SIMD_INLINE c_v256 c_v256_shr_u16(c_v256 a, unsigned int c) {
- return c_v256_from_v128(c_v128_shr_u16(a.v128[1], c),
- c_v128_shr_u16(a.v128[0], c));
-}
-
-SIMD_INLINE c_v256 c_v256_shr_s16(c_v256 a, unsigned int c) {
- return c_v256_from_v128(c_v128_shr_s16(a.v128[1], c),
- c_v128_shr_s16(a.v128[0], c));
-}
-
-SIMD_INLINE c_v256 c_v256_shl_32(c_v256 a, unsigned int c) {
- return c_v256_from_v128(c_v128_shl_32(a.v128[1], c),
- c_v128_shl_32(a.v128[0], c));
-}
-
-SIMD_INLINE c_v256 c_v256_shr_u32(c_v256 a, unsigned int c) {
- return c_v256_from_v128(c_v128_shr_u32(a.v128[1], c),
- c_v128_shr_u32(a.v128[0], c));
-}
-
-SIMD_INLINE c_v256 c_v256_shr_s32(c_v256 a, unsigned int c) {
- return c_v256_from_v128(c_v128_shr_s32(a.v128[1], c),
- c_v128_shr_s32(a.v128[0], c));
-}
-
-SIMD_INLINE c_v256 c_v256_shr_s64(c_v256 a, unsigned int n) {
- c_v256 t;
- if (SIMD_CHECK && n > 63) {
- fprintf(stderr, "Error: undefined s64 shift right %d\n", n);
- abort();
- }
- t.s64[3] = a.s64[3] >> n;
- t.s64[2] = a.s64[2] >> n;
- t.s64[1] = a.s64[1] >> n;
- t.s64[0] = a.s64[0] >> n;
- return t;
-}
-
-SIMD_INLINE c_v256 c_v256_shr_u64(c_v256 a, unsigned int n) {
- c_v256 t;
- if (SIMD_CHECK && n > 63) {
- fprintf(stderr, "Error: undefined s64 shift right %d\n", n);
- abort();
- }
- t.u64[3] = a.u64[3] >> n;
- t.u64[2] = a.u64[2] >> n;
- t.u64[1] = a.u64[1] >> n;
- t.u64[0] = a.u64[0] >> n;
- return t;
-}
-
-SIMD_INLINE c_v256 c_v256_shl_64(c_v256 a, unsigned int n) {
- c_v256 t;
- if (SIMD_CHECK && n > 63) {
- fprintf(stderr, "Error: undefined s64 shift right %d\n", n);
- abort();
- }
- t.u64[3] = a.u64[3] << n;
- t.u64[2] = a.u64[2] << n;
- t.u64[1] = a.u64[1] << n;
- t.u64[0] = a.u64[0] << n;
- return t;
-}
-
-SIMD_INLINE c_v256 c_v256_shl_n_8(c_v256 a, unsigned int n) {
- return c_v256_shl_8(a, n);
-}
-
-SIMD_INLINE c_v256 c_v256_shl_n_16(c_v256 a, unsigned int n) {
- return c_v256_shl_16(a, n);
-}
-
-SIMD_INLINE c_v256 c_v256_shl_n_32(c_v256 a, unsigned int n) {
- return c_v256_shl_32(a, n);
-}
-
-SIMD_INLINE c_v256 c_v256_shl_n_64(c_v256 a, unsigned int n) {
- return c_v256_shl_64(a, n);
-}
-
-SIMD_INLINE c_v256 c_v256_shr_n_u8(c_v256 a, unsigned int n) {
- return c_v256_shr_u8(a, n);
-}
-
-SIMD_INLINE c_v256 c_v256_shr_n_u16(c_v256 a, unsigned int n) {
- return c_v256_shr_u16(a, n);
-}
-
-SIMD_INLINE c_v256 c_v256_shr_n_u32(c_v256 a, unsigned int n) {
- return c_v256_shr_u32(a, n);
-}
-
-SIMD_INLINE c_v256 c_v256_shr_n_u64(c_v256 a, unsigned int n) {
- return c_v256_shr_u64(a, n);
-}
-
-SIMD_INLINE c_v256 c_v256_shr_n_s8(c_v256 a, unsigned int n) {
- return c_v256_shr_s8(a, n);
-}
-
-SIMD_INLINE c_v256 c_v256_shr_n_s16(c_v256 a, unsigned int n) {
- return c_v256_shr_s16(a, n);
-}
-
-SIMD_INLINE c_v256 c_v256_shr_n_s32(c_v256 a, unsigned int n) {
- return c_v256_shr_s32(a, n);
-}
-
-SIMD_INLINE c_v256 c_v256_shr_n_s64(c_v256 a, unsigned int n) {
- return c_v256_shr_s64(a, n);
-}
-
-SIMD_INLINE c_v256 c_v256_shr_n_word(c_v256 a, const unsigned int n) {
- return c_v256_shr_n_byte(a, 2 * n);
-}
-SIMD_INLINE c_v256 c_v256_shl_n_word(c_v256 a, const unsigned int n) {
- return c_v256_shl_n_byte(a, 2 * n);
-}
-
-typedef uint32_t c_sad256_internal_u16;
-
-SIMD_INLINE c_sad256_internal_u16 c_v256_sad_u16_init() { return 0; }
-
-/* Implementation dependent return value. Result must be finalised with
- v256_sad_u16_sum(). */
-SIMD_INLINE c_sad256_internal_u16 c_v256_sad_u16(c_sad256_internal_u16 s,
- c_v256 a, c_v256 b) {
- int c;
- for (c = 0; c < 16; c++)
- s += a.u16[c] > b.u16[c] ? a.u16[c] - b.u16[c] : b.u16[c] - a.u16[c];
- return s;
-}
-
-SIMD_INLINE uint32_t c_v256_sad_u16_sum(c_sad256_internal_u16 s) { return s; }
-
-typedef uint64_t c_ssd256_internal_s16;
-
-SIMD_INLINE c_ssd256_internal_s16 c_v256_ssd_s16_init() { return 0; }
-
-/* Implementation dependent return value. Result must be finalised with
- * v256_ssd_s16_sum(). */
-SIMD_INLINE c_ssd256_internal_s16 c_v256_ssd_s16(c_ssd256_internal_s16 s,
- c_v256 a, c_v256 b) {
- int c;
- for (c = 0; c < 16; c++)
- s += (int32_t)(int16_t)(a.s16[c] - b.s16[c]) *
- (int32_t)(int16_t)(a.s16[c] - b.s16[c]);
- return s;
-}
-
-SIMD_INLINE uint64_t c_v256_ssd_s16_sum(c_ssd256_internal_s16 s) { return s; }
-
-#endif // AOM_AOM_DSP_SIMD_V256_INTRINSICS_C_H_
diff --git a/third_party/aom/aom_dsp/simd/v256_intrinsics_v128.h b/third_party/aom/aom_dsp/simd/v256_intrinsics_v128.h
deleted file mode 100644
index d5b7905ef..000000000
--- a/third_party/aom/aom_dsp/simd/v256_intrinsics_v128.h
+++ /dev/null
@@ -1,873 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#ifndef AOM_AOM_DSP_SIMD_V256_INTRINSICS_V128_H_
-#define AOM_AOM_DSP_SIMD_V256_INTRINSICS_V128_H_
-
-#if HAVE_NEON
-#include "aom_dsp/simd/v128_intrinsics_arm.h"
-#elif HAVE_SSE2
-#include "aom_dsp/simd/v128_intrinsics_x86.h"
-#else
-#include "aom_dsp/simd/v128_intrinsics.h"
-#endif
-
-#if HAVE_NEON
-typedef int64x2x2_t v256;
-#else
-typedef struct {
- v128 val[2];
-} v256;
-#endif
-
-SIMD_INLINE uint32_t v256_low_u32(v256 a) { return v128_low_u32(a.val[0]); }
-
-SIMD_INLINE v64 v256_low_v64(v256 a) { return v128_low_v64(a.val[0]); }
-
-SIMD_INLINE uint64_t v256_low_u64(v256 a) { return v64_u64(v256_low_v64(a)); }
-
-SIMD_INLINE v128 v256_low_v128(v256 a) { return a.val[0]; }
-
-SIMD_INLINE v128 v256_high_v128(v256 a) { return a.val[1]; }
-
-SIMD_INLINE v256 v256_from_v128(v128 hi, v128 lo) {
- v256 t;
- t.val[1] = hi;
- t.val[0] = lo;
- return t;
-}
-
-SIMD_INLINE v256 v256_from_64(uint64_t a, uint64_t b, uint64_t c, uint64_t d) {
- return v256_from_v128(v128_from_64(a, b), v128_from_64(c, d));
-}
-
-SIMD_INLINE v256 v256_from_v64(v64 a, v64 b, v64 c, v64 d) {
- return v256_from_v128(v128_from_v64(a, b), v128_from_v64(c, d));
-}
-
-SIMD_INLINE v256 v256_load_unaligned(const void *p) {
- return v256_from_v128(v128_load_unaligned((uint8_t *)p + 16),
- v128_load_unaligned(p));
-}
-
-SIMD_INLINE v256 v256_load_aligned(const void *p) {
- return v256_from_v128(v128_load_aligned((uint8_t *)p + 16),
- v128_load_aligned(p));
-}
-
-SIMD_INLINE void v256_store_unaligned(void *p, v256 a) {
- v128_store_unaligned(p, a.val[0]);
- v128_store_unaligned((uint8_t *)p + 16, a.val[1]);
-}
-
-SIMD_INLINE void v256_store_aligned(void *p, v256 a) {
- v128_store_aligned(p, a.val[0]);
- v128_store_aligned((uint8_t *)p + 16, a.val[1]);
-}
-
-SIMD_INLINE v256 v256_zero() {
- return v256_from_v128(v128_zero(), v128_zero());
-}
-
-SIMD_INLINE v256 v256_dup_8(uint8_t x) {
- v128 t = v128_dup_8(x);
- return v256_from_v128(t, t);
-}
-
-SIMD_INLINE v256 v256_dup_16(uint16_t x) {
- v128 t = v128_dup_16(x);
- return v256_from_v128(t, t);
-}
-
-SIMD_INLINE v256 v256_dup_32(uint32_t x) {
- v128 t = v128_dup_32(x);
- return v256_from_v128(t, t);
-}
-
-SIMD_INLINE v256 v256_dup_64(uint64_t x) {
- v128 t = v128_dup_64(x);
- return v256_from_v128(t, t);
-}
-
-SIMD_INLINE int64_t v256_dotp_su8(v256 a, v256 b) {
- return v128_dotp_su8(a.val[1], b.val[1]) + v128_dotp_su8(a.val[0], b.val[0]);
-}
-
-SIMD_INLINE int64_t v256_dotp_s16(v256 a, v256 b) {
- return v128_dotp_s16(a.val[1], b.val[1]) + v128_dotp_s16(a.val[0], b.val[0]);
-}
-
-SIMD_INLINE int64_t v256_dotp_s32(v256 a, v256 b) {
- return v128_dotp_s32(a.val[1], b.val[1]) + v128_dotp_s32(a.val[0], b.val[0]);
-}
-
-SIMD_INLINE uint64_t v256_hadd_u8(v256 a) {
- return v128_hadd_u8(a.val[1]) + v128_hadd_u8(a.val[0]);
-}
-
-typedef struct {
- sad128_internal val[2];
-} sad256_internal;
-
-SIMD_INLINE sad256_internal v256_sad_u8_init() {
- sad256_internal t;
- t.val[1] = v128_sad_u8_init();
- t.val[0] = v128_sad_u8_init();
- return t;
-}
-
-/* Implementation dependent return value. Result must be finalised with
- v256_sad_u8_sum().
- The result for more than 16 v256_sad_u8() calls is undefined. */
-SIMD_INLINE sad256_internal v256_sad_u8(sad256_internal s, v256 a, v256 b) {
- sad256_internal t;
- t.val[1] = v128_sad_u8(s.val[1], a.val[1], b.val[1]);
- t.val[0] = v128_sad_u8(s.val[0], a.val[0], b.val[0]);
- return t;
-}
-
-SIMD_INLINE uint32_t v256_sad_u8_sum(sad256_internal s) {
- return v128_sad_u8_sum(s.val[1]) + v128_sad_u8_sum(s.val[0]);
-}
-
-typedef struct {
- ssd128_internal val[2];
-} ssd256_internal;
-
-SIMD_INLINE ssd256_internal v256_ssd_u8_init() {
- ssd256_internal t;
- t.val[1] = v128_ssd_u8_init();
- t.val[0] = v128_ssd_u8_init();
- return t;
-}
-
-/* Implementation dependent return value. Result must be finalised with
- * v256_ssd_u8_sum(). */
-SIMD_INLINE ssd256_internal v256_ssd_u8(ssd256_internal s, v256 a, v256 b) {
- ssd256_internal t;
- t.val[1] = v128_ssd_u8(s.val[1], a.val[1], b.val[1]);
- t.val[0] = v128_ssd_u8(s.val[0], a.val[0], b.val[0]);
- return t;
-}
-
-SIMD_INLINE uint32_t v256_ssd_u8_sum(ssd256_internal s) {
- return v128_ssd_u8_sum(s.val[1]) + v128_ssd_u8_sum(s.val[0]);
-}
-
-SIMD_INLINE v256 v256_or(v256 a, v256 b) {
- return v256_from_v128(v128_or(a.val[1], b.val[1]),
- v128_or(a.val[0], b.val[0]));
-}
-
-SIMD_INLINE v256 v256_xor(v256 a, v256 b) {
- return v256_from_v128(v128_xor(a.val[1], b.val[1]),
- v128_xor(a.val[0], b.val[0]));
-}
-
-SIMD_INLINE v256 v256_and(v256 a, v256 b) {
- return v256_from_v128(v128_and(a.val[1], b.val[1]),
- v128_and(a.val[0], b.val[0]));
-}
-
-SIMD_INLINE v256 v256_andn(v256 a, v256 b) {
- return v256_from_v128(v128_andn(a.val[1], b.val[1]),
- v128_andn(a.val[0], b.val[0]));
-}
-
-SIMD_INLINE v256 v256_add_8(v256 a, v256 b) {
- return v256_from_v128(v128_add_8(a.val[1], b.val[1]),
- v128_add_8(a.val[0], b.val[0]));
-}
-
-SIMD_INLINE v256 v256_add_16(v256 a, v256 b) {
- return v256_from_v128(v128_add_16(a.val[1], b.val[1]),
- v128_add_16(a.val[0], b.val[0]));
-}
-
-SIMD_INLINE v256 v256_sadd_s8(v256 a, v256 b) {
- return v256_from_v128(v128_sadd_s8(a.val[1], b.val[1]),
- v128_sadd_s8(a.val[0], b.val[0]));
-}
-
-SIMD_INLINE v256 v256_sadd_u8(v256 a, v256 b) {
- return v256_from_v128(v128_sadd_u8(a.val[1], b.val[1]),
- v128_sadd_u8(a.val[0], b.val[0]));
-}
-
-SIMD_INLINE v256 v256_sadd_s16(v256 a, v256 b) {
- return v256_from_v128(v128_sadd_s16(a.val[1], b.val[1]),
- v128_sadd_s16(a.val[0], b.val[0]));
-}
-
-SIMD_INLINE v256 v256_add_32(v256 a, v256 b) {
- return v256_from_v128(v128_add_32(a.val[1], b.val[1]),
- v128_add_32(a.val[0], b.val[0]));
-}
-
-SIMD_INLINE v256 v256_add_64(v256 a, v256 b) {
- return v256_from_v128(v128_add_64(a.val[1], b.val[1]),
- v128_add_64(a.val[0], b.val[0]));
-}
-
-SIMD_INLINE v256 v256_padd_u8(v256 a) {
- return v256_from_v128(v128_padd_u8(a.val[1]), v128_padd_u8(a.val[0]));
-}
-
-SIMD_INLINE v256 v256_padd_s16(v256 a) {
- return v256_from_v128(v128_padd_s16(a.val[1]), v128_padd_s16(a.val[0]));
-}
-
-SIMD_INLINE v256 v256_sub_8(v256 a, v256 b) {
- return v256_from_v128(v128_sub_8(a.val[1], b.val[1]),
- v128_sub_8(a.val[0], b.val[0]));
-}
-
-SIMD_INLINE v256 v256_ssub_u8(v256 a, v256 b) {
- return v256_from_v128(v128_ssub_u8(a.val[1], b.val[1]),
- v128_ssub_u8(a.val[0], b.val[0]));
-}
-
-SIMD_INLINE v256 v256_ssub_s8(v256 a, v256 b) {
- return v256_from_v128(v128_ssub_s8(a.val[1], b.val[1]),
- v128_ssub_s8(a.val[0], b.val[0]));
-}
-
-SIMD_INLINE v256 v256_sub_16(v256 a, v256 b) {
- return v256_from_v128(v128_sub_16(a.val[1], b.val[1]),
- v128_sub_16(a.val[0], b.val[0]));
-}
-
-SIMD_INLINE v256 v256_ssub_s16(v256 a, v256 b) {
- return v256_from_v128(v128_ssub_s16(a.val[1], b.val[1]),
- v128_ssub_s16(a.val[0], b.val[0]));
-}
-
-SIMD_INLINE v256 v256_ssub_u16(v256 a, v256 b) {
- return v256_from_v128(v128_ssub_u16(a.val[1], b.val[1]),
- v128_ssub_u16(a.val[0], b.val[0]));
-}
-
-SIMD_INLINE v256 v256_sub_32(v256 a, v256 b) {
- return v256_from_v128(v128_sub_32(a.val[1], b.val[1]),
- v128_sub_32(a.val[0], b.val[0]));
-}
-
-SIMD_INLINE v256 v256_sub_64(v256 a, v256 b) {
- return v256_from_v128(v128_sub_64(a.val[1], b.val[1]),
- v128_sub_64(a.val[0], b.val[0]));
-}
-
-SIMD_INLINE v256 v256_abs_s16(v256 a) {
- return v256_from_v128(v128_abs_s16(a.val[1]), v128_abs_s16(a.val[0]));
-}
-
-SIMD_INLINE v256 v256_abs_s8(v256 a) {
- return v256_from_v128(v128_abs_s8(a.val[1]), v128_abs_s8(a.val[0]));
-}
-
-SIMD_INLINE v256 v256_mul_s16(v128 a, v128 b) {
- v128 lo_bits = v128_mullo_s16(a, b);
- v128 hi_bits = v128_mulhi_s16(a, b);
- return v256_from_v128(v128_ziphi_16(hi_bits, lo_bits),
- v128_ziplo_16(hi_bits, lo_bits));
-}
-
-SIMD_INLINE v256 v256_mullo_s16(v256 a, v256 b) {
- return v256_from_v128(v128_mullo_s16(a.val[1], b.val[1]),
- v128_mullo_s16(a.val[0], b.val[0]));
-}
-
-SIMD_INLINE v256 v256_mulhi_s16(v256 a, v256 b) {
- return v256_from_v128(v128_mulhi_s16(a.val[1], b.val[1]),
- v128_mulhi_s16(a.val[0], b.val[0]));
-}
-
-SIMD_INLINE v256 v256_mullo_s32(v256 a, v256 b) {
- return v256_from_v128(v128_mullo_s32(a.val[1], b.val[1]),
- v128_mullo_s32(a.val[0], b.val[0]));
-}
-
-SIMD_INLINE v256 v256_madd_s16(v256 a, v256 b) {
- return v256_from_v128(v128_madd_s16(a.val[1], b.val[1]),
- v128_madd_s16(a.val[0], b.val[0]));
-}
-
-SIMD_INLINE v256 v256_madd_us8(v256 a, v256 b) {
- return v256_from_v128(v128_madd_us8(a.val[1], b.val[1]),
- v128_madd_us8(a.val[0], b.val[0]));
-}
-
-SIMD_INLINE v256 v256_avg_u8(v256 a, v256 b) {
- return v256_from_v128(v128_avg_u8(a.val[1], b.val[1]),
- v128_avg_u8(a.val[0], b.val[0]));
-}
-
-SIMD_INLINE v256 v256_rdavg_u8(v256 a, v256 b) {
- return v256_from_v128(v128_rdavg_u8(a.val[1], b.val[1]),
- v128_rdavg_u8(a.val[0], b.val[0]));
-}
-
-SIMD_INLINE v256 v256_rdavg_u16(v256 a, v256 b) {
- return v256_from_v128(v128_rdavg_u16(a.val[1], b.val[1]),
- v128_rdavg_u16(a.val[0], b.val[0]));
-}
-
-SIMD_INLINE v256 v256_avg_u16(v256 a, v256 b) {
- return v256_from_v128(v128_avg_u16(a.val[1], b.val[1]),
- v128_avg_u16(a.val[0], b.val[0]));
-}
-
-SIMD_INLINE v256 v256_min_u8(v256 a, v256 b) {
- return v256_from_v128(v128_min_u8(a.val[1], b.val[1]),
- v128_min_u8(a.val[0], b.val[0]));
-}
-
-SIMD_INLINE v256 v256_max_u8(v256 a, v256 b) {
- return v256_from_v128(v128_max_u8(a.val[1], b.val[1]),
- v128_max_u8(a.val[0], b.val[0]));
-}
-
-SIMD_INLINE v256 v256_min_s8(v256 a, v256 b) {
- return v256_from_v128(v128_min_s8(a.val[1], b.val[1]),
- v128_min_s8(a.val[0], b.val[0]));
-}
-
-SIMD_INLINE uint32_t v256_movemask_8(v256 a) {
- return (v128_movemask_8(v256_high_v128(a)) << 16) |
- v128_movemask_8(v256_low_v128(a));
-}
-
-SIMD_INLINE v256 v256_blend_8(v256 a, v256 b, v256 c) {
- return v256_from_v128(v128_blend_8(a.val[1], b.val[1], c.val[1]),
- v128_blend_8(a.val[0], b.val[0], c.val[0]));
-}
-
-SIMD_INLINE v256 v256_max_s8(v256 a, v256 b) {
- return v256_from_v128(v128_max_s8(a.val[1], b.val[1]),
- v128_max_s8(a.val[0], b.val[0]));
-}
-
-SIMD_INLINE v256 v256_min_s16(v256 a, v256 b) {
- return v256_from_v128(v128_min_s16(a.val[1], b.val[1]),
- v128_min_s16(a.val[0], b.val[0]));
-}
-
-SIMD_INLINE v256 v256_max_s16(v256 a, v256 b) {
- return v256_from_v128(v128_max_s16(a.val[1], b.val[1]),
- v128_max_s16(a.val[0], b.val[0]));
-}
-
-SIMD_INLINE v256 v256_min_s32(v256 a, v256 b) {
- return v256_from_v128(v128_min_s32(a.val[1], b.val[1]),
- v128_min_s32(a.val[0], b.val[0]));
-}
-
-SIMD_INLINE v256 v256_max_s32(v256 a, v256 b) {
- return v256_from_v128(v128_max_s32(a.val[1], b.val[1]),
- v128_max_s32(a.val[0], b.val[0]));
-}
-
-SIMD_INLINE v256 v256_ziplo_8(v256 a, v256 b) {
- return v256_from_v128(v128_ziphi_8(a.val[0], b.val[0]),
- v128_ziplo_8(a.val[0], b.val[0]));
-}
-
-SIMD_INLINE v256 v256_ziphi_8(v256 a, v256 b) {
- return v256_from_v128(v128_ziphi_8(a.val[1], b.val[1]),
- v128_ziplo_8(a.val[1], b.val[1]));
-}
-
-SIMD_INLINE v256 v256_ziplo_16(v256 a, v256 b) {
- return v256_from_v128(v128_ziphi_16(a.val[0], b.val[0]),
- v128_ziplo_16(a.val[0], b.val[0]));
-}
-
-SIMD_INLINE v256 v256_ziphi_16(v256 a, v256 b) {
- return v256_from_v128(v128_ziphi_16(a.val[1], b.val[1]),
- v128_ziplo_16(a.val[1], b.val[1]));
-}
-
-SIMD_INLINE v256 v256_ziplo_32(v256 a, v256 b) {
- return v256_from_v128(v128_ziphi_32(a.val[0], b.val[0]),
- v128_ziplo_32(a.val[0], b.val[0]));
-}
-
-SIMD_INLINE v256 v256_ziphi_32(v256 a, v256 b) {
- return v256_from_v128(v128_ziphi_32(a.val[1], b.val[1]),
- v128_ziplo_32(a.val[1], b.val[1]));
-}
-
-SIMD_INLINE v256 v256_ziplo_64(v256 a, v256 b) {
- return v256_from_v128(v128_ziphi_64(a.val[0], b.val[0]),
- v128_ziplo_64(a.val[0], b.val[0]));
-}
-
-SIMD_INLINE v256 v256_ziphi_64(v256 a, v256 b) {
- return v256_from_v128(v128_ziphi_64(a.val[1], b.val[1]),
- v128_ziplo_64(a.val[1], b.val[1]));
-}
-
-SIMD_INLINE v256 v256_ziplo_128(v256 a, v256 b) {
- return v256_from_v128(a.val[0], b.val[0]);
-}
-
-SIMD_INLINE v256 v256_ziphi_128(v256 a, v256 b) {
- return v256_from_v128(a.val[1], b.val[1]);
-}
-
-SIMD_INLINE v256 v256_zip_8(v128 a, v128 b) {
- return v256_from_v128(v128_ziphi_8(a, b), v128_ziplo_8(a, b));
-}
-
-SIMD_INLINE v256 v256_zip_16(v128 a, v128 b) {
- return v256_from_v128(v128_ziphi_16(a, b), v128_ziplo_16(a, b));
-}
-
-SIMD_INLINE v256 v256_zip_32(v128 a, v128 b) {
- return v256_from_v128(v128_ziphi_32(a, b), v128_ziplo_32(a, b));
-}
-
-SIMD_INLINE v256 v256_unziplo_8(v256 a, v256 b) {
- return v256_from_v128(v128_unziplo_8(a.val[1], a.val[0]),
- v128_unziplo_8(b.val[1], b.val[0]));
-}
-
-SIMD_INLINE v256 v256_unziphi_8(v256 a, v256 b) {
- return v256_from_v128(v128_unziphi_8(a.val[1], a.val[0]),
- v128_unziphi_8(b.val[1], b.val[0]));
-}
-
-SIMD_INLINE v256 v256_unziplo_16(v256 a, v256 b) {
- return v256_from_v128(v128_unziplo_16(a.val[1], a.val[0]),
- v128_unziplo_16(b.val[1], b.val[0]));
-}
-
-SIMD_INLINE v256 v256_unziphi_16(v256 a, v256 b) {
- return v256_from_v128(v128_unziphi_16(a.val[1], a.val[0]),
- v128_unziphi_16(b.val[1], b.val[0]));
-}
-
-SIMD_INLINE v256 v256_unziplo_32(v256 a, v256 b) {
- return v256_from_v128(v128_unziplo_32(a.val[1], a.val[0]),
- v128_unziplo_32(b.val[1], b.val[0]));
-}
-
-SIMD_INLINE v256 v256_unziphi_32(v256 a, v256 b) {
- return v256_from_v128(v128_unziphi_32(a.val[1], a.val[0]),
- v128_unziphi_32(b.val[1], b.val[0]));
-}
-
-SIMD_INLINE v256 v256_unziplo_64(v256 a, v256 b) {
-#if HAVE_SSE2
- return v256_from_v128(
- _mm_castpd_si128(_mm_shuffle_pd(_mm_castsi128_pd(a.val[0]),
- _mm_castsi128_pd(a.val[1]), 0)),
- _mm_castpd_si128(_mm_shuffle_pd(_mm_castsi128_pd(b.val[0]),
- _mm_castsi128_pd(b.val[1]), 0)));
-#else
- return v256_from_v64(v128_low_v64(a.val[1]), v128_low_v64(a.val[0]),
- v128_low_v64(b.val[1]), v128_low_v64(b.val[0]));
-#endif
-}
-
-SIMD_INLINE v256 v256_unziphi_64(v256 a, v256 b) {
-#if HAVE_SSE2
- return v256_from_v128(
- _mm_castpd_si128(_mm_shuffle_pd(_mm_castsi128_pd(a.val[0]),
- _mm_castsi128_pd(a.val[1]), 3)),
- _mm_castpd_si128(_mm_shuffle_pd(_mm_castsi128_pd(b.val[0]),
- _mm_castsi128_pd(b.val[1]), 3)));
-#else
- return v256_from_v64(v128_high_v64(a.val[1]), v128_high_v64(a.val[0]),
- v128_high_v64(b.val[1]), v128_high_v64(b.val[0]));
-#endif
-}
-
-SIMD_INLINE v256 v256_unpack_u8_s16(v128 a) {
- return v256_from_v128(v128_unpackhi_u8_s16(a), v128_unpacklo_u8_s16(a));
-}
-
-SIMD_INLINE v256 v256_unpacklo_u8_s16(v256 a) {
- return v256_from_v128(v128_unpackhi_u8_s16(a.val[0]),
- v128_unpacklo_u8_s16(a.val[0]));
-}
-
-SIMD_INLINE v256 v256_unpackhi_u8_s16(v256 a) {
- return v256_from_v128(v128_unpackhi_u8_s16(a.val[1]),
- v128_unpacklo_u8_s16(a.val[1]));
-}
-
-SIMD_INLINE v256 v256_unpack_s8_s16(v128 a) {
- return v256_from_v128(v128_unpackhi_s8_s16(a), v128_unpacklo_s8_s16(a));
-}
-
-SIMD_INLINE v256 v256_unpacklo_s8_s16(v256 a) {
- return v256_from_v128(v128_unpackhi_s8_s16(a.val[0]),
- v128_unpacklo_s8_s16(a.val[0]));
-}
-
-SIMD_INLINE v256 v256_unpackhi_s8_s16(v256 a) {
- return v256_from_v128(v128_unpackhi_s8_s16(a.val[1]),
- v128_unpacklo_s8_s16(a.val[1]));
-}
-
-SIMD_INLINE v256 v256_pack_s32_s16(v256 a, v256 b) {
- return v256_from_v128(v128_pack_s32_s16(a.val[1], a.val[0]),
- v128_pack_s32_s16(b.val[1], b.val[0]));
-}
-
-SIMD_INLINE v256 v256_pack_s32_u16(v256 a, v256 b) {
- return v256_from_v128(v128_pack_s32_u16(a.val[1], a.val[0]),
- v128_pack_s32_u16(b.val[1], b.val[0]));
-}
-
-SIMD_INLINE v256 v256_pack_s16_u8(v256 a, v256 b) {
- return v256_from_v128(v128_pack_s16_u8(a.val[1], a.val[0]),
- v128_pack_s16_u8(b.val[1], b.val[0]));
-}
-
-SIMD_INLINE v256 v256_pack_s16_s8(v256 a, v256 b) {
- return v256_from_v128(v128_pack_s16_s8(a.val[1], a.val[0]),
- v128_pack_s16_s8(b.val[1], b.val[0]));
-}
-
-SIMD_INLINE v256 v256_unpack_u16_s32(v128 a) {
- return v256_from_v128(v128_unpackhi_u16_s32(a), v128_unpacklo_u16_s32(a));
-}
-
-SIMD_INLINE v256 v256_unpack_s16_s32(v128 a) {
- return v256_from_v128(v128_unpackhi_s16_s32(a), v128_unpacklo_s16_s32(a));
-}
-
-SIMD_INLINE v256 v256_unpacklo_u16_s32(v256 a) {
- return v256_from_v128(v128_unpackhi_u16_s32(a.val[0]),
- v128_unpacklo_u16_s32(a.val[0]));
-}
-
-SIMD_INLINE v256 v256_unpacklo_s16_s32(v256 a) {
- return v256_from_v128(v128_unpackhi_s16_s32(a.val[0]),
- v128_unpacklo_s16_s32(a.val[0]));
-}
-
-SIMD_INLINE v256 v256_unpackhi_u16_s32(v256 a) {
- return v256_from_v128(v128_unpackhi_u16_s32(a.val[1]),
- v128_unpacklo_u16_s32(a.val[1]));
-}
-
-SIMD_INLINE v256 v256_unpackhi_s16_s32(v256 a) {
- return v256_from_v128(v128_unpackhi_s16_s32(a.val[1]),
- v128_unpacklo_s16_s32(a.val[1]));
-}
-
-SIMD_INLINE v256 v256_cmpgt_s8(v256 a, v256 b) {
- return v256_from_v128(v128_cmpgt_s8(a.val[1], b.val[1]),
- v128_cmpgt_s8(a.val[0], b.val[0]));
-}
-
-SIMD_INLINE v256 v256_cmplt_s8(v256 a, v256 b) {
- return v256_from_v128(v128_cmplt_s8(a.val[1], b.val[1]),
- v128_cmplt_s8(a.val[0], b.val[0]));
-}
-
-SIMD_INLINE v256 v256_cmpeq_8(v256 a, v256 b) {
- return v256_from_v128(v128_cmpeq_8(a.val[1], b.val[1]),
- v128_cmpeq_8(a.val[0], b.val[0]));
-}
-
-SIMD_INLINE v256 v256_cmpgt_s16(v256 a, v256 b) {
- return v256_from_v128(v128_cmpgt_s16(a.val[1], b.val[1]),
- v128_cmpgt_s16(a.val[0], b.val[0]));
-}
-
-SIMD_INLINE v256 v256_cmplt_s16(v256 a, v256 b) {
- return v256_from_v128(v128_cmplt_s16(a.val[1], b.val[1]),
- v128_cmplt_s16(a.val[0], b.val[0]));
-}
-
-SIMD_INLINE v256 v256_cmpeq_16(v256 a, v256 b) {
- return v256_from_v128(v128_cmpeq_16(a.val[1], b.val[1]),
- v128_cmpeq_16(a.val[0], b.val[0]));
-}
-
-SIMD_INLINE v256 v256_cmpgt_s32(v256 a, v256 b) {
- return v256_from_v128(v128_cmpgt_s32(a.val[1], b.val[1]),
- v128_cmpgt_s32(a.val[0], b.val[0]));
-}
-
-SIMD_INLINE v256 v256_cmplt_s32(v256 a, v256 b) {
- return v256_from_v128(v128_cmplt_s32(a.val[1], b.val[1]),
- v128_cmplt_s32(a.val[0], b.val[0]));
-}
-
-SIMD_INLINE v256 v256_cmpeq_32(v256 a, v256 b) {
- return v256_from_v128(v128_cmpeq_32(a.val[1], b.val[1]),
- v128_cmpeq_32(a.val[0], b.val[0]));
-}
-
-/* Shuffles the 32 bytes of x using the byte indices held in pattern.
-   One of three variants is selected at compile time:
-     - AArch64 NEON: one 2-register table lookup (vqtbl2q_u8) per half of
-       pattern, with {x.val[0], x.val[1]} as the table.
-     - 32-bit NEON: four 8-byte lookups (vtbl4_u8) over the same 32 bytes.
-     - otherwise: two v128_shuffle_8() calls per half, merged by v128_blend_8().
-   NOTE(review): presumably pattern bytes are expected to lie in 0..31; the
-   handling of other values is not visibly the same on every path -- confirm. */
-SIMD_INLINE v256 v256_shuffle_8(v256 x, v256 pattern) {
-#if HAVE_NEON
-#if defined(__aarch64__)
- /* Table = all 32 bytes of x, val[0] first. */
- uint8x16x2_t p = { { vreinterpretq_u8_s64(x.val[0]),
- vreinterpretq_u8_s64(x.val[1]) } };
- return v256_from_v128(
- vreinterpretq_s64_u8(vqtbl2q_u8(p, vreinterpretq_u8_s64(pattern.val[1]))),
- vreinterpretq_s64_u8(
- vqtbl2q_u8(p, vreinterpretq_u8_s64(pattern.val[0]))));
-#else
- /* Same 32-byte table, split into four 8-byte registers for vtbl4_u8. */
- uint8x8x4_t p = { { vget_low_u8(vreinterpretq_u8_s64(x.val[0])),
- vget_high_u8(vreinterpretq_u8_s64(x.val[0])),
- vget_low_u8(vreinterpretq_u8_s64(x.val[1])),
- vget_high_u8(vreinterpretq_u8_s64(x.val[1])) } };
- /* One lookup per 64-bit quarter of pattern, most significant quarter first. */
- return v256_from_64(
- (uint64_t)vreinterpret_s64_u8(
- vtbl4_u8(p, vreinterpret_u8_s64(vget_high_s64(pattern.val[1])))),
- (uint64_t)vreinterpret_s64_u8(
- vtbl4_u8(p, vreinterpret_u8_s64(vget_low_s64(pattern.val[1])))),
- (uint64_t)vreinterpret_s64_u8(
- vtbl4_u8(p, vreinterpret_u8_s64(vget_high_s64(pattern.val[0])))),
- (uint64_t)vreinterpret_s64_u8(
- vtbl4_u8(p, vreinterpret_u8_s64(vget_low_s64(pattern.val[0])))));
-#endif
-#else
- v128 c16 = v128_dup_8(16);
- /* Masks mark the pattern bytes that compare (signed) below 16. */
- v128 maskhi = v128_cmplt_s8(pattern.val[1], c16);
- v128 masklo = v128_cmplt_s8(pattern.val[0], c16);
- /* For each half of pattern: x.val[1] is indexed with (pattern - 16) and
-    x.val[0] with the raw pattern; the mask chooses between the two.
-    NOTE(review): presumably v128_blend_8(a, b, m) takes b where m is set --
-    confirm against the v128 header. */
- return v256_from_v128(
- v128_blend_8(v128_shuffle_8(x.val[1], v128_sub_8(pattern.val[1], c16)),
- v128_shuffle_8(x.val[0], pattern.val[1]), maskhi),
- v128_blend_8(v128_shuffle_8(x.val[1], v128_sub_8(pattern.val[0], c16)),
- v128_shuffle_8(x.val[0], pattern.val[0]), masklo));
-#endif
-}
-
-/* Shuffles bytes out of the 64-byte table formed by y followed by x, using
-   the byte indices in pattern. In every variant below the bytes of y are
-   looked up with the raw pattern and the bytes of x with (pattern - 32).
-   Variants: AArch64 NEON (single vqtbl4q_u8 lookup), 32-bit NEON (two sets of
-   vtbl4_u8 lookups blended), and a generic v128 path.
-   NOTE(review): presumably pattern bytes are expected to lie in 0..63 --
-   confirm. */
-SIMD_INLINE v256 v256_wideshuffle_8(v256 x, v256 y, v256 pattern) {
-#if HAVE_NEON
-#if defined(__aarch64__)
- /* 64-byte table: y.val[0], y.val[1], x.val[0], x.val[1]. */
- uint8x16x4_t p = { {
- vreinterpretq_u8_s64(y.val[0]),
- vreinterpretq_u8_s64(y.val[1]),
- vreinterpretq_u8_s64(x.val[0]),
- vreinterpretq_u8_s64(x.val[1]),
- } };
- return v256_from_v128(
- vreinterpretq_s64_u8(vqtbl4q_u8(p, vreinterpretq_u8_s64(pattern.val[1]))),
- vreinterpretq_s64_u8(
- vqtbl4q_u8(p, vreinterpretq_u8_s64(pattern.val[0]))));
-#else
- v256 c32 = v256_dup_8(32);
- /* p32 = pattern rebased so that index 32 addresses byte 0 of x. */
- v256 p32 = v256_sub_8(pattern, c32);
- /* p: the 32 bytes of x; q: the 32 bytes of y, each as four 8-byte rows. */
- uint8x8x4_t p = { { vget_low_u8(vreinterpretq_u8_s64(x.val[0])),
- vget_high_u8(vreinterpretq_u8_s64(x.val[0])),
- vget_low_u8(vreinterpretq_u8_s64(x.val[1])),
- vget_high_u8(vreinterpretq_u8_s64(x.val[1])) } };
- uint8x8x4_t q = { { vget_low_u8(vreinterpretq_u8_s64(y.val[0])),
- vget_high_u8(vreinterpretq_u8_s64(y.val[0])),
- vget_low_u8(vreinterpretq_u8_s64(y.val[1])),
- vget_high_u8(vreinterpretq_u8_s64(y.val[1])) } };
- /* r1: lookups into x using the rebased indices. */
- v256 r1 =
- v256_from_64((uint64_t)vreinterpret_s64_u8(vtbl4_u8(
- p, vreinterpret_u8_s64(vget_high_s64(p32.val[1])))),
- (uint64_t)vreinterpret_s64_u8(vtbl4_u8(
- p, vreinterpret_u8_s64(vget_low_s64(p32.val[1])))),
- (uint64_t)vreinterpret_s64_u8(vtbl4_u8(
- p, vreinterpret_u8_s64(vget_high_s64(p32.val[0])))),
- (uint64_t)vreinterpret_s64_u8(vtbl4_u8(
- p, vreinterpret_u8_s64(vget_low_s64(p32.val[0])))));
- /* r2: lookups into y using the raw indices. */
- v256 r2 =
- v256_from_64((uint64_t)vreinterpret_s64_u8(vtbl4_u8(
- q, vreinterpret_u8_s64(vget_high_s64(pattern.val[1])))),
- (uint64_t)vreinterpret_s64_u8(vtbl4_u8(
- q, vreinterpret_u8_s64(vget_low_s64(pattern.val[1])))),
- (uint64_t)vreinterpret_s64_u8(vtbl4_u8(
- q, vreinterpret_u8_s64(vget_high_s64(pattern.val[0])))),
- (uint64_t)vreinterpret_s64_u8(vtbl4_u8(
- q, vreinterpret_u8_s64(vget_low_s64(pattern.val[0])))));
- /* The blend mask marks pattern bytes that compare (signed) below 32. */
- return v256_blend_8(r1, r2, v256_cmplt_s8(pattern, c32));
-#endif
-#else
- v128 c16 = v128_dup_8(16);
- v128 c32 = v128_dup_8(32);
- v128 c48 = v128_dup_8(48);
- /* Masks mark pattern bytes that compare (signed) below 16 / below 48. */
- v128 maskhi16 = v128_cmpgt_s8(c16, pattern.val[1]);
- v128 masklo16 = v128_cmpgt_s8(c16, pattern.val[0]);
- v128 maskhi48 = v128_cmpgt_s8(c48, pattern.val[1]);
- v128 masklo48 = v128_cmpgt_s8(c48, pattern.val[0]);
- /* r1: candidates taken from x (x.val[0] via pattern - 32, x.val[1] via
-    pattern - 48). */
- v256 r1 = v256_from_v128(
- v128_blend_8(v128_shuffle_8(x.val[1], v128_sub_8(pattern.val[1], c48)),
- v128_shuffle_8(x.val[0], v128_sub_8(pattern.val[1], c32)),
- maskhi48),
- v128_blend_8(v128_shuffle_8(x.val[1], v128_sub_8(pattern.val[0], c48)),
- v128_shuffle_8(x.val[0], v128_sub_8(pattern.val[0], c32)),
- masklo48));
- /* r2: candidates taken from y (y.val[0] via pattern, y.val[1] via
-    pattern - 16). */
- v256 r2 = v256_from_v128(
- v128_blend_8(v128_shuffle_8(y.val[1], v128_sub_8(pattern.val[1], c16)),
- v128_shuffle_8(y.val[0], pattern.val[1]), maskhi16),
- v128_blend_8(v128_shuffle_8(y.val[1], v128_sub_8(pattern.val[0], c16)),
- v128_shuffle_8(y.val[0], pattern.val[0]), masklo16));
- /* Final blend mask marks pattern bytes that compare (signed) below 32. */
- return v256_blend_8(r1, r2, v256_cmpgt_s8(v256_from_v128(c32, c32), pattern));
-#endif
-}
-
-/* Shuffles each 128-bit half of a independently with v128_shuffle_8(): the
-   high half uses the high half of pattern, the low half uses the low half. */
-SIMD_INLINE v256 v256_pshuffle_8(v256 a, v256 pattern) {
-  const v128 hi = v128_shuffle_8(v256_high_v128(a), v256_high_v128(pattern));
-  const v128 lo = v128_shuffle_8(v256_low_v128(a), v256_low_v128(pattern));
-  return v256_from_v128(hi, lo);
-}
-
-/* Applies v128_shl_8() with count c to both 128-bit halves. */
-SIMD_INLINE v256 v256_shl_8(v256 a, const unsigned int c) {
-  const v128 hi = v128_shl_8(a.val[1], c);
-  const v128 lo = v128_shl_8(a.val[0], c);
-  return v256_from_v128(hi, lo);
-}
-
-/* Applies v128_shr_u8() with count c to both 128-bit halves. */
-SIMD_INLINE v256 v256_shr_u8(v256 a, const unsigned int c) {
-  const v128 hi = v128_shr_u8(a.val[1], c);
-  const v128 lo = v128_shr_u8(a.val[0], c);
-  return v256_from_v128(hi, lo);
-}
-
-/* Applies v128_shr_s8() with count c to both 128-bit halves. */
-SIMD_INLINE v256 v256_shr_s8(v256 a, const unsigned int c) {
-  const v128 hi = v128_shr_s8(a.val[1], c);
-  const v128 lo = v128_shr_s8(a.val[0], c);
-  return v256_from_v128(hi, lo);
-}
-
-/* Applies v128_shl_16() with count c to both 128-bit halves. */
-SIMD_INLINE v256 v256_shl_16(v256 a, const unsigned int c) {
-  const v128 hi = v128_shl_16(a.val[1], c);
-  const v128 lo = v128_shl_16(a.val[0], c);
-  return v256_from_v128(hi, lo);
-}
-
-/* Applies v128_shr_u16() with count c to both 128-bit halves. */
-SIMD_INLINE v256 v256_shr_u16(v256 a, const unsigned int c) {
-  const v128 hi = v128_shr_u16(a.val[1], c);
-  const v128 lo = v128_shr_u16(a.val[0], c);
-  return v256_from_v128(hi, lo);
-}
-
-/* Applies v128_shr_s16() with count c to both 128-bit halves. */
-SIMD_INLINE v256 v256_shr_s16(v256 a, const unsigned int c) {
-  const v128 hi = v128_shr_s16(a.val[1], c);
-  const v128 lo = v128_shr_s16(a.val[0], c);
-  return v256_from_v128(hi, lo);
-}
-
-/* Applies v128_shl_32() with count c to both 128-bit halves. */
-SIMD_INLINE v256 v256_shl_32(v256 a, const unsigned int c) {
-  const v128 hi = v128_shl_32(a.val[1], c);
-  const v128 lo = v128_shl_32(a.val[0], c);
-  return v256_from_v128(hi, lo);
-}
-
-/* Applies v128_shr_u32() with count c to both 128-bit halves. */
-SIMD_INLINE v256 v256_shr_u32(v256 a, const unsigned int c) {
-  const v128 hi = v128_shr_u32(a.val[1], c);
-  const v128 lo = v128_shr_u32(a.val[0], c);
-  return v256_from_v128(hi, lo);
-}
-
-/* Applies v128_shr_s32() with count c to both 128-bit halves. */
-SIMD_INLINE v256 v256_shr_s32(v256 a, const unsigned int c) {
-  const v128 hi = v128_shr_s32(a.val[1], c);
-  const v128 lo = v128_shr_s32(a.val[0], c);
-  return v256_from_v128(hi, lo);
-}
-
-/* Applies v128_shl_64() with count c to both 128-bit halves. */
-SIMD_INLINE v256 v256_shl_64(v256 a, const unsigned int c) {
-  const v128 hi = v128_shl_64(a.val[1], c);
-  const v128 lo = v128_shl_64(a.val[0], c);
-  return v256_from_v128(hi, lo);
-}
-
-/* Applies v128_shr_u64() with count c to both 128-bit halves. */
-SIMD_INLINE v256 v256_shr_u64(v256 a, const unsigned int c) {
-  const v128 hi = v128_shr_u64(a.val[1], c);
-  const v128 lo = v128_shr_u64(a.val[0], c);
-  return v256_from_v128(hi, lo);
-}
-
-/* Applies v128_shr_s64() with count c to both 128-bit halves. */
-SIMD_INLINE v256 v256_shr_s64(v256 a, const unsigned int c) {
-  const v128 hi = v128_shr_s64(a.val[1], c);
-  const v128 lo = v128_shr_s64(a.val[0], c);
-  return v256_from_v128(hi, lo);
-}
-
-/* These intrinsics require immediate values, so we must use #defines
-   to enforce that. */
-/* Shifts the whole 256-bit value a left by n bytes (n a compile-time
-   constant). For n < 16 the bytes leaving the low half are OR-ed into the
-   high half; for n >= 16 the low half becomes zero and the high half is the
-   old low half shifted by n - 16. The vector argument is parenthesised so any
-   expression (e.g. *ptr) can be passed; note that it is evaluated more than
-   once. */
-#define v256_shl_n_byte(a, n) \
-  ((n) < 16 ? v256_from_v128(v128_or(v128_shl_n_byte((a).val[1], n), \
-                                     v128_shr_n_byte((a).val[0], 16 - (n))), \
-                             v128_shl_n_byte((a).val[0], (n))) \
-            : v256_from_v128( \
-                  (n) > 16 ? v128_shl_n_byte((a).val[0], (n)-16) : (a).val[0], \
-                  v128_zero()))
-
-/* Shifts the whole 256-bit value a right by n bytes (n a compile-time
-   constant). For n < 16 the bytes leaving the high half are OR-ed into the
-   low half; for n >= 16 the high half becomes zero and the low half is the
-   old high half shifted by n - 16. The vector argument is parenthesised so
-   any expression can be passed; note that it is evaluated more than once. */
-#define v256_shr_n_byte(a, n) \
-  ((n) < 16 ? v256_from_v128(v128_shr_n_byte((a).val[1], n), \
-                             v128_or(v128_shr_n_byte((a).val[0], n), \
-                                     v128_shl_n_byte((a).val[1], 16 - (n)))) \
-            : v256_from_v128( \
-                  v128_zero(), \
-                  (n) > 16 ? v128_shr_n_byte((a).val[1], (n)-16) : (a).val[1]))
-
-/* For c != 0: ORs b shifted right by c bytes with a shifted left by 32 - c
-   bytes. For c == 0 the result is b unchanged. */
-#define v256_align(a, b, c) \
- ((c) ? v256_or(v256_shr_n_byte(b, c), v256_shl_n_byte(a, 32 - (c))) : b)
-
-/* Immediate-count lane shifts: each macro applies the matching v128_*_n_*
-   operation to both 128-bit halves of a. The vector argument is parenthesised
-   so any expression can be passed; it is evaluated twice. */
-#define v256_shl_n_8(a, n) \
-  v256_from_v128(v128_shl_n_8((a).val[1], n), v128_shl_n_8((a).val[0], n))
-#define v256_shl_n_16(a, n) \
-  v256_from_v128(v128_shl_n_16((a).val[1], n), v128_shl_n_16((a).val[0], n))
-#define v256_shl_n_32(a, n) \
-  v256_from_v128(v128_shl_n_32((a).val[1], n), v128_shl_n_32((a).val[0], n))
-#define v256_shl_n_64(a, n) \
-  v256_from_v128(v128_shl_n_64((a).val[1], n), v128_shl_n_64((a).val[0], n))
-#define v256_shr_n_u8(a, n) \
-  v256_from_v128(v128_shr_n_u8((a).val[1], n), v128_shr_n_u8((a).val[0], n))
-#define v256_shr_n_u16(a, n) \
-  v256_from_v128(v128_shr_n_u16((a).val[1], n), v128_shr_n_u16((a).val[0], n))
-#define v256_shr_n_u32(a, n) \
-  v256_from_v128(v128_shr_n_u32((a).val[1], n), v128_shr_n_u32((a).val[0], n))
-#define v256_shr_n_u64(a, n) \
-  v256_from_v128(v128_shr_n_u64((a).val[1], n), v128_shr_n_u64((a).val[0], n))
-#define v256_shr_n_s8(a, n) \
-  v256_from_v128(v128_shr_n_s8((a).val[1], n), v128_shr_n_s8((a).val[0], n))
-#define v256_shr_n_s16(a, n) \
-  v256_from_v128(v128_shr_n_s16((a).val[1], n), v128_shr_n_s16((a).val[0], n))
-#define v256_shr_n_s32(a, n) \
-  v256_from_v128(v128_shr_n_s32((a).val[1], n), v128_shr_n_s32((a).val[0], n))
-#define v256_shr_n_s64(a, n) \
-  v256_from_v128(v128_shr_n_s64((a).val[1], n), v128_shr_n_s64((a).val[0], n))
-
-/* Shifts by n 16-bit words, expressed as a byte shift of 2 * n. */
-#define v256_shr_n_word(a, n) v256_shr_n_byte(a, 2 * (n))
-#define v256_shl_n_word(a, n) v256_shl_n_byte(a, 2 * (n))
-
-/* Accumulator for v256_sad_u16(): one 128-bit accumulator per half
-   (val[1] pairs with the val[1] halves of the inputs, val[0] with val[0]). */
-typedef struct {
- sad128_internal_u16 val[2];
-} sad256_internal_u16;
-
-/* Returns a fresh accumulator for v256_sad_u16(); each half is initialised
-   by v128_sad_u16_init(). Declared with (void) so that this is a real
-   prototype in C rather than an unspecified-argument declaration. */
-SIMD_INLINE sad256_internal_u16 v256_sad_u16_init(void) {
-  sad256_internal_u16 t;
-  t.val[1] = v128_sad_u16_init();
-  t.val[0] = v128_sad_u16_init();
-  return t;
-}
-
-/* Feeds each 128-bit half of a and b, together with the matching half of the
-   accumulator s, through v128_sad_u16(). The returned accumulator has an
-   implementation-dependent layout and must be finalised with
-   v256_sad_u16_sum(). The result for more than 16 v256_sad_u16() calls is
-   undefined. */
-SIMD_INLINE sad256_internal_u16 v256_sad_u16(sad256_internal_u16 s, v256 a,
-                                             v256 b) {
-  sad256_internal_u16 acc;
-  acc.val[0] = v128_sad_u16(s.val[0], a.val[0], b.val[0]);
-  acc.val[1] = v128_sad_u16(s.val[1], a.val[1], b.val[1]);
-  return acc;
-}
-
-/* Finalises a v256_sad_u16() accumulator: the sum of v128_sad_u16_sum() over
-   both halves. */
-SIMD_INLINE uint32_t v256_sad_u16_sum(sad256_internal_u16 s) {
-  return v128_sad_u16_sum(s.val[0]) + v128_sad_u16_sum(s.val[1]);
-}
-
-/* Accumulator for v256_ssd_s16(): one 128-bit accumulator per half. */
-typedef struct {
- ssd128_internal_s16 val[2];
-} ssd256_internal_s16;
-
-/* Returns a fresh accumulator for v256_ssd_s16(); each half is initialised
-   by v128_ssd_s16_init(). Declared with (void) so that this is a real
-   prototype in C rather than an unspecified-argument declaration. */
-SIMD_INLINE ssd256_internal_s16 v256_ssd_s16_init(void) {
-  ssd256_internal_s16 t;
-  t.val[1] = v128_ssd_s16_init();
-  t.val[0] = v128_ssd_s16_init();
-  return t;
-}
-
-/* Feeds each 128-bit half of a and b, together with the matching half of the
-   accumulator s, through v128_ssd_s16(). The returned accumulator has an
-   implementation-dependent layout and must be finalised with
-   v256_ssd_s16_sum(). */
-SIMD_INLINE ssd256_internal_s16 v256_ssd_s16(ssd256_internal_s16 s, v256 a,
-                                             v256 b) {
-  ssd256_internal_s16 acc;
-  acc.val[0] = v128_ssd_s16(s.val[0], a.val[0], b.val[0]);
-  acc.val[1] = v128_ssd_s16(s.val[1], a.val[1], b.val[1]);
-  return acc;
-}
-
-/* Finalises a v256_ssd_s16() accumulator: the sum of v128_ssd_s16_sum() over
-   both halves. */
-SIMD_INLINE uint64_t v256_ssd_s16_sum(ssd256_internal_s16 s) {
-  return v128_ssd_s16_sum(s.val[0]) + v128_ssd_s16_sum(s.val[1]);
-}
-
-#endif // AOM_AOM_DSP_SIMD_V256_INTRINSICS_V128_H_
diff --git a/third_party/aom/aom_dsp/simd/v256_intrinsics_x86.h b/third_party/aom/aom_dsp/simd/v256_intrinsics_x86.h
deleted file mode 100644
index 44594bc41..000000000
--- a/third_party/aom/aom_dsp/simd/v256_intrinsics_x86.h
+++ /dev/null
@@ -1,750 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#ifndef AOM_AOM_DSP_SIMD_V256_INTRINSICS_X86_H_
-#define AOM_AOM_DSP_SIMD_V256_INTRINSICS_X86_H_
-
-#if !defined(__AVX2__)
-
-#include "aom_dsp/simd/v256_intrinsics_v128.h"
-
-#else
-
-// The __m256i type seems to cause problems for g++'s mangling prior to
-// version 5, but adding -fabi-version=0 fixes this.
-#if !defined(__clang__) && defined(__GNUC__) && __GNUC__ < 5 && \
- defined(__AVX2__) && defined(__cplusplus)
-#pragma GCC optimize "-fabi-version=0"
-#endif
-
-#include <immintrin.h>
-
-#include "aom_dsp/simd/v128_intrinsics_x86.h"
-
-/* With AVX2 available, v256 is a direct alias of the __m256i vector type. */
-typedef __m256i v256;
-
-/* Returns the lowest 32 bits of a as an unsigned integer. */
-SIMD_INLINE uint32_t v256_low_u32(v256 a) {
-  const __m128i low_half = _mm256_extracti128_si256(a, 0);
-  return (uint32_t)_mm_cvtsi128_si32(low_half);
-}
-
-/* Returns the low 64 bits of a; the upper 64 bits of the returned register
-   are taken from v64_zero(). */
-SIMD_INLINE v64 v256_low_v64(v256 a) {
-  const __m128i low_half = _mm256_extracti128_si256(a, 0);
-  return _mm_unpacklo_epi64(low_half, v64_zero());
-}
-
-/* Low 64 bits of a as a scalar: v256_low_v64() followed by v64_u64(). */
-SIMD_INLINE uint64_t v256_low_u64(v256 a) {
-  const v64 low = v256_low_v64(a);
-  return v64_u64(low);
-}
-
-/* Returns the low 128-bit half of a (a register cast). */
-SIMD_INLINE v128 v256_low_v128(v256 a) {
-  const v128 low = _mm256_castsi256_si128(a);
-  return low;
-}
-
-/* Returns the high 128-bit half of a. */
-SIMD_INLINE v128 v256_high_v128(v256 a) {
-  const v128 high = _mm256_extracti128_si256(a, 1);
-  return high;
-}
-
-/* Builds a 256-bit vector with a as the upper 128 bits and b as the lower
-   128 bits. */
-SIMD_INLINE v256 v256_from_v128(v128 a, v128 b) {
-  // gcc seems to be missing _mm256_set_m128i()
-  const v256 low_only = _mm256_castsi128_si256(b);
-  return _mm256_inserti128_si256(low_only, a, 1);
-}
-
-/* Builds a 256-bit vector from four 64-bit vectors; (a, b) form the upper
-   half and (c, d) the lower half. */
-SIMD_INLINE v256 v256_from_v64(v64 a, v64 b, v64 c, v64 d) {
-  const v128 hi = v128_from_v64(a, b);
-  const v128 lo = v128_from_v64(c, d);
-  return v256_from_v128(hi, lo);
-}
-
-/* Builds a 256-bit vector from four 64-bit scalars; (a, b) form the upper
-   half and (c, d) the lower half. */
-SIMD_INLINE v256 v256_from_64(uint64_t a, uint64_t b, uint64_t c, uint64_t d) {
-  const v128 hi = v128_from_64(a, b);
-  const v128 lo = v128_from_64(c, d);
-  return v256_from_v128(hi, lo);
-}
-
-/* Loads 32 bytes from p with _mm256_load_si256(), the aligned load. */
-SIMD_INLINE v256 v256_load_aligned(const void *p) {
-  const __m256i *src = (const __m256i *)p;
-  return _mm256_load_si256(src);
-}
-
-/* Loads 32 bytes from p with _mm256_loadu_si256(), the unaligned load. */
-SIMD_INLINE v256 v256_load_unaligned(const void *p) {
-  const __m256i *src = (const __m256i *)p;
-  return _mm256_loadu_si256(src);
-}
-
-/* Stores the 32 bytes of a to p with _mm256_store_si256(), the aligned
-   store. */
-SIMD_INLINE void v256_store_aligned(void *p, v256 a) {
-  __m256i *dst = (__m256i *)p;
-  _mm256_store_si256(dst, a);
-}
-
-/* Stores the 32 bytes of a to p with _mm256_storeu_si256(), the unaligned
-   store. */
-SIMD_INLINE void v256_store_unaligned(void *p, v256 a) {
-  __m256i *dst = (__m256i *)p;
-  _mm256_storeu_si256(dst, a);
-}
-
-/* Returns an all-zero 256-bit vector. Declared with (void) so that this is a
-   real prototype in C rather than an unspecified-argument declaration. */
-SIMD_INLINE v256 v256_zero(void) { return _mm256_setzero_si256(); }
-
-/* Broadcasts x into every 8-bit lane. */
-SIMD_INLINE v256 v256_dup_8(uint8_t x) {
-  const v256 splat = _mm256_set1_epi8(x);
-  return splat;
-}
-
-/* Broadcasts x into every 16-bit lane. */
-SIMD_INLINE v256 v256_dup_16(uint16_t x) {
-  const v256 splat = _mm256_set1_epi16(x);
-  return splat;
-}
-
-/* Broadcasts x into every 32-bit lane. */
-SIMD_INLINE v256 v256_dup_32(uint32_t x) {
-  const v256 splat = _mm256_set1_epi32(x);
-  return splat;
-}
-
-/* Broadcasts x into every 64-bit lane. */
-SIMD_INLINE v256 v256_dup_64(uint64_t x) {
-  const v256 splat = _mm256_set1_epi64x(x);
-  return splat;
-}
-
-/* Lane-wise 8-bit addition (_mm256_add_epi8). */
-SIMD_INLINE v256 v256_add_8(v256 a, v256 b) {
-  const v256 sum = _mm256_add_epi8(a, b);
-  return sum;
-}
-
-/* Lane-wise 16-bit addition (_mm256_add_epi16). */
-SIMD_INLINE v256 v256_add_16(v256 a, v256 b) {
-  const v256 sum = _mm256_add_epi16(a, b);
-  return sum;
-}
-
-/* Saturating unsigned 8-bit addition (_mm256_adds_epu8). */
-SIMD_INLINE v256 v256_sadd_u8(v256 a, v256 b) {
-  const v256 sum = _mm256_adds_epu8(a, b);
-  return sum;
-}
-
-/* Saturating signed 8-bit addition (_mm256_adds_epi8). */
-SIMD_INLINE v256 v256_sadd_s8(v256 a, v256 b) {
-  const v256 sum = _mm256_adds_epi8(a, b);
-  return sum;
-}
-
-/* Saturating signed 16-bit addition (_mm256_adds_epi16). */
-SIMD_INLINE v256 v256_sadd_s16(v256 a, v256 b) {
-  const v256 sum = _mm256_adds_epi16(a, b);
-  return sum;
-}
-
-/* Lane-wise 32-bit addition (_mm256_add_epi32). */
-SIMD_INLINE v256 v256_add_32(v256 a, v256 b) {
-  const v256 sum = _mm256_add_epi32(a, b);
-  return sum;
-}
-
-/* Lane-wise 64-bit addition (_mm256_add_epi64). */
-SIMD_INLINE v256 v256_add_64(v256 a, v256 b) {
-  const v256 sum = _mm256_add_epi64(a, b);
-  return sum;
-}
-
-/* Pairwise add of adjacent bytes into 16-bit lanes, implemented as a
-   multiply-add (_mm256_maddubs_epi16) against a vector of 8-bit ones. */
-SIMD_INLINE v256 v256_padd_u8(v256 a) {
-  const v256 ones = _mm256_set1_epi8(1);
-  return _mm256_maddubs_epi16(a, ones);
-}
-
-/* Pairwise add of adjacent 16-bit lanes into 32-bit lanes, implemented as a
-   multiply-add (_mm256_madd_epi16) against a vector of 16-bit ones. */
-SIMD_INLINE v256 v256_padd_s16(v256 a) {
-  const v256 ones = _mm256_set1_epi16(1);
-  return _mm256_madd_epi16(a, ones);
-}
-
-/* Lane-wise 8-bit subtraction a - b (_mm256_sub_epi8). */
-SIMD_INLINE v256 v256_sub_8(v256 a, v256 b) {
-  const v256 diff = _mm256_sub_epi8(a, b);
-  return diff;
-}
-
-/* Saturating unsigned 8-bit subtraction a - b (_mm256_subs_epu8). */
-SIMD_INLINE v256 v256_ssub_u8(v256 a, v256 b) {
-  const v256 diff = _mm256_subs_epu8(a, b);
-  return diff;
-}
-
-/* Saturating signed 8-bit subtraction a - b (_mm256_subs_epi8). */
-SIMD_INLINE v256 v256_ssub_s8(v256 a, v256 b) {
-  const v256 diff = _mm256_subs_epi8(a, b);
-  return diff;
-}
-
-/* Lane-wise 16-bit subtraction a - b (_mm256_sub_epi16). */
-SIMD_INLINE v256 v256_sub_16(v256 a, v256 b) {
-  const v256 diff = _mm256_sub_epi16(a, b);
-  return diff;
-}
-
-/* Saturating signed 16-bit subtraction a - b (_mm256_subs_epi16). */
-SIMD_INLINE v256 v256_ssub_s16(v256 a, v256 b) {
-  const v256 diff = _mm256_subs_epi16(a, b);
-  return diff;
-}
-
-/* Saturating unsigned 16-bit subtraction a - b (_mm256_subs_epu16). */
-SIMD_INLINE v256 v256_ssub_u16(v256 a, v256 b) {
-  const v256 diff = _mm256_subs_epu16(a, b);
-  return diff;
-}
-
-/* Lane-wise 32-bit subtraction a - b (_mm256_sub_epi32). */
-SIMD_INLINE v256 v256_sub_32(v256 a, v256 b) {
-  const v256 diff = _mm256_sub_epi32(a, b);
-  return diff;
-}
-
-/* Lane-wise 64-bit subtraction a - b (_mm256_sub_epi64). */
-SIMD_INLINE v256 v256_sub_64(v256 a, v256 b) {
-  const v256 diff = _mm256_sub_epi64(a, b);
-  return diff;
-}
-
-/* Lane-wise absolute value of signed 16-bit lanes (_mm256_abs_epi16). */
-SIMD_INLINE v256 v256_abs_s16(v256 a) {
-  const v256 magnitude = _mm256_abs_epi16(a);
-  return magnitude;
-}
-
-/* Lane-wise absolute value of signed 8-bit lanes (_mm256_abs_epi8). */
-SIMD_INLINE v256 v256_abs_s8(v256 a) {
-  const v256 magnitude = _mm256_abs_epi8(a);
-  return magnitude;
-}
-
-// AVX doesn't have the direct intrinsics to zip/unzip 8, 16, 32 bit
-// lanes of lower or upper halves of a 256bit vector because the
-// unpack/pack intrinsics operate on the 256 bit input vector as 2
-// independent 128 bit vectors.
-/* Both inputs first have their two middle 64-bit quarters swapped, then the
-   per-lane _mm256_unpacklo_epi8() interleaves b (first) with a. */
-SIMD_INLINE v256 v256_ziplo_8(v256 a, v256 b) {
-  const v256 b_perm = _mm256_permute4x64_epi64(b, _MM_SHUFFLE(3, 1, 2, 0));
-  const v256 a_perm = _mm256_permute4x64_epi64(a, _MM_SHUFFLE(3, 1, 2, 0));
-  return _mm256_unpacklo_epi8(b_perm, a_perm);
-}
-
-/* Both inputs first have their two middle 64-bit quarters swapped, then the
-   per-lane _mm256_unpackhi_epi8() interleaves b (first) with a. */
-SIMD_INLINE v256 v256_ziphi_8(v256 a, v256 b) {
-  const v256 b_perm = _mm256_permute4x64_epi64(b, _MM_SHUFFLE(3, 1, 2, 0));
-  const v256 a_perm = _mm256_permute4x64_epi64(a, _MM_SHUFFLE(3, 1, 2, 0));
-  return _mm256_unpackhi_epi8(b_perm, a_perm);
-}
-
-/* Both inputs first have their two middle 64-bit quarters swapped, then the
-   per-lane _mm256_unpacklo_epi16() interleaves b (first) with a. */
-SIMD_INLINE v256 v256_ziplo_16(v256 a, v256 b) {
-  const v256 b_perm = _mm256_permute4x64_epi64(b, _MM_SHUFFLE(3, 1, 2, 0));
-  const v256 a_perm = _mm256_permute4x64_epi64(a, _MM_SHUFFLE(3, 1, 2, 0));
-  return _mm256_unpacklo_epi16(b_perm, a_perm);
-}
-
-/* Both inputs first have their two middle 64-bit quarters swapped, then the
-   per-lane _mm256_unpackhi_epi16() interleaves b (first) with a. */
-SIMD_INLINE v256 v256_ziphi_16(v256 a, v256 b) {
-  const v256 b_perm = _mm256_permute4x64_epi64(b, _MM_SHUFFLE(3, 1, 2, 0));
-  const v256 a_perm = _mm256_permute4x64_epi64(a, _MM_SHUFFLE(3, 1, 2, 0));
-  return _mm256_unpackhi_epi16(b_perm, a_perm);
-}
-
-/* Both inputs first have their two middle 64-bit quarters swapped, then the
-   per-lane _mm256_unpacklo_epi32() interleaves b (first) with a. */
-SIMD_INLINE v256 v256_ziplo_32(v256 a, v256 b) {
-  const v256 b_perm = _mm256_permute4x64_epi64(b, _MM_SHUFFLE(3, 1, 2, 0));
-  const v256 a_perm = _mm256_permute4x64_epi64(a, _MM_SHUFFLE(3, 1, 2, 0));
-  return _mm256_unpacklo_epi32(b_perm, a_perm);
-}
-
-/* Both inputs first have their two middle 64-bit quarters swapped, then the
-   per-lane _mm256_unpackhi_epi32() interleaves b (first) with a. */
-SIMD_INLINE v256 v256_ziphi_32(v256 a, v256 b) {
-  const v256 b_perm = _mm256_permute4x64_epi64(b, _MM_SHUFFLE(3, 1, 2, 0));
-  const v256 a_perm = _mm256_permute4x64_epi64(a, _MM_SHUFFLE(3, 1, 2, 0));
-  return _mm256_unpackhi_epi32(b_perm, a_perm);
-}
-
-/* Both inputs first have their two middle 64-bit quarters swapped, then the
-   per-lane _mm256_unpacklo_epi64() interleaves b (first) with a. */
-SIMD_INLINE v256 v256_ziplo_64(v256 a, v256 b) {
-  const v256 b_perm = _mm256_permute4x64_epi64(b, _MM_SHUFFLE(3, 1, 2, 0));
-  const v256 a_perm = _mm256_permute4x64_epi64(a, _MM_SHUFFLE(3, 1, 2, 0));
-  return _mm256_unpacklo_epi64(b_perm, a_perm);
-}
-
-/* Both inputs first have their two middle 64-bit quarters swapped, then the
-   per-lane _mm256_unpackhi_epi64() interleaves b (first) with a. */
-SIMD_INLINE v256 v256_ziphi_64(v256 a, v256 b) {
-  const v256 b_perm = _mm256_permute4x64_epi64(b, _MM_SHUFFLE(3, 1, 2, 0));
-  const v256 a_perm = _mm256_permute4x64_epi64(a, _MM_SHUFFLE(3, 1, 2, 0));
-  return _mm256_unpackhi_epi64(b_perm, a_perm);
-}
-
-/* Result: low half of a in the upper 128 bits, low half of b in the lower
-   128 bits. */
-SIMD_INLINE v256 v256_ziplo_128(v256 a, v256 b) {
-  const v128 upper = v256_low_v128(a);
-  const v128 lower = v256_low_v128(b);
-  return v256_from_v128(upper, lower);
-}
-
-/* Result: high half of a in the upper 128 bits, high half of b in the lower
-   128 bits. */
-SIMD_INLINE v256 v256_ziphi_128(v256 a, v256 b) {
-  const v128 upper = v256_high_v128(a);
-  const v128 lower = v256_high_v128(b);
-  return v256_from_v128(upper, lower);
-}
-
-/* Zips two 128-bit vectors into 256 bits: v128_ziphi_8() gives the upper
-   half, v128_ziplo_8() the lower half. */
-SIMD_INLINE v256 v256_zip_8(v128 a, v128 b) {
-  const v128 upper = v128_ziphi_8(a, b);
-  const v128 lower = v128_ziplo_8(a, b);
-  return v256_from_v128(upper, lower);
-}
-
-/* Zips two 128-bit vectors into 256 bits: v128_ziphi_16() gives the upper
-   half, v128_ziplo_16() the lower half. */
-SIMD_INLINE v256 v256_zip_16(v128 a, v128 b) {
-  const v128 upper = v128_ziphi_16(a, b);
-  const v128 lower = v128_ziplo_16(a, b);
-  return v256_from_v128(upper, lower);
-}
-
-/* Zips two 128-bit vectors into 256 bits: v128_ziphi_32() gives the upper
-   half, v128_ziplo_32() the lower half. */
-SIMD_INLINE v256 v256_zip_32(v128 a, v128 b) {
-  const v128 upper = v128_ziphi_32(a, b);
-  const v128 lower = v128_ziplo_32(a, b);
-  return v256_from_v128(upper, lower);
-}
-
-/* Moves the upper byte of every 16-bit lane down with an arithmetic shift,
-   packs b (first) and a to bytes, then swaps the two middle 64-bit quarters
-   to undo the per-lane ordering of the pack. */
-SIMD_INLINE v256 v256_unziphi_8(v256 a, v256 b) {
-  const v256 b_top = _mm256_srai_epi16(b, 8);
-  const v256 a_top = _mm256_srai_epi16(a, 8);
-  const v256 packed = _mm256_packs_epi16(b_top, a_top);
-  return _mm256_permute4x64_epi64(packed, _MM_SHUFFLE(3, 1, 2, 0));
-}
-
-/* Shifts both inputs up by one byte within each 128-bit lane and then reuses
-   v256_unziphi_8(). */
-SIMD_INLINE v256 v256_unziplo_8(v256 a, v256 b) {
-  const v256 a_up = _mm256_slli_si256(a, 1);
-  const v256 b_up = _mm256_slli_si256(b, 1);
-  return v256_unziphi_8(a_up, b_up);
-}
-
-/* Moves the upper 16 bits of every 32-bit lane down with an arithmetic shift,
-   packs b (first) and a to 16 bits, then swaps the two middle 64-bit quarters
-   to undo the per-lane ordering of the pack. */
-SIMD_INLINE v256 v256_unziphi_16(v256 a, v256 b) {
-  const v256 b_top = _mm256_srai_epi32(b, 16);
-  const v256 a_top = _mm256_srai_epi32(a, 16);
-  const v256 packed = _mm256_packs_epi32(b_top, a_top);
-  return _mm256_permute4x64_epi64(packed, _MM_SHUFFLE(3, 1, 2, 0));
-}
-
-/* Shifts both inputs up by two bytes within each 128-bit lane and then reuses
-   v256_unziphi_16(). */
-SIMD_INLINE v256 v256_unziplo_16(v256 a, v256 b) {
-  const v256 a_up = _mm256_slli_si256(a, 2);
-  const v256 b_up = _mm256_slli_si256(b, 2);
-  return v256_unziphi_16(a_up, b_up);
-}
-
-/* Selects 32-bit elements 1 and 3 of every 128-bit lane of b and a with a
-   float shuffle (bit pattern only), then swaps the two middle 64-bit
-   quarters of the result. */
-SIMD_INLINE v256 v256_unziphi_32(v256 a, v256 b) {
-  const __m256 b_ps = _mm256_castsi256_ps(b);
-  const __m256 a_ps = _mm256_castsi256_ps(a);
-  const __m256 picked = _mm256_shuffle_ps(b_ps, a_ps, _MM_SHUFFLE(3, 1, 3, 1));
-  return _mm256_permute4x64_epi64(_mm256_castps_si256(picked),
-                                  _MM_SHUFFLE(3, 1, 2, 0));
-}
-
-/* Selects 32-bit elements 0 and 2 of every 128-bit lane of b and a with a
-   float shuffle (bit pattern only), then swaps the two middle 64-bit
-   quarters of the result. */
-SIMD_INLINE v256 v256_unziplo_32(v256 a, v256 b) {
-  const __m256 b_ps = _mm256_castsi256_ps(b);
-  const __m256 a_ps = _mm256_castsi256_ps(a);
-  const __m256 picked = _mm256_shuffle_ps(b_ps, a_ps, _MM_SHUFFLE(2, 0, 2, 0));
-  return _mm256_permute4x64_epi64(_mm256_castps_si256(picked),
-                                  _MM_SHUFFLE(3, 1, 2, 0));
-}
-
-/* Combines b and a with a double shuffle using selector 15 (bit pattern
-   only), then swaps the two middle 64-bit quarters of the result. */
-SIMD_INLINE v256 v256_unziphi_64(v256 a, v256 b) {
-  const __m256d b_pd = _mm256_castsi256_pd(b);
-  const __m256d a_pd = _mm256_castsi256_pd(a);
-  const __m256d picked = _mm256_shuffle_pd(b_pd, a_pd, 15);
-  return _mm256_permute4x64_epi64(_mm256_castpd_si256(picked),
-                                  _MM_SHUFFLE(3, 1, 2, 0));
-}
-
-/* Combines b and a with a double shuffle using selector 0 (bit pattern
-   only), then swaps the two middle 64-bit quarters of the result. */
-SIMD_INLINE v256 v256_unziplo_64(v256 a, v256 b) {
-  const __m256d b_pd = _mm256_castsi256_pd(b);
-  const __m256d a_pd = _mm256_castsi256_pd(a);
-  const __m256d picked = _mm256_shuffle_pd(b_pd, a_pd, 0);
-  return _mm256_permute4x64_epi64(_mm256_castpd_si256(picked),
-                                  _MM_SHUFFLE(3, 1, 2, 0));
-}
-
-/* Widens a 128-bit vector to 256 bits: v128_unpackhi_u8_s16() gives the upper
-   half, v128_unpacklo_u8_s16() the lower half. */
-SIMD_INLINE v256 v256_unpack_u8_s16(v128 a) {
-  const v128 upper = v128_unpackhi_u8_s16(a);
-  const v128 lower = v128_unpacklo_u8_s16(a);
-  return v256_from_v128(upper, lower);
-}
-
-/* Swaps the two middle 64-bit quarters of a, then interleaves the low bytes
-   of each 128-bit lane with zero bytes (zero extension to 16 bits). */
-SIMD_INLINE v256 v256_unpacklo_u8_s16(v256 a) {
-  const v256 perm = _mm256_permute4x64_epi64(a, _MM_SHUFFLE(3, 1, 2, 0));
-  return _mm256_unpacklo_epi8(perm, _mm256_setzero_si256());
-}
-
-/* Swaps the two middle 64-bit quarters of a, then interleaves the high bytes
-   of each 128-bit lane with zero bytes (zero extension to 16 bits). */
-SIMD_INLINE v256 v256_unpackhi_u8_s16(v256 a) {
-  const v256 perm = _mm256_permute4x64_epi64(a, _MM_SHUFFLE(3, 1, 2, 0));
-  return _mm256_unpackhi_epi8(perm, _mm256_setzero_si256());
-}
-
-/* Widens a 128-bit vector to 256 bits: v128_unpackhi_s8_s16() gives the upper
-   half, v128_unpacklo_s8_s16() the lower half. */
-SIMD_INLINE v256 v256_unpack_s8_s16(v128 a) {
-  const v128 upper = v128_unpackhi_s8_s16(a);
-  const v128 lower = v128_unpacklo_s8_s16(a);
-  return v256_from_v128(upper, lower);
-}
-
-/* Places the permuted bytes in the upper byte of each 16-bit lane (the lower
-   byte, taken from the unpermuted a, is shifted out) and sign-extends with an
-   arithmetic right shift by 8. */
-SIMD_INLINE v256 v256_unpacklo_s8_s16(v256 a) {
-  const v256 perm = _mm256_permute4x64_epi64(a, _MM_SHUFFLE(3, 1, 2, 0));
-  const v256 paired = _mm256_unpacklo_epi8(a, perm);
-  return _mm256_srai_epi16(paired, 8);
-}
-
-/* Places the permuted bytes in the upper byte of each 16-bit lane (the lower
-   byte, taken from the unpermuted a, is shifted out) and sign-extends with an
-   arithmetic right shift by 8. */
-SIMD_INLINE v256 v256_unpackhi_s8_s16(v256 a) {
-  const v256 perm = _mm256_permute4x64_epi64(a, _MM_SHUFFLE(3, 1, 2, 0));
-  const v256 paired = _mm256_unpackhi_epi8(a, perm);
-  return _mm256_srai_epi16(paired, 8);
-}
-
-/* Packs b (first) and a with _mm256_packs_epi32(), then swaps the two middle
-   64-bit quarters to undo the per-lane ordering of the pack. */
-SIMD_INLINE v256 v256_pack_s32_s16(v256 a, v256 b) {
-  const v256 packed = _mm256_packs_epi32(b, a);
-  return _mm256_permute4x64_epi64(packed, _MM_SHUFFLE(3, 1, 2, 0));
-}
-
-/* Packs b (first) and a with _mm256_packus_epi32(), then swaps the two middle
-   64-bit quarters to undo the per-lane ordering of the pack. */
-SIMD_INLINE v256 v256_pack_s32_u16(v256 a, v256 b) {
-  const v256 packed = _mm256_packus_epi32(b, a);
-  return _mm256_permute4x64_epi64(packed, _MM_SHUFFLE(3, 1, 2, 0));
-}
-
-/* Packs b (first) and a with _mm256_packus_epi16(), then swaps the two middle
-   64-bit quarters to undo the per-lane ordering of the pack. */
-SIMD_INLINE v256 v256_pack_s16_u8(v256 a, v256 b) {
-  const v256 packed = _mm256_packus_epi16(b, a);
-  return _mm256_permute4x64_epi64(packed, _MM_SHUFFLE(3, 1, 2, 0));
-}
-
-/* Packs b (first) and a with _mm256_packs_epi16(), then swaps the two middle
-   64-bit quarters to undo the per-lane ordering of the pack. */
-SIMD_INLINE v256 v256_pack_s16_s8(v256 a, v256 b) {
-  const v256 packed = _mm256_packs_epi16(b, a);
-  return _mm256_permute4x64_epi64(packed, _MM_SHUFFLE(3, 1, 2, 0));
-}
-
-/* Widens a 128-bit vector to 256 bits: v128_unpackhi_u16_s32() gives the
-   upper half, v128_unpacklo_u16_s32() the lower half. */
-SIMD_INLINE v256 v256_unpack_u16_s32(v128 a) {
-  const v128 upper = v128_unpackhi_u16_s32(a);
-  const v128 lower = v128_unpacklo_u16_s32(a);
-  return v256_from_v128(upper, lower);
-}
-
-/* Widens a 128-bit vector to 256 bits: v128_unpackhi_s16_s32() gives the
-   upper half, v128_unpacklo_s16_s32() the lower half. */
-SIMD_INLINE v256 v256_unpack_s16_s32(v128 a) {
-  const v128 upper = v128_unpackhi_s16_s32(a);
-  const v128 lower = v128_unpacklo_s16_s32(a);
-  return v256_from_v128(upper, lower);
-}
-
-/* Swaps the two middle 64-bit quarters of a, then interleaves the low 16-bit
-   elements of each 128-bit lane with zeros (zero extension to 32 bits). */
-SIMD_INLINE v256 v256_unpacklo_u16_s32(v256 a) {
-  const v256 perm = _mm256_permute4x64_epi64(a, _MM_SHUFFLE(3, 1, 2, 0));
-  return _mm256_unpacklo_epi16(perm, _mm256_setzero_si256());
-}
-
-/* Places the permuted 16-bit elements in the upper half of each 32-bit lane
-   (the lower half, taken from the unpermuted a, is shifted out) and
-   sign-extends with an arithmetic right shift by 16. */
-SIMD_INLINE v256 v256_unpacklo_s16_s32(v256 a) {
-  const v256 perm = _mm256_permute4x64_epi64(a, _MM_SHUFFLE(3, 1, 2, 0));
-  const v256 paired = _mm256_unpacklo_epi16(a, perm);
-  return _mm256_srai_epi32(paired, 16);
-}
-
-/* Swaps the two middle 64-bit quarters of a, then interleaves the high 16-bit
-   elements of each 128-bit lane with zeros (zero extension to 32 bits). */
-SIMD_INLINE v256 v256_unpackhi_u16_s32(v256 a) {
-  const v256 perm = _mm256_permute4x64_epi64(a, _MM_SHUFFLE(3, 1, 2, 0));
-  return _mm256_unpackhi_epi16(perm, _mm256_setzero_si256());
-}
-
-/* Places the permuted 16-bit elements in the upper half of each 32-bit lane
-   (the lower half, taken from the unpermuted a, is shifted out) and
-   sign-extends with an arithmetic right shift by 16. */
-SIMD_INLINE v256 v256_unpackhi_s16_s32(v256 a) {
-  const v256 perm = _mm256_permute4x64_epi64(a, _MM_SHUFFLE(3, 1, 2, 0));
-  const v256 paired = _mm256_unpackhi_epi16(a, perm);
-  return _mm256_srai_epi32(paired, 16);
-}
-
-/* Full-width byte shuffle built from the per-lane _mm256_shuffle_epi8():
-   each 128-bit half of a is broadcast to both lanes and shuffled with
-   pattern; bytes whose pattern value compares (signed) below 16 are taken
-   from the low-half result, the rest from the high-half result. */
-SIMD_INLINE v256 v256_shuffle_8(v256 a, v256 pattern) {
-  const v256 hi_bcast =
-      _mm256_permute2x128_si256(a, a, _MM_SHUFFLE(0, 1, 0, 1));
-  const v256 from_hi = _mm256_shuffle_epi8(hi_bcast, pattern);
-  const v256 lo_bcast =
-      _mm256_permute2x128_si256(a, a, _MM_SHUFFLE(0, 0, 0, 0));
-  const v256 from_lo = _mm256_shuffle_epi8(lo_bcast, pattern);
-  const v256 use_lo = _mm256_cmpgt_epi8(v256_dup_8(16), pattern);
-  return _mm256_blendv_epi8(from_hi, from_lo, use_lo);
-}
-
-/* Byte shuffle over the 64-byte table formed by b followed by a. Bytes of b
-   are looked up with the raw pattern, bytes of a with (pattern - 32); the
-   four candidate results are merged with signed compares of pattern against
-   16, 32 and 48. */
-SIMD_INLINE v256 v256_wideshuffle_8(v256 a, v256 b, v256 pattern) {
-  const v256 c32 = v256_dup_8(32);
-  const v256 p32 = v256_sub_8(pattern, c32);
-  /* Candidates from a, indexed with the rebased pattern. */
-  const v256 a_hi = _mm256_shuffle_epi8(
-      _mm256_permute2x128_si256(a, b, _MM_SHUFFLE(0, 1, 0, 1)), p32);
-  const v256 a_lo = _mm256_shuffle_epi8(
-      _mm256_permute2x128_si256(a, b, _MM_SHUFFLE(0, 0, 0, 0)), p32);
-  const v256 from_a = _mm256_blendv_epi8(
-      a_hi, a_lo, _mm256_cmpgt_epi8(v256_dup_8(48), pattern));
-  /* Candidates from b, indexed with the raw pattern. */
-  const v256 b_hi = _mm256_shuffle_epi8(
-      _mm256_permute2x128_si256(a, b, _MM_SHUFFLE(0, 3, 0, 3)), pattern);
-  const v256 b_lo = _mm256_shuffle_epi8(
-      _mm256_permute2x128_si256(a, b, _MM_SHUFFLE(0, 2, 0, 2)), pattern);
-  const v256 from_b = _mm256_blendv_epi8(
-      b_hi, b_lo, _mm256_cmpgt_epi8(v256_dup_8(16), pattern));
-  return _mm256_blendv_epi8(from_a, from_b, _mm256_cmpgt_epi8(c32, pattern));
-}
-
-/* Per-128-bit-lane byte shuffle (_mm256_shuffle_epi8). */
-SIMD_INLINE v256 v256_pshuffle_8(v256 a, v256 pattern) {
-  const v256 shuffled = _mm256_shuffle_epi8(a, pattern);
-  return shuffled;
-}
-
-/* Dot product of the bytes of a (widened as signed) with the bytes of b
-   (widened as unsigned). Products are accumulated in 32-bit lanes and the
-   final 32-bit sum is returned sign-extended. */
-SIMD_INLINE int64_t v256_dotp_su8(v256 a, v256 b) {
-  const v256 prod_hi =
-      _mm256_madd_epi16(v256_unpackhi_s8_s16(a), v256_unpackhi_u8_s16(b));
-  const v256 prod_lo =
-      _mm256_madd_epi16(v256_unpacklo_s8_s16(a), v256_unpacklo_u8_s16(b));
-  const v256 prod = _mm256_add_epi32(prod_hi, prod_lo);
-  /* Fold 8 x 32-bit partial sums down to one. */
-  v128 acc = _mm_add_epi32(_mm256_extracti128_si256(prod, 0),
-                           _mm256_extracti128_si256(prod, 1));
-  acc = _mm_add_epi32(acc, _mm_srli_si128(acc, 8));
-  acc = _mm_add_epi32(acc, _mm_srli_si128(acc, 4));
-  return (int32_t)v128_low_u32(acc);
-}
-
-/* Dot product of the signed 16-bit lanes of a and b, returned as int64_t.
-   _mm256_madd_epi16() yields eight 32-bit pair sums, which are then widened
-   and accumulated in 64 bits. */
-SIMD_INLINE int64_t v256_dotp_s16(v256 a, v256 b) {
- v256 r = _mm256_madd_epi16(a, b);
-#if defined(__x86_64__)
- v128 t;
- /* Sign-extend each half's four 32-bit sums to 64 bits and add them. */
- r = _mm256_add_epi64(_mm256_cvtepi32_epi64(v256_high_v128(r)),
- _mm256_cvtepi32_epi64(v256_low_v128(r)));
- /* Add the upper 128-bit lane onto the lower one, then fold the two
-    remaining 64-bit sums. */
- t = v256_low_v128(_mm256_add_epi64(
- r, _mm256_permute2x128_si256(r, r, _MM_SHUFFLE(2, 0, 0, 1))));
- return _mm_cvtsi128_si64(_mm_add_epi64(t, _mm_srli_si128(t, 8)));
-#else
- /* Non-x86-64 path: extract each of the eight 32-bit sums and add them as
-    int64_t scalars. */
- v128 l = v256_low_v128(r);
- v128 h = v256_high_v128(r);
- return (int64_t)_mm_cvtsi128_si32(l) +
- (int64_t)_mm_cvtsi128_si32(_mm_srli_si128(l, 4)) +
- (int64_t)_mm_cvtsi128_si32(_mm_srli_si128(l, 8)) +
- (int64_t)_mm_cvtsi128_si32(_mm_srli_si128(l, 12)) +
- (int64_t)_mm_cvtsi128_si32(h) +
- (int64_t)_mm_cvtsi128_si32(_mm_srli_si128(h, 4)) +
- (int64_t)_mm_cvtsi128_si32(_mm_srli_si128(h, 8)) +
- (int64_t)_mm_cvtsi128_si32(_mm_srli_si128(h, 12));
-#endif
-}
-
-/* Dot product of the 32-bit lanes of a and b, returned as int64_t. Each
-   product is the low 32 bits from _mm256_mullo_epi32(); only the summation
-   is carried out in 64 bits. */
-SIMD_INLINE int64_t v256_dotp_s32(v256 a, v256 b) {
- v256 r = _mm256_mullo_epi32(a, b);
-#if defined(__x86_64__)
- v128 t;
- /* Sign-extend each half's four 32-bit products to 64 bits and add them. */
- r = _mm256_add_epi64(_mm256_cvtepi32_epi64(v256_high_v128(r)),
- _mm256_cvtepi32_epi64(v256_low_v128(r)));
- /* Add the upper 128-bit lane onto the lower one, then fold the two
-    remaining 64-bit sums. */
- t = v256_low_v128(_mm256_add_epi64(
- r, _mm256_permute2x128_si256(r, r, _MM_SHUFFLE(2, 0, 0, 1))));
- return _mm_cvtsi128_si64(_mm_add_epi64(t, _mm_srli_si128(t, 8)));
-#else
- /* Non-x86-64 path: extract each of the eight 32-bit products and add them
-    as int64_t scalars. */
- v128 l = v256_low_v128(r);
- v128 h = v256_high_v128(r);
- return (int64_t)_mm_cvtsi128_si32(l) +
- (int64_t)_mm_cvtsi128_si32(_mm_srli_si128(l, 4)) +
- (int64_t)_mm_cvtsi128_si32(_mm_srli_si128(l, 8)) +
- (int64_t)_mm_cvtsi128_si32(_mm_srli_si128(l, 12)) +
- (int64_t)_mm_cvtsi128_si32(h) +
- (int64_t)_mm_cvtsi128_si32(_mm_srli_si128(h, 4)) +
- (int64_t)_mm_cvtsi128_si32(_mm_srli_si128(h, 8)) +
- (int64_t)_mm_cvtsi128_si32(_mm_srli_si128(h, 12));
-#endif
-}
-
-/* Returns the sum of all 32 unsigned bytes of a. _mm256_sad_epu8() against
-   zero produces one partial sum per 64-bit quarter; the two 128-bit halves
-   are added and the two remaining partial sums are read out and added. */
-SIMD_INLINE uint64_t v256_hadd_u8(v256 a) {
-  v256 t = _mm256_sad_epu8(a, _mm256_setzero_si256());
-  v128 lo = v256_low_v128(t);
-  v128 hi = v256_high_v128(t);
-  lo = v128_add_32(lo, hi);
-  /* Both operands are v64 values, so both are read with the v64 accessor
-     (the original used the v128 accessor for the second one). */
-  return v64_low_u32(v128_low_v64(lo)) + v64_low_u32(v128_high_v64(lo));
-}
-
-/* Accumulator type used by v256_sad_u8_init(), v256_sad_u8() and
-   v256_sad_u8_sum(). */
-typedef v256 sad256_internal;
-
-/* Returns an all-zero accumulator for v256_sad_u8(). Declared with (void) so
-   that this is a real prototype in C rather than an unspecified-argument
-   declaration. */
-SIMD_INLINE sad256_internal v256_sad_u8_init(void) {
-  return _mm256_setzero_si256();
-}
-
-/* Adds the _mm256_sad_epu8() of a and b to the accumulator s using 64-bit
-   adds. The returned accumulator has an implementation-dependent layout and
-   must be finalised with v256_sad_u8_sum(). The result for more than 32
-   v256_sad_u8() calls is undefined. */
-SIMD_INLINE sad256_internal v256_sad_u8(sad256_internal s, v256 a, v256 b) {
-  const v256 partial = _mm256_sad_epu8(a, b);
-  return _mm256_add_epi64(s, partial);
-}
-
-/* Finalises a v256_sad_u8() accumulator: folds the upper 64 bits of each
-   128-bit lane onto the lower 64 bits, adds the two lanes and returns the
-   low 32 bits. */
-SIMD_INLINE uint32_t v256_sad_u8_sum(sad256_internal s) {
-  const v256 folded = _mm256_add_epi32(s, _mm256_unpackhi_epi64(s, s));
-  const v128 total = _mm_add_epi32(v256_high_v128(folded), v256_low_v128(folded));
-  return v128_low_u32(total);
-}
-
-/* Accumulator type used by v256_ssd_u8_init(), v256_ssd_u8() and
-   v256_ssd_u8_sum(). */
-typedef v256 ssd256_internal;
-
-/* Returns an all-zero accumulator for v256_ssd_u8(). Declared with (void) so
-   that this is a real prototype in C rather than an unspecified-argument
-   declaration. */
-SIMD_INLINE ssd256_internal v256_ssd_u8_init(void) {
-  return _mm256_setzero_si256();
-}
-
-/* Accumulates the sum of squared differences of the unsigned bytes of a and
-   b into s.
-   Implementation dependent return value. Result must be finalised with
- * v256_ssd_u8_sum(). */
-SIMD_INLINE ssd256_internal v256_ssd_u8(ssd256_internal s, v256 a, v256 b) {
- /* Zero-extend bytes to 16 bits (per 128-bit lane) and subtract. */
- v256 l = _mm256_sub_epi16(_mm256_unpacklo_epi8(a, _mm256_setzero_si256()),
- _mm256_unpacklo_epi8(b, _mm256_setzero_si256()));
- v256 h = _mm256_sub_epi16(_mm256_unpackhi_epi8(a, _mm256_setzero_si256()),
- _mm256_unpackhi_epi8(b, _mm256_setzero_si256()));
- /* Square and pairwise-add into 32-bit lanes. */
- v256 rl = _mm256_madd_epi16(l, l);
- v256 rh = _mm256_madd_epi16(h, h);
- v128 c = _mm_cvtsi32_si128(32);
- /* Fold the four 32-bit sums of each 128-bit lane into that lane's lowest
-    32-bit element. */
- rl = _mm256_add_epi32(rl, _mm256_srli_si256(rl, 8));
- rl = _mm256_add_epi32(rl, _mm256_srli_si256(rl, 4));
- rh = _mm256_add_epi32(rh, _mm256_srli_si256(rh, 8));
- rh = _mm256_add_epi32(rh, _mm256_srli_si256(rh, 4));
- /* Pair the low 64 bits of rl and rh per lane; shifting left then right by
-    32 clears the upper 32 bits of each 64-bit element before the 64-bit
-    accumulate. */
- return _mm256_add_epi64(
- s,
- _mm256_srl_epi64(_mm256_sll_epi64(_mm256_unpacklo_epi64(rl, rh), c), c));
-}
-
-/* Finalises a v256_ssd_u8() accumulator: folds the upper 64 bits of each
-   128-bit lane onto the lower 64 bits, adds the two lanes and returns the
-   low 32 bits. */
-SIMD_INLINE uint32_t v256_ssd_u8_sum(ssd256_internal s) {
-  const v256 folded = _mm256_add_epi32(s, _mm256_unpackhi_epi64(s, s));
-  const v128 total = _mm_add_epi32(v256_high_v128(folded), v256_low_v128(folded));
-  return v128_low_u32(total);
-}
-
-/* Bitwise OR of a and b. */
-SIMD_INLINE v256 v256_or(v256 a, v256 b) {
-  const v256 bits = _mm256_or_si256(a, b);
-  return bits;
-}
-
-/* Bitwise XOR of a and b. */
-SIMD_INLINE v256 v256_xor(v256 a, v256 b) {
-  const v256 bits = _mm256_xor_si256(a, b);
-  return bits;
-}
-
-/* Bitwise AND of a and b. */
-SIMD_INLINE v256 v256_and(v256 a, v256 b) {
-  const v256 bits = _mm256_and_si256(a, b);
-  return bits;
-}
-
-/* Computes a & ~b. The operands are swapped when calling
-   _mm256_andnot_si256(), which inverts its first argument. */
-SIMD_INLINE v256 v256_andn(v256 a, v256 b) {
-  const v256 bits = _mm256_andnot_si256(b, a);
-  return bits;
-}
-
-/* Multiplies the 16-bit lanes of a and b, producing full-width products: the
-   low and high 16 bits from v128_mullo_s16()/v128_mulhi_s16() are zipped
-   back together into 32-bit results. */
-SIMD_INLINE v256 v256_mul_s16(v64 a, v64 b) {
-  const v128 lo_bits = v128_mullo_s16(a, b);
-  const v128 hi_bits = v128_mulhi_s16(a, b);
-  const v128 upper = v128_ziphi_16(hi_bits, lo_bits);
-  const v128 lower = v128_ziplo_16(hi_bits, lo_bits);
-  return v256_from_v128(upper, lower);
-}
-
-/* Low 16 bits of each 16-bit lane product (_mm256_mullo_epi16). */
-SIMD_INLINE v256 v256_mullo_s16(v256 a, v256 b) {
-  const v256 product = _mm256_mullo_epi16(a, b);
-  return product;
-}
-
-/* High 16 bits of each signed 16-bit lane product (_mm256_mulhi_epi16). */
-SIMD_INLINE v256 v256_mulhi_s16(v256 a, v256 b) {
-  const v256 product = _mm256_mulhi_epi16(a, b);
-  return product;
-}
-
-/* Low 32 bits of each 32-bit lane product (_mm256_mullo_epi32). */
-SIMD_INLINE v256 v256_mullo_s32(v256 a, v256 b) {
-  const v256 product = _mm256_mullo_epi32(a, b);
-  return product;
-}
-
-/* Multiplies signed 16-bit lanes and adds adjacent products into 32-bit
-   lanes (_mm256_madd_epi16). */
-SIMD_INLINE v256 v256_madd_s16(v256 a, v256 b) {
-  const v256 sums = _mm256_madd_epi16(a, b);
-  return sums;
-}
-
-/* Multiplies the unsigned bytes of a by the signed bytes of b and adds
-   adjacent products into 16-bit lanes with saturation
-   (_mm256_maddubs_epi16). */
-SIMD_INLINE v256 v256_madd_us8(v256 a, v256 b) {
-  const v256 sums = _mm256_maddubs_epi16(a, b);
-  return sums;
-}
-
-/* Rounded-up average of unsigned 8-bit lanes (_mm256_avg_epu8). */
-SIMD_INLINE v256 v256_avg_u8(v256 a, v256 b) {
-  const v256 mean = _mm256_avg_epu8(a, b);
-  return mean;
-}
-
-/* Rounded-down average of unsigned 8-bit lanes: the rounded-up average minus
-   one wherever a and b differ in their lowest bit. */
-SIMD_INLINE v256 v256_rdavg_u8(v256 a, v256 b) {
-  const v256 rounded_up = _mm256_avg_epu8(a, b);
-  const v256 odd_sum = _mm256_and_si256(_mm256_xor_si256(a, b), v256_dup_8(1));
-  return _mm256_sub_epi8(rounded_up, odd_sum);
-}
-
-/* Rounded-down average of unsigned 16-bit lanes: the rounded-up average
-   minus one wherever a and b differ in their lowest bit. */
-SIMD_INLINE v256 v256_rdavg_u16(v256 a, v256 b) {
-  const v256 rounded_up = _mm256_avg_epu16(a, b);
-  const v256 odd_sum = _mm256_and_si256(_mm256_xor_si256(a, b), v256_dup_16(1));
-  return _mm256_sub_epi16(rounded_up, odd_sum);
-}
-
-/* Rounded-up average of unsigned 16-bit lanes (_mm256_avg_epu16). */
-SIMD_INLINE v256 v256_avg_u16(v256 a, v256 b) {
-  const v256 mean = _mm256_avg_epu16(a, b);
-  return mean;
-}
-
-/* Lane-wise minimum of unsigned 8-bit lanes. */
-SIMD_INLINE v256 v256_min_u8(v256 a, v256 b) {
-  const v256 smaller = _mm256_min_epu8(a, b);
-  return smaller;
-}
-
-/* Lane-wise maximum of unsigned 8-bit lanes. */
-SIMD_INLINE v256 v256_max_u8(v256 a, v256 b) {
-  const v256 larger = _mm256_max_epu8(a, b);
-  return larger;
-}
-
-/* Lane-wise minimum of signed 8-bit lanes. */
-SIMD_INLINE v256 v256_min_s8(v256 a, v256 b) {
-  const v256 smaller = _mm256_min_epi8(a, b);
-  return smaller;
-}
-
-/* Collects the top bit of each of the 32 bytes of a into a 32-bit mask. */
-SIMD_INLINE uint32_t v256_movemask_8(v256 a) {
-  return _mm256_movemask_epi8(a);
-}
-
-/* Byte-wise select (_mm256_blendv_epi8): takes the byte of b where the top
-   bit of the matching byte of c is set, otherwise the byte of a. */
-SIMD_INLINE v256 v256_blend_8(v256 a, v256 b, v256 c) {
-  const v256 picked = _mm256_blendv_epi8(a, b, c);
-  return picked;
-}
-
-/* Lane-wise maximum of signed 8-bit lanes. */
-SIMD_INLINE v256 v256_max_s8(v256 a, v256 b) {
-  const v256 larger = _mm256_max_epi8(a, b);
-  return larger;
-}
-
-/* Lane-wise minimum of signed 16-bit lanes. */
-SIMD_INLINE v256 v256_min_s16(v256 a, v256 b) {
-  const v256 smaller = _mm256_min_epi16(a, b);
-  return smaller;
-}
-
-/* Lane-wise maximum of signed 16-bit lanes. */
-SIMD_INLINE v256 v256_max_s16(v256 a, v256 b) {
-  const v256 larger = _mm256_max_epi16(a, b);
-  return larger;
-}
-
-/* Lane-wise minimum of signed 32-bit lanes. */
-SIMD_INLINE v256 v256_min_s32(v256 a, v256 b) {
-  const v256 smaller = _mm256_min_epi32(a, b);
-  return smaller;
-}
-
-/* Lane-wise maximum of signed 32-bit lanes. */
-SIMD_INLINE v256 v256_max_s32(v256 a, v256 b) {
-  const v256 larger = _mm256_max_epi32(a, b);
-  return larger;
-}
-
-/* Signed 8-bit lane-wise a > b mask (_mm256_cmpgt_epi8). */
-SIMD_INLINE v256 v256_cmpgt_s8(v256 a, v256 b) {
-  const v256 mask = _mm256_cmpgt_epi8(a, b);
-  return mask;
-}
-
-/* Signed 8-bit lane-wise a < b mask, computed as b > a. */
-SIMD_INLINE v256 v256_cmplt_s8(v256 a, v256 b) {
-  const v256 mask = _mm256_cmpgt_epi8(b, a);
-  return mask;
-}
-
-/* 8-bit lane-wise equality mask (_mm256_cmpeq_epi8). */
-SIMD_INLINE v256 v256_cmpeq_8(v256 a, v256 b) {
-  const v256 mask = _mm256_cmpeq_epi8(a, b);
-  return mask;
-}
-
-/* Signed 16-bit lane-wise a > b mask (_mm256_cmpgt_epi16). */
-SIMD_INLINE v256 v256_cmpgt_s16(v256 a, v256 b) {
-  const v256 mask = _mm256_cmpgt_epi16(a, b);
-  return mask;
-}
-
-/* Signed 16-bit lane-wise a < b mask, computed as b > a. */
-SIMD_INLINE v256 v256_cmplt_s16(v256 a, v256 b) {
-  const v256 mask = _mm256_cmpgt_epi16(b, a);
-  return mask;
-}
-
-/* 16-bit lane-wise equality mask (_mm256_cmpeq_epi16). */
-SIMD_INLINE v256 v256_cmpeq_16(v256 a, v256 b) {
-  const v256 mask = _mm256_cmpeq_epi16(a, b);
-  return mask;
-}
-
-/* Signed 32-bit lane-wise a > b mask (_mm256_cmpgt_epi32). */
-SIMD_INLINE v256 v256_cmpgt_s32(v256 a, v256 b) {
-  const v256 mask = _mm256_cmpgt_epi32(a, b);
-  return mask;
-}
-
-/* Signed 32-bit lane-wise a < b mask, computed as b > a. */
-SIMD_INLINE v256 v256_cmplt_s32(v256 a, v256 b) {
-  const v256 mask = _mm256_cmpgt_epi32(b, a);
-  return mask;
-}
-
-/* 32-bit lane-wise equality mask (_mm256_cmpeq_epi32). */
-SIMD_INLINE v256 v256_cmpeq_32(v256 a, v256 b) {
-  const v256 mask = _mm256_cmpeq_epi32(a, b);
-  return mask;
-}
-
-/* 8-bit left shift by c, built from a 16-bit shift: the bits that crossed
-   over from the neighbouring byte are cleared with the mask 0xff << c. */
-SIMD_INLINE v256 v256_shl_8(v256 a, unsigned int c) {
-  const v256 keep = _mm256_set1_epi8((uint8_t)(0xff << c));
-  const v256 shifted = _mm256_sll_epi16(a, _mm_cvtsi32_si128(c));
-  return _mm256_and_si256(keep, shifted);
-}
-
-/* 8-bit logical right shift by c, built from a 16-bit shift: the bits that
-   crossed over from the neighbouring byte are cleared with the mask
-   0xff >> c. */
-SIMD_INLINE v256 v256_shr_u8(v256 a, unsigned int c) {
-  const v256 keep = _mm256_set1_epi8(0xff >> c);
-  const v256 shifted = _mm256_srl_epi16(a, _mm_cvtsi32_si128(c));
-  return _mm256_and_si256(keep, shifted);
-}
-
-/* 8-bit arithmetic right shift by c: every byte is duplicated into a 16-bit
-   lane, shifted right arithmetically by c + 8, and the results are packed
-   back to bytes. */
-SIMD_INLINE v256 v256_shr_s8(v256 a, unsigned int c) {
-  const __m128i count = _mm_cvtsi32_si128(c + 8);
-  const v256 lo = _mm256_sra_epi16(_mm256_unpacklo_epi8(a, a), count);
-  const v256 hi = _mm256_sra_epi16(_mm256_unpackhi_epi8(a, a), count);
-  return _mm256_packs_epi16(lo, hi);
-}
-
-/* 16-bit lane left shift by the run-time count c. */
-SIMD_INLINE v256 v256_shl_16(v256 a, unsigned int c) {
-  const __m128i count = _mm_cvtsi32_si128(c);
-  return _mm256_sll_epi16(a, count);
-}
-
-/* 16-bit lane logical right shift by the run-time count c. */
-SIMD_INLINE v256 v256_shr_u16(v256 a, unsigned int c) {
-  const __m128i count = _mm_cvtsi32_si128(c);
-  return _mm256_srl_epi16(a, count);
-}
-
-/* 16-bit lane arithmetic right shift by the run-time count c. */
-SIMD_INLINE v256 v256_shr_s16(v256 a, unsigned int c) {
-  const __m128i count = _mm_cvtsi32_si128(c);
-  return _mm256_sra_epi16(a, count);
-}
-
-/* 32-bit lane left shift by the run-time count c. */
-SIMD_INLINE v256 v256_shl_32(v256 a, unsigned int c) {
-  const __m128i count = _mm_cvtsi32_si128(c);
-  return _mm256_sll_epi32(a, count);
-}
-
-/* 32-bit lane logical right shift by the run-time count c. */
-SIMD_INLINE v256 v256_shr_u32(v256 a, unsigned int c) {
-  const __m128i count = _mm_cvtsi32_si128(c);
-  return _mm256_srl_epi32(a, count);
-}
-
-/* 32-bit lane arithmetic right shift by the run-time count c. */
-SIMD_INLINE v256 v256_shr_s32(v256 a, unsigned int c) {
-  const __m128i count = _mm_cvtsi32_si128(c);
-  return _mm256_sra_epi32(a, count);
-}
-
-/* 64-bit lane left shift by the run-time count c. */
-SIMD_INLINE v256 v256_shl_64(v256 a, unsigned int c) {
-  const __m128i count = _mm_cvtsi32_si128(c);
-  return _mm256_sll_epi64(a, count);
-}
-
-/* 64-bit lane logical right shift by the run-time count c. */
-SIMD_INLINE v256 v256_shr_u64(v256 a, unsigned int c) {
-  const __m128i count = _mm_cvtsi32_si128(c);
-  return _mm256_srl_epi64(a, count);
-}
-
-/* 64-bit lane arithmetic right shift by the run-time count c.
-   _mm256_sra_epi64() is an AVX-512 instruction on 256-bit vectors and needs
-   AVX512VL in addition to AVX512F, so both feature macros are checked (the
-   original tested only __AVX512F__, which fails to build on targets that
-   have AVX512F without VL). Otherwise the shift is done per 128-bit half with
-   v128_shr_s64(). */
-SIMD_INLINE v256 v256_shr_s64(v256 a, unsigned int c) {
-#if defined(__AVX512F__) && defined(__AVX512VL__)
-  return _mm256_sra_epi64(a, _mm_cvtsi32_si128(c));
-#else
-  return v256_from_v128(v128_shr_s64(v256_high_v128(a), c),
-                        v128_shr_s64(v256_low_v128(a), c));
-#endif
-}
-
-/* These intrinsics require immediate values, so we must use #defines
- to enforce that. */
-// _mm256_slli_si256 works on 128 bit lanes and can't be used
-// Shifts the whole 256-bit value left by n bytes. For n < 16 the high half is
-// built with v128_align() from both halves and the low half is shifted; for
-// n >= 16 the low half becomes zero and the shifted low half of a is inserted
-// as the new high half.
-#define v256_shl_n_byte(a, n) \
- ((n) < 16 ? v256_from_v128( \
- v128_align(v256_high_v128(a), v256_low_v128(a), 16 - (n)), \
- v128_shl_n_byte(v256_low_v128(a), n)) \
- : _mm256_inserti128_si256( \
- _mm256_setzero_si256(), \
- v128_shl_n_byte(v256_low_v128(a), (n)-16), 1))
-
-// _mm256_srli_si256 works on 128 bit lanes and can't be used
-// Shifts the whole 256-bit value right by n bytes. For n < 16 a per-lane
-// alignr is applied to a and a copy of a whose low lane holds a's high half
-// and whose high lane is zero. For n >= 16 only the high half of a
-// contributes, placed in the low lane of an otherwise zero vector.
-// NOTE(review): the n >= 16 branch passes n (16..31) straight to v128_align()
-// -- presumably relying on the byte-align intrinsic shifting in zeros for
-// counts above 16; confirm against the v128 header.
-#define v256_shr_n_byte(a, n) \
- ((n) < 16 \
- ? _mm256_alignr_epi8( \
- _mm256_permute2x128_si256(a, a, _MM_SHUFFLE(2, 0, 0, 1)), a, n) \
- : _mm256_inserti128_si256( \
- _mm256_setzero_si256(), \
- v128_align(v256_high_v128(a), v256_high_v128(a), n), 0))
-
-// _mm256_alignr_epi8 works on two 128 bit lanes and can't be used
-// For c != 0: ORs b shifted right by c bytes with a shifted left by 32 - c
-// bytes; for c == 0 the result is b. The count is parenthesised in
-// "32 - (c)" so that an expression argument such as "n + 1" is subtracted as
-// a whole (the original expanded to "32 - n + 1").
-#define v256_align(a, b, c) \
-  ((c) ? v256_or(v256_shr_n_byte(b, c), v256_shl_n_byte(a, 32 - (c))) : (b))
-
-/* Immediate-count shifts. The 8-bit variants are built from 16-bit
-   operations: the unsigned ones mask off bits that crossed a byte boundary,
-   the signed one duplicates each byte into a 16-bit lane, shifts by c + 8
-   and packs the results back to bytes. */
-#define v256_shl_n_8(a, c) \
- _mm256_and_si256(_mm256_set1_epi8((uint8_t)(0xff << (c))), \
- _mm256_slli_epi16(a, c))
-#define v256_shr_n_u8(a, c) \
- _mm256_and_si256(_mm256_set1_epi8(0xff >> (c)), _mm256_srli_epi16(a, c))
-#define v256_shr_n_s8(a, c) \
- _mm256_packs_epi16(_mm256_srai_epi16(_mm256_unpacklo_epi8(a, a), (c) + 8), \
- _mm256_srai_epi16(_mm256_unpackhi_epi8(a, a), (c) + 8))
-/* The 16/32/64-bit variants map directly onto the immediate shift
-   intrinsics. */
-#define v256_shl_n_16(a, c) _mm256_slli_epi16(a, c)
-#define v256_shr_n_u16(a, c) _mm256_srli_epi16(a, c)
-#define v256_shr_n_s16(a, c) _mm256_srai_epi16(a, c)
-#define v256_shl_n_32(a, c) _mm256_slli_epi32(a, c)
-#define v256_shr_n_u32(a, c) _mm256_srli_epi32(a, c)
-#define v256_shr_n_s32(a, c) _mm256_srai_epi32(a, c)
-#define v256_shl_n_64(a, c) _mm256_slli_epi64(a, c)
-#define v256_shr_n_u64(a, c) _mm256_srli_epi64(a, c)
-/* The signed 64-bit shift goes through the v256_shr_s64() function instead
-   of an immediate intrinsic. */
-#define v256_shr_n_s64(a, c) \
- v256_shr_s64((a), (c)) // _mm256_srai_epi64 broken in gcc?
-/* Word shifts are byte shifts of 2 * n. */
-#define v256_shr_n_word(a, n) v256_shr_n_byte(a, 2 * (n))
-#define v256_shl_n_word(a, n) v256_shl_n_byte(a, 2 * (n))
-
-/* Accumulator type used by v256_sad_u16_init(), v256_sad_u16() and
-   v256_sad_u16_sum(). */
-typedef v256 sad256_internal_u16;
-
-/* Returns a v256_zero() accumulator for v256_sad_u16(). Declared with (void)
-   so that this is a real prototype in C rather than an unspecified-argument
-   declaration. */
-SIMD_INLINE sad256_internal_u16 v256_sad_u16_init(void) { return v256_zero(); }
-
-/* Accumulates the absolute differences of the unsigned 16-bit lanes of a and
-   b into the 32-bit lanes of s.
-   Implementation dependent return value. Result must be finalised with
- * v256_sad_u16_sum(). */
-SIMD_INLINE sad256_internal_u16 v256_sad_u16(sad256_internal_u16 s, v256 a,
- v256 b) {
-#if defined(__SSE4_1__)
- /* |a - b| computed as max(a, b) - min(a, b) on unsigned lanes. */
- v256 t = v256_sub_16(_mm256_max_epu16(a, b), _mm256_min_epu16(a, b));
-#else
- /* Without unsigned 16-bit min/max: flipping the top bit (xor 32768) lets the
-    signed compare order the lanes as unsigned; t is the mask where a < b. */
- v256 t = v256_cmplt_s16(v256_xor(a, v256_dup_16(32768)),
- v256_xor(b, v256_dup_16(32768)));
- /* Select (larger) - (smaller) per lane using the mask. */
- t = v256_sub_16(v256_or(v256_and(b, t), v256_andn(a, t)),
- v256_or(v256_and(a, t), v256_andn(b, t)));
-#endif
- /* Zero-extend the 16-bit differences to 32 bits and add them to s. */
- return v256_add_32(
- s, v256_add_32(v256_unpackhi_u16_s32(t), v256_unpacklo_u16_s32(t)));
-}
-
-/* Finalises a v256_sad_u16() accumulator: adds the two 128-bit halves lane by
-   lane, then sums the four resulting 32-bit lanes. */
-SIMD_INLINE uint32_t v256_sad_u16_sum(sad256_internal_u16 s) {
-  const v128 t = v128_add_32(v256_high_v128(s), v256_low_v128(s));
-  uint32_t total = v128_low_u32(t);
-  total += v128_low_u32(v128_shr_n_byte(t, 4));
-  total += v128_low_u32(v128_shr_n_byte(t, 8));
-  total += v128_low_u32(v128_shr_n_byte(t, 12));
-  return total;
-}
-
-/* Accumulator type used by v256_ssd_s16_init(), v256_ssd_s16() and
-   v256_ssd_s16_sum(). */
-typedef v256 ssd256_internal_s16;
-
-/* Returns a v256_zero() accumulator for v256_ssd_s16(). Declared with (void)
-   so that this is a real prototype in C rather than an unspecified-argument
-   declaration. */
-SIMD_INLINE ssd256_internal_s16 v256_ssd_s16_init(void) { return v256_zero(); }
-
-/* Accumulates the squared differences of the 16-bit lanes of a and b into
-   the 64-bit lanes of s: the 32-bit pair sums from v256_madd_s16() are
-   zero-extended to 64 bits before being added. The returned accumulator has
-   an implementation-dependent layout and must be finalised with
-   v256_ssd_s16_sum(). */
-SIMD_INLINE ssd256_internal_s16 v256_ssd_s16(ssd256_internal_s16 s, v256 a,
-                                             v256 b) {
-  const v256 diff = v256_sub_16(a, b);
-  const v256 squares = v256_madd_s16(diff, diff);
-  const v256 wide_hi = _mm256_unpackhi_epi32(squares, v256_zero());
-  const v256 wide_lo = _mm256_unpacklo_epi32(squares, v256_zero());
-  return v256_add_64(s, v256_add_64(wide_hi, wide_lo));
-}
-
-/* Finalises a v256_ssd_s16() accumulator: adds the two 128-bit halves as
-   64-bit lanes, then adds the two remaining 64-bit values. */
-SIMD_INLINE uint64_t v256_ssd_s16_sum(ssd256_internal_s16 s) {
-  const v128 t = v128_add_64(v256_high_v128(s), v256_low_v128(s));
-  const uint64_t low = v64_u64(v128_low_v64(t));
-  const uint64_t high = v64_u64(v128_high_v64(t));
-  return low + high;
-}
-
-#endif
-
-#endif // AOM_AOM_DSP_SIMD_V256_INTRINSICS_X86_H_
diff --git a/third_party/aom/aom_dsp/simd/v64_intrinsics.h b/third_party/aom/aom_dsp/simd/v64_intrinsics.h
deleted file mode 100644
index afc55428d..000000000
--- a/third_party/aom/aom_dsp/simd/v64_intrinsics.h
+++ /dev/null
@@ -1,232 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#ifndef AOM_AOM_DSP_SIMD_V64_INTRINSICS_H_
-#define AOM_AOM_DSP_SIMD_V64_INTRINSICS_H_
-
-#include <stdio.h>
-#include <stdlib.h>
-
-#include "aom_dsp/simd/v64_intrinsics_c.h"
-
-/* Fallback to plain, unoptimised C. */
-
-typedef c_v64 v64;
-
-SIMD_INLINE uint32_t v64_low_u32(v64 a) { return c_v64_low_u32(a); }
-SIMD_INLINE uint32_t v64_high_u32(v64 a) { return c_v64_high_u32(a); }
-SIMD_INLINE int32_t v64_low_s32(v64 a) { return c_v64_low_s32(a); }
-SIMD_INLINE int32_t v64_high_s32(v64 a) { return c_v64_high_s32(a); }
-SIMD_INLINE v64 v64_from_32(uint32_t x, uint32_t y) {
- return c_v64_from_32(x, y);
-}
-SIMD_INLINE v64 v64_from_64(uint64_t x) { return c_v64_from_64(x); }
-SIMD_INLINE uint64_t v64_u64(v64 x) { return c_v64_u64(x); }
-SIMD_INLINE v64 v64_from_16(uint16_t a, uint16_t b, uint16_t c, uint16_t d) {
- return c_v64_from_16(a, b, c, d);
-}
-
-SIMD_INLINE uint32_t u32_load_unaligned(const void *p) {
- return c_u32_load_unaligned(p);
-}
-SIMD_INLINE uint32_t u32_load_aligned(const void *p) {
- return c_u32_load_aligned(p);
-}
-SIMD_INLINE void u32_store_unaligned(void *p, uint32_t a) {
- c_u32_store_unaligned(p, a);
-}
-SIMD_INLINE void u32_store_aligned(void *p, uint32_t a) {
- c_u32_store_aligned(p, a);
-}
-
-SIMD_INLINE v64 v64_load_unaligned(const void *p) {
- return c_v64_load_unaligned(p);
-}
-SIMD_INLINE v64 v64_load_aligned(const void *p) {
- return c_v64_load_aligned(p);
-}
-
-SIMD_INLINE void v64_store_unaligned(void *p, v64 a) {
- c_v64_store_unaligned(p, a);
-}
-SIMD_INLINE void v64_store_aligned(void *p, v64 a) {
- c_v64_store_aligned(p, a);
-}
-
-SIMD_INLINE v64 v64_align(v64 a, v64 b, unsigned int c) {
- return c_v64_align(a, b, c);
-}
-
-SIMD_INLINE v64 v64_zero() { return c_v64_zero(); }
-SIMD_INLINE v64 v64_dup_8(uint8_t x) { return c_v64_dup_8(x); }
-SIMD_INLINE v64 v64_dup_16(uint16_t x) { return c_v64_dup_16(x); }
-SIMD_INLINE v64 v64_dup_32(uint32_t x) { return c_v64_dup_32(x); }
-
-SIMD_INLINE v64 v64_add_8(v64 a, v64 b) { return c_v64_add_8(a, b); }
-SIMD_INLINE v64 v64_add_16(v64 a, v64 b) { return c_v64_add_16(a, b); }
-SIMD_INLINE v64 v64_sadd_u8(v64 a, v64 b) { return c_v64_sadd_u8(a, b); }
-SIMD_INLINE v64 v64_sadd_s8(v64 a, v64 b) { return c_v64_sadd_s8(a, b); }
-SIMD_INLINE v64 v64_sadd_s16(v64 a, v64 b) { return c_v64_sadd_s16(a, b); }
-SIMD_INLINE v64 v64_add_32(v64 a, v64 b) { return c_v64_add_32(a, b); }
-SIMD_INLINE v64 v64_sub_8(v64 a, v64 b) { return c_v64_sub_8(a, b); }
-SIMD_INLINE v64 v64_ssub_u8(v64 a, v64 b) { return c_v64_ssub_u8(a, b); }
-SIMD_INLINE v64 v64_ssub_s8(v64 a, v64 b) { return c_v64_ssub_s8(a, b); }
-SIMD_INLINE v64 v64_sub_16(v64 a, v64 b) { return c_v64_sub_16(a, b); }
-SIMD_INLINE v64 v64_ssub_s16(v64 a, v64 b) { return c_v64_ssub_s16(a, b); }
-SIMD_INLINE v64 v64_ssub_u16(v64 a, v64 b) { return c_v64_ssub_u16(a, b); }
-SIMD_INLINE v64 v64_sub_32(v64 a, v64 b) { return c_v64_sub_32(a, b); }
-SIMD_INLINE v64 v64_abs_s16(v64 a) { return c_v64_abs_s16(a); }
-SIMD_INLINE v64 v64_abs_s8(v64 a) { return c_v64_abs_s8(a); }
-
-SIMD_INLINE v64 v64_ziplo_8(v64 a, v64 b) { return c_v64_ziplo_8(a, b); }
-SIMD_INLINE v64 v64_ziphi_8(v64 a, v64 b) { return c_v64_ziphi_8(a, b); }
-SIMD_INLINE v64 v64_ziplo_16(v64 a, v64 b) { return c_v64_ziplo_16(a, b); }
-SIMD_INLINE v64 v64_ziphi_16(v64 a, v64 b) { return c_v64_ziphi_16(a, b); }
-SIMD_INLINE v64 v64_ziplo_32(v64 a, v64 b) { return c_v64_ziplo_32(a, b); }
-SIMD_INLINE v64 v64_ziphi_32(v64 a, v64 b) { return c_v64_ziphi_32(a, b); }
-SIMD_INLINE v64 v64_unziplo_8(v64 a, v64 b) { return c_v64_unziplo_8(a, b); }
-SIMD_INLINE v64 v64_unziphi_8(v64 a, v64 b) { return c_v64_unziphi_8(a, b); }
-SIMD_INLINE v64 v64_unziplo_16(v64 a, v64 b) { return c_v64_unziplo_16(a, b); }
-SIMD_INLINE v64 v64_unziphi_16(v64 a, v64 b) { return c_v64_unziphi_16(a, b); }
-SIMD_INLINE v64 v64_unpacklo_u8_s16(v64 a) { return c_v64_unpacklo_u8_s16(a); }
-SIMD_INLINE v64 v64_unpackhi_u8_s16(v64 a) { return c_v64_unpackhi_u8_s16(a); }
-SIMD_INLINE v64 v64_unpacklo_s8_s16(v64 a) { return c_v64_unpacklo_s8_s16(a); }
-SIMD_INLINE v64 v64_unpackhi_s8_s16(v64 a) { return c_v64_unpackhi_s8_s16(a); }
-SIMD_INLINE v64 v64_pack_s32_s16(v64 a, v64 b) {
- return c_v64_pack_s32_s16(a, b);
-}
-SIMD_INLINE v64 v64_pack_s32_u16(v64 a, v64 b) {
- return c_v64_pack_s32_u16(a, b);
-}
-SIMD_INLINE v64 v64_pack_s16_u8(v64 a, v64 b) {
- return c_v64_pack_s16_u8(a, b);
-}
-SIMD_INLINE v64 v64_pack_s16_s8(v64 a, v64 b) {
- return c_v64_pack_s16_s8(a, b);
-}
-SIMD_INLINE v64 v64_unpacklo_u16_s32(v64 a) {
- return c_v64_unpacklo_u16_s32(a);
-}
-SIMD_INLINE v64 v64_unpacklo_s16_s32(v64 a) {
- return c_v64_unpacklo_s16_s32(a);
-}
-SIMD_INLINE v64 v64_unpackhi_u16_s32(v64 a) {
- return c_v64_unpackhi_u16_s32(a);
-}
-SIMD_INLINE v64 v64_unpackhi_s16_s32(v64 a) {
- return c_v64_unpackhi_s16_s32(a);
-}
-SIMD_INLINE v64 v64_shuffle_8(v64 a, v64 pattern) {
- return c_v64_shuffle_8(a, pattern);
-}
-
-typedef uint32_t sad64_internal;
-SIMD_INLINE sad64_internal v64_sad_u8_init() { return c_v64_sad_u8_init(); }
-SIMD_INLINE sad64_internal v64_sad_u8(sad64_internal s, v64 a, v64 b) {
- return c_v64_sad_u8(s, a, b);
-}
-SIMD_INLINE uint32_t v64_sad_u8_sum(sad64_internal s) {
- return c_v64_sad_u8_sum(s);
-}
-typedef uint32_t ssd64_internal;
-SIMD_INLINE ssd64_internal v64_ssd_u8_init() { return c_v64_ssd_u8_init(); }
-SIMD_INLINE ssd64_internal v64_ssd_u8(ssd64_internal s, v64 a, v64 b) {
- return c_v64_ssd_u8(s, a, b);
-}
-SIMD_INLINE uint32_t v64_ssd_u8_sum(ssd64_internal s) {
- return c_v64_ssd_u8_sum(s);
-}
-SIMD_INLINE int64_t v64_dotp_su8(v64 a, v64 b) { return c_v64_dotp_su8(a, b); }
-SIMD_INLINE int64_t v64_dotp_s16(v64 a, v64 b) { return c_v64_dotp_s16(a, b); }
-SIMD_INLINE uint64_t v64_hadd_u8(v64 a) { return c_v64_hadd_u8(a); }
-SIMD_INLINE int64_t v64_hadd_s16(v64 a) { return c_v64_hadd_s16(a); }
-
-SIMD_INLINE v64 v64_or(v64 a, v64 b) { return c_v64_or(a, b); }
-SIMD_INLINE v64 v64_xor(v64 a, v64 b) { return c_v64_xor(a, b); }
-SIMD_INLINE v64 v64_and(v64 a, v64 b) { return c_v64_and(a, b); }
-SIMD_INLINE v64 v64_andn(v64 a, v64 b) { return c_v64_andn(a, b); }
-
-SIMD_INLINE v64 v64_mullo_s16(v64 a, v64 b) { return c_v64_mullo_s16(a, b); }
-SIMD_INLINE v64 v64_mulhi_s16(v64 a, v64 b) { return c_v64_mulhi_s16(a, b); }
-SIMD_INLINE v64 v64_mullo_s32(v64 a, v64 b) { return c_v64_mullo_s32(a, b); }
-SIMD_INLINE v64 v64_madd_s16(v64 a, v64 b) { return c_v64_madd_s16(a, b); }
-SIMD_INLINE v64 v64_madd_us8(v64 a, v64 b) { return c_v64_madd_us8(a, b); }
-
-SIMD_INLINE v64 v64_avg_u8(v64 a, v64 b) { return c_v64_avg_u8(a, b); }
-SIMD_INLINE v64 v64_rdavg_u8(v64 a, v64 b) { return c_v64_rdavg_u8(a, b); }
-SIMD_INLINE v64 v64_rdavg_u16(v64 a, v64 b) { return c_v64_rdavg_u16(a, b); }
-SIMD_INLINE v64 v64_avg_u16(v64 a, v64 b) { return c_v64_avg_u16(a, b); }
-SIMD_INLINE v64 v64_min_u8(v64 a, v64 b) { return c_v64_min_u8(a, b); }
-SIMD_INLINE v64 v64_max_u8(v64 a, v64 b) { return c_v64_max_u8(a, b); }
-SIMD_INLINE v64 v64_min_s8(v64 a, v64 b) { return c_v64_min_s8(a, b); }
-SIMD_INLINE v64 v64_max_s8(v64 a, v64 b) { return c_v64_max_s8(a, b); }
-SIMD_INLINE v64 v64_min_s16(v64 a, v64 b) { return c_v64_min_s16(a, b); }
-SIMD_INLINE v64 v64_max_s16(v64 a, v64 b) { return c_v64_max_s16(a, b); }
-
-SIMD_INLINE v64 v64_cmpgt_s8(v64 a, v64 b) { return c_v64_cmpgt_s8(a, b); }
-SIMD_INLINE v64 v64_cmplt_s8(v64 a, v64 b) { return c_v64_cmplt_s8(a, b); }
-SIMD_INLINE v64 v64_cmpeq_8(v64 a, v64 b) { return c_v64_cmpeq_8(a, b); }
-SIMD_INLINE v64 v64_cmpgt_s16(v64 a, v64 b) { return c_v64_cmpgt_s16(a, b); }
-SIMD_INLINE v64 v64_cmplt_s16(v64 a, v64 b) { return c_v64_cmplt_s16(a, b); }
-SIMD_INLINE v64 v64_cmpeq_16(v64 a, v64 b) { return c_v64_cmpeq_16(a, b); }
-
-SIMD_INLINE v64 v64_shl_8(v64 a, unsigned int n) { return c_v64_shl_8(a, n); }
-SIMD_INLINE v64 v64_shr_u8(v64 a, unsigned int n) { return c_v64_shr_u8(a, n); }
-SIMD_INLINE v64 v64_shr_s8(v64 a, unsigned int n) { return c_v64_shr_s8(a, n); }
-SIMD_INLINE v64 v64_shl_16(v64 a, unsigned int n) { return c_v64_shl_16(a, n); }
-SIMD_INLINE v64 v64_shr_u16(v64 a, unsigned int n) {
- return c_v64_shr_u16(a, n);
-}
-SIMD_INLINE v64 v64_shr_s16(v64 a, unsigned int n) {
- return c_v64_shr_s16(a, n);
-}
-SIMD_INLINE v64 v64_shl_32(v64 a, unsigned int n) { return c_v64_shl_32(a, n); }
-SIMD_INLINE v64 v64_shr_u32(v64 a, unsigned int n) {
- return c_v64_shr_u32(a, n);
-}
-SIMD_INLINE v64 v64_shr_s32(v64 a, unsigned int n) {
- return c_v64_shr_s32(a, n);
-}
-SIMD_INLINE v64 v64_shr_n_byte(v64 a, unsigned int n) {
- return c_v64_shr_n_byte(a, n);
-}
-SIMD_INLINE v64 v64_shl_n_byte(v64 a, unsigned int n) {
- return c_v64_shl_n_byte(a, n);
-}
-SIMD_INLINE v64 v64_shl_n_8(v64 a, unsigned int c) {
- return c_v64_shl_n_8(a, c);
-}
-SIMD_INLINE v64 v64_shr_n_u8(v64 a, unsigned int c) {
- return c_v64_shr_n_u8(a, c);
-}
-SIMD_INLINE v64 v64_shr_n_s8(v64 a, unsigned int c) {
- return c_v64_shr_n_s8(a, c);
-}
-SIMD_INLINE v64 v64_shl_n_16(v64 a, unsigned int c) {
- return c_v64_shl_n_16(a, c);
-}
-SIMD_INLINE v64 v64_shr_n_u16(v64 a, unsigned int c) {
- return c_v64_shr_n_u16(a, c);
-}
-SIMD_INLINE v64 v64_shr_n_s16(v64 a, unsigned int c) {
- return c_v64_shr_n_s16(a, c);
-}
-SIMD_INLINE v64 v64_shl_n_32(v64 a, unsigned int c) {
- return c_v64_shl_n_32(a, c);
-}
-SIMD_INLINE v64 v64_shr_n_u32(v64 a, unsigned int c) {
- return c_v64_shr_n_u32(a, c);
-}
-SIMD_INLINE v64 v64_shr_n_s32(v64 a, unsigned int c) {
- return c_v64_shr_n_s32(a, c);
-}
-
-#endif // AOM_AOM_DSP_SIMD_V64_INTRINSICS_H_
diff --git a/third_party/aom/aom_dsp/simd/v64_intrinsics_arm.h b/third_party/aom/aom_dsp/simd/v64_intrinsics_arm.h
deleted file mode 100644
index 8f39ad6e8..000000000
--- a/third_party/aom/aom_dsp/simd/v64_intrinsics_arm.h
+++ /dev/null
@@ -1,680 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#ifndef AOM_AOM_DSP_SIMD_V64_INTRINSICS_ARM_H_
-#define AOM_AOM_DSP_SIMD_V64_INTRINSICS_ARM_H_
-
-#include <arm_neon.h>
-
-#include "aom_dsp/simd/v64_intrinsics_arm.h"
-#include "aom_ports/arm.h"
-
-#ifdef AOM_INCOMPATIBLE_GCC
-#error Incompatible gcc
-#endif
-
-typedef int64x1_t v64;
-
-SIMD_INLINE uint32_t v64_low_u32(v64 a) {
- return vget_lane_u32(vreinterpret_u32_s64(a), 0);
-}
-
-SIMD_INLINE uint32_t v64_high_u32(v64 a) {
- return vget_lane_u32(vreinterpret_u32_s64(a), 1);
-}
-
-SIMD_INLINE int32_t v64_low_s32(v64 a) {
- return vget_lane_s32(vreinterpret_s32_s64(a), 0);
-}
-
-SIMD_INLINE int32_t v64_high_s32(v64 a) {
- return vget_lane_s32(vreinterpret_s32_s64(a), 1);
-}
-
-SIMD_INLINE v64 v64_from_16(uint16_t a, uint16_t b, uint16_t c, uint16_t d) {
- return vcreate_s64((uint64_t)a << 48 | (uint64_t)b << 32 | (uint64_t)c << 16 |
- d);
-}
-
-SIMD_INLINE v64 v64_from_32(uint32_t x, uint32_t y) {
- return vcreate_s64((uint64_t)x << 32 | y);
-}
-
-SIMD_INLINE v64 v64_from_64(uint64_t x) { return vcreate_s64(x); }
-
-SIMD_INLINE uint64_t v64_u64(v64 x) { return (uint64_t)x; }
-
-SIMD_INLINE uint32_t u32_load_aligned(const void *p) {
- return *((uint32_t *)p);
-}
-
-SIMD_INLINE uint32_t u32_load_unaligned(const void *p) {
- return vget_lane_u32(vreinterpret_u32_u8(vld1_u8((const uint8_t *)p)), 0);
-}
-
-SIMD_INLINE void u32_store_aligned(void *p, uint32_t a) {
- *((uint32_t *)p) = a;
-}
-
-SIMD_INLINE void u32_store_unaligned(void *p, uint32_t a) {
-#if defined(__clang__)
- vst1_lane_u32((uint32_t *)p, vreinterpret_u32_s64((uint64x1_t)(uint64_t)a),
- 0);
-#elif defined(__CC_ARM)
- *(__packed uint32_t *)p) = a;
-#elif defined(__GNUC__)
- *((__attribute((packed)) uint32_t *)p) = a;
-#else
- vst1_lane_u32((uint32_t *)p, vreinterpret_u32_s64((uint64x1_t)(uint64_t)a),
- 0);
-#endif
-}
-
-SIMD_INLINE v64 v64_load_aligned(const void *p) {
- return vreinterpret_s64_u8(vld1_u8((const uint8_t *)p));
-}
-
-SIMD_INLINE v64 v64_load_unaligned(const void *p) {
- return v64_load_aligned(p);
-}
-
-SIMD_INLINE void v64_store_aligned(void *p, v64 r) {
- vst1_u8((uint8_t *)p, vreinterpret_u8_s64(r));
-}
-
-SIMD_INLINE void v64_store_unaligned(void *p, v64 r) {
- vst1_u8((uint8_t *)p, vreinterpret_u8_s64(r));
-}
-
-// The following function requires an immediate.
-// Some compilers will check this if it's optimising, others wont.
-SIMD_INLINE v64 v64_align(v64 a, v64 b, unsigned int c) {
-#if defined(__OPTIMIZE__) && __OPTIMIZE__ && !defined(__clang__)
- return c ? vreinterpret_s64_s8(
- vext_s8(vreinterpret_s8_s64(b), vreinterpret_s8_s64(a), c))
- : b;
-#else
- return c ? v64_from_64(((uint64_t)b >> c * 8) | ((uint64_t)a << (8 - c) * 8))
- : b;
-#endif
-}
-
-SIMD_INLINE v64 v64_zero() { return vreinterpret_s64_u8(vdup_n_u8(0)); }
-
-SIMD_INLINE v64 v64_dup_8(uint8_t x) {
- return vreinterpret_s64_u8(vdup_n_u8(x));
-}
-
-SIMD_INLINE v64 v64_dup_16(uint16_t x) {
- return vreinterpret_s64_u16(vdup_n_u16(x));
-}
-
-SIMD_INLINE v64 v64_dup_32(uint32_t x) {
- return vreinterpret_s64_u32(vdup_n_u32(x));
-}
-
-SIMD_INLINE int64_t v64_dotp_su8(v64 x, v64 y) {
- int16x8_t t =
- vmulq_s16(vmovl_s8(vreinterpret_s8_s64(x)),
- vreinterpretq_s16_u16(vmovl_u8(vreinterpret_u8_s64(y))));
-#if defined(__aarch64__)
- return vaddlvq_s16(t);
-#else
- int64x2_t r = vpaddlq_s32(vpaddlq_s16(t));
- return (int64_t)vadd_s64(vget_high_s64(r), vget_low_s64(r));
-#endif
-}
-
-SIMD_INLINE int64_t v64_dotp_s16(v64 x, v64 y) {
-#if defined(__aarch64__)
- return vaddlvq_s32(
- vmull_s16(vreinterpret_s16_s64(x), vreinterpret_s16_s64(y)));
-#else
- int64x2_t r =
- vpaddlq_s32(vmull_s16(vreinterpret_s16_s64(x), vreinterpret_s16_s64(y)));
- return (int64_t)(vget_high_s64(r) + vget_low_s64(r));
-#endif
-}
-
-SIMD_INLINE uint64_t v64_hadd_u8(v64 x) {
-#if defined(__aarch64__)
- return vaddlv_u8(vreinterpret_u8_s64(x));
-#else
- return (uint64_t)vpaddl_u32(vpaddl_u16(vpaddl_u8(vreinterpret_u8_s64(x))));
-#endif
-}
-
-SIMD_INLINE int64_t v64_hadd_s16(v64 a) {
- return (int64_t)vpaddl_s32(vpaddl_s16(vreinterpret_s16_s64(a)));
-}
-
-typedef uint16x8_t sad64_internal;
-
-SIMD_INLINE sad64_internal v64_sad_u8_init() { return vdupq_n_u16(0); }
-
-// Implementation dependent return value. Result must be finalised with
-// v64_sad_u8_sum().
-SIMD_INLINE sad64_internal v64_sad_u8(sad64_internal s, v64 a, v64 b) {
- return vabal_u8(s, vreinterpret_u8_s64(a), vreinterpret_u8_s64(b));
-}
-
-SIMD_INLINE uint32_t v64_sad_u8_sum(sad64_internal s) {
-#if defined(__aarch64__)
- return vaddlvq_u16(s);
-#else
- uint64x2_t r = vpaddlq_u32(vpaddlq_u16(s));
- return (uint32_t)(uint64_t)(vget_high_u64(r) + vget_low_u64(r));
-#endif
-}
-
-typedef uint32x4_t ssd64_internal;
-
-SIMD_INLINE ssd64_internal v64_ssd_u8_init() { return vdupq_n_u32(0); }
-
-// Implementation dependent return value. Result must be finalised with
-// v64_ssd_u8_sum().
-SIMD_INLINE ssd64_internal v64_ssd_u8(ssd64_internal s, v64 a, v64 b) {
- uint8x8_t t = vabd_u8(vreinterpret_u8_s64(a), vreinterpret_u8_s64(b));
- return vaddq_u32(s, vpaddlq_u16(vmull_u8(t, t)));
-}
-
-SIMD_INLINE uint32_t v64_ssd_u8_sum(ssd64_internal s) {
-#if defined(__aarch64__)
- return vaddvq_u32(s);
-#else
- uint64x2_t t = vpaddlq_u32(s);
- return vget_lane_u32(
- vreinterpret_u32_u64(vadd_u64(vget_high_u64(t), vget_low_u64(t))), 0);
-#endif
-}
-
-SIMD_INLINE v64 v64_or(v64 x, v64 y) { return vorr_s64(x, y); }
-
-SIMD_INLINE v64 v64_xor(v64 x, v64 y) { return veor_s64(x, y); }
-
-SIMD_INLINE v64 v64_and(v64 x, v64 y) { return vand_s64(x, y); }
-
-SIMD_INLINE v64 v64_andn(v64 x, v64 y) { return vbic_s64(x, y); }
-
-SIMD_INLINE v64 v64_add_8(v64 x, v64 y) {
- return vreinterpret_s64_u8(
- vadd_u8(vreinterpret_u8_s64(x), vreinterpret_u8_s64(y)));
-}
-
-SIMD_INLINE v64 v64_sadd_u8(v64 x, v64 y) {
- return vreinterpret_s64_u8(
- vqadd_u8(vreinterpret_u8_s64(x), vreinterpret_u8_s64(y)));
-}
-
-SIMD_INLINE v64 v64_sadd_s8(v64 x, v64 y) {
- return vreinterpret_s64_s8(
- vqadd_s8(vreinterpret_s8_s64(x), vreinterpret_s8_s64(y)));
-}
-
-SIMD_INLINE v64 v64_add_16(v64 x, v64 y) {
- return vreinterpret_s64_s16(
- vadd_s16(vreinterpret_s16_s64(x), vreinterpret_s16_s64(y)));
-}
-
-SIMD_INLINE v64 v64_sadd_s16(v64 x, v64 y) {
- return vreinterpret_s64_s16(
- vqadd_s16(vreinterpret_s16_s64(x), vreinterpret_s16_s64(y)));
-}
-
-SIMD_INLINE v64 v64_add_32(v64 x, v64 y) {
- return vreinterpret_s64_u32(
- vadd_u32(vreinterpret_u32_s64(x), vreinterpret_u32_s64(y)));
-}
-
-SIMD_INLINE v64 v64_sub_8(v64 x, v64 y) {
- return vreinterpret_s64_u8(
- vsub_u8(vreinterpret_u8_s64(x), vreinterpret_u8_s64(y)));
-}
-
-SIMD_INLINE v64 v64_sub_16(v64 x, v64 y) {
- return vreinterpret_s64_s16(
- vsub_s16(vreinterpret_s16_s64(x), vreinterpret_s16_s64(y)));
-}
-
-SIMD_INLINE v64 v64_ssub_s16(v64 x, v64 y) {
- return vreinterpret_s64_s16(
- vqsub_s16(vreinterpret_s16_s64(x), vreinterpret_s16_s64(y)));
-}
-
-SIMD_INLINE v64 v64_ssub_u16(v64 x, v64 y) {
- return vreinterpret_s64_u16(
- vqsub_u16(vreinterpret_u16_s64(x), vreinterpret_u16_s64(y)));
-}
-
-SIMD_INLINE v64 v64_ssub_u8(v64 x, v64 y) {
- return vreinterpret_s64_u8(
- vqsub_u8(vreinterpret_u8_s64(x), vreinterpret_u8_s64(y)));
-}
-
-SIMD_INLINE v64 v64_ssub_s8(v64 x, v64 y) {
- return vreinterpret_s64_s8(
- vqsub_s8(vreinterpret_s8_s64(x), vreinterpret_s8_s64(y)));
-}
-
-SIMD_INLINE v64 v64_sub_32(v64 x, v64 y) {
- return vreinterpret_s64_s32(
- vsub_s32(vreinterpret_s32_s64(x), vreinterpret_s32_s64(y)));
-}
-
-SIMD_INLINE v64 v64_abs_s16(v64 x) {
- return vreinterpret_s64_s16(vabs_s16(vreinterpret_s16_s64(x)));
-}
-
-SIMD_INLINE v64 v64_abs_s8(v64 x) {
- return vreinterpret_s64_s8(vabs_s8(vreinterpret_s8_s64(x)));
-}
-
-SIMD_INLINE v64 v64_mullo_s16(v64 x, v64 y) {
- return vreinterpret_s64_s16(
- vmul_s16(vreinterpret_s16_s64(x), vreinterpret_s16_s64(y)));
-}
-
-SIMD_INLINE v64 v64_mulhi_s16(v64 x, v64 y) {
-#if defined(__aarch64__)
- int16x8_t t = vreinterpretq_s16_s32(
- vmull_s16(vreinterpret_s16_s64(x), vreinterpret_s16_s64(y)));
- return vget_low_s64(vreinterpretq_s64_s16(vuzp2q_s16(t, t)));
-#else
- return vreinterpret_s64_s16(vmovn_s32(vshrq_n_s32(
- vmull_s16(vreinterpret_s16_s64(x), vreinterpret_s16_s64(y)), 16)));
-#endif
-}
-
-SIMD_INLINE v64 v64_mullo_s32(v64 x, v64 y) {
- return vreinterpret_s64_s32(
- vmul_s32(vreinterpret_s32_s64(x), vreinterpret_s32_s64(y)));
-}
-
-SIMD_INLINE v64 v64_madd_s16(v64 x, v64 y) {
- int32x4_t t = vmull_s16(vreinterpret_s16_s64(x), vreinterpret_s16_s64(y));
- return vreinterpret_s64_s32(
- vpadd_s32(vreinterpret_s32_s64(vget_low_s64(vreinterpretq_s64_s32(t))),
- vreinterpret_s32_s64(vget_high_s64(vreinterpretq_s64_s32(t)))));
-}
-
-SIMD_INLINE v64 v64_madd_us8(v64 x, v64 y) {
- int16x8_t t =
- vmulq_s16(vreinterpretq_s16_u16(vmovl_u8(vreinterpret_u8_s64(x))),
- vmovl_s8(vreinterpret_s8_s64(y)));
- return vreinterpret_s64_s16(vqmovn_s32(vpaddlq_s16(t)));
-}
-
-SIMD_INLINE v64 v64_avg_u8(v64 x, v64 y) {
- return vreinterpret_s64_u8(
- vrhadd_u8(vreinterpret_u8_s64(x), vreinterpret_u8_s64(y)));
-}
-
-SIMD_INLINE v64 v64_rdavg_u8(v64 x, v64 y) {
- return vreinterpret_s64_u8(
- vhadd_u8(vreinterpret_u8_s64(x), vreinterpret_u8_s64(y)));
-}
-
-SIMD_INLINE v64 v64_rdavg_u16(v64 x, v64 y) {
- return vreinterpret_s64_u16(
- vhadd_u16(vreinterpret_u16_s64(x), vreinterpret_u16_s64(y)));
-}
-
-SIMD_INLINE v64 v64_avg_u16(v64 x, v64 y) {
- return vreinterpret_s64_u16(
- vrhadd_u16(vreinterpret_u16_s64(x), vreinterpret_u16_s64(y)));
-}
-
-SIMD_INLINE v64 v64_max_u8(v64 x, v64 y) {
- return vreinterpret_s64_u8(
- vmax_u8(vreinterpret_u8_s64(x), vreinterpret_u8_s64(y)));
-}
-
-SIMD_INLINE v64 v64_min_u8(v64 x, v64 y) {
- return vreinterpret_s64_u8(
- vmin_u8(vreinterpret_u8_s64(x), vreinterpret_u8_s64(y)));
-}
-
-SIMD_INLINE v64 v64_max_s8(v64 x, v64 y) {
- return vreinterpret_s64_s8(
- vmax_s8(vreinterpret_s8_s64(x), vreinterpret_s8_s64(y)));
-}
-
-SIMD_INLINE v64 v64_min_s8(v64 x, v64 y) {
- return vreinterpret_s64_s8(
- vmin_s8(vreinterpret_s8_s64(x), vreinterpret_s8_s64(y)));
-}
-
-SIMD_INLINE v64 v64_max_s16(v64 x, v64 y) {
- return vreinterpret_s64_s16(
- vmax_s16(vreinterpret_s16_s64(x), vreinterpret_s16_s64(y)));
-}
-
-SIMD_INLINE v64 v64_min_s16(v64 x, v64 y) {
- return vreinterpret_s64_s16(
- vmin_s16(vreinterpret_s16_s64(x), vreinterpret_s16_s64(y)));
-}
-
-SIMD_INLINE v64 v64_ziplo_8(v64 x, v64 y) {
-#if defined(__aarch64__)
- return vreinterpret_s64_u8(
- vzip1_u8(vreinterpret_u8_s64(y), vreinterpret_u8_s64(x)));
-#else
- uint8x8x2_t r = vzip_u8(vreinterpret_u8_s64(y), vreinterpret_u8_s64(x));
- return vreinterpret_s64_u8(r.val[0]);
-#endif
-}
-
-SIMD_INLINE v64 v64_ziphi_8(v64 x, v64 y) {
-#if defined(__aarch64__)
- return vreinterpret_s64_u8(
- vzip2_u8(vreinterpret_u8_s64(y), vreinterpret_u8_s64(x)));
-#else
- uint8x8x2_t r = vzip_u8(vreinterpret_u8_s64(y), vreinterpret_u8_s64(x));
- return vreinterpret_s64_u8(r.val[1]);
-#endif
-}
-
-SIMD_INLINE v64 v64_ziplo_16(v64 x, v64 y) {
-#if defined(__aarch64__)
- return vreinterpret_s64_u16(
- vzip1_u16(vreinterpret_u16_s64(y), vreinterpret_u16_s64(x)));
-#else
- int16x4x2_t r = vzip_s16(vreinterpret_s16_s64(y), vreinterpret_s16_s64(x));
- return vreinterpret_s64_s16(r.val[0]);
-#endif
-}
-
-SIMD_INLINE v64 v64_ziphi_16(v64 x, v64 y) {
-#if defined(__aarch64__)
- return vreinterpret_s64_u16(
- vzip2_u16(vreinterpret_u16_s64(y), vreinterpret_u16_s64(x)));
-#else
- int16x4x2_t r = vzip_s16(vreinterpret_s16_s64(y), vreinterpret_s16_s64(x));
- return vreinterpret_s64_s16(r.val[1]);
-#endif
-}
-
-SIMD_INLINE v64 v64_ziplo_32(v64 x, v64 y) {
-#if defined(__aarch64__)
- return vreinterpret_s64_u32(
- vzip1_u32(vreinterpret_u32_s64(y), vreinterpret_u32_s64(x)));
-#else
- int32x2x2_t r = vzip_s32(vreinterpret_s32_s64(y), vreinterpret_s32_s64(x));
- return vreinterpret_s64_s32(r.val[0]);
-#endif
-}
-
-SIMD_INLINE v64 v64_ziphi_32(v64 x, v64 y) {
-#if defined(__aarch64__)
- return vreinterpret_s64_u32(
- vzip2_u32(vreinterpret_u32_s64(y), vreinterpret_u32_s64(x)));
-#else
- int32x2x2_t r = vzip_s32(vreinterpret_s32_s64(y), vreinterpret_s32_s64(x));
- return vreinterpret_s64_s32(r.val[1]);
-#endif
-}
-
-SIMD_INLINE v64 v64_unpacklo_u8_s16(v64 a) {
- return vreinterpret_s64_u16(vget_low_u16(vmovl_u8(vreinterpret_u8_s64(a))));
-}
-
-SIMD_INLINE v64 v64_unpackhi_u8_s16(v64 a) {
- return vreinterpret_s64_u16(vget_high_u16(vmovl_u8(vreinterpret_u8_s64(a))));
-}
-
-SIMD_INLINE v64 v64_unpacklo_s8_s16(v64 a) {
- return vreinterpret_s64_s16(vget_low_s16(vmovl_s8(vreinterpret_s8_s64(a))));
-}
-
-SIMD_INLINE v64 v64_unpackhi_s8_s16(v64 a) {
- return vreinterpret_s64_s16(vget_high_s16(vmovl_s8(vreinterpret_s8_s64(a))));
-}
-
-SIMD_INLINE v64 v64_pack_s32_s16(v64 x, v64 y) {
- return vreinterpret_s64_s16(vqmovn_s32(
- vcombine_s32(vreinterpret_s32_s64(y), vreinterpret_s32_s64(x))));
-}
-
-SIMD_INLINE v64 v64_pack_s32_u16(v64 x, v64 y) {
- return vreinterpret_s64_u16(vqmovun_s32(
- vcombine_s32(vreinterpret_s32_s64(y), vreinterpret_s32_s64(x))));
-}
-
-SIMD_INLINE v64 v64_pack_s16_u8(v64 x, v64 y) {
- return vreinterpret_s64_u8(vqmovun_s16(vreinterpretq_s16_s32(
- vcombine_s32(vreinterpret_s32_s64(y), vreinterpret_s32_s64(x)))));
-}
-
-SIMD_INLINE v64 v64_pack_s16_s8(v64 x, v64 y) {
- return vreinterpret_s64_s8(vqmovn_s16(vreinterpretq_s16_s32(
- vcombine_s32(vreinterpret_s32_s64(y), vreinterpret_s32_s64(x)))));
-}
-
-SIMD_INLINE v64 v64_unziplo_8(v64 x, v64 y) {
-#if defined(__aarch64__)
- return vreinterpret_s64_u8(
- vuzp1_u8(vreinterpret_u8_s64(y), vreinterpret_u8_s64(x)));
-#else
- uint8x8x2_t r = vuzp_u8(vreinterpret_u8_s64(y), vreinterpret_u8_s64(x));
- return vreinterpret_s64_u8(r.val[0]);
-#endif
-}
-
-SIMD_INLINE v64 v64_unziphi_8(v64 x, v64 y) {
-#if defined(__aarch64__)
- return vreinterpret_s64_u8(
- vuzp2_u8(vreinterpret_u8_s64(y), vreinterpret_u8_s64(x)));
-#else
- uint8x8x2_t r = vuzp_u8(vreinterpret_u8_s64(y), vreinterpret_u8_s64(x));
- return vreinterpret_s64_u8(r.val[1]);
-#endif
-}
-
-SIMD_INLINE v64 v64_unziplo_16(v64 x, v64 y) {
-#if defined(__aarch64__)
- return vreinterpret_s64_u16(
- vuzp1_u16(vreinterpret_u16_s64(y), vreinterpret_u16_s64(x)));
-#else
- uint16x4x2_t r = vuzp_u16(vreinterpret_u16_s64(y), vreinterpret_u16_s64(x));
- return vreinterpret_s64_u16(r.val[0]);
-#endif
-}
-
-SIMD_INLINE v64 v64_unziphi_16(v64 x, v64 y) {
-#if defined(__aarch64__)
- return vreinterpret_s64_u16(
- vuzp2_u16(vreinterpret_u16_s64(y), vreinterpret_u16_s64(x)));
-#else
- uint16x4x2_t r = vuzp_u16(vreinterpret_u16_s64(y), vreinterpret_u16_s64(x));
- return vreinterpret_s64_u16(r.val[1]);
-#endif
-}
-
-SIMD_INLINE v64 v64_unpacklo_s16_s32(v64 x) {
- return vreinterpret_s64_s32(vget_low_s32(vmovl_s16(vreinterpret_s16_s64(x))));
-}
-
-SIMD_INLINE v64 v64_unpacklo_u16_s32(v64 x) {
- return vreinterpret_s64_u32(vget_low_u32(vmovl_u16(vreinterpret_u16_s64(x))));
-}
-
-SIMD_INLINE v64 v64_unpackhi_s16_s32(v64 x) {
- return vreinterpret_s64_s32(
- vget_high_s32(vmovl_s16(vreinterpret_s16_s64(x))));
-}
-
-SIMD_INLINE v64 v64_unpackhi_u16_s32(v64 x) {
- return vreinterpret_s64_u32(
- vget_high_u32(vmovl_u16(vreinterpret_u16_s64(x))));
-}
-
-SIMD_INLINE v64 v64_shuffle_8(v64 x, v64 pattern) {
- return vreinterpret_s64_u8(
- vtbl1_u8(vreinterpret_u8_s64(x), vreinterpret_u8_s64(pattern)));
-}
-
-SIMD_INLINE v64 v64_cmpgt_s8(v64 x, v64 y) {
- return vreinterpret_s64_u8(
- vcgt_s8(vreinterpret_s8_s64(x), vreinterpret_s8_s64(y)));
-}
-
-SIMD_INLINE v64 v64_cmplt_s8(v64 x, v64 y) {
- return vreinterpret_s64_u8(
- vclt_s8(vreinterpret_s8_s64(x), vreinterpret_s8_s64(y)));
-}
-
-SIMD_INLINE v64 v64_cmpeq_8(v64 x, v64 y) {
- return vreinterpret_s64_u8(
- vceq_u8(vreinterpret_u8_s64(x), vreinterpret_u8_s64(y)));
-}
-
-SIMD_INLINE v64 v64_cmpgt_s16(v64 x, v64 y) {
- return vreinterpret_s64_u16(
- vcgt_s16(vreinterpret_s16_s64(x), vreinterpret_s16_s64(y)));
-}
-
-SIMD_INLINE v64 v64_cmplt_s16(v64 x, v64 y) {
- return vreinterpret_s64_u16(
- vclt_s16(vreinterpret_s16_s64(x), vreinterpret_s16_s64(y)));
-}
-
-SIMD_INLINE v64 v64_cmpeq_16(v64 x, v64 y) {
- return vreinterpret_s64_u16(
- vceq_s16(vreinterpret_s16_s64(x), vreinterpret_s16_s64(y)));
-}
-
-SIMD_INLINE v64 v64_shl_8(v64 a, unsigned int c) {
- return vreinterpret_s64_u8(vshl_u8(vreinterpret_u8_s64(a), vdup_n_s8(c)));
-}
-
-SIMD_INLINE v64 v64_shr_u8(v64 a, unsigned int c) {
- return vreinterpret_s64_u8(vshl_u8(vreinterpret_u8_s64(a), vdup_n_s8(-c)));
-}
-
-SIMD_INLINE v64 v64_shr_s8(v64 a, unsigned int c) {
- return vreinterpret_s64_s8(vshl_s8(vreinterpret_s8_s64(a), vdup_n_s8(-c)));
-}
-
-SIMD_INLINE v64 v64_shl_16(v64 a, unsigned int c) {
- return vreinterpret_s64_u16(vshl_u16(vreinterpret_u16_s64(a), vdup_n_s16(c)));
-}
-
-SIMD_INLINE v64 v64_shr_u16(v64 a, unsigned int c) {
- return vreinterpret_s64_u16(
- vshl_u16(vreinterpret_u16_s64(a), vdup_n_s16(-(int)c)));
-}
-
-SIMD_INLINE v64 v64_shr_s16(v64 a, unsigned int c) {
- return vreinterpret_s64_s16(
- vshl_s16(vreinterpret_s16_s64(a), vdup_n_s16(-(int)c)));
-}
-
-SIMD_INLINE v64 v64_shl_32(v64 a, unsigned int c) {
- return vreinterpret_s64_u32(vshl_u32(vreinterpret_u32_s64(a), vdup_n_s32(c)));
-}
-
-SIMD_INLINE v64 v64_shr_u32(v64 a, unsigned int c) {
- return vreinterpret_s64_u32(
- vshl_u32(vreinterpret_u32_s64(a), vdup_n_s32(-(int)c)));
-}
-
-SIMD_INLINE v64 v64_shr_s32(v64 a, unsigned int c) {
- return vreinterpret_s64_s32(
- vshl_s32(vreinterpret_s32_s64(a), vdup_n_s32(-(int)c)));
-}
-
-// The following functions require an immediate.
-// Some compilers will check this during optimisation, others wont.
-#if defined(__OPTIMIZE__) && __OPTIMIZE__ && !defined(__clang__)
-
-SIMD_INLINE v64 v64_shl_n_byte(v64 a, unsigned int c) {
- return vshl_n_s64(a, c * 8);
-}
-
-SIMD_INLINE v64 v64_shr_n_byte(v64 a, unsigned int c) {
- return c ? (v64)vshr_n_u64(vreinterpret_u64_s64(a), c * 8) : a;
-}
-
-SIMD_INLINE v64 v64_shl_n_8(v64 a, unsigned int c) {
- return vreinterpret_s64_u8(vshl_n_u8(vreinterpret_u8_s64(a), c));
-}
-
-SIMD_INLINE v64 v64_shr_n_u8(v64 a, unsigned int c) {
- return vreinterpret_s64_u8(vshr_n_u8(vreinterpret_u8_s64(a), c));
-}
-
-SIMD_INLINE v64 v64_shr_n_s8(v64 a, unsigned int c) {
- return vreinterpret_s64_s8(vshr_n_s8(vreinterpret_s8_s64(a), c));
-}
-
-SIMD_INLINE v64 v64_shl_n_16(v64 a, unsigned int c) {
- return vreinterpret_s64_u16(vshl_n_u16(vreinterpret_u16_s64(a), c));
-}
-
-SIMD_INLINE v64 v64_shr_n_u16(v64 a, unsigned int c) {
- return vreinterpret_s64_u16(vshr_n_u16(vreinterpret_u16_s64(a), c));
-}
-
-SIMD_INLINE v64 v64_shr_n_s16(v64 a, unsigned int c) {
- return vreinterpret_s64_s16(vshr_n_s16(vreinterpret_s16_s64(a), c));
-}
-
-SIMD_INLINE v64 v64_shl_n_32(v64 a, unsigned int c) {
- return vreinterpret_s64_u32(vshl_n_u32(vreinterpret_u32_s64(a), c));
-}
-
-SIMD_INLINE v64 v64_shr_n_u32(v64 a, unsigned int c) {
- return vreinterpret_s64_u32(vshr_n_u32(vreinterpret_u32_s64(a), c));
-}
-
-SIMD_INLINE v64 v64_shr_n_s32(v64 a, unsigned int c) {
- return vreinterpret_s64_s32(vshr_n_s32(vreinterpret_s32_s64(a), c));
-}
-
-#else
-
-SIMD_INLINE v64 v64_shl_n_byte(v64 a, unsigned int c) {
- return v64_from_64(v64_u64(a) << c * 8);
-}
-
-SIMD_INLINE v64 v64_shr_n_byte(v64 a, unsigned int c) {
- return v64_from_64(v64_u64(a) >> c * 8);
-}
-
-SIMD_INLINE v64 v64_shl_n_8(v64 a, unsigned int c) { return v64_shl_8(a, c); }
-
-SIMD_INLINE v64 v64_shr_n_u8(v64 a, unsigned int c) { return v64_shr_u8(a, c); }
-
-SIMD_INLINE v64 v64_shr_n_s8(v64 a, unsigned int c) { return v64_shr_s8(a, c); }
-
-SIMD_INLINE v64 v64_shl_n_16(v64 a, unsigned int c) { return v64_shl_16(a, c); }
-
-SIMD_INLINE v64 v64_shr_n_u16(v64 a, unsigned int c) {
- return v64_shr_u16(a, c);
-}
-
-SIMD_INLINE v64 v64_shr_n_s16(v64 a, unsigned int c) {
- return v64_shr_s16(a, c);
-}
-
-SIMD_INLINE v64 v64_shl_n_32(v64 a, unsigned int c) { return v64_shl_32(a, c); }
-
-SIMD_INLINE v64 v64_shr_n_u32(v64 a, unsigned int c) {
- return v64_shr_u32(a, c);
-}
-
-SIMD_INLINE v64 v64_shr_n_s32(v64 a, unsigned int c) {
- return v64_shr_s32(a, c);
-}
-
-#endif
-
-#endif // AOM_AOM_DSP_SIMD_V64_INTRINSICS_ARM_H_
diff --git a/third_party/aom/aom_dsp/simd/v64_intrinsics_c.h b/third_party/aom/aom_dsp/simd/v64_intrinsics_c.h
deleted file mode 100644
index 028d68c4f..000000000
--- a/third_party/aom/aom_dsp/simd/v64_intrinsics_c.h
+++ /dev/null
@@ -1,968 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#ifndef AOM_AOM_DSP_SIMD_V64_INTRINSICS_C_H_
-#define AOM_AOM_DSP_SIMD_V64_INTRINSICS_C_H_
-
-/* Note: This implements the intrinsics in plain, unoptimised C.
- Intended for reference, porting or debugging. */
-
-#include <stdio.h>
-#include <stdlib.h>
-
-#include "config/aom_config.h"
-
-typedef union {
- uint8_t u8[8];
- uint16_t u16[4];
- uint32_t u32[2];
- uint64_t u64;
- int8_t s8[8];
- int16_t s16[4];
- int32_t s32[2];
- int64_t s64;
-} c_v64;
-
-SIMD_INLINE uint32_t c_v64_low_u32(c_v64 a) {
- return a.u32[!!CONFIG_BIG_ENDIAN];
-}
-
-SIMD_INLINE uint32_t c_v64_high_u32(c_v64 a) {
- return a.u32[!CONFIG_BIG_ENDIAN];
-}
-
-SIMD_INLINE int32_t c_v64_low_s32(c_v64 a) {
- return a.s32[!!CONFIG_BIG_ENDIAN];
-}
-
-SIMD_INLINE int32_t c_v64_high_s32(c_v64 a) {
- return a.s32[!CONFIG_BIG_ENDIAN];
-}
-
-SIMD_INLINE c_v64 c_v64_from_32(uint32_t x, uint32_t y) {
- c_v64 t;
- t.u32[!CONFIG_BIG_ENDIAN] = x;
- t.u32[!!CONFIG_BIG_ENDIAN] = y;
- return t;
-}
-
-SIMD_INLINE c_v64 c_v64_from_64(uint64_t x) {
- c_v64 t;
- t.u64 = x;
- return t;
-}
-
-SIMD_INLINE uint64_t c_v64_u64(c_v64 x) { return x.u64; }
-
-SIMD_INLINE c_v64 c_v64_from_16(uint16_t a, uint16_t b, uint16_t c,
- uint16_t d) {
- c_v64 t;
- if (CONFIG_BIG_ENDIAN) {
- t.u16[0] = a;
- t.u16[1] = b;
- t.u16[2] = c;
- t.u16[3] = d;
- } else {
- t.u16[3] = a;
- t.u16[2] = b;
- t.u16[1] = c;
- t.u16[0] = d;
- }
- return t;
-}
-
-SIMD_INLINE uint32_t c_u32_load_unaligned(const void *p) {
- uint32_t t;
- uint8_t *pp = (uint8_t *)p;
- uint8_t *q = (uint8_t *)&t;
- int c;
- for (c = 0; c < 4; c++) q[c] = pp[c];
- return t;
-}
-
-SIMD_INLINE void c_u32_store_unaligned(void *p, uint32_t a) {
- uint8_t *pp = (uint8_t *)p;
- uint8_t *q = (uint8_t *)&a;
- int c;
- for (c = 0; c < 4; c++) pp[c] = q[c];
-}
-
-SIMD_INLINE uint32_t c_u32_load_aligned(const void *p) {
- if (SIMD_CHECK && (uintptr_t)p & 3) {
- fprintf(stderr, "Error: Unaligned u32 load at %p\n", p);
- abort();
- }
- return c_u32_load_unaligned(p);
-}
-
-SIMD_INLINE void c_u32_store_aligned(void *p, uint32_t a) {
- if (SIMD_CHECK && (uintptr_t)p & 3) {
- fprintf(stderr, "Error: Unaligned u32 store at %p\n", p);
- abort();
- }
- c_u32_store_unaligned(p, a);
-}
-
-SIMD_INLINE c_v64 c_v64_load_unaligned(const void *p) {
- c_v64 t;
- uint8_t *pp = (uint8_t *)p;
- uint8_t *q = (uint8_t *)&t;
- int c;
- for (c = 0; c < 8; c++) q[c] = pp[c];
- return t;
-}
-
-SIMD_INLINE c_v64 c_v64_load_aligned(const void *p) {
- if (SIMD_CHECK && (uintptr_t)p & 7) {
- fprintf(stderr, "Error: Unaligned c_v64 load at %p\n", p);
- abort();
- }
- return c_v64_load_unaligned(p);
-}
-
-SIMD_INLINE void c_v64_store_unaligned(void *p, c_v64 a) {
- uint8_t *q = (uint8_t *)p;
- uint8_t *r = (uint8_t *)&a;
- int c;
- for (c = 0; c < 8; c++) q[c] = r[c];
-}
-
-SIMD_INLINE void c_v64_store_aligned(void *p, c_v64 a) {
- if (SIMD_CHECK && (uintptr_t)p & 7) {
- fprintf(stderr, "Error: Unaligned c_v64 store at %p\n", p);
- abort();
- }
- c_v64_store_unaligned(p, a);
-}
-
-SIMD_INLINE c_v64 c_v64_zero() {
- c_v64 t;
- t.u64 = 0;
- return t;
-}
-
-SIMD_INLINE c_v64 c_v64_dup_8(uint8_t x) {
- c_v64 t;
- t.u8[0] = t.u8[1] = t.u8[2] = t.u8[3] = t.u8[4] = t.u8[5] = t.u8[6] =
- t.u8[7] = x;
- return t;
-}
-
-SIMD_INLINE c_v64 c_v64_dup_16(uint16_t x) {
- c_v64 t;
- t.u16[0] = t.u16[1] = t.u16[2] = t.u16[3] = x;
- return t;
-}
-
-SIMD_INLINE c_v64 c_v64_dup_32(uint32_t x) {
- c_v64 t;
- t.u32[0] = t.u32[1] = x;
- return t;
-}
-
-SIMD_INLINE c_v64 c_v64_add_8(c_v64 a, c_v64 b) {
- c_v64 t;
- int c;
- for (c = 0; c < 8; c++) t.u8[c] = a.u8[c] + b.u8[c];
- return t;
-}
-
-SIMD_INLINE c_v64 c_v64_add_16(c_v64 a, c_v64 b) {
- c_v64 t;
- int c;
- for (c = 0; c < 4; c++) t.u16[c] = a.u16[c] + b.u16[c];
- return t;
-}
-
-SIMD_INLINE c_v64 c_v64_sadd_u8(c_v64 a, c_v64 b) {
- c_v64 t;
- int c;
- for (c = 0; c < 8; c++)
- t.u8[c] = (int16_t)a.u8[c] + (int16_t)b.u8[c] > 255
- ? 255
- : (int16_t)a.u8[c] + (int16_t)b.u8[c] < 0
- ? 0
- : (int16_t)a.u8[c] + (int16_t)b.u8[c];
- return t;
-}
-
-SIMD_INLINE c_v64 c_v64_sadd_s8(c_v64 a, c_v64 b) {
- c_v64 t;
- int c;
- for (c = 0; c < 8; c++)
- t.s8[c] = (int16_t)a.s8[c] + (int16_t)b.s8[c] > 127
- ? 127
- : (int16_t)a.s8[c] + (int16_t)b.s8[c] < -128
- ? -128
- : (int16_t)a.s8[c] + (int16_t)b.s8[c];
- return t;
-}
-
-SIMD_INLINE c_v64 c_v64_sadd_s16(c_v64 a, c_v64 b) {
- c_v64 t;
- int c;
- for (c = 0; c < 4; c++)
- t.s16[c] = (int32_t)a.s16[c] + (int32_t)b.s16[c] > 32767
- ? 32767
- : (int32_t)a.s16[c] + (int32_t)b.s16[c] < -32768
- ? -32768
- : (int32_t)a.s16[c] + (int32_t)b.s16[c];
- return t;
-}
-
-SIMD_INLINE c_v64 c_v64_add_32(c_v64 a, c_v64 b) {
- c_v64 t;
- t.u32[0] = (uint32_t)((uint64_t)a.u32[0] + b.u32[0]);
- t.u32[1] = (uint32_t)((uint64_t)a.u32[1] + b.u32[1]);
- return t;
-}
-
-SIMD_INLINE c_v64 c_v64_sub_8(c_v64 a, c_v64 b) {
- c_v64 t;
- int c;
- for (c = 0; c < 8; c++) t.u8[c] = a.u8[c] - b.u8[c];
- return t;
-}
-
-SIMD_INLINE c_v64 c_v64_ssub_u8(c_v64 a, c_v64 b) {
- c_v64 t;
- int c;
- for (c = 0; c < 8; c++) t.u8[c] = a.u8[c] < b.u8[c] ? 0 : a.u8[c] - b.u8[c];
- return t;
-}
-
-SIMD_INLINE c_v64 c_v64_ssub_s8(c_v64 a, c_v64 b) {
- c_v64 t;
- int c;
- for (c = 0; c < 8; c++) {
- int16_t d = (int16_t)a.s8[c] - (int16_t)b.s8[c];
- t.s8[c] = d > 127 ? 127 : (d < -128 ? -128 : d);
- }
- return t;
-}
-
-SIMD_INLINE c_v64 c_v64_sub_16(c_v64 a, c_v64 b) {
- c_v64 t;
- int c;
- for (c = 0; c < 4; c++) t.u16[c] = a.u16[c] - b.u16[c];
- return t;
-}
-
-SIMD_INLINE c_v64 c_v64_ssub_s16(c_v64 a, c_v64 b) {
- c_v64 t;
- int c;
- for (c = 0; c < 4; c++)
- t.s16[c] = (int32_t)a.s16[c] - (int32_t)b.s16[c] < -32768
- ? -32768
- : (int32_t)a.s16[c] - (int32_t)b.s16[c] > 32767
- ? 32767
- : (int32_t)a.s16[c] - (int32_t)b.s16[c];
- return t;
-}
-
-SIMD_INLINE c_v64 c_v64_ssub_u16(c_v64 a, c_v64 b) {
- c_v64 t;
- int c;
- for (c = 0; c < 4; c++)
- t.u16[c] =
- (int32_t)a.u16[c] - (int32_t)b.u16[c] < 0 ? 0 : a.u16[c] - b.u16[c];
- return t;
-}
-
-SIMD_INLINE c_v64 c_v64_sub_32(c_v64 a, c_v64 b) {
- c_v64 t;
- t.u32[0] = (uint32_t)((int64_t)a.u32[0] - b.u32[0]);
- t.u32[1] = (uint32_t)((int64_t)a.u32[1] - b.u32[1]);
- return t;
-}
-
-SIMD_INLINE c_v64 c_v64_abs_s16(c_v64 a) {
- c_v64 t;
- int c;
- for (c = 0; c < 4; c++)
- t.u16[c] = (int16_t)a.u16[c] > 0 ? a.u16[c] : -a.u16[c];
- return t;
-}
-
-SIMD_INLINE c_v64 c_v64_abs_s8(c_v64 a) {
- c_v64 t;
- int c;
- for (c = 0; c < 8; c++) t.u8[c] = (int8_t)a.u8[c] > 0 ? a.u8[c] : -a.u8[c];
- return t;
-}
-
-SIMD_INLINE c_v64 _c_v64_zip_8(c_v64 a, c_v64 b, int mode) {
- c_v64 t;
- if (mode) {
- t.u8[7] = a.u8[7];
- t.u8[6] = b.u8[7];
- t.u8[5] = a.u8[6];
- t.u8[4] = b.u8[6];
- t.u8[3] = a.u8[5];
- t.u8[2] = b.u8[5];
- t.u8[1] = a.u8[4];
- t.u8[0] = b.u8[4];
- } else {
- t.u8[7] = a.u8[3];
- t.u8[6] = b.u8[3];
- t.u8[5] = a.u8[2];
- t.u8[4] = b.u8[2];
- t.u8[3] = a.u8[1];
- t.u8[2] = b.u8[1];
- t.u8[1] = a.u8[0];
- t.u8[0] = b.u8[0];
- }
- return t;
-}
-
-SIMD_INLINE c_v64 c_v64_ziplo_8(c_v64 a, c_v64 b) {
- return CONFIG_BIG_ENDIAN ? _c_v64_zip_8(b, a, 1) : _c_v64_zip_8(a, b, 0);
-}
-
-SIMD_INLINE c_v64 c_v64_ziphi_8(c_v64 a, c_v64 b) {
- return CONFIG_BIG_ENDIAN ? _c_v64_zip_8(b, a, 0) : _c_v64_zip_8(a, b, 1);
-}
-
-SIMD_INLINE c_v64 _c_v64_zip_16(c_v64 a, c_v64 b, int mode) {
- c_v64 t;
- if (mode) {
- t.u16[3] = a.u16[3];
- t.u16[2] = b.u16[3];
- t.u16[1] = a.u16[2];
- t.u16[0] = b.u16[2];
- } else {
- t.u16[3] = a.u16[1];
- t.u16[2] = b.u16[1];
- t.u16[1] = a.u16[0];
- t.u16[0] = b.u16[0];
- }
- return t;
-}
-
-SIMD_INLINE c_v64 c_v64_ziplo_16(c_v64 a, c_v64 b) {
- return CONFIG_BIG_ENDIAN ? _c_v64_zip_16(b, a, 1) : _c_v64_zip_16(a, b, 0);
-}
-
-SIMD_INLINE c_v64 c_v64_ziphi_16(c_v64 a, c_v64 b) {
- return CONFIG_BIG_ENDIAN ? _c_v64_zip_16(b, a, 0) : _c_v64_zip_16(a, b, 1);
-}
-
-SIMD_INLINE c_v64 _c_v64_zip_32(c_v64 a, c_v64 b, int mode) {
- c_v64 t;
- if (mode) {
- t.u32[1] = a.u32[1];
- t.u32[0] = b.u32[1];
- } else {
- t.u32[1] = a.u32[0];
- t.u32[0] = b.u32[0];
- }
- return t;
-}
-
-SIMD_INLINE c_v64 c_v64_ziplo_32(c_v64 a, c_v64 b) {
- return CONFIG_BIG_ENDIAN ? _c_v64_zip_32(b, a, 1) : _c_v64_zip_32(a, b, 0);
-}
-
-SIMD_INLINE c_v64 c_v64_ziphi_32(c_v64 a, c_v64 b) {
- return CONFIG_BIG_ENDIAN ? _c_v64_zip_32(b, a, 0) : _c_v64_zip_32(a, b, 1);
-}
-
-SIMD_INLINE c_v64 _c_v64_unzip_8(c_v64 a, c_v64 b, int mode) {
- c_v64 t;
- if (mode) {
- t.u8[7] = b.u8[7];
- t.u8[6] = b.u8[5];
- t.u8[5] = b.u8[3];
- t.u8[4] = b.u8[1];
- t.u8[3] = a.u8[7];
- t.u8[2] = a.u8[5];
- t.u8[1] = a.u8[3];
- t.u8[0] = a.u8[1];
- } else {
- t.u8[7] = a.u8[6];
- t.u8[6] = a.u8[4];
- t.u8[5] = a.u8[2];
- t.u8[4] = a.u8[0];
- t.u8[3] = b.u8[6];
- t.u8[2] = b.u8[4];
- t.u8[1] = b.u8[2];
- t.u8[0] = b.u8[0];
- }
- return t;
-}
-
-SIMD_INLINE c_v64 c_v64_unziplo_8(c_v64 a, c_v64 b) {
- return CONFIG_BIG_ENDIAN ? _c_v64_unzip_8(a, b, 1) : _c_v64_unzip_8(a, b, 0);
-}
-
-SIMD_INLINE c_v64 c_v64_unziphi_8(c_v64 a, c_v64 b) {
- return CONFIG_BIG_ENDIAN ? _c_v64_unzip_8(b, a, 0) : _c_v64_unzip_8(b, a, 1);
-}
-
-SIMD_INLINE c_v64 _c_v64_unzip_16(c_v64 a, c_v64 b, int mode) {
- c_v64 t;
- if (mode) {
- t.u16[3] = b.u16[3];
- t.u16[2] = b.u16[1];
- t.u16[1] = a.u16[3];
- t.u16[0] = a.u16[1];
- } else {
- t.u16[3] = a.u16[2];
- t.u16[2] = a.u16[0];
- t.u16[1] = b.u16[2];
- t.u16[0] = b.u16[0];
- }
- return t;
-}
-
-SIMD_INLINE c_v64 c_v64_unziplo_16(c_v64 a, c_v64 b) {
- return CONFIG_BIG_ENDIAN ? _c_v64_unzip_16(a, b, 1)
- : _c_v64_unzip_16(a, b, 0);
-}
-
-SIMD_INLINE c_v64 c_v64_unziphi_16(c_v64 a, c_v64 b) {
- return CONFIG_BIG_ENDIAN ? _c_v64_unzip_16(b, a, 0)
- : _c_v64_unzip_16(b, a, 1);
-}
-
-SIMD_INLINE c_v64 c_v64_unpacklo_u8_s16(c_v64 a) {
- c_v64 t;
- int endian = !!CONFIG_BIG_ENDIAN * 4;
- t.s16[3] = (int16_t)a.u8[3 + endian];
- t.s16[2] = (int16_t)a.u8[2 + endian];
- t.s16[1] = (int16_t)a.u8[1 + endian];
- t.s16[0] = (int16_t)a.u8[0 + endian];
- return t;
-}
-
-SIMD_INLINE c_v64 c_v64_unpackhi_u8_s16(c_v64 a) {
- c_v64 t;
- int endian = !!CONFIG_BIG_ENDIAN * 4;
- t.s16[3] = (int16_t)a.u8[7 - endian];
- t.s16[2] = (int16_t)a.u8[6 - endian];
- t.s16[1] = (int16_t)a.u8[5 - endian];
- t.s16[0] = (int16_t)a.u8[4 - endian];
- return t;
-}
-
-SIMD_INLINE c_v64 c_v64_unpacklo_s8_s16(c_v64 a) {
- c_v64 t;
- int endian = !!CONFIG_BIG_ENDIAN * 4;
- t.s16[3] = (int16_t)a.s8[3 + endian];
- t.s16[2] = (int16_t)a.s8[2 + endian];
- t.s16[1] = (int16_t)a.s8[1 + endian];
- t.s16[0] = (int16_t)a.s8[0 + endian];
- return t;
-}
-
-SIMD_INLINE c_v64 c_v64_unpackhi_s8_s16(c_v64 a) {
- c_v64 t;
- int endian = !!CONFIG_BIG_ENDIAN * 4;
- t.s16[3] = (int16_t)a.s8[7 - endian];
- t.s16[2] = (int16_t)a.s8[6 - endian];
- t.s16[1] = (int16_t)a.s8[5 - endian];
- t.s16[0] = (int16_t)a.s8[4 - endian];
- return t;
-}
-
-SIMD_INLINE c_v64 c_v64_pack_s32_s16(c_v64 a, c_v64 b) {
- c_v64 t;
- if (CONFIG_BIG_ENDIAN) {
- c_v64 u = a;
- a = b;
- b = u;
- }
- t.s16[3] = a.s32[1] > 32767 ? 32767 : a.s32[1] < -32768 ? -32768 : a.s32[1];
- t.s16[2] = a.s32[0] > 32767 ? 32767 : a.s32[0] < -32768 ? -32768 : a.s32[0];
- t.s16[1] = b.s32[1] > 32767 ? 32767 : b.s32[1] < -32768 ? -32768 : b.s32[1];
- t.s16[0] = b.s32[0] > 32767 ? 32767 : b.s32[0] < -32768 ? -32768 : b.s32[0];
- return t;
-}
-
-SIMD_INLINE c_v64 c_v64_pack_s32_u16(c_v64 a, c_v64 b) {
- c_v64 t;
- if (CONFIG_BIG_ENDIAN) {
- c_v64 u = a;
- a = b;
- b = u;
- }
- t.u16[3] = a.s32[1] > 65535 ? 65535 : a.s32[1] < 0 ? 0 : a.s32[1];
- t.u16[2] = a.s32[0] > 65535 ? 65535 : a.s32[0] < 0 ? 0 : a.s32[0];
- t.u16[1] = b.s32[1] > 65535 ? 65535 : b.s32[1] < 0 ? 0 : b.s32[1];
- t.u16[0] = b.s32[0] > 65535 ? 65535 : b.s32[0] < 0 ? 0 : b.s32[0];
- return t;
-}
-
-SIMD_INLINE c_v64 c_v64_pack_s16_u8(c_v64 a, c_v64 b) {
- c_v64 t;
- if (CONFIG_BIG_ENDIAN) {
- c_v64 u = a;
- a = b;
- b = u;
- }
- t.u8[7] = a.s16[3] > 255 ? 255 : a.s16[3] < 0 ? 0 : a.s16[3];
- t.u8[6] = a.s16[2] > 255 ? 255 : a.s16[2] < 0 ? 0 : a.s16[2];
- t.u8[5] = a.s16[1] > 255 ? 255 : a.s16[1] < 0 ? 0 : a.s16[1];
- t.u8[4] = a.s16[0] > 255 ? 255 : a.s16[0] < 0 ? 0 : a.s16[0];
- t.u8[3] = b.s16[3] > 255 ? 255 : b.s16[3] < 0 ? 0 : b.s16[3];
- t.u8[2] = b.s16[2] > 255 ? 255 : b.s16[2] < 0 ? 0 : b.s16[2];
- t.u8[1] = b.s16[1] > 255 ? 255 : b.s16[1] < 0 ? 0 : b.s16[1];
- t.u8[0] = b.s16[0] > 255 ? 255 : b.s16[0] < 0 ? 0 : b.s16[0];
- return t;
-}
-
-SIMD_INLINE c_v64 c_v64_pack_s16_s8(c_v64 a, c_v64 b) {
- c_v64 t;
- if (CONFIG_BIG_ENDIAN) {
- c_v64 u = a;
- a = b;
- b = u;
- }
- t.u8[7] = a.s16[3] > 127 ? 127 : a.s16[3] < -128 ? 128 : a.s16[3];
- t.u8[6] = a.s16[2] > 127 ? 127 : a.s16[2] < -128 ? 128 : a.s16[2];
- t.u8[5] = a.s16[1] > 127 ? 127 : a.s16[1] < -128 ? 128 : a.s16[1];
- t.u8[4] = a.s16[0] > 127 ? 127 : a.s16[0] < -128 ? 128 : a.s16[0];
- t.u8[3] = b.s16[3] > 127 ? 127 : b.s16[3] < -128 ? 128 : b.s16[3];
- t.u8[2] = b.s16[2] > 127 ? 127 : b.s16[2] < -128 ? 128 : b.s16[2];
- t.u8[1] = b.s16[1] > 127 ? 127 : b.s16[1] < -128 ? 128 : b.s16[1];
- t.u8[0] = b.s16[0] > 127 ? 127 : b.s16[0] < -128 ? 128 : b.s16[0];
- return t;
-}
-
-SIMD_INLINE c_v64 c_v64_unpacklo_u16_s32(c_v64 a) {
- c_v64 t;
- t.s32[1] = a.u16[1 + !!CONFIG_BIG_ENDIAN * 2];
- t.s32[0] = a.u16[0 + !!CONFIG_BIG_ENDIAN * 2];
- return t;
-}
-
-SIMD_INLINE c_v64 c_v64_unpacklo_s16_s32(c_v64 a) {
- c_v64 t;
- t.s32[1] = a.s16[1 + !!CONFIG_BIG_ENDIAN * 2];
- t.s32[0] = a.s16[0 + !!CONFIG_BIG_ENDIAN * 2];
- return t;
-}
-
-SIMD_INLINE c_v64 c_v64_unpackhi_u16_s32(c_v64 a) {
- c_v64 t;
- t.s32[1] = a.u16[3 - !!CONFIG_BIG_ENDIAN * 2];
- t.s32[0] = a.u16[2 - !!CONFIG_BIG_ENDIAN * 2];
- return t;
-}
-
-SIMD_INLINE c_v64 c_v64_unpackhi_s16_s32(c_v64 a) {
- c_v64 t;
- t.s32[1] = a.s16[3 - !!CONFIG_BIG_ENDIAN * 2];
- t.s32[0] = a.s16[2 - !!CONFIG_BIG_ENDIAN * 2];
- return t;
-}
-
-SIMD_INLINE c_v64 c_v64_shuffle_8(c_v64 a, c_v64 pattern) {
- c_v64 t;
- int c;
- for (c = 0; c < 8; c++) {
- if (SIMD_CHECK && (pattern.u8[c] & ~7)) {
- fprintf(stderr, "Error: Undefined v64_shuffle_8 index %d/%d\n",
- pattern.u8[c], c);
- abort();
- }
- t.u8[c] =
- a.u8[CONFIG_BIG_ENDIAN ? 7 - (pattern.u8[c] & 7) : pattern.u8[c] & 7];
- }
- return t;
-}
-
-SIMD_INLINE int64_t c_v64_dotp_su8(c_v64 a, c_v64 b) {
- return a.s8[7] * b.u8[7] + a.s8[6] * b.u8[6] + a.s8[5] * b.u8[5] +
- a.s8[4] * b.u8[4] + a.s8[3] * b.u8[3] + a.s8[2] * b.u8[2] +
- a.s8[1] * b.u8[1] + a.s8[0] * b.u8[0];
-}
-
-SIMD_INLINE int64_t c_v64_dotp_s16(c_v64 a, c_v64 b) {
- return (int64_t)(a.s16[3] * b.s16[3] + a.s16[2] * b.s16[2]) +
- (int64_t)(a.s16[1] * b.s16[1] + a.s16[0] * b.s16[0]);
-}
-
-SIMD_INLINE uint64_t c_v64_hadd_u8(c_v64 a) {
- return a.u8[7] + a.u8[6] + a.u8[5] + a.u8[4] + a.u8[3] + a.u8[2] + a.u8[1] +
- a.u8[0];
-}
-
-SIMD_INLINE int64_t c_v64_hadd_s16(c_v64 a) {
- return a.s16[3] + a.s16[2] + a.s16[1] + a.s16[0];
-}
-
-typedef uint32_t c_sad64_internal;
-
-/* Implementation dependent return value. Result must be finalised with
- v64_sad_u8_sum().
- The result for more than 32 v64_sad_u8() calls is undefined. */
-SIMD_INLINE c_sad64_internal c_v64_sad_u8_init() { return 0; }
-
-SIMD_INLINE c_sad64_internal c_v64_sad_u8(c_sad64_internal s, c_v64 a,
- c_v64 b) {
- int c;
- for (c = 0; c < 8; c++)
- s += a.u8[c] > b.u8[c] ? a.u8[c] - b.u8[c] : b.u8[c] - a.u8[c];
- return s;
-}
-
-SIMD_INLINE uint32_t c_v64_sad_u8_sum(c_sad64_internal s) { return s; }
-
-typedef uint32_t c_ssd64_internal;
-
-/* Implementation dependent return value. Result must be finalised with
- * v64_ssd_u8_sum(). */
-SIMD_INLINE c_ssd64_internal c_v64_ssd_u8_init() { return 0; }
-
-SIMD_INLINE c_ssd64_internal c_v64_ssd_u8(c_ssd64_internal s, c_v64 a,
- c_v64 b) {
- int c;
- for (c = 0; c < 8; c++) s += (a.u8[c] - b.u8[c]) * (a.u8[c] - b.u8[c]);
- return s;
-}
-
-SIMD_INLINE uint32_t c_v64_ssd_u8_sum(c_ssd64_internal s) { return s; }
-
-SIMD_INLINE c_v64 c_v64_or(c_v64 a, c_v64 b) {
- c_v64 t;
- t.u64 = a.u64 | b.u64;
- return t;
-}
-
-SIMD_INLINE c_v64 c_v64_xor(c_v64 a, c_v64 b) {
- c_v64 t;
- t.u64 = a.u64 ^ b.u64;
- return t;
-}
-
-SIMD_INLINE c_v64 c_v64_and(c_v64 a, c_v64 b) {
- c_v64 t;
- t.u64 = a.u64 & b.u64;
- return t;
-}
-
-SIMD_INLINE c_v64 c_v64_andn(c_v64 a, c_v64 b) {
- c_v64 t;
- t.u64 = a.u64 & ~b.u64;
- return t;
-}
-
-SIMD_INLINE c_v64 c_v64_mullo_s16(c_v64 a, c_v64 b) {
- c_v64 t;
- int c;
- for (c = 0; c < 4; c++) t.s16[c] = (int16_t)(a.s16[c] * b.s16[c]);
- return t;
-}
-
-SIMD_INLINE c_v64 c_v64_mulhi_s16(c_v64 a, c_v64 b) {
- c_v64 t;
- int c;
- for (c = 0; c < 4; c++) t.s16[c] = (a.s16[c] * b.s16[c]) >> 16;
- return t;
-}
-
-SIMD_INLINE c_v64 c_v64_mullo_s32(c_v64 a, c_v64 b) {
- c_v64 t;
- t.s32[0] = (int32_t)((int64_t)a.s32[0] * b.s32[0]);
- t.s32[1] = (int32_t)((int64_t)a.s32[1] * b.s32[1]);
- return t;
-}
-
-SIMD_INLINE c_v64 c_v64_madd_s16(c_v64 a, c_v64 b) {
- c_v64 t;
- t.s32[0] = a.s16[0] * b.s16[0] + a.s16[1] * b.s16[1];
- t.s32[1] = a.s16[2] * b.s16[2] + a.s16[3] * b.s16[3];
- return t;
-}
-
-SIMD_INLINE c_v64 c_v64_madd_us8(c_v64 a, c_v64 b) {
- c_v64 t;
- int32_t u;
- u = a.u8[0] * b.s8[0] + a.u8[1] * b.s8[1];
- t.s16[0] = u > 32767 ? 32767 : u < -32768 ? -32768 : u;
- u = a.u8[2] * b.s8[2] + a.u8[3] * b.s8[3];
- t.s16[1] = u > 32767 ? 32767 : u < -32768 ? -32768 : u;
- u = a.u8[4] * b.s8[4] + a.u8[5] * b.s8[5];
- t.s16[2] = u > 32767 ? 32767 : u < -32768 ? -32768 : u;
- u = a.u8[6] * b.s8[6] + a.u8[7] * b.s8[7];
- t.s16[3] = u > 32767 ? 32767 : u < -32768 ? -32768 : u;
- return t;
-}
-
-SIMD_INLINE c_v64 c_v64_avg_u8(c_v64 a, c_v64 b) {
- c_v64 t;
- int c;
- for (c = 0; c < 8; c++) t.u8[c] = (a.u8[c] + b.u8[c] + 1) >> 1;
- return t;
-}
-
-SIMD_INLINE c_v64 c_v64_rdavg_u8(c_v64 a, c_v64 b) {
- c_v64 t;
- int c;
- for (c = 0; c < 8; c++) t.u8[c] = (a.u8[c] + b.u8[c]) >> 1;
- return t;
-}
-
-SIMD_INLINE c_v64 c_v64_rdavg_u16(c_v64 a, c_v64 b) {
- c_v64 t;
- int c;
- for (c = 0; c < 4; c++) t.u16[c] = (a.u16[c] + b.u16[c]) >> 1;
- return t;
-}
-
-SIMD_INLINE c_v64 c_v64_avg_u16(c_v64 a, c_v64 b) {
- c_v64 t;
- int c;
- for (c = 0; c < 4; c++) t.u16[c] = (a.u16[c] + b.u16[c] + 1) >> 1;
- return t;
-}
-
-SIMD_INLINE c_v64 c_v64_min_u8(c_v64 a, c_v64 b) {
- c_v64 t;
- int c;
- for (c = 0; c < 8; c++) t.u8[c] = a.u8[c] > b.u8[c] ? b.u8[c] : a.u8[c];
- return t;
-}
-
-SIMD_INLINE c_v64 c_v64_max_u8(c_v64 a, c_v64 b) {
- c_v64 t;
- int c;
- for (c = 0; c < 8; c++) t.u8[c] = a.u8[c] > b.u8[c] ? a.u8[c] : b.u8[c];
- return t;
-}
-
-SIMD_INLINE c_v64 c_v64_min_s8(c_v64 a, c_v64 b) {
- c_v64 t;
- int c;
- for (c = 0; c < 8; c++) t.s8[c] = a.s8[c] > b.s8[c] ? b.s8[c] : a.s8[c];
- return t;
-}
-
-SIMD_INLINE c_v64 c_v64_max_s8(c_v64 a, c_v64 b) {
- c_v64 t;
- int c;
- for (c = 0; c < 8; c++) t.s8[c] = a.s8[c] > b.s8[c] ? a.s8[c] : b.s8[c];
- return t;
-}
-
-SIMD_INLINE c_v64 c_v64_min_s16(c_v64 a, c_v64 b) {
- c_v64 t;
- int c;
- for (c = 0; c < 4; c++) t.s16[c] = a.s16[c] > b.s16[c] ? b.s16[c] : a.s16[c];
- return t;
-}
-
-SIMD_INLINE c_v64 c_v64_max_s16(c_v64 a, c_v64 b) {
- c_v64 t;
- int c;
- for (c = 0; c < 4; c++) t.s16[c] = a.s16[c] > b.s16[c] ? a.s16[c] : b.s16[c];
- return t;
-}
-
-SIMD_INLINE c_v64 c_v64_cmpgt_s8(c_v64 a, c_v64 b) {
- c_v64 t;
- int c;
- for (c = 0; c < 8; c++) t.s8[c] = -(a.s8[c] > b.s8[c]);
- return t;
-}
-
-SIMD_INLINE c_v64 c_v64_cmplt_s8(c_v64 a, c_v64 b) {
- c_v64 t;
- int c;
- for (c = 0; c < 8; c++) t.s8[c] = -(a.s8[c] < b.s8[c]);
- return t;
-}
-
-SIMD_INLINE c_v64 c_v64_cmpeq_8(c_v64 a, c_v64 b) {
- c_v64 t;
- int c;
- for (c = 0; c < 8; c++) t.s8[c] = -(a.u8[c] == b.u8[c]);
- return t;
-}
-
-SIMD_INLINE c_v64 c_v64_cmpgt_s16(c_v64 a, c_v64 b) {
- c_v64 t;
- int c;
- for (c = 0; c < 4; c++) t.s16[c] = -(a.s16[c] > b.s16[c]);
- return t;
-}
-
-SIMD_INLINE c_v64 c_v64_cmplt_s16(c_v64 a, c_v64 b) {
- c_v64 t;
- int c;
- for (c = 0; c < 4; c++) t.s16[c] = -(a.s16[c] < b.s16[c]);
- return t;
-}
-
-SIMD_INLINE c_v64 c_v64_cmpeq_16(c_v64 a, c_v64 b) {
- c_v64 t;
- int c;
- for (c = 0; c < 4; c++) t.s16[c] = -(a.u16[c] == b.u16[c]);
- return t;
-}
-
-SIMD_INLINE c_v64 c_v64_shl_8(c_v64 a, unsigned int n) {
- c_v64 t;
- int c;
- if (SIMD_CHECK && n > 7) {
- fprintf(stderr, "Error: Undefined u8 shift left %d\n", n);
- abort();
- }
- for (c = 0; c < 8; c++) t.s8[c] = a.u8[c] << n;
- return t;
-}
-
-SIMD_INLINE c_v64 c_v64_shr_u8(c_v64 a, unsigned int n) {
- c_v64 t;
- int c;
- if (SIMD_CHECK && n > 7) {
- fprintf(stderr, "Error: Undefined u8 shift right %d\n", n);
- abort();
- }
- for (c = 0; c < 8; c++) t.u8[c] = a.u8[c] >> n;
- return t;
-}
-
-SIMD_INLINE c_v64 c_v64_shr_s8(c_v64 a, unsigned int n) {
- c_v64 t;
- int c;
- if (SIMD_CHECK && n > 7) {
- fprintf(stderr, "Error: Undefined s8 shift right %d\n", n);
- abort();
- }
- for (c = 0; c < 8; c++) t.s8[c] = a.s8[c] >> n;
- return t;
-}
-
-SIMD_INLINE c_v64 c_v64_shl_16(c_v64 a, unsigned int n) {
- c_v64 t;
- int c;
- if (SIMD_CHECK && n > 15) {
- fprintf(stderr, "Error: Undefined u16 shift left %d\n", n);
- abort();
- }
- for (c = 0; c < 4; c++) t.u16[c] = a.u16[c] << n;
- return t;
-}
-
-SIMD_INLINE c_v64 c_v64_shr_u16(c_v64 a, unsigned int n) {
- c_v64 t;
- int c;
- if (SIMD_CHECK && n > 15) {
- fprintf(stderr, "Error: Undefined u16 shift right %d\n", n);
- abort();
- }
- for (c = 0; c < 4; c++) t.u16[c] = a.u16[c] >> n;
- return t;
-}
-
-SIMD_INLINE c_v64 c_v64_shr_s16(c_v64 a, unsigned int n) {
- c_v64 t;
- int c;
- if (SIMD_CHECK && n > 15) {
- fprintf(stderr, "Error: undefined s16 shift right %d\n", n);
- abort();
- }
- for (c = 0; c < 4; c++) t.s16[c] = a.s16[c] >> n;
- return t;
-}
-
-SIMD_INLINE c_v64 c_v64_shl_32(c_v64 a, unsigned int n) {
- c_v64 t;
- if (SIMD_CHECK && n > 31) {
- fprintf(stderr, "Error: undefined u32 shift left %d\n", n);
- abort();
- }
- t.u32[1] = a.u32[1] << n;
- t.u32[0] = a.u32[0] << n;
- return t;
-}
-
-SIMD_INLINE c_v64 c_v64_shr_u32(c_v64 a, unsigned int n) {
- c_v64 t;
- if (SIMD_CHECK && n > 31) {
- fprintf(stderr, "Error: undefined u32 shift right %d\n", n);
- abort();
- }
- t.u32[1] = a.u32[1] >> n;
- t.u32[0] = a.u32[0] >> n;
- return t;
-}
-
-SIMD_INLINE c_v64 c_v64_shr_s32(c_v64 a, unsigned int n) {
- c_v64 t;
- if (SIMD_CHECK && n > 31) {
- fprintf(stderr, "Error: undefined s32 shift right %d\n", n);
- abort();
- }
- t.s32[1] = a.s32[1] >> n;
- t.s32[0] = a.s32[0] >> n;
- return t;
-}
-
-SIMD_INLINE c_v64 c_v64_shr_n_byte(c_v64 x, unsigned int i) {
- c_v64 t;
- t.u64 = x.u64 >> i * 8;
- return t;
-}
-
-SIMD_INLINE c_v64 c_v64_shl_n_byte(c_v64 x, unsigned int i) {
- c_v64 t;
- t.u64 = x.u64 << i * 8;
- return t;
-}
-
-SIMD_INLINE c_v64 c_v64_align(c_v64 a, c_v64 b, unsigned int c) {
- if (SIMD_CHECK && c > 7) {
- fprintf(stderr, "Error: undefined alignment %d\n", c);
- abort();
- }
- return c ? c_v64_or(c_v64_shr_n_byte(b, c), c_v64_shl_n_byte(a, 8 - c)) : b;
-}
-
-SIMD_INLINE c_v64 c_v64_shl_n_8(c_v64 a, unsigned int c) {
- return c_v64_shl_8(a, c);
-}
-
-SIMD_INLINE c_v64 c_v64_shr_n_u8(c_v64 a, unsigned int c) {
- return c_v64_shr_u8(a, c);
-}
-
-SIMD_INLINE c_v64 c_v64_shr_n_s8(c_v64 a, unsigned int c) {
- return c_v64_shr_s8(a, c);
-}
-
-SIMD_INLINE c_v64 c_v64_shl_n_16(c_v64 a, unsigned int c) {
- return c_v64_shl_16(a, c);
-}
-
-SIMD_INLINE c_v64 c_v64_shr_n_u16(c_v64 a, unsigned int c) {
- return c_v64_shr_u16(a, c);
-}
-
-SIMD_INLINE c_v64 c_v64_shr_n_s16(c_v64 a, unsigned int c) {
- return c_v64_shr_s16(a, c);
-}
-
-SIMD_INLINE c_v64 c_v64_shl_n_32(c_v64 a, unsigned int c) {
- return c_v64_shl_32(a, c);
-}
-
-SIMD_INLINE c_v64 c_v64_shr_n_u32(c_v64 a, unsigned int c) {
- return c_v64_shr_u32(a, c);
-}
-
-SIMD_INLINE c_v64 c_v64_shr_n_s32(c_v64 a, unsigned int c) {
- return c_v64_shr_s32(a, c);
-}
-
-#endif // AOM_AOM_DSP_SIMD_V64_INTRINSICS_C_H_
diff --git a/third_party/aom/aom_dsp/simd/v64_intrinsics_x86.h b/third_party/aom/aom_dsp/simd/v64_intrinsics_x86.h
deleted file mode 100644
index 5f9a57b37..000000000
--- a/third_party/aom/aom_dsp/simd/v64_intrinsics_x86.h
+++ /dev/null
@@ -1,491 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#ifndef AOM_AOM_DSP_SIMD_V64_INTRINSICS_X86_H_
-#define AOM_AOM_DSP_SIMD_V64_INTRINSICS_X86_H_
-
-#include <emmintrin.h>
-#if defined(__SSSE3__)
-#include <tmmintrin.h>
-#endif
-#if defined(__SSE4_1__)
-#include <smmintrin.h>
-#endif
-
-typedef __m128i v64;
-
-SIMD_INLINE uint32_t v64_low_u32(v64 a) {
- return (uint32_t)_mm_cvtsi128_si32(a);
-}
-
-SIMD_INLINE uint32_t v64_high_u32(v64 a) {
- return (uint32_t)_mm_cvtsi128_si32(_mm_srli_si128(a, 4));
-}
-
-SIMD_INLINE int32_t v64_low_s32(v64 a) { return (int32_t)_mm_cvtsi128_si32(a); }
-
-SIMD_INLINE int32_t v64_high_s32(v64 a) {
- return (int32_t)_mm_cvtsi128_si32(_mm_srli_si128(a, 4));
-}
-
-SIMD_INLINE v64 v64_from_16(uint16_t a, uint16_t b, uint16_t c, uint16_t d) {
- return _mm_packs_epi32(
- _mm_set_epi32((int16_t)a, (int16_t)b, (int16_t)c, (int16_t)d),
- _mm_setzero_si128());
-}
-
-SIMD_INLINE v64 v64_from_32(uint32_t x, uint32_t y) {
- return _mm_set_epi32(0, 0, x, y);
-}
-
-SIMD_INLINE v64 v64_from_64(uint64_t x) {
-#ifdef __x86_64__
- return _mm_cvtsi64_si128(x);
-#else
- return _mm_set_epi32(0, 0, x >> 32, (uint32_t)x);
-#endif
-}
-
-SIMD_INLINE uint64_t v64_u64(v64 x) {
- return (uint64_t)v64_low_u32(x) | ((uint64_t)v64_high_u32(x) << 32);
-}
-
-SIMD_INLINE uint32_t u32_load_aligned(const void *p) {
- return *((uint32_t *)p);
-}
-
-SIMD_INLINE uint32_t u32_load_unaligned(const void *p) {
- return *((uint32_t *)p);
-}
-
-SIMD_INLINE void u32_store_aligned(void *p, uint32_t a) {
- *((uint32_t *)p) = a;
-}
-
-SIMD_INLINE void u32_store_unaligned(void *p, uint32_t a) {
- *((uint32_t *)p) = a;
-}
-
-SIMD_INLINE v64 v64_load_aligned(const void *p) {
- return _mm_loadl_epi64((__m128i *)p);
-}
-
-SIMD_INLINE v64 v64_load_unaligned(const void *p) {
- return _mm_loadl_epi64((__m128i *)p);
-}
-
-SIMD_INLINE void v64_store_aligned(void *p, v64 a) {
- _mm_storel_epi64((__m128i *)p, a);
-}
-
-SIMD_INLINE void v64_store_unaligned(void *p, v64 a) {
- _mm_storel_epi64((__m128i *)p, a);
-}
-
-#if defined(__OPTIMIZE__) && __OPTIMIZE__ && !defined(__clang__)
-#define v64_align(a, b, c) \
- ((c) ? _mm_srli_si128(_mm_unpacklo_epi64(b, a), (c)) : b)
-#else
-#define v64_align(a, b, c) \
- ((c) ? v64_from_64((v64_u64(b) >> (c)*8) | (v64_u64(a) << (8 - (c)) * 8)) \
- : (b))
-#endif
-
-SIMD_INLINE v64 v64_zero() { return _mm_setzero_si128(); }
-
-SIMD_INLINE v64 v64_dup_8(uint8_t x) { return _mm_set1_epi8(x); }
-
-SIMD_INLINE v64 v64_dup_16(uint16_t x) { return _mm_set1_epi16(x); }
-
-SIMD_INLINE v64 v64_dup_32(uint32_t x) { return _mm_set1_epi32(x); }
-
-SIMD_INLINE v64 v64_add_8(v64 a, v64 b) { return _mm_add_epi8(a, b); }
-
-SIMD_INLINE v64 v64_add_16(v64 a, v64 b) { return _mm_add_epi16(a, b); }
-
-SIMD_INLINE v64 v64_sadd_u8(v64 a, v64 b) { return _mm_adds_epu8(a, b); }
-
-SIMD_INLINE v64 v64_sadd_s8(v64 a, v64 b) { return _mm_adds_epi8(a, b); }
-
-SIMD_INLINE v64 v64_sadd_s16(v64 a, v64 b) { return _mm_adds_epi16(a, b); }
-
-SIMD_INLINE v64 v64_add_32(v64 a, v64 b) { return _mm_add_epi32(a, b); }
-
-SIMD_INLINE v64 v64_sub_8(v64 a, v64 b) { return _mm_sub_epi8(a, b); }
-
-SIMD_INLINE v64 v64_ssub_u8(v64 a, v64 b) { return _mm_subs_epu8(a, b); }
-
-SIMD_INLINE v64 v64_ssub_s8(v64 a, v64 b) { return _mm_subs_epi8(a, b); }
-
-SIMD_INLINE v64 v64_sub_16(v64 a, v64 b) { return _mm_sub_epi16(a, b); }
-
-SIMD_INLINE v64 v64_ssub_s16(v64 a, v64 b) { return _mm_subs_epi16(a, b); }
-
-SIMD_INLINE v64 v64_ssub_u16(v64 a, v64 b) { return _mm_subs_epu16(a, b); }
-
-SIMD_INLINE v64 v64_sub_32(v64 a, v64 b) { return _mm_sub_epi32(a, b); }
-
-SIMD_INLINE v64 v64_abs_s16(v64 a) {
-#if defined(__SSSE3__)
- return _mm_abs_epi16(a);
-#else
- return _mm_max_epi16(a, _mm_sub_epi16(_mm_setzero_si128(), a));
-#endif
-}
-
-SIMD_INLINE v64 v64_abs_s8(v64 a) {
-#if defined(__SSSE3__)
- return _mm_abs_epi8(a);
-#else
- v64 sign = _mm_cmplt_epi8(a, _mm_setzero_si128());
- return _mm_xor_si128(sign, _mm_add_epi8(a, sign));
-#endif
-}
-
-SIMD_INLINE v64 v64_ziplo_8(v64 a, v64 b) { return _mm_unpacklo_epi8(b, a); }
-
-SIMD_INLINE v64 v64_ziphi_8(v64 a, v64 b) {
- return _mm_srli_si128(_mm_unpacklo_epi8(b, a), 8);
-}
-
-SIMD_INLINE v64 v64_ziplo_16(v64 a, v64 b) { return _mm_unpacklo_epi16(b, a); }
-
-SIMD_INLINE v64 v64_ziphi_16(v64 a, v64 b) {
- return _mm_srli_si128(_mm_unpacklo_epi16(b, a), 8);
-}
-
-SIMD_INLINE v64 v64_ziplo_32(v64 a, v64 b) { return _mm_unpacklo_epi32(b, a); }
-
-SIMD_INLINE v64 v64_ziphi_32(v64 a, v64 b) {
- return _mm_srli_si128(_mm_unpacklo_epi32(b, a), 8);
-}
-
-SIMD_INLINE v64 v64_pack_s32_s16(v64 a, v64 b) {
- __m128i t = _mm_unpacklo_epi64(b, a);
- return _mm_packs_epi32(t, t);
-}
-
-SIMD_INLINE v64 v64_pack_s32_u16(v64 a, v64 b) {
-#if defined(__SSE4_1__)
- __m128i t = _mm_unpacklo_epi64(b, a);
- return _mm_packus_epi32(t, t);
-#else
- int32_t ah = v64_high_u32(a);
- int32_t al = v64_low_u32(a);
- int32_t bh = v64_high_u32(b);
- int32_t bl = v64_low_u32(b);
- return v64_from_16(ah > 65535 ? 65535 : ah < 0 ? 0 : ah,
- al > 65535 ? 65535 : al < 0 ? 0 : al,
- bh > 65535 ? 65535 : bh < 0 ? 0 : bh,
- bl > 65535 ? 65535 : bl < 0 ? 0 : bl);
-#endif
-}
-
-SIMD_INLINE v64 v64_pack_s16_u8(v64 a, v64 b) {
- __m128i t = _mm_unpacklo_epi64(b, a);
- return _mm_packus_epi16(t, t);
-}
-
-SIMD_INLINE v64 v64_pack_s16_s8(v64 a, v64 b) {
- __m128i t = _mm_unpacklo_epi64(b, a);
- return _mm_packs_epi16(t, t);
-}
-
-SIMD_INLINE v64 v64_unziphi_8(v64 a, v64 b) {
-#if defined(__SSSE3__)
- return _mm_shuffle_epi8(_mm_unpacklo_epi64(b, a),
- v64_from_64(0x0f0d0b0907050301LL));
-#else
- return _mm_packus_epi16(
- _mm_unpacklo_epi64(_mm_srli_epi16(b, 8), _mm_srli_epi16(a, 8)),
- _mm_setzero_si128());
-#endif
-}
-
-SIMD_INLINE v64 v64_unziplo_8(v64 a, v64 b) {
-#if defined(__SSSE3__)
- return _mm_shuffle_epi8(_mm_unpacklo_epi64(b, a),
- v64_from_64(0x0e0c0a0806040200LL));
-#else
- return v64_unziphi_8(_mm_slli_si128(a, 1), _mm_slli_si128(b, 1));
-#endif
-}
-
-SIMD_INLINE v64 v64_unziphi_16(v64 a, v64 b) {
-#if defined(__SSSE3__)
- return _mm_shuffle_epi8(_mm_unpacklo_epi64(b, a),
- v64_from_64(0x0f0e0b0a07060302LL));
-#else
- return _mm_packs_epi32(
- _mm_unpacklo_epi64(_mm_srai_epi32(b, 16), _mm_srai_epi32(a, 16)),
- _mm_setzero_si128());
-#endif
-}
-
-SIMD_INLINE v64 v64_unziplo_16(v64 a, v64 b) {
-#if defined(__SSSE3__)
- return _mm_shuffle_epi8(_mm_unpacklo_epi64(b, a),
- v64_from_64(0x0d0c090805040100LL));
-#else
- return v64_unziphi_16(_mm_slli_si128(a, 2), _mm_slli_si128(b, 2));
-#endif
-}
-
-SIMD_INLINE v64 v64_unpacklo_u8_s16(v64 a) {
- return _mm_unpacklo_epi8(a, _mm_setzero_si128());
-}
-
-SIMD_INLINE v64 v64_unpackhi_u8_s16(v64 a) {
- return _mm_srli_si128(_mm_unpacklo_epi8(a, _mm_setzero_si128()), 8);
-}
-
-SIMD_INLINE v64 v64_unpacklo_s8_s16(v64 a) {
- return _mm_srai_epi16(_mm_unpacklo_epi8(a, a), 8);
-}
-
-SIMD_INLINE v64 v64_unpackhi_s8_s16(v64 a) {
- return _mm_srli_si128(_mm_srai_epi16(_mm_unpacklo_epi8(a, a), 8), 8);
-}
-
-SIMD_INLINE v64 v64_unpacklo_u16_s32(v64 a) {
- return _mm_unpacklo_epi16(a, _mm_setzero_si128());
-}
-
-SIMD_INLINE v64 v64_unpacklo_s16_s32(v64 a) {
- return _mm_srai_epi32(_mm_unpacklo_epi16(_mm_setzero_si128(), a), 16);
-}
-
-SIMD_INLINE v64 v64_unpackhi_u16_s32(v64 a) {
- return _mm_srli_si128(_mm_unpacklo_epi16(a, _mm_setzero_si128()), 8);
-}
-
-SIMD_INLINE v64 v64_unpackhi_s16_s32(v64 a) {
- return _mm_srli_si128(
- _mm_srai_epi32(_mm_unpacklo_epi16(_mm_setzero_si128(), a), 16), 8);
-}
-
-SIMD_INLINE v64 v64_shuffle_8(v64 x, v64 pattern) {
-#if defined(__SSSE3__)
- return _mm_shuffle_epi8(x, pattern);
-#else
- v64 output;
- unsigned char *input = (unsigned char *)&x;
- unsigned char *index = (unsigned char *)&pattern;
- char *selected = (char *)&output;
- int counter;
-
- for (counter = 0; counter < 8; counter++) {
- selected[counter] = input[index[counter]];
- }
-
- return output;
-#endif
-}
-
-SIMD_INLINE int64_t v64_dotp_su8(v64 a, v64 b) {
- __m128i t = _mm_madd_epi16(_mm_srai_epi16(_mm_unpacklo_epi8(a, a), 8),
- _mm_unpacklo_epi8(b, _mm_setzero_si128()));
- t = _mm_add_epi32(t, _mm_srli_si128(t, 8));
- t = _mm_add_epi32(t, _mm_srli_si128(t, 4));
- return (int32_t)v64_low_u32(t);
-}
-
-SIMD_INLINE int64_t v64_dotp_s16(v64 a, v64 b) {
- __m128i r = _mm_madd_epi16(a, b);
-#if defined(__SSE4_1__) && defined(__x86_64__)
- __m128i x = _mm_cvtepi32_epi64(r);
- return _mm_cvtsi128_si64(_mm_add_epi64(x, _mm_srli_si128(x, 8)));
-#else
- return (int64_t)_mm_cvtsi128_si32(_mm_srli_si128(r, 4)) +
- (int64_t)_mm_cvtsi128_si32(r);
-#endif
-}
-
-SIMD_INLINE uint64_t v64_hadd_u8(v64 a) {
- return v64_low_u32(_mm_sad_epu8(a, _mm_setzero_si128()));
-}
-
-SIMD_INLINE int64_t v64_hadd_s16(v64 a) {
- return v64_dotp_s16(a, v64_dup_16(1));
-}
-
-typedef v64 sad64_internal;
-
-SIMD_INLINE sad64_internal v64_sad_u8_init() { return _mm_setzero_si128(); }
-
-/* Implementation dependent return value. Result must be finalised with
- v64_sad_u8_sum().
- The result for more than 32 v64_sad_u8() calls is undefined. */
-SIMD_INLINE sad64_internal v64_sad_u8(sad64_internal s, v64 a, v64 b) {
- return _mm_add_epi64(s, _mm_sad_epu8(a, b));
-}
-
-SIMD_INLINE uint32_t v64_sad_u8_sum(sad64_internal s) { return v64_low_u32(s); }
-
-typedef v64 ssd64_internal;
-
-SIMD_INLINE ssd64_internal v64_ssd_u8_init() { return _mm_setzero_si128(); }
-
-/* Implementation dependent return value. Result must be finalised with
- * v64_ssd_u8_sum(). */
-SIMD_INLINE ssd64_internal v64_ssd_u8(ssd64_internal s, v64 a, v64 b) {
- v64 l = v64_sub_16(v64_ziplo_8(v64_zero(), a), v64_ziplo_8(v64_zero(), b));
- v64 h = v64_sub_16(v64_ziphi_8(v64_zero(), a), v64_ziphi_8(v64_zero(), b));
- v64 r = v64_add_32(_mm_madd_epi16(l, l), _mm_madd_epi16(h, h));
- return _mm_add_epi64(
- s, v64_ziplo_32(v64_zero(), _mm_add_epi32(r, _mm_srli_si128(r, 4))));
-}
-
-SIMD_INLINE uint32_t v64_ssd_u8_sum(sad64_internal s) { return v64_low_u32(s); }
-
-SIMD_INLINE v64 v64_or(v64 a, v64 b) { return _mm_or_si128(a, b); }
-
-SIMD_INLINE v64 v64_xor(v64 a, v64 b) { return _mm_xor_si128(a, b); }
-
-SIMD_INLINE v64 v64_and(v64 a, v64 b) { return _mm_and_si128(a, b); }
-
-SIMD_INLINE v64 v64_andn(v64 a, v64 b) { return _mm_andnot_si128(b, a); }
-
-SIMD_INLINE v64 v64_mullo_s16(v64 a, v64 b) { return _mm_mullo_epi16(a, b); }
-
-SIMD_INLINE v64 v64_mulhi_s16(v64 a, v64 b) { return _mm_mulhi_epi16(a, b); }
-
-SIMD_INLINE v64 v64_mullo_s32(v64 a, v64 b) {
-#if defined(__SSE4_1__)
- return _mm_mullo_epi32(a, b);
-#else
- return _mm_unpacklo_epi32(
- _mm_mul_epu32(a, b),
- _mm_mul_epu32(_mm_srli_si128(a, 4), _mm_srli_si128(b, 4)));
-#endif
-}
-
-SIMD_INLINE v64 v64_madd_s16(v64 a, v64 b) { return _mm_madd_epi16(a, b); }
-
-SIMD_INLINE v64 v64_madd_us8(v64 a, v64 b) {
-#if defined(__SSSE3__)
- return _mm_maddubs_epi16(a, b);
-#else
- __m128i t = _mm_madd_epi16(_mm_unpacklo_epi8(a, _mm_setzero_si128()),
- _mm_srai_epi16(_mm_unpacklo_epi8(b, b), 8));
- return _mm_packs_epi32(t, t);
-#endif
-}
-
-SIMD_INLINE v64 v64_avg_u8(v64 a, v64 b) { return _mm_avg_epu8(a, b); }
-
-SIMD_INLINE v64 v64_rdavg_u8(v64 a, v64 b) {
- return _mm_sub_epi8(_mm_avg_epu8(a, b),
- _mm_and_si128(_mm_xor_si128(a, b), v64_dup_8(1)));
-}
-
-SIMD_INLINE v64 v64_rdavg_u16(v64 a, v64 b) {
- return _mm_sub_epi16(_mm_avg_epu16(a, b),
- _mm_and_si128(_mm_xor_si128(a, b), v64_dup_16(1)));
-}
-
-SIMD_INLINE v64 v64_avg_u16(v64 a, v64 b) { return _mm_avg_epu16(a, b); }
-
-SIMD_INLINE v64 v64_min_u8(v64 a, v64 b) { return _mm_min_epu8(a, b); }
-
-SIMD_INLINE v64 v64_max_u8(v64 a, v64 b) { return _mm_max_epu8(a, b); }
-
-SIMD_INLINE v64 v64_min_s8(v64 a, v64 b) {
-#if defined(__SSE4_1__)
- return _mm_min_epi8(a, b);
-#else
- v64 mask = _mm_cmplt_epi8(a, b);
- return _mm_or_si128(_mm_andnot_si128(mask, b), _mm_and_si128(mask, a));
-#endif
-}
-
-SIMD_INLINE v64 v64_max_s8(v64 a, v64 b) {
-#if defined(__SSE4_1__)
- return _mm_max_epi8(a, b);
-#else
- v64 mask = _mm_cmplt_epi8(b, a);
- return _mm_or_si128(_mm_andnot_si128(mask, b), _mm_and_si128(mask, a));
-#endif
-}
-
-SIMD_INLINE v64 v64_min_s16(v64 a, v64 b) { return _mm_min_epi16(a, b); }
-
-SIMD_INLINE v64 v64_max_s16(v64 a, v64 b) { return _mm_max_epi16(a, b); }
-
-SIMD_INLINE v64 v64_cmpgt_s8(v64 a, v64 b) { return _mm_cmpgt_epi8(a, b); }
-
-SIMD_INLINE v64 v64_cmplt_s8(v64 a, v64 b) { return _mm_cmplt_epi8(a, b); }
-
-SIMD_INLINE v64 v64_cmpeq_8(v64 a, v64 b) { return _mm_cmpeq_epi8(a, b); }
-
-SIMD_INLINE v64 v64_cmpgt_s16(v64 a, v64 b) { return _mm_cmpgt_epi16(a, b); }
-
-SIMD_INLINE v64 v64_cmplt_s16(v64 a, v64 b) { return _mm_cmplt_epi16(a, b); }
-
-SIMD_INLINE v64 v64_cmpeq_16(v64 a, v64 b) { return _mm_cmpeq_epi16(a, b); }
-
-SIMD_INLINE v64 v64_shl_8(v64 a, unsigned int c) {
- return _mm_and_si128(_mm_set1_epi8((uint8_t)(0xff << c)),
- _mm_sll_epi16(a, _mm_cvtsi32_si128(c)));
-}
-
-SIMD_INLINE v64 v64_shr_u8(v64 a, unsigned int c) {
- return _mm_and_si128(_mm_set1_epi8(0xff >> c),
- _mm_srl_epi16(a, _mm_cvtsi32_si128(c)));
-}
-
-SIMD_INLINE v64 v64_shr_s8(v64 a, unsigned int c) {
- return _mm_packs_epi16(
- _mm_sra_epi16(_mm_unpacklo_epi8(a, a), _mm_cvtsi32_si128(c + 8)), a);
-}
-
-SIMD_INLINE v64 v64_shl_16(v64 a, unsigned int c) {
- return _mm_sll_epi16(a, _mm_cvtsi32_si128(c));
-}
-
-SIMD_INLINE v64 v64_shr_u16(v64 a, unsigned int c) {
- return _mm_srl_epi16(a, _mm_cvtsi32_si128(c));
-}
-
-SIMD_INLINE v64 v64_shr_s16(v64 a, unsigned int c) {
- return _mm_sra_epi16(a, _mm_cvtsi32_si128(c));
-}
-
-SIMD_INLINE v64 v64_shl_32(v64 a, unsigned int c) {
- return _mm_sll_epi32(a, _mm_cvtsi32_si128(c));
-}
-
-SIMD_INLINE v64 v64_shr_u32(v64 a, unsigned int c) {
- return _mm_srl_epi32(a, _mm_cvtsi32_si128(c));
-}
-
-SIMD_INLINE v64 v64_shr_s32(v64 a, unsigned int c) {
- return _mm_sra_epi32(a, _mm_cvtsi32_si128(c));
-}
-
-/* These intrinsics require immediate values, so we must use #defines
- to enforce that. */
-#define v64_shl_n_byte(a, c) _mm_slli_si128(a, c)
-#define v64_shr_n_byte(a, c) _mm_srli_si128(_mm_unpacklo_epi64(a, a), c + 8)
-#define v64_shl_n_8(a, c) \
- _mm_and_si128(_mm_set1_epi8((uint8_t)(0xff << (c))), _mm_slli_epi16(a, c))
-#define v64_shr_n_u8(a, c) \
- _mm_and_si128(_mm_set1_epi8(0xff >> (c)), _mm_srli_epi16(a, c))
-#define v64_shr_n_s8(a, c) \
- _mm_packs_epi16(_mm_srai_epi16(_mm_unpacklo_epi8(a, a), (c) + 8), a)
-#define v64_shl_n_16(a, c) _mm_slli_epi16(a, c)
-#define v64_shr_n_u16(a, c) _mm_srli_epi16(a, c)
-#define v64_shr_n_s16(a, c) _mm_srai_epi16(a, c)
-#define v64_shl_n_32(a, c) _mm_slli_epi32(a, c)
-#define v64_shr_n_u32(a, c) _mm_srli_epi32(a, c)
-#define v64_shr_n_s32(a, c) _mm_srai_epi32(a, c)
-
-#endif // AOM_AOM_DSP_SIMD_V64_INTRINSICS_X86_H_
diff --git a/third_party/aom/aom_dsp/sse.c b/third_party/aom/aom_dsp/sse.c
deleted file mode 100644
index 249394807..000000000
--- a/third_party/aom/aom_dsp/sse.c
+++ /dev/null
@@ -1,52 +0,0 @@
-/*
- * Copyright (c) 2018, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-/* Sum the difference between every corresponding element of the buffers. */
-
-#include "config/aom_config.h"
-#include "config/aom_dsp_rtcd.h"
-
-#include "aom/aom_integer.h"
-
-int64_t aom_sse_c(const uint8_t *a, int a_stride, const uint8_t *b,
- int b_stride, int width, int height) {
- int y, x;
- int64_t sse = 0;
-
- for (y = 0; y < height; y++) {
- for (x = 0; x < width; x++) {
- const int32_t diff = abs(a[x] - b[x]);
- sse += diff * diff;
- }
-
- a += a_stride;
- b += b_stride;
- }
- return sse;
-}
-
-int64_t aom_highbd_sse_c(const uint8_t *a8, int a_stride, const uint8_t *b8,
- int b_stride, int width, int height) {
- int y, x;
- int64_t sse = 0;
- uint16_t *a = CONVERT_TO_SHORTPTR(a8);
- uint16_t *b = CONVERT_TO_SHORTPTR(b8);
- for (y = 0; y < height; y++) {
- for (x = 0; x < width; x++) {
- const int32_t diff = (int32_t)(a[x]) - (int32_t)(b[x]);
- sse += diff * diff;
- }
-
- a += a_stride;
- b += b_stride;
- }
- return sse;
-}
diff --git a/third_party/aom/aom_dsp/ssim.c b/third_party/aom/aom_dsp/ssim.c
deleted file mode 100644
index 681770ba9..000000000
--- a/third_party/aom/aom_dsp/ssim.c
+++ /dev/null
@@ -1,439 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#include <assert.h>
-#include <math.h>
-
-#include "config/aom_dsp_rtcd.h"
-
-#include "aom_dsp/ssim.h"
-#include "aom_ports/mem.h"
-#include "aom_ports/system_state.h"
-
-void aom_ssim_parms_16x16_c(const uint8_t *s, int sp, const uint8_t *r, int rp,
- uint32_t *sum_s, uint32_t *sum_r,
- uint32_t *sum_sq_s, uint32_t *sum_sq_r,
- uint32_t *sum_sxr) {
- int i, j;
- for (i = 0; i < 16; i++, s += sp, r += rp) {
- for (j = 0; j < 16; j++) {
- *sum_s += s[j];
- *sum_r += r[j];
- *sum_sq_s += s[j] * s[j];
- *sum_sq_r += r[j] * r[j];
- *sum_sxr += s[j] * r[j];
- }
- }
-}
-
-void aom_ssim_parms_8x8_c(const uint8_t *s, int sp, const uint8_t *r, int rp,
- uint32_t *sum_s, uint32_t *sum_r, uint32_t *sum_sq_s,
- uint32_t *sum_sq_r, uint32_t *sum_sxr) {
- int i, j;
- for (i = 0; i < 8; i++, s += sp, r += rp) {
- for (j = 0; j < 8; j++) {
- *sum_s += s[j];
- *sum_r += r[j];
- *sum_sq_s += s[j] * s[j];
- *sum_sq_r += r[j] * r[j];
- *sum_sxr += s[j] * r[j];
- }
- }
-}
-
-void aom_highbd_ssim_parms_8x8_c(const uint16_t *s, int sp, const uint16_t *r,
- int rp, uint32_t *sum_s, uint32_t *sum_r,
- uint32_t *sum_sq_s, uint32_t *sum_sq_r,
- uint32_t *sum_sxr) {
- int i, j;
- for (i = 0; i < 8; i++, s += sp, r += rp) {
- for (j = 0; j < 8; j++) {
- *sum_s += s[j];
- *sum_r += r[j];
- *sum_sq_s += s[j] * s[j];
- *sum_sq_r += r[j] * r[j];
- *sum_sxr += s[j] * r[j];
- }
- }
-}
-
-static const int64_t cc1 = 26634; // (64^2*(.01*255)^2
-static const int64_t cc2 = 239708; // (64^2*(.03*255)^2
-static const int64_t cc1_10 = 428658; // (64^2*(.01*1023)^2
-static const int64_t cc2_10 = 3857925; // (64^2*(.03*1023)^2
-static const int64_t cc1_12 = 6868593; // (64^2*(.01*4095)^2
-static const int64_t cc2_12 = 61817334; // (64^2*(.03*4095)^2
-
-static double similarity(uint32_t sum_s, uint32_t sum_r, uint32_t sum_sq_s,
- uint32_t sum_sq_r, uint32_t sum_sxr, int count,
- uint32_t bd) {
- int64_t ssim_n, ssim_d;
- int64_t c1, c2;
- if (bd == 8) {
- // scale the constants by number of pixels
- c1 = (cc1 * count * count) >> 12;
- c2 = (cc2 * count * count) >> 12;
- } else if (bd == 10) {
- c1 = (cc1_10 * count * count) >> 12;
- c2 = (cc2_10 * count * count) >> 12;
- } else if (bd == 12) {
- c1 = (cc1_12 * count * count) >> 12;
- c2 = (cc2_12 * count * count) >> 12;
- } else {
- c1 = c2 = 0;
- assert(0);
- }
-
- ssim_n = (2 * sum_s * sum_r + c1) *
- ((int64_t)2 * count * sum_sxr - (int64_t)2 * sum_s * sum_r + c2);
-
- ssim_d = (sum_s * sum_s + sum_r * sum_r + c1) *
- ((int64_t)count * sum_sq_s - (int64_t)sum_s * sum_s +
- (int64_t)count * sum_sq_r - (int64_t)sum_r * sum_r + c2);
-
- return ssim_n * 1.0 / ssim_d;
-}
-
-static double ssim_8x8(const uint8_t *s, int sp, const uint8_t *r, int rp) {
- uint32_t sum_s = 0, sum_r = 0, sum_sq_s = 0, sum_sq_r = 0, sum_sxr = 0;
- aom_ssim_parms_8x8(s, sp, r, rp, &sum_s, &sum_r, &sum_sq_s, &sum_sq_r,
- &sum_sxr);
- return similarity(sum_s, sum_r, sum_sq_s, sum_sq_r, sum_sxr, 64, 8);
-}
-
-static double highbd_ssim_8x8(const uint16_t *s, int sp, const uint16_t *r,
- int rp, uint32_t bd, uint32_t shift) {
- uint32_t sum_s = 0, sum_r = 0, sum_sq_s = 0, sum_sq_r = 0, sum_sxr = 0;
- aom_highbd_ssim_parms_8x8(s, sp, r, rp, &sum_s, &sum_r, &sum_sq_s, &sum_sq_r,
- &sum_sxr);
- return similarity(sum_s >> shift, sum_r >> shift, sum_sq_s >> (2 * shift),
- sum_sq_r >> (2 * shift), sum_sxr >> (2 * shift), 64, bd);
-}
-
-// We are using a 8x8 moving window with starting location of each 8x8 window
-// on the 4x4 pixel grid. Such arrangement allows the windows to overlap
-// block boundaries to penalize blocking artifacts.
-static double aom_ssim2(const uint8_t *img1, const uint8_t *img2,
- int stride_img1, int stride_img2, int width,
- int height) {
- int i, j;
- int samples = 0;
- double ssim_total = 0;
-
- // sample point start with each 4x4 location
- for (i = 0; i <= height - 8;
- i += 4, img1 += stride_img1 * 4, img2 += stride_img2 * 4) {
- for (j = 0; j <= width - 8; j += 4) {
- double v = ssim_8x8(img1 + j, stride_img1, img2 + j, stride_img2);
- ssim_total += v;
- samples++;
- }
- }
- ssim_total /= samples;
- return ssim_total;
-}
-
-static double aom_highbd_ssim2(const uint8_t *img1, const uint8_t *img2,
- int stride_img1, int stride_img2, int width,
- int height, uint32_t bd, uint32_t shift) {
- int i, j;
- int samples = 0;
- double ssim_total = 0;
-
- // sample point start with each 4x4 location
- for (i = 0; i <= height - 8;
- i += 4, img1 += stride_img1 * 4, img2 += stride_img2 * 4) {
- for (j = 0; j <= width - 8; j += 4) {
- double v = highbd_ssim_8x8(CONVERT_TO_SHORTPTR(img1 + j), stride_img1,
- CONVERT_TO_SHORTPTR(img2 + j), stride_img2, bd,
- shift);
- ssim_total += v;
- samples++;
- }
- }
- ssim_total /= samples;
- return ssim_total;
-}
-
-double aom_calc_ssim(const YV12_BUFFER_CONFIG *source,
- const YV12_BUFFER_CONFIG *dest, double *weight) {
- double abc[3];
- for (int i = 0; i < 3; ++i) {
- const int is_uv = i > 0;
- abc[i] = aom_ssim2(source->buffers[i], dest->buffers[i],
- source->strides[is_uv], dest->strides[is_uv],
- source->crop_widths[is_uv], source->crop_heights[is_uv]);
- }
-
- *weight = 1;
- return abc[0] * .8 + .1 * (abc[1] + abc[2]);
-}
-
-// traditional ssim as per: http://en.wikipedia.org/wiki/Structural_similarity
-//
-// Re working out the math ->
-//
-// ssim(x,y) = (2*mean(x)*mean(y) + c1)*(2*cov(x,y)+c2) /
-// ((mean(x)^2+mean(y)^2+c1)*(var(x)+var(y)+c2))
-//
-// mean(x) = sum(x) / n
-//
-// cov(x,y) = (n*sum(xi*yi)-sum(x)*sum(y))/(n*n)
-//
-// var(x) = (n*sum(xi*xi)-sum(xi)*sum(xi))/(n*n)
-//
-// ssim(x,y) =
-// (2*sum(x)*sum(y)/(n*n) + c1)*(2*(n*sum(xi*yi)-sum(x)*sum(y))/(n*n)+c2) /
-// (((sum(x)*sum(x)+sum(y)*sum(y))/(n*n) +c1) *
-// ((n*sum(xi*xi) - sum(xi)*sum(xi))/(n*n)+
-// (n*sum(yi*yi) - sum(yi)*sum(yi))/(n*n)+c2)))
-//
-// factoring out n*n
-//
-// ssim(x,y) =
-// (2*sum(x)*sum(y) + n*n*c1)*(2*(n*sum(xi*yi)-sum(x)*sum(y))+n*n*c2) /
-// (((sum(x)*sum(x)+sum(y)*sum(y)) + n*n*c1) *
-// (n*sum(xi*xi)-sum(xi)*sum(xi)+n*sum(yi*yi)-sum(yi)*sum(yi)+n*n*c2))
-//
-// Replace c1 with n*n * c1 for the final step that leads to this code:
-// The final step scales by 12 bits so we don't lose precision in the constants.
-
-static double ssimv_similarity(const Ssimv *sv, int64_t n) {
- // Scale the constants by number of pixels.
- const int64_t c1 = (cc1 * n * n) >> 12;
- const int64_t c2 = (cc2 * n * n) >> 12;
-
- const double l = 1.0 * (2 * sv->sum_s * sv->sum_r + c1) /
- (sv->sum_s * sv->sum_s + sv->sum_r * sv->sum_r + c1);
-
- // Since these variables are unsigned sums, convert to double so
- // math is done in double arithmetic.
- const double v = (2.0 * n * sv->sum_sxr - 2 * sv->sum_s * sv->sum_r + c2) /
- (n * sv->sum_sq_s - sv->sum_s * sv->sum_s +
- n * sv->sum_sq_r - sv->sum_r * sv->sum_r + c2);
-
- return l * v;
-}
-
-// The first term of the ssim metric is a luminance factor.
-//
-// (2*mean(x)*mean(y) + c1)/ (mean(x)^2+mean(y)^2+c1)
-//
-// This luminance factor is super sensitive to the dark side of luminance
-// values and completely insensitive on the white side. check out 2 sets
-// (1,3) and (250,252) the term gives ( 2*1*3/(1+9) = .60
-// 2*250*252/ (250^2+252^2) => .99999997
-//
-// As a result in this tweaked version of the calculation in which the
-// luminance is taken as percentage off from peak possible.
-//
-// 255 * 255 - (sum_s - sum_r) / count * (sum_s - sum_r) / count
-//
-static double ssimv_similarity2(const Ssimv *sv, int64_t n) {
- // Scale the constants by number of pixels.
- const int64_t c1 = (cc1 * n * n) >> 12;
- const int64_t c2 = (cc2 * n * n) >> 12;
-
- const double mean_diff = (1.0 * sv->sum_s - sv->sum_r) / n;
- const double l = (255 * 255 - mean_diff * mean_diff + c1) / (255 * 255 + c1);
-
- // Since these variables are unsigned, sums convert to double so
- // math is done in double arithmetic.
- const double v = (2.0 * n * sv->sum_sxr - 2 * sv->sum_s * sv->sum_r + c2) /
- (n * sv->sum_sq_s - sv->sum_s * sv->sum_s +
- n * sv->sum_sq_r - sv->sum_r * sv->sum_r + c2);
-
- return l * v;
-}
-static void ssimv_parms(uint8_t *img1, int img1_pitch, uint8_t *img2,
- int img2_pitch, Ssimv *sv) {
- aom_ssim_parms_8x8(img1, img1_pitch, img2, img2_pitch, &sv->sum_s, &sv->sum_r,
- &sv->sum_sq_s, &sv->sum_sq_r, &sv->sum_sxr);
-}
-
-double aom_get_ssim_metrics(uint8_t *img1, int img1_pitch, uint8_t *img2,
- int img2_pitch, int width, int height, Ssimv *sv2,
- Metrics *m, int do_inconsistency) {
- double dssim_total = 0;
- double ssim_total = 0;
- double ssim2_total = 0;
- double inconsistency_total = 0;
- int i, j;
- int c = 0;
- double norm;
- double old_ssim_total = 0;
- aom_clear_system_state();
- // We can sample points as frequently as we like start with 1 per 4x4.
- for (i = 0; i < height;
- i += 4, img1 += img1_pitch * 4, img2 += img2_pitch * 4) {
- for (j = 0; j < width; j += 4, ++c) {
- Ssimv sv = { 0, 0, 0, 0, 0, 0 };
- double ssim;
- double ssim2;
- double dssim;
- uint32_t var_new;
- uint32_t var_old;
- uint32_t mean_new;
- uint32_t mean_old;
- double ssim_new;
- double ssim_old;
-
- // Not sure there's a great way to handle the edge pixels
- // in ssim when using a window. Seems biased against edge pixels
- // however you handle this. This uses only samples that are
- // fully in the frame.
- if (j + 8 <= width && i + 8 <= height) {
- ssimv_parms(img1 + j, img1_pitch, img2 + j, img2_pitch, &sv);
- }
-
- ssim = ssimv_similarity(&sv, 64);
- ssim2 = ssimv_similarity2(&sv, 64);
-
- sv.ssim = ssim2;
-
- // dssim is calculated to use as an actual error metric and
- // is scaled up to the same range as sum square error.
- // Since we are subsampling every 16th point maybe this should be
- // *16 ?
- dssim = 255 * 255 * (1 - ssim2) / 2;
-
- // Here I introduce a new error metric: consistency-weighted
- // SSIM-inconsistency. This metric isolates frames where the
- // SSIM 'suddenly' changes, e.g. if one frame in every 8 is much
- // sharper or blurrier than the others. Higher values indicate a
- // temporally inconsistent SSIM. There are two ideas at work:
- //
- // 1) 'SSIM-inconsistency': the total inconsistency value
- // reflects how much SSIM values are changing between this
- // source / reference frame pair and the previous pair.
- //
- // 2) 'consistency-weighted': weights de-emphasize areas in the
- // frame where the scene content has changed. Changes in scene
- // content are detected via changes in local variance and local
- // mean.
- //
- // Thus the overall measure reflects how inconsistent the SSIM
- // values are, over consistent regions of the frame.
- //
- // The metric has three terms:
- //
- // term 1 -> uses change in scene Variance to weight error score
- // 2 * var(Fi)*var(Fi-1) / (var(Fi)^2+var(Fi-1)^2)
- // larger changes from one frame to the next mean we care
- // less about consistency.
- //
- // term 2 -> uses change in local scene luminance to weight error
- // 2 * avg(Fi)*avg(Fi-1) / (avg(Fi)^2+avg(Fi-1)^2)
- // larger changes from one frame to the next mean we care
- // less about consistency.
- //
- // term3 -> measures inconsistency in ssim scores between frames
- // 1 - ( 2 * ssim(Fi)*ssim(Fi-1)/(ssim(Fi)^2+sssim(Fi-1)^2).
- //
- // This term compares the ssim score for the same location in 2
- // subsequent frames.
- var_new = sv.sum_sq_s - sv.sum_s * sv.sum_s / 64;
- var_old = sv2[c].sum_sq_s - sv2[c].sum_s * sv2[c].sum_s / 64;
- mean_new = sv.sum_s;
- mean_old = sv2[c].sum_s;
- ssim_new = sv.ssim;
- ssim_old = sv2[c].ssim;
-
- if (do_inconsistency) {
- // We do the metric once for every 4x4 block in the image. Since
- // we are scaling the error to SSE for use in a psnr calculation
- // 1.0 = 4x4x255x255 the worst error we can possibly have.
- static const double kScaling = 4. * 4 * 255 * 255;
-
- // The constants have to be non 0 to avoid potential divide by 0
- // issues other than that they affect kind of a weighting between
- // the terms. No testing of what the right terms should be has been
- // done.
- static const double c1 = 1, c2 = 1, c3 = 1;
-
- // This measures how much consistent variance is in two consecutive
- // source frames. 1.0 means they have exactly the same variance.
- const double variance_term =
- (2.0 * var_old * var_new + c1) /
- (1.0 * var_old * var_old + 1.0 * var_new * var_new + c1);
-
- // This measures how consistent the local mean are between two
- // consecutive frames. 1.0 means they have exactly the same mean.
- const double mean_term =
- (2.0 * mean_old * mean_new + c2) /
- (1.0 * mean_old * mean_old + 1.0 * mean_new * mean_new + c2);
-
- // This measures how consistent the ssims of two
- // consecutive frames is. 1.0 means they are exactly the same.
- double ssim_term =
- pow((2.0 * ssim_old * ssim_new + c3) /
- (ssim_old * ssim_old + ssim_new * ssim_new + c3),
- 5);
-
- double this_inconsistency;
-
- // Floating point math sometimes makes this > 1 by a tiny bit.
- // We want the metric to scale between 0 and 1.0 so we can convert
- // it to an snr scaled value.
- if (ssim_term > 1) ssim_term = 1;
-
- // This converts the consistency metric to an inconsistency metric
- // ( so we can scale it like psnr to something like sum square error.
- // The reason for the variance and mean terms is the assumption that
- // if there are big changes in the source we shouldn't penalize
- // inconsistency in ssim scores a bit less as it will be less visible
- // to the user.
- this_inconsistency = (1 - ssim_term) * variance_term * mean_term;
-
- this_inconsistency *= kScaling;
- inconsistency_total += this_inconsistency;
- }
- sv2[c] = sv;
- ssim_total += ssim;
- ssim2_total += ssim2;
- dssim_total += dssim;
-
- old_ssim_total += ssim_old;
- }
- old_ssim_total += 0;
- }
-
- norm = 1. / (width / 4) / (height / 4);
- ssim_total *= norm;
- ssim2_total *= norm;
- m->ssim2 = ssim2_total;
- m->ssim = ssim_total;
- if (old_ssim_total == 0) inconsistency_total = 0;
-
- m->ssimc = inconsistency_total;
-
- m->dssim = dssim_total;
- return inconsistency_total;
-}
-
-double aom_highbd_calc_ssim(const YV12_BUFFER_CONFIG *source,
- const YV12_BUFFER_CONFIG *dest, double *weight,
- uint32_t bd, uint32_t in_bd) {
- assert(bd >= in_bd);
- const uint32_t shift = bd - in_bd;
-
- double abc[3];
- for (int i = 0; i < 3; ++i) {
- const int is_uv = i > 0;
- abc[i] = aom_highbd_ssim2(source->buffers[i], dest->buffers[i],
- source->strides[is_uv], dest->strides[is_uv],
- source->crop_widths[is_uv],
- source->crop_heights[is_uv], in_bd, shift);
- }
-
- *weight = 1;
- return abc[0] * .8 + .1 * (abc[1] + abc[2]);
-}
diff --git a/third_party/aom/aom_dsp/ssim.h b/third_party/aom/aom_dsp/ssim.h
deleted file mode 100644
index 55038f4c2..000000000
--- a/third_party/aom/aom_dsp/ssim.h
+++ /dev/null
@@ -1,87 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#ifndef AOM_AOM_DSP_SSIM_H_
-#define AOM_AOM_DSP_SSIM_H_
-
-#define MAX_SSIM_DB 100.0;
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-#include "config/aom_config.h"
-
-#include "aom_scale/yv12config.h"
-
-// metrics used for calculating ssim, ssim2, dssim, and ssimc
-typedef struct {
- // source sum ( over 8x8 region )
- uint32_t sum_s;
-
- // reference sum (over 8x8 region )
- uint32_t sum_r;
-
- // source sum squared ( over 8x8 region )
- uint32_t sum_sq_s;
-
- // reference sum squared (over 8x8 region )
- uint32_t sum_sq_r;
-
- // sum of source times reference (over 8x8 region)
- uint32_t sum_sxr;
-
- // calculated ssim score between source and reference
- double ssim;
-} Ssimv;
-
-// metrics collected on a frame basis
-typedef struct {
- // ssim consistency error metric ( see code for explanation )
- double ssimc;
-
- // standard ssim
- double ssim;
-
- // revised ssim ( see code for explanation)
- double ssim2;
-
- // ssim restated as an error metric like sse
- double dssim;
-
- // dssim converted to decibels
- double dssimd;
-
- // ssimc converted to decibels
- double ssimcd;
-} Metrics;
-
-double aom_get_ssim_metrics(uint8_t *img1, int img1_pitch, uint8_t *img2,
- int img2_pitch, int width, int height, Ssimv *sv2,
- Metrics *m, int do_inconsistency);
-
-double aom_calc_ssim(const YV12_BUFFER_CONFIG *source,
- const YV12_BUFFER_CONFIG *dest, double *weight);
-
-double aom_calc_fastssim(const YV12_BUFFER_CONFIG *source,
- const YV12_BUFFER_CONFIG *dest, double *ssim_y,
- double *ssim_u, double *ssim_v, uint32_t bd,
- uint32_t in_bd);
-
-double aom_highbd_calc_ssim(const YV12_BUFFER_CONFIG *source,
- const YV12_BUFFER_CONFIG *dest, double *weight,
- uint32_t bd, uint32_t in_bd);
-
-#ifdef __cplusplus
-} // extern "C"
-#endif
-
-#endif // AOM_AOM_DSP_SSIM_H_
diff --git a/third_party/aom/aom_dsp/subtract.c b/third_party/aom/aom_dsp/subtract.c
deleted file mode 100644
index 2f6da96e5..000000000
--- a/third_party/aom/aom_dsp/subtract.c
+++ /dev/null
@@ -1,53 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#include <stdlib.h>
-
-#include "config/aom_config.h"
-#include "config/aom_dsp_rtcd.h"
-
-#include "aom/aom_integer.h"
-#include "aom_ports/mem.h"
-
-void aom_subtract_block_c(int rows, int cols, int16_t *diff,
- ptrdiff_t diff_stride, const uint8_t *src,
- ptrdiff_t src_stride, const uint8_t *pred,
- ptrdiff_t pred_stride) {
- int r, c;
-
- for (r = 0; r < rows; r++) {
- for (c = 0; c < cols; c++) diff[c] = src[c] - pred[c];
-
- diff += diff_stride;
- pred += pred_stride;
- src += src_stride;
- }
-}
-
-void aom_highbd_subtract_block_c(int rows, int cols, int16_t *diff,
- ptrdiff_t diff_stride, const uint8_t *src8,
- ptrdiff_t src_stride, const uint8_t *pred8,
- ptrdiff_t pred_stride, int bd) {
- int r, c;
- uint16_t *src = CONVERT_TO_SHORTPTR(src8);
- uint16_t *pred = CONVERT_TO_SHORTPTR(pred8);
- (void)bd;
-
- for (r = 0; r < rows; r++) {
- for (c = 0; c < cols; c++) {
- diff[c] = src[c] - pred[c];
- }
-
- diff += diff_stride;
- pred += pred_stride;
- src += src_stride;
- }
-}
diff --git a/third_party/aom/aom_dsp/sum_squares.c b/third_party/aom/aom_dsp/sum_squares.c
deleted file mode 100644
index 44ec41f2e..000000000
--- a/third_party/aom/aom_dsp/sum_squares.c
+++ /dev/null
@@ -1,40 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#include <assert.h>
-
-#include "config/aom_dsp_rtcd.h"
-
-uint64_t aom_sum_squares_2d_i16_c(const int16_t *src, int src_stride, int width,
- int height) {
- int r, c;
- uint64_t ss = 0;
-
- for (r = 0; r < height; r++) {
- for (c = 0; c < width; c++) {
- const int16_t v = src[c];
- ss += v * v;
- }
- src += src_stride;
- }
-
- return ss;
-}
-
-uint64_t aom_sum_squares_i16_c(const int16_t *src, uint32_t n) {
- uint64_t ss = 0;
- do {
- const int16_t v = *src++;
- ss += v * v;
- } while (--n);
-
- return ss;
-}
diff --git a/third_party/aom/aom_dsp/txfm_common.h b/third_party/aom/aom_dsp/txfm_common.h
deleted file mode 100644
index f98242840..000000000
--- a/third_party/aom/aom_dsp/txfm_common.h
+++ /dev/null
@@ -1,91 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#ifndef AOM_AOM_DSP_TXFM_COMMON_H_
-#define AOM_AOM_DSP_TXFM_COMMON_H_
-
-#include "aom_dsp/aom_dsp_common.h"
-#include "av1/common/enums.h"
-
-// Constants and Macros used by all idct/dct functions
-#define DCT_CONST_BITS 14
-#define DCT_CONST_ROUNDING (1 << (DCT_CONST_BITS - 1))
-
-#define UNIT_QUANT_SHIFT 2
-#define UNIT_QUANT_FACTOR (1 << UNIT_QUANT_SHIFT)
-
-typedef struct txfm_param {
- // for both forward and inverse transforms
- TX_TYPE tx_type;
- TX_SIZE tx_size;
- int lossless;
- int bd;
- // are the pixel buffers octets or shorts? This should collapse to
- // bd==8 implies !is_hbd, but that's not certain right now.
- int is_hbd;
- TxSetType tx_set_type;
- // for inverse transforms only
- int eob;
-} TxfmParam;
-
-// Constants:
-// for (int i = 1; i< 32; ++i)
-// printf("static const int cospi_%d_64 = %.0f;\n", i,
-// round(16384 * cos(i*M_PI/64)));
-// Note: sin(k*Pi/64) = cos((32-k)*Pi/64)
-static const tran_high_t cospi_1_64 = 16364;
-static const tran_high_t cospi_2_64 = 16305;
-static const tran_high_t cospi_3_64 = 16207;
-static const tran_high_t cospi_4_64 = 16069;
-static const tran_high_t cospi_5_64 = 15893;
-static const tran_high_t cospi_6_64 = 15679;
-static const tran_high_t cospi_7_64 = 15426;
-static const tran_high_t cospi_8_64 = 15137;
-static const tran_high_t cospi_9_64 = 14811;
-static const tran_high_t cospi_10_64 = 14449;
-static const tran_high_t cospi_11_64 = 14053;
-static const tran_high_t cospi_12_64 = 13623;
-static const tran_high_t cospi_13_64 = 13160;
-static const tran_high_t cospi_14_64 = 12665;
-static const tran_high_t cospi_15_64 = 12140;
-static const tran_high_t cospi_16_64 = 11585;
-static const tran_high_t cospi_17_64 = 11003;
-static const tran_high_t cospi_18_64 = 10394;
-static const tran_high_t cospi_19_64 = 9760;
-static const tran_high_t cospi_20_64 = 9102;
-static const tran_high_t cospi_21_64 = 8423;
-static const tran_high_t cospi_22_64 = 7723;
-static const tran_high_t cospi_23_64 = 7005;
-static const tran_high_t cospi_24_64 = 6270;
-static const tran_high_t cospi_25_64 = 5520;
-static const tran_high_t cospi_26_64 = 4756;
-static const tran_high_t cospi_27_64 = 3981;
-static const tran_high_t cospi_28_64 = 3196;
-static const tran_high_t cospi_29_64 = 2404;
-static const tran_high_t cospi_30_64 = 1606;
-static const tran_high_t cospi_31_64 = 804;
-
-// 16384 * sqrt(2) * sin(kPi/9) * 2 / 3
-static const tran_high_t sinpi_1_9 = 5283;
-static const tran_high_t sinpi_2_9 = 9929;
-static const tran_high_t sinpi_3_9 = 13377;
-static const tran_high_t sinpi_4_9 = 15212;
-
-// 16384 * sqrt(2)
-static const tran_high_t Sqrt2 = 23170;
-static const tran_high_t InvSqrt2 = 11585;
-
-static INLINE tran_high_t fdct_round_shift(tran_high_t input) {
- tran_high_t rv = ROUND_POWER_OF_TWO(input, DCT_CONST_BITS);
- return rv;
-}
-
-#endif // AOM_AOM_DSP_TXFM_COMMON_H_
diff --git a/third_party/aom/aom_dsp/variance.c b/third_party/aom/aom_dsp/variance.c
deleted file mode 100644
index 23b715309..000000000
--- a/third_party/aom/aom_dsp/variance.c
+++ /dev/null
@@ -1,1579 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-#include <assert.h>
-#include <stdlib.h>
-#include <string.h>
-
-#include "config/aom_config.h"
-#include "config/aom_dsp_rtcd.h"
-#include "config/av1_rtcd.h"
-
-#include "aom/aom_integer.h"
-#include "aom_ports/mem.h"
-
-#include "aom_dsp/aom_filter.h"
-#include "aom_dsp/blend.h"
-#include "aom_dsp/variance.h"
-
-#include "av1/common/filter.h"
-#include "av1/common/onyxc_int.h"
-#include "av1/common/reconinter.h"
-
-// Returns the sum of squared differences between two 4x4 blocks of 8-bit
-// samples. a/b point at the first sample of each block; a_stride/b_stride are
-// the distances, in samples, from one row to the next.
-uint32_t aom_get4x4sse_cs_c(const uint8_t *a, int a_stride, const uint8_t *b,
-                            int b_stride) {
-  int total = 0;
-
-  for (int row = 0; row < 4; ++row, a += a_stride, b += b_stride) {
-    for (int col = 0; col < 4; ++col) {
-      const int d = a[col] - b[col];
-      total += d * d;
-    }
-  }
-
-  return total;
-}
-
-// Returns the sum of squares of the 256 consecutive int16_t values at a.
-uint32_t aom_get_mb_ss_c(const int16_t *a) {
-  const int16_t *const end = a + 256;
-  unsigned int total = 0;
-
-  while (a != end) {
-    total += (*a) * (*a);
-    ++a;
-  }
-
-  return total;
-}
-
-// Accumulates, over a w x h block of 8-bit samples, the sum of differences
-// (a - b) into *sum and the sum of squared differences into *sse. Both
-// outputs are reset to zero before accumulation starts.
-static void variance(const uint8_t *a, int a_stride, const uint8_t *b,
-                     int b_stride, int w, int h, uint32_t *sse, int *sum) {
-  *sum = 0;
-  *sse = 0;
-
-  for (int row = 0; row < h; ++row, a += a_stride, b += b_stride) {
-    for (int col = 0; col < w; ++col) {
-      const int d = a[col] - b[col];
-      *sum += d;
-      *sse += d * d;
-    }
-  }
-}
-
-// Returns the sum of squared differences over an arbitrary w x h block by
-// running variance() and discarding the sum it also produces.
-uint32_t aom_sse_odd_size(const uint8_t *a, int a_stride, const uint8_t *b,
-                          int b_stride, int w, int h) {
-  int unused_sum;
-  uint32_t result;
-
-  variance(a, a_stride, b, b_stride, w, h, &result, &unused_sum);
-  return result;
-}
-
-// First pass of the 2-D separable bilinear filter: runs a 1-D 2-tap filter
-// over 8-bit input and writes uint16_t output so the second pass can read it.
-//
-// Each output sample combines a[0] and a[pixel_step] with filter[0] and
-// filter[1], then is reduced with ROUND_POWER_OF_TWO(..., FILTER_BITS); the
-// two taps should sum to FILTER_WEIGHT. pixel_step is the offset from one
-// input to the next: 1 filters horizontally, the stride filters vertically.
-// Input rows are src_pixels_per_line apart; output rows are packed
-// output_width apart.
-void aom_var_filter_block2d_bil_first_pass_c(const uint8_t *a, uint16_t *b,
-                                             unsigned int src_pixels_per_line,
-                                             unsigned int pixel_step,
-                                             unsigned int output_height,
-                                             unsigned int output_width,
-                                             const uint8_t *filter) {
-  for (unsigned int row = 0; row < output_height; ++row) {
-    for (unsigned int col = 0; col < output_width; ++col, ++a) {
-      const int acc = (int)a[0] * filter[0] + (int)a[pixel_step] * filter[1];
-      b[col] = ROUND_POWER_OF_TWO(acc, FILTER_BITS);
-    }
-
-    // a has already advanced output_width samples; skip the rest of the row.
-    a += src_pixels_per_line - output_width;
-    b += output_width;
-  }
-}
-
-// Second pass of the 2-D separable bilinear filter: runs a 1-D 2-tap filter
-// over the 16-bit output of the first pass and writes 8-bit output.
-//
-// Each output sample combines a[0] and a[pixel_step] with filter[0] and
-// filter[1], then is reduced with ROUND_POWER_OF_TWO(..., FILTER_BITS); the
-// two taps should sum to FILTER_WEIGHT. pixel_step is the offset from one
-// input to the next: 1 filters horizontally, the stride filters vertically.
-// Input rows are src_pixels_per_line apart; output rows are packed
-// output_width apart.
-void aom_var_filter_block2d_bil_second_pass_c(const uint16_t *a, uint8_t *b,
-                                              unsigned int src_pixels_per_line,
-                                              unsigned int pixel_step,
-                                              unsigned int output_height,
-                                              unsigned int output_width,
-                                              const uint8_t *filter) {
-  for (unsigned int row = 0; row < output_height; ++row) {
-    for (unsigned int col = 0; col < output_width; ++col, ++a) {
-      const int acc = (int)a[0] * filter[0] + (int)a[pixel_step] * filter[1];
-      b[col] = ROUND_POWER_OF_TWO(acc, FILTER_BITS);
-    }
-
-    // a has already advanced output_width samples; skip the rest of the row.
-    a += src_pixels_per_line - output_width;
-    b += output_width;
-  }
-}
-
-/* Generates aom_variance<W>x<H>_c(): calls variance() on a WxH block, leaves
- * the sum of squared differences in *sse, and returns
- * *sse - (sum * sum) / (W * H), with the product formed in 64-bit arithmetic.
- */
-#define VAR(W, H) \
- uint32_t aom_variance##W##x##H##_c(const uint8_t *a, int a_stride, \
- const uint8_t *b, int b_stride, \
- uint32_t *sse) { \
- int sum; \
- variance(a, a_stride, b, b_stride, W, H, sse, &sum); \
- return *sse - (uint32_t)(((int64_t)sum * sum) / (W * H)); \
- }
-
-/* Generates aom_sub_pixel_variance<W>x<H>_c(): filters `a` along rows
- * (pixel_step 1) with bilinear_filters_2t[xoffset] into H + 1 rows of W
- * 16-bit samples, filters that along columns (pixel_step W) with
- * bilinear_filters_2t[yoffset] into an H x W 8-bit block, and returns
- * aom_variance<W>x<H>_c() of that block against b.
- */
-#define SUBPIX_VAR(W, H) \
- uint32_t aom_sub_pixel_variance##W##x##H##_c( \
- const uint8_t *a, int a_stride, int xoffset, int yoffset, \
- const uint8_t *b, int b_stride, uint32_t *sse) { \
- uint16_t fdata3[(H + 1) * W]; \
- uint8_t temp2[H * W]; \
- \
- aom_var_filter_block2d_bil_first_pass_c(a, fdata3, a_stride, 1, H + 1, W, \
- bilinear_filters_2t[xoffset]); \
- aom_var_filter_block2d_bil_second_pass_c(fdata3, temp2, W, W, H, W, \
- bilinear_filters_2t[yoffset]); \
- \
- return aom_variance##W##x##H##_c(temp2, W, b, b_stride, sse); \
- }
-
-/* Generates two functions per block size. Both bilinear-filter `a` exactly as
- * SUBPIX_VAR does, combine the filtered block with second_pred into temp3,
- * and return the variance of temp3 against b:
- *  - aom_sub_pixel_avg_variance<W>x<H>_c() combines via aom_comp_avg_pred().
- *  - aom_jnt_sub_pixel_avg_variance<W>x<H>_c() combines via
- *    aom_jnt_comp_avg_pred() using jcp_param.
- * NOTE(review): the jnt variant calls aom_variance<W>x<H> (no _c suffix)
- * while the plain variant calls aom_variance<W>x<H>_c; confirm this
- * difference is intended.
- */
-#define SUBPIX_AVG_VAR(W, H) \
- uint32_t aom_sub_pixel_avg_variance##W##x##H##_c( \
- const uint8_t *a, int a_stride, int xoffset, int yoffset, \
- const uint8_t *b, int b_stride, uint32_t *sse, \
- const uint8_t *second_pred) { \
- uint16_t fdata3[(H + 1) * W]; \
- uint8_t temp2[H * W]; \
- DECLARE_ALIGNED(16, uint8_t, temp3[H * W]); \
- \
- aom_var_filter_block2d_bil_first_pass_c(a, fdata3, a_stride, 1, H + 1, W, \
- bilinear_filters_2t[xoffset]); \
- aom_var_filter_block2d_bil_second_pass_c(fdata3, temp2, W, W, H, W, \
- bilinear_filters_2t[yoffset]); \
- \
- aom_comp_avg_pred(temp3, second_pred, W, H, temp2, W); \
- \
- return aom_variance##W##x##H##_c(temp3, W, b, b_stride, sse); \
- } \
- uint32_t aom_jnt_sub_pixel_avg_variance##W##x##H##_c( \
- const uint8_t *a, int a_stride, int xoffset, int yoffset, \
- const uint8_t *b, int b_stride, uint32_t *sse, \
- const uint8_t *second_pred, const JNT_COMP_PARAMS *jcp_param) { \
- uint16_t fdata3[(H + 1) * W]; \
- uint8_t temp2[H * W]; \
- DECLARE_ALIGNED(16, uint8_t, temp3[H * W]); \
- \
- aom_var_filter_block2d_bil_first_pass_c(a, fdata3, a_stride, 1, H + 1, W, \
- bilinear_filters_2t[xoffset]); \
- aom_var_filter_block2d_bil_second_pass_c(fdata3, temp2, W, W, H, W, \
- bilinear_filters_2t[yoffset]); \
- \
- aom_jnt_comp_avg_pred(temp3, second_pred, W, H, temp2, W, jcp_param); \
- \
- return aom_variance##W##x##H(temp3, W, b, b_stride, sse); \
- }
-
-/* Generates aom_get<W>x<H>var_c(). Identical to the variance call except it
- * takes an additional parameter, sum, and hands both sse and sum back through
- * the pointers instead of returning sse - sum^2 / (w * h).
- */
-#define GET_VAR(W, H) \
- void aom_get##W##x##H##var_c(const uint8_t *a, int a_stride, \
- const uint8_t *b, int b_stride, uint32_t *sse, \
- int *sum) { \
- variance(a, a_stride, b, b_stride, W, H, sse, sum); \
- }
-
-/* Generates aom_mse<W>x<H>_c(). Identical to the variance call except it does
- * not calculate sse - sum^2 / (w * h); it returns sse in addition to storing
- * it in the passed-in variable.
- */
-#define MSE(W, H) \
- uint32_t aom_mse##W##x##H##_c(const uint8_t *a, int a_stride, \
- const uint8_t *b, int b_stride, \
- uint32_t *sse) { \
- int sum; \
- variance(a, a_stride, b, b_stride, W, H, sse, &sum); \
- return *sse; \
- }
-
-/* All three forms of the variance (VAR, SUBPIX_VAR and SUBPIX_AVG_VAR) are
- * available in the same sizes; one VARIANCES(W, H) line emits all of them.
- */
-#define VARIANCES(W, H) \
- VAR(W, H) \
- SUBPIX_VAR(W, H) \
- SUBPIX_AVG_VAR(W, H)
-
-VARIANCES(128, 128)
-VARIANCES(128, 64)
-VARIANCES(64, 128)
-VARIANCES(64, 64)
-VARIANCES(64, 32)
-VARIANCES(32, 64)
-VARIANCES(32, 32)
-VARIANCES(32, 16)
-VARIANCES(16, 32)
-VARIANCES(16, 16)
-VARIANCES(16, 8)
-VARIANCES(8, 16)
-VARIANCES(8, 8)
-VARIANCES(8, 4)
-VARIANCES(4, 8)
-VARIANCES(4, 4)
-VARIANCES(4, 2)
-VARIANCES(2, 4)
-VARIANCES(2, 2)
-VARIANCES(4, 16)
-VARIANCES(16, 4)
-VARIANCES(8, 32)
-VARIANCES(32, 8)
-VARIANCES(16, 64)
-VARIANCES(64, 16)
-
-/* The sse/sum getters are instantiated for 16x16 and 8x8 only. */
-GET_VAR(16, 16)
-GET_VAR(8, 8)
-
-/* The MSE functions are instantiated for 16x16, 16x8, 8x16 and 8x8 only. */
-MSE(16, 16)
-MSE(16, 8)
-MSE(8, 16)
-MSE(8, 8)
-
-// Writes the ROUND_POWER_OF_TWO(pred + ref, 1) average of two 8-bit blocks
-// into comp_pred. comp_pred and pred are packed with a row pitch of width;
-// ref rows are ref_stride apart.
-void aom_comp_avg_pred_c(uint8_t *comp_pred, const uint8_t *pred, int width,
-                         int height, const uint8_t *ref, int ref_stride) {
-  for (int row = 0; row < height; ++row) {
-    for (int col = 0; col < width; ++col) {
-      const int pair_sum = pred[col] + ref[col];
-      comp_pred[col] = ROUND_POWER_OF_TWO(pair_sum, 1);
-    }
-
-    comp_pred += width;
-    pred += width;
-    ref += ref_stride;
-  }
-}
-
-// Get pred block from up-sampled reference.
-//
-// Fills comp_pred (width x height, row pitch = width) from ref at the
-// sub-pixel offset (subpel_x_q3, subpel_y_q3).
-//  - If xd is non-NULL and av1_is_scaled() reports a scaled reference, the
-//    block is produced by av1_make_inter_predictor() and the function returns.
-//  - Otherwise: both offsets zero copies rows with memcpy; one offset zero
-//    runs a single 1-D convolve; both non-zero runs a horizontal convolve into
-//    a temporary buffer followed by a vertical convolve.
-// subpel_search == 1 selects the 4-tap filter params; any other value selects
-// the block-size based params for EIGHTTAP_REGULAR.
-void aom_upsampled_pred_c(MACROBLOCKD *xd, const AV1_COMMON *const cm,
- int mi_row, int mi_col, const MV *const mv,
- uint8_t *comp_pred, int width, int height,
- int subpel_x_q3, int subpel_y_q3, const uint8_t *ref,
- int ref_stride, int subpel_search) {
- // expect xd == NULL only in tests
- if (xd != NULL) {
- const MB_MODE_INFO *mi = xd->mi[0];
- const int ref_num = 0;
- const int is_intrabc = is_intrabc_block(mi);
- const struct scale_factors *const sf =
- is_intrabc ? &cm->sf_identity : &xd->block_refs[ref_num]->sf;
- const int is_scaled = av1_is_scaled(sf);
-
- if (is_scaled) {
- // Note: This is mostly a copy from the >=8X8 case in
- // build_inter_predictors() function, with some small tweaks.
-
- // Some assumptions.
- const int plane = 0;
-
- // Get pre-requisites.
- const struct macroblockd_plane *const pd = &xd->plane[plane];
- const int ssx = pd->subsampling_x;
- const int ssy = pd->subsampling_y;
- assert(ssx == 0 && ssy == 0);
- const struct buf_2d *const dst_buf = &pd->dst;
- const struct buf_2d *const pre_buf =
- is_intrabc ? dst_buf : &pd->pre[ref_num];
- const int mi_x = mi_col * MI_SIZE;
- const int mi_y = mi_row * MI_SIZE;
-
- // Calculate subpel_x/y and x/y_step.
- const int row_start = 0; // Because ss_y is 0.
- const int col_start = 0; // Because ss_x is 0.
- const int pre_x = (mi_x + MI_SIZE * col_start) >> ssx;
- const int pre_y = (mi_y + MI_SIZE * row_start) >> ssy;
- int orig_pos_y = pre_y << SUBPEL_BITS;
- orig_pos_y += mv->row * (1 << (1 - ssy));
- int orig_pos_x = pre_x << SUBPEL_BITS;
- orig_pos_x += mv->col * (1 << (1 - ssx));
- int pos_y = sf->scale_value_y(orig_pos_y, sf);
- int pos_x = sf->scale_value_x(orig_pos_x, sf);
- pos_x += SCALE_EXTRA_OFF;
- pos_y += SCALE_EXTRA_OFF;
-
- // Clamp the scaled position to the reference frame plus its margins.
- const int top = -AOM_LEFT_TOP_MARGIN_SCALED(ssy);
- const int left = -AOM_LEFT_TOP_MARGIN_SCALED(ssx);
- const int bottom = (pre_buf->height + AOM_INTERP_EXTEND)
- << SCALE_SUBPEL_BITS;
- const int right = (pre_buf->width + AOM_INTERP_EXTEND)
- << SCALE_SUBPEL_BITS;
- pos_y = clamp(pos_y, top, bottom);
- pos_x = clamp(pos_x, left, right);
-
- // Integer part of the position selects the source sample; the fractional
- // part goes into subpel_params below.
- const uint8_t *const pre =
- pre_buf->buf0 + (pos_y >> SCALE_SUBPEL_BITS) * pre_buf->stride +
- (pos_x >> SCALE_SUBPEL_BITS);
-
- const SubpelParams subpel_params = { sf->x_step_q4, sf->y_step_q4,
- pos_x & SCALE_SUBPEL_MASK,
- pos_y & SCALE_SUBPEL_MASK };
-
- // Get warp types.
- const WarpedMotionParams *const wm =
- &xd->global_motion[mi->ref_frame[ref_num]];
- const int is_global = is_global_mv_block(mi, wm->wmtype);
- WarpTypesAllowed warp_types;
- warp_types.global_warp_allowed = is_global;
- warp_types.local_warp_allowed = mi->motion_mode == WARPED_CAUSAL;
-
- // Get convolve parameters.
- ConvolveParams conv_params = get_conv_params(0, plane, xd->bd);
- const InterpFilters filters =
- av1_broadcast_interp_filter(EIGHTTAP_REGULAR);
-
- // Get the inter predictor.
- const int build_for_obmc = 0;
- av1_make_inter_predictor(pre, pre_buf->stride, comp_pred, width,
- &subpel_params, sf, width, height, &conv_params,
- filters, &warp_types, mi_x >> pd->subsampling_x,
- mi_y >> pd->subsampling_y, plane, ref_num, mi,
- build_for_obmc, xd, cm->allow_warped_motion);
-
- return;
- }
- }
-
- // Unscaled path (also taken when xd is NULL).
- const InterpFilterParams *filter =
- (subpel_search == 1)
- ? av1_get_4tap_interp_filter_params(EIGHTTAP_REGULAR)
- : av1_get_interp_filter_params_with_block_size(EIGHTTAP_REGULAR, 8);
-
- if (!subpel_x_q3 && !subpel_y_q3) {
- // No fractional offset: plain row-by-row copy.
- for (int i = 0; i < height; i++) {
- memcpy(comp_pred, ref, width * sizeof(*comp_pred));
- comp_pred += width;
- ref += ref_stride;
- }
- } else if (!subpel_y_q3) {
- // Horizontal-only offset. The q3 offset is shifted left by one before it
- // is used as the kernel index.
- const int16_t *const kernel =
- av1_get_interp_filter_subpel_kernel(filter, subpel_x_q3 << 1);
- aom_convolve8_horiz_c(ref, ref_stride, comp_pred, width, kernel, 16, NULL,
- -1, width, height);
- } else if (!subpel_x_q3) {
- // Vertical-only offset.
- const int16_t *const kernel =
- av1_get_interp_filter_subpel_kernel(filter, subpel_y_q3 << 1);
- aom_convolve8_vert_c(ref, ref_stride, comp_pred, width, NULL, -1, kernel,
- 16, width, height);
- } else {
- // Both offsets non-zero: horizontal pass into temp (row pitch
- // MAX_SB_SIZE), then vertical pass from temp into comp_pred.
- DECLARE_ALIGNED(16, uint8_t,
- temp[((MAX_SB_SIZE * 2 + 16) + 16) * MAX_SB_SIZE]);
- const int16_t *const kernel_x =
- av1_get_interp_filter_subpel_kernel(filter, subpel_x_q3 << 1);
- const int16_t *const kernel_y =
- av1_get_interp_filter_subpel_kernel(filter, subpel_y_q3 << 1);
- const int intermediate_height =
- (((height - 1) * 8 + subpel_y_q3) >> 3) + filter->taps;
- assert(intermediate_height <= (MAX_SB_SIZE * 2 + 16) + 16);
- // Start (taps / 2 - 1) rows above ref so the vertical filter has the rows
- // it needs; the vertical pass skips the same number of rows in temp.
- aom_convolve8_horiz_c(ref - ref_stride * ((filter->taps >> 1) - 1),
- ref_stride, temp, MAX_SB_SIZE, kernel_x, 16, NULL, -1,
- width, intermediate_height);
- aom_convolve8_vert_c(temp + MAX_SB_SIZE * ((filter->taps >> 1) - 1),
- MAX_SB_SIZE, comp_pred, width, NULL, -1, kernel_y, 16,
- width, height);
- }
-}
-
-// Builds the up-sampled prediction into comp_pred via aom_upsampled_pred(),
-// then replaces every sample with ROUND_POWER_OF_TWO(comp_pred + pred, 1).
-// Both comp_pred and pred are walked with a row pitch of width.
-void aom_comp_avg_upsampled_pred_c(MACROBLOCKD *xd, const AV1_COMMON *const cm,
-                                   int mi_row, int mi_col, const MV *const mv,
-                                   uint8_t *comp_pred, const uint8_t *pred,
-                                   int width, int height, int subpel_x_q3,
-                                   int subpel_y_q3, const uint8_t *ref,
-                                   int ref_stride, int subpel_search) {
-  aom_upsampled_pred(xd, cm, mi_row, mi_col, mv, comp_pred, width, height,
-                     subpel_x_q3, subpel_y_q3, ref, ref_stride, subpel_search);
-
-  for (int row = 0; row < height; ++row) {
-    for (int col = 0; col < width; ++col) {
-      comp_pred[col] = ROUND_POWER_OF_TWO(comp_pred[col] + pred[col], 1);
-    }
-
-    comp_pred += width;
-    pred += width;
-  }
-}
-
-// Weighted blend of two 8-bit blocks: each output sample is
-// ROUND_POWER_OF_TWO(pred * bck_offset + ref * fwd_offset,
-// DIST_PRECISION_BITS), with both offsets read from jcp_param. comp_pred and
-// pred use a row pitch of width; ref rows are ref_stride apart.
-void aom_jnt_comp_avg_pred_c(uint8_t *comp_pred, const uint8_t *pred, int width,
-                             int height, const uint8_t *ref, int ref_stride,
-                             const JNT_COMP_PARAMS *jcp_param) {
-  const int ref_weight = jcp_param->fwd_offset;
-  const int pred_weight = jcp_param->bck_offset;
-
-  for (int row = 0; row < height; ++row) {
-    for (int col = 0; col < width; ++col) {
-      const int weighted = pred[col] * pred_weight + ref[col] * ref_weight;
-      comp_pred[col] =
-          (uint8_t)ROUND_POWER_OF_TWO(weighted, DIST_PRECISION_BITS);
-    }
-
-    comp_pred += width;
-    pred += width;
-    ref += ref_stride;
-  }
-}
-
-// Builds the up-sampled prediction into comp_pred via aom_upsampled_pred_c(),
-// then blends it in place with pred: each sample becomes
-// ROUND_POWER_OF_TWO(pred * bck_offset + comp_pred * fwd_offset,
-// DIST_PRECISION_BITS), with both offsets read from jcp_param.
-void aom_jnt_comp_avg_upsampled_pred_c(
-    MACROBLOCKD *xd, const AV1_COMMON *const cm, int mi_row, int mi_col,
-    const MV *const mv, uint8_t *comp_pred, const uint8_t *pred, int width,
-    int height, int subpel_x_q3, int subpel_y_q3, const uint8_t *ref,
-    int ref_stride, const JNT_COMP_PARAMS *jcp_param, int subpel_search) {
-  const int upsampled_weight = jcp_param->fwd_offset;
-  const int pred_weight = jcp_param->bck_offset;
-
-  aom_upsampled_pred_c(xd, cm, mi_row, mi_col, mv, comp_pred, width, height,
-                       subpel_x_q3, subpel_y_q3, ref, ref_stride,
-                       subpel_search);
-
-  for (int row = 0; row < height; ++row) {
-    for (int col = 0; col < width; ++col) {
-      const int weighted =
-          pred[col] * pred_weight + comp_pred[col] * upsampled_weight;
-      comp_pred[col] =
-          (uint8_t)ROUND_POWER_OF_TWO(weighted, DIST_PRECISION_BITS);
-    }
-
-    comp_pred += width;
-    pred += width;
-  }
-}
-
-// Accumulates the sum of differences and the sum of squared differences of
-// two w x h blocks of 16-bit samples using 64-bit totals. a8/b8 are converted
-// with CONVERT_TO_SHORTPTR before being read. Each row's differences are first
-// summed in a 32-bit value and then folded into the 64-bit total.
-static void highbd_variance64(const uint8_t *a8, int a_stride,
-                              const uint8_t *b8, int b_stride, int w, int h,
-                              uint64_t *sse, int64_t *sum) {
-  const uint16_t *row_a = CONVERT_TO_SHORTPTR(a8);
-  const uint16_t *row_b = CONVERT_TO_SHORTPTR(b8);
-  uint64_t sq_total = 0;
-  int64_t diff_total = 0;
-
-  for (int row = 0; row < h; ++row, row_a += a_stride, row_b += b_stride) {
-    int32_t row_sum = 0;
-
-    for (int col = 0; col < w; ++col) {
-      const int d = row_a[col] - row_b[col];
-      row_sum += d;
-      sq_total += (uint32_t)(d * d);
-    }
-
-    diff_total += row_sum;
-  }
-
-  *sum = diff_total;
-  *sse = sq_total;
-}
-
-// Returns the 64-bit sum of squared differences over an arbitrary w x h
-// high-bitdepth block; the sum produced by highbd_variance64() is discarded.
-uint64_t aom_highbd_sse_odd_size(const uint8_t *a, int a_stride,
-                                 const uint8_t *b, int b_stride, int w, int h) {
-  int64_t unused_sum;
-  uint64_t result;
-
-  highbd_variance64(a, a_stride, b, b_stride, w, h, &result, &unused_sum);
-  return result;
-}
-
-// Runs highbd_variance64() and narrows its 64-bit results to uint32_t / int
-// with plain casts (no scaling).
-static void highbd_8_variance(const uint8_t *a8, int a_stride,
-                              const uint8_t *b8, int b_stride, int w, int h,
-                              uint32_t *sse, int *sum) {
-  uint64_t sse64 = 0;
-  int64_t sum64 = 0;
-
-  highbd_variance64(a8, a_stride, b8, b_stride, w, h, &sse64, &sum64);
-
-  *sse = (uint32_t)sse64;
-  *sum = (int)sum64;
-}
-
-// Runs highbd_variance64() and scales its results before narrowing: sse is
-// reduced with ROUND_POWER_OF_TWO(..., 4) and sum with
-// ROUND_POWER_OF_TWO(..., 2).
-static void highbd_10_variance(const uint8_t *a8, int a_stride,
-                               const uint8_t *b8, int b_stride, int w, int h,
-                               uint32_t *sse, int *sum) {
-  uint64_t sse64 = 0;
-  int64_t sum64 = 0;
-
-  highbd_variance64(a8, a_stride, b8, b_stride, w, h, &sse64, &sum64);
-
-  *sse = (uint32_t)ROUND_POWER_OF_TWO(sse64, 4);
-  *sum = (int)ROUND_POWER_OF_TWO(sum64, 2);
-}
-
-// Runs highbd_variance64() and scales its results before narrowing: sse is
-// reduced with ROUND_POWER_OF_TWO(..., 8) and sum with
-// ROUND_POWER_OF_TWO(..., 4).
-static void highbd_12_variance(const uint8_t *a8, int a_stride,
-                               const uint8_t *b8, int b_stride, int w, int h,
-                               uint32_t *sse, int *sum) {
-  uint64_t sse64 = 0;
-  int64_t sum64 = 0;
-
-  highbd_variance64(a8, a_stride, b8, b_stride, w, h, &sse64, &sum64);
-
-  *sse = (uint32_t)ROUND_POWER_OF_TWO(sse64, 8);
-  *sum = (int)ROUND_POWER_OF_TWO(sum64, 4);
-}
-
-/* Generates aom_highbd_{8,10,12}_variance<W>x<H>_c(). Each calls the matching
- * highbd_N_variance() helper, leaves the sum of squared differences in *sse
- * and returns sse - sum * sum / (W * H).
- * The 8-bit version returns the unsigned 32-bit difference directly; the 10-
- * and 12-bit versions form the difference in int64_t and return 0 when it is
- * negative.
- */
-#define HIGHBD_VAR(W, H) \
- uint32_t aom_highbd_8_variance##W##x##H##_c(const uint8_t *a, int a_stride, \
- const uint8_t *b, int b_stride, \
- uint32_t *sse) { \
- int sum; \
- highbd_8_variance(a, a_stride, b, b_stride, W, H, sse, &sum); \
- return *sse - (uint32_t)(((int64_t)sum * sum) / (W * H)); \
- } \
- \
- uint32_t aom_highbd_10_variance##W##x##H##_c(const uint8_t *a, int a_stride, \
- const uint8_t *b, int b_stride, \
- uint32_t *sse) { \
- int sum; \
- int64_t var; \
- highbd_10_variance(a, a_stride, b, b_stride, W, H, sse, &sum); \
- var = (int64_t)(*sse) - (((int64_t)sum * sum) / (W * H)); \
- return (var >= 0) ? (uint32_t)var : 0; \
- } \
- \
- uint32_t aom_highbd_12_variance##W##x##H##_c(const uint8_t *a, int a_stride, \
- const uint8_t *b, int b_stride, \
- uint32_t *sse) { \
- int sum; \
- int64_t var; \
- highbd_12_variance(a, a_stride, b, b_stride, W, H, sse, &sum); \
- var = (int64_t)(*sse) - (((int64_t)sum * sum) / (W * H)); \
- return (var >= 0) ? (uint32_t)var : 0; \
- }
-
-/* Generates aom_highbd_{8,10,12}_get<S>x<S>var_c() for a square S x S block.
- * Each forwards to the matching highbd_N_variance() helper and hands sse and
- * sum back through the pointers; nothing is returned.
- */
-#define HIGHBD_GET_VAR(S) \
- void aom_highbd_8_get##S##x##S##var_c(const uint8_t *src, int src_stride, \
- const uint8_t *ref, int ref_stride, \
- uint32_t *sse, int *sum) { \
- highbd_8_variance(src, src_stride, ref, ref_stride, S, S, sse, sum); \
- } \
- \
- void aom_highbd_10_get##S##x##S##var_c(const uint8_t *src, int src_stride, \
- const uint8_t *ref, int ref_stride, \
- uint32_t *sse, int *sum) { \
- highbd_10_variance(src, src_stride, ref, ref_stride, S, S, sse, sum); \
- } \
- \
- void aom_highbd_12_get##S##x##S##var_c(const uint8_t *src, int src_stride, \
- const uint8_t *ref, int ref_stride, \
- uint32_t *sse, int *sum) { \
- highbd_12_variance(src, src_stride, ref, ref_stride, S, S, sse, sum); \
- }
-
-/* Generates aom_highbd_{8,10,12}_mse<W>x<H>_c(). Each calls the matching
- * highbd_N_variance() helper, stores the sum of squared differences in *sse
- * and returns that same value; the sum is computed but not used.
- */
-#define HIGHBD_MSE(W, H) \
- uint32_t aom_highbd_8_mse##W##x##H##_c(const uint8_t *src, int src_stride, \
- const uint8_t *ref, int ref_stride, \
- uint32_t *sse) { \
- int sum; \
- highbd_8_variance(src, src_stride, ref, ref_stride, W, H, sse, &sum); \
- return *sse; \
- } \
- \
- uint32_t aom_highbd_10_mse##W##x##H##_c(const uint8_t *src, int src_stride, \
- const uint8_t *ref, int ref_stride, \
- uint32_t *sse) { \
- int sum; \
- highbd_10_variance(src, src_stride, ref, ref_stride, W, H, sse, &sum); \
- return *sse; \
- } \
- \
- uint32_t aom_highbd_12_mse##W##x##H##_c(const uint8_t *src, int src_stride, \
- const uint8_t *ref, int ref_stride, \
- uint32_t *sse) { \
- int sum; \
- highbd_12_variance(src, src_stride, ref, ref_stride, W, H, sse, &sum); \
- return *sse; \
- }
-
-// High-bitdepth first pass of the 2-D separable bilinear filter. src_ptr8 is
-// converted with CONVERT_TO_SHORTPTR; each output sample combines the current
-// input and the one pixel_step away with filter[0] / filter[1], then is
-// reduced with ROUND_POWER_OF_TWO(..., FILTER_BITS). Input rows are
-// src_pixels_per_line apart; output rows are packed output_width apart.
-void aom_highbd_var_filter_block2d_bil_first_pass(
-    const uint8_t *src_ptr8, uint16_t *output_ptr,
-    unsigned int src_pixels_per_line, int pixel_step,
-    unsigned int output_height, unsigned int output_width,
-    const uint8_t *filter) {
-  uint16_t *in = CONVERT_TO_SHORTPTR(src_ptr8);
-
-  for (unsigned int row = 0; row < output_height; ++row) {
-    for (unsigned int col = 0; col < output_width; ++col, ++in) {
-      const int acc =
-          (int)in[0] * filter[0] + (int)in[pixel_step] * filter[1];
-      output_ptr[col] = ROUND_POWER_OF_TWO(acc, FILTER_BITS);
-    }
-
-    // in has already advanced output_width samples; skip the rest of the row.
-    in += src_pixels_per_line - output_width;
-    output_ptr += output_width;
-  }
-}
-
-// High-bitdepth second pass of the 2-D separable bilinear filter. Reads the
-// 16-bit first-pass output and writes 16-bit samples: each output combines
-// the current input and the one pixel_step away with filter[0] / filter[1],
-// then is reduced with ROUND_POWER_OF_TWO(..., FILTER_BITS). Input rows are
-// src_pixels_per_line apart; output rows are packed output_width apart.
-void aom_highbd_var_filter_block2d_bil_second_pass(
-    const uint16_t *src_ptr, uint16_t *output_ptr,
-    unsigned int src_pixels_per_line, unsigned int pixel_step,
-    unsigned int output_height, unsigned int output_width,
-    const uint8_t *filter) {
-  for (unsigned int row = 0; row < output_height; ++row) {
-    for (unsigned int col = 0; col < output_width; ++col, ++src_ptr) {
-      const int acc =
-          (int)src_ptr[0] * filter[0] + (int)src_ptr[pixel_step] * filter[1];
-      output_ptr[col] = ROUND_POWER_OF_TWO(acc, FILTER_BITS);
-    }
-
-    // src_ptr has already advanced output_width samples; skip the rest of the
-    // row.
-    src_ptr += src_pixels_per_line - output_width;
-    output_ptr += output_width;
-  }
-}
-
-/* Generates aom_highbd_{8,10,12}_sub_pixel_variance<W>x<H>_c(). Each filters
- * src along rows (pixel_step 1) with bilinear_filters_2t[xoffset] into H + 1
- * rows of W samples, filters that along columns (pixel_step W) with
- * bilinear_filters_2t[yoffset] into an H x W 16-bit block, and returns the
- * matching aom_highbd_N_variance<W>x<H>_c() of that block against dst. The
- * three bodies differ only in which variance function they call.
- */
-#define HIGHBD_SUBPIX_VAR(W, H) \
- uint32_t aom_highbd_8_sub_pixel_variance##W##x##H##_c( \
- const uint8_t *src, int src_stride, int xoffset, int yoffset, \
- const uint8_t *dst, int dst_stride, uint32_t *sse) { \
- uint16_t fdata3[(H + 1) * W]; \
- uint16_t temp2[H * W]; \
- \
- aom_highbd_var_filter_block2d_bil_first_pass( \
- src, fdata3, src_stride, 1, H + 1, W, bilinear_filters_2t[xoffset]); \
- aom_highbd_var_filter_block2d_bil_second_pass( \
- fdata3, temp2, W, W, H, W, bilinear_filters_2t[yoffset]); \
- \
- return aom_highbd_8_variance##W##x##H##_c(CONVERT_TO_BYTEPTR(temp2), W, \
- dst, dst_stride, sse); \
- } \
- \
- uint32_t aom_highbd_10_sub_pixel_variance##W##x##H##_c( \
- const uint8_t *src, int src_stride, int xoffset, int yoffset, \
- const uint8_t *dst, int dst_stride, uint32_t *sse) { \
- uint16_t fdata3[(H + 1) * W]; \
- uint16_t temp2[H * W]; \
- \
- aom_highbd_var_filter_block2d_bil_first_pass( \
- src, fdata3, src_stride, 1, H + 1, W, bilinear_filters_2t[xoffset]); \
- aom_highbd_var_filter_block2d_bil_second_pass( \
- fdata3, temp2, W, W, H, W, bilinear_filters_2t[yoffset]); \
- \
- return aom_highbd_10_variance##W##x##H##_c(CONVERT_TO_BYTEPTR(temp2), W, \
- dst, dst_stride, sse); \
- } \
- \
- uint32_t aom_highbd_12_sub_pixel_variance##W##x##H##_c( \
- const uint8_t *src, int src_stride, int xoffset, int yoffset, \
- const uint8_t *dst, int dst_stride, uint32_t *sse) { \
- uint16_t fdata3[(H + 1) * W]; \
- uint16_t temp2[H * W]; \
- \
- aom_highbd_var_filter_block2d_bil_first_pass( \
- src, fdata3, src_stride, 1, H + 1, W, bilinear_filters_2t[xoffset]); \
- aom_highbd_var_filter_block2d_bil_second_pass( \
- fdata3, temp2, W, W, H, W, bilinear_filters_2t[yoffset]); \
- \
- return aom_highbd_12_variance##W##x##H##_c(CONVERT_TO_BYTEPTR(temp2), W, \
- dst, dst_stride, sse); \
- }
-
-/* Generates six functions per block size, for bit depths 8, 10 and 12. All
- * of them bilinear-filter src exactly as HIGHBD_SUBPIX_VAR does, combine the
- * filtered block with second_pred into temp3, and return the variance of
- * temp3 against dst:
- *  - aom_highbd_N_sub_pixel_avg_variance<W>x<H>_c() combines via
- *    aom_highbd_comp_avg_pred_c().
- *  - aom_highbd_N_jnt_sub_pixel_avg_variance<W>x<H>_c() combines via
- *    aom_highbd_jnt_comp_avg_pred() using jcp_param.
- * NOTE(review): the jnt variants call aom_highbd_jnt_comp_avg_pred and
- * aom_highbd_N_variance<W>x<H> without the _c suffix, while the plain
- * variants call the _c functions; confirm this difference is intended.
- */
-#define HIGHBD_SUBPIX_AVG_VAR(W, H) \
- uint32_t aom_highbd_8_sub_pixel_avg_variance##W##x##H##_c( \
- const uint8_t *src, int src_stride, int xoffset, int yoffset, \
- const uint8_t *dst, int dst_stride, uint32_t *sse, \
- const uint8_t *second_pred) { \
- uint16_t fdata3[(H + 1) * W]; \
- uint16_t temp2[H * W]; \
- DECLARE_ALIGNED(16, uint16_t, temp3[H * W]); \
- \
- aom_highbd_var_filter_block2d_bil_first_pass( \
- src, fdata3, src_stride, 1, H + 1, W, bilinear_filters_2t[xoffset]); \
- aom_highbd_var_filter_block2d_bil_second_pass( \
- fdata3, temp2, W, W, H, W, bilinear_filters_2t[yoffset]); \
- \
- aom_highbd_comp_avg_pred_c(CONVERT_TO_BYTEPTR(temp3), second_pred, W, H, \
- CONVERT_TO_BYTEPTR(temp2), W); \
- \
- return aom_highbd_8_variance##W##x##H##_c(CONVERT_TO_BYTEPTR(temp3), W, \
- dst, dst_stride, sse); \
- } \
- \
- uint32_t aom_highbd_10_sub_pixel_avg_variance##W##x##H##_c( \
- const uint8_t *src, int src_stride, int xoffset, int yoffset, \
- const uint8_t *dst, int dst_stride, uint32_t *sse, \
- const uint8_t *second_pred) { \
- uint16_t fdata3[(H + 1) * W]; \
- uint16_t temp2[H * W]; \
- DECLARE_ALIGNED(16, uint16_t, temp3[H * W]); \
- \
- aom_highbd_var_filter_block2d_bil_first_pass( \
- src, fdata3, src_stride, 1, H + 1, W, bilinear_filters_2t[xoffset]); \
- aom_highbd_var_filter_block2d_bil_second_pass( \
- fdata3, temp2, W, W, H, W, bilinear_filters_2t[yoffset]); \
- \
- aom_highbd_comp_avg_pred_c(CONVERT_TO_BYTEPTR(temp3), second_pred, W, H, \
- CONVERT_TO_BYTEPTR(temp2), W); \
- \
- return aom_highbd_10_variance##W##x##H##_c(CONVERT_TO_BYTEPTR(temp3), W, \
- dst, dst_stride, sse); \
- } \
- \
- uint32_t aom_highbd_12_sub_pixel_avg_variance##W##x##H##_c( \
- const uint8_t *src, int src_stride, int xoffset, int yoffset, \
- const uint8_t *dst, int dst_stride, uint32_t *sse, \
- const uint8_t *second_pred) { \
- uint16_t fdata3[(H + 1) * W]; \
- uint16_t temp2[H * W]; \
- DECLARE_ALIGNED(16, uint16_t, temp3[H * W]); \
- \
- aom_highbd_var_filter_block2d_bil_first_pass( \
- src, fdata3, src_stride, 1, H + 1, W, bilinear_filters_2t[xoffset]); \
- aom_highbd_var_filter_block2d_bil_second_pass( \
- fdata3, temp2, W, W, H, W, bilinear_filters_2t[yoffset]); \
- \
- aom_highbd_comp_avg_pred_c(CONVERT_TO_BYTEPTR(temp3), second_pred, W, H, \
- CONVERT_TO_BYTEPTR(temp2), W); \
- \
- return aom_highbd_12_variance##W##x##H##_c(CONVERT_TO_BYTEPTR(temp3), W, \
- dst, dst_stride, sse); \
- } \
- \
- uint32_t aom_highbd_8_jnt_sub_pixel_avg_variance##W##x##H##_c( \
- const uint8_t *src, int src_stride, int xoffset, int yoffset, \
- const uint8_t *dst, int dst_stride, uint32_t *sse, \
- const uint8_t *second_pred, const JNT_COMP_PARAMS *jcp_param) { \
- uint16_t fdata3[(H + 1) * W]; \
- uint16_t temp2[H * W]; \
- DECLARE_ALIGNED(16, uint16_t, temp3[H * W]); \
- \
- aom_highbd_var_filter_block2d_bil_first_pass( \
- src, fdata3, src_stride, 1, H + 1, W, bilinear_filters_2t[xoffset]); \
- aom_highbd_var_filter_block2d_bil_second_pass( \
- fdata3, temp2, W, W, H, W, bilinear_filters_2t[yoffset]); \
- \
- aom_highbd_jnt_comp_avg_pred(CONVERT_TO_BYTEPTR(temp3), second_pred, W, H, \
- CONVERT_TO_BYTEPTR(temp2), W, jcp_param); \
- \
- return aom_highbd_8_variance##W##x##H(CONVERT_TO_BYTEPTR(temp3), W, dst, \
- dst_stride, sse); \
- } \
- \
- uint32_t aom_highbd_10_jnt_sub_pixel_avg_variance##W##x##H##_c( \
- const uint8_t *src, int src_stride, int xoffset, int yoffset, \
- const uint8_t *dst, int dst_stride, uint32_t *sse, \
- const uint8_t *second_pred, const JNT_COMP_PARAMS *jcp_param) { \
- uint16_t fdata3[(H + 1) * W]; \
- uint16_t temp2[H * W]; \
- DECLARE_ALIGNED(16, uint16_t, temp3[H * W]); \
- \
- aom_highbd_var_filter_block2d_bil_first_pass( \
- src, fdata3, src_stride, 1, H + 1, W, bilinear_filters_2t[xoffset]); \
- aom_highbd_var_filter_block2d_bil_second_pass( \
- fdata3, temp2, W, W, H, W, bilinear_filters_2t[yoffset]); \
- \
- aom_highbd_jnt_comp_avg_pred(CONVERT_TO_BYTEPTR(temp3), second_pred, W, H, \
- CONVERT_TO_BYTEPTR(temp2), W, jcp_param); \
- \
- return aom_highbd_10_variance##W##x##H(CONVERT_TO_BYTEPTR(temp3), W, dst, \
- dst_stride, sse); \
- } \
- \
- uint32_t aom_highbd_12_jnt_sub_pixel_avg_variance##W##x##H##_c( \
- const uint8_t *src, int src_stride, int xoffset, int yoffset, \
- const uint8_t *dst, int dst_stride, uint32_t *sse, \
- const uint8_t *second_pred, const JNT_COMP_PARAMS *jcp_param) { \
- uint16_t fdata3[(H + 1) * W]; \
- uint16_t temp2[H * W]; \
- DECLARE_ALIGNED(16, uint16_t, temp3[H * W]); \
- \
- aom_highbd_var_filter_block2d_bil_first_pass( \
- src, fdata3, src_stride, 1, H + 1, W, bilinear_filters_2t[xoffset]); \
- aom_highbd_var_filter_block2d_bil_second_pass( \
- fdata3, temp2, W, W, H, W, bilinear_filters_2t[yoffset]); \
- \
- aom_highbd_jnt_comp_avg_pred(CONVERT_TO_BYTEPTR(temp3), second_pred, W, H, \
- CONVERT_TO_BYTEPTR(temp2), W, jcp_param); \
- \
- return aom_highbd_12_variance##W##x##H(CONVERT_TO_BYTEPTR(temp3), W, dst, \
- dst_stride, sse); \
- }
-
-/* All three forms of the variance (HIGHBD_VAR, HIGHBD_SUBPIX_VAR and
- * HIGHBD_SUBPIX_AVG_VAR) are available in the same sizes; one
- * HIGHBD_VARIANCES(W, H) line emits all of them.
- */
-#define HIGHBD_VARIANCES(W, H) \
- HIGHBD_VAR(W, H) \
- HIGHBD_SUBPIX_VAR(W, H) \
- HIGHBD_SUBPIX_AVG_VAR(W, H)
-
-HIGHBD_VARIANCES(128, 128)
-HIGHBD_VARIANCES(128, 64)
-HIGHBD_VARIANCES(64, 128)
-HIGHBD_VARIANCES(64, 64)
-HIGHBD_VARIANCES(64, 32)
-HIGHBD_VARIANCES(32, 64)
-HIGHBD_VARIANCES(32, 32)
-HIGHBD_VARIANCES(32, 16)
-HIGHBD_VARIANCES(16, 32)
-HIGHBD_VARIANCES(16, 16)
-HIGHBD_VARIANCES(16, 8)
-HIGHBD_VARIANCES(8, 16)
-HIGHBD_VARIANCES(8, 8)
-HIGHBD_VARIANCES(8, 4)
-HIGHBD_VARIANCES(4, 8)
-HIGHBD_VARIANCES(4, 4)
-HIGHBD_VARIANCES(4, 2)
-HIGHBD_VARIANCES(2, 4)
-HIGHBD_VARIANCES(2, 2)
-HIGHBD_VARIANCES(4, 16)
-HIGHBD_VARIANCES(16, 4)
-HIGHBD_VARIANCES(8, 32)
-HIGHBD_VARIANCES(32, 8)
-HIGHBD_VARIANCES(16, 64)
-HIGHBD_VARIANCES(64, 16)
-
-/* The sse/sum getters are instantiated for 8x8 and 16x16 only. */
-HIGHBD_GET_VAR(8)
-HIGHBD_GET_VAR(16)
-
-/* The MSE functions are instantiated for 16x16, 16x8, 8x16 and 8x8 only. */
-HIGHBD_MSE(16, 16)
-HIGHBD_MSE(16, 8)
-HIGHBD_MSE(8, 16)
-HIGHBD_MSE(8, 8)
-
-// High-bitdepth counterpart of the 8-bit averaging predictor: all three
-// buffers are converted with CONVERT_TO_SHORTPTR and each output sample is
-// ROUND_POWER_OF_TWO(pred + ref, 1). comp_pred and pred use a row pitch of
-// width; ref rows are ref_stride apart.
-void aom_highbd_comp_avg_pred_c(uint8_t *comp_pred8, const uint8_t *pred8,
-                                int width, int height, const uint8_t *ref8,
-                                int ref_stride) {
-  uint16_t *pred = CONVERT_TO_SHORTPTR(pred8);
-  uint16_t *ref = CONVERT_TO_SHORTPTR(ref8);
-  uint16_t *comp_pred = CONVERT_TO_SHORTPTR(comp_pred8);
-
-  for (int row = 0; row < height; ++row) {
-    for (int col = 0; col < width; ++col) {
-      const int pair_sum = pred[col] + ref[col];
-      comp_pred[col] = ROUND_POWER_OF_TWO(pair_sum, 1);
-    }
-
-    comp_pred += width;
-    pred += width;
-    ref += ref_stride;
-  }
-}
-
-// High-bitdepth version of the up-sampled prediction builder.
-//
-// Fills comp_pred8 (width x height, row pitch = width) from ref8 at the
-// sub-pixel offset (subpel_x_q3, subpel_y_q3); bd is forwarded to the
-// high-bitdepth convolve functions.
-//  - If xd is non-NULL and av1_is_scaled() reports a scaled reference, the
-//    block is produced by av1_make_inter_predictor() and the function returns.
-//  - Otherwise: both offsets zero copies rows with memcpy; one offset zero
-//    runs a single 1-D convolve; both non-zero runs a horizontal convolve into
-//    a temporary buffer followed by a vertical convolve.
-// subpel_search == 1 selects the 4-tap filter params; any other value selects
-// the block-size based params for EIGHTTAP_REGULAR.
-void aom_highbd_upsampled_pred_c(MACROBLOCKD *xd,
-                                 const struct AV1Common *const cm, int mi_row,
-                                 int mi_col, const MV *const mv,
-                                 uint8_t *comp_pred8, int width, int height,
-                                 int subpel_x_q3, int subpel_y_q3,
-                                 const uint8_t *ref8, int ref_stride, int bd,
-                                 int subpel_search) {
-  // expect xd == NULL only in tests
-  if (xd != NULL) {
-    const MB_MODE_INFO *mi = xd->mi[0];
-    const int ref_num = 0;
-    const int is_intrabc = is_intrabc_block(mi);
-    const struct scale_factors *const sf =
-        is_intrabc ? &cm->sf_identity : &xd->block_refs[ref_num]->sf;
-    const int is_scaled = av1_is_scaled(sf);
-
-    if (is_scaled) {
-      // Note: This is mostly a copy from the >=8X8 case in
-      // build_inter_predictors() function, with some small tweaks.
-      // Some assumptions.
-      const int plane = 0;
-
-      // Get pre-requisites.
-      const struct macroblockd_plane *const pd = &xd->plane[plane];
-      const int ssx = pd->subsampling_x;
-      const int ssy = pd->subsampling_y;
-      assert(ssx == 0 && ssy == 0);
-      const struct buf_2d *const dst_buf = &pd->dst;
-      const struct buf_2d *const pre_buf =
-          is_intrabc ? dst_buf : &pd->pre[ref_num];
-      const int mi_x = mi_col * MI_SIZE;
-      const int mi_y = mi_row * MI_SIZE;
-
-      // Calculate subpel_x/y and x/y_step.
-      const int row_start = 0;  // Because ss_y is 0.
-      const int col_start = 0;  // Because ss_x is 0.
-      const int pre_x = (mi_x + MI_SIZE * col_start) >> ssx;
-      const int pre_y = (mi_y + MI_SIZE * row_start) >> ssy;
-      int orig_pos_y = pre_y << SUBPEL_BITS;
-      orig_pos_y += mv->row * (1 << (1 - ssy));
-      int orig_pos_x = pre_x << SUBPEL_BITS;
-      orig_pos_x += mv->col * (1 << (1 - ssx));
-      int pos_y = sf->scale_value_y(orig_pos_y, sf);
-      int pos_x = sf->scale_value_x(orig_pos_x, sf);
-      pos_x += SCALE_EXTRA_OFF;
-      pos_y += SCALE_EXTRA_OFF;
-
-      // Clamp the scaled position to the reference frame plus its margins.
-      const int top = -AOM_LEFT_TOP_MARGIN_SCALED(ssy);
-      const int left = -AOM_LEFT_TOP_MARGIN_SCALED(ssx);
-      const int bottom = (pre_buf->height + AOM_INTERP_EXTEND)
-                         << SCALE_SUBPEL_BITS;
-      const int right = (pre_buf->width + AOM_INTERP_EXTEND)
-                        << SCALE_SUBPEL_BITS;
-      pos_y = clamp(pos_y, top, bottom);
-      pos_x = clamp(pos_x, left, right);
-
-      // Integer part of the position selects the source sample; the
-      // fractional part goes into subpel_params below.
-      const uint8_t *const pre =
-          pre_buf->buf0 + (pos_y >> SCALE_SUBPEL_BITS) * pre_buf->stride +
-          (pos_x >> SCALE_SUBPEL_BITS);
-
-      const SubpelParams subpel_params = { sf->x_step_q4, sf->y_step_q4,
-                                           pos_x & SCALE_SUBPEL_MASK,
-                                           pos_y & SCALE_SUBPEL_MASK };
-
-      // Get warp types.
-      const WarpedMotionParams *const wm =
-          &xd->global_motion[mi->ref_frame[ref_num]];
-      const int is_global = is_global_mv_block(mi, wm->wmtype);
-      WarpTypesAllowed warp_types;
-      warp_types.global_warp_allowed = is_global;
-      warp_types.local_warp_allowed = mi->motion_mode == WARPED_CAUSAL;
-
-      // Get convolve parameters.
-      ConvolveParams conv_params = get_conv_params(0, plane, xd->bd);
-      const InterpFilters filters =
-          av1_broadcast_interp_filter(EIGHTTAP_REGULAR);
-
-      // Get the inter predictor.
-      const int build_for_obmc = 0;
-      av1_make_inter_predictor(pre, pre_buf->stride, comp_pred8, width,
-                               &subpel_params, sf, width, height, &conv_params,
-                               filters, &warp_types, mi_x >> pd->subsampling_x,
-                               mi_y >> pd->subsampling_y, plane, ref_num, mi,
-                               build_for_obmc, xd, cm->allow_warped_motion);
-
-      return;
-    }
-  }
-
-  // Unscaled path (also taken when xd is NULL).
-  const InterpFilterParams *filter =
-      (subpel_search == 1)
-          ? av1_get_4tap_interp_filter_params(EIGHTTAP_REGULAR)
-          : av1_get_interp_filter_params_with_block_size(EIGHTTAP_REGULAR, 8);
-
-  if (!subpel_x_q3 && !subpel_y_q3) {
-    // No fractional offset: plain row-by-row copy of 16-bit samples.
-    const uint16_t *ref = CONVERT_TO_SHORTPTR(ref8);
-    uint16_t *comp_pred = CONVERT_TO_SHORTPTR(comp_pred8);
-    for (int i = 0; i < height; i++) {
-      memcpy(comp_pred, ref, width * sizeof(*comp_pred));
-      comp_pred += width;
-      ref += ref_stride;
-    }
-  } else if (!subpel_y_q3) {
-    // Horizontal-only offset. The q3 offset is shifted left by one before it
-    // is used as the kernel index.
-    const int16_t *const kernel =
-        av1_get_interp_filter_subpel_kernel(filter, subpel_x_q3 << 1);
-    aom_highbd_convolve8_horiz(ref8, ref_stride, comp_pred8, width, kernel, 16,
-                               NULL, -1, width, height, bd);
-  } else if (!subpel_x_q3) {
-    // Vertical-only offset.
-    const int16_t *const kernel =
-        av1_get_interp_filter_subpel_kernel(filter, subpel_y_q3 << 1);
-    aom_highbd_convolve8_vert(ref8, ref_stride, comp_pred8, width, NULL, -1,
-                              kernel, 16, width, height, bd);
-  } else {
-    // Both offsets non-zero: horizontal pass into temp (row pitch
-    // MAX_SB_SIZE), then vertical pass from temp into comp_pred8.
-    DECLARE_ALIGNED(16, uint16_t,
-                    temp[((MAX_SB_SIZE + 16) + 16) * MAX_SB_SIZE]);
-    const int16_t *const kernel_x =
-        av1_get_interp_filter_subpel_kernel(filter, subpel_x_q3 << 1);
-    const int16_t *const kernel_y =
-        av1_get_interp_filter_subpel_kernel(filter, subpel_y_q3 << 1);
-    const int intermediate_height =
-        (((height - 1) * 8 + subpel_y_q3) >> 3) + filter->taps;
-    // The bound must equal the number of MAX_SB_SIZE-wide rows temp really
-    // holds; the previous, larger bound could not catch an overrun of temp.
-    assert(intermediate_height <= (MAX_SB_SIZE + 16) + 16);
-    // Start (taps / 2 - 1) rows above ref8 so the vertical filter has the
-    // rows it needs; the vertical pass skips the same number of rows in temp.
-    aom_highbd_convolve8_horiz(ref8 - ref_stride * ((filter->taps >> 1) - 1),
-                               ref_stride, CONVERT_TO_BYTEPTR(temp),
-                               MAX_SB_SIZE, kernel_x, 16, NULL, -1, width,
-                               intermediate_height, bd);
-    aom_highbd_convolve8_vert(
-        CONVERT_TO_BYTEPTR(temp + MAX_SB_SIZE * ((filter->taps >> 1) - 1)),
-        MAX_SB_SIZE, comp_pred8, width, NULL, -1, kernel_y, 16, width, height,
-        bd);
-  }
-}
-
-void aom_highbd_comp_avg_upsampled_pred_c(
- MACROBLOCKD *xd, const struct AV1Common *const cm, int mi_row, int mi_col,
- const MV *const mv, uint8_t *comp_pred8, const uint8_t *pred8, int width,
- int height, int subpel_x_q3, int subpel_y_q3, const uint8_t *ref8,
- int ref_stride, int bd, int subpel_search) {
- int i, j;
-
- const uint16_t *pred = CONVERT_TO_SHORTPTR(pred8);
- uint16_t *comp_pred = CONVERT_TO_SHORTPTR(comp_pred8);
- aom_highbd_upsampled_pred(xd, cm, mi_row, mi_col, mv, comp_pred8, width,
- height, subpel_x_q3, subpel_y_q3, ref8, ref_stride,
- bd, subpel_search);
- for (i = 0; i < height; ++i) {
- for (j = 0; j < width; ++j) {
- comp_pred[j] = ROUND_POWER_OF_TWO(pred[j] + comp_pred[j], 1);
- }
- comp_pred += width;
- pred += width;
- }
-}
-
-void aom_highbd_jnt_comp_avg_pred_c(uint8_t *comp_pred8, const uint8_t *pred8,
- int width, int height, const uint8_t *ref8,
- int ref_stride,
- const JNT_COMP_PARAMS *jcp_param) {
- int i, j;
- const int fwd_offset = jcp_param->fwd_offset;
- const int bck_offset = jcp_param->bck_offset;
- uint16_t *pred = CONVERT_TO_SHORTPTR(pred8);
- uint16_t *ref = CONVERT_TO_SHORTPTR(ref8);
- uint16_t *comp_pred = CONVERT_TO_SHORTPTR(comp_pred8);
-
- for (i = 0; i < height; ++i) {
- for (j = 0; j < width; ++j) {
- int tmp = pred[j] * bck_offset + ref[j] * fwd_offset;
- tmp = ROUND_POWER_OF_TWO(tmp, DIST_PRECISION_BITS);
- comp_pred[j] = (uint16_t)tmp;
- }
- comp_pred += width;
- pred += width;
- ref += ref_stride;
- }
-}
-
-void aom_highbd_jnt_comp_avg_upsampled_pred_c(
- MACROBLOCKD *xd, const struct AV1Common *const cm, int mi_row, int mi_col,
- const MV *const mv, uint8_t *comp_pred8, const uint8_t *pred8, int width,
- int height, int subpel_x_q3, int subpel_y_q3, const uint8_t *ref8,
- int ref_stride, int bd, const JNT_COMP_PARAMS *jcp_param,
- int subpel_search) {
- int i, j;
- const int fwd_offset = jcp_param->fwd_offset;
- const int bck_offset = jcp_param->bck_offset;
- const uint16_t *pred = CONVERT_TO_SHORTPTR(pred8);
- uint16_t *comp_pred = CONVERT_TO_SHORTPTR(comp_pred8);
- aom_highbd_upsampled_pred(xd, cm, mi_row, mi_col, mv, comp_pred8, width,
- height, subpel_x_q3, subpel_y_q3, ref8, ref_stride,
- bd, subpel_search);
-
- for (i = 0; i < height; i++) {
- for (j = 0; j < width; j++) {
- int tmp = pred[j] * bck_offset + comp_pred[j] * fwd_offset;
- tmp = ROUND_POWER_OF_TWO(tmp, DIST_PRECISION_BITS);
- comp_pred[j] = (uint16_t)tmp;
- }
- comp_pred += width;
- pred += width;
- }
-}
-
-void aom_comp_mask_pred_c(uint8_t *comp_pred, const uint8_t *pred, int width,
- int height, const uint8_t *ref, int ref_stride,
- const uint8_t *mask, int mask_stride,
- int invert_mask) {
- int i, j;
- const uint8_t *src0 = invert_mask ? pred : ref;
- const uint8_t *src1 = invert_mask ? ref : pred;
- const int stride0 = invert_mask ? width : ref_stride;
- const int stride1 = invert_mask ? ref_stride : width;
- for (i = 0; i < height; ++i) {
- for (j = 0; j < width; ++j) {
- comp_pred[j] = AOM_BLEND_A64(mask[j], src0[j], src1[j]);
- }
- comp_pred += width;
- src0 += stride0;
- src1 += stride1;
- mask += mask_stride;
- }
-}
-
-void aom_comp_mask_upsampled_pred_c(MACROBLOCKD *xd, const AV1_COMMON *const cm,
- int mi_row, int mi_col, const MV *const mv,
- uint8_t *comp_pred, const uint8_t *pred,
- int width, int height, int subpel_x_q3,
- int subpel_y_q3, const uint8_t *ref,
- int ref_stride, const uint8_t *mask,
- int mask_stride, int invert_mask,
- int subpel_search) {
- if (subpel_x_q3 | subpel_y_q3) {
- aom_upsampled_pred_c(xd, cm, mi_row, mi_col, mv, comp_pred, width, height,
- subpel_x_q3, subpel_y_q3, ref, ref_stride,
- subpel_search);
- ref = comp_pred;
- ref_stride = width;
- }
- aom_comp_mask_pred_c(comp_pred, pred, width, height, ref, ref_stride, mask,
- mask_stride, invert_mask);
-}
-
-#define MASK_SUBPIX_VAR(W, H) \
- unsigned int aom_masked_sub_pixel_variance##W##x##H##_c( \
- const uint8_t *src, int src_stride, int xoffset, int yoffset, \
- const uint8_t *ref, int ref_stride, const uint8_t *second_pred, \
- const uint8_t *msk, int msk_stride, int invert_mask, \
- unsigned int *sse) { \
- uint16_t fdata3[(H + 1) * W]; \
- uint8_t temp2[H * W]; \
- DECLARE_ALIGNED(16, uint8_t, temp3[H * W]); \
- \
- aom_var_filter_block2d_bil_first_pass_c(src, fdata3, src_stride, 1, H + 1, \
- W, bilinear_filters_2t[xoffset]); \
- aom_var_filter_block2d_bil_second_pass_c(fdata3, temp2, W, W, H, W, \
- bilinear_filters_2t[yoffset]); \
- \
- aom_comp_mask_pred_c(temp3, second_pred, W, H, temp2, W, msk, msk_stride, \
- invert_mask); \
- return aom_variance##W##x##H##_c(temp3, W, ref, ref_stride, sse); \
- }
-
-MASK_SUBPIX_VAR(4, 4)
-MASK_SUBPIX_VAR(4, 8)
-MASK_SUBPIX_VAR(8, 4)
-MASK_SUBPIX_VAR(8, 8)
-MASK_SUBPIX_VAR(8, 16)
-MASK_SUBPIX_VAR(16, 8)
-MASK_SUBPIX_VAR(16, 16)
-MASK_SUBPIX_VAR(16, 32)
-MASK_SUBPIX_VAR(32, 16)
-MASK_SUBPIX_VAR(32, 32)
-MASK_SUBPIX_VAR(32, 64)
-MASK_SUBPIX_VAR(64, 32)
-MASK_SUBPIX_VAR(64, 64)
-MASK_SUBPIX_VAR(64, 128)
-MASK_SUBPIX_VAR(128, 64)
-MASK_SUBPIX_VAR(128, 128)
-MASK_SUBPIX_VAR(4, 16)
-MASK_SUBPIX_VAR(16, 4)
-MASK_SUBPIX_VAR(8, 32)
-MASK_SUBPIX_VAR(32, 8)
-MASK_SUBPIX_VAR(16, 64)
-MASK_SUBPIX_VAR(64, 16)
-
-void aom_highbd_comp_mask_pred_c(uint8_t *comp_pred8, const uint8_t *pred8,
- int width, int height, const uint8_t *ref8,
- int ref_stride, const uint8_t *mask,
- int mask_stride, int invert_mask) {
- int i, j;
- uint16_t *pred = CONVERT_TO_SHORTPTR(pred8);
- uint16_t *ref = CONVERT_TO_SHORTPTR(ref8);
- uint16_t *comp_pred = CONVERT_TO_SHORTPTR(comp_pred8);
- for (i = 0; i < height; ++i) {
- for (j = 0; j < width; ++j) {
- if (!invert_mask)
- comp_pred[j] = AOM_BLEND_A64(mask[j], ref[j], pred[j]);
- else
- comp_pred[j] = AOM_BLEND_A64(mask[j], pred[j], ref[j]);
- }
- comp_pred += width;
- pred += width;
- ref += ref_stride;
- mask += mask_stride;
- }
-}
-
-void aom_highbd_comp_mask_upsampled_pred(
- MACROBLOCKD *xd, const struct AV1Common *const cm, int mi_row, int mi_col,
- const MV *const mv, uint8_t *comp_pred8, const uint8_t *pred8, int width,
- int height, int subpel_x_q3, int subpel_y_q3, const uint8_t *ref8,
- int ref_stride, const uint8_t *mask, int mask_stride, int invert_mask,
- int bd, int subpel_search) {
- aom_highbd_upsampled_pred(xd, cm, mi_row, mi_col, mv, comp_pred8, width,
- height, subpel_x_q3, subpel_y_q3, ref8, ref_stride,
- bd, subpel_search);
- aom_highbd_comp_mask_pred(comp_pred8, pred8, width, height, comp_pred8, width,
- mask, mask_stride, invert_mask);
-}
-
-#define HIGHBD_MASK_SUBPIX_VAR(W, H) \
- unsigned int aom_highbd_8_masked_sub_pixel_variance##W##x##H##_c( \
- const uint8_t *src, int src_stride, int xoffset, int yoffset, \
- const uint8_t *ref, int ref_stride, const uint8_t *second_pred, \
- const uint8_t *msk, int msk_stride, int invert_mask, \
- unsigned int *sse) { \
- uint16_t fdata3[(H + 1) * W]; \
- uint16_t temp2[H * W]; \
- DECLARE_ALIGNED(16, uint16_t, temp3[H * W]); \
- \
- aom_highbd_var_filter_block2d_bil_first_pass( \
- src, fdata3, src_stride, 1, H + 1, W, bilinear_filters_2t[xoffset]); \
- aom_highbd_var_filter_block2d_bil_second_pass( \
- fdata3, temp2, W, W, H, W, bilinear_filters_2t[yoffset]); \
- \
- aom_highbd_comp_mask_pred_c(CONVERT_TO_BYTEPTR(temp3), second_pred, W, H, \
- CONVERT_TO_BYTEPTR(temp2), W, msk, msk_stride, \
- invert_mask); \
- \
- return aom_highbd_8_variance##W##x##H##_c(CONVERT_TO_BYTEPTR(temp3), W, \
- ref, ref_stride, sse); \
- } \
- \
- unsigned int aom_highbd_10_masked_sub_pixel_variance##W##x##H##_c( \
- const uint8_t *src, int src_stride, int xoffset, int yoffset, \
- const uint8_t *ref, int ref_stride, const uint8_t *second_pred, \
- const uint8_t *msk, int msk_stride, int invert_mask, \
- unsigned int *sse) { \
- uint16_t fdata3[(H + 1) * W]; \
- uint16_t temp2[H * W]; \
- DECLARE_ALIGNED(16, uint16_t, temp3[H * W]); \
- \
- aom_highbd_var_filter_block2d_bil_first_pass( \
- src, fdata3, src_stride, 1, H + 1, W, bilinear_filters_2t[xoffset]); \
- aom_highbd_var_filter_block2d_bil_second_pass( \
- fdata3, temp2, W, W, H, W, bilinear_filters_2t[yoffset]); \
- \
- aom_highbd_comp_mask_pred_c(CONVERT_TO_BYTEPTR(temp3), second_pred, W, H, \
- CONVERT_TO_BYTEPTR(temp2), W, msk, msk_stride, \
- invert_mask); \
- \
- return aom_highbd_10_variance##W##x##H##_c(CONVERT_TO_BYTEPTR(temp3), W, \
- ref, ref_stride, sse); \
- } \
- \
- unsigned int aom_highbd_12_masked_sub_pixel_variance##W##x##H##_c( \
- const uint8_t *src, int src_stride, int xoffset, int yoffset, \
- const uint8_t *ref, int ref_stride, const uint8_t *second_pred, \
- const uint8_t *msk, int msk_stride, int invert_mask, \
- unsigned int *sse) { \
- uint16_t fdata3[(H + 1) * W]; \
- uint16_t temp2[H * W]; \
- DECLARE_ALIGNED(16, uint16_t, temp3[H * W]); \
- \
- aom_highbd_var_filter_block2d_bil_first_pass( \
- src, fdata3, src_stride, 1, H + 1, W, bilinear_filters_2t[xoffset]); \
- aom_highbd_var_filter_block2d_bil_second_pass( \
- fdata3, temp2, W, W, H, W, bilinear_filters_2t[yoffset]); \
- \
- aom_highbd_comp_mask_pred_c(CONVERT_TO_BYTEPTR(temp3), second_pred, W, H, \
- CONVERT_TO_BYTEPTR(temp2), W, msk, msk_stride, \
- invert_mask); \
- \
- return aom_highbd_12_variance##W##x##H##_c(CONVERT_TO_BYTEPTR(temp3), W, \
- ref, ref_stride, sse); \
- }
-
-HIGHBD_MASK_SUBPIX_VAR(4, 4)
-HIGHBD_MASK_SUBPIX_VAR(4, 8)
-HIGHBD_MASK_SUBPIX_VAR(8, 4)
-HIGHBD_MASK_SUBPIX_VAR(8, 8)
-HIGHBD_MASK_SUBPIX_VAR(8, 16)
-HIGHBD_MASK_SUBPIX_VAR(16, 8)
-HIGHBD_MASK_SUBPIX_VAR(16, 16)
-HIGHBD_MASK_SUBPIX_VAR(16, 32)
-HIGHBD_MASK_SUBPIX_VAR(32, 16)
-HIGHBD_MASK_SUBPIX_VAR(32, 32)
-HIGHBD_MASK_SUBPIX_VAR(32, 64)
-HIGHBD_MASK_SUBPIX_VAR(64, 32)
-HIGHBD_MASK_SUBPIX_VAR(64, 64)
-HIGHBD_MASK_SUBPIX_VAR(64, 128)
-HIGHBD_MASK_SUBPIX_VAR(128, 64)
-HIGHBD_MASK_SUBPIX_VAR(128, 128)
-HIGHBD_MASK_SUBPIX_VAR(4, 16)
-HIGHBD_MASK_SUBPIX_VAR(16, 4)
-HIGHBD_MASK_SUBPIX_VAR(8, 32)
-HIGHBD_MASK_SUBPIX_VAR(32, 8)
-HIGHBD_MASK_SUBPIX_VAR(16, 64)
-HIGHBD_MASK_SUBPIX_VAR(64, 16)
-
-static INLINE void obmc_variance(const uint8_t *pre, int pre_stride,
- const int32_t *wsrc, const int32_t *mask,
- int w, int h, unsigned int *sse, int *sum) {
- int i, j;
-
- *sse = 0;
- *sum = 0;
-
- for (i = 0; i < h; i++) {
- for (j = 0; j < w; j++) {
- int diff = ROUND_POWER_OF_TWO_SIGNED(wsrc[j] - pre[j] * mask[j], 12);
- *sum += diff;
- *sse += diff * diff;
- }
-
- pre += pre_stride;
- wsrc += w;
- mask += w;
- }
-}
-
-#define OBMC_VAR(W, H) \
- unsigned int aom_obmc_variance##W##x##H##_c( \
- const uint8_t *pre, int pre_stride, const int32_t *wsrc, \
- const int32_t *mask, unsigned int *sse) { \
- int sum; \
- obmc_variance(pre, pre_stride, wsrc, mask, W, H, sse, &sum); \
- return *sse - (unsigned int)(((int64_t)sum * sum) / (W * H)); \
- }
-
-#define OBMC_SUBPIX_VAR(W, H) \
- unsigned int aom_obmc_sub_pixel_variance##W##x##H##_c( \
- const uint8_t *pre, int pre_stride, int xoffset, int yoffset, \
- const int32_t *wsrc, const int32_t *mask, unsigned int *sse) { \
- uint16_t fdata3[(H + 1) * W]; \
- uint8_t temp2[H * W]; \
- \
- aom_var_filter_block2d_bil_first_pass_c(pre, fdata3, pre_stride, 1, H + 1, \
- W, bilinear_filters_2t[xoffset]); \
- aom_var_filter_block2d_bil_second_pass_c(fdata3, temp2, W, W, H, W, \
- bilinear_filters_2t[yoffset]); \
- \
- return aom_obmc_variance##W##x##H##_c(temp2, W, wsrc, mask, sse); \
- }
-
-OBMC_VAR(4, 4)
-OBMC_SUBPIX_VAR(4, 4)
-
-OBMC_VAR(4, 8)
-OBMC_SUBPIX_VAR(4, 8)
-
-OBMC_VAR(8, 4)
-OBMC_SUBPIX_VAR(8, 4)
-
-OBMC_VAR(8, 8)
-OBMC_SUBPIX_VAR(8, 8)
-
-OBMC_VAR(8, 16)
-OBMC_SUBPIX_VAR(8, 16)
-
-OBMC_VAR(16, 8)
-OBMC_SUBPIX_VAR(16, 8)
-
-OBMC_VAR(16, 16)
-OBMC_SUBPIX_VAR(16, 16)
-
-OBMC_VAR(16, 32)
-OBMC_SUBPIX_VAR(16, 32)
-
-OBMC_VAR(32, 16)
-OBMC_SUBPIX_VAR(32, 16)
-
-OBMC_VAR(32, 32)
-OBMC_SUBPIX_VAR(32, 32)
-
-OBMC_VAR(32, 64)
-OBMC_SUBPIX_VAR(32, 64)
-
-OBMC_VAR(64, 32)
-OBMC_SUBPIX_VAR(64, 32)
-
-OBMC_VAR(64, 64)
-OBMC_SUBPIX_VAR(64, 64)
-
-OBMC_VAR(64, 128)
-OBMC_SUBPIX_VAR(64, 128)
-
-OBMC_VAR(128, 64)
-OBMC_SUBPIX_VAR(128, 64)
-
-OBMC_VAR(128, 128)
-OBMC_SUBPIX_VAR(128, 128)
-
-OBMC_VAR(4, 16)
-OBMC_SUBPIX_VAR(4, 16)
-OBMC_VAR(16, 4)
-OBMC_SUBPIX_VAR(16, 4)
-OBMC_VAR(8, 32)
-OBMC_SUBPIX_VAR(8, 32)
-OBMC_VAR(32, 8)
-OBMC_SUBPIX_VAR(32, 8)
-OBMC_VAR(16, 64)
-OBMC_SUBPIX_VAR(16, 64)
-OBMC_VAR(64, 16)
-OBMC_SUBPIX_VAR(64, 16)
-
-static INLINE void highbd_obmc_variance64(const uint8_t *pre8, int pre_stride,
- const int32_t *wsrc,
- const int32_t *mask, int w, int h,
- uint64_t *sse, int64_t *sum) {
- int i, j;
- uint16_t *pre = CONVERT_TO_SHORTPTR(pre8);
-
- *sse = 0;
- *sum = 0;
-
- for (i = 0; i < h; i++) {
- for (j = 0; j < w; j++) {
- int diff = ROUND_POWER_OF_TWO_SIGNED(wsrc[j] - pre[j] * mask[j], 12);
- *sum += diff;
- *sse += diff * diff;
- }
-
- pre += pre_stride;
- wsrc += w;
- mask += w;
- }
-}
-
-static INLINE void highbd_obmc_variance(const uint8_t *pre8, int pre_stride,
- const int32_t *wsrc,
- const int32_t *mask, int w, int h,
- unsigned int *sse, int *sum) {
- int64_t sum64;
- uint64_t sse64;
- highbd_obmc_variance64(pre8, pre_stride, wsrc, mask, w, h, &sse64, &sum64);
- *sum = (int)sum64;
- *sse = (unsigned int)sse64;
-}
-
-static INLINE void highbd_10_obmc_variance(const uint8_t *pre8, int pre_stride,
- const int32_t *wsrc,
- const int32_t *mask, int w, int h,
- unsigned int *sse, int *sum) {
- int64_t sum64;
- uint64_t sse64;
- highbd_obmc_variance64(pre8, pre_stride, wsrc, mask, w, h, &sse64, &sum64);
- *sum = (int)ROUND_POWER_OF_TWO(sum64, 2);
- *sse = (unsigned int)ROUND_POWER_OF_TWO(sse64, 4);
-}
-
-static INLINE void highbd_12_obmc_variance(const uint8_t *pre8, int pre_stride,
- const int32_t *wsrc,
- const int32_t *mask, int w, int h,
- unsigned int *sse, int *sum) {
- int64_t sum64;
- uint64_t sse64;
- highbd_obmc_variance64(pre8, pre_stride, wsrc, mask, w, h, &sse64, &sum64);
- *sum = (int)ROUND_POWER_OF_TWO(sum64, 4);
- *sse = (unsigned int)ROUND_POWER_OF_TWO(sse64, 8);
-}
-
-#define HIGHBD_OBMC_VAR(W, H) \
- unsigned int aom_highbd_obmc_variance##W##x##H##_c( \
- const uint8_t *pre, int pre_stride, const int32_t *wsrc, \
- const int32_t *mask, unsigned int *sse) { \
- int sum; \
- highbd_obmc_variance(pre, pre_stride, wsrc, mask, W, H, sse, &sum); \
- return *sse - (unsigned int)(((int64_t)sum * sum) / (W * H)); \
- } \
- \
- unsigned int aom_highbd_10_obmc_variance##W##x##H##_c( \
- const uint8_t *pre, int pre_stride, const int32_t *wsrc, \
- const int32_t *mask, unsigned int *sse) { \
- int sum; \
- int64_t var; \
- highbd_10_obmc_variance(pre, pre_stride, wsrc, mask, W, H, sse, &sum); \
- var = (int64_t)(*sse) - (((int64_t)sum * sum) / (W * H)); \
- return (var >= 0) ? (uint32_t)var : 0; \
- } \
- \
- unsigned int aom_highbd_12_obmc_variance##W##x##H##_c( \
- const uint8_t *pre, int pre_stride, const int32_t *wsrc, \
- const int32_t *mask, unsigned int *sse) { \
- int sum; \
- int64_t var; \
- highbd_12_obmc_variance(pre, pre_stride, wsrc, mask, W, H, sse, &sum); \
- var = (int64_t)(*sse) - (((int64_t)sum * sum) / (W * H)); \
- return (var >= 0) ? (uint32_t)var : 0; \
- }
-
-#define HIGHBD_OBMC_SUBPIX_VAR(W, H) \
- unsigned int aom_highbd_obmc_sub_pixel_variance##W##x##H##_c( \
- const uint8_t *pre, int pre_stride, int xoffset, int yoffset, \
- const int32_t *wsrc, const int32_t *mask, unsigned int *sse) { \
- uint16_t fdata3[(H + 1) * W]; \
- uint16_t temp2[H * W]; \
- \
- aom_highbd_var_filter_block2d_bil_first_pass( \
- pre, fdata3, pre_stride, 1, H + 1, W, bilinear_filters_2t[xoffset]); \
- aom_highbd_var_filter_block2d_bil_second_pass( \
- fdata3, temp2, W, W, H, W, bilinear_filters_2t[yoffset]); \
- \
- return aom_highbd_obmc_variance##W##x##H##_c(CONVERT_TO_BYTEPTR(temp2), W, \
- wsrc, mask, sse); \
- } \
- \
- unsigned int aom_highbd_10_obmc_sub_pixel_variance##W##x##H##_c( \
- const uint8_t *pre, int pre_stride, int xoffset, int yoffset, \
- const int32_t *wsrc, const int32_t *mask, unsigned int *sse) { \
- uint16_t fdata3[(H + 1) * W]; \
- uint16_t temp2[H * W]; \
- \
- aom_highbd_var_filter_block2d_bil_first_pass( \
- pre, fdata3, pre_stride, 1, H + 1, W, bilinear_filters_2t[xoffset]); \
- aom_highbd_var_filter_block2d_bil_second_pass( \
- fdata3, temp2, W, W, H, W, bilinear_filters_2t[yoffset]); \
- \
- return aom_highbd_10_obmc_variance##W##x##H##_c(CONVERT_TO_BYTEPTR(temp2), \
- W, wsrc, mask, sse); \
- } \
- \
- unsigned int aom_highbd_12_obmc_sub_pixel_variance##W##x##H##_c( \
- const uint8_t *pre, int pre_stride, int xoffset, int yoffset, \
- const int32_t *wsrc, const int32_t *mask, unsigned int *sse) { \
- uint16_t fdata3[(H + 1) * W]; \
- uint16_t temp2[H * W]; \
- \
- aom_highbd_var_filter_block2d_bil_first_pass( \
- pre, fdata3, pre_stride, 1, H + 1, W, bilinear_filters_2t[xoffset]); \
- aom_highbd_var_filter_block2d_bil_second_pass( \
- fdata3, temp2, W, W, H, W, bilinear_filters_2t[yoffset]); \
- \
- return aom_highbd_12_obmc_variance##W##x##H##_c(CONVERT_TO_BYTEPTR(temp2), \
- W, wsrc, mask, sse); \
- }
-
-HIGHBD_OBMC_VAR(4, 4)
-HIGHBD_OBMC_SUBPIX_VAR(4, 4)
-
-HIGHBD_OBMC_VAR(4, 8)
-HIGHBD_OBMC_SUBPIX_VAR(4, 8)
-
-HIGHBD_OBMC_VAR(8, 4)
-HIGHBD_OBMC_SUBPIX_VAR(8, 4)
-
-HIGHBD_OBMC_VAR(8, 8)
-HIGHBD_OBMC_SUBPIX_VAR(8, 8)
-
-HIGHBD_OBMC_VAR(8, 16)
-HIGHBD_OBMC_SUBPIX_VAR(8, 16)
-
-HIGHBD_OBMC_VAR(16, 8)
-HIGHBD_OBMC_SUBPIX_VAR(16, 8)
-
-HIGHBD_OBMC_VAR(16, 16)
-HIGHBD_OBMC_SUBPIX_VAR(16, 16)
-
-HIGHBD_OBMC_VAR(16, 32)
-HIGHBD_OBMC_SUBPIX_VAR(16, 32)
-
-HIGHBD_OBMC_VAR(32, 16)
-HIGHBD_OBMC_SUBPIX_VAR(32, 16)
-
-HIGHBD_OBMC_VAR(32, 32)
-HIGHBD_OBMC_SUBPIX_VAR(32, 32)
-
-HIGHBD_OBMC_VAR(32, 64)
-HIGHBD_OBMC_SUBPIX_VAR(32, 64)
-
-HIGHBD_OBMC_VAR(64, 32)
-HIGHBD_OBMC_SUBPIX_VAR(64, 32)
-
-HIGHBD_OBMC_VAR(64, 64)
-HIGHBD_OBMC_SUBPIX_VAR(64, 64)
-
-HIGHBD_OBMC_VAR(64, 128)
-HIGHBD_OBMC_SUBPIX_VAR(64, 128)
-
-HIGHBD_OBMC_VAR(128, 64)
-HIGHBD_OBMC_SUBPIX_VAR(128, 64)
-
-HIGHBD_OBMC_VAR(128, 128)
-HIGHBD_OBMC_SUBPIX_VAR(128, 128)
-
-HIGHBD_OBMC_VAR(4, 16)
-HIGHBD_OBMC_SUBPIX_VAR(4, 16)
-HIGHBD_OBMC_VAR(16, 4)
-HIGHBD_OBMC_SUBPIX_VAR(16, 4)
-HIGHBD_OBMC_VAR(8, 32)
-HIGHBD_OBMC_SUBPIX_VAR(8, 32)
-HIGHBD_OBMC_VAR(32, 8)
-HIGHBD_OBMC_SUBPIX_VAR(32, 8)
-HIGHBD_OBMC_VAR(16, 64)
-HIGHBD_OBMC_SUBPIX_VAR(16, 64)
-HIGHBD_OBMC_VAR(64, 16)
-HIGHBD_OBMC_SUBPIX_VAR(64, 16)
diff --git a/third_party/aom/aom_dsp/variance.h b/third_party/aom/aom_dsp/variance.h
deleted file mode 100644
index 362da29d3..000000000
--- a/third_party/aom/aom_dsp/variance.h
+++ /dev/null
@@ -1,130 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#ifndef AOM_AOM_DSP_VARIANCE_H_
-#define AOM_AOM_DSP_VARIANCE_H_
-
-#include "config/aom_config.h"
-
-#include "aom/aom_integer.h"
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-#define FILTER_BITS 7
-#define FILTER_WEIGHT 128
-
-typedef unsigned int (*aom_sad_fn_t)(const uint8_t *a, int a_stride,
- const uint8_t *b, int b_stride);
-
-typedef unsigned int (*aom_sad_avg_fn_t)(const uint8_t *a, int a_stride,
- const uint8_t *b, int b_stride,
- const uint8_t *second_pred);
-
-typedef void (*aom_copy32xn_fn_t)(const uint8_t *a, int a_stride, uint8_t *b,
- int b_stride, int n);
-
-typedef void (*aom_sad_multi_d_fn_t)(const uint8_t *a, int a_stride,
- const uint8_t *const b_array[],
- int b_stride, unsigned int *sad_array);
-
-typedef unsigned int (*aom_variance_fn_t)(const uint8_t *a, int a_stride,
- const uint8_t *b, int b_stride,
- unsigned int *sse);
-
-typedef unsigned int (*aom_subpixvariance_fn_t)(const uint8_t *a, int a_stride,
- int xoffset, int yoffset,
- const uint8_t *b, int b_stride,
- unsigned int *sse);
-
-typedef unsigned int (*aom_subp_avg_variance_fn_t)(
- const uint8_t *a, int a_stride, int xoffset, int yoffset, const uint8_t *b,
- int b_stride, unsigned int *sse, const uint8_t *second_pred);
-
-typedef unsigned int (*aom_jnt_sad_avg_fn_t)(const uint8_t *a, int a_stride,
- const uint8_t *b, int b_stride,
- const uint8_t *second_pred,
- const JNT_COMP_PARAMS *jcp_param);
-
-typedef unsigned int (*aom_jnt_subp_avg_variance_fn_t)(
- const uint8_t *a, int a_stride, int xoffset, int yoffset, const uint8_t *b,
- int b_stride, unsigned int *sse, const uint8_t *second_pred,
- const JNT_COMP_PARAMS *jcp_param);
-
-typedef unsigned int (*aom_masked_sad_fn_t)(const uint8_t *src, int src_stride,
- const uint8_t *ref, int ref_stride,
- const uint8_t *second_pred,
- const uint8_t *msk, int msk_stride,
- int invert_mask);
-typedef unsigned int (*aom_masked_subpixvariance_fn_t)(
- const uint8_t *src, int src_stride, int xoffset, int yoffset,
- const uint8_t *ref, int ref_stride, const uint8_t *second_pred,
- const uint8_t *msk, int msk_stride, int invert_mask, unsigned int *sse);
-
-void aom_highbd_comp_mask_upsampled_pred(
- MACROBLOCKD *xd, const struct AV1Common *const cm, int mi_row, int mi_col,
- const MV *const mv, uint8_t *comp_pred8, const uint8_t *pred8, int width,
- int height, int subpel_x_q3, int subpel_y_q3, const uint8_t *ref8,
- int ref_stride, const uint8_t *mask, int mask_stride, int invert_mask,
- int bd, int subpel_search);
-
-typedef unsigned int (*aom_obmc_sad_fn_t)(const uint8_t *pred, int pred_stride,
- const int32_t *wsrc,
- const int32_t *msk);
-typedef unsigned int (*aom_obmc_variance_fn_t)(const uint8_t *pred,
- int pred_stride,
- const int32_t *wsrc,
- const int32_t *msk,
- unsigned int *sse);
-typedef unsigned int (*aom_obmc_subpixvariance_fn_t)(
- const uint8_t *pred, int pred_stride, int xoffset, int yoffset,
- const int32_t *wsrc, const int32_t *msk, unsigned int *sse);
-
-typedef struct aom_variance_vtable {
- aom_sad_fn_t sdf;
- aom_sad_avg_fn_t sdaf;
- aom_variance_fn_t vf;
- aom_subpixvariance_fn_t svf;
- aom_subp_avg_variance_fn_t svaf;
- aom_sad_multi_d_fn_t sdx4df;
- aom_masked_sad_fn_t msdf;
- aom_masked_subpixvariance_fn_t msvf;
- aom_obmc_sad_fn_t osdf;
- aom_obmc_variance_fn_t ovf;
- aom_obmc_subpixvariance_fn_t osvf;
- aom_jnt_sad_avg_fn_t jsdaf;
- aom_jnt_subp_avg_variance_fn_t jsvaf;
-} aom_variance_fn_ptr_t;
-
-void aom_highbd_var_filter_block2d_bil_first_pass(
- const uint8_t *src_ptr8, uint16_t *output_ptr,
- unsigned int src_pixels_per_line, int pixel_step,
- unsigned int output_height, unsigned int output_width,
- const uint8_t *filter);
-
-void aom_highbd_var_filter_block2d_bil_second_pass(
- const uint16_t *src_ptr, uint16_t *output_ptr,
- unsigned int src_pixels_per_line, unsigned int pixel_step,
- unsigned int output_height, unsigned int output_width,
- const uint8_t *filter);
-
-uint32_t aom_sse_odd_size(const uint8_t *a, int a_stride, const uint8_t *b,
- int b_stride, int w, int h);
-
-uint64_t aom_highbd_sse_odd_size(const uint8_t *a, int a_stride,
- const uint8_t *b, int b_stride, int w, int h);
-
-#ifdef __cplusplus
-} // extern "C"
-#endif
-
-#endif // AOM_AOM_DSP_VARIANCE_H_
diff --git a/third_party/aom/aom_dsp/x86/aom_asm_stubs.c b/third_party/aom/aom_dsp/x86/aom_asm_stubs.c
deleted file mode 100644
index 5f5bf5f14..000000000
--- a/third_party/aom/aom_dsp/x86/aom_asm_stubs.c
+++ /dev/null
@@ -1,89 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#include "config/aom_config.h"
-#include "config/aom_dsp_rtcd.h"
-
-#include "aom_dsp/x86/convolve.h"
-
-#if HAVE_SSE2
-filter8_1dfunction aom_filter_block1d16_v8_sse2;
-filter8_1dfunction aom_filter_block1d16_h8_sse2;
-filter8_1dfunction aom_filter_block1d8_v8_sse2;
-filter8_1dfunction aom_filter_block1d8_h8_sse2;
-filter8_1dfunction aom_filter_block1d4_v8_sse2;
-filter8_1dfunction aom_filter_block1d4_h8_sse2;
-
-#define aom_filter_block1d16_h4_sse2 aom_filter_block1d16_h8_sse2
-#define aom_filter_block1d16_v4_sse2 aom_filter_block1d16_v8_sse2
-#define aom_filter_block1d8_h4_sse2 aom_filter_block1d8_h8_sse2
-#define aom_filter_block1d8_v4_sse2 aom_filter_block1d8_v8_sse2
-#define aom_filter_block1d4_h4_sse2 aom_filter_block1d4_h8_sse2
-#define aom_filter_block1d4_v4_sse2 aom_filter_block1d4_v8_sse2
-
-filter8_1dfunction aom_filter_block1d16_v2_sse2;
-filter8_1dfunction aom_filter_block1d16_h2_sse2;
-filter8_1dfunction aom_filter_block1d8_v2_sse2;
-filter8_1dfunction aom_filter_block1d8_h2_sse2;
-filter8_1dfunction aom_filter_block1d4_v2_sse2;
-filter8_1dfunction aom_filter_block1d4_h2_sse2;
-
-// void aom_convolve8_horiz_sse2(const uint8_t *src, ptrdiff_t src_stride,
-// uint8_t *dst, ptrdiff_t dst_stride,
-// const int16_t *filter_x, int x_step_q4,
-// const int16_t *filter_y, int y_step_q4,
-// int w, int h);
-// void aom_convolve8_vert_sse2(const uint8_t *src, ptrdiff_t src_stride,
-// uint8_t *dst, ptrdiff_t dst_stride,
-// const int16_t *filter_x, int x_step_q4,
-// const int16_t *filter_y, int y_step_q4,
-// int w, int h);
-FUN_CONV_1D(horiz, x_step_q4, filter_x, h, src, , sse2);
-FUN_CONV_1D(vert, y_step_q4, filter_y, v, src - src_stride * 3, , sse2);
-
-#if ARCH_X86_64
-highbd_filter8_1dfunction aom_highbd_filter_block1d16_v8_sse2;
-highbd_filter8_1dfunction aom_highbd_filter_block1d16_h8_sse2;
-highbd_filter8_1dfunction aom_highbd_filter_block1d8_v8_sse2;
-highbd_filter8_1dfunction aom_highbd_filter_block1d8_h8_sse2;
-highbd_filter8_1dfunction aom_highbd_filter_block1d4_v8_sse2;
-highbd_filter8_1dfunction aom_highbd_filter_block1d4_h8_sse2;
-
-highbd_filter8_1dfunction aom_highbd_filter_block1d16_v2_sse2;
-highbd_filter8_1dfunction aom_highbd_filter_block1d16_h2_sse2;
-highbd_filter8_1dfunction aom_highbd_filter_block1d8_v2_sse2;
-highbd_filter8_1dfunction aom_highbd_filter_block1d8_h2_sse2;
-highbd_filter8_1dfunction aom_highbd_filter_block1d4_v2_sse2;
-highbd_filter8_1dfunction aom_highbd_filter_block1d4_h2_sse2;
-
-// void aom_highbd_convolve8_horiz_sse2(const uint8_t *src,
-// ptrdiff_t src_stride,
-// uint8_t *dst,
-// ptrdiff_t dst_stride,
-// const int16_t *filter_x,
-// int x_step_q4,
-// const int16_t *filter_y,
-// int y_step_q4,
-// int w, int h, int bd);
-// void aom_highbd_convolve8_vert_sse2(const uint8_t *src,
-// ptrdiff_t src_stride,
-// uint8_t *dst,
-// ptrdiff_t dst_stride,
-// const int16_t *filter_x,
-// int x_step_q4,
-// const int16_t *filter_y,
-// int y_step_q4,
-// int w, int h, int bd);
-HIGH_FUN_CONV_1D(horiz, x_step_q4, filter_x, h, src, , sse2);
-HIGH_FUN_CONV_1D(vert, y_step_q4, filter_y, v, src - src_stride * 3, , sse2);
-
-#endif // ARCH_X86_64
-#endif // HAVE_SSE2
diff --git a/third_party/aom/aom_dsp/x86/aom_convolve_copy_sse2.asm b/third_party/aom/aom_dsp/x86/aom_convolve_copy_sse2.asm
deleted file mode 100644
index 7283c32b8..000000000
--- a/third_party/aom/aom_dsp/x86/aom_convolve_copy_sse2.asm
+++ /dev/null
@@ -1,297 +0,0 @@
-;
-; Copyright (c) 2016, Alliance for Open Media. All rights reserved
-;
-; This source code is subject to the terms of the BSD 2 Clause License and
-; the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
-; was not distributed with this source code in the LICENSE file, you can
-; obtain it at www.aomedia.org/license/software. If the Alliance for Open
-; Media Patent License 1.0 was not distributed with this source code in the
-; PATENTS file, you can obtain it at www.aomedia.org/license/patent.
-;
-
-;
-
-%include "third_party/x86inc/x86inc.asm"
-
-SECTION .text
-
-%macro convolve_fn 1-2
-%ifidn %1, avg
-%define AUX_XMM_REGS 4
-%else
-%define AUX_XMM_REGS 0
-%endif
-%ifidn %2, highbd
-%define pavg pavgw
-cglobal %2_convolve_%1, 4, 7, 4+AUX_XMM_REGS, src, src_stride, \
- dst, dst_stride, \
- fx, fxs, fy, fys, w, h, bd
-%else
-%define pavg pavgb
-cglobal convolve_%1, 4, 7, 4+AUX_XMM_REGS, src, src_stride, \
- dst, dst_stride, \
- fx, fxs, fy, fys, w, h
-%endif
- mov r4d, dword wm
-%ifidn %2, highbd
- shl r4d, 1
- shl srcq, 1
- shl src_strideq, 1
- shl dstq, 1
- shl dst_strideq, 1
-%else
- cmp r4d, 4
- je .w4
-%endif
- cmp r4d, 8
- je .w8
- cmp r4d, 16
- je .w16
- cmp r4d, 32
- je .w32
-
- cmp r4d, 64
- je .w64
-%ifidn %2, highbd
- cmp r4d, 128
- je .w128
-
-.w256:
- mov r4d, dword hm
-.loop256:
- movu m0, [srcq]
- movu m1, [srcq+16]
- movu m2, [srcq+32]
- movu m3, [srcq+48]
-%ifidn %1, avg
- pavg m0, [dstq]
- pavg m1, [dstq+16]
- pavg m2, [dstq+32]
- pavg m3, [dstq+48]
-%endif
- mova [dstq ], m0
- mova [dstq+16], m1
- mova [dstq+32], m2
- mova [dstq+48], m3
- movu m0, [srcq+64]
- movu m1, [srcq+80]
- movu m2, [srcq+96]
- movu m3, [srcq+112]
-%ifidn %1, avg
- pavg m0, [dstq+64]
- pavg m1, [dstq+80]
- pavg m2, [dstq+96]
- pavg m3, [dstq+112]
-%endif
- mova [dstq+64], m0
- mova [dstq+80], m1
- mova [dstq+96], m2
- mova [dstq+112], m3
- movu m0, [srcq+128]
- movu m1, [srcq+128+16]
- movu m2, [srcq+128+32]
- movu m3, [srcq+128+48]
-%ifidn %1, avg
- pavg m0, [dstq+128]
- pavg m1, [dstq+128+16]
- pavg m2, [dstq+128+32]
- pavg m3, [dstq+128+48]
-%endif
- mova [dstq+128 ], m0
- mova [dstq+128+16], m1
- mova [dstq+128+32], m2
- mova [dstq+128+48], m3
- movu m0, [srcq+128+64]
- movu m1, [srcq+128+80]
- movu m2, [srcq+128+96]
- movu m3, [srcq+128+112]
- add srcq, src_strideq
-%ifidn %1, avg
- pavg m0, [dstq+128+64]
- pavg m1, [dstq+128+80]
- pavg m2, [dstq+128+96]
- pavg m3, [dstq+128+112]
-%endif
- mova [dstq+128+64], m0
- mova [dstq+128+80], m1
- mova [dstq+128+96], m2
- mova [dstq+128+112], m3
- add dstq, dst_strideq
- sub r4d, 1
- jnz .loop256
- RET
-%endif
-
-.w128:
- mov r4d, dword hm
-.loop128:
- movu m0, [srcq]
- movu m1, [srcq+16]
- movu m2, [srcq+32]
- movu m3, [srcq+48]
-%ifidn %1, avg
- pavg m0, [dstq]
- pavg m1, [dstq+16]
- pavg m2, [dstq+32]
- pavg m3, [dstq+48]
-%endif
- mova [dstq ], m0
- mova [dstq+16], m1
- mova [dstq+32], m2
- mova [dstq+48], m3
- movu m0, [srcq+64]
- movu m1, [srcq+80]
- movu m2, [srcq+96]
- movu m3, [srcq+112]
- add srcq, src_strideq
-%ifidn %1, avg
- pavg m0, [dstq+64]
- pavg m1, [dstq+80]
- pavg m2, [dstq+96]
- pavg m3, [dstq+112]
-%endif
- mova [dstq+64], m0
- mova [dstq+80], m1
- mova [dstq+96], m2
- mova [dstq+112], m3
- add dstq, dst_strideq
- sub r4d, 1
- jnz .loop128
- RET
-
-.w64:
- mov r4d, dword hm
-.loop64:
- movu m0, [srcq]
- movu m1, [srcq+16]
- movu m2, [srcq+32]
- movu m3, [srcq+48]
- add srcq, src_strideq
-%ifidn %1, avg
- pavg m0, [dstq]
- pavg m1, [dstq+16]
- pavg m2, [dstq+32]
- pavg m3, [dstq+48]
-%endif
- mova [dstq ], m0
- mova [dstq+16], m1
- mova [dstq+32], m2
- mova [dstq+48], m3
- add dstq, dst_strideq
- sub r4d, 1
- jnz .loop64
- RET
-
-.w32:
- mov r4d, dword hm
-.loop32:
- movu m0, [srcq]
- movu m1, [srcq+16]
- movu m2, [srcq+src_strideq]
- movu m3, [srcq+src_strideq+16]
- lea srcq, [srcq+src_strideq*2]
-%ifidn %1, avg
- pavg m0, [dstq]
- pavg m1, [dstq +16]
- pavg m2, [dstq+dst_strideq]
- pavg m3, [dstq+dst_strideq+16]
-%endif
- mova [dstq ], m0
- mova [dstq +16], m1
- mova [dstq+dst_strideq ], m2
- mova [dstq+dst_strideq+16], m3
- lea dstq, [dstq+dst_strideq*2]
- sub r4d, 2
- jnz .loop32
- RET
-
-.w16:
- mov r4d, dword hm
- lea r5q, [src_strideq*3]
- lea r6q, [dst_strideq*3]
-.loop16:
- movu m0, [srcq]
- movu m1, [srcq+src_strideq]
- movu m2, [srcq+src_strideq*2]
- movu m3, [srcq+r5q]
- lea srcq, [srcq+src_strideq*4]
-%ifidn %1, avg
- pavg m0, [dstq]
- pavg m1, [dstq+dst_strideq]
- pavg m2, [dstq+dst_strideq*2]
- pavg m3, [dstq+r6q]
-%endif
- mova [dstq ], m0
- mova [dstq+dst_strideq ], m1
- mova [dstq+dst_strideq*2], m2
- mova [dstq+r6q ], m3
- lea dstq, [dstq+dst_strideq*4]
- sub r4d, 4
- jnz .loop16
- RET
-
-.w8:
- mov r4d, dword hm
- lea r5q, [src_strideq*3]
- lea r6q, [dst_strideq*3]
-.loop8:
- movh m0, [srcq]
- movh m1, [srcq+src_strideq]
- movh m2, [srcq+src_strideq*2]
- movh m3, [srcq+r5q]
- lea srcq, [srcq+src_strideq*4]
-%ifidn %1, avg
- movh m4, [dstq]
- movh m5, [dstq+dst_strideq]
- movh m6, [dstq+dst_strideq*2]
- movh m7, [dstq+r6q]
- pavg m0, m4
- pavg m1, m5
- pavg m2, m6
- pavg m3, m7
-%endif
- movh [dstq ], m0
- movh [dstq+dst_strideq ], m1
- movh [dstq+dst_strideq*2], m2
- movh [dstq+r6q ], m3
- lea dstq, [dstq+dst_strideq*4]
- sub r4d, 4
- jnz .loop8
- RET
-
-%ifnidn %2, highbd
-.w4:
- mov r4d, dword hm
- lea r5q, [src_strideq*3]
- lea r6q, [dst_strideq*3]
-.loop4:
- movd m0, [srcq]
- movd m1, [srcq+src_strideq]
- movd m2, [srcq+src_strideq*2]
- movd m3, [srcq+r5q]
- lea srcq, [srcq+src_strideq*4]
-%ifidn %1, avg
- movd m4, [dstq]
- movd m5, [dstq+dst_strideq]
- movd m6, [dstq+dst_strideq*2]
- movd m7, [dstq+r6q]
- pavg m0, m4
- pavg m1, m5
- pavg m2, m6
- pavg m3, m7
-%endif
- movd [dstq ], m0
- movd [dstq+dst_strideq ], m1
- movd [dstq+dst_strideq*2], m2
- movd [dstq+r6q ], m3
- lea dstq, [dstq+dst_strideq*4]
- sub r4d, 4
- jnz .loop4
- RET
-%endif
-%endmacro
-
-INIT_XMM sse2
-convolve_fn copy
-convolve_fn avg
-convolve_fn copy, highbd
diff --git a/third_party/aom/aom_dsp/x86/aom_high_subpixel_8t_sse2.asm b/third_party/aom/aom_dsp/x86/aom_high_subpixel_8t_sse2.asm
deleted file mode 100644
index b6f040791..000000000
--- a/third_party/aom/aom_dsp/x86/aom_high_subpixel_8t_sse2.asm
+++ /dev/null
@@ -1,613 +0,0 @@
-;
-; Copyright (c) 2016, Alliance for Open Media. All rights reserved
-;
-; This source code is subject to the terms of the BSD 2 Clause License and
-; the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
-; was not distributed with this source code in the LICENSE file, you can
-; obtain it at www.aomedia.org/license/software. If the Alliance for Open
-; Media Patent License 1.0 was not distributed with this source code in the
-; PATENTS file, you can obtain it at www.aomedia.org/license/patent.
-;
-
-;
-
-
-%include "aom_ports/x86_abi_support.asm"
-
-;Note: tap3 and tap4 have to be applied and added after other taps to avoid
-;overflow.
-
-%macro HIGH_GET_FILTERS_4 0
- mov rdx, arg(5) ;filter ptr
- mov rcx, 0x00000040
-
- movdqa xmm7, [rdx] ;load filters
- pshuflw xmm0, xmm7, 0b ;k0
- pshuflw xmm1, xmm7, 01010101b ;k1
- pshuflw xmm2, xmm7, 10101010b ;k2
- pshuflw xmm3, xmm7, 11111111b ;k3
- psrldq xmm7, 8
- pshuflw xmm4, xmm7, 0b ;k4
- pshuflw xmm5, xmm7, 01010101b ;k5
- pshuflw xmm6, xmm7, 10101010b ;k6
- pshuflw xmm7, xmm7, 11111111b ;k7
-
- punpcklwd xmm0, xmm6
- punpcklwd xmm2, xmm5
- punpcklwd xmm3, xmm4
- punpcklwd xmm1, xmm7
-
- movdqa k0k6, xmm0
- movdqa k2k5, xmm2
- movdqa k3k4, xmm3
- movdqa k1k7, xmm1
-
- movq xmm6, rcx
- pshufd xmm6, xmm6, 0
- movdqa krd, xmm6
-
- ;Compute max and min values of a pixel
- mov rdx, 0x00010001
- movsxd rcx, DWORD PTR arg(6) ;bps
- movq xmm0, rdx
- movq xmm1, rcx
- pshufd xmm0, xmm0, 0b
- movdqa xmm2, xmm0
- psllw xmm0, xmm1
- psubw xmm0, xmm2
- pxor xmm1, xmm1
- movdqa max, xmm0 ;max value (for clamping)
- movdqa min, xmm1 ;min value (for clamping)
-
-%endm
-
-%macro HIGH_APPLY_FILTER_4 1
- punpcklwd xmm0, xmm6 ;two row in one register
- punpcklwd xmm1, xmm7
- punpcklwd xmm2, xmm5
- punpcklwd xmm3, xmm4
-
- pmaddwd xmm0, k0k6 ;multiply the filter factors
- pmaddwd xmm1, k1k7
- pmaddwd xmm2, k2k5
- pmaddwd xmm3, k3k4
-
- paddd xmm0, xmm1 ;sum
- paddd xmm0, xmm2
- paddd xmm0, xmm3
-
- paddd xmm0, krd ;rounding
- psrad xmm0, 7 ;shift
- packssdw xmm0, xmm0 ;pack to word
-
- ;clamp the values
- pminsw xmm0, max
- pmaxsw xmm0, min
-
-%if %1
- movq xmm1, [rdi]
- pavgw xmm0, xmm1
-%endif
- movq [rdi], xmm0
-%endm
-
-%macro HIGH_GET_FILTERS 0
- mov rdx, arg(5) ;filter ptr
- mov rsi, arg(0) ;src_ptr
- mov rdi, arg(2) ;output_ptr
- mov rcx, 0x00000040
-
- movdqa xmm7, [rdx] ;load filters
- pshuflw xmm0, xmm7, 0b ;k0
- pshuflw xmm1, xmm7, 01010101b ;k1
- pshuflw xmm2, xmm7, 10101010b ;k2
- pshuflw xmm3, xmm7, 11111111b ;k3
- pshufhw xmm4, xmm7, 0b ;k4
- pshufhw xmm5, xmm7, 01010101b ;k5
- pshufhw xmm6, xmm7, 10101010b ;k6
- pshufhw xmm7, xmm7, 11111111b ;k7
- punpcklqdq xmm2, xmm2
- punpcklqdq xmm3, xmm3
- punpcklwd xmm0, xmm1
- punpckhwd xmm6, xmm7
- punpckhwd xmm2, xmm5
- punpckhwd xmm3, xmm4
-
- movdqa k0k1, xmm0 ;store filter factors on stack
- movdqa k6k7, xmm6
- movdqa k2k5, xmm2
- movdqa k3k4, xmm3
-
- movq xmm6, rcx
- pshufd xmm6, xmm6, 0
- movdqa krd, xmm6 ;rounding
-
- ;Compute max and min values of a pixel
- mov rdx, 0x00010001
- movsxd rcx, DWORD PTR arg(6) ;bps
- movq xmm0, rdx
- movq xmm1, rcx
- pshufd xmm0, xmm0, 0b
- movdqa xmm2, xmm0
- psllw xmm0, xmm1
- psubw xmm0, xmm2
- pxor xmm1, xmm1
- movdqa max, xmm0 ;max value (for clamping)
- movdqa min, xmm1 ;min value (for clamping)
-%endm
-
-%macro LOAD_VERT_8 1
- movdqu xmm0, [rsi + %1] ;0
- movdqu xmm1, [rsi + rax + %1] ;1
- movdqu xmm6, [rsi + rdx * 2 + %1] ;6
- lea rsi, [rsi + rax]
- movdqu xmm7, [rsi + rdx * 2 + %1] ;7
- movdqu xmm2, [rsi + rax + %1] ;2
- movdqu xmm3, [rsi + rax * 2 + %1] ;3
- movdqu xmm4, [rsi + rdx + %1] ;4
- movdqu xmm5, [rsi + rax * 4 + %1] ;5
-%endm
-
-%macro HIGH_APPLY_FILTER_8 2
- movdqu temp, xmm4
- movdqa xmm4, xmm0
- punpcklwd xmm0, xmm1
- punpckhwd xmm4, xmm1
- movdqa xmm1, xmm6
- punpcklwd xmm6, xmm7
- punpckhwd xmm1, xmm7
- movdqa xmm7, xmm2
- punpcklwd xmm2, xmm5
- punpckhwd xmm7, xmm5
-
- movdqu xmm5, temp
- movdqu temp, xmm4
- movdqa xmm4, xmm3
- punpcklwd xmm3, xmm5
- punpckhwd xmm4, xmm5
- movdqu xmm5, temp
-
- pmaddwd xmm0, k0k1
- pmaddwd xmm5, k0k1
- pmaddwd xmm6, k6k7
- pmaddwd xmm1, k6k7
- pmaddwd xmm2, k2k5
- pmaddwd xmm7, k2k5
- pmaddwd xmm3, k3k4
- pmaddwd xmm4, k3k4
-
- paddd xmm0, xmm6
- paddd xmm0, xmm2
- paddd xmm0, xmm3
- paddd xmm5, xmm1
- paddd xmm5, xmm7
- paddd xmm5, xmm4
-
- paddd xmm0, krd ;rounding
- paddd xmm5, krd
- psrad xmm0, 7 ;shift
- psrad xmm5, 7
- packssdw xmm0, xmm5 ;pack back to word
-
- ;clamp the values
- pminsw xmm0, max
- pmaxsw xmm0, min
-
-%if %1
- movdqu xmm1, [rdi + %2]
- pavgw xmm0, xmm1
-%endif
- movdqu [rdi + %2], xmm0
-%endm
-
-SECTION .text
-
-;void aom_filter_block1d4_v8_sse2
-;(
-; unsigned char *src_ptr,
-; unsigned int src_pitch,
-; unsigned char *output_ptr,
-; unsigned int out_pitch,
-; unsigned int output_height,
-; short *filter
-;)
-global sym(aom_highbd_filter_block1d4_v8_sse2) PRIVATE
-sym(aom_highbd_filter_block1d4_v8_sse2):
- push rbp
- mov rbp, rsp
- SHADOW_ARGS_TO_STACK 7
- SAVE_XMM 7
- push rsi
- push rdi
- push rbx
- ; end prolog
-
- ALIGN_STACK 16, rax
- sub rsp, 16 * 7
- %define k0k6 [rsp + 16 * 0]
- %define k2k5 [rsp + 16 * 1]
- %define k3k4 [rsp + 16 * 2]
- %define k1k7 [rsp + 16 * 3]
- %define krd [rsp + 16 * 4]
- %define max [rsp + 16 * 5]
- %define min [rsp + 16 * 6]
-
- HIGH_GET_FILTERS_4
-
- mov rsi, arg(0) ;src_ptr
- mov rdi, arg(2) ;output_ptr
-
- movsxd rax, DWORD PTR arg(1) ;pixels_per_line
- movsxd rbx, DWORD PTR arg(3) ;out_pitch
- lea rax, [rax + rax] ;bytes per line
- lea rbx, [rbx + rbx]
- lea rdx, [rax + rax * 2]
- movsxd rcx, DWORD PTR arg(4) ;output_height
-
-.loop:
- movq xmm0, [rsi] ;load src: row 0
- movq xmm1, [rsi + rax] ;1
- movq xmm6, [rsi + rdx * 2] ;6
- lea rsi, [rsi + rax]
- movq xmm7, [rsi + rdx * 2] ;7
- movq xmm2, [rsi + rax] ;2
- movq xmm3, [rsi + rax * 2] ;3
- movq xmm4, [rsi + rdx] ;4
- movq xmm5, [rsi + rax * 4] ;5
-
- HIGH_APPLY_FILTER_4 0
-
- lea rdi, [rdi + rbx]
- dec rcx
- jnz .loop
-
- add rsp, 16 * 7
- pop rsp
- pop rbx
- ; begin epilog
- pop rdi
- pop rsi
- RESTORE_XMM
- UNSHADOW_ARGS
- pop rbp
- ret
-
-;void aom_filter_block1d8_v8_sse2
-;(
-; unsigned char *src_ptr,
-; unsigned int src_pitch,
-; unsigned char *output_ptr,
-; unsigned int out_pitch,
-; unsigned int output_height,
-; short *filter
-;)
-global sym(aom_highbd_filter_block1d8_v8_sse2) PRIVATE
-sym(aom_highbd_filter_block1d8_v8_sse2):
- push rbp
- mov rbp, rsp
- SHADOW_ARGS_TO_STACK 7
- SAVE_XMM 7
- push rsi
- push rdi
- push rbx
- ; end prolog
-
- ALIGN_STACK 16, rax
- sub rsp, 16 * 8
- %define k0k1 [rsp + 16 * 0]
- %define k6k7 [rsp + 16 * 1]
- %define k2k5 [rsp + 16 * 2]
- %define k3k4 [rsp + 16 * 3]
- %define krd [rsp + 16 * 4]
- %define temp [rsp + 16 * 5]
- %define max [rsp + 16 * 6]
- %define min [rsp + 16 * 7]
-
- HIGH_GET_FILTERS
-
- movsxd rax, DWORD PTR arg(1) ;pixels_per_line
- movsxd rbx, DWORD PTR arg(3) ;out_pitch
- lea rax, [rax + rax] ;bytes per line
- lea rbx, [rbx + rbx]
- lea rdx, [rax + rax * 2]
- movsxd rcx, DWORD PTR arg(4) ;output_height
-
-.loop:
- LOAD_VERT_8 0
- HIGH_APPLY_FILTER_8 0, 0
-
- lea rdi, [rdi + rbx]
- dec rcx
- jnz .loop
-
- add rsp, 16 * 8
- pop rsp
- pop rbx
- ; begin epilog
- pop rdi
- pop rsi
- RESTORE_XMM
- UNSHADOW_ARGS
- pop rbp
- ret
-
-;void aom_filter_block1d16_v8_sse2
-;(
-; unsigned char *src_ptr,
-; unsigned int src_pitch,
-; unsigned char *output_ptr,
-; unsigned int out_pitch,
-; unsigned int output_height,
-; short *filter
-;)
-global sym(aom_highbd_filter_block1d16_v8_sse2) PRIVATE
-sym(aom_highbd_filter_block1d16_v8_sse2):
- push rbp
- mov rbp, rsp
- SHADOW_ARGS_TO_STACK 7
- SAVE_XMM 7
- push rsi
- push rdi
- push rbx
- ; end prolog
-
- ALIGN_STACK 16, rax
- sub rsp, 16 * 8
- %define k0k1 [rsp + 16 * 0]
- %define k6k7 [rsp + 16 * 1]
- %define k2k5 [rsp + 16 * 2]
- %define k3k4 [rsp + 16 * 3]
- %define krd [rsp + 16 * 4]
- %define temp [rsp + 16 * 5]
- %define max [rsp + 16 * 6]
- %define min [rsp + 16 * 7]
-
- HIGH_GET_FILTERS
-
- movsxd rax, DWORD PTR arg(1) ;pixels_per_line
- movsxd rbx, DWORD PTR arg(3) ;out_pitch
- lea rax, [rax + rax] ;bytes per line
- lea rbx, [rbx + rbx]
- lea rdx, [rax + rax * 2]
- movsxd rcx, DWORD PTR arg(4) ;output_height
-
-.loop:
- LOAD_VERT_8 0
- HIGH_APPLY_FILTER_8 0, 0
- sub rsi, rax
-
- LOAD_VERT_8 16
- HIGH_APPLY_FILTER_8 0, 16
- add rdi, rbx
-
- dec rcx
- jnz .loop
-
- add rsp, 16 * 8
- pop rsp
- pop rbx
- ; begin epilog
- pop rdi
- pop rsi
- RESTORE_XMM
- UNSHADOW_ARGS
- pop rbp
- ret
-
-;void aom_filter_block1d4_h8_sse2
-;(
-; unsigned char *src_ptr,
-; unsigned int src_pixels_per_line,
-; unsigned char *output_ptr,
-; unsigned int output_pitch,
-; unsigned int output_height,
-; short *filter
-;)
-global sym(aom_highbd_filter_block1d4_h8_sse2) PRIVATE
-sym(aom_highbd_filter_block1d4_h8_sse2):
- push rbp
- mov rbp, rsp
- SHADOW_ARGS_TO_STACK 7
- SAVE_XMM 7
- push rsi
- push rdi
- ; end prolog
-
- ALIGN_STACK 16, rax
- sub rsp, 16 * 7
- %define k0k6 [rsp + 16 * 0]
- %define k2k5 [rsp + 16 * 1]
- %define k3k4 [rsp + 16 * 2]
- %define k1k7 [rsp + 16 * 3]
- %define krd [rsp + 16 * 4]
- %define max [rsp + 16 * 5]
- %define min [rsp + 16 * 6]
-
- HIGH_GET_FILTERS_4
-
- mov rsi, arg(0) ;src_ptr
- mov rdi, arg(2) ;output_ptr
-
- movsxd rax, DWORD PTR arg(1) ;pixels_per_line
- movsxd rdx, DWORD PTR arg(3) ;out_pitch
- lea rax, [rax + rax] ;bytes per line
- lea rdx, [rdx + rdx]
- movsxd rcx, DWORD PTR arg(4) ;output_height
-
-.loop:
- movdqu xmm0, [rsi - 6] ;load src
- movdqu xmm4, [rsi + 2]
- movdqa xmm1, xmm0
- movdqa xmm6, xmm4
- movdqa xmm7, xmm4
- movdqa xmm2, xmm0
- movdqa xmm3, xmm0
- movdqa xmm5, xmm4
-
- psrldq xmm1, 2
- psrldq xmm6, 4
- psrldq xmm7, 6
- psrldq xmm2, 4
- psrldq xmm3, 6
- psrldq xmm5, 2
-
- HIGH_APPLY_FILTER_4 0
-
- lea rsi, [rsi + rax]
- lea rdi, [rdi + rdx]
- dec rcx
- jnz .loop
-
- add rsp, 16 * 7
- pop rsp
-
- ; begin epilog
- pop rdi
- pop rsi
- RESTORE_XMM
- UNSHADOW_ARGS
- pop rbp
- ret
-
-;void aom_filter_block1d8_h8_sse2
-;(
-; unsigned char *src_ptr,
-; unsigned int src_pixels_per_line,
-; unsigned char *output_ptr,
-; unsigned int output_pitch,
-; unsigned int output_height,
-; short *filter
-;)
-global sym(aom_highbd_filter_block1d8_h8_sse2) PRIVATE
-sym(aom_highbd_filter_block1d8_h8_sse2):
- push rbp
- mov rbp, rsp
- SHADOW_ARGS_TO_STACK 7
- SAVE_XMM 7
- push rsi
- push rdi
- ; end prolog
-
- ALIGN_STACK 16, rax
- sub rsp, 16 * 8
- %define k0k1 [rsp + 16 * 0]
- %define k6k7 [rsp + 16 * 1]
- %define k2k5 [rsp + 16 * 2]
- %define k3k4 [rsp + 16 * 3]
- %define krd [rsp + 16 * 4]
- %define temp [rsp + 16 * 5]
- %define max [rsp + 16 * 6]
- %define min [rsp + 16 * 7]
-
- HIGH_GET_FILTERS
-
- movsxd rax, DWORD PTR arg(1) ;pixels_per_line
- movsxd rdx, DWORD PTR arg(3) ;out_pitch
- lea rax, [rax + rax] ;bytes per line
- lea rdx, [rdx + rdx]
- movsxd rcx, DWORD PTR arg(4) ;output_height
-
-.loop:
- movdqu xmm0, [rsi - 6] ;load src
- movdqu xmm1, [rsi - 4]
- movdqu xmm2, [rsi - 2]
- movdqu xmm3, [rsi]
- movdqu xmm4, [rsi + 2]
- movdqu xmm5, [rsi + 4]
- movdqu xmm6, [rsi + 6]
- movdqu xmm7, [rsi + 8]
-
- HIGH_APPLY_FILTER_8 0, 0
-
- lea rsi, [rsi + rax]
- lea rdi, [rdi + rdx]
- dec rcx
- jnz .loop
-
- add rsp, 16 * 8
- pop rsp
-
- ; begin epilog
- pop rdi
- pop rsi
- RESTORE_XMM
- UNSHADOW_ARGS
- pop rbp
- ret
-
-;void aom_filter_block1d16_h8_sse2
-;(
-; unsigned char *src_ptr,
-; unsigned int src_pixels_per_line,
-; unsigned char *output_ptr,
-; unsigned int output_pitch,
-; unsigned int output_height,
-; short *filter
-;)
-global sym(aom_highbd_filter_block1d16_h8_sse2) PRIVATE
-sym(aom_highbd_filter_block1d16_h8_sse2):
- push rbp
- mov rbp, rsp
- SHADOW_ARGS_TO_STACK 7
- SAVE_XMM 7
- push rsi
- push rdi
- ; end prolog
-
- ALIGN_STACK 16, rax
- sub rsp, 16 * 8
- %define k0k1 [rsp + 16 * 0]
- %define k6k7 [rsp + 16 * 1]
- %define k2k5 [rsp + 16 * 2]
- %define k3k4 [rsp + 16 * 3]
- %define krd [rsp + 16 * 4]
- %define temp [rsp + 16 * 5]
- %define max [rsp + 16 * 6]
- %define min [rsp + 16 * 7]
-
- HIGH_GET_FILTERS
-
- movsxd rax, DWORD PTR arg(1) ;pixels_per_line
- movsxd rdx, DWORD PTR arg(3) ;out_pitch
- lea rax, [rax + rax] ;bytes per line
- lea rdx, [rdx + rdx]
- movsxd rcx, DWORD PTR arg(4) ;output_height
-
-.loop:
- movdqu xmm0, [rsi - 6] ;load src
- movdqu xmm1, [rsi - 4]
- movdqu xmm2, [rsi - 2]
- movdqu xmm3, [rsi]
- movdqu xmm4, [rsi + 2]
- movdqu xmm5, [rsi + 4]
- movdqu xmm6, [rsi + 6]
- movdqu xmm7, [rsi + 8]
-
- HIGH_APPLY_FILTER_8 0, 0
-
- movdqu xmm0, [rsi + 10] ;load src
- movdqu xmm1, [rsi + 12]
- movdqu xmm2, [rsi + 14]
- movdqu xmm3, [rsi + 16]
- movdqu xmm4, [rsi + 18]
- movdqu xmm5, [rsi + 20]
- movdqu xmm6, [rsi + 22]
- movdqu xmm7, [rsi + 24]
-
- HIGH_APPLY_FILTER_8 0, 16
-
- lea rsi, [rsi + rax]
- lea rdi, [rdi + rdx]
- dec rcx
- jnz .loop
-
- add rsp, 16 * 8
- pop rsp
-
- ; begin epilog
- pop rdi
- pop rsi
- RESTORE_XMM
- UNSHADOW_ARGS
- pop rbp
- ret
diff --git a/third_party/aom/aom_dsp/x86/aom_high_subpixel_bilinear_sse2.asm b/third_party/aom/aom_dsp/x86/aom_high_subpixel_bilinear_sse2.asm
deleted file mode 100644
index 7b3fe6419..000000000
--- a/third_party/aom/aom_dsp/x86/aom_high_subpixel_bilinear_sse2.asm
+++ /dev/null
@@ -1,338 +0,0 @@
-;
-; Copyright (c) 2016, Alliance for Open Media. All rights reserved
-;
-; This source code is subject to the terms of the BSD 2 Clause License and
-; the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
-; was not distributed with this source code in the LICENSE file, you can
-; obtain it at www.aomedia.org/license/software. If the Alliance for Open
-; Media Patent License 1.0 was not distributed with this source code in the
-; PATENTS file, you can obtain it at www.aomedia.org/license/patent.
-;
-
-;
-
-%include "aom_ports/x86_abi_support.asm"
-
-%macro HIGH_GET_PARAM_4 0
- mov rdx, arg(5) ;filter ptr
- mov rsi, arg(0) ;src_ptr
- mov rdi, arg(2) ;output_ptr
- mov rcx, 0x00000040
-
- movdqa xmm3, [rdx] ;load filters
- pshuflw xmm4, xmm3, 11111111b ;k3
- psrldq xmm3, 8
- pshuflw xmm3, xmm3, 0b ;k4
- punpcklwd xmm4, xmm3 ;k3k4
-
- movq xmm3, rcx ;rounding
- pshufd xmm3, xmm3, 0
-
- mov rdx, 0x00010001
- movsxd rcx, DWORD PTR arg(6) ;bps
- movq xmm5, rdx
- movq xmm2, rcx
- pshufd xmm5, xmm5, 0b
- movdqa xmm1, xmm5
- psllw xmm5, xmm2
- psubw xmm5, xmm1 ;max value (for clamping)
- pxor xmm2, xmm2 ;min value (for clamping)
-
- movsxd rax, DWORD PTR arg(1) ;pixels_per_line
- movsxd rdx, DWORD PTR arg(3) ;out_pitch
- movsxd rcx, DWORD PTR arg(4) ;output_height
-%endm
-
-%macro HIGH_APPLY_FILTER_4 1
-
- punpcklwd xmm0, xmm1 ;two row in one register
- pmaddwd xmm0, xmm4 ;multiply the filter factors
-
- paddd xmm0, xmm3 ;rounding
- psrad xmm0, 7 ;shift
- packssdw xmm0, xmm0 ;pack to word
-
- ;clamp the values
- pminsw xmm0, xmm5
- pmaxsw xmm0, xmm2
-
-%if %1
- movq xmm1, [rdi]
- pavgw xmm0, xmm1
-%endif
-
- movq [rdi], xmm0
- lea rsi, [rsi + 2*rax]
- lea rdi, [rdi + 2*rdx]
- dec rcx
-%endm
-
-%if ARCH_X86_64
-%macro HIGH_GET_PARAM 0
- mov rdx, arg(5) ;filter ptr
- mov rsi, arg(0) ;src_ptr
- mov rdi, arg(2) ;output_ptr
- mov rcx, 0x00000040
-
- movdqa xmm6, [rdx] ;load filters
-
- pshuflw xmm7, xmm6, 11111111b ;k3
- pshufhw xmm6, xmm6, 0b ;k4
- psrldq xmm6, 8
- punpcklwd xmm7, xmm6 ;k3k4k3k4k3k4k3k4
-
- movq xmm4, rcx ;rounding
- pshufd xmm4, xmm4, 0
-
- mov rdx, 0x00010001
- movsxd rcx, DWORD PTR arg(6) ;bps
- movq xmm8, rdx
- movq xmm5, rcx
- pshufd xmm8, xmm8, 0b
- movdqa xmm1, xmm8
- psllw xmm8, xmm5
- psubw xmm8, xmm1 ;max value (for clamping)
- pxor xmm5, xmm5 ;min value (for clamping)
-
- movsxd rax, DWORD PTR arg(1) ;pixels_per_line
- movsxd rdx, DWORD PTR arg(3) ;out_pitch
- movsxd rcx, DWORD PTR arg(4) ;output_height
-%endm
-
-%macro HIGH_APPLY_FILTER_8 1
- movdqa xmm6, xmm0
- punpckhwd xmm6, xmm1
- punpcklwd xmm0, xmm1
- pmaddwd xmm6, xmm7
- pmaddwd xmm0, xmm7
-
- paddd xmm6, xmm4 ;rounding
- paddd xmm0, xmm4 ;rounding
- psrad xmm6, 7 ;shift
- psrad xmm0, 7 ;shift
- packssdw xmm0, xmm6 ;pack back to word
-
- ;clamp the values
- pminsw xmm0, xmm8
- pmaxsw xmm0, xmm5
-
-%if %1
- movdqu xmm1, [rdi]
- pavgw xmm0, xmm1
-%endif
- movdqu [rdi], xmm0 ;store the result
-
- lea rsi, [rsi + 2*rax]
- lea rdi, [rdi + 2*rdx]
- dec rcx
-%endm
-
-%macro HIGH_APPLY_FILTER_16 1
- movdqa xmm9, xmm0
- movdqa xmm6, xmm2
- punpckhwd xmm9, xmm1
- punpckhwd xmm6, xmm3
- punpcklwd xmm0, xmm1
- punpcklwd xmm2, xmm3
-
- pmaddwd xmm9, xmm7
- pmaddwd xmm6, xmm7
- pmaddwd xmm0, xmm7
- pmaddwd xmm2, xmm7
-
- paddd xmm9, xmm4 ;rounding
- paddd xmm6, xmm4
- paddd xmm0, xmm4
- paddd xmm2, xmm4
-
- psrad xmm9, 7 ;shift
- psrad xmm6, 7
- psrad xmm0, 7
- psrad xmm2, 7
-
- packssdw xmm0, xmm9 ;pack back to word
- packssdw xmm2, xmm6 ;pack back to word
-
- ;clamp the values
- pminsw xmm0, xmm8
- pmaxsw xmm0, xmm5
- pminsw xmm2, xmm8
- pmaxsw xmm2, xmm5
-
-%if %1
- movdqu xmm1, [rdi]
- movdqu xmm3, [rdi + 16]
- pavgw xmm0, xmm1
- pavgw xmm2, xmm3
-%endif
- movdqu [rdi], xmm0 ;store the result
- movdqu [rdi + 16], xmm2 ;store the result
-
- lea rsi, [rsi + 2*rax]
- lea rdi, [rdi + 2*rdx]
- dec rcx
-%endm
-%endif
-
-SECTION .text
-
-global sym(aom_highbd_filter_block1d4_v2_sse2) PRIVATE
-sym(aom_highbd_filter_block1d4_v2_sse2):
- push rbp
- mov rbp, rsp
- SHADOW_ARGS_TO_STACK 7
- push rsi
- push rdi
- ; end prolog
-
- HIGH_GET_PARAM_4
-.loop:
- movq xmm0, [rsi] ;load src
- movq xmm1, [rsi + 2*rax]
-
- HIGH_APPLY_FILTER_4 0
- jnz .loop
-
- ; begin epilog
- pop rdi
- pop rsi
- UNSHADOW_ARGS
- pop rbp
- ret
-
-%if ARCH_X86_64
-global sym(aom_highbd_filter_block1d8_v2_sse2) PRIVATE
-sym(aom_highbd_filter_block1d8_v2_sse2):
- push rbp
- mov rbp, rsp
- SHADOW_ARGS_TO_STACK 7
- SAVE_XMM 8
- push rsi
- push rdi
- ; end prolog
-
- HIGH_GET_PARAM
-.loop:
- movdqu xmm0, [rsi] ;0
- movdqu xmm1, [rsi + 2*rax] ;1
-
- HIGH_APPLY_FILTER_8 0
- jnz .loop
-
- ; begin epilog
- pop rdi
- pop rsi
- RESTORE_XMM
- UNSHADOW_ARGS
- pop rbp
- ret
-
-global sym(aom_highbd_filter_block1d16_v2_sse2) PRIVATE
-sym(aom_highbd_filter_block1d16_v2_sse2):
- push rbp
- mov rbp, rsp
- SHADOW_ARGS_TO_STACK 7
- SAVE_XMM 9
- push rsi
- push rdi
- ; end prolog
-
- HIGH_GET_PARAM
-.loop:
- movdqu xmm0, [rsi] ;0
- movdqu xmm2, [rsi + 16]
- movdqu xmm1, [rsi + 2*rax] ;1
- movdqu xmm3, [rsi + 2*rax + 16]
-
- HIGH_APPLY_FILTER_16 0
- jnz .loop
-
- ; begin epilog
- pop rdi
- pop rsi
- RESTORE_XMM
- UNSHADOW_ARGS
- pop rbp
- ret
-%endif
-
-global sym(aom_highbd_filter_block1d4_h2_sse2) PRIVATE
-sym(aom_highbd_filter_block1d4_h2_sse2):
- push rbp
- mov rbp, rsp
- SHADOW_ARGS_TO_STACK 7
- push rsi
- push rdi
- ; end prolog
-
- HIGH_GET_PARAM_4
-.loop:
- movdqu xmm0, [rsi] ;load src
- movdqa xmm1, xmm0
- psrldq xmm1, 2
-
- HIGH_APPLY_FILTER_4 0
- jnz .loop
-
- ; begin epilog
- pop rdi
- pop rsi
- UNSHADOW_ARGS
- pop rbp
- ret
-
-%if ARCH_X86_64
-global sym(aom_highbd_filter_block1d8_h2_sse2) PRIVATE
-sym(aom_highbd_filter_block1d8_h2_sse2):
- push rbp
- mov rbp, rsp
- SHADOW_ARGS_TO_STACK 7
- SAVE_XMM 8
- push rsi
- push rdi
- ; end prolog
-
- HIGH_GET_PARAM
-.loop:
- movdqu xmm0, [rsi] ;load src
- movdqu xmm1, [rsi + 2]
-
- HIGH_APPLY_FILTER_8 0
- jnz .loop
-
- ; begin epilog
- pop rdi
- pop rsi
- RESTORE_XMM
- UNSHADOW_ARGS
- pop rbp
- ret
-
-global sym(aom_highbd_filter_block1d16_h2_sse2) PRIVATE
-sym(aom_highbd_filter_block1d16_h2_sse2):
- push rbp
- mov rbp, rsp
- SHADOW_ARGS_TO_STACK 7
- SAVE_XMM 9
- push rsi
- push rdi
- ; end prolog
-
- HIGH_GET_PARAM
-.loop:
- movdqu xmm0, [rsi] ;load src
- movdqu xmm1, [rsi + 2]
- movdqu xmm2, [rsi + 16]
- movdqu xmm3, [rsi + 18]
-
- HIGH_APPLY_FILTER_16 0
- jnz .loop
-
- ; begin epilog
- pop rdi
- pop rsi
- RESTORE_XMM
- UNSHADOW_ARGS
- pop rbp
- ret
-%endif
diff --git a/third_party/aom/aom_dsp/x86/aom_subpixel_8t_intrin_avx2.c b/third_party/aom/aom_dsp/x86/aom_subpixel_8t_intrin_avx2.c
deleted file mode 100644
index 94b5da171..000000000
--- a/third_party/aom/aom_dsp/x86/aom_subpixel_8t_intrin_avx2.c
+++ /dev/null
@@ -1,1441 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#include <immintrin.h>
-
-#include "config/aom_dsp_rtcd.h"
-
-#include "aom_dsp/x86/convolve.h"
-#include "aom_dsp/x86/convolve_avx2.h"
-#include "aom_ports/mem.h"
-
-#if defined(__clang__)
-#if (__clang_major__ > 0 && __clang_major__ < 3) || \
- (__clang_major__ == 3 && __clang_minor__ <= 3) || \
- (defined(__APPLE__) && defined(__apple_build_version__) && \
- ((__clang_major__ == 4 && __clang_minor__ <= 2) || \
- (__clang_major__ == 5 && __clang_minor__ == 0)))
-#define MM256_BROADCASTSI128_SI256(x) \
- _mm_broadcastsi128_si256((__m128i const *)&(x))
-#else // clang > 3.3, and not 5.0 on macosx.
-#define MM256_BROADCASTSI128_SI256(x) _mm256_broadcastsi128_si256(x)
-#endif // clang <= 3.3
-#elif defined(__GNUC__)
-#if __GNUC__ < 4 || (__GNUC__ == 4 && __GNUC_MINOR__ <= 6)
-#define MM256_BROADCASTSI128_SI256(x) \
- _mm_broadcastsi128_si256((__m128i const *)&(x))
-#elif __GNUC__ == 4 && __GNUC_MINOR__ == 7
-#define MM256_BROADCASTSI128_SI256(x) _mm_broadcastsi128_si256(x)
-#else // gcc > 4.7
-#define MM256_BROADCASTSI128_SI256(x) _mm256_broadcastsi128_si256(x)
-#endif // gcc <= 4.6
-#else // !(gcc || clang)
-#define MM256_BROADCASTSI128_SI256(x) _mm256_broadcastsi128_si256(x)
-#endif // __clang__
-
-static INLINE void xx_storeu2_epi32(const uint8_t *output_ptr,
- const ptrdiff_t stride, const __m256i *a) {
- *((uint32_t *)(output_ptr)) = _mm_cvtsi128_si32(_mm256_castsi256_si128(*a));
- *((uint32_t *)(output_ptr + stride)) =
- _mm_cvtsi128_si32(_mm256_extracti128_si256(*a, 1));
-}
-
-static INLINE __m256i xx_loadu2_epi64(const void *hi, const void *lo) {
- __m256i a = _mm256_castsi128_si256(_mm_loadl_epi64((const __m128i *)(lo)));
- a = _mm256_inserti128_si256(a, _mm_loadl_epi64((const __m128i *)(hi)), 1);
- return a;
-}
-
-static INLINE void xx_storeu2_epi64(const uint8_t *output_ptr,
- const ptrdiff_t stride, const __m256i *a) {
- _mm_storel_epi64((__m128i *)output_ptr, _mm256_castsi256_si128(*a));
- _mm_storel_epi64((__m128i *)(output_ptr + stride),
- _mm256_extractf128_si256(*a, 1));
-}
-
-static INLINE __m256i xx_loadu2_mi128(const void *hi, const void *lo) {
- __m256i a = _mm256_castsi128_si256(_mm_loadu_si128((const __m128i *)(lo)));
- a = _mm256_inserti128_si256(a, _mm_loadu_si128((const __m128i *)(hi)), 1);
- return a;
-}
-
-static INLINE void xx_store2_mi128(const uint8_t *output_ptr,
- const ptrdiff_t stride, const __m256i *a) {
- _mm_store_si128((__m128i *)output_ptr, _mm256_castsi256_si128(*a));
- _mm_store_si128((__m128i *)(output_ptr + stride),
- _mm256_extractf128_si256(*a, 1));
-}
-
-static void aom_filter_block1d4_h4_avx2(
- const uint8_t *src_ptr, ptrdiff_t src_pixels_per_line, uint8_t *output_ptr,
- ptrdiff_t output_pitch, uint32_t output_height, const int16_t *filter) {
- __m128i filtersReg;
- __m256i addFilterReg32, filt1Reg, firstFilters, srcReg32b1, srcRegFilt32b1_1;
- unsigned int i;
- ptrdiff_t src_stride, dst_stride;
- src_ptr -= 3;
- addFilterReg32 = _mm256_set1_epi16(32);
- filtersReg = _mm_loadu_si128((const __m128i *)filter);
- filtersReg = _mm_srai_epi16(filtersReg, 1);
- // converting the 16 bit (short) to 8 bit (byte) and have the same data
- // in both lanes of 128 bit register.
- filtersReg = _mm_packs_epi16(filtersReg, filtersReg);
- // have the same data in both lanes of a 256 bit register
- const __m256i filtersReg32 = MM256_BROADCASTSI128_SI256(filtersReg);
-
- firstFilters =
- _mm256_shuffle_epi8(filtersReg32, _mm256_set1_epi32(0x5040302u));
- filt1Reg = _mm256_load_si256((__m256i const *)(filt4_d4_global_avx2));
-
- // multiple the size of the source and destination stride by two
- src_stride = src_pixels_per_line << 1;
- dst_stride = output_pitch << 1;
- for (i = output_height; i > 1; i -= 2) {
- // load the 2 strides of source
- srcReg32b1 = xx_loadu2_mi128(src_ptr + src_pixels_per_line, src_ptr);
-
- // filter the source buffer
- srcRegFilt32b1_1 = _mm256_shuffle_epi8(srcReg32b1, filt1Reg);
-
- // multiply 4 adjacent elements with the filter and add the result
- srcRegFilt32b1_1 = _mm256_maddubs_epi16(srcRegFilt32b1_1, firstFilters);
-
- srcRegFilt32b1_1 =
- _mm256_hadds_epi16(srcRegFilt32b1_1, _mm256_setzero_si256());
-
- // shift by 6 bit each 16 bit
- srcRegFilt32b1_1 = _mm256_adds_epi16(srcRegFilt32b1_1, addFilterReg32);
- srcRegFilt32b1_1 = _mm256_srai_epi16(srcRegFilt32b1_1, 6);
-
- // shrink to 8 bit each 16 bits, the first lane contain the first
- // convolve result and the second lane contain the second convolve result
- srcRegFilt32b1_1 =
- _mm256_packus_epi16(srcRegFilt32b1_1, _mm256_setzero_si256());
-
- src_ptr += src_stride;
-
- xx_storeu2_epi32(output_ptr, output_pitch, &srcRegFilt32b1_1);
- output_ptr += dst_stride;
- }
-
- // if the number of strides is odd.
- // process only 4 bytes
- if (i > 0) {
- __m128i srcReg1, srcRegFilt1_1;
-
- srcReg1 = _mm_loadu_si128((const __m128i *)(src_ptr));
-
- // filter the source buffer
- srcRegFilt1_1 = _mm_shuffle_epi8(srcReg1, _mm256_castsi256_si128(filt1Reg));
-
- // multiply 4 adjacent elements with the filter and add the result
- srcRegFilt1_1 =
- _mm_maddubs_epi16(srcRegFilt1_1, _mm256_castsi256_si128(firstFilters));
-
- srcRegFilt1_1 = _mm_hadds_epi16(srcRegFilt1_1, _mm_setzero_si128());
- // shift by 6 bit each 16 bit
- srcRegFilt1_1 =
- _mm_adds_epi16(srcRegFilt1_1, _mm256_castsi256_si128(addFilterReg32));
- srcRegFilt1_1 = _mm_srai_epi16(srcRegFilt1_1, 6);
-
- // shrink to 8 bit each 16 bits, the first lane contain the first
- // convolve result and the second lane contain the second convolve result
- srcRegFilt1_1 = _mm_packus_epi16(srcRegFilt1_1, _mm_setzero_si128());
-
- // save 4 bytes
- *((uint32_t *)(output_ptr)) = _mm_cvtsi128_si32(srcRegFilt1_1);
- }
-}
-
-static void aom_filter_block1d4_h8_avx2(
- const uint8_t *src_ptr, ptrdiff_t src_pixels_per_line, uint8_t *output_ptr,
- ptrdiff_t output_pitch, uint32_t output_height, const int16_t *filter) {
- __m128i filtersReg;
- __m256i addFilterReg32, filt1Reg, filt2Reg;
- __m256i firstFilters, secondFilters;
- __m256i srcRegFilt32b1_1, srcRegFilt32b2;
- __m256i srcReg32b1;
- unsigned int i;
- ptrdiff_t src_stride, dst_stride;
- src_ptr -= 3;
- addFilterReg32 = _mm256_set1_epi16(32);
- filtersReg = _mm_loadu_si128((const __m128i *)filter);
- filtersReg = _mm_srai_epi16(filtersReg, 1);
- // converting the 16 bit (short) to 8 bit (byte) and have the same data
- // in both lanes of 128 bit register.
- filtersReg = _mm_packs_epi16(filtersReg, filtersReg);
- // have the same data in both lanes of a 256 bit register
- const __m256i filtersReg32 = MM256_BROADCASTSI128_SI256(filtersReg);
-
- // duplicate only the first 32 bits
- firstFilters = _mm256_shuffle_epi32(filtersReg32, 0);
- // duplicate only the second 32 bits
- secondFilters = _mm256_shuffle_epi32(filtersReg32, 0x55);
-
- filt1Reg = _mm256_load_si256((__m256i const *)filt_d4_global_avx2);
- filt2Reg = _mm256_load_si256((__m256i const *)(filt_d4_global_avx2 + 32));
-
- // multiple the size of the source and destination stride by two
- src_stride = src_pixels_per_line << 1;
- dst_stride = output_pitch << 1;
- for (i = output_height; i > 1; i -= 2) {
- // load the 2 strides of source
- srcReg32b1 = xx_loadu2_mi128(src_ptr + src_pixels_per_line, src_ptr);
-
- // filter the source buffer
- srcRegFilt32b1_1 = _mm256_shuffle_epi8(srcReg32b1, filt1Reg);
-
- // multiply 4 adjacent elements with the filter and add the result
- srcRegFilt32b1_1 = _mm256_maddubs_epi16(srcRegFilt32b1_1, firstFilters);
-
- // filter the source buffer
- srcRegFilt32b2 = _mm256_shuffle_epi8(srcReg32b1, filt2Reg);
-
- // multiply 4 adjacent elements with the filter and add the result
- srcRegFilt32b2 = _mm256_maddubs_epi16(srcRegFilt32b2, secondFilters);
-
- srcRegFilt32b1_1 = _mm256_adds_epi16(srcRegFilt32b1_1, srcRegFilt32b2);
-
- srcRegFilt32b1_1 =
- _mm256_hadds_epi16(srcRegFilt32b1_1, _mm256_setzero_si256());
-
- // shift by 6 bit each 16 bit
- srcRegFilt32b1_1 = _mm256_adds_epi16(srcRegFilt32b1_1, addFilterReg32);
- srcRegFilt32b1_1 = _mm256_srai_epi16(srcRegFilt32b1_1, 6);
-
- // shrink to 8 bit each 16 bits, the first lane contain the first
- // convolve result and the second lane contain the second convolve result
- srcRegFilt32b1_1 =
- _mm256_packus_epi16(srcRegFilt32b1_1, _mm256_setzero_si256());
-
- src_ptr += src_stride;
-
- xx_storeu2_epi32(output_ptr, output_pitch, &srcRegFilt32b1_1);
- output_ptr += dst_stride;
- }
-
- // if the number of strides is odd.
- // process only 4 bytes
- if (i > 0) {
- __m128i srcReg1, srcRegFilt1_1;
- __m128i srcRegFilt2;
-
- srcReg1 = _mm_loadu_si128((const __m128i *)(src_ptr));
-
- // filter the source buffer
- srcRegFilt1_1 = _mm_shuffle_epi8(srcReg1, _mm256_castsi256_si128(filt1Reg));
-
- // multiply 4 adjacent elements with the filter and add the result
- srcRegFilt1_1 =
- _mm_maddubs_epi16(srcRegFilt1_1, _mm256_castsi256_si128(firstFilters));
-
- // filter the source buffer
- srcRegFilt2 = _mm_shuffle_epi8(srcReg1, _mm256_castsi256_si128(filt2Reg));
-
- // multiply 4 adjacent elements with the filter and add the result
- srcRegFilt2 =
- _mm_maddubs_epi16(srcRegFilt2, _mm256_castsi256_si128(secondFilters));
-
- srcRegFilt1_1 = _mm_adds_epi16(srcRegFilt1_1, srcRegFilt2);
- srcRegFilt1_1 = _mm_hadds_epi16(srcRegFilt1_1, _mm_setzero_si128());
- // shift by 6 bit each 16 bit
- srcRegFilt1_1 =
- _mm_adds_epi16(srcRegFilt1_1, _mm256_castsi256_si128(addFilterReg32));
- srcRegFilt1_1 = _mm_srai_epi16(srcRegFilt1_1, 6);
-
- // shrink to 8 bit each 16 bits, the first lane contain the first
- // convolve result and the second lane contain the second convolve result
- srcRegFilt1_1 = _mm_packus_epi16(srcRegFilt1_1, _mm_setzero_si128());
-
- // save 4 bytes
- *((uint32_t *)(output_ptr)) = _mm_cvtsi128_si32(srcRegFilt1_1);
- }
-}
-
-static void aom_filter_block1d8_h4_avx2(
- const uint8_t *src_ptr, ptrdiff_t src_pixels_per_line, uint8_t *output_ptr,
- ptrdiff_t output_pitch, uint32_t output_height, const int16_t *filter) {
- __m128i filtersReg;
- __m256i addFilterReg32, filt2Reg, filt3Reg;
- __m256i secondFilters, thirdFilters;
- __m256i srcRegFilt32b1_1, srcRegFilt32b2, srcRegFilt32b3;
- __m256i srcReg32b1, filtersReg32;
- unsigned int i;
- ptrdiff_t src_stride, dst_stride;
- src_ptr -= 3;
- addFilterReg32 = _mm256_set1_epi16(32);
- filtersReg = _mm_loadu_si128((const __m128i *)filter);
- filtersReg = _mm_srai_epi16(filtersReg, 1);
- // converting the 16 bit (short) to 8 bit (byte) and have the same data
- // in both lanes of 128 bit register.
- filtersReg = _mm_packs_epi16(filtersReg, filtersReg);
- // have the same data in both lanes of a 256 bit register
- filtersReg32 = MM256_BROADCASTSI128_SI256(filtersReg);
-
- // duplicate only the second 16 bits (third and forth byte)
- // across 256 bit register
- secondFilters = _mm256_shuffle_epi8(filtersReg32, _mm256_set1_epi16(0x302u));
- // duplicate only the third 16 bits (fifth and sixth byte)
- // across 256 bit register
- thirdFilters = _mm256_shuffle_epi8(filtersReg32, _mm256_set1_epi16(0x504u));
-
- filt2Reg = _mm256_load_si256((__m256i const *)(filt_global_avx2 + 32));
- filt3Reg = _mm256_load_si256((__m256i const *)(filt_global_avx2 + 32 * 2));
-
- // multiply the size of the source and destination stride by two
- src_stride = src_pixels_per_line << 1;
- dst_stride = output_pitch << 1;
- for (i = output_height; i > 1; i -= 2) {
- // load the 2 strides of source
- srcReg32b1 = xx_loadu2_mi128(src_ptr + src_pixels_per_line, src_ptr);
-
- // filter the source buffer
- srcRegFilt32b3 = _mm256_shuffle_epi8(srcReg32b1, filt2Reg);
- srcRegFilt32b2 = _mm256_shuffle_epi8(srcReg32b1, filt3Reg);
-
- // multiply 2 adjacent elements with the filter and add the result
- srcRegFilt32b3 = _mm256_maddubs_epi16(srcRegFilt32b3, secondFilters);
- srcRegFilt32b2 = _mm256_maddubs_epi16(srcRegFilt32b2, thirdFilters);
-
- srcRegFilt32b1_1 = _mm256_adds_epi16(srcRegFilt32b3, srcRegFilt32b2);
-
- // shift by 6 bit each 16 bit
- srcRegFilt32b1_1 = _mm256_adds_epi16(srcRegFilt32b1_1, addFilterReg32);
- srcRegFilt32b1_1 = _mm256_srai_epi16(srcRegFilt32b1_1, 6);
-
- // shrink to 8 bit each 16 bits
- srcRegFilt32b1_1 = _mm256_packus_epi16(srcRegFilt32b1_1, srcRegFilt32b1_1);
-
- src_ptr += src_stride;
-
- xx_storeu2_epi64(output_ptr, output_pitch, &srcRegFilt32b1_1);
- output_ptr += dst_stride;
- }
-
- // if the number of strides is odd.
- // process only 8 bytes
- if (i > 0) {
- __m128i srcReg1, srcRegFilt1_1;
- __m128i srcRegFilt2, srcRegFilt3;
-
- srcReg1 = _mm_loadu_si128((const __m128i *)(src_ptr));
-
- // filter the source buffer
- srcRegFilt2 = _mm_shuffle_epi8(srcReg1, _mm256_castsi256_si128(filt2Reg));
- srcRegFilt3 = _mm_shuffle_epi8(srcReg1, _mm256_castsi256_si128(filt3Reg));
-
- // multiply 2 adjacent elements with the filter and add the result
- srcRegFilt2 =
- _mm_maddubs_epi16(srcRegFilt2, _mm256_castsi256_si128(secondFilters));
- srcRegFilt3 =
- _mm_maddubs_epi16(srcRegFilt3, _mm256_castsi256_si128(thirdFilters));
-
- // add and saturate the results together
- srcRegFilt1_1 = _mm_adds_epi16(srcRegFilt2, srcRegFilt3);
-
- // shift by 6 bit each 16 bit
- srcRegFilt1_1 =
- _mm_adds_epi16(srcRegFilt1_1, _mm256_castsi256_si128(addFilterReg32));
- srcRegFilt1_1 = _mm_srai_epi16(srcRegFilt1_1, 6);
-
- // shrink to 8 bit each 16 bits
- srcRegFilt1_1 = _mm_packus_epi16(srcRegFilt1_1, _mm_setzero_si128());
-
- // save 8 bytes
- _mm_storel_epi64((__m128i *)output_ptr, srcRegFilt1_1);
- }
-}
-
-static void aom_filter_block1d8_h8_avx2(
- const uint8_t *src_ptr, ptrdiff_t src_pixels_per_line, uint8_t *output_ptr,
- ptrdiff_t output_pitch, uint32_t output_height, const int16_t *filter) {
- __m128i filtersReg;
- __m256i addFilterReg32, filt1Reg, filt2Reg, filt3Reg, filt4Reg;
- __m256i firstFilters, secondFilters, thirdFilters, forthFilters;
- __m256i srcRegFilt32b1_1, srcRegFilt32b2, srcRegFilt32b3;
- __m256i srcReg32b1;
- unsigned int i;
- ptrdiff_t src_stride, dst_stride;
- src_ptr -= 3;
- addFilterReg32 = _mm256_set1_epi16(32);
- filtersReg = _mm_loadu_si128((const __m128i *)filter);
- filtersReg = _mm_srai_epi16(filtersReg, 1);
- // converting the 16 bit (short) to 8 bit (byte) and have the same data
- // in both lanes of 128 bit register.
- filtersReg = _mm_packs_epi16(filtersReg, filtersReg);
- // have the same data in both lanes of a 256 bit register
- const __m256i filtersReg32 = MM256_BROADCASTSI128_SI256(filtersReg);
-
- // duplicate only the first 16 bits (first and second byte)
- // across 256 bit register
- firstFilters = _mm256_shuffle_epi8(filtersReg32, _mm256_set1_epi16(0x100u));
- // duplicate only the second 16 bits (third and forth byte)
- // across 256 bit register
- secondFilters = _mm256_shuffle_epi8(filtersReg32, _mm256_set1_epi16(0x302u));
- // duplicate only the third 16 bits (fifth and sixth byte)
- // across 256 bit register
- thirdFilters = _mm256_shuffle_epi8(filtersReg32, _mm256_set1_epi16(0x504u));
- // duplicate only the forth 16 bits (seventh and eighth byte)
- // across 256 bit register
- forthFilters = _mm256_shuffle_epi8(filtersReg32, _mm256_set1_epi16(0x706u));
-
- filt1Reg = _mm256_load_si256((__m256i const *)filt_global_avx2);
- filt2Reg = _mm256_load_si256((__m256i const *)(filt_global_avx2 + 32));
- filt3Reg = _mm256_load_si256((__m256i const *)(filt_global_avx2 + 32 * 2));
- filt4Reg = _mm256_load_si256((__m256i const *)(filt_global_avx2 + 32 * 3));
-
- // multiple the size of the source and destination stride by two
- src_stride = src_pixels_per_line << 1;
- dst_stride = output_pitch << 1;
- for (i = output_height; i > 1; i -= 2) {
- // load the 2 strides of source
- srcReg32b1 = xx_loadu2_mi128(src_ptr + src_pixels_per_line, src_ptr);
-
- // filter the source buffer
- srcRegFilt32b1_1 = _mm256_shuffle_epi8(srcReg32b1, filt1Reg);
- srcRegFilt32b2 = _mm256_shuffle_epi8(srcReg32b1, filt4Reg);
-
- // multiply 2 adjacent elements with the filter and add the result
- srcRegFilt32b1_1 = _mm256_maddubs_epi16(srcRegFilt32b1_1, firstFilters);
- srcRegFilt32b2 = _mm256_maddubs_epi16(srcRegFilt32b2, forthFilters);
-
- // add and saturate the results together
- srcRegFilt32b1_1 = _mm256_adds_epi16(srcRegFilt32b1_1, srcRegFilt32b2);
-
- // filter the source buffer
- srcRegFilt32b3 = _mm256_shuffle_epi8(srcReg32b1, filt2Reg);
- srcRegFilt32b2 = _mm256_shuffle_epi8(srcReg32b1, filt3Reg);
-
- // multiply 2 adjacent elements with the filter and add the result
- srcRegFilt32b3 = _mm256_maddubs_epi16(srcRegFilt32b3, secondFilters);
- srcRegFilt32b2 = _mm256_maddubs_epi16(srcRegFilt32b2, thirdFilters);
-
- __m256i sum23 = _mm256_adds_epi16(srcRegFilt32b3, srcRegFilt32b2);
- srcRegFilt32b1_1 = _mm256_adds_epi16(srcRegFilt32b1_1, sum23);
-
- // shift by 6 bit each 16 bit
- srcRegFilt32b1_1 = _mm256_adds_epi16(srcRegFilt32b1_1, addFilterReg32);
- srcRegFilt32b1_1 = _mm256_srai_epi16(srcRegFilt32b1_1, 6);
-
- // shrink to 8 bit each 16 bits, the first lane contain the first
- // convolve result and the second lane contain the second convolve result
- srcRegFilt32b1_1 =
- _mm256_packus_epi16(srcRegFilt32b1_1, _mm256_setzero_si256());
-
- src_ptr += src_stride;
-
- xx_storeu2_epi64(output_ptr, output_pitch, &srcRegFilt32b1_1);
- output_ptr += dst_stride;
- }
-
- // if the number of strides is odd.
- // process only 8 bytes
- if (i > 0) {
- __m128i srcReg1, srcRegFilt1_1;
- __m128i srcRegFilt2, srcRegFilt3;
-
- srcReg1 = _mm_loadu_si128((const __m128i *)(src_ptr));
-
- // filter the source buffer
- srcRegFilt1_1 = _mm_shuffle_epi8(srcReg1, _mm256_castsi256_si128(filt1Reg));
- srcRegFilt2 = _mm_shuffle_epi8(srcReg1, _mm256_castsi256_si128(filt4Reg));
-
- // multiply 2 adjacent elements with the filter and add the result
- srcRegFilt1_1 =
- _mm_maddubs_epi16(srcRegFilt1_1, _mm256_castsi256_si128(firstFilters));
- srcRegFilt2 =
- _mm_maddubs_epi16(srcRegFilt2, _mm256_castsi256_si128(forthFilters));
-
- // add and saturate the results together
- srcRegFilt1_1 = _mm_adds_epi16(srcRegFilt1_1, srcRegFilt2);
-
- // filter the source buffer
- srcRegFilt3 = _mm_shuffle_epi8(srcReg1, _mm256_castsi256_si128(filt2Reg));
- srcRegFilt2 = _mm_shuffle_epi8(srcReg1, _mm256_castsi256_si128(filt3Reg));
-
- // multiply 2 adjacent elements with the filter and add the result
- srcRegFilt3 =
- _mm_maddubs_epi16(srcRegFilt3, _mm256_castsi256_si128(secondFilters));
- srcRegFilt2 =
- _mm_maddubs_epi16(srcRegFilt2, _mm256_castsi256_si128(thirdFilters));
-
- // add and saturate the results together
- srcRegFilt1_1 =
- _mm_adds_epi16(srcRegFilt1_1, _mm_adds_epi16(srcRegFilt3, srcRegFilt2));
-
- // shift by 6 bit each 16 bit
- srcRegFilt1_1 =
- _mm_adds_epi16(srcRegFilt1_1, _mm256_castsi256_si128(addFilterReg32));
- srcRegFilt1_1 = _mm_srai_epi16(srcRegFilt1_1, 6);
-
- // shrink to 8 bit each 16 bits, the first lane contain the first
- // convolve result and the second lane contain the second convolve
- // result
- srcRegFilt1_1 = _mm_packus_epi16(srcRegFilt1_1, _mm_setzero_si128());
-
- // save 8 bytes
- _mm_storel_epi64((__m128i *)output_ptr, srcRegFilt1_1);
- }
-}
-
-static void aom_filter_block1d16_h4_avx2(
- const uint8_t *src_ptr, ptrdiff_t src_pixels_per_line, uint8_t *output_ptr,
- ptrdiff_t output_pitch, uint32_t output_height, const int16_t *filter) {
- __m128i filtersReg;
- __m256i addFilterReg32, filt2Reg, filt3Reg;
- __m256i secondFilters, thirdFilters;
- __m256i srcRegFilt32b1_1, srcRegFilt32b2_1, srcRegFilt32b2, srcRegFilt32b3;
- __m256i srcReg32b1, srcReg32b2, filtersReg32;
- unsigned int i;
- ptrdiff_t src_stride, dst_stride;
- src_ptr -= 3;
- addFilterReg32 = _mm256_set1_epi16(32);
- filtersReg = _mm_loadu_si128((const __m128i *)filter);
- filtersReg = _mm_srai_epi16(filtersReg, 1);
- // converting the 16 bit (short) to 8 bit (byte) and have the same data
- // in both lanes of 128 bit register.
- filtersReg = _mm_packs_epi16(filtersReg, filtersReg);
- // have the same data in both lanes of a 256 bit register
- filtersReg32 = MM256_BROADCASTSI128_SI256(filtersReg);
-
- // duplicate only the second 16 bits (third and forth byte)
- // across 256 bit register
- secondFilters = _mm256_shuffle_epi8(filtersReg32, _mm256_set1_epi16(0x302u));
- // duplicate only the third 16 bits (fifth and sixth byte)
- // across 256 bit register
- thirdFilters = _mm256_shuffle_epi8(filtersReg32, _mm256_set1_epi16(0x504u));
-
- filt2Reg = _mm256_load_si256((__m256i const *)(filt_global_avx2 + 32));
- filt3Reg = _mm256_load_si256((__m256i const *)(filt_global_avx2 + 32 * 2));
-
- // multiply the size of the source and destination stride by two
- src_stride = src_pixels_per_line << 1;
- dst_stride = output_pitch << 1;
- for (i = output_height; i > 1; i -= 2) {
- // load the 2 strides of source
- srcReg32b1 = xx_loadu2_mi128(src_ptr + src_pixels_per_line, src_ptr);
-
- // filter the source buffer
- srcRegFilt32b3 = _mm256_shuffle_epi8(srcReg32b1, filt2Reg);
- srcRegFilt32b2 = _mm256_shuffle_epi8(srcReg32b1, filt3Reg);
-
- // multiply 2 adjacent elements with the filter and add the result
- srcRegFilt32b3 = _mm256_maddubs_epi16(srcRegFilt32b3, secondFilters);
- srcRegFilt32b2 = _mm256_maddubs_epi16(srcRegFilt32b2, thirdFilters);
-
- srcRegFilt32b1_1 = _mm256_adds_epi16(srcRegFilt32b3, srcRegFilt32b2);
-
- // reading 2 strides of the next 16 bytes
- // (part of it was being read by earlier read)
- srcReg32b2 =
- xx_loadu2_mi128(src_ptr + src_pixels_per_line + 8, src_ptr + 8);
-
- // filter the source buffer
- srcRegFilt32b3 = _mm256_shuffle_epi8(srcReg32b2, filt2Reg);
- srcRegFilt32b2 = _mm256_shuffle_epi8(srcReg32b2, filt3Reg);
-
- // multiply 2 adjacent elements with the filter and add the result
- srcRegFilt32b3 = _mm256_maddubs_epi16(srcRegFilt32b3, secondFilters);
- srcRegFilt32b2 = _mm256_maddubs_epi16(srcRegFilt32b2, thirdFilters);
-
- // add and saturate the results together
- srcRegFilt32b2_1 = _mm256_adds_epi16(srcRegFilt32b3, srcRegFilt32b2);
-
- // shift by 6 bit each 16 bit
- srcRegFilt32b1_1 = _mm256_adds_epi16(srcRegFilt32b1_1, addFilterReg32);
- srcRegFilt32b2_1 = _mm256_adds_epi16(srcRegFilt32b2_1, addFilterReg32);
- srcRegFilt32b1_1 = _mm256_srai_epi16(srcRegFilt32b1_1, 6);
- srcRegFilt32b2_1 = _mm256_srai_epi16(srcRegFilt32b2_1, 6);
-
- // shrink to 8 bit each 16 bits, the first lane contain the first
- // convolve result and the second lane contain the second convolve result
- srcRegFilt32b1_1 = _mm256_packus_epi16(srcRegFilt32b1_1, srcRegFilt32b2_1);
-
- src_ptr += src_stride;
-
- xx_store2_mi128(output_ptr, output_pitch, &srcRegFilt32b1_1);
- output_ptr += dst_stride;
- }
-
- // if the number of strides is odd.
- // process only 16 bytes
- if (i > 0) {
- __m256i srcReg1, srcReg12;
- __m256i srcRegFilt2, srcRegFilt3, srcRegFilt1_1;
-
- srcReg1 = _mm256_loadu_si256((const __m256i *)(src_ptr));
- srcReg12 = _mm256_permute4x64_epi64(srcReg1, 0x94);
-
- // filter the source buffer
- srcRegFilt2 = _mm256_shuffle_epi8(srcReg12, filt2Reg);
- srcRegFilt3 = _mm256_shuffle_epi8(srcReg12, filt3Reg);
-
- // multiply 2 adjacent elements with the filter and add the result
- srcRegFilt2 = _mm256_maddubs_epi16(srcRegFilt2, secondFilters);
- srcRegFilt3 = _mm256_maddubs_epi16(srcRegFilt3, thirdFilters);
-
- // add and saturate the results together
- srcRegFilt1_1 = _mm256_adds_epi16(srcRegFilt2, srcRegFilt3);
-
- // shift by 6 bit each 16 bit
- srcRegFilt1_1 = _mm256_adds_epi16(srcRegFilt1_1, addFilterReg32);
- srcRegFilt1_1 = _mm256_srai_epi16(srcRegFilt1_1, 6);
-
- // shrink to 8 bit each 16 bits, the first lane contain the first
- // convolve result and the second lane contain the second convolve
- // result
- srcRegFilt1_1 = _mm256_packus_epi16(srcRegFilt1_1, srcRegFilt1_1);
- srcRegFilt1_1 = _mm256_permute4x64_epi64(srcRegFilt1_1, 0x8);
-
- // save 16 bytes
- _mm_store_si128((__m128i *)output_ptr,
- _mm256_castsi256_si128(srcRegFilt1_1));
- }
-}
-
-static void aom_filter_block1d16_h8_avx2(
- const uint8_t *src_ptr, ptrdiff_t src_pixels_per_line, uint8_t *output_ptr,
- ptrdiff_t output_pitch, uint32_t output_height, const int16_t *filter) {
- __m128i filtersReg;
- __m256i addFilterReg32, filt1Reg, filt2Reg, filt3Reg, filt4Reg;
- __m256i firstFilters, secondFilters, thirdFilters, forthFilters;
- __m256i srcRegFilt32b1_1, srcRegFilt32b2_1, srcRegFilt32b2, srcRegFilt32b3;
- __m256i srcReg32b1, srcReg32b2, filtersReg32;
- unsigned int i;
- ptrdiff_t src_stride, dst_stride;
- src_ptr -= 3;
- addFilterReg32 = _mm256_set1_epi16(32);
- filtersReg = _mm_loadu_si128((const __m128i *)filter);
- filtersReg = _mm_srai_epi16(filtersReg, 1);
- // converting the 16 bit (short) to 8 bit (byte) and have the same data
- // in both lanes of 128 bit register.
- filtersReg = _mm_packs_epi16(filtersReg, filtersReg);
- // have the same data in both lanes of a 256 bit register
- filtersReg32 = MM256_BROADCASTSI128_SI256(filtersReg);
-
- // duplicate only the first 16 bits (first and second byte)
- // across 256 bit register
- firstFilters = _mm256_shuffle_epi8(filtersReg32, _mm256_set1_epi16(0x100u));
- // duplicate only the second 16 bits (third and forth byte)
- // across 256 bit register
- secondFilters = _mm256_shuffle_epi8(filtersReg32, _mm256_set1_epi16(0x302u));
- // duplicate only the third 16 bits (fifth and sixth byte)
- // across 256 bit register
- thirdFilters = _mm256_shuffle_epi8(filtersReg32, _mm256_set1_epi16(0x504u));
- // duplicate only the forth 16 bits (seventh and eighth byte)
- // across 256 bit register
- forthFilters = _mm256_shuffle_epi8(filtersReg32, _mm256_set1_epi16(0x706u));
-
- filt1Reg = _mm256_load_si256((__m256i const *)filt_global_avx2);
- filt2Reg = _mm256_load_si256((__m256i const *)(filt_global_avx2 + 32));
- filt3Reg = _mm256_load_si256((__m256i const *)(filt_global_avx2 + 32 * 2));
- filt4Reg = _mm256_load_si256((__m256i const *)(filt_global_avx2 + 32 * 3));
-
- // multiple the size of the source and destination stride by two
- src_stride = src_pixels_per_line << 1;
- dst_stride = output_pitch << 1;
- for (i = output_height; i > 1; i -= 2) {
- // load the 2 strides of source
- srcReg32b1 = xx_loadu2_mi128(src_ptr + src_pixels_per_line, src_ptr);
-
- // filter the source buffer
- srcRegFilt32b1_1 = _mm256_shuffle_epi8(srcReg32b1, filt1Reg);
- srcRegFilt32b2 = _mm256_shuffle_epi8(srcReg32b1, filt4Reg);
-
- // multiply 2 adjacent elements with the filter and add the result
- srcRegFilt32b1_1 = _mm256_maddubs_epi16(srcRegFilt32b1_1, firstFilters);
- srcRegFilt32b2 = _mm256_maddubs_epi16(srcRegFilt32b2, forthFilters);
-
- // add and saturate the results together
- srcRegFilt32b1_1 = _mm256_adds_epi16(srcRegFilt32b1_1, srcRegFilt32b2);
-
- // filter the source buffer
- srcRegFilt32b3 = _mm256_shuffle_epi8(srcReg32b1, filt2Reg);
- srcRegFilt32b2 = _mm256_shuffle_epi8(srcReg32b1, filt3Reg);
-
- // multiply 2 adjacent elements with the filter and add the result
- srcRegFilt32b3 = _mm256_maddubs_epi16(srcRegFilt32b3, secondFilters);
- srcRegFilt32b2 = _mm256_maddubs_epi16(srcRegFilt32b2, thirdFilters);
-
- __m256i sum23 = _mm256_adds_epi16(srcRegFilt32b3, srcRegFilt32b2);
- srcRegFilt32b1_1 = _mm256_adds_epi16(srcRegFilt32b1_1, sum23);
-
- // reading 2 strides of the next 16 bytes
- // (part of it was being read by earlier read)
- srcReg32b2 =
- xx_loadu2_mi128(src_ptr + src_pixels_per_line + 8, src_ptr + 8);
-
- // filter the source buffer
- srcRegFilt32b2_1 = _mm256_shuffle_epi8(srcReg32b2, filt1Reg);
- srcRegFilt32b2 = _mm256_shuffle_epi8(srcReg32b2, filt4Reg);
-
- // multiply 2 adjacent elements with the filter and add the result
- srcRegFilt32b2_1 = _mm256_maddubs_epi16(srcRegFilt32b2_1, firstFilters);
- srcRegFilt32b2 = _mm256_maddubs_epi16(srcRegFilt32b2, forthFilters);
-
- // add and saturate the results together
- srcRegFilt32b2_1 = _mm256_adds_epi16(srcRegFilt32b2_1, srcRegFilt32b2);
-
- // filter the source buffer
- srcRegFilt32b3 = _mm256_shuffle_epi8(srcReg32b2, filt2Reg);
- srcRegFilt32b2 = _mm256_shuffle_epi8(srcReg32b2, filt3Reg);
-
- // multiply 2 adjacent elements with the filter and add the result
- srcRegFilt32b3 = _mm256_maddubs_epi16(srcRegFilt32b3, secondFilters);
- srcRegFilt32b2 = _mm256_maddubs_epi16(srcRegFilt32b2, thirdFilters);
-
- // add and saturate the results together
- srcRegFilt32b2_1 = _mm256_adds_epi16(
- srcRegFilt32b2_1, _mm256_adds_epi16(srcRegFilt32b3, srcRegFilt32b2));
-
- // shift by 6 bit each 16 bit
- srcRegFilt32b1_1 = _mm256_adds_epi16(srcRegFilt32b1_1, addFilterReg32);
- srcRegFilt32b2_1 = _mm256_adds_epi16(srcRegFilt32b2_1, addFilterReg32);
- srcRegFilt32b1_1 = _mm256_srai_epi16(srcRegFilt32b1_1, 6);
- srcRegFilt32b2_1 = _mm256_srai_epi16(srcRegFilt32b2_1, 6);
-
- // shrink to 8 bit each 16 bits, the first lane contain the first
- // convolve result and the second lane contain the second convolve result
- srcRegFilt32b1_1 = _mm256_packus_epi16(srcRegFilt32b1_1, srcRegFilt32b2_1);
-
- src_ptr += src_stride;
-
- xx_store2_mi128(output_ptr, output_pitch, &srcRegFilt32b1_1);
- output_ptr += dst_stride;
- }
-
- // if the number of strides is odd.
- // process only 16 bytes
- if (i > 0) {
- __m128i srcReg1, srcReg2, srcRegFilt1_1, srcRegFilt2_1;
- __m128i srcRegFilt2, srcRegFilt3;
-
- srcReg1 = _mm_loadu_si128((const __m128i *)(src_ptr));
-
- // filter the source buffer
- srcRegFilt1_1 = _mm_shuffle_epi8(srcReg1, _mm256_castsi256_si128(filt1Reg));
- srcRegFilt2 = _mm_shuffle_epi8(srcReg1, _mm256_castsi256_si128(filt4Reg));
-
- // multiply 2 adjacent elements with the filter and add the result
- srcRegFilt1_1 =
- _mm_maddubs_epi16(srcRegFilt1_1, _mm256_castsi256_si128(firstFilters));
- srcRegFilt2 =
- _mm_maddubs_epi16(srcRegFilt2, _mm256_castsi256_si128(forthFilters));
-
- // add and saturate the results together
- srcRegFilt1_1 = _mm_adds_epi16(srcRegFilt1_1, srcRegFilt2);
-
- // filter the source buffer
- srcRegFilt3 = _mm_shuffle_epi8(srcReg1, _mm256_castsi256_si128(filt2Reg));
- srcRegFilt2 = _mm_shuffle_epi8(srcReg1, _mm256_castsi256_si128(filt3Reg));
-
- // multiply 2 adjacent elements with the filter and add the result
- srcRegFilt3 =
- _mm_maddubs_epi16(srcRegFilt3, _mm256_castsi256_si128(secondFilters));
- srcRegFilt2 =
- _mm_maddubs_epi16(srcRegFilt2, _mm256_castsi256_si128(thirdFilters));
-
- // add and saturate the results together
- srcRegFilt1_1 =
- _mm_adds_epi16(srcRegFilt1_1, _mm_adds_epi16(srcRegFilt3, srcRegFilt2));
-
- // reading the next 16 bytes
- // (part of it was being read by earlier read)
- srcReg2 = _mm_loadu_si128((const __m128i *)(src_ptr + 8));
-
- // filter the source buffer
- srcRegFilt2_1 = _mm_shuffle_epi8(srcReg2, _mm256_castsi256_si128(filt1Reg));
- srcRegFilt2 = _mm_shuffle_epi8(srcReg2, _mm256_castsi256_si128(filt4Reg));
-
- // multiply 2 adjacent elements with the filter and add the result
- srcRegFilt2_1 =
- _mm_maddubs_epi16(srcRegFilt2_1, _mm256_castsi256_si128(firstFilters));
- srcRegFilt2 =
- _mm_maddubs_epi16(srcRegFilt2, _mm256_castsi256_si128(forthFilters));
-
- // add and saturate the results together
- srcRegFilt2_1 = _mm_adds_epi16(srcRegFilt2_1, srcRegFilt2);
-
- // filter the source buffer
- srcRegFilt3 = _mm_shuffle_epi8(srcReg2, _mm256_castsi256_si128(filt2Reg));
- srcRegFilt2 = _mm_shuffle_epi8(srcReg2, _mm256_castsi256_si128(filt3Reg));
-
- // multiply 2 adjacent elements with the filter and add the result
- srcRegFilt3 =
- _mm_maddubs_epi16(srcRegFilt3, _mm256_castsi256_si128(secondFilters));
- srcRegFilt2 =
- _mm_maddubs_epi16(srcRegFilt2, _mm256_castsi256_si128(thirdFilters));
-
- // add and saturate the results together
- srcRegFilt2_1 =
- _mm_adds_epi16(srcRegFilt2_1, _mm_adds_epi16(srcRegFilt3, srcRegFilt2));
-
- // shift by 6 bit each 16 bit
- srcRegFilt1_1 =
- _mm_adds_epi16(srcRegFilt1_1, _mm256_castsi256_si128(addFilterReg32));
- srcRegFilt1_1 = _mm_srai_epi16(srcRegFilt1_1, 6);
-
- srcRegFilt2_1 =
- _mm_adds_epi16(srcRegFilt2_1, _mm256_castsi256_si128(addFilterReg32));
- srcRegFilt2_1 = _mm_srai_epi16(srcRegFilt2_1, 6);
-
- // shrink to 8 bit each 16 bits, the first lane contain the first
- // convolve result and the second lane contain the second convolve
- // result
- srcRegFilt1_1 = _mm_packus_epi16(srcRegFilt1_1, srcRegFilt2_1);
-
- // save 16 bytes
- _mm_store_si128((__m128i *)output_ptr, srcRegFilt1_1);
- }
-}
-
-static void aom_filter_block1d8_v4_avx2(
- const uint8_t *src_ptr, ptrdiff_t src_pitch, uint8_t *output_ptr,
- ptrdiff_t out_pitch, uint32_t output_height, const int16_t *filter) {
- __m128i filtersReg;
- __m256i filtersReg32, addFilterReg32;
- __m256i srcReg23, srcReg4x, srcReg34, srcReg5x, srcReg45, srcReg6x, srcReg56;
- __m256i srcReg23_34_lo, srcReg45_56_lo;
- __m256i resReg23_34_lo, resReg45_56_lo;
- __m256i resReglo, resReg;
- __m256i secondFilters, thirdFilters;
- unsigned int i;
- ptrdiff_t src_stride, dst_stride;
-
- addFilterReg32 = _mm256_set1_epi16(32);
- filtersReg = _mm_loadu_si128((const __m128i *)filter);
- // halve each 16-bit tap, then pack to signed 8 bit with saturation; packing
- // the register with itself repeats the 8 taps in both 64-bit halves.
- filtersReg = _mm_srai_epi16(filtersReg, 1);
- filtersReg = _mm_packs_epi16(filtersReg, filtersReg);
- // have the same tap bytes in both 128-bit lanes of a 256-bit register
- filtersReg32 = MM256_BROADCASTSI128_SI256(filtersReg);
-
- // replicate filter bytes 2 and 3 (third and fourth tap) into every
- // 16-bit element of the 256-bit register
- secondFilters = _mm256_shuffle_epi8(filtersReg32, _mm256_set1_epi16(0x302u));
- // replicate filter bytes 4 and 5 (fifth and sixth tap) into every
- // 16-bit element; taps 0, 1, 6 and 7 are not used by this 4-tap kernel
- thirdFilters = _mm256_shuffle_epi8(filtersReg32, _mm256_set1_epi16(0x504u));
-
- // double both strides: each loop iteration advances two rows
- src_stride = src_pitch << 1;
- dst_stride = out_pitch << 1;
-
- srcReg23 = xx_loadu2_epi64(src_ptr + src_pitch * 3, src_ptr + src_pitch * 2);
- srcReg4x = _mm256_castsi128_si256(
- _mm_loadl_epi64((const __m128i *)(src_ptr + src_pitch * 4)));
-
- // srcReg34 = {high lane of srcReg23, low lane of srcReg4x (row 4)}
- srcReg34 = _mm256_permute2x128_si256(srcReg23, srcReg4x, 0x21);
-
- srcReg23_34_lo = _mm256_unpacklo_epi8(srcReg23, srcReg34);
-
- for (i = output_height; i > 1; i -= 2) {
- // load 8 bytes from each of rows 5 and 6 and pair every two
- // consecutive rows in one 256-bit register: {4, 5} and {5, 6}
- srcReg5x = _mm256_castsi128_si256(
- _mm_loadl_epi64((const __m128i *)(src_ptr + src_pitch * 5)));
- srcReg45 =
- _mm256_inserti128_si256(srcReg4x, _mm256_castsi256_si128(srcReg5x), 1);
-
- srcReg6x = _mm256_castsi128_si256(
- _mm_loadl_epi64((const __m128i *)(src_ptr + src_pitch * 6)));
- srcReg56 =
- _mm256_inserti128_si256(srcReg5x, _mm256_castsi256_si128(srcReg6x), 1);
-
- // byte-interleave rows 4/5 (low lane) and rows 5/6 (high lane)
- srcReg45_56_lo = _mm256_unpacklo_epi8(srcReg45, srcReg56);
-
- // multiply each interleaved byte pair by its tap pair and add the two products
- resReg23_34_lo = _mm256_maddubs_epi16(srcReg23_34_lo, secondFilters);
- resReg45_56_lo = _mm256_maddubs_epi16(srcReg45_56_lo, thirdFilters);
-
- // sum the two partial results with signed saturation
- resReglo = _mm256_adds_epi16(resReg23_34_lo, resReg45_56_lo);
-
- // round: add 32 to each 16-bit sum, then arithmetic shift right by 6
- resReglo = _mm256_adds_epi16(resReglo, addFilterReg32);
- resReglo = _mm256_srai_epi16(resReglo, 6);
-
- // pack each 16-bit value to unsigned 8 bit with saturation; the first
- // lane contains the first convolve result and the second lane contains
- // the second convolve result
- resReg = _mm256_packus_epi16(resReglo, resReglo);
-
- src_ptr += src_stride;
-
- xx_storeu2_epi64(output_ptr, out_pitch, &resReg);
-
- output_ptr += dst_stride;
-
- // slide the window down two rows: keep the newest pair and the last row
- srcReg23_34_lo = srcReg45_56_lo;
- srcReg4x = srcReg6x;
- }
-}
-
-static void aom_filter_block1d8_v8_avx2(
- const uint8_t *src_ptr, ptrdiff_t src_pitch, uint8_t *output_ptr,
- ptrdiff_t out_pitch, uint32_t output_height, const int16_t *filter) {
- __m128i filtersReg;
- __m256i addFilterReg32;
- __m256i srcReg32b1, srcReg32b2, srcReg32b3, srcReg32b4, srcReg32b5;
- __m256i srcReg32b6, srcReg32b7, srcReg32b8, srcReg32b9, srcReg32b10;
- __m256i srcReg32b11, srcReg32b12, filtersReg32;
- __m256i firstFilters, secondFilters, thirdFilters, forthFilters;
- unsigned int i;
- ptrdiff_t src_stride, dst_stride;
-
- addFilterReg32 = _mm256_set1_epi16(32);
- filtersReg = _mm_loadu_si128((const __m128i *)filter);
- // halve each 16-bit tap, then pack to signed 8 bit with saturation; packing
- // the register with itself repeats the 8 taps in both 64-bit halves.
- filtersReg = _mm_srai_epi16(filtersReg, 1);
- filtersReg = _mm_packs_epi16(filtersReg, filtersReg);
- // have the same tap bytes in both 128-bit lanes of a 256-bit register
- filtersReg32 = MM256_BROADCASTSI128_SI256(filtersReg);
-
- // replicate filter bytes 0 and 1 (first and second tap) into every
- // 16-bit element of the 256-bit register
- firstFilters = _mm256_shuffle_epi8(filtersReg32, _mm256_set1_epi16(0x100u));
- // replicate filter bytes 2 and 3 (third and fourth tap) into every
- // 16-bit element of the 256-bit register
- secondFilters = _mm256_shuffle_epi8(filtersReg32, _mm256_set1_epi16(0x302u));
- // replicate filter bytes 4 and 5 (fifth and sixth tap) into every
- // 16-bit element of the 256-bit register
- thirdFilters = _mm256_shuffle_epi8(filtersReg32, _mm256_set1_epi16(0x504u));
- // replicate filter bytes 6 and 7 (seventh and eighth tap) into every
- // 16-bit element of the 256-bit register
- forthFilters = _mm256_shuffle_epi8(filtersReg32, _mm256_set1_epi16(0x706u));
-
- // double both strides: each loop iteration advances two rows
- src_stride = src_pitch << 1;
- dst_stride = out_pitch << 1;
-
- // load rows 0..6, src_pitch apart (row 6 is an 8-byte load)
- srcReg32b1 = xx_loadu2_epi64(src_ptr + src_pitch, src_ptr);
- srcReg32b3 =
- xx_loadu2_epi64(src_ptr + src_pitch * 3, src_ptr + src_pitch * 2);
- srcReg32b5 =
- xx_loadu2_epi64(src_ptr + src_pitch * 5, src_ptr + src_pitch * 4);
- srcReg32b7 = _mm256_castsi128_si256(
- _mm_loadl_epi64((const __m128i *)(src_ptr + src_pitch * 6)));
-
- // each result = {high lane of the first operand, low lane of the second}
- srcReg32b2 = _mm256_permute2x128_si256(srcReg32b1, srcReg32b3, 0x21);
- srcReg32b4 = _mm256_permute2x128_si256(srcReg32b3, srcReg32b5, 0x21);
- srcReg32b6 = _mm256_permute2x128_si256(srcReg32b5, srcReg32b7, 0x21);
- // byte-interleave every two consecutive registers except the last one
- srcReg32b10 = _mm256_unpacklo_epi8(srcReg32b1, srcReg32b2);
- srcReg32b11 = _mm256_unpacklo_epi8(srcReg32b3, srcReg32b4);
- srcReg32b2 = _mm256_unpacklo_epi8(srcReg32b5, srcReg32b6);
-
- for (i = output_height; i > 1; i -= 2) {
- // load 8 bytes from each of rows 7 and 8 and pair every two
- // consecutive rows in one 256-bit register: {6, 7} and {7, 8}
- srcReg32b8 = _mm256_castsi128_si256(
- _mm_loadl_epi64((const __m128i *)(src_ptr + src_pitch * 7)));
- srcReg32b7 = _mm256_inserti128_si256(srcReg32b7,
- _mm256_castsi256_si128(srcReg32b8), 1);
- srcReg32b9 = _mm256_castsi128_si256(
- _mm_loadl_epi64((const __m128i *)(src_ptr + src_pitch * 8)));
- srcReg32b8 = _mm256_inserti128_si256(srcReg32b8,
- _mm256_castsi256_si128(srcReg32b9), 1);
-
- // byte-interleave rows 6/7 (low lane) and rows 7/8 (high lane);
- // srcReg32b4 is kept for the next iteration (see the rotation below)
- srcReg32b4 = _mm256_unpacklo_epi8(srcReg32b7, srcReg32b8);
-
- // first and fourth tap pairs: multiply byte pairs and add the two products
- srcReg32b10 = _mm256_maddubs_epi16(srcReg32b10, firstFilters);
- srcReg32b6 = _mm256_maddubs_epi16(srcReg32b4, forthFilters);
-
- // sum the two partial results with signed saturation
- srcReg32b10 = _mm256_adds_epi16(srcReg32b10, srcReg32b6);
-
- // second and third tap pairs: multiply byte pairs and add the two products
- srcReg32b8 = _mm256_maddubs_epi16(srcReg32b11, secondFilters);
- srcReg32b12 = _mm256_maddubs_epi16(srcReg32b2, thirdFilters);
-
- // accumulate all four partial results with signed saturation
- srcReg32b10 = _mm256_adds_epi16(srcReg32b10,
- _mm256_adds_epi16(srcReg32b8, srcReg32b12));
-
- // round: add 32 to each 16-bit sum, then arithmetic shift right by 6
- srcReg32b10 = _mm256_adds_epi16(srcReg32b10, addFilterReg32);
- srcReg32b10 = _mm256_srai_epi16(srcReg32b10, 6);
-
- // pack each 16-bit value to unsigned 8 bit with saturation (upper 8
- // bytes of each lane are zero); the first lane contains the first
- // convolve result and the second lane the second convolve result
- srcReg32b1 = _mm256_packus_epi16(srcReg32b10, _mm256_setzero_si256());
-
- src_ptr += src_stride;
-
- xx_storeu2_epi64(output_ptr, out_pitch, &srcReg32b1);
-
- output_ptr += dst_stride;
-
- // slide the window down two rows: rotate the interleaved row pairs
- srcReg32b10 = srcReg32b11;
- srcReg32b11 = srcReg32b2;
- srcReg32b2 = srcReg32b4;
- srcReg32b7 = srcReg32b9;
- }
- if (i > 0) {
- __m128i srcRegFilt1, srcRegFilt4, srcRegFilt6, srcRegFilt8;
- // one row remains (odd height): load 8 bytes of row 7, the last row needed
- srcRegFilt8 = _mm_loadl_epi64((const __m128i *)(src_ptr + src_pitch * 7));
-
- // byte-interleave the last two rows (low lane of srcReg32b7 and row 7)
- srcRegFilt4 =
- _mm_unpacklo_epi8(_mm256_castsi256_si128(srcReg32b7), srcRegFilt8);
-
- // first and fourth tap pairs: multiply byte pairs and add the two products
- srcRegFilt1 = _mm_maddubs_epi16(_mm256_castsi256_si128(srcReg32b10),
- _mm256_castsi256_si128(firstFilters));
- srcRegFilt4 =
- _mm_maddubs_epi16(srcRegFilt4, _mm256_castsi256_si128(forthFilters));
-
- // sum the two partial results with signed saturation
- srcRegFilt1 = _mm_adds_epi16(srcRegFilt1, srcRegFilt4);
-
- // second tap pair: multiply byte pairs and add the two products
- srcRegFilt4 = _mm_maddubs_epi16(_mm256_castsi256_si128(srcReg32b11),
- _mm256_castsi256_si128(secondFilters));
-
- // third tap pair: multiply byte pairs and add the two products
- srcRegFilt6 = _mm_maddubs_epi16(_mm256_castsi256_si128(srcReg32b2),
- _mm256_castsi256_si128(thirdFilters));
-
- // accumulate all four partial results with signed saturation
- srcRegFilt1 =
- _mm_adds_epi16(srcRegFilt1, _mm_adds_epi16(srcRegFilt4, srcRegFilt6));
-
- // round: add 32 to each 16-bit sum, then arithmetic shift right by 6
- srcRegFilt1 =
- _mm_adds_epi16(srcRegFilt1, _mm256_castsi256_si128(addFilterReg32));
- srcRegFilt1 = _mm_srai_epi16(srcRegFilt1, 6);
-
- // pack each 16-bit value to unsigned 8 bit with saturation; the low
- // 8 bytes hold the convolve result and the high 8 bytes are zero
- srcRegFilt1 = _mm_packus_epi16(srcRegFilt1, _mm_setzero_si128());
-
- // save 8 bytes
- _mm_storel_epi64((__m128i *)output_ptr, srcRegFilt1);
- }
-}
-
-static void aom_filter_block1d16_v4_avx2(
- const uint8_t *src_ptr, ptrdiff_t src_pitch, uint8_t *output_ptr,
- ptrdiff_t out_pitch, uint32_t output_height, const int16_t *filter) {
- __m128i filtersReg;
- __m256i filtersReg32, addFilterReg32;
- __m256i srcReg23, srcReg4x, srcReg34, srcReg5x, srcReg45, srcReg6x, srcReg56;
- __m256i srcReg23_34_lo, srcReg23_34_hi, srcReg45_56_lo, srcReg45_56_hi;
- __m256i resReg23_34_lo, resReg23_34_hi, resReg45_56_lo, resReg45_56_hi;
- __m256i resReglo, resReghi, resReg;
- __m256i secondFilters, thirdFilters;
- unsigned int i;
- ptrdiff_t src_stride, dst_stride;
-
- addFilterReg32 = _mm256_set1_epi16(32);
- filtersReg = _mm_loadu_si128((const __m128i *)filter);
- // halve each 16-bit tap, then pack to signed 8 bit with saturation; packing
- // the register with itself repeats the 8 taps in both 64-bit halves.
- filtersReg = _mm_srai_epi16(filtersReg, 1);
- filtersReg = _mm_packs_epi16(filtersReg, filtersReg);
- // have the same tap bytes in both 128-bit lanes of a 256-bit register
- filtersReg32 = MM256_BROADCASTSI128_SI256(filtersReg);
-
- // replicate filter bytes 2 and 3 (third and fourth tap) into every
- // 16-bit element of the 256-bit register
- secondFilters = _mm256_shuffle_epi8(filtersReg32, _mm256_set1_epi16(0x302u));
- // replicate filter bytes 4 and 5 (fifth and sixth tap) into every
- // 16-bit element; taps 0, 1, 6 and 7 are not used by this 4-tap kernel
- thirdFilters = _mm256_shuffle_epi8(filtersReg32, _mm256_set1_epi16(0x504u));
-
- // double both strides: each loop iteration advances two rows
- src_stride = src_pitch << 1;
- dst_stride = out_pitch << 1;
-
- srcReg23 = xx_loadu2_mi128(src_ptr + src_pitch * 3, src_ptr + src_pitch * 2);
- srcReg4x = _mm256_castsi128_si256(
- _mm_loadu_si128((const __m128i *)(src_ptr + src_pitch * 4)));
-
- // srcReg34 = {high lane of srcReg23, low lane of srcReg4x (row 4)}
- srcReg34 = _mm256_permute2x128_si256(srcReg23, srcReg4x, 0x21);
-
- srcReg23_34_lo = _mm256_unpacklo_epi8(srcReg23, srcReg34);
- srcReg23_34_hi = _mm256_unpackhi_epi8(srcReg23, srcReg34);
-
- for (i = output_height; i > 1; i -= 2) {
- // load 16 bytes from each of rows 5 and 6 and pair every two
- // consecutive rows in one 256-bit register: {4, 5} and {5, 6}
- srcReg5x = _mm256_castsi128_si256(
- _mm_loadu_si128((const __m128i *)(src_ptr + src_pitch * 5)));
- srcReg45 =
- _mm256_inserti128_si256(srcReg4x, _mm256_castsi256_si128(srcReg5x), 1);
-
- srcReg6x = _mm256_castsi128_si256(
- _mm_loadu_si128((const __m128i *)(src_ptr + src_pitch * 6)));
- srcReg56 =
- _mm256_inserti128_si256(srcReg5x, _mm256_castsi256_si128(srcReg6x), 1);
-
- // byte-interleave rows 4/5 (low lane) and 5/6 (high lane), both halves
- srcReg45_56_lo = _mm256_unpacklo_epi8(srcReg45, srcReg56);
- srcReg45_56_hi = _mm256_unpackhi_epi8(srcReg45, srcReg56);
-
- // low 8 columns: multiply byte pairs by tap pairs and add the two products
- resReg23_34_lo = _mm256_maddubs_epi16(srcReg23_34_lo, secondFilters);
- resReg45_56_lo = _mm256_maddubs_epi16(srcReg45_56_lo, thirdFilters);
-
- // sum the two partial results with signed saturation
- resReglo = _mm256_adds_epi16(resReg23_34_lo, resReg45_56_lo);
-
- // high 8 columns: multiply byte pairs by tap pairs and add the two products
- resReg23_34_hi = _mm256_maddubs_epi16(srcReg23_34_hi, secondFilters);
- resReg45_56_hi = _mm256_maddubs_epi16(srcReg45_56_hi, thirdFilters);
-
- // sum the two partial results with signed saturation
- resReghi = _mm256_adds_epi16(resReg23_34_hi, resReg45_56_hi);
-
- // round: add 32 to each 16-bit sum, then arithmetic shift right by 6
- resReglo = _mm256_adds_epi16(resReglo, addFilterReg32);
- resReghi = _mm256_adds_epi16(resReghi, addFilterReg32);
- resReglo = _mm256_srai_epi16(resReglo, 6);
- resReghi = _mm256_srai_epi16(resReghi, 6);
-
- // pack each 16-bit value to unsigned 8 bit with saturation; the first
- // lane contains the first convolve result and the second lane contains
- // the second convolve result
- resReg = _mm256_packus_epi16(resReglo, resReghi);
-
- src_ptr += src_stride;
-
- xx_store2_mi128(output_ptr, out_pitch, &resReg);
-
- output_ptr += dst_stride;
-
- // slide the window down two rows: keep the newest pairs and the last row
- srcReg23_34_lo = srcReg45_56_lo;
- srcReg23_34_hi = srcReg45_56_hi;
- srcReg4x = srcReg6x;
- }
-}
-
-static void aom_filter_block1d16_v8_avx2(
- const uint8_t *src_ptr, ptrdiff_t src_pitch, uint8_t *output_ptr,
- ptrdiff_t out_pitch, uint32_t output_height, const int16_t *filter) {
- __m128i filtersReg;
- __m256i addFilterReg32;
- __m256i srcReg32b1, srcReg32b2, srcReg32b3, srcReg32b4, srcReg32b5;
- __m256i srcReg32b6, srcReg32b7, srcReg32b8, srcReg32b9, srcReg32b10;
- __m256i srcReg32b11, srcReg32b12, filtersReg32;
- __m256i firstFilters, secondFilters, thirdFilters, forthFilters;
- unsigned int i;
- ptrdiff_t src_stride, dst_stride;
-
- addFilterReg32 = _mm256_set1_epi16(32);
- filtersReg = _mm_loadu_si128((const __m128i *)filter);
- // halve each 16-bit tap, then pack to signed 8 bit with saturation; packing
- // the register with itself repeats the 8 taps in both 64-bit halves.
- filtersReg = _mm_srai_epi16(filtersReg, 1);
- filtersReg = _mm_packs_epi16(filtersReg, filtersReg);
- // have the same tap bytes in both 128-bit lanes of a 256-bit register
- filtersReg32 = MM256_BROADCASTSI128_SI256(filtersReg);
-
- // replicate filter bytes 0 and 1 (first and second tap) into every
- // 16-bit element of the 256-bit register
- firstFilters = _mm256_shuffle_epi8(filtersReg32, _mm256_set1_epi16(0x100u));
- // replicate filter bytes 2 and 3 (third and fourth tap) into every
- // 16-bit element of the 256-bit register
- secondFilters = _mm256_shuffle_epi8(filtersReg32, _mm256_set1_epi16(0x302u));
- // replicate filter bytes 4 and 5 (fifth and sixth tap) into every
- // 16-bit element of the 256-bit register
- thirdFilters = _mm256_shuffle_epi8(filtersReg32, _mm256_set1_epi16(0x504u));
- // replicate filter bytes 6 and 7 (seventh and eighth tap) into every
- // 16-bit element of the 256-bit register
- forthFilters = _mm256_shuffle_epi8(filtersReg32, _mm256_set1_epi16(0x706u));
-
- // double both strides: each loop iteration advances two rows
- src_stride = src_pitch << 1;
- dst_stride = out_pitch << 1;
-
- // load rows 0..6, src_pitch apart (row 6 is a 16-byte load)
- srcReg32b1 = xx_loadu2_mi128(src_ptr + src_pitch, src_ptr);
- srcReg32b3 =
- xx_loadu2_mi128(src_ptr + src_pitch * 3, src_ptr + src_pitch * 2);
- srcReg32b5 =
- xx_loadu2_mi128(src_ptr + src_pitch * 5, src_ptr + src_pitch * 4);
- srcReg32b7 = _mm256_castsi128_si256(
- _mm_loadu_si128((const __m128i *)(src_ptr + src_pitch * 6)));
-
- // each result = {high lane of the first operand, low lane of the second}
- srcReg32b2 = _mm256_permute2x128_si256(srcReg32b1, srcReg32b3, 0x21);
- srcReg32b4 = _mm256_permute2x128_si256(srcReg32b3, srcReg32b5, 0x21);
- srcReg32b6 = _mm256_permute2x128_si256(srcReg32b5, srcReg32b7, 0x21);
- // byte-interleave every two consecutive registers except the last one
- srcReg32b10 = _mm256_unpacklo_epi8(srcReg32b1, srcReg32b2);
- srcReg32b1 = _mm256_unpackhi_epi8(srcReg32b1, srcReg32b2);
-
- // low halves go to b11/b2 and high halves to b3/b5; all carried in the loop
- srcReg32b11 = _mm256_unpacklo_epi8(srcReg32b3, srcReg32b4);
- srcReg32b3 = _mm256_unpackhi_epi8(srcReg32b3, srcReg32b4);
- srcReg32b2 = _mm256_unpacklo_epi8(srcReg32b5, srcReg32b6);
- srcReg32b5 = _mm256_unpackhi_epi8(srcReg32b5, srcReg32b6);
-
- for (i = output_height; i > 1; i -= 2) {
- // load 16 bytes from each of rows 7 and 8 and pair every two
- // consecutive rows in one 256-bit register: {6, 7} and {7, 8}
- srcReg32b8 = _mm256_castsi128_si256(
- _mm_loadu_si128((const __m128i *)(src_ptr + src_pitch * 7)));
- srcReg32b7 = _mm256_inserti128_si256(srcReg32b7,
- _mm256_castsi256_si128(srcReg32b8), 1);
- srcReg32b9 = _mm256_castsi128_si256(
- _mm_loadu_si128((const __m128i *)(src_ptr + src_pitch * 8)));
- srcReg32b8 = _mm256_inserti128_si256(srcReg32b8,
- _mm256_castsi256_si128(srcReg32b9), 1);
-
- // byte-interleave rows 6/7 (low lane) and rows 7/8 (high lane);
- // both halves are kept for the next iteration (see the rotation below)
- srcReg32b4 = _mm256_unpacklo_epi8(srcReg32b7, srcReg32b8);
- srcReg32b7 = _mm256_unpackhi_epi8(srcReg32b7, srcReg32b8);
-
- // low 8 columns, first and fourth tap pairs: multiply-add byte pairs
- srcReg32b10 = _mm256_maddubs_epi16(srcReg32b10, firstFilters);
- srcReg32b6 = _mm256_maddubs_epi16(srcReg32b4, forthFilters);
-
- // sum the two partial results with signed saturation
- srcReg32b10 = _mm256_adds_epi16(srcReg32b10, srcReg32b6);
-
- // low 8 columns, second and third tap pairs: multiply-add byte pairs
- srcReg32b8 = _mm256_maddubs_epi16(srcReg32b11, secondFilters);
- srcReg32b12 = _mm256_maddubs_epi16(srcReg32b2, thirdFilters);
-
- // accumulate all four partial results with signed saturation
- srcReg32b10 = _mm256_adds_epi16(srcReg32b10,
- _mm256_adds_epi16(srcReg32b8, srcReg32b12));
-
- // high 8 columns, first and fourth tap pairs: multiply-add byte pairs
- srcReg32b1 = _mm256_maddubs_epi16(srcReg32b1, firstFilters);
- srcReg32b6 = _mm256_maddubs_epi16(srcReg32b7, forthFilters);
-
- srcReg32b1 = _mm256_adds_epi16(srcReg32b1, srcReg32b6);
-
- // high 8 columns, second and third tap pairs: multiply-add byte pairs
- srcReg32b8 = _mm256_maddubs_epi16(srcReg32b3, secondFilters);
- srcReg32b12 = _mm256_maddubs_epi16(srcReg32b5, thirdFilters);
-
- // accumulate all four partial results with signed saturation
- srcReg32b1 = _mm256_adds_epi16(srcReg32b1,
- _mm256_adds_epi16(srcReg32b8, srcReg32b12));
-
- // round: add 32 to each 16-bit sum, then arithmetic shift right by 6
- srcReg32b10 = _mm256_adds_epi16(srcReg32b10, addFilterReg32);
- srcReg32b1 = _mm256_adds_epi16(srcReg32b1, addFilterReg32);
- srcReg32b10 = _mm256_srai_epi16(srcReg32b10, 6);
- srcReg32b1 = _mm256_srai_epi16(srcReg32b1, 6);
-
- // pack each 16-bit value to unsigned 8 bit with saturation; the first
- // lane contains the first convolve result and the second lane contains
- // the second convolve result
- srcReg32b1 = _mm256_packus_epi16(srcReg32b10, srcReg32b1);
-
- src_ptr += src_stride;
-
- xx_store2_mi128(output_ptr, out_pitch, &srcReg32b1);
-
- output_ptr += dst_stride;
-
- // slide the window down two rows: rotate the interleaved row pairs
- srcReg32b10 = srcReg32b11;
- srcReg32b1 = srcReg32b3;
- srcReg32b11 = srcReg32b2;
- srcReg32b3 = srcReg32b5;
- srcReg32b2 = srcReg32b4;
- srcReg32b5 = srcReg32b7;
- srcReg32b7 = srcReg32b9;
- }
- if (i > 0) {
- __m128i srcRegFilt1, srcRegFilt3, srcRegFilt4, srcRegFilt5;
- __m128i srcRegFilt6, srcRegFilt7, srcRegFilt8;
- // one row remains (odd height): load 16 bytes of row 7, the last row needed
- srcRegFilt8 = _mm_loadu_si128((const __m128i *)(src_ptr + src_pitch * 7));
-
- // byte-interleave the last two rows (low lane of srcReg32b7 and row 7)
- srcRegFilt4 =
- _mm_unpacklo_epi8(_mm256_castsi256_si128(srcReg32b7), srcRegFilt8);
- srcRegFilt7 =
- _mm_unpackhi_epi8(_mm256_castsi256_si128(srcReg32b7), srcRegFilt8);
-
- // first and fourth tap pairs, low and high columns: multiply-add byte pairs
- srcRegFilt1 = _mm_maddubs_epi16(_mm256_castsi256_si128(srcReg32b10),
- _mm256_castsi256_si128(firstFilters));
- srcRegFilt4 =
- _mm_maddubs_epi16(srcRegFilt4, _mm256_castsi256_si128(forthFilters));
- srcRegFilt3 = _mm_maddubs_epi16(_mm256_castsi256_si128(srcReg32b1),
- _mm256_castsi256_si128(firstFilters));
- srcRegFilt7 =
- _mm_maddubs_epi16(srcRegFilt7, _mm256_castsi256_si128(forthFilters));
-
- // sum the partial results with signed saturation
- srcRegFilt1 = _mm_adds_epi16(srcRegFilt1, srcRegFilt4);
- srcRegFilt3 = _mm_adds_epi16(srcRegFilt3, srcRegFilt7);
-
- // second tap pair, low and high columns: multiply-add byte pairs
- srcRegFilt4 = _mm_maddubs_epi16(_mm256_castsi256_si128(srcReg32b11),
- _mm256_castsi256_si128(secondFilters));
- srcRegFilt5 = _mm_maddubs_epi16(_mm256_castsi256_si128(srcReg32b3),
- _mm256_castsi256_si128(secondFilters));
-
- // third tap pair, low and high columns: multiply-add byte pairs
- srcRegFilt6 = _mm_maddubs_epi16(_mm256_castsi256_si128(srcReg32b2),
- _mm256_castsi256_si128(thirdFilters));
- srcRegFilt7 = _mm_maddubs_epi16(_mm256_castsi256_si128(srcReg32b5),
- _mm256_castsi256_si128(thirdFilters));
-
- // accumulate all four partial results with signed saturation
- srcRegFilt1 =
- _mm_adds_epi16(srcRegFilt1, _mm_adds_epi16(srcRegFilt4, srcRegFilt6));
- srcRegFilt3 =
- _mm_adds_epi16(srcRegFilt3, _mm_adds_epi16(srcRegFilt5, srcRegFilt7));
-
- // round: add 32 to each 16-bit sum, then arithmetic shift right by 6
- srcRegFilt1 =
- _mm_adds_epi16(srcRegFilt1, _mm256_castsi256_si128(addFilterReg32));
- srcRegFilt3 =
- _mm_adds_epi16(srcRegFilt3, _mm256_castsi256_si128(addFilterReg32));
- srcRegFilt1 = _mm_srai_epi16(srcRegFilt1, 6);
- srcRegFilt3 = _mm_srai_epi16(srcRegFilt3, 6);
-
- // pack each 16-bit value to unsigned 8 bit with saturation; the low
- // 8 bytes come from srcRegFilt1 and the high 8 bytes from srcRegFilt3,
- // giving one 16-byte output row
- srcRegFilt1 = _mm_packus_epi16(srcRegFilt1, srcRegFilt3);
-
- // save 16 bytes
- _mm_store_si128((__m128i *)output_ptr, srcRegFilt1);
- }
-}
-
-static void aom_filter_block1d4_v4_avx2(
- const uint8_t *src_ptr, ptrdiff_t src_pitch, uint8_t *output_ptr,
- ptrdiff_t out_pitch, uint32_t output_height, const int16_t *filter) {
- __m128i filtersReg;
- __m256i filtersReg32, addFilterReg32;
- __m256i srcReg23, srcReg4x, srcReg34, srcReg5x, srcReg45, srcReg6x, srcReg56;
- __m256i srcReg23_34_lo, srcReg45_56_lo;
- __m256i srcReg2345_3456_lo;
- __m256i resReglo, resReg;
- __m256i firstFilters;
- unsigned int i;
- ptrdiff_t src_stride, dst_stride;
-
- addFilterReg32 = _mm256_set1_epi16(32);
- filtersReg = _mm_loadu_si128((const __m128i *)filter);
- // halve each 16-bit tap, then pack to signed 8 bit with saturation; packing
- // the register with itself repeats the 8 taps in both 64-bit halves.
- filtersReg = _mm_srai_epi16(filtersReg, 1);
- filtersReg = _mm_packs_epi16(filtersReg, filtersReg);
- // have the same tap bytes in both 128-bit lanes of a 256-bit register
- filtersReg32 = MM256_BROADCASTSI128_SI256(filtersReg);
-
- firstFilters =
- _mm256_shuffle_epi8(filtersReg32, _mm256_set1_epi32(0x5040302u));
-
- // double both strides: each loop iteration advances two rows
- src_stride = src_pitch << 1;
- dst_stride = out_pitch << 1;
-
- srcReg23 = xx_loadu2_epi64(src_ptr + src_pitch * 3, src_ptr + src_pitch * 2);
- srcReg4x = _mm256_castsi128_si256(
- _mm_loadl_epi64((const __m128i *)(src_ptr + src_pitch * 4)));
-
- // srcReg34 = {high lane of srcReg23, low lane of srcReg4x (row 4)}
- srcReg34 = _mm256_permute2x128_si256(srcReg23, srcReg4x, 0x21);
-
- srcReg23_34_lo = _mm256_unpacklo_epi8(srcReg23, srcReg34);
-
- for (i = output_height; i > 1; i -= 2) {
- // load 8 bytes from each of rows 5 and 6 and pair every two
- // consecutive rows in one 256-bit register: {4, 5} and {5, 6}
- srcReg5x = _mm256_castsi128_si256(
- _mm_loadl_epi64((const __m128i *)(src_ptr + src_pitch * 5)));
- srcReg45 =
- _mm256_inserti128_si256(srcReg4x, _mm256_castsi256_si128(srcReg5x), 1);
-
- srcReg6x = _mm256_castsi128_si256(
- _mm_loadl_epi64((const __m128i *)(src_ptr + src_pitch * 6)));
- srcReg56 =
- _mm256_inserti128_si256(srcReg5x, _mm256_castsi256_si128(srcReg6x), 1);
-
- // byte-interleave rows 4/5 (low lane) and rows 5/6 (high lane)
- srcReg45_56_lo = _mm256_unpacklo_epi8(srcReg45, srcReg56);
-
- srcReg2345_3456_lo = _mm256_unpacklo_epi16(srcReg23_34_lo, srcReg45_56_lo);
-
- // multiply-add byte pairs with filter bytes 2..5; hadds sums the partials
- resReglo = _mm256_maddubs_epi16(srcReg2345_3456_lo, firstFilters);
-
- resReglo = _mm256_hadds_epi16(resReglo, _mm256_setzero_si256());
-
- // round: add 32 to each 16-bit sum, then arithmetic shift right by 6
- resReglo = _mm256_adds_epi16(resReglo, addFilterReg32);
- resReglo = _mm256_srai_epi16(resReglo, 6);
-
- // pack each 16-bit value to unsigned 8 bit with saturation; the first
- // lane contains the first convolve result and the second lane contains
- // the second convolve result
- resReg = _mm256_packus_epi16(resReglo, resReglo);
-
- src_ptr += src_stride;
-
- xx_storeu2_epi32(output_ptr, out_pitch, &resReg);
-
- output_ptr += dst_stride;
-
- // slide the window down two rows: keep the newest pair and the last row
- srcReg23_34_lo = srcReg45_56_lo;
- srcReg4x = srcReg6x;
- }
-}
-
-#if HAVE_AVX2 && HAVE_SSSE3
-filter8_1dfunction aom_filter_block1d4_v8_ssse3;
-filter8_1dfunction aom_filter_block1d16_v2_ssse3;
-filter8_1dfunction aom_filter_block1d16_h2_ssse3;
-filter8_1dfunction aom_filter_block1d8_v2_ssse3;
-filter8_1dfunction aom_filter_block1d8_h2_ssse3;
-filter8_1dfunction aom_filter_block1d4_v2_ssse3;
-filter8_1dfunction aom_filter_block1d4_h2_ssse3;
-#define aom_filter_block1d4_v8_avx2 aom_filter_block1d4_v8_ssse3
-#define aom_filter_block1d16_v2_avx2 aom_filter_block1d16_v2_ssse3
-#define aom_filter_block1d16_h2_avx2 aom_filter_block1d16_h2_ssse3
-#define aom_filter_block1d8_v2_avx2 aom_filter_block1d8_v2_ssse3
-#define aom_filter_block1d8_h2_avx2 aom_filter_block1d8_h2_ssse3
-#define aom_filter_block1d4_v2_avx2 aom_filter_block1d4_v2_ssse3
-#define aom_filter_block1d4_h2_avx2 aom_filter_block1d4_h2_ssse3
-// void aom_convolve8_horiz_avx2(const uint8_t *src, ptrdiff_t src_stride,
-// uint8_t *dst, ptrdiff_t dst_stride,
-// const int16_t *filter_x, int x_step_q4,
-// const int16_t *filter_y, int y_step_q4,
-// int w, int h);
-// void aom_convolve8_vert_avx2(const uint8_t *src, ptrdiff_t src_stride,
-// uint8_t *dst, ptrdiff_t dst_stride,
-// const int16_t *filter_x, int x_step_q4,
-// const int16_t *filter_y, int y_step_q4,
-// int w, int h);
-FUN_CONV_1D(horiz, x_step_q4, filter_x, h, src, , avx2);
-FUN_CONV_1D(vert, y_step_q4, filter_y, v, src - src_stride * 3, , avx2);
-
-#endif // HAVE_AVX2 && HAVE_SSSE3
diff --git a/third_party/aom/aom_dsp/x86/aom_subpixel_8t_intrin_ssse3.c b/third_party/aom/aom_dsp/x86/aom_subpixel_8t_intrin_ssse3.c
deleted file mode 100644
index 325a21b76..000000000
--- a/third_party/aom/aom_dsp/x86/aom_subpixel_8t_intrin_ssse3.c
+++ /dev/null
@@ -1,315 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#include <tmmintrin.h>
-
-#include "config/aom_dsp_rtcd.h"
-
-#include "aom_dsp/aom_filter.h"
-#include "aom_dsp/x86/convolve.h"
-#include "aom_mem/aom_mem.h"
-#include "aom_ports/mem.h"
-#include "aom_ports/emmintrin_compat.h"
-
-// byte-shuffle masks (used with _mm_shuffle_epi8) for the 4_h8 convolution only
-DECLARE_ALIGNED(16, static const uint8_t, filt1_4_h8[16]) = {
- 0, 1, 1, 2, 2, 3, 3, 4, 2, 3, 3, 4, 4, 5, 5, 6
-};
-
-DECLARE_ALIGNED(16, static const uint8_t, filt2_4_h8[16]) = {
- 4, 5, 5, 6, 6, 7, 7, 8, 6, 7, 7, 8, 8, 9, 9, 10
-};
-
-// byte-shuffle masks for 8_h8 and 16_h8: adjacent source-byte pairs starting
-// at byte offsets 0, 2, 4 and 6 respectively
-DECLARE_ALIGNED(16, static const uint8_t, filt1_global[16]) = {
- 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, 8
-};
-
-DECLARE_ALIGNED(16, static const uint8_t, filt2_global[16]) = {
- 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, 8, 8, 9, 9, 10
-};
-
-DECLARE_ALIGNED(16, static const uint8_t, filt3_global[16]) = {
- 4, 5, 5, 6, 6, 7, 7, 8, 8, 9, 9, 10, 10, 11, 11, 12
-};
-
-DECLARE_ALIGNED(16, static const uint8_t, filt4_global[16]) = {
- 6, 7, 7, 8, 8, 9, 9, 10, 10, 11, 11, 12, 12, 13, 13, 14
-};
-
-// These are reused by the avx2 intrinsics.
-filter8_1dfunction aom_filter_block1d8_v8_intrin_ssse3;
-filter8_1dfunction aom_filter_block1d8_h8_intrin_ssse3;
-filter8_1dfunction aom_filter_block1d4_h8_intrin_ssse3;
-
-void aom_filter_block1d4_h8_intrin_ssse3(
- const uint8_t *src_ptr, ptrdiff_t src_pixels_per_line, uint8_t *output_ptr,
- ptrdiff_t output_pitch, uint32_t output_height, const int16_t *filter) {
- __m128i firstFilters, secondFilters, shuffle1, shuffle2;
- __m128i srcRegFilt1, srcRegFilt2, srcRegFilt3, srcRegFilt4;
- __m128i addFilterReg64, filtersReg, srcReg, minReg;
- unsigned int i;
- // Horizontal 8-tap filter, 4 output pixels per row, output_height rows.
- // rounding constant: every 16-bit lane holds 64 (added before the >> 7)
- addFilterReg64 = _mm_set1_epi32((int)0x0400040u);
- filtersReg = _mm_loadu_si128((const __m128i *)filter);
- // saturate-pack the eight 16-bit taps to signed bytes; both 64-bit halves
- // of the register then hold the same 8 taps.
- filtersReg = _mm_packs_epi16(filtersReg, filtersReg);
-
- // replicate the first 16 bits (k0 k1) across the low 64-bit half
- firstFilters = _mm_shufflelo_epi16(filtersReg, 0);
- // replicate the third 16 bits (k4 k5) across the low 64-bit half
- secondFilters = _mm_shufflelo_epi16(filtersReg, 0xAAu);
- // replicate the second 16 bits (k2 k3) across the high 64-bit half
- // firstFilters: k0 k1 k0 k1 k0 k1 k0 k1 k2 k3 k2 k3 k2 k3 k2 k3
- firstFilters = _mm_shufflehi_epi16(firstFilters, 0x55u);
- // replicate the fourth 16 bits (k6 k7) across the high 64-bit half
- // secondFilters: k4 k5 k4 k5 k4 k5 k4 k5 k6 k7 k6 k7 k6 k7 k6 k7
- secondFilters = _mm_shufflehi_epi16(secondFilters, 0xFFu);
-
- // load the 4_h8 byte-shuffle masks
- shuffle1 = _mm_load_si128((__m128i const *)filt1_4_h8);
- shuffle2 = _mm_load_si128((__m128i const *)filt2_4_h8);
-
- for (i = 0; i < output_height; i++) {
- srcReg = _mm_loadu_si128((const __m128i *)(src_ptr - 3));  // 16 bytes starting 3 pixels left of the output
-
- // gather the adjacent source-byte pairs each tap pair needs
- srcRegFilt1 = _mm_shuffle_epi8(srcReg, shuffle1);
- srcRegFilt2 = _mm_shuffle_epi8(srcReg, shuffle2);
-
- // multiply 2 adjacent elements with the filter and add the result
- srcRegFilt1 = _mm_maddubs_epi16(srcRegFilt1, firstFilters);  // low: k0k1 sums, high: k2k3 sums
- srcRegFilt2 = _mm_maddubs_epi16(srcRegFilt2, secondFilters);  // low: k4k5 sums, high: k6k7 sums
-
- // move the high halves (k2k3 and k6k7 sums) down to the low half
- srcRegFilt3 = _mm_srli_si128(srcRegFilt1, 8);
- srcRegFilt4 = _mm_srli_si128(srcRegFilt2, 8);
-
- minReg = _mm_min_epi16(srcRegFilt3, srcRegFilt2);  // smaller of the k2k3 / k4k5 sums
-
- // saturating sum: k0k1 + k6k7, then min(k2k3, k4k5), then max(k2k3, k4k5)
- srcRegFilt1 = _mm_adds_epi16(srcRegFilt1, srcRegFilt4);
- srcRegFilt3 = _mm_max_epi16(srcRegFilt3, srcRegFilt2);
- srcRegFilt1 = _mm_adds_epi16(srcRegFilt1, minReg);
- srcRegFilt1 = _mm_adds_epi16(srcRegFilt1, srcRegFilt3);
- srcRegFilt1 = _mm_adds_epi16(srcRegFilt1, addFilterReg64);
-
- // arithmetic shift right by 7 bits in each 16-bit lane
- srcRegFilt1 = _mm_srai_epi16(srcRegFilt1, 7);
-
- // pack each 16-bit lane to an unsigned byte (saturating)
- srcRegFilt1 = _mm_packus_epi16(srcRegFilt1, srcRegFilt1);
- src_ptr += src_pixels_per_line;
-
- // store only the low 4 bytes (the 4 output pixels of this row)
- *((int *)&output_ptr[0]) = _mm_cvtsi128_si32(srcRegFilt1);
-
- output_ptr += output_pitch;
- }
-}
-
-void aom_filter_block1d8_h8_intrin_ssse3(
- const uint8_t *src_ptr, ptrdiff_t src_pixels_per_line, uint8_t *output_ptr,
- ptrdiff_t output_pitch, uint32_t output_height, const int16_t *filter) {
- __m128i firstFilters, secondFilters, thirdFilters, forthFilters, srcReg;
- __m128i filt1Reg, filt2Reg, filt3Reg, filt4Reg;
- __m128i srcRegFilt1, srcRegFilt2, srcRegFilt3, srcRegFilt4;
- __m128i addFilterReg64, filtersReg, minReg;
- unsigned int i;
- // Horizontal 8-tap filter, 8 output pixels per row, output_height rows.
- // rounding constant: every 16-bit lane holds 64 (added before the >> 7)
- addFilterReg64 = _mm_set1_epi32((int)0x0400040u);
- filtersReg = _mm_loadu_si128((const __m128i *)filter);
- // saturate-pack the eight 16-bit taps to signed bytes; both 64-bit halves
- // of the register then hold the same 8 taps.
- filtersReg = _mm_packs_epi16(filtersReg, filtersReg);
-
- // duplicate only the first 16 bits (first and second byte)
- // across 128 bit register
- firstFilters = _mm_shuffle_epi8(filtersReg, _mm_set1_epi16(0x100u));
- // duplicate only the second 16 bits (third and fourth byte)
- // across 128 bit register
- secondFilters = _mm_shuffle_epi8(filtersReg, _mm_set1_epi16(0x302u));
- // duplicate only the third 16 bits (fifth and sixth byte)
- // across 128 bit register
- thirdFilters = _mm_shuffle_epi8(filtersReg, _mm_set1_epi16(0x504u));
- // duplicate only the fourth 16 bits (seventh and eighth byte)
- // across 128 bit register
- forthFilters = _mm_shuffle_epi8(filtersReg, _mm_set1_epi16(0x706u));
-
- filt1Reg = _mm_load_si128((__m128i const *)filt1_global);  // byte-shuffle masks, one per tap pair
- filt2Reg = _mm_load_si128((__m128i const *)filt2_global);
- filt3Reg = _mm_load_si128((__m128i const *)filt3_global);
- filt4Reg = _mm_load_si128((__m128i const *)filt4_global);
-
- for (i = 0; i < output_height; i++) {
- srcReg = _mm_loadu_si128((const __m128i *)(src_ptr - 3));  // 16 bytes starting 3 pixels left of the output
-
- // gather the adjacent source-byte pairs for taps 0,1 and 2,3
- srcRegFilt1 = _mm_shuffle_epi8(srcReg, filt1Reg);
- srcRegFilt2 = _mm_shuffle_epi8(srcReg, filt2Reg);
-
- // multiply 2 adjacent elements with the filter and add the result
- srcRegFilt1 = _mm_maddubs_epi16(srcRegFilt1, firstFilters);
- srcRegFilt2 = _mm_maddubs_epi16(srcRegFilt2, secondFilters);
-
- // gather the adjacent source-byte pairs for taps 4,5 and 6,7
- srcRegFilt3 = _mm_shuffle_epi8(srcReg, filt3Reg);
- srcRegFilt4 = _mm_shuffle_epi8(srcReg, filt4Reg);
-
- // multiply 2 adjacent elements with the filter and add the result
- srcRegFilt3 = _mm_maddubs_epi16(srcRegFilt3, thirdFilters);
- srcRegFilt4 = _mm_maddubs_epi16(srcRegFilt4, forthFilters);
-
- // saturating sum: k0k1 + k6k7, then min(k2k3, k4k5), then max(k2k3, k4k5)
- minReg = _mm_min_epi16(srcRegFilt2, srcRegFilt3);
- srcRegFilt1 = _mm_adds_epi16(srcRegFilt1, srcRegFilt4);
-
- srcRegFilt2 = _mm_max_epi16(srcRegFilt2, srcRegFilt3);
- srcRegFilt1 = _mm_adds_epi16(srcRegFilt1, minReg);
- srcRegFilt1 = _mm_adds_epi16(srcRegFilt1, srcRegFilt2);
- srcRegFilt1 = _mm_adds_epi16(srcRegFilt1, addFilterReg64);
-
- // arithmetic shift right by 7 bits in each 16-bit lane
- srcRegFilt1 = _mm_srai_epi16(srcRegFilt1, 7);
-
- // pack each 16-bit lane to an unsigned byte (saturating)
- srcRegFilt1 = _mm_packus_epi16(srcRegFilt1, srcRegFilt1);
-
- src_ptr += src_pixels_per_line;
-
- // store only the low 8 bytes (the 8 output pixels of this row)
- _mm_storel_epi64((__m128i *)&output_ptr[0], srcRegFilt1);
-
- output_ptr += output_pitch;
- }
-}
-
-void aom_filter_block1d8_v8_intrin_ssse3(
- const uint8_t *src_ptr, ptrdiff_t src_pitch, uint8_t *output_ptr,
- ptrdiff_t out_pitch, uint32_t output_height, const int16_t *filter) {
- __m128i addFilterReg64, filtersReg, minReg;
- __m128i firstFilters, secondFilters, thirdFilters, forthFilters;
- __m128i srcRegFilt1, srcRegFilt2, srcRegFilt3, srcRegFilt5;
- __m128i srcReg1, srcReg2, srcReg3, srcReg4, srcReg5, srcReg6, srcReg7;
- __m128i srcReg8;
- unsigned int i;
- // Vertical 8-tap filter, 8 pixels wide; reads rows src_ptr + 0..7 * src_pitch.
- // rounding constant: every 16-bit lane holds 64 (added before the >> 7)
- addFilterReg64 = _mm_set1_epi32((int)0x0400040u);
- filtersReg = _mm_loadu_si128((const __m128i *)filter);
- // saturate-pack the eight 16-bit taps to signed bytes; both 64-bit halves
- // of the register then hold the same 8 taps.
- filtersReg = _mm_packs_epi16(filtersReg, filtersReg);
-
- // duplicate only the first 16 bits in the filter
- firstFilters = _mm_shuffle_epi8(filtersReg, _mm_set1_epi16(0x100u));
- // duplicate only the second 16 bits in the filter
- secondFilters = _mm_shuffle_epi8(filtersReg, _mm_set1_epi16(0x302u));
- // duplicate only the third 16 bits in the filter
- thirdFilters = _mm_shuffle_epi8(filtersReg, _mm_set1_epi16(0x504u));
- // duplicate only the fourth 16 bits in the filter
- forthFilters = _mm_shuffle_epi8(filtersReg, _mm_set1_epi16(0x706u));
-
- // load the first 7 rows of 8 bytes
- srcReg1 = _mm_loadl_epi64((const __m128i *)src_ptr);
- srcReg2 = _mm_loadl_epi64((const __m128i *)(src_ptr + src_pitch));
- srcReg3 = _mm_loadl_epi64((const __m128i *)(src_ptr + src_pitch * 2));
- srcReg4 = _mm_loadl_epi64((const __m128i *)(src_ptr + src_pitch * 3));
- srcReg5 = _mm_loadl_epi64((const __m128i *)(src_ptr + src_pitch * 4));
- srcReg6 = _mm_loadl_epi64((const __m128i *)(src_ptr + src_pitch * 5));
- srcReg7 = _mm_loadl_epi64((const __m128i *)(src_ptr + src_pitch * 6));
-
- for (i = 0; i < output_height; i++) {
- // load the eighth row (8 bytes); the other seven are carried over
- srcReg8 = _mm_loadl_epi64((const __m128i *)(src_ptr + src_pitch * 7));
-
- // interleave rows 1/2 and 3/4 byte-wise (pairs for taps 0,1 and 2,3)
- srcRegFilt1 = _mm_unpacklo_epi8(srcReg1, srcReg2);
- srcRegFilt3 = _mm_unpacklo_epi8(srcReg3, srcReg4);
-
- // interleave rows 5/6 and 7/8 byte-wise (pairs for taps 4,5 and 6,7)
- srcRegFilt2 = _mm_unpacklo_epi8(srcReg5, srcReg6);
- srcRegFilt5 = _mm_unpacklo_epi8(srcReg7, srcReg8);
-
- // multiply 2 adjacent elements with the filter and add the result
- srcRegFilt1 = _mm_maddubs_epi16(srcRegFilt1, firstFilters);
- srcRegFilt3 = _mm_maddubs_epi16(srcRegFilt3, secondFilters);
- srcRegFilt2 = _mm_maddubs_epi16(srcRegFilt2, thirdFilters);
- srcRegFilt5 = _mm_maddubs_epi16(srcRegFilt5, forthFilters);
-
- // saturating sum: k0k1 + k6k7, then min(k2k3, k4k5), then max(k2k3, k4k5)
- minReg = _mm_min_epi16(srcRegFilt2, srcRegFilt3);
- srcRegFilt1 = _mm_adds_epi16(srcRegFilt1, srcRegFilt5);
- srcRegFilt2 = _mm_max_epi16(srcRegFilt2, srcRegFilt3);
- srcRegFilt1 = _mm_adds_epi16(srcRegFilt1, minReg);
- srcRegFilt1 = _mm_adds_epi16(srcRegFilt1, srcRegFilt2);
- srcRegFilt1 = _mm_adds_epi16(srcRegFilt1, addFilterReg64);
-
- // arithmetic shift right by 7 bits in each 16-bit lane
- srcRegFilt1 = _mm_srai_epi16(srcRegFilt1, 7);
-
- // pack each 16-bit lane to an unsigned byte (saturating)
- srcRegFilt1 = _mm_packus_epi16(srcRegFilt1, srcRegFilt1);
-
- src_ptr += src_pitch;
-
- // slide the 8-row window down by one row for the next iteration
- srcReg1 = srcReg2;
- srcReg2 = srcReg3;
- srcReg3 = srcReg4;
- srcReg4 = srcReg5;
- srcReg5 = srcReg6;
- srcReg6 = srcReg7;
- srcReg7 = srcReg8;
-
- // store only the low 8 bytes of the convolve result
- _mm_storel_epi64((__m128i *)&output_ptr[0], srcRegFilt1);
-
- output_ptr += out_pitch;
- }
-}
-
-filter8_1dfunction aom_filter_block1d16_v8_ssse3;  // 8-tap kernels; not defined in this file
-filter8_1dfunction aom_filter_block1d16_h8_ssse3;
-filter8_1dfunction aom_filter_block1d8_v8_ssse3;
-filter8_1dfunction aom_filter_block1d8_h8_ssse3;
-filter8_1dfunction aom_filter_block1d4_v8_ssse3;
-filter8_1dfunction aom_filter_block1d4_h8_ssse3;
-// the *_h4 / *_v4 names are aliases for the 8-tap kernels declared above
-#define aom_filter_block1d16_h4_ssse3 aom_filter_block1d16_h8_ssse3
-#define aom_filter_block1d16_v4_ssse3 aom_filter_block1d16_v8_ssse3
-#define aom_filter_block1d8_h4_ssse3 aom_filter_block1d8_h8_ssse3
-#define aom_filter_block1d8_v4_ssse3 aom_filter_block1d8_v8_ssse3
-#define aom_filter_block1d4_h4_ssse3 aom_filter_block1d4_h8_ssse3
-#define aom_filter_block1d4_v4_ssse3 aom_filter_block1d4_v8_ssse3
-
-filter8_1dfunction aom_filter_block1d16_v2_ssse3;  // 2-tap kernels; not defined in this file
-filter8_1dfunction aom_filter_block1d16_h2_ssse3;
-filter8_1dfunction aom_filter_block1d8_v2_ssse3;
-filter8_1dfunction aom_filter_block1d8_h2_ssse3;
-filter8_1dfunction aom_filter_block1d4_v2_ssse3;
-filter8_1dfunction aom_filter_block1d4_h2_ssse3;
-// Prototypes of the two functions instantiated by FUN_CONV_1D below:
-// void aom_convolve8_horiz_ssse3(const uint8_t *src, ptrdiff_t src_stride,
-// uint8_t *dst, ptrdiff_t dst_stride,
-// const int16_t *filter_x, int x_step_q4,
-// const int16_t *filter_y, int y_step_q4,
-// int w, int h);
-// void aom_convolve8_vert_ssse3(const uint8_t *src, ptrdiff_t src_stride,
-// uint8_t *dst, ptrdiff_t dst_stride,
-// const int16_t *filter_x, int x_step_q4,
-// const int16_t *filter_y, int y_step_q4,
-// int w, int h);
-FUN_CONV_1D(horiz, x_step_q4, filter_x, h, src, , ssse3);
-FUN_CONV_1D(vert, y_step_q4, filter_y, v, src - src_stride * 3, , ssse3);
diff --git a/third_party/aom/aom_dsp/x86/aom_subpixel_8t_sse2.asm b/third_party/aom/aom_dsp/x86/aom_subpixel_8t_sse2.asm
deleted file mode 100644
index c88fc9ffb..000000000
--- a/third_party/aom/aom_dsp/x86/aom_subpixel_8t_sse2.asm
+++ /dev/null
@@ -1,615 +0,0 @@
-;
-; Copyright (c) 2016, Alliance for Open Media. All rights reserved
-;
-; This source code is subject to the terms of the BSD 2 Clause License and
-; the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
-; was not distributed with this source code in the LICENSE file, you can
-; obtain it at www.aomedia.org/license/software. If the Alliance for Open
-; Media Patent License 1.0 was not distributed with this source code in the
-; PATENTS file, you can obtain it at www.aomedia.org/license/patent.
-;
-
-;
-
-
-%include "aom_ports/x86_abi_support.asm"
-
-;Note: tap3 and tap4 have to be applied and added after other taps to avoid
-;overflow.
-
-%macro GET_FILTERS_4 0 ;load the 8 taps from arg(5) into the k0k1/k2k3/k5k4/k6k7/krd/zero slots; clobbers rdx, rcx, xmm0-xmm7
- mov rdx, arg(5) ;filter ptr
- mov rcx, 0x0400040 ;two 16-bit words of 64 (rounding constant)
-
- movdqa xmm7, [rdx] ;load filters (aligned load)
- pshuflw xmm0, xmm7, 0b ;k0
- pshuflw xmm1, xmm7, 01010101b ;k1
- pshuflw xmm2, xmm7, 10101010b ;k2
- pshuflw xmm3, xmm7, 11111111b ;k3
- psrldq xmm7, 8 ;move taps 4-7 into the low half
- pshuflw xmm4, xmm7, 0b ;k4
- pshuflw xmm5, xmm7, 01010101b ;k5
- pshuflw xmm6, xmm7, 10101010b ;k6
- pshuflw xmm7, xmm7, 11111111b ;k7
-
- punpcklqdq xmm0, xmm1 ;low qword k0 x4, high qword k1 x4
- punpcklqdq xmm2, xmm3 ;low qword k2 x4, high qword k3 x4
- punpcklqdq xmm5, xmm4 ;low qword k5 x4, high qword k4 x4 (note the order)
- punpcklqdq xmm6, xmm7 ;low qword k6 x4, high qword k7 x4
-
- movdqa k0k1, xmm0
- movdqa k2k3, xmm2
- movdqa k5k4, xmm5
- movdqa k6k7, xmm6
-
- movq xmm6, rcx
- pshufd xmm6, xmm6, 0 ;broadcast: all eight words = 64
- movdqa krd, xmm6
-
- pxor xmm7, xmm7
- movdqa zero, xmm7
-%endm
-
-%macro APPLY_FILTER_4 1 ;in: xmm0..xmm7 = 4 source bytes for taps 0..7; %1 != 0 averages with [rdi]; writes 4 bytes to [rdi]
- punpckldq xmm0, xmm1 ;low 4 bytes of xmm0 (tap 0), then low 4 bytes of xmm1 (tap 1)
- punpckldq xmm6, xmm7 ;tap 6, tap 7
- punpckldq xmm2, xmm3 ;tap 2, tap 3
- punpckldq xmm5, xmm4 ;tap 5, tap 4 (matches the k5k4 layout)
-
- punpcklbw xmm0, zero ;unpack to word
- punpcklbw xmm6, zero
- punpcklbw xmm2, zero
- punpcklbw xmm5, zero
-
- pmullw xmm0, k0k1 ;multiply the filter factors
- pmullw xmm6, k6k7
- pmullw xmm2, k2k3
- pmullw xmm5, k5k4
-
- paddsw xmm0, xmm6 ;low: t0+t6, high: t1+t7
- movdqa xmm1, xmm0
- psrldq xmm1, 8
- paddsw xmm0, xmm1 ;low: t0+t6+t1+t7
- paddsw xmm0, xmm2 ;+ t2
- psrldq xmm2, 8 ;bring t3 down to the low half
- paddsw xmm0, xmm5 ;+ t5
- psrldq xmm5, 8 ;bring t4 down to the low half
- paddsw xmm0, xmm2 ;+ t3 (taps 3 and 4 are added last, see the note at the top of the file)
- paddsw xmm0, xmm5 ;+ t4
-
- paddsw xmm0, krd ;rounding
- psraw xmm0, 7 ;shift
- packuswb xmm0, xmm0 ;pack to byte
-
-%if %1
- movd xmm1, [rdi] ;current destination pixels
- pavgb xmm0, xmm1 ;rounded byte average with the destination
-%endif
- movd [rdi], xmm0 ;store 4 output pixels
-%endm
-
-%macro GET_FILTERS 0 ;loads rsi/rdi from the args and fills the k0..k7, krd, zero slots; clobbers rdx, rcx, xmm0-xmm7
- mov rdx, arg(5) ;filter ptr
- mov rsi, arg(0) ;src_ptr
- mov rdi, arg(2) ;output_ptr
- mov rcx, 0x0400040 ;two 16-bit words of 64 (rounding constant)
-
- movdqa xmm7, [rdx] ;load filters (aligned load)
- pshuflw xmm0, xmm7, 0b ;k0
- pshuflw xmm1, xmm7, 01010101b ;k1
- pshuflw xmm2, xmm7, 10101010b ;k2
- pshuflw xmm3, xmm7, 11111111b ;k3
- pshufhw xmm4, xmm7, 0b ;k4
- pshufhw xmm5, xmm7, 01010101b ;k5
- pshufhw xmm6, xmm7, 10101010b ;k6
- pshufhw xmm7, xmm7, 11111111b ;k7
-
- punpcklwd xmm0, xmm0 ;widen the low-half copies: all eight words = k0
- punpcklwd xmm1, xmm1
- punpcklwd xmm2, xmm2
- punpcklwd xmm3, xmm3
- punpckhwd xmm4, xmm4 ;widen the high-half copies: all eight words = k4
- punpckhwd xmm5, xmm5
- punpckhwd xmm6, xmm6
- punpckhwd xmm7, xmm7
-
- movdqa k0, xmm0 ;store filter factors on stack
- movdqa k1, xmm1
- movdqa k2, xmm2
- movdqa k3, xmm3
- movdqa k4, xmm4
- movdqa k5, xmm5
- movdqa k6, xmm6
- movdqa k7, xmm7
-
- movq xmm6, rcx
- pshufd xmm6, xmm6, 0 ;broadcast: all eight words = 64
- movdqa krd, xmm6 ;rounding
-
- pxor xmm7, xmm7
- movdqa zero, xmm7
-%endm
-
-%macro LOAD_VERT_8 1 ;%1 = byte offset; loads 8 bytes of rows 0..7 into xmm0..xmm7; callers in this file set rax = pitch, rdx = 3 * pitch
- movq xmm0, [rsi + %1] ;0
- movq xmm1, [rsi + rax + %1] ;1
- movq xmm6, [rsi + rdx * 2 + %1] ;6
- lea rsi, [rsi + rax] ;advance rsi by one row (stays advanced after the macro)
- movq xmm7, [rsi + rdx * 2 + %1] ;7
- movq xmm2, [rsi + rax + %1] ;2
- movq xmm3, [rsi + rax * 2 + %1] ;3
- movq xmm4, [rsi + rdx + %1] ;4
- movq xmm5, [rsi + rax * 4 + %1] ;5
-%endm
-
-%macro APPLY_FILTER_8 2 ;in: xmm0..xmm7 = 8 source bytes for taps 0..7; %1 != 0 averages with dst; %2 = byte offset into [rdi]
- punpcklbw xmm0, zero ;unpack the low 8 bytes of each register to words
- punpcklbw xmm1, zero
- punpcklbw xmm6, zero
- punpcklbw xmm7, zero
- punpcklbw xmm2, zero
- punpcklbw xmm5, zero
- punpcklbw xmm3, zero
- punpcklbw xmm4, zero
-
- pmullw xmm0, k0 ;multiply each tap's pixels by its filter factor
- pmullw xmm1, k1
- pmullw xmm6, k6
- pmullw xmm7, k7
- pmullw xmm2, k2
- pmullw xmm5, k5
- pmullw xmm3, k3
- pmullw xmm4, k4
-
- paddsw xmm0, xmm1 ;saturating sum in the order t0, t1, t6, t7, t2, t5, t3, t4
- paddsw xmm0, xmm6
- paddsw xmm0, xmm7
- paddsw xmm0, xmm2
- paddsw xmm0, xmm5
- paddsw xmm0, xmm3 ;taps 3 and 4 are added last, see the note at the top of the file
- paddsw xmm0, xmm4
-
- paddsw xmm0, krd ;rounding
- psraw xmm0, 7 ;shift
- packuswb xmm0, xmm0 ;pack back to byte
-%if %1
- movq xmm1, [rdi + %2] ;current destination pixels
- pavgb xmm0, xmm1 ;rounded byte average with the destination
-%endif
- movq [rdi + %2], xmm0 ;store 8 output pixels
-%endm
-
-SECTION .text
-
-;void aom_filter_block1d4_v8_sse2
-;(
-; unsigned char *src_ptr,
-; unsigned int src_pitch,
-; unsigned char *output_ptr,
-; unsigned int out_pitch,
-; unsigned int output_height,
-; short *filter
-;)
-global sym(aom_filter_block1d4_v8_sse2) PRIVATE
-sym(aom_filter_block1d4_v8_sse2):
- push rbp
- mov rbp, rsp
- SHADOW_ARGS_TO_STACK 6
- SAVE_XMM 7
- push rsi
- push rdi
- push rbx ;rbx holds out_pitch below
- ; end prolog
-
- ALIGN_STACK 16, rax
- sub rsp, 16 * 6 ;six 16-byte scratch slots, named by the defines below
- %define k0k1 [rsp + 16 * 0]
- %define k2k3 [rsp + 16 * 1]
- %define k5k4 [rsp + 16 * 2]
- %define k6k7 [rsp + 16 * 3]
- %define krd [rsp + 16 * 4]
- %define zero [rsp + 16 * 5]
-
- GET_FILTERS_4
-
- mov rsi, arg(0) ;src_ptr
- mov rdi, arg(2) ;output_ptr
-
- movsxd rax, DWORD PTR arg(1) ;pixels_per_line
- movsxd rbx, DWORD PTR arg(3) ;out_pitch
- lea rdx, [rax + rax * 2] ;rdx = 3 * pixels_per_line
- movsxd rcx, DWORD PTR arg(4) ;output_height
-
-.loop:
- movd xmm0, [rsi] ;load src: row 0
- movd xmm1, [rsi + rax] ;1
- movd xmm6, [rsi + rdx * 2] ;6
- lea rsi, [rsi + rax] ;advance to the next row (also the per-iteration step)
- movd xmm7, [rsi + rdx * 2] ;7
- movd xmm2, [rsi + rax] ;2
- movd xmm3, [rsi + rax * 2] ;3
- movd xmm4, [rsi + rdx] ;4
- movd xmm5, [rsi + rax * 4] ;5
-
- APPLY_FILTER_4 0
-
- lea rdi, [rdi + rbx]
- dec rcx
- jnz .loop
-
- add rsp, 16 * 6 ;release the scratch slots
- pop rsp ;presumably restores the rsp saved by ALIGN_STACK - TODO confirm
- pop rbx
- ; begin epilog
- pop rdi
- pop rsi
- RESTORE_XMM
- UNSHADOW_ARGS
- pop rbp
- ret
-
-;void aom_filter_block1d8_v8_sse2
-;(
-; unsigned char *src_ptr,
-; unsigned int src_pitch,
-; unsigned char *output_ptr,
-; unsigned int out_pitch,
-; unsigned int output_height,
-; short *filter
-;)
-global sym(aom_filter_block1d8_v8_sse2) PRIVATE
-sym(aom_filter_block1d8_v8_sse2):
- push rbp
- mov rbp, rsp
- SHADOW_ARGS_TO_STACK 6
- SAVE_XMM 7
- push rsi
- push rdi
- push rbx ;rbx holds out_pitch below
- ; end prolog
-
- ALIGN_STACK 16, rax
- sub rsp, 16 * 10 ;ten 16-byte scratch slots, named by the defines below
- %define k0 [rsp + 16 * 0]
- %define k1 [rsp + 16 * 1]
- %define k2 [rsp + 16 * 2]
- %define k3 [rsp + 16 * 3]
- %define k4 [rsp + 16 * 4]
- %define k5 [rsp + 16 * 5]
- %define k6 [rsp + 16 * 6]
- %define k7 [rsp + 16 * 7]
- %define krd [rsp + 16 * 8]
- %define zero [rsp + 16 * 9]
-
- GET_FILTERS
-
- movsxd rax, DWORD PTR arg(1) ;pixels_per_line
- movsxd rbx, DWORD PTR arg(3) ;out_pitch
- lea rdx, [rax + rax * 2] ;rdx = 3 * pixels_per_line
- movsxd rcx, DWORD PTR arg(4) ;output_height
-
-.loop:
- LOAD_VERT_8 0 ;rows 0..7 into xmm0..xmm7; also steps rsi down one row
- APPLY_FILTER_8 0, 0
-
- lea rdi, [rdi + rbx]
- dec rcx
- jnz .loop
-
- add rsp, 16 * 10 ;release the scratch slots
- pop rsp ;presumably restores the rsp saved by ALIGN_STACK - TODO confirm
- pop rbx
- ; begin epilog
- pop rdi
- pop rsi
- RESTORE_XMM
- UNSHADOW_ARGS
- pop rbp
- ret
-
-;void aom_filter_block1d16_v8_sse2
-;(
-; unsigned char *src_ptr,
-; unsigned int src_pitch,
-; unsigned char *output_ptr,
-; unsigned int out_pitch,
-; unsigned int output_height,
-; short *filter
-;)
-global sym(aom_filter_block1d16_v8_sse2) PRIVATE
-sym(aom_filter_block1d16_v8_sse2):
- push rbp
- mov rbp, rsp
- SHADOW_ARGS_TO_STACK 6
- SAVE_XMM 7
- push rsi
- push rdi
- push rbx ;rbx holds out_pitch below
- ; end prolog
-
- ALIGN_STACK 16, rax
- sub rsp, 16 * 10 ;ten 16-byte scratch slots, named by the defines below
- %define k0 [rsp + 16 * 0]
- %define k1 [rsp + 16 * 1]
- %define k2 [rsp + 16 * 2]
- %define k3 [rsp + 16 * 3]
- %define k4 [rsp + 16 * 4]
- %define k5 [rsp + 16 * 5]
- %define k6 [rsp + 16 * 6]
- %define k7 [rsp + 16 * 7]
- %define krd [rsp + 16 * 8]
- %define zero [rsp + 16 * 9]
-
- GET_FILTERS
-
- movsxd rax, DWORD PTR arg(1) ;pixels_per_line
- movsxd rbx, DWORD PTR arg(3) ;out_pitch
- lea rdx, [rax + rax * 2] ;rdx = 3 * pixels_per_line
- movsxd rcx, DWORD PTR arg(4) ;output_height
-
-.loop:
- LOAD_VERT_8 0 ;left 8 columns; also steps rsi down one row
- APPLY_FILTER_8 0, 0
- sub rsi, rax ;undo that step so the right half reads the same rows
-
- LOAD_VERT_8 8 ;right 8 columns; leaves rsi one row further down
- APPLY_FILTER_8 0, 8
- add rdi, rbx
-
- dec rcx
- jnz .loop
-
- add rsp, 16 * 10 ;release the scratch slots
- pop rsp ;presumably restores the rsp saved by ALIGN_STACK - TODO confirm
- pop rbx
- ; begin epilog
- pop rdi
- pop rsi
- RESTORE_XMM
- UNSHADOW_ARGS
- pop rbp
- ret
-
-;void aom_filter_block1d4_h8_sse2
-;(
-; unsigned char *src_ptr,
-; unsigned int src_pixels_per_line,
-; unsigned char *output_ptr,
-; unsigned int output_pitch,
-; unsigned int output_height,
-; short *filter
-;)
-global sym(aom_filter_block1d4_h8_sse2) PRIVATE
-sym(aom_filter_block1d4_h8_sse2):
- push rbp
- mov rbp, rsp
- SHADOW_ARGS_TO_STACK 6
- SAVE_XMM 7
- push rsi
- push rdi
- ; end prolog
-
- ALIGN_STACK 16, rax
- sub rsp, 16 * 6 ;six 16-byte scratch slots, named by the defines below
- %define k0k1 [rsp + 16 * 0]
- %define k2k3 [rsp + 16 * 1]
- %define k5k4 [rsp + 16 * 2]
- %define k6k7 [rsp + 16 * 3]
- %define krd [rsp + 16 * 4]
- %define zero [rsp + 16 * 5]
-
- GET_FILTERS_4
-
- mov rsi, arg(0) ;src_ptr
- mov rdi, arg(2) ;output_ptr
-
- movsxd rax, DWORD PTR arg(1) ;pixels_per_line
- movsxd rdx, DWORD PTR arg(3) ;out_pitch
- movsxd rcx, DWORD PTR arg(4) ;output_height
-
-.loop:
- movdqu xmm0, [rsi - 3] ;load src
-
- movdqa xmm1, xmm0 ;one copy of the row per tap
- movdqa xmm6, xmm0
- movdqa xmm7, xmm0
- movdqa xmm2, xmm0
- movdqa xmm3, xmm0
- movdqa xmm5, xmm0
- movdqa xmm4, xmm0
-
- psrldq xmm1, 1 ;shift copy N right by N bytes so it starts at tap N's pixel
- psrldq xmm6, 6
- psrldq xmm7, 7
- psrldq xmm2, 2
- psrldq xmm3, 3
- psrldq xmm5, 5
- psrldq xmm4, 4
-
- APPLY_FILTER_4 0
-
- lea rsi, [rsi + rax]
- lea rdi, [rdi + rdx]
- dec rcx
- jnz .loop
-
- add rsp, 16 * 6 ;release the scratch slots
- pop rsp ;presumably restores the rsp saved by ALIGN_STACK - TODO confirm
-
- ; begin epilog
- pop rdi
- pop rsi
- RESTORE_XMM
- UNSHADOW_ARGS
- pop rbp
- ret
-
-;void aom_filter_block1d8_h8_sse2
-;(
-; unsigned char *src_ptr,
-; unsigned int src_pixels_per_line,
-; unsigned char *output_ptr,
-; unsigned int output_pitch,
-; unsigned int output_height,
-; short *filter
-;)
-global sym(aom_filter_block1d8_h8_sse2) PRIVATE
-sym(aom_filter_block1d8_h8_sse2):
- push rbp
- mov rbp, rsp
- SHADOW_ARGS_TO_STACK 6
- SAVE_XMM 7
- push rsi
- push rdi
- ; end prolog
-
- ALIGN_STACK 16, rax
- sub rsp, 16 * 10 ;ten 16-byte scratch slots, named by the defines below
- %define k0 [rsp + 16 * 0]
- %define k1 [rsp + 16 * 1]
- %define k2 [rsp + 16 * 2]
- %define k3 [rsp + 16 * 3]
- %define k4 [rsp + 16 * 4]
- %define k5 [rsp + 16 * 5]
- %define k6 [rsp + 16 * 6]
- %define k7 [rsp + 16 * 7]
- %define krd [rsp + 16 * 8]
- %define zero [rsp + 16 * 9]
-
- GET_FILTERS
-
- movsxd rax, DWORD PTR arg(1) ;pixels_per_line
- movsxd rdx, DWORD PTR arg(3) ;out_pitch
- movsxd rcx, DWORD PTR arg(4) ;output_height
-
-.loop:
- movdqu xmm0, [rsi - 3] ;load src
-
- movdqa xmm1, xmm0 ;one copy of the row per tap
- movdqa xmm6, xmm0
- movdqa xmm7, xmm0
- movdqa xmm2, xmm0
- movdqa xmm5, xmm0
- movdqa xmm3, xmm0
- movdqa xmm4, xmm0
-
- psrldq xmm1, 1 ;shift copy N right by N bytes so it starts at tap N's pixel
- psrldq xmm6, 6
- psrldq xmm7, 7
- psrldq xmm2, 2
- psrldq xmm5, 5
- psrldq xmm3, 3
- psrldq xmm4, 4
-
- APPLY_FILTER_8 0, 0
-
- lea rsi, [rsi + rax]
- lea rdi, [rdi + rdx]
- dec rcx
- jnz .loop
-
- add rsp, 16 * 10 ;release the scratch slots
- pop rsp ;presumably restores the rsp saved by ALIGN_STACK - TODO confirm
-
- ; begin epilog
- pop rdi
- pop rsi
- RESTORE_XMM
- UNSHADOW_ARGS
- pop rbp
- ret
-
-;void aom_filter_block1d16_h8_sse2
-;(
-; unsigned char *src_ptr,
-; unsigned int src_pixels_per_line,
-; unsigned char *output_ptr,
-; unsigned int output_pitch,
-; unsigned int output_height,
-; short *filter
-;)
-global sym(aom_filter_block1d16_h8_sse2) PRIVATE
-sym(aom_filter_block1d16_h8_sse2):
- push rbp
- mov rbp, rsp
- SHADOW_ARGS_TO_STACK 6
- SAVE_XMM 7
- push rsi
- push rdi
- ; end prolog
-
- ALIGN_STACK 16, rax
- sub rsp, 16 * 10 ;ten 16-byte scratch slots, named by the defines below
- %define k0 [rsp + 16 * 0]
- %define k1 [rsp + 16 * 1]
- %define k2 [rsp + 16 * 2]
- %define k3 [rsp + 16 * 3]
- %define k4 [rsp + 16 * 4]
- %define k5 [rsp + 16 * 5]
- %define k6 [rsp + 16 * 6]
- %define k7 [rsp + 16 * 7]
- %define krd [rsp + 16 * 8]
- %define zero [rsp + 16 * 9]
-
- GET_FILTERS
-
- movsxd rax, DWORD PTR arg(1) ;pixels_per_line
- movsxd rdx, DWORD PTR arg(3) ;out_pitch
- movsxd rcx, DWORD PTR arg(4) ;output_height
-
-.loop:
- movdqu xmm0, [rsi - 3] ;load src (left 8 output pixels)
-
- movdqa xmm1, xmm0 ;one copy of the row per tap
- movdqa xmm6, xmm0
- movdqa xmm7, xmm0
- movdqa xmm2, xmm0
- movdqa xmm5, xmm0
- movdqa xmm3, xmm0
- movdqa xmm4, xmm0
-
- psrldq xmm1, 1 ;shift copy N right by N bytes so it starts at tap N's pixel
- psrldq xmm6, 6
- psrldq xmm7, 7
- psrldq xmm2, 2
- psrldq xmm5, 5
- psrldq xmm3, 3
- psrldq xmm4, 4
-
- APPLY_FILTER_8 0, 0
-
- movdqu xmm0, [rsi + 5] ;load src (right 8 output pixels: 8 - 3 = 5)
-
- movdqa xmm1, xmm0 ;one copy of the row per tap
- movdqa xmm6, xmm0
- movdqa xmm7, xmm0
- movdqa xmm2, xmm0
- movdqa xmm5, xmm0
- movdqa xmm3, xmm0
- movdqa xmm4, xmm0
-
- psrldq xmm1, 1 ;shift copy N right by N bytes so it starts at tap N's pixel
- psrldq xmm6, 6
- psrldq xmm7, 7
- psrldq xmm2, 2
- psrldq xmm5, 5
- psrldq xmm3, 3
- psrldq xmm4, 4
-
- APPLY_FILTER_8 0, 8
-
- lea rsi, [rsi + rax]
- lea rdi, [rdi + rdx]
- dec rcx
- jnz .loop
-
- add rsp, 16 * 10 ;release the scratch slots
- pop rsp ;presumably restores the rsp saved by ALIGN_STACK - TODO confirm
-
- ; begin epilog
- pop rdi
- pop rsi
- RESTORE_XMM
- UNSHADOW_ARGS
- pop rbp
- ret
diff --git a/third_party/aom/aom_dsp/x86/aom_subpixel_8t_ssse3.asm b/third_party/aom/aom_dsp/x86/aom_subpixel_8t_ssse3.asm
deleted file mode 100644
index 3ca7921b6..000000000
--- a/third_party/aom/aom_dsp/x86/aom_subpixel_8t_ssse3.asm
+++ /dev/null
@@ -1,870 +0,0 @@
-;
-; Copyright (c) 2016, Alliance for Open Media. All rights reserved
-;
-; This source code is subject to the terms of the BSD 2 Clause License and
-; the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
-; was not distributed with this source code in the LICENSE file, you can
-; obtain it at www.aomedia.org/license/software. If the Alliance for Open
-; Media Patent License 1.0 was not distributed with this source code in the
-; PATENTS file, you can obtain it at www.aomedia.org/license/patent.
-;
-
-;
-
-%include "third_party/x86inc/x86inc.asm"
-
-SECTION_RODATA
-pw_64: times 8 dw 64
-even_byte_mask: times 8 dw 0x00ff
-
-; %define USE_PMULHRSW
-; NOTE: pmulhrsw has a latency of 5 cycles. Tests showed a performance loss
-; when using this instruction.
-;
-; The add order below (based on ffav1) must be followed to prevent outranges.
-; x = k0k1 + k4k5
-; y = k2k3 + k6k7
-; z = signed SAT(x + y)
-
-SECTION .text
-%define LOCAL_VARS_SIZE 16*6
-
-%macro SETUP_LOCAL_VARS 0 ; in: m4 = eight 16-bit taps; defines and fills k0k1..k6k7 and krd
- ; TODO(slavarnway): using xmm registers for these on ARCH_X86_64 +
- ; pmaddubsw has a higher latency on some platforms, this might be eased by
- ; interleaving the instructions.
- %define k0k1 [rsp + 16*0]
- %define k2k3 [rsp + 16*1]
- %define k4k5 [rsp + 16*2]
- %define k6k7 [rsp + 16*3]
- packsswb m4, m4 ;taps -> signed bytes (saturating), same 8 bytes in both halves
- ; TODO(slavarnway): multiple pshufb instructions had a higher latency on
- ; some platforms.
- pshuflw m0, m4, 0b ;k0_k1
- pshuflw m1, m4, 01010101b ;k2_k3
- pshuflw m2, m4, 10101010b ;k4_k5
- pshuflw m3, m4, 11111111b ;k6_k7
- punpcklqdq m0, m0 ;copy the low qword up: the tap pair now fills all 16 bytes
- punpcklqdq m1, m1
- punpcklqdq m2, m2
- punpcklqdq m3, m3
- mova k0k1, m0
- mova k2k3, m1
- mova k4k5, m2
- mova k6k7, m3
-%if ARCH_X86_64
- %define krd m12
- %define tmp0 [rsp + 16*4]
- %define tmp1 [rsp + 16*5]
- mova krd, [GLOBAL(pw_64)] ;rounding constant kept in a register on x86-64
-%else
- %define krd [rsp + 16*4]
-%if CONFIG_PIC=0
- mova m6, [GLOBAL(pw_64)]
-%else
- ; build constants without accessing global memory
- pcmpeqb m6, m6 ;all ones
- psrlw m6, 15 ;each word = 1
- psllw m6, 6 ;aka pw_64
-%endif
- mova krd, m6 ;rounding constant kept on the stack otherwise
-%endif
-%endm
-
-;-------------------------------------------------------------------------------
-%if ARCH_X86_64
- %define LOCAL_VARS_SIZE_H4 0
-%else
- %define LOCAL_VARS_SIZE_H4 16*4
-%endif
-
-%macro SUBPIX_HFILTER4 1 ; %1 = variant suffix (h8, h8_avg or h8_add_src); emits filter_block1d4_%1
-cglobal filter_block1d4_%1, 6, 6, 11, LOCAL_VARS_SIZE_H4, \
- src, sstride, dst, dstride, height, filter
- mova m4, [filterq]
- packsswb m4, m4 ;taps -> signed bytes (saturating)
-%if ARCH_X86_64
- %define k0k1k4k5 m8
- %define k2k3k6k7 m9
- %define krd m10
- mova krd, [GLOBAL(pw_64)]
- pshuflw k0k1k4k5, m4, 0b ;k0_k1
- pshufhw k0k1k4k5, k0k1k4k5, 10101010b ;k0_k1_k4_k5
- pshuflw k2k3k6k7, m4, 01010101b ;k2_k3
- pshufhw k2k3k6k7, k2k3k6k7, 11111111b ;k2_k3_k6_k7
-%else
- %define k0k1k4k5 [rsp + 16*0]
- %define k2k3k6k7 [rsp + 16*1]
- %define krd [rsp + 16*2]
- pshuflw m6, m4, 0b ;k0_k1
- pshufhw m6, m6, 10101010b ;k0_k1_k4_k5
- pshuflw m7, m4, 01010101b ;k2_k3
- pshufhw m7, m7, 11111111b ;k2_k3_k6_k7
-%if CONFIG_PIC=0
- mova m1, [GLOBAL(pw_64)]
-%else
- ; build constants without accessing global memory
- pcmpeqb m1, m1 ;all ones
- psrlw m1, 15 ;each word = 1
- psllw m1, 6 ;aka pw_64
-%endif
- mova k0k1k4k5, m6
- mova k2k3k6k7, m7
- mova krd, m1
-%endif
- dec heightd ;heightd = height - 1, so an odd height reaches 0 after the loop
-; NOTE(review): the loop body runs before heightd is tested, so height == 1 would still write two rows
-.loop:
- ;Do two rows at once
- movu m4, [srcq - 3]
- movu m5, [srcq + sstrideq - 3]
- punpckhbw m1, m4, m4 ;duplicate every byte so palignr by an odd count yields adjacent-pixel pairs
- punpcklbw m4, m4
- punpckhbw m3, m5, m5
- punpcklbw m5, m5
- palignr m0, m1, m4, 1 ;row 0 pairs for taps 0,1 (low half) and 4,5 (high half)
- pmaddubsw m0, k0k1k4k5
- palignr m1, m4, 5 ;row 0 pairs for taps 2,3 (low half) and 6,7 (high half)
- pmaddubsw m1, k2k3k6k7
- palignr m2, m3, m5, 1 ;same for row 1
- pmaddubsw m2, k0k1k4k5
- palignr m3, m5, 5
- pmaddubsw m3, k2k3k6k7
- punpckhqdq m4, m0, m2 ;k4k5 sums of both rows
- punpcklqdq m0, m2 ;k0k1 sums of both rows
- punpckhqdq m5, m1, m3 ;k6k7 sums of both rows
- punpcklqdq m1, m3 ;k2k3 sums of both rows
- paddsw m0, m4 ;x = k0k1 + k4k5
- paddsw m1, m5 ;y = k2k3 + k6k7
-%ifidn %1, h8_avg
- movd m4, [dstq]
- movd m5, [dstq + dstrideq]
-%endif
- paddsw m0, m1 ;x + y with signed saturation
- paddsw m0, krd
- psraw m0, 7
-%ifidn %1, h8_add_src
- pxor m3, m3
- movu m4, [srcq]
- movu m5, [srcq + sstrideq]
- punpckldq m4, m5 ; Bytes 0,1,2,3 from row 0, then 0,1,2,3 from row 1
- punpcklbw m4, m3
- paddsw m0, m4
-%endif
- packuswb m0, m0
- psrldq m1, m0, 4 ;row 1 result moved down to the low dword
-
-%ifidn %1, h8_avg
- pavgb m0, m4
- pavgb m1, m5
-%endif
- movd [dstq], m0
- movd [dstq + dstrideq], m1
-
- lea srcq, [srcq + sstrideq ]
- prefetcht0 [srcq + 4 * sstrideq - 3]
- lea srcq, [srcq + sstrideq ]
- lea dstq, [dstq + 2 * dstrideq ]
- prefetcht0 [srcq + 2 * sstrideq - 3]
-
- sub heightd, 2
- jg .loop
-
- ; Do last row if output_height is odd (flags are still those of the sub above)
- jne .done
-
- movu m4, [srcq - 3]
- punpckhbw m1, m4, m4
- punpcklbw m4, m4
- palignr m0, m1, m4, 1
- palignr m1, m4, 5
- pmaddubsw m0, k0k1k4k5
- pmaddubsw m1, k2k3k6k7
- psrldq m2, m0, 8 ;k4k5 sums down to the low half
- psrldq m3, m1, 8 ;k6k7 sums down to the low half
- paddsw m0, m2
- paddsw m1, m3
- paddsw m0, m1
- paddsw m0, krd
- psraw m0, 7
-%ifidn %1, h8_add_src
- pxor m3, m3
- movu m4, [srcq]
- punpcklbw m4, m3
- paddsw m0, m4
-%endif
- packuswb m0, m0
-%ifidn %1, h8_avg
- movd m4, [dstq]
- pavgb m0, m4
-%endif
- movd [dstq], m0
-.done:
- REP_RET
-%endm
-
-;-------------------------------------------------------------------------------
-%macro SUBPIX_HFILTER8 1 ; %1 = variant suffix (h8, h8_avg or h8_add_src); emits filter_block1d8_%1
-cglobal filter_block1d8_%1, 6, 6, 14, LOCAL_VARS_SIZE, \
- src, sstride, dst, dstride, height, filter
- mova m4, [filterq]
- SETUP_LOCAL_VARS
- dec heightd ;heightd = height - 1, so an odd height reaches 0 after the loop
-; NOTE(review): the loop body runs before heightd is tested, so height == 1 would still write two rows
-.loop:
- ;Do two rows at once
- movu m0, [srcq - 3]
- movu m4, [srcq + sstrideq - 3]
- punpckhbw m1, m0, m0 ;duplicate every byte so palignr by an odd count yields adjacent-pixel pairs
- punpcklbw m0, m0
- palignr m5, m1, m0, 13 ;row 0 pairs for taps 6,7
- pmaddubsw m5, k6k7
- palignr m2, m1, m0, 5 ;taps 2,3
- palignr m3, m1, m0, 9 ;taps 4,5
- palignr m1, m0, 1 ;taps 0,1
- pmaddubsw m1, k0k1
- punpckhbw m6, m4, m4 ;row 1: same byte duplication
- punpcklbw m4, m4
- pmaddubsw m2, k2k3
- pmaddubsw m3, k4k5
-
- palignr m7, m6, m4, 13
- palignr m0, m6, m4, 5
- pmaddubsw m7, k6k7
- paddsw m1, m3 ;row 0: x = k0k1 + k4k5
- paddsw m2, m5 ;row 0: y = k2k3 + k6k7
- paddsw m1, m2 ;row 0: x + y with signed saturation
-%ifidn %1, h8_avg
- movh m2, [dstq]
- movhps m2, [dstq + dstrideq]
-%endif
- palignr m5, m6, m4, 9
- palignr m6, m4, 1
- pmaddubsw m0, k2k3
- pmaddubsw m6, k0k1
- paddsw m1, krd
- pmaddubsw m5, k4k5
- psraw m1, 7
- paddsw m0, m7 ;row 1: y = k2k3 + k6k7
- paddsw m6, m5 ;row 1: x = k0k1 + k4k5
- paddsw m6, m0
- paddsw m6, krd
- psraw m6, 7
-%ifidn %1, h8_add_src
- pxor m3, m3
- movu m4, [srcq]
- movu m5, [srcq + sstrideq]
- punpcklbw m4, m3
- punpcklbw m5, m3
- paddsw m1, m4
- paddsw m6, m5
-%endif
- packuswb m1, m6 ;row 0 in the low 8 bytes, row 1 in the high 8 bytes
-%ifidn %1, h8_avg
- pavgb m1, m2
-%endif
- movh [dstq], m1
- movhps [dstq + dstrideq], m1
-
- lea srcq, [srcq + sstrideq ]
- prefetcht0 [srcq + 4 * sstrideq - 3]
- lea srcq, [srcq + sstrideq ]
- lea dstq, [dstq + 2 * dstrideq ]
- prefetcht0 [srcq + 2 * sstrideq - 3]
- sub heightd, 2
- jg .loop
-
- ; Do last row if output_height is odd (flags are still those of the sub above)
- jne .done
-
- movu m0, [srcq - 3]
- punpckhbw m3, m0, m0
- punpcklbw m0, m0
- palignr m1, m3, m0, 1
- palignr m2, m3, m0, 5
- palignr m4, m3, m0, 13
- palignr m3, m0, 9
- pmaddubsw m1, k0k1
- pmaddubsw m2, k2k3
- pmaddubsw m3, k4k5
- pmaddubsw m4, k6k7
- paddsw m1, m3 ;x = k0k1 + k4k5
- paddsw m4, m2 ;y = k2k3 + k6k7
- paddsw m1, m4
- paddsw m1, krd
- psraw m1, 7
-%ifidn %1, h8_add_src
- pxor m6, m6
- movu m5, [srcq]
- punpcklbw m5, m6
- paddsw m1, m5
-%endif
- packuswb m1, m1
-%ifidn %1, h8_avg
- movh m0, [dstq]
- pavgb m1, m0
-%endif
- movh [dstq], m1
-.done:
- REP_RET
-%endm
-
-;-------------------------------------------------------------------------------
-%macro SUBPIX_HFILTER16 1
-cglobal filter_block1d16_%1, 6, 6, 14, LOCAL_VARS_SIZE, \
- src, sstride, dst, dstride, height, filter
- mova m4, [filterq]
- SETUP_LOCAL_VARS
-
-.loop:
- prefetcht0 [srcq + 2 * sstrideq -3]
-
- movu m0, [srcq - 3]
- movu m4, [srcq - 2]
- pmaddubsw m0, k0k1
- pmaddubsw m4, k0k1
- movu m1, [srcq - 1]
- movu m5, [srcq + 0]
- pmaddubsw m1, k2k3
- pmaddubsw m5, k2k3
- movu m2, [srcq + 1]
- movu m6, [srcq + 2]
- pmaddubsw m2, k4k5
- pmaddubsw m6, k4k5
- movu m3, [srcq + 3]
- movu m7, [srcq + 4]
- pmaddubsw m3, k6k7
- pmaddubsw m7, k6k7
- paddsw m0, m2
- paddsw m1, m3
- paddsw m0, m1
- paddsw m4, m6
- paddsw m5, m7
- paddsw m4, m5
- paddsw m0, krd
- paddsw m4, krd
- psraw m0, 7
- psraw m4, 7
-%ifidn %1, h8_add_src
-%if ARCH_X86=1 && CONFIG_PIC=1
- pcmpeqb m2, m2 ;all ones
- psrlw m2, 8 ;even_byte_mask
-%else
- mova m2, [GLOBAL(even_byte_mask)]
-%endif
- movu m5, [srcq]
- mova m7, m5
- pand m5, m2
- psrlw m7, 8
- paddsw m0, m5
- paddsw m4, m7
-%endif
- packuswb m0, m0
- packuswb m4, m4
- punpcklbw m0, m4
-%ifidn %1, h8_avg
- pavgb m0, [dstq]
-%endif
- lea srcq, [srcq + sstrideq]
- mova [dstq], m0
- lea dstq, [dstq + dstrideq]
- dec heightd
- jnz .loop
- REP_RET
-%endm
-
-INIT_XMM ssse3
-SUBPIX_HFILTER16 h8
-SUBPIX_HFILTER8 h8
-SUBPIX_HFILTER4 h8
-
-;-------------------------------------------------------------------------------
-
-; TODO(Linfeng): Detect cpu type and choose the code with better performance.
-%define X86_SUBPIX_VFILTER_PREFER_SLOW_CELERON 1
-
-%if ARCH_X86_64 && X86_SUBPIX_VFILTER_PREFER_SLOW_CELERON
- %define NUM_GENERAL_REG_USED 9
-%else
- %define NUM_GENERAL_REG_USED 6
-%endif
-
-%macro SUBPIX_VFILTER 2
-cglobal filter_block1d%2_%1, 6, NUM_GENERAL_REG_USED, 15, LOCAL_VARS_SIZE, \
- src, sstride, dst, dstride, height, filter
- mova m4, [filterq]
- SETUP_LOCAL_VARS
-
-%ifidn %2, 8
- %define movx movh
-%else
- %define movx movd
-%endif
-
- dec heightd
-
-%if ARCH_X86 || X86_SUBPIX_VFILTER_PREFER_SLOW_CELERON
-
-%if ARCH_X86_64
- %define src1q r7
- %define sstride6q r8
- %define dst_stride dstrideq
-%else
- %define src1q filterq
- %define sstride6q dstrideq
- %define dst_stride dstridemp
-%endif
- mov src1q, srcq
- add src1q, sstrideq
- lea sstride6q, [sstrideq + sstrideq * 4]
- add sstride6q, sstrideq ;pitch * 6
-
-.loop:
- ;Do two rows at once
- movx m0, [srcq ] ;A
- movx m1, [src1q ] ;B
- punpcklbw m0, m1 ;A B
- movx m2, [srcq + sstrideq * 2 ] ;C
- pmaddubsw m0, k0k1
- mova m6, m2
- movx m3, [src1q + sstrideq * 2] ;D
- punpcklbw m2, m3 ;C D
- pmaddubsw m2, k2k3
- movx m4, [srcq + sstrideq * 4 ] ;E
- mova m7, m4
- movx m5, [src1q + sstrideq * 4] ;F
- punpcklbw m4, m5 ;E F
- pmaddubsw m4, k4k5
- punpcklbw m1, m6 ;A B next iter
- movx m6, [srcq + sstride6q ] ;G
- punpcklbw m5, m6 ;E F next iter
- punpcklbw m3, m7 ;C D next iter
- pmaddubsw m5, k4k5
- movx m7, [src1q + sstride6q ] ;H
- punpcklbw m6, m7 ;G H
- pmaddubsw m6, k6k7
- pmaddubsw m3, k2k3
- pmaddubsw m1, k0k1
- paddsw m0, m4
- paddsw m2, m6
- movx m6, [srcq + sstrideq * 8 ] ;H next iter
- punpcklbw m7, m6
- pmaddubsw m7, k6k7
- paddsw m0, m2
- paddsw m0, krd
- psraw m0, 7
- paddsw m1, m5
-%ifidn %1, v8_add_src
- pxor m6, m6
- movu m4, [srcq]
- punpcklbw m4, m6
- paddsw m0, m4
-%endif
- packuswb m0, m0
-
- paddsw m3, m7
- paddsw m1, m3
- paddsw m1, krd
- psraw m1, 7
-%ifidn %1, v8_add_src
- movu m4, [src1q]
- punpcklbw m4, m6
- paddsw m1, m4
-%endif
- lea srcq, [srcq + sstrideq * 2 ]
- lea src1q, [src1q + sstrideq * 2]
- packuswb m1, m1
-
-%ifidn %1, v8_avg
- movx m2, [dstq]
- pavgb m0, m2
-%endif
- movx [dstq], m0
- add dstq, dst_stride
-%ifidn %1, v8_avg
- movx m3, [dstq]
- pavgb m1, m3
-%endif
- movx [dstq], m1
- add dstq, dst_stride
- sub heightd, 2
- jg .loop
-
- ; Do last row if output_height is odd
- jne .done
-
- movx m0, [srcq ] ;A
- movx m1, [srcq + sstrideq ] ;B
- movx m6, [srcq + sstride6q ] ;G
- punpcklbw m0, m1 ;A B
- movx m7, [src1q + sstride6q ] ;H
- pmaddubsw m0, k0k1
- movx m2, [srcq + sstrideq * 2 ] ;C
- punpcklbw m6, m7 ;G H
- movx m3, [src1q + sstrideq * 2] ;D
- pmaddubsw m6, k6k7
- movx m4, [srcq + sstrideq * 4 ] ;E
- punpcklbw m2, m3 ;C D
- movx m5, [src1q + sstrideq * 4] ;F
- punpcklbw m4, m5 ;E F
- pmaddubsw m2, k2k3
- pmaddubsw m4, k4k5
- paddsw m2, m6
- paddsw m0, m4
- paddsw m0, m2
- paddsw m0, krd
- psraw m0, 7
-%ifidn %1, v8_add_src
- pxor m6, m6
- movu m4, [srcq]
- punpcklbw m4, m6
- paddsw m0, m4
-%endif
- packuswb m0, m0
-%ifidn %1, v8_avg
- movx m1, [dstq]
- pavgb m0, m1
-%endif
- movx [dstq], m0
-
-%else
- ; ARCH_X86_64
-
- movx m0, [srcq ] ;A
- movx m1, [srcq + sstrideq ] ;B
- lea srcq, [srcq + sstrideq * 2 ]
- movx m2, [srcq] ;C
- movx m3, [srcq + sstrideq] ;D
- lea srcq, [srcq + sstrideq * 2 ]
- movx m4, [srcq] ;E
- movx m5, [srcq + sstrideq] ;F
- lea srcq, [srcq + sstrideq * 2 ]
- movx m6, [srcq] ;G
- punpcklbw m0, m1 ;A B
- punpcklbw m1, m2 ;A B next iter
- punpcklbw m2, m3 ;C D
- punpcklbw m3, m4 ;C D next iter
- punpcklbw m4, m5 ;E F
- punpcklbw m5, m6 ;E F next iter
-
-.loop:
- ;Do two rows at once
- movx m7, [srcq + sstrideq] ;H
- lea srcq, [srcq + sstrideq * 2 ]
- movx m14, [srcq] ;H next iter
- punpcklbw m6, m7 ;G H
- punpcklbw m7, m14 ;G H next iter
- pmaddubsw m8, m0, k0k1
- pmaddubsw m9, m1, k0k1
- mova m0, m2
- mova m1, m3
- pmaddubsw m10, m2, k2k3
- pmaddubsw m11, m3, k2k3
- mova m2, m4
- mova m3, m5
- pmaddubsw m4, k4k5
- pmaddubsw m5, k4k5
- paddsw m8, m4
- paddsw m9, m5
- mova m4, m6
- mova m5, m7
- pmaddubsw m6, k6k7
- pmaddubsw m7, k6k7
- paddsw m10, m6
- paddsw m11, m7
- paddsw m8, m10
- paddsw m9, m11
- mova m6, m14
- paddsw m8, krd
- paddsw m9, krd
- psraw m8, 7
- psraw m9, 7
-%ifidn %2, 4
- packuswb m8, m8
- packuswb m9, m9
-%else
- packuswb m8, m9
-%endif
-
-%ifidn %1, v8_avg
- movx m7, [dstq]
-%ifidn %2, 4
- movx m10, [dstq + dstrideq]
- pavgb m9, m10
-%else
- movhpd m7, [dstq + dstrideq]
-%endif
- pavgb m8, m7
-%endif
- movx [dstq], m8
-%ifidn %2, 4
- movx [dstq + dstrideq], m9
-%else
- movhpd [dstq + dstrideq], m8
-%endif
-
- lea dstq, [dstq + dstrideq * 2 ]
- sub heightd, 2
- jg .loop
-
- ; Do last row if output_height is odd
- jne .done
-
- movx m7, [srcq + sstrideq] ;H
- punpcklbw m6, m7 ;G H
- pmaddubsw m0, k0k1
- pmaddubsw m2, k2k3
- pmaddubsw m4, k4k5
- pmaddubsw m6, k6k7
- paddsw m0, m4
- paddsw m2, m6
- paddsw m0, m2
- paddsw m0, krd
- psraw m0, 7
- packuswb m0, m0
-%ifidn %1, v8_avg
- movx m1, [dstq]
- pavgb m0, m1
-%endif
- movx [dstq], m0
-
-%endif ; ARCH_X86_64
-
-.done:
- REP_RET
-
-%endm
-
-;-------------------------------------------------------------------------------
-%macro SUBPIX_VFILTER16 1
-cglobal filter_block1d16_%1, 6, NUM_GENERAL_REG_USED, 16, LOCAL_VARS_SIZE, \
- src, sstride, dst, dstride, height, filter
- mova m4, [filterq]
- SETUP_LOCAL_VARS
-
-%if ARCH_X86 || X86_SUBPIX_VFILTER_PREFER_SLOW_CELERON
-
-%if ARCH_X86_64
- %define src1q r7
- %define sstride6q r8
- %define dst_stride dstrideq
-%else
- %define src1q filterq
- %define sstride6q dstrideq
- %define dst_stride dstridemp
-%endif
- lea src1q, [srcq + sstrideq]
- lea sstride6q, [sstrideq + sstrideq * 4]
- add sstride6q, sstrideq ;pitch * 6
-
-.loop:
- movh m0, [srcq ] ;A
- movh m1, [src1q ] ;B
- movh m2, [srcq + sstrideq * 2 ] ;C
- movh m3, [src1q + sstrideq * 2] ;D
- movh m4, [srcq + sstrideq * 4 ] ;E
- movh m5, [src1q + sstrideq * 4] ;F
-
- punpcklbw m0, m1 ;A B
- movh m6, [srcq + sstride6q] ;G
- punpcklbw m2, m3 ;C D
- movh m7, [src1q + sstride6q] ;H
- punpcklbw m4, m5 ;E F
- pmaddubsw m0, k0k1
- movh m3, [srcq + 8] ;A
- pmaddubsw m2, k2k3
- punpcklbw m6, m7 ;G H
- movh m5, [srcq + sstrideq + 8] ;B
- pmaddubsw m4, k4k5
- punpcklbw m3, m5 ;A B
- movh m7, [srcq + sstrideq * 2 + 8] ;C
- pmaddubsw m6, k6k7
- movh m5, [src1q + sstrideq * 2 + 8] ;D
- punpcklbw m7, m5 ;C D
- paddsw m2, m6
- pmaddubsw m3, k0k1
- movh m1, [srcq + sstrideq * 4 + 8] ;E
- paddsw m0, m4
- pmaddubsw m7, k2k3
- movh m6, [src1q + sstrideq * 4 + 8] ;F
- punpcklbw m1, m6 ;E F
- paddsw m0, m2
- paddsw m0, krd
- movh m2, [srcq + sstride6q + 8] ;G
- pmaddubsw m1, k4k5
- movh m5, [src1q + sstride6q + 8] ;H
- psraw m0, 7
- punpcklbw m2, m5 ;G H
- pmaddubsw m2, k6k7
- paddsw m7, m2
- paddsw m3, m1
- paddsw m3, m7
- paddsw m3, krd
- psraw m3, 7
-%ifidn %1, v8_add_src
- pxor m6, m6
- movu m4, [src1q + 2 * sstrideq] ; Fetch from 3 rows down
- mova m5, m4
- punpcklbw m4, m6
- punpckhbw m5, m6
- paddsw m0, m4
- paddsw m3, m5
-%endif
- packuswb m0, m3
-
- add srcq, sstrideq
- add src1q, sstrideq
-%ifidn %1, v8_avg
- pavgb m0, [dstq]
-%endif
- mova [dstq], m0
- add dstq, dst_stride
- dec heightd
- jnz .loop
- REP_RET
-
-%else
- ; ARCH_X86_64
- dec heightd
-
- movu m1, [srcq ] ;A
- movu m3, [srcq + sstrideq ] ;B
- lea srcq, [srcq + sstrideq * 2]
- punpcklbw m0, m1, m3 ;A B
- punpckhbw m1, m3 ;A B
- movu m5, [srcq] ;C
- punpcklbw m2, m3, m5 ;A B next iter
- punpckhbw m3, m5 ;A B next iter
- mova tmp0, m2 ;store to stack
- mova tmp1, m3 ;store to stack
- movu m7, [srcq + sstrideq] ;D
- lea srcq, [srcq + sstrideq * 2]
- punpcklbw m4, m5, m7 ;C D
- punpckhbw m5, m7 ;C D
- movu m9, [srcq] ;E
- punpcklbw m6, m7, m9 ;C D next iter
- punpckhbw m7, m9 ;C D next iter
- movu m11, [srcq + sstrideq] ;F
- lea srcq, [srcq + sstrideq * 2]
- punpcklbw m8, m9, m11 ;E F
- punpckhbw m9, m11 ;E F
- movu m2, [srcq] ;G
- punpcklbw m10, m11, m2 ;E F next iter
- punpckhbw m11, m2 ;E F next iter
-
-.loop:
- ;Do two rows at once
- pmaddubsw m13, m0, k0k1
- mova m0, m4
- pmaddubsw m14, m8, k4k5
- pmaddubsw m15, m4, k2k3
- mova m4, m8
- paddsw m13, m14
- movu m3, [srcq + sstrideq] ;H
- lea srcq, [srcq + sstrideq * 2]
- punpcklbw m14, m2, m3 ;G H
- mova m8, m14
- pmaddubsw m14, k6k7
- paddsw m15, m14
- paddsw m13, m15
- paddsw m13, krd
- psraw m13, 7
-
- pmaddubsw m14, m1, k0k1
- pmaddubsw m1, m9, k4k5
- pmaddubsw m15, m5, k2k3
- paddsw m14, m1
- mova m1, m5
- mova m5, m9
- punpckhbw m2, m3 ;G H
- mova m9, m2
- pmaddubsw m2, k6k7
- paddsw m15, m2
- paddsw m14, m15
- paddsw m14, krd
- psraw m14, 7
- packuswb m13, m14
-%ifidn %1, v8_avg
- pavgb m13, [dstq]
-%endif
- mova [dstq], m13
-
- ; next iter
- pmaddubsw m15, tmp0, k0k1
- pmaddubsw m14, m10, k4k5
- pmaddubsw m13, m6, k2k3
- paddsw m15, m14
- mova tmp0, m6
- mova m6, m10
- movu m2, [srcq] ;G next iter
- punpcklbw m14, m3, m2 ;G H next iter
- mova m10, m14
- pmaddubsw m14, k6k7
- paddsw m13, m14
- paddsw m15, m13
- paddsw m15, krd
- psraw m15, 7
-
- pmaddubsw m14, tmp1, k0k1
- mova tmp1, m7
- pmaddubsw m13, m7, k2k3
- mova m7, m11
- pmaddubsw m11, k4k5
- paddsw m14, m11
- punpckhbw m3, m2 ;G H next iter
- mova m11, m3
- pmaddubsw m3, k6k7
- paddsw m13, m3
- paddsw m14, m13
- paddsw m14, krd
- psraw m14, 7
- packuswb m15, m14
-%ifidn %1, v8_avg
- pavgb m15, [dstq + dstrideq]
-%endif
- mova [dstq + dstrideq], m15
- lea dstq, [dstq + dstrideq * 2]
- sub heightd, 2
- jg .loop
-
- ; Do last row if output_height is odd
- jne .done
-
- movu m3, [srcq + sstrideq] ;H
- punpcklbw m6, m2, m3 ;G H
- punpckhbw m2, m3 ;G H
- pmaddubsw m0, k0k1
- pmaddubsw m1, k0k1
- pmaddubsw m4, k2k3
- pmaddubsw m5, k2k3
- pmaddubsw m8, k4k5
- pmaddubsw m9, k4k5
- pmaddubsw m6, k6k7
- pmaddubsw m2, k6k7
- paddsw m0, m8
- paddsw m1, m9
- paddsw m4, m6
- paddsw m5, m2
- paddsw m0, m4
- paddsw m1, m5
- paddsw m0, krd
- paddsw m1, krd
- psraw m0, 7
- psraw m1, 7
- packuswb m0, m1
-%ifidn %1, v8_avg
- pavgb m0, [dstq]
-%endif
- mova [dstq], m0
-
-.done:
- REP_RET
-
-%endif ; ARCH_X86_64
-
-%endm
-
-INIT_XMM ssse3
-SUBPIX_VFILTER16 v8
-SUBPIX_VFILTER v8, 8
-SUBPIX_VFILTER v8, 4
diff --git a/third_party/aom/aom_dsp/x86/aom_subpixel_bilinear_sse2.asm b/third_party/aom/aom_dsp/x86/aom_subpixel_bilinear_sse2.asm
deleted file mode 100644
index d0b4b2839..000000000
--- a/third_party/aom/aom_dsp/x86/aom_subpixel_bilinear_sse2.asm
+++ /dev/null
@@ -1,295 +0,0 @@
-;
-; Copyright (c) 2016, Alliance for Open Media. All rights reserved
-;
-; This source code is subject to the terms of the BSD 2 Clause License and
-; the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
-; was not distributed with this source code in the LICENSE file, you can
-; obtain it at www.aomedia.org/license/software. If the Alliance for Open
-; Media Patent License 1.0 was not distributed with this source code in the
-; PATENTS file, you can obtain it at www.aomedia.org/license/patent.
-;
-
-;
-
-%include "aom_ports/x86_abi_support.asm"
-
-%macro GET_PARAM_4 0
- mov rdx, arg(5) ;filter ptr
- mov rsi, arg(0) ;src_ptr
- mov rdi, arg(2) ;output_ptr
- mov rcx, 0x0400040
-
- movdqa xmm3, [rdx] ;load filters
- pshuflw xmm4, xmm3, 11111111b ;k3
- psrldq xmm3, 8
- pshuflw xmm3, xmm3, 0b ;k4
- punpcklqdq xmm4, xmm3 ;k3k4
-
- movq xmm3, rcx ;rounding
- pshufd xmm3, xmm3, 0
-
- pxor xmm2, xmm2
-
- movsxd rax, DWORD PTR arg(1) ;pixels_per_line
- movsxd rdx, DWORD PTR arg(3) ;out_pitch
- movsxd rcx, DWORD PTR arg(4) ;output_height
-%endm
-
-%macro APPLY_FILTER_4 1
-
- punpckldq xmm0, xmm1 ;two row in one register
- punpcklbw xmm0, xmm2 ;unpack to word
- pmullw xmm0, xmm4 ;multiply the filter factors
-
- movdqa xmm1, xmm0
- psrldq xmm1, 8
- paddsw xmm0, xmm1
-
- paddsw xmm0, xmm3 ;rounding
- psraw xmm0, 7 ;shift
- packuswb xmm0, xmm0 ;pack to byte
-
-%if %1
- movd xmm1, [rdi]
- pavgb xmm0, xmm1
-%endif
-
- movd [rdi], xmm0
- lea rsi, [rsi + rax]
- lea rdi, [rdi + rdx]
- dec rcx
-%endm
-
-%macro GET_PARAM 0
- mov rdx, arg(5) ;filter ptr
- mov rsi, arg(0) ;src_ptr
- mov rdi, arg(2) ;output_ptr
- mov rcx, 0x0400040
-
- movdqa xmm7, [rdx] ;load filters
-
- pshuflw xmm6, xmm7, 11111111b ;k3
- pshufhw xmm7, xmm7, 0b ;k4
- punpcklwd xmm6, xmm6
- punpckhwd xmm7, xmm7
-
- movq xmm4, rcx ;rounding
- pshufd xmm4, xmm4, 0
-
- pxor xmm5, xmm5
-
- movsxd rax, DWORD PTR arg(1) ;pixels_per_line
- movsxd rdx, DWORD PTR arg(3) ;out_pitch
- movsxd rcx, DWORD PTR arg(4) ;output_height
-%endm
-
-%macro APPLY_FILTER_8 1
- punpcklbw xmm0, xmm5
- punpcklbw xmm1, xmm5
-
- pmullw xmm0, xmm6
- pmullw xmm1, xmm7
- paddsw xmm0, xmm1
- paddsw xmm0, xmm4 ;rounding
- psraw xmm0, 7 ;shift
- packuswb xmm0, xmm0 ;pack back to byte
-%if %1
- movq xmm1, [rdi]
- pavgb xmm0, xmm1
-%endif
- movq [rdi], xmm0 ;store the result
-
- lea rsi, [rsi + rax]
- lea rdi, [rdi + rdx]
- dec rcx
-%endm
-
-%macro APPLY_FILTER_16 1
- punpcklbw xmm0, xmm5
- punpcklbw xmm1, xmm5
- punpckhbw xmm2, xmm5
- punpckhbw xmm3, xmm5
-
- pmullw xmm0, xmm6
- pmullw xmm1, xmm7
- pmullw xmm2, xmm6
- pmullw xmm3, xmm7
-
- paddsw xmm0, xmm1
- paddsw xmm2, xmm3
-
- paddsw xmm0, xmm4 ;rounding
- paddsw xmm2, xmm4
- psraw xmm0, 7 ;shift
- psraw xmm2, 7
- packuswb xmm0, xmm2 ;pack back to byte
-%if %1
- movdqu xmm1, [rdi]
- pavgb xmm0, xmm1
-%endif
- movdqu [rdi], xmm0 ;store the result
-
- lea rsi, [rsi + rax]
- lea rdi, [rdi + rdx]
- dec rcx
-%endm
-
-SECTION .text
-
-global sym(aom_filter_block1d4_v2_sse2) PRIVATE
-sym(aom_filter_block1d4_v2_sse2):
- push rbp
- mov rbp, rsp
- SHADOW_ARGS_TO_STACK 6
- push rsi
- push rdi
- ; end prolog
-
- GET_PARAM_4
-.loop:
- movd xmm0, [rsi] ;load src
- movd xmm1, [rsi + rax]
-
- APPLY_FILTER_4 0
- jnz .loop
-
- ; begin epilog
- pop rdi
- pop rsi
- UNSHADOW_ARGS
- pop rbp
- ret
-
-global sym(aom_filter_block1d8_v2_sse2) PRIVATE
-sym(aom_filter_block1d8_v2_sse2):
- push rbp
- mov rbp, rsp
- SHADOW_ARGS_TO_STACK 6
- SAVE_XMM 7
- push rsi
- push rdi
- ; end prolog
-
- GET_PARAM
-.loop:
- movq xmm0, [rsi] ;0
- movq xmm1, [rsi + rax] ;1
-
- APPLY_FILTER_8 0
- jnz .loop
-
- ; begin epilog
- pop rdi
- pop rsi
- RESTORE_XMM
- UNSHADOW_ARGS
- pop rbp
- ret
-
-global sym(aom_filter_block1d16_v2_sse2) PRIVATE
-sym(aom_filter_block1d16_v2_sse2):
- push rbp
- mov rbp, rsp
- SHADOW_ARGS_TO_STACK 6
- SAVE_XMM 7
- push rsi
- push rdi
- ; end prolog
-
- GET_PARAM
-.loop:
- movdqu xmm0, [rsi] ;0
- movdqu xmm1, [rsi + rax] ;1
- movdqa xmm2, xmm0
- movdqa xmm3, xmm1
-
- APPLY_FILTER_16 0
- jnz .loop
-
- ; begin epilog
- pop rdi
- pop rsi
- RESTORE_XMM
- UNSHADOW_ARGS
- pop rbp
- ret
-
-global sym(aom_filter_block1d4_h2_sse2) PRIVATE
-sym(aom_filter_block1d4_h2_sse2):
- push rbp
- mov rbp, rsp
- SHADOW_ARGS_TO_STACK 6
- push rsi
- push rdi
- ; end prolog
-
- GET_PARAM_4
-.loop:
- movdqu xmm0, [rsi] ;load src
- movdqa xmm1, xmm0
- psrldq xmm1, 1
-
- APPLY_FILTER_4 0
- jnz .loop
-
- ; begin epilog
- pop rdi
- pop rsi
- UNSHADOW_ARGS
- pop rbp
- ret
-
-global sym(aom_filter_block1d8_h2_sse2) PRIVATE
-sym(aom_filter_block1d8_h2_sse2):
- push rbp
- mov rbp, rsp
- SHADOW_ARGS_TO_STACK 6
- SAVE_XMM 7
- push rsi
- push rdi
- ; end prolog
-
- GET_PARAM
-.loop:
- movdqu xmm0, [rsi] ;load src
- movdqa xmm1, xmm0
- psrldq xmm1, 1
-
- APPLY_FILTER_8 0
- jnz .loop
-
- ; begin epilog
- pop rdi
- pop rsi
- RESTORE_XMM
- UNSHADOW_ARGS
- pop rbp
- ret
-
-global sym(aom_filter_block1d16_h2_sse2) PRIVATE
-sym(aom_filter_block1d16_h2_sse2):
- push rbp
- mov rbp, rsp
- SHADOW_ARGS_TO_STACK 6
- SAVE_XMM 7
- push rsi
- push rdi
- ; end prolog
-
- GET_PARAM
-.loop:
- movdqu xmm0, [rsi] ;load src
- movdqu xmm1, [rsi + 1]
- movdqa xmm2, xmm0
- movdqa xmm3, xmm1
-
- APPLY_FILTER_16 0
- jnz .loop
-
- ; begin epilog
- pop rdi
- pop rsi
- RESTORE_XMM
- UNSHADOW_ARGS
- pop rbp
- ret
diff --git a/third_party/aom/aom_dsp/x86/aom_subpixel_bilinear_ssse3.asm b/third_party/aom/aom_dsp/x86/aom_subpixel_bilinear_ssse3.asm
deleted file mode 100644
index 59edc49a9..000000000
--- a/third_party/aom/aom_dsp/x86/aom_subpixel_bilinear_ssse3.asm
+++ /dev/null
@@ -1,267 +0,0 @@
-;
-; Copyright (c) 2016, Alliance for Open Media. All rights reserved
-;
-; This source code is subject to the terms of the BSD 2 Clause License and
-; the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
-; was not distributed with this source code in the LICENSE file, you can
-; obtain it at www.aomedia.org/license/software. If the Alliance for Open
-; Media Patent License 1.0 was not distributed with this source code in the
-; PATENTS file, you can obtain it at www.aomedia.org/license/patent.
-;
-
-;
-
-%include "aom_ports/x86_abi_support.asm"
-
-%macro GET_PARAM_4 0
- mov rdx, arg(5) ;filter ptr
- mov rsi, arg(0) ;src_ptr
- mov rdi, arg(2) ;output_ptr
- mov ecx, 0x01000100
-
- movdqa xmm3, [rdx] ;load filters
- psrldq xmm3, 6
- packsswb xmm3, xmm3
- pshuflw xmm3, xmm3, 0b ;k3_k4
-
- movd xmm2, ecx ;rounding_shift
- pshufd xmm2, xmm2, 0
-
- movsxd rax, DWORD PTR arg(1) ;pixels_per_line
- movsxd rdx, DWORD PTR arg(3) ;out_pitch
- movsxd rcx, DWORD PTR arg(4) ;output_height
-%endm
-
-%macro APPLY_FILTER_4 1
- punpcklbw xmm0, xmm1
- pmaddubsw xmm0, xmm3
-
- pmulhrsw xmm0, xmm2 ;rounding(+64)+shift(>>7)
- packuswb xmm0, xmm0 ;pack to byte
-
-%if %1
- movd xmm1, [rdi]
- pavgb xmm0, xmm1
-%endif
- movd [rdi], xmm0
- lea rsi, [rsi + rax]
- lea rdi, [rdi + rdx]
- dec rcx
-%endm
-
-%macro GET_PARAM 0
- mov rdx, arg(5) ;filter ptr
- mov rsi, arg(0) ;src_ptr
- mov rdi, arg(2) ;output_ptr
- mov ecx, 0x01000100
-
- movdqa xmm7, [rdx] ;load filters
- psrldq xmm7, 6
- packsswb xmm7, xmm7
- pshuflw xmm7, xmm7, 0b ;k3_k4
- punpcklwd xmm7, xmm7
-
- movd xmm6, ecx ;rounding_shift
- pshufd xmm6, xmm6, 0
-
- movsxd rax, DWORD PTR arg(1) ;pixels_per_line
- movsxd rdx, DWORD PTR arg(3) ;out_pitch
- movsxd rcx, DWORD PTR arg(4) ;output_height
-%endm
-
-%macro APPLY_FILTER_8 1
- punpcklbw xmm0, xmm1
- pmaddubsw xmm0, xmm7
-
- pmulhrsw xmm0, xmm6 ;rounding(+64)+shift(>>7)
- packuswb xmm0, xmm0 ;pack back to byte
-
-%if %1
- movq xmm1, [rdi]
- pavgb xmm0, xmm1
-%endif
- movq [rdi], xmm0 ;store the result
-
- lea rsi, [rsi + rax]
- lea rdi, [rdi + rdx]
- dec rcx
-%endm
-
-%macro APPLY_FILTER_16 1
- punpcklbw xmm0, xmm1
- punpckhbw xmm2, xmm1
- pmaddubsw xmm0, xmm7
- pmaddubsw xmm2, xmm7
-
- pmulhrsw xmm0, xmm6 ;rounding(+64)+shift(>>7)
- pmulhrsw xmm2, xmm6
- packuswb xmm0, xmm2 ;pack back to byte
-
-%if %1
- movdqu xmm1, [rdi]
- pavgb xmm0, xmm1
-%endif
- movdqu [rdi], xmm0 ;store the result
-
- lea rsi, [rsi + rax]
- lea rdi, [rdi + rdx]
- dec rcx
-%endm
-
-SECTION .text
-
-global sym(aom_filter_block1d4_v2_ssse3) PRIVATE
-sym(aom_filter_block1d4_v2_ssse3):
- push rbp
- mov rbp, rsp
- SHADOW_ARGS_TO_STACK 6
- push rsi
- push rdi
- ; end prolog
-
- GET_PARAM_4
-.loop:
- movd xmm0, [rsi] ;load src
- movd xmm1, [rsi + rax]
-
- APPLY_FILTER_4 0
- jnz .loop
-
- ; begin epilog
- pop rdi
- pop rsi
- UNSHADOW_ARGS
- pop rbp
- ret
-
-global sym(aom_filter_block1d8_v2_ssse3) PRIVATE
-sym(aom_filter_block1d8_v2_ssse3):
- push rbp
- mov rbp, rsp
- SHADOW_ARGS_TO_STACK 6
- SAVE_XMM 7
- push rsi
- push rdi
- ; end prolog
-
- GET_PARAM
-.loop:
- movq xmm0, [rsi] ;0
- movq xmm1, [rsi + rax] ;1
-
- APPLY_FILTER_8 0
- jnz .loop
-
- ; begin epilog
- pop rdi
- pop rsi
- RESTORE_XMM
- UNSHADOW_ARGS
- pop rbp
- ret
-
-global sym(aom_filter_block1d16_v2_ssse3) PRIVATE
-sym(aom_filter_block1d16_v2_ssse3):
- push rbp
- mov rbp, rsp
- SHADOW_ARGS_TO_STACK 6
- SAVE_XMM 7
- push rsi
- push rdi
- ; end prolog
-
- GET_PARAM
-.loop:
- movdqu xmm0, [rsi] ;0
- movdqu xmm1, [rsi + rax] ;1
- movdqa xmm2, xmm0
-
- APPLY_FILTER_16 0
- jnz .loop
-
- ; begin epilog
- pop rdi
- pop rsi
- RESTORE_XMM
- UNSHADOW_ARGS
- pop rbp
- ret
-
-global sym(aom_filter_block1d4_h2_ssse3) PRIVATE
-sym(aom_filter_block1d4_h2_ssse3):
- push rbp
- mov rbp, rsp
- SHADOW_ARGS_TO_STACK 6
- push rsi
- push rdi
- ; end prolog
-
- GET_PARAM_4
-.loop:
- movdqu xmm0, [rsi] ;load src
- movdqa xmm1, xmm0
- psrldq xmm1, 1
-
- APPLY_FILTER_4 0
- jnz .loop
-
- ; begin epilog
- pop rdi
- pop rsi
- UNSHADOW_ARGS
- pop rbp
- ret
-
-global sym(aom_filter_block1d8_h2_ssse3) PRIVATE
-sym(aom_filter_block1d8_h2_ssse3):
- push rbp
- mov rbp, rsp
- SHADOW_ARGS_TO_STACK 6
- SAVE_XMM 7
- push rsi
- push rdi
- ; end prolog
-
- GET_PARAM
-.loop:
- movdqu xmm0, [rsi] ;load src
- movdqa xmm1, xmm0
- psrldq xmm1, 1
-
- APPLY_FILTER_8 0
- jnz .loop
-
- ; begin epilog
- pop rdi
- pop rsi
- RESTORE_XMM
- UNSHADOW_ARGS
- pop rbp
- ret
-
-global sym(aom_filter_block1d16_h2_ssse3) PRIVATE
-sym(aom_filter_block1d16_h2_ssse3):
- push rbp
- mov rbp, rsp
- SHADOW_ARGS_TO_STACK 6
- SAVE_XMM 7
- push rsi
- push rdi
- ; end prolog
-
- GET_PARAM
-.loop:
- movdqu xmm0, [rsi] ;load src
- movdqu xmm1, [rsi + 1]
- movdqa xmm2, xmm0
-
- APPLY_FILTER_16 0
- jnz .loop
-
- ; begin epilog
- pop rdi
- pop rsi
- RESTORE_XMM
- UNSHADOW_ARGS
- pop rbp
- ret
diff --git a/third_party/aom/aom_dsp/x86/blend_a64_hmask_sse4.c b/third_party/aom/aom_dsp/x86/blend_a64_hmask_sse4.c
deleted file mode 100644
index 4f5e3f8c1..000000000
--- a/third_party/aom/aom_dsp/x86/blend_a64_hmask_sse4.c
+++ /dev/null
@@ -1,34 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#include "aom/aom_integer.h"
-
-#include "config/aom_dsp_rtcd.h"
-
-// To start out, just dispatch to the function using the 2D mask and
-// pass mask stride as 0. This can be improved upon if necessary.
-
-void aom_blend_a64_hmask_sse4_1(uint8_t *dst, uint32_t dst_stride,
- const uint8_t *src0, uint32_t src0_stride,
- const uint8_t *src1, uint32_t src1_stride,
- const uint8_t *mask, int w, int h) {
- aom_blend_a64_mask_sse4_1(dst, dst_stride, src0, src0_stride, src1,
- src1_stride, mask, 0, w, h, 0, 0);
-}
-
-void aom_highbd_blend_a64_hmask_sse4_1(
- uint8_t *dst_8, uint32_t dst_stride, const uint8_t *src0_8,
- uint32_t src0_stride, const uint8_t *src1_8, uint32_t src1_stride,
- const uint8_t *mask, int w, int h, int bd) {
- aom_highbd_blend_a64_mask_sse4_1(dst_8, dst_stride, src0_8, src0_stride,
- src1_8, src1_stride, mask, 0, w, h, 0, 0,
- bd);
-}
diff --git a/third_party/aom/aom_dsp/x86/blend_a64_mask_avx2.c b/third_party/aom/aom_dsp/x86/blend_a64_mask_avx2.c
deleted file mode 100644
index 67fb4d32b..000000000
--- a/third_party/aom/aom_dsp/x86/blend_a64_mask_avx2.c
+++ /dev/null
@@ -1,900 +0,0 @@
-/*
- * Copyright (c) 2018, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#include <smmintrin.h> // SSE4.1
-#include <immintrin.h> // AVX2
-
-#include <assert.h>
-
-#include "aom/aom_integer.h"
-#include "aom_ports/mem.h"
-#include "aom_dsp/aom_dsp_common.h"
-
-#include "aom_dsp/x86/synonyms.h"
-#include "aom_dsp/x86/synonyms_avx2.h"
-#include "aom_dsp/x86/blend_sse4.h"
-#include "aom_dsp/x86/blend_mask_sse4.h"
-
-#include "config/aom_dsp_rtcd.h"
-
-static INLINE void blend_a64_d16_mask_w16_avx2(
- uint8_t *dst, const CONV_BUF_TYPE *src0, const CONV_BUF_TYPE *src1,
- const __m256i *m0, const __m256i *v_round_offset, const __m256i *v_maxval,
- int shift) {
- const __m256i max_minus_m0 = _mm256_sub_epi16(*v_maxval, *m0);
- const __m256i s0_0 = yy_loadu_256(src0);
- const __m256i s1_0 = yy_loadu_256(src1);
- __m256i res0_lo = _mm256_madd_epi16(_mm256_unpacklo_epi16(s0_0, s1_0),
- _mm256_unpacklo_epi16(*m0, max_minus_m0));
- __m256i res0_hi = _mm256_madd_epi16(_mm256_unpackhi_epi16(s0_0, s1_0),
- _mm256_unpackhi_epi16(*m0, max_minus_m0));
- res0_lo =
- _mm256_srai_epi32(_mm256_sub_epi32(res0_lo, *v_round_offset), shift);
- res0_hi =
- _mm256_srai_epi32(_mm256_sub_epi32(res0_hi, *v_round_offset), shift);
- const __m256i res0 = _mm256_packs_epi32(res0_lo, res0_hi);
- __m256i res = _mm256_packus_epi16(res0, res0);
- res = _mm256_permute4x64_epi64(res, 0xd8);
- _mm_storeu_si128((__m128i *)(dst), _mm256_castsi256_si128(res));
-}
-
-static INLINE void blend_a64_d16_mask_w32_avx2(
- uint8_t *dst, const CONV_BUF_TYPE *src0, const CONV_BUF_TYPE *src1,
- const __m256i *m0, const __m256i *m1, const __m256i *v_round_offset,
- const __m256i *v_maxval, int shift) {
- const __m256i max_minus_m0 = _mm256_sub_epi16(*v_maxval, *m0);
- const __m256i max_minus_m1 = _mm256_sub_epi16(*v_maxval, *m1);
- const __m256i s0_0 = yy_loadu_256(src0);
- const __m256i s0_1 = yy_loadu_256(src0 + 16);
- const __m256i s1_0 = yy_loadu_256(src1);
- const __m256i s1_1 = yy_loadu_256(src1 + 16);
- __m256i res0_lo = _mm256_madd_epi16(_mm256_unpacklo_epi16(s0_0, s1_0),
- _mm256_unpacklo_epi16(*m0, max_minus_m0));
- __m256i res0_hi = _mm256_madd_epi16(_mm256_unpackhi_epi16(s0_0, s1_0),
- _mm256_unpackhi_epi16(*m0, max_minus_m0));
- __m256i res1_lo = _mm256_madd_epi16(_mm256_unpacklo_epi16(s0_1, s1_1),
- _mm256_unpacklo_epi16(*m1, max_minus_m1));
- __m256i res1_hi = _mm256_madd_epi16(_mm256_unpackhi_epi16(s0_1, s1_1),
- _mm256_unpackhi_epi16(*m1, max_minus_m1));
- res0_lo =
- _mm256_srai_epi32(_mm256_sub_epi32(res0_lo, *v_round_offset), shift);
- res0_hi =
- _mm256_srai_epi32(_mm256_sub_epi32(res0_hi, *v_round_offset), shift);
- res1_lo =
- _mm256_srai_epi32(_mm256_sub_epi32(res1_lo, *v_round_offset), shift);
- res1_hi =
- _mm256_srai_epi32(_mm256_sub_epi32(res1_hi, *v_round_offset), shift);
- const __m256i res0 = _mm256_packs_epi32(res0_lo, res0_hi);
- const __m256i res1 = _mm256_packs_epi32(res1_lo, res1_hi);
- __m256i res = _mm256_packus_epi16(res0, res1);
- res = _mm256_permute4x64_epi64(res, 0xd8);
- _mm256_storeu_si256((__m256i *)(dst), res);
-}
-
-static INLINE void lowbd_blend_a64_d16_mask_subw0_subh0_w16_avx2(
- uint8_t *dst, uint32_t dst_stride, const CONV_BUF_TYPE *src0,
- uint32_t src0_stride, const CONV_BUF_TYPE *src1, uint32_t src1_stride,
- const uint8_t *mask, uint32_t mask_stride, int h,
- const __m256i *round_offset, int shift) {
- const __m256i v_maxval = _mm256_set1_epi16(AOM_BLEND_A64_MAX_ALPHA);
- for (int i = 0; i < h; ++i) {
- const __m128i m = xx_loadu_128(mask);
- const __m256i m0 = _mm256_cvtepu8_epi16(m);
-
- blend_a64_d16_mask_w16_avx2(dst, src0, src1, &m0, round_offset, &v_maxval,
- shift);
- mask += mask_stride;
- dst += dst_stride;
- src0 += src0_stride;
- src1 += src1_stride;
- }
-}
-
-static INLINE void lowbd_blend_a64_d16_mask_subw0_subh0_w32_avx2(
- uint8_t *dst, uint32_t dst_stride, const CONV_BUF_TYPE *src0,
- uint32_t src0_stride, const CONV_BUF_TYPE *src1, uint32_t src1_stride,
- const uint8_t *mask, uint32_t mask_stride, int h, int w,
- const __m256i *round_offset, int shift) {
- const __m256i v_maxval = _mm256_set1_epi16(AOM_BLEND_A64_MAX_ALPHA);
- for (int i = 0; i < h; ++i) {
- for (int j = 0; j < w; j += 32) {
- const __m256i m = yy_loadu_256(mask + j);
- const __m256i m0 = _mm256_cvtepu8_epi16(_mm256_castsi256_si128(m));
- const __m256i m1 = _mm256_cvtepu8_epi16(_mm256_extracti128_si256(m, 1));
-
- blend_a64_d16_mask_w32_avx2(dst + j, src0 + j, src1 + j, &m0, &m1,
- round_offset, &v_maxval, shift);
- }
- mask += mask_stride;
- dst += dst_stride;
- src0 += src0_stride;
- src1 += src1_stride;
- }
-}
-
-static INLINE void lowbd_blend_a64_d16_mask_subw1_subh1_w16_avx2(
- uint8_t *dst, uint32_t dst_stride, const CONV_BUF_TYPE *src0,
- uint32_t src0_stride, const CONV_BUF_TYPE *src1, uint32_t src1_stride,
- const uint8_t *mask, uint32_t mask_stride, int h,
- const __m256i *round_offset, int shift) {
- const __m256i v_maxval = _mm256_set1_epi16(AOM_BLEND_A64_MAX_ALPHA);
- const __m256i one_b = _mm256_set1_epi8(1);
- const __m256i two_w = _mm256_set1_epi16(2);
- for (int i = 0; i < h; ++i) {
- const __m256i m_i00 = yy_loadu_256(mask);
- const __m256i m_i10 = yy_loadu_256(mask + mask_stride);
-
- const __m256i m0_ac = _mm256_adds_epu8(m_i00, m_i10);
- const __m256i m0_acbd = _mm256_maddubs_epi16(m0_ac, one_b);
- const __m256i m0 = _mm256_srli_epi16(_mm256_add_epi16(m0_acbd, two_w), 2);
-
- blend_a64_d16_mask_w16_avx2(dst, src0, src1, &m0, round_offset, &v_maxval,
- shift);
- mask += mask_stride << 1;
- dst += dst_stride;
- src0 += src0_stride;
- src1 += src1_stride;
- }
-}
-
-static INLINE void lowbd_blend_a64_d16_mask_subw1_subh1_w32_avx2(
- uint8_t *dst, uint32_t dst_stride, const CONV_BUF_TYPE *src0,
- uint32_t src0_stride, const CONV_BUF_TYPE *src1, uint32_t src1_stride,
- const uint8_t *mask, uint32_t mask_stride, int h, int w,
- const __m256i *round_offset, int shift) {
- const __m256i v_maxval = _mm256_set1_epi16(AOM_BLEND_A64_MAX_ALPHA);
- const __m256i one_b = _mm256_set1_epi8(1);
- const __m256i two_w = _mm256_set1_epi16(2);
- for (int i = 0; i < h; ++i) {
- for (int j = 0; j < w; j += 32) {
- const __m256i m_i00 = yy_loadu_256(mask + 2 * j);
- const __m256i m_i01 = yy_loadu_256(mask + 2 * j + 32);
- const __m256i m_i10 = yy_loadu_256(mask + mask_stride + 2 * j);
- const __m256i m_i11 = yy_loadu_256(mask + mask_stride + 2 * j + 32);
-
- const __m256i m0_ac = _mm256_adds_epu8(m_i00, m_i10);
- const __m256i m1_ac = _mm256_adds_epu8(m_i01, m_i11);
- const __m256i m0_acbd = _mm256_maddubs_epi16(m0_ac, one_b);
- const __m256i m1_acbd = _mm256_maddubs_epi16(m1_ac, one_b);
- const __m256i m0 = _mm256_srli_epi16(_mm256_add_epi16(m0_acbd, two_w), 2);
- const __m256i m1 = _mm256_srli_epi16(_mm256_add_epi16(m1_acbd, two_w), 2);
-
- blend_a64_d16_mask_w32_avx2(dst + j, src0 + j, src1 + j, &m0, &m1,
- round_offset, &v_maxval, shift);
- }
- mask += mask_stride << 1;
- dst += dst_stride;
- src0 += src0_stride;
- src1 += src1_stride;
- }
-}
-
-static INLINE void lowbd_blend_a64_d16_mask_subw1_subh0_w16_avx2(
- uint8_t *dst, uint32_t dst_stride, const CONV_BUF_TYPE *src0,
- uint32_t src0_stride, const CONV_BUF_TYPE *src1, uint32_t src1_stride,
- const uint8_t *mask, uint32_t mask_stride, int h, int w,
- const __m256i *round_offset, int shift) {
- const __m256i v_maxval = _mm256_set1_epi16(AOM_BLEND_A64_MAX_ALPHA);
- const __m256i one_b = _mm256_set1_epi8(1);
- const __m256i zeros = _mm256_setzero_si256();
- for (int i = 0; i < h; ++i) {
- for (int j = 0; j < w; j += 16) {
- const __m256i m_i00 = yy_loadu_256(mask + 2 * j);
- const __m256i m0_ac = _mm256_maddubs_epi16(m_i00, one_b);
- const __m256i m0 = _mm256_avg_epu16(m0_ac, zeros);
-
- blend_a64_d16_mask_w16_avx2(dst + j, src0 + j, src1 + j, &m0,
- round_offset, &v_maxval, shift);
- }
- mask += mask_stride;
- dst += dst_stride;
- src0 += src0_stride;
- src1 += src1_stride;
- }
-}
-
-static INLINE void lowbd_blend_a64_d16_mask_subw1_subh0_w32_avx2(
- uint8_t *dst, uint32_t dst_stride, const CONV_BUF_TYPE *src0,
- uint32_t src0_stride, const CONV_BUF_TYPE *src1, uint32_t src1_stride,
- const uint8_t *mask, uint32_t mask_stride, int h, int w,
- const __m256i *round_offset, int shift) {
- const __m256i v_maxval = _mm256_set1_epi16(AOM_BLEND_A64_MAX_ALPHA);
- const __m256i one_b = _mm256_set1_epi8(1);
- const __m256i zeros = _mm256_setzero_si256();
- for (int i = 0; i < h; ++i) {
- for (int j = 0; j < w; j += 32) {
- const __m256i m_i00 = yy_loadu_256(mask + 2 * j);
- const __m256i m_i01 = yy_loadu_256(mask + 2 * j + 32);
- const __m256i m0_ac = _mm256_maddubs_epi16(m_i00, one_b);
- const __m256i m1_ac = _mm256_maddubs_epi16(m_i01, one_b);
- const __m256i m0 = _mm256_avg_epu16(m0_ac, zeros);
- const __m256i m1 = _mm256_avg_epu16(m1_ac, zeros);
-
- blend_a64_d16_mask_w32_avx2(dst + j, src0 + j, src1 + j, &m0, &m1,
- round_offset, &v_maxval, shift);
- }
- mask += mask_stride;
- dst += dst_stride;
- src0 += src0_stride;
- src1 += src1_stride;
- }
-}
-
-static INLINE void lowbd_blend_a64_d16_mask_subw0_subh1_w16_avx2(
- uint8_t *dst, uint32_t dst_stride, const CONV_BUF_TYPE *src0,
- uint32_t src0_stride, const CONV_BUF_TYPE *src1, uint32_t src1_stride,
- const uint8_t *mask, uint32_t mask_stride, int h, int w,
- const __m256i *round_offset, int shift) {
- const __m256i v_maxval = _mm256_set1_epi16(AOM_BLEND_A64_MAX_ALPHA);
- const __m128i zeros = _mm_setzero_si128();
- for (int i = 0; i < h; ++i) {
- for (int j = 0; j < w; j += 16) {
- const __m128i m_i00 = xx_loadu_128(mask + j);
- const __m128i m_i10 = xx_loadu_128(mask + mask_stride + j);
-
- const __m128i m_ac = _mm_avg_epu8(_mm_adds_epu8(m_i00, m_i10), zeros);
- const __m256i m0 = _mm256_cvtepu8_epi16(m_ac);
-
- blend_a64_d16_mask_w16_avx2(dst + j, src0 + j, src1 + j, &m0,
- round_offset, &v_maxval, shift);
- }
- mask += mask_stride << 1;
- dst += dst_stride;
- src0 += src0_stride;
- src1 += src1_stride;
- }
-}
-
-static INLINE void lowbd_blend_a64_d16_mask_subw0_subh1_w32_avx2(
- uint8_t *dst, uint32_t dst_stride, const CONV_BUF_TYPE *src0,
- uint32_t src0_stride, const CONV_BUF_TYPE *src1, uint32_t src1_stride,
- const uint8_t *mask, uint32_t mask_stride, int h, int w,
- const __m256i *round_offset, int shift) {
- const __m256i v_maxval = _mm256_set1_epi16(AOM_BLEND_A64_MAX_ALPHA);
- const __m256i zeros = _mm256_setzero_si256();
- for (int i = 0; i < h; ++i) {
- for (int j = 0; j < w; j += 32) {
- const __m256i m_i00 = yy_loadu_256(mask + j);
- const __m256i m_i10 = yy_loadu_256(mask + mask_stride + j);
-
- const __m256i m_ac =
- _mm256_avg_epu8(_mm256_adds_epu8(m_i00, m_i10), zeros);
- const __m256i m0 = _mm256_cvtepu8_epi16(_mm256_castsi256_si128(m_ac));
- const __m256i m1 =
- _mm256_cvtepu8_epi16(_mm256_extracti128_si256(m_ac, 1));
-
- blend_a64_d16_mask_w32_avx2(dst + j, src0 + j, src1 + j, &m0, &m1,
- round_offset, &v_maxval, shift);
- }
- mask += mask_stride << 1;
- dst += dst_stride;
- src0 += src0_stride;
- src1 += src1_stride;
- }
-}
-
-void aom_lowbd_blend_a64_d16_mask_avx2(
- uint8_t *dst, uint32_t dst_stride, const CONV_BUF_TYPE *src0,
- uint32_t src0_stride, const CONV_BUF_TYPE *src1, uint32_t src1_stride,
- const uint8_t *mask, uint32_t mask_stride, int w, int h, int subw, int subh,
- ConvolveParams *conv_params) {
- const int bd = 8;
- const int round_bits =
- 2 * FILTER_BITS - conv_params->round_0 - conv_params->round_1;
-
- const int round_offset =
- ((1 << (round_bits + bd)) + (1 << (round_bits + bd - 1)) -
- (1 << (round_bits - 1)))
- << AOM_BLEND_A64_ROUND_BITS;
-
- const int shift = round_bits + AOM_BLEND_A64_ROUND_BITS;
- assert(IMPLIES((void *)src0 == dst, src0_stride == dst_stride));
- assert(IMPLIES((void *)src1 == dst, src1_stride == dst_stride));
-
- assert(h >= 4);
- assert(w >= 4);
- assert(IS_POWER_OF_TWO(h));
- assert(IS_POWER_OF_TWO(w));
- const __m128i v_round_offset = _mm_set1_epi32(round_offset);
- const __m256i y_round_offset = _mm256_set1_epi32(round_offset);
-
- if (subw == 0 && subh == 0) {
- switch (w) {
- case 4:
- aom_lowbd_blend_a64_d16_mask_subw0_subh0_w4_sse4_1(
- dst, dst_stride, src0, src0_stride, src1, src1_stride, mask,
- mask_stride, h, &v_round_offset, shift);
- break;
- case 8:
- aom_lowbd_blend_a64_d16_mask_subw0_subh0_w8_sse4_1(
- dst, dst_stride, src0, src0_stride, src1, src1_stride, mask,
- mask_stride, h, &v_round_offset, shift);
- break;
- case 16:
- lowbd_blend_a64_d16_mask_subw0_subh0_w16_avx2(
- dst, dst_stride, src0, src0_stride, src1, src1_stride, mask,
- mask_stride, h, &y_round_offset, shift);
- break;
- default:
- lowbd_blend_a64_d16_mask_subw0_subh0_w32_avx2(
- dst, dst_stride, src0, src0_stride, src1, src1_stride, mask,
- mask_stride, h, w, &y_round_offset, shift);
- break;
- }
- } else if (subw == 1 && subh == 1) {
- switch (w) {
- case 4:
- aom_lowbd_blend_a64_d16_mask_subw1_subh1_w4_sse4_1(
- dst, dst_stride, src0, src0_stride, src1, src1_stride, mask,
- mask_stride, h, &v_round_offset, shift);
- break;
- case 8:
- aom_lowbd_blend_a64_d16_mask_subw1_subh1_w8_sse4_1(
- dst, dst_stride, src0, src0_stride, src1, src1_stride, mask,
- mask_stride, h, &v_round_offset, shift);
- break;
- case 16:
- lowbd_blend_a64_d16_mask_subw1_subh1_w16_avx2(
- dst, dst_stride, src0, src0_stride, src1, src1_stride, mask,
- mask_stride, h, &y_round_offset, shift);
- break;
- default:
- lowbd_blend_a64_d16_mask_subw1_subh1_w32_avx2(
- dst, dst_stride, src0, src0_stride, src1, src1_stride, mask,
- mask_stride, h, w, &y_round_offset, shift);
- break;
- }
- } else if (subw == 1 && subh == 0) {
- switch (w) {
- case 4:
- aom_lowbd_blend_a64_d16_mask_subw1_subh0_w4_sse4_1(
- dst, dst_stride, src0, src0_stride, src1, src1_stride, mask,
- mask_stride, h, &v_round_offset, shift);
- break;
- case 8:
- aom_lowbd_blend_a64_d16_mask_subw1_subh0_w8_sse4_1(
- dst, dst_stride, src0, src0_stride, src1, src1_stride, mask,
- mask_stride, h, &v_round_offset, shift);
- break;
- case 16:
- lowbd_blend_a64_d16_mask_subw1_subh0_w16_avx2(
- dst, dst_stride, src0, src0_stride, src1, src1_stride, mask,
- mask_stride, h, w, &y_round_offset, shift);
- break;
- default:
- lowbd_blend_a64_d16_mask_subw1_subh0_w32_avx2(
- dst, dst_stride, src0, src0_stride, src1, src1_stride, mask,
- mask_stride, h, w, &y_round_offset, shift);
- break;
- }
- } else {
- switch (w) {
- case 4:
- aom_lowbd_blend_a64_d16_mask_subw0_subh1_w4_sse4_1(
- dst, dst_stride, src0, src0_stride, src1, src1_stride, mask,
- mask_stride, h, &v_round_offset, shift);
- break;
- case 8:
- aom_lowbd_blend_a64_d16_mask_subw0_subh1_w8_sse4_1(
- dst, dst_stride, src0, src0_stride, src1, src1_stride, mask,
- mask_stride, h, &v_round_offset, shift);
- break;
- case 16:
- lowbd_blend_a64_d16_mask_subw0_subh1_w16_avx2(
- dst, dst_stride, src0, src0_stride, src1, src1_stride, mask,
- mask_stride, h, w, &y_round_offset, shift);
- break;
- default:
- lowbd_blend_a64_d16_mask_subw0_subh1_w32_avx2(
- dst, dst_stride, src0, src0_stride, src1, src1_stride, mask,
- mask_stride, h, w, &y_round_offset, shift);
- break;
- }
- }
-}
-
-static INLINE __m256i blend_16_u8_avx2(const uint8_t *src0, const uint8_t *src1,
- const __m256i *v_m0_b,
- const __m256i *v_m1_b,
- const int32_t bits) {
- const __m256i v_s0_b = _mm256_castsi128_si256(xx_loadu_128(src0));
- const __m256i v_s1_b = _mm256_castsi128_si256(xx_loadu_128(src1));
- const __m256i v_s0_s_b = _mm256_permute4x64_epi64(v_s0_b, 0xd8);
- const __m256i v_s1_s_b = _mm256_permute4x64_epi64(v_s1_b, 0xd8);
-
- const __m256i v_p0_w =
- _mm256_maddubs_epi16(_mm256_unpacklo_epi8(v_s0_s_b, v_s1_s_b),
- _mm256_unpacklo_epi8(*v_m0_b, *v_m1_b));
-
- const __m256i v_res0_w = yy_roundn_epu16(v_p0_w, bits);
- const __m256i v_res_b = _mm256_packus_epi16(v_res0_w, v_res0_w);
- const __m256i v_res = _mm256_permute4x64_epi64(v_res_b, 0xd8);
- return v_res;
-}
-
-static INLINE __m256i blend_32_u8_avx2(const uint8_t *src0, const uint8_t *src1,
- const __m256i *v_m0_b,
- const __m256i *v_m1_b,
- const int32_t bits) {
- const __m256i v_s0_b = yy_loadu_256(src0);
- const __m256i v_s1_b = yy_loadu_256(src1);
-
- const __m256i v_p0_w =
- _mm256_maddubs_epi16(_mm256_unpacklo_epi8(v_s0_b, v_s1_b),
- _mm256_unpacklo_epi8(*v_m0_b, *v_m1_b));
- const __m256i v_p1_w =
- _mm256_maddubs_epi16(_mm256_unpackhi_epi8(v_s0_b, v_s1_b),
- _mm256_unpackhi_epi8(*v_m0_b, *v_m1_b));
-
- const __m256i v_res0_w = yy_roundn_epu16(v_p0_w, bits);
- const __m256i v_res1_w = yy_roundn_epu16(v_p1_w, bits);
- const __m256i v_res = _mm256_packus_epi16(v_res0_w, v_res1_w);
- return v_res;
-}
-
-static INLINE void blend_a64_mask_sx_sy_w16_avx2(
- uint8_t *dst, uint32_t dst_stride, const uint8_t *src0,
- uint32_t src0_stride, const uint8_t *src1, uint32_t src1_stride,
- const uint8_t *mask, uint32_t mask_stride, int h) {
- const __m256i v_zmask_b = _mm256_set1_epi16(0xFF);
- const __m256i v_maxval_b = _mm256_set1_epi8(AOM_BLEND_A64_MAX_ALPHA);
- do {
- const __m256i v_ral_b = yy_loadu_256(mask);
- const __m256i v_rbl_b = yy_loadu_256(mask + mask_stride);
- const __m256i v_rvsl_b = _mm256_add_epi8(v_ral_b, v_rbl_b);
- const __m256i v_rvsal_w = _mm256_and_si256(v_rvsl_b, v_zmask_b);
- const __m256i v_rvsbl_w =
- _mm256_and_si256(_mm256_srli_si256(v_rvsl_b, 1), v_zmask_b);
- const __m256i v_rsl_w = _mm256_add_epi16(v_rvsal_w, v_rvsbl_w);
-
- const __m256i v_m0_w = yy_roundn_epu16(v_rsl_w, 2);
- const __m256i v_m0_b = _mm256_packus_epi16(v_m0_w, v_m0_w);
- const __m256i v_m1_b = _mm256_sub_epi8(v_maxval_b, v_m0_b);
-
- const __m256i y_res_b = blend_16_u8_avx2(src0, src1, &v_m0_b, &v_m1_b,
- AOM_BLEND_A64_ROUND_BITS);
-
- xx_storeu_128(dst, _mm256_castsi256_si128(y_res_b));
- dst += dst_stride;
- src0 += src0_stride;
- src1 += src1_stride;
- mask += 2 * mask_stride;
- } while (--h);
-}
-
-static INLINE void blend_a64_mask_sx_sy_w32n_avx2(
- uint8_t *dst, uint32_t dst_stride, const uint8_t *src0,
- uint32_t src0_stride, const uint8_t *src1, uint32_t src1_stride,
- const uint8_t *mask, uint32_t mask_stride, int w, int h) {
- const __m256i v_maxval_b = _mm256_set1_epi8(AOM_BLEND_A64_MAX_ALPHA);
- const __m256i v_zmask_b = _mm256_set1_epi16(0xFF);
- do {
- int c;
- for (c = 0; c < w; c += 32) {
- const __m256i v_ral_b = yy_loadu_256(mask + 2 * c);
- const __m256i v_rah_b = yy_loadu_256(mask + 2 * c + 32);
- const __m256i v_rbl_b = yy_loadu_256(mask + mask_stride + 2 * c);
- const __m256i v_rbh_b = yy_loadu_256(mask + mask_stride + 2 * c + 32);
- const __m256i v_rvsl_b = _mm256_add_epi8(v_ral_b, v_rbl_b);
- const __m256i v_rvsh_b = _mm256_add_epi8(v_rah_b, v_rbh_b);
- const __m256i v_rvsal_w = _mm256_and_si256(v_rvsl_b, v_zmask_b);
- const __m256i v_rvsah_w = _mm256_and_si256(v_rvsh_b, v_zmask_b);
- const __m256i v_rvsbl_w =
- _mm256_and_si256(_mm256_srli_si256(v_rvsl_b, 1), v_zmask_b);
- const __m256i v_rvsbh_w =
- _mm256_and_si256(_mm256_srli_si256(v_rvsh_b, 1), v_zmask_b);
- const __m256i v_rsl_w = _mm256_add_epi16(v_rvsal_w, v_rvsbl_w);
- const __m256i v_rsh_w = _mm256_add_epi16(v_rvsah_w, v_rvsbh_w);
-
- const __m256i v_m0l_w = yy_roundn_epu16(v_rsl_w, 2);
- const __m256i v_m0h_w = yy_roundn_epu16(v_rsh_w, 2);
- const __m256i v_m0_b =
- _mm256_permute4x64_epi64(_mm256_packus_epi16(v_m0l_w, v_m0h_w), 0xd8);
- const __m256i v_m1_b = _mm256_sub_epi8(v_maxval_b, v_m0_b);
-
- const __m256i v_res_b = blend_32_u8_avx2(
- src0 + c, src1 + c, &v_m0_b, &v_m1_b, AOM_BLEND_A64_ROUND_BITS);
-
- yy_storeu_256(dst + c, v_res_b);
- }
- dst += dst_stride;
- src0 += src0_stride;
- src1 += src1_stride;
- mask += 2 * mask_stride;
- } while (--h);
-}
-
-static INLINE void blend_a64_mask_sx_sy_avx2(
- uint8_t *dst, uint32_t dst_stride, const uint8_t *src0,
- uint32_t src0_stride, const uint8_t *src1, uint32_t src1_stride,
- const uint8_t *mask, uint32_t mask_stride, int w, int h) {
- const __m128i v_shuffle_b = xx_loadu_128(g_blend_a64_mask_shuffle);
- const __m128i v_maxval_b = _mm_set1_epi8(AOM_BLEND_A64_MAX_ALPHA);
- const __m128i _r = _mm_set1_epi16(1 << (15 - AOM_BLEND_A64_ROUND_BITS));
- switch (w) {
- case 4:
- do {
- const __m128i v_ra_b = xx_loadl_64(mask);
- const __m128i v_rb_b = xx_loadl_64(mask + mask_stride);
- const __m128i v_rvs_b = _mm_add_epi8(v_ra_b, v_rb_b);
- const __m128i v_r_s_b = _mm_shuffle_epi8(v_rvs_b, v_shuffle_b);
- const __m128i v_r0_s_w = _mm_cvtepu8_epi16(v_r_s_b);
- const __m128i v_r1_s_w = _mm_cvtepu8_epi16(_mm_srli_si128(v_r_s_b, 8));
- const __m128i v_rs_w = _mm_add_epi16(v_r0_s_w, v_r1_s_w);
- const __m128i v_m0_w = xx_roundn_epu16(v_rs_w, 2);
- const __m128i v_m0_b = _mm_packus_epi16(v_m0_w, v_m0_w);
- const __m128i v_m1_b = _mm_sub_epi8(v_maxval_b, v_m0_b);
-
- const __m128i v_res_b = blend_4_u8(src0, src1, &v_m0_b, &v_m1_b, &_r);
-
- xx_storel_32(dst, v_res_b);
-
- dst += dst_stride;
- src0 += src0_stride;
- src1 += src1_stride;
- mask += 2 * mask_stride;
- } while (--h);
- break;
- case 8:
- do {
- const __m128i v_ra_b = xx_loadu_128(mask);
- const __m128i v_rb_b = xx_loadu_128(mask + mask_stride);
- const __m128i v_rvs_b = _mm_add_epi8(v_ra_b, v_rb_b);
- const __m128i v_r_s_b = _mm_shuffle_epi8(v_rvs_b, v_shuffle_b);
- const __m128i v_r0_s_w = _mm_cvtepu8_epi16(v_r_s_b);
- const __m128i v_r1_s_w = _mm_cvtepu8_epi16(_mm_srli_si128(v_r_s_b, 8));
- const __m128i v_rs_w = _mm_add_epi16(v_r0_s_w, v_r1_s_w);
- const __m128i v_m0_w = xx_roundn_epu16(v_rs_w, 2);
- const __m128i v_m0_b = _mm_packus_epi16(v_m0_w, v_m0_w);
- const __m128i v_m1_b = _mm_sub_epi8(v_maxval_b, v_m0_b);
-
- const __m128i v_res_b = blend_8_u8(src0, src1, &v_m0_b, &v_m1_b, &_r);
-
- xx_storel_64(dst, v_res_b);
-
- dst += dst_stride;
- src0 += src0_stride;
- src1 += src1_stride;
- mask += 2 * mask_stride;
- } while (--h);
- break;
- case 16:
- blend_a64_mask_sx_sy_w16_avx2(dst, dst_stride, src0, src0_stride, src1,
- src1_stride, mask, mask_stride, h);
- break;
- default:
- blend_a64_mask_sx_sy_w32n_avx2(dst, dst_stride, src0, src0_stride, src1,
- src1_stride, mask, mask_stride, w, h);
- break;
- }
-}
-
-static INLINE void blend_a64_mask_sx_w16_avx2(
- uint8_t *dst, uint32_t dst_stride, const uint8_t *src0,
- uint32_t src0_stride, const uint8_t *src1, uint32_t src1_stride,
- const uint8_t *mask, uint32_t mask_stride, int h) {
- const __m256i v_maxval_b = _mm256_set1_epi8(AOM_BLEND_A64_MAX_ALPHA);
- const __m256i v_zmask_b = _mm256_set1_epi16(0xff);
- do {
- const __m256i v_rl_b = yy_loadu_256(mask);
- const __m256i v_al_b =
- _mm256_avg_epu8(v_rl_b, _mm256_srli_si256(v_rl_b, 1));
-
- const __m256i v_m0_w = _mm256_and_si256(v_al_b, v_zmask_b);
- const __m256i v_m0_b = _mm256_packus_epi16(v_m0_w, _mm256_setzero_si256());
- const __m256i v_m1_b = _mm256_sub_epi8(v_maxval_b, v_m0_b);
-
- const __m256i v_res_b = blend_16_u8_avx2(src0, src1, &v_m0_b, &v_m1_b,
- AOM_BLEND_A64_ROUND_BITS);
-
- xx_storeu_128(dst, _mm256_castsi256_si128(v_res_b));
- dst += dst_stride;
- src0 += src0_stride;
- src1 += src1_stride;
- mask += mask_stride;
- } while (--h);
-}
-
-static INLINE void blend_a64_mask_sx_w32n_avx2(
- uint8_t *dst, uint32_t dst_stride, const uint8_t *src0,
- uint32_t src0_stride, const uint8_t *src1, uint32_t src1_stride,
- const uint8_t *mask, uint32_t mask_stride, int w, int h) {
- const __m256i v_shuffle_b = yy_loadu_256(g_blend_a64_mask_shuffle);
- const __m256i v_maxval_b = _mm256_set1_epi8(AOM_BLEND_A64_MAX_ALPHA);
- do {
- int c;
- for (c = 0; c < w; c += 32) {
- const __m256i v_r0_b = yy_loadu_256(mask + 2 * c);
- const __m256i v_r1_b = yy_loadu_256(mask + 2 * c + 32);
- const __m256i v_r0_s_b = _mm256_shuffle_epi8(v_r0_b, v_shuffle_b);
- const __m256i v_r1_s_b = _mm256_shuffle_epi8(v_r1_b, v_shuffle_b);
- const __m256i v_al_b =
- _mm256_avg_epu8(v_r0_s_b, _mm256_srli_si256(v_r0_s_b, 8));
- const __m256i v_ah_b =
- _mm256_avg_epu8(v_r1_s_b, _mm256_srli_si256(v_r1_s_b, 8));
-
- const __m256i v_m0_b =
- _mm256_permute4x64_epi64(_mm256_unpacklo_epi64(v_al_b, v_ah_b), 0xd8);
- const __m256i v_m1_b = _mm256_sub_epi8(v_maxval_b, v_m0_b);
-
- const __m256i v_res_b = blend_32_u8_avx2(
- src0 + c, src1 + c, &v_m0_b, &v_m1_b, AOM_BLEND_A64_ROUND_BITS);
-
- yy_storeu_256(dst + c, v_res_b);
- }
- dst += dst_stride;
- src0 += src0_stride;
- src1 += src1_stride;
- mask += mask_stride;
- } while (--h);
-}
-
-static INLINE void blend_a64_mask_sx_avx2(
- uint8_t *dst, uint32_t dst_stride, const uint8_t *src0,
- uint32_t src0_stride, const uint8_t *src1, uint32_t src1_stride,
- const uint8_t *mask, uint32_t mask_stride, int w, int h) {
- const __m128i v_shuffle_b = xx_loadu_128(g_blend_a64_mask_shuffle);
- const __m128i v_maxval_b = _mm_set1_epi8(AOM_BLEND_A64_MAX_ALPHA);
- const __m128i _r = _mm_set1_epi16(1 << (15 - AOM_BLEND_A64_ROUND_BITS));
- switch (w) {
- case 4:
- do {
- const __m128i v_r_b = xx_loadl_64(mask);
- const __m128i v_r0_s_b = _mm_shuffle_epi8(v_r_b, v_shuffle_b);
- const __m128i v_r_lo_b = _mm_unpacklo_epi64(v_r0_s_b, v_r0_s_b);
- const __m128i v_r_hi_b = _mm_unpackhi_epi64(v_r0_s_b, v_r0_s_b);
- const __m128i v_m0_b = _mm_avg_epu8(v_r_lo_b, v_r_hi_b);
- const __m128i v_m1_b = _mm_sub_epi8(v_maxval_b, v_m0_b);
-
- const __m128i v_res_b = blend_4_u8(src0, src1, &v_m0_b, &v_m1_b, &_r);
-
- xx_storel_32(dst, v_res_b);
-
- dst += dst_stride;
- src0 += src0_stride;
- src1 += src1_stride;
- mask += mask_stride;
- } while (--h);
- break;
- case 8:
- do {
- const __m128i v_r_b = xx_loadu_128(mask);
- const __m128i v_r0_s_b = _mm_shuffle_epi8(v_r_b, v_shuffle_b);
- const __m128i v_r_lo_b = _mm_unpacklo_epi64(v_r0_s_b, v_r0_s_b);
- const __m128i v_r_hi_b = _mm_unpackhi_epi64(v_r0_s_b, v_r0_s_b);
- const __m128i v_m0_b = _mm_avg_epu8(v_r_lo_b, v_r_hi_b);
- const __m128i v_m1_b = _mm_sub_epi8(v_maxval_b, v_m0_b);
-
- const __m128i v_res_b = blend_8_u8(src0, src1, &v_m0_b, &v_m1_b, &_r);
-
- xx_storel_64(dst, v_res_b);
-
- dst += dst_stride;
- src0 += src0_stride;
- src1 += src1_stride;
- mask += mask_stride;
- } while (--h);
- break;
- case 16:
- blend_a64_mask_sx_w16_avx2(dst, dst_stride, src0, src0_stride, src1,
- src1_stride, mask, mask_stride, h);
- break;
- default:
- blend_a64_mask_sx_w32n_avx2(dst, dst_stride, src0, src0_stride, src1,
- src1_stride, mask, mask_stride, w, h);
- break;
- }
-}
-
-static INLINE void blend_a64_mask_sy_w16_avx2(
- uint8_t *dst, uint32_t dst_stride, const uint8_t *src0,
- uint32_t src0_stride, const uint8_t *src1, uint32_t src1_stride,
- const uint8_t *mask, uint32_t mask_stride, int h) {
- const __m128i _r = _mm_set1_epi16(1 << (15 - AOM_BLEND_A64_ROUND_BITS));
- const __m128i v_maxval_b = _mm_set1_epi8(AOM_BLEND_A64_MAX_ALPHA);
- do {
- const __m128i v_ra_b = xx_loadu_128(mask);
- const __m128i v_rb_b = xx_loadu_128(mask + mask_stride);
- const __m128i v_m0_b = _mm_avg_epu8(v_ra_b, v_rb_b);
-
- const __m128i v_m1_b = _mm_sub_epi16(v_maxval_b, v_m0_b);
- const __m128i v_res_b = blend_16_u8(src0, src1, &v_m0_b, &v_m1_b, &_r);
-
- xx_storeu_128(dst, v_res_b);
- dst += dst_stride;
- src0 += src0_stride;
- src1 += src1_stride;
- mask += 2 * mask_stride;
- } while (--h);
-}
-
-static INLINE void blend_a64_mask_sy_w32n_avx2(
- uint8_t *dst, uint32_t dst_stride, const uint8_t *src0,
- uint32_t src0_stride, const uint8_t *src1, uint32_t src1_stride,
- const uint8_t *mask, uint32_t mask_stride, int w, int h) {
- const __m256i v_maxval_b = _mm256_set1_epi8(AOM_BLEND_A64_MAX_ALPHA);
- do {
- int c;
- for (c = 0; c < w; c += 32) {
- const __m256i v_ra_b = yy_loadu_256(mask + c);
- const __m256i v_rb_b = yy_loadu_256(mask + c + mask_stride);
- const __m256i v_m0_b = _mm256_avg_epu8(v_ra_b, v_rb_b);
- const __m256i v_m1_b = _mm256_sub_epi8(v_maxval_b, v_m0_b);
- const __m256i v_res_b = blend_32_u8_avx2(
- src0 + c, src1 + c, &v_m0_b, &v_m1_b, AOM_BLEND_A64_ROUND_BITS);
-
- yy_storeu_256(dst + c, v_res_b);
- }
- dst += dst_stride;
- src0 += src0_stride;
- src1 += src1_stride;
- mask += 2 * mask_stride;
- } while (--h);
-}
-
-static INLINE void blend_a64_mask_sy_avx2(
- uint8_t *dst, uint32_t dst_stride, const uint8_t *src0,
- uint32_t src0_stride, const uint8_t *src1, uint32_t src1_stride,
- const uint8_t *mask, uint32_t mask_stride, int w, int h) {
- const __m128i _r = _mm_set1_epi16(1 << (15 - AOM_BLEND_A64_ROUND_BITS));
- const __m128i v_maxval_b = _mm_set1_epi8(AOM_BLEND_A64_MAX_ALPHA);
- switch (w) {
- case 4:
- do {
- const __m128i v_ra_b = xx_loadl_32(mask);
- const __m128i v_rb_b = xx_loadl_32(mask + mask_stride);
- const __m128i v_m0_b = _mm_avg_epu8(v_ra_b, v_rb_b);
- const __m128i v_m1_b = _mm_sub_epi8(v_maxval_b, v_m0_b);
- const __m128i v_res_b = blend_4_u8(src0, src1, &v_m0_b, &v_m1_b, &_r);
-
- xx_storel_32(dst, v_res_b);
-
- dst += dst_stride;
- src0 += src0_stride;
- src1 += src1_stride;
- mask += 2 * mask_stride;
- } while (--h);
- break;
- case 8:
- do {
- const __m128i v_ra_b = xx_loadl_64(mask);
- const __m128i v_rb_b = xx_loadl_64(mask + mask_stride);
- const __m128i v_m0_b = _mm_avg_epu8(v_ra_b, v_rb_b);
- const __m128i v_m1_b = _mm_sub_epi8(v_maxval_b, v_m0_b);
- const __m128i v_res_b = blend_8_u8(src0, src1, &v_m0_b, &v_m1_b, &_r);
-
- xx_storel_64(dst, v_res_b);
-
- dst += dst_stride;
- src0 += src0_stride;
- src1 += src1_stride;
- mask += 2 * mask_stride;
- } while (--h);
- break;
- case 16:
- blend_a64_mask_sy_w16_avx2(dst, dst_stride, src0, src0_stride, src1,
- src1_stride, mask, mask_stride, h);
- break;
- default:
- blend_a64_mask_sy_w32n_avx2(dst, dst_stride, src0, src0_stride, src1,
- src1_stride, mask, mask_stride, w, h);
- }
-}
-
-static INLINE void blend_a64_mask_w32n_avx2(
- uint8_t *dst, uint32_t dst_stride, const uint8_t *src0,
- uint32_t src0_stride, const uint8_t *src1, uint32_t src1_stride,
- const uint8_t *mask, uint32_t mask_stride, int w, int h) {
- const __m256i v_maxval_b = _mm256_set1_epi8(AOM_BLEND_A64_MAX_ALPHA);
- do {
- int c;
- for (c = 0; c < w; c += 32) {
- const __m256i v_m0_b = yy_loadu_256(mask + c);
- const __m256i v_m1_b = _mm256_sub_epi8(v_maxval_b, v_m0_b);
-
- const __m256i v_res_b = blend_32_u8_avx2(
- src0 + c, src1 + c, &v_m0_b, &v_m1_b, AOM_BLEND_A64_ROUND_BITS);
-
- yy_storeu_256(dst + c, v_res_b);
- }
- dst += dst_stride;
- src0 += src0_stride;
- src1 += src1_stride;
- mask += mask_stride;
- } while (--h);
-}
-
-static INLINE void blend_a64_mask_avx2(
- uint8_t *dst, uint32_t dst_stride, const uint8_t *src0,
- uint32_t src0_stride, const uint8_t *src1, uint32_t src1_stride,
- const uint8_t *mask, uint32_t mask_stride, int w, int h) {
- const __m128i v_maxval_b = _mm_set1_epi8(AOM_BLEND_A64_MAX_ALPHA);
- const __m128i _r = _mm_set1_epi16(1 << (15 - AOM_BLEND_A64_ROUND_BITS));
- switch (w) {
- case 4:
- do {
- const __m128i v_m0_b = xx_loadl_32(mask);
- const __m128i v_m1_b = _mm_sub_epi8(v_maxval_b, v_m0_b);
- const __m128i v_res_b = blend_4_u8(src0, src1, &v_m0_b, &v_m1_b, &_r);
-
- xx_storel_32(dst, v_res_b);
-
- dst += dst_stride;
- src0 += src0_stride;
- src1 += src1_stride;
- mask += mask_stride;
- } while (--h);
- break;
- case 8:
- do {
- const __m128i v_m0_b = xx_loadl_64(mask);
- const __m128i v_m1_b = _mm_sub_epi8(v_maxval_b, v_m0_b);
- const __m128i v_res_b = blend_8_u8(src0, src1, &v_m0_b, &v_m1_b, &_r);
-
- xx_storel_64(dst, v_res_b);
-
- dst += dst_stride;
- src0 += src0_stride;
- src1 += src1_stride;
- mask += mask_stride;
- } while (--h);
- break;
- case 16:
- do {
- const __m128i v_m0_b = xx_loadu_128(mask);
- const __m128i v_m1_b = _mm_sub_epi8(v_maxval_b, v_m0_b);
- const __m128i v_res_b = blend_16_u8(src0, src1, &v_m0_b, &v_m1_b, &_r);
-
- xx_storeu_128(dst, v_res_b);
- dst += dst_stride;
- src0 += src0_stride;
- src1 += src1_stride;
- mask += mask_stride;
- } while (--h);
- break;
- default:
- blend_a64_mask_w32n_avx2(dst, dst_stride, src0, src0_stride, src1,
- src1_stride, mask, mask_stride, w, h);
- }
-}
-
-void aom_blend_a64_mask_avx2(uint8_t *dst, uint32_t dst_stride,
- const uint8_t *src0, uint32_t src0_stride,
- const uint8_t *src1, uint32_t src1_stride,
- const uint8_t *mask, uint32_t mask_stride, int w,
- int h, int subx, int suby) {
- assert(IMPLIES(src0 == dst, src0_stride == dst_stride));
- assert(IMPLIES(src1 == dst, src1_stride == dst_stride));
-
- assert(h >= 1);
- assert(w >= 1);
- assert(IS_POWER_OF_TWO(h));
- assert(IS_POWER_OF_TWO(w));
-
- if (UNLIKELY((h | w) & 3)) { // if (w <= 2 || h <= 2)
- aom_blend_a64_mask_c(dst, dst_stride, src0, src0_stride, src1, src1_stride,
- mask, mask_stride, w, h, subx, suby);
- } else {
- if (subx & suby) {
- blend_a64_mask_sx_sy_avx2(dst, dst_stride, src0, src0_stride, src1,
- src1_stride, mask, mask_stride, w, h);
- } else if (subx) {
- blend_a64_mask_sx_avx2(dst, dst_stride, src0, src0_stride, src1,
- src1_stride, mask, mask_stride, w, h);
- } else if (suby) {
- blend_a64_mask_sy_avx2(dst, dst_stride, src0, src0_stride, src1,
- src1_stride, mask, mask_stride, w, h);
- } else {
- blend_a64_mask_avx2(dst, dst_stride, src0, src0_stride, src1, src1_stride,
- mask, mask_stride, w, h);
- }
- }
-}
diff --git a/third_party/aom/aom_dsp/x86/blend_a64_mask_sse4.c b/third_party/aom/aom_dsp/x86/blend_a64_mask_sse4.c
deleted file mode 100644
index 9d6b4c2f7..000000000
--- a/third_party/aom/aom_dsp/x86/blend_a64_mask_sse4.c
+++ /dev/null
@@ -1,1109 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#include <smmintrin.h> // SSE4.1
-
-#include <assert.h>
-
-#include "aom/aom_integer.h"
-#include "aom_ports/mem.h"
-#include "aom_dsp/aom_dsp_common.h"
-#include "aom_dsp/blend.h"
-
-#include "aom_dsp/x86/synonyms.h"
-#include "aom_dsp/x86/blend_sse4.h"
-#include "aom_dsp/x86/blend_mask_sse4.h"
-
-#include "config/aom_dsp_rtcd.h"
-
-//////////////////////////////////////////////////////////////////////////////
-// No sub-sampling
-//////////////////////////////////////////////////////////////////////////////
-
-static void blend_a64_mask_w4_sse4_1(uint8_t *dst, uint32_t dst_stride,
- const uint8_t *src0, uint32_t src0_stride,
- const uint8_t *src1, uint32_t src1_stride,
- const uint8_t *mask, uint32_t mask_stride,
- int w, int h) {
- (void)w;
- const __m128i v_maxval_b = _mm_set1_epi8(AOM_BLEND_A64_MAX_ALPHA);
- const __m128i _r = _mm_set1_epi16(1 << (15 - AOM_BLEND_A64_ROUND_BITS));
- do {
- const __m128i v_m0_b = xx_loadl_32(mask);
- const __m128i v_m1_b = _mm_sub_epi8(v_maxval_b, v_m0_b);
- const __m128i v_res_b = blend_4_u8(src0, src1, &v_m0_b, &v_m1_b, &_r);
- xx_storel_32(dst, v_res_b);
-
- dst += dst_stride;
- src0 += src0_stride;
- src1 += src1_stride;
- mask += mask_stride;
- } while (--h);
-}
-
-static void blend_a64_mask_w8_sse4_1(uint8_t *dst, uint32_t dst_stride,
- const uint8_t *src0, uint32_t src0_stride,
- const uint8_t *src1, uint32_t src1_stride,
- const uint8_t *mask, uint32_t mask_stride,
- int w, int h) {
- (void)w;
- const __m128i v_maxval_b = _mm_set1_epi8(AOM_BLEND_A64_MAX_ALPHA);
- const __m128i _r = _mm_set1_epi16(1 << (15 - AOM_BLEND_A64_ROUND_BITS));
- do {
- const __m128i v_m0_b = xx_loadl_64(mask);
- const __m128i v_m1_b = _mm_sub_epi8(v_maxval_b, v_m0_b);
- const __m128i v_res_b = blend_8_u8(src0, src1, &v_m0_b, &v_m1_b, &_r);
- xx_storel_64(dst, v_res_b);
-
- dst += dst_stride;
- src0 += src0_stride;
- src1 += src1_stride;
- mask += mask_stride;
- } while (--h);
-}
-
-static void blend_a64_mask_w16n_sse4_1(
- uint8_t *dst, uint32_t dst_stride, const uint8_t *src0,
- uint32_t src0_stride, const uint8_t *src1, uint32_t src1_stride,
- const uint8_t *mask, uint32_t mask_stride, int w, int h) {
- const __m128i v_maxval_b = _mm_set1_epi8(AOM_BLEND_A64_MAX_ALPHA);
- const __m128i _r = _mm_set1_epi16(1 << (15 - AOM_BLEND_A64_ROUND_BITS));
-
- do {
- int c;
- for (c = 0; c < w; c += 16) {
- const __m128i v_m0_b = xx_loadu_128(mask + c);
- const __m128i v_m1_b = _mm_sub_epi8(v_maxval_b, v_m0_b);
-
- const __m128i v_res_b =
- blend_16_u8(src0 + c, src1 + c, &v_m0_b, &v_m1_b, &_r);
-
- xx_storeu_128(dst + c, v_res_b);
- }
- dst += dst_stride;
- src0 += src0_stride;
- src1 += src1_stride;
- mask += mask_stride;
- } while (--h);
-}
-
-//////////////////////////////////////////////////////////////////////////////
-// Horizontal sub-sampling
-//////////////////////////////////////////////////////////////////////////////
-
-static void blend_a64_mask_sx_w4_sse4_1(
- uint8_t *dst, uint32_t dst_stride, const uint8_t *src0,
- uint32_t src0_stride, const uint8_t *src1, uint32_t src1_stride,
- const uint8_t *mask, uint32_t mask_stride, int w, int h) {
- (void)w;
-
- const __m128i v_shuffle_b = xx_loadu_128(g_blend_a64_mask_shuffle);
- const __m128i v_maxval_b = _mm_set1_epi8(AOM_BLEND_A64_MAX_ALPHA);
- const __m128i _r = _mm_set1_epi16(1 << (15 - AOM_BLEND_A64_ROUND_BITS));
- do {
- const __m128i v_r_b = xx_loadl_64(mask);
- const __m128i v_r0_s_b = _mm_shuffle_epi8(v_r_b, v_shuffle_b);
- const __m128i v_r_lo_b = _mm_unpacklo_epi64(v_r0_s_b, v_r0_s_b);
- const __m128i v_r_hi_b = _mm_unpackhi_epi64(v_r0_s_b, v_r0_s_b);
- const __m128i v_m0_b = _mm_avg_epu8(v_r_lo_b, v_r_hi_b);
- const __m128i v_m1_b = _mm_sub_epi8(v_maxval_b, v_m0_b);
-
- const __m128i v_res_b = blend_4_u8(src0, src1, &v_m0_b, &v_m1_b, &_r);
- xx_storel_32(dst, v_res_b);
-
- dst += dst_stride;
- src0 += src0_stride;
- src1 += src1_stride;
- mask += mask_stride;
- } while (--h);
-}
-
-static void blend_a64_mask_sx_w8_sse4_1(
- uint8_t *dst, uint32_t dst_stride, const uint8_t *src0,
- uint32_t src0_stride, const uint8_t *src1, uint32_t src1_stride,
- const uint8_t *mask, uint32_t mask_stride, int w, int h) {
- (void)w;
-
- const __m128i v_shuffle_b = xx_loadu_128(g_blend_a64_mask_shuffle);
- const __m128i v_maxval_b = _mm_set1_epi8(AOM_BLEND_A64_MAX_ALPHA);
- const __m128i _r = _mm_set1_epi16(1 << (15 - AOM_BLEND_A64_ROUND_BITS));
- do {
- const __m128i v_r_b = xx_loadu_128(mask);
- const __m128i v_r0_s_b = _mm_shuffle_epi8(v_r_b, v_shuffle_b);
- const __m128i v_r_lo_b = _mm_unpacklo_epi64(v_r0_s_b, v_r0_s_b);
- const __m128i v_r_hi_b = _mm_unpackhi_epi64(v_r0_s_b, v_r0_s_b);
- const __m128i v_m0_b = _mm_avg_epu8(v_r_lo_b, v_r_hi_b);
- const __m128i v_m1_b = _mm_sub_epi8(v_maxval_b, v_m0_b);
-
- const __m128i v_res_b = blend_8_u8(src0, src1, &v_m0_b, &v_m1_b, &_r);
-
- xx_storel_64(dst, v_res_b);
-
- dst += dst_stride;
- src0 += src0_stride;
- src1 += src1_stride;
- mask += mask_stride;
- } while (--h);
-}
-
-static void blend_a64_mask_sx_w16n_sse4_1(
- uint8_t *dst, uint32_t dst_stride, const uint8_t *src0,
- uint32_t src0_stride, const uint8_t *src1, uint32_t src1_stride,
- const uint8_t *mask, uint32_t mask_stride, int w, int h) {
- const __m128i v_shuffle_b = xx_loadu_128(g_blend_a64_mask_shuffle);
- const __m128i v_maxval_b = _mm_set1_epi8(AOM_BLEND_A64_MAX_ALPHA);
- const __m128i _r = _mm_set1_epi16(1 << (15 - AOM_BLEND_A64_ROUND_BITS));
-
- do {
- int c;
- for (c = 0; c < w; c += 16) {
- const __m128i v_r0_b = xx_loadu_128(mask + 2 * c);
- const __m128i v_r1_b = xx_loadu_128(mask + 2 * c + 16);
- const __m128i v_r0_s_b = _mm_shuffle_epi8(v_r0_b, v_shuffle_b);
- const __m128i v_r1_s_b = _mm_shuffle_epi8(v_r1_b, v_shuffle_b);
- const __m128i v_r_lo_b = _mm_unpacklo_epi64(v_r0_s_b, v_r1_s_b);
- const __m128i v_r_hi_b = _mm_unpackhi_epi64(v_r0_s_b, v_r1_s_b);
- const __m128i v_m0_b = _mm_avg_epu8(v_r_lo_b, v_r_hi_b);
- const __m128i v_m1_b = _mm_sub_epi8(v_maxval_b, v_m0_b);
-
- const __m128i v_res_b =
- blend_16_u8(src0 + c, src1 + c, &v_m0_b, &v_m1_b, &_r);
-
- xx_storeu_128(dst + c, v_res_b);
- }
- dst += dst_stride;
- src0 += src0_stride;
- src1 += src1_stride;
- mask += mask_stride;
- } while (--h);
-}
-
-//////////////////////////////////////////////////////////////////////////////
-// Vertical sub-sampling
-//////////////////////////////////////////////////////////////////////////////
-
-static void blend_a64_mask_sy_w4_sse4_1(
- uint8_t *dst, uint32_t dst_stride, const uint8_t *src0,
- uint32_t src0_stride, const uint8_t *src1, uint32_t src1_stride,
- const uint8_t *mask, uint32_t mask_stride, int w, int h) {
- (void)w;
-
- const __m128i v_maxval_b = _mm_set1_epi8(AOM_BLEND_A64_MAX_ALPHA);
- const __m128i _r = _mm_set1_epi16(1 << (15 - AOM_BLEND_A64_ROUND_BITS));
-
- do {
- const __m128i v_ra_b = xx_loadl_32(mask);
- const __m128i v_rb_b = xx_loadl_32(mask + mask_stride);
- const __m128i v_m0_b = _mm_avg_epu8(v_ra_b, v_rb_b);
- const __m128i v_m1_b = _mm_sub_epi8(v_maxval_b, v_m0_b);
-
- const __m128i v_res_b = blend_4_u8(src0, src1, &v_m0_b, &v_m1_b, &_r);
-
- xx_storel_32(dst, v_res_b);
-
- dst += dst_stride;
- src0 += src0_stride;
- src1 += src1_stride;
- mask += 2 * mask_stride;
- } while (--h);
-}
-
-static void blend_a64_mask_sy_w8_sse4_1(
- uint8_t *dst, uint32_t dst_stride, const uint8_t *src0,
- uint32_t src0_stride, const uint8_t *src1, uint32_t src1_stride,
- const uint8_t *mask, uint32_t mask_stride, int w, int h) {
- (void)w;
-
- const __m128i v_maxval_b = _mm_set1_epi8(AOM_BLEND_A64_MAX_ALPHA);
- const __m128i _r = _mm_set1_epi16(1 << (15 - AOM_BLEND_A64_ROUND_BITS));
- do {
- const __m128i v_ra_b = xx_loadl_64(mask);
- const __m128i v_rb_b = xx_loadl_64(mask + mask_stride);
- const __m128i v_m0_b = _mm_avg_epu8(v_ra_b, v_rb_b);
- const __m128i v_m1_b = _mm_sub_epi8(v_maxval_b, v_m0_b);
- const __m128i v_res_b = blend_8_u8(src0, src1, &v_m0_b, &v_m1_b, &_r);
-
- xx_storel_64(dst, v_res_b);
-
- dst += dst_stride;
- src0 += src0_stride;
- src1 += src1_stride;
- mask += 2 * mask_stride;
- } while (--h);
-}
-
-static void blend_a64_mask_sy_w16n_sse4_1(
- uint8_t *dst, uint32_t dst_stride, const uint8_t *src0,
- uint32_t src0_stride, const uint8_t *src1, uint32_t src1_stride,
- const uint8_t *mask, uint32_t mask_stride, int w, int h) {
- const __m128i v_maxval_b = _mm_set1_epi8(AOM_BLEND_A64_MAX_ALPHA);
- const __m128i _r = _mm_set1_epi16(1 << (15 - AOM_BLEND_A64_ROUND_BITS));
- do {
- int c;
- for (c = 0; c < w; c += 16) {
- const __m128i v_ra_b = xx_loadu_128(mask + c);
- const __m128i v_rb_b = xx_loadu_128(mask + c + mask_stride);
- const __m128i v_m0_b = _mm_avg_epu8(v_ra_b, v_rb_b);
- const __m128i v_m1_b = _mm_sub_epi8(v_maxval_b, v_m0_b);
-
- const __m128i v_res_b =
- blend_16_u8(src0 + c, src1 + c, &v_m0_b, &v_m1_b, &_r);
-
- xx_storeu_128(dst + c, v_res_b);
- }
- dst += dst_stride;
- src0 += src0_stride;
- src1 += src1_stride;
- mask += 2 * mask_stride;
- } while (--h);
-}
-
-//////////////////////////////////////////////////////////////////////////////
-// Horizontal and Vertical sub-sampling
-//////////////////////////////////////////////////////////////////////////////
-
-static void blend_a64_mask_sx_sy_w4_sse4_1(
- uint8_t *dst, uint32_t dst_stride, const uint8_t *src0,
- uint32_t src0_stride, const uint8_t *src1, uint32_t src1_stride,
- const uint8_t *mask, uint32_t mask_stride, int w, int h) {
- const __m128i v_shuffle_b = xx_loadu_128(g_blend_a64_mask_shuffle);
- const __m128i v_maxval_b = _mm_set1_epi8(AOM_BLEND_A64_MAX_ALPHA);
- const __m128i _r = _mm_set1_epi16(1 << (15 - AOM_BLEND_A64_ROUND_BITS));
- (void)w;
-
- do {
- const __m128i v_ra_b = xx_loadl_64(mask);
- const __m128i v_rb_b = xx_loadl_64(mask + mask_stride);
- const __m128i v_rvs_b = _mm_add_epi8(v_ra_b, v_rb_b);
- const __m128i v_r_s_b = _mm_shuffle_epi8(v_rvs_b, v_shuffle_b);
- const __m128i v_r0_s_w = _mm_cvtepu8_epi16(v_r_s_b);
- const __m128i v_r1_s_w = _mm_cvtepu8_epi16(_mm_srli_si128(v_r_s_b, 8));
- const __m128i v_rs_w = _mm_add_epi16(v_r0_s_w, v_r1_s_w);
- const __m128i v_m0_w = xx_roundn_epu16(v_rs_w, 2);
- const __m128i v_m0_b = _mm_packus_epi16(v_m0_w, v_m0_w);
- const __m128i v_m1_b = _mm_sub_epi8(v_maxval_b, v_m0_b);
-
- const __m128i v_res_b = blend_4_u8(src0, src1, &v_m0_b, &v_m1_b, &_r);
-
- xx_storel_32(dst, v_res_b);
-
- dst += dst_stride;
- src0 += src0_stride;
- src1 += src1_stride;
- mask += 2 * mask_stride;
- } while (--h);
-}
-
-static void blend_a64_mask_sx_sy_w8_sse4_1(
- uint8_t *dst, uint32_t dst_stride, const uint8_t *src0,
- uint32_t src0_stride, const uint8_t *src1, uint32_t src1_stride,
- const uint8_t *mask, uint32_t mask_stride, int w, int h) {
- const __m128i v_shuffle_b = xx_loadu_128(g_blend_a64_mask_shuffle);
- const __m128i v_maxval_b = _mm_set1_epi8(AOM_BLEND_A64_MAX_ALPHA);
- const __m128i _r = _mm_set1_epi16(1 << (15 - AOM_BLEND_A64_ROUND_BITS));
- (void)w;
-
- do {
- const __m128i v_ra_b = xx_loadu_128(mask);
- const __m128i v_rb_b = xx_loadu_128(mask + mask_stride);
-
- const __m128i v_rvs_b = _mm_add_epi8(v_ra_b, v_rb_b);
- const __m128i v_r_s_b = _mm_shuffle_epi8(v_rvs_b, v_shuffle_b);
- const __m128i v_r0_s_w = _mm_cvtepu8_epi16(v_r_s_b);
- const __m128i v_r1_s_w = _mm_cvtepu8_epi16(_mm_srli_si128(v_r_s_b, 8));
- const __m128i v_rs_w = _mm_add_epi16(v_r0_s_w, v_r1_s_w);
- const __m128i v_m0_w = xx_roundn_epu16(v_rs_w, 2);
- const __m128i v_m0_b = _mm_packus_epi16(v_m0_w, v_m0_w);
- const __m128i v_m1_b = _mm_sub_epi8(v_maxval_b, v_m0_b);
-
- const __m128i v_res_b = blend_8_u8(src0, src1, &v_m0_b, &v_m1_b, &_r);
-
- xx_storel_64(dst, v_res_b);
-
- dst += dst_stride;
- src0 += src0_stride;
- src1 += src1_stride;
- mask += 2 * mask_stride;
- } while (--h);
-}
-
-static void blend_a64_mask_sx_sy_w16n_sse4_1(
- uint8_t *dst, uint32_t dst_stride, const uint8_t *src0,
- uint32_t src0_stride, const uint8_t *src1, uint32_t src1_stride,
- const uint8_t *mask, uint32_t mask_stride, int w, int h) {
- const __m128i v_zmask_b = _mm_set_epi8(0, 0xff, 0, 0xff, 0, 0xff, 0, 0xff, 0,
- 0xff, 0, 0xff, 0, 0xff, 0, 0xff);
- const __m128i v_maxval_b = _mm_set1_epi8(AOM_BLEND_A64_MAX_ALPHA);
- const __m128i _r = _mm_set1_epi16(1 << (15 - AOM_BLEND_A64_ROUND_BITS));
- do {
- int c;
- for (c = 0; c < w; c += 16) {
- const __m128i v_ral_b = xx_loadu_128(mask + 2 * c);
- const __m128i v_rah_b = xx_loadu_128(mask + 2 * c + 16);
- const __m128i v_rbl_b = xx_loadu_128(mask + mask_stride + 2 * c);
- const __m128i v_rbh_b = xx_loadu_128(mask + mask_stride + 2 * c + 16);
- const __m128i v_rvsl_b = _mm_add_epi8(v_ral_b, v_rbl_b);
- const __m128i v_rvsh_b = _mm_add_epi8(v_rah_b, v_rbh_b);
- const __m128i v_rvsal_w = _mm_and_si128(v_rvsl_b, v_zmask_b);
- const __m128i v_rvsah_w = _mm_and_si128(v_rvsh_b, v_zmask_b);
- const __m128i v_rvsbl_w =
- _mm_and_si128(_mm_srli_si128(v_rvsl_b, 1), v_zmask_b);
- const __m128i v_rvsbh_w =
- _mm_and_si128(_mm_srli_si128(v_rvsh_b, 1), v_zmask_b);
- const __m128i v_rsl_w = _mm_add_epi16(v_rvsal_w, v_rvsbl_w);
- const __m128i v_rsh_w = _mm_add_epi16(v_rvsah_w, v_rvsbh_w);
-
- const __m128i v_m0l_w = xx_roundn_epu16(v_rsl_w, 2);
- const __m128i v_m0h_w = xx_roundn_epu16(v_rsh_w, 2);
- const __m128i v_m0_b = _mm_packus_epi16(v_m0l_w, v_m0h_w);
- const __m128i v_m1_b = _mm_sub_epi8(v_maxval_b, v_m0_b);
-
- const __m128i v_res_b =
- blend_16_u8(src0 + c, src1 + c, &v_m0_b, &v_m1_b, &_r);
-
- xx_storeu_128(dst + c, v_res_b);
- }
- dst += dst_stride;
- src0 += src0_stride;
- src1 += src1_stride;
- mask += 2 * mask_stride;
- } while (--h);
-}
-
-//////////////////////////////////////////////////////////////////////////////
-// Dispatch
-//////////////////////////////////////////////////////////////////////////////
-
-void aom_blend_a64_mask_sse4_1(uint8_t *dst, uint32_t dst_stride,
- const uint8_t *src0, uint32_t src0_stride,
- const uint8_t *src1, uint32_t src1_stride,
- const uint8_t *mask, uint32_t mask_stride, int w,
- int h, int subx, int suby) {
- typedef void (*blend_fn)(
- uint8_t * dst, uint32_t dst_stride, const uint8_t *src0,
- uint32_t src0_stride, const uint8_t *src1, uint32_t src1_stride,
- const uint8_t *mask, uint32_t mask_stride, int w, int h);
-
- // Dimensions are: width_index X subx X suby
- static const blend_fn blend[3][2][2] = {
- { // w % 16 == 0
- { blend_a64_mask_w16n_sse4_1, blend_a64_mask_sy_w16n_sse4_1 },
- { blend_a64_mask_sx_w16n_sse4_1, blend_a64_mask_sx_sy_w16n_sse4_1 } },
- { // w == 4
- { blend_a64_mask_w4_sse4_1, blend_a64_mask_sy_w4_sse4_1 },
- { blend_a64_mask_sx_w4_sse4_1, blend_a64_mask_sx_sy_w4_sse4_1 } },
- { // w == 8
- { blend_a64_mask_w8_sse4_1, blend_a64_mask_sy_w8_sse4_1 },
- { blend_a64_mask_sx_w8_sse4_1, blend_a64_mask_sx_sy_w8_sse4_1 } }
- };
-
- assert(IMPLIES(src0 == dst, src0_stride == dst_stride));
- assert(IMPLIES(src1 == dst, src1_stride == dst_stride));
-
- assert(h >= 1);
- assert(w >= 1);
- assert(IS_POWER_OF_TWO(h));
- assert(IS_POWER_OF_TWO(w));
-
- if (UNLIKELY((h | w) & 3)) { // if (w <= 2 || h <= 2)
- aom_blend_a64_mask_c(dst, dst_stride, src0, src0_stride, src1, src1_stride,
- mask, mask_stride, w, h, subx, suby);
- } else {
- blend[(w >> 2) & 3][subx != 0][suby != 0](dst, dst_stride, src0,
- src0_stride, src1, src1_stride,
- mask, mask_stride, w, h);
- }
-}
-
-//////////////////////////////////////////////////////////////////////////////
-// No sub-sampling
-//////////////////////////////////////////////////////////////////////////////
-
-static INLINE void blend_a64_mask_bn_w4_sse4_1(
- uint16_t *dst, uint32_t dst_stride, const uint16_t *src0,
- uint32_t src0_stride, const uint16_t *src1, uint32_t src1_stride,
- const uint8_t *mask, uint32_t mask_stride, int h, blend_unit_fn blend) {
- const __m128i v_maxval_w = _mm_set1_epi16(AOM_BLEND_A64_MAX_ALPHA);
-
- do {
- const __m128i v_m0_b = xx_loadl_32(mask);
- const __m128i v_m0_w = _mm_cvtepu8_epi16(v_m0_b);
- const __m128i v_m1_w = _mm_sub_epi16(v_maxval_w, v_m0_w);
-
- const __m128i v_res_w = blend(src0, src1, v_m0_w, v_m1_w);
-
- xx_storel_64(dst, v_res_w);
-
- dst += dst_stride;
- src0 += src0_stride;
- src1 += src1_stride;
- mask += mask_stride;
- } while (--h);
-}
-
-static void blend_a64_mask_b10_w4_sse4_1(
- uint16_t *dst, uint32_t dst_stride, const uint16_t *src0,
- uint32_t src0_stride, const uint16_t *src1, uint32_t src1_stride,
- const uint8_t *mask, uint32_t mask_stride, int w, int h) {
- (void)w;
- blend_a64_mask_bn_w4_sse4_1(dst, dst_stride, src0, src0_stride, src1,
- src1_stride, mask, mask_stride, h, blend_4_b10);
-}
-
-static void blend_a64_mask_b12_w4_sse4_1(
- uint16_t *dst, uint32_t dst_stride, const uint16_t *src0,
- uint32_t src0_stride, const uint16_t *src1, uint32_t src1_stride,
- const uint8_t *mask, uint32_t mask_stride, int w, int h) {
- (void)w;
- blend_a64_mask_bn_w4_sse4_1(dst, dst_stride, src0, src0_stride, src1,
- src1_stride, mask, mask_stride, h, blend_4_b12);
-}
-
-static INLINE void blend_a64_mask_bn_w8n_sse4_1(
- uint16_t *dst, uint32_t dst_stride, const uint16_t *src0,
- uint32_t src0_stride, const uint16_t *src1, uint32_t src1_stride,
- const uint8_t *mask, uint32_t mask_stride, int w, int h,
- blend_unit_fn blend) {
- const __m128i v_maxval_w = _mm_set1_epi16(AOM_BLEND_A64_MAX_ALPHA);
-
- do {
- int c;
- for (c = 0; c < w; c += 8) {
- const __m128i v_m0_b = xx_loadl_64(mask + c);
- const __m128i v_m0_w = _mm_cvtepu8_epi16(v_m0_b);
- const __m128i v_m1_w = _mm_sub_epi16(v_maxval_w, v_m0_w);
-
- const __m128i v_res_w = blend(src0 + c, src1 + c, v_m0_w, v_m1_w);
-
- xx_storeu_128(dst + c, v_res_w);
- }
- dst += dst_stride;
- src0 += src0_stride;
- src1 += src1_stride;
- mask += mask_stride;
- } while (--h);
-}
-
-static void blend_a64_mask_b10_w8n_sse4_1(
- uint16_t *dst, uint32_t dst_stride, const uint16_t *src0,
- uint32_t src0_stride, const uint16_t *src1, uint32_t src1_stride,
- const uint8_t *mask, uint32_t mask_stride, int w, int h) {
- blend_a64_mask_bn_w8n_sse4_1(dst, dst_stride, src0, src0_stride, src1,
- src1_stride, mask, mask_stride, w, h,
- blend_8_b10);
-}
-
-static void blend_a64_mask_b12_w8n_sse4_1(
- uint16_t *dst, uint32_t dst_stride, const uint16_t *src0,
- uint32_t src0_stride, const uint16_t *src1, uint32_t src1_stride,
- const uint8_t *mask, uint32_t mask_stride, int w, int h) {
- blend_a64_mask_bn_w8n_sse4_1(dst, dst_stride, src0, src0_stride, src1,
- src1_stride, mask, mask_stride, w, h,
- blend_8_b12);
-}
-
-//////////////////////////////////////////////////////////////////////////////
-// Horizontal sub-sampling
-//////////////////////////////////////////////////////////////////////////////
-
-static INLINE void blend_a64_mask_bn_sx_w4_sse4_1(
- uint16_t *dst, uint32_t dst_stride, const uint16_t *src0,
- uint32_t src0_stride, const uint16_t *src1, uint32_t src1_stride,
- const uint8_t *mask, uint32_t mask_stride, int h, blend_unit_fn blend) {
- const __m128i v_zmask_b = _mm_set_epi8(0, 0xff, 0, 0xff, 0, 0xff, 0, 0xff, 0,
- 0xff, 0, 0xff, 0, 0xff, 0, 0xff);
- const __m128i v_maxval_w = _mm_set1_epi16(AOM_BLEND_A64_MAX_ALPHA);
-
- do {
- const __m128i v_r_b = xx_loadl_64(mask);
- const __m128i v_a_b = _mm_avg_epu8(v_r_b, _mm_srli_si128(v_r_b, 1));
-
- const __m128i v_m0_w = _mm_and_si128(v_a_b, v_zmask_b);
- const __m128i v_m1_w = _mm_sub_epi16(v_maxval_w, v_m0_w);
-
- const __m128i v_res_w = blend(src0, src1, v_m0_w, v_m1_w);
-
- xx_storel_64(dst, v_res_w);
-
- dst += dst_stride;
- src0 += src0_stride;
- src1 += src1_stride;
- mask += mask_stride;
- } while (--h);
-}
-
-static void blend_a64_mask_b10_sx_w4_sse4_1(
- uint16_t *dst, uint32_t dst_stride, const uint16_t *src0,
- uint32_t src0_stride, const uint16_t *src1, uint32_t src1_stride,
- const uint8_t *mask, uint32_t mask_stride, int w, int h) {
- (void)w;
- blend_a64_mask_bn_sx_w4_sse4_1(dst, dst_stride, src0, src0_stride, src1,
- src1_stride, mask, mask_stride, h,
- blend_4_b10);
-}
-
-static void blend_a64_mask_b12_sx_w4_sse4_1(
- uint16_t *dst, uint32_t dst_stride, const uint16_t *src0,
- uint32_t src0_stride, const uint16_t *src1, uint32_t src1_stride,
- const uint8_t *mask, uint32_t mask_stride, int w, int h) {
- (void)w;
- blend_a64_mask_bn_sx_w4_sse4_1(dst, dst_stride, src0, src0_stride, src1,
- src1_stride, mask, mask_stride, h,
- blend_4_b12);
-}
-
-static INLINE void blend_a64_mask_bn_sx_w8n_sse4_1(
- uint16_t *dst, uint32_t dst_stride, const uint16_t *src0,
- uint32_t src0_stride, const uint16_t *src1, uint32_t src1_stride,
- const uint8_t *mask, uint32_t mask_stride, int w, int h,
- blend_unit_fn blend) {
- const __m128i v_zmask_b = _mm_set_epi8(0, 0xff, 0, 0xff, 0, 0xff, 0, 0xff, 0,
- 0xff, 0, 0xff, 0, 0xff, 0, 0xff);
- const __m128i v_maxval_w = _mm_set1_epi16(AOM_BLEND_A64_MAX_ALPHA);
-
- do {
- int c;
- for (c = 0; c < w; c += 8) {
- const __m128i v_r_b = xx_loadu_128(mask + 2 * c);
- const __m128i v_a_b = _mm_avg_epu8(v_r_b, _mm_srli_si128(v_r_b, 1));
-
- const __m128i v_m0_w = _mm_and_si128(v_a_b, v_zmask_b);
- const __m128i v_m1_w = _mm_sub_epi16(v_maxval_w, v_m0_w);
-
- const __m128i v_res_w = blend(src0 + c, src1 + c, v_m0_w, v_m1_w);
-
- xx_storeu_128(dst + c, v_res_w);
- }
- dst += dst_stride;
- src0 += src0_stride;
- src1 += src1_stride;
- mask += mask_stride;
- } while (--h);
-}
-
-static void blend_a64_mask_b10_sx_w8n_sse4_1(
- uint16_t *dst, uint32_t dst_stride, const uint16_t *src0,
- uint32_t src0_stride, const uint16_t *src1, uint32_t src1_stride,
- const uint8_t *mask, uint32_t mask_stride, int w, int h) {
- blend_a64_mask_bn_sx_w8n_sse4_1(dst, dst_stride, src0, src0_stride, src1,
- src1_stride, mask, mask_stride, w, h,
- blend_8_b10);
-}
-
-static void blend_a64_mask_b12_sx_w8n_sse4_1(
- uint16_t *dst, uint32_t dst_stride, const uint16_t *src0,
- uint32_t src0_stride, const uint16_t *src1, uint32_t src1_stride,
- const uint8_t *mask, uint32_t mask_stride, int w, int h) {
- blend_a64_mask_bn_sx_w8n_sse4_1(dst, dst_stride, src0, src0_stride, src1,
- src1_stride, mask, mask_stride, w, h,
- blend_8_b12);
-}
-
-//////////////////////////////////////////////////////////////////////////////
-// Vertical sub-sampling
-//////////////////////////////////////////////////////////////////////////////
-
-static INLINE void blend_a64_mask_bn_sy_w4_sse4_1(
- uint16_t *dst, uint32_t dst_stride, const uint16_t *src0,
- uint32_t src0_stride, const uint16_t *src1, uint32_t src1_stride,
- const uint8_t *mask, uint32_t mask_stride, int h, blend_unit_fn blend) {
- const __m128i v_maxval_w = _mm_set1_epi16(AOM_BLEND_A64_MAX_ALPHA);
-
- do {
- const __m128i v_ra_b = xx_loadl_32(mask);
- const __m128i v_rb_b = xx_loadl_32(mask + mask_stride);
- const __m128i v_a_b = _mm_avg_epu8(v_ra_b, v_rb_b);
-
- const __m128i v_m0_w = _mm_cvtepu8_epi16(v_a_b);
- const __m128i v_m1_w = _mm_sub_epi16(v_maxval_w, v_m0_w);
-
- const __m128i v_res_w = blend(src0, src1, v_m0_w, v_m1_w);
-
- xx_storel_64(dst, v_res_w);
-
- dst += dst_stride;
- src0 += src0_stride;
- src1 += src1_stride;
- mask += 2 * mask_stride;
- } while (--h);
-}
-
-static void blend_a64_mask_b10_sy_w4_sse4_1(
- uint16_t *dst, uint32_t dst_stride, const uint16_t *src0,
- uint32_t src0_stride, const uint16_t *src1, uint32_t src1_stride,
- const uint8_t *mask, uint32_t mask_stride, int w, int h) {
- (void)w;
- blend_a64_mask_bn_sy_w4_sse4_1(dst, dst_stride, src0, src0_stride, src1,
- src1_stride, mask, mask_stride, h,
- blend_4_b10);
-}
-
-static void blend_a64_mask_b12_sy_w4_sse4_1(
- uint16_t *dst, uint32_t dst_stride, const uint16_t *src0,
- uint32_t src0_stride, const uint16_t *src1, uint32_t src1_stride,
- const uint8_t *mask, uint32_t mask_stride, int w, int h) {
- (void)w;
- blend_a64_mask_bn_sy_w4_sse4_1(dst, dst_stride, src0, src0_stride, src1,
- src1_stride, mask, mask_stride, h,
- blend_4_b12);
-}
-
-static INLINE void blend_a64_mask_bn_sy_w8n_sse4_1(
- uint16_t *dst, uint32_t dst_stride, const uint16_t *src0,
- uint32_t src0_stride, const uint16_t *src1, uint32_t src1_stride,
- const uint8_t *mask, uint32_t mask_stride, int w, int h,
- blend_unit_fn blend) {
- const __m128i v_maxval_w = _mm_set1_epi16(AOM_BLEND_A64_MAX_ALPHA);
-
- do {
- int c;
- for (c = 0; c < w; c += 8) {
- const __m128i v_ra_b = xx_loadl_64(mask + c);
- const __m128i v_rb_b = xx_loadl_64(mask + c + mask_stride);
- const __m128i v_a_b = _mm_avg_epu8(v_ra_b, v_rb_b);
-
- const __m128i v_m0_w = _mm_cvtepu8_epi16(v_a_b);
- const __m128i v_m1_w = _mm_sub_epi16(v_maxval_w, v_m0_w);
-
- const __m128i v_res_w = blend(src0 + c, src1 + c, v_m0_w, v_m1_w);
-
- xx_storeu_128(dst + c, v_res_w);
- }
- dst += dst_stride;
- src0 += src0_stride;
- src1 += src1_stride;
- mask += 2 * mask_stride;
- } while (--h);
-}
-
-static void blend_a64_mask_b10_sy_w8n_sse4_1(
- uint16_t *dst, uint32_t dst_stride, const uint16_t *src0,
- uint32_t src0_stride, const uint16_t *src1, uint32_t src1_stride,
- const uint8_t *mask, uint32_t mask_stride, int w, int h) {
- blend_a64_mask_bn_sy_w8n_sse4_1(dst, dst_stride, src0, src0_stride, src1,
- src1_stride, mask, mask_stride, w, h,
- blend_8_b10);
-}
-
-static void blend_a64_mask_b12_sy_w8n_sse4_1(
- uint16_t *dst, uint32_t dst_stride, const uint16_t *src0,
- uint32_t src0_stride, const uint16_t *src1, uint32_t src1_stride,
- const uint8_t *mask, uint32_t mask_stride, int w, int h) {
- blend_a64_mask_bn_sy_w8n_sse4_1(dst, dst_stride, src0, src0_stride, src1,
- src1_stride, mask, mask_stride, w, h,
- blend_8_b12);
-}
-
-//////////////////////////////////////////////////////////////////////////////
-// Horizontal and Vertical sub-sampling
-//////////////////////////////////////////////////////////////////////////////
-
-static INLINE void blend_a64_mask_bn_sx_sy_w4_sse4_1(
- uint16_t *dst, uint32_t dst_stride, const uint16_t *src0,
- uint32_t src0_stride, const uint16_t *src1, uint32_t src1_stride,
- const uint8_t *mask, uint32_t mask_stride, int h, blend_unit_fn blend) {
- const __m128i v_zmask_b = _mm_set_epi8(0, 0xff, 0, 0xff, 0, 0xff, 0, 0xff, 0,
- 0xff, 0, 0xff, 0, 0xff, 0, 0xff);
- const __m128i v_maxval_w = _mm_set1_epi16(AOM_BLEND_A64_MAX_ALPHA);
-
- do {
- const __m128i v_ra_b = xx_loadl_64(mask);
- const __m128i v_rb_b = xx_loadl_64(mask + mask_stride);
- const __m128i v_rvs_b = _mm_add_epi8(v_ra_b, v_rb_b);
- const __m128i v_rvsa_w = _mm_and_si128(v_rvs_b, v_zmask_b);
- const __m128i v_rvsb_w =
- _mm_and_si128(_mm_srli_si128(v_rvs_b, 1), v_zmask_b);
- const __m128i v_rs_w = _mm_add_epi16(v_rvsa_w, v_rvsb_w);
-
- const __m128i v_m0_w = xx_roundn_epu16(v_rs_w, 2);
- const __m128i v_m1_w = _mm_sub_epi16(v_maxval_w, v_m0_w);
-
- const __m128i v_res_w = blend(src0, src1, v_m0_w, v_m1_w);
-
- xx_storel_64(dst, v_res_w);
-
- dst += dst_stride;
- src0 += src0_stride;
- src1 += src1_stride;
- mask += 2 * mask_stride;
- } while (--h);
-}
-
-static void blend_a64_mask_b10_sx_sy_w4_sse4_1(
- uint16_t *dst, uint32_t dst_stride, const uint16_t *src0,
- uint32_t src0_stride, const uint16_t *src1, uint32_t src1_stride,
- const uint8_t *mask, uint32_t mask_stride, int w, int h) {
- (void)w;
- blend_a64_mask_bn_sx_sy_w4_sse4_1(dst, dst_stride, src0, src0_stride, src1,
- src1_stride, mask, mask_stride, h,
- blend_4_b10);
-}
-
-static void blend_a64_mask_b12_sx_sy_w4_sse4_1(
- uint16_t *dst, uint32_t dst_stride, const uint16_t *src0,
- uint32_t src0_stride, const uint16_t *src1, uint32_t src1_stride,
- const uint8_t *mask, uint32_t mask_stride, int w, int h) {
- (void)w;
- blend_a64_mask_bn_sx_sy_w4_sse4_1(dst, dst_stride, src0, src0_stride, src1,
- src1_stride, mask, mask_stride, h,
- blend_4_b12);
-}
-
-static INLINE void blend_a64_mask_bn_sx_sy_w8n_sse4_1(
- uint16_t *dst, uint32_t dst_stride, const uint16_t *src0,
- uint32_t src0_stride, const uint16_t *src1, uint32_t src1_stride,
- const uint8_t *mask, uint32_t mask_stride, int w, int h,
- blend_unit_fn blend) {
- const __m128i v_zmask_b = _mm_set_epi8(0, 0xff, 0, 0xff, 0, 0xff, 0, 0xff, 0,
- 0xff, 0, 0xff, 0, 0xff, 0, 0xff);
- const __m128i v_maxval_w = _mm_set1_epi16(AOM_BLEND_A64_MAX_ALPHA);
-
- do {
- int c;
- for (c = 0; c < w; c += 8) {
- const __m128i v_ra_b = xx_loadu_128(mask + 2 * c);
- const __m128i v_rb_b = xx_loadu_128(mask + 2 * c + mask_stride);
- const __m128i v_rvs_b = _mm_add_epi8(v_ra_b, v_rb_b);
- const __m128i v_rvsa_w = _mm_and_si128(v_rvs_b, v_zmask_b);
- const __m128i v_rvsb_w =
- _mm_and_si128(_mm_srli_si128(v_rvs_b, 1), v_zmask_b);
- const __m128i v_rs_w = _mm_add_epi16(v_rvsa_w, v_rvsb_w);
-
- const __m128i v_m0_w = xx_roundn_epu16(v_rs_w, 2);
- const __m128i v_m1_w = _mm_sub_epi16(v_maxval_w, v_m0_w);
-
- const __m128i v_res_w = blend(src0 + c, src1 + c, v_m0_w, v_m1_w);
-
- xx_storeu_128(dst + c, v_res_w);
- }
- dst += dst_stride;
- src0 += src0_stride;
- src1 += src1_stride;
- mask += 2 * mask_stride;
- } while (--h);
-}
-
-static void blend_a64_mask_b10_sx_sy_w8n_sse4_1(
- uint16_t *dst, uint32_t dst_stride, const uint16_t *src0,
- uint32_t src0_stride, const uint16_t *src1, uint32_t src1_stride,
- const uint8_t *mask, uint32_t mask_stride, int w, int h) {
- blend_a64_mask_bn_sx_sy_w8n_sse4_1(dst, dst_stride, src0, src0_stride, src1,
- src1_stride, mask, mask_stride, w, h,
- blend_8_b10);
-}
-
-static void blend_a64_mask_b12_sx_sy_w8n_sse4_1(
- uint16_t *dst, uint32_t dst_stride, const uint16_t *src0,
- uint32_t src0_stride, const uint16_t *src1, uint32_t src1_stride,
- const uint8_t *mask, uint32_t mask_stride, int w, int h) {
- blend_a64_mask_bn_sx_sy_w8n_sse4_1(dst, dst_stride, src0, src0_stride, src1,
- src1_stride, mask, mask_stride, w, h,
- blend_8_b12);
-}
-
-//////////////////////////////////////////////////////////////////////////////
-// Dispatch
-//////////////////////////////////////////////////////////////////////////////
-
-void aom_highbd_blend_a64_mask_sse4_1(uint8_t *dst_8, uint32_t dst_stride,
- const uint8_t *src0_8,
- uint32_t src0_stride,
- const uint8_t *src1_8,
- uint32_t src1_stride, const uint8_t *mask,
- uint32_t mask_stride, int w, int h,
- int subx, int suby, int bd) {
- typedef void (*blend_fn)(
- uint16_t * dst, uint32_t dst_stride, const uint16_t *src0,
- uint32_t src0_stride, const uint16_t *src1, uint32_t src1_stride,
- const uint8_t *mask, uint32_t mask_stride, int w, int h);
-
- // Dimensions are: bd_index X width_index X subx X suby
- static const blend_fn blend[2][2][2][2] = {
- { // bd == 8 or 10
- { // w % 8 == 0
- { blend_a64_mask_b10_w8n_sse4_1, blend_a64_mask_b10_sy_w8n_sse4_1 },
- { blend_a64_mask_b10_sx_w8n_sse4_1,
- blend_a64_mask_b10_sx_sy_w8n_sse4_1 } },
- { // w == 4
- { blend_a64_mask_b10_w4_sse4_1, blend_a64_mask_b10_sy_w4_sse4_1 },
- { blend_a64_mask_b10_sx_w4_sse4_1,
- blend_a64_mask_b10_sx_sy_w4_sse4_1 } } },
- { // bd == 12
- { // w % 8 == 0
- { blend_a64_mask_b12_w8n_sse4_1, blend_a64_mask_b12_sy_w8n_sse4_1 },
- { blend_a64_mask_b12_sx_w8n_sse4_1,
- blend_a64_mask_b12_sx_sy_w8n_sse4_1 } },
- { // w == 4
- { blend_a64_mask_b12_w4_sse4_1, blend_a64_mask_b12_sy_w4_sse4_1 },
- { blend_a64_mask_b12_sx_w4_sse4_1,
- blend_a64_mask_b12_sx_sy_w4_sse4_1 } } }
- };
-
- assert(IMPLIES(src0_8 == dst_8, src0_stride == dst_stride));
- assert(IMPLIES(src1_8 == dst_8, src1_stride == dst_stride));
-
- assert(h >= 1);
- assert(w >= 1);
- assert(IS_POWER_OF_TWO(h));
- assert(IS_POWER_OF_TWO(w));
-
- assert(bd == 8 || bd == 10 || bd == 12);
- if (UNLIKELY((h | w) & 3)) { // if (w <= 2 || h <= 2)
- aom_highbd_blend_a64_mask_c(dst_8, dst_stride, src0_8, src0_stride, src1_8,
- src1_stride, mask, mask_stride, w, h, subx,
- suby, bd);
- } else {
- uint16_t *const dst = CONVERT_TO_SHORTPTR(dst_8);
- const uint16_t *const src0 = CONVERT_TO_SHORTPTR(src0_8);
- const uint16_t *const src1 = CONVERT_TO_SHORTPTR(src1_8);
-
- blend[bd == 12][(w >> 2) & 1][subx != 0][suby != 0](
- dst, dst_stride, src0, src0_stride, src1, src1_stride, mask,
- mask_stride, w, h);
- }
-}
-
-static INLINE void blend_a64_d16_mask_w16_sse41(
- uint8_t *dst, const CONV_BUF_TYPE *src0, const CONV_BUF_TYPE *src1,
- const __m128i *m0, const __m128i *m1, const __m128i *v_round_offset,
- const __m128i *v_maxval, int shift) {
- const __m128i max_minus_m0 = _mm_sub_epi16(*v_maxval, *m0);
- const __m128i max_minus_m1 = _mm_sub_epi16(*v_maxval, *m1);
- const __m128i s0_0 = xx_loadu_128(src0);
- const __m128i s0_1 = xx_loadu_128(src0 + 8);
- const __m128i s1_0 = xx_loadu_128(src1);
- const __m128i s1_1 = xx_loadu_128(src1 + 8);
- __m128i res0_lo = _mm_madd_epi16(_mm_unpacklo_epi16(s0_0, s1_0),
- _mm_unpacklo_epi16(*m0, max_minus_m0));
- __m128i res0_hi = _mm_madd_epi16(_mm_unpackhi_epi16(s0_0, s1_0),
- _mm_unpackhi_epi16(*m0, max_minus_m0));
- __m128i res1_lo = _mm_madd_epi16(_mm_unpacklo_epi16(s0_1, s1_1),
- _mm_unpacklo_epi16(*m1, max_minus_m1));
- __m128i res1_hi = _mm_madd_epi16(_mm_unpackhi_epi16(s0_1, s1_1),
- _mm_unpackhi_epi16(*m1, max_minus_m1));
- res0_lo = _mm_srai_epi32(_mm_sub_epi32(res0_lo, *v_round_offset), shift);
- res0_hi = _mm_srai_epi32(_mm_sub_epi32(res0_hi, *v_round_offset), shift);
- res1_lo = _mm_srai_epi32(_mm_sub_epi32(res1_lo, *v_round_offset), shift);
- res1_hi = _mm_srai_epi32(_mm_sub_epi32(res1_hi, *v_round_offset), shift);
- const __m128i res0 = _mm_packs_epi32(res0_lo, res0_hi);
- const __m128i res1 = _mm_packs_epi32(res1_lo, res1_hi);
- const __m128i res = _mm_packus_epi16(res0, res1);
-
- _mm_storeu_si128((__m128i *)(dst), res);
-}
-
-static INLINE void lowbd_blend_a64_d16_mask_subw0_subh0_w16_sse4_1(
- uint8_t *dst, uint32_t dst_stride, const CONV_BUF_TYPE *src0,
- uint32_t src0_stride, const CONV_BUF_TYPE *src1, uint32_t src1_stride,
- const uint8_t *mask, uint32_t mask_stride, int h, int w,
- const __m128i *round_offset, int shift) {
- const __m128i v_maxval = _mm_set1_epi16(AOM_BLEND_A64_MAX_ALPHA);
- for (int i = 0; i < h; ++i) {
- for (int j = 0; j < w; j += 16) {
- const __m128i m = xx_loadu_128(mask + j);
- const __m128i m0 = _mm_cvtepu8_epi16(m);
- const __m128i m1 = _mm_cvtepu8_epi16(_mm_srli_si128(m, 8));
-
- blend_a64_d16_mask_w16_sse41(dst + j, src0 + j, src1 + j, &m0, &m1,
- round_offset, &v_maxval, shift);
- }
- mask += mask_stride;
- dst += dst_stride;
- src0 += src0_stride;
- src1 += src1_stride;
- }
-}
-
-static INLINE void lowbd_blend_a64_d16_mask_subw1_subh1_w16_sse4_1(
- uint8_t *dst, uint32_t dst_stride, const CONV_BUF_TYPE *src0,
- uint32_t src0_stride, const CONV_BUF_TYPE *src1, uint32_t src1_stride,
- const uint8_t *mask, uint32_t mask_stride, int h, int w,
- const __m128i *round_offset, int shift) {
- const __m128i v_maxval = _mm_set1_epi16(AOM_BLEND_A64_MAX_ALPHA);
- const __m128i one_b = _mm_set1_epi8(1);
- const __m128i two_w = _mm_set1_epi16(2);
- for (int i = 0; i < h; ++i) {
- for (int j = 0; j < w; j += 16) {
- const __m128i m_i00 = xx_loadu_128(mask + 2 * j);
- const __m128i m_i01 = xx_loadu_128(mask + 2 * j + 16);
- const __m128i m_i10 = xx_loadu_128(mask + mask_stride + 2 * j);
- const __m128i m_i11 = xx_loadu_128(mask + mask_stride + 2 * j + 16);
-
- const __m128i m0_ac = _mm_adds_epu8(m_i00, m_i10);
- const __m128i m1_ac = _mm_adds_epu8(m_i01, m_i11);
- const __m128i m0_acbd = _mm_maddubs_epi16(m0_ac, one_b);
- const __m128i m1_acbd = _mm_maddubs_epi16(m1_ac, one_b);
- const __m128i m0 = _mm_srli_epi16(_mm_add_epi16(m0_acbd, two_w), 2);
- const __m128i m1 = _mm_srli_epi16(_mm_add_epi16(m1_acbd, two_w), 2);
-
- blend_a64_d16_mask_w16_sse41(dst + j, src0 + j, src1 + j, &m0, &m1,
- round_offset, &v_maxval, shift);
- }
- mask += mask_stride << 1;
- dst += dst_stride;
- src0 += src0_stride;
- src1 += src1_stride;
- }
-}
-
-static INLINE void lowbd_blend_a64_d16_mask_subw1_subh0_w16_sse4_1(
- uint8_t *dst, uint32_t dst_stride, const CONV_BUF_TYPE *src0,
- uint32_t src0_stride, const CONV_BUF_TYPE *src1, uint32_t src1_stride,
- const uint8_t *mask, uint32_t mask_stride, int h, int w,
- const __m128i *round_offset, int shift) {
- const __m128i v_maxval = _mm_set1_epi16(AOM_BLEND_A64_MAX_ALPHA);
- const __m128i one_b = _mm_set1_epi8(1);
- const __m128i zeros = _mm_setzero_si128();
- for (int i = 0; i < h; ++i) {
- for (int j = 0; j < w; j += 16) {
- const __m128i m_i00 = xx_loadu_128(mask + 2 * j);
- const __m128i m_i01 = xx_loadu_128(mask + 2 * j + 16);
- const __m128i m0_ac = _mm_maddubs_epi16(m_i00, one_b);
- const __m128i m1_ac = _mm_maddubs_epi16(m_i01, one_b);
- const __m128i m0 = _mm_avg_epu16(m0_ac, zeros);
- const __m128i m1 = _mm_avg_epu16(m1_ac, zeros);
-
- blend_a64_d16_mask_w16_sse41(dst + j, src0 + j, src1 + j, &m0, &m1,
- round_offset, &v_maxval, shift);
- }
- mask += mask_stride;
- dst += dst_stride;
- src0 += src0_stride;
- src1 += src1_stride;
- }
-}
-
-static INLINE void lowbd_blend_a64_d16_mask_subw0_subh1_w16_sse4_1(
- uint8_t *dst, uint32_t dst_stride, const CONV_BUF_TYPE *src0,
- uint32_t src0_stride, const CONV_BUF_TYPE *src1, uint32_t src1_stride,
- const uint8_t *mask, uint32_t mask_stride, int h, int w,
- const __m128i *round_offset, int shift) {
- const __m128i v_maxval = _mm_set1_epi16(AOM_BLEND_A64_MAX_ALPHA);
- const __m128i zeros = _mm_setzero_si128();
- for (int i = 0; i < h; ++i) {
- for (int j = 0; j < w; j += 16) {
- const __m128i m_i00 = xx_loadu_128(mask + j);
- const __m128i m_i10 = xx_loadu_128(mask + mask_stride + j);
-
- const __m128i m_ac = _mm_avg_epu8(_mm_adds_epu8(m_i00, m_i10), zeros);
- const __m128i m0 = _mm_cvtepu8_epi16(m_ac);
- const __m128i m1 = _mm_cvtepu8_epi16(_mm_srli_si128(m_ac, 8));
-
- blend_a64_d16_mask_w16_sse41(dst + j, src0 + j, src1 + j, &m0, &m1,
- round_offset, &v_maxval, shift);
- }
- mask += mask_stride << 1;
- dst += dst_stride;
- src0 += src0_stride;
- src1 += src1_stride;
- }
-}
-
-void aom_lowbd_blend_a64_d16_mask_sse4_1(
- uint8_t *dst, uint32_t dst_stride, const CONV_BUF_TYPE *src0,
- uint32_t src0_stride, const CONV_BUF_TYPE *src1, uint32_t src1_stride,
- const uint8_t *mask, uint32_t mask_stride, int w, int h, int subw, int subh,
- ConvolveParams *conv_params) {
- const int bd = 8;
- const int round_bits =
- 2 * FILTER_BITS - conv_params->round_0 - conv_params->round_1;
-
- const int round_offset =
- ((1 << (round_bits + bd)) + (1 << (round_bits + bd - 1)) -
- (1 << (round_bits - 1)))
- << AOM_BLEND_A64_ROUND_BITS;
-
- const int shift = round_bits + AOM_BLEND_A64_ROUND_BITS;
- assert(IMPLIES((void *)src0 == dst, src0_stride == dst_stride));
- assert(IMPLIES((void *)src1 == dst, src1_stride == dst_stride));
-
- assert(h >= 4);
- assert(w >= 4);
- assert(IS_POWER_OF_TWO(h));
- assert(IS_POWER_OF_TWO(w));
-
- const __m128i v_round_offset = _mm_set1_epi32(round_offset);
-
- if (subw == 0 && subh == 0) {
- switch (w) {
- case 4:
- aom_lowbd_blend_a64_d16_mask_subw0_subh0_w4_sse4_1(
- dst, dst_stride, src0, src0_stride, src1, src1_stride, mask,
- mask_stride, h, &v_round_offset, shift);
- break;
- case 8:
- aom_lowbd_blend_a64_d16_mask_subw0_subh0_w8_sse4_1(
- dst, dst_stride, src0, src0_stride, src1, src1_stride, mask,
- mask_stride, h, &v_round_offset, shift);
- break;
- default:
- lowbd_blend_a64_d16_mask_subw0_subh0_w16_sse4_1(
- dst, dst_stride, src0, src0_stride, src1, src1_stride, mask,
- mask_stride, h, w, &v_round_offset, shift);
- break;
- }
-
- } else if (subw == 1 && subh == 1) {
- switch (w) {
- case 4:
- aom_lowbd_blend_a64_d16_mask_subw1_subh1_w4_sse4_1(
- dst, dst_stride, src0, src0_stride, src1, src1_stride, mask,
- mask_stride, h, &v_round_offset, shift);
- break;
- case 8:
- aom_lowbd_blend_a64_d16_mask_subw1_subh1_w8_sse4_1(
- dst, dst_stride, src0, src0_stride, src1, src1_stride, mask,
- mask_stride, h, &v_round_offset, shift);
- break;
- default:
- lowbd_blend_a64_d16_mask_subw1_subh1_w16_sse4_1(
- dst, dst_stride, src0, src0_stride, src1, src1_stride, mask,
- mask_stride, h, w, &v_round_offset, shift);
- break;
- }
- } else if (subw == 1 && subh == 0) {
- switch (w) {
- case 4:
- aom_lowbd_blend_a64_d16_mask_subw1_subh0_w4_sse4_1(
- dst, dst_stride, src0, src0_stride, src1, src1_stride, mask,
- mask_stride, h, &v_round_offset, shift);
- break;
- case 8:
- aom_lowbd_blend_a64_d16_mask_subw1_subh0_w8_sse4_1(
- dst, dst_stride, src0, src0_stride, src1, src1_stride, mask,
- mask_stride, h, &v_round_offset, shift);
- break;
- default:
- lowbd_blend_a64_d16_mask_subw1_subh0_w16_sse4_1(
- dst, dst_stride, src0, src0_stride, src1, src1_stride, mask,
- mask_stride, h, w, &v_round_offset, shift);
- break;
- }
- } else {
- switch (w) {
- case 4:
- aom_lowbd_blend_a64_d16_mask_subw0_subh1_w4_sse4_1(
- dst, dst_stride, src0, src0_stride, src1, src1_stride, mask,
- mask_stride, h, &v_round_offset, shift);
- break;
- case 8:
- aom_lowbd_blend_a64_d16_mask_subw0_subh1_w8_sse4_1(
- dst, dst_stride, src0, src0_stride, src1, src1_stride, mask,
- mask_stride, h, &v_round_offset, shift);
- break;
- default:
- lowbd_blend_a64_d16_mask_subw0_subh1_w16_sse4_1(
- dst, dst_stride, src0, src0_stride, src1, src1_stride, mask,
- mask_stride, h, w, &v_round_offset, shift);
- break;
- }
- }
-}
diff --git a/third_party/aom/aom_dsp/x86/blend_a64_vmask_sse4.c b/third_party/aom/aom_dsp/x86/blend_a64_vmask_sse4.c
deleted file mode 100644
index 064910232..000000000
--- a/third_party/aom/aom_dsp/x86/blend_a64_vmask_sse4.c
+++ /dev/null
@@ -1,283 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#include <smmintrin.h> // SSE4.1
-
-#include <assert.h>
-
-#include "aom/aom_integer.h"
-#include "aom_ports/mem.h"
-#include "aom_dsp/aom_dsp_common.h"
-#include "aom_dsp/blend.h"
-
-#include "aom_dsp/x86/synonyms.h"
-#include "aom_dsp/x86/blend_sse4.h"
-
-#include "config/aom_dsp_rtcd.h"
-
-//////////////////////////////////////////////////////////////////////////////
-// Implementation - No sub-sampling
-//////////////////////////////////////////////////////////////////////////////
-
-static void blend_a64_vmask_w4_sse4_1(uint8_t *dst, uint32_t dst_stride,
- const uint8_t *src0, uint32_t src0_stride,
- const uint8_t *src1, uint32_t src1_stride,
- const uint8_t *mask, int w, int h) {
- const __m128i v_maxval_w = _mm_set1_epi16(AOM_BLEND_A64_MAX_ALPHA);
-
- (void)w;
-
- do {
- const __m128i v_m0_w = _mm_set1_epi16(*mask);
- const __m128i v_m1_w = _mm_sub_epi16(v_maxval_w, v_m0_w);
-
- const __m128i v_res_w = blend_4(src0, src1, &v_m0_w, &v_m1_w);
-
- const __m128i v_res_b = _mm_packus_epi16(v_res_w, v_res_w);
-
- xx_storel_32(dst, v_res_b);
-
- dst += dst_stride;
- src0 += src0_stride;
- src1 += src1_stride;
- mask += 1;
- } while (--h);
-}
-
-static void blend_a64_vmask_w8_sse4_1(uint8_t *dst, uint32_t dst_stride,
- const uint8_t *src0, uint32_t src0_stride,
- const uint8_t *src1, uint32_t src1_stride,
- const uint8_t *mask, int w, int h) {
- const __m128i v_maxval_w = _mm_set1_epi16(AOM_BLEND_A64_MAX_ALPHA);
-
- (void)w;
-
- do {
- const __m128i v_m0_w = _mm_set1_epi16(*mask);
- const __m128i v_m1_w = _mm_sub_epi16(v_maxval_w, v_m0_w);
-
- const __m128i v_res_w = blend_8(src0, src1, &v_m0_w, &v_m1_w);
-
- const __m128i v_res_b = _mm_packus_epi16(v_res_w, v_res_w);
-
- xx_storel_64(dst, v_res_b);
-
- dst += dst_stride;
- src0 += src0_stride;
- src1 += src1_stride;
- mask += 1;
- } while (--h);
-}
-
-static void blend_a64_vmask_w16n_sse4_1(uint8_t *dst, uint32_t dst_stride,
- const uint8_t *src0,
- uint32_t src0_stride,
- const uint8_t *src1,
- uint32_t src1_stride,
- const uint8_t *mask, int w, int h) {
- const __m128i v_maxval_w = _mm_set1_epi16(AOM_BLEND_A64_MAX_ALPHA);
-
- do {
- int c;
- const __m128i v_m0_w = _mm_set1_epi16(*mask);
- const __m128i v_m1_w = _mm_sub_epi16(v_maxval_w, v_m0_w);
- for (c = 0; c < w; c += 16) {
- const __m128i v_resl_w = blend_8(src0 + c, src1 + c, &v_m0_w, &v_m1_w);
- const __m128i v_resh_w =
- blend_8(src0 + c + 8, src1 + c + 8, &v_m0_w, &v_m1_w);
-
- const __m128i v_res_b = _mm_packus_epi16(v_resl_w, v_resh_w);
-
- xx_storeu_128(dst + c, v_res_b);
- }
- dst += dst_stride;
- src0 += src0_stride;
- src1 += src1_stride;
- mask += 1;
- } while (--h);
-}
-
-//////////////////////////////////////////////////////////////////////////////
-// Dispatch
-//////////////////////////////////////////////////////////////////////////////
-
-void aom_blend_a64_vmask_sse4_1(uint8_t *dst, uint32_t dst_stride,
- const uint8_t *src0, uint32_t src0_stride,
- const uint8_t *src1, uint32_t src1_stride,
- const uint8_t *mask, int w, int h) {
- typedef void (*blend_fn)(uint8_t * dst, uint32_t dst_stride,
- const uint8_t *src0, uint32_t src0_stride,
- const uint8_t *src1, uint32_t src1_stride,
- const uint8_t *mask, int w, int h);
-
- // Dimension: width_index
- static const blend_fn blend[9] = {
- blend_a64_vmask_w16n_sse4_1, // w % 16 == 0
- aom_blend_a64_vmask_c, // w == 1
- aom_blend_a64_vmask_c, // w == 2
- NULL, // INVALID
- blend_a64_vmask_w4_sse4_1, // w == 4
- NULL, // INVALID
- NULL, // INVALID
- NULL, // INVALID
- blend_a64_vmask_w8_sse4_1, // w == 8
- };
-
- assert(IMPLIES(src0 == dst, src0_stride == dst_stride));
- assert(IMPLIES(src1 == dst, src1_stride == dst_stride));
-
- assert(h >= 1);
- assert(w >= 1);
- assert(IS_POWER_OF_TWO(h));
- assert(IS_POWER_OF_TWO(w));
-
- blend[w & 0xf](dst, dst_stride, src0, src0_stride, src1, src1_stride, mask, w,
- h);
-}
-
-//////////////////////////////////////////////////////////////////////////////
-// Implementation - No sub-sampling
-//////////////////////////////////////////////////////////////////////////////
-
-static INLINE void blend_a64_vmask_bn_w4_sse4_1(
- uint16_t *dst, uint32_t dst_stride, const uint16_t *src0,
- uint32_t src0_stride, const uint16_t *src1, uint32_t src1_stride,
- const uint8_t *mask, int h, blend_unit_fn blend) {
- const __m128i v_maxval_w = _mm_set1_epi16(AOM_BLEND_A64_MAX_ALPHA);
-
- do {
- const __m128i v_m0_w = _mm_set1_epi16(*mask);
- const __m128i v_m1_w = _mm_sub_epi16(v_maxval_w, v_m0_w);
-
- const __m128i v_res_w = blend(src0, src1, v_m0_w, v_m1_w);
-
- xx_storel_64(dst, v_res_w);
-
- dst += dst_stride;
- src0 += src0_stride;
- src1 += src1_stride;
- mask += 1;
- } while (--h);
-}
-
-static void blend_a64_vmask_b10_w4_sse4_1(uint16_t *dst, uint32_t dst_stride,
- const uint16_t *src0,
- uint32_t src0_stride,
- const uint16_t *src1,
- uint32_t src1_stride,
- const uint8_t *mask, int w, int h) {
- (void)w;
- blend_a64_vmask_bn_w4_sse4_1(dst, dst_stride, src0, src0_stride, src1,
- src1_stride, mask, h, blend_4_b10);
-}
-
-static void blend_a64_vmask_b12_w4_sse4_1(uint16_t *dst, uint32_t dst_stride,
- const uint16_t *src0,
- uint32_t src0_stride,
- const uint16_t *src1,
- uint32_t src1_stride,
- const uint8_t *mask, int w, int h) {
- (void)w;
- blend_a64_vmask_bn_w4_sse4_1(dst, dst_stride, src0, src0_stride, src1,
- src1_stride, mask, h, blend_4_b12);
-}
-
-static INLINE void blend_a64_vmask_bn_w8n_sse4_1(
- uint16_t *dst, uint32_t dst_stride, const uint16_t *src0,
- uint32_t src0_stride, const uint16_t *src1, uint32_t src1_stride,
- const uint8_t *mask, int w, int h, blend_unit_fn blend) {
- const __m128i v_maxval_w = _mm_set1_epi16(AOM_BLEND_A64_MAX_ALPHA);
-
- do {
- int c;
- const __m128i v_m0_w = _mm_set1_epi16(*mask);
- const __m128i v_m1_w = _mm_sub_epi16(v_maxval_w, v_m0_w);
- for (c = 0; c < w; c += 8) {
- const __m128i v_res_w = blend(src0 + c, src1 + c, v_m0_w, v_m1_w);
-
- xx_storeu_128(dst + c, v_res_w);
- }
- dst += dst_stride;
- src0 += src0_stride;
- src1 += src1_stride;
- mask += 1;
- } while (--h);
-}
-
-static void blend_a64_vmask_b10_w8n_sse4_1(uint16_t *dst, uint32_t dst_stride,
- const uint16_t *src0,
- uint32_t src0_stride,
- const uint16_t *src1,
- uint32_t src1_stride,
- const uint8_t *mask, int w, int h) {
- blend_a64_vmask_bn_w8n_sse4_1(dst, dst_stride, src0, src0_stride, src1,
- src1_stride, mask, w, h, blend_8_b10);
-}
-
-static void blend_a64_vmask_b12_w8n_sse4_1(uint16_t *dst, uint32_t dst_stride,
- const uint16_t *src0,
- uint32_t src0_stride,
- const uint16_t *src1,
- uint32_t src1_stride,
- const uint8_t *mask, int w, int h) {
- blend_a64_vmask_bn_w8n_sse4_1(dst, dst_stride, src0, src0_stride, src1,
- src1_stride, mask, w, h, blend_8_b12);
-}
-
-//////////////////////////////////////////////////////////////////////////////
-// Dispatch
-//////////////////////////////////////////////////////////////////////////////
-
-void aom_highbd_blend_a64_vmask_sse4_1(
- uint8_t *dst_8, uint32_t dst_stride, const uint8_t *src0_8,
- uint32_t src0_stride, const uint8_t *src1_8, uint32_t src1_stride,
- const uint8_t *mask, int w, int h, int bd) {
- typedef void (*blend_fn)(uint16_t * dst, uint32_t dst_stride,
- const uint16_t *src0, uint32_t src0_stride,
- const uint16_t *src1, uint32_t src1_stride,
- const uint8_t *mask, int w, int h);
-
- // Dimensions are: bd_index X width_index
- static const blend_fn blend[2][2] = {
- {
- // bd == 8 or 10
- blend_a64_vmask_b10_w8n_sse4_1, // w % 8 == 0
- blend_a64_vmask_b10_w4_sse4_1, // w == 4
- },
- {
- // bd == 12
- blend_a64_vmask_b12_w8n_sse4_1, // w % 8 == 0
- blend_a64_vmask_b12_w4_sse4_1, // w == 4
- }
- };
-
- assert(IMPLIES(src0_8 == dst_8, src0_stride == dst_stride));
- assert(IMPLIES(src1_8 == dst_8, src1_stride == dst_stride));
-
- assert(h >= 1);
- assert(w >= 1);
- assert(IS_POWER_OF_TWO(h));
- assert(IS_POWER_OF_TWO(w));
-
- assert(bd == 8 || bd == 10 || bd == 12);
-
- if (UNLIKELY((h | w) & 3)) { // if (w <= 2 || h <= 2)
- aom_highbd_blend_a64_vmask_c(dst_8, dst_stride, src0_8, src0_stride, src1_8,
- src1_stride, mask, w, h, bd);
- } else {
- uint16_t *const dst = CONVERT_TO_SHORTPTR(dst_8);
- const uint16_t *const src0 = CONVERT_TO_SHORTPTR(src0_8);
- const uint16_t *const src1 = CONVERT_TO_SHORTPTR(src1_8);
-
- blend[bd == 12][(w >> 2) & 1](dst, dst_stride, src0, src0_stride, src1,
- src1_stride, mask, w, h);
- }
-}
diff --git a/third_party/aom/aom_dsp/x86/blend_mask_sse4.h b/third_party/aom/aom_dsp/x86/blend_mask_sse4.h
deleted file mode 100644
index c071fdcfc..000000000
--- a/third_party/aom/aom_dsp/x86/blend_mask_sse4.h
+++ /dev/null
@@ -1,237 +0,0 @@
-/*
- * Copyright (c) 2018, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#ifndef AOM_AOM_DSP_X86_BLEND_MASK_SSE4_H_
-#define AOM_AOM_DSP_X86_BLEND_MASK_SSE4_H_
-#include <smmintrin.h> // SSE4.1
-
-#include <assert.h>
-
-#include "aom/aom_integer.h"
-#include "aom_ports/mem.h"
-#include "aom_dsp/aom_dsp_common.h"
-#include "aom_dsp/blend.h"
-
-#include "aom_dsp/x86/synonyms.h"
-
-#include "config/aom_dsp_rtcd.h"
-
-static INLINE void blend_a64_d16_mask_w4_sse41(
- uint8_t *dst, const CONV_BUF_TYPE *src0, const CONV_BUF_TYPE *src1,
- const __m128i *m, const __m128i *v_round_offset, const __m128i *v_maxval,
- int shift) {
- const __m128i max_minus_m = _mm_sub_epi16(*v_maxval, *m);
- const __m128i s0 = xx_loadl_64(src0);
- const __m128i s1 = xx_loadl_64(src1);
- const __m128i s0_s1 = _mm_unpacklo_epi16(s0, s1);
- const __m128i m_max_minus_m = _mm_unpacklo_epi16(*m, max_minus_m);
- const __m128i res_a = _mm_madd_epi16(s0_s1, m_max_minus_m);
- const __m128i res_c = _mm_sub_epi32(res_a, *v_round_offset);
- const __m128i res_d = _mm_srai_epi32(res_c, shift);
- const __m128i res_e = _mm_packs_epi32(res_d, res_d);
- const __m128i res = _mm_packus_epi16(res_e, res_e);
-
- xx_storel_32(dst, res);
-}
-
-static INLINE void blend_a64_d16_mask_w8_sse41(
- uint8_t *dst, const CONV_BUF_TYPE *src0, const CONV_BUF_TYPE *src1,
- const __m128i *m, const __m128i *v_round_offset, const __m128i *v_maxval,
- int shift) {
- const __m128i max_minus_m = _mm_sub_epi16(*v_maxval, *m);
- const __m128i s0 = xx_loadu_128(src0);
- const __m128i s1 = xx_loadu_128(src1);
- __m128i res_lo = _mm_madd_epi16(_mm_unpacklo_epi16(s0, s1),
- _mm_unpacklo_epi16(*m, max_minus_m));
- __m128i res_hi = _mm_madd_epi16(_mm_unpackhi_epi16(s0, s1),
- _mm_unpackhi_epi16(*m, max_minus_m));
- res_lo = _mm_srai_epi32(_mm_sub_epi32(res_lo, *v_round_offset), shift);
- res_hi = _mm_srai_epi32(_mm_sub_epi32(res_hi, *v_round_offset), shift);
- const __m128i res_e = _mm_packs_epi32(res_lo, res_hi);
- const __m128i res = _mm_packus_epi16(res_e, res_e);
-
- _mm_storel_epi64((__m128i *)(dst), res);
-}
-
-static INLINE void aom_lowbd_blend_a64_d16_mask_subw0_subh0_w4_sse4_1(
- uint8_t *dst, uint32_t dst_stride, const CONV_BUF_TYPE *src0,
- uint32_t src0_stride, const CONV_BUF_TYPE *src1, uint32_t src1_stride,
- const uint8_t *mask, uint32_t mask_stride, int h,
- const __m128i *round_offset, int shift) {
- const __m128i v_maxval = _mm_set1_epi16(AOM_BLEND_A64_MAX_ALPHA);
- for (int i = 0; i < h; ++i) {
- const __m128i m0 = xx_loadl_32(mask);
- const __m128i m = _mm_cvtepu8_epi16(m0);
-
- blend_a64_d16_mask_w4_sse41(dst, src0, src1, &m, round_offset, &v_maxval,
- shift);
- mask += mask_stride;
- dst += dst_stride;
- src0 += src0_stride;
- src1 += src1_stride;
- }
-}
-
-static INLINE void aom_lowbd_blend_a64_d16_mask_subw0_subh0_w8_sse4_1(
- uint8_t *dst, uint32_t dst_stride, const CONV_BUF_TYPE *src0,
- uint32_t src0_stride, const CONV_BUF_TYPE *src1, uint32_t src1_stride,
- const uint8_t *mask, uint32_t mask_stride, int h,
- const __m128i *round_offset, int shift) {
- const __m128i v_maxval = _mm_set1_epi16(AOM_BLEND_A64_MAX_ALPHA);
- for (int i = 0; i < h; ++i) {
- const __m128i m0 = xx_loadl_64(mask);
- const __m128i m = _mm_cvtepu8_epi16(m0);
- blend_a64_d16_mask_w8_sse41(dst, src0, src1, &m, round_offset, &v_maxval,
- shift);
- mask += mask_stride;
- dst += dst_stride;
- src0 += src0_stride;
- src1 += src1_stride;
- }
-}
-
-static INLINE void aom_lowbd_blend_a64_d16_mask_subw1_subh1_w4_sse4_1(
- uint8_t *dst, uint32_t dst_stride, const CONV_BUF_TYPE *src0,
- uint32_t src0_stride, const CONV_BUF_TYPE *src1, uint32_t src1_stride,
- const uint8_t *mask, uint32_t mask_stride, int h,
- const __m128i *round_offset, int shift) {
- const __m128i v_maxval = _mm_set1_epi16(AOM_BLEND_A64_MAX_ALPHA);
- const __m128i one_b = _mm_set1_epi8(1);
- const __m128i two_w = _mm_set1_epi16(2);
- for (int i = 0; i < h; ++i) {
- const __m128i m_i0 = xx_loadl_64(mask);
- const __m128i m_i1 = xx_loadl_64(mask + mask_stride);
- const __m128i m_ac = _mm_adds_epu8(m_i0, m_i1);
- const __m128i m_acbd = _mm_maddubs_epi16(m_ac, one_b);
- const __m128i m_acbd_2 = _mm_add_epi16(m_acbd, two_w);
- const __m128i m = _mm_srli_epi16(m_acbd_2, 2);
-
- blend_a64_d16_mask_w4_sse41(dst, src0, src1, &m, round_offset, &v_maxval,
- shift);
- mask += mask_stride << 1;
- dst += dst_stride;
- src0 += src0_stride;
- src1 += src1_stride;
- }
-}
-
-static INLINE void aom_lowbd_blend_a64_d16_mask_subw1_subh1_w8_sse4_1(
- uint8_t *dst, uint32_t dst_stride, const CONV_BUF_TYPE *src0,
- uint32_t src0_stride, const CONV_BUF_TYPE *src1, uint32_t src1_stride,
- const uint8_t *mask, uint32_t mask_stride, int h,
- const __m128i *round_offset, int shift) {
- const __m128i v_maxval = _mm_set1_epi16(AOM_BLEND_A64_MAX_ALPHA);
- const __m128i one_b = _mm_set1_epi8(1);
- const __m128i two_w = _mm_set1_epi16(2);
- for (int i = 0; i < h; ++i) {
- const __m128i m_i0 = xx_loadu_128(mask);
- const __m128i m_i1 = xx_loadu_128(mask + mask_stride);
- const __m128i m_ac = _mm_adds_epu8(m_i0, m_i1);
- const __m128i m_acbd = _mm_maddubs_epi16(m_ac, one_b);
- const __m128i m_acbd_2 = _mm_add_epi16(m_acbd, two_w);
- const __m128i m = _mm_srli_epi16(m_acbd_2, 2);
-
- blend_a64_d16_mask_w8_sse41(dst, src0, src1, &m, round_offset, &v_maxval,
- shift);
- mask += mask_stride << 1;
- dst += dst_stride;
- src0 += src0_stride;
- src1 += src1_stride;
- }
-}
-
-static INLINE void aom_lowbd_blend_a64_d16_mask_subw1_subh0_w4_sse4_1(
- uint8_t *dst, uint32_t dst_stride, const CONV_BUF_TYPE *src0,
- uint32_t src0_stride, const CONV_BUF_TYPE *src1, uint32_t src1_stride,
- const uint8_t *mask, uint32_t mask_stride, int h,
- const __m128i *round_offset, int shift) {
- const __m128i v_maxval = _mm_set1_epi16(AOM_BLEND_A64_MAX_ALPHA);
- const __m128i one_b = _mm_set1_epi8(1);
- const __m128i zeros = _mm_setzero_si128();
- for (int i = 0; i < h; ++i) {
- const __m128i m_i0 = xx_loadl_64(mask);
- const __m128i m_ac = _mm_maddubs_epi16(m_i0, one_b);
- const __m128i m = _mm_avg_epu16(m_ac, zeros);
-
- blend_a64_d16_mask_w4_sse41(dst, src0, src1, &m, round_offset, &v_maxval,
- shift);
- mask += mask_stride;
- dst += dst_stride;
- src0 += src0_stride;
- src1 += src1_stride;
- }
-}
-
-static INLINE void aom_lowbd_blend_a64_d16_mask_subw1_subh0_w8_sse4_1(
- uint8_t *dst, uint32_t dst_stride, const CONV_BUF_TYPE *src0,
- uint32_t src0_stride, const CONV_BUF_TYPE *src1, uint32_t src1_stride,
- const uint8_t *mask, uint32_t mask_stride, int h,
- const __m128i *round_offset, int shift) {
- const __m128i v_maxval = _mm_set1_epi16(AOM_BLEND_A64_MAX_ALPHA);
- const __m128i one_b = _mm_set1_epi8(1);
- const __m128i zeros = _mm_setzero_si128();
- for (int i = 0; i < h; ++i) {
- const __m128i m_i0 = xx_loadu_128(mask);
- const __m128i m_ac = _mm_maddubs_epi16(m_i0, one_b);
- const __m128i m = _mm_avg_epu16(m_ac, zeros);
-
- blend_a64_d16_mask_w8_sse41(dst, src0, src1, &m, round_offset, &v_maxval,
- shift);
- mask += mask_stride;
- dst += dst_stride;
- src0 += src0_stride;
- src1 += src1_stride;
- }
-}
-static INLINE void aom_lowbd_blend_a64_d16_mask_subw0_subh1_w4_sse4_1(
- uint8_t *dst, uint32_t dst_stride, const CONV_BUF_TYPE *src0,
- uint32_t src0_stride, const CONV_BUF_TYPE *src1, uint32_t src1_stride,
- const uint8_t *mask, uint32_t mask_stride, int h,
- const __m128i *round_offset, int shift) {
- const __m128i v_maxval = _mm_set1_epi16(AOM_BLEND_A64_MAX_ALPHA);
- const __m128i zeros = _mm_setzero_si128();
- for (int i = 0; i < h; ++i) {
- const __m128i m_i0 = xx_loadl_64(mask);
- const __m128i m_i1 = xx_loadl_64(mask + mask_stride);
- const __m128i m_ac = _mm_adds_epu8(m_i0, m_i1);
- const __m128i m = _mm_cvtepu8_epi16(_mm_avg_epu8(m_ac, zeros));
-
- blend_a64_d16_mask_w4_sse41(dst, src0, src1, &m, round_offset, &v_maxval,
- shift);
- mask += mask_stride << 1;
- dst += dst_stride;
- src0 += src0_stride;
- src1 += src1_stride;
- }
-}
-
-static INLINE void aom_lowbd_blend_a64_d16_mask_subw0_subh1_w8_sse4_1(
- uint8_t *dst, uint32_t dst_stride, const CONV_BUF_TYPE *src0,
- uint32_t src0_stride, const CONV_BUF_TYPE *src1, uint32_t src1_stride,
- const uint8_t *mask, uint32_t mask_stride, int h,
- const __m128i *round_offset, int shift) {
- const __m128i v_maxval = _mm_set1_epi16(AOM_BLEND_A64_MAX_ALPHA);
- const __m128i zeros = _mm_setzero_si128();
- for (int i = 0; i < h; ++i) {
- const __m128i m_i0 = xx_loadl_64(mask);
- const __m128i m_i1 = xx_loadl_64(mask + mask_stride);
- const __m128i m_ac = _mm_adds_epu8(m_i0, m_i1);
- const __m128i m = _mm_cvtepu8_epi16(_mm_avg_epu8(m_ac, zeros));
-
- blend_a64_d16_mask_w8_sse41(dst, src0, src1, &m, round_offset, &v_maxval,
- shift);
- mask += mask_stride << 1;
- dst += dst_stride;
- src0 += src0_stride;
- src1 += src1_stride;
- }
-}
-#endif // AOM_AOM_DSP_X86_BLEND_MASK_SSE4_H_
diff --git a/third_party/aom/aom_dsp/x86/blend_sse4.h b/third_party/aom/aom_dsp/x86/blend_sse4.h
deleted file mode 100644
index 8d9b32510..000000000
--- a/third_party/aom/aom_dsp/x86/blend_sse4.h
+++ /dev/null
@@ -1,191 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#ifndef AOM_AOM_DSP_X86_BLEND_SSE4_H_
-#define AOM_AOM_DSP_X86_BLEND_SSE4_H_
-
-#include "aom_dsp/blend.h"
-#include "aom_dsp/x86/synonyms.h"
-static const uint8_t g_blend_a64_mask_shuffle[32] = {
- 0, 2, 4, 6, 8, 10, 12, 14, 1, 3, 5, 7, 9, 11, 13, 15,
- 0, 2, 4, 6, 8, 10, 12, 14, 1, 3, 5, 7, 9, 11, 13, 15,
-};
-
-//////////////////////////////////////////////////////////////////////////////
-// Common kernels
-//////////////////////////////////////////////////////////////////////////////
-
-static INLINE __m128i blend_4(const uint8_t *src0, const uint8_t *src1,
- const __m128i *v_m0_w, const __m128i *v_m1_w) {
- const __m128i v_s0_b = xx_loadl_32(src0);
- const __m128i v_s1_b = xx_loadl_32(src1);
- const __m128i v_s0_w = _mm_cvtepu8_epi16(v_s0_b);
- const __m128i v_s1_w = _mm_cvtepu8_epi16(v_s1_b);
-
- const __m128i v_p0_w = _mm_mullo_epi16(v_s0_w, *v_m0_w);
- const __m128i v_p1_w = _mm_mullo_epi16(v_s1_w, *v_m1_w);
- const __m128i v_sum_w = _mm_add_epi16(v_p0_w, v_p1_w);
- const __m128i v_res_w = xx_roundn_epu16(v_sum_w, AOM_BLEND_A64_ROUND_BITS);
-
- return v_res_w;
-}
-
-static INLINE __m128i blend_8(const uint8_t *src0, const uint8_t *src1,
- const __m128i *v_m0_w, const __m128i *v_m1_w) {
- const __m128i v_s0_b = xx_loadl_64(src0);
- const __m128i v_s1_b = xx_loadl_64(src1);
- const __m128i v_s0_w = _mm_cvtepu8_epi16(v_s0_b);
- const __m128i v_s1_w = _mm_cvtepu8_epi16(v_s1_b);
-
- const __m128i v_p0_w = _mm_mullo_epi16(v_s0_w, *v_m0_w);
- const __m128i v_p1_w = _mm_mullo_epi16(v_s1_w, *v_m1_w);
-
- const __m128i v_sum_w = _mm_add_epi16(v_p0_w, v_p1_w);
-
- const __m128i v_res_w = xx_roundn_epu16(v_sum_w, AOM_BLEND_A64_ROUND_BITS);
-
- return v_res_w;
-}
-
-static INLINE __m128i blend_4_u8(const uint8_t *src0, const uint8_t *src1,
- const __m128i *v_m0_b, const __m128i *v_m1_b,
- const __m128i *rounding) {
- const __m128i v_s0_b = xx_loadl_32(src0);
- const __m128i v_s1_b = xx_loadl_32(src1);
-
- const __m128i v_p0_w = _mm_maddubs_epi16(_mm_unpacklo_epi8(v_s0_b, v_s1_b),
- _mm_unpacklo_epi8(*v_m0_b, *v_m1_b));
-
- const __m128i v_res_w = _mm_mulhrs_epi16(v_p0_w, *rounding);
- const __m128i v_res = _mm_packus_epi16(v_res_w, v_res_w);
- return v_res;
-}
-
-static INLINE __m128i blend_8_u8(const uint8_t *src0, const uint8_t *src1,
- const __m128i *v_m0_b, const __m128i *v_m1_b,
- const __m128i *rounding) {
- const __m128i v_s0_b = xx_loadl_64(src0);
- const __m128i v_s1_b = xx_loadl_64(src1);
-
- const __m128i v_p0_w = _mm_maddubs_epi16(_mm_unpacklo_epi8(v_s0_b, v_s1_b),
- _mm_unpacklo_epi8(*v_m0_b, *v_m1_b));
-
- const __m128i v_res_w = _mm_mulhrs_epi16(v_p0_w, *rounding);
- const __m128i v_res = _mm_packus_epi16(v_res_w, v_res_w);
- return v_res;
-}
-
-static INLINE __m128i blend_16_u8(const uint8_t *src0, const uint8_t *src1,
- const __m128i *v_m0_b, const __m128i *v_m1_b,
- const __m128i *rounding) {
- const __m128i v_s0_b = xx_loadu_128(src0);
- const __m128i v_s1_b = xx_loadu_128(src1);
-
- const __m128i v_p0_w = _mm_maddubs_epi16(_mm_unpacklo_epi8(v_s0_b, v_s1_b),
- _mm_unpacklo_epi8(*v_m0_b, *v_m1_b));
- const __m128i v_p1_w = _mm_maddubs_epi16(_mm_unpackhi_epi8(v_s0_b, v_s1_b),
- _mm_unpackhi_epi8(*v_m0_b, *v_m1_b));
-
- const __m128i v_res0_w = _mm_mulhrs_epi16(v_p0_w, *rounding);
- const __m128i v_res1_w = _mm_mulhrs_epi16(v_p1_w, *rounding);
- const __m128i v_res = _mm_packus_epi16(v_res0_w, v_res1_w);
- return v_res;
-}
-
-typedef __m128i (*blend_unit_fn)(const uint16_t *src0, const uint16_t *src1,
- const __m128i v_m0_w, const __m128i v_m1_w);
-
-static INLINE __m128i blend_4_b10(const uint16_t *src0, const uint16_t *src1,
- const __m128i v_m0_w, const __m128i v_m1_w) {
- const __m128i v_s0_w = xx_loadl_64(src0);
- const __m128i v_s1_w = xx_loadl_64(src1);
-
- const __m128i v_p0_w = _mm_mullo_epi16(v_s0_w, v_m0_w);
- const __m128i v_p1_w = _mm_mullo_epi16(v_s1_w, v_m1_w);
-
- const __m128i v_sum_w = _mm_add_epi16(v_p0_w, v_p1_w);
-
- const __m128i v_res_w = xx_roundn_epu16(v_sum_w, AOM_BLEND_A64_ROUND_BITS);
-
- return v_res_w;
-}
-
-static INLINE __m128i blend_8_b10(const uint16_t *src0, const uint16_t *src1,
- const __m128i v_m0_w, const __m128i v_m1_w) {
- const __m128i v_s0_w = xx_loadu_128(src0);
- const __m128i v_s1_w = xx_loadu_128(src1);
-
- const __m128i v_p0_w = _mm_mullo_epi16(v_s0_w, v_m0_w);
- const __m128i v_p1_w = _mm_mullo_epi16(v_s1_w, v_m1_w);
-
- const __m128i v_sum_w = _mm_add_epi16(v_p0_w, v_p1_w);
-
- const __m128i v_res_w = xx_roundn_epu16(v_sum_w, AOM_BLEND_A64_ROUND_BITS);
-
- return v_res_w;
-}
-
-static INLINE __m128i blend_4_b12(const uint16_t *src0, const uint16_t *src1,
- const __m128i v_m0_w, const __m128i v_m1_w) {
- const __m128i v_s0_w = xx_loadl_64(src0);
- const __m128i v_s1_w = xx_loadl_64(src1);
-
- // Interleave
- const __m128i v_m01_w = _mm_unpacklo_epi16(v_m0_w, v_m1_w);
- const __m128i v_s01_w = _mm_unpacklo_epi16(v_s0_w, v_s1_w);
-
- // Multiply-Add
- const __m128i v_sum_d = _mm_madd_epi16(v_s01_w, v_m01_w);
-
- // Scale
- const __m128i v_ssum_d =
- _mm_srli_epi32(v_sum_d, AOM_BLEND_A64_ROUND_BITS - 1);
-
- // Pack
- const __m128i v_pssum_d = _mm_packs_epi32(v_ssum_d, v_ssum_d);
-
- // Round
- const __m128i v_res_w = xx_round_epu16(v_pssum_d);
-
- return v_res_w;
-}
-
-static INLINE __m128i blend_8_b12(const uint16_t *src0, const uint16_t *src1,
- const __m128i v_m0_w, const __m128i v_m1_w) {
- const __m128i v_s0_w = xx_loadu_128(src0);
- const __m128i v_s1_w = xx_loadu_128(src1);
-
- // Interleave
- const __m128i v_m01l_w = _mm_unpacklo_epi16(v_m0_w, v_m1_w);
- const __m128i v_m01h_w = _mm_unpackhi_epi16(v_m0_w, v_m1_w);
- const __m128i v_s01l_w = _mm_unpacklo_epi16(v_s0_w, v_s1_w);
- const __m128i v_s01h_w = _mm_unpackhi_epi16(v_s0_w, v_s1_w);
-
- // Multiply-Add
- const __m128i v_suml_d = _mm_madd_epi16(v_s01l_w, v_m01l_w);
- const __m128i v_sumh_d = _mm_madd_epi16(v_s01h_w, v_m01h_w);
-
- // Scale
- const __m128i v_ssuml_d =
- _mm_srli_epi32(v_suml_d, AOM_BLEND_A64_ROUND_BITS - 1);
- const __m128i v_ssumh_d =
- _mm_srli_epi32(v_sumh_d, AOM_BLEND_A64_ROUND_BITS - 1);
-
- // Pack
- const __m128i v_pssum_d = _mm_packs_epi32(v_ssuml_d, v_ssumh_d);
-
- // Round
- const __m128i v_res_w = xx_round_epu16(v_pssum_d);
-
- return v_res_w;
-}
-
-#endif // AOM_AOM_DSP_X86_BLEND_SSE4_H_
diff --git a/third_party/aom/aom_dsp/x86/common_avx2.h b/third_party/aom/aom_dsp/x86/common_avx2.h
deleted file mode 100644
index 96fe4ebb6..000000000
--- a/third_party/aom/aom_dsp/x86/common_avx2.h
+++ /dev/null
@@ -1,147 +0,0 @@
-/*
- * Copyright (c) 2017, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#ifndef AOM_AOM_DSP_X86_COMMON_AVX2_H_
-#define AOM_AOM_DSP_X86_COMMON_AVX2_H_
-
-#include <immintrin.h>
-
-#include "config/aom_config.h"
-
-// Note: in and out could have the same value
-static INLINE void mm256_transpose_16x16(const __m256i *in, __m256i *out) {
- __m256i tr0_0 = _mm256_unpacklo_epi16(in[0], in[1]);
- __m256i tr0_1 = _mm256_unpackhi_epi16(in[0], in[1]);
- __m256i tr0_2 = _mm256_unpacklo_epi16(in[2], in[3]);
- __m256i tr0_3 = _mm256_unpackhi_epi16(in[2], in[3]);
- __m256i tr0_4 = _mm256_unpacklo_epi16(in[4], in[5]);
- __m256i tr0_5 = _mm256_unpackhi_epi16(in[4], in[5]);
- __m256i tr0_6 = _mm256_unpacklo_epi16(in[6], in[7]);
- __m256i tr0_7 = _mm256_unpackhi_epi16(in[6], in[7]);
-
- __m256i tr0_8 = _mm256_unpacklo_epi16(in[8], in[9]);
- __m256i tr0_9 = _mm256_unpackhi_epi16(in[8], in[9]);
- __m256i tr0_a = _mm256_unpacklo_epi16(in[10], in[11]);
- __m256i tr0_b = _mm256_unpackhi_epi16(in[10], in[11]);
- __m256i tr0_c = _mm256_unpacklo_epi16(in[12], in[13]);
- __m256i tr0_d = _mm256_unpackhi_epi16(in[12], in[13]);
- __m256i tr0_e = _mm256_unpacklo_epi16(in[14], in[15]);
- __m256i tr0_f = _mm256_unpackhi_epi16(in[14], in[15]);
-
- // 00 10 01 11 02 12 03 13 08 18 09 19 0a 1a 0b 1b
- // 04 14 05 15 06 16 07 17 0c 1c 0d 1d 0e 1e 0f 1f
- // 20 30 21 31 22 32 23 33 28 38 29 39 2a 3a 2b 3b
- // 24 34 25 35 26 36 27 37 2c 3c 2d 3d 2e 3e 2f 3f
- // 40 50 41 51 42 52 43 53 48 58 49 59 4a 5a 4b 5b
- // 44 54 45 55 46 56 47 57 4c 5c 4d 5d 4e 5e 4f 5f
- // 60 70 61 71 62 72 63 73 68 78 69 79 6a 7a 6b 7b
- // 64 74 65 75 66 76 67 77 6c 7c 6d 7d 6e 7e 6f 7f
-
- // 80 90 81 91 82 92 83 93 88 98 89 99 8a 9a 8b 9b
- // 84 94 85 95 86 96 87 97 8c 9c 8d 9d 8e 9e 8f 9f
- // a0 b0 a1 b1 a2 b2 a3 b3 a8 b8 a9 b9 aa ba ab bb
- // a4 b4 a5 b5 a6 b6 a7 b7 ac bc ad bd ae be af bf
- // c0 d0 c1 d1 c2 d2 c3 d3 c8 d8 c9 d9 ca da cb db
- // c4 d4 c5 d5 c6 d6 c7 d7 cc dc cd dd ce de cf df
- // e0 f0 e1 f1 e2 f2 e3 f3 e8 f8 e9 f9 ea fa eb fb
- // e4 f4 e5 f5 e6 f6 e7 f7 ec fc ed fd ee fe ef ff
-
- __m256i tr1_0 = _mm256_unpacklo_epi32(tr0_0, tr0_2);
- __m256i tr1_1 = _mm256_unpackhi_epi32(tr0_0, tr0_2);
- __m256i tr1_2 = _mm256_unpacklo_epi32(tr0_1, tr0_3);
- __m256i tr1_3 = _mm256_unpackhi_epi32(tr0_1, tr0_3);
- __m256i tr1_4 = _mm256_unpacklo_epi32(tr0_4, tr0_6);
- __m256i tr1_5 = _mm256_unpackhi_epi32(tr0_4, tr0_6);
- __m256i tr1_6 = _mm256_unpacklo_epi32(tr0_5, tr0_7);
- __m256i tr1_7 = _mm256_unpackhi_epi32(tr0_5, tr0_7);
-
- __m256i tr1_8 = _mm256_unpacklo_epi32(tr0_8, tr0_a);
- __m256i tr1_9 = _mm256_unpackhi_epi32(tr0_8, tr0_a);
- __m256i tr1_a = _mm256_unpacklo_epi32(tr0_9, tr0_b);
- __m256i tr1_b = _mm256_unpackhi_epi32(tr0_9, tr0_b);
- __m256i tr1_c = _mm256_unpacklo_epi32(tr0_c, tr0_e);
- __m256i tr1_d = _mm256_unpackhi_epi32(tr0_c, tr0_e);
- __m256i tr1_e = _mm256_unpacklo_epi32(tr0_d, tr0_f);
- __m256i tr1_f = _mm256_unpackhi_epi32(tr0_d, tr0_f);
-
- // 00 10 20 30 01 11 21 31 08 18 28 38 09 19 29 39
- // 02 12 22 32 03 13 23 33 0a 1a 2a 3a 0b 1b 2b 3b
- // 04 14 24 34 05 15 25 35 0c 1c 2c 3c 0d 1d 2d 3d
- // 06 16 26 36 07 17 27 37 0e 1e 2e 3e 0f 1f 2f 3f
- // 40 50 60 70 41 51 61 71 48 58 68 78 49 59 69 79
- // 42 52 62 72 43 53 63 73 4a 5a 6a 7a 4b 5b 6b 7b
- // 44 54 64 74 45 55 65 75 4c 5c 6c 7c 4d 5d 6d 7d
- // 46 56 66 76 47 57 67 77 4e 5e 6e 7e 4f 5f 6f 7f
-
- // 80 90 a0 b0 81 91 a1 b1 88 98 a8 b8 89 99 a9 b9
- // 82 92 a2 b2 83 93 a3 b3 8a 9a aa ba 8b 9b ab bb
- // 84 94 a4 b4 85 95 a5 b5 8c 9c ac bc 8d 9d ad bd
- // 86 96 a6 b6 87 97 a7 b7 8e 9e ae be 8f 9f af bf
- // c0 d0 e0 f0 c1 d1 e1 f1 c8 d8 e8 f8 c9 d9 e9 f9
- // c2 d2 e2 f2 c3 d3 e3 f3 ca da ea fa cb db eb fb
- // c4 d4 e4 f4 c5 d5 e5 f5 cc dc ec fc cd dd ed fd
- // c6 d6 e6 f6 c7 d7 e7 f7 ce de ee fe cf df ef ff
-
- tr0_0 = _mm256_unpacklo_epi64(tr1_0, tr1_4);
- tr0_1 = _mm256_unpackhi_epi64(tr1_0, tr1_4);
- tr0_2 = _mm256_unpacklo_epi64(tr1_1, tr1_5);
- tr0_3 = _mm256_unpackhi_epi64(tr1_1, tr1_5);
- tr0_4 = _mm256_unpacklo_epi64(tr1_2, tr1_6);
- tr0_5 = _mm256_unpackhi_epi64(tr1_2, tr1_6);
- tr0_6 = _mm256_unpacklo_epi64(tr1_3, tr1_7);
- tr0_7 = _mm256_unpackhi_epi64(tr1_3, tr1_7);
-
- tr0_8 = _mm256_unpacklo_epi64(tr1_8, tr1_c);
- tr0_9 = _mm256_unpackhi_epi64(tr1_8, tr1_c);
- tr0_a = _mm256_unpacklo_epi64(tr1_9, tr1_d);
- tr0_b = _mm256_unpackhi_epi64(tr1_9, tr1_d);
- tr0_c = _mm256_unpacklo_epi64(tr1_a, tr1_e);
- tr0_d = _mm256_unpackhi_epi64(tr1_a, tr1_e);
- tr0_e = _mm256_unpacklo_epi64(tr1_b, tr1_f);
- tr0_f = _mm256_unpackhi_epi64(tr1_b, tr1_f);
-
- // 00 10 20 30 40 50 60 70 08 18 28 38 48 58 68 78
- // 01 11 21 31 41 51 61 71 09 19 29 39 49 59 69 79
- // 02 12 22 32 42 52 62 72 0a 1a 2a 3a 4a 5a 6a 7a
- // 03 13 23 33 43 53 63 73 0b 1b 2b 3b 4b 5b 6b 7b
- // 04 14 24 34 44 54 64 74 0c 1c 2c 3c 4c 5c 6c 7c
- // 05 15 25 35 45 55 65 75 0d 1d 2d 3d 4d 5d 6d 7d
- // 06 16 26 36 46 56 66 76 0e 1e 2e 3e 4e 5e 6e 7e
- // 07 17 27 37 47 57 67 77 0f 1f 2f 3f 4f 5f 6f 7f
-
- // 80 90 a0 b0 c0 d0 e0 f0 88 98 a8 b8 c8 d8 e8 f8
- // 81 91 a1 b1 c1 d1 e1 f1 89 99 a9 b9 c9 d9 e9 f9
- // 82 92 a2 b2 c2 d2 e2 f2 8a 9a aa ba ca da ea fa
- // 83 93 a3 b3 c3 d3 e3 f3 8b 9b ab bb cb db eb fb
- // 84 94 a4 b4 c4 d4 e4 f4 8c 9c ac bc cc dc ec fc
- // 85 95 a5 b5 c5 d5 e5 f5 8d 9d ad bd cd dd ed fd
- // 86 96 a6 b6 c6 d6 e6 f6 8e 9e ae be ce de ee fe
- // 87 97 a7 b7 c7 d7 e7 f7 8f 9f af bf cf df ef ff
-
- out[0] = _mm256_permute2x128_si256(tr0_0, tr0_8, 0x20); // 0010 0000
- out[8] = _mm256_permute2x128_si256(tr0_0, tr0_8, 0x31); // 0011 0001
- out[1] = _mm256_permute2x128_si256(tr0_1, tr0_9, 0x20);
- out[9] = _mm256_permute2x128_si256(tr0_1, tr0_9, 0x31);
- out[2] = _mm256_permute2x128_si256(tr0_2, tr0_a, 0x20);
- out[10] = _mm256_permute2x128_si256(tr0_2, tr0_a, 0x31);
- out[3] = _mm256_permute2x128_si256(tr0_3, tr0_b, 0x20);
- out[11] = _mm256_permute2x128_si256(tr0_3, tr0_b, 0x31);
-
- out[4] = _mm256_permute2x128_si256(tr0_4, tr0_c, 0x20);
- out[12] = _mm256_permute2x128_si256(tr0_4, tr0_c, 0x31);
- out[5] = _mm256_permute2x128_si256(tr0_5, tr0_d, 0x20);
- out[13] = _mm256_permute2x128_si256(tr0_5, tr0_d, 0x31);
- out[6] = _mm256_permute2x128_si256(tr0_6, tr0_e, 0x20);
- out[14] = _mm256_permute2x128_si256(tr0_6, tr0_e, 0x31);
- out[7] = _mm256_permute2x128_si256(tr0_7, tr0_f, 0x20);
- out[15] = _mm256_permute2x128_si256(tr0_7, tr0_f, 0x31);
-}
-#endif // AOM_AOM_DSP_X86_COMMON_AVX2_H_
diff --git a/third_party/aom/aom_dsp/x86/convolve.h b/third_party/aom/aom_dsp/x86/convolve.h
deleted file mode 100644
index 3e19682cd..000000000
--- a/third_party/aom/aom_dsp/x86/convolve.h
+++ /dev/null
@@ -1,178 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-#ifndef AOM_AOM_DSP_X86_CONVOLVE_H_
-#define AOM_AOM_DSP_X86_CONVOLVE_H_
-
-#include <assert.h>
-
-#include "config/aom_config.h"
-
-#include "aom/aom_integer.h"
-#include "aom_ports/mem.h"
-
-typedef void filter8_1dfunction(const uint8_t *src_ptr, ptrdiff_t src_pitch,
- uint8_t *output_ptr, ptrdiff_t out_pitch,
- uint32_t output_height, const int16_t *filter);
-
-#define FUN_CONV_1D(name, step_q4, filter, dir, src_start, avg, opt) \
- void aom_convolve8_##name##_##opt( \
- const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, \
- ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, \
- const int16_t *filter_y, int y_step_q4, int w, int h) { \
- (void)filter_x; \
- (void)x_step_q4; \
- (void)filter_y; \
- (void)y_step_q4; \
- assert((-128 <= filter[3]) && (filter[3] <= 127)); \
- assert(step_q4 == 16); \
- if (((filter[0] | filter[1] | filter[6] | filter[7]) == 0) && \
- (filter[2] | filter[5])) { \
- while (w >= 16) { \
- aom_filter_block1d16_##dir##4_##avg##opt(src_start, src_stride, dst, \
- dst_stride, h, filter); \
- src += 16; \
- dst += 16; \
- w -= 16; \
- } \
- while (w >= 8) { \
- aom_filter_block1d8_##dir##4_##avg##opt(src_start, src_stride, dst, \
- dst_stride, h, filter); \
- src += 8; \
- dst += 8; \
- w -= 8; \
- } \
- while (w >= 4) { \
- aom_filter_block1d4_##dir##4_##avg##opt(src_start, src_stride, dst, \
- dst_stride, h, filter); \
- src += 4; \
- dst += 4; \
- w -= 4; \
- } \
- } else if (filter[0] | filter[1] | filter[2]) { \
- while (w >= 16) { \
- aom_filter_block1d16_##dir##8_##avg##opt(src_start, src_stride, dst, \
- dst_stride, h, filter); \
- src += 16; \
- dst += 16; \
- w -= 16; \
- } \
- while (w >= 8) { \
- aom_filter_block1d8_##dir##8_##avg##opt(src_start, src_stride, dst, \
- dst_stride, h, filter); \
- src += 8; \
- dst += 8; \
- w -= 8; \
- } \
- while (w >= 4) { \
- aom_filter_block1d4_##dir##8_##avg##opt(src_start, src_stride, dst, \
- dst_stride, h, filter); \
- src += 4; \
- dst += 4; \
- w -= 4; \
- } \
- } else { \
- while (w >= 16) { \
- aom_filter_block1d16_##dir##2_##avg##opt(src, src_stride, dst, \
- dst_stride, h, filter); \
- src += 16; \
- dst += 16; \
- w -= 16; \
- } \
- while (w >= 8) { \
- aom_filter_block1d8_##dir##2_##avg##opt(src, src_stride, dst, \
- dst_stride, h, filter); \
- src += 8; \
- dst += 8; \
- w -= 8; \
- } \
- while (w >= 4) { \
- aom_filter_block1d4_##dir##2_##avg##opt(src, src_stride, dst, \
- dst_stride, h, filter); \
- src += 4; \
- dst += 4; \
- w -= 4; \
- } \
- } \
- if (w) { \
- aom_convolve8_##name##_c(src, src_stride, dst, dst_stride, filter_x, \
- x_step_q4, filter_y, y_step_q4, w, h); \
- } \
- }
-
-typedef void highbd_filter8_1dfunction(const uint16_t *src_ptr,
- const ptrdiff_t src_pitch,
- uint16_t *output_ptr,
- ptrdiff_t out_pitch,
- unsigned int output_height,
- const int16_t *filter, int bd);
-
-#define HIGH_FUN_CONV_1D(name, step_q4, filter, dir, src_start, avg, opt) \
- void aom_highbd_convolve8_##name##_##opt( \
- const uint8_t *src8, ptrdiff_t src_stride, uint8_t *dst8, \
- ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, \
- const int16_t *filter_y, int y_step_q4, int w, int h, int bd) { \
- uint16_t *src = CONVERT_TO_SHORTPTR(src8); \
- uint16_t *dst = CONVERT_TO_SHORTPTR(dst8); \
- if (step_q4 == 16 && filter[3] != 128) { \
- if (filter[0] | filter[1] | filter[2]) { \
- while (w >= 16) { \
- aom_highbd_filter_block1d16_##dir##8_##avg##opt( \
- src_start, src_stride, dst, dst_stride, h, filter, bd); \
- src += 16; \
- dst += 16; \
- w -= 16; \
- } \
- while (w >= 8) { \
- aom_highbd_filter_block1d8_##dir##8_##avg##opt( \
- src_start, src_stride, dst, dst_stride, h, filter, bd); \
- src += 8; \
- dst += 8; \
- w -= 8; \
- } \
- while (w >= 4) { \
- aom_highbd_filter_block1d4_##dir##8_##avg##opt( \
- src_start, src_stride, dst, dst_stride, h, filter, bd); \
- src += 4; \
- dst += 4; \
- w -= 4; \
- } \
- } else { \
- while (w >= 16) { \
- aom_highbd_filter_block1d16_##dir##2_##avg##opt( \
- src, src_stride, dst, dst_stride, h, filter, bd); \
- src += 16; \
- dst += 16; \
- w -= 16; \
- } \
- while (w >= 8) { \
- aom_highbd_filter_block1d8_##dir##2_##avg##opt( \
- src, src_stride, dst, dst_stride, h, filter, bd); \
- src += 8; \
- dst += 8; \
- w -= 8; \
- } \
- while (w >= 4) { \
- aom_highbd_filter_block1d4_##dir##2_##avg##opt( \
- src, src_stride, dst, dst_stride, h, filter, bd); \
- src += 4; \
- dst += 4; \
- w -= 4; \
- } \
- } \
- } \
- if (w) { \
- aom_highbd_convolve8_##name##_c( \
- CONVERT_TO_BYTEPTR(src), src_stride, CONVERT_TO_BYTEPTR(dst), \
- dst_stride, filter_x, x_step_q4, filter_y, y_step_q4, w, h, bd); \
- } \
- }
-
-#endif // AOM_AOM_DSP_X86_CONVOLVE_H_
diff --git a/third_party/aom/aom_dsp/x86/convolve_avx2.h b/third_party/aom/aom_dsp/x86/convolve_avx2.h
deleted file mode 100644
index 30253f65c..000000000
--- a/third_party/aom/aom_dsp/x86/convolve_avx2.h
+++ /dev/null
@@ -1,199 +0,0 @@
-/*
- * Copyright (c) 2018, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#ifndef AOM_AOM_DSP_X86_CONVOLVE_AVX2_H_
-#define AOM_AOM_DSP_X86_CONVOLVE_AVX2_H_
-
-// filters for 16
-DECLARE_ALIGNED(32, static const uint8_t, filt_global_avx2[]) = {
- 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, 8, 0, 1, 1,
- 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, 8, 2, 3, 3, 4, 4, 5,
- 5, 6, 6, 7, 7, 8, 8, 9, 9, 10, 2, 3, 3, 4, 4, 5, 5, 6, 6,
- 7, 7, 8, 8, 9, 9, 10, 4, 5, 5, 6, 6, 7, 7, 8, 8, 9, 9, 10,
- 10, 11, 11, 12, 4, 5, 5, 6, 6, 7, 7, 8, 8, 9, 9, 10, 10, 11, 11,
- 12, 6, 7, 7, 8, 8, 9, 9, 10, 10, 11, 11, 12, 12, 13, 13, 14, 6, 7,
- 7, 8, 8, 9, 9, 10, 10, 11, 11, 12, 12, 13, 13, 14
-};
-
-DECLARE_ALIGNED(32, static const uint8_t, filt_d4_global_avx2[]) = {
- 0, 1, 2, 3, 1, 2, 3, 4, 2, 3, 4, 5, 3, 4, 5, 6, 0, 1, 2, 3, 1, 2,
- 3, 4, 2, 3, 4, 5, 3, 4, 5, 6, 4, 5, 6, 7, 5, 6, 7, 8, 6, 7, 8, 9,
- 7, 8, 9, 10, 4, 5, 6, 7, 5, 6, 7, 8, 6, 7, 8, 9, 7, 8, 9, 10,
-};
-
-DECLARE_ALIGNED(32, static const uint8_t, filt4_d4_global_avx2[]) = {
- 2, 3, 4, 5, 3, 4, 5, 6, 4, 5, 6, 7, 5, 6, 7, 8,
- 2, 3, 4, 5, 3, 4, 5, 6, 4, 5, 6, 7, 5, 6, 7, 8,
-};
-
-static INLINE void prepare_coeffs_lowbd(
- const InterpFilterParams *const filter_params, const int subpel_q4,
- __m256i *const coeffs /* [4] */) {
- const int16_t *const filter = av1_get_interp_filter_subpel_kernel(
- filter_params, subpel_q4 & SUBPEL_MASK);
- const __m128i coeffs_8 = _mm_loadu_si128((__m128i *)filter);
- const __m256i filter_coeffs = _mm256_broadcastsi128_si256(coeffs_8);
-
- // right shift all filter co-efficients by 1 to reduce the bits required.
- // This extra right shift will be taken care of at the end while rounding
- // the result.
- // Since all filter co-efficients are even, this change will not affect the
- // end result
- assert(_mm_test_all_zeros(_mm_and_si128(coeffs_8, _mm_set1_epi16(1)),
- _mm_set1_epi16(0xffff)));
-
- const __m256i coeffs_1 = _mm256_srai_epi16(filter_coeffs, 1);
-
- // coeffs 0 1 0 1 0 1 0 1
- coeffs[0] = _mm256_shuffle_epi8(coeffs_1, _mm256_set1_epi16(0x0200u));
- // coeffs 2 3 2 3 2 3 2 3
- coeffs[1] = _mm256_shuffle_epi8(coeffs_1, _mm256_set1_epi16(0x0604u));
- // coeffs 4 5 4 5 4 5 4 5
- coeffs[2] = _mm256_shuffle_epi8(coeffs_1, _mm256_set1_epi16(0x0a08u));
- // coeffs 6 7 6 7 6 7 6 7
- coeffs[3] = _mm256_shuffle_epi8(coeffs_1, _mm256_set1_epi16(0x0e0cu));
-}
-
-static INLINE void prepare_coeffs(const InterpFilterParams *const filter_params,
- const int subpel_q4,
- __m256i *const coeffs /* [4] */) {
- const int16_t *filter = av1_get_interp_filter_subpel_kernel(
- filter_params, subpel_q4 & SUBPEL_MASK);
-
- const __m128i coeff_8 = _mm_loadu_si128((__m128i *)filter);
- const __m256i coeff = _mm256_broadcastsi128_si256(coeff_8);
-
- // coeffs 0 1 0 1 0 1 0 1
- coeffs[0] = _mm256_shuffle_epi32(coeff, 0x00);
- // coeffs 2 3 2 3 2 3 2 3
- coeffs[1] = _mm256_shuffle_epi32(coeff, 0x55);
- // coeffs 4 5 4 5 4 5 4 5
- coeffs[2] = _mm256_shuffle_epi32(coeff, 0xaa);
- // coeffs 6 7 6 7 6 7 6 7
- coeffs[3] = _mm256_shuffle_epi32(coeff, 0xff);
-}
-
-static INLINE __m256i convolve_lowbd(const __m256i *const s,
- const __m256i *const coeffs) {
- const __m256i res_01 = _mm256_maddubs_epi16(s[0], coeffs[0]);
- const __m256i res_23 = _mm256_maddubs_epi16(s[1], coeffs[1]);
- const __m256i res_45 = _mm256_maddubs_epi16(s[2], coeffs[2]);
- const __m256i res_67 = _mm256_maddubs_epi16(s[3], coeffs[3]);
-
- // order: 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15
- const __m256i res = _mm256_add_epi16(_mm256_add_epi16(res_01, res_45),
- _mm256_add_epi16(res_23, res_67));
-
- return res;
-}
-
-static INLINE __m256i convolve(const __m256i *const s,
- const __m256i *const coeffs) {
- const __m256i res_0 = _mm256_madd_epi16(s[0], coeffs[0]);
- const __m256i res_1 = _mm256_madd_epi16(s[1], coeffs[1]);
- const __m256i res_2 = _mm256_madd_epi16(s[2], coeffs[2]);
- const __m256i res_3 = _mm256_madd_epi16(s[3], coeffs[3]);
-
- const __m256i res = _mm256_add_epi32(_mm256_add_epi32(res_0, res_1),
- _mm256_add_epi32(res_2, res_3));
-
- return res;
-}
-
-static INLINE __m256i convolve_lowbd_x(const __m256i data,
- const __m256i *const coeffs,
- const __m256i *const filt) {
- __m256i s[4];
-
- s[0] = _mm256_shuffle_epi8(data, filt[0]);
- s[1] = _mm256_shuffle_epi8(data, filt[1]);
- s[2] = _mm256_shuffle_epi8(data, filt[2]);
- s[3] = _mm256_shuffle_epi8(data, filt[3]);
-
- return convolve_lowbd(s, coeffs);
-}
-
-static INLINE void add_store_aligned_256(CONV_BUF_TYPE *const dst,
- const __m256i *const res,
- const int do_average) {
- __m256i d;
- if (do_average) {
- d = _mm256_load_si256((__m256i *)dst);
- d = _mm256_add_epi32(d, *res);
- d = _mm256_srai_epi32(d, 1);
- } else {
- d = *res;
- }
- _mm256_store_si256((__m256i *)dst, d);
-}
-
-static INLINE __m256i comp_avg(const __m256i *const data_ref_0,
- const __m256i *const res_unsigned,
- const __m256i *const wt,
- const int use_jnt_comp_avg) {
- __m256i res;
- if (use_jnt_comp_avg) {
- const __m256i data_lo = _mm256_unpacklo_epi16(*data_ref_0, *res_unsigned);
- const __m256i data_hi = _mm256_unpackhi_epi16(*data_ref_0, *res_unsigned);
-
- const __m256i wt_res_lo = _mm256_madd_epi16(data_lo, *wt);
- const __m256i wt_res_hi = _mm256_madd_epi16(data_hi, *wt);
-
- const __m256i res_lo = _mm256_srai_epi32(wt_res_lo, DIST_PRECISION_BITS);
- const __m256i res_hi = _mm256_srai_epi32(wt_res_hi, DIST_PRECISION_BITS);
-
- res = _mm256_packs_epi32(res_lo, res_hi);
- } else {
- const __m256i wt_res = _mm256_add_epi16(*data_ref_0, *res_unsigned);
- res = _mm256_srai_epi16(wt_res, 1);
- }
- return res;
-}
-
-static INLINE __m256i convolve_rounding(const __m256i *const res_unsigned,
- const __m256i *const offset_const,
- const __m256i *const round_const,
- const int round_shift) {
- const __m256i res_signed = _mm256_sub_epi16(*res_unsigned, *offset_const);
- const __m256i res_round = _mm256_srai_epi16(
- _mm256_add_epi16(res_signed, *round_const), round_shift);
- return res_round;
-}
-
-static INLINE __m256i highbd_comp_avg(const __m256i *const data_ref_0,
- const __m256i *const res_unsigned,
- const __m256i *const wt0,
- const __m256i *const wt1,
- const int use_jnt_comp_avg) {
- __m256i res;
- if (use_jnt_comp_avg) {
- const __m256i wt0_res = _mm256_mullo_epi32(*data_ref_0, *wt0);
- const __m256i wt1_res = _mm256_mullo_epi32(*res_unsigned, *wt1);
- const __m256i wt_res = _mm256_add_epi32(wt0_res, wt1_res);
- res = _mm256_srai_epi32(wt_res, DIST_PRECISION_BITS);
- } else {
- const __m256i wt_res = _mm256_add_epi32(*data_ref_0, *res_unsigned);
- res = _mm256_srai_epi32(wt_res, 1);
- }
- return res;
-}
-
-static INLINE __m256i highbd_convolve_rounding(
- const __m256i *const res_unsigned, const __m256i *const offset_const,
- const __m256i *const round_const, const int round_shift) {
- const __m256i res_signed = _mm256_sub_epi32(*res_unsigned, *offset_const);
- const __m256i res_round = _mm256_srai_epi32(
- _mm256_add_epi32(res_signed, *round_const), round_shift);
-
- return res_round;
-}
-
-#endif // AOM_AOM_DSP_X86_CONVOLVE_AVX2_H_
diff --git a/third_party/aom/aom_dsp/x86/convolve_common_intrin.h b/third_party/aom/aom_dsp/x86/convolve_common_intrin.h
deleted file mode 100644
index 707bd2d78..000000000
--- a/third_party/aom/aom_dsp/x86/convolve_common_intrin.h
+++ /dev/null
@@ -1,31 +0,0 @@
-/*
- * Copyright (c) 2018, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#ifndef AOM_AOM_DSP_X86_CONVOLVE_COMMON_INTRIN_H_
-#define AOM_AOM_DSP_X86_CONVOLVE_COMMON_INTRIN_H_
-
-// Note:
-// This header file should be included after any x86 intrinsics header file
-
-static INLINE void add_store(CONV_BUF_TYPE *const dst, const __m128i *const res,
- const int do_average) {
- __m128i d;
- if (do_average) {
- d = _mm_load_si128((__m128i *)dst);
- d = _mm_add_epi32(d, *res);
- d = _mm_srai_epi32(d, 1);
- } else {
- d = *res;
- }
- _mm_store_si128((__m128i *)dst, d);
-}
-
-#endif // AOM_AOM_DSP_X86_CONVOLVE_COMMON_INTRIN_H_
diff --git a/third_party/aom/aom_dsp/x86/convolve_sse2.h b/third_party/aom/aom_dsp/x86/convolve_sse2.h
deleted file mode 100644
index 445d04b10..000000000
--- a/third_party/aom/aom_dsp/x86/convolve_sse2.h
+++ /dev/null
@@ -1,121 +0,0 @@
-/*
- * Copyright (c) 2018, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#ifndef AOM_AOM_DSP_X86_CONVOLVE_SSE2_H_
-#define AOM_AOM_DSP_X86_CONVOLVE_SSE2_H_
-
-// Note:
-// This header file should be included after any x86 intrinsics header file
-
-static INLINE void prepare_coeffs(const InterpFilterParams *const filter_params,
- const int subpel_q4,
- __m128i *const coeffs /* [4] */) {
- const int16_t *filter = av1_get_interp_filter_subpel_kernel(
- filter_params, subpel_q4 & SUBPEL_MASK);
- const __m128i coeff = _mm_loadu_si128((__m128i *)filter);
-
- // coeffs 0 1 0 1 0 1 0 1
- coeffs[0] = _mm_shuffle_epi32(coeff, 0x00);
- // coeffs 2 3 2 3 2 3 2 3
- coeffs[1] = _mm_shuffle_epi32(coeff, 0x55);
- // coeffs 4 5 4 5 4 5 4 5
- coeffs[2] = _mm_shuffle_epi32(coeff, 0xaa);
- // coeffs 6 7 6 7 6 7 6 7
- coeffs[3] = _mm_shuffle_epi32(coeff, 0xff);
-}
-
-static INLINE __m128i convolve(const __m128i *const s,
- const __m128i *const coeffs) {
- const __m128i res_0 = _mm_madd_epi16(s[0], coeffs[0]);
- const __m128i res_1 = _mm_madd_epi16(s[1], coeffs[1]);
- const __m128i res_2 = _mm_madd_epi16(s[2], coeffs[2]);
- const __m128i res_3 = _mm_madd_epi16(s[3], coeffs[3]);
-
- const __m128i res =
- _mm_add_epi32(_mm_add_epi32(res_0, res_1), _mm_add_epi32(res_2, res_3));
-
- return res;
-}
-
-static INLINE __m128i convolve_lo_x(const __m128i *const s,
- const __m128i *const coeffs) {
- __m128i ss[4];
- ss[0] = _mm_unpacklo_epi8(s[0], _mm_setzero_si128());
- ss[1] = _mm_unpacklo_epi8(s[1], _mm_setzero_si128());
- ss[2] = _mm_unpacklo_epi8(s[2], _mm_setzero_si128());
- ss[3] = _mm_unpacklo_epi8(s[3], _mm_setzero_si128());
- return convolve(ss, coeffs);
-}
-
-static INLINE __m128i convolve_lo_y(const __m128i *const s,
- const __m128i *const coeffs) {
- __m128i ss[4];
- ss[0] = _mm_unpacklo_epi8(s[0], _mm_setzero_si128());
- ss[1] = _mm_unpacklo_epi8(s[2], _mm_setzero_si128());
- ss[2] = _mm_unpacklo_epi8(s[4], _mm_setzero_si128());
- ss[3] = _mm_unpacklo_epi8(s[6], _mm_setzero_si128());
- return convolve(ss, coeffs);
-}
-
-static INLINE __m128i convolve_hi_y(const __m128i *const s,
- const __m128i *const coeffs) {
- __m128i ss[4];
- ss[0] = _mm_unpackhi_epi8(s[0], _mm_setzero_si128());
- ss[1] = _mm_unpackhi_epi8(s[2], _mm_setzero_si128());
- ss[2] = _mm_unpackhi_epi8(s[4], _mm_setzero_si128());
- ss[3] = _mm_unpackhi_epi8(s[6], _mm_setzero_si128());
- return convolve(ss, coeffs);
-}
-
-static INLINE __m128i comp_avg(const __m128i *const data_ref_0,
- const __m128i *const res_unsigned,
- const __m128i *const wt,
- const int use_jnt_comp_avg) {
- __m128i res;
- if (use_jnt_comp_avg) {
- const __m128i data_lo = _mm_unpacklo_epi16(*data_ref_0, *res_unsigned);
- const __m128i data_hi = _mm_unpackhi_epi16(*data_ref_0, *res_unsigned);
-
- const __m128i wt_res_lo = _mm_madd_epi16(data_lo, *wt);
- const __m128i wt_res_hi = _mm_madd_epi16(data_hi, *wt);
-
- const __m128i res_lo = _mm_srai_epi32(wt_res_lo, DIST_PRECISION_BITS);
- const __m128i res_hi = _mm_srai_epi32(wt_res_hi, DIST_PRECISION_BITS);
-
- res = _mm_packs_epi32(res_lo, res_hi);
- } else {
- const __m128i wt_res = _mm_add_epi16(*data_ref_0, *res_unsigned);
- res = _mm_srai_epi16(wt_res, 1);
- }
- return res;
-}
-
-static INLINE __m128i convolve_rounding(const __m128i *const res_unsigned,
- const __m128i *const offset_const,
- const __m128i *const round_const,
- const int round_shift) {
- const __m128i res_signed = _mm_sub_epi16(*res_unsigned, *offset_const);
- const __m128i res_round =
- _mm_srai_epi16(_mm_add_epi16(res_signed, *round_const), round_shift);
- return res_round;
-}
-
-static INLINE __m128i highbd_convolve_rounding_sse2(
- const __m128i *const res_unsigned, const __m128i *const offset_const,
- const __m128i *const round_const, const int round_shift) {
- const __m128i res_signed = _mm_sub_epi32(*res_unsigned, *offset_const);
- const __m128i res_round =
- _mm_srai_epi32(_mm_add_epi32(res_signed, *round_const), round_shift);
-
- return res_round;
-}
-
-#endif // AOM_AOM_DSP_X86_CONVOLVE_SSE2_H_
diff --git a/third_party/aom/aom_dsp/x86/convolve_sse4_1.h b/third_party/aom/aom_dsp/x86/convolve_sse4_1.h
deleted file mode 100644
index 6b8388d84..000000000
--- a/third_party/aom/aom_dsp/x86/convolve_sse4_1.h
+++ /dev/null
@@ -1,53 +0,0 @@
-/*
- * Copyright (c) 2018, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#ifndef AOM_AOM_DSP_X86_CONVOLVE_SSE4_1_H_
-#define AOM_AOM_DSP_X86_CONVOLVE_SSE4_1_H_
-
-// Note:
-// This header file should be included after any x86 intrinsics header file
-
-static INLINE void mult_add_store(CONV_BUF_TYPE *const dst,
- const __m128i *const res,
- const __m128i *const wt0,
- const __m128i *const wt1,
- const int do_average) {
- __m128i d;
- if (do_average) {
- d = _mm_load_si128((__m128i *)dst);
- d = _mm_add_epi32(_mm_mullo_epi32(d, *wt0), _mm_mullo_epi32(*res, *wt1));
- d = _mm_srai_epi32(d, DIST_PRECISION_BITS);
- } else {
- d = *res;
- }
- _mm_store_si128((__m128i *)dst, d);
-}
-
-static INLINE __m128i highbd_comp_avg_sse4_1(const __m128i *const data_ref_0,
- const __m128i *const res_unsigned,
- const __m128i *const wt0,
- const __m128i *const wt1,
- const int use_jnt_comp_avg) {
- __m128i res;
- if (use_jnt_comp_avg) {
- const __m128i wt0_res = _mm_mullo_epi32(*data_ref_0, *wt0);
- const __m128i wt1_res = _mm_mullo_epi32(*res_unsigned, *wt1);
-
- const __m128i wt_res = _mm_add_epi32(wt0_res, wt1_res);
- res = _mm_srai_epi32(wt_res, DIST_PRECISION_BITS);
- } else {
- const __m128i wt_res = _mm_add_epi32(*data_ref_0, *res_unsigned);
- res = _mm_srai_epi32(wt_res, 1);
- }
- return res;
-}
-
-#endif // AOM_AOM_DSP_X86_CONVOLVE_SSE4_1_H_
diff --git a/third_party/aom/aom_dsp/x86/fft_avx2.c b/third_party/aom/aom_dsp/x86/fft_avx2.c
deleted file mode 100644
index 54da02253..000000000
--- a/third_party/aom/aom_dsp/x86/fft_avx2.c
+++ /dev/null
@@ -1,73 +0,0 @@
-/*
- * Copyright (c) 2018, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#include <immintrin.h>
-
-#include "aom_dsp/aom_dsp_common.h"
-#include "aom_dsp/fft_common.h"
-
-extern void aom_transpose_float_sse2(const float *A, float *B, int n);
-extern void aom_fft_unpack_2d_output_sse2(const float *col_fft, float *output,
- int n);
-
-// Generate the 1d forward transforms for float using _mm256
-GEN_FFT_8(static INLINE void, avx2, float, __m256, _mm256_load_ps,
- _mm256_store_ps, _mm256_set1_ps, _mm256_add_ps, _mm256_sub_ps,
- _mm256_mul_ps);
-GEN_FFT_16(static INLINE void, avx2, float, __m256, _mm256_load_ps,
- _mm256_store_ps, _mm256_set1_ps, _mm256_add_ps, _mm256_sub_ps,
- _mm256_mul_ps);
-GEN_FFT_32(static INLINE void, avx2, float, __m256, _mm256_load_ps,
- _mm256_store_ps, _mm256_set1_ps, _mm256_add_ps, _mm256_sub_ps,
- _mm256_mul_ps);
-
-void aom_fft8x8_float_avx2(const float *input, float *temp, float *output) {
- aom_fft_2d_gen(input, temp, output, 8, aom_fft1d_8_avx2,
- aom_transpose_float_sse2, aom_fft_unpack_2d_output_sse2, 8);
-}
-
-void aom_fft16x16_float_avx2(const float *input, float *temp, float *output) {
- aom_fft_2d_gen(input, temp, output, 16, aom_fft1d_16_avx2,
- aom_transpose_float_sse2, aom_fft_unpack_2d_output_sse2, 8);
-}
-
-void aom_fft32x32_float_avx2(const float *input, float *temp, float *output) {
- aom_fft_2d_gen(input, temp, output, 32, aom_fft1d_32_avx2,
- aom_transpose_float_sse2, aom_fft_unpack_2d_output_sse2, 8);
-}
-
-// Generate the 1d inverse transforms for float using _mm256
-GEN_IFFT_8(static INLINE void, avx2, float, __m256, _mm256_load_ps,
- _mm256_store_ps, _mm256_set1_ps, _mm256_add_ps, _mm256_sub_ps,
- _mm256_mul_ps);
-GEN_IFFT_16(static INLINE void, avx2, float, __m256, _mm256_load_ps,
- _mm256_store_ps, _mm256_set1_ps, _mm256_add_ps, _mm256_sub_ps,
- _mm256_mul_ps);
-GEN_IFFT_32(static INLINE void, avx2, float, __m256, _mm256_load_ps,
- _mm256_store_ps, _mm256_set1_ps, _mm256_add_ps, _mm256_sub_ps,
- _mm256_mul_ps);
-
-void aom_ifft8x8_float_avx2(const float *input, float *temp, float *output) {
- aom_ifft_2d_gen(input, temp, output, 8, aom_fft1d_8_float, aom_fft1d_8_avx2,
- aom_ifft1d_8_avx2, aom_transpose_float_sse2, 8);
-}
-
-void aom_ifft16x16_float_avx2(const float *input, float *temp, float *output) {
- aom_ifft_2d_gen(input, temp, output, 16, aom_fft1d_16_float,
- aom_fft1d_16_avx2, aom_ifft1d_16_avx2,
- aom_transpose_float_sse2, 8);
-}
-
-void aom_ifft32x32_float_avx2(const float *input, float *temp, float *output) {
- aom_ifft_2d_gen(input, temp, output, 32, aom_fft1d_32_float,
- aom_fft1d_32_avx2, aom_ifft1d_32_avx2,
- aom_transpose_float_sse2, 8);
-}
diff --git a/third_party/aom/aom_dsp/x86/fft_sse2.c b/third_party/aom/aom_dsp/x86/fft_sse2.c
deleted file mode 100644
index 12bdc3e18..000000000
--- a/third_party/aom/aom_dsp/x86/fft_sse2.c
+++ /dev/null
@@ -1,166 +0,0 @@
-/*
- * Copyright (c) 2018, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
-s * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#include <xmmintrin.h>
-
-#include "aom_dsp/aom_dsp_common.h"
-#include "aom_dsp/fft_common.h"
-
-static INLINE void transpose4x4(const float *A, float *B, const int lda,
- const int ldb) {
- __m128 row1 = _mm_load_ps(&A[0 * lda]);
- __m128 row2 = _mm_load_ps(&A[1 * lda]);
- __m128 row3 = _mm_load_ps(&A[2 * lda]);
- __m128 row4 = _mm_load_ps(&A[3 * lda]);
- _MM_TRANSPOSE4_PS(row1, row2, row3, row4);
- _mm_store_ps(&B[0 * ldb], row1);
- _mm_store_ps(&B[1 * ldb], row2);
- _mm_store_ps(&B[2 * ldb], row3);
- _mm_store_ps(&B[3 * ldb], row4);
-}
-
-void aom_transpose_float_sse2(const float *A, float *B, int n) {
- for (int y = 0; y < n; y += 4) {
- for (int x = 0; x < n; x += 4) {
- transpose4x4(A + y * n + x, B + x * n + y, n, n);
- }
- }
-}
-
-void aom_fft_unpack_2d_output_sse2(const float *packed, float *output, int n) {
- const int n2 = n / 2;
- output[0] = packed[0];
- output[1] = 0;
- output[2 * (n2 * n)] = packed[n2 * n];
- output[2 * (n2 * n) + 1] = 0;
-
- output[2 * n2] = packed[n2];
- output[2 * n2 + 1] = 0;
- output[2 * (n2 * n + n2)] = packed[n2 * n + n2];
- output[2 * (n2 * n + n2) + 1] = 0;
-
- for (int c = 1; c < n2; ++c) {
- output[2 * (0 * n + c)] = packed[c];
- output[2 * (0 * n + c) + 1] = packed[c + n2];
- output[2 * (n2 * n + c) + 0] = packed[n2 * n + c];
- output[2 * (n2 * n + c) + 1] = packed[n2 * n + c + n2];
- }
- for (int r = 1; r < n2; ++r) {
- output[2 * (r * n + 0)] = packed[r * n];
- output[2 * (r * n + 0) + 1] = packed[(r + n2) * n];
- output[2 * (r * n + n2) + 0] = packed[r * n + n2];
- output[2 * (r * n + n2) + 1] = packed[(r + n2) * n + n2];
-
- for (int c = 1; c < AOMMIN(n2, 4); ++c) {
- output[2 * (r * n + c)] =
- packed[r * n + c] - packed[(r + n2) * n + c + n2];
- output[2 * (r * n + c) + 1] =
- packed[(r + n2) * n + c] + packed[r * n + c + n2];
- }
-
- for (int c = 4; c < n2; c += 4) {
- __m128 real1 = _mm_load_ps(packed + r * n + c);
- __m128 real2 = _mm_load_ps(packed + (r + n2) * n + c + n2);
- __m128 imag1 = _mm_load_ps(packed + (r + n2) * n + c);
- __m128 imag2 = _mm_load_ps(packed + r * n + c + n2);
- real1 = _mm_sub_ps(real1, real2);
- imag1 = _mm_add_ps(imag1, imag2);
- _mm_store_ps(output + 2 * (r * n + c), _mm_unpacklo_ps(real1, imag1));
- _mm_store_ps(output + 2 * (r * n + c + 2), _mm_unpackhi_ps(real1, imag1));
- }
-
- int r2 = r + n2;
- int r3 = n - r2;
- output[2 * (r2 * n + 0)] = packed[r3 * n];
- output[2 * (r2 * n + 0) + 1] = -packed[(r3 + n2) * n];
- output[2 * (r2 * n + n2)] = packed[r3 * n + n2];
- output[2 * (r2 * n + n2) + 1] = -packed[(r3 + n2) * n + n2];
- for (int c = 1; c < AOMMIN(4, n2); ++c) {
- output[2 * (r2 * n + c)] =
- packed[r3 * n + c] + packed[(r3 + n2) * n + c + n2];
- output[2 * (r2 * n + c) + 1] =
- -packed[(r3 + n2) * n + c] + packed[r3 * n + c + n2];
- }
- for (int c = 4; c < n2; c += 4) {
- __m128 real1 = _mm_load_ps(packed + r3 * n + c);
- __m128 real2 = _mm_load_ps(packed + (r3 + n2) * n + c + n2);
- __m128 imag1 = _mm_load_ps(packed + (r3 + n2) * n + c);
- __m128 imag2 = _mm_load_ps(packed + r3 * n + c + n2);
- real1 = _mm_add_ps(real1, real2);
- imag1 = _mm_sub_ps(imag2, imag1);
- _mm_store_ps(output + 2 * (r2 * n + c), _mm_unpacklo_ps(real1, imag1));
- _mm_store_ps(output + 2 * (r2 * n + c + 2),
- _mm_unpackhi_ps(real1, imag1));
- }
- }
-}
-
-// Generate definitions for 1d transforms using float and __mm128
-GEN_FFT_4(static INLINE void, sse2, float, __m128, _mm_load_ps, _mm_store_ps,
- _mm_set1_ps, _mm_add_ps, _mm_sub_ps);
-GEN_FFT_8(static INLINE void, sse2, float, __m128, _mm_load_ps, _mm_store_ps,
- _mm_set1_ps, _mm_add_ps, _mm_sub_ps, _mm_mul_ps);
-GEN_FFT_16(static INLINE void, sse2, float, __m128, _mm_load_ps, _mm_store_ps,
- _mm_set1_ps, _mm_add_ps, _mm_sub_ps, _mm_mul_ps);
-GEN_FFT_32(static INLINE void, sse2, float, __m128, _mm_load_ps, _mm_store_ps,
- _mm_set1_ps, _mm_add_ps, _mm_sub_ps, _mm_mul_ps);
-
-void aom_fft4x4_float_sse2(const float *input, float *temp, float *output) {
- aom_fft_2d_gen(input, temp, output, 4, aom_fft1d_4_sse2,
- aom_transpose_float_sse2, aom_fft_unpack_2d_output_sse2, 4);
-}
-
-void aom_fft8x8_float_sse2(const float *input, float *temp, float *output) {
- aom_fft_2d_gen(input, temp, output, 8, aom_fft1d_8_sse2,
- aom_transpose_float_sse2, aom_fft_unpack_2d_output_sse2, 4);
-}
-
-void aom_fft16x16_float_sse2(const float *input, float *temp, float *output) {
- aom_fft_2d_gen(input, temp, output, 16, aom_fft1d_16_sse2,
- aom_transpose_float_sse2, aom_fft_unpack_2d_output_sse2, 4);
-}
-
-void aom_fft32x32_float_sse2(const float *input, float *temp, float *output) {
- aom_fft_2d_gen(input, temp, output, 32, aom_fft1d_32_sse2,
- aom_transpose_float_sse2, aom_fft_unpack_2d_output_sse2, 4);
-}
-
-// Generate definitions for 1d inverse transforms using float and mm128
-GEN_IFFT_4(static INLINE void, sse2, float, __m128, _mm_load_ps, _mm_store_ps,
- _mm_set1_ps, _mm_add_ps, _mm_sub_ps);
-GEN_IFFT_8(static INLINE void, sse2, float, __m128, _mm_load_ps, _mm_store_ps,
- _mm_set1_ps, _mm_add_ps, _mm_sub_ps, _mm_mul_ps);
-GEN_IFFT_16(static INLINE void, sse2, float, __m128, _mm_load_ps, _mm_store_ps,
- _mm_set1_ps, _mm_add_ps, _mm_sub_ps, _mm_mul_ps);
-GEN_IFFT_32(static INLINE void, sse2, float, __m128, _mm_load_ps, _mm_store_ps,
- _mm_set1_ps, _mm_add_ps, _mm_sub_ps, _mm_mul_ps);
-
-void aom_ifft4x4_float_sse2(const float *input, float *temp, float *output) {
- aom_ifft_2d_gen(input, temp, output, 4, aom_fft1d_4_float, aom_fft1d_4_sse2,
- aom_ifft1d_4_sse2, aom_transpose_float_sse2, 4);
-}
-
-void aom_ifft8x8_float_sse2(const float *input, float *temp, float *output) {
- aom_ifft_2d_gen(input, temp, output, 8, aom_fft1d_8_float, aom_fft1d_8_sse2,
- aom_ifft1d_8_sse2, aom_transpose_float_sse2, 4);
-}
-
-void aom_ifft16x16_float_sse2(const float *input, float *temp, float *output) {
- aom_ifft_2d_gen(input, temp, output, 16, aom_fft1d_16_float,
- aom_fft1d_16_sse2, aom_ifft1d_16_sse2,
- aom_transpose_float_sse2, 4);
-}
-
-void aom_ifft32x32_float_sse2(const float *input, float *temp, float *output) {
- aom_ifft_2d_gen(input, temp, output, 32, aom_fft1d_32_float,
- aom_fft1d_32_sse2, aom_ifft1d_32_sse2,
- aom_transpose_float_sse2, 4);
-}
diff --git a/third_party/aom/aom_dsp/x86/fwd_txfm_impl_sse2.h b/third_party/aom/aom_dsp/x86/fwd_txfm_impl_sse2.h
deleted file mode 100644
index 1e3d13ec8..000000000
--- a/third_party/aom/aom_dsp/x86/fwd_txfm_impl_sse2.h
+++ /dev/null
@@ -1,344 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#include <emmintrin.h> // SSE2
-
-#include "config/aom_dsp_rtcd.h"
-
-#include "aom_dsp/txfm_common.h"
-#include "aom_dsp/x86/fwd_txfm_sse2.h"
-#include "aom_dsp/x86/txfm_common_sse2.h"
-#include "aom_ports/mem.h"
-
-// TODO(jingning) The high bit-depth functions need rework for performance.
-// After we properly fix the high bit-depth function implementations, this
-// file's dependency should be substantially simplified.
-#if DCT_HIGH_BIT_DEPTH
-#define ADD_EPI16 _mm_adds_epi16
-#define SUB_EPI16 _mm_subs_epi16
-
-#else
-#define ADD_EPI16 _mm_add_epi16
-#define SUB_EPI16 _mm_sub_epi16
-#endif
-
-void FDCT8x8_2D(const int16_t *input, tran_low_t *output, int stride) {
- int pass;
- // Constants
- // When we use them, in one case, they are all the same. In all others
- // it's a pair of them that we need to repeat four times. This is done
- // by constructing the 32 bit constant corresponding to that pair.
- const __m128i k__cospi_p16_p16 = _mm_set1_epi16((int16_t)cospi_16_64);
- const __m128i k__cospi_p16_m16 = pair_set_epi16(cospi_16_64, -cospi_16_64);
- const __m128i k__cospi_p24_p08 = pair_set_epi16(cospi_24_64, cospi_8_64);
- const __m128i k__cospi_m08_p24 = pair_set_epi16(-cospi_8_64, cospi_24_64);
- const __m128i k__cospi_p28_p04 = pair_set_epi16(cospi_28_64, cospi_4_64);
- const __m128i k__cospi_m04_p28 = pair_set_epi16(-cospi_4_64, cospi_28_64);
- const __m128i k__cospi_p12_p20 = pair_set_epi16(cospi_12_64, cospi_20_64);
- const __m128i k__cospi_m20_p12 = pair_set_epi16(-cospi_20_64, cospi_12_64);
- const __m128i k__DCT_CONST_ROUNDING = _mm_set1_epi32(DCT_CONST_ROUNDING);
-#if DCT_HIGH_BIT_DEPTH
- int overflow;
-#endif
- // Load input
- __m128i in0 = _mm_load_si128((const __m128i *)(input + 0 * stride));
- __m128i in1 = _mm_load_si128((const __m128i *)(input + 1 * stride));
- __m128i in2 = _mm_load_si128((const __m128i *)(input + 2 * stride));
- __m128i in3 = _mm_load_si128((const __m128i *)(input + 3 * stride));
- __m128i in4 = _mm_load_si128((const __m128i *)(input + 4 * stride));
- __m128i in5 = _mm_load_si128((const __m128i *)(input + 5 * stride));
- __m128i in6 = _mm_load_si128((const __m128i *)(input + 6 * stride));
- __m128i in7 = _mm_load_si128((const __m128i *)(input + 7 * stride));
- // Pre-condition input (shift by two)
- in0 = _mm_slli_epi16(in0, 2);
- in1 = _mm_slli_epi16(in1, 2);
- in2 = _mm_slli_epi16(in2, 2);
- in3 = _mm_slli_epi16(in3, 2);
- in4 = _mm_slli_epi16(in4, 2);
- in5 = _mm_slli_epi16(in5, 2);
- in6 = _mm_slli_epi16(in6, 2);
- in7 = _mm_slli_epi16(in7, 2);
-
- // We do two passes, first the columns, then the rows. The results of the
- // first pass are transposed so that the same column code can be reused. The
- // results of the second pass are also transposed so that the rows (processed
- // as columns) are put back in row positions.
- for (pass = 0; pass < 2; pass++) {
- // To store results of each pass before the transpose.
- __m128i res0, res1, res2, res3, res4, res5, res6, res7;
- // Add/subtract
- const __m128i q0 = ADD_EPI16(in0, in7);
- const __m128i q1 = ADD_EPI16(in1, in6);
- const __m128i q2 = ADD_EPI16(in2, in5);
- const __m128i q3 = ADD_EPI16(in3, in4);
- const __m128i q4 = SUB_EPI16(in3, in4);
- const __m128i q5 = SUB_EPI16(in2, in5);
- const __m128i q6 = SUB_EPI16(in1, in6);
- const __m128i q7 = SUB_EPI16(in0, in7);
-#if DCT_HIGH_BIT_DEPTH
- if (pass == 1) {
- overflow =
- check_epi16_overflow_x8(&q0, &q1, &q2, &q3, &q4, &q5, &q6, &q7);
- if (overflow) {
- aom_highbd_fdct8x8_c(input, output, stride);
- return;
- }
- }
-#endif // DCT_HIGH_BIT_DEPTH
- // Work on first four results
- {
- // Add/subtract
- const __m128i r0 = ADD_EPI16(q0, q3);
- const __m128i r1 = ADD_EPI16(q1, q2);
- const __m128i r2 = SUB_EPI16(q1, q2);
- const __m128i r3 = SUB_EPI16(q0, q3);
-#if DCT_HIGH_BIT_DEPTH
- overflow = check_epi16_overflow_x4(&r0, &r1, &r2, &r3);
- if (overflow) {
- aom_highbd_fdct8x8_c(input, output, stride);
- return;
- }
-#endif // DCT_HIGH_BIT_DEPTH
- // Interleave to do the multiply by constants which gets us into 32bits
- {
- const __m128i t0 = _mm_unpacklo_epi16(r0, r1);
- const __m128i t1 = _mm_unpackhi_epi16(r0, r1);
- const __m128i t2 = _mm_unpacklo_epi16(r2, r3);
- const __m128i t3 = _mm_unpackhi_epi16(r2, r3);
- const __m128i u0 = _mm_madd_epi16(t0, k__cospi_p16_p16);
- const __m128i u1 = _mm_madd_epi16(t1, k__cospi_p16_p16);
- const __m128i u2 = _mm_madd_epi16(t0, k__cospi_p16_m16);
- const __m128i u3 = _mm_madd_epi16(t1, k__cospi_p16_m16);
- const __m128i u4 = _mm_madd_epi16(t2, k__cospi_p24_p08);
- const __m128i u5 = _mm_madd_epi16(t3, k__cospi_p24_p08);
- const __m128i u6 = _mm_madd_epi16(t2, k__cospi_m08_p24);
- const __m128i u7 = _mm_madd_epi16(t3, k__cospi_m08_p24);
- // dct_const_round_shift
- const __m128i v0 = _mm_add_epi32(u0, k__DCT_CONST_ROUNDING);
- const __m128i v1 = _mm_add_epi32(u1, k__DCT_CONST_ROUNDING);
- const __m128i v2 = _mm_add_epi32(u2, k__DCT_CONST_ROUNDING);
- const __m128i v3 = _mm_add_epi32(u3, k__DCT_CONST_ROUNDING);
- const __m128i v4 = _mm_add_epi32(u4, k__DCT_CONST_ROUNDING);
- const __m128i v5 = _mm_add_epi32(u5, k__DCT_CONST_ROUNDING);
- const __m128i v6 = _mm_add_epi32(u6, k__DCT_CONST_ROUNDING);
- const __m128i v7 = _mm_add_epi32(u7, k__DCT_CONST_ROUNDING);
- const __m128i w0 = _mm_srai_epi32(v0, DCT_CONST_BITS);
- const __m128i w1 = _mm_srai_epi32(v1, DCT_CONST_BITS);
- const __m128i w2 = _mm_srai_epi32(v2, DCT_CONST_BITS);
- const __m128i w3 = _mm_srai_epi32(v3, DCT_CONST_BITS);
- const __m128i w4 = _mm_srai_epi32(v4, DCT_CONST_BITS);
- const __m128i w5 = _mm_srai_epi32(v5, DCT_CONST_BITS);
- const __m128i w6 = _mm_srai_epi32(v6, DCT_CONST_BITS);
- const __m128i w7 = _mm_srai_epi32(v7, DCT_CONST_BITS);
- // Combine
- res0 = _mm_packs_epi32(w0, w1);
- res4 = _mm_packs_epi32(w2, w3);
- res2 = _mm_packs_epi32(w4, w5);
- res6 = _mm_packs_epi32(w6, w7);
-#if DCT_HIGH_BIT_DEPTH
- overflow = check_epi16_overflow_x4(&res0, &res4, &res2, &res6);
- if (overflow) {
- aom_highbd_fdct8x8_c(input, output, stride);
- return;
- }
-#endif // DCT_HIGH_BIT_DEPTH
- }
- }
- // Work on next four results
- {
- // Interleave to do the multiply by constants which gets us into 32bits
- const __m128i d0 = _mm_unpacklo_epi16(q6, q5);
- const __m128i d1 = _mm_unpackhi_epi16(q6, q5);
- const __m128i e0 = _mm_madd_epi16(d0, k__cospi_p16_m16);
- const __m128i e1 = _mm_madd_epi16(d1, k__cospi_p16_m16);
- const __m128i e2 = _mm_madd_epi16(d0, k__cospi_p16_p16);
- const __m128i e3 = _mm_madd_epi16(d1, k__cospi_p16_p16);
- // dct_const_round_shift
- const __m128i f0 = _mm_add_epi32(e0, k__DCT_CONST_ROUNDING);
- const __m128i f1 = _mm_add_epi32(e1, k__DCT_CONST_ROUNDING);
- const __m128i f2 = _mm_add_epi32(e2, k__DCT_CONST_ROUNDING);
- const __m128i f3 = _mm_add_epi32(e3, k__DCT_CONST_ROUNDING);
- const __m128i s0 = _mm_srai_epi32(f0, DCT_CONST_BITS);
- const __m128i s1 = _mm_srai_epi32(f1, DCT_CONST_BITS);
- const __m128i s2 = _mm_srai_epi32(f2, DCT_CONST_BITS);
- const __m128i s3 = _mm_srai_epi32(f3, DCT_CONST_BITS);
- // Combine
- const __m128i r0 = _mm_packs_epi32(s0, s1);
- const __m128i r1 = _mm_packs_epi32(s2, s3);
-#if DCT_HIGH_BIT_DEPTH
- overflow = check_epi16_overflow_x2(&r0, &r1);
- if (overflow) {
- aom_highbd_fdct8x8_c(input, output, stride);
- return;
- }
-#endif // DCT_HIGH_BIT_DEPTH
- {
- // Add/subtract
- const __m128i x0 = ADD_EPI16(q4, r0);
- const __m128i x1 = SUB_EPI16(q4, r0);
- const __m128i x2 = SUB_EPI16(q7, r1);
- const __m128i x3 = ADD_EPI16(q7, r1);
-#if DCT_HIGH_BIT_DEPTH
- overflow = check_epi16_overflow_x4(&x0, &x1, &x2, &x3);
- if (overflow) {
- aom_highbd_fdct8x8_c(input, output, stride);
- return;
- }
-#endif // DCT_HIGH_BIT_DEPTH
- // Interleave to do the multiply by constants which gets us into 32bits
- {
- const __m128i t0 = _mm_unpacklo_epi16(x0, x3);
- const __m128i t1 = _mm_unpackhi_epi16(x0, x3);
- const __m128i t2 = _mm_unpacklo_epi16(x1, x2);
- const __m128i t3 = _mm_unpackhi_epi16(x1, x2);
- const __m128i u0 = _mm_madd_epi16(t0, k__cospi_p28_p04);
- const __m128i u1 = _mm_madd_epi16(t1, k__cospi_p28_p04);
- const __m128i u2 = _mm_madd_epi16(t0, k__cospi_m04_p28);
- const __m128i u3 = _mm_madd_epi16(t1, k__cospi_m04_p28);
- const __m128i u4 = _mm_madd_epi16(t2, k__cospi_p12_p20);
- const __m128i u5 = _mm_madd_epi16(t3, k__cospi_p12_p20);
- const __m128i u6 = _mm_madd_epi16(t2, k__cospi_m20_p12);
- const __m128i u7 = _mm_madd_epi16(t3, k__cospi_m20_p12);
- // dct_const_round_shift
- const __m128i v0 = _mm_add_epi32(u0, k__DCT_CONST_ROUNDING);
- const __m128i v1 = _mm_add_epi32(u1, k__DCT_CONST_ROUNDING);
- const __m128i v2 = _mm_add_epi32(u2, k__DCT_CONST_ROUNDING);
- const __m128i v3 = _mm_add_epi32(u3, k__DCT_CONST_ROUNDING);
- const __m128i v4 = _mm_add_epi32(u4, k__DCT_CONST_ROUNDING);
- const __m128i v5 = _mm_add_epi32(u5, k__DCT_CONST_ROUNDING);
- const __m128i v6 = _mm_add_epi32(u6, k__DCT_CONST_ROUNDING);
- const __m128i v7 = _mm_add_epi32(u7, k__DCT_CONST_ROUNDING);
- const __m128i w0 = _mm_srai_epi32(v0, DCT_CONST_BITS);
- const __m128i w1 = _mm_srai_epi32(v1, DCT_CONST_BITS);
- const __m128i w2 = _mm_srai_epi32(v2, DCT_CONST_BITS);
- const __m128i w3 = _mm_srai_epi32(v3, DCT_CONST_BITS);
- const __m128i w4 = _mm_srai_epi32(v4, DCT_CONST_BITS);
- const __m128i w5 = _mm_srai_epi32(v5, DCT_CONST_BITS);
- const __m128i w6 = _mm_srai_epi32(v6, DCT_CONST_BITS);
- const __m128i w7 = _mm_srai_epi32(v7, DCT_CONST_BITS);
- // Combine
- res1 = _mm_packs_epi32(w0, w1);
- res7 = _mm_packs_epi32(w2, w3);
- res5 = _mm_packs_epi32(w4, w5);
- res3 = _mm_packs_epi32(w6, w7);
-#if DCT_HIGH_BIT_DEPTH
- overflow = check_epi16_overflow_x4(&res1, &res7, &res5, &res3);
- if (overflow) {
- aom_highbd_fdct8x8_c(input, output, stride);
- return;
- }
-#endif // DCT_HIGH_BIT_DEPTH
- }
- }
- }
- // Transpose the 8x8.
- {
- // 00 01 02 03 04 05 06 07
- // 10 11 12 13 14 15 16 17
- // 20 21 22 23 24 25 26 27
- // 30 31 32 33 34 35 36 37
- // 40 41 42 43 44 45 46 47
- // 50 51 52 53 54 55 56 57
- // 60 61 62 63 64 65 66 67
- // 70 71 72 73 74 75 76 77
- const __m128i tr0_0 = _mm_unpacklo_epi16(res0, res1);
- const __m128i tr0_1 = _mm_unpacklo_epi16(res2, res3);
- const __m128i tr0_2 = _mm_unpackhi_epi16(res0, res1);
- const __m128i tr0_3 = _mm_unpackhi_epi16(res2, res3);
- const __m128i tr0_4 = _mm_unpacklo_epi16(res4, res5);
- const __m128i tr0_5 = _mm_unpacklo_epi16(res6, res7);
- const __m128i tr0_6 = _mm_unpackhi_epi16(res4, res5);
- const __m128i tr0_7 = _mm_unpackhi_epi16(res6, res7);
- // 00 10 01 11 02 12 03 13
- // 20 30 21 31 22 32 23 33
- // 04 14 05 15 06 16 07 17
- // 24 34 25 35 26 36 27 37
- // 40 50 41 51 42 52 43 53
- // 60 70 61 71 62 72 63 73
- // 54 54 55 55 56 56 57 57
- // 64 74 65 75 66 76 67 77
- const __m128i tr1_0 = _mm_unpacklo_epi32(tr0_0, tr0_1);
- const __m128i tr1_1 = _mm_unpacklo_epi32(tr0_2, tr0_3);
- const __m128i tr1_2 = _mm_unpackhi_epi32(tr0_0, tr0_1);
- const __m128i tr1_3 = _mm_unpackhi_epi32(tr0_2, tr0_3);
- const __m128i tr1_4 = _mm_unpacklo_epi32(tr0_4, tr0_5);
- const __m128i tr1_5 = _mm_unpacklo_epi32(tr0_6, tr0_7);
- const __m128i tr1_6 = _mm_unpackhi_epi32(tr0_4, tr0_5);
- const __m128i tr1_7 = _mm_unpackhi_epi32(tr0_6, tr0_7);
- // 00 10 20 30 01 11 21 31
- // 40 50 60 70 41 51 61 71
- // 02 12 22 32 03 13 23 33
- // 42 52 62 72 43 53 63 73
- // 04 14 24 34 05 15 21 36
- // 44 54 64 74 45 55 61 76
- // 06 16 26 36 07 17 27 37
- // 46 56 66 76 47 57 67 77
- in0 = _mm_unpacklo_epi64(tr1_0, tr1_4);
- in1 = _mm_unpackhi_epi64(tr1_0, tr1_4);
- in2 = _mm_unpacklo_epi64(tr1_2, tr1_6);
- in3 = _mm_unpackhi_epi64(tr1_2, tr1_6);
- in4 = _mm_unpacklo_epi64(tr1_1, tr1_5);
- in5 = _mm_unpackhi_epi64(tr1_1, tr1_5);
- in6 = _mm_unpacklo_epi64(tr1_3, tr1_7);
- in7 = _mm_unpackhi_epi64(tr1_3, tr1_7);
- // 00 10 20 30 40 50 60 70
- // 01 11 21 31 41 51 61 71
- // 02 12 22 32 42 52 62 72
- // 03 13 23 33 43 53 63 73
- // 04 14 24 34 44 54 64 74
- // 05 15 25 35 45 55 65 75
- // 06 16 26 36 46 56 66 76
- // 07 17 27 37 47 57 67 77
- }
- }
- // Post-condition output and store it
- {
- // Post-condition (division by two)
- // division of two 16 bits signed numbers using shifts
- // n / 2 = (n - (n >> 15)) >> 1
- const __m128i sign_in0 = _mm_srai_epi16(in0, 15);
- const __m128i sign_in1 = _mm_srai_epi16(in1, 15);
- const __m128i sign_in2 = _mm_srai_epi16(in2, 15);
- const __m128i sign_in3 = _mm_srai_epi16(in3, 15);
- const __m128i sign_in4 = _mm_srai_epi16(in4, 15);
- const __m128i sign_in5 = _mm_srai_epi16(in5, 15);
- const __m128i sign_in6 = _mm_srai_epi16(in6, 15);
- const __m128i sign_in7 = _mm_srai_epi16(in7, 15);
- in0 = _mm_sub_epi16(in0, sign_in0);
- in1 = _mm_sub_epi16(in1, sign_in1);
- in2 = _mm_sub_epi16(in2, sign_in2);
- in3 = _mm_sub_epi16(in3, sign_in3);
- in4 = _mm_sub_epi16(in4, sign_in4);
- in5 = _mm_sub_epi16(in5, sign_in5);
- in6 = _mm_sub_epi16(in6, sign_in6);
- in7 = _mm_sub_epi16(in7, sign_in7);
- in0 = _mm_srai_epi16(in0, 1);
- in1 = _mm_srai_epi16(in1, 1);
- in2 = _mm_srai_epi16(in2, 1);
- in3 = _mm_srai_epi16(in3, 1);
- in4 = _mm_srai_epi16(in4, 1);
- in5 = _mm_srai_epi16(in5, 1);
- in6 = _mm_srai_epi16(in6, 1);
- in7 = _mm_srai_epi16(in7, 1);
- // store results
- store_output(&in0, (output + 0 * 8));
- store_output(&in1, (output + 1 * 8));
- store_output(&in2, (output + 2 * 8));
- store_output(&in3, (output + 3 * 8));
- store_output(&in4, (output + 4 * 8));
- store_output(&in5, (output + 5 * 8));
- store_output(&in6, (output + 6 * 8));
- store_output(&in7, (output + 7 * 8));
- }
-}
-
-#undef ADD_EPI16
-#undef SUB_EPI16
diff --git a/third_party/aom/aom_dsp/x86/fwd_txfm_sse2.c b/third_party/aom/aom_dsp/x86/fwd_txfm_sse2.c
deleted file mode 100644
index 2d8f8f71e..000000000
--- a/third_party/aom/aom_dsp/x86/fwd_txfm_sse2.c
+++ /dev/null
@@ -1,69 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#include <emmintrin.h> // SSE2
-
-#include "config/aom_config.h"
-#include "config/aom_dsp_rtcd.h"
-
-#include "aom_dsp/aom_dsp_common.h"
-#include "aom_dsp/x86/fwd_txfm_sse2.h"
-
-void aom_fdct8x8_1_sse2(const int16_t *input, tran_low_t *output, int stride) {
- __m128i in0 = _mm_load_si128((const __m128i *)(input + 0 * stride));
- __m128i in1 = _mm_load_si128((const __m128i *)(input + 1 * stride));
- __m128i in2 = _mm_load_si128((const __m128i *)(input + 2 * stride));
- __m128i in3 = _mm_load_si128((const __m128i *)(input + 3 * stride));
- __m128i u0, u1, sum;
-
- u0 = _mm_add_epi16(in0, in1);
- u1 = _mm_add_epi16(in2, in3);
-
- in0 = _mm_load_si128((const __m128i *)(input + 4 * stride));
- in1 = _mm_load_si128((const __m128i *)(input + 5 * stride));
- in2 = _mm_load_si128((const __m128i *)(input + 6 * stride));
- in3 = _mm_load_si128((const __m128i *)(input + 7 * stride));
-
- sum = _mm_add_epi16(u0, u1);
-
- in0 = _mm_add_epi16(in0, in1);
- in2 = _mm_add_epi16(in2, in3);
- sum = _mm_add_epi16(sum, in0);
-
- u0 = _mm_setzero_si128();
- sum = _mm_add_epi16(sum, in2);
-
- in0 = _mm_unpacklo_epi16(u0, sum);
- in1 = _mm_unpackhi_epi16(u0, sum);
- in0 = _mm_srai_epi32(in0, 16);
- in1 = _mm_srai_epi32(in1, 16);
-
- sum = _mm_add_epi32(in0, in1);
- in0 = _mm_unpacklo_epi32(sum, u0);
- in1 = _mm_unpackhi_epi32(sum, u0);
-
- sum = _mm_add_epi32(in0, in1);
- in0 = _mm_srli_si128(sum, 8);
-
- in1 = _mm_add_epi32(sum, in0);
- output[0] = (tran_low_t)_mm_cvtsi128_si32(in1);
-}
-
-#define DCT_HIGH_BIT_DEPTH 0
-#define FDCT8x8_2D aom_fdct8x8_sse2
-#include "aom_dsp/x86/fwd_txfm_impl_sse2.h"
-#undef FDCT8x8_2D
-
-#undef DCT_HIGH_BIT_DEPTH
-#define DCT_HIGH_BIT_DEPTH 1
-#define FDCT8x8_2D aom_highbd_fdct8x8_sse2
-#include "aom_dsp/x86/fwd_txfm_impl_sse2.h" // NOLINT
-#undef FDCT8x8_2D
diff --git a/third_party/aom/aom_dsp/x86/fwd_txfm_sse2.h b/third_party/aom/aom_dsp/x86/fwd_txfm_sse2.h
deleted file mode 100644
index 260d8dd58..000000000
--- a/third_party/aom/aom_dsp/x86/fwd_txfm_sse2.h
+++ /dev/null
@@ -1,155 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#ifndef AOM_AOM_DSP_X86_FWD_TXFM_SSE2_H_
-#define AOM_AOM_DSP_X86_FWD_TXFM_SSE2_H_
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-static INLINE __m128i k_madd_epi32(__m128i a, __m128i b) {
- __m128i buf0, buf1;
- buf0 = _mm_mul_epu32(a, b);
- a = _mm_srli_epi64(a, 32);
- b = _mm_srli_epi64(b, 32);
- buf1 = _mm_mul_epu32(a, b);
- return _mm_add_epi64(buf0, buf1);
-}
-
-static INLINE __m128i k_packs_epi64(__m128i a, __m128i b) {
- __m128i buf0 = _mm_shuffle_epi32(a, _MM_SHUFFLE(0, 0, 2, 0));
- __m128i buf1 = _mm_shuffle_epi32(b, _MM_SHUFFLE(0, 0, 2, 0));
- return _mm_unpacklo_epi64(buf0, buf1);
-}
-
-static INLINE int check_epi16_overflow_x2(const __m128i *preg0,
- const __m128i *preg1) {
- const __m128i max_overflow = _mm_set1_epi16(0x7fff);
- const __m128i min_overflow = _mm_set1_epi16(0x8000);
- __m128i cmp0 = _mm_or_si128(_mm_cmpeq_epi16(*preg0, max_overflow),
- _mm_cmpeq_epi16(*preg0, min_overflow));
- __m128i cmp1 = _mm_or_si128(_mm_cmpeq_epi16(*preg1, max_overflow),
- _mm_cmpeq_epi16(*preg1, min_overflow));
- cmp0 = _mm_or_si128(cmp0, cmp1);
- return _mm_movemask_epi8(cmp0);
-}
-
-static INLINE int check_epi16_overflow_x4(const __m128i *preg0,
- const __m128i *preg1,
- const __m128i *preg2,
- const __m128i *preg3) {
- const __m128i max_overflow = _mm_set1_epi16(0x7fff);
- const __m128i min_overflow = _mm_set1_epi16(0x8000);
- __m128i cmp0 = _mm_or_si128(_mm_cmpeq_epi16(*preg0, max_overflow),
- _mm_cmpeq_epi16(*preg0, min_overflow));
- __m128i cmp1 = _mm_or_si128(_mm_cmpeq_epi16(*preg1, max_overflow),
- _mm_cmpeq_epi16(*preg1, min_overflow));
- __m128i cmp2 = _mm_or_si128(_mm_cmpeq_epi16(*preg2, max_overflow),
- _mm_cmpeq_epi16(*preg2, min_overflow));
- __m128i cmp3 = _mm_or_si128(_mm_cmpeq_epi16(*preg3, max_overflow),
- _mm_cmpeq_epi16(*preg3, min_overflow));
- cmp0 = _mm_or_si128(_mm_or_si128(cmp0, cmp1), _mm_or_si128(cmp2, cmp3));
- return _mm_movemask_epi8(cmp0);
-}
-
-static INLINE int check_epi16_overflow_x8(
- const __m128i *preg0, const __m128i *preg1, const __m128i *preg2,
- const __m128i *preg3, const __m128i *preg4, const __m128i *preg5,
- const __m128i *preg6, const __m128i *preg7) {
- int res0, res1;
- res0 = check_epi16_overflow_x4(preg0, preg1, preg2, preg3);
- res1 = check_epi16_overflow_x4(preg4, preg5, preg6, preg7);
- return res0 + res1;
-}
-
-static INLINE int check_epi16_overflow_x12(
- const __m128i *preg0, const __m128i *preg1, const __m128i *preg2,
- const __m128i *preg3, const __m128i *preg4, const __m128i *preg5,
- const __m128i *preg6, const __m128i *preg7, const __m128i *preg8,
- const __m128i *preg9, const __m128i *preg10, const __m128i *preg11) {
- int res0, res1;
- res0 = check_epi16_overflow_x4(preg0, preg1, preg2, preg3);
- res1 = check_epi16_overflow_x4(preg4, preg5, preg6, preg7);
- if (!res0) res0 = check_epi16_overflow_x4(preg8, preg9, preg10, preg11);
- return res0 + res1;
-}
-
-static INLINE int check_epi16_overflow_x16(
- const __m128i *preg0, const __m128i *preg1, const __m128i *preg2,
- const __m128i *preg3, const __m128i *preg4, const __m128i *preg5,
- const __m128i *preg6, const __m128i *preg7, const __m128i *preg8,
- const __m128i *preg9, const __m128i *preg10, const __m128i *preg11,
- const __m128i *preg12, const __m128i *preg13, const __m128i *preg14,
- const __m128i *preg15) {
- int res0, res1;
- res0 = check_epi16_overflow_x4(preg0, preg1, preg2, preg3);
- res1 = check_epi16_overflow_x4(preg4, preg5, preg6, preg7);
- if (!res0) {
- res0 = check_epi16_overflow_x4(preg8, preg9, preg10, preg11);
- if (!res1) res1 = check_epi16_overflow_x4(preg12, preg13, preg14, preg15);
- }
- return res0 + res1;
-}
-
-static INLINE int check_epi16_overflow_x32(
- const __m128i *preg0, const __m128i *preg1, const __m128i *preg2,
- const __m128i *preg3, const __m128i *preg4, const __m128i *preg5,
- const __m128i *preg6, const __m128i *preg7, const __m128i *preg8,
- const __m128i *preg9, const __m128i *preg10, const __m128i *preg11,
- const __m128i *preg12, const __m128i *preg13, const __m128i *preg14,
- const __m128i *preg15, const __m128i *preg16, const __m128i *preg17,
- const __m128i *preg18, const __m128i *preg19, const __m128i *preg20,
- const __m128i *preg21, const __m128i *preg22, const __m128i *preg23,
- const __m128i *preg24, const __m128i *preg25, const __m128i *preg26,
- const __m128i *preg27, const __m128i *preg28, const __m128i *preg29,
- const __m128i *preg30, const __m128i *preg31) {
- int res0, res1;
- res0 = check_epi16_overflow_x4(preg0, preg1, preg2, preg3);
- res1 = check_epi16_overflow_x4(preg4, preg5, preg6, preg7);
- if (!res0) {
- res0 = check_epi16_overflow_x4(preg8, preg9, preg10, preg11);
- if (!res1) {
- res1 = check_epi16_overflow_x4(preg12, preg13, preg14, preg15);
- if (!res0) {
- res0 = check_epi16_overflow_x4(preg16, preg17, preg18, preg19);
- if (!res1) {
- res1 = check_epi16_overflow_x4(preg20, preg21, preg22, preg23);
- if (!res0) {
- res0 = check_epi16_overflow_x4(preg24, preg25, preg26, preg27);
- if (!res1)
- res1 = check_epi16_overflow_x4(preg28, preg29, preg30, preg31);
- }
- }
- }
- }
- }
- return res0 + res1;
-}
-
-static INLINE void store_output(const __m128i *poutput, tran_low_t *dst_ptr) {
- if (sizeof(tran_low_t) == 4) {
- const __m128i zero = _mm_setzero_si128();
- const __m128i sign_bits = _mm_cmplt_epi16(*poutput, zero);
- __m128i out0 = _mm_unpacklo_epi16(*poutput, sign_bits);
- __m128i out1 = _mm_unpackhi_epi16(*poutput, sign_bits);
- _mm_store_si128((__m128i *)(dst_ptr), out0);
- _mm_store_si128((__m128i *)(dst_ptr + 4), out1);
- } else {
- _mm_store_si128((__m128i *)(dst_ptr), *poutput);
- }
-}
-
-#ifdef __cplusplus
-} // extern "C"
-#endif
-
-#endif // AOM_AOM_DSP_X86_FWD_TXFM_SSE2_H_
diff --git a/third_party/aom/aom_dsp/x86/fwd_txfm_ssse3_x86_64.asm b/third_party/aom/aom_dsp/x86/fwd_txfm_ssse3_x86_64.asm
deleted file mode 100644
index c1fb259a1..000000000
--- a/third_party/aom/aom_dsp/x86/fwd_txfm_ssse3_x86_64.asm
+++ /dev/null
@@ -1,379 +0,0 @@
-;
-; Copyright (c) 2016, Alliance for Open Media. All rights reserved
-;
-; This source code is subject to the terms of the BSD 2 Clause License and
-; the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
-; was not distributed with this source code in the LICENSE file, you can
-; obtain it at www.aomedia.org/license/software. If the Alliance for Open
-; Media Patent License 1.0 was not distributed with this source code in the
-; PATENTS file, you can obtain it at www.aomedia.org/license/patent.
-;
-
-;
-
-%include "third_party/x86inc/x86inc.asm"
-
-SECTION_RODATA
-
-pw_11585x2: times 8 dw 23170
-pd_8192: times 4 dd 8192
-
-%macro TRANSFORM_COEFFS 2
-pw_%1_%2: dw %1, %2, %1, %2, %1, %2, %1, %2
-pw_%2_m%1: dw %2, -%1, %2, -%1, %2, -%1, %2, -%1
-%endmacro
-
-TRANSFORM_COEFFS 11585, 11585
-TRANSFORM_COEFFS 15137, 6270
-TRANSFORM_COEFFS 16069, 3196
-TRANSFORM_COEFFS 9102, 13623
-
-%macro STORE_OUTPUT 2 ; index, result
- ; const __m128i sign_bits = _mm_cmplt_epi16(*poutput, zero);
- ; __m128i out0 = _mm_unpacklo_epi16(*poutput, sign_bits);
- ; __m128i out1 = _mm_unpackhi_epi16(*poutput, sign_bits);
- ; _mm_store_si128((__m128i *)(dst_ptr), out0);
- ; _mm_store_si128((__m128i *)(dst_ptr + 4), out1);
- pxor m11, m11
- pcmpgtw m11, m%2
- movdqa m12, m%2
- punpcklwd m%2, m11
- punpckhwd m12, m11
- mova [outputq + 4*%1 + 0], m%2
- mova [outputq + 4*%1 + 16], m12
-%endmacro
-
-SECTION .text
-
-%if ARCH_X86_64
-INIT_XMM ssse3
-cglobal fdct8x8, 3, 5, 13, input, output, stride
-
- mova m8, [GLOBAL(pd_8192)]
- mova m12, [GLOBAL(pw_11585x2)]
-
- lea r3, [2 * strideq]
- lea r4, [4 * strideq]
- mova m0, [inputq]
- mova m1, [inputq + r3]
- lea inputq, [inputq + r4]
- mova m2, [inputq]
- mova m3, [inputq + r3]
- lea inputq, [inputq + r4]
- mova m4, [inputq]
- mova m5, [inputq + r3]
- lea inputq, [inputq + r4]
- mova m6, [inputq]
- mova m7, [inputq + r3]
-
- ; left shift by 2 to increase forward transformation precision
- psllw m0, 2
- psllw m1, 2
- psllw m2, 2
- psllw m3, 2
- psllw m4, 2
- psllw m5, 2
- psllw m6, 2
- psllw m7, 2
-
- ; column transform
- ; stage 1
- paddw m10, m0, m7
- psubw m0, m7
-
- paddw m9, m1, m6
- psubw m1, m6
-
- paddw m7, m2, m5
- psubw m2, m5
-
- paddw m6, m3, m4
- psubw m3, m4
-
- ; stage 2
- paddw m5, m9, m7
- psubw m9, m7
-
- paddw m4, m10, m6
- psubw m10, m6
-
- paddw m7, m1, m2
- psubw m1, m2
-
- ; stage 3
- paddw m6, m4, m5
- psubw m4, m5
-
- pmulhrsw m1, m12
- pmulhrsw m7, m12
-
- ; sin(pi / 8), cos(pi / 8)
- punpcklwd m2, m10, m9
- punpckhwd m10, m9
- pmaddwd m5, m2, [GLOBAL(pw_15137_6270)]
- pmaddwd m2, [GLOBAL(pw_6270_m15137)]
- pmaddwd m9, m10, [GLOBAL(pw_15137_6270)]
- pmaddwd m10, [GLOBAL(pw_6270_m15137)]
- paddd m5, m8
- paddd m2, m8
- paddd m9, m8
- paddd m10, m8
- psrad m5, 14
- psrad m2, 14
- psrad m9, 14
- psrad m10, 14
- packssdw m5, m9
- packssdw m2, m10
-
- pmulhrsw m6, m12
- pmulhrsw m4, m12
-
- paddw m9, m3, m1
- psubw m3, m1
-
- paddw m10, m0, m7
- psubw m0, m7
-
- ; stage 4
- ; sin(pi / 16), cos(pi / 16)
- punpcklwd m1, m10, m9
- punpckhwd m10, m9
- pmaddwd m7, m1, [GLOBAL(pw_16069_3196)]
- pmaddwd m1, [GLOBAL(pw_3196_m16069)]
- pmaddwd m9, m10, [GLOBAL(pw_16069_3196)]
- pmaddwd m10, [GLOBAL(pw_3196_m16069)]
- paddd m7, m8
- paddd m1, m8
- paddd m9, m8
- paddd m10, m8
- psrad m7, 14
- psrad m1, 14
- psrad m9, 14
- psrad m10, 14
- packssdw m7, m9
- packssdw m1, m10
-
- ; sin(3 * pi / 16), cos(3 * pi / 16)
- punpcklwd m11, m0, m3
- punpckhwd m0, m3
- pmaddwd m9, m11, [GLOBAL(pw_9102_13623)]
- pmaddwd m11, [GLOBAL(pw_13623_m9102)]
- pmaddwd m3, m0, [GLOBAL(pw_9102_13623)]
- pmaddwd m0, [GLOBAL(pw_13623_m9102)]
- paddd m9, m8
- paddd m11, m8
- paddd m3, m8
- paddd m0, m8
- psrad m9, 14
- psrad m11, 14
- psrad m3, 14
- psrad m0, 14
- packssdw m9, m3
- packssdw m11, m0
-
- ; transpose
- ; stage 1
- punpcklwd m0, m6, m7
- punpcklwd m3, m5, m11
- punpckhwd m6, m7
- punpckhwd m5, m11
- punpcklwd m7, m4, m9
- punpcklwd m10, m2, m1
- punpckhwd m4, m9
- punpckhwd m2, m1
-
- ; stage 2
- punpckldq m9, m0, m3
- punpckldq m1, m6, m5
- punpckhdq m0, m3
- punpckhdq m6, m5
- punpckldq m3, m7, m10
- punpckldq m5, m4, m2
- punpckhdq m7, m10
- punpckhdq m4, m2
-
- ; stage 3
- punpcklqdq m10, m9, m3
- punpckhqdq m9, m3
- punpcklqdq m2, m0, m7
- punpckhqdq m0, m7
- punpcklqdq m3, m1, m5
- punpckhqdq m1, m5
- punpcklqdq m7, m6, m4
- punpckhqdq m6, m4
-
- ; row transform
- ; stage 1
- paddw m5, m10, m6
- psubw m10, m6
-
- paddw m4, m9, m7
- psubw m9, m7
-
- paddw m6, m2, m1
- psubw m2, m1
-
- paddw m7, m0, m3
- psubw m0, m3
-
- ;stage 2
- paddw m1, m5, m7
- psubw m5, m7
-
- paddw m3, m4, m6
- psubw m4, m6
-
- paddw m7, m9, m2
- psubw m9, m2
-
- ; stage 3
- punpcklwd m6, m1, m3
- punpckhwd m1, m3
- pmaddwd m2, m6, [GLOBAL(pw_11585_11585)]
- pmaddwd m6, [GLOBAL(pw_11585_m11585)]
- pmaddwd m3, m1, [GLOBAL(pw_11585_11585)]
- pmaddwd m1, [GLOBAL(pw_11585_m11585)]
- paddd m2, m8
- paddd m6, m8
- paddd m3, m8
- paddd m1, m8
- psrad m2, 14
- psrad m6, 14
- psrad m3, 14
- psrad m1, 14
- packssdw m2, m3
- packssdw m6, m1
-
- pmulhrsw m7, m12
- pmulhrsw m9, m12
-
- punpcklwd m3, m5, m4
- punpckhwd m5, m4
- pmaddwd m1, m3, [GLOBAL(pw_15137_6270)]
- pmaddwd m3, [GLOBAL(pw_6270_m15137)]
- pmaddwd m4, m5, [GLOBAL(pw_15137_6270)]
- pmaddwd m5, [GLOBAL(pw_6270_m15137)]
- paddd m1, m8
- paddd m3, m8
- paddd m4, m8
- paddd m5, m8
- psrad m1, 14
- psrad m3, 14
- psrad m4, 14
- psrad m5, 14
- packssdw m1, m4
- packssdw m3, m5
-
- paddw m4, m0, m9
- psubw m0, m9
-
- paddw m5, m10, m7
- psubw m10, m7
-
- ; stage 4
- punpcklwd m9, m5, m4
- punpckhwd m5, m4
- pmaddwd m7, m9, [GLOBAL(pw_16069_3196)]
- pmaddwd m9, [GLOBAL(pw_3196_m16069)]
- pmaddwd m4, m5, [GLOBAL(pw_16069_3196)]
- pmaddwd m5, [GLOBAL(pw_3196_m16069)]
- paddd m7, m8
- paddd m9, m8
- paddd m4, m8
- paddd m5, m8
- psrad m7, 14
- psrad m9, 14
- psrad m4, 14
- psrad m5, 14
- packssdw m7, m4
- packssdw m9, m5
-
- punpcklwd m4, m10, m0
- punpckhwd m10, m0
- pmaddwd m5, m4, [GLOBAL(pw_9102_13623)]
- pmaddwd m4, [GLOBAL(pw_13623_m9102)]
- pmaddwd m0, m10, [GLOBAL(pw_9102_13623)]
- pmaddwd m10, [GLOBAL(pw_13623_m9102)]
- paddd m5, m8
- paddd m4, m8
- paddd m0, m8
- paddd m10, m8
- psrad m5, 14
- psrad m4, 14
- psrad m0, 14
- psrad m10, 14
- packssdw m5, m0
- packssdw m4, m10
-
- ; transpose
- ; stage 1
- punpcklwd m0, m2, m7
- punpcklwd m10, m1, m4
- punpckhwd m2, m7
- punpckhwd m1, m4
- punpcklwd m7, m6, m5
- punpcklwd m4, m3, m9
- punpckhwd m6, m5
- punpckhwd m3, m9
-
- ; stage 2
- punpckldq m5, m0, m10
- punpckldq m9, m2, m1
- punpckhdq m0, m10
- punpckhdq m2, m1
- punpckldq m10, m7, m4
- punpckldq m1, m6, m3
- punpckhdq m7, m4
- punpckhdq m6, m3
-
- ; stage 3
- punpcklqdq m4, m5, m10
- punpckhqdq m5, m10
- punpcklqdq m3, m0, m7
- punpckhqdq m0, m7
- punpcklqdq m10, m9, m1
- punpckhqdq m9, m1
- punpcklqdq m7, m2, m6
- punpckhqdq m2, m6
-
- psraw m1, m4, 15
- psraw m6, m5, 15
- psraw m8, m3, 15
- psraw m11, m0, 15
-
- psubw m4, m1
- psubw m5, m6
- psubw m3, m8
- psubw m0, m11
-
- psraw m4, 1
- psraw m5, 1
- psraw m3, 1
- psraw m0, 1
-
- psraw m1, m10, 15
- psraw m6, m9, 15
- psraw m8, m7, 15
- psraw m11, m2, 15
-
- psubw m10, m1
- psubw m9, m6
- psubw m7, m8
- psubw m2, m11
-
- psraw m10, 1
- psraw m9, 1
- psraw m7, 1
- psraw m2, 1
-
- STORE_OUTPUT 0, 4
- STORE_OUTPUT 8, 5
- STORE_OUTPUT 16, 3
- STORE_OUTPUT 24, 0
- STORE_OUTPUT 32, 10
- STORE_OUTPUT 40, 9
- STORE_OUTPUT 48, 7
- STORE_OUTPUT 56, 2
-
- RET
-%endif
diff --git a/third_party/aom/aom_dsp/x86/highbd_convolve_avx2.c b/third_party/aom/aom_dsp/x86/highbd_convolve_avx2.c
deleted file mode 100644
index 099fcf7fc..000000000
--- a/third_party/aom/aom_dsp/x86/highbd_convolve_avx2.c
+++ /dev/null
@@ -1,998 +0,0 @@
-/*
- * Copyright (c) 2017, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-#include <immintrin.h>
-#include <string.h>
-
-#include "config/aom_dsp_rtcd.h"
-
-#include "aom_dsp/x86/convolve.h"
-#include "aom_dsp/x86/convolve_avx2.h"
-#include "aom_dsp/x86/synonyms.h"
-
-// -----------------------------------------------------------------------------
-// Copy and average
-
-void aom_highbd_convolve_copy_avx2(const uint8_t *src8, ptrdiff_t src_stride,
- uint8_t *dst8, ptrdiff_t dst_stride,
- const int16_t *filter_x, int filter_x_stride,
- const int16_t *filter_y, int filter_y_stride,
- int width, int h, int bd) {
- const uint16_t *src = CONVERT_TO_SHORTPTR(src8);
- uint16_t *dst = CONVERT_TO_SHORTPTR(dst8);
- (void)filter_x;
- (void)filter_y;
- (void)filter_x_stride;
- (void)filter_y_stride;
- (void)bd;
-
- assert(width % 4 == 0);
- if (width > 32) { // width = 64
- do {
- const __m256i p0 = _mm256_loadu_si256((const __m256i *)src);
- const __m256i p1 = _mm256_loadu_si256((const __m256i *)(src + 16));
- const __m256i p2 = _mm256_loadu_si256((const __m256i *)(src + 32));
- const __m256i p3 = _mm256_loadu_si256((const __m256i *)(src + 48));
- src += src_stride;
- _mm256_storeu_si256((__m256i *)dst, p0);
- _mm256_storeu_si256((__m256i *)(dst + 16), p1);
- _mm256_storeu_si256((__m256i *)(dst + 32), p2);
- _mm256_storeu_si256((__m256i *)(dst + 48), p3);
- dst += dst_stride;
- h--;
- } while (h > 0);
- } else if (width > 16) { // width = 32
- do {
- const __m256i p0 = _mm256_loadu_si256((const __m256i *)src);
- const __m256i p1 = _mm256_loadu_si256((const __m256i *)(src + 16));
- src += src_stride;
- _mm256_storeu_si256((__m256i *)dst, p0);
- _mm256_storeu_si256((__m256i *)(dst + 16), p1);
- dst += dst_stride;
- h--;
- } while (h > 0);
- } else if (width > 8) { // width = 16
- __m256i p0, p1;
- do {
- p0 = _mm256_loadu_si256((const __m256i *)src);
- src += src_stride;
- p1 = _mm256_loadu_si256((const __m256i *)src);
- src += src_stride;
-
- _mm256_storeu_si256((__m256i *)dst, p0);
- dst += dst_stride;
- _mm256_storeu_si256((__m256i *)dst, p1);
- dst += dst_stride;
- h -= 2;
- } while (h > 0);
- } else if (width > 4) { // width = 8
- __m128i p0, p1;
- do {
- p0 = _mm_loadu_si128((const __m128i *)src);
- src += src_stride;
- p1 = _mm_loadu_si128((const __m128i *)src);
- src += src_stride;
-
- _mm_storeu_si128((__m128i *)dst, p0);
- dst += dst_stride;
- _mm_storeu_si128((__m128i *)dst, p1);
- dst += dst_stride;
- h -= 2;
- } while (h > 0);
- } else { // width = 4
- __m128i p0, p1;
- do {
- p0 = _mm_loadl_epi64((const __m128i *)src);
- src += src_stride;
- p1 = _mm_loadl_epi64((const __m128i *)src);
- src += src_stride;
-
- _mm_storel_epi64((__m128i *)dst, p0);
- dst += dst_stride;
- _mm_storel_epi64((__m128i *)dst, p1);
- dst += dst_stride;
- h -= 2;
- } while (h > 0);
- }
-}
-
-void av1_highbd_convolve_y_sr_avx2(const uint16_t *src, int src_stride,
- uint16_t *dst, int dst_stride, int w, int h,
- const InterpFilterParams *filter_params_x,
- const InterpFilterParams *filter_params_y,
- const int subpel_x_q4, const int subpel_y_q4,
- ConvolveParams *conv_params, int bd) {
- int i, j;
- const int fo_vert = filter_params_y->taps / 2 - 1;
- const uint16_t *const src_ptr = src - fo_vert * src_stride;
- (void)filter_params_x;
- (void)subpel_x_q4;
- (void)conv_params;
-
- assert(conv_params->round_0 <= FILTER_BITS);
- assert(((conv_params->round_0 + conv_params->round_1) <= (FILTER_BITS + 1)) ||
- ((conv_params->round_0 + conv_params->round_1) == (2 * FILTER_BITS)));
-
- __m256i s[8], coeffs_y[4];
-
- const int bits = FILTER_BITS;
-
- const __m128i round_shift_bits = _mm_cvtsi32_si128(bits);
- const __m256i round_const_bits = _mm256_set1_epi32((1 << bits) >> 1);
- const __m256i clip_pixel =
- _mm256_set1_epi16(bd == 10 ? 1023 : (bd == 12 ? 4095 : 255));
- const __m256i zero = _mm256_setzero_si256();
-
- prepare_coeffs(filter_params_y, subpel_y_q4, coeffs_y);
-
- for (j = 0; j < w; j += 8) {
- const uint16_t *data = &src_ptr[j];
- /* Vertical filter */
- {
- __m256i src6;
- __m256i s01 = _mm256_permute2x128_si256(
- _mm256_castsi128_si256(
- _mm_loadu_si128((__m128i *)(data + 0 * src_stride))),
- _mm256_castsi128_si256(
- _mm_loadu_si128((__m128i *)(data + 1 * src_stride))),
- 0x20);
- __m256i s12 = _mm256_permute2x128_si256(
- _mm256_castsi128_si256(
- _mm_loadu_si128((__m128i *)(data + 1 * src_stride))),
- _mm256_castsi128_si256(
- _mm_loadu_si128((__m128i *)(data + 2 * src_stride))),
- 0x20);
- __m256i s23 = _mm256_permute2x128_si256(
- _mm256_castsi128_si256(
- _mm_loadu_si128((__m128i *)(data + 2 * src_stride))),
- _mm256_castsi128_si256(
- _mm_loadu_si128((__m128i *)(data + 3 * src_stride))),
- 0x20);
- __m256i s34 = _mm256_permute2x128_si256(
- _mm256_castsi128_si256(
- _mm_loadu_si128((__m128i *)(data + 3 * src_stride))),
- _mm256_castsi128_si256(
- _mm_loadu_si128((__m128i *)(data + 4 * src_stride))),
- 0x20);
- __m256i s45 = _mm256_permute2x128_si256(
- _mm256_castsi128_si256(
- _mm_loadu_si128((__m128i *)(data + 4 * src_stride))),
- _mm256_castsi128_si256(
- _mm_loadu_si128((__m128i *)(data + 5 * src_stride))),
- 0x20);
- src6 = _mm256_castsi128_si256(
- _mm_loadu_si128((__m128i *)(data + 6 * src_stride)));
- __m256i s56 = _mm256_permute2x128_si256(
- _mm256_castsi128_si256(
- _mm_loadu_si128((__m128i *)(data + 5 * src_stride))),
- src6, 0x20);
-
- s[0] = _mm256_unpacklo_epi16(s01, s12);
- s[1] = _mm256_unpacklo_epi16(s23, s34);
- s[2] = _mm256_unpacklo_epi16(s45, s56);
-
- s[4] = _mm256_unpackhi_epi16(s01, s12);
- s[5] = _mm256_unpackhi_epi16(s23, s34);
- s[6] = _mm256_unpackhi_epi16(s45, s56);
-
- for (i = 0; i < h; i += 2) {
- data = &src_ptr[i * src_stride + j];
-
- const __m256i s67 = _mm256_permute2x128_si256(
- src6,
- _mm256_castsi128_si256(
- _mm_loadu_si128((__m128i *)(data + 7 * src_stride))),
- 0x20);
-
- src6 = _mm256_castsi128_si256(
- _mm_loadu_si128((__m128i *)(data + 8 * src_stride)));
-
- const __m256i s78 = _mm256_permute2x128_si256(
- _mm256_castsi128_si256(
- _mm_loadu_si128((__m128i *)(data + 7 * src_stride))),
- src6, 0x20);
-
- s[3] = _mm256_unpacklo_epi16(s67, s78);
- s[7] = _mm256_unpackhi_epi16(s67, s78);
-
- const __m256i res_a = convolve(s, coeffs_y);
-
- __m256i res_a_round = _mm256_sra_epi32(
- _mm256_add_epi32(res_a, round_const_bits), round_shift_bits);
-
- if (w - j > 4) {
- const __m256i res_b = convolve(s + 4, coeffs_y);
- __m256i res_b_round = _mm256_sra_epi32(
- _mm256_add_epi32(res_b, round_const_bits), round_shift_bits);
-
- __m256i res_16bit = _mm256_packs_epi32(res_a_round, res_b_round);
- res_16bit = _mm256_min_epi16(res_16bit, clip_pixel);
- res_16bit = _mm256_max_epi16(res_16bit, zero);
-
- _mm_storeu_si128((__m128i *)&dst[i * dst_stride + j],
- _mm256_castsi256_si128(res_16bit));
- _mm_storeu_si128((__m128i *)&dst[i * dst_stride + j + dst_stride],
- _mm256_extracti128_si256(res_16bit, 1));
- } else if (w == 4) {
- res_a_round = _mm256_packs_epi32(res_a_round, res_a_round);
- res_a_round = _mm256_min_epi16(res_a_round, clip_pixel);
- res_a_round = _mm256_max_epi16(res_a_round, zero);
-
- _mm_storel_epi64((__m128i *)&dst[i * dst_stride + j],
- _mm256_castsi256_si128(res_a_round));
- _mm_storel_epi64((__m128i *)&dst[i * dst_stride + j + dst_stride],
- _mm256_extracti128_si256(res_a_round, 1));
- } else {
- res_a_round = _mm256_packs_epi32(res_a_round, res_a_round);
- res_a_round = _mm256_min_epi16(res_a_round, clip_pixel);
- res_a_round = _mm256_max_epi16(res_a_round, zero);
-
- xx_storel_32((__m128i *)&dst[i * dst_stride + j],
- _mm256_castsi256_si128(res_a_round));
- xx_storel_32((__m128i *)&dst[i * dst_stride + j + dst_stride],
- _mm256_extracti128_si256(res_a_round, 1));
- }
-
- s[0] = s[1];
- s[1] = s[2];
- s[2] = s[3];
-
- s[4] = s[5];
- s[5] = s[6];
- s[6] = s[7];
- }
- }
- }
-}
-
-void av1_highbd_convolve_x_sr_avx2(const uint16_t *src, int src_stride,
- uint16_t *dst, int dst_stride, int w, int h,
- const InterpFilterParams *filter_params_x,
- const InterpFilterParams *filter_params_y,
- const int subpel_x_q4, const int subpel_y_q4,
- ConvolveParams *conv_params, int bd) {
- int i, j;
- const int fo_horiz = filter_params_x->taps / 2 - 1;
- const uint16_t *const src_ptr = src - fo_horiz;
- (void)subpel_y_q4;
- (void)filter_params_y;
-
- // Check that, even with 12-bit input, the intermediate values will fit
- // into an unsigned 16-bit intermediate array.
- assert(bd + FILTER_BITS + 2 - conv_params->round_0 <= 16);
-
- __m256i s[4], coeffs_x[4];
-
- const __m256i round_const_x =
- _mm256_set1_epi32(((1 << conv_params->round_0) >> 1));
- const __m128i round_shift_x = _mm_cvtsi32_si128(conv_params->round_0);
-
- const int bits = FILTER_BITS - conv_params->round_0;
- const __m128i round_shift_bits = _mm_cvtsi32_si128(bits);
- const __m256i round_const_bits = _mm256_set1_epi32((1 << bits) >> 1);
- const __m256i clip_pixel =
- _mm256_set1_epi16(bd == 10 ? 1023 : (bd == 12 ? 4095 : 255));
- const __m256i zero = _mm256_setzero_si256();
-
- assert(bits >= 0);
- assert((FILTER_BITS - conv_params->round_1) >= 0 ||
- ((conv_params->round_0 + conv_params->round_1) == 2 * FILTER_BITS));
-
- prepare_coeffs(filter_params_x, subpel_x_q4, coeffs_x);
-
- for (j = 0; j < w; j += 8) {
- /* Horizontal filter */
- for (i = 0; i < h; i += 2) {
- const __m256i row0 =
- _mm256_loadu_si256((__m256i *)&src_ptr[i * src_stride + j]);
- __m256i row1 =
- _mm256_loadu_si256((__m256i *)&src_ptr[(i + 1) * src_stride + j]);
-
- const __m256i r0 = _mm256_permute2x128_si256(row0, row1, 0x20);
- const __m256i r1 = _mm256_permute2x128_si256(row0, row1, 0x31);
-
- // even pixels
- s[0] = _mm256_alignr_epi8(r1, r0, 0);
- s[1] = _mm256_alignr_epi8(r1, r0, 4);
- s[2] = _mm256_alignr_epi8(r1, r0, 8);
- s[3] = _mm256_alignr_epi8(r1, r0, 12);
-
- __m256i res_even = convolve(s, coeffs_x);
- res_even = _mm256_sra_epi32(_mm256_add_epi32(res_even, round_const_x),
- round_shift_x);
-
- // odd pixels
- s[0] = _mm256_alignr_epi8(r1, r0, 2);
- s[1] = _mm256_alignr_epi8(r1, r0, 6);
- s[2] = _mm256_alignr_epi8(r1, r0, 10);
- s[3] = _mm256_alignr_epi8(r1, r0, 14);
-
- __m256i res_odd = convolve(s, coeffs_x);
- res_odd = _mm256_sra_epi32(_mm256_add_epi32(res_odd, round_const_x),
- round_shift_x);
-
- res_even = _mm256_sra_epi32(_mm256_add_epi32(res_even, round_const_bits),
- round_shift_bits);
- res_odd = _mm256_sra_epi32(_mm256_add_epi32(res_odd, round_const_bits),
- round_shift_bits);
-
- __m256i res_even1 = _mm256_packs_epi32(res_even, res_even);
- __m256i res_odd1 = _mm256_packs_epi32(res_odd, res_odd);
-
- __m256i res = _mm256_unpacklo_epi16(res_even1, res_odd1);
- res = _mm256_min_epi16(res, clip_pixel);
- res = _mm256_max_epi16(res, zero);
-
- if (w - j > 4) {
- _mm_storeu_si128((__m128i *)&dst[i * dst_stride + j],
- _mm256_castsi256_si128(res));
- _mm_storeu_si128((__m128i *)&dst[i * dst_stride + j + dst_stride],
- _mm256_extracti128_si256(res, 1));
- } else if (w == 4) {
- _mm_storel_epi64((__m128i *)&dst[i * dst_stride + j],
- _mm256_castsi256_si128(res));
- _mm_storel_epi64((__m128i *)&dst[i * dst_stride + j + dst_stride],
- _mm256_extracti128_si256(res, 1));
- } else {
- xx_storel_32((__m128i *)&dst[i * dst_stride + j],
- _mm256_castsi256_si128(res));
- xx_storel_32((__m128i *)&dst[i * dst_stride + j + dst_stride],
- _mm256_extracti128_si256(res, 1));
- }
- }
- }
-}
-
-#define CONV8_ROUNDING_BITS (7)
-
-// -----------------------------------------------------------------------------
-// Horizontal and vertical filtering
-
-static const uint8_t signal_pattern_0[32] = { 0, 1, 2, 3, 2, 3, 4, 5, 4, 5, 6,
- 7, 6, 7, 8, 9, 0, 1, 2, 3, 2, 3,
- 4, 5, 4, 5, 6, 7, 6, 7, 8, 9 };
-
-static const uint8_t signal_pattern_1[32] = { 4, 5, 6, 7, 6, 7, 8, 9,
- 8, 9, 10, 11, 10, 11, 12, 13,
- 4, 5, 6, 7, 6, 7, 8, 9,
- 8, 9, 10, 11, 10, 11, 12, 13 };
-
-static const uint8_t signal_pattern_2[32] = { 6, 7, 8, 9, 8, 9, 10, 11,
- 10, 11, 12, 13, 12, 13, 14, 15,
- 6, 7, 8, 9, 8, 9, 10, 11,
- 10, 11, 12, 13, 12, 13, 14, 15 };
-
-static const uint32_t signal_index[8] = { 2, 3, 4, 5, 2, 3, 4, 5 };
-
-// -----------------------------------------------------------------------------
-// Horizontal Filtering
-
-static INLINE void pack_pixels(const __m256i *s, __m256i *p /*p[4]*/) {
- const __m256i idx = _mm256_loadu_si256((const __m256i *)signal_index);
- const __m256i sf0 = _mm256_loadu_si256((const __m256i *)signal_pattern_0);
- const __m256i sf1 = _mm256_loadu_si256((const __m256i *)signal_pattern_1);
- const __m256i c = _mm256_permutevar8x32_epi32(*s, idx);
-
- p[0] = _mm256_shuffle_epi8(*s, sf0); // x0x6
- p[1] = _mm256_shuffle_epi8(*s, sf1); // x1x7
- p[2] = _mm256_shuffle_epi8(c, sf0); // x2x4
- p[3] = _mm256_shuffle_epi8(c, sf1); // x3x5
-}
-
-// Note:
-// Shared by 8x2 and 16x1 block
-static INLINE void pack_16_pixels(const __m256i *s0, const __m256i *s1,
- __m256i *x /*x[8]*/) {
- __m256i pp[8];
- pack_pixels(s0, pp);
- pack_pixels(s1, &pp[4]);
- x[0] = _mm256_permute2x128_si256(pp[0], pp[4], 0x20);
- x[1] = _mm256_permute2x128_si256(pp[1], pp[5], 0x20);
- x[2] = _mm256_permute2x128_si256(pp[2], pp[6], 0x20);
- x[3] = _mm256_permute2x128_si256(pp[3], pp[7], 0x20);
- x[4] = x[2];
- x[5] = x[3];
- x[6] = _mm256_permute2x128_si256(pp[0], pp[4], 0x31);
- x[7] = _mm256_permute2x128_si256(pp[1], pp[5], 0x31);
-}
-
-static INLINE void pack_8x1_pixels(const uint16_t *src, __m256i *x) {
- __m256i pp[8];
- __m256i s0;
- s0 = _mm256_loadu_si256((const __m256i *)src);
- pack_pixels(&s0, pp);
- x[0] = _mm256_permute2x128_si256(pp[0], pp[2], 0x30);
- x[1] = _mm256_permute2x128_si256(pp[1], pp[3], 0x30);
- x[2] = _mm256_permute2x128_si256(pp[2], pp[0], 0x30);
- x[3] = _mm256_permute2x128_si256(pp[3], pp[1], 0x30);
-}
-
-static INLINE void pack_8x2_pixels(const uint16_t *src, ptrdiff_t stride,
- __m256i *x) {
- __m256i s0, s1;
- s0 = _mm256_loadu_si256((const __m256i *)src);
- s1 = _mm256_loadu_si256((const __m256i *)(src + stride));
- pack_16_pixels(&s0, &s1, x);
-}
-
-static INLINE void pack_16x1_pixels(const uint16_t *src, __m256i *x) {
- __m256i s0, s1;
- s0 = _mm256_loadu_si256((const __m256i *)src);
- s1 = _mm256_loadu_si256((const __m256i *)(src + 8));
- pack_16_pixels(&s0, &s1, x);
-}
-
-// Note:
-// Shared by horizontal and vertical filtering
-static INLINE void pack_filters(const int16_t *filter, __m256i *f /*f[4]*/) {
- const __m128i h = _mm_loadu_si128((const __m128i *)filter);
- const __m256i hh = _mm256_insertf128_si256(_mm256_castsi128_si256(h), h, 1);
- const __m256i p0 = _mm256_set1_epi32(0x03020100);
- const __m256i p1 = _mm256_set1_epi32(0x07060504);
- const __m256i p2 = _mm256_set1_epi32(0x0b0a0908);
- const __m256i p3 = _mm256_set1_epi32(0x0f0e0d0c);
- f[0] = _mm256_shuffle_epi8(hh, p0);
- f[1] = _mm256_shuffle_epi8(hh, p1);
- f[2] = _mm256_shuffle_epi8(hh, p2);
- f[3] = _mm256_shuffle_epi8(hh, p3);
-}
-
-static INLINE void filter_8x1_pixels(const __m256i *sig /*sig[4]*/,
- const __m256i *fil /*fil[4]*/,
- __m256i *y) {
- __m256i a, a0, a1;
-
- a0 = _mm256_madd_epi16(fil[0], sig[0]);
- a1 = _mm256_madd_epi16(fil[3], sig[3]);
- a = _mm256_add_epi32(a0, a1);
-
- a0 = _mm256_madd_epi16(fil[1], sig[1]);
- a1 = _mm256_madd_epi16(fil[2], sig[2]);
-
- {
- const __m256i min = _mm256_min_epi32(a0, a1);
- a = _mm256_add_epi32(a, min);
- }
- {
- const __m256i max = _mm256_max_epi32(a0, a1);
- a = _mm256_add_epi32(a, max);
- }
- {
- const __m256i rounding = _mm256_set1_epi32(1 << (CONV8_ROUNDING_BITS - 1));
- a = _mm256_add_epi32(a, rounding);
- *y = _mm256_srai_epi32(a, CONV8_ROUNDING_BITS);
- }
-}
-
-static INLINE void store_8x1_pixels(const __m256i *y, const __m256i *mask,
- uint16_t *dst) {
- const __m128i a0 = _mm256_castsi256_si128(*y);
- const __m128i a1 = _mm256_extractf128_si256(*y, 1);
- __m128i res = _mm_packus_epi32(a0, a1);
- res = _mm_min_epi16(res, _mm256_castsi256_si128(*mask));
- _mm_storeu_si128((__m128i *)dst, res);
-}
-
-static INLINE void store_8x2_pixels(const __m256i *y0, const __m256i *y1,
- const __m256i *mask, uint16_t *dst,
- ptrdiff_t pitch) {
- __m256i a = _mm256_packus_epi32(*y0, *y1);
- a = _mm256_min_epi16(a, *mask);
- _mm_storeu_si128((__m128i *)dst, _mm256_castsi256_si128(a));
- _mm_storeu_si128((__m128i *)(dst + pitch), _mm256_extractf128_si256(a, 1));
-}
-
-static INLINE void store_16x1_pixels(const __m256i *y0, const __m256i *y1,
- const __m256i *mask, uint16_t *dst) {
- __m256i a = _mm256_packus_epi32(*y0, *y1);
- a = _mm256_min_epi16(a, *mask);
- _mm256_storeu_si256((__m256i *)dst, a);
-}
-
-static void aom_highbd_filter_block1d8_h8_avx2(
- const uint16_t *src_ptr, ptrdiff_t src_pitch, uint16_t *dst_ptr,
- ptrdiff_t dst_pitch, uint32_t height, const int16_t *filter, int bd) {
- __m256i signal[8], res0, res1;
- const __m256i max = _mm256_set1_epi16((1 << bd) - 1);
-
- __m256i ff[4];
- pack_filters(filter, ff);
-
- src_ptr -= 3;
- do {
- pack_8x2_pixels(src_ptr, src_pitch, signal);
- filter_8x1_pixels(signal, ff, &res0);
- filter_8x1_pixels(&signal[4], ff, &res1);
- store_8x2_pixels(&res0, &res1, &max, dst_ptr, dst_pitch);
- height -= 2;
- src_ptr += src_pitch << 1;
- dst_ptr += dst_pitch << 1;
- } while (height > 1);
-
- if (height > 0) {
- pack_8x1_pixels(src_ptr, signal);
- filter_8x1_pixels(signal, ff, &res0);
- store_8x1_pixels(&res0, &max, dst_ptr);
- }
-}
-
-static void aom_highbd_filter_block1d16_h8_avx2(
- const uint16_t *src_ptr, ptrdiff_t src_pitch, uint16_t *dst_ptr,
- ptrdiff_t dst_pitch, uint32_t height, const int16_t *filter, int bd) {
- __m256i signal[8], res0, res1;
- const __m256i max = _mm256_set1_epi16((1 << bd) - 1);
-
- __m256i ff[4];
- pack_filters(filter, ff);
-
- src_ptr -= 3;
- do {
- pack_16x1_pixels(src_ptr, signal);
- filter_8x1_pixels(signal, ff, &res0);
- filter_8x1_pixels(&signal[4], ff, &res1);
- store_16x1_pixels(&res0, &res1, &max, dst_ptr);
- height -= 1;
- src_ptr += src_pitch;
- dst_ptr += dst_pitch;
- } while (height > 0);
-}
-
-// -----------------------------------------------------------------------------
-// 2-tap horizontal filtering
-
-static INLINE void pack_2t_filter(const int16_t *filter, __m256i *f) {
- const __m128i h = _mm_loadu_si128((const __m128i *)filter);
- const __m256i hh = _mm256_insertf128_si256(_mm256_castsi128_si256(h), h, 1);
- const __m256i p = _mm256_set1_epi32(0x09080706);
- f[0] = _mm256_shuffle_epi8(hh, p);
-}
-
-// can be used by pack_8x2_2t_pixels() and pack_16x1_2t_pixels()
-// the difference is s0/s1 specifies first and second rows or,
-// first 16 samples and 8-sample shifted 16 samples
-static INLINE void pack_16_2t_pixels(const __m256i *s0, const __m256i *s1,
- __m256i *sig) {
- const __m256i idx = _mm256_loadu_si256((const __m256i *)signal_index);
- const __m256i sf2 = _mm256_loadu_si256((const __m256i *)signal_pattern_2);
- __m256i x0 = _mm256_shuffle_epi8(*s0, sf2);
- __m256i x1 = _mm256_shuffle_epi8(*s1, sf2);
- __m256i r0 = _mm256_permutevar8x32_epi32(*s0, idx);
- __m256i r1 = _mm256_permutevar8x32_epi32(*s1, idx);
- r0 = _mm256_shuffle_epi8(r0, sf2);
- r1 = _mm256_shuffle_epi8(r1, sf2);
- sig[0] = _mm256_permute2x128_si256(x0, x1, 0x20);
- sig[1] = _mm256_permute2x128_si256(r0, r1, 0x20);
-}
-
-static INLINE void pack_8x2_2t_pixels(const uint16_t *src,
- const ptrdiff_t pitch, __m256i *sig) {
- const __m256i r0 = _mm256_loadu_si256((const __m256i *)src);
- const __m256i r1 = _mm256_loadu_si256((const __m256i *)(src + pitch));
- pack_16_2t_pixels(&r0, &r1, sig);
-}
-
-static INLINE void pack_16x1_2t_pixels(const uint16_t *src,
- __m256i *sig /*sig[2]*/) {
- const __m256i r0 = _mm256_loadu_si256((const __m256i *)src);
- const __m256i r1 = _mm256_loadu_si256((const __m256i *)(src + 8));
- pack_16_2t_pixels(&r0, &r1, sig);
-}
-
-static INLINE void pack_8x1_2t_pixels(const uint16_t *src,
- __m256i *sig /*sig[2]*/) {
- const __m256i idx = _mm256_loadu_si256((const __m256i *)signal_index);
- const __m256i sf2 = _mm256_loadu_si256((const __m256i *)signal_pattern_2);
- __m256i r0 = _mm256_loadu_si256((const __m256i *)src);
- __m256i x0 = _mm256_shuffle_epi8(r0, sf2);
- r0 = _mm256_permutevar8x32_epi32(r0, idx);
- r0 = _mm256_shuffle_epi8(r0, sf2);
- sig[0] = _mm256_permute2x128_si256(x0, r0, 0x20);
-}
-
-// can be used by filter_8x2_2t_pixels() and filter_16x1_2t_pixels()
-static INLINE void filter_16_2t_pixels(const __m256i *sig, const __m256i *f,
- __m256i *y0, __m256i *y1) {
- const __m256i rounding = _mm256_set1_epi32(1 << (CONV8_ROUNDING_BITS - 1));
- __m256i x0 = _mm256_madd_epi16(sig[0], *f);
- __m256i x1 = _mm256_madd_epi16(sig[1], *f);
- x0 = _mm256_add_epi32(x0, rounding);
- x1 = _mm256_add_epi32(x1, rounding);
- *y0 = _mm256_srai_epi32(x0, CONV8_ROUNDING_BITS);
- *y1 = _mm256_srai_epi32(x1, CONV8_ROUNDING_BITS);
-}
-
-static INLINE void filter_8x1_2t_pixels(const __m256i *sig, const __m256i *f,
- __m256i *y0) {
- const __m256i rounding = _mm256_set1_epi32(1 << (CONV8_ROUNDING_BITS - 1));
- __m256i x0 = _mm256_madd_epi16(sig[0], *f);
- x0 = _mm256_add_epi32(x0, rounding);
- *y0 = _mm256_srai_epi32(x0, CONV8_ROUNDING_BITS);
-}
-
-static void aom_highbd_filter_block1d8_h2_avx2(
- const uint16_t *src_ptr, ptrdiff_t src_pitch, uint16_t *dst_ptr,
- ptrdiff_t dst_pitch, uint32_t height, const int16_t *filter, int bd) {
- __m256i signal[2], res0, res1;
- const __m256i max = _mm256_set1_epi16((1 << bd) - 1);
-
- __m256i ff;
- pack_2t_filter(filter, &ff);
-
- src_ptr -= 3;
- do {
- pack_8x2_2t_pixels(src_ptr, src_pitch, signal);
- filter_16_2t_pixels(signal, &ff, &res0, &res1);
- store_8x2_pixels(&res0, &res1, &max, dst_ptr, dst_pitch);
- height -= 2;
- src_ptr += src_pitch << 1;
- dst_ptr += dst_pitch << 1;
- } while (height > 1);
-
- if (height > 0) {
- pack_8x1_2t_pixels(src_ptr, signal);
- filter_8x1_2t_pixels(signal, &ff, &res0);
- store_8x1_pixels(&res0, &max, dst_ptr);
- }
-}
-
-static void aom_highbd_filter_block1d16_h2_avx2(
- const uint16_t *src_ptr, ptrdiff_t src_pitch, uint16_t *dst_ptr,
- ptrdiff_t dst_pitch, uint32_t height, const int16_t *filter, int bd) {
- __m256i signal[2], res0, res1;
- const __m256i max = _mm256_set1_epi16((1 << bd) - 1);
-
- __m256i ff;
- pack_2t_filter(filter, &ff);
-
- src_ptr -= 3;
- do {
- pack_16x1_2t_pixels(src_ptr, signal);
- filter_16_2t_pixels(signal, &ff, &res0, &res1);
- store_16x1_pixels(&res0, &res1, &max, dst_ptr);
- height -= 1;
- src_ptr += src_pitch;
- dst_ptr += dst_pitch;
- } while (height > 0);
-}
-
-// -----------------------------------------------------------------------------
-// Vertical Filtering
-
-static void pack_8x9_init(const uint16_t *src, ptrdiff_t pitch, __m256i *sig) {
- __m256i s0 = _mm256_castsi128_si256(_mm_loadu_si128((const __m128i *)src));
- __m256i s1 =
- _mm256_castsi128_si256(_mm_loadu_si128((const __m128i *)(src + pitch)));
- __m256i s2 = _mm256_castsi128_si256(
- _mm_loadu_si128((const __m128i *)(src + 2 * pitch)));
- __m256i s3 = _mm256_castsi128_si256(
- _mm_loadu_si128((const __m128i *)(src + 3 * pitch)));
- __m256i s4 = _mm256_castsi128_si256(
- _mm_loadu_si128((const __m128i *)(src + 4 * pitch)));
- __m256i s5 = _mm256_castsi128_si256(
- _mm_loadu_si128((const __m128i *)(src + 5 * pitch)));
- __m256i s6 = _mm256_castsi128_si256(
- _mm_loadu_si128((const __m128i *)(src + 6 * pitch)));
-
- s0 = _mm256_inserti128_si256(s0, _mm256_castsi256_si128(s1), 1);
- s1 = _mm256_inserti128_si256(s1, _mm256_castsi256_si128(s2), 1);
- s2 = _mm256_inserti128_si256(s2, _mm256_castsi256_si128(s3), 1);
- s3 = _mm256_inserti128_si256(s3, _mm256_castsi256_si128(s4), 1);
- s4 = _mm256_inserti128_si256(s4, _mm256_castsi256_si128(s5), 1);
- s5 = _mm256_inserti128_si256(s5, _mm256_castsi256_si128(s6), 1);
-
- sig[0] = _mm256_unpacklo_epi16(s0, s1);
- sig[4] = _mm256_unpackhi_epi16(s0, s1);
- sig[1] = _mm256_unpacklo_epi16(s2, s3);
- sig[5] = _mm256_unpackhi_epi16(s2, s3);
- sig[2] = _mm256_unpacklo_epi16(s4, s5);
- sig[6] = _mm256_unpackhi_epi16(s4, s5);
- sig[8] = s6;
-}
-
-static INLINE void pack_8x9_pixels(const uint16_t *src, ptrdiff_t pitch,
- __m256i *sig) {
- // base + 7th row
- __m256i s0 = _mm256_castsi128_si256(
- _mm_loadu_si128((const __m128i *)(src + 7 * pitch)));
- // base + 8th row
- __m256i s1 = _mm256_castsi128_si256(
- _mm_loadu_si128((const __m128i *)(src + 8 * pitch)));
- __m256i s2 = _mm256_inserti128_si256(sig[8], _mm256_castsi256_si128(s0), 1);
- __m256i s3 = _mm256_inserti128_si256(s0, _mm256_castsi256_si128(s1), 1);
- sig[3] = _mm256_unpacklo_epi16(s2, s3);
- sig[7] = _mm256_unpackhi_epi16(s2, s3);
- sig[8] = s1;
-}
-
-static INLINE void filter_8x9_pixels(const __m256i *sig, const __m256i *f,
- __m256i *y0, __m256i *y1) {
- filter_8x1_pixels(sig, f, y0);
- filter_8x1_pixels(&sig[4], f, y1);
-}
-
-static INLINE void update_pixels(__m256i *sig) {
- int i;
- for (i = 0; i < 3; ++i) {
- sig[i] = sig[i + 1];
- sig[i + 4] = sig[i + 5];
- }
-}
-
-static void aom_highbd_filter_block1d8_v8_avx2(
- const uint16_t *src_ptr, ptrdiff_t src_pitch, uint16_t *dst_ptr,
- ptrdiff_t dst_pitch, uint32_t height, const int16_t *filter, int bd) {
- __m256i signal[9], res0, res1;
- const __m256i max = _mm256_set1_epi16((1 << bd) - 1);
-
- __m256i ff[4];
- pack_filters(filter, ff);
-
- pack_8x9_init(src_ptr, src_pitch, signal);
-
- do {
- pack_8x9_pixels(src_ptr, src_pitch, signal);
-
- filter_8x9_pixels(signal, ff, &res0, &res1);
- store_8x2_pixels(&res0, &res1, &max, dst_ptr, dst_pitch);
- update_pixels(signal);
-
- src_ptr += src_pitch << 1;
- dst_ptr += dst_pitch << 1;
- height -= 2;
- } while (height > 0);
-}
-
-static void pack_16x9_init(const uint16_t *src, ptrdiff_t pitch, __m256i *sig) {
- __m256i u0, u1, u2, u3;
- // load 0-6 rows
- const __m256i s0 = _mm256_loadu_si256((const __m256i *)src);
- const __m256i s1 = _mm256_loadu_si256((const __m256i *)(src + pitch));
- const __m256i s2 = _mm256_loadu_si256((const __m256i *)(src + 2 * pitch));
- const __m256i s3 = _mm256_loadu_si256((const __m256i *)(src + 3 * pitch));
- const __m256i s4 = _mm256_loadu_si256((const __m256i *)(src + 4 * pitch));
- const __m256i s5 = _mm256_loadu_si256((const __m256i *)(src + 5 * pitch));
- const __m256i s6 = _mm256_loadu_si256((const __m256i *)(src + 6 * pitch));
-
- u0 = _mm256_permute2x128_si256(s0, s1, 0x20); // 0, 1 low
- u1 = _mm256_permute2x128_si256(s0, s1, 0x31); // 0, 1 high
-
- u2 = _mm256_permute2x128_si256(s1, s2, 0x20); // 1, 2 low
- u3 = _mm256_permute2x128_si256(s1, s2, 0x31); // 1, 2 high
-
- sig[0] = _mm256_unpacklo_epi16(u0, u2);
- sig[4] = _mm256_unpackhi_epi16(u0, u2);
-
- sig[8] = _mm256_unpacklo_epi16(u1, u3);
- sig[12] = _mm256_unpackhi_epi16(u1, u3);
-
- u0 = _mm256_permute2x128_si256(s2, s3, 0x20);
- u1 = _mm256_permute2x128_si256(s2, s3, 0x31);
-
- u2 = _mm256_permute2x128_si256(s3, s4, 0x20);
- u3 = _mm256_permute2x128_si256(s3, s4, 0x31);
-
- sig[1] = _mm256_unpacklo_epi16(u0, u2);
- sig[5] = _mm256_unpackhi_epi16(u0, u2);
-
- sig[9] = _mm256_unpacklo_epi16(u1, u3);
- sig[13] = _mm256_unpackhi_epi16(u1, u3);
-
- u0 = _mm256_permute2x128_si256(s4, s5, 0x20);
- u1 = _mm256_permute2x128_si256(s4, s5, 0x31);
-
- u2 = _mm256_permute2x128_si256(s5, s6, 0x20);
- u3 = _mm256_permute2x128_si256(s5, s6, 0x31);
-
- sig[2] = _mm256_unpacklo_epi16(u0, u2);
- sig[6] = _mm256_unpackhi_epi16(u0, u2);
-
- sig[10] = _mm256_unpacklo_epi16(u1, u3);
- sig[14] = _mm256_unpackhi_epi16(u1, u3);
-
- sig[16] = s6;
-}
-
-static void pack_16x9_pixels(const uint16_t *src, ptrdiff_t pitch,
- __m256i *sig) {
- // base + 7th row
- const __m256i s7 = _mm256_loadu_si256((const __m256i *)(src + 7 * pitch));
- // base + 8th row
- const __m256i s8 = _mm256_loadu_si256((const __m256i *)(src + 8 * pitch));
-
- __m256i u0, u1, u2, u3;
- u0 = _mm256_permute2x128_si256(sig[16], s7, 0x20);
- u1 = _mm256_permute2x128_si256(sig[16], s7, 0x31);
-
- u2 = _mm256_permute2x128_si256(s7, s8, 0x20);
- u3 = _mm256_permute2x128_si256(s7, s8, 0x31);
-
- sig[3] = _mm256_unpacklo_epi16(u0, u2);
- sig[7] = _mm256_unpackhi_epi16(u0, u2);
-
- sig[11] = _mm256_unpacklo_epi16(u1, u3);
- sig[15] = _mm256_unpackhi_epi16(u1, u3);
-
- sig[16] = s8;
-}
-
-static INLINE void filter_16x9_pixels(const __m256i *sig, const __m256i *f,
- __m256i *y0, __m256i *y1) {
- __m256i res[4];
- int i;
- for (i = 0; i < 4; ++i) {
- filter_8x1_pixels(&sig[i << 2], f, &res[i]);
- }
-
- {
- const __m256i l0l1 = _mm256_packus_epi32(res[0], res[1]);
- const __m256i h0h1 = _mm256_packus_epi32(res[2], res[3]);
- *y0 = _mm256_permute2x128_si256(l0l1, h0h1, 0x20);
- *y1 = _mm256_permute2x128_si256(l0l1, h0h1, 0x31);
- }
-}
-
-static INLINE void store_16x2_pixels(const __m256i *y0, const __m256i *y1,
- const __m256i *mask, uint16_t *dst,
- ptrdiff_t pitch) {
- __m256i p = _mm256_min_epi16(*y0, *mask);
- _mm256_storeu_si256((__m256i *)dst, p);
- p = _mm256_min_epi16(*y1, *mask);
- _mm256_storeu_si256((__m256i *)(dst + pitch), p);
-}
-
-static void update_16x9_pixels(__m256i *sig) {
- update_pixels(&sig[0]);
- update_pixels(&sig[8]);
-}
-
-static void aom_highbd_filter_block1d16_v8_avx2(
- const uint16_t *src_ptr, ptrdiff_t src_pitch, uint16_t *dst_ptr,
- ptrdiff_t dst_pitch, uint32_t height, const int16_t *filter, int bd) {
- __m256i signal[17], res0, res1;
- const __m256i max = _mm256_set1_epi16((1 << bd) - 1);
-
- __m256i ff[4];
- pack_filters(filter, ff);
-
- pack_16x9_init(src_ptr, src_pitch, signal);
-
- do {
- pack_16x9_pixels(src_ptr, src_pitch, signal);
- filter_16x9_pixels(signal, ff, &res0, &res1);
- store_16x2_pixels(&res0, &res1, &max, dst_ptr, dst_pitch);
- update_16x9_pixels(signal);
-
- src_ptr += src_pitch << 1;
- dst_ptr += dst_pitch << 1;
- height -= 2;
- } while (height > 0);
-}
-
-// -----------------------------------------------------------------------------
-// 2-tap vertical filtering
-
-static void pack_16x2_init(const uint16_t *src, __m256i *sig) {
- sig[2] = _mm256_loadu_si256((const __m256i *)src);
-}
-
-static INLINE void pack_16x2_2t_pixels(const uint16_t *src, ptrdiff_t pitch,
- __m256i *sig) {
- // load the next row
- const __m256i u = _mm256_loadu_si256((const __m256i *)(src + pitch));
- sig[0] = _mm256_unpacklo_epi16(sig[2], u);
- sig[1] = _mm256_unpackhi_epi16(sig[2], u);
- sig[2] = u;
-}
-
-static INLINE void filter_16x2_2t_pixels(const __m256i *sig, const __m256i *f,
- __m256i *y0, __m256i *y1) {
- filter_16_2t_pixels(sig, f, y0, y1);
-}
-
-static void aom_highbd_filter_block1d16_v2_avx2(
- const uint16_t *src_ptr, ptrdiff_t src_pitch, uint16_t *dst_ptr,
- ptrdiff_t dst_pitch, uint32_t height, const int16_t *filter, int bd) {
- __m256i signal[3], res0, res1;
- const __m256i max = _mm256_set1_epi16((1 << bd) - 1);
- __m256i ff;
-
- pack_2t_filter(filter, &ff);
- pack_16x2_init(src_ptr, signal);
-
- do {
- pack_16x2_2t_pixels(src_ptr, src_pitch, signal);
- filter_16x2_2t_pixels(signal, &ff, &res0, &res1);
- store_16x1_pixels(&res0, &res1, &max, dst_ptr);
-
- src_ptr += src_pitch;
- dst_ptr += dst_pitch;
- height -= 1;
- } while (height > 0);
-}
-
-static INLINE void pack_8x1_2t_filter(const int16_t *filter, __m128i *f) {
- const __m128i h = _mm_loadu_si128((const __m128i *)filter);
- const __m128i p = _mm_set1_epi32(0x09080706);
- f[0] = _mm_shuffle_epi8(h, p);
-}
-
-static void pack_8x2_init(const uint16_t *src, __m128i *sig) {
- sig[2] = _mm_loadu_si128((const __m128i *)src);
-}
-
-static INLINE void pack_8x2_2t_pixels_ver(const uint16_t *src, ptrdiff_t pitch,
- __m128i *sig) {
- // load the next row
- const __m128i u = _mm_loadu_si128((const __m128i *)(src + pitch));
- sig[0] = _mm_unpacklo_epi16(sig[2], u);
- sig[1] = _mm_unpackhi_epi16(sig[2], u);
- sig[2] = u;
-}
-
-static INLINE void filter_8_2t_pixels(const __m128i *sig, const __m128i *f,
- __m128i *y0, __m128i *y1) {
- const __m128i rounding = _mm_set1_epi32(1 << (CONV8_ROUNDING_BITS - 1));
- __m128i x0 = _mm_madd_epi16(sig[0], *f);
- __m128i x1 = _mm_madd_epi16(sig[1], *f);
- x0 = _mm_add_epi32(x0, rounding);
- x1 = _mm_add_epi32(x1, rounding);
- *y0 = _mm_srai_epi32(x0, CONV8_ROUNDING_BITS);
- *y1 = _mm_srai_epi32(x1, CONV8_ROUNDING_BITS);
-}
-
-static INLINE void store_8x1_2t_pixels_ver(const __m128i *y0, const __m128i *y1,
- const __m128i *mask, uint16_t *dst) {
- __m128i res = _mm_packus_epi32(*y0, *y1);
- res = _mm_min_epi16(res, *mask);
- _mm_storeu_si128((__m128i *)dst, res);
-}
-
-static void aom_highbd_filter_block1d8_v2_avx2(
- const uint16_t *src_ptr, ptrdiff_t src_pitch, uint16_t *dst_ptr,
- ptrdiff_t dst_pitch, uint32_t height, const int16_t *filter, int bd) {
- __m128i signal[3], res0, res1;
- const __m128i max = _mm_set1_epi16((1 << bd) - 1);
- __m128i ff;
-
- pack_8x1_2t_filter(filter, &ff);
- pack_8x2_init(src_ptr, signal);
-
- do {
- pack_8x2_2t_pixels_ver(src_ptr, src_pitch, signal);
- filter_8_2t_pixels(signal, &ff, &res0, &res1);
- store_8x1_2t_pixels_ver(&res0, &res1, &max, dst_ptr);
-
- src_ptr += src_pitch;
- dst_ptr += dst_pitch;
- height -= 1;
- } while (height > 0);
-}
-
-void aom_highbd_filter_block1d4_h8_sse2(const uint16_t *, ptrdiff_t, uint16_t *,
- ptrdiff_t, uint32_t, const int16_t *,
- int);
-void aom_highbd_filter_block1d4_h2_sse2(const uint16_t *, ptrdiff_t, uint16_t *,
- ptrdiff_t, uint32_t, const int16_t *,
- int);
-void aom_highbd_filter_block1d4_v8_sse2(const uint16_t *, ptrdiff_t, uint16_t *,
- ptrdiff_t, uint32_t, const int16_t *,
- int);
-void aom_highbd_filter_block1d4_v2_sse2(const uint16_t *, ptrdiff_t, uint16_t *,
- ptrdiff_t, uint32_t, const int16_t *,
- int);
-#define aom_highbd_filter_block1d4_h8_avx2 aom_highbd_filter_block1d4_h8_sse2
-#define aom_highbd_filter_block1d4_h2_avx2 aom_highbd_filter_block1d4_h2_sse2
-#define aom_highbd_filter_block1d4_v8_avx2 aom_highbd_filter_block1d4_v8_sse2
-#define aom_highbd_filter_block1d4_v2_avx2 aom_highbd_filter_block1d4_v2_sse2
-
-HIGH_FUN_CONV_1D(horiz, x_step_q4, filter_x, h, src, , avx2);
-HIGH_FUN_CONV_1D(vert, y_step_q4, filter_y, v, src - src_stride * 3, , avx2);
-
-#undef HIGHBD_FUNC
diff --git a/third_party/aom/aom_dsp/x86/highbd_convolve_ssse3.c b/third_party/aom/aom_dsp/x86/highbd_convolve_ssse3.c
deleted file mode 100644
index e7b33d1c4..000000000
--- a/third_party/aom/aom_dsp/x86/highbd_convolve_ssse3.c
+++ /dev/null
@@ -1,251 +0,0 @@
-/*
- * Copyright (c) 2018, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#include <tmmintrin.h>
-#include <assert.h>
-
-#include "config/aom_dsp_rtcd.h"
-
-#include "aom_dsp/x86/convolve_sse2.h"
-
-void av1_highbd_convolve_y_sr_ssse3(const uint16_t *src, int src_stride,
- uint16_t *dst, int dst_stride, int w, int h,
- const InterpFilterParams *filter_params_x,
- const InterpFilterParams *filter_params_y,
- const int subpel_x_q4,
- const int subpel_y_q4,
- ConvolveParams *conv_params, int bd) {
- int i, j;
- const int fo_vert = filter_params_y->taps / 2 - 1;
- const uint16_t *const src_ptr = src - fo_vert * src_stride;
- (void)filter_params_x;
- (void)subpel_x_q4;
- (void)conv_params;
-
- assert(conv_params->round_0 <= FILTER_BITS);
- assert(((conv_params->round_0 + conv_params->round_1) <= (FILTER_BITS + 1)) ||
- ((conv_params->round_0 + conv_params->round_1) == (2 * FILTER_BITS)));
-
- __m128i s[16], coeffs_y[4];
-
- const int bits = FILTER_BITS;
-
- const __m128i round_shift_bits = _mm_cvtsi32_si128(bits);
- const __m128i round_const_bits = _mm_set1_epi32((1 << bits) >> 1);
- const __m128i clip_pixel =
- _mm_set1_epi16(bd == 10 ? 1023 : (bd == 12 ? 4095 : 255));
- const __m128i zero = _mm_setzero_si128();
-
- prepare_coeffs(filter_params_y, subpel_y_q4, coeffs_y);
-
- for (j = 0; j < w; j += 8) {
- const uint16_t *data = &src_ptr[j];
- /* Vertical filter */
- {
- __m128i s0 = _mm_loadu_si128((__m128i *)(data + 0 * src_stride));
- __m128i s1 = _mm_loadu_si128((__m128i *)(data + 1 * src_stride));
- __m128i s2 = _mm_loadu_si128((__m128i *)(data + 2 * src_stride));
- __m128i s3 = _mm_loadu_si128((__m128i *)(data + 3 * src_stride));
- __m128i s4 = _mm_loadu_si128((__m128i *)(data + 4 * src_stride));
- __m128i s5 = _mm_loadu_si128((__m128i *)(data + 5 * src_stride));
- __m128i s6 = _mm_loadu_si128((__m128i *)(data + 6 * src_stride));
-
- s[0] = _mm_unpacklo_epi16(s0, s1);
- s[1] = _mm_unpacklo_epi16(s2, s3);
- s[2] = _mm_unpacklo_epi16(s4, s5);
-
- s[4] = _mm_unpackhi_epi16(s0, s1);
- s[5] = _mm_unpackhi_epi16(s2, s3);
- s[6] = _mm_unpackhi_epi16(s4, s5);
-
- s[0 + 8] = _mm_unpacklo_epi16(s1, s2);
- s[1 + 8] = _mm_unpacklo_epi16(s3, s4);
- s[2 + 8] = _mm_unpacklo_epi16(s5, s6);
-
- s[4 + 8] = _mm_unpackhi_epi16(s1, s2);
- s[5 + 8] = _mm_unpackhi_epi16(s3, s4);
- s[6 + 8] = _mm_unpackhi_epi16(s5, s6);
-
- for (i = 0; i < h; i += 2) {
- data = &src_ptr[i * src_stride + j];
-
- __m128i s7 = _mm_loadu_si128((__m128i *)(data + 7 * src_stride));
- __m128i s8 = _mm_loadu_si128((__m128i *)(data + 8 * src_stride));
-
- s[3] = _mm_unpacklo_epi16(s6, s7);
- s[7] = _mm_unpackhi_epi16(s6, s7);
-
- s[3 + 8] = _mm_unpacklo_epi16(s7, s8);
- s[7 + 8] = _mm_unpackhi_epi16(s7, s8);
-
- const __m128i res_a0 = convolve(s, coeffs_y);
- __m128i res_a_round0 = _mm_sra_epi32(
- _mm_add_epi32(res_a0, round_const_bits), round_shift_bits);
-
- const __m128i res_a1 = convolve(s + 8, coeffs_y);
- __m128i res_a_round1 = _mm_sra_epi32(
- _mm_add_epi32(res_a1, round_const_bits), round_shift_bits);
-
- if (w - j > 4) {
- const __m128i res_b0 = convolve(s + 4, coeffs_y);
- __m128i res_b_round0 = _mm_sra_epi32(
- _mm_add_epi32(res_b0, round_const_bits), round_shift_bits);
-
- const __m128i res_b1 = convolve(s + 4 + 8, coeffs_y);
- __m128i res_b_round1 = _mm_sra_epi32(
- _mm_add_epi32(res_b1, round_const_bits), round_shift_bits);
-
- __m128i res_16bit0 = _mm_packs_epi32(res_a_round0, res_b_round0);
- res_16bit0 = _mm_min_epi16(res_16bit0, clip_pixel);
- res_16bit0 = _mm_max_epi16(res_16bit0, zero);
-
- __m128i res_16bit1 = _mm_packs_epi32(res_a_round1, res_b_round1);
- res_16bit1 = _mm_min_epi16(res_16bit1, clip_pixel);
- res_16bit1 = _mm_max_epi16(res_16bit1, zero);
-
- _mm_storeu_si128((__m128i *)&dst[i * dst_stride + j], res_16bit0);
- _mm_storeu_si128((__m128i *)&dst[i * dst_stride + j + dst_stride],
- res_16bit1);
- } else if (w == 4) {
- res_a_round0 = _mm_packs_epi32(res_a_round0, res_a_round0);
- res_a_round0 = _mm_min_epi16(res_a_round0, clip_pixel);
- res_a_round0 = _mm_max_epi16(res_a_round0, zero);
-
- res_a_round1 = _mm_packs_epi32(res_a_round1, res_a_round1);
- res_a_round1 = _mm_min_epi16(res_a_round1, clip_pixel);
- res_a_round1 = _mm_max_epi16(res_a_round1, zero);
-
- _mm_storel_epi64((__m128i *)&dst[i * dst_stride + j], res_a_round0);
- _mm_storel_epi64((__m128i *)&dst[i * dst_stride + j + dst_stride],
- res_a_round1);
- } else {
- res_a_round0 = _mm_packs_epi32(res_a_round0, res_a_round0);
- res_a_round0 = _mm_min_epi16(res_a_round0, clip_pixel);
- res_a_round0 = _mm_max_epi16(res_a_round0, zero);
-
- res_a_round1 = _mm_packs_epi32(res_a_round1, res_a_round1);
- res_a_round1 = _mm_min_epi16(res_a_round1, clip_pixel);
- res_a_round1 = _mm_max_epi16(res_a_round1, zero);
-
- *((uint32_t *)(&dst[i * dst_stride + j])) =
- _mm_cvtsi128_si32(res_a_round0);
-
- *((uint32_t *)(&dst[i * dst_stride + j + dst_stride])) =
- _mm_cvtsi128_si32(res_a_round1);
- }
-
- s[0] = s[1];
- s[1] = s[2];
- s[2] = s[3];
-
- s[4] = s[5];
- s[5] = s[6];
- s[6] = s[7];
-
- s[0 + 8] = s[1 + 8];
- s[1 + 8] = s[2 + 8];
- s[2 + 8] = s[3 + 8];
-
- s[4 + 8] = s[5 + 8];
- s[5 + 8] = s[6 + 8];
- s[6 + 8] = s[7 + 8];
-
- s6 = s8;
- }
- }
- }
-}
-
-void av1_highbd_convolve_x_sr_ssse3(const uint16_t *src, int src_stride,
- uint16_t *dst, int dst_stride, int w, int h,
- const InterpFilterParams *filter_params_x,
- const InterpFilterParams *filter_params_y,
- const int subpel_x_q4,
- const int subpel_y_q4,
- ConvolveParams *conv_params, int bd) {
- int i, j;
- const int fo_horiz = filter_params_x->taps / 2 - 1;
- const uint16_t *const src_ptr = src - fo_horiz;
- (void)subpel_y_q4;
- (void)filter_params_y;
-
- // Check that, even with 12-bit input, the intermediate values will fit
- // into an unsigned 16-bit intermediate array.
- assert(bd + FILTER_BITS + 2 - conv_params->round_0 <= 16);
-
- __m128i s[4], coeffs_x[4];
-
- const __m128i round_const_x =
- _mm_set1_epi32(((1 << conv_params->round_0) >> 1));
- const __m128i round_shift_x = _mm_cvtsi32_si128(conv_params->round_0);
-
- const int bits = FILTER_BITS - conv_params->round_0;
-
- const __m128i round_shift_bits = _mm_cvtsi32_si128(bits);
- const __m128i round_const_bits = _mm_set1_epi32((1 << bits) >> 1);
- const __m128i clip_pixel =
- _mm_set1_epi16(bd == 10 ? 1023 : (bd == 12 ? 4095 : 255));
- const __m128i zero = _mm_setzero_si128();
-
- prepare_coeffs(filter_params_x, subpel_x_q4, coeffs_x);
-
- for (j = 0; j < w; j += 8) {
- /* Horizontal filter */
- {
- for (i = 0; i < h; i += 1) {
- const __m128i row00 =
- _mm_loadu_si128((__m128i *)&src_ptr[i * src_stride + j]);
- const __m128i row01 =
- _mm_loadu_si128((__m128i *)&src_ptr[i * src_stride + (j + 8)]);
-
- // even pixels
- s[0] = _mm_alignr_epi8(row01, row00, 0);
- s[1] = _mm_alignr_epi8(row01, row00, 4);
- s[2] = _mm_alignr_epi8(row01, row00, 8);
- s[3] = _mm_alignr_epi8(row01, row00, 12);
-
- __m128i res_even = convolve(s, coeffs_x);
- res_even = _mm_sra_epi32(_mm_add_epi32(res_even, round_const_x),
- round_shift_x);
-
- // odd pixels
- s[0] = _mm_alignr_epi8(row01, row00, 2);
- s[1] = _mm_alignr_epi8(row01, row00, 6);
- s[2] = _mm_alignr_epi8(row01, row00, 10);
- s[3] = _mm_alignr_epi8(row01, row00, 14);
-
- __m128i res_odd = convolve(s, coeffs_x);
- res_odd =
- _mm_sra_epi32(_mm_add_epi32(res_odd, round_const_x), round_shift_x);
-
- res_even = _mm_sra_epi32(_mm_add_epi32(res_even, round_const_bits),
- round_shift_bits);
- res_odd = _mm_sra_epi32(_mm_add_epi32(res_odd, round_const_bits),
- round_shift_bits);
-
- __m128i res_even1 = _mm_packs_epi32(res_even, res_even);
- __m128i res_odd1 = _mm_packs_epi32(res_odd, res_odd);
- __m128i res = _mm_unpacklo_epi16(res_even1, res_odd1);
-
- res = _mm_min_epi16(res, clip_pixel);
- res = _mm_max_epi16(res, zero);
-
- if (w - j > 4) {
- _mm_storeu_si128((__m128i *)&dst[i * dst_stride + j], res);
- } else if (w == 4) {
- _mm_storel_epi64((__m128i *)&dst[i * dst_stride + j], res);
- } else {
- *((uint32_t *)(&dst[i * dst_stride + j])) = _mm_cvtsi128_si32(res);
- }
- }
- }
- }
-}
diff --git a/third_party/aom/aom_dsp/x86/highbd_intrapred_sse2.c b/third_party/aom/aom_dsp/x86/highbd_intrapred_sse2.c
deleted file mode 100644
index 5a55736c4..000000000
--- a/third_party/aom/aom_dsp/x86/highbd_intrapred_sse2.c
+++ /dev/null
@@ -1,984 +0,0 @@
-/*
- * Copyright (c) 2017, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#include <emmintrin.h>
-
-#include "config/aom_dsp_rtcd.h"
-
-// -----------------------------------------------------------------------------
-// H_PRED
-
-void aom_highbd_h_predictor_4x4_sse2(uint16_t *dst, ptrdiff_t stride,
- const uint16_t *above,
- const uint16_t *left, int bd) {
- const __m128i left_u16 = _mm_loadl_epi64((const __m128i *)left);
- const __m128i row0 = _mm_shufflelo_epi16(left_u16, 0x0);
- const __m128i row1 = _mm_shufflelo_epi16(left_u16, 0x55);
- const __m128i row2 = _mm_shufflelo_epi16(left_u16, 0xaa);
- const __m128i row3 = _mm_shufflelo_epi16(left_u16, 0xff);
- (void)above;
- (void)bd;
- _mm_storel_epi64((__m128i *)dst, row0);
- dst += stride;
- _mm_storel_epi64((__m128i *)dst, row1);
- dst += stride;
- _mm_storel_epi64((__m128i *)dst, row2);
- dst += stride;
- _mm_storel_epi64((__m128i *)dst, row3);
-}
-
-void aom_highbd_h_predictor_4x8_sse2(uint16_t *dst, ptrdiff_t stride,
- const uint16_t *above,
- const uint16_t *left, int bd) {
- aom_highbd_h_predictor_4x4_sse2(dst, stride, above, left, bd);
- dst += stride << 2;
- left += 4;
- aom_highbd_h_predictor_4x4_sse2(dst, stride, above, left, bd);
-}
-
-void aom_highbd_h_predictor_8x4_sse2(uint16_t *dst, ptrdiff_t stride,
- const uint16_t *above,
- const uint16_t *left, int bd) {
- const __m128i left_u16 = _mm_load_si128((const __m128i *)left);
- const __m128i row0 = _mm_shufflelo_epi16(left_u16, 0x0);
- const __m128i row1 = _mm_shufflelo_epi16(left_u16, 0x55);
- const __m128i row2 = _mm_shufflelo_epi16(left_u16, 0xaa);
- const __m128i row3 = _mm_shufflelo_epi16(left_u16, 0xff);
- (void)above;
- (void)bd;
- _mm_store_si128((__m128i *)dst, _mm_unpacklo_epi64(row0, row0));
- dst += stride;
- _mm_store_si128((__m128i *)dst, _mm_unpacklo_epi64(row1, row1));
- dst += stride;
- _mm_store_si128((__m128i *)dst, _mm_unpacklo_epi64(row2, row2));
- dst += stride;
- _mm_store_si128((__m128i *)dst, _mm_unpacklo_epi64(row3, row3));
-}
-
-void aom_highbd_h_predictor_8x8_sse2(uint16_t *dst, ptrdiff_t stride,
- const uint16_t *above,
- const uint16_t *left, int bd) {
- const __m128i left_u16 = _mm_load_si128((const __m128i *)left);
- const __m128i row0 = _mm_shufflelo_epi16(left_u16, 0x0);
- const __m128i row1 = _mm_shufflelo_epi16(left_u16, 0x55);
- const __m128i row2 = _mm_shufflelo_epi16(left_u16, 0xaa);
- const __m128i row3 = _mm_shufflelo_epi16(left_u16, 0xff);
- const __m128i row4 = _mm_shufflehi_epi16(left_u16, 0x0);
- const __m128i row5 = _mm_shufflehi_epi16(left_u16, 0x55);
- const __m128i row6 = _mm_shufflehi_epi16(left_u16, 0xaa);
- const __m128i row7 = _mm_shufflehi_epi16(left_u16, 0xff);
- (void)above;
- (void)bd;
- _mm_store_si128((__m128i *)dst, _mm_unpacklo_epi64(row0, row0));
- dst += stride;
- _mm_store_si128((__m128i *)dst, _mm_unpacklo_epi64(row1, row1));
- dst += stride;
- _mm_store_si128((__m128i *)dst, _mm_unpacklo_epi64(row2, row2));
- dst += stride;
- _mm_store_si128((__m128i *)dst, _mm_unpacklo_epi64(row3, row3));
- dst += stride;
- _mm_store_si128((__m128i *)dst, _mm_unpackhi_epi64(row4, row4));
- dst += stride;
- _mm_store_si128((__m128i *)dst, _mm_unpackhi_epi64(row5, row5));
- dst += stride;
- _mm_store_si128((__m128i *)dst, _mm_unpackhi_epi64(row6, row6));
- dst += stride;
- _mm_store_si128((__m128i *)dst, _mm_unpackhi_epi64(row7, row7));
-}
-
-void aom_highbd_h_predictor_8x16_sse2(uint16_t *dst, ptrdiff_t stride,
- const uint16_t *above,
- const uint16_t *left, int bd) {
- aom_highbd_h_predictor_8x8_sse2(dst, stride, above, left, bd);
- dst += stride << 3;
- left += 8;
- aom_highbd_h_predictor_8x8_sse2(dst, stride, above, left, bd);
-}
-
-static INLINE void h_store_16_unpacklo(uint16_t **dst, const ptrdiff_t stride,
- const __m128i *row) {
- const __m128i val = _mm_unpacklo_epi64(*row, *row);
- _mm_store_si128((__m128i *)*dst, val);
- _mm_store_si128((__m128i *)(*dst + 8), val);
- *dst += stride;
-}
-
-static INLINE void h_store_16_unpackhi(uint16_t **dst, const ptrdiff_t stride,
- const __m128i *row) {
- const __m128i val = _mm_unpackhi_epi64(*row, *row);
- _mm_store_si128((__m128i *)(*dst), val);
- _mm_store_si128((__m128i *)(*dst + 8), val);
- *dst += stride;
-}
-
-static INLINE void h_predictor_16x8(uint16_t *dst, ptrdiff_t stride,
- const uint16_t *left) {
- const __m128i left_u16 = _mm_load_si128((const __m128i *)left);
- const __m128i row0 = _mm_shufflelo_epi16(left_u16, 0x0);
- const __m128i row1 = _mm_shufflelo_epi16(left_u16, 0x55);
- const __m128i row2 = _mm_shufflelo_epi16(left_u16, 0xaa);
- const __m128i row3 = _mm_shufflelo_epi16(left_u16, 0xff);
- const __m128i row4 = _mm_shufflehi_epi16(left_u16, 0x0);
- const __m128i row5 = _mm_shufflehi_epi16(left_u16, 0x55);
- const __m128i row6 = _mm_shufflehi_epi16(left_u16, 0xaa);
- const __m128i row7 = _mm_shufflehi_epi16(left_u16, 0xff);
- h_store_16_unpacklo(&dst, stride, &row0);
- h_store_16_unpacklo(&dst, stride, &row1);
- h_store_16_unpacklo(&dst, stride, &row2);
- h_store_16_unpacklo(&dst, stride, &row3);
- h_store_16_unpackhi(&dst, stride, &row4);
- h_store_16_unpackhi(&dst, stride, &row5);
- h_store_16_unpackhi(&dst, stride, &row6);
- h_store_16_unpackhi(&dst, stride, &row7);
-}
-
-void aom_highbd_h_predictor_16x8_sse2(uint16_t *dst, ptrdiff_t stride,
- const uint16_t *above,
- const uint16_t *left, int bd) {
- (void)above;
- (void)bd;
- h_predictor_16x8(dst, stride, left);
-}
-
-void aom_highbd_h_predictor_16x16_sse2(uint16_t *dst, ptrdiff_t stride,
- const uint16_t *above,
- const uint16_t *left, int bd) {
- int i;
- (void)above;
- (void)bd;
-
- for (i = 0; i < 2; i++, left += 8) {
- h_predictor_16x8(dst, stride, left);
- dst += stride << 3;
- }
-}
-
-void aom_highbd_h_predictor_16x32_sse2(uint16_t *dst, ptrdiff_t stride,
- const uint16_t *above,
- const uint16_t *left, int bd) {
- int i;
- (void)above;
- (void)bd;
-
- for (i = 0; i < 4; i++, left += 8) {
- h_predictor_16x8(dst, stride, left);
- dst += stride << 3;
- }
-}
-
-static INLINE void h_store_32_unpacklo(uint16_t **dst, const ptrdiff_t stride,
- const __m128i *row) {
- const __m128i val = _mm_unpacklo_epi64(*row, *row);
- _mm_store_si128((__m128i *)(*dst), val);
- _mm_store_si128((__m128i *)(*dst + 8), val);
- _mm_store_si128((__m128i *)(*dst + 16), val);
- _mm_store_si128((__m128i *)(*dst + 24), val);
- *dst += stride;
-}
-
-static INLINE void h_store_32_unpackhi(uint16_t **dst, const ptrdiff_t stride,
- const __m128i *row) {
- const __m128i val = _mm_unpackhi_epi64(*row, *row);
- _mm_store_si128((__m128i *)(*dst), val);
- _mm_store_si128((__m128i *)(*dst + 8), val);
- _mm_store_si128((__m128i *)(*dst + 16), val);
- _mm_store_si128((__m128i *)(*dst + 24), val);
- *dst += stride;
-}
-
-static INLINE void h_predictor_32x8(uint16_t *dst, ptrdiff_t stride,
- const uint16_t *left) {
- const __m128i left_u16 = _mm_load_si128((const __m128i *)left);
- const __m128i row0 = _mm_shufflelo_epi16(left_u16, 0x0);
- const __m128i row1 = _mm_shufflelo_epi16(left_u16, 0x55);
- const __m128i row2 = _mm_shufflelo_epi16(left_u16, 0xaa);
- const __m128i row3 = _mm_shufflelo_epi16(left_u16, 0xff);
- const __m128i row4 = _mm_shufflehi_epi16(left_u16, 0x0);
- const __m128i row5 = _mm_shufflehi_epi16(left_u16, 0x55);
- const __m128i row6 = _mm_shufflehi_epi16(left_u16, 0xaa);
- const __m128i row7 = _mm_shufflehi_epi16(left_u16, 0xff);
- h_store_32_unpacklo(&dst, stride, &row0);
- h_store_32_unpacklo(&dst, stride, &row1);
- h_store_32_unpacklo(&dst, stride, &row2);
- h_store_32_unpacklo(&dst, stride, &row3);
- h_store_32_unpackhi(&dst, stride, &row4);
- h_store_32_unpackhi(&dst, stride, &row5);
- h_store_32_unpackhi(&dst, stride, &row6);
- h_store_32_unpackhi(&dst, stride, &row7);
-}
-
-void aom_highbd_h_predictor_32x16_sse2(uint16_t *dst, ptrdiff_t stride,
- const uint16_t *above,
- const uint16_t *left, int bd) {
- int i;
- (void)above;
- (void)bd;
-
- for (i = 0; i < 2; i++, left += 8) {
- h_predictor_32x8(dst, stride, left);
- dst += stride << 3;
- }
-}
-
-void aom_highbd_h_predictor_32x32_sse2(uint16_t *dst, ptrdiff_t stride,
- const uint16_t *above,
- const uint16_t *left, int bd) {
- int i;
- (void)above;
- (void)bd;
-
- for (i = 0; i < 4; i++, left += 8) {
- h_predictor_32x8(dst, stride, left);
- dst += stride << 3;
- }
-}
-
-// -----------------------------------------------------------------------------
-// DC_TOP, DC_LEFT, DC_128
-
-// 4x4
-
-static INLINE __m128i dc_sum_4(const uint16_t *ref) {
- const __m128i _dcba = _mm_loadl_epi64((const __m128i *)ref);
- const __m128i _xxdc = _mm_shufflelo_epi16(_dcba, 0xe);
- const __m128i a = _mm_add_epi16(_dcba, _xxdc);
- return _mm_add_epi16(a, _mm_shufflelo_epi16(a, 0x1));
-}
-
-static INLINE void dc_store_4x4(uint16_t *dst, ptrdiff_t stride,
- const __m128i *dc) {
- const __m128i dc_dup = _mm_shufflelo_epi16(*dc, 0x0);
- int i;
- for (i = 0; i < 4; ++i, dst += stride) {
- _mm_storel_epi64((__m128i *)dst, dc_dup);
- }
-}
-
-void aom_highbd_dc_left_predictor_4x4_sse2(uint16_t *dst, ptrdiff_t stride,
- const uint16_t *above,
- const uint16_t *left, int bd) {
- const __m128i two = _mm_cvtsi32_si128(2);
- const __m128i sum = dc_sum_4(left);
- const __m128i dc = _mm_srli_epi16(_mm_add_epi16(sum, two), 2);
- (void)above;
- (void)bd;
- dc_store_4x4(dst, stride, &dc);
-}
-
-void aom_highbd_dc_top_predictor_4x4_sse2(uint16_t *dst, ptrdiff_t stride,
- const uint16_t *above,
- const uint16_t *left, int bd) {
- const __m128i two = _mm_cvtsi32_si128(2);
- const __m128i sum = dc_sum_4(above);
- const __m128i dc = _mm_srli_epi16(_mm_add_epi16(sum, two), 2);
- (void)left;
- (void)bd;
- dc_store_4x4(dst, stride, &dc);
-}
-
-void aom_highbd_dc_128_predictor_4x4_sse2(uint16_t *dst, ptrdiff_t stride,
- const uint16_t *above,
- const uint16_t *left, int bd) {
- const __m128i dc = _mm_cvtsi32_si128(1 << (bd - 1));
- const __m128i dc_dup = _mm_shufflelo_epi16(dc, 0x0);
- (void)above;
- (void)left;
- dc_store_4x4(dst, stride, &dc_dup);
-}
-
-// -----------------------------------------------------------------------------
-// 4x8
-
-static INLINE void dc_store_4x8(uint16_t *dst, ptrdiff_t stride,
- const __m128i *dc) {
- const __m128i dc_dup = _mm_shufflelo_epi16(*dc, 0x0);
- int i;
- for (i = 0; i < 8; ++i, dst += stride) {
- _mm_storel_epi64((__m128i *)dst, dc_dup);
- }
-}
-
-// Shared with DC 8xh
-static INLINE __m128i dc_sum_8(const uint16_t *ref) {
- const __m128i ref_u16 = _mm_load_si128((const __m128i *)ref);
- const __m128i _dcba = _mm_add_epi16(ref_u16, _mm_srli_si128(ref_u16, 8));
- const __m128i _xxdc = _mm_shufflelo_epi16(_dcba, 0xe);
- const __m128i a = _mm_add_epi16(_dcba, _xxdc);
-
- return _mm_add_epi16(a, _mm_shufflelo_epi16(a, 0x1));
-}
-
-void aom_highbd_dc_left_predictor_4x8_sse2(uint16_t *dst, ptrdiff_t stride,
- const uint16_t *above,
- const uint16_t *left, int bd) {
- const __m128i sum = dc_sum_8(left);
- const __m128i four = _mm_cvtsi32_si128(4);
- const __m128i dc = _mm_srli_epi16(_mm_add_epi16(sum, four), 3);
- (void)above;
- (void)bd;
- dc_store_4x8(dst, stride, &dc);
-}
-
-void aom_highbd_dc_top_predictor_4x8_sse2(uint16_t *dst, ptrdiff_t stride,
- const uint16_t *above,
- const uint16_t *left, int bd) {
- const __m128i two = _mm_cvtsi32_si128(2);
- const __m128i sum = dc_sum_4(above);
- const __m128i dc = _mm_srli_epi16(_mm_add_epi16(sum, two), 2);
- (void)left;
- (void)bd;
- dc_store_4x8(dst, stride, &dc);
-}
-
-void aom_highbd_dc_128_predictor_4x8_sse2(uint16_t *dst, ptrdiff_t stride,
- const uint16_t *above,
- const uint16_t *left, int bd) {
- const __m128i dc = _mm_cvtsi32_si128(1 << (bd - 1));
- const __m128i dc_dup = _mm_shufflelo_epi16(dc, 0x0);
- (void)above;
- (void)left;
- dc_store_4x8(dst, stride, &dc_dup);
-}
-
-// -----------------------------------------------------------------------------
-// 8xh
-
-static INLINE void dc_store_8xh(uint16_t *dst, ptrdiff_t stride, int height,
- const __m128i *dc) {
- const __m128i dc_dup_lo = _mm_shufflelo_epi16(*dc, 0);
- const __m128i dc_dup = _mm_unpacklo_epi64(dc_dup_lo, dc_dup_lo);
- int i;
- for (i = 0; i < height; ++i, dst += stride) {
- _mm_store_si128((__m128i *)dst, dc_dup);
- }
-}
-
-// -----------------------------------------------------------------------------
-// DC_TOP
-
-static INLINE void dc_top_predictor_8xh(uint16_t *dst, ptrdiff_t stride,
- int height, const uint16_t *above) {
- const __m128i four = _mm_cvtsi32_si128(4);
- const __m128i sum = dc_sum_8(above);
- const __m128i dc = _mm_srli_epi16(_mm_add_epi16(sum, four), 3);
- dc_store_8xh(dst, stride, height, &dc);
-}
-
-void aom_highbd_dc_top_predictor_8x4_sse2(uint16_t *dst, ptrdiff_t stride,
- const uint16_t *above,
- const uint16_t *left, int bd) {
- (void)left;
- (void)bd;
- dc_top_predictor_8xh(dst, stride, 4, above);
-}
-
-void aom_highbd_dc_top_predictor_8x8_sse2(uint16_t *dst, ptrdiff_t stride,
- const uint16_t *above,
- const uint16_t *left, int bd) {
- (void)left;
- (void)bd;
- dc_top_predictor_8xh(dst, stride, 8, above);
-}
-
-void aom_highbd_dc_top_predictor_8x16_sse2(uint16_t *dst, ptrdiff_t stride,
- const uint16_t *above,
- const uint16_t *left, int bd) {
- (void)left;
- (void)bd;
- dc_top_predictor_8xh(dst, stride, 16, above);
-}
-
-// -----------------------------------------------------------------------------
-// DC_LEFT
-
-void aom_highbd_dc_left_predictor_8x4_sse2(uint16_t *dst, ptrdiff_t stride,
- const uint16_t *above,
- const uint16_t *left, int bd) {
- const __m128i two = _mm_cvtsi32_si128(2);
- const __m128i sum = dc_sum_4(left);
- const __m128i dc = _mm_srli_epi16(_mm_add_epi16(sum, two), 2);
- (void)above;
- (void)bd;
- dc_store_8xh(dst, stride, 4, &dc);
-}
-
-void aom_highbd_dc_left_predictor_8x8_sse2(uint16_t *dst, ptrdiff_t stride,
- const uint16_t *above,
- const uint16_t *left, int bd) {
- const __m128i four = _mm_cvtsi32_si128(4);
- const __m128i sum = dc_sum_8(left);
- const __m128i dc = _mm_srli_epi16(_mm_add_epi16(sum, four), 3);
- (void)above;
- (void)bd;
- dc_store_8xh(dst, stride, 8, &dc);
-}
-
-// Shared with DC 16xh
-static INLINE __m128i dc_sum_16(const uint16_t *ref) {
- const __m128i sum_lo = dc_sum_8(ref);
- const __m128i sum_hi = dc_sum_8(ref + 8);
- return _mm_add_epi16(sum_lo, sum_hi);
-}
-
-void aom_highbd_dc_left_predictor_8x16_sse2(uint16_t *dst, ptrdiff_t stride,
- const uint16_t *above,
- const uint16_t *left, int bd) {
- const __m128i eight = _mm_cvtsi32_si128(8);
- const __m128i sum = dc_sum_16(left);
- const __m128i dc = _mm_srli_epi16(_mm_add_epi16(sum, eight), 4);
- (void)above;
- (void)bd;
- dc_store_8xh(dst, stride, 16, &dc);
-}
-
-// -----------------------------------------------------------------------------
-// DC_128
-
-static INLINE void dc_128_predictor_8xh(uint16_t *dst, ptrdiff_t stride,
- int height, int bd) {
- const __m128i dc = _mm_cvtsi32_si128(1 << (bd - 1));
- const __m128i dc_dup = _mm_shufflelo_epi16(dc, 0x0);
- dc_store_8xh(dst, stride, height, &dc_dup);
-}
-
-void aom_highbd_dc_128_predictor_8x4_sse2(uint16_t *dst, ptrdiff_t stride,
- const uint16_t *above,
- const uint16_t *left, int bd) {
- (void)above;
- (void)left;
- dc_128_predictor_8xh(dst, stride, 4, bd);
-}
-
-void aom_highbd_dc_128_predictor_8x8_sse2(uint16_t *dst, ptrdiff_t stride,
- const uint16_t *above,
- const uint16_t *left, int bd) {
- (void)above;
- (void)left;
- dc_128_predictor_8xh(dst, stride, 8, bd);
-}
-
-void aom_highbd_dc_128_predictor_8x16_sse2(uint16_t *dst, ptrdiff_t stride,
- const uint16_t *above,
- const uint16_t *left, int bd) {
- (void)above;
- (void)left;
- dc_128_predictor_8xh(dst, stride, 16, bd);
-}
-
-// -----------------------------------------------------------------------------
-// 16xh
-
-static INLINE void dc_store_16xh(uint16_t *dst, ptrdiff_t stride, int height,
- const __m128i *dc) {
- const __m128i dc_dup_lo = _mm_shufflelo_epi16(*dc, 0);
- const __m128i dc_dup = _mm_unpacklo_epi64(dc_dup_lo, dc_dup_lo);
- int i;
- for (i = 0; i < height; ++i, dst += stride) {
- _mm_store_si128((__m128i *)dst, dc_dup);
- _mm_store_si128((__m128i *)(dst + 8), dc_dup);
- }
-}
-
-// -----------------------------------------------------------------------------
-// DC_LEFT
-
-void aom_highbd_dc_left_predictor_16x8_sse2(uint16_t *dst, ptrdiff_t stride,
- const uint16_t *above,
- const uint16_t *left, int bd) {
- const __m128i four = _mm_cvtsi32_si128(4);
- const __m128i sum = dc_sum_8(left);
- const __m128i dc = _mm_srli_epi16(_mm_add_epi16(sum, four), 3);
- (void)above;
- (void)bd;
- dc_store_16xh(dst, stride, 8, &dc);
-}
-
-void aom_highbd_dc_left_predictor_16x16_sse2(uint16_t *dst, ptrdiff_t stride,
- const uint16_t *above,
- const uint16_t *left, int bd) {
- const __m128i eight = _mm_cvtsi32_si128(8);
- const __m128i sum = dc_sum_16(left);
- const __m128i dc = _mm_srli_epi16(_mm_add_epi16(sum, eight), 4);
- (void)above;
- (void)bd;
- dc_store_16xh(dst, stride, 16, &dc);
-}
-
-// Shared with 32xh
-static INLINE __m128i dc_sum_32(const uint16_t *ref) {
- const __m128i zero = _mm_setzero_si128();
- const __m128i sum_a = dc_sum_16(ref);
- const __m128i sum_b = dc_sum_16(ref + 16);
- // 12 bit bd will outrange, so expand to 32 bit before adding final total
- return _mm_add_epi32(_mm_unpacklo_epi16(sum_a, zero),
- _mm_unpacklo_epi16(sum_b, zero));
-}
-
-void aom_highbd_dc_left_predictor_16x32_sse2(uint16_t *dst, ptrdiff_t stride,
- const uint16_t *above,
- const uint16_t *left, int bd) {
- const __m128i sixteen = _mm_cvtsi32_si128(16);
- const __m128i sum = dc_sum_32(left);
- const __m128i dc = _mm_srli_epi32(_mm_add_epi32(sum, sixteen), 5);
- (void)above;
- (void)bd;
- dc_store_16xh(dst, stride, 32, &dc);
-}
-
-// -----------------------------------------------------------------------------
-// DC_TOP
-
-void aom_highbd_dc_top_predictor_16x8_sse2(uint16_t *dst, ptrdiff_t stride,
- const uint16_t *above,
- const uint16_t *left, int bd) {
- const __m128i eight = _mm_cvtsi32_si128(8);
- const __m128i sum = dc_sum_16(above);
- const __m128i dc = _mm_srli_epi16(_mm_add_epi16(sum, eight), 4);
- (void)left;
- (void)bd;
- dc_store_16xh(dst, stride, 8, &dc);
-}
-
-void aom_highbd_dc_top_predictor_16x16_sse2(uint16_t *dst, ptrdiff_t stride,
- const uint16_t *above,
- const uint16_t *left, int bd) {
- const __m128i eight = _mm_cvtsi32_si128(8);
- const __m128i sum = dc_sum_16(above);
- const __m128i dc = _mm_srli_epi16(_mm_add_epi16(sum, eight), 4);
- (void)left;
- (void)bd;
- dc_store_16xh(dst, stride, 16, &dc);
-}
-
-void aom_highbd_dc_top_predictor_16x32_sse2(uint16_t *dst, ptrdiff_t stride,
- const uint16_t *above,
- const uint16_t *left, int bd) {
- const __m128i eight = _mm_cvtsi32_si128(8);
- const __m128i sum = dc_sum_16(above);
- const __m128i dc = _mm_srli_epi16(_mm_add_epi16(sum, eight), 4);
- (void)left;
- (void)bd;
- dc_store_16xh(dst, stride, 32, &dc);
-}
-
-// -----------------------------------------------------------------------------
-// DC_128
-
-void aom_highbd_dc_128_predictor_16x8_sse2(uint16_t *dst, ptrdiff_t stride,
- const uint16_t *above,
- const uint16_t *left, int bd) {
- const __m128i dc = _mm_cvtsi32_si128(1 << (bd - 1));
- const __m128i dc_dup = _mm_shufflelo_epi16(dc, 0x0);
- (void)above;
- (void)left;
- dc_store_16xh(dst, stride, 8, &dc_dup);
-}
-
-void aom_highbd_dc_128_predictor_16x16_sse2(uint16_t *dst, ptrdiff_t stride,
- const uint16_t *above,
- const uint16_t *left, int bd) {
- const __m128i dc = _mm_cvtsi32_si128(1 << (bd - 1));
- const __m128i dc_dup = _mm_shufflelo_epi16(dc, 0x0);
- (void)above;
- (void)left;
- dc_store_16xh(dst, stride, 16, &dc_dup);
-}
-
-void aom_highbd_dc_128_predictor_16x32_sse2(uint16_t *dst, ptrdiff_t stride,
- const uint16_t *above,
- const uint16_t *left, int bd) {
- const __m128i dc = _mm_cvtsi32_si128(1 << (bd - 1));
- const __m128i dc_dup = _mm_shufflelo_epi16(dc, 0x0);
- (void)above;
- (void)left;
- dc_store_16xh(dst, stride, 32, &dc_dup);
-}
-
-// -----------------------------------------------------------------------------
-// 32xh
-
-static INLINE void dc_store_32xh(uint16_t *dst, ptrdiff_t stride, int height,
- const __m128i *dc) {
- const __m128i dc_dup_lo = _mm_shufflelo_epi16(*dc, 0);
- const __m128i dc_dup = _mm_unpacklo_epi64(dc_dup_lo, dc_dup_lo);
- int i;
- for (i = 0; i < height; ++i, dst += stride) {
- _mm_store_si128((__m128i *)dst, dc_dup);
- _mm_store_si128((__m128i *)(dst + 8), dc_dup);
- _mm_store_si128((__m128i *)(dst + 16), dc_dup);
- _mm_store_si128((__m128i *)(dst + 24), dc_dup);
- }
-}
-
-void aom_highbd_dc_left_predictor_32x16_sse2(uint16_t *dst, ptrdiff_t stride,
- const uint16_t *above,
- const uint16_t *left, int bd) {
- const __m128i eight = _mm_cvtsi32_si128(8);
- const __m128i sum = dc_sum_16(left);
- const __m128i dc = _mm_srli_epi16(_mm_add_epi16(sum, eight), 4);
- (void)above;
- (void)bd;
- dc_store_32xh(dst, stride, 16, &dc);
-}
-
-void aom_highbd_dc_left_predictor_32x32_sse2(uint16_t *dst, ptrdiff_t stride,
- const uint16_t *above,
- const uint16_t *left, int bd) {
- const __m128i sixteen = _mm_cvtsi32_si128(16);
- const __m128i sum = dc_sum_32(left);
- const __m128i dc = _mm_srli_epi32(_mm_add_epi32(sum, sixteen), 5);
- (void)above;
- (void)bd;
- dc_store_32xh(dst, stride, 32, &dc);
-}
-
-void aom_highbd_dc_top_predictor_32x16_sse2(uint16_t *dst, ptrdiff_t stride,
- const uint16_t *above,
- const uint16_t *left, int bd) {
- const __m128i sixteen = _mm_cvtsi32_si128(16);
- const __m128i sum = dc_sum_32(above);
- const __m128i dc = _mm_srli_epi32(_mm_add_epi32(sum, sixteen), 5);
- (void)left;
- (void)bd;
- dc_store_32xh(dst, stride, 16, &dc);
-}
-
-void aom_highbd_dc_128_predictor_32x16_sse2(uint16_t *dst, ptrdiff_t stride,
- const uint16_t *above,
- const uint16_t *left, int bd) {
- const __m128i dc = _mm_cvtsi32_si128(1 << (bd - 1));
- const __m128i dc_dup = _mm_shufflelo_epi16(dc, 0x0);
- (void)above;
- (void)left;
- dc_store_32xh(dst, stride, 16, &dc_dup);
-}
-
-void aom_highbd_dc_top_predictor_32x32_sse2(uint16_t *dst, ptrdiff_t stride,
- const uint16_t *above,
- const uint16_t *left, int bd) {
- const __m128i sixteen = _mm_cvtsi32_si128(16);
- const __m128i sum = dc_sum_32(above);
- const __m128i dc = _mm_srli_epi32(_mm_add_epi32(sum, sixteen), 5);
- (void)left;
- (void)bd;
- dc_store_32xh(dst, stride, 32, &dc);
-}
-
-void aom_highbd_dc_128_predictor_32x32_sse2(uint16_t *dst, ptrdiff_t stride,
- const uint16_t *above,
- const uint16_t *left, int bd) {
- const __m128i dc = _mm_cvtsi32_si128(1 << (bd - 1));
- const __m128i dc_dup = _mm_shufflelo_epi16(dc, 0x0);
- (void)above;
- (void)left;
- dc_store_32xh(dst, stride, 32, &dc_dup);
-}
-
-// -----------------------------------------------------------------------------
-// V_PRED
-
-void aom_highbd_v_predictor_4x8_sse2(uint16_t *dst, ptrdiff_t stride,
- const uint16_t *above,
- const uint16_t *left, int bd) {
- (void)left;
- (void)bd;
- const __m128i above_u16 = _mm_loadl_epi64((const __m128i *)above);
- int i;
- for (i = 0; i < 2; ++i) {
- _mm_storel_epi64((__m128i *)dst, above_u16);
- _mm_storel_epi64((__m128i *)(dst + stride), above_u16);
- _mm_storel_epi64((__m128i *)(dst + 2 * stride), above_u16);
- _mm_storel_epi64((__m128i *)(dst + 3 * stride), above_u16);
- dst += stride << 2;
- }
-}
-
-void aom_highbd_v_predictor_8x4_sse2(uint16_t *dst, ptrdiff_t stride,
- const uint16_t *above,
- const uint16_t *left, int bd) {
- (void)left;
- (void)bd;
- const __m128i above_u16 = _mm_load_si128((const __m128i *)above);
- _mm_store_si128((__m128i *)dst, above_u16);
- _mm_store_si128((__m128i *)(dst + stride), above_u16);
- _mm_store_si128((__m128i *)(dst + 2 * stride), above_u16);
- _mm_store_si128((__m128i *)(dst + 3 * stride), above_u16);
-}
-
-void aom_highbd_v_predictor_8x16_sse2(uint16_t *dst, ptrdiff_t stride,
- const uint16_t *above,
- const uint16_t *left, int bd) {
- (void)left;
- (void)bd;
- const __m128i above_u16 = _mm_load_si128((const __m128i *)above);
- int i;
- for (i = 0; i < 4; ++i) {
- _mm_store_si128((__m128i *)dst, above_u16);
- _mm_store_si128((__m128i *)(dst + stride), above_u16);
- _mm_store_si128((__m128i *)(dst + 2 * stride), above_u16);
- _mm_store_si128((__m128i *)(dst + 3 * stride), above_u16);
- dst += stride << 2;
- }
-}
-
-void aom_highbd_v_predictor_16x8_sse2(uint16_t *dst, ptrdiff_t stride,
- const uint16_t *above,
- const uint16_t *left, int bd) {
- (void)left;
- (void)bd;
- const __m128i above0_u16 = _mm_load_si128((const __m128i *)above);
- const __m128i above1_u16 = _mm_load_si128((const __m128i *)(above + 8));
- int i;
- for (i = 0; i < 2; ++i) {
- _mm_store_si128((__m128i *)dst, above0_u16);
- _mm_store_si128((__m128i *)(dst + 8), above1_u16);
- dst += stride;
- _mm_store_si128((__m128i *)dst, above0_u16);
- _mm_store_si128((__m128i *)(dst + 8), above1_u16);
- dst += stride;
- _mm_store_si128((__m128i *)dst, above0_u16);
- _mm_store_si128((__m128i *)(dst + 8), above1_u16);
- dst += stride;
- _mm_store_si128((__m128i *)dst, above0_u16);
- _mm_store_si128((__m128i *)(dst + 8), above1_u16);
- dst += stride;
- }
-}
-
-void aom_highbd_v_predictor_16x32_sse2(uint16_t *dst, ptrdiff_t stride,
- const uint16_t *above,
- const uint16_t *left, int bd) {
- (void)left;
- (void)bd;
- const __m128i above0_u16 = _mm_load_si128((const __m128i *)above);
- const __m128i above1_u16 = _mm_load_si128((const __m128i *)(above + 8));
- int i;
- for (i = 0; i < 8; ++i) {
- _mm_store_si128((__m128i *)dst, above0_u16);
- _mm_store_si128((__m128i *)(dst + 8), above1_u16);
- dst += stride;
- _mm_store_si128((__m128i *)dst, above0_u16);
- _mm_store_si128((__m128i *)(dst + 8), above1_u16);
- dst += stride;
- _mm_store_si128((__m128i *)dst, above0_u16);
- _mm_store_si128((__m128i *)(dst + 8), above1_u16);
- dst += stride;
- _mm_store_si128((__m128i *)dst, above0_u16);
- _mm_store_si128((__m128i *)(dst + 8), above1_u16);
- dst += stride;
- }
-}
-
-void aom_highbd_v_predictor_32x16_sse2(uint16_t *dst, ptrdiff_t stride,
- const uint16_t *above,
- const uint16_t *left, int bd) {
- (void)left;
- (void)bd;
- const __m128i above0_u16 = _mm_load_si128((const __m128i *)above);
- const __m128i above1_u16 = _mm_load_si128((const __m128i *)(above + 8));
- const __m128i above2_u16 = _mm_load_si128((const __m128i *)(above + 16));
- const __m128i above3_u16 = _mm_load_si128((const __m128i *)(above + 24));
- int i;
- for (i = 0; i < 4; ++i) {
- _mm_store_si128((__m128i *)dst, above0_u16);
- _mm_store_si128((__m128i *)(dst + 8), above1_u16);
- _mm_store_si128((__m128i *)(dst + 16), above2_u16);
- _mm_store_si128((__m128i *)(dst + 24), above3_u16);
- dst += stride;
- _mm_store_si128((__m128i *)dst, above0_u16);
- _mm_store_si128((__m128i *)(dst + 8), above1_u16);
- _mm_store_si128((__m128i *)(dst + 16), above2_u16);
- _mm_store_si128((__m128i *)(dst + 24), above3_u16);
- dst += stride;
- _mm_store_si128((__m128i *)dst, above0_u16);
- _mm_store_si128((__m128i *)(dst + 8), above1_u16);
- _mm_store_si128((__m128i *)(dst + 16), above2_u16);
- _mm_store_si128((__m128i *)(dst + 24), above3_u16);
- dst += stride;
- _mm_store_si128((__m128i *)dst, above0_u16);
- _mm_store_si128((__m128i *)(dst + 8), above1_u16);
- _mm_store_si128((__m128i *)(dst + 16), above2_u16);
- _mm_store_si128((__m128i *)(dst + 24), above3_u16);
- dst += stride;
- }
-}
-
-// -----------------------------------------------------------------------------
-// DC_PRED
-
-void aom_highbd_dc_predictor_4x8_sse2(uint16_t *dst, ptrdiff_t stride,
- const uint16_t *above,
- const uint16_t *left, int bd) {
- (void)bd;
- const __m128i sum_above = dc_sum_4(above);
- const __m128i sum_left = dc_sum_8(left);
- const __m128i sum = _mm_add_epi16(sum_above, sum_left);
- uint32_t sum32 = _mm_cvtsi128_si32(sum);
- sum32 >>= 16;
- sum32 += 6;
- sum32 /= 12;
- const __m128i row = _mm_set1_epi16((uint16_t)sum32);
- int i;
- for (i = 0; i < 4; ++i) {
- _mm_storel_epi64((__m128i *)dst, row);
- dst += stride;
- _mm_storel_epi64((__m128i *)dst, row);
- dst += stride;
- }
-}
-
-void aom_highbd_dc_predictor_8x4_sse2(uint16_t *dst, ptrdiff_t stride,
- const uint16_t *above,
- const uint16_t *left, int bd) {
- (void)bd;
- const __m128i sum_left = dc_sum_4(left);
- const __m128i sum_above = dc_sum_8(above);
- const __m128i sum = _mm_add_epi16(sum_above, sum_left);
- uint32_t sum32 = _mm_cvtsi128_si32(sum);
- sum32 >>= 16;
- sum32 += 6;
- sum32 /= 12;
- const __m128i row = _mm_set1_epi16((uint16_t)sum32);
-
- _mm_store_si128((__m128i *)dst, row);
- dst += stride;
- _mm_store_si128((__m128i *)dst, row);
- dst += stride;
- _mm_store_si128((__m128i *)dst, row);
- dst += stride;
- _mm_store_si128((__m128i *)dst, row);
-}
-
-void aom_highbd_dc_predictor_8x16_sse2(uint16_t *dst, ptrdiff_t stride,
- const uint16_t *above,
- const uint16_t *left, int bd) {
- (void)bd;
- __m128i sum_left = dc_sum_16(left);
- __m128i sum_above = dc_sum_8(above);
- const __m128i zero = _mm_setzero_si128();
- sum_left = _mm_unpacklo_epi16(sum_left, zero);
- sum_above = _mm_unpacklo_epi16(sum_above, zero);
- const __m128i sum = _mm_add_epi32(sum_left, sum_above);
- uint32_t sum32 = _mm_cvtsi128_si32(sum);
- sum32 += 12;
- sum32 /= 24;
- const __m128i row = _mm_set1_epi16((uint16_t)sum32);
- int i;
- for (i = 0; i < 4; ++i) {
- _mm_store_si128((__m128i *)dst, row);
- dst += stride;
- _mm_store_si128((__m128i *)dst, row);
- dst += stride;
- _mm_store_si128((__m128i *)dst, row);
- dst += stride;
- _mm_store_si128((__m128i *)dst, row);
- dst += stride;
- }
-}
-
-void aom_highbd_dc_predictor_16x8_sse2(uint16_t *dst, ptrdiff_t stride,
- const uint16_t *above,
- const uint16_t *left, int bd) {
- (void)bd;
- __m128i sum_left = dc_sum_8(left);
- __m128i sum_above = dc_sum_16(above);
- const __m128i zero = _mm_setzero_si128();
- sum_left = _mm_unpacklo_epi16(sum_left, zero);
- sum_above = _mm_unpacklo_epi16(sum_above, zero);
- const __m128i sum = _mm_add_epi32(sum_left, sum_above);
- uint32_t sum32 = _mm_cvtsi128_si32(sum);
- sum32 += 12;
- sum32 /= 24;
- const __m128i row = _mm_set1_epi16((uint16_t)sum32);
- int i;
- for (i = 0; i < 2; ++i) {
- _mm_store_si128((__m128i *)dst, row);
- _mm_store_si128((__m128i *)(dst + 8), row);
- dst += stride;
- _mm_store_si128((__m128i *)dst, row);
- _mm_store_si128((__m128i *)(dst + 8), row);
- dst += stride;
- _mm_store_si128((__m128i *)dst, row);
- _mm_store_si128((__m128i *)(dst + 8), row);
- dst += stride;
- _mm_store_si128((__m128i *)dst, row);
- _mm_store_si128((__m128i *)(dst + 8), row);
- dst += stride;
- }
-}
-
-void aom_highbd_dc_predictor_16x32_sse2(uint16_t *dst, ptrdiff_t stride,
- const uint16_t *above,
- const uint16_t *left, int bd) {
- (void)bd;
- __m128i sum_left = dc_sum_32(left);
- __m128i sum_above = dc_sum_16(above);
- const __m128i zero = _mm_setzero_si128();
- sum_above = _mm_unpacklo_epi16(sum_above, zero);
- const __m128i sum = _mm_add_epi32(sum_left, sum_above);
- uint32_t sum32 = _mm_cvtsi128_si32(sum);
- sum32 += 24;
- sum32 /= 48;
- const __m128i row = _mm_set1_epi16((uint16_t)sum32);
- int i;
- for (i = 0; i < 8; ++i) {
- _mm_store_si128((__m128i *)dst, row);
- _mm_store_si128((__m128i *)(dst + 8), row);
- dst += stride;
- _mm_store_si128((__m128i *)dst, row);
- _mm_store_si128((__m128i *)(dst + 8), row);
- dst += stride;
- _mm_store_si128((__m128i *)dst, row);
- _mm_store_si128((__m128i *)(dst + 8), row);
- dst += stride;
- _mm_store_si128((__m128i *)dst, row);
- _mm_store_si128((__m128i *)(dst + 8), row);
- dst += stride;
- }
-}
-
-void aom_highbd_dc_predictor_32x16_sse2(uint16_t *dst, ptrdiff_t stride,
- const uint16_t *above,
- const uint16_t *left, int bd) {
- (void)bd;
- __m128i sum_left = dc_sum_16(left);
- __m128i sum_above = dc_sum_32(above);
- const __m128i zero = _mm_setzero_si128();
- sum_left = _mm_unpacklo_epi16(sum_left, zero);
- const __m128i sum = _mm_add_epi32(sum_left, sum_above);
- uint32_t sum32 = _mm_cvtsi128_si32(sum);
- sum32 += 24;
- sum32 /= 48;
- const __m128i row = _mm_set1_epi16((uint16_t)sum32);
- int i;
- for (i = 0; i < 4; ++i) {
- _mm_store_si128((__m128i *)dst, row);
- _mm_store_si128((__m128i *)(dst + 8), row);
- _mm_store_si128((__m128i *)(dst + 16), row);
- _mm_store_si128((__m128i *)(dst + 24), row);
- dst += stride;
- _mm_store_si128((__m128i *)dst, row);
- _mm_store_si128((__m128i *)(dst + 8), row);
- _mm_store_si128((__m128i *)(dst + 16), row);
- _mm_store_si128((__m128i *)(dst + 24), row);
- dst += stride;
- _mm_store_si128((__m128i *)dst, row);
- _mm_store_si128((__m128i *)(dst + 8), row);
- _mm_store_si128((__m128i *)(dst + 16), row);
- _mm_store_si128((__m128i *)(dst + 24), row);
- dst += stride;
- _mm_store_si128((__m128i *)dst, row);
- _mm_store_si128((__m128i *)(dst + 8), row);
- _mm_store_si128((__m128i *)(dst + 16), row);
- _mm_store_si128((__m128i *)(dst + 24), row);
- dst += stride;
- }
-}
diff --git a/third_party/aom/aom_dsp/x86/highbd_intrapred_sse2_asm.asm b/third_party/aom/aom_dsp/x86/highbd_intrapred_sse2_asm.asm
deleted file mode 100644
index 91b3d126c..000000000
--- a/third_party/aom/aom_dsp/x86/highbd_intrapred_sse2_asm.asm
+++ /dev/null
@@ -1,259 +0,0 @@
-;
-; Copyright (c) 2016, Alliance for Open Media. All rights reserved
-;
-; This source code is subject to the terms of the BSD 2 Clause License and
-; the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
-; was not distributed with this source code in the LICENSE file, you can
-; obtain it at www.aomedia.org/license/software. If the Alliance for Open
-; Media Patent License 1.0 was not distributed with this source code in the
-; PATENTS file, you can obtain it at www.aomedia.org/license/patent.
-;
-
-;
-
-%include "third_party/x86inc/x86inc.asm"
-
-SECTION_RODATA
-pw_4: times 8 dw 4
-pw_8: times 8 dw 8
-pw_16: times 4 dd 16
-pw_32: times 4 dd 32
-
-SECTION .text
-INIT_XMM sse2
-cglobal highbd_dc_predictor_4x4, 4, 5, 4, dst, stride, above, left, goffset
- GET_GOT goffsetq
-
- movq m0, [aboveq]
- movq m2, [leftq]
- paddw m0, m2
- pshuflw m1, m0, 0xe
- paddw m0, m1
- pshuflw m1, m0, 0x1
- paddw m0, m1
- paddw m0, [GLOBAL(pw_4)]
- psraw m0, 3
- pshuflw m0, m0, 0x0
- movq [dstq ], m0
- movq [dstq+strideq*2], m0
- lea dstq, [dstq+strideq*4]
- movq [dstq ], m0
- movq [dstq+strideq*2], m0
-
- RESTORE_GOT
- RET
-
-INIT_XMM sse2
-cglobal highbd_dc_predictor_8x8, 4, 5, 4, dst, stride, above, left, goffset
- GET_GOT goffsetq
-
- pxor m1, m1
- mova m0, [aboveq]
- mova m2, [leftq]
- DEFINE_ARGS dst, stride, stride3, one
- mov oned, 0x00010001
- lea stride3q, [strideq*3]
- movd m3, oned
- pshufd m3, m3, 0x0
- paddw m0, m2
- pmaddwd m0, m3
- packssdw m0, m1
- pmaddwd m0, m3
- packssdw m0, m1
- pmaddwd m0, m3
- paddw m0, [GLOBAL(pw_8)]
- psrlw m0, 4
- pshuflw m0, m0, 0x0
- punpcklqdq m0, m0
- mova [dstq ], m0
- mova [dstq+strideq*2 ], m0
- mova [dstq+strideq*4 ], m0
- mova [dstq+stride3q*2], m0
- lea dstq, [dstq+strideq*8]
- mova [dstq ], m0
- mova [dstq+strideq*2 ], m0
- mova [dstq+strideq*4 ], m0
- mova [dstq+stride3q*2], m0
-
- RESTORE_GOT
- RET
-
-INIT_XMM sse2
-cglobal highbd_dc_predictor_16x16, 4, 5, 5, dst, stride, above, left, goffset
- GET_GOT goffsetq
-
- pxor m1, m1
- mova m0, [aboveq]
- mova m3, [aboveq+16]
- mova m2, [leftq]
- mova m4, [leftq+16]
- DEFINE_ARGS dst, stride, stride3, lines4
- lea stride3q, [strideq*3]
- mov lines4d, 4
- paddw m0, m2
- paddw m0, m3
- paddw m0, m4
- movhlps m2, m0
- paddw m0, m2
- punpcklwd m0, m1
- movhlps m2, m0
- paddd m0, m2
- punpckldq m0, m1
- movhlps m2, m0
- paddd m0, m2
- paddd m0, [GLOBAL(pw_16)]
- psrad m0, 5
- pshuflw m0, m0, 0x0
- punpcklqdq m0, m0
-.loop:
- mova [dstq ], m0
- mova [dstq +16], m0
- mova [dstq+strideq*2 ], m0
- mova [dstq+strideq*2 +16], m0
- mova [dstq+strideq*4 ], m0
- mova [dstq+strideq*4 +16], m0
- mova [dstq+stride3q*2 ], m0
- mova [dstq+stride3q*2+16], m0
- lea dstq, [dstq+strideq*8]
- dec lines4d
- jnz .loop
-
- RESTORE_GOT
- REP_RET
-
-INIT_XMM sse2
-cglobal highbd_dc_predictor_32x32, 4, 5, 7, dst, stride, above, left, goffset
- GET_GOT goffsetq
-
- mova m0, [aboveq]
- mova m2, [aboveq+16]
- mova m3, [aboveq+32]
- mova m4, [aboveq+48]
- paddw m0, m2
- paddw m3, m4
- mova m2, [leftq]
- mova m4, [leftq+16]
- mova m5, [leftq+32]
- mova m6, [leftq+48]
- paddw m2, m4
- paddw m5, m6
- paddw m0, m3
- paddw m2, m5
- pxor m1, m1
- paddw m0, m2
- DEFINE_ARGS dst, stride, stride3, lines4
- lea stride3q, [strideq*3]
- mov lines4d, 8
- movhlps m2, m0
- paddw m0, m2
- punpcklwd m0, m1
- movhlps m2, m0
- paddd m0, m2
- punpckldq m0, m1
- movhlps m2, m0
- paddd m0, m2
- paddd m0, [GLOBAL(pw_32)]
- psrad m0, 6
- pshuflw m0, m0, 0x0
- punpcklqdq m0, m0
-.loop:
- mova [dstq ], m0
- mova [dstq +16 ], m0
- mova [dstq +32 ], m0
- mova [dstq +48 ], m0
- mova [dstq+strideq*2 ], m0
- mova [dstq+strideq*2+16 ], m0
- mova [dstq+strideq*2+32 ], m0
- mova [dstq+strideq*2+48 ], m0
- mova [dstq+strideq*4 ], m0
- mova [dstq+strideq*4+16 ], m0
- mova [dstq+strideq*4+32 ], m0
- mova [dstq+strideq*4+48 ], m0
- mova [dstq+stride3q*2 ], m0
- mova [dstq+stride3q*2 +16], m0
- mova [dstq+stride3q*2 +32], m0
- mova [dstq+stride3q*2 +48], m0
- lea dstq, [dstq+strideq*8]
- dec lines4d
- jnz .loop
-
- RESTORE_GOT
- REP_RET
-
-INIT_XMM sse2
-cglobal highbd_v_predictor_4x4, 3, 3, 1, dst, stride, above
- movq m0, [aboveq]
- movq [dstq ], m0
- movq [dstq+strideq*2], m0
- lea dstq, [dstq+strideq*4]
- movq [dstq ], m0
- movq [dstq+strideq*2], m0
- RET
-
-INIT_XMM sse2
-cglobal highbd_v_predictor_8x8, 3, 3, 1, dst, stride, above
- mova m0, [aboveq]
- DEFINE_ARGS dst, stride, stride3
- lea stride3q, [strideq*3]
- mova [dstq ], m0
- mova [dstq+strideq*2 ], m0
- mova [dstq+strideq*4 ], m0
- mova [dstq+stride3q*2], m0
- lea dstq, [dstq+strideq*8]
- mova [dstq ], m0
- mova [dstq+strideq*2 ], m0
- mova [dstq+strideq*4 ], m0
- mova [dstq+stride3q*2], m0
- RET
-
-INIT_XMM sse2
-cglobal highbd_v_predictor_16x16, 3, 4, 2, dst, stride, above
- mova m0, [aboveq]
- mova m1, [aboveq+16]
- DEFINE_ARGS dst, stride, stride3, nlines4
- lea stride3q, [strideq*3]
- mov nlines4d, 4
-.loop:
- mova [dstq ], m0
- mova [dstq +16], m1
- mova [dstq+strideq*2 ], m0
- mova [dstq+strideq*2 +16], m1
- mova [dstq+strideq*4 ], m0
- mova [dstq+strideq*4 +16], m1
- mova [dstq+stride3q*2 ], m0
- mova [dstq+stride3q*2+16], m1
- lea dstq, [dstq+strideq*8]
- dec nlines4d
- jnz .loop
- REP_RET
-
-INIT_XMM sse2
-cglobal highbd_v_predictor_32x32, 3, 4, 4, dst, stride, above
- mova m0, [aboveq]
- mova m1, [aboveq+16]
- mova m2, [aboveq+32]
- mova m3, [aboveq+48]
- DEFINE_ARGS dst, stride, stride3, nlines4
- lea stride3q, [strideq*3]
- mov nlines4d, 8
-.loop:
- mova [dstq ], m0
- mova [dstq +16], m1
- mova [dstq +32], m2
- mova [dstq +48], m3
- mova [dstq+strideq*2 ], m0
- mova [dstq+strideq*2 +16], m1
- mova [dstq+strideq*2 +32], m2
- mova [dstq+strideq*2 +48], m3
- mova [dstq+strideq*4 ], m0
- mova [dstq+strideq*4 +16], m1
- mova [dstq+strideq*4 +32], m2
- mova [dstq+strideq*4 +48], m3
- mova [dstq+stride3q*2 ], m0
- mova [dstq+stride3q*2 +16], m1
- mova [dstq+stride3q*2 +32], m2
- mova [dstq+stride3q*2 +48], m3
- lea dstq, [dstq+strideq*8]
- dec nlines4d
- jnz .loop
- REP_RET
diff --git a/third_party/aom/aom_dsp/x86/highbd_loopfilter_avx2.c b/third_party/aom/aom_dsp/x86/highbd_loopfilter_avx2.c
deleted file mode 100644
index c954da94e..000000000
--- a/third_party/aom/aom_dsp/x86/highbd_loopfilter_avx2.c
+++ /dev/null
@@ -1,66 +0,0 @@
-/*
- * Copyright (c) 2017, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#include <immintrin.h>
-
-#include "config/aom_dsp_rtcd.h"
-
-#include "aom_dsp/x86/common_avx2.h"
-#include "aom_dsp/x86/lpf_common_sse2.h"
-#include "aom/aom_integer.h"
-
-void aom_highbd_lpf_horizontal_14_dual_avx2(
- uint16_t *s, int p, const uint8_t *blimit0, const uint8_t *limit0,
- const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1,
- const uint8_t *thresh1, int bd) {
- aom_highbd_lpf_horizontal_14_dual_sse2(s, p, blimit0, limit0, thresh0,
- blimit1, limit1, thresh1, bd);
-}
-
-void aom_highbd_lpf_vertical_14_dual_avx2(
- uint16_t *s, int p, const uint8_t *blimit0, const uint8_t *limit0,
- const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1,
- const uint8_t *thresh1, int bd) {
- aom_highbd_lpf_vertical_14_dual_sse2(s, p, blimit0, limit0, thresh0, blimit1,
- limit1, thresh1, bd);
-}
-
-void aom_highbd_lpf_horizontal_4_dual_avx2(
- uint16_t *s, int p, const uint8_t *blimit0, const uint8_t *limit0,
- const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1,
- const uint8_t *thresh1, int bd) {
- aom_highbd_lpf_horizontal_4_dual_sse2(s, p, blimit0, limit0, thresh0, blimit1,
- limit1, thresh1, bd);
-}
-
-void aom_highbd_lpf_horizontal_8_dual_avx2(
- uint16_t *s, int p, const uint8_t *blimit0, const uint8_t *limit0,
- const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1,
- const uint8_t *thresh1, int bd) {
- aom_highbd_lpf_horizontal_8_dual_sse2(s, p, blimit0, limit0, thresh0, blimit1,
- limit1, thresh1, bd);
-}
-
-void aom_highbd_lpf_vertical_4_dual_avx2(
- uint16_t *s, int p, const uint8_t *blimit0, const uint8_t *limit0,
- const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1,
- const uint8_t *thresh1, int bd) {
- aom_highbd_lpf_vertical_4_dual_sse2(s, p, blimit0, limit0, thresh0, blimit1,
- limit1, thresh1, bd);
-}
-
-void aom_highbd_lpf_vertical_8_dual_avx2(
- uint16_t *s, int p, const uint8_t *blimit0, const uint8_t *limit0,
- const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1,
- const uint8_t *thresh1, int bd) {
- aom_highbd_lpf_vertical_8_dual_sse2(s, p, blimit0, limit0, thresh0, blimit1,
- limit1, thresh1, bd);
-}
diff --git a/third_party/aom/aom_dsp/x86/highbd_loopfilter_sse2.c b/third_party/aom/aom_dsp/x86/highbd_loopfilter_sse2.c
deleted file mode 100644
index 097e0778f..000000000
--- a/third_party/aom/aom_dsp/x86/highbd_loopfilter_sse2.c
+++ /dev/null
@@ -1,1697 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#include <emmintrin.h> // SSE2
-
-#include "config/aom_dsp_rtcd.h"
-
-#include "aom_dsp/x86/lpf_common_sse2.h"
-
-static AOM_FORCE_INLINE void pixel_clamp(const __m128i *min, const __m128i *max,
- __m128i *pixel) {
- // Per signed 16-bit lane: cap at *max first, then raise to at least *min.
- *pixel = _mm_max_epi16(_mm_min_epi16(*pixel, *max), *min);
-}
-
-static AOM_FORCE_INLINE __m128i abs_diff16(__m128i a, __m128i b) {  // per-lane |a - b| on unsigned 16-bit lanes
- return _mm_or_si128(_mm_subs_epu16(a, b), _mm_subs_epu16(b, a));  // one saturating difference is 0, the other is the magnitude
-}
-
-static INLINE void get_limit(const uint8_t *bl, const uint8_t *l,
- const uint8_t *t, int bd, __m128i *blt,
- __m128i *lt, __m128i *thr, __m128i *t80_out) {
- // The low 8 bytes of each table are zero-extended to 16-bit lanes and
- // shifted left by (bd - 8). _mm_load_si128 needs 16-byte aligned tables.
- const __m128i no_bits = _mm_setzero_si128();
- const int scale = bd - 8;
- const __m128i *const src_bl = (const __m128i *)bl;
- const __m128i *const src_l = (const __m128i *)l;
- const __m128i *const src_t = (const __m128i *)t;
-
- *blt = _mm_slli_epi16(_mm_unpacklo_epi8(_mm_load_si128(src_bl), no_bits), scale);
- *lt = _mm_slli_epi16(_mm_unpacklo_epi8(_mm_load_si128(src_l), no_bits), scale);
- *thr = _mm_slli_epi16(_mm_unpacklo_epi8(_mm_load_si128(src_t), no_bits), scale);
- // Broadcast 1 << (bd - 1) to every lane.
- *t80_out = _mm_set1_epi16(1 << (bd - 1));
-}
-
-static INLINE void get_limit_dual(
- const uint8_t *_blimit0, const uint8_t *_limit0, const uint8_t *_thresh0,
- const uint8_t *_blimit1, const uint8_t *_limit1, const uint8_t *_thresh1,
- int bd, __m128i *blt_out, __m128i *lt_out, __m128i *thr_out,
- __m128i *t80_out) {
- // Output layout per vector: lanes 0-3 come from the *0 table, lanes 4-7 from
- // the *1 table, each byte zero-extended to 16 bits and shifted left by
- // (bd - 8). _mm_load_si128 requires every table to be 16-byte aligned.
- const __m128i no_bits = _mm_setzero_si128();
- const int scale = bd - 8;
- __m128i first, second;
-
- first = _mm_unpacklo_epi8(_mm_load_si128((const __m128i *)_blimit0), no_bits);
- second = _mm_unpacklo_epi8(_mm_load_si128((const __m128i *)_blimit1), no_bits);
- *blt_out = _mm_slli_epi16(_mm_unpacklo_epi64(first, second), scale);
-
- first = _mm_unpacklo_epi8(_mm_load_si128((const __m128i *)_limit0), no_bits);
- second = _mm_unpacklo_epi8(_mm_load_si128((const __m128i *)_limit1), no_bits);
- *lt_out = _mm_slli_epi16(_mm_unpacklo_epi64(first, second), scale);
-
- first = _mm_unpacklo_epi8(_mm_load_si128((const __m128i *)_thresh0), no_bits);
- second = _mm_unpacklo_epi8(_mm_load_si128((const __m128i *)_thresh1), no_bits);
- *thr_out = _mm_slli_epi16(_mm_unpacklo_epi64(first, second), scale);
-
- // Broadcast 1 << (bd - 1) to every lane.
- *t80_out = _mm_set1_epi16(1 << (bd - 1));
-}
-
-static INLINE void load_highbd_pixel(const uint16_t *s, int size, int pitch,
- __m128i *p, __m128i *q) {
- // p[k] is the row (k + 1) pitches before s; q[k] is the row k pitches after.
- for (int row = 0; row < size; ++row) {
- p[row] = _mm_loadu_si128((__m128i *)(s - (row + 1) * pitch));
- q[row] = _mm_loadu_si128((__m128i *)(s + row * pitch));
- }
-}
-
-static INLINE void highbd_filter_mask_dual(const __m128i *p, const __m128i *q,  // builds *mask for all 8 lanes from p[0..3], q[0..3], *l and *bl
- const __m128i *l, const __m128i *bl,
- __m128i *mask) {
- __m128i abs_p0q0 = abs_diff16(p[0], q[0]);
- __m128i abs_p1q1 = abs_diff16(p[1], q[1]);
- abs_p0q0 = _mm_adds_epu16(abs_p0q0, abs_p0q0);  // |p0 - q0| * 2, saturating
- abs_p1q1 = _mm_srli_epi16(abs_p1q1, 1);  // |p1 - q1| / 2
-
- const __m128i zero = _mm_setzero_si128();
- const __m128i one = _mm_set1_epi16(1);
- const __m128i ffff = _mm_set1_epi16(0xFFFF);
-
- __m128i max = _mm_subs_epu16(_mm_adds_epu16(abs_p0q0, abs_p1q1), *bl);  // nonzero only where the sum exceeds *bl
- max = _mm_xor_si128(_mm_cmpeq_epi16(max, zero), ffff);  // all-ones in the lanes that exceeded *bl
- max = _mm_and_si128(max, _mm_adds_epu16(*l, one));  // those lanes become *l + 1, so they also fail the *l test below
-
- int i;
- for (i = 1; i < 4; ++i) {  // fold in the differences between neighbouring taps on each side
- max = _mm_max_epi16(max, abs_diff16(p[i], p[i - 1]));
- max = _mm_max_epi16(max, abs_diff16(q[i], q[i - 1]));
- }
- max = _mm_subs_epu16(max, *l);  // zero where nothing exceeded *l
- *mask = _mm_cmpeq_epi16(max, zero); // return ~mask: all-ones in lanes where every test passed
-}
-
-static INLINE void highbd_hev_filter_mask_x_sse2(__m128i *pq, int x,  // computes *hev and *mask from the first x packed taps in pq[]
- __m128i *p1p0, __m128i *q1q0,
- __m128i *abs_p1p0, __m128i *l,
- __m128i *bl, __m128i *t,
- __m128i *hev, __m128i *mask) {
- const __m128i zero = _mm_setzero_si128();
- const __m128i one = _mm_set1_epi16(1);
- const __m128i ffff = _mm_set1_epi16(0xFFFF);
- __m128i abs_p0q0_p1q1, abs_p0q0, abs_p1q1, abs_q1q0;
- __m128i max, max01, h;
-
- *p1p0 = _mm_unpacklo_epi64(pq[0], pq[1]);  // low halves of pq[0] and pq[1]: pq[0]'s in the low 64 bits
- *q1q0 = _mm_unpackhi_epi64(pq[0], pq[1]);  // high halves of pq[0] and pq[1], same ordering
-
- abs_p0q0_p1q1 = abs_diff16(*p1p0, *q1q0);
- abs_p0q0 = _mm_adds_epu16(abs_p0q0_p1q1, abs_p0q0_p1q1);
- abs_p0q0 = _mm_unpacklo_epi64(abs_p0q0, zero);  // keep only the low half (tap 0), upper half zeroed
-
- abs_p1q1 = _mm_srli_si128(abs_p0q0_p1q1, 8);  // move the tap-1 half down to the low 64 bits
- abs_p1q1 = _mm_srli_epi16(abs_p1q1, 1); // divide by 2
-
- max = _mm_subs_epu16(_mm_adds_epu16(abs_p0q0, abs_p1q1), *bl);
- max = _mm_xor_si128(_mm_cmpeq_epi16(max, zero), ffff);
- // mask |= (abs(*p0 - *q0) * 2 + abs(*p1 - *q1) / 2 > blimit) * -1;
- // So taking maximums continues to work:
- max = _mm_and_si128(max, _mm_adds_epu16(*l, one));  // failing lanes become *l + 1
-
- *abs_p1p0 = abs_diff16(pq[0], pq[1]);  // |tap0 - tap1| for both halves of the packed registers
- abs_q1q0 = _mm_srli_si128(*abs_p1p0, 8);
- max01 = _mm_max_epi16(*abs_p1p0, abs_q1q0);
- // mask |= (abs(*p1 - *p0) > limit) * -1;
- // mask |= (abs(*q1 - *q0) > limit) * -1;
- h = _mm_subs_epu16(max01, *t);  // nonzero where the larger tap0/tap1 difference exceeds *t
-
- *hev = _mm_xor_si128(_mm_cmpeq_epi16(h, zero), ffff);
- // replicate for the further "merged variables" usage
- *hev = _mm_unpacklo_epi64(*hev, *hev);
-
- max = _mm_max_epi16(max, max01);
- int i;
- for (i = 2; i < x; ++i) {  // remaining neighbouring-tap differences, both halves at once
- max = _mm_max_epi16(max, abs_diff16(pq[i], pq[i - 1]));
- }
- max = _mm_max_epi16(max, _mm_srli_si128(max, 8));  // merge the high half into the low half
-
- max = _mm_subs_epu16(max, *l);
- *mask = _mm_cmpeq_epi16(max, zero); // ~mask
-}
-
-static INLINE void flat_mask_internal(const __m128i *th, const __m128i *pq,
- int start, int end, __m128i *flat) {
- // Largest |pq[k] - pq[0]| over k = start .. end - 1; the first two taps
- // (start and start + 1) are always included.
- const __m128i centre = pq[0];
- __m128i worst = abs_diff16(pq[start], centre);
- int k;
-
- worst = _mm_max_epi16(worst, abs_diff16(pq[start + 1], centre));
- for (k = start + 2; k < end; ++k) {
- worst = _mm_max_epi16(worst, abs_diff16(pq[k], centre));
- }
- // Merge the upper four lanes into the lower four.
- worst = _mm_max_epi16(worst, _mm_srli_si128(worst, 8));
- // A lane is all-ones when its largest difference does not exceed *th.
- *flat = _mm_cmpeq_epi16(_mm_subs_epu16(worst, *th), _mm_setzero_si128());
-}
-
-static INLINE void flat_mask_internal_dual(const __m128i *th, const __m128i *p,
- const __m128i *q, int start, int end,
- __m128i *flat) {
- // Largest of |p[k] - p[0]| and |q[k] - q[0]| over k = start .. end - 1
- // (tap `start` is always included), compared lane by lane against *th.
- const __m128i p_ref = p[0];
- const __m128i q_ref = q[0];
- __m128i worst = _mm_max_epi16(abs_diff16(p[start], p_ref),
- abs_diff16(q[start], q_ref));
- int k;
-
- for (k = start + 1; k < end; ++k) {
- worst = _mm_max_epi16(worst, abs_diff16(p[k], p_ref));
- worst = _mm_max_epi16(worst, abs_diff16(q[k], q_ref));
- }
- // All-ones in each lane whose largest difference does not exceed *th.
- *flat = _mm_cmpeq_epi16(_mm_subs_epu16(worst, *th), _mm_setzero_si128());
-}
-
-static INLINE void highbd_flat_mask4_sse2(__m128i *pq, __m128i *flat,
- __m128i *flat2, int bd) {
- // Flatness threshold: 1 shifted left by (bd - 8) in every lane.
- const __m128i flat_th = _mm_slli_epi16(_mm_set1_epi16(1), bd - 8);
- // flat tests taps 1..3 against tap 0; flat2 tests taps 4..6 against tap 0.
- flat_mask_internal(&flat_th, pq, 1, 4, flat);
- flat_mask_internal(&flat_th, pq, 4, 7, flat2);
-}
-
-static INLINE void highbd_flat_mask4_dual_sse2(const __m128i *p,
- const __m128i *q, __m128i *flat,
- __m128i *flat2, int bd) {
- // Flatness threshold: 1 shifted left by (bd - 8) in every lane.
- const __m128i flat_th = _mm_slli_epi16(_mm_set1_epi16(1), bd - 8);
- // flat covers taps 1..3 on both sides; flat2 covers taps 4..6.
- flat_mask_internal_dual(&flat_th, p, q, 1, 4, flat);
- flat_mask_internal_dual(&flat_th, p, q, 4, 7, flat2);
-}
-
-static AOM_FORCE_INLINE void highbd_filter4_sse2(__m128i *p1p0, __m128i *q1q0,  // narrow filter on packed halves: tap 0 in the low 64 bits, tap 1 in the high 64 bits
- __m128i *hev, __m128i *mask,
- __m128i *qs1qs0,
- __m128i *ps1ps0, __m128i *t80,
- int bd) {
- const __m128i zero = _mm_setzero_si128();
- const __m128i one = _mm_set1_epi16(1);
- const __m128i pmax =
- _mm_subs_epi16(_mm_subs_epi16(_mm_slli_epi16(one, bd), one), *t80);  // ((1 << bd) - 1) - t80
- const __m128i pmin = _mm_subs_epi16(zero, *t80);  // 0 - t80
-
- const __m128i t3t4 = _mm_set_epi16(3, 3, 3, 3, 4, 4, 4, 4);  // arguments run high lane -> low lane: low half = 4, high half = 3
- __m128i ps1ps0_work, qs1qs0_work, work;
- __m128i filt, filter2filter1, filter2filt, filter1filt;
-
- ps1ps0_work = _mm_subs_epi16(*p1p0, *t80);  // offset the samples by t80 before the signed arithmetic
- qs1qs0_work = _mm_subs_epi16(*q1q0, *t80);
-
- work = _mm_subs_epi16(ps1ps0_work, qs1qs0_work);  // low half: ps0 - qs0, high half: ps1 - qs1
- pixel_clamp(&pmin, &pmax, &work);
- filt = _mm_and_si128(_mm_srli_si128(work, 8), *hev);  // (ps1 - qs1) & hev, moved to the low half
-
- filt = _mm_subs_epi16(filt, work);  // three subtractions of (ps0 - qs0) add 3 * (qs0 - ps0)
- filt = _mm_subs_epi16(filt, work);
- filt = _mm_subs_epi16(filt, work);
- // (aom_filter + 3 * (qs0 - ps0)) & mask
- pixel_clamp(&pmin, &pmax, &filt);
- filt = _mm_and_si128(filt, *mask);
- filt = _mm_unpacklo_epi64(filt, filt);  // duplicate the low half into both halves
-
- filter2filter1 = _mm_adds_epi16(filt, t3t4); /* signed_short_clamp */
- pixel_clamp(&pmin, &pmax, &filter2filter1);
- filter2filter1 = _mm_srai_epi16(filter2filter1, 3); /* >> 3 */
-
- filt = _mm_unpacklo_epi64(filter2filter1, filter2filter1);  // the (filt + 4) >> 3 value in both halves
-
- // filt >> 1
- filt = _mm_adds_epi16(filt, one);
- filt = _mm_srai_epi16(filt, 1);
- filt = _mm_andnot_si128(*hev, filt);  // zeroed in lanes where hev is set
-
- filter2filt = _mm_unpackhi_epi64(filter2filter1, filt);  // low: (filt + 3) >> 3, high: rounded half value
- filter1filt = _mm_unpacklo_epi64(filter2filter1, filt);  // low: (filt + 4) >> 3, high: rounded half value
-
- qs1qs0_work = _mm_subs_epi16(qs1qs0_work, filter1filt);
- ps1ps0_work = _mm_adds_epi16(ps1ps0_work, filter2filt);
-
- pixel_clamp(&pmin, &pmax, &qs1qs0_work);
- pixel_clamp(&pmin, &pmax, &ps1ps0_work);
-
- *qs1qs0 = _mm_adds_epi16(qs1qs0_work, *t80);  // undo the t80 offset
- *ps1ps0 = _mm_adds_epi16(ps1ps0_work, *t80);
-}
-
-static INLINE void highbd_filter4_dual_sse2(__m128i *p, __m128i *q, __m128i *ps,  // narrow filter on 8 lanes: reads p[0..1] / q[0..1], writes ps[0..1] / qs[0..1]
- __m128i *qs, const __m128i *mask,
- const __m128i *th, int bd,
- __m128i *t80) {
- __m128i ps0 = _mm_subs_epi16(p[0], *t80);  // offset the samples by t80 before the signed arithmetic
- __m128i ps1 = _mm_subs_epi16(p[1], *t80);
- __m128i qs0 = _mm_subs_epi16(q[0], *t80);
- __m128i qs1 = _mm_subs_epi16(q[1], *t80);
- const __m128i one = _mm_set1_epi16(1);
- const __m128i pmax =
- _mm_subs_epi16(_mm_subs_epi16(_mm_slli_epi16(one, bd), one), *t80);  // ((1 << bd) - 1) - t80
-
- const __m128i zero = _mm_setzero_si128();
- const __m128i pmin = _mm_subs_epi16(zero, *t80);  // 0 - t80
- __m128i filter = _mm_subs_epi16(ps1, qs1);
- pixel_clamp(&pmin, &pmax, &filter);
-
- // hev_filter
- __m128i hev;
- const __m128i abs_p1p0 = abs_diff16(p[1], p[0]);
- const __m128i abs_q1q0 = abs_diff16(q[1], q[0]);
- __m128i h = _mm_max_epi16(abs_p1p0, abs_q1q0);
- h = _mm_subs_epu16(h, *th);  // nonzero where the larger difference exceeds *th
- const __m128i ffff = _mm_cmpeq_epi16(h, h);  // all-ones constant
- hev = _mm_xor_si128(_mm_cmpeq_epi16(h, zero), ffff);
-
- filter = _mm_and_si128(filter, hev);  // keep (ps1 - qs1) only in hev lanes
-
- const __m128i x = _mm_subs_epi16(qs0, ps0);
- filter = _mm_adds_epi16(filter, x);  // three additions give filter + 3 * (qs0 - ps0)
- filter = _mm_adds_epi16(filter, x);
- filter = _mm_adds_epi16(filter, x);
- pixel_clamp(&pmin, &pmax, &filter);
- filter = _mm_and_si128(filter, *mask);
- const __m128i t3 = _mm_set1_epi16(3);
- const __m128i t4 = _mm_set1_epi16(4);
- __m128i filter1 = _mm_adds_epi16(filter, t4);
- __m128i filter2 = _mm_adds_epi16(filter, t3);
- pixel_clamp(&pmin, &pmax, &filter1);
- pixel_clamp(&pmin, &pmax, &filter2);
- filter1 = _mm_srai_epi16(filter1, 3);  // (filter + 4) >> 3, subtracted from qs0
- filter2 = _mm_srai_epi16(filter2, 3);  // (filter + 3) >> 3, added to ps0
- qs0 = _mm_subs_epi16(qs0, filter1);
- pixel_clamp(&pmin, &pmax, &qs0);
- ps0 = _mm_adds_epi16(ps0, filter2);
- pixel_clamp(&pmin, &pmax, &ps0);
- qs[0] = _mm_adds_epi16(qs0, *t80);  // undo the t80 offset
- ps[0] = _mm_adds_epi16(ps0, *t80);
- filter = _mm_adds_epi16(filter1, one);
- filter = _mm_srai_epi16(filter, 1);  // (filter1 + 1) >> 1
- filter = _mm_andnot_si128(hev, filter);  // tap-1 adjustment is zero in hev lanes
- qs1 = _mm_subs_epi16(qs1, filter);
- pixel_clamp(&pmin, &pmax, &qs1);
- ps1 = _mm_adds_epi16(ps1, filter);
- pixel_clamp(&pmin, &pmax, &ps1);
- qs[1] = _mm_adds_epi16(qs1, *t80);
- ps[1] = _mm_adds_epi16(ps1, *t80);
-}
-
-static AOM_FORCE_INLINE void highbd_lpf_internal_14_sse2(  // 14-tap filter core; only the low 64 bits of each p[i] / q[i] are used
- __m128i *p, __m128i *q, __m128i *pq, const unsigned char *blt,
- const unsigned char *lt, const unsigned char *thr, int bd) {
- int i;
- const __m128i zero = _mm_setzero_si128();
- __m128i blimit, limit, thresh;
- __m128i t80;
- get_limit(blt, lt, thr, bd, &blimit, &limit, &thresh, &t80);
-
- for (i = 0; i < 7; i++) {
- pq[i] = _mm_unpacklo_epi64(p[i], q[i]);  // low 64 bits = p[i], high 64 bits = q[i]
- }
- __m128i mask, hevhev;
- __m128i p1p0, q1q0, abs_p1p0;
-
- highbd_hev_filter_mask_x_sse2(pq, 4, &p1p0, &q1q0, &abs_p1p0, &limit, &blimit,
- &thresh, &hevhev, &mask);
-
- __m128i ps0ps1, qs0qs1;
- // filter4
- highbd_filter4_sse2(&p1p0, &q1q0, &hevhev, &mask, &qs0qs1, &ps0ps1, &t80, bd);
-
- __m128i flat, flat2;
- highbd_flat_mask4_sse2(pq, &flat, &flat2, bd);
-
- flat = _mm_and_si128(flat, mask);
- flat2 = _mm_and_si128(flat2, flat);
-
- // replicate for the further "merged variables" usage
- flat = _mm_unpacklo_epi64(flat, flat);
- flat2 = _mm_unpacklo_epi64(flat2, flat2);
-
- // flat and wide flat calculations
-
- // if flat ==0 then flat2 is zero as well and we don't need any calc below
- // sse4.1 if (0==_mm_test_all_zeros(flat,ff))
- if (0xffff != _mm_movemask_epi8(_mm_cmpeq_epi16(flat, zero))) {  // true when at least one lane of flat is nonzero
- __m128i flat_p[3], flat_q[3], flat_pq[3];
- __m128i flat2_p[6], flat2_q[6];
- __m128i flat2_pq[6];
- __m128i sum_p6, sum_p3;
- const __m128i eight = _mm_set1_epi16(8);
- const __m128i four = _mm_set1_epi16(4);
-
- __m128i work0, work0_0, work0_1, sum_p_0;
- __m128i sum_p = _mm_add_epi16(pq[5], _mm_add_epi16(pq[4], pq[3]));
- __m128i sum_lp = _mm_add_epi16(pq[0], _mm_add_epi16(pq[2], pq[1]));
- sum_p = _mm_add_epi16(sum_p, sum_lp);  // taps 0..5, p side in the low half and q side in the high half
-
- __m128i sum_lq = _mm_srli_si128(sum_lp, 8);  // q-side sums moved down to the low half
- __m128i sum_q = _mm_srli_si128(sum_p, 8);
-
- sum_p_0 = _mm_add_epi16(eight, _mm_add_epi16(sum_p, sum_q));  // p0..p5 + q0..q5 + 8 (rounding for the >> 4 outputs)
- sum_lp = _mm_add_epi16(four, _mm_add_epi16(sum_lp, sum_lq));  // p0..p2 + q0..q2 + 4 (rounding for the >> 3 outputs)
-
- flat_p[0] = _mm_add_epi16(sum_lp, _mm_add_epi16(pq[3], pq[0]));
- flat_q[0] = _mm_add_epi16(sum_lp, _mm_add_epi16(q[3], q[0]));
-
- sum_p6 = _mm_add_epi16(pq[6], pq[6]);
- sum_p3 = _mm_add_epi16(pq[3], pq[3]);
-
- sum_q = _mm_sub_epi16(sum_p_0, pq[5]);
- sum_p = _mm_sub_epi16(sum_p_0, q[5]);
-
- work0_0 = _mm_add_epi16(_mm_add_epi16(pq[6], pq[0]), pq[1]);
- work0_1 = _mm_add_epi16(sum_p6,
- _mm_add_epi16(pq[1], _mm_add_epi16(pq[2], pq[0])));
-
- sum_lq = _mm_sub_epi16(sum_lp, pq[2]);
- sum_lp = _mm_sub_epi16(sum_lp, q[2]);
-
- work0 = _mm_add_epi16(sum_p3, pq[1]);
- flat_p[1] = _mm_add_epi16(sum_lp, work0);
- flat_q[1] = _mm_add_epi16(sum_lq, _mm_srli_si128(work0, 8));
-
- flat_pq[0] = _mm_srli_epi16(_mm_unpacklo_epi64(flat_p[0], flat_q[0]), 3);  // repack p result low / q result high, then >> 3
- flat_pq[1] = _mm_srli_epi16(_mm_unpacklo_epi64(flat_p[1], flat_q[1]), 3);
-
- sum_lp = _mm_sub_epi16(sum_lp, q[1]);
- sum_lq = _mm_sub_epi16(sum_lq, pq[1]);
-
- sum_p3 = _mm_add_epi16(sum_p3, pq[3]);
- work0 = _mm_add_epi16(sum_p3, pq[2]);
-
- flat_p[2] = _mm_add_epi16(sum_lp, work0);
- flat_q[2] = _mm_add_epi16(sum_lq, _mm_srli_si128(work0, 8));
- flat_pq[2] = _mm_srli_epi16(_mm_unpacklo_epi64(flat_p[2], flat_q[2]), 3);
-
- int flat2_mask =
- (0xffff != _mm_movemask_epi8(_mm_cmpeq_epi16(flat2, zero)));  // 1 when at least one lane of flat2 is nonzero
- if (flat2_mask) {
- flat2_p[0] = _mm_add_epi16(sum_p_0, _mm_add_epi16(work0_0, q[0]));
- flat2_q[0] = _mm_add_epi16(
- sum_p_0, _mm_add_epi16(_mm_srli_si128(work0_0, 8), pq[0]));
-
- flat2_p[1] = _mm_add_epi16(sum_p, work0_1);
- flat2_q[1] = _mm_add_epi16(sum_q, _mm_srli_si128(work0_1, 8));
-
- flat2_pq[0] =
- _mm_srli_epi16(_mm_unpacklo_epi64(flat2_p[0], flat2_q[0]), 4);
- flat2_pq[1] =
- _mm_srli_epi16(_mm_unpacklo_epi64(flat2_p[1], flat2_q[1]), 4);
-
- sum_p = _mm_sub_epi16(sum_p, q[4]);
- sum_q = _mm_sub_epi16(sum_q, pq[4]);
-
- sum_p6 = _mm_add_epi16(sum_p6, pq[6]);  // one more copy of tap 6 per output step
- work0 = _mm_add_epi16(sum_p6,
- _mm_add_epi16(pq[2], _mm_add_epi16(pq[3], pq[1])));
- flat2_p[2] = _mm_add_epi16(sum_p, work0);
- flat2_q[2] = _mm_add_epi16(sum_q, _mm_srli_si128(work0, 8));
- flat2_pq[2] =
- _mm_srli_epi16(_mm_unpacklo_epi64(flat2_p[2], flat2_q[2]), 4);
-
- sum_p6 = _mm_add_epi16(sum_p6, pq[6]);
- sum_p = _mm_sub_epi16(sum_p, q[3]);
- sum_q = _mm_sub_epi16(sum_q, pq[3]);
-
- work0 = _mm_add_epi16(sum_p6,
- _mm_add_epi16(pq[3], _mm_add_epi16(pq[4], pq[2])));
- flat2_p[3] = _mm_add_epi16(sum_p, work0);
- flat2_q[3] = _mm_add_epi16(sum_q, _mm_srli_si128(work0, 8));
- flat2_pq[3] =
- _mm_srli_epi16(_mm_unpacklo_epi64(flat2_p[3], flat2_q[3]), 4);
-
- sum_p6 = _mm_add_epi16(sum_p6, pq[6]);
- sum_p = _mm_sub_epi16(sum_p, q[2]);
- sum_q = _mm_sub_epi16(sum_q, pq[2]);
-
- work0 = _mm_add_epi16(sum_p6,
- _mm_add_epi16(pq[4], _mm_add_epi16(pq[5], pq[3])));
- flat2_p[4] = _mm_add_epi16(sum_p, work0);
- flat2_q[4] = _mm_add_epi16(sum_q, _mm_srli_si128(work0, 8));
- flat2_pq[4] =
- _mm_srli_epi16(_mm_unpacklo_epi64(flat2_p[4], flat2_q[4]), 4);
-
- sum_p6 = _mm_add_epi16(sum_p6, pq[6]);
- sum_p = _mm_sub_epi16(sum_p, q[1]);
- sum_q = _mm_sub_epi16(sum_q, pq[1]);
-
- work0 = _mm_add_epi16(sum_p6,
- _mm_add_epi16(pq[5], _mm_add_epi16(pq[6], pq[4])));
- flat2_p[5] = _mm_add_epi16(sum_p, work0);
- flat2_q[5] = _mm_add_epi16(sum_q, _mm_srli_si128(work0, 8));
- flat2_pq[5] =
- _mm_srli_epi16(_mm_unpacklo_epi64(flat2_p[5], flat2_q[5]), 4);
- } // flat2
- // ~~~~~~~~~~ apply flat ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
- // highbd_filter8
- pq[0] = _mm_unpacklo_epi64(ps0ps1, qs0qs1);  // filter4 tap-0 results: p side low, q side high
- pq[1] = _mm_unpackhi_epi64(ps0ps1, qs0qs1);  // filter4 tap-1 results
-
- for (i = 0; i < 3; i++) {  // per lane: take flat_pq[i] where flat is set, keep pq[i] elsewhere
- pq[i] = _mm_andnot_si128(flat, pq[i]);
- flat_pq[i] = _mm_and_si128(flat, flat_pq[i]);
- pq[i] = _mm_or_si128(pq[i], flat_pq[i]);
- }
-
- // wide flat
- // ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
- if (flat2_mask) {
- for (i = 0; i < 6; i++) {  // per lane: take flat2_pq[i] where flat2 is set
- pq[i] = _mm_andnot_si128(flat2, pq[i]);
- flat2_pq[i] = _mm_and_si128(flat2, flat2_pq[i]);
- pq[i] = _mm_or_si128(pq[i], flat2_pq[i]); // full list of pq values
- }
- }
- } else {  // no lane is flat: only the two filter4 results change
- pq[0] = _mm_unpacklo_epi64(ps0ps1, qs0qs1);
- pq[1] = _mm_unpackhi_epi64(ps0ps1, qs0qs1);
- }
-}
-
-void aom_highbd_lpf_horizontal_14_sse2(uint16_t *s, int pitch,
- const uint8_t *blt, const uint8_t *lt,
- const uint8_t *thr, int bd) {
- __m128i above[7], below[7], merged[7];
- int row;
- // Load four samples from each of the 7 rows on either side of s.
- for (row = 0; row < 7; ++row) {
- above[row] = _mm_loadl_epi64((__m128i *)(s - (row + 1) * pitch));
- below[row] = _mm_loadl_epi64((__m128i *)(s + row * pitch));
- }
- highbd_lpf_internal_14_sse2(above, below, merged, blt, lt, thr, bd);
- // Only 6 rows per side are written back: low half -> above, high -> below.
- for (row = 0; row < 6; ++row) {
- const __m128i below_row = _mm_srli_si128(merged[row], 8);
- _mm_storel_epi64((__m128i *)(s - (row + 1) * pitch), merged[row]);
- _mm_storel_epi64((__m128i *)(s + row * pitch), below_row);
- }
-}
-
-// Shared body of the dual (8 lanes wide) 14-tap high-bitdepth loop filter.
-// p[0..6] / q[0..6] are the seven sample vectors on either side of the edge;
-// get_limit_dual puts the *0 thresholds in lanes 0-3 and the *1 thresholds in
-// lanes 4-7. On return p[0..5] / q[0..5] have been updated lane by lane: the
-// highbd_filter4 output where only `mask` applies, the >> 3 averages
-// (highbd_filter8) where `flat` is set and the >> 4 averages (highbd_filter16)
-// where `flat2` is set. p[6] and q[6] are read but never written.
-static AOM_FORCE_INLINE void highbd_lpf_internal_14_dual_sse2(
- __m128i *p, __m128i *q, const uint8_t *blt0, const uint8_t *lt0,
- const uint8_t *thr0, const uint8_t *blt1, const uint8_t *lt1,
- const uint8_t *thr1, int bd) {
- __m128i blimit, limit, thresh, t80;
- const __m128i zero = _mm_setzero_si128();
-
- get_limit_dual(blt0, lt0, thr0, blt1, lt1, thr1, bd, &blimit, &limit, &thresh,
- &t80);
- __m128i mask;
- highbd_filter_mask_dual(p, q, &limit, &blimit, &mask);
- __m128i flat, flat2;
- highbd_flat_mask4_dual_sse2(p, q, &flat, &flat2, bd);
-
- flat = _mm_and_si128(flat, mask);
- flat2 = _mm_and_si128(flat2, flat);
- __m128i ps[2], qs[2];
- highbd_filter4_dual_sse2(p, q, ps, qs, &mask, &thresh, bd, &t80);
- // flat and wide flat calculations
-
- // if flat ==0 then flat2 is zero as well and we don't need any calc below
- // sse4.1 if (0==_mm_test_all_zeros(flat,ff))
- if (0xffff != _mm_movemask_epi8(_mm_cmpeq_epi16(flat, zero))) {
- __m128i flat_p[3], flat_q[3];
- __m128i flat2_p[6], flat2_q[6];
- const __m128i eight = _mm_set1_epi16(8);
- const __m128i four = _mm_set1_epi16(4);
- __m128i sum_p_0 = _mm_add_epi16(p[5], _mm_add_epi16(p[4], p[3]));
- __m128i sum_q = _mm_add_epi16(q[5], _mm_add_epi16(q[4], q[3]));
- __m128i sum_lp = _mm_add_epi16(p[0], _mm_add_epi16(p[2], p[1]));
- sum_p_0 = _mm_add_epi16(sum_p_0, sum_lp);
- __m128i sum_lq = _mm_add_epi16(q[0], _mm_add_epi16(q[2], q[1]));
- sum_q = _mm_add_epi16(sum_q, sum_lq);
- sum_p_0 = _mm_add_epi16(eight, _mm_add_epi16(sum_p_0, sum_q));
- sum_lp = _mm_add_epi16(four, _mm_add_epi16(sum_lp, sum_lq));
- flat_p[0] =
- _mm_srli_epi16(_mm_add_epi16(sum_lp, _mm_add_epi16(p[3], p[0])), 3);
- flat_q[0] =
- _mm_srli_epi16(_mm_add_epi16(sum_lp, _mm_add_epi16(q[3], q[0])), 3);
- __m128i sum_p6 = _mm_add_epi16(p[6], p[6]);
- __m128i sum_q6 = _mm_add_epi16(q[6], q[6]);
- __m128i sum_p3 = _mm_add_epi16(p[3], p[3]);
- __m128i sum_q3 = _mm_add_epi16(q[3], q[3]);
-
- sum_q = _mm_sub_epi16(sum_p_0, p[5]);
- __m128i sum_p = _mm_sub_epi16(sum_p_0, q[5]);
-
- sum_lq = _mm_sub_epi16(sum_lp, p[2]);
- sum_lp = _mm_sub_epi16(sum_lp, q[2]);
- flat_p[1] =
- _mm_srli_epi16(_mm_add_epi16(sum_lp, _mm_add_epi16(sum_p3, p[1])), 3);
- flat_q[1] =
- _mm_srli_epi16(_mm_add_epi16(sum_lq, _mm_add_epi16(sum_q3, q[1])), 3);
-
- sum_lp = _mm_sub_epi16(sum_lp, q[1]);
- sum_lq = _mm_sub_epi16(sum_lq, p[1]);
- sum_p3 = _mm_add_epi16(sum_p3, p[3]);
- sum_q3 = _mm_add_epi16(sum_q3, q[3]);
- flat_p[2] =
- _mm_srli_epi16(_mm_add_epi16(sum_lp, _mm_add_epi16(sum_p3, p[2])), 3);
- flat_q[2] =
- _mm_srli_epi16(_mm_add_epi16(sum_lq, _mm_add_epi16(sum_q3, q[2])), 3);
-
- int flat2_mask =
- (0xffff != _mm_movemask_epi8(_mm_cmpeq_epi16(flat2, zero)));
- if (flat2_mask) {
- flat2_p[0] = _mm_srli_epi16(
- _mm_add_epi16(sum_p_0, _mm_add_epi16(_mm_add_epi16(p[6], p[0]),
- _mm_add_epi16(p[1], q[0]))),
- 4);
- flat2_q[0] = _mm_srli_epi16(
- _mm_add_epi16(sum_p_0, _mm_add_epi16(_mm_add_epi16(q[6], q[0]),
- _mm_add_epi16(p[0], q[1]))),
- 4);
-
- flat2_p[1] = _mm_srli_epi16(
- _mm_add_epi16(
- sum_p,
- _mm_add_epi16(sum_p6,
- _mm_add_epi16(p[1], _mm_add_epi16(p[2], p[0])))),
- 4);
- flat2_q[1] = _mm_srli_epi16(
- _mm_add_epi16(
- sum_q,
- _mm_add_epi16(sum_q6,
- _mm_add_epi16(q[1], _mm_add_epi16(q[0], q[2])))),
- 4);
- sum_p6 = _mm_add_epi16(sum_p6, p[6]);
- sum_q6 = _mm_add_epi16(sum_q6, q[6]);
- sum_p = _mm_sub_epi16(sum_p, q[4]);
- sum_q = _mm_sub_epi16(sum_q, p[4]);
- flat2_p[2] = _mm_srli_epi16(
- _mm_add_epi16(
- sum_p,
- _mm_add_epi16(sum_p6,
- _mm_add_epi16(p[2], _mm_add_epi16(p[3], p[1])))),
- 4);
- flat2_q[2] = _mm_srli_epi16(
- _mm_add_epi16(
- sum_q,
- _mm_add_epi16(sum_q6,
- _mm_add_epi16(q[2], _mm_add_epi16(q[1], q[3])))),
- 4);
- sum_p6 = _mm_add_epi16(sum_p6, p[6]);
- sum_q6 = _mm_add_epi16(sum_q6, q[6]);
- sum_p = _mm_sub_epi16(sum_p, q[3]);
- sum_q = _mm_sub_epi16(sum_q, p[3]);
- flat2_p[3] = _mm_srli_epi16(
- _mm_add_epi16(
- sum_p,
- _mm_add_epi16(sum_p6,
- _mm_add_epi16(p[3], _mm_add_epi16(p[4], p[2])))),
- 4);
- flat2_q[3] = _mm_srli_epi16(
- _mm_add_epi16(
- sum_q,
- _mm_add_epi16(sum_q6,
- _mm_add_epi16(q[3], _mm_add_epi16(q[2], q[4])))),
- 4);
- sum_p6 = _mm_add_epi16(sum_p6, p[6]);
- sum_q6 = _mm_add_epi16(sum_q6, q[6]);
- sum_p = _mm_sub_epi16(sum_p, q[2]);
- sum_q = _mm_sub_epi16(sum_q, p[2]);
- flat2_p[4] = _mm_srli_epi16(
- _mm_add_epi16(
- sum_p,
- _mm_add_epi16(sum_p6,
- _mm_add_epi16(p[4], _mm_add_epi16(p[5], p[3])))),
- 4);
- flat2_q[4] = _mm_srli_epi16(
- _mm_add_epi16(
- sum_q,
- _mm_add_epi16(sum_q6,
- _mm_add_epi16(q[4], _mm_add_epi16(q[3], q[5])))),
- 4);
- sum_p6 = _mm_add_epi16(sum_p6, p[6]);
- sum_q6 = _mm_add_epi16(sum_q6, q[6]);
- sum_p = _mm_sub_epi16(sum_p, q[1]);
- sum_q = _mm_sub_epi16(sum_q, p[1]);
- flat2_p[5] = _mm_srli_epi16(
- _mm_add_epi16(
- sum_p,
- _mm_add_epi16(sum_p6,
- _mm_add_epi16(p[5], _mm_add_epi16(p[6], p[4])))),
- 4);
- flat2_q[5] = _mm_srli_epi16(
- _mm_add_epi16(
- sum_q,
- _mm_add_epi16(sum_q6,
- _mm_add_epi16(q[5], _mm_add_epi16(q[4], q[6])))),
- 4);
- }
- // highbd_filter8
- // Taps 0 and 1 blend the filter4 results (ps / qs) with the flat averages.
- int i;
- for (i = 0; i < 2; i++) {
- ps[i] = _mm_andnot_si128(flat, ps[i]);
- flat_p[i] = _mm_and_si128(flat, flat_p[i]);
- p[i] = _mm_or_si128(ps[i], flat_p[i]);
- qs[i] = _mm_andnot_si128(flat, qs[i]);
- flat_q[i] = _mm_and_si128(flat, flat_q[i]);
- q[i] = _mm_or_si128(qs[i], flat_q[i]);
- }
- p[2] = _mm_andnot_si128(flat, p[2]);
- // p2 remains unchanged if !(flat && mask)
- flat_p[2] = _mm_and_si128(flat, flat_p[2]);
- // when (flat && mask)
- p[2] = _mm_or_si128(p[2], flat_p[2]); // full list of p2 values
- q[2] = _mm_andnot_si128(flat, q[2]);
- flat_q[2] = _mm_and_si128(flat, flat_q[2]);
- q[2] = _mm_or_si128(q[2], flat_q[2]); // full list of q2 values
-
- // highbd_filter16
- if (flat2_mask) {
- for (i = 0; i < 6; i++) {
- // p[i] remains unchanged if !(flat2 && flat && mask)
- p[i] = _mm_andnot_si128(flat2, p[i]);
- flat2_p[i] = _mm_and_si128(flat2, flat2_p[i]);
- // get values for when (flat2 && flat && mask)
- p[i] = _mm_or_si128(p[i], flat2_p[i]); // full list of p values
- q[i] = _mm_andnot_si128(flat2, q[i]);
- flat2_q[i] = _mm_and_si128(flat2, flat2_q[i]);
- q[i] = _mm_or_si128(q[i], flat2_q[i]);
- }
- }
- } else {
- p[0] = ps[0];
- q[0] = qs[0];
- p[1] = ps[1];
- q[1] = qs[1];
- }
-}
-
-void aom_highbd_lpf_horizontal_14_dual_sse2(
- uint16_t *s, int pitch, const uint8_t *_blimit0, const uint8_t *_limit0,
- const uint8_t *_thresh0, const uint8_t *_blimit1, const uint8_t *_limit1,
- const uint8_t *_thresh1, int bd) {
- __m128i p[7], q[7];
- int i;
- load_highbd_pixel(s, 7, pitch, p, q);
- // p[] / q[] now hold 7 rows of 8 samples per side, read with unaligned loads.
- highbd_lpf_internal_14_dual_sse2(p, q, _blimit0, _limit0, _thresh0, _blimit1,
- _limit1, _thresh1, bd);
- // Write back 6 rows per side with unaligned stores, matching the loads.
- for (i = 0; i < 6; i++) {
- _mm_storeu_si128((__m128i *)(s - (i + 1) * pitch), p[i]);
- _mm_storeu_si128((__m128i *)(s + i * pitch), q[i]);
- }
-}
-
-static AOM_FORCE_INLINE void highbd_lpf_internal_6_sse2(  // 6-tap filter core; results are returned packed in *p1p0_out / *q1q0_out
- __m128i *p2, __m128i *p1, __m128i *p0, __m128i *q0, __m128i *q1,
- __m128i *q2, __m128i *p1p0_out, __m128i *q1q0_out, const uint8_t *_blimit,
- const uint8_t *_limit, const uint8_t *_thresh, int bd) {
- __m128i blimit, limit, thresh;
- __m128i mask, hev, flat;
- __m128i pq[3];
- __m128i p1p0, q1q0, abs_p1p0, ps1ps0, qs1qs0;
- __m128i flat_p1p0, flat_q0q1;
-
- pq[0] = _mm_unpacklo_epi64(*p0, *q0);  // low 64 bits = p tap, high 64 bits = q tap
- pq[1] = _mm_unpacklo_epi64(*p1, *q1);
- pq[2] = _mm_unpacklo_epi64(*p2, *q2);
-
- const __m128i zero = _mm_setzero_si128();
- const __m128i four = _mm_set1_epi16(4);
- __m128i t80;
- const __m128i one = _mm_set1_epi16(0x1);
-
- get_limit(_blimit, _limit, _thresh, bd, &blimit, &limit, &thresh, &t80);
-
- highbd_hev_filter_mask_x_sse2(pq, 3, &p1p0, &q1q0, &abs_p1p0, &limit, &blimit,
- &thresh, &hev, &mask);
-
- // lp filter
- highbd_filter4_sse2(&p1p0, &q1q0, &hev, &mask, q1q0_out, p1p0_out, &t80, bd);
-
- // flat_mask
- flat = _mm_max_epi16(abs_diff16(pq[2], pq[0]), abs_p1p0);
- flat = _mm_max_epi16(flat, _mm_srli_si128(flat, 8));  // merge the q half into the p half
-
- flat = _mm_subs_epu16(flat, _mm_slli_epi16(one, bd - 8));  // threshold is 1 << (bd - 8)
-
- flat = _mm_cmpeq_epi16(flat, zero);
- flat = _mm_and_si128(flat, mask);
- // replicate for the further "merged variables" usage
- flat = _mm_unpacklo_epi64(flat, flat);
-
- // 5 tap filter
- // need it only if flat !=0
- if (0xffff != _mm_movemask_epi8(_mm_cmpeq_epi16(flat, zero))) {  // true when at least one lane of flat is nonzero
- __m128i workp_a, workp_b, workp_c;
- __m128i pq0x2_pq1, pq1_pq2;
-
- // op1
- pq0x2_pq1 =
- _mm_add_epi16(_mm_add_epi16(pq[0], pq[0]), pq[1]); // p0 *2 + p1
- pq1_pq2 = _mm_add_epi16(pq[1], pq[2]); // p1 + p2
- workp_a = _mm_add_epi16(_mm_add_epi16(pq0x2_pq1, four),
- pq1_pq2); // p2 + p0 * 2 + p1 * 2 + 4
-
- workp_b = _mm_add_epi16(_mm_add_epi16(pq[2], pq[2]), *q0);
- workp_b =
- _mm_add_epi16(workp_a, workp_b); // p2 * 3 + p1 * 2 + p0 * 2 + q0 + 4
-
- // op0
- workp_c = _mm_srli_si128(pq0x2_pq1, 8); // q0 * 2 + q1
- workp_a = _mm_add_epi16(workp_a,
- workp_c); // p2 + p0 * 2 + p1 * 2 + q0 * 2 + q1 + 4
- workp_b = _mm_unpacklo_epi64(workp_a, workp_b);  // low half: op0 sum, high half: op1 sum
- flat_p1p0 = _mm_srli_epi16(workp_b, 3);
-
- // oq0
- workp_a = _mm_sub_epi16(_mm_sub_epi16(workp_a, pq[2]),
- pq[1]); // p0 * 2 + p1 + q0 * 2 + q1 + 4
- workp_b = _mm_srli_si128(pq1_pq2, 8);
- workp_a = _mm_add_epi16(
- workp_a, workp_b); // p0 * 2 + p1 + q0 * 2 + q1 * 2 + q2 + 4
- // workp_shft0 = _mm_srli_epi16(workp_a, 3);
-
- // oq1
- workp_c = _mm_sub_epi16(_mm_sub_epi16(workp_a, pq[1]),
- pq[0]); // p0 + q0 * 2 + q1 * 2 + q2 + 4
- workp_b = _mm_add_epi16(*q2, *q2);
- workp_b =
- _mm_add_epi16(workp_c, workp_b); // p0 + q0 * 2 + q1 * 2 + q2 * 3 + 4
-
- workp_a = _mm_unpacklo_epi64(workp_a, workp_b);  // low half: oq0 sum, high half: oq1 sum
- flat_q0q1 = _mm_srli_epi16(workp_a, 3);
-
- qs1qs0 = _mm_andnot_si128(flat, *q1q0_out);  // per lane: flat result where flat is set, filter4 result elsewhere
- q1q0 = _mm_and_si128(flat, flat_q0q1);
- *q1q0_out = _mm_or_si128(qs1qs0, q1q0);
-
- ps1ps0 = _mm_andnot_si128(flat, *p1p0_out);
- p1p0 = _mm_and_si128(flat, flat_p1p0);
- *p1p0_out = _mm_or_si128(ps1ps0, p1p0);
- }
-}
-
-static AOM_FORCE_INLINE void highbd_lpf_internal_6_dual_sse2(  // 6-tap filter core on 8 lanes; updates *p1, *p0, *q0, *q1 in place
- __m128i *p2, __m128i *p1, __m128i *p0, __m128i *q0, __m128i *q1,
- __m128i *q2, const unsigned char *_blimit0, const unsigned char *_limit0,
- const unsigned char *_thresh0, const unsigned char *_blimit1,
- const unsigned char *_limit1, const unsigned char *_thresh1, int bd) {
- const __m128i zero = _mm_setzero_si128();
- __m128i blimit0, limit0, thresh0;
- __m128i t80;
- __m128i mask, flat, work;
- __m128i abs_p1q1, abs_p0q0, abs_p1p0, abs_p2p1, abs_q1q0, abs_q2q1;
- __m128i op1, op0, oq0, oq1;
- const __m128i four = _mm_set1_epi16(4);
- const __m128i one = _mm_set1_epi16(0x1);
- const __m128i ffff = _mm_cmpeq_epi16(one, one);  // all-ones constant
-
- get_limit_dual(_blimit0, _limit0, _thresh0, _blimit1, _limit1, _thresh1, bd,
- &blimit0, &limit0, &thresh0, &t80);
-
- abs_p2p1 = abs_diff16(*p2, *p1);
- abs_p1p0 = abs_diff16(*p1, *p0);
- abs_q1q0 = abs_diff16(*q1, *q0);
- abs_q2q1 = abs_diff16(*q2, *q1);
-
- abs_p0q0 = abs_diff16(*p0, *q0);
- abs_p1q1 = abs_diff16(*p1, *q1);
-
- abs_p0q0 = _mm_adds_epu16(abs_p0q0, abs_p0q0);  // |p0 - q0| * 2, saturating
- abs_p1q1 = _mm_srli_epi16(abs_p1q1, 1);  // |p1 - q1| / 2
- mask = _mm_subs_epu16(_mm_adds_epu16(abs_p0q0, abs_p1q1), blimit0);
- mask = _mm_xor_si128(_mm_cmpeq_epi16(mask, zero), ffff);
- // mask |= (abs(*p0 - *q0) * 2 + abs(*p1 - *q1) / 2 > blimit) * -1;
- // So taking maximums continues to work:
- mask = _mm_and_si128(mask, _mm_adds_epu16(limit0, one));
-
- mask = _mm_max_epi16(abs_q2q1, mask);
- work = _mm_max_epi16(abs_p1p0, abs_q1q0);  // kept for the flat test further down
- mask = _mm_max_epi16(work, mask);
- mask = _mm_max_epi16(mask, abs_p2p1);
- mask = _mm_subs_epu16(mask, limit0);
- mask = _mm_cmpeq_epi16(mask, zero);  // all-ones in lanes where every test passed
-
- // lp filter
- __m128i ps[2], qs[2], p[2], q[2];
- {
- p[0] = *p0;
- p[1] = *p1;
- q[0] = *q0;
- q[1] = *q1;
- // filter_mask and hev_mask
- highbd_filter4_dual_sse2(p, q, ps, qs, &mask, &thresh0, bd, &t80);
- }
-
- // flat_mask
- flat = _mm_max_epi16(abs_diff16(*q2, *q0), abs_diff16(*p2, *p0));
- flat = _mm_max_epi16(flat, work);
-
- flat = _mm_subs_epu16(flat, _mm_slli_epi16(one, bd - 8));  // threshold is 1 << (bd - 8)
-
- flat = _mm_cmpeq_epi16(flat, zero);
- flat = _mm_and_si128(flat, mask); // flat & mask
-
- // 5 tap filter
- // need it only if flat !=0
- if (0xffff != _mm_movemask_epi8(_mm_cmpeq_epi16(flat, zero))) {  // true when at least one lane of flat is nonzero
- __m128i workp_a, workp_b, workp_shft0, workp_shft1;
-
- // op1
- workp_a = _mm_add_epi16(_mm_add_epi16(*p0, *p0),
- _mm_add_epi16(*p1, *p1)); // *p0 *2 + *p1 * 2
- workp_a = _mm_add_epi16(_mm_add_epi16(workp_a, four),
- *p2); // *p2 + *p0 * 2 + *p1 * 2 + 4
-
- workp_b = _mm_add_epi16(_mm_add_epi16(*p2, *p2), *q0);
- workp_shft0 = _mm_add_epi16(
- workp_a, workp_b); // *p2 * 3 + *p1 * 2 + *p0 * 2 + *q0 + 4
- op1 = _mm_srli_epi16(workp_shft0, 3);
-
- // op0
- workp_b = _mm_add_epi16(_mm_add_epi16(*q0, *q0), *q1); // *q0 * 2 + *q1
- workp_a =
- _mm_add_epi16(workp_a,
- workp_b); // *p2 + *p0 * 2 + *p1 * 2 + *q0 * 2 + *q1 + 4
- op0 = _mm_srli_epi16(workp_a, 3);
-
- // oq0
- workp_a = _mm_sub_epi16(_mm_sub_epi16(workp_a, *p2),
- *p1); // *p0 * 2 + *p1 + *q0 * 2 + *q1 + 4
- workp_b = _mm_add_epi16(*q1, *q2);
- workp_shft0 = _mm_add_epi16(
- workp_a, workp_b); // *p0 * 2 + *p1 + *q0 * 2 + *q1 * 2 + *q2 + 4
- oq0 = _mm_srli_epi16(workp_shft0, 3);
-
- // oq1
- workp_a = _mm_sub_epi16(_mm_sub_epi16(workp_shft0, *p1),
- *p0); // *p0 + *q0 * 2 + *q1 * 2 + *q2 + 4
- workp_b = _mm_add_epi16(*q2, *q2);
- workp_shft1 = _mm_add_epi16(
- workp_a, workp_b); // *p0 + *q0 * 2 + *q1 * 2 + *q2 * 3 + 4
- oq1 = _mm_srli_epi16(workp_shft1, 3);
-
- qs[0] = _mm_andnot_si128(flat, qs[0]);  // per lane: flat result where flat is set, filter4 result elsewhere
- oq0 = _mm_and_si128(flat, oq0);
- *q0 = _mm_or_si128(qs[0], oq0);
-
- qs[1] = _mm_andnot_si128(flat, qs[1]);
- oq1 = _mm_and_si128(flat, oq1);
- *q1 = _mm_or_si128(qs[1], oq1);
-
- ps[0] = _mm_andnot_si128(flat, ps[0]);
- op0 = _mm_and_si128(flat, op0);
- *p0 = _mm_or_si128(ps[0], op0);
-
- ps[1] = _mm_andnot_si128(flat, ps[1]);
- op1 = _mm_and_si128(flat, op1);
- *p1 = _mm_or_si128(ps[1], op1);
- } else {  // no lane is flat: output the filter4 results directly
- *q0 = qs[0];
- *q1 = qs[1];
- *p0 = ps[0];
- *p1 = ps[1];
- }
-}
-
-void aom_highbd_lpf_horizontal_6_sse2(uint16_t *s, int p,
- const uint8_t *_blimit,
- const uint8_t *_limit,
- const uint8_t *_thresh, int bd) {
- __m128i *const dst_p1 = (__m128i *)(s - 2 * p);
- __m128i *const dst_p0 = (__m128i *)(s - 1 * p);
- __m128i *const dst_q0 = (__m128i *)(s + 0 * p);
- __m128i *const dst_q1 = (__m128i *)(s + 1 * p);
- __m128i p2 = _mm_loadl_epi64((__m128i *)(s - 3 * p));
- __m128i p1 = _mm_loadl_epi64(dst_p1), p0 = _mm_loadl_epi64(dst_p0);
- __m128i q0 = _mm_loadl_epi64(dst_q0), q1 = _mm_loadl_epi64(dst_q1);
- __m128i q2 = _mm_loadl_epi64((__m128i *)(s + 2 * p));
- __m128i p1p0_out, q1q0_out;
- highbd_lpf_internal_6_sse2(&p2, &p1, &p0, &q0, &q1, &q2, &p1p0_out, &q1q0_out,
- _blimit, _limit, _thresh, bd);
- // Low 64 bits of each result go to the p0 / q0 rows, high 64 bits to p1 / q1.
- _mm_storel_epi64(dst_p1, _mm_srli_si128(p1p0_out, 8));
- _mm_storel_epi64(dst_p0, p1p0_out);
- _mm_storel_epi64(dst_q0, q1q0_out);
- _mm_storel_epi64(dst_q1, _mm_srli_si128(q1q0_out, 8));
-}
-
-void aom_highbd_lpf_horizontal_6_dual_sse2(
- uint16_t *s, int p, const uint8_t *_blimit0, const uint8_t *_limit0,
- const uint8_t *_thresh0, const uint8_t *_blimit1, const uint8_t *_limit1,
- const uint8_t *_thresh1, int bd) {
- __m128i p2, p1, p0, q0, q1, q2;
-
- p2 = _mm_loadu_si128((__m128i *)(s - 3 * p));
- p1 = _mm_loadu_si128((__m128i *)(s - 2 * p));
- p0 = _mm_loadu_si128((__m128i *)(s - 1 * p));
- q0 = _mm_loadu_si128((__m128i *)(s + 0 * p));
- q1 = _mm_loadu_si128((__m128i *)(s + 1 * p));
- q2 = _mm_loadu_si128((__m128i *)(s + 2 * p));
-
- highbd_lpf_internal_6_dual_sse2(&p2, &p1, &p0, &q0, &q1, &q2, _blimit0,
- _limit0, _thresh0, _blimit1, _limit1,
- _thresh1, bd);
-
- _mm_storeu_si128((__m128i *)(s - 2 * p), p1);
- _mm_storeu_si128((__m128i *)(s - 1 * p), p0);
- _mm_storeu_si128((__m128i *)(s + 0 * p), q0);
- _mm_storeu_si128((__m128i *)(s + 1 * p), q1);
-}
-
-static AOM_FORCE_INLINE void highbd_lpf_internal_8_sse2(
- __m128i *p3, __m128i *q3, __m128i *p2, __m128i *q2, __m128i *p1,
- __m128i *q1, __m128i *p0, __m128i *q0, __m128i *q1q0_out, __m128i *p1p0_out,
- const unsigned char *_blimit, const unsigned char *_limit,
- const unsigned char *_thresh, int bd) {
- const __m128i zero = _mm_setzero_si128();
- __m128i blimit, limit, thresh;
- __m128i mask, hev, flat;
- __m128i pq[4];
- __m128i p1p0, q1q0, ps1ps0, qs1qs0;
- __m128i work_a, opq2, flat_p1p0, flat_q0q1;
-
- pq[0] = _mm_unpacklo_epi64(*p0, *q0);
- pq[1] = _mm_unpacklo_epi64(*p1, *q1);
- pq[2] = _mm_unpacklo_epi64(*p2, *q2);
- pq[3] = _mm_unpacklo_epi64(*p3, *q3);
-
- __m128i abs_p1p0;
-
- const __m128i four = _mm_set1_epi16(4);
- __m128i t80;
- const __m128i one = _mm_set1_epi16(0x1);
-
- get_limit(_blimit, _limit, _thresh, bd, &blimit, &limit, &thresh, &t80);
-
- highbd_hev_filter_mask_x_sse2(pq, 4, &p1p0, &q1q0, &abs_p1p0, &limit, &blimit,
- &thresh, &hev, &mask);
-
- // lp filter
- highbd_filter4_sse2(&p1p0, &q1q0, &hev, &mask, q1q0_out, p1p0_out, &t80, bd);
-
- // flat_mask4
- flat = _mm_max_epi16(abs_diff16(pq[2], pq[0]), abs_diff16(pq[3], pq[0]));
- flat = _mm_max_epi16(abs_p1p0, flat);
- flat = _mm_max_epi16(flat, _mm_srli_si128(flat, 8));
-
- flat = _mm_subs_epu16(flat, _mm_slli_epi16(one, bd - 8));
-
- flat = _mm_cmpeq_epi16(flat, zero);
- flat = _mm_and_si128(flat, mask);
- // replicate for the further "merged variables" usage
- flat = _mm_unpacklo_epi64(flat, flat);
-
- if (0xffff != _mm_movemask_epi8(_mm_cmpeq_epi16(flat, zero))) {
- __m128i workp_a, workp_b, workp_c, workp_shft0, workp_shft1;
- // Added before shift for rounding part of ROUND_POWER_OF_TWO
-
- // o*p2
- workp_a = _mm_add_epi16(_mm_add_epi16(*p3, *p3), _mm_add_epi16(*p2, *p1));
- workp_a = _mm_add_epi16(_mm_add_epi16(workp_a, four), *p0);
- workp_c = _mm_add_epi16(_mm_add_epi16(*q0, *p2), *p3);
- workp_c = _mm_add_epi16(workp_a, workp_c);
-
- // o*p1
- workp_b = _mm_add_epi16(_mm_add_epi16(*q0, *q1), *p1);
- workp_shft0 = _mm_add_epi16(workp_a, workp_b);
-
- // o*p0
- workp_a = _mm_add_epi16(_mm_sub_epi16(workp_a, *p3), *q2);
- workp_b = _mm_add_epi16(_mm_sub_epi16(workp_b, *p1), *p0);
- workp_shft1 = _mm_add_epi16(workp_a, workp_b);
-
- flat_p1p0 = _mm_srli_epi16(_mm_unpacklo_epi64(workp_shft1, workp_shft0), 3);
-
- // oq0
- workp_a = _mm_add_epi16(_mm_sub_epi16(workp_a, *p3), *q3);
- workp_b = _mm_add_epi16(_mm_sub_epi16(workp_b, *p0), *q0);
- workp_shft0 = _mm_add_epi16(workp_a, workp_b);
-
- // oq1
- workp_a = _mm_add_epi16(_mm_sub_epi16(workp_a, *p2), *q3);
- workp_b = _mm_add_epi16(_mm_sub_epi16(workp_b, *q0), *q1);
- workp_shft1 = _mm_add_epi16(workp_a, workp_b);
-
- flat_q0q1 = _mm_srli_epi16(_mm_unpacklo_epi64(workp_shft0, workp_shft1), 3);
-
- // oq2
- workp_a = _mm_add_epi16(_mm_sub_epi16(workp_a, *p1), *q3);
- workp_b = _mm_add_epi16(_mm_sub_epi16(workp_b, *q1), *q2);
- workp_a = _mm_add_epi16(workp_a, workp_b);
- opq2 = _mm_srli_epi16(_mm_unpacklo_epi64(workp_c, workp_a), 3);
-
- qs1qs0 = _mm_andnot_si128(flat, *q1q0_out);
- q1q0 = _mm_and_si128(flat, flat_q0q1);
- *q1q0_out = _mm_or_si128(qs1qs0, q1q0);
-
- ps1ps0 = _mm_andnot_si128(flat, *p1p0_out);
- p1p0 = _mm_and_si128(flat, flat_p1p0);
- *p1p0_out = _mm_or_si128(ps1ps0, p1p0);
-
- work_a = _mm_andnot_si128(flat, pq[2]);
- *p2 = _mm_and_si128(flat, opq2);
- *p2 = _mm_or_si128(work_a, *p2);
- *q2 = _mm_srli_si128(*p2, 8);
- }
-}
-
-static AOM_FORCE_INLINE void highbd_lpf_internal_8_dual_sse2(
- __m128i *p3, __m128i *q3, __m128i *p2, __m128i *q2, __m128i *p1,
- __m128i *q1, __m128i *p0, __m128i *q0, const unsigned char *_blimit0,
- const unsigned char *_limit0, const unsigned char *_thresh0,
- const unsigned char *_blimit1, const unsigned char *_limit1,
- const unsigned char *_thresh1, int bd) {
- __m128i blimit0, limit0, thresh0;
- __m128i t80;
- __m128i mask, flat;
- __m128i work_a, op2, oq2, op1, op0, oq0, oq1;
- __m128i abs_p1q1, abs_p0q0, work0, work1, work2;
-
- const __m128i zero = _mm_setzero_si128();
- const __m128i four = _mm_set1_epi16(4);
- const __m128i one = _mm_set1_epi16(0x1);
- const __m128i ffff = _mm_cmpeq_epi16(one, one);
-
- get_limit_dual(_blimit0, _limit0, _thresh0, _blimit1, _limit1, _thresh1, bd,
- &blimit0, &limit0, &thresh0, &t80);
-
- abs_p0q0 = abs_diff16(*p0, *q0);
- abs_p1q1 = abs_diff16(*p1, *q1);
-
- abs_p0q0 = _mm_adds_epu16(abs_p0q0, abs_p0q0);
- abs_p1q1 = _mm_srli_epi16(abs_p1q1, 1);
- mask = _mm_subs_epu16(_mm_adds_epu16(abs_p0q0, abs_p1q1), blimit0);
- mask = _mm_xor_si128(_mm_cmpeq_epi16(mask, zero), ffff);
- // mask |= (abs(*p0 - q0) * 2 + abs(*p1 - q1) / 2 > blimit) * -1;
-
- // So taking maximums continues to work:
- mask = _mm_and_si128(mask, _mm_adds_epu16(limit0, one));
-
- work0 = _mm_max_epi16(abs_diff16(*p3, *p2), abs_diff16(*p2, *p1));
- work1 =
- _mm_max_epi16(abs_diff16(*p1, *p0), abs_diff16(*q1, *q0)); // tbu 4 flat
- work0 = _mm_max_epi16(work0, work1);
- work2 = _mm_max_epi16(abs_diff16(*q2, *q1), abs_diff16(*q2, *q3));
- work2 = _mm_max_epi16(work2, work0);
- mask = _mm_max_epi16(work2, mask);
-
- mask = _mm_subs_epu16(mask, limit0);
- mask = _mm_cmpeq_epi16(mask, zero);
-
- // lp filter
- __m128i ps[2], qs[2], p[2], q[2];
- {
- p[0] = *p0;
- p[1] = *p1;
- q[0] = *q0;
- q[1] = *q1;
- // filter_mask and hev_mask
- highbd_filter4_dual_sse2(p, q, ps, qs, &mask, &thresh0, bd, &t80);
- }
-
- flat = _mm_max_epi16(abs_diff16(*p2, *p0), abs_diff16(*q2, *q0));
- flat = _mm_max_epi16(work1, flat);
- work0 = _mm_max_epi16(abs_diff16(*p3, *p0), abs_diff16(*q3, *q0));
- flat = _mm_max_epi16(work0, flat);
-
- flat = _mm_subs_epu16(flat, _mm_slli_epi16(one, bd - 8));
- flat = _mm_cmpeq_epi16(flat, zero);
- flat = _mm_and_si128(flat, mask); // flat & mask
-
- // filter8 need it only if flat !=0
- if (0xffff != _mm_movemask_epi8(_mm_cmpeq_epi16(flat, zero))) {
- __m128i workp_a, workp_b;
- // Added before shift for rounding part of ROUND_POWER_OF_TWO
-
- // o*p2
- workp_a = _mm_add_epi16(_mm_add_epi16(*p3, *p3), _mm_add_epi16(*p2, *p1));
- workp_a = _mm_add_epi16(_mm_add_epi16(workp_a, four), *p0);
- workp_b = _mm_add_epi16(_mm_add_epi16(*q0, *p2), *p3);
- op2 = _mm_srli_epi16(_mm_add_epi16(workp_a, workp_b), 3);
-
- // o*p1
- workp_b = _mm_add_epi16(_mm_add_epi16(*q0, *q1), *p1);
- op1 = _mm_srli_epi16(_mm_add_epi16(workp_a, workp_b), 3);
-
- // o*p0
- workp_a = _mm_add_epi16(_mm_sub_epi16(workp_a, *p3), *q2);
- workp_b = _mm_add_epi16(_mm_sub_epi16(workp_b, *p1), *p0);
- op0 = _mm_srli_epi16(_mm_add_epi16(workp_a, workp_b), 3);
-
- // oq0
- workp_a = _mm_add_epi16(_mm_sub_epi16(workp_a, *p3), *q3);
- workp_b = _mm_add_epi16(_mm_sub_epi16(workp_b, *p0), *q0);
- oq0 = _mm_srli_epi16(_mm_add_epi16(workp_a, workp_b), 3);
-
- // oq1
- workp_a = _mm_add_epi16(_mm_sub_epi16(workp_a, *p2), *q3);
- workp_b = _mm_add_epi16(_mm_sub_epi16(workp_b, *q0), *q1);
- oq1 = _mm_srli_epi16(_mm_add_epi16(workp_a, workp_b), 3);
-
- // oq2
- workp_a = _mm_add_epi16(_mm_sub_epi16(workp_a, *p1), *q3);
- workp_b = _mm_add_epi16(_mm_sub_epi16(workp_b, *q1), *q2);
- oq2 = _mm_srli_epi16(_mm_add_epi16(workp_a, workp_b), 3);
-
- qs[0] = _mm_andnot_si128(flat, qs[0]);
- oq0 = _mm_and_si128(flat, oq0);
- *q0 = _mm_or_si128(qs[0], oq0);
-
- qs[1] = _mm_andnot_si128(flat, qs[1]);
- oq1 = _mm_and_si128(flat, oq1);
- *q1 = _mm_or_si128(qs[1], oq1);
-
- ps[0] = _mm_andnot_si128(flat, ps[0]);
- op0 = _mm_and_si128(flat, op0);
- *p0 = _mm_or_si128(ps[0], op0);
-
- ps[1] = _mm_andnot_si128(flat, ps[1]);
- op1 = _mm_and_si128(flat, op1);
- *p1 = _mm_or_si128(ps[1], op1);
-
- work_a = _mm_andnot_si128(flat, *q2);
- *q2 = _mm_and_si128(flat, oq2);
- *q2 = _mm_or_si128(work_a, *q2);
-
- work_a = _mm_andnot_si128(flat, *p2);
- *p2 = _mm_and_si128(flat, op2);
- *p2 = _mm_or_si128(work_a, *p2);
- } else {
- *q0 = qs[0];
- *q1 = qs[1];
- *p0 = ps[0];
- *p1 = ps[1];
- }
-}
-
-void aom_highbd_lpf_horizontal_8_sse2(uint16_t *s, int p,
- const uint8_t *_blimit,
- const uint8_t *_limit,
- const uint8_t *_thresh, int bd) {
- __m128i p2, p1, p0, q0, q1, q2, p3, q3;
- __m128i q1q0, p1p0;
-
- p3 = _mm_loadl_epi64((__m128i *)(s - 4 * p));
- q3 = _mm_loadl_epi64((__m128i *)(s + 3 * p));
- p2 = _mm_loadl_epi64((__m128i *)(s - 3 * p));
- q2 = _mm_loadl_epi64((__m128i *)(s + 2 * p));
- p1 = _mm_loadl_epi64((__m128i *)(s - 2 * p));
- q1 = _mm_loadl_epi64((__m128i *)(s + 1 * p));
- p0 = _mm_loadl_epi64((__m128i *)(s - 1 * p));
- q0 = _mm_loadl_epi64((__m128i *)(s + 0 * p));
-
- highbd_lpf_internal_8_sse2(&p3, &q3, &p2, &q2, &p1, &q1, &p0, &q0, &q1q0,
- &p1p0, _blimit, _limit, _thresh, bd);
-
- _mm_storel_epi64((__m128i *)(s - 3 * p), p2);
- _mm_storel_epi64((__m128i *)(s - 2 * p), _mm_srli_si128(p1p0, 8));
- _mm_storel_epi64((__m128i *)(s - 1 * p), p1p0);
- _mm_storel_epi64((__m128i *)(s + 0 * p), q1q0);
- _mm_storel_epi64((__m128i *)(s + 1 * p), _mm_srli_si128(q1q0, 8));
- _mm_storel_epi64((__m128i *)(s + 2 * p), q2);
-}
-
-void aom_highbd_lpf_horizontal_8_dual_sse2(
- uint16_t *s, int p, const uint8_t *_blimit0, const uint8_t *_limit0,
- const uint8_t *_thresh0, const uint8_t *_blimit1, const uint8_t *_limit1,
- const uint8_t *_thresh1, int bd) {
- __m128i p2, p1, p0, q0, q1, q2, p3, q3;
-
- p3 = _mm_loadu_si128((__m128i *)(s - 4 * p));
- q3 = _mm_loadu_si128((__m128i *)(s + 3 * p));
- p2 = _mm_loadu_si128((__m128i *)(s - 3 * p));
- q2 = _mm_loadu_si128((__m128i *)(s + 2 * p));
- p1 = _mm_loadu_si128((__m128i *)(s - 2 * p));
- q1 = _mm_loadu_si128((__m128i *)(s + 1 * p));
- p0 = _mm_loadu_si128((__m128i *)(s - 1 * p));
- q0 = _mm_loadu_si128((__m128i *)(s + 0 * p));
-
- highbd_lpf_internal_8_dual_sse2(&p3, &q3, &p2, &q2, &p1, &q1, &p0, &q0,
- _blimit0, _limit0, _thresh0, _blimit1,
- _limit1, _thresh1, bd);
-
- _mm_storeu_si128((__m128i *)(s - 3 * p), p2);
- _mm_storeu_si128((__m128i *)(s - 2 * p), p1);
- _mm_storeu_si128((__m128i *)(s - 1 * p), p0);
- _mm_storeu_si128((__m128i *)(s + 0 * p), q0);
- _mm_storeu_si128((__m128i *)(s + 1 * p), q1);
- _mm_storeu_si128((__m128i *)(s + 2 * p), q2);
-}
-
-static AOM_FORCE_INLINE void highbd_lpf_internal_4_sse2(
- __m128i *p1, __m128i *p0, __m128i *q0, __m128i *q1, __m128i *q1q0_out,
- __m128i *p1p0_out, const uint8_t *_blimit, const uint8_t *_limit,
- const uint8_t *_thresh, int bd) {
- __m128i blimit, limit, thresh;
- __m128i mask, hev;
- __m128i p1p0, q1q0;
- __m128i pq[2];
-
- __m128i abs_p1p0;
-
- __m128i t80;
- get_limit(_blimit, _limit, _thresh, bd, &blimit, &limit, &thresh, &t80);
-
- pq[0] = _mm_unpacklo_epi64(*p0, *q0);
- pq[1] = _mm_unpacklo_epi64(*p1, *q1);
-
- highbd_hev_filter_mask_x_sse2(pq, 2, &p1p0, &q1q0, &abs_p1p0, &limit, &blimit,
- &thresh, &hev, &mask);
-
- highbd_filter4_sse2(&p1p0, &q1q0, &hev, &mask, q1q0_out, p1p0_out, &t80, bd);
-}
-
-static AOM_FORCE_INLINE void highbd_lpf_internal_4_dual_sse2(
- __m128i *p1, __m128i *p0, __m128i *q0, __m128i *q1, __m128i *ps,
- __m128i *qs, const uint8_t *_blimit0, const uint8_t *_limit0,
- const uint8_t *_thresh0, const uint8_t *_blimit1, const uint8_t *_limit1,
- const uint8_t *_thresh1, int bd) {
- __m128i blimit0, limit0, thresh0;
- __m128i mask, flat;
- __m128i p[2], q[2];
-
- const __m128i zero = _mm_setzero_si128();
- __m128i abs_p0q0 = abs_diff16(*q0, *p0);
- __m128i abs_p1q1 = abs_diff16(*q1, *p1);
-
- __m128i abs_p1p0 = abs_diff16(*p1, *p0);
- __m128i abs_q1q0 = abs_diff16(*q1, *q0);
-
- const __m128i ffff = _mm_cmpeq_epi16(abs_p1p0, abs_p1p0);
- const __m128i one = _mm_set1_epi16(1);
-
- __m128i t80;
-
- get_limit_dual(_blimit0, _limit0, _thresh0, _blimit1, _limit1, _thresh1, bd,
- &blimit0, &limit0, &thresh0, &t80);
-
- // filter_mask and hev_mask
- flat = _mm_max_epi16(abs_p1p0, abs_q1q0);
-
- abs_p0q0 = _mm_adds_epu16(abs_p0q0, abs_p0q0);
- abs_p1q1 = _mm_srli_epi16(abs_p1q1, 1);
-
- mask = _mm_subs_epu16(_mm_adds_epu16(abs_p0q0, abs_p1q1), blimit0);
- mask = _mm_xor_si128(_mm_cmpeq_epi16(mask, zero), ffff);
- // mask |= (abs(*p0 - *q0) * 2 + abs(*p1 - *q1) / 2 > blimit) * -1;
- // So taking maximums continues to work:
- mask = _mm_and_si128(mask, _mm_adds_epu16(limit0, one));
- mask = _mm_max_epi16(flat, mask);
-
- mask = _mm_subs_epu16(mask, limit0);
- mask = _mm_cmpeq_epi16(mask, zero);
-
- p[0] = *p0;
- p[1] = *p1;
- q[0] = *q0;
- q[1] = *q1;
-
- highbd_filter4_dual_sse2(p, q, ps, qs, &mask, &thresh0, bd, &t80);
-}
-
-void aom_highbd_lpf_horizontal_4_sse2(uint16_t *s, int p,
- const uint8_t *_blimit,
- const uint8_t *_limit,
- const uint8_t *_thresh, int bd) {
- __m128i p1p0, q1q0;
- __m128i p1 = _mm_loadl_epi64((__m128i *)(s - 2 * p));
- __m128i p0 = _mm_loadl_epi64((__m128i *)(s - 1 * p));
- __m128i q0 = _mm_loadl_epi64((__m128i *)(s - 0 * p));
- __m128i q1 = _mm_loadl_epi64((__m128i *)(s + 1 * p));
-
- highbd_lpf_internal_4_sse2(&p1, &p0, &q0, &q1, &q1q0, &p1p0, _blimit, _limit,
- _thresh, bd);
-
- _mm_storel_epi64((__m128i *)(s - 2 * p), _mm_srli_si128(p1p0, 8));
- _mm_storel_epi64((__m128i *)(s - 1 * p), p1p0);
- _mm_storel_epi64((__m128i *)(s + 0 * p), q1q0);
- _mm_storel_epi64((__m128i *)(s + 1 * p), _mm_srli_si128(q1q0, 8));
-}
-
-void aom_highbd_lpf_horizontal_4_dual_sse2(
- uint16_t *s, int p, const uint8_t *_blimit0, const uint8_t *_limit0,
- const uint8_t *_thresh0, const uint8_t *_blimit1, const uint8_t *_limit1,
- const uint8_t *_thresh1, int bd) {
- __m128i p1 = _mm_loadu_si128((__m128i *)(s - 2 * p));
- __m128i p0 = _mm_loadu_si128((__m128i *)(s - 1 * p));
- __m128i q0 = _mm_loadu_si128((__m128i *)(s - 0 * p));
- __m128i q1 = _mm_loadu_si128((__m128i *)(s + 1 * p));
- __m128i ps[2], qs[2];
-
- highbd_lpf_internal_4_dual_sse2(&p1, &p0, &q0, &q1, ps, qs, _blimit0, _limit0,
- _thresh0, _blimit1, _limit1, _thresh1, bd);
-
- _mm_storeu_si128((__m128i *)(s - 2 * p), ps[1]);
- _mm_storeu_si128((__m128i *)(s - 1 * p), ps[0]);
- _mm_storeu_si128((__m128i *)(s + 0 * p), qs[0]);
- _mm_storeu_si128((__m128i *)(s + 1 * p), qs[1]);
-}
-
-void aom_highbd_lpf_vertical_4_sse2(uint16_t *s, int p, const uint8_t *blimit,
- const uint8_t *limit, const uint8_t *thresh,
- int bd) {
- __m128i x0, x1, x2, x3, d0, d1, d2, d3;
- __m128i p1p0, q1q0;
- __m128i p1, q1;
-
- x0 = _mm_loadl_epi64((__m128i *)(s - 2 + 0 * p));
- x1 = _mm_loadl_epi64((__m128i *)(s - 2 + 1 * p));
- x2 = _mm_loadl_epi64((__m128i *)(s - 2 + 2 * p));
- x3 = _mm_loadl_epi64((__m128i *)(s - 2 + 3 * p));
-
- highbd_transpose4x8_8x4_low_sse2(&x0, &x1, &x2, &x3, &d0, &d1, &d2, &d3);
-
- highbd_lpf_internal_4_sse2(&d0, &d1, &d2, &d3, &q1q0, &p1p0, blimit, limit,
- thresh, bd);
-
- p1 = _mm_srli_si128(p1p0, 8);
- q1 = _mm_srli_si128(q1q0, 8);
-
- // transpose from 8x4 to 4x8
- highbd_transpose4x8_8x4_low_sse2(&p1, &p1p0, &q1q0, &q1, &d0, &d1, &d2, &d3);
-
- _mm_storel_epi64((__m128i *)(s - 2 + 0 * p), d0);
- _mm_storel_epi64((__m128i *)(s - 2 + 1 * p), d1);
- _mm_storel_epi64((__m128i *)(s - 2 + 2 * p), d2);
- _mm_storel_epi64((__m128i *)(s - 2 + 3 * p), d3);
-}
-
-void aom_highbd_lpf_vertical_4_dual_sse2(
- uint16_t *s, int p, const uint8_t *blimit0, const uint8_t *limit0,
- const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1,
- const uint8_t *thresh1, int bd) {
- __m128i x0, x1, x2, x3, x4, x5, x6, x7;
- __m128i d0, d1, d2, d3, d4, d5, d6, d7;
- __m128i ps[2], qs[2];
-
- x0 = _mm_loadl_epi64((__m128i *)(s - 2 + 0 * p));
- x1 = _mm_loadl_epi64((__m128i *)(s - 2 + 1 * p));
- x2 = _mm_loadl_epi64((__m128i *)(s - 2 + 2 * p));
- x3 = _mm_loadl_epi64((__m128i *)(s - 2 + 3 * p));
- x4 = _mm_loadl_epi64((__m128i *)(s - 2 + 4 * p));
- x5 = _mm_loadl_epi64((__m128i *)(s - 2 + 5 * p));
- x6 = _mm_loadl_epi64((__m128i *)(s - 2 + 6 * p));
- x7 = _mm_loadl_epi64((__m128i *)(s - 2 + 7 * p));
-
- highbd_transpose8x8_low_sse2(&x0, &x1, &x2, &x3, &x4, &x5, &x6, &x7, &d0, &d1,
- &d2, &d3);
-
- highbd_lpf_internal_4_dual_sse2(&d0, &d1, &d2, &d3, ps, qs, blimit0, limit0,
- thresh0, blimit1, limit1, thresh1, bd);
-
- highbd_transpose4x8_8x4_sse2(&ps[1], &ps[0], &qs[0], &qs[1], &d0, &d1, &d2,
- &d3, &d4, &d5, &d6, &d7);
-
- _mm_storel_epi64((__m128i *)(s - 2 + 0 * p), d0);
- _mm_storel_epi64((__m128i *)(s - 2 + 1 * p), d1);
- _mm_storel_epi64((__m128i *)(s - 2 + 2 * p), d2);
- _mm_storel_epi64((__m128i *)(s - 2 + 3 * p), d3);
- _mm_storel_epi64((__m128i *)(s - 2 + 4 * p), d4);
- _mm_storel_epi64((__m128i *)(s - 2 + 5 * p), d5);
- _mm_storel_epi64((__m128i *)(s - 2 + 6 * p), d6);
- _mm_storel_epi64((__m128i *)(s - 2 + 7 * p), d7);
-}
-
-void aom_highbd_lpf_vertical_6_sse2(uint16_t *s, int p, const uint8_t *blimit,
- const uint8_t *limit, const uint8_t *thresh,
- int bd) {
- __m128i d0, d1, d2, d3, d4, d5, d6, d7;
- __m128i x3, x2, x1, x0, p0, q0;
- __m128i p1p0, q1q0;
-
- x3 = _mm_loadu_si128((__m128i *)((s - 3) + 0 * p));
- x2 = _mm_loadu_si128((__m128i *)((s - 3) + 1 * p));
- x1 = _mm_loadu_si128((__m128i *)((s - 3) + 2 * p));
- x0 = _mm_loadu_si128((__m128i *)((s - 3) + 3 * p));
-
- highbd_transpose4x8_8x4_sse2(&x3, &x2, &x1, &x0, &d0, &d1, &d2, &d3, &d4, &d5,
- &d6, &d7);
-
- highbd_lpf_internal_6_sse2(&d0, &d1, &d2, &d3, &d4, &d5, &p1p0, &q1q0, blimit,
- limit, thresh, bd);
-
- p0 = _mm_srli_si128(p1p0, 8);
- q0 = _mm_srli_si128(q1q0, 8);
-
- highbd_transpose4x8_8x4_low_sse2(&p0, &p1p0, &q1q0, &q0, &d0, &d1, &d2, &d3);
-
- _mm_storel_epi64((__m128i *)(s - 2 + 0 * p), d0);
- _mm_storel_epi64((__m128i *)(s - 2 + 1 * p), d1);
- _mm_storel_epi64((__m128i *)(s - 2 + 2 * p), d2);
- _mm_storel_epi64((__m128i *)(s - 2 + 3 * p), d3);
-}
-
-void aom_highbd_lpf_vertical_6_dual_sse2(
- uint16_t *s, int p, const uint8_t *_blimit0, const uint8_t *_limit0,
- const uint8_t *_thresh0, const uint8_t *_blimit1, const uint8_t *_limit1,
- const uint8_t *_thresh1, int bd) {
- __m128i d0, d1, d2, d3, d4, d5, d6, d7;
- __m128i x0, x1, x2, x3, x4, x5, x6, x7;
- __m128i p0, q0, p1, q1, p2, q2;
-
- x0 = _mm_loadu_si128((__m128i *)((s - 3) + 0 * p));
- x1 = _mm_loadu_si128((__m128i *)((s - 3) + 1 * p));
- x2 = _mm_loadu_si128((__m128i *)((s - 3) + 2 * p));
- x3 = _mm_loadu_si128((__m128i *)((s - 3) + 3 * p));
- x4 = _mm_loadu_si128((__m128i *)((s - 3) + 4 * p));
- x5 = _mm_loadu_si128((__m128i *)((s - 3) + 5 * p));
- x6 = _mm_loadu_si128((__m128i *)((s - 3) + 6 * p));
- x7 = _mm_loadu_si128((__m128i *)((s - 3) + 7 * p));
-
- highbd_transpose8x8_sse2(&x0, &x1, &x2, &x3, &x4, &x5, &x6, &x7, &p2, &p1,
- &p0, &q0, &q1, &q2, &d6, &d7);
-
- highbd_lpf_internal_6_dual_sse2(&p2, &p1, &p0, &q0, &q1, &q2, _blimit0,
- _limit0, _thresh0, _blimit1, _limit1,
- _thresh1, bd);
-
- highbd_transpose4x8_8x4_sse2(&p1, &p0, &q0, &q1, &d0, &d1, &d2, &d3, &d4, &d5,
- &d6, &d7);
-
- _mm_storel_epi64((__m128i *)(s - 2 + 0 * p), d0);
- _mm_storel_epi64((__m128i *)(s - 2 + 1 * p), d1);
- _mm_storel_epi64((__m128i *)(s - 2 + 2 * p), d2);
- _mm_storel_epi64((__m128i *)(s - 2 + 3 * p), d3);
- _mm_storel_epi64((__m128i *)(s - 2 + 4 * p), d4);
- _mm_storel_epi64((__m128i *)(s - 2 + 5 * p), d5);
- _mm_storel_epi64((__m128i *)(s - 2 + 6 * p), d6);
- _mm_storel_epi64((__m128i *)(s - 2 + 7 * p), d7);
-}
-
-void aom_highbd_lpf_vertical_8_sse2(uint16_t *s, int p, const uint8_t *blimit,
- const uint8_t *limit, const uint8_t *thresh,
- int bd) {
- __m128i d0, d1, d2, d3, d4, d5, d6, d7;
- __m128i p2, p1, p0, p3, q0;
- __m128i q1q0, p1p0;
-
- p3 = _mm_loadu_si128((__m128i *)((s - 4) + 0 * p));
- p2 = _mm_loadu_si128((__m128i *)((s - 4) + 1 * p));
- p1 = _mm_loadu_si128((__m128i *)((s - 4) + 2 * p));
- p0 = _mm_loadu_si128((__m128i *)((s - 4) + 3 * p));
-
- highbd_transpose4x8_8x4_sse2(&p3, &p2, &p1, &p0, &d0, &d1, &d2, &d3, &d4, &d5,
- &d6, &d7);
-
- // Loop filtering
- highbd_lpf_internal_8_sse2(&d0, &d7, &d1, &d6, &d2, &d5, &d3, &d4, &q1q0,
- &p1p0, blimit, limit, thresh, bd);
-
- p0 = _mm_srli_si128(p1p0, 8);
- q0 = _mm_srli_si128(q1q0, 8);
-
- highbd_transpose8x8_low_sse2(&d0, &d1, &p0, &p1p0, &q1q0, &q0, &d6, &d7, &d0,
- &d1, &d2, &d3);
-
- _mm_storeu_si128((__m128i *)(s - 4 + 0 * p), d0);
- _mm_storeu_si128((__m128i *)(s - 4 + 1 * p), d1);
- _mm_storeu_si128((__m128i *)(s - 4 + 2 * p), d2);
- _mm_storeu_si128((__m128i *)(s - 4 + 3 * p), d3);
-}
-
-void aom_highbd_lpf_vertical_8_dual_sse2(
- uint16_t *s, int p, const uint8_t *blimit0, const uint8_t *limit0,
- const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1,
- const uint8_t *thresh1, int bd) {
- __m128i x0, x1, x2, x3, x4, x5, x6, x7;
- __m128i d0, d1, d2, d3, d4, d5, d6, d7;
-
- x0 = _mm_loadu_si128((__m128i *)(s - 4 + 0 * p));
- x1 = _mm_loadu_si128((__m128i *)(s - 4 + 1 * p));
- x2 = _mm_loadu_si128((__m128i *)(s - 4 + 2 * p));
- x3 = _mm_loadu_si128((__m128i *)(s - 4 + 3 * p));
- x4 = _mm_loadu_si128((__m128i *)(s - 4 + 4 * p));
- x5 = _mm_loadu_si128((__m128i *)(s - 4 + 5 * p));
- x6 = _mm_loadu_si128((__m128i *)(s - 4 + 6 * p));
- x7 = _mm_loadu_si128((__m128i *)(s - 4 + 7 * p));
-
- highbd_transpose8x8_sse2(&x0, &x1, &x2, &x3, &x4, &x5, &x6, &x7, &d0, &d1,
- &d2, &d3, &d4, &d5, &d6, &d7);
-
- highbd_lpf_internal_8_dual_sse2(&d0, &d7, &d1, &d6, &d2, &d5, &d3, &d4,
- blimit0, limit0, thresh0, blimit1, limit1,
- thresh1, bd);
-
- highbd_transpose8x8_sse2(&d0, &d1, &d2, &d3, &d4, &d5, &d6, &d7, &x0, &x1,
- &x2, &x3, &x4, &x5, &x6, &x7);
-
- _mm_storeu_si128((__m128i *)(s - 4 + 0 * p), x0);
- _mm_storeu_si128((__m128i *)(s - 4 + 1 * p), x1);
- _mm_storeu_si128((__m128i *)(s - 4 + 2 * p), x2);
- _mm_storeu_si128((__m128i *)(s - 4 + 3 * p), x3);
- _mm_storeu_si128((__m128i *)(s - 4 + 4 * p), x4);
- _mm_storeu_si128((__m128i *)(s - 4 + 5 * p), x5);
- _mm_storeu_si128((__m128i *)(s - 4 + 6 * p), x6);
- _mm_storeu_si128((__m128i *)(s - 4 + 7 * p), x7);
-}
-
-void aom_highbd_lpf_vertical_14_sse2(uint16_t *s, int pitch,
- const uint8_t *blimit,
- const uint8_t *limit,
- const uint8_t *thresh, int bd) {
- __m128i q[7], p[7], pq[7];
- __m128i p6, p5, p4, p3;
- __m128i p6_2, p5_2, p4_2, p3_2;
- __m128i d0, d1, d2, d3;
- __m128i d0_2, d1_2, d2_2, d3_2, d7_2;
-
- p6 = _mm_loadu_si128((__m128i *)((s - 8) + 0 * pitch));
- p5 = _mm_loadu_si128((__m128i *)((s - 8) + 1 * pitch));
- p4 = _mm_loadu_si128((__m128i *)((s - 8) + 2 * pitch));
- p3 = _mm_loadu_si128((__m128i *)((s - 8) + 3 * pitch));
-
- highbd_transpose4x8_8x4_sse2(&p6, &p5, &p4, &p3, &d0, &p[6], &p[5], &p[4],
- &p[3], &p[2], &p[1], &p[0]);
-
- p6_2 = _mm_loadu_si128((__m128i *)(s + 0 * pitch));
- p5_2 = _mm_loadu_si128((__m128i *)(s + 1 * pitch));
- p4_2 = _mm_loadu_si128((__m128i *)(s + 2 * pitch));
- p3_2 = _mm_loadu_si128((__m128i *)(s + 3 * pitch));
-
- highbd_transpose4x8_8x4_sse2(&p6_2, &p5_2, &p4_2, &p3_2, &q[0], &q[1], &q[2],
- &q[3], &q[4], &q[5], &q[6], &d7_2);
-
- highbd_lpf_internal_14_sse2(p, q, pq, blimit, limit, thresh, bd);
-
- highbd_transpose8x8_low_sse2(&d0, &p[6], &pq[5], &pq[4], &pq[3], &pq[2],
- &pq[1], &pq[0], &d0, &d1, &d2, &d3);
-
- q[0] = _mm_srli_si128(pq[0], 8);
- q[1] = _mm_srli_si128(pq[1], 8);
- q[2] = _mm_srli_si128(pq[2], 8);
- q[3] = _mm_srli_si128(pq[3], 8);
- q[4] = _mm_srli_si128(pq[4], 8);
- q[5] = _mm_srli_si128(pq[5], 8);
-
- highbd_transpose8x8_low_sse2(&q[0], &q[1], &q[2], &q[3], &q[4], &q[5], &q[6],
- &d7_2, &d0_2, &d1_2, &d2_2, &d3_2);
-
- _mm_storeu_si128((__m128i *)(s - 8 + 0 * pitch), d0);
- _mm_storeu_si128((__m128i *)(s + 0 * pitch), d0_2);
-
- _mm_storeu_si128((__m128i *)(s - 8 + 1 * pitch), d1);
- _mm_storeu_si128((__m128i *)(s + 1 * pitch), d1_2);
-
- _mm_storeu_si128((__m128i *)(s - 8 + 2 * pitch), d2);
- _mm_storeu_si128((__m128i *)(s + 2 * pitch), d2_2);
-
- _mm_storeu_si128((__m128i *)(s - 8 + 3 * pitch), d3);
- _mm_storeu_si128((__m128i *)(s + 3 * pitch), d3_2);
-}
-
-void aom_highbd_lpf_vertical_14_dual_sse2(
- uint16_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0,
- const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1,
- const uint8_t *thresh1, int bd) {
- __m128i q[7], p[7];
- __m128i p6, p5, p4, p3, p2, p1, p0, q0;
- __m128i p6_2, p5_2, p4_2, p3_2, p2_2, p1_2, q0_2, p0_2;
- __m128i d0, d7;
- __m128i d0_out, d1_out, d2_out, d3_out, d4_out, d5_out, d6_out, d7_out;
-
- p6 = _mm_loadu_si128((__m128i *)((s - 8) + 0 * pitch));
- p5 = _mm_loadu_si128((__m128i *)((s - 8) + 1 * pitch));
- p4 = _mm_loadu_si128((__m128i *)((s - 8) + 2 * pitch));
- p3 = _mm_loadu_si128((__m128i *)((s - 8) + 3 * pitch));
- p2 = _mm_loadu_si128((__m128i *)((s - 8) + 4 * pitch));
- p1 = _mm_loadu_si128((__m128i *)((s - 8) + 5 * pitch));
- p0 = _mm_loadu_si128((__m128i *)((s - 8) + 6 * pitch));
- q0 = _mm_loadu_si128((__m128i *)((s - 8) + 7 * pitch));
-
- highbd_transpose8x8_sse2(&p6, &p5, &p4, &p3, &p2, &p1, &p0, &q0, &d0, &p[6],
- &p[5], &p[4], &p[3], &p[2], &p[1], &p[0]);
-
- p6_2 = _mm_loadu_si128((__m128i *)(s + 0 * pitch));
- p5_2 = _mm_loadu_si128((__m128i *)(s + 1 * pitch));
- p4_2 = _mm_loadu_si128((__m128i *)(s + 2 * pitch));
- p3_2 = _mm_loadu_si128((__m128i *)(s + 3 * pitch));
- p2_2 = _mm_loadu_si128((__m128i *)(s + 4 * pitch));
- p1_2 = _mm_loadu_si128((__m128i *)(s + 5 * pitch));
- p0_2 = _mm_loadu_si128((__m128i *)(s + 6 * pitch));
- q0_2 = _mm_loadu_si128((__m128i *)(s + 7 * pitch));
-
- highbd_transpose8x8_sse2(&p6_2, &p5_2, &p4_2, &p3_2, &p2_2, &p1_2, &p0_2,
- &q0_2, &q[0], &q[1], &q[2], &q[3], &q[4], &q[5],
- &q[6], &d7);
-
- highbd_lpf_internal_14_dual_sse2(p, q, blimit0, limit0, thresh0, blimit1,
- limit1, thresh1, bd);
-
- highbd_transpose8x8_sse2(&d0, &p[6], &p[5], &p[4], &p[3], &p[2], &p[1], &p[0],
- &d0_out, &d1_out, &d2_out, &d3_out, &d4_out, &d5_out,
- &d6_out, &d7_out);
-
- _mm_storeu_si128((__m128i *)(s - 8 + 0 * pitch), d0_out);
- _mm_storeu_si128((__m128i *)(s - 8 + 1 * pitch), d1_out);
- _mm_storeu_si128((__m128i *)(s - 8 + 2 * pitch), d2_out);
- _mm_storeu_si128((__m128i *)(s - 8 + 3 * pitch), d3_out);
- _mm_storeu_si128((__m128i *)(s - 8 + 4 * pitch), d4_out);
- _mm_storeu_si128((__m128i *)(s - 8 + 5 * pitch), d5_out);
- _mm_storeu_si128((__m128i *)(s - 8 + 6 * pitch), d6_out);
- _mm_storeu_si128((__m128i *)(s - 8 + 7 * pitch), d7_out);
-
- highbd_transpose8x8_sse2(&q[0], &q[1], &q[2], &q[3], &q[4], &q[5], &q[6], &d7,
- &d0_out, &d1_out, &d2_out, &d3_out, &d4_out, &d5_out,
- &d6_out, &d7_out);
-
- _mm_storeu_si128((__m128i *)(s + 0 * pitch), d0_out);
- _mm_storeu_si128((__m128i *)(s + 1 * pitch), d1_out);
- _mm_storeu_si128((__m128i *)(s + 2 * pitch), d2_out);
- _mm_storeu_si128((__m128i *)(s + 3 * pitch), d3_out);
- _mm_storeu_si128((__m128i *)(s + 4 * pitch), d4_out);
- _mm_storeu_si128((__m128i *)(s + 5 * pitch), d5_out);
- _mm_storeu_si128((__m128i *)(s + 6 * pitch), d6_out);
- _mm_storeu_si128((__m128i *)(s + 7 * pitch), d7_out);
-}
diff --git a/third_party/aom/aom_dsp/x86/highbd_quantize_intrin_avx2.c b/third_party/aom/aom_dsp/x86/highbd_quantize_intrin_avx2.c
deleted file mode 100644
index b9689202a..000000000
--- a/third_party/aom/aom_dsp/x86/highbd_quantize_intrin_avx2.c
+++ /dev/null
@@ -1,160 +0,0 @@
-/*
- * Copyright (c) 2017, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#include <immintrin.h>
-
-#include "config/aom_dsp_rtcd.h"
-
-#include "aom/aom_integer.h"
-
-static INLINE void init_one_qp(const __m128i *p, __m256i *qp) {
- const __m128i sign = _mm_srai_epi16(*p, 15);
- const __m128i dc = _mm_unpacklo_epi16(*p, sign);
- const __m128i ac = _mm_unpackhi_epi16(*p, sign);
- *qp = _mm256_insertf128_si256(_mm256_castsi128_si256(dc), ac, 1);
-}
-
-static INLINE void update_qp(__m256i *qp) {
- int i;
- for (i = 0; i < 5; ++i) {
- qp[i] = _mm256_permute2x128_si256(qp[i], qp[i], 0x11);
- }
-}
-
-static INLINE void init_qp(const int16_t *zbin_ptr, const int16_t *round_ptr,
- const int16_t *quant_ptr, const int16_t *dequant_ptr,
- const int16_t *quant_shift_ptr, __m256i *qp) {
- const __m128i zbin = _mm_loadu_si128((const __m128i *)zbin_ptr);
- const __m128i round = _mm_loadu_si128((const __m128i *)round_ptr);
- const __m128i quant = _mm_loadu_si128((const __m128i *)quant_ptr);
- const __m128i dequant = _mm_loadu_si128((const __m128i *)dequant_ptr);
- const __m128i quant_shift = _mm_loadu_si128((const __m128i *)quant_shift_ptr);
- init_one_qp(&zbin, &qp[0]);
- init_one_qp(&round, &qp[1]);
- init_one_qp(&quant, &qp[2]);
- init_one_qp(&dequant, &qp[3]);
- init_one_qp(&quant_shift, &qp[4]);
-}
-
-// Note:
-// *x is vector multiplied by *y which is 16 int32_t parallel multiplication
-// and right shift 16. The output, 16 int32_t is save in *p.
-static INLINE void mm256_mul_shift_epi32(const __m256i *x, const __m256i *y,
- __m256i *p) {
- __m256i prod_lo = _mm256_mul_epi32(*x, *y);
- __m256i prod_hi = _mm256_srli_epi64(*x, 32);
- const __m256i mult_hi = _mm256_srli_epi64(*y, 32);
- prod_hi = _mm256_mul_epi32(prod_hi, mult_hi);
-
- prod_lo = _mm256_srli_epi64(prod_lo, 16);
- const __m256i mask = _mm256_set_epi32(0, -1, 0, -1, 0, -1, 0, -1);
- prod_lo = _mm256_and_si256(prod_lo, mask);
- prod_hi = _mm256_srli_epi64(prod_hi, 16);
-
- prod_hi = _mm256_slli_epi64(prod_hi, 32);
- *p = _mm256_or_si256(prod_lo, prod_hi);
-}
-
-static INLINE void quantize(const __m256i *qp, __m256i *c,
- const int16_t *iscan_ptr, tran_low_t *qcoeff,
- tran_low_t *dqcoeff, __m256i *eob) {
- const __m256i abs = _mm256_abs_epi32(*c);
- const __m256i flag1 = _mm256_cmpgt_epi32(abs, qp[0]);
- __m256i flag2 = _mm256_cmpeq_epi32(abs, qp[0]);
- flag2 = _mm256_or_si256(flag1, flag2);
- const int32_t nzflag = _mm256_movemask_epi8(flag2);
-
- if (LIKELY(nzflag)) {
- __m256i q = _mm256_add_epi32(abs, qp[1]);
- __m256i tmp;
- mm256_mul_shift_epi32(&q, &qp[2], &tmp);
- q = _mm256_add_epi32(tmp, q);
-
- mm256_mul_shift_epi32(&q, &qp[4], &q);
- __m256i dq = _mm256_mullo_epi32(q, qp[3]);
-
- q = _mm256_sign_epi32(q, *c);
- dq = _mm256_sign_epi32(dq, *c);
- q = _mm256_and_si256(q, flag2);
- dq = _mm256_and_si256(dq, flag2);
-
- _mm256_storeu_si256((__m256i *)qcoeff, q);
- _mm256_storeu_si256((__m256i *)dqcoeff, dq);
-
- const __m128i isc = _mm_loadu_si128((const __m128i *)iscan_ptr);
- const __m128i zr = _mm_setzero_si128();
- const __m128i lo = _mm_unpacklo_epi16(isc, zr);
- const __m128i hi = _mm_unpackhi_epi16(isc, zr);
- const __m256i iscan =
- _mm256_insertf128_si256(_mm256_castsi128_si256(lo), hi, 1);
-
- const __m256i zero = _mm256_setzero_si256();
- const __m256i zc = _mm256_cmpeq_epi32(dq, zero);
- const __m256i nz = _mm256_cmpeq_epi32(zc, zero);
- __m256i cur_eob = _mm256_sub_epi32(iscan, nz);
- cur_eob = _mm256_and_si256(cur_eob, nz);
- *eob = _mm256_max_epi32(cur_eob, *eob);
- } else {
- const __m256i zero = _mm256_setzero_si256();
- _mm256_storeu_si256((__m256i *)qcoeff, zero);
- _mm256_storeu_si256((__m256i *)dqcoeff, zero);
- }
-}
-
-void aom_highbd_quantize_b_avx2(const tran_low_t *coeff_ptr, intptr_t n_coeffs,
- const int16_t *zbin_ptr,
- const int16_t *round_ptr,
- const int16_t *quant_ptr,
- const int16_t *quant_shift_ptr,
- tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr,
- const int16_t *dequant_ptr, uint16_t *eob_ptr,
- const int16_t *scan, const int16_t *iscan) {
- (void)scan;
- const unsigned int step = 8;
-
- __m256i qp[5], coeff;
- init_qp(zbin_ptr, round_ptr, quant_ptr, dequant_ptr, quant_shift_ptr, qp);
- coeff = _mm256_loadu_si256((const __m256i *)coeff_ptr);
-
- __m256i eob = _mm256_setzero_si256();
- quantize(qp, &coeff, iscan, qcoeff_ptr, dqcoeff_ptr, &eob);
-
- coeff_ptr += step;
- qcoeff_ptr += step;
- dqcoeff_ptr += step;
- iscan += step;
- n_coeffs -= step;
-
- update_qp(qp);
-
- while (n_coeffs > 0) {
- coeff = _mm256_loadu_si256((const __m256i *)coeff_ptr);
- quantize(qp, &coeff, iscan, qcoeff_ptr, dqcoeff_ptr, &eob);
-
- coeff_ptr += step;
- qcoeff_ptr += step;
- dqcoeff_ptr += step;
- iscan += step;
- n_coeffs -= step;
- }
- {
- __m256i eob_s;
- eob_s = _mm256_shuffle_epi32(eob, 0xe);
- eob = _mm256_max_epi16(eob, eob_s);
- eob_s = _mm256_shufflelo_epi16(eob, 0xe);
- eob = _mm256_max_epi16(eob, eob_s);
- eob_s = _mm256_shufflelo_epi16(eob, 1);
- eob = _mm256_max_epi16(eob, eob_s);
- const __m128i final_eob = _mm_max_epi16(_mm256_castsi256_si128(eob),
- _mm256_extractf128_si256(eob, 1));
- *eob_ptr = _mm_extract_epi16(final_eob, 0);
- }
-}
diff --git a/third_party/aom/aom_dsp/x86/highbd_quantize_intrin_sse2.c b/third_party/aom/aom_dsp/x86/highbd_quantize_intrin_sse2.c
deleted file mode 100644
index 58e5f98e5..000000000
--- a/third_party/aom/aom_dsp/x86/highbd_quantize_intrin_sse2.c
+++ /dev/null
@@ -1,148 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#include <emmintrin.h>
-
-#include "aom_dsp/aom_dsp_common.h"
-#include "aom_mem/aom_mem.h"
-#include "aom_ports/mem.h"
-
-void aom_highbd_quantize_b_sse2(const tran_low_t *coeff_ptr, intptr_t count,  // quantizes count coefficients; writes qcoeff_ptr, dqcoeff_ptr and *eob_ptr
- const int16_t *zbin_ptr,
- const int16_t *round_ptr,
- const int16_t *quant_ptr,
- const int16_t *quant_shift_ptr,
- tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr,
- const int16_t *dequant_ptr, uint16_t *eob_ptr,
- const int16_t *scan, const int16_t *iscan) {
- int i, j, non_zero_regs = (int)count / 4, eob_i = -1;  // non_zero_regs: number of leading 4-coefficient groups that still need quantizing
- __m128i zbins[2];  // [0]: thresholds for the first group (lane 0 uses index 0), [1]: thresholds for all later groups
- __m128i nzbins[2];  // negated thresholds
-
- zbins[0] = _mm_set_epi32((int)zbin_ptr[1], (int)zbin_ptr[1], (int)zbin_ptr[1],  // lanes 3..1 = zbin_ptr[1]
- (int)zbin_ptr[0]);  // lane 0 = zbin_ptr[0]
- zbins[1] = _mm_set1_epi32((int)zbin_ptr[1]);
-
- nzbins[0] = _mm_setzero_si128();
- nzbins[1] = _mm_setzero_si128();
- nzbins[0] = _mm_sub_epi32(nzbins[0], zbins[0]);  // 0 - zbin
- nzbins[1] = _mm_sub_epi32(nzbins[1], zbins[1]);
-
- (void)scan;  // scan is not used; eob is derived from iscan
-
- memset(qcoeff_ptr, 0, count * sizeof(*qcoeff_ptr));  // outputs default to zero; only coefficients passing the zbin test are overwritten
- memset(dqcoeff_ptr, 0, count * sizeof(*dqcoeff_ptr));
-
- // Pre-scan pass
- for (i = ((int)count / 4) - 1; i >= 0; i--) {  // walk backwards, trimming trailing groups that lie entirely inside (-zbin, zbin)
- __m128i coeffs, cmp1, cmp2;
- int test;
- coeffs = _mm_load_si128((const __m128i *)(coeff_ptr + i * 4));  // aligned load: coeff_ptr must be 16-byte aligned
- cmp1 = _mm_cmplt_epi32(coeffs, zbins[i != 0]);  // coeff < zbin
- cmp2 = _mm_cmpgt_epi32(coeffs, nzbins[i != 0]);  // coeff > -zbin
- cmp1 = _mm_and_si128(cmp1, cmp2);
- test = _mm_movemask_epi8(cmp1);  // 4 mask bits per 32-bit lane
- if (test == 0xffff)  // all four lanes are inside the dead zone
- non_zero_regs--;
- else
- break;  // stop at the first group (from the end) with a coefficient outside the dead zone
- }
-
- // Quantization pass:
- for (i = 0; i < non_zero_regs; i++) {
- __m128i coeffs, coeffs_sign, tmp1, tmp2;
- int test;
- int abs_coeff[4];
- int coeff_sign[4];
-
- coeffs = _mm_load_si128((const __m128i *)(coeff_ptr + i * 4));
- coeffs_sign = _mm_srai_epi32(coeffs, 31);  // 0 for non-negative lanes, -1 for negative lanes
- coeffs = _mm_sub_epi32(_mm_xor_si128(coeffs, coeffs_sign), coeffs_sign);  // absolute value: (x ^ s) - s
- tmp1 = _mm_cmpgt_epi32(coeffs, zbins[i != 0]);
- tmp2 = _mm_cmpeq_epi32(coeffs, zbins[i != 0]);
- tmp1 = _mm_or_si128(tmp1, tmp2);  // |coeff| >= zbin
- test = _mm_movemask_epi8(tmp1);
- _mm_storeu_si128((__m128i *)abs_coeff, coeffs);  // spill to arrays for the scalar per-lane loop below
- _mm_storeu_si128((__m128i *)coeff_sign, coeffs_sign);
-
- for (j = 0; j < 4; j++) {
- if (test & (1 << (4 * j))) {  // lowest mask bit of lane j: lane passed the zbin test
- int k = 4 * i + j;  // coefficient index; k != 0 selects table entry [1], k == 0 selects entry [0]
- const int64_t tmp3 = abs_coeff[j] + round_ptr[k != 0];
- const int64_t tmp4 = ((tmp3 * quant_ptr[k != 0]) >> 16) + tmp3;
- const uint32_t abs_qcoeff =
- (uint32_t)((tmp4 * quant_shift_ptr[k != 0]) >> 16);
- qcoeff_ptr[k] = (int)(abs_qcoeff ^ coeff_sign[j]) - coeff_sign[j];  // re-apply the original sign
- dqcoeff_ptr[k] = qcoeff_ptr[k] * dequant_ptr[k != 0];
- if (abs_qcoeff) eob_i = iscan[k] > eob_i ? iscan[k] : eob_i;  // track the largest iscan value among non-zero outputs
- }
- }
- }
- *eob_ptr = eob_i + 1;  // eob_i starts at -1, so an all-zero block yields 0
-}
-
-void aom_highbd_quantize_b_32x32_sse2(  // variant using halved zbin/round values, a >> 15 final shift and a /2 on the dequantized value
- const tran_low_t *coeff_ptr, intptr_t n_coeffs, const int16_t *zbin_ptr,
- const int16_t *round_ptr, const int16_t *quant_ptr,
- const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr,
- tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr,
- const int16_t *scan, const int16_t *iscan) {
- __m128i zbins[2];  // [0]: thresholds for the first group of four, [1]: thresholds for later groups
- __m128i nzbins[2];  // negated thresholds
- int idx = 0;  // number of entries stored in idx_arr
- int idx_arr[1024];  // indices selected by the pre-scan; NOTE(review): fixed capacity, assumes n_coeffs <= 1024 - confirm against callers
- int i, eob = -1;
- const int zbin0_tmp = ROUND_POWER_OF_TWO(zbin_ptr[0], 1);  // macro defined outside this view; presumably a rounded divide by 2 - confirm
- const int zbin1_tmp = ROUND_POWER_OF_TWO(zbin_ptr[1], 1);
- (void)scan;  // scan is not used; eob is derived from iscan
- zbins[0] = _mm_set_epi32(zbin1_tmp, zbin1_tmp, zbin1_tmp, zbin0_tmp);  // lane 0 = zbin0_tmp, lanes 1..3 = zbin1_tmp
- zbins[1] = _mm_set1_epi32(zbin1_tmp);
-
- nzbins[0] = _mm_setzero_si128();
- nzbins[1] = _mm_setzero_si128();
- nzbins[0] = _mm_sub_epi32(nzbins[0], zbins[0]);  // 0 - zbin
- nzbins[1] = _mm_sub_epi32(nzbins[1], zbins[1]);
-
- memset(qcoeff_ptr, 0, n_coeffs * sizeof(*qcoeff_ptr));  // outputs default to zero; only selected coefficients are overwritten
- memset(dqcoeff_ptr, 0, n_coeffs * sizeof(*dqcoeff_ptr));
-
- // Pre-scan pass
- for (i = 0; i < n_coeffs / 4; i++) {
- __m128i coeffs, cmp1, cmp2;
- int test;
- coeffs = _mm_load_si128((const __m128i *)(coeff_ptr + i * 4));  // aligned load: coeff_ptr must be 16-byte aligned
- cmp1 = _mm_cmplt_epi32(coeffs, zbins[i != 0]);  // coeff < zbin
- cmp2 = _mm_cmpgt_epi32(coeffs, nzbins[i != 0]);  // coeff > -zbin
- cmp1 = _mm_and_si128(cmp1, cmp2);  // lane mask set when the coefficient is strictly inside the dead zone
- test = _mm_movemask_epi8(cmp1);  // 4 mask bits per 32-bit lane
- if (!(test & 0xf)) idx_arr[idx++] = i * 4;  // lane 0 is outside the dead zone: record its index
- if (!(test & 0xf0)) idx_arr[idx++] = i * 4 + 1;
- if (!(test & 0xf00)) idx_arr[idx++] = i * 4 + 2;
- if (!(test & 0xf000)) idx_arr[idx++] = i * 4 + 3;
- }
-
- // Quantization pass: only process the coefficients selected in
- // pre-scan pass. Note: idx can be zero.
- for (i = 0; i < idx; i++) {
- const int rc = idx_arr[i];  // coefficient index; rc != 0 selects table entry [1], rc == 0 selects entry [0]
- const int coeff = coeff_ptr[rc];
- const int coeff_sign = (coeff >> 31);  // 0 or -1
- const int abs_coeff = (coeff ^ coeff_sign) - coeff_sign;  // absolute value: (x ^ s) - s
- const int64_t tmp1 = abs_coeff + ROUND_POWER_OF_TWO(round_ptr[rc != 0], 1);
- const int64_t tmp2 = ((tmp1 * quant_ptr[rc != 0]) >> 16) + tmp1;
- const uint32_t abs_qcoeff =
- (uint32_t)((tmp2 * quant_shift_ptr[rc != 0]) >> 15);  // shift is 15 here, not 16 as in aom_highbd_quantize_b_sse2
- qcoeff_ptr[rc] = (int)(abs_qcoeff ^ coeff_sign) - coeff_sign;  // re-apply the original sign
- dqcoeff_ptr[rc] = qcoeff_ptr[rc] * dequant_ptr[rc != 0] / 2;
- if (abs_qcoeff) eob = iscan[idx_arr[i]] > eob ? iscan[idx_arr[i]] : eob;  // track the largest iscan value among non-zero outputs
- }
- *eob_ptr = eob + 1;  // eob starts at -1, so an all-zero block yields 0
-}
diff --git a/third_party/aom/aom_dsp/x86/highbd_sad4d_sse2.asm b/third_party/aom/aom_dsp/x86/highbd_sad4d_sse2.asm
deleted file mode 100644
index e0d22522d..000000000
--- a/third_party/aom/aom_dsp/x86/highbd_sad4d_sse2.asm
+++ /dev/null
@@ -1,296 +0,0 @@
-;
-; Copyright (c) 2016, Alliance for Open Media. All rights reserved
-;
-; This source code is subject to the terms of the BSD 2 Clause License and
-; the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
-; was not distributed with this source code in the LICENSE file, you can
-; obtain it at www.aomedia.org/license/software. If the Alliance for Open
-; Media Patent License 1.0 was not distributed with this source code in the
-; PATENTS file, you can obtain it at www.aomedia.org/license/patent.
-;
-
-;
-
-%include "third_party/x86inc/x86inc.asm"
-
-SECTION .text
-
-; HIGH_PROCESS_4x2x4 first, off_{first,second}_{src,ref}, advance_at_end
-%macro HIGH_PROCESS_4x2x4 5-6 0 ; two rows of 4 samples against ref1..ref4; sums go to m4..m7; argument 6 defaults to 0
- movh m0, [srcq +%2*2] ; low half of m0 = first src row (offsets are in samples, hence the *2)
-%if %1 == 1 ; first call: m4..m7 are initialised instead of accumulated
- movu m4, [ref1q+%3*2]
- movu m5, [ref2q+%3*2]
- movu m6, [ref3q+%3*2]
- movu m7, [ref4q+%3*2]
- movhps m0, [srcq +%4*2] ; high half of m0 = second src row
- movhps m4, [ref1q+%5*2] ; high halves = second ref row
- movhps m5, [ref2q+%5*2]
- movhps m6, [ref3q+%5*2]
- movhps m7, [ref4q+%5*2]
- mova m3, m0
- mova m2, m0
- psubusw m3, m4 ; saturating src - ref1
- psubusw m2, m5
- psubusw m4, m0 ; saturating ref1 - src
- psubusw m5, m0
- por m4, m3 ; one of the two differences is zero, so OR gives the absolute difference
- por m5, m2
- pmaddwd m4, m1 ; multiply words by m1 and add adjacent pairs into dwords (m1 is set by HIGH_SADNXN4D)
- pmaddwd m5, m1
- mova m3, m0
- mova m2, m0
- psubusw m3, m6
- psubusw m2, m7
- psubusw m6, m0
- psubusw m7, m0
- por m6, m3
- por m7, m2
- pmaddwd m6, m1
- pmaddwd m7, m1
-%else ; later calls: m2/m3 are scratch, results are added to m4..m7
- movu m2, [ref1q+%3*2]
- movhps m0, [srcq +%4*2] ; high half of m0 = second src row
- movhps m2, [ref1q+%5*2]
- mova m3, m0
- psubusw m3, m2
- psubusw m2, m0
- por m2, m3 ; absolute difference against ref1
- pmaddwd m2, m1
- paddd m4, m2 ; accumulate ref1 sum
-
- movu m2, [ref2q+%3*2]
- mova m3, m0
- movhps m2, [ref2q+%5*2]
- psubusw m3, m2
- psubusw m2, m0
- por m2, m3
- pmaddwd m2, m1
- paddd m5, m2 ; accumulate ref2 sum
-
- movu m2, [ref3q+%3*2]
- mova m3, m0
- movhps m2, [ref3q+%5*2]
- psubusw m3, m2
- psubusw m2, m0
- por m2, m3
- pmaddwd m2, m1
- paddd m6, m2 ; accumulate ref3 sum
-
- movu m2, [ref4q+%3*2]
- mova m3, m0
- movhps m2, [ref4q+%5*2]
- psubusw m3, m2
- psubusw m2, m0
- por m2, m3
- pmaddwd m2, m1
- paddd m7, m2 ; accumulate ref4 sum
-%endif
-%if %6 == 1 ; advance_at_end: step every pointer down two rows (stride*2 samples = stride*4 bytes)
- lea srcq, [srcq +src_strideq*4]
- lea ref1q, [ref1q+ref_strideq*4]
- lea ref2q, [ref2q+ref_strideq*4]
- lea ref3q, [ref3q+ref_strideq*4]
- lea ref4q, [ref4q+ref_strideq*4]
-%endif
-%endmacro
-
-; HIGH_PROCESS_8x2x4 first, off_{first,second}_{src,ref}, advance_at_end
-%macro HIGH_PROCESS_8x2x4 5-6 0 ; two groups of 8 samples against ref1..ref4; sums go to m4..m7; argument 6 defaults to 0
- ; 1st 8 px
- mova m0, [srcq +%2*2] ; aligned load of 8 src samples (offsets are in samples, hence the *2)
-%if %1 == 1 ; first call: m4..m7 are initialised instead of accumulated
- movu m4, [ref1q+%3*2]
- movu m5, [ref2q+%3*2]
- movu m6, [ref3q+%3*2]
- movu m7, [ref4q+%3*2]
- mova m3, m0
- mova m2, m0
- psubusw m3, m4 ; saturating src - ref1
- psubusw m2, m5
- psubusw m4, m0 ; saturating ref1 - src
- psubusw m5, m0
- por m4, m3 ; one of the two differences is zero, so OR gives the absolute difference
- por m5, m2
- pmaddwd m4, m1 ; multiply words by m1 and add adjacent pairs into dwords (m1 is set by HIGH_SADNXN4D)
- pmaddwd m5, m1
- mova m3, m0
- mova m2, m0
- psubusw m3, m6
- psubusw m2, m7
- psubusw m6, m0
- psubusw m7, m0
- por m6, m3
- por m7, m2
- pmaddwd m6, m1
- pmaddwd m7, m1
-%else ; later calls: m2/m3 are scratch, results are added to m4..m7
- mova m3, m0
- movu m2, [ref1q+%3*2]
- psubusw m3, m2
- psubusw m2, m0
- por m2, m3
- mova m3, m0 ; reload the src copy for the next reference
- pmaddwd m2, m1
- paddd m4, m2 ; accumulate ref1 sum
- movu m2, [ref2q+%3*2]
- psubusw m3, m2
- psubusw m2, m0
- por m2, m3
- mova m3, m0
- pmaddwd m2, m1
- paddd m5, m2 ; accumulate ref2 sum
- movu m2, [ref3q+%3*2]
- psubusw m3, m2
- psubusw m2, m0
- por m2, m3
- mova m3, m0
- pmaddwd m2, m1
- paddd m6, m2 ; accumulate ref3 sum
- movu m2, [ref4q+%3*2]
- psubusw m3, m2
- psubusw m2, m0
- por m2, m3
- pmaddwd m2, m1
- paddd m7, m2 ; accumulate ref4 sum
-%endif
-
- ; 2nd 8 px
- mova m0, [srcq +(%4)*2] ; second group always accumulates, since m4..m7 are valid by now
- mova m3, m0
- movu m2, [ref1q+(%5)*2]
- psubusw m3, m2
- psubusw m2, m0
- por m2, m3
- mova m3, m0
- pmaddwd m2, m1
- paddd m4, m2
- movu m2, [ref2q+(%5)*2]
- psubusw m3, m2
- psubusw m2, m0
- por m2, m3
- mova m3, m0
- pmaddwd m2, m1
- paddd m5, m2
- movu m2, [ref3q+(%5)*2]
- psubusw m3, m2
- psubusw m2, m0
- por m2, m3
- mova m3, m0
- pmaddwd m2, m1
- paddd m6, m2
- movu m2, [ref4q+(%5)*2]
- psubusw m3, m2
- psubusw m2, m0
-%if %6 == 1 ; advance_at_end: pointer updates sit between the last loads and the final ref4 arithmetic; lea touches no xmm register
- lea srcq, [srcq +src_strideq*4]
- lea ref1q, [ref1q+ref_strideq*4]
- lea ref2q, [ref2q+ref_strideq*4]
- lea ref3q, [ref3q+ref_strideq*4]
- lea ref4q, [ref4q+ref_strideq*4]
-%endif
- por m2, m3 ; finish the ref4 absolute difference started above
- pmaddwd m2, m1
- paddd m7, m2
-%endmacro
-
-; HIGH_PROCESS_16x2x4 first, off_{first,second}_{src,ref}, advance_at_end
-%macro HIGH_PROCESS_16x2x4 5-6 0 ; two rows of 16 samples, built from two 8x2x4 calls
- HIGH_PROCESS_8x2x4 %1, %2, %3, (%2 + 8), (%3 + 8) ; first row: samples 0-7 and 8-15; forwards the 'first' flag
- HIGH_PROCESS_8x2x4 0, %4, %5, (%4 + 8), (%5 + 8), %6 ; second row: always accumulates; forwards advance_at_end
-%endmacro
-
-; HIGH_PROCESS_32x2x4 first, off_{first,second}_{src,ref}, advance_at_end
-%macro HIGH_PROCESS_32x2x4 5-6 0 ; two rows of 32 samples, built from two 16x2x4 calls
- HIGH_PROCESS_16x2x4 %1, %2, %3, (%2 + 16), (%3 + 16) ; first row: samples 0-15 and 16-31; forwards the 'first' flag
- HIGH_PROCESS_16x2x4 0, %4, %5, (%4 + 16), (%5 + 16), %6 ; second row: always accumulates; forwards advance_at_end
-%endmacro
-
-; HIGH_PROCESS_64x2x4 first, off_{first,second}_{src,ref}, advance_at_end
-%macro HIGH_PROCESS_64x2x4 5-6 0 ; two rows of 64 samples, built from two 32x2x4 calls
- HIGH_PROCESS_32x2x4 %1, %2, %3, (%2 + 32), (%3 + 32) ; first row: samples 0-31 and 32-63; forwards the 'first' flag
- HIGH_PROCESS_32x2x4 0, %4, %5, (%4 + 32), (%5 + 32), %6 ; second row: always accumulates; forwards advance_at_end
-%endmacro
-
-; void aom_highbd_sadNxNx4d_sse2(uint8_t *src, int src_stride,
-; uint8_t *ref[4], int ref_stride,
-; uint32_t res[4]);
-; where NxN is any block size instantiated at the end of this file (64x64 down to 4x4)
-%macro HIGH_SADNXN4D 2 ; %1 = block width, %2 = block height; emits highbd_sad%1x%2x4d
-%if UNIX64
-cglobal highbd_sad%1x%2x4d, 5, 8, 8, src, src_stride, ref1, ref_stride, \
- res, ref2, ref3, ref4
-%else ; other ABIs: res is not loaded up front; it is fetched into r4 just before the store
-cglobal highbd_sad%1x%2x4d, 4, 7, 8, src, src_stride, ref1, ref_stride, \
- ref2, ref3, ref4
-%endif
-
-; set m1
- push srcq ; srcq is borrowed as a scratch GPR and restored below
- mov srcd, 0x00010001 ; two 16-bit ones
- movd m1, srcd
- pshufd m1, m1, 0x0 ; broadcast: every word of m1 = 1, so pmaddwd sums adjacent word pairs
- pop srcq
-
- movsxdifnidn src_strideq, src_strided
- movsxdifnidn ref_strideq, ref_strided
- mov ref2q, [ref1q+gprsize*1] ; ref1q initially holds the ref[4] pointer array
- mov ref3q, [ref1q+gprsize*2]
- mov ref4q, [ref1q+gprsize*3]
- mov ref1q, [ref1q+gprsize*0] ; loaded last because it overwrites the array base
-
-; convert byte pointers to short pointers
- shl srcq, 1
- shl ref2q, 1
- shl ref3q, 1
- shl ref4q, 1
- shl ref1q, 1
-
- HIGH_PROCESS_%1x2x4 1, 0, 0, src_strideq, ref_strideq, 1 ; rows 0-1: initialise m4..m7, then advance
-%rep (%2-4)/2 ; middle row pairs; with the first and last call this covers %2/2 pairs in total
- HIGH_PROCESS_%1x2x4 0, 0, 0, src_strideq, ref_strideq, 1
-%endrep
- HIGH_PROCESS_%1x2x4 0, 0, 0, src_strideq, ref_strideq, 0 ; last two rows: no pointer advance needed
- ; N.B. HIGH_PROCESS outputs dwords (32 bits)
- ; so in high bit depth even the smallest width (4) needs 128bits i.e. XMM
- movhlps m0, m4 ; fold the upper two dwords of each accumulator onto the lower two
- movhlps m1, m5
- movhlps m2, m6
- movhlps m3, m7
- paddd m4, m0
- paddd m5, m1
- paddd m6, m2
- paddd m7, m3
- punpckldq m4, m5 ; interleave ref1/ref2 partial sums
- punpckldq m6, m7 ; interleave ref3/ref4 partial sums
- movhlps m0, m4
- movhlps m1, m6
- paddd m4, m0 ; low qword = {sum ref1, sum ref2}
- paddd m6, m1 ; low qword = {sum ref3, sum ref4}
- punpcklqdq m4, m6 ; m4 = {ref1, ref2, ref3, ref4} sums
- movifnidn r4, r4mp ; fetch the res pointer if it is not already in r4
- movu [r4], m4 ; store the four 32-bit results
- RET
-%endmacro
-
-
-INIT_XMM sse2 ; every instantiation below is built for XMM registers with the sse2 suffix
-HIGH_SADNXN4D 64, 64
-HIGH_SADNXN4D 64, 32
-HIGH_SADNXN4D 32, 64
-HIGH_SADNXN4D 32, 32
-HIGH_SADNXN4D 32, 16
-HIGH_SADNXN4D 16, 32
-HIGH_SADNXN4D 16, 16
-HIGH_SADNXN4D 16, 8
-HIGH_SADNXN4D 8, 16
-HIGH_SADNXN4D 8, 8
-HIGH_SADNXN4D 8, 4
-HIGH_SADNXN4D 4, 8
-HIGH_SADNXN4D 4, 4
-HIGH_SADNXN4D 4, 16
-HIGH_SADNXN4D 16, 4
-HIGH_SADNXN4D 8, 32
-HIGH_SADNXN4D 32, 8
-HIGH_SADNXN4D 16, 64
-HIGH_SADNXN4D 64, 16
diff --git a/third_party/aom/aom_dsp/x86/highbd_sad_sse2.asm b/third_party/aom/aom_dsp/x86/highbd_sad_sse2.asm
deleted file mode 100644
index 3398d8a2a..000000000
--- a/third_party/aom/aom_dsp/x86/highbd_sad_sse2.asm
+++ /dev/null
@@ -1,374 +0,0 @@
-;
-; Copyright (c) 2016, Alliance for Open Media. All rights reserved
-;
-; This source code is subject to the terms of the BSD 2 Clause License and
-; the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
-; was not distributed with this source code in the LICENSE file, you can
-; obtain it at www.aomedia.org/license/software. If the Alliance for Open
-; Media Patent License 1.0 was not distributed with this source code in the
-; PATENTS file, you can obtain it at www.aomedia.org/license/patent.
-;
-
-;
-
-%include "third_party/x86inc/x86inc.asm"
-
-SECTION .text
-
-%macro HIGH_SAD_FN 4 ; shared prologue: %1 = width, %2 = height, %3 = GPR count (5 or 7), %4 = 1 for the _avg variant
-%if %4 == 0 ; plain SAD
-%if %3 == 5
-cglobal highbd_sad%1x%2, 4, %3, 7, src, src_stride, ref, ref_stride, n_rows
-%else ; %3 == 7
-cglobal highbd_sad%1x%2, 4, %3, 7, src, src_stride, ref, ref_stride, \
- src_stride3, ref_stride3, n_rows
-%endif ; %3 == 5/7
-%else ; avg
-%if %3 == 5
-cglobal highbd_sad%1x%2_avg, 5, 1 + %3, 7, src, src_stride, ref, ref_stride, \
- second_pred, n_rows
-%else ; %3 == 7
-cglobal highbd_sad%1x%2_avg, 5, ARCH_X86_64 + %3, 7, src, src_stride, \
- ref, ref_stride, \
- second_pred, \
- src_stride3, ref_stride3
-%if ARCH_X86_64
-%define n_rowsd r7d ; the extra register requested via ARCH_X86_64 + %3 holds the row counter
-%else ; x86-32
-%define n_rowsd dword r0m ; row counter kept in memory; NOTE(review): r0m is presumably the first argument's stack slot per x86inc - confirm
-%endif ; x86-32/64
-%endif ; %3 == 5/7
-%endif ; avg/sad
- movsxdifnidn src_strideq, src_strided
- movsxdifnidn ref_strideq, ref_strided
-%if %3 == 7 ; the 7-register form precomputes 3*stride for addressing a fourth row
- lea src_stride3q, [src_strideq*3]
- lea ref_stride3q, [ref_strideq*3]
-%endif ; %3 == 7
-; convert src, ref & second_pred to short ptrs (from byte ptrs)
- shl srcq, 1
- shl refq, 1
-%if %4 == 1
- shl second_predq, 1
-%endif
-%endmacro
-
-; unsigned int aom_highbd_sad64x{16,32,64}_sse2(uint8_t *src, int src_stride,
-; uint8_t *ref, int ref_stride);
-%macro HIGH_SAD64XN 1-2 0 ; %1 = height, %2 = 1 for the _avg variant (defaults to 0)
- HIGH_SAD_FN 64, %1, 5, %2
- mov n_rowsd, %1 ; one row per loop iteration
- pxor m0, m0 ; m0 = dword SAD accumulator
- pxor m6, m6 ; m6 = zero, used to widen words to dwords
-
-.loop:
- ; first half of each row
- movu m1, [refq]
- movu m2, [refq+16]
- movu m3, [refq+32]
- movu m4, [refq+48]
-%if %2 == 1 ; avg: average ref with second_pred, then move second_pred past the 32 samples consumed
- pavgw m1, [second_predq+mmsize*0]
- pavgw m2, [second_predq+mmsize*1]
- pavgw m3, [second_predq+mmsize*2]
- pavgw m4, [second_predq+mmsize*3]
- lea second_predq, [second_predq+mmsize*4]
-%endif
- mova m5, [srcq]
- psubusw m5, m1 ; saturating src - ref
- psubusw m1, [srcq] ; saturating ref - src
- por m1, m5 ; one side is zero, so OR gives the absolute difference
- mova m5, [srcq+16]
- psubusw m5, m2
- psubusw m2, [srcq+16]
- por m2, m5
- mova m5, [srcq+32]
- psubusw m5, m3
- psubusw m3, [srcq+32]
- por m3, m5
- mova m5, [srcq+48]
- psubusw m5, m4
- psubusw m4, [srcq+48]
- por m4, m5
- paddw m1, m2 ; combine in 16-bit lanes first
- paddw m3, m4
- movhlps m2, m1 ; fold the upper four words onto the lower four
- movhlps m4, m3
- paddw m1, m2
- paddw m3, m4
- punpcklwd m1, m6 ; zero-extend the low four words to dwords
- punpcklwd m3, m6
- paddd m0, m1
- paddd m0, m3
- ; second half of each row
- movu m1, [refq+64]
- movu m2, [refq+80]
- movu m3, [refq+96]
- movu m4, [refq+112]
-%if %2 == 1
- pavgw m1, [second_predq+mmsize*0]
- pavgw m2, [second_predq+mmsize*1]
- pavgw m3, [second_predq+mmsize*2]
- pavgw m4, [second_predq+mmsize*3]
- lea second_predq, [second_predq+mmsize*4]
-%endif
- mova m5, [srcq+64]
- psubusw m5, m1
- psubusw m1, [srcq+64]
- por m1, m5
- mova m5, [srcq+80]
- psubusw m5, m2
- psubusw m2, [srcq+80]
- por m2, m5
- mova m5, [srcq+96]
- psubusw m5, m3
- psubusw m3, [srcq+96]
- por m3, m5
- mova m5, [srcq+112]
- psubusw m5, m4
- psubusw m4, [srcq+112]
- por m4, m5
- paddw m1, m2
- paddw m3, m4
- movhlps m2, m1
- movhlps m4, m3
- paddw m1, m2
- paddw m3, m4
- punpcklwd m1, m6
- punpcklwd m3, m6
- lea refq, [refq+ref_strideq*2] ; next row (stride is in samples, 2 bytes each)
- paddd m0, m1
- lea srcq, [srcq+src_strideq*2]
- paddd m0, m3
-
- dec n_rowsd
- jg .loop
-
- movhlps m1, m0 ; horizontal add of the four dwords in m0
- paddd m0, m1
- punpckldq m0, m6 ; spread the two remaining dwords so movhlps can reach the second one
- movhlps m1, m0
- paddd m0, m1
- movd eax, m0 ; return the total SAD
- RET
-%endmacro
-
-INIT_XMM sse2
-HIGH_SAD64XN 64 ; highbd_sad64x64_sse2
-HIGH_SAD64XN 32 ; highbd_sad64x32_sse2
-HIGH_SAD64XN 64, 1 ; highbd_sad64x64_avg_sse2
-HIGH_SAD64XN 32, 1 ; highbd_sad64x32_avg_sse2
-HIGH_SAD64XN 16 ; highbd_sad64x16_sse2
-HIGH_SAD64XN 16, 1 ; highbd_sad64x16_avg_sse2
-
-; unsigned int aom_highbd_sad32x{8,16,32,64}_sse2(uint8_t *src, int src_stride,
-; uint8_t *ref, int ref_stride);
-%macro HIGH_SAD32XN 1-2 0 ; %1 = height, %2 = 1 for the _avg variant (defaults to 0)
- HIGH_SAD_FN 32, %1, 5, %2
- mov n_rowsd, %1 ; one row per loop iteration
- pxor m0, m0 ; m0 = dword SAD accumulator
- pxor m6, m6 ; m6 = zero, used to widen words to dwords
-
-.loop:
- movu m1, [refq] ; one full 32-sample row: 4 x 16 bytes
- movu m2, [refq+16]
- movu m3, [refq+32]
- movu m4, [refq+48]
-%if %2 == 1 ; avg: average ref with second_pred, then move second_pred past the row consumed
- pavgw m1, [second_predq+mmsize*0]
- pavgw m2, [second_predq+mmsize*1]
- pavgw m3, [second_predq+mmsize*2]
- pavgw m4, [second_predq+mmsize*3]
- lea second_predq, [second_predq+mmsize*4]
-%endif
- mova m5, [srcq]
- psubusw m5, m1 ; saturating src - ref
- psubusw m1, [srcq] ; saturating ref - src
- por m1, m5 ; one side is zero, so OR gives the absolute difference
- mova m5, [srcq+16]
- psubusw m5, m2
- psubusw m2, [srcq+16]
- por m2, m5
- mova m5, [srcq+32]
- psubusw m5, m3
- psubusw m3, [srcq+32]
- por m3, m5
- mova m5, [srcq+48]
- psubusw m5, m4
- psubusw m4, [srcq+48]
- por m4, m5
- paddw m1, m2 ; combine in 16-bit lanes first
- paddw m3, m4
- movhlps m2, m1 ; fold the upper four words onto the lower four
- movhlps m4, m3
- paddw m1, m2
- paddw m3, m4
- punpcklwd m1, m6 ; zero-extend the low four words to dwords
- punpcklwd m3, m6
- lea refq, [refq+ref_strideq*2] ; next row (stride is in samples, 2 bytes each)
- paddd m0, m1
- lea srcq, [srcq+src_strideq*2]
- paddd m0, m3
- dec n_rowsd
- jg .loop
-
- movhlps m1, m0 ; horizontal add of the four dwords in m0
- paddd m0, m1
- punpckldq m0, m6 ; spread the two remaining dwords so movhlps can reach the second one
- movhlps m1, m0
- paddd m0, m1
- movd eax, m0 ; return the total SAD
- RET
-%endmacro
-
-INIT_XMM sse2
-HIGH_SAD32XN 64 ; highbd_sad32x64_sse2
-HIGH_SAD32XN 32 ; highbd_sad32x32_sse2
-HIGH_SAD32XN 16 ; highbd_sad32x16_sse2
-HIGH_SAD32XN 64, 1 ; highbd_sad32x64_avg_sse2
-HIGH_SAD32XN 32, 1 ; highbd_sad32x32_avg_sse2
-HIGH_SAD32XN 16, 1 ; highbd_sad32x16_avg_sse2
-HIGH_SAD32XN 8 ; highbd_sad32x8_sse2
-HIGH_SAD32XN 8, 1 ; highbd_sad32x8_avg_sse2
-
-; unsigned int aom_highbd_sad16x{4,8,16,32,64}_sse2(uint8_t *src, int src_stride,
-; uint8_t *ref, int ref_stride);
-%macro HIGH_SAD16XN 1-2 0 ; %1 = height, %2 = 1 for the _avg variant (defaults to 0)
- HIGH_SAD_FN 16, %1, 5, %2
- mov n_rowsd, %1/2 ; two rows per loop iteration
- pxor m0, m0 ; m0 = dword SAD accumulator
- pxor m6, m6 ; m6 = zero, used to widen words to dwords
-
-.loop:
- movu m1, [refq] ; row 0, samples 0-7
- movu m2, [refq+16] ; row 0, samples 8-15
- movu m3, [refq+ref_strideq*2] ; row 1, samples 0-7
- movu m4, [refq+ref_strideq*2+16] ; row 1, samples 8-15
-%if %2 == 1 ; avg: second_pred is read as consecutive 32-byte rows
- pavgw m1, [second_predq+mmsize*0]
- pavgw m2, [second_predq+16]
- pavgw m3, [second_predq+mmsize*2]
- pavgw m4, [second_predq+mmsize*2+16]
- lea second_predq, [second_predq+mmsize*4]
-%endif
- mova m5, [srcq]
- psubusw m5, m1 ; saturating src - ref
- psubusw m1, [srcq] ; saturating ref - src
- por m1, m5 ; one side is zero, so OR gives the absolute difference
- mova m5, [srcq+16]
- psubusw m5, m2
- psubusw m2, [srcq+16]
- por m2, m5
- mova m5, [srcq+src_strideq*2]
- psubusw m5, m3
- psubusw m3, [srcq+src_strideq*2]
- por m3, m5
- mova m5, [srcq+src_strideq*2+16]
- psubusw m5, m4
- psubusw m4, [srcq+src_strideq*2+16]
- por m4, m5
- paddw m1, m2 ; combine in 16-bit lanes first
- paddw m3, m4
- movhlps m2, m1 ; fold the upper four words onto the lower four
- movhlps m4, m3
- paddw m1, m2
- paddw m3, m4
- punpcklwd m1, m6 ; zero-extend the low four words to dwords
- punpcklwd m3, m6
- lea refq, [refq+ref_strideq*4] ; advance two rows
- paddd m0, m1
- lea srcq, [srcq+src_strideq*4]
- paddd m0, m3
- dec n_rowsd
- jg .loop
-
- movhlps m1, m0 ; horizontal add of the four dwords in m0
- paddd m0, m1
- punpckldq m0, m6 ; spread the two remaining dwords so movhlps can reach the second one
- movhlps m1, m0
- paddd m0, m1
- movd eax, m0 ; return the total SAD
- RET
-%endmacro
-
-INIT_XMM sse2
-HIGH_SAD16XN 32 ; highbd_sad16x32_sse2
-HIGH_SAD16XN 16 ; highbd_sad16x16_sse2
-HIGH_SAD16XN 8 ; highbd_sad16x8_sse2
-HIGH_SAD16XN 32, 1 ; highbd_sad16x32_avg_sse2
-HIGH_SAD16XN 16, 1 ; highbd_sad16x16_avg_sse2
-HIGH_SAD16XN 8, 1 ; highbd_sad16x8_avg_sse2
-HIGH_SAD16XN 4 ; highbd_sad16x4_sse2
-HIGH_SAD16XN 4, 1 ; highbd_sad16x4_avg_sse2
-HIGH_SAD16XN 64 ; highbd_sad16x64_sse2
-HIGH_SAD16XN 64, 1 ; highbd_sad16x64_avg_sse2
-
-; unsigned int aom_highbd_sad8x{4,8,16,32}_sse2(uint8_t *src, int src_stride,
-; uint8_t *ref, int ref_stride);
-%macro HIGH_SAD8XN 1-2 0 ; %1 = height, %2 = 1 for the _avg variant (defaults to 0)
- HIGH_SAD_FN 8, %1, 7, %2 ; 7-register form: provides src_stride3q / ref_stride3q
- mov n_rowsd, %1/4 ; four rows per loop iteration
- pxor m0, m0 ; m0 = dword SAD accumulator
- pxor m6, m6 ; m6 = zero, used to widen words to dwords
-
-.loop:
- movu m1, [refq] ; row 0
- movu m2, [refq+ref_strideq*2] ; row 1 (stride is in samples, 2 bytes each)
- movu m3, [refq+ref_strideq*4] ; row 2
- movu m4, [refq+ref_stride3q*2] ; row 3
-%if %2 == 1 ; avg: second_pred is read as four consecutive 16-byte rows
- pavgw m1, [second_predq+mmsize*0]
- pavgw m2, [second_predq+mmsize*1]
- pavgw m3, [second_predq+mmsize*2]
- pavgw m4, [second_predq+mmsize*3]
- lea second_predq, [second_predq+mmsize*4]
-%endif
- mova m5, [srcq]
- psubusw m5, m1 ; saturating src - ref
- psubusw m1, [srcq] ; saturating ref - src
- por m1, m5 ; one side is zero, so OR gives the absolute difference
- mova m5, [srcq+src_strideq*2]
- psubusw m5, m2
- psubusw m2, [srcq+src_strideq*2]
- por m2, m5
- mova m5, [srcq+src_strideq*4]
- psubusw m5, m3
- psubusw m3, [srcq+src_strideq*4]
- por m3, m5
- mova m5, [srcq+src_stride3q*2]
- psubusw m5, m4
- psubusw m4, [srcq+src_stride3q*2]
- por m4, m5
- paddw m1, m2 ; combine in 16-bit lanes first
- paddw m3, m4
- movhlps m2, m1 ; fold the upper four words onto the lower four
- movhlps m4, m3
- paddw m1, m2
- paddw m3, m4
- punpcklwd m1, m6 ; zero-extend the low four words to dwords
- punpcklwd m3, m6
- lea refq, [refq+ref_strideq*8] ; advance four rows
- paddd m0, m1
- lea srcq, [srcq+src_strideq*8]
- paddd m0, m3
- dec n_rowsd
- jg .loop
-
- movhlps m1, m0 ; horizontal add of the four dwords in m0
- paddd m0, m1
- punpckldq m0, m6 ; spread the two remaining dwords so movhlps can reach the second one
- movhlps m1, m0
- paddd m0, m1
- movd eax, m0 ; return the total SAD
- RET
-%endmacro
-
-INIT_XMM sse2
-HIGH_SAD8XN 16 ; highbd_sad8x16_sse2
-HIGH_SAD8XN 8 ; highbd_sad8x8_sse2
-HIGH_SAD8XN 4 ; highbd_sad8x4_sse2
-HIGH_SAD8XN 16, 1 ; highbd_sad8x16_avg_sse2
-HIGH_SAD8XN 8, 1 ; highbd_sad8x8_avg_sse2
-HIGH_SAD8XN 4, 1 ; highbd_sad8x4_avg_sse2
-HIGH_SAD8XN 32 ; highbd_sad8x32_sse2
-HIGH_SAD8XN 32, 1 ; highbd_sad8x32_avg_sse2
diff --git a/third_party/aom/aom_dsp/x86/highbd_subpel_variance_impl_sse2.asm b/third_party/aom/aom_dsp/x86/highbd_subpel_variance_impl_sse2.asm
deleted file mode 100644
index 61f5b8e86..000000000
--- a/third_party/aom/aom_dsp/x86/highbd_subpel_variance_impl_sse2.asm
+++ /dev/null
@@ -1,1036 +0,0 @@
-;
-; Copyright (c) 2016, Alliance for Open Media. All rights reserved
-;
-; This source code is subject to the terms of the BSD 2 Clause License and
-; the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
-; was not distributed with this source code in the LICENSE file, you can
-; obtain it at www.aomedia.org/license/software. If the Alliance for Open
-; Media Patent License 1.0 was not distributed with this source code in the
-; PATENTS file, you can obtain it at www.aomedia.org/license/patent.
-;
-
-;
-
-%include "third_party/x86inc/x86inc.asm"
-
-SECTION_RODATA
-pw_8: times 8 dw 8 ; eight words of 8: rounding term added before the >> 4 in the bilinear paths
-bilin_filter_m_sse2: times 8 dw 16 ; table of (a, b) tap pairs with a + b = 16; each entry is 2 x 16 bytes, matching filter_idx_shift = 5
- times 8 dw 0
- times 8 dw 14 ; entry 1: (14, 2)
- times 8 dw 2
- times 8 dw 12 ; entry 2: (12, 4)
- times 8 dw 4
- times 8 dw 10 ; entry 3: (10, 6)
- times 8 dw 6
- times 16 dw 8 ; entry 4: (8, 8), both halves emitted at once
- times 8 dw 6 ; entry 5: (6, 10)
- times 8 dw 10
- times 8 dw 4 ; entry 6: (4, 12)
- times 8 dw 12
- times 8 dw 2 ; entry 7: (2, 14)
- times 8 dw 14
-
-SECTION .text
-
-; int aom_sub_pixel_varianceNxh(const uint8_t *src, ptrdiff_t src_stride,
-; int x_offset, int y_offset,
-; const uint8_t *dst, ptrdiff_t dst_stride,
-; int height, unsigned int *sse);
-;
-; This function returns the SE and stores SSE in the given pointer.
-
-%macro SUM_SSE 6 ; src1, dst1, src2, dst2, sum, sse
- psubw %3, %4 ; %3 = src2 - dst2 (word differences)
- psubw %1, %2 ; %1 = src1 - dst1
- mova %4, %3 ; make copies to manipulate to calc sum
- mova %2, %1 ; use originals for calc sse
- pmaddwd %3, %3 ; squared differences, adjacent pairs summed into dwords
- paddw %4, %2 ; add both difference vectors word-wise
- pmaddwd %1, %1
- movhlps %2, %4 ; fold the upper four words of the sum onto the lower four
- paddd %6, %3 ; accumulate sse
- paddw %4, %2
- pxor %2, %2
- pcmpgtw %2, %4 ; mask for 0 > %4 (sum)
- punpcklwd %4, %2 ; sign-extend word to dword
- paddd %6, %1 ; accumulate sse
- paddd %5, %4 ; accumulate sum; all of %1..%4 are clobbered by this macro
-
-%endmacro
-
-%macro STORE_AND_RET 0 ; reduce m6 (sum) and m7 (sse) to scalars, store sse, return sum
-%if mmsize == 16
- ; if H=64 and W=16, we have 8 words of each 2(1bit)x64(6bit)x9bit=16bit
- ; in m6, i.e. it _exactly_ fits in a signed word per word in the xmm reg.
- ; We have to sign-extend it before adding the words within the register
- ; and outputting to a dword.
- movhlps m3, m7 ; fold the upper two dwords onto the lower two
- movhlps m4, m6
- paddd m7, m3
- paddd m6, m4
- pshufd m3, m7, 0x1 ; bring dword 1 down to position 0
- pshufd m4, m6, 0x1
- paddd m7, m3 ; low dword of m7 = total sse
- paddd m6, m4 ; low dword of m6 = total sum
- mov r1, ssem ; r1 = unsigned int *sse
- movd [r1], m7 ; store sse
- movd eax, m6 ; store sum as return value
-%endif
- RET
-%endmacro
-
-%macro INC_SRC_BY_SRC_STRIDE 0 ; advance srcq by one row, i.e. 2 * src_stride bytes
-%if ARCH_X86=1 && CONFIG_PIC=1 ; 32-bit PIC build: the stride is read from its memory operand
- add srcq, src_stridemp ; added twice because a memory operand cannot be scaled by 2
- add srcq, src_stridemp
-%else
- lea srcq, [srcq + src_strideq*2]
-%endif
-%endmacro
-
-%macro SUBPEL_VARIANCE 1-2 0 ; W
-%define bilin_filter_m bilin_filter_m_sse2
-%define filter_idx_shift 5
-
-
-%if ARCH_X86_64
- %if %2 == 1 ; avg
- cglobal highbd_sub_pixel_avg_variance%1xh, 9, 10, 13, src, src_stride, \
- x_offset, y_offset, \
- dst, dst_stride, \
- sec, sec_stride, height, sse
- %define sec_str sec_strideq
- %else
- cglobal highbd_sub_pixel_variance%1xh, 7, 8, 13, src, src_stride, \
- x_offset, y_offset, \
- dst, dst_stride, height, sse
- %endif
- %define block_height heightd
- %define bilin_filter sseq
-%else
- %if CONFIG_PIC=1
- %if %2 == 1 ; avg
- cglobal highbd_sub_pixel_avg_variance%1xh, 7, 7, 13, src, src_stride, \
- x_offset, y_offset, \
- dst, dst_stride, \
- sec, sec_stride, height, sse, \
- g_bilin_filter, g_pw_8
- %define block_height dword heightm
- %define sec_str sec_stridemp
-
- ; Store bilin_filter and pw_8 location in stack
- %if GET_GOT_DEFINED == 1
- GET_GOT eax
- add esp, 4 ; restore esp
- %endif
-
- lea ecx, [GLOBAL(bilin_filter_m)]
- mov g_bilin_filterm, ecx
-
- lea ecx, [GLOBAL(pw_8)]
- mov g_pw_8m, ecx
-
- LOAD_IF_USED 0, 1 ; load eax, ecx back
- %else
- cglobal highbd_sub_pixel_variance%1xh, 7, 7, 13, src, src_stride, \
- x_offset, y_offset, \
- dst, dst_stride, height, sse, \
- g_bilin_filter, g_pw_8
- %define block_height heightd
-
- ; Store bilin_filter and pw_8 location in stack
- %if GET_GOT_DEFINED == 1
- GET_GOT eax
- add esp, 4 ; restore esp
- %endif
-
- lea ecx, [GLOBAL(bilin_filter_m)]
- mov g_bilin_filterm, ecx
-
- lea ecx, [GLOBAL(pw_8)]
- mov g_pw_8m, ecx
-
- LOAD_IF_USED 0, 1 ; load eax, ecx back
- %endif
- %else
- %if %2 == 1 ; avg
- cglobal highbd_sub_pixel_avg_variance%1xh, 7, 7, 13, src, src_stride, \
- x_offset, y_offset, \
- dst, dst_stride, \
- sec, sec_stride, height, sse
- %define block_height dword heightm
- %define sec_str sec_stridemp
- %else
- cglobal highbd_sub_pixel_variance%1xh, 7, 7, 13, src, src_stride, \
- x_offset, y_offset, \
- dst, dst_stride, height, sse
- %define block_height heightd
- %endif
-
- %define bilin_filter bilin_filter_m
- %endif
-%endif
-
- ASSERT %1 <= 16 ; m6 overflows if w > 16
- pxor m6, m6 ; sum
- pxor m7, m7 ; sse
-
-%if %1 < 16
- sar block_height, 1
-%endif
-%if %2 == 1 ; avg
- shl sec_str, 1
-%endif
-
- ; FIXME(rbultje) replace by jumptable?
- test x_offsetd, x_offsetd
- jnz .x_nonzero
- ; x_offset == 0
- test y_offsetd, y_offsetd
- jnz .x_zero_y_nonzero
-
- ; x_offset == 0 && y_offset == 0
-.x_zero_y_zero_loop:
-%if %1 == 16
- movu m0, [srcq]
- movu m2, [srcq + 16]
- mova m1, [dstq]
- mova m3, [dstq + 16]
-%if %2 == 1 ; avg
- pavgw m0, [secq]
- pavgw m2, [secq+16]
-%endif
- SUM_SSE m0, m1, m2, m3, m6, m7
-
- lea srcq, [srcq + src_strideq*2]
- lea dstq, [dstq + dst_strideq*2]
-%if %2 == 1 ; avg
- add secq, sec_str
-%endif
-%else ; %1 < 16
- movu m0, [srcq]
- movu m2, [srcq + src_strideq*2]
- mova m1, [dstq]
- mova m3, [dstq + dst_strideq*2]
-%if %2 == 1 ; avg
- pavgw m0, [secq]
- add secq, sec_str
- pavgw m2, [secq]
-%endif
- SUM_SSE m0, m1, m2, m3, m6, m7
-
- lea srcq, [srcq + src_strideq*4]
- lea dstq, [dstq + dst_strideq*4]
-%if %2 == 1 ; avg
- add secq, sec_str
-%endif
-%endif
- dec block_height
- jg .x_zero_y_zero_loop
- STORE_AND_RET
-
-.x_zero_y_nonzero:
- cmp y_offsetd, 8
- jne .x_zero_y_nonhalf
-
- ; x_offset == 0 && y_offset == 0.5
-.x_zero_y_half_loop:
-%if %1 == 16
- movu m0, [srcq]
- movu m1, [srcq+16]
- movu m4, [srcq+src_strideq*2]
- movu m5, [srcq+src_strideq*2+16]
- mova m2, [dstq]
- mova m3, [dstq+16]
- pavgw m0, m4
- pavgw m1, m5
-%if %2 == 1 ; avg
- pavgw m0, [secq]
- pavgw m1, [secq+16]
-%endif
- SUM_SSE m0, m2, m1, m3, m6, m7
-
- lea srcq, [srcq + src_strideq*2]
- lea dstq, [dstq + dst_strideq*2]
-%if %2 == 1 ; avg
- add secq, sec_str
-%endif
-%else ; %1 < 16
- movu m0, [srcq]
- movu m1, [srcq+src_strideq*2]
- movu m5, [srcq+src_strideq*4]
- mova m2, [dstq]
- mova m3, [dstq+dst_strideq*2]
- pavgw m0, m1
- pavgw m1, m5
-%if %2 == 1 ; avg
- pavgw m0, [secq]
- add secq, sec_str
- pavgw m1, [secq]
-%endif
- SUM_SSE m0, m2, m1, m3, m6, m7
-
- lea srcq, [srcq + src_strideq*4]
- lea dstq, [dstq + dst_strideq*4]
-%if %2 == 1 ; avg
- add secq, sec_str
-%endif
-%endif
- dec block_height
- jg .x_zero_y_half_loop
- STORE_AND_RET
-
-.x_zero_y_nonhalf:
- ; x_offset == 0 && y_offset == bilin interpolation
-%if ARCH_X86_64
- lea bilin_filter, [GLOBAL(bilin_filter_m)]
-%endif
- shl y_offsetd, filter_idx_shift
-%if ARCH_X86_64 && mmsize == 16
- mova m8, [bilin_filter+y_offsetq]
- mova m9, [bilin_filter+y_offsetq+16]
- mova m10, [GLOBAL(pw_8)]
-%define filter_y_a m8
-%define filter_y_b m9
-%define filter_rnd m10
-%else ; x86-32 or mmx
-%if ARCH_X86=1 && CONFIG_PIC=1
-; x_offset == 0, reuse x_offset reg
-%define tempq x_offsetq
- add y_offsetq, g_bilin_filterm
-%define filter_y_a [y_offsetq]
-%define filter_y_b [y_offsetq+16]
- mov tempq, g_pw_8m
-%define filter_rnd [tempq]
-%else
- add y_offsetq, bilin_filter
-%define filter_y_a [y_offsetq]
-%define filter_y_b [y_offsetq+16]
-%define filter_rnd [GLOBAL(pw_8)]
-%endif
-%endif
-
-.x_zero_y_other_loop:
-%if %1 == 16
- movu m0, [srcq]
- movu m1, [srcq + 16]
- movu m4, [srcq+src_strideq*2]
- movu m5, [srcq+src_strideq*2+16]
- mova m2, [dstq]
- mova m3, [dstq+16]
- ; FIXME(rbultje) instead of out=((num-x)*in1+x*in2+rnd)>>log2(num), we can
- ; also do out=in1+(((num-x)*(in2-in1)+rnd)>>log2(num)). Total number of
- ; instructions is the same (5), but it is 1 mul instead of 2, so might be
- ; slightly faster because of pmullw latency. It would also cut our rodata
- ; tables in half for this function, and save 1-2 registers on x86-64.
- pmullw m1, filter_y_a
- pmullw m5, filter_y_b
- paddw m1, filter_rnd
- pmullw m0, filter_y_a
- pmullw m4, filter_y_b
- paddw m0, filter_rnd
- paddw m1, m5
- paddw m0, m4
- psrlw m1, 4
- psrlw m0, 4
-%if %2 == 1 ; avg
- pavgw m0, [secq]
- pavgw m1, [secq+16]
-%endif
- SUM_SSE m0, m2, m1, m3, m6, m7
-
- lea srcq, [srcq + src_strideq*2]
- lea dstq, [dstq + dst_strideq*2]
-%if %2 == 1 ; avg
- add secq, sec_str
-%endif
-%else ; %1 < 16
- movu m0, [srcq]
- movu m1, [srcq+src_strideq*2]
- movu m5, [srcq+src_strideq*4]
- mova m4, m1
- mova m2, [dstq]
- mova m3, [dstq+dst_strideq*2]
- pmullw m1, filter_y_a
- pmullw m5, filter_y_b
- paddw m1, filter_rnd
- pmullw m0, filter_y_a
- pmullw m4, filter_y_b
- paddw m0, filter_rnd
- paddw m1, m5
- paddw m0, m4
- psrlw m1, 4
- psrlw m0, 4
-%if %2 == 1 ; avg
- pavgw m0, [secq]
- add secq, sec_str
- pavgw m1, [secq]
-%endif
- SUM_SSE m0, m2, m1, m3, m6, m7
-
- lea srcq, [srcq + src_strideq*4]
- lea dstq, [dstq + dst_strideq*4]
-%if %2 == 1 ; avg
- add secq, sec_str
-%endif
-%endif
- dec block_height
- jg .x_zero_y_other_loop
-%undef filter_y_a
-%undef filter_y_b
-%undef filter_rnd
- STORE_AND_RET
-
-.x_nonzero:
- cmp x_offsetd, 8
- jne .x_nonhalf
- ; x_offset == 0.5
- test y_offsetd, y_offsetd
- jnz .x_half_y_nonzero
-
- ; x_offset == 0.5 && y_offset == 0
-.x_half_y_zero_loop:
-%if %1 == 16
- movu m0, [srcq]
- movu m1, [srcq + 16]
- movu m4, [srcq + 2]
- movu m5, [srcq + 18]
- mova m2, [dstq]
- mova m3, [dstq + 16]
- pavgw m0, m4
- pavgw m1, m5
-%if %2 == 1 ; avg
- pavgw m0, [secq]
- pavgw m1, [secq+16]
-%endif
- SUM_SSE m0, m2, m1, m3, m6, m7
-
- lea srcq, [srcq + src_strideq*2]
- lea dstq, [dstq + dst_strideq*2]
-%if %2 == 1 ; avg
- add secq, sec_str
-%endif
-%else ; %1 < 16
- movu m0, [srcq]
- movu m1, [srcq + src_strideq*2]
- movu m4, [srcq + 2]
- movu m5, [srcq + src_strideq*2 + 2]
- mova m2, [dstq]
- mova m3, [dstq + dst_strideq*2]
- pavgw m0, m4
- pavgw m1, m5
-%if %2 == 1 ; avg
- pavgw m0, [secq]
- add secq, sec_str
- pavgw m1, [secq]
-%endif
- SUM_SSE m0, m2, m1, m3, m6, m7
-
- lea srcq, [srcq + src_strideq*4]
- lea dstq, [dstq + dst_strideq*4]
-%if %2 == 1 ; avg
- add secq, sec_str
-%endif
-%endif
- dec block_height
- jg .x_half_y_zero_loop
- STORE_AND_RET
-
-.x_half_y_nonzero:
- cmp y_offsetd, 8
- jne .x_half_y_nonhalf
-
- ; x_offset == 0.5 && y_offset == 0.5
-%if %1 == 16
- movu m0, [srcq]
- movu m1, [srcq+16]
- movu m2, [srcq+2]
- movu m3, [srcq+18]
- lea srcq, [srcq + src_strideq*2]
- pavgw m0, m2
- pavgw m1, m3
-.x_half_y_half_loop:
- movu m2, [srcq]
- movu m3, [srcq + 16]
- movu m4, [srcq + 2]
- movu m5, [srcq + 18]
- pavgw m2, m4
- pavgw m3, m5
- pavgw m0, m2
- pavgw m1, m3
- mova m4, [dstq]
- mova m5, [dstq + 16]
-%if %2 == 1 ; avg
- pavgw m0, [secq]
- pavgw m1, [secq+16]
-%endif
- SUM_SSE m0, m4, m1, m5, m6, m7
- mova m0, m2
- mova m1, m3
-
- lea srcq, [srcq + src_strideq*2]
- lea dstq, [dstq + dst_strideq*2]
-%if %2 == 1 ; avg
- add secq, sec_str
-%endif
-%else ; %1 < 16
- movu m0, [srcq]
- movu m2, [srcq+2]
- lea srcq, [srcq + src_strideq*2]
- pavgw m0, m2
-.x_half_y_half_loop:
- movu m2, [srcq]
- movu m3, [srcq + src_strideq*2]
- movu m4, [srcq + 2]
- movu m5, [srcq + src_strideq*2 + 2]
- pavgw m2, m4
- pavgw m3, m5
- pavgw m0, m2
- pavgw m2, m3
- mova m4, [dstq]
- mova m5, [dstq + dst_strideq*2]
-%if %2 == 1 ; avg
- pavgw m0, [secq]
- add secq, sec_str
- pavgw m2, [secq]
-%endif
- SUM_SSE m0, m4, m2, m5, m6, m7
- mova m0, m3
-
- lea srcq, [srcq + src_strideq*4]
- lea dstq, [dstq + dst_strideq*4]
-%if %2 == 1 ; avg
- add secq, sec_str
-%endif
-%endif
- dec block_height
- jg .x_half_y_half_loop
- STORE_AND_RET
-
-.x_half_y_nonhalf:
- ; x_offset == 0.5 && y_offset == bilin interpolation
-%if ARCH_X86_64
- lea bilin_filter, [GLOBAL(bilin_filter_m)]
-%endif
- shl y_offsetd, filter_idx_shift
-%if ARCH_X86_64 && mmsize == 16
- mova m8, [bilin_filter+y_offsetq]
- mova m9, [bilin_filter+y_offsetq+16]
- mova m10, [GLOBAL(pw_8)]
-%define filter_y_a m8
-%define filter_y_b m9
-%define filter_rnd m10
-%else ; x86_32
-%if ARCH_X86=1 && CONFIG_PIC=1
-; x_offset == 0.5. We can reuse x_offset reg
-%define tempq x_offsetq
- add y_offsetq, g_bilin_filterm
-%define filter_y_a [y_offsetq]
-%define filter_y_b [y_offsetq+16]
- mov tempq, g_pw_8m
-%define filter_rnd [tempq]
-%else
- add y_offsetq, bilin_filter
-%define filter_y_a [y_offsetq]
-%define filter_y_b [y_offsetq+16]
-%define filter_rnd [GLOBAL(pw_8)]
-%endif
-%endif
-
-%if %1 == 16
- movu m0, [srcq]
- movu m1, [srcq+16]
- movu m2, [srcq+2]
- movu m3, [srcq+18]
- lea srcq, [srcq + src_strideq*2]
- pavgw m0, m2
- pavgw m1, m3
-.x_half_y_other_loop:
- movu m2, [srcq]
- movu m3, [srcq+16]
- movu m4, [srcq+2]
- movu m5, [srcq+18]
- pavgw m2, m4
- pavgw m3, m5
- mova m4, m2
- mova m5, m3
- pmullw m1, filter_y_a
- pmullw m3, filter_y_b
- paddw m1, filter_rnd
- paddw m1, m3
- pmullw m0, filter_y_a
- pmullw m2, filter_y_b
- paddw m0, filter_rnd
- psrlw m1, 4
- paddw m0, m2
- mova m2, [dstq]
- psrlw m0, 4
- mova m3, [dstq+16]
-%if %2 == 1 ; avg
- pavgw m0, [secq]
- pavgw m1, [secq+16]
-%endif
- SUM_SSE m0, m2, m1, m3, m6, m7
- mova m0, m4
- mova m1, m5
-
- lea srcq, [srcq + src_strideq*2]
- lea dstq, [dstq + dst_strideq*2]
-%if %2 == 1 ; avg
- add secq, sec_str
-%endif
-%else ; %1 < 16
- movu m0, [srcq]
- movu m2, [srcq+2]
- lea srcq, [srcq + src_strideq*2]
- pavgw m0, m2
-.x_half_y_other_loop:
- movu m2, [srcq]
- movu m3, [srcq+src_strideq*2]
- movu m4, [srcq+2]
- movu m5, [srcq+src_strideq*2+2]
- pavgw m2, m4
- pavgw m3, m5
- mova m4, m2
- mova m5, m3
- pmullw m4, filter_y_a
- pmullw m3, filter_y_b
- paddw m4, filter_rnd
- paddw m4, m3
- pmullw m0, filter_y_a
- pmullw m2, filter_y_b
- paddw m0, filter_rnd
- psrlw m4, 4
- paddw m0, m2
- mova m2, [dstq]
- psrlw m0, 4
- mova m3, [dstq+dst_strideq*2]
-%if %2 == 1 ; avg
- pavgw m0, [secq]
- add secq, sec_str
- pavgw m4, [secq]
-%endif
- SUM_SSE m0, m2, m4, m3, m6, m7
- mova m0, m5
-
- lea srcq, [srcq + src_strideq*4]
- lea dstq, [dstq + dst_strideq*4]
-%if %2 == 1 ; avg
- add secq, sec_str
-%endif
-%endif
- dec block_height
- jg .x_half_y_other_loop
-%undef filter_y_a
-%undef filter_y_b
-%undef filter_rnd
- STORE_AND_RET
-
-.x_nonhalf:
- test y_offsetd, y_offsetd
- jnz .x_nonhalf_y_nonzero
-
- ; x_offset == bilin interpolation && y_offset == 0
-%if ARCH_X86_64
- lea bilin_filter, [GLOBAL(bilin_filter_m)]
-%endif
- shl x_offsetd, filter_idx_shift
-%if ARCH_X86_64 && mmsize == 16
- mova m8, [bilin_filter+x_offsetq]
- mova m9, [bilin_filter+x_offsetq+16]
- mova m10, [GLOBAL(pw_8)]
-%define filter_x_a m8
-%define filter_x_b m9
-%define filter_rnd m10
-%else ; x86-32
-%if ARCH_X86=1 && CONFIG_PIC=1
-; y_offset == 0. We can reuse y_offset reg.
-%define tempq y_offsetq
- add x_offsetq, g_bilin_filterm
-%define filter_x_a [x_offsetq]
-%define filter_x_b [x_offsetq+16]
- mov tempq, g_pw_8m
-%define filter_rnd [tempq]
-%else
- add x_offsetq, bilin_filter
-%define filter_x_a [x_offsetq]
-%define filter_x_b [x_offsetq+16]
-%define filter_rnd [GLOBAL(pw_8)]
-%endif
-%endif
-
-.x_other_y_zero_loop:
-%if %1 == 16
- movu m0, [srcq]
- movu m1, [srcq+16]
- movu m2, [srcq+2]
- movu m3, [srcq+18]
- mova m4, [dstq]
- mova m5, [dstq+16]
- pmullw m1, filter_x_a
- pmullw m3, filter_x_b
- paddw m1, filter_rnd
- pmullw m0, filter_x_a
- pmullw m2, filter_x_b
- paddw m0, filter_rnd
- paddw m1, m3
- paddw m0, m2
- psrlw m1, 4
- psrlw m0, 4
-%if %2 == 1 ; avg
- pavgw m0, [secq]
- pavgw m1, [secq+16]
-%endif
- SUM_SSE m0, m4, m1, m5, m6, m7
-
- lea srcq, [srcq+src_strideq*2]
- lea dstq, [dstq+dst_strideq*2]
-%if %2 == 1 ; avg
- add secq, sec_str
-%endif
-%else ; %1 < 16
- movu m0, [srcq]
- movu m1, [srcq+src_strideq*2]
- movu m2, [srcq+2]
- movu m3, [srcq+src_strideq*2+2]
- mova m4, [dstq]
- mova m5, [dstq+dst_strideq*2]
- pmullw m1, filter_x_a
- pmullw m3, filter_x_b
- paddw m1, filter_rnd
- pmullw m0, filter_x_a
- pmullw m2, filter_x_b
- paddw m0, filter_rnd
- paddw m1, m3
- paddw m0, m2
- psrlw m1, 4
- psrlw m0, 4
-%if %2 == 1 ; avg
- pavgw m0, [secq]
- add secq, sec_str
- pavgw m1, [secq]
-%endif
- SUM_SSE m0, m4, m1, m5, m6, m7
-
- lea srcq, [srcq+src_strideq*4]
- lea dstq, [dstq+dst_strideq*4]
-%if %2 == 1 ; avg
- add secq, sec_str
-%endif
-%endif
- dec block_height
- jg .x_other_y_zero_loop
-%undef filter_x_a
-%undef filter_x_b
-%undef filter_rnd
- STORE_AND_RET
-
-.x_nonhalf_y_nonzero:
- cmp y_offsetd, 8
- jne .x_nonhalf_y_nonhalf
-
- ; x_offset == bilin interpolation && y_offset == 0.5
-%if ARCH_X86_64
- lea bilin_filter, [GLOBAL(bilin_filter_m)]
-%endif
- shl x_offsetd, filter_idx_shift
-%if ARCH_X86_64 && mmsize == 16
- mova m8, [bilin_filter+x_offsetq]
- mova m9, [bilin_filter+x_offsetq+16]
- mova m10, [GLOBAL(pw_8)]
-%define filter_x_a m8
-%define filter_x_b m9
-%define filter_rnd m10
-%else ; x86-32
-%if ARCH_X86=1 && CONFIG_PIC=1
-; y_offset == 0.5. We can reuse y_offset reg.
-%define tempq y_offsetq
- add x_offsetq, g_bilin_filterm
-%define filter_x_a [x_offsetq]
-%define filter_x_b [x_offsetq+16]
- mov tempq, g_pw_8m
-%define filter_rnd [tempq]
-%else
- add x_offsetq, bilin_filter
-%define filter_x_a [x_offsetq]
-%define filter_x_b [x_offsetq+16]
-%define filter_rnd [GLOBAL(pw_8)]
-%endif
-%endif
-
-%if %1 == 16
- movu m0, [srcq]
- movu m1, [srcq+16]
- movu m2, [srcq+2]
- movu m3, [srcq+18]
- pmullw m0, filter_x_a
- pmullw m2, filter_x_b
- paddw m0, filter_rnd
- pmullw m1, filter_x_a
- pmullw m3, filter_x_b
- paddw m1, filter_rnd
- paddw m0, m2
- paddw m1, m3
- psrlw m0, 4
- psrlw m1, 4
- lea srcq, [srcq+src_strideq*2]
-.x_other_y_half_loop:
- movu m2, [srcq]
- movu m3, [srcq+16]
- movu m4, [srcq+2]
- movu m5, [srcq+18]
- pmullw m2, filter_x_a
- pmullw m4, filter_x_b
- paddw m2, filter_rnd
- pmullw m3, filter_x_a
- pmullw m5, filter_x_b
- paddw m3, filter_rnd
- paddw m2, m4
- paddw m3, m5
- mova m4, [dstq]
- mova m5, [dstq+16]
- psrlw m2, 4
- psrlw m3, 4
- pavgw m0, m2
- pavgw m1, m3
-%if %2 == 1 ; avg
- pavgw m0, [secq]
- pavgw m1, [secq+16]
-%endif
- SUM_SSE m0, m4, m1, m5, m6, m7
- mova m0, m2
- mova m1, m3
-
- lea srcq, [srcq+src_strideq*2]
- lea dstq, [dstq+dst_strideq*2]
-%if %2 == 1 ; avg
- add secq, sec_str
-%endif
-%else ; %1 < 16
- movu m0, [srcq]
- movu m2, [srcq+2]
- pmullw m0, filter_x_a
- pmullw m2, filter_x_b
- paddw m0, filter_rnd
- paddw m0, m2
- psrlw m0, 4
- lea srcq, [srcq+src_strideq*2]
-.x_other_y_half_loop:
- movu m2, [srcq]
- movu m3, [srcq+src_strideq*2]
- movu m4, [srcq+2]
- movu m5, [srcq+src_strideq*2+2]
- pmullw m2, filter_x_a
- pmullw m4, filter_x_b
- paddw m2, filter_rnd
- pmullw m3, filter_x_a
- pmullw m5, filter_x_b
- paddw m3, filter_rnd
- paddw m2, m4
- paddw m3, m5
- mova m4, [dstq]
- mova m5, [dstq+dst_strideq*2]
- psrlw m2, 4
- psrlw m3, 4
- pavgw m0, m2
- pavgw m2, m3
-%if %2 == 1 ; avg
- pavgw m0, [secq]
- add secq, sec_str
- pavgw m2, [secq]
-%endif
- SUM_SSE m0, m4, m2, m5, m6, m7
- mova m0, m3
-
- lea srcq, [srcq+src_strideq*4]
- lea dstq, [dstq+dst_strideq*4]
-%if %2 == 1 ; avg
- add secq, sec_str
-%endif
-%endif
- dec block_height
- jg .x_other_y_half_loop
-%undef filter_x_a
-%undef filter_x_b
-%undef filter_rnd
- STORE_AND_RET
-
-.x_nonhalf_y_nonhalf:
-; loading filter - this is same as in 8-bit depth
-%if ARCH_X86_64
- lea bilin_filter, [GLOBAL(bilin_filter_m)]
-%endif
- shl x_offsetd, filter_idx_shift ; filter_idx_shift = 5
- shl y_offsetd, filter_idx_shift
-%if ARCH_X86_64 && mmsize == 16
- mova m8, [bilin_filter+x_offsetq]
- mova m9, [bilin_filter+x_offsetq+16]
- mova m10, [bilin_filter+y_offsetq]
- mova m11, [bilin_filter+y_offsetq+16]
- mova m12, [GLOBAL(pw_8)]
-%define filter_x_a m8
-%define filter_x_b m9
-%define filter_y_a m10
-%define filter_y_b m11
-%define filter_rnd m12
-%else ; x86-32
-%if ARCH_X86=1 && CONFIG_PIC=1
-; In this case, there is NO unused register. Used src_stride register. Later,
-; src_stride has to be loaded from stack when it is needed.
-%define tempq src_strideq
- mov tempq, g_bilin_filterm
- add x_offsetq, tempq
- add y_offsetq, tempq
-%define filter_x_a [x_offsetq]
-%define filter_x_b [x_offsetq+16]
-%define filter_y_a [y_offsetq]
-%define filter_y_b [y_offsetq+16]
-
- mov tempq, g_pw_8m
-%define filter_rnd [tempq]
-%else
- add x_offsetq, bilin_filter
- add y_offsetq, bilin_filter
-%define filter_x_a [x_offsetq]
-%define filter_x_b [x_offsetq+16]
-%define filter_y_a [y_offsetq]
-%define filter_y_b [y_offsetq+16]
-%define filter_rnd [GLOBAL(pw_8)]
-%endif
-%endif
-; end of load filter
-
- ; x_offset == bilin interpolation && y_offset == bilin interpolation
-%if %1 == 16
- movu m0, [srcq]
- movu m2, [srcq+2]
- movu m1, [srcq+16]
- movu m3, [srcq+18]
- pmullw m0, filter_x_a
- pmullw m2, filter_x_b
- paddw m0, filter_rnd
- pmullw m1, filter_x_a
- pmullw m3, filter_x_b
- paddw m1, filter_rnd
- paddw m0, m2
- paddw m1, m3
- psrlw m0, 4
- psrlw m1, 4
-
- INC_SRC_BY_SRC_STRIDE
-
-.x_other_y_other_loop:
- movu m2, [srcq]
- movu m4, [srcq+2]
- movu m3, [srcq+16]
- movu m5, [srcq+18]
- pmullw m2, filter_x_a
- pmullw m4, filter_x_b
- paddw m2, filter_rnd
- pmullw m3, filter_x_a
- pmullw m5, filter_x_b
- paddw m3, filter_rnd
- paddw m2, m4
- paddw m3, m5
- psrlw m2, 4
- psrlw m3, 4
- mova m4, m2
- mova m5, m3
- pmullw m0, filter_y_a
- pmullw m2, filter_y_b
- paddw m0, filter_rnd
- pmullw m1, filter_y_a
- pmullw m3, filter_y_b
- paddw m0, m2
- paddw m1, filter_rnd
- mova m2, [dstq]
- paddw m1, m3
- psrlw m0, 4
- psrlw m1, 4
- mova m3, [dstq+16]
-%if %2 == 1 ; avg
- pavgw m0, [secq]
- pavgw m1, [secq+16]
-%endif
- SUM_SSE m0, m2, m1, m3, m6, m7
- mova m0, m4
- mova m1, m5
-
- INC_SRC_BY_SRC_STRIDE
- lea dstq, [dstq + dst_strideq * 2]
-%if %2 == 1 ; avg
- add secq, sec_str
-%endif
-%else ; %1 < 16
- movu m0, [srcq]
- movu m2, [srcq+2]
- pmullw m0, filter_x_a
- pmullw m2, filter_x_b
- paddw m0, filter_rnd
- paddw m0, m2
- psrlw m0, 4
-
- INC_SRC_BY_SRC_STRIDE
-
-.x_other_y_other_loop:
- movu m2, [srcq]
- movu m4, [srcq+2]
- INC_SRC_BY_SRC_STRIDE
- movu m3, [srcq]
- movu m5, [srcq+2]
- pmullw m2, filter_x_a
- pmullw m4, filter_x_b
- paddw m2, filter_rnd
- pmullw m3, filter_x_a
- pmullw m5, filter_x_b
- paddw m3, filter_rnd
- paddw m2, m4
- paddw m3, m5
- psrlw m2, 4
- psrlw m3, 4
- mova m4, m2
- mova m5, m3
- pmullw m0, filter_y_a
- pmullw m2, filter_y_b
- paddw m0, filter_rnd
- pmullw m4, filter_y_a
- pmullw m3, filter_y_b
- paddw m0, m2
- paddw m4, filter_rnd
- mova m2, [dstq]
- paddw m4, m3
- psrlw m0, 4
- psrlw m4, 4
- mova m3, [dstq+dst_strideq*2]
-%if %2 == 1 ; avg
- pavgw m0, [secq]
- add secq, sec_str
- pavgw m4, [secq]
-%endif
- SUM_SSE m0, m2, m4, m3, m6, m7
- mova m0, m5
-
- INC_SRC_BY_SRC_STRIDE
- lea dstq, [dstq + dst_strideq * 4]
-%if %2 == 1 ; avg
- add secq, sec_str
-%endif
-%endif
- dec block_height
- jg .x_other_y_other_loop
-%undef filter_x_a
-%undef filter_x_b
-%undef filter_y_a
-%undef filter_y_b
-%undef filter_rnd
- STORE_AND_RET
-%endmacro
-
-INIT_XMM sse2
-SUBPEL_VARIANCE 8
-SUBPEL_VARIANCE 16
-
-INIT_XMM sse2
-SUBPEL_VARIANCE 8, 1
-SUBPEL_VARIANCE 16, 1
diff --git a/third_party/aom/aom_dsp/x86/highbd_subtract_sse2.c b/third_party/aom/aom_dsp/x86/highbd_subtract_sse2.c
deleted file mode 100644
index 18eb03d12..000000000
--- a/third_party/aom/aom_dsp/x86/highbd_subtract_sse2.c
+++ /dev/null
@@ -1,267 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#include <assert.h>
-#include <emmintrin.h>
-#include <stddef.h>
-
-#include "config/aom_config.h"
-#include "config/aom_dsp_rtcd.h"
-
-typedef void (*SubtractWxHFuncType)(int16_t *diff, ptrdiff_t diff_stride,
- const uint16_t *src, ptrdiff_t src_stride,
- const uint16_t *pred,
- ptrdiff_t pred_stride);
-
-static void subtract_4x4(int16_t *diff, ptrdiff_t diff_stride,
- const uint16_t *src, ptrdiff_t src_stride,
- const uint16_t *pred, ptrdiff_t pred_stride) {
- __m128i u0, u1, u2, u3;
- __m128i v0, v1, v2, v3;
- __m128i x0, x1, x2, x3;
- int64_t *store_diff = (int64_t *)(diff + 0 * diff_stride);
-
- u0 = _mm_loadu_si128((__m128i const *)(src + 0 * src_stride));
- u1 = _mm_loadu_si128((__m128i const *)(src + 1 * src_stride));
- u2 = _mm_loadu_si128((__m128i const *)(src + 2 * src_stride));
- u3 = _mm_loadu_si128((__m128i const *)(src + 3 * src_stride));
-
- v0 = _mm_loadu_si128((__m128i const *)(pred + 0 * pred_stride));
- v1 = _mm_loadu_si128((__m128i const *)(pred + 1 * pred_stride));
- v2 = _mm_loadu_si128((__m128i const *)(pred + 2 * pred_stride));
- v3 = _mm_loadu_si128((__m128i const *)(pred + 3 * pred_stride));
-
- x0 = _mm_sub_epi16(u0, v0);
- x1 = _mm_sub_epi16(u1, v1);
- x2 = _mm_sub_epi16(u2, v2);
- x3 = _mm_sub_epi16(u3, v3);
-
- _mm_storel_epi64((__m128i *)store_diff, x0);
- store_diff = (int64_t *)(diff + 1 * diff_stride);
- _mm_storel_epi64((__m128i *)store_diff, x1);
- store_diff = (int64_t *)(diff + 2 * diff_stride);
- _mm_storel_epi64((__m128i *)store_diff, x2);
- store_diff = (int64_t *)(diff + 3 * diff_stride);
- _mm_storel_epi64((__m128i *)store_diff, x3);
-}
-
-static void subtract_4x8(int16_t *diff, ptrdiff_t diff_stride,
- const uint16_t *src, ptrdiff_t src_stride,
- const uint16_t *pred, ptrdiff_t pred_stride) {
- __m128i u0, u1, u2, u3, u4, u5, u6, u7;
- __m128i v0, v1, v2, v3, v4, v5, v6, v7;
- __m128i x0, x1, x2, x3, x4, x5, x6, x7;
- int64_t *store_diff = (int64_t *)(diff + 0 * diff_stride);
-
- u0 = _mm_loadu_si128((__m128i const *)(src + 0 * src_stride));
- u1 = _mm_loadu_si128((__m128i const *)(src + 1 * src_stride));
- u2 = _mm_loadu_si128((__m128i const *)(src + 2 * src_stride));
- u3 = _mm_loadu_si128((__m128i const *)(src + 3 * src_stride));
- u4 = _mm_loadu_si128((__m128i const *)(src + 4 * src_stride));
- u5 = _mm_loadu_si128((__m128i const *)(src + 5 * src_stride));
- u6 = _mm_loadu_si128((__m128i const *)(src + 6 * src_stride));
- u7 = _mm_loadu_si128((__m128i const *)(src + 7 * src_stride));
-
- v0 = _mm_loadu_si128((__m128i const *)(pred + 0 * pred_stride));
- v1 = _mm_loadu_si128((__m128i const *)(pred + 1 * pred_stride));
- v2 = _mm_loadu_si128((__m128i const *)(pred + 2 * pred_stride));
- v3 = _mm_loadu_si128((__m128i const *)(pred + 3 * pred_stride));
- v4 = _mm_loadu_si128((__m128i const *)(pred + 4 * pred_stride));
- v5 = _mm_loadu_si128((__m128i const *)(pred + 5 * pred_stride));
- v6 = _mm_loadu_si128((__m128i const *)(pred + 6 * pred_stride));
- v7 = _mm_loadu_si128((__m128i const *)(pred + 7 * pred_stride));
-
- x0 = _mm_sub_epi16(u0, v0);
- x1 = _mm_sub_epi16(u1, v1);
- x2 = _mm_sub_epi16(u2, v2);
- x3 = _mm_sub_epi16(u3, v3);
- x4 = _mm_sub_epi16(u4, v4);
- x5 = _mm_sub_epi16(u5, v5);
- x6 = _mm_sub_epi16(u6, v6);
- x7 = _mm_sub_epi16(u7, v7);
-
- _mm_storel_epi64((__m128i *)store_diff, x0);
- store_diff = (int64_t *)(diff + 1 * diff_stride);
- _mm_storel_epi64((__m128i *)store_diff, x1);
- store_diff = (int64_t *)(diff + 2 * diff_stride);
- _mm_storel_epi64((__m128i *)store_diff, x2);
- store_diff = (int64_t *)(diff + 3 * diff_stride);
- _mm_storel_epi64((__m128i *)store_diff, x3);
- store_diff = (int64_t *)(diff + 4 * diff_stride);
- _mm_storel_epi64((__m128i *)store_diff, x4);
- store_diff = (int64_t *)(diff + 5 * diff_stride);
- _mm_storel_epi64((__m128i *)store_diff, x5);
- store_diff = (int64_t *)(diff + 6 * diff_stride);
- _mm_storel_epi64((__m128i *)store_diff, x6);
- store_diff = (int64_t *)(diff + 7 * diff_stride);
- _mm_storel_epi64((__m128i *)store_diff, x7);
-}
-
-static void subtract_8x4(int16_t *diff, ptrdiff_t diff_stride,
- const uint16_t *src, ptrdiff_t src_stride,
- const uint16_t *pred, ptrdiff_t pred_stride) {
- __m128i u0, u1, u2, u3;
- __m128i v0, v1, v2, v3;
- __m128i x0, x1, x2, x3;
-
- u0 = _mm_loadu_si128((__m128i const *)(src + 0 * src_stride));
- u1 = _mm_loadu_si128((__m128i const *)(src + 1 * src_stride));
- u2 = _mm_loadu_si128((__m128i const *)(src + 2 * src_stride));
- u3 = _mm_loadu_si128((__m128i const *)(src + 3 * src_stride));
-
- v0 = _mm_loadu_si128((__m128i const *)(pred + 0 * pred_stride));
- v1 = _mm_loadu_si128((__m128i const *)(pred + 1 * pred_stride));
- v2 = _mm_loadu_si128((__m128i const *)(pred + 2 * pred_stride));
- v3 = _mm_loadu_si128((__m128i const *)(pred + 3 * pred_stride));
-
- x0 = _mm_sub_epi16(u0, v0);
- x1 = _mm_sub_epi16(u1, v1);
- x2 = _mm_sub_epi16(u2, v2);
- x3 = _mm_sub_epi16(u3, v3);
-
- _mm_storeu_si128((__m128i *)(diff + 0 * diff_stride), x0);
- _mm_storeu_si128((__m128i *)(diff + 1 * diff_stride), x1);
- _mm_storeu_si128((__m128i *)(diff + 2 * diff_stride), x2);
- _mm_storeu_si128((__m128i *)(diff + 3 * diff_stride), x3);
-}
-
-static void subtract_8x8(int16_t *diff, ptrdiff_t diff_stride,
- const uint16_t *src, ptrdiff_t src_stride,
- const uint16_t *pred, ptrdiff_t pred_stride) {
- __m128i u0, u1, u2, u3, u4, u5, u6, u7;
- __m128i v0, v1, v2, v3, v4, v5, v6, v7;
- __m128i x0, x1, x2, x3, x4, x5, x6, x7;
-
- u0 = _mm_loadu_si128((__m128i const *)(src + 0 * src_stride));
- u1 = _mm_loadu_si128((__m128i const *)(src + 1 * src_stride));
- u2 = _mm_loadu_si128((__m128i const *)(src + 2 * src_stride));
- u3 = _mm_loadu_si128((__m128i const *)(src + 3 * src_stride));
- u4 = _mm_loadu_si128((__m128i const *)(src + 4 * src_stride));
- u5 = _mm_loadu_si128((__m128i const *)(src + 5 * src_stride));
- u6 = _mm_loadu_si128((__m128i const *)(src + 6 * src_stride));
- u7 = _mm_loadu_si128((__m128i const *)(src + 7 * src_stride));
-
- v0 = _mm_loadu_si128((__m128i const *)(pred + 0 * pred_stride));
- v1 = _mm_loadu_si128((__m128i const *)(pred + 1 * pred_stride));
- v2 = _mm_loadu_si128((__m128i const *)(pred + 2 * pred_stride));
- v3 = _mm_loadu_si128((__m128i const *)(pred + 3 * pred_stride));
- v4 = _mm_loadu_si128((__m128i const *)(pred + 4 * pred_stride));
- v5 = _mm_loadu_si128((__m128i const *)(pred + 5 * pred_stride));
- v6 = _mm_loadu_si128((__m128i const *)(pred + 6 * pred_stride));
- v7 = _mm_loadu_si128((__m128i const *)(pred + 7 * pred_stride));
-
- x0 = _mm_sub_epi16(u0, v0);
- x1 = _mm_sub_epi16(u1, v1);
- x2 = _mm_sub_epi16(u2, v2);
- x3 = _mm_sub_epi16(u3, v3);
- x4 = _mm_sub_epi16(u4, v4);
- x5 = _mm_sub_epi16(u5, v5);
- x6 = _mm_sub_epi16(u6, v6);
- x7 = _mm_sub_epi16(u7, v7);
-
- _mm_storeu_si128((__m128i *)(diff + 0 * diff_stride), x0);
- _mm_storeu_si128((__m128i *)(diff + 1 * diff_stride), x1);
- _mm_storeu_si128((__m128i *)(diff + 2 * diff_stride), x2);
- _mm_storeu_si128((__m128i *)(diff + 3 * diff_stride), x3);
- _mm_storeu_si128((__m128i *)(diff + 4 * diff_stride), x4);
- _mm_storeu_si128((__m128i *)(diff + 5 * diff_stride), x5);
- _mm_storeu_si128((__m128i *)(diff + 6 * diff_stride), x6);
- _mm_storeu_si128((__m128i *)(diff + 7 * diff_stride), x7);
-}
-
-#define STACK_V(h, fun) \
- do { \
- fun(diff, diff_stride, src, src_stride, pred, pred_stride); \
- fun(diff + diff_stride * h, diff_stride, src + src_stride * h, src_stride, \
- pred + pred_stride * h, pred_stride); \
- } while (0)
-
-#define STACK_H(w, fun) \
- do { \
- fun(diff, diff_stride, src, src_stride, pred, pred_stride); \
- fun(diff + w, diff_stride, src + w, src_stride, pred + w, pred_stride); \
- } while (0)
-
-#define SUBTRACT_FUN(size) \
- static void subtract_##size(int16_t *diff, ptrdiff_t diff_stride, \
- const uint16_t *src, ptrdiff_t src_stride, \
- const uint16_t *pred, ptrdiff_t pred_stride)
-
-SUBTRACT_FUN(8x16) { STACK_V(8, subtract_8x8); }
-SUBTRACT_FUN(16x8) { STACK_H(8, subtract_8x8); }
-SUBTRACT_FUN(16x16) { STACK_V(8, subtract_16x8); }
-SUBTRACT_FUN(16x32) { STACK_V(16, subtract_16x16); }
-SUBTRACT_FUN(32x16) { STACK_H(16, subtract_16x16); }
-SUBTRACT_FUN(32x32) { STACK_V(16, subtract_32x16); }
-SUBTRACT_FUN(32x64) { STACK_V(32, subtract_32x32); }
-SUBTRACT_FUN(64x32) { STACK_H(32, subtract_32x32); }
-SUBTRACT_FUN(64x64) { STACK_V(32, subtract_64x32); }
-SUBTRACT_FUN(64x128) { STACK_V(64, subtract_64x64); }
-SUBTRACT_FUN(128x64) { STACK_H(64, subtract_64x64); }
-SUBTRACT_FUN(128x128) { STACK_V(64, subtract_128x64); }
-SUBTRACT_FUN(4x16) { STACK_V(8, subtract_4x8); }
-SUBTRACT_FUN(16x4) { STACK_H(8, subtract_8x4); }
-SUBTRACT_FUN(8x32) { STACK_V(16, subtract_8x16); }
-SUBTRACT_FUN(32x8) { STACK_H(16, subtract_16x8); }
-SUBTRACT_FUN(16x64) { STACK_V(32, subtract_16x32); }
-SUBTRACT_FUN(64x16) { STACK_H(32, subtract_32x16); }
-
-static SubtractWxHFuncType getSubtractFunc(int rows, int cols) {
- if (rows == 4) {
- if (cols == 4) return subtract_4x4;
- if (cols == 8) return subtract_8x4;
- if (cols == 16) return subtract_16x4;
- }
- if (rows == 8) {
- if (cols == 4) return subtract_4x8;
- if (cols == 8) return subtract_8x8;
- if (cols == 16) return subtract_16x8;
- if (cols == 32) return subtract_32x8;
- }
- if (rows == 16) {
- if (cols == 4) return subtract_4x16;
- if (cols == 8) return subtract_8x16;
- if (cols == 16) return subtract_16x16;
- if (cols == 32) return subtract_32x16;
- if (cols == 64) return subtract_64x16;
- }
- if (rows == 32) {
- if (cols == 8) return subtract_8x32;
- if (cols == 16) return subtract_16x32;
- if (cols == 32) return subtract_32x32;
- if (cols == 64) return subtract_64x32;
- }
- if (rows == 64) {
- if (cols == 16) return subtract_16x64;
- if (cols == 32) return subtract_32x64;
- if (cols == 64) return subtract_64x64;
- if (cols == 128) return subtract_128x64;
- }
- if (rows == 128) {
- if (cols == 64) return subtract_64x128;
- if (cols == 128) return subtract_128x128;
- }
- assert(0);
- return NULL;
-}
-
-void aom_highbd_subtract_block_sse2(int rows, int cols, int16_t *diff,
- ptrdiff_t diff_stride, const uint8_t *src8,
- ptrdiff_t src_stride, const uint8_t *pred8,
- ptrdiff_t pred_stride, int bd) {
- uint16_t *src = CONVERT_TO_SHORTPTR(src8);
- uint16_t *pred = CONVERT_TO_SHORTPTR(pred8);
- SubtractWxHFuncType func;
- (void)bd;
-
- func = getSubtractFunc(rows, cols);
- func(diff, diff_stride, src, src_stride, pred, pred_stride);
-}
diff --git a/third_party/aom/aom_dsp/x86/highbd_variance_avx2.c b/third_party/aom/aom_dsp/x86/highbd_variance_avx2.c
deleted file mode 100644
index 9b1b4c9de..000000000
--- a/third_party/aom/aom_dsp/x86/highbd_variance_avx2.c
+++ /dev/null
@@ -1,140 +0,0 @@
-/*
- * Copyright (c) 2018, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#include <assert.h>
-#include <immintrin.h> // AVX2
-
-#include "config/aom_dsp_rtcd.h"
-
-typedef void (*high_variance_fn_t)(const uint16_t *src, int src_stride,
- const uint16_t *ref, int ref_stride,
- uint32_t *sse, int *sum);
-
-void aom_highbd_calc8x8var_avx2(const uint16_t *src, int src_stride,
- const uint16_t *ref, int ref_stride,
- uint32_t *sse, int *sum) {
- __m256i v_sum_d = _mm256_setzero_si256();
- __m256i v_sse_d = _mm256_setzero_si256();
- for (int i = 0; i < 8; i += 2) {
- const __m128i v_p_a0 = _mm_loadu_si128((const __m128i *)src);
- const __m128i v_p_a1 = _mm_loadu_si128((const __m128i *)(src + src_stride));
- const __m128i v_p_b0 = _mm_loadu_si128((const __m128i *)ref);
- const __m128i v_p_b1 = _mm_loadu_si128((const __m128i *)(ref + ref_stride));
- __m256i v_p_a = _mm256_castsi128_si256(v_p_a0);
- __m256i v_p_b = _mm256_castsi128_si256(v_p_b0);
- v_p_a = _mm256_inserti128_si256(v_p_a, v_p_a1, 1);
- v_p_b = _mm256_inserti128_si256(v_p_b, v_p_b1, 1);
- const __m256i v_diff = _mm256_sub_epi16(v_p_a, v_p_b);
- const __m256i v_sqrdiff = _mm256_madd_epi16(v_diff, v_diff);
- v_sum_d = _mm256_add_epi16(v_sum_d, v_diff);
- v_sse_d = _mm256_add_epi32(v_sse_d, v_sqrdiff);
- src += src_stride * 2;
- ref += ref_stride * 2;
- }
- __m256i v_sum00 = _mm256_cvtepi16_epi32(_mm256_castsi256_si128(v_sum_d));
- __m256i v_sum01 = _mm256_cvtepi16_epi32(_mm256_extracti128_si256(v_sum_d, 1));
- __m256i v_sum0 = _mm256_add_epi32(v_sum00, v_sum01);
- __m256i v_d_l = _mm256_unpacklo_epi32(v_sum0, v_sse_d);
- __m256i v_d_h = _mm256_unpackhi_epi32(v_sum0, v_sse_d);
- __m256i v_d_lh = _mm256_add_epi32(v_d_l, v_d_h);
- const __m128i v_d0_d = _mm256_castsi256_si128(v_d_lh);
- const __m128i v_d1_d = _mm256_extracti128_si256(v_d_lh, 1);
- __m128i v_d = _mm_add_epi32(v_d0_d, v_d1_d);
- v_d = _mm_add_epi32(v_d, _mm_srli_si128(v_d, 8));
- *sum = _mm_extract_epi32(v_d, 0);
- *sse = _mm_extract_epi32(v_d, 1);
-}
-
-void aom_highbd_calc16x16var_avx2(const uint16_t *src, int src_stride,
- const uint16_t *ref, int ref_stride,
- uint32_t *sse, int *sum) {
- __m256i v_sum_d = _mm256_setzero_si256();
- __m256i v_sse_d = _mm256_setzero_si256();
- const __m256i one = _mm256_set1_epi16(1);
- for (int i = 0; i < 16; ++i) {
- const __m256i v_p_a = _mm256_loadu_si256((const __m256i *)src);
- const __m256i v_p_b = _mm256_loadu_si256((const __m256i *)ref);
- const __m256i v_diff = _mm256_sub_epi16(v_p_a, v_p_b);
- const __m256i v_sqrdiff = _mm256_madd_epi16(v_diff, v_diff);
- v_sum_d = _mm256_add_epi16(v_sum_d, v_diff);
- v_sse_d = _mm256_add_epi32(v_sse_d, v_sqrdiff);
- src += src_stride;
- ref += ref_stride;
- }
- __m256i v_sum0 = _mm256_madd_epi16(v_sum_d, one);
- __m256i v_d_l = _mm256_unpacklo_epi32(v_sum0, v_sse_d);
- __m256i v_d_h = _mm256_unpackhi_epi32(v_sum0, v_sse_d);
- __m256i v_d_lh = _mm256_add_epi32(v_d_l, v_d_h);
- const __m128i v_d0_d = _mm256_castsi256_si128(v_d_lh);
- const __m128i v_d1_d = _mm256_extracti128_si256(v_d_lh, 1);
- __m128i v_d = _mm_add_epi32(v_d0_d, v_d1_d);
- v_d = _mm_add_epi32(v_d, _mm_srli_si128(v_d, 8));
- *sum = _mm_extract_epi32(v_d, 0);
- *sse = _mm_extract_epi32(v_d, 1);
-}
-
-static void highbd_10_variance_avx2(const uint16_t *src, int src_stride,
- const uint16_t *ref, int ref_stride, int w,
- int h, uint32_t *sse, int *sum,
- high_variance_fn_t var_fn, int block_size) {
- int i, j;
- uint64_t sse_long = 0;
- int32_t sum_long = 0;
-
- for (i = 0; i < h; i += block_size) {
- for (j = 0; j < w; j += block_size) {
- unsigned int sse0;
- int sum0;
- var_fn(src + src_stride * i + j, src_stride, ref + ref_stride * i + j,
- ref_stride, &sse0, &sum0);
- sse_long += sse0;
- sum_long += sum0;
- }
- }
- *sum = ROUND_POWER_OF_TWO(sum_long, 2);
- *sse = (uint32_t)ROUND_POWER_OF_TWO(sse_long, 4);
-}
-
-#define VAR_FN(w, h, block_size, shift) \
- uint32_t aom_highbd_10_variance##w##x##h##_avx2( \
- const uint8_t *src8, int src_stride, const uint8_t *ref8, \
- int ref_stride, uint32_t *sse) { \
- int sum; \
- int64_t var; \
- uint16_t *src = CONVERT_TO_SHORTPTR(src8); \
- uint16_t *ref = CONVERT_TO_SHORTPTR(ref8); \
- highbd_10_variance_avx2( \
- src, src_stride, ref, ref_stride, w, h, sse, &sum, \
- aom_highbd_calc##block_size##x##block_size##var_avx2, block_size); \
- var = (int64_t)(*sse) - (((int64_t)sum * sum) >> shift); \
- return (var >= 0) ? (uint32_t)var : 0; \
- }
-
-VAR_FN(128, 128, 16, 14);
-VAR_FN(128, 64, 16, 13);
-VAR_FN(64, 128, 16, 13);
-VAR_FN(64, 64, 16, 12);
-VAR_FN(64, 32, 16, 11);
-VAR_FN(32, 64, 16, 11);
-VAR_FN(32, 32, 16, 10);
-VAR_FN(32, 16, 16, 9);
-VAR_FN(16, 32, 16, 9);
-VAR_FN(16, 16, 16, 8);
-VAR_FN(16, 8, 8, 7);
-VAR_FN(8, 16, 8, 7);
-VAR_FN(8, 8, 8, 6);
-VAR_FN(16, 4, 16, 6);
-VAR_FN(8, 32, 8, 8);
-VAR_FN(32, 8, 8, 8);
-VAR_FN(16, 64, 16, 10);
-VAR_FN(64, 16, 16, 10);
-
-#undef VAR_FN
diff --git a/third_party/aom/aom_dsp/x86/highbd_variance_impl_sse2.asm b/third_party/aom/aom_dsp/x86/highbd_variance_impl_sse2.asm
deleted file mode 100644
index 0d954e178..000000000
--- a/third_party/aom/aom_dsp/x86/highbd_variance_impl_sse2.asm
+++ /dev/null
@@ -1,318 +0,0 @@
-;
-; Copyright (c) 2016, Alliance for Open Media. All rights reserved
-;
-; This source code is subject to the terms of the BSD 2 Clause License and
-; the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
-; was not distributed with this source code in the LICENSE file, you can
-; obtain it at www.aomedia.org/license/software. If the Alliance for Open
-; Media Patent License 1.0 was not distributed with this source code in the
-; PATENTS file, you can obtain it at www.aomedia.org/license/patent.
-;
-
-;
-
-
-%include "aom_ports/x86_abi_support.asm"
-
-SECTION .text
-
-;unsigned int aom_highbd_calc16x16var_sse2 ; NOTE: rax is not loaded with a result before ret; outputs go to *SSE and *Sum
-;(
-; const uint16_t * src_ptr, ; 16-bit samples (word arithmetic below)
-; int source_stride, ; in samples; doubled below to get a byte stride
-; const uint16_t * ref_ptr, ; 16-bit samples
-; int recon_stride, ; in samples; doubled below to get a byte stride
-; unsigned int * SSE, ; out: 32-bit sum of squared differences over 16 rows x 16 samples
-; int * Sum ; out: 32-bit sum of differences (src - ref) over the same area
-;)
-global sym(aom_highbd_calc16x16var_sse2) PRIVATE
-sym(aom_highbd_calc16x16var_sse2):
- push rbp
- mov rbp, rsp
- SHADOW_ARGS_TO_STACK 6
- SAVE_XMM 7
- push rbx
- push rsi
- push rdi
- ; end prolog
-
- mov rsi, arg(0) ;[src_ptr]
- mov rdi, arg(2) ;[ref_ptr]
-
- movsxd rax, DWORD PTR arg(1) ;[source_stride]
- movsxd rdx, DWORD PTR arg(3) ;[recon_stride]
- add rax, rax ; source stride in bytes
- add rdx, rdx ; recon stride in bytes
-
- ; Prefetch data: the first four rows of src, then of ref (two addresses 16 bytes apart per row)
- prefetcht0 [rsi]
- prefetcht0 [rsi+16]
- prefetcht0 [rsi+rax]
- prefetcht0 [rsi+rax+16]
- lea rbx, [rsi+rax*2]
- prefetcht0 [rbx]
- prefetcht0 [rbx+16]
- prefetcht0 [rbx+rax]
- prefetcht0 [rbx+rax+16]
-
- prefetcht0 [rdi]
- prefetcht0 [rdi+16]
- prefetcht0 [rdi+rdx]
- prefetcht0 [rdi+rdx+16]
- lea rbx, [rdi+rdx*2]
- prefetcht0 [rbx]
- prefetcht0 [rbx+16]
- prefetcht0 [rbx+rdx]
- prefetcht0 [rbx+rdx+16]
-
- pxor xmm0, xmm0 ; clear xmm0 for unpack
- pxor xmm7, xmm7 ; clear xmm7 for accumulating diffs
-
- pxor xmm6, xmm6 ; clear xmm6 for accumulating sse
- mov rcx, 16 ; rcx = rows remaining
-
-.var16loop: ; each iteration consumes two rows of 16 samples
- movdqu xmm1, XMMWORD PTR [rsi]
- movdqu xmm2, XMMWORD PTR [rdi]
-
- lea rbx, [rsi+rax*2] ; prefetch the two src rows after the current pair
- prefetcht0 [rbx]
- prefetcht0 [rbx+16]
- prefetcht0 [rbx+rax]
- prefetcht0 [rbx+rax+16]
- lea rbx, [rdi+rdx*2] ; prefetch the two ref rows after the current pair
- prefetcht0 [rbx]
- prefetcht0 [rbx+16]
- prefetcht0 [rbx+rdx]
- prefetcht0 [rbx+rdx+16]
-
- pxor xmm5, xmm5 ; xmm5 = 16-bit difference sums for this pair of rows
-
- psubw xmm1, xmm2 ; 16-bit differences src - ref
- movdqu xmm3, XMMWORD PTR [rsi+16]
- paddw xmm5, xmm1
- pmaddwd xmm1, xmm1 ; squared differences, pairwise-added into 32-bit lanes
- movdqu xmm2, XMMWORD PTR [rdi+16]
- paddd xmm6, xmm1
-
- psubw xmm3, xmm2
- movdqu xmm1, XMMWORD PTR [rsi+rax]
- paddw xmm5, xmm3
- pmaddwd xmm3, xmm3
- movdqu xmm2, XMMWORD PTR [rdi+rdx]
- paddd xmm6, xmm3
-
- psubw xmm1, xmm2
- movdqu xmm3, XMMWORD PTR [rsi+rax+16]
- paddw xmm5, xmm1
- pmaddwd xmm1, xmm1
- movdqu xmm2, XMMWORD PTR [rdi+rdx+16]
- paddd xmm6, xmm1
-
- psubw xmm3, xmm2
- paddw xmm5, xmm3
- pmaddwd xmm3, xmm3
- paddd xmm6, xmm3
-; Sign-extend the eight 16-bit sums in xmm5 to 32 bits and add them into xmm7.
- movdqa xmm1, xmm5
- movdqa xmm2, xmm5
- pcmpgtw xmm1, xmm0 ; xmm1 = mask of lanes > 0
- pcmpeqw xmm2, xmm0 ; xmm2 = mask of lanes == 0
- por xmm1, xmm2 ; xmm1 = mask of lanes >= 0
- pcmpeqw xmm1, xmm0 ; xmm1 = 0xFFFF in negative lanes, 0 elsewhere
- movdqa xmm2, xmm5
- punpcklwd xmm5, xmm1 ; interleave with the sign mask -> 32-bit values
- punpckhwd xmm2, xmm1
- paddd xmm7, xmm5
- paddd xmm7, xmm2
-
- lea rsi, [rsi + 2*rax]
- lea rdi, [rdi + 2*rdx]
- sub rcx, 2 ; two rows consumed
- jnz .var16loop
-; Horizontal add: fold the four dwords of xmm6 (SSE) and of xmm7 (Sum) into dword 0.
- movdqa xmm4, xmm6
- punpckldq xmm6, xmm0
-
- punpckhdq xmm4, xmm0
- movdqa xmm5, xmm7
-
- paddd xmm6, xmm4
- punpckldq xmm7, xmm0
-
- punpckhdq xmm5, xmm0
- paddd xmm7, xmm5
-
- movdqa xmm4, xmm6
- movdqa xmm5, xmm7
-
- psrldq xmm4, 8
- psrldq xmm5, 8
-
- paddd xmm6, xmm4
- paddd xmm7, xmm5
-
- mov rdi, arg(4) ; [SSE]
- mov rax, arg(5) ; [Sum]
-
- movd DWORD PTR [rdi], xmm6
- movd DWORD PTR [rax], xmm7
-
-
- ; begin epilog
- pop rdi
- pop rsi
- pop rbx
- RESTORE_XMM
- UNSHADOW_ARGS
- pop rbp
- ret
-
-
-;unsigned int aom_highbd_calc8x8var_sse2 ; NOTE: rax is not loaded with a result before ret; outputs go to *SSE and *Sum
-;(
-; const uint16_t * src_ptr, ; 16-bit samples (word arithmetic below)
-; int source_stride, ; in samples; doubled below to get a byte stride
-; const uint16_t * ref_ptr, ; 16-bit samples
-; int recon_stride, ; in samples; doubled below to get a byte stride
-; unsigned int * SSE, ; out: 32-bit sum of squared differences over 8 rows x 8 samples
-; int * Sum ; out: 32-bit sum of differences (src - ref) over the same area
-;)
-global sym(aom_highbd_calc8x8var_sse2) PRIVATE
-sym(aom_highbd_calc8x8var_sse2):
- push rbp
- mov rbp, rsp
- SHADOW_ARGS_TO_STACK 6
- SAVE_XMM 7
- push rbx
- push rsi
- push rdi
- ; end prolog
-
- mov rsi, arg(0) ;[src_ptr]
- mov rdi, arg(2) ;[ref_ptr]
-
- movsxd rax, DWORD PTR arg(1) ;[source_stride]
- movsxd rdx, DWORD PTR arg(3) ;[recon_stride]
- add rax, rax ; source stride in bytes
- add rdx, rdx ; recon stride in bytes
-
- ; Prefetch data: the first four rows of src, then of ref
- prefetcht0 [rsi]
- prefetcht0 [rsi+rax]
- lea rbx, [rsi+rax*2]
- prefetcht0 [rbx]
- prefetcht0 [rbx+rax]
-
- prefetcht0 [rdi]
- prefetcht0 [rdi+rdx]
- lea rbx, [rdi+rdx*2]
- prefetcht0 [rbx]
- prefetcht0 [rbx+rdx]
-
- pxor xmm0, xmm0 ; clear xmm0 for unpack
- pxor xmm7, xmm7 ; clear xmm7 for accumulating diffs
-
- pxor xmm6, xmm6 ; clear xmm6 for accumulating sse
- mov rcx, 8 ; rcx = rows remaining
-
-.var8loop: ; each iteration consumes four rows of 8 samples
- movdqu xmm1, XMMWORD PTR [rsi]
- movdqu xmm2, XMMWORD PTR [rdi]
-
- lea rbx, [rsi+rax*4] ; prefetch the four src rows after the current group
- prefetcht0 [rbx]
- prefetcht0 [rbx+rax]
- lea rbx, [rbx+rax*2]
- prefetcht0 [rbx]
- prefetcht0 [rbx+rax]
- lea rbx, [rdi+rdx*4] ; prefetch the four ref rows after the current group
- prefetcht0 [rbx]
- prefetcht0 [rbx+rdx]
- lea rbx, [rbx+rdx*2]
- prefetcht0 [rbx]
- prefetcht0 [rbx+rdx]
-
- pxor xmm5, xmm5 ; xmm5 = 16-bit difference sums for this group of rows
-
- psubw xmm1, xmm2 ; 16-bit differences src - ref
- movdqu xmm3, XMMWORD PTR [rsi+rax]
- paddw xmm5, xmm1
- pmaddwd xmm1, xmm1 ; squared differences, pairwise-added into 32-bit lanes
- movdqu xmm2, XMMWORD PTR [rdi+rdx]
- paddd xmm6, xmm1
-
- lea rsi, [rsi + 2*rax] ; advance to rows 2 and 3 of this group
- lea rdi, [rdi + 2*rdx]
-
- psubw xmm3, xmm2
- movdqu xmm1, XMMWORD PTR [rsi]
- paddw xmm5, xmm3
- pmaddwd xmm3, xmm3
- movdqu xmm2, XMMWORD PTR [rdi]
- paddd xmm6, xmm3
-
- psubw xmm1, xmm2
- movdqu xmm3, XMMWORD PTR [rsi+rax]
- paddw xmm5, xmm1
- pmaddwd xmm1, xmm1
- movdqu xmm2, XMMWORD PTR [rdi+rdx]
- paddd xmm6, xmm1
-
- psubw xmm3, xmm2
- paddw xmm5, xmm3
- pmaddwd xmm3, xmm3
- paddd xmm6, xmm3
-; Sign-extend the eight 16-bit sums in xmm5 to 32 bits and add them into xmm7.
- movdqa xmm1, xmm5
- movdqa xmm2, xmm5
- pcmpgtw xmm1, xmm0 ; xmm1 = mask of lanes > 0
- pcmpeqw xmm2, xmm0 ; xmm2 = mask of lanes == 0
- por xmm1, xmm2 ; xmm1 = mask of lanes >= 0
- pcmpeqw xmm1, xmm0 ; xmm1 = 0xFFFF in negative lanes, 0 elsewhere
- movdqa xmm2, xmm5
- punpcklwd xmm5, xmm1 ; interleave with the sign mask -> 32-bit values
- punpckhwd xmm2, xmm1
- paddd xmm7, xmm5
- paddd xmm7, xmm2
-
- lea rsi, [rsi + 2*rax]
- lea rdi, [rdi + 2*rdx]
- sub rcx, 4 ; four rows consumed
- jnz .var8loop
-; Horizontal add: fold the four dwords of xmm6 (SSE) and of xmm7 (Sum) into dword 0.
- movdqa xmm4, xmm6
- punpckldq xmm6, xmm0
-
- punpckhdq xmm4, xmm0
- movdqa xmm5, xmm7
-
- paddd xmm6, xmm4
- punpckldq xmm7, xmm0
-
- punpckhdq xmm5, xmm0
- paddd xmm7, xmm5
-
- movdqa xmm4, xmm6
- movdqa xmm5, xmm7
-
- psrldq xmm4, 8
- psrldq xmm5, 8
-
- paddd xmm6, xmm4
- paddd xmm7, xmm5
-
- mov rdi, arg(4) ; [SSE]
- mov rax, arg(5) ; [Sum]
-
- movd DWORD PTR [rdi], xmm6
- movd DWORD PTR [rax], xmm7
-
- ; begin epilog
- pop rdi
- pop rsi
- pop rbx
- RESTORE_XMM
- UNSHADOW_ARGS
- pop rbp
- ret
diff --git a/third_party/aom/aom_dsp/x86/highbd_variance_sse2.c b/third_party/aom/aom_dsp/x86/highbd_variance_sse2.c
deleted file mode 100644
index 47b052abc..000000000
--- a/third_party/aom/aom_dsp/x86/highbd_variance_sse2.c
+++ /dev/null
@@ -1,868 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#include <assert.h>
-#include <emmintrin.h> // SSE2
-
-#include "config/aom_config.h"
-#include "config/aom_dsp_rtcd.h"
-#include "config/av1_rtcd.h"
-
-#include "aom_dsp/x86/synonyms.h"
-
-#include "aom_ports/mem.h"
-
-#include "av1/common/filter.h"
-#include "av1/common/onyxc_int.h"
-#include "av1/common/reconinter.h"
-/* Signature shared by the fixed-size variance kernels: each one writes the
- * sum of squared differences to *sse and the sum of differences to *sum. */
-typedef uint32_t (*high_variance_fn_t)(const uint16_t *src, int src_stride,
- const uint16_t *ref, int ref_stride,
- uint32_t *sse, int *sum);
-/* Kernels implemented in highbd_variance_impl_sse2.asm. */
-uint32_t aom_highbd_calc8x8var_sse2(const uint16_t *src, int src_stride,
- const uint16_t *ref, int ref_stride,
- uint32_t *sse, int *sum);
-
-uint32_t aom_highbd_calc16x16var_sse2(const uint16_t *src, int src_stride,
- const uint16_t *ref, int ref_stride,
- uint32_t *sse, int *sum);
-
-/* Runs var_fn on every block_size x block_size tile of the w x h region and
- * stores the unscaled totals of the per-tile SSE and sum in *sse and *sum. */
-static void highbd_8_variance_sse2(const uint16_t *src, int src_stride,
-                                   const uint16_t *ref, int ref_stride, int w,
-                                   int h, uint32_t *sse, int *sum,
-                                   high_variance_fn_t var_fn, int block_size) {
-  uint32_t sse_total = 0;
-  int sum_total = 0;
-
-  for (int row = 0; row < h; row += block_size) {
-    for (int col = 0; col < w; col += block_size) {
-      unsigned int tile_sse;
-      int tile_sum;
-      var_fn(src + src_stride * row + col, src_stride,
-             ref + ref_stride * row + col, ref_stride, &tile_sse, &tile_sum);
-      sse_total += tile_sse;
-      sum_total += tile_sum;
-    }
-  }
-
-  *sse = sse_total;
-  *sum = sum_total;
-}
-
-/* Runs var_fn on every block_size x block_size tile of the w x h region and
- * totals the per-tile results. The totals are stored scaled down: *sum is
- * round-shifted right by 2 bits and *sse by 4 bits. */
-static void highbd_10_variance_sse2(const uint16_t *src, int src_stride,
-                                    const uint16_t *ref, int ref_stride, int w,
-                                    int h, uint32_t *sse, int *sum,
-                                    high_variance_fn_t var_fn, int block_size) {
-  uint64_t sse_total = 0; /* 64-bit accumulator for the unscaled SSE */
-  int32_t sum_total = 0;
-
-  for (int row = 0; row < h; row += block_size) {
-    for (int col = 0; col < w; col += block_size) {
-      unsigned int tile_sse;
-      int tile_sum;
-      var_fn(src + src_stride * row + col, src_stride,
-             ref + ref_stride * row + col, ref_stride, &tile_sse, &tile_sum);
-      sse_total += tile_sse;
-      sum_total += tile_sum;
-    }
-  }
-
-  *sum = ROUND_POWER_OF_TWO(sum_total, 2);
-  *sse = (uint32_t)ROUND_POWER_OF_TWO(sse_total, 4);
-}
-
-/* Runs var_fn on every block_size x block_size tile of the w x h region and
- * totals the per-tile results. The totals are stored scaled down: *sum is
- * round-shifted right by 4 bits and *sse by 8 bits. */
-static void highbd_12_variance_sse2(const uint16_t *src, int src_stride,
-                                    const uint16_t *ref, int ref_stride, int w,
-                                    int h, uint32_t *sse, int *sum,
-                                    high_variance_fn_t var_fn, int block_size) {
-  uint64_t sse_total = 0; /* 64-bit accumulator for the unscaled SSE */
-  int32_t sum_total = 0;
-
-  for (int row = 0; row < h; row += block_size) {
-    for (int col = 0; col < w; col += block_size) {
-      unsigned int tile_sse;
-      int tile_sum;
-      var_fn(src + src_stride * row + col, src_stride,
-             ref + ref_stride * row + col, ref_stride, &tile_sse, &tile_sum);
-      sse_total += tile_sse;
-      sum_total += tile_sum;
-    }
-  }
-
-  *sum = ROUND_POWER_OF_TWO(sum_total, 4);
-  *sse = (uint32_t)ROUND_POWER_OF_TWO(sse_total, 8);
-}
-/* HIGH_GET_VAR(S) defines three SxS wrappers around the kernel
- * aom_highbd_calc<S>x<S>var_sse2, each converting src8/ref8 with
- * CONVERT_TO_SHORTPTR first. aom_highbd_get<S>x<S>var_sse2 leaves the
- * kernel's *sse and *sum untouched; the _10_ variant round-shifts *sum by 2
- * bits and *sse by 4 bits; the _12_ variant uses 4 and 8 bits. */
-#define HIGH_GET_VAR(S) \
- void aom_highbd_get##S##x##S##var_sse2(const uint8_t *src8, int src_stride, \
- const uint8_t *ref8, int ref_stride, \
- uint32_t *sse, int *sum) { \
- uint16_t *src = CONVERT_TO_SHORTPTR(src8); \
- uint16_t *ref = CONVERT_TO_SHORTPTR(ref8); \
- aom_highbd_calc##S##x##S##var_sse2(src, src_stride, ref, ref_stride, sse, \
- sum); \
- } \
- \
- void aom_highbd_10_get##S##x##S##var_sse2( \
- const uint8_t *src8, int src_stride, const uint8_t *ref8, \
- int ref_stride, uint32_t *sse, int *sum) { \
- uint16_t *src = CONVERT_TO_SHORTPTR(src8); \
- uint16_t *ref = CONVERT_TO_SHORTPTR(ref8); \
- aom_highbd_calc##S##x##S##var_sse2(src, src_stride, ref, ref_stride, sse, \
- sum); \
- *sum = ROUND_POWER_OF_TWO(*sum, 2); \
- *sse = ROUND_POWER_OF_TWO(*sse, 4); \
- } \
- \
- void aom_highbd_12_get##S##x##S##var_sse2( \
- const uint8_t *src8, int src_stride, const uint8_t *ref8, \
- int ref_stride, uint32_t *sse, int *sum) { \
- uint16_t *src = CONVERT_TO_SHORTPTR(src8); \
- uint16_t *ref = CONVERT_TO_SHORTPTR(ref8); \
- aom_highbd_calc##S##x##S##var_sse2(src, src_stride, ref, ref_stride, sse, \
- sum); \
- *sum = ROUND_POWER_OF_TWO(*sum, 4); \
- *sse = ROUND_POWER_OF_TWO(*sse, 8); \
- }
-
-HIGH_GET_VAR(16);
-HIGH_GET_VAR(8);
-
-#undef HIGH_GET_VAR
-/* VAR_FN(w, h, block_size, shift) defines
- * aom_highbd_{8,10,12}_variance<w>x<h>_sse2. Each function collects SSE and
- * sum through the matching highbd_<bd>_variance_sse2 helper (tiling with the
- * block_size x block_size kernel), leaves the SSE in *sse, and returns
- * sse - ((sum * sum) >> shift). The 10- and 12-bit variants evaluate this in
- * 64 bits and clamp negative results to 0; the 8-bit variant subtracts in 32
- * bits without a clamp. shift equals log2(w * h) in every instantiation. */
-#define VAR_FN(w, h, block_size, shift) \
- uint32_t aom_highbd_8_variance##w##x##h##_sse2( \
- const uint8_t *src8, int src_stride, const uint8_t *ref8, \
- int ref_stride, uint32_t *sse) { \
- int sum; \
- uint16_t *src = CONVERT_TO_SHORTPTR(src8); \
- uint16_t *ref = CONVERT_TO_SHORTPTR(ref8); \
- highbd_8_variance_sse2( \
- src, src_stride, ref, ref_stride, w, h, sse, &sum, \
- aom_highbd_calc##block_size##x##block_size##var_sse2, block_size); \
- return *sse - (uint32_t)(((int64_t)sum * sum) >> shift); \
- } \
- \
- uint32_t aom_highbd_10_variance##w##x##h##_sse2( \
- const uint8_t *src8, int src_stride, const uint8_t *ref8, \
- int ref_stride, uint32_t *sse) { \
- int sum; \
- int64_t var; \
- uint16_t *src = CONVERT_TO_SHORTPTR(src8); \
- uint16_t *ref = CONVERT_TO_SHORTPTR(ref8); \
- highbd_10_variance_sse2( \
- src, src_stride, ref, ref_stride, w, h, sse, &sum, \
- aom_highbd_calc##block_size##x##block_size##var_sse2, block_size); \
- var = (int64_t)(*sse) - (((int64_t)sum * sum) >> shift); \
- return (var >= 0) ? (uint32_t)var : 0; \
- } \
- \
- uint32_t aom_highbd_12_variance##w##x##h##_sse2( \
- const uint8_t *src8, int src_stride, const uint8_t *ref8, \
- int ref_stride, uint32_t *sse) { \
- int sum; \
- int64_t var; \
- uint16_t *src = CONVERT_TO_SHORTPTR(src8); \
- uint16_t *ref = CONVERT_TO_SHORTPTR(ref8); \
- highbd_12_variance_sse2( \
- src, src_stride, ref, ref_stride, w, h, sse, &sum, \
- aom_highbd_calc##block_size##x##block_size##var_sse2, block_size); \
- var = (int64_t)(*sse) - (((int64_t)sum * sum) >> shift); \
- return (var >= 0) ? (uint32_t)var : 0; \
- }
-
-VAR_FN(128, 128, 16, 14);
-VAR_FN(128, 64, 16, 13);
-VAR_FN(64, 128, 16, 13);
-VAR_FN(64, 64, 16, 12);
-VAR_FN(64, 32, 16, 11);
-VAR_FN(32, 64, 16, 11);
-VAR_FN(32, 32, 16, 10);
-VAR_FN(32, 16, 16, 9);
-VAR_FN(16, 32, 16, 9);
-VAR_FN(16, 16, 16, 8);
-VAR_FN(16, 8, 8, 7);
-VAR_FN(8, 16, 8, 7);
-VAR_FN(8, 8, 8, 6);
-VAR_FN(16, 4, 16, 6); /* NOTE(review): h (4) < block_size (16); the 16x16 kernel in highbd_variance_impl_sse2.asm always walks 16 rows -- confirm this variant is never dispatched */
-VAR_FN(8, 32, 8, 8);
-VAR_FN(32, 8, 8, 8);
-VAR_FN(16, 64, 16, 10);
-VAR_FN(64, 16, 16, 10);
-
-#undef VAR_FN
-
-/* Computes the unscaled 16x16 sum of squared differences between the 16-bit
- * buffers behind src8 and ref8, stores it in *sse and returns it. */
-unsigned int aom_highbd_8_mse16x16_sse2(const uint8_t *src8, int src_stride,
-                                        const uint8_t *ref8, int ref_stride,
-                                        unsigned int *sse) {
-  int unused_sum; /* the helper needs somewhere to put the sum; MSE ignores it */
-  highbd_8_variance_sse2(CONVERT_TO_SHORTPTR(src8), src_stride,
-                         CONVERT_TO_SHORTPTR(ref8), ref_stride, 16, 16, sse,
-                         &unused_sum, aom_highbd_calc16x16var_sse2, 16);
-  return *sse;
-}
-
-/* Computes the 16x16 sum of squared differences between the 16-bit buffers
- * behind src8 and ref8 with the 10-bit scaling of highbd_10_variance_sse2,
- * stores it in *sse and returns it. */
-unsigned int aom_highbd_10_mse16x16_sse2(const uint8_t *src8, int src_stride,
-                                         const uint8_t *ref8, int ref_stride,
-                                         unsigned int *sse) {
-  int unused_sum; /* the helper needs somewhere to put the sum; MSE ignores it */
-  highbd_10_variance_sse2(CONVERT_TO_SHORTPTR(src8), src_stride,
-                          CONVERT_TO_SHORTPTR(ref8), ref_stride, 16, 16, sse,
-                          &unused_sum, aom_highbd_calc16x16var_sse2, 16);
-  return *sse;
-}
-
-/* Computes the 16x16 sum of squared differences between the 16-bit buffers
- * behind src8 and ref8 with the 12-bit scaling of highbd_12_variance_sse2,
- * stores it in *sse and returns it. */
-unsigned int aom_highbd_12_mse16x16_sse2(const uint8_t *src8, int src_stride,
-                                         const uint8_t *ref8, int ref_stride,
-                                         unsigned int *sse) {
-  int unused_sum; /* the helper needs somewhere to put the sum; MSE ignores it */
-  highbd_12_variance_sse2(CONVERT_TO_SHORTPTR(src8), src_stride,
-                          CONVERT_TO_SHORTPTR(ref8), ref_stride, 16, 16, sse,
-                          &unused_sum, aom_highbd_calc16x16var_sse2, 16);
-  return *sse;
-}
-
-/* Computes the unscaled 8x8 sum of squared differences between the 16-bit
- * buffers behind src8 and ref8, stores it in *sse and returns it. */
-unsigned int aom_highbd_8_mse8x8_sse2(const uint8_t *src8, int src_stride,
-                                      const uint8_t *ref8, int ref_stride,
-                                      unsigned int *sse) {
-  int unused_sum; /* the helper needs somewhere to put the sum; MSE ignores it */
-  highbd_8_variance_sse2(CONVERT_TO_SHORTPTR(src8), src_stride,
-                         CONVERT_TO_SHORTPTR(ref8), ref_stride, 8, 8, sse,
-                         &unused_sum, aom_highbd_calc8x8var_sse2, 8);
-  return *sse;
-}
-
-/* Computes the 8x8 sum of squared differences between the 16-bit buffers
- * behind src8 and ref8 with the 10-bit scaling of highbd_10_variance_sse2,
- * stores it in *sse and returns it. */
-unsigned int aom_highbd_10_mse8x8_sse2(const uint8_t *src8, int src_stride,
-                                       const uint8_t *ref8, int ref_stride,
-                                       unsigned int *sse) {
-  int unused_sum; /* the helper needs somewhere to put the sum; MSE ignores it */
-  highbd_10_variance_sse2(CONVERT_TO_SHORTPTR(src8), src_stride,
-                          CONVERT_TO_SHORTPTR(ref8), ref_stride, 8, 8, sse,
-                          &unused_sum, aom_highbd_calc8x8var_sse2, 8);
-  return *sse;
-}
-
-/* Computes the 8x8 sum of squared differences between the 16-bit buffers
- * behind src8 and ref8 with the 12-bit scaling of highbd_12_variance_sse2,
- * stores it in *sse and returns it. */
-unsigned int aom_highbd_12_mse8x8_sse2(const uint8_t *src8, int src_stride,
-                                       const uint8_t *ref8, int ref_stride,
-                                       unsigned int *sse) {
-  int unused_sum; /* the helper needs somewhere to put the sum; MSE ignores it */
-  highbd_12_variance_sse2(CONVERT_TO_SHORTPTR(src8), src_stride,
-                          CONVERT_TO_SHORTPTR(ref8), ref_stride, 8, 8, sse,
-                          &unused_sum, aom_highbd_calc8x8var_sse2, 8);
-  return *sse;
-}
-
-// Prototypes for the w-wide, variable-height sub-pixel variance kernels
-// defined in highbd_subpel_variance_impl_sse2.asm. The two trailing unused
-// parameters are placeholders for PIC-enabled builds.
-#define DECL(w, opt) \
- int aom_highbd_sub_pixel_variance##w##xh_##opt( \
- const uint16_t *src, ptrdiff_t src_stride, int x_offset, int y_offset, \
- const uint16_t *dst, ptrdiff_t dst_stride, int height, \
- unsigned int *sse, void *unused0, void *unused);
-#define DECLS(opt) \
- DECL(8, opt); \
- DECL(16, opt)
-
-DECLS(sse2);
-
-#undef DECLS
-#undef DECL
-/* FN(w, h, wf, wlog2, hlog2, opt, cast) defines the 8-, 10- and 12-bit
- * aom_highbd_<bd>_sub_pixel_variance<w>x<h>_<opt> wrappers on top of the
- * wf-wide kernel aom_highbd_sub_pixel_variance<wf>xh_<opt>. The kernel is
- * called at column offsets 0, 16, 32 and 48 as far as w requires (every
- * instantiation with w > wf uses wf == 16), and the returned sums (se) and
- * the SSEs are added up. The 8-bit variant stores the SSE in *sse_ptr and
- * returns sse - ((se * se) >> (wlog2 + hlog2)) without a clamp. The 10-bit
- * variant first round-shifts se by 2 bits and sse by 4 bits, then clamps the
- * result at 0. The 12-bit variant walks the block in strips of at most 16
- * rows, accumulates the SSE in 64 bits, round-shifts se by 4 bits and sse by
- * 8 bits, and clamps the result at 0. */
-#define FN(w, h, wf, wlog2, hlog2, opt, cast) \
- uint32_t aom_highbd_8_sub_pixel_variance##w##x##h##_##opt( \
- const uint8_t *src8, int src_stride, int x_offset, int y_offset, \
- const uint8_t *dst8, int dst_stride, uint32_t *sse_ptr) { \
- uint32_t sse; \
- uint16_t *src = CONVERT_TO_SHORTPTR(src8); \
- uint16_t *dst = CONVERT_TO_SHORTPTR(dst8); \
- int se = aom_highbd_sub_pixel_variance##wf##xh_##opt( \
- src, src_stride, x_offset, y_offset, dst, dst_stride, h, &sse, NULL, \
- NULL); \
- if (w > wf) { \
- unsigned int sse2; \
- int se2 = aom_highbd_sub_pixel_variance##wf##xh_##opt( \
- src + 16, src_stride, x_offset, y_offset, dst + 16, dst_stride, h, \
- &sse2, NULL, NULL); \
- se += se2; \
- sse += sse2; \
- if (w > wf * 2) { \
- se2 = aom_highbd_sub_pixel_variance##wf##xh_##opt( \
- src + 32, src_stride, x_offset, y_offset, dst + 32, dst_stride, h, \
- &sse2, NULL, NULL); \
- se += se2; \
- sse += sse2; \
- se2 = aom_highbd_sub_pixel_variance##wf##xh_##opt( \
- src + 48, src_stride, x_offset, y_offset, dst + 48, dst_stride, h, \
- &sse2, NULL, NULL); \
- se += se2; \
- sse += sse2; \
- } \
- } \
- *sse_ptr = sse; \
- return sse - (uint32_t)((cast se * se) >> (wlog2 + hlog2)); \
- } \
- \
- uint32_t aom_highbd_10_sub_pixel_variance##w##x##h##_##opt( \
- const uint8_t *src8, int src_stride, int x_offset, int y_offset, \
- const uint8_t *dst8, int dst_stride, uint32_t *sse_ptr) { \
- int64_t var; \
- uint32_t sse; \
- uint16_t *src = CONVERT_TO_SHORTPTR(src8); \
- uint16_t *dst = CONVERT_TO_SHORTPTR(dst8); \
- int se = aom_highbd_sub_pixel_variance##wf##xh_##opt( \
- src, src_stride, x_offset, y_offset, dst, dst_stride, h, &sse, NULL, \
- NULL); \
- if (w > wf) { \
- uint32_t sse2; \
- int se2 = aom_highbd_sub_pixel_variance##wf##xh_##opt( \
- src + 16, src_stride, x_offset, y_offset, dst + 16, dst_stride, h, \
- &sse2, NULL, NULL); \
- se += se2; \
- sse += sse2; \
- if (w > wf * 2) { \
- se2 = aom_highbd_sub_pixel_variance##wf##xh_##opt( \
- src + 32, src_stride, x_offset, y_offset, dst + 32, dst_stride, h, \
- &sse2, NULL, NULL); \
- se += se2; \
- sse += sse2; \
- se2 = aom_highbd_sub_pixel_variance##wf##xh_##opt( \
- src + 48, src_stride, x_offset, y_offset, dst + 48, dst_stride, h, \
- &sse2, NULL, NULL); \
- se += se2; \
- sse += sse2; \
- } \
- } \
- se = ROUND_POWER_OF_TWO(se, 2); \
- sse = ROUND_POWER_OF_TWO(sse, 4); \
- *sse_ptr = sse; \
- var = (int64_t)(sse) - ((cast se * se) >> (wlog2 + hlog2)); \
- return (var >= 0) ? (uint32_t)var : 0; \
- } \
- \
- uint32_t aom_highbd_12_sub_pixel_variance##w##x##h##_##opt( \
- const uint8_t *src8, int src_stride, int x_offset, int y_offset, \
- const uint8_t *dst8, int dst_stride, uint32_t *sse_ptr) { \
- int start_row; \
- uint32_t sse; \
- int se = 0; \
- int64_t var; \
- uint64_t long_sse = 0; \
- uint16_t *src = CONVERT_TO_SHORTPTR(src8); \
- uint16_t *dst = CONVERT_TO_SHORTPTR(dst8); \
- for (start_row = 0; start_row < h; start_row += 16) { \
- uint32_t sse2; \
- int height = h - start_row < 16 ? h - start_row : 16; \
- int se2 = aom_highbd_sub_pixel_variance##wf##xh_##opt( \
- src + (start_row * src_stride), src_stride, x_offset, y_offset, \
- dst + (start_row * dst_stride), dst_stride, height, &sse2, NULL, \
- NULL); \
- se += se2; \
- long_sse += sse2; \
- if (w > wf) { \
- se2 = aom_highbd_sub_pixel_variance##wf##xh_##opt( \
- src + 16 + (start_row * src_stride), src_stride, x_offset, \
- y_offset, dst + 16 + (start_row * dst_stride), dst_stride, height, \
- &sse2, NULL, NULL); \
- se += se2; \
- long_sse += sse2; \
- if (w > wf * 2) { \
- se2 = aom_highbd_sub_pixel_variance##wf##xh_##opt( \
- src + 32 + (start_row * src_stride), src_stride, x_offset, \
- y_offset, dst + 32 + (start_row * dst_stride), dst_stride, \
- height, &sse2, NULL, NULL); \
- se += se2; \
- long_sse += sse2; \
- se2 = aom_highbd_sub_pixel_variance##wf##xh_##opt( \
- src + 48 + (start_row * src_stride), src_stride, x_offset, \
- y_offset, dst + 48 + (start_row * dst_stride), dst_stride, \
- height, &sse2, NULL, NULL); \
- se += se2; \
- long_sse += sse2; \
- } \
- } \
- } \
- se = ROUND_POWER_OF_TWO(se, 4); \
- sse = (uint32_t)ROUND_POWER_OF_TWO(long_sse, 8); \
- *sse_ptr = sse; \
- var = (int64_t)(sse) - ((cast se * se) >> (wlog2 + hlog2)); \
- return (var >= 0) ? (uint32_t)var : 0; \
- }
-/* FNS(opt) instantiates FN once per block size as FN(w, h, wf, log2(w), log2(h), opt, cast); wf is the width of the kernel used. */
-#define FNS(opt) \
- FN(64, 64, 16, 6, 6, opt, (int64_t)); \
- FN(64, 32, 16, 6, 5, opt, (int64_t)); \
- FN(32, 64, 16, 5, 6, opt, (int64_t)); \
- FN(32, 32, 16, 5, 5, opt, (int64_t)); \
- FN(32, 16, 16, 5, 4, opt, (int64_t)); \
- FN(16, 32, 16, 4, 5, opt, (int64_t)); \
- FN(16, 16, 16, 4, 4, opt, (int64_t)); \
- FN(16, 8, 16, 4, 3, opt, (int64_t)); \
- FN(8, 16, 8, 3, 4, opt, (int64_t)); \
- FN(8, 8, 8, 3, 3, opt, (int64_t)); \
- FN(8, 4, 8, 3, 2, opt, (int64_t)); \
- FN(16, 4, 16, 4, 2, opt, (int64_t)); \
- FN(8, 32, 8, 3, 5, opt, (int64_t)); \
- FN(32, 8, 16, 5, 3, opt, (int64_t)); \
- FN(16, 64, 16, 4, 6, opt, (int64_t)); \
- FN(64, 16, 16, 6, 4, opt, (int64_t))
-
-FNS(sse2);
-
-#undef FNS
-#undef FN
-
-// Prototypes for the w-wide sub-pixel averaging variance kernels (extra sec/sec_stride second-predictor arguments); the 2 trailing unused parameters are placeholders for PIC-enabled builds.
-#define DECL(w, opt) \
- int aom_highbd_sub_pixel_avg_variance##w##xh_##opt( \
- const uint16_t *src, ptrdiff_t src_stride, int x_offset, int y_offset, \
- const uint16_t *dst, ptrdiff_t dst_stride, const uint16_t *sec, \
- ptrdiff_t sec_stride, int height, unsigned int *sse, void *unused0, \
- void *unused);
-#define DECLS(opt) \
- DECL(16, opt) \
- DECL(8, opt)
-
-DECLS(sse2);
-#undef DECL
-#undef DECLS
-/* FN(w, h, wf, wlog2, hlog2, opt, cast) defines the 8-, 10- and 12-bit
- * aom_highbd_<bd>_sub_pixel_avg_variance<w>x<h>_<opt> wrappers on top of the
- * wf-wide kernel aom_highbd_sub_pixel_avg_variance<wf>xh_<opt>. sec8 is the
- * second predictor; it is passed to the kernel with a stride of w. The
- * kernel is called at column offsets 0, 16, 32 and 48 as far as w requires
- * (every instantiation with w > wf uses wf == 16), and the returned sums
- * (se) and the SSEs are added up. The 8-bit variant stores the SSE in
- * *sse_ptr and returns sse - ((se * se) >> (wlog2 + hlog2)) without a clamp.
- * The 10-bit variant first round-shifts se by 2 bits and sse by 4 bits, then
- * clamps the result at 0. The 12-bit variant walks the block in strips of at
- * most 16 rows, accumulates the SSE in 64 bits, round-shifts se by 4 bits
- * and sse by 8 bits, and clamps the result at 0. */
-#define FN(w, h, wf, wlog2, hlog2, opt, cast) \
- uint32_t aom_highbd_8_sub_pixel_avg_variance##w##x##h##_##opt( \
- const uint8_t *src8, int src_stride, int x_offset, int y_offset, \
- const uint8_t *dst8, int dst_stride, uint32_t *sse_ptr, \
- const uint8_t *sec8) { \
- uint32_t sse; \
- uint16_t *src = CONVERT_TO_SHORTPTR(src8); \
- uint16_t *dst = CONVERT_TO_SHORTPTR(dst8); \
- uint16_t *sec = CONVERT_TO_SHORTPTR(sec8); \
- int se = aom_highbd_sub_pixel_avg_variance##wf##xh_##opt( \
- src, src_stride, x_offset, y_offset, dst, dst_stride, sec, w, h, &sse, \
- NULL, NULL); \
- if (w > wf) { \
- uint32_t sse2; \
- int se2 = aom_highbd_sub_pixel_avg_variance##wf##xh_##opt( \
- src + 16, src_stride, x_offset, y_offset, dst + 16, dst_stride, \
- sec + 16, w, h, &sse2, NULL, NULL); \
- se += se2; \
- sse += sse2; \
- if (w > wf * 2) { \
- se2 = aom_highbd_sub_pixel_avg_variance##wf##xh_##opt( \
- src + 32, src_stride, x_offset, y_offset, dst + 32, dst_stride, \
- sec + 32, w, h, &sse2, NULL, NULL); \
- se += se2; \
- sse += sse2; \
- se2 = aom_highbd_sub_pixel_avg_variance##wf##xh_##opt( \
- src + 48, src_stride, x_offset, y_offset, dst + 48, dst_stride, \
- sec + 48, w, h, &sse2, NULL, NULL); \
- se += se2; \
- sse += sse2; \
- } \
- } \
- *sse_ptr = sse; \
- return sse - (uint32_t)((cast se * se) >> (wlog2 + hlog2)); \
- } \
- \
- uint32_t aom_highbd_10_sub_pixel_avg_variance##w##x##h##_##opt( \
- const uint8_t *src8, int src_stride, int x_offset, int y_offset, \
- const uint8_t *dst8, int dst_stride, uint32_t *sse_ptr, \
- const uint8_t *sec8) { \
- int64_t var; \
- uint32_t sse; \
- uint16_t *src = CONVERT_TO_SHORTPTR(src8); \
- uint16_t *dst = CONVERT_TO_SHORTPTR(dst8); \
- uint16_t *sec = CONVERT_TO_SHORTPTR(sec8); \
- int se = aom_highbd_sub_pixel_avg_variance##wf##xh_##opt( \
- src, src_stride, x_offset, y_offset, dst, dst_stride, sec, w, h, &sse, \
- NULL, NULL); \
- if (w > wf) { \
- uint32_t sse2; \
- int se2 = aom_highbd_sub_pixel_avg_variance##wf##xh_##opt( \
- src + 16, src_stride, x_offset, y_offset, dst + 16, dst_stride, \
- sec + 16, w, h, &sse2, NULL, NULL); \
- se += se2; \
- sse += sse2; \
- if (w > wf * 2) { \
- se2 = aom_highbd_sub_pixel_avg_variance##wf##xh_##opt( \
- src + 32, src_stride, x_offset, y_offset, dst + 32, dst_stride, \
- sec + 32, w, h, &sse2, NULL, NULL); \
- se += se2; \
- sse += sse2; \
- se2 = aom_highbd_sub_pixel_avg_variance##wf##xh_##opt( \
- src + 48, src_stride, x_offset, y_offset, dst + 48, dst_stride, \
- sec + 48, w, h, &sse2, NULL, NULL); \
- se += se2; \
- sse += sse2; \
- } \
- } \
- se = ROUND_POWER_OF_TWO(se, 2); \
- sse = ROUND_POWER_OF_TWO(sse, 4); \
- *sse_ptr = sse; \
- var = (int64_t)(sse) - ((cast se * se) >> (wlog2 + hlog2)); \
- return (var >= 0) ? (uint32_t)var : 0; \
- } \
- \
- uint32_t aom_highbd_12_sub_pixel_avg_variance##w##x##h##_##opt( \
- const uint8_t *src8, int src_stride, int x_offset, int y_offset, \
- const uint8_t *dst8, int dst_stride, uint32_t *sse_ptr, \
- const uint8_t *sec8) { \
- int start_row; \
- int64_t var; \
- uint32_t sse; \
- int se = 0; \
- uint64_t long_sse = 0; \
- uint16_t *src = CONVERT_TO_SHORTPTR(src8); \
- uint16_t *dst = CONVERT_TO_SHORTPTR(dst8); \
- uint16_t *sec = CONVERT_TO_SHORTPTR(sec8); \
- for (start_row = 0; start_row < h; start_row += 16) { \
- uint32_t sse2; \
- int height = h - start_row < 16 ? h - start_row : 16; \
- int se2 = aom_highbd_sub_pixel_avg_variance##wf##xh_##opt( \
- src + (start_row * src_stride), src_stride, x_offset, y_offset, \
- dst + (start_row * dst_stride), dst_stride, sec + (start_row * w), \
- w, height, &sse2, NULL, NULL); \
- se += se2; \
- long_sse += sse2; \
- if (w > wf) { \
- se2 = aom_highbd_sub_pixel_avg_variance##wf##xh_##opt( \
- src + 16 + (start_row * src_stride), src_stride, x_offset, \
- y_offset, dst + 16 + (start_row * dst_stride), dst_stride, \
- sec + 16 + (start_row * w), w, height, &sse2, NULL, NULL); \
- se += se2; \
- long_sse += sse2; \
- if (w > wf * 2) { \
- se2 = aom_highbd_sub_pixel_avg_variance##wf##xh_##opt( \
- src + 32 + (start_row * src_stride), src_stride, x_offset, \
- y_offset, dst + 32 + (start_row * dst_stride), dst_stride, \
- sec + 32 + (start_row * w), w, height, &sse2, NULL, NULL); \
- se += se2; \
- long_sse += sse2; \
- se2 = aom_highbd_sub_pixel_avg_variance##wf##xh_##opt( \
- src + 48 + (start_row * src_stride), src_stride, x_offset, \
- y_offset, dst + 48 + (start_row * dst_stride), dst_stride, \
- sec + 48 + (start_row * w), w, height, &sse2, NULL, NULL); \
- se += se2; \
- long_sse += sse2; \
- } \
- } \
- } \
- se = ROUND_POWER_OF_TWO(se, 4); \
- sse = (uint32_t)ROUND_POWER_OF_TWO(long_sse, 8); \
- *sse_ptr = sse; \
- var = (int64_t)(sse) - ((cast se * se) >> (wlog2 + hlog2)); \
- return (var >= 0) ? (uint32_t)var : 0; \
- }
-/* FNS(opt) instantiates FN once per block size as FN(w, h, wf, log2(w), log2(h), opt, cast); wf is the width of the kernel used. */
-#define FNS(opt) \
- FN(64, 64, 16, 6, 6, opt, (int64_t)); \
- FN(64, 32, 16, 6, 5, opt, (int64_t)); \
- FN(32, 64, 16, 5, 6, opt, (int64_t)); \
- FN(32, 32, 16, 5, 5, opt, (int64_t)); \
- FN(32, 16, 16, 5, 4, opt, (int64_t)); \
- FN(16, 32, 16, 4, 5, opt, (int64_t)); \
- FN(16, 16, 16, 4, 4, opt, (int64_t)); \
- FN(16, 8, 16, 4, 3, opt, (int64_t)); \
- FN(8, 16, 8, 3, 4, opt, (int64_t)); \
- FN(8, 8, 8, 3, 3, opt, (int64_t)); \
- FN(8, 4, 8, 3, 2, opt, (int64_t)); \
- FN(16, 4, 16, 4, 2, opt, (int64_t)); \
- FN(8, 32, 8, 3, 5, opt, (int64_t)); \
- FN(32, 8, 16, 5, 3, opt, (int64_t)); \
- FN(16, 64, 16, 4, 6, opt, (int64_t)); \
- FN(64, 16, 16, 6, 4, opt, (int64_t));
-
-FNS(sse2);
-
-#undef FNS
-#undef FN
-
-/* Builds the high-bitdepth prediction block comp_pred8 (width x height,
- * written with a stride of width) from ref8 at the 1/8-pel offset
- * (subpel_x_q3, subpel_y_q3).
- *
- * Paths, in order:
- *  - xd != NULL and the reference is scaled: the whole job is delegated to
- *    av1_make_inter_predictor() and the function returns.
- *  - both offsets zero: rows are copied from ref8.
- *  - only one offset non-zero: a single horizontal or vertical convolve.
- *  - both non-zero: a horizontal pass into a local buffer followed by a
- *    vertical pass.
- * subpel_search == 1 selects the 4-tap filter parameters; any other value
- * uses av1_get_interp_filter_params_with_block_size(EIGHTTAP_REGULAR, 8). */
-void aom_highbd_upsampled_pred_sse2(MACROBLOCKD *xd,
-                                    const struct AV1Common *const cm,
-                                    int mi_row, int mi_col, const MV *const mv,
-                                    uint8_t *comp_pred8, int width, int height,
-                                    int subpel_x_q3, int subpel_y_q3,
-                                    const uint8_t *ref8, int ref_stride, int bd,
-                                    int subpel_search) {
-  // expect xd == NULL only in tests
-  if (xd != NULL) {
-    const MB_MODE_INFO *mi = xd->mi[0];
-    const int ref_num = 0;
-    const int is_intrabc = is_intrabc_block(mi);
-    const struct scale_factors *const sf =
-        is_intrabc ? &cm->sf_identity : &xd->block_refs[ref_num]->sf;
-    const int is_scaled = av1_is_scaled(sf);
-
-    if (is_scaled) {
-      // Note: This is mostly a copy from the >=8X8 case in
-      // build_inter_predictors() function, with some small tweaks.
-      // Some assumptions.
-      const int plane = 0;
-
-      // Get pre-requisites.
-      const struct macroblockd_plane *const pd = &xd->plane[plane];
-      const int ssx = pd->subsampling_x;
-      const int ssy = pd->subsampling_y;
-      assert(ssx == 0 && ssy == 0);
-      const struct buf_2d *const dst_buf = &pd->dst;
-      const struct buf_2d *const pre_buf =
-          is_intrabc ? dst_buf : &pd->pre[ref_num];
-      const int mi_x = mi_col * MI_SIZE;
-      const int mi_y = mi_row * MI_SIZE;
-
-      // Calculate subpel_x/y and x/y_step.
-      const int row_start = 0;  // Because ss_y is 0.
-      const int col_start = 0;  // Because ss_x is 0.
-      const int pre_x = (mi_x + MI_SIZE * col_start) >> ssx;
-      const int pre_y = (mi_y + MI_SIZE * row_start) >> ssy;
-      int orig_pos_y = pre_y << SUBPEL_BITS;
-      orig_pos_y += mv->row * (1 << (1 - ssy));
-      int orig_pos_x = pre_x << SUBPEL_BITS;
-      orig_pos_x += mv->col * (1 << (1 - ssx));
-      int pos_y = sf->scale_value_y(orig_pos_y, sf);
-      int pos_x = sf->scale_value_x(orig_pos_x, sf);
-      pos_x += SCALE_EXTRA_OFF;
-      pos_y += SCALE_EXTRA_OFF;
-
-      const int top = -AOM_LEFT_TOP_MARGIN_SCALED(ssy);
-      const int left = -AOM_LEFT_TOP_MARGIN_SCALED(ssx);
-      const int bottom = (pre_buf->height + AOM_INTERP_EXTEND)
-                         << SCALE_SUBPEL_BITS;
-      const int right = (pre_buf->width + AOM_INTERP_EXTEND)
-                        << SCALE_SUBPEL_BITS;
-      pos_y = clamp(pos_y, top, bottom);
-      pos_x = clamp(pos_x, left, right);
-
-      const uint8_t *const pre =
-          pre_buf->buf0 + (pos_y >> SCALE_SUBPEL_BITS) * pre_buf->stride +
-          (pos_x >> SCALE_SUBPEL_BITS);
-      const SubpelParams subpel_params = { sf->x_step_q4, sf->y_step_q4,
-                                           pos_x & SCALE_SUBPEL_MASK,
-                                           pos_y & SCALE_SUBPEL_MASK };
-
-      // Get warp types.
-      const WarpedMotionParams *const wm =
-          &xd->global_motion[mi->ref_frame[ref_num]];
-      const int is_global = is_global_mv_block(mi, wm->wmtype);
-      WarpTypesAllowed warp_types;
-      warp_types.global_warp_allowed = is_global;
-      warp_types.local_warp_allowed = mi->motion_mode == WARPED_CAUSAL;
-
-      // Get convolve parameters.
-      ConvolveParams conv_params = get_conv_params(0, plane, xd->bd);
-      const InterpFilters filters =
-          av1_broadcast_interp_filter(EIGHTTAP_REGULAR);
-
-      // Get the inter predictor.
-      const int build_for_obmc = 0;
-      av1_make_inter_predictor(pre, pre_buf->stride, comp_pred8, width,
-                               &subpel_params, sf, width, height, &conv_params,
-                               filters, &warp_types, mi_x >> pd->subsampling_x,
-                               mi_y >> pd->subsampling_y, plane, ref_num, mi,
-                               build_for_obmc, xd, cm->allow_warped_motion);
-      return;
-    }
-  }
-
-  const InterpFilterParams *filter =
-      (subpel_search == 1)
-          ? av1_get_4tap_interp_filter_params(EIGHTTAP_REGULAR)
-          : av1_get_interp_filter_params_with_block_size(EIGHTTAP_REGULAR, 8);
-
-  if (!subpel_x_q3 && !subpel_y_q3) {
-    uint16_t *ref = CONVERT_TO_SHORTPTR(ref8);
-    uint16_t *comp_pred = CONVERT_TO_SHORTPTR(comp_pred8);
-    if (width >= 8) {
-      int i;
-      assert(!(width & 7));
-      /*Read 8 pixels one row at a time.*/
-      for (i = 0; i < height; i++) {
-        int j;
-        for (j = 0; j < width; j += 8) {
-          __m128i s0 = _mm_loadu_si128((const __m128i *)ref);
-          _mm_storeu_si128((__m128i *)comp_pred, s0);
-          comp_pred += 8;
-          ref += 8;
-        }
-        ref += ref_stride - width;
-      }
-    } else {
-      int i;
-      assert(!(width & 3));
-      /*Read 4 pixels two rows at a time.*/
-      for (i = 0; i < height; i += 2) {
-        __m128i s0 = _mm_loadl_epi64((const __m128i *)ref);
-        __m128i s1 = _mm_loadl_epi64((const __m128i *)(ref + ref_stride));
-        __m128i t0 = _mm_unpacklo_epi64(s0, s1);
-        _mm_storeu_si128((__m128i *)comp_pred, t0);
-        comp_pred += 8;
-        ref += 2 * ref_stride;
-      }
-    }
-  } else if (!subpel_y_q3) {
-    const int16_t *const kernel =
-        av1_get_interp_filter_subpel_kernel(filter, subpel_x_q3 << 1);
-    aom_highbd_convolve8_horiz(ref8, ref_stride, comp_pred8, width, kernel, 16,
-                               NULL, -1, width, height, bd);
-  } else if (!subpel_x_q3) {
-    const int16_t *const kernel =
-        av1_get_interp_filter_subpel_kernel(filter, subpel_y_q3 << 1);
-    aom_highbd_convolve8_vert(ref8, ref_stride, comp_pred8, width, NULL, -1,
-                              kernel, 16, width, height, bd);
-  } else {
-    DECLARE_ALIGNED(16, uint16_t,
-                    temp[((MAX_SB_SIZE + 16) + 16) * MAX_SB_SIZE]);
-    const int16_t *const kernel_x =
-        av1_get_interp_filter_subpel_kernel(filter, subpel_x_q3 << 1);
-    const int16_t *const kernel_y =
-        av1_get_interp_filter_subpel_kernel(filter, subpel_y_q3 << 1);
-    const int intermediate_height =
-        (((height - 1) * 8 + subpel_y_q3) >> 3) + filter->taps;
-    // temp holds (MAX_SB_SIZE + 16) + 16 rows of MAX_SB_SIZE samples, so the
-    // horizontal pass must not produce more rows than that.
-    assert(intermediate_height <= (MAX_SB_SIZE + 16) + 16);
-    aom_highbd_convolve8_horiz(ref8 - ref_stride * ((filter->taps >> 1) - 1),
-                               ref_stride, CONVERT_TO_BYTEPTR(temp),
-                               MAX_SB_SIZE, kernel_x, 16, NULL, -1, width,
-                               intermediate_height, bd);
-    aom_highbd_convolve8_vert(
-        CONVERT_TO_BYTEPTR(temp + MAX_SB_SIZE * ((filter->taps >> 1) - 1)),
-        MAX_SB_SIZE, comp_pred8, width, NULL, -1, kernel_y, 16, width, height,
-        bd);
-  }
-}
-
-void aom_highbd_comp_avg_upsampled_pred_sse2(
- MACROBLOCKD *xd, const struct AV1Common *const cm, int mi_row, int mi_col,
- const MV *const mv, uint8_t *comp_pred8, const uint8_t *pred8, int width,
- int height, int subpel_x_q3, int subpel_y_q3, const uint8_t *ref8,
- int ref_stride, int bd, int subpel_search) {
- aom_highbd_upsampled_pred(xd, cm, mi_row, mi_col, mv, comp_pred8, width,
- height, subpel_x_q3, subpel_y_q3, ref8, ref_stride,
- bd, subpel_search);
- uint16_t *pred = CONVERT_TO_SHORTPTR(pred8);
- uint16_t *comp_pred16 = CONVERT_TO_SHORTPTR(comp_pred8);
- /*The total number of pixels must be a multiple of 8 (e.g., 4x4).*/
- assert(!(width * height & 7));
- int n = width * height >> 3;
- for (int i = 0; i < n; i++) {
- __m128i s0 = _mm_loadu_si128((const __m128i *)comp_pred16);
- __m128i p0 = _mm_loadu_si128((const __m128i *)pred);
- _mm_storeu_si128((__m128i *)comp_pred16, _mm_avg_epu16(s0, p0));
- comp_pred16 += 8;
- pred += 8;
- }
-}
-
-static INLINE void highbd_compute_jnt_comp_avg(__m128i *p0, __m128i *p1,
- const __m128i *w0,
- const __m128i *w1,
- const __m128i *r,
- void *const result) {
- assert(DIST_PRECISION_BITS <= 4);
- __m128i mult0 = _mm_mullo_epi16(*p0, *w0);
- __m128i mult1 = _mm_mullo_epi16(*p1, *w1);
- __m128i sum = _mm_adds_epu16(mult0, mult1);
- __m128i round = _mm_adds_epu16(sum, *r);
- __m128i shift = _mm_srli_epi16(round, DIST_PRECISION_BITS);
-
- xx_storeu_128(result, shift);
-}
-
-void aom_highbd_jnt_comp_avg_pred_sse2(uint8_t *comp_pred8,
- const uint8_t *pred8, int width,
- int height, const uint8_t *ref8,
- int ref_stride,
- const JNT_COMP_PARAMS *jcp_param) {
- int i;
- const uint16_t wt0 = (uint16_t)jcp_param->fwd_offset;
- const uint16_t wt1 = (uint16_t)jcp_param->bck_offset;
- const __m128i w0 = _mm_set_epi16(wt0, wt0, wt0, wt0, wt0, wt0, wt0, wt0);
- const __m128i w1 = _mm_set_epi16(wt1, wt1, wt1, wt1, wt1, wt1, wt1, wt1);
- const uint16_t round = ((1 << DIST_PRECISION_BITS) >> 1);
- const __m128i r =
- _mm_set_epi16(round, round, round, round, round, round, round, round);
- uint16_t *pred = CONVERT_TO_SHORTPTR(pred8);
- uint16_t *ref = CONVERT_TO_SHORTPTR(ref8);
- uint16_t *comp_pred = CONVERT_TO_SHORTPTR(comp_pred8);
-
- if (width >= 8) {
- // Read 8 pixels one row at a time
- assert(!(width & 7));
- for (i = 0; i < height; ++i) {
- int j;
- for (j = 0; j < width; j += 8) {
- __m128i p0 = xx_loadu_128(ref);
- __m128i p1 = xx_loadu_128(pred);
-
- highbd_compute_jnt_comp_avg(&p0, &p1, &w0, &w1, &r, comp_pred);
-
- comp_pred += 8;
- pred += 8;
- ref += 8;
- }
- ref += ref_stride - width;
- }
- } else {
- // Read 4 pixels two rows at a time
- assert(!(width & 3));
- for (i = 0; i < height; i += 2) {
- __m128i p0_0 = xx_loadl_64(ref + 0 * ref_stride);
- __m128i p0_1 = xx_loadl_64(ref + 1 * ref_stride);
- __m128i p0 = _mm_unpacklo_epi64(p0_0, p0_1);
- __m128i p1 = xx_loadu_128(pred);
-
- highbd_compute_jnt_comp_avg(&p0, &p1, &w0, &w1, &r, comp_pred);
-
- comp_pred += 8;
- pred += 8;
- ref += 2 * ref_stride;
- }
- }
-}
-
-void aom_highbd_jnt_comp_avg_upsampled_pred_sse2(
- MACROBLOCKD *xd, const struct AV1Common *const cm, int mi_row, int mi_col,
- const MV *const mv, uint8_t *comp_pred8, const uint8_t *pred8, int width,
- int height, int subpel_x_q3, int subpel_y_q3, const uint8_t *ref8,
- int ref_stride, int bd, const JNT_COMP_PARAMS *jcp_param,
- int subpel_search) {
- uint16_t *pred = CONVERT_TO_SHORTPTR(pred8);
- int n;
- int i;
- aom_highbd_upsampled_pred(xd, cm, mi_row, mi_col, mv, comp_pred8, width,
- height, subpel_x_q3, subpel_y_q3, ref8, ref_stride,
- bd, subpel_search);
- assert(!(width * height & 7));
- n = width * height >> 3;
-
- const uint16_t wt0 = (uint16_t)jcp_param->fwd_offset;
- const uint16_t wt1 = (uint16_t)jcp_param->bck_offset;
- const __m128i w0 = _mm_set_epi16(wt0, wt0, wt0, wt0, wt0, wt0, wt0, wt0);
- const __m128i w1 = _mm_set_epi16(wt1, wt1, wt1, wt1, wt1, wt1, wt1, wt1);
- const uint16_t round = ((1 << DIST_PRECISION_BITS) >> 1);
- const __m128i r =
- _mm_set_epi16(round, round, round, round, round, round, round, round);
-
- uint16_t *comp_pred16 = CONVERT_TO_SHORTPTR(comp_pred8);
- for (i = 0; i < n; i++) {
- __m128i p0 = xx_loadu_128(comp_pred16);
- __m128i p1 = xx_loadu_128(pred);
-
- highbd_compute_jnt_comp_avg(&p0, &p1, &w0, &w1, &r, comp_pred16);
-
- comp_pred16 += 8;
- pred += 8;
- }
-}
diff --git a/third_party/aom/aom_dsp/x86/highbd_variance_sse4.c b/third_party/aom/aom_dsp/x86/highbd_variance_sse4.c
deleted file mode 100644
index df5449a9d..000000000
--- a/third_party/aom/aom_dsp/x86/highbd_variance_sse4.c
+++ /dev/null
@@ -1,216 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#include <smmintrin.h> /* SSE4.1 */
-
-#include "config/aom_config.h"
-#include "config/aom_dsp_rtcd.h"
-
-#include "aom_dsp/variance.h"
-#include "aom_dsp/aom_filter.h"
-
-static INLINE void variance4x4_64_sse4_1(const uint8_t *a8, int a_stride,
- const uint8_t *b8, int b_stride,
- uint64_t *sse, int64_t *sum) {
- __m128i u0, u1, u2, u3;
- __m128i s0, s1, s2, s3;
- __m128i t0, t1, x0, y0;
- __m128i a0, a1, a2, a3;
- __m128i b0, b1, b2, b3;
- __m128i k_one_epi16 = _mm_set1_epi16((int16_t)1);
-
- uint16_t *a = CONVERT_TO_SHORTPTR(a8);
- uint16_t *b = CONVERT_TO_SHORTPTR(b8);
-
- a0 = _mm_loadl_epi64((__m128i const *)(a + 0 * a_stride));
- a1 = _mm_loadl_epi64((__m128i const *)(a + 1 * a_stride));
- a2 = _mm_loadl_epi64((__m128i const *)(a + 2 * a_stride));
- a3 = _mm_loadl_epi64((__m128i const *)(a + 3 * a_stride));
-
- b0 = _mm_loadl_epi64((__m128i const *)(b + 0 * b_stride));
- b1 = _mm_loadl_epi64((__m128i const *)(b + 1 * b_stride));
- b2 = _mm_loadl_epi64((__m128i const *)(b + 2 * b_stride));
- b3 = _mm_loadl_epi64((__m128i const *)(b + 3 * b_stride));
-
- u0 = _mm_unpacklo_epi16(a0, a1);
- u1 = _mm_unpacklo_epi16(a2, a3);
- u2 = _mm_unpacklo_epi16(b0, b1);
- u3 = _mm_unpacklo_epi16(b2, b3);
-
- s0 = _mm_sub_epi16(u0, u2);
- s1 = _mm_sub_epi16(u1, u3);
-
- t0 = _mm_madd_epi16(s0, k_one_epi16);
- t1 = _mm_madd_epi16(s1, k_one_epi16);
-
- s2 = _mm_hadd_epi32(t0, t1);
- s3 = _mm_hadd_epi32(s2, s2);
- y0 = _mm_hadd_epi32(s3, s3);
-
- t0 = _mm_madd_epi16(s0, s0);
- t1 = _mm_madd_epi16(s1, s1);
-
- s2 = _mm_hadd_epi32(t0, t1);
- s3 = _mm_hadd_epi32(s2, s2);
- x0 = _mm_hadd_epi32(s3, s3);
-
- *sse = (uint64_t)_mm_extract_epi32(x0, 0);
- *sum = (int64_t)_mm_extract_epi32(y0, 0);
-}
-
-uint32_t aom_highbd_8_variance4x4_sse4_1(const uint8_t *a, int a_stride,
- const uint8_t *b, int b_stride,
- uint32_t *sse) {
- int64_t sum, diff;
- uint64_t local_sse;
-
- variance4x4_64_sse4_1(a, a_stride, b, b_stride, &local_sse, &sum);
- *sse = (uint32_t)local_sse;
-
- diff = (int64_t)*sse - ((sum * sum) >> 4);
- return (diff >= 0) ? (uint32_t)diff : 0;
-}
-
-uint32_t aom_highbd_10_variance4x4_sse4_1(const uint8_t *a, int a_stride,
- const uint8_t *b, int b_stride,
- uint32_t *sse) {
- int64_t sum, diff;
- uint64_t local_sse;
-
- variance4x4_64_sse4_1(a, a_stride, b, b_stride, &local_sse, &sum);
- *sse = (uint32_t)ROUND_POWER_OF_TWO(local_sse, 4);
- sum = ROUND_POWER_OF_TWO(sum, 2);
-
- diff = (int64_t)*sse - ((sum * sum) >> 4);
- return (diff >= 0) ? (uint32_t)diff : 0;
-}
-
-uint32_t aom_highbd_12_variance4x4_sse4_1(const uint8_t *a, int a_stride,
- const uint8_t *b, int b_stride,
- uint32_t *sse) {
- int64_t sum, diff;
- uint64_t local_sse;
-
- variance4x4_64_sse4_1(a, a_stride, b, b_stride, &local_sse, &sum);
- *sse = (uint32_t)ROUND_POWER_OF_TWO(local_sse, 8);
- sum = ROUND_POWER_OF_TWO(sum, 4);
-
- diff = (int64_t)*sse - ((sum * sum) >> 4);
- return diff >= 0 ? (uint32_t)diff : 0;
-}
-
-// Sub-pixel
-uint32_t aom_highbd_8_sub_pixel_variance4x4_sse4_1(
- const uint8_t *src, int src_stride, int xoffset, int yoffset,
- const uint8_t *dst, int dst_stride, uint32_t *sse) {
- uint16_t fdata3[(4 + 1) * 4];
- uint16_t temp2[4 * 4];
-
- aom_highbd_var_filter_block2d_bil_first_pass(
- src, fdata3, src_stride, 1, 4 + 1, 4, bilinear_filters_2t[xoffset]);
- aom_highbd_var_filter_block2d_bil_second_pass(fdata3, temp2, 4, 4, 4, 4,
- bilinear_filters_2t[yoffset]);
-
- return aom_highbd_8_variance4x4(CONVERT_TO_BYTEPTR(temp2), 4, dst, dst_stride,
- sse);
-}
-
-uint32_t aom_highbd_10_sub_pixel_variance4x4_sse4_1(
- const uint8_t *src, int src_stride, int xoffset, int yoffset,
- const uint8_t *dst, int dst_stride, uint32_t *sse) {
- uint16_t fdata3[(4 + 1) * 4];
- uint16_t temp2[4 * 4];
-
- aom_highbd_var_filter_block2d_bil_first_pass(
- src, fdata3, src_stride, 1, 4 + 1, 4, bilinear_filters_2t[xoffset]);
- aom_highbd_var_filter_block2d_bil_second_pass(fdata3, temp2, 4, 4, 4, 4,
- bilinear_filters_2t[yoffset]);
-
- return aom_highbd_10_variance4x4(CONVERT_TO_BYTEPTR(temp2), 4, dst,
- dst_stride, sse);
-}
-
-uint32_t aom_highbd_12_sub_pixel_variance4x4_sse4_1(
- const uint8_t *src, int src_stride, int xoffset, int yoffset,
- const uint8_t *dst, int dst_stride, uint32_t *sse) {
- uint16_t fdata3[(4 + 1) * 4];
- uint16_t temp2[4 * 4];
-
- aom_highbd_var_filter_block2d_bil_first_pass(
- src, fdata3, src_stride, 1, 4 + 1, 4, bilinear_filters_2t[xoffset]);
- aom_highbd_var_filter_block2d_bil_second_pass(fdata3, temp2, 4, 4, 4, 4,
- bilinear_filters_2t[yoffset]);
-
- return aom_highbd_12_variance4x4(CONVERT_TO_BYTEPTR(temp2), 4, dst,
- dst_stride, sse);
-}
-
-// Sub-pixel average
-
-uint32_t aom_highbd_8_sub_pixel_avg_variance4x4_sse4_1(
- const uint8_t *src, int src_stride, int xoffset, int yoffset,
- const uint8_t *dst, int dst_stride, uint32_t *sse,
- const uint8_t *second_pred) {
- uint16_t fdata3[(4 + 1) * 4];
- uint16_t temp2[4 * 4];
- DECLARE_ALIGNED(16, uint16_t, temp3[4 * 4]);
-
- aom_highbd_var_filter_block2d_bil_first_pass(
- src, fdata3, src_stride, 1, 4 + 1, 4, bilinear_filters_2t[xoffset]);
- aom_highbd_var_filter_block2d_bil_second_pass(fdata3, temp2, 4, 4, 4, 4,
- bilinear_filters_2t[yoffset]);
-
- aom_highbd_comp_avg_pred(CONVERT_TO_BYTEPTR(temp3), second_pred, 4, 4,
- CONVERT_TO_BYTEPTR(temp2), 4);
-
- return aom_highbd_8_variance4x4(CONVERT_TO_BYTEPTR(temp3), 4, dst, dst_stride,
- sse);
-}
-
-uint32_t aom_highbd_10_sub_pixel_avg_variance4x4_sse4_1(
- const uint8_t *src, int src_stride, int xoffset, int yoffset,
- const uint8_t *dst, int dst_stride, uint32_t *sse,
- const uint8_t *second_pred) {
- uint16_t fdata3[(4 + 1) * 4];
- uint16_t temp2[4 * 4];
- DECLARE_ALIGNED(16, uint16_t, temp3[4 * 4]);
-
- aom_highbd_var_filter_block2d_bil_first_pass(
- src, fdata3, src_stride, 1, 4 + 1, 4, bilinear_filters_2t[xoffset]);
- aom_highbd_var_filter_block2d_bil_second_pass(fdata3, temp2, 4, 4, 4, 4,
- bilinear_filters_2t[yoffset]);
-
- aom_highbd_comp_avg_pred(CONVERT_TO_BYTEPTR(temp3), second_pred, 4, 4,
- CONVERT_TO_BYTEPTR(temp2), 4);
-
- return aom_highbd_10_variance4x4(CONVERT_TO_BYTEPTR(temp3), 4, dst,
- dst_stride, sse);
-}
-
-uint32_t aom_highbd_12_sub_pixel_avg_variance4x4_sse4_1(
- const uint8_t *src, int src_stride, int xoffset, int yoffset,
- const uint8_t *dst, int dst_stride, uint32_t *sse,
- const uint8_t *second_pred) {
- uint16_t fdata3[(4 + 1) * 4];
- uint16_t temp2[4 * 4];
- DECLARE_ALIGNED(16, uint16_t, temp3[4 * 4]);
-
- aom_highbd_var_filter_block2d_bil_first_pass(
- src, fdata3, src_stride, 1, 4 + 1, 4, bilinear_filters_2t[xoffset]);
- aom_highbd_var_filter_block2d_bil_second_pass(fdata3, temp2, 4, 4, 4, 4,
- bilinear_filters_2t[yoffset]);
-
- aom_highbd_comp_avg_pred(CONVERT_TO_BYTEPTR(temp3), second_pred, 4, 4,
- CONVERT_TO_BYTEPTR(temp2), 4);
-
- return aom_highbd_12_variance4x4(CONVERT_TO_BYTEPTR(temp3), 4, dst,
- dst_stride, sse);
-}
diff --git a/third_party/aom/aom_dsp/x86/intrapred_avx2.c b/third_party/aom/aom_dsp/x86/intrapred_avx2.c
deleted file mode 100644
index 1e67d392e..000000000
--- a/third_party/aom/aom_dsp/x86/intrapred_avx2.c
+++ /dev/null
@@ -1,811 +0,0 @@
-/*
- * Copyright (c) 2017, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#include <immintrin.h>
-
-#include "config/aom_dsp_rtcd.h"
-
-static INLINE __m256i dc_sum_64(const uint8_t *ref) {
- const __m256i x0 = _mm256_loadu_si256((const __m256i *)ref);
- const __m256i x1 = _mm256_loadu_si256((const __m256i *)(ref + 32));
- const __m256i zero = _mm256_setzero_si256();
- __m256i y0 = _mm256_sad_epu8(x0, zero);
- __m256i y1 = _mm256_sad_epu8(x1, zero);
- y0 = _mm256_add_epi64(y0, y1);
- __m256i u0 = _mm256_permute2x128_si256(y0, y0, 1);
- y0 = _mm256_add_epi64(u0, y0);
- u0 = _mm256_unpackhi_epi64(y0, y0);
- return _mm256_add_epi16(y0, u0);
-}
-
-static INLINE __m256i dc_sum_32(const uint8_t *ref) {
- const __m256i x = _mm256_loadu_si256((const __m256i *)ref);
- const __m256i zero = _mm256_setzero_si256();
- __m256i y = _mm256_sad_epu8(x, zero);
- __m256i u = _mm256_permute2x128_si256(y, y, 1);
- y = _mm256_add_epi64(u, y);
- u = _mm256_unpackhi_epi64(y, y);
- return _mm256_add_epi16(y, u);
-}
-
-static INLINE void row_store_32xh(const __m256i *r, int height, uint8_t *dst,
- ptrdiff_t stride) {
- for (int i = 0; i < height; ++i) {
- _mm256_storeu_si256((__m256i *)dst, *r);
- dst += stride;
- }
-}
-
-static INLINE void row_store_32x2xh(const __m256i *r0, const __m256i *r1,
- int height, uint8_t *dst,
- ptrdiff_t stride) {
- for (int i = 0; i < height; ++i) {
- _mm256_storeu_si256((__m256i *)dst, *r0);
- _mm256_storeu_si256((__m256i *)(dst + 32), *r1);
- dst += stride;
- }
-}
-
-static INLINE void row_store_64xh(const __m256i *r, int height, uint8_t *dst,
- ptrdiff_t stride) {
- for (int i = 0; i < height; ++i) {
- _mm256_storeu_si256((__m256i *)dst, *r);
- _mm256_storeu_si256((__m256i *)(dst + 32), *r);
- dst += stride;
- }
-}
-
-void aom_dc_predictor_32x32_avx2(uint8_t *dst, ptrdiff_t stride,
- const uint8_t *above, const uint8_t *left) {
- const __m256i sum_above = dc_sum_32(above);
- __m256i sum_left = dc_sum_32(left);
- sum_left = _mm256_add_epi16(sum_left, sum_above);
- const __m256i thirtytwo = _mm256_set1_epi16(32);
- sum_left = _mm256_add_epi16(sum_left, thirtytwo);
- sum_left = _mm256_srai_epi16(sum_left, 6);
- const __m256i zero = _mm256_setzero_si256();
- __m256i row = _mm256_shuffle_epi8(sum_left, zero);
- row_store_32xh(&row, 32, dst, stride);
-}
-
-void aom_dc_top_predictor_32x32_avx2(uint8_t *dst, ptrdiff_t stride,
- const uint8_t *above,
- const uint8_t *left) {
- __m256i sum = dc_sum_32(above);
- (void)left;
-
- const __m256i sixteen = _mm256_set1_epi16(16);
- sum = _mm256_add_epi16(sum, sixteen);
- sum = _mm256_srai_epi16(sum, 5);
- const __m256i zero = _mm256_setzero_si256();
- __m256i row = _mm256_shuffle_epi8(sum, zero);
- row_store_32xh(&row, 32, dst, stride);
-}
-
-void aom_dc_left_predictor_32x32_avx2(uint8_t *dst, ptrdiff_t stride,
- const uint8_t *above,
- const uint8_t *left) {
- __m256i sum = dc_sum_32(left);
- (void)above;
-
- const __m256i sixteen = _mm256_set1_epi16(16);
- sum = _mm256_add_epi16(sum, sixteen);
- sum = _mm256_srai_epi16(sum, 5);
- const __m256i zero = _mm256_setzero_si256();
- __m256i row = _mm256_shuffle_epi8(sum, zero);
- row_store_32xh(&row, 32, dst, stride);
-}
-
-void aom_dc_128_predictor_32x32_avx2(uint8_t *dst, ptrdiff_t stride,
- const uint8_t *above,
- const uint8_t *left) {
- (void)above;
- (void)left;
- const __m256i row = _mm256_set1_epi8((uint8_t)0x80);
- row_store_32xh(&row, 32, dst, stride);
-}
-
-void aom_v_predictor_32x32_avx2(uint8_t *dst, ptrdiff_t stride,
- const uint8_t *above, const uint8_t *left) {
- const __m256i row = _mm256_loadu_si256((const __m256i *)above);
- (void)left;
- row_store_32xh(&row, 32, dst, stride);
-}
-
-// There are 32 rows togeter. This function does line:
-// 0,1,2,3, and 16,17,18,19. The next call would do
-// 4,5,6,7, and 20,21,22,23. So 4 times of calling
-// would finish 32 rows.
-static INLINE void h_predictor_32x8line(const __m256i *row, uint8_t *dst,
- ptrdiff_t stride) {
- __m256i t[4];
- __m256i m = _mm256_setzero_si256();
- const __m256i inc = _mm256_set1_epi8(4);
- int i;
-
- for (i = 0; i < 4; i++) {
- t[i] = _mm256_shuffle_epi8(*row, m);
- __m256i r0 = _mm256_permute2x128_si256(t[i], t[i], 0);
- __m256i r1 = _mm256_permute2x128_si256(t[i], t[i], 0x11);
- _mm256_storeu_si256((__m256i *)dst, r0);
- _mm256_storeu_si256((__m256i *)(dst + (stride << 4)), r1);
- dst += stride;
- m = _mm256_add_epi8(m, inc);
- }
-}
-
-void aom_h_predictor_32x32_avx2(uint8_t *dst, ptrdiff_t stride,
- const uint8_t *above, const uint8_t *left) {
- (void)above;
- const __m256i left_col = _mm256_loadu_si256((__m256i const *)left);
-
- __m256i u = _mm256_unpacklo_epi8(left_col, left_col);
-
- __m256i v = _mm256_unpacklo_epi8(u, u);
- h_predictor_32x8line(&v, dst, stride);
- dst += stride << 2;
-
- v = _mm256_unpackhi_epi8(u, u);
- h_predictor_32x8line(&v, dst, stride);
- dst += stride << 2;
-
- u = _mm256_unpackhi_epi8(left_col, left_col);
-
- v = _mm256_unpacklo_epi8(u, u);
- h_predictor_32x8line(&v, dst, stride);
- dst += stride << 2;
-
- v = _mm256_unpackhi_epi8(u, u);
- h_predictor_32x8line(&v, dst, stride);
-}
-
-// -----------------------------------------------------------------------------
-// Rectangle
-
-// TODO(luoyi) The following two functions are shared with intrapred_sse2.c.
-// Use a header file, intrapred_common_x86.h
-static INLINE __m128i dc_sum_16_sse2(const uint8_t *ref) {
- __m128i x = _mm_load_si128((__m128i const *)ref);
- const __m128i zero = _mm_setzero_si128();
- x = _mm_sad_epu8(x, zero);
- const __m128i high = _mm_unpackhi_epi64(x, x);
- return _mm_add_epi16(x, high);
-}
-
-static INLINE __m128i dc_sum_32_sse2(const uint8_t *ref) {
- __m128i x0 = _mm_load_si128((__m128i const *)ref);
- __m128i x1 = _mm_load_si128((__m128i const *)(ref + 16));
- const __m128i zero = _mm_setzero_si128();
- x0 = _mm_sad_epu8(x0, zero);
- x1 = _mm_sad_epu8(x1, zero);
- x0 = _mm_add_epi16(x0, x1);
- const __m128i high = _mm_unpackhi_epi64(x0, x0);
- return _mm_add_epi16(x0, high);
-}
-
-void aom_dc_predictor_32x16_avx2(uint8_t *dst, ptrdiff_t stride,
- const uint8_t *above, const uint8_t *left) {
- const __m128i top_sum = dc_sum_32_sse2(above);
- __m128i left_sum = dc_sum_16_sse2(left);
- left_sum = _mm_add_epi16(top_sum, left_sum);
- uint32_t sum = _mm_cvtsi128_si32(left_sum);
- sum += 24;
- sum /= 48;
- const __m256i row = _mm256_set1_epi8((uint8_t)sum);
- row_store_32xh(&row, 16, dst, stride);
-}
-
-void aom_dc_predictor_32x64_avx2(uint8_t *dst, ptrdiff_t stride,
- const uint8_t *above, const uint8_t *left) {
- const __m256i sum_above = dc_sum_32(above);
- __m256i sum_left = dc_sum_64(left);
- sum_left = _mm256_add_epi16(sum_left, sum_above);
- uint32_t sum = _mm_cvtsi128_si32(_mm256_castsi256_si128(sum_left));
- sum += 48;
- sum /= 96;
- const __m256i row = _mm256_set1_epi8((uint8_t)sum);
- row_store_32xh(&row, 64, dst, stride);
-}
-
-void aom_dc_predictor_64x64_avx2(uint8_t *dst, ptrdiff_t stride,
- const uint8_t *above, const uint8_t *left) {
- const __m256i sum_above = dc_sum_64(above);
- __m256i sum_left = dc_sum_64(left);
- sum_left = _mm256_add_epi16(sum_left, sum_above);
- uint32_t sum = _mm_cvtsi128_si32(_mm256_castsi256_si128(sum_left));
- sum += 64;
- sum /= 128;
- const __m256i row = _mm256_set1_epi8((uint8_t)sum);
- row_store_64xh(&row, 64, dst, stride);
-}
-
-void aom_dc_predictor_64x32_avx2(uint8_t *dst, ptrdiff_t stride,
- const uint8_t *above, const uint8_t *left) {
- const __m256i sum_above = dc_sum_64(above);
- __m256i sum_left = dc_sum_32(left);
- sum_left = _mm256_add_epi16(sum_left, sum_above);
- uint32_t sum = _mm_cvtsi128_si32(_mm256_castsi256_si128(sum_left));
- sum += 48;
- sum /= 96;
- const __m256i row = _mm256_set1_epi8((uint8_t)sum);
- row_store_64xh(&row, 32, dst, stride);
-}
-
-void aom_dc_predictor_64x16_avx2(uint8_t *dst, ptrdiff_t stride,
- const uint8_t *above, const uint8_t *left) {
- const __m256i sum_above = dc_sum_64(above);
- __m256i sum_left = _mm256_castsi128_si256(dc_sum_16_sse2(left));
- sum_left = _mm256_add_epi16(sum_left, sum_above);
- uint32_t sum = _mm_cvtsi128_si32(_mm256_castsi256_si128(sum_left));
- sum += 40;
- sum /= 80;
- const __m256i row = _mm256_set1_epi8((uint8_t)sum);
- row_store_64xh(&row, 16, dst, stride);
-}
-
-void aom_dc_top_predictor_32x16_avx2(uint8_t *dst, ptrdiff_t stride,
- const uint8_t *above,
- const uint8_t *left) {
- __m256i sum = dc_sum_32(above);
- (void)left;
-
- const __m256i sixteen = _mm256_set1_epi16(16);
- sum = _mm256_add_epi16(sum, sixteen);
- sum = _mm256_srai_epi16(sum, 5);
- const __m256i zero = _mm256_setzero_si256();
- __m256i row = _mm256_shuffle_epi8(sum, zero);
- row_store_32xh(&row, 16, dst, stride);
-}
-
-void aom_dc_top_predictor_32x64_avx2(uint8_t *dst, ptrdiff_t stride,
- const uint8_t *above,
- const uint8_t *left) {
- __m256i sum = dc_sum_32(above);
- (void)left;
-
- const __m256i sixteen = _mm256_set1_epi16(16);
- sum = _mm256_add_epi16(sum, sixteen);
- sum = _mm256_srai_epi16(sum, 5);
- const __m256i zero = _mm256_setzero_si256();
- __m256i row = _mm256_shuffle_epi8(sum, zero);
- row_store_32xh(&row, 64, dst, stride);
-}
-
-void aom_dc_top_predictor_64x64_avx2(uint8_t *dst, ptrdiff_t stride,
- const uint8_t *above,
- const uint8_t *left) {
- __m256i sum = dc_sum_64(above);
- (void)left;
-
- const __m256i thirtytwo = _mm256_set1_epi16(32);
- sum = _mm256_add_epi16(sum, thirtytwo);
- sum = _mm256_srai_epi16(sum, 6);
- const __m256i zero = _mm256_setzero_si256();
- __m256i row = _mm256_shuffle_epi8(sum, zero);
- row_store_64xh(&row, 64, dst, stride);
-}
-
-void aom_dc_top_predictor_64x32_avx2(uint8_t *dst, ptrdiff_t stride,
- const uint8_t *above,
- const uint8_t *left) {
- __m256i sum = dc_sum_64(above);
- (void)left;
-
- const __m256i thirtytwo = _mm256_set1_epi16(32);
- sum = _mm256_add_epi16(sum, thirtytwo);
- sum = _mm256_srai_epi16(sum, 6);
- const __m256i zero = _mm256_setzero_si256();
- __m256i row = _mm256_shuffle_epi8(sum, zero);
- row_store_64xh(&row, 32, dst, stride);
-}
-
-void aom_dc_top_predictor_64x16_avx2(uint8_t *dst, ptrdiff_t stride,
- const uint8_t *above,
- const uint8_t *left) {
- __m256i sum = dc_sum_64(above);
- (void)left;
-
- const __m256i thirtytwo = _mm256_set1_epi16(32);
- sum = _mm256_add_epi16(sum, thirtytwo);
- sum = _mm256_srai_epi16(sum, 6);
- const __m256i zero = _mm256_setzero_si256();
- __m256i row = _mm256_shuffle_epi8(sum, zero);
- row_store_64xh(&row, 16, dst, stride);
-}
-
-void aom_dc_left_predictor_32x16_avx2(uint8_t *dst, ptrdiff_t stride,
- const uint8_t *above,
- const uint8_t *left) {
- __m128i sum = dc_sum_16_sse2(left);
- (void)above;
-
- const __m128i eight = _mm_set1_epi16(8);
- sum = _mm_add_epi16(sum, eight);
- sum = _mm_srai_epi16(sum, 4);
- const __m128i zero = _mm_setzero_si128();
- const __m128i r = _mm_shuffle_epi8(sum, zero);
- const __m256i row = _mm256_inserti128_si256(_mm256_castsi128_si256(r), r, 1);
- row_store_32xh(&row, 16, dst, stride);
-}
-
-void aom_dc_left_predictor_32x64_avx2(uint8_t *dst, ptrdiff_t stride,
- const uint8_t *above,
- const uint8_t *left) {
- __m256i sum = dc_sum_64(left);
- (void)above;
-
- const __m256i thirtytwo = _mm256_set1_epi16(32);
- sum = _mm256_add_epi16(sum, thirtytwo);
- sum = _mm256_srai_epi16(sum, 6);
- const __m256i zero = _mm256_setzero_si256();
- __m256i row = _mm256_shuffle_epi8(sum, zero);
- row_store_32xh(&row, 64, dst, stride);
-}
-
-void aom_dc_left_predictor_64x64_avx2(uint8_t *dst, ptrdiff_t stride,
- const uint8_t *above,
- const uint8_t *left) {
- __m256i sum = dc_sum_64(left);
- (void)above;
-
- const __m256i thirtytwo = _mm256_set1_epi16(32);
- sum = _mm256_add_epi16(sum, thirtytwo);
- sum = _mm256_srai_epi16(sum, 6);
- const __m256i zero = _mm256_setzero_si256();
- __m256i row = _mm256_shuffle_epi8(sum, zero);
- row_store_64xh(&row, 64, dst, stride);
-}
-
-void aom_dc_left_predictor_64x32_avx2(uint8_t *dst, ptrdiff_t stride,
- const uint8_t *above,
- const uint8_t *left) {
- __m256i sum = dc_sum_32(left);
- (void)above;
-
- const __m256i sixteen = _mm256_set1_epi16(16);
- sum = _mm256_add_epi16(sum, sixteen);
- sum = _mm256_srai_epi16(sum, 5);
- const __m256i zero = _mm256_setzero_si256();
- __m256i row = _mm256_shuffle_epi8(sum, zero);
- row_store_64xh(&row, 32, dst, stride);
-}
-
-void aom_dc_left_predictor_64x16_avx2(uint8_t *dst, ptrdiff_t stride,
- const uint8_t *above,
- const uint8_t *left) {
- __m128i sum = dc_sum_16_sse2(left);
- (void)above;
-
- const __m128i eight = _mm_set1_epi16(8);
- sum = _mm_add_epi16(sum, eight);
- sum = _mm_srai_epi16(sum, 4);
- const __m128i zero = _mm_setzero_si128();
- const __m128i r = _mm_shuffle_epi8(sum, zero);
- const __m256i row = _mm256_inserti128_si256(_mm256_castsi128_si256(r), r, 1);
- row_store_64xh(&row, 16, dst, stride);
-}
-
-void aom_dc_128_predictor_32x16_avx2(uint8_t *dst, ptrdiff_t stride,
- const uint8_t *above,
- const uint8_t *left) {
- (void)above;
- (void)left;
- const __m256i row = _mm256_set1_epi8((uint8_t)0x80);
- row_store_32xh(&row, 16, dst, stride);
-}
-
-void aom_dc_128_predictor_32x64_avx2(uint8_t *dst, ptrdiff_t stride,
- const uint8_t *above,
- const uint8_t *left) {
- (void)above;
- (void)left;
- const __m256i row = _mm256_set1_epi8((uint8_t)0x80);
- row_store_32xh(&row, 64, dst, stride);
-}
-
-void aom_dc_128_predictor_64x64_avx2(uint8_t *dst, ptrdiff_t stride,
- const uint8_t *above,
- const uint8_t *left) {
- (void)above;
- (void)left;
- const __m256i row = _mm256_set1_epi8((uint8_t)0x80);
- row_store_64xh(&row, 64, dst, stride);
-}
-
-void aom_dc_128_predictor_64x32_avx2(uint8_t *dst, ptrdiff_t stride,
- const uint8_t *above,
- const uint8_t *left) {
- (void)above;
- (void)left;
- const __m256i row = _mm256_set1_epi8((uint8_t)0x80);
- row_store_64xh(&row, 32, dst, stride);
-}
-
-void aom_dc_128_predictor_64x16_avx2(uint8_t *dst, ptrdiff_t stride,
- const uint8_t *above,
- const uint8_t *left) {
- (void)above;
- (void)left;
- const __m256i row = _mm256_set1_epi8((uint8_t)0x80);
- row_store_64xh(&row, 16, dst, stride);
-}
-
-void aom_v_predictor_32x16_avx2(uint8_t *dst, ptrdiff_t stride,
- const uint8_t *above, const uint8_t *left) {
- const __m256i row = _mm256_loadu_si256((const __m256i *)above);
- (void)left;
- row_store_32xh(&row, 16, dst, stride);
-}
-
-void aom_v_predictor_32x64_avx2(uint8_t *dst, ptrdiff_t stride,
- const uint8_t *above, const uint8_t *left) {
- const __m256i row = _mm256_loadu_si256((const __m256i *)above);
- (void)left;
- row_store_32xh(&row, 64, dst, stride);
-}
-
-void aom_v_predictor_64x64_avx2(uint8_t *dst, ptrdiff_t stride,
- const uint8_t *above, const uint8_t *left) {
- const __m256i row0 = _mm256_loadu_si256((const __m256i *)above);
- const __m256i row1 = _mm256_loadu_si256((const __m256i *)(above + 32));
- (void)left;
- row_store_32x2xh(&row0, &row1, 64, dst, stride);
-}
-
-void aom_v_predictor_64x32_avx2(uint8_t *dst, ptrdiff_t stride,
- const uint8_t *above, const uint8_t *left) {
- const __m256i row0 = _mm256_loadu_si256((const __m256i *)above);
- const __m256i row1 = _mm256_loadu_si256((const __m256i *)(above + 32));
- (void)left;
- row_store_32x2xh(&row0, &row1, 32, dst, stride);
-}
-
-void aom_v_predictor_64x16_avx2(uint8_t *dst, ptrdiff_t stride,
- const uint8_t *above, const uint8_t *left) {
- const __m256i row0 = _mm256_loadu_si256((const __m256i *)above);
- const __m256i row1 = _mm256_loadu_si256((const __m256i *)(above + 32));
- (void)left;
- row_store_32x2xh(&row0, &row1, 16, dst, stride);
-}
-
-// -----------------------------------------------------------------------------
-// PAETH_PRED
-
-// Return 16 16-bit pixels in one row (__m256i)
-static INLINE __m256i paeth_pred(const __m256i *left, const __m256i *top,
- const __m256i *topleft) {
- const __m256i base =
- _mm256_sub_epi16(_mm256_add_epi16(*top, *left), *topleft);
-
- __m256i pl = _mm256_abs_epi16(_mm256_sub_epi16(base, *left));
- __m256i pt = _mm256_abs_epi16(_mm256_sub_epi16(base, *top));
- __m256i ptl = _mm256_abs_epi16(_mm256_sub_epi16(base, *topleft));
-
- __m256i mask1 = _mm256_cmpgt_epi16(pl, pt);
- mask1 = _mm256_or_si256(mask1, _mm256_cmpgt_epi16(pl, ptl));
- __m256i mask2 = _mm256_cmpgt_epi16(pt, ptl);
-
- pl = _mm256_andnot_si256(mask1, *left);
-
- ptl = _mm256_and_si256(mask2, *topleft);
- pt = _mm256_andnot_si256(mask2, *top);
- pt = _mm256_or_si256(pt, ptl);
- pt = _mm256_and_si256(mask1, pt);
-
- return _mm256_or_si256(pt, pl);
-}
-
-// Return 16 8-bit pixels in one row (__m128i)
-static INLINE __m128i paeth_16x1_pred(const __m256i *left, const __m256i *top,
- const __m256i *topleft) {
- const __m256i p0 = paeth_pred(left, top, topleft);
- const __m256i p1 = _mm256_permute4x64_epi64(p0, 0xe);
- const __m256i p = _mm256_packus_epi16(p0, p1);
- return _mm256_castsi256_si128(p);
-}
-
-static INLINE __m256i get_top_vector(const uint8_t *above) {
- const __m128i x = _mm_load_si128((const __m128i *)above);
- const __m128i zero = _mm_setzero_si128();
- const __m128i t0 = _mm_unpacklo_epi8(x, zero);
- const __m128i t1 = _mm_unpackhi_epi8(x, zero);
- return _mm256_inserti128_si256(_mm256_castsi128_si256(t0), t1, 1);
-}
-
-void aom_paeth_predictor_16x8_avx2(uint8_t *dst, ptrdiff_t stride,
- const uint8_t *above, const uint8_t *left) {
- __m128i x = _mm_loadl_epi64((const __m128i *)left);
- const __m256i l = _mm256_inserti128_si256(_mm256_castsi128_si256(x), x, 1);
- const __m256i tl16 = _mm256_set1_epi16((uint16_t)above[-1]);
- __m256i rep = _mm256_set1_epi16(0x8000);
- const __m256i one = _mm256_set1_epi16(1);
- const __m256i top = get_top_vector(above);
-
- int i;
- for (i = 0; i < 8; ++i) {
- const __m256i l16 = _mm256_shuffle_epi8(l, rep);
- const __m128i row = paeth_16x1_pred(&l16, &top, &tl16);
-
- _mm_store_si128((__m128i *)dst, row);
- dst += stride;
- rep = _mm256_add_epi16(rep, one);
- }
-}
-
-static INLINE __m256i get_left_vector(const uint8_t *left) {
- const __m128i x = _mm_load_si128((const __m128i *)left);
- return _mm256_inserti128_si256(_mm256_castsi128_si256(x), x, 1);
-}
-
-void aom_paeth_predictor_16x16_avx2(uint8_t *dst, ptrdiff_t stride,
- const uint8_t *above, const uint8_t *left) {
- const __m256i l = get_left_vector(left);
- const __m256i tl16 = _mm256_set1_epi16((uint16_t)above[-1]);
- __m256i rep = _mm256_set1_epi16(0x8000);
- const __m256i one = _mm256_set1_epi16(1);
- const __m256i top = get_top_vector(above);
-
- int i;
- for (i = 0; i < 16; ++i) {
- const __m256i l16 = _mm256_shuffle_epi8(l, rep);
- const __m128i row = paeth_16x1_pred(&l16, &top, &tl16);
-
- _mm_store_si128((__m128i *)dst, row);
- dst += stride;
- rep = _mm256_add_epi16(rep, one);
- }
-}
-
-void aom_paeth_predictor_16x32_avx2(uint8_t *dst, ptrdiff_t stride,
- const uint8_t *above, const uint8_t *left) {
- __m256i l = get_left_vector(left);
- const __m256i tl16 = _mm256_set1_epi16((uint16_t)above[-1]);
- __m256i rep = _mm256_set1_epi16(0x8000);
- const __m256i one = _mm256_set1_epi16(1);
- const __m256i top = get_top_vector(above);
-
- int i;
- for (i = 0; i < 16; ++i) {
- const __m256i l16 = _mm256_shuffle_epi8(l, rep);
- const __m128i row = paeth_16x1_pred(&l16, &top, &tl16);
-
- _mm_store_si128((__m128i *)dst, row);
- dst += stride;
- rep = _mm256_add_epi16(rep, one);
- }
-
- l = get_left_vector(left + 16);
- rep = _mm256_set1_epi16(0x8000);
- for (i = 0; i < 16; ++i) {
- const __m256i l16 = _mm256_shuffle_epi8(l, rep);
- const __m128i row = paeth_16x1_pred(&l16, &top, &tl16);
-
- _mm_store_si128((__m128i *)dst, row);
- dst += stride;
- rep = _mm256_add_epi16(rep, one);
- }
-}
-
-void aom_paeth_predictor_16x64_avx2(uint8_t *dst, ptrdiff_t stride,
- const uint8_t *above, const uint8_t *left) {
- const __m256i tl16 = _mm256_set1_epi16((uint16_t)above[-1]);
- const __m256i one = _mm256_set1_epi16(1);
- const __m256i top = get_top_vector(above);
-
- for (int j = 0; j < 4; ++j) {
- const __m256i l = get_left_vector(left + j * 16);
- __m256i rep = _mm256_set1_epi16(0x8000);
- for (int i = 0; i < 16; ++i) {
- const __m256i l16 = _mm256_shuffle_epi8(l, rep);
- const __m128i row = paeth_16x1_pred(&l16, &top, &tl16);
-
- _mm_store_si128((__m128i *)dst, row);
- dst += stride;
- rep = _mm256_add_epi16(rep, one);
- }
- }
-}
-
-// Return 32 8-bit pixels in one row (__m256i)
-static INLINE __m256i paeth_32x1_pred(const __m256i *left, const __m256i *top0,
- const __m256i *top1,
- const __m256i *topleft) {
- __m256i p0 = paeth_pred(left, top0, topleft);
- __m256i p1 = _mm256_permute4x64_epi64(p0, 0xe);
- const __m256i x0 = _mm256_packus_epi16(p0, p1);
-
- p0 = paeth_pred(left, top1, topleft);
- p1 = _mm256_permute4x64_epi64(p0, 0xe);
- const __m256i x1 = _mm256_packus_epi16(p0, p1);
-
- return _mm256_permute2x128_si256(x0, x1, 0x20);
-}
-
-void aom_paeth_predictor_32x16_avx2(uint8_t *dst, ptrdiff_t stride,
- const uint8_t *above, const uint8_t *left) {
- const __m256i l = get_left_vector(left);
- const __m256i t0 = get_top_vector(above);
- const __m256i t1 = get_top_vector(above + 16);
- const __m256i tl = _mm256_set1_epi16((uint16_t)above[-1]);
- __m256i rep = _mm256_set1_epi16(0x8000);
- const __m256i one = _mm256_set1_epi16(1);
-
- int i;
- for (i = 0; i < 16; ++i) {
- const __m256i l16 = _mm256_shuffle_epi8(l, rep);
-
- const __m256i r = paeth_32x1_pred(&l16, &t0, &t1, &tl);
-
- _mm256_storeu_si256((__m256i *)dst, r);
-
- dst += stride;
- rep = _mm256_add_epi16(rep, one);
- }
-}
-
-void aom_paeth_predictor_32x32_avx2(uint8_t *dst, ptrdiff_t stride,
- const uint8_t *above, const uint8_t *left) {
- __m256i l = get_left_vector(left);
- const __m256i t0 = get_top_vector(above);
- const __m256i t1 = get_top_vector(above + 16);
- const __m256i tl = _mm256_set1_epi16((uint16_t)above[-1]);
- __m256i rep = _mm256_set1_epi16(0x8000);
- const __m256i one = _mm256_set1_epi16(1);
-
- int i;
- for (i = 0; i < 16; ++i) {
- const __m256i l16 = _mm256_shuffle_epi8(l, rep);
-
- const __m128i r0 = paeth_16x1_pred(&l16, &t0, &tl);
- const __m128i r1 = paeth_16x1_pred(&l16, &t1, &tl);
-
- _mm_store_si128((__m128i *)dst, r0);
- _mm_store_si128((__m128i *)(dst + 16), r1);
-
- dst += stride;
- rep = _mm256_add_epi16(rep, one);
- }
-
- l = get_left_vector(left + 16);
- rep = _mm256_set1_epi16(0x8000);
- for (i = 0; i < 16; ++i) {
- const __m256i l16 = _mm256_shuffle_epi8(l, rep);
-
- const __m128i r0 = paeth_16x1_pred(&l16, &t0, &tl);
- const __m128i r1 = paeth_16x1_pred(&l16, &t1, &tl);
-
- _mm_store_si128((__m128i *)dst, r0);
- _mm_store_si128((__m128i *)(dst + 16), r1);
-
- dst += stride;
- rep = _mm256_add_epi16(rep, one);
- }
-}
-
-void aom_paeth_predictor_32x64_avx2(uint8_t *dst, ptrdiff_t stride,
- const uint8_t *above, const uint8_t *left) {
- const __m256i t0 = get_top_vector(above);
- const __m256i t1 = get_top_vector(above + 16);
- const __m256i tl = _mm256_set1_epi16((uint16_t)above[-1]);
- const __m256i one = _mm256_set1_epi16(1);
-
- int i, j;
- for (j = 0; j < 4; ++j) {
- const __m256i l = get_left_vector(left + j * 16);
- __m256i rep = _mm256_set1_epi16(0x8000);
- for (i = 0; i < 16; ++i) {
- const __m256i l16 = _mm256_shuffle_epi8(l, rep);
-
- const __m128i r0 = paeth_16x1_pred(&l16, &t0, &tl);
- const __m128i r1 = paeth_16x1_pred(&l16, &t1, &tl);
-
- _mm_store_si128((__m128i *)dst, r0);
- _mm_store_si128((__m128i *)(dst + 16), r1);
-
- dst += stride;
- rep = _mm256_add_epi16(rep, one);
- }
- }
-}
-
-void aom_paeth_predictor_64x32_avx2(uint8_t *dst, ptrdiff_t stride,
- const uint8_t *above, const uint8_t *left) {
- const __m256i t0 = get_top_vector(above);
- const __m256i t1 = get_top_vector(above + 16);
- const __m256i t2 = get_top_vector(above + 32);
- const __m256i t3 = get_top_vector(above + 48);
- const __m256i tl = _mm256_set1_epi16((uint16_t)above[-1]);
- const __m256i one = _mm256_set1_epi16(1);
-
- int i, j;
- for (j = 0; j < 2; ++j) {
- const __m256i l = get_left_vector(left + j * 16);
- __m256i rep = _mm256_set1_epi16(0x8000);
- for (i = 0; i < 16; ++i) {
- const __m256i l16 = _mm256_shuffle_epi8(l, rep);
-
- const __m128i r0 = paeth_16x1_pred(&l16, &t0, &tl);
- const __m128i r1 = paeth_16x1_pred(&l16, &t1, &tl);
- const __m128i r2 = paeth_16x1_pred(&l16, &t2, &tl);
- const __m128i r3 = paeth_16x1_pred(&l16, &t3, &tl);
-
- _mm_store_si128((__m128i *)dst, r0);
- _mm_store_si128((__m128i *)(dst + 16), r1);
- _mm_store_si128((__m128i *)(dst + 32), r2);
- _mm_store_si128((__m128i *)(dst + 48), r3);
-
- dst += stride;
- rep = _mm256_add_epi16(rep, one);
- }
- }
-}
-
-void aom_paeth_predictor_64x64_avx2(uint8_t *dst, ptrdiff_t stride,
- const uint8_t *above, const uint8_t *left) {
- const __m256i t0 = get_top_vector(above);
- const __m256i t1 = get_top_vector(above + 16);
- const __m256i t2 = get_top_vector(above + 32);
- const __m256i t3 = get_top_vector(above + 48);
- const __m256i tl = _mm256_set1_epi16((uint16_t)above[-1]);
- const __m256i one = _mm256_set1_epi16(1);
-
- int i, j;
- for (j = 0; j < 4; ++j) {
- const __m256i l = get_left_vector(left + j * 16);
- __m256i rep = _mm256_set1_epi16(0x8000);
- for (i = 0; i < 16; ++i) {
- const __m256i l16 = _mm256_shuffle_epi8(l, rep);
-
- const __m128i r0 = paeth_16x1_pred(&l16, &t0, &tl);
- const __m128i r1 = paeth_16x1_pred(&l16, &t1, &tl);
- const __m128i r2 = paeth_16x1_pred(&l16, &t2, &tl);
- const __m128i r3 = paeth_16x1_pred(&l16, &t3, &tl);
-
- _mm_store_si128((__m128i *)dst, r0);
- _mm_store_si128((__m128i *)(dst + 16), r1);
- _mm_store_si128((__m128i *)(dst + 32), r2);
- _mm_store_si128((__m128i *)(dst + 48), r3);
-
- dst += stride;
- rep = _mm256_add_epi16(rep, one);
- }
- }
-}
-
-void aom_paeth_predictor_64x16_avx2(uint8_t *dst, ptrdiff_t stride,
- const uint8_t *above, const uint8_t *left) {
- const __m256i t0 = get_top_vector(above);
- const __m256i t1 = get_top_vector(above + 16);
- const __m256i t2 = get_top_vector(above + 32);
- const __m256i t3 = get_top_vector(above + 48);
- const __m256i tl = _mm256_set1_epi16((uint16_t)above[-1]);
- const __m256i one = _mm256_set1_epi16(1);
-
- int i;
- const __m256i l = get_left_vector(left);
- __m256i rep = _mm256_set1_epi16(0x8000);
- for (i = 0; i < 16; ++i) {
- const __m256i l16 = _mm256_shuffle_epi8(l, rep);
-
- const __m128i r0 = paeth_16x1_pred(&l16, &t0, &tl);
- const __m128i r1 = paeth_16x1_pred(&l16, &t1, &tl);
- const __m128i r2 = paeth_16x1_pred(&l16, &t2, &tl);
- const __m128i r3 = paeth_16x1_pred(&l16, &t3, &tl);
-
- _mm_store_si128((__m128i *)dst, r0);
- _mm_store_si128((__m128i *)(dst + 16), r1);
- _mm_store_si128((__m128i *)(dst + 32), r2);
- _mm_store_si128((__m128i *)(dst + 48), r3);
-
- dst += stride;
- rep = _mm256_add_epi16(rep, one);
- }
-}
diff --git a/third_party/aom/aom_dsp/x86/intrapred_sse2.c b/third_party/aom/aom_dsp/x86/intrapred_sse2.c
deleted file mode 100644
index 5b2452c8e..000000000
--- a/third_party/aom/aom_dsp/x86/intrapred_sse2.c
+++ /dev/null
@@ -1,1430 +0,0 @@
-/*
- * Copyright (c) 2017, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#include <emmintrin.h>
-
-#include "config/aom_dsp_rtcd.h"
-
-static INLINE void dc_store_4xh(uint32_t dc, int height, uint8_t *dst,
- ptrdiff_t stride) {
- for (int i = 0; i < height; i += 2) {
- *(uint32_t *)dst = dc;
- dst += stride;
- *(uint32_t *)dst = dc;
- dst += stride;
- }
-}
-
-static INLINE void dc_store_8xh(const __m128i *row, int height, uint8_t *dst,
- ptrdiff_t stride) {
- int i;
- for (i = 0; i < height; ++i) {
- _mm_storel_epi64((__m128i *)dst, *row);
- dst += stride;
- }
-}
-
-static INLINE void dc_store_16xh(const __m128i *row, int height, uint8_t *dst,
- ptrdiff_t stride) {
- int i;
- for (i = 0; i < height; ++i) {
- _mm_store_si128((__m128i *)dst, *row);
- dst += stride;
- }
-}
-
-static INLINE void dc_store_32xh(const __m128i *row, int height, uint8_t *dst,
- ptrdiff_t stride) {
- int i;
- for (i = 0; i < height; ++i) {
- _mm_store_si128((__m128i *)dst, *row);
- _mm_store_si128((__m128i *)(dst + 16), *row);
- dst += stride;
- }
-}
-
-static INLINE void dc_store_64xh(const __m128i *row, int height, uint8_t *dst,
- ptrdiff_t stride) {
- for (int i = 0; i < height; ++i) {
- _mm_store_si128((__m128i *)dst, *row);
- _mm_store_si128((__m128i *)(dst + 16), *row);
- _mm_store_si128((__m128i *)(dst + 32), *row);
- _mm_store_si128((__m128i *)(dst + 48), *row);
- dst += stride;
- }
-}
-
-static INLINE __m128i dc_sum_4(const uint8_t *ref) {
- __m128i x = _mm_loadl_epi64((__m128i const *)ref);
- const __m128i zero = _mm_setzero_si128();
- x = _mm_unpacklo_epi8(x, zero);
- return _mm_sad_epu8(x, zero);
-}
-
-static INLINE __m128i dc_sum_8(const uint8_t *ref) {
- __m128i x = _mm_loadl_epi64((__m128i const *)ref);
- const __m128i zero = _mm_setzero_si128();
- return _mm_sad_epu8(x, zero);
-}
-
-static INLINE __m128i dc_sum_16(const uint8_t *ref) {
- __m128i x = _mm_load_si128((__m128i const *)ref);
- const __m128i zero = _mm_setzero_si128();
- x = _mm_sad_epu8(x, zero);
- const __m128i high = _mm_unpackhi_epi64(x, x);
- return _mm_add_epi16(x, high);
-}
-
-static INLINE __m128i dc_sum_32(const uint8_t *ref) {
- __m128i x0 = _mm_load_si128((__m128i const *)ref);
- __m128i x1 = _mm_load_si128((__m128i const *)(ref + 16));
- const __m128i zero = _mm_setzero_si128();
- x0 = _mm_sad_epu8(x0, zero);
- x1 = _mm_sad_epu8(x1, zero);
- x0 = _mm_add_epi16(x0, x1);
- const __m128i high = _mm_unpackhi_epi64(x0, x0);
- return _mm_add_epi16(x0, high);
-}
-
-static INLINE __m128i dc_sum_64(const uint8_t *ref) {
- __m128i x0 = _mm_load_si128((__m128i const *)ref);
- __m128i x1 = _mm_load_si128((__m128i const *)(ref + 16));
- __m128i x2 = _mm_load_si128((__m128i const *)(ref + 32));
- __m128i x3 = _mm_load_si128((__m128i const *)(ref + 48));
- const __m128i zero = _mm_setzero_si128();
- x0 = _mm_sad_epu8(x0, zero);
- x1 = _mm_sad_epu8(x1, zero);
- x2 = _mm_sad_epu8(x2, zero);
- x3 = _mm_sad_epu8(x3, zero);
- x0 = _mm_add_epi16(x0, x1);
- x2 = _mm_add_epi16(x2, x3);
- x0 = _mm_add_epi16(x0, x2);
- const __m128i high = _mm_unpackhi_epi64(x0, x0);
- return _mm_add_epi16(x0, high);
-}
-
-#define DC_MULTIPLIER_1X2 0x5556
-#define DC_MULTIPLIER_1X4 0x3334
-
-#define DC_SHIFT2 16
-
-static INLINE int divide_using_multiply_shift(int num, int shift1,
- int multiplier) {
- const int interm = num >> shift1;
- return interm * multiplier >> DC_SHIFT2;
-}
-
-// -----------------------------------------------------------------------------
-// DC_PRED
-
-void aom_dc_predictor_4x8_sse2(uint8_t *dst, ptrdiff_t stride,
- const uint8_t *above, const uint8_t *left) {
- const __m128i sum_left = dc_sum_8(left);
- __m128i sum_above = dc_sum_4(above);
- sum_above = _mm_add_epi16(sum_left, sum_above);
-
- uint32_t sum = _mm_cvtsi128_si32(sum_above);
- sum += 6;
- sum = divide_using_multiply_shift(sum, 2, DC_MULTIPLIER_1X2);
-
- const __m128i row = _mm_set1_epi8((uint8_t)sum);
- const uint32_t pred = _mm_cvtsi128_si32(row);
- dc_store_4xh(pred, 8, dst, stride);
-}
-
-void aom_dc_predictor_4x16_sse2(uint8_t *dst, ptrdiff_t stride,
- const uint8_t *above, const uint8_t *left) {
- const __m128i sum_left = dc_sum_16(left);
- __m128i sum_above = dc_sum_4(above);
- sum_above = _mm_add_epi16(sum_left, sum_above);
-
- uint32_t sum = _mm_cvtsi128_si32(sum_above);
- sum += 10;
- sum = divide_using_multiply_shift(sum, 2, DC_MULTIPLIER_1X4);
-
- const __m128i row = _mm_set1_epi8((uint8_t)sum);
- const uint32_t pred = _mm_cvtsi128_si32(row);
- dc_store_4xh(pred, 16, dst, stride);
-}
-
-void aom_dc_predictor_8x4_sse2(uint8_t *dst, ptrdiff_t stride,
- const uint8_t *above, const uint8_t *left) {
- const __m128i sum_left = dc_sum_4(left);
- __m128i sum_above = dc_sum_8(above);
- sum_above = _mm_add_epi16(sum_above, sum_left);
-
- uint32_t sum = _mm_cvtsi128_si32(sum_above);
- sum += 6;
- sum = divide_using_multiply_shift(sum, 2, DC_MULTIPLIER_1X2);
-
- const __m128i row = _mm_set1_epi8((uint8_t)sum);
- dc_store_8xh(&row, 4, dst, stride);
-}
-
-void aom_dc_predictor_8x16_sse2(uint8_t *dst, ptrdiff_t stride,
- const uint8_t *above, const uint8_t *left) {
- const __m128i sum_left = dc_sum_16(left);
- __m128i sum_above = dc_sum_8(above);
- sum_above = _mm_add_epi16(sum_above, sum_left);
-
- uint32_t sum = _mm_cvtsi128_si32(sum_above);
- sum += 12;
- sum = divide_using_multiply_shift(sum, 3, DC_MULTIPLIER_1X2);
- const __m128i row = _mm_set1_epi8((uint8_t)sum);
- dc_store_8xh(&row, 16, dst, stride);
-}
-
-void aom_dc_predictor_8x32_sse2(uint8_t *dst, ptrdiff_t stride,
- const uint8_t *above, const uint8_t *left) {
- const __m128i sum_left = dc_sum_32(left);
- __m128i sum_above = dc_sum_8(above);
- sum_above = _mm_add_epi16(sum_above, sum_left);
-
- uint32_t sum = _mm_cvtsi128_si32(sum_above);
- sum += 20;
- sum = divide_using_multiply_shift(sum, 3, DC_MULTIPLIER_1X4);
- const __m128i row = _mm_set1_epi8((uint8_t)sum);
- dc_store_8xh(&row, 32, dst, stride);
-}
-
-void aom_dc_predictor_16x4_sse2(uint8_t *dst, ptrdiff_t stride,
- const uint8_t *above, const uint8_t *left) {
- const __m128i sum_left = dc_sum_4(left);
- __m128i sum_above = dc_sum_16(above);
- sum_above = _mm_add_epi16(sum_above, sum_left);
-
- uint32_t sum = _mm_cvtsi128_si32(sum_above);
- sum += 10;
- sum = divide_using_multiply_shift(sum, 2, DC_MULTIPLIER_1X4);
- const __m128i row = _mm_set1_epi8((uint8_t)sum);
- dc_store_16xh(&row, 4, dst, stride);
-}
-
-void aom_dc_predictor_16x8_sse2(uint8_t *dst, ptrdiff_t stride,
- const uint8_t *above, const uint8_t *left) {
- const __m128i sum_left = dc_sum_8(left);
- __m128i sum_above = dc_sum_16(above);
- sum_above = _mm_add_epi16(sum_above, sum_left);
-
- uint32_t sum = _mm_cvtsi128_si32(sum_above);
- sum += 12;
- sum = divide_using_multiply_shift(sum, 3, DC_MULTIPLIER_1X2);
- const __m128i row = _mm_set1_epi8((uint8_t)sum);
- dc_store_16xh(&row, 8, dst, stride);
-}
-
-void aom_dc_predictor_16x32_sse2(uint8_t *dst, ptrdiff_t stride,
- const uint8_t *above, const uint8_t *left) {
- const __m128i sum_left = dc_sum_32(left);
- __m128i sum_above = dc_sum_16(above);
- sum_above = _mm_add_epi16(sum_left, sum_above);
-
- uint32_t sum = _mm_cvtsi128_si32(sum_above);
- sum += 24;
- sum = divide_using_multiply_shift(sum, 4, DC_MULTIPLIER_1X2);
- const __m128i row = _mm_set1_epi8((uint8_t)sum);
- dc_store_16xh(&row, 32, dst, stride);
-}
-
-void aom_dc_predictor_16x64_sse2(uint8_t *dst, ptrdiff_t stride,
- const uint8_t *above, const uint8_t *left) {
- const __m128i sum_left = dc_sum_64(left);
- __m128i sum_above = dc_sum_16(above);
- sum_above = _mm_add_epi16(sum_left, sum_above);
-
- uint32_t sum = _mm_cvtsi128_si32(sum_above);
- sum += 40;
- sum = divide_using_multiply_shift(sum, 4, DC_MULTIPLIER_1X4);
- const __m128i row = _mm_set1_epi8((uint8_t)sum);
- dc_store_16xh(&row, 64, dst, stride);
-}
-
-void aom_dc_predictor_32x8_sse2(uint8_t *dst, ptrdiff_t stride,
- const uint8_t *above, const uint8_t *left) {
- __m128i sum_above = dc_sum_32(above);
- const __m128i sum_left = dc_sum_8(left);
- sum_above = _mm_add_epi16(sum_above, sum_left);
-
- uint32_t sum = _mm_cvtsi128_si32(sum_above);
- sum += 20;
- sum = divide_using_multiply_shift(sum, 3, DC_MULTIPLIER_1X4);
- const __m128i row = _mm_set1_epi8((uint8_t)sum);
- dc_store_32xh(&row, 8, dst, stride);
-}
-
-void aom_dc_predictor_32x16_sse2(uint8_t *dst, ptrdiff_t stride,
- const uint8_t *above, const uint8_t *left) {
- __m128i sum_above = dc_sum_32(above);
- const __m128i sum_left = dc_sum_16(left);
- sum_above = _mm_add_epi16(sum_above, sum_left);
-
- uint32_t sum = _mm_cvtsi128_si32(sum_above);
- sum += 24;
- sum = divide_using_multiply_shift(sum, 4, DC_MULTIPLIER_1X2);
- const __m128i row = _mm_set1_epi8((uint8_t)sum);
- dc_store_32xh(&row, 16, dst, stride);
-}
-
-void aom_dc_predictor_32x64_sse2(uint8_t *dst, ptrdiff_t stride,
- const uint8_t *above, const uint8_t *left) {
- __m128i sum_above = dc_sum_32(above);
- const __m128i sum_left = dc_sum_64(left);
- sum_above = _mm_add_epi16(sum_above, sum_left);
-
- uint32_t sum = _mm_cvtsi128_si32(sum_above);
- sum += 48;
- sum = divide_using_multiply_shift(sum, 5, DC_MULTIPLIER_1X2);
- const __m128i row = _mm_set1_epi8((uint8_t)sum);
- dc_store_32xh(&row, 64, dst, stride);
-}
-
-void aom_dc_predictor_64x64_sse2(uint8_t *dst, ptrdiff_t stride,
- const uint8_t *above, const uint8_t *left) {
- __m128i sum_above = dc_sum_64(above);
- const __m128i sum_left = dc_sum_64(left);
- sum_above = _mm_add_epi16(sum_above, sum_left);
-
- uint32_t sum = _mm_cvtsi128_si32(sum_above);
- sum += 64;
- sum /= 128;
- const __m128i row = _mm_set1_epi8((uint8_t)sum);
- dc_store_64xh(&row, 64, dst, stride);
-}
-
-void aom_dc_predictor_64x32_sse2(uint8_t *dst, ptrdiff_t stride,
- const uint8_t *above, const uint8_t *left) {
- __m128i sum_above = dc_sum_64(above);
- const __m128i sum_left = dc_sum_32(left);
- sum_above = _mm_add_epi16(sum_above, sum_left);
-
- uint32_t sum = _mm_cvtsi128_si32(sum_above);
- sum += 48;
- sum = divide_using_multiply_shift(sum, 5, DC_MULTIPLIER_1X2);
- const __m128i row = _mm_set1_epi8((uint8_t)sum);
- dc_store_64xh(&row, 32, dst, stride);
-}
-
-void aom_dc_predictor_64x16_sse2(uint8_t *dst, ptrdiff_t stride,
- const uint8_t *above, const uint8_t *left) {
- __m128i sum_above = dc_sum_64(above);
- const __m128i sum_left = dc_sum_16(left);
- sum_above = _mm_add_epi16(sum_above, sum_left);
-
- uint32_t sum = _mm_cvtsi128_si32(sum_above);
- sum += 40;
- sum = divide_using_multiply_shift(sum, 4, DC_MULTIPLIER_1X4);
- const __m128i row = _mm_set1_epi8((uint8_t)sum);
- dc_store_64xh(&row, 16, dst, stride);
-}
-
-// -----------------------------------------------------------------------------
-// DC_TOP
-
-void aom_dc_top_predictor_4x8_sse2(uint8_t *dst, ptrdiff_t stride,
- const uint8_t *above, const uint8_t *left) {
- (void)left;
- __m128i sum_above = dc_sum_4(above);
- const __m128i two = _mm_set1_epi16((int16_t)2);
- sum_above = _mm_add_epi16(sum_above, two);
- sum_above = _mm_srai_epi16(sum_above, 2);
- sum_above = _mm_shufflelo_epi16(sum_above, 0);
- sum_above = _mm_packus_epi16(sum_above, sum_above);
-
- const uint32_t pred = _mm_cvtsi128_si32(sum_above);
- dc_store_4xh(pred, 8, dst, stride);
-}
-
-void aom_dc_top_predictor_4x16_sse2(uint8_t *dst, ptrdiff_t stride,
- const uint8_t *above, const uint8_t *left) {
- (void)left;
- __m128i sum_above = dc_sum_4(above);
- const __m128i two = _mm_set1_epi16((int16_t)2);
- sum_above = _mm_add_epi16(sum_above, two);
- sum_above = _mm_srai_epi16(sum_above, 2);
- sum_above = _mm_shufflelo_epi16(sum_above, 0);
- sum_above = _mm_packus_epi16(sum_above, sum_above);
-
- const uint32_t pred = _mm_cvtsi128_si32(sum_above);
- dc_store_4xh(pred, 16, dst, stride);
-}
-
-void aom_dc_top_predictor_8x4_sse2(uint8_t *dst, ptrdiff_t stride,
- const uint8_t *above, const uint8_t *left) {
- (void)left;
- __m128i sum_above = dc_sum_8(above);
- const __m128i four = _mm_set1_epi16((uint16_t)4);
- sum_above = _mm_add_epi16(sum_above, four);
- sum_above = _mm_srai_epi16(sum_above, 3);
- sum_above = _mm_unpacklo_epi8(sum_above, sum_above);
- const __m128i row = _mm_shufflelo_epi16(sum_above, 0);
- dc_store_8xh(&row, 4, dst, stride);
-}
-
-void aom_dc_top_predictor_8x16_sse2(uint8_t *dst, ptrdiff_t stride,
- const uint8_t *above, const uint8_t *left) {
- (void)left;
- __m128i sum_above = dc_sum_8(above);
- const __m128i four = _mm_set1_epi16((uint16_t)4);
- sum_above = _mm_add_epi16(sum_above, four);
- sum_above = _mm_srai_epi16(sum_above, 3);
- sum_above = _mm_unpacklo_epi8(sum_above, sum_above);
- const __m128i row = _mm_shufflelo_epi16(sum_above, 0);
- dc_store_8xh(&row, 16, dst, stride);
-}
-
-void aom_dc_top_predictor_8x32_sse2(uint8_t *dst, ptrdiff_t stride,
- const uint8_t *above, const uint8_t *left) {
- (void)left;
- __m128i sum_above = dc_sum_8(above);
- const __m128i four = _mm_set1_epi16((uint16_t)4);
- sum_above = _mm_add_epi16(sum_above, four);
- sum_above = _mm_srai_epi16(sum_above, 3);
- sum_above = _mm_unpacklo_epi8(sum_above, sum_above);
- const __m128i row = _mm_shufflelo_epi16(sum_above, 0);
- dc_store_8xh(&row, 32, dst, stride);
-}
-
-void aom_dc_top_predictor_16x4_sse2(uint8_t *dst, ptrdiff_t stride,
- const uint8_t *above, const uint8_t *left) {
- (void)left;
- __m128i sum_above = dc_sum_16(above);
- const __m128i eight = _mm_set1_epi16((uint16_t)8);
- sum_above = _mm_add_epi16(sum_above, eight);
- sum_above = _mm_srai_epi16(sum_above, 4);
- sum_above = _mm_unpacklo_epi8(sum_above, sum_above);
- sum_above = _mm_shufflelo_epi16(sum_above, 0);
- const __m128i row = _mm_unpacklo_epi64(sum_above, sum_above);
- dc_store_16xh(&row, 4, dst, stride);
-}
-
-void aom_dc_top_predictor_16x8_sse2(uint8_t *dst, ptrdiff_t stride,
- const uint8_t *above, const uint8_t *left) {
- (void)left;
- __m128i sum_above = dc_sum_16(above);
- const __m128i eight = _mm_set1_epi16((uint16_t)8);
- sum_above = _mm_add_epi16(sum_above, eight);
- sum_above = _mm_srai_epi16(sum_above, 4);
- sum_above = _mm_unpacklo_epi8(sum_above, sum_above);
- sum_above = _mm_shufflelo_epi16(sum_above, 0);
- const __m128i row = _mm_unpacklo_epi64(sum_above, sum_above);
- dc_store_16xh(&row, 8, dst, stride);
-}
-
-void aom_dc_top_predictor_16x32_sse2(uint8_t *dst, ptrdiff_t stride,
- const uint8_t *above,
- const uint8_t *left) {
- (void)left;
- __m128i sum_above = dc_sum_16(above);
- const __m128i eight = _mm_set1_epi16((uint16_t)8);
- sum_above = _mm_add_epi16(sum_above, eight);
- sum_above = _mm_srai_epi16(sum_above, 4);
- sum_above = _mm_unpacklo_epi8(sum_above, sum_above);
- sum_above = _mm_shufflelo_epi16(sum_above, 0);
- const __m128i row = _mm_unpacklo_epi64(sum_above, sum_above);
- dc_store_16xh(&row, 32, dst, stride);
-}
-
-void aom_dc_top_predictor_16x64_sse2(uint8_t *dst, ptrdiff_t stride,
- const uint8_t *above,
- const uint8_t *left) {
- (void)left;
- __m128i sum_above = dc_sum_16(above);
- const __m128i eight = _mm_set1_epi16((uint16_t)8);
- sum_above = _mm_add_epi16(sum_above, eight);
- sum_above = _mm_srai_epi16(sum_above, 4);
- sum_above = _mm_unpacklo_epi8(sum_above, sum_above);
- sum_above = _mm_shufflelo_epi16(sum_above, 0);
- const __m128i row = _mm_unpacklo_epi64(sum_above, sum_above);
- dc_store_16xh(&row, 64, dst, stride);
-}
-
-void aom_dc_top_predictor_32x8_sse2(uint8_t *dst, ptrdiff_t stride,
- const uint8_t *above, const uint8_t *left) {
- (void)left;
- __m128i sum_above = dc_sum_32(above);
- const __m128i sixteen = _mm_set1_epi16((uint16_t)16);
- sum_above = _mm_add_epi16(sum_above, sixteen);
- sum_above = _mm_srai_epi16(sum_above, 5);
- sum_above = _mm_unpacklo_epi8(sum_above, sum_above);
- sum_above = _mm_shufflelo_epi16(sum_above, 0);
- const __m128i row = _mm_unpacklo_epi64(sum_above, sum_above);
- dc_store_32xh(&row, 8, dst, stride);
-}
-
-void aom_dc_top_predictor_32x16_sse2(uint8_t *dst, ptrdiff_t stride,
- const uint8_t *above,
- const uint8_t *left) {
- (void)left;
- __m128i sum_above = dc_sum_32(above);
- const __m128i sixteen = _mm_set1_epi16((uint16_t)16);
- sum_above = _mm_add_epi16(sum_above, sixteen);
- sum_above = _mm_srai_epi16(sum_above, 5);
- sum_above = _mm_unpacklo_epi8(sum_above, sum_above);
- sum_above = _mm_shufflelo_epi16(sum_above, 0);
- const __m128i row = _mm_unpacklo_epi64(sum_above, sum_above);
- dc_store_32xh(&row, 16, dst, stride);
-}
-
-void aom_dc_top_predictor_32x64_sse2(uint8_t *dst, ptrdiff_t stride,
- const uint8_t *above,
- const uint8_t *left) {
- (void)left;
- __m128i sum_above = dc_sum_32(above);
- const __m128i sixteen = _mm_set1_epi16((uint16_t)16);
- sum_above = _mm_add_epi16(sum_above, sixteen);
- sum_above = _mm_srai_epi16(sum_above, 5);
- sum_above = _mm_unpacklo_epi8(sum_above, sum_above);
- sum_above = _mm_shufflelo_epi16(sum_above, 0);
- const __m128i row = _mm_unpacklo_epi64(sum_above, sum_above);
- dc_store_32xh(&row, 64, dst, stride);
-}
-
-void aom_dc_top_predictor_64x64_sse2(uint8_t *dst, ptrdiff_t stride,
- const uint8_t *above,
- const uint8_t *left) {
- (void)left;
- __m128i sum_above = dc_sum_64(above);
- const __m128i thirtytwo = _mm_set1_epi16((uint16_t)32);
- sum_above = _mm_add_epi16(sum_above, thirtytwo);
- sum_above = _mm_srai_epi16(sum_above, 6);
- sum_above = _mm_unpacklo_epi8(sum_above, sum_above);
- sum_above = _mm_shufflelo_epi16(sum_above, 0);
- const __m128i row = _mm_unpacklo_epi64(sum_above, sum_above);
- dc_store_64xh(&row, 64, dst, stride);
-}
-
-void aom_dc_top_predictor_64x32_sse2(uint8_t *dst, ptrdiff_t stride,
- const uint8_t *above,
- const uint8_t *left) {
- (void)left;
- __m128i sum_above = dc_sum_64(above);
- const __m128i thirtytwo = _mm_set1_epi16((uint16_t)32);
- sum_above = _mm_add_epi16(sum_above, thirtytwo);
- sum_above = _mm_srai_epi16(sum_above, 6);
- sum_above = _mm_unpacklo_epi8(sum_above, sum_above);
- sum_above = _mm_shufflelo_epi16(sum_above, 0);
- const __m128i row = _mm_unpacklo_epi64(sum_above, sum_above);
- dc_store_64xh(&row, 32, dst, stride);
-}
-
-void aom_dc_top_predictor_64x16_sse2(uint8_t *dst, ptrdiff_t stride,
- const uint8_t *above,
- const uint8_t *left) {
- (void)left;
- __m128i sum_above = dc_sum_64(above);
- const __m128i thirtytwo = _mm_set1_epi16((uint16_t)32);
- sum_above = _mm_add_epi16(sum_above, thirtytwo);
- sum_above = _mm_srai_epi16(sum_above, 6);
- sum_above = _mm_unpacklo_epi8(sum_above, sum_above);
- sum_above = _mm_shufflelo_epi16(sum_above, 0);
- const __m128i row = _mm_unpacklo_epi64(sum_above, sum_above);
- dc_store_64xh(&row, 16, dst, stride);
-}
-
-// -----------------------------------------------------------------------------
-// DC_LEFT
-
-void aom_dc_left_predictor_4x8_sse2(uint8_t *dst, ptrdiff_t stride,
- const uint8_t *above, const uint8_t *left) {
- (void)above;
- __m128i sum_left = dc_sum_8(left);
- const __m128i four = _mm_set1_epi16((uint16_t)4);
- sum_left = _mm_add_epi16(sum_left, four);
- sum_left = _mm_srai_epi16(sum_left, 3);
- sum_left = _mm_shufflelo_epi16(sum_left, 0);
- sum_left = _mm_packus_epi16(sum_left, sum_left);
-
- const uint32_t pred = _mm_cvtsi128_si32(sum_left);
- dc_store_4xh(pred, 8, dst, stride);
-}
-
-void aom_dc_left_predictor_4x16_sse2(uint8_t *dst, ptrdiff_t stride,
- const uint8_t *above,
- const uint8_t *left) {
- (void)above;
- __m128i sum_left = dc_sum_16(left);
- const __m128i eight = _mm_set1_epi16((uint16_t)8);
- sum_left = _mm_add_epi16(sum_left, eight);
- sum_left = _mm_srai_epi16(sum_left, 4);
- sum_left = _mm_shufflelo_epi16(sum_left, 0);
- sum_left = _mm_packus_epi16(sum_left, sum_left);
-
- const uint32_t pred = _mm_cvtsi128_si32(sum_left);
- dc_store_4xh(pred, 16, dst, stride);
-}
-
-void aom_dc_left_predictor_8x4_sse2(uint8_t *dst, ptrdiff_t stride,
- const uint8_t *above, const uint8_t *left) {
- (void)above;
- __m128i sum_left = dc_sum_4(left);
- const __m128i two = _mm_set1_epi16((uint16_t)2);
- sum_left = _mm_add_epi16(sum_left, two);
- sum_left = _mm_srai_epi16(sum_left, 2);
- sum_left = _mm_unpacklo_epi8(sum_left, sum_left);
- const __m128i row = _mm_shufflelo_epi16(sum_left, 0);
- dc_store_8xh(&row, 4, dst, stride);
-}
-
-void aom_dc_left_predictor_8x16_sse2(uint8_t *dst, ptrdiff_t stride,
- const uint8_t *above,
- const uint8_t *left) {
- (void)above;
- __m128i sum_left = dc_sum_16(left);
- const __m128i eight = _mm_set1_epi16((uint16_t)8);
- sum_left = _mm_add_epi16(sum_left, eight);
- sum_left = _mm_srai_epi16(sum_left, 4);
- sum_left = _mm_unpacklo_epi8(sum_left, sum_left);
- const __m128i row = _mm_shufflelo_epi16(sum_left, 0);
- dc_store_8xh(&row, 16, dst, stride);
-}
-
-void aom_dc_left_predictor_8x32_sse2(uint8_t *dst, ptrdiff_t stride,
- const uint8_t *above,
- const uint8_t *left) {
- (void)above;
- __m128i sum_left = dc_sum_32(left);
- const __m128i sixteen = _mm_set1_epi16((uint16_t)16);
- sum_left = _mm_add_epi16(sum_left, sixteen);
- sum_left = _mm_srai_epi16(sum_left, 5);
- sum_left = _mm_unpacklo_epi8(sum_left, sum_left);
- const __m128i row = _mm_shufflelo_epi16(sum_left, 0);
- dc_store_8xh(&row, 32, dst, stride);
-}
-
-void aom_dc_left_predictor_16x4_sse2(uint8_t *dst, ptrdiff_t stride,
- const uint8_t *above,
- const uint8_t *left) {
- (void)above;
- __m128i sum_left = dc_sum_4(left);
- const __m128i two = _mm_set1_epi16((uint16_t)2);
- sum_left = _mm_add_epi16(sum_left, two);
- sum_left = _mm_srai_epi16(sum_left, 2);
- sum_left = _mm_unpacklo_epi8(sum_left, sum_left);
- sum_left = _mm_shufflelo_epi16(sum_left, 0);
- const __m128i row = _mm_unpacklo_epi64(sum_left, sum_left);
- dc_store_16xh(&row, 4, dst, stride);
-}
-
-void aom_dc_left_predictor_16x8_sse2(uint8_t *dst, ptrdiff_t stride,
- const uint8_t *above,
- const uint8_t *left) {
- (void)above;
- __m128i sum_left = dc_sum_8(left);
- const __m128i four = _mm_set1_epi16((uint16_t)4);
- sum_left = _mm_add_epi16(sum_left, four);
- sum_left = _mm_srai_epi16(sum_left, 3);
- sum_left = _mm_unpacklo_epi8(sum_left, sum_left);
- sum_left = _mm_shufflelo_epi16(sum_left, 0);
- const __m128i row = _mm_unpacklo_epi64(sum_left, sum_left);
- dc_store_16xh(&row, 8, dst, stride);
-}
-
-void aom_dc_left_predictor_16x32_sse2(uint8_t *dst, ptrdiff_t stride,
- const uint8_t *above,
- const uint8_t *left) {
- (void)above;
- __m128i sum_left = dc_sum_32(left);
- const __m128i sixteen = _mm_set1_epi16((uint16_t)16);
- sum_left = _mm_add_epi16(sum_left, sixteen);
- sum_left = _mm_srai_epi16(sum_left, 5);
- sum_left = _mm_unpacklo_epi8(sum_left, sum_left);
- sum_left = _mm_shufflelo_epi16(sum_left, 0);
- const __m128i row = _mm_unpacklo_epi64(sum_left, sum_left);
- dc_store_16xh(&row, 32, dst, stride);
-}
-
-void aom_dc_left_predictor_16x64_sse2(uint8_t *dst, ptrdiff_t stride,
- const uint8_t *above,
- const uint8_t *left) {
- (void)above;
- __m128i sum_left = dc_sum_64(left);
- const __m128i thirtytwo = _mm_set1_epi16((uint16_t)32);
- sum_left = _mm_add_epi16(sum_left, thirtytwo);
- sum_left = _mm_srai_epi16(sum_left, 6);
- sum_left = _mm_unpacklo_epi8(sum_left, sum_left);
- sum_left = _mm_shufflelo_epi16(sum_left, 0);
- const __m128i row = _mm_unpacklo_epi64(sum_left, sum_left);
- dc_store_16xh(&row, 64, dst, stride);
-}
-
-void aom_dc_left_predictor_32x8_sse2(uint8_t *dst, ptrdiff_t stride,
- const uint8_t *above,
- const uint8_t *left) {
- (void)above;
- __m128i sum_left = dc_sum_8(left);
- const __m128i four = _mm_set1_epi16((uint16_t)4);
- sum_left = _mm_add_epi16(sum_left, four);
- sum_left = _mm_srai_epi16(sum_left, 3);
- sum_left = _mm_unpacklo_epi8(sum_left, sum_left);
- sum_left = _mm_shufflelo_epi16(sum_left, 0);
- const __m128i row = _mm_unpacklo_epi64(sum_left, sum_left);
- dc_store_32xh(&row, 8, dst, stride);
-}
-
-void aom_dc_left_predictor_32x16_sse2(uint8_t *dst, ptrdiff_t stride,
- const uint8_t *above,
- const uint8_t *left) {
- (void)above;
- __m128i sum_left = dc_sum_16(left);
- const __m128i eight = _mm_set1_epi16((uint16_t)8);
- sum_left = _mm_add_epi16(sum_left, eight);
- sum_left = _mm_srai_epi16(sum_left, 4);
- sum_left = _mm_unpacklo_epi8(sum_left, sum_left);
- sum_left = _mm_shufflelo_epi16(sum_left, 0);
- const __m128i row = _mm_unpacklo_epi64(sum_left, sum_left);
- dc_store_32xh(&row, 16, dst, stride);
-}
-
-void aom_dc_left_predictor_32x64_sse2(uint8_t *dst, ptrdiff_t stride,
- const uint8_t *above,
- const uint8_t *left) {
- (void)above;
- __m128i sum_left = dc_sum_64(left);
- const __m128i thirtytwo = _mm_set1_epi16((uint16_t)32);
- sum_left = _mm_add_epi16(sum_left, thirtytwo);
- sum_left = _mm_srai_epi16(sum_left, 6);
- sum_left = _mm_unpacklo_epi8(sum_left, sum_left);
- sum_left = _mm_shufflelo_epi16(sum_left, 0);
- const __m128i row = _mm_unpacklo_epi64(sum_left, sum_left);
- dc_store_32xh(&row, 64, dst, stride);
-}
-
-void aom_dc_left_predictor_64x64_sse2(uint8_t *dst, ptrdiff_t stride,
- const uint8_t *above,
- const uint8_t *left) {
- (void)above;
- __m128i sum_left = dc_sum_64(left);
- const __m128i thirtytwo = _mm_set1_epi16((uint16_t)32);
- sum_left = _mm_add_epi16(sum_left, thirtytwo);
- sum_left = _mm_srai_epi16(sum_left, 6);
- sum_left = _mm_unpacklo_epi8(sum_left, sum_left);
- sum_left = _mm_shufflelo_epi16(sum_left, 0);
- const __m128i row = _mm_unpacklo_epi64(sum_left, sum_left);
- dc_store_64xh(&row, 64, dst, stride);
-}
-
-void aom_dc_left_predictor_64x32_sse2(uint8_t *dst, ptrdiff_t stride,
- const uint8_t *above,
- const uint8_t *left) {
- (void)above;
- __m128i sum_left = dc_sum_32(left);
- const __m128i sixteen = _mm_set1_epi16((uint16_t)16);
- sum_left = _mm_add_epi16(sum_left, sixteen);
- sum_left = _mm_srai_epi16(sum_left, 5);
- sum_left = _mm_unpacklo_epi8(sum_left, sum_left);
- sum_left = _mm_shufflelo_epi16(sum_left, 0);
- const __m128i row = _mm_unpacklo_epi64(sum_left, sum_left);
- dc_store_64xh(&row, 32, dst, stride);
-}
-
-void aom_dc_left_predictor_64x16_sse2(uint8_t *dst, ptrdiff_t stride,
- const uint8_t *above,
- const uint8_t *left) {
- (void)above;
- __m128i sum_left = dc_sum_16(left);
- const __m128i eight = _mm_set1_epi16((uint16_t)8);
- sum_left = _mm_add_epi16(sum_left, eight);
- sum_left = _mm_srai_epi16(sum_left, 4);
- sum_left = _mm_unpacklo_epi8(sum_left, sum_left);
- sum_left = _mm_shufflelo_epi16(sum_left, 0);
- const __m128i row = _mm_unpacklo_epi64(sum_left, sum_left);
- dc_store_64xh(&row, 16, dst, stride);
-}
-
-// -----------------------------------------------------------------------------
-// DC_128
-
-void aom_dc_128_predictor_4x8_sse2(uint8_t *dst, ptrdiff_t stride,
- const uint8_t *above, const uint8_t *left) {
- (void)above;
- (void)left;
- const uint32_t pred = 0x80808080;
- dc_store_4xh(pred, 8, dst, stride);
-}
-
-void aom_dc_128_predictor_4x16_sse2(uint8_t *dst, ptrdiff_t stride,
- const uint8_t *above, const uint8_t *left) {
- (void)above;
- (void)left;
- const uint32_t pred = 0x80808080;
- dc_store_4xh(pred, 16, dst, stride);
-}
-
-void aom_dc_128_predictor_8x4_sse2(uint8_t *dst, ptrdiff_t stride,
- const uint8_t *above, const uint8_t *left) {
- (void)above;
- (void)left;
- const __m128i row = _mm_set1_epi8((uint8_t)128);
- dc_store_8xh(&row, 4, dst, stride);
-}
-
-void aom_dc_128_predictor_8x16_sse2(uint8_t *dst, ptrdiff_t stride,
- const uint8_t *above, const uint8_t *left) {
- (void)above;
- (void)left;
- const __m128i row = _mm_set1_epi8((uint8_t)128);
- dc_store_8xh(&row, 16, dst, stride);
-}
-
-void aom_dc_128_predictor_8x32_sse2(uint8_t *dst, ptrdiff_t stride,
- const uint8_t *above, const uint8_t *left) {
- (void)above;
- (void)left;
- const __m128i row = _mm_set1_epi8((uint8_t)128);
- dc_store_8xh(&row, 32, dst, stride);
-}
-
-void aom_dc_128_predictor_16x4_sse2(uint8_t *dst, ptrdiff_t stride,
- const uint8_t *above, const uint8_t *left) {
- (void)above;
- (void)left;
- const __m128i row = _mm_set1_epi8((uint8_t)128);
- dc_store_16xh(&row, 4, dst, stride);
-}
-
-void aom_dc_128_predictor_16x8_sse2(uint8_t *dst, ptrdiff_t stride,
- const uint8_t *above, const uint8_t *left) {
- (void)above;
- (void)left;
- const __m128i row = _mm_set1_epi8((uint8_t)128);
- dc_store_16xh(&row, 8, dst, stride);
-}
-
-void aom_dc_128_predictor_16x32_sse2(uint8_t *dst, ptrdiff_t stride,
- const uint8_t *above,
- const uint8_t *left) {
- (void)above;
- (void)left;
- const __m128i row = _mm_set1_epi8((uint8_t)128);
- dc_store_16xh(&row, 32, dst, stride);
-}
-
-void aom_dc_128_predictor_16x64_sse2(uint8_t *dst, ptrdiff_t stride,
- const uint8_t *above,
- const uint8_t *left) {
- (void)above;
- (void)left;
- const __m128i row = _mm_set1_epi8((uint8_t)128);
- dc_store_16xh(&row, 64, dst, stride);
-}
-
-void aom_dc_128_predictor_32x8_sse2(uint8_t *dst, ptrdiff_t stride,
- const uint8_t *above, const uint8_t *left) {
- (void)above;
- (void)left;
- const __m128i row = _mm_set1_epi8((uint8_t)128);
- dc_store_32xh(&row, 8, dst, stride);
-}
-
-void aom_dc_128_predictor_32x16_sse2(uint8_t *dst, ptrdiff_t stride,
- const uint8_t *above,
- const uint8_t *left) {
- (void)above;
- (void)left;
- const __m128i row = _mm_set1_epi8((uint8_t)128);
- dc_store_32xh(&row, 16, dst, stride);
-}
-
-void aom_dc_128_predictor_32x64_sse2(uint8_t *dst, ptrdiff_t stride,
- const uint8_t *above,
- const uint8_t *left) {
- (void)above;
- (void)left;
- const __m128i row = _mm_set1_epi8((uint8_t)128);
- dc_store_32xh(&row, 64, dst, stride);
-}
-
-void aom_dc_128_predictor_64x64_sse2(uint8_t *dst, ptrdiff_t stride,
- const uint8_t *above,
- const uint8_t *left) {
- (void)above;
- (void)left;
- const __m128i row = _mm_set1_epi8((uint8_t)128);
- dc_store_64xh(&row, 64, dst, stride);
-}
-
-void aom_dc_128_predictor_64x32_sse2(uint8_t *dst, ptrdiff_t stride,
- const uint8_t *above,
- const uint8_t *left) {
- (void)above;
- (void)left;
- const __m128i row = _mm_set1_epi8((uint8_t)128);
- dc_store_64xh(&row, 32, dst, stride);
-}
-
-void aom_dc_128_predictor_64x16_sse2(uint8_t *dst, ptrdiff_t stride,
- const uint8_t *above,
- const uint8_t *left) {
- (void)above;
- (void)left;
- const __m128i row = _mm_set1_epi8((uint8_t)128);
- dc_store_64xh(&row, 16, dst, stride);
-}
-
-// -----------------------------------------------------------------------------
-// V_PRED
-
-void aom_v_predictor_4x8_sse2(uint8_t *dst, ptrdiff_t stride,
- const uint8_t *above, const uint8_t *left) {
- const uint32_t pred = *(uint32_t *)above;
- (void)left;
- dc_store_4xh(pred, 8, dst, stride);
-}
-
-void aom_v_predictor_4x16_sse2(uint8_t *dst, ptrdiff_t stride,
- const uint8_t *above, const uint8_t *left) {
- const uint32_t pred = *(uint32_t *)above;
- (void)left;
- dc_store_4xh(pred, 16, dst, stride);
-}
-
-void aom_v_predictor_8x4_sse2(uint8_t *dst, ptrdiff_t stride,
- const uint8_t *above, const uint8_t *left) {
- const __m128i row = _mm_loadl_epi64((__m128i const *)above);
- (void)left;
- dc_store_8xh(&row, 4, dst, stride);
-}
-
-void aom_v_predictor_8x16_sse2(uint8_t *dst, ptrdiff_t stride,
- const uint8_t *above, const uint8_t *left) {
- const __m128i row = _mm_loadl_epi64((__m128i const *)above);
- (void)left;
- dc_store_8xh(&row, 16, dst, stride);
-}
-
-void aom_v_predictor_8x32_sse2(uint8_t *dst, ptrdiff_t stride,
- const uint8_t *above, const uint8_t *left) {
- const __m128i row = _mm_loadl_epi64((__m128i const *)above);
- (void)left;
- dc_store_8xh(&row, 32, dst, stride);
-}
-
-void aom_v_predictor_16x4_sse2(uint8_t *dst, ptrdiff_t stride,
- const uint8_t *above, const uint8_t *left) {
- const __m128i row = _mm_load_si128((__m128i const *)above);
- (void)left;
- dc_store_16xh(&row, 4, dst, stride);
-}
-
-void aom_v_predictor_16x8_sse2(uint8_t *dst, ptrdiff_t stride,
- const uint8_t *above, const uint8_t *left) {
- const __m128i row = _mm_load_si128((__m128i const *)above);
- (void)left;
- dc_store_16xh(&row, 8, dst, stride);
-}
-
-void aom_v_predictor_16x32_sse2(uint8_t *dst, ptrdiff_t stride,
- const uint8_t *above, const uint8_t *left) {
- const __m128i row = _mm_load_si128((__m128i const *)above);
- (void)left;
- dc_store_16xh(&row, 32, dst, stride);
-}
-
-void aom_v_predictor_16x64_sse2(uint8_t *dst, ptrdiff_t stride,
- const uint8_t *above, const uint8_t *left) {
- const __m128i row = _mm_load_si128((__m128i const *)above);
- (void)left;
- dc_store_16xh(&row, 64, dst, stride);
-}
-
-static INLINE void v_predictor_32xh(uint8_t *dst, ptrdiff_t stride,
- const uint8_t *above, int height) {
- const __m128i row0 = _mm_load_si128((__m128i const *)above);
- const __m128i row1 = _mm_load_si128((__m128i const *)(above + 16));
- for (int i = 0; i < height; ++i) {
- _mm_store_si128((__m128i *)dst, row0);
- _mm_store_si128((__m128i *)(dst + 16), row1);
- dst += stride;
- }
-}
-
-void aom_v_predictor_32x8_sse2(uint8_t *dst, ptrdiff_t stride,
- const uint8_t *above, const uint8_t *left) {
- (void)left;
- v_predictor_32xh(dst, stride, above, 8);
-}
-
-void aom_v_predictor_32x16_sse2(uint8_t *dst, ptrdiff_t stride,
- const uint8_t *above, const uint8_t *left) {
- (void)left;
- v_predictor_32xh(dst, stride, above, 16);
-}
-
-void aom_v_predictor_32x64_sse2(uint8_t *dst, ptrdiff_t stride,
- const uint8_t *above, const uint8_t *left) {
- (void)left;
- v_predictor_32xh(dst, stride, above, 64);
-}
-
-static INLINE void v_predictor_64xh(uint8_t *dst, ptrdiff_t stride,
- const uint8_t *above, int height) {
- const __m128i row0 = _mm_load_si128((__m128i const *)above);
- const __m128i row1 = _mm_load_si128((__m128i const *)(above + 16));
- const __m128i row2 = _mm_load_si128((__m128i const *)(above + 32));
- const __m128i row3 = _mm_load_si128((__m128i const *)(above + 48));
- for (int i = 0; i < height; ++i) {
- _mm_store_si128((__m128i *)dst, row0);
- _mm_store_si128((__m128i *)(dst + 16), row1);
- _mm_store_si128((__m128i *)(dst + 32), row2);
- _mm_store_si128((__m128i *)(dst + 48), row3);
- dst += stride;
- }
-}
-
-void aom_v_predictor_64x64_sse2(uint8_t *dst, ptrdiff_t stride,
- const uint8_t *above, const uint8_t *left) {
- (void)left;
- v_predictor_64xh(dst, stride, above, 64);
-}
-
-void aom_v_predictor_64x32_sse2(uint8_t *dst, ptrdiff_t stride,
- const uint8_t *above, const uint8_t *left) {
- (void)left;
- v_predictor_64xh(dst, stride, above, 32);
-}
-
-void aom_v_predictor_64x16_sse2(uint8_t *dst, ptrdiff_t stride,
- const uint8_t *above, const uint8_t *left) {
- (void)left;
- v_predictor_64xh(dst, stride, above, 16);
-}
-
-// -----------------------------------------------------------------------------
-// H_PRED
-
-void aom_h_predictor_4x8_sse2(uint8_t *dst, ptrdiff_t stride,
- const uint8_t *above, const uint8_t *left) {
- (void)above;
- __m128i left_col = _mm_loadl_epi64((__m128i const *)left);
- left_col = _mm_unpacklo_epi8(left_col, left_col);
- __m128i row0 = _mm_shufflelo_epi16(left_col, 0);
- __m128i row1 = _mm_shufflelo_epi16(left_col, 0x55);
- __m128i row2 = _mm_shufflelo_epi16(left_col, 0xaa);
- __m128i row3 = _mm_shufflelo_epi16(left_col, 0xff);
- *(uint32_t *)dst = _mm_cvtsi128_si32(row0);
- dst += stride;
- *(uint32_t *)dst = _mm_cvtsi128_si32(row1);
- dst += stride;
- *(uint32_t *)dst = _mm_cvtsi128_si32(row2);
- dst += stride;
- *(uint32_t *)dst = _mm_cvtsi128_si32(row3);
- dst += stride;
- left_col = _mm_unpackhi_epi64(left_col, left_col);
- row0 = _mm_shufflelo_epi16(left_col, 0);
- row1 = _mm_shufflelo_epi16(left_col, 0x55);
- row2 = _mm_shufflelo_epi16(left_col, 0xaa);
- row3 = _mm_shufflelo_epi16(left_col, 0xff);
- *(uint32_t *)dst = _mm_cvtsi128_si32(row0);
- dst += stride;
- *(uint32_t *)dst = _mm_cvtsi128_si32(row1);
- dst += stride;
- *(uint32_t *)dst = _mm_cvtsi128_si32(row2);
- dst += stride;
- *(uint32_t *)dst = _mm_cvtsi128_si32(row3);
-}
-
-void aom_h_predictor_4x16_sse2(uint8_t *dst, ptrdiff_t stride,
- const uint8_t *above, const uint8_t *left) {
- (void)above;
- const __m128i left_col = _mm_load_si128((__m128i const *)left);
- __m128i left_col_low = _mm_unpacklo_epi8(left_col, left_col);
- __m128i left_col_high = _mm_unpackhi_epi8(left_col, left_col);
-
- __m128i row0 = _mm_shufflelo_epi16(left_col_low, 0);
- __m128i row1 = _mm_shufflelo_epi16(left_col_low, 0x55);
- __m128i row2 = _mm_shufflelo_epi16(left_col_low, 0xaa);
- __m128i row3 = _mm_shufflelo_epi16(left_col_low, 0xff);
- *(uint32_t *)dst = _mm_cvtsi128_si32(row0);
- dst += stride;
- *(uint32_t *)dst = _mm_cvtsi128_si32(row1);
- dst += stride;
- *(uint32_t *)dst = _mm_cvtsi128_si32(row2);
- dst += stride;
- *(uint32_t *)dst = _mm_cvtsi128_si32(row3);
- dst += stride;
-
- left_col_low = _mm_unpackhi_epi64(left_col_low, left_col_low);
- row0 = _mm_shufflelo_epi16(left_col_low, 0);
- row1 = _mm_shufflelo_epi16(left_col_low, 0x55);
- row2 = _mm_shufflelo_epi16(left_col_low, 0xaa);
- row3 = _mm_shufflelo_epi16(left_col_low, 0xff);
- *(uint32_t *)dst = _mm_cvtsi128_si32(row0);
- dst += stride;
- *(uint32_t *)dst = _mm_cvtsi128_si32(row1);
- dst += stride;
- *(uint32_t *)dst = _mm_cvtsi128_si32(row2);
- dst += stride;
- *(uint32_t *)dst = _mm_cvtsi128_si32(row3);
- dst += stride;
-
- row0 = _mm_shufflelo_epi16(left_col_high, 0);
- row1 = _mm_shufflelo_epi16(left_col_high, 0x55);
- row2 = _mm_shufflelo_epi16(left_col_high, 0xaa);
- row3 = _mm_shufflelo_epi16(left_col_high, 0xff);
- *(uint32_t *)dst = _mm_cvtsi128_si32(row0);
- dst += stride;
- *(uint32_t *)dst = _mm_cvtsi128_si32(row1);
- dst += stride;
- *(uint32_t *)dst = _mm_cvtsi128_si32(row2);
- dst += stride;
- *(uint32_t *)dst = _mm_cvtsi128_si32(row3);
- dst += stride;
-
- left_col_high = _mm_unpackhi_epi64(left_col_high, left_col_high);
- row0 = _mm_shufflelo_epi16(left_col_high, 0);
- row1 = _mm_shufflelo_epi16(left_col_high, 0x55);
- row2 = _mm_shufflelo_epi16(left_col_high, 0xaa);
- row3 = _mm_shufflelo_epi16(left_col_high, 0xff);
- *(uint32_t *)dst = _mm_cvtsi128_si32(row0);
- dst += stride;
- *(uint32_t *)dst = _mm_cvtsi128_si32(row1);
- dst += stride;
- *(uint32_t *)dst = _mm_cvtsi128_si32(row2);
- dst += stride;
- *(uint32_t *)dst = _mm_cvtsi128_si32(row3);
-}
-
-void aom_h_predictor_8x4_sse2(uint8_t *dst, ptrdiff_t stride,
- const uint8_t *above, const uint8_t *left) {
- (void)above;
- __m128i left_col = _mm_loadl_epi64((__m128i const *)left);
- left_col = _mm_unpacklo_epi8(left_col, left_col);
- __m128i row0 = _mm_shufflelo_epi16(left_col, 0);
- __m128i row1 = _mm_shufflelo_epi16(left_col, 0x55);
- __m128i row2 = _mm_shufflelo_epi16(left_col, 0xaa);
- __m128i row3 = _mm_shufflelo_epi16(left_col, 0xff);
- _mm_storel_epi64((__m128i *)dst, row0);
- dst += stride;
- _mm_storel_epi64((__m128i *)dst, row1);
- dst += stride;
- _mm_storel_epi64((__m128i *)dst, row2);
- dst += stride;
- _mm_storel_epi64((__m128i *)dst, row3);
-}
-
-static INLINE void h_predictor_8x16xc(uint8_t *dst, ptrdiff_t stride,
- const uint8_t *above, const uint8_t *left,
- int count) {
- (void)above;
- for (int i = 0; i < count; ++i) {
- const __m128i left_col = _mm_load_si128((__m128i const *)left);
- __m128i left_col_low = _mm_unpacklo_epi8(left_col, left_col);
- __m128i left_col_high = _mm_unpackhi_epi8(left_col, left_col);
-
- __m128i row0 = _mm_shufflelo_epi16(left_col_low, 0);
- __m128i row1 = _mm_shufflelo_epi16(left_col_low, 0x55);
- __m128i row2 = _mm_shufflelo_epi16(left_col_low, 0xaa);
- __m128i row3 = _mm_shufflelo_epi16(left_col_low, 0xff);
- _mm_storel_epi64((__m128i *)dst, row0);
- dst += stride;
- _mm_storel_epi64((__m128i *)dst, row1);
- dst += stride;
- _mm_storel_epi64((__m128i *)dst, row2);
- dst += stride;
- _mm_storel_epi64((__m128i *)dst, row3);
- dst += stride;
-
- left_col_low = _mm_unpackhi_epi64(left_col_low, left_col_low);
- row0 = _mm_shufflelo_epi16(left_col_low, 0);
- row1 = _mm_shufflelo_epi16(left_col_low, 0x55);
- row2 = _mm_shufflelo_epi16(left_col_low, 0xaa);
- row3 = _mm_shufflelo_epi16(left_col_low, 0xff);
- _mm_storel_epi64((__m128i *)dst, row0);
- dst += stride;
- _mm_storel_epi64((__m128i *)dst, row1);
- dst += stride;
- _mm_storel_epi64((__m128i *)dst, row2);
- dst += stride;
- _mm_storel_epi64((__m128i *)dst, row3);
- dst += stride;
-
- row0 = _mm_shufflelo_epi16(left_col_high, 0);
- row1 = _mm_shufflelo_epi16(left_col_high, 0x55);
- row2 = _mm_shufflelo_epi16(left_col_high, 0xaa);
- row3 = _mm_shufflelo_epi16(left_col_high, 0xff);
- _mm_storel_epi64((__m128i *)dst, row0);
- dst += stride;
- _mm_storel_epi64((__m128i *)dst, row1);
- dst += stride;
- _mm_storel_epi64((__m128i *)dst, row2);
- dst += stride;
- _mm_storel_epi64((__m128i *)dst, row3);
- dst += stride;
-
- left_col_high = _mm_unpackhi_epi64(left_col_high, left_col_high);
- row0 = _mm_shufflelo_epi16(left_col_high, 0);
- row1 = _mm_shufflelo_epi16(left_col_high, 0x55);
- row2 = _mm_shufflelo_epi16(left_col_high, 0xaa);
- row3 = _mm_shufflelo_epi16(left_col_high, 0xff);
- _mm_storel_epi64((__m128i *)dst, row0);
- dst += stride;
- _mm_storel_epi64((__m128i *)dst, row1);
- dst += stride;
- _mm_storel_epi64((__m128i *)dst, row2);
- dst += stride;
- _mm_storel_epi64((__m128i *)dst, row3);
- dst += stride;
- left += 16;
- }
-}
-
-void aom_h_predictor_8x16_sse2(uint8_t *dst, ptrdiff_t stride,
- const uint8_t *above, const uint8_t *left) {
- h_predictor_8x16xc(dst, stride, above, left, 1);
-}
-
-void aom_h_predictor_8x32_sse2(uint8_t *dst, ptrdiff_t stride,
- const uint8_t *above, const uint8_t *left) {
- h_predictor_8x16xc(dst, stride, above, left, 2);
-}
-
-static INLINE void h_pred_store_16xh(const __m128i *row, int h, uint8_t *dst,
- ptrdiff_t stride) {
- int i;
- for (i = 0; i < h; ++i) {
- _mm_store_si128((__m128i *)dst, row[i]);
- dst += stride;
- }
-}
-
-static INLINE void repeat_low_4pixels(const __m128i *x, __m128i *row) {
- const __m128i u0 = _mm_shufflelo_epi16(*x, 0);
- const __m128i u1 = _mm_shufflelo_epi16(*x, 0x55);
- const __m128i u2 = _mm_shufflelo_epi16(*x, 0xaa);
- const __m128i u3 = _mm_shufflelo_epi16(*x, 0xff);
-
- row[0] = _mm_unpacklo_epi64(u0, u0);
- row[1] = _mm_unpacklo_epi64(u1, u1);
- row[2] = _mm_unpacklo_epi64(u2, u2);
- row[3] = _mm_unpacklo_epi64(u3, u3);
-}
-
-static INLINE void repeat_high_4pixels(const __m128i *x, __m128i *row) {
- const __m128i u0 = _mm_shufflehi_epi16(*x, 0);
- const __m128i u1 = _mm_shufflehi_epi16(*x, 0x55);
- const __m128i u2 = _mm_shufflehi_epi16(*x, 0xaa);
- const __m128i u3 = _mm_shufflehi_epi16(*x, 0xff);
-
- row[0] = _mm_unpackhi_epi64(u0, u0);
- row[1] = _mm_unpackhi_epi64(u1, u1);
- row[2] = _mm_unpackhi_epi64(u2, u2);
- row[3] = _mm_unpackhi_epi64(u3, u3);
-}
-
-// Process 16x8, first 4 rows
-// Use first 8 bytes of left register: xxxxxxxx33221100
-static INLINE void h_prediction_16x8_1(const __m128i *left, uint8_t *dst,
- ptrdiff_t stride) {
- __m128i row[4];
- repeat_low_4pixels(left, row);
- h_pred_store_16xh(row, 4, dst, stride);
-}
-
-// Process 16x8, second 4 rows
-// Use second 8 bytes of left register: 77665544xxxxxxxx
-static INLINE void h_prediction_16x8_2(const __m128i *left, uint8_t *dst,
- ptrdiff_t stride) {
- __m128i row[4];
- repeat_high_4pixels(left, row);
- h_pred_store_16xh(row, 4, dst, stride);
-}
-
-void aom_h_predictor_16x4_sse2(uint8_t *dst, ptrdiff_t stride,
- const uint8_t *above, const uint8_t *left) {
- (void)above;
- const __m128i left_col = _mm_loadl_epi64((const __m128i *)left);
- const __m128i left_col_8p = _mm_unpacklo_epi8(left_col, left_col);
- h_prediction_16x8_1(&left_col_8p, dst, stride);
-}
-
-void aom_h_predictor_16x8_sse2(uint8_t *dst, ptrdiff_t stride,
- const uint8_t *above, const uint8_t *left) {
- (void)above;
- const __m128i left_col = _mm_loadl_epi64((const __m128i *)left);
- const __m128i left_col_8p = _mm_unpacklo_epi8(left_col, left_col);
- h_prediction_16x8_1(&left_col_8p, dst, stride);
- dst += stride << 2;
- h_prediction_16x8_2(&left_col_8p, dst, stride);
-}
-
-static INLINE void h_predictor_16xh(uint8_t *dst, ptrdiff_t stride,
- const uint8_t *left, int count) {
- int i = 0;
- do {
- const __m128i left_col = _mm_load_si128((const __m128i *)left);
- const __m128i left_col_8p_lo = _mm_unpacklo_epi8(left_col, left_col);
- h_prediction_16x8_1(&left_col_8p_lo, dst, stride);
- dst += stride << 2;
- h_prediction_16x8_2(&left_col_8p_lo, dst, stride);
- dst += stride << 2;
-
- const __m128i left_col_8p_hi = _mm_unpackhi_epi8(left_col, left_col);
- h_prediction_16x8_1(&left_col_8p_hi, dst, stride);
- dst += stride << 2;
- h_prediction_16x8_2(&left_col_8p_hi, dst, stride);
- dst += stride << 2;
-
- left += 16;
- i++;
- } while (i < count);
-}
-
-void aom_h_predictor_16x32_sse2(uint8_t *dst, ptrdiff_t stride,
- const uint8_t *above, const uint8_t *left) {
- (void)above;
- h_predictor_16xh(dst, stride, left, 2);
-}
-
-void aom_h_predictor_16x64_sse2(uint8_t *dst, ptrdiff_t stride,
- const uint8_t *above, const uint8_t *left) {
- (void)above;
- h_predictor_16xh(dst, stride, left, 4);
-}
-
-static INLINE void h_pred_store_32xh(const __m128i *row, int h, uint8_t *dst,
- ptrdiff_t stride) {
- int i;
- for (i = 0; i < h; ++i) {
- _mm_store_si128((__m128i *)dst, row[i]);
- _mm_store_si128((__m128i *)(dst + 16), row[i]);
- dst += stride;
- }
-}
-
-// Process 32x8, first 4 rows
-// Use first 8 bytes of left register: xxxxxxxx33221100
-static INLINE void h_prediction_32x8_1(const __m128i *left, uint8_t *dst,
- ptrdiff_t stride) {
- __m128i row[4];
- repeat_low_4pixels(left, row);
- h_pred_store_32xh(row, 4, dst, stride);
-}
-
-// Process 32x8, second 4 rows
-// Use second 8 bytes of left register: 77665544xxxxxxxx
-static INLINE void h_prediction_32x8_2(const __m128i *left, uint8_t *dst,
- ptrdiff_t stride) {
- __m128i row[4];
- repeat_high_4pixels(left, row);
- h_pred_store_32xh(row, 4, dst, stride);
-}
-
-void aom_h_predictor_32x8_sse2(uint8_t *dst, ptrdiff_t stride,
- const uint8_t *above, const uint8_t *left) {
- __m128i left_col, left_col_8p;
- (void)above;
-
- left_col = _mm_load_si128((const __m128i *)left);
-
- left_col_8p = _mm_unpacklo_epi8(left_col, left_col);
- h_prediction_32x8_1(&left_col_8p, dst, stride);
- dst += stride << 2;
- h_prediction_32x8_2(&left_col_8p, dst, stride);
-}
-
-void aom_h_predictor_32x16_sse2(uint8_t *dst, ptrdiff_t stride,
- const uint8_t *above, const uint8_t *left) {
- __m128i left_col, left_col_8p;
- (void)above;
-
- left_col = _mm_load_si128((const __m128i *)left);
-
- left_col_8p = _mm_unpacklo_epi8(left_col, left_col);
- h_prediction_32x8_1(&left_col_8p, dst, stride);
- dst += stride << 2;
- h_prediction_32x8_2(&left_col_8p, dst, stride);
- dst += stride << 2;
-
- left_col_8p = _mm_unpackhi_epi8(left_col, left_col);
- h_prediction_32x8_1(&left_col_8p, dst, stride);
- dst += stride << 2;
- h_prediction_32x8_2(&left_col_8p, dst, stride);
-}
-
-static INLINE void h_predictor_32xh(uint8_t *dst, ptrdiff_t stride,
- const uint8_t *left, int height) {
- int i = height >> 2;
- do {
- __m128i left4 = _mm_cvtsi32_si128(((uint32_t *)left)[0]);
- left4 = _mm_unpacklo_epi8(left4, left4);
- left4 = _mm_unpacklo_epi8(left4, left4);
- const __m128i r0 = _mm_shuffle_epi32(left4, 0x0);
- const __m128i r1 = _mm_shuffle_epi32(left4, 0x55);
- _mm_store_si128((__m128i *)dst, r0);
- _mm_store_si128((__m128i *)(dst + 16), r0);
- _mm_store_si128((__m128i *)(dst + stride), r1);
- _mm_store_si128((__m128i *)(dst + stride + 16), r1);
- const __m128i r2 = _mm_shuffle_epi32(left4, 0xaa);
- const __m128i r3 = _mm_shuffle_epi32(left4, 0xff);
- _mm_store_si128((__m128i *)(dst + stride * 2), r2);
- _mm_store_si128((__m128i *)(dst + stride * 2 + 16), r2);
- _mm_store_si128((__m128i *)(dst + stride * 3), r3);
- _mm_store_si128((__m128i *)(dst + stride * 3 + 16), r3);
- left += 4;
- dst += stride * 4;
- } while (--i);
-}
-
-void aom_h_predictor_32x64_sse2(uint8_t *dst, ptrdiff_t stride,
- const uint8_t *above, const uint8_t *left) {
- (void)above;
- h_predictor_32xh(dst, stride, left, 64);
-}
-
-static INLINE void h_predictor_64xh(uint8_t *dst, ptrdiff_t stride,
- const uint8_t *left, int height) {
- int i = height >> 2;
- do {
- __m128i left4 = _mm_cvtsi32_si128(((uint32_t *)left)[0]);
- left4 = _mm_unpacklo_epi8(left4, left4);
- left4 = _mm_unpacklo_epi8(left4, left4);
- const __m128i r0 = _mm_shuffle_epi32(left4, 0x0);
- const __m128i r1 = _mm_shuffle_epi32(left4, 0x55);
- _mm_store_si128((__m128i *)dst, r0);
- _mm_store_si128((__m128i *)(dst + 16), r0);
- _mm_store_si128((__m128i *)(dst + 32), r0);
- _mm_store_si128((__m128i *)(dst + 48), r0);
- _mm_store_si128((__m128i *)(dst + stride), r1);
- _mm_store_si128((__m128i *)(dst + stride + 16), r1);
- _mm_store_si128((__m128i *)(dst + stride + 32), r1);
- _mm_store_si128((__m128i *)(dst + stride + 48), r1);
- const __m128i r2 = _mm_shuffle_epi32(left4, 0xaa);
- const __m128i r3 = _mm_shuffle_epi32(left4, 0xff);
- _mm_store_si128((__m128i *)(dst + stride * 2), r2);
- _mm_store_si128((__m128i *)(dst + stride * 2 + 16), r2);
- _mm_store_si128((__m128i *)(dst + stride * 2 + 32), r2);
- _mm_store_si128((__m128i *)(dst + stride * 2 + 48), r2);
- _mm_store_si128((__m128i *)(dst + stride * 3), r3);
- _mm_store_si128((__m128i *)(dst + stride * 3 + 16), r3);
- _mm_store_si128((__m128i *)(dst + stride * 3 + 32), r3);
- _mm_store_si128((__m128i *)(dst + stride * 3 + 48), r3);
- left += 4;
- dst += stride * 4;
- } while (--i);
-}
-
-void aom_h_predictor_64x64_sse2(uint8_t *dst, ptrdiff_t stride,
- const uint8_t *above, const uint8_t *left) {
- (void)above;
- h_predictor_64xh(dst, stride, left, 64);
-}
-
-void aom_h_predictor_64x32_sse2(uint8_t *dst, ptrdiff_t stride,
- const uint8_t *above, const uint8_t *left) {
- (void)above;
- h_predictor_64xh(dst, stride, left, 32);
-}
-
-void aom_h_predictor_64x16_sse2(uint8_t *dst, ptrdiff_t stride,
- const uint8_t *above, const uint8_t *left) {
- (void)above;
- h_predictor_64xh(dst, stride, left, 16);
-}
diff --git a/third_party/aom/aom_dsp/x86/intrapred_sse2_asm.asm b/third_party/aom/aom_dsp/x86/intrapred_sse2_asm.asm
deleted file mode 100644
index 9aece27be..000000000
--- a/third_party/aom/aom_dsp/x86/intrapred_sse2_asm.asm
+++ /dev/null
@@ -1,625 +0,0 @@
-;
-; Copyright (c) 2016, Alliance for Open Media. All rights reserved
-;
-; This source code is subject to the terms of the BSD 2 Clause License and
-; the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
-; was not distributed with this source code in the LICENSE file, you can
-; obtain it at www.aomedia.org/license/software. If the Alliance for Open
-; Media Patent License 1.0 was not distributed with this source code in the
-; PATENTS file, you can obtain it at www.aomedia.org/license/patent.
-;
-
-;
-
-%include "third_party/x86inc/x86inc.asm"
-
-SECTION_RODATA
-pb_1: times 16 db 1
-pw_4: times 8 dw 4
-pw_8: times 8 dw 8
-pw_16: times 8 dw 16
-pw_32: times 8 dw 32
-dc_128: times 16 db 128
-pw2_4: times 8 dw 2
-pw2_8: times 8 dw 4
-pw2_16: times 8 dw 8
-pw2_32: times 8 dw 16
-
-SECTION .text
-
-; ------------------------------------------
-; input: x, y, z, result
-;
-; trick from pascal
-; (x+2y+z+2)>>2 can be calculated as:
-; result = avg(x,z)
-; result -= xor(x,z) & 1
-; result = avg(result,y)
-; ------------------------------------------
-%macro X_PLUS_2Y_PLUS_Z_PLUS_2_RSH_2 4
- pavgb %4, %1, %3
- pxor %3, %1
- pand %3, [GLOBAL(pb_1)]
- psubb %4, %3
- pavgb %4, %2
-%endmacro
-
-INIT_XMM sse2
-cglobal dc_predictor_4x4, 4, 5, 3, dst, stride, above, left, goffset
- GET_GOT goffsetq
-
- movd m2, [leftq]
- movd m0, [aboveq]
- pxor m1, m1
- punpckldq m0, m2
- psadbw m0, m1
- paddw m0, [GLOBAL(pw_4)]
- psraw m0, 3
- pshuflw m0, m0, 0x0
- packuswb m0, m0
- movd [dstq ], m0
- movd [dstq+strideq], m0
- lea dstq, [dstq+strideq*2]
- movd [dstq ], m0
- movd [dstq+strideq], m0
-
- RESTORE_GOT
- RET
-
-INIT_XMM sse2
-cglobal dc_left_predictor_4x4, 2, 5, 2, dst, stride, above, left, goffset
- movifnidn leftq, leftmp
- GET_GOT goffsetq
-
- pxor m1, m1
- movd m0, [leftq]
- psadbw m0, m1
- paddw m0, [GLOBAL(pw2_4)]
- psraw m0, 2
- pshuflw m0, m0, 0x0
- packuswb m0, m0
- movd [dstq ], m0
- movd [dstq+strideq], m0
- lea dstq, [dstq+strideq*2]
- movd [dstq ], m0
- movd [dstq+strideq], m0
-
- RESTORE_GOT
- RET
-
-INIT_XMM sse2
-cglobal dc_top_predictor_4x4, 3, 5, 2, dst, stride, above, left, goffset
- GET_GOT goffsetq
-
- pxor m1, m1
- movd m0, [aboveq]
- psadbw m0, m1
- paddw m0, [GLOBAL(pw2_4)]
- psraw m0, 2
- pshuflw m0, m0, 0x0
- packuswb m0, m0
- movd [dstq ], m0
- movd [dstq+strideq], m0
- lea dstq, [dstq+strideq*2]
- movd [dstq ], m0
- movd [dstq+strideq], m0
-
- RESTORE_GOT
- RET
-
-INIT_XMM sse2
-cglobal dc_predictor_8x8, 4, 5, 3, dst, stride, above, left, goffset
- GET_GOT goffsetq
-
- pxor m1, m1
- movq m0, [aboveq]
- movq m2, [leftq]
- DEFINE_ARGS dst, stride, stride3
- lea stride3q, [strideq*3]
- psadbw m0, m1
- psadbw m2, m1
- paddw m0, m2
- paddw m0, [GLOBAL(pw_8)]
- psraw m0, 4
- punpcklbw m0, m0
- pshuflw m0, m0, 0x0
- movq [dstq ], m0
- movq [dstq+strideq ], m0
- movq [dstq+strideq*2], m0
- movq [dstq+stride3q ], m0
- lea dstq, [dstq+strideq*4]
- movq [dstq ], m0
- movq [dstq+strideq ], m0
- movq [dstq+strideq*2], m0
- movq [dstq+stride3q ], m0
-
- RESTORE_GOT
- RET
-
-INIT_XMM sse2
-cglobal dc_top_predictor_8x8, 3, 5, 2, dst, stride, above, left, goffset
- GET_GOT goffsetq
-
- pxor m1, m1
- movq m0, [aboveq]
- DEFINE_ARGS dst, stride, stride3
- lea stride3q, [strideq*3]
- psadbw m0, m1
- paddw m0, [GLOBAL(pw2_8)]
- psraw m0, 3
- punpcklbw m0, m0
- pshuflw m0, m0, 0x0
- movq [dstq ], m0
- movq [dstq+strideq ], m0
- movq [dstq+strideq*2], m0
- movq [dstq+stride3q ], m0
- lea dstq, [dstq+strideq*4]
- movq [dstq ], m0
- movq [dstq+strideq ], m0
- movq [dstq+strideq*2], m0
- movq [dstq+stride3q ], m0
-
- RESTORE_GOT
- RET
-
-INIT_XMM sse2
-cglobal dc_left_predictor_8x8, 2, 5, 2, dst, stride, above, left, goffset
- movifnidn leftq, leftmp
- GET_GOT goffsetq
-
- pxor m1, m1
- movq m0, [leftq]
- DEFINE_ARGS dst, stride, stride3
- lea stride3q, [strideq*3]
- psadbw m0, m1
- paddw m0, [GLOBAL(pw2_8)]
- psraw m0, 3
- punpcklbw m0, m0
- pshuflw m0, m0, 0x0
- movq [dstq ], m0
- movq [dstq+strideq ], m0
- movq [dstq+strideq*2], m0
- movq [dstq+stride3q ], m0
- lea dstq, [dstq+strideq*4]
- movq [dstq ], m0
- movq [dstq+strideq ], m0
- movq [dstq+strideq*2], m0
- movq [dstq+stride3q ], m0
-
- RESTORE_GOT
- RET
-
-INIT_XMM sse2
-cglobal dc_128_predictor_4x4, 2, 5, 1, dst, stride, above, left, goffset
- GET_GOT goffsetq
-
- DEFINE_ARGS dst, stride, stride3
- lea stride3q, [strideq*3]
- movd m0, [GLOBAL(dc_128)]
- movd [dstq ], m0
- movd [dstq+strideq ], m0
- movd [dstq+strideq*2], m0
- movd [dstq+stride3q ], m0
- RESTORE_GOT
- RET
-
-INIT_XMM sse2
-cglobal dc_128_predictor_8x8, 2, 5, 1, dst, stride, above, left, goffset
- GET_GOT goffsetq
-
- DEFINE_ARGS dst, stride, stride3
- lea stride3q, [strideq*3]
- movq m0, [GLOBAL(dc_128)]
- movq [dstq ], m0
- movq [dstq+strideq ], m0
- movq [dstq+strideq*2], m0
- movq [dstq+stride3q ], m0
- lea dstq, [dstq+strideq*4]
- movq [dstq ], m0
- movq [dstq+strideq ], m0
- movq [dstq+strideq*2], m0
- movq [dstq+stride3q ], m0
- RESTORE_GOT
- RET
-
-INIT_XMM sse2
-cglobal dc_predictor_16x16, 4, 5, 3, dst, stride, above, left, goffset
- GET_GOT goffsetq
-
- pxor m1, m1
- mova m0, [aboveq]
- mova m2, [leftq]
- DEFINE_ARGS dst, stride, stride3, lines4
- lea stride3q, [strideq*3]
- mov lines4d, 4
- psadbw m0, m1
- psadbw m2, m1
- paddw m0, m2
- movhlps m2, m0
- paddw m0, m2
- paddw m0, [GLOBAL(pw_16)]
- psraw m0, 5
- pshuflw m0, m0, 0x0
- punpcklqdq m0, m0
- packuswb m0, m0
-.loop:
- mova [dstq ], m0
- mova [dstq+strideq ], m0
- mova [dstq+strideq*2], m0
- mova [dstq+stride3q ], m0
- lea dstq, [dstq+strideq*4]
- dec lines4d
- jnz .loop
-
- RESTORE_GOT
- REP_RET
-
-
-INIT_XMM sse2
-cglobal dc_top_predictor_16x16, 4, 5, 3, dst, stride, above, left, goffset
- GET_GOT goffsetq
-
- pxor m1, m1
- mova m0, [aboveq]
- DEFINE_ARGS dst, stride, stride3, lines4
- lea stride3q, [strideq*3]
- mov lines4d, 4
- psadbw m0, m1
- movhlps m2, m0
- paddw m0, m2
- paddw m0, [GLOBAL(pw2_16)]
- psraw m0, 4
- pshuflw m0, m0, 0x0
- punpcklqdq m0, m0
- packuswb m0, m0
-.loop:
- mova [dstq ], m0
- mova [dstq+strideq ], m0
- mova [dstq+strideq*2], m0
- mova [dstq+stride3q ], m0
- lea dstq, [dstq+strideq*4]
- dec lines4d
- jnz .loop
-
- RESTORE_GOT
- REP_RET
-
-INIT_XMM sse2
-cglobal dc_left_predictor_16x16, 4, 5, 3, dst, stride, above, left, goffset
- GET_GOT goffsetq
-
- pxor m1, m1
- mova m0, [leftq]
- DEFINE_ARGS dst, stride, stride3, lines4
- lea stride3q, [strideq*3]
- mov lines4d, 4
- psadbw m0, m1
- movhlps m2, m0
- paddw m0, m2
- paddw m0, [GLOBAL(pw2_16)]
- psraw m0, 4
- pshuflw m0, m0, 0x0
- punpcklqdq m0, m0
- packuswb m0, m0
-.loop:
- mova [dstq ], m0
- mova [dstq+strideq ], m0
- mova [dstq+strideq*2], m0
- mova [dstq+stride3q ], m0
- lea dstq, [dstq+strideq*4]
- dec lines4d
- jnz .loop
-
- RESTORE_GOT
- REP_RET
-
-INIT_XMM sse2
-cglobal dc_128_predictor_16x16, 4, 5, 3, dst, stride, above, left, goffset
- GET_GOT goffsetq
-
- DEFINE_ARGS dst, stride, stride3, lines4
- lea stride3q, [strideq*3]
- mov lines4d, 4
- mova m0, [GLOBAL(dc_128)]
-.loop:
- mova [dstq ], m0
- mova [dstq+strideq ], m0
- mova [dstq+strideq*2], m0
- mova [dstq+stride3q ], m0
- lea dstq, [dstq+strideq*4]
- dec lines4d
- jnz .loop
- RESTORE_GOT
- RET
-
-
-INIT_XMM sse2
-cglobal dc_predictor_32x32, 4, 5, 5, dst, stride, above, left, goffset
- GET_GOT goffsetq
-
- pxor m1, m1
- mova m0, [aboveq]
- mova m2, [aboveq+16]
- mova m3, [leftq]
- mova m4, [leftq+16]
- DEFINE_ARGS dst, stride, stride3, lines4
- lea stride3q, [strideq*3]
- mov lines4d, 8
- psadbw m0, m1
- psadbw m2, m1
- psadbw m3, m1
- psadbw m4, m1
- paddw m0, m2
- paddw m0, m3
- paddw m0, m4
- movhlps m2, m0
- paddw m0, m2
- paddw m0, [GLOBAL(pw_32)]
- psraw m0, 6
- pshuflw m0, m0, 0x0
- punpcklqdq m0, m0
- packuswb m0, m0
-.loop:
- mova [dstq ], m0
- mova [dstq +16], m0
- mova [dstq+strideq ], m0
- mova [dstq+strideq +16], m0
- mova [dstq+strideq*2 ], m0
- mova [dstq+strideq*2+16], m0
- mova [dstq+stride3q ], m0
- mova [dstq+stride3q +16], m0
- lea dstq, [dstq+strideq*4]
- dec lines4d
- jnz .loop
-
- RESTORE_GOT
- REP_RET
-
-INIT_XMM sse2
-cglobal dc_top_predictor_32x32, 4, 5, 5, dst, stride, above, left, goffset
- GET_GOT goffsetq
-
- pxor m1, m1
- mova m0, [aboveq]
- mova m2, [aboveq+16]
- DEFINE_ARGS dst, stride, stride3, lines4
- lea stride3q, [strideq*3]
- mov lines4d, 8
- psadbw m0, m1
- psadbw m2, m1
- paddw m0, m2
- movhlps m2, m0
- paddw m0, m2
- paddw m0, [GLOBAL(pw2_32)]
- psraw m0, 5
- pshuflw m0, m0, 0x0
- punpcklqdq m0, m0
- packuswb m0, m0
-.loop:
- mova [dstq ], m0
- mova [dstq +16], m0
- mova [dstq+strideq ], m0
- mova [dstq+strideq +16], m0
- mova [dstq+strideq*2 ], m0
- mova [dstq+strideq*2+16], m0
- mova [dstq+stride3q ], m0
- mova [dstq+stride3q +16], m0
- lea dstq, [dstq+strideq*4]
- dec lines4d
- jnz .loop
-
- RESTORE_GOT
- REP_RET
-
-INIT_XMM sse2
-cglobal dc_left_predictor_32x32, 4, 5, 5, dst, stride, above, left, goffset
- GET_GOT goffsetq
-
- pxor m1, m1
- mova m0, [leftq]
- mova m2, [leftq+16]
- DEFINE_ARGS dst, stride, stride3, lines4
- lea stride3q, [strideq*3]
- mov lines4d, 8
- psadbw m0, m1
- psadbw m2, m1
- paddw m0, m2
- movhlps m2, m0
- paddw m0, m2
- paddw m0, [GLOBAL(pw2_32)]
- psraw m0, 5
- pshuflw m0, m0, 0x0
- punpcklqdq m0, m0
- packuswb m0, m0
-.loop:
- mova [dstq ], m0
- mova [dstq +16], m0
- mova [dstq+strideq ], m0
- mova [dstq+strideq +16], m0
- mova [dstq+strideq*2 ], m0
- mova [dstq+strideq*2+16], m0
- mova [dstq+stride3q ], m0
- mova [dstq+stride3q +16], m0
- lea dstq, [dstq+strideq*4]
- dec lines4d
- jnz .loop
-
- RESTORE_GOT
- REP_RET
-
-INIT_XMM sse2
-cglobal dc_128_predictor_32x32, 4, 5, 3, dst, stride, above, left, goffset
- GET_GOT goffsetq
-
- DEFINE_ARGS dst, stride, stride3, lines4
- lea stride3q, [strideq*3]
- mov lines4d, 8
- mova m0, [GLOBAL(dc_128)]
-.loop:
- mova [dstq ], m0
- mova [dstq +16], m0
- mova [dstq+strideq ], m0
- mova [dstq+strideq +16], m0
- mova [dstq+strideq*2 ], m0
- mova [dstq+strideq*2+16], m0
- mova [dstq+stride3q ], m0
- mova [dstq+stride3q +16], m0
- lea dstq, [dstq+strideq*4]
- dec lines4d
- jnz .loop
- RESTORE_GOT
- RET
-
-INIT_XMM sse2
-cglobal v_predictor_4x4, 3, 3, 1, dst, stride, above
- movd m0, [aboveq]
- movd [dstq ], m0
- movd [dstq+strideq], m0
- lea dstq, [dstq+strideq*2]
- movd [dstq ], m0
- movd [dstq+strideq], m0
- RET
-
-INIT_XMM sse2
-cglobal v_predictor_8x8, 3, 3, 1, dst, stride, above
- movq m0, [aboveq]
- DEFINE_ARGS dst, stride, stride3
- lea stride3q, [strideq*3]
- movq [dstq ], m0
- movq [dstq+strideq ], m0
- movq [dstq+strideq*2], m0
- movq [dstq+stride3q ], m0
- lea dstq, [dstq+strideq*4]
- movq [dstq ], m0
- movq [dstq+strideq ], m0
- movq [dstq+strideq*2], m0
- movq [dstq+stride3q ], m0
- RET
-
-INIT_XMM sse2
-cglobal v_predictor_16x16, 3, 4, 1, dst, stride, above
- mova m0, [aboveq]
- DEFINE_ARGS dst, stride, stride3, nlines4
- lea stride3q, [strideq*3]
- mov nlines4d, 4
-.loop:
- mova [dstq ], m0
- mova [dstq+strideq ], m0
- mova [dstq+strideq*2], m0
- mova [dstq+stride3q ], m0
- lea dstq, [dstq+strideq*4]
- dec nlines4d
- jnz .loop
- REP_RET
-
-INIT_XMM sse2
-cglobal v_predictor_32x32, 3, 4, 2, dst, stride, above
- mova m0, [aboveq]
- mova m1, [aboveq+16]
- DEFINE_ARGS dst, stride, stride3, nlines4
- lea stride3q, [strideq*3]
- mov nlines4d, 8
-.loop:
- mova [dstq ], m0
- mova [dstq +16], m1
- mova [dstq+strideq ], m0
- mova [dstq+strideq +16], m1
- mova [dstq+strideq*2 ], m0
- mova [dstq+strideq*2+16], m1
- mova [dstq+stride3q ], m0
- mova [dstq+stride3q +16], m1
- lea dstq, [dstq+strideq*4]
- dec nlines4d
- jnz .loop
- REP_RET
-
-INIT_XMM sse2
-cglobal h_predictor_4x4, 2, 4, 4, dst, stride, line, left
- movifnidn leftq, leftmp
- movd m0, [leftq]
- punpcklbw m0, m0
- punpcklbw m0, m0
- pshufd m1, m0, 0x1
- movd [dstq ], m0
- movd [dstq+strideq], m1
- pshufd m2, m0, 0x2
- lea dstq, [dstq+strideq*2]
- pshufd m3, m0, 0x3
- movd [dstq ], m2
- movd [dstq+strideq], m3
- RET
-
-INIT_XMM sse2
-cglobal h_predictor_8x8, 2, 5, 3, dst, stride, line, left
- movifnidn leftq, leftmp
- mov lineq, -2
- DEFINE_ARGS dst, stride, line, left, stride3
- lea stride3q, [strideq*3]
- movq m0, [leftq ]
- punpcklbw m0, m0 ; l1 l1 l2 l2 ... l8 l8
-.loop:
- pshuflw m1, m0, 0x0 ; l1 l1 l1 l1 l1 l1 l1 l1
- pshuflw m2, m0, 0x55 ; l2 l2 l2 l2 l2 l2 l2 l2
- movq [dstq ], m1
- movq [dstq+strideq], m2
- pshuflw m1, m0, 0xaa
- pshuflw m2, m0, 0xff
- movq [dstq+strideq*2], m1
- movq [dstq+stride3q ], m2
- pshufd m0, m0, 0xe ; [63:0] l5 l5 l6 l6 l7 l7 l8 l8
- inc lineq
- lea dstq, [dstq+strideq*4]
- jnz .loop
- REP_RET
-
-INIT_XMM sse2
-cglobal h_predictor_16x16, 2, 5, 3, dst, stride, line, left
- movifnidn leftq, leftmp
- mov lineq, -4
- DEFINE_ARGS dst, stride, line, left, stride3
- lea stride3q, [strideq*3]
-.loop:
- movd m0, [leftq]
- punpcklbw m0, m0
- punpcklbw m0, m0 ; l1 to l4 each repeated 4 times
- pshufd m1, m0, 0x0 ; l1 repeated 16 times
- pshufd m2, m0, 0x55 ; l2 repeated 16 times
- mova [dstq ], m1
- mova [dstq+strideq ], m2
- pshufd m1, m0, 0xaa
- pshufd m2, m0, 0xff
- mova [dstq+strideq*2], m1
- mova [dstq+stride3q ], m2
- inc lineq
- lea leftq, [leftq+4 ]
- lea dstq, [dstq+strideq*4]
- jnz .loop
- REP_RET
-
-INIT_XMM sse2
-cglobal h_predictor_32x32, 2, 5, 3, dst, stride, line, left
- movifnidn leftq, leftmp
- mov lineq, -8
- DEFINE_ARGS dst, stride, line, left, stride3
- lea stride3q, [strideq*3]
-.loop:
- movd m0, [leftq]
- punpcklbw m0, m0
- punpcklbw m0, m0 ; l1 to l4 each repeated 4 times
- pshufd m1, m0, 0x0 ; l1 repeated 16 times
- pshufd m2, m0, 0x55 ; l2 repeated 16 times
- mova [dstq ], m1
- mova [dstq+16 ], m1
- mova [dstq+strideq ], m2
- mova [dstq+strideq+16 ], m2
- pshufd m1, m0, 0xaa
- pshufd m2, m0, 0xff
- mova [dstq+strideq*2 ], m1
- mova [dstq+strideq*2+16], m1
- mova [dstq+stride3q ], m2
- mova [dstq+stride3q+16 ], m2
- inc lineq
- lea leftq, [leftq+4 ]
- lea dstq, [dstq+strideq*4]
- jnz .loop
- REP_RET
diff --git a/third_party/aom/aom_dsp/x86/intrapred_ssse3.c b/third_party/aom/aom_dsp/x86/intrapred_ssse3.c
deleted file mode 100644
index 807ed1770..000000000
--- a/third_party/aom/aom_dsp/x86/intrapred_ssse3.c
+++ /dev/null
@@ -1,1692 +0,0 @@
-/*
- * Copyright (c) 2017, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#include <tmmintrin.h>
-
-#include "config/aom_dsp_rtcd.h"
-
-#include "aom_dsp/intrapred_common.h"
-
-// -----------------------------------------------------------------------------
-// PAETH_PRED
-
-// Return 8 16-bit pixels in one row
-static INLINE __m128i paeth_8x1_pred(const __m128i *left, const __m128i *top,
- const __m128i *topleft) {
- const __m128i base = _mm_sub_epi16(_mm_add_epi16(*top, *left), *topleft);
-
- __m128i pl = _mm_abs_epi16(_mm_sub_epi16(base, *left));
- __m128i pt = _mm_abs_epi16(_mm_sub_epi16(base, *top));
- __m128i ptl = _mm_abs_epi16(_mm_sub_epi16(base, *topleft));
-
- __m128i mask1 = _mm_cmpgt_epi16(pl, pt);
- mask1 = _mm_or_si128(mask1, _mm_cmpgt_epi16(pl, ptl));
- __m128i mask2 = _mm_cmpgt_epi16(pt, ptl);
-
- pl = _mm_andnot_si128(mask1, *left);
-
- ptl = _mm_and_si128(mask2, *topleft);
- pt = _mm_andnot_si128(mask2, *top);
- pt = _mm_or_si128(pt, ptl);
- pt = _mm_and_si128(mask1, pt);
-
- return _mm_or_si128(pl, pt);
-}
-
-void aom_paeth_predictor_4x4_ssse3(uint8_t *dst, ptrdiff_t stride,
- const uint8_t *above, const uint8_t *left) {
- __m128i l = _mm_loadl_epi64((const __m128i *)left);
- const __m128i t = _mm_loadl_epi64((const __m128i *)above);
- const __m128i zero = _mm_setzero_si128();
- const __m128i t16 = _mm_unpacklo_epi8(t, zero);
- const __m128i tl16 = _mm_set1_epi16((uint16_t)above[-1]);
- __m128i rep = _mm_set1_epi16(0x8000);
- const __m128i one = _mm_set1_epi16(1);
-
- int i;
- for (i = 0; i < 4; ++i) {
- const __m128i l16 = _mm_shuffle_epi8(l, rep);
- const __m128i row = paeth_8x1_pred(&l16, &t16, &tl16);
-
- *(uint32_t *)dst = _mm_cvtsi128_si32(_mm_packus_epi16(row, row));
- dst += stride;
- rep = _mm_add_epi16(rep, one);
- }
-}
-
-void aom_paeth_predictor_4x8_ssse3(uint8_t *dst, ptrdiff_t stride,
- const uint8_t *above, const uint8_t *left) {
- __m128i l = _mm_loadl_epi64((const __m128i *)left);
- const __m128i t = _mm_loadl_epi64((const __m128i *)above);
- const __m128i zero = _mm_setzero_si128();
- const __m128i t16 = _mm_unpacklo_epi8(t, zero);
- const __m128i tl16 = _mm_set1_epi16((uint16_t)above[-1]);
- __m128i rep = _mm_set1_epi16(0x8000);
- const __m128i one = _mm_set1_epi16(1);
-
- int i;
- for (i = 0; i < 8; ++i) {
- const __m128i l16 = _mm_shuffle_epi8(l, rep);
- const __m128i row = paeth_8x1_pred(&l16, &t16, &tl16);
-
- *(uint32_t *)dst = _mm_cvtsi128_si32(_mm_packus_epi16(row, row));
- dst += stride;
- rep = _mm_add_epi16(rep, one);
- }
-}
-
-void aom_paeth_predictor_4x16_ssse3(uint8_t *dst, ptrdiff_t stride,
- const uint8_t *above, const uint8_t *left) {
- __m128i l = _mm_load_si128((const __m128i *)left);
- const __m128i t = _mm_cvtsi32_si128(((const uint32_t *)above)[0]);
- const __m128i zero = _mm_setzero_si128();
- const __m128i t16 = _mm_unpacklo_epi8(t, zero);
- const __m128i tl16 = _mm_set1_epi16((uint16_t)above[-1]);
- __m128i rep = _mm_set1_epi16(0x8000);
- const __m128i one = _mm_set1_epi16(1);
-
- for (int i = 0; i < 16; ++i) {
- const __m128i l16 = _mm_shuffle_epi8(l, rep);
- const __m128i row = paeth_8x1_pred(&l16, &t16, &tl16);
-
- *(uint32_t *)dst = _mm_cvtsi128_si32(_mm_packus_epi16(row, row));
- dst += stride;
- rep = _mm_add_epi16(rep, one);
- }
-}
-
-void aom_paeth_predictor_8x4_ssse3(uint8_t *dst, ptrdiff_t stride,
- const uint8_t *above, const uint8_t *left) {
- __m128i l = _mm_loadl_epi64((const __m128i *)left);
- const __m128i t = _mm_loadl_epi64((const __m128i *)above);
- const __m128i zero = _mm_setzero_si128();
- const __m128i t16 = _mm_unpacklo_epi8(t, zero);
- const __m128i tl16 = _mm_set1_epi16((uint16_t)above[-1]);
- __m128i rep = _mm_set1_epi16(0x8000);
- const __m128i one = _mm_set1_epi16(1);
-
- int i;
- for (i = 0; i < 4; ++i) {
- const __m128i l16 = _mm_shuffle_epi8(l, rep);
- const __m128i row = paeth_8x1_pred(&l16, &t16, &tl16);
-
- _mm_storel_epi64((__m128i *)dst, _mm_packus_epi16(row, row));
- dst += stride;
- rep = _mm_add_epi16(rep, one);
- }
-}
-
-void aom_paeth_predictor_8x8_ssse3(uint8_t *dst, ptrdiff_t stride,
- const uint8_t *above, const uint8_t *left) {
- __m128i l = _mm_loadl_epi64((const __m128i *)left);
- const __m128i t = _mm_loadl_epi64((const __m128i *)above);
- const __m128i zero = _mm_setzero_si128();
- const __m128i t16 = _mm_unpacklo_epi8(t, zero);
- const __m128i tl16 = _mm_set1_epi16((uint16_t)above[-1]);
- __m128i rep = _mm_set1_epi16(0x8000);
- const __m128i one = _mm_set1_epi16(1);
-
- int i;
- for (i = 0; i < 8; ++i) {
- const __m128i l16 = _mm_shuffle_epi8(l, rep);
- const __m128i row = paeth_8x1_pred(&l16, &t16, &tl16);
-
- _mm_storel_epi64((__m128i *)dst, _mm_packus_epi16(row, row));
- dst += stride;
- rep = _mm_add_epi16(rep, one);
- }
-}
-
-void aom_paeth_predictor_8x16_ssse3(uint8_t *dst, ptrdiff_t stride,
- const uint8_t *above, const uint8_t *left) {
- __m128i l = _mm_load_si128((const __m128i *)left);
- const __m128i t = _mm_loadl_epi64((const __m128i *)above);
- const __m128i zero = _mm_setzero_si128();
- const __m128i t16 = _mm_unpacklo_epi8(t, zero);
- const __m128i tl16 = _mm_set1_epi16((uint16_t)above[-1]);
- __m128i rep = _mm_set1_epi16(0x8000);
- const __m128i one = _mm_set1_epi16(1);
-
- int i;
- for (i = 0; i < 16; ++i) {
- const __m128i l16 = _mm_shuffle_epi8(l, rep);
- const __m128i row = paeth_8x1_pred(&l16, &t16, &tl16);
-
- _mm_storel_epi64((__m128i *)dst, _mm_packus_epi16(row, row));
- dst += stride;
- rep = _mm_add_epi16(rep, one);
- }
-}
-
-void aom_paeth_predictor_8x32_ssse3(uint8_t *dst, ptrdiff_t stride,
- const uint8_t *above, const uint8_t *left) {
- const __m128i t = _mm_loadl_epi64((const __m128i *)above);
- const __m128i zero = _mm_setzero_si128();
- const __m128i t16 = _mm_unpacklo_epi8(t, zero);
- const __m128i tl16 = _mm_set1_epi16((uint16_t)above[-1]);
- const __m128i one = _mm_set1_epi16(1);
-
- for (int j = 0; j < 2; ++j) {
- const __m128i l = _mm_load_si128((const __m128i *)(left + j * 16));
- __m128i rep = _mm_set1_epi16(0x8000);
- for (int i = 0; i < 16; ++i) {
- const __m128i l16 = _mm_shuffle_epi8(l, rep);
- const __m128i row = paeth_8x1_pred(&l16, &t16, &tl16);
-
- _mm_storel_epi64((__m128i *)dst, _mm_packus_epi16(row, row));
- dst += stride;
- rep = _mm_add_epi16(rep, one);
- }
- }
-}
-
-// Return 16 8-bit pixels in one row
-static INLINE __m128i paeth_16x1_pred(const __m128i *left, const __m128i *top0,
- const __m128i *top1,
- const __m128i *topleft) {
- const __m128i p0 = paeth_8x1_pred(left, top0, topleft);
- const __m128i p1 = paeth_8x1_pred(left, top1, topleft);
- return _mm_packus_epi16(p0, p1);
-}
-
-void aom_paeth_predictor_16x4_ssse3(uint8_t *dst, ptrdiff_t stride,
- const uint8_t *above, const uint8_t *left) {
- __m128i l = _mm_cvtsi32_si128(((const uint32_t *)left)[0]);
- const __m128i t = _mm_load_si128((const __m128i *)above);
- const __m128i zero = _mm_setzero_si128();
- const __m128i top0 = _mm_unpacklo_epi8(t, zero);
- const __m128i top1 = _mm_unpackhi_epi8(t, zero);
- const __m128i tl16 = _mm_set1_epi16((uint16_t)above[-1]);
- __m128i rep = _mm_set1_epi16(0x8000);
- const __m128i one = _mm_set1_epi16(1);
-
- for (int i = 0; i < 4; ++i) {
- const __m128i l16 = _mm_shuffle_epi8(l, rep);
- const __m128i row = paeth_16x1_pred(&l16, &top0, &top1, &tl16);
-
- _mm_store_si128((__m128i *)dst, row);
- dst += stride;
- rep = _mm_add_epi16(rep, one);
- }
-}
-
-void aom_paeth_predictor_16x8_ssse3(uint8_t *dst, ptrdiff_t stride,
- const uint8_t *above, const uint8_t *left) {
- __m128i l = _mm_loadl_epi64((const __m128i *)left);
- const __m128i t = _mm_load_si128((const __m128i *)above);
- const __m128i zero = _mm_setzero_si128();
- const __m128i top0 = _mm_unpacklo_epi8(t, zero);
- const __m128i top1 = _mm_unpackhi_epi8(t, zero);
- const __m128i tl16 = _mm_set1_epi16((uint16_t)above[-1]);
- __m128i rep = _mm_set1_epi16(0x8000);
- const __m128i one = _mm_set1_epi16(1);
-
- int i;
- for (i = 0; i < 8; ++i) {
- const __m128i l16 = _mm_shuffle_epi8(l, rep);
- const __m128i row = paeth_16x1_pred(&l16, &top0, &top1, &tl16);
-
- _mm_store_si128((__m128i *)dst, row);
- dst += stride;
- rep = _mm_add_epi16(rep, one);
- }
-}
-
-void aom_paeth_predictor_16x16_ssse3(uint8_t *dst, ptrdiff_t stride,
- const uint8_t *above,
- const uint8_t *left) {
- __m128i l = _mm_load_si128((const __m128i *)left);
- const __m128i t = _mm_load_si128((const __m128i *)above);
- const __m128i zero = _mm_setzero_si128();
- const __m128i top0 = _mm_unpacklo_epi8(t, zero);
- const __m128i top1 = _mm_unpackhi_epi8(t, zero);
- const __m128i tl16 = _mm_set1_epi16((uint16_t)above[-1]);
- __m128i rep = _mm_set1_epi16(0x8000);
- const __m128i one = _mm_set1_epi16(1);
-
- int i;
- for (i = 0; i < 16; ++i) {
- const __m128i l16 = _mm_shuffle_epi8(l, rep);
- const __m128i row = paeth_16x1_pred(&l16, &top0, &top1, &tl16);
-
- _mm_store_si128((__m128i *)dst, row);
- dst += stride;
- rep = _mm_add_epi16(rep, one);
- }
-}
-
-void aom_paeth_predictor_16x32_ssse3(uint8_t *dst, ptrdiff_t stride,
- const uint8_t *above,
- const uint8_t *left) {
- __m128i l = _mm_load_si128((const __m128i *)left);
- const __m128i t = _mm_load_si128((const __m128i *)above);
- const __m128i zero = _mm_setzero_si128();
- const __m128i top0 = _mm_unpacklo_epi8(t, zero);
- const __m128i top1 = _mm_unpackhi_epi8(t, zero);
- const __m128i tl16 = _mm_set1_epi16((uint16_t)above[-1]);
- __m128i rep = _mm_set1_epi16(0x8000);
- const __m128i one = _mm_set1_epi16(1);
- __m128i l16;
-
- int i;
- for (i = 0; i < 16; ++i) {
- l16 = _mm_shuffle_epi8(l, rep);
- const __m128i row = paeth_16x1_pred(&l16, &top0, &top1, &tl16);
-
- _mm_store_si128((__m128i *)dst, row);
- dst += stride;
- rep = _mm_add_epi16(rep, one);
- }
-
- l = _mm_load_si128((const __m128i *)(left + 16));
- rep = _mm_set1_epi16(0x8000);
- for (i = 0; i < 16; ++i) {
- l16 = _mm_shuffle_epi8(l, rep);
- const __m128i row = paeth_16x1_pred(&l16, &top0, &top1, &tl16);
-
- _mm_store_si128((__m128i *)dst, row);
- dst += stride;
- rep = _mm_add_epi16(rep, one);
- }
-}
-
-void aom_paeth_predictor_16x64_ssse3(uint8_t *dst, ptrdiff_t stride,
- const uint8_t *above,
- const uint8_t *left) {
- const __m128i t = _mm_load_si128((const __m128i *)above);
- const __m128i zero = _mm_setzero_si128();
- const __m128i top0 = _mm_unpacklo_epi8(t, zero);
- const __m128i top1 = _mm_unpackhi_epi8(t, zero);
- const __m128i tl16 = _mm_set1_epi16((uint16_t)above[-1]);
- const __m128i one = _mm_set1_epi16(1);
-
- for (int j = 0; j < 4; ++j) {
- const __m128i l = _mm_load_si128((const __m128i *)(left + j * 16));
- __m128i rep = _mm_set1_epi16(0x8000);
- for (int i = 0; i < 16; ++i) {
- const __m128i l16 = _mm_shuffle_epi8(l, rep);
- const __m128i row = paeth_16x1_pred(&l16, &top0, &top1, &tl16);
- _mm_store_si128((__m128i *)dst, row);
- dst += stride;
- rep = _mm_add_epi16(rep, one);
- }
- }
-}
-
-void aom_paeth_predictor_32x8_ssse3(uint8_t *dst, ptrdiff_t stride,
- const uint8_t *above, const uint8_t *left) {
- const __m128i a = _mm_load_si128((const __m128i *)above);
- const __m128i b = _mm_load_si128((const __m128i *)(above + 16));
- const __m128i zero = _mm_setzero_si128();
- const __m128i al = _mm_unpacklo_epi8(a, zero);
- const __m128i ah = _mm_unpackhi_epi8(a, zero);
- const __m128i bl = _mm_unpacklo_epi8(b, zero);
- const __m128i bh = _mm_unpackhi_epi8(b, zero);
-
- const __m128i tl16 = _mm_set1_epi16((uint16_t)above[-1]);
- __m128i rep = _mm_set1_epi16(0x8000);
- const __m128i one = _mm_set1_epi16(1);
- const __m128i l = _mm_loadl_epi64((const __m128i *)left);
- __m128i l16;
-
- for (int i = 0; i < 8; ++i) {
- l16 = _mm_shuffle_epi8(l, rep);
- const __m128i r32l = paeth_16x1_pred(&l16, &al, &ah, &tl16);
- const __m128i r32h = paeth_16x1_pred(&l16, &bl, &bh, &tl16);
-
- _mm_store_si128((__m128i *)dst, r32l);
- _mm_store_si128((__m128i *)(dst + 16), r32h);
- dst += stride;
- rep = _mm_add_epi16(rep, one);
- }
-}
-
-void aom_paeth_predictor_32x16_ssse3(uint8_t *dst, ptrdiff_t stride,
- const uint8_t *above,
- const uint8_t *left) {
- const __m128i a = _mm_load_si128((const __m128i *)above);
- const __m128i b = _mm_load_si128((const __m128i *)(above + 16));
- const __m128i zero = _mm_setzero_si128();
- const __m128i al = _mm_unpacklo_epi8(a, zero);
- const __m128i ah = _mm_unpackhi_epi8(a, zero);
- const __m128i bl = _mm_unpacklo_epi8(b, zero);
- const __m128i bh = _mm_unpackhi_epi8(b, zero);
-
- const __m128i tl16 = _mm_set1_epi16((uint16_t)above[-1]);
- __m128i rep = _mm_set1_epi16(0x8000);
- const __m128i one = _mm_set1_epi16(1);
- __m128i l = _mm_load_si128((const __m128i *)left);
- __m128i l16;
-
- int i;
- for (i = 0; i < 16; ++i) {
- l16 = _mm_shuffle_epi8(l, rep);
- const __m128i r32l = paeth_16x1_pred(&l16, &al, &ah, &tl16);
- const __m128i r32h = paeth_16x1_pred(&l16, &bl, &bh, &tl16);
-
- _mm_store_si128((__m128i *)dst, r32l);
- _mm_store_si128((__m128i *)(dst + 16), r32h);
- dst += stride;
- rep = _mm_add_epi16(rep, one);
- }
-}
-
-void aom_paeth_predictor_32x32_ssse3(uint8_t *dst, ptrdiff_t stride,
- const uint8_t *above,
- const uint8_t *left) {
- const __m128i a = _mm_load_si128((const __m128i *)above);
- const __m128i b = _mm_load_si128((const __m128i *)(above + 16));
- const __m128i zero = _mm_setzero_si128();
- const __m128i al = _mm_unpacklo_epi8(a, zero);
- const __m128i ah = _mm_unpackhi_epi8(a, zero);
- const __m128i bl = _mm_unpacklo_epi8(b, zero);
- const __m128i bh = _mm_unpackhi_epi8(b, zero);
-
- const __m128i tl16 = _mm_set1_epi16((uint16_t)above[-1]);
- __m128i rep = _mm_set1_epi16(0x8000);
- const __m128i one = _mm_set1_epi16(1);
- __m128i l = _mm_load_si128((const __m128i *)left);
- __m128i l16;
-
- int i;
- for (i = 0; i < 16; ++i) {
- l16 = _mm_shuffle_epi8(l, rep);
- const __m128i r32l = paeth_16x1_pred(&l16, &al, &ah, &tl16);
- const __m128i r32h = paeth_16x1_pred(&l16, &bl, &bh, &tl16);
-
- _mm_store_si128((__m128i *)dst, r32l);
- _mm_store_si128((__m128i *)(dst + 16), r32h);
- dst += stride;
- rep = _mm_add_epi16(rep, one);
- }
-
- rep = _mm_set1_epi16(0x8000);
- l = _mm_load_si128((const __m128i *)(left + 16));
- for (i = 0; i < 16; ++i) {
- l16 = _mm_shuffle_epi8(l, rep);
- const __m128i r32l = paeth_16x1_pred(&l16, &al, &ah, &tl16);
- const __m128i r32h = paeth_16x1_pred(&l16, &bl, &bh, &tl16);
-
- _mm_store_si128((__m128i *)dst, r32l);
- _mm_store_si128((__m128i *)(dst + 16), r32h);
- dst += stride;
- rep = _mm_add_epi16(rep, one);
- }
-}
-
-void aom_paeth_predictor_32x64_ssse3(uint8_t *dst, ptrdiff_t stride,
- const uint8_t *above,
- const uint8_t *left) {
- const __m128i a = _mm_load_si128((const __m128i *)above);
- const __m128i b = _mm_load_si128((const __m128i *)(above + 16));
- const __m128i zero = _mm_setzero_si128();
- const __m128i al = _mm_unpacklo_epi8(a, zero);
- const __m128i ah = _mm_unpackhi_epi8(a, zero);
- const __m128i bl = _mm_unpacklo_epi8(b, zero);
- const __m128i bh = _mm_unpackhi_epi8(b, zero);
-
- const __m128i tl16 = _mm_set1_epi16((uint16_t)above[-1]);
- const __m128i one = _mm_set1_epi16(1);
- __m128i l16;
-
- int i, j;
- for (j = 0; j < 4; ++j) {
- const __m128i l = _mm_load_si128((const __m128i *)(left + j * 16));
- __m128i rep = _mm_set1_epi16(0x8000);
- for (i = 0; i < 16; ++i) {
- l16 = _mm_shuffle_epi8(l, rep);
- const __m128i r32l = paeth_16x1_pred(&l16, &al, &ah, &tl16);
- const __m128i r32h = paeth_16x1_pred(&l16, &bl, &bh, &tl16);
-
- _mm_store_si128((__m128i *)dst, r32l);
- _mm_store_si128((__m128i *)(dst + 16), r32h);
- dst += stride;
- rep = _mm_add_epi16(rep, one);
- }
- }
-}
-
-void aom_paeth_predictor_64x32_ssse3(uint8_t *dst, ptrdiff_t stride,
- const uint8_t *above,
- const uint8_t *left) {
- const __m128i a = _mm_load_si128((const __m128i *)above);
- const __m128i b = _mm_load_si128((const __m128i *)(above + 16));
- const __m128i c = _mm_load_si128((const __m128i *)(above + 32));
- const __m128i d = _mm_load_si128((const __m128i *)(above + 48));
- const __m128i zero = _mm_setzero_si128();
- const __m128i al = _mm_unpacklo_epi8(a, zero);
- const __m128i ah = _mm_unpackhi_epi8(a, zero);
- const __m128i bl = _mm_unpacklo_epi8(b, zero);
- const __m128i bh = _mm_unpackhi_epi8(b, zero);
- const __m128i cl = _mm_unpacklo_epi8(c, zero);
- const __m128i ch = _mm_unpackhi_epi8(c, zero);
- const __m128i dl = _mm_unpacklo_epi8(d, zero);
- const __m128i dh = _mm_unpackhi_epi8(d, zero);
-
- const __m128i tl16 = _mm_set1_epi16((uint16_t)above[-1]);
- const __m128i one = _mm_set1_epi16(1);
- __m128i l16;
-
- int i, j;
- for (j = 0; j < 2; ++j) {
- const __m128i l = _mm_load_si128((const __m128i *)(left + j * 16));
- __m128i rep = _mm_set1_epi16(0x8000);
- for (i = 0; i < 16; ++i) {
- l16 = _mm_shuffle_epi8(l, rep);
- const __m128i r0 = paeth_16x1_pred(&l16, &al, &ah, &tl16);
- const __m128i r1 = paeth_16x1_pred(&l16, &bl, &bh, &tl16);
- const __m128i r2 = paeth_16x1_pred(&l16, &cl, &ch, &tl16);
- const __m128i r3 = paeth_16x1_pred(&l16, &dl, &dh, &tl16);
-
- _mm_store_si128((__m128i *)dst, r0);
- _mm_store_si128((__m128i *)(dst + 16), r1);
- _mm_store_si128((__m128i *)(dst + 32), r2);
- _mm_store_si128((__m128i *)(dst + 48), r3);
- dst += stride;
- rep = _mm_add_epi16(rep, one);
- }
- }
-}
-
-void aom_paeth_predictor_64x64_ssse3(uint8_t *dst, ptrdiff_t stride,
- const uint8_t *above,
- const uint8_t *left) {
- const __m128i a = _mm_load_si128((const __m128i *)above);
- const __m128i b = _mm_load_si128((const __m128i *)(above + 16));
- const __m128i c = _mm_load_si128((const __m128i *)(above + 32));
- const __m128i d = _mm_load_si128((const __m128i *)(above + 48));
- const __m128i zero = _mm_setzero_si128();
- const __m128i al = _mm_unpacklo_epi8(a, zero);
- const __m128i ah = _mm_unpackhi_epi8(a, zero);
- const __m128i bl = _mm_unpacklo_epi8(b, zero);
- const __m128i bh = _mm_unpackhi_epi8(b, zero);
- const __m128i cl = _mm_unpacklo_epi8(c, zero);
- const __m128i ch = _mm_unpackhi_epi8(c, zero);
- const __m128i dl = _mm_unpacklo_epi8(d, zero);
- const __m128i dh = _mm_unpackhi_epi8(d, zero);
-
- const __m128i tl16 = _mm_set1_epi16((uint16_t)above[-1]);
- const __m128i one = _mm_set1_epi16(1);
- __m128i l16;
-
- int i, j;
- for (j = 0; j < 4; ++j) {
- const __m128i l = _mm_load_si128((const __m128i *)(left + j * 16));
- __m128i rep = _mm_set1_epi16(0x8000);
- for (i = 0; i < 16; ++i) {
- l16 = _mm_shuffle_epi8(l, rep);
- const __m128i r0 = paeth_16x1_pred(&l16, &al, &ah, &tl16);
- const __m128i r1 = paeth_16x1_pred(&l16, &bl, &bh, &tl16);
- const __m128i r2 = paeth_16x1_pred(&l16, &cl, &ch, &tl16);
- const __m128i r3 = paeth_16x1_pred(&l16, &dl, &dh, &tl16);
-
- _mm_store_si128((__m128i *)dst, r0);
- _mm_store_si128((__m128i *)(dst + 16), r1);
- _mm_store_si128((__m128i *)(dst + 32), r2);
- _mm_store_si128((__m128i *)(dst + 48), r3);
- dst += stride;
- rep = _mm_add_epi16(rep, one);
- }
- }
-}
-
-void aom_paeth_predictor_64x16_ssse3(uint8_t *dst, ptrdiff_t stride,
- const uint8_t *above,
- const uint8_t *left) {
- const __m128i a = _mm_load_si128((const __m128i *)above);
- const __m128i b = _mm_load_si128((const __m128i *)(above + 16));
- const __m128i c = _mm_load_si128((const __m128i *)(above + 32));
- const __m128i d = _mm_load_si128((const __m128i *)(above + 48));
- const __m128i zero = _mm_setzero_si128();
- const __m128i al = _mm_unpacklo_epi8(a, zero);
- const __m128i ah = _mm_unpackhi_epi8(a, zero);
- const __m128i bl = _mm_unpacklo_epi8(b, zero);
- const __m128i bh = _mm_unpackhi_epi8(b, zero);
- const __m128i cl = _mm_unpacklo_epi8(c, zero);
- const __m128i ch = _mm_unpackhi_epi8(c, zero);
- const __m128i dl = _mm_unpacklo_epi8(d, zero);
- const __m128i dh = _mm_unpackhi_epi8(d, zero);
-
- const __m128i tl16 = _mm_set1_epi16((uint16_t)above[-1]);
- const __m128i one = _mm_set1_epi16(1);
- __m128i l16;
-
- int i;
- const __m128i l = _mm_load_si128((const __m128i *)left);
- __m128i rep = _mm_set1_epi16(0x8000);
- for (i = 0; i < 16; ++i) {
- l16 = _mm_shuffle_epi8(l, rep);
- const __m128i r0 = paeth_16x1_pred(&l16, &al, &ah, &tl16);
- const __m128i r1 = paeth_16x1_pred(&l16, &bl, &bh, &tl16);
- const __m128i r2 = paeth_16x1_pred(&l16, &cl, &ch, &tl16);
- const __m128i r3 = paeth_16x1_pred(&l16, &dl, &dh, &tl16);
-
- _mm_store_si128((__m128i *)dst, r0);
- _mm_store_si128((__m128i *)(dst + 16), r1);
- _mm_store_si128((__m128i *)(dst + 32), r2);
- _mm_store_si128((__m128i *)(dst + 48), r3);
- dst += stride;
- rep = _mm_add_epi16(rep, one);
- }
-}
-
-// -----------------------------------------------------------------------------
-// SMOOTH_PRED
-
-// pixels[0]: above and below_pred interleave vector
-// pixels[1]: left vector
-// pixels[2]: right_pred vector
-static INLINE void load_pixel_w4(const uint8_t *above, const uint8_t *left,
- int height, __m128i *pixels) {
- __m128i d = _mm_cvtsi32_si128(((const uint32_t *)above)[0]);
- if (height == 4)
- pixels[1] = _mm_cvtsi32_si128(((const uint32_t *)left)[0]);
- else if (height == 8)
- pixels[1] = _mm_loadl_epi64(((const __m128i *)left));
- else
- pixels[1] = _mm_loadu_si128(((const __m128i *)left));
-
- pixels[2] = _mm_set1_epi16((uint16_t)above[3]);
-
- const __m128i bp = _mm_set1_epi16((uint16_t)left[height - 1]);
- const __m128i zero = _mm_setzero_si128();
- d = _mm_unpacklo_epi8(d, zero);
- pixels[0] = _mm_unpacklo_epi16(d, bp);
-}
-
-// weight_h[0]: weight_h vector
-// weight_h[1]: scale - weight_h vector
-// weight_h[2]: same as [0], second half for height = 16 only
-// weight_h[3]: same as [1], second half for height = 16 only
-// weight_w[0]: weights_w and scale - weights_w interleave vector
-static INLINE void load_weight_w4(const uint8_t *weight_array, int height,
- __m128i *weight_h, __m128i *weight_w) {
- const __m128i zero = _mm_setzero_si128();
- const __m128i d = _mm_set1_epi16((uint16_t)(1 << sm_weight_log2_scale));
- const __m128i t = _mm_cvtsi32_si128(((const uint32_t *)weight_array)[1]);
- weight_h[0] = _mm_unpacklo_epi8(t, zero);
- weight_h[1] = _mm_sub_epi16(d, weight_h[0]);
- weight_w[0] = _mm_unpacklo_epi16(weight_h[0], weight_h[1]);
-
- if (height == 8) {
- const __m128i weight = _mm_loadl_epi64((const __m128i *)&weight_array[8]);
- weight_h[0] = _mm_unpacklo_epi8(weight, zero);
- weight_h[1] = _mm_sub_epi16(d, weight_h[0]);
- } else if (height == 16) {
- const __m128i weight = _mm_loadu_si128((const __m128i *)&weight_array[16]);
- weight_h[0] = _mm_unpacklo_epi8(weight, zero);
- weight_h[1] = _mm_sub_epi16(d, weight_h[0]);
- weight_h[2] = _mm_unpackhi_epi8(weight, zero);
- weight_h[3] = _mm_sub_epi16(d, weight_h[2]);
- }
-}
-
-static INLINE void smooth_pred_4xh(const __m128i *pixel, const __m128i *wh,
- const __m128i *ww, int h, uint8_t *dst,
- ptrdiff_t stride, int second_half) {
- const __m128i round = _mm_set1_epi32((1 << sm_weight_log2_scale));
- const __m128i one = _mm_set1_epi16(1);
- const __m128i inc = _mm_set1_epi16(0x202);
- const __m128i gat = _mm_set1_epi32(0xc080400);
- __m128i rep = second_half ? _mm_set1_epi16(0x8008) : _mm_set1_epi16(0x8000);
- __m128i d = _mm_set1_epi16(0x100);
-
- for (int i = 0; i < h; ++i) {
- const __m128i wg_wg = _mm_shuffle_epi8(wh[0], d);
- const __m128i sc_sc = _mm_shuffle_epi8(wh[1], d);
- const __m128i wh_sc = _mm_unpacklo_epi16(wg_wg, sc_sc);
- __m128i s = _mm_madd_epi16(pixel[0], wh_sc);
-
- __m128i b = _mm_shuffle_epi8(pixel[1], rep);
- b = _mm_unpacklo_epi16(b, pixel[2]);
- __m128i sum = _mm_madd_epi16(b, ww[0]);
-
- sum = _mm_add_epi32(s, sum);
- sum = _mm_add_epi32(sum, round);
- sum = _mm_srai_epi32(sum, 1 + sm_weight_log2_scale);
-
- sum = _mm_shuffle_epi8(sum, gat);
- *(uint32_t *)dst = _mm_cvtsi128_si32(sum);
- dst += stride;
-
- rep = _mm_add_epi16(rep, one);
- d = _mm_add_epi16(d, inc);
- }
-}
-
-void aom_smooth_predictor_4x4_ssse3(uint8_t *dst, ptrdiff_t stride,
- const uint8_t *above, const uint8_t *left) {
- __m128i pixels[3];
- load_pixel_w4(above, left, 4, pixels);
-
- __m128i wh[4], ww[2];
- load_weight_w4(sm_weight_arrays, 4, wh, ww);
-
- smooth_pred_4xh(pixels, wh, ww, 4, dst, stride, 0);
-}
-
-void aom_smooth_predictor_4x8_ssse3(uint8_t *dst, ptrdiff_t stride,
- const uint8_t *above, const uint8_t *left) {
- __m128i pixels[3];
- load_pixel_w4(above, left, 8, pixels);
-
- __m128i wh[4], ww[2];
- load_weight_w4(sm_weight_arrays, 8, wh, ww);
-
- smooth_pred_4xh(pixels, wh, ww, 8, dst, stride, 0);
-}
-
-void aom_smooth_predictor_4x16_ssse3(uint8_t *dst, ptrdiff_t stride,
- const uint8_t *above,
- const uint8_t *left) {
- __m128i pixels[3];
- load_pixel_w4(above, left, 16, pixels);
-
- __m128i wh[4], ww[2];
- load_weight_w4(sm_weight_arrays, 16, wh, ww);
-
- smooth_pred_4xh(pixels, wh, ww, 8, dst, stride, 0);
- dst += stride << 3;
- smooth_pred_4xh(pixels, &wh[2], ww, 8, dst, stride, 1);
-}
-
-// pixels[0]: above and below_pred interleave vector, first half
-// pixels[1]: above and below_pred interleave vector, second half
-// pixels[2]: left vector
-// pixels[3]: right_pred vector
-// pixels[4]: above and below_pred interleave vector, first half
-// pixels[5]: above and below_pred interleave vector, second half
-// pixels[6]: left vector + 16
-// pixels[7]: right_pred vector
-static INLINE void load_pixel_w8(const uint8_t *above, const uint8_t *left,
- int height, __m128i *pixels) {
- const __m128i zero = _mm_setzero_si128();
- const __m128i bp = _mm_set1_epi16((uint16_t)left[height - 1]);
- __m128i d = _mm_loadl_epi64((const __m128i *)above);
- d = _mm_unpacklo_epi8(d, zero);
- pixels[0] = _mm_unpacklo_epi16(d, bp);
- pixels[1] = _mm_unpackhi_epi16(d, bp);
-
- pixels[3] = _mm_set1_epi16((uint16_t)above[7]);
-
- if (height == 4) {
- pixels[2] = _mm_cvtsi32_si128(((const uint32_t *)left)[0]);
- } else if (height == 8) {
- pixels[2] = _mm_loadl_epi64((const __m128i *)left);
- } else if (height == 16) {
- pixels[2] = _mm_load_si128((const __m128i *)left);
- } else {
- pixels[2] = _mm_load_si128((const __m128i *)left);
- pixels[4] = pixels[0];
- pixels[5] = pixels[1];
- pixels[6] = _mm_load_si128((const __m128i *)(left + 16));
- pixels[7] = pixels[3];
- }
-}
-
-// weight_h[0]: weight_h vector
-// weight_h[1]: scale - weight_h vector
-// weight_h[2]: same as [0], offset 8
-// weight_h[3]: same as [1], offset 8
-// weight_h[4]: same as [0], offset 16
-// weight_h[5]: same as [1], offset 16
-// weight_h[6]: same as [0], offset 24
-// weight_h[7]: same as [1], offset 24
-// weight_w[0]: weights_w and scale - weights_w interleave vector, first half
-// weight_w[1]: weights_w and scale - weights_w interleave vector, second half
-static INLINE void load_weight_w8(const uint8_t *weight_array, int height,
- __m128i *weight_h, __m128i *weight_w) {
- const __m128i zero = _mm_setzero_si128();
- const int we_offset = height < 8 ? 4 : 8;
- __m128i we = _mm_loadu_si128((const __m128i *)&weight_array[we_offset]);
- weight_h[0] = _mm_unpacklo_epi8(we, zero);
- const __m128i d = _mm_set1_epi16((uint16_t)(1 << sm_weight_log2_scale));
- weight_h[1] = _mm_sub_epi16(d, weight_h[0]);
-
- if (height == 4) {
- we = _mm_srli_si128(we, 4);
- __m128i tmp1 = _mm_unpacklo_epi8(we, zero);
- __m128i tmp2 = _mm_sub_epi16(d, tmp1);
- weight_w[0] = _mm_unpacklo_epi16(tmp1, tmp2);
- weight_w[1] = _mm_unpackhi_epi16(tmp1, tmp2);
- } else {
- weight_w[0] = _mm_unpacklo_epi16(weight_h[0], weight_h[1]);
- weight_w[1] = _mm_unpackhi_epi16(weight_h[0], weight_h[1]);
- }
-
- if (height == 16) {
- we = _mm_loadu_si128((const __m128i *)&weight_array[16]);
- weight_h[0] = _mm_unpacklo_epi8(we, zero);
- weight_h[1] = _mm_sub_epi16(d, weight_h[0]);
- weight_h[2] = _mm_unpackhi_epi8(we, zero);
- weight_h[3] = _mm_sub_epi16(d, weight_h[2]);
- } else if (height == 32) {
- const __m128i weight_lo =
- _mm_loadu_si128((const __m128i *)&weight_array[32]);
- weight_h[0] = _mm_unpacklo_epi8(weight_lo, zero);
- weight_h[1] = _mm_sub_epi16(d, weight_h[0]);
- weight_h[2] = _mm_unpackhi_epi8(weight_lo, zero);
- weight_h[3] = _mm_sub_epi16(d, weight_h[2]);
- const __m128i weight_hi =
- _mm_loadu_si128((const __m128i *)&weight_array[32 + 16]);
- weight_h[4] = _mm_unpacklo_epi8(weight_hi, zero);
- weight_h[5] = _mm_sub_epi16(d, weight_h[4]);
- weight_h[6] = _mm_unpackhi_epi8(weight_hi, zero);
- weight_h[7] = _mm_sub_epi16(d, weight_h[6]);
- }
-}
-
-static INLINE void smooth_pred_8xh(const __m128i *pixels, const __m128i *wh,
- const __m128i *ww, int h, uint8_t *dst,
- ptrdiff_t stride, int second_half) {
- const __m128i round = _mm_set1_epi32((1 << sm_weight_log2_scale));
- const __m128i one = _mm_set1_epi16(1);
- const __m128i inc = _mm_set1_epi16(0x202);
- const __m128i gat = _mm_set_epi32(0, 0, 0xe0c0a08, 0x6040200);
-
- __m128i rep = second_half ? _mm_set1_epi16(0x8008) : _mm_set1_epi16(0x8000);
- __m128i d = _mm_set1_epi16(0x100);
-
- int i;
- for (i = 0; i < h; ++i) {
- const __m128i wg_wg = _mm_shuffle_epi8(wh[0], d);
- const __m128i sc_sc = _mm_shuffle_epi8(wh[1], d);
- const __m128i wh_sc = _mm_unpacklo_epi16(wg_wg, sc_sc);
- __m128i s0 = _mm_madd_epi16(pixels[0], wh_sc);
- __m128i s1 = _mm_madd_epi16(pixels[1], wh_sc);
-
- __m128i b = _mm_shuffle_epi8(pixels[2], rep);
- b = _mm_unpacklo_epi16(b, pixels[3]);
- __m128i sum0 = _mm_madd_epi16(b, ww[0]);
- __m128i sum1 = _mm_madd_epi16(b, ww[1]);
-
- s0 = _mm_add_epi32(s0, sum0);
- s0 = _mm_add_epi32(s0, round);
- s0 = _mm_srai_epi32(s0, 1 + sm_weight_log2_scale);
-
- s1 = _mm_add_epi32(s1, sum1);
- s1 = _mm_add_epi32(s1, round);
- s1 = _mm_srai_epi32(s1, 1 + sm_weight_log2_scale);
-
- sum0 = _mm_packus_epi16(s0, s1);
- sum0 = _mm_shuffle_epi8(sum0, gat);
- _mm_storel_epi64((__m128i *)dst, sum0);
- dst += stride;
-
- rep = _mm_add_epi16(rep, one);
- d = _mm_add_epi16(d, inc);
- }
-}
-
-void aom_smooth_predictor_8x4_ssse3(uint8_t *dst, ptrdiff_t stride,
- const uint8_t *above, const uint8_t *left) {
- __m128i pixels[4];
- load_pixel_w8(above, left, 4, pixels);
-
- __m128i wh[4], ww[2];
- load_weight_w8(sm_weight_arrays, 4, wh, ww);
-
- smooth_pred_8xh(pixels, wh, ww, 4, dst, stride, 0);
-}
-
-void aom_smooth_predictor_8x8_ssse3(uint8_t *dst, ptrdiff_t stride,
- const uint8_t *above, const uint8_t *left) {
- __m128i pixels[4];
- load_pixel_w8(above, left, 8, pixels);
-
- __m128i wh[4], ww[2];
- load_weight_w8(sm_weight_arrays, 8, wh, ww);
-
- smooth_pred_8xh(pixels, wh, ww, 8, dst, stride, 0);
-}
-
-void aom_smooth_predictor_8x16_ssse3(uint8_t *dst, ptrdiff_t stride,
- const uint8_t *above,
- const uint8_t *left) {
- __m128i pixels[4];
- load_pixel_w8(above, left, 16, pixels);
-
- __m128i wh[4], ww[2];
- load_weight_w8(sm_weight_arrays, 16, wh, ww);
-
- smooth_pred_8xh(pixels, wh, ww, 8, dst, stride, 0);
- dst += stride << 3;
- smooth_pred_8xh(pixels, &wh[2], ww, 8, dst, stride, 1);
-}
-
-void aom_smooth_predictor_8x32_ssse3(uint8_t *dst, ptrdiff_t stride,
- const uint8_t *above,
- const uint8_t *left) {
- __m128i pixels[8];
- load_pixel_w8(above, left, 32, pixels);
-
- __m128i wh[8], ww[2];
- load_weight_w8(sm_weight_arrays, 32, wh, ww);
-
- smooth_pred_8xh(&pixels[0], wh, ww, 8, dst, stride, 0);
- dst += stride << 3;
- smooth_pred_8xh(&pixels[0], &wh[2], ww, 8, dst, stride, 1);
- dst += stride << 3;
- smooth_pred_8xh(&pixels[4], &wh[4], ww, 8, dst, stride, 0);
- dst += stride << 3;
- smooth_pred_8xh(&pixels[4], &wh[6], ww, 8, dst, stride, 1);
-}
-
-static INLINE void smooth_predictor_wxh(uint8_t *dst, ptrdiff_t stride,
- const uint8_t *above,
- const uint8_t *left, uint32_t bw,
- uint32_t bh) {
- const uint8_t *const sm_weights_w = sm_weight_arrays + bw;
- const uint8_t *const sm_weights_h = sm_weight_arrays + bh;
- const __m128i zero = _mm_setzero_si128();
- const __m128i scale_value =
- _mm_set1_epi16((uint16_t)(1 << sm_weight_log2_scale));
- const __m128i bottom_left = _mm_cvtsi32_si128((uint32_t)left[bh - 1]);
- const __m128i dup16 = _mm_set1_epi32(0x01000100);
- const __m128i top_right =
- _mm_shuffle_epi8(_mm_cvtsi32_si128((uint32_t)above[bw - 1]), dup16);
- const __m128i gat = _mm_set_epi32(0, 0, 0xe0c0a08, 0x6040200);
- const __m128i round = _mm_set1_epi32((uint16_t)(1 << sm_weight_log2_scale));
-
- for (uint32_t y = 0; y < bh; ++y) {
- const __m128i weights_y = _mm_cvtsi32_si128((uint32_t)sm_weights_h[y]);
- const __m128i left_y = _mm_cvtsi32_si128((uint32_t)left[y]);
- const __m128i scale_m_weights_y = _mm_sub_epi16(scale_value, weights_y);
- __m128i pred_scaled_bl = _mm_mullo_epi16(scale_m_weights_y, bottom_left);
- const __m128i wl_y =
- _mm_shuffle_epi32(_mm_unpacklo_epi16(weights_y, left_y), 0);
- pred_scaled_bl = _mm_add_epi32(pred_scaled_bl, round);
- pred_scaled_bl = _mm_shuffle_epi32(pred_scaled_bl, 0);
-
- for (uint32_t x = 0; x < bw; x += 8) {
- const __m128i top_x = _mm_loadl_epi64((const __m128i *)(above + x));
- const __m128i weights_x =
- _mm_loadl_epi64((const __m128i *)(sm_weights_w + x));
- const __m128i tw_x = _mm_unpacklo_epi8(top_x, weights_x);
- const __m128i tw_x_lo = _mm_unpacklo_epi8(tw_x, zero);
- const __m128i tw_x_hi = _mm_unpackhi_epi8(tw_x, zero);
-
- __m128i pred_lo = _mm_madd_epi16(tw_x_lo, wl_y);
- __m128i pred_hi = _mm_madd_epi16(tw_x_hi, wl_y);
-
- const __m128i scale_m_weights_x =
- _mm_sub_epi16(scale_value, _mm_unpacklo_epi8(weights_x, zero));
- const __m128i swxtr = _mm_mullo_epi16(scale_m_weights_x, top_right);
- const __m128i swxtr_lo = _mm_unpacklo_epi16(swxtr, zero);
- const __m128i swxtr_hi = _mm_unpackhi_epi16(swxtr, zero);
-
- pred_lo = _mm_add_epi32(pred_lo, pred_scaled_bl);
- pred_hi = _mm_add_epi32(pred_hi, pred_scaled_bl);
-
- pred_lo = _mm_add_epi32(pred_lo, swxtr_lo);
- pred_hi = _mm_add_epi32(pred_hi, swxtr_hi);
-
- pred_lo = _mm_srai_epi32(pred_lo, (1 + sm_weight_log2_scale));
- pred_hi = _mm_srai_epi32(pred_hi, (1 + sm_weight_log2_scale));
-
- __m128i pred = _mm_packus_epi16(pred_lo, pred_hi);
- pred = _mm_shuffle_epi8(pred, gat);
- _mm_storel_epi64((__m128i *)(dst + x), pred);
- }
- dst += stride;
- }
-}
-
-void aom_smooth_predictor_16x4_ssse3(uint8_t *dst, ptrdiff_t stride,
- const uint8_t *above,
- const uint8_t *left) {
- smooth_predictor_wxh(dst, stride, above, left, 16, 4);
-}
-
-void aom_smooth_predictor_16x8_ssse3(uint8_t *dst, ptrdiff_t stride,
- const uint8_t *above,
- const uint8_t *left) {
- smooth_predictor_wxh(dst, stride, above, left, 16, 8);
-}
-
-void aom_smooth_predictor_16x16_ssse3(uint8_t *dst, ptrdiff_t stride,
- const uint8_t *above,
- const uint8_t *left) {
- smooth_predictor_wxh(dst, stride, above, left, 16, 16);
-}
-
-void aom_smooth_predictor_16x32_ssse3(uint8_t *dst, ptrdiff_t stride,
- const uint8_t *above,
- const uint8_t *left) {
- smooth_predictor_wxh(dst, stride, above, left, 16, 32);
-}
-
-void aom_smooth_predictor_32x8_ssse3(uint8_t *dst, ptrdiff_t stride,
- const uint8_t *above,
- const uint8_t *left) {
- smooth_predictor_wxh(dst, stride, above, left, 32, 8);
-}
-
-void aom_smooth_predictor_32x16_ssse3(uint8_t *dst, ptrdiff_t stride,
- const uint8_t *above,
- const uint8_t *left) {
- smooth_predictor_wxh(dst, stride, above, left, 32, 16);
-}
-
-void aom_smooth_predictor_32x32_ssse3(uint8_t *dst, ptrdiff_t stride,
- const uint8_t *above,
- const uint8_t *left) {
- smooth_predictor_wxh(dst, stride, above, left, 32, 32);
-}
-
-void aom_smooth_predictor_32x64_ssse3(uint8_t *dst, ptrdiff_t stride,
- const uint8_t *above,
- const uint8_t *left) {
- smooth_predictor_wxh(dst, stride, above, left, 32, 64);
-}
-
-void aom_smooth_predictor_64x64_ssse3(uint8_t *dst, ptrdiff_t stride,
- const uint8_t *above,
- const uint8_t *left) {
- smooth_predictor_wxh(dst, stride, above, left, 64, 64);
-}
-
-void aom_smooth_predictor_64x32_ssse3(uint8_t *dst, ptrdiff_t stride,
- const uint8_t *above,
- const uint8_t *left) {
- smooth_predictor_wxh(dst, stride, above, left, 64, 32);
-}
-
-void aom_smooth_predictor_64x16_ssse3(uint8_t *dst, ptrdiff_t stride,
- const uint8_t *above,
- const uint8_t *left) {
- smooth_predictor_wxh(dst, stride, above, left, 64, 16);
-}
-
-void aom_smooth_predictor_16x64_ssse3(uint8_t *dst, ptrdiff_t stride,
- const uint8_t *above,
- const uint8_t *left) {
- smooth_predictor_wxh(dst, stride, above, left, 16, 64);
-}
-
-// -----------------------------------------------------------------------------
-// SMOOTH_V_PRED
-
-// pixels[0]: above and below_pred interleave vector
-static INLINE void load_pixel_v_w4(const uint8_t *above, const uint8_t *left,
- int height, __m128i *pixels) {
- const __m128i zero = _mm_setzero_si128();
- __m128i d = _mm_cvtsi32_si128(((const uint32_t *)above)[0]);
- const __m128i bp = _mm_set1_epi16((uint16_t)left[height - 1]);
- d = _mm_unpacklo_epi8(d, zero);
- pixels[0] = _mm_unpacklo_epi16(d, bp);
-}
-
-// weights[0]: weights_h vector
-// weights[1]: scale - weights_h vector
-static INLINE void load_weight_v_w4(const uint8_t *weight_array, int height,
- __m128i *weights) {
- const __m128i zero = _mm_setzero_si128();
- const __m128i d = _mm_set1_epi16((uint16_t)(1 << sm_weight_log2_scale));
-
- if (height == 4) {
- const __m128i weight =
- _mm_cvtsi32_si128(((const uint32_t *)weight_array)[1]);
- weights[0] = _mm_unpacklo_epi8(weight, zero);
- weights[1] = _mm_sub_epi16(d, weights[0]);
- } else if (height == 8) {
- const __m128i weight = _mm_loadl_epi64((const __m128i *)&weight_array[8]);
- weights[0] = _mm_unpacklo_epi8(weight, zero);
- weights[1] = _mm_sub_epi16(d, weights[0]);
- } else {
- const __m128i weight = _mm_loadu_si128((const __m128i *)&weight_array[16]);
- weights[0] = _mm_unpacklo_epi8(weight, zero);
- weights[1] = _mm_sub_epi16(d, weights[0]);
- weights[2] = _mm_unpackhi_epi8(weight, zero);
- weights[3] = _mm_sub_epi16(d, weights[2]);
- }
-}
-
-static INLINE void smooth_v_pred_4xh(const __m128i *pixel,
- const __m128i *weight, int h, uint8_t *dst,
- ptrdiff_t stride) {
- const __m128i pred_round = _mm_set1_epi32((1 << (sm_weight_log2_scale - 1)));
- const __m128i inc = _mm_set1_epi16(0x202);
- const __m128i gat = _mm_set1_epi32(0xc080400);
- __m128i d = _mm_set1_epi16(0x100);
-
- for (int i = 0; i < h; ++i) {
- const __m128i wg_wg = _mm_shuffle_epi8(weight[0], d);
- const __m128i sc_sc = _mm_shuffle_epi8(weight[1], d);
- const __m128i wh_sc = _mm_unpacklo_epi16(wg_wg, sc_sc);
- __m128i sum = _mm_madd_epi16(pixel[0], wh_sc);
- sum = _mm_add_epi32(sum, pred_round);
- sum = _mm_srai_epi32(sum, sm_weight_log2_scale);
- sum = _mm_shuffle_epi8(sum, gat);
- *(uint32_t *)dst = _mm_cvtsi128_si32(sum);
- dst += stride;
- d = _mm_add_epi16(d, inc);
- }
-}
-
-void aom_smooth_v_predictor_4x4_ssse3(uint8_t *dst, ptrdiff_t stride,
- const uint8_t *above,
- const uint8_t *left) {
- __m128i pixels;
- load_pixel_v_w4(above, left, 4, &pixels);
-
- __m128i weights[2];
- load_weight_v_w4(sm_weight_arrays, 4, weights);
-
- smooth_v_pred_4xh(&pixels, weights, 4, dst, stride);
-}
-
-void aom_smooth_v_predictor_4x8_ssse3(uint8_t *dst, ptrdiff_t stride,
- const uint8_t *above,
- const uint8_t *left) {
- __m128i pixels;
- load_pixel_v_w4(above, left, 8, &pixels);
-
- __m128i weights[2];
- load_weight_v_w4(sm_weight_arrays, 8, weights);
-
- smooth_v_pred_4xh(&pixels, weights, 8, dst, stride);
-}
-
-void aom_smooth_v_predictor_4x16_ssse3(uint8_t *dst, ptrdiff_t stride,
- const uint8_t *above,
- const uint8_t *left) {
- __m128i pixels;
- load_pixel_v_w4(above, left, 16, &pixels);
-
- __m128i weights[4];
- load_weight_v_w4(sm_weight_arrays, 16, weights);
-
- smooth_v_pred_4xh(&pixels, weights, 8, dst, stride);
- dst += stride << 3;
- smooth_v_pred_4xh(&pixels, &weights[2], 8, dst, stride);
-}
-
-// pixels[0]: above and below_pred interleave vector, first half
-// pixels[1]: above and below_pred interleave vector, second half
-static INLINE void load_pixel_v_w8(const uint8_t *above, const uint8_t *left,
- int height, __m128i *pixels) {
- const __m128i zero = _mm_setzero_si128();
- __m128i d = _mm_loadl_epi64((const __m128i *)above);
- const __m128i bp = _mm_set1_epi16((uint16_t)left[height - 1]);
- d = _mm_unpacklo_epi8(d, zero);
- pixels[0] = _mm_unpacklo_epi16(d, bp);
- pixels[1] = _mm_unpackhi_epi16(d, bp);
-}
-
-// weight_h[0]: weight_h vector
-// weight_h[1]: scale - weight_h vector
-// weight_h[2]: same as [0], offset 8
-// weight_h[3]: same as [1], offset 8
-// weight_h[4]: same as [0], offset 16
-// weight_h[5]: same as [1], offset 16
-// weight_h[6]: same as [0], offset 24
-// weight_h[7]: same as [1], offset 24
-static INLINE void load_weight_v_w8(const uint8_t *weight_array, int height,
- __m128i *weight_h) {
- const __m128i zero = _mm_setzero_si128();
- const __m128i d = _mm_set1_epi16((uint16_t)(1 << sm_weight_log2_scale));
-
- if (height < 16) {
- const int offset = height < 8 ? 4 : 8;
- const __m128i weight =
- _mm_loadu_si128((const __m128i *)&weight_array[offset]);
- weight_h[0] = _mm_unpacklo_epi8(weight, zero);
- weight_h[1] = _mm_sub_epi16(d, weight_h[0]);
- } else if (height == 16) {
- const __m128i weight = _mm_loadu_si128((const __m128i *)&weight_array[16]);
- weight_h[0] = _mm_unpacklo_epi8(weight, zero);
- weight_h[1] = _mm_sub_epi16(d, weight_h[0]);
- weight_h[2] = _mm_unpackhi_epi8(weight, zero);
- weight_h[3] = _mm_sub_epi16(d, weight_h[2]);
- } else {
- const __m128i weight_lo =
- _mm_loadu_si128((const __m128i *)&weight_array[32]);
- weight_h[0] = _mm_unpacklo_epi8(weight_lo, zero);
- weight_h[1] = _mm_sub_epi16(d, weight_h[0]);
- weight_h[2] = _mm_unpackhi_epi8(weight_lo, zero);
- weight_h[3] = _mm_sub_epi16(d, weight_h[2]);
- const __m128i weight_hi =
- _mm_loadu_si128((const __m128i *)&weight_array[32 + 16]);
- weight_h[4] = _mm_unpacklo_epi8(weight_hi, zero);
- weight_h[5] = _mm_sub_epi16(d, weight_h[4]);
- weight_h[6] = _mm_unpackhi_epi8(weight_hi, zero);
- weight_h[7] = _mm_sub_epi16(d, weight_h[6]);
- }
-}
-
-static INLINE void smooth_v_pred_8xh(const __m128i *pixels, const __m128i *wh,
- int h, uint8_t *dst, ptrdiff_t stride) {
- const __m128i pred_round = _mm_set1_epi32((1 << (sm_weight_log2_scale - 1)));
- const __m128i inc = _mm_set1_epi16(0x202);
- const __m128i gat = _mm_set_epi32(0, 0, 0xe0c0a08, 0x6040200);
- __m128i d = _mm_set1_epi16(0x100);
-
- for (int i = 0; i < h; ++i) {
- const __m128i wg_wg = _mm_shuffle_epi8(wh[0], d);
- const __m128i sc_sc = _mm_shuffle_epi8(wh[1], d);
- const __m128i wh_sc = _mm_unpacklo_epi16(wg_wg, sc_sc);
- __m128i s0 = _mm_madd_epi16(pixels[0], wh_sc);
- __m128i s1 = _mm_madd_epi16(pixels[1], wh_sc);
-
- s0 = _mm_add_epi32(s0, pred_round);
- s0 = _mm_srai_epi32(s0, sm_weight_log2_scale);
-
- s1 = _mm_add_epi32(s1, pred_round);
- s1 = _mm_srai_epi32(s1, sm_weight_log2_scale);
-
- __m128i sum01 = _mm_packus_epi16(s0, s1);
- sum01 = _mm_shuffle_epi8(sum01, gat);
- _mm_storel_epi64((__m128i *)dst, sum01);
- dst += stride;
-
- d = _mm_add_epi16(d, inc);
- }
-}
-
-void aom_smooth_v_predictor_8x4_ssse3(uint8_t *dst, ptrdiff_t stride,
- const uint8_t *above,
- const uint8_t *left) {
- __m128i pixels[2];
- load_pixel_v_w8(above, left, 4, pixels);
-
- __m128i wh[2];
- load_weight_v_w8(sm_weight_arrays, 4, wh);
-
- smooth_v_pred_8xh(pixels, wh, 4, dst, stride);
-}
-
-void aom_smooth_v_predictor_8x8_ssse3(uint8_t *dst, ptrdiff_t stride,
- const uint8_t *above,
- const uint8_t *left) {
- __m128i pixels[2];
- load_pixel_v_w8(above, left, 8, pixels);
-
- __m128i wh[2];
- load_weight_v_w8(sm_weight_arrays, 8, wh);
-
- smooth_v_pred_8xh(pixels, wh, 8, dst, stride);
-}
-
-void aom_smooth_v_predictor_8x16_ssse3(uint8_t *dst, ptrdiff_t stride,
- const uint8_t *above,
- const uint8_t *left) {
- __m128i pixels[2];
- load_pixel_v_w8(above, left, 16, pixels);
-
- __m128i wh[4];
- load_weight_v_w8(sm_weight_arrays, 16, wh);
-
- smooth_v_pred_8xh(pixels, wh, 8, dst, stride);
- dst += stride << 3;
- smooth_v_pred_8xh(pixels, &wh[2], 8, dst, stride);
-}
-
-void aom_smooth_v_predictor_8x32_ssse3(uint8_t *dst, ptrdiff_t stride,
- const uint8_t *above,
- const uint8_t *left) {
- __m128i pixels[2];
- load_pixel_v_w8(above, left, 32, pixels);
-
- __m128i wh[8];
- load_weight_v_w8(sm_weight_arrays, 32, wh);
-
- smooth_v_pred_8xh(pixels, &wh[0], 8, dst, stride);
- dst += stride << 3;
- smooth_v_pred_8xh(pixels, &wh[2], 8, dst, stride);
- dst += stride << 3;
- smooth_v_pred_8xh(pixels, &wh[4], 8, dst, stride);
- dst += stride << 3;
- smooth_v_pred_8xh(pixels, &wh[6], 8, dst, stride);
-}
-
-static INLINE void smooth_v_predictor_wxh(uint8_t *dst, ptrdiff_t stride,
- const uint8_t *above,
- const uint8_t *left, uint32_t bw,
- uint32_t bh) {
- const uint8_t *const sm_weights_h = sm_weight_arrays + bh;
- const __m128i zero = _mm_setzero_si128();
- const __m128i scale_value =
- _mm_set1_epi16((uint16_t)(1 << sm_weight_log2_scale));
- const __m128i dup16 = _mm_set1_epi32(0x01000100);
- const __m128i bottom_left =
- _mm_shuffle_epi8(_mm_cvtsi32_si128((uint32_t)left[bh - 1]), dup16);
- const __m128i gat = _mm_set_epi32(0, 0, 0xe0c0a08, 0x6040200);
- const __m128i round =
- _mm_set1_epi32((uint16_t)(1 << (sm_weight_log2_scale - 1)));
-
- for (uint32_t y = 0; y < bh; ++y) {
- const __m128i weights_y = _mm_cvtsi32_si128((uint32_t)sm_weights_h[y]);
- const __m128i scale_m_weights_y =
- _mm_shuffle_epi8(_mm_sub_epi16(scale_value, weights_y), dup16);
- const __m128i wl_y =
- _mm_shuffle_epi32(_mm_unpacklo_epi16(weights_y, bottom_left), 0);
-
- for (uint32_t x = 0; x < bw; x += 8) {
- const __m128i top_x = _mm_loadl_epi64((const __m128i *)(above + x));
- // 8 -> 16
- const __m128i tw_x = _mm_unpacklo_epi8(top_x, zero);
- const __m128i tw_x_lo = _mm_unpacklo_epi16(tw_x, scale_m_weights_y);
- const __m128i tw_x_hi = _mm_unpackhi_epi16(tw_x, scale_m_weights_y);
- // top_x * weights_y + scale_m_weights_y * bottom_left
- __m128i pred_lo = _mm_madd_epi16(tw_x_lo, wl_y);
- __m128i pred_hi = _mm_madd_epi16(tw_x_hi, wl_y);
-
- pred_lo = _mm_add_epi32(pred_lo, round);
- pred_hi = _mm_add_epi32(pred_hi, round);
- pred_lo = _mm_srai_epi32(pred_lo, sm_weight_log2_scale);
- pred_hi = _mm_srai_epi32(pred_hi, sm_weight_log2_scale);
-
- __m128i pred = _mm_packus_epi16(pred_lo, pred_hi);
- pred = _mm_shuffle_epi8(pred, gat);
- _mm_storel_epi64((__m128i *)(dst + x), pred);
- }
- dst += stride;
- }
-}
-
-void aom_smooth_v_predictor_16x4_ssse3(uint8_t *dst, ptrdiff_t stride,
- const uint8_t *above,
- const uint8_t *left) {
- smooth_v_predictor_wxh(dst, stride, above, left, 16, 4);
-}
-
-void aom_smooth_v_predictor_16x8_ssse3(uint8_t *dst, ptrdiff_t stride,
- const uint8_t *above,
- const uint8_t *left) {
- smooth_v_predictor_wxh(dst, stride, above, left, 16, 8);
-}
-
-void aom_smooth_v_predictor_16x16_ssse3(uint8_t *dst, ptrdiff_t stride,
- const uint8_t *above,
- const uint8_t *left) {
- smooth_v_predictor_wxh(dst, stride, above, left, 16, 16);
-}
-
-void aom_smooth_v_predictor_16x32_ssse3(uint8_t *dst, ptrdiff_t stride,
- const uint8_t *above,
- const uint8_t *left) {
- smooth_v_predictor_wxh(dst, stride, above, left, 16, 32);
-}
-
-void aom_smooth_v_predictor_32x8_ssse3(uint8_t *dst, ptrdiff_t stride,
- const uint8_t *above,
- const uint8_t *left) {
- smooth_v_predictor_wxh(dst, stride, above, left, 32, 8);
-}
-
-void aom_smooth_v_predictor_32x16_ssse3(uint8_t *dst, ptrdiff_t stride,
- const uint8_t *above,
- const uint8_t *left) {
- smooth_v_predictor_wxh(dst, stride, above, left, 32, 16);
-}
-
-void aom_smooth_v_predictor_32x32_ssse3(uint8_t *dst, ptrdiff_t stride,
- const uint8_t *above,
- const uint8_t *left) {
- smooth_v_predictor_wxh(dst, stride, above, left, 32, 32);
-}
-
-void aom_smooth_v_predictor_32x64_ssse3(uint8_t *dst, ptrdiff_t stride,
- const uint8_t *above,
- const uint8_t *left) {
- smooth_v_predictor_wxh(dst, stride, above, left, 32, 64);
-}
-
-void aom_smooth_v_predictor_64x64_ssse3(uint8_t *dst, ptrdiff_t stride,
- const uint8_t *above,
- const uint8_t *left) {
- smooth_v_predictor_wxh(dst, stride, above, left, 64, 64);
-}
-
-void aom_smooth_v_predictor_64x32_ssse3(uint8_t *dst, ptrdiff_t stride,
- const uint8_t *above,
- const uint8_t *left) {
- smooth_v_predictor_wxh(dst, stride, above, left, 64, 32);
-}
-
-void aom_smooth_v_predictor_64x16_ssse3(uint8_t *dst, ptrdiff_t stride,
- const uint8_t *above,
- const uint8_t *left) {
- smooth_v_predictor_wxh(dst, stride, above, left, 64, 16);
-}
-
-void aom_smooth_v_predictor_16x64_ssse3(uint8_t *dst, ptrdiff_t stride,
- const uint8_t *above,
- const uint8_t *left) {
- smooth_v_predictor_wxh(dst, stride, above, left, 16, 64);
-}
-
-// -----------------------------------------------------------------------------
-// SMOOTH_H_PRED
-
-// pixels[0]: left vector
-// pixels[1]: right_pred vector
-static INLINE void load_pixel_h_w4(const uint8_t *above, const uint8_t *left,
- int height, __m128i *pixels) {
- if (height == 4)
- pixels[0] = _mm_cvtsi32_si128(((const uint32_t *)left)[0]);
- else if (height == 8)
- pixels[0] = _mm_loadl_epi64(((const __m128i *)left));
- else
- pixels[0] = _mm_loadu_si128(((const __m128i *)left));
- pixels[1] = _mm_set1_epi16((uint16_t)above[3]);
-}
-
-// weights[0]: weights_w and scale - weights_w interleave vector
-static INLINE void load_weight_h_w4(const uint8_t *weight_array, int height,
- __m128i *weights) {
- (void)height;
- const __m128i t = _mm_loadu_si128((const __m128i *)&weight_array[4]);
- const __m128i zero = _mm_setzero_si128();
-
- const __m128i weights_0 = _mm_unpacklo_epi8(t, zero);
- const __m128i d = _mm_set1_epi16((uint16_t)(1 << sm_weight_log2_scale));
- const __m128i weights_1 = _mm_sub_epi16(d, weights_0);
- weights[0] = _mm_unpacklo_epi16(weights_0, weights_1);
-}
-
-static INLINE void smooth_h_pred_4xh(const __m128i *pixel,
- const __m128i *weight, int h, uint8_t *dst,
- ptrdiff_t stride) {
- const __m128i pred_round = _mm_set1_epi32((1 << (sm_weight_log2_scale - 1)));
- const __m128i one = _mm_set1_epi16(1);
- const __m128i gat = _mm_set1_epi32(0xc080400);
- __m128i rep = _mm_set1_epi16(0x8000);
-
- for (int i = 0; i < h; ++i) {
- __m128i b = _mm_shuffle_epi8(pixel[0], rep);
- b = _mm_unpacklo_epi16(b, pixel[1]);
- __m128i sum = _mm_madd_epi16(b, weight[0]);
-
- sum = _mm_add_epi32(sum, pred_round);
- sum = _mm_srai_epi32(sum, sm_weight_log2_scale);
-
- sum = _mm_shuffle_epi8(sum, gat);
- *(uint32_t *)dst = _mm_cvtsi128_si32(sum);
- dst += stride;
-
- rep = _mm_add_epi16(rep, one);
- }
-}
-
-void aom_smooth_h_predictor_4x4_ssse3(uint8_t *dst, ptrdiff_t stride,
- const uint8_t *above,
- const uint8_t *left) {
- __m128i pixels[2];
- load_pixel_h_w4(above, left, 4, pixels);
-
- __m128i weights;
- load_weight_h_w4(sm_weight_arrays, 4, &weights);
-
- smooth_h_pred_4xh(pixels, &weights, 4, dst, stride);
-}
-
-void aom_smooth_h_predictor_4x8_ssse3(uint8_t *dst, ptrdiff_t stride,
- const uint8_t *above,
- const uint8_t *left) {
- __m128i pixels[2];
- load_pixel_h_w4(above, left, 8, pixels);
-
- __m128i weights;
- load_weight_h_w4(sm_weight_arrays, 8, &weights);
-
- smooth_h_pred_4xh(pixels, &weights, 8, dst, stride);
-}
-
-void aom_smooth_h_predictor_4x16_ssse3(uint8_t *dst, ptrdiff_t stride,
- const uint8_t *above,
- const uint8_t *left) {
- __m128i pixels[2];
- load_pixel_h_w4(above, left, 16, pixels);
-
- __m128i weights;
- load_weight_h_w4(sm_weight_arrays, 8, &weights);
-
- smooth_h_pred_4xh(pixels, &weights, 8, dst, stride);
- dst += stride << 3;
-
- pixels[0] = _mm_srli_si128(pixels[0], 8);
- smooth_h_pred_4xh(pixels, &weights, 8, dst, stride);
-}
-
-// pixels[0]: left vector
-// pixels[1]: right_pred vector
-// pixels[2]: left vector + 16
-// pixels[3]: right_pred vector
-static INLINE void load_pixel_h_w8(const uint8_t *above, const uint8_t *left,
- int height, __m128i *pixels) {
- pixels[1] = _mm_set1_epi16((uint16_t)above[7]);
-
- if (height == 4) {
- pixels[0] = _mm_cvtsi32_si128(((const uint32_t *)left)[0]);
- } else if (height == 8) {
- pixels[0] = _mm_loadl_epi64((const __m128i *)left);
- } else if (height == 16) {
- pixels[0] = _mm_load_si128((const __m128i *)left);
- } else {
- pixels[0] = _mm_load_si128((const __m128i *)left);
- pixels[2] = _mm_load_si128((const __m128i *)(left + 16));
- pixels[3] = pixels[1];
- }
-}
-
-// weight_w[0]: weights_w and scale - weights_w interleave vector, first half
-// weight_w[1]: weights_w and scale - weights_w interleave vector, second half
-static INLINE void load_weight_h_w8(const uint8_t *weight_array, int height,
- __m128i *weight_w) {
- (void)height;
- const __m128i zero = _mm_setzero_si128();
- const __m128i d = _mm_set1_epi16((uint16_t)(1 << sm_weight_log2_scale));
- const __m128i we = _mm_loadu_si128((const __m128i *)&weight_array[8]);
- const __m128i tmp1 = _mm_unpacklo_epi8(we, zero);
- const __m128i tmp2 = _mm_sub_epi16(d, tmp1);
- weight_w[0] = _mm_unpacklo_epi16(tmp1, tmp2);
- weight_w[1] = _mm_unpackhi_epi16(tmp1, tmp2);
-}
-
-static INLINE void smooth_h_pred_8xh(const __m128i *pixels, const __m128i *ww,
- int h, uint8_t *dst, ptrdiff_t stride,
- int second_half) {
- const __m128i pred_round = _mm_set1_epi32((1 << (sm_weight_log2_scale - 1)));
- const __m128i one = _mm_set1_epi16(1);
- const __m128i gat = _mm_set_epi32(0, 0, 0xe0c0a08, 0x6040200);
- __m128i rep = second_half ? _mm_set1_epi16(0x8008) : _mm_set1_epi16(0x8000);
-
- for (int i = 0; i < h; ++i) {
- __m128i b = _mm_shuffle_epi8(pixels[0], rep);
- b = _mm_unpacklo_epi16(b, pixels[1]);
- __m128i sum0 = _mm_madd_epi16(b, ww[0]);
- __m128i sum1 = _mm_madd_epi16(b, ww[1]);
-
- sum0 = _mm_add_epi32(sum0, pred_round);
- sum0 = _mm_srai_epi32(sum0, sm_weight_log2_scale);
-
- sum1 = _mm_add_epi32(sum1, pred_round);
- sum1 = _mm_srai_epi32(sum1, sm_weight_log2_scale);
-
- sum0 = _mm_packus_epi16(sum0, sum1);
- sum0 = _mm_shuffle_epi8(sum0, gat);
- _mm_storel_epi64((__m128i *)dst, sum0);
- dst += stride;
-
- rep = _mm_add_epi16(rep, one);
- }
-}
-
-void aom_smooth_h_predictor_8x4_ssse3(uint8_t *dst, ptrdiff_t stride,
- const uint8_t *above,
- const uint8_t *left) {
- __m128i pixels[2];
- load_pixel_h_w8(above, left, 4, pixels);
-
- __m128i ww[2];
- load_weight_h_w8(sm_weight_arrays, 4, ww);
-
- smooth_h_pred_8xh(pixels, ww, 4, dst, stride, 0);
-}
-
-void aom_smooth_h_predictor_8x8_ssse3(uint8_t *dst, ptrdiff_t stride,
- const uint8_t *above,
- const uint8_t *left) {
- __m128i pixels[2];
- load_pixel_h_w8(above, left, 8, pixels);
-
- __m128i ww[2];
- load_weight_h_w8(sm_weight_arrays, 8, ww);
-
- smooth_h_pred_8xh(pixels, ww, 8, dst, stride, 0);
-}
-
-void aom_smooth_h_predictor_8x16_ssse3(uint8_t *dst, ptrdiff_t stride,
- const uint8_t *above,
- const uint8_t *left) {
- __m128i pixels[2];
- load_pixel_h_w8(above, left, 16, pixels);
-
- __m128i ww[2];
- load_weight_h_w8(sm_weight_arrays, 16, ww);
-
- smooth_h_pred_8xh(pixels, ww, 8, dst, stride, 0);
- dst += stride << 3;
- smooth_h_pred_8xh(pixels, ww, 8, dst, stride, 1);
-}
-
-void aom_smooth_h_predictor_8x32_ssse3(uint8_t *dst, ptrdiff_t stride,
- const uint8_t *above,
- const uint8_t *left) {
- __m128i pixels[4];
- load_pixel_h_w8(above, left, 32, pixels);
-
- __m128i ww[2];
- load_weight_h_w8(sm_weight_arrays, 32, ww);
-
- smooth_h_pred_8xh(&pixels[0], ww, 8, dst, stride, 0);
- dst += stride << 3;
- smooth_h_pred_8xh(&pixels[0], ww, 8, dst, stride, 1);
- dst += stride << 3;
- smooth_h_pred_8xh(&pixels[2], ww, 8, dst, stride, 0);
- dst += stride << 3;
- smooth_h_pred_8xh(&pixels[2], ww, 8, dst, stride, 1);
-}
-
-static INLINE void smooth_h_predictor_wxh(uint8_t *dst, ptrdiff_t stride,
- const uint8_t *above,
- const uint8_t *left, uint32_t bw,
- uint32_t bh) {
- const uint8_t *const sm_weights_w = sm_weight_arrays + bw;
- const __m128i zero = _mm_setzero_si128();
- const __m128i scale_value =
- _mm_set1_epi16((uint16_t)(1 << sm_weight_log2_scale));
- const __m128i top_right = _mm_cvtsi32_si128((uint32_t)above[bw - 1]);
- const __m128i gat = _mm_set_epi32(0, 0, 0xe0c0a08, 0x6040200);
- const __m128i pred_round = _mm_set1_epi32((1 << (sm_weight_log2_scale - 1)));
-
- for (uint32_t y = 0; y < bh; ++y) {
- const __m128i left_y = _mm_cvtsi32_si128((uint32_t)left[y]);
- const __m128i tr_ly =
- _mm_shuffle_epi32(_mm_unpacklo_epi16(top_right, left_y), 0);
-
- for (uint32_t x = 0; x < bw; x += 8) {
- const __m128i weights_x =
- _mm_loadl_epi64((const __m128i *)(sm_weights_w + x));
- const __m128i weights_xw = _mm_unpacklo_epi8(weights_x, zero);
- const __m128i scale_m_weights_x = _mm_sub_epi16(scale_value, weights_xw);
- const __m128i wx_lo = _mm_unpacklo_epi16(scale_m_weights_x, weights_xw);
- const __m128i wx_hi = _mm_unpackhi_epi16(scale_m_weights_x, weights_xw);
- __m128i pred_lo = _mm_madd_epi16(wx_lo, tr_ly);
- __m128i pred_hi = _mm_madd_epi16(wx_hi, tr_ly);
-
- pred_lo = _mm_add_epi32(pred_lo, pred_round);
- pred_hi = _mm_add_epi32(pred_hi, pred_round);
-
- pred_lo = _mm_srai_epi32(pred_lo, sm_weight_log2_scale);
- pred_hi = _mm_srai_epi32(pred_hi, sm_weight_log2_scale);
-
- __m128i pred = _mm_packus_epi16(pred_lo, pred_hi);
- pred = _mm_shuffle_epi8(pred, gat);
- _mm_storel_epi64((__m128i *)(dst + x), pred);
- }
- dst += stride;
- }
-}
-
-void aom_smooth_h_predictor_16x4_ssse3(uint8_t *dst, ptrdiff_t stride,
- const uint8_t *above,
- const uint8_t *left) {
- smooth_h_predictor_wxh(dst, stride, above, left, 16, 4);
-}
-
-void aom_smooth_h_predictor_16x8_ssse3(uint8_t *dst, ptrdiff_t stride,
- const uint8_t *above,
- const uint8_t *left) {
- smooth_h_predictor_wxh(dst, stride, above, left, 16, 8);
-}
-
-void aom_smooth_h_predictor_16x16_ssse3(uint8_t *dst, ptrdiff_t stride,
- const uint8_t *above,
- const uint8_t *left) {
- smooth_h_predictor_wxh(dst, stride, above, left, 16, 16);
-}
-
-void aom_smooth_h_predictor_16x32_ssse3(uint8_t *dst, ptrdiff_t stride,
- const uint8_t *above,
- const uint8_t *left) {
- smooth_h_predictor_wxh(dst, stride, above, left, 16, 32);
-}
-
-void aom_smooth_h_predictor_16x64_ssse3(uint8_t *dst, ptrdiff_t stride,
- const uint8_t *above,
- const uint8_t *left) {
- smooth_h_predictor_wxh(dst, stride, above, left, 16, 64);
-}
-
-void aom_smooth_h_predictor_32x8_ssse3(uint8_t *dst, ptrdiff_t stride,
- const uint8_t *above,
- const uint8_t *left) {
- smooth_h_predictor_wxh(dst, stride, above, left, 32, 8);
-}
-
-void aom_smooth_h_predictor_32x16_ssse3(uint8_t *dst, ptrdiff_t stride,
- const uint8_t *above,
- const uint8_t *left) {
- smooth_h_predictor_wxh(dst, stride, above, left, 32, 16);
-}
-
-void aom_smooth_h_predictor_32x32_ssse3(uint8_t *dst, ptrdiff_t stride,
- const uint8_t *above,
- const uint8_t *left) {
- smooth_h_predictor_wxh(dst, stride, above, left, 32, 32);
-}
-
-void aom_smooth_h_predictor_32x64_ssse3(uint8_t *dst, ptrdiff_t stride,
- const uint8_t *above,
- const uint8_t *left) {
- smooth_h_predictor_wxh(dst, stride, above, left, 32, 64);
-}
-
-void aom_smooth_h_predictor_64x64_ssse3(uint8_t *dst, ptrdiff_t stride,
- const uint8_t *above,
- const uint8_t *left) {
- smooth_h_predictor_wxh(dst, stride, above, left, 64, 64);
-}
-
-void aom_smooth_h_predictor_64x32_ssse3(uint8_t *dst, ptrdiff_t stride,
- const uint8_t *above,
- const uint8_t *left) {
- smooth_h_predictor_wxh(dst, stride, above, left, 64, 32);
-}
-
-void aom_smooth_h_predictor_64x16_ssse3(uint8_t *dst, ptrdiff_t stride,
- const uint8_t *above,
- const uint8_t *left) {
- smooth_h_predictor_wxh(dst, stride, above, left, 64, 16);
-}
diff --git a/third_party/aom/aom_dsp/x86/inv_wht_sse2.asm b/third_party/aom/aom_dsp/x86/inv_wht_sse2.asm
deleted file mode 100644
index 0bc841a7a..000000000
--- a/third_party/aom/aom_dsp/x86/inv_wht_sse2.asm
+++ /dev/null
@@ -1,107 +0,0 @@
-;
-; Copyright (c) 2016, Alliance for Open Media. All rights reserved
-;
-; This source code is subject to the terms of the BSD 2 Clause License and
-; the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
-; was not distributed with this source code in the LICENSE file, you can
-; obtain it at www.aomedia.org/license/software. If the Alliance for Open
-; Media Patent License 1.0 was not distributed with this source code in the
-; PATENTS file, you can obtain it at www.aomedia.org/license/patent.
-;
-
-;
-
-%include "third_party/x86inc/x86inc.asm"
-
-SECTION .text
-
-%macro REORDER_INPUTS 0
- ; a c d b to a b c d
- SWAP 1, 3, 2
-%endmacro
-
-%macro TRANSFORM_COLS 0
- ; input:
- ; m0 a
- ; m1 b
- ; m2 c
- ; m3 d
- paddw m0, m2
- psubw m3, m1
-
- ; wide subtract
- punpcklwd m4, m0
- punpcklwd m5, m3
- psrad m4, 16
- psrad m5, 16
- psubd m4, m5
- psrad m4, 1
- packssdw m4, m4 ; e
-
- psubw m5, m4, m1 ; b
- psubw m4, m2 ; c
- psubw m0, m5
- paddw m3, m4
- ; m0 a
- SWAP 1, 5 ; m1 b
- SWAP 2, 4 ; m2 c
- ; m3 d
-%endmacro
-
-%macro TRANSPOSE_4X4 0
- punpcklwd m0, m2
- punpcklwd m1, m3
- mova m2, m0
- punpcklwd m0, m1
- punpckhwd m2, m1
- pshufd m1, m0, 0x0e
- pshufd m3, m2, 0x0e
-%endmacro
-
-; transpose a 4x4 int16 matrix in xmm0 and xmm1 to the bottom half of xmm0-xmm3
-%macro TRANSPOSE_4X4_WIDE 0
- mova m3, m0
- punpcklwd m0, m1
- punpckhwd m3, m1
- mova m2, m0
- punpcklwd m0, m3
- punpckhwd m2, m3
- pshufd m1, m0, 0x0e
- pshufd m3, m2, 0x0e
-%endmacro
-
-%macro ADD_STORE_4P_2X 5 ; src1, src2, tmp1, tmp2, zero
- movd m%3, [outputq]
- movd m%4, [outputq + strideq]
- punpcklbw m%3, m%5
- punpcklbw m%4, m%5
- paddw m%1, m%3
- paddw m%2, m%4
- packuswb m%1, m%5
- packuswb m%2, m%5
- movd [outputq], m%1
- movd [outputq + strideq], m%2
-%endmacro
-
-INIT_XMM sse2
-cglobal iwht4x4_16_add, 3, 3, 7, input, output, stride
- mova m0, [inputq + 0]
- packssdw m0, [inputq + 16]
- mova m1, [inputq + 32]
- packssdw m1, [inputq + 48]
- psraw m0, 2
- psraw m1, 2
-
- TRANSPOSE_4X4_WIDE
- REORDER_INPUTS
- TRANSFORM_COLS
- TRANSPOSE_4X4
- REORDER_INPUTS
- TRANSFORM_COLS
-
- pxor m4, m4
- ADD_STORE_4P_2X 0, 1, 5, 6, 4
- lea outputq, [outputq + 2 * strideq]
- ADD_STORE_4P_2X 2, 3, 5, 6, 4
-
- RET
diff --git a/third_party/aom/aom_dsp/x86/jnt_sad_ssse3.c b/third_party/aom/aom_dsp/x86/jnt_sad_ssse3.c
deleted file mode 100644
index c3c88245a..000000000
--- a/third_party/aom/aom_dsp/x86/jnt_sad_ssse3.c
+++ /dev/null
@@ -1,238 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#include <assert.h>
-#include <emmintrin.h> // SSE2
-#include <tmmintrin.h>
-
-#include "config/aom_config.h"
-#include "config/aom_dsp_rtcd.h"
-#include "config/av1_rtcd.h"
-
-#include "aom_dsp/x86/synonyms.h"
-
-unsigned int aom_sad4xh_sse2(const uint8_t *a, int a_stride, const uint8_t *b,
- int b_stride, int width, int height) {
- int i;
- assert(width == 4);
- (void)width;
-
- __m128i sad = _mm_setzero_si128();
- for (i = 0; i < height; i += 4) {
- __m128i x0 = xx_loadl_32(a + 0 * a_stride);
- __m128i x1 = xx_loadl_32(a + 1 * a_stride);
- __m128i x2 = xx_loadl_32(a + 2 * a_stride);
- __m128i x3 = xx_loadl_32(a + 3 * a_stride);
- __m128i x_lo = _mm_unpacklo_epi32(x0, x1);
- __m128i x_hi = _mm_unpacklo_epi32(x2, x3);
-
- __m128i x = _mm_unpacklo_epi64(x_lo, x_hi);
-
- x0 = xx_loadl_32(b + 0 * b_stride);
- x1 = xx_loadl_32(b + 1 * b_stride);
- x2 = xx_loadl_32(b + 2 * b_stride);
- x3 = xx_loadl_32(b + 3 * b_stride);
- x_lo = _mm_unpacklo_epi32(x0, x1);
- x_hi = _mm_unpacklo_epi32(x2, x3);
-
- __m128i y = _mm_unpacklo_epi64(x_lo, x_hi);
-
- __m128i sad4x4 = _mm_sad_epu8(x, y);
- sad = _mm_add_epi32(sad, sad4x4);
-
- a += 4 * a_stride;
- b += 4 * b_stride;
- }
-
- // At this point, we have two 32-bit partial SADs at bit[0:31] and [64:95].
- const unsigned int res =
- _mm_cvtsi128_si32(sad) + _mm_cvtsi128_si32(_mm_srli_si128(sad, 8));
-
- return res;
-}
-
-unsigned int aom_sad8xh_sse2(const uint8_t *a, int a_stride, const uint8_t *b,
- int b_stride, int width, int height) {
- int i;
- assert(width == 8);
- (void)width;
-
- __m128i sad = _mm_setzero_si128();
- for (i = 0; i < height; i += 2) {
- __m128i x0 = xx_loadl_64(a + 0 * a_stride);
- __m128i x1 = xx_loadl_64(a + 1 * a_stride);
-
- __m128i x = _mm_unpacklo_epi64(x0, x1);
-
- x0 = xx_loadl_64(b + 0 * b_stride);
- x1 = xx_loadl_64(b + 1 * b_stride);
-
- __m128i y = _mm_unpacklo_epi64(x0, x1);
-
- __m128i sad8x2 = _mm_sad_epu8(x, y);
- sad = _mm_add_epi32(sad, sad8x2);
-
- a += 2 * a_stride;
- b += 2 * b_stride;
- }
-
- const unsigned int res =
- _mm_cvtsi128_si32(sad) + _mm_cvtsi128_si32(_mm_srli_si128(sad, 8));
-
- return res;
-}
-
-unsigned int aom_sad16xh_sse2(const uint8_t *a, int a_stride, const uint8_t *b,
- int b_stride, int width, int height) {
- int i;
- assert(width == 16);
- (void)width;
-
- __m128i sad = _mm_setzero_si128();
- for (i = 0; i < height; ++i) {
- __m128i x = xx_loadu_128(a);
- __m128i y = xx_loadu_128(b);
-
- __m128i sad16x1 = _mm_sad_epu8(x, y);
- sad = _mm_add_epi32(sad, sad16x1);
-
- a += a_stride;
- b += b_stride;
- }
-
- const unsigned int res =
- _mm_cvtsi128_si32(sad) + _mm_cvtsi128_si32(_mm_srli_si128(sad, 8));
-
- return res;
-}
-
-unsigned int aom_sad32xh_sse2(const uint8_t *a, int a_stride, const uint8_t *b,
- int b_stride, int width, int height) {
- int i, j;
- assert(width == 32);
- (void)width;
-
- __m128i sad = _mm_setzero_si128();
- for (i = 0; i < height; ++i) {
- for (j = 0; j < 2; ++j) {
- __m128i x = xx_loadu_128(a + j * 16);
- __m128i y = xx_loadu_128(b + j * 16);
-
- __m128i sad32_half = _mm_sad_epu8(x, y);
- sad = _mm_add_epi32(sad, sad32_half);
- }
-
- a += a_stride;
- b += b_stride;
- }
-
- const unsigned int res =
- _mm_cvtsi128_si32(sad) + _mm_cvtsi128_si32(_mm_srli_si128(sad, 8));
-
- return res;
-}
-
-unsigned int aom_sad64xh_sse2(const uint8_t *a, int a_stride, const uint8_t *b,
- int b_stride, int width, int height) {
- int i, j;
- assert(width == 64);
- (void)width;
-
- __m128i sad = _mm_setzero_si128();
- for (i = 0; i < height; ++i) {
- for (j = 0; j < 4; ++j) {
- __m128i x = xx_loadu_128(a + j * 16);
- __m128i y = xx_loadu_128(b + j * 16);
-
- __m128i sad64_quarter = _mm_sad_epu8(x, y);
- sad = _mm_add_epi32(sad, sad64_quarter);
- }
-
- a += a_stride;
- b += b_stride;
- }
-
- const unsigned int res =
- _mm_cvtsi128_si32(sad) + _mm_cvtsi128_si32(_mm_srli_si128(sad, 8));
-
- return res;
-}
-
-unsigned int aom_sad128xh_sse2(const uint8_t *a, int a_stride, const uint8_t *b,
- int b_stride, int width, int height) {
- int i, j;
- assert(width == 128);
- (void)width;
-
- __m128i sad = _mm_setzero_si128();
- for (i = 0; i < height; ++i) {
- for (j = 0; j < 8; ++j) {
- __m128i x = xx_loadu_128(a + j * 16);
- __m128i y = xx_loadu_128(b + j * 16);
-
- __m128i sad64_quarter = _mm_sad_epu8(x, y);
- sad = _mm_add_epi32(sad, sad64_quarter);
- }
-
- a += a_stride;
- b += b_stride;
- }
-
- const unsigned int res =
- _mm_cvtsi128_si32(sad) + _mm_cvtsi128_si32(_mm_srli_si128(sad, 8));
-
- return res;
-}
-
-#define jnt_sadMxN_sse2(m, n) \
- unsigned int aom_jnt_sad##m##x##n##_avg_ssse3( \
- const uint8_t *src, int src_stride, const uint8_t *ref, int ref_stride, \
- const uint8_t *second_pred, const JNT_COMP_PARAMS *jcp_param) { \
- uint8_t comp_pred[m * n]; \
- aom_jnt_comp_avg_pred(comp_pred, second_pred, m, n, ref, ref_stride, \
- jcp_param); \
- return aom_sad##m##xh_sse2(src, src_stride, comp_pred, m, m, n); \
- }
-
-#define jnt_sadMxN_avx2(m, n) \
- unsigned int aom_jnt_sad##m##x##n##_avg_avx2( \
- const uint8_t *src, int src_stride, const uint8_t *ref, int ref_stride, \
- const uint8_t *second_pred, const JNT_COMP_PARAMS *jcp_param) { \
- uint8_t comp_pred[m * n]; \
- aom_jnt_comp_avg_pred(comp_pred, second_pred, m, n, ref, ref_stride, \
- jcp_param); \
- return aom_sad##m##xh_avx2(src, src_stride, comp_pred, m, m, n); \
- }
-
-/* clang-format off */
-jnt_sadMxN_sse2(128, 128)
-jnt_sadMxN_sse2(128, 64)
-jnt_sadMxN_sse2(64, 128)
-jnt_sadMxN_sse2(64, 64)
-jnt_sadMxN_sse2(64, 32)
-jnt_sadMxN_sse2(32, 64)
-jnt_sadMxN_sse2(32, 32)
-jnt_sadMxN_sse2(32, 16)
-jnt_sadMxN_sse2(16, 32)
-jnt_sadMxN_sse2(16, 16)
-jnt_sadMxN_sse2(16, 8)
-jnt_sadMxN_sse2(8, 16)
-jnt_sadMxN_sse2(8, 8)
-jnt_sadMxN_sse2(8, 4)
-jnt_sadMxN_sse2(4, 8)
-jnt_sadMxN_sse2(4, 4)
-jnt_sadMxN_sse2(4, 16)
-jnt_sadMxN_sse2(16, 4)
-jnt_sadMxN_sse2(8, 32)
-jnt_sadMxN_sse2(32, 8)
-jnt_sadMxN_sse2(16, 64)
-jnt_sadMxN_sse2(64, 16)
- /* clang-format on */
diff --git a/third_party/aom/aom_dsp/x86/jnt_variance_ssse3.c b/third_party/aom/aom_dsp/x86/jnt_variance_ssse3.c
deleted file mode 100644
index f9a41a210..000000000
--- a/third_party/aom/aom_dsp/x86/jnt_variance_ssse3.c
+++ /dev/null
@@ -1,192 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#include <assert.h>
-#include <emmintrin.h> // SSE2
-#include <tmmintrin.h>
-
-#include "config/aom_config.h"
-#include "config/aom_dsp_rtcd.h"
-#include "config/av1_rtcd.h"
-
-#include "aom_dsp/x86/synonyms.h"
-
-void aom_var_filter_block2d_bil_first_pass_ssse3(
- const uint8_t *a, uint16_t *b, unsigned int src_pixels_per_line,
- unsigned int pixel_step, unsigned int output_height,
- unsigned int output_width, const uint8_t *filter);
-
-void aom_var_filter_block2d_bil_second_pass_ssse3(
- const uint16_t *a, uint8_t *b, unsigned int src_pixels_per_line,
- unsigned int pixel_step, unsigned int output_height,
- unsigned int output_width, const uint8_t *filter);
-
-static INLINE void compute_jnt_comp_avg(__m128i *p0, __m128i *p1,
- const __m128i *w, const __m128i *r,
- void *const result) {
- __m128i p_lo = _mm_unpacklo_epi8(*p0, *p1);
- __m128i mult_lo = _mm_maddubs_epi16(p_lo, *w);
- __m128i round_lo = _mm_add_epi16(mult_lo, *r);
- __m128i shift_lo = _mm_srai_epi16(round_lo, DIST_PRECISION_BITS);
-
- __m128i p_hi = _mm_unpackhi_epi8(*p0, *p1);
- __m128i mult_hi = _mm_maddubs_epi16(p_hi, *w);
- __m128i round_hi = _mm_add_epi16(mult_hi, *r);
- __m128i shift_hi = _mm_srai_epi16(round_hi, DIST_PRECISION_BITS);
-
- xx_storeu_128(result, _mm_packus_epi16(shift_lo, shift_hi));
-}
-
-void aom_jnt_comp_avg_pred_ssse3(uint8_t *comp_pred, const uint8_t *pred,
- int width, int height, const uint8_t *ref,
- int ref_stride,
- const JNT_COMP_PARAMS *jcp_param) {
- int i;
- const uint8_t w0 = (uint8_t)jcp_param->fwd_offset;
- const uint8_t w1 = (uint8_t)jcp_param->bck_offset;
- const __m128i w = _mm_set_epi8(w1, w0, w1, w0, w1, w0, w1, w0, w1, w0, w1, w0,
- w1, w0, w1, w0);
- const uint16_t round = ((1 << DIST_PRECISION_BITS) >> 1);
- const __m128i r =
- _mm_set_epi16(round, round, round, round, round, round, round, round);
-
- if (width >= 16) {
- // Read 16 pixels one row at a time
- assert(!(width & 15));
- for (i = 0; i < height; ++i) {
- int j;
- for (j = 0; j < width; j += 16) {
- __m128i p0 = xx_loadu_128(ref);
- __m128i p1 = xx_loadu_128(pred);
-
- compute_jnt_comp_avg(&p0, &p1, &w, &r, comp_pred);
-
- comp_pred += 16;
- pred += 16;
- ref += 16;
- }
- ref += ref_stride - width;
- }
- } else if (width >= 8) {
- // Read 8 pixels two row at a time
- assert(!(width & 7));
- assert(!(width & 1));
- for (i = 0; i < height; i += 2) {
- __m128i p0_0 = xx_loadl_64(ref + 0 * ref_stride);
- __m128i p0_1 = xx_loadl_64(ref + 1 * ref_stride);
- __m128i p0 = _mm_unpacklo_epi64(p0_0, p0_1);
- __m128i p1 = xx_loadu_128(pred);
-
- compute_jnt_comp_avg(&p0, &p1, &w, &r, comp_pred);
-
- comp_pred += 16;
- pred += 16;
- ref += 2 * ref_stride;
- }
- } else {
- // Read 4 pixels four row at a time
- assert(!(width & 3));
- assert(!(height & 3));
- for (i = 0; i < height; i += 4) {
- const uint8_t *row0 = ref + 0 * ref_stride;
- const uint8_t *row1 = ref + 1 * ref_stride;
- const uint8_t *row2 = ref + 2 * ref_stride;
- const uint8_t *row3 = ref + 3 * ref_stride;
-
- __m128i p0 =
- _mm_setr_epi8(row0[0], row0[1], row0[2], row0[3], row1[0], row1[1],
- row1[2], row1[3], row2[0], row2[1], row2[2], row2[3],
- row3[0], row3[1], row3[2], row3[3]);
- __m128i p1 = xx_loadu_128(pred);
-
- compute_jnt_comp_avg(&p0, &p1, &w, &r, comp_pred);
-
- comp_pred += 16;
- pred += 16;
- ref += 4 * ref_stride;
- }
- }
-}
-
-void aom_jnt_comp_avg_upsampled_pred_ssse3(
- MACROBLOCKD *xd, const struct AV1Common *const cm, int mi_row, int mi_col,
- const MV *const mv, uint8_t *comp_pred, const uint8_t *pred, int width,
- int height, int subpel_x_q3, int subpel_y_q3, const uint8_t *ref,
- int ref_stride, const JNT_COMP_PARAMS *jcp_param, int subpel_search) {
- int n;
- int i;
- aom_upsampled_pred(xd, cm, mi_row, mi_col, mv, comp_pred, width, height,
- subpel_x_q3, subpel_y_q3, ref, ref_stride, subpel_search);
- /*The total number of pixels must be a multiple of 16 (e.g., 4x4).*/
- assert(!(width * height & 15));
- n = width * height >> 4;
-
- const uint8_t w0 = (uint8_t)jcp_param->fwd_offset;
- const uint8_t w1 = (uint8_t)jcp_param->bck_offset;
- const __m128i w = _mm_set_epi8(w1, w0, w1, w0, w1, w0, w1, w0, w1, w0, w1, w0,
- w1, w0, w1, w0);
- const uint16_t round = ((1 << DIST_PRECISION_BITS) >> 1);
- const __m128i r =
- _mm_set_epi16(round, round, round, round, round, round, round, round);
-
- for (i = 0; i < n; i++) {
- __m128i p0 = xx_loadu_128(comp_pred);
- __m128i p1 = xx_loadu_128(pred);
-
- compute_jnt_comp_avg(&p0, &p1, &w, &r, comp_pred);
-
- comp_pred += 16;
- pred += 16;
- }
-}
-
-#define JNT_SUBPIX_AVG_VAR(W, H) \
- uint32_t aom_jnt_sub_pixel_avg_variance##W##x##H##_ssse3( \
- const uint8_t *a, int a_stride, int xoffset, int yoffset, \
- const uint8_t *b, int b_stride, uint32_t *sse, \
- const uint8_t *second_pred, const JNT_COMP_PARAMS *jcp_param) { \
- uint16_t fdata3[(H + 1) * W]; \
- uint8_t temp2[H * W]; \
- DECLARE_ALIGNED(16, uint8_t, temp3[H * W]); \
- \
- aom_var_filter_block2d_bil_first_pass_ssse3( \
- a, fdata3, a_stride, 1, H + 1, W, bilinear_filters_2t[xoffset]); \
- aom_var_filter_block2d_bil_second_pass_ssse3( \
- fdata3, temp2, W, W, H, W, bilinear_filters_2t[yoffset]); \
- \
- aom_jnt_comp_avg_pred_ssse3(temp3, second_pred, W, H, temp2, W, \
- jcp_param); \
- \
- return aom_variance##W##x##H(temp3, W, b, b_stride, sse); \
- }
-
-JNT_SUBPIX_AVG_VAR(128, 128)
-JNT_SUBPIX_AVG_VAR(128, 64)
-JNT_SUBPIX_AVG_VAR(64, 128)
-JNT_SUBPIX_AVG_VAR(64, 64)
-JNT_SUBPIX_AVG_VAR(64, 32)
-JNT_SUBPIX_AVG_VAR(32, 64)
-JNT_SUBPIX_AVG_VAR(32, 32)
-JNT_SUBPIX_AVG_VAR(32, 16)
-JNT_SUBPIX_AVG_VAR(16, 32)
-JNT_SUBPIX_AVG_VAR(16, 16)
-JNT_SUBPIX_AVG_VAR(16, 8)
-JNT_SUBPIX_AVG_VAR(8, 16)
-JNT_SUBPIX_AVG_VAR(8, 8)
-JNT_SUBPIX_AVG_VAR(8, 4)
-JNT_SUBPIX_AVG_VAR(4, 8)
-JNT_SUBPIX_AVG_VAR(4, 4)
-JNT_SUBPIX_AVG_VAR(4, 16)
-JNT_SUBPIX_AVG_VAR(16, 4)
-JNT_SUBPIX_AVG_VAR(8, 32)
-JNT_SUBPIX_AVG_VAR(32, 8)
-JNT_SUBPIX_AVG_VAR(16, 64)
-JNT_SUBPIX_AVG_VAR(64, 16)
diff --git a/third_party/aom/aom_dsp/x86/loopfilter_sse2.c b/third_party/aom/aom_dsp/x86/loopfilter_sse2.c
deleted file mode 100644
index 9d88b5e49..000000000
--- a/third_party/aom/aom_dsp/x86/loopfilter_sse2.c
+++ /dev/null
@@ -1,2385 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#include <emmintrin.h> // SSE2
-
-#include "config/aom_dsp_rtcd.h"
-
-#include "aom_dsp/x86/synonyms.h"
-#include "aom_ports/mem.h"
-#include "aom_ports/emmintrin_compat.h"
-
-static INLINE __m128i abs_diff(__m128i a, __m128i b) {
- return _mm_or_si128(_mm_subs_epu8(a, b), _mm_subs_epu8(b, a));
-}
-
-static INLINE void transpose4x8_8x4_low_sse2(__m128i *x0, __m128i *x1,
- __m128i *x2, __m128i *x3,
- __m128i *d0, __m128i *d1,
- __m128i *d2, __m128i *d3) {
- // input
- // x0 00 01 02 03 04 05 06 07 xx xx xx xx xx xx xx xx
- // x1 10 11 12 13 14 15 16 17 xx xx xx xx xx xx xx xx
- // x2 20 21 22 23 24 25 26 27 xx xx xx xx xx xx xx xx
- // x3 30 31 32 33 34 35 36 37 xx xx xx xx xx xx xx xx
- // output
- // 00 10 20 30 xx xx xx xx xx xx xx xx xx xx xx xx
- // 01 11 21 31 xx xx xx xx xx xx xx xx xx xx xx xx
- // 02 12 22 32 xx xx xx xx xx xx xx xx xx xx xx xx
- // 03 13 23 33 xx xx xx xx xx xx xx xx xx xx xx xx
-
- __m128i w0, w1;
-
- w0 = _mm_unpacklo_epi8(
- *x0, *x1); // 00 10 01 11 02 12 03 13 04 14 05 15 06 16 07 17
- w1 = _mm_unpacklo_epi8(
- *x2, *x3); // 20 30 21 31 22 32 23 33 24 34 25 35 26 36 27 37
-
- *d0 = _mm_unpacklo_epi16(
- w0, w1); // 00 10 20 30 01 11 21 31 02 12 22 32 03 13 23 33
-
- *d1 = _mm_srli_si128(*d0,
- 4); // 01 11 21 31 xx xx xx xx xx xx xx xx xx xx xx xx
- *d2 = _mm_srli_si128(*d0,
- 8); // 02 12 22 32 xx xx xx xx xx xx xx xx xx xx xx xx
- *d3 = _mm_srli_si128(*d0,
- 12); // 03 13 23 33 xx xx xx xx xx xx xx xx xx xx xx xx
-}
-
-static INLINE void transpose4x8_8x4_sse2(__m128i *x0, __m128i *x1, __m128i *x2,
- __m128i *x3, __m128i *d0, __m128i *d1,
- __m128i *d2, __m128i *d3, __m128i *d4,
- __m128i *d5, __m128i *d6,
- __m128i *d7) {
- // input
- // x0 00 01 02 03 04 05 06 07 xx xx xx xx xx xx xx xx
- // x1 10 11 12 13 14 15 16 17 xx xx xx xx xx xx xx xx
- // x2 20 21 22 23 24 25 26 27 xx xx xx xx xx xx xx xx
- // x3 30 31 32 33 34 35 36 37 xx xx xx xx xx xx xx xx
- // output
- // 00 10 20 30 xx xx xx xx xx xx xx xx xx xx xx xx
- // 01 11 21 31 xx xx xx xx xx xx xx xx xx xx xx xx
- // 02 12 22 32 xx xx xx xx xx xx xx xx xx xx xx xx
- // 03 13 23 33 xx xx xx xx xx xx xx xx xx xx xx xx
- // 04 14 24 34 xx xx xx xx xx xx xx xx xx xx xx xx
- // 05 15 25 35 xx xx xx xx xx xx xx xx xx xx xx xx
- // 06 16 26 36 xx xx xx xx xx xx xx xx xx xx xx xx
- // 07 17 27 37 xx xx xx xx xx xx xx xx xx xx xx xx
-
- __m128i w0, w1, ww0, ww1;
-
- w0 = _mm_unpacklo_epi8(
- *x0, *x1); // 00 10 01 11 02 12 03 13 04 14 05 15 06 16 07 17
- w1 = _mm_unpacklo_epi8(
- *x2, *x3); // 20 30 21 31 22 32 23 33 24 34 25 35 26 36 27 37
-
- ww0 = _mm_unpacklo_epi16(
- w0, w1); // 00 10 20 30 01 11 21 31 02 12 22 32 03 13 23 33
- ww1 = _mm_unpackhi_epi16(
- w0, w1); // 04 14 24 34 05 15 25 35 06 16 26 36 07 17 27 37
-
- *d0 = ww0; // 00 10 20 30 xx xx xx xx xx xx xx xx xx xx xx xx
- *d1 = _mm_srli_si128(ww0,
- 4); // 01 11 21 31 xx xx xx xx xx xx xx xx xx xx xx xx
- *d2 = _mm_srli_si128(ww0,
- 8); // 02 12 22 32 xx xx xx xx xx xx xx xx xx xx xx xx
- *d3 = _mm_srli_si128(ww0,
- 12); // 03 13 23 33 xx xx xx xx xx xx xx xx xx xx xx xx
-
- *d4 = ww1; // 04 14 24 34 xx xx xx xx xx xx xx xx xx xx xx xx
- *d5 = _mm_srli_si128(ww1,
- 4); // 05 15 25 35 xx xx xx xx xx xx xx xx xx xx xx xx
- *d6 = _mm_srli_si128(ww1,
- 8); // 06 16 26 36 xx xx xx xx xx xx xx xx xx xx xx xx
- *d7 = _mm_srli_si128(ww1,
- 12); // 07 17 27 37 xx xx xx xx xx xx xx xx xx xx xx xx
-}
-
-static INLINE void transpose8x8_low_sse2(__m128i *x0, __m128i *x1, __m128i *x2,
- __m128i *x3, __m128i *x4, __m128i *x5,
- __m128i *x6, __m128i *x7, __m128i *d0,
- __m128i *d1, __m128i *d2,
- __m128i *d3) {
- // input
- // x0 00 01 02 03 04 05 06 07
- // x1 10 11 12 13 14 15 16 17
- // x2 20 21 22 23 24 25 26 27
- // x3 30 31 32 33 34 35 36 37
- // x4 40 41 42 43 44 45 46 47
- // x5 50 51 52 53 54 55 56 57
- // x6 60 61 62 63 64 65 66 67
- // x7 70 71 72 73 74 75 76 77
- // output
- // d0 00 10 20 30 40 50 60 70 xx xx xx xx xx xx xx
- // d1 01 11 21 31 41 51 61 71 xx xx xx xx xx xx xx xx
- // d2 02 12 22 32 42 52 62 72 xx xx xx xx xx xx xx xx
- // d3 03 13 23 33 43 53 63 73 xx xx xx xx xx xx xx xx
-
- __m128i w0, w1, w2, w3, w4, w5;
-
- w0 = _mm_unpacklo_epi8(
- *x0, *x1); // 00 10 01 11 02 12 03 13 04 14 05 15 06 16 07 17
-
- w1 = _mm_unpacklo_epi8(
- *x2, *x3); // 20 30 21 31 22 32 23 33 24 34 25 35 26 36 27 37
-
- w2 = _mm_unpacklo_epi8(
- *x4, *x5); // 40 50 41 51 42 52 43 53 44 54 45 55 46 56 47 57
-
- w3 = _mm_unpacklo_epi8(
- *x6, *x7); // 60 70 61 71 62 72 63 73 64 74 65 75 66 76 67 77
-
- w4 = _mm_unpacklo_epi16(
- w0, w1); // 00 10 20 30 01 11 21 31 02 12 22 32 03 13 23 33
- w5 = _mm_unpacklo_epi16(
- w2, w3); // 40 50 60 70 41 51 61 71 42 52 62 72 43 53 63 73
-
- *d0 = _mm_unpacklo_epi32(
- w4, w5); // 00 10 20 30 40 50 60 70 01 11 21 31 41 51 61 71
- *d1 = _mm_srli_si128(*d0, 8);
- *d2 = _mm_unpackhi_epi32(
- w4, w5); // 02 12 22 32 42 52 62 72 03 13 23 33 43 53 63 73
- *d3 = _mm_srli_si128(*d2, 8);
-}
-
-static INLINE void transpose8x8_sse2(__m128i *x0, __m128i *x1, __m128i *x2,
- __m128i *x3, __m128i *x4, __m128i *x5,
- __m128i *x6, __m128i *x7, __m128i *d0d1,
- __m128i *d2d3, __m128i *d4d5,
- __m128i *d6d7) {
- __m128i w0, w1, w2, w3, w4, w5, w6, w7;
- // x0 00 01 02 03 04 05 06 07
- // x1 10 11 12 13 14 15 16 17
- w0 = _mm_unpacklo_epi8(
- *x0, *x1); // 00 10 01 11 02 12 03 13 04 14 05 15 06 16 07 17
-
- // x2 20 21 22 23 24 25 26 27
- // x3 30 31 32 33 34 35 36 37
- w1 = _mm_unpacklo_epi8(
- *x2, *x3); // 20 30 21 31 22 32 23 33 24 34 25 35 26 36 27 37
-
- // x4 40 41 42 43 44 45 46 47
- // x5 50 51 52 53 54 55 56 57
- w2 = _mm_unpacklo_epi8(
- *x4, *x5); // 40 50 41 51 42 52 43 53 44 54 45 55 46 56 47 57
-
- // x6 60 61 62 63 64 65 66 67
- // x7 70 71 72 73 74 75 76 77
- w3 = _mm_unpacklo_epi8(
- *x6, *x7); // 60 70 61 71 62 72 63 73 64 74 65 75 66 76 67 77
-
- w4 = _mm_unpacklo_epi16(
- w0, w1); // 00 10 20 30 01 11 21 31 02 12 22 32 03 13 23 33
- w5 = _mm_unpacklo_epi16(
- w2, w3); // 40 50 60 70 41 51 61 71 42 52 62 72 43 53 63 73
-
- *d0d1 = _mm_unpacklo_epi32(
- w4, w5); // 00 10 20 30 40 50 60 70 01 11 21 31 41 51 61 71
- *d2d3 = _mm_unpackhi_epi32(
- w4, w5); // 02 12 22 32 42 52 62 72 03 13 23 33 43 53 63 73
-
- w6 = _mm_unpackhi_epi16(
- w0, w1); // 04 14 24 34 05 15 25 35 06 16 26 36 07 17 27 37
- w7 = _mm_unpackhi_epi16(
- w2, w3); // 44 54 64 74 45 55 65 75 46 56 66 76 47 57 67 77
-
- *d4d5 = _mm_unpacklo_epi32(
- w6, w7); // 04 14 24 34 44 54 64 74 05 15 25 35 45 55 65 75
- *d6d7 = _mm_unpackhi_epi32(
- w6, w7); // 06 16 26 36 46 56 66 76 07 17 27 37 47 57 67 77
-}
-
-static INLINE void transpose16x8_8x16_sse2(
- __m128i *x0, __m128i *x1, __m128i *x2, __m128i *x3, __m128i *x4,
- __m128i *x5, __m128i *x6, __m128i *x7, __m128i *x8, __m128i *x9,
- __m128i *x10, __m128i *x11, __m128i *x12, __m128i *x13, __m128i *x14,
- __m128i *x15, __m128i *d0, __m128i *d1, __m128i *d2, __m128i *d3,
- __m128i *d4, __m128i *d5, __m128i *d6, __m128i *d7) {
- __m128i w0, w1, w2, w3, w4, w5, w6, w7, w8, w9;
- __m128i w10, w11, w12, w13, w14, w15;
-
- w0 = _mm_unpacklo_epi8(*x0, *x1);
- w1 = _mm_unpacklo_epi8(*x2, *x3);
- w2 = _mm_unpacklo_epi8(*x4, *x5);
- w3 = _mm_unpacklo_epi8(*x6, *x7);
-
- w8 = _mm_unpacklo_epi8(*x8, *x9);
- w9 = _mm_unpacklo_epi8(*x10, *x11);
- w10 = _mm_unpacklo_epi8(*x12, *x13);
- w11 = _mm_unpacklo_epi8(*x14, *x15);
-
- w4 = _mm_unpacklo_epi16(w0, w1);
- w5 = _mm_unpacklo_epi16(w2, w3);
- w12 = _mm_unpacklo_epi16(w8, w9);
- w13 = _mm_unpacklo_epi16(w10, w11);
-
- w6 = _mm_unpacklo_epi32(w4, w5);
- w7 = _mm_unpackhi_epi32(w4, w5);
- w14 = _mm_unpacklo_epi32(w12, w13);
- w15 = _mm_unpackhi_epi32(w12, w13);
-
- // Store first 4-line result
- *d0 = _mm_unpacklo_epi64(w6, w14);
- *d1 = _mm_unpackhi_epi64(w6, w14);
- *d2 = _mm_unpacklo_epi64(w7, w15);
- *d3 = _mm_unpackhi_epi64(w7, w15);
-
- w4 = _mm_unpackhi_epi16(w0, w1);
- w5 = _mm_unpackhi_epi16(w2, w3);
- w12 = _mm_unpackhi_epi16(w8, w9);
- w13 = _mm_unpackhi_epi16(w10, w11);
-
- w6 = _mm_unpacklo_epi32(w4, w5);
- w7 = _mm_unpackhi_epi32(w4, w5);
- w14 = _mm_unpacklo_epi32(w12, w13);
- w15 = _mm_unpackhi_epi32(w12, w13);
-
- // Store second 4-line result
- *d4 = _mm_unpacklo_epi64(w6, w14);
- *d5 = _mm_unpackhi_epi64(w6, w14);
- *d6 = _mm_unpacklo_epi64(w7, w15);
- *d7 = _mm_unpackhi_epi64(w7, w15);
-}
-
-// this function treats its input as 2 parallel 8x4 matrices, transposes each of
-// them independently while flipping the second matrix horizontaly Used for 14
-// taps filter pq pairs inverse
-static INLINE void transpose_pq_14_inv_sse2(__m128i *x0, __m128i *x1,
- __m128i *x2, __m128i *x3,
- __m128i *x4, __m128i *x5,
- __m128i *x6, __m128i *x7,
- __m128i *pq0, __m128i *pq1,
- __m128i *pq2, __m128i *pq3) {
- __m128i w10, w11, w12, w13;
- __m128i w0, w1, w2, w3, w4, w5;
- __m128i d0, d1, d2, d3;
-
- w0 = _mm_unpacklo_epi8(
- *x0, *x1); // p 00 10 01 11 02 12 03 13 04 14 05 15 06 16 07 17
- w1 = _mm_unpacklo_epi8(
- *x2, *x3); // p 20 30 21 31 22 32 23 33 24 34 25 35 26 36 27 37
- w2 = _mm_unpacklo_epi8(
- *x4, *x5); // p 40 50 41 51 42 52 43 53 44 54 45 55 46 56 47 57
- w3 = _mm_unpacklo_epi8(
- *x6, *x7); // p 60 70 61 71 62 72 63 73 64 74 65 75 66 76 67 77
-
- w4 = _mm_unpacklo_epi16(
- w0, w1); // 00 10 20 30 01 11 21 31 02 12 22 32 03 13 23 33
- w5 = _mm_unpacklo_epi16(
- w2, w3); // 40 50 60 70 41 51 61 71 42 52 62 72 43 53 63 73
-
- d0 = _mm_unpacklo_epi32(
- w4, w5); // 00 10 20 30 40 50 60 70 01 11 21 31 41 51 61 71
- d2 = _mm_unpackhi_epi32(
- w4, w5); // 02 12 22 32 42 52 62 72 03 13 23 33 43 53 63 73
-
- w10 = _mm_unpacklo_epi8(
- *x7, *x6); // q xx xx xx xx xx xx xx xx 00 10 01 11 02 12 03 13
- w11 = _mm_unpacklo_epi8(
- *x5, *x4); // q xx xx xx xx xx xx xx xx 20 30 21 31 22 32 23 33
- w12 = _mm_unpacklo_epi8(
- *x3, *x2); // q xx xx xx xx xx xx xx xx 40 50 41 51 42 52 43 53
- w13 = _mm_unpacklo_epi8(
- *x1, *x0); // q xx xx xx xx xx xx xx xx 60 70 61 71 62 72 63 73
-
- w4 = _mm_unpackhi_epi16(
- w10, w11); // 00 10 20 30 01 11 21 31 02 12 22 32 03 13 23 33
- w5 = _mm_unpackhi_epi16(
- w12, w13); // 40 50 60 70 41 51 61 71 42 52 62 72 43 53 63 73
-
- d1 = _mm_unpacklo_epi32(
- w4, w5); // 00 10 20 30 40 50 60 70 01 11 21 31 41 51 61 71
- d3 = _mm_unpackhi_epi32(
- w4, w5); // 02 12 22 32 42 52 62 72 03 13 23 33 43 53 63 73
-
- *pq0 = _mm_unpacklo_epi64(d0, d1); // pq
- *pq1 = _mm_unpackhi_epi64(d0, d1); // pq
- *pq2 = _mm_unpacklo_epi64(d2, d3); // pq
- *pq3 = _mm_unpackhi_epi64(d2, d3); // pq
-}
-
-static INLINE void transpose8x16_16x8_sse2(
- __m128i *x0, __m128i *x1, __m128i *x2, __m128i *x3, __m128i *x4,
- __m128i *x5, __m128i *x6, __m128i *x7, __m128i *d0d1, __m128i *d2d3,
- __m128i *d4d5, __m128i *d6d7, __m128i *d8d9, __m128i *d10d11,
- __m128i *d12d13, __m128i *d14d15) {
- __m128i w0, w1, w2, w3, w4, w5, w6, w7, w8, w9;
- __m128i w10, w11, w12, w13, w14, w15;
-
- w0 = _mm_unpacklo_epi8(*x0, *x1);
- w1 = _mm_unpacklo_epi8(*x2, *x3);
- w2 = _mm_unpacklo_epi8(*x4, *x5);
- w3 = _mm_unpacklo_epi8(*x6, *x7);
-
- w8 = _mm_unpackhi_epi8(*x0, *x1);
- w9 = _mm_unpackhi_epi8(*x2, *x3);
- w10 = _mm_unpackhi_epi8(*x4, *x5);
- w11 = _mm_unpackhi_epi8(*x6, *x7);
-
- w4 = _mm_unpacklo_epi16(w0, w1);
- w5 = _mm_unpacklo_epi16(w2, w3);
- w12 = _mm_unpacklo_epi16(w8, w9);
- w13 = _mm_unpacklo_epi16(w10, w11);
-
- w6 = _mm_unpacklo_epi32(w4, w5);
- w7 = _mm_unpackhi_epi32(w4, w5);
- w14 = _mm_unpacklo_epi32(w12, w13);
- w15 = _mm_unpackhi_epi32(w12, w13);
-
- // Store first 4-line result
- *d0d1 = _mm_unpacklo_epi64(w6, w14);
- *d2d3 = _mm_unpackhi_epi64(w6, w14);
- *d4d5 = _mm_unpacklo_epi64(w7, w15);
- *d6d7 = _mm_unpackhi_epi64(w7, w15);
-
- w4 = _mm_unpackhi_epi16(w0, w1);
- w5 = _mm_unpackhi_epi16(w2, w3);
- w12 = _mm_unpackhi_epi16(w8, w9);
- w13 = _mm_unpackhi_epi16(w10, w11);
-
- w6 = _mm_unpacklo_epi32(w4, w5);
- w7 = _mm_unpackhi_epi32(w4, w5);
- w14 = _mm_unpacklo_epi32(w12, w13);
- w15 = _mm_unpackhi_epi32(w12, w13);
-
- // Store second 4-line result
- *d8d9 = _mm_unpacklo_epi64(w6, w14);
- *d10d11 = _mm_unpackhi_epi64(w6, w14);
- *d12d13 = _mm_unpacklo_epi64(w7, w15);
- *d14d15 = _mm_unpackhi_epi64(w7, w15);
-}
-
-// this function treats its input as 2 parallel 8x4 matrices, transposes each of
-// them to 4x8 independently while flipping the second matrix horizontaly. Used
-// for 14 taps pq pairs creation
-static INLINE void transpose_pq_14_sse2(__m128i *x0, __m128i *x1, __m128i *x2,
- __m128i *x3, __m128i *q0p0,
- __m128i *q1p1, __m128i *q2p2,
- __m128i *q3p3, __m128i *q4p4,
- __m128i *q5p5, __m128i *q6p6,
- __m128i *q7p7) {
- __m128i w0, w1, ww0, ww1, w2, w3, ww2, ww3;
- w0 = _mm_unpacklo_epi8(
- *x0, *x1); // 00 10 01 11 02 12 03 13 04 14 05 15 06 16 07 17
- w1 = _mm_unpacklo_epi8(
- *x2, *x3); // 20 30 21 31 22 32 23 33 24 34 25 35 26 36 27 37
- w2 = _mm_unpackhi_epi8(
- *x0, *x1); // 08 18 09 19 010 110 011 111 012 112 013 113 014 114 015 115
- w3 = _mm_unpackhi_epi8(
- *x2, *x3); // 28 38 29 39 210 310 211 311 212 312 213 313 214 314 215 315
-
- ww0 = _mm_unpacklo_epi16(
- w0, w1); // 00 10 20 30 01 11 21 31 02 12 22 32 03 13 23 33
- ww1 = _mm_unpackhi_epi16(
- w0, w1); // 04 14 24 34 05 15 25 35 06 16 26 36 07 17 27 37
- ww2 = _mm_unpacklo_epi16(
- w2, w3); // 08 18 28 38 09 19 29 39 010 110 210 310 011 111 211 311
- ww3 = _mm_unpackhi_epi16(
- w2,
- w3); // 012 112 212 312 013 113 213 313 014 114 214 314 015 115 215 315
-
- *q7p7 = _mm_unpacklo_epi32(
- ww0,
- _mm_srli_si128(
- ww3, 12)); // 00 10 20 30 015 115 215 315 xx xx xx xx xx xx xx xx
- *q6p6 = _mm_unpackhi_epi32(
- _mm_slli_si128(ww0, 4),
- ww3); // 01 11 21 31 014 114 214 314 xx xx xx xxxx xx xx xx
- *q5p5 = _mm_unpackhi_epi32(
- ww0,
- _mm_slli_si128(
- ww3, 4)); // 02 12 22 32 013 113 213 313 xx xx xx x xx xx xx xxx
- *q4p4 = _mm_unpacklo_epi32(
- _mm_srli_si128(ww0, 12),
- ww3); // 03 13 23 33 012 112 212 312 xx xx xx xx xx xx xx xx
- *q3p3 = _mm_unpacklo_epi32(
- ww1,
- _mm_srli_si128(
- ww2, 12)); // 04 14 24 34 011 111 211 311 xx xx xx xx xx xx xx xx
- *q2p2 = _mm_unpackhi_epi32(
- _mm_slli_si128(ww1, 4),
- ww2); // 05 15 25 35 010 110 210 310 xx xx xx xx xx xx xx xx
- *q1p1 = _mm_unpackhi_epi32(
- ww1,
- _mm_slli_si128(
- ww2, 4)); // 06 16 26 36 09 19 29 39 xx xx xx xx xx xx xx xx
- *q0p0 = _mm_unpacklo_epi32(
- _mm_srli_si128(ww1, 12),
- ww2); // 07 17 27 37 08 18 28 38 xx xx xx xx xx xx xx xx
-}
-
-static AOM_FORCE_INLINE void filter4_sse2(__m128i *p1p0, __m128i *q1q0,
- __m128i *hev, __m128i *mask,
- __m128i *qs1qs0, __m128i *ps1ps0) { /*
- * 4-tap loop-filter kernel working on 32-bit groups of pixels.
- * Layout used by this code: dword 0 of *p1p0 / *q1q0 is p0 / q0 and
- * dword 1 is p1 / q1. *hev and *mask are byte masks; their dword 0 is the
- * part that reaches the result.
- * The filtered q and p pixels are written to *qs1qs0 and *ps1ps0 with the
- * same dword 0 / dword 1 layout. The scalar formulas are quoted inline.
- */
- __m128i filter, filter2filter1, work;
- __m128i ps1ps0_work, qs1qs0_work;
- __m128i hev1;
- const __m128i t3t4 =
- _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 3, 3, 3, 3, 4, 4, 4, 4); /* bytes 0-3: 4, bytes 4-7: 3 */
- const __m128i t80 = _mm_set1_epi8(0x80);
- const __m128i ff = _mm_cmpeq_epi8(t80, t80); /* every byte 0xff, i.e. -1 */
-
- ps1ps0_work = _mm_xor_si128(*p1p0, t80); /* ^ 0x80 */
- qs1qs0_work = _mm_xor_si128(*q1q0, t80);
-
- /* int8_t filter = signed_char_clamp(ps1 - qs1) & hev; */
- work = _mm_subs_epi8(ps1ps0_work, qs1qs0_work); /* dword 0: ps0 - qs0, dword 1: ps1 - qs1 */
- filter = _mm_and_si128(_mm_srli_si128(work, 4), *hev); /* move ps1 - qs1 down to dword 0 */
- /* filter = signed_char_clamp(filter + 3 * (qs0 - ps0)) & mask; */
- filter = _mm_subs_epi8(filter, work);
- filter = _mm_subs_epi8(filter, work);
- filter = _mm_subs_epi8(filter, work); /* + 3 * (qs0 - ps0) */
- filter = _mm_and_si128(filter, *mask); /* & mask */
- filter = _mm_unpacklo_epi32(filter, filter); /* dword 0 copied into dword 1 */
-
- /* filter1 = signed_char_clamp(filter + 4) >> 3; */
- /* filter2 = signed_char_clamp(filter + 3) >> 3; */
- filter2filter1 = _mm_adds_epi8(filter, t3t4); /* signed_char_clamp */
- filter2filter1 =
- _mm_unpacklo_epi8(filter2filter1, filter2filter1); // widen: each byte fills both halves of a 16-bit lane
- filter2filter1 = _mm_srai_epi16(filter2filter1, 11); /* >> 3 (8 for the widening + 3) */
- filter2filter1 = _mm_packs_epi16(filter2filter1, filter2filter1); /* dword 0: filter1, dword 1: filter2 */
-
- /* filter = ROUND_POWER_OF_TWO(filter1, 1) & ~hev; */
- filter = _mm_subs_epi8(filter2filter1, ff); /* + 1 */
- filter = _mm_unpacklo_epi8(filter, filter); // widen to 16 bit as above
- filter = _mm_srai_epi16(filter, 9); /* round (8 for the widening + 1) */
- filter = _mm_packs_epi16(filter, filter);
- filter = _mm_andnot_si128(*hev, filter);
- filter = _mm_unpacklo_epi32(filter, filter);
-
- filter2filter1 = _mm_unpacklo_epi32(filter2filter1, filter); /* dwords: filter1, filter, filter2, filter */
- hev1 = _mm_srli_si128(filter2filter1, 8); /* dword 0: filter2, dword 1: filter */
- /* signed_char_clamp(qs1 - filter), signed_char_clamp(qs0 - filter1) */
- qs1qs0_work = _mm_subs_epi8(qs1qs0_work, filter2filter1);
- /* signed_char_clamp(ps1 + filter), signed_char_clamp(ps0 + filter2) */
- ps1ps0_work = _mm_adds_epi8(ps1ps0_work, hev1);
-
- *qs1qs0 = _mm_xor_si128(qs1qs0_work, t80); /* ^ 0x80 */
- *ps1ps0 = _mm_xor_si128(ps1ps0_work, t80); /* ^ 0x80 */
-}
-
-static AOM_FORCE_INLINE void filter4_dual_sse2(__m128i *p1p0, __m128i *q1q0,
- __m128i *hev, __m128i *mask,
- __m128i *qs1qs0,
- __m128i *ps1ps0) { /*
- * 4-tap loop-filter kernel working on 64-bit groups of pixels.
- * Layout used by this code: the low 64 bits of *p1p0 / *q1q0 are p0 / q0
- * and the high 64 bits are p1 / q1. *hev and *mask are byte masks; their
- * low 64 bits are the part that reaches the result.
- * The filtered q and p pixels are written to *qs1qs0 and *ps1ps0 with the
- * same low / high layout. The scalar formulas are quoted inline.
- */
- const __m128i t3t4 =
- _mm_set_epi8(3, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4); /* bytes 0-7: 4, bytes 8-15: 3 */
- const __m128i t80 = _mm_set1_epi8(0x80);
- __m128i filter, filter2filter1, work;
- __m128i ps1ps0_work, qs1qs0_work;
- __m128i hev1;
- const __m128i ff = _mm_cmpeq_epi8(t80, t80); /* every byte 0xff, i.e. -1 */
-
- ps1ps0_work = _mm_xor_si128(*p1p0, t80); /* ^ 0x80 */
- qs1qs0_work = _mm_xor_si128(*q1q0, t80);
-
- /* int8_t filter = signed_char_clamp(ps1 - qs1) & hev; */
- work = _mm_subs_epi8(ps1ps0_work, qs1qs0_work); /* low 64: ps0 - qs0, high 64: ps1 - qs1 */
- filter = _mm_and_si128(_mm_srli_si128(work, 8), *hev); /* move ps1 - qs1 down to the low 64 bits */
- /* filter = signed_char_clamp(filter + 3 * (qs0 - ps0)) & mask; */
- filter = _mm_subs_epi8(filter, work);
- filter = _mm_subs_epi8(filter, work);
- filter = _mm_subs_epi8(filter, work); /* + 3 * (qs0 - ps0) */
- filter = _mm_and_si128(filter, *mask); /* & mask */
- filter = _mm_unpacklo_epi64(filter, filter); /* low 64 bits copied into the high 64 bits */
-
- /* filter1 = signed_char_clamp(filter + 4) >> 3; */
- /* filter2 = signed_char_clamp(filter + 3) >> 3; */
- filter2filter1 = _mm_adds_epi8(filter, t3t4); /* signed_char_clamp */
- filter = _mm_unpackhi_epi8(filter2filter1, filter2filter1); /* widen the filter + 3 half */
- filter2filter1 = _mm_unpacklo_epi8(filter2filter1, filter2filter1); /* widen the filter + 4 half */
- filter2filter1 = _mm_srai_epi16(filter2filter1, 11); /* >> 3 */
- filter = _mm_srai_epi16(filter, 11); /* >> 3 */
- filter2filter1 = _mm_packs_epi16(filter2filter1, filter); /* low 64: filter1, high 64: filter2 */
-
- /* filter = ROUND_POWER_OF_TWO(filter1, 1) & ~hev; */
- filter = _mm_subs_epi8(filter2filter1, ff); /* + 1 */
- filter = _mm_unpacklo_epi8(filter, filter); /* widen the filter1 + 1 half */
- filter = _mm_srai_epi16(filter, 9); /* round */
- filter = _mm_packs_epi16(filter, filter);
- filter = _mm_andnot_si128(*hev, filter);
-
- hev1 = _mm_unpackhi_epi64(filter2filter1, filter); /* low 64: filter2, high 64: filter */
- filter2filter1 = _mm_unpacklo_epi64(filter2filter1, filter); /* low 64: filter1, high 64: filter */
-
- /* signed_char_clamp(qs1 - filter), signed_char_clamp(qs0 - filter1) */
- qs1qs0_work = _mm_subs_epi8(qs1qs0_work, filter2filter1);
- /* signed_char_clamp(ps1 + filter), signed_char_clamp(ps0 + filter2) */
- ps1ps0_work = _mm_adds_epi8(ps1ps0_work, hev1);
- *qs1qs0 = _mm_xor_si128(qs1qs0_work, t80); /* ^ 0x80 */
- *ps1ps0 = _mm_xor_si128(ps1ps0_work, t80); /* ^ 0x80 */
-}
-
-static AOM_FORCE_INLINE void lpf_internal_4_sse2(
- __m128i *p1, __m128i *p0, __m128i *q0, __m128i *q1, __m128i *limit,
- __m128i *thresh, __m128i *q1q0_out, __m128i *p1p0_out) { /*
- * Builds the hev and filter masks for one 4-pixel edge segment and then
- * runs filter4_sse2 on it.
- * Dword 0 of *p1, *p0, *q0, *q1 supplies the four pixels of that line.
- * *limit is compared with blimit-type data in dword 0 and limit-type data
- * in dword 1 (see the mask construction below).
- * *thresh is compared as signed 16-bit lanes.
- * The filtered pixels are returned through *q1q0_out and *p1p0_out.
- */
- __m128i q1p1, q0p0, p1p0, q1q0;
- __m128i abs_p0q0, abs_p1q1;
- __m128i mask, flat, hev;
- const __m128i zero = _mm_setzero_si128();
-
- q1p1 = _mm_unpacklo_epi32(*p1, *q1); /* dword 0: p1, dword 1: q1 */
- q0p0 = _mm_unpacklo_epi32(*p0, *q0); /* dword 0: p0, dword 1: q0 */
-
- p1p0 = _mm_unpacklo_epi32(q0p0, q1p1); /* dwords: p0, p1, q0, q1 */
- q1q0 = _mm_srli_si128(p1p0, 8); /* dwords: q0, q1, 0, 0 */
-
- /* abs(p1 - p0), abs(q1 - q0) */
- flat = abs_diff(q1p1, q0p0);
- /* abs(p0 - q0), abs(p1 - q1) */
- __m128i abs_p1q1p0q0 = abs_diff(p1p0, q1q0);
-
- /* const uint8_t hev = hev_mask(thresh, *op1, *op0, *oq0, *oq1); */
- flat = _mm_max_epu8(flat, _mm_srli_si128(flat, 4)); /* dword 0: max(abs(p1 - p0), abs(q1 - q0)) */
- hev = _mm_unpacklo_epi8(flat, zero); /* widen to 16 bits for the signed compare */
-
- hev = _mm_cmpgt_epi16(hev, *thresh);
- hev = _mm_packs_epi16(hev, hev);
- hev = _mm_unpacklo_epi32(hev, hev); /* dword 0 copied into dword 1 */
-
- abs_p0q0 = _mm_adds_epu8(abs_p1q1p0q0, abs_p1q1p0q0); /* abs(p0 - q0) * 2 */
- abs_p1q1 = _mm_srli_si128(abs_p1q1p0q0, 4); /* abs(p1 - q1) */
- abs_p1q1 = _mm_unpacklo_epi8(abs_p1q1, abs_p1q1);
- abs_p1q1 = _mm_srli_epi16(abs_p1q1, 9); /* 8 for the widening + 1 */
- abs_p1q1 = _mm_packs_epi16(abs_p1q1, abs_p1q1); /* abs(p1 - q1) / 2 */
- /* abs(p0 - q0) * 2 + abs(p1 - q1) / 2 */
-
- mask = _mm_adds_epu8(abs_p0q0, abs_p1q1);
- mask = _mm_unpacklo_epi32(mask, flat); /* dword 0: edge sum, dword 1: max neighbour difference */
- mask = _mm_subs_epu8(mask, *limit);
- mask = _mm_cmpeq_epi8(mask, zero); /* 0xff where the value is <= its limit byte */
- mask = _mm_and_si128(mask, _mm_srli_si128(mask, 4)); /* dword 0: both tests passed */
-
- filter4_sse2(&p1p0, &q1q0, &hev, &mask, q1q0_out, p1p0_out);
-}
-
-static AOM_FORCE_INLINE void lpf_internal_4_dual_sse2(
- __m128i *p1, __m128i *p0, __m128i *q0, __m128i *q1, __m128i *limit,
- __m128i *thresh, __m128i *q1q0_out, __m128i *p1p0_out) { /*
- * Builds the hev and filter masks for one 8-pixel edge segment and then
- * runs filter4_dual_sse2 on it.
- * The low 64 bits of *p1, *p0, *q0, *q1 supply the eight pixels of that
- * line.
- * *limit is compared with the edge sum in its low 64 bits and with the max
- * neighbour difference in its high 64 bits, so it presumably carries
- * blimit then limit. NOTE(review): callers are outside this view; confirm.
- * *thresh is compared as signed 16-bit lanes.
- * The filtered pixels are returned through *q1q0_out and *p1p0_out.
- */
- __m128i q1p1, q0p0, p1p0, q1q0;
- __m128i abs_p0q0, abs_p1q1;
- __m128i mask, hev;
- const __m128i zero = _mm_setzero_si128();
-
- q1p1 = _mm_unpacklo_epi64(*p1, *q1); /* low 64: p1, high 64: q1 */
- q0p0 = _mm_unpacklo_epi64(*p0, *q0); /* low 64: p0, high 64: q0 */
-
- p1p0 = _mm_unpacklo_epi64(q0p0, q1p1); /* low 64: p0, high 64: p1 */
- q1q0 = _mm_unpackhi_epi64(q0p0, q1p1); /* low 64: q0, high 64: q1 */
-
- /* abs(p1 - p0), abs(q1 - q0) */
- __m128i flat = abs_diff(q1p1, q0p0);
- /* abs(p0 - q0), abs(p1 - q1) */
- const __m128i abs_p1q1p0q0 = abs_diff(p1p0, q1q0);
-
- /* const uint8_t hev = hev_mask(thresh, *op1, *op0, *oq0, *oq1); */
- flat = _mm_max_epu8(flat, _mm_srli_si128(flat, 8)); /* low 64: max(abs(p1 - p0), abs(q1 - q0)) */
- hev = _mm_unpacklo_epi8(flat, zero); /* widen to 16 bits for the signed compare */
-
- hev = _mm_cmpgt_epi16(hev, *thresh);
- hev = _mm_packs_epi16(hev, hev); /* same mask in both 64-bit halves */
-
- /* const int8_t mask = filter_mask2(*limit, *blimit, */
- /* p1, p0, q0, q1); */
- abs_p0q0 = _mm_adds_epu8(abs_p1q1p0q0, abs_p1q1p0q0); /* abs(p0 - q0) * 2 */
- abs_p1q1 = _mm_unpackhi_epi8(abs_p1q1p0q0, abs_p1q1p0q0); /* abs(p1 - q1) */
- abs_p1q1 = _mm_srli_epi16(abs_p1q1, 9); /* 8 for the widening + 1 */
- abs_p1q1 = _mm_packs_epi16(abs_p1q1, abs_p1q1); /* abs(p1 - q1) / 2 */
- /* abs(p0 - q0) * 2 + abs(p1 - q1) / 2 */
- mask = _mm_adds_epu8(abs_p0q0, abs_p1q1);
- mask = _mm_unpacklo_epi64(mask, flat); /* low 64: edge sum, high 64: max neighbour difference */
- mask = _mm_subs_epu8(mask, *limit);
- mask = _mm_cmpeq_epi8(mask, zero); /* 0xff where the value is <= its limit byte */
- mask = _mm_and_si128(mask, _mm_srli_si128(mask, 8)); /* low 64: both tests passed */
-
- filter4_dual_sse2(&p1p0, &q1q0, &hev, &mask, q1q0_out, p1p0_out);
-}
-
-void aom_lpf_horizontal_4_sse2(uint8_t *s, int p /* pitch */,
-                               const uint8_t *_blimit, const uint8_t *_limit,
-                               const uint8_t *_thresh) { /*
-   * Filters four pixels of a horizontal edge in place. The rows at
-   * s - 2 * p, s - p, s and s + p are read as p1, p0, q0 and q1, handed to
-   * lpf_internal_4_sse2, and the four results are written back.
-   * _blimit, _limit and _thresh are each read with an 8-byte load.
-   * NOTE(review): every row is read through an int * cast of a uint8_t
-   * pointer; that access is kept as in the original.
-   */
-  uint8_t *const row_p1 = s - 2 * p;
-  uint8_t *const row_p0 = s - 1 * p;
-  uint8_t *const row_q0 = s + 0 * p;
-  uint8_t *const row_q1 = s + 1 * p;
-
-  const __m128i blimit_bytes = _mm_loadl_epi64((const __m128i *)_blimit);
-  const __m128i limit_bytes = _mm_loadl_epi64((const __m128i *)_limit);
-  const __m128i thresh_bytes = _mm_loadl_epi64((const __m128i *)_thresh);
-
-  __m128i limits = _mm_unpacklo_epi32(blimit_bytes, limit_bytes); /* dword 0: blimit, dword 1: limit */
-  __m128i thresh16 = _mm_unpacklo_epi8(thresh_bytes, _mm_setzero_si128()); /* widened to 16-bit lanes */
-
-  __m128i in_p1 = _mm_cvtsi32_si128(*(int *)row_p1);
-  __m128i in_p0 = _mm_cvtsi32_si128(*(int *)row_p0);
-  __m128i in_q0 = _mm_cvtsi32_si128(*(int *)row_q0);
-  __m128i in_q1 = _mm_cvtsi32_si128(*(int *)row_q1);
-
-  __m128i out_q1q0, out_p1p0;
-  lpf_internal_4_sse2(&in_p1, &in_p0, &in_q0, &in_q1, &limits, &thresh16,
-                      &out_q1q0, &out_p1p0);
-
-  xx_storel_32(row_p0, out_p1p0); /* dword 0 carries the "0" row */
-  xx_storel_32(row_p1, _mm_srli_si128(out_p1p0, 4)); /* dword 1 carries the "1" row */
-  xx_storel_32(row_q0, out_q1q0);
-  xx_storel_32(row_q1, _mm_srli_si128(out_q1q0, 4));
-}
-
-void aom_lpf_vertical_4_sse2(uint8_t *s, int p /* pitch */,
-                             const uint8_t *_blimit, const uint8_t *_limit,
-                             const uint8_t *_thresh) { /*
-   * Filters four rows of a vertical edge in place. Eight bytes starting at
-   * s - 2 are loaded from each of four consecutive rows and passed through
-   * transpose4x8_8x4_low_sse2 (defined outside this view; presumably it
-   * yields the p1, p0, q0, q1 columns). The columns are filtered by
-   * lpf_internal_4_sse2, passed through the same helper again, and four
-   * bytes per row are stored back at s - 2.
-   */
-  uint8_t *const origin = s - 2;
-
-  const __m128i blimit_bytes = _mm_loadl_epi64((const __m128i *)_blimit);
-  const __m128i limit_bytes = _mm_loadl_epi64((const __m128i *)_limit);
-  const __m128i thresh_bytes = _mm_loadl_epi64((const __m128i *)_thresh);
-
-  __m128i limits = _mm_unpacklo_epi32(blimit_bytes, limit_bytes); /* dword 0: blimit, dword 1: limit */
-  __m128i thresh16 = _mm_unpacklo_epi8(thresh_bytes, _mm_setzero_si128()); /* widened to 16-bit lanes */
-
-  __m128i row0 = _mm_loadl_epi64((__m128i *)(origin + 0 * p));
-  __m128i row1 = _mm_loadl_epi64((__m128i *)(origin + 1 * p));
-  __m128i row2 = _mm_loadl_epi64((__m128i *)(origin + 2 * p));
-  __m128i row3 = _mm_loadl_epi64((__m128i *)(origin + 3 * p));
-
-  __m128i col_p1, col_p0, col_q0, col_q1;
-  transpose4x8_8x4_low_sse2(&row0, &row1, &row2, &row3, &col_p1, &col_p0,
-                            &col_q0, &col_q1);
-
-  __m128i out_q1q0, out_p1p0;
-  lpf_internal_4_sse2(&col_p1, &col_p0, &col_q0, &col_q1, &limits, &thresh16,
-                      &out_q1q0, &out_p1p0);
-
-  __m128i out_p1 = _mm_srli_si128(out_p1p0, 4); /* bring dword 1 down to dword 0 */
-  __m128i out_q1 = _mm_srli_si128(out_q1q0, 4);
-
-  __m128i res0, res1, res2, res3;
-  transpose4x8_8x4_low_sse2(&out_p1, &out_p1p0, &out_q1q0, &out_q1, &res0,
-                            &res1, &res2, &res3);
-
-  xx_storel_32(origin + 0 * p, res0);
-  xx_storel_32(origin + 1 * p, res1);
-  xx_storel_32(origin + 2 * p, res2);
-  xx_storel_32(origin + 3 * p, res3);
-}
-
-static INLINE void store_buffer_horz_8(__m128i x, int p, int num, uint8_t *s) { /*
-   * Stores x through xx_storel_32 at s - (num + 1) * p, and x shifted right
-   * by four bytes at s + num * p. xx_storel_32 is defined outside this view;
-   * presumably it writes the low 32 bits of its argument.
-   */
-  uint8_t *const p_side_row = s - (num + 1) * p;
-  uint8_t *const q_side_row = s + num * p;
-  const __m128i second_dword = _mm_srli_si128(x, 4);
-
-  xx_storel_32(p_side_row, x);
-  xx_storel_32(q_side_row, second_dword);
-}
-
-static AOM_FORCE_INLINE void lpf_internal_14_dual_sse2(
- __m128i *q6p6, __m128i *q5p5, __m128i *q4p4, __m128i *q3p3, __m128i *q2p2,
- __m128i *q1p1, __m128i *q0p0, __m128i *blimit, __m128i *limit,
- __m128i *thresh) { /*
- * Loop filter for an edge with seven pixels on each side, processing eight
- * pixel positions at once. Each *qNpN holds line pN in its low 64 bits and
- * line qN in its high 64 bits. *blimit, *limit and *thresh are compared as
- * unsigned bytes.
- * Three candidate results are produced and selected per position:
- *   - the filter4_dual_sse2 output (always computed),
- *   - a ">> 3" average, used where `flat` is set,
- *   - a ">> 4" average, used where `flat2` is set.
- * *q0p0 .. *q5p5 are updated in place; *q6p6 is only read.
- */
- const __m128i zero = _mm_setzero_si128();
- const __m128i one = _mm_set1_epi8(1);
- __m128i mask, hev, flat, flat2;
- __m128i qs0ps0, qs1ps1;
- __m128i p1p0, q1q0, qs1qs0, ps1ps0;
- __m128i abs_p1p0;
-
- p1p0 = _mm_unpacklo_epi64(*q0p0, *q1p1); /* low 64: p0, high 64: p1 */
- q1q0 = _mm_unpackhi_epi64(*q0p0, *q1p1); /* low 64: q0, high 64: q1 */
-
- {
- __m128i abs_p1q1, abs_p0q0, abs_q1q0;
- __m128i fe, ff, work;
- abs_p1p0 = abs_diff(*q1p1, *q0p0); /* low 64: abs(p1 - p0), high 64: abs(q1 - q0) */
- abs_q1q0 = _mm_srli_si128(abs_p1p0, 8);
- fe = _mm_set1_epi8(0xfe);
- ff = _mm_cmpeq_epi8(abs_p1p0, abs_p1p0); /* every byte 0xff */
- abs_p0q0 = abs_diff(p1p0, q1q0); /* low 64: abs(p0 - q0), high 64: abs(p1 - q1) */
- abs_p1q1 = _mm_srli_si128(abs_p0q0, 8);
- abs_p0q0 = _mm_unpacklo_epi64(abs_p0q0, zero); /* clear the high 64 bits */
-
- flat = _mm_max_epu8(abs_p1p0, abs_q1q0);
- hev = _mm_subs_epu8(flat, *thresh);
- hev = _mm_xor_si128(_mm_cmpeq_epi8(hev, zero), ff); /* 0xff where the max exceeds thresh */
- // replicate for the further "merged variables" usage
- hev = _mm_unpacklo_epi64(hev, hev);
-
- abs_p0q0 = _mm_adds_epu8(abs_p0q0, abs_p0q0); /* abs(p0 - q0) * 2 */
- abs_p1q1 = _mm_srli_epi16(_mm_and_si128(abs_p1q1, fe), 1); /* per-byte / 2; fe clears the bit that would cross bytes */
- mask = _mm_subs_epu8(_mm_adds_epu8(abs_p0q0, abs_p1q1), *blimit);
- mask = _mm_xor_si128(_mm_cmpeq_epi8(mask, zero), ff); /* 0xff where the sum exceeds blimit */
- // mask |= (abs(p0 - q0) * 2 + abs(p1 - q1) / 2 > blimit) * -1;
- mask = _mm_max_epu8(abs_p1p0, mask);
- // mask |= (abs(p1 - p0) > limit) * -1;
- // mask |= (abs(q1 - q0) > limit) * -1;
-
- work = _mm_max_epu8(abs_diff(*q2p2, *q1p1), abs_diff(*q3p3, *q2p2));
- mask = _mm_max_epu8(work, mask);
- mask = _mm_max_epu8(mask, _mm_srli_si128(mask, 8)); /* fold the q half onto the p half */
- mask = _mm_subs_epu8(mask, *limit);
- mask = _mm_cmpeq_epi8(mask, zero); /* 0xff where the running max is <= limit */
- }
-
- // lp filter - the same for 6, 8 and 14 versions
- filter4_dual_sse2(&p1p0, &q1q0, &hev, &mask, &qs1qs0, &ps1ps0);
- qs0ps0 = _mm_unpacklo_epi64(ps1ps0, qs1qs0); /* low 64: filtered p0, high 64: filtered q0 */
- qs1ps1 = _mm_unpackhi_epi64(ps1ps0, qs1qs0); /* low 64: filtered p1, high 64: filtered q1 */
- // loopfilter done
-
- __m128i flat2_q5p5, flat2_q4p4, flat2_q3p3, flat2_q2p2;
- __m128i flat2_q1p1, flat2_q0p0, flat_q2p2, flat_q1p1, flat_q0p0;
-
- __m128i work;
- flat = _mm_max_epu8(abs_diff(*q2p2, *q0p0), abs_diff(*q3p3, *q0p0));
- flat = _mm_max_epu8(abs_p1p0, flat);
- flat = _mm_max_epu8(flat, _mm_srli_si128(flat, 8));
- flat = _mm_subs_epu8(flat, one);
- flat = _mm_cmpeq_epi8(flat, zero); /* 0xff where the max difference is <= 1 */
- flat = _mm_and_si128(flat, mask);
-
- // if flat ==0 then flat2 is zero as well and we don't need any calc below
- // sse4.1 if (0==_mm_test_all_zeros(flat,ff))
- if (0xffff != _mm_movemask_epi8(_mm_cmpeq_epi8(flat, zero))) {
- // ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
- // flat and wide flat calculations
-
- const __m128i eight = _mm_set1_epi16(8);
- const __m128i four = _mm_set1_epi16(4);
- __m128i p6_16, p5_16, p4_16, p3_16, p2_16, p1_16, p0_16;
- __m128i q6_16, q5_16, q4_16, q3_16, q2_16, q1_16, q0_16;
- __m128i pixelFilter_p, pixelFilter_q;
- __m128i pixetFilter_p2p1p0, pixetFilter_q2q1q0;
- __m128i sum_p6, sum_q6;
- __m128i sum_p3, sum_q3, res_p, res_q;
-
- p6_16 = _mm_unpacklo_epi8(*q6p6, zero); /* p lines (low 64 bits) widened to 16 bits */
- p5_16 = _mm_unpacklo_epi8(*q5p5, zero);
- p4_16 = _mm_unpacklo_epi8(*q4p4, zero);
- p3_16 = _mm_unpacklo_epi8(*q3p3, zero);
- p2_16 = _mm_unpacklo_epi8(*q2p2, zero);
- p1_16 = _mm_unpacklo_epi8(*q1p1, zero);
- p0_16 = _mm_unpacklo_epi8(*q0p0, zero);
- q0_16 = _mm_unpackhi_epi8(*q0p0, zero); /* q lines (high 64 bits) widened to 16 bits */
- q1_16 = _mm_unpackhi_epi8(*q1p1, zero);
- q2_16 = _mm_unpackhi_epi8(*q2p2, zero);
- q3_16 = _mm_unpackhi_epi8(*q3p3, zero);
- q4_16 = _mm_unpackhi_epi8(*q4p4, zero);
- q5_16 = _mm_unpackhi_epi8(*q5p5, zero);
- q6_16 = _mm_unpackhi_epi8(*q6p6, zero);
- pixelFilter_p = _mm_add_epi16(p5_16, _mm_add_epi16(p4_16, p3_16));
- pixelFilter_q = _mm_add_epi16(q5_16, _mm_add_epi16(q4_16, q3_16));
-
- pixetFilter_p2p1p0 = _mm_add_epi16(p0_16, _mm_add_epi16(p2_16, p1_16));
- pixelFilter_p = _mm_add_epi16(pixelFilter_p, pixetFilter_p2p1p0);
-
- pixetFilter_q2q1q0 = _mm_add_epi16(q0_16, _mm_add_epi16(q2_16, q1_16));
- pixelFilter_q = _mm_add_epi16(pixelFilter_q, pixetFilter_q2q1q0);
- pixelFilter_p =
- _mm_add_epi16(eight, _mm_add_epi16(pixelFilter_p, pixelFilter_q)); /* 8 + p5..p0 + q5..q0 */
- pixetFilter_p2p1p0 = _mm_add_epi16(
- four, _mm_add_epi16(pixetFilter_p2p1p0, pixetFilter_q2q1q0)); /* 4 + p2..p0 + q2..q0 */
- res_p = _mm_srli_epi16(
- _mm_add_epi16(pixelFilter_p,
- _mm_add_epi16(_mm_add_epi16(p6_16, p0_16),
- _mm_add_epi16(p1_16, q0_16))),
- 4);
- res_q = _mm_srli_epi16(
- _mm_add_epi16(pixelFilter_p,
- _mm_add_epi16(_mm_add_epi16(q6_16, q0_16),
- _mm_add_epi16(p0_16, q1_16))),
- 4);
- flat2_q0p0 = _mm_packus_epi16(res_p, res_q); /* low 64: wide p0 result, high 64: wide q0 result */
-
- res_p = _mm_srli_epi16(
- _mm_add_epi16(pixetFilter_p2p1p0, _mm_add_epi16(p3_16, p0_16)), 3);
- res_q = _mm_srli_epi16(
- _mm_add_epi16(pixetFilter_p2p1p0, _mm_add_epi16(q3_16, q0_16)), 3);
-
- flat_q0p0 = _mm_packus_epi16(res_p, res_q);
-
- sum_p6 = _mm_add_epi16(p6_16, p6_16);
- sum_q6 = _mm_add_epi16(q6_16, q6_16);
- sum_p3 = _mm_add_epi16(p3_16, p3_16);
- sum_q3 = _mm_add_epi16(q3_16, q3_16);
-
- pixelFilter_q = _mm_sub_epi16(pixelFilter_p, p5_16); /* from here the _p / _q running sums diverge */
- pixelFilter_p = _mm_sub_epi16(pixelFilter_p, q5_16);
-
- res_p = _mm_srli_epi16(
- _mm_add_epi16(
- pixelFilter_p,
- _mm_add_epi16(sum_p6,
- _mm_add_epi16(p1_16, _mm_add_epi16(p2_16, p0_16)))),
- 4);
- res_q = _mm_srli_epi16(
- _mm_add_epi16(
- pixelFilter_q,
- _mm_add_epi16(sum_q6,
- _mm_add_epi16(q1_16, _mm_add_epi16(q0_16, q2_16)))),
- 4);
- flat2_q1p1 = _mm_packus_epi16(res_p, res_q);
-
- pixetFilter_q2q1q0 = _mm_sub_epi16(pixetFilter_p2p1p0, p2_16);
- pixetFilter_p2p1p0 = _mm_sub_epi16(pixetFilter_p2p1p0, q2_16);
- res_p = _mm_srli_epi16(
- _mm_add_epi16(pixetFilter_p2p1p0, _mm_add_epi16(sum_p3, p1_16)), 3);
- res_q = _mm_srli_epi16(
- _mm_add_epi16(pixetFilter_q2q1q0, _mm_add_epi16(sum_q3, q1_16)), 3);
- flat_q1p1 = _mm_packus_epi16(res_p, res_q);
-
- pixetFilter_p2p1p0 = _mm_sub_epi16(pixetFilter_p2p1p0, q1_16);
- pixetFilter_q2q1q0 = _mm_sub_epi16(pixetFilter_q2q1q0, p1_16);
-
- sum_p3 = _mm_add_epi16(sum_p3, p3_16);
- sum_q3 = _mm_add_epi16(sum_q3, q3_16);
-
- res_p = _mm_srli_epi16(
- _mm_add_epi16(pixetFilter_p2p1p0, _mm_add_epi16(sum_p3, p2_16)), 3);
- res_q = _mm_srli_epi16(
- _mm_add_epi16(pixetFilter_q2q1q0, _mm_add_epi16(sum_q3, q2_16)), 3);
- flat_q2p2 = _mm_packus_epi16(res_p, res_q);
-
- // work with flat2
- flat2 = _mm_max_epu8(abs_diff(*q4p4, *q0p0), abs_diff(*q5p5, *q0p0));
- work = abs_diff(*q6p6, *q0p0);
- flat2 = _mm_max_epu8(work, flat2);
- flat2 = _mm_max_epu8(flat2, _mm_srli_si128(flat2, 8));
- flat2 = _mm_subs_epu8(flat2, one);
- flat2 = _mm_cmpeq_epi8(flat2, zero); /* 0xff where the max difference is <= 1 */
- flat2 = _mm_and_si128(flat2, flat); // flat2 & flat & mask
-
- // ~~~~~~~~~~ apply flat ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
- flat = _mm_unpacklo_epi64(flat, flat); /* same mask for the p and q halves */
- *q2p2 = _mm_andnot_si128(flat, *q2p2); /* keep the input where flat is clear */
- flat_q2p2 = _mm_and_si128(flat, flat_q2p2);
- *q2p2 = _mm_or_si128(*q2p2, flat_q2p2);
-
- qs1ps1 = _mm_andnot_si128(flat, qs1ps1); /* keep the 4-tap result where flat is clear */
- flat_q1p1 = _mm_and_si128(flat, flat_q1p1);
- *q1p1 = _mm_or_si128(qs1ps1, flat_q1p1);
-
- qs0ps0 = _mm_andnot_si128(flat, qs0ps0);
- flat_q0p0 = _mm_and_si128(flat, flat_q0p0);
- *q0p0 = _mm_or_si128(qs0ps0, flat_q0p0);
-
- if (0xffff != _mm_movemask_epi8(_mm_cmpeq_epi8(flat2, zero))) { /* some byte of flat2 is set */
- pixelFilter_p = _mm_sub_epi16(pixelFilter_p, q4_16);
- pixelFilter_q = _mm_sub_epi16(pixelFilter_q, p4_16);
-
- sum_p6 = _mm_add_epi16(sum_p6, p6_16);
- sum_q6 = _mm_add_epi16(sum_q6, q6_16);
-
- res_p = _mm_srli_epi16(
- _mm_add_epi16(
- pixelFilter_p,
- _mm_add_epi16(sum_p6,
- _mm_add_epi16(p2_16, _mm_add_epi16(p3_16, p1_16)))),
- 4);
- res_q = _mm_srli_epi16(
- _mm_add_epi16(
- pixelFilter_q,
- _mm_add_epi16(sum_q6,
- _mm_add_epi16(q2_16, _mm_add_epi16(q1_16, q3_16)))),
- 4);
- flat2_q2p2 = _mm_packus_epi16(res_p, res_q);
-
- sum_p6 = _mm_add_epi16(sum_p6, p6_16);
- sum_q6 = _mm_add_epi16(sum_q6, q6_16);
-
- pixelFilter_p = _mm_sub_epi16(pixelFilter_p, q3_16);
- pixelFilter_q = _mm_sub_epi16(pixelFilter_q, p3_16);
-
- res_p = _mm_srli_epi16(
- _mm_add_epi16(
- pixelFilter_p,
- _mm_add_epi16(sum_p6,
- _mm_add_epi16(p3_16, _mm_add_epi16(p4_16, p2_16)))),
- 4);
- res_q = _mm_srli_epi16(
- _mm_add_epi16(
- pixelFilter_q,
- _mm_add_epi16(sum_q6,
- _mm_add_epi16(q3_16, _mm_add_epi16(q2_16, q4_16)))),
- 4);
- flat2_q3p3 = _mm_packus_epi16(res_p, res_q);
-
- sum_p6 = _mm_add_epi16(sum_p6, p6_16);
- sum_q6 = _mm_add_epi16(sum_q6, q6_16);
-
- pixelFilter_p = _mm_sub_epi16(pixelFilter_p, q2_16);
- pixelFilter_q = _mm_sub_epi16(pixelFilter_q, p2_16);
-
- res_p = _mm_srli_epi16(
- _mm_add_epi16(
- pixelFilter_p,
- _mm_add_epi16(sum_p6,
- _mm_add_epi16(p4_16, _mm_add_epi16(p5_16, p3_16)))),
- 4);
- res_q = _mm_srli_epi16(
- _mm_add_epi16(
- pixelFilter_q,
- _mm_add_epi16(sum_q6,
- _mm_add_epi16(q4_16, _mm_add_epi16(q3_16, q5_16)))),
- 4);
- flat2_q4p4 = _mm_packus_epi16(res_p, res_q);
-
- sum_p6 = _mm_add_epi16(sum_p6, p6_16);
- sum_q6 = _mm_add_epi16(sum_q6, q6_16);
- pixelFilter_p = _mm_sub_epi16(pixelFilter_p, q1_16);
- pixelFilter_q = _mm_sub_epi16(pixelFilter_q, p1_16);
-
- res_p = _mm_srli_epi16(
- _mm_add_epi16(
- pixelFilter_p,
- _mm_add_epi16(sum_p6,
- _mm_add_epi16(p5_16, _mm_add_epi16(p6_16, p4_16)))),
- 4);
- res_q = _mm_srli_epi16(
- _mm_add_epi16(
- pixelFilter_q,
- _mm_add_epi16(sum_q6,
- _mm_add_epi16(q5_16, _mm_add_epi16(q6_16, q4_16)))),
- 4);
- flat2_q5p5 = _mm_packus_epi16(res_p, res_q);
-
- // wide flat
- // ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
- flat2 = _mm_unpacklo_epi64(flat2, flat2); /* same mask for the p and q halves */
-
- *q5p5 = _mm_andnot_si128(flat2, *q5p5);
- flat2_q5p5 = _mm_and_si128(flat2, flat2_q5p5);
- *q5p5 = _mm_or_si128(*q5p5, flat2_q5p5);
-
- *q4p4 = _mm_andnot_si128(flat2, *q4p4);
- flat2_q4p4 = _mm_and_si128(flat2, flat2_q4p4);
- *q4p4 = _mm_or_si128(*q4p4, flat2_q4p4);
-
- *q3p3 = _mm_andnot_si128(flat2, *q3p3);
- flat2_q3p3 = _mm_and_si128(flat2, flat2_q3p3);
- *q3p3 = _mm_or_si128(*q3p3, flat2_q3p3);
-
- *q2p2 = _mm_andnot_si128(flat2, *q2p2);
- flat2_q2p2 = _mm_and_si128(flat2, flat2_q2p2);
- *q2p2 = _mm_or_si128(*q2p2, flat2_q2p2);
-
- *q1p1 = _mm_andnot_si128(flat2, *q1p1);
- flat2_q1p1 = _mm_and_si128(flat2, flat2_q1p1);
- *q1p1 = _mm_or_si128(*q1p1, flat2_q1p1);
-
- *q0p0 = _mm_andnot_si128(flat2, *q0p0);
- flat2_q0p0 = _mm_and_si128(flat2, flat2_q0p0);
- *q0p0 = _mm_or_si128(*q0p0, flat2_q0p0);
- }
- } else { /* flat is zero in every byte: only the 4-tap result is written */
- *q0p0 = qs0ps0;
- *q1p1 = qs1ps1;
- }
-}
-
-static AOM_FORCE_INLINE void lpf_internal_14_sse2(
- __m128i *q6p6, __m128i *q5p5, __m128i *q4p4, __m128i *q3p3, __m128i *q2p2,
- __m128i *q1p1, __m128i *q0p0, __m128i *blimit, __m128i *limit,
- __m128i *thresh) { /*
- * Loop filter for an edge with seven pixels on each side, processing four
- * pixel positions at once. Each *qNpN holds line pN in dword 0 and line qN
- * in dword 1. *blimit, *limit and *thresh are compared as unsigned bytes.
- * Three candidate results are produced and selected per position:
- *   - the filter4_sse2 output (always computed),
- *   - a ">> 3" average, used where `flat` is set,
- *   - a ">> 4" average, used where `flat2` is set.
- * *q0p0 .. *q5p5 are updated in place; *q6p6 is only read.
- */
- const __m128i zero = _mm_setzero_si128();
- const __m128i one = _mm_set1_epi8(1);
- __m128i mask, hev, flat, flat2;
- __m128i flat2_pq[6], flat_pq[3];
- __m128i qs0ps0, qs1ps1;
- __m128i p1p0, q1q0, qs1qs0, ps1ps0;
- __m128i abs_p1p0;
-
- p1p0 = _mm_unpacklo_epi32(*q0p0, *q1p1); /* dwords: p0, p1, q0, q1 */
- q1q0 = _mm_srli_si128(p1p0, 8); /* dwords: q0, q1, 0, 0 */
-
- __m128i fe, ff, work;
- {
- __m128i abs_p1q1, abs_p0q0, abs_q1q0;
- abs_p1p0 = abs_diff(*q1p1, *q0p0); /* dword 0: abs(p1 - p0), dword 1: abs(q1 - q0) */
- abs_q1q0 = _mm_srli_si128(abs_p1p0, 4);
- fe = _mm_set1_epi8(0xfe);
- ff = _mm_cmpeq_epi8(fe, fe); /* every byte 0xff */
- abs_p0q0 = abs_diff(p1p0, q1q0); /* dword 0: abs(p0 - q0), dword 1: abs(p1 - q1) */
- abs_p1q1 = _mm_srli_si128(abs_p0q0, 4);
-
- flat = _mm_max_epu8(abs_p1p0, abs_q1q0);
-
- hev = _mm_subs_epu8(flat, *thresh);
- hev = _mm_xor_si128(_mm_cmpeq_epi8(hev, zero), ff); /* 0xff where the max exceeds thresh */
- // replicate for the further "merged variables" usage
- hev = _mm_unpacklo_epi32(hev, hev);
-
- abs_p0q0 = _mm_adds_epu8(abs_p0q0, abs_p0q0); /* abs(p0 - q0) * 2 */
- abs_p1q1 = _mm_srli_epi16(_mm_and_si128(abs_p1q1, fe), 1); /* per-byte / 2; fe clears the bit that would cross bytes */
- mask = _mm_subs_epu8(_mm_adds_epu8(abs_p0q0, abs_p1q1), *blimit);
- mask = _mm_unpacklo_epi32(mask, zero); /* keep dword 0, clear dword 1 */
- mask = _mm_xor_si128(_mm_cmpeq_epi8(mask, zero), ff); /* 0xff where the sum exceeds blimit */
- // mask |= (abs(p0 - q0) * 2 + abs(p1 - q1) / 2 > blimit) * -1;
- mask = _mm_max_epu8(abs_p1p0, mask);
- // mask |= (abs(p1 - p0) > limit) * -1;
- // mask |= (abs(q1 - q0) > limit) * -1;
-
- work = _mm_max_epu8(abs_diff(*q2p2, *q1p1), abs_diff(*q3p3, *q2p2));
- mask = _mm_max_epu8(work, mask);
- mask = _mm_max_epu8(mask, _mm_srli_si128(mask, 4)); /* fold the q dword onto the p dword */
- mask = _mm_subs_epu8(mask, *limit);
- mask = _mm_cmpeq_epi8(mask, zero); /* 0xff where the running max is <= limit */
- }
-
- // lp filter - the same for 6, 8 and 14 versions
- filter4_sse2(&p1p0, &q1q0, &hev, &mask, &qs1qs0, &ps1ps0);
- qs0ps0 = _mm_unpacklo_epi32(ps1ps0, qs1qs0); /* dwords: filtered p0, q0, p1, q1 */
- qs1ps1 = _mm_srli_si128(qs0ps0, 8); /* dwords: filtered p1, q1, 0, 0 */
- // loopfilter done
-
- flat = _mm_max_epu8(abs_diff(*q2p2, *q0p0), abs_diff(*q3p3, *q0p0));
- flat = _mm_max_epu8(abs_p1p0, flat);
- flat = _mm_max_epu8(flat, _mm_srli_si128(flat, 4));
- flat = _mm_subs_epu8(flat, one);
- flat = _mm_cmpeq_epi8(flat, zero); /* 0xff where the max difference is <= 1 */
- flat = _mm_and_si128(flat, mask);
- flat = _mm_unpacklo_epi32(flat, flat);
- flat = _mm_unpacklo_epi64(flat, flat); /* dword 0 now fills all four dwords */
-
- // if flat ==0 then flat2 is zero as well and we don't need any calc below
- // sse4.1 if (0==_mm_test_all_zeros(flat,ff))
- if (0xffff != _mm_movemask_epi8(_mm_cmpeq_epi8(flat, zero))) {
- // ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
- // flat and wide flat calculations
- __m128i q5_16, q4_16, q3_16, q2_16, q1_16, q0_16;
- __m128i pq_16[7];
- const __m128i eight = _mm_set1_epi16(8);
- const __m128i four = _mm_set1_epi16(4);
- __m128i sum_p6;
- __m128i sum_p3;
-
- pq_16[0] = _mm_unpacklo_epi8(*q0p0, zero); /* 16-bit lanes: p in the low 64 bits, q in the high 64 bits */
- pq_16[1] = _mm_unpacklo_epi8(*q1p1, zero);
- pq_16[2] = _mm_unpacklo_epi8(*q2p2, zero);
- pq_16[3] = _mm_unpacklo_epi8(*q3p3, zero);
- pq_16[4] = _mm_unpacklo_epi8(*q4p4, zero);
- pq_16[5] = _mm_unpacklo_epi8(*q5p5, zero);
- pq_16[6] = _mm_unpacklo_epi8(*q6p6, zero);
- q0_16 = _mm_srli_si128(pq_16[0], 8); /* q lanes moved down to the low 64 bits */
- q1_16 = _mm_srli_si128(pq_16[1], 8);
- q2_16 = _mm_srli_si128(pq_16[2], 8);
- q3_16 = _mm_srli_si128(pq_16[3], 8);
- q4_16 = _mm_srli_si128(pq_16[4], 8);
- q5_16 = _mm_srli_si128(pq_16[5], 8);
-
- __m128i flat_p[3], flat_q[3];
- __m128i flat2_p[6], flat2_q[6];
-
- __m128i work0, work0_0, work0_1, sum_p_0;
- __m128i sum_p = _mm_add_epi16(pq_16[5], _mm_add_epi16(pq_16[4], pq_16[3]));
- __m128i sum_lp = _mm_add_epi16(pq_16[0], _mm_add_epi16(pq_16[2], pq_16[1]));
- sum_p = _mm_add_epi16(sum_p, sum_lp);
-
- __m128i sum_lq = _mm_srli_si128(sum_lp, 8);
- __m128i sum_q = _mm_srli_si128(sum_p, 8);
-
- sum_p_0 = _mm_add_epi16(eight, _mm_add_epi16(sum_p, sum_q)); /* low 64: 8 + p5..p0 + q5..q0 */
- sum_lp = _mm_add_epi16(four, _mm_add_epi16(sum_lp, sum_lq)); /* low 64: 4 + p2..p0 + q2..q0 */
-
- flat_p[0] = _mm_add_epi16(sum_lp, _mm_add_epi16(pq_16[3], pq_16[0]));
- flat_q[0] = _mm_add_epi16(sum_lp, _mm_add_epi16(q3_16, q0_16));
-
- sum_p6 = _mm_add_epi16(pq_16[6], pq_16[6]);
- sum_p3 = _mm_add_epi16(pq_16[3], pq_16[3]);
-
- sum_q = _mm_sub_epi16(sum_p_0, pq_16[5]);
- sum_p = _mm_sub_epi16(sum_p_0, q5_16);
-
- work0_0 = _mm_add_epi16(_mm_add_epi16(pq_16[6], pq_16[0]), pq_16[1]);
- work0_1 = _mm_add_epi16(
- sum_p6, _mm_add_epi16(pq_16[1], _mm_add_epi16(pq_16[2], pq_16[0])));
-
- sum_lq = _mm_sub_epi16(sum_lp, pq_16[2]);
- sum_lp = _mm_sub_epi16(sum_lp, q2_16);
-
- work0 = _mm_add_epi16(sum_p3, pq_16[1]);
- flat_p[1] = _mm_add_epi16(sum_lp, work0);
- flat_q[1] = _mm_add_epi16(sum_lq, _mm_srli_si128(work0, 8)); /* q terms of work0 sit in its high 64 bits */
-
- flat_pq[0] = _mm_srli_epi16(_mm_unpacklo_epi64(flat_p[0], flat_q[0]), 3);
- flat_pq[1] = _mm_srli_epi16(_mm_unpacklo_epi64(flat_p[1], flat_q[1]), 3);
- flat_pq[0] = _mm_packus_epi16(flat_pq[0], flat_pq[0]); /* dword 0: p result, dword 1: q result */
- flat_pq[1] = _mm_packus_epi16(flat_pq[1], flat_pq[1]);
-
- sum_lp = _mm_sub_epi16(sum_lp, q1_16);
- sum_lq = _mm_sub_epi16(sum_lq, pq_16[1]);
-
- sum_p3 = _mm_add_epi16(sum_p3, pq_16[3]);
- work0 = _mm_add_epi16(sum_p3, pq_16[2]);
-
- flat_p[2] = _mm_add_epi16(sum_lp, work0);
- flat_q[2] = _mm_add_epi16(sum_lq, _mm_srli_si128(work0, 8));
- flat_pq[2] = _mm_srli_epi16(_mm_unpacklo_epi64(flat_p[2], flat_q[2]), 3);
- flat_pq[2] = _mm_packus_epi16(flat_pq[2], flat_pq[2]);
-
- // ~~~~~~~~~~~~~~~~~~~~~~~~~~~ flat 2 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
- flat2 = _mm_max_epu8(abs_diff(*q4p4, *q0p0), abs_diff(*q5p5, *q0p0));
-
- work = abs_diff(*q6p6, *q0p0);
- flat2 = _mm_max_epu8(work, flat2);
- flat2 = _mm_max_epu8(flat2, _mm_srli_si128(flat2, 4));
- flat2 = _mm_subs_epu8(flat2, one);
- flat2 = _mm_cmpeq_epi8(flat2, zero); /* 0xff where the max difference is <= 1 */
- flat2 = _mm_and_si128(flat2, flat); // flat2 & flat & mask
- flat2 = _mm_unpacklo_epi32(flat2, flat2); /* same mask for the p and q dwords */
-
- // ~~~~~~~~~~ apply flat ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
- qs0ps0 = _mm_andnot_si128(flat, qs0ps0); /* keep the 4-tap result where flat is clear */
- flat_pq[0] = _mm_and_si128(flat, flat_pq[0]);
- *q0p0 = _mm_or_si128(qs0ps0, flat_pq[0]);
-
- qs1ps1 = _mm_andnot_si128(flat, qs1ps1);
- flat_pq[1] = _mm_and_si128(flat, flat_pq[1]);
- *q1p1 = _mm_or_si128(qs1ps1, flat_pq[1]);
-
- *q2p2 = _mm_andnot_si128(flat, *q2p2); /* keep the input where flat is clear */
- flat_pq[2] = _mm_and_si128(flat, flat_pq[2]);
- *q2p2 = _mm_or_si128(*q2p2, flat_pq[2]);
-
- if (0xffff != _mm_movemask_epi8(_mm_cmpeq_epi8(flat2, zero))) { /* some byte of flat2 is set */
- flat2_p[0] = _mm_add_epi16(sum_p_0, _mm_add_epi16(work0_0, q0_16));
- flat2_q[0] = _mm_add_epi16(
- sum_p_0, _mm_add_epi16(_mm_srli_si128(work0_0, 8), pq_16[0]));
-
- flat2_p[1] = _mm_add_epi16(sum_p, work0_1);
- flat2_q[1] = _mm_add_epi16(sum_q, _mm_srli_si128(work0_1, 8));
-
- flat2_pq[0] =
- _mm_srli_epi16(_mm_unpacklo_epi64(flat2_p[0], flat2_q[0]), 4);
- flat2_pq[1] =
- _mm_srli_epi16(_mm_unpacklo_epi64(flat2_p[1], flat2_q[1]), 4);
- flat2_pq[0] = _mm_packus_epi16(flat2_pq[0], flat2_pq[0]);
- flat2_pq[1] = _mm_packus_epi16(flat2_pq[1], flat2_pq[1]);
-
- sum_p = _mm_sub_epi16(sum_p, q4_16);
- sum_q = _mm_sub_epi16(sum_q, pq_16[4]);
-
- sum_p6 = _mm_add_epi16(sum_p6, pq_16[6]);
- work0 = _mm_add_epi16(
- sum_p6, _mm_add_epi16(pq_16[2], _mm_add_epi16(pq_16[3], pq_16[1])));
- flat2_p[2] = _mm_add_epi16(sum_p, work0);
- flat2_q[2] = _mm_add_epi16(sum_q, _mm_srli_si128(work0, 8));
- flat2_pq[2] =
- _mm_srli_epi16(_mm_unpacklo_epi64(flat2_p[2], flat2_q[2]), 4);
- flat2_pq[2] = _mm_packus_epi16(flat2_pq[2], flat2_pq[2]);
-
- sum_p6 = _mm_add_epi16(sum_p6, pq_16[6]);
- sum_p = _mm_sub_epi16(sum_p, q3_16);
- sum_q = _mm_sub_epi16(sum_q, pq_16[3]);
-
- work0 = _mm_add_epi16(
- sum_p6, _mm_add_epi16(pq_16[3], _mm_add_epi16(pq_16[4], pq_16[2])));
- flat2_p[3] = _mm_add_epi16(sum_p, work0);
- flat2_q[3] = _mm_add_epi16(sum_q, _mm_srli_si128(work0, 8));
- flat2_pq[3] =
- _mm_srli_epi16(_mm_unpacklo_epi64(flat2_p[3], flat2_q[3]), 4);
- flat2_pq[3] = _mm_packus_epi16(flat2_pq[3], flat2_pq[3]);
-
- sum_p6 = _mm_add_epi16(sum_p6, pq_16[6]);
- sum_p = _mm_sub_epi16(sum_p, q2_16);
- sum_q = _mm_sub_epi16(sum_q, pq_16[2]);
-
- work0 = _mm_add_epi16(
- sum_p6, _mm_add_epi16(pq_16[4], _mm_add_epi16(pq_16[5], pq_16[3])));
- flat2_p[4] = _mm_add_epi16(sum_p, work0);
- flat2_q[4] = _mm_add_epi16(sum_q, _mm_srli_si128(work0, 8));
- flat2_pq[4] =
- _mm_srli_epi16(_mm_unpacklo_epi64(flat2_p[4], flat2_q[4]), 4);
- flat2_pq[4] = _mm_packus_epi16(flat2_pq[4], flat2_pq[4]);
-
- sum_p6 = _mm_add_epi16(sum_p6, pq_16[6]);
- sum_p = _mm_sub_epi16(sum_p, q1_16);
- sum_q = _mm_sub_epi16(sum_q, pq_16[1]);
-
- work0 = _mm_add_epi16(
- sum_p6, _mm_add_epi16(pq_16[5], _mm_add_epi16(pq_16[6], pq_16[4])));
- flat2_p[5] = _mm_add_epi16(sum_p, work0);
- flat2_q[5] = _mm_add_epi16(sum_q, _mm_srli_si128(work0, 8));
- flat2_pq[5] =
- _mm_srli_epi16(_mm_unpacklo_epi64(flat2_p[5], flat2_q[5]), 4);
- flat2_pq[5] = _mm_packus_epi16(flat2_pq[5], flat2_pq[5]);
-
- // wide flat
- // ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-
- *q0p0 = _mm_andnot_si128(flat2, *q0p0);
- flat2_pq[0] = _mm_and_si128(flat2, flat2_pq[0]);
- *q0p0 = _mm_or_si128(*q0p0, flat2_pq[0]);
-
- *q1p1 = _mm_andnot_si128(flat2, *q1p1);
- flat2_pq[1] = _mm_and_si128(flat2, flat2_pq[1]);
- *q1p1 = _mm_or_si128(*q1p1, flat2_pq[1]);
-
- *q2p2 = _mm_andnot_si128(flat2, *q2p2);
- flat2_pq[2] = _mm_and_si128(flat2, flat2_pq[2]);
- *q2p2 = _mm_or_si128(*q2p2, flat2_pq[2]);
-
- *q3p3 = _mm_andnot_si128(flat2, *q3p3);
- flat2_pq[3] = _mm_and_si128(flat2, flat2_pq[3]);
- *q3p3 = _mm_or_si128(*q3p3, flat2_pq[3]);
-
- *q4p4 = _mm_andnot_si128(flat2, *q4p4);
- flat2_pq[4] = _mm_and_si128(flat2, flat2_pq[4]);
- *q4p4 = _mm_or_si128(*q4p4, flat2_pq[4]);
-
- *q5p5 = _mm_andnot_si128(flat2, *q5p5);
- flat2_pq[5] = _mm_and_si128(flat2, flat2_pq[5]);
- *q5p5 = _mm_or_si128(*q5p5, flat2_pq[5]);
- }
- } else { /* flat is zero in every byte: only the 4-tap result is written */
- *q0p0 = qs0ps0;
- *q1p1 = qs1ps1;
- }
-}
-
-void aom_lpf_horizontal_14_sse2(unsigned char *s, int p,
-                                const unsigned char *_blimit,
-                                const unsigned char *_limit,
-                                const unsigned char *_thresh) { /*
-   * Filters four pixels of a horizontal edge using seven rows on each side.
-   * Rows s - 7 * p .. s - p are the p side and rows s .. s + 6 * p the q
-   * side. All fourteen rows are read; the six nearest rows on each side are
-   * written back after lpf_internal_14_sse2 has run.
-   * _blimit, _limit and _thresh are read with aligned 16-byte loads.
-   * NOTE(review): every row is read through an int * cast of an
-   * unsigned char pointer; that access is kept as in the original.
-   */
-  __m128i blimit = _mm_load_si128((const __m128i *)_blimit);
-  __m128i limit = _mm_load_si128((const __m128i *)_limit);
-  __m128i thresh = _mm_load_si128((const __m128i *)_thresh);
-  __m128i qp[7]; /* qp[i]: dword 0 = row p_i, dword 1 = row q_i */
-  int i;
-
-  for (i = 0; i < 7; ++i) {
-    const __m128i p_row = _mm_cvtsi32_si128(*(int *)(s - (i + 1) * p));
-    const __m128i q_row = _mm_cvtsi32_si128(*(int *)(s + i * p));
-    qp[i] = _mm_unpacklo_epi32(p_row, q_row);
-  }
-
-  lpf_internal_14_sse2(&qp[6], &qp[5], &qp[4], &qp[3], &qp[2], &qp[1], &qp[0],
-                       &blimit, &limit, &thresh);
-
-  for (i = 0; i < 6; ++i) { /* qp[6] is never stored */
-    store_buffer_horz_8(qp[i], p, i, s);
-  }
-}
-
-static AOM_FORCE_INLINE void lpf_internal_6_dual_sse2(
- __m128i *p2, __m128i *q2, __m128i *p1, __m128i *q1, __m128i *p0,
- __m128i *q0, __m128i *q1q0, __m128i *p1p0, __m128i *blimit, __m128i *limit,
- __m128i *thresh) {
- const __m128i zero = _mm_setzero_si128();
- __m128i mask, hev, flat;
- __m128i q2p2, q1p1, q0p0, flat_p1p0, flat_q0q1;
- __m128i p2_16, q2_16, p1_16, q1_16, p0_16, q0_16;
- __m128i ps1ps0, qs1qs0;
-
- q2p2 = _mm_unpacklo_epi64(*p2, *q2);
- q1p1 = _mm_unpacklo_epi64(*p1, *q1);
- q0p0 = _mm_unpacklo_epi64(*p0, *q0);
-
- *p1p0 = _mm_unpacklo_epi64(q0p0, q1p1);
- *q1q0 = _mm_unpackhi_epi64(q0p0, q1p1);
-
- const __m128i one = _mm_set1_epi8(1);
- const __m128i fe = _mm_set1_epi8(0xfe);
- const __m128i ff = _mm_cmpeq_epi8(fe, fe);
-
- {
- // filter_mask and hev_mask
- __m128i abs_p1q1, abs_p0q0, abs_q1q0, abs_p1p0, work;
- abs_p1p0 = abs_diff(q1p1, q0p0);
- abs_q1q0 = _mm_srli_si128(abs_p1p0, 8);
-
- abs_p0q0 = abs_diff(*p1p0, *q1q0);
- abs_p1q1 = _mm_srli_si128(abs_p0q0, 8);
- abs_p0q0 = _mm_unpacklo_epi64(abs_p0q0, zero);
-
- // considering sse doesn't have unsigned elements comparison the idea is
- // to find at least one case when X > limit, it means the corresponding
- // mask bit is set.
- // to achieve that we find global max value of all inputs of abs(x-y) or
- // (abs(p0 - q0) * 2 + abs(p1 - q1) / 2 If it is > limit the mask is set
- // otherwise - not
-
- flat = _mm_max_epu8(abs_p1p0, abs_q1q0);
- hev = _mm_subs_epu8(flat, *thresh);
- hev = _mm_xor_si128(_mm_cmpeq_epi8(hev, zero), ff);
- // replicate for the further "merged variables" usage
- hev = _mm_unpacklo_epi64(hev, hev);
-
- abs_p0q0 = _mm_adds_epu8(abs_p0q0, abs_p0q0);
- abs_p1q1 = _mm_srli_epi16(_mm_and_si128(abs_p1q1, fe), 1);
- mask = _mm_subs_epu8(_mm_adds_epu8(abs_p0q0, abs_p1q1), *blimit);
- mask = _mm_xor_si128(_mm_cmpeq_epi8(mask, zero), ff);
- // mask |= (abs(p0 - q0) * 2 + abs(p1 - q1) / 2 > blimit) * -1;
- mask = _mm_max_epu8(abs_p1p0, mask);
- // mask |= (abs(p1 - p0) > limit) * -1;
- // mask |= (abs(q1 - q0) > limit) * -1;
-
- work = abs_diff(q2p2, q1p1);
- mask = _mm_max_epu8(work, mask);
- mask = _mm_max_epu8(mask, _mm_srli_si128(mask, 8));
- mask = _mm_subs_epu8(mask, *limit);
- mask = _mm_cmpeq_epi8(mask, zero);
-
- // lp filter - the same for 6, 8 and 14 versions
- filter4_dual_sse2(p1p0, q1q0, &hev, &mask, q1q0, p1p0);
-
- // flat_mask
- flat = _mm_max_epu8(abs_diff(q2p2, q0p0), abs_p1p0);
- flat = _mm_max_epu8(flat, _mm_srli_si128(flat, 8));
- flat = _mm_subs_epu8(flat, one);
- flat = _mm_cmpeq_epi8(flat, zero);
- flat = _mm_and_si128(flat, mask);
- // replicate for the further "merged variables" usage
- flat = _mm_unpacklo_epi64(flat, flat);
- }
-
- // 5 tap filter
- // need it only if flat !=0
- if (0xffff != _mm_movemask_epi8(_mm_cmpeq_epi8(flat, zero))) {
- const __m128i four = _mm_set1_epi16(4);
- __m128i workp_a, workp_b, workp_shft0, workp_shft1;
- p2_16 = _mm_unpacklo_epi8(*p2, zero);
- p1_16 = _mm_unpacklo_epi8(*p1, zero);
- p0_16 = _mm_unpacklo_epi8(*p0, zero);
- q0_16 = _mm_unpacklo_epi8(*q0, zero);
- q1_16 = _mm_unpacklo_epi8(*q1, zero);
- q2_16 = _mm_unpacklo_epi8(*q2, zero);
-
- // op1
- workp_a = _mm_add_epi16(_mm_add_epi16(p0_16, p0_16),
- _mm_add_epi16(p1_16, p1_16)); // p0 *2 + p1 * 2
- workp_a = _mm_add_epi16(_mm_add_epi16(workp_a, four),
- p2_16); // p2 + p0 * 2 + p1 * 2 + 4
-
- workp_b = _mm_add_epi16(_mm_add_epi16(p2_16, p2_16), q0_16);
- workp_shft0 = _mm_srli_epi16(_mm_add_epi16(workp_a, workp_b),
- 3); // p2 * 3 + p1 * 2 + p0 * 2 + q0 + 4
-
- // op0
- workp_b = _mm_add_epi16(_mm_add_epi16(q0_16, q0_16), q1_16); // q0 * 2 + q1
- workp_a = _mm_add_epi16(workp_a,
- workp_b); // p2 + p0 * 2 + p1 * 2 + q0 * 2 + q1 + 4
- workp_shft1 = _mm_srli_epi16(workp_a, 3);
-
- flat_p1p0 = _mm_packus_epi16(workp_shft1, workp_shft0);
-
- // oq0
- workp_a = _mm_sub_epi16(_mm_sub_epi16(workp_a, p2_16),
- p1_16); // p0 * 2 + p1 + q0 * 2 + q1 + 4
- workp_b = _mm_add_epi16(q1_16, q2_16);
- workp_a = _mm_add_epi16(
- workp_a, workp_b); // p0 * 2 + p1 + q0 * 2 + q1 * 2 + q2 + 4
- workp_shft0 = _mm_srli_epi16(workp_a, 3);
-
- // oq1
- workp_a = _mm_sub_epi16(_mm_sub_epi16(workp_a, p1_16),
- p0_16); // p0 + q0 * 2 + q1 * 2 + q2 + 4
- workp_b = _mm_add_epi16(q2_16, q2_16);
- workp_shft1 = _mm_srli_epi16(_mm_add_epi16(workp_a, workp_b),
- 3); // p0 + q0 * 2 + q1 * 2 + q2 * 3 + 4
-
- flat_q0q1 = _mm_packus_epi16(workp_shft0, workp_shft1);
-
- qs1qs0 = _mm_andnot_si128(flat, *q1q0);
- *q1q0 = _mm_and_si128(flat, flat_q0q1);
- *q1q0 = _mm_or_si128(qs1qs0, *q1q0);
-
- ps1ps0 = _mm_andnot_si128(flat, *p1p0);
- *p1p0 = _mm_and_si128(flat, flat_p1p0);
- *p1p0 = _mm_or_si128(ps1ps0, *p1p0);
- }
-}
-
-static AOM_FORCE_INLINE void lpf_internal_6_sse2(
- __m128i *p2, __m128i *q2, __m128i *p1, __m128i *q1, __m128i *p0,
- __m128i *q0, __m128i *q1q0, __m128i *p1p0, __m128i *blimit, __m128i *limit,
- __m128i *thresh) {
- const __m128i zero = _mm_setzero_si128();
- __m128i mask, hev, flat;
- __m128i q2p2, q1p1, q0p0, flat_p1p0, flat_q0q1;
- __m128i pq2_16, q2_16, pq1_16, pq0_16, q0_16;
- __m128i ps1ps0, qs1qs0;
-
- q2p2 = _mm_unpacklo_epi32(*p2, *q2);
- q1p1 = _mm_unpacklo_epi32(*p1, *q1);
- q0p0 = _mm_unpacklo_epi32(*p0, *q0);
-
- *p1p0 = _mm_unpacklo_epi32(*p0, *p1);
- *q1q0 = _mm_unpacklo_epi32(*q0, *q1);
-
- const __m128i one = _mm_set1_epi8(1);
- const __m128i fe = _mm_set1_epi8(0xfe);
- const __m128i ff = _mm_cmpeq_epi8(fe, fe);
- {
- // filter_mask and hev_mask
- __m128i abs_p1q1, abs_p0q0, abs_q1q0, abs_p1p0, work;
- abs_p1p0 = abs_diff(q1p1, q0p0);
- abs_q1q0 = _mm_srli_si128(abs_p1p0, 4);
-
- abs_p0q0 = abs_diff(*p1p0, *q1q0);
- abs_p1q1 = _mm_srli_si128(abs_p0q0, 4);
-
- // considering sse doesn't have unsigned elements comparison the idea is
- // to find at least one case when X > limit, it means the corresponding
- // mask bit is set.
- // to achieve that we find global max value of all inputs of abs(x-y) or
- // (abs(p0 - q0) * 2 + abs(p1 - q1) / 2 If it is > limit the mask is set
- // otherwise - not
-
- flat = _mm_max_epu8(abs_p1p0, abs_q1q0);
- hev = _mm_subs_epu8(flat, *thresh);
- hev = _mm_xor_si128(_mm_cmpeq_epi8(hev, zero), ff);
- // replicate for the further "merged variables" usage
- hev = _mm_unpacklo_epi32(hev, hev);
-
- abs_p0q0 = _mm_adds_epu8(abs_p0q0, abs_p0q0);
- abs_p1q1 = _mm_srli_epi16(_mm_and_si128(abs_p1q1, fe), 1);
- mask = _mm_subs_epu8(_mm_adds_epu8(abs_p0q0, abs_p1q1), *blimit);
- mask = _mm_unpacklo_epi32(mask, zero);
- mask = _mm_xor_si128(_mm_cmpeq_epi8(mask, zero), ff);
- // mask |= (abs(p0 - q0) * 2 + abs(p1 - q1) / 2 > blimit) * -1;
- mask = _mm_max_epu8(abs_p1p0, mask);
- // mask |= (abs(p1 - p0) > limit) * -1;
- // mask |= (abs(q1 - q0) > limit) * -1;
-
- work = abs_diff(q2p2, q1p1);
- mask = _mm_max_epu8(work, mask);
- mask = _mm_max_epu8(mask, _mm_srli_si128(mask, 4));
- mask = _mm_subs_epu8(mask, *limit);
- mask = _mm_cmpeq_epi8(mask, zero);
-
- // lp filter - the same for 6, 8 and 14 versions
- filter4_sse2(p1p0, q1q0, &hev, &mask, q1q0, p1p0);
-
- // flat_mask
- flat = _mm_max_epu8(abs_diff(q2p2, q0p0), abs_p1p0);
- flat = _mm_max_epu8(flat, _mm_srli_si128(flat, 4));
- flat = _mm_subs_epu8(flat, one);
- flat = _mm_cmpeq_epi8(flat, zero);
- flat = _mm_and_si128(flat, mask);
- // replicate for the further "merged variables" usage
- flat = _mm_unpacklo_epi32(flat, flat);
- flat = _mm_unpacklo_epi64(flat, flat);
- }
-
- // 5 tap filter
- // need it only if flat !=0
- if (0xffff != _mm_movemask_epi8(_mm_cmpeq_epi8(flat, zero))) {
- const __m128i four = _mm_set1_epi16(4);
- __m128i workp_a, workp_b, workp_c;
- __m128i pq0x2_pq1, pq1_pq2;
- pq2_16 = _mm_unpacklo_epi8(q2p2, zero);
- pq1_16 = _mm_unpacklo_epi8(q1p1, zero);
- pq0_16 = _mm_unpacklo_epi8(q0p0, zero);
- q0_16 = _mm_srli_si128(pq0_16, 8);
- q2_16 = _mm_srli_si128(pq2_16, 8);
-
- // op1
- pq0x2_pq1 =
- _mm_add_epi16(_mm_add_epi16(pq0_16, pq0_16), pq1_16); // p0 *2 + p1
- pq1_pq2 = _mm_add_epi16(pq1_16, pq2_16); // p1 + p2
- workp_a = _mm_add_epi16(_mm_add_epi16(pq0x2_pq1, four),
- pq1_pq2); // p2 + p0 * 2 + p1 * 2 + 4
-
- workp_b = _mm_add_epi16(_mm_add_epi16(pq2_16, pq2_16), q0_16);
- workp_b =
- _mm_add_epi16(workp_a, workp_b); // p2 * 3 + p1 * 2 + p0 * 2 + q0 + 4
-
- // op0
- workp_c = _mm_srli_si128(pq0x2_pq1, 8); // q0 * 2 + q1
- workp_a = _mm_add_epi16(workp_a,
- workp_c); // p2 + p0 * 2 + p1 * 2 + q0 * 2 + q1 + 4
- workp_b = _mm_unpacklo_epi64(workp_a, workp_b);
- workp_b = _mm_srli_epi16(workp_b, 3);
-
- flat_p1p0 = _mm_packus_epi16(workp_b, workp_b);
-
- // oq0
- workp_a = _mm_sub_epi16(_mm_sub_epi16(workp_a, pq2_16),
- pq1_16); // p0 * 2 + p1 + q0 * 2 + q1 + 4
- workp_b = _mm_srli_si128(pq1_pq2, 8);
- workp_a = _mm_add_epi16(
- workp_a, workp_b); // p0 * 2 + p1 + q0 * 2 + q1 * 2 + q2 + 4
- // workp_shft0 = _mm_srli_epi16(workp_a, 3);
-
- // oq1
- workp_c = _mm_sub_epi16(_mm_sub_epi16(workp_a, pq1_16),
- pq0_16); // p0 + q0 * 2 + q1 * 2 + q2 + 4
- workp_b = _mm_add_epi16(q2_16, q2_16);
- workp_b =
- _mm_add_epi16(workp_c, workp_b); // p0 + q0 * 2 + q1 * 2 + q2 * 3 + 4
-
- workp_a = _mm_unpacklo_epi64(workp_a, workp_b);
- workp_a = _mm_srli_epi16(workp_a, 3);
-
- flat_q0q1 = _mm_packus_epi16(workp_a, workp_a);
-
- qs1qs0 = _mm_andnot_si128(flat, *q1q0);
- *q1q0 = _mm_and_si128(flat, flat_q0q1);
- *q1q0 = _mm_or_si128(qs1qs0, *q1q0);
-
- ps1ps0 = _mm_andnot_si128(flat, *p1p0);
- *p1p0 = _mm_and_si128(flat, flat_p1p0);
- *p1p0 = _mm_or_si128(ps1ps0, *p1p0);
- }
-}
-
-void aom_lpf_horizontal_6_sse2(unsigned char *s, int p,
- const unsigned char *_blimit,
- const unsigned char *_limit,
- const unsigned char *_thresh) {
- __m128i p2, p1, p0, q0, q1, q2;
- __m128i p1p0, q1q0;
- __m128i blimit = _mm_load_si128((__m128i *)_blimit);
- __m128i limit = _mm_load_si128((__m128i *)_limit);
- __m128i thresh = _mm_load_si128((__m128i *)_thresh);
-
- p2 = _mm_cvtsi32_si128(*(int *)(s - 3 * p));
- p1 = _mm_cvtsi32_si128(*(int *)(s - 2 * p));
- p0 = _mm_cvtsi32_si128(*(int *)(s - 1 * p));
- q0 = _mm_cvtsi32_si128(*(int *)(s - 0 * p));
- q1 = _mm_cvtsi32_si128(*(int *)(s + 1 * p));
- q2 = _mm_cvtsi32_si128(*(int *)(s + 2 * p));
-
- lpf_internal_6_sse2(&p2, &q2, &p1, &q1, &p0, &q0, &q1q0, &p1p0, &blimit,
- &limit, &thresh);
-
- xx_storel_32(s - 1 * p, p1p0);
- xx_storel_32(s - 2 * p, _mm_srli_si128(p1p0, 4));
- xx_storel_32(s + 0 * p, q1q0);
- xx_storel_32(s + 1 * p, _mm_srli_si128(q1q0, 4));
-}
-
-void aom_lpf_horizontal_6_dual_sse2(unsigned char *s, int p,
- const unsigned char *_blimit0,
- const unsigned char *_limit0,
- const unsigned char *_thresh0,
- const unsigned char *_blimit1,
- const unsigned char *_limit1,
- const unsigned char *_thresh1) {
- __m128i blimit = _mm_unpacklo_epi32(_mm_load_si128((__m128i *)_blimit0),
- _mm_load_si128((__m128i *)_blimit1));
- __m128i limit = _mm_unpacklo_epi32(_mm_load_si128((__m128i *)_limit0),
- _mm_load_si128((__m128i *)_limit1));
- __m128i thresh = _mm_unpacklo_epi32(_mm_load_si128((__m128i *)_thresh0),
- _mm_load_si128((__m128i *)_thresh1));
-
- __m128i p2, p1, p0, q0, q1, q2;
- __m128i p1p0, q1q0;
-
- p2 = _mm_loadl_epi64((__m128i *)(s - 3 * p));
- p1 = _mm_loadl_epi64((__m128i *)(s - 2 * p));
- p0 = _mm_loadl_epi64((__m128i *)(s - 1 * p));
- q0 = _mm_loadl_epi64((__m128i *)(s - 0 * p));
- q1 = _mm_loadl_epi64((__m128i *)(s + 1 * p));
- q2 = _mm_loadl_epi64((__m128i *)(s + 2 * p));
-
- lpf_internal_6_dual_sse2(&p2, &q2, &p1, &q1, &p0, &q0, &q1q0, &p1p0, &blimit,
- &limit, &thresh);
-
- _mm_storel_epi64((__m128i *)(s - 1 * p), p1p0);
- _mm_storel_epi64((__m128i *)(s - 2 * p), _mm_srli_si128(p1p0, 8));
- _mm_storel_epi64((__m128i *)(s + 0 * p), q1q0);
- _mm_storel_epi64((__m128i *)(s + 1 * p), _mm_srli_si128(q1q0, 8));
-}
-
-static AOM_FORCE_INLINE void lpf_internal_8_sse2(
- __m128i *p3, __m128i *q3, __m128i *p2, __m128i *q2, __m128i *p1,
- __m128i *q1, __m128i *p0, __m128i *q0, __m128i *q1q0_out, __m128i *p1p0_out,
- __m128i *blimit, __m128i *limit, __m128i *thresh) {
- const __m128i zero = _mm_setzero_si128();
- __m128i mask, hev, flat;
- __m128i p2_16, q2_16, p1_16, p0_16, q0_16, q1_16, p3_16, q3_16, q3p3,
- flat_p1p0, flat_q0q1;
- __m128i q2p2, q1p1, q0p0;
- __m128i q1q0, p1p0, ps1ps0, qs1qs0;
- __m128i work_pq, opq2, pq2;
-
- q3p3 = _mm_unpacklo_epi32(*p3, *q3);
- q2p2 = _mm_unpacklo_epi32(*p2, *q2);
- q1p1 = _mm_unpacklo_epi32(*p1, *q1);
- q0p0 = _mm_unpacklo_epi32(*p0, *q0);
-
- p1p0 = _mm_unpacklo_epi32(q0p0, q1p1); // p1p0 q1q0
- q1q0 = _mm_srli_si128(p1p0, 8);
-
- // filter_mask and hev_mask
-
- // considering sse doesn't have unsigned elements comparison the idea is to
- // find at least one case when X > limit, it means the corresponding mask
- // bit is set.
- // to achieve that we find global max value of all inputs of abs(x-y) or
- // (abs(p0 - q0) * 2 + abs(p1 - q1) / 2 If it is > limit the mask is set
- // otherwise - not
-
- const __m128i one = _mm_set1_epi8(1);
- const __m128i fe = _mm_set1_epi8(0xfe);
- const __m128i ff = _mm_cmpeq_epi8(fe, fe);
- __m128i abs_p1q1, abs_p0q0, abs_q1q0, abs_p1p0, work;
-
- abs_p1p0 = abs_diff(q1p1, q0p0);
- abs_q1q0 = _mm_srli_si128(abs_p1p0, 4);
-
- abs_p0q0 = abs_diff(p1p0, q1q0);
- abs_p1q1 = _mm_srli_si128(abs_p0q0, 4);
-
- flat = _mm_max_epu8(abs_p1p0, abs_q1q0);
- hev = _mm_subs_epu8(flat, *thresh);
- hev = _mm_xor_si128(_mm_cmpeq_epi8(hev, zero), ff);
- // replicate for the further "merged variables" usage
- hev = _mm_unpacklo_epi32(hev, hev);
-
- abs_p0q0 = _mm_adds_epu8(abs_p0q0, abs_p0q0);
- abs_p1q1 = _mm_srli_epi16(_mm_and_si128(abs_p1q1, fe), 1);
- mask = _mm_subs_epu8(_mm_adds_epu8(abs_p0q0, abs_p1q1), *blimit);
- mask = _mm_unpacklo_epi32(mask, zero);
- mask = _mm_xor_si128(_mm_cmpeq_epi8(mask, zero), ff);
- // mask |= (abs(p0 - q0) * 2 + abs(p1 - q1) / 2 > blimit) * -1;
- mask = _mm_max_epu8(abs_p1p0, mask);
- // mask |= (abs(p1 - p0) > limit) * -1;
- // mask |= (abs(q1 - q0) > limit) * -1;
-
- work = _mm_max_epu8(abs_diff(q2p2, q1p1), abs_diff(q3p3, q2p2));
-
- mask = _mm_max_epu8(work, mask);
- mask = _mm_max_epu8(mask, _mm_srli_si128(mask, 4));
- mask = _mm_subs_epu8(mask, *limit);
- mask = _mm_cmpeq_epi8(mask, zero);
-
- // lp filter - the same for 6, 8 and 14 versions
- filter4_sse2(&p1p0, &q1q0, &hev, &mask, q1q0_out, p1p0_out);
-
- // flat_mask4
- flat = _mm_max_epu8(abs_diff(q2p2, q0p0), abs_diff(q3p3, q0p0));
- flat = _mm_max_epu8(abs_p1p0, flat);
-
- flat = _mm_max_epu8(flat, _mm_srli_si128(flat, 4));
- flat = _mm_subs_epu8(flat, one);
- flat = _mm_cmpeq_epi8(flat, zero);
- flat = _mm_and_si128(flat, mask);
- // replicate for the further "merged variables" usage
- flat = _mm_unpacklo_epi32(flat, flat);
- flat = _mm_unpacklo_epi64(flat, flat);
-
- // filter8 need it only if flat !=0
- if (0xffff != _mm_movemask_epi8(_mm_cmpeq_epi8(flat, zero))) {
- const __m128i four = _mm_set1_epi16(4);
- __m128i workp_a, workp_b, workp_c, workp_d, workp_shft1, workp_shft2;
- p2_16 = _mm_unpacklo_epi8(*p2, zero);
- p1_16 = _mm_unpacklo_epi8(*p1, zero);
- p0_16 = _mm_unpacklo_epi8(*p0, zero);
- q0_16 = _mm_unpacklo_epi8(*q0, zero);
- q1_16 = _mm_unpacklo_epi8(*q1, zero);
- q2_16 = _mm_unpacklo_epi8(*q2, zero);
- p3_16 = _mm_unpacklo_epi8(*p3, zero);
- q3_16 = _mm_unpacklo_epi8(*q3, zero);
-
- // op2
- workp_a =
- _mm_add_epi16(_mm_add_epi16(p3_16, p3_16), _mm_add_epi16(p2_16, p1_16));
- workp_a = _mm_add_epi16(_mm_add_epi16(workp_a, four), p0_16);
- workp_b = _mm_add_epi16(_mm_add_epi16(q0_16, p2_16), p3_16);
- workp_shft2 = _mm_add_epi16(workp_a, workp_b);
-
- // op1
- workp_b = _mm_add_epi16(_mm_add_epi16(q0_16, q1_16), p1_16);
- workp_c = _mm_add_epi16(workp_a, workp_b);
- // workp_shft0 = _mm_srli_epi16(_mm_add_epi16(workp_a, workp_b), 3);
-
- // op0
- workp_a = _mm_add_epi16(_mm_sub_epi16(workp_a, p3_16), q2_16);
- workp_b = _mm_add_epi16(_mm_sub_epi16(workp_b, p1_16), p0_16);
- workp_d = _mm_add_epi16(workp_a, workp_b);
- // workp_shft1 = _mm_srli_epi16(_mm_add_epi16(workp_a, workp_b), 3);
-
- workp_c = _mm_unpacklo_epi64(workp_d, workp_c);
- workp_c = _mm_srli_epi16(workp_c, 3);
- flat_p1p0 = _mm_packus_epi16(workp_c, workp_c);
-
- // oq0
- workp_a = _mm_add_epi16(_mm_sub_epi16(workp_a, p3_16), q3_16);
- workp_b = _mm_add_epi16(_mm_sub_epi16(workp_b, p0_16), q0_16);
- // workp_shft0 = _mm_srli_epi16(_mm_add_epi16(workp_a, workp_b), 3);
- workp_c = _mm_add_epi16(workp_a, workp_b);
-
- // oq1
- workp_a = _mm_add_epi16(_mm_sub_epi16(workp_a, p2_16), q3_16);
- workp_b = _mm_add_epi16(_mm_sub_epi16(workp_b, q0_16), q1_16);
- workp_d = _mm_add_epi16(workp_a, workp_b);
- // workp_shft1 = _mm_srli_epi16(_mm_add_epi16(workp_a, workp_b), 3);
-
- workp_c = _mm_unpacklo_epi64(workp_c, workp_d);
- workp_c = _mm_srli_epi16(workp_c, 3);
- flat_q0q1 = _mm_packus_epi16(workp_c, workp_c);
-
- // oq2
- workp_a = _mm_add_epi16(_mm_sub_epi16(workp_a, p1_16), q3_16);
- workp_b = _mm_add_epi16(_mm_sub_epi16(workp_b, q1_16), q2_16);
- workp_shft1 = _mm_add_epi16(workp_a, workp_b);
-
- workp_c = _mm_unpacklo_epi64(workp_shft2, workp_shft1);
- workp_c = _mm_srli_epi16(workp_c, 3);
-
- opq2 = _mm_packus_epi16(workp_c, workp_c);
-
- work_pq = _mm_andnot_si128(flat, q2p2);
- pq2 = _mm_and_si128(flat, opq2);
- *p2 = _mm_or_si128(work_pq, pq2);
- *q2 = _mm_srli_si128(*p2, 4);
-
- qs1qs0 = _mm_andnot_si128(flat, *q1q0_out);
- q1q0 = _mm_and_si128(flat, flat_q0q1);
- *q1q0_out = _mm_or_si128(qs1qs0, q1q0);
-
- ps1ps0 = _mm_andnot_si128(flat, *p1p0_out);
- p1p0 = _mm_and_si128(flat, flat_p1p0);
- *p1p0_out = _mm_or_si128(ps1ps0, p1p0);
- }
-}
-
-static AOM_FORCE_INLINE void lpf_internal_8_dual_sse2(
- __m128i *p3, __m128i *q3, __m128i *p2, __m128i *q2, __m128i *p1,
- __m128i *q1, __m128i *p0, __m128i *q0, __m128i *q1q0_out, __m128i *p1p0_out,
- __m128i *blimit, __m128i *limit, __m128i *thresh) {
- const __m128i zero = _mm_setzero_si128();
- __m128i mask, hev, flat;
- __m128i p2_16, q2_16, p1_16, p0_16, q0_16, q1_16, p3_16, q3_16, q3p3,
- flat_p1p0, flat_q0q1;
- __m128i q2p2, q1p1, q0p0;
- __m128i q1q0, p1p0, ps1ps0, qs1qs0;
- __m128i work_pq, opq2, pq2;
-
- q3p3 = _mm_unpacklo_epi64(*p3, *q3);
- q2p2 = _mm_unpacklo_epi64(*p2, *q2);
- q1p1 = _mm_unpacklo_epi64(*p1, *q1);
- q0p0 = _mm_unpacklo_epi64(*p0, *q0);
-
- p1p0 = _mm_unpacklo_epi64(q0p0, q1p1);
- q1q0 = _mm_unpackhi_epi64(q0p0, q1p1);
-
- {
- // filter_mask and hev_mask
-
- // considering sse doesn't have unsigned elements comparison the idea is to
- // find at least one case when X > limit, it means the corresponding mask
- // bit is set.
- // to achieve that we find global max value of all inputs of abs(x-y) or
- // (abs(p0 - q0) * 2 + abs(p1 - q1) / 2 If it is > limit the mask is set
- // otherwise - not
-
- const __m128i one = _mm_set1_epi8(1);
- const __m128i fe = _mm_set1_epi8(0xfe);
- const __m128i ff = _mm_cmpeq_epi8(fe, fe);
- __m128i abs_p1q1, abs_p0q0, abs_q1q0, abs_p1p0, work;
-
- abs_p1p0 = abs_diff(q1p1, q0p0);
- abs_q1q0 = _mm_srli_si128(abs_p1p0, 8);
-
- abs_p0q0 = abs_diff(p1p0, q1q0);
- abs_p1q1 = _mm_srli_si128(abs_p0q0, 8);
- abs_p0q0 = _mm_unpacklo_epi64(abs_p0q0, abs_p0q0);
-
- flat = _mm_max_epu8(abs_p1p0, abs_q1q0);
- hev = _mm_subs_epu8(flat, *thresh);
- hev = _mm_xor_si128(_mm_cmpeq_epi8(hev, zero), ff);
- // replicate for the further "merged variables" usage
- hev = _mm_unpacklo_epi64(hev, hev);
-
- abs_p0q0 = _mm_adds_epu8(abs_p0q0, abs_p0q0);
- abs_p1q1 = _mm_srli_epi16(_mm_and_si128(abs_p1q1, fe), 1);
- mask = _mm_subs_epu8(_mm_adds_epu8(abs_p0q0, abs_p1q1), *blimit);
- mask = _mm_xor_si128(_mm_cmpeq_epi8(mask, zero), ff);
- // mask |= (abs(p0 - q0) * 2 + abs(p1 - q1) / 2 > blimit) * -1;
- mask = _mm_max_epu8(abs_p1p0, mask);
- // mask |= (abs(p1 - p0) > limit) * -1;
- // mask |= (abs(q1 - q0) > limit) * -1;
-
- work = _mm_max_epu8(abs_diff(q2p2, q1p1), abs_diff(q3p3, q2p2));
-
- mask = _mm_max_epu8(work, mask);
- mask = _mm_max_epu8(mask, _mm_srli_si128(mask, 8));
- mask = _mm_subs_epu8(mask, *limit);
- mask = _mm_cmpeq_epi8(mask, zero);
-
- // lp filter - the same for 6, 8 and 14 versions
- filter4_dual_sse2(&p1p0, &q1q0, &hev, &mask, q1q0_out, p1p0_out);
-
- // flat_mask4
- flat = _mm_max_epu8(abs_diff(q2p2, q0p0), abs_diff(q3p3, q0p0));
- flat = _mm_max_epu8(abs_p1p0, flat);
-
- flat = _mm_max_epu8(flat, _mm_srli_si128(flat, 8));
- flat = _mm_subs_epu8(flat, one);
- flat = _mm_cmpeq_epi8(flat, zero);
- flat = _mm_and_si128(flat, mask);
- // replicate for the further "merged variables" usage
- flat = _mm_unpacklo_epi64(flat, flat);
- }
-
- // filter8 need it only if flat !=0
- if (0xffff != _mm_movemask_epi8(_mm_cmpeq_epi8(flat, zero))) {
- const __m128i four = _mm_set1_epi16(4);
-
- __m128i workp_a, workp_b, workp_shft0, workp_shft1, workp_shft2;
- p2_16 = _mm_unpacklo_epi8(*p2, zero);
- p1_16 = _mm_unpacklo_epi8(*p1, zero);
- p0_16 = _mm_unpacklo_epi8(*p0, zero);
- q0_16 = _mm_unpacklo_epi8(*q0, zero);
- q1_16 = _mm_unpacklo_epi8(*q1, zero);
- q2_16 = _mm_unpacklo_epi8(*q2, zero);
- p3_16 = _mm_unpacklo_epi8(*p3, zero);
- q3_16 = _mm_unpacklo_epi8(*q3, zero);
-
- // op2
- workp_a =
- _mm_add_epi16(_mm_add_epi16(p3_16, p3_16), _mm_add_epi16(p2_16, p1_16));
- workp_a = _mm_add_epi16(_mm_add_epi16(workp_a, four), p0_16);
- workp_b = _mm_add_epi16(_mm_add_epi16(q0_16, p2_16), p3_16);
- workp_shft2 = _mm_srli_epi16(_mm_add_epi16(workp_a, workp_b), 3);
-
- // op1
- workp_b = _mm_add_epi16(_mm_add_epi16(q0_16, q1_16), p1_16);
- workp_shft0 = _mm_srli_epi16(_mm_add_epi16(workp_a, workp_b), 3);
-
- // op0
- workp_a = _mm_add_epi16(_mm_sub_epi16(workp_a, p3_16), q2_16);
- workp_b = _mm_add_epi16(_mm_sub_epi16(workp_b, p1_16), p0_16);
- workp_shft1 = _mm_srli_epi16(_mm_add_epi16(workp_a, workp_b), 3);
-
- flat_p1p0 = _mm_packus_epi16(workp_shft1, workp_shft0);
-
- // oq0
- workp_a = _mm_add_epi16(_mm_sub_epi16(workp_a, p3_16), q3_16);
- workp_b = _mm_add_epi16(_mm_sub_epi16(workp_b, p0_16), q0_16);
- workp_shft0 = _mm_srli_epi16(_mm_add_epi16(workp_a, workp_b), 3);
-
- // oq1
- workp_a = _mm_add_epi16(_mm_sub_epi16(workp_a, p2_16), q3_16);
- workp_b = _mm_add_epi16(_mm_sub_epi16(workp_b, q0_16), q1_16);
- workp_shft1 = _mm_srli_epi16(_mm_add_epi16(workp_a, workp_b), 3);
-
- flat_q0q1 = _mm_packus_epi16(workp_shft0, workp_shft1);
-
- // oq2
- workp_a = _mm_add_epi16(_mm_sub_epi16(workp_a, p1_16), q3_16);
- workp_b = _mm_add_epi16(_mm_sub_epi16(workp_b, q1_16), q2_16);
- workp_shft1 = _mm_srli_epi16(_mm_add_epi16(workp_a, workp_b), 3);
-
- opq2 = _mm_packus_epi16(workp_shft2, workp_shft1);
-
- work_pq = _mm_andnot_si128(flat, q2p2);
- pq2 = _mm_and_si128(flat, opq2);
- *p2 = _mm_or_si128(work_pq, pq2);
- *q2 = _mm_srli_si128(*p2, 8);
-
- qs1qs0 = _mm_andnot_si128(flat, *q1q0_out);
- q1q0 = _mm_and_si128(flat, flat_q0q1);
- *q1q0_out = _mm_or_si128(qs1qs0, q1q0);
-
- ps1ps0 = _mm_andnot_si128(flat, *p1p0_out);
- p1p0 = _mm_and_si128(flat, flat_p1p0);
- *p1p0_out = _mm_or_si128(ps1ps0, p1p0);
- }
-}
-
-void aom_lpf_horizontal_8_sse2(unsigned char *s, int p,
- const unsigned char *_blimit,
- const unsigned char *_limit,
- const unsigned char *_thresh) {
- __m128i p2, p1, p0, q0, q1, q2, p3, q3;
- __m128i q1q0, p1p0;
- __m128i blimit = _mm_load_si128((const __m128i *)_blimit);
- __m128i limit = _mm_load_si128((const __m128i *)_limit);
- __m128i thresh = _mm_load_si128((const __m128i *)_thresh);
-
- p3 = _mm_cvtsi32_si128(*(int *)(s - 4 * p));
- p2 = _mm_cvtsi32_si128(*(int *)(s - 3 * p));
- p1 = _mm_cvtsi32_si128(*(int *)(s - 2 * p));
- p0 = _mm_cvtsi32_si128(*(int *)(s - 1 * p));
- q0 = _mm_cvtsi32_si128(*(int *)(s - 0 * p));
- q1 = _mm_cvtsi32_si128(*(int *)(s + 1 * p));
- q2 = _mm_cvtsi32_si128(*(int *)(s + 2 * p));
- q3 = _mm_cvtsi32_si128(*(int *)(s + 3 * p));
-
- lpf_internal_8_sse2(&p3, &q3, &p2, &q2, &p1, &q1, &p0, &q0, &q1q0, &p1p0,
- &blimit, &limit, &thresh);
-
- xx_storel_32(s - 1 * p, p1p0);
- xx_storel_32(s - 2 * p, _mm_srli_si128(p1p0, 4));
- xx_storel_32(s + 0 * p, q1q0);
- xx_storel_32(s + 1 * p, _mm_srli_si128(q1q0, 4));
- xx_storel_32(s - 3 * p, p2);
- xx_storel_32(s + 2 * p, q2);
-}
-
-void aom_lpf_horizontal_14_dual_sse2(unsigned char *s, int p,
- const unsigned char *_blimit0,
- const unsigned char *_limit0,
- const unsigned char *_thresh0,
- const unsigned char *_blimit1,
- const unsigned char *_limit1,
- const unsigned char *_thresh1) {
- __m128i q6p6, q5p5, q4p4, q3p3, q2p2, q1p1, q0p0;
- __m128i blimit =
- _mm_unpacklo_epi32(_mm_load_si128((const __m128i *)_blimit0),
- _mm_load_si128((const __m128i *)_blimit1));
- __m128i limit = _mm_unpacklo_epi32(_mm_load_si128((const __m128i *)_limit0),
- _mm_load_si128((const __m128i *)_limit1));
- __m128i thresh =
- _mm_unpacklo_epi32(_mm_load_si128((const __m128i *)_thresh0),
- _mm_load_si128((const __m128i *)_thresh1));
-
- q4p4 = _mm_unpacklo_epi64(_mm_loadl_epi64((__m128i *)(s - 5 * p)),
- _mm_loadl_epi64((__m128i *)(s + 4 * p)));
- q3p3 = _mm_unpacklo_epi64(_mm_loadl_epi64((__m128i *)(s - 4 * p)),
- _mm_loadl_epi64((__m128i *)(s + 3 * p)));
- q2p2 = _mm_unpacklo_epi64(_mm_loadl_epi64((__m128i *)(s - 3 * p)),
- _mm_loadl_epi64((__m128i *)(s + 2 * p)));
- q1p1 = _mm_unpacklo_epi64(_mm_loadl_epi64((__m128i *)(s - 2 * p)),
- _mm_loadl_epi64((__m128i *)(s + 1 * p)));
-
- q0p0 = _mm_unpacklo_epi64(_mm_loadl_epi64((__m128i *)(s - 1 * p)),
- _mm_loadl_epi64((__m128i *)(s - 0 * p)));
-
- q5p5 = _mm_unpacklo_epi64(_mm_loadl_epi64((__m128i *)(s - 6 * p)),
- _mm_loadl_epi64((__m128i *)(s + 5 * p)));
-
- q6p6 = _mm_unpacklo_epi64(_mm_loadl_epi64((__m128i *)(s - 7 * p)),
- _mm_loadl_epi64((__m128i *)(s + 6 * p)));
-
- lpf_internal_14_dual_sse2(&q6p6, &q5p5, &q4p4, &q3p3, &q2p2, &q1p1, &q0p0,
- &blimit, &limit, &thresh);
-
- _mm_storel_epi64((__m128i *)(s - 1 * p), q0p0);
- _mm_storel_epi64((__m128i *)(s + 0 * p), _mm_srli_si128(q0p0, 8));
- _mm_storel_epi64((__m128i *)(s - 2 * p), q1p1);
- _mm_storel_epi64((__m128i *)(s + 1 * p), _mm_srli_si128(q1p1, 8));
- _mm_storel_epi64((__m128i *)(s - 3 * p), q2p2);
- _mm_storel_epi64((__m128i *)(s + 2 * p), _mm_srli_si128(q2p2, 8));
- _mm_storel_epi64((__m128i *)(s - 4 * p), q3p3);
- _mm_storel_epi64((__m128i *)(s + 3 * p), _mm_srli_si128(q3p3, 8));
- _mm_storel_epi64((__m128i *)(s - 5 * p), q4p4);
- _mm_storel_epi64((__m128i *)(s + 4 * p), _mm_srli_si128(q4p4, 8));
- _mm_storel_epi64((__m128i *)(s - 6 * p), q5p5);
- _mm_storel_epi64((__m128i *)(s + 5 * p), _mm_srli_si128(q5p5, 8));
-}
-
-void aom_lpf_horizontal_8_dual_sse2(uint8_t *s, int p, const uint8_t *_blimit0,
- const uint8_t *_limit0,
- const uint8_t *_thresh0,
- const uint8_t *_blimit1,
- const uint8_t *_limit1,
- const uint8_t *_thresh1) {
- __m128i blimit = _mm_unpacklo_epi32(_mm_load_si128((__m128i *)_blimit0),
- _mm_load_si128((__m128i *)_blimit1));
- __m128i limit = _mm_unpacklo_epi32(_mm_load_si128((__m128i *)_limit0),
- _mm_load_si128((__m128i *)_limit1));
- __m128i thresh = _mm_unpacklo_epi32(_mm_load_si128((__m128i *)_thresh0),
- _mm_load_si128((__m128i *)_thresh1));
-
- __m128i p2, p1, p0, q0, q1, q2, p3, q3;
- __m128i q1q0, p1p0;
-
- p3 = _mm_loadl_epi64((__m128i *)(s - 4 * p));
- p2 = _mm_loadl_epi64((__m128i *)(s - 3 * p));
- p1 = _mm_loadl_epi64((__m128i *)(s - 2 * p));
- p0 = _mm_loadl_epi64((__m128i *)(s - 1 * p));
- q0 = _mm_loadl_epi64((__m128i *)(s - 0 * p));
- q1 = _mm_loadl_epi64((__m128i *)(s + 1 * p));
- q2 = _mm_loadl_epi64((__m128i *)(s + 2 * p));
- q3 = _mm_loadl_epi64((__m128i *)(s + 3 * p));
-
- lpf_internal_8_dual_sse2(&p3, &q3, &p2, &q2, &p1, &q1, &p0, &q0, &q1q0, &p1p0,
- &blimit, &limit, &thresh);
-
- _mm_storel_epi64((__m128i *)(s - 1 * p), p1p0);
- _mm_storel_epi64((__m128i *)(s - 2 * p), _mm_srli_si128(p1p0, 8));
- _mm_storel_epi64((__m128i *)(s + 0 * p), q1q0);
- _mm_storel_epi64((__m128i *)(s + 1 * p), _mm_srli_si128(q1q0, 8));
- _mm_storel_epi64((__m128i *)(s - 3 * p), p2);
- _mm_storel_epi64((__m128i *)(s + 2 * p), q2);
-}
-
-void aom_lpf_horizontal_4_dual_sse2(unsigned char *s, int p,
- const unsigned char *_blimit0,
- const unsigned char *_limit0,
- const unsigned char *_thresh0,
- const unsigned char *_blimit1,
- const unsigned char *_limit1,
- const unsigned char *_thresh1) {
- __m128i p1, p0, q0, q1;
- __m128i qs1qs0, ps1ps0;
-
- p1 = _mm_loadl_epi64((__m128i *)(s - 2 * p));
- p0 = _mm_loadl_epi64((__m128i *)(s - 1 * p));
- q0 = _mm_loadl_epi64((__m128i *)(s - 0 * p));
- q1 = _mm_loadl_epi64((__m128i *)(s + 1 * p));
-
- const __m128i zero = _mm_setzero_si128();
- const __m128i blimit =
- _mm_unpacklo_epi32(_mm_load_si128((const __m128i *)_blimit0),
- _mm_load_si128((const __m128i *)_blimit1));
- const __m128i limit =
- _mm_unpacklo_epi32(_mm_load_si128((const __m128i *)_limit0),
- _mm_load_si128((const __m128i *)_limit1));
-
- __m128i l = _mm_unpacklo_epi64(blimit, limit);
-
- __m128i thresh0 =
- _mm_unpacklo_epi8(_mm_loadl_epi64((const __m128i *)_thresh0), zero);
-
- __m128i thresh1 =
- _mm_unpacklo_epi8(_mm_loadl_epi64((const __m128i *)_thresh1), zero);
-
- __m128i t = _mm_unpacklo_epi64(thresh0, thresh1);
-
- lpf_internal_4_dual_sse2(&p1, &p0, &q0, &q1, &l, &t, &qs1qs0, &ps1ps0);
-
- _mm_storel_epi64((__m128i *)(s - 1 * p), ps1ps0);
- _mm_storel_epi64((__m128i *)(s - 2 * p), _mm_srli_si128(ps1ps0, 8));
- _mm_storel_epi64((__m128i *)(s + 0 * p), qs1qs0);
- _mm_storel_epi64((__m128i *)(s + 1 * p), _mm_srli_si128(qs1qs0, 8));
-}
-
-void aom_lpf_vertical_4_dual_sse2(uint8_t *s, int p, const uint8_t *_blimit0,
- const uint8_t *_limit0,
- const uint8_t *_thresh0,
- const uint8_t *_blimit1,
- const uint8_t *_limit1,
- const uint8_t *_thresh1) {
- __m128i p0, q0, q1, p1;
- __m128i x0, x1, x2, x3, x4, x5, x6, x7;
- __m128i d0, d1, d2, d3, d4, d5, d6, d7;
- __m128i qs1qs0, ps1ps0;
-
- const __m128i zero = _mm_setzero_si128();
- const __m128i blimit =
- _mm_unpacklo_epi32(_mm_load_si128((const __m128i *)_blimit0),
- _mm_load_si128((const __m128i *)_blimit1));
- const __m128i limit =
- _mm_unpacklo_epi32(_mm_load_si128((const __m128i *)_limit0),
- _mm_load_si128((const __m128i *)_limit1));
-
- __m128i l = _mm_unpacklo_epi64(blimit, limit);
-
- __m128i thresh0 =
- _mm_unpacklo_epi8(_mm_loadl_epi64((const __m128i *)_thresh0), zero);
-
- __m128i thresh1 =
- _mm_unpacklo_epi8(_mm_loadl_epi64((const __m128i *)_thresh1), zero);
-
- __m128i t = _mm_unpacklo_epi64(thresh0, thresh1);
-
- x0 = _mm_loadl_epi64((__m128i *)((s - 2)));
- x1 = _mm_loadl_epi64((__m128i *)((s - 2) + p));
- x2 = _mm_loadl_epi64((__m128i *)((s - 2) + 2 * p));
- x3 = _mm_loadl_epi64((__m128i *)((s - 2) + 3 * p));
- x4 = _mm_loadl_epi64((__m128i *)((s - 2) + 4 * p));
- x5 = _mm_loadl_epi64((__m128i *)((s - 2) + 5 * p));
- x6 = _mm_loadl_epi64((__m128i *)((s - 2) + 6 * p));
- x7 = _mm_loadl_epi64((__m128i *)((s - 2) + 7 * p));
-
- transpose8x8_low_sse2(&x0, &x1, &x2, &x3, &x4, &x5, &x6, &x7, &p1, &p0, &q0,
- &q1);
-
- lpf_internal_4_dual_sse2(&p1, &p0, &q0, &q1, &l, &t, &qs1qs0, &ps1ps0);
-
- p1 = _mm_srli_si128(ps1ps0, 8);
- q1 = _mm_srli_si128(qs1qs0, 8);
-
- transpose4x8_8x4_sse2(&p1, &ps1ps0, &qs1qs0, &q1, &d0, &d1, &d2, &d3, &d4,
- &d5, &d6, &d7);
-
- xx_storel_32((s - 2 + 0 * p), d0);
- xx_storel_32((s - 2 + 1 * p), d1);
- xx_storel_32((s - 2 + 2 * p), d2);
- xx_storel_32((s - 2 + 3 * p), d3);
- xx_storel_32((s - 2 + 4 * p), d4);
- xx_storel_32((s - 2 + 5 * p), d5);
- xx_storel_32((s - 2 + 6 * p), d6);
- xx_storel_32((s - 2 + 7 * p), d7);
-}
-
-void aom_lpf_vertical_6_sse2(unsigned char *s, int p,
- const unsigned char *_blimit,
- const unsigned char *_limit,
- const unsigned char *_thresh) {
- __m128i d0, d1, d2, d3, d4, d5, d6, d7;
- __m128i x2, x1, x0, x3;
- __m128i p0, q0;
- __m128i p1p0, q1q0;
- __m128i blimit = _mm_load_si128((__m128i *)_blimit);
- __m128i limit = _mm_load_si128((__m128i *)_limit);
- __m128i thresh = _mm_load_si128((__m128i *)_thresh);
-
- x3 = _mm_loadl_epi64((__m128i *)((s - 3) + 0 * p));
- x2 = _mm_loadl_epi64((__m128i *)((s - 3) + 1 * p));
- x1 = _mm_loadl_epi64((__m128i *)((s - 3) + 2 * p));
- x0 = _mm_loadl_epi64((__m128i *)((s - 3) + 3 * p));
-
- transpose4x8_8x4_sse2(&x3, &x2, &x1, &x0, &d0, &d1, &d2, &d3, &d4, &d5, &d6,
- &d7);
-
- lpf_internal_6_sse2(&d0, &d5, &d1, &d4, &d2, &d3, &q1q0, &p1p0, &blimit,
- &limit, &thresh);
-
- p0 = _mm_srli_si128(p1p0, 4);
- q0 = _mm_srli_si128(q1q0, 4);
-
- transpose4x8_8x4_low_sse2(&p0, &p1p0, &q1q0, &q0, &d0, &d1, &d2, &d3);
-
- xx_storel_32(s + 0 * p - 2, d0);
- xx_storel_32(s + 1 * p - 2, d1);
- xx_storel_32(s + 2 * p - 2, d2);
- xx_storel_32(s + 3 * p - 2, d3);
-}
-/* Loop filter (lpf_internal_6_dual_sse2) across a vertical edge, processing
- * 8 rows per call.
- * Loads 8 bytes per row starting at s - 3 (p is the row stride), transposes
- * the tile, filters it, transposes the result back and writes 4 bytes per
- * row starting at s - 2.
- * The low 64 bits of each set-0 / set-1 parameter pair are interleaved in
- * 32-bit units into one register; presumably set 0 applies to rows 0-3 and
- * set 1 to rows 4-7 - TODO confirm against lpf_internal_6_dual_sse2.
- * All six parameter pointers are read with the aligned _mm_load_si128. */
-void aom_lpf_vertical_6_dual_sse2(uint8_t *s, int p, const uint8_t *_blimit0,
- const uint8_t *_limit0,
- const uint8_t *_thresh0,
- const uint8_t *_blimit1,
- const uint8_t *_limit1,
- const uint8_t *_thresh1) {
- __m128i blimit = _mm_unpacklo_epi32(_mm_load_si128((__m128i *)_blimit0),
- _mm_load_si128((__m128i *)_blimit1));
- __m128i limit = _mm_unpacklo_epi32(_mm_load_si128((__m128i *)_limit0),
- _mm_load_si128((__m128i *)_limit1));
- __m128i thresh = _mm_unpacklo_epi32(_mm_load_si128((__m128i *)_thresh0),
- _mm_load_si128((__m128i *)_thresh1));
-
- __m128i d0, d1, d2, d3, d4, d5, d6, d7;
- __m128i x0, x1, x2, x3, x4, x5, x6, x7;
- __m128i p0, q0;
- __m128i p1p0, q1q0;
- __m128i d0d1, d2d3, d4d5, d6d7;
- // Load 8 rows of 8 bytes, starting 3 bytes to the left of s.
- x0 = _mm_loadl_epi64((__m128i *)((s - 3) + 0 * p));
- x1 = _mm_loadl_epi64((__m128i *)((s - 3) + 1 * p));
- x2 = _mm_loadl_epi64((__m128i *)((s - 3) + 2 * p));
- x3 = _mm_loadl_epi64((__m128i *)((s - 3) + 3 * p));
- x4 = _mm_loadl_epi64((__m128i *)((s - 3) + 4 * p));
- x5 = _mm_loadl_epi64((__m128i *)((s - 3) + 5 * p));
- x6 = _mm_loadl_epi64((__m128i *)((s - 3) + 6 * p));
- x7 = _mm_loadl_epi64((__m128i *)((s - 3) + 7 * p));
-
- transpose8x8_sse2(&x0, &x1, &x2, &x3, &x4, &x5, &x6, &x7, &d0d1, &d2d3, &d4d5,
- &d6d7);
- // Move the upper 64 bits of each transposed register into its own register.
- d1 = _mm_srli_si128(d0d1, 8);
- d3 = _mm_srli_si128(d2d3, 8);
- d5 = _mm_srli_si128(d4d5, 8);
- d7 = _mm_srli_si128(d6d7, 8);
-
- lpf_internal_6_dual_sse2(&d0d1, &d5, &d1, &d4d5, &d2d3, &d3, &q1q0, &p1p0,
- &blimit, &limit, &thresh);
- // Split the upper 64 bits of the packed filter outputs into p0 and q0.
- p0 = _mm_srli_si128(p1p0, 8);
- q0 = _mm_srli_si128(q1q0, 8);
-
- transpose4x8_8x4_sse2(&p0, &p1p0, &q1q0, &q0, &d0, &d1, &d2, &d3, &d4, &d5,
- &d6, &d7);
- // Write one register per row at s - 2 (xx_storel_32: presumably a 4-byte store).
- xx_storel_32((s - 2 + 0 * p), d0);
- xx_storel_32((s - 2 + 1 * p), d1);
- xx_storel_32((s - 2 + 2 * p), d2);
- xx_storel_32((s - 2 + 3 * p), d3);
- xx_storel_32((s - 2 + 4 * p), d4);
- xx_storel_32((s - 2 + 5 * p), d5);
- xx_storel_32((s - 2 + 6 * p), d6);
- xx_storel_32((s - 2 + 7 * p), d7);
-}
-/* Loop filter (lpf_internal_8_sse2) across a vertical edge, processing 4 rows
- * per call.
- * Loads 8 bytes per row starting at s - 4 (p is the row stride), transposes
- * them into eight registers d0..d7, filters, transposes back and stores all
- * 8 bytes of each of the 4 rows at s - 4.
- * _blimit, _limit and _thresh are read with the aligned _mm_load_si128. */
-void aom_lpf_vertical_8_sse2(unsigned char *s, int p,
- const unsigned char *_blimit,
- const unsigned char *_limit,
- const unsigned char *_thresh) {
- __m128i d0, d1, d2, d3, d4, d5, d6, d7;
-
- __m128i p0, q0;
- __m128i x2, x1, x0, x3;
- __m128i q1q0, p1p0;
- __m128i blimit = _mm_load_si128((const __m128i *)_blimit);
- __m128i limit = _mm_load_si128((const __m128i *)_limit);
- __m128i thresh = _mm_load_si128((const __m128i *)_thresh);
- // Load 4 rows of 8 bytes, starting 4 bytes to the left of s.
- x3 = _mm_loadl_epi64((__m128i *)((s - 4) + 0 * p));
- x2 = _mm_loadl_epi64((__m128i *)((s - 4) + 1 * p));
- x1 = _mm_loadl_epi64((__m128i *)((s - 4) + 2 * p));
- x0 = _mm_loadl_epi64((__m128i *)((s - 4) + 3 * p));
-
- transpose4x8_8x4_sse2(&x3, &x2, &x1, &x0, &d0, &d1, &d2, &d3, &d4, &d5, &d6,
- &d7);
- // Loop filtering
- lpf_internal_8_sse2(&d0, &d7, &d1, &d6, &d2, &d5, &d3, &d4, &q1q0, &p1p0,
- &blimit, &limit, &thresh);
- // p0/q0 are the packed filter outputs shifted down by 4 bytes.
- p0 = _mm_srli_si128(p1p0, 4);
- q0 = _mm_srli_si128(q1q0, 4);
- // d0, d1, d6 and d7 are reused as the outer inputs; d0..d3 receive the output rows.
- transpose8x8_low_sse2(&d0, &d1, &p0, &p1p0, &q1q0, &q0, &d6, &d7, &d0, &d1,
- &d2, &d3);
-
- _mm_storel_epi64((__m128i *)(s - 4 + 0 * p), d0);
- _mm_storel_epi64((__m128i *)(s - 4 + 1 * p), d1);
- _mm_storel_epi64((__m128i *)(s - 4 + 2 * p), d2);
- _mm_storel_epi64((__m128i *)(s - 4 + 3 * p), d3);
-}
-/* Loop filter (lpf_internal_8_dual_sse2) across a vertical edge, processing
- * 8 rows per call.
- * Loads 8 bytes per row starting at s - 4 (p is the row stride), transposes
- * the 8x8 tile, filters it, transposes back and stores all 8 bytes of each
- * of the 8 rows at s - 4.
- * The low 64 bits of each set-0 / set-1 parameter pair are interleaved in
- * 32-bit units into one register; presumably set 0 applies to rows 0-3 and
- * set 1 to rows 4-7 - TODO confirm against lpf_internal_8_dual_sse2.
- * All six parameter pointers are read with the aligned _mm_load_si128. */
-void aom_lpf_vertical_8_dual_sse2(uint8_t *s, int p, const uint8_t *_blimit0,
- const uint8_t *_limit0,
- const uint8_t *_thresh0,
- const uint8_t *_blimit1,
- const uint8_t *_limit1,
- const uint8_t *_thresh1) {
- __m128i blimit = _mm_unpacklo_epi32(_mm_load_si128((__m128i *)_blimit0),
- _mm_load_si128((__m128i *)_blimit1));
- __m128i limit = _mm_unpacklo_epi32(_mm_load_si128((__m128i *)_limit0),
- _mm_load_si128((__m128i *)_limit1));
- __m128i thresh = _mm_unpacklo_epi32(_mm_load_si128((__m128i *)_thresh0),
- _mm_load_si128((__m128i *)_thresh1));
-
- __m128i x0, x1, x2, x3, x4, x5, x6, x7;
- __m128i d1, d3, d5, d7;
- __m128i q1q0, p1p0;
- __m128i p1, q1;
- __m128i d0d1, d2d3, d4d5, d6d7;
- // Load 8 rows of 8 bytes, starting 4 bytes to the left of s.
- x0 = _mm_loadl_epi64((__m128i *)(s - 4 + 0 * p));
- x1 = _mm_loadl_epi64((__m128i *)(s - 4 + 1 * p));
- x2 = _mm_loadl_epi64((__m128i *)(s - 4 + 2 * p));
- x3 = _mm_loadl_epi64((__m128i *)(s - 4 + 3 * p));
- x4 = _mm_loadl_epi64((__m128i *)(s - 4 + 4 * p));
- x5 = _mm_loadl_epi64((__m128i *)(s - 4 + 5 * p));
- x6 = _mm_loadl_epi64((__m128i *)(s - 4 + 6 * p));
- x7 = _mm_loadl_epi64((__m128i *)(s - 4 + 7 * p));
-
- transpose8x8_sse2(&x0, &x1, &x2, &x3, &x4, &x5, &x6, &x7, &d0d1, &d2d3, &d4d5,
- &d6d7);
- // Move the upper 64 bits of each transposed register into its own register.
- d1 = _mm_srli_si128(d0d1, 8);
- d3 = _mm_srli_si128(d2d3, 8);
- d5 = _mm_srli_si128(d4d5, 8);
- d7 = _mm_srli_si128(d6d7, 8);
-
- lpf_internal_8_dual_sse2(&d0d1, &d7, &d1, &d6d7, &d2d3, &d5, &d3, &d4d5,
- &q1q0, &p1p0, &blimit, &limit, &thresh);
- // Split the upper 64 bits of the packed filter outputs into p1 and q1.
- p1 = _mm_srli_si128(p1p0, 8);
- q1 = _mm_srli_si128(q1q0, 8);
- // d0d1 and d6d7 serve as both inputs and outputs of this transpose.
- transpose8x8_sse2(&d0d1, &d1, &p1, &p1p0, &q1q0, &q1, &d6d7, &d7, &d0d1,
- &d2d3, &d4d5, &d6d7);
- // Each register holds two output rows: low 64 bits first, then the high 64 bits.
- _mm_storel_epi64((__m128i *)(s - 4 + 0 * p), d0d1);
- _mm_storel_epi64((__m128i *)(s - 4 + 1 * p), _mm_srli_si128(d0d1, 8));
- _mm_storel_epi64((__m128i *)(s - 4 + 2 * p), d2d3);
- _mm_storel_epi64((__m128i *)(s - 4 + 3 * p), _mm_srli_si128(d2d3, 8));
- _mm_storel_epi64((__m128i *)(s - 4 + 4 * p), d4d5);
- _mm_storel_epi64((__m128i *)(s - 4 + 5 * p), _mm_srli_si128(d4d5, 8));
- _mm_storel_epi64((__m128i *)(s - 4 + 6 * p), d6d7);
- _mm_storel_epi64((__m128i *)(s - 4 + 7 * p), _mm_srli_si128(d6d7, 8));
-}
-/* Loop filter (lpf_internal_14_sse2) across a vertical edge, processing 4
- * rows per call.
- * Loads 16 bytes per row starting at s - 8 with unaligned loads (p is the row
- * stride), rearranges them with transpose_pq_14_sse2 into the qNpN registers,
- * filters, undoes the rearrangement with transpose_pq_14_inv_sse2 and stores
- * 16 bytes per row back at s - 8.
- * q7p7 is not passed to the filter; it is only carried from the forward
- * transpose to the inverse one.
- * _blimit, _limit and _thresh are read with the aligned _mm_load_si128. */
-void aom_lpf_vertical_14_sse2(unsigned char *s, int p,
- const unsigned char *_blimit,
- const unsigned char *_limit,
- const unsigned char *_thresh) {
- __m128i q7p7, q6p6, q5p5, q4p4, q3p3, q2p2, q1p1, q0p0;
- __m128i x6, x5, x4, x3;
- __m128i pq0, pq1, pq2, pq3;
- __m128i blimit = _mm_load_si128((__m128i *)_blimit);
- __m128i limit = _mm_load_si128((__m128i *)_limit);
- __m128i thresh = _mm_load_si128((__m128i *)_thresh);
- // Load 4 rows of 16 bytes, starting 8 bytes to the left of s.
- x6 = _mm_loadu_si128((__m128i *)((s - 8) + 0 * p));
- x5 = _mm_loadu_si128((__m128i *)((s - 8) + 1 * p));
- x4 = _mm_loadu_si128((__m128i *)((s - 8) + 2 * p));
- x3 = _mm_loadu_si128((__m128i *)((s - 8) + 3 * p));
-
- transpose_pq_14_sse2(&x6, &x5, &x4, &x3, &q0p0, &q1p1, &q2p2, &q3p3, &q4p4,
- &q5p5, &q6p6, &q7p7);
-
- lpf_internal_14_sse2(&q6p6, &q5p5, &q4p4, &q3p3, &q2p2, &q1p1, &q0p0, &blimit,
- &limit, &thresh);
-
- transpose_pq_14_inv_sse2(&q7p7, &q6p6, &q5p5, &q4p4, &q3p3, &q2p2, &q1p1,
- &q0p0, &pq0, &pq1, &pq2, &pq3);
- _mm_storeu_si128((__m128i *)(s - 8 + 0 * p), pq0);
- _mm_storeu_si128((__m128i *)(s - 8 + 1 * p), pq1);
- _mm_storeu_si128((__m128i *)(s - 8 + 2 * p), pq2);
- _mm_storeu_si128((__m128i *)(s - 8 + 3 * p), pq3);
-}
-/* Loop filter (lpf_internal_14_dual_sse2) across a vertical edge, processing
- * 8 rows per call.
- * Loads 16 bytes per row starting at s - 8 with unaligned loads (p is the row
- * stride), transposes with transpose8x16_16x8_sse2, packs the result into the
- * qNpN registers, filters, transposes back with transpose16x8_8x16_sse2 and
- * stores 16 bytes per row at s - 8.
- * The low 64 bits of each set-0 / set-1 parameter pair are interleaved in
- * 32-bit units into one register; presumably set 0 applies to rows 0-3 and
- * set 1 to rows 4-7 - TODO confirm against lpf_internal_14_dual_sse2.
- * All six parameter pointers are read with the aligned _mm_load_si128. */
-void aom_lpf_vertical_14_dual_sse2(
- unsigned char *s, int p, const uint8_t *_blimit0, const uint8_t *_limit0,
- const uint8_t *_thresh0, const uint8_t *_blimit1, const uint8_t *_limit1,
- const uint8_t *_thresh1) {
- __m128i q6p6, q5p5, q4p4, q3p3, q2p2, q1p1, q0p0;
- __m128i x7, x6, x5, x4, x3, x2, x1, x0;
- __m128i d0d1, d2d3, d4d5, d6d7, d8d9, d10d11, d12d13, d14d15;
- __m128i q0, q1, q2, q3, q7;
- __m128i p0p1, p2p3, p4p5, p6p7;
-
- __m128i blimit =
- _mm_unpacklo_epi32(_mm_load_si128((const __m128i *)_blimit0),
- _mm_load_si128((const __m128i *)_blimit1));
- __m128i limit = _mm_unpacklo_epi32(_mm_load_si128((const __m128i *)_limit0),
- _mm_load_si128((const __m128i *)_limit1));
- __m128i thresh =
- _mm_unpacklo_epi32(_mm_load_si128((const __m128i *)_thresh0),
- _mm_load_si128((const __m128i *)_thresh1));
- // Load 8 rows of 16 bytes, starting 8 bytes to the left of s.
- x7 = _mm_loadu_si128((__m128i *)((s - 8) + 0 * p));
- x6 = _mm_loadu_si128((__m128i *)((s - 8) + 1 * p));
- x5 = _mm_loadu_si128((__m128i *)((s - 8) + 2 * p));
- x4 = _mm_loadu_si128((__m128i *)((s - 8) + 3 * p));
- x3 = _mm_loadu_si128((__m128i *)((s - 8) + 4 * p));
- x2 = _mm_loadu_si128((__m128i *)((s - 8) + 5 * p));
- x1 = _mm_loadu_si128((__m128i *)((s - 8) + 6 * p));
- x0 = _mm_loadu_si128((__m128i *)((s - 8) + 7 * p));
-
- transpose8x16_16x8_sse2(&x7, &x6, &x5, &x4, &x3, &x2, &x1, &x0, &d0d1, &d2d3,
- &d4d5, &d6d7, &d8d9, &d10d11, &d12d13, &d14d15);
- // Each qNpN takes its low 64 bits from one register and its high 64 bits from the upper half of another.
- q6p6 = _mm_unpacklo_epi64(d2d3, _mm_srli_si128(d12d13, 8));
- q5p5 = _mm_unpacklo_epi64(d4d5, _mm_srli_si128(d10d11, 8));
- q4p4 = _mm_unpacklo_epi64(d6d7, _mm_srli_si128(d8d9, 8));
- q3p3 = _mm_unpacklo_epi64(d8d9, _mm_srli_si128(d6d7, 8));
- q2p2 = _mm_unpacklo_epi64(d10d11, _mm_srli_si128(d4d5, 8));
- q1p1 = _mm_unpacklo_epi64(d12d13, _mm_srli_si128(d2d3, 8));
- q0p0 = _mm_unpacklo_epi64(d14d15, _mm_srli_si128(d0d1, 8));
- q7 = _mm_srli_si128(d14d15, 8);
-
- lpf_internal_14_dual_sse2(&q6p6, &q5p5, &q4p4, &q3p3, &q2p2, &q1p1, &q0p0,
- &blimit, &limit, &thresh);
- // x0..x6 are reused to hold the upper 64 bits of each filtered qNpN register.
- x0 = _mm_srli_si128(q0p0, 8);
- x1 = _mm_srli_si128(q1p1, 8);
- x2 = _mm_srli_si128(q2p2, 8);
- x3 = _mm_srli_si128(q3p3, 8);
- x4 = _mm_srli_si128(q4p4, 8);
- x5 = _mm_srli_si128(q5p5, 8);
- x6 = _mm_srli_si128(q6p6, 8);
- // d0d1 and q7 come straight from the forward transpose and go back in as the outermost inputs.
- transpose16x8_8x16_sse2(&d0d1, &q6p6, &q5p5, &q4p4, &q3p3, &q2p2, &q1p1,
- &q0p0, &x0, &x1, &x2, &x3, &x4, &x5, &x6, &q7, &p0p1,
- &p2p3, &p4p5, &p6p7, &q0, &q1, &q2, &q3);
-
- _mm_storeu_si128((__m128i *)(s - 8 + 0 * p), p0p1);
- _mm_storeu_si128((__m128i *)(s - 8 + 1 * p), p2p3);
- _mm_storeu_si128((__m128i *)(s - 8 + 2 * p), p4p5);
- _mm_storeu_si128((__m128i *)(s - 8 + 3 * p), p6p7);
- _mm_storeu_si128((__m128i *)(s - 8 + 4 * p), q0);
- _mm_storeu_si128((__m128i *)(s - 8 + 5 * p), q1);
- _mm_storeu_si128((__m128i *)(s - 8 + 6 * p), q2);
- _mm_storeu_si128((__m128i *)(s - 8 + 7 * p), q3);
-}
diff --git a/third_party/aom/aom_dsp/x86/lpf_common_sse2.h b/third_party/aom/aom_dsp/x86/lpf_common_sse2.h
deleted file mode 100644
index 8970fe7dd..000000000
--- a/third_party/aom/aom_dsp/x86/lpf_common_sse2.h
+++ /dev/null
@@ -1,215 +0,0 @@
-/*
- * Copyright (c) 2017, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#ifndef AOM_AOM_DSP_X86_LPF_COMMON_SSE2_H_
-#define AOM_AOM_DSP_X86_LPF_COMMON_SSE2_H_
-
-#include <emmintrin.h> // SSE2
-
-#include "config/aom_config.h"
-
-// Transposes a 6x6 block of 16-bit values.
-//
-// x0..x5: input rows; only lanes 0..5 of each register are meaningful.
-// d0..d5: output columns; lanes 0..5 of dN hold column N of the input. The
-//         contents of lanes 6 and 7 of every output are unspecified.
-//
-// Every input is read before the first output is written.
-static INLINE void highbd_transpose6x6_sse2(__m128i *x0, __m128i *x1,
-                                            __m128i *x2, __m128i *x3,
-                                            __m128i *x4, __m128i *x5,
-                                            __m128i *d0, __m128i *d1,
-                                            __m128i *d2, __m128i *d3,
-                                            __m128i *d4, __m128i *d5) {
-  __m128i w0, w1, w2, w3, w4, w5, ww0;
-
-  // 00 01 02 03 04 05 xx xx
-  // 10 11 12 13 14 15 xx xx
-  // 20 21 22 23 24 25 xx xx
-  // 30 31 32 33 34 35 xx xx
-  // 40 41 42 43 44 45 xx xx
-  // 50 51 52 53 54 55 xx xx
-
-  w0 = _mm_unpacklo_epi16(*x0, *x1);  // 00 10 01 11 02 12 03 13
-  w1 = _mm_unpacklo_epi16(*x2, *x3);  // 20 30 21 31 22 32 23 33
-  w2 = _mm_unpacklo_epi16(*x4, *x5);  // 40 50 41 51 42 52 43 53
-
-  w3 = _mm_unpackhi_epi16(*x0, *x1);  // 04 14 05 15 xx xx xx xx
-  w4 = _mm_unpackhi_epi16(*x2, *x3);  // 24 34 25 35 xx xx xx xx
-  w5 = _mm_unpackhi_epi16(*x4, *x5);  // 44 54 45 55 xx xx xx xx
-
-  ww0 = _mm_unpacklo_epi32(w0, w1);   // 00 10 20 30 01 11 21 31
-  *d0 = _mm_unpacklo_epi64(ww0, w2);  // 00 10 20 30 40 50 41 51
-  // unpackhi_epi64 takes the UPPER half of its second operand, so w2 must be
-  // shifted left (towards the high lanes) by two lanes to place 41 51 there.
-  // A right shift would put 43 53 in that position instead.
-  *d1 = _mm_unpackhi_epi64(ww0,
-                           _mm_slli_si128(w2, 4));  // 01 11 21 31 41 51 xx xx
-
-  ww0 = _mm_unpackhi_epi32(w0, w1);  // 02 12 22 32 03 13 23 33
-  *d2 = _mm_unpacklo_epi64(ww0,
-                           _mm_srli_si128(w2, 8));  // 02 12 22 32 42 52 xx xx
-  // Here the right shift is correct: it moves 43 53 into lanes 4 and 5.
-  *d3 = _mm_unpackhi_epi64(ww0,
-                           _mm_srli_si128(w2, 4));  // 03 13 23 33 43 53 xx xx
-
-  ww0 = _mm_unpacklo_epi32(w3, w4);   // 04 14 24 34 05 15 25 35
-  *d4 = _mm_unpacklo_epi64(ww0, w5);  // 04 14 24 34 44 54 45 55
-  *d5 = _mm_unpackhi_epi64(ww0,
-                           _mm_slli_si128(w5, 4));  // 05 15 25 35 45 55 xx xx
-}
-
-// Transposes the four low 16-bit lanes of the rows x0..x3.
-// dN receives column N (N = 0..3) in its lanes 0..3; lanes 4..7 are zero.
-static INLINE void highbd_transpose4x8_8x4_low_sse2(__m128i *x0, __m128i *x1,
-                                                    __m128i *x2, __m128i *x3,
-                                                    __m128i *d0, __m128i *d1,
-                                                    __m128i *d2, __m128i *d3) {
-  const __m128i zeros = _mm_setzero_si128();
-
-  // Interleave neighbouring rows at 16-bit granularity.
-  const __m128i rows01 = _mm_unpacklo_epi16(*x0, *x1);  // 00 10 01 11 02 12 03 13
-  const __m128i rows23 = _mm_unpacklo_epi16(*x2, *x3);  // 20 30 21 31 22 32 23 33
-
-  // Interleave the row pairs at 32-bit granularity: two columns per register.
-  const __m128i cols01 = _mm_unpacklo_epi32(rows01, rows23);  // 00 10 20 30 01 11 21 31
-  const __m128i cols23 = _mm_unpackhi_epi32(rows01, rows23);  // 02 12 22 32 03 13 23 33
-
-  // Split each register into single columns, padding the upper half with 0.
-  *d0 = _mm_unpacklo_epi64(cols01, zeros);  // 00 10 20 30 xx xx xx xx
-  *d1 = _mm_unpackhi_epi64(cols01, zeros);  // 01 11 21 31 xx xx xx xx
-  *d2 = _mm_unpacklo_epi64(cols23, zeros);  // 02 12 22 32 xx xx xx xx
-  *d3 = _mm_unpackhi_epi64(cols23, zeros);  // 03 13 23 33 xx xx xx xx
-}
-
-// Transposes the four high 16-bit lanes of the rows x0..x3.
-// dN receives column N (N = 4..7) in its lanes 0..3; lanes 4..7 are zero.
-static INLINE void highbd_transpose4x8_8x4_high_sse2(__m128i *x0, __m128i *x1,
-                                                     __m128i *x2, __m128i *x3,
-                                                     __m128i *d4, __m128i *d5,
-                                                     __m128i *d6, __m128i *d7) {
-  const __m128i zeros = _mm_setzero_si128();
-
-  // Interleave neighbouring rows at 16-bit granularity.
-  const __m128i rows01 = _mm_unpackhi_epi16(*x0, *x1);  // 04 14 05 15 06 16 07 17
-  const __m128i rows23 = _mm_unpackhi_epi16(*x2, *x3);  // 24 34 25 35 26 36 27 37
-
-  // Interleave the row pairs at 32-bit granularity: two columns per register.
-  const __m128i cols45 = _mm_unpacklo_epi32(rows01, rows23);  // 04 14 24 34 05 15 25 35
-  const __m128i cols67 = _mm_unpackhi_epi32(rows01, rows23);  // 06 16 26 36 07 17 27 37
-
-  // Split each register into single columns, padding the upper half with 0.
-  *d4 = _mm_unpacklo_epi64(cols45, zeros);  // 04 14 24 34 xx xx xx xx
-  *d5 = _mm_unpackhi_epi64(cols45, zeros);  // 05 15 25 35 xx xx xx xx
-  *d6 = _mm_unpacklo_epi64(cols67, zeros);  // 06 16 26 36 xx xx xx xx
-  *d7 = _mm_unpackhi_epi64(cols67, zeros);  // 07 17 27 37 xx xx xx xx
-}
-
-// Transposes four rows of eight 16-bit values into eight 4-element columns.
-//
-// The input pointers (x) and output pointers (d) must not alias: the inputs
-// are not saved internally and are read again after d0..d3 have been written.
-//
-//   input                          output (lanes 0..3 of each dN)
-//   x0  00 01 02 03 04 05 06 07    d0  00 10 20 30
-//   x1  10 11 12 13 14 15 16 17    d1  01 11 21 31
-//   x2  20 21 22 23 24 25 26 27    d2  02 12 22 32
-//   x3  30 31 32 33 34 35 36 37    d3  03 13 23 33
-//                                  d4  04 14 24 34
-//                                  d5  05 15 25 35
-//                                  d6  06 16 26 36
-//                                  d7  07 17 27 37
-static INLINE void highbd_transpose4x8_8x4_sse2(__m128i *x0, __m128i *x1,
-                                                __m128i *x2, __m128i *x3,
-                                                __m128i *d0, __m128i *d1,
-                                                __m128i *d2, __m128i *d3,
-                                                __m128i *d4, __m128i *d5,
-                                                __m128i *d6, __m128i *d7) {
-  // Columns 0..3 come from the low input lanes ...
-  highbd_transpose4x8_8x4_low_sse2(x0, x1, x2, x3, d0, d1, d2, d3);
-  // ... and columns 4..7 from the high input lanes.
-  highbd_transpose4x8_8x4_high_sse2(x0, x1, x2, x3, d4, d5, d6, d7);
-}
-
-// Produces columns 0..3 of an 8x8 block of 16-bit values.
-//
-// x0..x7 are the eight input rows (row r holds r0 r1 ... r7); dN receives
-// column N as 0N 1N 2N ... 7N. All eight inputs are read before any output is
-// written.
-static INLINE void highbd_transpose8x8_low_sse2(__m128i *x0, __m128i *x1,
-                                                __m128i *x2, __m128i *x3,
-                                                __m128i *x4, __m128i *x5,
-                                                __m128i *x6, __m128i *x7,
-                                                __m128i *d0, __m128i *d1,
-                                                __m128i *d2, __m128i *d3) {
-  // Stage 1: interleave neighbouring rows (low four lanes only).
-  const __m128i rows01 = _mm_unpacklo_epi16(*x0, *x1);  // 00 10 01 11 02 12 03 13
-  const __m128i rows23 = _mm_unpacklo_epi16(*x2, *x3);  // 20 30 21 31 22 32 23 33
-  const __m128i rows45 = _mm_unpacklo_epi16(*x4, *x5);  // 40 50 41 51 42 52 43 53
-  const __m128i rows67 = _mm_unpacklo_epi16(*x6, *x7);  // 60 70 61 71 62 72 63 73
-
-  // Stage 2: combine row pairs into half-columns, two columns per register.
-  const __m128i top01 = _mm_unpacklo_epi32(rows01, rows23);  // 00 10 20 30 01 11 21 31
-  const __m128i bot01 = _mm_unpacklo_epi32(rows45, rows67);  // 40 50 60 70 41 51 61 71
-  const __m128i top23 = _mm_unpackhi_epi32(rows01, rows23);  // 02 12 22 32 03 13 23 33
-  const __m128i bot23 = _mm_unpackhi_epi32(rows45, rows67);  // 42 52 62 72 43 53 63 73
-
-  // Stage 3: join the top and bottom halves of each column.
-  *d0 = _mm_unpacklo_epi64(top01, bot01);  // 00 10 20 30 40 50 60 70
-  *d1 = _mm_unpackhi_epi64(top01, bot01);  // 01 11 21 31 41 51 61 71
-  *d2 = _mm_unpacklo_epi64(top23, bot23);  // 02 12 22 32 42 52 62 72
-  *d3 = _mm_unpackhi_epi64(top23, bot23);  // 03 13 23 33 43 53 63 73
-}
-
-// Produces columns 4..7 of an 8x8 block of 16-bit values.
-//
-// x0..x7 are the eight input rows (row r holds r0 r1 ... r7); dN receives
-// column N as 0N 1N 2N ... 7N. All eight inputs are read before any output is
-// written.
-static INLINE void highbd_transpose8x8_high_sse2(__m128i *x0, __m128i *x1,
-                                                 __m128i *x2, __m128i *x3,
-                                                 __m128i *x4, __m128i *x5,
-                                                 __m128i *x6, __m128i *x7,
-                                                 __m128i *d4, __m128i *d5,
-                                                 __m128i *d6, __m128i *d7) {
-  // Stage 1: interleave neighbouring rows (high four lanes only).
-  const __m128i rows01 = _mm_unpackhi_epi16(*x0, *x1);  // 04 14 05 15 06 16 07 17
-  const __m128i rows23 = _mm_unpackhi_epi16(*x2, *x3);  // 24 34 25 35 26 36 27 37
-  const __m128i rows45 = _mm_unpackhi_epi16(*x4, *x5);  // 44 54 45 55 46 56 47 57
-  const __m128i rows67 = _mm_unpackhi_epi16(*x6, *x7);  // 64 74 65 75 66 76 67 77
-
-  // Stage 2: combine row pairs into half-columns, two columns per register.
-  const __m128i top45 = _mm_unpacklo_epi32(rows01, rows23);  // 04 14 24 34 05 15 25 35
-  const __m128i bot45 = _mm_unpacklo_epi32(rows45, rows67);  // 44 54 64 74 45 55 65 75
-  const __m128i top67 = _mm_unpackhi_epi32(rows01, rows23);  // 06 16 26 36 07 17 27 37
-  const __m128i bot67 = _mm_unpackhi_epi32(rows45, rows67);  // 46 56 66 76 47 57 67 77
-
-  // Stage 3: join the top and bottom halves of each column.
-  *d4 = _mm_unpacklo_epi64(top45, bot45);  // 04 14 24 34 44 54 64 74
-  *d5 = _mm_unpackhi_epi64(top45, bot45);  // 05 15 25 35 45 55 65 75
-  *d6 = _mm_unpacklo_epi64(top67, bot67);  // 06 16 26 36 46 56 66 76
-  *d7 = _mm_unpackhi_epi64(top67, bot67);  // 07 17 27 37 47 57 67 77
-}
-
-// Full 8x8 transpose of 16-bit values: rows x0..x7 in, columns d0..d7 out.
-//
-// The input pointers (x) and output pointers (d) must not alias: the inputs
-// are not saved internally and are read again after d0..d3 have been written.
-static INLINE void highbd_transpose8x8_sse2(
-    __m128i *x0, __m128i *x1, __m128i *x2, __m128i *x3, __m128i *x4,
-    __m128i *x5, __m128i *x6, __m128i *x7, __m128i *d0, __m128i *d1,
-    __m128i *d2, __m128i *d3, __m128i *d4, __m128i *d5, __m128i *d6,
-    __m128i *d7) {
-  // Columns 0..3 first ...
-  highbd_transpose8x8_low_sse2(x0, x1, x2, x3, x4, x5, x6, x7,
-                               d0, d1, d2, d3);
-  // ... then columns 4..7, from the same (still unmodified) inputs.
-  highbd_transpose8x8_high_sse2(x0, x1, x2, x3, x4, x5, x6, x7,
-                                d4, d5, d6, d7);
-}
-
-// Runs two independent 8x8 transposes of 16-bit values.
-//
-// Every xN and dN is treated as an array of two __m128i: element 0 of each
-// array takes part in the first transpose, element 1 in the second.
-// The input arrays (x) and output arrays (d) must not alias, because the
-// inputs are not saved internally.
-static INLINE void highbd_transpose8x16_sse2(
-    __m128i *x0, __m128i *x1, __m128i *x2, __m128i *x3, __m128i *x4,
-    __m128i *x5, __m128i *x6, __m128i *x7, __m128i *d0, __m128i *d1,
-    __m128i *d2, __m128i *d3, __m128i *d4, __m128i *d5, __m128i *d6,
-    __m128i *d7) {
-  // First 8x8 tile: element [0] of every array.
-  highbd_transpose8x8_sse2(x0, x1, x2, x3, x4, x5, x6, x7,
-                           d0, d1, d2, d3, d4, d5, d6, d7);
-  // Second 8x8 tile: element [1] of every array.
-  highbd_transpose8x8_sse2(x0 + 1, x1 + 1, x2 + 1, x3 + 1,
-                           x4 + 1, x5 + 1, x6 + 1, x7 + 1,
-                           d0 + 1, d1 + 1, d2 + 1, d3 + 1,
-                           d4 + 1, d5 + 1, d6 + 1, d7 + 1);
-}
-
-#endif // AOM_AOM_DSP_X86_LPF_COMMON_SSE2_H_
diff --git a/third_party/aom/aom_dsp/x86/masked_sad_intrin_avx2.c b/third_party/aom/aom_dsp/x86/masked_sad_intrin_avx2.c
deleted file mode 100644
index 584b5e7e3..000000000
--- a/third_party/aom/aom_dsp/x86/masked_sad_intrin_avx2.c
+++ /dev/null
@@ -1,389 +0,0 @@
-/*
- * Copyright (c) 2018, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#include <tmmintrin.h>
-
-#include "config/aom_config.h"
-#include "config/aom_dsp_rtcd.h"
-
-#include "aom_dsp/blend.h"
-#include "aom/aom_integer.h"
-#include "aom_dsp/x86/synonyms.h"
-#include "aom_dsp/x86//masked_sad_intrin_ssse3.h"
-/* Masked SAD for 8-bit blocks whose width is processed in 32-pixel steps.
- * For every pixel the prediction is the mask-weighted blend of a and b,
- * (a * m + b * (mask_max - m)) with a rounded right shift by
- * AOM_BLEND_A64_ROUND_BITS, and the absolute differences against src are
- * accumulated. Returns (sum + 31) >> 6.
- * Each inner iteration loads 32 bytes from every buffer, so width should be
- * a multiple of 32. All loads are unaligned. */
-static INLINE unsigned int masked_sad32xh_avx2(
- const uint8_t *src_ptr, int src_stride, const uint8_t *a_ptr, int a_stride,
- const uint8_t *b_ptr, int b_stride, const uint8_t *m_ptr, int m_stride,
- int width, int height) {
- int x, y;
- __m256i res = _mm256_setzero_si256();
- const __m256i mask_max = _mm256_set1_epi8((1 << AOM_BLEND_A64_ROUND_BITS));
- const __m256i round_scale =
- _mm256_set1_epi16(1 << (15 - AOM_BLEND_A64_ROUND_BITS));
- for (y = 0; y < height; y++) {
- for (x = 0; x < width; x += 32) {
- const __m256i src = _mm256_lddqu_si256((const __m256i *)&src_ptr[x]);
- const __m256i a = _mm256_lddqu_si256((const __m256i *)&a_ptr[x]);
- const __m256i b = _mm256_lddqu_si256((const __m256i *)&b_ptr[x]);
- const __m256i m = _mm256_lddqu_si256((const __m256i *)&m_ptr[x]);
- const __m256i m_inv = _mm256_sub_epi8(mask_max, m);
-
- // Calculate 32 predicted pixels (16 in pred_l, 16 in pred_r).
- // Note that the maximum value of any entry of 'pred_l' or 'pred_r'
- // is 64 * 255, so we have plenty of space to add rounding constants.
- const __m256i data_l = _mm256_unpacklo_epi8(a, b);
- const __m256i mask_l = _mm256_unpacklo_epi8(m, m_inv);
- __m256i pred_l = _mm256_maddubs_epi16(data_l, mask_l);
- pred_l = _mm256_mulhrs_epi16(pred_l, round_scale);
-
- const __m256i data_r = _mm256_unpackhi_epi8(a, b);
- const __m256i mask_r = _mm256_unpackhi_epi8(m, m_inv);
- __m256i pred_r = _mm256_maddubs_epi16(data_r, mask_r);
- pred_r = _mm256_mulhrs_epi16(pred_r, round_scale);
- // unpack and pack both work per 128-bit half, so pred is back in pixel order.
- const __m256i pred = _mm256_packus_epi16(pred_l, pred_r);
- res = _mm256_add_epi32(res, _mm256_sad_epu8(pred, src));
- }
-
- src_ptr += src_stride;
- a_ptr += a_stride;
- b_ptr += b_stride;
- m_ptr += m_stride;
- }
- /* At this point 'res' holds four partial SADs, one in each even 32-bit lane
-  * (0, 2, 4 and 6). The shuffle and permute gather them into lanes 0..3 and
-  * the two horizontal adds leave their total in lane 0. */
- res = _mm256_shuffle_epi32(res, 0xd8);
- res = _mm256_permute4x64_epi64(res, 0xd8);
- res = _mm256_hadd_epi32(res, res);
- res = _mm256_hadd_epi32(res, res);
- int32_t sad = _mm256_extract_epi32(res, 0);
- return (sad + 31) >> 6;
-}
-
-// Builds one 256-bit vector from two unaligned 128-bit loads:
-// bits 0..127 come from `lo`, bits 128..255 from `hi`.
-static INLINE __m256i xx_loadu2_m128i(const void *hi, const void *lo) {
-  const __m128i lower = _mm_lddqu_si128((const __m128i *)lo);
-  const __m128i upper = _mm_lddqu_si128((const __m128i *)hi);
-  // The cast leaves the upper half undefined; the insert then fills it.
-  return _mm256_inserti128_si256(_mm256_castsi128_si256(lower), upper, 1);
-}
-/* Masked SAD for 16-pixel-wide 8-bit blocks, two rows per iteration.
- * Each iteration packs row y into the low 128 bits and row y + 1 into the
- * high 128 bits, so height should be even. The prediction is the
- * mask-weighted blend of a and b, (a * m + b * (mask_max - m)) with a rounded
- * right shift by AOM_BLEND_A64_ROUND_BITS; absolute differences against src
- * are accumulated. Returns (sum + 31) >> 6. All loads are unaligned. */
-static INLINE unsigned int masked_sad16xh_avx2(
- const uint8_t *src_ptr, int src_stride, const uint8_t *a_ptr, int a_stride,
- const uint8_t *b_ptr, int b_stride, const uint8_t *m_ptr, int m_stride,
- int height) {
- int y;
- __m256i res = _mm256_setzero_si256();
- const __m256i mask_max = _mm256_set1_epi8((1 << AOM_BLEND_A64_ROUND_BITS));
- const __m256i round_scale =
- _mm256_set1_epi16(1 << (15 - AOM_BLEND_A64_ROUND_BITS));
- for (y = 0; y < height; y += 2) {
- const __m256i src = xx_loadu2_m128i(src_ptr + src_stride, src_ptr);
- const __m256i a = xx_loadu2_m128i(a_ptr + a_stride, a_ptr);
- const __m256i b = xx_loadu2_m128i(b_ptr + b_stride, b_ptr);
- const __m256i m = xx_loadu2_m128i(m_ptr + m_stride, m_ptr);
- const __m256i m_inv = _mm256_sub_epi8(mask_max, m);
-
- // Calculate 32 predicted pixels (two rows of 16).
- // Note that the maximum value of any entry of 'pred_l' or 'pred_r'
- // is 64 * 255, so we have plenty of space to add rounding constants.
- const __m256i data_l = _mm256_unpacklo_epi8(a, b);
- const __m256i mask_l = _mm256_unpacklo_epi8(m, m_inv);
- __m256i pred_l = _mm256_maddubs_epi16(data_l, mask_l);
- pred_l = _mm256_mulhrs_epi16(pred_l, round_scale);
-
- const __m256i data_r = _mm256_unpackhi_epi8(a, b);
- const __m256i mask_r = _mm256_unpackhi_epi8(m, m_inv);
- __m256i pred_r = _mm256_maddubs_epi16(data_r, mask_r);
- pred_r = _mm256_mulhrs_epi16(pred_r, round_scale);
- // unpack and pack both work per 128-bit half, so pred is back in pixel order.
- const __m256i pred = _mm256_packus_epi16(pred_l, pred_r);
- res = _mm256_add_epi32(res, _mm256_sad_epu8(pred, src));
- // Advance every pointer by the two rows just processed.
- src_ptr += src_stride << 1;
- a_ptr += a_stride << 1;
- b_ptr += b_stride << 1;
- m_ptr += m_stride << 1;
- }
- /* At this point 'res' holds four partial SADs, one in each even 32-bit lane
-  * (0, 2, 4 and 6). The shuffle and permute gather them into lanes 0..3 and
-  * the two horizontal adds leave their total in lane 0. */
- res = _mm256_shuffle_epi32(res, 0xd8);
- res = _mm256_permute4x64_epi64(res, 0xd8);
- res = _mm256_hadd_epi32(res, res);
- res = _mm256_hadd_epi32(res, res);
- int32_t sad = _mm256_extract_epi32(res, 0);
- return (sad + 31) >> 6;
-}
-
-// Dispatches an m x n masked SAD to the kernel that handles width m.
-//
-// The two blended predictors are `ref` (stride ref_stride) and `second_pred`
-// (addressed with a stride of m). When invert_mask is non-zero the two swap
-// roles, i.e. second_pred becomes the first blend operand and ref the second.
-// Widths 4 and 8 use the SSSE3 kernels, width 16 the two-row AVX2 kernel and
-// every other width the 32-pixel-step AVX2 kernel.
-static INLINE unsigned int aom_masked_sad_avx2(
-    const uint8_t *src, int src_stride, const uint8_t *ref, int ref_stride,
-    const uint8_t *second_pred, const uint8_t *msk, int msk_stride,
-    int invert_mask, int m, int n) {
-  // Select the operand order once instead of duplicating the width switch.
-  const uint8_t *const first = invert_mask ? second_pred : ref;
-  const int first_stride = invert_mask ? m : ref_stride;
-  const uint8_t *const second = invert_mask ? ref : second_pred;
-  const int second_stride = invert_mask ? ref_stride : m;
-
-  switch (m) {
-    case 4:
-      return aom_masked_sad4xh_ssse3(src, src_stride, first, first_stride,
-                                     second, second_stride, msk, msk_stride,
-                                     n);
-    case 8:
-      return aom_masked_sad8xh_ssse3(src, src_stride, first, first_stride,
-                                     second, second_stride, msk, msk_stride,
-                                     n);
-    case 16:
-      return masked_sad16xh_avx2(src, src_stride, first, first_stride, second,
-                                 second_stride, msk, msk_stride, n);
-    default:
-      return masked_sad32xh_avx2(src, src_stride, first, first_stride, second,
-                                 second_stride, msk, msk_stride, m, n);
-  }
-}
-/* Defines the function aom_masked_sad<m>x<n>_avx2, a thin wrapper that
- * forwards its arguments to aom_masked_sad_avx2 together with the block
- * width m and height n. */
-#define MASKSADMXN_AVX2(m, n) \
- unsigned int aom_masked_sad##m##x##n##_avx2( \
- const uint8_t *src, int src_stride, const uint8_t *ref, int ref_stride, \
- const uint8_t *second_pred, const uint8_t *msk, int msk_stride, \
- int invert_mask) { \
- return aom_masked_sad_avx2(src, src_stride, ref, ref_stride, second_pred, \
- msk, msk_stride, invert_mask, m, n); \
- }
-// One wrapper per block size; each line expands to a complete function definition.
-MASKSADMXN_AVX2(4, 4)
-MASKSADMXN_AVX2(4, 8)
-MASKSADMXN_AVX2(8, 4)
-MASKSADMXN_AVX2(8, 8)
-MASKSADMXN_AVX2(8, 16)
-MASKSADMXN_AVX2(16, 8)
-MASKSADMXN_AVX2(16, 16)
-MASKSADMXN_AVX2(16, 32)
-MASKSADMXN_AVX2(32, 16)
-MASKSADMXN_AVX2(32, 32)
-MASKSADMXN_AVX2(32, 64)
-MASKSADMXN_AVX2(64, 32)
-MASKSADMXN_AVX2(64, 64)
-MASKSADMXN_AVX2(64, 128)
-MASKSADMXN_AVX2(128, 64)
-MASKSADMXN_AVX2(128, 128)
-MASKSADMXN_AVX2(4, 16)
-MASKSADMXN_AVX2(16, 4)
-MASKSADMXN_AVX2(8, 32)
-MASKSADMXN_AVX2(32, 8)
-MASKSADMXN_AVX2(16, 64)
-MASKSADMXN_AVX2(64, 16)
-/* Masked SAD for 8-pixel-wide blocks of 16-bit samples, two rows per
- * iteration.
- * src8, a8 and b8 are converted to uint16_t pointers with
- * CONVERT_TO_SHORTPTR; the mask stays 8-bit. Each iteration packs row y into
- * the low 128 bits and row y + 1 into the high 128 bits, so height should be
- * even. The prediction is (a * m + b * (mask_max - m) + round_const) >>
- * AOM_BLEND_A64_ROUND_BITS; absolute differences against src are
- * accumulated. Returns (sum + 31) >> 6. */
-static INLINE unsigned int highbd_masked_sad8xh_avx2(
- const uint8_t *src8, int src_stride, const uint8_t *a8, int a_stride,
- const uint8_t *b8, int b_stride, const uint8_t *m_ptr, int m_stride,
- int height) {
- const uint16_t *src_ptr = CONVERT_TO_SHORTPTR(src8);
- const uint16_t *a_ptr = CONVERT_TO_SHORTPTR(a8);
- const uint16_t *b_ptr = CONVERT_TO_SHORTPTR(b8);
- int y;
- __m256i res = _mm256_setzero_si256();
- const __m256i mask_max = _mm256_set1_epi16((1 << AOM_BLEND_A64_ROUND_BITS));
- const __m256i round_const =
- _mm256_set1_epi32((1 << AOM_BLEND_A64_ROUND_BITS) >> 1);
- const __m256i one = _mm256_set1_epi16(1);
-
- for (y = 0; y < height; y += 2) {
- const __m256i src = xx_loadu2_m128i(src_ptr + src_stride, src_ptr);
- const __m256i a = xx_loadu2_m128i(a_ptr + a_stride, a_ptr);
- const __m256i b = xx_loadu2_m128i(b_ptr + b_stride, b_ptr);
- // Zero-extend mask to 16 bits (row y in the low half, row y + 1 in the high half)
- const __m256i m = _mm256_cvtepu8_epi16(_mm_unpacklo_epi64(
- _mm_loadl_epi64((const __m128i *)(m_ptr)),
- _mm_loadl_epi64((const __m128i *)(m_ptr + m_stride))));
- const __m256i m_inv = _mm256_sub_epi16(mask_max, m);
-
- const __m256i data_l = _mm256_unpacklo_epi16(a, b);
- const __m256i mask_l = _mm256_unpacklo_epi16(m, m_inv);
- __m256i pred_l = _mm256_madd_epi16(data_l, mask_l);
- pred_l = _mm256_srai_epi32(_mm256_add_epi32(pred_l, round_const),
- AOM_BLEND_A64_ROUND_BITS);
-
- const __m256i data_r = _mm256_unpackhi_epi16(a, b);
- const __m256i mask_r = _mm256_unpackhi_epi16(m, m_inv);
- __m256i pred_r = _mm256_madd_epi16(data_r, mask_r);
- pred_r = _mm256_srai_epi32(_mm256_add_epi32(pred_r, round_const),
- AOM_BLEND_A64_ROUND_BITS);
-
- // Note: the maximum value in pred_l/r is (2^bd)-1 < 2^15,
- // so it is safe to do signed saturation here.
- const __m256i pred = _mm256_packs_epi32(pred_l, pred_r);
- // There is no 16-bit SAD instruction, so we have to synthesize
- // a 16-element SAD. We do this by storing 8 32-bit partial SADs,
- // and accumulating them at the end
- const __m256i diff = _mm256_abs_epi16(_mm256_sub_epi16(pred, src));
- res = _mm256_add_epi32(res, _mm256_madd_epi16(diff, one));
- // Advance every pointer by the two rows just processed.
- src_ptr += src_stride << 1;
- a_ptr += a_stride << 1;
- b_ptr += b_stride << 1;
- m_ptr += m_stride << 1;
- }
- // At this point, we have eight 32-bit partial SADs stored in 'res'; after the two hadds, lanes 0 and 4 hold the per-half totals.
- res = _mm256_hadd_epi32(res, res);
- res = _mm256_hadd_epi32(res, res);
- int sad = _mm256_extract_epi32(res, 0) + _mm256_extract_epi32(res, 4);
- return (sad + 31) >> 6;
-}
-/* Masked SAD for blocks of 16-bit samples whose width is processed in
- * 16-pixel steps.
- * src8, a8 and b8 are converted to uint16_t pointers with
- * CONVERT_TO_SHORTPTR; the mask stays 8-bit. The prediction is
- * (a * m + b * (mask_max - m) + round_const) >> AOM_BLEND_A64_ROUND_BITS;
- * absolute differences against src are accumulated. Returns (sum + 31) >> 6.
- * Each inner iteration loads 16 samples from every buffer, so width should
- * be a multiple of 16. All loads are unaligned. */
-static INLINE unsigned int highbd_masked_sad16xh_avx2(
- const uint8_t *src8, int src_stride, const uint8_t *a8, int a_stride,
- const uint8_t *b8, int b_stride, const uint8_t *m_ptr, int m_stride,
- int width, int height) {
- const uint16_t *src_ptr = CONVERT_TO_SHORTPTR(src8);
- const uint16_t *a_ptr = CONVERT_TO_SHORTPTR(a8);
- const uint16_t *b_ptr = CONVERT_TO_SHORTPTR(b8);
- int x, y;
- __m256i res = _mm256_setzero_si256();
- const __m256i mask_max = _mm256_set1_epi16((1 << AOM_BLEND_A64_ROUND_BITS));
- const __m256i round_const =
- _mm256_set1_epi32((1 << AOM_BLEND_A64_ROUND_BITS) >> 1);
- const __m256i one = _mm256_set1_epi16(1);
-
- for (y = 0; y < height; y++) {
- for (x = 0; x < width; x += 16) {
- const __m256i src = _mm256_lddqu_si256((const __m256i *)&src_ptr[x]);
- const __m256i a = _mm256_lddqu_si256((const __m256i *)&a_ptr[x]);
- const __m256i b = _mm256_lddqu_si256((const __m256i *)&b_ptr[x]);
- // Zero-extend mask to 16 bits
- const __m256i m =
- _mm256_cvtepu8_epi16(_mm_lddqu_si128((const __m128i *)&m_ptr[x]));
- const __m256i m_inv = _mm256_sub_epi16(mask_max, m);
-
- const __m256i data_l = _mm256_unpacklo_epi16(a, b);
- const __m256i mask_l = _mm256_unpacklo_epi16(m, m_inv);
- __m256i pred_l = _mm256_madd_epi16(data_l, mask_l);
- pred_l = _mm256_srai_epi32(_mm256_add_epi32(pred_l, round_const),
- AOM_BLEND_A64_ROUND_BITS);
-
- const __m256i data_r = _mm256_unpackhi_epi16(a, b);
- const __m256i mask_r = _mm256_unpackhi_epi16(m, m_inv);
- __m256i pred_r = _mm256_madd_epi16(data_r, mask_r);
- pred_r = _mm256_srai_epi32(_mm256_add_epi32(pred_r, round_const),
- AOM_BLEND_A64_ROUND_BITS);
-
- // Note: the maximum value in pred_l/r is (2^bd)-1 < 2^15,
- // so it is safe to do signed saturation here.
- const __m256i pred = _mm256_packs_epi32(pred_l, pred_r);
- // There is no 16-bit SAD instruction, so we have to synthesize
- // a 16-element SAD. We do this by storing 8 32-bit partial SADs,
- // and accumulating them at the end
- const __m256i diff = _mm256_abs_epi16(_mm256_sub_epi16(pred, src));
- res = _mm256_add_epi32(res, _mm256_madd_epi16(diff, one));
- }
-
- src_ptr += src_stride;
- a_ptr += a_stride;
- b_ptr += b_stride;
- m_ptr += m_stride;
- }
- // At this point, we have eight 32-bit partial SADs stored in 'res'; after the two hadds, lanes 0 and 4 hold the per-half totals.
- res = _mm256_hadd_epi32(res, res);
- res = _mm256_hadd_epi32(res, res);
- int sad = _mm256_extract_epi32(res, 0) + _mm256_extract_epi32(res, 4);
- return (sad + 31) >> 6;
-}
-
-// Dispatches an m x n high-bit-depth masked SAD to the kernel that handles
-// width m.
-//
-// The two blended predictors are `ref` (stride ref_stride) and `second_pred`
-// (addressed with a stride of m). When invert_mask is non-zero the two swap
-// roles, i.e. second_pred becomes the first blend operand and ref the second.
-// Width 4 uses the SSSE3 kernel, width 8 the two-row AVX2 kernel and every
-// other width the 16-pixel-step AVX2 kernel.
-static INLINE unsigned int aom_highbd_masked_sad_avx2(
-    const uint8_t *src, int src_stride, const uint8_t *ref, int ref_stride,
-    const uint8_t *second_pred, const uint8_t *msk, int msk_stride,
-    int invert_mask, int m, int n) {
-  // Select the operand order once instead of duplicating the width switch.
-  const uint8_t *const first = invert_mask ? second_pred : ref;
-  const int first_stride = invert_mask ? m : ref_stride;
-  const uint8_t *const second = invert_mask ? ref : second_pred;
-  const int second_stride = invert_mask ? ref_stride : m;
-
-  switch (m) {
-    case 4:
-      return aom_highbd_masked_sad4xh_ssse3(src, src_stride, first,
-                                            first_stride, second,
-                                            second_stride, msk, msk_stride, n);
-    case 8:
-      return highbd_masked_sad8xh_avx2(src, src_stride, first, first_stride,
-                                       second, second_stride, msk, msk_stride,
-                                       n);
-    default:
-      return highbd_masked_sad16xh_avx2(src, src_stride, first, first_stride,
-                                        second, second_stride, msk, msk_stride,
-                                        m, n);
-  }
-}
-
-// Defines the function aom_highbd_masked_sad<m>x<n>_avx2, a thin wrapper that
-// forwards its arguments to aom_highbd_masked_sad_avx2 together with the
-// block width m and height n.
-#define HIGHBD_MASKSADMXN_AVX2(m, n)                                      \
-  unsigned int aom_highbd_masked_sad##m##x##n##_avx2(                     \
-      const uint8_t *src8, int src_stride, const uint8_t *ref8,           \
-      int ref_stride, const uint8_t *second_pred8, const uint8_t *msk,    \
-      int msk_stride, int invert_mask) {                                  \
-    return aom_highbd_masked_sad_avx2(src8, src_stride, ref8, ref_stride, \
-                                      second_pred8, msk, msk_stride,      \
-                                      invert_mask, m, n);                 \
-  }
-
-// Each invocation expands to a complete function definition, so it takes no
-// trailing ';' (a stray ';' at file scope is an empty declaration, which ISO
-// C does not allow). This matches the MASKSADMXN_AVX2 list in this file.
-HIGHBD_MASKSADMXN_AVX2(4, 4)
-HIGHBD_MASKSADMXN_AVX2(4, 8)
-HIGHBD_MASKSADMXN_AVX2(8, 4)
-HIGHBD_MASKSADMXN_AVX2(8, 8)
-HIGHBD_MASKSADMXN_AVX2(8, 16)
-HIGHBD_MASKSADMXN_AVX2(16, 8)
-HIGHBD_MASKSADMXN_AVX2(16, 16)
-HIGHBD_MASKSADMXN_AVX2(16, 32)
-HIGHBD_MASKSADMXN_AVX2(32, 16)
-HIGHBD_MASKSADMXN_AVX2(32, 32)
-HIGHBD_MASKSADMXN_AVX2(32, 64)
-HIGHBD_MASKSADMXN_AVX2(64, 32)
-HIGHBD_MASKSADMXN_AVX2(64, 64)
-HIGHBD_MASKSADMXN_AVX2(64, 128)
-HIGHBD_MASKSADMXN_AVX2(128, 64)
-HIGHBD_MASKSADMXN_AVX2(128, 128)
-HIGHBD_MASKSADMXN_AVX2(4, 16)
-HIGHBD_MASKSADMXN_AVX2(16, 4)
-HIGHBD_MASKSADMXN_AVX2(8, 32)
-HIGHBD_MASKSADMXN_AVX2(32, 8)
-HIGHBD_MASKSADMXN_AVX2(16, 64)
-HIGHBD_MASKSADMXN_AVX2(64, 16)
diff --git a/third_party/aom/aom_dsp/x86/masked_sad_intrin_ssse3.c b/third_party/aom/aom_dsp/x86/masked_sad_intrin_ssse3.c
deleted file mode 100644
index 493f9bd8f..000000000
--- a/third_party/aom/aom_dsp/x86/masked_sad_intrin_ssse3.c
+++ /dev/null
@@ -1,402 +0,0 @@
-/*
- * Copyright (c) 2017, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#include <stdio.h>
-#include <tmmintrin.h>
-
-#include "config/aom_config.h"
-#include "config/aom_dsp_rtcd.h"
-
-#include "aom_dsp/blend.h"
-#include "aom/aom_integer.h"
-#include "aom_dsp/x86/synonyms.h"
-
-#include "aom_dsp/x86//masked_sad_intrin_ssse3.h"
-
-// For width a multiple of 16
-static INLINE unsigned int masked_sad_ssse3(const uint8_t *src_ptr,
- int src_stride,
- const uint8_t *a_ptr, int a_stride,
- const uint8_t *b_ptr, int b_stride,
- const uint8_t *m_ptr, int m_stride,
- int width, int height);
-
-#define MASKSADMXN_SSSE3(m, n) \
- unsigned int aom_masked_sad##m##x##n##_ssse3( \
- const uint8_t *src, int src_stride, const uint8_t *ref, int ref_stride, \
- const uint8_t *second_pred, const uint8_t *msk, int msk_stride, \
- int invert_mask) { \
- if (!invert_mask) \
- return masked_sad_ssse3(src, src_stride, ref, ref_stride, second_pred, \
- m, msk, msk_stride, m, n); \
- else \
- return masked_sad_ssse3(src, src_stride, second_pred, m, ref, \
- ref_stride, msk, msk_stride, m, n); \
- }
-
-#define MASKSAD8XN_SSSE3(n) \
- unsigned int aom_masked_sad8x##n##_ssse3( \
- const uint8_t *src, int src_stride, const uint8_t *ref, int ref_stride, \
- const uint8_t *second_pred, const uint8_t *msk, int msk_stride, \
- int invert_mask) { \
- if (!invert_mask) \
- return aom_masked_sad8xh_ssse3(src, src_stride, ref, ref_stride, \
- second_pred, 8, msk, msk_stride, n); \
- else \
- return aom_masked_sad8xh_ssse3(src, src_stride, second_pred, 8, ref, \
- ref_stride, msk, msk_stride, n); \
- }
-
-#define MASKSAD4XN_SSSE3(n) \
- unsigned int aom_masked_sad4x##n##_ssse3( \
- const uint8_t *src, int src_stride, const uint8_t *ref, int ref_stride, \
- const uint8_t *second_pred, const uint8_t *msk, int msk_stride, \
- int invert_mask) { \
- if (!invert_mask) \
- return aom_masked_sad4xh_ssse3(src, src_stride, ref, ref_stride, \
- second_pred, 4, msk, msk_stride, n); \
- else \
- return aom_masked_sad4xh_ssse3(src, src_stride, second_pred, 4, ref, \
- ref_stride, msk, msk_stride, n); \
- }
-
-MASKSADMXN_SSSE3(128, 128)
-MASKSADMXN_SSSE3(128, 64)
-MASKSADMXN_SSSE3(64, 128)
-MASKSADMXN_SSSE3(64, 64)
-MASKSADMXN_SSSE3(64, 32)
-MASKSADMXN_SSSE3(32, 64)
-MASKSADMXN_SSSE3(32, 32)
-MASKSADMXN_SSSE3(32, 16)
-MASKSADMXN_SSSE3(16, 32)
-MASKSADMXN_SSSE3(16, 16)
-MASKSADMXN_SSSE3(16, 8)
-MASKSAD8XN_SSSE3(16)
-MASKSAD8XN_SSSE3(8)
-MASKSAD8XN_SSSE3(4)
-MASKSAD4XN_SSSE3(8)
-MASKSAD4XN_SSSE3(4)
-MASKSAD4XN_SSSE3(16)
-MASKSADMXN_SSSE3(16, 4)
-MASKSAD8XN_SSSE3(32)
-MASKSADMXN_SSSE3(32, 8)
-MASKSADMXN_SSSE3(16, 64)
-MASKSADMXN_SSSE3(64, 16)
-
-static INLINE unsigned int masked_sad_ssse3(const uint8_t *src_ptr,
- int src_stride,
- const uint8_t *a_ptr, int a_stride,
- const uint8_t *b_ptr, int b_stride,
- const uint8_t *m_ptr, int m_stride,
- int width, int height) {
- int x, y;
- __m128i res = _mm_setzero_si128();
- const __m128i mask_max = _mm_set1_epi8((1 << AOM_BLEND_A64_ROUND_BITS));
-
- for (y = 0; y < height; y++) {
- for (x = 0; x < width; x += 16) {
- const __m128i src = _mm_loadu_si128((const __m128i *)&src_ptr[x]);
- const __m128i a = _mm_loadu_si128((const __m128i *)&a_ptr[x]);
- const __m128i b = _mm_loadu_si128((const __m128i *)&b_ptr[x]);
- const __m128i m = _mm_loadu_si128((const __m128i *)&m_ptr[x]);
- const __m128i m_inv = _mm_sub_epi8(mask_max, m);
-
- // Calculate 16 predicted pixels.
- // Note that the maximum value of any entry of 'pred_l' or 'pred_r'
- // is 64 * 255, so we have plenty of space to add rounding constants.
- const __m128i data_l = _mm_unpacklo_epi8(a, b);
- const __m128i mask_l = _mm_unpacklo_epi8(m, m_inv);
- __m128i pred_l = _mm_maddubs_epi16(data_l, mask_l);
- pred_l = xx_roundn_epu16(pred_l, AOM_BLEND_A64_ROUND_BITS);
-
- const __m128i data_r = _mm_unpackhi_epi8(a, b);
- const __m128i mask_r = _mm_unpackhi_epi8(m, m_inv);
- __m128i pred_r = _mm_maddubs_epi16(data_r, mask_r);
- pred_r = xx_roundn_epu16(pred_r, AOM_BLEND_A64_ROUND_BITS);
-
- const __m128i pred = _mm_packus_epi16(pred_l, pred_r);
- res = _mm_add_epi32(res, _mm_sad_epu8(pred, src));
- }
-
- src_ptr += src_stride;
- a_ptr += a_stride;
- b_ptr += b_stride;
- m_ptr += m_stride;
- }
- // At this point, we have two 32-bit partial SADs in lanes 0 and 2 of 'res'.
- int32_t sad =
- _mm_cvtsi128_si32(res) + _mm_cvtsi128_si32(_mm_srli_si128(res, 8));
- return (sad + 31) >> 6;
-}
-
-unsigned int aom_masked_sad8xh_ssse3(const uint8_t *src_ptr, int src_stride,
- const uint8_t *a_ptr, int a_stride,
- const uint8_t *b_ptr, int b_stride,
- const uint8_t *m_ptr, int m_stride,
- int height) {
- int y;
- __m128i res = _mm_setzero_si128();
- const __m128i mask_max = _mm_set1_epi8((1 << AOM_BLEND_A64_ROUND_BITS));
-
- for (y = 0; y < height; y += 2) {
- const __m128i src = _mm_unpacklo_epi64(
- _mm_loadl_epi64((const __m128i *)src_ptr),
- _mm_loadl_epi64((const __m128i *)&src_ptr[src_stride]));
- const __m128i a0 = _mm_loadl_epi64((const __m128i *)a_ptr);
- const __m128i a1 = _mm_loadl_epi64((const __m128i *)&a_ptr[a_stride]);
- const __m128i b0 = _mm_loadl_epi64((const __m128i *)b_ptr);
- const __m128i b1 = _mm_loadl_epi64((const __m128i *)&b_ptr[b_stride]);
- const __m128i m =
- _mm_unpacklo_epi64(_mm_loadl_epi64((const __m128i *)m_ptr),
- _mm_loadl_epi64((const __m128i *)&m_ptr[m_stride]));
- const __m128i m_inv = _mm_sub_epi8(mask_max, m);
-
- const __m128i data_l = _mm_unpacklo_epi8(a0, b0);
- const __m128i mask_l = _mm_unpacklo_epi8(m, m_inv);
- __m128i pred_l = _mm_maddubs_epi16(data_l, mask_l);
- pred_l = xx_roundn_epu16(pred_l, AOM_BLEND_A64_ROUND_BITS);
-
- const __m128i data_r = _mm_unpacklo_epi8(a1, b1);
- const __m128i mask_r = _mm_unpackhi_epi8(m, m_inv);
- __m128i pred_r = _mm_maddubs_epi16(data_r, mask_r);
- pred_r = xx_roundn_epu16(pred_r, AOM_BLEND_A64_ROUND_BITS);
-
- const __m128i pred = _mm_packus_epi16(pred_l, pred_r);
- res = _mm_add_epi32(res, _mm_sad_epu8(pred, src));
-
- src_ptr += src_stride * 2;
- a_ptr += a_stride * 2;
- b_ptr += b_stride * 2;
- m_ptr += m_stride * 2;
- }
- int32_t sad =
- _mm_cvtsi128_si32(res) + _mm_cvtsi128_si32(_mm_srli_si128(res, 8));
- return (sad + 31) >> 6;
-}
-
-unsigned int aom_masked_sad4xh_ssse3(const uint8_t *src_ptr, int src_stride,
- const uint8_t *a_ptr, int a_stride,
- const uint8_t *b_ptr, int b_stride,
- const uint8_t *m_ptr, int m_stride,
- int height) {
- int y;
- __m128i res = _mm_setzero_si128();
- const __m128i mask_max = _mm_set1_epi8((1 << AOM_BLEND_A64_ROUND_BITS));
-
- for (y = 0; y < height; y += 2) {
- // Load two rows at a time, this seems to be a bit faster
- // than four rows at a time in this case.
- const __m128i src = _mm_unpacklo_epi32(
- _mm_cvtsi32_si128(*(uint32_t *)src_ptr),
- _mm_cvtsi32_si128(*(uint32_t *)&src_ptr[src_stride]));
- const __m128i a =
- _mm_unpacklo_epi32(_mm_cvtsi32_si128(*(uint32_t *)a_ptr),
- _mm_cvtsi32_si128(*(uint32_t *)&a_ptr[a_stride]));
- const __m128i b =
- _mm_unpacklo_epi32(_mm_cvtsi32_si128(*(uint32_t *)b_ptr),
- _mm_cvtsi32_si128(*(uint32_t *)&b_ptr[b_stride]));
- const __m128i m =
- _mm_unpacklo_epi32(_mm_cvtsi32_si128(*(uint32_t *)m_ptr),
- _mm_cvtsi32_si128(*(uint32_t *)&m_ptr[m_stride]));
- const __m128i m_inv = _mm_sub_epi8(mask_max, m);
-
- const __m128i data = _mm_unpacklo_epi8(a, b);
- const __m128i mask = _mm_unpacklo_epi8(m, m_inv);
- __m128i pred_16bit = _mm_maddubs_epi16(data, mask);
- pred_16bit = xx_roundn_epu16(pred_16bit, AOM_BLEND_A64_ROUND_BITS);
-
- const __m128i pred = _mm_packus_epi16(pred_16bit, _mm_setzero_si128());
- res = _mm_add_epi32(res, _mm_sad_epu8(pred, src));
-
- src_ptr += src_stride * 2;
- a_ptr += a_stride * 2;
- b_ptr += b_stride * 2;
- m_ptr += m_stride * 2;
- }
- // At this point, the SAD is stored in lane 0 of 'res'
- int32_t sad = _mm_cvtsi128_si32(res);
- return (sad + 31) >> 6;
-}
-
-// For width a multiple of 8
-static INLINE unsigned int highbd_masked_sad_ssse3(
- const uint8_t *src8, int src_stride, const uint8_t *a8, int a_stride,
- const uint8_t *b8, int b_stride, const uint8_t *m_ptr, int m_stride,
- int width, int height);
-
-#define HIGHBD_MASKSADMXN_SSSE3(m, n) \
- unsigned int aom_highbd_masked_sad##m##x##n##_ssse3( \
- const uint8_t *src8, int src_stride, const uint8_t *ref8, \
- int ref_stride, const uint8_t *second_pred8, const uint8_t *msk, \
- int msk_stride, int invert_mask) { \
- if (!invert_mask) \
- return highbd_masked_sad_ssse3(src8, src_stride, ref8, ref_stride, \
- second_pred8, m, msk, msk_stride, m, n); \
- else \
- return highbd_masked_sad_ssse3(src8, src_stride, second_pred8, m, ref8, \
- ref_stride, msk, msk_stride, m, n); \
- }
-
-#define HIGHBD_MASKSAD4XN_SSSE3(n) \
- unsigned int aom_highbd_masked_sad4x##n##_ssse3( \
- const uint8_t *src8, int src_stride, const uint8_t *ref8, \
- int ref_stride, const uint8_t *second_pred8, const uint8_t *msk, \
- int msk_stride, int invert_mask) { \
- if (!invert_mask) \
- return aom_highbd_masked_sad4xh_ssse3(src8, src_stride, ref8, \
- ref_stride, second_pred8, 4, msk, \
- msk_stride, n); \
- else \
- return aom_highbd_masked_sad4xh_ssse3(src8, src_stride, second_pred8, 4, \
- ref8, ref_stride, msk, msk_stride, \
- n); \
- }
-
-HIGHBD_MASKSADMXN_SSSE3(128, 128)
-HIGHBD_MASKSADMXN_SSSE3(128, 64)
-HIGHBD_MASKSADMXN_SSSE3(64, 128)
-HIGHBD_MASKSADMXN_SSSE3(64, 64)
-HIGHBD_MASKSADMXN_SSSE3(64, 32)
-HIGHBD_MASKSADMXN_SSSE3(32, 64)
-HIGHBD_MASKSADMXN_SSSE3(32, 32)
-HIGHBD_MASKSADMXN_SSSE3(32, 16)
-HIGHBD_MASKSADMXN_SSSE3(16, 32)
-HIGHBD_MASKSADMXN_SSSE3(16, 16)
-HIGHBD_MASKSADMXN_SSSE3(16, 8)
-HIGHBD_MASKSADMXN_SSSE3(8, 16)
-HIGHBD_MASKSADMXN_SSSE3(8, 8)
-HIGHBD_MASKSADMXN_SSSE3(8, 4)
-HIGHBD_MASKSAD4XN_SSSE3(8)
-HIGHBD_MASKSAD4XN_SSSE3(4)
-HIGHBD_MASKSAD4XN_SSSE3(16)
-HIGHBD_MASKSADMXN_SSSE3(16, 4)
-HIGHBD_MASKSADMXN_SSSE3(8, 32)
-HIGHBD_MASKSADMXN_SSSE3(32, 8)
-HIGHBD_MASKSADMXN_SSSE3(16, 64)
-HIGHBD_MASKSADMXN_SSSE3(64, 16)
-
-static INLINE unsigned int highbd_masked_sad_ssse3(
- const uint8_t *src8, int src_stride, const uint8_t *a8, int a_stride,
- const uint8_t *b8, int b_stride, const uint8_t *m_ptr, int m_stride,
- int width, int height) {
- const uint16_t *src_ptr = CONVERT_TO_SHORTPTR(src8);
- const uint16_t *a_ptr = CONVERT_TO_SHORTPTR(a8);
- const uint16_t *b_ptr = CONVERT_TO_SHORTPTR(b8);
- int x, y;
- __m128i res = _mm_setzero_si128();
- const __m128i mask_max = _mm_set1_epi16((1 << AOM_BLEND_A64_ROUND_BITS));
- const __m128i round_const =
- _mm_set1_epi32((1 << AOM_BLEND_A64_ROUND_BITS) >> 1);
- const __m128i one = _mm_set1_epi16(1);
-
- for (y = 0; y < height; y++) {
- for (x = 0; x < width; x += 8) {
- const __m128i src = _mm_loadu_si128((const __m128i *)&src_ptr[x]);
- const __m128i a = _mm_loadu_si128((const __m128i *)&a_ptr[x]);
- const __m128i b = _mm_loadu_si128((const __m128i *)&b_ptr[x]);
- // Zero-extend mask to 16 bits
- const __m128i m = _mm_unpacklo_epi8(
- _mm_loadl_epi64((const __m128i *)&m_ptr[x]), _mm_setzero_si128());
- const __m128i m_inv = _mm_sub_epi16(mask_max, m);
-
- const __m128i data_l = _mm_unpacklo_epi16(a, b);
- const __m128i mask_l = _mm_unpacklo_epi16(m, m_inv);
- __m128i pred_l = _mm_madd_epi16(data_l, mask_l);
- pred_l = _mm_srai_epi32(_mm_add_epi32(pred_l, round_const),
- AOM_BLEND_A64_ROUND_BITS);
-
- const __m128i data_r = _mm_unpackhi_epi16(a, b);
- const __m128i mask_r = _mm_unpackhi_epi16(m, m_inv);
- __m128i pred_r = _mm_madd_epi16(data_r, mask_r);
- pred_r = _mm_srai_epi32(_mm_add_epi32(pred_r, round_const),
- AOM_BLEND_A64_ROUND_BITS);
-
- // Note: the maximum value in pred_l/r is (2^bd)-1 < 2^15,
- // so it is safe to do signed saturation here.
- const __m128i pred = _mm_packs_epi32(pred_l, pred_r);
- // There is no 16-bit SAD instruction, so we have to synthesize
- // an 8-element SAD. We do this by storing 4 32-bit partial SADs,
- // and accumulating them at the end
- const __m128i diff = _mm_abs_epi16(_mm_sub_epi16(pred, src));
- res = _mm_add_epi32(res, _mm_madd_epi16(diff, one));
- }
-
- src_ptr += src_stride;
- a_ptr += a_stride;
- b_ptr += b_stride;
- m_ptr += m_stride;
- }
- // At this point, we have four 32-bit partial SADs stored in 'res'.
- res = _mm_hadd_epi32(res, res);
- res = _mm_hadd_epi32(res, res);
- int sad = _mm_cvtsi128_si32(res);
- return (sad + 31) >> 6;
-}
-
-unsigned int aom_highbd_masked_sad4xh_ssse3(const uint8_t *src8, int src_stride,
- const uint8_t *a8, int a_stride,
- const uint8_t *b8, int b_stride,
- const uint8_t *m_ptr, int m_stride,
- int height) {
- const uint16_t *src_ptr = CONVERT_TO_SHORTPTR(src8);
- const uint16_t *a_ptr = CONVERT_TO_SHORTPTR(a8);
- const uint16_t *b_ptr = CONVERT_TO_SHORTPTR(b8);
- int y;
- __m128i res = _mm_setzero_si128();
- const __m128i mask_max = _mm_set1_epi16((1 << AOM_BLEND_A64_ROUND_BITS));
- const __m128i round_const =
- _mm_set1_epi32((1 << AOM_BLEND_A64_ROUND_BITS) >> 1);
- const __m128i one = _mm_set1_epi16(1);
-
- for (y = 0; y < height; y += 2) {
- const __m128i src = _mm_unpacklo_epi64(
- _mm_loadl_epi64((const __m128i *)src_ptr),
- _mm_loadl_epi64((const __m128i *)&src_ptr[src_stride]));
- const __m128i a =
- _mm_unpacklo_epi64(_mm_loadl_epi64((const __m128i *)a_ptr),
- _mm_loadl_epi64((const __m128i *)&a_ptr[a_stride]));
- const __m128i b =
- _mm_unpacklo_epi64(_mm_loadl_epi64((const __m128i *)b_ptr),
- _mm_loadl_epi64((const __m128i *)&b_ptr[b_stride]));
- // Zero-extend mask to 16 bits
- const __m128i m = _mm_unpacklo_epi8(
- _mm_unpacklo_epi32(
- _mm_cvtsi32_si128(*(const uint32_t *)m_ptr),
- _mm_cvtsi32_si128(*(const uint32_t *)&m_ptr[m_stride])),
- _mm_setzero_si128());
- const __m128i m_inv = _mm_sub_epi16(mask_max, m);
-
- const __m128i data_l = _mm_unpacklo_epi16(a, b);
- const __m128i mask_l = _mm_unpacklo_epi16(m, m_inv);
- __m128i pred_l = _mm_madd_epi16(data_l, mask_l);
- pred_l = _mm_srai_epi32(_mm_add_epi32(pred_l, round_const),
- AOM_BLEND_A64_ROUND_BITS);
-
- const __m128i data_r = _mm_unpackhi_epi16(a, b);
- const __m128i mask_r = _mm_unpackhi_epi16(m, m_inv);
- __m128i pred_r = _mm_madd_epi16(data_r, mask_r);
- pred_r = _mm_srai_epi32(_mm_add_epi32(pred_r, round_const),
- AOM_BLEND_A64_ROUND_BITS);
-
- const __m128i pred = _mm_packs_epi32(pred_l, pred_r);
- const __m128i diff = _mm_abs_epi16(_mm_sub_epi16(pred, src));
- res = _mm_add_epi32(res, _mm_madd_epi16(diff, one));
-
- src_ptr += src_stride * 2;
- a_ptr += a_stride * 2;
- b_ptr += b_stride * 2;
- m_ptr += m_stride * 2;
- }
- res = _mm_hadd_epi32(res, res);
- res = _mm_hadd_epi32(res, res);
- int sad = _mm_cvtsi128_si32(res);
- return (sad + 31) >> 6;
-}
diff --git a/third_party/aom/aom_dsp/x86/masked_sad_intrin_ssse3.h b/third_party/aom/aom_dsp/x86/masked_sad_intrin_ssse3.h
deleted file mode 100644
index cffbd9672..000000000
--- a/third_party/aom/aom_dsp/x86/masked_sad_intrin_ssse3.h
+++ /dev/null
@@ -1,33 +0,0 @@
-/*
- * Copyright (c) 2018, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#ifndef AOM_AOM_DSP_X86_MASKED_SAD_INTRIN_SSSE3_H_
-#define AOM_AOM_DSP_X86_MASKED_SAD_INTRIN_SSSE3_H_
-
-unsigned int aom_masked_sad8xh_ssse3(const uint8_t *src_ptr, int src_stride,
- const uint8_t *a_ptr, int a_stride,
- const uint8_t *b_ptr, int b_stride,
- const uint8_t *m_ptr, int m_stride,
- int height);
-
-unsigned int aom_masked_sad4xh_ssse3(const uint8_t *src_ptr, int src_stride,
- const uint8_t *a_ptr, int a_stride,
- const uint8_t *b_ptr, int b_stride,
- const uint8_t *m_ptr, int m_stride,
- int height);
-
-unsigned int aom_highbd_masked_sad4xh_ssse3(const uint8_t *src8, int src_stride,
- const uint8_t *a8, int a_stride,
- const uint8_t *b8, int b_stride,
- const uint8_t *m_ptr, int m_stride,
- int height);
-
-#endif // AOM_AOM_DSP_X86_MASKED_SAD_INTRIN_SSSE3_H_
diff --git a/third_party/aom/aom_dsp/x86/masked_variance_intrin_ssse3.c b/third_party/aom/aom_dsp/x86/masked_variance_intrin_ssse3.c
deleted file mode 100644
index d7dbefd7d..000000000
--- a/third_party/aom/aom_dsp/x86/masked_variance_intrin_ssse3.c
+++ /dev/null
@@ -1,1064 +0,0 @@
-/*
- * Copyright (c) 2017, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#include <stdlib.h>
-#include <string.h>
-#include <tmmintrin.h>
-
-#include "config/aom_config.h"
-#include "config/aom_dsp_rtcd.h"
-
-#include "aom/aom_integer.h"
-#include "aom_dsp/aom_filter.h"
-#include "aom_dsp/blend.h"
-#include "aom_dsp/x86/masked_variance_intrin_ssse3.h"
-#include "aom_dsp/x86/synonyms.h"
-#include "aom_ports/mem.h"
-
-// For width a multiple of 16
-static void bilinear_filter(const uint8_t *src, int src_stride, int xoffset,
- int yoffset, uint8_t *dst, int w, int h);
-
-static void bilinear_filter8xh(const uint8_t *src, int src_stride, int xoffset,
- int yoffset, uint8_t *dst, int h);
-
-static void bilinear_filter4xh(const uint8_t *src, int src_stride, int xoffset,
- int yoffset, uint8_t *dst, int h);
-
-// For width a multiple of 16
-static void masked_variance(const uint8_t *src_ptr, int src_stride,
- const uint8_t *a_ptr, int a_stride,
- const uint8_t *b_ptr, int b_stride,
- const uint8_t *m_ptr, int m_stride, int width,
- int height, unsigned int *sse, int *sum_);
-
-static void masked_variance8xh(const uint8_t *src_ptr, int src_stride,
- const uint8_t *a_ptr, const uint8_t *b_ptr,
- const uint8_t *m_ptr, int m_stride, int height,
- unsigned int *sse, int *sum_);
-
-static void masked_variance4xh(const uint8_t *src_ptr, int src_stride,
- const uint8_t *a_ptr, const uint8_t *b_ptr,
- const uint8_t *m_ptr, int m_stride, int height,
- unsigned int *sse, int *sum_);
-
-#define MASK_SUBPIX_VAR_SSSE3(W, H) \
- unsigned int aom_masked_sub_pixel_variance##W##x##H##_ssse3( \
- const uint8_t *src, int src_stride, int xoffset, int yoffset, \
- const uint8_t *ref, int ref_stride, const uint8_t *second_pred, \
- const uint8_t *msk, int msk_stride, int invert_mask, \
- unsigned int *sse) { \
- int sum; \
- uint8_t temp[(H + 1) * W]; \
- \
- bilinear_filter(src, src_stride, xoffset, yoffset, temp, W, H); \
- \
- if (!invert_mask) \
- masked_variance(ref, ref_stride, temp, W, second_pred, W, msk, \
- msk_stride, W, H, sse, &sum); \
- else \
- masked_variance(ref, ref_stride, second_pred, W, temp, W, msk, \
- msk_stride, W, H, sse, &sum); \
- return *sse - (uint32_t)(((int64_t)sum * sum) / (W * H)); \
- }
-
-#define MASK_SUBPIX_VAR8XH_SSSE3(H) \
- unsigned int aom_masked_sub_pixel_variance8x##H##_ssse3( \
- const uint8_t *src, int src_stride, int xoffset, int yoffset, \
- const uint8_t *ref, int ref_stride, const uint8_t *second_pred, \
- const uint8_t *msk, int msk_stride, int invert_mask, \
- unsigned int *sse) { \
- int sum; \
- uint8_t temp[(H + 1) * 8]; \
- \
- bilinear_filter8xh(src, src_stride, xoffset, yoffset, temp, H); \
- \
- if (!invert_mask) \
- masked_variance8xh(ref, ref_stride, temp, second_pred, msk, msk_stride, \
- H, sse, &sum); \
- else \
- masked_variance8xh(ref, ref_stride, second_pred, temp, msk, msk_stride, \
- H, sse, &sum); \
- return *sse - (uint32_t)(((int64_t)sum * sum) / (8 * H)); \
- }
-
-#define MASK_SUBPIX_VAR4XH_SSSE3(H) \
- unsigned int aom_masked_sub_pixel_variance4x##H##_ssse3( \
- const uint8_t *src, int src_stride, int xoffset, int yoffset, \
- const uint8_t *ref, int ref_stride, const uint8_t *second_pred, \
- const uint8_t *msk, int msk_stride, int invert_mask, \
- unsigned int *sse) { \
- int sum; \
- uint8_t temp[(H + 1) * 4]; \
- \
- bilinear_filter4xh(src, src_stride, xoffset, yoffset, temp, H); \
- \
- if (!invert_mask) \
- masked_variance4xh(ref, ref_stride, temp, second_pred, msk, msk_stride, \
- H, sse, &sum); \
- else \
- masked_variance4xh(ref, ref_stride, second_pred, temp, msk, msk_stride, \
- H, sse, &sum); \
- return *sse - (uint32_t)(((int64_t)sum * sum) / (4 * H)); \
- }
-
-MASK_SUBPIX_VAR_SSSE3(128, 128)
-MASK_SUBPIX_VAR_SSSE3(128, 64)
-MASK_SUBPIX_VAR_SSSE3(64, 128)
-MASK_SUBPIX_VAR_SSSE3(64, 64)
-MASK_SUBPIX_VAR_SSSE3(64, 32)
-MASK_SUBPIX_VAR_SSSE3(32, 64)
-MASK_SUBPIX_VAR_SSSE3(32, 32)
-MASK_SUBPIX_VAR_SSSE3(32, 16)
-MASK_SUBPIX_VAR_SSSE3(16, 32)
-MASK_SUBPIX_VAR_SSSE3(16, 16)
-MASK_SUBPIX_VAR_SSSE3(16, 8)
-MASK_SUBPIX_VAR8XH_SSSE3(16)
-MASK_SUBPIX_VAR8XH_SSSE3(8)
-MASK_SUBPIX_VAR8XH_SSSE3(4)
-MASK_SUBPIX_VAR4XH_SSSE3(8)
-MASK_SUBPIX_VAR4XH_SSSE3(4)
-MASK_SUBPIX_VAR4XH_SSSE3(16)
-MASK_SUBPIX_VAR_SSSE3(16, 4)
-MASK_SUBPIX_VAR8XH_SSSE3(32)
-MASK_SUBPIX_VAR_SSSE3(32, 8)
-MASK_SUBPIX_VAR_SSSE3(64, 16)
-MASK_SUBPIX_VAR_SSSE3(16, 64)
-
-static INLINE __m128i filter_block(const __m128i a, const __m128i b,
- const __m128i filter) {
- __m128i v0 = _mm_unpacklo_epi8(a, b);
- v0 = _mm_maddubs_epi16(v0, filter);
- v0 = xx_roundn_epu16(v0, FILTER_BITS);
-
- __m128i v1 = _mm_unpackhi_epi8(a, b);
- v1 = _mm_maddubs_epi16(v1, filter);
- v1 = xx_roundn_epu16(v1, FILTER_BITS);
-
- return _mm_packus_epi16(v0, v1);
-}
-
-static void bilinear_filter(const uint8_t *src, int src_stride, int xoffset,
- int yoffset, uint8_t *dst, int w, int h) {
- int i, j;
- // Horizontal filter
- if (xoffset == 0) {
- uint8_t *b = dst;
- for (i = 0; i < h + 1; ++i) {
- for (j = 0; j < w; j += 16) {
- __m128i x = _mm_loadu_si128((__m128i *)&src[j]);
- _mm_storeu_si128((__m128i *)&b[j], x);
- }
- src += src_stride;
- b += w;
- }
- } else if (xoffset == 4) {
- uint8_t *b = dst;
- for (i = 0; i < h + 1; ++i) {
- for (j = 0; j < w; j += 16) {
- __m128i x = _mm_loadu_si128((__m128i *)&src[j]);
- __m128i y = _mm_loadu_si128((__m128i *)&src[j + 16]);
- __m128i z = _mm_alignr_epi8(y, x, 1);
- _mm_storeu_si128((__m128i *)&b[j], _mm_avg_epu8(x, z));
- }
- src += src_stride;
- b += w;
- }
- } else {
- uint8_t *b = dst;
- const uint8_t *hfilter = bilinear_filters_2t[xoffset];
- const __m128i hfilter_vec = _mm_set1_epi16(hfilter[0] | (hfilter[1] << 8));
- for (i = 0; i < h + 1; ++i) {
- for (j = 0; j < w; j += 16) {
- const __m128i x = _mm_loadu_si128((__m128i *)&src[j]);
- const __m128i y = _mm_loadu_si128((__m128i *)&src[j + 16]);
- const __m128i z = _mm_alignr_epi8(y, x, 1);
- const __m128i res = filter_block(x, z, hfilter_vec);
- _mm_storeu_si128((__m128i *)&b[j], res);
- }
-
- src += src_stride;
- b += w;
- }
- }
-
- // Vertical filter
- if (yoffset == 0) {
- // The data is already in 'dst', so no need to filter
- } else if (yoffset == 4) {
- for (i = 0; i < h; ++i) {
- for (j = 0; j < w; j += 16) {
- __m128i x = _mm_loadu_si128((__m128i *)&dst[j]);
- __m128i y = _mm_loadu_si128((__m128i *)&dst[j + w]);
- _mm_storeu_si128((__m128i *)&dst[j], _mm_avg_epu8(x, y));
- }
- dst += w;
- }
- } else {
- const uint8_t *vfilter = bilinear_filters_2t[yoffset];
- const __m128i vfilter_vec = _mm_set1_epi16(vfilter[0] | (vfilter[1] << 8));
- for (i = 0; i < h; ++i) {
- for (j = 0; j < w; j += 16) {
- const __m128i x = _mm_loadu_si128((__m128i *)&dst[j]);
- const __m128i y = _mm_loadu_si128((__m128i *)&dst[j + w]);
- const __m128i res = filter_block(x, y, vfilter_vec);
- _mm_storeu_si128((__m128i *)&dst[j], res);
- }
-
- dst += w;
- }
- }
-}
-
-static INLINE __m128i filter_block_2rows(const __m128i a0, const __m128i b0,
- const __m128i a1, const __m128i b1,
- const __m128i filter) {
- __m128i v0 = _mm_unpacklo_epi8(a0, b0);
- v0 = _mm_maddubs_epi16(v0, filter);
- v0 = xx_roundn_epu16(v0, FILTER_BITS);
-
- __m128i v1 = _mm_unpacklo_epi8(a1, b1);
- v1 = _mm_maddubs_epi16(v1, filter);
- v1 = xx_roundn_epu16(v1, FILTER_BITS);
-
- return _mm_packus_epi16(v0, v1);
-}
-
-static void bilinear_filter8xh(const uint8_t *src, int src_stride, int xoffset,
- int yoffset, uint8_t *dst, int h) {
- int i;
- // Horizontal filter
- if (xoffset == 0) {
- uint8_t *b = dst;
- for (i = 0; i < h + 1; ++i) {
- __m128i x = _mm_loadl_epi64((__m128i *)src);
- _mm_storel_epi64((__m128i *)b, x);
- src += src_stride;
- b += 8;
- }
- } else if (xoffset == 4) {
- uint8_t *b = dst;
- for (i = 0; i < h + 1; ++i) {
- __m128i x = _mm_loadu_si128((__m128i *)src);
- __m128i z = _mm_srli_si128(x, 1);
- _mm_storel_epi64((__m128i *)b, _mm_avg_epu8(x, z));
- src += src_stride;
- b += 8;
- }
- } else {
- uint8_t *b = dst;
- const uint8_t *hfilter = bilinear_filters_2t[xoffset];
- const __m128i hfilter_vec = _mm_set1_epi16(hfilter[0] | (hfilter[1] << 8));
- for (i = 0; i < h; i += 2) {
- const __m128i x0 = _mm_loadu_si128((__m128i *)src);
- const __m128i z0 = _mm_srli_si128(x0, 1);
- const __m128i x1 = _mm_loadu_si128((__m128i *)&src[src_stride]);
- const __m128i z1 = _mm_srli_si128(x1, 1);
- const __m128i res = filter_block_2rows(x0, z0, x1, z1, hfilter_vec);
- _mm_storeu_si128((__m128i *)b, res);
-
- src += src_stride * 2;
- b += 16;
- }
- // Handle i = h separately
- const __m128i x0 = _mm_loadu_si128((__m128i *)src);
- const __m128i z0 = _mm_srli_si128(x0, 1);
-
- __m128i v0 = _mm_unpacklo_epi8(x0, z0);
- v0 = _mm_maddubs_epi16(v0, hfilter_vec);
- v0 = xx_roundn_epu16(v0, FILTER_BITS);
-
- _mm_storel_epi64((__m128i *)b, _mm_packus_epi16(v0, v0));
- }
-
- // Vertical filter
- if (yoffset == 0) {
- // The data is already in 'dst', so no need to filter
- } else if (yoffset == 4) {
- for (i = 0; i < h; ++i) {
- __m128i x = _mm_loadl_epi64((__m128i *)dst);
- __m128i y = _mm_loadl_epi64((__m128i *)&dst[8]);
- _mm_storel_epi64((__m128i *)dst, _mm_avg_epu8(x, y));
- dst += 8;
- }
- } else {
- const uint8_t *vfilter = bilinear_filters_2t[yoffset];
- const __m128i vfilter_vec = _mm_set1_epi16(vfilter[0] | (vfilter[1] << 8));
- for (i = 0; i < h; i += 2) {
- const __m128i x = _mm_loadl_epi64((__m128i *)dst);
- const __m128i y = _mm_loadl_epi64((__m128i *)&dst[8]);
- const __m128i z = _mm_loadl_epi64((__m128i *)&dst[16]);
- const __m128i res = filter_block_2rows(x, y, y, z, vfilter_vec);
- _mm_storeu_si128((__m128i *)dst, res);
-
- dst += 16;
- }
- }
-}
-
-static void bilinear_filter4xh(const uint8_t *src, int src_stride, int xoffset,
- int yoffset, uint8_t *dst, int h) {
- int i;
- // Horizontal filter
- if (xoffset == 0) {
- uint8_t *b = dst;
- for (i = 0; i < h + 1; ++i) {
- __m128i x = xx_loadl_32((__m128i *)src);
- xx_storel_32((__m128i *)b, x);
- src += src_stride;
- b += 4;
- }
- } else if (xoffset == 4) {
- uint8_t *b = dst;
- for (i = 0; i < h + 1; ++i) {
- __m128i x = _mm_loadl_epi64((__m128i *)src);
- __m128i z = _mm_srli_si128(x, 1);
- xx_storel_32((__m128i *)b, _mm_avg_epu8(x, z));
- src += src_stride;
- b += 4;
- }
- } else {
- uint8_t *b = dst;
- const uint8_t *hfilter = bilinear_filters_2t[xoffset];
- const __m128i hfilter_vec = _mm_set1_epi16(hfilter[0] | (hfilter[1] << 8));
- for (i = 0; i < h; i += 4) {
- const __m128i x0 = _mm_loadl_epi64((__m128i *)src);
- const __m128i z0 = _mm_srli_si128(x0, 1);
- const __m128i x1 = _mm_loadl_epi64((__m128i *)&src[src_stride]);
- const __m128i z1 = _mm_srli_si128(x1, 1);
- const __m128i x2 = _mm_loadl_epi64((__m128i *)&src[src_stride * 2]);
- const __m128i z2 = _mm_srli_si128(x2, 1);
- const __m128i x3 = _mm_loadl_epi64((__m128i *)&src[src_stride * 3]);
- const __m128i z3 = _mm_srli_si128(x3, 1);
-
- const __m128i a0 = _mm_unpacklo_epi32(x0, x1);
- const __m128i b0 = _mm_unpacklo_epi32(z0, z1);
- const __m128i a1 = _mm_unpacklo_epi32(x2, x3);
- const __m128i b1 = _mm_unpacklo_epi32(z2, z3);
- const __m128i res = filter_block_2rows(a0, b0, a1, b1, hfilter_vec);
- _mm_storeu_si128((__m128i *)b, res);
-
- src += src_stride * 4;
- b += 16;
- }
- // Handle i = h separately
- const __m128i x = _mm_loadl_epi64((__m128i *)src);
- const __m128i z = _mm_srli_si128(x, 1);
-
- __m128i v0 = _mm_unpacklo_epi8(x, z);
- v0 = _mm_maddubs_epi16(v0, hfilter_vec);
- v0 = xx_roundn_epu16(v0, FILTER_BITS);
-
- xx_storel_32((__m128i *)b, _mm_packus_epi16(v0, v0));
- }
-
- // Vertical filter
- if (yoffset == 0) {
- // The data is already in 'dst', so no need to filter
- } else if (yoffset == 4) {
- for (i = 0; i < h; ++i) {
- __m128i x = xx_loadl_32((__m128i *)dst);
- __m128i y = xx_loadl_32((__m128i *)&dst[4]);
- xx_storel_32((__m128i *)dst, _mm_avg_epu8(x, y));
- dst += 4;
- }
- } else {
- const uint8_t *vfilter = bilinear_filters_2t[yoffset];
- const __m128i vfilter_vec = _mm_set1_epi16(vfilter[0] | (vfilter[1] << 8));
- for (i = 0; i < h; i += 4) {
- const __m128i a = xx_loadl_32((__m128i *)dst);
- const __m128i b = xx_loadl_32((__m128i *)&dst[4]);
- const __m128i c = xx_loadl_32((__m128i *)&dst[8]);
- const __m128i d = xx_loadl_32((__m128i *)&dst[12]);
- const __m128i e = xx_loadl_32((__m128i *)&dst[16]);
-
- const __m128i a0 = _mm_unpacklo_epi32(a, b);
- const __m128i b0 = _mm_unpacklo_epi32(b, c);
- const __m128i a1 = _mm_unpacklo_epi32(c, d);
- const __m128i b1 = _mm_unpacklo_epi32(d, e);
- const __m128i res = filter_block_2rows(a0, b0, a1, b1, vfilter_vec);
- _mm_storeu_si128((__m128i *)dst, res);
-
- dst += 16;
- }
- }
-}
-
-static INLINE void accumulate_block(const __m128i src, const __m128i a,
- const __m128i b, const __m128i m,
- __m128i *sum, __m128i *sum_sq) {
- const __m128i zero = _mm_setzero_si128();
- const __m128i one = _mm_set1_epi16(1);
- const __m128i mask_max = _mm_set1_epi8((1 << AOM_BLEND_A64_ROUND_BITS));
- const __m128i m_inv = _mm_sub_epi8(mask_max, m);
-
- // Calculate 16 predicted pixels.
- // Note that the maximum value of any entry of 'pred_l' or 'pred_r'
- // is 64 * 255, so we have plenty of space to add rounding constants.
- const __m128i data_l = _mm_unpacklo_epi8(a, b);
- const __m128i mask_l = _mm_unpacklo_epi8(m, m_inv);
- __m128i pred_l = _mm_maddubs_epi16(data_l, mask_l);
- pred_l = xx_roundn_epu16(pred_l, AOM_BLEND_A64_ROUND_BITS);
-
- const __m128i data_r = _mm_unpackhi_epi8(a, b);
- const __m128i mask_r = _mm_unpackhi_epi8(m, m_inv);
- __m128i pred_r = _mm_maddubs_epi16(data_r, mask_r);
- pred_r = xx_roundn_epu16(pred_r, AOM_BLEND_A64_ROUND_BITS);
-
- const __m128i src_l = _mm_unpacklo_epi8(src, zero);
- const __m128i src_r = _mm_unpackhi_epi8(src, zero);
- const __m128i diff_l = _mm_sub_epi16(pred_l, src_l);
- const __m128i diff_r = _mm_sub_epi16(pred_r, src_r);
-
- // Update partial sums and partial sums of squares
- *sum =
- _mm_add_epi32(*sum, _mm_madd_epi16(_mm_add_epi16(diff_l, diff_r), one));
- *sum_sq =
- _mm_add_epi32(*sum_sq, _mm_add_epi32(_mm_madd_epi16(diff_l, diff_l),
- _mm_madd_epi16(diff_r, diff_r)));
-}
-
-static void masked_variance(const uint8_t *src_ptr, int src_stride,
- const uint8_t *a_ptr, int a_stride,
- const uint8_t *b_ptr, int b_stride,
- const uint8_t *m_ptr, int m_stride, int width,
- int height, unsigned int *sse, int *sum_) {
- int x, y;
- __m128i sum = _mm_setzero_si128(), sum_sq = _mm_setzero_si128();
-
- for (y = 0; y < height; y++) {
- for (x = 0; x < width; x += 16) {
- const __m128i src = _mm_loadu_si128((const __m128i *)&src_ptr[x]);
- const __m128i a = _mm_loadu_si128((const __m128i *)&a_ptr[x]);
- const __m128i b = _mm_loadu_si128((const __m128i *)&b_ptr[x]);
- const __m128i m = _mm_loadu_si128((const __m128i *)&m_ptr[x]);
- accumulate_block(src, a, b, m, &sum, &sum_sq);
- }
-
- src_ptr += src_stride;
- a_ptr += a_stride;
- b_ptr += b_stride;
- m_ptr += m_stride;
- }
- // Reduce down to a single sum and sum of squares
- sum = _mm_hadd_epi32(sum, sum_sq);
- sum = _mm_hadd_epi32(sum, sum);
- *sum_ = _mm_cvtsi128_si32(sum);
- *sse = _mm_cvtsi128_si32(_mm_srli_si128(sum, 4));
-}
-
-static void masked_variance8xh(const uint8_t *src_ptr, int src_stride,
- const uint8_t *a_ptr, const uint8_t *b_ptr,
- const uint8_t *m_ptr, int m_stride, int height,
- unsigned int *sse, int *sum_) {
- int y;
- __m128i sum = _mm_setzero_si128(), sum_sq = _mm_setzero_si128();
-
- for (y = 0; y < height; y += 2) {
- __m128i src = _mm_unpacklo_epi64(
- _mm_loadl_epi64((const __m128i *)src_ptr),
- _mm_loadl_epi64((const __m128i *)&src_ptr[src_stride]));
- const __m128i a = _mm_loadu_si128((const __m128i *)a_ptr);
- const __m128i b = _mm_loadu_si128((const __m128i *)b_ptr);
- const __m128i m =
- _mm_unpacklo_epi64(_mm_loadl_epi64((const __m128i *)m_ptr),
- _mm_loadl_epi64((const __m128i *)&m_ptr[m_stride]));
- accumulate_block(src, a, b, m, &sum, &sum_sq);
-
- src_ptr += src_stride * 2;
- a_ptr += 16;
- b_ptr += 16;
- m_ptr += m_stride * 2;
- }
- // Reduce down to a single sum and sum of squares
- sum = _mm_hadd_epi32(sum, sum_sq);
- sum = _mm_hadd_epi32(sum, sum);
- *sum_ = _mm_cvtsi128_si32(sum);
- *sse = _mm_cvtsi128_si32(_mm_srli_si128(sum, 4));
-}
-
-static void masked_variance4xh(const uint8_t *src_ptr, int src_stride,
- const uint8_t *a_ptr, const uint8_t *b_ptr,
- const uint8_t *m_ptr, int m_stride, int height,
- unsigned int *sse, int *sum_) {
- int y;
- __m128i sum = _mm_setzero_si128(), sum_sq = _mm_setzero_si128();
-
- for (y = 0; y < height; y += 4) {
- // Load four rows at a time
- __m128i src =
- _mm_setr_epi32(*(uint32_t *)src_ptr, *(uint32_t *)&src_ptr[src_stride],
- *(uint32_t *)&src_ptr[src_stride * 2],
- *(uint32_t *)&src_ptr[src_stride * 3]);
- const __m128i a = _mm_loadu_si128((const __m128i *)a_ptr);
- const __m128i b = _mm_loadu_si128((const __m128i *)b_ptr);
- const __m128i m = _mm_setr_epi32(
- *(uint32_t *)m_ptr, *(uint32_t *)&m_ptr[m_stride],
- *(uint32_t *)&m_ptr[m_stride * 2], *(uint32_t *)&m_ptr[m_stride * 3]);
- accumulate_block(src, a, b, m, &sum, &sum_sq);
-
- src_ptr += src_stride * 4;
- a_ptr += 16;
- b_ptr += 16;
- m_ptr += m_stride * 4;
- }
- // Reduce down to a single sum and sum of squares
- sum = _mm_hadd_epi32(sum, sum_sq);
- sum = _mm_hadd_epi32(sum, sum);
- *sum_ = _mm_cvtsi128_si32(sum);
- *sse = _mm_cvtsi128_si32(_mm_srli_si128(sum, 4));
-}
-
-// For width a multiple of 8
-static void highbd_bilinear_filter(const uint16_t *src, int src_stride,
- int xoffset, int yoffset, uint16_t *dst,
- int w, int h);
-
-static void highbd_bilinear_filter4xh(const uint16_t *src, int src_stride,
- int xoffset, int yoffset, uint16_t *dst,
- int h);
-
-// For width a multiple of 8
-static void highbd_masked_variance(const uint16_t *src_ptr, int src_stride,
- const uint16_t *a_ptr, int a_stride,
- const uint16_t *b_ptr, int b_stride,
- const uint8_t *m_ptr, int m_stride,
- int width, int height, uint64_t *sse,
- int *sum_);
-
-static void highbd_masked_variance4xh(const uint16_t *src_ptr, int src_stride,
- const uint16_t *a_ptr,
- const uint16_t *b_ptr,
- const uint8_t *m_ptr, int m_stride,
- int height, int *sse, int *sum_);
-
-#define HIGHBD_MASK_SUBPIX_VAR_SSSE3(W, H) \
- unsigned int aom_highbd_8_masked_sub_pixel_variance##W##x##H##_ssse3( \
- const uint8_t *src8, int src_stride, int xoffset, int yoffset, \
- const uint8_t *ref8, int ref_stride, const uint8_t *second_pred8, \
- const uint8_t *msk, int msk_stride, int invert_mask, uint32_t *sse) { \
- uint64_t sse64; \
- int sum; \
- uint16_t temp[(H + 1) * W]; \
- const uint16_t *src = CONVERT_TO_SHORTPTR(src8); \
- const uint16_t *ref = CONVERT_TO_SHORTPTR(ref8); \
- const uint16_t *second_pred = CONVERT_TO_SHORTPTR(second_pred8); \
- \
- highbd_bilinear_filter(src, src_stride, xoffset, yoffset, temp, W, H); \
- \
- if (!invert_mask) \
- highbd_masked_variance(ref, ref_stride, temp, W, second_pred, W, msk, \
- msk_stride, W, H, &sse64, &sum); \
- else \
- highbd_masked_variance(ref, ref_stride, second_pred, W, temp, W, msk, \
- msk_stride, W, H, &sse64, &sum); \
- *sse = (uint32_t)sse64; \
- return *sse - (uint32_t)(((int64_t)sum * sum) / (W * H)); \
- } \
- unsigned int aom_highbd_10_masked_sub_pixel_variance##W##x##H##_ssse3( \
- const uint8_t *src8, int src_stride, int xoffset, int yoffset, \
- const uint8_t *ref8, int ref_stride, const uint8_t *second_pred8, \
- const uint8_t *msk, int msk_stride, int invert_mask, uint32_t *sse) { \
- uint64_t sse64; \
- int sum; \
- int64_t var; \
- uint16_t temp[(H + 1) * W]; \
- const uint16_t *src = CONVERT_TO_SHORTPTR(src8); \
- const uint16_t *ref = CONVERT_TO_SHORTPTR(ref8); \
- const uint16_t *second_pred = CONVERT_TO_SHORTPTR(second_pred8); \
- \
- highbd_bilinear_filter(src, src_stride, xoffset, yoffset, temp, W, H); \
- \
- if (!invert_mask) \
- highbd_masked_variance(ref, ref_stride, temp, W, second_pred, W, msk, \
- msk_stride, W, H, &sse64, &sum); \
- else \
- highbd_masked_variance(ref, ref_stride, second_pred, W, temp, W, msk, \
- msk_stride, W, H, &sse64, &sum); \
- *sse = (uint32_t)ROUND_POWER_OF_TWO(sse64, 4); \
- sum = ROUND_POWER_OF_TWO(sum, 2); \
- var = (int64_t)(*sse) - (((int64_t)sum * sum) / (W * H)); \
- return (var >= 0) ? (uint32_t)var : 0; \
- } \
- unsigned int aom_highbd_12_masked_sub_pixel_variance##W##x##H##_ssse3( \
- const uint8_t *src8, int src_stride, int xoffset, int yoffset, \
- const uint8_t *ref8, int ref_stride, const uint8_t *second_pred8, \
- const uint8_t *msk, int msk_stride, int invert_mask, uint32_t *sse) { \
- uint64_t sse64; \
- int sum; \
- int64_t var; \
- uint16_t temp[(H + 1) * W]; \
- const uint16_t *src = CONVERT_TO_SHORTPTR(src8); \
- const uint16_t *ref = CONVERT_TO_SHORTPTR(ref8); \
- const uint16_t *second_pred = CONVERT_TO_SHORTPTR(second_pred8); \
- \
- highbd_bilinear_filter(src, src_stride, xoffset, yoffset, temp, W, H); \
- \
- if (!invert_mask) \
- highbd_masked_variance(ref, ref_stride, temp, W, second_pred, W, msk, \
- msk_stride, W, H, &sse64, &sum); \
- else \
- highbd_masked_variance(ref, ref_stride, second_pred, W, temp, W, msk, \
- msk_stride, W, H, &sse64, &sum); \
- *sse = (uint32_t)ROUND_POWER_OF_TWO(sse64, 8); \
- sum = ROUND_POWER_OF_TWO(sum, 4); \
- var = (int64_t)(*sse) - (((int64_t)sum * sum) / (W * H)); \
- return (var >= 0) ? (uint32_t)var : 0; \
- }
-
-#define HIGHBD_MASK_SUBPIX_VAR4XH_SSSE3(H) \
- unsigned int aom_highbd_8_masked_sub_pixel_variance4x##H##_ssse3( \
- const uint8_t *src8, int src_stride, int xoffset, int yoffset, \
- const uint8_t *ref8, int ref_stride, const uint8_t *second_pred8, \
- const uint8_t *msk, int msk_stride, int invert_mask, uint32_t *sse) { \
- int sse_; \
- int sum; \
- uint16_t temp[(H + 1) * 4]; \
- const uint16_t *src = CONVERT_TO_SHORTPTR(src8); \
- const uint16_t *ref = CONVERT_TO_SHORTPTR(ref8); \
- const uint16_t *second_pred = CONVERT_TO_SHORTPTR(second_pred8); \
- \
- highbd_bilinear_filter4xh(src, src_stride, xoffset, yoffset, temp, H); \
- \
- if (!invert_mask) \
- highbd_masked_variance4xh(ref, ref_stride, temp, second_pred, msk, \
- msk_stride, H, &sse_, &sum); \
- else \
- highbd_masked_variance4xh(ref, ref_stride, second_pred, temp, msk, \
- msk_stride, H, &sse_, &sum); \
- *sse = (uint32_t)sse_; \
- return *sse - (uint32_t)(((int64_t)sum * sum) / (4 * H)); \
- } \
- unsigned int aom_highbd_10_masked_sub_pixel_variance4x##H##_ssse3( \
- const uint8_t *src8, int src_stride, int xoffset, int yoffset, \
- const uint8_t *ref8, int ref_stride, const uint8_t *second_pred8, \
- const uint8_t *msk, int msk_stride, int invert_mask, uint32_t *sse) { \
- int sse_; \
- int sum; \
- int64_t var; \
- uint16_t temp[(H + 1) * 4]; \
- const uint16_t *src = CONVERT_TO_SHORTPTR(src8); \
- const uint16_t *ref = CONVERT_TO_SHORTPTR(ref8); \
- const uint16_t *second_pred = CONVERT_TO_SHORTPTR(second_pred8); \
- \
- highbd_bilinear_filter4xh(src, src_stride, xoffset, yoffset, temp, H); \
- \
- if (!invert_mask) \
- highbd_masked_variance4xh(ref, ref_stride, temp, second_pred, msk, \
- msk_stride, H, &sse_, &sum); \
- else \
- highbd_masked_variance4xh(ref, ref_stride, second_pred, temp, msk, \
- msk_stride, H, &sse_, &sum); \
- *sse = (uint32_t)ROUND_POWER_OF_TWO(sse_, 4); \
- sum = ROUND_POWER_OF_TWO(sum, 2); \
- var = (int64_t)(*sse) - (((int64_t)sum * sum) / (4 * H)); \
- return (var >= 0) ? (uint32_t)var : 0; \
- } \
- unsigned int aom_highbd_12_masked_sub_pixel_variance4x##H##_ssse3( \
- const uint8_t *src8, int src_stride, int xoffset, int yoffset, \
- const uint8_t *ref8, int ref_stride, const uint8_t *second_pred8, \
- const uint8_t *msk, int msk_stride, int invert_mask, uint32_t *sse) { \
- int sse_; \
- int sum; \
- int64_t var; \
- uint16_t temp[(H + 1) * 4]; \
- const uint16_t *src = CONVERT_TO_SHORTPTR(src8); \
- const uint16_t *ref = CONVERT_TO_SHORTPTR(ref8); \
- const uint16_t *second_pred = CONVERT_TO_SHORTPTR(second_pred8); \
- \
- highbd_bilinear_filter4xh(src, src_stride, xoffset, yoffset, temp, H); \
- \
- if (!invert_mask) \
- highbd_masked_variance4xh(ref, ref_stride, temp, second_pred, msk, \
- msk_stride, H, &sse_, &sum); \
- else \
- highbd_masked_variance4xh(ref, ref_stride, second_pred, temp, msk, \
- msk_stride, H, &sse_, &sum); \
- *sse = (uint32_t)ROUND_POWER_OF_TWO(sse_, 8); \
- sum = ROUND_POWER_OF_TWO(sum, 4); \
- var = (int64_t)(*sse) - (((int64_t)sum * sum) / (4 * H)); \
- return (var >= 0) ? (uint32_t)var : 0; \
- }
-
-HIGHBD_MASK_SUBPIX_VAR_SSSE3(128, 128)
-HIGHBD_MASK_SUBPIX_VAR_SSSE3(128, 64)
-HIGHBD_MASK_SUBPIX_VAR_SSSE3(64, 128)
-HIGHBD_MASK_SUBPIX_VAR_SSSE3(64, 64)
-HIGHBD_MASK_SUBPIX_VAR_SSSE3(64, 32)
-HIGHBD_MASK_SUBPIX_VAR_SSSE3(32, 64)
-HIGHBD_MASK_SUBPIX_VAR_SSSE3(32, 32)
-HIGHBD_MASK_SUBPIX_VAR_SSSE3(32, 16)
-HIGHBD_MASK_SUBPIX_VAR_SSSE3(16, 32)
-HIGHBD_MASK_SUBPIX_VAR_SSSE3(16, 16)
-HIGHBD_MASK_SUBPIX_VAR_SSSE3(16, 8)
-HIGHBD_MASK_SUBPIX_VAR_SSSE3(8, 16)
-HIGHBD_MASK_SUBPIX_VAR_SSSE3(8, 8)
-HIGHBD_MASK_SUBPIX_VAR_SSSE3(8, 4)
-HIGHBD_MASK_SUBPIX_VAR4XH_SSSE3(8)
-HIGHBD_MASK_SUBPIX_VAR4XH_SSSE3(4)
-HIGHBD_MASK_SUBPIX_VAR4XH_SSSE3(16)
-HIGHBD_MASK_SUBPIX_VAR_SSSE3(16, 4)
-HIGHBD_MASK_SUBPIX_VAR_SSSE3(8, 32)
-HIGHBD_MASK_SUBPIX_VAR_SSSE3(32, 8)
-HIGHBD_MASK_SUBPIX_VAR_SSSE3(16, 64)
-HIGHBD_MASK_SUBPIX_VAR_SSSE3(64, 16)
-
-static INLINE __m128i highbd_filter_block(const __m128i a, const __m128i b,
- const __m128i filter) {
- __m128i v0 = _mm_unpacklo_epi16(a, b);
- v0 = _mm_madd_epi16(v0, filter);
- v0 = xx_roundn_epu32(v0, FILTER_BITS);
-
- __m128i v1 = _mm_unpackhi_epi16(a, b);
- v1 = _mm_madd_epi16(v1, filter);
- v1 = xx_roundn_epu32(v1, FILTER_BITS);
-
- return _mm_packs_epi32(v0, v1);
-}
-
-static void highbd_bilinear_filter(const uint16_t *src, int src_stride,
- int xoffset, int yoffset, uint16_t *dst,
- int w, int h) {
- int i, j;
- // Horizontal filter
- if (xoffset == 0) {
- uint16_t *b = dst;
- for (i = 0; i < h + 1; ++i) {
- for (j = 0; j < w; j += 8) {
- __m128i x = _mm_loadu_si128((__m128i *)&src[j]);
- _mm_storeu_si128((__m128i *)&b[j], x);
- }
- src += src_stride;
- b += w;
- }
- } else if (xoffset == 4) {
- uint16_t *b = dst;
- for (i = 0; i < h + 1; ++i) {
- for (j = 0; j < w; j += 8) {
- __m128i x = _mm_loadu_si128((__m128i *)&src[j]);
- __m128i y = _mm_loadu_si128((__m128i *)&src[j + 8]);
- __m128i z = _mm_alignr_epi8(y, x, 2);
- _mm_storeu_si128((__m128i *)&b[j], _mm_avg_epu16(x, z));
- }
- src += src_stride;
- b += w;
- }
- } else {
- uint16_t *b = dst;
- const uint8_t *hfilter = bilinear_filters_2t[xoffset];
- const __m128i hfilter_vec = _mm_set1_epi32(hfilter[0] | (hfilter[1] << 16));
- for (i = 0; i < h + 1; ++i) {
- for (j = 0; j < w; j += 8) {
- const __m128i x = _mm_loadu_si128((__m128i *)&src[j]);
- const __m128i y = _mm_loadu_si128((__m128i *)&src[j + 8]);
- const __m128i z = _mm_alignr_epi8(y, x, 2);
- const __m128i res = highbd_filter_block(x, z, hfilter_vec);
- _mm_storeu_si128((__m128i *)&b[j], res);
- }
-
- src += src_stride;
- b += w;
- }
- }
-
- // Vertical filter
- if (yoffset == 0) {
- // The data is already in 'dst', so no need to filter
- } else if (yoffset == 4) {
- for (i = 0; i < h; ++i) {
- for (j = 0; j < w; j += 8) {
- __m128i x = _mm_loadu_si128((__m128i *)&dst[j]);
- __m128i y = _mm_loadu_si128((__m128i *)&dst[j + w]);
- _mm_storeu_si128((__m128i *)&dst[j], _mm_avg_epu16(x, y));
- }
- dst += w;
- }
- } else {
- const uint8_t *vfilter = bilinear_filters_2t[yoffset];
- const __m128i vfilter_vec = _mm_set1_epi32(vfilter[0] | (vfilter[1] << 16));
- for (i = 0; i < h; ++i) {
- for (j = 0; j < w; j += 8) {
- const __m128i x = _mm_loadu_si128((__m128i *)&dst[j]);
- const __m128i y = _mm_loadu_si128((__m128i *)&dst[j + w]);
- const __m128i res = highbd_filter_block(x, y, vfilter_vec);
- _mm_storeu_si128((__m128i *)&dst[j], res);
- }
-
- dst += w;
- }
- }
-}
-
-static INLINE __m128i highbd_filter_block_2rows(const __m128i a0,
- const __m128i b0,
- const __m128i a1,
- const __m128i b1,
- const __m128i filter) {
- __m128i v0 = _mm_unpacklo_epi16(a0, b0);
- v0 = _mm_madd_epi16(v0, filter);
- v0 = xx_roundn_epu32(v0, FILTER_BITS);
-
- __m128i v1 = _mm_unpacklo_epi16(a1, b1);
- v1 = _mm_madd_epi16(v1, filter);
- v1 = xx_roundn_epu32(v1, FILTER_BITS);
-
- return _mm_packs_epi32(v0, v1);
-}
-
-static void highbd_bilinear_filter4xh(const uint16_t *src, int src_stride,
- int xoffset, int yoffset, uint16_t *dst,
- int h) {
- int i;
- // Horizontal filter
- if (xoffset == 0) {
- uint16_t *b = dst;
- for (i = 0; i < h + 1; ++i) {
- __m128i x = _mm_loadl_epi64((__m128i *)src);
- _mm_storel_epi64((__m128i *)b, x);
- src += src_stride;
- b += 4;
- }
- } else if (xoffset == 4) {
- uint16_t *b = dst;
- for (i = 0; i < h + 1; ++i) {
- __m128i x = _mm_loadu_si128((__m128i *)src);
- __m128i z = _mm_srli_si128(x, 2);
- _mm_storel_epi64((__m128i *)b, _mm_avg_epu16(x, z));
- src += src_stride;
- b += 4;
- }
- } else {
- uint16_t *b = dst;
- const uint8_t *hfilter = bilinear_filters_2t[xoffset];
- const __m128i hfilter_vec = _mm_set1_epi32(hfilter[0] | (hfilter[1] << 16));
- for (i = 0; i < h; i += 2) {
- const __m128i x0 = _mm_loadu_si128((__m128i *)src);
- const __m128i z0 = _mm_srli_si128(x0, 2);
- const __m128i x1 = _mm_loadu_si128((__m128i *)&src[src_stride]);
- const __m128i z1 = _mm_srli_si128(x1, 2);
- const __m128i res =
- highbd_filter_block_2rows(x0, z0, x1, z1, hfilter_vec);
- _mm_storeu_si128((__m128i *)b, res);
-
- src += src_stride * 2;
- b += 8;
- }
- // Process i = h separately
- __m128i x = _mm_loadu_si128((__m128i *)src);
- __m128i z = _mm_srli_si128(x, 2);
-
- __m128i v0 = _mm_unpacklo_epi16(x, z);
- v0 = _mm_madd_epi16(v0, hfilter_vec);
- v0 = xx_roundn_epu32(v0, FILTER_BITS);
-
- _mm_storel_epi64((__m128i *)b, _mm_packs_epi32(v0, v0));
- }
-
- // Vertical filter
- if (yoffset == 0) {
- // The data is already in 'dst', so no need to filter
- } else if (yoffset == 4) {
- for (i = 0; i < h; ++i) {
- __m128i x = _mm_loadl_epi64((__m128i *)dst);
- __m128i y = _mm_loadl_epi64((__m128i *)&dst[4]);
- _mm_storel_epi64((__m128i *)dst, _mm_avg_epu16(x, y));
- dst += 4;
- }
- } else {
- const uint8_t *vfilter = bilinear_filters_2t[yoffset];
- const __m128i vfilter_vec = _mm_set1_epi32(vfilter[0] | (vfilter[1] << 16));
- for (i = 0; i < h; i += 2) {
- const __m128i x = _mm_loadl_epi64((__m128i *)dst);
- const __m128i y = _mm_loadl_epi64((__m128i *)&dst[4]);
- const __m128i z = _mm_loadl_epi64((__m128i *)&dst[8]);
- const __m128i res = highbd_filter_block_2rows(x, y, y, z, vfilter_vec);
- _mm_storeu_si128((__m128i *)dst, res);
-
- dst += 8;
- }
- }
-}
-
-static void highbd_masked_variance(const uint16_t *src_ptr, int src_stride,
- const uint16_t *a_ptr, int a_stride,
- const uint16_t *b_ptr, int b_stride,
- const uint8_t *m_ptr, int m_stride,
- int width, int height, uint64_t *sse,
- int *sum_) {
- int x, y;
- // Note on bit widths:
- // The maximum value of 'sum' is (2^12 - 1) * 128 * 128 =~ 2^26,
- // so this can be kept as four 32-bit values.
- // But the maximum value of 'sum_sq' is (2^12 - 1)^2 * 128 * 128 =~ 2^38,
- // so this must be stored as two 64-bit values.
- __m128i sum = _mm_setzero_si128(), sum_sq = _mm_setzero_si128();
- const __m128i mask_max = _mm_set1_epi16((1 << AOM_BLEND_A64_ROUND_BITS));
- const __m128i round_const =
- _mm_set1_epi32((1 << AOM_BLEND_A64_ROUND_BITS) >> 1);
- const __m128i zero = _mm_setzero_si128();
-
- for (y = 0; y < height; y++) {
- for (x = 0; x < width; x += 8) {
- const __m128i src = _mm_loadu_si128((const __m128i *)&src_ptr[x]);
- const __m128i a = _mm_loadu_si128((const __m128i *)&a_ptr[x]);
- const __m128i b = _mm_loadu_si128((const __m128i *)&b_ptr[x]);
- const __m128i m =
- _mm_unpacklo_epi8(_mm_loadl_epi64((const __m128i *)&m_ptr[x]), zero);
- const __m128i m_inv = _mm_sub_epi16(mask_max, m);
-
- // Calculate 8 predicted pixels.
- const __m128i data_l = _mm_unpacklo_epi16(a, b);
- const __m128i mask_l = _mm_unpacklo_epi16(m, m_inv);
- __m128i pred_l = _mm_madd_epi16(data_l, mask_l);
- pred_l = _mm_srai_epi32(_mm_add_epi32(pred_l, round_const),
- AOM_BLEND_A64_ROUND_BITS);
-
- const __m128i data_r = _mm_unpackhi_epi16(a, b);
- const __m128i mask_r = _mm_unpackhi_epi16(m, m_inv);
- __m128i pred_r = _mm_madd_epi16(data_r, mask_r);
- pred_r = _mm_srai_epi32(_mm_add_epi32(pred_r, round_const),
- AOM_BLEND_A64_ROUND_BITS);
-
- const __m128i src_l = _mm_unpacklo_epi16(src, zero);
- const __m128i src_r = _mm_unpackhi_epi16(src, zero);
- __m128i diff_l = _mm_sub_epi32(pred_l, src_l);
- __m128i diff_r = _mm_sub_epi32(pred_r, src_r);
-
- // Update partial sums and partial sums of squares
- sum = _mm_add_epi32(sum, _mm_add_epi32(diff_l, diff_r));
- // A trick: Now each entry of diff_l and diff_r is stored in a 32-bit
- // field, but the range of values is only [-(2^12 - 1), 2^12 - 1].
- // So we can re-pack into 16-bit fields and use _mm_madd_epi16
- // to calculate the squares and partially sum them.
- const __m128i tmp = _mm_packs_epi32(diff_l, diff_r);
- const __m128i prod = _mm_madd_epi16(tmp, tmp);
- // Then we want to sign-extend to 64 bits and accumulate
- const __m128i sign = _mm_srai_epi32(prod, 31);
- const __m128i tmp_0 = _mm_unpacklo_epi32(prod, sign);
- const __m128i tmp_1 = _mm_unpackhi_epi32(prod, sign);
- sum_sq = _mm_add_epi64(sum_sq, _mm_add_epi64(tmp_0, tmp_1));
- }
-
- src_ptr += src_stride;
- a_ptr += a_stride;
- b_ptr += b_stride;
- m_ptr += m_stride;
- }
- // Reduce down to a single sum and sum of squares
- sum = _mm_hadd_epi32(sum, zero);
- sum = _mm_hadd_epi32(sum, zero);
- *sum_ = _mm_cvtsi128_si32(sum);
- sum_sq = _mm_add_epi64(sum_sq, _mm_srli_si128(sum_sq, 8));
- _mm_storel_epi64((__m128i *)sse, sum_sq);
-}
-
-static void highbd_masked_variance4xh(const uint16_t *src_ptr, int src_stride,
- const uint16_t *a_ptr,
- const uint16_t *b_ptr,
- const uint8_t *m_ptr, int m_stride,
- int height, int *sse, int *sum_) {
- int y;
- // Note: For this function, h <= 8 (or maybe 16 if we add 4:1 partitions).
- // So the maximum value of sum is (2^12 - 1) * 4 * 16 =~ 2^18
- // and the maximum value of sum_sq is (2^12 - 1)^2 * 4 * 16 =~ 2^30.
- // So we can safely pack sum_sq into 32-bit fields, which is slightly more
- // convenient.
- __m128i sum = _mm_setzero_si128(), sum_sq = _mm_setzero_si128();
- const __m128i mask_max = _mm_set1_epi16((1 << AOM_BLEND_A64_ROUND_BITS));
- const __m128i round_const =
- _mm_set1_epi32((1 << AOM_BLEND_A64_ROUND_BITS) >> 1);
- const __m128i zero = _mm_setzero_si128();
-
- for (y = 0; y < height; y += 2) {
- __m128i src = _mm_unpacklo_epi64(
- _mm_loadl_epi64((const __m128i *)src_ptr),
- _mm_loadl_epi64((const __m128i *)&src_ptr[src_stride]));
- const __m128i a = _mm_loadu_si128((const __m128i *)a_ptr);
- const __m128i b = _mm_loadu_si128((const __m128i *)b_ptr);
- const __m128i m = _mm_unpacklo_epi8(
- _mm_unpacklo_epi32(
- _mm_cvtsi32_si128(*(const uint32_t *)m_ptr),
- _mm_cvtsi32_si128(*(const uint32_t *)&m_ptr[m_stride])),
- zero);
- const __m128i m_inv = _mm_sub_epi16(mask_max, m);
-
- const __m128i data_l = _mm_unpacklo_epi16(a, b);
- const __m128i mask_l = _mm_unpacklo_epi16(m, m_inv);
- __m128i pred_l = _mm_madd_epi16(data_l, mask_l);
- pred_l = _mm_srai_epi32(_mm_add_epi32(pred_l, round_const),
- AOM_BLEND_A64_ROUND_BITS);
-
- const __m128i data_r = _mm_unpackhi_epi16(a, b);
- const __m128i mask_r = _mm_unpackhi_epi16(m, m_inv);
- __m128i pred_r = _mm_madd_epi16(data_r, mask_r);
- pred_r = _mm_srai_epi32(_mm_add_epi32(pred_r, round_const),
- AOM_BLEND_A64_ROUND_BITS);
-
- const __m128i src_l = _mm_unpacklo_epi16(src, zero);
- const __m128i src_r = _mm_unpackhi_epi16(src, zero);
- __m128i diff_l = _mm_sub_epi32(pred_l, src_l);
- __m128i diff_r = _mm_sub_epi32(pred_r, src_r);
-
- // Update partial sums and partial sums of squares
- sum = _mm_add_epi32(sum, _mm_add_epi32(diff_l, diff_r));
- const __m128i tmp = _mm_packs_epi32(diff_l, diff_r);
- const __m128i prod = _mm_madd_epi16(tmp, tmp);
- sum_sq = _mm_add_epi32(sum_sq, prod);
-
- src_ptr += src_stride * 2;
- a_ptr += 8;
- b_ptr += 8;
- m_ptr += m_stride * 2;
- }
- // Reduce down to a single sum and sum of squares
- sum = _mm_hadd_epi32(sum, sum_sq);
- sum = _mm_hadd_epi32(sum, zero);
- *sum_ = _mm_cvtsi128_si32(sum);
- *sse = _mm_cvtsi128_si32(_mm_srli_si128(sum, 4));
-}
-
-void aom_comp_mask_pred_ssse3(uint8_t *comp_pred, const uint8_t *pred,
- int width, int height, const uint8_t *ref,
- int ref_stride, const uint8_t *mask,
- int mask_stride, int invert_mask) {
- const uint8_t *src0 = invert_mask ? pred : ref;
- const uint8_t *src1 = invert_mask ? ref : pred;
- const int stride0 = invert_mask ? width : ref_stride;
- const int stride1 = invert_mask ? ref_stride : width;
- assert(height % 2 == 0);
- int i = 0;
- if (width == 8) {
- comp_mask_pred_8_ssse3(comp_pred, height, src0, stride0, src1, stride1,
- mask, mask_stride);
- } else if (width == 16) {
- do {
- comp_mask_pred_16_ssse3(src0, src1, mask, comp_pred);
- comp_mask_pred_16_ssse3(src0 + stride0, src1 + stride1,
- mask + mask_stride, comp_pred + width);
- comp_pred += (width << 1);
- src0 += (stride0 << 1);
- src1 += (stride1 << 1);
- mask += (mask_stride << 1);
- i += 2;
- } while (i < height);
- } else { // width == 32
- assert(width == 32);
- do {
- comp_mask_pred_16_ssse3(src0, src1, mask, comp_pred);
- comp_mask_pred_16_ssse3(src0 + 16, src1 + 16, mask + 16, comp_pred + 16);
- comp_pred += (width);
- src0 += (stride0);
- src1 += (stride1);
- mask += (mask_stride);
- i += 1;
- } while (i < height);
- }
-}
diff --git a/third_party/aom/aom_dsp/x86/masked_variance_intrin_ssse3.h b/third_party/aom/aom_dsp/x86/masked_variance_intrin_ssse3.h
deleted file mode 100644
index 4faa098ac..000000000
--- a/third_party/aom/aom_dsp/x86/masked_variance_intrin_ssse3.h
+++ /dev/null
@@ -1,92 +0,0 @@
-/*
- * Copyright (c) 2018, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#ifndef AOM_AOM_DSP_X86_MASKED_VARIANCE_INTRIN_SSSE3_H_
-#define AOM_AOM_DSP_X86_MASKED_VARIANCE_INTRIN_SSSE3_H_
-
-#include <stdlib.h>
-#include <string.h>
-#include <tmmintrin.h>
-
-#include "config/aom_config.h"
-#include "config/aom_dsp_rtcd.h"
-
-#include "aom_dsp/blend.h"
-
-static INLINE void comp_mask_pred_16_ssse3(const uint8_t *src0,
- const uint8_t *src1,
- const uint8_t *mask, uint8_t *dst) {
- const __m128i alpha_max = _mm_set1_epi8(AOM_BLEND_A64_MAX_ALPHA);
- const __m128i round_offset =
- _mm_set1_epi16(1 << (15 - AOM_BLEND_A64_ROUND_BITS));
-
- const __m128i sA0 = _mm_lddqu_si128((const __m128i *)(src0));
- const __m128i sA1 = _mm_lddqu_si128((const __m128i *)(src1));
- const __m128i aA = _mm_load_si128((const __m128i *)(mask));
-
- const __m128i maA = _mm_sub_epi8(alpha_max, aA);
-
- const __m128i ssAL = _mm_unpacklo_epi8(sA0, sA1);
- const __m128i aaAL = _mm_unpacklo_epi8(aA, maA);
- const __m128i ssAH = _mm_unpackhi_epi8(sA0, sA1);
- const __m128i aaAH = _mm_unpackhi_epi8(aA, maA);
-
- const __m128i blendAL = _mm_maddubs_epi16(ssAL, aaAL);
- const __m128i blendAH = _mm_maddubs_epi16(ssAH, aaAH);
-
- const __m128i roundAL = _mm_mulhrs_epi16(blendAL, round_offset);
- const __m128i roundAH = _mm_mulhrs_epi16(blendAH, round_offset);
- _mm_store_si128((__m128i *)dst, _mm_packus_epi16(roundAL, roundAH));
-}
-
-static INLINE void comp_mask_pred_8_ssse3(uint8_t *comp_pred, int height,
- const uint8_t *src0, int stride0,
- const uint8_t *src1, int stride1,
- const uint8_t *mask,
- int mask_stride) {
- int i = 0;
- const __m128i alpha_max = _mm_set1_epi8(AOM_BLEND_A64_MAX_ALPHA);
- const __m128i round_offset =
- _mm_set1_epi16(1 << (15 - AOM_BLEND_A64_ROUND_BITS));
- do {
- // odd line A
- const __m128i sA0 = _mm_loadl_epi64((const __m128i *)(src0));
- const __m128i sA1 = _mm_loadl_epi64((const __m128i *)(src1));
- const __m128i aA = _mm_loadl_epi64((const __m128i *)(mask));
- // even line B
- const __m128i sB0 = _mm_loadl_epi64((const __m128i *)(src0 + stride0));
- const __m128i sB1 = _mm_loadl_epi64((const __m128i *)(src1 + stride1));
- const __m128i a = _mm_castps_si128(_mm_loadh_pi(
- _mm_castsi128_ps(aA), (const __m64 *)(mask + mask_stride)));
-
- const __m128i ssA = _mm_unpacklo_epi8(sA0, sA1);
- const __m128i ssB = _mm_unpacklo_epi8(sB0, sB1);
-
- const __m128i ma = _mm_sub_epi8(alpha_max, a);
- const __m128i aaA = _mm_unpacklo_epi8(a, ma);
- const __m128i aaB = _mm_unpackhi_epi8(a, ma);
-
- const __m128i blendA = _mm_maddubs_epi16(ssA, aaA);
- const __m128i blendB = _mm_maddubs_epi16(ssB, aaB);
- const __m128i roundA = _mm_mulhrs_epi16(blendA, round_offset);
- const __m128i roundB = _mm_mulhrs_epi16(blendB, round_offset);
- const __m128i round = _mm_packus_epi16(roundA, roundB);
- // comp_pred's stride == width == 8
- _mm_store_si128((__m128i *)(comp_pred), round);
- comp_pred += (8 << 1);
- src0 += (stride0 << 1);
- src1 += (stride1 << 1);
- mask += (mask_stride << 1);
- i += 2;
- } while (i < height);
-}
-
-#endif // AOM_AOM_DSP_X86_MASKED_VARIANCE_INTRIN_SSSE3_H_
diff --git a/third_party/aom/aom_dsp/x86/mem_sse2.h b/third_party/aom/aom_dsp/x86/mem_sse2.h
deleted file mode 100644
index 6c821673e..000000000
--- a/third_party/aom/aom_dsp/x86/mem_sse2.h
+++ /dev/null
@@ -1,42 +0,0 @@
-/*
- * Copyright (c) 2017, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#ifndef AOM_AOM_DSP_X86_MEM_SSE2_H_
-#define AOM_AOM_DSP_X86_MEM_SSE2_H_
-
-#include <emmintrin.h> // SSE2
-
-#include "config/aom_config.h"
-
-#include "aom/aom_integer.h"
-
-static INLINE __m128i loadh_epi64(const void *const src, const __m128i s) {
- return _mm_castps_si128(
- _mm_loadh_pi(_mm_castsi128_ps(s), (const __m64 *)src));
-}
-
-static INLINE __m128i load_8bit_4x4_to_1_reg_sse2(const void *const src,
- const int byte_stride) {
- return _mm_setr_epi32(*(const int32_t *)((int8_t *)src + 0 * byte_stride),
- *(const int32_t *)((int8_t *)src + 1 * byte_stride),
- *(const int32_t *)((int8_t *)src + 2 * byte_stride),
- *(const int32_t *)((int8_t *)src + 3 * byte_stride));
-}
-
-static INLINE __m128i load_8bit_8x2_to_1_reg_sse2(const void *const src,
- const int byte_stride) {
- __m128i dst;
- dst = _mm_loadl_epi64((__m128i *)((int8_t *)src + 0 * byte_stride));
- dst = loadh_epi64((int8_t *)src + 1 * byte_stride, dst);
- return dst;
-}
-
-#endif // AOM_AOM_DSP_X86_MEM_SSE2_H_
diff --git a/third_party/aom/aom_dsp/x86/obmc_intrinsic_sse4.h b/third_party/aom/aom_dsp/x86/obmc_intrinsic_sse4.h
deleted file mode 100644
index 5181e444c..000000000
--- a/third_party/aom/aom_dsp/x86/obmc_intrinsic_sse4.h
+++ /dev/null
@@ -1,58 +0,0 @@
-/*
- * Copyright (c) 2018, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#ifndef AOM_AOM_DSP_X86_OBMC_INTRINSIC_SSE4_H_
-#define AOM_AOM_DSP_X86_OBMC_INTRINSIC_SSE4_H_
-
-#include <smmintrin.h>
-
-#include "aom_dsp/x86/obmc_intrinsic_ssse3.h"
-
-static INLINE void obmc_variance_w4(const uint8_t *pre, const int pre_stride,
- const int32_t *wsrc, const int32_t *mask,
- unsigned int *const sse, int *const sum,
- const int h) {
- const int pre_step = pre_stride - 4;
- int n = 0;
- __m128i v_sum_d = _mm_setzero_si128();
- __m128i v_sse_d = _mm_setzero_si128();
-
- assert(IS_POWER_OF_TWO(h));
-
- do {
- const __m128i v_p_b = _mm_cvtsi32_si128(*(const uint32_t *)(pre + n));
- const __m128i v_m_d = _mm_load_si128((const __m128i *)(mask + n));
- const __m128i v_w_d = _mm_load_si128((const __m128i *)(wsrc + n));
-
- const __m128i v_p_d = _mm_cvtepu8_epi32(v_p_b);
-
- // Values in both pre and mask fit in 15 bits, and are packed at 32 bit
- // boundaries. We use pmaddwd, as it has lower latency on Haswell
- // than pmulld but produces the same result with these inputs.
- const __m128i v_pm_d = _mm_madd_epi16(v_p_d, v_m_d);
-
- const __m128i v_diff_d = _mm_sub_epi32(v_w_d, v_pm_d);
- const __m128i v_rdiff_d = xx_roundn_epi32(v_diff_d, 12);
- const __m128i v_sqrdiff_d = _mm_mullo_epi32(v_rdiff_d, v_rdiff_d);
-
- v_sum_d = _mm_add_epi32(v_sum_d, v_rdiff_d);
- v_sse_d = _mm_add_epi32(v_sse_d, v_sqrdiff_d);
-
- n += 4;
-
- if (n % 4 == 0) pre += pre_step;
- } while (n < 4 * h);
-
- *sum = xx_hsum_epi32_si32(v_sum_d);
- *sse = xx_hsum_epi32_si32(v_sse_d);
-}
-
-#endif // AOM_AOM_DSP_X86_OBMC_INTRINSIC_SSE4_H_
diff --git a/third_party/aom/aom_dsp/x86/obmc_intrinsic_ssse3.h b/third_party/aom/aom_dsp/x86/obmc_intrinsic_ssse3.h
deleted file mode 100644
index 48486c6c4..000000000
--- a/third_party/aom/aom_dsp/x86/obmc_intrinsic_ssse3.h
+++ /dev/null
@@ -1,54 +0,0 @@
-/*
- * Copyright (c) 2017, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#ifndef AOM_AOM_DSP_X86_OBMC_INTRINSIC_SSSE3_H_
-#define AOM_AOM_DSP_X86_OBMC_INTRINSIC_SSSE3_H_
-
-#include <immintrin.h>
-
-#include "config/aom_config.h"
-
-static INLINE int32_t xx_hsum_epi32_si32(__m128i v_d) {
- v_d = _mm_hadd_epi32(v_d, v_d);
- v_d = _mm_hadd_epi32(v_d, v_d);
- return _mm_cvtsi128_si32(v_d);
-}
-
-static INLINE int64_t xx_hsum_epi64_si64(__m128i v_q) {
- v_q = _mm_add_epi64(v_q, _mm_srli_si128(v_q, 8));
-#if ARCH_X86_64
- return _mm_cvtsi128_si64(v_q);
-#else
- {
- int64_t tmp;
- _mm_storel_epi64((__m128i *)&tmp, v_q);
- return tmp;
- }
-#endif
-}
-
-static INLINE int64_t xx_hsum_epi32_si64(__m128i v_d) {
- const __m128i v_sign_d = _mm_cmplt_epi32(v_d, _mm_setzero_si128());
- const __m128i v_0_q = _mm_unpacklo_epi32(v_d, v_sign_d);
- const __m128i v_1_q = _mm_unpackhi_epi32(v_d, v_sign_d);
- return xx_hsum_epi64_si64(_mm_add_epi64(v_0_q, v_1_q));
-}
-
-// This is equivalent to ROUND_POWER_OF_TWO_SIGNED(v_val_d, bits)
-static INLINE __m128i xx_roundn_epi32(__m128i v_val_d, int bits) {
- const __m128i v_bias_d = _mm_set1_epi32((1 << bits) >> 1);
- const __m128i v_sign_d = _mm_srai_epi32(v_val_d, 31);
- const __m128i v_tmp_d =
- _mm_add_epi32(_mm_add_epi32(v_val_d, v_bias_d), v_sign_d);
- return _mm_srai_epi32(v_tmp_d, bits);
-}
-
-#endif // AOM_AOM_DSP_X86_OBMC_INTRINSIC_SSSE3_H_
diff --git a/third_party/aom/aom_dsp/x86/obmc_sad_avx2.c b/third_party/aom/aom_dsp/x86/obmc_sad_avx2.c
deleted file mode 100644
index 2aa2a0555..000000000
--- a/third_party/aom/aom_dsp/x86/obmc_sad_avx2.c
+++ /dev/null
@@ -1,270 +0,0 @@
-/*
- * Copyright (c) 2018, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#include <assert.h>
-#include <immintrin.h>
-
-#include "config/aom_config.h"
-
-#include "aom_ports/mem.h"
-#include "aom/aom_integer.h"
-
-#include "aom_dsp/aom_dsp_common.h"
-#include "aom_dsp/x86/obmc_intrinsic_ssse3.h"
-#include "aom_dsp/x86/synonyms.h"
-
-////////////////////////////////////////////////////////////////////////////////
-// 8 bit
-////////////////////////////////////////////////////////////////////////////////
-
-static INLINE unsigned int obmc_sad_w4_avx2(const uint8_t *pre,
- const int pre_stride,
- const int32_t *wsrc,
- const int32_t *mask,
- const int height) {
- int n = 0;
- __m256i v_sad_d = _mm256_setzero_si256();
- const __m256i v_bias_d = _mm256_set1_epi32((1 << 12) >> 1);
-
- do {
- const __m128i v_p_b_0 = xx_loadl_32(pre);
- const __m128i v_p_b_1 = xx_loadl_32(pre + pre_stride);
- const __m128i v_p_b = _mm_unpacklo_epi32(v_p_b_0, v_p_b_1);
- const __m256i v_m_d = _mm256_lddqu_si256((__m256i *)(mask + n));
- const __m256i v_w_d = _mm256_lddqu_si256((__m256i *)(wsrc + n));
-
- const __m256i v_p_d = _mm256_cvtepu8_epi32(v_p_b);
-
- // Values in both pre and mask fit in 15 bits, and are packed at 32 bit
- // boundaries. We use pmaddwd, as it has lower latency on Haswell
- // than pmulld but produces the same result with these inputs.
- const __m256i v_pm_d = _mm256_madd_epi16(v_p_d, v_m_d);
-
- const __m256i v_diff_d = _mm256_sub_epi32(v_w_d, v_pm_d);
- const __m256i v_absdiff_d = _mm256_abs_epi32(v_diff_d);
-
- // Rounded absolute difference
- const __m256i v_tmp_d = _mm256_add_epi32(v_absdiff_d, v_bias_d);
- const __m256i v_rad_d = _mm256_srli_epi32(v_tmp_d, 12);
-
- v_sad_d = _mm256_add_epi32(v_sad_d, v_rad_d);
-
- n += 8;
- pre += pre_stride << 1;
- } while (n < 8 * (height >> 1));
-
- __m128i v_sad_d_0 = _mm256_castsi256_si128(v_sad_d);
- __m128i v_sad_d_1 = _mm256_extracti128_si256(v_sad_d, 1);
- v_sad_d_0 = _mm_add_epi32(v_sad_d_0, v_sad_d_1);
- return xx_hsum_epi32_si32(v_sad_d_0);
-}
-
-static INLINE unsigned int obmc_sad_w8n_avx2(
- const uint8_t *pre, const int pre_stride, const int32_t *wsrc,
- const int32_t *mask, const int width, const int height) {
- const int pre_step = pre_stride - width;
- int n = 0;
- __m256i v_sad_d = _mm256_setzero_si256();
- const __m256i v_bias_d = _mm256_set1_epi32((1 << 12) >> 1);
- assert(width >= 8);
- assert(IS_POWER_OF_TWO(width));
-
- do {
- const __m128i v_p0_b = xx_loadl_64(pre + n);
- const __m256i v_m0_d = _mm256_lddqu_si256((__m256i *)(mask + n));
- const __m256i v_w0_d = _mm256_lddqu_si256((__m256i *)(wsrc + n));
-
- const __m256i v_p0_d = _mm256_cvtepu8_epi32(v_p0_b);
-
- // Values in both pre and mask fit in 15 bits, and are packed at 32 bit
- // boundaries. We use pmaddwd, as it has lower latency on Haswell
- // than pmulld but produces the same result with these inputs.
- const __m256i v_pm0_d = _mm256_madd_epi16(v_p0_d, v_m0_d);
-
- const __m256i v_diff0_d = _mm256_sub_epi32(v_w0_d, v_pm0_d);
- const __m256i v_absdiff0_d = _mm256_abs_epi32(v_diff0_d);
-
- // Rounded absolute difference
- const __m256i v_tmp_d = _mm256_add_epi32(v_absdiff0_d, v_bias_d);
- const __m256i v_rad0_d = _mm256_srli_epi32(v_tmp_d, 12);
-
- v_sad_d = _mm256_add_epi32(v_sad_d, v_rad0_d);
-
- n += 8;
-
- if ((n & (width - 1)) == 0) pre += pre_step;
- } while (n < width * height);
-
- __m128i v_sad_d_0 = _mm256_castsi256_si128(v_sad_d);
- __m128i v_sad_d_1 = _mm256_extracti128_si256(v_sad_d, 1);
- v_sad_d_0 = _mm_add_epi32(v_sad_d_0, v_sad_d_1);
- return xx_hsum_epi32_si32(v_sad_d_0);
-}
-
-#define OBMCSADWXH(w, h) \
- unsigned int aom_obmc_sad##w##x##h##_avx2( \
- const uint8_t *pre, int pre_stride, const int32_t *wsrc, \
- const int32_t *msk) { \
- if (w == 4) { \
- return obmc_sad_w4_avx2(pre, pre_stride, wsrc, msk, h); \
- } else { \
- return obmc_sad_w8n_avx2(pre, pre_stride, wsrc, msk, w, h); \
- } \
- }
-
-OBMCSADWXH(128, 128)
-OBMCSADWXH(128, 64)
-OBMCSADWXH(64, 128)
-OBMCSADWXH(64, 64)
-OBMCSADWXH(64, 32)
-OBMCSADWXH(32, 64)
-OBMCSADWXH(32, 32)
-OBMCSADWXH(32, 16)
-OBMCSADWXH(16, 32)
-OBMCSADWXH(16, 16)
-OBMCSADWXH(16, 8)
-OBMCSADWXH(8, 16)
-OBMCSADWXH(8, 8)
-OBMCSADWXH(8, 4)
-OBMCSADWXH(4, 8)
-OBMCSADWXH(4, 4)
-OBMCSADWXH(4, 16)
-OBMCSADWXH(16, 4)
-OBMCSADWXH(8, 32)
-OBMCSADWXH(32, 8)
-OBMCSADWXH(16, 64)
-OBMCSADWXH(64, 16)
-
-////////////////////////////////////////////////////////////////////////////////
-// High bit-depth
-////////////////////////////////////////////////////////////////////////////////
-
-static INLINE unsigned int hbd_obmc_sad_w4_avx2(const uint8_t *pre8,
- const int pre_stride,
- const int32_t *wsrc,
- const int32_t *mask,
- const int height) {
- const uint16_t *pre = CONVERT_TO_SHORTPTR(pre8);
- int n = 0;
- __m256i v_sad_d = _mm256_setzero_si256();
- const __m256i v_bias_d = _mm256_set1_epi32((1 << 12) >> 1);
- do {
- const __m128i v_p_w_0 = xx_loadl_64(pre);
- const __m128i v_p_w_1 = xx_loadl_64(pre + pre_stride);
- const __m128i v_p_w = _mm_unpacklo_epi64(v_p_w_0, v_p_w_1);
- const __m256i v_m_d = _mm256_lddqu_si256((__m256i *)(mask + n));
- const __m256i v_w_d = _mm256_lddqu_si256((__m256i *)(wsrc + n));
-
- const __m256i v_p_d = _mm256_cvtepu16_epi32(v_p_w);
-
- // Values in both pre and mask fit in 15 bits, and are packed at 32 bit
- // boundaries. We use pmaddwd, as it has lower latency on Haswell
- // than pmulld but produces the same result with these inputs.
- const __m256i v_pm_d = _mm256_madd_epi16(v_p_d, v_m_d);
-
- const __m256i v_diff_d = _mm256_sub_epi32(v_w_d, v_pm_d);
- const __m256i v_absdiff_d = _mm256_abs_epi32(v_diff_d);
-
- // Rounded absolute difference
-
- const __m256i v_tmp_d = _mm256_add_epi32(v_absdiff_d, v_bias_d);
- const __m256i v_rad_d = _mm256_srli_epi32(v_tmp_d, 12);
-
- v_sad_d = _mm256_add_epi32(v_sad_d, v_rad_d);
-
- n += 8;
-
- pre += pre_stride << 1;
- } while (n < 8 * (height >> 1));
-
- __m128i v_sad_d_0 = _mm256_castsi256_si128(v_sad_d);
- __m128i v_sad_d_1 = _mm256_extracti128_si256(v_sad_d, 1);
- v_sad_d_0 = _mm_add_epi32(v_sad_d_0, v_sad_d_1);
- return xx_hsum_epi32_si32(v_sad_d_0);
-}
-
-static INLINE unsigned int hbd_obmc_sad_w8n_avx2(
- const uint8_t *pre8, const int pre_stride, const int32_t *wsrc,
- const int32_t *mask, const int width, const int height) {
- const uint16_t *pre = CONVERT_TO_SHORTPTR(pre8);
- const int pre_step = pre_stride - width;
- int n = 0;
- __m256i v_sad_d = _mm256_setzero_si256();
- const __m256i v_bias_d = _mm256_set1_epi32((1 << 12) >> 1);
-
- assert(width >= 8);
- assert(IS_POWER_OF_TWO(width));
-
- do {
- const __m128i v_p0_w = _mm_lddqu_si128((__m128i *)(pre + n));
- const __m256i v_m0_d = _mm256_lddqu_si256((__m256i *)(mask + n));
- const __m256i v_w0_d = _mm256_lddqu_si256((__m256i *)(wsrc + n));
-
- const __m256i v_p0_d = _mm256_cvtepu16_epi32(v_p0_w);
-
- // Values in both pre and mask fit in 15 bits, and are packed at 32 bit
- // boundaries. We use pmaddwd, as it has lower latency on Haswell
- // than pmulld but produces the same result with these inputs.
- const __m256i v_pm0_d = _mm256_madd_epi16(v_p0_d, v_m0_d);
-
- const __m256i v_diff0_d = _mm256_sub_epi32(v_w0_d, v_pm0_d);
- const __m256i v_absdiff0_d = _mm256_abs_epi32(v_diff0_d);
-
- // Rounded absolute difference
- const __m256i v_tmp_d = _mm256_add_epi32(v_absdiff0_d, v_bias_d);
- const __m256i v_rad0_d = _mm256_srli_epi32(v_tmp_d, 12);
-
- v_sad_d = _mm256_add_epi32(v_sad_d, v_rad0_d);
-
- n += 8;
-
- if (n % width == 0) pre += pre_step;
- } while (n < width * height);
-
- __m128i v_sad_d_0 = _mm256_castsi256_si128(v_sad_d);
- __m128i v_sad_d_1 = _mm256_extracti128_si256(v_sad_d, 1);
- v_sad_d_0 = _mm_add_epi32(v_sad_d_0, v_sad_d_1);
- return xx_hsum_epi32_si32(v_sad_d_0);
-}
-
-#define HBD_OBMCSADWXH(w, h) \
- unsigned int aom_highbd_obmc_sad##w##x##h##_avx2( \
- const uint8_t *pre, int pre_stride, const int32_t *wsrc, \
- const int32_t *mask) { \
- if (w == 4) { \
- return hbd_obmc_sad_w4_avx2(pre, pre_stride, wsrc, mask, h); \
- } else { \
- return hbd_obmc_sad_w8n_avx2(pre, pre_stride, wsrc, mask, w, h); \
- } \
- }
-
-HBD_OBMCSADWXH(128, 128)
-HBD_OBMCSADWXH(128, 64)
-HBD_OBMCSADWXH(64, 128)
-HBD_OBMCSADWXH(64, 64)
-HBD_OBMCSADWXH(64, 32)
-HBD_OBMCSADWXH(32, 64)
-HBD_OBMCSADWXH(32, 32)
-HBD_OBMCSADWXH(32, 16)
-HBD_OBMCSADWXH(16, 32)
-HBD_OBMCSADWXH(16, 16)
-HBD_OBMCSADWXH(16, 8)
-HBD_OBMCSADWXH(8, 16)
-HBD_OBMCSADWXH(8, 8)
-HBD_OBMCSADWXH(8, 4)
-HBD_OBMCSADWXH(4, 8)
-HBD_OBMCSADWXH(4, 4)
-HBD_OBMCSADWXH(4, 16)
-HBD_OBMCSADWXH(16, 4)
-HBD_OBMCSADWXH(8, 32)
-HBD_OBMCSADWXH(32, 8)
-HBD_OBMCSADWXH(16, 64)
-HBD_OBMCSADWXH(64, 16)
diff --git a/third_party/aom/aom_dsp/x86/obmc_sad_sse4.c b/third_party/aom/aom_dsp/x86/obmc_sad_sse4.c
deleted file mode 100644
index 0338a8c77..000000000
--- a/third_party/aom/aom_dsp/x86/obmc_sad_sse4.c
+++ /dev/null
@@ -1,268 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#include <assert.h>
-#include <immintrin.h>
-
-#include "config/aom_config.h"
-
-#include "aom_ports/mem.h"
-#include "aom/aom_integer.h"
-
-#include "aom_dsp/aom_dsp_common.h"
-#include "aom_dsp/x86/obmc_intrinsic_ssse3.h"
-#include "aom_dsp/x86/synonyms.h"
-
-////////////////////////////////////////////////////////////////////////////////
-// 8 bit
-////////////////////////////////////////////////////////////////////////////////
-
-static AOM_FORCE_INLINE unsigned int obmc_sad_w4(const uint8_t *pre,
- const int pre_stride,
- const int32_t *wsrc,
- const int32_t *mask,
- const int height) {
- const int pre_step = pre_stride - 4;
- int n = 0;
- __m128i v_sad_d = _mm_setzero_si128();
-
- do {
- const __m128i v_p_b = xx_loadl_32(pre + n);
- const __m128i v_m_d = xx_load_128(mask + n);
- const __m128i v_w_d = xx_load_128(wsrc + n);
-
- const __m128i v_p_d = _mm_cvtepu8_epi32(v_p_b);
-
- // Values in both pre and mask fit in 15 bits, and are packed at 32 bit
- // boundaries. We use pmaddwd, as it has lower latency on Haswell
- // than pmulld but produces the same result with these inputs.
- const __m128i v_pm_d = _mm_madd_epi16(v_p_d, v_m_d);
-
- const __m128i v_diff_d = _mm_sub_epi32(v_w_d, v_pm_d);
- const __m128i v_absdiff_d = _mm_abs_epi32(v_diff_d);
-
- // Rounded absolute difference
- const __m128i v_rad_d = xx_roundn_epu32(v_absdiff_d, 12);
-
- v_sad_d = _mm_add_epi32(v_sad_d, v_rad_d);
-
- n += 4;
-
- if (n % 4 == 0) pre += pre_step;
- } while (n < 4 * height);
-
- return xx_hsum_epi32_si32(v_sad_d);
-}
-
-static AOM_FORCE_INLINE unsigned int obmc_sad_w8n(
- const uint8_t *pre, const int pre_stride, const int32_t *wsrc,
- const int32_t *mask, const int width, const int height) {
- const int pre_step = pre_stride - width;
- int n = 0;
- __m128i v_sad_d = _mm_setzero_si128();
-
- assert(width >= 8);
- assert(IS_POWER_OF_TWO(width));
-
- do {
- const __m128i v_p1_b = xx_loadl_32(pre + n + 4);
- const __m128i v_m1_d = xx_load_128(mask + n + 4);
- const __m128i v_w1_d = xx_load_128(wsrc + n + 4);
- const __m128i v_p0_b = xx_loadl_32(pre + n);
- const __m128i v_m0_d = xx_load_128(mask + n);
- const __m128i v_w0_d = xx_load_128(wsrc + n);
-
- const __m128i v_p0_d = _mm_cvtepu8_epi32(v_p0_b);
- const __m128i v_p1_d = _mm_cvtepu8_epi32(v_p1_b);
-
- // Values in both pre and mask fit in 15 bits, and are packed at 32 bit
- // boundaries. We use pmaddwd, as it has lower latency on Haswell
- // than pmulld but produces the same result with these inputs.
- const __m128i v_pm0_d = _mm_madd_epi16(v_p0_d, v_m0_d);
- const __m128i v_pm1_d = _mm_madd_epi16(v_p1_d, v_m1_d);
-
- const __m128i v_diff0_d = _mm_sub_epi32(v_w0_d, v_pm0_d);
- const __m128i v_diff1_d = _mm_sub_epi32(v_w1_d, v_pm1_d);
- const __m128i v_absdiff0_d = _mm_abs_epi32(v_diff0_d);
- const __m128i v_absdiff1_d = _mm_abs_epi32(v_diff1_d);
-
- // Rounded absolute difference
- const __m128i v_rad0_d = xx_roundn_epu32(v_absdiff0_d, 12);
- const __m128i v_rad1_d = xx_roundn_epu32(v_absdiff1_d, 12);
-
- v_sad_d = _mm_add_epi32(v_sad_d, v_rad0_d);
- v_sad_d = _mm_add_epi32(v_sad_d, v_rad1_d);
-
- n += 8;
-
- if (n % width == 0) pre += pre_step;
- } while (n < width * height);
-
- return xx_hsum_epi32_si32(v_sad_d);
-}
-
-#define OBMCSADWXH(w, h) \
- unsigned int aom_obmc_sad##w##x##h##_sse4_1( \
- const uint8_t *pre, int pre_stride, const int32_t *wsrc, \
- const int32_t *msk) { \
- if (w == 4) { \
- return obmc_sad_w4(pre, pre_stride, wsrc, msk, h); \
- } else { \
- return obmc_sad_w8n(pre, pre_stride, wsrc, msk, w, h); \
- } \
- }
-
-OBMCSADWXH(128, 128)
-OBMCSADWXH(128, 64)
-OBMCSADWXH(64, 128)
-OBMCSADWXH(64, 64)
-OBMCSADWXH(64, 32)
-OBMCSADWXH(32, 64)
-OBMCSADWXH(32, 32)
-OBMCSADWXH(32, 16)
-OBMCSADWXH(16, 32)
-OBMCSADWXH(16, 16)
-OBMCSADWXH(16, 8)
-OBMCSADWXH(8, 16)
-OBMCSADWXH(8, 8)
-OBMCSADWXH(8, 4)
-OBMCSADWXH(4, 8)
-OBMCSADWXH(4, 4)
-OBMCSADWXH(4, 16)
-OBMCSADWXH(16, 4)
-OBMCSADWXH(8, 32)
-OBMCSADWXH(32, 8)
-OBMCSADWXH(16, 64)
-OBMCSADWXH(64, 16)
-
-////////////////////////////////////////////////////////////////////////////////
-// High bit-depth
-////////////////////////////////////////////////////////////////////////////////
-
-static AOM_FORCE_INLINE unsigned int hbd_obmc_sad_w4(const uint8_t *pre8,
- const int pre_stride,
- const int32_t *wsrc,
- const int32_t *mask,
- const int height) {
- const uint16_t *pre = CONVERT_TO_SHORTPTR(pre8);
- const int pre_step = pre_stride - 4;
- int n = 0;
- __m128i v_sad_d = _mm_setzero_si128();
-
- do {
- const __m128i v_p_w = xx_loadl_64(pre + n);
- const __m128i v_m_d = xx_load_128(mask + n);
- const __m128i v_w_d = xx_load_128(wsrc + n);
-
- const __m128i v_p_d = _mm_cvtepu16_epi32(v_p_w);
-
- // Values in both pre and mask fit in 15 bits, and are packed at 32 bit
- // boundaries. We use pmaddwd, as it has lower latency on Haswell
- // than pmulld but produces the same result with these inputs.
- const __m128i v_pm_d = _mm_madd_epi16(v_p_d, v_m_d);
-
- const __m128i v_diff_d = _mm_sub_epi32(v_w_d, v_pm_d);
- const __m128i v_absdiff_d = _mm_abs_epi32(v_diff_d);
-
- // Rounded absolute difference
- const __m128i v_rad_d = xx_roundn_epu32(v_absdiff_d, 12);
-
- v_sad_d = _mm_add_epi32(v_sad_d, v_rad_d);
-
- n += 4;
-
- if (n % 4 == 0) pre += pre_step;
- } while (n < 4 * height);
-
- return xx_hsum_epi32_si32(v_sad_d);
-}
-
-static AOM_FORCE_INLINE unsigned int hbd_obmc_sad_w8n(
- const uint8_t *pre8, const int pre_stride, const int32_t *wsrc,
- const int32_t *mask, const int width, const int height) {
- const uint16_t *pre = CONVERT_TO_SHORTPTR(pre8);
- const int pre_step = pre_stride - width;
- int n = 0;
- __m128i v_sad_d = _mm_setzero_si128();
-
- assert(width >= 8);
- assert(IS_POWER_OF_TWO(width));
-
- do {
- const __m128i v_p1_w = xx_loadl_64(pre + n + 4);
- const __m128i v_m1_d = xx_load_128(mask + n + 4);
- const __m128i v_w1_d = xx_load_128(wsrc + n + 4);
- const __m128i v_p0_w = xx_loadl_64(pre + n);
- const __m128i v_m0_d = xx_load_128(mask + n);
- const __m128i v_w0_d = xx_load_128(wsrc + n);
-
- const __m128i v_p0_d = _mm_cvtepu16_epi32(v_p0_w);
- const __m128i v_p1_d = _mm_cvtepu16_epi32(v_p1_w);
-
- // Values in both pre and mask fit in 15 bits, and are packed at 32 bit
- // boundaries. We use pmaddwd, as it has lower latency on Haswell
- // than pmulld but produces the same result with these inputs.
- const __m128i v_pm0_d = _mm_madd_epi16(v_p0_d, v_m0_d);
- const __m128i v_pm1_d = _mm_madd_epi16(v_p1_d, v_m1_d);
-
- const __m128i v_diff0_d = _mm_sub_epi32(v_w0_d, v_pm0_d);
- const __m128i v_diff1_d = _mm_sub_epi32(v_w1_d, v_pm1_d);
- const __m128i v_absdiff0_d = _mm_abs_epi32(v_diff0_d);
- const __m128i v_absdiff1_d = _mm_abs_epi32(v_diff1_d);
-
- // Rounded absolute difference
- const __m128i v_rad0_d = xx_roundn_epu32(v_absdiff0_d, 12);
- const __m128i v_rad1_d = xx_roundn_epu32(v_absdiff1_d, 12);
-
- v_sad_d = _mm_add_epi32(v_sad_d, v_rad0_d);
- v_sad_d = _mm_add_epi32(v_sad_d, v_rad1_d);
-
- n += 8;
-
- if (n % width == 0) pre += pre_step;
- } while (n < width * height);
-
- return xx_hsum_epi32_si32(v_sad_d);
-}
-
-#define HBD_OBMCSADWXH(w, h) \
- unsigned int aom_highbd_obmc_sad##w##x##h##_sse4_1( \
- const uint8_t *pre, int pre_stride, const int32_t *wsrc, \
- const int32_t *mask) { \
- if (w == 4) { \
- return hbd_obmc_sad_w4(pre, pre_stride, wsrc, mask, h); \
- } else { \
- return hbd_obmc_sad_w8n(pre, pre_stride, wsrc, mask, w, h); \
- } \
- }
-
-HBD_OBMCSADWXH(128, 128)
-HBD_OBMCSADWXH(128, 64)
-HBD_OBMCSADWXH(64, 128)
-HBD_OBMCSADWXH(64, 64)
-HBD_OBMCSADWXH(64, 32)
-HBD_OBMCSADWXH(32, 64)
-HBD_OBMCSADWXH(32, 32)
-HBD_OBMCSADWXH(32, 16)
-HBD_OBMCSADWXH(16, 32)
-HBD_OBMCSADWXH(16, 16)
-HBD_OBMCSADWXH(16, 8)
-HBD_OBMCSADWXH(8, 16)
-HBD_OBMCSADWXH(8, 8)
-HBD_OBMCSADWXH(8, 4)
-HBD_OBMCSADWXH(4, 8)
-HBD_OBMCSADWXH(4, 4)
-HBD_OBMCSADWXH(4, 16)
-HBD_OBMCSADWXH(16, 4)
-HBD_OBMCSADWXH(8, 32)
-HBD_OBMCSADWXH(32, 8)
-HBD_OBMCSADWXH(16, 64)
-HBD_OBMCSADWXH(64, 16)
diff --git a/third_party/aom/aom_dsp/x86/obmc_variance_avx2.c b/third_party/aom/aom_dsp/x86/obmc_variance_avx2.c
deleted file mode 100644
index bfec0e8a8..000000000
--- a/third_party/aom/aom_dsp/x86/obmc_variance_avx2.c
+++ /dev/null
@@ -1,190 +0,0 @@
-/*
- * Copyright (c) 2018, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#include <assert.h>
-#include <immintrin.h>
-
-#include "config/aom_config.h"
-
-#include "aom_ports/mem.h"
-#include "aom/aom_integer.h"
-
-#include "aom_dsp/aom_dsp_common.h"
-#include "aom_dsp/aom_filter.h"
-#include "aom_dsp/x86/obmc_intrinsic_sse4.h"
-
-////////////////////////////////////////////////////////////////////////////////
-// 8 bit
-////////////////////////////////////////////////////////////////////////////////
-
-static INLINE void obmc_variance_w8n(const uint8_t *pre, const int pre_stride,
- const int32_t *wsrc, const int32_t *mask,
- unsigned int *const sse, int *const sum,
- const int w, const int h) {
- int n = 0, width, height = h;
- __m128i v_sum_d = _mm_setzero_si128();
- __m128i v_sse_d = _mm_setzero_si128();
- const __m256i v_bias_d = _mm256_set1_epi32((1 << 12) >> 1);
- __m128i v_d;
- const uint8_t *pre_temp;
- assert(w >= 8);
- assert(IS_POWER_OF_TWO(w));
- assert(IS_POWER_OF_TWO(h));
- do {
- width = w;
- pre_temp = pre;
- do {
- const __m128i v_p_b = _mm_loadl_epi64((const __m128i *)pre_temp);
- const __m256i v_m_d = _mm256_loadu_si256((__m256i const *)(mask + n));
- const __m256i v_w_d = _mm256_loadu_si256((__m256i const *)(wsrc + n));
- const __m256i v_p0_d = _mm256_cvtepu8_epi32(v_p_b);
-
- // Values in both pre and mask fit in 15 bits, and are packed at 32 bit
- // boundaries. We use pmaddwd, as it has lower latency on Haswell
- // than pmulld but produces the same result with these inputs.
- const __m256i v_pm_d = _mm256_madd_epi16(v_p0_d, v_m_d);
- const __m256i v_diff0_d = _mm256_sub_epi32(v_w_d, v_pm_d);
-
- const __m256i v_sign_d = _mm256_srai_epi32(v_diff0_d, 31);
- const __m256i v_tmp_d =
- _mm256_add_epi32(_mm256_add_epi32(v_diff0_d, v_bias_d), v_sign_d);
- const __m256i v_rdiff0_d = _mm256_srai_epi32(v_tmp_d, 12);
- const __m128i v_rdiff_d = _mm256_castsi256_si128(v_rdiff0_d);
- const __m128i v_rdiff1_d = _mm256_extracti128_si256(v_rdiff0_d, 1);
-
- const __m128i v_rdiff01_w = _mm_packs_epi32(v_rdiff_d, v_rdiff1_d);
- const __m128i v_sqrdiff_d = _mm_madd_epi16(v_rdiff01_w, v_rdiff01_w);
-
- v_sum_d = _mm_add_epi32(v_sum_d, v_rdiff_d);
- v_sum_d = _mm_add_epi32(v_sum_d, v_rdiff1_d);
- v_sse_d = _mm_add_epi32(v_sse_d, v_sqrdiff_d);
-
- pre_temp += 8;
- n += 8;
- width -= 8;
- } while (width > 0);
- pre += pre_stride;
- height -= 1;
- } while (height > 0);
- v_d = _mm_hadd_epi32(v_sum_d, v_sse_d);
- v_d = _mm_hadd_epi32(v_d, v_d);
- *sum = _mm_cvtsi128_si32(v_d);
- *sse = _mm_cvtsi128_si32(_mm_srli_si128(v_d, 4));
-}
-
-static INLINE void obmc_variance_w16n(const uint8_t *pre, const int pre_stride,
- const int32_t *wsrc, const int32_t *mask,
- unsigned int *const sse, int *const sum,
- const int w, const int h) {
- int n = 0, width, height = h;
- __m256i v_d;
- __m128i res0;
- const uint8_t *pre_temp;
- const __m256i v_bias_d = _mm256_set1_epi32((1 << 12) >> 1);
- __m256i v_sum_d = _mm256_setzero_si256();
- __m256i v_sse_d = _mm256_setzero_si256();
-
- assert(w >= 16);
- assert(IS_POWER_OF_TWO(w));
- assert(IS_POWER_OF_TWO(h));
- do {
- width = w;
- pre_temp = pre;
- do {
- const __m128i v_p_b = _mm_loadu_si128((__m128i *)pre_temp);
- const __m256i v_m0_d = _mm256_loadu_si256((__m256i const *)(mask + n));
- const __m256i v_w0_d = _mm256_loadu_si256((__m256i const *)(wsrc + n));
- const __m256i v_m1_d =
- _mm256_loadu_si256((__m256i const *)(mask + n + 8));
- const __m256i v_w1_d =
- _mm256_loadu_si256((__m256i const *)(wsrc + n + 8));
-
- const __m256i v_p0_d = _mm256_cvtepu8_epi32(v_p_b);
- const __m256i v_p1_d = _mm256_cvtepu8_epi32(_mm_srli_si128(v_p_b, 8));
-
- const __m256i v_pm0_d = _mm256_madd_epi16(v_p0_d, v_m0_d);
- const __m256i v_pm1_d = _mm256_madd_epi16(v_p1_d, v_m1_d);
-
- const __m256i v_diff0_d = _mm256_sub_epi32(v_w0_d, v_pm0_d);
- const __m256i v_diff1_d = _mm256_sub_epi32(v_w1_d, v_pm1_d);
-
- const __m256i v_sign0_d = _mm256_srai_epi32(v_diff0_d, 31);
- const __m256i v_sign1_d = _mm256_srai_epi32(v_diff1_d, 31);
-
- const __m256i v_tmp0_d =
- _mm256_add_epi32(_mm256_add_epi32(v_diff0_d, v_bias_d), v_sign0_d);
- const __m256i v_tmp1_d =
- _mm256_add_epi32(_mm256_add_epi32(v_diff1_d, v_bias_d), v_sign1_d);
-
- const __m256i v_rdiff0_d = _mm256_srai_epi32(v_tmp0_d, 12);
- const __m256i v_rdiff2_d = _mm256_srai_epi32(v_tmp1_d, 12);
-
- const __m256i v_rdiff1_d = _mm256_add_epi32(v_rdiff0_d, v_rdiff2_d);
- const __m256i v_rdiff01_w = _mm256_packs_epi32(v_rdiff0_d, v_rdiff2_d);
- const __m256i v_sqrdiff_d = _mm256_madd_epi16(v_rdiff01_w, v_rdiff01_w);
-
- v_sum_d = _mm256_add_epi32(v_sum_d, v_rdiff1_d);
- v_sse_d = _mm256_add_epi32(v_sse_d, v_sqrdiff_d);
-
- pre_temp += 16;
- n += 16;
- width -= 16;
- } while (width > 0);
- pre += pre_stride;
- height -= 1;
- } while (height > 0);
-
- v_d = _mm256_hadd_epi32(v_sum_d, v_sse_d);
- v_d = _mm256_hadd_epi32(v_d, v_d);
- res0 = _mm256_castsi256_si128(v_d);
- res0 = _mm_add_epi32(res0, _mm256_extractf128_si256(v_d, 1));
- *sum = _mm_cvtsi128_si32(res0);
- *sse = _mm_cvtsi128_si32(_mm_srli_si128(res0, 4));
-}
-
-#define OBMCVARWXH(W, H) \
- unsigned int aom_obmc_variance##W##x##H##_avx2( \
- const uint8_t *pre, int pre_stride, const int32_t *wsrc, \
- const int32_t *mask, unsigned int *sse) { \
- int sum; \
- if (W == 4) { \
- obmc_variance_w4(pre, pre_stride, wsrc, mask, sse, &sum, H); \
- } else if (W == 8) { \
- obmc_variance_w8n(pre, pre_stride, wsrc, mask, sse, &sum, W, H); \
- } else { \
- obmc_variance_w16n(pre, pre_stride, wsrc, mask, sse, &sum, W, H); \
- } \
- \
- return *sse - (unsigned int)(((int64_t)sum * sum) / (W * H)); \
- }
-
-OBMCVARWXH(128, 128)
-OBMCVARWXH(128, 64)
-OBMCVARWXH(64, 128)
-OBMCVARWXH(64, 64)
-OBMCVARWXH(64, 32)
-OBMCVARWXH(32, 64)
-OBMCVARWXH(32, 32)
-OBMCVARWXH(32, 16)
-OBMCVARWXH(16, 32)
-OBMCVARWXH(16, 16)
-OBMCVARWXH(16, 8)
-OBMCVARWXH(8, 16)
-OBMCVARWXH(8, 8)
-OBMCVARWXH(8, 4)
-OBMCVARWXH(4, 8)
-OBMCVARWXH(4, 4)
-OBMCVARWXH(4, 16)
-OBMCVARWXH(16, 4)
-OBMCVARWXH(8, 32)
-OBMCVARWXH(32, 8)
-OBMCVARWXH(16, 64)
-OBMCVARWXH(64, 16)
diff --git a/third_party/aom/aom_dsp/x86/obmc_variance_sse4.c b/third_party/aom/aom_dsp/x86/obmc_variance_sse4.c
deleted file mode 100644
index 72eda0e57..000000000
--- a/third_party/aom/aom_dsp/x86/obmc_variance_sse4.c
+++ /dev/null
@@ -1,380 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#include <assert.h>
-#include <immintrin.h>
-
-#include "config/aom_config.h"
-
-#include "aom_ports/mem.h"
-#include "aom/aom_integer.h"
-
-#include "aom_dsp/aom_dsp_common.h"
-#include "aom_dsp/aom_filter.h"
-#include "aom_dsp/x86/obmc_intrinsic_sse4.h"
-#include "aom_dsp/x86/synonyms.h"
-
-////////////////////////////////////////////////////////////////////////////////
-// 8 bit
-////////////////////////////////////////////////////////////////////////////////
-
-void aom_var_filter_block2d_bil_first_pass_ssse3(
- const uint8_t *a, uint16_t *b, unsigned int src_pixels_per_line,
- unsigned int pixel_step, unsigned int output_height,
- unsigned int output_width, const uint8_t *filter);
-
-void aom_var_filter_block2d_bil_second_pass_ssse3(
- const uint16_t *a, uint8_t *b, unsigned int src_pixels_per_line,
- unsigned int pixel_step, unsigned int output_height,
- unsigned int output_width, const uint8_t *filter);
-
-static INLINE void obmc_variance_w8n(const uint8_t *pre, const int pre_stride,
- const int32_t *wsrc, const int32_t *mask,
- unsigned int *const sse, int *const sum,
- const int w, const int h) {
- const int pre_step = pre_stride - w;
- int n = 0;
- __m128i v_sum_d = _mm_setzero_si128();
- __m128i v_sse_d = _mm_setzero_si128();
-
- assert(w >= 8);
- assert(IS_POWER_OF_TWO(w));
- assert(IS_POWER_OF_TWO(h));
-
- do {
- const __m128i v_p1_b = xx_loadl_32(pre + n + 4);
- const __m128i v_m1_d = xx_load_128(mask + n + 4);
- const __m128i v_w1_d = xx_load_128(wsrc + n + 4);
- const __m128i v_p0_b = xx_loadl_32(pre + n);
- const __m128i v_m0_d = xx_load_128(mask + n);
- const __m128i v_w0_d = xx_load_128(wsrc + n);
-
- const __m128i v_p0_d = _mm_cvtepu8_epi32(v_p0_b);
- const __m128i v_p1_d = _mm_cvtepu8_epi32(v_p1_b);
-
- // Values in both pre and mask fit in 15 bits, and are packed at 32 bit
- // boundaries. We use pmaddwd, as it has lower latency on Haswell
- // than pmulld but produces the same result with these inputs.
- const __m128i v_pm0_d = _mm_madd_epi16(v_p0_d, v_m0_d);
- const __m128i v_pm1_d = _mm_madd_epi16(v_p1_d, v_m1_d);
-
- const __m128i v_diff0_d = _mm_sub_epi32(v_w0_d, v_pm0_d);
- const __m128i v_diff1_d = _mm_sub_epi32(v_w1_d, v_pm1_d);
-
- const __m128i v_rdiff0_d = xx_roundn_epi32(v_diff0_d, 12);
- const __m128i v_rdiff1_d = xx_roundn_epi32(v_diff1_d, 12);
- const __m128i v_rdiff01_w = _mm_packs_epi32(v_rdiff0_d, v_rdiff1_d);
- const __m128i v_sqrdiff_d = _mm_madd_epi16(v_rdiff01_w, v_rdiff01_w);
-
- v_sum_d = _mm_add_epi32(v_sum_d, v_rdiff0_d);
- v_sum_d = _mm_add_epi32(v_sum_d, v_rdiff1_d);
- v_sse_d = _mm_add_epi32(v_sse_d, v_sqrdiff_d);
-
- n += 8;
-
- if (n % w == 0) pre += pre_step;
- } while (n < w * h);
-
- *sum = xx_hsum_epi32_si32(v_sum_d);
- *sse = xx_hsum_epi32_si32(v_sse_d);
-}
-
-#define OBMCVARWXH(W, H) \
- unsigned int aom_obmc_variance##W##x##H##_sse4_1( \
- const uint8_t *pre, int pre_stride, const int32_t *wsrc, \
- const int32_t *mask, unsigned int *sse) { \
- int sum; \
- if (W == 4) { \
- obmc_variance_w4(pre, pre_stride, wsrc, mask, sse, &sum, H); \
- } else { \
- obmc_variance_w8n(pre, pre_stride, wsrc, mask, sse, &sum, W, H); \
- } \
- return *sse - (unsigned int)(((int64_t)sum * sum) / (W * H)); \
- }
-
-OBMCVARWXH(128, 128)
-OBMCVARWXH(128, 64)
-OBMCVARWXH(64, 128)
-OBMCVARWXH(64, 64)
-OBMCVARWXH(64, 32)
-OBMCVARWXH(32, 64)
-OBMCVARWXH(32, 32)
-OBMCVARWXH(32, 16)
-OBMCVARWXH(16, 32)
-OBMCVARWXH(16, 16)
-OBMCVARWXH(16, 8)
-OBMCVARWXH(8, 16)
-OBMCVARWXH(8, 8)
-OBMCVARWXH(8, 4)
-OBMCVARWXH(4, 8)
-OBMCVARWXH(4, 4)
-OBMCVARWXH(4, 16)
-OBMCVARWXH(16, 4)
-OBMCVARWXH(8, 32)
-OBMCVARWXH(32, 8)
-OBMCVARWXH(16, 64)
-OBMCVARWXH(64, 16)
-
-#include "config/aom_dsp_rtcd.h"
-
-#define OBMC_SUBPIX_VAR(W, H) \
- uint32_t aom_obmc_sub_pixel_variance##W##x##H##_sse4_1( \
- const uint8_t *pre, int pre_stride, int xoffset, int yoffset, \
- const int32_t *wsrc, const int32_t *mask, unsigned int *sse) { \
- uint16_t fdata3[(H + 1) * W]; \
- uint8_t temp2[H * W]; \
- \
- aom_var_filter_block2d_bil_first_pass_ssse3( \
- pre, fdata3, pre_stride, 1, H + 1, W, bilinear_filters_2t[xoffset]); \
- aom_var_filter_block2d_bil_second_pass_ssse3( \
- fdata3, temp2, W, W, H, W, bilinear_filters_2t[yoffset]); \
- \
- return aom_obmc_variance##W##x##H##_sse4_1(temp2, W, wsrc, mask, sse); \
- }
-
-OBMC_SUBPIX_VAR(128, 128)
-OBMC_SUBPIX_VAR(128, 64)
-OBMC_SUBPIX_VAR(64, 128)
-OBMC_SUBPIX_VAR(64, 64)
-OBMC_SUBPIX_VAR(64, 32)
-OBMC_SUBPIX_VAR(32, 64)
-OBMC_SUBPIX_VAR(32, 32)
-OBMC_SUBPIX_VAR(32, 16)
-OBMC_SUBPIX_VAR(16, 32)
-OBMC_SUBPIX_VAR(16, 16)
-OBMC_SUBPIX_VAR(16, 8)
-OBMC_SUBPIX_VAR(8, 16)
-OBMC_SUBPIX_VAR(8, 8)
-OBMC_SUBPIX_VAR(8, 4)
-OBMC_SUBPIX_VAR(4, 8)
-OBMC_SUBPIX_VAR(4, 4)
-OBMC_SUBPIX_VAR(4, 16)
-OBMC_SUBPIX_VAR(16, 4)
-OBMC_SUBPIX_VAR(8, 32)
-OBMC_SUBPIX_VAR(32, 8)
-OBMC_SUBPIX_VAR(16, 64)
-OBMC_SUBPIX_VAR(64, 16)
-
-////////////////////////////////////////////////////////////////////////////////
-// High bit-depth
-////////////////////////////////////////////////////////////////////////////////
-
-static INLINE void hbd_obmc_variance_w4(
- const uint8_t *pre8, const int pre_stride, const int32_t *wsrc,
- const int32_t *mask, uint64_t *const sse, int64_t *const sum, const int h) {
- const uint16_t *pre = CONVERT_TO_SHORTPTR(pre8);
- const int pre_step = pre_stride - 4;
- int n = 0;
- __m128i v_sum_d = _mm_setzero_si128();
- __m128i v_sse_d = _mm_setzero_si128();
-
- assert(IS_POWER_OF_TWO(h));
-
- do {
- const __m128i v_p_w = xx_loadl_64(pre + n);
- const __m128i v_m_d = xx_load_128(mask + n);
- const __m128i v_w_d = xx_load_128(wsrc + n);
-
- const __m128i v_p_d = _mm_cvtepu16_epi32(v_p_w);
-
- // Values in both pre and mask fit in 15 bits, and are packed at 32 bit
- // boundaries. We use pmaddwd, as it has lower latency on Haswell
- // than pmulld but produces the same result with these inputs.
- const __m128i v_pm_d = _mm_madd_epi16(v_p_d, v_m_d);
-
- const __m128i v_diff_d = _mm_sub_epi32(v_w_d, v_pm_d);
- const __m128i v_rdiff_d = xx_roundn_epi32(v_diff_d, 12);
- const __m128i v_sqrdiff_d = _mm_mullo_epi32(v_rdiff_d, v_rdiff_d);
-
- v_sum_d = _mm_add_epi32(v_sum_d, v_rdiff_d);
- v_sse_d = _mm_add_epi32(v_sse_d, v_sqrdiff_d);
-
- n += 4;
-
- if (n % 4 == 0) pre += pre_step;
- } while (n < 4 * h);
-
- *sum = xx_hsum_epi32_si32(v_sum_d);
- *sse = xx_hsum_epi32_si32(v_sse_d);
-}
-
-// Accumulates the sum and sum of squares of the OBMC prediction error for a
-// high bit-depth block whose width w is a multiple of 8 (asserted: w >= 8 and
-// a power of two), processing 8 pixels per iteration.
-//
-// The per-pixel error is (wsrc[i] - pre[i] * mask[i]) passed through
-// xx_roundn_epi32(..., 12) (presumably a rounding right shift by 12 bits;
-// the helper is defined elsewhere). wsrc and mask are read contiguously
-// (w entries per row); pre8 is converted with CONVERT_TO_SHORTPTR and walked
-// with a row pitch of pre_stride 16-bit samples.
-//
-// Unlike hbd_obmc_variance_w4, the results are ADDED to *sum and *sse, so
-// callers may invoke this repeatedly on consecutive row bands.
-static INLINE void hbd_obmc_variance_w8n(
- const uint8_t *pre8, const int pre_stride, const int32_t *wsrc,
- const int32_t *mask, uint64_t *const sse, int64_t *const sum, const int w,
- const int h) {
- const uint16_t *pre = CONVERT_TO_SHORTPTR(pre8);
- // pre is indexed with the running counter n, which already advances by w
- // per row, so only the remainder of the stride is added at each row end.
- const int pre_step = pre_stride - w;
- int n = 0;
- __m128i v_sum_d = _mm_setzero_si128();
- __m128i v_sse_d = _mm_setzero_si128();
-
- assert(w >= 8);
- assert(IS_POWER_OF_TWO(w));
- assert(IS_POWER_OF_TWO(h));
-
- do {
- // Two groups of four pixels: group 1 is entries n+4..n+7, group 0 is
- // entries n..n+3.
- const __m128i v_p1_w = xx_loadl_64(pre + n + 4);
- const __m128i v_m1_d = xx_load_128(mask + n + 4);
- const __m128i v_w1_d = xx_load_128(wsrc + n + 4);
- const __m128i v_p0_w = xx_loadl_64(pre + n);
- const __m128i v_m0_d = xx_load_128(mask + n);
- const __m128i v_w0_d = xx_load_128(wsrc + n);
-
- // Zero-extend the pixels to 32-bit lanes.
- const __m128i v_p0_d = _mm_cvtepu16_epi32(v_p0_w);
- const __m128i v_p1_d = _mm_cvtepu16_epi32(v_p1_w);
-
- // Values in both pre and mask fit in 15 bits, and are packed at 32 bit
- // boundaries. We use pmaddwd, as it has lower latency on Haswell
- // than pmulld but produces the same result with these inputs.
- const __m128i v_pm0_d = _mm_madd_epi16(v_p0_d, v_m0_d);
- const __m128i v_pm1_d = _mm_madd_epi16(v_p1_d, v_m1_d);
-
- const __m128i v_diff0_d = _mm_sub_epi32(v_w0_d, v_pm0_d);
- const __m128i v_diff1_d = _mm_sub_epi32(v_w1_d, v_pm1_d);
-
- const __m128i v_rdiff0_d = xx_roundn_epi32(v_diff0_d, 12);
- const __m128i v_rdiff1_d = xx_roundn_epi32(v_diff1_d, 12);
- // Pack the eight rounded errors to signed 16 bit (with saturation) so a
- // single pmaddwd squares them and adds adjacent pairs.
- // NOTE(review): this relies on the rounded error fitting in int16 -
- // confirm against the value ranges of wsrc/mask.
- const __m128i v_rdiff01_w = _mm_packs_epi32(v_rdiff0_d, v_rdiff1_d);
- const __m128i v_sqrdiff_d = _mm_madd_epi16(v_rdiff01_w, v_rdiff01_w);
-
- // Both accumulators are kept in 32-bit lanes until the final reduction.
- v_sum_d = _mm_add_epi32(v_sum_d, v_rdiff0_d);
- v_sum_d = _mm_add_epi32(v_sum_d, v_rdiff1_d);
- v_sse_d = _mm_add_epi32(v_sse_d, v_sqrdiff_d);
-
- n += 8;
-
- // At the end of each row of w entries, step pre to the next row.
- if (n % w == 0) pre += pre_step;
- } while (n < w * h);
-
- // Horizontal add of the 32-bit lanes, accumulated into the 64-bit outputs.
- *sum += xx_hsum_epi32_si64(v_sum_d);
- *sse += xx_hsum_epi32_si64(v_sse_d);
-}
-
-// Computes the OBMC error sum and sum of squares for a w x h high bit-depth
-// block without any bit-depth normalization. Width 4 uses the dedicated
-// 4-wide kernel; every other width goes through the 8-pixels-at-a-time
-// kernel. The 64-bit totals are truncated to the 32-bit outputs.
-static INLINE void highbd_obmc_variance(const uint8_t *pre8, int pre_stride,
-                                        const int32_t *wsrc,
-                                        const int32_t *mask, int w, int h,
-                                        unsigned int *sse, int *sum) {
-  uint64_t sse_total = 0;
-  int64_t sum_total = 0;
-
-  if (w != 4) {
-    hbd_obmc_variance_w8n(pre8, pre_stride, wsrc, mask, &sse_total, &sum_total,
-                          w, h);
-  } else {
-    hbd_obmc_variance_w4(pre8, pre_stride, wsrc, mask, &sse_total, &sum_total,
-                         h);
-  }
-
-  *sum = (int)sum_total;
-  *sse = (unsigned int)sse_total;
-}
-
-// 10-bit variant: computes the OBMC error sum and sum of squares for a
-// w x h block, then scales them down with ROUND_POWER_OF_TWO (sum by 2 bits,
-// sse by 4 bits). A 128x128 block is fed to the 8-wide kernel in bands of
-// 64 rows; the kernel adds each band into the 64-bit totals.
-static INLINE void highbd_10_obmc_variance(const uint8_t *pre8, int pre_stride,
-                                           const int32_t *wsrc,
-                                           const int32_t *mask, int w, int h,
-                                           unsigned int *sse, int *sum) {
-  uint64_t sse_total = 0;
-  int64_t sum_total = 0;
-
-  if (w == 4) {
-    hbd_obmc_variance_w4(pre8, pre_stride, wsrc, mask, &sse_total, &sum_total,
-                         h);
-  } else if (w >= 128 && h >= 128) {
-    assert(w == 128 && h == 128);
-
-    // h >= 128 on entry, so this loop always runs at least once.
-    while (h > 0) {
-      hbd_obmc_variance_w8n(pre8, pre_stride, wsrc, mask, &sse_total,
-                            &sum_total, w, 64);
-      pre8 += 64 * pre_stride;
-      wsrc += 64 * w;
-      mask += 64 * w;
-      h -= 64;
-    }
-  } else {
-    hbd_obmc_variance_w8n(pre8, pre_stride, wsrc, mask, &sse_total, &sum_total,
-                          w, h);
-  }
-
-  *sum = (int)ROUND_POWER_OF_TWO(sum_total, 2);
-  *sse = (unsigned int)ROUND_POWER_OF_TWO(sse_total, 4);
-}
-
-// 12-bit variant: computes the OBMC error sum and sum of squares for a
-// w x h block, then scales them down with ROUND_POWER_OF_TWO (sum by 4 bits,
-// sse by 8 bits). Blocks with more than 512 pixels are fed to the 8-wide
-// kernel in bands of 512 / w rows; the kernel adds each band into the 64-bit
-// totals.
-static INLINE void highbd_12_obmc_variance(const uint8_t *pre8, int pre_stride,
-                                           const int32_t *wsrc,
-                                           const int32_t *mask, int w, int h,
-                                           unsigned int *sse, int *sum) {
-  uint64_t sse_total = 0;
-  int64_t sum_total = 0;
-  int pel_budget = 512;  // maximum pixels handed to one kernel call
-
-  if (w == 4) {
-    hbd_obmc_variance_w4(pre8, pre_stride, wsrc, mask, &sse_total, &sum_total,
-                         h);
-  } else if (w * h > pel_budget) {
-    int rows_per_band = pel_budget / w;
-
-    assert(pel_budget % w == 0);
-    do {
-      hbd_obmc_variance_w8n(pre8, pre_stride, wsrc, mask, &sse_total,
-                            &sum_total, w, rows_per_band);
-      pre8 += rows_per_band * pre_stride;
-      wsrc += rows_per_band * w;
-      mask += rows_per_band * w;
-      h -= rows_per_band;
-    } while (h > 0);
-  } else {
-    hbd_obmc_variance_w8n(pre8, pre_stride, wsrc, mask, &sse_total, &sum_total,
-                          w, h);
-  }
-
-  *sum = (int)ROUND_POWER_OF_TWO(sum_total, 4);
-  *sse = (unsigned int)ROUND_POWER_OF_TWO(sse_total, 8);
-}
-
-// Defines the three public SSE4.1 OBMC variance entry points for a W x H
-// block: aom_highbd_obmc_variance, aom_highbd_10_obmc_variance and
-// aom_highbd_12_obmc_variance (each suffixed WxH_sse4_1).
-//
-// Each one writes the sum of squared errors to *sse and returns
-// *sse - sum * sum / (W * H). The first variant performs that subtraction in
-// unsigned arithmetic with no clamp; the 10- and 12-bit variants compute it
-// in 64 bits and clamp a negative result to 0.
-#define HBD_OBMCVARWXH(W, H) \
- unsigned int aom_highbd_obmc_variance##W##x##H##_sse4_1( \
- const uint8_t *pre, int pre_stride, const int32_t *wsrc, \
- const int32_t *mask, unsigned int *sse) { \
- int sum; \
- highbd_obmc_variance(pre, pre_stride, wsrc, mask, W, H, sse, &sum); \
- return *sse - (unsigned int)(((int64_t)sum * sum) / (W * H)); \
- } \
- \
- unsigned int aom_highbd_10_obmc_variance##W##x##H##_sse4_1( \
- const uint8_t *pre, int pre_stride, const int32_t *wsrc, \
- const int32_t *mask, unsigned int *sse) { \
- int sum; \
- int64_t var; \
- highbd_10_obmc_variance(pre, pre_stride, wsrc, mask, W, H, sse, &sum); \
- var = (int64_t)(*sse) - (((int64_t)sum * sum) / (W * H)); \
- return (var >= 0) ? (uint32_t)var : 0; \
- } \
- \
- unsigned int aom_highbd_12_obmc_variance##W##x##H##_sse4_1( \
- const uint8_t *pre, int pre_stride, const int32_t *wsrc, \
- const int32_t *mask, unsigned int *sse) { \
- int sum; \
- int64_t var; \
- highbd_12_obmc_variance(pre, pre_stride, wsrc, mask, W, H, sse, &sum); \
- var = (int64_t)(*sse) - (((int64_t)sum * sum) / (W * H)); \
- return (var >= 0) ? (uint32_t)var : 0; \
- }
-
-HBD_OBMCVARWXH(128, 128)
-HBD_OBMCVARWXH(128, 64)
-HBD_OBMCVARWXH(64, 128)
-HBD_OBMCVARWXH(64, 64)
-HBD_OBMCVARWXH(64, 32)
-HBD_OBMCVARWXH(32, 64)
-HBD_OBMCVARWXH(32, 32)
-HBD_OBMCVARWXH(32, 16)
-HBD_OBMCVARWXH(16, 32)
-HBD_OBMCVARWXH(16, 16)
-HBD_OBMCVARWXH(16, 8)
-HBD_OBMCVARWXH(8, 16)
-HBD_OBMCVARWXH(8, 8)
-HBD_OBMCVARWXH(8, 4)
-HBD_OBMCVARWXH(4, 8)
-HBD_OBMCVARWXH(4, 4)
-HBD_OBMCVARWXH(4, 16)
-HBD_OBMCVARWXH(16, 4)
-HBD_OBMCVARWXH(8, 32)
-HBD_OBMCVARWXH(32, 8)
-HBD_OBMCVARWXH(16, 64)
-HBD_OBMCVARWXH(64, 16)
diff --git a/third_party/aom/aom_dsp/x86/quantize_avx_x86_64.asm b/third_party/aom/aom_dsp/x86/quantize_avx_x86_64.asm
deleted file mode 100644
index 216a0bd8f..000000000
--- a/third_party/aom/aom_dsp/x86/quantize_avx_x86_64.asm
+++ /dev/null
@@ -1,435 +0,0 @@
-;
-; Copyright (c) 2016, Alliance for Open Media. All rights reserved
-;
-; This source code is subject to the terms of the BSD 2 Clause License and
-; the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
-; was not distributed with this source code in the LICENSE file, you can
-; obtain it at www.aomedia.org/license/software. If the Alliance for Open
-; Media Patent License 1.0 was not distributed with this source code in the
-; PATENTS file, you can obtain it at www.aomedia.org/license/patent.
-;
-
-;
-
-%include "third_party/x86inc/x86inc.asm"
-
-SECTION .text
-
-%macro QUANTIZE_FN 2
-cglobal quantize_%1, 0, %2, 15, coeff, ncoeff, zbin, round, quant, \
- shift, qcoeff, dqcoeff, dequant, \
- eob, scan, iscan
-
- vzeroupper
-
-%ifnidn %1, b_32x32
-
- ; Special case for ncoeff == 16, as it is frequent and we can save on
- ; not setting up a loop.
- cmp ncoeffmp, 16
- jne .generic
-
- ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
- ;; Special case of ncoeff == 16
- ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
-
-.single:
-
- movifnidn coeffq, coeffmp
- movifnidn zbinq, zbinmp
- mova m0, [zbinq] ; m0 = zbin
-
- ; Get DC and first 15 AC coeffs - in this special case, that is all.
- ; coeff stored as 32bit numbers but we process them as 16 bit numbers
- mova m9, [coeffq]
- packssdw m9, [coeffq+16] ; m9 = c[i]
- mova m10, [coeffq+32]
- packssdw m10, [coeffq+48] ; m10 = c[i]
-
- mov r0, eobmp ; Output pointer
- mov r1, qcoeffmp ; Output pointer
- mov r2, dqcoeffmp ; Output pointer
-
- pxor m5, m5 ; m5 = dedicated zero
-
- pcmpeqw m4, m4 ; All word lanes -1
- paddw m0, m4 ; m0 = zbin - 1
-
- pabsw m6, m9 ; m6 = abs(m9)
- pabsw m11, m10 ; m11 = abs(m10)
- pcmpgtw m7, m6, m0 ; m7 = c[i] >= zbin
- punpckhqdq m0, m0
- pcmpgtw m12, m11, m0 ; m12 = c[i] >= zbin
-
- ; Check if all coeffs are less than zbin. If yes, we just write zeros
- ; to the outputs and we are done.
- por m14, m7, m12
- ptest m14, m14
- jnz .single_nonzero
-
- mova [r1 ], ymm5
- mova [r1+32], ymm5
- mova [r2 ], ymm5
- mova [r2+32], ymm5
- mov [r0], word 0
-
- vzeroupper
- RET
-
-.single_nonzero:
-
- ; Actual quantization of size 16 block - setup pointers, rounders, etc.
- movifnidn r3, roundmp
- movifnidn r4, quantmp
- mov r6, dequantmp
- mov r5, shiftmp
- mova m1, [r3] ; m1 = round
- mova m2, [r4] ; m2 = quant
- mova m3, [r6] ; m3 = dequant
- mova m4, [r5] ; m4 = shift
-
- mov r3, iscanmp
-
- DEFINE_ARGS eob, qcoeff, dqcoeff, iscan
-
- ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
-
- paddsw m6, m1 ; m6 += round
- punpckhqdq m1, m1
- paddsw m11, m1 ; m11 += round
- pmulhw m8, m6, m2 ; m8 = m6*q>>16
- punpckhqdq m2, m2
- pmulhw m13, m11, m2 ; m13 = m11*q>>16
- paddw m8, m6 ; m8 += m6
- paddw m13, m11 ; m13 += m11
- pmulhw m8, m4 ; m8 = m8*qsh>>16
- punpckhqdq m4, m4
- pmulhw m13, m4 ; m13 = m13*qsh>>16
- psignw m8, m9 ; m8 = reinsert sign
- psignw m13, m10 ; m13 = reinsert sign
- pand m8, m7
- pand m13, m12
-
- ; Store 16bit numbers as 32bit numbers in array pointed to by qcoeff
- pcmpgtw m6, m5, m8
- punpckhwd m6, m8, m6
- pmovsxwd m11, m8
- mova [qcoeffq ], m11
- mova [qcoeffq+16], m6
- pcmpgtw m6, m5, m13
- punpckhwd m6, m13, m6
- pmovsxwd m11, m13
- mova [qcoeffq+32], m11
- mova [qcoeffq+48], m6
-
- pmullw m8, m3 ; dqc[i] = qc[i] * q
- punpckhqdq m3, m3
- pmullw m13, m3 ; dqc[i] = qc[i] * q
-
- ; Store 16bit numbers as 32bit numbers in array pointed to by dqcoeff
- pcmpgtw m6, m5, m8
- punpckhwd m6, m8, m6
- pmovsxwd m11, m8
- mova [dqcoeffq ], m11
- mova [dqcoeffq+16], m6
- pcmpgtw m6, m5, m13
- punpckhwd m6, m13, m6
- pmovsxwd m11, m13
- mova [dqcoeffq+32], m11
- mova [dqcoeffq+48], m6
-
- mova m6, [iscanq] ; m6 = scan[i]
- mova m11, [iscanq+16] ; m11 = scan[i]
-
- pcmpeqw m8, m8, m5 ; m8 = c[i] == 0
- pcmpeqw m13, m13, m5 ; m13 = c[i] == 0
- psubw m6, m6, m7 ; m6 = scan[i] + 1
- psubw m11, m11, m12 ; m11 = scan[i] + 1
- pandn m8, m8, m6 ; m8 = max(eob)
- pandn m13, m13, m11 ; m13 = max(eob)
- pmaxsw m8, m8, m13
-
- ; Horizontally accumulate/max eobs and write into [eob] memory pointer
- pshufd m7, m8, 0xe
- pmaxsw m8, m7
- pshuflw m7, m8, 0xe
- pmaxsw m8, m7
- pshuflw m7, m8, 0x1
- pmaxsw m8, m7
- movq rax, m8
- mov [eobq], ax
-
- vzeroupper
- RET
-
- ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
- ;; Generic case of ncoeff != 16
- ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
-
-.generic:
-
-%endif ; %ifnidn %1, b_32x32
-
-DEFINE_ARGS coeff, ncoeff, zbin, round, quant, shift, \
- qcoeff, dqcoeff, dequant, eob, scan, iscan
-
- ; Actual quantization loop - setup pointers, rounders, etc.
- movifnidn coeffq, coeffmp
- movifnidn ncoeffq, ncoeffmp
- movifnidn zbinq, zbinmp
- movifnidn roundq, roundmp
- movifnidn quantq, quantmp
- movifnidn dequantq, dequantmp
- mova m0, [zbinq] ; m0 = zbin
- mova m1, [roundq] ; m1 = round
- mova m2, [quantq] ; m2 = quant
- mova m3, [dequantq] ; m3 = dequant
- pcmpeqw m4, m4 ; All lanes -1
-%ifidn %1, b_32x32
- psubw m0, m4 ; m0 = zbin + 1 (subtracting -1)
- psubw m1, m4 ; m1 = round + 1 (subtracting -1)
- psrlw m0, 1 ; m0 = (m0 + 1) / 2
- psrlw m1, 1 ; m1 = (m1 + 1) / 2
-%endif
- paddw m0, m4 ; m0 = m0 - 1 (m4 is -1 in every word), so "> m0" tests ">= zbin"
-
- ; r2..r5 are loaded by number here; the DEFINE_ARGS below renames r3, r4
- ; and r5 to qcoeff, dqcoeff and iscan (r2 becomes the scratch name d1).
- mov r2, shiftmp
- mov r3, qcoeffmp
- mova m4, [r2] ; m4 = shift
- mov r4, dqcoeffmp
- mov r5, iscanmp
-%ifidn %1, b_32x32
- psllw m4, 1 ; m4 = shift * 2 for the 32x32 variant
-%endif
- pxor m5, m5 ; m5 = dedicated zero
-
- DEFINE_ARGS coeff, ncoeff, d1, qcoeff, dqcoeff, iscan, d2, d3, d4, eob
-
-
- lea coeffq, [ coeffq+ncoeffq*4]
- lea qcoeffq, [ qcoeffq+ncoeffq*4]
- lea dqcoeffq, [dqcoeffq+ncoeffq*4]
-
- lea iscanq, [ iscanq+ncoeffq*2]
- neg ncoeffq
-
- ; get DC and first 15 AC coeffs
- ; coeff stored as 32bit numbers & require 16bit numbers
- mova m9, [coeffq+ncoeffq*4+ 0]
- packssdw m9, [coeffq+ncoeffq*4+16]
- mova m10, [coeffq+ncoeffq*4+32]
- packssdw m10, [coeffq+ncoeffq*4+48]
-
- pabsw m6, m9 ; m6 = abs(m9)
- pabsw m11, m10 ; m11 = abs(m10)
- pcmpgtw m7, m6, m0 ; m7 = c[i] >= zbin
- punpckhqdq m0, m0
- pcmpgtw m12, m11, m0 ; m12 = c[i] >= zbin
-
- ; Check if all coeffs are less than zbin. If yes, skip forward quickly.
- por m14, m7, m12
- ptest m14, m14
- jnz .first_nonzero
-
- mova [qcoeffq+ncoeffq*4 ], ymm5
- mova [qcoeffq+ncoeffq*4+32], ymm5
- mova [dqcoeffq+ncoeffq*4 ], ymm5
- mova [dqcoeffq+ncoeffq*4+32], ymm5
- add ncoeffq, mmsize
-
- punpckhqdq m1, m1
- punpckhqdq m2, m2
- punpckhqdq m3, m3
- punpckhqdq m4, m4
- pxor m8, m8
-
- jmp .ac_only_loop
-
-.first_nonzero:
-
- paddsw m6, m1 ; m6 += round
- punpckhqdq m1, m1
- paddsw m11, m1 ; m11 += round
- pmulhw m8, m6, m2 ; m8 = m6*q>>16
- punpckhqdq m2, m2
- pmulhw m13, m11, m2 ; m13 = m11*q>>16
- paddw m8, m6 ; m8 += m6
- paddw m13, m11 ; m13 += m11
- pmulhw m8, m4 ; m8 = m8*qsh>>16
- punpckhqdq m4, m4
- pmulhw m13, m4 ; m13 = m13*qsh>>16
- psignw m8, m9 ; m8 = reinsert sign
- psignw m13, m10 ; m13 = reinsert sign
- pand m8, m7
- pand m13, m12
-
- ; store 16bit numbers as 32bit numbers in array pointed to by qcoeff
- pcmpgtw m6, m5, m8
- punpckhwd m6, m8, m6
- pmovsxwd m11, m8
- mova [qcoeffq+ncoeffq*4+ 0], m11
- mova [qcoeffq+ncoeffq*4+16], m6
- pcmpgtw m6, m5, m13
- punpckhwd m6, m13, m6
- pmovsxwd m11, m13
- mova [qcoeffq+ncoeffq*4+32], m11
- mova [qcoeffq+ncoeffq*4+48], m6
-
-%ifidn %1, b_32x32
- pabsw m8, m8
- pabsw m13, m13
-%endif
- pmullw m8, m3 ; dqc[i] = qc[i] * q
- punpckhqdq m3, m3
- pmullw m13, m3 ; dqc[i] = qc[i] * q
-%ifidn %1, b_32x32
- psrlw m8, 1
- psrlw m13, 1
- psignw m8, m9
- psignw m13, m10
-%endif
-
- ; store 16bit numbers as 32bit numbers in array pointed to by dqcoeff
- pcmpgtw m6, m5, m8
- punpckhwd m6, m8, m6
- pmovsxwd m11, m8
- mova [dqcoeffq+ncoeffq*4+ 0], m11
- mova [dqcoeffq+ncoeffq*4+16], m6
- pcmpgtw m6, m5, m13
- punpckhwd m6, m13, m6
- pmovsxwd m11, m13
- mova [dqcoeffq+ncoeffq*4+32], m11
- mova [dqcoeffq+ncoeffq*4+48], m6
-
- pcmpeqw m8, m5 ; m8 = c[i] == 0
- pcmpeqw m13, m5 ; m13 = c[i] == 0
- mova m6, [iscanq+ncoeffq*2] ; m6 = scan[i]
- mova m11, [iscanq+ncoeffq*2+16] ; m11 = scan[i]
- psubw m6, m7 ; m6 = scan[i] + 1
- psubw m11, m12 ; m11 = scan[i] + 1
- pandn m8, m6 ; m8 = max(eob)
- pandn m13, m11 ; m13 = max(eob)
- pmaxsw m8, m13
- add ncoeffq, mmsize
-
-.ac_only_loop:
-
- ; pack coeff from 32bit to 16bit array
- mova m9, [coeffq+ncoeffq*4+ 0]
- packssdw m9, [coeffq+ncoeffq*4+16]
- mova m10, [coeffq+ncoeffq*4+32]
- packssdw m10, [coeffq+ncoeffq*4+48]
-
- pabsw m6, m9 ; m6 = abs(m9)
- pabsw m11, m10 ; m11 = abs(m10)
- pcmpgtw m7, m6, m0 ; m7 = c[i] >= zbin
- pcmpgtw m12, m11, m0 ; m12 = c[i] >= zbin
-
- ; Check if all coeffs are less than zbin. If yes, skip this iteration
- ; and just write zeros, which is what the result would be.
- por m14, m7, m12
- ptest m14, m14
- jnz .rest_nonzero
-
- mova [qcoeffq+ncoeffq*4+ 0], ymm5
- mova [qcoeffq+ncoeffq*4+32], ymm5
- mova [dqcoeffq+ncoeffq*4+ 0], ymm5
- mova [dqcoeffq+ncoeffq*4+32], ymm5
-
- add ncoeffq, mmsize
- jnz .ac_only_loop
-
- ; Horizontally accumulate/max eobs and write into [eob] memory pointer
- mov r2, eobmp
- pshufd m7, m8, 0xe
- pmaxsw m8, m7
- pshuflw m7, m8, 0xe
- pmaxsw m8, m7
- pshuflw m7, m8, 0x1
- pmaxsw m8, m7
- movq rax, m8
- mov [r2], ax
- vzeroupper
- RET
-
-.rest_nonzero:
- paddsw m6, m1 ; m6 += round
- paddsw m11, m1 ; m11 += round
- pmulhw m14, m6, m2 ; m14 = m6*q>>16
- pmulhw m13, m11, m2 ; m13 = m11*q>>16
- paddw m14, m6 ; m14 += m6
- paddw m13, m11 ; m13 += m11
- pmulhw m14, m4 ; m14 = m14*qsh>>16
- pmulhw m13, m4 ; m13 = m13*qsh>>16
- psignw m14, m9 ; m14 = reinsert sign
- psignw m13, m10 ; m13 = reinsert sign
- pand m14, m7
- pand m13, m12
-
- ; store 16bit numbers as 32bit numbers in array pointed to by qcoeff
- pcmpgtw m6, m5, m14
- punpckhwd m6, m14, m6
- pmovsxwd m11, m14
- mova [qcoeffq+ncoeffq*4+ 0], m11
- mova [qcoeffq+ncoeffq*4+16], m6
- pcmpgtw m6, m5, m13
- punpckhwd m6, m13, m6
- pmovsxwd m11, m13
- mova [qcoeffq+ncoeffq*4+32], m11
- mova [qcoeffq+ncoeffq*4+48], m6
-
-%ifidn %1, b_32x32
- pabsw m14, m14
- pabsw m13, m13
-%endif
- pmullw m14, m3 ; dqc[i] = qc[i] * q
- pmullw m13, m3 ; dqc[i] = qc[i] * q
-%ifidn %1, b_32x32
- psrlw m14, 1
- psrlw m13, 1
- psignw m14, m9
- psignw m13, m10
-%endif
-
- ; store 16bit numbers as 32bit numbers in array pointed to by dqcoeff
- pcmpgtw m6, m5, m14
- punpckhwd m6, m14, m6
- pmovsxwd m11, m14
- mova [dqcoeffq+ncoeffq*4+ 0], m11
- mova [dqcoeffq+ncoeffq*4+16], m6
- pcmpgtw m6, m5, m13
- punpckhwd m6, m13, m6
- pmovsxwd m11, m13
- mova [dqcoeffq+ncoeffq*4+32], m11
- mova [dqcoeffq+ncoeffq*4+48], m6
-
- pcmpeqw m14, m5 ; m14 = c[i] == 0
- pcmpeqw m13, m5 ; m13 = c[i] == 0
- mova m6, [iscanq+ncoeffq*2+ 0] ; m6 = scan[i]
- mova m11, [iscanq+ncoeffq*2+16] ; m11 = scan[i]
- psubw m6, m7 ; m6 = scan[i] + 1
- psubw m11, m12 ; m11 = scan[i] + 1
- pandn m14, m6 ; m14 = max(eob)
- pandn m13, m11 ; m13 = max(eob)
- pmaxsw m8, m14
- pmaxsw m8, m13
- add ncoeffq, mmsize
- jnz .ac_only_loop
-
- ; Horizontally accumulate/max eobs and write into [eob] memory pointer
- mov r2, eobmp
- pshufd m7, m8, 0xe
- pmaxsw m8, m7
- pshuflw m7, m8, 0xe
- pmaxsw m8, m7
- pshuflw m7, m8, 0x1
- pmaxsw m8, m7
- movq rax, m8
- mov [r2], ax
- vzeroupper
- RET
-%endmacro
-
-INIT_XMM avx
-QUANTIZE_FN b, 9
-QUANTIZE_FN b_32x32, 9
diff --git a/third_party/aom/aom_dsp/x86/quantize_sse2.c b/third_party/aom/aom_dsp/x86/quantize_sse2.c
deleted file mode 100644
index d3de6e24d..000000000
--- a/third_party/aom/aom_dsp/x86/quantize_sse2.c
+++ /dev/null
@@ -1,147 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#include <assert.h>
-#include <emmintrin.h>
-#include <xmmintrin.h>
-
-#include "config/aom_dsp_rtcd.h"
-
-#include "aom/aom_integer.h"
-#include "aom_dsp/x86/quantize_x86.h"
-
-// Gathers eight consecutive 32-bit coefficients into one vector of 16-bit
-// lanes (lane i = coeff_ptr[i]). Each value is narrowed with a plain
-// (int16_t) cast, i.e. truncated rather than saturated.
-static INLINE __m128i load_coefficients(const tran_low_t *coeff_ptr) {
-  assert(sizeof(tran_low_t) == 4);
-
-  // _mm_set_epi16 takes its arguments from the highest lane down to lane 0.
-  return _mm_set_epi16((int16_t)coeff_ptr[7], (int16_t)coeff_ptr[6],
-                       (int16_t)coeff_ptr[5], (int16_t)coeff_ptr[4],
-                       (int16_t)coeff_ptr[3], (int16_t)coeff_ptr[2],
-                       (int16_t)coeff_ptr[1], (int16_t)coeff_ptr[0]);
-}
-
-// Sign-extends eight 16-bit lanes to 32 bits and writes them to
-// coeff_ptr[0..7] using two aligned 128-bit stores.
-static INLINE void store_coefficients(__m128i coeff_vals,
-                                      tran_low_t *coeff_ptr) {
-  assert(sizeof(tran_low_t) == 4);
-
-  // An arithmetic shift by 15 leaves 0x0000 or 0xFFFF in each lane, which is
-  // exactly the upper half of the sign-extended 32-bit value.
-  const __m128i sign_words = _mm_srai_epi16(coeff_vals, 15);
-  const __m128i lanes_0_3 = _mm_unpacklo_epi16(coeff_vals, sign_words);
-  const __m128i lanes_4_7 = _mm_unpackhi_epi16(coeff_vals, sign_words);
-
-  _mm_store_si128((__m128i *)(coeff_ptr), lanes_0_3);
-  _mm_store_si128((__m128i *)(coeff_ptr + 4), lanes_4_7);
-}
-
-// SSE2 quantizer: quantizes coeff_ptr into qcoeff_ptr, writes the
-// dequantized values to dqcoeff_ptr and stores the end-of-block count in
-// *eob_ptr.
-//
-// Coefficients are handled 16 at a time. The first 16 are always processed;
-// the loop then continues in steps of 16 while index < n_coeffs
-// (NOTE(review): n_coeffs is presumably always a multiple of 16 - confirm
-// against callers). scan_ptr is unused; end-of-block positions come from
-// iscan_ptr.
-//
-// The parameter vectors loaded by load_b_values appear to hold the DC value
-// in their low half and the AC value in their high half (see the
-// "Switch DC to AC" step below) - confirm against the table layout.
-void aom_quantize_b_sse2(const tran_low_t *coeff_ptr, intptr_t n_coeffs,
- const int16_t *zbin_ptr, const int16_t *round_ptr,
- const int16_t *quant_ptr,
- const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr,
- tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr,
- uint16_t *eob_ptr, const int16_t *scan_ptr,
- const int16_t *iscan_ptr) {
- const __m128i zero = _mm_setzero_si128();
- int index = 16;
-
- __m128i zbin, round, quant, dequant, shift;
- __m128i coeff0, coeff1, coeff0_sign, coeff1_sign;
- __m128i qcoeff0, qcoeff1;
- __m128i cmp_mask0, cmp_mask1;
- __m128i eob, eob0;
-
- (void)scan_ptr;
-
- // Setup global values.
- load_b_values(zbin_ptr, &zbin, round_ptr, &round, quant_ptr, &quant,
- dequant_ptr, &dequant, quant_shift_ptr, &shift);
-
- // Do DC and first 15 AC.
- coeff0 = load_coefficients(coeff_ptr);
- coeff1 = load_coefficients(coeff_ptr + 8);
-
- // Poor man's abs().
- coeff0_sign = _mm_srai_epi16(coeff0, 15);
- coeff1_sign = _mm_srai_epi16(coeff1, 15);
- qcoeff0 = invert_sign_sse2(coeff0, coeff0_sign);
- qcoeff1 = invert_sign_sse2(coeff1, coeff1_sign);
-
- // load_b_values stored zbin - 1, so "greater than" here means ">= zbin".
- cmp_mask0 = _mm_cmpgt_epi16(qcoeff0, zbin);
- zbin = _mm_unpackhi_epi64(zbin, zbin); // Switch DC to AC
- cmp_mask1 = _mm_cmpgt_epi16(qcoeff1, zbin);
-
- calculate_qcoeff(&qcoeff0, round, quant, shift);
-
- // Broadcast the high halves so the remaining coefficients all use the same
- // (AC) parameters.
- round = _mm_unpackhi_epi64(round, round);
- quant = _mm_unpackhi_epi64(quant, quant);
- shift = _mm_unpackhi_epi64(shift, shift);
-
- calculate_qcoeff(&qcoeff1, round, quant, shift);
-
- // Reinsert signs
- qcoeff0 = invert_sign_sse2(qcoeff0, coeff0_sign);
- qcoeff1 = invert_sign_sse2(qcoeff1, coeff1_sign);
-
- // Mask out zbin threshold coeffs
- qcoeff0 = _mm_and_si128(qcoeff0, cmp_mask0);
- qcoeff1 = _mm_and_si128(qcoeff1, cmp_mask1);
-
- store_coefficients(qcoeff0, qcoeff_ptr);
- store_coefficients(qcoeff1, qcoeff_ptr + 8);
-
- // coeff0/coeff1 are reused from here on to hold the dequantized values.
- coeff0 = calculate_dqcoeff(qcoeff0, dequant);
- dequant = _mm_unpackhi_epi64(dequant, dequant);
- coeff1 = calculate_dqcoeff(qcoeff1, dequant);
-
- store_coefficients(coeff0, dqcoeff_ptr);
- store_coefficients(coeff1, dqcoeff_ptr + 8);
-
- eob =
- scan_for_eob(&coeff0, &coeff1, cmp_mask0, cmp_mask1, iscan_ptr, 0, zero);
-
- // AC only loop.
- while (index < n_coeffs) {
- coeff0 = load_coefficients(coeff_ptr + index);
- coeff1 = load_coefficients(coeff_ptr + index + 8);
-
- // Same abs / threshold / quantize / re-sign / mask sequence as above,
- // now with the broadcast parameters for both halves.
- coeff0_sign = _mm_srai_epi16(coeff0, 15);
- coeff1_sign = _mm_srai_epi16(coeff1, 15);
- qcoeff0 = invert_sign_sse2(coeff0, coeff0_sign);
- qcoeff1 = invert_sign_sse2(coeff1, coeff1_sign);
-
- cmp_mask0 = _mm_cmpgt_epi16(qcoeff0, zbin);
- cmp_mask1 = _mm_cmpgt_epi16(qcoeff1, zbin);
-
- calculate_qcoeff(&qcoeff0, round, quant, shift);
- calculate_qcoeff(&qcoeff1, round, quant, shift);
-
- qcoeff0 = invert_sign_sse2(qcoeff0, coeff0_sign);
- qcoeff1 = invert_sign_sse2(qcoeff1, coeff1_sign);
-
- qcoeff0 = _mm_and_si128(qcoeff0, cmp_mask0);
- qcoeff1 = _mm_and_si128(qcoeff1, cmp_mask1);
-
- store_coefficients(qcoeff0, qcoeff_ptr + index);
- store_coefficients(qcoeff1, qcoeff_ptr + index + 8);
-
- coeff0 = calculate_dqcoeff(qcoeff0, dequant);
- coeff1 = calculate_dqcoeff(qcoeff1, dequant);
-
- store_coefficients(coeff0, dqcoeff_ptr + index);
- store_coefficients(coeff1, dqcoeff_ptr + index + 8);
-
- // Keep a running per-lane maximum of the end-of-block candidates.
- eob0 = scan_for_eob(&coeff0, &coeff1, cmp_mask0, cmp_mask1, iscan_ptr,
- index, zero);
- eob = _mm_max_epi16(eob, eob0);
-
- index += 16;
- }
-
- // Reduce the per-lane maxima to a single value.
- *eob_ptr = accumulate_eob(eob);
-}
diff --git a/third_party/aom/aom_dsp/x86/quantize_ssse3_x86_64.asm b/third_party/aom/aom_dsp/x86/quantize_ssse3_x86_64.asm
deleted file mode 100644
index 39d4ca674..000000000
--- a/third_party/aom/aom_dsp/x86/quantize_ssse3_x86_64.asm
+++ /dev/null
@@ -1,272 +0,0 @@
-;
-; Copyright (c) 2016, Alliance for Open Media. All rights reserved
-;
-; This source code is subject to the terms of the BSD 2 Clause License and
-; the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
-; was not distributed with this source code in the LICENSE file, you can
-; obtain it at www.aomedia.org/license/software. If the Alliance for Open
-; Media Patent License 1.0 was not distributed with this source code in the
-; PATENTS file, you can obtain it at www.aomedia.org/license/patent.
-;
-
-;
-
-%include "third_party/x86inc/x86inc.asm"
-
-SECTION_RODATA
-pw_1: times 8 dw 1
-
-SECTION .text
-
-%macro QUANTIZE_FN 2
-cglobal quantize_%1, 0, %2, 15, coeff, ncoeff, zbin, round, quant, \
- shift, qcoeff, dqcoeff, dequant, \
- eob, scan, iscan
-
- ; actual quantize loop - setup pointers, rounders, etc.
- movifnidn coeffq, coeffmp
- movifnidn ncoeffq, ncoeffmp
- movifnidn zbinq, zbinmp
- movifnidn roundq, roundmp
- movifnidn quantq, quantmp
- movifnidn dequantq, dequantmp
- mova m0, [zbinq] ; m0 = zbin
- mova m1, [roundq] ; m1 = round
- mova m2, [quantq] ; m2 = quant
-%ifidn %1, b_32x32
- pcmpeqw m5, m5
- psrlw m5, 15
- paddw m0, m5
- paddw m1, m5
- psrlw m0, 1 ; m0 = (m0 + 1) / 2
- psrlw m1, 1 ; m1 = (m1 + 1) / 2
-%endif
- mova m3, [dequantq] ; m3 = dequant
- mov r2, shiftmp
- psubw m0, [GLOBAL(pw_1)]
- mova m4, [r2] ; m4 = shift
- mov r3, qcoeffmp
- mov r4, dqcoeffmp
- mov r5, iscanmp
-%ifidn %1, b_32x32
- psllw m4, 1
-%endif
- pxor m5, m5 ; m5 = dedicated zero
- DEFINE_ARGS coeff, ncoeff, d1, qcoeff, dqcoeff, iscan, d2, d3, d4, eob
- lea coeffq, [ coeffq+ncoeffq*4]
- lea qcoeffq, [ qcoeffq+ncoeffq*4]
- lea dqcoeffq, [dqcoeffq+ncoeffq*4]
- lea iscanq, [ iscanq+ncoeffq*2]
- neg ncoeffq
-
- ; get DC and first 15 AC coeffs
- ; coeff stored as 32bit numbers & require 16bit numbers
- mova m9, [ coeffq+ncoeffq*4+ 0]
- packssdw m9, [ coeffq+ncoeffq*4+16]
- mova m10, [ coeffq+ncoeffq*4+32]
- packssdw m10, [ coeffq+ncoeffq*4+48]
- pabsw m6, m9 ; m6 = abs(m9)
- pabsw m11, m10 ; m11 = abs(m10)
- pcmpgtw m7, m6, m0 ; m7 = c[i] >= zbin
- punpckhqdq m0, m0
- pcmpgtw m12, m11, m0 ; m12 = c[i] >= zbin
- paddsw m6, m1 ; m6 += round
- punpckhqdq m1, m1
- paddsw m11, m1 ; m11 += round
- pmulhw m8, m6, m2 ; m8 = m6*q>>16
- punpckhqdq m2, m2
- pmulhw m13, m11, m2 ; m13 = m11*q>>16
- paddw m8, m6 ; m8 += m6
- paddw m13, m11 ; m13 += m11
- pmulhw m8, m4 ; m8 = m8*qsh>>16
- punpckhqdq m4, m4
- pmulhw m13, m4 ; m13 = m13*qsh>>16
- psignw m8, m9 ; m8 = reinsert sign
- psignw m13, m10 ; m13 = reinsert sign
- pand m8, m7
- pand m13, m12
-
- ; store 16bit numbers as 32bit numbers in array pointed to by qcoeff
- mova m11, m8
- mova m6, m8
- pcmpgtw m5, m8
- punpcklwd m11, m5
- punpckhwd m6, m5
- mova [qcoeffq+ncoeffq*4+ 0], m11
- mova [qcoeffq+ncoeffq*4+16], m6
- pxor m5, m5
- mova m11, m13
- mova m6, m13
- pcmpgtw m5, m13
- punpcklwd m11, m5
- punpckhwd m6, m5
- mova [qcoeffq+ncoeffq*4+32], m11
- mova [qcoeffq+ncoeffq*4+48], m6
- pxor m5, m5 ; reset m5 to zero register
-
-%ifidn %1, b_32x32
- pabsw m8, m8
- pabsw m13, m13
-%endif
- pmullw m8, m3 ; dqc[i] = qc[i] * q
- punpckhqdq m3, m3
- pmullw m13, m3 ; dqc[i] = qc[i] * q
-%ifidn %1, b_32x32
- psrlw m8, 1
- psrlw m13, 1
- psignw m8, m9
- psignw m13, m10
-%endif
- ; store 16bit numbers as 32bit numbers in array pointed to by dqcoeff
- mova m11, m8
- mova m6, m8
- pcmpgtw m5, m8
- punpcklwd m11, m5
- punpckhwd m6, m5
- mova [dqcoeffq+ncoeffq*4+ 0], m11
- mova [dqcoeffq+ncoeffq*4+16], m6
- pxor m5, m5
- mova m11, m13
- mova m6, m13
- pcmpgtw m5, m13
- punpcklwd m11, m5
- punpckhwd m6, m5
- mova [dqcoeffq+ncoeffq*4+32], m11
- mova [dqcoeffq+ncoeffq*4+48], m6
- pxor m5, m5 ; reset m5 to zero register
- pcmpeqw m8, m5 ; m8 = c[i] == 0
- pcmpeqw m13, m5 ; m13 = c[i] == 0
- mova m6, [ iscanq+ncoeffq*2+ 0] ; m6 = scan[i]
- mova m11, [ iscanq+ncoeffq*2+16] ; m11 = scan[i]
- psubw m6, m7 ; m6 = scan[i] + 1
- psubw m11, m12 ; m11 = scan[i] + 1
- pandn m8, m6 ; m8 = max(eob)
- pandn m13, m11 ; m13 = max(eob)
- pmaxsw m8, m13
- add ncoeffq, mmsize
- jz .accumulate_eob
-
-.ac_only_loop:
- ; pack coeff from 32bit to 16bit array
- mova m9, [ coeffq+ncoeffq*4+ 0]
- packssdw m9, [ coeffq+ncoeffq*4+16]
- mova m10, [ coeffq+ncoeffq*4+32]
- packssdw m10, [ coeffq+ncoeffq*4+48]
-
- pabsw m6, m9 ; m6 = abs(m9)
- pabsw m11, m10 ; m11 = abs(m10)
- pcmpgtw m7, m6, m0 ; m7 = c[i] >= zbin
- pcmpgtw m12, m11, m0 ; m12 = c[i] >= zbin
-%ifidn %1, b_32x32
- pmovmskb r6d, m7
- pmovmskb r2d, m12
- or r6, r2
- jz .skip_iter
-%endif
- paddsw m6, m1 ; m6 += round
- paddsw m11, m1 ; m11 += round
- pmulhw m14, m6, m2 ; m14 = m6*q>>16
- pmulhw m13, m11, m2 ; m13 = m11*q>>16
- paddw m14, m6 ; m14 += m6
- paddw m13, m11 ; m13 += m11
- pmulhw m14, m4 ; m14 = m14*qsh>>16
- pmulhw m13, m4 ; m13 = m13*qsh>>16
- psignw m14, m9 ; m14 = reinsert sign
- psignw m13, m10 ; m13 = reinsert sign
- pand m14, m7
- pand m13, m12
- ; store 16bit numbers as 32bit numbers in array pointed to by qcoeff
- pxor m11, m11
- mova m11, m14
- mova m6, m14
- pcmpgtw m5, m14
- punpcklwd m11, m5
- punpckhwd m6, m5
- mova [qcoeffq+ncoeffq*4+ 0], m11
- mova [qcoeffq+ncoeffq*4+16], m6
- pxor m5, m5
- mova m11, m13
- mova m6, m13
- pcmpgtw m5, m13
- punpcklwd m11, m5
- punpckhwd m6, m5
- mova [qcoeffq+ncoeffq*4+32], m11
- mova [qcoeffq+ncoeffq*4+48], m6
- pxor m5, m5 ; reset m5 to zero register
-
-%ifidn %1, b_32x32
- pabsw m14, m14
- pabsw m13, m13
-%endif
- pmullw m14, m3 ; dqc[i] = qc[i] * q
- pmullw m13, m3 ; dqc[i] = qc[i] * q
-%ifidn %1, b_32x32
- psrlw m14, 1
- psrlw m13, 1
- psignw m14, m9
- psignw m13, m10
-%endif
-
- ; store 16bit numbers as 32bit numbers in array pointed to by dqcoeff
- mova m11, m14
- mova m6, m14
- pcmpgtw m5, m14
- punpcklwd m11, m5
- punpckhwd m6, m5
- mova [dqcoeffq+ncoeffq*4+ 0], m11
- mova [dqcoeffq+ncoeffq*4+16], m6
- pxor m5, m5
- mova m11, m13
- mova m6, m13
- pcmpgtw m5, m13
- punpcklwd m11, m5
- punpckhwd m6, m5
- mova [dqcoeffq+ncoeffq*4+32], m11
- mova [dqcoeffq+ncoeffq*4+48], m6
- pxor m5, m5
-
- pcmpeqw m14, m5 ; m14 = c[i] == 0
- pcmpeqw m13, m5 ; m13 = c[i] == 0
- mova m6, [ iscanq+ncoeffq*2+ 0] ; m6 = scan[i]
- mova m11, [ iscanq+ncoeffq*2+16] ; m11 = scan[i]
- psubw m6, m7 ; m6 = scan[i] + 1
- psubw m11, m12 ; m11 = scan[i] + 1
- pandn m14, m6 ; m14 = max(eob)
- pandn m13, m11 ; m13 = max(eob)
- pmaxsw m8, m14
- pmaxsw m8, m13
- add ncoeffq, mmsize
- jl .ac_only_loop
-
-%ifidn %1, b_32x32
- jmp .accumulate_eob
-.skip_iter:
- mova [qcoeffq+ncoeffq*4+ 0], m5
- mova [qcoeffq+ncoeffq*4+16], m5
- mova [qcoeffq+ncoeffq*4+32], m5
- mova [qcoeffq+ncoeffq*4+48], m5
- mova [dqcoeffq+ncoeffq*4+ 0], m5
- mova [dqcoeffq+ncoeffq*4+16], m5
- mova [dqcoeffq+ncoeffq*4+32], m5
- mova [dqcoeffq+ncoeffq*4+48], m5
- add ncoeffq, mmsize
- jl .ac_only_loop
-%endif
-
-.accumulate_eob:
- ; Horizontally reduce the eight word lanes of m8 (per-lane eob candidates)
- ; to their signed maximum and store it through the eob pointer.
- mov r2, eobmp
- pshufd m7, m8, 0xe ; bring the upper 64 bits down to the lower 64
- pmaxsw m8, m7 ; words 0-3 = max(lower half, upper half)
- pshuflw m7, m8, 0xe ; words 0,1 <- words 2,3
- pmaxsw m8, m7 ; words 0,1 = max over word pairs
- pshuflw m7, m8, 0x1 ; word 0 <- word 1
- pmaxsw m8, m7 ; word 0 = max over all eight lanes
- pextrw r6, m8, 0 ; r6 = zero-extended word 0
- ; eob points at a 16-bit value, so store only the low word of r6. Storing
- ; the whole register would also write zeros over the bytes that follow it.
- mov [r2], r6w
- RET
-%endmacro
-
-INIT_XMM ssse3
-QUANTIZE_FN b, 9
-QUANTIZE_FN b_32x32, 9
diff --git a/third_party/aom/aom_dsp/x86/quantize_x86.h b/third_party/aom/aom_dsp/x86/quantize_x86.h
deleted file mode 100644
index 4eed7dd29..000000000
--- a/third_party/aom/aom_dsp/x86/quantize_x86.h
+++ /dev/null
@@ -1,77 +0,0 @@
-/*
- * Copyright (c) 2018, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#include <emmintrin.h>
-
-#include "aom/aom_integer.h"
-
-// Loads the five quantizer parameter vectors (eight int16 lanes each) with
-// aligned loads. zbin is returned with 1 subtracted from every lane; the
-// other four vectors are returned exactly as loaded.
-static INLINE void load_b_values(const int16_t *zbin_ptr, __m128i *zbin,
-                                 const int16_t *round_ptr, __m128i *round,
-                                 const int16_t *quant_ptr, __m128i *quant,
-                                 const int16_t *dequant_ptr, __m128i *dequant,
-                                 const int16_t *shift_ptr, __m128i *shift) {
-  const __m128i k_one = _mm_set1_epi16(1);
-  const __m128i zbin_raw = _mm_load_si128((const __m128i *)zbin_ptr);
-
-  *round = _mm_load_si128((const __m128i *)round_ptr);
-  *quant = _mm_load_si128((const __m128i *)quant_ptr);
-  *zbin = _mm_sub_epi16(zbin_raw, k_one);
-  *dequant = _mm_load_si128((const __m128i *)dequant_ptr);
-  *shift = _mm_load_si128((const __m128i *)shift_ptr);
-}
-
-// Conditionally negates each 16-bit lane of a: lanes where sign is all ones
-// (-1) are negated via (a ^ -1) - (-1), lanes where sign is 0 pass through.
-// With ssse3 and later abs() and sign() are preferred.
-static INLINE __m128i invert_sign_sse2(__m128i a, __m128i sign) {
-  const __m128i flipped = _mm_xor_si128(a, sign);
-  return _mm_sub_epi16(flipped, sign);
-}
-
-// Quantizes eight 16-bit lanes in place:
-//   t      = saturating_add(*coeff, round)
-//   u      = mulhi(t, quant) + t
-//   *coeff = mulhi(u, shift)
-// where mulhi keeps the upper 16 bits of the signed 16x16 product.
-static INLINE void calculate_qcoeff(__m128i *coeff, const __m128i round,
-                                    const __m128i quant, const __m128i shift) {
-  const __m128i rounded = _mm_adds_epi16(*coeff, round);
-  const __m128i scaled =
-      _mm_add_epi16(_mm_mulhi_epi16(rounded, quant), rounded);
-  *coeff = _mm_mulhi_epi16(scaled, shift);
-}
-
-// Dequantizes eight 16-bit lanes: lane-wise qcoeff * dequant, keeping the
-// low 16 bits of each product.
-static INLINE __m128i calculate_dqcoeff(__m128i qcoeff, __m128i dequant) {
-  const __m128i product_lo = _mm_mullo_epi16(qcoeff, dequant);
-  return product_lo;
-}
-
-// Produces per-lane end-of-block candidates for 16 coefficients. The 16
-// position values are read from scan_ptr + index. A lane whose zbin mask is
-// -1 gets position + 1 (an index turned into a count); a lane whose
-// coefficient is zero contributes 0. The two 8-lane halves are merged with a
-// signed per-lane max.
-static INLINE __m128i scan_for_eob(__m128i *coeff0, __m128i *coeff1,
-                                   const __m128i zbin_mask0,
-                                   const __m128i zbin_mask1,
-                                   const int16_t *scan_ptr, const int index,
-                                   const __m128i zero) {
-  const int16_t *const positions = scan_ptr + index;
-  const __m128i pos0 = _mm_load_si128((const __m128i *)positions);
-  const __m128i pos1 = _mm_load_si128((const __m128i *)(positions + 8));
-
-  // Subtracting a -1 mask adds one; subtracting a 0 mask leaves the lane as is.
-  const __m128i count0 = _mm_sub_epi16(pos0, zbin_mask0);
-  const __m128i count1 = _mm_sub_epi16(pos1, zbin_mask1);
-
-  // Lanes that hold a zero coefficient are cleared by the and-not.
-  const __m128i is_zero0 = _mm_cmpeq_epi16(*coeff0, zero);
-  const __m128i is_zero1 = _mm_cmpeq_epi16(*coeff1, zero);
-
-  return _mm_max_epi16(_mm_andnot_si128(is_zero0, count0),
-                       _mm_andnot_si128(is_zero1, count1));
-}
-
-// Reduces the eight 16-bit lanes of eob to their signed maximum.
-static INLINE int16_t accumulate_eob(__m128i eob) {
-  // Fold the upper 64 bits onto the lower 64 bits.
-  const __m128i half_max = _mm_max_epi16(eob, _mm_shuffle_epi32(eob, 0xe));
-  // Fold words 2,3 onto words 0,1.
-  const __m128i pair_max =
-      _mm_max_epi16(half_max, _mm_shufflelo_epi16(half_max, 0xe));
-  // Swap words 0 and 1 and fold once more; afterwards both word 0 and word 1
-  // hold the overall maximum, so reading word 1 is sufficient.
-  const __m128i full_max =
-      _mm_max_epi16(pair_max, _mm_shufflelo_epi16(pair_max, 0x1));
-  return _mm_extract_epi16(full_max, 1);
-}
diff --git a/third_party/aom/aom_dsp/x86/sad4d_avx2.c b/third_party/aom/aom_dsp/x86/sad4d_avx2.c
deleted file mode 100644
index f662b62b1..000000000
--- a/third_party/aom/aom_dsp/x86/sad4d_avx2.c
+++ /dev/null
@@ -1,218 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-#include <immintrin.h> // AVX2
-
-#include "config/aom_dsp_rtcd.h"
-
-#include "aom/aom_integer.h"
-
-void aom_sad32x32x4d_avx2(const uint8_t *src, int src_stride,
- const uint8_t *const ref[4], int ref_stride,
- uint32_t res[4]) {
- __m256i src_reg, ref0_reg, ref1_reg, ref2_reg, ref3_reg;
- __m256i sum_ref0, sum_ref1, sum_ref2, sum_ref3;
- __m256i sum_mlow, sum_mhigh;
- int i;
- const uint8_t *ref0, *ref1, *ref2, *ref3;
-
- ref0 = ref[0];
- ref1 = ref[1];
- ref2 = ref[2];
- ref3 = ref[3];
- sum_ref0 = _mm256_set1_epi16(0);
- sum_ref1 = _mm256_set1_epi16(0);
- sum_ref2 = _mm256_set1_epi16(0);
- sum_ref3 = _mm256_set1_epi16(0);
- for (i = 0; i < 32; i++) {
- // load src and all refs
- src_reg = _mm256_loadu_si256((const __m256i *)src);
- ref0_reg = _mm256_loadu_si256((const __m256i *)ref0);
- ref1_reg = _mm256_loadu_si256((const __m256i *)ref1);
- ref2_reg = _mm256_loadu_si256((const __m256i *)ref2);
- ref3_reg = _mm256_loadu_si256((const __m256i *)ref3);
- // sum of the absolute differences between every ref-i to src
- ref0_reg = _mm256_sad_epu8(ref0_reg, src_reg);
- ref1_reg = _mm256_sad_epu8(ref1_reg, src_reg);
- ref2_reg = _mm256_sad_epu8(ref2_reg, src_reg);
- ref3_reg = _mm256_sad_epu8(ref3_reg, src_reg);
- // sum every ref-i
- sum_ref0 = _mm256_add_epi32(sum_ref0, ref0_reg);
- sum_ref1 = _mm256_add_epi32(sum_ref1, ref1_reg);
- sum_ref2 = _mm256_add_epi32(sum_ref2, ref2_reg);
- sum_ref3 = _mm256_add_epi32(sum_ref3, ref3_reg);
-
- src += src_stride;
- ref0 += ref_stride;
- ref1 += ref_stride;
- ref2 += ref_stride;
- ref3 += ref_stride;
- }
- {
- __m128i sum;
- // in sum_ref-i the result is saved in the first 4 bytes
- // the other 4 bytes are zeroed.
- // sum_ref1 and sum_ref3 are shifted left by 4 bytes
- sum_ref1 = _mm256_slli_si256(sum_ref1, 4);
- sum_ref3 = _mm256_slli_si256(sum_ref3, 4);
-
- // merge sum_ref0 and sum_ref1 also sum_ref2 and sum_ref3
- sum_ref0 = _mm256_or_si256(sum_ref0, sum_ref1);
- sum_ref2 = _mm256_or_si256(sum_ref2, sum_ref3);
-
- // merge every 64 bit from each sum_ref-i
- sum_mlow = _mm256_unpacklo_epi64(sum_ref0, sum_ref2);
- sum_mhigh = _mm256_unpackhi_epi64(sum_ref0, sum_ref2);
-
- // add the low 64 bit to the high 64 bit
- sum_mlow = _mm256_add_epi32(sum_mlow, sum_mhigh);
-
- // add the low 128 bit to the high 128 bit
- sum = _mm_add_epi32(_mm256_castsi256_si128(sum_mlow),
- _mm256_extractf128_si256(sum_mlow, 1));
-
- _mm_storeu_si128((__m128i *)(res), sum);
- }
- _mm256_zeroupper();
-}
-
-void aom_sad64x64x4d_avx2(const uint8_t *src, int src_stride,
- const uint8_t *const ref[4], int ref_stride,
- uint32_t res[4]) {
- __m256i src_reg, srcnext_reg, ref0_reg, ref0next_reg;
- __m256i ref1_reg, ref1next_reg, ref2_reg, ref2next_reg;
- __m256i ref3_reg, ref3next_reg;
- __m256i sum_ref0, sum_ref1, sum_ref2, sum_ref3;
- __m256i sum_mlow, sum_mhigh;
- int i;
- const uint8_t *ref0, *ref1, *ref2, *ref3;
-
- ref0 = ref[0];
- ref1 = ref[1];
- ref2 = ref[2];
- ref3 = ref[3];
- sum_ref0 = _mm256_set1_epi16(0);
- sum_ref1 = _mm256_set1_epi16(0);
- sum_ref2 = _mm256_set1_epi16(0);
- sum_ref3 = _mm256_set1_epi16(0);
- for (i = 0; i < 64; i++) {
- // load 64 bytes from src and all refs
- src_reg = _mm256_loadu_si256((const __m256i *)src);
- srcnext_reg = _mm256_loadu_si256((const __m256i *)(src + 32));
- ref0_reg = _mm256_loadu_si256((const __m256i *)ref0);
- ref0next_reg = _mm256_loadu_si256((const __m256i *)(ref0 + 32));
- ref1_reg = _mm256_loadu_si256((const __m256i *)ref1);
- ref1next_reg = _mm256_loadu_si256((const __m256i *)(ref1 + 32));
- ref2_reg = _mm256_loadu_si256((const __m256i *)ref2);
- ref2next_reg = _mm256_loadu_si256((const __m256i *)(ref2 + 32));
- ref3_reg = _mm256_loadu_si256((const __m256i *)ref3);
- ref3next_reg = _mm256_loadu_si256((const __m256i *)(ref3 + 32));
- // sum of the absolute differences between every ref-i to src
- ref0_reg = _mm256_sad_epu8(ref0_reg, src_reg);
- ref1_reg = _mm256_sad_epu8(ref1_reg, src_reg);
- ref2_reg = _mm256_sad_epu8(ref2_reg, src_reg);
- ref3_reg = _mm256_sad_epu8(ref3_reg, src_reg);
- ref0next_reg = _mm256_sad_epu8(ref0next_reg, srcnext_reg);
- ref1next_reg = _mm256_sad_epu8(ref1next_reg, srcnext_reg);
- ref2next_reg = _mm256_sad_epu8(ref2next_reg, srcnext_reg);
- ref3next_reg = _mm256_sad_epu8(ref3next_reg, srcnext_reg);
-
- // sum every ref-i
- sum_ref0 = _mm256_add_epi32(sum_ref0, ref0_reg);
- sum_ref1 = _mm256_add_epi32(sum_ref1, ref1_reg);
- sum_ref2 = _mm256_add_epi32(sum_ref2, ref2_reg);
- sum_ref3 = _mm256_add_epi32(sum_ref3, ref3_reg);
- sum_ref0 = _mm256_add_epi32(sum_ref0, ref0next_reg);
- sum_ref1 = _mm256_add_epi32(sum_ref1, ref1next_reg);
- sum_ref2 = _mm256_add_epi32(sum_ref2, ref2next_reg);
- sum_ref3 = _mm256_add_epi32(sum_ref3, ref3next_reg);
- src += src_stride;
- ref0 += ref_stride;
- ref1 += ref_stride;
- ref2 += ref_stride;
- ref3 += ref_stride;
- }
- {
- __m128i sum;
-
- // in sum_ref-i the result is saved in the first 4 bytes
- // the other 4 bytes are zeroed.
- // sum_ref1 and sum_ref3 are shifted left by 4 bytes
- sum_ref1 = _mm256_slli_si256(sum_ref1, 4);
- sum_ref3 = _mm256_slli_si256(sum_ref3, 4);
-
- // merge sum_ref0 and sum_ref1 also sum_ref2 and sum_ref3
- sum_ref0 = _mm256_or_si256(sum_ref0, sum_ref1);
- sum_ref2 = _mm256_or_si256(sum_ref2, sum_ref3);
-
- // merge every 64 bit from each sum_ref-i
- sum_mlow = _mm256_unpacklo_epi64(sum_ref0, sum_ref2);
- sum_mhigh = _mm256_unpackhi_epi64(sum_ref0, sum_ref2);
-
- // add the low 64 bit to the high 64 bit
- sum_mlow = _mm256_add_epi32(sum_mlow, sum_mhigh);
-
- // add the low 128 bit to the high 128 bit
- sum = _mm_add_epi32(_mm256_castsi256_si128(sum_mlow),
- _mm256_extractf128_si256(sum_mlow, 1));
-
- _mm_storeu_si128((__m128i *)(res), sum);
- }
- _mm256_zeroupper();
-}
-
-void aom_sad32x64x4d_avx2(const uint8_t *src, int src_stride,
- const uint8_t *const ref[4], int ref_stride,
- uint32_t res[4]) {
- const uint8_t *rf[4];
- uint32_t sum0[4];
- uint32_t sum1[4];
-
- rf[0] = ref[0];
- rf[1] = ref[1];
- rf[2] = ref[2];
- rf[3] = ref[3];
- aom_sad32x32x4d_avx2(src, src_stride, rf, ref_stride, sum0);
- src += src_stride << 5;
- rf[0] += ref_stride << 5;
- rf[1] += ref_stride << 5;
- rf[2] += ref_stride << 5;
- rf[3] += ref_stride << 5;
- aom_sad32x32x4d_avx2(src, src_stride, rf, ref_stride, sum1);
- res[0] = sum0[0] + sum1[0];
- res[1] = sum0[1] + sum1[1];
- res[2] = sum0[2] + sum1[2];
- res[3] = sum0[3] + sum1[3];
-}
-
-void aom_sad64x32x4d_avx2(const uint8_t *src, int src_stride,
- const uint8_t *const ref[4], int ref_stride,
- uint32_t res[4]) {
- const uint8_t *rf[4];
- uint32_t sum0[4];
- uint32_t sum1[4];
- unsigned int half_width = 32;
-
- rf[0] = ref[0];
- rf[1] = ref[1];
- rf[2] = ref[2];
- rf[3] = ref[3];
- aom_sad32x32x4d_avx2(src, src_stride, rf, ref_stride, sum0);
- src += half_width;
- rf[0] += half_width;
- rf[1] += half_width;
- rf[2] += half_width;
- rf[3] += half_width;
- aom_sad32x32x4d_avx2(src, src_stride, rf, ref_stride, sum1);
- res[0] = sum0[0] + sum1[0];
- res[1] = sum0[1] + sum1[1];
- res[2] = sum0[2] + sum1[2];
- res[3] = sum0[3] + sum1[3];
-}
diff --git a/third_party/aom/aom_dsp/x86/sad4d_sse2.asm b/third_party/aom/aom_dsp/x86/sad4d_sse2.asm
deleted file mode 100644
index 55a856985..000000000
--- a/third_party/aom/aom_dsp/x86/sad4d_sse2.asm
+++ /dev/null
@@ -1,257 +0,0 @@
-;
-; Copyright (c) 2016, Alliance for Open Media. All rights reserved
-;
-; This source code is subject to the terms of the BSD 2 Clause License and
-; the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
-; was not distributed with this source code in the LICENSE file, you can
-; obtain it at www.aomedia.org/license/software. If the Alliance for Open
-; Media Patent License 1.0 was not distributed with this source code in the
-; PATENTS file, you can obtain it at www.aomedia.org/license/patent.
-;
-
-;
-
-%include "third_party/x86inc/x86inc.asm"
-
-SECTION .text
-
-; PROCESS_4x2x4 first, off_{first,second}_{src,ref}, advance_at_end
-%macro PROCESS_4x2x4 5-6 0
- movd m0, [srcq +%2]
-%if %1 == 1
- movd m6, [ref1q+%3]
- movd m4, [ref2q+%3]
- movd m7, [ref3q+%3]
- movd m5, [ref4q+%3]
- movd m1, [srcq +%4]
- movd m2, [ref1q+%5]
- punpckldq m0, m1
- punpckldq m6, m2
- movd m1, [ref2q+%5]
- movd m2, [ref3q+%5]
- movd m3, [ref4q+%5]
- punpckldq m4, m1
- punpckldq m7, m2
- punpckldq m5, m3
- movlhps m0, m0
- movlhps m6, m4
- movlhps m7, m5
- psadbw m6, m0
- psadbw m7, m0
-%else
- movd m1, [ref1q+%3]
- movd m5, [ref1q+%5]
- movd m2, [ref2q+%3]
- movd m4, [ref2q+%5]
- punpckldq m1, m5
- punpckldq m2, m4
- movd m3, [ref3q+%3]
- movd m5, [ref3q+%5]
- punpckldq m3, m5
- movd m4, [ref4q+%3]
- movd m5, [ref4q+%5]
- punpckldq m4, m5
- movd m5, [srcq +%4]
- punpckldq m0, m5
- movlhps m0, m0
- movlhps m1, m2
- movlhps m3, m4
- psadbw m1, m0
- psadbw m3, m0
- paddd m6, m1
- paddd m7, m3
-%endif
-%if %6 == 1
- lea srcq, [srcq +src_strideq*2]
- lea ref1q, [ref1q+ref_strideq*2]
- lea ref2q, [ref2q+ref_strideq*2]
- lea ref3q, [ref3q+ref_strideq*2]
- lea ref4q, [ref4q+ref_strideq*2]
-%endif
-%endmacro
-
-; PROCESS_8x2x4 first, off_{first,second}_{src,ref}, advance_at_end
-%macro PROCESS_8x2x4 5-6 0
- movh m0, [srcq +%2]
-%if %1 == 1
- movh m4, [ref1q+%3]
- movh m5, [ref2q+%3]
- movh m6, [ref3q+%3]
- movh m7, [ref4q+%3]
- movhps m0, [srcq +%4]
- movhps m4, [ref1q+%5]
- movhps m5, [ref2q+%5]
- movhps m6, [ref3q+%5]
- movhps m7, [ref4q+%5]
- psadbw m4, m0
- psadbw m5, m0
- psadbw m6, m0
- psadbw m7, m0
-%else
- movh m1, [ref1q+%3]
- movh m2, [ref2q+%3]
- movh m3, [ref3q+%3]
- movhps m0, [srcq +%4]
- movhps m1, [ref1q+%5]
- movhps m2, [ref2q+%5]
- movhps m3, [ref3q+%5]
- psadbw m1, m0
- psadbw m2, m0
- psadbw m3, m0
- paddd m4, m1
- movh m1, [ref4q+%3]
- movhps m1, [ref4q+%5]
- paddd m5, m2
- paddd m6, m3
- psadbw m1, m0
- paddd m7, m1
-%endif
-%if %6 == 1
- lea srcq, [srcq +src_strideq*2]
- lea ref1q, [ref1q+ref_strideq*2]
- lea ref2q, [ref2q+ref_strideq*2]
- lea ref3q, [ref3q+ref_strideq*2]
- lea ref4q, [ref4q+ref_strideq*2]
-%endif
-%endmacro
-
-; PROCESS_16x2x4 first, off_{first,second}_{src,ref}, advance_at_end
-%macro PROCESS_16x2x4 5-6 0
- ; 1st 16 px
- mova m0, [srcq +%2]
-%if %1 == 1
- movu m4, [ref1q+%3]
- movu m5, [ref2q+%3]
- movu m6, [ref3q+%3]
- movu m7, [ref4q+%3]
- psadbw m4, m0
- psadbw m5, m0
- psadbw m6, m0
- psadbw m7, m0
-%else
- movu m1, [ref1q+%3]
- movu m2, [ref2q+%3]
- movu m3, [ref3q+%3]
- psadbw m1, m0
- psadbw m2, m0
- psadbw m3, m0
- paddd m4, m1
- movu m1, [ref4q+%3]
- paddd m5, m2
- paddd m6, m3
- psadbw m1, m0
- paddd m7, m1
-%endif
-
- ; 2nd 16 px
- mova m0, [srcq +%4]
- movu m1, [ref1q+%5]
- movu m2, [ref2q+%5]
- movu m3, [ref3q+%5]
- psadbw m1, m0
- psadbw m2, m0
- psadbw m3, m0
- paddd m4, m1
- movu m1, [ref4q+%5]
- paddd m5, m2
- paddd m6, m3
-%if %6 == 1
- lea srcq, [srcq +src_strideq*2]
- lea ref1q, [ref1q+ref_strideq*2]
- lea ref2q, [ref2q+ref_strideq*2]
- lea ref3q, [ref3q+ref_strideq*2]
- lea ref4q, [ref4q+ref_strideq*2]
-%endif
- psadbw m1, m0
- paddd m7, m1
-%endmacro
-
-; PROCESS_32x2x4 first, off_{first,second}_{src,ref}, advance_at_end
-%macro PROCESS_32x2x4 5-6 0
- PROCESS_16x2x4 %1, %2, %3, %2 + 16, %3 + 16
- PROCESS_16x2x4 0, %4, %5, %4 + 16, %5 + 16, %6
-%endmacro
-
-; PROCESS_64x2x4 first, off_{first,second}_{src,ref}, advance_at_end
-%macro PROCESS_64x2x4 5-6 0
- PROCESS_32x2x4 %1, %2, %3, %2 + 32, %3 + 32
- PROCESS_32x2x4 0, %4, %5, %4 + 32, %5 + 32, %6
-%endmacro
-
-; PROCESS_128x2x4 first, off_{first,second}_{src,ref}, advance_at_end
-%macro PROCESS_128x2x4 5-6 0
- PROCESS_64x2x4 %1, %2, %3, %2 + 64, %3 + 64
- PROCESS_64x2x4 0, %4, %5, %4 + 64, %5 + 64, %6
-%endmacro
-
-; void aom_sadNxNx4d_sse2(uint8_t *src, int src_stride,
-; uint8_t *ref[4], int ref_stride,
-; uint32_t res[4]);
-; where NxN = 64x64, 32x32, 16x16, 16x8, 8x16, 8x8, 8x4, 4x8 and 4x4
-%macro SADNXN4D 2
-%if UNIX64
-cglobal sad%1x%2x4d, 5, 8, 8, src, src_stride, ref1, ref_stride, \
- res, ref2, ref3, ref4
-%else
-cglobal sad%1x%2x4d, 4, 7, 8, src, src_stride, ref1, ref_stride, \
- ref2, ref3, ref4
-%endif
- movsxdifnidn src_strideq, src_strided
- movsxdifnidn ref_strideq, ref_strided
- mov ref2q, [ref1q+gprsize*1]
- mov ref3q, [ref1q+gprsize*2]
- mov ref4q, [ref1q+gprsize*3]
- mov ref1q, [ref1q+gprsize*0]
-
- PROCESS_%1x2x4 1, 0, 0, src_strideq, ref_strideq, 1
-%rep (%2-4)/2
- PROCESS_%1x2x4 0, 0, 0, src_strideq, ref_strideq, 1
-%endrep
- PROCESS_%1x2x4 0, 0, 0, src_strideq, ref_strideq, 0
-
-%if %1 > 4
- pslldq m5, 4
- pslldq m7, 4
- por m4, m5
- por m6, m7
- mova m5, m4
- mova m7, m6
- punpcklqdq m4, m6
- punpckhqdq m5, m7
- movifnidn r4, r4mp
- paddd m4, m5
- movu [r4], m4
- RET
-%else
- movifnidn r4, r4mp
- pshufd m6, m6, 0x08
- pshufd m7, m7, 0x08
- movq [r4+0], m6
- movq [r4+8], m7
- RET
-%endif
-%endmacro
-
-INIT_XMM sse2
-SADNXN4D 128, 128
-SADNXN4D 128, 64
-SADNXN4D 64, 128
-SADNXN4D 64, 64
-SADNXN4D 64, 32
-SADNXN4D 32, 64
-SADNXN4D 32, 32
-SADNXN4D 32, 16
-SADNXN4D 16, 32
-SADNXN4D 16, 16
-SADNXN4D 16, 8
-SADNXN4D 8, 16
-SADNXN4D 8, 8
-SADNXN4D 8, 4
-SADNXN4D 4, 8
-SADNXN4D 4, 4
-SADNXN4D 4, 16
-SADNXN4D 16, 4
-SADNXN4D 8, 32
-SADNXN4D 32, 8
-SADNXN4D 16, 64
-SADNXN4D 64, 16
diff --git a/third_party/aom/aom_dsp/x86/sad_avx2.c b/third_party/aom/aom_dsp/x86/sad_avx2.c
deleted file mode 100644
index a50dba64a..000000000
--- a/third_party/aom/aom_dsp/x86/sad_avx2.c
+++ /dev/null
@@ -1,189 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-#include <immintrin.h>
-
-#include "config/aom_dsp_rtcd.h"
-
-#include "aom_ports/mem.h"
-
-#define FSAD64_H(h) \
- unsigned int aom_sad64x##h##_avx2(const uint8_t *src_ptr, int src_stride, \
- const uint8_t *ref_ptr, int ref_stride) { \
- int i, res; \
- __m256i sad1_reg, sad2_reg, ref1_reg, ref2_reg; \
- __m256i sum_sad = _mm256_setzero_si256(); \
- __m256i sum_sad_h; \
- __m128i sum_sad128; \
- for (i = 0; i < h; i++) { \
- ref1_reg = _mm256_loadu_si256((__m256i const *)ref_ptr); \
- ref2_reg = _mm256_loadu_si256((__m256i const *)(ref_ptr + 32)); \
- sad1_reg = _mm256_sad_epu8( \
- ref1_reg, _mm256_loadu_si256((__m256i const *)src_ptr)); \
- sad2_reg = _mm256_sad_epu8( \
- ref2_reg, _mm256_loadu_si256((__m256i const *)(src_ptr + 32))); \
- sum_sad = \
- _mm256_add_epi32(sum_sad, _mm256_add_epi32(sad1_reg, sad2_reg)); \
- ref_ptr += ref_stride; \
- src_ptr += src_stride; \
- } \
- sum_sad_h = _mm256_srli_si256(sum_sad, 8); \
- sum_sad = _mm256_add_epi32(sum_sad, sum_sad_h); \
- sum_sad128 = _mm256_extracti128_si256(sum_sad, 1); \
- sum_sad128 = _mm_add_epi32(_mm256_castsi256_si128(sum_sad), sum_sad128); \
- res = _mm_cvtsi128_si32(sum_sad128); \
- _mm256_zeroupper(); \
- return res; \
- }
-
-#define FSAD32_H(h) \
- unsigned int aom_sad32x##h##_avx2(const uint8_t *src_ptr, int src_stride, \
- const uint8_t *ref_ptr, int ref_stride) { \
- int i, res; \
- __m256i sad1_reg, sad2_reg, ref1_reg, ref2_reg; \
- __m256i sum_sad = _mm256_setzero_si256(); \
- __m256i sum_sad_h; \
- __m128i sum_sad128; \
- int ref2_stride = ref_stride << 1; \
- int src2_stride = src_stride << 1; \
- int max = h >> 1; \
- for (i = 0; i < max; i++) { \
- ref1_reg = _mm256_loadu_si256((__m256i const *)ref_ptr); \
- ref2_reg = _mm256_loadu_si256((__m256i const *)(ref_ptr + ref_stride)); \
- sad1_reg = _mm256_sad_epu8( \
- ref1_reg, _mm256_loadu_si256((__m256i const *)src_ptr)); \
- sad2_reg = _mm256_sad_epu8( \
- ref2_reg, \
- _mm256_loadu_si256((__m256i const *)(src_ptr + src_stride))); \
- sum_sad = \
- _mm256_add_epi32(sum_sad, _mm256_add_epi32(sad1_reg, sad2_reg)); \
- ref_ptr += ref2_stride; \
- src_ptr += src2_stride; \
- } \
- sum_sad_h = _mm256_srli_si256(sum_sad, 8); \
- sum_sad = _mm256_add_epi32(sum_sad, sum_sad_h); \
- sum_sad128 = _mm256_extracti128_si256(sum_sad, 1); \
- sum_sad128 = _mm_add_epi32(_mm256_castsi256_si128(sum_sad), sum_sad128); \
- res = _mm_cvtsi128_si32(sum_sad128); \
- _mm256_zeroupper(); \
- return res; \
- }
-
-#define FSAD64 \
- FSAD64_H(64); \
- FSAD64_H(32);
-
-#define FSAD32 \
- FSAD32_H(64); \
- FSAD32_H(32); \
- FSAD32_H(16);
-
-/* clang-format off */
-FSAD64
-FSAD32
-/* clang-format on */
-
-#undef FSAD64
-#undef FSAD32
-#undef FSAD64_H
-#undef FSAD32_H
-
-#define FSADAVG64_H(h) \
- unsigned int aom_sad64x##h##_avg_avx2( \
- const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, \
- int ref_stride, const uint8_t *second_pred) { \
- int i, res; \
- __m256i sad1_reg, sad2_reg, ref1_reg, ref2_reg; \
- __m256i sum_sad = _mm256_setzero_si256(); \
- __m256i sum_sad_h; \
- __m128i sum_sad128; \
- for (i = 0; i < h; i++) { \
- ref1_reg = _mm256_loadu_si256((__m256i const *)ref_ptr); \
- ref2_reg = _mm256_loadu_si256((__m256i const *)(ref_ptr + 32)); \
- ref1_reg = _mm256_avg_epu8( \
- ref1_reg, _mm256_loadu_si256((__m256i const *)second_pred)); \
- ref2_reg = _mm256_avg_epu8( \
- ref2_reg, _mm256_loadu_si256((__m256i const *)(second_pred + 32))); \
- sad1_reg = _mm256_sad_epu8( \
- ref1_reg, _mm256_loadu_si256((__m256i const *)src_ptr)); \
- sad2_reg = _mm256_sad_epu8( \
- ref2_reg, _mm256_loadu_si256((__m256i const *)(src_ptr + 32))); \
- sum_sad = \
- _mm256_add_epi32(sum_sad, _mm256_add_epi32(sad1_reg, sad2_reg)); \
- ref_ptr += ref_stride; \
- src_ptr += src_stride; \
- second_pred += 64; \
- } \
- sum_sad_h = _mm256_srli_si256(sum_sad, 8); \
- sum_sad = _mm256_add_epi32(sum_sad, sum_sad_h); \
- sum_sad128 = _mm256_extracti128_si256(sum_sad, 1); \
- sum_sad128 = _mm_add_epi32(_mm256_castsi256_si128(sum_sad), sum_sad128); \
- res = _mm_cvtsi128_si32(sum_sad128); \
- _mm256_zeroupper(); \
- return res; \
- }
-
-#define FSADAVG32_H(h) \
- unsigned int aom_sad32x##h##_avg_avx2( \
- const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, \
- int ref_stride, const uint8_t *second_pred) { \
- int i, res; \
- __m256i sad1_reg, sad2_reg, ref1_reg, ref2_reg; \
- __m256i sum_sad = _mm256_setzero_si256(); \
- __m256i sum_sad_h; \
- __m128i sum_sad128; \
- int ref2_stride = ref_stride << 1; \
- int src2_stride = src_stride << 1; \
- int max = h >> 1; \
- for (i = 0; i < max; i++) { \
- ref1_reg = _mm256_loadu_si256((__m256i const *)ref_ptr); \
- ref2_reg = _mm256_loadu_si256((__m256i const *)(ref_ptr + ref_stride)); \
- ref1_reg = _mm256_avg_epu8( \
- ref1_reg, _mm256_loadu_si256((__m256i const *)second_pred)); \
- ref2_reg = _mm256_avg_epu8( \
- ref2_reg, _mm256_loadu_si256((__m256i const *)(second_pred + 32))); \
- sad1_reg = _mm256_sad_epu8( \
- ref1_reg, _mm256_loadu_si256((__m256i const *)src_ptr)); \
- sad2_reg = _mm256_sad_epu8( \
- ref2_reg, \
- _mm256_loadu_si256((__m256i const *)(src_ptr + src_stride))); \
- sum_sad = \
- _mm256_add_epi32(sum_sad, _mm256_add_epi32(sad1_reg, sad2_reg)); \
- ref_ptr += ref2_stride; \
- src_ptr += src2_stride; \
- second_pred += 64; \
- } \
- sum_sad_h = _mm256_srli_si256(sum_sad, 8); \
- sum_sad = _mm256_add_epi32(sum_sad, sum_sad_h); \
- sum_sad128 = _mm256_extracti128_si256(sum_sad, 1); \
- sum_sad128 = _mm_add_epi32(_mm256_castsi256_si128(sum_sad), sum_sad128); \
- res = _mm_cvtsi128_si32(sum_sad128); \
- _mm256_zeroupper(); \
- return res; \
- }
-
-#define FSADAVG64 \
- FSADAVG64_H(64); \
- FSADAVG64_H(32);
-
-#define FSADAVG32 \
- FSADAVG32_H(64); \
- FSADAVG32_H(32); \
- FSADAVG32_H(16);
-
-/* clang-format off */
-FSADAVG64
-FSADAVG32
-/* clang-format on */
-
-#undef FSADAVG64
-#undef FSADAVG32
-#undef FSADAVG64_H
-#undef FSADAVG32_H
diff --git a/third_party/aom/aom_dsp/x86/sad_highbd_avx2.c b/third_party/aom/aom_dsp/x86/sad_highbd_avx2.c
deleted file mode 100644
index b506d4663..000000000
--- a/third_party/aom/aom_dsp/x86/sad_highbd_avx2.c
+++ /dev/null
@@ -1,1038 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#include <immintrin.h>
-
-#include "config/aom_config.h"
-#include "config/aom_dsp_rtcd.h"
-
-#include "aom/aom_integer.h"
-#include "aom_dsp/x86/synonyms_avx2.h"
-#include "aom_ports/mem.h"
-
-// SAD
-static INLINE unsigned int get_sad_from_mm256_epi32(const __m256i *v) {
- // input 8 32-bit summation
- __m128i lo128, hi128;
- __m256i u = _mm256_srli_si256(*v, 8);
- u = _mm256_add_epi32(u, *v);
-
- // 4 32-bit summation
- hi128 = _mm256_extracti128_si256(u, 1);
- lo128 = _mm256_castsi256_si128(u);
- lo128 = _mm_add_epi32(hi128, lo128);
-
- // 2 32-bit summation
- hi128 = _mm_srli_si128(lo128, 4);
- lo128 = _mm_add_epi32(lo128, hi128);
-
- return (unsigned int)_mm_cvtsi128_si32(lo128);
-}
-
-unsigned int aom_highbd_sad16x8_avx2(const uint8_t *src, int src_stride,
- const uint8_t *ref, int ref_stride) {
- const uint16_t *src_ptr = CONVERT_TO_SHORTPTR(src);
- const uint16_t *ref_ptr = CONVERT_TO_SHORTPTR(ref);
-
- // first 4 rows
- __m256i s0 = _mm256_loadu_si256((const __m256i *)src_ptr);
- __m256i s1 = _mm256_loadu_si256((const __m256i *)(src_ptr + src_stride));
- __m256i s2 = _mm256_loadu_si256((const __m256i *)(src_ptr + 2 * src_stride));
- __m256i s3 = _mm256_loadu_si256((const __m256i *)(src_ptr + 3 * src_stride));
-
- __m256i r0 = _mm256_loadu_si256((const __m256i *)ref_ptr);
- __m256i r1 = _mm256_loadu_si256((const __m256i *)(ref_ptr + ref_stride));
- __m256i r2 = _mm256_loadu_si256((const __m256i *)(ref_ptr + 2 * ref_stride));
- __m256i r3 = _mm256_loadu_si256((const __m256i *)(ref_ptr + 3 * ref_stride));
-
- __m256i u0 = _mm256_sub_epi16(s0, r0);
- __m256i u1 = _mm256_sub_epi16(s1, r1);
- __m256i u2 = _mm256_sub_epi16(s2, r2);
- __m256i u3 = _mm256_sub_epi16(s3, r3);
- __m256i zero = _mm256_setzero_si256();
- __m256i sum0, sum1;
-
- u0 = _mm256_abs_epi16(u0);
- u1 = _mm256_abs_epi16(u1);
- u2 = _mm256_abs_epi16(u2);
- u3 = _mm256_abs_epi16(u3);
-
- sum0 = _mm256_add_epi16(u0, u1);
- sum0 = _mm256_add_epi16(sum0, u2);
- sum0 = _mm256_add_epi16(sum0, u3);
-
- // second 4 rows
- src_ptr += src_stride << 2;
- ref_ptr += ref_stride << 2;
- s0 = _mm256_loadu_si256((const __m256i *)src_ptr);
- s1 = _mm256_loadu_si256((const __m256i *)(src_ptr + src_stride));
- s2 = _mm256_loadu_si256((const __m256i *)(src_ptr + 2 * src_stride));
- s3 = _mm256_loadu_si256((const __m256i *)(src_ptr + 3 * src_stride));
-
- r0 = _mm256_loadu_si256((const __m256i *)ref_ptr);
- r1 = _mm256_loadu_si256((const __m256i *)(ref_ptr + ref_stride));
- r2 = _mm256_loadu_si256((const __m256i *)(ref_ptr + 2 * ref_stride));
- r3 = _mm256_loadu_si256((const __m256i *)(ref_ptr + 3 * ref_stride));
-
- u0 = _mm256_sub_epi16(s0, r0);
- u1 = _mm256_sub_epi16(s1, r1);
- u2 = _mm256_sub_epi16(s2, r2);
- u3 = _mm256_sub_epi16(s3, r3);
-
- u0 = _mm256_abs_epi16(u0);
- u1 = _mm256_abs_epi16(u1);
- u2 = _mm256_abs_epi16(u2);
- u3 = _mm256_abs_epi16(u3);
-
- sum1 = _mm256_add_epi16(u0, u1);
- sum1 = _mm256_add_epi16(sum1, u2);
- sum1 = _mm256_add_epi16(sum1, u3);
-
- // find out the SAD
- s0 = _mm256_unpacklo_epi16(sum0, zero);
- s1 = _mm256_unpackhi_epi16(sum0, zero);
- r0 = _mm256_unpacklo_epi16(sum1, zero);
- r1 = _mm256_unpackhi_epi16(sum1, zero);
- s0 = _mm256_add_epi32(s0, s1);
- r0 = _mm256_add_epi32(r0, r1);
- sum0 = _mm256_add_epi32(s0, r0);
- // 8 32-bit summation
-
- return (unsigned int)get_sad_from_mm256_epi32(&sum0);
-}
-
-unsigned int aom_highbd_sad16x16_avx2(const uint8_t *src, int src_stride,
- const uint8_t *ref, int ref_stride) {
- const uint16_t *src_ptr = CONVERT_TO_SHORTPTR(src);
- const uint16_t *ref_ptr = CONVERT_TO_SHORTPTR(ref);
- __m256i s0, s1, s2, s3, r0, r1, r2, r3, u0, u1, u2, u3;
- __m256i sum0;
- __m256i sum = _mm256_setzero_si256();
- const __m256i zero = _mm256_setzero_si256();
- int row = 0;
-
- // Loop for every 4 rows
- while (row < 16) {
- s0 = _mm256_loadu_si256((const __m256i *)src_ptr);
- s1 = _mm256_loadu_si256((const __m256i *)(src_ptr + src_stride));
- s2 = _mm256_loadu_si256((const __m256i *)(src_ptr + 2 * src_stride));
- s3 = _mm256_loadu_si256((const __m256i *)(src_ptr + 3 * src_stride));
-
- r0 = _mm256_loadu_si256((const __m256i *)ref_ptr);
- r1 = _mm256_loadu_si256((const __m256i *)(ref_ptr + ref_stride));
- r2 = _mm256_loadu_si256((const __m256i *)(ref_ptr + 2 * ref_stride));
- r3 = _mm256_loadu_si256((const __m256i *)(ref_ptr + 3 * ref_stride));
-
- u0 = _mm256_sub_epi16(s0, r0);
- u1 = _mm256_sub_epi16(s1, r1);
- u2 = _mm256_sub_epi16(s2, r2);
- u3 = _mm256_sub_epi16(s3, r3);
-
- u0 = _mm256_abs_epi16(u0);
- u1 = _mm256_abs_epi16(u1);
- u2 = _mm256_abs_epi16(u2);
- u3 = _mm256_abs_epi16(u3);
-
- sum0 = _mm256_add_epi16(u0, u1);
- sum0 = _mm256_add_epi16(sum0, u2);
- sum0 = _mm256_add_epi16(sum0, u3);
-
- s0 = _mm256_unpacklo_epi16(sum0, zero);
- s1 = _mm256_unpackhi_epi16(sum0, zero);
- sum = _mm256_add_epi32(sum, s0);
- sum = _mm256_add_epi32(sum, s1);
- // 8 32-bit summation
-
- row += 4;
- src_ptr += src_stride << 2;
- ref_ptr += ref_stride << 2;
- }
- return get_sad_from_mm256_epi32(&sum);
-}
-
-static void sad32x4(const uint16_t *src_ptr, int src_stride,
- const uint16_t *ref_ptr, int ref_stride,
- const uint16_t *sec_ptr, __m256i *sad_acc) {
- __m256i s0, s1, s2, s3, r0, r1, r2, r3;
- const __m256i zero = _mm256_setzero_si256();
- int row_sections = 0;
-
- while (row_sections < 2) {
- s0 = _mm256_loadu_si256((const __m256i *)src_ptr);
- s1 = _mm256_loadu_si256((const __m256i *)(src_ptr + 16));
- s2 = _mm256_loadu_si256((const __m256i *)(src_ptr + src_stride));
- s3 = _mm256_loadu_si256((const __m256i *)(src_ptr + src_stride + 16));
-
- r0 = _mm256_loadu_si256((const __m256i *)ref_ptr);
- r1 = _mm256_loadu_si256((const __m256i *)(ref_ptr + 16));
- r2 = _mm256_loadu_si256((const __m256i *)(ref_ptr + ref_stride));
- r3 = _mm256_loadu_si256((const __m256i *)(ref_ptr + ref_stride + 16));
-
- if (sec_ptr) {
- r0 = _mm256_avg_epu16(r0, _mm256_loadu_si256((const __m256i *)sec_ptr));
- r1 = _mm256_avg_epu16(
- r1, _mm256_loadu_si256((const __m256i *)(sec_ptr + 16)));
- r2 = _mm256_avg_epu16(
- r2, _mm256_loadu_si256((const __m256i *)(sec_ptr + 32)));
- r3 = _mm256_avg_epu16(
- r3, _mm256_loadu_si256((const __m256i *)(sec_ptr + 48)));
- }
- s0 = _mm256_sub_epi16(s0, r0);
- s1 = _mm256_sub_epi16(s1, r1);
- s2 = _mm256_sub_epi16(s2, r2);
- s3 = _mm256_sub_epi16(s3, r3);
-
- s0 = _mm256_abs_epi16(s0);
- s1 = _mm256_abs_epi16(s1);
- s2 = _mm256_abs_epi16(s2);
- s3 = _mm256_abs_epi16(s3);
-
- s0 = _mm256_add_epi16(s0, s1);
- s0 = _mm256_add_epi16(s0, s2);
- s0 = _mm256_add_epi16(s0, s3);
-
- r0 = _mm256_unpacklo_epi16(s0, zero);
- r1 = _mm256_unpackhi_epi16(s0, zero);
-
- r0 = _mm256_add_epi32(r0, r1);
- *sad_acc = _mm256_add_epi32(*sad_acc, r0);
-
- row_sections += 1;
- src_ptr += src_stride << 1;
- ref_ptr += ref_stride << 1;
- if (sec_ptr) sec_ptr += 32 << 1;
- }
-}
-
-unsigned int aom_highbd_sad32x16_avx2(const uint8_t *src, int src_stride,
- const uint8_t *ref, int ref_stride) {
- __m256i sad = _mm256_setzero_si256();
- uint16_t *srcp = CONVERT_TO_SHORTPTR(src);
- uint16_t *refp = CONVERT_TO_SHORTPTR(ref);
- const int left_shift = 2;
- int row_section = 0;
-
- while (row_section < 4) {
- sad32x4(srcp, src_stride, refp, ref_stride, NULL, &sad);
- srcp += src_stride << left_shift;
- refp += ref_stride << left_shift;
- row_section += 1;
- }
- return get_sad_from_mm256_epi32(&sad);
-}
-
-unsigned int aom_highbd_sad16x32_avx2(const uint8_t *src, int src_stride,
- const uint8_t *ref, int ref_stride) {
- uint32_t sum = aom_highbd_sad16x16_avx2(src, src_stride, ref, ref_stride);
- src += src_stride << 4;
- ref += ref_stride << 4;
- sum += aom_highbd_sad16x16_avx2(src, src_stride, ref, ref_stride);
- return sum;
-}
-
-unsigned int aom_highbd_sad32x32_avx2(const uint8_t *src, int src_stride,
- const uint8_t *ref, int ref_stride) {
- uint32_t sum = aom_highbd_sad32x16_avx2(src, src_stride, ref, ref_stride);
- src += src_stride << 4;
- ref += ref_stride << 4;
- sum += aom_highbd_sad32x16_avx2(src, src_stride, ref, ref_stride);
- return sum;
-}
-
-unsigned int aom_highbd_sad32x64_avx2(const uint8_t *src, int src_stride,
- const uint8_t *ref, int ref_stride) {
- uint32_t sum = aom_highbd_sad32x32_avx2(src, src_stride, ref, ref_stride);
- src += src_stride << 5;
- ref += ref_stride << 5;
- sum += aom_highbd_sad32x32_avx2(src, src_stride, ref, ref_stride);
- return sum;
-}
-
-static void sad64x2(const uint16_t *src_ptr, int src_stride,
- const uint16_t *ref_ptr, int ref_stride,
- const uint16_t *sec_ptr, __m256i *sad_acc) {
- __m256i s[8], r[8];
- const __m256i zero = _mm256_setzero_si256();
-
- s[0] = _mm256_loadu_si256((const __m256i *)src_ptr);
- s[1] = _mm256_loadu_si256((const __m256i *)(src_ptr + 16));
- s[2] = _mm256_loadu_si256((const __m256i *)(src_ptr + 32));
- s[3] = _mm256_loadu_si256((const __m256i *)(src_ptr + 48));
- s[4] = _mm256_loadu_si256((const __m256i *)(src_ptr + src_stride));
- s[5] = _mm256_loadu_si256((const __m256i *)(src_ptr + src_stride + 16));
- s[6] = _mm256_loadu_si256((const __m256i *)(src_ptr + src_stride + 32));
- s[7] = _mm256_loadu_si256((const __m256i *)(src_ptr + src_stride + 48));
-
- r[0] = _mm256_loadu_si256((const __m256i *)ref_ptr);
- r[1] = _mm256_loadu_si256((const __m256i *)(ref_ptr + 16));
- r[2] = _mm256_loadu_si256((const __m256i *)(ref_ptr + 32));
- r[3] = _mm256_loadu_si256((const __m256i *)(ref_ptr + 48));
- r[4] = _mm256_loadu_si256((const __m256i *)(ref_ptr + ref_stride));
- r[5] = _mm256_loadu_si256((const __m256i *)(ref_ptr + ref_stride + 16));
- r[6] = _mm256_loadu_si256((const __m256i *)(ref_ptr + ref_stride + 32));
- r[7] = _mm256_loadu_si256((const __m256i *)(ref_ptr + ref_stride + 48));
-
- if (sec_ptr) {
- r[0] = _mm256_avg_epu16(r[0], _mm256_loadu_si256((const __m256i *)sec_ptr));
- r[1] = _mm256_avg_epu16(
- r[1], _mm256_loadu_si256((const __m256i *)(sec_ptr + 16)));
- r[2] = _mm256_avg_epu16(
- r[2], _mm256_loadu_si256((const __m256i *)(sec_ptr + 32)));
- r[3] = _mm256_avg_epu16(
- r[3], _mm256_loadu_si256((const __m256i *)(sec_ptr + 48)));
- r[4] = _mm256_avg_epu16(
- r[4], _mm256_loadu_si256((const __m256i *)(sec_ptr + 64)));
- r[5] = _mm256_avg_epu16(
- r[5], _mm256_loadu_si256((const __m256i *)(sec_ptr + 80)));
- r[6] = _mm256_avg_epu16(
- r[6], _mm256_loadu_si256((const __m256i *)(sec_ptr + 96)));
- r[7] = _mm256_avg_epu16(
- r[7], _mm256_loadu_si256((const __m256i *)(sec_ptr + 112)));
- }
-
- s[0] = _mm256_sub_epi16(s[0], r[0]);
- s[1] = _mm256_sub_epi16(s[1], r[1]);
- s[2] = _mm256_sub_epi16(s[2], r[2]);
- s[3] = _mm256_sub_epi16(s[3], r[3]);
- s[4] = _mm256_sub_epi16(s[4], r[4]);
- s[5] = _mm256_sub_epi16(s[5], r[5]);
- s[6] = _mm256_sub_epi16(s[6], r[6]);
- s[7] = _mm256_sub_epi16(s[7], r[7]);
-
- s[0] = _mm256_abs_epi16(s[0]);
- s[1] = _mm256_abs_epi16(s[1]);
- s[2] = _mm256_abs_epi16(s[2]);
- s[3] = _mm256_abs_epi16(s[3]);
- s[4] = _mm256_abs_epi16(s[4]);
- s[5] = _mm256_abs_epi16(s[5]);
- s[6] = _mm256_abs_epi16(s[6]);
- s[7] = _mm256_abs_epi16(s[7]);
-
- s[0] = _mm256_add_epi16(s[0], s[1]);
- s[0] = _mm256_add_epi16(s[0], s[2]);
- s[0] = _mm256_add_epi16(s[0], s[3]);
-
- s[4] = _mm256_add_epi16(s[4], s[5]);
- s[4] = _mm256_add_epi16(s[4], s[6]);
- s[4] = _mm256_add_epi16(s[4], s[7]);
-
- r[0] = _mm256_unpacklo_epi16(s[0], zero);
- r[1] = _mm256_unpackhi_epi16(s[0], zero);
- r[2] = _mm256_unpacklo_epi16(s[4], zero);
- r[3] = _mm256_unpackhi_epi16(s[4], zero);
-
- r[0] = _mm256_add_epi32(r[0], r[1]);
- r[0] = _mm256_add_epi32(r[0], r[2]);
- r[0] = _mm256_add_epi32(r[0], r[3]);
- *sad_acc = _mm256_add_epi32(*sad_acc, r[0]);
-}
-
-unsigned int aom_highbd_sad64x32_avx2(const uint8_t *src, int src_stride,
- const uint8_t *ref, int ref_stride) {
- __m256i sad = _mm256_setzero_si256();
- uint16_t *srcp = CONVERT_TO_SHORTPTR(src);
- uint16_t *refp = CONVERT_TO_SHORTPTR(ref);
- const int left_shift = 1;
- int row_section = 0;
-
- while (row_section < 16) {
- sad64x2(srcp, src_stride, refp, ref_stride, NULL, &sad);
- srcp += src_stride << left_shift;
- refp += ref_stride << left_shift;
- row_section += 1;
- }
- return get_sad_from_mm256_epi32(&sad);
-}
-
-unsigned int aom_highbd_sad64x64_avx2(const uint8_t *src, int src_stride,
- const uint8_t *ref, int ref_stride) {
- uint32_t sum = aom_highbd_sad64x32_avx2(src, src_stride, ref, ref_stride);
- src += src_stride << 5;
- ref += ref_stride << 5;
- sum += aom_highbd_sad64x32_avx2(src, src_stride, ref, ref_stride);
- return sum;
-}
-
-static void sad128x1(const uint16_t *src_ptr, const uint16_t *ref_ptr,
- const uint16_t *sec_ptr, __m256i *sad_acc) {
- __m256i s[8], r[8];
- const __m256i zero = _mm256_setzero_si256();
-
- s[0] = _mm256_loadu_si256((const __m256i *)src_ptr);
- s[1] = _mm256_loadu_si256((const __m256i *)(src_ptr + 16));
- s[2] = _mm256_loadu_si256((const __m256i *)(src_ptr + 32));
- s[3] = _mm256_loadu_si256((const __m256i *)(src_ptr + 48));
- s[4] = _mm256_loadu_si256((const __m256i *)(src_ptr + 64));
- s[5] = _mm256_loadu_si256((const __m256i *)(src_ptr + 80));
- s[6] = _mm256_loadu_si256((const __m256i *)(src_ptr + 96));
- s[7] = _mm256_loadu_si256((const __m256i *)(src_ptr + 112));
-
- r[0] = _mm256_loadu_si256((const __m256i *)ref_ptr);
- r[1] = _mm256_loadu_si256((const __m256i *)(ref_ptr + 16));
- r[2] = _mm256_loadu_si256((const __m256i *)(ref_ptr + 32));
- r[3] = _mm256_loadu_si256((const __m256i *)(ref_ptr + 48));
- r[4] = _mm256_loadu_si256((const __m256i *)(ref_ptr + 64));
- r[5] = _mm256_loadu_si256((const __m256i *)(ref_ptr + 80));
- r[6] = _mm256_loadu_si256((const __m256i *)(ref_ptr + 96));
- r[7] = _mm256_loadu_si256((const __m256i *)(ref_ptr + 112));
-
- if (sec_ptr) {
- r[0] = _mm256_avg_epu16(r[0], _mm256_loadu_si256((const __m256i *)sec_ptr));
- r[1] = _mm256_avg_epu16(
- r[1], _mm256_loadu_si256((const __m256i *)(sec_ptr + 16)));
- r[2] = _mm256_avg_epu16(
- r[2], _mm256_loadu_si256((const __m256i *)(sec_ptr + 32)));
- r[3] = _mm256_avg_epu16(
- r[3], _mm256_loadu_si256((const __m256i *)(sec_ptr + 48)));
- r[4] = _mm256_avg_epu16(
- r[4], _mm256_loadu_si256((const __m256i *)(sec_ptr + 64)));
- r[5] = _mm256_avg_epu16(
- r[5], _mm256_loadu_si256((const __m256i *)(sec_ptr + 80)));
- r[6] = _mm256_avg_epu16(
- r[6], _mm256_loadu_si256((const __m256i *)(sec_ptr + 96)));
- r[7] = _mm256_avg_epu16(
- r[7], _mm256_loadu_si256((const __m256i *)(sec_ptr + 112)));
- }
-
- s[0] = _mm256_sub_epi16(s[0], r[0]);
- s[1] = _mm256_sub_epi16(s[1], r[1]);
- s[2] = _mm256_sub_epi16(s[2], r[2]);
- s[3] = _mm256_sub_epi16(s[3], r[3]);
- s[4] = _mm256_sub_epi16(s[4], r[4]);
- s[5] = _mm256_sub_epi16(s[5], r[5]);
- s[6] = _mm256_sub_epi16(s[6], r[6]);
- s[7] = _mm256_sub_epi16(s[7], r[7]);
-
- s[0] = _mm256_abs_epi16(s[0]);
- s[1] = _mm256_abs_epi16(s[1]);
- s[2] = _mm256_abs_epi16(s[2]);
- s[3] = _mm256_abs_epi16(s[3]);
- s[4] = _mm256_abs_epi16(s[4]);
- s[5] = _mm256_abs_epi16(s[5]);
- s[6] = _mm256_abs_epi16(s[6]);
- s[7] = _mm256_abs_epi16(s[7]);
-
- s[0] = _mm256_add_epi16(s[0], s[1]);
- s[0] = _mm256_add_epi16(s[0], s[2]);
- s[0] = _mm256_add_epi16(s[0], s[3]);
-
- s[4] = _mm256_add_epi16(s[4], s[5]);
- s[4] = _mm256_add_epi16(s[4], s[6]);
- s[4] = _mm256_add_epi16(s[4], s[7]);
-
- r[0] = _mm256_unpacklo_epi16(s[0], zero);
- r[1] = _mm256_unpackhi_epi16(s[0], zero);
- r[2] = _mm256_unpacklo_epi16(s[4], zero);
- r[3] = _mm256_unpackhi_epi16(s[4], zero);
-
- r[0] = _mm256_add_epi32(r[0], r[1]);
- r[0] = _mm256_add_epi32(r[0], r[2]);
- r[0] = _mm256_add_epi32(r[0], r[3]);
- *sad_acc = _mm256_add_epi32(*sad_acc, r[0]);
-}
-
-unsigned int aom_highbd_sad128x64_avx2(const uint8_t *src, int src_stride,
- const uint8_t *ref, int ref_stride) {
- __m256i sad = _mm256_setzero_si256();
- uint16_t *srcp = CONVERT_TO_SHORTPTR(src);
- uint16_t *refp = CONVERT_TO_SHORTPTR(ref);
- int row = 0;
- while (row < 64) {
- sad128x1(srcp, refp, NULL, &sad);
- srcp += src_stride;
- refp += ref_stride;
- row += 1;
- }
- return get_sad_from_mm256_epi32(&sad);
-}
-
-unsigned int aom_highbd_sad64x128_avx2(const uint8_t *src, int src_stride,
- const uint8_t *ref, int ref_stride) {
- uint32_t sum = aom_highbd_sad64x64_avx2(src, src_stride, ref, ref_stride);
- src += src_stride << 6;
- ref += ref_stride << 6;
- sum += aom_highbd_sad64x64_avx2(src, src_stride, ref, ref_stride);
- return sum;
-}
-
-unsigned int aom_highbd_sad128x128_avx2(const uint8_t *src, int src_stride,
- const uint8_t *ref, int ref_stride) {
- uint32_t sum = aom_highbd_sad128x64_avx2(src, src_stride, ref, ref_stride);
- src += src_stride << 6;
- ref += ref_stride << 6;
- sum += aom_highbd_sad128x64_avx2(src, src_stride, ref, ref_stride);
- return sum;
-}
-
-// If sec_ptr = 0, calculate regular SAD. Otherwise, calculate average SAD.
-static INLINE void sad16x4(const uint16_t *src_ptr, int src_stride,
- const uint16_t *ref_ptr, int ref_stride,
- const uint16_t *sec_ptr, __m256i *sad_acc) {
- __m256i s0, s1, s2, s3, r0, r1, r2, r3;
- const __m256i zero = _mm256_setzero_si256();
-
- s0 = _mm256_loadu_si256((const __m256i *)src_ptr);
- s1 = _mm256_loadu_si256((const __m256i *)(src_ptr + src_stride));
- s2 = _mm256_loadu_si256((const __m256i *)(src_ptr + 2 * src_stride));
- s3 = _mm256_loadu_si256((const __m256i *)(src_ptr + 3 * src_stride));
-
- r0 = _mm256_loadu_si256((const __m256i *)ref_ptr);
- r1 = _mm256_loadu_si256((const __m256i *)(ref_ptr + ref_stride));
- r2 = _mm256_loadu_si256((const __m256i *)(ref_ptr + 2 * ref_stride));
- r3 = _mm256_loadu_si256((const __m256i *)(ref_ptr + 3 * ref_stride));
-
- if (sec_ptr) {
- r0 = _mm256_avg_epu16(r0, _mm256_loadu_si256((const __m256i *)sec_ptr));
- r1 = _mm256_avg_epu16(r1,
- _mm256_loadu_si256((const __m256i *)(sec_ptr + 16)));
- r2 = _mm256_avg_epu16(r2,
- _mm256_loadu_si256((const __m256i *)(sec_ptr + 32)));
- r3 = _mm256_avg_epu16(r3,
- _mm256_loadu_si256((const __m256i *)(sec_ptr + 48)));
- }
-
- s0 = _mm256_sub_epi16(s0, r0);
- s1 = _mm256_sub_epi16(s1, r1);
- s2 = _mm256_sub_epi16(s2, r2);
- s3 = _mm256_sub_epi16(s3, r3);
-
- s0 = _mm256_abs_epi16(s0);
- s1 = _mm256_abs_epi16(s1);
- s2 = _mm256_abs_epi16(s2);
- s3 = _mm256_abs_epi16(s3);
-
- s0 = _mm256_add_epi16(s0, s1);
- s0 = _mm256_add_epi16(s0, s2);
- s0 = _mm256_add_epi16(s0, s3);
-
- r0 = _mm256_unpacklo_epi16(s0, zero);
- r1 = _mm256_unpackhi_epi16(s0, zero);
-
- r0 = _mm256_add_epi32(r0, r1);
- *sad_acc = _mm256_add_epi32(*sad_acc, r0);
-}
-
-unsigned int aom_highbd_sad16x8_avg_avx2(const uint8_t *src, int src_stride,
- const uint8_t *ref, int ref_stride,
- const uint8_t *second_pred) {
- __m256i sad = _mm256_setzero_si256();
- uint16_t *srcp = CONVERT_TO_SHORTPTR(src);
- uint16_t *refp = CONVERT_TO_SHORTPTR(ref);
- uint16_t *secp = CONVERT_TO_SHORTPTR(second_pred);
-
- sad16x4(srcp, src_stride, refp, ref_stride, secp, &sad);
-
- // Next 4 rows
- srcp += src_stride << 2;
- refp += ref_stride << 2;
- secp += 64;
- sad16x4(srcp, src_stride, refp, ref_stride, secp, &sad);
- return get_sad_from_mm256_epi32(&sad);
-}
-
-unsigned int aom_highbd_sad16x16_avg_avx2(const uint8_t *src, int src_stride,
- const uint8_t *ref, int ref_stride,
- const uint8_t *second_pred) {
- const int left_shift = 3;
- uint32_t sum = aom_highbd_sad16x8_avg_avx2(src, src_stride, ref, ref_stride,
- second_pred);
- src += src_stride << left_shift;
- ref += ref_stride << left_shift;
- second_pred += 16 << left_shift;
- sum += aom_highbd_sad16x8_avg_avx2(src, src_stride, ref, ref_stride,
- second_pred);
- return sum;
-}
-
-unsigned int aom_highbd_sad16x32_avg_avx2(const uint8_t *src, int src_stride,
- const uint8_t *ref, int ref_stride,
- const uint8_t *second_pred) {
- const int left_shift = 4;
- uint32_t sum = aom_highbd_sad16x16_avg_avx2(src, src_stride, ref, ref_stride,
- second_pred);
- src += src_stride << left_shift;
- ref += ref_stride << left_shift;
- second_pred += 16 << left_shift;
- sum += aom_highbd_sad16x16_avg_avx2(src, src_stride, ref, ref_stride,
- second_pred);
- return sum;
-}
-
-unsigned int aom_highbd_sad32x16_avg_avx2(const uint8_t *src, int src_stride,
- const uint8_t *ref, int ref_stride,
- const uint8_t *second_pred) {
- __m256i sad = _mm256_setzero_si256();
- uint16_t *srcp = CONVERT_TO_SHORTPTR(src);
- uint16_t *refp = CONVERT_TO_SHORTPTR(ref);
- uint16_t *secp = CONVERT_TO_SHORTPTR(second_pred);
- const int left_shift = 2;
- int row_section = 0;
-
- while (row_section < 4) {
- sad32x4(srcp, src_stride, refp, ref_stride, secp, &sad);
- srcp += src_stride << left_shift;
- refp += ref_stride << left_shift;
- secp += 32 << left_shift;
- row_section += 1;
- }
- return get_sad_from_mm256_epi32(&sad);
-}
-
-unsigned int aom_highbd_sad32x32_avg_avx2(const uint8_t *src, int src_stride,
- const uint8_t *ref, int ref_stride,
- const uint8_t *second_pred) {
- const int left_shift = 4;
- uint32_t sum = aom_highbd_sad32x16_avg_avx2(src, src_stride, ref, ref_stride,
- second_pred);
- src += src_stride << left_shift;
- ref += ref_stride << left_shift;
- second_pred += 32 << left_shift;
- sum += aom_highbd_sad32x16_avg_avx2(src, src_stride, ref, ref_stride,
- second_pred);
- return sum;
-}
-
-unsigned int aom_highbd_sad32x64_avg_avx2(const uint8_t *src, int src_stride,
- const uint8_t *ref, int ref_stride,
- const uint8_t *second_pred) {
- const int left_shift = 5;
- uint32_t sum = aom_highbd_sad32x32_avg_avx2(src, src_stride, ref, ref_stride,
- second_pred);
- src += src_stride << left_shift;
- ref += ref_stride << left_shift;
- second_pred += 32 << left_shift;
- sum += aom_highbd_sad32x32_avg_avx2(src, src_stride, ref, ref_stride,
- second_pred);
- return sum;
-}
-
-unsigned int aom_highbd_sad64x32_avg_avx2(const uint8_t *src, int src_stride,
- const uint8_t *ref, int ref_stride,
- const uint8_t *second_pred) {
- __m256i sad = _mm256_setzero_si256();
- uint16_t *srcp = CONVERT_TO_SHORTPTR(src);
- uint16_t *refp = CONVERT_TO_SHORTPTR(ref);
- uint16_t *secp = CONVERT_TO_SHORTPTR(second_pred);
- const int left_shift = 1;
- int row_section = 0;
-
- while (row_section < 16) {
- sad64x2(srcp, src_stride, refp, ref_stride, secp, &sad);
- srcp += src_stride << left_shift;
- refp += ref_stride << left_shift;
- secp += 64 << left_shift;
- row_section += 1;
- }
- return get_sad_from_mm256_epi32(&sad);
-}
-
-unsigned int aom_highbd_sad64x64_avg_avx2(const uint8_t *src, int src_stride,
- const uint8_t *ref, int ref_stride,
- const uint8_t *second_pred) {
- const int left_shift = 5;
- uint32_t sum = aom_highbd_sad64x32_avg_avx2(src, src_stride, ref, ref_stride,
- second_pred);
- src += src_stride << left_shift;
- ref += ref_stride << left_shift;
- second_pred += 64 << left_shift;
- sum += aom_highbd_sad64x32_avg_avx2(src, src_stride, ref, ref_stride,
- second_pred);
- return sum;
-}
-
-unsigned int aom_highbd_sad64x128_avg_avx2(const uint8_t *src, int src_stride,
- const uint8_t *ref, int ref_stride,
- const uint8_t *second_pred) {
- const int left_shift = 6;
- uint32_t sum = aom_highbd_sad64x64_avg_avx2(src, src_stride, ref, ref_stride,
- second_pred);
- src += src_stride << left_shift;
- ref += ref_stride << left_shift;
- second_pred += 64 << left_shift;
- sum += aom_highbd_sad64x64_avg_avx2(src, src_stride, ref, ref_stride,
- second_pred);
- return sum;
-}
-
-unsigned int aom_highbd_sad128x64_avg_avx2(const uint8_t *src, int src_stride,
- const uint8_t *ref, int ref_stride,
- const uint8_t *second_pred) {
- __m256i sad = _mm256_setzero_si256();
- uint16_t *srcp = CONVERT_TO_SHORTPTR(src);
- uint16_t *refp = CONVERT_TO_SHORTPTR(ref);
- uint16_t *secp = CONVERT_TO_SHORTPTR(second_pred);
- int row = 0;
- while (row < 64) {
- sad128x1(srcp, refp, secp, &sad);
- srcp += src_stride;
- refp += ref_stride;
- secp += 16 << 3;
- row += 1;
- }
- return get_sad_from_mm256_epi32(&sad);
-}
-
-unsigned int aom_highbd_sad128x128_avg_avx2(const uint8_t *src, int src_stride,
- const uint8_t *ref, int ref_stride,
- const uint8_t *second_pred) {
- unsigned int sum;
- const int left_shift = 6;
-
- sum = aom_highbd_sad128x64_avg_avx2(src, src_stride, ref, ref_stride,
- second_pred);
- src += src_stride << left_shift;
- ref += ref_stride << left_shift;
- second_pred += 128 << left_shift;
- sum += aom_highbd_sad128x64_avg_avx2(src, src_stride, ref, ref_stride,
- second_pred);
- return sum;
-}
-
-// SAD 4D
-// Combine 4 __m256i vectors to uint32_t result[4]
-static INLINE void get_4d_sad_from_mm256_epi32(const __m256i *v,
- uint32_t *res) {
- __m256i u0, u1, u2, u3;
- const __m256i mask = yy_set1_64_from_32i(UINT32_MAX);
- __m128i sad;
-
- // 8 32-bit summation
- u0 = _mm256_srli_si256(v[0], 4);
- u1 = _mm256_srli_si256(v[1], 4);
- u2 = _mm256_srli_si256(v[2], 4);
- u3 = _mm256_srli_si256(v[3], 4);
-
- u0 = _mm256_add_epi32(u0, v[0]);
- u1 = _mm256_add_epi32(u1, v[1]);
- u2 = _mm256_add_epi32(u2, v[2]);
- u3 = _mm256_add_epi32(u3, v[3]);
-
- u0 = _mm256_and_si256(u0, mask);
- u1 = _mm256_and_si256(u1, mask);
- u2 = _mm256_and_si256(u2, mask);
- u3 = _mm256_and_si256(u3, mask);
- // 4 32-bit summation, evenly positioned
-
- u1 = _mm256_slli_si256(u1, 4);
- u3 = _mm256_slli_si256(u3, 4);
-
- u0 = _mm256_or_si256(u0, u1);
- u2 = _mm256_or_si256(u2, u3);
- // 8 32-bit summation, interleaved
-
- u1 = _mm256_unpacklo_epi64(u0, u2);
- u3 = _mm256_unpackhi_epi64(u0, u2);
-
- u0 = _mm256_add_epi32(u1, u3);
- sad = _mm_add_epi32(_mm256_extractf128_si256(u0, 1),
- _mm256_castsi256_si128(u0));
- _mm_storeu_si128((__m128i *)res, sad);
-}
-
-static void convert_pointers(const uint8_t *const ref8[],
- const uint16_t *ref[]) {
- ref[0] = CONVERT_TO_SHORTPTR(ref8[0]);
- ref[1] = CONVERT_TO_SHORTPTR(ref8[1]);
- ref[2] = CONVERT_TO_SHORTPTR(ref8[2]);
- ref[3] = CONVERT_TO_SHORTPTR(ref8[3]);
-}
-
-static void init_sad(__m256i *s) {
- s[0] = _mm256_setzero_si256();
- s[1] = _mm256_setzero_si256();
- s[2] = _mm256_setzero_si256();
- s[3] = _mm256_setzero_si256();
-}
-
-void aom_highbd_sad16x8x4d_avx2(const uint8_t *src, int src_stride,
- const uint8_t *const ref_array[],
- int ref_stride, uint32_t *sad_array) {
- __m256i sad_vec[4];
- const uint16_t *refp[4];
- const uint16_t *keep = CONVERT_TO_SHORTPTR(src);
- const uint16_t *srcp;
- const int shift_for_4_rows = 2;
- int i;
-
- init_sad(sad_vec);
- convert_pointers(ref_array, refp);
-
- for (i = 0; i < 4; ++i) {
- srcp = keep;
- sad16x4(srcp, src_stride, refp[i], ref_stride, 0, &sad_vec[i]);
- srcp += src_stride << shift_for_4_rows;
- refp[i] += ref_stride << shift_for_4_rows;
- sad16x4(srcp, src_stride, refp[i], ref_stride, 0, &sad_vec[i]);
- }
- get_4d_sad_from_mm256_epi32(sad_vec, sad_array);
-}
-
-void aom_highbd_sad16x16x4d_avx2(const uint8_t *src, int src_stride,
- const uint8_t *const ref_array[],
- int ref_stride, uint32_t *sad_array) {
- uint32_t first8rows[4];
- uint32_t second8rows[4];
- const uint8_t *ref[4];
- const int shift_for_8_rows = 3;
-
- ref[0] = ref_array[0];
- ref[1] = ref_array[1];
- ref[2] = ref_array[2];
- ref[3] = ref_array[3];
-
- aom_highbd_sad16x8x4d_avx2(src, src_stride, ref, ref_stride, first8rows);
- src += src_stride << shift_for_8_rows;
- ref[0] += ref_stride << shift_for_8_rows;
- ref[1] += ref_stride << shift_for_8_rows;
- ref[2] += ref_stride << shift_for_8_rows;
- ref[3] += ref_stride << shift_for_8_rows;
- aom_highbd_sad16x8x4d_avx2(src, src_stride, ref, ref_stride, second8rows);
- sad_array[0] = first8rows[0] + second8rows[0];
- sad_array[1] = first8rows[1] + second8rows[1];
- sad_array[2] = first8rows[2] + second8rows[2];
- sad_array[3] = first8rows[3] + second8rows[3];
-}
-
-void aom_highbd_sad16x32x4d_avx2(const uint8_t *src, int src_stride,
- const uint8_t *const ref_array[],
- int ref_stride, uint32_t *sad_array) {
- uint32_t first_half[4];
- uint32_t second_half[4];
- const uint8_t *ref[4];
- const int shift_for_rows = 4;
-
- ref[0] = ref_array[0];
- ref[1] = ref_array[1];
- ref[2] = ref_array[2];
- ref[3] = ref_array[3];
-
- aom_highbd_sad16x16x4d_avx2(src, src_stride, ref, ref_stride, first_half);
- src += src_stride << shift_for_rows;
- ref[0] += ref_stride << shift_for_rows;
- ref[1] += ref_stride << shift_for_rows;
- ref[2] += ref_stride << shift_for_rows;
- ref[3] += ref_stride << shift_for_rows;
- aom_highbd_sad16x16x4d_avx2(src, src_stride, ref, ref_stride, second_half);
- sad_array[0] = first_half[0] + second_half[0];
- sad_array[1] = first_half[1] + second_half[1];
- sad_array[2] = first_half[2] + second_half[2];
- sad_array[3] = first_half[3] + second_half[3];
-}
-
-void aom_highbd_sad32x16x4d_avx2(const uint8_t *src, int src_stride,
- const uint8_t *const ref_array[],
- int ref_stride, uint32_t *sad_array) {
- __m256i sad_vec[4];
- const uint16_t *refp[4];
- const uint16_t *keep = CONVERT_TO_SHORTPTR(src);
- const uint16_t *srcp;
- const int shift_for_4_rows = 2;
- int i;
- int rows_section;
-
- init_sad(sad_vec);
- convert_pointers(ref_array, refp);
-
- for (i = 0; i < 4; ++i) {
- srcp = keep;
- rows_section = 0;
- while (rows_section < 4) {
- sad32x4(srcp, src_stride, refp[i], ref_stride, 0, &sad_vec[i]);
- srcp += src_stride << shift_for_4_rows;
- refp[i] += ref_stride << shift_for_4_rows;
- rows_section++;
- }
- }
- get_4d_sad_from_mm256_epi32(sad_vec, sad_array);
-}
-
-void aom_highbd_sad32x32x4d_avx2(const uint8_t *src, int src_stride,
- const uint8_t *const ref_array[],
- int ref_stride, uint32_t *sad_array) {
- uint32_t first_half[4];
- uint32_t second_half[4];
- const uint8_t *ref[4];
- const int shift_for_rows = 4;
-
- ref[0] = ref_array[0];
- ref[1] = ref_array[1];
- ref[2] = ref_array[2];
- ref[3] = ref_array[3];
-
- aom_highbd_sad32x16x4d_avx2(src, src_stride, ref, ref_stride, first_half);
- src += src_stride << shift_for_rows;
- ref[0] += ref_stride << shift_for_rows;
- ref[1] += ref_stride << shift_for_rows;
- ref[2] += ref_stride << shift_for_rows;
- ref[3] += ref_stride << shift_for_rows;
- aom_highbd_sad32x16x4d_avx2(src, src_stride, ref, ref_stride, second_half);
- sad_array[0] = first_half[0] + second_half[0];
- sad_array[1] = first_half[1] + second_half[1];
- sad_array[2] = first_half[2] + second_half[2];
- sad_array[3] = first_half[3] + second_half[3];
-}
-
-void aom_highbd_sad32x64x4d_avx2(const uint8_t *src, int src_stride,
- const uint8_t *const ref_array[],
- int ref_stride, uint32_t *sad_array) {
- uint32_t first_half[4];
- uint32_t second_half[4];
- const uint8_t *ref[4];
- const int shift_for_rows = 5;
-
- ref[0] = ref_array[0];
- ref[1] = ref_array[1];
- ref[2] = ref_array[2];
- ref[3] = ref_array[3];
-
- aom_highbd_sad32x32x4d_avx2(src, src_stride, ref, ref_stride, first_half);
- src += src_stride << shift_for_rows;
- ref[0] += ref_stride << shift_for_rows;
- ref[1] += ref_stride << shift_for_rows;
- ref[2] += ref_stride << shift_for_rows;
- ref[3] += ref_stride << shift_for_rows;
- aom_highbd_sad32x32x4d_avx2(src, src_stride, ref, ref_stride, second_half);
- sad_array[0] = first_half[0] + second_half[0];
- sad_array[1] = first_half[1] + second_half[1];
- sad_array[2] = first_half[2] + second_half[2];
- sad_array[3] = first_half[3] + second_half[3];
-}
-
-void aom_highbd_sad64x32x4d_avx2(const uint8_t *src, int src_stride,
- const uint8_t *const ref_array[],
- int ref_stride, uint32_t *sad_array) {
- __m256i sad_vec[4];
- const uint16_t *refp[4];
- const uint16_t *keep = CONVERT_TO_SHORTPTR(src);
- const uint16_t *srcp;
- const int shift_for_rows = 1;
- int i;
- int rows_section;
-
- init_sad(sad_vec);
- convert_pointers(ref_array, refp);
-
- for (i = 0; i < 4; ++i) {
- srcp = keep;
- rows_section = 0;
- while (rows_section < 16) {
- sad64x2(srcp, src_stride, refp[i], ref_stride, NULL, &sad_vec[i]);
- srcp += src_stride << shift_for_rows;
- refp[i] += ref_stride << shift_for_rows;
- rows_section++;
- }
- }
- get_4d_sad_from_mm256_epi32(sad_vec, sad_array);
-}
-
-void aom_highbd_sad64x64x4d_avx2(const uint8_t *src, int src_stride,
- const uint8_t *const ref_array[],
- int ref_stride, uint32_t *sad_array) {
- uint32_t first_half[4];
- uint32_t second_half[4];
- const uint8_t *ref[4];
- const int shift_for_rows = 5;
-
- ref[0] = ref_array[0];
- ref[1] = ref_array[1];
- ref[2] = ref_array[2];
- ref[3] = ref_array[3];
-
- aom_highbd_sad64x32x4d_avx2(src, src_stride, ref, ref_stride, first_half);
- src += src_stride << shift_for_rows;
- ref[0] += ref_stride << shift_for_rows;
- ref[1] += ref_stride << shift_for_rows;
- ref[2] += ref_stride << shift_for_rows;
- ref[3] += ref_stride << shift_for_rows;
- aom_highbd_sad64x32x4d_avx2(src, src_stride, ref, ref_stride, second_half);
- sad_array[0] = first_half[0] + second_half[0];
- sad_array[1] = first_half[1] + second_half[1];
- sad_array[2] = first_half[2] + second_half[2];
- sad_array[3] = first_half[3] + second_half[3];
-}
-
-void aom_highbd_sad64x128x4d_avx2(const uint8_t *src, int src_stride,
- const uint8_t *const ref_array[],
- int ref_stride, uint32_t *sad_array) {
- uint32_t first_half[4];
- uint32_t second_half[4];
- const uint8_t *ref[4];
- const int shift_for_rows = 6;
-
- ref[0] = ref_array[0];
- ref[1] = ref_array[1];
- ref[2] = ref_array[2];
- ref[3] = ref_array[3];
-
- aom_highbd_sad64x64x4d_avx2(src, src_stride, ref, ref_stride, first_half);
- src += src_stride << shift_for_rows;
- ref[0] += ref_stride << shift_for_rows;
- ref[1] += ref_stride << shift_for_rows;
- ref[2] += ref_stride << shift_for_rows;
- ref[3] += ref_stride << shift_for_rows;
- aom_highbd_sad64x64x4d_avx2(src, src_stride, ref, ref_stride, second_half);
- sad_array[0] = first_half[0] + second_half[0];
- sad_array[1] = first_half[1] + second_half[1];
- sad_array[2] = first_half[2] + second_half[2];
- sad_array[3] = first_half[3] + second_half[3];
-}
-
-void aom_highbd_sad128x64x4d_avx2(const uint8_t *src, int src_stride,
- const uint8_t *const ref_array[],
- int ref_stride, uint32_t *sad_array) {
- __m256i sad_vec[4];
- const uint16_t *refp[4];
- const uint16_t *keep = CONVERT_TO_SHORTPTR(src);
- const uint16_t *srcp;
- int i;
- int rows_section;
-
- init_sad(sad_vec);
- convert_pointers(ref_array, refp);
-
- for (i = 0; i < 4; ++i) {
- srcp = keep;
- rows_section = 0;
- while (rows_section < 64) {
- sad128x1(srcp, refp[i], NULL, &sad_vec[i]);
- srcp += src_stride;
- refp[i] += ref_stride;
- rows_section++;
- }
- }
- get_4d_sad_from_mm256_epi32(sad_vec, sad_array);
-}
-
-void aom_highbd_sad128x128x4d_avx2(const uint8_t *src, int src_stride,
- const uint8_t *const ref_array[],
- int ref_stride, uint32_t *sad_array) {
- uint32_t first_half[4];
- uint32_t second_half[4];
- const uint8_t *ref[4];
- const int shift_for_rows = 6;
-
- ref[0] = ref_array[0];
- ref[1] = ref_array[1];
- ref[2] = ref_array[2];
- ref[3] = ref_array[3];
-
- aom_highbd_sad128x64x4d_avx2(src, src_stride, ref, ref_stride, first_half);
- src += src_stride << shift_for_rows;
- ref[0] += ref_stride << shift_for_rows;
- ref[1] += ref_stride << shift_for_rows;
- ref[2] += ref_stride << shift_for_rows;
- ref[3] += ref_stride << shift_for_rows;
- aom_highbd_sad128x64x4d_avx2(src, src_stride, ref, ref_stride, second_half);
- sad_array[0] = first_half[0] + second_half[0];
- sad_array[1] = first_half[1] + second_half[1];
- sad_array[2] = first_half[2] + second_half[2];
- sad_array[3] = first_half[3] + second_half[3];
-}
diff --git a/third_party/aom/aom_dsp/x86/sad_impl_avx2.c b/third_party/aom/aom_dsp/x86/sad_impl_avx2.c
deleted file mode 100644
index c6fd62c9e..000000000
--- a/third_party/aom/aom_dsp/x86/sad_impl_avx2.c
+++ /dev/null
@@ -1,234 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#include <immintrin.h>
-
-#include "config/aom_dsp_rtcd.h"
-
-static unsigned int sad32x32(const uint8_t *src_ptr, int src_stride,
- const uint8_t *ref_ptr, int ref_stride) {
- __m256i s1, s2, r1, r2;
- __m256i sum = _mm256_setzero_si256();
- __m128i sum_i128;
- int i;
-
- for (i = 0; i < 16; ++i) {
- r1 = _mm256_loadu_si256((__m256i const *)ref_ptr);
- r2 = _mm256_loadu_si256((__m256i const *)(ref_ptr + ref_stride));
- s1 = _mm256_sad_epu8(r1, _mm256_loadu_si256((__m256i const *)src_ptr));
- s2 = _mm256_sad_epu8(
- r2, _mm256_loadu_si256((__m256i const *)(src_ptr + src_stride)));
- sum = _mm256_add_epi32(sum, _mm256_add_epi32(s1, s2));
- ref_ptr += ref_stride << 1;
- src_ptr += src_stride << 1;
- }
-
- sum = _mm256_add_epi32(sum, _mm256_srli_si256(sum, 8));
- sum_i128 = _mm_add_epi32(_mm256_extracti128_si256(sum, 1),
- _mm256_castsi256_si128(sum));
- return _mm_cvtsi128_si32(sum_i128);
-}
-
-static unsigned int sad64x32(const uint8_t *src_ptr, int src_stride,
- const uint8_t *ref_ptr, int ref_stride) {
- unsigned int half_width = 32;
- uint32_t sum = sad32x32(src_ptr, src_stride, ref_ptr, ref_stride);
- src_ptr += half_width;
- ref_ptr += half_width;
- sum += sad32x32(src_ptr, src_stride, ref_ptr, ref_stride);
- return sum;
-}
-
-static unsigned int sad64x64(const uint8_t *src_ptr, int src_stride,
- const uint8_t *ref_ptr, int ref_stride) {
- uint32_t sum = sad64x32(src_ptr, src_stride, ref_ptr, ref_stride);
- src_ptr += src_stride << 5;
- ref_ptr += ref_stride << 5;
- sum += sad64x32(src_ptr, src_stride, ref_ptr, ref_stride);
- return sum;
-}
-
-unsigned int aom_sad128x64_avx2(const uint8_t *src_ptr, int src_stride,
- const uint8_t *ref_ptr, int ref_stride) {
- unsigned int half_width = 64;
- uint32_t sum = sad64x64(src_ptr, src_stride, ref_ptr, ref_stride);
- src_ptr += half_width;
- ref_ptr += half_width;
- sum += sad64x64(src_ptr, src_stride, ref_ptr, ref_stride);
- return sum;
-}
-
-unsigned int aom_sad64x128_avx2(const uint8_t *src_ptr, int src_stride,
- const uint8_t *ref_ptr, int ref_stride) {
- uint32_t sum = sad64x64(src_ptr, src_stride, ref_ptr, ref_stride);
- src_ptr += src_stride << 6;
- ref_ptr += ref_stride << 6;
- sum += sad64x64(src_ptr, src_stride, ref_ptr, ref_stride);
- return sum;
-}
-
-unsigned int aom_sad128x128_avx2(const uint8_t *src_ptr, int src_stride,
- const uint8_t *ref_ptr, int ref_stride) {
- uint32_t sum = aom_sad128x64_avx2(src_ptr, src_stride, ref_ptr, ref_stride);
- src_ptr += src_stride << 6;
- ref_ptr += ref_stride << 6;
- sum += aom_sad128x64_avx2(src_ptr, src_stride, ref_ptr, ref_stride);
- return sum;
-}
-
-static void sad64x64x4d(const uint8_t *src, int src_stride,
- const uint8_t *const ref[4], int ref_stride,
- __m128i *res) {
- uint32_t sum[4];
- aom_sad64x64x4d_avx2(src, src_stride, ref, ref_stride, sum);
- *res = _mm_loadu_si128((const __m128i *)sum);
-}
-
-void aom_sad64x128x4d_avx2(const uint8_t *src, int src_stride,
- const uint8_t *const ref[4], int ref_stride,
- uint32_t res[4]) {
- __m128i sum0, sum1;
- const uint8_t *rf[4];
-
- rf[0] = ref[0];
- rf[1] = ref[1];
- rf[2] = ref[2];
- rf[3] = ref[3];
- sad64x64x4d(src, src_stride, rf, ref_stride, &sum0);
- src += src_stride << 6;
- rf[0] += ref_stride << 6;
- rf[1] += ref_stride << 6;
- rf[2] += ref_stride << 6;
- rf[3] += ref_stride << 6;
- sad64x64x4d(src, src_stride, rf, ref_stride, &sum1);
- sum0 = _mm_add_epi32(sum0, sum1);
- _mm_storeu_si128((__m128i *)res, sum0);
-}
-
-void aom_sad128x64x4d_avx2(const uint8_t *src, int src_stride,
- const uint8_t *const ref[4], int ref_stride,
- uint32_t res[4]) {
- __m128i sum0, sum1;
- unsigned int half_width = 64;
- const uint8_t *rf[4];
-
- rf[0] = ref[0];
- rf[1] = ref[1];
- rf[2] = ref[2];
- rf[3] = ref[3];
- sad64x64x4d(src, src_stride, rf, ref_stride, &sum0);
- src += half_width;
- rf[0] += half_width;
- rf[1] += half_width;
- rf[2] += half_width;
- rf[3] += half_width;
- sad64x64x4d(src, src_stride, rf, ref_stride, &sum1);
- sum0 = _mm_add_epi32(sum0, sum1);
- _mm_storeu_si128((__m128i *)res, sum0);
-}
-
-void aom_sad128x128x4d_avx2(const uint8_t *src, int src_stride,
- const uint8_t *const ref[4], int ref_stride,
- uint32_t res[4]) {
- const uint8_t *rf[4];
- uint32_t sum0[4];
- uint32_t sum1[4];
-
- rf[0] = ref[0];
- rf[1] = ref[1];
- rf[2] = ref[2];
- rf[3] = ref[3];
- aom_sad128x64x4d_avx2(src, src_stride, rf, ref_stride, sum0);
- src += src_stride << 6;
- rf[0] += ref_stride << 6;
- rf[1] += ref_stride << 6;
- rf[2] += ref_stride << 6;
- rf[3] += ref_stride << 6;
- aom_sad128x64x4d_avx2(src, src_stride, rf, ref_stride, sum1);
- res[0] = sum0[0] + sum1[0];
- res[1] = sum0[1] + sum1[1];
- res[2] = sum0[2] + sum1[2];
- res[3] = sum0[3] + sum1[3];
-}
-
-static unsigned int sad_w64_avg_avx2(const uint8_t *src_ptr, int src_stride,
- const uint8_t *ref_ptr, int ref_stride,
- const int h, const uint8_t *second_pred,
- const int second_pred_stride) {
- int i, res;
- __m256i sad1_reg, sad2_reg, ref1_reg, ref2_reg;
- __m256i sum_sad = _mm256_setzero_si256();
- __m256i sum_sad_h;
- __m128i sum_sad128;
- for (i = 0; i < h; i++) {
- ref1_reg = _mm256_loadu_si256((__m256i const *)ref_ptr);
- ref2_reg = _mm256_loadu_si256((__m256i const *)(ref_ptr + 32));
- ref1_reg = _mm256_avg_epu8(
- ref1_reg, _mm256_loadu_si256((__m256i const *)second_pred));
- ref2_reg = _mm256_avg_epu8(
- ref2_reg, _mm256_loadu_si256((__m256i const *)(second_pred + 32)));
- sad1_reg =
- _mm256_sad_epu8(ref1_reg, _mm256_loadu_si256((__m256i const *)src_ptr));
- sad2_reg = _mm256_sad_epu8(
- ref2_reg, _mm256_loadu_si256((__m256i const *)(src_ptr + 32)));
- sum_sad = _mm256_add_epi32(sum_sad, _mm256_add_epi32(sad1_reg, sad2_reg));
- ref_ptr += ref_stride;
- src_ptr += src_stride;
- second_pred += second_pred_stride;
- }
- sum_sad_h = _mm256_srli_si256(sum_sad, 8);
- sum_sad = _mm256_add_epi32(sum_sad, sum_sad_h);
- sum_sad128 = _mm256_extracti128_si256(sum_sad, 1);
- sum_sad128 = _mm_add_epi32(_mm256_castsi256_si128(sum_sad), sum_sad128);
- res = _mm_cvtsi128_si32(sum_sad128);
-
- return res;
-}
-
-unsigned int aom_sad64x128_avg_avx2(const uint8_t *src_ptr, int src_stride,
- const uint8_t *ref_ptr, int ref_stride,
- const uint8_t *second_pred) {
- uint32_t sum = sad_w64_avg_avx2(src_ptr, src_stride, ref_ptr, ref_stride, 64,
- second_pred, 64);
- src_ptr += src_stride << 6;
- ref_ptr += ref_stride << 6;
- second_pred += 64 << 6;
- sum += sad_w64_avg_avx2(src_ptr, src_stride, ref_ptr, ref_stride, 64,
- second_pred, 64);
- return sum;
-}
-
-unsigned int aom_sad128x64_avg_avx2(const uint8_t *src_ptr, int src_stride,
- const uint8_t *ref_ptr, int ref_stride,
- const uint8_t *second_pred) {
- unsigned int half_width = 64;
- uint32_t sum = sad_w64_avg_avx2(src_ptr, src_stride, ref_ptr, ref_stride, 64,
- second_pred, 128);
- src_ptr += half_width;
- ref_ptr += half_width;
- second_pred += half_width;
- sum += sad_w64_avg_avx2(src_ptr, src_stride, ref_ptr, ref_stride, 64,
- second_pred, 128);
- return sum;
-}
-
-unsigned int aom_sad128x128_avg_avx2(const uint8_t *src_ptr, int src_stride,
- const uint8_t *ref_ptr, int ref_stride,
- const uint8_t *second_pred) {
- uint32_t sum = aom_sad128x64_avg_avx2(src_ptr, src_stride, ref_ptr,
- ref_stride, second_pred);
- src_ptr += src_stride << 6;
- ref_ptr += ref_stride << 6;
- second_pred += 128 << 6;
- sum += aom_sad128x64_avg_avx2(src_ptr, src_stride, ref_ptr, ref_stride,
- second_pred);
- return sum;
-}
diff --git a/third_party/aom/aom_dsp/x86/sad_sse2.asm b/third_party/aom/aom_dsp/x86/sad_sse2.asm
deleted file mode 100644
index 3251b7655..000000000
--- a/third_party/aom/aom_dsp/x86/sad_sse2.asm
+++ /dev/null
@@ -1,353 +0,0 @@
-;
-; Copyright (c) 2016, Alliance for Open Media. All rights reserved
-;
-; This source code is subject to the terms of the BSD 2 Clause License and
-; the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
-; was not distributed with this source code in the LICENSE file, you can
-; obtain it at www.aomedia.org/license/software. If the Alliance for Open
-; Media Patent License 1.0 was not distributed with this source code in the
-; PATENTS file, you can obtain it at www.aomedia.org/license/patent.
-;
-
-;
-
-%include "third_party/x86inc/x86inc.asm"
-
-SECTION .text
-
-%macro SAD_FN 4
-%if %4 == 0
-%if %3 == 5
-cglobal sad%1x%2, 4, %3, 5, src, src_stride, ref, ref_stride, n_rows
-%else ; %3 == 7
-cglobal sad%1x%2, 4, %3, 6, src, src_stride, ref, ref_stride, \
- src_stride3, ref_stride3, n_rows
-%endif ; %3 == 5/7
-%else ; avg
-%if %3 == 5
-cglobal sad%1x%2_avg, 5, 1 + %3, 5, src, src_stride, ref, ref_stride, \
- second_pred, n_rows
-%else ; %3 == 7
-cglobal sad%1x%2_avg, 5, ARCH_X86_64 + %3, 6, src, src_stride, \
- ref, ref_stride, \
- second_pred, \
- src_stride3, ref_stride3
-%if ARCH_X86_64
-%define n_rowsd r7d
-%else ; x86-32
-%define n_rowsd dword r0m
-%endif ; x86-32/64
-%endif ; %3 == 5/7
-%endif ; avg/sad
- movsxdifnidn src_strideq, src_strided
- movsxdifnidn ref_strideq, ref_strided
-%if %3 == 7
- lea src_stride3q, [src_strideq*3]
- lea ref_stride3q, [ref_strideq*3]
-%endif ; %3 == 7
-%endmacro
-
-; unsigned int aom_sad128x128_sse2(uint8_t *src, int src_stride,
-; uint8_t *ref, int ref_stride);
-%macro SAD128XN 1-2 0
- SAD_FN 128, %1, 5, %2
- mov n_rowsd, %1
- pxor m0, m0
-
-.loop:
- movu m1, [refq]
- movu m2, [refq+16]
- movu m3, [refq+32]
- movu m4, [refq+48]
-%if %2 == 1
- pavgb m1, [second_predq+mmsize*0]
- pavgb m2, [second_predq+mmsize*1]
- pavgb m3, [second_predq+mmsize*2]
- pavgb m4, [second_predq+mmsize*3]
-%endif
- psadbw m1, [srcq]
- psadbw m2, [srcq+16]
- psadbw m3, [srcq+32]
- psadbw m4, [srcq+48]
-
- paddd m1, m2
- paddd m3, m4
- paddd m0, m1
- paddd m0, m3
-
- movu m1, [refq+64]
- movu m2, [refq+80]
- movu m3, [refq+96]
- movu m4, [refq+112]
-%if %2 == 1
- pavgb m1, [second_predq+mmsize*4]
- pavgb m2, [second_predq+mmsize*5]
- pavgb m3, [second_predq+mmsize*6]
- pavgb m4, [second_predq+mmsize*7]
- lea second_predq, [second_predq+mmsize*8]
-%endif
- psadbw m1, [srcq+64]
- psadbw m2, [srcq+80]
- psadbw m3, [srcq+96]
- psadbw m4, [srcq+112]
-
- add refq, ref_strideq
- add srcq, src_strideq
-
- paddd m1, m2
- paddd m3, m4
- paddd m0, m1
- paddd m0, m3
-
- sub n_rowsd, 1
- jg .loop
-
- movhlps m1, m0
- paddd m0, m1
- movd eax, m0
- RET
-%endmacro
-
-INIT_XMM sse2
-SAD128XN 128 ; sad128x128_sse2
-SAD128XN 128, 1 ; sad128x128_avg_sse2
-SAD128XN 64 ; sad128x64_sse2
-SAD128XN 64, 1 ; sad128x64_avg_sse2
-
-
-; unsigned int aom_sad64x64_sse2(uint8_t *src, int src_stride,
-; uint8_t *ref, int ref_stride);
-%macro SAD64XN 1-2 0
- SAD_FN 64, %1, 5, %2
- mov n_rowsd, %1
- pxor m0, m0
-.loop:
- movu m1, [refq]
- movu m2, [refq+16]
- movu m3, [refq+32]
- movu m4, [refq+48]
-%if %2 == 1
- pavgb m1, [second_predq+mmsize*0]
- pavgb m2, [second_predq+mmsize*1]
- pavgb m3, [second_predq+mmsize*2]
- pavgb m4, [second_predq+mmsize*3]
- lea second_predq, [second_predq+mmsize*4]
-%endif
- psadbw m1, [srcq]
- psadbw m2, [srcq+16]
- psadbw m3, [srcq+32]
- psadbw m4, [srcq+48]
- paddd m1, m2
- paddd m3, m4
- add refq, ref_strideq
- paddd m0, m1
- add srcq, src_strideq
- paddd m0, m3
- dec n_rowsd
- jg .loop
-
- movhlps m1, m0
- paddd m0, m1
- movd eax, m0
- RET
-%endmacro
-
-INIT_XMM sse2
-SAD64XN 128 ; sad64x128_sse2
-SAD64XN 128, 1 ; sad64x128_avg_sse2
-SAD64XN 64 ; sad64x64_sse2
-SAD64XN 32 ; sad64x32_sse2
-SAD64XN 64, 1 ; sad64x64_avg_sse2
-SAD64XN 32, 1 ; sad64x32_avg_sse2
-SAD64XN 16 ; sad64x16_sse2
-SAD64XN 16, 1 ; sad64x16_avg_sse2
-
-; unsigned int aom_sad32x32_sse2(uint8_t *src, int src_stride,
-; uint8_t *ref, int ref_stride);
-%macro SAD32XN 1-2 0
- SAD_FN 32, %1, 5, %2
- mov n_rowsd, %1/2
- pxor m0, m0
-.loop:
- movu m1, [refq]
- movu m2, [refq+16]
- movu m3, [refq+ref_strideq]
- movu m4, [refq+ref_strideq+16]
-%if %2 == 1
- pavgb m1, [second_predq+mmsize*0]
- pavgb m2, [second_predq+mmsize*1]
- pavgb m3, [second_predq+mmsize*2]
- pavgb m4, [second_predq+mmsize*3]
- lea second_predq, [second_predq+mmsize*4]
-%endif
- psadbw m1, [srcq]
- psadbw m2, [srcq+16]
- psadbw m3, [srcq+src_strideq]
- psadbw m4, [srcq+src_strideq+16]
- paddd m1, m2
- paddd m3, m4
- lea refq, [refq+ref_strideq*2]
- paddd m0, m1
- lea srcq, [srcq+src_strideq*2]
- paddd m0, m3
- dec n_rowsd
- jg .loop
-
- movhlps m1, m0
- paddd m0, m1
- movd eax, m0
- RET
-%endmacro
-
-INIT_XMM sse2
-SAD32XN 64 ; sad32x64_sse2
-SAD32XN 32 ; sad32x32_sse2
-SAD32XN 16 ; sad32x16_sse2
-SAD32XN 64, 1 ; sad32x64_avg_sse2
-SAD32XN 32, 1 ; sad32x32_avg_sse2
-SAD32XN 16, 1 ; sad32x16_avg_sse2
-SAD32XN 8 ; sad_32x8_sse2
-SAD32XN 8, 1 ; sad_32x8_avg_sse2
-
-; unsigned int aom_sad16x{8,16}_sse2(uint8_t *src, int src_stride,
-; uint8_t *ref, int ref_stride);
-%macro SAD16XN 1-2 0
- SAD_FN 16, %1, 7, %2
- mov n_rowsd, %1/4
- pxor m0, m0
-
-.loop:
- movu m1, [refq]
- movu m2, [refq+ref_strideq]
- movu m3, [refq+ref_strideq*2]
- movu m4, [refq+ref_stride3q]
-%if %2 == 1
- pavgb m1, [second_predq+mmsize*0]
- pavgb m2, [second_predq+mmsize*1]
- pavgb m3, [second_predq+mmsize*2]
- pavgb m4, [second_predq+mmsize*3]
- lea second_predq, [second_predq+mmsize*4]
-%endif
- psadbw m1, [srcq]
- psadbw m2, [srcq+src_strideq]
- psadbw m3, [srcq+src_strideq*2]
- psadbw m4, [srcq+src_stride3q]
- paddd m1, m2
- paddd m3, m4
- lea refq, [refq+ref_strideq*4]
- paddd m0, m1
- lea srcq, [srcq+src_strideq*4]
- paddd m0, m3
- dec n_rowsd
- jg .loop
-
- movhlps m1, m0
- paddd m0, m1
- movd eax, m0
- RET
-%endmacro
-
-INIT_XMM sse2
-SAD16XN 32 ; sad16x32_sse2
-SAD16XN 16 ; sad16x16_sse2
-SAD16XN 8 ; sad16x8_sse2
-SAD16XN 32, 1 ; sad16x32_avg_sse2
-SAD16XN 16, 1 ; sad16x16_avg_sse2
-SAD16XN 8, 1 ; sad16x8_avg_sse2
-SAD16XN 4 ; sad_16x4_sse2
-SAD16XN 4, 1 ; sad_16x4_avg_sse2
-SAD16XN 64 ; sad_16x64_sse2
-SAD16XN 64, 1 ; sad_16x64_avg_sse2
-
-; unsigned int aom_sad8x{8,16}_sse2(uint8_t *src, int src_stride,
-; uint8_t *ref, int ref_stride);
-%macro SAD8XN 1-2 0
- SAD_FN 8, %1, 7, %2
- mov n_rowsd, %1/4
- pxor m0, m0
-
-.loop:
- movh m1, [refq]
- movhps m1, [refq+ref_strideq]
- movh m2, [refq+ref_strideq*2]
- movhps m2, [refq+ref_stride3q]
-%if %2 == 1
- pavgb m1, [second_predq+mmsize*0]
- pavgb m2, [second_predq+mmsize*1]
- lea second_predq, [second_predq+mmsize*2]
-%endif
- movh m3, [srcq]
- movhps m3, [srcq+src_strideq]
- movh m4, [srcq+src_strideq*2]
- movhps m4, [srcq+src_stride3q]
- psadbw m1, m3
- psadbw m2, m4
- lea refq, [refq+ref_strideq*4]
- paddd m0, m1
- lea srcq, [srcq+src_strideq*4]
- paddd m0, m2
- dec n_rowsd
- jg .loop
-
- movhlps m1, m0
- paddd m0, m1
- movd eax, m0
- RET
-%endmacro
-
-INIT_XMM sse2
-SAD8XN 16 ; sad8x16_sse2
-SAD8XN 8 ; sad8x8_sse2
-SAD8XN 4 ; sad8x4_sse2
-SAD8XN 16, 1 ; sad8x16_avg_sse2
-SAD8XN 8, 1 ; sad8x8_avg_sse2
-SAD8XN 4, 1 ; sad8x4_avg_sse2
-SAD8XN 32 ; sad_8x32_sse2
-SAD8XN 32, 1 ; sad_8x32_avg_sse2
-
-; unsigned int aom_sad4x{4, 8}_sse2(uint8_t *src, int src_stride,
-; uint8_t *ref, int ref_stride);
-%macro SAD4XN 1-2 0
- SAD_FN 4, %1, 7, %2
- mov n_rowsd, %1/4
- pxor m0, m0
-
-.loop:
- movd m1, [refq]
- movd m2, [refq+ref_strideq]
- movd m3, [refq+ref_strideq*2]
- movd m4, [refq+ref_stride3q]
- punpckldq m1, m2
- punpckldq m3, m4
- movlhps m1, m3
-%if %2 == 1
- pavgb m1, [second_predq+mmsize*0]
- lea second_predq, [second_predq+mmsize*1]
-%endif
- movd m2, [srcq]
- movd m5, [srcq+src_strideq]
- movd m4, [srcq+src_strideq*2]
- movd m3, [srcq+src_stride3q]
- punpckldq m2, m5
- punpckldq m4, m3
- movlhps m2, m4
- psadbw m1, m2
- lea refq, [refq+ref_strideq*4]
- paddd m0, m1
- lea srcq, [srcq+src_strideq*4]
- dec n_rowsd
- jg .loop
-
- movhlps m1, m0
- paddd m0, m1
- movd eax, m0
- RET
-%endmacro
-
-INIT_XMM sse2
-SAD4XN 8 ; sad4x8_sse
-SAD4XN 4 ; sad4x4_sse
-SAD4XN 8, 1 ; sad4x8_avg_sse
-SAD4XN 4, 1 ; sad4x4_avg_sse
-SAD4XN 16 ; sad_4x16_sse2
-SAD4XN 16, 1 ; sad_4x16_avg_sse2
diff --git a/third_party/aom/aom_dsp/x86/sse_avx2.c b/third_party/aom/aom_dsp/x86/sse_avx2.c
deleted file mode 100644
index 305dde5c0..000000000
--- a/third_party/aom/aom_dsp/x86/sse_avx2.c
+++ /dev/null
@@ -1,250 +0,0 @@
-/*
- * Copyright (c) 2018, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-#include <smmintrin.h>
-#include <immintrin.h>
-
-#include "config/aom_dsp_rtcd.h"
-
-#include "aom_ports/mem.h"
-#include "aom_dsp/x86/synonyms.h"
-#include "aom_dsp/x86/synonyms_avx2.h"
-
-static INLINE void sse_w32_avx2(__m256i *sum, const uint8_t *a,
- const uint8_t *b) {
- const __m256i v_a0 = yy_loadu_256(a);
- const __m256i v_b0 = yy_loadu_256(b);
- const __m256i v_a00_w = _mm256_cvtepu8_epi16(_mm256_castsi256_si128(v_a0));
- const __m256i v_a01_w =
- _mm256_cvtepu8_epi16(_mm256_extracti128_si256(v_a0, 1));
- const __m256i v_b00_w = _mm256_cvtepu8_epi16(_mm256_castsi256_si128(v_b0));
- const __m256i v_b01_w =
- _mm256_cvtepu8_epi16(_mm256_extracti128_si256(v_b0, 1));
- const __m256i v_d00_w = _mm256_sub_epi16(v_a00_w, v_b00_w);
- const __m256i v_d01_w = _mm256_sub_epi16(v_a01_w, v_b01_w);
- *sum = _mm256_add_epi32(*sum, _mm256_madd_epi16(v_d00_w, v_d00_w));
- *sum = _mm256_add_epi32(*sum, _mm256_madd_epi16(v_d01_w, v_d01_w));
-}
-
-static INLINE int64_t summary_all_avx2(const __m256i *sum_all) {
- int64_t sum;
- const __m256i sum0_4x64 =
- _mm256_cvtepu32_epi64(_mm256_castsi256_si128(*sum_all));
- const __m256i sum1_4x64 =
- _mm256_cvtepu32_epi64(_mm256_extracti128_si256(*sum_all, 1));
- const __m256i sum_4x64 = _mm256_add_epi64(sum0_4x64, sum1_4x64);
- const __m128i sum_2x64 = _mm_add_epi64(_mm256_castsi256_si128(sum_4x64),
- _mm256_extracti128_si256(sum_4x64, 1));
- const __m128i sum_1x64 = _mm_add_epi64(sum_2x64, _mm_srli_si128(sum_2x64, 8));
-
- xx_storel_64(&sum, sum_1x64);
- return sum;
-}
-
-int64_t aom_sse_avx2(const uint8_t *a, int a_stride, const uint8_t *b,
- int b_stride, int width, int height) {
- int32_t y = 0;
- int64_t sse = 0;
- __m256i sum = _mm256_setzero_si256();
- switch (width) {
- case 4:
- do {
- const __m128i v_a0 = xx_loadl_32(a);
- const __m128i v_a1 = xx_loadl_32(a + a_stride);
- const __m128i v_a2 = xx_loadl_32(a + a_stride * 2);
- const __m128i v_a3 = xx_loadl_32(a + a_stride * 3);
- const __m128i v_b0 = xx_loadl_32(b);
- const __m128i v_b1 = xx_loadl_32(b + b_stride);
- const __m128i v_b2 = xx_loadl_32(b + b_stride * 2);
- const __m128i v_b3 = xx_loadl_32(b + b_stride * 3);
- const __m128i v_a0123 = _mm_unpacklo_epi64(
- _mm_unpacklo_epi32(v_a0, v_a1), _mm_unpacklo_epi32(v_a2, v_a3));
- const __m128i v_b0123 = _mm_unpacklo_epi64(
- _mm_unpacklo_epi32(v_b0, v_b1), _mm_unpacklo_epi32(v_b2, v_b3));
- const __m256i v_a_w = _mm256_cvtepu8_epi16(v_a0123);
- const __m256i v_b_w = _mm256_cvtepu8_epi16(v_b0123);
- const __m256i v_d_w = _mm256_sub_epi16(v_a_w, v_b_w);
- sum = _mm256_add_epi32(sum, _mm256_madd_epi16(v_d_w, v_d_w));
- a += a_stride << 2;
- b += b_stride << 2;
- y += 4;
- } while (y < height);
- sse = summary_all_avx2(&sum);
- break;
- case 8:
- do {
- const __m128i v_a0 = xx_loadl_64(a);
- const __m128i v_a1 = xx_loadl_64(a + a_stride);
- const __m128i v_b0 = xx_loadl_64(b);
- const __m128i v_b1 = xx_loadl_64(b + b_stride);
- const __m256i v_a_w =
- _mm256_cvtepu8_epi16(_mm_unpacklo_epi64(v_a0, v_a1));
- const __m256i v_b_w =
- _mm256_cvtepu8_epi16(_mm_unpacklo_epi64(v_b0, v_b1));
- const __m256i v_d_w = _mm256_sub_epi16(v_a_w, v_b_w);
- sum = _mm256_add_epi32(sum, _mm256_madd_epi16(v_d_w, v_d_w));
- a += a_stride << 1;
- b += b_stride << 1;
- y += 2;
- } while (y < height);
- sse = summary_all_avx2(&sum);
- break;
- case 16:
- do {
- const __m128i v_a0 = xx_loadu_128(a);
- const __m128i v_b0 = xx_loadu_128(b);
- const __m256i v_a_w = _mm256_cvtepu8_epi16(v_a0);
- const __m256i v_b_w = _mm256_cvtepu8_epi16(v_b0);
- const __m256i v_d_w = _mm256_sub_epi16(v_a_w, v_b_w);
- sum = _mm256_add_epi32(sum, _mm256_madd_epi16(v_d_w, v_d_w));
- a += a_stride;
- b += b_stride;
- y += 1;
- } while (y < height);
- sse = summary_all_avx2(&sum);
- break;
- case 32:
- do {
- sse_w32_avx2(&sum, a, b);
- a += a_stride;
- b += b_stride;
- y += 1;
- } while (y < height);
- sse = summary_all_avx2(&sum);
- break;
- case 64:
- do {
- sse_w32_avx2(&sum, a, b);
- sse_w32_avx2(&sum, a + 32, b + 32);
- a += a_stride;
- b += b_stride;
- y += 1;
- } while (y < height);
- sse = summary_all_avx2(&sum);
- break;
- case 128:
- do {
- sse_w32_avx2(&sum, a, b);
- sse_w32_avx2(&sum, a + 32, b + 32);
- sse_w32_avx2(&sum, a + 64, b + 64);
- sse_w32_avx2(&sum, a + 96, b + 96);
- a += a_stride;
- b += b_stride;
- y += 1;
- } while (y < height);
- sse = summary_all_avx2(&sum);
- break;
- default: break;
- }
-
- return sse;
-}
-
-static INLINE void highbd_sse_w16_avx2(__m256i *sum, const uint16_t *a,
- const uint16_t *b) {
- const __m256i v_a_w = yy_loadu_256(a);
- const __m256i v_b_w = yy_loadu_256(b);
- const __m256i v_d_w = _mm256_sub_epi16(v_a_w, v_b_w);
- *sum = _mm256_add_epi32(*sum, _mm256_madd_epi16(v_d_w, v_d_w));
-}
-
-int64_t aom_highbd_sse_avx2(const uint8_t *a8, int a_stride, const uint8_t *b8,
- int b_stride, int width, int height) {
- int32_t y = 0;
- int64_t sse = 0;
- uint16_t *a = CONVERT_TO_SHORTPTR(a8);
- uint16_t *b = CONVERT_TO_SHORTPTR(b8);
- __m256i sum = _mm256_setzero_si256();
- switch (width) {
- case 4:
- do {
- const __m128i v_a0 = xx_loadl_64(a);
- const __m128i v_a1 = xx_loadl_64(a + a_stride);
- const __m128i v_a2 = xx_loadl_64(a + a_stride * 2);
- const __m128i v_a3 = xx_loadl_64(a + a_stride * 3);
- const __m128i v_b0 = xx_loadl_64(b);
- const __m128i v_b1 = xx_loadl_64(b + b_stride);
- const __m128i v_b2 = xx_loadl_64(b + b_stride * 2);
- const __m128i v_b3 = xx_loadl_64(b + b_stride * 3);
- const __m256i v_a_w = yy_set_m128i(_mm_unpacklo_epi64(v_a0, v_a1),
- _mm_unpacklo_epi64(v_a2, v_a3));
- const __m256i v_b_w = yy_set_m128i(_mm_unpacklo_epi64(v_b0, v_b1),
- _mm_unpacklo_epi64(v_b2, v_b3));
- const __m256i v_d_w = _mm256_sub_epi16(v_a_w, v_b_w);
- sum = _mm256_add_epi32(sum, _mm256_madd_epi16(v_d_w, v_d_w));
- a += a_stride << 2;
- b += b_stride << 2;
- y += 4;
- } while (y < height);
- sse = summary_all_avx2(&sum);
- break;
- case 8:
- do {
- const __m256i v_a_w = yy_loadu2_128(a + a_stride, a);
- const __m256i v_b_w = yy_loadu2_128(b + b_stride, b);
- const __m256i v_d_w = _mm256_sub_epi16(v_a_w, v_b_w);
- sum = _mm256_add_epi32(sum, _mm256_madd_epi16(v_d_w, v_d_w));
- a += a_stride << 1;
- b += b_stride << 1;
- y += 2;
- } while (y < height);
- sse = summary_all_avx2(&sum);
- break;
- case 16:
- do {
- highbd_sse_w16_avx2(&sum, a, b);
- a += a_stride;
- b += b_stride;
- y += 1;
- } while (y < height);
- sse = summary_all_avx2(&sum);
- break;
- case 32:
- do {
- highbd_sse_w16_avx2(&sum, a, b);
- highbd_sse_w16_avx2(&sum, a + 16, b + 16);
- a += a_stride;
- b += b_stride;
- y += 1;
- } while (y < height);
- sse = summary_all_avx2(&sum);
- break;
- case 64:
- do {
- highbd_sse_w16_avx2(&sum, a, b);
- highbd_sse_w16_avx2(&sum, a + 16 * 1, b + 16 * 1);
- highbd_sse_w16_avx2(&sum, a + 16 * 2, b + 16 * 2);
- highbd_sse_w16_avx2(&sum, a + 16 * 3, b + 16 * 3);
- a += a_stride;
- b += b_stride;
- y += 1;
- } while (y < height);
- sse = summary_all_avx2(&sum);
- break;
- case 128:
- do {
- highbd_sse_w16_avx2(&sum, a, b);
- highbd_sse_w16_avx2(&sum, a + 16 * 1, b + 16 * 1);
- highbd_sse_w16_avx2(&sum, a + 16 * 2, b + 16 * 2);
- highbd_sse_w16_avx2(&sum, a + 16 * 3, b + 16 * 3);
- highbd_sse_w16_avx2(&sum, a + 16 * 4, b + 16 * 4);
- highbd_sse_w16_avx2(&sum, a + 16 * 5, b + 16 * 5);
- highbd_sse_w16_avx2(&sum, a + 16 * 6, b + 16 * 6);
- highbd_sse_w16_avx2(&sum, a + 16 * 7, b + 16 * 7);
- a += a_stride;
- b += b_stride;
- y += 1;
- } while (y < height);
- sse = summary_all_avx2(&sum);
- break;
- default: break;
- }
- return sse;
-}
diff --git a/third_party/aom/aom_dsp/x86/sse_sse4.c b/third_party/aom/aom_dsp/x86/sse_sse4.c
deleted file mode 100644
index 8b5af8469..000000000
--- a/third_party/aom/aom_dsp/x86/sse_sse4.c
+++ /dev/null
@@ -1,241 +0,0 @@
-/*
- * Copyright (c) 2018, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#include <assert.h>
-#include <smmintrin.h>
-
-#include "config/aom_config.h"
-
-#include "aom_ports/mem.h"
-#include "aom/aom_integer.h"
-#include "aom_dsp/x86/synonyms.h"
-
-static INLINE int64_t summary_all_sse4(const __m128i *sum_all) {
- int64_t sum;
- const __m128i sum0 = _mm_cvtepu32_epi64(*sum_all);
- const __m128i sum1 = _mm_cvtepu32_epi64(_mm_srli_si128(*sum_all, 8));
- const __m128i sum_2x64 = _mm_add_epi64(sum0, sum1);
- const __m128i sum_1x64 = _mm_add_epi64(sum_2x64, _mm_srli_si128(sum_2x64, 8));
- xx_storel_64(&sum, sum_1x64);
- return sum;
-}
-
-static INLINE void sse_w16_sse4_1(__m128i *sum, const uint8_t *a,
- const uint8_t *b) {
- const __m128i v_a0 = xx_loadu_128(a);
- const __m128i v_b0 = xx_loadu_128(b);
- const __m128i v_a00_w = _mm_cvtepu8_epi16(v_a0);
- const __m128i v_a01_w = _mm_cvtepu8_epi16(_mm_srli_si128(v_a0, 8));
- const __m128i v_b00_w = _mm_cvtepu8_epi16(v_b0);
- const __m128i v_b01_w = _mm_cvtepu8_epi16(_mm_srli_si128(v_b0, 8));
- const __m128i v_d00_w = _mm_sub_epi16(v_a00_w, v_b00_w);
- const __m128i v_d01_w = _mm_sub_epi16(v_a01_w, v_b01_w);
- *sum = _mm_add_epi32(*sum, _mm_madd_epi16(v_d00_w, v_d00_w));
- *sum = _mm_add_epi32(*sum, _mm_madd_epi16(v_d01_w, v_d01_w));
-}
-
-int64_t aom_sse_sse4_1(const uint8_t *a, int a_stride, const uint8_t *b,
- int b_stride, int width, int height) {
- int y = 0;
- int64_t sse = 0;
- __m128i sum = _mm_setzero_si128();
- switch (width) {
- case 4:
- do {
- const __m128i v_a0 = xx_loadl_32(a);
- const __m128i v_a1 = xx_loadl_32(a + a_stride);
- const __m128i v_b0 = xx_loadl_32(b);
- const __m128i v_b1 = xx_loadl_32(b + b_stride);
- const __m128i v_a_w = _mm_cvtepu8_epi16(_mm_unpacklo_epi32(v_a0, v_a1));
- const __m128i v_b_w = _mm_cvtepu8_epi16(_mm_unpacklo_epi32(v_b0, v_b1));
- const __m128i v_d_w = _mm_sub_epi16(v_a_w, v_b_w);
- sum = _mm_add_epi32(sum, _mm_madd_epi16(v_d_w, v_d_w));
- a += a_stride << 1;
- b += b_stride << 1;
- y += 2;
- } while (y < height);
- sse = summary_all_sse4(&sum);
- break;
- case 8:
- do {
- const __m128i v_a0 = xx_loadl_64(a);
- const __m128i v_b0 = xx_loadl_64(b);
- const __m128i v_a_w = _mm_cvtepu8_epi16(v_a0);
- const __m128i v_b_w = _mm_cvtepu8_epi16(v_b0);
- const __m128i v_d_w = _mm_sub_epi16(v_a_w, v_b_w);
- sum = _mm_add_epi32(sum, _mm_madd_epi16(v_d_w, v_d_w));
- a += a_stride;
- b += b_stride;
- y += 1;
- } while (y < height);
- sse = summary_all_sse4(&sum);
- break;
- case 16:
- do {
- sse_w16_sse4_1(&sum, a, b);
- a += a_stride;
- b += b_stride;
- y += 1;
- } while (y < height);
- sse = summary_all_sse4(&sum);
- break;
- case 32:
- do {
- sse_w16_sse4_1(&sum, a, b);
- sse_w16_sse4_1(&sum, a + 16, b + 16);
- a += a_stride;
- b += b_stride;
- y += 1;
- } while (y < height);
- sse = summary_all_sse4(&sum);
- break;
- case 64:
- do {
- sse_w16_sse4_1(&sum, a, b);
- sse_w16_sse4_1(&sum, a + 16 * 1, b + 16 * 1);
- sse_w16_sse4_1(&sum, a + 16 * 2, b + 16 * 2);
- sse_w16_sse4_1(&sum, a + 16 * 3, b + 16 * 3);
- a += a_stride;
- b += b_stride;
- y += 1;
- } while (y < height);
- sse = summary_all_sse4(&sum);
- break;
- case 128:
- do {
- sse_w16_sse4_1(&sum, a, b);
- sse_w16_sse4_1(&sum, a + 16 * 1, b + 16 * 1);
- sse_w16_sse4_1(&sum, a + 16 * 2, b + 16 * 2);
- sse_w16_sse4_1(&sum, a + 16 * 3, b + 16 * 3);
- sse_w16_sse4_1(&sum, a + 16 * 4, b + 16 * 4);
- sse_w16_sse4_1(&sum, a + 16 * 5, b + 16 * 5);
- sse_w16_sse4_1(&sum, a + 16 * 6, b + 16 * 6);
- sse_w16_sse4_1(&sum, a + 16 * 7, b + 16 * 7);
- a += a_stride;
- b += b_stride;
- y += 1;
- } while (y < height);
- sse = summary_all_sse4(&sum);
- break;
- default: break;
- }
-
- return sse;
-}
-
-static INLINE void highbd_sse_w8_sse4_1(__m128i *sum, const uint16_t *a,
- const uint16_t *b) {
- const __m128i v_a_w = xx_loadu_128(a);
- const __m128i v_b_w = xx_loadu_128(b);
- const __m128i v_d_w = _mm_sub_epi16(v_a_w, v_b_w);
- *sum = _mm_add_epi32(*sum, _mm_madd_epi16(v_d_w, v_d_w));
-}
-
-int64_t aom_highbd_sse_sse4_1(const uint8_t *a8, int a_stride,
- const uint8_t *b8, int b_stride, int width,
- int height) {
- int32_t y = 0;
- int64_t sse = 0;
- uint16_t *a = CONVERT_TO_SHORTPTR(a8);
- uint16_t *b = CONVERT_TO_SHORTPTR(b8);
- __m128i sum = _mm_setzero_si128();
- switch (width) {
- case 4:
- do {
- const __m128i v_a0 = xx_loadl_64(a);
- const __m128i v_a1 = xx_loadl_64(a + a_stride);
- const __m128i v_b0 = xx_loadl_64(b);
- const __m128i v_b1 = xx_loadl_64(b + b_stride);
- const __m128i v_a_w = _mm_unpacklo_epi64(v_a0, v_a1);
- const __m128i v_b_w = _mm_unpacklo_epi64(v_b0, v_b1);
- const __m128i v_d_w = _mm_sub_epi16(v_a_w, v_b_w);
- sum = _mm_add_epi32(sum, _mm_madd_epi16(v_d_w, v_d_w));
- a += a_stride << 1;
- b += b_stride << 1;
- y += 2;
- } while (y < height);
- sse = summary_all_sse4(&sum);
- break;
- case 8:
- do {
- highbd_sse_w8_sse4_1(&sum, a, b);
- a += a_stride;
- b += b_stride;
- y += 1;
- } while (y < height);
- sse = summary_all_sse4(&sum);
- break;
- case 16:
- do {
- highbd_sse_w8_sse4_1(&sum, a, b);
- highbd_sse_w8_sse4_1(&sum, a + 8, b + 8);
- a += a_stride;
- b += b_stride;
- y += 1;
- } while (y < height);
- sse = summary_all_sse4(&sum);
- break;
- case 32:
- do {
- highbd_sse_w8_sse4_1(&sum, a, b);
- highbd_sse_w8_sse4_1(&sum, a + 8 * 1, b + 8 * 1);
- highbd_sse_w8_sse4_1(&sum, a + 8 * 2, b + 8 * 2);
- highbd_sse_w8_sse4_1(&sum, a + 8 * 3, b + 8 * 3);
- a += a_stride;
- b += b_stride;
- y += 1;
- } while (y < height);
- sse = summary_all_sse4(&sum);
- break;
- case 64:
- do {
- highbd_sse_w8_sse4_1(&sum, a, b);
- highbd_sse_w8_sse4_1(&sum, a + 8 * 1, b + 8 * 1);
- highbd_sse_w8_sse4_1(&sum, a + 8 * 2, b + 8 * 2);
- highbd_sse_w8_sse4_1(&sum, a + 8 * 3, b + 8 * 3);
- highbd_sse_w8_sse4_1(&sum, a + 8 * 4, b + 8 * 4);
- highbd_sse_w8_sse4_1(&sum, a + 8 * 5, b + 8 * 5);
- highbd_sse_w8_sse4_1(&sum, a + 8 * 6, b + 8 * 6);
- highbd_sse_w8_sse4_1(&sum, a + 8 * 7, b + 8 * 7);
- a += a_stride;
- b += b_stride;
- y += 1;
- } while (y < height);
- sse = summary_all_sse4(&sum);
- break;
- case 128:
- do {
- highbd_sse_w8_sse4_1(&sum, a, b);
- highbd_sse_w8_sse4_1(&sum, a + 8 * 1, b + 8 * 1);
- highbd_sse_w8_sse4_1(&sum, a + 8 * 2, b + 8 * 2);
- highbd_sse_w8_sse4_1(&sum, a + 8 * 3, b + 8 * 3);
- highbd_sse_w8_sse4_1(&sum, a + 8 * 4, b + 8 * 4);
- highbd_sse_w8_sse4_1(&sum, a + 8 * 5, b + 8 * 5);
- highbd_sse_w8_sse4_1(&sum, a + 8 * 6, b + 8 * 6);
- highbd_sse_w8_sse4_1(&sum, a + 8 * 7, b + 8 * 7);
- highbd_sse_w8_sse4_1(&sum, a + 8 * 8, b + 8 * 8);
- highbd_sse_w8_sse4_1(&sum, a + 8 * 9, b + 8 * 9);
- highbd_sse_w8_sse4_1(&sum, a + 8 * 10, b + 8 * 10);
- highbd_sse_w8_sse4_1(&sum, a + 8 * 11, b + 8 * 11);
- highbd_sse_w8_sse4_1(&sum, a + 8 * 12, b + 8 * 12);
- highbd_sse_w8_sse4_1(&sum, a + 8 * 13, b + 8 * 13);
- highbd_sse_w8_sse4_1(&sum, a + 8 * 14, b + 8 * 14);
- highbd_sse_w8_sse4_1(&sum, a + 8 * 15, b + 8 * 15);
- a += a_stride;
- b += b_stride;
- y += 1;
- } while (y < height);
- sse = summary_all_sse4(&sum);
- break;
- default: break;
- }
- return sse;
-}
diff --git a/third_party/aom/aom_dsp/x86/ssim_opt_x86_64.asm b/third_party/aom/aom_dsp/x86/ssim_opt_x86_64.asm
deleted file mode 100644
index 6d9b5a12f..000000000
--- a/third_party/aom/aom_dsp/x86/ssim_opt_x86_64.asm
+++ /dev/null
@@ -1,222 +0,0 @@
-;
-; Copyright (c) 2016, Alliance for Open Media. All rights reserved
-;
-; This source code is subject to the terms of the BSD 2 Clause License and
-; the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
-; was not distributed with this source code in the LICENSE file, you can
-; obtain it at www.aomedia.org/license/software. If the Alliance for Open
-; Media Patent License 1.0 was not distributed with this source code in the
-; PATENTS file, you can obtain it at www.aomedia.org/license/patent.
-;
-
-;
-
-%include "aom_ports/x86_abi_support.asm"
-
-; tabulate_ssim - sums sum_s,sum_r,sum_sq_s,sum_sq_r, sum_sxr
-%macro TABULATE_SSIM 0
- paddusw xmm15, xmm3 ; sum_s
- paddusw xmm14, xmm4 ; sum_r
- movdqa xmm1, xmm3
- pmaddwd xmm1, xmm1
- paddd xmm13, xmm1 ; sum_sq_s
- movdqa xmm2, xmm4
- pmaddwd xmm2, xmm2
- paddd xmm12, xmm2 ; sum_sq_r
- pmaddwd xmm3, xmm4
- paddd xmm11, xmm3 ; sum_sxr
-%endmacro
-
-; Sum across the register %1 starting with q words
-%macro SUM_ACROSS_Q 1
- movdqa xmm2,%1
- punpckldq %1,xmm0
- punpckhdq xmm2,xmm0
- paddq %1,xmm2
- movdqa xmm2,%1
- punpcklqdq %1,xmm0
- punpckhqdq xmm2,xmm0
- paddq %1,xmm2
-%endmacro
-
-; Sum across the register %1 starting with q words
-%macro SUM_ACROSS_W 1
- movdqa xmm1, %1
- punpcklwd %1,xmm0
- punpckhwd xmm1,xmm0
- paddd %1, xmm1
- SUM_ACROSS_Q %1
-%endmacro
-
-SECTION .text
-
-;void ssim_parms_sse2(
-; unsigned char *s,
-; int sp,
-; unsigned char *r,
-; int rp
-; uint32_t *sum_s,
-; uint32_t *sum_r,
-; uint32_t *sum_sq_s,
-; uint32_t *sum_sq_r,
-; uint32_t *sum_sxr);
-;
-; TODO: Use parm passing through structure, probably don't need the pxors
-; ( calling app will initialize to 0 ) could easily fit everything in sse2
-; without too much hastle, and can probably do better estimates with psadw
-; or pavgb At this point this is just meant to be first pass for calculating
-; all the parms needed for 16x16 ssim so we can play with dssim as distortion
-; in mode selection code.
-global sym(aom_ssim_parms_16x16_sse2) PRIVATE
-sym(aom_ssim_parms_16x16_sse2):
- push rbp
- mov rbp, rsp
- SHADOW_ARGS_TO_STACK 9
- SAVE_XMM 15
- push rsi
- push rdi
- ; end prolog
-
- mov rsi, arg(0) ;s
- mov rcx, arg(1) ;sp
- mov rdi, arg(2) ;r
- mov rax, arg(3) ;rp
-
- pxor xmm0, xmm0
- pxor xmm15,xmm15 ;sum_s
- pxor xmm14,xmm14 ;sum_r
- pxor xmm13,xmm13 ;sum_sq_s
- pxor xmm12,xmm12 ;sum_sq_r
- pxor xmm11,xmm11 ;sum_sxr
-
- mov rdx, 16 ;row counter
-.NextRow:
-
- ;grab source and reference pixels
- movdqu xmm5, [rsi]
- movdqu xmm6, [rdi]
- movdqa xmm3, xmm5
- movdqa xmm4, xmm6
- punpckhbw xmm3, xmm0 ; high_s
- punpckhbw xmm4, xmm0 ; high_r
-
- TABULATE_SSIM
-
- movdqa xmm3, xmm5
- movdqa xmm4, xmm6
- punpcklbw xmm3, xmm0 ; low_s
- punpcklbw xmm4, xmm0 ; low_r
-
- TABULATE_SSIM
-
- add rsi, rcx ; next s row
- add rdi, rax ; next r row
-
- dec rdx ; counter
- jnz .NextRow
-
- SUM_ACROSS_W xmm15
- SUM_ACROSS_W xmm14
- SUM_ACROSS_Q xmm13
- SUM_ACROSS_Q xmm12
- SUM_ACROSS_Q xmm11
-
- mov rdi,arg(4)
- movd [rdi], xmm15;
- mov rdi,arg(5)
- movd [rdi], xmm14;
- mov rdi,arg(6)
- movd [rdi], xmm13;
- mov rdi,arg(7)
- movd [rdi], xmm12;
- mov rdi,arg(8)
- movd [rdi], xmm11;
-
- ; begin epilog
- pop rdi
- pop rsi
- RESTORE_XMM
- UNSHADOW_ARGS
- pop rbp
- ret
-
-;void ssim_parms_sse2(
-; unsigned char *s,
-; int sp,
-; unsigned char *r,
-; int rp
-; uint32_t *sum_s,
-; uint32_t *sum_r,
-; uint32_t *sum_sq_s,
-; uint32_t *sum_sq_r,
-; uint32_t *sum_sxr);
-;
-; TODO: Use parm passing through structure, probably don't need the pxors
-; ( calling app will initialize to 0 ) could easily fit everything in sse2
-; without too much hastle, and can probably do better estimates with psadw
-; or pavgb At this point this is just meant to be first pass for calculating
-; all the parms needed for 16x16 ssim so we can play with dssim as distortion
-; in mode selection code.
-global sym(aom_ssim_parms_8x8_sse2) PRIVATE
-sym(aom_ssim_parms_8x8_sse2):
- push rbp
- mov rbp, rsp
- SHADOW_ARGS_TO_STACK 9
- SAVE_XMM 15
- push rsi
- push rdi
- ; end prolog
-
- mov rsi, arg(0) ;s
- mov rcx, arg(1) ;sp
- mov rdi, arg(2) ;r
- mov rax, arg(3) ;rp
-
- pxor xmm0, xmm0
- pxor xmm15,xmm15 ;sum_s
- pxor xmm14,xmm14 ;sum_r
- pxor xmm13,xmm13 ;sum_sq_s
- pxor xmm12,xmm12 ;sum_sq_r
- pxor xmm11,xmm11 ;sum_sxr
-
- mov rdx, 8 ;row counter
-.NextRow:
-
- ;grab source and reference pixels
- movq xmm3, [rsi]
- movq xmm4, [rdi]
- punpcklbw xmm3, xmm0 ; low_s
- punpcklbw xmm4, xmm0 ; low_r
-
- TABULATE_SSIM
-
- add rsi, rcx ; next s row
- add rdi, rax ; next r row
-
- dec rdx ; counter
- jnz .NextRow
-
- SUM_ACROSS_W xmm15
- SUM_ACROSS_W xmm14
- SUM_ACROSS_Q xmm13
- SUM_ACROSS_Q xmm12
- SUM_ACROSS_Q xmm11
-
- mov rdi,arg(4)
- movd [rdi], xmm15;
- mov rdi,arg(5)
- movd [rdi], xmm14;
- mov rdi,arg(6)
- movd [rdi], xmm13;
- mov rdi,arg(7)
- movd [rdi], xmm12;
- mov rdi,arg(8)
- movd [rdi], xmm11;
-
- ; begin epilog
- pop rdi
- pop rsi
- RESTORE_XMM
- UNSHADOW_ARGS
- pop rbp
- ret
diff --git a/third_party/aom/aom_dsp/x86/subpel_variance_sse2.asm b/third_party/aom/aom_dsp/x86/subpel_variance_sse2.asm
deleted file mode 100644
index 45bf6ec3c..000000000
--- a/third_party/aom/aom_dsp/x86/subpel_variance_sse2.asm
+++ /dev/null
@@ -1,1481 +0,0 @@
-;
-; Copyright (c) 2016, Alliance for Open Media. All rights reserved
-;
-; This source code is subject to the terms of the BSD 2 Clause License and
-; the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
-; was not distributed with this source code in the LICENSE file, you can
-; obtain it at www.aomedia.org/license/software. If the Alliance for Open
-; Media Patent License 1.0 was not distributed with this source code in the
-; PATENTS file, you can obtain it at www.aomedia.org/license/patent.
-;
-
-;
-
-%include "third_party/x86inc/x86inc.asm"
-
-SECTION_RODATA
-pw_8: times 8 dw 8 ; rounding term added before the >> 4 of the bilinear filters
-bilin_filter_m_sse2: times 8 dw 16 ; filter 0, tap 0; each filter = 8 words of tap 0 then 8 words of tap 1
- times 8 dw 0 ; filter 0, tap 1
- times 8 dw 14 ; filter 1: (14, 2)
- times 8 dw 2
- times 8 dw 12 ; filter 2: (12, 4)
- times 8 dw 4
- times 8 dw 10 ; filter 3: (10, 6)
- times 8 dw 6
- times 16 dw 8 ; filter 4: (8, 8), both taps in one line
- times 8 dw 6 ; filter 5: (6, 10)
- times 8 dw 10
- times 8 dw 4 ; filter 6: (4, 12)
- times 8 dw 12
- times 8 dw 2 ; filter 7: (2, 14)
- times 8 dw 14
-
-bilin_filter_m_ssse3: times 8 db 16, 0 ; same 8 filters as interleaved byte pairs (operand of pmaddubsw), 16 bytes each
- times 8 db 14, 2
- times 8 db 12, 4
- times 8 db 10, 6
- times 16 db 8 ; (8, 8)
- times 8 db 6, 10
- times 8 db 4, 12
- times 8 db 2, 14
-
-SECTION .text
-
-; int aom_sub_pixel_varianceNxh(const uint8_t *src, ptrdiff_t src_stride,
-; int x_offset, int y_offset,
-; const uint8_t *dst, ptrdiff_t dst_stride,
-; int height, unsigned int *sse);
-;
-; Returns the sum of differences (SE) and stores the sum of squared differences (SSE) through sse.
-
-%macro SUM_SSE 6 ; src1, dst1, src2, dst2, sum, sse (src1/src2 are overwritten)
- psubw %3, %4 ; src2 -= dst2 (word differences)
- psubw %1, %2 ; src1 -= dst1
- paddw %5, %3 ; sum += diff2 (per-word accumulation)
- pmaddwd %3, %3 ; diff2 squared, adjacent pairs summed into dwords
- paddw %5, %1 ; sum += diff1
- pmaddwd %1, %1 ; diff1 squared, adjacent pairs summed into dwords
- paddd %6, %3 ; sse += squared diff2
- paddd %6, %1 ; sse += squared diff1
-%endmacro
-
-%macro STORE_AND_RET 1 ; %1 = block width; reduces m6 (word sums) and m7 (dword sse), then returns
-%if %1 > 4
- ; if H=64 and W=16, we have 8 words of each 2(1bit)x64(6bit)x9bit=16bit
- ; in m6, i.e. it _exactly_ fits in a signed word per word in the xmm reg.
- ; We have to sign-extend it before adding the words within the register
- ; and outputting to a dword.
- pcmpgtw m5, m6 ; mask for 0 > x
- movhlps m3, m7 ; upper two sse dwords -> m3
- punpcklwd m4, m6, m5 ; low four sums sign-extended to dwords
- punpckhwd m6, m5 ; sign-extend m6 word->dword
- paddd m7, m3 ; sse: 4 dwords -> 2
- paddd m6, m4 ; sum: 8 values -> 4 dwords
- pshufd m3, m7, 0x1 ; bring sse dword 1 down to dword 0
- movhlps m4, m6
- paddd m7, m3 ; sse total now in dword 0 of m7
- paddd m6, m4 ; sum: 4 dwords -> 2
- mov r1, ssem ; r1 = unsigned int *sse
- pshufd m4, m6, 0x1
- movd [r1], m7 ; store sse
- paddd m6, m4 ; sum total now in dword 0 of m6
- movd raxd, m6 ; store sum as return value
-%else ; 4xh: only the low 64 bits of m6/m7 are reduced here
- pshuflw m4, m6, 0xe ; words 2,3 -> words 0,1
- pshuflw m3, m7, 0xe ; sse dword 1 -> dword 0
- paddw m6, m4 ; sum: 4 words -> 2
- paddd m7, m3 ; sse total in dword 0 of m7
- pcmpgtw m5, m6 ; mask for 0 > x
- mov r1, ssem ; r1 = unsigned int *sse
- punpcklwd m6, m5 ; sign-extend m6 word->dword
- movd [r1], m7 ; store sse
- pshuflw m4, m6, 0xe ; sum dword 1 -> dword 0
- paddd m6, m4 ; sum total in dword 0 of m6
- movd raxd, m6 ; store sum as return value
-%endif
- RET
-%endmacro
-
-%macro INC_SRC_BY_SRC_STRIDE 0 ; srcq += src_stride
-%if ARCH_X86=1 && CONFIG_PIC=1
- add srcq, src_stridemp ; stride read from its stack slot: 32-bit PIC code reuses the src_stride register as tempq
-%else
- add srcq, src_strideq
-%endif
-%endmacro
-
-%macro SUBPEL_VARIANCE 1-2 0 ; %1 = block width W (4, 8 or 16); %2 = 1 for the avg (second predictor) variant
-%if cpuflag(ssse3)
-%define bilin_filter_m bilin_filter_m_ssse3
-%define filter_idx_shift 4
-%else
-%define bilin_filter_m bilin_filter_m_sse2
-%define filter_idx_shift 5
-%endif
-; FIXME(rbultje) only bilinear filters use >8 registers, and ssse3 only uses
-; 11, not 13, if the registers are ordered correctly. May make a minor speed
-; difference on Win64
-
-%if ARCH_X86_64
- %if %2 == 1 ; avg
- cglobal sub_pixel_avg_variance%1xh, 9, 10, 13, src, src_stride, \
- x_offset, y_offset, dst, dst_stride, \
- sec, sec_stride, height, sse
- %define sec_str sec_strideq
- %else
- cglobal sub_pixel_variance%1xh, 7, 8, 13, src, src_stride, \
- x_offset, y_offset, dst, dst_stride, \
- height, sse
- %endif
- %define block_height heightd
- %define bilin_filter sseq
-%else
- %if CONFIG_PIC=1
- %if %2 == 1 ; avg
- cglobal sub_pixel_avg_variance%1xh, 7, 7, 13, src, src_stride, \
- x_offset, y_offset, dst, dst_stride, \
- sec, sec_stride, height, sse, \
- g_bilin_filter, g_pw_8
- %define block_height dword heightm
- %define sec_str sec_stridemp
-
- ;Store bilin_filter and pw_8 location in stack
- %if GET_GOT_DEFINED == 1
- GET_GOT eax
- add esp, 4 ; restore esp
- %endif
-
- lea ecx, [GLOBAL(bilin_filter_m)]
- mov g_bilin_filterm, ecx ; table address kept in a stack slot for later use
-
- lea ecx, [GLOBAL(pw_8)]
- mov g_pw_8m, ecx ; rounding-constant address kept in a stack slot
-
- LOAD_IF_USED 0, 1 ; load eax, ecx back
- %else
- cglobal sub_pixel_variance%1xh, 7, 7, 13, src, src_stride, \
- x_offset, y_offset, dst, dst_stride, \
- height, sse, g_bilin_filter, g_pw_8
- %define block_height heightd
-
- ;Store bilin_filter and pw_8 location in stack
- %if GET_GOT_DEFINED == 1
- GET_GOT eax
- add esp, 4 ; restore esp
- %endif
-
- lea ecx, [GLOBAL(bilin_filter_m)]
- mov g_bilin_filterm, ecx ; table address kept in a stack slot for later use
-
- lea ecx, [GLOBAL(pw_8)]
- mov g_pw_8m, ecx ; rounding-constant address kept in a stack slot
-
- LOAD_IF_USED 0, 1 ; load eax, ecx back
- %endif
- %else
- %if %2 == 1 ; avg
- cglobal sub_pixel_avg_variance%1xh, 7, 7, 13, src, src_stride, \
- x_offset, y_offset, \
- dst, dst_stride, sec, sec_stride, \
- height, sse
- %define block_height dword heightm
- %define sec_str sec_stridemp
- %else
- cglobal sub_pixel_variance%1xh, 7, 7, 13, src, src_stride, \
- x_offset, y_offset, dst, dst_stride, \
- height, sse
- %define block_height heightd
- %endif
- %define bilin_filter bilin_filter_m
- %endif
-%endif
-
-%if %1 == 4
- %define movx movd
-%else
- %define movx movh
-%endif
-
- ASSERT %1 <= 16 ; m6 overflows if w > 16
- pxor m6, m6 ; sum
- pxor m7, m7 ; sse
- ; FIXME(rbultje) if both filters are bilinear, we don't actually use m5; we
- ; could perhaps use it for something more productive then
- pxor m5, m5 ; dedicated zero register
-%if %1 < 16
- sar block_height, 1 ; the W < 16 loops handle two rows per iteration
-%if %2 == 1 ; avg
- shl sec_str, 1 ; sec is advanced once per iteration, i.e. once per two rows
-%endif
-%endif
-
- ; FIXME(rbultje) replace by jumptable?
- test x_offsetd, x_offsetd ; dispatch on x_offset first: 0, 4 (half-pel) or other
- jnz .x_nonzero
- ; x_offset == 0
- test y_offsetd, y_offsetd
- jnz .x_zero_y_nonzero
-
- ; x_offset == 0 && y_offset == 0: no interpolation, src is compared to dst directly
-.x_zero_y_zero_loop:
-%if %1 == 16
- movu m0, [srcq]
- mova m1, [dstq] ; aligned load of the dst row
-%if %2 == 1 ; avg
- pavgb m0, [secq] ; average with the second predictor
- punpckhbw m3, m1, m5
- punpcklbw m1, m5
-%endif
- punpckhbw m2, m0, m5 ; widen src bytes to words (m5 == 0)
- punpcklbw m0, m5
-
-%if %2 == 0 ; !avg
- punpckhbw m3, m1, m5
- punpcklbw m1, m5
-%endif
- SUM_SSE m0, m1, m2, m3, m6, m7
-
- add srcq, src_strideq
- add dstq, dst_strideq
-%else ; %1 < 16
- movx m0, [srcq] ; row 0
-%if %2 == 1 ; avg
-%if %1 > 4
- movhps m0, [srcq+src_strideq] ; row 1 into the high half
-%else ; 4xh
- movx m1, [srcq+src_strideq]
- punpckldq m0, m1 ; rows 0 and 1 packed into the low 8 bytes
-%endif
-%else ; !avg
- movx m2, [srcq+src_strideq] ; row 1
-%endif
-
- movx m1, [dstq]
- movx m3, [dstq+dst_strideq]
-
-%if %2 == 1 ; avg
-%if %1 > 4
- pavgb m0, [secq]
-%else
- movh m2, [secq]
- pavgb m0, m2
-%endif
- punpcklbw m3, m5
- punpcklbw m1, m5
-%if %1 > 4
- punpckhbw m2, m0, m5 ; row 1 as words
- punpcklbw m0, m5 ; row 0 as words
-%else ; 4xh
- punpcklbw m0, m5
- movhlps m2, m0 ; row 1 words moved to the low half of m2
-%endif
-%else ; !avg
- punpcklbw m0, m5
- punpcklbw m2, m5
- punpcklbw m3, m5
- punpcklbw m1, m5
-%endif
- SUM_SSE m0, m1, m2, m3, m6, m7
-
- lea srcq, [srcq+src_strideq*2] ; two rows consumed per iteration
- lea dstq, [dstq+dst_strideq*2]
-%endif
-%if %2 == 1 ; avg
- add secq, sec_str
-%endif
- dec block_height
- jg .x_zero_y_zero_loop
- STORE_AND_RET %1
-
-.x_zero_y_nonzero:
- cmp y_offsetd, 4 ; 4 selects the half-pel position
- jne .x_zero_y_nonhalf
-
- ; x_offset == 0 && y_offset == 0.5: each output row is pavgb(row, next row)
-.x_zero_y_half_loop:
-%if %1 == 16
- movu m0, [srcq]
- movu m4, [srcq+src_strideq]
- mova m1, [dstq]
- pavgb m0, m4 ; vertical half-pel average
- punpckhbw m3, m1, m5
-%if %2 == 1 ; avg
- pavgb m0, [secq]
-%endif
- punpcklbw m1, m5
- punpckhbw m2, m0, m5
- punpcklbw m0, m5
- SUM_SSE m0, m1, m2, m3, m6, m7
-
- add srcq, src_strideq
- add dstq, dst_strideq
-%else ; %1 < 16
- movx m0, [srcq] ; row 0
- movx m2, [srcq+src_strideq] ; row 1
-%if %2 == 1 ; avg
-%if %1 > 4
- movhps m2, [srcq+src_strideq*2] ; m2 = row 1 | row 2
-%else ; 4xh
- movx m1, [srcq+src_strideq*2]
- punpckldq m2, m1 ; m2 = row 1 | row 2 in the low 8 bytes
-%endif
- movx m1, [dstq]
-%if %1 > 4
- movlhps m0, m2 ; m0 = row 0 | row 1
-%else ; 4xh
- punpckldq m0, m2
-%endif
- movx m3, [dstq+dst_strideq]
- pavgb m0, m2 ; both output rows averaged at once
- punpcklbw m1, m5
-%if %1 > 4
- pavgb m0, [secq]
- punpcklbw m3, m5
- punpckhbw m2, m0, m5
- punpcklbw m0, m5
-%else ; 4xh
- movh m4, [secq]
- pavgb m0, m4
- punpcklbw m3, m5
- punpcklbw m0, m5
- movhlps m2, m0
-%endif
-%else ; !avg
- movx m4, [srcq+src_strideq*2] ; row 2
- movx m1, [dstq]
- pavgb m0, m2 ; output row 0 = avg(row 0, row 1)
- movx m3, [dstq+dst_strideq]
- pavgb m2, m4 ; output row 1 = avg(row 1, row 2)
- punpcklbw m0, m5
- punpcklbw m2, m5
- punpcklbw m3, m5
- punpcklbw m1, m5
-%endif
- SUM_SSE m0, m1, m2, m3, m6, m7
-
- lea srcq, [srcq+src_strideq*2]
- lea dstq, [dstq+dst_strideq*2]
-%endif
-%if %2 == 1 ; avg
- add secq, sec_str
-%endif
- dec block_height
- jg .x_zero_y_half_loop
- STORE_AND_RET %1
-
-.x_zero_y_nonhalf:
- ; x_offset == 0 && y_offset == bilin interpolation
-%if ARCH_X86_64
- lea bilin_filter, [GLOBAL(bilin_filter_m)]
-%endif
- shl y_offsetd, filter_idx_shift ; y_offset -> byte offset into the filter table
-%if ARCH_X86_64 && %1 > 4
- mova m8, [bilin_filter+y_offsetq]
-%if notcpuflag(ssse3) ; FIXME(rbultje) don't scatter registers on x86-64
- mova m9, [bilin_filter+y_offsetq+16]
-%endif
- mova m10, [GLOBAL(pw_8)]
-%define filter_y_a m8
-%define filter_y_b m9
-%define filter_rnd m10
-%else ; x86-32, or 4xh on x86-64: filter taps are used as memory operands
-%if ARCH_X86=1 && CONFIG_PIC=1
-; x_offset == 0, reuse x_offset reg
-%define tempq x_offsetq
- add y_offsetq, g_bilin_filterm
-%define filter_y_a [y_offsetq]
-%define filter_y_b [y_offsetq+16]
- mov tempq, g_pw_8m
-%define filter_rnd [tempq]
-%else
- add y_offsetq, bilin_filter
-%define filter_y_a [y_offsetq]
-%define filter_y_b [y_offsetq+16]
-%define filter_rnd [GLOBAL(pw_8)]
-%endif
-%endif
-
-.x_zero_y_other_loop:
-%if %1 == 16
- movu m0, [srcq]
- movu m4, [srcq+src_strideq]
- mova m1, [dstq]
-%if cpuflag(ssse3)
- punpckhbw m2, m0, m4 ; interleave row and next row for pmaddubsw
- punpcklbw m0, m4
- pmaddubsw m2, filter_y_a
- pmaddubsw m0, filter_y_a
- paddw m2, filter_rnd
- paddw m0, filter_rnd
-%else
- punpckhbw m2, m0, m5
- punpckhbw m3, m4, m5
- punpcklbw m0, m5
- punpcklbw m4, m5
- ; FIXME(rbultje) instead of out=((num-x)*in1+x*in2+rnd)>>log2(num), we can
- ; also do out=in1+(((num-x)*(in2-in1)+rnd)>>log2(num)). Total number of
- ; instructions is the same (5), but it is 1 mul instead of 2, so might be
- ; slightly faster because of pmullw latency. It would also cut our rodata
- ; tables in half for this function, and save 1-2 registers on x86-64.
- pmullw m2, filter_y_a
- pmullw m3, filter_y_b
- paddw m2, filter_rnd
- pmullw m0, filter_y_a
- pmullw m4, filter_y_b
- paddw m0, filter_rnd
- paddw m2, m3
- paddw m0, m4
-%endif
- psraw m2, 4 ; >> 4 completes the rounded bilinear filter
- psraw m0, 4
-%if %2 == 1 ; avg
- ; FIXME(rbultje) pipeline
- packuswb m0, m2 ; back to bytes so pavgb can be applied
- pavgb m0, [secq]
- punpckhbw m2, m0, m5
- punpcklbw m0, m5
-%endif
- punpckhbw m3, m1, m5
- punpcklbw m1, m5
- SUM_SSE m0, m1, m2, m3, m6, m7
-
- add srcq, src_strideq
- add dstq, dst_strideq
-%else ; %1 < 16
- movx m0, [srcq] ; row 0
- movx m2, [srcq+src_strideq] ; row 1
- movx m4, [srcq+src_strideq*2] ; row 2
- movx m3, [dstq+dst_strideq]
-%if cpuflag(ssse3)
- movx m1, [dstq]
- punpcklbw m0, m2 ; rows 0/1 interleaved
- punpcklbw m2, m4 ; rows 1/2 interleaved
- pmaddubsw m0, filter_y_a
- pmaddubsw m2, filter_y_a
- punpcklbw m3, m5
- paddw m2, filter_rnd
- paddw m0, filter_rnd
-%else
- punpcklbw m0, m5
- punpcklbw m2, m5
- punpcklbw m4, m5
- pmullw m0, filter_y_a
- pmullw m1, m2, filter_y_b
- punpcklbw m3, m5
- paddw m0, filter_rnd
- pmullw m2, filter_y_a
- pmullw m4, filter_y_b
- paddw m0, m1
- paddw m2, filter_rnd
- movx m1, [dstq]
- paddw m2, m4
-%endif
- psraw m0, 4
- psraw m2, 4
-%if %2 == 1 ; avg
- ; FIXME(rbultje) pipeline
-%if %1 == 4
- movlhps m0, m2 ; gather both 4-wide rows into one register before packing
-%endif
- packuswb m0, m2
-%if %1 > 4
- pavgb m0, [secq]
- punpckhbw m2, m0, m5
- punpcklbw m0, m5
-%else ; 4xh
- movh m2, [secq]
- pavgb m0, m2
- punpcklbw m0, m5
- movhlps m2, m0
-%endif
-%endif
- punpcklbw m1, m5
- SUM_SSE m0, m1, m2, m3, m6, m7
-
- lea srcq, [srcq+src_strideq*2]
- lea dstq, [dstq+dst_strideq*2]
-%endif
-%if %2 == 1 ; avg
- add secq, sec_str
-%endif
- dec block_height
- jg .x_zero_y_other_loop
-%undef filter_y_a
-%undef filter_y_b
-%undef filter_rnd
- STORE_AND_RET %1
-
-.x_nonzero:
- cmp x_offsetd, 4 ; 4 selects the half-pel position
- jne .x_nonhalf
- ; x_offset == 0.5
- test y_offsetd, y_offsetd
- jnz .x_half_y_nonzero
-
- ; x_offset == 0.5 && y_offset == 0: each pixel is pavgb(src[x], src[x+1])
-.x_half_y_zero_loop:
-%if %1 == 16
- movu m0, [srcq]
- movu m4, [srcq+1]
- mova m1, [dstq]
- pavgb m0, m4 ; horizontal half-pel average
- punpckhbw m3, m1, m5
-%if %2 == 1 ; avg
- pavgb m0, [secq]
-%endif
- punpcklbw m1, m5
- punpckhbw m2, m0, m5
- punpcklbw m0, m5
- SUM_SSE m0, m1, m2, m3, m6, m7
-
- add srcq, src_strideq
- add dstq, dst_strideq
-%else ; %1 < 16
- movx m0, [srcq]
- movx m4, [srcq+1]
-%if %2 == 1 ; avg
-%if %1 > 4
- movhps m0, [srcq+src_strideq] ; second row into the high halves
- movhps m4, [srcq+src_strideq+1]
-%else ; 4xh
- movx m1, [srcq+src_strideq]
- punpckldq m0, m1
- movx m2, [srcq+src_strideq+1]
- punpckldq m4, m2
-%endif
- movx m1, [dstq]
- movx m3, [dstq+dst_strideq]
- pavgb m0, m4 ; both rows averaged at once
- punpcklbw m3, m5
-%if %1 > 4
- pavgb m0, [secq]
- punpcklbw m1, m5
- punpckhbw m2, m0, m5
- punpcklbw m0, m5
-%else ; 4xh
- movh m2, [secq]
- pavgb m0, m2
- punpcklbw m1, m5
- punpcklbw m0, m5
- movhlps m2, m0
-%endif
-%else ; !avg
- movx m2, [srcq+src_strideq]
- movx m1, [dstq]
- pavgb m0, m4 ; row 0
- movx m4, [srcq+src_strideq+1]
- movx m3, [dstq+dst_strideq]
- pavgb m2, m4 ; row 1
- punpcklbw m0, m5
- punpcklbw m2, m5
- punpcklbw m3, m5
- punpcklbw m1, m5
-%endif
- SUM_SSE m0, m1, m2, m3, m6, m7
-
- lea srcq, [srcq+src_strideq*2]
- lea dstq, [dstq+dst_strideq*2]
-%endif
-%if %2 == 1 ; avg
- add secq, sec_str
-%endif
- dec block_height
- jg .x_half_y_zero_loop
- STORE_AND_RET %1
-
-.x_half_y_nonzero:
- cmp y_offsetd, 4
- jne .x_half_y_nonhalf
-
- ; x_offset == 0.5 && y_offset == 0.5: m0 carries the previous row's horizontal average
-%if %1 == 16
- movu m0, [srcq]
- movu m3, [srcq+1]
- add srcq, src_strideq
- pavgb m0, m3 ; prime m0 with the first row
-.x_half_y_half_loop:
- movu m4, [srcq]
- movu m3, [srcq+1]
- mova m1, [dstq]
- pavgb m4, m3 ; horizontal average of the current row
- punpckhbw m3, m1, m5
- pavgb m0, m4 ; vertical average with the previous row
-%if %2 == 1 ; avg
- punpcklbw m1, m5
- pavgb m0, [secq]
- punpckhbw m2, m0, m5
- punpcklbw m0, m5
-%else
- punpckhbw m2, m0, m5
- punpcklbw m0, m5
- punpcklbw m1, m5
-%endif
- SUM_SSE m0, m1, m2, m3, m6, m7
- mova m0, m4 ; current row becomes the previous row
-
- add srcq, src_strideq
- add dstq, dst_strideq
-%else ; %1 < 16
- movx m0, [srcq]
- movx m3, [srcq+1]
- add srcq, src_strideq
- pavgb m0, m3 ; prime m0 with the first row
-.x_half_y_half_loop:
- movx m2, [srcq]
- movx m3, [srcq+1]
-%if %2 == 1 ; avg
-%if %1 > 4
- movhps m2, [srcq+src_strideq]
- movhps m3, [srcq+src_strideq+1]
-%else
- movx m1, [srcq+src_strideq]
- punpckldq m2, m1
- movx m1, [srcq+src_strideq+1]
- punpckldq m3, m1
-%endif
- pavgb m2, m3 ; m2 = horizontal averages of the next two rows
-%if %1 > 4
- movlhps m0, m2 ; m0 = previous row | first new row
- movhlps m4, m2 ; m4 = second new row, kept for the next iteration
-%else ; 4xh
- punpckldq m0, m2
- pshuflw m4, m2, 0xe
-%endif
- movx m1, [dstq]
- pavgb m0, m2 ; vertical average for both output rows
- movx m3, [dstq+dst_strideq]
-%if %1 > 4
- pavgb m0, [secq]
-%else
- movh m2, [secq]
- pavgb m0, m2
-%endif
- punpcklbw m3, m5
- punpcklbw m1, m5
-%if %1 > 4
- punpckhbw m2, m0, m5
- punpcklbw m0, m5
-%else
- punpcklbw m0, m5
- movhlps m2, m0
-%endif
-%else ; !avg
- movx m4, [srcq+src_strideq]
- movx m1, [srcq+src_strideq+1]
- pavgb m2, m3 ; horizontal average, first new row
- pavgb m4, m1 ; horizontal average, second new row
- pavgb m0, m2 ; output row 0
- pavgb m2, m4 ; output row 1
- movx m1, [dstq]
- movx m3, [dstq+dst_strideq]
- punpcklbw m0, m5
- punpcklbw m2, m5
- punpcklbw m3, m5
- punpcklbw m1, m5
-%endif
- SUM_SSE m0, m1, m2, m3, m6, m7
- mova m0, m4 ; last filtered row carried into the next iteration
-
- lea srcq, [srcq+src_strideq*2]
- lea dstq, [dstq+dst_strideq*2]
-%endif
-%if %2 == 1 ; avg
- add secq, sec_str
-%endif
- dec block_height
- jg .x_half_y_half_loop
- STORE_AND_RET %1
-
-.x_half_y_nonhalf:
- ; x_offset == 0.5 && y_offset == bilin interpolation
-%if ARCH_X86_64
- lea bilin_filter, [GLOBAL(bilin_filter_m)]
-%endif
- shl y_offsetd, filter_idx_shift ; y_offset -> byte offset into the filter table
-%if ARCH_X86_64 && %1 > 4
- mova m8, [bilin_filter+y_offsetq]
-%if notcpuflag(ssse3) ; FIXME(rbultje) don't scatter registers on x86-64
- mova m9, [bilin_filter+y_offsetq+16]
-%endif
- mova m10, [GLOBAL(pw_8)]
-%define filter_y_a m8
-%define filter_y_b m9
-%define filter_rnd m10
-%else ;x86-32, or 4xh on x86-64: filter taps are used as memory operands
-%if ARCH_X86=1 && CONFIG_PIC=1
-; x_offset == 0.5. We can reuse x_offset reg
-%define tempq x_offsetq
- add y_offsetq, g_bilin_filterm
-%define filter_y_a [y_offsetq]
-%define filter_y_b [y_offsetq+16]
- mov tempq, g_pw_8m
-%define filter_rnd [tempq]
-%else
- add y_offsetq, bilin_filter
-%define filter_y_a [y_offsetq]
-%define filter_y_b [y_offsetq+16]
-%define filter_rnd [GLOBAL(pw_8)]
-%endif
-%endif
-
-%if %1 == 16
- movu m0, [srcq]
- movu m3, [srcq+1]
- add srcq, src_strideq
- pavgb m0, m3 ; prime m0 with the first row's horizontal average
-.x_half_y_other_loop:
- movu m4, [srcq]
- movu m2, [srcq+1]
- mova m1, [dstq]
- pavgb m4, m2 ; horizontal average of the current row
-%if cpuflag(ssse3)
- punpckhbw m2, m0, m4
- punpcklbw m0, m4
- pmaddubsw m2, filter_y_a
- pmaddubsw m0, filter_y_a
- paddw m2, filter_rnd
- paddw m0, filter_rnd
- psraw m2, 4
-%else
- punpckhbw m2, m0, m5
- punpckhbw m3, m4, m5
- pmullw m2, filter_y_a
- pmullw m3, filter_y_b
- paddw m2, filter_rnd
- punpcklbw m0, m5
- paddw m2, m3
- punpcklbw m3, m4, m5
- pmullw m0, filter_y_a
- pmullw m3, filter_y_b
- paddw m0, filter_rnd
- psraw m2, 4
- paddw m0, m3
-%endif
- punpckhbw m3, m1, m5
- psraw m0, 4
-%if %2 == 1 ; avg
- ; FIXME(rbultje) pipeline
- packuswb m0, m2
- pavgb m0, [secq]
- punpckhbw m2, m0, m5
- punpcklbw m0, m5
-%endif
- punpcklbw m1, m5
- SUM_SSE m0, m1, m2, m3, m6, m7
- mova m0, m4 ; current row becomes the previous row
-
- add srcq, src_strideq
- add dstq, dst_strideq
-%else ; %1 < 16
- movx m0, [srcq]
- movx m3, [srcq+1]
- add srcq, src_strideq
- pavgb m0, m3 ; prime m0 with the first row's horizontal average
-%if notcpuflag(ssse3)
- punpcklbw m0, m5 ; the sse2 path keeps the carried row as words
-%endif
-.x_half_y_other_loop:
- movx m2, [srcq]
- movx m1, [srcq+1]
- movx m4, [srcq+src_strideq]
- movx m3, [srcq+src_strideq+1]
- pavgb m2, m1 ; horizontal average, first new row
- pavgb m4, m3 ; horizontal average, second new row
- movx m3, [dstq+dst_strideq]
-%if cpuflag(ssse3)
- movx m1, [dstq]
- punpcklbw m0, m2
- punpcklbw m2, m4
- pmaddubsw m0, filter_y_a
- pmaddubsw m2, filter_y_a
- punpcklbw m3, m5
- paddw m0, filter_rnd
- paddw m2, filter_rnd
-%else
- punpcklbw m2, m5
- punpcklbw m4, m5
- pmullw m0, filter_y_a
- pmullw m1, m2, filter_y_b
- punpcklbw m3, m5
- paddw m0, filter_rnd
- pmullw m2, filter_y_a
- paddw m0, m1
- pmullw m1, m4, filter_y_b
- paddw m2, filter_rnd
- paddw m2, m1
- movx m1, [dstq]
-%endif
- psraw m0, 4
- psraw m2, 4
-%if %2 == 1 ; avg
- ; FIXME(rbultje) pipeline
-%if %1 == 4
- movlhps m0, m2
-%endif
- packuswb m0, m2
-%if %1 > 4
- pavgb m0, [secq]
- punpckhbw m2, m0, m5
- punpcklbw m0, m5
-%else
- movh m2, [secq]
- pavgb m0, m2
- punpcklbw m0, m5
- movhlps m2, m0
-%endif
-%endif
- punpcklbw m1, m5
- SUM_SSE m0, m1, m2, m3, m6, m7
- mova m0, m4 ; last row carried into the next iteration
-
- lea srcq, [srcq+src_strideq*2]
- lea dstq, [dstq+dst_strideq*2]
-%endif
-%if %2 == 1 ; avg
- add secq, sec_str
-%endif
- dec block_height
- jg .x_half_y_other_loop
-%undef filter_y_a
-%undef filter_y_b
-%undef filter_rnd
- STORE_AND_RET %1
-
-.x_nonhalf:
- test y_offsetd, y_offsetd
- jnz .x_nonhalf_y_nonzero
-
- ; x_offset == bilin interpolation && y_offset == 0
-%if ARCH_X86_64
- lea bilin_filter, [GLOBAL(bilin_filter_m)]
-%endif
- shl x_offsetd, filter_idx_shift ; x_offset -> byte offset into the filter table
-%if ARCH_X86_64 && %1 > 4
- mova m8, [bilin_filter+x_offsetq]
-%if notcpuflag(ssse3) ; FIXME(rbultje) don't scatter registers on x86-64
- mova m9, [bilin_filter+x_offsetq+16]
-%endif
- mova m10, [GLOBAL(pw_8)]
-%define filter_x_a m8
-%define filter_x_b m9
-%define filter_rnd m10
-%else ; x86-32, or 4xh on x86-64: filter taps are used as memory operands
-%if ARCH_X86=1 && CONFIG_PIC=1
-;y_offset == 0. We can reuse y_offset reg.
-%define tempq y_offsetq
- add x_offsetq, g_bilin_filterm
-%define filter_x_a [x_offsetq]
-%define filter_x_b [x_offsetq+16]
- mov tempq, g_pw_8m
-%define filter_rnd [tempq]
-%else
- add x_offsetq, bilin_filter
-%define filter_x_a [x_offsetq]
-%define filter_x_b [x_offsetq+16]
-%define filter_rnd [GLOBAL(pw_8)]
-%endif
-%endif
-
-.x_other_y_zero_loop:
-%if %1 == 16
- movu m0, [srcq]
- movu m4, [srcq+1]
- mova m1, [dstq]
-%if cpuflag(ssse3)
- punpckhbw m2, m0, m4 ; interleave src[x] and src[x+1] for pmaddubsw
- punpcklbw m0, m4
- pmaddubsw m2, filter_x_a
- pmaddubsw m0, filter_x_a
- paddw m2, filter_rnd
- paddw m0, filter_rnd
-%else
- punpckhbw m2, m0, m5
- punpckhbw m3, m4, m5
- punpcklbw m0, m5
- punpcklbw m4, m5
- pmullw m2, filter_x_a
- pmullw m3, filter_x_b
- paddw m2, filter_rnd
- pmullw m0, filter_x_a
- pmullw m4, filter_x_b
- paddw m0, filter_rnd
- paddw m2, m3
- paddw m0, m4
-%endif
- psraw m2, 4 ; >> 4 completes the rounded bilinear filter
- psraw m0, 4
-%if %2 == 1 ; avg
- ; FIXME(rbultje) pipeline
- packuswb m0, m2
- pavgb m0, [secq]
- punpckhbw m2, m0, m5
- punpcklbw m0, m5
-%endif
- punpckhbw m3, m1, m5
- punpcklbw m1, m5
- SUM_SSE m0, m1, m2, m3, m6, m7
-
- add srcq, src_strideq
- add dstq, dst_strideq
-%else ; %1 < 16
- movx m0, [srcq] ; row 0
- movx m1, [srcq+1]
- movx m2, [srcq+src_strideq] ; row 1
- movx m4, [srcq+src_strideq+1]
- movx m3, [dstq+dst_strideq]
-%if cpuflag(ssse3)
- punpcklbw m0, m1
- movx m1, [dstq]
- punpcklbw m2, m4
- pmaddubsw m0, filter_x_a
- pmaddubsw m2, filter_x_a
- punpcklbw m3, m5
- paddw m0, filter_rnd
- paddw m2, filter_rnd
-%else
- punpcklbw m0, m5
- punpcklbw m1, m5
- punpcklbw m2, m5
- punpcklbw m4, m5
- pmullw m0, filter_x_a
- pmullw m1, filter_x_b
- punpcklbw m3, m5
- paddw m0, filter_rnd
- pmullw m2, filter_x_a
- pmullw m4, filter_x_b
- paddw m0, m1
- paddw m2, filter_rnd
- movx m1, [dstq]
- paddw m2, m4
-%endif
- psraw m0, 4
- psraw m2, 4
-%if %2 == 1 ; avg
- ; FIXME(rbultje) pipeline
-%if %1 == 4
- movlhps m0, m2 ; gather both 4-wide rows into one register before packing
-%endif
- packuswb m0, m2
-%if %1 > 4
- pavgb m0, [secq]
- punpckhbw m2, m0, m5
- punpcklbw m0, m5
-%else
- movh m2, [secq]
- pavgb m0, m2
- punpcklbw m0, m5
- movhlps m2, m0
-%endif
-%endif
- punpcklbw m1, m5
- SUM_SSE m0, m1, m2, m3, m6, m7
-
- lea srcq, [srcq+src_strideq*2]
- lea dstq, [dstq+dst_strideq*2]
-%endif
-%if %2 == 1 ; avg
- add secq, sec_str
-%endif
- dec block_height
- jg .x_other_y_zero_loop
-%undef filter_x_a
-%undef filter_x_b
-%undef filter_rnd
- STORE_AND_RET %1
-
-.x_nonhalf_y_nonzero:
- cmp y_offsetd, 4
- jne .x_nonhalf_y_nonhalf
-
- ; x_offset == bilin interpolation && y_offset == 0.5
-%if ARCH_X86_64
- lea bilin_filter, [GLOBAL(bilin_filter_m)]
-%endif
- shl x_offsetd, filter_idx_shift ; x_offset -> byte offset into the filter table
-%if ARCH_X86_64 && %1 > 4
- mova m8, [bilin_filter+x_offsetq]
-%if notcpuflag(ssse3) ; FIXME(rbultje) don't scatter registers on x86-64
- mova m9, [bilin_filter+x_offsetq+16]
-%endif
- mova m10, [GLOBAL(pw_8)]
-%define filter_x_a m8
-%define filter_x_b m9
-%define filter_rnd m10
-%else ; x86-32, or 4xh on x86-64: filter taps are used as memory operands
-%if ARCH_X86=1 && CONFIG_PIC=1
-; y_offset == 0.5. We can reuse y_offset reg.
-%define tempq y_offsetq
- add x_offsetq, g_bilin_filterm
-%define filter_x_a [x_offsetq]
-%define filter_x_b [x_offsetq+16]
- mov tempq, g_pw_8m
-%define filter_rnd [tempq]
-%else
- add x_offsetq, bilin_filter
-%define filter_x_a [x_offsetq]
-%define filter_x_b [x_offsetq+16]
-%define filter_rnd [GLOBAL(pw_8)]
-%endif
-%endif
-
-%if %1 == 16
- movu m0, [srcq] ; prime: horizontally filter the first row
- movu m1, [srcq+1]
-%if cpuflag(ssse3)
- punpckhbw m2, m0, m1
- punpcklbw m0, m1
- pmaddubsw m2, filter_x_a
- pmaddubsw m0, filter_x_a
- paddw m2, filter_rnd
- paddw m0, filter_rnd
-%else
- punpckhbw m2, m0, m5
- punpckhbw m3, m1, m5
- punpcklbw m0, m5
- punpcklbw m1, m5
- pmullw m0, filter_x_a
- pmullw m1, filter_x_b
- paddw m0, filter_rnd
- pmullw m2, filter_x_a
- pmullw m3, filter_x_b
- paddw m2, filter_rnd
- paddw m0, m1
- paddw m2, m3
-%endif
- psraw m0, 4
- psraw m2, 4
- add srcq, src_strideq
- packuswb m0, m2 ; carried row is kept packed as bytes
-.x_other_y_half_loop:
- movu m4, [srcq]
- movu m3, [srcq+1]
-%if cpuflag(ssse3)
- mova m1, [dstq]
- punpckhbw m2, m4, m3
- punpcklbw m4, m3
- pmaddubsw m2, filter_x_a
- pmaddubsw m4, filter_x_a
- paddw m2, filter_rnd
- paddw m4, filter_rnd
- psraw m2, 4
- psraw m4, 4
- packuswb m4, m2
- pavgb m0, m4 ; vertical half-pel average with the previous row
- punpckhbw m3, m1, m5
- punpcklbw m1, m5
-%else
- punpckhbw m2, m4, m5
- punpckhbw m1, m3, m5
- punpcklbw m4, m5
- punpcklbw m3, m5
- pmullw m4, filter_x_a
- pmullw m3, filter_x_b
- paddw m4, filter_rnd
- pmullw m2, filter_x_a
- pmullw m1, filter_x_b
- paddw m2, filter_rnd
- paddw m4, m3
- paddw m2, m1
- mova m1, [dstq]
- psraw m4, 4
- psraw m2, 4
- punpckhbw m3, m1, m5
- ; FIXME(rbultje) the repeated pack/unpack here around m0/m2 is because we
- ; have a 1-register shortage to be able to store the backup of the bilin
- ; filtered second line as words as cache for the next line. Packing into
- ; a byte costs 1 pack and 2 unpacks, but saves a register.
- packuswb m4, m2
- punpcklbw m1, m5
- pavgb m0, m4 ; vertical half-pel average with the previous row
-%endif
-%if %2 == 1 ; avg
- ; FIXME(rbultje) pipeline
- pavgb m0, [secq]
-%endif
- punpckhbw m2, m0, m5
- punpcklbw m0, m5
- SUM_SSE m0, m1, m2, m3, m6, m7
- mova m0, m4 ; current filtered row becomes the previous row
-
- add srcq, src_strideq
- add dstq, dst_strideq
-%else ; %1 < 16
- movx m0, [srcq] ; prime: horizontally filter the first row
- movx m1, [srcq+1]
-%if cpuflag(ssse3)
- punpcklbw m0, m1
- pmaddubsw m0, filter_x_a
- paddw m0, filter_rnd
-%else
- punpcklbw m0, m5
- punpcklbw m1, m5
- pmullw m0, filter_x_a
- pmullw m1, filter_x_b
- paddw m0, filter_rnd
- paddw m0, m1
-%endif
- add srcq, src_strideq
- psraw m0, 4 ; carried row stays as words in this path
-.x_other_y_half_loop:
- movx m2, [srcq]
- movx m1, [srcq+1]
- movx m4, [srcq+src_strideq]
- movx m3, [srcq+src_strideq+1]
-%if cpuflag(ssse3)
- punpcklbw m2, m1
- punpcklbw m4, m3
- pmaddubsw m2, filter_x_a
- pmaddubsw m4, filter_x_a
- movx m1, [dstq]
- movx m3, [dstq+dst_strideq]
- paddw m2, filter_rnd
- paddw m4, filter_rnd
-%else
- punpcklbw m2, m5
- punpcklbw m1, m5
- punpcklbw m4, m5
- punpcklbw m3, m5
- pmullw m2, filter_x_a
- pmullw m1, filter_x_b
- paddw m2, filter_rnd
- pmullw m4, filter_x_a
- pmullw m3, filter_x_b
- paddw m4, filter_rnd
- paddw m2, m1
- movx m1, [dstq]
- paddw m4, m3
- movx m3, [dstq+dst_strideq]
-%endif
- psraw m2, 4
- psraw m4, 4
- pavgw m0, m2 ; output row 0: word average of previous and first new row
- pavgw m2, m4 ; output row 1: word average of the two new rows
-%if %2 == 1 ; avg
- ; FIXME(rbultje) pipeline - also consider going to bytes here
-%if %1 == 4
- movlhps m0, m2
-%endif
- packuswb m0, m2
-%if %1 > 4
- pavgb m0, [secq]
- punpckhbw m2, m0, m5
- punpcklbw m0, m5
-%else
- movh m2, [secq]
- pavgb m0, m2
- punpcklbw m0, m5
- movhlps m2, m0
-%endif
-%endif
- punpcklbw m3, m5
- punpcklbw m1, m5
- SUM_SSE m0, m1, m2, m3, m6, m7
- mova m0, m4 ; last filtered row carried into the next iteration
-
- lea srcq, [srcq+src_strideq*2]
- lea dstq, [dstq+dst_strideq*2]
-%endif
-%if %2 == 1 ; avg
- add secq, sec_str
-%endif
- dec block_height
- jg .x_other_y_half_loop
-%undef filter_x_a
-%undef filter_x_b
-%undef filter_rnd
- STORE_AND_RET %1
-
-.x_nonhalf_y_nonhalf:
-%if ARCH_X86_64
- lea bilin_filter, [GLOBAL(bilin_filter_m)]
-%endif
- shl x_offsetd, filter_idx_shift ; both offsets -> byte offsets into the filter table
- shl y_offsetd, filter_idx_shift
-%if ARCH_X86_64 && %1 > 4
- mova m8, [bilin_filter+x_offsetq]
-%if notcpuflag(ssse3) ; FIXME(rbultje) don't scatter registers on x86-64
- mova m9, [bilin_filter+x_offsetq+16]
-%endif
- mova m10, [bilin_filter+y_offsetq]
-%if notcpuflag(ssse3) ; FIXME(rbultje) don't scatter registers on x86-64
- mova m11, [bilin_filter+y_offsetq+16]
-%endif
- mova m12, [GLOBAL(pw_8)]
-%define filter_x_a m8
-%define filter_x_b m9
-%define filter_y_a m10
-%define filter_y_b m11
-%define filter_rnd m12
-%else ; x86-32, or 4xh on x86-64: filter taps are used as memory operands
-%if ARCH_X86=1 && CONFIG_PIC=1
-; In this case, there is NO unused register. Used src_stride register. Later,
-; src_stride has to be loaded from stack when it is needed.
-%define tempq src_strideq
- mov tempq, g_bilin_filterm
- add x_offsetq, tempq
- add y_offsetq, tempq
-%define filter_x_a [x_offsetq]
-%define filter_x_b [x_offsetq+16]
-%define filter_y_a [y_offsetq]
-%define filter_y_b [y_offsetq+16]
-
- mov tempq, g_pw_8m
-%define filter_rnd [tempq]
-%else
- add x_offsetq, bilin_filter
- add y_offsetq, bilin_filter
-%define filter_x_a [x_offsetq]
-%define filter_x_b [x_offsetq+16]
-%define filter_y_a [y_offsetq]
-%define filter_y_b [y_offsetq+16]
-%define filter_rnd [GLOBAL(pw_8)]
-%endif
-%endif
-
- ; x_offset == bilin interpolation && y_offset == bilin interpolation
-%if %1 == 16
- movu m0, [srcq] ; prime: horizontally filter the first row
- movu m1, [srcq+1]
-%if cpuflag(ssse3)
- punpckhbw m2, m0, m1
- punpcklbw m0, m1
- pmaddubsw m2, filter_x_a
- pmaddubsw m0, filter_x_a
- paddw m2, filter_rnd
- paddw m0, filter_rnd
-%else
- punpckhbw m2, m0, m5
- punpckhbw m3, m1, m5
- punpcklbw m0, m5
- punpcklbw m1, m5
- pmullw m0, filter_x_a
- pmullw m1, filter_x_b
- paddw m0, filter_rnd
- pmullw m2, filter_x_a
- pmullw m3, filter_x_b
- paddw m2, filter_rnd
- paddw m0, m1
- paddw m2, m3
-%endif
- psraw m0, 4
- psraw m2, 4
-
- INC_SRC_BY_SRC_STRIDE
-
- packuswb m0, m2 ; carried row is kept packed as bytes
-.x_other_y_other_loop:
-%if cpuflag(ssse3)
- movu m4, [srcq]
- movu m3, [srcq+1]
- mova m1, [dstq]
- punpckhbw m2, m4, m3
- punpcklbw m4, m3
- pmaddubsw m2, filter_x_a
- pmaddubsw m4, filter_x_a
- punpckhbw m3, m1, m5
- paddw m2, filter_rnd
- paddw m4, filter_rnd
- psraw m2, 4
- psraw m4, 4
- packuswb m4, m2 ; m4 = horizontally filtered current row (bytes)
- punpckhbw m2, m0, m4 ; interleave previous/current rows for the vertical filter
- punpcklbw m0, m4
- pmaddubsw m2, filter_y_a
- pmaddubsw m0, filter_y_a
- punpcklbw m1, m5
- paddw m2, filter_rnd
- paddw m0, filter_rnd
- psraw m2, 4
- psraw m0, 4
-%else
- movu m3, [srcq]
- movu m4, [srcq+1]
- punpckhbw m1, m3, m5
- punpckhbw m2, m4, m5
- punpcklbw m3, m5
- punpcklbw m4, m5
- pmullw m3, filter_x_a
- pmullw m4, filter_x_b
- paddw m3, filter_rnd
- pmullw m1, filter_x_a
- pmullw m2, filter_x_b
- paddw m1, filter_rnd
- paddw m3, m4
- paddw m1, m2
- psraw m3, 4
- psraw m1, 4
- packuswb m4, m3, m1 ; m4 = packed copy of the filtered current row, kept for the next iteration
- punpckhbw m2, m0, m5 ; unpack the previous row back to words
- punpcklbw m0, m5
- pmullw m2, filter_y_a
- pmullw m1, filter_y_b
- paddw m2, filter_rnd
- pmullw m0, filter_y_a
- pmullw m3, filter_y_b
- paddw m2, m1
- mova m1, [dstq]
- paddw m0, filter_rnd
- psraw m2, 4
- paddw m0, m3
- punpckhbw m3, m1, m5
- psraw m0, 4
- punpcklbw m1, m5
-%endif
-%if %2 == 1 ; avg
- ; FIXME(rbultje) pipeline
- packuswb m0, m2
- pavgb m0, [secq]
- punpckhbw m2, m0, m5
- punpcklbw m0, m5
-%endif
- SUM_SSE m0, m1, m2, m3, m6, m7
- mova m0, m4 ; current filtered row becomes the previous row
-
- INC_SRC_BY_SRC_STRIDE
- add dstq, dst_strideq
-%else ; %1 < 16
- movx m0, [srcq] ; prime: horizontally filter the first row
- movx m1, [srcq+1]
-%if cpuflag(ssse3)
- punpcklbw m0, m1
- pmaddubsw m0, filter_x_a
- paddw m0, filter_rnd
-%else
- punpcklbw m0, m5
- punpcklbw m1, m5
- pmullw m0, filter_x_a
- pmullw m1, filter_x_b
- paddw m0, filter_rnd
- paddw m0, m1
-%endif
- psraw m0, 4
-%if cpuflag(ssse3)
- packuswb m0, m0 ; ssse3 keeps the carried row as bytes for pmaddubsw
-%endif
-
- INC_SRC_BY_SRC_STRIDE
-
-.x_other_y_other_loop:
- movx m2, [srcq] ; first new row
- movx m1, [srcq+1]
-
- INC_SRC_BY_SRC_STRIDE
- movx m4, [srcq] ; second new row
- movx m3, [srcq+1]
-
-%if cpuflag(ssse3)
- punpcklbw m2, m1
- punpcklbw m4, m3
- pmaddubsw m2, filter_x_a
- pmaddubsw m4, filter_x_a
- movx m3, [dstq+dst_strideq]
- movx m1, [dstq]
- paddw m2, filter_rnd
- paddw m4, filter_rnd
- psraw m2, 4
- psraw m4, 4
- packuswb m2, m2
- packuswb m4, m4
- punpcklbw m0, m2 ; previous/first new row interleaved
- punpcklbw m2, m4 ; first/second new row interleaved
- pmaddubsw m0, filter_y_a
- pmaddubsw m2, filter_y_a
- punpcklbw m3, m5
- paddw m0, filter_rnd
- paddw m2, filter_rnd
- psraw m0, 4
- psraw m2, 4
- punpcklbw m1, m5
-%else
- punpcklbw m2, m5
- punpcklbw m1, m5
- punpcklbw m4, m5
- punpcklbw m3, m5
- pmullw m2, filter_x_a
- pmullw m1, filter_x_b
- paddw m2, filter_rnd
- pmullw m4, filter_x_a
- pmullw m3, filter_x_b
- paddw m4, filter_rnd
- paddw m2, m1
- paddw m4, m3
- psraw m2, 4
- psraw m4, 4
- pmullw m0, filter_y_a
- pmullw m3, m2, filter_y_b
- paddw m0, filter_rnd
- pmullw m2, filter_y_a
- pmullw m1, m4, filter_y_b
- paddw m2, filter_rnd
- paddw m0, m3
- movx m3, [dstq+dst_strideq]
- paddw m2, m1
- movx m1, [dstq]
- psraw m0, 4
- psraw m2, 4
- punpcklbw m3, m5
- punpcklbw m1, m5
-%endif
-%if %2 == 1 ; avg
- ; FIXME(rbultje) pipeline
-%if %1 == 4
- movlhps m0, m2
-%endif
- packuswb m0, m2
-%if %1 > 4
- pavgb m0, [secq]
- punpckhbw m2, m0, m5
- punpcklbw m0, m5
-%else
- movh m2, [secq]
- pavgb m0, m2
- punpcklbw m0, m5
- movhlps m2, m0
-%endif
-%endif
- SUM_SSE m0, m1, m2, m3, m6, m7
- mova m0, m4 ; last filtered row carried into the next iteration
-
- INC_SRC_BY_SRC_STRIDE
- lea dstq, [dstq+dst_strideq*2]
-%endif
-%if %2 == 1 ; avg
- add secq, sec_str
-%endif
- dec block_height
- jg .x_other_y_other_loop
-%undef filter_x_a
-%undef filter_x_b
-%undef filter_y_a
-%undef filter_y_b
-%undef filter_rnd
-%undef movx
- STORE_AND_RET %1
-%endmacro
-
-; FIXME(rbultje) the non-bilinear versions (i.e. x=0,4&&y=0,4) are identical
-; between the ssse3 and non-ssse3 version. It may make sense to merge their
-; code in the sense that the ssse3 version would jump to the appropriate
-; location in the sse/2 version, rather than duplicating that code in the
-; binary.
-
-INIT_XMM sse2
-SUBPEL_VARIANCE 4 ; sub_pixel_variance4xh
-SUBPEL_VARIANCE 8 ; sub_pixel_variance8xh
-SUBPEL_VARIANCE 16 ; sub_pixel_variance16xh
-
-INIT_XMM ssse3
-SUBPEL_VARIANCE 4
-SUBPEL_VARIANCE 8
-SUBPEL_VARIANCE 16
-
-INIT_XMM sse2
-SUBPEL_VARIANCE 4, 1 ; sub_pixel_avg_variance4xh
-SUBPEL_VARIANCE 8, 1 ; sub_pixel_avg_variance8xh
-SUBPEL_VARIANCE 16, 1 ; sub_pixel_avg_variance16xh
-
-INIT_XMM ssse3
-SUBPEL_VARIANCE 4, 1
-SUBPEL_VARIANCE 8, 1
-SUBPEL_VARIANCE 16, 1
diff --git a/third_party/aom/aom_dsp/x86/subtract_avx2.c b/third_party/aom/aom_dsp/x86/subtract_avx2.c
deleted file mode 100644
index 4389d123d..000000000
--- a/third_party/aom/aom_dsp/x86/subtract_avx2.c
+++ /dev/null
@@ -1,108 +0,0 @@
-/*
- * Copyright (c) 2018, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-#include <immintrin.h>
-
-#include "config/aom_dsp_rtcd.h"
-
-static INLINE void subtract32_avx2(int16_t *diff_ptr, const uint8_t *src_ptr,
- const uint8_t *pred_ptr) {
- __m256i s = _mm256_lddqu_si256((__m256i *)(src_ptr));
- __m256i p = _mm256_lddqu_si256((__m256i *)(pred_ptr));
- __m256i s_0 = _mm256_cvtepu8_epi16(_mm256_castsi256_si128(s));
- __m256i s_1 = _mm256_cvtepu8_epi16(_mm256_extracti128_si256(s, 1));
- __m256i p_0 = _mm256_cvtepu8_epi16(_mm256_castsi256_si128(p));
- __m256i p_1 = _mm256_cvtepu8_epi16(_mm256_extracti128_si256(p, 1));
- const __m256i d_0 = _mm256_sub_epi16(s_0, p_0);
- const __m256i d_1 = _mm256_sub_epi16(s_1, p_1);
- _mm256_store_si256((__m256i *)(diff_ptr), d_0);
- _mm256_store_si256((__m256i *)(diff_ptr + 16), d_1);
-}
-
-static INLINE void aom_subtract_block_16xn_avx2(
- int rows, int16_t *diff_ptr, ptrdiff_t diff_stride, const uint8_t *src_ptr,
- ptrdiff_t src_stride, const uint8_t *pred_ptr, ptrdiff_t pred_stride) {
- for (int32_t j = 0; j < rows; ++j) {
- __m128i s = _mm_lddqu_si128((__m128i *)(src_ptr));
- __m128i p = _mm_lddqu_si128((__m128i *)(pred_ptr));
- __m256i s_0 = _mm256_cvtepu8_epi16(s);
- __m256i p_0 = _mm256_cvtepu8_epi16(p);
- const __m256i d_0 = _mm256_sub_epi16(s_0, p_0);
- _mm256_store_si256((__m256i *)(diff_ptr), d_0);
- src_ptr += src_stride;
- pred_ptr += pred_stride;
- diff_ptr += diff_stride;
- }
-}
-
-static INLINE void aom_subtract_block_32xn_avx2(
- int rows, int16_t *diff_ptr, ptrdiff_t diff_stride, const uint8_t *src_ptr,
- ptrdiff_t src_stride, const uint8_t *pred_ptr, ptrdiff_t pred_stride) {
- for (int32_t j = 0; j < rows; ++j) {
- subtract32_avx2(diff_ptr, src_ptr, pred_ptr);
- src_ptr += src_stride;
- pred_ptr += pred_stride;
- diff_ptr += diff_stride;
- }
-}
-
-static INLINE void aom_subtract_block_64xn_avx2(
- int rows, int16_t *diff_ptr, ptrdiff_t diff_stride, const uint8_t *src_ptr,
- ptrdiff_t src_stride, const uint8_t *pred_ptr, ptrdiff_t pred_stride) {
- for (int32_t j = 0; j < rows; ++j) {
- subtract32_avx2(diff_ptr, src_ptr, pred_ptr);
- subtract32_avx2(diff_ptr + 32, src_ptr + 32, pred_ptr + 32);
- src_ptr += src_stride;
- pred_ptr += pred_stride;
- diff_ptr += diff_stride;
- }
-}
-
-static INLINE void aom_subtract_block_128xn_avx2(
- int rows, int16_t *diff_ptr, ptrdiff_t diff_stride, const uint8_t *src_ptr,
- ptrdiff_t src_stride, const uint8_t *pred_ptr, ptrdiff_t pred_stride) {
- for (int32_t j = 0; j < rows; ++j) {
- subtract32_avx2(diff_ptr, src_ptr, pred_ptr);
- subtract32_avx2(diff_ptr + 32, src_ptr + 32, pred_ptr + 32);
- subtract32_avx2(diff_ptr + 64, src_ptr + 64, pred_ptr + 64);
- subtract32_avx2(diff_ptr + 96, src_ptr + 96, pred_ptr + 96);
- src_ptr += src_stride;
- pred_ptr += pred_stride;
- diff_ptr += diff_stride;
- }
-}
-
-void aom_subtract_block_avx2(int rows, int cols, int16_t *diff_ptr,
- ptrdiff_t diff_stride, const uint8_t *src_ptr,
- ptrdiff_t src_stride, const uint8_t *pred_ptr,
- ptrdiff_t pred_stride) {
- switch (cols) {
- case 16:
- aom_subtract_block_16xn_avx2(rows, diff_ptr, diff_stride, src_ptr,
- src_stride, pred_ptr, pred_stride);
- break;
- case 32:
- aom_subtract_block_32xn_avx2(rows, diff_ptr, diff_stride, src_ptr,
- src_stride, pred_ptr, pred_stride);
- break;
- case 64:
- aom_subtract_block_64xn_avx2(rows, diff_ptr, diff_stride, src_ptr,
- src_stride, pred_ptr, pred_stride);
- break;
- case 128:
- aom_subtract_block_128xn_avx2(rows, diff_ptr, diff_stride, src_ptr,
- src_stride, pred_ptr, pred_stride);
- break;
- default:
- aom_subtract_block_sse2(rows, cols, diff_ptr, diff_stride, src_ptr,
- src_stride, pred_ptr, pred_stride);
- break;
- }
-}
diff --git a/third_party/aom/aom_dsp/x86/subtract_sse2.asm b/third_party/aom/aom_dsp/x86/subtract_sse2.asm
deleted file mode 100644
index 1a75a234f..000000000
--- a/third_party/aom/aom_dsp/x86/subtract_sse2.asm
+++ /dev/null
@@ -1,146 +0,0 @@
-;
-; Copyright (c) 2016, Alliance for Open Media. All rights reserved
-;
-; This source code is subject to the terms of the BSD 2 Clause License and
-; the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
-; was not distributed with this source code in the LICENSE file, you can
-; obtain it at www.aomedia.org/license/software. If the Alliance for Open
-; Media Patent License 1.0 was not distributed with this source code in the
-; PATENTS file, you can obtain it at www.aomedia.org/license/patent.
-;
-
-;
-
-%include "third_party/x86inc/x86inc.asm"
-
-SECTION .text
-
-; void aom_subtract_block(int rows, int cols,
-; int16_t *diff, ptrdiff_t diff_stride,
-; const uint8_t *src, ptrdiff_t src_stride,
-; const uint8_t *pred, ptrdiff_t pred_stride)
-
-INIT_XMM sse2
-cglobal subtract_block, 7, 7, 8, \
- rows, cols, diff, diff_stride, src, src_stride, \
- pred, pred_stride
-%define pred_str colsq
- pxor m7, m7 ; dedicated zero register
- cmp colsd, 4
- je .case_4
- cmp colsd, 8
- je .case_8
- cmp colsd, 16
- je .case_16
- cmp colsd, 32
- je .case_32
- cmp colsd, 64
- je .case_64
-
-%macro loop16 6
- mova m0, [srcq+%1]
- mova m4, [srcq+%2]
- mova m1, [predq+%3]
- mova m5, [predq+%4]
- punpckhbw m2, m0, m7
- punpckhbw m3, m1, m7
- punpcklbw m0, m7
- punpcklbw m1, m7
- psubw m2, m3
- psubw m0, m1
- punpckhbw m1, m4, m7
- punpckhbw m3, m5, m7
- punpcklbw m4, m7
- punpcklbw m5, m7
- psubw m1, m3
- psubw m4, m5
- mova [diffq+mmsize*0+%5], m0
- mova [diffq+mmsize*1+%5], m2
- mova [diffq+mmsize*0+%6], m4
- mova [diffq+mmsize*1+%6], m1
-%endmacro
-
- mov pred_str, pred_stridemp
-.loop_128:
- loop16 0*mmsize, 1*mmsize, 0*mmsize, 1*mmsize, 0*mmsize, 2*mmsize
- loop16 2*mmsize, 3*mmsize, 2*mmsize, 3*mmsize, 4*mmsize, 6*mmsize
- loop16 4*mmsize, 5*mmsize, 4*mmsize, 5*mmsize, 8*mmsize, 10*mmsize
- loop16 6*mmsize, 7*mmsize, 6*mmsize, 7*mmsize, 12*mmsize, 14*mmsize
- lea diffq, [diffq+diff_strideq*2]
- add predq, pred_str
- add srcq, src_strideq
- sub rowsd, 1
- jnz .loop_128
- RET
-
-.case_64:
- mov pred_str, pred_stridemp
-.loop_64:
- loop16 0*mmsize, 1*mmsize, 0*mmsize, 1*mmsize, 0*mmsize, 2*mmsize
- loop16 2*mmsize, 3*mmsize, 2*mmsize, 3*mmsize, 4*mmsize, 6*mmsize
- lea diffq, [diffq+diff_strideq*2]
- add predq, pred_str
- add srcq, src_strideq
- dec rowsd
- jg .loop_64
- RET
-
-.case_32:
- mov pred_str, pred_stridemp
-.loop_32:
- loop16 0, mmsize, 0, mmsize, 0, 2*mmsize
- lea diffq, [diffq+diff_strideq*2]
- add predq, pred_str
- add srcq, src_strideq
- dec rowsd
- jg .loop_32
- RET
-
-.case_16:
- mov pred_str, pred_stridemp
-.loop_16:
- loop16 0, src_strideq, 0, pred_str, 0, diff_strideq*2
- lea diffq, [diffq+diff_strideq*4]
- lea predq, [predq+pred_str*2]
- lea srcq, [srcq+src_strideq*2]
- sub rowsd, 2
- jg .loop_16
- RET
-
-%macro loop_h 0
- movh m0, [srcq]
- movh m2, [srcq+src_strideq]
- movh m1, [predq]
- movh m3, [predq+pred_str]
- punpcklbw m0, m7
- punpcklbw m1, m7
- punpcklbw m2, m7
- punpcklbw m3, m7
- psubw m0, m1
- psubw m2, m3
- mova [diffq], m0
- mova [diffq+diff_strideq*2], m2
-%endmacro
-
-.case_8:
- mov pred_str, pred_stridemp
-.loop_8:
- loop_h
- lea diffq, [diffq+diff_strideq*4]
- lea srcq, [srcq+src_strideq*2]
- lea predq, [predq+pred_str*2]
- sub rowsd, 2
- jg .loop_8
- RET
-
-INIT_MMX
-.case_4:
- mov pred_str, pred_stridemp
-.loop_4:
- loop_h
- lea diffq, [diffq+diff_strideq*4]
- lea srcq, [srcq+src_strideq*2]
- lea predq, [predq+pred_str*2]
- sub rowsd, 2
- jg .loop_4
- RET
diff --git a/third_party/aom/aom_dsp/x86/sum_squares_avx2.c b/third_party/aom/aom_dsp/x86/sum_squares_avx2.c
deleted file mode 100644
index 0af44e3a4..000000000
--- a/third_party/aom/aom_dsp/x86/sum_squares_avx2.c
+++ /dev/null
@@ -1,79 +0,0 @@
-/*
- * Copyright (c) 2018, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#include <immintrin.h>
-#include <smmintrin.h>
-
-#include "aom_dsp/x86/synonyms.h"
-#include "aom_dsp/x86/synonyms_avx2.h"
-#include "aom_dsp/x86/sum_squares_sse2.h"
-#include "config/aom_dsp_rtcd.h"
-
-static uint64_t aom_sum_squares_2d_i16_nxn_avx2(const int16_t *src, int stride,
- int width, int height) {
- uint64_t result;
- __m256i v_acc_q = _mm256_setzero_si256();
- const __m256i v_zext_mask_q = yy_set1_64_from_32i(0xffffffff);
- for (int col = 0; col < height; col += 4) {
- __m256i v_acc_d = _mm256_setzero_si256();
- for (int row = 0; row < width; row += 16) {
- const int16_t *tempsrc = src + row;
- const __m256i v_val_0_w =
- _mm256_loadu_si256((const __m256i *)(tempsrc + 0 * stride));
- const __m256i v_val_1_w =
- _mm256_loadu_si256((const __m256i *)(tempsrc + 1 * stride));
- const __m256i v_val_2_w =
- _mm256_loadu_si256((const __m256i *)(tempsrc + 2 * stride));
- const __m256i v_val_3_w =
- _mm256_loadu_si256((const __m256i *)(tempsrc + 3 * stride));
-
- const __m256i v_sq_0_d = _mm256_madd_epi16(v_val_0_w, v_val_0_w);
- const __m256i v_sq_1_d = _mm256_madd_epi16(v_val_1_w, v_val_1_w);
- const __m256i v_sq_2_d = _mm256_madd_epi16(v_val_2_w, v_val_2_w);
- const __m256i v_sq_3_d = _mm256_madd_epi16(v_val_3_w, v_val_3_w);
-
- const __m256i v_sum_01_d = _mm256_add_epi32(v_sq_0_d, v_sq_1_d);
- const __m256i v_sum_23_d = _mm256_add_epi32(v_sq_2_d, v_sq_3_d);
- const __m256i v_sum_0123_d = _mm256_add_epi32(v_sum_01_d, v_sum_23_d);
-
- v_acc_d = _mm256_add_epi32(v_acc_d, v_sum_0123_d);
- }
- v_acc_q =
- _mm256_add_epi64(v_acc_q, _mm256_and_si256(v_acc_d, v_zext_mask_q));
- v_acc_q = _mm256_add_epi64(v_acc_q, _mm256_srli_epi64(v_acc_d, 32));
- src += 4 * stride;
- }
- __m128i lower_64_2_Value = _mm256_castsi256_si128(v_acc_q);
- __m128i higher_64_2_Value = _mm256_extracti128_si256(v_acc_q, 1);
- __m128i result_64_2_int = _mm_add_epi64(lower_64_2_Value, higher_64_2_Value);
-
- result_64_2_int = _mm_add_epi64(
- result_64_2_int, _mm_unpackhi_epi64(result_64_2_int, result_64_2_int));
-
- xx_storel_64(&result, result_64_2_int);
-
- return result;
-}
-
-uint64_t aom_sum_squares_2d_i16_avx2(const int16_t *src, int stride, int width,
- int height) {
- if (LIKELY(width == 4 && height == 4)) {
- return aom_sum_squares_2d_i16_4x4_sse2(src, stride);
- } else if (LIKELY(width == 4 && (height & 3) == 0)) {
- return aom_sum_squares_2d_i16_4xn_sse2(src, stride, height);
- } else if (LIKELY(width == 8 && (height & 3) == 0)) {
- return aom_sum_squares_2d_i16_nxn_sse2(src, stride, width, height);
- } else if (LIKELY(((width & 15) == 0) && ((height & 3) == 0))) {
- return aom_sum_squares_2d_i16_nxn_avx2(src, stride, width, height);
- } else {
- return aom_sum_squares_2d_i16_c(src, stride, width, height);
- }
-}
diff --git a/third_party/aom/aom_dsp/x86/sum_squares_sse2.c b/third_party/aom/aom_dsp/x86/sum_squares_sse2.c
deleted file mode 100644
index 22d7739ec..000000000
--- a/third_party/aom/aom_dsp/x86/sum_squares_sse2.c
+++ /dev/null
@@ -1,203 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#include <assert.h>
-#include <emmintrin.h>
-#include <stdio.h>
-
-#include "aom_dsp/x86/synonyms.h"
-#include "aom_dsp/x86/sum_squares_sse2.h"
-#include "config/aom_dsp_rtcd.h"
-
-static INLINE __m128i xx_loadh_64(__m128i a, const void *b) {
- const __m128d ad = _mm_castsi128_pd(a);
- return _mm_castpd_si128(_mm_loadh_pd(ad, (double *)b));
-}
-
-static INLINE uint64_t xx_cvtsi128_si64(__m128i a) {
-#if ARCH_X86_64
- return (uint64_t)_mm_cvtsi128_si64(a);
-#else
- {
- uint64_t tmp;
- _mm_storel_epi64((__m128i *)&tmp, a);
- return tmp;
- }
-#endif
-}
-
-static INLINE __m128i sum_squares_i16_4x4_sse2(const int16_t *src, int stride) {
- const __m128i v_val_0_w = xx_loadl_64(src + 0 * stride);
- const __m128i v_val_2_w = xx_loadl_64(src + 2 * stride);
- const __m128i v_val_01_w = xx_loadh_64(v_val_0_w, src + 1 * stride);
- const __m128i v_val_23_w = xx_loadh_64(v_val_2_w, src + 3 * stride);
- const __m128i v_sq_01_d = _mm_madd_epi16(v_val_01_w, v_val_01_w);
- const __m128i v_sq_23_d = _mm_madd_epi16(v_val_23_w, v_val_23_w);
-
- return _mm_add_epi32(v_sq_01_d, v_sq_23_d);
-}
-
-uint64_t aom_sum_squares_2d_i16_4x4_sse2(const int16_t *src, int stride) {
- const __m128i v_sum_0123_d = sum_squares_i16_4x4_sse2(src, stride);
- __m128i v_sum_d =
- _mm_add_epi32(v_sum_0123_d, _mm_srli_epi64(v_sum_0123_d, 32));
- v_sum_d = _mm_add_epi32(v_sum_d, _mm_srli_si128(v_sum_d, 8));
- return (uint64_t)_mm_cvtsi128_si32(v_sum_d);
-}
-
-uint64_t aom_sum_squares_2d_i16_4xn_sse2(const int16_t *src, int stride,
- int height) {
- int r = 0;
- __m128i v_acc_q = _mm_setzero_si128();
- do {
- const __m128i v_acc_d = sum_squares_i16_4x4_sse2(src, stride);
- v_acc_q = _mm_add_epi32(v_acc_q, v_acc_d);
- src += stride << 2;
- r += 4;
- } while (r < height);
- const __m128i v_zext_mask_q = xx_set1_64_from_32i(0xffffffff);
- __m128i v_acc_64 = _mm_add_epi64(_mm_srli_epi64(v_acc_q, 32),
- _mm_and_si128(v_acc_q, v_zext_mask_q));
- v_acc_64 = _mm_add_epi64(v_acc_64, _mm_srli_si128(v_acc_64, 8));
- return xx_cvtsi128_si64(v_acc_64);
-}
-
-#ifdef __GNUC__
-// This prevents GCC/Clang from inlining this function into
-// aom_sum_squares_2d_i16_sse2, which in turn saves some stack
-// maintenance instructions in the common case of 4x4.
-__attribute__((noinline))
-#endif
-uint64_t
-aom_sum_squares_2d_i16_nxn_sse2(const int16_t *src, int stride, int width,
- int height) {
- int r = 0;
-
- const __m128i v_zext_mask_q = xx_set1_64_from_32i(0xffffffff);
- __m128i v_acc_q = _mm_setzero_si128();
-
- do {
- __m128i v_acc_d = _mm_setzero_si128();
- int c = 0;
- do {
- const int16_t *b = src + c;
-
- const __m128i v_val_0_w = xx_load_128(b + 0 * stride);
- const __m128i v_val_1_w = xx_load_128(b + 1 * stride);
- const __m128i v_val_2_w = xx_load_128(b + 2 * stride);
- const __m128i v_val_3_w = xx_load_128(b + 3 * stride);
-
- const __m128i v_sq_0_d = _mm_madd_epi16(v_val_0_w, v_val_0_w);
- const __m128i v_sq_1_d = _mm_madd_epi16(v_val_1_w, v_val_1_w);
- const __m128i v_sq_2_d = _mm_madd_epi16(v_val_2_w, v_val_2_w);
- const __m128i v_sq_3_d = _mm_madd_epi16(v_val_3_w, v_val_3_w);
-
- const __m128i v_sum_01_d = _mm_add_epi32(v_sq_0_d, v_sq_1_d);
- const __m128i v_sum_23_d = _mm_add_epi32(v_sq_2_d, v_sq_3_d);
-
- const __m128i v_sum_0123_d = _mm_add_epi32(v_sum_01_d, v_sum_23_d);
-
- v_acc_d = _mm_add_epi32(v_acc_d, v_sum_0123_d);
- c += 8;
- } while (c < width);
-
- v_acc_q = _mm_add_epi64(v_acc_q, _mm_and_si128(v_acc_d, v_zext_mask_q));
- v_acc_q = _mm_add_epi64(v_acc_q, _mm_srli_epi64(v_acc_d, 32));
-
- src += 4 * stride;
- r += 4;
- } while (r < height);
-
- v_acc_q = _mm_add_epi64(v_acc_q, _mm_srli_si128(v_acc_q, 8));
- return xx_cvtsi128_si64(v_acc_q);
-}
-
-uint64_t aom_sum_squares_2d_i16_sse2(const int16_t *src, int stride, int width,
- int height) {
- // 4 elements per row only requires half an XMM register, so this
- // must be a special case, but also note that over 75% of all calls
- // are with size == 4, so it is also the common case.
- if (LIKELY(width == 4 && height == 4)) {
- return aom_sum_squares_2d_i16_4x4_sse2(src, stride);
- } else if (LIKELY(width == 4 && (height & 3) == 0)) {
- return aom_sum_squares_2d_i16_4xn_sse2(src, stride, height);
- } else if (LIKELY((width & 7) == 0 && (height & 3) == 0)) {
- // Generic case
- return aom_sum_squares_2d_i16_nxn_sse2(src, stride, width, height);
- } else {
- return aom_sum_squares_2d_i16_c(src, stride, width, height);
- }
-}
-
-//////////////////////////////////////////////////////////////////////////////
-// 1D version
-//////////////////////////////////////////////////////////////////////////////
-
-static uint64_t aom_sum_squares_i16_64n_sse2(const int16_t *src, uint32_t n) {
- const __m128i v_zext_mask_q = xx_set1_64_from_32i(0xffffffff);
- __m128i v_acc0_q = _mm_setzero_si128();
- __m128i v_acc1_q = _mm_setzero_si128();
-
- const int16_t *const end = src + n;
-
- assert(n % 64 == 0);
-
- while (src < end) {
- const __m128i v_val_0_w = xx_load_128(src);
- const __m128i v_val_1_w = xx_load_128(src + 8);
- const __m128i v_val_2_w = xx_load_128(src + 16);
- const __m128i v_val_3_w = xx_load_128(src + 24);
- const __m128i v_val_4_w = xx_load_128(src + 32);
- const __m128i v_val_5_w = xx_load_128(src + 40);
- const __m128i v_val_6_w = xx_load_128(src + 48);
- const __m128i v_val_7_w = xx_load_128(src + 56);
-
- const __m128i v_sq_0_d = _mm_madd_epi16(v_val_0_w, v_val_0_w);
- const __m128i v_sq_1_d = _mm_madd_epi16(v_val_1_w, v_val_1_w);
- const __m128i v_sq_2_d = _mm_madd_epi16(v_val_2_w, v_val_2_w);
- const __m128i v_sq_3_d = _mm_madd_epi16(v_val_3_w, v_val_3_w);
- const __m128i v_sq_4_d = _mm_madd_epi16(v_val_4_w, v_val_4_w);
- const __m128i v_sq_5_d = _mm_madd_epi16(v_val_5_w, v_val_5_w);
- const __m128i v_sq_6_d = _mm_madd_epi16(v_val_6_w, v_val_6_w);
- const __m128i v_sq_7_d = _mm_madd_epi16(v_val_7_w, v_val_7_w);
-
- const __m128i v_sum_01_d = _mm_add_epi32(v_sq_0_d, v_sq_1_d);
- const __m128i v_sum_23_d = _mm_add_epi32(v_sq_2_d, v_sq_3_d);
- const __m128i v_sum_45_d = _mm_add_epi32(v_sq_4_d, v_sq_5_d);
- const __m128i v_sum_67_d = _mm_add_epi32(v_sq_6_d, v_sq_7_d);
-
- const __m128i v_sum_0123_d = _mm_add_epi32(v_sum_01_d, v_sum_23_d);
- const __m128i v_sum_4567_d = _mm_add_epi32(v_sum_45_d, v_sum_67_d);
-
- const __m128i v_sum_d = _mm_add_epi32(v_sum_0123_d, v_sum_4567_d);
-
- v_acc0_q = _mm_add_epi64(v_acc0_q, _mm_and_si128(v_sum_d, v_zext_mask_q));
- v_acc1_q = _mm_add_epi64(v_acc1_q, _mm_srli_epi64(v_sum_d, 32));
-
- src += 64;
- }
-
- v_acc0_q = _mm_add_epi64(v_acc0_q, v_acc1_q);
- v_acc0_q = _mm_add_epi64(v_acc0_q, _mm_srli_si128(v_acc0_q, 8));
- return xx_cvtsi128_si64(v_acc0_q);
-}
-
-uint64_t aom_sum_squares_i16_sse2(const int16_t *src, uint32_t n) {
- if (n % 64 == 0) {
- return aom_sum_squares_i16_64n_sse2(src, n);
- } else if (n > 64) {
- int k = n & ~(64 - 1);
- return aom_sum_squares_i16_64n_sse2(src, k) +
- aom_sum_squares_i16_c(src + k, n - k);
- } else {
- return aom_sum_squares_i16_c(src, n);
- }
-}
diff --git a/third_party/aom/aom_dsp/x86/sum_squares_sse2.h b/third_party/aom/aom_dsp/x86/sum_squares_sse2.h
deleted file mode 100644
index 491e31cc5..000000000
--- a/third_party/aom/aom_dsp/x86/sum_squares_sse2.h
+++ /dev/null
@@ -1,22 +0,0 @@
-/*
- * Copyright (c) 2018, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#ifndef AOM_DSP_X86_SUM_SQUARES_SSE2_H_
-#define AOM_DSP_X86_SUM_SQUARES_SSE2_H_
-
-uint64_t aom_sum_squares_2d_i16_nxn_sse2(const int16_t *src, int stride,
- int width, int height);
-
-uint64_t aom_sum_squares_2d_i16_4xn_sse2(const int16_t *src, int stride,
- int height);
-uint64_t aom_sum_squares_2d_i16_4x4_sse2(const int16_t *src, int stride);
-
-#endif // AOM_DSP_X86_SUM_SQUARES_SSE2_H_
diff --git a/third_party/aom/aom_dsp/x86/synonyms.h b/third_party/aom/aom_dsp/x86/synonyms.h
deleted file mode 100644
index 1e9f1e27b..000000000
--- a/third_party/aom/aom_dsp/x86/synonyms.h
+++ /dev/null
@@ -1,114 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#ifndef AOM_AOM_DSP_X86_SYNONYMS_H_
-#define AOM_AOM_DSP_X86_SYNONYMS_H_
-
-#include <immintrin.h>
-
-#include "config/aom_config.h"
-
-#include "aom/aom_integer.h"
-
-/**
- * Various reusable shorthands for x86 SIMD intrinsics.
- *
- * Intrinsics prefixed with xx_ operate on or return 128bit XMM registers.
- * Intrinsics prefixed with yy_ operate on or return 256bit YMM registers.
- */
-
-// Loads and stores to do away with the tedium of casting the address
-// to the right type.
-static INLINE __m128i xx_loadl_32(const void *a) {
- return _mm_cvtsi32_si128(*(const uint32_t *)a);
-}
-
-static INLINE __m128i xx_loadl_64(const void *a) {
- return _mm_loadl_epi64((const __m128i *)a);
-}
-
-static INLINE __m128i xx_load_128(const void *a) {
- return _mm_load_si128((const __m128i *)a);
-}
-
-static INLINE __m128i xx_loadu_128(const void *a) {
- return _mm_loadu_si128((const __m128i *)a);
-}
-
-static INLINE void xx_storel_32(void *const a, const __m128i v) {
- *(uint32_t *)a = _mm_cvtsi128_si32(v);
-}
-
-static INLINE void xx_storel_64(void *const a, const __m128i v) {
- _mm_storel_epi64((__m128i *)a, v);
-}
-
-static INLINE void xx_store_128(void *const a, const __m128i v) {
- _mm_store_si128((__m128i *)a, v);
-}
-
-static INLINE void xx_storeu_128(void *const a, const __m128i v) {
- _mm_storeu_si128((__m128i *)a, v);
-}
-
-// The _mm_set_epi64x() intrinsic is undefined for some Visual Studio
-// compilers. The following function is equivalent to _mm_set_epi64x()
-// acting on 32-bit integers.
-static INLINE __m128i xx_set_64_from_32i(int32_t e1, int32_t e0) {
-#if defined(_MSC_VER) && _MSC_VER < 1900
- return _mm_set_epi32(0, e1, 0, e0);
-#else
- return _mm_set_epi64x((uint32_t)e1, (uint32_t)e0);
-#endif
-}
-
-// The _mm_set1_epi64x() intrinsic is undefined for some Visual Studio
-// compilers. The following function is equivalent to _mm_set1_epi64x()
-// acting on a 32-bit integer.
-static INLINE __m128i xx_set1_64_from_32i(int32_t a) {
-#if defined(_MSC_VER) && _MSC_VER < 1900
- return _mm_set_epi32(0, a, 0, a);
-#else
- return _mm_set1_epi64x((uint32_t)a);
-#endif
-}
-
-static INLINE __m128i xx_round_epu16(__m128i v_val_w) {
- return _mm_avg_epu16(v_val_w, _mm_setzero_si128());
-}
-
-static INLINE __m128i xx_roundn_epu16(__m128i v_val_w, int bits) {
- const __m128i v_s_w = _mm_srli_epi16(v_val_w, bits - 1);
- return _mm_avg_epu16(v_s_w, _mm_setzero_si128());
-}
-
-static INLINE __m128i xx_roundn_epu32(__m128i v_val_d, int bits) {
- const __m128i v_bias_d = _mm_set1_epi32((1 << bits) >> 1);
- const __m128i v_tmp_d = _mm_add_epi32(v_val_d, v_bias_d);
- return _mm_srli_epi32(v_tmp_d, bits);
-}
-
-// This is equivalent to ROUND_POWER_OF_TWO(v_val_d, bits)
-static INLINE __m128i xx_roundn_epi32_unsigned(__m128i v_val_d, int bits) {
- const __m128i v_bias_d = _mm_set1_epi32((1 << bits) >> 1);
- const __m128i v_tmp_d = _mm_add_epi32(v_val_d, v_bias_d);
- return _mm_srai_epi32(v_tmp_d, bits);
-}
-
-static INLINE __m128i xx_roundn_epi16(__m128i v_val_d, int bits) {
- const __m128i v_bias_d = _mm_set1_epi16((1 << bits) >> 1);
- const __m128i v_sign_d = _mm_srai_epi16(v_val_d, 15);
- const __m128i v_tmp_d =
- _mm_add_epi16(_mm_add_epi16(v_val_d, v_bias_d), v_sign_d);
- return _mm_srai_epi16(v_tmp_d, bits);
-}
-
-#endif // AOM_AOM_DSP_X86_SYNONYMS_H_
diff --git a/third_party/aom/aom_dsp/x86/synonyms_avx2.h b/third_party/aom/aom_dsp/x86/synonyms_avx2.h
deleted file mode 100644
index 3f69b120e..000000000
--- a/third_party/aom/aom_dsp/x86/synonyms_avx2.h
+++ /dev/null
@@ -1,74 +0,0 @@
-/*
- * Copyright (c) 2018, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#ifndef AOM_AOM_DSP_X86_SYNONYMS_AVX2_H_
-#define AOM_AOM_DSP_X86_SYNONYMS_AVX2_H_
-
-#include <immintrin.h>
-
-#include "config/aom_config.h"
-
-#include "aom/aom_integer.h"
-
-/**
- * Various reusable shorthands for x86 SIMD intrinsics.
- *
- * Intrinsics prefixed with xx_ operate on or return 128bit XMM registers.
- * Intrinsics prefixed with yy_ operate on or return 256bit YMM registers.
- */
-
-// Loads and stores to do away with the tedium of casting the address
-// to the right type.
-static INLINE __m256i yy_load_256(const void *a) {
- return _mm256_load_si256((const __m256i *)a);
-}
-
-static INLINE __m256i yy_loadu_256(const void *a) {
- return _mm256_loadu_si256((const __m256i *)a);
-}
-
-static INLINE void yy_store_256(void *const a, const __m256i v) {
- _mm256_store_si256((__m256i *)a, v);
-}
-
-static INLINE void yy_storeu_256(void *const a, const __m256i v) {
- _mm256_storeu_si256((__m256i *)a, v);
-}
-
-// The _mm256_set1_epi64x() intrinsic is undefined for some Visual Studio
-// compilers. The following function is equivalent to _mm256_set1_epi64x()
-// acting on a 32-bit integer.
-static INLINE __m256i yy_set1_64_from_32i(int32_t a) {
-#if defined(_MSC_VER) && defined(_M_IX86) && _MSC_VER < 1900
- return _mm256_set_epi32(0, a, 0, a, 0, a, 0, a);
-#else
- return _mm256_set1_epi64x((uint32_t)a);
-#endif
-}
-
-// Some compilers don't have _mm256_set_m128i defined in immintrin.h. We
-// therefore define an equivalent function using a different intrinsic.
-// ([ hi ], [ lo ]) -> [ hi ][ lo ]
-static INLINE __m256i yy_set_m128i(__m128i hi, __m128i lo) {
- return _mm256_insertf128_si256(_mm256_castsi128_si256(lo), hi, 1);
-}
-
-static INLINE __m256i yy_loadu2_128(const void *hi, const void *lo) {
- __m128i mhi = _mm_loadu_si128((__m128i *)(hi));
- __m128i mlo = _mm_loadu_si128((__m128i *)(lo));
- return yy_set_m128i(mhi, mlo);
-}
-
-static INLINE __m256i yy_roundn_epu16(__m256i v_val_w, int bits) {
- const __m256i v_s_w = _mm256_srli_epi16(v_val_w, bits - 1);
- return _mm256_avg_epu16(v_s_w, _mm256_setzero_si256());
-}
-#endif // AOM_AOM_DSP_X86_SYNONYMS_AVX2_H_
diff --git a/third_party/aom/aom_dsp/x86/transpose_sse2.h b/third_party/aom/aom_dsp/x86/transpose_sse2.h
deleted file mode 100644
index d0d1ee684..000000000
--- a/third_party/aom/aom_dsp/x86/transpose_sse2.h
+++ /dev/null
@@ -1,420 +0,0 @@
-/*
- * Copyright (c) 2018, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#ifndef AOM_AOM_DSP_X86_TRANSPOSE_SSE2_H_
-#define AOM_AOM_DSP_X86_TRANSPOSE_SSE2_H_
-
-#include <emmintrin.h> // SSE2
-
-#include "config/aom_config.h"
-
-static INLINE __m128i transpose_8bit_4x4(const __m128i *const in) {
- // Unpack 16 bit elements. Goes from:
- // in[0]: 00 01 02 03
- // in[1]: 10 11 12 13
- // in[2]: 20 21 22 23
- // in[3]: 30 31 32 33
- // to:
- // a0: 00 10 01 11 02 12 03 13
- // a1: 20 30 21 31 22 32 23 33
- const __m128i a0 = _mm_unpacklo_epi8(in[0], in[1]);
- const __m128i a1 = _mm_unpacklo_epi8(in[2], in[3]);
-
- // Unpack 32 bit elements resulting in:
- // 00 10 20 30 01 11 21 31 02 12 22 32 03 13 23 33
- return _mm_unpacklo_epi16(a0, a1);
-}
-
-static INLINE void transpose_8bit_8x8(const __m128i *const in,
- __m128i *const out) {
- // Unpack 8 bit elements. Goes from:
- // in[0]: 00 01 02 03 04 05 06 07
- // in[1]: 10 11 12 13 14 15 16 17
- // in[2]: 20 21 22 23 24 25 26 27
- // in[3]: 30 31 32 33 34 35 36 37
- // in[4]: 40 41 42 43 44 45 46 47
- // in[5]: 50 51 52 53 54 55 56 57
- // in[6]: 60 61 62 63 64 65 66 67
- // in[7]: 70 71 72 73 74 75 76 77
- // to:
- // a0: 00 10 01 11 02 12 03 13 04 14 05 15 06 16 07 17
- // a1: 20 30 21 31 22 32 23 33 24 34 25 35 26 36 27 37
- // a2: 40 50 41 51 42 52 43 53 44 54 45 55 46 56 47 57
- // a3: 60 70 61 71 62 72 63 73 64 74 65 75 66 76 67 77
- const __m128i a0 = _mm_unpacklo_epi8(in[0], in[1]);
- const __m128i a1 = _mm_unpacklo_epi8(in[2], in[3]);
- const __m128i a2 = _mm_unpacklo_epi8(in[4], in[5]);
- const __m128i a3 = _mm_unpacklo_epi8(in[6], in[7]);
-
- // Unpack 16 bit elements resulting in:
- // b0: 00 10 20 30 01 11 21 31 02 12 22 32 03 13 23 33
- // b1: 40 50 60 70 41 51 61 71 42 52 62 72 43 53 63 73
- // b2: 04 14 24 34 05 15 25 35 06 16 26 36 07 17 27 37
- // b3: 44 54 64 74 45 55 65 75 46 56 66 76 47 57 67 77
- const __m128i b0 = _mm_unpacklo_epi16(a0, a1);
- const __m128i b1 = _mm_unpackhi_epi16(a0, a1);
- const __m128i b2 = _mm_unpacklo_epi16(a2, a3);
- const __m128i b3 = _mm_unpackhi_epi16(a2, a3);
-
- // Unpack 32 bit elements resulting in:
- // c0: 00 10 20 30 40 50 60 70 01 11 21 31 41 51 61 71
- // c1: 02 12 22 32 42 52 62 72 03 13 23 33 43 53 63 73
- // c2: 04 14 24 34 44 54 64 74 05 15 25 35 45 55 65 75
- // c3: 06 16 26 36 46 56 66 76 07 17 27 37 47 57 67 77
- const __m128i c0 = _mm_unpacklo_epi32(b0, b2);
- const __m128i c1 = _mm_unpackhi_epi32(b0, b2);
- const __m128i c2 = _mm_unpacklo_epi32(b1, b3);
- const __m128i c3 = _mm_unpackhi_epi32(b1, b3);
-
- // Unpack 64 bit elements resulting in:
- // out[0]: 00 10 20 30 40 50 60 70
- // out[1]: 01 11 21 31 41 51 61 71
- // out[2]: 02 12 22 32 42 52 62 72
- // out[3]: 03 13 23 33 43 53 63 73
- // out[4]: 04 14 24 34 44 54 64 74
- // out[5]: 05 15 25 35 45 55 65 75
- // out[6]: 06 16 26 36 46 56 66 76
- // out[7]: 07 17 27 37 47 57 67 77
- out[0] = _mm_unpacklo_epi64(c0, c0);
- out[1] = _mm_unpackhi_epi64(c0, c0);
- out[2] = _mm_unpacklo_epi64(c1, c1);
- out[3] = _mm_unpackhi_epi64(c1, c1);
- out[4] = _mm_unpacklo_epi64(c2, c2);
- out[5] = _mm_unpackhi_epi64(c2, c2);
- out[6] = _mm_unpacklo_epi64(c3, c3);
- out[7] = _mm_unpackhi_epi64(c3, c3);
-}
-
-static INLINE void transpose_16bit_4x4(const __m128i *const in,
- __m128i *const out) {
- // Unpack 16 bit elements. Goes from:
- // in[0]: 00 01 02 03 XX XX XX XX
- // in[1]: 10 11 12 13 XX XX XX XX
- // in[2]: 20 21 22 23 XX XX XX XX
- // in[3]: 30 31 32 33 XX XX XX XX
- // to:
- // a0: 00 10 01 11 02 12 03 13
- // a1: 20 30 21 31 22 32 23 33
- const __m128i a0 = _mm_unpacklo_epi16(in[0], in[1]);
- const __m128i a1 = _mm_unpacklo_epi16(in[2], in[3]);
-
- // Unpack 32 bit elements resulting in:
- // out[0]: 00 10 20 30
- // out[1]: 01 11 21 31
- // out[2]: 02 12 22 32
- // out[3]: 03 13 23 33
- out[0] = _mm_unpacklo_epi32(a0, a1);
- out[1] = _mm_srli_si128(out[0], 8);
- out[2] = _mm_unpackhi_epi32(a0, a1);
- out[3] = _mm_srli_si128(out[2], 8);
-}
-
-static INLINE void transpose_16bit_4x8(const __m128i *const in,
- __m128i *const out) {
- // Unpack 16 bit elements. Goes from:
- // in[0]: 00 01 02 03 XX XX XX XX
- // in[1]: 10 11 12 13 XX XX XX XX
- // in[2]: 20 21 22 23 XX XX XX XX
- // in[3]: 30 31 32 33 XX XX XX XX
- // in[4]: 40 41 42 43 XX XX XX XX
- // in[5]: 50 51 52 53 XX XX XX XX
- // in[6]: 60 61 62 63 XX XX XX XX
- // in[7]: 70 71 72 73 XX XX XX XX
- // to:
- // a0: 00 10 01 11 02 12 03 13
- // a1: 20 30 21 31 22 32 23 33
- // a2: 40 50 41 51 42 52 43 53
- // a3: 60 70 61 71 62 72 63 73
- const __m128i a0 = _mm_unpacklo_epi16(in[0], in[1]);
- const __m128i a1 = _mm_unpacklo_epi16(in[2], in[3]);
- const __m128i a2 = _mm_unpacklo_epi16(in[4], in[5]);
- const __m128i a3 = _mm_unpacklo_epi16(in[6], in[7]);
-
- // Unpack 32 bit elements resulting in:
- // b0: 00 10 20 30 01 11 21 31
- // b1: 40 50 60 70 41 51 61 71
- // b2: 02 12 22 32 03 13 23 33
- // b3: 42 52 62 72 43 53 63 73
- const __m128i b0 = _mm_unpacklo_epi32(a0, a1);
- const __m128i b1 = _mm_unpacklo_epi32(a2, a3);
- const __m128i b2 = _mm_unpackhi_epi32(a0, a1);
- const __m128i b3 = _mm_unpackhi_epi32(a2, a3);
-
- // Unpack 64 bit elements resulting in:
- // out[0]: 00 10 20 30 40 50 60 70
- // out[1]: 01 11 21 31 41 51 61 71
- // out[2]: 02 12 22 32 42 52 62 72
- // out[3]: 03 13 23 33 43 53 63 73
- out[0] = _mm_unpacklo_epi64(b0, b1);
- out[1] = _mm_unpackhi_epi64(b0, b1);
- out[2] = _mm_unpacklo_epi64(b2, b3);
- out[3] = _mm_unpackhi_epi64(b2, b3);
-}
-
-static INLINE void transpose_16bit_8x4(const __m128i *const in,
- __m128i *const out) {
- // Unpack 16 bit elements. Goes from:
- // in[0]: 00 01 02 03 04 05 06 07
- // in[1]: 10 11 12 13 14 15 16 17
- // in[2]: 20 21 22 23 24 25 26 27
- // in[3]: 30 31 32 33 34 35 36 37
-
- // to:
- // a0: 00 10 01 11 02 12 03 13
- // a1: 20 30 21 31 22 32 23 33
- // a4: 04 14 05 15 06 16 07 17
- // a5: 24 34 25 35 26 36 27 37
- const __m128i a0 = _mm_unpacklo_epi16(in[0], in[1]);
- const __m128i a1 = _mm_unpacklo_epi16(in[2], in[3]);
- const __m128i a4 = _mm_unpackhi_epi16(in[0], in[1]);
- const __m128i a5 = _mm_unpackhi_epi16(in[2], in[3]);
-
- // Unpack 32 bit elements resulting in:
- // b0: 00 10 20 30 01 11 21 31
- // b2: 04 14 24 34 05 15 25 35
- // b4: 02 12 22 32 03 13 23 33
- // b6: 06 16 26 36 07 17 27 37
- const __m128i b0 = _mm_unpacklo_epi32(a0, a1);
- const __m128i b2 = _mm_unpacklo_epi32(a4, a5);
- const __m128i b4 = _mm_unpackhi_epi32(a0, a1);
- const __m128i b6 = _mm_unpackhi_epi32(a4, a5);
-
- // Unpack 64 bit elements resulting in:
- // out[0]: 00 10 20 30 XX XX XX XX
- // out[1]: 01 11 21 31 XX XX XX XX
- // out[2]: 02 12 22 32 XX XX XX XX
- // out[3]: 03 13 23 33 XX XX XX XX
- // out[4]: 04 14 24 34 XX XX XX XX
- // out[5]: 05 15 25 35 XX XX XX XX
- // out[6]: 06 16 26 36 XX XX XX XX
- // out[7]: 07 17 27 37 XX XX XX XX
- const __m128i zeros = _mm_setzero_si128();
- out[0] = _mm_unpacklo_epi64(b0, zeros);
- out[1] = _mm_unpackhi_epi64(b0, zeros);
- out[2] = _mm_unpacklo_epi64(b4, zeros);
- out[3] = _mm_unpackhi_epi64(b4, zeros);
- out[4] = _mm_unpacklo_epi64(b2, zeros);
- out[5] = _mm_unpackhi_epi64(b2, zeros);
- out[6] = _mm_unpacklo_epi64(b6, zeros);
- out[7] = _mm_unpackhi_epi64(b6, zeros);
-}
-
-static INLINE void transpose_16bit_8x8(const __m128i *const in,
- __m128i *const out) {
- // Unpack 16 bit elements. Goes from:
- // in[0]: 00 01 02 03 04 05 06 07
- // in[1]: 10 11 12 13 14 15 16 17
- // in[2]: 20 21 22 23 24 25 26 27
- // in[3]: 30 31 32 33 34 35 36 37
- // in[4]: 40 41 42 43 44 45 46 47
- // in[5]: 50 51 52 53 54 55 56 57
- // in[6]: 60 61 62 63 64 65 66 67
- // in[7]: 70 71 72 73 74 75 76 77
- // to:
- // a0: 00 10 01 11 02 12 03 13
- // a1: 20 30 21 31 22 32 23 33
- // a2: 40 50 41 51 42 52 43 53
- // a3: 60 70 61 71 62 72 63 73
- // a4: 04 14 05 15 06 16 07 17
- // a5: 24 34 25 35 26 36 27 37
- // a6: 44 54 45 55 46 56 47 57
- // a7: 64 74 65 75 66 76 67 77
- const __m128i a0 = _mm_unpacklo_epi16(in[0], in[1]);
- const __m128i a1 = _mm_unpacklo_epi16(in[2], in[3]);
- const __m128i a2 = _mm_unpacklo_epi16(in[4], in[5]);
- const __m128i a3 = _mm_unpacklo_epi16(in[6], in[7]);
- const __m128i a4 = _mm_unpackhi_epi16(in[0], in[1]);
- const __m128i a5 = _mm_unpackhi_epi16(in[2], in[3]);
- const __m128i a6 = _mm_unpackhi_epi16(in[4], in[5]);
- const __m128i a7 = _mm_unpackhi_epi16(in[6], in[7]);
-
- // Unpack 32 bit elements resulting in:
- // b0: 00 10 20 30 01 11 21 31
- // b1: 40 50 60 70 41 51 61 71
- // b2: 04 14 24 34 05 15 25 35
- // b3: 44 54 64 74 45 55 65 75
- // b4: 02 12 22 32 03 13 23 33
- // b5: 42 52 62 72 43 53 63 73
- // b6: 06 16 26 36 07 17 27 37
- // b7: 46 56 66 76 47 57 67 77
- const __m128i b0 = _mm_unpacklo_epi32(a0, a1);
- const __m128i b1 = _mm_unpacklo_epi32(a2, a3);
- const __m128i b2 = _mm_unpacklo_epi32(a4, a5);
- const __m128i b3 = _mm_unpacklo_epi32(a6, a7);
- const __m128i b4 = _mm_unpackhi_epi32(a0, a1);
- const __m128i b5 = _mm_unpackhi_epi32(a2, a3);
- const __m128i b6 = _mm_unpackhi_epi32(a4, a5);
- const __m128i b7 = _mm_unpackhi_epi32(a6, a7);
-
- // Unpack 64 bit elements resulting in:
- // out[0]: 00 10 20 30 40 50 60 70
- // out[1]: 01 11 21 31 41 51 61 71
- // out[2]: 02 12 22 32 42 52 62 72
- // out[3]: 03 13 23 33 43 53 63 73
- // out[4]: 04 14 24 34 44 54 64 74
- // out[5]: 05 15 25 35 45 55 65 75
- // out[6]: 06 16 26 36 46 56 66 76
- // out[7]: 07 17 27 37 47 57 67 77
- out[0] = _mm_unpacklo_epi64(b0, b1);
- out[1] = _mm_unpackhi_epi64(b0, b1);
- out[2] = _mm_unpacklo_epi64(b4, b5);
- out[3] = _mm_unpackhi_epi64(b4, b5);
- out[4] = _mm_unpacklo_epi64(b2, b3);
- out[5] = _mm_unpackhi_epi64(b2, b3);
- out[6] = _mm_unpacklo_epi64(b6, b7);
- out[7] = _mm_unpackhi_epi64(b6, b7);
-}
-
-// Transpose in-place
-static INLINE void transpose_16bit_16x16(__m128i *const left,
- __m128i *const right) {
- __m128i tbuf[8];
- transpose_16bit_8x8(left, left);
- transpose_16bit_8x8(right, tbuf);
- transpose_16bit_8x8(left + 8, right);
- transpose_16bit_8x8(right + 8, right + 8);
-
- left[8] = tbuf[0];
- left[9] = tbuf[1];
- left[10] = tbuf[2];
- left[11] = tbuf[3];
- left[12] = tbuf[4];
- left[13] = tbuf[5];
- left[14] = tbuf[6];
- left[15] = tbuf[7];
-}
-
-static INLINE void transpose_32bit_4x4(const __m128i *const in,
- __m128i *const out) {
- // Unpack 32 bit elements. Goes from:
- // in[0]: 00 01 02 03
- // in[1]: 10 11 12 13
- // in[2]: 20 21 22 23
- // in[3]: 30 31 32 33
- // to:
- // a0: 00 10 01 11
- // a1: 20 30 21 31
- // a2: 02 12 03 13
- // a3: 22 32 23 33
-
- const __m128i a0 = _mm_unpacklo_epi32(in[0], in[1]);
- const __m128i a1 = _mm_unpacklo_epi32(in[2], in[3]);
- const __m128i a2 = _mm_unpackhi_epi32(in[0], in[1]);
- const __m128i a3 = _mm_unpackhi_epi32(in[2], in[3]);
-
- // Unpack 64 bit elements resulting in:
- // out[0]: 00 10 20 30
- // out[1]: 01 11 21 31
- // out[2]: 02 12 22 32
- // out[3]: 03 13 23 33
- out[0] = _mm_unpacklo_epi64(a0, a1);
- out[1] = _mm_unpackhi_epi64(a0, a1);
- out[2] = _mm_unpacklo_epi64(a2, a3);
- out[3] = _mm_unpackhi_epi64(a2, a3);
-}
-
-static INLINE void transpose_32bit_4x4x2(const __m128i *const in,
- __m128i *const out) {
- // Unpack 32 bit elements. Goes from:
- // in[0]: 00 01 02 03
- // in[1]: 10 11 12 13
- // in[2]: 20 21 22 23
- // in[3]: 30 31 32 33
- // in[4]: 04 05 06 07
- // in[5]: 14 15 16 17
- // in[6]: 24 25 26 27
- // in[7]: 34 35 36 37
- // to:
- // a0: 00 10 01 11
- // a1: 20 30 21 31
- // a2: 02 12 03 13
- // a3: 22 32 23 33
- // a4: 04 14 05 15
- // a5: 24 34 25 35
- // a6: 06 16 07 17
- // a7: 26 36 27 37
- const __m128i a0 = _mm_unpacklo_epi32(in[0], in[1]);
- const __m128i a1 = _mm_unpacklo_epi32(in[2], in[3]);
- const __m128i a2 = _mm_unpackhi_epi32(in[0], in[1]);
- const __m128i a3 = _mm_unpackhi_epi32(in[2], in[3]);
- const __m128i a4 = _mm_unpacklo_epi32(in[4], in[5]);
- const __m128i a5 = _mm_unpacklo_epi32(in[6], in[7]);
- const __m128i a6 = _mm_unpackhi_epi32(in[4], in[5]);
- const __m128i a7 = _mm_unpackhi_epi32(in[6], in[7]);
-
- // Unpack 64 bit elements resulting in:
- // out[0]: 00 10 20 30
- // out[1]: 01 11 21 31
- // out[2]: 02 12 22 32
- // out[3]: 03 13 23 33
- // out[4]: 04 14 24 34
- // out[5]: 05 15 25 35
- // out[6]: 06 16 26 36
- // out[7]: 07 17 27 37
- out[0] = _mm_unpacklo_epi64(a0, a1);
- out[1] = _mm_unpackhi_epi64(a0, a1);
- out[2] = _mm_unpacklo_epi64(a2, a3);
- out[3] = _mm_unpackhi_epi64(a2, a3);
- out[4] = _mm_unpacklo_epi64(a4, a5);
- out[5] = _mm_unpackhi_epi64(a4, a5);
- out[6] = _mm_unpacklo_epi64(a6, a7);
- out[7] = _mm_unpackhi_epi64(a6, a7);
-}
-
-static INLINE void transpose_32bit_8x4(const __m128i *const in,
- __m128i *const out) {
- // Unpack 32 bit elements. Goes from:
- // in[0]: 00 01 02 03
- // in[1]: 04 05 06 07
- // in[2]: 10 11 12 13
- // in[3]: 14 15 16 17
- // in[4]: 20 21 22 23
- // in[5]: 24 25 26 27
- // in[6]: 30 31 32 33
- // in[7]: 34 35 36 37
- // to:
- // a0: 00 10 01 11
- // a1: 20 30 21 31
- // a2: 02 12 03 13
- // a3: 22 32 23 33
- // a4: 04 14 05 15
- // a5: 24 34 25 35
- // a6: 06 16 07 17
- // a7: 26 36 27 37
- const __m128i a0 = _mm_unpacklo_epi32(in[0], in[2]);
- const __m128i a1 = _mm_unpacklo_epi32(in[4], in[6]);
- const __m128i a2 = _mm_unpackhi_epi32(in[0], in[2]);
- const __m128i a3 = _mm_unpackhi_epi32(in[4], in[6]);
- const __m128i a4 = _mm_unpacklo_epi32(in[1], in[3]);
- const __m128i a5 = _mm_unpacklo_epi32(in[5], in[7]);
- const __m128i a6 = _mm_unpackhi_epi32(in[1], in[3]);
- const __m128i a7 = _mm_unpackhi_epi32(in[5], in[7]);
-
- // Unpack 64 bit elements resulting in:
- // out[0]: 00 10 20 30
- // out[1]: 01 11 21 31
- // out[2]: 02 12 22 32
- // out[3]: 03 13 23 33
- // out[4]: 04 14 24 34
- // out[5]: 05 15 25 35
- // out[6]: 06 16 26 36
- // out[7]: 07 17 27 37
- out[0] = _mm_unpacklo_epi64(a0, a1);
- out[1] = _mm_unpackhi_epi64(a0, a1);
- out[2] = _mm_unpacklo_epi64(a2, a3);
- out[3] = _mm_unpackhi_epi64(a2, a3);
- out[4] = _mm_unpacklo_epi64(a4, a5);
- out[5] = _mm_unpackhi_epi64(a4, a5);
- out[6] = _mm_unpacklo_epi64(a6, a7);
- out[7] = _mm_unpackhi_epi64(a6, a7);
-}
-
-#endif // AOM_AOM_DSP_X86_TRANSPOSE_SSE2_H_
diff --git a/third_party/aom/aom_dsp/x86/txfm_common_avx2.h b/third_party/aom/aom_dsp/x86/txfm_common_avx2.h
deleted file mode 100644
index b1611ba87..000000000
--- a/third_party/aom/aom_dsp/x86/txfm_common_avx2.h
+++ /dev/null
@@ -1,199 +0,0 @@
-/*
- * Copyright (c) 2018, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#ifndef AOM_AOM_DSP_X86_TXFM_COMMON_AVX2_H_
-#define AOM_AOM_DSP_X86_TXFM_COMMON_AVX2_H_
-
-#include <emmintrin.h>
-#include "aom/aom_integer.h"
-#include "aom_dsp/x86/synonyms.h"
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-typedef void (*transform_1d_avx2)(const __m256i *input, __m256i *output,
- int8_t cos_bit);
-
-static INLINE __m256i pair_set_w16_epi16(int16_t a, int16_t b) {
- return _mm256_set1_epi32(
- (int32_t)(((uint16_t)(a)) | (((uint32_t)(b)) << 16)));
-}
-
-static INLINE void btf_16_w16_avx2(const __m256i w0, const __m256i w1,
- __m256i *in0, __m256i *in1, const __m256i _r,
- const int32_t cos_bit) {
- __m256i t0 = _mm256_unpacklo_epi16(*in0, *in1);
- __m256i t1 = _mm256_unpackhi_epi16(*in0, *in1);
- __m256i u0 = _mm256_madd_epi16(t0, w0);
- __m256i u1 = _mm256_madd_epi16(t1, w0);
- __m256i v0 = _mm256_madd_epi16(t0, w1);
- __m256i v1 = _mm256_madd_epi16(t1, w1);
-
- __m256i a0 = _mm256_add_epi32(u0, _r);
- __m256i a1 = _mm256_add_epi32(u1, _r);
- __m256i b0 = _mm256_add_epi32(v0, _r);
- __m256i b1 = _mm256_add_epi32(v1, _r);
-
- __m256i c0 = _mm256_srai_epi32(a0, cos_bit);
- __m256i c1 = _mm256_srai_epi32(a1, cos_bit);
- __m256i d0 = _mm256_srai_epi32(b0, cos_bit);
- __m256i d1 = _mm256_srai_epi32(b1, cos_bit);
-
- *in0 = _mm256_packs_epi32(c0, c1);
- *in1 = _mm256_packs_epi32(d0, d1);
-}
-
-static INLINE void btf_16_adds_subs_avx2(__m256i *in0, __m256i *in1) {
- const __m256i _in0 = *in0;
- const __m256i _in1 = *in1;
- *in0 = _mm256_adds_epi16(_in0, _in1);
- *in1 = _mm256_subs_epi16(_in0, _in1);
-}
-
-static INLINE void btf_32_add_sub_avx2(__m256i *in0, __m256i *in1) {
- const __m256i _in0 = *in0;
- const __m256i _in1 = *in1;
- *in0 = _mm256_add_epi32(_in0, _in1);
- *in1 = _mm256_sub_epi32(_in0, _in1);
-}
-
-static INLINE void btf_16_adds_subs_out_avx2(__m256i *out0, __m256i *out1,
- __m256i in0, __m256i in1) {
- const __m256i _in0 = in0;
- const __m256i _in1 = in1;
- *out0 = _mm256_adds_epi16(_in0, _in1);
- *out1 = _mm256_subs_epi16(_in0, _in1);
-}
-
-static INLINE void btf_32_add_sub_out_avx2(__m256i *out0, __m256i *out1,
- __m256i in0, __m256i in1) {
- const __m256i _in0 = in0;
- const __m256i _in1 = in1;
- *out0 = _mm256_add_epi32(_in0, _in1);
- *out1 = _mm256_sub_epi32(_in0, _in1);
-}
-
-static INLINE __m256i load_16bit_to_16bit_avx2(const int16_t *a) {
- return _mm256_load_si256((const __m256i *)a);
-}
-
-static INLINE void load_buffer_16bit_to_16bit_avx2(const int16_t *in,
- int stride, __m256i *out,
- int out_size) {
- for (int i = 0; i < out_size; ++i) {
- out[i] = load_16bit_to_16bit_avx2(in + i * stride);
- }
-}
-
-static INLINE void load_buffer_16bit_to_16bit_flip_avx2(const int16_t *in,
- int stride,
- __m256i *out,
- int out_size) {
- for (int i = 0; i < out_size; ++i) {
- out[out_size - i - 1] = load_16bit_to_16bit_avx2(in + i * stride);
- }
-}
-
-static INLINE __m256i load_32bit_to_16bit_w16_avx2(const int32_t *a) {
- const __m256i a_low = _mm256_lddqu_si256((const __m256i *)a);
- const __m256i b = _mm256_packs_epi32(a_low, *(const __m256i *)(a + 8));
- return _mm256_permute4x64_epi64(b, 0xD8);
-}
-
-static INLINE void load_buffer_32bit_to_16bit_w16_avx2(const int32_t *in,
- int stride, __m256i *out,
- int out_size) {
- for (int i = 0; i < out_size; ++i) {
- out[i] = load_32bit_to_16bit_w16_avx2(in + i * stride);
- }
-}
-
-static INLINE void transpose_16bit_16x16_avx2(const __m256i *const in,
- __m256i *const out) {
- // Unpack 16 bit elements. Goes from:
- // in[0]: 00 01 02 03 08 09 0a 0b 04 05 06 07 0c 0d 0e 0f
- // in[1]: 10 11 12 13 18 19 1a 1b 14 15 16 17 1c 1d 1e 1f
- // in[2]: 20 21 22 23 28 29 2a 2b 24 25 26 27 2c 2d 2e 2f
- // in[3]: 30 31 32 33 38 39 3a 3b 34 35 36 37 3c 3d 3e 3f
- // in[4]: 40 41 42 43 48 49 4a 4b 44 45 46 47 4c 4d 4e 4f
- // in[5]: 50 51 52 53 58 59 5a 5b 54 55 56 57 5c 5d 5e 5f
- // in[6]: 60 61 62 63 68 69 6a 6b 64 65 66 67 6c 6d 6e 6f
- // in[7]: 70 71 72 73 78 79 7a 7b 74 75 76 77 7c 7d 7e 7f
- // in[8]: 80 81 82 83 88 89 8a 8b 84 85 86 87 8c 8d 8e 8f
- // to:
- // a0: 00 10 01 11 02 12 03 13 04 14 05 15 06 16 07 17
- // a1: 20 30 21 31 22 32 23 33 24 34 25 35 26 36 27 37
- // a2: 40 50 41 51 42 52 43 53 44 54 45 55 46 56 47 57
- // a3: 60 70 61 71 62 72 63 73 64 74 65 75 66 76 67 77
- // ...
- __m256i a[16];
- for (int i = 0; i < 16; i += 2) {
- a[i / 2 + 0] = _mm256_unpacklo_epi16(in[i], in[i + 1]);
- a[i / 2 + 8] = _mm256_unpackhi_epi16(in[i], in[i + 1]);
- }
- __m256i b[16];
- for (int i = 0; i < 16; i += 2) {
- b[i / 2 + 0] = _mm256_unpacklo_epi32(a[i], a[i + 1]);
- b[i / 2 + 8] = _mm256_unpackhi_epi32(a[i], a[i + 1]);
- }
- __m256i c[16];
- for (int i = 0; i < 16; i += 2) {
- c[i / 2 + 0] = _mm256_unpacklo_epi64(b[i], b[i + 1]);
- c[i / 2 + 8] = _mm256_unpackhi_epi64(b[i], b[i + 1]);
- }
- out[0 + 0] = _mm256_permute2x128_si256(c[0], c[1], 0x20);
- out[1 + 0] = _mm256_permute2x128_si256(c[8], c[9], 0x20);
- out[2 + 0] = _mm256_permute2x128_si256(c[4], c[5], 0x20);
- out[3 + 0] = _mm256_permute2x128_si256(c[12], c[13], 0x20);
-
- out[0 + 8] = _mm256_permute2x128_si256(c[0], c[1], 0x31);
- out[1 + 8] = _mm256_permute2x128_si256(c[8], c[9], 0x31);
- out[2 + 8] = _mm256_permute2x128_si256(c[4], c[5], 0x31);
- out[3 + 8] = _mm256_permute2x128_si256(c[12], c[13], 0x31);
-
- out[4 + 0] = _mm256_permute2x128_si256(c[0 + 2], c[1 + 2], 0x20);
- out[5 + 0] = _mm256_permute2x128_si256(c[8 + 2], c[9 + 2], 0x20);
- out[6 + 0] = _mm256_permute2x128_si256(c[4 + 2], c[5 + 2], 0x20);
- out[7 + 0] = _mm256_permute2x128_si256(c[12 + 2], c[13 + 2], 0x20);
-
- out[4 + 8] = _mm256_permute2x128_si256(c[0 + 2], c[1 + 2], 0x31);
- out[5 + 8] = _mm256_permute2x128_si256(c[8 + 2], c[9 + 2], 0x31);
- out[6 + 8] = _mm256_permute2x128_si256(c[4 + 2], c[5 + 2], 0x31);
- out[7 + 8] = _mm256_permute2x128_si256(c[12 + 2], c[13 + 2], 0x31);
-}
-
-static INLINE void flip_buf_avx2(__m256i *in, __m256i *out, int size) {
- for (int i = 0; i < size; ++i) {
- out[size - i - 1] = in[i];
- }
-}
-
-static INLINE void round_shift_16bit_w16_avx2(__m256i *in, int size, int bit) {
- if (bit < 0) {
- bit = -bit;
- __m256i round = _mm256_set1_epi16(1 << (bit - 1));
- for (int i = 0; i < size; ++i) {
- in[i] = _mm256_adds_epi16(in[i], round);
- in[i] = _mm256_srai_epi16(in[i], bit);
- }
- } else if (bit > 0) {
- for (int i = 0; i < size; ++i) {
- in[i] = _mm256_slli_epi16(in[i], bit);
- }
- }
-}
-
-#ifdef __cplusplus
-}
-#endif
-
-#endif // AOM_AOM_DSP_X86_TXFM_COMMON_AVX2_H_
diff --git a/third_party/aom/aom_dsp/x86/txfm_common_sse2.h b/third_party/aom/aom_dsp/x86/txfm_common_sse2.h
deleted file mode 100644
index ed82eee96..000000000
--- a/third_party/aom/aom_dsp/x86/txfm_common_sse2.h
+++ /dev/null
@@ -1,29 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#ifndef AOM_AOM_DSP_X86_TXFM_COMMON_SSE2_H_
-#define AOM_AOM_DSP_X86_TXFM_COMMON_SSE2_H_
-
-#include <emmintrin.h>
-#include "aom/aom_integer.h"
-#include "aom_dsp/x86/synonyms.h"
-
-#define pair_set_epi16(a, b) \
- _mm_set1_epi32((int32_t)(((uint16_t)(a)) | (((uint32_t)(b)) << 16)))
-
-// Reverse the 8 16 bit words in __m128i
-static INLINE __m128i mm_reverse_epi16(const __m128i x) {
- const __m128i a = _mm_shufflelo_epi16(x, 0x1b);
- const __m128i b = _mm_shufflehi_epi16(a, 0x1b);
- return _mm_shuffle_epi32(b, 0x4e);
-}
-
-#endif // AOM_AOM_DSP_X86_TXFM_COMMON_SSE2_H_
diff --git a/third_party/aom/aom_dsp/x86/variance_avx2.c b/third_party/aom/aom_dsp/x86/variance_avx2.c
deleted file mode 100644
index 800aef126..000000000
--- a/third_party/aom/aom_dsp/x86/variance_avx2.c
+++ /dev/null
@@ -1,517 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#include <immintrin.h>
-
-#include "config/aom_dsp_rtcd.h"
-
-#include "aom_dsp/x86/masked_variance_intrin_ssse3.h"
-
-static INLINE __m128i mm256_add_hi_lo_epi16(const __m256i val) {
- return _mm_add_epi16(_mm256_castsi256_si128(val),
- _mm256_extractf128_si256(val, 1));
-}
-
-static INLINE __m128i mm256_add_hi_lo_epi32(const __m256i val) {
- return _mm_add_epi32(_mm256_castsi256_si128(val),
- _mm256_extractf128_si256(val, 1));
-}
-
-static INLINE void variance_kernel_avx2(const __m256i src, const __m256i ref,
- __m256i *const sse,
- __m256i *const sum) {
- const __m256i adj_sub = _mm256_set1_epi16(0xff01); // (1,-1)
-
- // unpack into pairs of source and reference values
- const __m256i src_ref0 = _mm256_unpacklo_epi8(src, ref);
- const __m256i src_ref1 = _mm256_unpackhi_epi8(src, ref);
-
- // subtract adjacent elements using src*1 + ref*-1
- const __m256i diff0 = _mm256_maddubs_epi16(src_ref0, adj_sub);
- const __m256i diff1 = _mm256_maddubs_epi16(src_ref1, adj_sub);
- const __m256i madd0 = _mm256_madd_epi16(diff0, diff0);
- const __m256i madd1 = _mm256_madd_epi16(diff1, diff1);
-
- // add to the running totals
- *sum = _mm256_add_epi16(*sum, _mm256_add_epi16(diff0, diff1));
- *sse = _mm256_add_epi32(*sse, _mm256_add_epi32(madd0, madd1));
-}
-
-static INLINE int variance_final_from_32bit_sum_avx2(__m256i vsse, __m128i vsum,
- unsigned int *const sse) {
- // extract the low lane and add it to the high lane
- const __m128i sse_reg_128 = mm256_add_hi_lo_epi32(vsse);
-
- // unpack sse and sum registers and add
- const __m128i sse_sum_lo = _mm_unpacklo_epi32(sse_reg_128, vsum);
- const __m128i sse_sum_hi = _mm_unpackhi_epi32(sse_reg_128, vsum);
- const __m128i sse_sum = _mm_add_epi32(sse_sum_lo, sse_sum_hi);
-
- // perform the final summation and extract the results
- const __m128i res = _mm_add_epi32(sse_sum, _mm_srli_si128(sse_sum, 8));
- *((int *)sse) = _mm_cvtsi128_si32(res);
- return _mm_extract_epi32(res, 1);
-}
-
-// handle pixels (<= 512)
-static INLINE int variance_final_512_avx2(__m256i vsse, __m256i vsum,
- unsigned int *const sse) {
- // extract the low lane and add it to the high lane
- const __m128i vsum_128 = mm256_add_hi_lo_epi16(vsum);
- const __m128i vsum_64 = _mm_add_epi16(vsum_128, _mm_srli_si128(vsum_128, 8));
- const __m128i sum_int32 = _mm_cvtepi16_epi32(vsum_64);
- return variance_final_from_32bit_sum_avx2(vsse, sum_int32, sse);
-}
-
-// handle 1024 pixels (32x32, 16x64, 64x16)
-static INLINE int variance_final_1024_avx2(__m256i vsse, __m256i vsum,
- unsigned int *const sse) {
- // extract the low lane and add it to the high lane
- const __m128i vsum_128 = mm256_add_hi_lo_epi16(vsum);
- const __m128i vsum_64 =
- _mm_add_epi32(_mm_cvtepi16_epi32(vsum_128),
- _mm_cvtepi16_epi32(_mm_srli_si128(vsum_128, 8)));
- return variance_final_from_32bit_sum_avx2(vsse, vsum_64, sse);
-}
-
-static INLINE __m256i sum_to_32bit_avx2(const __m256i sum) {
- const __m256i sum_lo = _mm256_cvtepi16_epi32(_mm256_castsi256_si128(sum));
- const __m256i sum_hi =
- _mm256_cvtepi16_epi32(_mm256_extractf128_si256(sum, 1));
- return _mm256_add_epi32(sum_lo, sum_hi);
-}
-
-// handle 2048 pixels (32x64, 64x32)
-static INLINE int variance_final_2048_avx2(__m256i vsse, __m256i vsum,
- unsigned int *const sse) {
- vsum = sum_to_32bit_avx2(vsum);
- const __m128i vsum_128 = mm256_add_hi_lo_epi32(vsum);
- return variance_final_from_32bit_sum_avx2(vsse, vsum_128, sse);
-}
-
-static INLINE void variance16_kernel_avx2(
- const uint8_t *const src, const int src_stride, const uint8_t *const ref,
- const int ref_stride, __m256i *const sse, __m256i *const sum) {
- const __m128i s0 = _mm_loadu_si128((__m128i const *)(src + 0 * src_stride));
- const __m128i s1 = _mm_loadu_si128((__m128i const *)(src + 1 * src_stride));
- const __m128i r0 = _mm_loadu_si128((__m128i const *)(ref + 0 * ref_stride));
- const __m128i r1 = _mm_loadu_si128((__m128i const *)(ref + 1 * ref_stride));
- const __m256i s = _mm256_inserti128_si256(_mm256_castsi128_si256(s0), s1, 1);
- const __m256i r = _mm256_inserti128_si256(_mm256_castsi128_si256(r0), r1, 1);
- variance_kernel_avx2(s, r, sse, sum);
-}
-
-static INLINE void variance32_kernel_avx2(const uint8_t *const src,
- const uint8_t *const ref,
- __m256i *const sse,
- __m256i *const sum) {
- const __m256i s = _mm256_loadu_si256((__m256i const *)(src));
- const __m256i r = _mm256_loadu_si256((__m256i const *)(ref));
- variance_kernel_avx2(s, r, sse, sum);
-}
-
-static INLINE void variance16_avx2(const uint8_t *src, const int src_stride,
- const uint8_t *ref, const int ref_stride,
- const int h, __m256i *const vsse,
- __m256i *const vsum) {
- *vsum = _mm256_setzero_si256();
-
- for (int i = 0; i < h; i += 2) {
- variance16_kernel_avx2(src, src_stride, ref, ref_stride, vsse, vsum);
- src += 2 * src_stride;
- ref += 2 * ref_stride;
- }
-}
-
-static INLINE void variance32_avx2(const uint8_t *src, const int src_stride,
- const uint8_t *ref, const int ref_stride,
- const int h, __m256i *const vsse,
- __m256i *const vsum) {
- *vsum = _mm256_setzero_si256();
-
- for (int i = 0; i < h; i++) {
- variance32_kernel_avx2(src, ref, vsse, vsum);
- src += src_stride;
- ref += ref_stride;
- }
-}
-
-static INLINE void variance64_avx2(const uint8_t *src, const int src_stride,
- const uint8_t *ref, const int ref_stride,
- const int h, __m256i *const vsse,
- __m256i *const vsum) {
- *vsum = _mm256_setzero_si256();
-
- for (int i = 0; i < h; i++) {
- variance32_kernel_avx2(src + 0, ref + 0, vsse, vsum);
- variance32_kernel_avx2(src + 32, ref + 32, vsse, vsum);
- src += src_stride;
- ref += ref_stride;
- }
-}
-
-static INLINE void variance128_avx2(const uint8_t *src, const int src_stride,
- const uint8_t *ref, const int ref_stride,
- const int h, __m256i *const vsse,
- __m256i *const vsum) {
- *vsum = _mm256_setzero_si256();
-
- for (int i = 0; i < h; i++) {
- variance32_kernel_avx2(src + 0, ref + 0, vsse, vsum);
- variance32_kernel_avx2(src + 32, ref + 32, vsse, vsum);
- variance32_kernel_avx2(src + 64, ref + 64, vsse, vsum);
- variance32_kernel_avx2(src + 96, ref + 96, vsse, vsum);
- src += src_stride;
- ref += ref_stride;
- }
-}
-
-#define AOM_VAR_NO_LOOP_AVX2(bw, bh, bits, max_pixel) \
- unsigned int aom_variance##bw##x##bh##_avx2( \
- const uint8_t *src, int src_stride, const uint8_t *ref, int ref_stride, \
- unsigned int *sse) { \
- __m256i vsse = _mm256_setzero_si256(); \
- __m256i vsum; \
- variance##bw##_avx2(src, src_stride, ref, ref_stride, bh, &vsse, &vsum); \
- const int sum = variance_final_##max_pixel##_avx2(vsse, vsum, sse); \
- return *sse - (uint32_t)(((int64_t)sum * sum) >> bits); \
- }
-
-AOM_VAR_NO_LOOP_AVX2(16, 4, 6, 512);
-AOM_VAR_NO_LOOP_AVX2(16, 8, 7, 512);
-AOM_VAR_NO_LOOP_AVX2(16, 16, 8, 512);
-AOM_VAR_NO_LOOP_AVX2(16, 32, 9, 512);
-AOM_VAR_NO_LOOP_AVX2(16, 64, 10, 1024);
-
-AOM_VAR_NO_LOOP_AVX2(32, 8, 8, 512);
-AOM_VAR_NO_LOOP_AVX2(32, 16, 9, 512);
-AOM_VAR_NO_LOOP_AVX2(32, 32, 10, 1024);
-AOM_VAR_NO_LOOP_AVX2(32, 64, 11, 2048);
-
-AOM_VAR_NO_LOOP_AVX2(64, 16, 10, 1024);
-AOM_VAR_NO_LOOP_AVX2(64, 32, 11, 2048);
-
-#define AOM_VAR_LOOP_AVX2(bw, bh, bits, uh) \
- unsigned int aom_variance##bw##x##bh##_avx2( \
- const uint8_t *src, int src_stride, const uint8_t *ref, int ref_stride, \
- unsigned int *sse) { \
- __m256i vsse = _mm256_setzero_si256(); \
- __m256i vsum = _mm256_setzero_si256(); \
- for (int i = 0; i < (bh / uh); i++) { \
- __m256i vsum16; \
- variance##bw##_avx2(src, src_stride, ref, ref_stride, uh, &vsse, \
- &vsum16); \
- vsum = _mm256_add_epi32(vsum, sum_to_32bit_avx2(vsum16)); \
- src += uh * src_stride; \
- ref += uh * ref_stride; \
- } \
- const __m128i vsum_128 = mm256_add_hi_lo_epi32(vsum); \
- const int sum = variance_final_from_32bit_sum_avx2(vsse, vsum_128, sse); \
- return *sse - (unsigned int)(((int64_t)sum * sum) >> bits); \
- }
-
-AOM_VAR_LOOP_AVX2(64, 64, 12, 32); // 64x32 * ( 64/32)
-AOM_VAR_LOOP_AVX2(64, 128, 13, 32); // 64x32 * (128/32)
-AOM_VAR_LOOP_AVX2(128, 64, 13, 16); // 128x16 * ( 64/16)
-AOM_VAR_LOOP_AVX2(128, 128, 14, 16); // 128x16 * (128/16)
-
-unsigned int aom_mse16x16_avx2(const uint8_t *src, int src_stride,
- const uint8_t *ref, int ref_stride,
- unsigned int *sse) {
- aom_variance16x16_avx2(src, src_stride, ref, ref_stride, sse);
- return *sse;
-}
-
-unsigned int aom_sub_pixel_variance32xh_avx2(const uint8_t *src, int src_stride,
- int x_offset, int y_offset,
- const uint8_t *dst, int dst_stride,
- int height, unsigned int *sse);
-
-unsigned int aom_sub_pixel_avg_variance32xh_avx2(
- const uint8_t *src, int src_stride, int x_offset, int y_offset,
- const uint8_t *dst, int dst_stride, const uint8_t *sec, int sec_stride,
- int height, unsigned int *sseptr);
-
-#define AOM_SUB_PIXEL_VAR_AVX2(w, h, wf, wlog2, hlog2) \
- unsigned int aom_sub_pixel_variance##w##x##h##_avx2( \
- const uint8_t *src, int src_stride, int x_offset, int y_offset, \
- const uint8_t *dst, int dst_stride, unsigned int *sse_ptr) { \
- /*Avoid overflow in helper by capping height.*/ \
- const int hf = AOMMIN(h, 64); \
- unsigned int sse = 0; \
- int se = 0; \
- for (int i = 0; i < (w / wf); ++i) { \
- const uint8_t *src_ptr = src; \
- const uint8_t *dst_ptr = dst; \
- for (int j = 0; j < (h / hf); ++j) { \
- unsigned int sse2; \
- const int se2 = aom_sub_pixel_variance##wf##xh_avx2( \
- src_ptr, src_stride, x_offset, y_offset, dst_ptr, dst_stride, hf, \
- &sse2); \
- dst_ptr += hf * dst_stride; \
- src_ptr += hf * src_stride; \
- se += se2; \
- sse += sse2; \
- } \
- src += wf; \
- dst += wf; \
- } \
- *sse_ptr = sse; \
- return sse - (unsigned int)(((int64_t)se * se) >> (wlog2 + hlog2)); \
- }
-
-AOM_SUB_PIXEL_VAR_AVX2(128, 128, 32, 7, 7);
-AOM_SUB_PIXEL_VAR_AVX2(128, 64, 32, 7, 6);
-AOM_SUB_PIXEL_VAR_AVX2(64, 128, 32, 6, 7);
-AOM_SUB_PIXEL_VAR_AVX2(64, 64, 32, 6, 6);
-AOM_SUB_PIXEL_VAR_AVX2(64, 32, 32, 6, 5);
-AOM_SUB_PIXEL_VAR_AVX2(32, 64, 32, 5, 6);
-AOM_SUB_PIXEL_VAR_AVX2(32, 32, 32, 5, 5);
-AOM_SUB_PIXEL_VAR_AVX2(32, 16, 32, 5, 4);
-
-#define AOM_SUB_PIXEL_AVG_VAR_AVX2(w, h, wf, wlog2, hlog2) \
- unsigned int aom_sub_pixel_avg_variance##w##x##h##_avx2( \
- const uint8_t *src, int src_stride, int x_offset, int y_offset, \
- const uint8_t *dst, int dst_stride, unsigned int *sse_ptr, \
- const uint8_t *sec) { \
- /*Avoid overflow in helper by capping height.*/ \
- const int hf = AOMMIN(h, 64); \
- unsigned int sse = 0; \
- int se = 0; \
- for (int i = 0; i < (w / wf); ++i) { \
- const uint8_t *src_ptr = src; \
- const uint8_t *dst_ptr = dst; \
- const uint8_t *sec_ptr = sec; \
- for (int j = 0; j < (h / hf); ++j) { \
- unsigned int sse2; \
- const int se2 = aom_sub_pixel_avg_variance##wf##xh_avx2( \
- src_ptr, src_stride, x_offset, y_offset, dst_ptr, dst_stride, \
- sec_ptr, w, hf, &sse2); \
- dst_ptr += hf * dst_stride; \
- src_ptr += hf * src_stride; \
- sec_ptr += hf * w; \
- se += se2; \
- sse += sse2; \
- } \
- src += wf; \
- dst += wf; \
- sec += wf; \
- } \
- *sse_ptr = sse; \
- return sse - (unsigned int)(((int64_t)se * se) >> (wlog2 + hlog2)); \
- }
-
-AOM_SUB_PIXEL_AVG_VAR_AVX2(128, 128, 32, 7, 7);
-AOM_SUB_PIXEL_AVG_VAR_AVX2(128, 64, 32, 7, 6);
-AOM_SUB_PIXEL_AVG_VAR_AVX2(64, 128, 32, 6, 7);
-AOM_SUB_PIXEL_AVG_VAR_AVX2(64, 64, 32, 6, 6);
-AOM_SUB_PIXEL_AVG_VAR_AVX2(64, 32, 32, 6, 5);
-AOM_SUB_PIXEL_AVG_VAR_AVX2(32, 64, 32, 5, 6);
-AOM_SUB_PIXEL_AVG_VAR_AVX2(32, 32, 32, 5, 5);
-AOM_SUB_PIXEL_AVG_VAR_AVX2(32, 16, 32, 5, 4);
-
-static INLINE __m256i mm256_loadu2(const uint8_t *p0, const uint8_t *p1) {
- const __m256i d =
- _mm256_castsi128_si256(_mm_loadu_si128((const __m128i *)p1));
- return _mm256_insertf128_si256(d, _mm_loadu_si128((const __m128i *)p0), 1);
-}
-
-static INLINE __m256i mm256_loadu2_16(const uint16_t *p0, const uint16_t *p1) {
- const __m256i d =
- _mm256_castsi128_si256(_mm_loadu_si128((const __m128i *)p1));
- return _mm256_insertf128_si256(d, _mm_loadu_si128((const __m128i *)p0), 1);
-}
-
-static INLINE void comp_mask_pred_line_avx2(const __m256i s0, const __m256i s1,
- const __m256i a,
- uint8_t *comp_pred) {
- const __m256i alpha_max = _mm256_set1_epi8(AOM_BLEND_A64_MAX_ALPHA);
- const int16_t round_bits = 15 - AOM_BLEND_A64_ROUND_BITS;
- const __m256i round_offset = _mm256_set1_epi16(1 << (round_bits));
-
- const __m256i ma = _mm256_sub_epi8(alpha_max, a);
-
- const __m256i ssAL = _mm256_unpacklo_epi8(s0, s1);
- const __m256i aaAL = _mm256_unpacklo_epi8(a, ma);
- const __m256i ssAH = _mm256_unpackhi_epi8(s0, s1);
- const __m256i aaAH = _mm256_unpackhi_epi8(a, ma);
-
- const __m256i blendAL = _mm256_maddubs_epi16(ssAL, aaAL);
- const __m256i blendAH = _mm256_maddubs_epi16(ssAH, aaAH);
- const __m256i roundAL = _mm256_mulhrs_epi16(blendAL, round_offset);
- const __m256i roundAH = _mm256_mulhrs_epi16(blendAH, round_offset);
-
- const __m256i roundA = _mm256_packus_epi16(roundAL, roundAH);
- _mm256_storeu_si256((__m256i *)(comp_pred), roundA);
-}
-
-void aom_comp_mask_pred_avx2(uint8_t *comp_pred, const uint8_t *pred, int width,
- int height, const uint8_t *ref, int ref_stride,
- const uint8_t *mask, int mask_stride,
- int invert_mask) {
- int i = 0;
- const uint8_t *src0 = invert_mask ? pred : ref;
- const uint8_t *src1 = invert_mask ? ref : pred;
- const int stride0 = invert_mask ? width : ref_stride;
- const int stride1 = invert_mask ? ref_stride : width;
- if (width == 8) {
- comp_mask_pred_8_ssse3(comp_pred, height, src0, stride0, src1, stride1,
- mask, mask_stride);
- } else if (width == 16) {
- do {
- const __m256i sA0 = mm256_loadu2(src0 + stride0, src0);
- const __m256i sA1 = mm256_loadu2(src1 + stride1, src1);
- const __m256i aA = mm256_loadu2(mask + mask_stride, mask);
- src0 += (stride0 << 1);
- src1 += (stride1 << 1);
- mask += (mask_stride << 1);
- const __m256i sB0 = mm256_loadu2(src0 + stride0, src0);
- const __m256i sB1 = mm256_loadu2(src1 + stride1, src1);
- const __m256i aB = mm256_loadu2(mask + mask_stride, mask);
- src0 += (stride0 << 1);
- src1 += (stride1 << 1);
- mask += (mask_stride << 1);
- // comp_pred's stride == width == 16
- comp_mask_pred_line_avx2(sA0, sA1, aA, comp_pred);
- comp_mask_pred_line_avx2(sB0, sB1, aB, comp_pred + 32);
- comp_pred += (16 << 2);
- i += 4;
- } while (i < height);
- } else { // for width == 32
- do {
- const __m256i sA0 = _mm256_lddqu_si256((const __m256i *)(src0));
- const __m256i sA1 = _mm256_lddqu_si256((const __m256i *)(src1));
- const __m256i aA = _mm256_lddqu_si256((const __m256i *)(mask));
-
- const __m256i sB0 = _mm256_lddqu_si256((const __m256i *)(src0 + stride0));
- const __m256i sB1 = _mm256_lddqu_si256((const __m256i *)(src1 + stride1));
- const __m256i aB =
- _mm256_lddqu_si256((const __m256i *)(mask + mask_stride));
-
- comp_mask_pred_line_avx2(sA0, sA1, aA, comp_pred);
- comp_mask_pred_line_avx2(sB0, sB1, aB, comp_pred + 32);
- comp_pred += (32 << 1);
-
- src0 += (stride0 << 1);
- src1 += (stride1 << 1);
- mask += (mask_stride << 1);
- i += 2;
- } while (i < height);
- }
-}
-
-static INLINE __m256i highbd_comp_mask_pred_line_avx2(const __m256i s0,
- const __m256i s1,
- const __m256i a) {
- const __m256i alpha_max = _mm256_set1_epi16((1 << AOM_BLEND_A64_ROUND_BITS));
- const __m256i round_const =
- _mm256_set1_epi32((1 << AOM_BLEND_A64_ROUND_BITS) >> 1);
- const __m256i a_inv = _mm256_sub_epi16(alpha_max, a);
-
- const __m256i s_lo = _mm256_unpacklo_epi16(s0, s1);
- const __m256i a_lo = _mm256_unpacklo_epi16(a, a_inv);
- const __m256i pred_lo = _mm256_madd_epi16(s_lo, a_lo);
- const __m256i pred_l = _mm256_srai_epi32(
- _mm256_add_epi32(pred_lo, round_const), AOM_BLEND_A64_ROUND_BITS);
-
- const __m256i s_hi = _mm256_unpackhi_epi16(s0, s1);
- const __m256i a_hi = _mm256_unpackhi_epi16(a, a_inv);
- const __m256i pred_hi = _mm256_madd_epi16(s_hi, a_hi);
- const __m256i pred_h = _mm256_srai_epi32(
- _mm256_add_epi32(pred_hi, round_const), AOM_BLEND_A64_ROUND_BITS);
-
- const __m256i comp = _mm256_packs_epi32(pred_l, pred_h);
-
- return comp;
-}
-
-void aom_highbd_comp_mask_pred_avx2(uint8_t *comp_pred8, const uint8_t *pred8,
- int width, int height, const uint8_t *ref8,
- int ref_stride, const uint8_t *mask,
- int mask_stride, int invert_mask) {
- int i = 0;
- uint16_t *pred = CONVERT_TO_SHORTPTR(pred8);
- uint16_t *ref = CONVERT_TO_SHORTPTR(ref8);
- uint16_t *comp_pred = CONVERT_TO_SHORTPTR(comp_pred8);
- const uint16_t *src0 = invert_mask ? pred : ref;
- const uint16_t *src1 = invert_mask ? ref : pred;
- const int stride0 = invert_mask ? width : ref_stride;
- const int stride1 = invert_mask ? ref_stride : width;
- const __m256i zero = _mm256_setzero_si256();
-
- if (width == 8) {
- do {
- const __m256i s0 = mm256_loadu2_16(src0 + stride0, src0);
- const __m256i s1 = mm256_loadu2_16(src1 + stride1, src1);
-
- const __m128i m_l = _mm_loadl_epi64((const __m128i *)mask);
- const __m128i m_h = _mm_loadl_epi64((const __m128i *)(mask + 8));
-
- __m256i m = _mm256_castsi128_si256(m_l);
- m = _mm256_insertf128_si256(m, m_h, 1);
- const __m256i m_16 = _mm256_unpacklo_epi8(m, zero);
-
- const __m256i comp = highbd_comp_mask_pred_line_avx2(s0, s1, m_16);
-
- _mm_storeu_si128((__m128i *)(comp_pred), _mm256_castsi256_si128(comp));
-
- _mm_storeu_si128((__m128i *)(comp_pred + width),
- _mm256_extractf128_si256(comp, 1));
-
- src0 += (stride0 << 1);
- src1 += (stride1 << 1);
- mask += (mask_stride << 1);
- comp_pred += (width << 1);
- i += 2;
- } while (i < height);
- } else if (width == 16) {
- do {
- const __m256i s0 = _mm256_loadu_si256((const __m256i *)(src0));
- const __m256i s1 = _mm256_loadu_si256((const __m256i *)(src1));
- const __m256i m_16 =
- _mm256_cvtepu8_epi16(_mm_loadu_si128((const __m128i *)mask));
-
- const __m256i comp = highbd_comp_mask_pred_line_avx2(s0, s1, m_16);
-
- _mm256_storeu_si256((__m256i *)comp_pred, comp);
-
- src0 += stride0;
- src1 += stride1;
- mask += mask_stride;
- comp_pred += width;
- i += 1;
- } while (i < height);
- } else if (width == 32) {
- do {
- const __m256i s0 = _mm256_loadu_si256((const __m256i *)src0);
- const __m256i s2 = _mm256_loadu_si256((const __m256i *)(src0 + 16));
- const __m256i s1 = _mm256_loadu_si256((const __m256i *)src1);
- const __m256i s3 = _mm256_loadu_si256((const __m256i *)(src1 + 16));
-
- const __m256i m01_16 =
- _mm256_cvtepu8_epi16(_mm_loadu_si128((const __m128i *)mask));
- const __m256i m23_16 =
- _mm256_cvtepu8_epi16(_mm_loadu_si128((const __m128i *)(mask + 16)));
-
- const __m256i comp = highbd_comp_mask_pred_line_avx2(s0, s1, m01_16);
- const __m256i comp1 = highbd_comp_mask_pred_line_avx2(s2, s3, m23_16);
-
- _mm256_storeu_si256((__m256i *)comp_pred, comp);
- _mm256_storeu_si256((__m256i *)(comp_pred + 16), comp1);
-
- src0 += stride0;
- src1 += stride1;
- mask += mask_stride;
- comp_pred += width;
- i += 1;
- } while (i < height);
- }
-}
diff --git a/third_party/aom/aom_dsp/x86/variance_impl_avx2.c b/third_party/aom/aom_dsp/x86/variance_impl_avx2.c
deleted file mode 100644
index 88e27aef3..000000000
--- a/third_party/aom/aom_dsp/x86/variance_impl_avx2.c
+++ /dev/null
@@ -1,517 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#include <immintrin.h> // AVX2
-
-#include "config/aom_dsp_rtcd.h"
-
-#include "aom_ports/mem.h"
-
-/* clang-format off */
-DECLARE_ALIGNED(32, static const uint8_t, bilinear_filters_avx2[512]) = {
- 16, 0, 16, 0, 16, 0, 16, 0, 16, 0, 16, 0, 16, 0, 16, 0,
- 16, 0, 16, 0, 16, 0, 16, 0, 16, 0, 16, 0, 16, 0, 16, 0,
- 14, 2, 14, 2, 14, 2, 14, 2, 14, 2, 14, 2, 14, 2, 14, 2,
- 14, 2, 14, 2, 14, 2, 14, 2, 14, 2, 14, 2, 14, 2, 14, 2,
- 12, 4, 12, 4, 12, 4, 12, 4, 12, 4, 12, 4, 12, 4, 12, 4,
- 12, 4, 12, 4, 12, 4, 12, 4, 12, 4, 12, 4, 12, 4, 12, 4,
- 10, 6, 10, 6, 10, 6, 10, 6, 10, 6, 10, 6, 10, 6, 10, 6,
- 10, 6, 10, 6, 10, 6, 10, 6, 10, 6, 10, 6, 10, 6, 10, 6,
- 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
- 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
- 6, 10, 6, 10, 6, 10, 6, 10, 6, 10, 6, 10, 6, 10, 6, 10,
- 6, 10, 6, 10, 6, 10, 6, 10, 6, 10, 6, 10, 6, 10, 6, 10,
- 4, 12, 4, 12, 4, 12, 4, 12, 4, 12, 4, 12, 4, 12, 4, 12,
- 4, 12, 4, 12, 4, 12, 4, 12, 4, 12, 4, 12, 4, 12, 4, 12,
- 2, 14, 2, 14, 2, 14, 2, 14, 2, 14, 2, 14, 2, 14, 2, 14,
- 2, 14, 2, 14, 2, 14, 2, 14, 2, 14, 2, 14, 2, 14, 2, 14,
-};
-/* clang-format on */
-
-#define FILTER_SRC(filter) \
- /* filter the source */ \
- exp_src_lo = _mm256_maddubs_epi16(exp_src_lo, filter); \
- exp_src_hi = _mm256_maddubs_epi16(exp_src_hi, filter); \
- \
- /* add 8 to source */ \
- exp_src_lo = _mm256_add_epi16(exp_src_lo, pw8); \
- exp_src_hi = _mm256_add_epi16(exp_src_hi, pw8); \
- \
- /* divide source by 16 */ \
- exp_src_lo = _mm256_srai_epi16(exp_src_lo, 4); \
- exp_src_hi = _mm256_srai_epi16(exp_src_hi, 4);
-
-#define MERGE_WITH_SRC(src_reg, reg) \
- exp_src_lo = _mm256_unpacklo_epi8(src_reg, reg); \
- exp_src_hi = _mm256_unpackhi_epi8(src_reg, reg);
-
-#define LOAD_SRC_DST \
- /* load source and destination */ \
- src_reg = _mm256_loadu_si256((__m256i const *)(src)); \
- dst_reg = _mm256_loadu_si256((__m256i const *)(dst));
-
-#define AVG_NEXT_SRC(src_reg, size_stride) \
- src_next_reg = _mm256_loadu_si256((__m256i const *)(src + size_stride)); \
- /* average between current and next stride source */ \
- src_reg = _mm256_avg_epu8(src_reg, src_next_reg);
-
-#define MERGE_NEXT_SRC(src_reg, size_stride) \
- src_next_reg = _mm256_loadu_si256((__m256i const *)(src + size_stride)); \
- MERGE_WITH_SRC(src_reg, src_next_reg)
-
-#define CALC_SUM_SSE_INSIDE_LOOP \
- /* expand each byte to 2 bytes */ \
- exp_dst_lo = _mm256_unpacklo_epi8(dst_reg, zero_reg); \
- exp_dst_hi = _mm256_unpackhi_epi8(dst_reg, zero_reg); \
- /* source - dest */ \
- exp_src_lo = _mm256_sub_epi16(exp_src_lo, exp_dst_lo); \
- exp_src_hi = _mm256_sub_epi16(exp_src_hi, exp_dst_hi); \
- /* caculate sum */ \
- sum_reg = _mm256_add_epi16(sum_reg, exp_src_lo); \
- exp_src_lo = _mm256_madd_epi16(exp_src_lo, exp_src_lo); \
- sum_reg = _mm256_add_epi16(sum_reg, exp_src_hi); \
- exp_src_hi = _mm256_madd_epi16(exp_src_hi, exp_src_hi); \
- /* calculate sse */ \
- sse_reg = _mm256_add_epi32(sse_reg, exp_src_lo); \
- sse_reg = _mm256_add_epi32(sse_reg, exp_src_hi);
-
-// final calculation to sum and sse
-#define CALC_SUM_AND_SSE \
- res_cmp = _mm256_cmpgt_epi16(zero_reg, sum_reg); \
- sse_reg_hi = _mm256_srli_si256(sse_reg, 8); \
- sum_reg_lo = _mm256_unpacklo_epi16(sum_reg, res_cmp); \
- sum_reg_hi = _mm256_unpackhi_epi16(sum_reg, res_cmp); \
- sse_reg = _mm256_add_epi32(sse_reg, sse_reg_hi); \
- sum_reg = _mm256_add_epi32(sum_reg_lo, sum_reg_hi); \
- \
- sse_reg_hi = _mm256_srli_si256(sse_reg, 4); \
- sum_reg_hi = _mm256_srli_si256(sum_reg, 8); \
- \
- sse_reg = _mm256_add_epi32(sse_reg, sse_reg_hi); \
- sum_reg = _mm256_add_epi32(sum_reg, sum_reg_hi); \
- *((int *)sse) = _mm_cvtsi128_si32(_mm256_castsi256_si128(sse_reg)) + \
- _mm_cvtsi128_si32(_mm256_extractf128_si256(sse_reg, 1)); \
- sum_reg_hi = _mm256_srli_si256(sum_reg, 4); \
- sum_reg = _mm256_add_epi32(sum_reg, sum_reg_hi); \
- sum = _mm_cvtsi128_si32(_mm256_castsi256_si128(sum_reg)) + \
- _mm_cvtsi128_si32(_mm256_extractf128_si256(sum_reg, 1));
-
-unsigned int aom_sub_pixel_variance32xh_avx2(const uint8_t *src, int src_stride,
- int x_offset, int y_offset,
- const uint8_t *dst, int dst_stride,
- int height, unsigned int *sse) {
- __m256i src_reg, dst_reg, exp_src_lo, exp_src_hi, exp_dst_lo, exp_dst_hi;
- __m256i sse_reg, sum_reg, sse_reg_hi, res_cmp, sum_reg_lo, sum_reg_hi;
- __m256i zero_reg;
- int i, sum;
- sum_reg = _mm256_set1_epi16(0);
- sse_reg = _mm256_set1_epi16(0);
- zero_reg = _mm256_set1_epi16(0);
-
- // x_offset = 0 and y_offset = 0
- if (x_offset == 0) {
- if (y_offset == 0) {
- for (i = 0; i < height; i++) {
- LOAD_SRC_DST
- // expend each byte to 2 bytes
- MERGE_WITH_SRC(src_reg, zero_reg)
- CALC_SUM_SSE_INSIDE_LOOP
- src += src_stride;
- dst += dst_stride;
- }
- // x_offset = 0 and y_offset = 8
- } else if (y_offset == 8) {
- __m256i src_next_reg;
- for (i = 0; i < height; i++) {
- LOAD_SRC_DST
- AVG_NEXT_SRC(src_reg, src_stride)
- // expend each byte to 2 bytes
- MERGE_WITH_SRC(src_reg, zero_reg)
- CALC_SUM_SSE_INSIDE_LOOP
- src += src_stride;
- dst += dst_stride;
- }
- // x_offset = 0 and y_offset = bilin interpolation
- } else {
- __m256i filter, pw8, src_next_reg;
-
- y_offset <<= 5;
- filter = _mm256_load_si256(
- (__m256i const *)(bilinear_filters_avx2 + y_offset));
- pw8 = _mm256_set1_epi16(8);
- for (i = 0; i < height; i++) {
- LOAD_SRC_DST
- MERGE_NEXT_SRC(src_reg, src_stride)
- FILTER_SRC(filter)
- CALC_SUM_SSE_INSIDE_LOOP
- src += src_stride;
- dst += dst_stride;
- }
- }
- // x_offset = 8 and y_offset = 0
- } else if (x_offset == 8) {
- if (y_offset == 0) {
- __m256i src_next_reg;
- for (i = 0; i < height; i++) {
- LOAD_SRC_DST
- AVG_NEXT_SRC(src_reg, 1)
- // expand each byte to 2 bytes
- MERGE_WITH_SRC(src_reg, zero_reg)
- CALC_SUM_SSE_INSIDE_LOOP
- src += src_stride;
- dst += dst_stride;
- }
- // x_offset = 8 and y_offset = 8
- } else if (y_offset == 8) {
- __m256i src_next_reg, src_avg;
- // load source and another source starting from the next
- // following byte
- src_reg = _mm256_loadu_si256((__m256i const *)(src));
- AVG_NEXT_SRC(src_reg, 1)
- for (i = 0; i < height; i++) {
- src_avg = src_reg;
- src += src_stride;
- LOAD_SRC_DST
- AVG_NEXT_SRC(src_reg, 1)
- // average between previous average to current average
- src_avg = _mm256_avg_epu8(src_avg, src_reg);
- // expand each byte to 2 bytes
- MERGE_WITH_SRC(src_avg, zero_reg)
- // save current source average
- CALC_SUM_SSE_INSIDE_LOOP
- dst += dst_stride;
- }
- // x_offset = 8 and y_offset = bilin interpolation
- } else {
- __m256i filter, pw8, src_next_reg, src_avg;
- y_offset <<= 5;
- filter = _mm256_load_si256(
- (__m256i const *)(bilinear_filters_avx2 + y_offset));
- pw8 = _mm256_set1_epi16(8);
- // load source and another source starting from the next
- // following byte
- src_reg = _mm256_loadu_si256((__m256i const *)(src));
- AVG_NEXT_SRC(src_reg, 1)
- for (i = 0; i < height; i++) {
- // save current source average
- src_avg = src_reg;
- src += src_stride;
- LOAD_SRC_DST
- AVG_NEXT_SRC(src_reg, 1)
- MERGE_WITH_SRC(src_avg, src_reg)
- FILTER_SRC(filter)
- CALC_SUM_SSE_INSIDE_LOOP
- dst += dst_stride;
- }
- }
- // x_offset = bilin interpolation and y_offset = 0
- } else {
- if (y_offset == 0) {
- __m256i filter, pw8, src_next_reg;
- x_offset <<= 5;
- filter = _mm256_load_si256(
- (__m256i const *)(bilinear_filters_avx2 + x_offset));
- pw8 = _mm256_set1_epi16(8);
- for (i = 0; i < height; i++) {
- LOAD_SRC_DST
- MERGE_NEXT_SRC(src_reg, 1)
- FILTER_SRC(filter)
- CALC_SUM_SSE_INSIDE_LOOP
- src += src_stride;
- dst += dst_stride;
- }
- // x_offset = bilin interpolation and y_offset = 8
- } else if (y_offset == 8) {
- __m256i filter, pw8, src_next_reg, src_pack;
- x_offset <<= 5;
- filter = _mm256_load_si256(
- (__m256i const *)(bilinear_filters_avx2 + x_offset));
- pw8 = _mm256_set1_epi16(8);
- src_reg = _mm256_loadu_si256((__m256i const *)(src));
- MERGE_NEXT_SRC(src_reg, 1)
- FILTER_SRC(filter)
- // convert each 16 bit to 8 bit to each low and high lane source
- src_pack = _mm256_packus_epi16(exp_src_lo, exp_src_hi);
- for (i = 0; i < height; i++) {
- src += src_stride;
- LOAD_SRC_DST
- MERGE_NEXT_SRC(src_reg, 1)
- FILTER_SRC(filter)
- src_reg = _mm256_packus_epi16(exp_src_lo, exp_src_hi);
- // average between previous pack to the current
- src_pack = _mm256_avg_epu8(src_pack, src_reg);
- MERGE_WITH_SRC(src_pack, zero_reg)
- CALC_SUM_SSE_INSIDE_LOOP
- src_pack = src_reg;
- dst += dst_stride;
- }
- // x_offset = bilin interpolation and y_offset = bilin interpolation
- } else {
- __m256i xfilter, yfilter, pw8, src_next_reg, src_pack;
- x_offset <<= 5;
- xfilter = _mm256_load_si256(
- (__m256i const *)(bilinear_filters_avx2 + x_offset));
- y_offset <<= 5;
- yfilter = _mm256_load_si256(
- (__m256i const *)(bilinear_filters_avx2 + y_offset));
- pw8 = _mm256_set1_epi16(8);
- // load source and another source starting from the next
- // following byte
- src_reg = _mm256_loadu_si256((__m256i const *)(src));
- MERGE_NEXT_SRC(src_reg, 1)
-
- FILTER_SRC(xfilter)
- // convert each 16 bit to 8 bit to each low and high lane source
- src_pack = _mm256_packus_epi16(exp_src_lo, exp_src_hi);
- for (i = 0; i < height; i++) {
- src += src_stride;
- LOAD_SRC_DST
- MERGE_NEXT_SRC(src_reg, 1)
- FILTER_SRC(xfilter)
- src_reg = _mm256_packus_epi16(exp_src_lo, exp_src_hi);
- // merge previous pack to current pack source
- MERGE_WITH_SRC(src_pack, src_reg)
- // filter the source
- FILTER_SRC(yfilter)
- src_pack = src_reg;
- CALC_SUM_SSE_INSIDE_LOOP
- dst += dst_stride;
- }
- }
- }
- CALC_SUM_AND_SSE
- _mm256_zeroupper();
- return sum;
-}
-
-unsigned int aom_sub_pixel_avg_variance32xh_avx2(
- const uint8_t *src, int src_stride, int x_offset, int y_offset,
- const uint8_t *dst, int dst_stride, const uint8_t *sec, int sec_stride,
- int height, unsigned int *sse) {
- __m256i sec_reg;
- __m256i src_reg, dst_reg, exp_src_lo, exp_src_hi, exp_dst_lo, exp_dst_hi;
- __m256i sse_reg, sum_reg, sse_reg_hi, res_cmp, sum_reg_lo, sum_reg_hi;
- __m256i zero_reg;
- int i, sum;
- sum_reg = _mm256_set1_epi16(0);
- sse_reg = _mm256_set1_epi16(0);
- zero_reg = _mm256_set1_epi16(0);
-
- // x_offset = 0 and y_offset = 0
- if (x_offset == 0) {
- if (y_offset == 0) {
- for (i = 0; i < height; i++) {
- LOAD_SRC_DST
- sec_reg = _mm256_loadu_si256((__m256i const *)(sec));
- src_reg = _mm256_avg_epu8(src_reg, sec_reg);
- sec += sec_stride;
- // expend each byte to 2 bytes
- MERGE_WITH_SRC(src_reg, zero_reg)
- CALC_SUM_SSE_INSIDE_LOOP
- src += src_stride;
- dst += dst_stride;
- }
- } else if (y_offset == 8) {
- __m256i src_next_reg;
- for (i = 0; i < height; i++) {
- LOAD_SRC_DST
- AVG_NEXT_SRC(src_reg, src_stride)
- sec_reg = _mm256_loadu_si256((__m256i const *)(sec));
- src_reg = _mm256_avg_epu8(src_reg, sec_reg);
- sec += sec_stride;
- // expend each byte to 2 bytes
- MERGE_WITH_SRC(src_reg, zero_reg)
- CALC_SUM_SSE_INSIDE_LOOP
- src += src_stride;
- dst += dst_stride;
- }
- // x_offset = 0 and y_offset = bilin interpolation
- } else {
- __m256i filter, pw8, src_next_reg;
-
- y_offset <<= 5;
- filter = _mm256_load_si256(
- (__m256i const *)(bilinear_filters_avx2 + y_offset));
- pw8 = _mm256_set1_epi16(8);
- for (i = 0; i < height; i++) {
- LOAD_SRC_DST
- MERGE_NEXT_SRC(src_reg, src_stride)
- FILTER_SRC(filter)
- src_reg = _mm256_packus_epi16(exp_src_lo, exp_src_hi);
- sec_reg = _mm256_loadu_si256((__m256i const *)(sec));
- src_reg = _mm256_avg_epu8(src_reg, sec_reg);
- sec += sec_stride;
- MERGE_WITH_SRC(src_reg, zero_reg)
- CALC_SUM_SSE_INSIDE_LOOP
- src += src_stride;
- dst += dst_stride;
- }
- }
- // x_offset = 8 and y_offset = 0
- } else if (x_offset == 8) {
- if (y_offset == 0) {
- __m256i src_next_reg;
- for (i = 0; i < height; i++) {
- LOAD_SRC_DST
- AVG_NEXT_SRC(src_reg, 1)
- sec_reg = _mm256_loadu_si256((__m256i const *)(sec));
- src_reg = _mm256_avg_epu8(src_reg, sec_reg);
- sec += sec_stride;
- // expand each byte to 2 bytes
- MERGE_WITH_SRC(src_reg, zero_reg)
- CALC_SUM_SSE_INSIDE_LOOP
- src += src_stride;
- dst += dst_stride;
- }
- // x_offset = 8 and y_offset = 8
- } else if (y_offset == 8) {
- __m256i src_next_reg, src_avg;
- // load source and another source starting from the next
- // following byte
- src_reg = _mm256_loadu_si256((__m256i const *)(src));
- AVG_NEXT_SRC(src_reg, 1)
- for (i = 0; i < height; i++) {
- // save current source average
- src_avg = src_reg;
- src += src_stride;
- LOAD_SRC_DST
- AVG_NEXT_SRC(src_reg, 1)
- // average between previous average to current average
- src_avg = _mm256_avg_epu8(src_avg, src_reg);
- sec_reg = _mm256_loadu_si256((__m256i const *)(sec));
- src_avg = _mm256_avg_epu8(src_avg, sec_reg);
- sec += sec_stride;
- // expand each byte to 2 bytes
- MERGE_WITH_SRC(src_avg, zero_reg)
- CALC_SUM_SSE_INSIDE_LOOP
- dst += dst_stride;
- }
- // x_offset = 8 and y_offset = bilin interpolation
- } else {
- __m256i filter, pw8, src_next_reg, src_avg;
- y_offset <<= 5;
- filter = _mm256_load_si256(
- (__m256i const *)(bilinear_filters_avx2 + y_offset));
- pw8 = _mm256_set1_epi16(8);
- // load source and another source starting from the next
- // following byte
- src_reg = _mm256_loadu_si256((__m256i const *)(src));
- AVG_NEXT_SRC(src_reg, 1)
- for (i = 0; i < height; i++) {
- // save current source average
- src_avg = src_reg;
- src += src_stride;
- LOAD_SRC_DST
- AVG_NEXT_SRC(src_reg, 1)
- MERGE_WITH_SRC(src_avg, src_reg)
- FILTER_SRC(filter)
- src_avg = _mm256_packus_epi16(exp_src_lo, exp_src_hi);
- sec_reg = _mm256_loadu_si256((__m256i const *)(sec));
- src_avg = _mm256_avg_epu8(src_avg, sec_reg);
- // expand each byte to 2 bytes
- MERGE_WITH_SRC(src_avg, zero_reg)
- sec += sec_stride;
- CALC_SUM_SSE_INSIDE_LOOP
- dst += dst_stride;
- }
- }
- // x_offset = bilin interpolation and y_offset = 0
- } else {
- if (y_offset == 0) {
- __m256i filter, pw8, src_next_reg;
- x_offset <<= 5;
- filter = _mm256_load_si256(
- (__m256i const *)(bilinear_filters_avx2 + x_offset));
- pw8 = _mm256_set1_epi16(8);
- for (i = 0; i < height; i++) {
- LOAD_SRC_DST
- MERGE_NEXT_SRC(src_reg, 1)
- FILTER_SRC(filter)
- src_reg = _mm256_packus_epi16(exp_src_lo, exp_src_hi);
- sec_reg = _mm256_loadu_si256((__m256i const *)(sec));
- src_reg = _mm256_avg_epu8(src_reg, sec_reg);
- MERGE_WITH_SRC(src_reg, zero_reg)
- sec += sec_stride;
- CALC_SUM_SSE_INSIDE_LOOP
- src += src_stride;
- dst += dst_stride;
- }
- // x_offset = bilin interpolation and y_offset = 8
- } else if (y_offset == 8) {
- __m256i filter, pw8, src_next_reg, src_pack;
- x_offset <<= 5;
- filter = _mm256_load_si256(
- (__m256i const *)(bilinear_filters_avx2 + x_offset));
- pw8 = _mm256_set1_epi16(8);
- src_reg = _mm256_loadu_si256((__m256i const *)(src));
- MERGE_NEXT_SRC(src_reg, 1)
- FILTER_SRC(filter)
- // convert each 16 bit to 8 bit to each low and high lane source
- src_pack = _mm256_packus_epi16(exp_src_lo, exp_src_hi);
- for (i = 0; i < height; i++) {
- src += src_stride;
- LOAD_SRC_DST
- MERGE_NEXT_SRC(src_reg, 1)
- FILTER_SRC(filter)
- src_reg = _mm256_packus_epi16(exp_src_lo, exp_src_hi);
- // average between previous pack to the current
- src_pack = _mm256_avg_epu8(src_pack, src_reg);
- sec_reg = _mm256_loadu_si256((__m256i const *)(sec));
- src_pack = _mm256_avg_epu8(src_pack, sec_reg);
- sec += sec_stride;
- MERGE_WITH_SRC(src_pack, zero_reg)
- src_pack = src_reg;
- CALC_SUM_SSE_INSIDE_LOOP
- dst += dst_stride;
- }
- // x_offset = bilin interpolation and y_offset = bilin interpolation
- } else {
- __m256i xfilter, yfilter, pw8, src_next_reg, src_pack;
- x_offset <<= 5;
- xfilter = _mm256_load_si256(
- (__m256i const *)(bilinear_filters_avx2 + x_offset));
- y_offset <<= 5;
- yfilter = _mm256_load_si256(
- (__m256i const *)(bilinear_filters_avx2 + y_offset));
- pw8 = _mm256_set1_epi16(8);
- // load source and another source starting from the next
- // following byte
- src_reg = _mm256_loadu_si256((__m256i const *)(src));
- MERGE_NEXT_SRC(src_reg, 1)
-
- FILTER_SRC(xfilter)
- // convert each 16 bit to 8 bit to each low and high lane source
- src_pack = _mm256_packus_epi16(exp_src_lo, exp_src_hi);
- for (i = 0; i < height; i++) {
- src += src_stride;
- LOAD_SRC_DST
- MERGE_NEXT_SRC(src_reg, 1)
- FILTER_SRC(xfilter)
- src_reg = _mm256_packus_epi16(exp_src_lo, exp_src_hi);
- // merge previous pack to current pack source
- MERGE_WITH_SRC(src_pack, src_reg)
- // filter the source
- FILTER_SRC(yfilter)
- src_pack = _mm256_packus_epi16(exp_src_lo, exp_src_hi);
- sec_reg = _mm256_loadu_si256((__m256i const *)(sec));
- src_pack = _mm256_avg_epu8(src_pack, sec_reg);
- MERGE_WITH_SRC(src_pack, zero_reg)
- src_pack = src_reg;
- sec += sec_stride;
- CALC_SUM_SSE_INSIDE_LOOP
- dst += dst_stride;
- }
- }
- }
- CALC_SUM_AND_SSE
- _mm256_zeroupper();
- return sum;
-}
diff --git a/third_party/aom/aom_dsp/x86/variance_impl_ssse3.c b/third_party/aom/aom_dsp/x86/variance_impl_ssse3.c
deleted file mode 100644
index 66b0d7d84..000000000
--- a/third_party/aom/aom_dsp/x86/variance_impl_ssse3.c
+++ /dev/null
@@ -1,129 +0,0 @@
-/*
- * Copyright (c) 2018, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#include <tmmintrin.h>
-
-#include "config/aom_config.h"
-#include "config/aom_dsp_rtcd.h"
-
-#include "aom_dsp/x86/synonyms.h"
-
-void aom_var_filter_block2d_bil_first_pass_ssse3(
- const uint8_t *a, uint16_t *b, unsigned int src_pixels_per_line,
- unsigned int pixel_step, unsigned int output_height,
- unsigned int output_width, const uint8_t *filter) {
- // Note: filter[0], filter[1] could be {128, 0}, where 128 will overflow
- // in computation using _mm_maddubs_epi16.
- // Change {128, 0} to {64, 0} and reduce FILTER_BITS by 1 to avoid overflow.
- const int16_t round = (1 << (FILTER_BITS - 1)) >> 1;
- const __m128i r = _mm_set1_epi16(round);
- const uint8_t f0 = filter[0] >> 1;
- const uint8_t f1 = filter[1] >> 1;
- const __m128i filters = _mm_setr_epi8(f0, f1, f0, f1, f0, f1, f0, f1, f0, f1,
- f0, f1, f0, f1, f0, f1);
- unsigned int i, j;
- (void)pixel_step;
-
- if (output_width >= 8) {
- for (i = 0; i < output_height; ++i) {
- for (j = 0; j < output_width; j += 8) {
- // load source
- __m128i source_low = xx_loadl_64(a);
- __m128i source_hi = xx_loadl_64(a + 1);
-
- // unpack to:
- // { a[0], a[1], a[1], a[2], a[2], a[3], a[3], a[4],
- // a[4], a[5], a[5], a[6], a[6], a[7], a[7], a[8] }
- __m128i source = _mm_unpacklo_epi8(source_low, source_hi);
-
- // b[i] = a[i] * filter[0] + a[i + 1] * filter[1]
- __m128i res = _mm_maddubs_epi16(source, filters);
-
- // round
- res = _mm_srai_epi16(_mm_add_epi16(res, r), FILTER_BITS - 1);
-
- xx_storeu_128(b, res);
-
- a += 8;
- b += 8;
- }
-
- a += src_pixels_per_line - output_width;
- }
- } else {
- const __m128i shuffle_mask =
- _mm_setr_epi8(0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, 8);
- for (i = 0; i < output_height; ++i) {
- // load source, only first 5 values are meaningful:
- // { a[0], a[1], a[2], a[3], a[4], xxxx }
- __m128i source = xx_loadl_64(a);
-
- // shuffle, up to the first 8 are useful
- // { a[0], a[1], a[1], a[2], a[2], a[3], a[3], a[4],
- // a[4], a[5], a[5], a[6], a[6], a[7], a[7], a[8] }
- __m128i source_shuffle = _mm_shuffle_epi8(source, shuffle_mask);
-
- __m128i res = _mm_maddubs_epi16(source_shuffle, filters);
- res = _mm_srai_epi16(_mm_add_epi16(res, r), FILTER_BITS - 1);
-
- xx_storel_64(b, res);
-
- a += src_pixels_per_line;
- b += output_width;
- }
- }
-}
-
-void aom_var_filter_block2d_bil_second_pass_ssse3(
- const uint16_t *a, uint8_t *b, unsigned int src_pixels_per_line,
- unsigned int pixel_step, unsigned int output_height,
- unsigned int output_width, const uint8_t *filter) {
- const int16_t round = (1 << FILTER_BITS) >> 1;
- const __m128i r = _mm_set1_epi32(round);
- const __m128i filters =
- _mm_setr_epi16(filter[0], filter[1], filter[0], filter[1], filter[0],
- filter[1], filter[0], filter[1]);
- const __m128i shuffle_mask =
- _mm_setr_epi8(0, 1, 8, 9, 2, 3, 10, 11, 4, 5, 12, 13, 6, 7, 14, 15);
- const __m128i mask =
- _mm_setr_epi8(0, 4, 8, 12, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1);
- unsigned int i, j;
-
- for (i = 0; i < output_height; ++i) {
- for (j = 0; j < output_width; j += 4) {
- // load source as:
- // { a[0], a[1], a[2], a[3], a[w], a[w+1], a[w+2], a[w+3] }
- __m128i source1 = xx_loadl_64(a);
- __m128i source2 = xx_loadl_64(a + pixel_step);
- __m128i source = _mm_unpacklo_epi64(source1, source2);
-
- // shuffle source to:
- // { a[0], a[w], a[1], a[w+1], a[2], a[w+2], a[3], a[w+3] }
- __m128i source_shuffle = _mm_shuffle_epi8(source, shuffle_mask);
-
- // b[i] = a[i] * filter[0] + a[w + i] * filter[1]
- __m128i res = _mm_madd_epi16(source_shuffle, filters);
-
- // round
- res = _mm_srai_epi32(_mm_add_epi32(res, r), FILTER_BITS);
-
- // shuffle to get each lower 8 bit of every 32 bit
- res = _mm_shuffle_epi8(res, mask);
-
- xx_storel_32(b, res);
-
- a += 4;
- b += 4;
- }
-
- a += src_pixels_per_line - output_width;
- }
-}
diff --git a/third_party/aom/aom_dsp/x86/variance_sse2.c b/third_party/aom/aom_dsp/x86/variance_sse2.c
deleted file mode 100644
index 3c37e77c0..000000000
--- a/third_party/aom/aom_dsp/x86/variance_sse2.c
+++ /dev/null
@@ -1,806 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#include <assert.h>
-#include <emmintrin.h> // SSE2
-
-#include "config/aom_config.h"
-#include "config/aom_dsp_rtcd.h"
-#include "config/av1_rtcd.h"
-
-#include "aom_dsp/blend.h"
-#include "aom_dsp/x86/synonyms.h"
-
-#include "aom_ports/mem.h"
-
-#include "av1/common/filter.h"
-#include "av1/common/onyxc_int.h"
-#include "av1/common/reconinter.h"
-
-// Returns the sum of squares of the 256 int16_t values (32 vectors of 8)
-// starting at src.
-unsigned int aom_get_mb_ss_sse2(const int16_t *src) {
-  const int16_t *const end = src + 32 * 8;
-  __m128i acc = _mm_setzero_si128();
-
-  for (; src != end; src += 8) {
-    const __m128i coeffs = xx_loadu_128(src);
-    acc = _mm_add_epi32(acc, _mm_madd_epi16(coeffs, coeffs));
-  }
-
-  // Fold the four 32-bit partial sums into lane 0.
-  acc = _mm_add_epi32(acc, _mm_srli_si128(acc, 8));
-  acc = _mm_add_epi32(acc, _mm_srli_si128(acc, 4));
-  return _mm_cvtsi128_si32(acc);
-}
-
-// Loads four bytes from row p and four bytes from row p + stride, then
-// zero-extends the eight pixels to 16 bits (row 0 in words 0..3, row 1 in
-// words 4..7).
-static INLINE __m128i load4x2_sse2(const uint8_t *const p, const int stride) {
-  const __m128i row0 = _mm_cvtsi32_si128(*(const uint32_t *)(p + 0 * stride));
-  const __m128i row1 = _mm_cvtsi32_si128(*(const uint32_t *)(p + 1 * stride));
-  const __m128i both_rows = _mm_unpacklo_epi32(row0, row1);
-  return _mm_unpacklo_epi8(both_rows, _mm_setzero_si128());
-}
-
-// Loads eight bytes from p and zero-extends them to eight 16-bit words.
-static INLINE __m128i load8_8to16_sse2(const uint8_t *const p) {
-  const __m128i zero = _mm_setzero_si128();
-  const __m128i bytes = _mm_loadl_epi64((const __m128i *)p);
-  return _mm_unpacklo_epi8(bytes, zero);
-}
-
-// Horizontally adds the four 32-bit lanes of val and returns the total.
-static INLINE unsigned int add32x4_sse2(__m128i val) {
-  const __m128i pair_sums = _mm_add_epi32(val, _mm_srli_si128(val, 8));
-  const __m128i total =
-      _mm_add_epi32(pair_sums, _mm_srli_si128(pair_sums, 4));
-  return _mm_cvtsi128_si32(total);
-}
-
-// Sign-extends the eight 16-bit lanes of sum to 32 bits and adds the low
-// four to the high four, giving four 32-bit partial sums.
-static INLINE __m128i sum_to_32bit_sse2(const __m128i sum) {
-  // Duplicating each word and shifting right arithmetically by 16 is a
-  // sign extension from 16 to 32 bits.
-  const __m128i low_dup = _mm_unpacklo_epi16(sum, sum);
-  const __m128i high_dup = _mm_unpackhi_epi16(sum, sum);
-  return _mm_add_epi32(_mm_srai_epi32(low_dup, 16),
-                       _mm_srai_epi32(high_dup, 16));
-}
-
-// Accumulates one vector of eight 16-bit samples: adds the squared
-// differences to the 32-bit lanes of *sse and the raw differences to the
-// 16-bit lanes of *sum.
-static INLINE void variance_kernel_sse2(const __m128i src, const __m128i ref,
-                                        __m128i *const sse,
-                                        __m128i *const sum) {
-  const __m128i delta = _mm_sub_epi16(src, ref);
-  const __m128i delta_sq = _mm_madd_epi16(delta, delta);
-  *sse = _mm_add_epi32(*sse, delta_sq);
-  *sum = _mm_add_epi16(*sum, delta);
-}
-
-// Final reduction for blocks of at most 128 pixels (such as 8x16 or 16x8).
-// The total difference of 128 pixels still fits in a 16-bit integer, so the
-// whole horizontal add stays in 16-bit lanes; this makes it slightly faster
-// than variance_final_256_pel_sse2().
-static INLINE void variance_final_128_pel_sse2(__m128i vsse, __m128i vsum,
-                                               unsigned int *const sse,
-                                               int *const sum) {
-  __m128i folded = _mm_add_epi16(vsum, _mm_srli_si128(vsum, 8));
-  folded = _mm_add_epi16(folded, _mm_srli_si128(folded, 4));
-  folded = _mm_add_epi16(folded, _mm_srli_si128(folded, 2));
-
-  *sse = add32x4_sse2(vsse);
-  *sum = (int16_t)_mm_extract_epi16(folded, 0);
-}
-
-// Final reduction for blocks of at most 256 pixels (such as 16x16). The
-// last two 16-bit partial sums are added as ints rather than in a 16-bit
-// lane.
-static INLINE void variance_final_256_pel_sse2(__m128i vsse, __m128i vsum,
-                                               unsigned int *const sse,
-                                               int *const sum) {
-  __m128i folded = _mm_add_epi16(vsum, _mm_srli_si128(vsum, 8));
-  folded = _mm_add_epi16(folded, _mm_srli_si128(folded, 4));
-
-  *sse = add32x4_sse2(vsse);
-  const int lane0 = (int16_t)_mm_extract_epi16(folded, 0);
-  const int lane1 = (int16_t)_mm_extract_epi16(folded, 1);
-  *sum = lane0 + lane1;
-}
-
-// Final reduction for blocks of at most 512 pixels (such as 16x32 or
-// 32x16): one 16-bit fold, then sign-extend to 32 bits before the last adds.
-static INLINE void variance_final_512_pel_sse2(__m128i vsse, __m128i vsum,
-                                               unsigned int *const sse,
-                                               int *const sum) {
-  const __m128i halves = _mm_add_epi16(vsum, _mm_srli_si128(vsum, 8));
-  const __m128i widened =
-      _mm_srai_epi32(_mm_unpacklo_epi16(halves, halves), 16);
-
-  *sse = add32x4_sse2(vsse);
-  *sum = add32x4_sse2(widened);
-}
-
-// Final reduction for blocks of at most 1024 pixels (such as 32x32): the
-// 16-bit partial sums are widened to 32 bits before any of them are added.
-static INLINE void variance_final_1024_pel_sse2(__m128i vsse, __m128i vsum,
-                                                unsigned int *const sse,
-                                                int *const sum) {
-  *sse = add32x4_sse2(vsse);
-  *sum = add32x4_sse2(sum_to_32bit_sse2(vsum));
-}
-
-// Accumulates SSE and difference sums for a 4-wide column of h rows, two
-// rows per step. *sum is reset here; *sse is accumulated into.
-static INLINE void variance4_sse2(const uint8_t *src, const int src_stride,
-                                  const uint8_t *ref, const int ref_stride,
-                                  const int h, __m128i *const sse,
-                                  __m128i *const sum) {
-  int rows_left = h;
-  assert(h <= 256);  // May overflow for larger height.
-  *sum = _mm_setzero_si128();
-
-  while (rows_left > 0) {
-    const __m128i src_rows = load4x2_sse2(src, src_stride);
-    const __m128i ref_rows = load4x2_sse2(ref, ref_stride);
-
-    variance_kernel_sse2(src_rows, ref_rows, sse, sum);
-    src += 2 * src_stride;
-    ref += 2 * ref_stride;
-    rows_left -= 2;
-  }
-}
-
-// Accumulates SSE and difference sums for an 8-wide column of h rows, one
-// row per step. *sum is reset here; *sse is accumulated into.
-static INLINE void variance8_sse2(const uint8_t *src, const int src_stride,
-                                  const uint8_t *ref, const int ref_stride,
-                                  const int h, __m128i *const sse,
-                                  __m128i *const sum) {
-  assert(h <= 128);  // May overflow for larger height.
-  *sum = _mm_setzero_si128();
-  for (int rows_left = h; rows_left > 0; --rows_left) {
-    const __m128i src_row = load8_8to16_sse2(src);
-    const __m128i ref_row = load8_8to16_sse2(ref);
-
-    variance_kernel_sse2(src_row, ref_row, sse, sum);
-    src += src_stride;
-    ref += ref_stride;
-  }
-}
-
-// Processes 16 pixels: loads 16 bytes from src and from ref, widens both to
-// 16 bits and feeds the low and then the high eight pixels to
-// variance_kernel_sse2().
-static INLINE void variance16_kernel_sse2(const uint8_t *const src,
-                                          const uint8_t *const ref,
-                                          __m128i *const sse,
-                                          __m128i *const sum) {
-  const __m128i zero = _mm_setzero_si128();
-  const __m128i src_bytes = _mm_loadu_si128((const __m128i *)src);
-  const __m128i ref_bytes = _mm_loadu_si128((const __m128i *)ref);
-
-  variance_kernel_sse2(_mm_unpacklo_epi8(src_bytes, zero),
-                       _mm_unpacklo_epi8(ref_bytes, zero), sse, sum);
-  variance_kernel_sse2(_mm_unpackhi_epi8(src_bytes, zero),
-                       _mm_unpackhi_epi8(ref_bytes, zero), sse, sum);
-}
-
-// Accumulates SSE and difference sums for a 16-wide column of h rows.
-// *sum is reset here; *sse is accumulated into.
-static INLINE void variance16_sse2(const uint8_t *src, const int src_stride,
-                                   const uint8_t *ref, const int ref_stride,
-                                   const int h, __m128i *const sse,
-                                   __m128i *const sum) {
-  assert(h <= 64);  // May overflow for larger height.
-  *sum = _mm_setzero_si128();
-
-  for (int rows_left = h; rows_left > 0; --rows_left) {
-    variance16_kernel_sse2(src, ref, sse, sum);
-    src += src_stride;
-    ref += ref_stride;
-  }
-}
-
-// Accumulates SSE and difference sums for a 32-wide column of h rows, as
-// two 16-pixel kernels per row. *sum is reset here.
-static INLINE void variance32_sse2(const uint8_t *src, const int src_stride,
-                                   const uint8_t *ref, const int ref_stride,
-                                   const int h, __m128i *const sse,
-                                   __m128i *const sum) {
-  assert(h <= 32);  // May overflow for larger height.
-  // Don't initialize sse here since it's an accumulation.
-  *sum = _mm_setzero_si128();
-
-  for (int row = 0; row < h; ++row) {
-    for (int x = 0; x < 32; x += 16) {
-      variance16_kernel_sse2(src + x, ref + x, sse, sum);
-    }
-    src += src_stride;
-    ref += ref_stride;
-  }
-}
-
-// Accumulates SSE and difference sums for a 64-wide column of h rows, as
-// four 16-pixel kernels per row. *sum is reset here; *sse is accumulated
-// into.
-static INLINE void variance64_sse2(const uint8_t *src, const int src_stride,
-                                   const uint8_t *ref, const int ref_stride,
-                                   const int h, __m128i *const sse,
-                                   __m128i *const sum) {
-  assert(h <= 16);  // May overflow for larger height.
-  *sum = _mm_setzero_si128();
-
-  for (int row = 0; row < h; ++row) {
-    for (int x = 0; x < 64; x += 16) {
-      variance16_kernel_sse2(src + x, ref + x, sse, sum);
-    }
-    src += src_stride;
-    ref += ref_stride;
-  }
-}
-
-// Accumulates SSE and difference sums for a 128-wide column of h rows, as
-// eight 16-pixel kernels per row. *sum is reset here; *sse is accumulated
-// into.
-static INLINE void variance128_sse2(const uint8_t *src, const int src_stride,
-                                    const uint8_t *ref, const int ref_stride,
-                                    const int h, __m128i *const sse,
-                                    __m128i *const sum) {
-  assert(h <= 8);  // May overflow for larger height.
-  *sum = _mm_setzero_si128();
-
-  for (int row = 0; row < h; ++row) {
-    for (int x = 0; x < 128; x += 16) {
-      variance16_kernel_sse2(src + x, ref + x, sse, sum);
-    }
-    src += src_stride;
-    ref += ref_stride;
-  }
-}
-
-// Defines aom_variance<bw>x<bh>_sse2() for block sizes that a single
-// variance<bw>_sse2() call can cover without overflowing its 16-bit
-// difference sums.
-//   bits       - right shift applied to sum * sum; expected to be
-//                log2(bw * bh).
-//   max_pixels - selects the variance_final_<max_pixels>_pel_sse2()
-//                reduction.
-// The generated function stores the sum of squared differences in *sse and
-// returns *sse - ((sum * sum) >> bits).
-#define AOM_VAR_NO_LOOP_SSE2(bw, bh, bits, max_pixels) \
- unsigned int aom_variance##bw##x##bh##_sse2( \
- const uint8_t *src, int src_stride, const uint8_t *ref, int ref_stride, \
- unsigned int *sse) { \
- __m128i vsse = _mm_setzero_si128(); \
- __m128i vsum; \
- int sum = 0; \
- variance##bw##_sse2(src, src_stride, ref, ref_stride, bh, &vsse, &vsum); \
- variance_final_##max_pixels##_pel_sse2(vsse, vsum, sse, &sum); \
- assert(sum <= 255 * bw * bh); \
- assert(sum >= -255 * bw * bh); \
- return *sse - (uint32_t)(((int64_t)sum * sum) >> bits); \
- }
-
-AOM_VAR_NO_LOOP_SSE2(4, 4, 4, 128);
-AOM_VAR_NO_LOOP_SSE2(4, 8, 5, 128);
-AOM_VAR_NO_LOOP_SSE2(4, 16, 6, 128);
-
-AOM_VAR_NO_LOOP_SSE2(8, 4, 5, 128);
-AOM_VAR_NO_LOOP_SSE2(8, 8, 6, 128);
-AOM_VAR_NO_LOOP_SSE2(8, 16, 7, 128);
-AOM_VAR_NO_LOOP_SSE2(8, 32, 8, 256);
-
-AOM_VAR_NO_LOOP_SSE2(16, 4, 6, 128);
-AOM_VAR_NO_LOOP_SSE2(16, 8, 7, 128);
-AOM_VAR_NO_LOOP_SSE2(16, 16, 8, 256);
-AOM_VAR_NO_LOOP_SSE2(16, 32, 9, 512);
-AOM_VAR_NO_LOOP_SSE2(16, 64, 10, 1024);
-
-AOM_VAR_NO_LOOP_SSE2(32, 8, 8, 256);
-AOM_VAR_NO_LOOP_SSE2(32, 16, 9, 512);
-AOM_VAR_NO_LOOP_SSE2(32, 32, 10, 1024);
-
-// Defines aom_variance<bw>x<bh>_sse2() for block sizes that are too tall
-// for one variance<bw>_sse2() call. The block is processed in bh / uh
-// strips of uh rows; each strip's 16-bit difference sums are widened to
-// 32 bits with sum_to_32bit_sse2() before being added to the running total.
-//   bits - right shift applied to sum * sum; expected to be log2(bw * bh).
-//   uh   - strip height passed to variance<bw>_sse2().
-// The generated function stores the sum of squared differences in *sse and
-// returns *sse - ((sum * sum) >> bits).
-#define AOM_VAR_LOOP_SSE2(bw, bh, bits, uh) \
- unsigned int aom_variance##bw##x##bh##_sse2( \
- const uint8_t *src, int src_stride, const uint8_t *ref, int ref_stride, \
- unsigned int *sse) { \
- __m128i vsse = _mm_setzero_si128(); \
- __m128i vsum = _mm_setzero_si128(); \
- for (int i = 0; i < (bh / uh); ++i) { \
- __m128i vsum16; \
- variance##bw##_sse2(src, src_stride, ref, ref_stride, uh, &vsse, \
- &vsum16); \
- vsum = _mm_add_epi32(vsum, sum_to_32bit_sse2(vsum16)); \
- src += (src_stride * uh); \
- ref += (ref_stride * uh); \
- } \
- *sse = add32x4_sse2(vsse); \
- int sum = add32x4_sse2(vsum); \
- assert(sum <= 255 * bw * bh); \
- assert(sum >= -255 * bw * bh); \
- return *sse - (uint32_t)(((int64_t)sum * sum) >> bits); \
- }
-
-AOM_VAR_LOOP_SSE2(32, 64, 11, 32); // 32x32 * ( 64/32 )
-
-AOM_VAR_NO_LOOP_SSE2(64, 16, 10, 1024);
-AOM_VAR_LOOP_SSE2(64, 32, 11, 16); // 64x16 * ( 32/16 )
-AOM_VAR_LOOP_SSE2(64, 64, 12, 16); // 64x16 * ( 64/16 )
-AOM_VAR_LOOP_SSE2(64, 128, 13, 16); // 64x16 * ( 128/16 )
-
-AOM_VAR_LOOP_SSE2(128, 64, 13, 8); // 128x8 * ( 64/8 )
-AOM_VAR_LOOP_SSE2(128, 128, 14, 8); // 128x8 * ( 128/8 )
-
-// 8x8 mean squared error: runs the 8x8 variance kernel only for the sum of
-// squared differences it stores in *sse, and returns that value.
-unsigned int aom_mse8x8_sse2(const uint8_t *src, int src_stride,
-                             const uint8_t *ref, int ref_stride,
-                             unsigned int *sse) {
-  (void)aom_variance8x8_sse2(src, src_stride, ref, ref_stride, sse);
-  return *sse;
-}
-
-// 8x16 mean squared error: runs the 8x16 variance kernel only for the sum
-// of squared differences it stores in *sse, and returns that value.
-unsigned int aom_mse8x16_sse2(const uint8_t *src, int src_stride,
-                              const uint8_t *ref, int ref_stride,
-                              unsigned int *sse) {
-  (void)aom_variance8x16_sse2(src, src_stride, ref, ref_stride, sse);
-  return *sse;
-}
-
-// 16x8 mean squared error: runs the 16x8 variance kernel only for the sum
-// of squared differences it stores in *sse, and returns that value.
-unsigned int aom_mse16x8_sse2(const uint8_t *src, int src_stride,
-                              const uint8_t *ref, int ref_stride,
-                              unsigned int *sse) {
-  (void)aom_variance16x8_sse2(src, src_stride, ref, ref_stride, sse);
-  return *sse;
-}
-
-// 16x16 mean squared error: runs the 16x16 variance kernel only for the sum
-// of squared differences it stores in *sse, and returns that value.
-unsigned int aom_mse16x16_sse2(const uint8_t *src, int src_stride,
-                               const uint8_t *ref, int ref_stride,
-                               unsigned int *sse) {
-  (void)aom_variance16x16_sse2(src, src_stride, ref, ref_stride, sse);
-  return *sse;
-}
-
-// The 2 unused parameters are place holders for PIC enabled build.
-// These definitions are for functions defined in subpel_variance.asm
-#define DECL(w, opt) \
- int aom_sub_pixel_variance##w##xh_##opt( \
- const uint8_t *src, ptrdiff_t src_stride, int x_offset, int y_offset, \
- const uint8_t *dst, ptrdiff_t dst_stride, int height, unsigned int *sse, \
- void *unused0, void *unused)
-#define DECLS(opt) \
- DECL(4, opt); \
- DECL(8, opt); \
- DECL(16, opt)
-
-DECLS(sse2);
-DECLS(ssse3);
-#undef DECLS
-#undef DECL
-
-// Defines aom_sub_pixel_variance<w>x<h>_<opt>() on top of the
-// aom_sub_pixel_variance<wf>xh_<opt>() helpers.
-// The block is split into w / wf columns of width wf; each column is
-// processed in h / hf strips, where hf = min(h, 64). Every helper call
-// returns a difference sum (added to se) and stores a sum of squared
-// differences (added to sse).
-//   wlog2, hlog2 - the result is sse - ((se * se) >> (wlog2 + hlog2)).
-//   cast         - type se is converted to before squaring.
-//   cast_prod    - type the product is converted to before the shift.
-// The generated function also stores sse in *sse_ptr.
-#define FN(w, h, wf, wlog2, hlog2, opt, cast_prod, cast) \
- unsigned int aom_sub_pixel_variance##w##x##h##_##opt( \
- const uint8_t *src, int src_stride, int x_offset, int y_offset, \
- const uint8_t *dst, int dst_stride, unsigned int *sse_ptr) { \
- /*Avoid overflow in helper by capping height.*/ \
- const int hf = AOMMIN(h, 64); \
- unsigned int sse = 0; \
- int se = 0; \
- for (int i = 0; i < (w / wf); ++i) { \
- const uint8_t *src_ptr = src; \
- const uint8_t *dst_ptr = dst; \
- for (int j = 0; j < (h / hf); ++j) { \
- unsigned int sse2; \
- const int se2 = aom_sub_pixel_variance##wf##xh_##opt( \
- src_ptr, src_stride, x_offset, y_offset, dst_ptr, dst_stride, hf, \
- &sse2, NULL, NULL); \
- dst_ptr += hf * dst_stride; \
- src_ptr += hf * src_stride; \
- se += se2; \
- sse += sse2; \
- } \
- src += wf; \
- dst += wf; \
- } \
- *sse_ptr = sse; \
- return sse - (unsigned int)(cast_prod(cast se * se) >> (wlog2 + hlog2)); \
- }
-
-#define FNS(opt) \
- FN(128, 128, 16, 7, 7, opt, (int64_t), (int64_t)); \
- FN(128, 64, 16, 7, 6, opt, (int64_t), (int64_t)); \
- FN(64, 128, 16, 6, 7, opt, (int64_t), (int64_t)); \
- FN(64, 64, 16, 6, 6, opt, (int64_t), (int64_t)); \
- FN(64, 32, 16, 6, 5, opt, (int64_t), (int64_t)); \
- FN(32, 64, 16, 5, 6, opt, (int64_t), (int64_t)); \
- FN(32, 32, 16, 5, 5, opt, (int64_t), (int64_t)); \
- FN(32, 16, 16, 5, 4, opt, (int64_t), (int64_t)); \
- FN(16, 32, 16, 4, 5, opt, (int64_t), (int64_t)); \
- FN(16, 16, 16, 4, 4, opt, (uint32_t), (int64_t)); \
- FN(16, 8, 16, 4, 3, opt, (int32_t), (int32_t)); \
- FN(8, 16, 8, 3, 4, opt, (int32_t), (int32_t)); \
- FN(8, 8, 8, 3, 3, opt, (int32_t), (int32_t)); \
- FN(8, 4, 8, 3, 2, opt, (int32_t), (int32_t)); \
- FN(4, 8, 4, 2, 3, opt, (int32_t), (int32_t)); \
- FN(4, 4, 4, 2, 2, opt, (int32_t), (int32_t)); \
- FN(4, 16, 4, 2, 4, opt, (int32_t), (int32_t)); \
- FN(16, 4, 16, 4, 2, opt, (int32_t), (int32_t)); \
- FN(8, 32, 8, 3, 5, opt, (uint32_t), (int64_t)); \
- FN(32, 8, 16, 5, 3, opt, (uint32_t), (int64_t)); \
- FN(16, 64, 16, 4, 6, opt, (int64_t), (int64_t)); \
- FN(64, 16, 16, 6, 4, opt, (int64_t), (int64_t))
-
-FNS(sse2);
-FNS(ssse3);
-
-#undef FNS
-#undef FN
-
-// The 2 unused parameters are place holders for PIC enabled build.
-#define DECL(w, opt) \
- int aom_sub_pixel_avg_variance##w##xh_##opt( \
- const uint8_t *src, ptrdiff_t src_stride, int x_offset, int y_offset, \
- const uint8_t *dst, ptrdiff_t dst_stride, const uint8_t *sec, \
- ptrdiff_t sec_stride, int height, unsigned int *sse, void *unused0, \
- void *unused)
-#define DECLS(opt) \
- DECL(4, opt); \
- DECL(8, opt); \
- DECL(16, opt)
-
-DECLS(sse2);
-DECLS(ssse3);
-#undef DECL
-#undef DECLS
-
-// Defines aom_sub_pixel_avg_variance<w>x<h>_<opt>() on top of the
-// aom_sub_pixel_avg_variance<wf>xh_<opt>() helpers.
-// Same column/strip tiling as the plain sub-pixel variance wrapper: w / wf
-// columns of width wf, each processed in h / hf strips with
-// hf = min(h, 64). The second predictor `sec` is passed to the helper with
-// a stride of w and is advanced by hf * w per strip and wf per column.
-//   wlog2, hlog2 - the result is sse - ((se * se) >> (wlog2 + hlog2)).
-//   cast         - type se is converted to before squaring.
-//   cast_prod    - type the product is converted to before the shift.
-// The generated function also stores sse in *sse_ptr.
-#define FN(w, h, wf, wlog2, hlog2, opt, cast_prod, cast) \
- unsigned int aom_sub_pixel_avg_variance##w##x##h##_##opt( \
- const uint8_t *src, int src_stride, int x_offset, int y_offset, \
- const uint8_t *dst, int dst_stride, unsigned int *sse_ptr, \
- const uint8_t *sec) { \
- /*Avoid overflow in helper by capping height.*/ \
- const int hf = AOMMIN(h, 64); \
- unsigned int sse = 0; \
- int se = 0; \
- for (int i = 0; i < (w / wf); ++i) { \
- const uint8_t *src_ptr = src; \
- const uint8_t *dst_ptr = dst; \
- const uint8_t *sec_ptr = sec; \
- for (int j = 0; j < (h / hf); ++j) { \
- unsigned int sse2; \
- const int se2 = aom_sub_pixel_avg_variance##wf##xh_##opt( \
- src_ptr, src_stride, x_offset, y_offset, dst_ptr, dst_stride, \
- sec_ptr, w, hf, &sse2, NULL, NULL); \
- dst_ptr += hf * dst_stride; \
- src_ptr += hf * src_stride; \
- sec_ptr += hf * w; \
- se += se2; \
- sse += sse2; \
- } \
- src += wf; \
- dst += wf; \
- sec += wf; \
- } \
- *sse_ptr = sse; \
- return sse - (unsigned int)(cast_prod(cast se * se) >> (wlog2 + hlog2)); \
- }
-
-#define FNS(opt) \
- FN(128, 128, 16, 7, 7, opt, (int64_t), (int64_t)); \
- FN(128, 64, 16, 7, 6, opt, (int64_t), (int64_t)); \
- FN(64, 128, 16, 6, 7, opt, (int64_t), (int64_t)); \
- FN(64, 64, 16, 6, 6, opt, (int64_t), (int64_t)); \
- FN(64, 32, 16, 6, 5, opt, (int64_t), (int64_t)); \
- FN(32, 64, 16, 5, 6, opt, (int64_t), (int64_t)); \
- FN(32, 32, 16, 5, 5, opt, (int64_t), (int64_t)); \
- FN(32, 16, 16, 5, 4, opt, (int64_t), (int64_t)); \
- FN(16, 32, 16, 4, 5, opt, (int64_t), (int64_t)); \
- FN(16, 16, 16, 4, 4, opt, (uint32_t), (int64_t)); \
- FN(16, 8, 16, 4, 3, opt, (uint32_t), (int32_t)); \
- FN(8, 16, 8, 3, 4, opt, (uint32_t), (int32_t)); \
- FN(8, 8, 8, 3, 3, opt, (uint32_t), (int32_t)); \
- FN(8, 4, 8, 3, 2, opt, (uint32_t), (int32_t)); \
- FN(4, 8, 4, 2, 3, opt, (uint32_t), (int32_t)); \
- FN(4, 4, 4, 2, 2, opt, (uint32_t), (int32_t)); \
- FN(4, 16, 4, 2, 4, opt, (int32_t), (int32_t)); \
- FN(16, 4, 16, 4, 2, opt, (int32_t), (int32_t)); \
- FN(8, 32, 8, 3, 5, opt, (uint32_t), (int64_t)); \
- FN(32, 8, 16, 5, 3, opt, (uint32_t), (int64_t)); \
- FN(16, 64, 16, 4, 6, opt, (int64_t), (int64_t)); \
- FN(64, 16, 16, 6, 4, opt, (int64_t), (int64_t))
-
-FNS(sse2);
-FNS(ssse3);
-
-#undef FNS
-#undef FN
-
-// Builds the prediction for a width x height block into comp_pred, which is
-// written densely (row stride == width).
-//
-// - If xd is non-NULL and the reference frame is scaled, the work is
-//   delegated to av1_make_inter_predictor() and the function returns.
-// - Otherwise the block is taken from ref (stride ref_stride):
-//     * both sub-pixel offsets zero: plain copy, specialised by width;
-//     * only one offset non-zero: a single horizontal or vertical convolve;
-//     * both non-zero: horizontal convolve into a temporary buffer followed
-//       by a vertical convolve.
-//   subpel_search == 1 selects the 4-tap filter, anything else the 8-tap
-//   EIGHTTAP_REGULAR filter.
-void aom_upsampled_pred_sse2(MACROBLOCKD *xd, const struct AV1Common *const cm,
-                             int mi_row, int mi_col, const MV *const mv,
-                             uint8_t *comp_pred, int width, int height,
-                             int subpel_x_q3, int subpel_y_q3,
-                             const uint8_t *ref, int ref_stride,
-                             int subpel_search) {
-  // expect xd == NULL only in tests
-  if (xd != NULL) {
-    const MB_MODE_INFO *mi = xd->mi[0];
-    const int ref_num = 0;
-    const int is_intrabc = is_intrabc_block(mi);
-    const struct scale_factors *const sf =
-        is_intrabc ? &cm->sf_identity : &xd->block_refs[ref_num]->sf;
-    const int is_scaled = av1_is_scaled(sf);
-
-    if (is_scaled) {
-      // Note: This is mostly a copy from the >=8X8 case in
-      // build_inter_predictors() function, with some small tweaks.
-
-      // Some assumptions.
-      const int plane = 0;
-
-      // Get pre-requisites.
-      const struct macroblockd_plane *const pd = &xd->plane[plane];
-      const int ssx = pd->subsampling_x;
-      const int ssy = pd->subsampling_y;
-      assert(ssx == 0 && ssy == 0);
-      const struct buf_2d *const dst_buf = &pd->dst;
-      const struct buf_2d *const pre_buf =
-          is_intrabc ? dst_buf : &pd->pre[ref_num];
-      const int mi_x = mi_col * MI_SIZE;
-      const int mi_y = mi_row * MI_SIZE;
-
-      // Calculate subpel_x/y and x/y_step.
-      const int row_start = 0;  // Because ss_y is 0.
-      const int col_start = 0;  // Because ss_x is 0.
-      const int pre_x = (mi_x + MI_SIZE * col_start) >> ssx;
-      const int pre_y = (mi_y + MI_SIZE * row_start) >> ssy;
-      int orig_pos_y = pre_y << SUBPEL_BITS;
-      orig_pos_y += mv->row * (1 << (1 - ssy));
-      int orig_pos_x = pre_x << SUBPEL_BITS;
-      orig_pos_x += mv->col * (1 << (1 - ssx));
-      int pos_y = sf->scale_value_y(orig_pos_y, sf);
-      int pos_x = sf->scale_value_x(orig_pos_x, sf);
-      pos_x += SCALE_EXTRA_OFF;
-      pos_y += SCALE_EXTRA_OFF;
-
-      const int top = -AOM_LEFT_TOP_MARGIN_SCALED(ssy);
-      const int left = -AOM_LEFT_TOP_MARGIN_SCALED(ssx);
-      const int bottom = (pre_buf->height + AOM_INTERP_EXTEND)
-                         << SCALE_SUBPEL_BITS;
-      const int right = (pre_buf->width + AOM_INTERP_EXTEND)
-                        << SCALE_SUBPEL_BITS;
-      pos_y = clamp(pos_y, top, bottom);
-      pos_x = clamp(pos_x, left, right);
-
-      const uint8_t *const pre =
-          pre_buf->buf0 + (pos_y >> SCALE_SUBPEL_BITS) * pre_buf->stride +
-          (pos_x >> SCALE_SUBPEL_BITS);
-
-      const SubpelParams subpel_params = { sf->x_step_q4, sf->y_step_q4,
-                                           pos_x & SCALE_SUBPEL_MASK,
-                                           pos_y & SCALE_SUBPEL_MASK };
-
-      // Get warp types.
-      const WarpedMotionParams *const wm =
-          &xd->global_motion[mi->ref_frame[ref_num]];
-      const int is_global = is_global_mv_block(mi, wm->wmtype);
-      WarpTypesAllowed warp_types;
-      warp_types.global_warp_allowed = is_global;
-      warp_types.local_warp_allowed = mi->motion_mode == WARPED_CAUSAL;
-
-      // Get convolve parameters.
-      ConvolveParams conv_params = get_conv_params(0, plane, xd->bd);
-      const InterpFilters filters =
-          av1_broadcast_interp_filter(EIGHTTAP_REGULAR);
-
-      // Get the inter predictor.
-      const int build_for_obmc = 0;
-      av1_make_inter_predictor(pre, pre_buf->stride, comp_pred, width,
-                               &subpel_params, sf, width, height, &conv_params,
-                               filters, &warp_types, mi_x >> pd->subsampling_x,
-                               mi_y >> pd->subsampling_y, plane, ref_num, mi,
-                               build_for_obmc, xd, cm->allow_warped_motion);
-
-      return;
-    }
-  }
-
-  const InterpFilterParams *filter =
-      (subpel_search == 1)
-          ? av1_get_4tap_interp_filter_params(EIGHTTAP_REGULAR)
-          : av1_get_interp_filter_params_with_block_size(EIGHTTAP_REGULAR, 8);
-  int filter_taps = (subpel_search == 1) ? 4 : SUBPEL_TAPS;
-
-  if (!subpel_x_q3 && !subpel_y_q3) {
-    if (width >= 16) {
-      int i;
-      assert(!(width & 15));
-      /*Read 16 pixels one row at a time.*/
-      for (i = 0; i < height; i++) {
-        int j;
-        for (j = 0; j < width; j += 16) {
-          xx_storeu_128(comp_pred, xx_loadu_128(ref));
-          comp_pred += 16;
-          ref += 16;
-        }
-        ref += ref_stride - width;
-      }
-    } else if (width >= 8) {
-      int i;
-      assert(!(width & 7));
-      assert(!(height & 1));
-      /*Read 8 pixels two rows at a time.*/
-      for (i = 0; i < height; i += 2) {
-        __m128i s0 = xx_loadl_64(ref + 0 * ref_stride);
-        __m128i s1 = xx_loadl_64(ref + 1 * ref_stride);
-        xx_storeu_128(comp_pred, _mm_unpacklo_epi64(s0, s1));
-        comp_pred += 16;
-        ref += 2 * ref_stride;
-      }
-    } else {
-      int i;
-      assert(!(width & 3));
-      assert(!(height & 3));
-      /*Read 4 pixels four rows at a time.*/
-      // Each iteration consumes four rows and emits 16 bytes, so the row
-      // counter must advance by four; stepping by one would run the loop
-      // four times too often and overrun both comp_pred and ref.
-      for (i = 0; i < height; i += 4) {
-        const __m128i row0 = xx_loadl_64(ref + 0 * ref_stride);
-        const __m128i row1 = xx_loadl_64(ref + 1 * ref_stride);
-        const __m128i row2 = xx_loadl_64(ref + 2 * ref_stride);
-        const __m128i row3 = xx_loadl_64(ref + 3 * ref_stride);
-        const __m128i reg = _mm_unpacklo_epi64(_mm_unpacklo_epi32(row0, row1),
-                                               _mm_unpacklo_epi32(row2, row3));
-        xx_storeu_128(comp_pred, reg);
-        comp_pred += 16;
-        ref += 4 * ref_stride;
-      }
-    }
-  } else if (!subpel_y_q3) {
-    const int16_t *const kernel =
-        av1_get_interp_filter_subpel_kernel(filter, subpel_x_q3 << 1);
-    aom_convolve8_horiz(ref, ref_stride, comp_pred, width, kernel, 16, NULL, -1,
-                        width, height);
-  } else if (!subpel_x_q3) {
-    const int16_t *const kernel =
-        av1_get_interp_filter_subpel_kernel(filter, subpel_y_q3 << 1);
-    aom_convolve8_vert(ref, ref_stride, comp_pred, width, NULL, -1, kernel, 16,
-                       width, height);
-  } else {
-    DECLARE_ALIGNED(16, uint8_t,
-                    temp[((MAX_SB_SIZE * 2 + 16) + 16) * MAX_SB_SIZE]);
-    const int16_t *const kernel_x =
-        av1_get_interp_filter_subpel_kernel(filter, subpel_x_q3 << 1);
-    const int16_t *const kernel_y =
-        av1_get_interp_filter_subpel_kernel(filter, subpel_y_q3 << 1);
-    const uint8_t *ref_start = ref - ref_stride * ((filter_taps >> 1) - 1);
-    uint8_t *temp_start_horiz =
-        (subpel_search == 1) ? temp + (filter_taps >> 1) * MAX_SB_SIZE : temp;
-    uint8_t *temp_start_vert = temp + MAX_SB_SIZE * ((filter->taps >> 1) - 1);
-    int intermediate_height =
-        (((height - 1) * 8 + subpel_y_q3) >> 3) + filter_taps;
-    assert(intermediate_height <= (MAX_SB_SIZE * 2 + 16) + 16);
-    // TODO(Deepa): Remove the memset below when we have
-    // 4 tap simd for sse2 and ssse3.
-    if (subpel_search == 1) {
-      memset(temp_start_vert - 3 * MAX_SB_SIZE, 0, width);
-      memset(temp_start_vert - 2 * MAX_SB_SIZE, 0, width);
-      memset(temp_start_vert + (height + 2) * MAX_SB_SIZE, 0, width);
-      memset(temp_start_vert + (height + 3) * MAX_SB_SIZE, 0, width);
-    }
-    aom_convolve8_horiz(ref_start, ref_stride, temp_start_horiz, MAX_SB_SIZE,
-                        kernel_x, 16, NULL, -1, width, intermediate_height);
-    aom_convolve8_vert(temp_start_vert, MAX_SB_SIZE, comp_pred, width, NULL, -1,
-                       kernel_y, 16, width, height);
-  }
-}
-
-// Writes the upsampled prediction to comp_pred via aom_upsampled_pred() and
-// then replaces it, 16 bytes at a time, with the rounded-up byte average of
-// itself and pred (_mm_avg_epu8).
-void aom_comp_avg_upsampled_pred_sse2(
-    MACROBLOCKD *xd, const struct AV1Common *const cm, int mi_row, int mi_col,
-    const MV *const mv, uint8_t *comp_pred, const uint8_t *pred, int width,
-    int height, int subpel_x_q3, int subpel_y_q3, const uint8_t *ref,
-    int ref_stride, int subpel_search) {
-  aom_upsampled_pred(xd, cm, mi_row, mi_col, mv, comp_pred, width, height,
-                     subpel_x_q3, subpel_y_q3, ref, ref_stride, subpel_search);
-  /*The total number of pixels must be a multiple of 16 (e.g., 4x4).*/
-  assert(!(width * height & 15));
-  const int num_vectors = width * height >> 4;
-  for (int k = 0; k < num_vectors; ++k) {
-    uint8_t *const out = comp_pred + 16 * k;
-    const __m128i upsampled = xx_loadu_128(out);
-    const __m128i other = xx_loadu_128(pred + 16 * k);
-    xx_storeu_128(out, _mm_avg_epu8(upsampled, other));
-  }
-}
-
-// Masked compound prediction with optional sub-pixel upsampling. When
-// either sub-pixel offset is non-zero the reference is first upsampled into
-// comp_pred, which then serves (with stride == width) as the reference
-// passed to aom_comp_mask_pred().
-void aom_comp_mask_upsampled_pred_sse2(
-    MACROBLOCKD *xd, const AV1_COMMON *const cm, int mi_row, int mi_col,
-    const MV *const mv, uint8_t *comp_pred, const uint8_t *pred, int width,
-    int height, int subpel_x_q3, int subpel_y_q3, const uint8_t *ref,
-    int ref_stride, const uint8_t *mask, int mask_stride, int invert_mask,
-    int subpel_search) {
-  const int needs_upsampling = (subpel_x_q3 != 0) || (subpel_y_q3 != 0);
-
-  if (needs_upsampling) {
-    aom_upsampled_pred(xd, cm, mi_row, mi_col, mv, comp_pred, width, height,
-                       subpel_x_q3, subpel_y_q3, ref, ref_stride,
-                       subpel_search);
-    ref = comp_pred;
-    ref_stride = width;
-  }
-  aom_comp_mask_pred(comp_pred, pred, width, height, ref, ref_stride, mask,
-                     mask_stride, invert_mask);
-}
-
-// Blends eight 16-bit samples:
-//   out = (s0 * a + s1 * (max - a) + round) >> AOM_BLEND_A64_ROUND_BITS
-// with max = 1 << AOM_BLEND_A64_ROUND_BITS. The products are formed in
-// 32 bits and the result is packed back to 16 bits with signed saturation.
-static INLINE __m128i highbd_comp_mask_pred_line_sse2(const __m128i s0,
-                                                      const __m128i s1,
-                                                      const __m128i a) {
-  const __m128i weight_max = _mm_set1_epi16((1 << AOM_BLEND_A64_ROUND_BITS));
-  const __m128i rounding =
-      _mm_set1_epi32((1 << AOM_BLEND_A64_ROUND_BITS) >> 1);
-  const __m128i a_complement = _mm_sub_epi16(weight_max, a);
-
-  // Low four samples: interleave {s0, s1} with {a, max - a} so one madd
-  // yields s0 * a + s1 * (max - a) per 32-bit lane.
-  const __m128i blend_lo = _mm_madd_epi16(_mm_unpacklo_epi16(s0, s1),
-                                          _mm_unpacklo_epi16(a, a_complement));
-  const __m128i out_lo = _mm_srai_epi32(_mm_add_epi32(blend_lo, rounding),
-                                        AOM_BLEND_A64_ROUND_BITS);
-
-  // High four samples, same scheme.
-  const __m128i blend_hi = _mm_madd_epi16(_mm_unpackhi_epi16(s0, s1),
-                                          _mm_unpackhi_epi16(a, a_complement));
-  const __m128i out_hi = _mm_srai_epi32(_mm_add_epi32(blend_hi, rounding),
-                                        AOM_BLEND_A64_ROUND_BITS);
-
-  return _mm_packs_epi32(out_lo, out_hi);
-}
-
-// High bit-depth masked compound prediction. Blends ref and pred row by row
-// with the 8-bit mask and writes the result densely to comp_pred (row
-// stride == width). invert_mask swaps which source the mask weight applies
-// to. Only widths 8, 16 and 32 are handled; any other width leaves
-// comp_pred untouched.
-void aom_highbd_comp_mask_pred_sse2(uint8_t *comp_pred8, const uint8_t *pred8,
-                                    int width, int height, const uint8_t *ref8,
-                                    int ref_stride, const uint8_t *mask,
-                                    int mask_stride, int invert_mask) {
-  int row = 0;
-  uint16_t *comp_pred = CONVERT_TO_SHORTPTR(comp_pred8);
-  uint16_t *pred = CONVERT_TO_SHORTPTR(pred8);
-  uint16_t *ref = CONVERT_TO_SHORTPTR(ref8);
-  const uint16_t *src0 = invert_mask ? pred : ref;
-  const uint16_t *src1 = invert_mask ? ref : pred;
-  const int stride0 = invert_mask ? width : ref_stride;
-  const int stride1 = invert_mask ? ref_stride : width;
-  const __m128i zero = _mm_setzero_si128();
-
-  if (width == 8) {
-    // One vector of eight samples per row.
-    do {
-      const __m128i s0 = _mm_loadu_si128((const __m128i *)(src0));
-      const __m128i s1 = _mm_loadu_si128((const __m128i *)(src1));
-      const __m128i m_8 = _mm_loadl_epi64((const __m128i *)mask);
-      const __m128i m_16 = _mm_unpacklo_epi8(m_8, zero);
-
-      _mm_storeu_si128((__m128i *)comp_pred,
-                       highbd_comp_mask_pred_line_sse2(s0, s1, m_16));
-
-      src0 += stride0;
-      src1 += stride1;
-      mask += mask_stride;
-      comp_pred += width;
-    } while (++row < height);
-  } else if (width == 16 || width == 32) {
-    // Rows are processed in chunks of 16 samples: one chunk for width 16,
-    // two for width 32.
-    const int chunks = width >> 4;
-    do {
-      for (int j = 0; j < chunks; j++) {
-        const int x = j * 16;
-        const __m128i s0 = _mm_loadu_si128((const __m128i *)(src0 + x));
-        const __m128i s2 = _mm_loadu_si128((const __m128i *)(src0 + 8 + x));
-        const __m128i s1 = _mm_loadu_si128((const __m128i *)(src1 + x));
-        const __m128i s3 = _mm_loadu_si128((const __m128i *)(src1 + 8 + x));
-
-        const __m128i m_8 = _mm_loadu_si128((const __m128i *)(mask + x));
-        const __m128i m01_16 = _mm_unpacklo_epi8(m_8, zero);
-        const __m128i m23_16 = _mm_unpackhi_epi8(m_8, zero);
-
-        const __m128i comp = highbd_comp_mask_pred_line_sse2(s0, s1, m01_16);
-        const __m128i comp1 = highbd_comp_mask_pred_line_sse2(s2, s3, m23_16);
-
-        _mm_storeu_si128((__m128i *)(comp_pred + x), comp);
-        _mm_storeu_si128((__m128i *)(comp_pred + 8 + x), comp1);
-      }
-      src0 += stride0;
-      src1 += stride1;
-      mask += mask_stride;
-      comp_pred += width;
-    } while (++row < height);
-  }
-}
diff --git a/third_party/aom/aom_mem/aom_mem.c b/third_party/aom/aom_mem/aom_mem.c
deleted file mode 100644
index e603fc5bf..000000000
--- a/third_party/aom/aom_mem/aom_mem.c
+++ /dev/null
@@ -1,84 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#include "aom_mem.h"
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-#include "include/aom_mem_intrnl.h"
-#include "aom/aom_integer.h"
-
-#if defined(AOM_MAX_ALLOCABLE_MEMORY)
-// Returns 1 when nmemb * size is an acceptable allocation size and 0 when
-// it exceeds AOM_MAX_ALLOCABLE_MEMORY or does not fit in size_t. A zero
-// nmemb is always accepted.
-static int check_size_argument_overflow(uint64_t nmemb, uint64_t size) {
-  if (nmemb == 0) return 1;
-  if (size > AOM_MAX_ALLOCABLE_MEMORY / nmemb) return 0;
-
-  const uint64_t total_size = nmemb * size;
-  return total_size == (size_t)total_size;
-}
-#endif
-
-// Number of bytes to request from malloc() so that an align-aligned block
-// of `size` bytes plus the stored original pointer always fits.
-static size_t GetAlignedMallocSize(size_t size, size_t align) {
-  const size_t padding = align - 1;
-  return size + padding + ADDRESS_STORAGE_SIZE;
-}
-
-// Returns the size_t slot located immediately before mem.
-static size_t *GetMallocAddressLocation(void *const mem) {
-  size_t *const as_words = (size_t *)mem;
-  return as_words - 1;
-}
-
-// Records malloc_addr (as a size_t) in the slot just before mem.
-static void SetActualMallocAddress(void *const mem,
-                                   const void *const malloc_addr) {
-  *GetMallocAddressLocation(mem) = (size_t)malloc_addr;
-}
-
-// Reads back the pointer stored in the slot just before mem.
-static void *GetActualMallocAddress(void *const mem) {
-  const size_t stored = *GetMallocAddressLocation(mem);
-  return (void *)stored;
-}
-
-// Allocates `size` bytes aligned to `align` and returns the aligned
-// pointer, or NULL on failure. The pointer returned by malloc() is stored
-// just before the aligned block so that aom_free() can release it.
-void *aom_memalign(size_t align, size_t size) {
-  void *x = NULL;
-  const size_t max_size = (size_t)-1;
-  // The padded request is size + (align - 1) + ADDRESS_STORAGE_SIZE. If
-  // that sum wraps around size_t, malloc() would be asked for far less
-  // memory than the caller intends to use, so reject it up front.
-  if (align - 1 > max_size - ADDRESS_STORAGE_SIZE) return NULL;
-  if (size > max_size - (align - 1 + ADDRESS_STORAGE_SIZE)) return NULL;
-  const size_t aligned_size = GetAlignedMallocSize(size, align);
-#if defined(AOM_MAX_ALLOCABLE_MEMORY)
-  if (!check_size_argument_overflow(1, aligned_size)) return NULL;
-#endif
-  void *const addr = malloc(aligned_size);
-  if (addr) {
-    x = align_addr((unsigned char *)addr + ADDRESS_STORAGE_SIZE, align);
-    SetActualMallocAddress(x, addr);
-  }
-  return x;
-}
-
-// Allocates `size` bytes with the library's default alignment.
-void *aom_malloc(size_t size) {
-  return aom_memalign(DEFAULT_ALIGNMENT, size);
-}
-
-// Allocates a zero-initialised array of `num` elements of `size` bytes
-// each. Returns NULL if the allocation fails or if num * size does not fit
-// in size_t.
-void *aom_calloc(size_t num, size_t size) {
-  // Without this check a wrapped product would yield a buffer much smaller
-  // than the caller expects.
-  if (size != 0 && num > (size_t)-1 / size) return NULL;
-  const size_t total_size = num * size;
-  void *const x = aom_malloc(total_size);
-  if (x) memset(x, 0, total_size);
-  return x;
-}
-
-// Releases a block obtained from aom_memalign()/aom_malloc()/aom_calloc().
-// NULL is ignored.
-void aom_free(void *memblk) {
-  if (!memblk) return;
-  // Free the pointer malloc() originally returned, not the aligned one.
-  free(GetActualMallocAddress(memblk));
-}
-
-// Stores `val` (converted to uint16_t) into `length` consecutive 16-bit
-// elements starting at dest, and returns dest.
-void *aom_memset16(void *dest, int val, size_t length) {
-  uint16_t *const out = (uint16_t *)dest;
-  for (size_t k = 0; k < length; ++k) out[k] = val;
-  return dest;
-}
diff --git a/third_party/aom/aom_mem/aom_mem.cmake b/third_party/aom/aom_mem/aom_mem.cmake
deleted file mode 100644
index eaee8440b..000000000
--- a/third_party/aom/aom_mem/aom_mem.cmake
+++ /dev/null
@@ -1,26 +0,0 @@
-#
-# Copyright (c) 2017, Alliance for Open Media. All rights reserved
-#
-# This source code is subject to the terms of the BSD 2 Clause License and the
-# Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License was
-# not distributed with this source code in the LICENSE file, you can obtain it
-# at www.aomedia.org/license/software. If the Alliance for Open Media Patent
-# License 1.0 was not distributed with this source code in the PATENTS file, you
-# can obtain it at www.aomedia.org/license/patent.
-#
-if(AOM_AOM_MEM_AOM_MEM_CMAKE_)
- return()
-endif() # AOM_AOM_MEM_AOM_MEM_CMAKE_
-set(AOM_AOM_MEM_AOM_MEM_CMAKE_ 1)
-
-list(APPEND AOM_MEM_SOURCES "${AOM_ROOT}/aom_mem/aom_mem.c"
- "${AOM_ROOT}/aom_mem/aom_mem.h"
- "${AOM_ROOT}/aom_mem/include/aom_mem_intrnl.h")
-
-# Creates the aom_mem build target and makes libaom depend on it. The libaom
-# target must exist before this function is called.
-function(setup_aom_mem_targets)
- add_library(aom_mem OBJECT ${AOM_MEM_SOURCES})
- set(AOM_LIB_TARGETS ${AOM_LIB_TARGETS} aom_mem PARENT_SCOPE)
- target_sources(aom PRIVATE $<TARGET_OBJECTS:aom_mem>)
-endfunction()
diff --git a/third_party/aom/aom_mem/aom_mem.h b/third_party/aom/aom_mem/aom_mem.h
deleted file mode 100644
index 4b1fa45f1..000000000
--- a/third_party/aom/aom_mem/aom_mem.h
+++ /dev/null
@@ -1,70 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#ifndef AOM_AOM_MEM_AOM_MEM_H_
-#define AOM_AOM_MEM_AOM_MEM_H_
-
-#include "aom/aom_integer.h"
-#include "config/aom_config.h"
-
-#if defined(__uClinux__)
-#include <lddk.h>
-#endif
-
-#if defined(__cplusplus)
-extern "C" {
-#endif
-
-#ifndef AOM_MAX_ALLOCABLE_MEMORY
-#if SIZE_MAX > (1ULL << 32)
-#define AOM_MAX_ALLOCABLE_MEMORY 8589934592 // 8 GB
-#else
-// For 32-bit targets keep this below INT_MAX to avoid valgrind warnings.
-#define AOM_MAX_ALLOCABLE_MEMORY ((1ULL << 31) - (1 << 16))
-#endif
-#endif
-
-void *aom_memalign(size_t align, size_t size);
-void *aom_malloc(size_t size);
-void *aom_calloc(size_t num, size_t size);
-void aom_free(void *memblk);
-void *aom_memset16(void *dest, int val, size_t length);
-
-#include <string.h>
-
-#ifdef AOM_MEM_PLTFRM
-#include AOM_MEM_PLTFRM
-#endif
-
-#if CONFIG_DEBUG
-#define AOM_CHECK_MEM_ERROR(error_info, lval, expr) \
- do { \
- lval = (expr); \
- if (!lval) \
- aom_internal_error(error_info, AOM_CODEC_MEM_ERROR, \
- "Failed to allocate " #lval " at %s:%d", __FILE__, \
- __LINE__); \
- } while (0)
-#else
-#define AOM_CHECK_MEM_ERROR(error_info, lval, expr) \
- do { \
- lval = (expr); \
- if (!lval) \
- aom_internal_error(error_info, AOM_CODEC_MEM_ERROR, \
- "Failed to allocate " #lval); \
- } while (0)
-#endif
-
-#if defined(__cplusplus)
-}
-#endif
-
-#endif // AOM_AOM_MEM_AOM_MEM_H_
diff --git a/third_party/aom/aom_mem/include/aom_mem_intrnl.h b/third_party/aom/aom_mem/include/aom_mem_intrnl.h
deleted file mode 100644
index cbc30a9bb..000000000
--- a/third_party/aom/aom_mem/include/aom_mem_intrnl.h
+++ /dev/null
@@ -1,33 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#ifndef AOM_AOM_MEM_INCLUDE_AOM_MEM_INTRNL_H_
-#define AOM_AOM_MEM_INCLUDE_AOM_MEM_INTRNL_H_
-
-#include "config/aom_config.h"
-
-#define ADDRESS_STORAGE_SIZE sizeof(size_t)
-
-#ifndef DEFAULT_ALIGNMENT
-#if defined(VXWORKS)
-/*default addr alignment to use in calls to aom_* functions other than
- aom_memalign*/
-#define DEFAULT_ALIGNMENT 32
-#else
-#define DEFAULT_ALIGNMENT (2 * sizeof(void *)) /* NOLINT */
-#endif
-#endif
-
-/*returns an addr aligned to the byte boundary specified by align*/
-#define align_addr(addr, align) \
- (void *)(((size_t)(addr) + ((align)-1)) & ~(size_t)((align)-1))
-
-#endif // AOM_AOM_MEM_INCLUDE_AOM_MEM_INTRNL_H_
diff --git a/third_party/aom/aom_ports/aom_once.h b/third_party/aom/aom_ports/aom_once.h
deleted file mode 100644
index 4d77aac5a..000000000
--- a/third_party/aom/aom_ports/aom_once.h
+++ /dev/null
@@ -1,138 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#ifndef AOM_AOM_PORTS_AOM_ONCE_H_
-#define AOM_AOM_PORTS_AOM_ONCE_H_
-
-#include "config/aom_config.h"
-
-/* Implement a function wrapper to guarantee initialization
- * thread-safety for library singletons.
- *
- * NOTE: This function uses static locks, and can only be
- * used with one common argument per compilation unit. So
- *
- * file1.c:
- * aom_once(foo);
- * ...
- * aom_once(foo);
- *
- * file2.c:
- * aom_once(bar);
- *
- * will ensure foo() and bar() are each called only once, but in
- *
- * file1.c:
- * aom_once(foo);
- * aom_once(bar):
- *
- * bar() will never be called because the lock is used up
- * by the call to foo().
- */
-
-#if CONFIG_MULTITHREAD && defined(_WIN32)
-#include <windows.h>
-#include <stdlib.h>
-/* Declare a per-compilation-unit state variable to track the progress
- * of calling func() only once. This must be at global scope because
- * local initializers are not thread-safe in MSVC prior to Visual
- * Studio 2015.
- *
- * As a static, aom_once_state will be zero-initialized as program start.
- */
-static LONG aom_once_state;
-static void aom_once(void (*func)(void)) {
- /* Try to advance aom_once_state from its initial value of 0 to 1.
- * Only one thread can succeed in doing so.
- */
- if (InterlockedCompareExchange(&aom_once_state, 1, 0) == 0) {
- /* We're the winning thread, having set aom_once_state to 1.
- * Call our function. */
- func();
- /* Now advance aom_once_state to 2, unblocking any other threads. */
- InterlockedIncrement(&aom_once_state);
- return;
- }
-
- /* We weren't the winning thread, but we want to block on
- * the state variable so we don't return before func()
- * has finished executing elsewhere.
- *
- * Try to advance aom_once_state from 2 to 2, which is only possible
- * after the winning thead advances it from 1 to 2.
- */
- while (InterlockedCompareExchange(&aom_once_state, 2, 2) != 2) {
- /* State isn't yet 2. Try again.
- *
- * We are used for singleton initialization functions,
- * which should complete quickly. Contention will likewise
- * be rare, so it's worthwhile to use a simple but cpu-
- * intensive busy-wait instead of successive backoff,
- * waiting on a kernel object, or another heavier-weight scheme.
- *
- * We can at least yield our timeslice.
- */
- Sleep(0);
- }
-
- /* We've seen aom_once_state advance to 2, so we know func()
- * has been called. And we've left aom_once_state as we found it,
- * so other threads will have the same experience.
- *
- * It's safe to return now.
- */
- return;
-}
-
-#elif CONFIG_MULTITHREAD && defined(__OS2__)
-#define INCL_DOS
-#include <os2.h>
-static void aom_once(void (*func)(void)) {
- static int done;
-
- /* If the initialization is complete, return early. */
- if (done) return;
-
- /* Causes all other threads in the process to block themselves
- * and give up their time slice.
- */
- DosEnterCritSec();
-
- if (!done) {
- func();
- done = 1;
- }
-
- /* Restores normal thread dispatching for the current process. */
- DosExitCritSec();
-}
-
-#elif CONFIG_MULTITHREAD && HAVE_PTHREAD_H
-#include <pthread.h>
-static void aom_once(void (*func)(void)) {
- static pthread_once_t lock = PTHREAD_ONCE_INIT;
- pthread_once(&lock, func);
-}
-
-#else
-/* Default version that performs no synchronization. */
-
-static void aom_once(void (*func)(void)) {
- static int done;
-
- if (!done) {
- func();
- done = 1;
- }
-}
-#endif
-
-#endif // AOM_AOM_PORTS_AOM_ONCE_H_
diff --git a/third_party/aom/aom_ports/aom_ports.cmake b/third_party/aom/aom_ports/aom_ports.cmake
deleted file mode 100644
index 6272fc0e3..000000000
--- a/third_party/aom/aom_ports/aom_ports.cmake
+++ /dev/null
@@ -1,81 +0,0 @@
-#
-# Copyright (c) 2017, Alliance for Open Media. All rights reserved
-#
-# This source code is subject to the terms of the BSD 2 Clause License and the
-# Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License was
-# not distributed with this source code in the LICENSE file, you can obtain it
-# at www.aomedia.org/license/software. If the Alliance for Open Media Patent
-# License 1.0 was not distributed with this source code in the PATENTS file, you
-# can obtain it at www.aomedia.org/license/patent.
-#
-if(AOM_AOM_PORTS_AOM_PORTS_CMAKE_)
- return()
-endif() # AOM_AOM_PORTS_AOM_PORTS_CMAKE_
-set(AOM_AOM_PORTS_AOM_PORTS_CMAKE_ 1)
-
-list(APPEND AOM_PORTS_INCLUDES
- "${AOM_ROOT}/aom_ports/aom_once.h"
- "${AOM_ROOT}/aom_ports/aom_timer.h"
- "${AOM_ROOT}/aom_ports/bitops.h"
- "${AOM_ROOT}/aom_ports/emmintrin_compat.h"
- "${AOM_ROOT}/aom_ports/mem.h"
- "${AOM_ROOT}/aom_ports/mem_ops.h"
- "${AOM_ROOT}/aom_ports/mem_ops_aligned.h"
- "${AOM_ROOT}/aom_ports/msvc.h"
- "${AOM_ROOT}/aom_ports/sanitizer.h"
- "${AOM_ROOT}/aom_ports/system_state.h")
-
-list(APPEND AOM_PORTS_ASM_X86 "${AOM_ROOT}/aom_ports/emms.asm")
-
-list(APPEND AOM_PORTS_INCLUDES_X86 "${AOM_ROOT}/aom_ports/x86_abi_support.asm")
-
-list(APPEND AOM_PORTS_SOURCES_ARM "${AOM_ROOT}/aom_ports/arm.h"
- "${AOM_ROOT}/aom_ports/arm_cpudetect.c")
-
-list(APPEND AOM_PORTS_SOURCES_PPC "${AOM_ROOT}/aom_ports/ppc.h"
- "${AOM_ROOT}/aom_ports/ppc_cpudetect.c")
-
-# For arm and x86 targets:
-#
-# * Creates the aom_ports build target, adds the includes in aom_ports to the
-# target, and makes libaom depend on it.
-#
-# Otherwise:
-#
-# * Adds the includes in aom_ports to the libaom target.
-#
-# For all target platforms:
-#
-# * The libaom target must exist before this function is called.
-function(setup_aom_ports_targets)
- if("${AOM_TARGET_CPU}" MATCHES "^x86")
- add_asm_library("aom_ports" "AOM_PORTS_ASM_X86" "aom")
- set(aom_ports_has_symbols 1)
- elseif("${AOM_TARGET_CPU}" MATCHES "arm")
- add_library(aom_ports OBJECT ${AOM_PORTS_SOURCES_ARM})
- set(aom_ports_has_symbols 1)
- target_sources(aom PRIVATE $<TARGET_OBJECTS:aom_ports>)
- elseif("${AOM_TARGET_CPU}" MATCHES "ppc")
- add_library(aom_ports OBJECT ${AOM_PORTS_SOURCES_PPC})
- set(aom_ports_has_symbols 1)
- target_sources(aom PRIVATE $<TARGET_OBJECTS:aom_ports>)
- endif()
-
- if(aom_ports_has_symbols)
- target_sources(aom_ports PRIVATE ${AOM_PORTS_INCLUDES})
-
- if("${AOM_TARGET_CPU}" STREQUAL "x86" OR "${AOM_TARGET_CPU}" STREQUAL
- "x86_64")
- target_sources(aom_ports PRIVATE ${AOM_PORTS_INCLUDES_X86})
- endif()
-
- set(AOM_LIB_TARGETS ${AOM_LIB_TARGETS} PARENT_SCOPE)
- else()
- target_sources(aom PRIVATE ${AOM_PORTS_INCLUDES})
-
- if("${AOM_TARGET_CPU}" STREQUAL "x86" OR "${AOM_TARGET_CPU}" STREQUAL
- "x86_64")
- target_sources(aom PRIVATE ${AOM_PORTS_INCLUDES_X86})
- endif()
- endif()
-endfunction()
diff --git a/third_party/aom/aom_ports/aom_timer.h b/third_party/aom/aom_ports/aom_timer.h
deleted file mode 100644
index 9b17b8983..000000000
--- a/third_party/aom/aom_ports/aom_timer.h
+++ /dev/null
@@ -1,111 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#ifndef AOM_AOM_PORTS_AOM_TIMER_H_
-#define AOM_AOM_PORTS_AOM_TIMER_H_
-
-#include "config/aom_config.h"
-
-#include "aom/aom_integer.h"
-
-#if CONFIG_OS_SUPPORT
-
-#if defined(_WIN32)
-/*
- * Win32 specific includes
- */
-#ifndef WIN32_LEAN_AND_MEAN
-#define WIN32_LEAN_AND_MEAN
-#endif
-#include <windows.h>
-#else
-/*
- * POSIX specific includes
- */
-#include <sys/time.h>
-
-/* timersub is not provided by msys at this time. */
-#ifndef timersub
-#define timersub(a, b, result) \
- do { \
- (result)->tv_sec = (a)->tv_sec - (b)->tv_sec; \
- (result)->tv_usec = (a)->tv_usec - (b)->tv_usec; \
- if ((result)->tv_usec < 0) { \
- --(result)->tv_sec; \
- (result)->tv_usec += 1000000; \
- } \
- } while (0)
-#endif
-#endif
-
-struct aom_usec_timer {
-#if defined(_WIN32)
- LARGE_INTEGER begin, end;
-#else
- struct timeval begin, end;
-#endif
-};
-
-static INLINE void aom_usec_timer_start(struct aom_usec_timer *t) {
-#if defined(_WIN32)
- QueryPerformanceCounter(&t->begin);
-#else
- gettimeofday(&t->begin, NULL);
-#endif
-}
-
-static INLINE void aom_usec_timer_mark(struct aom_usec_timer *t) {
-#if defined(_WIN32)
- QueryPerformanceCounter(&t->end);
-#else
- gettimeofday(&t->end, NULL);
-#endif
-}
-
-static INLINE int64_t aom_usec_timer_elapsed(struct aom_usec_timer *t) {
-#if defined(_WIN32)
- LARGE_INTEGER freq, diff;
-
- diff.QuadPart = t->end.QuadPart - t->begin.QuadPart;
-
- QueryPerformanceFrequency(&freq);
- return diff.QuadPart * 1000000 / freq.QuadPart;
-#else
- struct timeval diff;
-
- timersub(&t->end, &t->begin, &diff);
- return ((int64_t)diff.tv_sec) * 1000000 + diff.tv_usec;
-#endif
-}
-
-#else /* CONFIG_OS_SUPPORT = 0*/
-
-/* Empty timer functions if CONFIG_OS_SUPPORT = 0 */
-#ifndef timersub
-#define timersub(a, b, result)
-#endif
-
-struct aom_usec_timer {
- void *dummy;
-};
-
-static INLINE void aom_usec_timer_start(struct aom_usec_timer *t) { (void)t; }
-
-static INLINE void aom_usec_timer_mark(struct aom_usec_timer *t) { (void)t; }
-
-static INLINE int aom_usec_timer_elapsed(struct aom_usec_timer *t) {
- (void)t;
- return 0;
-}
-
-#endif /* CONFIG_OS_SUPPORT */
-
-#endif // AOM_AOM_PORTS_AOM_TIMER_H_
diff --git a/third_party/aom/aom_ports/arm.h b/third_party/aom/aom_ports/arm.h
deleted file mode 100644
index cb1fb9bec..000000000
--- a/third_party/aom/aom_ports/arm.h
+++ /dev/null
@@ -1,41 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#ifndef AOM_AOM_PORTS_ARM_H_
-#define AOM_AOM_PORTS_ARM_H_
-#include <stdlib.h>
-
-#include "config/aom_config.h"
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-/*ARMv5TE "Enhanced DSP" instructions.*/
-#define HAS_EDSP 0x01
-/*ARMv6 "Parallel" or "Media" instructions.*/
-#define HAS_MEDIA 0x02
-/*ARMv7 optional NEON instructions.*/
-#define HAS_NEON 0x04
-
-int aom_arm_cpu_caps(void);
-
-// Earlier gcc compilers have issues with some neon intrinsics
-#if !defined(__clang__) && defined(__GNUC__) && __GNUC__ == 4 && \
- __GNUC_MINOR__ <= 6
-#define AOM_INCOMPATIBLE_GCC
-#endif
-
-#ifdef __cplusplus
-} // extern "C"
-#endif
-
-#endif // AOM_AOM_PORTS_ARM_H_
diff --git a/third_party/aom/aom_ports/arm_cpudetect.c b/third_party/aom/aom_ports/arm_cpudetect.c
deleted file mode 100644
index 5a75bb348..000000000
--- a/third_party/aom/aom_ports/arm_cpudetect.c
+++ /dev/null
@@ -1,150 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#include <stdlib.h>
-#include <string.h>
-#include "aom_ports/arm.h"
-#include "config/aom_config.h"
-
-#ifdef WINAPI_FAMILY
-#include <winapifamily.h>
-#if !WINAPI_FAMILY_PARTITION(WINAPI_PARTITION_DESKTOP)
-#define getenv(x) NULL
-#endif
-#endif
-
-static int arm_cpu_env_flags(int *flags) {
- char *env;
- env = getenv("AOM_SIMD_CAPS");
- if (env && *env) {
- *flags = (int)strtol(env, NULL, 0);
- return 0;
- }
- *flags = 0;
- return -1;
-}
-
-static int arm_cpu_env_mask(void) {
- char *env;
- env = getenv("AOM_SIMD_CAPS_MASK");
- return env && *env ? (int)strtol(env, NULL, 0) : ~0;
-}
-
-#if !CONFIG_RUNTIME_CPU_DETECT
-
-int aom_arm_cpu_caps(void) {
- /* This function should actually be a no-op. There is no way to adjust any of
- * these because the RTCD tables do not exist: the functions are called
- * statically */
- int flags;
- int mask;
- if (!arm_cpu_env_flags(&flags)) {
- return flags;
- }
- mask = arm_cpu_env_mask();
-#if HAVE_NEON
- flags |= HAS_NEON;
-#endif /* HAVE_NEON */
- return flags & mask;
-}
-
-#elif defined(_MSC_VER) /* end !CONFIG_RUNTIME_CPU_DETECT */
-/*For GetExceptionCode() and EXCEPTION_ILLEGAL_INSTRUCTION.*/
-#define WIN32_LEAN_AND_MEAN
-#define WIN32_EXTRA_LEAN
-#include <windows.h>
-
-int aom_arm_cpu_caps(void) {
- int flags;
- int mask;
- if (!arm_cpu_env_flags(&flags)) {
- return flags;
- }
- mask = arm_cpu_env_mask();
-/* MSVC has no inline __asm support for ARM, but it does let you __emit
- * instructions via their assembled hex code.
- * All of these instructions should be essentially nops.
- */
-#if HAVE_NEON
- if (mask & HAS_NEON) {
- __try {
- /*VORR q0,q0,q0*/
- __emit(0xF2200150);
- flags |= HAS_NEON;
- } __except (GetExceptionCode() == EXCEPTION_ILLEGAL_INSTRUCTION) {
- /*Ignore exception.*/
- }
- }
-#endif /* HAVE_NEON */
- return flags & mask;
-}
-
-#elif defined(__ANDROID__) /* end _MSC_VER */
-#include <cpu-features.h>
-
-int aom_arm_cpu_caps(void) {
- int flags;
- int mask;
- uint64_t features;
- if (!arm_cpu_env_flags(&flags)) {
- return flags;
- }
- mask = arm_cpu_env_mask();
- features = android_getCpuFeatures();
-
-#if HAVE_NEON
- if (features & ANDROID_CPU_ARM_FEATURE_NEON) flags |= HAS_NEON;
-#endif /* HAVE_NEON */
- return flags & mask;
-}
-
-#elif defined(__linux__) /* end __ANDROID__ */
-
-#include <stdio.h>
-
-int aom_arm_cpu_caps(void) {
- FILE *fin;
- int flags;
- int mask;
- if (!arm_cpu_env_flags(&flags)) {
- return flags;
- }
- mask = arm_cpu_env_mask();
- /* Reading /proc/self/auxv would be easier, but that doesn't work reliably
- * on Android.
- * This also means that detection will fail in Scratchbox.
- */
- fin = fopen("/proc/cpuinfo", "r");
- if (fin != NULL) {
- /* 512 should be enough for anybody (it's even enough for all the flags
- * that x86 has accumulated... so far).
- */
- char buf[512];
- while (fgets(buf, 511, fin) != NULL) {
-#if HAVE_NEON
- if (memcmp(buf, "Features", 8) == 0) {
- char *p;
- p = strstr(buf, " neon");
- if (p != NULL && (p[5] == ' ' || p[5] == '\n')) {
- flags |= HAS_NEON;
- }
- }
-#endif /* HAVE_NEON */
- }
- fclose(fin);
- }
- return flags & mask;
-}
-#else /* end __linux__ */
-#error \
- "--enable-runtime-cpu-detect selected, but no CPU detection method " \
-"available for your platform. Reconfigure with --disable-runtime-cpu-detect."
-#endif
diff --git a/third_party/aom/aom_ports/bitops.h b/third_party/aom/aom_ports/bitops.h
deleted file mode 100644
index 44df17307..000000000
--- a/third_party/aom/aom_ports/bitops.h
+++ /dev/null
@@ -1,78 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#ifndef AOM_AOM_PORTS_BITOPS_H_
-#define AOM_AOM_PORTS_BITOPS_H_
-
-#include <assert.h>
-
-#include "aom_ports/msvc.h"
-#include "config/aom_config.h"
-
-#ifdef _MSC_VER
-#if defined(_M_X64) || defined(_M_IX86)
-#include <intrin.h>
-#define USE_MSC_INTRINSICS
-#endif
-#endif
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-// get_msb:
-// Returns (int)floor(log2(n)). n must be > 0.
-// These versions of get_msb() are only valid when n != 0 because all
-// of the optimized versions are undefined when n == 0:
-// https://gcc.gnu.org/onlinedocs/gcc/Other-Builtins.html
-
-// use GNU builtins where available.
-#if defined(__GNUC__) && \
- ((__GNUC__ == 3 && __GNUC_MINOR__ >= 4) || __GNUC__ >= 4)
-static INLINE int get_msb(unsigned int n) {
- assert(n != 0);
- return 31 ^ __builtin_clz(n);
-}
-#elif defined(USE_MSC_INTRINSICS)
-#pragma intrinsic(_BitScanReverse)
-
-static INLINE int get_msb(unsigned int n) {
- unsigned long first_set_bit;
- assert(n != 0);
- _BitScanReverse(&first_set_bit, n);
- return first_set_bit;
-}
-#undef USE_MSC_INTRINSICS
-#else
-static INLINE int get_msb(unsigned int n) {
- int log = 0;
- unsigned int value = n;
- int i;
-
- assert(n != 0);
-
- for (i = 4; i >= 0; --i) {
- const int shift = (1 << i);
- const unsigned int x = value >> shift;
- if (x != 0) {
- value = x;
- log += shift;
- }
- }
- return log;
-}
-#endif
-
-#ifdef __cplusplus
-} // extern "C"
-#endif
-
-#endif // AOM_AOM_PORTS_BITOPS_H_
diff --git a/third_party/aom/aom_ports/emmintrin_compat.h b/third_party/aom/aom_ports/emmintrin_compat.h
deleted file mode 100644
index 85d218a3d..000000000
--- a/third_party/aom/aom_ports/emmintrin_compat.h
+++ /dev/null
@@ -1,56 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#ifndef AOM_AOM_PORTS_EMMINTRIN_COMPAT_H_
-#define AOM_AOM_PORTS_EMMINTRIN_COMPAT_H_
-
-#if defined(__GNUC__) && __GNUC__ < 4
-/* From emmintrin.h (gcc 4.5.3) */
-/* Casts between various SP, DP, INT vector types. Note that these do no
- conversion of values, they just change the type. */
-extern __inline __m128
- __attribute__((__gnu_inline__, __always_inline__, __artificial__))
- _mm_castpd_ps(__m128d __A) {
- return (__m128)__A;
-}
-
-extern __inline __m128i
- __attribute__((__gnu_inline__, __always_inline__, __artificial__))
- _mm_castpd_si128(__m128d __A) {
- return (__m128i)__A;
-}
-
-extern __inline __m128d
- __attribute__((__gnu_inline__, __always_inline__, __artificial__))
- _mm_castps_pd(__m128 __A) {
- return (__m128d)__A;
-}
-
-extern __inline __m128i
- __attribute__((__gnu_inline__, __always_inline__, __artificial__))
- _mm_castps_si128(__m128 __A) {
- return (__m128i)__A;
-}
-
-extern __inline __m128
- __attribute__((__gnu_inline__, __always_inline__, __artificial__))
- _mm_castsi128_ps(__m128i __A) {
- return (__m128)__A;
-}
-
-extern __inline __m128d
- __attribute__((__gnu_inline__, __always_inline__, __artificial__))
- _mm_castsi128_pd(__m128i __A) {
- return (__m128d)__A;
-}
-#endif
-
-#endif // AOM_AOM_PORTS_EMMINTRIN_COMPAT_H_
diff --git a/third_party/aom/aom_ports/emms.asm b/third_party/aom/aom_ports/emms.asm
deleted file mode 100644
index 90776bacb..000000000
--- a/third_party/aom/aom_ports/emms.asm
+++ /dev/null
@@ -1,41 +0,0 @@
-;
-; Copyright (c) 2016, Alliance for Open Media. All rights reserved
-;
-; This source code is subject to the terms of the BSD 2 Clause License and
-; the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
-; was not distributed with this source code in the LICENSE file, you can
-; obtain it at www.aomedia.org/license/software. If the Alliance for Open
-; Media Patent License 1.0 was not distributed with this source code in the
-; PATENTS file, you can obtain it at www.aomedia.org/license/patent.
-;
-
-;
-
-
-%include "aom_ports/x86_abi_support.asm"
-
-section .text
-global sym(aom_reset_mmx_state) PRIVATE
-sym(aom_reset_mmx_state):
- emms
- ret
-
-
-%if LIBAOM_YASM_WIN64
-global sym(aom_winx64_fldcw) PRIVATE
-sym(aom_winx64_fldcw):
- sub rsp, 8
- mov [rsp], rcx ; win x64 specific
- fldcw [rsp]
- add rsp, 8
- ret
-
-
-global sym(aom_winx64_fstcw) PRIVATE
-sym(aom_winx64_fstcw):
- sub rsp, 8
- fstcw [rsp]
- mov rax, [rsp]
- add rsp, 8
- ret
-%endif
diff --git a/third_party/aom/aom_ports/mem.h b/third_party/aom/aom_ports/mem.h
deleted file mode 100644
index 3ffea3cd6..000000000
--- a/third_party/aom/aom_ports/mem.h
+++ /dev/null
@@ -1,69 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#ifndef AOM_AOM_PORTS_MEM_H_
-#define AOM_AOM_PORTS_MEM_H_
-
-#include "aom/aom_integer.h"
-#include "config/aom_config.h"
-
-#if (defined(__GNUC__) && __GNUC__) || defined(__SUNPRO_C)
-#define DECLARE_ALIGNED(n, typ, val) typ val __attribute__((aligned(n)))
-#elif defined(_MSC_VER)
-#define DECLARE_ALIGNED(n, typ, val) __declspec(align(n)) typ val
-#else
-#warning No alignment directives known for this compiler.
-#define DECLARE_ALIGNED(n, typ, val) typ val
-#endif
-
-/* Indicates that the usage of the specified variable has been audited to assure
- * that it's safe to use uninitialized. Silences 'may be used uninitialized'
- * warnings on gcc.
- */
-#if defined(__GNUC__) && __GNUC__
-#define UNINITIALIZED_IS_SAFE(x) x = x
-#else
-#define UNINITIALIZED_IS_SAFE(x) x
-#endif
-
-#if HAVE_NEON && defined(_MSC_VER)
-#define __builtin_prefetch(x)
-#endif
-
-/* Shift down with rounding for use when n >= 0, value >= 0 */
-#define ROUND_POWER_OF_TWO(value, n) (((value) + (((1 << (n)) >> 1))) >> (n))
-
-/* Shift down with rounding for signed integers, for use when n >= 0 */
-#define ROUND_POWER_OF_TWO_SIGNED(value, n) \
- (((value) < 0) ? -ROUND_POWER_OF_TWO(-(value), (n)) \
- : ROUND_POWER_OF_TWO((value), (n)))
-
-/* Shift down with rounding for use when n >= 0, value >= 0 for (64 bit) */
-#define ROUND_POWER_OF_TWO_64(value, n) \
- (((value) + ((((int64_t)1 << (n)) >> 1))) >> (n))
-/* Shift down with rounding for signed integers, for use when n >= 0 (64 bit) */
-#define ROUND_POWER_OF_TWO_SIGNED_64(value, n) \
- (((value) < 0) ? -ROUND_POWER_OF_TWO_64(-(value), (n)) \
- : ROUND_POWER_OF_TWO_64((value), (n)))
-
-/* shift right or left depending on sign of n */
-#define RIGHT_SIGNED_SHIFT(value, n) \
- ((n) < 0 ? ((value) << (-(n))) : ((value) >> (n)))
-
-#define ALIGN_POWER_OF_TWO(value, n) \
- (((value) + ((1 << (n)) - 1)) & ~((1 << (n)) - 1))
-
-#define DIVIDE_AND_ROUND(x, y) (((x) + ((y) >> 1)) / (y))
-
-#define CONVERT_TO_SHORTPTR(x) ((uint16_t *)(((uintptr_t)(x)) << 1))
-#define CONVERT_TO_BYTEPTR(x) ((uint8_t *)(((uintptr_t)(x)) >> 1))
-
-#endif // AOM_AOM_PORTS_MEM_H_
diff --git a/third_party/aom/aom_ports/mem_ops.h b/third_party/aom/aom_ports/mem_ops.h
deleted file mode 100644
index 2b5bc0f0f..000000000
--- a/third_party/aom/aom_ports/mem_ops.h
+++ /dev/null
@@ -1,228 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#ifndef AOM_AOM_PORTS_MEM_OPS_H_
-#define AOM_AOM_PORTS_MEM_OPS_H_
-
-/* \file
- * \brief Provides portable memory access primitives
- *
- * This file provides portable primitives for getting and setting
- * signed and unsigned integers in 16, 24, and 32 bit sizes. The operations
- * can be performed on unaligned data regardless of hardware support for
- * unaligned accesses.
- *
- * The type used to pass the integral values may be changed by defining
- * MEM_VALUE_T with the appropriate type. The type given must be an integral
- * numeric type.
- *
- * The actual functions instantiated have the MEM_VALUE_T type name pasted
- * on to the symbol name. This allows the developer to instantiate these
- * operations for multiple types within the same translation unit. This is
- * of somewhat questionable utility, but the capability exists nonetheless.
- * Users not making use of this functionality should call the functions
- * without the type name appended, and the preprocessor will take care of
- * it.
- *
- * NOTE: This code is not supported on platforms where char > 1 octet ATM.
- */
-
-#ifndef MAU_T
-/* Minimum Access Unit for this target */
-#define MAU_T unsigned char
-#endif
-
-#ifndef MEM_VALUE_T
-#define MEM_VALUE_T int
-#endif
-
-#undef MEM_VALUE_T_SZ_BITS
-#define MEM_VALUE_T_SZ_BITS (sizeof(MEM_VALUE_T) << 3)
-
-#undef mem_ops_wrap_symbol
-#define mem_ops_wrap_symbol(fn) mem_ops_wrap_symbol2(fn, MEM_VALUE_T)
-#undef mem_ops_wrap_symbol2
-#define mem_ops_wrap_symbol2(fn, typ) mem_ops_wrap_symbol3(fn, typ)
-#undef mem_ops_wrap_symbol3
-#define mem_ops_wrap_symbol3(fn, typ) fn##_as_##typ
-
-/*
- * Include aligned access routines
- */
-#define INCLUDED_BY_MEM_OPS_H
-#include "mem_ops_aligned.h"
-#undef INCLUDED_BY_MEM_OPS_H
-
-#undef mem_get_be16
-#define mem_get_be16 mem_ops_wrap_symbol(mem_get_be16)
-static unsigned MEM_VALUE_T mem_get_be16(const void *vmem) {
- unsigned MEM_VALUE_T val;
- const MAU_T *mem = (const MAU_T *)vmem;
-
- val = mem[0] << 8;
- val |= mem[1];
- return val;
-}
-
-#undef mem_get_be24
-#define mem_get_be24 mem_ops_wrap_symbol(mem_get_be24)
-static unsigned MEM_VALUE_T mem_get_be24(const void *vmem) {
- unsigned MEM_VALUE_T val;
- const MAU_T *mem = (const MAU_T *)vmem;
-
- val = mem[0] << 16;
- val |= mem[1] << 8;
- val |= mem[2];
- return val;
-}
-
-#undef mem_get_be32
-#define mem_get_be32 mem_ops_wrap_symbol(mem_get_be32)
-static unsigned MEM_VALUE_T mem_get_be32(const void *vmem) {
- unsigned MEM_VALUE_T val;
- const MAU_T *mem = (const MAU_T *)vmem;
-
- val = ((unsigned MEM_VALUE_T)mem[0]) << 24;
- val |= mem[1] << 16;
- val |= mem[2] << 8;
- val |= mem[3];
- return val;
-}
-
-#undef mem_get_le16
-#define mem_get_le16 mem_ops_wrap_symbol(mem_get_le16)
-static unsigned MEM_VALUE_T mem_get_le16(const void *vmem) {
- unsigned MEM_VALUE_T val;
- const MAU_T *mem = (const MAU_T *)vmem;
-
- val = mem[1] << 8;
- val |= mem[0];
- return val;
-}
-
-#undef mem_get_le24
-#define mem_get_le24 mem_ops_wrap_symbol(mem_get_le24)
-static unsigned MEM_VALUE_T mem_get_le24(const void *vmem) {
- unsigned MEM_VALUE_T val;
- const MAU_T *mem = (const MAU_T *)vmem;
-
- val = mem[2] << 16;
- val |= mem[1] << 8;
- val |= mem[0];
- return val;
-}
-
-#undef mem_get_le32
-#define mem_get_le32 mem_ops_wrap_symbol(mem_get_le32)
-static unsigned MEM_VALUE_T mem_get_le32(const void *vmem) {
- unsigned MEM_VALUE_T val;
- const MAU_T *mem = (const MAU_T *)vmem;
-
- val = ((unsigned MEM_VALUE_T)mem[3]) << 24;
- val |= mem[2] << 16;
- val |= mem[1] << 8;
- val |= mem[0];
- return val;
-}
-
-#define mem_get_s_generic(end, sz) \
- static AOM_INLINE signed MEM_VALUE_T mem_get_s##end##sz(const void *vmem) { \
- const MAU_T *mem = (const MAU_T *)vmem; \
- signed MEM_VALUE_T val = mem_get_##end##sz(mem); \
- return (val << (MEM_VALUE_T_SZ_BITS - sz)) >> (MEM_VALUE_T_SZ_BITS - sz); \
- }
-
-/* clang-format off */
-#undef mem_get_sbe16
-#define mem_get_sbe16 mem_ops_wrap_symbol(mem_get_sbe16)
-mem_get_s_generic(be, 16)
-
-#undef mem_get_sbe24
-#define mem_get_sbe24 mem_ops_wrap_symbol(mem_get_sbe24)
-mem_get_s_generic(be, 24)
-
-#undef mem_get_sbe32
-#define mem_get_sbe32 mem_ops_wrap_symbol(mem_get_sbe32)
-mem_get_s_generic(be, 32)
-
-#undef mem_get_sle16
-#define mem_get_sle16 mem_ops_wrap_symbol(mem_get_sle16)
-mem_get_s_generic(le, 16)
-
-#undef mem_get_sle24
-#define mem_get_sle24 mem_ops_wrap_symbol(mem_get_sle24)
-mem_get_s_generic(le, 24)
-
-#undef mem_get_sle32
-#define mem_get_sle32 mem_ops_wrap_symbol(mem_get_sle32)
-mem_get_s_generic(le, 32)
-
-#undef mem_put_be16
-#define mem_put_be16 mem_ops_wrap_symbol(mem_put_be16)
-static AOM_INLINE void mem_put_be16(void *vmem, MEM_VALUE_T val) {
- MAU_T *mem = (MAU_T *)vmem;
-
- mem[0] = (MAU_T)((val >> 8) & 0xff);
- mem[1] = (MAU_T)((val >> 0) & 0xff);
-}
-
-#undef mem_put_be24
-#define mem_put_be24 mem_ops_wrap_symbol(mem_put_be24)
-static AOM_INLINE void mem_put_be24(void *vmem, MEM_VALUE_T val) {
- MAU_T *mem = (MAU_T *)vmem;
-
- mem[0] = (MAU_T)((val >> 16) & 0xff);
- mem[1] = (MAU_T)((val >> 8) & 0xff);
- mem[2] = (MAU_T)((val >> 0) & 0xff);
-}
-
-#undef mem_put_be32
-#define mem_put_be32 mem_ops_wrap_symbol(mem_put_be32)
-static AOM_INLINE void mem_put_be32(void *vmem, MEM_VALUE_T val) {
- MAU_T *mem = (MAU_T *)vmem;
-
- mem[0] = (MAU_T)((val >> 24) & 0xff);
- mem[1] = (MAU_T)((val >> 16) & 0xff);
- mem[2] = (MAU_T)((val >> 8) & 0xff);
- mem[3] = (MAU_T)((val >> 0) & 0xff);
-}
-
-#undef mem_put_le16
-#define mem_put_le16 mem_ops_wrap_symbol(mem_put_le16)
-static AOM_INLINE void mem_put_le16(void *vmem, MEM_VALUE_T val) {
- MAU_T *mem = (MAU_T *)vmem;
-
- mem[0] = (MAU_T)((val >> 0) & 0xff);
- mem[1] = (MAU_T)((val >> 8) & 0xff);
-}
-
-#undef mem_put_le24
-#define mem_put_le24 mem_ops_wrap_symbol(mem_put_le24)
-static AOM_INLINE void mem_put_le24(void *vmem, MEM_VALUE_T val) {
- MAU_T *mem = (MAU_T *)vmem;
-
- mem[0] = (MAU_T)((val >> 0) & 0xff);
- mem[1] = (MAU_T)((val >> 8) & 0xff);
- mem[2] = (MAU_T)((val >> 16) & 0xff);
-}
-
-#undef mem_put_le32
-#define mem_put_le32 mem_ops_wrap_symbol(mem_put_le32)
-static AOM_INLINE void mem_put_le32(void *vmem, MEM_VALUE_T val) {
- MAU_T *mem = (MAU_T *)vmem;
-
- mem[0] = (MAU_T)((val >> 0) & 0xff);
- mem[1] = (MAU_T)((val >> 8) & 0xff);
- mem[2] = (MAU_T)((val >> 16) & 0xff);
- mem[3] = (MAU_T)((val >> 24) & 0xff);
-}
-/* clang-format on */
-#endif // AOM_AOM_PORTS_MEM_OPS_H_
diff --git a/third_party/aom/aom_ports/mem_ops_aligned.h b/third_party/aom/aom_ports/mem_ops_aligned.h
deleted file mode 100644
index 37c367531..000000000
--- a/third_party/aom/aom_ports/mem_ops_aligned.h
+++ /dev/null
@@ -1,173 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#ifndef AOM_AOM_PORTS_MEM_OPS_ALIGNED_H_
-#define AOM_AOM_PORTS_MEM_OPS_ALIGNED_H_
-
-#include "aom/aom_integer.h"
-
-/* \file
- * \brief Provides portable memory access primitives for operating on aligned
- * data
- *
- * This file is split from mem_ops.h for easier maintenance. See mem_ops.h
- * for a more detailed description of these primitives.
- */
-#ifndef INCLUDED_BY_MEM_OPS_H
-#error Include mem_ops.h, not mem_ops_aligned.h directly.
-#endif
-
-/* Architectures that provide instructions for doing this byte swapping
- * could redefine these macros.
- */
-#define swap_endian_16(val, raw) \
- do { \
- val = (uint16_t)(((raw >> 8) & 0x00ff) | ((raw << 8) & 0xff00)); \
- } while (0)
-#define swap_endian_32(val, raw) \
- do { \
- val = ((raw >> 24) & 0x000000ff) | ((raw >> 8) & 0x0000ff00) | \
- ((raw << 8) & 0x00ff0000) | ((raw << 24) & 0xff000000); \
- } while (0)
-#define swap_endian_16_se(val, raw) \
- do { \
- swap_endian_16(val, raw); \
- val = ((val << 16) >> 16); \
- } while (0)
-#define swap_endian_32_se(val, raw) swap_endian_32(val, raw)
-
-#define mem_get_ne_aligned_generic(end, sz) \
- static AOM_INLINE unsigned MEM_VALUE_T mem_get_##end##sz##_aligned( \
- const void *vmem) { \
- const uint##sz##_t *mem = (const uint##sz##_t *)vmem; \
- return *mem; \
- }
-
-#define mem_get_sne_aligned_generic(end, sz) \
- static AOM_INLINE signed MEM_VALUE_T mem_get_s##end##sz##_aligned( \
- const void *vmem) { \
- const int##sz##_t *mem = (const int##sz##_t *)vmem; \
- return *mem; \
- }
-
-#define mem_get_se_aligned_generic(end, sz) \
- static AOM_INLINE unsigned MEM_VALUE_T mem_get_##end##sz##_aligned( \
- const void *vmem) { \
- const uint##sz##_t *mem = (const uint##sz##_t *)vmem; \
- unsigned MEM_VALUE_T val, raw = *mem; \
- swap_endian_##sz(val, raw); \
- return val; \
- }
-
-#define mem_get_sse_aligned_generic(end, sz) \
- static AOM_INLINE signed MEM_VALUE_T mem_get_s##end##sz##_aligned( \
- const void *vmem) { \
- const int##sz##_t *mem = (const int##sz##_t *)vmem; \
- unsigned MEM_VALUE_T val, raw = *mem; \
- swap_endian_##sz##_se(val, raw); \
- return val; \
- }
-
-#define mem_put_ne_aligned_generic(end, sz) \
- static AOM_INLINE void mem_put_##end##sz##_aligned(void *vmem, \
- MEM_VALUE_T val) { \
- uint##sz##_t *mem = (uint##sz##_t *)vmem; \
- *mem = (uint##sz##_t)val; \
- }
-
-#define mem_put_se_aligned_generic(end, sz) \
- static AOM_INLINE void mem_put_##end##sz##_aligned(void *vmem, \
- MEM_VALUE_T val) { \
- uint##sz##_t *mem = (uint##sz##_t *)vmem, raw; \
- swap_endian_##sz(raw, val); \
- *mem = (uint##sz##_t)raw; \
- }
-
-#include "config/aom_config.h"
-
-#if CONFIG_BIG_ENDIAN
-#define mem_get_be_aligned_generic(sz) mem_get_ne_aligned_generic(be, sz)
-#define mem_get_sbe_aligned_generic(sz) mem_get_sne_aligned_generic(be, sz)
-#define mem_get_le_aligned_generic(sz) mem_get_se_aligned_generic(le, sz)
-#define mem_get_sle_aligned_generic(sz) mem_get_sse_aligned_generic(le, sz)
-#define mem_put_be_aligned_generic(sz) mem_put_ne_aligned_generic(be, sz)
-#define mem_put_le_aligned_generic(sz) mem_put_se_aligned_generic(le, sz)
-#else
-#define mem_get_be_aligned_generic(sz) mem_get_se_aligned_generic(be, sz)
-#define mem_get_sbe_aligned_generic(sz) mem_get_sse_aligned_generic(be, sz)
-#define mem_get_le_aligned_generic(sz) mem_get_ne_aligned_generic(le, sz)
-#define mem_get_sle_aligned_generic(sz) mem_get_sne_aligned_generic(le, sz)
-#define mem_put_be_aligned_generic(sz) mem_put_se_aligned_generic(be, sz)
-#define mem_put_le_aligned_generic(sz) mem_put_ne_aligned_generic(le, sz)
-#endif
-
-/* clang-format off */
-#undef mem_get_be16_aligned
-#define mem_get_be16_aligned mem_ops_wrap_symbol(mem_get_be16_aligned)
-mem_get_be_aligned_generic(16)
-
-#undef mem_get_be32_aligned
-#define mem_get_be32_aligned mem_ops_wrap_symbol(mem_get_be32_aligned)
-mem_get_be_aligned_generic(32)
-
-#undef mem_get_le16_aligned
-#define mem_get_le16_aligned mem_ops_wrap_symbol(mem_get_le16_aligned)
-mem_get_le_aligned_generic(16)
-
-#undef mem_get_le32_aligned
-#define mem_get_le32_aligned mem_ops_wrap_symbol(mem_get_le32_aligned)
-mem_get_le_aligned_generic(32)
-
-#undef mem_get_sbe16_aligned
-#define mem_get_sbe16_aligned mem_ops_wrap_symbol(mem_get_sbe16_aligned)
-mem_get_sbe_aligned_generic(16)
-
-#undef mem_get_sbe32_aligned
-#define mem_get_sbe32_aligned mem_ops_wrap_symbol(mem_get_sbe32_aligned)
-mem_get_sbe_aligned_generic(32)
-
-#undef mem_get_sle16_aligned
-#define mem_get_sle16_aligned mem_ops_wrap_symbol(mem_get_sle16_aligned)
-mem_get_sle_aligned_generic(16)
-
-#undef mem_get_sle32_aligned
-#define mem_get_sle32_aligned mem_ops_wrap_symbol(mem_get_sle32_aligned)
-mem_get_sle_aligned_generic(32)
-
-#undef mem_put_be16_aligned
-#define mem_put_be16_aligned mem_ops_wrap_symbol(mem_put_be16_aligned)
-mem_put_be_aligned_generic(16)
-
-#undef mem_put_be32_aligned
-#define mem_put_be32_aligned mem_ops_wrap_symbol(mem_put_be32_aligned)
-mem_put_be_aligned_generic(32)
-
-#undef mem_put_le16_aligned
-#define mem_put_le16_aligned mem_ops_wrap_symbol(mem_put_le16_aligned)
-mem_put_le_aligned_generic(16)
-
-#undef mem_put_le32_aligned
-#define mem_put_le32_aligned mem_ops_wrap_symbol(mem_put_le32_aligned)
-mem_put_le_aligned_generic(32)
-
-#undef mem_get_ne_aligned_generic
-#undef mem_get_se_aligned_generic
-#undef mem_get_sne_aligned_generic
-#undef mem_get_sse_aligned_generic
-#undef mem_put_ne_aligned_generic
-#undef mem_put_se_aligned_generic
-#undef swap_endian_16
-#undef swap_endian_32
-#undef swap_endian_16_se
-#undef swap_endian_32_se
-/* clang-format on */
-
-#endif // AOM_AOM_PORTS_MEM_OPS_ALIGNED_H_
diff --git a/third_party/aom/aom_ports/msvc.h b/third_party/aom/aom_ports/msvc.h
deleted file mode 100644
index e78e605f2..000000000
--- a/third_party/aom/aom_ports/msvc.h
+++ /dev/null
@@ -1,75 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#ifndef AOM_AOM_PORTS_MSVC_H_
-#define AOM_AOM_PORTS_MSVC_H_
-#ifdef _MSC_VER
-
-#include "config/aom_config.h"
-
-#if _MSC_VER < 1900 // VS2015 provides snprintf
-#define snprintf _snprintf
-#endif // _MSC_VER < 1900
-
-#if _MSC_VER < 1800 // VS2013 provides round
-#include <math.h>
-static INLINE double round(double x) {
- if (x < 0)
- return ceil(x - 0.5);
- else
- return floor(x + 0.5);
-}
-
-static INLINE float roundf(float x) {
- if (x < 0)
- return (float)ceil(x - 0.5f);
- else
- return (float)floor(x + 0.5f);
-}
-
-static INLINE long lroundf(float x) {
- if (x < 0)
- return (long)(x - 0.5f);
- else
- return (long)(x + 0.5f);
-}
-#endif // _MSC_VER < 1800
-
-#if HAVE_AVX
-#include <immintrin.h>
-// Note:
-// The _mm256_insert_epi16 intrinsic is available from VS2017 onward.
-// We define this macro for VS2015 and earlier. The
-// intrinsics used here are listed in the VS2015 documentation:
-// https://msdn.microsoft.com/en-us/library/hh977022.aspx
-// Input parameters:
-// a: __m256i,
-// d: int16_t,
-// indx: imm8 (0 - 15)
-#if _MSC_VER <= 1900
-#define _mm256_insert_epi16(a, d, indx) \
- _mm256_insertf128_si256( \
- a, \
- _mm_insert_epi16(_mm256_extractf128_si256(a, indx >> 3), d, indx % 8), \
- indx >> 3)
-
-static INLINE int _mm256_extract_epi32(__m256i a, const int i) {
- return a.m256i_i32[i & 7];
-}
-static INLINE __m256i _mm256_insert_epi32(__m256i a, int b, const int i) {
- __m256i c = a;
- c.m256i_i32[i & 7] = b;
- return c;
-}
-#endif // _MSC_VER <= 1900
-#endif // HAVE_AVX
-#endif // _MSC_VER
-#endif // AOM_AOM_PORTS_MSVC_H_
diff --git a/third_party/aom/aom_ports/ppc.h b/third_party/aom/aom_ports/ppc.h
deleted file mode 100644
index 3159bda68..000000000
--- a/third_party/aom/aom_ports/ppc.h
+++ /dev/null
@@ -1,30 +0,0 @@
-/*
- * Copyright (c) 2018, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#ifndef AOM_AOM_PORTS_PPC_H_
-#define AOM_AOM_PORTS_PPC_H_
-#include <stdlib.h>
-
-#include "config/aom_config.h"
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-#define HAS_VSX 0x01
-
-int ppc_simd_caps(void);
-
-#ifdef __cplusplus
-} // extern "C"
-#endif
-
-#endif // AOM_AOM_PORTS_PPC_H_
diff --git a/third_party/aom/aom_ports/ppc_cpudetect.c b/third_party/aom/aom_ports/ppc_cpudetect.c
deleted file mode 100644
index 82b4f58cc..000000000
--- a/third_party/aom/aom_ports/ppc_cpudetect.c
+++ /dev/null
@@ -1,82 +0,0 @@
-/*
- * Copyright (c) 2018, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#include <fcntl.h>
-#include <unistd.h>
-#include <stdint.h>
-#include <asm/cputable.h>
-#include <linux/auxvec.h>
-
-#include "config/aom_config.h"
-
-#include "aom_ports/ppc.h"
-
-#if CONFIG_RUNTIME_CPU_DETECT
-static int cpu_env_flags(int *flags) {
- char *env;
- env = getenv("AOM_SIMD_CAPS");
- if (env && *env) {
- *flags = (int)strtol(env, NULL, 0);
- return 0;
- }
- *flags = 0;
- return -1;
-}
-
-static int cpu_env_mask(void) {
- char *env;
- env = getenv("AOM_SIMD_CAPS_MASK");
- return env && *env ? (int)strtol(env, NULL, 0) : ~0;
-}
-
-int ppc_simd_caps(void) {
- int flags;
- int mask;
- int fd;
- ssize_t count;
- unsigned int i;
- uint64_t buf[64];
-
- // If AOM_SIMD_CAPS is set then allow only those capabilities.
- if (!cpu_env_flags(&flags)) {
- return flags;
- }
-
- mask = cpu_env_mask();
-
- fd = open("/proc/self/auxv", O_RDONLY);
- if (fd < 0) {
- return 0;
- }
-
- while ((count = read(fd, buf, sizeof(buf))) > 0) {
- for (i = 0; i < (count / sizeof(*buf)); i += 2) {
- if (buf[i] == AT_HWCAP) {
-#if HAVE_VSX
- if (buf[i + 1] & PPC_FEATURE_HAS_VSX) {
- flags |= HAS_VSX;
- }
-#endif // HAVE_VSX
- goto out_close;
- } else if (buf[i] == AT_NULL) {
- goto out_close;
- }
- }
- }
-out_close:
- close(fd);
- return flags & mask;
-}
-#else
-// If there is no RTCD, the function pointers are not used and cannot be
-// changed.
-int ppc_simd_caps(void) { return 0; }
-#endif // CONFIG_RUNTIME_CPU_DETECT
diff --git a/third_party/aom/aom_ports/sanitizer.h b/third_party/aom/aom_ports/sanitizer.h
deleted file mode 100644
index 1dd8eb4cf..000000000
--- a/third_party/aom/aom_ports/sanitizer.h
+++ /dev/null
@@ -1,38 +0,0 @@
-/*
- * Copyright (c) 2018, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#ifndef AOM_AOM_PORTS_SANITIZER_H_
-#define AOM_AOM_PORTS_SANITIZER_H_
-
-// AddressSanitizer support.
-
-// Define AOM_ADDRESS_SANITIZER if AddressSanitizer is used.
-// Clang.
-#if defined(__has_feature)
-#if __has_feature(address_sanitizer)
-#define AOM_ADDRESS_SANITIZER 1
-#endif
-#endif // defined(__has_feature)
-// GCC.
-#if defined(__SANITIZE_ADDRESS__)
-#define AOM_ADDRESS_SANITIZER 1
-#endif // defined(__SANITIZE_ADDRESS__)
-
-// Define the macros for AddressSanitizer manual memory poisoning. See
-// https://github.com/google/sanitizers/wiki/AddressSanitizerManualPoisoning.
-#if defined(AOM_ADDRESS_SANITIZER)
-#include <sanitizer/asan_interface.h>
-#else
-#define ASAN_POISON_MEMORY_REGION(addr, size) ((void)(addr), (void)(size))
-#define ASAN_UNPOISON_MEMORY_REGION(addr, size) ((void)(addr), (void)(size))
-#endif
-
-#endif // AOM_AOM_PORTS_SANITIZER_H_
diff --git a/third_party/aom/aom_ports/system_state.h b/third_party/aom/aom_ports/system_state.h
deleted file mode 100644
index 6640839d8..000000000
--- a/third_party/aom/aom_ports/system_state.h
+++ /dev/null
@@ -1,23 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#ifndef AOM_AOM_PORTS_SYSTEM_STATE_H_
-#define AOM_AOM_PORTS_SYSTEM_STATE_H_
-
-#include "config/aom_config.h"
-
-#if ARCH_X86 || ARCH_X86_64
-void aom_reset_mmx_state(void);
-#define aom_clear_system_state() aom_reset_mmx_state()
-#else
-#define aom_clear_system_state()
-#endif // ARCH_X86 || ARCH_X86_64
-#endif // AOM_AOM_PORTS_SYSTEM_STATE_H_
diff --git a/third_party/aom/aom_ports/x86.h b/third_party/aom/aom_ports/x86.h
deleted file mode 100644
index 52ee49cb3..000000000
--- a/third_party/aom/aom_ports/x86.h
+++ /dev/null
@@ -1,325 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#ifndef AOM_AOM_PORTS_X86_H_
-#define AOM_AOM_PORTS_X86_H_
-#include <stdlib.h>
-
-#if defined(_MSC_VER)
-#include <intrin.h> /* For __cpuidex, __rdtsc */
-#endif
-
-#include "aom/aom_integer.h"
-#include "config/aom_config.h"
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-typedef enum {
- AOM_CPU_UNKNOWN = -1,
- AOM_CPU_AMD,
- AOM_CPU_AMD_OLD,
- AOM_CPU_CENTAUR,
- AOM_CPU_CYRIX,
- AOM_CPU_INTEL,
- AOM_CPU_NEXGEN,
- AOM_CPU_NSC,
- AOM_CPU_RISE,
- AOM_CPU_SIS,
- AOM_CPU_TRANSMETA,
- AOM_CPU_TRANSMETA_OLD,
- AOM_CPU_UMC,
- AOM_CPU_VIA,
-
- AOM_CPU_LAST
-} aom_cpu_t;
-
-#if defined(__GNUC__) && __GNUC__ || defined(__ANDROID__)
-#if ARCH_X86_64
-#define cpuid(func, func2, ax, bx, cx, dx) \
- __asm__ __volatile__("cpuid \n\t" \
- : "=a"(ax), "=b"(bx), "=c"(cx), "=d"(dx) \
- : "a"(func), "c"(func2));
-#else
-#define cpuid(func, func2, ax, bx, cx, dx) \
- __asm__ __volatile__( \
- "mov %%ebx, %%edi \n\t" \
- "cpuid \n\t" \
- "xchg %%edi, %%ebx \n\t" \
- : "=a"(ax), "=D"(bx), "=c"(cx), "=d"(dx) \
- : "a"(func), "c"(func2));
-#endif
-#elif defined(__SUNPRO_C) || \
- defined(__SUNPRO_CC) /* end __GNUC__ or __ANDROID__*/
-#if ARCH_X86_64
-#define cpuid(func, func2, ax, bx, cx, dx) \
- asm volatile( \
- "xchg %rsi, %rbx \n\t" \
- "cpuid \n\t" \
- "movl %ebx, %edi \n\t" \
- "xchg %rsi, %rbx \n\t" \
- : "=a"(ax), "=D"(bx), "=c"(cx), "=d"(dx) \
- : "a"(func), "c"(func2));
-#else
-#define cpuid(func, func2, ax, bx, cx, dx) \
- asm volatile( \
- "pushl %ebx \n\t" \
- "cpuid \n\t" \
- "movl %ebx, %edi \n\t" \
- "popl %ebx \n\t" \
- : "=a"(ax), "=D"(bx), "=c"(cx), "=d"(dx) \
- : "a"(func), "c"(func2));
-#endif
-#else /* end __SUNPRO__ */
-#if ARCH_X86_64
-#if defined(_MSC_VER) && _MSC_VER > 1500
-#define cpuid(func, func2, a, b, c, d) \
- do { \
- int regs[4]; \
- __cpuidex(regs, func, func2); \
- a = regs[0]; \
- b = regs[1]; \
- c = regs[2]; \
- d = regs[3]; \
- } while (0)
-#else
-#define cpuid(func, func2, a, b, c, d) \
- do { \
- int regs[4]; \
- __cpuid(regs, func); \
- a = regs[0]; \
- b = regs[1]; \
- c = regs[2]; \
- d = regs[3]; \
- } while (0)
-#endif
-#else
-/* clang-format off */
-#define cpuid(func, func2, a, b, c, d) \
- __asm mov eax, func \
- __asm mov ecx, func2 \
- __asm cpuid \
- __asm mov a, eax \
- __asm mov b, ebx \
- __asm mov c, ecx \
- __asm mov d, edx
-#endif
-/* clang-format on */
-#endif /* end others */
-
-// NaCl has no support for xgetbv or the raw opcode.
-#if !defined(__native_client__) && (defined(__i386__) || defined(__x86_64__))
-static INLINE uint64_t xgetbv(void) {
- const uint32_t ecx = 0;
- uint32_t eax, edx;
- // Use the raw opcode for xgetbv for compatibility with older toolchains.
- __asm__ volatile(".byte 0x0f, 0x01, 0xd0\n"
- : "=a"(eax), "=d"(edx)
- : "c"(ecx));
- return ((uint64_t)edx << 32) | eax;
-}
-#elif (defined(_M_X64) || defined(_M_IX86)) && defined(_MSC_FULL_VER) && \
- _MSC_FULL_VER >= 160040219 // >= VS2010 SP1
-#include <immintrin.h>
-#define xgetbv() _xgetbv(0)
-#elif defined(_MSC_VER) && defined(_M_IX86)
-static INLINE uint64_t xgetbv(void) {
- uint32_t eax_, edx_;
- __asm {
- xor ecx, ecx // ecx = 0
- // Use the raw opcode for xgetbv for compatibility with older toolchains.
- __asm _emit 0x0f __asm _emit 0x01 __asm _emit 0xd0
- mov eax_, eax
- mov edx_, edx
- }
- return ((uint64_t)edx_ << 32) | eax_;
-}
-#else
-#define xgetbv() 0U // no AVX for older x64 or unrecognized toolchains.
-#endif
-
-#if defined(_MSC_VER) && _MSC_VER >= 1700
-#include <windows.h>
-#if WINAPI_FAMILY_PARTITION(WINAPI_FAMILY_APP)
-#define getenv(x) NULL
-#endif
-#endif
-
-#define HAS_MMX 0x01
-#define HAS_SSE 0x02
-#define HAS_SSE2 0x04
-#define HAS_SSE3 0x08
-#define HAS_SSSE3 0x10
-#define HAS_SSE4_1 0x20
-#define HAS_AVX 0x40
-#define HAS_AVX2 0x80
-#define HAS_SSE4_2 0x100
-#ifndef BIT
-#define BIT(n) (1 << n)
-#endif
-
-static INLINE int x86_simd_caps(void) {
- unsigned int flags = 0;
- unsigned int mask = ~0;
- unsigned int max_cpuid_val, reg_eax, reg_ebx, reg_ecx, reg_edx;
- char *env;
- (void)reg_ebx;
-
- /* See if the CPU capabilities are being overridden by the environment */
- env = getenv("AOM_SIMD_CAPS");
-
- if (env && *env) return (int)strtol(env, NULL, 0);
-
- env = getenv("AOM_SIMD_CAPS_MASK");
-
- if (env && *env) mask = (unsigned int)strtoul(env, NULL, 0);
-
- /* Ensure that the CPUID instruction supports extended features */
- cpuid(0, 0, max_cpuid_val, reg_ebx, reg_ecx, reg_edx);
-
- if (max_cpuid_val < 1) return 0;
-
- /* Get the standard feature flags */
- cpuid(1, 0, reg_eax, reg_ebx, reg_ecx, reg_edx);
-
- if (reg_edx & BIT(23)) flags |= HAS_MMX;
-
- if (reg_edx & BIT(25)) flags |= HAS_SSE; /* aka xmm */
-
- if (reg_edx & BIT(26)) flags |= HAS_SSE2; /* aka wmt */
-
- if (reg_ecx & BIT(0)) flags |= HAS_SSE3;
-
- if (reg_ecx & BIT(9)) flags |= HAS_SSSE3;
-
- if (reg_ecx & BIT(19)) flags |= HAS_SSE4_1;
-
- if (reg_ecx & BIT(20)) flags |= HAS_SSE4_2;
-
- // bits 27 (OSXSAVE) & 28 (256-bit AVX)
- if ((reg_ecx & (BIT(27) | BIT(28))) == (BIT(27) | BIT(28))) {
- if ((xgetbv() & 0x6) == 0x6) {
- flags |= HAS_AVX;
-
- if (max_cpuid_val >= 7) {
- /* Get the leaf 7 feature flags. Needed to check for AVX2 support */
- cpuid(7, 0, reg_eax, reg_ebx, reg_ecx, reg_edx);
-
- if (reg_ebx & BIT(5)) flags |= HAS_AVX2;
- }
- }
- }
-
- return flags & mask;
-}
-
-// Note:
-// The 32-bit CPU cycle counter is lightweight and suits most function
-// performance measurements. For long-running functions (CPU time > a couple
-// of seconds), the 64-bit counter should be used.
-// 32-bit CPU cycle counter
-static INLINE unsigned int x86_readtsc(void) {
-#if defined(__GNUC__) && __GNUC__
- unsigned int tsc;
- __asm__ __volatile__("rdtsc\n\t" : "=a"(tsc) :);
- return tsc;
-#elif defined(__SUNPRO_C) || defined(__SUNPRO_CC)
- unsigned int tsc;
- asm volatile("rdtsc\n\t" : "=a"(tsc) :);
- return tsc;
-#else
-#if ARCH_X86_64
- return (unsigned int)__rdtsc();
-#else
- __asm rdtsc;
-#endif
-#endif
-}
-// 64-bit CPU cycle counter
-static INLINE uint64_t x86_readtsc64(void) {
-#if defined(__GNUC__) && __GNUC__
- uint32_t hi, lo;
- __asm__ __volatile__("rdtsc" : "=a"(lo), "=d"(hi));
- return ((uint64_t)hi << 32) | lo;
-#elif defined(__SUNPRO_C) || defined(__SUNPRO_CC)
- uint_t hi, lo;
- asm volatile("rdtsc\n\t" : "=a"(lo), "=d"(hi));
- return ((uint64_t)hi << 32) | lo;
-#else
-#if ARCH_X86_64
- return (uint64_t)__rdtsc();
-#else
- __asm rdtsc;
-#endif
-#endif
-}
-
-#if defined(__GNUC__) && __GNUC__
-#define x86_pause_hint() __asm__ __volatile__("pause \n\t")
-#elif defined(__SUNPRO_C) || defined(__SUNPRO_CC)
-#define x86_pause_hint() asm volatile("pause \n\t")
-#else
-#if ARCH_X86_64
-#define x86_pause_hint() _mm_pause();
-#else
-#define x86_pause_hint() __asm pause
-#endif
-#endif
-
-#if defined(__GNUC__) && __GNUC__
-static void x87_set_control_word(unsigned short mode) {
- __asm__ __volatile__("fldcw %0" : : "m"(*&mode));
-}
-static unsigned short x87_get_control_word(void) {
- unsigned short mode;
- __asm__ __volatile__("fstcw %0\n\t" : "=m"(*&mode) :);
- return mode;
-}
-#elif defined(__SUNPRO_C) || defined(__SUNPRO_CC)
-static void x87_set_control_word(unsigned short mode) {
- asm volatile("fldcw %0" : : "m"(*&mode));
-}
-static unsigned short x87_get_control_word(void) {
- unsigned short mode;
- asm volatile("fstcw %0\n\t" : "=m"(*&mode) :);
- return mode;
-}
-#elif ARCH_X86_64
-/* No fldcw intrinsics on Windows x64, punt to external asm */
-extern void aom_winx64_fldcw(unsigned short mode);
-extern unsigned short aom_winx64_fstcw(void);
-#define x87_set_control_word aom_winx64_fldcw
-#define x87_get_control_word aom_winx64_fstcw
-#else
-static void x87_set_control_word(unsigned short mode) {
- __asm { fldcw mode }
-}
-static unsigned short x87_get_control_word(void) {
- unsigned short mode;
- __asm { fstcw mode }
- return mode;
-}
-#endif
-
-static INLINE unsigned int x87_set_double_precision(void) {
- unsigned int mode = x87_get_control_word();
- x87_set_control_word((mode & ~0x300) | 0x200);
- return mode;
-}
-
-extern void aom_reset_mmx_state(void);
-
-#ifdef __cplusplus
-} // extern "C"
-#endif
-
-#endif // AOM_AOM_PORTS_X86_H_
diff --git a/third_party/aom/aom_ports/x86_abi_support.asm b/third_party/aom/aom_ports/x86_abi_support.asm
deleted file mode 100644
index 0e7c26287..000000000
--- a/third_party/aom/aom_ports/x86_abi_support.asm
+++ /dev/null
@@ -1,395 +0,0 @@
-;
-; Copyright (c) 2016, Alliance for Open Media. All rights reserved
-;
-; This source code is subject to the terms of the BSD 2 Clause License and
-; the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
-; was not distributed with this source code in the LICENSE file, you can
-; obtain it at www.aomedia.org/license/software. If the Alliance for Open
-; Media Patent License 1.0 was not distributed with this source code in the
-; PATENTS file, you can obtain it at www.aomedia.org/license/patent.
-;
-
-;
-
-
-%include "config/aom_config.asm"
-
-; 32/64 bit compatibility macros
-;
-; In general, we make the source use 64 bit syntax, then twiddle with it using
-; the preprocessor to get the 32 bit syntax on 32 bit platforms.
-;
-%ifidn __OUTPUT_FORMAT__,elf32
-%define ABI_IS_32BIT 1
-%elifidn __OUTPUT_FORMAT__,macho32
-%define ABI_IS_32BIT 1
-%elifidn __OUTPUT_FORMAT__,win32
-%define ABI_IS_32BIT 1
-%elifidn __OUTPUT_FORMAT__,aout
-%define ABI_IS_32BIT 1
-%else
-%define ABI_IS_32BIT 0
-%endif
-
-%if ABI_IS_32BIT
-%define rax eax
-%define rbx ebx
-%define rcx ecx
-%define rdx edx
-%define rsi esi
-%define rdi edi
-%define rsp esp
-%define rbp ebp
-%define movsxd mov
-%macro movq 2
- %ifidn %1,eax
- movd %1,%2
- %elifidn %2,eax
- movd %1,%2
- %elifidn %1,ebx
- movd %1,%2
- %elifidn %2,ebx
- movd %1,%2
- %elifidn %1,ecx
- movd %1,%2
- %elifidn %2,ecx
- movd %1,%2
- %elifidn %1,edx
- movd %1,%2
- %elifidn %2,edx
- movd %1,%2
- %elifidn %1,esi
- movd %1,%2
- %elifidn %2,esi
- movd %1,%2
- %elifidn %1,edi
- movd %1,%2
- %elifidn %2,edi
- movd %1,%2
- %elifidn %1,esp
- movd %1,%2
- %elifidn %2,esp
- movd %1,%2
- %elifidn %1,ebp
- movd %1,%2
- %elifidn %2,ebp
- movd %1,%2
- %else
- movq %1,%2
- %endif
-%endmacro
-%endif
-
-
-; LIBAOM_YASM_WIN64
-; Set LIBAOM_YASM_WIN64 if output is Windows 64bit so the code will work if x64
-; or win64 is defined on the Yasm command line.
-%ifidn __OUTPUT_FORMAT__,win64
-%define LIBAOM_YASM_WIN64 1
-%elifidn __OUTPUT_FORMAT__,x64
-%define LIBAOM_YASM_WIN64 1
-%else
-%define LIBAOM_YASM_WIN64 0
-%endif
-
-; sym()
-; Return the proper symbol name for the target ABI.
-;
-; Certain ABIs, notably MS COFF and Darwin MACH-O, require that symbols
-; with C linkage be prefixed with an underscore.
-;
-%ifidn __OUTPUT_FORMAT__,elf32
-%define sym(x) x
-%elifidn __OUTPUT_FORMAT__,elf64
-%define sym(x) x
-%elifidn __OUTPUT_FORMAT__,elfx32
-%define sym(x) x
-%elif LIBAOM_YASM_WIN64
-%define sym(x) x
-%else
-%define sym(x) _ %+ x
-%endif
-
-; PRIVATE
-; Macro for the attribute to hide a global symbol for the target ABI.
-; This is only active if CHROMIUM is defined.
-;
-; Chromium doesn't like exported global symbols due to symbol clashing with
-; plugins among other things.
-;
-; Requires Chromium's patched copy of yasm:
-; http://src.chromium.org/viewvc/chrome?view=rev&revision=73761
-; http://www.tortall.net/projects/yasm/ticket/236
-;
-%ifdef CHROMIUM
- %ifidn __OUTPUT_FORMAT__,elf32
- %define PRIVATE :hidden
- %elifidn __OUTPUT_FORMAT__,elf64
- %define PRIVATE :hidden
- %elifidn __OUTPUT_FORMAT__,elfx32
- %define PRIVATE :hidden
- %elif LIBAOM_YASM_WIN64
- %define PRIVATE
- %else
- %define PRIVATE :private_extern
- %endif
-%else
- %define PRIVATE
-%endif
-
-; arg()
-; Return the address specification of the given argument
-;
-%if ABI_IS_32BIT
- %define arg(x) [ebp+8+4*x]
-%else
- ; 64 bit ABI passes arguments in registers. This is a workaround to get up
- ; and running quickly. Relies on SHADOW_ARGS_TO_STACK
- %if LIBAOM_YASM_WIN64
- %define arg(x) [rbp+16+8*x]
- %else
- %define arg(x) [rbp-8-8*x]
- %endif
-%endif
-
-; REG_SZ_BYTES, REG_SZ_BITS
-; Size of a register
-%if ABI_IS_32BIT
-%define REG_SZ_BYTES 4
-%define REG_SZ_BITS 32
-%else
-%define REG_SZ_BYTES 8
-%define REG_SZ_BITS 64
-%endif
-
-
-; ALIGN_STACK <alignment> <register>
-; This macro aligns the stack to the given alignment (in bytes). The stack
-; is left such that the previous value of the stack pointer is the first
-; argument on the stack (ie, the inverse of this macro is 'pop rsp.')
-; This macro uses one temporary register, which is not preserved, and thus
-; must be specified as an argument.
-%macro ALIGN_STACK 2
- ; %1 = alignment in bytes, %2 = scratch register (overwritten, not restored).
- ; Keep the pre-alignment stack pointer in the scratch register.
- mov %2, rsp
- ; Round rsp down to a multiple of %1 (correct for power-of-two alignments).
- and rsp, -%1
- ; Drop a further (%1 - REG_SZ_BYTES) bytes: together with the REG_SZ_BYTES
- ; consumed by the push below, rsp moves by exactly %1 and so stays aligned.
- lea rsp, [rsp - (%1 - REG_SZ_BYTES)]
- ; The saved stack pointer now sits at [rsp], ready to be undone by 'pop rsp'.
- push %2
-%endmacro
-
-
-;
-; The Microsoft assembler tries to impose a certain amount of type safety in
-; its register usage. YASM doesn't recognize these directives, so we just
-; %define them away to maintain as much compatibility as possible with the
-; original inline assembler we're porting from.
-;
-%idefine PTR
-%idefine XMMWORD
-%idefine MMWORD
-
-; PIC macros
-;
-%if ABI_IS_32BIT
- %if CONFIG_PIC=1
- %ifidn __OUTPUT_FORMAT__,elf32
- %define WRT_PLT wrt ..plt
- %macro GET_GOT 1
- extern _GLOBAL_OFFSET_TABLE_
- push %1
- call %%get_got
- %%sub_offset:
- jmp %%exitGG
- %%get_got:
- mov %1, [esp]
- add %1, _GLOBAL_OFFSET_TABLE_ + $$ - %%sub_offset wrt ..gotpc
- ret
- %%exitGG:
- %undef GLOBAL
- %define GLOBAL(x) x + %1 wrt ..gotoff
- %undef RESTORE_GOT
- %define RESTORE_GOT pop %1
- %endmacro
- %elifidn __OUTPUT_FORMAT__,macho32
- %macro GET_GOT 1
- push %1
- call %%get_got
- %%get_got:
- pop %1
- %undef GLOBAL
- %define GLOBAL(x) x + %1 - %%get_got
- %undef RESTORE_GOT
- %define RESTORE_GOT pop %1
- %endmacro
- %endif
- %endif
-
- %ifdef CHROMIUM
- %ifidn __OUTPUT_FORMAT__,macho32
- %define HIDDEN_DATA(x) x:private_extern
- %else
- %define HIDDEN_DATA(x) x
- %endif
- %else
- %define HIDDEN_DATA(x) x
- %endif
-%else
- %macro GET_GOT 1
- %endmacro
- %define GLOBAL(x) rel x
- %ifidn __OUTPUT_FORMAT__,elf64
- %define WRT_PLT wrt ..plt
- %define HIDDEN_DATA(x) x:data hidden
- %elifidn __OUTPUT_FORMAT__,elfx32
- %define WRT_PLT wrt ..plt
- %define HIDDEN_DATA(x) x:data hidden
- %elifidn __OUTPUT_FORMAT__,macho64
- %ifdef CHROMIUM
- %define HIDDEN_DATA(x) x:private_extern
- %else
- %define HIDDEN_DATA(x) x
- %endif
- %else
- %define HIDDEN_DATA(x) x
- %endif
-%endif
-%ifnmacro GET_GOT
- %macro GET_GOT 1
- %endmacro
- %define GLOBAL(x) x
-%endif
-%ifndef RESTORE_GOT
-%define RESTORE_GOT
-%endif
-%ifndef WRT_PLT
-%define WRT_PLT
-%endif
-
-%if ABI_IS_32BIT
- %macro SHADOW_ARGS_TO_STACK 1
- %endm
- %define UNSHADOW_ARGS
-%else
-%if LIBAOM_YASM_WIN64
- %macro SHADOW_ARGS_TO_STACK 1 ; argc
- %if %1 > 0
- mov arg(0),rcx
- %endif
- %if %1 > 1
- mov arg(1),rdx
- %endif
- %if %1 > 2
- mov arg(2),r8
- %endif
- %if %1 > 3
- mov arg(3),r9
- %endif
- %endm
-%else
- %macro SHADOW_ARGS_TO_STACK 1 ; argc
- %if %1 > 0
- push rdi
- %endif
- %if %1 > 1
- push rsi
- %endif
- %if %1 > 2
- push rdx
- %endif
- %if %1 > 3
- push rcx
- %endif
- %if %1 > 4
- push r8
- %endif
- %if %1 > 5
- push r9
- %endif
- %if %1 > 6
- %assign i %1-6
- %assign off 16
- %rep i
- mov rax,[rbp+off]
- push rax
- %assign off off+8
- %endrep
- %endif
- %endm
-%endif
- %define UNSHADOW_ARGS mov rsp, rbp
-%endif
-
-; Win64 ABI requires that XMM6:XMM15 are callee saved
-; SAVE_XMM n, [u]
-; store registers 6-n on the stack
-; if u is specified, use unaligned movs.
-; Win64 ABI requires 16 byte stack alignment, but the call then pushes an 8
-; byte return address. Typically we follow this up with 'push rbp' -
-; re-aligning the stack - but in some cases this is not done and unaligned
-; movs must be used.
-%if LIBAOM_YASM_WIN64
-%macro SAVE_XMM 1-2 a
- %if %1 < 6
- %error Only xmm registers 6-15 must be preserved
- %else
- %assign last_xmm %1
- %define movxmm movdq %+ %2
- %assign xmm_stack_space ((last_xmm - 5) * 16)
- sub rsp, xmm_stack_space
- %assign i 6
- %rep (last_xmm - 5)
- movxmm [rsp + ((i - 6) * 16)], xmm %+ i
- %assign i i+1
- %endrep
- %endif
-%endmacro
-%macro RESTORE_XMM 0
- %ifndef last_xmm
- %error RESTORE_XMM must be paired with SAVE_XMM n
- %else
- %assign i last_xmm
- %rep (last_xmm - 5)
- movxmm xmm %+ i, [rsp +((i - 6) * 16)]
- %assign i i-1
- %endrep
- add rsp, xmm_stack_space
- ; there are a couple functions which return from multiple places.
- ; otherwise, we could uncomment these:
- ; %undef last_xmm
- ; %undef xmm_stack_space
- ; %undef movxmm
- %endif
-%endmacro
-%else
-%macro SAVE_XMM 1-2
-%endmacro
-%macro RESTORE_XMM 0
-%endmacro
-%endif
-
-; Name of the rodata section
-;
-; .rodata seems to be an elf-ism, as it doesn't work on OSX.
-;
-%ifidn __OUTPUT_FORMAT__,macho64
-%define SECTION_RODATA section .text
-%elifidn __OUTPUT_FORMAT__,macho32
-%macro SECTION_RODATA 0
-section .text
-%endmacro
-%elifidn __OUTPUT_FORMAT__,aout
-%define SECTION_RODATA section .data
-%else
-%define SECTION_RODATA section .rodata
-%endif
-
-
-; Tell GNU ld that we don't require an executable stack.
-%ifidn __OUTPUT_FORMAT__,elf32
-section .note.GNU-stack noalloc noexec nowrite progbits
-section .text
-%elifidn __OUTPUT_FORMAT__,elf64
-section .note.GNU-stack noalloc noexec nowrite progbits
-section .text
-%elifidn __OUTPUT_FORMAT__,elfx32
-section .note.GNU-stack noalloc noexec nowrite progbits
-section .text
-%endif
diff --git a/third_party/aom/aom_scale/aom_scale.cmake b/third_party/aom/aom_scale/aom_scale.cmake
deleted file mode 100644
index 197dea6bd..000000000
--- a/third_party/aom/aom_scale/aom_scale.cmake
+++ /dev/null
@@ -1,38 +0,0 @@
-#
-# Copyright (c) 2017, Alliance for Open Media. All rights reserved
-#
-# This source code is subject to the terms of the BSD 2 Clause License and the
-# Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License was
-# not distributed with this source code in the LICENSE file, you can obtain it
-# at www.aomedia.org/license/software. If the Alliance for Open Media Patent
-# License 1.0 was not distributed with this source code in the PATENTS file, you
-# can obtain it at www.aomedia.org/license/patent.
-#
-if(AOM_AOM_SCALE_AOM_SCALE_CMAKE_)
- return()
-endif() # AOM_AOM_SCALE_AOM_SCALE_CMAKE_
-set(AOM_AOM_SCALE_AOM_SCALE_CMAKE_ 1)
-
-list(APPEND AOM_SCALE_SOURCES "${AOM_ROOT}/aom_scale/aom_scale.h"
- "${AOM_ROOT}/aom_scale/generic/aom_scale.c"
- "${AOM_ROOT}/aom_scale/generic/gen_scalers.c"
- "${AOM_ROOT}/aom_scale/generic/yv12config.c"
- "${AOM_ROOT}/aom_scale/generic/yv12extend.c"
- "${AOM_ROOT}/aom_scale/yv12config.h")
-
-list(APPEND AOM_SCALE_INTRIN_DSPR2
- "${AOM_ROOT}/aom_scale/mips/dspr2/yv12extend_dspr2.c")
-
-# Creates the aom_scale build target and makes libaom depend on it. The libaom
-# target must exist before this function is called.
-function(setup_aom_scale_targets)
- add_library(aom_scale OBJECT ${AOM_SCALE_SOURCES})
- target_sources(aom PRIVATE $<TARGET_OBJECTS:aom_scale>)
-
- if(HAVE_DSPR2)
- add_intrinsics_object_library("" "dspr2" "aom_scale"
- "AOM_SCALE_INTRIN_DSPR2" "aom")
- endif()
-
- set(AOM_LIB_TARGETS ${AOM_LIB_TARGETS} aom_scale PARENT_SCOPE)
-endfunction()
diff --git a/third_party/aom/aom_scale/aom_scale.h b/third_party/aom/aom_scale/aom_scale.h
deleted file mode 100644
index 11812a145..000000000
--- a/third_party/aom/aom_scale/aom_scale.h
+++ /dev/null
@@ -1,23 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#ifndef AOM_AOM_SCALE_AOM_SCALE_H_
-#define AOM_AOM_SCALE_AOM_SCALE_H_
-
-#include "aom_scale/yv12config.h"
-
-extern void aom_scale_frame(YV12_BUFFER_CONFIG *src, YV12_BUFFER_CONFIG *dst,
- unsigned char *temp_area, unsigned char temp_height,
- unsigned int hscale, unsigned int hratio,
- unsigned int vscale, unsigned int vratio,
- unsigned int interlaced, const int num_planes);
-
-#endif // AOM_AOM_SCALE_AOM_SCALE_H_
diff --git a/third_party/aom/aom_scale/aom_scale_rtcd.c b/third_party/aom/aom_scale/aom_scale_rtcd.c
deleted file mode 100644
index a04e053b0..000000000
--- a/third_party/aom/aom_scale/aom_scale_rtcd.c
+++ /dev/null
@@ -1,18 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-#include "config/aom_config.h"
-
-#define RTCD_C
-#include "config/aom_scale_rtcd.h"
-
-#include "aom_ports/aom_once.h"
-
-/* Initializes the aom_scale run-time dispatch by handing setup_rtcd_internal
- * to aom_once(). Declared with an explicit (void) parameter list so the
- * definition is a proper C prototype. */
-void aom_scale_rtcd(void) { aom_once(setup_rtcd_internal); }
diff --git a/third_party/aom/aom_scale/aom_scale_rtcd.pl b/third_party/aom/aom_scale/aom_scale_rtcd.pl
deleted file mode 100644
index c5990b1bb..000000000
--- a/third_party/aom/aom_scale/aom_scale_rtcd.pl
+++ /dev/null
@@ -1,52 +0,0 @@
-##
-## Copyright (c) 2017, Alliance for Open Media. All rights reserved
-##
-## This source code is subject to the terms of the BSD 2 Clause License and
-## the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
-## was not distributed with this source code in the LICENSE file, you can
-## obtain it at www.aomedia.org/license/software. If the Alliance for Open
-## Media Patent License 1.0 was not distributed with this source code in the
-## PATENTS file, you can obtain it at www.aomedia.org/license/patent.
-##
-sub aom_scale_forward_decls() {
-print <<EOF
-struct yv12_buffer_config;
-EOF
-}
-forward_decls qw/aom_scale_forward_decls/;
-
-# Scaler functions
-if (aom_config("CONFIG_SPATIAL_RESAMPLING") eq "yes") {
- add_proto qw/void aom_horizontal_line_5_4_scale/, "const unsigned char *source, unsigned int source_width, unsigned char *dest, unsigned int dest_width";
- add_proto qw/void aom_vertical_band_5_4_scale/, "unsigned char *source, int src_pitch, unsigned char *dest, int dest_pitch, unsigned int dest_width";
- add_proto qw/void aom_horizontal_line_5_3_scale/, "const unsigned char *source, unsigned int source_width, unsigned char *dest, unsigned int dest_width";
- add_proto qw/void aom_vertical_band_5_3_scale/, "unsigned char *source, int src_pitch, unsigned char *dest, int dest_pitch, unsigned int dest_width";
- add_proto qw/void aom_horizontal_line_2_1_scale/, "const unsigned char *source, unsigned int source_width, unsigned char *dest, unsigned int dest_width";
- add_proto qw/void aom_vertical_band_2_1_scale/, "unsigned char *source, int src_pitch, unsigned char *dest, int dest_pitch, unsigned int dest_width";
- add_proto qw/void aom_vertical_band_2_1_scale_i/, "unsigned char *source, int src_pitch, unsigned char *dest, int dest_pitch, unsigned int dest_width";
-}
-
-add_proto qw/void aom_yv12_extend_frame_borders/, "struct yv12_buffer_config *ybf, const int num_planes";
-
-add_proto qw/void aom_yv12_copy_frame/, "const struct yv12_buffer_config *src_bc, struct yv12_buffer_config *dst_bc, const int num_planes";
-
-add_proto qw/void aom_yv12_copy_y/, "const struct yv12_buffer_config *src_ybc, struct yv12_buffer_config *dst_ybc";
-
-add_proto qw/void aom_yv12_copy_u/, "const struct yv12_buffer_config *src_bc, struct yv12_buffer_config *dst_bc";
-
-add_proto qw/void aom_yv12_copy_v/, "const struct yv12_buffer_config *src_bc, struct yv12_buffer_config *dst_bc";
-
-add_proto qw/void aom_yv12_partial_copy_y/, "const struct yv12_buffer_config *src_ybc, struct yv12_buffer_config *dst_ybc, int hstart, int hend, int vstart, int vend";
-
-add_proto qw/void aom_yv12_partial_copy_u/, "const struct yv12_buffer_config *src_bc, struct yv12_buffer_config *dst_bc, int hstart, int hend, int vstart, int vend";
-
-add_proto qw/void aom_yv12_partial_copy_v/, "const struct yv12_buffer_config *src_bc, struct yv12_buffer_config *dst_bc, int hstart, int hend, int vstart, int vend";
-
-add_proto qw/void aom_extend_frame_borders/, "struct yv12_buffer_config *ybf, const int num_planes";
-specialize qw/aom_extend_frame_borders dspr2/;
-
-add_proto qw/void aom_extend_frame_inner_borders/, "struct yv12_buffer_config *ybf, const int num_planes";
-specialize qw/aom_extend_frame_inner_borders dspr2/;
-
-add_proto qw/void aom_extend_frame_borders_y/, "struct yv12_buffer_config *ybf";
-1;
diff --git a/third_party/aom/aom_scale/generic/aom_scale.c b/third_party/aom/aom_scale/generic/aom_scale.c
deleted file mode 100644
index 206c42c9f..000000000
--- a/third_party/aom/aom_scale/generic/aom_scale.c
+++ /dev/null
@@ -1,506 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-/****************************************************************************
- *
- * Module Title : scale.c
- *
- * Description : Image scaling functions.
- *
- ***************************************************************************/
-
-/****************************************************************************
- * Header Files
- ****************************************************************************/
-#include "config/aom_scale_rtcd.h"
-
-#include "aom_mem/aom_mem.h"
-#include "aom_scale/aom_scale.h"
-#include "aom_scale/yv12config.h"
-
-typedef struct {
- int expanded_frame_width;
- int expanded_frame_height;
-
- int HScale;
- int HRatio;
- int VScale;
- int VRatio;
-
- YV12_BUFFER_CONFIG *src_yuv_config;
- YV12_BUFFER_CONFIG *dst_yuv_config;
-
-} SCALE_VARS;
-
-/****************************************************************************
- *
- * ROUTINE : scale1d_2t1_i
- *
- * INPUTS : const unsigned char *source : Pointer to data to be scaled.
- * int source_step : Number of pixels to step on
- * in source.
- * unsigned int source_scale : Scale for source (UNUSED).
- * unsigned int source_length : Length of source (UNUSED).
- * unsigned char *dest : Pointer to output data array.
- * int dest_step : Number of pixels to step on
- * in destination.
- * unsigned int dest_scale : Scale for destination
- * (UNUSED).
- * unsigned int dest_length : Length of destination.
- *
- * OUTPUTS : None.
- *
- * RETURNS : void
- *
- * FUNCTION : Performs 2-to-1 interpolated scaling.
- *
- * SPECIAL NOTES : None.
- *
- ****************************************************************************/
-static void scale1d_2t1_i(const unsigned char *source, int source_step,
-                          unsigned int source_scale, unsigned int source_length,
-                          unsigned char *dest, int dest_step,
-                          unsigned int dest_scale, unsigned int dest_length) {
-  const unsigned char *const dest_end = dest + dest_length * dest_step;
-  /* Distance to the immediately adjacent source samples. This must be taken
-   * before source_step is doubled below: the filter taps sit one sample
-   * either side of each retained sample, not two. */
-  const int tap_step = source_step;
-  (void)source_length;
-  (void)source_scale;
-  (void)dest_scale;
-
-  source_step *= 2;  // Every other row.
-
-  dest[0] = source[0];  // Special case: 1st pixel (no previous neighbour).
-  source += source_step;
-  dest += dest_step;
-
-  while (dest < dest_end) {
-    /* 3/10/3 weights sum to 16; +8 rounds before the >> 4 normalisation. */
-    const unsigned int a = 3 * source[-tap_step];
-    const unsigned int b = 10 * source[0];
-    const unsigned int c = 3 * source[tap_step];
-    *dest = (unsigned char)((8 + a + b + c) >> 4);
-    source += source_step;
-    dest += dest_step;
-  }
-}
-
-/****************************************************************************
- *
- * ROUTINE : scale1d_2t1_ps
- *
- * INPUTS : const unsigned char *source : Pointer to data to be scaled.
- * int source_step : Number of pixels to step on
- * in source.
- * unsigned int source_scale : Scale for source (UNUSED).
- * unsigned int source_length : Length of source (UNUSED).
- * unsigned char *dest : Pointer to output data array.
- * int dest_step : Number of pixels to step on
- * in destination.
- * unsigned int dest_scale : Scale for destination
- * (UNUSED).
- * unsigned int dest_length : Length of destination.
- *
- * OUTPUTS : None.
- *
- * RETURNS : void
- *
- * FUNCTION : Performs 2-to-1 point subsampled scaling.
- *
- * SPECIAL NOTES : None.
- *
- ****************************************************************************/
-static void scale1d_2t1_ps(const unsigned char *source, int source_step,
-                           unsigned int source_scale,
-                           unsigned int source_length, unsigned char *dest,
-                           int dest_step, unsigned int dest_scale,
-                           unsigned int dest_length) {
-  /* Point sampling: keep every second source sample, copy it unchanged. */
-  const int sample_stride = 2 * source_step;
-  const unsigned char *const stop = dest + dest_length * dest_step;
-
-  (void)source_scale;
-  (void)source_length;
-  (void)dest_scale;
-
-  for (; dest < stop; dest += dest_step, source += sample_stride) {
-    *dest = *source;
-  }
-}
-/****************************************************************************
- *
- * ROUTINE : scale1d_c
- *
- * INPUTS : const unsigned char *source : Pointer to data to be scaled.
- * int source_step : Number of pixels to step on
- * in source.
- * unsigned int source_scale : Scale for source.
- * unsigned int source_length : Length of source (UNUSED).
- * unsigned char *dest : Pointer to output data array.
- * int dest_step : Number of pixels to step on
- * in destination.
- * unsigned int dest_scale : Scale for destination.
- * unsigned int dest_length : Length of destination.
- *
- * OUTPUTS : None.
- *
- * RETURNS : void
- *
- * FUNCTION : Performs linear interpolation in one dimension.
- *
- * SPECIAL NOTES : None.
- *
- ****************************************************************************/
-static void scale1d_c(const unsigned char *source, int source_step,
-                      unsigned int source_scale, unsigned int source_length,
-                      unsigned char *dest, int dest_step,
-                      unsigned int dest_scale, unsigned int dest_length) {
-  const unsigned char *const dest_end = dest + dest_length * dest_step;
-  const unsigned int round_value = dest_scale / 2;
-  /* left_modifier + right_modifier == dest_scale at every output sample;
-   * they are the blend weights of the two bracketing source pixels. */
-  unsigned int left_modifier = dest_scale;
-  unsigned int right_modifier = 0;
-  unsigned char left_pixel = source[0];
-  unsigned char right_pixel = source[source_step];
-
-  (void)source_length;
-
-  /* These asserts are needed if there are boundary issues... */
-  /* assert ( dest_scale > source_scale );*/
-  /* assert ( (source_length - 1) * dest_scale >= (dest_length - 1) *
-   * source_scale);*/
-
-  while (dest < dest_end) {
-    *dest = (unsigned char)((left_modifier * left_pixel +
-                             right_modifier * right_pixel + round_value) /
-                            dest_scale);
-    /* Advance to the next output sample; without this the loop condition
-     * never changes and source is walked past the end of its buffer. */
-    dest += dest_step;
-
-    right_modifier += source_scale;
-
-    /* Step the source window forward until the sample position falls
-     * between left_pixel and right_pixel again. */
-    while (right_modifier > dest_scale) {
-      right_modifier -= dest_scale;
-      source += source_step;
-      left_pixel = source[0];
-      right_pixel = source[source_step];
-    }
-
-    left_modifier = dest_scale - right_modifier;
-  }
-}
-
-/****************************************************************************
- *
- * ROUTINE : Scale2D
- *
- * INPUTS : const unsigned char *source : Pointer to data to be
- * scaled.
- * int source_pitch : Stride of source image.
- * unsigned int source_width : Width of input image.
- * unsigned int source_height : Height of input image.
- * unsigned char *dest : Pointer to output data
- * array.
- * int dest_pitch : Stride of destination
- * image.
- * unsigned int dest_width : Width of destination image.
- * unsigned int dest_height : Height of destination
- * image.
- * unsigned char *temp_area : Pointer to temp work area.
- * unsigned char temp_area_height : Height of temp work area.
- * unsigned int hscale : Horizontal scale factor
- * numerator.
- * unsigned int hratio : Horizontal scale factor
- * denominator.
- * unsigned int vscale : Vertical scale factor
- * numerator.
- * unsigned int vratio : Vertical scale factor
- * denominator.
- * unsigned int interlaced : Interlace flag.
- *
- * OUTPUTS : None.
- *
- * RETURNS : void
- *
- * FUNCTION : Performs 2-tap linear interpolation in two dimensions.
- *
- * SPECIAL NOTES : Expansion is performed one band at a time to help with
- * caching.
- *
- ****************************************************************************/
-static void Scale2D(
- /*const*/
- unsigned char *source, int source_pitch, unsigned int source_width,
- unsigned int source_height, unsigned char *dest, int dest_pitch,
- unsigned int dest_width, unsigned int dest_height, unsigned char *temp_area,
- unsigned char temp_area_height, unsigned int hscale, unsigned int hratio,
- unsigned int vscale, unsigned int vratio, unsigned int interlaced) {
- unsigned int i, j, k;
- unsigned int bands;
- unsigned int dest_band_height;
- unsigned int source_band_height;
-
- typedef void (*Scale1D)(const unsigned char *source, int source_step,
- unsigned int source_scale, unsigned int source_length,
- unsigned char *dest, int dest_step,
- unsigned int dest_scale, unsigned int dest_length);
-
- Scale1D Scale1Dv = scale1d_c;
- Scale1D Scale1Dh = scale1d_c;
-
- void (*horiz_line_scale)(const unsigned char *, unsigned int, unsigned char *,
- unsigned int) = NULL;
- void (*vert_band_scale)(unsigned char *, int, unsigned char *, int,
- unsigned int) = NULL;
-
- int ratio_scalable = 1;
- int interpolation = 0;
-
- unsigned char *source_base;
- unsigned char *line_src;
-
- source_base = (unsigned char *)source;
-
- if (source_pitch < 0) {
- int offset;
-
- offset = (source_height - 1);
- offset *= source_pitch;
-
- source_base += offset;
- }
-
- /* find out the ratio for each direction */
- switch (hratio * 10 / hscale) {
- case 8:
- /* 4-5 Scale in Width direction */
- horiz_line_scale = aom_horizontal_line_5_4_scale;
- break;
- case 6:
- /* 3-5 Scale in Width direction */
- horiz_line_scale = aom_horizontal_line_5_3_scale;
- break;
- case 5:
- /* 1-2 Scale in Width direction */
- horiz_line_scale = aom_horizontal_line_2_1_scale;
- break;
- default:
- /* The ratio is not acceptable now */
- /* throw("The ratio is not acceptable for now!"); */
- ratio_scalable = 0;
- break;
- }
-
- switch (vratio * 10 / vscale) {
- case 8:
- /* 4-5 Scale in vertical direction */
- vert_band_scale = aom_vertical_band_5_4_scale;
- source_band_height = 5;
- dest_band_height = 4;
- break;
- case 6:
- /* 3-5 Scale in vertical direction */
- vert_band_scale = aom_vertical_band_5_3_scale;
- source_band_height = 5;
- dest_band_height = 3;
- break;
- case 5:
- /* 1-2 Scale in vertical direction */
-
- if (interlaced) {
- /* if the content is interlaced, point sampling is used */
- vert_band_scale = aom_vertical_band_2_1_scale;
- } else {
- interpolation = 1;
- /* if the content is progressive, interpolate */
- vert_band_scale = aom_vertical_band_2_1_scale_i;
- }
-
- source_band_height = 2;
- dest_band_height = 1;
- break;
- default:
- /* The ratio is not acceptable now */
- /* throw("The ratio is not acceptable for now!"); */
- ratio_scalable = 0;
- break;
- }
-
- if (ratio_scalable) {
- if (source_height == dest_height) {
- /* for each band of the image */
- for (k = 0; k < dest_height; ++k) {
- horiz_line_scale(source, source_width, dest, dest_width);
- source += source_pitch;
- dest += dest_pitch;
- }
-
- return;
- }
-
- if (interpolation) {
- if (source < source_base) source = source_base;
-
- horiz_line_scale(source, source_width, temp_area, dest_width);
- }
-
- for (k = 0; k < (dest_height + dest_band_height - 1) / dest_band_height;
- ++k) {
- /* scale one band horizontally */
- for (i = 0; i < source_band_height; ++i) {
- /* Trap case where we could read off the base of the source buffer */
-
- line_src = source + i * source_pitch;
-
- if (line_src < source_base) line_src = source_base;
-
- horiz_line_scale(line_src, source_width,
- temp_area + (i + 1) * dest_pitch, dest_width);
- }
-
- /* Vertical scaling is in place */
- vert_band_scale(temp_area + dest_pitch, dest_pitch, dest, dest_pitch,
- dest_width);
-
- if (interpolation)
- memcpy(temp_area, temp_area + source_band_height * dest_pitch,
- dest_width);
-
- /* Next band... */
- source += (unsigned long)source_band_height * source_pitch;
- dest += (unsigned long)dest_band_height * dest_pitch;
- }
-
- return;
- }
-
- if (hscale == 2 && hratio == 1) Scale1Dh = scale1d_2t1_ps;
-
- if (vscale == 2 && vratio == 1) {
- if (interlaced)
- Scale1Dv = scale1d_2t1_ps;
- else
- Scale1Dv = scale1d_2t1_i;
- }
-
- if (source_height == dest_height) {
- /* for each band of the image */
- for (k = 0; k < dest_height; ++k) {
- Scale1Dh(source, 1, hscale, source_width + 1, dest, 1, hratio,
- dest_width);
- source += source_pitch;
- dest += dest_pitch;
- }
-
- return;
- }
-
- if (dest_height > source_height) {
- dest_band_height = temp_area_height - 1;
- source_band_height = dest_band_height * source_height / dest_height;
- } else {
- source_band_height = temp_area_height - 1;
- dest_band_height = source_band_height * vratio / vscale;
- }
-
- /* first row needs to be done so that we can stay one row ahead for vertical
- * zoom */
- Scale1Dh(source, 1, hscale, source_width + 1, temp_area, 1, hratio,
- dest_width);
-
- /* for each band of the image */
- bands = (dest_height + dest_band_height - 1) / dest_band_height;
-
- for (k = 0; k < bands; ++k) {
- /* scale one band horizontally */
- for (i = 1; i < source_band_height + 1; ++i) {
- if (k * source_band_height + i < source_height) {
- Scale1Dh(source + i * source_pitch, 1, hscale, source_width + 1,
- temp_area + i * dest_pitch, 1, hratio, dest_width);
- } else { /* Duplicate the last row */
- /* replicate the previous temp_area row (i - 1) into row i */
- memcpy(temp_area + i * dest_pitch, temp_area + (i - 1) * dest_pitch,
- dest_pitch);
- }
- }
-
- /* scale one band vertically */
- for (j = 0; j < dest_width; ++j) {
- Scale1Dv(&temp_area[j], dest_pitch, vscale, source_band_height + 1,
- &dest[j], dest_pitch, vratio, dest_band_height);
- }
-
- /* copy temp_area row 0 over from last row in the past */
- memcpy(temp_area, temp_area + source_band_height * dest_pitch, dest_pitch);
-
- /* move to the next band */
- source += source_band_height * source_pitch;
- dest += dest_band_height * dest_pitch;
- }
-}
-
-/****************************************************************************
- *
- * ROUTINE : aom_scale_frame
- *
- * INPUTS : YV12_BUFFER_CONFIG *src : Pointer to frame to be
- * scaled.
- * YV12_BUFFER_CONFIG *dst : Pointer to buffer to hold
- * scaled frame.
- * unsigned char *temp_area : Pointer to temp work area.
- * unsigned char temp_height : Height of temp work area.
- * unsigned int hscale : Horizontal scale factor
- * numerator.
- * unsigned int hratio : Horizontal scale factor
- * denominator.
- * unsigned int vscale : Vertical scale factor
- * numerator.
- * unsigned int vratio : Vertical scale factor
- * denominator.
- * unsigned int interlaced : Interlace flag.
- * const int num_planes : Number of planes to scale.
- *
- * OUTPUTS : None.
- *
- * RETURNS : void
- *
- * FUNCTION : Performs 2-tap linear interpolation in two dimensions.
- *
- * SPECIAL NOTES : Expansion is performed one band at a time to help with
- * caching.
- *
- ****************************************************************************/
-void aom_scale_frame(YV12_BUFFER_CONFIG *src, YV12_BUFFER_CONFIG *dst,
- unsigned char *temp_area, unsigned char temp_height,
- unsigned int hscale, unsigned int hratio,
- unsigned int vscale, unsigned int vratio,
- unsigned int interlaced, const int num_planes) {
- /* Scaled luma size: y_width * hratio / hscale (and likewise for height),
-  * rounded up. */
- const int dw = (hscale - 1 + src->y_width * hratio) / hscale;
- const int dh = (vscale - 1 + src->y_height * vratio) / vscale;
-
- for (int plane = 0; plane < num_planes; ++plane) {
- /* Planes after the first use index 1 of strides/widths/heights and half
-  * the scaled luma dimensions. */
- const int is_uv = plane > 0;
- const int plane_dw = dw >> is_uv;
- const int plane_dh = dh >> is_uv;
-
- Scale2D((unsigned char *)src->buffers[plane], src->strides[is_uv],
- src->widths[is_uv], src->heights[is_uv],
- (unsigned char *)dst->buffers[plane], dst->strides[is_uv], plane_dw,
- plane_dh, temp_area, temp_height, hscale, hratio, vscale, vratio,
- interlaced);
-
- /* Scaled picture narrower than the destination plane: in each of the
-  * plane_dh scaled rows, fill columns plane_dw - 1 .. widths - 1 with the
-  * pixel found at column plane_dw - 2. */
- if (plane_dw < dst->widths[is_uv])
- for (int i = 0; i < plane_dh; ++i)
- memset(dst->buffers[plane] + i * dst->strides[is_uv] + plane_dw - 1,
- dst->buffers[plane][i * dst->strides[is_uv] + plane_dw - 2],
- dst->widths[is_uv] - plane_dw + 1);
-
- /* Scaled picture shorter than the destination plane: overwrite rows
-  * plane_dh - 1 .. heights - 1 with a copy of row plane_dh - 2.
-  * NOTE(review): each copy is widths + 1 bytes, i.e. one byte beyond the
-  * plane width - presumably covered by stride/border padding; confirm. */
- if (plane_dh < dst->heights[is_uv])
- for (int i = plane_dh - 1; i < dst->heights[is_uv]; ++i)
- memcpy(dst->buffers[plane] + i * dst->strides[is_uv],
- dst->buffers[plane] + (plane_dh - 2) * dst->strides[is_uv],
- dst->widths[is_uv] + 1);
- }
-}
diff --git a/third_party/aom/aom_scale/generic/gen_scalers.c b/third_party/aom/aom_scale/generic/gen_scalers.c
deleted file mode 100644
index 549e2aa69..000000000
--- a/third_party/aom/aom_scale/generic/gen_scalers.c
+++ /dev/null
@@ -1,201 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#include "config/aom_scale_rtcd.h"
-
-#include "aom_scale/aom_scale.h"
-#include "aom_mem/aom_mem.h"
-/****************************************************************************
- * Imports
- ****************************************************************************/
-
-/****************************************************************************
- *
- *
- * INPUTS : const unsigned char *source : Pointer to source data.
- * unsigned int source_width : Stride of source.
- * unsigned char *dest : Pointer to destination data.
- * unsigned int dest_width : Stride of destination
- * (NOT USED).
- *
- * OUTPUTS : None.
- *
- * RETURNS : void
- *
- * FUNCTION : Copies horizontal line of pixels from source to
- * destination scaling up by 4 to 5.
- *
- * SPECIAL NOTES : None.
- *
- ****************************************************************************/
-void aom_horizontal_line_5_4_scale_c(const unsigned char *source,
- unsigned int source_width,
- unsigned char *dest,
- unsigned int dest_width) {
- const unsigned char *const source_end = source + source_width;
- (void)dest_width;
-
- while (source < source_end) {
- const unsigned int a = source[0];
- const unsigned int b = source[1];
- const unsigned int c = source[2];
- const unsigned int d = source[3];
- const unsigned int e = source[4];
-
- dest[0] = (unsigned char)a;
- dest[1] = (unsigned char)((b * 192 + c * 64 + 128) >> 8);
- dest[2] = (unsigned char)((c * 128 + d * 128 + 128) >> 8);
- dest[3] = (unsigned char)((d * 64 + e * 192 + 128) >> 8);
-
- source += 5;
- dest += 4;
- }
-}
-
-void aom_vertical_band_5_4_scale_c(unsigned char *source, int src_pitch,
- unsigned char *dest, int dest_pitch,
- unsigned int dest_width) {
- const unsigned char *const dest_end = dest + dest_width;
- while (dest < dest_end) {
- const unsigned int a = source[0 * src_pitch];
- const unsigned int b = source[1 * src_pitch];
- const unsigned int c = source[2 * src_pitch];
- const unsigned int d = source[3 * src_pitch];
- const unsigned int e = source[4 * src_pitch];
-
- dest[0 * dest_pitch] = (unsigned char)a;
- dest[1 * dest_pitch] = (unsigned char)((b * 192 + c * 64 + 128) >> 8);
- dest[2 * dest_pitch] = (unsigned char)((c * 128 + d * 128 + 128) >> 8);
- dest[3 * dest_pitch] = (unsigned char)((d * 64 + e * 192 + 128) >> 8);
-
- ++source;
- ++dest;
- }
-}
-
-/*7***************************************************************************
- *
- * ROUTINE : aom_horizontal_line_3_5_scale_c
- *
- * INPUTS : const unsigned char *source : Pointer to source data.
- * unsigned int source_width : Stride of source.
- * unsigned char *dest : Pointer to destination data.
- * unsigned int dest_width : Stride of destination
- * (NOT USED).
- *
- * OUTPUTS : None.
- *
- * RETURNS : void
- *
- * FUNCTION : Copies horizontal line of pixels from source to
- * destination scaling up by 3 to 5.
- *
- * SPECIAL NOTES : None.
- *
- *
- ****************************************************************************/
-void aom_horizontal_line_5_3_scale_c(const unsigned char *source,
- unsigned int source_width,
- unsigned char *dest,
- unsigned int dest_width) {
- const unsigned char *const source_end = source + source_width;
- (void)dest_width;
- while (source < source_end) {
- const unsigned int a = source[0];
- const unsigned int b = source[1];
- const unsigned int c = source[2];
- const unsigned int d = source[3];
- const unsigned int e = source[4];
-
- dest[0] = (unsigned char)a;
- dest[1] = (unsigned char)((b * 85 + c * 171 + 128) >> 8);
- dest[2] = (unsigned char)((d * 171 + e * 85 + 128) >> 8);
-
- source += 5;
- dest += 3;
- }
-}
-
-void aom_vertical_band_5_3_scale_c(unsigned char *source, int src_pitch,
- unsigned char *dest, int dest_pitch,
- unsigned int dest_width) {
- const unsigned char *const dest_end = dest + dest_width;
- while (dest < dest_end) {
- const unsigned int a = source[0 * src_pitch];
- const unsigned int b = source[1 * src_pitch];
- const unsigned int c = source[2 * src_pitch];
- const unsigned int d = source[3 * src_pitch];
- const unsigned int e = source[4 * src_pitch];
-
- dest[0 * dest_pitch] = (unsigned char)a;
- dest[1 * dest_pitch] = (unsigned char)((b * 85 + c * 171 + 128) >> 8);
- dest[2 * dest_pitch] = (unsigned char)((d * 171 + e * 85 + 128) >> 8);
-
- ++source;
- ++dest;
- }
-}
-
-/****************************************************************************
- *
- * ROUTINE : aom_horizontal_line_1_2_scale_c
- *
- * INPUTS : const unsigned char *source : Pointer to source data.
- * unsigned int source_width : Stride of source.
- * unsigned char *dest : Pointer to destination data.
- * unsigned int dest_width : Stride of destination
- * (NOT USED).
- *
- * OUTPUTS : None.
- *
- * RETURNS : void
- *
- * FUNCTION : Copies horizontal line of pixels from source to
- * destination scaling up by 1 to 2.
- *
- * SPECIAL NOTES : None.
- *
- ****************************************************************************/
-void aom_horizontal_line_2_1_scale_c(const unsigned char *source,
- unsigned int source_width,
- unsigned char *dest,
- unsigned int dest_width) {
- const unsigned char *const source_end = source + source_width;
- (void)dest_width;
- while (source < source_end) {
- dest[0] = source[0];
- source += 2;
- ++dest;
- }
-}
-
-void aom_vertical_band_2_1_scale_c(unsigned char *source, int src_pitch,
- unsigned char *dest, int dest_pitch,
- unsigned int dest_width) {
- (void)dest_pitch;
- (void)src_pitch;
- memcpy(dest, source, dest_width);
-}
-
-void aom_vertical_band_2_1_scale_i_c(unsigned char *source, int src_pitch,
- unsigned char *dest, int dest_pitch,
- unsigned int dest_width) {
- const unsigned char *const dest_end = dest + dest_width;
- (void)dest_pitch;
- while (dest < dest_end) {
- const unsigned int a = source[-src_pitch] * 3;
- const unsigned int b = source[0] * 10;
- const unsigned int c = source[src_pitch] * 3;
- dest[0] = (unsigned char)((8 + a + b + c) >> 4);
- ++source;
- ++dest;
- }
-}
diff --git a/third_party/aom/aom_scale/generic/yv12config.c b/third_party/aom/aom_scale/generic/yv12config.c
deleted file mode 100644
index 84705e2d8..000000000
--- a/third_party/aom/aom_scale/generic/yv12config.c
+++ /dev/null
@@ -1,203 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#include <assert.h>
-
-#include "aom_mem/aom_mem.h"
-#include "aom_ports/mem.h"
-#include "aom_scale/yv12config.h"
-#include "av1/common/enums.h"
-
-/****************************************************************************
- * Exports
- ****************************************************************************/
-
-/****************************************************************************
- *
- ****************************************************************************/
-#define yv12_align_addr(addr, align) \
- (void *)(((size_t)(addr) + ((align)-1)) & (size_t) - (align))
-
-// TODO(jkoleszar): Maybe replace this with struct aom_image
-
-int aom_free_frame_buffer(YV12_BUFFER_CONFIG *ybf) {
- if (ybf) {
- if (ybf->buffer_alloc_sz > 0) {
- aom_free(ybf->buffer_alloc);
- }
- if (ybf->y_buffer_8bit) aom_free(ybf->y_buffer_8bit);
-
- /* buffer_alloc isn't accessed by most functions. Rather y_buffer,
- u_buffer and v_buffer point to buffer_alloc and are used. Clear out
- all of this so that a freed pointer isn't inadvertently used */
- memset(ybf, 0, sizeof(YV12_BUFFER_CONFIG));
- } else {
- return -1;
- }
-
- return 0;
-}
-
-int aom_realloc_frame_buffer(YV12_BUFFER_CONFIG *ybf, int width, int height,
- int ss_x, int ss_y, int use_highbitdepth,
- int border, int byte_alignment,
- aom_codec_frame_buffer_t *fb,
- aom_get_frame_buffer_cb_fn_t cb, void *cb_priv) {
-#if CONFIG_SIZE_LIMIT
- if (width > DECODE_WIDTH_LIMIT || height > DECODE_HEIGHT_LIMIT) return -1;
-#endif
-
- if (ybf) {
- const int aom_byte_align = (byte_alignment == 0) ? 1 : byte_alignment;
- const int aligned_width = (width + 7) & ~7;
- const int aligned_height = (height + 7) & ~7;
- const int y_stride = ((aligned_width + 2 * border) + 31) & ~31;
- const uint64_t yplane_size =
- (aligned_height + 2 * border) * (uint64_t)y_stride + byte_alignment;
- const int uv_width = aligned_width >> ss_x;
- const int uv_height = aligned_height >> ss_y;
- const int uv_stride = y_stride >> ss_x;
- const int uv_border_w = border >> ss_x;
- const int uv_border_h = border >> ss_y;
- const uint64_t uvplane_size =
- (uv_height + 2 * uv_border_h) * (uint64_t)uv_stride + byte_alignment;
-
- const uint64_t frame_size =
- (1 + use_highbitdepth) * (yplane_size + 2 * uvplane_size);
-
- uint8_t *buf = NULL;
-
-#if defined AOM_MAX_ALLOCABLE_MEMORY
- // The size of ybf->buffer_alloc.
- uint64_t alloc_size = frame_size;
- // The size of ybf->y_buffer_8bit.
- if (use_highbitdepth) alloc_size += yplane_size;
- // The decoder may allocate REF_FRAMES frame buffers in the frame buffer
- // pool. Bound the total amount of allocated memory as if these REF_FRAMES
- // frame buffers were allocated in a single allocation.
- if (alloc_size > AOM_MAX_ALLOCABLE_MEMORY / REF_FRAMES) return -1;
-#endif
-
- if (cb != NULL) {
- const int align_addr_extra_size = 31;
- const uint64_t external_frame_size = frame_size + align_addr_extra_size;
-
- assert(fb != NULL);
-
- if (external_frame_size != (size_t)external_frame_size) return -1;
-
- // Allocation to hold larger frame, or first allocation.
- if (cb(cb_priv, (size_t)external_frame_size, fb) < 0) return -1;
-
- if (fb->data == NULL || fb->size < external_frame_size) return -1;
-
- ybf->buffer_alloc = (uint8_t *)yv12_align_addr(fb->data, 32);
-
-#if defined(__has_feature)
-#if __has_feature(memory_sanitizer)
- // This memset is needed for fixing the issue of using uninitialized
- // value in msan test. It will cause a perf loss, so only do this for
- // msan test.
- memset(ybf->buffer_alloc, 0, (size_t)frame_size);
-#endif
-#endif
- } else if (frame_size > (size_t)ybf->buffer_alloc_sz) {
- // Allocation to hold larger frame, or first allocation.
- aom_free(ybf->buffer_alloc);
- ybf->buffer_alloc = NULL;
-
- if (frame_size != (size_t)frame_size) return -1;
-
- ybf->buffer_alloc = (uint8_t *)aom_memalign(32, (size_t)frame_size);
- if (!ybf->buffer_alloc) return -1;
-
- ybf->buffer_alloc_sz = (size_t)frame_size;
-
- // This memset is needed for fixing valgrind error from C loop filter
- // due to access uninitialized memory in frame border. It could be
- // removed if border is totally removed.
- memset(ybf->buffer_alloc, 0, ybf->buffer_alloc_sz);
- }
-
- /* Only support allocating buffers that have a border that's a multiple
- * of 32. The border restriction is required to get 16-byte alignment of
- * the start of the chroma rows without introducing an arbitrary gap
- * between planes, which would break the semantics of things like
- * aom_img_set_rect(). */
- if (border & 0x1f) return -3;
-
- ybf->y_crop_width = width;
- ybf->y_crop_height = height;
- ybf->y_width = aligned_width;
- ybf->y_height = aligned_height;
- ybf->y_stride = y_stride;
-
- ybf->uv_crop_width = (width + ss_x) >> ss_x;
- ybf->uv_crop_height = (height + ss_y) >> ss_y;
- ybf->uv_width = uv_width;
- ybf->uv_height = uv_height;
- ybf->uv_stride = uv_stride;
-
- ybf->border = border;
- ybf->frame_size = (size_t)frame_size;
- ybf->subsampling_x = ss_x;
- ybf->subsampling_y = ss_y;
-
- buf = ybf->buffer_alloc;
- if (use_highbitdepth) {
- // Store uint16 addresses when using 16bit framebuffers
- buf = CONVERT_TO_BYTEPTR(ybf->buffer_alloc);
- ybf->flags = YV12_FLAG_HIGHBITDEPTH;
- } else {
- ybf->flags = 0;
- }
-
- ybf->y_buffer = (uint8_t *)yv12_align_addr(
- buf + (border * y_stride) + border, aom_byte_align);
- ybf->u_buffer = (uint8_t *)yv12_align_addr(
- buf + yplane_size + (uv_border_h * uv_stride) + uv_border_w,
- aom_byte_align);
- ybf->v_buffer =
- (uint8_t *)yv12_align_addr(buf + yplane_size + uvplane_size +
- (uv_border_h * uv_stride) + uv_border_w,
- aom_byte_align);
-
- ybf->use_external_reference_buffers = 0;
-
- if (use_highbitdepth) {
- if (ybf->y_buffer_8bit) aom_free(ybf->y_buffer_8bit);
- ybf->y_buffer_8bit = (uint8_t *)aom_memalign(32, (size_t)yplane_size);
- if (!ybf->y_buffer_8bit) return -1;
- } else {
- if (ybf->y_buffer_8bit) {
- aom_free(ybf->y_buffer_8bit);
- ybf->y_buffer_8bit = NULL;
- ybf->buf_8bit_valid = 0;
- }
- }
-
- ybf->corrupted = 0; /* assume not corrupted by errors */
- return 0;
- }
- return -2;
-}
-
-int aom_alloc_frame_buffer(YV12_BUFFER_CONFIG *ybf, int width, int height,
- int ss_x, int ss_y, int use_highbitdepth, int border,
- int byte_alignment) {
- if (ybf) {
- aom_free_frame_buffer(ybf);
- return aom_realloc_frame_buffer(ybf, width, height, ss_x, ss_y,
- use_highbitdepth, border, byte_alignment,
- NULL, NULL, NULL);
- }
- return -2;
-}
diff --git a/third_party/aom/aom_scale/generic/yv12extend.c b/third_party/aom/aom_scale/generic/yv12extend.c
deleted file mode 100644
index ba183520a..000000000
--- a/third_party/aom/aom_scale/generic/yv12extend.c
+++ /dev/null
@@ -1,411 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#include <assert.h>
-
-#include "config/aom_config.h"
-#include "config/aom_scale_rtcd.h"
-
-#include "aom/aom_integer.h"
-#include "aom_mem/aom_mem.h"
-#include "aom_ports/mem.h"
-#include "aom_scale/yv12config.h"
-
-static void extend_plane(uint8_t *const src, int src_stride, int width,
- int height, int extend_top, int extend_left,
- int extend_bottom, int extend_right) {
- int i;
- const int linesize = extend_left + extend_right + width;
-
- /* copy the left and right most columns out */
- uint8_t *src_ptr1 = src;
- uint8_t *src_ptr2 = src + width - 1;
- uint8_t *dst_ptr1 = src - extend_left;
- uint8_t *dst_ptr2 = src + width;
-
- for (i = 0; i < height; ++i) {
- memset(dst_ptr1, src_ptr1[0], extend_left);
- memset(dst_ptr2, src_ptr2[0], extend_right);
- src_ptr1 += src_stride;
- src_ptr2 += src_stride;
- dst_ptr1 += src_stride;
- dst_ptr2 += src_stride;
- }
-
- /* Now copy the top and bottom lines into each line of the respective
- * borders
- */
- src_ptr1 = src - extend_left;
- src_ptr2 = src + src_stride * (height - 1) - extend_left;
- dst_ptr1 = src + src_stride * -extend_top - extend_left;
- dst_ptr2 = src + src_stride * height - extend_left;
-
- for (i = 0; i < extend_top; ++i) {
- memcpy(dst_ptr1, src_ptr1, linesize);
- dst_ptr1 += src_stride;
- }
-
- for (i = 0; i < extend_bottom; ++i) {
- memcpy(dst_ptr2, src_ptr2, linesize);
- dst_ptr2 += src_stride;
- }
-}
-
-static void extend_plane_high(uint8_t *const src8, int src_stride, int width,
- int height, int extend_top, int extend_left,
- int extend_bottom, int extend_right) {
- int i;
- const int linesize = extend_left + extend_right + width;
- uint16_t *src = CONVERT_TO_SHORTPTR(src8);
-
- /* copy the left and right most columns out */
- uint16_t *src_ptr1 = src;
- uint16_t *src_ptr2 = src + width - 1;
- uint16_t *dst_ptr1 = src - extend_left;
- uint16_t *dst_ptr2 = src + width;
-
- for (i = 0; i < height; ++i) {
- aom_memset16(dst_ptr1, src_ptr1[0], extend_left);
- aom_memset16(dst_ptr2, src_ptr2[0], extend_right);
- src_ptr1 += src_stride;
- src_ptr2 += src_stride;
- dst_ptr1 += src_stride;
- dst_ptr2 += src_stride;
- }
-
- /* Now copy the top and bottom lines into each line of the respective
- * borders
- */
- src_ptr1 = src - extend_left;
- src_ptr2 = src + src_stride * (height - 1) - extend_left;
- dst_ptr1 = src + src_stride * -extend_top - extend_left;
- dst_ptr2 = src + src_stride * height - extend_left;
-
- for (i = 0; i < extend_top; ++i) {
- memcpy(dst_ptr1, src_ptr1, linesize * sizeof(uint16_t));
- dst_ptr1 += src_stride;
- }
-
- for (i = 0; i < extend_bottom; ++i) {
- memcpy(dst_ptr2, src_ptr2, linesize * sizeof(uint16_t));
- dst_ptr2 += src_stride;
- }
-}
-
-void aom_yv12_extend_frame_borders_c(YV12_BUFFER_CONFIG *ybf,
- const int num_planes) {
- assert(ybf->border % 2 == 0);
- assert(ybf->y_height - ybf->y_crop_height < 16);
- assert(ybf->y_width - ybf->y_crop_width < 16);
- assert(ybf->y_height - ybf->y_crop_height >= 0);
- assert(ybf->y_width - ybf->y_crop_width >= 0);
-
- if (ybf->flags & YV12_FLAG_HIGHBITDEPTH) {
- for (int plane = 0; plane < num_planes; ++plane) {
- const int is_uv = plane > 0;
- const int plane_border = ybf->border >> is_uv;
- extend_plane_high(
- ybf->buffers[plane], ybf->strides[is_uv], ybf->crop_widths[is_uv],
- ybf->crop_heights[is_uv], plane_border, plane_border,
- plane_border + ybf->heights[is_uv] - ybf->crop_heights[is_uv],
- plane_border + ybf->widths[is_uv] - ybf->crop_widths[is_uv]);
- }
- return;
- }
- for (int plane = 0; plane < num_planes; ++plane) {
- const int is_uv = plane > 0;
- const int plane_border = ybf->border >> is_uv;
- extend_plane(ybf->buffers[plane], ybf->strides[is_uv],
- ybf->crop_widths[is_uv], ybf->crop_heights[is_uv],
- plane_border, plane_border,
- plane_border + ybf->heights[is_uv] - ybf->crop_heights[is_uv],
- plane_border + ybf->widths[is_uv] - ybf->crop_widths[is_uv]);
- }
-}
-
-static void extend_frame(YV12_BUFFER_CONFIG *const ybf, int ext_size,
- const int num_planes) {
- const int ss_x = ybf->uv_width < ybf->y_width;
- const int ss_y = ybf->uv_height < ybf->y_height;
-
- assert(ybf->y_height - ybf->y_crop_height < 16);
- assert(ybf->y_width - ybf->y_crop_width < 16);
- assert(ybf->y_height - ybf->y_crop_height >= 0);
- assert(ybf->y_width - ybf->y_crop_width >= 0);
-
- if (ybf->flags & YV12_FLAG_HIGHBITDEPTH) {
- for (int plane = 0; plane < num_planes; ++plane) {
- const int is_uv = plane > 0;
- const int top = ext_size >> (is_uv ? ss_y : 0);
- const int left = ext_size >> (is_uv ? ss_x : 0);
- const int bottom = top + ybf->heights[is_uv] - ybf->crop_heights[is_uv];
- const int right = left + ybf->widths[is_uv] - ybf->crop_widths[is_uv];
- extend_plane_high(ybf->buffers[plane], ybf->strides[is_uv],
- ybf->crop_widths[is_uv], ybf->crop_heights[is_uv], top,
- left, bottom, right);
- }
- return;
- }
- for (int plane = 0; plane < num_planes; ++plane) {
- const int is_uv = plane > 0;
- const int top = ext_size >> (is_uv ? ss_y : 0);
- const int left = ext_size >> (is_uv ? ss_x : 0);
- const int bottom = top + ybf->heights[is_uv] - ybf->crop_heights[is_uv];
- const int right = left + ybf->widths[is_uv] - ybf->crop_widths[is_uv];
- extend_plane(ybf->buffers[plane], ybf->strides[is_uv],
- ybf->crop_widths[is_uv], ybf->crop_heights[is_uv], top, left,
- bottom, right);
- }
-}
-
-void aom_extend_frame_borders_c(YV12_BUFFER_CONFIG *ybf, const int num_planes) {
- extend_frame(ybf, ybf->border, num_planes);
-}
-
-void aom_extend_frame_inner_borders_c(YV12_BUFFER_CONFIG *ybf,
- const int num_planes) {
- const int inner_bw = (ybf->border > AOMINNERBORDERINPIXELS)
- ? AOMINNERBORDERINPIXELS
- : ybf->border;
- extend_frame(ybf, inner_bw, num_planes);
-}
-
-void aom_extend_frame_borders_y_c(YV12_BUFFER_CONFIG *ybf) {
- int ext_size = ybf->border;
- assert(ybf->y_height - ybf->y_crop_height < 16);
- assert(ybf->y_width - ybf->y_crop_width < 16);
- assert(ybf->y_height - ybf->y_crop_height >= 0);
- assert(ybf->y_width - ybf->y_crop_width >= 0);
-
- if (ybf->flags & YV12_FLAG_HIGHBITDEPTH) {
- extend_plane_high(ybf->y_buffer, ybf->y_stride, ybf->y_crop_width,
- ybf->y_crop_height, ext_size, ext_size,
- ext_size + ybf->y_height - ybf->y_crop_height,
- ext_size + ybf->y_width - ybf->y_crop_width);
- return;
- }
- extend_plane(ybf->y_buffer, ybf->y_stride, ybf->y_crop_width,
- ybf->y_crop_height, ext_size, ext_size,
- ext_size + ybf->y_height - ybf->y_crop_height,
- ext_size + ybf->y_width - ybf->y_crop_width);
-}
-
-static void memcpy_short_addr(uint8_t *dst8, const uint8_t *src8, int num) {
- uint16_t *dst = CONVERT_TO_SHORTPTR(dst8);
- uint16_t *src = CONVERT_TO_SHORTPTR(src8);
- memcpy(dst, src, num * sizeof(uint16_t));
-}
-
-// Copies the source image into the destination image and updates the
-// destination's UMV borders.
-// Note: The frames are assumed to be identical in size.
-void aom_yv12_copy_frame_c(const YV12_BUFFER_CONFIG *src_bc,
- YV12_BUFFER_CONFIG *dst_bc, const int num_planes) {
-#if 0
- /* These assertions are valid in the codec, but the libaom-tester uses
- * this code slightly differently.
- */
- assert(src_bc->y_width == dst_bc->y_width);
- assert(src_bc->y_height == dst_bc->y_height);
-#endif
-
- assert((src_bc->flags & YV12_FLAG_HIGHBITDEPTH) ==
- (dst_bc->flags & YV12_FLAG_HIGHBITDEPTH));
-
- if (src_bc->flags & YV12_FLAG_HIGHBITDEPTH) {
- for (int plane = 0; plane < num_planes; ++plane) {
- const uint8_t *plane_src = src_bc->buffers[plane];
- uint8_t *plane_dst = dst_bc->buffers[plane];
- const int is_uv = plane > 0;
-
- for (int row = 0; row < src_bc->heights[is_uv]; ++row) {
- memcpy_short_addr(plane_dst, plane_src, src_bc->widths[is_uv]);
- plane_src += src_bc->strides[is_uv];
- plane_dst += dst_bc->strides[is_uv];
- }
- }
- aom_yv12_extend_frame_borders_c(dst_bc, num_planes);
- return;
- }
- for (int plane = 0; plane < num_planes; ++plane) {
- const uint8_t *plane_src = src_bc->buffers[plane];
- uint8_t *plane_dst = dst_bc->buffers[plane];
- const int is_uv = plane > 0;
-
- for (int row = 0; row < src_bc->heights[is_uv]; ++row) {
- memcpy(plane_dst, plane_src, src_bc->widths[is_uv]);
- plane_src += src_bc->strides[is_uv];
- plane_dst += dst_bc->strides[is_uv];
- }
- }
- aom_yv12_extend_frame_borders_c(dst_bc, num_planes);
-}
-
-void aom_yv12_copy_y_c(const YV12_BUFFER_CONFIG *src_ybc,
- YV12_BUFFER_CONFIG *dst_ybc) {
- int row;
- const uint8_t *src = src_ybc->y_buffer;
- uint8_t *dst = dst_ybc->y_buffer;
-
- if (src_ybc->flags & YV12_FLAG_HIGHBITDEPTH) {
- const uint16_t *src16 = CONVERT_TO_SHORTPTR(src);
- uint16_t *dst16 = CONVERT_TO_SHORTPTR(dst);
- for (row = 0; row < src_ybc->y_height; ++row) {
- memcpy(dst16, src16, src_ybc->y_width * sizeof(uint16_t));
- src16 += src_ybc->y_stride;
- dst16 += dst_ybc->y_stride;
- }
- return;
- }
-
- for (row = 0; row < src_ybc->y_height; ++row) {
- memcpy(dst, src, src_ybc->y_width);
- src += src_ybc->y_stride;
- dst += dst_ybc->y_stride;
- }
-}
-
-void aom_yv12_copy_u_c(const YV12_BUFFER_CONFIG *src_bc,
- YV12_BUFFER_CONFIG *dst_bc) {
- int row;
- const uint8_t *src = src_bc->u_buffer;
- uint8_t *dst = dst_bc->u_buffer;
-
- if (src_bc->flags & YV12_FLAG_HIGHBITDEPTH) {
- const uint16_t *src16 = CONVERT_TO_SHORTPTR(src);
- uint16_t *dst16 = CONVERT_TO_SHORTPTR(dst);
- for (row = 0; row < src_bc->uv_height; ++row) {
- memcpy(dst16, src16, src_bc->uv_width * sizeof(uint16_t));
- src16 += src_bc->uv_stride;
- dst16 += dst_bc->uv_stride;
- }
- return;
- }
-
- for (row = 0; row < src_bc->uv_height; ++row) {
- memcpy(dst, src, src_bc->uv_width);
- src += src_bc->uv_stride;
- dst += dst_bc->uv_stride;
- }
-}
-
-void aom_yv12_copy_v_c(const YV12_BUFFER_CONFIG *src_bc,
- YV12_BUFFER_CONFIG *dst_bc) {
- int row;
- const uint8_t *src = src_bc->v_buffer;
- uint8_t *dst = dst_bc->v_buffer;
-
- if (src_bc->flags & YV12_FLAG_HIGHBITDEPTH) {
- const uint16_t *src16 = CONVERT_TO_SHORTPTR(src);
- uint16_t *dst16 = CONVERT_TO_SHORTPTR(dst);
- for (row = 0; row < src_bc->uv_height; ++row) {
- memcpy(dst16, src16, src_bc->uv_width * sizeof(uint16_t));
- src16 += src_bc->uv_stride;
- dst16 += dst_bc->uv_stride;
- }
- return;
- }
-
- for (row = 0; row < src_bc->uv_height; ++row) {
- memcpy(dst, src, src_bc->uv_width);
- src += src_bc->uv_stride;
- dst += dst_bc->uv_stride;
- }
-}
-
-void aom_yv12_partial_copy_y_c(const YV12_BUFFER_CONFIG *src_ybc,
- YV12_BUFFER_CONFIG *dst_ybc, int hstart,
- int hend, int vstart, int vend) {
- int row;
- const uint8_t *src = src_ybc->y_buffer;
- uint8_t *dst = dst_ybc->y_buffer;
-
- if (src_ybc->flags & YV12_FLAG_HIGHBITDEPTH) {
- const uint16_t *src16 =
- CONVERT_TO_SHORTPTR(src + vstart * src_ybc->y_stride + hstart);
- uint16_t *dst16 =
- CONVERT_TO_SHORTPTR(dst + vstart * dst_ybc->y_stride + hstart);
- for (row = vstart; row < vend; ++row) {
- memcpy(dst16, src16, (hend - hstart) * sizeof(uint16_t));
- src16 += src_ybc->y_stride;
- dst16 += dst_ybc->y_stride;
- }
- return;
- }
- src = (src + vstart * src_ybc->y_stride + hstart);
- dst = (dst + vstart * dst_ybc->y_stride + hstart);
-
- for (row = vstart; row < vend; ++row) {
- memcpy(dst, src, (hend - hstart));
- src += src_ybc->y_stride;
- dst += dst_ybc->y_stride;
- }
-}
-
-void aom_yv12_partial_copy_u_c(const YV12_BUFFER_CONFIG *src_bc,
- YV12_BUFFER_CONFIG *dst_bc, int hstart, int hend,
- int vstart, int vend) {
- int row;
- const uint8_t *src = src_bc->u_buffer;
- uint8_t *dst = dst_bc->u_buffer;
-
- if (src_bc->flags & YV12_FLAG_HIGHBITDEPTH) {
- const uint16_t *src16 =
- CONVERT_TO_SHORTPTR(src + vstart * src_bc->uv_stride + hstart);
- uint16_t *dst16 =
- CONVERT_TO_SHORTPTR(dst + vstart * dst_bc->uv_stride + hstart);
- for (row = vstart; row < vend; ++row) {
- memcpy(dst16, src16, (hend - hstart) * sizeof(uint16_t));
- src16 += src_bc->uv_stride;
- dst16 += dst_bc->uv_stride;
- }
- return;
- }
-
- src = (src + vstart * src_bc->uv_stride + hstart);
- dst = (dst + vstart * dst_bc->uv_stride + hstart);
-
- for (row = vstart; row < vend; ++row) {
- memcpy(dst, src, (hend - hstart));
- src += src_bc->uv_stride;
- dst += dst_bc->uv_stride;
- }
-}
-
-void aom_yv12_partial_copy_v_c(const YV12_BUFFER_CONFIG *src_bc,
- YV12_BUFFER_CONFIG *dst_bc, int hstart, int hend,
- int vstart, int vend) {
- int row;
- const uint8_t *src = src_bc->v_buffer;
- uint8_t *dst = dst_bc->v_buffer;
-
- if (src_bc->flags & YV12_FLAG_HIGHBITDEPTH) {
- const uint16_t *src16 =
- CONVERT_TO_SHORTPTR(src + vstart * src_bc->uv_stride + hstart);
- uint16_t *dst16 =
- CONVERT_TO_SHORTPTR(dst + vstart * dst_bc->uv_stride + hstart);
- for (row = vstart; row < vend; ++row) {
- memcpy(dst16, src16, (hend - hstart) * sizeof(uint16_t));
- src16 += src_bc->uv_stride;
- dst16 += dst_bc->uv_stride;
- }
- return;
- }
-
- src = (src + vstart * src_bc->uv_stride + hstart);
- dst = (dst + vstart * dst_bc->uv_stride + hstart);
-
- for (row = vstart; row < vend; ++row) {
- memcpy(dst, src, (hend - hstart));
- src += src_bc->uv_stride;
- dst += dst_bc->uv_stride;
- }
-}
diff --git a/third_party/aom/aom_scale/mips/dspr2/yv12extend_dspr2.c b/third_party/aom/aom_scale/mips/dspr2/yv12extend_dspr2.c
deleted file mode 100644
index 869e594d7..000000000
--- a/third_party/aom/aom_scale/mips/dspr2/yv12extend_dspr2.c
+++ /dev/null
@@ -1,142 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#include <assert.h>
-
-#include "config/aom_config.h"
-
-#include "aom_scale/yv12config.h"
-#include "aom_mem/aom_mem.h"
-#include "aom_scale/aom_scale.h"
-
-#if HAVE_DSPR2
-static void extend_plane(uint8_t *const src, int src_stride, int width,
- int height, int extend_top, int extend_left,
- int extend_bottom, int extend_right) {
- int i, j;
- uint8_t *left_src, *right_src;
- uint8_t *left_dst_start, *right_dst_start;
- uint8_t *left_dst, *right_dst;
- uint8_t *top_src, *bot_src;
- uint8_t *top_dst, *bot_dst;
- uint32_t left_pix;
- uint32_t right_pix;
- uint32_t linesize;
-
- /* copy the left and right most columns out */
- left_src = src;
- right_src = src + width - 1;
- left_dst_start = src - extend_left;
- right_dst_start = src + width;
-
- for (i = height; i--;) {
- left_dst = left_dst_start;
- right_dst = right_dst_start;
-
- __asm__ __volatile__(
- "lb %[left_pix], 0(%[left_src]) \n\t"
- "lb %[right_pix], 0(%[right_src]) \n\t"
- "replv.qb %[left_pix], %[left_pix] \n\t"
- "replv.qb %[right_pix], %[right_pix] \n\t"
-
- : [left_pix] "=&r"(left_pix), [right_pix] "=&r"(right_pix)
- : [left_src] "r"(left_src), [right_src] "r"(right_src));
-
- for (j = extend_left / 4; j--;) {
- __asm__ __volatile__(
- "sw %[left_pix], 0(%[left_dst]) \n\t"
- "sw %[right_pix], 0(%[right_dst]) \n\t"
-
- :
- : [left_dst] "r"(left_dst), [left_pix] "r"(left_pix),
- [right_dst] "r"(right_dst), [right_pix] "r"(right_pix));
-
- left_dst += 4;
- right_dst += 4;
- }
-
- for (j = extend_left % 4; j--;) {
- __asm__ __volatile__(
- "sb %[left_pix], 0(%[left_dst]) \n\t"
- "sb %[right_pix], 0(%[right_dst]) \n\t"
-
- :
- : [left_dst] "r"(left_dst), [left_pix] "r"(left_pix),
- [right_dst] "r"(right_dst), [right_pix] "r"(right_pix));
-
- left_dst += 1;
- right_dst += 1;
- }
-
- left_src += src_stride;
- right_src += src_stride;
- left_dst_start += src_stride;
- right_dst_start += src_stride;
- }
-
- /* Now copy the top and bottom lines into each line of the respective
- * borders
- */
- top_src = src - extend_left;
- bot_src = src + src_stride * (height - 1) - extend_left;
- top_dst = src + src_stride * (-extend_top) - extend_left;
- bot_dst = src + src_stride * (height)-extend_left;
- linesize = extend_left + extend_right + width;
-
- for (i = 0; i < extend_top; i++) {
- memcpy(top_dst, top_src, linesize);
- top_dst += src_stride;
- }
-
- for (i = 0; i < extend_bottom; i++) {
- memcpy(bot_dst, bot_src, linesize);
- bot_dst += src_stride;
- }
-}
-
-static void extend_frame(YV12_BUFFER_CONFIG *const ybf, int ext_size) {
- const int c_w = ybf->uv_crop_width;
- const int c_h = ybf->uv_crop_height;
- const int ss_x = ybf->uv_width < ybf->y_width;
- const int ss_y = ybf->uv_height < ybf->y_height;
- const int c_et = ext_size >> ss_y;
- const int c_el = ext_size >> ss_x;
- const int c_eb = c_et + ybf->uv_height - ybf->uv_crop_height;
- const int c_er = c_el + ybf->uv_width - ybf->uv_crop_width;
-
- assert(ybf->y_height - ybf->y_crop_height < 16);
- assert(ybf->y_width - ybf->y_crop_width < 16);
- assert(ybf->y_height - ybf->y_crop_height >= 0);
- assert(ybf->y_width - ybf->y_crop_width >= 0);
-
- extend_plane(ybf->y_buffer, ybf->y_stride, ybf->y_crop_width,
- ybf->y_crop_height, ext_size, ext_size,
- ext_size + ybf->y_height - ybf->y_crop_height,
- ext_size + ybf->y_width - ybf->y_crop_width);
-
- extend_plane(ybf->u_buffer, ybf->uv_stride, c_w, c_h, c_et, c_el, c_eb, c_er);
-
- extend_plane(ybf->v_buffer, ybf->uv_stride, c_w, c_h, c_et, c_el, c_eb, c_er);
-}
-
-void aom_extend_frame_borders_dspr2(YV12_BUFFER_CONFIG *ybf,
- const int num_planes) {
- extend_frame(ybf, ybf->border, num_planes);
-}
-
-void aom_extend_frame_inner_borders_dspr2(YV12_BUFFER_CONFIG *ybf,
- const int num_planes) {
- const int inner_bw = (ybf->border > AOMINNERBORDERINPIXELS)
- ? AOMINNERBORDERINPIXELS
- : ybf->border;
- extend_frame(ybf, inner_bw, num_planes);
-}
-#endif
diff --git a/third_party/aom/aom_scale/yv12config.h b/third_party/aom/aom_scale/yv12config.h
deleted file mode 100644
index 2fb81acd7..000000000
--- a/third_party/aom/aom_scale/yv12config.h
+++ /dev/null
@@ -1,143 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#ifndef AOM_AOM_SCALE_YV12CONFIG_H_
-#define AOM_AOM_SCALE_YV12CONFIG_H_
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-#include "config/aom_config.h"
-
-#include "aom/aom_codec.h"
-#include "aom/aom_frame_buffer.h"
-#include "aom/aom_integer.h"
-
-#define AOMINNERBORDERINPIXELS 160
-#define AOM_INTERP_EXTEND 4
-
-// TODO(jingning): Use unified inter predictor for encoder and
-// decoder during the development process. Revisit the frame border
-// to improve the decoder performance.
-#if CONFIG_REDUCED_ENCODER_BORDER
-#define AOM_BORDER_IN_PIXELS 160
-#else
-#define AOM_BORDER_IN_PIXELS 288
-#endif // CONFIG_REDUCED_ENCODER_BORDER
-
-// Describes one frame buffer: per-plane dimensions, strides and data pointers,
-// followed by allocation bookkeeping and stream metadata. Each anonymous union
-// below overlays a set of named fields with an array over the same storage, so
-// a plane can be addressed either by name or by index.
-typedef struct yv12_buffer_config {
- // Plane widths (named fields overlay widths[]).
- union {
- struct {
- int y_width;
- int uv_width;
- int alpha_width;
- };
- int widths[3];
- };
- // Plane heights (named fields overlay heights[]).
- union {
- struct {
- int y_height;
- int uv_height;
- int alpha_height;
- };
- int heights[3];
- };
- // Cropped widths for luma and chroma (named fields overlay crop_widths[]).
- union {
- struct {
- int y_crop_width;
- int uv_crop_width;
- };
- int crop_widths[2];
- };
- // Cropped heights for luma and chroma (named fields overlay crop_heights[]).
- union {
- struct {
- int y_crop_height;
- int uv_crop_height;
- };
- int crop_heights[2];
- };
- // Row strides (named fields overlay strides[]); U and V share uv_stride.
- union {
- struct {
- int y_stride;
- int uv_stride;
- int alpha_stride;
- };
- int strides[3];
- };
- // Plane data pointers (named fields overlay buffers[]).
- union {
- struct {
- uint8_t *y_buffer;
- uint8_t *u_buffer;
- uint8_t *v_buffer;
- uint8_t *alpha_buffer;
- };
- uint8_t *buffers[4];
- };
-
- // Indicate whether y_buffer, u_buffer, and v_buffer points to the internally
- // allocated memory or external buffers.
- int use_external_reference_buffers;
- // This is needed to store y_buffer, u_buffer, and v_buffer when set reference
- // uses an external reference, and restore those buffer pointers after the
- // external reference frame is no longer used.
- uint8_t *store_buf_adr[3];
-
- // If the frame is stored in a 16-bit buffer, this stores an 8-bit version
- // for use in global motion detection. It is allocated on-demand.
- uint8_t *y_buffer_8bit;
- int buf_8bit_valid;
-
- uint8_t *buffer_alloc;
- size_t buffer_alloc_sz;
- int border;
- size_t frame_size;
- int subsampling_x;
- int subsampling_y;
- unsigned int bit_depth;
- aom_color_primaries_t color_primaries;
- aom_transfer_characteristics_t transfer_characteristics;
- aom_matrix_coefficients_t matrix_coefficients;
- int monochrome;
- aom_chroma_sample_position_t chroma_sample_position;
- aom_color_range_t color_range;
- int render_width;
- int render_height;
-
- int corrupted;
- int flags;
-} YV12_BUFFER_CONFIG;
-
-#define YV12_FLAG_HIGHBITDEPTH 8
-
-int aom_alloc_frame_buffer(YV12_BUFFER_CONFIG *ybf, int width, int height,
- int ss_x, int ss_y, int use_highbitdepth, int border,
- int byte_alignment);
-
-// Updates the yv12 buffer config with the frame buffer. |byte_alignment| must
-// be a power of 2, from 32 to 1024. 0 sets legacy alignment. If cb is not
-// NULL, then libaom is using the frame buffer callbacks to handle memory.
-// If cb is not NULL, libaom will call cb with minimum size in bytes needed
-// to decode the current frame. If cb is NULL, libaom will allocate memory
-// internally to decode the current frame. Returns 0 on success. Returns < 0
-// on failure.
-int aom_realloc_frame_buffer(YV12_BUFFER_CONFIG *ybf, int width, int height,
- int ss_x, int ss_y, int use_highbitdepth,
- int border, int byte_alignment,
- aom_codec_frame_buffer_t *fb,
- aom_get_frame_buffer_cb_fn_t cb, void *cb_priv);
-int aom_free_frame_buffer(YV12_BUFFER_CONFIG *ybf);
-
-#ifdef __cplusplus
-}
-#endif
-
-#endif // AOM_AOM_SCALE_YV12CONFIG_H_
diff --git a/third_party/aom/aom_util/aom_thread.c b/third_party/aom/aom_util/aom_thread.c
deleted file mode 100644
index cae9f5e25..000000000
--- a/third_party/aom/aom_util/aom_thread.c
+++ /dev/null
@@ -1,184 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-//
-// Multi-threaded worker
-//
-// Original source:
-// https://chromium.googlesource.com/webm/libwebp
-
-#include <assert.h>
-#include <string.h> // for memset()
-
-#include "aom_mem/aom_mem.h"
-#include "aom_util/aom_thread.h"
-
-#if CONFIG_MULTITHREAD
-
-// Per-worker threading state: the mutex and condition variable used by
-// thread_loop() and change_state() to hand status changes back and forth, and
-// the handle of the worker thread itself.
-struct AVxWorkerImpl {
- pthread_mutex_t mutex_;
- pthread_cond_t condition_;
- pthread_t thread_;
-};
-
-//------------------------------------------------------------------------------
-
-static void execute(AVxWorker *const worker); // Forward declaration.
-
-// Body of the worker thread. |ptr| is the AVxWorker it serves. The thread
-// sleeps on the condition variable while status_ is OK, runs the hook when
-// status_ becomes WORK (then sets it back to OK), and exits when status_
-// becomes NOT_OK. The mutex is held for the whole iteration, including the
-// call to execute().
-static THREADFN thread_loop(void *ptr) {
- AVxWorker *const worker = (AVxWorker *)ptr;
- int done = 0;
- while (!done) {
- pthread_mutex_lock(&worker->impl_->mutex_);
- while (worker->status_ == OK) { // wait in idling mode
- pthread_cond_wait(&worker->impl_->condition_, &worker->impl_->mutex_);
- }
- if (worker->status_ == WORK) {
- execute(worker);
- worker->status_ = OK;
- } else if (worker->status_ == NOT_OK) { // finish the worker
- done = 1;
- }
- // signal to the main thread that we're done (for sync())
- pthread_cond_signal(&worker->impl_->condition_);
- pthread_mutex_unlock(&worker->impl_->mutex_);
- }
- return THREAD_RETURN(NULL); // Thread is finished
-}
-
-// main thread state control
-// Waits until the worker is idle (status_ == OK) and then, if |new_status| is
-// not OK, stores it and wakes the worker thread. Calling it with OK therefore
-// just blocks until the current job has finished. Nothing happens when the
-// worker has no implementation object or its status is below OK.
-static void change_state(AVxWorker *const worker, AVxWorkerStatus new_status) {
- // No-op when attempting to change state on a thread that didn't come up.
- // Checking status_ without acquiring the lock first would result in a data
- // race.
- if (worker->impl_ == NULL) return;
-
- pthread_mutex_lock(&worker->impl_->mutex_);
- if (worker->status_ >= OK) {
- // wait for the worker to finish
- while (worker->status_ != OK) {
- pthread_cond_wait(&worker->impl_->condition_, &worker->impl_->mutex_);
- }
- // assign new status and release the working thread if needed
- if (new_status != OK) {
- worker->status_ = new_status;
- pthread_cond_signal(&worker->impl_->condition_);
- }
- }
- pthread_mutex_unlock(&worker->impl_->mutex_);
-}
-
-#endif // CONFIG_MULTITHREAD
-
-//------------------------------------------------------------------------------
-
-// Brings |worker| to its pristine state: all fields cleared and the status
-// marked NOT_OK.
-static void init(AVxWorker *const worker) {
-  memset(worker, 0, sizeof(AVxWorker));
-  worker->status_ = NOT_OK;
-}
-
-// Blocks until the worker is idle (multithreaded builds only) and reports
-// whether it is error-free: returns 1 when had_error is clear, 0 otherwise.
-static int sync(AVxWorker *const worker) {
-#if CONFIG_MULTITHREAD
-  change_state(worker, OK);
-#endif
-  assert(worker->status_ <= OK);
-  return worker->had_error == 0;
-}
-
-// Prepares |worker| for use and clears its error flag. If the worker is not
-// yet usable (status_ < OK), multithreaded builds allocate the implementation
-// object, create its mutex and condition variable and start the thread;
-// single-threaded builds just mark it OK. If the worker is busy
-// (status_ > OK) this waits for it via sync(). Returns 1 on success and 0 on
-// failure; on failure the implementation object is freed and impl_ is NULL.
-static int reset(AVxWorker *const worker) {
- int ok = 1;
- worker->had_error = 0;
- if (worker->status_ < OK) {
-#if CONFIG_MULTITHREAD
- worker->impl_ = (AVxWorkerImpl *)aom_calloc(1, sizeof(*worker->impl_));
- if (worker->impl_ == NULL) {
- return 0;
- }
- if (pthread_mutex_init(&worker->impl_->mutex_, NULL)) {
- goto Error;
- }
- if (pthread_cond_init(&worker->impl_->condition_, NULL)) {
- pthread_mutex_destroy(&worker->impl_->mutex_);
- goto Error;
- }
- // Hold the mutex while the thread starts so status_ is set to OK before
- // thread_loop() can inspect it.
- pthread_mutex_lock(&worker->impl_->mutex_);
- ok = !pthread_create(&worker->impl_->thread_, NULL, thread_loop, worker);
- if (ok) worker->status_ = OK;
- pthread_mutex_unlock(&worker->impl_->mutex_);
- if (!ok) {
- pthread_mutex_destroy(&worker->impl_->mutex_);
- pthread_cond_destroy(&worker->impl_->condition_);
- // Shared failure exit: the earlier init failures jump here after
- // undoing whatever they had already created.
- Error:
- aom_free(worker->impl_);
- worker->impl_ = NULL;
- return 0;
- }
-#else
- worker->status_ = OK;
-#endif
- } else if (worker->status_ > OK) {
- ok = sync(worker);
- }
- assert(!ok || (worker->status_ == OK));
- return ok;
-}
-
-// Runs the worker's hook, if any, on the calling thread with data1 and data2.
-// A zero return from the hook latches had_error; a previously set error is
-// never cleared here.
-static void execute(AVxWorker *const worker) {
-  if (worker->hook == NULL) return;
-
-  const int hook_ok = worker->hook(worker->data1, worker->data2);
-  if (!hook_ok) {
-    worker->had_error |= 1;
-  }
-}
-
-// Starts the worker's job: run inline when threading is compiled out,
-// otherwise hand it to the worker thread by switching the state to WORK.
-static void launch(AVxWorker *const worker) {
-#if !CONFIG_MULTITHREAD
-  execute(worker);
-#else
-  change_state(worker, WORK);
-#endif
-}
-
-// Shuts the worker down. In multithreaded builds, when an implementation
-// object exists, this requests NOT_OK (which makes thread_loop() exit), joins
-// the thread, destroys the mutex and condition variable and frees the
-// implementation object. Single-threaded builds only mark the worker NOT_OK.
-static void end(AVxWorker *const worker) {
-#if CONFIG_MULTITHREAD
- if (worker->impl_ != NULL) {
- change_state(worker, NOT_OK);
- pthread_join(worker->impl_->thread_, NULL);
- pthread_mutex_destroy(&worker->impl_->mutex_);
- pthread_cond_destroy(&worker->impl_->condition_);
- aom_free(worker->impl_);
- worker->impl_ = NULL;
- }
-#else
- worker->status_ = NOT_OK;
- assert(worker->impl_ == NULL);
-#endif
- assert(worker->status_ == NOT_OK);
-}
-
-//------------------------------------------------------------------------------
-
-// Active worker function table, initialised to the implementations in this
-// file and replaceable through aom_set_worker_interface().
-static AVxWorkerInterface g_worker_interface = { init, reset, sync,
- launch, execute, end };
-
-// Installs |winterface| as the active worker function table by copying it.
-// Returns 1 on success. Returns 0, leaving the current table untouched, when
-// the pointer or any of its six entry points is NULL.
-int aom_set_worker_interface(const AVxWorkerInterface *const winterface) {
-  if (winterface == NULL) return 0;
-
-  // Every entry point is mandatory; reject a partially filled table.
-  if (winterface->init == NULL || winterface->reset == NULL) return 0;
-  if (winterface->sync == NULL || winterface->launch == NULL) return 0;
-  if (winterface->execute == NULL || winterface->end == NULL) return 0;
-
-  g_worker_interface = *winterface;
-  return 1;
-}
-
-// Returns a read-only pointer to the active worker function table.
-const AVxWorkerInterface *aom_get_worker_interface(void) {
-  const AVxWorkerInterface *const active = &g_worker_interface;
-  return active;
-}
-
-//------------------------------------------------------------------------------
diff --git a/third_party/aom/aom_util/aom_thread.h b/third_party/aom/aom_util/aom_thread.h
deleted file mode 100644
index f14c1ac18..000000000
--- a/third_party/aom/aom_util/aom_thread.h
+++ /dev/null
@@ -1,430 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-//
-// Multi-threaded worker
-//
-// Original source:
-// https://chromium.googlesource.com/webm/libwebp
-
-#ifndef AOM_AOM_UTIL_AOM_THREAD_H_
-#define AOM_AOM_UTIL_AOM_THREAD_H_
-
-#include "config/aom_config.h"
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-// Set maximum decode threads to be 8 due to the limit of frame buffers
-// and not enough semaphores in the emulation layer on windows.
-#define MAX_DECODE_THREADS 8
-
-#if CONFIG_MULTITHREAD
-
-#if defined(_WIN32) && !HAVE_PTHREAD_H
-#include <errno.h> // NOLINT
-#include <process.h> // NOLINT
-#include <windows.h> // NOLINT
-typedef HANDLE pthread_t;
-typedef CRITICAL_SECTION pthread_mutex_t;
-
-#if _WIN32_WINNT >= 0x0600 // Windows Vista / Server 2008 or greater
-#define USE_WINDOWS_CONDITION_VARIABLE
-typedef CONDITION_VARIABLE pthread_cond_t;
-#else
-typedef struct {
- HANDLE waiting_sem_;
- HANDLE received_sem_;
- HANDLE signal_event_;
-} pthread_cond_t;
-#endif // _WIN32_WINNT >= 0x600
-
-#ifndef WINAPI_FAMILY_PARTITION
-#define WINAPI_PARTITION_DESKTOP 1
-#define WINAPI_FAMILY_PARTITION(x) x
-#endif
-
-#if !WINAPI_FAMILY_PARTITION(WINAPI_PARTITION_DESKTOP)
-#define USE_CREATE_THREAD
-#endif
-
-//------------------------------------------------------------------------------
-// simplistic pthread emulation layer
-
-// _beginthreadex requires __stdcall
-#define THREADFN unsigned int __stdcall
-#define THREAD_RETURN(val) (unsigned int)((DWORD_PTR)val)
-
-#if _WIN32_WINNT >= 0x0501 // Windows XP or greater
-#define WaitForSingleObject(obj, timeout) \
- WaitForSingleObjectEx(obj, timeout, FALSE /*bAlertable*/)
-#endif
-
-// Win32 stand-in for pthread_create(): starts |start|(|arg|) on a new thread
-// and stores its handle in |*thread|. |attr| is ignored. Uses CreateThread
-// when USE_CREATE_THREAD is defined and _beginthreadex otherwise. Returns 0 on
-// success and 1 if no thread handle was obtained.
-static INLINE int pthread_create(pthread_t *const thread, const void *attr,
- unsigned int(__stdcall *start)(void *),
- void *arg) {
- (void)attr;
-#ifdef USE_CREATE_THREAD
- *thread = CreateThread(NULL, /* lpThreadAttributes */
- 0, /* dwStackSize */
- start, arg, 0, /* dwCreationFlags */
- NULL); /* lpThreadId */
-#else
- *thread = (pthread_t)_beginthreadex(NULL, /* void *security */
- 0, /* unsigned stack_size */
- start, arg, 0, /* unsigned initflag */
- NULL); /* unsigned *thrdaddr */
-#endif
- if (*thread == NULL) return 1;
- // The new thread is given above-normal priority; the result of this call is
- // not checked.
- SetThreadPriority(*thread, THREAD_PRIORITY_ABOVE_NORMAL);
- return 0;
-}
-
-// Win32 stand-in for pthread_join(): waits for |thread| to finish and closes
-// its handle. |value_ptr| is ignored. Returns 0 on success and 1 if the wait
-// or the close failed; the handle is not closed when the wait fails.
-static INLINE int pthread_join(pthread_t thread, void **value_ptr) {
-  (void)value_ptr;
-  if (WaitForSingleObject(thread, INFINITE) != WAIT_OBJECT_0) {
-    return 1;
-  }
-  return CloseHandle(thread) == 0;
-}
-
-// Mutex
-// Win32 stand-in for pthread_mutex_init(): initialises the critical section
-// behind |mutex|. |mutexattr| is ignored. Always returns 0; the result of
-// InitializeCriticalSectionEx is not checked.
-static INLINE int pthread_mutex_init(pthread_mutex_t *const mutex,
- void *mutexattr) {
- (void)mutexattr;
-#if _WIN32_WINNT >= 0x0600 // Windows Vista / Server 2008 or greater
- InitializeCriticalSectionEx(mutex, 0 /*dwSpinCount*/, 0 /*Flags*/);
-#else
- InitializeCriticalSection(mutex);
-#endif
- return 0;
-}
-
-// Win32 stand-in for pthread_mutex_trylock(): returns 0 if the critical
-// section was entered and EBUSY if it could not be entered immediately.
-static INLINE int pthread_mutex_trylock(pthread_mutex_t *const mutex) {
-  if (TryEnterCriticalSection(mutex)) {
-    return 0;
-  }
-  return EBUSY;
-}
-
-// Win32 stand-in for pthread_mutex_lock(): enters the critical section behind
-// |mutex|. Always returns 0.
-static INLINE int pthread_mutex_lock(pthread_mutex_t *const mutex) {
- EnterCriticalSection(mutex);
- return 0;
-}
-
-// Win32 stand-in for pthread_mutex_unlock(): leaves the critical section
-// behind |mutex|. Always returns 0.
-static INLINE int pthread_mutex_unlock(pthread_mutex_t *const mutex) {
- LeaveCriticalSection(mutex);
- return 0;
-}
-
-// Win32 stand-in for pthread_mutex_destroy(): deletes the critical section
-// behind |mutex|. Always returns 0.
-static INLINE int pthread_mutex_destroy(pthread_mutex_t *const mutex) {
- DeleteCriticalSection(mutex);
- return 0;
-}
-
-// Condition
-// Win32 stand-in for pthread_cond_destroy(). With native condition variables
-// there is nothing to release and 0 is returned. In the emulated variant all
-// three handles are closed unconditionally; returns 0 if every close
-// succeeded and 1 otherwise.
-static INLINE int pthread_cond_destroy(pthread_cond_t *const condition) {
-#ifdef USE_WINDOWS_CONDITION_VARIABLE
-  (void)condition;
-  return 0;
-#else
-  int closed_all = 1;
-  if (CloseHandle(condition->waiting_sem_) == 0) closed_all = 0;
-  if (CloseHandle(condition->received_sem_) == 0) closed_all = 0;
-  if (CloseHandle(condition->signal_event_) == 0) closed_all = 0;
-  return !closed_all;
-#endif
-}
-
-// Win32 stand-in for pthread_cond_init(). |cond_attr| is ignored. With native
-// condition variables it initialises |condition| and returns 0. The emulated
-// variant creates two semaphores (maximum count MAX_DECODE_THREADS) and one
-// auto-reset event; if any creation fails it calls pthread_cond_destroy() and
-// returns 1.
-static INLINE int pthread_cond_init(pthread_cond_t *const condition,
- void *cond_attr) {
- (void)cond_attr;
-#ifdef USE_WINDOWS_CONDITION_VARIABLE
- InitializeConditionVariable(condition);
-#else
- condition->waiting_sem_ = CreateSemaphore(NULL, 0, MAX_DECODE_THREADS, NULL);
- condition->received_sem_ = CreateSemaphore(NULL, 0, MAX_DECODE_THREADS, NULL);
- condition->signal_event_ = CreateEvent(NULL, FALSE, FALSE, NULL);
- if (condition->waiting_sem_ == NULL || condition->received_sem_ == NULL ||
- condition->signal_event_ == NULL) {
- // NOTE(review): this also passes the handles that failed to be created
- // (NULL) to CloseHandle inside pthread_cond_destroy().
- pthread_cond_destroy(condition);
- return 1;
- }
-#endif
- return 0;
-}
-
-// Win32 stand-in for pthread_cond_signal(): wakes one thread blocked in
-// pthread_cond_wait(), if there is one. Returns 0 on success and 1 on failure.
-//
-// The emulated variant previously tested the acknowledgement wait with
-// "!= WAIT_OBJECT_0", which reported failure exactly when the hand-off had
-// succeeded. The test is now "== WAIT_OBJECT_0", matching pthread_cond_wait().
-static INLINE int pthread_cond_signal(pthread_cond_t *const condition) {
-  int ok = 1;
-#ifdef USE_WINDOWS_CONDITION_VARIABLE
-  WakeConditionVariable(condition);
-#else
-  if (WaitForSingleObject(condition->waiting_sem_, 0) == WAIT_OBJECT_0) {
-    // a thread is waiting in pthread_cond_wait: allow it to be notified
-    ok = (SetEvent(condition->signal_event_) != 0);
-    // wait until the event is consumed so the signaler cannot consume
-    // the event via its own pthread_cond_wait.
-    ok &= (WaitForSingleObject(condition->received_sem_, INFINITE) ==
-           WAIT_OBJECT_0);
-  }
-#endif
-  return !ok;
-}
-
-// Win32 stand-in for pthread_cond_broadcast(): wakes every thread currently
-// blocked in pthread_cond_wait(). Returns 0 on success and 1 if any wake-up
-// failed.
-//
-// The emulated variant previously tested the acknowledgement wait with
-// "!= WAIT_OBJECT_0", which reported failure whenever a waiter was woken
-// successfully. The test is now "== WAIT_OBJECT_0".
-static INLINE int pthread_cond_broadcast(pthread_cond_t *const condition) {
-  int ok = 1;
-#ifdef USE_WINDOWS_CONDITION_VARIABLE
-  WakeAllConditionVariable(condition);
-#else
-  while (WaitForSingleObject(condition->waiting_sem_, 0) == WAIT_OBJECT_0) {
-    // a thread is waiting in pthread_cond_wait: allow it to be notified
-    ok &= (SetEvent(condition->signal_event_) != 0);
-    // wait until the event is consumed so the signaler cannot consume
-    // the event via its own pthread_cond_wait.
-    ok &= (WaitForSingleObject(condition->received_sem_, INFINITE) ==
-           WAIT_OBJECT_0);
-  }
-#endif
-  return !ok;
-}
-
-// Win32 stand-in for pthread_cond_wait(): releases |mutex|, blocks until
-// |condition| is signalled and re-acquires |mutex| before returning. Returns 0
-// on success and 1 on failure. In the emulated variant the waiter first
-// announces itself on waiting_sem_, then waits for signal_event_ and finally
-// acknowledges on received_sem_; if the announcement fails the function
-// returns 1 with |mutex| still held.
-static INLINE int pthread_cond_wait(pthread_cond_t *const condition,
- pthread_mutex_t *const mutex) {
- int ok;
-#ifdef USE_WINDOWS_CONDITION_VARIABLE
- ok = SleepConditionVariableCS(condition, mutex, INFINITE);
-#else
- // note that there is a consumer available so the signal isn't dropped in
- // pthread_cond_signal
- if (!ReleaseSemaphore(condition->waiting_sem_, 1, NULL)) return 1;
- // now unlock the mutex so pthread_cond_signal may be issued
- pthread_mutex_unlock(mutex);
- ok = (WaitForSingleObject(condition->signal_event_, INFINITE) ==
- WAIT_OBJECT_0);
- ok &= ReleaseSemaphore(condition->received_sem_, 1, NULL);
- pthread_mutex_lock(mutex);
-#endif
- return !ok;
-}
-#elif defined(__OS2__)
-#define INCL_DOS
-#include <os2.h> // NOLINT
-
-#include <errno.h> // NOLINT
-#include <stdlib.h> // NOLINT
-#include <sys/builtin.h> // NOLINT
-
-#define pthread_t TID
-#define pthread_mutex_t HMTX
-
-typedef struct {
- HEV event_sem_;
- HEV ack_sem_;
- volatile unsigned wait_count_;
-} pthread_cond_t;
-
-//------------------------------------------------------------------------------
-// simplistic pthread emulation layer
-
-#define THREADFN void *
-#define THREAD_RETURN(val) (val)
-
-typedef struct {
- void *(*start_)(void *);
- void *arg_;
-} thread_arg;
-
-// Entry point handed to _beginthread() by the OS/2 pthread_create(). |arg| is
-// a malloc'ed thread_arg: it is copied to the stack and freed before the
-// user's start routine is invoked. The start routine's return value is
-// discarded.
-static void thread_start(void *arg) {
- thread_arg targ = *(thread_arg *)arg;
- free(arg);
-
- targ.start_(targ.arg_);
-}
-
-// OS/2 stand-in for pthread_create(): packs |start| and |arg| into a
-// heap-allocated thread_arg and starts thread_start() on it. |attr| is
-// ignored. Returns 0 on success with the thread id stored in |*thread|, or 1
-// if the allocation or the thread creation failed (the thread_arg is freed in
-// the latter case).
-static INLINE int pthread_create(pthread_t *const thread, const void *attr,
- void *(*start)(void *), void *arg) {
- int tid;
- thread_arg *targ = (thread_arg *)malloc(sizeof(*targ));
- if (targ == NULL) return 1;
-
- (void)attr;
-
- targ->start_ = start;
- targ->arg_ = arg;
- // NOTE(review): 1024 * 1024 is presumably the stack size in bytes - confirm
- // against the _beginthread documentation for this toolchain.
- tid = (pthread_t)_beginthread(thread_start, NULL, 1024 * 1024, targ);
- if (tid == -1) {
- free(targ);
- return 1;
- }
-
- *thread = tid;
- return 0;
-}
-
-// OS/2 stand-in for pthread_join(): waits for |thread| via DosWaitThread.
-// |value_ptr| is ignored. Returns 0 when DosWaitThread returns 0, else 1.
-static INLINE int pthread_join(pthread_t thread, void **value_ptr) {
- (void)value_ptr;
- return DosWaitThread(&thread, DCWW_WAIT) != 0;
-}
-
-// Mutex
-// OS/2 stand-in for pthread_mutex_init(): creates a mutex semaphore and
-// stores its handle in |*mutex|. |mutexattr| is ignored. Returns 0 when
-// DosCreateMutexSem returns 0, else 1.
-static INLINE int pthread_mutex_init(pthread_mutex_t *const mutex,
- void *mutexattr) {
- (void)mutexattr;
- return DosCreateMutexSem(NULL, mutex, 0, FALSE) != 0;
-}
-
-// OS/2 stand-in for pthread_mutex_trylock(): requests the mutex with
-// SEM_IMMEDIATE_RETURN. Returns 0 if the request returned 0, EBUSY otherwise.
-static INLINE int pthread_mutex_trylock(pthread_mutex_t *const mutex) {
- return DosRequestMutexSem(*mutex, SEM_IMMEDIATE_RETURN) == 0 ? 0 : EBUSY;
-}
-
-// OS/2 stand-in for pthread_mutex_lock(): requests the mutex with
-// SEM_INDEFINITE_WAIT. Returns 0 when the request returns 0, else 1.
-static INLINE int pthread_mutex_lock(pthread_mutex_t *const mutex) {
- return DosRequestMutexSem(*mutex, SEM_INDEFINITE_WAIT) != 0;
-}
-
-// OS/2 stand-in for pthread_mutex_unlock(). Returns 0 when
-// DosReleaseMutexSem returns 0, else 1.
-static INLINE int pthread_mutex_unlock(pthread_mutex_t *const mutex) {
- return DosReleaseMutexSem(*mutex) != 0;
-}
-
-// OS/2 stand-in for pthread_mutex_destroy(). Returns 0 when DosCloseMutexSem
-// returns 0, else 1.
-static INLINE int pthread_mutex_destroy(pthread_mutex_t *const mutex) {
- return DosCloseMutexSem(*mutex) != 0;
-}
-
-// Condition
-// OS/2 stand-in for pthread_cond_destroy(): closes both event semaphores of
-// |condition|. Both closes are always attempted. Returns 0 if both succeeded
-// and 1 otherwise.
-static INLINE int pthread_cond_destroy(pthread_cond_t *const condition) {
- int ok = 1;
- ok &= DosCloseEventSem(condition->event_sem_) == 0;
- ok &= DosCloseEventSem(condition->ack_sem_) == 0;
- return !ok;
-}
-
-// OS/2 stand-in for pthread_cond_init(): creates the event and acknowledge
-// semaphores and zeroes the waiter count. |cond_attr| is ignored. Returns 0 on
-// success; if either creation fails it calls pthread_cond_destroy() and
-// returns 1.
-static INLINE int pthread_cond_init(pthread_cond_t *const condition,
- void *cond_attr) {
- int ok = 1;
- (void)cond_attr;
-
- ok &=
- DosCreateEventSem(NULL, &condition->event_sem_, DCE_POSTONE, FALSE) == 0;
- ok &= DosCreateEventSem(NULL, &condition->ack_sem_, DCE_POSTONE, FALSE) == 0;
- if (!ok) {
- // NOTE(review): this closes both handles even if one was never created;
- // confirm DosCloseEventSem tolerates that.
- pthread_cond_destroy(condition);
- return 1;
- }
- condition->wait_count_ = 0;
- return 0;
-}
-
-// OS/2 stand-in for pthread_cond_signal(): when the waiter-count test below
-// passes, posts event_sem_ and then blocks until ack_sem_ is posted. Returns 0
-// on success (including when nothing was posted) and 1 if either semaphore
-// call failed.
-static INLINE int pthread_cond_signal(pthread_cond_t *const condition) {
- int ok = 1;
-
- // NOTE(review): presumably __atomic_cmpxchg32(p, 0, 0) is used as an atomic
- // "is *p zero?" test, so this branch runs only when waiters are present -
- // confirm the return convention in <sys/builtin.h>.
- if (!__atomic_cmpxchg32(&condition->wait_count_, 0, 0)) {
- ok &= DosPostEventSem(condition->event_sem_) == 0;
- ok &= DosWaitEventSem(condition->ack_sem_, SEM_INDEFINITE_WAIT) == 0;
- }
-
- return !ok;
-}
-
-// OS/2 stand-in for pthread_cond_broadcast(): calls pthread_cond_signal()
-// repeatedly for as long as the same waiter-count test used there passes.
-// Returns 0 if every signal succeeded and 1 otherwise.
-static INLINE int pthread_cond_broadcast(pthread_cond_t *const condition) {
- int ok = 1;
-
- while (!__atomic_cmpxchg32(&condition->wait_count_, 0, 0))
- ok &= pthread_cond_signal(condition) == 0;
-
- return !ok;
-}
-
-// OS/2 stand-in for pthread_cond_wait(): registers the caller as a waiter,
-// releases |mutex|, blocks on event_sem_, deregisters, acknowledges through
-// ack_sem_ and re-acquires |mutex|. Returns 0 on success and 1 if unlocking
-// or either semaphore call failed; the result of the final
-// pthread_mutex_lock() is not checked.
-static INLINE int pthread_cond_wait(pthread_cond_t *const condition,
- pthread_mutex_t *const mutex) {
- int ok = 1;
-
- __atomic_increment(&condition->wait_count_);
-
- ok &= pthread_mutex_unlock(mutex) == 0;
-
- ok &= DosWaitEventSem(condition->event_sem_, SEM_INDEFINITE_WAIT) == 0;
-
- __atomic_decrement(&condition->wait_count_);
-
- ok &= DosPostEventSem(condition->ack_sem_) == 0;
-
- pthread_mutex_lock(mutex);
-
- return !ok;
-}
-#else // _WIN32
-#include <pthread.h> // NOLINT
-#define THREADFN void *
-#define THREAD_RETURN(val) val
-#endif
-
-#endif // CONFIG_MULTITHREAD
-
-// State of the worker thread object
-// The numeric order NOT_OK < OK < WORK is relied upon by comparisons such as
-// "status_ >= OK" and "status_ < OK" in aom_thread.c.
-typedef enum {
- NOT_OK = 0, // object is unusable
- OK, // ready to work
- WORK // busy finishing the current task
-} AVxWorkerStatus;
-
-// Function to be called by the worker thread. Takes two opaque pointers as
-// arguments (data1 and data2). Should return true on success and return false
-// in case of error.
-typedef int (*AVxWorkerHook)(void *, void *);
-
-// Platform-dependent implementation details for the worker.
-typedef struct AVxWorkerImpl AVxWorkerImpl;
-
-// Synchronization object used to launch job in the worker thread
-typedef struct {
- AVxWorkerImpl *impl_; // platform threading state
- AVxWorkerStatus status_; // current state of the worker
- AVxWorkerHook hook; // hook to call
- void *data1; // first argument passed to 'hook'
- void *data2; // second argument passed to 'hook'
- int had_error; // true if a call to 'hook' returned false
-} AVxWorker;
-
-// The interface for all thread-worker related functions. All these functions
-// must be implemented.
-typedef struct {
- // Must be called first, before any other method.
- void (*init)(AVxWorker *const worker);
- // Must be called to initialize the object and spawn the thread. Re-entrant.
- // Will potentially launch the thread. Returns false in case of error.
- int (*reset)(AVxWorker *const worker);
- // Makes sure the previous work is finished. Returns true if worker->had_error
- // was not set and no error condition was triggered by the working thread.
- int (*sync)(AVxWorker *const worker);
- // Triggers the thread to call hook() with data1 and data2 arguments. These
- // hook/data1/data2 values can be changed at any time before calling this
- // function, but must not be changed afterward until the next call to sync().
- void (*launch)(AVxWorker *const worker);
- // This function is similar to launch() except that it calls the
- // hook directly instead of using a thread. Convenient to bypass the thread
- // mechanism while still using the AVxWorker structs. sync() must
- // still be called afterward (for error reporting).
- void (*execute)(AVxWorker *const worker);
- // Kill the thread and terminate the object. To use the object again, one
- // must call reset() again.
- void (*end)(AVxWorker *const worker);
-} AVxWorkerInterface;
-
-// Install a new set of threading functions, overriding the defaults. This
-// should be done before any workers are started, i.e., before any encoding or
-// decoding takes place. The contents of the interface struct are copied, it
-// is safe to free the corresponding memory after this call. This function is
-// not thread-safe. Return false in case of invalid pointer or methods.
-int aom_set_worker_interface(const AVxWorkerInterface *const winterface);
-
-// Retrieve the currently set thread worker interface.
-const AVxWorkerInterface *aom_get_worker_interface(void);
-
-//------------------------------------------------------------------------------
-
-#ifdef __cplusplus
-} // extern "C"
-#endif
-
-#endif // AOM_AOM_UTIL_AOM_THREAD_H_
diff --git a/third_party/aom/aom_util/aom_util.cmake b/third_party/aom/aom_util/aom_util.cmake
deleted file mode 100644
index d4f3bce74..000000000
--- a/third_party/aom/aom_util/aom_util.cmake
+++ /dev/null
@@ -1,28 +0,0 @@
-#
-# Copyright (c) 2017, Alliance for Open Media. All rights reserved
-#
-# This source code is subject to the terms of the BSD 2 Clause License and the
-# Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License was
-# not distributed with this source code in the LICENSE file, you can obtain it
-# at www.aomedia.org/license/software. If the Alliance for Open Media Patent
-# License 1.0 was not distributed with this source code in the PATENTS file, you
-# can obtain it at www.aomedia.org/license/patent.
-#
-if(AOM_AOM_UTIL_AOM_UTIL_CMAKE_)
- return()
-endif() # AOM_AOM_UTIL_AOM_UTIL_CMAKE_
-set(AOM_AOM_UTIL_AOM_UTIL_CMAKE_ 1)
-
-list(APPEND AOM_UTIL_SOURCES "${AOM_ROOT}/aom_util/aom_thread.c"
- "${AOM_ROOT}/aom_util/aom_thread.h"
- "${AOM_ROOT}/aom_util/endian_inl.h"
- "${AOM_ROOT}/aom_util/debug_util.c"
- "${AOM_ROOT}/aom_util/debug_util.h")
-
-# Creates the aom_util build target and makes libaom depend on it. The libaom
-# target must exist before this function is called.
-function(setup_aom_util_targets)
- # Compile the utility sources once as an object library.
- add_library(aom_util OBJECT ${AOM_UTIL_SOURCES})
- # Publish the new target to the caller's AOM_LIB_TARGETS list.
- set(AOM_LIB_TARGETS ${AOM_LIB_TARGETS} aom_util PARENT_SCOPE)
- # Add the compiled objects to the aom target.
- target_sources(aom PRIVATE $<TARGET_OBJECTS:aom_util>)
-endfunction()
diff --git a/third_party/aom/aom_util/debug_util.c b/third_party/aom/aom_util/debug_util.c
deleted file mode 100644
index 468c47ed1..000000000
--- a/third_party/aom/aom_util/debug_util.c
+++ /dev/null
@@ -1,275 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#include <assert.h>
-#include <stdio.h>
-#include <string.h>
-#include "aom_util/debug_util.h"
-
-// Frame indices most recently recorded for the write side and the read side.
-static int frame_idx_w = 0;
-static int frame_idx_r = 0;
-
-// Records |frame_idx| as the current write-side frame index.
-void bitstream_queue_set_frame_write(int frame_idx) {
-  frame_idx_w = frame_idx;
-}
-
-// Returns the current write-side frame index.
-int bitstream_queue_get_frame_write(void) {
-  return frame_idx_w;
-}
-
-// Records |frame_idx| as the current read-side frame index.
-void bitstream_queue_set_frame_read(int frame_idx) {
-  frame_idx_r = frame_idx;
-}
-
-// Returns the current read-side frame index.
-int bitstream_queue_get_frame_read(void) {
-  return frame_idx_r;
-}
-
-#if CONFIG_BITSTREAM_DEBUG
-#define QUEUE_MAX_SIZE 2000000
-static int result_queue[QUEUE_MAX_SIZE];
-static int nsymbs_queue[QUEUE_MAX_SIZE];
-static aom_cdf_prob cdf_queue[QUEUE_MAX_SIZE][16];
-
-// Read and write cursors into the queue arrays, the write cursor saved by
-// bitstream_queue_record_write(), and the flags that disable pop/push.
-static int queue_r = 0;
-static int queue_w = 0;
-static int queue_prev_w = -1;
-static int skip_r = 0;
-static int skip_w = 0;
-
-// A non-zero |skip| turns bitstream_queue_push() into a no-op.
-void bitstream_queue_set_skip_write(int skip) {
-  skip_w = skip;
-}
-
-// A non-zero |skip| turns bitstream_queue_pop() into a no-op.
-void bitstream_queue_set_skip_read(int skip) {
-  skip_r = skip;
-}
-
-// Saves the current write cursor.
-void bitstream_queue_record_write(void) {
-  queue_prev_w = queue_w;
-}
-
-// Restores the write cursor saved by bitstream_queue_record_write().
-void bitstream_queue_reset_write(void) {
-  queue_w = queue_prev_w;
-}
-
-// Returns the current write cursor.
-int bitstream_queue_get_write(void) {
-  return queue_w;
-}
-
-// Returns the current read cursor.
-int bitstream_queue_get_read(void) {
-  return queue_r;
-}
-
-// Reads the entry at the read cursor into |*result|, |*nsymbs| and the first
-// |*nsymbs| elements of |cdf|, then advances the cursor with wrap-around.
-// Does nothing while reads are being skipped. An empty queue is reported on
-// stdout and trips assert(0).
-void bitstream_queue_pop(int *result, aom_cdf_prob *cdf, int *nsymbs) {
-  if (skip_r) return;
-
-  if (queue_w == queue_r) {
-    printf("buffer underflow queue_w %d queue_r %d\n", queue_w, queue_r);
-    assert(0);
-  }
-
-  const int slot = queue_r;
-  *result = result_queue[slot];
-  *nsymbs = nsymbs_queue[slot];
-  memcpy(cdf, cdf_queue[slot], *nsymbs * sizeof(*cdf));
-  queue_r = (slot + 1) % QUEUE_MAX_SIZE;
-}
-
-// Stores |result|, |nsymbs| and the first |nsymbs| elements of |cdf| at the
-// write cursor, then advances the cursor with wrap-around. Does nothing while
-// writes are being skipped. If the advanced cursor catches up with the read
-// cursor the overflow is reported on stdout and trips assert(0).
-void bitstream_queue_push(int result, const aom_cdf_prob *cdf, int nsymbs) {
-  if (skip_w) return;
-
-  const int slot = queue_w;
-  result_queue[slot] = result;
-  nsymbs_queue[slot] = nsymbs;
-  memcpy(cdf_queue[slot], cdf, nsymbs * sizeof(*cdf));
-  queue_w = (slot + 1) % QUEUE_MAX_SIZE;
-
-  if (queue_w == queue_r) {
-    printf("buffer overflow queue_w %d queue_r %d\n", queue_w, queue_r);
-    assert(0);
-  }
-}
-#endif // CONFIG_BITSTREAM_DEBUG
-
-#if CONFIG_MISMATCH_DEBUG
-static int frame_buf_idx_r = 0;
-static int frame_buf_idx_w = 0;
-static int max_frame_buf_num = 5;
-#define MAX_FRAME_STRIDE 1280
-#define MAX_FRAME_HEIGHT 720
-static uint16_t
- frame_pre[5][3][MAX_FRAME_STRIDE * MAX_FRAME_HEIGHT]; // prediction only
-static uint16_t
- frame_tx[5][3][MAX_FRAME_STRIDE * MAX_FRAME_HEIGHT]; // prediction + txfm
-static int frame_stride = MAX_FRAME_STRIDE;
-static int frame_height = MAX_FRAME_HEIGHT;
-static int frame_size = MAX_FRAME_STRIDE * MAX_FRAME_HEIGHT;
-// Advances the write-side frame slot with wrap-around. Running into the
-// read-side slot is reported on stdout and trips assert(0).
-void mismatch_move_frame_idx_w() {
-  const int next_w = (frame_buf_idx_w + 1) % max_frame_buf_num;
-  frame_buf_idx_w = next_w;
-  if (next_w == frame_buf_idx_r) {
-    printf("frame_buf overflow\n");
-    assert(0);
-  }
-}
-
-// Zeroes the prediction and reconstruction buffers of the current write-side
-// slot for the first |num_planes| planes.
-void mismatch_reset_frame(int num_planes) {
-  const int slot = frame_buf_idx_w;
-  int plane = 0;
-  while (plane < num_planes) {
-    memset(frame_pre[slot][plane], 0,
-           sizeof(frame_pre[slot][plane][0]) * frame_size);
-    memset(frame_tx[slot][plane], 0,
-           sizeof(frame_tx[slot][plane][0]) * frame_size);
-    ++plane;
-  }
-}
-
-// Advances the read-side frame slot with wrap-around. If the read slot already
-// equals the write slot the underflow is reported on stdout and trips
-// assert(0) before the slot is advanced.
-void mismatch_move_frame_idx_r() {
-  const int current_r = frame_buf_idx_r;
-  if (frame_buf_idx_w == current_r) {
-    printf("frame_buf underflow\n");
-    assert(0);
-  }
-  frame_buf_idx_r = (current_r + 1) % max_frame_buf_num;
-}
-
-// Copies a |blk_w| x |blk_h| block of prediction samples from |src| into the
-// prediction buffer of the current write-side slot for |plane|, with its
-// top-left corner at column |pixel_c|, row |pixel_r|. When |highbd| is
-// non-zero |src| is converted with CONVERT_TO_SHORTPTR and read as 16-bit
-// samples. |frame_offset| is used only by the disabled debug code below.
-void mismatch_record_block_pre(const uint8_t *src, int src_stride,
- int frame_offset, int plane, int pixel_c,
- int pixel_r, int blk_w, int blk_h, int highbd) {
- // Blocks that reach the last column or row of the fixed-size debug buffer
- // are rejected.
- if (pixel_c + blk_w >= frame_stride || pixel_r + blk_h >= frame_height) {
- printf("frame_buf undersized\n");
- assert(0);
- }
-
- const uint16_t *src16 = highbd ? CONVERT_TO_SHORTPTR(src) : NULL;
- for (int r = 0; r < blk_h; ++r) {
- for (int c = 0; c < blk_w; ++c) {
- frame_pre[frame_buf_idx_w][plane]
- [(r + pixel_r) * frame_stride + c + pixel_c] =
- src16 ? src16[r * src_stride + c] : src[r * src_stride + c];
- }
- }
-#if 0
- int ref_frame_idx = 3;
- int ref_frame_offset = 4;
- int ref_plane = 1;
- int ref_pixel_c = 162;
- int ref_pixel_r = 16;
- if (frame_idx_w == ref_frame_idx && plane == ref_plane &&
- frame_offset == ref_frame_offset && ref_pixel_c >= pixel_c &&
- ref_pixel_c < pixel_c + blk_w && ref_pixel_r >= pixel_r &&
- ref_pixel_r < pixel_r + blk_h) {
- printf(
- "\nrecord_block_pre frame_idx %d frame_offset %d plane %d pixel_c %d pixel_r %d blk_w "
- "%d blk_h %d\n",
- frame_idx_w, frame_offset, plane, pixel_c, pixel_r, blk_w, blk_h);
- }
-#endif
-}
-// Copies a |blk_w| x |blk_h| block of reconstructed (prediction + transform)
-// samples from |src| into the reconstruction buffer of the current write-side
-// slot for |plane|, with its top-left corner at column |pixel_c|, row
-// |pixel_r|. When |highbd| is non-zero |src| is converted with
-// CONVERT_TO_SHORTPTR and read as 16-bit samples. |frame_offset| is used only
-// by the disabled debug code below.
-void mismatch_record_block_tx(const uint8_t *src, int src_stride,
- int frame_offset, int plane, int pixel_c,
- int pixel_r, int blk_w, int blk_h, int highbd) {
- // Blocks that reach the last column or row of the fixed-size debug buffer
- // are rejected.
- if (pixel_c + blk_w >= frame_stride || pixel_r + blk_h >= frame_height) {
- printf("frame_buf undersized\n");
- assert(0);
- }
-
- const uint16_t *src16 = highbd ? CONVERT_TO_SHORTPTR(src) : NULL;
- for (int r = 0; r < blk_h; ++r) {
- for (int c = 0; c < blk_w; ++c) {
- frame_tx[frame_buf_idx_w][plane]
- [(r + pixel_r) * frame_stride + c + pixel_c] =
- src16 ? src16[r * src_stride + c] : src[r * src_stride + c];
- }
- }
-#if 0
- int ref_frame_idx = 3;
- int ref_frame_offset = 4;
- int ref_plane = 1;
- int ref_pixel_c = 162;
- int ref_pixel_r = 16;
- if (frame_idx_w == ref_frame_idx && plane == ref_plane && frame_offset == ref_frame_offset &&
- ref_pixel_c >= pixel_c && ref_pixel_c < pixel_c + blk_w &&
- ref_pixel_r >= pixel_r && ref_pixel_r < pixel_r + blk_h) {
- printf(
- "\nrecord_block_tx frame_idx %d frame_offset %d plane %d pixel_c %d pixel_r %d blk_w "
- "%d blk_h %d\n",
- frame_idx_w, frame_offset, plane, pixel_c, pixel_r, blk_w, blk_h);
- }
-#endif
-}
-// Compares a |blk_w| x |blk_h| block of prediction samples in |src| against
-// the block recorded in the prediction buffer of the current read-side slot
-// for |plane| at column |pixel_c|, row |pixel_r|. On any difference it prints
-// the block position followed by the recorded ("enc") and supplied ("dec")
-// samples, then trips assert(0). When |highbd| is non-zero |src| is converted
-// with CONVERT_TO_SHORTPTR and read as 16-bit samples.
-void mismatch_check_block_pre(const uint8_t *src, int src_stride,
- int frame_offset, int plane, int pixel_c,
- int pixel_r, int blk_w, int blk_h, int highbd) {
- // Blocks that reach the last column or row of the fixed-size debug buffer
- // are rejected.
- if (pixel_c + blk_w >= frame_stride || pixel_r + blk_h >= frame_height) {
- printf("frame_buf undersized\n");
- assert(0);
- }
-
- const uint16_t *src16 = highbd ? CONVERT_TO_SHORTPTR(src) : NULL;
- int mismatch = 0;
- for (int r = 0; r < blk_h; ++r) {
- for (int c = 0; c < blk_w; ++c) {
- if (frame_pre[frame_buf_idx_r][plane]
- [(r + pixel_r) * frame_stride + c + pixel_c] !=
- (uint16_t)(src16 ? src16[r * src_stride + c]
- : src[r * src_stride + c])) {
- mismatch = 1;
- }
- }
- }
- if (mismatch) {
- printf(
- "\ncheck_block_pre failed frame_idx %d frame_offset %d plane %d "
- "pixel_c %d pixel_r "
- "%d blk_w %d blk_h %d\n",
- frame_idx_r, frame_offset, plane, pixel_c, pixel_r, blk_w, blk_h);
- printf("enc\n");
- for (int rr = 0; rr < blk_h; ++rr) {
- for (int cc = 0; cc < blk_w; ++cc) {
- printf("%d ", frame_pre[frame_buf_idx_r][plane]
- [(rr + pixel_r) * frame_stride + cc + pixel_c]);
- }
- printf("\n");
- }
-
- printf("dec\n");
- for (int rr = 0; rr < blk_h; ++rr) {
- for (int cc = 0; cc < blk_w; ++cc) {
- printf("%d ",
- src16 ? src16[rr * src_stride + cc] : src[rr * src_stride + cc]);
- }
- printf("\n");
- }
- assert(0);
- }
-}
-// Compares a |blk_w| x |blk_h| block of reconstructed samples in |src|
-// against the block recorded in the reconstruction buffer of the current
-// read-side slot for |plane| at column |pixel_c|, row |pixel_r|. On any
-// difference it prints the block position followed by the recorded ("enc")
-// and supplied ("dec") samples, then trips assert(0). When |highbd| is
-// non-zero |src| is converted with CONVERT_TO_SHORTPTR and read as 16-bit
-// samples.
-void mismatch_check_block_tx(const uint8_t *src, int src_stride,
- int frame_offset, int plane, int pixel_c,
- int pixel_r, int blk_w, int blk_h, int highbd) {
- // Blocks that reach the last column or row of the fixed-size debug buffer
- // are rejected.
- if (pixel_c + blk_w >= frame_stride || pixel_r + blk_h >= frame_height) {
- printf("frame_buf undersized\n");
- assert(0);
- }
-
- const uint16_t *src16 = highbd ? CONVERT_TO_SHORTPTR(src) : NULL;
- int mismatch = 0;
- for (int r = 0; r < blk_h; ++r) {
- for (int c = 0; c < blk_w; ++c) {
- if (frame_tx[frame_buf_idx_r][plane]
- [(r + pixel_r) * frame_stride + c + pixel_c] !=
- (uint16_t)(src16 ? src16[r * src_stride + c]
- : src[r * src_stride + c])) {
- mismatch = 1;
- }
- }
- }
- if (mismatch) {
- printf(
- "\ncheck_block_tx failed frame_idx %d frame_offset %d plane %d pixel_c "
- "%d pixel_r "
- "%d blk_w %d blk_h %d\n",
- frame_idx_r, frame_offset, plane, pixel_c, pixel_r, blk_w, blk_h);
- printf("enc\n");
- for (int rr = 0; rr < blk_h; ++rr) {
- for (int cc = 0; cc < blk_w; ++cc) {
- printf("%d ", frame_tx[frame_buf_idx_r][plane]
- [(rr + pixel_r) * frame_stride + cc + pixel_c]);
- }
- printf("\n");
- }
-
- printf("dec\n");
- for (int rr = 0; rr < blk_h; ++rr) {
- for (int cc = 0; cc < blk_w; ++cc) {
- printf("%d ",
- src16 ? src16[rr * src_stride + cc] : src[rr * src_stride + cc]);
- }
- printf("\n");
- }
- assert(0);
- }
-}
-#endif // CONFIG_MISMATCH_DEBUG
diff --git a/third_party/aom/aom_util/debug_util.h b/third_party/aom/aom_util/debug_util.h
deleted file mode 100644
index 127a8b468..000000000
--- a/third_party/aom/aom_util/debug_util.h
+++ /dev/null
@@ -1,69 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#ifndef AOM_AOM_UTIL_DEBUG_UTIL_H_
-#define AOM_AOM_UTIL_DEBUG_UTIL_H_
-
-#include "config/aom_config.h"
-
-#include "aom_dsp/prob.h"
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-void bitstream_queue_set_frame_write(int frame_idx);
-int bitstream_queue_get_frame_write(void);
-void bitstream_queue_set_frame_read(int frame_idx);
-int bitstream_queue_get_frame_read(void);
-
-#if CONFIG_BITSTREAM_DEBUG
-/* This is a debug tool used to detect bitstream error. On encoder side, it
- * pushes each bit and probability into a queue before the bit is written into
- * the Arithmetic coder. On decoder side, whenever a bit is read out from the
- * Arithmetic coder, it pops out the reference bit and probability from the
- * queue as well. If the two results do not match, this debug tool will report
- * an error. This tool can be used to pin down the bitstream error precisely.
- * By combining gdb's backtrace method, we can detect which module causes the
- * bitstream error. */
-int bitstream_queue_get_write(void);
-int bitstream_queue_get_read(void);
-void bitstream_queue_record_write(void);
-void bitstream_queue_reset_write(void);
-void bitstream_queue_pop(int *result, aom_cdf_prob *cdf, int *nsymbs);
-void bitstream_queue_push(int result, const aom_cdf_prob *cdf, int nsymbs);
-void bitstream_queue_set_skip_write(int skip);
-void bitstream_queue_set_skip_read(int skip);
-#endif // CONFIG_BITSTREAM_DEBUG
-
-#if CONFIG_MISMATCH_DEBUG
-void mismatch_move_frame_idx_w();
-void mismatch_move_frame_idx_r();
-void mismatch_reset_frame(int num_planes);
-void mismatch_record_block_pre(const uint8_t *src, int src_stride,
- int frame_offset, int plane, int pixel_c,
- int pixel_r, int blk_w, int blk_h, int highbd);
-void mismatch_record_block_tx(const uint8_t *src, int src_stride,
- int frame_offset, int plane, int pixel_c,
- int pixel_r, int blk_w, int blk_h, int highbd);
-void mismatch_check_block_pre(const uint8_t *src, int src_stride,
- int frame_offset, int plane, int pixel_c,
- int pixel_r, int blk_w, int blk_h, int highbd);
-void mismatch_check_block_tx(const uint8_t *src, int src_stride,
- int frame_offset, int plane, int pixel_c,
- int pixel_r, int blk_w, int blk_h, int highbd);
-#endif // CONFIG_MISMATCH_DEBUG
-
-#ifdef __cplusplus
-} // extern "C"
-#endif
-
-#endif // AOM_AOM_UTIL_DEBUG_UTIL_H_
diff --git a/third_party/aom/aom_util/endian_inl.h b/third_party/aom/aom_util/endian_inl.h
deleted file mode 100644
index f536ec5b8..000000000
--- a/third_party/aom/aom_util/endian_inl.h
+++ /dev/null
@@ -1,122 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-//
-// Endian related functions.
-
-#ifndef AOM_AOM_UTIL_ENDIAN_INL_H_
-#define AOM_AOM_UTIL_ENDIAN_INL_H_
-
-#include <stdlib.h>
-
-#include "config/aom_config.h"
-
-#include "aom/aom_integer.h"
-
-#if defined(__GNUC__)
-#define LOCAL_GCC_VERSION ((__GNUC__ << 8) | __GNUC_MINOR__)
-#define LOCAL_GCC_PREREQ(maj, min) (LOCAL_GCC_VERSION >= (((maj) << 8) | (min)))
-#else
-#define LOCAL_GCC_VERSION 0
-#define LOCAL_GCC_PREREQ(maj, min) 0
-#endif
-
-// handle clang compatibility
-#ifndef __has_builtin
-#define __has_builtin(x) 0
-#endif
-
-// some endian fix (e.g.: mips-gcc doesn't define __BIG_ENDIAN__)
-#if !defined(WORDS_BIGENDIAN) && \
- (defined(__BIG_ENDIAN__) || defined(_M_PPC) || \
- (defined(__BYTE_ORDER__) && (__BYTE_ORDER__ == __ORDER_BIG_ENDIAN__)))
-#define WORDS_BIGENDIAN
-#endif
-
-#if defined(WORDS_BIGENDIAN)
-#define HToLE32 BSwap32
-#define HToLE16 BSwap16
-#define HToBE64(x) (x)
-#define HToBE32(x) (x)
-#else
-#define HToLE32(x) (x)
-#define HToLE16(x) (x)
-#define HToBE64(X) BSwap64(X)
-#define HToBE32(X) BSwap32(X)
-#endif
-
-#if LOCAL_GCC_PREREQ(4, 8) || __has_builtin(__builtin_bswap16)
-#define HAVE_BUILTIN_BSWAP16
-#endif
-
-#if LOCAL_GCC_PREREQ(4, 3) || __has_builtin(__builtin_bswap32)
-#define HAVE_BUILTIN_BSWAP32
-#endif
-
-#if LOCAL_GCC_PREREQ(4, 3) || __has_builtin(__builtin_bswap64)
-#define HAVE_BUILTIN_BSWAP64
-#endif
-
-#if HAVE_MIPS32 && defined(__mips__) && !defined(__mips64) && \
- defined(__mips_isa_rev) && (__mips_isa_rev >= 2) && (__mips_isa_rev < 6)
-#define AOM_USE_MIPS32_R2
-#endif
-
-static INLINE uint16_t BSwap16(uint16_t x) {
-#if defined(HAVE_BUILTIN_BSWAP16)
- return __builtin_bswap16(x);
-#elif defined(_MSC_VER)
- return _byteswap_ushort(x);
-#else
- // gcc will recognize a 'rorw $8, ...' here:
- return (x >> 8) | ((x & 0xff) << 8);
-#endif // HAVE_BUILTIN_BSWAP16
-}
-
-static INLINE uint32_t BSwap32(uint32_t x) {
-#if defined(AOM_USE_MIPS32_R2)
- uint32_t ret;
- __asm__ volatile(
- "wsbh %[ret], %[x] \n\t"
- "rotr %[ret], %[ret], 16 \n\t"
- : [ret] "=r"(ret)
- : [x] "r"(x));
- return ret;
-#elif defined(HAVE_BUILTIN_BSWAP32)
- return __builtin_bswap32(x);
-#elif defined(__i386__) || defined(__x86_64__)
- uint32_t swapped_bytes;
- __asm__ volatile("bswap %0" : "=r"(swapped_bytes) : "0"(x));
- return swapped_bytes;
-#elif defined(_MSC_VER)
- return (uint32_t)_byteswap_ulong(x);
-#else
- return (x >> 24) | ((x >> 8) & 0xff00) | ((x << 8) & 0xff0000) | (x << 24);
-#endif // HAVE_BUILTIN_BSWAP32
-}
-
-static INLINE uint64_t BSwap64(uint64_t x) {
-#if defined(HAVE_BUILTIN_BSWAP64)
- return __builtin_bswap64(x);
-#elif defined(__x86_64__)
- uint64_t swapped_bytes;
- __asm__ volatile("bswapq %0" : "=r"(swapped_bytes) : "0"(x));
- return swapped_bytes;
-#elif defined(_MSC_VER)
- return (uint64_t)_byteswap_uint64(x);
-#else // generic code for swapping 64-bit values (suggested by bdb@)
- x = ((x & 0xffffffff00000000ull) >> 32) | ((x & 0x00000000ffffffffull) << 32);
- x = ((x & 0xffff0000ffff0000ull) >> 16) | ((x & 0x0000ffff0000ffffull) << 16);
- x = ((x & 0xff00ff00ff00ff00ull) >> 8) | ((x & 0x00ff00ff00ff00ffull) << 8);
- return x;
-#endif // HAVE_BUILTIN_BSWAP64
-}
-
-#endif // AOM_AOM_UTIL_ENDIAN_INL_H_
diff --git a/third_party/aom/apps/aomdec.c b/third_party/aom/apps/aomdec.c
deleted file mode 100644
index ff13b6f50..000000000
--- a/third_party/aom/apps/aomdec.c
+++ /dev/null
@@ -1,1046 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#include <assert.h>
-#include <stdio.h>
-#include <stdlib.h>
-#include <stdarg.h>
-#include <string.h>
-#include <limits.h>
-
-#include "config/aom_config.h"
-
-#if CONFIG_OS_SUPPORT
-#if HAVE_UNISTD_H
-#include <unistd.h> // NOLINT
-#elif !defined(STDOUT_FILENO)
-#define STDOUT_FILENO 1
-#endif
-#endif
-
-#include "aom/aom_decoder.h"
-#include "aom/aomdx.h"
-#include "aom_ports/aom_timer.h"
-#include "aom_ports/mem_ops.h"
-#include "common/args.h"
-#include "common/ivfdec.h"
-#include "common/md5_utils.h"
-#include "common/obudec.h"
-#include "common/tools_common.h"
-
-#if CONFIG_WEBM_IO
-#include "common/webmdec.h"
-#endif
-
-#include "common/rawenc.h"
-#include "common/y4menc.h"
-
-#if CONFIG_LIBYUV
-#include "third_party/libyuv/include/libyuv/scale.h"
-#endif
-
-static const char *exec_name;
-
-struct AvxDecInputContext {
- struct AvxInputContext *aom_input_ctx;
- struct ObuDecInputContext *obu_ctx;
- struct WebmInputContext *webm_ctx;
-};
-
-static const arg_def_t help =
- ARG_DEF(NULL, "help", 0, "Show usage options and exit");
-static const arg_def_t looparg =
- ARG_DEF(NULL, "loops", 1, "Number of times to decode the file");
-static const arg_def_t codecarg = ARG_DEF(NULL, "codec", 1, "Codec to use");
-static const arg_def_t use_yv12 =
- ARG_DEF(NULL, "yv12", 0, "Output raw YV12 frames");
-static const arg_def_t use_i420 =
- ARG_DEF(NULL, "i420", 0, "Output raw I420 frames");
-static const arg_def_t flipuvarg =
- ARG_DEF(NULL, "flipuv", 0, "Flip the chroma planes in the output");
-static const arg_def_t rawvideo =
- ARG_DEF(NULL, "rawvideo", 0, "Output raw YUV frames");
-static const arg_def_t noblitarg =
- ARG_DEF(NULL, "noblit", 0, "Don't process the decoded frames");
-static const arg_def_t progressarg =
- ARG_DEF(NULL, "progress", 0, "Show progress after each frame decodes");
-static const arg_def_t limitarg =
- ARG_DEF(NULL, "limit", 1, "Stop decoding after n frames");
-static const arg_def_t skiparg =
- ARG_DEF(NULL, "skip", 1, "Skip the first n input frames");
-static const arg_def_t postprocarg =
- ARG_DEF(NULL, "postproc", 0, "Postprocess decoded frames");
-static const arg_def_t summaryarg =
- ARG_DEF(NULL, "summary", 0, "Show timing summary");
-static const arg_def_t outputfile =
- ARG_DEF("o", "output", 1, "Output file name pattern (see below)");
-static const arg_def_t threadsarg =
- ARG_DEF("t", "threads", 1, "Max threads to use");
-static const arg_def_t verbosearg =
- ARG_DEF("v", "verbose", 0, "Show version string");
-static const arg_def_t scalearg =
- ARG_DEF("S", "scale", 0, "Scale output frames uniformly");
-static const arg_def_t continuearg =
- ARG_DEF("k", "keep-going", 0, "(debug) Continue decoding after error");
-static const arg_def_t fb_arg =
- ARG_DEF(NULL, "frame-buffers", 1, "Number of frame buffers to use");
-static const arg_def_t md5arg =
- ARG_DEF(NULL, "md5", 0, "Compute the MD5 sum of the decoded frame");
-static const arg_def_t framestatsarg =
- ARG_DEF(NULL, "framestats", 1, "Output per-frame stats (.csv format)");
-static const arg_def_t outbitdeptharg =
- ARG_DEF(NULL, "output-bit-depth", 1, "Output bit-depth for decoded frames");
-static const arg_def_t isannexb =
- ARG_DEF(NULL, "annexb", 0, "Bitstream is in Annex-B format");
-static const arg_def_t oppointarg = ARG_DEF(
- NULL, "oppoint", 1, "Select an operating point of a scalable bitstream");
-static const arg_def_t outallarg = ARG_DEF(
- NULL, "all-layers", 0, "Output all decoded frames of a scalable bitstream");
-static const arg_def_t skipfilmgrain =
- ARG_DEF(NULL, "skip-film-grain", 0, "Skip film grain application");
-
-static const arg_def_t *all_args[] = {
- &help, &codecarg, &use_yv12, &use_i420,
- &flipuvarg, &rawvideo, &noblitarg, &progressarg,
- &limitarg, &skiparg, &postprocarg, &summaryarg,
- &outputfile, &threadsarg, &verbosearg, &scalearg,
- &fb_arg, &md5arg, &framestatsarg, &continuearg,
- &outbitdeptharg, &isannexb, &oppointarg, &outallarg,
- &skipfilmgrain, NULL
-};
-
-#if CONFIG_LIBYUV
-static INLINE int libyuv_scale(aom_image_t *src, aom_image_t *dst,
- FilterModeEnum mode) {
- if (src->fmt == AOM_IMG_FMT_I42016) {
- assert(dst->fmt == AOM_IMG_FMT_I42016);
- return I420Scale_16(
- (uint16_t *)src->planes[AOM_PLANE_Y], src->stride[AOM_PLANE_Y] / 2,
- (uint16_t *)src->planes[AOM_PLANE_U], src->stride[AOM_PLANE_U] / 2,
- (uint16_t *)src->planes[AOM_PLANE_V], src->stride[AOM_PLANE_V] / 2,
- src->d_w, src->d_h, (uint16_t *)dst->planes[AOM_PLANE_Y],
- dst->stride[AOM_PLANE_Y] / 2, (uint16_t *)dst->planes[AOM_PLANE_U],
- dst->stride[AOM_PLANE_U] / 2, (uint16_t *)dst->planes[AOM_PLANE_V],
- dst->stride[AOM_PLANE_V] / 2, dst->d_w, dst->d_h, mode);
- }
- assert(src->fmt == AOM_IMG_FMT_I420);
- assert(dst->fmt == AOM_IMG_FMT_I420);
- return I420Scale(src->planes[AOM_PLANE_Y], src->stride[AOM_PLANE_Y],
- src->planes[AOM_PLANE_U], src->stride[AOM_PLANE_U],
- src->planes[AOM_PLANE_V], src->stride[AOM_PLANE_V], src->d_w,
- src->d_h, dst->planes[AOM_PLANE_Y], dst->stride[AOM_PLANE_Y],
- dst->planes[AOM_PLANE_U], dst->stride[AOM_PLANE_U],
- dst->planes[AOM_PLANE_V], dst->stride[AOM_PLANE_V], dst->d_w,
- dst->d_h, mode);
-}
-#endif
-
-void show_help(FILE *fout, int shorthelp) {
- fprintf(fout, "Usage: %s <options> filename\n\n", exec_name);
-
- if (shorthelp) {
- fprintf(fout, "Use --help to see the full list of options.\n");
- return;
- }
-
- fprintf(fout, "Options:\n");
- arg_show_usage(fout, all_args);
- fprintf(fout,
- "\nOutput File Patterns:\n\n"
- " The -o argument specifies the name of the file(s) to "
- "write to. If the\n argument does not include any escape "
- "characters, the output will be\n written to a single file. "
- "Otherwise, the filename will be calculated by\n expanding "
- "the following escape characters:\n");
- fprintf(fout,
- "\n\t%%w - Frame width"
- "\n\t%%h - Frame height"
- "\n\t%%<n> - Frame number, zero padded to <n> places (1..9)"
- "\n\n Pattern arguments are only supported in conjunction "
- "with the --yv12 and\n --i420 options. If the -o option is "
- "not specified, the output will be\n directed to stdout.\n");
- fprintf(fout, "\nIncluded decoders:\n\n");
-
- for (int i = 0; i < get_aom_decoder_count(); ++i) {
- const AvxInterface *const decoder = get_aom_decoder_by_index(i);
- fprintf(fout, " %-6s - %s\n", decoder->name,
- aom_codec_iface_name(decoder->codec_interface()));
- }
-}
-
-void usage_exit(void) {
- show_help(stderr, 1);
- exit(EXIT_FAILURE);
-}
-
-static int raw_read_frame(FILE *infile, uint8_t **buffer, size_t *bytes_read,
- size_t *buffer_size) {
- char raw_hdr[RAW_FRAME_HDR_SZ];
- size_t frame_size = 0;
-
- if (fread(raw_hdr, RAW_FRAME_HDR_SZ, 1, infile) != 1) {
- if (!feof(infile)) warn("Failed to read RAW frame size\n");
- } else {
- const size_t kCorruptFrameThreshold = 256 * 1024 * 1024;
- const size_t kFrameTooSmallThreshold = 256 * 1024;
- frame_size = mem_get_le32(raw_hdr);
-
- if (frame_size > kCorruptFrameThreshold) {
- warn("Read invalid frame size (%u)\n", (unsigned int)frame_size);
- frame_size = 0;
- }
-
- if (frame_size < kFrameTooSmallThreshold) {
- warn("Warning: Read invalid frame size (%u) - not a raw file?\n",
- (unsigned int)frame_size);
- }
-
- if (frame_size > *buffer_size) {
- uint8_t *new_buf = realloc(*buffer, 2 * frame_size);
- if (new_buf) {
- *buffer = new_buf;
- *buffer_size = 2 * frame_size;
- } else {
- warn("Failed to allocate compressed data buffer\n");
- frame_size = 0;
- }
- }
- }
-
- if (!feof(infile)) {
- if (fread(*buffer, 1, frame_size, infile) != frame_size) {
- warn("Failed to read full frame\n");
- return 1;
- }
- *bytes_read = frame_size;
- }
-
- return 0;
-}
-
-static int read_frame(struct AvxDecInputContext *input, uint8_t **buf,
- size_t *bytes_in_buffer, size_t *buffer_size) {
- switch (input->aom_input_ctx->file_type) {
-#if CONFIG_WEBM_IO
- case FILE_TYPE_WEBM:
- return webm_read_frame(input->webm_ctx, buf, bytes_in_buffer,
- buffer_size);
-#endif
- case FILE_TYPE_RAW:
- return raw_read_frame(input->aom_input_ctx->file, buf, bytes_in_buffer,
- buffer_size);
- case FILE_TYPE_IVF:
- return ivf_read_frame(input->aom_input_ctx->file, buf, bytes_in_buffer,
- buffer_size, NULL);
- case FILE_TYPE_OBU:
- return obudec_read_temporal_unit(input->obu_ctx, buf, bytes_in_buffer,
- buffer_size);
- default: return 1;
- }
-}
-
-static int file_is_raw(struct AvxInputContext *input) {
- uint8_t buf[32];
- int is_raw = 0;
- aom_codec_stream_info_t si;
- memset(&si, 0, sizeof(si));
-
- if (fread(buf, 1, 32, input->file) == 32) {
- int i;
-
- if (mem_get_le32(buf) < 256 * 1024 * 1024) {
- for (i = 0; i < get_aom_decoder_count(); ++i) {
- const AvxInterface *const decoder = get_aom_decoder_by_index(i);
- if (!aom_codec_peek_stream_info(decoder->codec_interface(), buf + 4,
- 32 - 4, &si)) {
- is_raw = 1;
- input->fourcc = decoder->fourcc;
- input->width = si.w;
- input->height = si.h;
- input->framerate.numerator = 30;
- input->framerate.denominator = 1;
- break;
- }
- }
- }
- }
-
- rewind(input->file);
- return is_raw;
-}
-
-static void show_progress(int frame_in, int frame_out, uint64_t dx_time) {
- fprintf(stderr,
- "%d decoded frames/%d showed frames in %" PRId64 " us (%.2f fps)\r",
- frame_in, frame_out, dx_time,
- (double)frame_out * 1000000.0 / (double)dx_time);
-}
-
-struct ExternalFrameBuffer {
- uint8_t *data;
- size_t size;
- int in_use;
-};
-
-struct ExternalFrameBufferList {
- int num_external_frame_buffers;
- struct ExternalFrameBuffer *ext_fb;
-};
-
-// Callback used by libaom to request an external frame buffer. |cb_priv|
-// Application private data passed into the set function. |min_size| is the
-// minimum size in bytes needed to decode the next frame. |fb| pointer to the
-// frame buffer.
-static int get_av1_frame_buffer(void *cb_priv, size_t min_size,
- aom_codec_frame_buffer_t *fb) {
- int i;
- struct ExternalFrameBufferList *const ext_fb_list =
- (struct ExternalFrameBufferList *)cb_priv;
- if (ext_fb_list == NULL) return -1;
-
- // Find a free frame buffer.
- for (i = 0; i < ext_fb_list->num_external_frame_buffers; ++i) {
- if (!ext_fb_list->ext_fb[i].in_use) break;
- }
-
- if (i == ext_fb_list->num_external_frame_buffers) return -1;
-
- if (ext_fb_list->ext_fb[i].size < min_size) {
- free(ext_fb_list->ext_fb[i].data);
- ext_fb_list->ext_fb[i].data = (uint8_t *)calloc(min_size, sizeof(uint8_t));
- if (!ext_fb_list->ext_fb[i].data) return -1;
-
- ext_fb_list->ext_fb[i].size = min_size;
- }
-
- fb->data = ext_fb_list->ext_fb[i].data;
- fb->size = ext_fb_list->ext_fb[i].size;
- ext_fb_list->ext_fb[i].in_use = 1;
-
- // Set the frame buffer's private data to point at the external frame buffer.
- fb->priv = &ext_fb_list->ext_fb[i];
- return 0;
-}
-
-// Callback used by libaom when there are no references to the frame buffer.
-// |cb_priv| user private data passed into the set function. |fb| pointer
-// to the frame buffer.
-static int release_av1_frame_buffer(void *cb_priv,
- aom_codec_frame_buffer_t *fb) {
- struct ExternalFrameBuffer *const ext_fb =
- (struct ExternalFrameBuffer *)fb->priv;
- (void)cb_priv;
- ext_fb->in_use = 0;
- return 0;
-}
-
-static void generate_filename(const char *pattern, char *out, size_t q_len,
- unsigned int d_w, unsigned int d_h,
- unsigned int frame_in) {
- const char *p = pattern;
- char *q = out;
-
- do {
- char *next_pat = strchr(p, '%');
-
- if (p == next_pat) {
- size_t pat_len;
-
- /* parse the pattern */
- q[q_len - 1] = '\0';
- switch (p[1]) {
- case 'w': snprintf(q, q_len - 1, "%d", d_w); break;
- case 'h': snprintf(q, q_len - 1, "%d", d_h); break;
- case '1': snprintf(q, q_len - 1, "%d", frame_in); break;
- case '2': snprintf(q, q_len - 1, "%02d", frame_in); break;
- case '3': snprintf(q, q_len - 1, "%03d", frame_in); break;
- case '4': snprintf(q, q_len - 1, "%04d", frame_in); break;
- case '5': snprintf(q, q_len - 1, "%05d", frame_in); break;
- case '6': snprintf(q, q_len - 1, "%06d", frame_in); break;
- case '7': snprintf(q, q_len - 1, "%07d", frame_in); break;
- case '8': snprintf(q, q_len - 1, "%08d", frame_in); break;
- case '9': snprintf(q, q_len - 1, "%09d", frame_in); break;
- default: die("Unrecognized pattern %%%c\n", p[1]); break;
- }
-
- pat_len = strlen(q);
- if (pat_len >= q_len - 1) die("Output filename too long.\n");
- q += pat_len;
- p += 2;
- q_len -= pat_len;
- } else {
- size_t copy_len;
-
- /* copy the next segment */
- if (!next_pat)
- copy_len = strlen(p);
- else
- copy_len = next_pat - p;
-
- if (copy_len >= q_len - 1) die("Output filename too long.\n");
-
- memcpy(q, p, copy_len);
- q[copy_len] = '\0';
- q += copy_len;
- p += copy_len;
- q_len -= copy_len;
- }
- } while (*p);
-}
-
-static int is_single_file(const char *outfile_pattern) {
- const char *p = outfile_pattern;
-
- do {
- p = strchr(p, '%');
- if (p && p[1] >= '1' && p[1] <= '9')
- return 0; // pattern contains sequence number, so it's not unique
- if (p) p++;
- } while (p);
-
- return 1;
-}
-
-static void print_md5(unsigned char digest[16], const char *filename) {
- int i;
-
- for (i = 0; i < 16; ++i) printf("%02x", digest[i]);
- printf(" %s\n", filename);
-}
-
-static FILE *open_outfile(const char *name) {
- if (strcmp("-", name) == 0) {
- set_binary_mode(stdout);
- return stdout;
- } else {
- FILE *file = fopen(name, "wb");
- if (!file) fatal("Failed to open output file '%s'", name);
- return file;
- }
-}
-
-static int img_shifted_realloc_required(const aom_image_t *img,
- const aom_image_t *shifted,
- aom_img_fmt_t required_fmt) {
- return img->d_w != shifted->d_w || img->d_h != shifted->d_h ||
- required_fmt != shifted->fmt;
-}
-
-static int main_loop(int argc, const char **argv_) {
- aom_codec_ctx_t decoder;
- char *fn = NULL;
- int i;
- int ret = EXIT_FAILURE;
- uint8_t *buf = NULL;
- size_t bytes_in_buffer = 0, buffer_size = 0;
- FILE *infile;
- int frame_in = 0, frame_out = 0, flipuv = 0, noblit = 0;
- int do_md5 = 0, progress = 0;
- int stop_after = 0, postproc = 0, summary = 0, quiet = 1;
- int arg_skip = 0;
- int keep_going = 0;
- const AvxInterface *interface = NULL;
- const AvxInterface *fourcc_interface = NULL;
- uint64_t dx_time = 0;
- struct arg arg;
- char **argv, **argi, **argj;
-
- int single_file;
- int use_y4m = 1;
- int opt_yv12 = 0;
- int opt_i420 = 0;
- int opt_raw = 0;
- aom_codec_dec_cfg_t cfg = { 0, 0, 0, CONFIG_LOWBITDEPTH, { 1 } };
- unsigned int output_bit_depth = 0;
- unsigned int is_annexb = 0;
- int frames_corrupted = 0;
- int dec_flags = 0;
- int do_scale = 0;
- int operating_point = 0;
- int output_all_layers = 0;
- int skip_film_grain = 0;
- aom_image_t *scaled_img = NULL;
- aom_image_t *img_shifted = NULL;
- int frame_avail, got_data, flush_decoder = 0;
- int num_external_frame_buffers = 0;
- struct ExternalFrameBufferList ext_fb_list = { 0, NULL };
-
- const char *outfile_pattern = NULL;
- char outfile_name[PATH_MAX] = { 0 };
- FILE *outfile = NULL;
-
- FILE *framestats_file = NULL;
-
- MD5Context md5_ctx;
- unsigned char md5_digest[16];
-
- struct AvxDecInputContext input = { NULL, NULL, NULL };
- struct AvxInputContext aom_input_ctx;
- memset(&aom_input_ctx, 0, sizeof(aom_input_ctx));
-#if CONFIG_WEBM_IO
- struct WebmInputContext webm_ctx;
- memset(&webm_ctx, 0, sizeof(webm_ctx));
- input.webm_ctx = &webm_ctx;
-#endif
- struct ObuDecInputContext obu_ctx = { NULL, NULL, 0, 0, 0 };
-
- obu_ctx.avx_ctx = &aom_input_ctx;
- input.obu_ctx = &obu_ctx;
- input.aom_input_ctx = &aom_input_ctx;
-
- /* Parse command line */
- exec_name = argv_[0];
- argv = argv_dup(argc - 1, argv_ + 1);
-
- for (argi = argj = argv; (*argj = *argi); argi += arg.argv_step) {
- memset(&arg, 0, sizeof(arg));
- arg.argv_step = 1;
-
- if (arg_match(&arg, &help, argi)) {
- show_help(stdout, 0);
- exit(EXIT_SUCCESS);
- } else if (arg_match(&arg, &codecarg, argi)) {
- interface = get_aom_decoder_by_name(arg.val);
- if (!interface)
- die("Error: Unrecognized argument (%s) to --codec\n", arg.val);
- } else if (arg_match(&arg, &looparg, argi)) {
- // no-op
- } else if (arg_match(&arg, &outputfile, argi)) {
- outfile_pattern = arg.val;
- } else if (arg_match(&arg, &use_yv12, argi)) {
- use_y4m = 0;
- flipuv = 1;
- opt_yv12 = 1;
- opt_i420 = 0;
- opt_raw = 0;
- } else if (arg_match(&arg, &use_i420, argi)) {
- use_y4m = 0;
- flipuv = 0;
- opt_yv12 = 0;
- opt_i420 = 1;
- opt_raw = 0;
- } else if (arg_match(&arg, &rawvideo, argi)) {
- use_y4m = 0;
- opt_yv12 = 0;
- opt_i420 = 0;
- opt_raw = 1;
- } else if (arg_match(&arg, &flipuvarg, argi)) {
- flipuv = 1;
- } else if (arg_match(&arg, &noblitarg, argi)) {
- noblit = 1;
- } else if (arg_match(&arg, &progressarg, argi)) {
- progress = 1;
- } else if (arg_match(&arg, &limitarg, argi)) {
- stop_after = arg_parse_uint(&arg);
- } else if (arg_match(&arg, &skiparg, argi)) {
- arg_skip = arg_parse_uint(&arg);
- } else if (arg_match(&arg, &postprocarg, argi)) {
- postproc = 1;
- } else if (arg_match(&arg, &md5arg, argi)) {
- do_md5 = 1;
- } else if (arg_match(&arg, &framestatsarg, argi)) {
- framestats_file = fopen(arg.val, "w");
- if (!framestats_file) {
- die("Error: Could not open --framestats file (%s) for writing.\n",
- arg.val);
- }
- } else if (arg_match(&arg, &summaryarg, argi)) {
- summary = 1;
- } else if (arg_match(&arg, &threadsarg, argi)) {
- cfg.threads = arg_parse_uint(&arg);
-#if !CONFIG_MULTITHREAD
- if (cfg.threads > 1) {
- die("Error: --threads=%d is not supported when CONFIG_MULTITHREAD = "
- "0.\n",
- cfg.threads);
- }
-#endif
- } else if (arg_match(&arg, &verbosearg, argi)) {
- quiet = 0;
- } else if (arg_match(&arg, &scalearg, argi)) {
- do_scale = 1;
- } else if (arg_match(&arg, &fb_arg, argi)) {
- num_external_frame_buffers = arg_parse_uint(&arg);
- } else if (arg_match(&arg, &continuearg, argi)) {
- keep_going = 1;
- } else if (arg_match(&arg, &outbitdeptharg, argi)) {
- output_bit_depth = arg_parse_uint(&arg);
- } else if (arg_match(&arg, &isannexb, argi)) {
- is_annexb = 1;
- input.obu_ctx->is_annexb = 1;
- } else if (arg_match(&arg, &oppointarg, argi)) {
- operating_point = arg_parse_int(&arg);
- } else if (arg_match(&arg, &outallarg, argi)) {
- output_all_layers = 1;
- } else if (arg_match(&arg, &skipfilmgrain, argi)) {
- skip_film_grain = 1;
- } else {
- argj++;
- }
- }
-
- /* Check for unrecognized options */
- for (argi = argv; *argi; argi++)
- if (argi[0][0] == '-' && strlen(argi[0]) > 1)
- die("Error: Unrecognized option %s\n", *argi);
-
- /* Handle non-option arguments */
- fn = argv[0];
-
- if (!fn) {
- free(argv);
- fprintf(stderr, "No input file specified!\n");
- usage_exit();
- }
- /* Open file */
- infile = strcmp(fn, "-") ? fopen(fn, "rb") : set_binary_mode(stdin);
-
- if (!infile) {
- fatal("Failed to open input file '%s'", strcmp(fn, "-") ? fn : "stdin");
- }
-#if CONFIG_OS_SUPPORT
- /* Make sure we don't dump to the terminal, unless forced to with -o - */
- if (!outfile_pattern && isatty(STDOUT_FILENO) && !do_md5 && !noblit) {
- fprintf(stderr,
- "Not dumping raw video to your terminal. Use '-o -' to "
- "override.\n");
- return EXIT_FAILURE;
- }
-#endif
- input.aom_input_ctx->filename = fn;
- input.aom_input_ctx->file = infile;
- if (file_is_ivf(input.aom_input_ctx))
- input.aom_input_ctx->file_type = FILE_TYPE_IVF;
-#if CONFIG_WEBM_IO
- else if (file_is_webm(input.webm_ctx, input.aom_input_ctx))
- input.aom_input_ctx->file_type = FILE_TYPE_WEBM;
-#endif
- else if (file_is_obu(&obu_ctx))
- input.aom_input_ctx->file_type = FILE_TYPE_OBU;
- else if (file_is_raw(input.aom_input_ctx))
- input.aom_input_ctx->file_type = FILE_TYPE_RAW;
- else {
- fprintf(stderr, "Unrecognized input file type.\n");
-#if !CONFIG_WEBM_IO
- fprintf(stderr, "aomdec was built without WebM container support.\n");
-#endif
- return EXIT_FAILURE;
- }
-
- outfile_pattern = outfile_pattern ? outfile_pattern : "-";
- single_file = is_single_file(outfile_pattern);
-
- if (!noblit && single_file) {
- generate_filename(outfile_pattern, outfile_name, PATH_MAX,
- aom_input_ctx.width, aom_input_ctx.height, 0);
- if (do_md5)
- MD5Init(&md5_ctx);
- else
- outfile = open_outfile(outfile_name);
- }
-
- if (use_y4m && !noblit) {
- if (!single_file) {
- fprintf(stderr,
- "YUV4MPEG2 not supported with output patterns,"
- " try --i420 or --yv12 or --rawvideo.\n");
- return EXIT_FAILURE;
- }
-
-#if CONFIG_WEBM_IO
- if (aom_input_ctx.file_type == FILE_TYPE_WEBM) {
- if (webm_guess_framerate(input.webm_ctx, input.aom_input_ctx)) {
- fprintf(stderr,
- "Failed to guess framerate -- error parsing "
- "webm file?\n");
- return EXIT_FAILURE;
- }
- }
-#endif
- }
-
- fourcc_interface = get_aom_decoder_by_fourcc(aom_input_ctx.fourcc);
- if (interface && fourcc_interface && interface != fourcc_interface)
- warn("Header indicates codec: %s\n", fourcc_interface->name);
- else
- interface = fourcc_interface;
-
- if (!interface) interface = get_aom_decoder_by_index(0);
-
- dec_flags = (postproc ? AOM_CODEC_USE_POSTPROC : 0);
- if (aom_codec_dec_init(&decoder, interface->codec_interface(), &cfg,
- dec_flags)) {
- fprintf(stderr, "Failed to initialize decoder: %s\n",
- aom_codec_error(&decoder));
- goto fail2;
- }
-
- if (!quiet) fprintf(stderr, "%s\n", decoder.name);
-
- if (aom_codec_control(&decoder, AV1D_SET_IS_ANNEXB, is_annexb)) {
- fprintf(stderr, "Failed to set is_annexb: %s\n", aom_codec_error(&decoder));
- goto fail;
- }
-
- if (aom_codec_control(&decoder, AV1D_SET_OPERATING_POINT, operating_point)) {
- fprintf(stderr, "Failed to set operating_point: %s\n",
- aom_codec_error(&decoder));
- goto fail;
- }
-
- if (aom_codec_control(&decoder, AV1D_SET_OUTPUT_ALL_LAYERS,
- output_all_layers)) {
- fprintf(stderr, "Failed to set output_all_layers: %s\n",
- aom_codec_error(&decoder));
- goto fail;
- }
-
- if (aom_codec_control(&decoder, AV1D_SET_SKIP_FILM_GRAIN, skip_film_grain)) {
- fprintf(stderr, "Failed to set skip_film_grain: %s\n",
- aom_codec_error(&decoder));
- goto fail;
- }
-
- if (arg_skip) fprintf(stderr, "Skipping first %d frames.\n", arg_skip);
- while (arg_skip) {
- if (read_frame(&input, &buf, &bytes_in_buffer, &buffer_size)) break;
- arg_skip--;
- }
-
- if (num_external_frame_buffers > 0) {
- ext_fb_list.num_external_frame_buffers = num_external_frame_buffers;
- ext_fb_list.ext_fb = (struct ExternalFrameBuffer *)calloc(
- num_external_frame_buffers, sizeof(*ext_fb_list.ext_fb));
- if (aom_codec_set_frame_buffer_functions(&decoder, get_av1_frame_buffer,
- release_av1_frame_buffer,
- &ext_fb_list)) {
- fprintf(stderr, "Failed to configure external frame buffers: %s\n",
- aom_codec_error(&decoder));
- goto fail;
- }
- }
-
- frame_avail = 1;
- got_data = 0;
-
- if (framestats_file) fprintf(framestats_file, "bytes,qp\r\n");
-
- /* Decode file */
- while (frame_avail || got_data) {
- aom_codec_iter_t iter = NULL;
- aom_image_t *img;
- struct aom_usec_timer timer;
- int corrupted = 0;
-
- frame_avail = 0;
- if (!stop_after || frame_in < stop_after) {
- if (!read_frame(&input, &buf, &bytes_in_buffer, &buffer_size)) {
- frame_avail = 1;
- frame_in++;
-
- aom_usec_timer_start(&timer);
-
- if (aom_codec_decode(&decoder, buf, bytes_in_buffer, NULL)) {
- const char *detail = aom_codec_error_detail(&decoder);
- warn("Failed to decode frame %d: %s", frame_in,
- aom_codec_error(&decoder));
-
- if (detail) warn("Additional information: %s", detail);
- if (!keep_going) goto fail;
- }
-
- if (framestats_file) {
- int qp;
- if (aom_codec_control(&decoder, AOMD_GET_LAST_QUANTIZER, &qp)) {
- warn("Failed AOMD_GET_LAST_QUANTIZER: %s",
- aom_codec_error(&decoder));
- if (!keep_going) goto fail;
- }
- fprintf(framestats_file, "%d,%d\r\n", (int)bytes_in_buffer, qp);
- }
-
- aom_usec_timer_mark(&timer);
- dx_time += aom_usec_timer_elapsed(&timer);
- } else {
- flush_decoder = 1;
- }
- } else {
- flush_decoder = 1;
- }
-
- aom_usec_timer_start(&timer);
-
- if (flush_decoder) {
- // Flush the decoder in frame parallel decode.
- if (aom_codec_decode(&decoder, NULL, 0, NULL)) {
- warn("Failed to flush decoder: %s", aom_codec_error(&decoder));
- }
- }
-
- aom_usec_timer_mark(&timer);
- dx_time += aom_usec_timer_elapsed(&timer);
-
- got_data = 0;
- while ((img = aom_codec_get_frame(&decoder, &iter))) {
- ++frame_out;
- got_data = 1;
-
- if (aom_codec_control(&decoder, AOMD_GET_FRAME_CORRUPTED, &corrupted)) {
- warn("Failed AOM_GET_FRAME_CORRUPTED: %s", aom_codec_error(&decoder));
- if (!keep_going) goto fail;
- }
- frames_corrupted += corrupted;
-
- if (progress) show_progress(frame_in, frame_out, dx_time);
-
- if (!noblit) {
- const int PLANES_YUV[] = { AOM_PLANE_Y, AOM_PLANE_U, AOM_PLANE_V };
- const int PLANES_YVU[] = { AOM_PLANE_Y, AOM_PLANE_V, AOM_PLANE_U };
- const int *planes = flipuv ? PLANES_YVU : PLANES_YUV;
-
- if (do_scale) {
- if (frame_out == 1) {
- // If the output frames are to be scaled to a fixed display size
- // then use the width and height specified in the container. If
- // either of these is set to 0, use the display size set in the
- // first frame header. If that is unavailable, use the raw decoded
- // size of the first decoded frame.
- int render_width = aom_input_ctx.width;
- int render_height = aom_input_ctx.height;
- if (!render_width || !render_height) {
- int render_size[2];
- if (aom_codec_control(&decoder, AV1D_GET_DISPLAY_SIZE,
- render_size)) {
- // As last resort use size of first frame as display size.
- render_width = img->d_w;
- render_height = img->d_h;
- } else {
- render_width = render_size[0];
- render_height = render_size[1];
- }
- }
- scaled_img =
- aom_img_alloc(NULL, img->fmt, render_width, render_height, 16);
- scaled_img->bit_depth = img->bit_depth;
- scaled_img->monochrome = img->monochrome;
- scaled_img->csp = img->csp;
- }
-
- if (img->d_w != scaled_img->d_w || img->d_h != scaled_img->d_h) {
-#if CONFIG_LIBYUV
- libyuv_scale(img, scaled_img, kFilterBox);
- img = scaled_img;
-#else
- fprintf(
- stderr,
- "Failed to scale output frame: %s.\n"
- "libyuv is required for scaling but is currently disabled.\n"
- "Be sure to specify -DCONFIG_LIBYUV=1 when running cmake.\n",
- aom_codec_error(&decoder));
- goto fail;
-#endif
- }
- }
- // Default to codec bit depth if output bit depth not set
- if (!output_bit_depth && single_file && !do_md5) {
- output_bit_depth = img->bit_depth;
- }
- // Shift up or down if necessary
- if (output_bit_depth != 0) {
- const aom_img_fmt_t shifted_fmt =
- output_bit_depth == 8 ? img->fmt & ~AOM_IMG_FMT_HIGHBITDEPTH
- : img->fmt | AOM_IMG_FMT_HIGHBITDEPTH;
-
- if (shifted_fmt != img->fmt || output_bit_depth != img->bit_depth) {
- if (img_shifted &&
- img_shifted_realloc_required(img, img_shifted, shifted_fmt)) {
- aom_img_free(img_shifted);
- img_shifted = NULL;
- }
- if (img_shifted) {
- img_shifted->monochrome = img->monochrome;
- }
- if (!img_shifted) {
- img_shifted =
- aom_img_alloc(NULL, shifted_fmt, img->d_w, img->d_h, 16);
- img_shifted->bit_depth = output_bit_depth;
- img_shifted->monochrome = img->monochrome;
- img_shifted->csp = img->csp;
- }
- if (output_bit_depth > img->bit_depth) {
- aom_img_upshift(img_shifted, img,
- output_bit_depth - img->bit_depth);
- } else {
- aom_img_downshift(img_shifted, img,
- img->bit_depth - output_bit_depth);
- }
- img = img_shifted;
- }
- }
-
- aom_input_ctx.width = img->d_w;
- aom_input_ctx.height = img->d_h;
-
- int num_planes = (opt_raw && img->monochrome) ? 1 : 3;
- if (single_file) {
- if (use_y4m) {
- char y4m_buf[Y4M_BUFFER_SIZE] = { 0 };
- size_t len = 0;
- if (frame_out == 1) {
- // Y4M file header
- len = y4m_write_file_header(
- y4m_buf, sizeof(y4m_buf), aom_input_ctx.width,
- aom_input_ctx.height, &aom_input_ctx.framerate,
- img->monochrome, img->csp, img->fmt, img->bit_depth);
- if (do_md5) {
- MD5Update(&md5_ctx, (md5byte *)y4m_buf, (unsigned int)len);
- } else {
- fputs(y4m_buf, outfile);
- }
- }
-
- // Y4M frame header
- len = y4m_write_frame_header(y4m_buf, sizeof(y4m_buf));
- if (do_md5) {
- MD5Update(&md5_ctx, (md5byte *)y4m_buf, (unsigned int)len);
- y4m_update_image_md5(img, planes, &md5_ctx);
- } else {
- fputs(y4m_buf, outfile);
- y4m_write_image_file(img, planes, outfile);
- }
- } else {
- if (frame_out == 1) {
- // Check if --yv12 or --i420 options are consistent with the
- // bit-stream decoded
- if (opt_i420) {
- if (img->fmt != AOM_IMG_FMT_I420 &&
- img->fmt != AOM_IMG_FMT_I42016) {
- fprintf(stderr,
- "Cannot produce i420 output for bit-stream.\n");
- goto fail;
- }
- }
- if (opt_yv12) {
- if ((img->fmt != AOM_IMG_FMT_I420 &&
- img->fmt != AOM_IMG_FMT_YV12) ||
- img->bit_depth != 8) {
- fprintf(stderr,
- "Cannot produce yv12 output for bit-stream.\n");
- goto fail;
- }
- }
- }
- if (do_md5) {
- raw_update_image_md5(img, planes, num_planes, &md5_ctx);
- } else {
- raw_write_image_file(img, planes, num_planes, outfile);
- }
- }
- } else {
- generate_filename(outfile_pattern, outfile_name, PATH_MAX, img->d_w,
- img->d_h, frame_in);
- if (do_md5) {
- MD5Init(&md5_ctx);
- if (use_y4m) {
- y4m_update_image_md5(img, planes, &md5_ctx);
- } else {
- raw_update_image_md5(img, planes, num_planes, &md5_ctx);
- }
- MD5Final(md5_digest, &md5_ctx);
- print_md5(md5_digest, outfile_name);
- } else {
- outfile = open_outfile(outfile_name);
- if (use_y4m) {
- y4m_write_image_file(img, planes, outfile);
- } else {
- raw_write_image_file(img, planes, num_planes, outfile);
- }
- fclose(outfile);
- }
- }
- }
- }
- }
-
- if (summary || progress) {
- show_progress(frame_in, frame_out, dx_time);
- fprintf(stderr, "\n");
- }
-
- if (frames_corrupted) {
- fprintf(stderr, "WARNING: %d frames corrupted.\n", frames_corrupted);
- } else {
- ret = EXIT_SUCCESS;
- }
-
-fail:
-
- if (aom_codec_destroy(&decoder)) {
- fprintf(stderr, "Failed to destroy decoder: %s\n",
- aom_codec_error(&decoder));
- }
-
-fail2:
-
- if (!noblit && single_file) {
- if (do_md5) {
- MD5Final(md5_digest, &md5_ctx);
- print_md5(md5_digest, outfile_name);
- } else {
- fclose(outfile);
- }
- }
-
-#if CONFIG_WEBM_IO
- if (input.aom_input_ctx->file_type == FILE_TYPE_WEBM)
- webm_free(input.webm_ctx);
-#endif
- if (input.aom_input_ctx->file_type == FILE_TYPE_OBU)
- obudec_free(input.obu_ctx);
-
- if (input.aom_input_ctx->file_type != FILE_TYPE_WEBM) free(buf);
-
- if (scaled_img) aom_img_free(scaled_img);
- if (img_shifted) aom_img_free(img_shifted);
-
- for (i = 0; i < ext_fb_list.num_external_frame_buffers; ++i) {
- free(ext_fb_list.ext_fb[i].data);
- }
- free(ext_fb_list.ext_fb);
-
- fclose(infile);
- if (framestats_file) fclose(framestats_file);
-
- free(argv);
-
- return ret;
-}
-
-/* Program entry point.  Looks through a private copy of the command line for
- * the loop option (looparg) and then runs main_loop() that many times (once
- * when the option is absent), stopping early at the first non-zero result.
- * Returns the result of the last main_loop() call. */
-int main(int argc, const char **argv_) {
-  unsigned int loop_count = 1;
-  unsigned int run;
-  int status = 0;
-  struct arg parsed;
-  char **args_copy = argv_dup(argc - 1, argv_ + 1);
-  char **cursor = args_copy;
-  char **dest = args_copy;
-
-  /* |dest| never advances; each visited entry is copied into the first slot
-   * and the scan ends at the terminating NULL entry. */
-  while ((*dest = *cursor)) {
-    memset(&parsed, 0, sizeof(parsed));
-    parsed.argv_step = 1;
-
-    if (arg_match(&parsed, &looparg, cursor)) {
-      loop_count = arg_parse_uint(&parsed);
-      break;
-    }
-    cursor += parsed.argv_step;
-  }
-  free(args_copy);
-
-  for (run = 0; !status && run < loop_count; ++run) {
-    status = main_loop(argc, argv_);
-  }
-  return status;
-}
diff --git a/third_party/aom/apps/aomenc.c b/third_party/aom/apps/aomenc.c
deleted file mode 100644
index 2e5d35cfe..000000000
--- a/third_party/aom/apps/aomenc.c
+++ /dev/null
@@ -1,2391 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#include "apps/aomenc.h"
-
-#include "config/aom_config.h"
-
-#include <assert.h>
-#include <limits.h>
-#include <math.h>
-#include <stdarg.h>
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-
-#if CONFIG_AV1_DECODER
-#include "aom/aom_decoder.h"
-#include "aom/aomdx.h"
-#endif
-
-#include "aom/aom_encoder.h"
-#include "aom/aom_integer.h"
-#include "aom/aomcx.h"
-#include "aom_dsp/aom_dsp_common.h"
-#include "aom_ports/aom_timer.h"
-#include "aom_ports/mem_ops.h"
-#include "common/args.h"
-#include "common/ivfenc.h"
-#include "common/tools_common.h"
-#include "common/warnings.h"
-
-#if CONFIG_WEBM_IO
-#include "common/webmenc.h"
-#endif
-
-#include "common/y4minput.h"
-#include "examples/encoder_util.h"
-#include "stats/aomstats.h"
-#include "stats/rate_hist.h"
-
-#if CONFIG_LIBYUV
-#include "third_party/libyuv/include/libyuv/scale.h"
-#endif
-
-/* Swallow warnings about unused results of fread/fwrite by routing every call
- * through an ordinary pass-through function.  Each #define has to stay below
- * its wrapper so that the wrapper body still calls the real stdio function. */
-static size_t wrap_fread(void *ptr, size_t size, size_t nmemb, FILE *stream) {
-  const size_t items_read = fread(ptr, size, nmemb, stream);
-  return items_read;
-}
-#define fread wrap_fread
-
-static size_t wrap_fwrite(const void *ptr, size_t size, size_t nmemb,
-                          FILE *stream) {
-  const size_t items_written = fwrite(ptr, size, nmemb, stream);
-  return items_written;
-}
-#define fwrite wrap_fwrite
-
-static const char *exec_name;
-
-static void warn_or_exit_on_errorv(aom_codec_ctx_t *ctx, int fatal,
- const char *s, va_list ap) {
- if (ctx->err) {
- const char *detail = aom_codec_error_detail(ctx);
-
- vfprintf(stderr, s, ap);
- fprintf(stderr, ": %s\n", aom_codec_error(ctx));
-
- if (detail) fprintf(stderr, " %s\n", detail);
-
- if (fatal) exit(EXIT_FAILURE);
- }
-}
-
-/* Variadic front end to warn_or_exit_on_errorv() with |fatal| fixed to 1: a
- * pending error on |ctx| is reported using the printf-style message |s| and
- * the process exits. */
-static void ctx_exit_on_error(aom_codec_ctx_t *ctx, const char *s, ...) {
-  va_list args;
-
-  va_start(args, s);
-  warn_or_exit_on_errorv(ctx, /*fatal=*/1, s, args);
-  va_end(args);
-}
-
-/* Variadic front end to warn_or_exit_on_errorv(): forwards |ctx|, |fatal| and
- * the printf-style message |s| together with its arguments. */
-static void warn_or_exit_on_error(aom_codec_ctx_t *ctx, int fatal,
-                                  const char *s, ...) {
-  va_list args;
-
-  va_start(args, s);
-  warn_or_exit_on_errorv(ctx, fatal, s, args);
-  va_end(args);
-}
-
-/* Reads the next input frame into |img|.
- * Y4M input goes through y4m_input_fetch_frame(); any other file type goes
- * through read_yuv_frame().  Returns 1 when a frame was read and 0 otherwise
- * (fetch result below 1, or a non-zero result from read_yuv_frame()). */
-static int read_frame(struct AvxInputContext *input_ctx, aom_image_t *img) {
-  if (input_ctx->file_type == FILE_TYPE_Y4M) {
-    const int fetched =
-        y4m_input_fetch_frame(&input_ctx->y4m, input_ctx->file, img);
-    return (fetched < 1) ? 0 : 1;
-  }
-
-  return !read_yuv_frame(input_ctx, img);
-}
-
-/* Returns 1 when the four bytes in |detect| are "YUV4", otherwise 0. */
-static int file_is_y4m(const char detect[4]) {
-  return memcmp(detect, "YUV4", 4) == 0 ? 1 : 0;
-}
-
-/* Returns 1 when the four bytes in |detect| are "DKIF", otherwise 0. */
-static int fourcc_is_ivf(const char detect[4]) {
-  return memcmp(detect, "DKIF", 4) == 0 ? 1 : 0;
-}
-
-/* General command-line option definitions.
- * NOTE(review): the ARG_DEF arguments look like (short name, long name,
- * takes-a-value flag, help text) -- confirm against common/args.h. */
-static const arg_def_t help =
- ARG_DEF(NULL, "help", 0, "Show usage options and exit");
-static const arg_def_t debugmode =
- ARG_DEF("D", "debug", 0, "Debug mode (makes output deterministic)");
-static const arg_def_t outputfile =
- ARG_DEF("o", "output", 1, "Output filename");
-static const arg_def_t use_yv12 =
- ARG_DEF(NULL, "yv12", 0, "Input file is YV12 ");
-static const arg_def_t use_i420 =
- ARG_DEF(NULL, "i420", 0, "Input file is I420 (default)");
-static const arg_def_t use_i422 =
- ARG_DEF(NULL, "i422", 0, "Input file is I422");
-static const arg_def_t use_i444 =
- ARG_DEF(NULL, "i444", 0, "Input file is I444");
-static const arg_def_t codecarg = ARG_DEF(NULL, "codec", 1, "Codec to use");
-static const arg_def_t passes =
- ARG_DEF("p", "passes", 1, "Number of passes (1/2)");
-static const arg_def_t pass_arg =
- ARG_DEF(NULL, "pass", 1, "Pass to execute (1/2)");
-static const arg_def_t fpf_name =
- ARG_DEF(NULL, "fpf", 1, "First pass statistics file name");
-/* Only defined when first-pass macroblock statistics are configured in. */
-#if CONFIG_FP_MB_STATS
-static const arg_def_t fpmbf_name =
- ARG_DEF(NULL, "fpmbf", 1, "First pass block statistics file name");
-#endif
-static const arg_def_t limit =
- ARG_DEF(NULL, "limit", 1, "Stop encoding after n input frames");
-static const arg_def_t skip =
- ARG_DEF(NULL, "skip", 1, "Skip the first n input frames");
-static const arg_def_t good_dl =
- ARG_DEF(NULL, "good", 0, "Use Good Quality Deadline");
-static const arg_def_t quietarg =
- ARG_DEF("q", "quiet", 0, "Do not print encode progress");
-static const arg_def_t verbosearg =
- ARG_DEF("v", "verbose", 0, "Show encoder parameters");
-static const arg_def_t psnrarg =
- ARG_DEF(NULL, "psnr", 0, "Show PSNR in status line");
-/* Config-file related options, present only with CONFIG_FILEOPTIONS. */
-#if CONFIG_FILEOPTIONS
-static const arg_def_t use_cfg = ARG_DEF("c", "cfg", 1, "Config file to use");
-static const arg_def_t ext_partition =
- ARG_DEF(NULL, "ext-partition", 1, "corresponds to extended partitions");
-#endif
-
-/* Accepted spellings for --test-decode and the TEST_DECODE_* value each one
- * selects.  The { NULL, 0 } entry terminates the list. */
-static const struct arg_enum_list test_decode_enum[] = {
- { "off", TEST_DECODE_OFF },
- { "fatal", TEST_DECODE_FATAL },
- { "warn", TEST_DECODE_WARN },
- { NULL, 0 }
-};
-static const arg_def_t recontest = ARG_DEF_ENUM(
- NULL, "test-decode", 1, "Test encode/decode mismatch", test_decode_enum);
-/* Frame rate, output container selection, histogram and warning options. */
-static const arg_def_t framerate =
- ARG_DEF(NULL, "fps", 1, "Stream frame rate (rate/scale)");
-static const arg_def_t use_webm =
- ARG_DEF(NULL, "webm", 0, "Output WebM (default when WebM IO is enabled)");
-static const arg_def_t use_ivf = ARG_DEF(NULL, "ivf", 0, "Output IVF");
-static const arg_def_t use_obu = ARG_DEF(NULL, "obu", 0, "Output OBU");
-static const arg_def_t q_hist_n =
- ARG_DEF(NULL, "q-hist", 1, "Show quantizer histogram (n-buckets)");
-static const arg_def_t rate_hist_n =
- ARG_DEF(NULL, "rate-hist", 1, "Show rate histogram (n-buckets)");
-static const arg_def_t disable_warnings =
- ARG_DEF(NULL, "disable-warnings", 0,
- "Disable warnings about potentially incorrect encode settings.");
-static const arg_def_t disable_warning_prompt =
- ARG_DEF("y", "disable-warning-prompt", 0,
- "Display warnings, but do not prompt user to continue.");
-/* Accepted values for --bit-depth and the AOM_BITS_* constant each selects;
- * terminated by { NULL, 0 }. */
-static const struct arg_enum_list bitdepth_enum[] = {
- { "8", AOM_BITS_8 }, { "10", AOM_BITS_10 }, { "12", AOM_BITS_12 }, { NULL, 0 }
-};
-
-static const arg_def_t bitdeptharg = ARG_DEF_ENUM(
- "b", "bit-depth", 1,
- "Bit depth for codec (8 for version <=1, 10 or 12 for version 2)",
- bitdepth_enum);
-static const arg_def_t inbitdeptharg =
- ARG_DEF(NULL, "input-bit-depth", 1, "Bit depth of input");
-
-/* Chroma subsampling of the input, given separately for x and y. */
-static const arg_def_t input_chroma_subsampling_x = ARG_DEF(
- NULL, "input-chroma-subsampling-x", 1, "chroma subsampling x value.");
-static const arg_def_t input_chroma_subsampling_y = ARG_DEF(
- NULL, "input-chroma-subsampling-y", 1, "chroma subsampling y value.");
-
-/* NULL-terminated table of the general options; show_help() prints it under
- * the "Options:" heading.  use_cfg is included only with CONFIG_FILEOPTIONS. */
-static const arg_def_t *main_args[] = { &help,
-#if CONFIG_FILEOPTIONS
- &use_cfg,
-#endif
- &debugmode,
- &outputfile,
- &codecarg,
- &passes,
- &pass_arg,
- &fpf_name,
- &limit,
- &skip,
- &good_dl,
- &quietarg,
- &verbosearg,
- &psnrarg,
- &use_webm,
- &use_ivf,
- &use_obu,
- &q_hist_n,
- &rate_hist_n,
- &disable_warnings,
- &disable_warning_prompt,
- &recontest,
- NULL };
-
-/* Encoder-global option definitions (listed in global_args). */
-static const arg_def_t usage =
- ARG_DEF("u", "usage", 1, "Usage profile number to use");
-static const arg_def_t threads =
- ARG_DEF("t", "threads", 1, "Max number of threads to use");
-static const arg_def_t profile =
- ARG_DEF(NULL, "profile", 1, "Bitstream profile number to use");
-static const arg_def_t width = ARG_DEF("w", "width", 1, "Frame width");
-static const arg_def_t height = ARG_DEF("h", "height", 1, "Frame height");
-/* NOTE(review): the two forced_max_frame_* options pass 0 as the third
- * ARG_DEF argument where the other value-taking options pass 1, although the
- * help text describes a value -- confirm this is intended. */
-static const arg_def_t forced_max_frame_width = ARG_DEF(
- NULL, "forced_max_frame_width", 0, "Maximum frame width value to force");
-static const arg_def_t forced_max_frame_height = ARG_DEF(
- NULL, "forced_max_frame_height", 0, "Maximum frame height value to force");
-/* --stereo-mode and its accepted values exist only when WebM IO is built. */
-#if CONFIG_WEBM_IO
-static const struct arg_enum_list stereo_mode_enum[] = {
- { "mono", STEREO_FORMAT_MONO },
- { "left-right", STEREO_FORMAT_LEFT_RIGHT },
- { "bottom-top", STEREO_FORMAT_BOTTOM_TOP },
- { "top-bottom", STEREO_FORMAT_TOP_BOTTOM },
- { "right-left", STEREO_FORMAT_RIGHT_LEFT },
- { NULL, 0 }
-};
-static const arg_def_t stereo_mode = ARG_DEF_ENUM(
- NULL, "stereo-mode", 1, "Stereo 3D video format", stereo_mode_enum);
-#endif
-static const arg_def_t timebase = ARG_DEF(
- NULL, "timebase", 1, "Output timestamp precision (fractional seconds)");
-static const arg_def_t global_error_resilient =
- ARG_DEF(NULL, "global-error-resilient", 1,
- "Enable global error resiliency features");
-static const arg_def_t lag_in_frames =
- ARG_DEF(NULL, "lag-in-frames", 1, "Max number of frames to lag");
-static const arg_def_t large_scale_tile =
- ARG_DEF(NULL, "large-scale-tile", 1,
- "Large scale tile coding (0: off (default), 1: on)");
-static const arg_def_t monochrome =
- ARG_DEF(NULL, "monochrome", 0, "Monochrome video (no chroma planes)");
-static const arg_def_t full_still_picture_hdr = ARG_DEF(
- NULL, "full-still-picture-hdr", 0, "Use full header for still picture");
-
-/* NULL-terminated table of the encoder-global options; show_help() prints it
- * under "Encoder Global Options:".  stereo_mode is included only with
- * CONFIG_WEBM_IO. */
-static const arg_def_t *global_args[] = { &use_yv12,
- &use_i420,
- &use_i422,
- &use_i444,
- &usage,
- &threads,
- &profile,
- &width,
- &height,
- &forced_max_frame_width,
- &forced_max_frame_height,
-#if CONFIG_WEBM_IO
- &stereo_mode,
-#endif
- &timebase,
- &framerate,
- &global_error_resilient,
- &bitdeptharg,
- &lag_in_frames,
- &large_scale_tile,
- &monochrome,
- &full_still_picture_hdr,
- NULL };
-
-/* Rate-control option definitions (listed in rc_args below). */
-static const arg_def_t dropframe_thresh =
- ARG_DEF(NULL, "drop-frame", 1, "Temporal resampling threshold (buf %)");
-static const arg_def_t resize_mode =
- ARG_DEF(NULL, "resize-mode", 1, "Frame resize mode");
-static const arg_def_t resize_denominator =
- ARG_DEF(NULL, "resize-denominator", 1, "Frame resize denominator");
-static const arg_def_t resize_kf_denominator = ARG_DEF(
- NULL, "resize-kf-denominator", 1, "Frame resize keyframe denominator");
-static const arg_def_t superres_mode =
- ARG_DEF(NULL, "superres-mode", 1, "Frame super-resolution mode");
-static const arg_def_t superres_denominator = ARG_DEF(
- NULL, "superres-denominator", 1, "Frame super-resolution denominator");
-static const arg_def_t superres_kf_denominator =
- ARG_DEF(NULL, "superres-kf-denominator", 1,
- "Frame super-resolution keyframe denominator");
-static const arg_def_t superres_qthresh = ARG_DEF(
- NULL, "superres-qthresh", 1, "Frame super-resolution qindex threshold");
-static const arg_def_t superres_kf_qthresh =
- ARG_DEF(NULL, "superres-kf-qthresh", 1,
- "Frame super-resolution keyframe qindex threshold");
-/* Accepted values for --end-usage; terminated by { NULL, 0 }. */
-static const struct arg_enum_list end_usage_enum[] = { { "vbr", AOM_VBR },
- { "cbr", AOM_CBR },
- { "cq", AOM_CQ },
- { "q", AOM_Q },
- { NULL, 0 } };
-static const arg_def_t end_usage =
- ARG_DEF_ENUM(NULL, "end-usage", 1, "Rate control mode", end_usage_enum);
-static const arg_def_t target_bitrate =
- ARG_DEF(NULL, "target-bitrate", 1, "Bitrate (kbps)");
-static const arg_def_t min_quantizer =
- ARG_DEF(NULL, "min-q", 1, "Minimum (best) quantizer");
-static const arg_def_t max_quantizer =
- ARG_DEF(NULL, "max-q", 1, "Maximum (worst) quantizer");
-static const arg_def_t undershoot_pct =
- ARG_DEF(NULL, "undershoot-pct", 1, "Datarate undershoot (min) target (%)");
-static const arg_def_t overshoot_pct =
- ARG_DEF(NULL, "overshoot-pct", 1, "Datarate overshoot (max) target (%)");
-static const arg_def_t buf_sz =
- ARG_DEF(NULL, "buf-sz", 1, "Client buffer size (ms)");
-static const arg_def_t buf_initial_sz =
- ARG_DEF(NULL, "buf-initial-sz", 1, "Client initial buffer size (ms)");
-static const arg_def_t buf_optimal_sz =
- ARG_DEF(NULL, "buf-optimal-sz", 1, "Client optimal buffer size (ms)");
-/* NULL-terminated table of the rate-control options; show_help() prints it
- * under "Rate Control Options:". */
-static const arg_def_t *rc_args[] = { &dropframe_thresh,
- &resize_mode,
- &resize_denominator,
- &resize_kf_denominator,
- &superres_mode,
- &superres_denominator,
- &superres_kf_denominator,
- &superres_qthresh,
- &superres_kf_qthresh,
- &end_usage,
- &target_bitrate,
- &min_quantizer,
- &max_quantizer,
- &undershoot_pct,
- &overshoot_pct,
- &buf_sz,
- &buf_initial_sz,
- &buf_optimal_sz,
- NULL };
-
-/* Two-pass rate-control options and their NULL-terminated table, printed by
- * show_help() under "Twopass Rate Control Options:". */
-static const arg_def_t bias_pct =
- ARG_DEF(NULL, "bias-pct", 1, "CBR/VBR bias (0=CBR, 100=VBR)");
-static const arg_def_t minsection_pct =
- ARG_DEF(NULL, "minsection-pct", 1, "GOP min bitrate (% of target)");
-static const arg_def_t maxsection_pct =
- ARG_DEF(NULL, "maxsection-pct", 1, "GOP max bitrate (% of target)");
-static const arg_def_t *rc_twopass_args[] = { &bias_pct, &minsection_pct,
- &maxsection_pct, NULL };
-/* Keyframe placement options and their NULL-terminated table, printed by
- * show_help() under "Keyframe Placement Options:". */
-static const arg_def_t fwd_kf_enabled =
- ARG_DEF(NULL, "enable-fwd-kf", 1, "Enable forward reference keyframes");
-static const arg_def_t kf_min_dist =
- ARG_DEF(NULL, "kf-min-dist", 1, "Minimum keyframe interval (frames)");
-static const arg_def_t kf_max_dist =
- ARG_DEF(NULL, "kf-max-dist", 1, "Maximum keyframe interval (frames)");
-static const arg_def_t kf_disabled =
- ARG_DEF(NULL, "disable-kf", 0, "Disable keyframe placement");
-static const arg_def_t *kf_args[] = { &fwd_kf_enabled, &kf_min_dist,
- &kf_max_dist, &kf_disabled, NULL };
-/* Further option definitions; these are referenced from av1_args. */
-static const arg_def_t sframe_dist =
- ARG_DEF(NULL, "sframe-dist", 1, "S-Frame interval (frames)");
-static const arg_def_t sframe_mode =
- ARG_DEF(NULL, "sframe-mode", 1, "S-Frame insertion mode (1..2)");
-static const arg_def_t save_as_annexb =
- ARG_DEF(NULL, "annexb", 1, "Save as Annex-B");
-static const arg_def_t noise_sens =
- ARG_DEF(NULL, "noise-sensitivity", 1, "Noise sensitivity (frames to blur)");
-static const arg_def_t sharpness =
- ARG_DEF(NULL, "sharpness", 1, "Loop filter sharpness (0..7)");
-static const arg_def_t static_thresh =
- ARG_DEF(NULL, "static-thresh", 1, "Motion detection threshold");
-static const arg_def_t auto_altref =
- ARG_DEF(NULL, "auto-alt-ref", 1, "Enable automatic alt reference frames");
-static const arg_def_t arnr_maxframes =
- ARG_DEF(NULL, "arnr-maxframes", 1, "AltRef max frames (0..15)");
-static const arg_def_t arnr_strength =
- ARG_DEF(NULL, "arnr-strength", 1, "AltRef filter strength (0..6)");
-/* Distortion metrics selectable through --tune; terminated by { NULL, 0 }.
- * The dist-8x8 metrics are guarded with "#if CONFIG_DIST_8X8", the same test
- * used by every other CONFIG_DIST_8X8 guard in this file, so they are offered
- * only when the feature is actually enabled (an #ifdef would also be true for
- * a CONFIG_DIST_8X8 defined as 0). */
-static const struct arg_enum_list tuning_enum[] = {
- { "psnr", AOM_TUNE_PSNR },
- { "ssim", AOM_TUNE_SSIM },
-#if CONFIG_DIST_8X8
- { "cdef-dist", AOM_TUNE_CDEF_DIST },
- { "daala-dist", AOM_TUNE_DAALA_DIST },
-#endif
- { NULL, 0 }
-};
-static const arg_def_t tune_metric =
- ARG_DEF_ENUM(NULL, "tune", 1, "Distortion metric tuned with", tuning_enum);
-/* Quality-level and intra-rate options; both are referenced from av1_args. */
-static const arg_def_t cq_level =
- ARG_DEF(NULL, "cq-level", 1, "Constant/Constrained Quality level");
-static const arg_def_t max_intra_rate_pct =
- ARG_DEF(NULL, "max-intra-rate", 1, "Max I-frame bitrate (pct)");
-
-/* Everything from here to the matching "#endif // CONFIG_AV1_ENCODER" is
- * compiled only when the AV1 encoder is configured in. */
-#if CONFIG_AV1_ENCODER
-static const arg_def_t cpu_used_av1 =
- ARG_DEF(NULL, "cpu-used", 1, "CPU Used (0..8)");
-static const arg_def_t rowmtarg =
- ARG_DEF(NULL, "row-mt", 1,
- "Enable row based multi-threading (0: off (default), 1: on)");
-static const arg_def_t tile_cols =
- ARG_DEF(NULL, "tile-columns", 1, "Number of tile columns to use, log2");
-static const arg_def_t tile_rows =
- ARG_DEF(NULL, "tile-rows", 1, "Number of tile rows to use, log2");
-static const arg_def_t tile_width =
- ARG_DEF(NULL, "tile-width", 1, "Tile widths (comma separated)");
-/* --tile-height: list of tile heights.  Help text corrected from "command
- * separated" to "comma separated", matching --tile-width. */
-static const arg_def_t tile_height =
- ARG_DEF(NULL, "tile-height", 1, "Tile heights (comma separated)");
-/* AV1 coding-tool toggles, quantisation-matrix and tile-group options. */
-static const arg_def_t lossless =
- ARG_DEF(NULL, "lossless", 1, "Lossless mode (0: false (default), 1: true)");
-static const arg_def_t enable_cdef =
- ARG_DEF(NULL, "enable-cdef", 1,
- "Enable the constrained directional enhancement filter (0: false, "
- "1: true (default))");
-static const arg_def_t enable_restoration =
- ARG_DEF(NULL, "enable-restoration", 1,
- "Enable the loop restoration filter (0: false, "
- "1: true (default))");
-static const arg_def_t disable_trellis_quant =
- ARG_DEF(NULL, "disable-trellis-quant", 1,
- "Disable trellis optimization of quantized coefficients (0: false ("
- "default) 1: true)");
-static const arg_def_t enable_qm =
- ARG_DEF(NULL, "enable-qm", 1,
- "Enable quantisation matrices (0: false (default), 1: true)");
-static const arg_def_t qm_min = ARG_DEF(
- NULL, "qm-min", 1, "Min quant matrix flatness (0..15), default is 8");
-static const arg_def_t qm_max = ARG_DEF(
- NULL, "qm-max", 1, "Max quant matrix flatness (0..15), default is 15");
-#if CONFIG_DIST_8X8
-static const arg_def_t enable_dist_8x8 =
- ARG_DEF(NULL, "enable-dist-8x8", 1,
- "Enable dist-8x8 (0: false (default), 1: true)");
-#endif // CONFIG_DIST_8X8
-static const arg_def_t num_tg = ARG_DEF(
- NULL, "num-tile-groups", 1, "Maximum number of tile groups, default is 1");
-static const arg_def_t mtu_size =
- ARG_DEF(NULL, "mtu-size", 1,
- "MTU size for a tile group, default is 0 (no MTU targeting), "
- "overrides maximum number of tile groups");
-/* Accepted values for --timing-info and the AOM_TIMING_* constant each one
- * selects; terminated by { NULL, 0 }.  Help text typo "unly" corrected to
- * "only". */
-static const struct arg_enum_list timing_info_enum[] = {
- { "unspecified", AOM_TIMING_UNSPECIFIED },
- { "constant", AOM_TIMING_EQUAL },
- { "model", AOM_TIMING_DEC_MODEL },
- { NULL, 0 }
-};
-static const arg_def_t timing_info =
- ARG_DEF_ENUM(NULL, "timing-info", 1,
- "Signal timing info in the bitstream (model only works for no "
- "hidden frames, no super-res yet):",
- timing_info_enum);
-/* Film grain, denoising, resilience, adaptive-quantisation and golden-frame
- * interval options.  The denoise options exist only with CONFIG_DENOISE. */
-static const arg_def_t film_grain_test =
- ARG_DEF(NULL, "film-grain-test", 1,
- "Film grain test vectors (0: none (default), 1: test-1 2: test-2, "
- "... 16: test-16)");
-static const arg_def_t film_grain_table =
- ARG_DEF(NULL, "film-grain-table", 1,
- "Path to file containing film grain parameters");
-#if CONFIG_DENOISE
-static const arg_def_t denoise_noise_level =
- ARG_DEF(NULL, "denoise-noise-level", 1,
- "Amount of noise (from 0 = don't denoise, to 50)");
-static const arg_def_t denoise_block_size =
- ARG_DEF(NULL, "denoise-block-size", 1, "Denoise block size (default = 32)");
-#endif
-static const arg_def_t enable_ref_frame_mvs =
- ARG_DEF(NULL, "enable-ref-frame-mvs", 1,
- "Enable temporal mv prediction (default is 1)");
-static const arg_def_t frame_parallel_decoding =
- ARG_DEF(NULL, "frame-parallel", 1,
- "Enable frame parallel decodability features "
- "(0: false (default), 1: true)");
-static const arg_def_t error_resilient_mode =
- ARG_DEF(NULL, "error-resilient", 1,
- "Enable error resilient features "
- "(0: false (default), 1: true)");
-static const arg_def_t aq_mode = ARG_DEF(
- NULL, "aq-mode", 1,
- "Adaptive quantization mode (0: off (default), 1: variance 2: complexity, "
- "3: cyclic refresh)");
-static const arg_def_t deltaq_mode = ARG_DEF(
- NULL, "deltaq-mode", 1,
- "Delta qindex mode (0: off (default), 1: deltaq 2: deltaq + deltalf)");
-static const arg_def_t frame_periodic_boost =
- ARG_DEF(NULL, "frame-boost", 1,
- "Enable frame periodic boost (0: off (default), 1: on)");
-static const arg_def_t gf_cbr_boost_pct = ARG_DEF(
- NULL, "gf-cbr-boost", 1, "Boost for Golden Frame in CBR mode (pct)");
-static const arg_def_t max_inter_rate_pct =
- ARG_DEF(NULL, "max-inter-rate", 1, "Max P-frame bitrate (pct)");
-static const arg_def_t min_gf_interval = ARG_DEF(
- NULL, "min-gf-interval", 1,
- "min gf/arf frame interval (default 0, indicating in-built behavior)");
-static const arg_def_t max_gf_interval = ARG_DEF(
- NULL, "max-gf-interval", 1,
- "max gf/arf frame interval (default 0, indicating in-built behavior)");
-
-/* Accepted values for --color-primaries and the AOM_CICP_CP_* constant each
- * one selects; terminated by { NULL, 0 }. */
-static const struct arg_enum_list color_primaries_enum[] = {
- { "bt709", AOM_CICP_CP_BT_709 },
- { "unspecified", AOM_CICP_CP_UNSPECIFIED },
- { "bt601", AOM_CICP_CP_BT_601 },
- { "bt470m", AOM_CICP_CP_BT_470_M },
- { "bt470bg", AOM_CICP_CP_BT_470_B_G },
- { "smpte240", AOM_CICP_CP_SMPTE_240 },
- { "film", AOM_CICP_CP_GENERIC_FILM },
- { "bt2020", AOM_CICP_CP_BT_2020 },
- { "xyz", AOM_CICP_CP_XYZ },
- { "smpte431", AOM_CICP_CP_SMPTE_431 },
- { "smpte432", AOM_CICP_CP_SMPTE_432 },
- { "ebu3213", AOM_CICP_CP_EBU_3213 },
- { NULL, 0 }
-};
-
-static const arg_def_t input_color_primaries = ARG_DEF_ENUM(
- NULL, "color-primaries", 1,
- "Color primaries (CICP) of input content:", color_primaries_enum);
-
-/* Accepted values for --transfer-characteristics and the AOM_CICP_TC_*
- * constant each one selects; terminated by { NULL, 0 }.  "unspecified" now
- * uses the transfer-characteristics constant rather than the colour-primaries
- * one (AOM_CICP_CP_UNSPECIFIED), so every entry comes from the same enum. */
-static const struct arg_enum_list transfer_characteristics_enum[] = {
- { "unspecified", AOM_CICP_TC_UNSPECIFIED },
- { "bt709", AOM_CICP_TC_BT_709 },
- { "bt470m", AOM_CICP_TC_BT_470_M },
- { "bt470bg", AOM_CICP_TC_BT_470_B_G },
- { "bt601", AOM_CICP_TC_BT_601 },
- { "smpte240", AOM_CICP_TC_SMPTE_240 },
- { "lin", AOM_CICP_TC_LINEAR },
- { "log100", AOM_CICP_TC_LOG_100 },
- { "log100sq10", AOM_CICP_TC_LOG_100_SQRT10 },
- { "iec61966", AOM_CICP_TC_IEC_61966 },
- { "bt1361", AOM_CICP_TC_BT_1361 },
- { "srgb", AOM_CICP_TC_SRGB },
- { "bt2020-10bit", AOM_CICP_TC_BT_2020_10_BIT },
- { "bt2020-12bit", AOM_CICP_TC_BT_2020_12_BIT },
- { "smpte2084", AOM_CICP_TC_SMPTE_2084 },
- { "hlg", AOM_CICP_TC_HLG },
- { "smpte428", AOM_CICP_TC_SMPTE_428 },
- { NULL, 0 }
-};
-
-static const arg_def_t input_transfer_characteristics =
- ARG_DEF_ENUM(NULL, "transfer-characteristics", 1,
- "Transfer characteristics (CICP) of input content:",
- transfer_characteristics_enum);
-
-/* Accepted values for --matrix-coefficients and the AOM_CICP_MC_* constant
- * each one selects; terminated by { NULL, 0 }.  "smpte240" now uses the
- * matrix-coefficients constant rather than the colour-primaries one
- * (AOM_CICP_CP_SMPTE_240), so every entry comes from the same enum. */
-static const struct arg_enum_list matrix_coefficients_enum[] = {
- { "identity", AOM_CICP_MC_IDENTITY },
- { "bt709", AOM_CICP_MC_BT_709 },
- { "unspecified", AOM_CICP_MC_UNSPECIFIED },
- { "fcc73", AOM_CICP_MC_FCC },
- { "bt470bg", AOM_CICP_MC_BT_470_B_G },
- { "bt601", AOM_CICP_MC_BT_601 },
- { "smpte240", AOM_CICP_MC_SMPTE_240 },
- { "ycgco", AOM_CICP_MC_SMPTE_YCGCO },
- { "bt2020ncl", AOM_CICP_MC_BT_2020_NCL },
- { "bt2020cl", AOM_CICP_MC_BT_2020_CL },
- { "smpte2085", AOM_CICP_MC_SMPTE_2085 },
- { "chromncl", AOM_CICP_MC_CHROMAT_NCL },
- { "chromcl", AOM_CICP_MC_CHROMAT_CL },
- { "ictcp", AOM_CICP_MC_ICTCP },
- { NULL, 0 }
-};
-
-static const arg_def_t input_matrix_coefficients = ARG_DEF_ENUM(
- NULL, "matrix-coefficients", 1,
- "Matrix coefficients (CICP) of input content:", matrix_coefficients_enum);
-
-/* Accepted values for --chroma-sample-position and the AOM_CSP_* constant
- * each one selects; terminated by { NULL, 0 }. */
-static const struct arg_enum_list chroma_sample_position_enum[] = {
- { "unknown", AOM_CSP_UNKNOWN },
- { "vertical", AOM_CSP_VERTICAL },
- { "colocated", AOM_CSP_COLOCATED },
- { NULL, 0 }
-};
-
-static const arg_def_t input_chroma_sample_position =
- ARG_DEF_ENUM(NULL, "chroma-sample-position", 1,
- "The chroma sample position when chroma 4:2:0 is signaled:",
- chroma_sample_position_enum);
-
-/* Accepted values for --tune-content and the AOM_CONTENT_* constant each one
- * selects; terminated by { NULL, 0 }. */
-static const struct arg_enum_list tune_content_enum[] = {
- { "default", AOM_CONTENT_DEFAULT },
- { "screen", AOM_CONTENT_SCREEN },
- { NULL, 0 }
-};
-
-static const arg_def_t tune_content = ARG_DEF_ENUM(
- NULL, "tune-content", 1, "Tune content type", tune_content_enum);
-
-/* --cdf-update-mode: how entropy-coding CDFs are updated.  The help text now
- * closes the parenthesis that opens before "0:". */
-static const arg_def_t cdf_update_mode =
- ARG_DEF(NULL, "cdf-update-mode", 1,
- "CDF update mode for entropy coding "
- "(0: no CDF update; 1: update CDF on all frames(default); "
- "2: selectively update CDF on some frames)");
-
-/* Accepted values for --sb-size and the AOM_SUPERBLOCK_SIZE_* constant each
- * one selects; terminated by { NULL, 0 }. */
-static const struct arg_enum_list superblock_size_enum[] = {
- { "dynamic", AOM_SUPERBLOCK_SIZE_DYNAMIC },
- { "64", AOM_SUPERBLOCK_SIZE_64X64 },
- { "128", AOM_SUPERBLOCK_SIZE_128X128 },
- { NULL, 0 }
-};
-static const arg_def_t superblock_size = ARG_DEF_ENUM(
- NULL, "sb-size", 1, "Superblock size to use", superblock_size_enum);
-
-/* NULL-terminated table of the AV1-specific options; show_help() prints it
- * under "AV1 Specific Options:".
- * NOTE(review): the entries line up one-for-one with av1_arg_ctrl_map below
- * from cpu_used_av1 through enable_ref_frame_mvs (including the
- * CONFIG_DIST_8X8 and CONFIG_DENOISE conditionals); after that point the two
- * tables list different things.  Keep the order in sync and confirm how the
- * parser consumes the tails before changing either table. */
-static const arg_def_t *av1_args[] = { &cpu_used_av1,
- &auto_altref,
- &sharpness,
- &static_thresh,
- &rowmtarg,
- &tile_cols,
- &tile_rows,
- &arnr_maxframes,
- &arnr_strength,
- &tune_metric,
- &cq_level,
- &max_intra_rate_pct,
- &max_inter_rate_pct,
- &gf_cbr_boost_pct,
- &lossless,
- &enable_cdef,
- &enable_restoration,
- &disable_trellis_quant,
- &enable_qm,
- &qm_min,
- &qm_max,
-#if CONFIG_DIST_8X8
- &enable_dist_8x8,
-#endif
- &frame_parallel_decoding,
- &error_resilient_mode,
- &aq_mode,
- &deltaq_mode,
- &frame_periodic_boost,
- &noise_sens,
- &tune_content,
- &cdf_update_mode,
- &input_color_primaries,
- &input_transfer_characteristics,
- &input_matrix_coefficients,
- &input_chroma_sample_position,
- &min_gf_interval,
- &max_gf_interval,
- &superblock_size,
- &num_tg,
- &mtu_size,
- &timing_info,
- &film_grain_test,
- &film_grain_table,
-#if CONFIG_DENOISE
- &denoise_noise_level,
- &denoise_block_size,
-#endif
- &enable_ref_frame_mvs,
- &bitdeptharg,
- &inbitdeptharg,
- &input_chroma_subsampling_x,
- &input_chroma_subsampling_y,
- &sframe_dist,
- &sframe_mode,
- &save_as_annexb,
- NULL };
-/* Zero-terminated list of encoder control IDs, in the same order as the
- * leading entries of av1_args.  Its element count also defines
- * ARG_CTRL_CNT_MAX further down. */
-static const int av1_arg_ctrl_map[] = { AOME_SET_CPUUSED,
- AOME_SET_ENABLEAUTOALTREF,
- AOME_SET_SHARPNESS,
- AOME_SET_STATIC_THRESHOLD,
- AV1E_SET_ROW_MT,
- AV1E_SET_TILE_COLUMNS,
- AV1E_SET_TILE_ROWS,
- AOME_SET_ARNR_MAXFRAMES,
- AOME_SET_ARNR_STRENGTH,
- AOME_SET_TUNING,
- AOME_SET_CQ_LEVEL,
- AOME_SET_MAX_INTRA_BITRATE_PCT,
- AV1E_SET_MAX_INTER_BITRATE_PCT,
- AV1E_SET_GF_CBR_BOOST_PCT,
- AV1E_SET_LOSSLESS,
- AV1E_SET_ENABLE_CDEF,
- AV1E_SET_ENABLE_RESTORATION,
- AV1E_SET_DISABLE_TRELLIS_QUANT,
- AV1E_SET_ENABLE_QM,
- AV1E_SET_QM_MIN,
- AV1E_SET_QM_MAX,
-#if CONFIG_DIST_8X8
- AV1E_SET_ENABLE_DIST_8X8,
-#endif
- AV1E_SET_FRAME_PARALLEL_DECODING,
- AV1E_SET_ERROR_RESILIENT_MODE,
- AV1E_SET_AQ_MODE,
- AV1E_SET_DELTAQ_MODE,
- AV1E_SET_FRAME_PERIODIC_BOOST,
- AV1E_SET_NOISE_SENSITIVITY,
- AV1E_SET_TUNE_CONTENT,
- AV1E_SET_CDF_UPDATE_MODE,
- AV1E_SET_COLOR_PRIMARIES,
- AV1E_SET_TRANSFER_CHARACTERISTICS,
- AV1E_SET_MATRIX_COEFFICIENTS,
- AV1E_SET_CHROMA_SAMPLE_POSITION,
- AV1E_SET_MIN_GF_INTERVAL,
- AV1E_SET_MAX_GF_INTERVAL,
- AV1E_SET_SUPERBLOCK_SIZE,
- AV1E_SET_NUM_TG,
- AV1E_SET_MTU,
- AV1E_SET_TIMING_INFO_TYPE,
- AV1E_SET_FILM_GRAIN_TEST_VECTOR,
- AV1E_SET_FILM_GRAIN_TABLE,
-#if CONFIG_DENOISE
- AV1E_SET_DENOISE_NOISE_LEVEL,
- AV1E_SET_DENOISE_BLOCK_SIZE,
-#endif
- AV1E_SET_ENABLE_REF_FRAME_MVS,
- AV1E_SET_ENABLE_DF,
- AV1E_SET_ENABLE_ORDER_HINT,
- AV1E_SET_ENABLE_JNT_COMP,
- AV1E_SET_ENABLE_SUPERRES,
- 0 };
-#endif // CONFIG_AV1_ENCODER
-
-/* Empty option table: holds only the NULL terminator. */
-static const arg_def_t *no_args[] = { NULL };
-
-void show_help(FILE *fout, int shorthelp) {
- fprintf(fout, "Usage: %s <options> -o dst_filename src_filename \n",
- exec_name);
-
- if (shorthelp) {
- fprintf(fout, "Use --help to see the full list of options.\n");
- return;
- }
-
- fprintf(fout, "\nOptions:\n");
- arg_show_usage(fout, main_args);
- fprintf(fout, "\nEncoder Global Options:\n");
- arg_show_usage(fout, global_args);
- fprintf(fout, "\nRate Control Options:\n");
- arg_show_usage(fout, rc_args);
- fprintf(fout, "\nTwopass Rate Control Options:\n");
- arg_show_usage(fout, rc_twopass_args);
- fprintf(fout, "\nKeyframe Placement Options:\n");
- arg_show_usage(fout, kf_args);
-#if CONFIG_AV1_ENCODER
- fprintf(fout, "\nAV1 Specific Options:\n");
- arg_show_usage(fout, av1_args);
-#endif
- fprintf(fout,
- "\nStream timebase (--timebase):\n"
- " The desired precision of timestamps in the output, expressed\n"
- " in fractional seconds. Default is 1/1000.\n");
- fprintf(fout, "\nIncluded encoders:\n\n");
-
- const int num_encoder = get_aom_encoder_count();
- for (int i = 0; i < num_encoder; ++i) {
- const AvxInterface *const encoder = get_aom_encoder_by_index(i);
- const char *defstr = (i == (num_encoder - 1)) ? "(default)" : "";
- fprintf(fout, " %-6s - %s %s\n", encoder->name,
- aom_codec_iface_name(encoder->codec_interface()), defstr);
- }
- fprintf(fout, "\n ");
- fprintf(fout, "Use --codec to switch to a non-default encoder.\n\n");
-}
-
-void usage_exit(void) {
- show_help(stderr, 1);
- exit(EXIT_FAILURE);
-}
-
-#if CONFIG_AV1_ENCODER
-#define ARG_CTRL_CNT_MAX NELEMENTS(av1_arg_ctrl_map)
-#endif
-
-#if !CONFIG_WEBM_IO
-typedef int stereo_format_t;
-struct WebmOutputContext {
- int debug;
-};
-#endif
-
-/* Per-stream configuration */
-struct stream_config {
- struct aom_codec_enc_cfg cfg;
- const char *out_fn;
- const char *stats_fn;
-#if CONFIG_FP_MB_STATS
- const char *fpmb_stats_fn;
-#endif
- stereo_format_t stereo_fmt;
- int arg_ctrls[ARG_CTRL_CNT_MAX][2];
- int arg_ctrl_cnt;
- int write_webm;
- const char *film_grain_filename;
- int write_ivf;
- // whether to use 16bit internal buffers
- int use_16bit_internal;
-};
-
-struct stream_state {
- int index;
- struct stream_state *next;
- struct stream_config config;
- FILE *file;
- struct rate_hist *rate_hist;
- struct WebmOutputContext webm_ctx;
- uint64_t psnr_sse_total;
- uint64_t psnr_samples_total;
- double psnr_totals[4];
- int psnr_count;
- int counts[64];
- aom_codec_ctx_t encoder;
- unsigned int frames_out;
- uint64_t cx_time;
- size_t nbytes;
- stats_io_t stats;
-#if CONFIG_FP_MB_STATS
- stats_io_t fpmb_stats;
-#endif
- struct aom_image *img;
- aom_codec_ctx_t decoder;
- int mismatch_seen;
- unsigned int chroma_subsampling_x;
- unsigned int chroma_subsampling_y;
-};
-
-static void validate_positive_rational(const char *msg,
- struct aom_rational *rat) {
- if (rat->den < 0) {
- rat->num *= -1;
- rat->den *= -1;
- }
-
- if (rat->num < 0) die("Error: %s must be positive\n", msg);
-
- if (!rat->den) die("Error: %s has zero denominator\n", msg);
-}
-
-static void parse_global_config(struct AvxEncoderConfig *global, int *argc,
- char ***argv) {
- char **argi, **argj;
- struct arg arg;
- const int num_encoder = get_aom_encoder_count();
- char **argv_local = (char **)*argv;
-#if CONFIG_FILEOPTIONS
- int argc_local = *argc;
-#endif
- if (num_encoder < 1) die("Error: no valid encoder available\n");
-
- /* Initialize default parameters */
- memset(global, 0, sizeof(*global));
- global->codec = get_aom_encoder_by_index(num_encoder - 1);
- global->passes = 0;
- global->color_type = I420;
-
-#if CONFIG_FILEOPTIONS
- const char *cfg = NULL;
- int cfg_included = 0;
-#endif
- for (argi = argj = argv_local; (*argj = *argi); argi += arg.argv_step) {
- arg.argv_step = 1;
-
-#if CONFIG_FILEOPTIONS
- if (arg_match(&arg, &use_cfg, argi)) {
- if (cfg_included) continue;
- cfg = arg.val;
-
- arg_cfg(&argc_local, &argv_local, cfg);
-
- *argj = *argi = *argv_local;
- argj = argi = argv_local;
- *argv = argv_local;
- cfg_included = 1;
- continue;
- }
-#endif
- if (arg_match(&arg, &help, argi)) {
- show_help(stdout, 0);
- exit(EXIT_SUCCESS);
- } else if (arg_match(&arg, &codecarg, argi)) {
- global->codec = get_aom_encoder_by_name(arg.val);
- if (!global->codec)
- die("Error: Unrecognized argument (%s) to --codec\n", arg.val);
- } else if (arg_match(&arg, &passes, argi)) {
- global->passes = arg_parse_uint(&arg);
-
- if (global->passes < 1 || global->passes > 2)
- die("Error: Invalid number of passes (%d)\n", global->passes);
- } else if (arg_match(&arg, &pass_arg, argi)) {
- global->pass = arg_parse_uint(&arg);
-
- if (global->pass < 1 || global->pass > 2)
- die("Error: Invalid pass selected (%d)\n", global->pass);
- } else if (arg_match(&arg, &usage, argi))
- global->usage = arg_parse_uint(&arg);
- else if (arg_match(&arg, &good_dl, argi))
- warn("Deprecated --good option! Ignoring\n");
- else if (arg_match(&arg, &use_yv12, argi))
- global->color_type = YV12;
- else if (arg_match(&arg, &use_i420, argi))
- global->color_type = I420;
- else if (arg_match(&arg, &use_i422, argi))
- global->color_type = I422;
- else if (arg_match(&arg, &use_i444, argi))
- global->color_type = I444;
- else if (arg_match(&arg, &quietarg, argi))
- global->quiet = 1;
- else if (arg_match(&arg, &verbosearg, argi))
- global->verbose = 1;
- else if (arg_match(&arg, &limit, argi))
- global->limit = arg_parse_uint(&arg);
- else if (arg_match(&arg, &skip, argi))
- global->skip_frames = arg_parse_uint(&arg);
- else if (arg_match(&arg, &psnrarg, argi))
- global->show_psnr = 1;
- else if (arg_match(&arg, &recontest, argi))
- global->test_decode = arg_parse_enum_or_int(&arg);
- else if (arg_match(&arg, &framerate, argi)) {
- global->framerate = arg_parse_rational(&arg);
- validate_positive_rational(arg.name, &global->framerate);
- global->have_framerate = 1;
- } else if (arg_match(&arg, &debugmode, argi))
- global->debug = 1;
- else if (arg_match(&arg, &q_hist_n, argi))
- global->show_q_hist_buckets = arg_parse_uint(&arg);
- else if (arg_match(&arg, &rate_hist_n, argi))
- global->show_rate_hist_buckets = arg_parse_uint(&arg);
- else if (arg_match(&arg, &disable_warnings, argi))
- global->disable_warnings = 1;
- else if (arg_match(&arg, &disable_warning_prompt, argi))
- global->disable_warning_prompt = 1;
- else
- argj++;
- }
-
- if (global->pass) {
- /* DWIM: Assume the user meant passes=2 if pass=2 is specified */
- if (global->pass > global->passes) {
- warn("Assuming --pass=%d implies --passes=%d\n", global->pass,
- global->pass);
- global->passes = global->pass;
- }
- }
- /* Validate global config */
- if (global->passes == 0) {
-#if CONFIG_AV1_ENCODER
- // Make default AV1 passes = 2 until there is a better quality 1-pass
- // encoder
- if (global->codec != NULL && global->codec->name != NULL)
- global->passes = (strcmp(global->codec->name, "av1") == 0) ? 2 : 1;
-#else
- global->passes = 1;
-#endif
- }
-}
-
-static void open_input_file(struct AvxInputContext *input) {
- /* Parse certain options from the input file, if possible */
- input->file = strcmp(input->filename, "-") ? fopen(input->filename, "rb")
- : set_binary_mode(stdin);
-
- if (!input->file) fatal("Failed to open input file");
-
- if (!fseeko(input->file, 0, SEEK_END)) {
- /* Input file is seekable. Figure out how long it is, so we can get
- * progress info.
- */
- input->length = ftello(input->file);
- rewind(input->file);
- }
-
- /* Default to 1:1 pixel aspect ratio. */
- input->pixel_aspect_ratio.numerator = 1;
- input->pixel_aspect_ratio.denominator = 1;
-
- /* For RAW input sources, these bytes will applied on the first frame
- * in read_frame().
- */
- input->detect.buf_read = fread(input->detect.buf, 1, 4, input->file);
- input->detect.position = 0;
-
- if (input->detect.buf_read == 4 && file_is_y4m(input->detect.buf)) {
- if (y4m_input_open(&input->y4m, input->file, input->detect.buf, 4,
- input->only_i420) >= 0) {
- input->file_type = FILE_TYPE_Y4M;
- input->width = input->y4m.pic_w;
- input->height = input->y4m.pic_h;
- input->pixel_aspect_ratio.numerator = input->y4m.par_n;
- input->pixel_aspect_ratio.denominator = input->y4m.par_d;
- input->framerate.numerator = input->y4m.fps_n;
- input->framerate.denominator = input->y4m.fps_d;
- input->fmt = input->y4m.aom_fmt;
- input->bit_depth = input->y4m.bit_depth;
- } else
- fatal("Unsupported Y4M stream.");
- } else if (input->detect.buf_read == 4 && fourcc_is_ivf(input->detect.buf)) {
- fatal("IVF is not supported as input.");
- } else {
- input->file_type = FILE_TYPE_RAW;
- }
-}
-
-static void close_input_file(struct AvxInputContext *input) {
- fclose(input->file);
- if (input->file_type == FILE_TYPE_Y4M) y4m_input_close(&input->y4m);
-}
-
-static struct stream_state *new_stream(struct AvxEncoderConfig *global,
- struct stream_state *prev) {
- struct stream_state *stream;
-
- stream = calloc(1, sizeof(*stream));
- if (stream == NULL) {
- fatal("Failed to allocate new stream.");
- }
-
- if (prev) {
- memcpy(stream, prev, sizeof(*stream));
- stream->index++;
- prev->next = stream;
- } else {
- aom_codec_err_t res;
-
- /* Populate encoder configuration */
- res = aom_codec_enc_config_default(global->codec->codec_interface(),
- &stream->config.cfg, global->usage);
- if (res) fatal("Failed to get config: %s\n", aom_codec_err_to_string(res));
-
- /* Change the default timebase to a high enough value so that the
- * encoder will always create strictly increasing timestamps.
- */
- stream->config.cfg.g_timebase.den = 1000;
-
- /* Never use the library's default resolution, require it be parsed
- * from the file or set on the command line.
- */
- stream->config.cfg.g_w = 0;
- stream->config.cfg.g_h = 0;
-
- /* Initialize remaining stream parameters */
- stream->config.write_webm = 1;
- stream->config.write_ivf = 0;
-
-#if CONFIG_WEBM_IO
- stream->config.stereo_fmt = STEREO_FORMAT_MONO;
- stream->webm_ctx.last_pts_ns = -1;
- stream->webm_ctx.writer = NULL;
- stream->webm_ctx.segment = NULL;
-#endif
-
- /* Allows removal of the application version from the EBML tags */
- stream->webm_ctx.debug = global->debug;
- }
-
- /* Output files must be specified for each stream */
- stream->config.out_fn = NULL;
-
- stream->next = NULL;
- return stream;
-}
-
-static void set_config_arg_ctrls(struct stream_config *config, int key,
- const struct arg *arg) {
- int j;
- if (key == AV1E_SET_FILM_GRAIN_TABLE) {
- config->film_grain_filename = arg->val;
- return;
- }
-
- /* Point either to the next free element or the first instance of this
- * control.
- */
- for (j = 0; j < config->arg_ctrl_cnt; j++)
- if (config->arg_ctrls[j][0] == key) break;
-
- /* Update/insert */
- assert(j < (int)ARG_CTRL_CNT_MAX);
- config->arg_ctrls[j][0] = key;
- config->arg_ctrls[j][1] = arg_parse_enum_or_int(arg);
-
- if (key == AOME_SET_ENABLEAUTOALTREF && config->arg_ctrls[j][1] > 1) {
- warn("auto-alt-ref > 1 is deprecated... setting auto-alt-ref=1\n");
- config->arg_ctrls[j][1] = 1;
- }
- if (j == config->arg_ctrl_cnt) config->arg_ctrl_cnt++;
-}
-
-static int parse_stream_params(struct AvxEncoderConfig *global,
- struct stream_state *stream, char **argv) {
- char **argi, **argj;
- struct arg arg;
- static const arg_def_t **ctrl_args = no_args;
- static const int *ctrl_args_map = NULL;
- struct stream_config *config = &stream->config;
- int eos_mark_found = 0;
- int webm_forced = 0;
-
- // Handle codec specific options
- if (0) {
-#if CONFIG_AV1_ENCODER
- } else if (strcmp(global->codec->name, "av1") == 0) {
- // TODO(jingning): Reuse AV1 specific encoder configuration parameters.
- // Consider to expand this set for AV1 encoder control.
- ctrl_args = av1_args;
- ctrl_args_map = av1_arg_ctrl_map;
-#endif
- }
-
- for (argi = argj = argv; (*argj = *argi); argi += arg.argv_step) {
- arg.argv_step = 1;
-
- /* Once we've found an end-of-stream marker (--) we want to continue
- * shifting arguments but not consuming them.
- */
- if (eos_mark_found) {
- argj++;
- continue;
- } else if (!strcmp(*argj, "--")) {
- eos_mark_found = 1;
- continue;
- }
-
- if (arg_match(&arg, &outputfile, argi)) {
- config->out_fn = arg.val;
- if (!webm_forced) {
- const size_t out_fn_len = strlen(config->out_fn);
- if (out_fn_len >= 4 &&
- !strcmp(config->out_fn + out_fn_len - 4, ".ivf")) {
- config->write_webm = 0;
- config->write_ivf = 1;
- } else if (out_fn_len >= 4 &&
- !strcmp(config->out_fn + out_fn_len - 4, ".obu")) {
- config->write_webm = 0;
- config->write_ivf = 0;
- }
- }
- } else if (arg_match(&arg, &fpf_name, argi)) {
- config->stats_fn = arg.val;
-#if CONFIG_FP_MB_STATS
- } else if (arg_match(&arg, &fpmbf_name, argi)) {
- config->fpmb_stats_fn = arg.val;
-#endif
- } else if (arg_match(&arg, &use_webm, argi)) {
-#if CONFIG_WEBM_IO
- config->write_webm = 1;
- webm_forced = 1;
-#else
- die("Error: --webm specified but webm is disabled.");
-#endif
- } else if (arg_match(&arg, &use_ivf, argi)) {
- config->write_webm = 0;
- config->write_ivf = 1;
- } else if (arg_match(&arg, &use_obu, argi)) {
- config->write_webm = 0;
- config->write_ivf = 0;
- } else if (arg_match(&arg, &threads, argi)) {
- config->cfg.g_threads = arg_parse_uint(&arg);
- } else if (arg_match(&arg, &profile, argi)) {
- config->cfg.g_profile = arg_parse_uint(&arg);
- } else if (arg_match(&arg, &width, argi)) {
- config->cfg.g_w = arg_parse_uint(&arg);
- } else if (arg_match(&arg, &height, argi)) {
- config->cfg.g_h = arg_parse_uint(&arg);
- } else if (arg_match(&arg, &forced_max_frame_width, argi)) {
- config->cfg.g_forced_max_frame_width = arg_parse_uint(&arg);
- } else if (arg_match(&arg, &forced_max_frame_height, argi)) {
- config->cfg.g_forced_max_frame_height = arg_parse_uint(&arg);
- } else if (arg_match(&arg, &bitdeptharg, argi)) {
- config->cfg.g_bit_depth = arg_parse_enum_or_int(&arg);
- } else if (arg_match(&arg, &inbitdeptharg, argi)) {
- config->cfg.g_input_bit_depth = arg_parse_uint(&arg);
- } else if (arg_match(&arg, &input_chroma_subsampling_x, argi)) {
- stream->chroma_subsampling_x = arg_parse_uint(&arg);
- } else if (arg_match(&arg, &input_chroma_subsampling_y, argi)) {
- stream->chroma_subsampling_y = arg_parse_uint(&arg);
-#if CONFIG_WEBM_IO
- } else if (arg_match(&arg, &stereo_mode, argi)) {
- config->stereo_fmt = arg_parse_enum_or_int(&arg);
-#endif
- } else if (arg_match(&arg, &timebase, argi)) {
- config->cfg.g_timebase = arg_parse_rational(&arg);
- validate_positive_rational(arg.name, &config->cfg.g_timebase);
- } else if (arg_match(&arg, &global_error_resilient, argi)) {
- config->cfg.g_error_resilient = arg_parse_uint(&arg);
- } else if (arg_match(&arg, &lag_in_frames, argi)) {
- config->cfg.g_lag_in_frames = arg_parse_uint(&arg);
- } else if (arg_match(&arg, &large_scale_tile, argi)) {
- config->cfg.large_scale_tile = arg_parse_uint(&arg);
- } else if (arg_match(&arg, &monochrome, argi)) {
- config->cfg.monochrome = 1;
- } else if (arg_match(&arg, &full_still_picture_hdr, argi)) {
- config->cfg.full_still_picture_hdr = 1;
- } else if (arg_match(&arg, &dropframe_thresh, argi)) {
- config->cfg.rc_dropframe_thresh = arg_parse_uint(&arg);
- } else if (arg_match(&arg, &resize_mode, argi)) {
- config->cfg.rc_resize_mode = arg_parse_uint(&arg);
- } else if (arg_match(&arg, &resize_denominator, argi)) {
- config->cfg.rc_resize_denominator = arg_parse_uint(&arg);
- } else if (arg_match(&arg, &resize_kf_denominator, argi)) {
- config->cfg.rc_resize_kf_denominator = arg_parse_uint(&arg);
- } else if (arg_match(&arg, &superres_mode, argi)) {
- config->cfg.rc_superres_mode = arg_parse_uint(&arg);
- } else if (arg_match(&arg, &superres_denominator, argi)) {
- config->cfg.rc_superres_denominator = arg_parse_uint(&arg);
- } else if (arg_match(&arg, &superres_kf_denominator, argi)) {
- config->cfg.rc_superres_kf_denominator = arg_parse_uint(&arg);
- } else if (arg_match(&arg, &superres_qthresh, argi)) {
- config->cfg.rc_superres_qthresh = arg_parse_uint(&arg);
- } else if (arg_match(&arg, &superres_kf_qthresh, argi)) {
- config->cfg.rc_superres_kf_qthresh = arg_parse_uint(&arg);
- } else if (arg_match(&arg, &end_usage, argi)) {
- config->cfg.rc_end_usage = arg_parse_enum_or_int(&arg);
- } else if (arg_match(&arg, &target_bitrate, argi)) {
- config->cfg.rc_target_bitrate = arg_parse_uint(&arg);
- } else if (arg_match(&arg, &min_quantizer, argi)) {
- config->cfg.rc_min_quantizer = arg_parse_uint(&arg);
- } else if (arg_match(&arg, &max_quantizer, argi)) {
- config->cfg.rc_max_quantizer = arg_parse_uint(&arg);
- } else if (arg_match(&arg, &undershoot_pct, argi)) {
- config->cfg.rc_undershoot_pct = arg_parse_uint(&arg);
- } else if (arg_match(&arg, &overshoot_pct, argi)) {
- config->cfg.rc_overshoot_pct = arg_parse_uint(&arg);
- } else if (arg_match(&arg, &buf_sz, argi)) {
- config->cfg.rc_buf_sz = arg_parse_uint(&arg);
- } else if (arg_match(&arg, &buf_initial_sz, argi)) {
- config->cfg.rc_buf_initial_sz = arg_parse_uint(&arg);
- } else if (arg_match(&arg, &buf_optimal_sz, argi)) {
- config->cfg.rc_buf_optimal_sz = arg_parse_uint(&arg);
- } else if (arg_match(&arg, &bias_pct, argi)) {
- config->cfg.rc_2pass_vbr_bias_pct = arg_parse_uint(&arg);
- if (global->passes < 2)
- warn("option %s ignored in one-pass mode.\n", arg.name);
- } else if (arg_match(&arg, &minsection_pct, argi)) {
- config->cfg.rc_2pass_vbr_minsection_pct = arg_parse_uint(&arg);
-
- if (global->passes < 2)
- warn("option %s ignored in one-pass mode.\n", arg.name);
- } else if (arg_match(&arg, &maxsection_pct, argi)) {
- config->cfg.rc_2pass_vbr_maxsection_pct = arg_parse_uint(&arg);
-
- if (global->passes < 2)
- warn("option %s ignored in one-pass mode.\n", arg.name);
- } else if (arg_match(&arg, &fwd_kf_enabled, argi)) {
- config->cfg.fwd_kf_enabled = arg_parse_uint(&arg);
- } else if (arg_match(&arg, &kf_min_dist, argi)) {
- config->cfg.kf_min_dist = arg_parse_uint(&arg);
- } else if (arg_match(&arg, &kf_max_dist, argi)) {
- config->cfg.kf_max_dist = arg_parse_uint(&arg);
- } else if (arg_match(&arg, &kf_disabled, argi)) {
- config->cfg.kf_mode = AOM_KF_DISABLED;
- } else if (arg_match(&arg, &sframe_dist, argi)) {
- config->cfg.sframe_dist = arg_parse_uint(&arg);
- } else if (arg_match(&arg, &sframe_mode, argi)) {
- config->cfg.sframe_mode = arg_parse_uint(&arg);
- } else if (arg_match(&arg, &save_as_annexb, argi)) {
- config->cfg.save_as_annexb = arg_parse_uint(&arg);
- } else if (arg_match(&arg, &tile_width, argi)) {
- config->cfg.tile_width_count =
- arg_parse_list(&arg, config->cfg.tile_widths, MAX_TILE_WIDTHS);
- } else if (arg_match(&arg, &tile_height, argi)) {
- config->cfg.tile_height_count =
- arg_parse_list(&arg, config->cfg.tile_heights, MAX_TILE_HEIGHTS);
-#if CONFIG_FILEOPTIONS
- } else if (arg_match(&arg, &ext_partition, argi)) {
- config->cfg.cfg.ext_partition = !!arg_parse_uint(&arg) > 0;
-#endif
- } else {
- int i, match = 0;
- for (i = 0; ctrl_args[i]; i++) {
- if (arg_match(&arg, ctrl_args[i], argi)) {
- match = 1;
- if (ctrl_args_map) {
- set_config_arg_ctrls(config, ctrl_args_map[i], &arg);
- }
- }
- }
- if (!match) argj++;
- }
- }
- config->use_16bit_internal =
- config->cfg.g_bit_depth > AOM_BITS_8 || !CONFIG_LOWBITDEPTH;
- return eos_mark_found;
-}
-
-#define FOREACH_STREAM(iterator, list) \
- for (struct stream_state *iterator = list; iterator; \
- iterator = iterator->next)
-
-static void validate_stream_config(const struct stream_state *stream,
- const struct AvxEncoderConfig *global) {
- const struct stream_state *streami;
- (void)global;
-
- if (!stream->config.cfg.g_w || !stream->config.cfg.g_h)
- fatal(
- "Stream %d: Specify stream dimensions with --width (-w) "
- " and --height (-h)",
- stream->index);
-
- // Check that the codec bit depth is greater than the input bit depth.
- if (stream->config.cfg.g_input_bit_depth >
- (unsigned int)stream->config.cfg.g_bit_depth) {
- fatal("Stream %d: codec bit depth (%d) less than input bit depth (%d)",
- stream->index, (int)stream->config.cfg.g_bit_depth,
- stream->config.cfg.g_input_bit_depth);
- }
-
- for (streami = stream; streami; streami = streami->next) {
- /* All streams require output files */
- if (!streami->config.out_fn)
- fatal("Stream %d: Output file is required (specify with -o)",
- streami->index);
-
- /* Check for two streams outputting to the same file */
- if (streami != stream) {
- const char *a = stream->config.out_fn;
- const char *b = streami->config.out_fn;
- if (!strcmp(a, b) && strcmp(a, "/dev/null") && strcmp(a, ":nul"))
- fatal("Stream %d: duplicate output file (from stream %d)",
- streami->index, stream->index);
- }
-
- /* Check for two streams sharing a stats file. */
- if (streami != stream) {
- const char *a = stream->config.stats_fn;
- const char *b = streami->config.stats_fn;
- if (a && b && !strcmp(a, b))
- fatal("Stream %d: duplicate stats file (from stream %d)",
- streami->index, stream->index);
- }
-
-#if CONFIG_FP_MB_STATS
- /* Check for two streams sharing a mb stats file. */
- if (streami != stream) {
- const char *a = stream->config.fpmb_stats_fn;
- const char *b = streami->config.fpmb_stats_fn;
- if (a && b && !strcmp(a, b))
- fatal("Stream %d: duplicate mb stats file (from stream %d)",
- streami->index, stream->index);
- }
-#endif
- }
-}
-
-static void set_stream_dimensions(struct stream_state *stream, unsigned int w,
- unsigned int h) {
- if (!stream->config.cfg.g_w) {
- if (!stream->config.cfg.g_h)
- stream->config.cfg.g_w = w;
- else
- stream->config.cfg.g_w = w * stream->config.cfg.g_h / h;
- }
- if (!stream->config.cfg.g_h) {
- stream->config.cfg.g_h = h * stream->config.cfg.g_w / w;
- }
-}
-
-static const char *file_type_to_string(enum VideoFileType t) {
- switch (t) {
- case FILE_TYPE_RAW: return "RAW";
- case FILE_TYPE_Y4M: return "Y4M";
- default: return "Other";
- }
-}
-
-static const char *image_format_to_string(aom_img_fmt_t f) {
- switch (f) {
- case AOM_IMG_FMT_I420: return "I420";
- case AOM_IMG_FMT_I422: return "I422";
- case AOM_IMG_FMT_I444: return "I444";
- case AOM_IMG_FMT_YV12: return "YV12";
- case AOM_IMG_FMT_I42016: return "I42016";
- case AOM_IMG_FMT_I42216: return "I42216";
- case AOM_IMG_FMT_I44416: return "I44416";
- default: return "Other";
- }
-}
-
-static void show_stream_config(struct stream_state *stream,
- struct AvxEncoderConfig *global,
- struct AvxInputContext *input) {
-#define SHOW(field) \
- fprintf(stderr, " %-28s = %d\n", #field, stream->config.cfg.field)
-
- if (stream->index == 0) {
- fprintf(stderr, "Codec: %s\n",
- aom_codec_iface_name(global->codec->codec_interface()));
- fprintf(stderr, "Source file: %s File Type: %s Format: %s\n",
- input->filename, file_type_to_string(input->file_type),
- image_format_to_string(input->fmt));
- }
- if (stream->next || stream->index)
- fprintf(stderr, "\nStream Index: %d\n", stream->index);
- fprintf(stderr, "Destination file: %s\n", stream->config.out_fn);
- fprintf(stderr, "Coding path: %s\n",
- stream->config.use_16bit_internal ? "HBD" : "LBD");
- fprintf(stderr, "Encoder parameters:\n");
-
- SHOW(g_usage);
- SHOW(g_threads);
- SHOW(g_profile);
- SHOW(g_w);
- SHOW(g_h);
- SHOW(g_bit_depth);
- SHOW(g_input_bit_depth);
- SHOW(g_timebase.num);
- SHOW(g_timebase.den);
- SHOW(g_error_resilient);
- SHOW(g_pass);
- SHOW(g_lag_in_frames);
- SHOW(large_scale_tile);
- SHOW(rc_dropframe_thresh);
- SHOW(rc_resize_mode);
- SHOW(rc_resize_denominator);
- SHOW(rc_resize_kf_denominator);
- SHOW(rc_superres_mode);
- SHOW(rc_superres_denominator);
- SHOW(rc_superres_kf_denominator);
- SHOW(rc_superres_qthresh);
- SHOW(rc_superres_kf_qthresh);
- SHOW(rc_end_usage);
- SHOW(rc_target_bitrate);
- SHOW(rc_min_quantizer);
- SHOW(rc_max_quantizer);
- SHOW(rc_undershoot_pct);
- SHOW(rc_overshoot_pct);
- SHOW(rc_buf_sz);
- SHOW(rc_buf_initial_sz);
- SHOW(rc_buf_optimal_sz);
- SHOW(rc_2pass_vbr_bias_pct);
- SHOW(rc_2pass_vbr_minsection_pct);
- SHOW(rc_2pass_vbr_maxsection_pct);
- SHOW(fwd_kf_enabled);
- SHOW(kf_mode);
- SHOW(kf_min_dist);
- SHOW(kf_max_dist);
-}
-
-static void open_output_file(struct stream_state *stream,
- struct AvxEncoderConfig *global,
- const struct AvxRational *pixel_aspect_ratio) {
- const char *fn = stream->config.out_fn;
- const struct aom_codec_enc_cfg *const cfg = &stream->config.cfg;
-
- if (cfg->g_pass == AOM_RC_FIRST_PASS) return;
-
- stream->file = strcmp(fn, "-") ? fopen(fn, "wb") : set_binary_mode(stdout);
-
- if (!stream->file) fatal("Failed to open output file");
-
- if (stream->config.write_webm && fseek(stream->file, 0, SEEK_CUR))
- fatal("WebM output to pipes not supported.");
-
-#if CONFIG_WEBM_IO
- if (stream->config.write_webm) {
- stream->webm_ctx.stream = stream->file;
- write_webm_file_header(&stream->webm_ctx, cfg, stream->config.stereo_fmt,
- global->codec->fourcc, pixel_aspect_ratio);
- }
-#else
- (void)pixel_aspect_ratio;
-#endif
-
- if (!stream->config.write_webm && stream->config.write_ivf) {
- ivf_write_file_header(stream->file, cfg, global->codec->fourcc, 0);
- }
-}
-
-static void close_output_file(struct stream_state *stream,
- unsigned int fourcc) {
- const struct aom_codec_enc_cfg *const cfg = &stream->config.cfg;
-
- if (cfg->g_pass == AOM_RC_FIRST_PASS) return;
-
-#if CONFIG_WEBM_IO
- if (stream->config.write_webm) {
- write_webm_file_footer(&stream->webm_ctx);
- }
-#endif
-
- if (!stream->config.write_webm && stream->config.write_ivf) {
- if (!fseek(stream->file, 0, SEEK_SET))
- ivf_write_file_header(stream->file, &stream->config.cfg, fourcc,
- stream->frames_out);
- }
-
- fclose(stream->file);
-}
-
-static void setup_pass(struct stream_state *stream,
- struct AvxEncoderConfig *global, int pass) {
- if (stream->config.stats_fn) {
- if (!stats_open_file(&stream->stats, stream->config.stats_fn, pass))
- fatal("Failed to open statistics store");
- } else {
- if (!stats_open_mem(&stream->stats, pass))
- fatal("Failed to open statistics store");
- }
-
-#if CONFIG_FP_MB_STATS
- if (stream->config.fpmb_stats_fn) {
- if (!stats_open_file(&stream->fpmb_stats, stream->config.fpmb_stats_fn,
- pass))
- fatal("Failed to open mb statistics store");
- } else {
- if (!stats_open_mem(&stream->fpmb_stats, pass))
- fatal("Failed to open mb statistics store");
- }
-#endif
-
- stream->config.cfg.g_pass = global->passes == 2
- ? pass ? AOM_RC_LAST_PASS : AOM_RC_FIRST_PASS
- : AOM_RC_ONE_PASS;
- if (pass) {
- stream->config.cfg.rc_twopass_stats_in = stats_get(&stream->stats);
-#if CONFIG_FP_MB_STATS
- stream->config.cfg.rc_firstpass_mb_stats_in =
- stats_get(&stream->fpmb_stats);
-#endif
- }
-
- stream->cx_time = 0;
- stream->nbytes = 0;
- stream->frames_out = 0;
-}
-
-static void initialize_encoder(struct stream_state *stream,
- struct AvxEncoderConfig *global) {
- int i;
- int flags = 0;
-
- flags |= global->show_psnr ? AOM_CODEC_USE_PSNR : 0;
- flags |= stream->config.use_16bit_internal ? AOM_CODEC_USE_HIGHBITDEPTH : 0;
-
- /* Construct Encoder Context */
- aom_codec_enc_init(&stream->encoder, global->codec->codec_interface(),
- &stream->config.cfg, flags);
- ctx_exit_on_error(&stream->encoder, "Failed to initialize encoder");
-
- /* Note that we bypass the aom_codec_control wrapper macro because
- * we're being clever to store the control IDs in an array. Real
- * applications will want to make use of the enumerations directly
- */
- for (i = 0; i < stream->config.arg_ctrl_cnt; i++) {
- int ctrl = stream->config.arg_ctrls[i][0];
- int value = stream->config.arg_ctrls[i][1];
- if (aom_codec_control_(&stream->encoder, ctrl, value))
- fprintf(stderr, "Error: Tried to set control %d = %d\n", ctrl, value);
-
- ctx_exit_on_error(&stream->encoder, "Failed to control codec");
- }
- if (stream->config.film_grain_filename) {
- aom_codec_control_(&stream->encoder, AV1E_SET_FILM_GRAIN_TABLE,
- stream->config.film_grain_filename);
- }
-
-#if CONFIG_AV1_DECODER
- if (global->test_decode != TEST_DECODE_OFF) {
- const AvxInterface *decoder = get_aom_decoder_by_name(global->codec->name);
- aom_codec_dec_cfg_t cfg = { 0, 0, 0, CONFIG_LOWBITDEPTH, { 1 } };
- aom_codec_dec_init(&stream->decoder, decoder->codec_interface(), &cfg, 0);
-
- if (strcmp(global->codec->name, "av1") == 0) {
- aom_codec_control(&stream->decoder, AV1_SET_TILE_MODE,
- stream->config.cfg.large_scale_tile);
- ctx_exit_on_error(&stream->decoder, "Failed to set decode_tile_mode");
-
- aom_codec_control(&stream->decoder, AV1D_SET_IS_ANNEXB,
- stream->config.cfg.save_as_annexb);
- ctx_exit_on_error(&stream->decoder, "Failed to set is_annexb");
-
- aom_codec_control(&stream->decoder, AV1_SET_DECODE_TILE_ROW, -1);
- ctx_exit_on_error(&stream->decoder, "Failed to set decode_tile_row");
-
- aom_codec_control(&stream->decoder, AV1_SET_DECODE_TILE_COL, -1);
- ctx_exit_on_error(&stream->decoder, "Failed to set decode_tile_col");
- }
- }
-#endif
-}
-
-static void encode_frame(struct stream_state *stream,
- struct AvxEncoderConfig *global, struct aom_image *img,
- unsigned int frames_in) {
- aom_codec_pts_t frame_start, next_frame_start;
- struct aom_codec_enc_cfg *cfg = &stream->config.cfg;
- struct aom_usec_timer timer;
-
- frame_start =
- (cfg->g_timebase.den * (int64_t)(frames_in - 1) * global->framerate.den) /
- cfg->g_timebase.num / global->framerate.num;
- next_frame_start =
- (cfg->g_timebase.den * (int64_t)(frames_in)*global->framerate.den) /
- cfg->g_timebase.num / global->framerate.num;
-
- /* Scale if necessary */
- if (img) {
- if ((img->fmt & AOM_IMG_FMT_HIGHBITDEPTH) &&
- (img->d_w != cfg->g_w || img->d_h != cfg->g_h)) {
- if (img->fmt != AOM_IMG_FMT_I42016) {
- fprintf(stderr, "%s can only scale 4:2:0 inputs\n", exec_name);
- exit(EXIT_FAILURE);
- }
-#if CONFIG_LIBYUV
- if (!stream->img) {
- stream->img =
- aom_img_alloc(NULL, AOM_IMG_FMT_I42016, cfg->g_w, cfg->g_h, 16);
- }
- I420Scale_16(
- (uint16_t *)img->planes[AOM_PLANE_Y], img->stride[AOM_PLANE_Y] / 2,
- (uint16_t *)img->planes[AOM_PLANE_U], img->stride[AOM_PLANE_U] / 2,
- (uint16_t *)img->planes[AOM_PLANE_V], img->stride[AOM_PLANE_V] / 2,
- img->d_w, img->d_h, (uint16_t *)stream->img->planes[AOM_PLANE_Y],
- stream->img->stride[AOM_PLANE_Y] / 2,
- (uint16_t *)stream->img->planes[AOM_PLANE_U],
- stream->img->stride[AOM_PLANE_U] / 2,
- (uint16_t *)stream->img->planes[AOM_PLANE_V],
- stream->img->stride[AOM_PLANE_V] / 2, stream->img->d_w,
- stream->img->d_h, kFilterBox);
- img = stream->img;
-#else
- stream->encoder.err = 1;
- ctx_exit_on_error(&stream->encoder,
- "Stream %d: Failed to encode frame.\n"
- "libyuv is required for scaling but is currently "
- "disabled.\n"
- "Be sure to specify -DCONFIG_LIBYUV=1 when running "
- "cmake.\n",
- stream->index);
-#endif
- }
- }
- if (img && (img->d_w != cfg->g_w || img->d_h != cfg->g_h)) {
- if (img->fmt != AOM_IMG_FMT_I420 && img->fmt != AOM_IMG_FMT_YV12) {
- fprintf(stderr, "%s can only scale 4:2:0 8bpp inputs\n", exec_name);
- exit(EXIT_FAILURE);
- }
-#if CONFIG_LIBYUV
- if (!stream->img)
- stream->img =
- aom_img_alloc(NULL, AOM_IMG_FMT_I420, cfg->g_w, cfg->g_h, 16);
- I420Scale(
- img->planes[AOM_PLANE_Y], img->stride[AOM_PLANE_Y],
- img->planes[AOM_PLANE_U], img->stride[AOM_PLANE_U],
- img->planes[AOM_PLANE_V], img->stride[AOM_PLANE_V], img->d_w, img->d_h,
- stream->img->planes[AOM_PLANE_Y], stream->img->stride[AOM_PLANE_Y],
- stream->img->planes[AOM_PLANE_U], stream->img->stride[AOM_PLANE_U],
- stream->img->planes[AOM_PLANE_V], stream->img->stride[AOM_PLANE_V],
- stream->img->d_w, stream->img->d_h, kFilterBox);
- img = stream->img;
-#else
- stream->encoder.err = 1;
- ctx_exit_on_error(&stream->encoder,
- "Stream %d: Failed to encode frame.\n"
- "Scaling disabled in this configuration. \n"
- "To enable, configure with --enable-libyuv\n",
- stream->index);
-#endif
- }
-
- aom_usec_timer_start(&timer);
- aom_codec_encode(&stream->encoder, img, frame_start,
- (uint32_t)(next_frame_start - frame_start), 0);
- aom_usec_timer_mark(&timer);
- stream->cx_time += aom_usec_timer_elapsed(&timer);
- ctx_exit_on_error(&stream->encoder, "Stream %d: Failed to encode frame",
- stream->index);
-}
-
-static void update_quantizer_histogram(struct stream_state *stream) {
- if (stream->config.cfg.g_pass != AOM_RC_FIRST_PASS) {
- int q;
-
- aom_codec_control(&stream->encoder, AOME_GET_LAST_QUANTIZER_64, &q);
- ctx_exit_on_error(&stream->encoder, "Failed to read quantizer");
- stream->counts[q]++;
- }
-}
-
-static void get_cx_data(struct stream_state *stream,
- struct AvxEncoderConfig *global, int *got_data) {
- const aom_codec_cx_pkt_t *pkt;
- const struct aom_codec_enc_cfg *cfg = &stream->config.cfg;
- aom_codec_iter_t iter = NULL;
-
- *got_data = 0;
- while ((pkt = aom_codec_get_cx_data(&stream->encoder, &iter))) {
- static size_t fsize = 0;
- static FileOffset ivf_header_pos = 0;
-
- switch (pkt->kind) {
- case AOM_CODEC_CX_FRAME_PKT:
- if (!(pkt->data.frame.flags & AOM_FRAME_IS_FRAGMENT)) {
- stream->frames_out++;
- }
- if (!global->quiet)
- fprintf(stderr, " %6luF", (unsigned long)pkt->data.frame.sz);
-
- update_rate_histogram(stream->rate_hist, cfg, pkt);
-#if CONFIG_WEBM_IO
- if (stream->config.write_webm) {
- write_webm_block(&stream->webm_ctx, cfg, pkt);
- }
-#endif
- if (!stream->config.write_webm) {
- if (stream->config.write_ivf) {
- if (pkt->data.frame.partition_id <= 0) {
- ivf_header_pos = ftello(stream->file);
- fsize = pkt->data.frame.sz;
-
- ivf_write_frame_header(stream->file, pkt->data.frame.pts, fsize);
- } else {
- fsize += pkt->data.frame.sz;
-
- if (!(pkt->data.frame.flags & AOM_FRAME_IS_FRAGMENT)) {
- const FileOffset currpos = ftello(stream->file);
- fseeko(stream->file, ivf_header_pos, SEEK_SET);
- ivf_write_frame_size(stream->file, fsize);
- fseeko(stream->file, currpos, SEEK_SET);
- }
- }
- }
-
- (void)fwrite(pkt->data.frame.buf, 1, pkt->data.frame.sz,
- stream->file);
- }
- stream->nbytes += pkt->data.raw.sz;
-
- *got_data = 1;
-#if CONFIG_AV1_DECODER
- if (global->test_decode != TEST_DECODE_OFF && !stream->mismatch_seen) {
- aom_codec_decode(&stream->decoder, pkt->data.frame.buf,
- pkt->data.frame.sz, NULL);
- if (stream->decoder.err) {
- warn_or_exit_on_error(&stream->decoder,
- global->test_decode == TEST_DECODE_FATAL,
- "Failed to decode frame %d in stream %d",
- stream->frames_out + 1, stream->index);
- stream->mismatch_seen = stream->frames_out + 1;
- }
- }
-#endif
- break;
- case AOM_CODEC_STATS_PKT:
- stream->frames_out++;
- stats_write(&stream->stats, pkt->data.twopass_stats.buf,
- pkt->data.twopass_stats.sz);
- stream->nbytes += pkt->data.raw.sz;
- break;
-#if CONFIG_FP_MB_STATS
- case AOM_CODEC_FPMB_STATS_PKT:
- stats_write(&stream->fpmb_stats, pkt->data.firstpass_mb_stats.buf,
- pkt->data.firstpass_mb_stats.sz);
- stream->nbytes += pkt->data.raw.sz;
- break;
-#endif
- case AOM_CODEC_PSNR_PKT:
-
- if (global->show_psnr) {
- int i;
-
- stream->psnr_sse_total += pkt->data.psnr.sse[0];
- stream->psnr_samples_total += pkt->data.psnr.samples[0];
- for (i = 0; i < 4; i++) {
- if (!global->quiet)
- fprintf(stderr, "%.3f ", pkt->data.psnr.psnr[i]);
- stream->psnr_totals[i] += pkt->data.psnr.psnr[i];
- }
- stream->psnr_count++;
- }
-
- break;
- default: break;
- }
- }
-}
-
-static void show_psnr(struct stream_state *stream, double peak, int64_t bps) {
- int i;
- double ovpsnr;
-
- if (!stream->psnr_count) return;
-
- fprintf(stderr, "Stream %d PSNR (Overall/Avg/Y/U/V)", stream->index);
- ovpsnr = sse_to_psnr((double)stream->psnr_samples_total, peak,
- (double)stream->psnr_sse_total);
- fprintf(stderr, " %.3f", ovpsnr);
-
- for (i = 0; i < 4; i++) {
- fprintf(stderr, " %.3f", stream->psnr_totals[i] / stream->psnr_count);
- }
- if (bps > 0) {
- fprintf(stderr, " %7" PRId64 " bps", bps);
- }
- fprintf(stderr, " %7" PRId64 " ms", stream->cx_time / 1000);
- fprintf(stderr, "\n");
-}
-
-static float usec_to_fps(uint64_t usec, unsigned int frames) {
- return (float)(usec > 0 ? frames * 1000000.0 / (float)usec : 0);
-}
-
-static void test_decode(struct stream_state *stream,
- enum TestDecodeFatality fatal) {
- aom_image_t enc_img, dec_img;
-
- if (stream->mismatch_seen) return;
-
- /* Get the internal reference frame */
- aom_codec_control(&stream->encoder, AV1_GET_NEW_FRAME_IMAGE, &enc_img);
- aom_codec_control(&stream->decoder, AV1_GET_NEW_FRAME_IMAGE, &dec_img);
-
- if ((enc_img.fmt & AOM_IMG_FMT_HIGHBITDEPTH) !=
- (dec_img.fmt & AOM_IMG_FMT_HIGHBITDEPTH)) {
- if (enc_img.fmt & AOM_IMG_FMT_HIGHBITDEPTH) {
- aom_image_t enc_hbd_img;
- aom_img_alloc(&enc_hbd_img, enc_img.fmt - AOM_IMG_FMT_HIGHBITDEPTH,
- enc_img.d_w, enc_img.d_h, 16);
- aom_img_truncate_16_to_8(&enc_hbd_img, &enc_img);
- enc_img = enc_hbd_img;
- }
- if (dec_img.fmt & AOM_IMG_FMT_HIGHBITDEPTH) {
- aom_image_t dec_hbd_img;
- aom_img_alloc(&dec_hbd_img, dec_img.fmt - AOM_IMG_FMT_HIGHBITDEPTH,
- dec_img.d_w, dec_img.d_h, 16);
- aom_img_truncate_16_to_8(&dec_hbd_img, &dec_img);
- dec_img = dec_hbd_img;
- }
- }
-
- ctx_exit_on_error(&stream->encoder, "Failed to get encoder reference frame");
- ctx_exit_on_error(&stream->decoder, "Failed to get decoder reference frame");
-
- if (!aom_compare_img(&enc_img, &dec_img)) {
- int y[4], u[4], v[4];
- if (enc_img.fmt & AOM_IMG_FMT_HIGHBITDEPTH) {
- aom_find_mismatch_high(&enc_img, &dec_img, y, u, v);
- } else {
- aom_find_mismatch(&enc_img, &dec_img, y, u, v);
- }
- stream->decoder.err = 1;
- warn_or_exit_on_error(&stream->decoder, fatal == TEST_DECODE_FATAL,
- "Stream %d: Encode/decode mismatch on frame %d at"
- " Y[%d, %d] {%d/%d},"
- " U[%d, %d] {%d/%d},"
- " V[%d, %d] {%d/%d}",
- stream->index, stream->frames_out, y[0], y[1], y[2],
- y[3], u[0], u[1], u[2], u[3], v[0], v[1], v[2], v[3]);
- stream->mismatch_seen = stream->frames_out;
- }
-
- aom_img_free(&enc_img);
- aom_img_free(&dec_img);
-}
-
-static void print_time(const char *label, int64_t etl) {
- int64_t hours;
- int64_t mins;
- int64_t secs;
-
- if (etl >= 0) {
- hours = etl / 3600;
- etl -= hours * 3600;
- mins = etl / 60;
- etl -= mins * 60;
- secs = etl;
-
- fprintf(stderr, "[%3s %2" PRId64 ":%02" PRId64 ":%02" PRId64 "] ", label,
- hours, mins, secs);
- } else {
- fprintf(stderr, "[%3s unknown] ", label);
- }
-}
-
-int main(int argc, const char **argv_) {
- int pass;
- aom_image_t raw;
- aom_image_t raw_shift;
- int allocated_raw_shift = 0;
- int use_16bit_internal = 0;
- int input_shift = 0;
- int frame_avail, got_data;
-
- struct AvxInputContext input;
- struct AvxEncoderConfig global;
- struct stream_state *streams = NULL;
- char **argv, **argi;
- uint64_t cx_time = 0;
- int stream_cnt = 0;
- int res = 0;
- int profile_updated = 0;
-
- memset(&input, 0, sizeof(input));
- exec_name = argv_[0];
-
- /* Setup default input stream settings */
- input.framerate.numerator = 30;
- input.framerate.denominator = 1;
- input.only_i420 = 1;
- input.bit_depth = 0;
-
- /* First parse the global configuration values, because we want to apply
- * other parameters on top of the default configuration provided by the
- * codec.
- */
- argv = argv_dup(argc - 1, argv_ + 1);
- parse_global_config(&global, &argc, &argv);
-
-#if CONFIG_FILEOPTIONS
- if (argc < 2) usage_exit();
-#else
- if (argc < 3) usage_exit();
-#endif
-
- switch (global.color_type) {
- case I420: input.fmt = AOM_IMG_FMT_I420; break;
- case I422: input.fmt = AOM_IMG_FMT_I422; break;
- case I444: input.fmt = AOM_IMG_FMT_I444; break;
- case YV12: input.fmt = AOM_IMG_FMT_YV12; break;
- }
-
- {
- /* Now parse each stream's parameters. Using a local scope here
- * due to the use of 'stream' as loop variable in FOREACH_STREAM
- * loops
- */
- struct stream_state *stream = NULL;
-
- do {
- stream = new_stream(&global, stream);
- stream_cnt++;
- if (!streams) streams = stream;
- } while (parse_stream_params(&global, stream, argv));
- }
-
- /* Check for unrecognized options */
- for (argi = argv; *argi; argi++)
- if (argi[0][0] == '-' && argi[0][1])
- die("Error: Unrecognized option %s\n", *argi);
-
- FOREACH_STREAM(stream, streams) {
- check_encoder_config(global.disable_warning_prompt, &global,
- &stream->config.cfg);
- }
-
- /* Handle non-option arguments */
- input.filename = argv[0];
-
- if (!input.filename) {
- fprintf(stderr, "No input file specified!\n");
- usage_exit();
- }
-
- /* Decide if other chroma subsamplings than 4:2:0 are supported */
- if (global.codec->fourcc == AV1_FOURCC) input.only_i420 = 0;
-
- for (pass = global.pass ? global.pass - 1 : 0; pass < global.passes; pass++) {
- int frames_in = 0, seen_frames = 0;
- int64_t estimated_time_left = -1;
- int64_t average_rate = -1;
- int64_t lagged_count = 0;
-
- open_input_file(&input);
-
- /* If the input file doesn't specify its w/h (raw files), try to get
- * the data from the first stream's configuration.
- */
- if (!input.width || !input.height) {
- FOREACH_STREAM(stream, streams) {
- if (stream->config.cfg.g_w && stream->config.cfg.g_h) {
- input.width = stream->config.cfg.g_w;
- input.height = stream->config.cfg.g_h;
- break;
- }
- };
- }
-
- /* Update stream configurations from the input file's parameters */
- if (!input.width || !input.height)
- fatal(
- "Specify stream dimensions with --width (-w) "
- " and --height (-h)");
-
- /* If input file does not specify bit-depth but input-bit-depth parameter
- * exists, assume that to be the input bit-depth. However, if the
- * input-bit-depth paramter does not exist, assume the input bit-depth
- * to be the same as the codec bit-depth.
- */
- if (!input.bit_depth) {
- FOREACH_STREAM(stream, streams) {
- if (stream->config.cfg.g_input_bit_depth)
- input.bit_depth = stream->config.cfg.g_input_bit_depth;
- else
- input.bit_depth = stream->config.cfg.g_input_bit_depth =
- (int)stream->config.cfg.g_bit_depth;
- }
- if (input.bit_depth > 8) input.fmt |= AOM_IMG_FMT_HIGHBITDEPTH;
- } else {
- FOREACH_STREAM(stream, streams) {
- stream->config.cfg.g_input_bit_depth = input.bit_depth;
- }
- }
-
- FOREACH_STREAM(stream, streams) {
- if (input.fmt != AOM_IMG_FMT_I420 && input.fmt != AOM_IMG_FMT_I42016) {
- /* Automatically upgrade if input is non-4:2:0 but a 4:2:0 profile
- was selected. */
- switch (stream->config.cfg.g_profile) {
- case 0:
- if (input.bit_depth < 12 && (input.fmt == AOM_IMG_FMT_I444 ||
- input.fmt == AOM_IMG_FMT_I44416)) {
- if (!stream->config.cfg.monochrome) {
- stream->config.cfg.g_profile = 1;
- profile_updated = 1;
- }
- } else if (input.bit_depth == 12 || input.fmt == AOM_IMG_FMT_I422 ||
- input.fmt == AOM_IMG_FMT_I42216) {
- stream->config.cfg.g_profile = 2;
- profile_updated = 1;
- }
- break;
- case 1:
- if (input.bit_depth == 12 || input.fmt == AOM_IMG_FMT_I422 ||
- input.fmt == AOM_IMG_FMT_I42216) {
- stream->config.cfg.g_profile = 2;
- profile_updated = 1;
- } else if (input.bit_depth < 12 &&
- (input.fmt == AOM_IMG_FMT_I420 ||
- input.fmt == AOM_IMG_FMT_I42016)) {
- stream->config.cfg.g_profile = 0;
- profile_updated = 1;
- }
- break;
- case 2:
- if (input.bit_depth < 12 && (input.fmt == AOM_IMG_FMT_I444 ||
- input.fmt == AOM_IMG_FMT_I44416)) {
- stream->config.cfg.g_profile = 1;
- profile_updated = 1;
- } else if (input.bit_depth < 12 &&
- (input.fmt == AOM_IMG_FMT_I420 ||
- input.fmt == AOM_IMG_FMT_I42016)) {
- stream->config.cfg.g_profile = 0;
- profile_updated = 1;
- } else if (input.bit_depth == 12 &&
- input.file_type == FILE_TYPE_Y4M) {
- // Note that here the input file values for chroma subsampling
- // are used instead of those from the command line.
- aom_codec_control(&stream->encoder, AV1E_SET_CHROMA_SUBSAMPLING_X,
- input.y4m.dst_c_dec_h >> 1);
- aom_codec_control(&stream->encoder, AV1E_SET_CHROMA_SUBSAMPLING_Y,
- input.y4m.dst_c_dec_v >> 1);
- } else if (input.bit_depth == 12 &&
- input.file_type == FILE_TYPE_RAW) {
- aom_codec_control(&stream->encoder, AV1E_SET_CHROMA_SUBSAMPLING_X,
- stream->chroma_subsampling_x);
- aom_codec_control(&stream->encoder, AV1E_SET_CHROMA_SUBSAMPLING_Y,
- stream->chroma_subsampling_y);
- }
- break;
- default: break;
- }
- }
- if (stream->config.cfg.g_bit_depth > 10) {
- switch (stream->config.cfg.g_profile) {
- case 0:
- case 1:
- stream->config.cfg.g_profile = 2;
- profile_updated = 1;
- break;
- default: break;
- }
- }
- if (stream->config.cfg.g_bit_depth > 8) {
- stream->config.use_16bit_internal = 1;
- }
- if (profile_updated && !global.quiet) {
- fprintf(stderr,
- "Warning: automatically updating to profile %d to "
- "match input format.\n",
- stream->config.cfg.g_profile);
- }
- /* Set limit */
- stream->config.cfg.g_limit = global.limit;
- }
-
- FOREACH_STREAM(stream, streams) {
- set_stream_dimensions(stream, input.width, input.height);
- }
- FOREACH_STREAM(stream, streams) { validate_stream_config(stream, &global); }
-
- /* Ensure that --passes and --pass are consistent. If --pass is set and
- * --passes=2, ensure --fpf was set.
- */
- if (global.pass && global.passes == 2) {
- FOREACH_STREAM(stream, streams) {
- if (!stream->config.stats_fn)
- die("Stream %d: Must specify --fpf when --pass=%d"
- " and --passes=2\n",
- stream->index, global.pass);
- }
- }
-
-#if !CONFIG_WEBM_IO
- FOREACH_STREAM(stream, streams) {
- if (stream->config.write_webm) {
- stream->config.write_webm = 0;
- stream->config.write_ivf = 0;
- warn("aomenc compiled w/o WebM support. Writing OBU stream.");
- }
- }
-#endif
-
- /* Use the frame rate from the file only if none was specified
- * on the command-line.
- */
- if (!global.have_framerate) {
- global.framerate.num = input.framerate.numerator;
- global.framerate.den = input.framerate.denominator;
- }
- FOREACH_STREAM(stream, streams) {
- stream->config.cfg.g_timebase.den = global.framerate.num;
- stream->config.cfg.g_timebase.num = global.framerate.den;
- }
- /* Show configuration */
- if (global.verbose && pass == 0) {
- FOREACH_STREAM(stream, streams) {
- show_stream_config(stream, &global, &input);
- }
- }
-
- if (pass == (global.pass ? global.pass - 1 : 0)) {
- if (input.file_type == FILE_TYPE_Y4M)
- /*The Y4M reader does its own allocation.
- Just initialize this here to avoid problems if we never read any
- frames.*/
- memset(&raw, 0, sizeof(raw));
- else
- aom_img_alloc(&raw, input.fmt, input.width, input.height, 32);
-
- FOREACH_STREAM(stream, streams) {
- stream->rate_hist =
- init_rate_histogram(&stream->config.cfg, &global.framerate);
- }
- }
-
- FOREACH_STREAM(stream, streams) { setup_pass(stream, &global, pass); }
- FOREACH_STREAM(stream, streams) {
- open_output_file(stream, &global, &input.pixel_aspect_ratio);
- }
- FOREACH_STREAM(stream, streams) { initialize_encoder(stream, &global); }
- if (strcmp(global.codec->name, "av1") == 0 ||
- strcmp(global.codec->name, "av1") == 0) {
- // Check to see if at least one stream uses 16 bit internal.
- // Currently assume that the bit_depths for all streams using
- // highbitdepth are the same.
- FOREACH_STREAM(stream, streams) {
- if (stream->config.use_16bit_internal) {
- use_16bit_internal = 1;
- }
- input_shift = (int)stream->config.cfg.g_bit_depth -
- stream->config.cfg.g_input_bit_depth;
- };
- }
-
- frame_avail = 1;
- got_data = 0;
-
- while (frame_avail || got_data) {
- struct aom_usec_timer timer;
-
- if (!global.limit || frames_in < global.limit) {
- frame_avail = read_frame(&input, &raw);
-
- if (frame_avail) frames_in++;
- seen_frames =
- frames_in > global.skip_frames ? frames_in - global.skip_frames : 0;
-
- if (!global.quiet) {
- float fps = usec_to_fps(cx_time, seen_frames);
- fprintf(stderr, "\rPass %d/%d ", pass + 1, global.passes);
-
- if (stream_cnt == 1)
- fprintf(stderr, "frame %4d/%-4d %7" PRId64 "B ", frames_in,
- streams->frames_out, (int64_t)streams->nbytes);
- else
- fprintf(stderr, "frame %4d ", frames_in);
-
- fprintf(stderr, "%7" PRId64 " %s %.2f %s ",
- cx_time > 9999999 ? cx_time / 1000 : cx_time,
- cx_time > 9999999 ? "ms" : "us", fps >= 1.0 ? fps : fps * 60,
- fps >= 1.0 ? "fps" : "fpm");
- print_time("ETA", estimated_time_left);
- }
-
- } else {
- frame_avail = 0;
- }
-
- if (frames_in > global.skip_frames) {
- aom_image_t *frame_to_encode;
- if (input_shift || (use_16bit_internal && input.bit_depth == 8)) {
- assert(use_16bit_internal);
- // Input bit depth and stream bit depth do not match, so up
- // shift frame to stream bit depth
- if (!allocated_raw_shift) {
- aom_img_alloc(&raw_shift, raw.fmt | AOM_IMG_FMT_HIGHBITDEPTH,
- input.width, input.height, 32);
- allocated_raw_shift = 1;
- }
- aom_img_upshift(&raw_shift, &raw, input_shift);
- frame_to_encode = &raw_shift;
- } else {
- frame_to_encode = &raw;
- }
- aom_usec_timer_start(&timer);
- if (use_16bit_internal) {
- assert(frame_to_encode->fmt & AOM_IMG_FMT_HIGHBITDEPTH);
- FOREACH_STREAM(stream, streams) {
- if (stream->config.use_16bit_internal)
- encode_frame(stream, &global,
- frame_avail ? frame_to_encode : NULL, frames_in);
- else
- assert(0);
- };
- } else {
- assert((frame_to_encode->fmt & AOM_IMG_FMT_HIGHBITDEPTH) == 0);
- FOREACH_STREAM(stream, streams) {
- encode_frame(stream, &global, frame_avail ? frame_to_encode : NULL,
- frames_in);
- }
- }
- aom_usec_timer_mark(&timer);
- cx_time += aom_usec_timer_elapsed(&timer);
-
- FOREACH_STREAM(stream, streams) { update_quantizer_histogram(stream); }
-
- got_data = 0;
- FOREACH_STREAM(stream, streams) {
- get_cx_data(stream, &global, &got_data);
- }
-
- if (!got_data && input.length && streams != NULL &&
- !streams->frames_out) {
- lagged_count = global.limit ? seen_frames : ftello(input.file);
- } else if (input.length) {
- int64_t remaining;
- int64_t rate;
-
- if (global.limit) {
- const int64_t frame_in_lagged = (seen_frames - lagged_count) * 1000;
-
- rate = cx_time ? frame_in_lagged * (int64_t)1000000 / cx_time : 0;
- remaining = 1000 * (global.limit - global.skip_frames -
- seen_frames + lagged_count);
- } else {
- const int64_t input_pos = ftello(input.file);
- const int64_t input_pos_lagged = input_pos - lagged_count;
- const int64_t input_limit = input.length;
-
- rate = cx_time ? input_pos_lagged * (int64_t)1000000 / cx_time : 0;
- remaining = input_limit - input_pos + lagged_count;
- }
-
- average_rate =
- (average_rate <= 0) ? rate : (average_rate * 7 + rate) / 8;
- estimated_time_left = average_rate ? remaining / average_rate : -1;
- }
-
- if (got_data && global.test_decode != TEST_DECODE_OFF) {
- FOREACH_STREAM(stream, streams) {
- test_decode(stream, global.test_decode);
- }
- }
- }
-
- fflush(stdout);
- if (!global.quiet) fprintf(stderr, "\033[K");
- }
-
- if (stream_cnt > 1) fprintf(stderr, "\n");
-
- if (!global.quiet) {
- FOREACH_STREAM(stream, streams) {
- const int64_t bpf =
- seen_frames ? (int64_t)(stream->nbytes * 8 / seen_frames) : 0;
- const int64_t bps = bpf * global.framerate.num / global.framerate.den;
- fprintf(stderr,
- "\rPass %d/%d frame %4d/%-4d %7" PRId64 "B %7" PRId64
- "b/f %7" PRId64
- "b/s"
- " %7" PRId64 " %s (%.2f fps)\033[K\n",
- pass + 1, global.passes, frames_in, stream->frames_out,
- (int64_t)stream->nbytes, bpf, bps,
- stream->cx_time > 9999999 ? stream->cx_time / 1000
- : stream->cx_time,
- stream->cx_time > 9999999 ? "ms" : "us",
- usec_to_fps(stream->cx_time, seen_frames));
- }
- }
-
- if (global.show_psnr) {
- if (global.codec->fourcc == AV1_FOURCC) {
- FOREACH_STREAM(stream, streams) {
- int64_t bps = 0;
- if (stream->psnr_count && seen_frames && global.framerate.den) {
- bps = (int64_t)stream->nbytes * 8 * (int64_t)global.framerate.num /
- global.framerate.den / seen_frames;
- }
- show_psnr(stream, (1 << stream->config.cfg.g_input_bit_depth) - 1,
- bps);
- }
- } else {
- FOREACH_STREAM(stream, streams) { show_psnr(stream, 255.0, 0); }
- }
- }
-
- FOREACH_STREAM(stream, streams) { aom_codec_destroy(&stream->encoder); }
-
- if (global.test_decode != TEST_DECODE_OFF) {
- FOREACH_STREAM(stream, streams) { aom_codec_destroy(&stream->decoder); }
- }
-
- close_input_file(&input);
-
- if (global.test_decode == TEST_DECODE_FATAL) {
- FOREACH_STREAM(stream, streams) { res |= stream->mismatch_seen; }
- }
- FOREACH_STREAM(stream, streams) {
- close_output_file(stream, global.codec->fourcc);
- }
-
- FOREACH_STREAM(stream, streams) {
- stats_close(&stream->stats, global.passes - 1);
- }
-
-#if CONFIG_FP_MB_STATS
- FOREACH_STREAM(stream, streams) {
- stats_close(&stream->fpmb_stats, global.passes - 1);
- }
-#endif
-
- if (global.pass) break;
- }
-
- if (global.show_q_hist_buckets) {
- FOREACH_STREAM(stream, streams) {
- show_q_histogram(stream->counts, global.show_q_hist_buckets);
- }
- }
-
- if (global.show_rate_hist_buckets) {
- FOREACH_STREAM(stream, streams) {
- show_rate_histogram(stream->rate_hist, &stream->config.cfg,
- global.show_rate_hist_buckets);
- }
- }
- FOREACH_STREAM(stream, streams) { destroy_rate_histogram(stream->rate_hist); }
-
-#if CONFIG_INTERNAL_STATS
- /* TODO(jkoleszar): This doesn't belong in this executable. Do it for now,
- * to match some existing utilities.
- */
- if (!(global.pass == 1 && global.passes == 2)) {
- FOREACH_STREAM(stream, streams) {
- FILE *f = fopen("opsnr.stt", "a");
- if (stream->mismatch_seen) {
- fprintf(f, "First mismatch occurred in frame %d\n",
- stream->mismatch_seen);
- } else {
- fprintf(f, "No mismatch detected in recon buffers\n");
- }
- fclose(f);
- }
- }
-#endif
-
- if (allocated_raw_shift) aom_img_free(&raw_shift);
- aom_img_free(&raw);
- free(argv);
- free(streams);
- return res ? EXIT_FAILURE : EXIT_SUCCESS;
-}
diff --git a/third_party/aom/apps/aomenc.h b/third_party/aom/apps/aomenc.h
deleted file mode 100644
index 7c23df006..000000000
--- a/third_party/aom/apps/aomenc.h
+++ /dev/null
@@ -1,62 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-#ifndef AOM_APPS_AOMENC_H_
-#define AOM_APPS_AOMENC_H_
-
-#include "aom/aom_encoder.h"
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-enum TestDecodeFatality {
- TEST_DECODE_OFF,
- TEST_DECODE_FATAL,
- TEST_DECODE_WARN,
-};
-
-typedef enum {
- I420, // 4:2:0 8+ bit-depth
- I422, // 4:2:2 8+ bit-depth
- I444, // 4:4:4 8+ bit-depth
- YV12, // 4:2:0 with uv flipped, only 8-bit depth
-} ColorInputType;
-
-struct AvxInterface;
-
-/* Configuration elements common to all streams. */
-struct AvxEncoderConfig {
- const struct AvxInterface *codec;
- int passes;
- int pass;
- int usage;
- ColorInputType color_type;
- int quiet;
- int verbose;
- int limit;
- int skip_frames;
- int show_psnr;
- enum TestDecodeFatality test_decode;
- int have_framerate;
- struct aom_rational framerate;
- int debug;
- int show_q_hist_buckets;
- int show_rate_hist_buckets;
- int disable_warnings;
- int disable_warning_prompt;
- int experimental_bitstream;
-};
-
-#ifdef __cplusplus
-} // extern "C"
-#endif
-
-#endif // AOM_APPS_AOMENC_H_
diff --git a/third_party/aom/av1/av1.cmake b/third_party/aom/av1/av1.cmake
deleted file mode 100644
index 3a7cd7ee1..000000000
--- a/third_party/aom/av1/av1.cmake
+++ /dev/null
@@ -1,469 +0,0 @@
-#
-# Copyright (c) 2017, Alliance for Open Media. All rights reserved
-#
-# This source code is subject to the terms of the BSD 2 Clause License and the
-# Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License was
-# not distributed with this source code in the LICENSE file, you can obtain it
-# at www.aomedia.org/license/software. If the Alliance for Open Media Patent
-# License 1.0 was not distributed with this source code in the PATENTS file, you
-# can obtain it at www.aomedia.org/license/patent.
-#
-if(AOM_AV1_AV1_CMAKE_)
- return()
-endif() # AOM_AV1_AV1_CMAKE_
-set(AOM_AV1_AV1_CMAKE_ 1)
-
-list(APPEND AOM_AV1_COMMON_SOURCES
- "${AOM_ROOT}/av1/av1_iface_common.h"
- "${AOM_ROOT}/av1/common/alloccommon.c"
- "${AOM_ROOT}/av1/common/alloccommon.h"
- "${AOM_ROOT}/av1/common/av1_inv_txfm1d.c"
- "${AOM_ROOT}/av1/common/av1_inv_txfm1d.h"
- "${AOM_ROOT}/av1/common/av1_inv_txfm1d_cfg.h"
- "${AOM_ROOT}/av1/common/av1_inv_txfm2d.c"
- "${AOM_ROOT}/av1/common/av1_loopfilter.c"
- "${AOM_ROOT}/av1/common/av1_loopfilter.h"
- "${AOM_ROOT}/av1/common/av1_txfm.c"
- "${AOM_ROOT}/av1/common/av1_txfm.h"
- "${AOM_ROOT}/av1/common/blockd.c"
- "${AOM_ROOT}/av1/common/blockd.h"
- "${AOM_ROOT}/av1/common/cdef.c"
- "${AOM_ROOT}/av1/common/cdef.h"
- "${AOM_ROOT}/av1/common/cdef_block.c"
- "${AOM_ROOT}/av1/common/cdef_block.h"
- "${AOM_ROOT}/av1/common/cfl.c"
- "${AOM_ROOT}/av1/common/cfl.h"
- "${AOM_ROOT}/av1/common/common.h"
- "${AOM_ROOT}/av1/common/common_data.h"
- "${AOM_ROOT}/av1/common/convolve.c"
- "${AOM_ROOT}/av1/common/convolve.h"
- "${AOM_ROOT}/av1/common/debugmodes.c"
- "${AOM_ROOT}/av1/common/entropy.c"
- "${AOM_ROOT}/av1/common/entropy.h"
- "${AOM_ROOT}/av1/common/entropymode.c"
- "${AOM_ROOT}/av1/common/entropymode.h"
- "${AOM_ROOT}/av1/common/entropymv.c"
- "${AOM_ROOT}/av1/common/entropymv.h"
- "${AOM_ROOT}/av1/common/enums.h"
- "${AOM_ROOT}/av1/common/filter.h"
- "${AOM_ROOT}/av1/common/frame_buffers.c"
- "${AOM_ROOT}/av1/common/frame_buffers.h"
- "${AOM_ROOT}/av1/common/idct.c"
- "${AOM_ROOT}/av1/common/idct.h"
- "${AOM_ROOT}/av1/common/mv.h"
- "${AOM_ROOT}/av1/common/mvref_common.c"
- "${AOM_ROOT}/av1/common/mvref_common.h"
- "${AOM_ROOT}/av1/common/obu_util.c"
- "${AOM_ROOT}/av1/common/obu_util.h"
- "${AOM_ROOT}/av1/common/odintrin.c"
- "${AOM_ROOT}/av1/common/odintrin.h"
- "${AOM_ROOT}/av1/common/onyxc_int.h"
- "${AOM_ROOT}/av1/common/pred_common.c"
- "${AOM_ROOT}/av1/common/pred_common.h"
- "${AOM_ROOT}/av1/common/quant_common.c"
- "${AOM_ROOT}/av1/common/quant_common.h"
- "${AOM_ROOT}/av1/common/reconinter.c"
- "${AOM_ROOT}/av1/common/reconinter.h"
- "${AOM_ROOT}/av1/common/reconintra.c"
- "${AOM_ROOT}/av1/common/reconintra.h"
- "${AOM_ROOT}/av1/common/resize.c"
- "${AOM_ROOT}/av1/common/resize.h"
- "${AOM_ROOT}/av1/common/restoration.c"
- "${AOM_ROOT}/av1/common/restoration.h"
- "${AOM_ROOT}/av1/common/scale.c"
- "${AOM_ROOT}/av1/common/scale.h"
- "${AOM_ROOT}/av1/common/scan.c"
- "${AOM_ROOT}/av1/common/scan.h"
- "${AOM_ROOT}/av1/common/seg_common.c"
- "${AOM_ROOT}/av1/common/seg_common.h"
- "${AOM_ROOT}/av1/common/thread_common.c"
- "${AOM_ROOT}/av1/common/thread_common.h"
- "${AOM_ROOT}/av1/common/tile_common.c"
- "${AOM_ROOT}/av1/common/tile_common.h"
- "${AOM_ROOT}/av1/common/timing.c"
- "${AOM_ROOT}/av1/common/timing.h"
- "${AOM_ROOT}/av1/common/token_cdfs.h"
- "${AOM_ROOT}/av1/common/txb_common.c"
- "${AOM_ROOT}/av1/common/txb_common.h"
- "${AOM_ROOT}/av1/common/warped_motion.c"
- "${AOM_ROOT}/av1/common/warped_motion.h")
-
-list(APPEND AOM_AV1_DECODER_SOURCES
- "${AOM_ROOT}/av1/av1_dx_iface.c"
- "${AOM_ROOT}/av1/decoder/decodeframe.c"
- "${AOM_ROOT}/av1/decoder/decodeframe.h"
- "${AOM_ROOT}/av1/decoder/decodemv.c"
- "${AOM_ROOT}/av1/decoder/decodemv.h"
- "${AOM_ROOT}/av1/decoder/decoder.c"
- "${AOM_ROOT}/av1/decoder/decoder.h"
- "${AOM_ROOT}/av1/decoder/decodetxb.c"
- "${AOM_ROOT}/av1/decoder/decodetxb.h"
- "${AOM_ROOT}/av1/decoder/detokenize.c"
- "${AOM_ROOT}/av1/decoder/detokenize.h"
- "${AOM_ROOT}/av1/decoder/dthread.c"
- "${AOM_ROOT}/av1/decoder/dthread.h"
- "${AOM_ROOT}/av1/decoder/obu.h"
- "${AOM_ROOT}/av1/decoder/obu.c")
-
-list(APPEND AOM_AV1_ENCODER_SOURCES
- "${AOM_ROOT}/av1/av1_cx_iface.c"
- "${AOM_ROOT}/av1/encoder/aq_complexity.c"
- "${AOM_ROOT}/av1/encoder/aq_complexity.h"
- "${AOM_ROOT}/av1/encoder/aq_cyclicrefresh.c"
- "${AOM_ROOT}/av1/encoder/aq_cyclicrefresh.h"
- "${AOM_ROOT}/av1/encoder/aq_variance.c"
- "${AOM_ROOT}/av1/encoder/aq_variance.h"
- "${AOM_ROOT}/av1/encoder/av1_fwd_txfm1d.c"
- "${AOM_ROOT}/av1/encoder/av1_fwd_txfm1d.h"
- "${AOM_ROOT}/av1/encoder/av1_fwd_txfm1d_cfg.h"
- "${AOM_ROOT}/av1/encoder/av1_fwd_txfm2d.c"
- "${AOM_ROOT}/av1/encoder/av1_quantize.c"
- "${AOM_ROOT}/av1/encoder/av1_quantize.h"
- "${AOM_ROOT}/av1/encoder/bitstream.c"
- "${AOM_ROOT}/av1/encoder/bitstream.h"
- "${AOM_ROOT}/av1/encoder/block.h"
- "${AOM_ROOT}/av1/encoder/context_tree.c"
- "${AOM_ROOT}/av1/encoder/context_tree.h"
- "${AOM_ROOT}/av1/encoder/corner_detect.c"
- "${AOM_ROOT}/av1/encoder/corner_detect.h"
- "${AOM_ROOT}/av1/encoder/corner_match.c"
- "${AOM_ROOT}/av1/encoder/corner_match.h"
- "${AOM_ROOT}/av1/encoder/cost.c"
- "${AOM_ROOT}/av1/encoder/cost.h"
- "${AOM_ROOT}/av1/encoder/encodeframe.c"
- "${AOM_ROOT}/av1/encoder/encodeframe.h"
- "${AOM_ROOT}/av1/encoder/encodemb.c"
- "${AOM_ROOT}/av1/encoder/encodemb.h"
- "${AOM_ROOT}/av1/encoder/encodemv.c"
- "${AOM_ROOT}/av1/encoder/encodemv.h"
- "${AOM_ROOT}/av1/encoder/encoder.c"
- "${AOM_ROOT}/av1/encoder/encoder.h"
- "${AOM_ROOT}/av1/encoder/encodetxb.c"
- "${AOM_ROOT}/av1/encoder/encodetxb.h"
- "${AOM_ROOT}/av1/encoder/ethread.c"
- "${AOM_ROOT}/av1/encoder/ethread.h"
- "${AOM_ROOT}/av1/encoder/extend.c"
- "${AOM_ROOT}/av1/encoder/extend.h"
- "${AOM_ROOT}/av1/encoder/firstpass.c"
- "${AOM_ROOT}/av1/encoder/firstpass.h"
- "${AOM_ROOT}/av1/encoder/global_motion.c"
- "${AOM_ROOT}/av1/encoder/global_motion.h"
- "${AOM_ROOT}/av1/encoder/grain_test_vectors.h"
- "${AOM_ROOT}/av1/encoder/hash.c"
- "${AOM_ROOT}/av1/encoder/hash.h"
- "${AOM_ROOT}/av1/encoder/hash_motion.c"
- "${AOM_ROOT}/av1/encoder/hash_motion.h"
- "${AOM_ROOT}/av1/encoder/hybrid_fwd_txfm.c"
- "${AOM_ROOT}/av1/encoder/hybrid_fwd_txfm.h"
- "${AOM_ROOT}/av1/encoder/lookahead.c"
- "${AOM_ROOT}/av1/encoder/lookahead.h"
- "${AOM_ROOT}/av1/encoder/mbgraph.c"
- "${AOM_ROOT}/av1/encoder/mbgraph.h"
- "${AOM_ROOT}/av1/encoder/mcomp.c"
- "${AOM_ROOT}/av1/encoder/mcomp.h"
- "${AOM_ROOT}/av1/encoder/ml.c"
- "${AOM_ROOT}/av1/encoder/ml.h"
- "${AOM_ROOT}/av1/encoder/palette.c"
- "${AOM_ROOT}/av1/encoder/palette.h"
- "${AOM_ROOT}/av1/encoder/pickcdef.c"
- "${AOM_ROOT}/av1/encoder/picklpf.c"
- "${AOM_ROOT}/av1/encoder/picklpf.h"
- "${AOM_ROOT}/av1/encoder/pickrst.c"
- "${AOM_ROOT}/av1/encoder/pickrst.h"
- "${AOM_ROOT}/av1/encoder/ransac.c"
- "${AOM_ROOT}/av1/encoder/ransac.h"
- "${AOM_ROOT}/av1/encoder/ratectrl.c"
- "${AOM_ROOT}/av1/encoder/ratectrl.h"
- "${AOM_ROOT}/av1/encoder/rd.c"
- "${AOM_ROOT}/av1/encoder/rd.h"
- "${AOM_ROOT}/av1/encoder/rdopt.c"
- "${AOM_ROOT}/av1/encoder/rdopt.h"
- "${AOM_ROOT}/av1/encoder/reconinter_enc.c"
- "${AOM_ROOT}/av1/encoder/reconinter_enc.h"
- "${AOM_ROOT}/av1/encoder/segmentation.c"
- "${AOM_ROOT}/av1/encoder/segmentation.h"
- "${AOM_ROOT}/av1/encoder/speed_features.c"
- "${AOM_ROOT}/av1/encoder/speed_features.h"
- "${AOM_ROOT}/av1/encoder/temporal_filter.c"
- "${AOM_ROOT}/av1/encoder/temporal_filter.h"
- "${AOM_ROOT}/av1/encoder/tokenize.c"
- "${AOM_ROOT}/av1/encoder/tokenize.h"
- "${AOM_ROOT}/av1/encoder/wedge_utils.c"
- "${AOM_ROOT}/third_party/fastfeat/fast.c"
- "${AOM_ROOT}/third_party/fastfeat/fast.h"
- "${AOM_ROOT}/third_party/fastfeat/fast_9.c"
- "${AOM_ROOT}/third_party/fastfeat/nonmax.c"
- "${AOM_ROOT}/third_party/vector/vector.c"
- "${AOM_ROOT}/third_party/vector/vector.h"
- "${AOM_ROOT}/av1/encoder/dwt.c"
- "${AOM_ROOT}/av1/encoder/dwt.h")
-
-list(APPEND AOM_AV1_COMMON_INTRIN_SSE2
- "${AOM_ROOT}/av1/common/cdef_block_sse2.c"
- "${AOM_ROOT}/av1/common/x86/cfl_sse2.c"
- "${AOM_ROOT}/av1/common/x86/convolve_2d_sse2.c"
- "${AOM_ROOT}/av1/common/x86/convolve_sse2.c"
- "${AOM_ROOT}/av1/common/x86/highbd_convolve_2d_sse2.c"
- "${AOM_ROOT}/av1/common/x86/jnt_convolve_sse2.c"
- "${AOM_ROOT}/av1/common/x86/wiener_convolve_sse2.c"
- "${AOM_ROOT}/av1/common/x86/av1_txfm_sse2.h")
-
-list(APPEND AOM_AV1_COMMON_INTRIN_SSSE3
- "${AOM_ROOT}/av1/common/cdef_block_ssse3.c"
- "${AOM_ROOT}/av1/common/x86/av1_inv_txfm_ssse3.c"
- "${AOM_ROOT}/av1/common/x86/av1_inv_txfm_ssse3.h"
- "${AOM_ROOT}/av1/common/x86/cfl_ssse3.c"
- "${AOM_ROOT}/av1/common/x86/highbd_convolve_2d_ssse3.c"
- "${AOM_ROOT}/av1/common/x86/highbd_wiener_convolve_ssse3.c"
- "${AOM_ROOT}/av1/common/x86/jnt_convolve_ssse3.c"
- "${AOM_ROOT}/av1/common/x86/reconinter_ssse3.c")
-
-list(APPEND AOM_AV1_COMMON_INTRIN_SSE4_1
- "${AOM_ROOT}/av1/common/cdef_block_sse4.c"
- "${AOM_ROOT}/av1/common/x86/av1_convolve_horiz_rs_sse4.c"
- "${AOM_ROOT}/av1/common/x86/av1_convolve_scale_sse4.c"
- "${AOM_ROOT}/av1/common/x86/av1_highbd_convolve_sse4.c"
- "${AOM_ROOT}/av1/common/x86/av1_txfm_sse4.c"
- "${AOM_ROOT}/av1/common/x86/av1_txfm_sse4.h"
- "${AOM_ROOT}/av1/common/x86/filterintra_sse4.c"
- "${AOM_ROOT}/av1/common/x86/highbd_convolve_2d_sse4.c"
- "${AOM_ROOT}/av1/common/x86/highbd_inv_txfm_sse4.c"
- "${AOM_ROOT}/av1/common/x86/highbd_jnt_convolve_sse4.c"
- "${AOM_ROOT}/av1/common/x86/highbd_warp_plane_sse4.c"
- "${AOM_ROOT}/av1/common/x86/intra_edge_sse4.c"
- "${AOM_ROOT}/av1/common/x86/reconinter_sse4.c"
- "${AOM_ROOT}/av1/common/x86/selfguided_sse4.c"
- "${AOM_ROOT}/av1/common/x86/warp_plane_sse4.c")
-
-list(APPEND AOM_AV1_COMMON_INTRIN_AVX2
- "${AOM_ROOT}/av1/common/cdef_block_avx2.c"
- "${AOM_ROOT}/av1/common/x86/av1_inv_txfm_avx2.c"
- "${AOM_ROOT}/av1/common/x86/av1_inv_txfm_avx2.h"
- "${AOM_ROOT}/av1/common/x86/cfl_avx2.c"
- "${AOM_ROOT}/av1/common/x86/convolve_2d_avx2.c"
- "${AOM_ROOT}/av1/common/x86/convolve_avx2.c"
- "${AOM_ROOT}/av1/common/x86/highbd_convolve_2d_avx2.c"
- "${AOM_ROOT}/av1/common/x86/highbd_inv_txfm_avx2.c"
- "${AOM_ROOT}/av1/common/x86/highbd_jnt_convolve_avx2.c"
- "${AOM_ROOT}/av1/common/x86/highbd_wiener_convolve_avx2.c"
- "${AOM_ROOT}/av1/common/x86/jnt_convolve_avx2.c"
- "${AOM_ROOT}/av1/common/x86/reconinter_avx2.c"
- "${AOM_ROOT}/av1/common/x86/selfguided_avx2.c"
- "${AOM_ROOT}/av1/common/x86/wiener_convolve_avx2.c")
-
-list(APPEND AOM_AV1_ENCODER_ASM_SSE2 "${AOM_ROOT}/av1/encoder/x86/dct_sse2.asm"
- "${AOM_ROOT}/av1/encoder/x86/error_sse2.asm"
- "${AOM_ROOT}/av1/encoder/x86/temporal_filter_apply_sse2.asm")
-
-list(APPEND AOM_AV1_ENCODER_INTRIN_SSE2
- "${AOM_ROOT}/av1/encoder/x86/av1_fwd_txfm_sse2.c"
- "${AOM_ROOT}/av1/encoder/x86/av1_fwd_txfm_sse2.h"
- "${AOM_ROOT}/av1/encoder/x86/av1_quantize_sse2.c"
- "${AOM_ROOT}/av1/encoder/x86/encodetxb_sse2.c"
- "${AOM_ROOT}/av1/encoder/x86/highbd_block_error_intrin_sse2.c"
- "${AOM_ROOT}/av1/encoder/x86/wedge_utils_sse2.c")
-
-list(APPEND AOM_AV1_ENCODER_ASM_SSSE3_X86_64
- "${AOM_ROOT}/av1/encoder/x86/av1_quantize_ssse3_x86_64.asm")
-
-list(APPEND AOM_AV1_ENCODER_INTRIN_SSE4_1
- "${AOM_ROOT}/av1/encoder/x86/av1_fwd_txfm1d_sse4.c"
- "${AOM_ROOT}/av1/encoder/x86/av1_fwd_txfm2d_sse4.c"
- "${AOM_ROOT}/av1/encoder/x86/av1_highbd_quantize_sse4.c"
- "${AOM_ROOT}/av1/encoder/x86/corner_match_sse4.c"
- "${AOM_ROOT}/av1/encoder/x86/encodetxb_sse4.c"
- "${AOM_ROOT}/av1/encoder/x86/highbd_fwd_txfm_sse4.c"
- "${AOM_ROOT}/av1/encoder/x86/pickrst_sse4.c")
-
-list(APPEND AOM_AV1_ENCODER_INTRIN_AVX2
- "${AOM_ROOT}/av1/encoder/x86/av1_quantize_avx2.c"
- "${AOM_ROOT}/av1/encoder/x86/av1_highbd_quantize_avx2.c"
- "${AOM_ROOT}/av1/encoder/x86/error_intrin_avx2.c"
- "${AOM_ROOT}/av1/encoder/x86/av1_fwd_txfm_avx2.h"
- "${AOM_ROOT}/av1/encoder/x86/av1_fwd_txfm2d_avx2.c"
- "${AOM_ROOT}/av1/encoder/x86/wedge_utils_avx2.c"
- "${AOM_ROOT}/av1/encoder/x86/encodetxb_avx2.c"
- "${AOM_ROOT}/av1/encoder/x86/pickrst_avx2.c")
-
-list(APPEND AOM_AV1_ENCODER_INTRIN_NEON
- "${AOM_ROOT}/av1/encoder/arm/neon/quantize_neon.c")
-
-list(APPEND AOM_AV1_ENCODER_INTRIN_MSA
- "${AOM_ROOT}/av1/encoder/mips/msa/error_msa.c"
- "${AOM_ROOT}/av1/encoder/mips/msa/fdct4x4_msa.c"
- "${AOM_ROOT}/av1/encoder/mips/msa/temporal_filter_msa.c")
-
-list(APPEND AOM_AV1_COMMON_INTRIN_NEON
- "${AOM_ROOT}/av1/common/arm/av1_txfm_neon.c"
- "${AOM_ROOT}/av1/common/arm/cfl_neon.c"
- "${AOM_ROOT}/av1/common/arm/convolve_neon.c"
- "${AOM_ROOT}/av1/common/arm/convolve_neon.h"
- "${AOM_ROOT}/av1/common/arm/jnt_convolve_neon.c"
- "${AOM_ROOT}/av1/common/arm/mem_neon.h"
- "${AOM_ROOT}/av1/common/arm/transpose_neon.h"
- "${AOM_ROOT}/av1/common/arm/blend_a64_hmask_neon.c"
- "${AOM_ROOT}/av1/common/arm/blend_a64_vmask_neon.c"
- "${AOM_ROOT}/av1/common/arm/reconinter_neon.c"
- "${AOM_ROOT}/av1/common/arm/wiener_convolve_neon.c"
- "${AOM_ROOT}/av1/common/arm/selfguided_neon.c"
- "${AOM_ROOT}/av1/common/arm/av1_inv_txfm_neon.c"
- "${AOM_ROOT}/av1/common/arm/av1_inv_txfm_neon.h"
- "${AOM_ROOT}/av1/common/arm/warp_plane_neon.c"
- "${AOM_ROOT}/av1/common/cdef_block_neon.c")
-
-list(APPEND AOM_AV1_ENCODER_INTRIN_SSE4_2
- "${AOM_ROOT}/av1/encoder/x86/hash_sse42.c")
-
-list(APPEND AOM_AV1_COMMON_INTRIN_VSX "${AOM_ROOT}/av1/common/ppc/cfl_ppc.c")
-
-if(CONFIG_ACCOUNTING)
- list(APPEND AOM_AV1_DECODER_SOURCES "${AOM_ROOT}/av1/decoder/accounting.c"
- "${AOM_ROOT}/av1/decoder/accounting.h")
-endif()
-
-if(CONFIG_INSPECTION)
- list(APPEND AOM_AV1_DECODER_SOURCES "${AOM_ROOT}/av1/decoder/inspection.c"
- "${AOM_ROOT}/av1/decoder/inspection.h")
-endif()
-
-if(CONFIG_INTERNAL_STATS)
- list(APPEND AOM_AV1_ENCODER_SOURCES "${AOM_ROOT}/av1/encoder/blockiness.c")
-endif()
-
-# Setup AV1 common/decoder/encoder targets. The libaom target must exist before
-# this function is called.
-function(setup_av1_targets)
- add_library(aom_av1_common OBJECT ${AOM_AV1_COMMON_SOURCES})
- list(APPEND AOM_LIB_TARGETS aom_av1_common)
-
- create_dummy_source_file("aom_av1" "c" "dummy_source_file")
- add_library(aom_av1 OBJECT "${dummy_source_file}")
- target_sources(aom PRIVATE $<TARGET_OBJECTS:aom_av1_common>)
- list(APPEND AOM_LIB_TARGETS aom_av1)
-
- # Not all generators support libraries consisting only of object files. Add a
- # dummy source file to the aom_av1 target.
- add_dummy_source_file_to_target("aom_av1" "c")
-
- if(CONFIG_AV1_DECODER)
- add_library(aom_av1_decoder OBJECT ${AOM_AV1_DECODER_SOURCES})
- set(AOM_LIB_TARGETS ${AOM_LIB_TARGETS} aom_av1_decoder)
- target_sources(aom PRIVATE $<TARGET_OBJECTS:aom_av1_decoder>)
- endif()
-
- if(CONFIG_AV1_ENCODER)
- add_library(aom_av1_encoder OBJECT ${AOM_AV1_ENCODER_SOURCES})
- set(AOM_LIB_TARGETS ${AOM_LIB_TARGETS} aom_av1_encoder)
- target_sources(aom PRIVATE $<TARGET_OBJECTS:aom_av1_encoder>)
- endif()
-
- if(HAVE_SSE2)
- require_compiler_flag_nomsvc("-msse2" NO)
- add_intrinsics_object_library("-msse2" "sse2" "aom_av1_common"
- "AOM_AV1_COMMON_INTRIN_SSE2" "aom")
- if(CONFIG_AV1_DECODER)
- if(AOM_AV1_DECODER_ASM_SSE2)
- add_asm_library("aom_av1_decoder_sse2" "AOM_AV1_DECODER_ASM_SSE2" "aom")
- endif()
-
- if(AOM_AV1_DECODER_INTRIN_SSE2)
- add_intrinsics_object_library("-msse2" "sse2" "aom_av1_decoder"
- "AOM_AV1_DECODER_INTRIN_SSE2" "aom")
- endif()
- endif()
-
- if(CONFIG_AV1_ENCODER)
- add_asm_library("aom_av1_encoder_sse2" "AOM_AV1_ENCODER_ASM_SSE2" "aom")
- add_intrinsics_object_library("-msse2" "sse2" "aom_av1_encoder"
- "AOM_AV1_ENCODER_INTRIN_SSE2" "aom")
- endif()
- endif()
-
- if(HAVE_SSSE3)
- require_compiler_flag_nomsvc("-mssse3" NO)
- add_intrinsics_object_library("-mssse3" "ssse3" "aom_av1_common"
- "AOM_AV1_COMMON_INTRIN_SSSE3" "aom")
-
- if(CONFIG_AV1_DECODER)
- if(AOM_AV1_DECODER_INTRIN_SSSE3)
- add_intrinsics_object_library("-mssse3" "ssse3" "aom_av1_decoder"
- "AOM_AV1_DECODER_INTRIN_SSSE3" "aom")
- endif()
- endif()
- endif()
-
- if(HAVE_SSE4_1)
- require_compiler_flag_nomsvc("-msse4.1" NO)
- add_intrinsics_object_library("-msse4.1" "sse4" "aom_av1_common"
- "AOM_AV1_COMMON_INTRIN_SSE4_1" "aom")
-
- if(CONFIG_AV1_ENCODER)
- if("${AOM_TARGET_CPU}" STREQUAL "x86_64")
- add_asm_library("aom_av1_encoder_ssse3"
- "AOM_AV1_ENCODER_ASM_SSSE3_X86_64" "aom")
- endif()
-
- if(AOM_AV1_ENCODER_INTRIN_SSE4_1)
- add_intrinsics_object_library("-msse4.1" "sse4" "aom_av1_encoder"
- "AOM_AV1_ENCODER_INTRIN_SSE4_1" "aom")
- endif()
- endif()
- endif()
-
- if(HAVE_SSE4_2)
- require_compiler_flag_nomsvc("-msse4.2" NO)
- if(CONFIG_AV1_ENCODER)
- if(AOM_AV1_ENCODER_INTRIN_SSE4_2)
- add_intrinsics_object_library("-msse4.2" "sse42" "aom_av1_encoder"
- "AOM_AV1_ENCODER_INTRIN_SSE4_2" "aom")
- endif()
- endif()
- endif()
-
- if(HAVE_AVX2)
- require_compiler_flag_nomsvc("-mavx2" NO)
- add_intrinsics_object_library("-mavx2" "avx2" "aom_av1_common"
- "AOM_AV1_COMMON_INTRIN_AVX2" "aom")
-
- if(CONFIG_AV1_ENCODER)
- add_intrinsics_object_library("-mavx2" "avx2" "aom_av1_encoder"
- "AOM_AV1_ENCODER_INTRIN_AVX2" "aom")
- endif()
- endif()
-
- if(HAVE_NEON)
- if(AOM_AV1_COMMON_INTRIN_NEON)
- add_intrinsics_object_library("${AOM_INTRIN_NEON_FLAG}" "neon"
- "aom_av1_common"
- "AOM_AV1_COMMON_INTRIN_NEON" "aom")
- endif()
-
- if(AOM_AV1_ENCODER_INTRIN_NEON)
- add_intrinsics_object_library("${AOM_INTRIN_NEON_FLAG}" "neon"
- "aom_av1_encoder"
- "AOM_AV1_ENCODER_INTRIN_NEON" "aom")
- endif()
- endif()
-
- if(HAVE_VSX)
- if(AOM_AV1_COMMON_INTRIN_VSX)
- add_intrinsics_object_library("-mvsx -maltivec" "vsx" "aom_av1_common"
- "AOM_AV1_COMMON_INTRIN_VSX" "aom")
- endif()
- endif()
-
- if(HAVE_MSA)
- add_intrinsics_object_library("" "msa" "aom_av1_encoder"
- "AOM_AV1_ENCODER_INTRIN_MSA" "aom")
- endif()
-
- target_sources(aom PRIVATE $<TARGET_OBJECTS:aom_dsp>)
- target_sources(aom PRIVATE $<TARGET_OBJECTS:aom_scale>)
-
- # Pass the new lib targets up to the parent scope instance of
- # $AOM_LIB_TARGETS.
- set(AOM_LIB_TARGETS ${AOM_LIB_TARGETS} PARENT_SCOPE)
-endfunction()
-
-function(setup_av1_test_targets)
-endfunction()
diff --git a/third_party/aom/av1/av1_cx_iface.c b/third_party/aom/av1/av1_cx_iface.c
deleted file mode 100644
index 3295f618a..000000000
--- a/third_party/aom/av1/av1_cx_iface.c
+++ /dev/null
@@ -1,1908 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-#include <stdlib.h>
-#include <string.h>
-
-#include "config/aom_config.h"
-#include "config/aom_version.h"
-
-#include "aom_ports/aom_once.h"
-#include "aom_ports/mem_ops.h"
-#include "aom_ports/system_state.h"
-
-#include "aom/aom_encoder.h"
-#include "aom/internal/aom_codec_internal.h"
-
-#include "av1/av1_iface_common.h"
-#include "av1/encoder/bitstream.h"
-#include "av1/encoder/encoder.h"
-#include "av1/encoder/firstpass.h"
-
-#define MAG_SIZE (4)
-#define MAX_NUM_ENHANCEMENT_LAYERS 3
-
-struct av1_extracfg {
- int cpu_used; // available cpu percentage in 1/16
- unsigned int enable_auto_alt_ref;
- unsigned int enable_auto_bwd_ref;
- unsigned int noise_sensitivity;
- unsigned int sharpness;
- unsigned int static_thresh;
- unsigned int row_mt;
- unsigned int tile_columns; // log2 number of tile columns
- unsigned int tile_rows; // log2 number of tile rows
- unsigned int arnr_max_frames;
- unsigned int arnr_strength;
- unsigned int min_gf_interval;
- unsigned int max_gf_interval;
- aom_tune_metric tuning;
- unsigned int cq_level; // constrained quality level
- unsigned int rc_max_intra_bitrate_pct;
- unsigned int rc_max_inter_bitrate_pct;
- unsigned int gf_cbr_boost_pct;
- unsigned int lossless;
- unsigned int enable_cdef;
- unsigned int enable_restoration;
- unsigned int disable_trellis_quant;
- unsigned int enable_qm;
- unsigned int qm_y;
- unsigned int qm_u;
- unsigned int qm_v;
- unsigned int qm_min;
- unsigned int qm_max;
-#if CONFIG_DIST_8X8
- unsigned int enable_dist_8x8;
-#endif
- unsigned int num_tg;
- unsigned int mtu_size;
-
- aom_timing_info_type_t timing_info_type;
- unsigned int frame_parallel_decoding_mode;
- int use_dual_filter;
- AQ_MODE aq_mode;
- DELTAQ_MODE deltaq_mode;
- unsigned int frame_periodic_boost;
- aom_bit_depth_t bit_depth;
- aom_tune_content content;
- aom_color_primaries_t color_primaries;
- aom_transfer_characteristics_t transfer_characteristics;
- aom_matrix_coefficients_t matrix_coefficients;
- aom_chroma_sample_position_t chroma_sample_position;
- int color_range;
- int render_width;
- int render_height;
- aom_superblock_size_t superblock_size;
- unsigned int single_tile_decoding;
- int error_resilient_mode;
- int s_frame_mode;
-
- int film_grain_test_vector;
- const char *film_grain_table_filename;
- unsigned int motion_vector_unit_test;
- unsigned int cdf_update_mode;
- int enable_order_hint;
- int enable_jnt_comp;
- int enable_ref_frame_mvs; // sequence level
- int allow_ref_frame_mvs; // frame level
- int enable_warped_motion; // sequence level
- int allow_warped_motion; // frame level
- int enable_superres;
-#if CONFIG_DENOISE
- float noise_level;
- int noise_block_size;
-#endif
-
- unsigned int chroma_subsampling_x;
- unsigned int chroma_subsampling_y;
-};
-
-static struct av1_extracfg default_extra_cfg = {
- 0, // cpu_used
- 1, // enable_auto_alt_ref
- 0, // enable_auto_bwd_ref
- 0, // noise_sensitivity
- CONFIG_SHARP_SETTINGS, // sharpness
- 0, // static_thresh
- 0, // row_mt
- 0, // tile_columns
- 0, // tile_rows
- 7, // arnr_max_frames
- 5, // arnr_strength
- 0, // min_gf_interval; 0 -> default decision
- 0, // max_gf_interval; 0 -> default decision
- AOM_TUNE_PSNR, // tuning
- 10, // cq_level
- 0, // rc_max_intra_bitrate_pct
- 0, // rc_max_inter_bitrate_pct
- 0, // gf_cbr_boost_pct
- 0, // lossless
- !CONFIG_SHARP_SETTINGS, // enable_cdef
- 1, // enable_restoration
- 0, // disable_trellis_quant
- 0, // enable_qm
- DEFAULT_QM_Y, // qm_y
- DEFAULT_QM_U, // qm_u
- DEFAULT_QM_V, // qm_v
- DEFAULT_QM_FIRST, // qm_min
- DEFAULT_QM_LAST, // qm_max
-#if CONFIG_DIST_8X8
- 0,
-#endif
- 1, // max number of tile groups
- 0, // mtu_size
- AOM_TIMING_UNSPECIFIED, // No picture timing signaling in bitstream
- 1, // frame_parallel_decoding_mode
- 1, // enable dual filter
- NO_AQ, // aq_mode
- NO_DELTA_Q, // deltaq_mode
- 0, // frame_periodic_delta_q
- AOM_BITS_8, // Bit depth
- AOM_CONTENT_DEFAULT, // content
- AOM_CICP_CP_UNSPECIFIED, // CICP color space
- AOM_CICP_TC_UNSPECIFIED, // CICP transfer characteristics
- AOM_CICP_MC_UNSPECIFIED, // CICP matrix coefficients
- AOM_CSP_UNKNOWN, // chroma sample position
- 0, // color range
- 0, // render width
- 0, // render height
- AOM_SUPERBLOCK_SIZE_DYNAMIC, // superblock_size
- 1, // this depends on large_scale_tile.
- 0, // error_resilient_mode off by default.
- 0, // s_frame_mode off by default.
- 0, // film_grain_test_vector
- 0, // film_grain_table_filename
- 0, // motion_vector_unit_test
- 1, // CDF update mode
- 1, // frame order hint
- 1, // jnt_comp
- 1, // enable_ref_frame_mvs sequence level
- 1, // allow ref_frame_mvs frame level
- 1, // enable_warped_motion at sequence level
- 1, // allow_warped_motion at frame level
- 1, // superres
-#if CONFIG_DENOISE
- 0, // noise_level
- 32, // noise_block_size
-#endif
- 0, // chroma_subsampling_x
- 0, // chroma_subsampling_y
-};
-
-struct aom_codec_alg_priv {
- aom_codec_priv_t base;
- aom_codec_enc_cfg_t cfg;
- struct av1_extracfg extra_cfg;
- AV1EncoderConfig oxcf;
- AV1_COMP *cpi;
- unsigned char *cx_data;
- size_t cx_data_sz;
- unsigned char *pending_cx_data;
- size_t pending_cx_data_sz;
- int pending_frame_count;
- size_t pending_frame_sizes[8];
- aom_image_t preview_img;
- aom_enc_frame_flags_t next_frame_flags;
- aom_postproc_cfg_t preview_ppcfg;
- aom_codec_pkt_list_decl(256) pkt_list;
- unsigned int fixed_kf_cntr;
- // BufferPool that holds all reference frames.
- BufferPool *buffer_pool;
-};
-
-static aom_codec_err_t update_error_state(
- aom_codec_alg_priv_t *ctx, const struct aom_internal_error_info *error) {
- const aom_codec_err_t res = error->error_code;
-
- if (res != AOM_CODEC_OK)
- ctx->base.err_detail = error->has_detail ? error->detail : NULL;
-
- return res;
-}
-
-#undef ERROR
-#define ERROR(str) \
- do { \
- ctx->base.err_detail = str; \
- return AOM_CODEC_INVALID_PARAM; \
- } while (0)
-
-#define RANGE_CHECK(p, memb, lo, hi) \
- do { \
- if (!((p)->memb >= (lo) && (p)->memb <= (hi))) \
- ERROR(#memb " out of range [" #lo ".." #hi "]"); \
- } while (0)
-
-#define RANGE_CHECK_HI(p, memb, hi) \
- do { \
- if (!((p)->memb <= (hi))) ERROR(#memb " out of range [.." #hi "]"); \
- } while (0)
-
-#define RANGE_CHECK_BOOL(p, memb) \
- do { \
- if (!!((p)->memb) != (p)->memb) ERROR(#memb " expected boolean"); \
- } while (0)
-
-static aom_codec_err_t validate_config(aom_codec_alg_priv_t *ctx,
- const aom_codec_enc_cfg_t *cfg,
- const struct av1_extracfg *extra_cfg) {
- RANGE_CHECK(cfg, g_w, 1, 65535); // 16 bits available
- RANGE_CHECK(cfg, g_h, 1, 65535); // 16 bits available
- RANGE_CHECK(cfg, g_timebase.den, 1, 1000000000);
- RANGE_CHECK(cfg, g_timebase.num, 1, cfg->g_timebase.den);
- RANGE_CHECK_HI(cfg, g_profile, MAX_PROFILES - 1);
-
- RANGE_CHECK_HI(cfg, rc_max_quantizer, 63);
- RANGE_CHECK_HI(cfg, rc_min_quantizer, cfg->rc_max_quantizer);
- RANGE_CHECK_BOOL(extra_cfg, lossless);
- RANGE_CHECK_HI(extra_cfg, aq_mode, AQ_MODE_COUNT - 1);
- RANGE_CHECK_HI(extra_cfg, deltaq_mode, DELTAQ_MODE_COUNT - 1);
- RANGE_CHECK_HI(extra_cfg, frame_periodic_boost, 1);
- RANGE_CHECK_HI(cfg, g_threads, 64);
- RANGE_CHECK_HI(cfg, g_lag_in_frames, MAX_LAG_BUFFERS);
- RANGE_CHECK(cfg, rc_end_usage, AOM_VBR, AOM_Q);
- RANGE_CHECK_HI(cfg, rc_undershoot_pct, 100);
- RANGE_CHECK_HI(cfg, rc_overshoot_pct, 100);
- RANGE_CHECK_HI(cfg, rc_2pass_vbr_bias_pct, 100);
- RANGE_CHECK(cfg, kf_mode, AOM_KF_DISABLED, AOM_KF_AUTO);
- RANGE_CHECK_HI(cfg, rc_dropframe_thresh, 100);
- RANGE_CHECK(cfg, g_pass, AOM_RC_ONE_PASS, AOM_RC_LAST_PASS);
- RANGE_CHECK_HI(extra_cfg, min_gf_interval, MAX_LAG_BUFFERS - 1);
- RANGE_CHECK_HI(extra_cfg, max_gf_interval, MAX_LAG_BUFFERS - 1);
- if (extra_cfg->max_gf_interval > 0) {
- RANGE_CHECK(extra_cfg, max_gf_interval, MAX(2, extra_cfg->min_gf_interval),
- (MAX_LAG_BUFFERS - 1));
- }
-
- RANGE_CHECK_HI(cfg, rc_resize_mode, RESIZE_MODES - 1);
- RANGE_CHECK(cfg, rc_resize_denominator, SCALE_NUMERATOR,
- SCALE_NUMERATOR << 1);
- RANGE_CHECK(cfg, rc_resize_kf_denominator, SCALE_NUMERATOR,
- SCALE_NUMERATOR << 1);
- RANGE_CHECK_HI(cfg, rc_superres_mode, SUPERRES_MODES - 1);
- RANGE_CHECK(cfg, rc_superres_denominator, SCALE_NUMERATOR,
- SCALE_NUMERATOR << 1);
- RANGE_CHECK(cfg, rc_superres_kf_denominator, SCALE_NUMERATOR,
- SCALE_NUMERATOR << 1);
- RANGE_CHECK(cfg, rc_superres_qthresh, 1, 63);
- RANGE_CHECK(cfg, rc_superres_kf_qthresh, 1, 63);
- RANGE_CHECK_HI(extra_cfg, cdf_update_mode, 2);
-
- // AV1 does not support a lower bound on the keyframe interval in
- // automatic keyframe placement mode.
- if (cfg->kf_mode != AOM_KF_DISABLED && cfg->kf_min_dist != cfg->kf_max_dist &&
- cfg->kf_min_dist > 0)
- ERROR(
- "kf_min_dist not supported in auto mode, use 0 "
- "or kf_max_dist instead.");
-
- RANGE_CHECK_HI(extra_cfg, motion_vector_unit_test, 2);
- RANGE_CHECK_HI(extra_cfg, enable_auto_alt_ref, 2);
- RANGE_CHECK_HI(extra_cfg, enable_auto_bwd_ref, 2);
- RANGE_CHECK(extra_cfg, cpu_used, 0, 8);
- RANGE_CHECK_HI(extra_cfg, noise_sensitivity, 6);
- RANGE_CHECK(extra_cfg, superblock_size, AOM_SUPERBLOCK_SIZE_64X64,
- AOM_SUPERBLOCK_SIZE_DYNAMIC);
- RANGE_CHECK_HI(cfg, large_scale_tile, 1);
- RANGE_CHECK_HI(extra_cfg, single_tile_decoding, 1);
-
- RANGE_CHECK_HI(extra_cfg, row_mt, 1);
-
- RANGE_CHECK_HI(extra_cfg, tile_columns, 6);
- RANGE_CHECK_HI(extra_cfg, tile_rows, 6);
-
- RANGE_CHECK_HI(cfg, monochrome, 1);
-
- if (cfg->large_scale_tile && extra_cfg->aq_mode)
- ERROR(
- "Adaptive quantization are not supported in large scale tile "
- "coding.");
-
- RANGE_CHECK_HI(extra_cfg, sharpness, 7);
- RANGE_CHECK_HI(extra_cfg, arnr_max_frames, 15);
- RANGE_CHECK_HI(extra_cfg, arnr_strength, 6);
- RANGE_CHECK_HI(extra_cfg, cq_level, 63);
- RANGE_CHECK(cfg, g_bit_depth, AOM_BITS_8, AOM_BITS_12);
- RANGE_CHECK(cfg, g_input_bit_depth, 8, 12);
- RANGE_CHECK(extra_cfg, content, AOM_CONTENT_DEFAULT, AOM_CONTENT_INVALID - 1);
-
- // TODO(yaowu): remove this when ssim tuning is implemented for av1
- if (extra_cfg->tuning == AOM_TUNE_SSIM)
- ERROR("Option --tune=ssim is not currently supported in AV1.");
-
- if (cfg->g_pass == AOM_RC_LAST_PASS) {
- const size_t packet_sz = sizeof(FIRSTPASS_STATS);
- const int n_packets = (int)(cfg->rc_twopass_stats_in.sz / packet_sz);
- const FIRSTPASS_STATS *stats;
-
- if (cfg->rc_twopass_stats_in.buf == NULL)
- ERROR("rc_twopass_stats_in.buf not set.");
-
- if (cfg->rc_twopass_stats_in.sz % packet_sz)
- ERROR("rc_twopass_stats_in.sz indicates truncated packet.");
-
- if (cfg->rc_twopass_stats_in.sz < 2 * packet_sz)
- ERROR("rc_twopass_stats_in requires at least two packets.");
-
- stats =
- (const FIRSTPASS_STATS *)cfg->rc_twopass_stats_in.buf + n_packets - 1;
-
- if ((int)(stats->count + 0.5) != n_packets - 1)
- ERROR("rc_twopass_stats_in missing EOS stats packet");
- }
-
- if (cfg->g_profile <= (unsigned int)PROFILE_1 &&
- cfg->g_bit_depth > AOM_BITS_10) {
- ERROR("Codec bit-depth 12 not supported in profile < 2");
- }
- if (cfg->g_profile <= (unsigned int)PROFILE_1 &&
- cfg->g_input_bit_depth > 10) {
- ERROR("Source bit-depth 12 not supported in profile < 2");
- }
-
- RANGE_CHECK(extra_cfg, color_primaries, AOM_CICP_CP_BT_709,
- AOM_CICP_CP_EBU_3213); // Need to check range more precisely to
- // check for reserved values?
- RANGE_CHECK(extra_cfg, transfer_characteristics, AOM_CICP_TC_BT_709,
- AOM_CICP_TC_HLG);
- RANGE_CHECK(extra_cfg, matrix_coefficients, AOM_CICP_MC_IDENTITY,
- AOM_CICP_MC_ICTCP);
- RANGE_CHECK(extra_cfg, color_range, 0, 1);
-
-#if CONFIG_DIST_8X8
- RANGE_CHECK(extra_cfg, tuning, AOM_TUNE_PSNR, AOM_TUNE_DAALA_DIST);
-#else
- RANGE_CHECK(extra_cfg, tuning, AOM_TUNE_PSNR, AOM_TUNE_SSIM);
-#endif
-
- RANGE_CHECK(extra_cfg, timing_info_type, AOM_TIMING_UNSPECIFIED,
- AOM_TIMING_DEC_MODEL);
-
- RANGE_CHECK(extra_cfg, film_grain_test_vector, 0, 16);
-
- if (extra_cfg->lossless) {
- if (extra_cfg->aq_mode != 0)
- ERROR("Only --aq_mode=0 can be used with --lossless=1.");
-#if CONFIG_DIST_8X8
- if (extra_cfg->enable_dist_8x8)
- ERROR("dist-8x8 cannot be used with lossless compression.");
-#endif
- }
-
- RANGE_CHECK_HI(extra_cfg, chroma_subsampling_x, 1);
- RANGE_CHECK_HI(extra_cfg, chroma_subsampling_y, 1);
-
- return AOM_CODEC_OK;
-}
-
-static aom_codec_err_t validate_img(aom_codec_alg_priv_t *ctx,
- const aom_image_t *img) {
- switch (img->fmt) {
- case AOM_IMG_FMT_YV12:
- case AOM_IMG_FMT_I420:
- case AOM_IMG_FMT_I42016: break;
- case AOM_IMG_FMT_I444:
- case AOM_IMG_FMT_I44416:
- if (ctx->cfg.g_profile == (unsigned int)PROFILE_0 &&
- !ctx->cfg.monochrome) {
- ERROR("Invalid image format. I444 images not supported in profile.");
- }
- break;
- case AOM_IMG_FMT_I422:
- case AOM_IMG_FMT_I42216:
- if (ctx->cfg.g_profile != (unsigned int)PROFILE_2) {
- ERROR("Invalid image format. I422 images not supported in profile.");
- }
- break;
- default:
- ERROR(
- "Invalid image format. Only YV12, I420, I422, I444 images are "
- "supported.");
- break;
- }
-
- if (img->d_w != ctx->cfg.g_w || img->d_h != ctx->cfg.g_h)
- ERROR("Image size must match encoder init configuration size");
-
- return AOM_CODEC_OK;
-}
-
-static int get_image_bps(const aom_image_t *img) {
- switch (img->fmt) {
- case AOM_IMG_FMT_YV12:
- case AOM_IMG_FMT_I420: return 12;
- case AOM_IMG_FMT_I422: return 16;
- case AOM_IMG_FMT_I444: return 24;
- case AOM_IMG_FMT_I42016: return 24;
- case AOM_IMG_FMT_I42216: return 32;
- case AOM_IMG_FMT_I44416: return 48;
- default: assert(0 && "Invalid image format"); break;
- }
- return 0;
-}
-
-// Set appropriate options to disable frame super-resolution.
-static void disable_superres(AV1EncoderConfig *const oxcf) {
- oxcf->superres_mode = SUPERRES_NONE;
- oxcf->superres_scale_denominator = SCALE_NUMERATOR;
- oxcf->superres_kf_scale_denominator = SCALE_NUMERATOR;
- oxcf->superres_qthresh = 255;
- oxcf->superres_kf_qthresh = 255;
-}
-
-static aom_codec_err_t set_encoder_config(
- AV1EncoderConfig *oxcf, const aom_codec_enc_cfg_t *cfg,
- const struct av1_extracfg *extra_cfg) {
- const int is_vbr = cfg->rc_end_usage == AOM_VBR;
- oxcf->profile = cfg->g_profile;
- oxcf->fwd_kf_enabled = cfg->fwd_kf_enabled;
- oxcf->max_threads = (int)cfg->g_threads;
- oxcf->width = cfg->g_w;
- oxcf->height = cfg->g_h;
- oxcf->forced_max_frame_width = cfg->g_forced_max_frame_width;
- oxcf->forced_max_frame_height = cfg->g_forced_max_frame_height;
- oxcf->bit_depth = cfg->g_bit_depth;
- oxcf->input_bit_depth = cfg->g_input_bit_depth;
- // guess a frame rate if out of whack, use 30
- oxcf->init_framerate = (double)cfg->g_timebase.den / cfg->g_timebase.num;
- if (extra_cfg->timing_info_type == AOM_TIMING_EQUAL ||
- extra_cfg->timing_info_type == AOM_TIMING_DEC_MODEL) {
- oxcf->timing_info_present = 1;
- oxcf->timing_info.num_units_in_display_tick = cfg->g_timebase.num;
- oxcf->timing_info.time_scale = cfg->g_timebase.den;
- oxcf->timing_info.num_ticks_per_picture = 1;
- } else {
- oxcf->timing_info_present = 0;
- }
- if (extra_cfg->timing_info_type == AOM_TIMING_EQUAL) {
- oxcf->timing_info.equal_picture_interval = 1;
- oxcf->decoder_model_info_present_flag = 0;
- oxcf->display_model_info_present_flag = 1;
- } else if (extra_cfg->timing_info_type == AOM_TIMING_DEC_MODEL) {
- // if( extra_cfg->arnr_strength > 0 )
- // {
- // printf("Only --arnr-strength=0 can currently be used with
- // --timing-info=model."); return AOM_CODEC_INVALID_PARAM;
- // }
- // if( extra_cfg->enable_superres)
- // {
- // printf("Only --superres-mode=0 can currently be used with
- // --timing-info=model."); return AOM_CODEC_INVALID_PARAM;
- // }
- oxcf->buffer_model.num_units_in_decoding_tick = cfg->g_timebase.num;
- oxcf->timing_info.equal_picture_interval = 0;
- oxcf->decoder_model_info_present_flag = 1;
- oxcf->buffer_removal_time_present = 1;
- oxcf->display_model_info_present_flag = 1;
- }
- if (oxcf->init_framerate > 180) {
- oxcf->init_framerate = 30;
- oxcf->timing_info_present = 0;
- }
- oxcf->mode = GOOD;
- oxcf->cfg = &cfg->cfg;
-
- switch (cfg->g_pass) {
- case AOM_RC_ONE_PASS: oxcf->pass = 0; break;
- case AOM_RC_FIRST_PASS: oxcf->pass = 1; break;
- case AOM_RC_LAST_PASS: oxcf->pass = 2; break;
- }
-
- oxcf->lag_in_frames =
- cfg->g_pass == AOM_RC_FIRST_PASS ? 0 : cfg->g_lag_in_frames;
- oxcf->rc_mode = cfg->rc_end_usage;
-
- // Convert target bandwidth from Kbit/s to Bit/s
- oxcf->target_bandwidth = 1000 * cfg->rc_target_bitrate;
- oxcf->rc_max_intra_bitrate_pct = extra_cfg->rc_max_intra_bitrate_pct;
- oxcf->rc_max_inter_bitrate_pct = extra_cfg->rc_max_inter_bitrate_pct;
- oxcf->gf_cbr_boost_pct = extra_cfg->gf_cbr_boost_pct;
-
- oxcf->best_allowed_q =
- extra_cfg->lossless ? 0 : av1_quantizer_to_qindex(cfg->rc_min_quantizer);
- oxcf->worst_allowed_q =
- extra_cfg->lossless ? 0 : av1_quantizer_to_qindex(cfg->rc_max_quantizer);
- oxcf->cq_level = av1_quantizer_to_qindex(extra_cfg->cq_level);
- oxcf->fixed_q = -1;
-
- oxcf->enable_cdef = extra_cfg->enable_cdef;
- oxcf->enable_restoration = extra_cfg->enable_restoration;
- oxcf->disable_trellis_quant = extra_cfg->disable_trellis_quant;
- oxcf->using_qm = extra_cfg->enable_qm;
- oxcf->qm_y = extra_cfg->qm_y;
- oxcf->qm_u = extra_cfg->qm_u;
- oxcf->qm_v = extra_cfg->qm_v;
- oxcf->qm_minlevel = extra_cfg->qm_min;
- oxcf->qm_maxlevel = extra_cfg->qm_max;
-#if CONFIG_DIST_8X8
- oxcf->using_dist_8x8 = extra_cfg->enable_dist_8x8;
- if (extra_cfg->tuning == AOM_TUNE_CDEF_DIST ||
- extra_cfg->tuning == AOM_TUNE_DAALA_DIST)
- oxcf->using_dist_8x8 = 1;
-#endif
- oxcf->num_tile_groups = extra_cfg->num_tg;
- // In large-scale tile encoding mode, num_tile_groups is always 1.
- if (cfg->large_scale_tile) oxcf->num_tile_groups = 1;
- oxcf->mtu = extra_cfg->mtu_size;
-
- // FIXME(debargha): Should this be:
- // oxcf->allow_ref_frame_mvs = extra_cfg->allow_ref_frame_mvs &
- // extra_cfg->enable_order_hint ?
- // Disallow using temporal MVs while large_scale_tile = 1.
- oxcf->allow_ref_frame_mvs =
- extra_cfg->allow_ref_frame_mvs && !cfg->large_scale_tile;
- oxcf->under_shoot_pct = cfg->rc_undershoot_pct;
- oxcf->over_shoot_pct = cfg->rc_overshoot_pct;
-
- oxcf->resize_mode = (RESIZE_MODE)cfg->rc_resize_mode;
- oxcf->resize_scale_denominator = (uint8_t)cfg->rc_resize_denominator;
- oxcf->resize_kf_scale_denominator = (uint8_t)cfg->rc_resize_kf_denominator;
- if (oxcf->resize_mode == RESIZE_FIXED &&
- oxcf->resize_scale_denominator == SCALE_NUMERATOR &&
- oxcf->resize_kf_scale_denominator == SCALE_NUMERATOR)
- oxcf->resize_mode = RESIZE_NONE;
-
- if (extra_cfg->lossless || cfg->large_scale_tile) {
- disable_superres(oxcf);
- } else {
- oxcf->superres_mode = (SUPERRES_MODE)cfg->rc_superres_mode;
- oxcf->superres_scale_denominator = (uint8_t)cfg->rc_superres_denominator;
- oxcf->superres_kf_scale_denominator =
- (uint8_t)cfg->rc_superres_kf_denominator;
- oxcf->superres_qthresh = av1_quantizer_to_qindex(cfg->rc_superres_qthresh);
- oxcf->superres_kf_qthresh =
- av1_quantizer_to_qindex(cfg->rc_superres_kf_qthresh);
- if (oxcf->superres_mode == SUPERRES_FIXED &&
- oxcf->superres_scale_denominator == SCALE_NUMERATOR &&
- oxcf->superres_kf_scale_denominator == SCALE_NUMERATOR) {
- disable_superres(oxcf);
- }
- if (oxcf->superres_mode == SUPERRES_QTHRESH &&
- oxcf->superres_qthresh == 255 && oxcf->superres_kf_qthresh == 255) {
- disable_superres(oxcf);
- }
- }
-
- oxcf->maximum_buffer_size_ms = is_vbr ? 240000 : cfg->rc_buf_sz;
- oxcf->starting_buffer_level_ms = is_vbr ? 60000 : cfg->rc_buf_initial_sz;
- oxcf->optimal_buffer_level_ms = is_vbr ? 60000 : cfg->rc_buf_optimal_sz;
-
- oxcf->drop_frames_water_mark = cfg->rc_dropframe_thresh;
-
- oxcf->two_pass_vbrbias = cfg->rc_2pass_vbr_bias_pct;
- oxcf->two_pass_vbrmin_section = cfg->rc_2pass_vbr_minsection_pct;
- oxcf->two_pass_vbrmax_section = cfg->rc_2pass_vbr_maxsection_pct;
-
- oxcf->auto_key =
- cfg->kf_mode == AOM_KF_AUTO && cfg->kf_min_dist != cfg->kf_max_dist;
-
- oxcf->key_freq = cfg->kf_max_dist;
- oxcf->sframe_dist = cfg->sframe_dist;
- oxcf->sframe_mode = cfg->sframe_mode;
- oxcf->sframe_enabled = cfg->sframe_dist != 0;
- oxcf->speed = extra_cfg->cpu_used;
- oxcf->enable_auto_arf = extra_cfg->enable_auto_alt_ref;
- oxcf->enable_auto_brf = extra_cfg->enable_auto_bwd_ref;
- oxcf->noise_sensitivity = extra_cfg->noise_sensitivity;
- oxcf->sharpness = extra_cfg->sharpness;
-
- oxcf->two_pass_stats_in = cfg->rc_twopass_stats_in;
-
-#if CONFIG_FP_MB_STATS
- oxcf->firstpass_mb_stats_in = cfg->rc_firstpass_mb_stats_in;
-#endif
-
- oxcf->color_primaries = extra_cfg->color_primaries;
- oxcf->transfer_characteristics = extra_cfg->transfer_characteristics;
- oxcf->matrix_coefficients = extra_cfg->matrix_coefficients;
- oxcf->chroma_sample_position = extra_cfg->chroma_sample_position;
-
- oxcf->color_range = extra_cfg->color_range;
- oxcf->render_width = extra_cfg->render_width;
- oxcf->render_height = extra_cfg->render_height;
- oxcf->arnr_max_frames = extra_cfg->arnr_max_frames;
- // Adjust g_lag_in_frames down if not needed
- oxcf->lag_in_frames =
- AOMMIN(MAX_GF_INTERVAL + oxcf->arnr_max_frames / 2, oxcf->lag_in_frames);
- oxcf->arnr_strength = extra_cfg->arnr_strength;
- oxcf->min_gf_interval = extra_cfg->min_gf_interval;
- oxcf->max_gf_interval = extra_cfg->max_gf_interval;
-
- oxcf->tuning = extra_cfg->tuning;
- oxcf->content = extra_cfg->content;
- oxcf->cdf_update_mode = (uint8_t)extra_cfg->cdf_update_mode;
- oxcf->superblock_size = extra_cfg->superblock_size;
- if (cfg->large_scale_tile) {
- oxcf->film_grain_test_vector = 0;
- oxcf->film_grain_table_filename = NULL;
- } else {
- oxcf->film_grain_test_vector = extra_cfg->film_grain_test_vector;
- oxcf->film_grain_table_filename = extra_cfg->film_grain_table_filename;
- }
-#if CONFIG_DENOISE
- oxcf->noise_level = extra_cfg->noise_level;
- oxcf->noise_block_size = extra_cfg->noise_block_size;
-#endif
- oxcf->large_scale_tile = cfg->large_scale_tile;
- oxcf->single_tile_decoding =
- (oxcf->large_scale_tile) ? extra_cfg->single_tile_decoding : 0;
- if (oxcf->large_scale_tile) {
- // The superblock_size can only be AOM_SUPERBLOCK_SIZE_64X64 or
- // AOM_SUPERBLOCK_SIZE_128X128 while oxcf->large_scale_tile = 1. If
- // superblock_size = AOM_SUPERBLOCK_SIZE_DYNAMIC, hard set it to
- // AOM_SUPERBLOCK_SIZE_64X64(default value in large_scale_tile).
- if (extra_cfg->superblock_size != AOM_SUPERBLOCK_SIZE_64X64 &&
- extra_cfg->superblock_size != AOM_SUPERBLOCK_SIZE_128X128)
- oxcf->superblock_size = AOM_SUPERBLOCK_SIZE_64X64;
- }
-
- oxcf->row_mt = extra_cfg->row_mt;
-
- oxcf->tile_columns = extra_cfg->tile_columns;
- oxcf->tile_rows = extra_cfg->tile_rows;
-
- oxcf->monochrome = cfg->monochrome;
- oxcf->full_still_picture_hdr = cfg->full_still_picture_hdr;
- oxcf->enable_dual_filter = extra_cfg->use_dual_filter;
- oxcf->enable_order_hint = extra_cfg->enable_order_hint;
- oxcf->enable_jnt_comp =
- extra_cfg->enable_jnt_comp & extra_cfg->enable_order_hint;
- oxcf->enable_ref_frame_mvs =
- extra_cfg->enable_ref_frame_mvs & extra_cfg->enable_order_hint;
-
- oxcf->enable_warped_motion = extra_cfg->enable_warped_motion;
- oxcf->allow_warped_motion =
- extra_cfg->allow_warped_motion & extra_cfg->enable_warped_motion;
-
- oxcf->enable_superres =
- (oxcf->superres_mode != SUPERRES_NONE) && extra_cfg->enable_superres;
- if (!oxcf->enable_superres) {
- disable_superres(oxcf);
- }
-
- oxcf->tile_width_count = AOMMIN(cfg->tile_width_count, MAX_TILE_COLS);
- oxcf->tile_height_count = AOMMIN(cfg->tile_height_count, MAX_TILE_ROWS);
- for (int i = 0; i < oxcf->tile_width_count; i++) {
- oxcf->tile_widths[i] = AOMMAX(cfg->tile_widths[i], 1);
- }
- for (int i = 0; i < oxcf->tile_height_count; i++) {
- oxcf->tile_heights[i] = AOMMAX(cfg->tile_heights[i], 1);
- }
- oxcf->error_resilient_mode =
- cfg->g_error_resilient | extra_cfg->error_resilient_mode;
- oxcf->s_frame_mode = extra_cfg->s_frame_mode;
- oxcf->frame_parallel_decoding_mode = extra_cfg->frame_parallel_decoding_mode;
- if (cfg->g_pass == AOM_RC_LAST_PASS) {
- const size_t packet_sz = sizeof(FIRSTPASS_STATS);
- const int n_packets = (int)(cfg->rc_twopass_stats_in.sz / packet_sz);
- oxcf->limit = n_packets - 1;
- } else {
- oxcf->limit = cfg->g_limit;
- }
-
- if (oxcf->limit == 1) {
- // still picture mode, display model and timing is meaningless
- oxcf->display_model_info_present_flag = 0;
- oxcf->timing_info_present = 0;
- }
-
- oxcf->aq_mode = extra_cfg->aq_mode;
- oxcf->deltaq_mode = extra_cfg->deltaq_mode;
-
- oxcf->save_as_annexb = cfg->save_as_annexb;
-
- oxcf->frame_periodic_boost = extra_cfg->frame_periodic_boost;
- oxcf->motion_vector_unit_test = extra_cfg->motion_vector_unit_test;
-
-#if CONFIG_REDUCED_ENCODER_BORDER
- if (oxcf->superres_mode != SUPERRES_NONE ||
- oxcf->resize_mode != RESIZE_NONE) {
- warn(
- "Superres / resize cannot be used with CONFIG_REDUCED_ENCODER_BORDER. "
- "Disabling superres/resize.\n");
- // return AOM_CODEC_INVALID_PARAM;
- disable_superres(oxcf);
- oxcf->resize_mode = RESIZE_NONE;
- oxcf->resize_scale_denominator = SCALE_NUMERATOR;
- oxcf->resize_kf_scale_denominator = SCALE_NUMERATOR;
- }
-#endif // CONFIG_REDUCED_ENCODER_BORDER
-
- oxcf->chroma_subsampling_x = extra_cfg->chroma_subsampling_x;
- oxcf->chroma_subsampling_y = extra_cfg->chroma_subsampling_y;
-
- return AOM_CODEC_OK;
-}
-
-static aom_codec_err_t encoder_set_config(aom_codec_alg_priv_t *ctx,
- const aom_codec_enc_cfg_t *cfg) {
- aom_codec_err_t res;
- int force_key = 0;
-
- if (cfg->g_w != ctx->cfg.g_w || cfg->g_h != ctx->cfg.g_h) {
- if (cfg->g_lag_in_frames > 1 || cfg->g_pass != AOM_RC_ONE_PASS)
- ERROR("Cannot change width or height after initialization");
- if (!valid_ref_frame_size(ctx->cfg.g_w, ctx->cfg.g_h, cfg->g_w, cfg->g_h) ||
- (ctx->cpi->initial_width && (int)cfg->g_w > ctx->cpi->initial_width) ||
- (ctx->cpi->initial_height && (int)cfg->g_h > ctx->cpi->initial_height))
- force_key = 1;
- }
-
- // Prevent increasing lag_in_frames. This check is stricter than it needs
- // to be -- the limit is not increasing past the first lag_in_frames
- // value, but we don't track the initial config, only the last successful
- // config.
- if (cfg->g_lag_in_frames > ctx->cfg.g_lag_in_frames)
- ERROR("Cannot increase lag_in_frames");
-
- res = validate_config(ctx, cfg, &ctx->extra_cfg);
-
- if (res == AOM_CODEC_OK) {
- ctx->cfg = *cfg;
- set_encoder_config(&ctx->oxcf, &ctx->cfg, &ctx->extra_cfg);
- // On profile change, request a key frame
- force_key |= ctx->cpi->common.seq_params.profile != ctx->oxcf.profile;
- av1_change_config(ctx->cpi, &ctx->oxcf);
- }
-
- if (force_key) ctx->next_frame_flags |= AOM_EFLAG_FORCE_KF;
-
- return res;
-}
-
-static aom_fixed_buf_t *encoder_get_global_headers(aom_codec_alg_priv_t *ctx) {
- return av1_get_global_headers(ctx->cpi);
-}
-
-static aom_codec_err_t ctrl_get_quantizer(aom_codec_alg_priv_t *ctx,
- va_list args) {
- int *const arg = va_arg(args, int *);
- if (arg == NULL) return AOM_CODEC_INVALID_PARAM;
- *arg = av1_get_quantizer(ctx->cpi);
- return AOM_CODEC_OK;
-}
-
-static aom_codec_err_t ctrl_get_quantizer64(aom_codec_alg_priv_t *ctx,
- va_list args) {
- int *const arg = va_arg(args, int *);
- if (arg == NULL) return AOM_CODEC_INVALID_PARAM;
- *arg = av1_qindex_to_quantizer(av1_get_quantizer(ctx->cpi));
- return AOM_CODEC_OK;
-}
-
-static aom_codec_err_t update_extra_cfg(aom_codec_alg_priv_t *ctx,
- const struct av1_extracfg *extra_cfg) {
- const aom_codec_err_t res = validate_config(ctx, &ctx->cfg, extra_cfg);
- if (res == AOM_CODEC_OK) {
- ctx->extra_cfg = *extra_cfg;
- set_encoder_config(&ctx->oxcf, &ctx->cfg, &ctx->extra_cfg);
- av1_change_config(ctx->cpi, &ctx->oxcf);
- }
- return res;
-}
-
-static aom_codec_err_t ctrl_set_cpuused(aom_codec_alg_priv_t *ctx,
- va_list args) {
- struct av1_extracfg extra_cfg = ctx->extra_cfg;
- extra_cfg.cpu_used = CAST(AOME_SET_CPUUSED, args);
- return update_extra_cfg(ctx, &extra_cfg);
-}
-
-static aom_codec_err_t ctrl_set_enable_auto_alt_ref(aom_codec_alg_priv_t *ctx,
- va_list args) {
- struct av1_extracfg extra_cfg = ctx->extra_cfg;
- extra_cfg.enable_auto_alt_ref = CAST(AOME_SET_ENABLEAUTOALTREF, args);
- return update_extra_cfg(ctx, &extra_cfg);
-}
-
-static aom_codec_err_t ctrl_set_enable_auto_bwd_ref(aom_codec_alg_priv_t *ctx,
- va_list args) {
- struct av1_extracfg extra_cfg = ctx->extra_cfg;
- extra_cfg.enable_auto_bwd_ref = CAST(AOME_SET_ENABLEAUTOBWDREF, args);
- return update_extra_cfg(ctx, &extra_cfg);
-}
-
-static aom_codec_err_t ctrl_set_noise_sensitivity(aom_codec_alg_priv_t *ctx,
- va_list args) {
- struct av1_extracfg extra_cfg = ctx->extra_cfg;
- extra_cfg.noise_sensitivity = CAST(AV1E_SET_NOISE_SENSITIVITY, args);
- return update_extra_cfg(ctx, &extra_cfg);
-}
-
-static aom_codec_err_t ctrl_set_sharpness(aom_codec_alg_priv_t *ctx,
- va_list args) {
- struct av1_extracfg extra_cfg = ctx->extra_cfg;
- extra_cfg.sharpness = CAST(AOME_SET_SHARPNESS, args);
- return update_extra_cfg(ctx, &extra_cfg);
-}
-
-static aom_codec_err_t ctrl_set_static_thresh(aom_codec_alg_priv_t *ctx,
- va_list args) {
- struct av1_extracfg extra_cfg = ctx->extra_cfg;
- extra_cfg.static_thresh = CAST(AOME_SET_STATIC_THRESHOLD, args);
- return update_extra_cfg(ctx, &extra_cfg);
-}
-
-static aom_codec_err_t ctrl_set_row_mt(aom_codec_alg_priv_t *ctx,
- va_list args) {
- struct av1_extracfg extra_cfg = ctx->extra_cfg;
- extra_cfg.row_mt = CAST(AV1E_SET_ROW_MT, args);
- return update_extra_cfg(ctx, &extra_cfg);
-}
-
-static aom_codec_err_t ctrl_set_tile_columns(aom_codec_alg_priv_t *ctx,
- va_list args) {
- struct av1_extracfg extra_cfg = ctx->extra_cfg;
- extra_cfg.tile_columns = CAST(AV1E_SET_TILE_COLUMNS, args);
- return update_extra_cfg(ctx, &extra_cfg);
-}
-
-static aom_codec_err_t ctrl_set_tile_rows(aom_codec_alg_priv_t *ctx,
- va_list args) {
- struct av1_extracfg extra_cfg = ctx->extra_cfg;
- extra_cfg.tile_rows = CAST(AV1E_SET_TILE_ROWS, args);
- return update_extra_cfg(ctx, &extra_cfg);
-}
-
-static aom_codec_err_t ctrl_set_arnr_max_frames(aom_codec_alg_priv_t *ctx,
- va_list args) {
- struct av1_extracfg extra_cfg = ctx->extra_cfg;
- extra_cfg.arnr_max_frames = CAST(AOME_SET_ARNR_MAXFRAMES, args);
- return update_extra_cfg(ctx, &extra_cfg);
-}
-
-static aom_codec_err_t ctrl_set_arnr_strength(aom_codec_alg_priv_t *ctx,
- va_list args) {
- struct av1_extracfg extra_cfg = ctx->extra_cfg;
- extra_cfg.arnr_strength = CAST(AOME_SET_ARNR_STRENGTH, args);
- return update_extra_cfg(ctx, &extra_cfg);
-}
-
-static aom_codec_err_t ctrl_set_tuning(aom_codec_alg_priv_t *ctx,
- va_list args) {
- struct av1_extracfg extra_cfg = ctx->extra_cfg;
- extra_cfg.tuning = CAST(AOME_SET_TUNING, args);
- return update_extra_cfg(ctx, &extra_cfg);
-}
-
-static aom_codec_err_t ctrl_set_cq_level(aom_codec_alg_priv_t *ctx,
- va_list args) {
- struct av1_extracfg extra_cfg = ctx->extra_cfg;
- extra_cfg.cq_level = CAST(AOME_SET_CQ_LEVEL, args);
- return update_extra_cfg(ctx, &extra_cfg);
-}
-
-static aom_codec_err_t ctrl_set_rc_max_intra_bitrate_pct(
- aom_codec_alg_priv_t *ctx, va_list args) {
- struct av1_extracfg extra_cfg = ctx->extra_cfg;
- extra_cfg.rc_max_intra_bitrate_pct =
- CAST(AOME_SET_MAX_INTRA_BITRATE_PCT, args);
- return update_extra_cfg(ctx, &extra_cfg);
-}
-
-static aom_codec_err_t ctrl_set_rc_max_inter_bitrate_pct(
- aom_codec_alg_priv_t *ctx, va_list args) {
- struct av1_extracfg extra_cfg = ctx->extra_cfg;
- extra_cfg.rc_max_inter_bitrate_pct =
- CAST(AOME_SET_MAX_INTER_BITRATE_PCT, args);
- return update_extra_cfg(ctx, &extra_cfg);
-}
-
-static aom_codec_err_t ctrl_set_rc_gf_cbr_boost_pct(aom_codec_alg_priv_t *ctx,
- va_list args) {
- struct av1_extracfg extra_cfg = ctx->extra_cfg;
- extra_cfg.gf_cbr_boost_pct = CAST(AV1E_SET_GF_CBR_BOOST_PCT, args);
- return update_extra_cfg(ctx, &extra_cfg);
-}
-
-static aom_codec_err_t ctrl_set_lossless(aom_codec_alg_priv_t *ctx,
- va_list args) {
- struct av1_extracfg extra_cfg = ctx->extra_cfg;
- extra_cfg.lossless = CAST(AV1E_SET_LOSSLESS, args);
- return update_extra_cfg(ctx, &extra_cfg);
-}
-
-static aom_codec_err_t ctrl_set_enable_cdef(aom_codec_alg_priv_t *ctx,
- va_list args) {
- struct av1_extracfg extra_cfg = ctx->extra_cfg;
- extra_cfg.enable_cdef = CAST(AV1E_SET_ENABLE_CDEF, args);
- return update_extra_cfg(ctx, &extra_cfg);
-}
-
-static aom_codec_err_t ctrl_set_enable_restoration(aom_codec_alg_priv_t *ctx,
- va_list args) {
- struct av1_extracfg extra_cfg = ctx->extra_cfg;
- extra_cfg.enable_restoration = CAST(AV1E_SET_ENABLE_RESTORATION, args);
- return update_extra_cfg(ctx, &extra_cfg);
-}
-
-static aom_codec_err_t ctrl_set_disable_trellis_quant(aom_codec_alg_priv_t *ctx,
- va_list args) {
- struct av1_extracfg extra_cfg = ctx->extra_cfg;
- extra_cfg.disable_trellis_quant = CAST(AV1E_SET_DISABLE_TRELLIS_QUANT, args);
- return update_extra_cfg(ctx, &extra_cfg);
-}
-
-static aom_codec_err_t ctrl_set_enable_qm(aom_codec_alg_priv_t *ctx,
- va_list args) {
- struct av1_extracfg extra_cfg = ctx->extra_cfg;
- extra_cfg.enable_qm = CAST(AV1E_SET_ENABLE_QM, args);
- return update_extra_cfg(ctx, &extra_cfg);
-}
-static aom_codec_err_t ctrl_set_qm_y(aom_codec_alg_priv_t *ctx, va_list args) {
- struct av1_extracfg extra_cfg = ctx->extra_cfg;
- extra_cfg.qm_y = CAST(AV1E_SET_QM_Y, args);
- return update_extra_cfg(ctx, &extra_cfg);
-}
-static aom_codec_err_t ctrl_set_qm_u(aom_codec_alg_priv_t *ctx, va_list args) {
- struct av1_extracfg extra_cfg = ctx->extra_cfg;
- extra_cfg.qm_u = CAST(AV1E_SET_QM_U, args);
- return update_extra_cfg(ctx, &extra_cfg);
-}
-static aom_codec_err_t ctrl_set_qm_v(aom_codec_alg_priv_t *ctx, va_list args) {
- struct av1_extracfg extra_cfg = ctx->extra_cfg;
- extra_cfg.qm_v = CAST(AV1E_SET_QM_V, args);
- return update_extra_cfg(ctx, &extra_cfg);
-}
-static aom_codec_err_t ctrl_set_qm_min(aom_codec_alg_priv_t *ctx,
- va_list args) {
- struct av1_extracfg extra_cfg = ctx->extra_cfg;
- extra_cfg.qm_min = CAST(AV1E_SET_QM_MIN, args);
- return update_extra_cfg(ctx, &extra_cfg);
-}
-
-static aom_codec_err_t ctrl_set_qm_max(aom_codec_alg_priv_t *ctx,
- va_list args) {
- struct av1_extracfg extra_cfg = ctx->extra_cfg;
- extra_cfg.qm_max = CAST(AV1E_SET_QM_MAX, args);
- return update_extra_cfg(ctx, &extra_cfg);
-}
-#if CONFIG_DIST_8X8
-static aom_codec_err_t ctrl_set_enable_dist_8x8(aom_codec_alg_priv_t *ctx,
- va_list args) {
- struct av1_extracfg extra_cfg = ctx->extra_cfg;
- extra_cfg.enable_dist_8x8 = CAST(AV1E_SET_ENABLE_DIST_8X8, args);
- return update_extra_cfg(ctx, &extra_cfg);
-}
-#endif
-static aom_codec_err_t ctrl_set_num_tg(aom_codec_alg_priv_t *ctx,
- va_list args) {
- struct av1_extracfg extra_cfg = ctx->extra_cfg;
- extra_cfg.num_tg = CAST(AV1E_SET_NUM_TG, args);
- return update_extra_cfg(ctx, &extra_cfg);
-}
-
-static aom_codec_err_t ctrl_set_mtu(aom_codec_alg_priv_t *ctx, va_list args) {
- struct av1_extracfg extra_cfg = ctx->extra_cfg;
- extra_cfg.mtu_size = CAST(AV1E_SET_MTU, args);
- return update_extra_cfg(ctx, &extra_cfg);
-}
-static aom_codec_err_t ctrl_set_timing_info_type(aom_codec_alg_priv_t *ctx,
- va_list args) {
- struct av1_extracfg extra_cfg = ctx->extra_cfg;
- extra_cfg.timing_info_type = CAST(AV1E_SET_TIMING_INFO_TYPE, args);
- return update_extra_cfg(ctx, &extra_cfg);
-}
-
-static aom_codec_err_t ctrl_set_enable_df(aom_codec_alg_priv_t *ctx,
- va_list args) {
- struct av1_extracfg extra_cfg = ctx->extra_cfg;
- extra_cfg.use_dual_filter = CAST(AV1E_SET_ENABLE_DF, args);
- return update_extra_cfg(ctx, &extra_cfg);
-}
-
-static aom_codec_err_t ctrl_set_enable_order_hint(aom_codec_alg_priv_t *ctx,
- va_list args) {
- struct av1_extracfg extra_cfg = ctx->extra_cfg;
- extra_cfg.enable_order_hint = CAST(AV1E_SET_ENABLE_ORDER_HINT, args);
- return update_extra_cfg(ctx, &extra_cfg);
-}
-
-static aom_codec_err_t ctrl_set_enable_jnt_comp(aom_codec_alg_priv_t *ctx,
- va_list args) {
- struct av1_extracfg extra_cfg = ctx->extra_cfg;
- extra_cfg.enable_jnt_comp = CAST(AV1E_SET_ENABLE_JNT_COMP, args);
- return update_extra_cfg(ctx, &extra_cfg);
-}
-
-static aom_codec_err_t ctrl_set_enable_ref_frame_mvs(aom_codec_alg_priv_t *ctx,
- va_list args) {
- struct av1_extracfg extra_cfg = ctx->extra_cfg;
- extra_cfg.enable_ref_frame_mvs = CAST(AV1E_SET_ENABLE_REF_FRAME_MVS, args);
- return update_extra_cfg(ctx, &extra_cfg);
-}
-
-static aom_codec_err_t ctrl_set_allow_ref_frame_mvs(aom_codec_alg_priv_t *ctx,
- va_list args) {
- struct av1_extracfg extra_cfg = ctx->extra_cfg;
- extra_cfg.allow_ref_frame_mvs = CAST(AV1E_SET_ALLOW_REF_FRAME_MVS, args);
- return update_extra_cfg(ctx, &extra_cfg);
-}
-
-static aom_codec_err_t ctrl_set_enable_warped_motion(aom_codec_alg_priv_t *ctx,
- va_list args) {
- struct av1_extracfg extra_cfg = ctx->extra_cfg;
- extra_cfg.enable_warped_motion = CAST(AV1E_SET_ENABLE_WARPED_MOTION, args);
- return update_extra_cfg(ctx, &extra_cfg);
-}
-
-static aom_codec_err_t ctrl_set_allow_warped_motion(aom_codec_alg_priv_t *ctx,
- va_list args) {
- struct av1_extracfg extra_cfg = ctx->extra_cfg;
- extra_cfg.allow_warped_motion = CAST(AV1E_SET_ALLOW_WARPED_MOTION, args);
- return update_extra_cfg(ctx, &extra_cfg);
-}
-
-static aom_codec_err_t ctrl_set_enable_superres(aom_codec_alg_priv_t *ctx,
- va_list args) {
- struct av1_extracfg extra_cfg = ctx->extra_cfg;
- extra_cfg.enable_superres = CAST(AV1E_SET_ENABLE_SUPERRES, args);
- return update_extra_cfg(ctx, &extra_cfg);
-}
-
-static aom_codec_err_t ctrl_set_error_resilient_mode(aom_codec_alg_priv_t *ctx,
- va_list args) {
- struct av1_extracfg extra_cfg = ctx->extra_cfg;
- extra_cfg.error_resilient_mode = CAST(AV1E_SET_ERROR_RESILIENT_MODE, args);
- return update_extra_cfg(ctx, &extra_cfg);
-}
-
-static aom_codec_err_t ctrl_set_s_frame_mode(aom_codec_alg_priv_t *ctx,
- va_list args) {
- struct av1_extracfg extra_cfg = ctx->extra_cfg;
- extra_cfg.s_frame_mode = CAST(AV1E_SET_S_FRAME_MODE, args);
- return update_extra_cfg(ctx, &extra_cfg);
-}
-
-static aom_codec_err_t ctrl_set_frame_parallel_decoding_mode(
- aom_codec_alg_priv_t *ctx, va_list args) {
- struct av1_extracfg extra_cfg = ctx->extra_cfg;
- extra_cfg.frame_parallel_decoding_mode =
- CAST(AV1E_SET_FRAME_PARALLEL_DECODING, args);
- return update_extra_cfg(ctx, &extra_cfg);
-}
-
-static aom_codec_err_t ctrl_set_single_tile_decoding(aom_codec_alg_priv_t *ctx,
- va_list args) {
- struct av1_extracfg extra_cfg = ctx->extra_cfg;
- extra_cfg.single_tile_decoding = CAST(AV1E_SET_SINGLE_TILE_DECODING, args);
- return update_extra_cfg(ctx, &extra_cfg);
-}
-
-static aom_codec_err_t ctrl_set_aq_mode(aom_codec_alg_priv_t *ctx,
- va_list args) {
- struct av1_extracfg extra_cfg = ctx->extra_cfg;
- extra_cfg.aq_mode = CAST(AV1E_SET_AQ_MODE, args);
- return update_extra_cfg(ctx, &extra_cfg);
-}
-
-static aom_codec_err_t ctrl_set_film_grain_test_vector(
- aom_codec_alg_priv_t *ctx, va_list args) {
- struct av1_extracfg extra_cfg = ctx->extra_cfg;
- extra_cfg.film_grain_test_vector =
- CAST(AV1E_SET_FILM_GRAIN_TEST_VECTOR, args);
- return update_extra_cfg(ctx, &extra_cfg);
-}
-
-static aom_codec_err_t ctrl_set_film_grain_table(aom_codec_alg_priv_t *ctx,
- va_list args) {
- struct av1_extracfg extra_cfg = ctx->extra_cfg;
- extra_cfg.film_grain_table_filename = CAST(AV1E_SET_FILM_GRAIN_TABLE, args);
- return update_extra_cfg(ctx, &extra_cfg);
-}
-
-#if CONFIG_DENOISE
-static aom_codec_err_t ctrl_set_denoise_noise_level(aom_codec_alg_priv_t *ctx,
- va_list args) {
- struct av1_extracfg extra_cfg = ctx->extra_cfg;
- extra_cfg.noise_level =
- ((float)CAST(AV1E_SET_DENOISE_NOISE_LEVEL, args)) / 10.0f;
- return update_extra_cfg(ctx, &extra_cfg);
-}
-
-static aom_codec_err_t ctrl_set_denoise_block_size(aom_codec_alg_priv_t *ctx,
- va_list args) {
- struct av1_extracfg extra_cfg = ctx->extra_cfg;
- extra_cfg.noise_block_size = CAST(AV1E_SET_DENOISE_BLOCK_SIZE, args);
- return update_extra_cfg(ctx, &extra_cfg);
-}
-#endif
-
-static aom_codec_err_t ctrl_set_deltaq_mode(aom_codec_alg_priv_t *ctx,
- va_list args) {
- struct av1_extracfg extra_cfg = ctx->extra_cfg;
- extra_cfg.deltaq_mode = CAST(AV1E_SET_DELTAQ_MODE, args);
- return update_extra_cfg(ctx, &extra_cfg);
-}
-
-static aom_codec_err_t ctrl_set_min_gf_interval(aom_codec_alg_priv_t *ctx,
- va_list args) {
- struct av1_extracfg extra_cfg = ctx->extra_cfg;
- extra_cfg.min_gf_interval = CAST(AV1E_SET_MIN_GF_INTERVAL, args);
- return update_extra_cfg(ctx, &extra_cfg);
-}
-
-static aom_codec_err_t ctrl_set_max_gf_interval(aom_codec_alg_priv_t *ctx,
- va_list args) {
- struct av1_extracfg extra_cfg = ctx->extra_cfg;
- extra_cfg.max_gf_interval = CAST(AV1E_SET_MAX_GF_INTERVAL, args);
- return update_extra_cfg(ctx, &extra_cfg);
-}
-
-static aom_codec_err_t ctrl_set_frame_periodic_boost(aom_codec_alg_priv_t *ctx,
- va_list args) {
- struct av1_extracfg extra_cfg = ctx->extra_cfg;
- extra_cfg.frame_periodic_boost = CAST(AV1E_SET_FRAME_PERIODIC_BOOST, args);
- return update_extra_cfg(ctx, &extra_cfg);
-}
-
-static aom_codec_err_t ctrl_enable_motion_vector_unit_test(
- aom_codec_alg_priv_t *ctx, va_list args) {
- struct av1_extracfg extra_cfg = ctx->extra_cfg;
- extra_cfg.motion_vector_unit_test =
- CAST(AV1E_ENABLE_MOTION_VECTOR_UNIT_TEST, args);
- return update_extra_cfg(ctx, &extra_cfg);
-}
-
-static aom_codec_err_t encoder_init(aom_codec_ctx_t *ctx,
- aom_codec_priv_enc_mr_cfg_t *data) {
- aom_codec_err_t res = AOM_CODEC_OK;
- (void)data;
-
- if (ctx->priv == NULL) {
- aom_codec_alg_priv_t *const priv = aom_calloc(1, sizeof(*priv));
- if (priv == NULL) return AOM_CODEC_MEM_ERROR;
-
- ctx->priv = (aom_codec_priv_t *)priv;
- ctx->priv->init_flags = ctx->init_flags;
- ctx->priv->enc.total_encoders = 1;
- priv->buffer_pool = (BufferPool *)aom_calloc(1, sizeof(BufferPool));
- if (priv->buffer_pool == NULL) return AOM_CODEC_MEM_ERROR;
-
-#if CONFIG_MULTITHREAD
- if (pthread_mutex_init(&priv->buffer_pool->pool_mutex, NULL)) {
- return AOM_CODEC_MEM_ERROR;
- }
-#endif
-
- if (ctx->config.enc) {
- // Update the reference to the config structure to an internal copy.
- priv->cfg = *ctx->config.enc;
- ctx->config.enc = &priv->cfg;
- }
-
- priv->extra_cfg = default_extra_cfg;
- aom_once(av1_initialize_enc);
-
- res = validate_config(priv, &priv->cfg, &priv->extra_cfg);
-
- if (res == AOM_CODEC_OK) {
- set_encoder_config(&priv->oxcf, &priv->cfg, &priv->extra_cfg);
- priv->oxcf.use_highbitdepth =
- (ctx->init_flags & AOM_CODEC_USE_HIGHBITDEPTH) ? 1 : 0;
- priv->cpi = av1_create_compressor(&priv->oxcf, priv->buffer_pool);
- if (priv->cpi == NULL)
- res = AOM_CODEC_MEM_ERROR;
- else
- priv->cpi->output_pkt_list = &priv->pkt_list.head;
- }
- }
-
- return res;
-}
-
-static aom_codec_err_t encoder_destroy(aom_codec_alg_priv_t *ctx) {
- free(ctx->cx_data);
- av1_remove_compressor(ctx->cpi);
-#if CONFIG_MULTITHREAD
- pthread_mutex_destroy(&ctx->buffer_pool->pool_mutex);
-#endif
- aom_free(ctx->buffer_pool);
- aom_free(ctx);
- return AOM_CODEC_OK;
-}
-
-static aom_codec_frame_flags_t get_frame_pkt_flags(const AV1_COMP *cpi,
- unsigned int lib_flags) {
- aom_codec_frame_flags_t flags = lib_flags << 16;
-
- if (lib_flags & FRAMEFLAGS_KEY) flags |= AOM_FRAME_IS_KEY;
-
- if (cpi->droppable) flags |= AOM_FRAME_IS_DROPPABLE;
-
- return flags;
-}
-
-static aom_codec_err_t encoder_encode(aom_codec_alg_priv_t *ctx,
- const aom_image_t *img,
- aom_codec_pts_t pts,
- unsigned long duration,
- aom_enc_frame_flags_t enc_flags) {
- const size_t kMinCompressedSize = 8192;
- volatile aom_codec_err_t res = AOM_CODEC_OK;
- AV1_COMP *const cpi = ctx->cpi;
- const aom_rational_t *const timebase = &ctx->cfg.g_timebase;
-
- if (cpi == NULL) return AOM_CODEC_INVALID_PARAM;
-
- if (img != NULL) {
- res = validate_img(ctx, img);
- // TODO(jzern) the checks related to cpi's validity should be treated as a
- // failure condition, encoder setup is done fully in init() currently.
- if (res == AOM_CODEC_OK) {
- size_t data_sz = ALIGN_POWER_OF_TWO(ctx->cfg.g_w, 5) *
- ALIGN_POWER_OF_TWO(ctx->cfg.g_h, 5) * get_image_bps(img);
- if (data_sz < kMinCompressedSize) data_sz = kMinCompressedSize;
- if (ctx->cx_data == NULL || ctx->cx_data_sz < data_sz) {
- ctx->cx_data_sz = data_sz;
- free(ctx->cx_data);
- ctx->cx_data = (unsigned char *)malloc(ctx->cx_data_sz);
- if (ctx->cx_data == NULL) {
- return AOM_CODEC_MEM_ERROR;
- }
- }
- }
- }
-
- if (ctx->oxcf.mode != GOOD) {
- ctx->oxcf.mode = GOOD;
- av1_change_config(ctx->cpi, &ctx->oxcf);
- }
-
- aom_codec_pkt_list_init(&ctx->pkt_list);
-
- volatile aom_enc_frame_flags_t flags = enc_flags;
-
- // The jmp_buf is valid only for the duration of the function that calls
- // setjmp(). Therefore, this function must reset the 'setjmp' field to 0
- // before it returns.
- if (setjmp(cpi->common.error.jmp)) {
- cpi->common.error.setjmp = 0;
- res = update_error_state(ctx, &cpi->common.error);
- aom_clear_system_state();
- return res;
- }
- cpi->common.error.setjmp = 1;
-
- // Note(yunqing): While applying encoding flags, always start from enabling
- // all, and then modifying according to the flags. Previous frame's flags are
- // overwritten.
- av1_apply_encoding_flags(cpi, flags);
-
- // Handle fixed keyframe intervals
- if (ctx->cfg.kf_mode == AOM_KF_AUTO &&
- ctx->cfg.kf_min_dist == ctx->cfg.kf_max_dist) {
- if (++ctx->fixed_kf_cntr > ctx->cfg.kf_min_dist) {
- flags |= AOM_EFLAG_FORCE_KF;
- ctx->fixed_kf_cntr = 1;
- }
- }
-
- if (res == AOM_CODEC_OK) {
- int64_t dst_time_stamp = timebase_units_to_ticks(timebase, pts);
- int64_t dst_end_time_stamp =
- timebase_units_to_ticks(timebase, pts + duration);
-
- // Set up internal flags
- if (ctx->base.init_flags & AOM_CODEC_USE_PSNR) cpi->b_calculate_psnr = 1;
-
- if (img != NULL) {
- YV12_BUFFER_CONFIG sd;
- res = image2yuvconfig(img, &sd);
-
- // Store the original flags in to the frame buffer. Will extract the
- // key frame flag when we actually encode this frame.
- if (av1_receive_raw_frame(cpi, flags | ctx->next_frame_flags, &sd,
- dst_time_stamp, dst_end_time_stamp)) {
- res = update_error_state(ctx, &cpi->common.error);
- }
- ctx->next_frame_flags = 0;
- }
-
- unsigned char *cx_data = ctx->cx_data;
- size_t cx_data_sz = ctx->cx_data_sz;
-
- /* Any pending invisible frames? */
- if (ctx->pending_cx_data) {
- memmove(cx_data, ctx->pending_cx_data, ctx->pending_cx_data_sz);
- ctx->pending_cx_data = cx_data;
- cx_data += ctx->pending_cx_data_sz;
- cx_data_sz -= ctx->pending_cx_data_sz;
-
- /* TODO: this is a minimal check, the underlying codec doesn't respect
- * the buffer size anyway.
- */
- if (cx_data_sz < ctx->cx_data_sz / 2) {
- aom_internal_error(&cpi->common.error, AOM_CODEC_ERROR,
- "Compressed data buffer too small");
- }
- }
-
- size_t frame_size = 0;
- unsigned int lib_flags = 0;
- int is_frame_visible = 0;
- int index_size = 0;
- // invisible frames get packed with the next visible frame
- while (cx_data_sz - index_size >= ctx->cx_data_sz / 2 &&
- !is_frame_visible &&
- -1 != av1_get_compressed_data(cpi, &lib_flags, &frame_size, cx_data,
- &dst_time_stamp, &dst_end_time_stamp,
- !img, timebase)) {
- if (cpi->common.seq_params.frame_id_numbers_present_flag) {
- if (cpi->common.invalid_delta_frame_id_minus_1) {
- aom_internal_error(&cpi->common.error, AOM_CODEC_ERROR,
- "Invalid delta_frame_id_minus_1");
- }
- }
- cpi->seq_params_locked = 1;
- if (frame_size) {
- if (ctx->pending_cx_data == 0) ctx->pending_cx_data = cx_data;
-
- const int write_temporal_delimiter =
- !cpi->common.spatial_layer_id && !ctx->pending_frame_count;
-
- if (write_temporal_delimiter) {
- uint32_t obu_header_size = 1;
- const uint32_t obu_payload_size = 0;
- const size_t length_field_size =
- aom_uleb_size_in_bytes(obu_payload_size);
-
- if (ctx->pending_cx_data) {
- const size_t move_offset = length_field_size + 1;
- memmove(ctx->pending_cx_data + move_offset, ctx->pending_cx_data,
- frame_size);
- }
- const uint32_t obu_header_offset = 0;
- obu_header_size = write_obu_header(
- OBU_TEMPORAL_DELIMITER, 0,
- (uint8_t *)(ctx->pending_cx_data + obu_header_offset));
-
- // OBUs are preceded/succeeded by an unsigned leb128 coded integer.
- if (write_uleb_obu_size(obu_header_size, obu_payload_size,
- ctx->pending_cx_data) != AOM_CODEC_OK) {
- aom_internal_error(&cpi->common.error, AOM_CODEC_ERROR, NULL);
- }
-
- frame_size += obu_header_size + obu_payload_size + length_field_size;
- }
-
- if (ctx->oxcf.save_as_annexb) {
- size_t curr_frame_size = frame_size;
- if (av1_convert_sect5obus_to_annexb(cx_data, &curr_frame_size) !=
- AOM_CODEC_OK) {
- aom_internal_error(&cpi->common.error, AOM_CODEC_ERROR, NULL);
- }
- frame_size = curr_frame_size;
-
- // B_PRIME (add frame size)
- const size_t length_field_size = aom_uleb_size_in_bytes(frame_size);
- if (ctx->pending_cx_data) {
- const size_t move_offset = length_field_size;
- memmove(cx_data + move_offset, cx_data, frame_size);
- }
- if (write_uleb_obu_size(0, (uint32_t)frame_size, cx_data) !=
- AOM_CODEC_OK) {
- aom_internal_error(&cpi->common.error, AOM_CODEC_ERROR, NULL);
- }
- frame_size += length_field_size;
- }
-
- ctx->pending_frame_sizes[ctx->pending_frame_count++] = frame_size;
- ctx->pending_cx_data_sz += frame_size;
-
- cx_data += frame_size;
- cx_data_sz -= frame_size;
-
- index_size = MAG_SIZE * (ctx->pending_frame_count - 1) + 2;
-
- is_frame_visible = cpi->common.show_frame;
- }
- }
- if (is_frame_visible) {
- // Add the frame packet to the list of returned packets.
- aom_codec_cx_pkt_t pkt;
-
- if (ctx->oxcf.save_as_annexb) {
- // B_PRIME (add TU size)
- size_t tu_size = ctx->pending_cx_data_sz;
- const size_t length_field_size = aom_uleb_size_in_bytes(tu_size);
- if (ctx->pending_cx_data) {
- const size_t move_offset = length_field_size;
- memmove(ctx->pending_cx_data + move_offset, ctx->pending_cx_data,
- tu_size);
- }
- if (write_uleb_obu_size(0, (uint32_t)tu_size, ctx->pending_cx_data) !=
- AOM_CODEC_OK) {
- aom_internal_error(&cpi->common.error, AOM_CODEC_ERROR, NULL);
- }
- ctx->pending_cx_data_sz += length_field_size;
- }
-
- pkt.kind = AOM_CODEC_CX_FRAME_PKT;
-
- pkt.data.frame.buf = ctx->pending_cx_data;
- pkt.data.frame.sz = ctx->pending_cx_data_sz;
- pkt.data.frame.partition_id = -1;
- pkt.data.frame.vis_frame_size = frame_size;
-
- pkt.data.frame.pts = ticks_to_timebase_units(timebase, dst_time_stamp);
- pkt.data.frame.flags = get_frame_pkt_flags(cpi, lib_flags);
- pkt.data.frame.duration = (uint32_t)ticks_to_timebase_units(
- timebase, dst_end_time_stamp - dst_time_stamp);
-
- aom_codec_pkt_list_add(&ctx->pkt_list.head, &pkt);
-
- ctx->pending_cx_data = NULL;
- ctx->pending_cx_data_sz = 0;
- ctx->pending_frame_count = 0;
- }
- }
-
- cpi->common.error.setjmp = 0;
- return res;
-}
-
-static const aom_codec_cx_pkt_t *encoder_get_cxdata(aom_codec_alg_priv_t *ctx,
- aom_codec_iter_t *iter) {
- return aom_codec_pkt_list_get(&ctx->pkt_list.head, iter);
-}
-
-static aom_codec_err_t ctrl_set_reference(aom_codec_alg_priv_t *ctx,
- va_list args) {
- av1_ref_frame_t *const frame = va_arg(args, av1_ref_frame_t *);
-
- if (frame != NULL) {
- YV12_BUFFER_CONFIG sd;
-
- image2yuvconfig(&frame->img, &sd);
- av1_set_reference_enc(ctx->cpi, frame->idx, &sd);
- return AOM_CODEC_OK;
- } else {
- return AOM_CODEC_INVALID_PARAM;
- }
-}
-
-static aom_codec_err_t ctrl_copy_reference(aom_codec_alg_priv_t *ctx,
- va_list args) {
- av1_ref_frame_t *const frame = va_arg(args, av1_ref_frame_t *);
-
- if (frame != NULL) {
- YV12_BUFFER_CONFIG sd;
-
- image2yuvconfig(&frame->img, &sd);
- av1_copy_reference_enc(ctx->cpi, frame->idx, &sd);
- return AOM_CODEC_OK;
- } else {
- return AOM_CODEC_INVALID_PARAM;
- }
-}
-
-static aom_codec_err_t ctrl_get_reference(aom_codec_alg_priv_t *ctx,
- va_list args) {
- av1_ref_frame_t *const frame = va_arg(args, av1_ref_frame_t *);
-
- if (frame != NULL) {
- YV12_BUFFER_CONFIG *fb = get_ref_frame(&ctx->cpi->common, frame->idx);
- if (fb == NULL) return AOM_CODEC_ERROR;
-
- yuvconfig2image(&frame->img, fb, NULL);
- return AOM_CODEC_OK;
- } else {
- return AOM_CODEC_INVALID_PARAM;
- }
-}
-
-static aom_codec_err_t ctrl_get_new_frame_image(aom_codec_alg_priv_t *ctx,
- va_list args) {
- aom_image_t *const new_img = va_arg(args, aom_image_t *);
-
- if (new_img != NULL) {
- YV12_BUFFER_CONFIG new_frame;
-
- if (av1_get_last_show_frame(ctx->cpi, &new_frame) == 0) {
- yuvconfig2image(new_img, &new_frame, NULL);
- return AOM_CODEC_OK;
- } else {
- return AOM_CODEC_ERROR;
- }
- } else {
- return AOM_CODEC_INVALID_PARAM;
- }
-}
-
-static aom_codec_err_t ctrl_copy_new_frame_image(aom_codec_alg_priv_t *ctx,
- va_list args) {
- aom_image_t *const new_img = va_arg(args, aom_image_t *);
-
- if (new_img != NULL) {
- YV12_BUFFER_CONFIG new_frame;
-
- if (av1_get_last_show_frame(ctx->cpi, &new_frame) == 0) {
- YV12_BUFFER_CONFIG sd;
- image2yuvconfig(new_img, &sd);
- return av1_copy_new_frame_enc(&ctx->cpi->common, &new_frame, &sd);
- } else {
- return AOM_CODEC_ERROR;
- }
- } else {
- return AOM_CODEC_INVALID_PARAM;
- }
-}
-
-static aom_codec_err_t ctrl_set_previewpp(aom_codec_alg_priv_t *ctx,
- va_list args) {
- (void)ctx;
- (void)args;
- return AOM_CODEC_INCAPABLE;
-}
-
-static aom_image_t *encoder_get_preview(aom_codec_alg_priv_t *ctx) {
- YV12_BUFFER_CONFIG sd;
-
- if (av1_get_preview_raw_frame(ctx->cpi, &sd) == 0) {
- yuvconfig2image(&ctx->preview_img, &sd, NULL);
- return &ctx->preview_img;
- } else {
- return NULL;
- }
-}
-
-static aom_codec_err_t ctrl_use_reference(aom_codec_alg_priv_t *ctx,
- va_list args) {
- const int reference_flag = va_arg(args, int);
-
- av1_use_as_reference(ctx->cpi, reference_flag);
- return AOM_CODEC_OK;
-}
-
-static aom_codec_err_t ctrl_set_roi_map(aom_codec_alg_priv_t *ctx,
- va_list args) {
- (void)ctx;
- (void)args;
-
- // TODO(yaowu): Need to re-implement and test for AV1.
- return AOM_CODEC_INVALID_PARAM;
-}
-
-static aom_codec_err_t ctrl_set_active_map(aom_codec_alg_priv_t *ctx,
- va_list args) {
- aom_active_map_t *const map = va_arg(args, aom_active_map_t *);
-
- if (map) {
- if (!av1_set_active_map(ctx->cpi, map->active_map, (int)map->rows,
- (int)map->cols))
- return AOM_CODEC_OK;
- else
- return AOM_CODEC_INVALID_PARAM;
- } else {
- return AOM_CODEC_INVALID_PARAM;
- }
-}
-
-static aom_codec_err_t ctrl_get_active_map(aom_codec_alg_priv_t *ctx,
- va_list args) {
- aom_active_map_t *const map = va_arg(args, aom_active_map_t *);
-
- if (map) {
- if (!av1_get_active_map(ctx->cpi, map->active_map, (int)map->rows,
- (int)map->cols))
- return AOM_CODEC_OK;
- else
- return AOM_CODEC_INVALID_PARAM;
- } else {
- return AOM_CODEC_INVALID_PARAM;
- }
-}
-
-static aom_codec_err_t ctrl_set_scale_mode(aom_codec_alg_priv_t *ctx,
- va_list args) {
- aom_scaling_mode_t *const mode = va_arg(args, aom_scaling_mode_t *);
-
- if (mode) {
- const int res =
- av1_set_internal_size(ctx->cpi, (AOM_SCALING)mode->h_scaling_mode,
- (AOM_SCALING)mode->v_scaling_mode);
- return (res == 0) ? AOM_CODEC_OK : AOM_CODEC_INVALID_PARAM;
- } else {
- return AOM_CODEC_INVALID_PARAM;
- }
-}
-
-static aom_codec_err_t ctrl_set_spatial_layer_id(aom_codec_alg_priv_t *ctx,
- va_list args) {
- const int spatial_layer_id = va_arg(args, int);
- if (spatial_layer_id > MAX_NUM_ENHANCEMENT_LAYERS)
- return AOM_CODEC_INVALID_PARAM;
- ctx->cpi->common.spatial_layer_id = spatial_layer_id;
- return AOM_CODEC_OK;
-}
-
-static aom_codec_err_t ctrl_set_number_spatial_layers(aom_codec_alg_priv_t *ctx,
- va_list args) {
- const int number_spatial_layers = va_arg(args, int);
- if (number_spatial_layers > MAX_NUM_ENHANCEMENT_LAYERS)
- return AOM_CODEC_INVALID_PARAM;
- ctx->cpi->common.number_spatial_layers = number_spatial_layers;
- return AOM_CODEC_OK;
-}
-
-static aom_codec_err_t ctrl_set_tune_content(aom_codec_alg_priv_t *ctx,
- va_list args) {
- struct av1_extracfg extra_cfg = ctx->extra_cfg;
- extra_cfg.content = CAST(AV1E_SET_TUNE_CONTENT, args);
- return update_extra_cfg(ctx, &extra_cfg);
-}
-
-static aom_codec_err_t ctrl_set_cdf_update_mode(aom_codec_alg_priv_t *ctx,
- va_list args) {
- struct av1_extracfg extra_cfg = ctx->extra_cfg;
- extra_cfg.cdf_update_mode = CAST(AV1E_SET_CDF_UPDATE_MODE, args);
- return update_extra_cfg(ctx, &extra_cfg);
-}
-
-static aom_codec_err_t ctrl_set_color_primaries(aom_codec_alg_priv_t *ctx,
- va_list args) {
- struct av1_extracfg extra_cfg = ctx->extra_cfg;
- extra_cfg.color_primaries = CAST(AV1E_SET_COLOR_PRIMARIES, args);
- return update_extra_cfg(ctx, &extra_cfg);
-}
-
-static aom_codec_err_t ctrl_set_transfer_characteristics(
- aom_codec_alg_priv_t *ctx, va_list args) {
- struct av1_extracfg extra_cfg = ctx->extra_cfg;
- extra_cfg.transfer_characteristics =
- CAST(AV1E_SET_TRANSFER_CHARACTERISTICS, args);
- return update_extra_cfg(ctx, &extra_cfg);
-}
-
-static aom_codec_err_t ctrl_set_matrix_coefficients(aom_codec_alg_priv_t *ctx,
- va_list args) {
- struct av1_extracfg extra_cfg = ctx->extra_cfg;
- extra_cfg.matrix_coefficients = CAST(AV1E_SET_MATRIX_COEFFICIENTS, args);
- return update_extra_cfg(ctx, &extra_cfg);
-}
-
-static aom_codec_err_t ctrl_set_chroma_sample_position(
- aom_codec_alg_priv_t *ctx, va_list args) {
- struct av1_extracfg extra_cfg = ctx->extra_cfg;
- extra_cfg.chroma_sample_position =
- CAST(AV1E_SET_CHROMA_SAMPLE_POSITION, args);
- return update_extra_cfg(ctx, &extra_cfg);
-}
-
-static aom_codec_err_t ctrl_set_color_range(aom_codec_alg_priv_t *ctx,
- va_list args) {
- struct av1_extracfg extra_cfg = ctx->extra_cfg;
- extra_cfg.color_range = CAST(AV1E_SET_COLOR_RANGE, args);
- return update_extra_cfg(ctx, &extra_cfg);
-}
-
-static aom_codec_err_t ctrl_set_render_size(aom_codec_alg_priv_t *ctx,
- va_list args) {
- struct av1_extracfg extra_cfg = ctx->extra_cfg;
- int *const render_size = va_arg(args, int *);
- extra_cfg.render_width = render_size[0];
- extra_cfg.render_height = render_size[1];
- return update_extra_cfg(ctx, &extra_cfg);
-}
-
-static aom_codec_err_t ctrl_set_superblock_size(aom_codec_alg_priv_t *ctx,
- va_list args) {
- struct av1_extracfg extra_cfg = ctx->extra_cfg;
- extra_cfg.superblock_size = CAST(AV1E_SET_SUPERBLOCK_SIZE, args);
- return update_extra_cfg(ctx, &extra_cfg);
-}
-
-static aom_codec_err_t ctrl_set_chroma_subsampling_x(aom_codec_alg_priv_t *ctx,
- va_list args) {
- struct av1_extracfg extra_cfg = ctx->extra_cfg;
- extra_cfg.chroma_subsampling_x = CAST(AV1E_SET_CHROMA_SUBSAMPLING_X, args);
- return update_extra_cfg(ctx, &extra_cfg);
-}
-
-static aom_codec_err_t ctrl_set_chroma_subsampling_y(aom_codec_alg_priv_t *ctx,
- va_list args) {
- struct av1_extracfg extra_cfg = ctx->extra_cfg;
- extra_cfg.chroma_subsampling_y = CAST(AV1E_SET_CHROMA_SUBSAMPLING_Y, args);
- return update_extra_cfg(ctx, &extra_cfg);
-}
-
-static aom_codec_ctrl_fn_map_t encoder_ctrl_maps[] = {
- { AV1_COPY_REFERENCE, ctrl_copy_reference },
- { AOME_USE_REFERENCE, ctrl_use_reference },
-
- // Setters
- { AV1_SET_REFERENCE, ctrl_set_reference },
- { AOM_SET_POSTPROC, ctrl_set_previewpp },
- { AOME_SET_ROI_MAP, ctrl_set_roi_map },
- { AOME_SET_ACTIVEMAP, ctrl_set_active_map },
- { AOME_SET_SCALEMODE, ctrl_set_scale_mode },
- { AOME_SET_SPATIAL_LAYER_ID, ctrl_set_spatial_layer_id },
- { AOME_SET_CPUUSED, ctrl_set_cpuused },
- { AOME_SET_ENABLEAUTOALTREF, ctrl_set_enable_auto_alt_ref },
- { AOME_SET_ENABLEAUTOBWDREF, ctrl_set_enable_auto_bwd_ref },
- { AOME_SET_SHARPNESS, ctrl_set_sharpness },
- { AOME_SET_STATIC_THRESHOLD, ctrl_set_static_thresh },
- { AV1E_SET_ROW_MT, ctrl_set_row_mt },
- { AV1E_SET_TILE_COLUMNS, ctrl_set_tile_columns },
- { AV1E_SET_TILE_ROWS, ctrl_set_tile_rows },
- { AOME_SET_ARNR_MAXFRAMES, ctrl_set_arnr_max_frames },
- { AOME_SET_ARNR_STRENGTH, ctrl_set_arnr_strength },
- { AOME_SET_TUNING, ctrl_set_tuning },
- { AOME_SET_CQ_LEVEL, ctrl_set_cq_level },
- { AOME_SET_MAX_INTRA_BITRATE_PCT, ctrl_set_rc_max_intra_bitrate_pct },
- { AOME_SET_NUMBER_SPATIAL_LAYERS, ctrl_set_number_spatial_layers },
- { AV1E_SET_MAX_INTER_BITRATE_PCT, ctrl_set_rc_max_inter_bitrate_pct },
- { AV1E_SET_GF_CBR_BOOST_PCT, ctrl_set_rc_gf_cbr_boost_pct },
- { AV1E_SET_LOSSLESS, ctrl_set_lossless },
- { AV1E_SET_ENABLE_CDEF, ctrl_set_enable_cdef },
- { AV1E_SET_ENABLE_RESTORATION, ctrl_set_enable_restoration },
- { AV1E_SET_DISABLE_TRELLIS_QUANT, ctrl_set_disable_trellis_quant },
- { AV1E_SET_ENABLE_QM, ctrl_set_enable_qm },
- { AV1E_SET_QM_Y, ctrl_set_qm_y },
- { AV1E_SET_QM_U, ctrl_set_qm_u },
- { AV1E_SET_QM_V, ctrl_set_qm_v },
- { AV1E_SET_QM_MIN, ctrl_set_qm_min },
- { AV1E_SET_QM_MAX, ctrl_set_qm_max },
-#if CONFIG_DIST_8X8
- { AV1E_SET_ENABLE_DIST_8X8, ctrl_set_enable_dist_8x8 },
-#endif
- { AV1E_SET_NUM_TG, ctrl_set_num_tg },
- { AV1E_SET_MTU, ctrl_set_mtu },
- { AV1E_SET_TIMING_INFO_TYPE, ctrl_set_timing_info_type },
- { AV1E_SET_FRAME_PARALLEL_DECODING, ctrl_set_frame_parallel_decoding_mode },
- { AV1E_SET_ERROR_RESILIENT_MODE, ctrl_set_error_resilient_mode },
- { AV1E_SET_S_FRAME_MODE, ctrl_set_s_frame_mode },
- { AV1E_SET_ENABLE_DF, ctrl_set_enable_df },
- { AV1E_SET_ENABLE_ORDER_HINT, ctrl_set_enable_order_hint },
- { AV1E_SET_ENABLE_JNT_COMP, ctrl_set_enable_jnt_comp },
- { AV1E_SET_ENABLE_REF_FRAME_MVS, ctrl_set_enable_ref_frame_mvs },
- { AV1E_SET_ALLOW_REF_FRAME_MVS, ctrl_set_allow_ref_frame_mvs },
- { AV1E_SET_ENABLE_WARPED_MOTION, ctrl_set_enable_warped_motion },
- { AV1E_SET_ALLOW_WARPED_MOTION, ctrl_set_allow_warped_motion },
- { AV1E_SET_ENABLE_SUPERRES, ctrl_set_enable_superres },
- { AV1E_SET_AQ_MODE, ctrl_set_aq_mode },
- { AV1E_SET_DELTAQ_MODE, ctrl_set_deltaq_mode },
- { AV1E_SET_FRAME_PERIODIC_BOOST, ctrl_set_frame_periodic_boost },
- { AV1E_SET_TUNE_CONTENT, ctrl_set_tune_content },
- { AV1E_SET_CDF_UPDATE_MODE, ctrl_set_cdf_update_mode },
- { AV1E_SET_COLOR_PRIMARIES, ctrl_set_color_primaries },
- { AV1E_SET_TRANSFER_CHARACTERISTICS, ctrl_set_transfer_characteristics },
- { AV1E_SET_MATRIX_COEFFICIENTS, ctrl_set_matrix_coefficients },
- { AV1E_SET_CHROMA_SAMPLE_POSITION, ctrl_set_chroma_sample_position },
- { AV1E_SET_COLOR_RANGE, ctrl_set_color_range },
- { AV1E_SET_NOISE_SENSITIVITY, ctrl_set_noise_sensitivity },
- { AV1E_SET_MIN_GF_INTERVAL, ctrl_set_min_gf_interval },
- { AV1E_SET_MAX_GF_INTERVAL, ctrl_set_max_gf_interval },
- { AV1E_SET_RENDER_SIZE, ctrl_set_render_size },
- { AV1E_SET_SUPERBLOCK_SIZE, ctrl_set_superblock_size },
- { AV1E_SET_SINGLE_TILE_DECODING, ctrl_set_single_tile_decoding },
- { AV1E_SET_FILM_GRAIN_TEST_VECTOR, ctrl_set_film_grain_test_vector },
- { AV1E_SET_FILM_GRAIN_TABLE, ctrl_set_film_grain_table },
-#if CONFIG_DENOISE
- { AV1E_SET_DENOISE_NOISE_LEVEL, ctrl_set_denoise_noise_level },
- { AV1E_SET_DENOISE_BLOCK_SIZE, ctrl_set_denoise_block_size },
-#endif // CONFIG_FILM_GRAIN
- { AV1E_ENABLE_MOTION_VECTOR_UNIT_TEST, ctrl_enable_motion_vector_unit_test },
-
- // Getters
- { AOME_GET_LAST_QUANTIZER, ctrl_get_quantizer },
- { AOME_GET_LAST_QUANTIZER_64, ctrl_get_quantizer64 },
- { AV1_GET_REFERENCE, ctrl_get_reference },
- { AV1E_GET_ACTIVEMAP, ctrl_get_active_map },
- { AV1_GET_NEW_FRAME_IMAGE, ctrl_get_new_frame_image },
- { AV1_COPY_NEW_FRAME_IMAGE, ctrl_copy_new_frame_image },
- { AV1E_SET_CHROMA_SUBSAMPLING_X, ctrl_set_chroma_subsampling_x },
- { AV1E_SET_CHROMA_SUBSAMPLING_Y, ctrl_set_chroma_subsampling_y },
- { -1, NULL },
-};
-
-static aom_codec_enc_cfg_map_t encoder_usage_cfg_map[] = {
- { 0,
- {
- // NOLINT
- 0, // g_usage
- 0, // g_threads
- 0, // g_profile
-
- 320, // g_width
- 240, // g_height
- 0, // g_limit
- 0, // g_forced_max_frame_width
- 0, // g_forced_max_frame_height
- AOM_BITS_8, // g_bit_depth
- 8, // g_input_bit_depth
-
- { 1, 30 }, // g_timebase
-
- 0, // g_error_resilient
-
- AOM_RC_ONE_PASS, // g_pass
-
- 19, // g_lag_in_frames
-
- 0, // rc_dropframe_thresh
- RESIZE_NONE, // rc_resize_mode
- SCALE_NUMERATOR, // rc_resize_denominator
- SCALE_NUMERATOR, // rc_resize_kf_denominator
-
- 0, // rc_superres_mode
- SCALE_NUMERATOR, // rc_superres_denominator
- SCALE_NUMERATOR, // rc_superres_kf_denominator
- 63, // rc_superres_qthresh
- 63, // rc_superres_kf_qthresh
-
- AOM_VBR, // rc_end_usage
- { NULL, 0 }, // rc_twopass_stats_in
- { NULL, 0 }, // rc_firstpass_mb_stats_in
- 256, // rc_target_bandwidth
- 0, // rc_min_quantizer
- 63, // rc_max_quantizer
- 25, // rc_undershoot_pct
- 25, // rc_overshoot_pct
-
- 6000, // rc_max_buffer_size
- 4000, // rc_buffer_initial_size
- 5000, // rc_buffer_optimal_size
-
- 50, // rc_two_pass_vbrbias
- 0, // rc_two_pass_vbrmin_section
- 2000, // rc_two_pass_vbrmax_section
-
- // keyframing settings (kf)
- 0, // fwd_kf_enabled
- AOM_KF_AUTO, // g_kfmode
- 0, // kf_min_dist
- 9999, // kf_max_dist
- 0, // sframe_dist
- 1, // sframe_mode
- 0, // large_scale_tile
- 0, // monochrome
- 0, // full_still_picture_hdr
- 0, // save_as_annexb
- 0, // tile_width_count
- 0, // tile_height_count
- { 0 }, // tile_widths
- { 0 }, // tile_heights
- { 1 }, // config file
- } },
-};
-
-#ifndef VERSION_STRING
-#define VERSION_STRING
-#endif
-CODEC_INTERFACE(aom_codec_av1_cx) = {
- "AOMedia Project AV1 Encoder" VERSION_STRING,
- AOM_CODEC_INTERNAL_ABI_VERSION,
- AOM_CODEC_CAP_HIGHBITDEPTH | AOM_CODEC_CAP_ENCODER |
- AOM_CODEC_CAP_PSNR, // aom_codec_caps_t
- encoder_init, // aom_codec_init_fn_t
- encoder_destroy, // aom_codec_destroy_fn_t
- encoder_ctrl_maps, // aom_codec_ctrl_fn_map_t
- {
- // NOLINT
- NULL, // aom_codec_peek_si_fn_t
- NULL, // aom_codec_get_si_fn_t
- NULL, // aom_codec_decode_fn_t
- NULL, // aom_codec_get_frame_fn_t
- NULL // aom_codec_set_fb_fn_t
- },
- {
- // NOLINT
- 1, // 1 cfg map
- encoder_usage_cfg_map, // aom_codec_enc_cfg_map_t
- encoder_encode, // aom_codec_encode_fn_t
- encoder_get_cxdata, // aom_codec_get_cx_data_fn_t
- encoder_set_config, // aom_codec_enc_config_set_fn_t
- encoder_get_global_headers, // aom_codec_get_global_headers_fn_t
- encoder_get_preview, // aom_codec_get_preview_frame_fn_t
- NULL // aom_codec_enc_mr_get_mem_loc_fn_t
- }
-};
diff --git a/third_party/aom/av1/av1_dx_iface.c b/third_party/aom/av1/av1_dx_iface.c
deleted file mode 100644
index 4a6631047..000000000
--- a/third_party/aom/av1/av1_dx_iface.c
+++ /dev/null
@@ -1,1328 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#include <stdlib.h>
-#include <string.h>
-
-#include "config/aom_config.h"
-#include "config/aom_version.h"
-
-#include "aom/internal/aom_codec_internal.h"
-#include "aom/aomdx.h"
-#include "aom/aom_decoder.h"
-#include "aom_dsp/bitreader_buffer.h"
-#include "aom_dsp/aom_dsp_common.h"
-#include "aom_ports/mem_ops.h"
-#include "aom_util/aom_thread.h"
-
-#include "av1/common/alloccommon.h"
-#include "av1/common/frame_buffers.h"
-#include "av1/common/enums.h"
-#include "av1/common/obu_util.h"
-
-#include "av1/decoder/decoder.h"
-#include "av1/decoder/decodeframe.h"
-#include "av1/decoder/obu.h"
-
-#include "av1/av1_iface_common.h"
-
-struct aom_codec_alg_priv {
- aom_codec_priv_t base;
- aom_codec_dec_cfg_t cfg;
- aom_codec_stream_info_t si;
- int postproc_cfg_set;
- aom_postproc_cfg_t postproc_cfg;
- aom_image_t img;
- int img_avail;
- int flushed;
- int invert_tile_order;
- int last_show_frame; // Index of last output frame.
- int byte_alignment;
- int skip_loop_filter;
- int skip_film_grain;
- int decode_tile_row;
- int decode_tile_col;
- unsigned int tile_mode;
- unsigned int ext_tile_debug;
- unsigned int row_mt;
- EXTERNAL_REFERENCES ext_refs;
- unsigned int is_annexb;
- int operating_point;
- int output_all_layers;
-
- AVxWorker *frame_workers;
- int num_frame_workers;
- int next_submit_worker_id;
- int last_submit_worker_id;
- int next_output_worker_id;
- int available_threads;
- aom_image_t *image_with_grain[MAX_NUM_SPATIAL_LAYERS];
- int need_resync; // wait for key/intra-only frame
- // BufferPool that holds all reference frames. Shared by all the FrameWorkers.
- BufferPool *buffer_pool;
-
- // External frame buffer info to save for AV1 common.
- void *ext_priv; // Private data associated with the external frame buffers.
- aom_get_frame_buffer_cb_fn_t get_ext_fb_cb;
- aom_release_frame_buffer_cb_fn_t release_ext_fb_cb;
-
-#if CONFIG_INSPECTION
- aom_inspect_cb inspect_cb;
- void *inspect_ctx;
-#endif
-};
-
-static aom_codec_err_t decoder_init(aom_codec_ctx_t *ctx,
- aom_codec_priv_enc_mr_cfg_t *data) {
- // This function only allocates space for the aom_codec_alg_priv_t
- // structure. More memory may be required at the time the stream
- // information becomes known.
- (void)data;
-
- if (!ctx->priv) {
- aom_codec_alg_priv_t *const priv =
- (aom_codec_alg_priv_t *)aom_calloc(1, sizeof(*priv));
- if (priv == NULL) return AOM_CODEC_MEM_ERROR;
-
- ctx->priv = (aom_codec_priv_t *)priv;
- ctx->priv->init_flags = ctx->init_flags;
- priv->flushed = 0;
-
- // TODO(tdaede): this should not be exposed to the API
- priv->cfg.allow_lowbitdepth = CONFIG_LOWBITDEPTH;
- if (ctx->config.dec) {
- priv->cfg = *ctx->config.dec;
- ctx->config.dec = &priv->cfg;
- // default values
- priv->cfg.cfg.ext_partition = 1;
- }
- av1_zero(priv->image_with_grain);
- // Turn row_mt on by default.
- priv->row_mt = 1;
-
- // Turn on normal tile coding mode by default.
- // 0 is for normal tile coding mode, and 1 is for large scale tile coding
- // mode(refer to lightfield example).
- priv->tile_mode = 0;
- priv->decode_tile_row = -1;
- priv->decode_tile_col = -1;
- }
-
- return AOM_CODEC_OK;
-}
-
-static aom_codec_err_t decoder_destroy(aom_codec_alg_priv_t *ctx) {
- if (ctx->frame_workers != NULL) {
- int i;
- for (i = 0; i < ctx->num_frame_workers; ++i) {
- AVxWorker *const worker = &ctx->frame_workers[i];
- FrameWorkerData *const frame_worker_data =
- (FrameWorkerData *)worker->data1;
- aom_get_worker_interface()->end(worker);
- aom_free(frame_worker_data->pbi->common.tpl_mvs);
- frame_worker_data->pbi->common.tpl_mvs = NULL;
- av1_remove_common(&frame_worker_data->pbi->common);
- av1_free_restoration_buffers(&frame_worker_data->pbi->common);
- av1_decoder_remove(frame_worker_data->pbi);
- aom_free(frame_worker_data->scratch_buffer);
-#if CONFIG_MULTITHREAD
- pthread_mutex_destroy(&frame_worker_data->stats_mutex);
- pthread_cond_destroy(&frame_worker_data->stats_cond);
-#endif
- aom_free(frame_worker_data);
- }
-#if CONFIG_MULTITHREAD
- pthread_mutex_destroy(&ctx->buffer_pool->pool_mutex);
-#endif
- }
-
- if (ctx->buffer_pool) {
- av1_free_ref_frame_buffers(ctx->buffer_pool);
- av1_free_internal_frame_buffers(&ctx->buffer_pool->int_frame_buffers);
- }
-
- aom_free(ctx->frame_workers);
- aom_free(ctx->buffer_pool);
- for (int i = 0; i < MAX_NUM_SPATIAL_LAYERS; i++) {
- if (ctx->image_with_grain[i]) aom_img_free(ctx->image_with_grain[i]);
- }
- aom_free(ctx);
- return AOM_CODEC_OK;
-}
-
-// Parses the operating points (including operating_point_idc, seq_level_idx,
-// and seq_tier) and then sets si->number_spatial_layers and
-// si->number_temporal_layers based on operating_point_idc[0].
-static aom_codec_err_t parse_operating_points(struct aom_read_bit_buffer *rb,
- int is_reduced_header,
- aom_codec_stream_info_t *si) {
- int operating_point_idc0 = 0;
-
- if (is_reduced_header) {
- aom_rb_read_literal(rb, LEVEL_BITS); // level
- } else {
- const uint8_t operating_points_cnt_minus_1 =
- aom_rb_read_literal(rb, OP_POINTS_CNT_MINUS_1_BITS);
- for (int i = 0; i < operating_points_cnt_minus_1 + 1; i++) {
- int operating_point_idc;
- operating_point_idc = aom_rb_read_literal(rb, OP_POINTS_IDC_BITS);
- if (i == 0) operating_point_idc0 = operating_point_idc;
- int seq_level_idx = aom_rb_read_literal(rb, LEVEL_BITS); // level
- if (seq_level_idx > 7) aom_rb_read_bit(rb); // tier
- }
- }
-
- if (aom_get_num_layers_from_operating_point_idc(
- operating_point_idc0, &si->number_spatial_layers,
- &si->number_temporal_layers) != AOM_CODEC_OK) {
- return AOM_CODEC_ERROR;
- }
-
- return AOM_CODEC_OK;
-}
-
-static aom_codec_err_t decoder_peek_si_internal(const uint8_t *data,
- size_t data_sz,
- aom_codec_stream_info_t *si,
- int *is_intra_only) {
- int intra_only_flag = 0;
- int got_sequence_header = 0;
- int found_keyframe = 0;
-
- if (data + data_sz <= data || data_sz < 1) return AOM_CODEC_INVALID_PARAM;
-
- si->w = 0;
- si->h = 0;
- si->is_kf = 0; // is_kf indicates whether the current packet contains a RAP
-
- ObuHeader obu_header;
- memset(&obu_header, 0, sizeof(obu_header));
- size_t payload_size = 0;
- size_t bytes_read = 0;
- int reduced_still_picture_hdr = 0;
- aom_codec_err_t status = aom_read_obu_header_and_size(
- data, data_sz, si->is_annexb, &obu_header, &payload_size, &bytes_read);
- if (status != AOM_CODEC_OK) return status;
-
- // If the first OBU is a temporal delimiter, skip over it and look at the next
- // OBU in the bitstream
- if (obu_header.type == OBU_TEMPORAL_DELIMITER) {
- // Skip any associated payload (there shouldn't be one, but just in case)
- if (data_sz < bytes_read + payload_size) return AOM_CODEC_CORRUPT_FRAME;
- data += bytes_read + payload_size;
- data_sz -= bytes_read + payload_size;
-
- status = aom_read_obu_header_and_size(
- data, data_sz, si->is_annexb, &obu_header, &payload_size, &bytes_read);
- if (status != AOM_CODEC_OK) return status;
- }
- while (1) {
- data += bytes_read;
- data_sz -= bytes_read;
- if (data_sz < payload_size) return AOM_CODEC_CORRUPT_FRAME;
- // Check that the selected OBU is a sequence header
- if (obu_header.type == OBU_SEQUENCE_HEADER) {
- // Sanity check on sequence header size
- if (data_sz < 2) return AOM_CODEC_CORRUPT_FRAME;
- // Read a few values from the sequence header payload
- struct aom_read_bit_buffer rb = { data, data + data_sz, 0, NULL, NULL };
-
- av1_read_profile(&rb); // profile
- const int still_picture = aom_rb_read_bit(&rb);
- reduced_still_picture_hdr = aom_rb_read_bit(&rb);
-
- if (!still_picture && reduced_still_picture_hdr) {
- return AOM_CODEC_UNSUP_BITSTREAM;
- }
-
- if (parse_operating_points(&rb, reduced_still_picture_hdr, si) !=
- AOM_CODEC_OK) {
- return AOM_CODEC_ERROR;
- }
-
- int num_bits_width = aom_rb_read_literal(&rb, 4) + 1;
- int num_bits_height = aom_rb_read_literal(&rb, 4) + 1;
- int max_frame_width = aom_rb_read_literal(&rb, num_bits_width) + 1;
- int max_frame_height = aom_rb_read_literal(&rb, num_bits_height) + 1;
- si->w = max_frame_width;
- si->h = max_frame_height;
- got_sequence_header = 1;
- } else if (obu_header.type == OBU_FRAME_HEADER ||
- obu_header.type == OBU_FRAME) {
- if (got_sequence_header && reduced_still_picture_hdr) {
- found_keyframe = 1;
- break;
- } else {
- // make sure we have enough bits to get the frame type out
- if (data_sz < 1) return AOM_CODEC_CORRUPT_FRAME;
- struct aom_read_bit_buffer rb = { data, data + data_sz, 0, NULL, NULL };
- const int show_existing_frame = aom_rb_read_bit(&rb);
- if (!show_existing_frame) {
- const FRAME_TYPE frame_type = (FRAME_TYPE)aom_rb_read_literal(&rb, 2);
- if (frame_type == KEY_FRAME) {
- found_keyframe = 1;
- break; // Stop here as no further OBUs will change the outcome.
- }
- }
- }
- }
- // skip past any unread OBU header data
- data += payload_size;
- data_sz -= payload_size;
- if (data_sz == 0) break; // exit if we're out of OBUs
- status = aom_read_obu_header_and_size(
- data, data_sz, si->is_annexb, &obu_header, &payload_size, &bytes_read);
- if (status != AOM_CODEC_OK) return status;
- }
- if (got_sequence_header && found_keyframe) si->is_kf = 1;
- if (is_intra_only != NULL) *is_intra_only = intra_only_flag;
- return AOM_CODEC_OK;
-}
-
-static aom_codec_err_t decoder_peek_si(const uint8_t *data, size_t data_sz,
- aom_codec_stream_info_t *si) {
- return decoder_peek_si_internal(data, data_sz, si, NULL);
-}
-
-static aom_codec_err_t decoder_get_si(aom_codec_alg_priv_t *ctx,
- aom_codec_stream_info_t *si) {
- memcpy(si, &ctx->si, sizeof(*si));
-
- return AOM_CODEC_OK;
-}
-
-static void set_error_detail(aom_codec_alg_priv_t *ctx,
- const char *const error) {
- ctx->base.err_detail = error;
-}
-
-static aom_codec_err_t update_error_state(
- aom_codec_alg_priv_t *ctx, const struct aom_internal_error_info *error) {
- if (error->error_code)
- set_error_detail(ctx, error->has_detail ? error->detail : NULL);
-
- return error->error_code;
-}
-
-static void init_buffer_callbacks(aom_codec_alg_priv_t *ctx) {
- int i;
-
- for (i = 0; i < ctx->num_frame_workers; ++i) {
- AVxWorker *const worker = &ctx->frame_workers[i];
- FrameWorkerData *const frame_worker_data = (FrameWorkerData *)worker->data1;
- AV1_COMMON *const cm = &frame_worker_data->pbi->common;
- BufferPool *const pool = cm->buffer_pool;
-
- cm->new_fb_idx = INVALID_IDX;
- cm->byte_alignment = ctx->byte_alignment;
- cm->skip_loop_filter = ctx->skip_loop_filter;
- cm->skip_film_grain = ctx->skip_film_grain;
-
- if (ctx->get_ext_fb_cb != NULL && ctx->release_ext_fb_cb != NULL) {
- pool->get_fb_cb = ctx->get_ext_fb_cb;
- pool->release_fb_cb = ctx->release_ext_fb_cb;
- pool->cb_priv = ctx->ext_priv;
- } else {
- pool->get_fb_cb = av1_get_frame_buffer;
- pool->release_fb_cb = av1_release_frame_buffer;
-
- if (av1_alloc_internal_frame_buffers(&pool->int_frame_buffers))
- aom_internal_error(&cm->error, AOM_CODEC_MEM_ERROR,
- "Failed to initialize internal frame buffers");
-
- pool->cb_priv = &pool->int_frame_buffers;
- }
- }
-}
-
-static void set_default_ppflags(aom_postproc_cfg_t *cfg) {
- cfg->post_proc_flag = AOM_DEBLOCK | AOM_DEMACROBLOCK;
- cfg->deblocking_level = 4;
- cfg->noise_level = 0;
-}
-
-static int frame_worker_hook(void *arg1, void *arg2) {
- FrameWorkerData *const frame_worker_data = (FrameWorkerData *)arg1;
- const uint8_t *data = frame_worker_data->data;
- (void)arg2;
-
- int result = av1_receive_compressed_data(frame_worker_data->pbi,
- frame_worker_data->data_size, &data);
- frame_worker_data->data_end = data;
-
- if (result != 0) {
- // Check decode result in serial decode.
- frame_worker_data->pbi->cur_buf->buf.corrupted = 1;
- frame_worker_data->pbi->need_resync = 1;
- }
- return !result;
-}
-
-static aom_codec_err_t init_decoder(aom_codec_alg_priv_t *ctx) {
- int i;
- const AVxWorkerInterface *const winterface = aom_get_worker_interface();
-
- ctx->last_show_frame = -1;
- ctx->next_submit_worker_id = 0;
- ctx->last_submit_worker_id = 0;
- ctx->next_output_worker_id = 0;
- ctx->need_resync = 1;
- ctx->num_frame_workers = 1;
- if (ctx->num_frame_workers > MAX_DECODE_THREADS)
- ctx->num_frame_workers = MAX_DECODE_THREADS;
- ctx->available_threads = ctx->num_frame_workers;
- ctx->flushed = 0;
-
- ctx->buffer_pool = (BufferPool *)aom_calloc(1, sizeof(BufferPool));
- if (ctx->buffer_pool == NULL) return AOM_CODEC_MEM_ERROR;
-
-#if CONFIG_MULTITHREAD
- if (pthread_mutex_init(&ctx->buffer_pool->pool_mutex, NULL)) {
- set_error_detail(ctx, "Failed to allocate buffer pool mutex");
- return AOM_CODEC_MEM_ERROR;
- }
-#endif
-
- ctx->frame_workers = (AVxWorker *)aom_malloc(ctx->num_frame_workers *
- sizeof(*ctx->frame_workers));
- if (ctx->frame_workers == NULL) {
- set_error_detail(ctx, "Failed to allocate frame_workers");
- return AOM_CODEC_MEM_ERROR;
- }
-
- for (i = 0; i < ctx->num_frame_workers; ++i) {
- AVxWorker *const worker = &ctx->frame_workers[i];
- FrameWorkerData *frame_worker_data = NULL;
- winterface->init(worker);
- worker->data1 = aom_memalign(32, sizeof(FrameWorkerData));
- if (worker->data1 == NULL) {
- set_error_detail(ctx, "Failed to allocate frame_worker_data");
- return AOM_CODEC_MEM_ERROR;
- }
- frame_worker_data = (FrameWorkerData *)worker->data1;
- frame_worker_data->pbi = av1_decoder_create(ctx->buffer_pool);
- if (frame_worker_data->pbi == NULL) {
- set_error_detail(ctx, "Failed to allocate frame_worker_data");
- return AOM_CODEC_MEM_ERROR;
- }
- frame_worker_data->pbi->common.options = &ctx->cfg.cfg;
- frame_worker_data->pbi->frame_worker_owner = worker;
- frame_worker_data->worker_id = i;
- frame_worker_data->scratch_buffer = NULL;
- frame_worker_data->scratch_buffer_size = 0;
- frame_worker_data->frame_context_ready = 0;
- frame_worker_data->received_frame = 0;
-#if CONFIG_MULTITHREAD
- if (pthread_mutex_init(&frame_worker_data->stats_mutex, NULL)) {
- set_error_detail(ctx, "Failed to allocate frame_worker_data mutex");
- return AOM_CODEC_MEM_ERROR;
- }
-
- if (pthread_cond_init(&frame_worker_data->stats_cond, NULL)) {
- set_error_detail(ctx, "Failed to allocate frame_worker_data cond");
- return AOM_CODEC_MEM_ERROR;
- }
-#endif
- frame_worker_data->pbi->allow_lowbitdepth = ctx->cfg.allow_lowbitdepth;
-
- // If decoding in serial mode, FrameWorker thread could create tile worker
- // thread or loopfilter thread.
- frame_worker_data->pbi->max_threads = ctx->cfg.threads;
- frame_worker_data->pbi->inv_tile_order = ctx->invert_tile_order;
- frame_worker_data->pbi->common.large_scale_tile = ctx->tile_mode;
- frame_worker_data->pbi->common.is_annexb = ctx->is_annexb;
- frame_worker_data->pbi->dec_tile_row = ctx->decode_tile_row;
- frame_worker_data->pbi->dec_tile_col = ctx->decode_tile_col;
- frame_worker_data->pbi->operating_point = ctx->operating_point;
- frame_worker_data->pbi->output_all_layers = ctx->output_all_layers;
- frame_worker_data->pbi->ext_tile_debug = ctx->ext_tile_debug;
- frame_worker_data->pbi->row_mt = ctx->row_mt;
-
- worker->hook = frame_worker_hook;
- if (!winterface->reset(worker)) {
- set_error_detail(ctx, "Frame Worker thread creation failed");
- return AOM_CODEC_MEM_ERROR;
- }
- }
-
- // If postprocessing was enabled by the application and a
- // configuration has not been provided, default it.
- if (!ctx->postproc_cfg_set && (ctx->base.init_flags & AOM_CODEC_USE_POSTPROC))
- set_default_ppflags(&ctx->postproc_cfg);
-
- init_buffer_callbacks(ctx);
-
- return AOM_CODEC_OK;
-}
-
-static INLINE void check_resync(aom_codec_alg_priv_t *const ctx,
- const AV1Decoder *const pbi) {
- // Clear resync flag if worker got a key frame or intra only frame.
- if (ctx->need_resync == 1 && pbi->need_resync == 0 &&
- (pbi->common.intra_only || pbi->common.frame_type == KEY_FRAME))
- ctx->need_resync = 0;
-}
-
-static aom_codec_err_t decode_one(aom_codec_alg_priv_t *ctx,
- const uint8_t **data, size_t data_sz,
- void *user_priv) {
- const AVxWorkerInterface *const winterface = aom_get_worker_interface();
-
- // Determine the stream parameters. Note that we rely on peek_si to
- // validate that we have a buffer that does not wrap around the top
- // of the heap.
- if (!ctx->si.h) {
- int is_intra_only = 0;
- ctx->si.is_annexb = ctx->is_annexb;
- const aom_codec_err_t res =
- decoder_peek_si_internal(*data, data_sz, &ctx->si, &is_intra_only);
- if (res != AOM_CODEC_OK) return res;
-
- if (!ctx->si.is_kf && !is_intra_only) return AOM_CODEC_ERROR;
- }
-
- AVxWorker *const worker = ctx->frame_workers;
- FrameWorkerData *const frame_worker_data = (FrameWorkerData *)worker->data1;
- frame_worker_data->data = *data;
- frame_worker_data->data_size = data_sz;
- frame_worker_data->user_priv = user_priv;
- frame_worker_data->received_frame = 1;
-
-#if CONFIG_INSPECTION
- frame_worker_data->pbi->inspect_cb = ctx->inspect_cb;
- frame_worker_data->pbi->inspect_ctx = ctx->inspect_ctx;
-#endif
-
- frame_worker_data->pbi->common.large_scale_tile = ctx->tile_mode;
- frame_worker_data->pbi->dec_tile_row = ctx->decode_tile_row;
- frame_worker_data->pbi->dec_tile_col = ctx->decode_tile_col;
- frame_worker_data->pbi->ext_tile_debug = ctx->ext_tile_debug;
- frame_worker_data->pbi->row_mt = ctx->row_mt;
- frame_worker_data->pbi->ext_refs = ctx->ext_refs;
-
- frame_worker_data->pbi->common.is_annexb = ctx->is_annexb;
-
- worker->had_error = 0;
- winterface->execute(worker);
-
- // Update data pointer after decode.
- *data = frame_worker_data->data_end;
-
- if (worker->had_error)
- return update_error_state(ctx, &frame_worker_data->pbi->common.error);
-
- check_resync(ctx, frame_worker_data->pbi);
-
- return AOM_CODEC_OK;
-}
-
-static aom_codec_err_t decoder_decode(aom_codec_alg_priv_t *ctx,
- const uint8_t *data, size_t data_sz,
- void *user_priv) {
- aom_codec_err_t res = AOM_CODEC_OK;
-
- // Release any pending output frames from the previous decoder_decode call.
- // We need to do this even if the decoder is being flushed or the input
- // arguments are invalid.
- if (ctx->frame_workers) {
- BufferPool *const pool = ctx->buffer_pool;
- RefCntBuffer *const frame_bufs = pool->frame_bufs;
- lock_buffer_pool(pool);
- for (int i = 0; i < ctx->num_frame_workers; ++i) {
- AVxWorker *const worker = &ctx->frame_workers[i];
- FrameWorkerData *const frame_worker_data =
- (FrameWorkerData *)worker->data1;
- struct AV1Decoder *pbi = frame_worker_data->pbi;
- for (size_t j = 0; j < pbi->num_output_frames; j++) {
- decrease_ref_count((int)pbi->output_frame_index[j], frame_bufs, pool);
- }
- pbi->num_output_frames = 0;
- }
- unlock_buffer_pool(ctx->buffer_pool);
- }
-
- /* Sanity checks */
- /* NULL data ptr allowed if data_sz is 0 too */
- if (data == NULL && data_sz == 0) {
- ctx->flushed = 1;
- return AOM_CODEC_OK;
- }
- if (data == NULL || data_sz == 0) return AOM_CODEC_INVALID_PARAM;
-
- // Reset flushed when receiving a valid frame.
- ctx->flushed = 0;
-
- // Initialize the decoder workers on the first frame.
- if (ctx->frame_workers == NULL) {
- res = init_decoder(ctx);
- if (res != AOM_CODEC_OK) return res;
- }
-
- const uint8_t *data_start = data;
- const uint8_t *data_end = data + data_sz;
-
- if (ctx->is_annexb) {
- // read the size of this temporal unit
- size_t length_of_size;
- uint64_t temporal_unit_size;
- if (aom_uleb_decode(data_start, data_sz, &temporal_unit_size,
- &length_of_size) != 0) {
- return AOM_CODEC_CORRUPT_FRAME;
- }
- data_start += length_of_size;
- if (temporal_unit_size > (size_t)(data_end - data_start))
- return AOM_CODEC_CORRUPT_FRAME;
- data_end = data_start + temporal_unit_size;
- }
-
- // Decode in serial mode.
- while (data_start < data_end) {
- uint64_t frame_size;
- if (ctx->is_annexb) {
- // read the size of this frame unit
- size_t length_of_size;
- if (aom_uleb_decode(data_start, (size_t)(data_end - data_start),
- &frame_size, &length_of_size) != 0) {
- return AOM_CODEC_CORRUPT_FRAME;
- }
- data_start += length_of_size;
- if (frame_size > (size_t)(data_end - data_start))
- return AOM_CODEC_CORRUPT_FRAME;
- } else {
- frame_size = (uint64_t)(data_end - data_start);
- }
-
- res = decode_one(ctx, &data_start, (size_t)frame_size, user_priv);
- if (res != AOM_CODEC_OK) return res;
-
- // Allow extra zero bytes after the frame end
- while (data_start < data_end) {
- const uint8_t marker = data_start[0];
- if (marker) break;
- ++data_start;
- }
- }
-
- return res;
-}
-
-// If grain_params->apply_grain is false, returns img. Otherwise, adds film
-// grain to img, saves the result in *grain_img_ptr (allocating *grain_img_ptr
-// if necessary), and returns *grain_img_ptr.
-static aom_image_t *add_grain_if_needed(aom_image_t *img,
- aom_image_t **grain_img_ptr,
- aom_film_grain_t *grain_params) {
- if (!grain_params->apply_grain) return img;
-
- aom_image_t *grain_img_buf = *grain_img_ptr;
-
- const int w_even = ALIGN_POWER_OF_TWO(img->d_w, 1);
- const int h_even = ALIGN_POWER_OF_TWO(img->d_h, 1);
-
- if (grain_img_buf) {
- const int alloc_w = ALIGN_POWER_OF_TWO(grain_img_buf->d_w, 1);
- const int alloc_h = ALIGN_POWER_OF_TWO(grain_img_buf->d_h, 1);
- if (w_even != alloc_w || h_even != alloc_h ||
- img->fmt != grain_img_buf->fmt) {
- aom_img_free(grain_img_buf);
- grain_img_buf = NULL;
- *grain_img_ptr = NULL;
- }
- }
- if (!grain_img_buf) {
- grain_img_buf = aom_img_alloc(NULL, img->fmt, w_even, h_even, 16);
- *grain_img_ptr = grain_img_buf;
- }
-
- if (grain_img_buf) {
- grain_img_buf->user_priv = img->user_priv;
- if (av1_add_film_grain(grain_params, img, grain_img_buf)) {
- aom_img_free(grain_img_buf);
- grain_img_buf = NULL;
- *grain_img_ptr = NULL;
- }
- }
-
- return grain_img_buf;
-}
-
-static aom_image_t *decoder_get_frame(aom_codec_alg_priv_t *ctx,
- aom_codec_iter_t *iter) {
- aom_image_t *img = NULL;
-
- if (!iter) {
- return NULL;
- }
-
- // To avoid having to allocate any extra storage, treat 'iter' as
- // simply a pointer to an integer index
- uintptr_t *index = (uintptr_t *)iter;
-
- if (ctx->frame_workers != NULL) {
- do {
- YV12_BUFFER_CONFIG *sd;
- // NOTE(david.barker): This code does not support multiple worker threads
- // yet. We should probably move the iteration over threads into *iter
- // instead of using ctx->next_output_worker_id.
- const AVxWorkerInterface *const winterface = aom_get_worker_interface();
- AVxWorker *const worker = &ctx->frame_workers[ctx->next_output_worker_id];
- FrameWorkerData *const frame_worker_data =
- (FrameWorkerData *)worker->data1;
- ctx->next_output_worker_id =
- (ctx->next_output_worker_id + 1) % ctx->num_frame_workers;
- // Wait for the frame from worker thread.
- if (winterface->sync(worker)) {
- // Check if worker has received any frames.
- if (frame_worker_data->received_frame == 1) {
- ++ctx->available_threads;
- frame_worker_data->received_frame = 0;
- check_resync(ctx, frame_worker_data->pbi);
- }
- aom_film_grain_t *grain_params;
- if (av1_get_raw_frame(frame_worker_data->pbi, *index, &sd,
- &grain_params) == 0) {
- AV1Decoder *const pbi = frame_worker_data->pbi;
- AV1_COMMON *const cm = &pbi->common;
- RefCntBuffer *const frame_bufs = cm->buffer_pool->frame_bufs;
- ctx->last_show_frame = cm->new_fb_idx;
- if (ctx->need_resync) return NULL;
- yuvconfig2image(&ctx->img, sd, frame_worker_data->user_priv);
-
- if (!pbi->ext_tile_debug && cm->large_scale_tile) {
- *index += 1; // Advance the iterator to point to the next image
- img = &ctx->img;
- img->img_data = pbi->tile_list_output;
- img->sz = pbi->tile_list_size;
- return img;
- }
-
- const int num_planes = av1_num_planes(cm);
- if (pbi->ext_tile_debug && cm->single_tile_decoding &&
- pbi->dec_tile_row >= 0) {
- const int tile_row = AOMMIN(pbi->dec_tile_row, cm->tile_rows - 1);
- const int mi_row = tile_row * cm->tile_height;
- const int ssy = ctx->img.y_chroma_shift;
- int plane;
- ctx->img.planes[0] += mi_row * MI_SIZE * ctx->img.stride[0];
- if (num_planes > 1) {
- for (plane = 1; plane < MAX_MB_PLANE; ++plane) {
- ctx->img.planes[plane] +=
- mi_row * (MI_SIZE >> ssy) * ctx->img.stride[plane];
- }
- }
- ctx->img.d_h =
- AOMMIN(cm->tile_height, cm->mi_rows - mi_row) * MI_SIZE;
- }
-
- if (pbi->ext_tile_debug && cm->single_tile_decoding &&
- pbi->dec_tile_col >= 0) {
- const int tile_col = AOMMIN(pbi->dec_tile_col, cm->tile_cols - 1);
- const int mi_col = tile_col * cm->tile_width;
- const int ssx = ctx->img.x_chroma_shift;
- const int is_hbd =
- (ctx->img.fmt & AOM_IMG_FMT_HIGHBITDEPTH) ? 1 : 0;
- int plane;
- ctx->img.planes[0] += mi_col * MI_SIZE * (1 + is_hbd);
- if (num_planes > 1) {
- for (plane = 1; plane < MAX_MB_PLANE; ++plane) {
- ctx->img.planes[plane] +=
- mi_col * (MI_SIZE >> ssx) * (1 + is_hbd);
- }
- }
- ctx->img.d_w =
- AOMMIN(cm->tile_width, cm->mi_cols - mi_col) * MI_SIZE;
- }
-
- ctx->img.fb_priv = frame_bufs[cm->new_fb_idx].raw_frame_buffer.priv;
- img = &ctx->img;
- img->temporal_id = cm->temporal_layer_id;
- img->spatial_id = cm->spatial_layer_id;
- if (cm->skip_film_grain) grain_params->apply_grain = 0;
- aom_image_t *res = add_grain_if_needed(
- img, &ctx->image_with_grain[*index], grain_params);
- if (!res) {
- aom_internal_error(&pbi->common.error, AOM_CODEC_CORRUPT_FRAME,
- "Grain systhesis failed\n");
- }
- *index += 1; // Advance the iterator to point to the next image
- return res;
- }
- } else {
- // Decoding failed. Release the worker thread.
- frame_worker_data->received_frame = 0;
- ++ctx->available_threads;
- ctx->need_resync = 1;
- if (ctx->flushed != 1) return NULL;
- }
- } while (ctx->next_output_worker_id != ctx->next_submit_worker_id);
- }
- return NULL;
-}
-
-static aom_codec_err_t decoder_set_fb_fn(
- aom_codec_alg_priv_t *ctx, aom_get_frame_buffer_cb_fn_t cb_get,
- aom_release_frame_buffer_cb_fn_t cb_release, void *cb_priv) {
- if (cb_get == NULL || cb_release == NULL) {
- return AOM_CODEC_INVALID_PARAM;
- } else if (ctx->frame_workers == NULL) {
- // If the decoder has already been initialized, do not accept changes to
- // the frame buffer functions.
- ctx->get_ext_fb_cb = cb_get;
- ctx->release_ext_fb_cb = cb_release;
- ctx->ext_priv = cb_priv;
- return AOM_CODEC_OK;
- }
-
- return AOM_CODEC_ERROR;
-}
-
-static aom_codec_err_t ctrl_set_reference(aom_codec_alg_priv_t *ctx,
- va_list args) {
- av1_ref_frame_t *const data = va_arg(args, av1_ref_frame_t *);
-
- if (data) {
- av1_ref_frame_t *const frame = data;
- YV12_BUFFER_CONFIG sd;
- AVxWorker *const worker = ctx->frame_workers;
- FrameWorkerData *const frame_worker_data = (FrameWorkerData *)worker->data1;
- image2yuvconfig(&frame->img, &sd);
- return av1_set_reference_dec(&frame_worker_data->pbi->common, frame->idx,
- frame->use_external_ref, &sd);
- } else {
- return AOM_CODEC_INVALID_PARAM;
- }
-}
-
-static aom_codec_err_t ctrl_copy_reference(aom_codec_alg_priv_t *ctx,
- va_list args) {
- const av1_ref_frame_t *const frame = va_arg(args, av1_ref_frame_t *);
- if (frame) {
- YV12_BUFFER_CONFIG sd;
- AVxWorker *const worker = ctx->frame_workers;
- FrameWorkerData *const frame_worker_data = (FrameWorkerData *)worker->data1;
- image2yuvconfig(&frame->img, &sd);
- return av1_copy_reference_dec(frame_worker_data->pbi, frame->idx, &sd);
- } else {
- return AOM_CODEC_INVALID_PARAM;
- }
-}
-
-static aom_codec_err_t ctrl_get_reference(aom_codec_alg_priv_t *ctx,
- va_list args) {
- av1_ref_frame_t *data = va_arg(args, av1_ref_frame_t *);
- if (data) {
- YV12_BUFFER_CONFIG *fb;
- AVxWorker *const worker = ctx->frame_workers;
- FrameWorkerData *const frame_worker_data = (FrameWorkerData *)worker->data1;
- fb = get_ref_frame(&frame_worker_data->pbi->common, data->idx);
- if (fb == NULL) return AOM_CODEC_ERROR;
- yuvconfig2image(&data->img, fb, NULL);
- return AOM_CODEC_OK;
- } else {
- return AOM_CODEC_INVALID_PARAM;
- }
-}
-
-static aom_codec_err_t ctrl_get_new_frame_image(aom_codec_alg_priv_t *ctx,
- va_list args) {
- aom_image_t *new_img = va_arg(args, aom_image_t *);
- if (new_img) {
- YV12_BUFFER_CONFIG new_frame;
- AVxWorker *const worker = ctx->frame_workers;
- FrameWorkerData *const frame_worker_data = (FrameWorkerData *)worker->data1;
-
- if (av1_get_frame_to_show(frame_worker_data->pbi, &new_frame) == 0) {
- yuvconfig2image(new_img, &new_frame, NULL);
- return AOM_CODEC_OK;
- } else {
- return AOM_CODEC_ERROR;
- }
- } else {
- return AOM_CODEC_INVALID_PARAM;
- }
-}
-
-static aom_codec_err_t ctrl_copy_new_frame_image(aom_codec_alg_priv_t *ctx,
- va_list args) {
- aom_image_t *img = va_arg(args, aom_image_t *);
- if (img) {
- YV12_BUFFER_CONFIG new_frame;
- AVxWorker *const worker = ctx->frame_workers;
- FrameWorkerData *const frame_worker_data = (FrameWorkerData *)worker->data1;
-
- if (av1_get_frame_to_show(frame_worker_data->pbi, &new_frame) == 0) {
- YV12_BUFFER_CONFIG sd;
- image2yuvconfig(img, &sd);
- return av1_copy_new_frame_dec(&frame_worker_data->pbi->common, &new_frame,
- &sd);
- } else {
- return AOM_CODEC_ERROR;
- }
- } else {
- return AOM_CODEC_INVALID_PARAM;
- }
-}
-
-static aom_codec_err_t ctrl_set_postproc(aom_codec_alg_priv_t *ctx,
- va_list args) {
- (void)ctx;
- (void)args;
- return AOM_CODEC_INCAPABLE;
-}
-
-static aom_codec_err_t ctrl_set_dbg_options(aom_codec_alg_priv_t *ctx,
- va_list args) {
- (void)ctx;
- (void)args;
- return AOM_CODEC_INCAPABLE;
-}
-
-static aom_codec_err_t ctrl_get_last_ref_updates(aom_codec_alg_priv_t *ctx,
- va_list args) {
- int *const update_info = va_arg(args, int *);
-
- if (update_info) {
- if (ctx->frame_workers) {
- AVxWorker *const worker = ctx->frame_workers;
- FrameWorkerData *const frame_worker_data =
- (FrameWorkerData *)worker->data1;
- *update_info = frame_worker_data->pbi->refresh_frame_flags;
- return AOM_CODEC_OK;
- } else {
- return AOM_CODEC_ERROR;
- }
- }
-
- return AOM_CODEC_INVALID_PARAM;
-}
-
-static aom_codec_err_t ctrl_get_last_quantizer(aom_codec_alg_priv_t *ctx,
- va_list args) {
- int *const arg = va_arg(args, int *);
- if (arg == NULL) return AOM_CODEC_INVALID_PARAM;
- *arg =
- ((FrameWorkerData *)ctx->frame_workers[0].data1)->pbi->common.base_qindex;
- return AOM_CODEC_OK;
-}
-
-static aom_codec_err_t ctrl_get_frame_corrupted(aom_codec_alg_priv_t *ctx,
- va_list args) {
- int *corrupted = va_arg(args, int *);
-
- if (corrupted) {
- if (ctx->frame_workers) {
- AVxWorker *const worker = ctx->frame_workers;
- FrameWorkerData *const frame_worker_data =
- (FrameWorkerData *)worker->data1;
- AV1Decoder *const pbi = frame_worker_data->pbi;
- RefCntBuffer *const frame_bufs = pbi->common.buffer_pool->frame_bufs;
- if (pbi->seen_frame_header && pbi->num_output_frames == 0)
- return AOM_CODEC_ERROR;
- if (ctx->last_show_frame >= 0)
- *corrupted = frame_bufs[ctx->last_show_frame].buf.corrupted;
- return AOM_CODEC_OK;
- } else {
- return AOM_CODEC_ERROR;
- }
- }
-
- return AOM_CODEC_INVALID_PARAM;
-}
-
-static aom_codec_err_t ctrl_get_frame_size(aom_codec_alg_priv_t *ctx,
- va_list args) {
- int *const frame_size = va_arg(args, int *);
-
- if (frame_size) {
- if (ctx->frame_workers) {
- AVxWorker *const worker = ctx->frame_workers;
- FrameWorkerData *const frame_worker_data =
- (FrameWorkerData *)worker->data1;
- const AV1_COMMON *const cm = &frame_worker_data->pbi->common;
- frame_size[0] = cm->width;
- frame_size[1] = cm->height;
- return AOM_CODEC_OK;
- } else {
- return AOM_CODEC_ERROR;
- }
- }
-
- return AOM_CODEC_INVALID_PARAM;
-}
-
-static aom_codec_err_t ctrl_get_frame_header_info(aom_codec_alg_priv_t *ctx,
- va_list args) {
- aom_tile_data *const frame_header_info = va_arg(args, aom_tile_data *);
-
- if (frame_header_info) {
- if (ctx->frame_workers) {
- AVxWorker *const worker = ctx->frame_workers;
- FrameWorkerData *const frame_worker_data =
- (FrameWorkerData *)worker->data1;
- const AV1Decoder *pbi = frame_worker_data->pbi;
- frame_header_info->coded_tile_data_size = pbi->obu_size_hdr.size;
- frame_header_info->coded_tile_data = pbi->obu_size_hdr.data;
- frame_header_info->extra_size = pbi->frame_header_size;
- } else {
- return AOM_CODEC_ERROR;
- }
- }
-
- return AOM_CODEC_INVALID_PARAM;
-}
-
-static aom_codec_err_t ctrl_get_tile_data(aom_codec_alg_priv_t *ctx,
- va_list args) {
- aom_tile_data *const tile_data = va_arg(args, aom_tile_data *);
-
- if (tile_data) {
- if (ctx->frame_workers) {
- AVxWorker *const worker = ctx->frame_workers;
- FrameWorkerData *const frame_worker_data =
- (FrameWorkerData *)worker->data1;
- const AV1Decoder *pbi = frame_worker_data->pbi;
- tile_data->coded_tile_data_size =
- pbi->tile_buffers[pbi->dec_tile_row][pbi->dec_tile_col].size;
- tile_data->coded_tile_data =
- pbi->tile_buffers[pbi->dec_tile_row][pbi->dec_tile_col].data;
- return AOM_CODEC_OK;
- } else {
- return AOM_CODEC_ERROR;
- }
- }
-
- return AOM_CODEC_INVALID_PARAM;
-}
-
-static aom_codec_err_t ctrl_set_ext_ref_ptr(aom_codec_alg_priv_t *ctx,
- va_list args) {
- av1_ext_ref_frame_t *const data = va_arg(args, av1_ext_ref_frame_t *);
-
- if (data) {
- av1_ext_ref_frame_t *const ext_frames = data;
- ctx->ext_refs.num = ext_frames->num;
- for (int i = 0; i < ctx->ext_refs.num; i++) {
- image2yuvconfig(ext_frames->img++, &ctx->ext_refs.refs[i]);
- }
- return AOM_CODEC_OK;
- } else {
- return AOM_CODEC_INVALID_PARAM;
- }
-}
-
-static aom_codec_err_t ctrl_get_render_size(aom_codec_alg_priv_t *ctx,
- va_list args) {
- int *const render_size = va_arg(args, int *);
-
- if (render_size) {
- if (ctx->frame_workers) {
- AVxWorker *const worker = ctx->frame_workers;
- FrameWorkerData *const frame_worker_data =
- (FrameWorkerData *)worker->data1;
- const AV1_COMMON *const cm = &frame_worker_data->pbi->common;
- render_size[0] = cm->render_width;
- render_size[1] = cm->render_height;
- return AOM_CODEC_OK;
- } else {
- return AOM_CODEC_ERROR;
- }
- }
-
- return AOM_CODEC_INVALID_PARAM;
-}
-
-static aom_codec_err_t ctrl_get_bit_depth(aom_codec_alg_priv_t *ctx,
- va_list args) {
- unsigned int *const bit_depth = va_arg(args, unsigned int *);
- AVxWorker *const worker = &ctx->frame_workers[ctx->next_output_worker_id];
-
- if (bit_depth) {
- if (worker) {
- FrameWorkerData *const frame_worker_data =
- (FrameWorkerData *)worker->data1;
- const AV1_COMMON *const cm = &frame_worker_data->pbi->common;
- *bit_depth = cm->seq_params.bit_depth;
- return AOM_CODEC_OK;
- } else {
- return AOM_CODEC_ERROR;
- }
- }
-
- return AOM_CODEC_INVALID_PARAM;
-}
-
-static aom_img_fmt_t get_img_format(int subsampling_x, int subsampling_y,
- int use_highbitdepth) {
- aom_img_fmt_t fmt = 0;
-
- if (subsampling_x == 0 && subsampling_y == 0)
- fmt = AOM_IMG_FMT_I444;
- else if (subsampling_x == 1 && subsampling_y == 0)
- fmt = AOM_IMG_FMT_I422;
- else if (subsampling_x == 1 && subsampling_y == 1)
- fmt = AOM_IMG_FMT_I420;
-
- if (use_highbitdepth) fmt |= AOM_IMG_FMT_HIGHBITDEPTH;
- return fmt;
-}
-
-static aom_codec_err_t ctrl_get_img_format(aom_codec_alg_priv_t *ctx,
- va_list args) {
- aom_img_fmt_t *const img_fmt = va_arg(args, aom_img_fmt_t *);
- AVxWorker *const worker = &ctx->frame_workers[ctx->next_output_worker_id];
-
- if (img_fmt) {
- if (worker) {
- FrameWorkerData *const frame_worker_data =
- (FrameWorkerData *)worker->data1;
- const AV1_COMMON *const cm = &frame_worker_data->pbi->common;
-
- *img_fmt = get_img_format(cm->seq_params.subsampling_x,
- cm->seq_params.subsampling_y,
- cm->seq_params.use_highbitdepth);
- return AOM_CODEC_OK;
- } else {
- return AOM_CODEC_ERROR;
- }
- }
-
- return AOM_CODEC_INVALID_PARAM;
-}
-
-static aom_codec_err_t ctrl_get_tile_size(aom_codec_alg_priv_t *ctx,
- va_list args) {
- unsigned int *const tile_size = va_arg(args, unsigned int *);
- AVxWorker *const worker = &ctx->frame_workers[ctx->next_output_worker_id];
-
- if (tile_size) {
- if (worker) {
- FrameWorkerData *const frame_worker_data =
- (FrameWorkerData *)worker->data1;
- const AV1_COMMON *const cm = &frame_worker_data->pbi->common;
- *tile_size =
- ((cm->tile_width * MI_SIZE) << 16) + cm->tile_height * MI_SIZE;
- return AOM_CODEC_OK;
- } else {
- return AOM_CODEC_ERROR;
- }
- }
- return AOM_CODEC_INVALID_PARAM;
-}
-
-static aom_codec_err_t ctrl_set_invert_tile_order(aom_codec_alg_priv_t *ctx,
- va_list args) {
- ctx->invert_tile_order = va_arg(args, int);
- return AOM_CODEC_OK;
-}
-
-static aom_codec_err_t ctrl_set_byte_alignment(aom_codec_alg_priv_t *ctx,
- va_list args) {
- const int legacy_byte_alignment = 0;
- const int min_byte_alignment = 32;
- const int max_byte_alignment = 1024;
- const int byte_alignment = va_arg(args, int);
-
- if (byte_alignment != legacy_byte_alignment &&
- (byte_alignment < min_byte_alignment ||
- byte_alignment > max_byte_alignment ||
- (byte_alignment & (byte_alignment - 1)) != 0))
- return AOM_CODEC_INVALID_PARAM;
-
- ctx->byte_alignment = byte_alignment;
- if (ctx->frame_workers) {
- AVxWorker *const worker = ctx->frame_workers;
- FrameWorkerData *const frame_worker_data = (FrameWorkerData *)worker->data1;
- frame_worker_data->pbi->common.byte_alignment = byte_alignment;
- }
- return AOM_CODEC_OK;
-}
-
-static aom_codec_err_t ctrl_set_skip_loop_filter(aom_codec_alg_priv_t *ctx,
- va_list args) {
- ctx->skip_loop_filter = va_arg(args, int);
-
- if (ctx->frame_workers) {
- AVxWorker *const worker = ctx->frame_workers;
- FrameWorkerData *const frame_worker_data = (FrameWorkerData *)worker->data1;
- frame_worker_data->pbi->common.skip_loop_filter = ctx->skip_loop_filter;
- }
-
- return AOM_CODEC_OK;
-}
-
-static aom_codec_err_t ctrl_set_skip_film_grain(aom_codec_alg_priv_t *ctx,
- va_list args) {
- ctx->skip_film_grain = va_arg(args, int);
-
- if (ctx->frame_workers) {
- AVxWorker *const worker = ctx->frame_workers;
- FrameWorkerData *const frame_worker_data = (FrameWorkerData *)worker->data1;
- frame_worker_data->pbi->common.skip_film_grain = ctx->skip_film_grain;
- }
-
- return AOM_CODEC_OK;
-}
-
-static aom_codec_err_t ctrl_get_accounting(aom_codec_alg_priv_t *ctx,
- va_list args) {
-#if !CONFIG_ACCOUNTING
- (void)ctx;
- (void)args;
- return AOM_CODEC_INCAPABLE;
-#else
- if (ctx->frame_workers) {
- AVxWorker *const worker = ctx->frame_workers;
- FrameWorkerData *const frame_worker_data = (FrameWorkerData *)worker->data1;
- AV1Decoder *pbi = frame_worker_data->pbi;
- Accounting **acct = va_arg(args, Accounting **);
- *acct = &pbi->accounting;
- return AOM_CODEC_OK;
- }
- return AOM_CODEC_ERROR;
-#endif
-}
-static aom_codec_err_t ctrl_set_decode_tile_row(aom_codec_alg_priv_t *ctx,
- va_list args) {
- ctx->decode_tile_row = va_arg(args, int);
- return AOM_CODEC_OK;
-}
-
-static aom_codec_err_t ctrl_set_decode_tile_col(aom_codec_alg_priv_t *ctx,
- va_list args) {
- ctx->decode_tile_col = va_arg(args, int);
- return AOM_CODEC_OK;
-}
-
-static aom_codec_err_t ctrl_set_tile_mode(aom_codec_alg_priv_t *ctx,
- va_list args) {
- ctx->tile_mode = va_arg(args, unsigned int);
- return AOM_CODEC_OK;
-}
-
-static aom_codec_err_t ctrl_set_is_annexb(aom_codec_alg_priv_t *ctx,
- va_list args) {
- ctx->is_annexb = va_arg(args, unsigned int);
- return AOM_CODEC_OK;
-}
-
-static aom_codec_err_t ctrl_set_operating_point(aom_codec_alg_priv_t *ctx,
- va_list args) {
- ctx->operating_point = va_arg(args, int);
- return AOM_CODEC_OK;
-}
-
-static aom_codec_err_t ctrl_set_output_all_layers(aom_codec_alg_priv_t *ctx,
- va_list args) {
- ctx->output_all_layers = va_arg(args, int);
- return AOM_CODEC_OK;
-}
-
-static aom_codec_err_t ctrl_set_inspection_callback(aom_codec_alg_priv_t *ctx,
- va_list args) {
-#if !CONFIG_INSPECTION
- (void)ctx;
- (void)args;
- return AOM_CODEC_INCAPABLE;
-#else
- aom_inspect_init *init = va_arg(args, aom_inspect_init *);
- ctx->inspect_cb = init->inspect_cb;
- ctx->inspect_ctx = init->inspect_ctx;
- return AOM_CODEC_OK;
-#endif
-}
-
-static aom_codec_err_t ctrl_ext_tile_debug(aom_codec_alg_priv_t *ctx,
- va_list args) {
- ctx->ext_tile_debug = va_arg(args, int);
- return AOM_CODEC_OK;
-}
-
-static aom_codec_err_t ctrl_set_row_mt(aom_codec_alg_priv_t *ctx,
- va_list args) {
- ctx->row_mt = va_arg(args, unsigned int);
- return AOM_CODEC_OK;
-}
-
-static aom_codec_ctrl_fn_map_t decoder_ctrl_maps[] = {
- { AV1_COPY_REFERENCE, ctrl_copy_reference },
-
- // Setters
- { AV1_SET_REFERENCE, ctrl_set_reference },
- { AOM_SET_POSTPROC, ctrl_set_postproc },
- { AOM_SET_DBG_COLOR_REF_FRAME, ctrl_set_dbg_options },
- { AOM_SET_DBG_COLOR_MB_MODES, ctrl_set_dbg_options },
- { AOM_SET_DBG_COLOR_B_MODES, ctrl_set_dbg_options },
- { AOM_SET_DBG_DISPLAY_MV, ctrl_set_dbg_options },
- { AV1_INVERT_TILE_DECODE_ORDER, ctrl_set_invert_tile_order },
- { AV1_SET_BYTE_ALIGNMENT, ctrl_set_byte_alignment },
- { AV1_SET_SKIP_LOOP_FILTER, ctrl_set_skip_loop_filter },
- { AV1_SET_DECODE_TILE_ROW, ctrl_set_decode_tile_row },
- { AV1_SET_DECODE_TILE_COL, ctrl_set_decode_tile_col },
- { AV1_SET_TILE_MODE, ctrl_set_tile_mode },
- { AV1D_SET_IS_ANNEXB, ctrl_set_is_annexb },
- { AV1D_SET_OPERATING_POINT, ctrl_set_operating_point },
- { AV1D_SET_OUTPUT_ALL_LAYERS, ctrl_set_output_all_layers },
- { AV1_SET_INSPECTION_CALLBACK, ctrl_set_inspection_callback },
- { AV1D_EXT_TILE_DEBUG, ctrl_ext_tile_debug },
- { AV1D_SET_ROW_MT, ctrl_set_row_mt },
- { AV1D_SET_EXT_REF_PTR, ctrl_set_ext_ref_ptr },
- { AV1D_SET_SKIP_FILM_GRAIN, ctrl_set_skip_film_grain },
-
- // Getters
- { AOMD_GET_FRAME_CORRUPTED, ctrl_get_frame_corrupted },
- { AOMD_GET_LAST_QUANTIZER, ctrl_get_last_quantizer },
- { AOMD_GET_LAST_REF_UPDATES, ctrl_get_last_ref_updates },
- { AV1D_GET_BIT_DEPTH, ctrl_get_bit_depth },
- { AV1D_GET_IMG_FORMAT, ctrl_get_img_format },
- { AV1D_GET_TILE_SIZE, ctrl_get_tile_size },
- { AV1D_GET_DISPLAY_SIZE, ctrl_get_render_size },
- { AV1D_GET_FRAME_SIZE, ctrl_get_frame_size },
- { AV1_GET_ACCOUNTING, ctrl_get_accounting },
- { AV1_GET_NEW_FRAME_IMAGE, ctrl_get_new_frame_image },
- { AV1_COPY_NEW_FRAME_IMAGE, ctrl_copy_new_frame_image },
- { AV1_GET_REFERENCE, ctrl_get_reference },
- { AV1D_GET_FRAME_HEADER_INFO, ctrl_get_frame_header_info },
- { AV1D_GET_TILE_DATA, ctrl_get_tile_data },
-
- { -1, NULL },
-};
-
-#ifndef VERSION_STRING
-#define VERSION_STRING
-#endif
-CODEC_INTERFACE(aom_codec_av1_dx) = {
- "AOMedia Project AV1 Decoder" VERSION_STRING,
- AOM_CODEC_INTERNAL_ABI_VERSION,
- AOM_CODEC_CAP_DECODER |
- AOM_CODEC_CAP_EXTERNAL_FRAME_BUFFER, // aom_codec_caps_t
- decoder_init, // aom_codec_init_fn_t
- decoder_destroy, // aom_codec_destroy_fn_t
- decoder_ctrl_maps, // aom_codec_ctrl_fn_map_t
- {
- // NOLINT
- decoder_peek_si, // aom_codec_peek_si_fn_t
- decoder_get_si, // aom_codec_get_si_fn_t
- decoder_decode, // aom_codec_decode_fn_t
- decoder_get_frame, // aom_codec_get_frame_fn_t
- decoder_set_fb_fn, // aom_codec_set_fb_fn_t
- },
- {
- // NOLINT
- 0,
- NULL, // aom_codec_enc_cfg_map_t
- NULL, // aom_codec_encode_fn_t
- NULL, // aom_codec_get_cx_data_fn_t
- NULL, // aom_codec_enc_config_set_fn_t
- NULL, // aom_codec_get_global_headers_fn_t
- NULL, // aom_codec_get_preview_frame_fn_t
- NULL // aom_codec_enc_mr_get_mem_loc_fn_t
- }
-};
diff --git a/third_party/aom/av1/av1_iface_common.h b/third_party/aom/av1/av1_iface_common.h
deleted file mode 100644
index 4a7af580b..000000000
--- a/third_party/aom/av1/av1_iface_common.h
+++ /dev/null
@@ -1,136 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-#ifndef AOM_AV1_AV1_IFACE_COMMON_H_
-#define AOM_AV1_AV1_IFACE_COMMON_H_
-
-#include "aom_ports/mem.h"
-#include "aom_scale/yv12config.h"
-
-static void yuvconfig2image(aom_image_t *img, const YV12_BUFFER_CONFIG *yv12,
- void *user_priv) {
- /* aom_img_wrap() doesn't allow specifying independent strides for
- * the Y, U, and V planes, nor other alignment adjustments that
- * might be representable by a YV12_BUFFER_CONFIG, so we just
- * initialize all the fields.
- */
- int bps;
- if (!yv12->subsampling_y) {
- if (!yv12->subsampling_x) {
- img->fmt = AOM_IMG_FMT_I444;
- bps = 24;
- } else {
- img->fmt = AOM_IMG_FMT_I422;
- bps = 16;
- }
- } else {
- img->fmt = AOM_IMG_FMT_I420;
- bps = 12;
- }
- img->cp = yv12->color_primaries;
- img->tc = yv12->transfer_characteristics;
- img->mc = yv12->matrix_coefficients;
- img->monochrome = yv12->monochrome;
- img->csp = yv12->chroma_sample_position;
- img->range = yv12->color_range;
- img->bit_depth = 8;
- img->w = yv12->y_width;
- img->h = yv12->y_height;
- img->d_w = yv12->y_crop_width;
- img->d_h = yv12->y_crop_height;
- img->r_w = yv12->render_width;
- img->r_h = yv12->render_height;
- img->x_chroma_shift = yv12->subsampling_x;
- img->y_chroma_shift = yv12->subsampling_y;
- img->planes[AOM_PLANE_Y] = yv12->y_buffer;
- img->planes[AOM_PLANE_U] = yv12->u_buffer;
- img->planes[AOM_PLANE_V] = yv12->v_buffer;
- img->planes[AOM_PLANE_ALPHA] = NULL;
- img->stride[AOM_PLANE_Y] = yv12->y_stride;
- img->stride[AOM_PLANE_U] = yv12->uv_stride;
- img->stride[AOM_PLANE_V] = yv12->uv_stride;
- img->stride[AOM_PLANE_ALPHA] = yv12->y_stride;
- if (yv12->flags & YV12_FLAG_HIGHBITDEPTH) {
- // aom_image_t uses byte strides and a pointer to the first byte
- // of the image.
- img->fmt = (aom_img_fmt_t)(img->fmt | AOM_IMG_FMT_HIGHBITDEPTH);
- img->bit_depth = yv12->bit_depth;
- img->planes[AOM_PLANE_Y] = (uint8_t *)CONVERT_TO_SHORTPTR(yv12->y_buffer);
- img->planes[AOM_PLANE_U] = (uint8_t *)CONVERT_TO_SHORTPTR(yv12->u_buffer);
- img->planes[AOM_PLANE_V] = (uint8_t *)CONVERT_TO_SHORTPTR(yv12->v_buffer);
- img->planes[AOM_PLANE_ALPHA] = NULL;
- img->stride[AOM_PLANE_Y] = 2 * yv12->y_stride;
- img->stride[AOM_PLANE_U] = 2 * yv12->uv_stride;
- img->stride[AOM_PLANE_V] = 2 * yv12->uv_stride;
- img->stride[AOM_PLANE_ALPHA] = 2 * yv12->y_stride;
- }
- img->bps = bps;
- img->user_priv = user_priv;
- img->img_data = yv12->buffer_alloc;
- img->img_data_owner = 0;
- img->self_allocd = 0;
-}
-
-static aom_codec_err_t image2yuvconfig(const aom_image_t *img,
- YV12_BUFFER_CONFIG *yv12) {
- yv12->y_buffer = img->planes[AOM_PLANE_Y];
- yv12->u_buffer = img->planes[AOM_PLANE_U];
- yv12->v_buffer = img->planes[AOM_PLANE_V];
-
- yv12->y_crop_width = img->d_w;
- yv12->y_crop_height = img->d_h;
- yv12->render_width = img->r_w;
- yv12->render_height = img->r_h;
- yv12->y_width = img->w;
- yv12->y_height = img->h;
-
- yv12->uv_width =
- img->x_chroma_shift == 1 ? (1 + yv12->y_width) / 2 : yv12->y_width;
- yv12->uv_height =
- img->y_chroma_shift == 1 ? (1 + yv12->y_height) / 2 : yv12->y_height;
- yv12->uv_crop_width = yv12->uv_width;
- yv12->uv_crop_height = yv12->uv_height;
-
- yv12->y_stride = img->stride[AOM_PLANE_Y];
- yv12->uv_stride = img->stride[AOM_PLANE_U];
- yv12->color_primaries = img->cp;
- yv12->transfer_characteristics = img->tc;
- yv12->matrix_coefficients = img->mc;
- yv12->monochrome = img->monochrome;
- yv12->chroma_sample_position = img->csp;
- yv12->color_range = img->range;
-
- if (img->fmt & AOM_IMG_FMT_HIGHBITDEPTH) {
- // In aom_image_t
- // planes point to uint8 address of start of data
- // stride counts uint8s to reach next row
- // In YV12_BUFFER_CONFIG
- // y_buffer, u_buffer, v_buffer point to uint16 address of data
- // stride and border counts in uint16s
- // This means that all the address calculations in the main body of code
- // should work correctly.
- // However, before we do any pixel operations we need to cast the address
- // to a uint16 ponter and double its value.
- yv12->y_buffer = CONVERT_TO_BYTEPTR(yv12->y_buffer);
- yv12->u_buffer = CONVERT_TO_BYTEPTR(yv12->u_buffer);
- yv12->v_buffer = CONVERT_TO_BYTEPTR(yv12->v_buffer);
- yv12->y_stride >>= 1;
- yv12->uv_stride >>= 1;
- yv12->flags = YV12_FLAG_HIGHBITDEPTH;
- } else {
- yv12->flags = 0;
- }
- yv12->border = (yv12->y_stride - img->w) / 2;
- yv12->subsampling_x = img->x_chroma_shift;
- yv12->subsampling_y = img->y_chroma_shift;
- return AOM_CODEC_OK;
-}
-
-#endif // AOM_AV1_AV1_IFACE_COMMON_H_
diff --git a/third_party/aom/av1/common/alloccommon.c b/third_party/aom/av1/common/alloccommon.c
deleted file mode 100644
index 1bf81c91d..000000000
--- a/third_party/aom/av1/common/alloccommon.c
+++ /dev/null
@@ -1,300 +0,0 @@
-/*
- *
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#include "config/aom_config.h"
-
-#include "aom_mem/aom_mem.h"
-
-#include "av1/common/alloccommon.h"
-#include "av1/common/blockd.h"
-#include "av1/common/entropymode.h"
-#include "av1/common/entropymv.h"
-#include "av1/common/onyxc_int.h"
-
-// Returns the number of macroblocks needed to cover a width x height frame.
-//
-// Each dimension is aligned with ALIGN_POWER_OF_TWO(dim, 3), converted to
-// mode-info units by shifting right by MI_SIZE_LOG2, and then mapped to a
-// macroblock count with (mi + 2) >> 2. The result is rows * columns.
-int av1_get_MBs(int width, int height) {
-  const int mi_across = (ALIGN_POWER_OF_TWO(width, 3)) >> MI_SIZE_LOG2;
-  const int mi_down = (ALIGN_POWER_OF_TWO(height, 3)) >> MI_SIZE_LOG2;
-  const int mb_across = (mi_across + 2) >> 2;
-  const int mb_down = (mi_down + 2) >> 2;
-
-  return mb_down * mb_across;
-}
-
-#if LOOP_FILTER_BITMASK
-// (Re)allocates cm->lf.lfm, the array of loop-filter bit masks, for the
-// current cm->mi_cols x cm->mi_rows frame.
-//
-// Any previously allocated array is released first. Returns 0 on success and
-// 1 when the allocation fails. On failure lfm is NULL and lfm_num / lfm_stride
-// are reset to 0, so the bookkeeping never describes entries that do not
-// exist (the same state free_loop_filter_mask() leaves behind).
-static int alloc_loop_filter_mask(AV1_COMMON *cm) {
-  aom_free(cm->lf.lfm);
-  cm->lf.lfm = NULL;
-
-  // Each lfm holds bit masks for all the 4x4 blocks in a max
-  // 64x64 (128x128 for ext_partitions) region. The stride
-  // and rows are rounded up / truncated to a multiple of 16
-  // (32 for ext_partition).
-  cm->lf.lfm_stride = (cm->mi_cols + (MI_SIZE_64X64 - 1)) >> MIN_MIB_SIZE_LOG2;
-  cm->lf.lfm_num = ((cm->mi_rows + (MI_SIZE_64X64 - 1)) >> MIN_MIB_SIZE_LOG2) *
-                   cm->lf.lfm_stride;
-  cm->lf.lfm =
-      (LoopFilterMask *)aom_calloc(cm->lf.lfm_num, sizeof(*cm->lf.lfm));
-  if (!cm->lf.lfm) {
-    // Do not leave a non-zero element count next to a NULL array: code that
-    // walks lfm_num entries would dereference NULL.
-    cm->lf.lfm_num = 0;
-    cm->lf.lfm_stride = 0;
-    return 1;
-  }
-
-  unsigned int i;
-  for (i = 0; i < cm->lf.lfm_num; ++i) av1_zero(cm->lf.lfm[i]);
-
-  return 0;
-}
-
-// Releases the loop-filter bit-mask array of cm and zeroes its element count
-// and stride. Does nothing when no array is currently allocated.
-static void free_loop_filter_mask(AV1_COMMON *cm) {
-  if (cm->lf.lfm != NULL) {
-    aom_free(cm->lf.lfm);
-    cm->lf.lfm = NULL;
-    cm->lf.lfm_num = 0;
-    cm->lf.lfm_stride = 0;
-  }
-}
-#endif
-
-// Derives the mode-info (mi_*) and macroblock (mb_*, MBs) grid dimensions of
-// cm from a frame size given in luma pixels.
-//
-// When LOOP_FILTER_BITMASK is enabled the loop-filter mask array is
-// reallocated for the new dimensions as well; the status returned by that
-// allocation is not inspected here.
-void av1_set_mb_mi(AV1_COMMON *cm, int width, int height) {
-  // The decoded width and height are forced to multiples of 8 luma pixels
-  // (which may be only a multiple of 4 chroma pixels when subsampling is
-  // used). This simplifies the implementation of various experiments,
-  // eg. cdef, which operates on units of 8x8 luma pixels.
-  cm->mi_cols = (ALIGN_POWER_OF_TWO(width, 3)) >> MI_SIZE_LOG2;
-  cm->mi_rows = (ALIGN_POWER_OF_TWO(height, 3)) >> MI_SIZE_LOG2;
-  cm->mi_stride = calc_mi_size(cm->mi_cols);
-
-  cm->mb_cols = (cm->mi_cols + 2) >> 2;
-  cm->mb_rows = (cm->mi_rows + 2) >> 2;
-  cm->MBs = cm->mb_rows * cm->mb_cols;
-
-#if LOOP_FILTER_BITMASK
-  alloc_loop_filter_mask(cm);
-#endif
-}
-
-// Releases everything owned by the FRAME_BUFFERS entries of pool.
-//
-// An entry that is still referenced and has raw frame data is handed back
-// through pool->release_fb_cb and its reference count is cleared. The
-// per-entry mvs and seg_map arrays and the frame buffer itself are freed for
-// every entry.
-void av1_free_ref_frame_buffers(BufferPool *pool) {
-  for (int idx = 0; idx < FRAME_BUFFERS; ++idx) {
-    if (pool->frame_bufs[idx].ref_count > 0) {
-      if (pool->frame_bufs[idx].raw_frame_buffer.data != NULL) {
-        pool->release_fb_cb(pool->cb_priv,
-                            &pool->frame_bufs[idx].raw_frame_buffer);
-        pool->frame_bufs[idx].ref_count = 0;
-      }
-    }
-
-    aom_free(pool->frame_bufs[idx].mvs);
-    pool->frame_bufs[idx].mvs = NULL;
-
-    aom_free(pool->frame_bufs[idx].seg_map);
-    pool->frame_bufs[idx].seg_map = NULL;
-
-    aom_free_frame_buffer(&pool->frame_bufs[idx].buf);
-  }
-}
-
-// Allocates the buffers used by loop restoration: the per-plane restoration
-// structs, the shared temporary buffer (rst_tmpbuf), the line buffers (rlbs)
-// and, per plane, the "above"/"below" stripe boundary buffers.
-//
-// Assumes cm->rst_info[p].restoration_unit_size is already initialized.
-//
-// Allocation failures are reported through CHECK_MEM_ERROR (defined outside
-// this file; presumably it does not return normally on failure - confirm).
-void av1_alloc_restoration_buffers(AV1_COMMON *cm) {
-  const int num_planes = av1_num_planes(cm);
-  for (int p = 0; p < num_planes; ++p)
-    av1_alloc_restoration_struct(cm, &cm->rst_info[p], p > 0);
-
-  if (cm->rst_tmpbuf == NULL) {
-    CHECK_MEM_ERROR(cm, cm->rst_tmpbuf,
-                    (int32_t *)aom_memalign(16, RESTORATION_TMPBUF_SIZE));
-  }
-
-  if (cm->rlbs == NULL) {
-    CHECK_MEM_ERROR(cm, cm->rlbs, aom_malloc(sizeof(RestorationLineBuffers)));
-  }
-
-  // For striped loop restoration, we divide each row of tiles into "stripes",
-  // of height 64 luma pixels but with an offset by RESTORATION_UNIT_OFFSET
-  // luma pixels to match the output from CDEF. We will need to store 2 *
-  // RESTORATION_CTX_VERT lines of data for each stripe, and also need to be
-  // able to quickly answer the question "Where is the <n>'th stripe for tile
-  // row <m>?" To make that efficient, we generate the rst_end_stripe array,
-  // which holds the running stripe count at the end of each tile row.
-  int num_stripes = 0;
-  for (int i = 0; i < cm->tile_rows; ++i) {
-    TileInfo tile_info;
-    av1_tile_set_row(&tile_info, cm, i);
-    const int mi_h = tile_info.mi_row_end - tile_info.mi_row_start;
-    const int ext_h = RESTORATION_UNIT_OFFSET + (mi_h << MI_SIZE_LOG2);
-    const int tile_stripes = (ext_h + 63) / 64;
-    num_stripes += tile_stripes;
-    cm->rst_end_stripe[i] = num_stripes;
-  }
-
-  // Now we need to allocate enough space to store the line buffers for the
-  // stripes
-  const int frame_w = cm->superres_upscaled_width;
-  const int use_highbd = cm->seq_params.use_highbitdepth ? 1 : 0;
-
-  for (int p = 0; p < num_planes; ++p) {
-    const int is_uv = p > 0;
-    const int ss_x = is_uv && cm->seq_params.subsampling_x;
-    const int plane_w = ((frame_w + ss_x) >> ss_x) + 2 * RESTORATION_EXTRA_HORZ;
-    const int stride = ALIGN_POWER_OF_TWO(plane_w, 5);
-    // The byte count doubles when use_highbd is 1.
-    const int buf_size = (num_stripes * stride * RESTORATION_CTX_VERT)
-                         << use_highbd;
-    RestorationStripeBoundaries *boundaries = &cm->rst_info[p].boundaries;
-
-    if (buf_size != boundaries->stripe_boundary_size ||
-        boundaries->stripe_boundary_above == NULL ||
-        boundaries->stripe_boundary_below == NULL) {
-      aom_free(boundaries->stripe_boundary_above);
-      aom_free(boundaries->stripe_boundary_below);
-
-      // Clear the stale pointers and size before reallocating. If either
-      // allocation below fails, neither field may keep pointing at memory
-      // that has just been freed, otherwise av1_free_restoration_buffers()
-      // would free it a second time.
-      boundaries->stripe_boundary_above = NULL;
-      boundaries->stripe_boundary_below = NULL;
-      boundaries->stripe_boundary_size = 0;
-
-      CHECK_MEM_ERROR(cm, boundaries->stripe_boundary_above,
-                      (uint8_t *)aom_memalign(32, buf_size));
-      CHECK_MEM_ERROR(cm, boundaries->stripe_boundary_below,
-                      (uint8_t *)aom_memalign(32, buf_size));
-
-      boundaries->stripe_boundary_size = buf_size;
-    }
-    boundaries->stripe_boundary_stride = stride;
-  }
-}
-
-// Frees every loop-restoration buffer owned by cm: the per-plane restoration
-// structs, the temporary buffer, the line buffers, the per-plane stripe
-// boundary buffers and the rst_frame frame buffer. Freed pointers are reset
-// to NULL.
-void av1_free_restoration_buffers(AV1_COMMON *cm) {
-  for (int plane = 0; plane < MAX_MB_PLANE; ++plane) {
-    av1_free_restoration_struct(&cm->rst_info[plane]);
-  }
-
-  aom_free(cm->rst_tmpbuf);
-  cm->rst_tmpbuf = NULL;
-
-  aom_free(cm->rlbs);
-  cm->rlbs = NULL;
-
-  for (int plane = 0; plane < MAX_MB_PLANE; ++plane) {
-    RestorationStripeBoundaries *const stripe_bufs =
-        &cm->rst_info[plane].boundaries;
-    aom_free(stripe_bufs->stripe_boundary_above);
-    aom_free(stripe_bufs->stripe_boundary_below);
-    stripe_bufs->stripe_boundary_above = NULL;
-    stripe_bufs->stripe_boundary_below = NULL;
-  }
-
-  aom_free_frame_buffer(&cm->rst_frame);
-}
-
-// Frees the "above" context arrays of cm (entropy, segmentation/partition
-// and transform contexts) and resets the allocation bookkeeping to zero.
-//
-// num_free_above_contexts is the number of tile rows whose per-row arrays are
-// released. The number of planes is taken from
-// cm->num_allocated_above_context_planes.
-//
-// The function tolerates a partially allocated state: a top-level array that
-// is still NULL (for example because av1_alloc_above_context_buffers()
-// returned early on an allocation failure) is skipped instead of being
-// dereferenced.
-void av1_free_above_context_buffers(AV1_COMMON *cm,
-                                    int num_free_above_contexts) {
-  const int num_planes = cm->num_allocated_above_context_planes;
-
-  for (int i = 0; i < num_planes; i++) {
-    if (cm->above_context[i] == NULL) continue;
-    for (int tile_row = 0; tile_row < num_free_above_contexts; tile_row++) {
-      aom_free(cm->above_context[i][tile_row]);
-      cm->above_context[i][tile_row] = NULL;
-    }
-    aom_free(cm->above_context[i]);
-    cm->above_context[i] = NULL;
-  }
-
-  if (cm->above_seg_context != NULL) {
-    for (int tile_row = 0; tile_row < num_free_above_contexts; tile_row++) {
-      aom_free(cm->above_seg_context[tile_row]);
-      cm->above_seg_context[tile_row] = NULL;
-    }
-    aom_free(cm->above_seg_context);
-    cm->above_seg_context = NULL;
-  }
-
-  if (cm->above_txfm_context != NULL) {
-    for (int tile_row = 0; tile_row < num_free_above_contexts; tile_row++) {
-      aom_free(cm->above_txfm_context[tile_row]);
-      cm->above_txfm_context[tile_row] = NULL;
-    }
-    aom_free(cm->above_txfm_context);
-    cm->above_txfm_context = NULL;
-  }
-
-  cm->num_allocated_above_contexts = 0;
-  cm->num_allocated_above_context_mi_col = 0;
-  cm->num_allocated_above_context_planes = 0;
-}
-
-// Releases the context buffers of cm: the mode-info storage (through the
-// cm->free_mi callback), all currently allocated above-context rows and,
-// when LOOP_FILTER_BITMASK is enabled, the loop-filter mask array.
-void av1_free_context_buffers(AV1_COMMON *cm) {
-  // Mode-info storage is owned by whoever installed the callback.
-  cm->free_mi(cm);
-
-  // Free as many tile rows as were recorded at allocation time.
-  av1_free_above_context_buffers(cm, cm->num_allocated_above_contexts);
-
-#if LOOP_FILTER_BITMASK
-  free_loop_filter_mask(cm);
-#endif
-}
-
-// Allocates the zero-initialised "above" context arrays of cm for
-// num_alloc_above_contexts tile rows.
-//
-// For every plane reported by av1_num_planes() an array of per-tile-row
-// entropy context rows is created; one array each is created for the
-// segmentation/partition and transform contexts. Every row holds
-// cm->mi_cols aligned with ALIGN_POWER_OF_TWO(..., MAX_MIB_SIZE_LOG2)
-// entries.
-//
-// Returns 0 on success and 1 as soon as any allocation fails. On failure the
-// arrays allocated so far are left in place and the bookkeeping fields
-// already describe the requested size; releasing them is left to the caller.
-int av1_alloc_above_context_buffers(AV1_COMMON *cm,
-                                    int num_alloc_above_contexts) {
-  const int num_planes = av1_num_planes(cm);
-  int plane_idx;
-  const int aligned_mi_cols =
-      ALIGN_POWER_OF_TWO(cm->mi_cols, MAX_MIB_SIZE_LOG2);
-
-  // Allocate above context buffers
-  cm->num_allocated_above_contexts = num_alloc_above_contexts;
-  cm->num_allocated_above_context_mi_col = aligned_mi_cols;
-  cm->num_allocated_above_context_planes = num_planes;
-
-  // Top-level arrays: one row pointer per tile row. The element size is
-  // taken from the pointed-to element (sizeof(*array)) so that it stays
-  // correct by construction rather than by all pointers sharing one size.
-  for (plane_idx = 0; plane_idx < num_planes; plane_idx++) {
-    cm->above_context[plane_idx] = (ENTROPY_CONTEXT **)aom_calloc(
-        num_alloc_above_contexts, sizeof(*cm->above_context[plane_idx]));
-    if (!cm->above_context[plane_idx]) return 1;
-  }
-
-  cm->above_seg_context = (PARTITION_CONTEXT **)aom_calloc(
-      num_alloc_above_contexts, sizeof(*cm->above_seg_context));
-  if (!cm->above_seg_context) return 1;
-
-  cm->above_txfm_context = (TXFM_CONTEXT **)aom_calloc(
-      num_alloc_above_contexts, sizeof(*cm->above_txfm_context));
-  if (!cm->above_txfm_context) return 1;
-
-  // Per-tile-row arrays: aligned_mi_cols entries each.
-  for (int tile_row = 0; tile_row < num_alloc_above_contexts; tile_row++) {
-    for (plane_idx = 0; plane_idx < num_planes; plane_idx++) {
-      cm->above_context[plane_idx][tile_row] = (ENTROPY_CONTEXT *)aom_calloc(
-          aligned_mi_cols, sizeof(*cm->above_context[plane_idx][tile_row]));
-      if (!cm->above_context[plane_idx][tile_row]) return 1;
-    }
-
-    cm->above_seg_context[tile_row] = (PARTITION_CONTEXT *)aom_calloc(
-        aligned_mi_cols, sizeof(*cm->above_seg_context[tile_row]));
-    if (!cm->above_seg_context[tile_row]) return 1;
-
-    cm->above_txfm_context[tile_row] = (TXFM_CONTEXT *)aom_calloc(
-        aligned_mi_cols, sizeof(*cm->above_txfm_context[tile_row]));
-    if (!cm->above_txfm_context[tile_row]) return 1;
-  }
-
-  return 0;
-}
-
-// Sizes the mode-info grid of cm for a width x height frame and grows the
-// mode-info storage when the current allocation is too small.
-//
-// Returns 0 on success. When the cm->alloc_mi callback reports failure the
-// grid dimensions are reset for a 0x0 frame, all context buffers are freed
-// and 1 is returned.
-int av1_alloc_context_buffers(AV1_COMMON *cm, int width, int height) {
-  av1_set_mb_mi(cm, width, height);
-
-  const int required_mi_size = cm->mi_stride * calc_mi_size(cm->mi_rows);
-
-  // The existing allocation is kept whenever it is already large enough.
-  if (cm->mi_alloc_size >= required_mi_size) return 0;
-
-  cm->free_mi(cm);
-  if (!cm->alloc_mi(cm, required_mi_size)) return 0;
-
-  // clear the mi_* values to force a realloc on resync
-  av1_set_mb_mi(cm, 0, 0);
-  av1_free_context_buffers(cm);
-  return 1;
-}
-
-// Tears down cm: frees its context buffers, then the frame context (fc) and
-// the frame_contexts array, leaving both pointers NULL.
-void av1_remove_common(AV1_COMMON *cm) {
-  av1_free_context_buffers(cm);
-
-  aom_free(cm->fc);
-  aom_free(cm->frame_contexts);
-  cm->fc = NULL;
-  cm->frame_contexts = NULL;
-}
-
-// Initialises the mode-info buffers of cm by invoking its setup_mi callback.
-void av1_init_context_buffers(AV1_COMMON *cm) {
-  cm->setup_mi(cm);
-}
diff --git a/third_party/aom/av1/common/alloccommon.h b/third_party/aom/av1/common/alloccommon.h
deleted file mode 100644
index 8e5896981..000000000
--- a/third_party/aom/av1/common/alloccommon.h
+++ /dev/null
@@ -1,48 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-// Declarations of the allocation and release helpers for the common AV1
-// state (context buffers, reference frame buffers, restoration buffers).
-#ifndef AOM_AV1_COMMON_ALLOCCOMMON_H_
-#define AOM_AV1_COMMON_ALLOCCOMMON_H_
-
-// NOTE(review): the expansion is not parenthesized; "(-1)" would be the more
-// defensive spelling of this macro.
-#define INVALID_IDX -1 // Invalid buffer index.
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-// Only pointers to these structs are used below, so forward declarations
-// are sufficient here.
-struct AV1Common;
-struct BufferPool;
-
-// Frees the context buffers, the frame context and the frame_contexts array.
-void av1_remove_common(struct AV1Common *cm);
-
-// Allocates the above-context arrays for num_alloc_above_contexts tile rows.
-// Returns 0 on success, 1 on allocation failure.
-int av1_alloc_above_context_buffers(struct AV1Common *cm,
- int num_alloc_above_contexts);
-// Frees the above-context arrays for num_free_above_contexts tile rows and
-// resets the allocation bookkeeping.
-void av1_free_above_context_buffers(struct AV1Common *cm,
- int num_free_above_contexts);
-// Sets the mode-info dimensions for width x height and grows the mode-info
-// storage if needed. Returns 0 on success, 1 on allocation failure.
-int av1_alloc_context_buffers(struct AV1Common *cm, int width, int height);
-// Invokes the setup_mi callback of cm.
-void av1_init_context_buffers(struct AV1Common *cm);
-// Frees the mode-info storage and the above-context arrays.
-void av1_free_context_buffers(struct AV1Common *cm);
-
-// Releases the frame buffers and per-buffer arrays held by pool.
-void av1_free_ref_frame_buffers(struct BufferPool *pool);
-// Allocates / frees the loop-restoration buffers of cm.
-void av1_alloc_restoration_buffers(struct AV1Common *cm);
-void av1_free_restoration_buffers(struct AV1Common *cm);
-
-// NOTE(review): no definition of the next two functions appears in
-// alloccommon.c - confirm that they are implemented elsewhere or remove them.
-int av1_alloc_state_buffers(struct AV1Common *cm, int width, int height);
-void av1_free_state_buffers(struct AV1Common *cm);
-
-// Computes the mode-info and macroblock grid dimensions for width x height
-// and stores them in cm.
-void av1_set_mb_mi(struct AV1Common *cm, int width, int height);
-// Returns the macroblock count for width x height without touching any state.
-int av1_get_MBs(int width, int height);
-
-#ifdef __cplusplus
-} // extern "C"
-#endif
-
-#endif // AOM_AV1_COMMON_ALLOCCOMMON_H_
diff --git a/third_party/aom/av1/common/arm/av1_inv_txfm_neon.c b/third_party/aom/av1/common/arm/av1_inv_txfm_neon.c
deleted file mode 100644
index bad411743..000000000
--- a/third_party/aom/av1/common/arm/av1_inv_txfm_neon.c
+++ /dev/null
@@ -1,3231 +0,0 @@
-/*
- * Copyright (c) 2018, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#include <arm_neon.h>
-
-#include "config/aom_config.h"
-#include "config/aom_dsp_rtcd.h"
-#include "config/av1_rtcd.h"
-
-#include "av1/common/av1_inv_txfm1d.h"
-#include "av1/common/av1_inv_txfm1d_cfg.h"
-#include "av1/common/av1_txfm.h"
-#include "av1/common/enums.h"
-#include "av1/common/idct.h"
-#include "av1/common/arm/av1_inv_txfm_neon.h"
-#include "av1/common/arm/transpose_neon.h"
-
-// 1D itx types
-// Identifies which one-dimensional inverse transform kernel to use.
-// IFLIPADST_1D is an alias of IADST_1D, so both share one value; with that
-// alias ITX_TYPES_1D evaluates to 3, the number of distinct kernels.
-typedef enum ATTRIBUTE_PACKED {
- IDCT_1D,
- IADST_1D,
- IFLIPADST_1D = IADST_1D,
- IIDENTITY_1D,
- ITX_TYPES_1D,
-} ITX_TYPE_1D;
-
-// Lookup table with TX_TYPES entries giving a 1D kernel type for each 2D
-// transform type. Presumably this is the vertical (column) kernel, judging by
-// the "v" prefix - confirm against the users of this table. The entry order
-// must match the TX_TYPE enumeration, which is declared elsewhere.
-static const ITX_TYPE_1D vitx_1d_tab[TX_TYPES] = {
- IDCT_1D, IADST_1D, IDCT_1D, IADST_1D,
- IFLIPADST_1D, IDCT_1D, IFLIPADST_1D, IADST_1D,
- IFLIPADST_1D, IIDENTITY_1D, IDCT_1D, IIDENTITY_1D,
- IADST_1D, IIDENTITY_1D, IFLIPADST_1D, IIDENTITY_1D,
-};
-
-// Lookup table with TX_TYPES entries giving a 1D kernel type for each 2D
-// transform type. Presumably this is the horizontal (row) kernel, judging by
-// the "h" prefix - confirm against the users of this table. The entry order
-// must match the TX_TYPE enumeration, which is declared elsewhere.
-static const ITX_TYPE_1D hitx_1d_tab[TX_TYPES] = {
- IDCT_1D, IDCT_1D, IADST_1D, IADST_1D,
- IDCT_1D, IFLIPADST_1D, IFLIPADST_1D, IFLIPADST_1D,
- IADST_1D, IIDENTITY_1D, IIDENTITY_1D, IDCT_1D,
- IIDENTITY_1D, IADST_1D, IIDENTITY_1D, IFLIPADST_1D,
-};
-
-// 1D functions
-// Table of 1D inverse transform functions indexed as [row][ITX_TYPE_1D]
-// (columns: IDCT, IADST, identity). The five rows hold the 4-, 8-, 16-, 32-
-// and 64-point kernels in that order; the last two rows provide only the
-// IDCT kernel and leave the other slots NULL, so callers must not look up
-// ADST or identity kernels for those rows.
-static const transform_1d_neon lowbd_txfm_all_1d_arr[TX_SIZES][ITX_TYPES_1D] = {
- { av1_idct4_new, av1_iadst4_new, av1_iidentity4_c },
- { av1_idct8_new, av1_iadst8_new, av1_iidentity8_c },
- { av1_idct16_new, av1_iadst16_new, av1_iidentity16_c },
- { av1_idct32_new, NULL, NULL },
- { av1_idct64_new, NULL, NULL },
-};
-
-// Adds `height` rows of eight 16-bit residuals from `in` to the 8-bit pixels
-// at `output` (rows `stride` bytes apart) and stores the sums back with
-// unsigned saturation. When `flipud` is non-zero the residual rows are
-// consumed bottom-to-top, i.e. in[height - 1] is added to the first row.
-static INLINE void lowbd_add_flip_buffer_8xn_neon(int16x8_t *in,
-                                                  uint8_t *output, int stride,
-                                                  int flipud,
-                                                  const int height) {
-  int src_row;
-  int row_step;
-  if (flipud) {
-    src_row = height - 1;
-    row_step = -1;
-  } else {
-    src_row = 0;
-    row_step = 1;
-  }
-
-  uint8_t *dst = output;
-  for (int row = 0; row < height; ++row) {
-    // Widen the 8 predicted pixels to 16 bits before adding the residual.
-    const int16x8_t pred = vreinterpretq_s16_u16(vmovl_u8(vld1_u8(dst)));
-    const int16x8_t recon = vaddq_s16(pred, in[src_row]);
-    vst1_u8(dst, vqmovun_s16(recon));
-    dst += stride;
-    src_row += row_step;
-  }
-}
-
-// Reconstructs 16 pixels: widens the 16 predicted bytes in `pred` to 16 bits,
-// adds `res0` to the low eight and `res1` to the high eight, and narrows the
-// sums back to bytes with unsigned saturation.
-static INLINE uint8x16_t lowbd_get_recon_16x16_neon(const uint8x16_t pred,
-                                                    int16x8_t res0,
-                                                    int16x8_t res1) {
-  const int16x8_t pred_lo = vreinterpretq_s16_u16(vmovl_u8(vget_low_u8(pred)));
-  const int16x8_t pred_hi =
-      vreinterpretq_s16_u16(vmovl_u8(vget_high_u8(pred)));
-
-  const uint8x8_t recon_lo = vqmovun_s16(vaddq_s16(pred_lo, res0));
-  const uint8x8_t recon_hi = vqmovun_s16(vaddq_s16(pred_hi, res1));
-
-  return vcombine_u8(recon_lo, recon_hi);
-}
-
-// 16-pixel-wide counterpart of lowbd_add_flip_buffer_8xn_neon(). For each of
-// the `height` output rows the left eight residuals come from in[j] and the
-// right eight from in[j + height]; j runs top-to-bottom, or bottom-to-top
-// when `flipud` is non-zero.
-static INLINE void lowbd_add_flip_buffer_16xn_neon(int16x8_t *in,
-                                                   uint8_t *output, int stride,
-                                                   int flipud, int height) {
-  int src_row = flipud ? (height - 1) : 0;
-  const int row_step = flipud ? -1 : 1;
-
-  for (int row = 0; row < height; ++row) {
-    uint8_t *const dst = output + row * stride;
-    const uint8x16_t pred = vld1q_u8(dst);
-    const uint8x16_t recon = lowbd_get_recon_16x16_neon(
-        pred, in[src_row], in[src_row + height]);
-    vst1q_u8(dst, recon);
-    src_row += row_step;
-  }
-}
-
-// Fills the first `size` vectors of `a` with `value` (truncated to int16_t)
-// replicated across all eight lanes.
-static INLINE void lowbd_inv_txfm2d_memset_neon(int16x8_t *a, int size,
-                                                int value) {
-  const int16x8_t fill = vdupq_n_s16((int16_t)value);
-  for (int idx = 0; idx < size; ++idx) {
-    a[idx] = fill;
-  }
-}
-
-// Butterfly rotation using lanes 0 and 1 of `c`:
-//   *t0 = round_shift(in0 * c[0] + in1 * c[1], INV_COS_BIT)
-//   *t1 = round_shift(in0 * c[1] - in1 * c[0], INV_COS_BIT)
-// Products are accumulated in 32 bits and narrowed back to 16 bits with a
-// rounding right shift.
-static INLINE void btf_16_lane_0_1_neon(const int16x8_t in0,
-                                        const int16x8_t in1, const int16x4_t c,
-                                        int16x8_t *t0, int16x8_t *t1) {
-  const int16x4_t a_lo = vget_low_s16(in0);
-  const int16x4_t a_hi = vget_high_s16(in0);
-  const int16x4_t b_lo = vget_low_s16(in1);
-  const int16x4_t b_hi = vget_high_s16(in1);
-
-  const int32x4_t sum_lo =
-      vmlal_lane_s16(vmull_lane_s16(a_lo, c, 0), b_lo, c, 1);
-  const int32x4_t sum_hi =
-      vmlal_lane_s16(vmull_lane_s16(a_hi, c, 0), b_hi, c, 1);
-  const int32x4_t diff_lo =
-      vmlsl_lane_s16(vmull_lane_s16(a_lo, c, 1), b_lo, c, 0);
-  const int32x4_t diff_hi =
-      vmlsl_lane_s16(vmull_lane_s16(a_hi, c, 1), b_hi, c, 0);
-
-  *t0 = vcombine_s16(vrshrn_n_s32(sum_lo, INV_COS_BIT),
-                     vrshrn_n_s32(sum_hi, INV_COS_BIT));
-  *t1 = vcombine_s16(vrshrn_n_s32(diff_lo, INV_COS_BIT),
-                     vrshrn_n_s32(diff_hi, INV_COS_BIT));
-}
-
-// Butterfly rotation using lanes 1 and 0 of `c`:
-//   *t0 = round_shift(in0 * c[1] + in1 * c[0], INV_COS_BIT)
-//   *t1 = round_shift(in0 * c[0] - in1 * c[1], INV_COS_BIT)
-// Products are accumulated in 32 bits and narrowed back to 16 bits with a
-// rounding right shift.
-static INLINE void btf_16_lane_1_0_neon(const int16x8_t in0,
-                                        const int16x8_t in1, const int16x4_t c,
-                                        int16x8_t *t0, int16x8_t *t1) {
-  const int16x4_t a_lo = vget_low_s16(in0);
-  const int16x4_t a_hi = vget_high_s16(in0);
-  const int16x4_t b_lo = vget_low_s16(in1);
-  const int16x4_t b_hi = vget_high_s16(in1);
-
-  const int32x4_t sum_lo =
-      vmlal_lane_s16(vmull_lane_s16(a_lo, c, 1), b_lo, c, 0);
-  const int32x4_t sum_hi =
-      vmlal_lane_s16(vmull_lane_s16(a_hi, c, 1), b_hi, c, 0);
-  const int32x4_t diff_lo =
-      vmlsl_lane_s16(vmull_lane_s16(a_lo, c, 0), b_lo, c, 1);
-  const int32x4_t diff_hi =
-      vmlsl_lane_s16(vmull_lane_s16(a_hi, c, 0), b_hi, c, 1);
-
-  *t0 = vcombine_s16(vrshrn_n_s32(sum_lo, INV_COS_BIT),
-                     vrshrn_n_s32(sum_hi, INV_COS_BIT));
-  *t1 = vcombine_s16(vrshrn_n_s32(diff_lo, INV_COS_BIT),
-                     vrshrn_n_s32(diff_hi, INV_COS_BIT));
-}
-
-// Butterfly rotation using lanes 2 and 3 of `c`:
-//   *t0 = round_shift(in0 * c[2] + in1 * c[3], INV_COS_BIT)
-//   *t1 = round_shift(in0 * c[3] - in1 * c[2], INV_COS_BIT)
-// Products are accumulated in 32 bits and narrowed back to 16 bits with a
-// rounding right shift.
-static INLINE void btf_16_lane_2_3_neon(const int16x8_t in0,
-                                        const int16x8_t in1, const int16x4_t c,
-                                        int16x8_t *t0, int16x8_t *t1) {
-  const int16x4_t a_lo = vget_low_s16(in0);
-  const int16x4_t a_hi = vget_high_s16(in0);
-  const int16x4_t b_lo = vget_low_s16(in1);
-  const int16x4_t b_hi = vget_high_s16(in1);
-
-  const int32x4_t sum_lo =
-      vmlal_lane_s16(vmull_lane_s16(a_lo, c, 2), b_lo, c, 3);
-  const int32x4_t sum_hi =
-      vmlal_lane_s16(vmull_lane_s16(a_hi, c, 2), b_hi, c, 3);
-  const int32x4_t diff_lo =
-      vmlsl_lane_s16(vmull_lane_s16(a_lo, c, 3), b_lo, c, 2);
-  const int32x4_t diff_hi =
-      vmlsl_lane_s16(vmull_lane_s16(a_hi, c, 3), b_hi, c, 2);
-
-  *t0 = vcombine_s16(vrshrn_n_s32(sum_lo, INV_COS_BIT),
-                     vrshrn_n_s32(sum_hi, INV_COS_BIT));
-  *t1 = vcombine_s16(vrshrn_n_s32(diff_lo, INV_COS_BIT),
-                     vrshrn_n_s32(diff_hi, INV_COS_BIT));
-}
-
-// Single-input butterfly: scales `in0` by two scalar coefficients.
-//   *t0 = round_shift(in0 * coef1, INV_COS_BIT)
-//   *t1 = round_shift(in0 * coef2, INV_COS_BIT)
-// Products are formed in 32 bits and narrowed back to 16 bits with a
-// rounding right shift.
-static INLINE void btf_16_neon(const int16x8_t in0, int16_t coef1,
-                               int16_t coef2, int16x8_t *t0, int16x8_t *t1) {
-  const int16x4_t a_lo = vget_low_s16(in0);
-  const int16x4_t a_hi = vget_high_s16(in0);
-
-  const int32x4_t p1_lo = vmull_n_s16(a_lo, coef1);
-  const int32x4_t p1_hi = vmull_n_s16(a_hi, coef1);
-  const int32x4_t p2_lo = vmull_n_s16(a_lo, coef2);
-  const int32x4_t p2_hi = vmull_n_s16(a_hi, coef2);
-
-  *t0 = vcombine_s16(vrshrn_n_s32(p1_lo, INV_COS_BIT),
-                     vrshrn_n_s32(p1_hi, INV_COS_BIT));
-  *t1 = vcombine_s16(vrshrn_n_s32(p2_lo, INV_COS_BIT),
-                     vrshrn_n_s32(p2_hi, INV_COS_BIT));
-}
-
-// Butterfly rotation using lanes 3 and 2 of `c`:
-//   *t0 = round_shift(in0 * c[3] + in1 * c[2], INV_COS_BIT)
-//   *t1 = round_shift(in0 * c[2] - in1 * c[3], INV_COS_BIT)
-// Products are accumulated in 32 bits and narrowed back to 16 bits with a
-// rounding right shift.
-static INLINE void btf_16_lane_3_2_neon(const int16x8_t in0,
-                                        const int16x8_t in1, const int16x4_t c,
-                                        int16x8_t *t0, int16x8_t *t1) {
-  const int16x4_t a_lo = vget_low_s16(in0);
-  const int16x4_t a_hi = vget_high_s16(in0);
-  const int16x4_t b_lo = vget_low_s16(in1);
-  const int16x4_t b_hi = vget_high_s16(in1);
-
-  const int32x4_t sum_lo =
-      vmlal_lane_s16(vmull_lane_s16(a_lo, c, 3), b_lo, c, 2);
-  const int32x4_t sum_hi =
-      vmlal_lane_s16(vmull_lane_s16(a_hi, c, 3), b_hi, c, 2);
-  const int32x4_t diff_lo =
-      vmlsl_lane_s16(vmull_lane_s16(a_lo, c, 2), b_lo, c, 3);
-  const int32x4_t diff_hi =
-      vmlsl_lane_s16(vmull_lane_s16(a_hi, c, 2), b_hi, c, 3);
-
-  *t0 = vcombine_s16(vrshrn_n_s32(sum_lo, INV_COS_BIT),
-                     vrshrn_n_s32(sum_hi, INV_COS_BIT));
-  *t1 = vcombine_s16(vrshrn_n_s32(diff_lo, INV_COS_BIT),
-                     vrshrn_n_s32(diff_hi, INV_COS_BIT));
-}
-
-// In-place sum/difference butterfly on x[0] and x[1], scaled by lane 0 of c:
-//   x[0] = round_shift(x[0] * c[0] + x[1] * c[0], INV_COS_BIT)
-//   x[1] = round_shift(x[0] * c[0] - x[1] * c[0], INV_COS_BIT)
-// (both right-hand sides use the values x[0] and x[1] had on entry).
-static INLINE void btf_16_half_neon(int16x8_t *const x, const int16x4_t c) {
-  // Don't add/sub before multiply, which will overflow in iadst8: each input
-  // is widened to 32 bits by the multiply first.
-  const int32x4_t a_lo = vmull_lane_s16(vget_low_s16(x[0]), c, 0);
-  const int32x4_t a_hi = vmull_lane_s16(vget_high_s16(x[0]), c, 0);
-  const int32x4_t b_lo = vmull_lane_s16(vget_low_s16(x[1]), c, 0);
-  const int32x4_t b_hi = vmull_lane_s16(vget_high_s16(x[1]), c, 0);
-
-  const int16x4_t sum_lo = vrshrn_n_s32(vaddq_s32(a_lo, b_lo), INV_COS_BIT);
-  const int16x4_t sum_hi = vrshrn_n_s32(vaddq_s32(a_hi, b_hi), INV_COS_BIT);
-  const int16x4_t diff_lo = vrshrn_n_s32(vsubq_s32(a_lo, b_lo), INV_COS_BIT);
-  const int16x4_t diff_hi = vrshrn_n_s32(vsubq_s32(a_hi, b_hi), INV_COS_BIT);
-
-  x[0] = vcombine_s16(sum_lo, sum_hi);
-  x[1] = vcombine_s16(diff_lo, diff_hi);
-}
-
-// Builds a 4-lane int16 vector by loading one 16-bit value from each of the
-// four given addresses: lane 0 from c0, lane 1 from c1, lane 2 from c2 and
-// lane 3 from c3.
-static INLINE int16x4_t create_s16x4_neon(int16_t *const c0, int16_t *const c1,
-                                          int16_t *const c2,
-                                          int16_t *const c3) {
-  const int16x4_t zero = vdup_n_s16((int16_t)0);
-  const int16x4_t lanes01 = vld1_lane_s16(c1, vld1_lane_s16(c0, zero, 0), 1);
-  const int16x4_t lanes0123 =
-      vld1_lane_s16(c3, vld1_lane_s16(c2, lanes01, 2), 3);
-  return lanes0123;
-}
-
-// 8-point inverse ADST on eight int16x8_t vectors (eight independent
-// transforms, one per lane). Reads in[0..7] and writes out[0..7]; `bit` is
-// unused. Intermediate additions and subtractions saturate (vqaddq/vqsubq).
-//
-// NOTE(review): the coefficient vectors are built by reading 16 bits at the
-// address of each 32-bit cospi entry; which half is read depends on the
-// target's byte order - presumably little-endian is assumed, confirm.
-static INLINE void iadst8_new_neon(int16x8_t *const in, int16x8_t *out,
- int8_t cos_bit, int bit) {
- (void)bit;
- const int32_t *cospi = cospi_arr(cos_bit);
-
- // c0 = { cospi[4], cospi[60], cospi[20], cospi[44] }
- const int16x4_t c0 =
- create_s16x4_neon((int16_t *)(cospi + 4), (int16_t *)(cospi + 60),
- (int16_t *)(cospi + 20), (int16_t *)(cospi + 44));
- // c1 = { cospi[36], cospi[28], cospi[52], cospi[12] }
- const int16x4_t c1 =
- create_s16x4_neon((int16_t *)(cospi + 36), (int16_t *)(cospi + 28),
- (int16_t *)(cospi + 52), (int16_t *)(cospi + 12));
- // c2 = { cospi[32], cospi[32], cospi[16], cospi[48] }
- const int16x4_t c2 =
- create_s16x4_neon((int16_t *)(cospi + 32), (int16_t *)(cospi + 32),
- (int16_t *)(cospi + 16), (int16_t *)(cospi + 48));
-
- int16x8_t x[8];
- int16x8_t s0, s1, s2, s3, s4, s5, s6, s7;
-
- // Stage 1
- // Input permutation.
- x[0] = in[7];
- x[1] = in[0];
- x[2] = in[5];
- x[3] = in[2];
- x[4] = in[3];
- x[5] = in[4];
- x[6] = in[1];
- x[7] = in[6];
-
- // Stage 2
- // Four rotations, one per input pair, each with its own coefficient pair.
- btf_16_lane_0_1_neon(x[0], x[1], c0, &s0, &s1);
- btf_16_lane_2_3_neon(x[2], x[3], c0, &s2, &s3);
- btf_16_lane_0_1_neon(x[4], x[5], c1, &s4, &s5);
- btf_16_lane_2_3_neon(x[6], x[7], c1, &s6, &s7);
-
- // Stage 3
- // Saturating sums and differences of s[k] and s[k + 4].
- x[0] = vqaddq_s16(s0, s4);
- x[1] = vqaddq_s16(s1, s5);
- x[2] = vqaddq_s16(s2, s6);
- x[3] = vqaddq_s16(s3, s7);
- x[4] = vqsubq_s16(s0, s4);
- x[5] = vqsubq_s16(s1, s5);
- x[6] = vqsubq_s16(s2, s6);
- x[7] = vqsubq_s16(s3, s7);
-
- // Stage 4
- // The first four values pass through; the last four are rotated with the
- // cospi[16] / cospi[48] pair (lanes 2 and 3 of c2).
- s0 = x[0];
- s1 = x[1];
- s2 = x[2];
- s3 = x[3];
- btf_16_lane_2_3_neon(x[4], x[5], c2, &s4, &s5);
- btf_16_lane_3_2_neon(x[7], x[6], c2, &s7, &s6);
-
- // Stage 5
- // Saturating sums and differences of s[k] and s[k + 2] within each half.
- x[0] = vqaddq_s16(s0, s2);
- x[1] = vqaddq_s16(s1, s3);
- x[2] = vqsubq_s16(s0, s2);
- x[3] = vqsubq_s16(s1, s3);
- x[4] = vqaddq_s16(s4, s6);
- x[5] = vqaddq_s16(s5, s7);
- x[6] = vqsubq_s16(s4, s6);
- x[7] = vqsubq_s16(s5, s7);
-
- // stage 6
- // Sum/difference butterflies scaled by cospi[32] (lane 0 of c2), applied in
- // place to the pairs (x[2], x[3]) and (x[6], x[7]).
- btf_16_half_neon(x + 2, c2);
- btf_16_half_neon(x + 6, c2);
-
- // Stage 7
- // Output permutation; every odd-indexed output is negated.
- out[0] = x[0];
- out[1] = vnegq_s16(x[4]);
- out[2] = x[6];
- out[3] = vnegq_s16(x[2]);
- out[4] = x[3];
- out[5] = vnegq_s16(x[7]);
- out[6] = x[5];
- out[7] = vnegq_s16(x[1]);
-}
-
-// Reduced 8-point inverse ADST that reads only in[0]; the remaining inputs
-// are never accessed (presumably the caller guarantees they are zero -
-// confirm). Writes out[0..7]; `bit` is unused.
-static INLINE void iadst8_low1_new_neon(int16x8_t *const in, int16x8_t *out,
- int8_t cos_bit, int bit) {
- (void)bit;
- const int32_t *cospi = cospi_arr(cos_bit);
- // c2 = { cospi[32], cospi[32], cospi[16], cospi[48] }
- const int16x4_t c2 =
- create_s16x4_neon((int16_t *)(cospi + 32), (int16_t *)(cospi + 32),
- (int16_t *)(cospi + 16), (int16_t *)(cospi + 48));
-
- int16x8_t x[8];
- int16x8_t s0, s1, s4, s5;
-
- // Stage 1
- x[1] = in[0];
-
- // Stage 2
-
- // s0 = in[0] * cospi[60], s1 = in[0] * -cospi[4] (rounded and shifted by
- // INV_COS_BIT); the 32-bit table values are converted to int16_t at the
- // call.
- btf_16_neon(x[1], cospi[60], -cospi[4], &s0, &s1);
-
- // Stage 3
- // Plain copies take the place of the sums and differences of the full
- // transform.
- x[0] = s0;
- x[1] = s1;
- x[4] = s0;
- x[5] = s1;
-
- // Stage 4
- s0 = x[0];
- s1 = x[1];
- btf_16_lane_2_3_neon(x[4], x[5], c2, &s4, &s5);
-
- // Stage 5
- x[0] = s0;
- x[1] = s1;
- x[2] = s0;
- x[3] = s1;
- x[4] = s4;
- x[5] = s5;
- x[6] = s4;
- x[7] = s5;
-
- // stage 6
- // Sum/difference butterflies scaled by cospi[32] (lane 0 of c2), applied in
- // place to the pairs (x[2], x[3]) and (x[6], x[7]).
- btf_16_half_neon(x + 2, c2);
- btf_16_half_neon(x + 6, c2);
-
- // Stage 7
- // Output permutation; every odd-indexed output is negated.
- out[0] = x[0];
- out[1] = vnegq_s16(x[4]);
- out[2] = x[6];
- out[3] = vnegq_s16(x[2]);
- out[4] = x[3];
- out[5] = vnegq_s16(x[7]);
- out[6] = x[5];
- out[7] = vnegq_s16(x[1]);
-}
-
-// 8-point inverse DCT on eight int16x8_t vectors (eight independent
-// transforms, one per lane). Reads in[0..7] and writes out[0..7]; `bit` is
-// unused. Intermediate additions and subtractions saturate (vqaddq/vqsubq).
-static INLINE void idct8_new_neon(int16x8_t *in, int16x8_t *out, int8_t cos_bit,
- int bit) {
- (void)bit;
- const int32_t *cospi = cospi_arr(cos_bit);
- int16x8_t step1[8], step2[8];
- // c0 = { cospi[8], cospi[56], cospi[40], cospi[24] }
- const int16x4_t c0 =
- create_s16x4_neon((int16_t *)(cospi + 8), (int16_t *)(cospi + 56),
- (int16_t *)(cospi + 40), (int16_t *)(cospi + 24));
- // c2 = { cospi[32], cospi[32], cospi[16], cospi[48] }
- const int16x4_t c2 =
- create_s16x4_neon((int16_t *)(cospi + 32), (int16_t *)(cospi + 32),
- (int16_t *)(cospi + 16), (int16_t *)(cospi + 48));
-
- // stage 2
- // Rotations of the odd-indexed inputs.
- btf_16_lane_0_1_neon(in[1], in[7], c0, &step1[7], &step1[4]);
- btf_16_lane_2_3_neon(in[5], in[3], c0, &step1[6], &step1[5]);
-
- // stage 3
- // Rotations of the even-indexed inputs, then sums/differences of the
- // stage 2 results.
- btf_16_lane_0_1_neon(in[0], in[4], c2, &step2[0], &step2[1]);
- btf_16_lane_2_3_neon(in[2], in[6], c2, &step2[3], &step2[2]);
- step2[4] = vqaddq_s16(step1[4], step1[5]);
- step2[5] = vqsubq_s16(step1[4], step1[5]);
- step2[6] = vqsubq_s16(step1[7], step1[6]);
- step2[7] = vqaddq_s16(step1[7], step1[6]);
-
- // stage 4
- step1[0] = vqaddq_s16(step2[0], step2[3]);
- step1[1] = vqaddq_s16(step2[1], step2[2]);
- step1[2] = vqsubq_s16(step2[1], step2[2]);
- step1[3] = vqsubq_s16(step2[0], step2[3]);
- // step1[6] and step1[5] become the scaled sum and difference of step2[6]
- // and step2[5] (both lanes used here hold cospi[32]).
- btf_16_lane_0_1_neon(step2[6], step2[5], c2, &step1[6], &step1[5]);
-
- // stage 5
- // Final butterflies: out[k] and out[7 - k] are the sum and difference of
- // the same pair of intermediate values.
- out[0] = vqaddq_s16(step1[0], step2[7]);
- out[1] = vqaddq_s16(step1[1], step1[6]);
- out[2] = vqaddq_s16(step1[2], step1[5]);
- out[3] = vqaddq_s16(step1[3], step2[4]);
- out[4] = vqsubq_s16(step1[3], step2[4]);
- out[5] = vqsubq_s16(step1[2], step1[5]);
- out[6] = vqsubq_s16(step1[1], step1[6]);
- out[7] = vqsubq_s16(step1[0], step2[7]);
-}
-
-// Reduced 8-point inverse DCT that reads only in[0]: the input is scaled by
-// cospi[32] (rounded and shifted right by INV_COS_BIT) and the same vector is
-// written to all eight outputs. `bit` is unused.
-static INLINE void idct8_low1_new_neon(int16x8_t *in, int16x8_t *out,
-                                       int8_t cos_bit, int bit) {
-  (void)bit;
-  const int32_t *cospi = cospi_arr(cos_bit);
-  const int16_t c32 = (int16_t)cospi[32];
-
-  // Stages 1-3 collapse to a single multiply of the only input used.
-  const int32x4_t dc_lo = vmull_n_s16(vget_low_s16(in[0]), c32);
-  const int32x4_t dc_hi = vmull_n_s16(vget_high_s16(in[0]), c32);
-  const int16x8_t dc = vcombine_s16(vrshrn_n_s32(dc_lo, INV_COS_BIT),
-                                    vrshrn_n_s32(dc_hi, INV_COS_BIT));
-
-  // Stages 4-5: every output receives the same value.
-  for (int k = 0; k < 8; ++k) {
-    out[k] = dc;
-  }
-}
-
-// Applies a rounding shift by `bit` to each of the first `size` vectors of
-// `arr`, in place. The shift is done with vrshlq_s16 using a shift count of
-// -bit, so a positive `bit` shifts right. `size` must be a multiple of 4
-// (checked with assert); nothing is done when `bit` is 0.
-void av1_round_shift_array_16_neon(int16x8_t *arr, int size, int bit) {
-  assert(!(size % 4));
-  if (bit != 0) {
-    const int16x8_t neg_shift = vdupq_n_s16((int16_t)(-bit));
-    for (int idx = 0; idx < size; idx++) {
-      arr[idx] = vrshlq_s16(arr[idx], neg_shift);
-    }
-  }
-}
-
-// Reverses the order of the first `size` vectors of `input` in place, so
-// that input[0] and input[size - 1] trade places, and so on.
-//
-// The swap is done pairwise from both ends, which needs no scratch array and
-// therefore works for any `size` (a fixed 8-entry scratch buffer would be
-// overrun for size > 8). Nothing happens for size <= 1.
-static INLINE void flip_buf_ud_neon(int16x8_t *input, int size) {
-  for (int top = 0, bottom = size - 1; top < bottom; ++top, --bottom) {
-    const int16x8_t saved = input[top];
-    input[top] = input[bottom];
-    input[bottom] = saved;
-  }
-}
-
-// Loads eight rows of eight 32-bit values starting at `input`, rows being
-// `out_size` elements apart, and stores each row narrowed to 16 bits
-// (truncating, via vmovn_s32) in a[0..7].
-static INLINE void load_buffer_32bit_to_16bit_neon(const int32_t *input,
-                                                   int16x8_t *const a,
-                                                   int out_size) {
-  const int32_t *row = input;
-  for (int r = 0; r < 8; ++r, row += out_size) {
-    const int16x4_t left = vmovn_s32(vld1q_s32(row));
-    const int16x4_t right = vmovn_s32(vld1q_s32(row + 4));
-    a[r] = vcombine_s16(left, right);
-  }
-}
-
-// 8-point identity transform: each of the eight input vectors is multiplied
-// by 2 and written to the matching output. `cos_bit` and `bit` are unused.
-static INLINE void identity8_new_neon(int16x8_t *input, int16x8_t *output,
-                                      int8_t cos_bit, int bit) {
-  (void)bit;
-  (void)cos_bit;
-
-  for (int k = 0; k < 8; ++k) {
-    output[k] = vmulq_n_s16(input[k], (int16_t)2);
-  }
-}
-
-// Scales each of the first `size` input vectors by NewInvSqrt2 and narrows
-// the 32-bit products back to 16 bits with a saturating, rounding right
-// shift by NewSqrt2Bits. Results go to output[0..size - 1].
-static INLINE void round_shift_for_rect(int16x8_t *input, int16x8_t *output,
-                                        int size) {
-  for (int k = 0; k < size; ++k) {
-    const int32x4_t wide_lo =
-        vmull_n_s16(vget_low_s16(input[k]), (int16_t)NewInvSqrt2);
-    const int32x4_t wide_hi =
-        vmull_n_s16(vget_high_s16(input[k]), (int16_t)NewInvSqrt2);
-
-    output[k] = vcombine_s16(vqrshrn_n_s32(wide_lo, (int32_t)NewSqrt2Bits),
-                             vqrshrn_n_s32(wide_hi, (int32_t)NewSqrt2Bits));
-  }
-}
-
-// 16-point identity transform: each of the sixteen input vectors is
-// multiplied by 2 * NewSqrt2 and narrowed back to 16 bits with a saturating,
-// rounding right shift by NewSqrt2Bits. `cos_bit` and `bit` are unused.
-static INLINE void identity16_new_neon(int16x8_t *input, int16x8_t *output,
-                                       int8_t cos_bit, int bit) {
-  (void)bit;
-  (void)cos_bit;
-
-  const int16_t gain = (int16_t)(2 * NewSqrt2);
-
-  for (int k = 0; k < 16; ++k) {
-    const int32x4_t wide_lo = vmull_n_s16(vget_low_s16(input[k]), gain);
-    const int32x4_t wide_hi = vmull_n_s16(vget_high_s16(input[k]), gain);
-
-    output[k] = vcombine_s16(vqrshrn_n_s32(wide_lo, (int32_t)NewSqrt2Bits),
-                             vqrshrn_n_s32(wide_hi, (int32_t)NewSqrt2Bits));
-  }
-}
-
-// 32-point identity transform: each of the thirty-two input vectors is
-// multiplied by 4 and written to the matching output. `cos_bit` and `bit`
-// are unused.
-static INLINE void identity32_new_neon(int16x8_t *input, int16x8_t *output,
-                                       int8_t cos_bit, int bit) {
-  (void)bit;
-  (void)cos_bit;
-
-  int k = 0;
-  while (k < 32) {
-    output[k] = vmulq_n_s16(input[k], (int16_t)4);
-    ++k;
-  }
-}
-
-// Reduced 16-point inverse DCT that reads only in[0]: the input is scaled by
-// cospi[32] (rounded and shifted right by INV_COS_BIT) and the same vector is
-// written to all sixteen outputs. `bit` is unused.
-static INLINE void idct16_low1_new_neon(int16x8_t *in, int16x8_t *out,
-                                        int8_t cos_bit, int bit) {
-  (void)bit;
-  const int32_t *cospi = cospi_arr(cos_bit);
-
-  // stage 4: the only arithmetic left when a single input is used.
-  const int32x4_t dc_lo = vmull_n_s16(vget_low_s16(in[0]), cospi[32]);
-  const int32x4_t dc_hi = vmull_n_s16(vget_high_s16(in[0]), cospi[32]);
-  const int16x8_t dc = vcombine_s16(vrshrn_n_s32(dc_lo, INV_COS_BIT),
-                                    vrshrn_n_s32(dc_hi, INV_COS_BIT));
-
-  // stages 6-7: every output receives the same value.
-  for (int k = 0; k < 16; ++k) {
-    out[k] = dc;
-  }
-}
-
-static INLINE void idct16_new_neon(int16x8_t *in, int16x8_t *out,
- int8_t cos_bit, int bit) {
- (void)bit;
- const int32_t *cospi = cospi_arr(cos_bit);
- int16x8_t step1[16], step2[16];
-
- const int16x4_t c0 =
- create_s16x4_neon((int16_t *)(cospi + 4), (int16_t *)(cospi + 60),
- (int16_t *)(cospi + 36), (int16_t *)(cospi + 28));
- const int16x4_t c1 =
- create_s16x4_neon((int16_t *)(cospi + 20), (int16_t *)(cospi + 44),
- (int16_t *)(cospi + 52), (int16_t *)(cospi + 12));
- const int16x4_t c2 =
- create_s16x4_neon((int16_t *)(cospi + 8), (int16_t *)(cospi + 56),
- (int16_t *)(cospi + 40), (int16_t *)(cospi + 24));
- const int16x4_t c3 =
- create_s16x4_neon((int16_t *)(cospi + 32), (int16_t *)(cospi + 32),
- (int16_t *)(cospi + 16), (int16_t *)(cospi + 48));
-
- // stage 2
-
- btf_16_lane_0_1_neon(in[1], in[15], c0, &step2[15], &step2[8]);
- btf_16_lane_2_3_neon(in[9], in[7], c0, &step2[14], &step2[9]);
- btf_16_lane_0_1_neon(in[5], in[11], c1, &step2[13], &step2[10]);
- btf_16_lane_2_3_neon(in[13], in[3], c1, &step2[12], &step2[11]);
-
- step2[0] = in[0];
- step2[1] = in[8];
- step2[2] = in[4];
- step2[3] = in[12];
- step2[4] = in[2];
- step2[5] = in[10];
- step2[6] = in[6];
- step2[7] = in[14];
-
- // stage 3
-
- btf_16_lane_0_1_neon(step2[4], step2[7], c2, &step1[7], &step1[4]);
- btf_16_lane_2_3_neon(step2[5], step2[6], c2, &step1[6], &step1[5]);
-
- step1[0] = step2[0];
- step1[1] = step2[1];
- step1[2] = step2[2];
- step1[3] = step2[3];
- step1[8] = vqaddq_s16(step2[8], step2[9]);
- step1[9] = vqsubq_s16(step2[8], step2[9]);
- step1[10] = vqsubq_s16(step2[11], step2[10]);
- step1[11] = vqaddq_s16(step2[11], step2[10]);
- step1[12] = vqaddq_s16(step2[12], step2[13]);
- step1[13] = vqsubq_s16(step2[12], step2[13]);
- step1[14] = vqsubq_s16(step2[15], step2[14]);
- step1[15] = vqaddq_s16(step2[15], step2[14]);
-
- // stage 4
-
- btf_16_lane_0_1_neon(step1[0], step1[1], c3, &step2[0], &step2[1]);
- btf_16_lane_2_3_neon(step1[2], step1[3], c3, &step2[3], &step2[2]);
- btf_16_lane_2_3_neon(step1[14], step1[9], c3, &step2[14], &step2[9]);
- btf_16_lane_3_2_neon(vnegq_s16(step1[10]), vnegq_s16(step1[13]), c3,
- &step2[10], &step2[13]);
-
- step2[4] = vqaddq_s16(step1[4], step1[5]);
- step2[5] = vqsubq_s16(step1[4], step1[5]);
- step2[6] = vqsubq_s16(step1[7], step1[6]);
- step2[7] = vqaddq_s16(step1[7], step1[6]);
- step2[8] = step1[8];
- step2[11] = step1[11];
- step2[12] = step1[12];
- step2[15] = step1[15];
-
- // stage 5
-
- btf_16_lane_0_1_neon(step2[6], step2[5], c3, &step1[6], &step1[5]);
-
- step1[0] = vqaddq_s16(step2[0], step2[3]);
- step1[1] = vqaddq_s16(step2[1], step2[2]);
- step1[2] = vqsubq_s16(step2[1], step2[2]);
- step1[3] = vqsubq_s16(step2[0], step2[3]);
- step1[4] = step2[4];
- step1[7] = step2[7];
- step1[8] = vqaddq_s16(step2[8], step2[11]);
- step1[9] = vqaddq_s16(step2[9], step2[10]);
- step1[10] = vqsubq_s16(step2[9], step2[10]);
- step1[11] = vqsubq_s16(step2[8], step2[11]);
- step1[12] = vqsubq_s16(step2[15], step2[12]);
- step1[13] = vqsubq_s16(step2[14], step2[13]);
- step1[14] = vqaddq_s16(step2[14], step2[13]);
- step1[15] = vqaddq_s16(step2[15], step2[12]);
-
- // stage 6
-
- btf_16_lane_0_1_neon(step1[13], step1[10], c3, &step2[13], &step2[10]);
- btf_16_lane_0_1_neon(step1[12], step1[11], c3, &step2[12], &step2[11]);
-
- step2[0] = vqaddq_s16(step1[0], step1[7]);
- step2[1] = vqaddq_s16(step1[1], step1[6]);
- step2[2] = vqaddq_s16(step1[2], step1[5]);
- step2[3] = vqaddq_s16(step1[3], step1[4]);
- step2[4] = vqsubq_s16(step1[3], step1[4]);
- step2[5] = vqsubq_s16(step1[2], step1[5]);
- step2[6] = vqsubq_s16(step1[1], step1[6]);
- step2[7] = vqsubq_s16(step1[0], step1[7]);
- step2[8] = step1[8];
- step2[9] = step1[9];
- step2[14] = step1[14];
- step2[15] = step1[15];
-
- // stage 7
- out[0] = vqaddq_s16(step2[0], step2[15]);
- out[1] = vqaddq_s16(step2[1], step2[14]);
- out[2] = vqaddq_s16(step2[2], step2[13]);
- out[3] = vqaddq_s16(step2[3], step2[12]);
- out[4] = vqaddq_s16(step2[4], step2[11]);
- out[5] = vqaddq_s16(step2[5], step2[10]);
- out[6] = vqaddq_s16(step2[6], step2[9]);
- out[7] = vqaddq_s16(step2[7], step2[8]);
- out[8] = vqsubq_s16(step2[7], step2[8]);
- out[9] = vqsubq_s16(step2[6], step2[9]);
- out[10] = vqsubq_s16(step2[5], step2[10]);
- out[11] = vqsubq_s16(step2[4], step2[11]);
- out[12] = vqsubq_s16(step2[3], step2[12]);
- out[13] = vqsubq_s16(step2[2], step2[13]);
- out[14] = vqsubq_s16(step2[1], step2[14]);
- out[15] = vqsubq_s16(step2[0], step2[15]);
-}
-
-static INLINE void idct16_low8_new_neon(int16x8_t *in, int16x8_t *out,
- int8_t cos_bit, int bit) {
- (void)bit;
- const int32_t *cospi = cospi_arr(cos_bit);
- int16x8_t step1[16], step2[16];
- const int16x4_t c0 =
- create_s16x4_neon((int16_t *)(cospi + 32), (int16_t *)(cospi + 32),
- (int16_t *)(cospi + 16), (int16_t *)(cospi + 48));
-
- // stage 1
- // stage 2
-
- step2[0] = in[0];
- step2[2] = in[4];
- step2[4] = in[2];
- step2[6] = in[6];
-
- btf_16_neon(in[1], cospi[60], cospi[4], &step2[8], &step2[15]);
- btf_16_neon(in[7], -cospi[36], cospi[28], &step2[9], &step2[14]);
- btf_16_neon(in[5], cospi[44], cospi[20], &step2[10], &step2[13]);
- btf_16_neon(in[3], -cospi[52], cospi[12], &step2[11], &step2[12]);
-
- // stage 3
-
- btf_16_neon(step2[4], cospi[56], cospi[8], &step1[4], &step1[7]);
- btf_16_neon(step2[6], -cospi[40], cospi[24], &step1[5], &step1[6]);
-
- step1[0] = step2[0];
- step1[2] = step2[2];
- step1[8] = vqaddq_s16(step2[8], step2[9]);
- step1[9] = vqsubq_s16(step2[8], step2[9]);
- step1[10] = vqsubq_s16(step2[11], step2[10]);
- step1[11] = vqaddq_s16(step2[11], step2[10]);
- step1[12] = vqaddq_s16(step2[12], step2[13]);
- step1[13] = vqsubq_s16(step2[12], step2[13]);
- step1[14] = vqsubq_s16(step2[15], step2[14]);
- step1[15] = vqaddq_s16(step2[15], step2[14]);
-
- // stage 4
-
- btf_16_neon(step1[0], cospi[32], cospi[32], &step2[0], &step2[1]);
- btf_16_neon(step1[2], cospi[48], cospi[16], &step2[2], &step2[3]);
- btf_16_lane_2_3_neon(step1[14], step1[9], c0, &step2[14], &step2[9]);
- btf_16_lane_3_2_neon(vnegq_s16(step1[10]), vnegq_s16(step1[13]), c0,
- &step2[10], &step2[13]);
-
- step2[4] = vqaddq_s16(step1[4], step1[5]);
- step2[5] = vqsubq_s16(step1[4], step1[5]);
- step2[6] = vqsubq_s16(step1[7], step1[6]);
- step2[7] = vqaddq_s16(step1[7], step1[6]);
- step2[8] = step1[8];
- step2[11] = step1[11];
- step2[12] = step1[12];
- step2[15] = step1[15];
-
- // stage 5
-
- btf_16_lane_0_1_neon(step2[6], step2[5], c0, &step1[6], &step1[5]);
- step1[0] = vqaddq_s16(step2[0], step2[3]);
- step1[1] = vqaddq_s16(step2[1], step2[2]);
- step1[2] = vqsubq_s16(step2[1], step2[2]);
- step1[3] = vqsubq_s16(step2[0], step2[3]);
- step1[4] = step2[4];
- step1[7] = step2[7];
- step1[8] = vqaddq_s16(step2[8], step2[11]);
- step1[9] = vqaddq_s16(step2[9], step2[10]);
- step1[10] = vqsubq_s16(step2[9], step2[10]);
- step1[11] = vqsubq_s16(step2[8], step2[11]);
- step1[12] = vqsubq_s16(step2[15], step2[12]);
- step1[13] = vqsubq_s16(step2[14], step2[13]);
- step1[14] = vqaddq_s16(step2[14], step2[13]);
- step1[15] = vqaddq_s16(step2[15], step2[12]);
-
- // stage 6
- btf_16_lane_0_1_neon(step1[13], step1[10], c0, &step2[13], &step2[10]);
- btf_16_lane_0_1_neon(step1[12], step1[11], c0, &step2[12], &step2[11]);
-
- step2[0] = vqaddq_s16(step1[0], step1[7]);
- step2[1] = vqaddq_s16(step1[1], step1[6]);
- step2[2] = vqaddq_s16(step1[2], step1[5]);
- step2[3] = vqaddq_s16(step1[3], step1[4]);
- step2[4] = vqsubq_s16(step1[3], step1[4]);
- step2[5] = vqsubq_s16(step1[2], step1[5]);
- step2[6] = vqsubq_s16(step1[1], step1[6]);
- step2[7] = vqsubq_s16(step1[0], step1[7]);
- step2[8] = step1[8];
- step2[9] = step1[9];
- step2[14] = step1[14];
- step2[15] = step1[15];
-
- // stage 7
-
- out[0] = vqaddq_s16(step2[0], step2[15]);
- out[1] = vqaddq_s16(step2[1], step2[14]);
- out[2] = vqaddq_s16(step2[2], step2[13]);
- out[3] = vqaddq_s16(step2[3], step2[12]);
- out[4] = vqaddq_s16(step2[4], step2[11]);
- out[5] = vqaddq_s16(step2[5], step2[10]);
- out[6] = vqaddq_s16(step2[6], step2[9]);
- out[7] = vqaddq_s16(step2[7], step2[8]);
- out[8] = vqsubq_s16(step2[7], step2[8]);
- out[9] = vqsubq_s16(step2[6], step2[9]);
- out[10] = vqsubq_s16(step2[5], step2[10]);
- out[11] = vqsubq_s16(step2[4], step2[11]);
- out[12] = vqsubq_s16(step2[3], step2[12]);
- out[13] = vqsubq_s16(step2[2], step2[13]);
- out[14] = vqsubq_s16(step2[1], step2[14]);
- out[15] = vqsubq_s16(step2[0], step2[15]);
-}
-
-static INLINE void iadst16_new_neon(int16x8_t *const in, int16x8_t *out,
- int8_t cos_bit, int bit) {
- (void)bit;
- const int32_t *cospi = cospi_arr(cos_bit);
-
- const int16x4_t c0 =
- create_s16x4_neon((int16_t *)(cospi + 2), (int16_t *)(cospi + 62),
- (int16_t *)(cospi + 10), (int16_t *)(cospi + 54));
- const int16x4_t c1 =
- create_s16x4_neon((int16_t *)(cospi + 18), (int16_t *)(cospi + 46),
- (int16_t *)(cospi + 26), (int16_t *)(cospi + 38));
- const int16x4_t c2 =
- create_s16x4_neon((int16_t *)(cospi + 34), (int16_t *)(cospi + 30),
- (int16_t *)(cospi + 42), (int16_t *)(cospi + 22));
- const int16x4_t c3 =
- create_s16x4_neon((int16_t *)(cospi + 50), (int16_t *)(cospi + 14),
- (int16_t *)(cospi + 58), (int16_t *)(cospi + 6));
- const int16x4_t c4 =
- create_s16x4_neon((int16_t *)(cospi + 8), (int16_t *)(cospi + 56),
- (int16_t *)(cospi + 40), (int16_t *)(cospi + 24));
-
- const int16x4_t c =
- create_s16x4_neon((int16_t *)(cospi + 32), (int16_t *)(cospi + 32),
- (int16_t *)(cospi + 16), (int16_t *)(cospi + 48));
-
- int16x8_t x[16];
- int16x8_t t[14];
- int16x8_t s0, s1, s2, s3, s4, s5, s6, s7;
- int16x8_t s8, s9, s10, s11, s12, s13, s14, s15;
-
- // Stage 1
- x[0] = in[15];
- x[1] = in[0];
- x[2] = in[13];
- x[3] = in[2];
- x[4] = in[11];
- x[5] = in[4];
- x[6] = in[9];
- x[7] = in[6];
- x[8] = in[7];
- x[9] = in[8];
- x[10] = in[5];
- x[11] = in[10];
- x[12] = in[3];
- x[13] = in[12];
- x[14] = in[1];
- x[15] = in[14];
-
- // Stage 2
- btf_16_lane_0_1_neon(x[0], x[1], c0, &s0, &s1);
- btf_16_lane_2_3_neon(x[2], x[3], c0, &s2, &s3);
- btf_16_lane_0_1_neon(x[4], x[5], c1, &s4, &s5);
- btf_16_lane_2_3_neon(x[6], x[7], c1, &s6, &s7);
- btf_16_lane_0_1_neon(x[8], x[9], c2, &s8, &s9);
- btf_16_lane_2_3_neon(x[10], x[11], c2, &s10, &s11);
- btf_16_lane_0_1_neon(x[12], x[13], c3, &s12, &s13);
- btf_16_lane_2_3_neon(x[14], x[15], c3, &s14, &s15);
-
- // Stage 3
- x[0] = vqaddq_s16(s0, s8);
- x[1] = vqaddq_s16(s1, s9);
- x[2] = vqaddq_s16(s2, s10);
- x[3] = vqaddq_s16(s3, s11);
- x[4] = vqaddq_s16(s4, s12);
- x[5] = vqaddq_s16(s5, s13);
- x[6] = vqaddq_s16(s6, s14);
- x[7] = vqaddq_s16(s7, s15);
- x[8] = vqsubq_s16(s0, s8);
- x[9] = vqsubq_s16(s1, s9);
- x[10] = vqsubq_s16(s2, s10);
- x[11] = vqsubq_s16(s3, s11);
- x[12] = vqsubq_s16(s4, s12);
- x[13] = vqsubq_s16(s5, s13);
- x[14] = vqsubq_s16(s6, s14);
- x[15] = vqsubq_s16(s7, s15);
-
- // Stage 4
- t[0] = x[0];
- t[1] = x[1];
- t[2] = x[2];
- t[3] = x[3];
- t[4] = x[4];
- t[5] = x[5];
- t[6] = x[6];
- t[7] = x[7];
- btf_16_lane_0_1_neon(x[8], x[9], c4, &s8, &s9);
- btf_16_lane_2_3_neon(x[10], x[11], c4, &s10, &s11);
- btf_16_lane_1_0_neon(x[13], x[12], c4, &s13, &s12);
- btf_16_lane_3_2_neon(x[15], x[14], c4, &s15, &s14);
-
- // Stage 5
- x[0] = vqaddq_s16(t[0], t[4]);
- x[1] = vqaddq_s16(t[1], t[5]);
- x[2] = vqaddq_s16(t[2], t[6]);
- x[3] = vqaddq_s16(t[3], t[7]);
- x[4] = vqsubq_s16(t[0], t[4]);
- x[5] = vqsubq_s16(t[1], t[5]);
- x[6] = vqsubq_s16(t[2], t[6]);
- x[7] = vqsubq_s16(t[3], t[7]);
- x[8] = vqaddq_s16(s8, s12);
- x[9] = vqaddq_s16(s9, s13);
- x[10] = vqaddq_s16(s10, s14);
- x[11] = vqaddq_s16(s11, s15);
- x[12] = vqsubq_s16(s8, s12);
- x[13] = vqsubq_s16(s9, s13);
- x[14] = vqsubq_s16(s10, s14);
- x[15] = vqsubq_s16(s11, s15);
-
- // stage 6
- t[0] = x[0];
- t[1] = x[1];
- t[2] = x[2];
- t[3] = x[3];
- btf_16_lane_2_3_neon(x[4], x[5], c, &s4, &s5);
- btf_16_lane_3_2_neon(x[7], x[6], c, &s7, &s6);
- t[8] = x[8];
- t[9] = x[9];
- t[10] = x[10];
- t[11] = x[11];
- btf_16_lane_2_3_neon(x[12], x[13], c, &s12, &s13);
- btf_16_lane_3_2_neon(x[15], x[14], c, &s15, &s14);
-
- // Stage 7
- x[0] = vqaddq_s16(t[0], t[2]);
- x[1] = vqaddq_s16(t[1], t[3]);
- x[2] = vqsubq_s16(t[0], t[2]);
- x[3] = vqsubq_s16(t[1], t[3]);
- x[4] = vqaddq_s16(s4, s6);
- x[5] = vqaddq_s16(s5, s7);
- x[6] = vqsubq_s16(s4, s6);
- x[7] = vqsubq_s16(s5, s7);
- x[8] = vqaddq_s16(t[8], t[10]);
- x[9] = vqaddq_s16(t[9], t[11]);
- x[10] = vqsubq_s16(t[8], t[10]);
- x[11] = vqsubq_s16(t[9], t[11]);
- x[12] = vqaddq_s16(s12, s14);
- x[13] = vqaddq_s16(s13, s15);
- x[14] = vqsubq_s16(s12, s14);
- x[15] = vqsubq_s16(s13, s15);
-
- // Stage 8
- btf_16_half_neon(x + 2, c);
- btf_16_half_neon(x + 6, c);
- btf_16_half_neon(x + 10, c);
- btf_16_half_neon(x + 14, c);
-
- // Stage 9
- out[0] = x[0];
- out[1] = vnegq_s16(x[8]);
- out[2] = x[12];
- out[3] = vnegq_s16(x[4]);
- out[4] = x[6];
- out[5] = vnegq_s16(x[14]);
- out[6] = x[10];
- out[7] = vnegq_s16(x[2]);
- out[8] = x[3];
- out[9] = vnegq_s16(x[11]);
- out[10] = x[15];
- out[11] = vnegq_s16(x[7]);
- out[12] = x[5];
- out[13] = vnegq_s16(x[13]);
- out[14] = x[9];
- out[15] = vnegq_s16(x[1]);
-}
-
-static INLINE void iadst16_low1_new_neon(int16x8_t *const in, int16x8_t *out,
- int8_t cos_bit, int bit) {
- (void)bit;
- const int32_t *cospi = cospi_arr(cos_bit);
- const int16x4_t c4 =
- create_s16x4_neon((int16_t *)(cospi + 8), (int16_t *)(cospi + 56),
- (int16_t *)(cospi + 40), (int16_t *)(cospi + 24));
- const int16x4_t c =
- create_s16x4_neon((int16_t *)(cospi + 32), (int16_t *)(cospi + 32),
- (int16_t *)(cospi + 16), (int16_t *)(cospi + 48));
-
- int16x8_t x[16];
- int16x8_t t[10];
- int16x8_t s0, s1, s4, s5;
- int16x8_t s8, s9, s12, s13;
-
- // Stage 1
- x[1] = in[0];
-
- // Stage 2
- btf_16_neon(x[1], cospi[62], -cospi[2], &s0, &s1);
-
- // Stage 3
- x[0] = s0;
- x[1] = s1;
- x[8] = s0;
- x[9] = s1;
-
- // Stage 4
- t[0] = x[0];
- t[1] = x[1];
- btf_16_lane_0_1_neon(x[8], x[9], c4, &s8, &s9);
-
- // Stage 5
- x[0] = t[0];
- x[1] = t[1];
- x[4] = t[0];
- x[5] = t[1];
- x[8] = s8;
- x[9] = s9;
- x[12] = s8;
- x[13] = s9;
-
- // stage 6
- t[0] = x[0];
- t[1] = x[1];
- btf_16_lane_2_3_neon(x[4], x[5], c, &s4, &s5);
- t[8] = x[8];
- t[9] = x[9];
- btf_16_lane_2_3_neon(x[12], x[13], c, &s12, &s13);
-
- // Stage 7
- x[0] = t[0];
- x[1] = t[1];
- x[2] = t[0];
- x[3] = t[1];
- x[4] = s4;
- x[5] = s5;
- x[6] = s4;
- x[7] = s5;
- x[8] = t[8];
- x[9] = t[9];
- x[10] = t[8];
- x[11] = t[9];
- x[12] = s12;
- x[13] = s13;
- x[14] = s12;
- x[15] = s13;
-
- // Stage 8
- btf_16_half_neon(x + 2, c);
- btf_16_half_neon(x + 6, c);
- btf_16_half_neon(x + 10, c);
- btf_16_half_neon(x + 14, c);
-
- // Stage 9
- out[0] = x[0];
- out[1] = vnegq_s16(x[8]);
- out[2] = x[12];
- out[3] = vnegq_s16(x[4]);
- out[4] = x[6];
- out[5] = vnegq_s16(x[14]);
- out[6] = x[10];
- out[7] = vnegq_s16(x[2]);
- out[8] = x[3];
- out[9] = vnegq_s16(x[11]);
- out[10] = x[15];
- out[11] = vnegq_s16(x[7]);
- out[12] = x[5];
- out[13] = vnegq_s16(x[13]);
- out[14] = x[9];
- out[15] = vnegq_s16(x[1]);
-}
-
-static INLINE void iadst16_low8_new_neon(int16x8_t *const in, int16x8_t *out,
- int8_t cos_bit, int bit) {
- (void)bit;
- const int32_t *cospi = cospi_arr(cos_bit);
-
- const int16x4_t c4 =
- create_s16x4_neon((int16_t *)(cospi + 8), (int16_t *)(cospi + 56),
- (int16_t *)(cospi + 40), (int16_t *)(cospi + 24));
- const int16x4_t c =
- create_s16x4_neon((int16_t *)(cospi + 32), (int16_t *)(cospi + 32),
- (int16_t *)(cospi + 16), (int16_t *)(cospi + 48));
-
- int16x8_t x[16];
- int16x8_t t[14];
- int16x8_t s0, s1, s2, s3, s4, s5, s6, s7;
- int16x8_t s8, s9, s10, s11, s12, s13, s14, s15;
-
- // Stage 1
- x[1] = in[0];
- x[3] = in[2];
- x[5] = in[4];
- x[7] = in[6];
- x[8] = in[7];
- x[10] = in[5];
- x[12] = in[3];
- x[14] = in[1];
-
- // Stage 2
- btf_16_neon(x[1], cospi[62], -cospi[2], &s0, &s1);
- btf_16_neon(x[3], cospi[54], -cospi[10], &s2, &s3);
- btf_16_neon(x[5], cospi[46], -cospi[18], &s4, &s5);
- btf_16_neon(x[7], cospi[38], -cospi[26], &s6, &s7);
-
- btf_16_neon(x[8], cospi[34], cospi[30], &s8, &s9);
- btf_16_neon(x[10], cospi[42], cospi[22], &s10, &s11);
- btf_16_neon(x[12], cospi[50], cospi[14], &s12, &s13);
- btf_16_neon(x[14], cospi[58], cospi[6], &s14, &s15);
-
- // Stage 3
- x[0] = vqaddq_s16(s0, s8);
- x[1] = vqaddq_s16(s1, s9);
- x[2] = vqaddq_s16(s2, s10);
- x[3] = vqaddq_s16(s3, s11);
- x[4] = vqaddq_s16(s4, s12);
- x[5] = vqaddq_s16(s5, s13);
- x[6] = vqaddq_s16(s6, s14);
- x[7] = vqaddq_s16(s7, s15);
- x[8] = vqsubq_s16(s0, s8);
- x[9] = vqsubq_s16(s1, s9);
- x[10] = vqsubq_s16(s2, s10);
- x[11] = vqsubq_s16(s3, s11);
- x[12] = vqsubq_s16(s4, s12);
- x[13] = vqsubq_s16(s5, s13);
- x[14] = vqsubq_s16(s6, s14);
- x[15] = vqsubq_s16(s7, s15);
-
- // Stage 4
- t[0] = x[0];
- t[1] = x[1];
- t[2] = x[2];
- t[3] = x[3];
- t[4] = x[4];
- t[5] = x[5];
- t[6] = x[6];
- t[7] = x[7];
- btf_16_lane_0_1_neon(x[8], x[9], c4, &s8, &s9);
- btf_16_lane_2_3_neon(x[10], x[11], c4, &s10, &s11);
- btf_16_lane_1_0_neon(x[13], x[12], c4, &s13, &s12);
- btf_16_lane_3_2_neon(x[15], x[14], c4, &s15, &s14);
-
- // Stage 5
- x[0] = vqaddq_s16(t[0], t[4]);
- x[1] = vqaddq_s16(t[1], t[5]);
- x[2] = vqaddq_s16(t[2], t[6]);
- x[3] = vqaddq_s16(t[3], t[7]);
- x[4] = vqsubq_s16(t[0], t[4]);
- x[5] = vqsubq_s16(t[1], t[5]);
- x[6] = vqsubq_s16(t[2], t[6]);
- x[7] = vqsubq_s16(t[3], t[7]);
- x[8] = vqaddq_s16(s8, s12);
- x[9] = vqaddq_s16(s9, s13);
- x[10] = vqaddq_s16(s10, s14);
- x[11] = vqaddq_s16(s11, s15);
- x[12] = vqsubq_s16(s8, s12);
- x[13] = vqsubq_s16(s9, s13);
- x[14] = vqsubq_s16(s10, s14);
- x[15] = vqsubq_s16(s11, s15);
-
- // stage 6
- t[0] = x[0];
- t[1] = x[1];
- t[2] = x[2];
- t[3] = x[3];
- btf_16_lane_2_3_neon(x[4], x[5], c, &s4, &s5);
- btf_16_lane_3_2_neon(x[7], x[6], c, &s7, &s6);
- t[8] = x[8];
- t[9] = x[9];
- t[10] = x[10];
- t[11] = x[11];
- btf_16_lane_2_3_neon(x[12], x[13], c, &s12, &s13);
- btf_16_lane_3_2_neon(x[15], x[14], c, &s15, &s14);
-
- // Stage 7
- x[0] = vqaddq_s16(t[0], t[2]);
- x[1] = vqaddq_s16(t[1], t[3]);
- x[2] = vqsubq_s16(t[0], t[2]);
- x[3] = vqsubq_s16(t[1], t[3]);
- x[4] = vqaddq_s16(s4, s6);
- x[5] = vqaddq_s16(s5, s7);
- x[6] = vqsubq_s16(s4, s6);
- x[7] = vqsubq_s16(s5, s7);
- x[8] = vqaddq_s16(t[8], t[10]);
- x[9] = vqaddq_s16(t[9], t[11]);
- x[10] = vqsubq_s16(t[8], t[10]);
- x[11] = vqsubq_s16(t[9], t[11]);
- x[12] = vqaddq_s16(s12, s14);
- x[13] = vqaddq_s16(s13, s15);
- x[14] = vqsubq_s16(s12, s14);
- x[15] = vqsubq_s16(s13, s15);
-
- // Stage 8
- btf_16_half_neon(x + 2, c);
- btf_16_half_neon(x + 6, c);
- btf_16_half_neon(x + 10, c);
- btf_16_half_neon(x + 14, c);
-
- // Stage 9
- out[0] = x[0];
- out[1] = vnegq_s16(x[8]);
- out[2] = x[12];
- out[3] = vnegq_s16(x[4]);
- out[4] = x[6];
- out[5] = vnegq_s16(x[14]);
- out[6] = x[10];
- out[7] = vnegq_s16(x[2]);
- out[8] = x[3];
- out[9] = vnegq_s16(x[11]);
- out[10] = x[15];
- out[11] = vnegq_s16(x[7]);
- out[12] = x[5];
- out[13] = vnegq_s16(x[13]);
- out[14] = x[9];
- out[15] = vnegq_s16(x[1]);
-}
-
-static INLINE void idct32_new_neon(int16x8_t *in, int16x8_t *out,
- int8_t cos_bit, int bit) {
- (void)bit;
- const int32_t *cospi = cospi_arr(cos_bit);
- int16x8_t step1[32], step2[32];
-
- const int16x4_t c0 =
- create_s16x4_neon((int16_t *)(cospi + 2), (int16_t *)(cospi + 62),
- (int16_t *)(cospi + 34), (int16_t *)(cospi + 30));
- const int16x4_t c1 =
- create_s16x4_neon((int16_t *)(cospi + 18), (int16_t *)(cospi + 46),
- (int16_t *)(cospi + 50), (int16_t *)(cospi + 14));
- const int16x4_t c2 =
- create_s16x4_neon((int16_t *)(cospi + 10), (int16_t *)(cospi + 54),
- (int16_t *)(cospi + 42), (int16_t *)(cospi + 22));
- const int16x4_t c3 =
- create_s16x4_neon((int16_t *)(cospi + 26), (int16_t *)(cospi + 38),
- (int16_t *)(cospi + 58), (int16_t *)(cospi + 6));
- const int16x4_t c4 =
- create_s16x4_neon((int16_t *)(cospi + 4), (int16_t *)(cospi + 60),
- (int16_t *)(cospi + 36), (int16_t *)(cospi + 28));
- const int16x4_t c5 =
- create_s16x4_neon((int16_t *)(cospi + 20), (int16_t *)(cospi + 44),
- (int16_t *)(cospi + 52), (int16_t *)(cospi + 12));
- const int16x4_t c6 =
- create_s16x4_neon((int16_t *)(cospi + 8), (int16_t *)(cospi + 56),
- (int16_t *)(cospi + 40), (int16_t *)(cospi + 24));
- const int16x4_t c7 =
- create_s16x4_neon((int16_t *)(cospi + 32), (int16_t *)(cospi + 32),
- (int16_t *)(cospi + 16), (int16_t *)(cospi + 48));
-
- // stage 2
-
- btf_16_lane_0_1_neon(in[1], in[31], c0, &step2[31], &step2[16]);
- btf_16_lane_2_3_neon(in[17], in[15], c0, &step2[30], &step2[17]);
- btf_16_lane_0_1_neon(in[9], in[23], c1, &step2[29], &step2[18]);
- btf_16_lane_2_3_neon(in[25], in[7], c1, &step2[28], &step2[19]);
- btf_16_lane_0_1_neon(in[5], in[27], c2, &step2[27], &step2[20]);
- btf_16_lane_2_3_neon(in[21], in[11], c2, &step2[26], &step2[21]);
- btf_16_lane_0_1_neon(in[13], in[19], c3, &step2[25], &step2[22]);
- btf_16_lane_2_3_neon(in[29], in[3], c3, &step2[24], &step2[23]);
-
- step2[0] = in[0];
- step2[1] = in[16];
- step2[2] = in[8];
- step2[3] = in[24];
- step2[4] = in[4];
- step2[5] = in[20];
- step2[6] = in[12];
- step2[7] = in[28];
- step2[8] = in[2];
- step2[9] = in[18];
- step2[10] = in[10];
- step2[11] = in[26];
- step2[12] = in[6];
- step2[13] = in[22];
- step2[14] = in[14];
- step2[15] = in[30];
-
- // stage 3
-
- btf_16_lane_0_1_neon(step2[8], step2[15], c4, &step1[15], &step1[8]);
- btf_16_lane_2_3_neon(step2[9], step2[14], c4, &step1[14], &step1[9]);
- btf_16_lane_0_1_neon(step2[10], step2[13], c5, &step1[13], &step1[10]);
- btf_16_lane_2_3_neon(step2[11], step2[12], c5, &step1[12], &step1[11]);
-
- step1[0] = step2[0];
- step1[1] = step2[1];
- step1[2] = step2[2];
- step1[3] = step2[3];
- step1[4] = step2[4];
- step1[5] = step2[5];
- step1[6] = step2[6];
- step1[7] = step2[7];
-
- step1[16] = vqaddq_s16(step2[16], step2[17]);
- step1[17] = vqsubq_s16(step2[16], step2[17]);
- step1[18] = vqsubq_s16(step2[19], step2[18]);
- step1[19] = vqaddq_s16(step2[19], step2[18]);
- step1[20] = vqaddq_s16(step2[20], step2[21]);
- step1[21] = vqsubq_s16(step2[20], step2[21]);
- step1[22] = vqsubq_s16(step2[23], step2[22]);
- step1[23] = vqaddq_s16(step2[23], step2[22]);
- step1[24] = vqaddq_s16(step2[24], step2[25]);
- step1[25] = vqsubq_s16(step2[24], step2[25]);
- step1[26] = vqsubq_s16(step2[27], step2[26]);
- step1[27] = vqaddq_s16(step2[27], step2[26]);
- step1[28] = vqaddq_s16(step2[28], step2[29]);
- step1[29] = vqsubq_s16(step2[28], step2[29]);
- step1[30] = vqsubq_s16(step2[31], step2[30]);
- step1[31] = vqaddq_s16(step2[31], step2[30]);
-
- // stage 4
-
- btf_16_lane_0_1_neon(step1[4], step1[7], c6, &step2[7], &step2[4]);
- btf_16_lane_2_3_neon(step1[5], step1[6], c6, &step2[6], &step2[5]);
- btf_16_lane_0_1_neon(step1[30], step1[17], c6, &step2[30], &step2[17]);
- btf_16_lane_1_0_neon(vnegq_s16(step1[18]), vnegq_s16(step1[29]), c6,
- &step2[18], &step2[29]);
- btf_16_lane_2_3_neon(step1[26], step1[21], c6, &step2[26], &step2[21]);
- btf_16_lane_3_2_neon(vnegq_s16(step1[22]), vnegq_s16(step1[25]), c6,
- &step2[22], &step2[25]);
-
- step2[0] = step1[0];
- step2[1] = step1[1];
- step2[2] = step1[2];
- step2[3] = step1[3];
- step2[8] = vqaddq_s16(step1[8], step1[9]);
- step2[9] = vqsubq_s16(step1[8], step1[9]);
- step2[10] = vqsubq_s16(step1[11], step1[10]);
- step2[11] = vqaddq_s16(step1[11], step1[10]);
- step2[12] = vqaddq_s16(step1[12], step1[13]);
- step2[13] = vqsubq_s16(step1[12], step1[13]);
- step2[14] = vqsubq_s16(step1[15], step1[14]);
- step2[15] = vqaddq_s16(step1[15], step1[14]);
- step2[16] = step1[16];
- step2[19] = step1[19];
- step2[20] = step1[20];
- step2[23] = step1[23];
- step2[24] = step1[24];
- step2[27] = step1[27];
- step2[28] = step1[28];
- step2[31] = step1[31];
-
- // stage 5
-
- btf_16_lane_0_1_neon(step2[0], step2[1], c7, &step1[0], &step1[1]);
- btf_16_lane_2_3_neon(step2[2], step2[3], c7, &step1[3], &step1[2]);
- btf_16_lane_2_3_neon(step2[14], step2[9], c7, &step1[14], &step1[9]);
- btf_16_lane_3_2_neon(vnegq_s16(step2[10]), vnegq_s16(step2[13]), c7,
- &step1[10], &step1[13]);
-
- step1[4] = vqaddq_s16(step2[4], step2[5]);
- step1[5] = vqsubq_s16(step2[4], step2[5]);
- step1[6] = vqsubq_s16(step2[7], step2[6]);
- step1[7] = vqaddq_s16(step2[7], step2[6]);
- step1[8] = step2[8];
- step1[11] = step2[11];
- step1[12] = step2[12];
- step1[15] = step2[15];
- step1[16] = vqaddq_s16(step2[16], step2[19]);
- step1[17] = vqaddq_s16(step2[17], step2[18]);
- step1[18] = vqsubq_s16(step2[17], step2[18]);
- step1[19] = vqsubq_s16(step2[16], step2[19]);
- step1[20] = vqsubq_s16(step2[23], step2[20]);
- step1[21] = vqsubq_s16(step2[22], step2[21]);
- step1[22] = vqaddq_s16(step2[22], step2[21]);
- step1[23] = vqaddq_s16(step2[23], step2[20]);
- step1[24] = vqaddq_s16(step2[24], step2[27]);
- step1[25] = vqaddq_s16(step2[25], step2[26]);
- step1[26] = vqsubq_s16(step2[25], step2[26]);
- step1[27] = vqsubq_s16(step2[24], step2[27]);
- step1[28] = vqsubq_s16(step2[31], step2[28]);
- step1[29] = vqsubq_s16(step2[30], step2[29]);
- step1[30] = vqaddq_s16(step2[30], step2[29]);
- step1[31] = vqaddq_s16(step2[31], step2[28]);
-
- // stage 6
-
- btf_16_lane_0_1_neon(step1[6], step1[5], c7, &step2[6], &step2[5]);
- btf_16_lane_2_3_neon(step1[29], step1[18], c7, &step2[29], &step2[18]);
- btf_16_lane_2_3_neon(step1[28], step1[19], c7, &step2[28], &step2[19]);
- btf_16_lane_3_2_neon(vnegq_s16(step1[20]), vnegq_s16(step1[27]), c7,
- &step2[20], &step2[27]);
- btf_16_lane_3_2_neon(vnegq_s16(step1[21]), vnegq_s16(step1[26]), c7,
- &step2[21], &step2[26]);
-
- step2[0] = vqaddq_s16(step1[0], step1[3]);
- step2[1] = vqaddq_s16(step1[1], step1[2]);
- step2[2] = vqsubq_s16(step1[1], step1[2]);
- step2[3] = vqsubq_s16(step1[0], step1[3]);
- step2[4] = step1[4];
- step2[7] = step1[7];
- step2[8] = vqaddq_s16(step1[8], step1[11]);
- step2[9] = vqaddq_s16(step1[9], step1[10]);
- step2[10] = vqsubq_s16(step1[9], step1[10]);
- step2[11] = vqsubq_s16(step1[8], step1[11]);
- step2[12] = vqsubq_s16(step1[15], step1[12]);
- step2[13] = vqsubq_s16(step1[14], step1[13]);
- step2[14] = vqaddq_s16(step1[14], step1[13]);
- step2[15] = vqaddq_s16(step1[15], step1[12]);
- step2[16] = step1[16];
- step2[17] = step1[17];
- step2[22] = step1[22];
- step2[23] = step1[23];
- step2[24] = step1[24];
- step2[25] = step1[25];
- step2[30] = step1[30];
- step2[31] = step1[31];
-
- // stage 7
-
- btf_16_lane_0_1_neon(step2[13], step2[10], c7, &step1[13], &step1[10]);
- btf_16_lane_0_1_neon(step2[12], step2[11], c7, &step1[12], &step1[11]);
-
- step1[0] = vqaddq_s16(step2[0], step2[7]);
- step1[1] = vqaddq_s16(step2[1], step2[6]);
- step1[2] = vqaddq_s16(step2[2], step2[5]);
- step1[3] = vqaddq_s16(step2[3], step2[4]);
- step1[4] = vqsubq_s16(step2[3], step2[4]);
- step1[5] = vqsubq_s16(step2[2], step2[5]);
- step1[6] = vqsubq_s16(step2[1], step2[6]);
- step1[7] = vqsubq_s16(step2[0], step2[7]);
- step1[8] = step2[8];
- step1[9] = step2[9];
- step1[14] = step2[14];
- step1[15] = step2[15];
- step1[16] = vqaddq_s16(step2[16], step2[23]);
- step1[17] = vqaddq_s16(step2[17], step2[22]);
- step1[18] = vqaddq_s16(step2[18], step2[21]);
- step1[19] = vqaddq_s16(step2[19], step2[20]);
- step1[20] = vqsubq_s16(step2[19], step2[20]);
- step1[21] = vqsubq_s16(step2[18], step2[21]);
- step1[22] = vqsubq_s16(step2[17], step2[22]);
- step1[23] = vqsubq_s16(step2[16], step2[23]);
- step1[24] = vqsubq_s16(step2[31], step2[24]);
- step1[25] = vqsubq_s16(step2[30], step2[25]);
- step1[26] = vqsubq_s16(step2[29], step2[26]);
- step1[27] = vqsubq_s16(step2[28], step2[27]);
- step1[28] = vqaddq_s16(step2[27], step2[28]);
- step1[29] = vqaddq_s16(step2[26], step2[29]);
- step1[30] = vqaddq_s16(step2[25], step2[30]);
- step1[31] = vqaddq_s16(step2[24], step2[31]);
-
- // stage 8
-
- btf_16_lane_0_1_neon(step1[27], step1[20], c7, &step2[27], &step2[20]);
- btf_16_lane_0_1_neon(step1[26], step1[21], c7, &step2[26], &step2[21]);
- btf_16_lane_0_1_neon(step1[25], step1[22], c7, &step2[25], &step2[22]);
- btf_16_lane_0_1_neon(step1[24], step1[23], c7, &step2[24], &step2[23]);
-
- step2[0] = vqaddq_s16(step1[0], step1[15]);
- step2[1] = vqaddq_s16(step1[1], step1[14]);
- step2[2] = vqaddq_s16(step1[2], step1[13]);
- step2[3] = vqaddq_s16(step1[3], step1[12]);
- step2[4] = vqaddq_s16(step1[4], step1[11]);
- step2[5] = vqaddq_s16(step1[5], step1[10]);
- step2[6] = vqaddq_s16(step1[6], step1[9]);
- step2[7] = vqaddq_s16(step1[7], step1[8]);
- step2[8] = vqsubq_s16(step1[7], step1[8]);
- step2[9] = vqsubq_s16(step1[6], step1[9]);
- step2[10] = vqsubq_s16(step1[5], step1[10]);
- step2[11] = vqsubq_s16(step1[4], step1[11]);
- step2[12] = vqsubq_s16(step1[3], step1[12]);
- step2[13] = vqsubq_s16(step1[2], step1[13]);
- step2[14] = vqsubq_s16(step1[1], step1[14]);
- step2[15] = vqsubq_s16(step1[0], step1[15]);
- step2[16] = step1[16];
- step2[17] = step1[17];
- step2[18] = step1[18];
- step2[19] = step1[19];
- step2[28] = step1[28];
- step2[29] = step1[29];
- step2[30] = step1[30];
- step2[31] = step1[31];
-
- // stage 9
-
- out[0] = vqaddq_s16(step2[0], step2[31]);
- out[1] = vqaddq_s16(step2[1], step2[30]);
- out[2] = vqaddq_s16(step2[2], step2[29]);
- out[3] = vqaddq_s16(step2[3], step2[28]);
- out[4] = vqaddq_s16(step2[4], step2[27]);
- out[5] = vqaddq_s16(step2[5], step2[26]);
- out[6] = vqaddq_s16(step2[6], step2[25]);
- out[7] = vqaddq_s16(step2[7], step2[24]);
- out[8] = vqaddq_s16(step2[8], step2[23]);
- out[9] = vqaddq_s16(step2[9], step2[22]);
- out[10] = vqaddq_s16(step2[10], step2[21]);
- out[11] = vqaddq_s16(step2[11], step2[20]);
- out[12] = vqaddq_s16(step2[12], step2[19]);
- out[13] = vqaddq_s16(step2[13], step2[18]);
- out[14] = vqaddq_s16(step2[14], step2[17]);
- out[15] = vqaddq_s16(step2[15], step2[16]);
- out[16] = vqsubq_s16(step2[15], step2[16]);
- out[17] = vqsubq_s16(step2[14], step2[17]);
- out[18] = vqsubq_s16(step2[13], step2[18]);
- out[19] = vqsubq_s16(step2[12], step2[19]);
- out[20] = vqsubq_s16(step2[11], step2[20]);
- out[21] = vqsubq_s16(step2[10], step2[21]);
- out[22] = vqsubq_s16(step2[9], step2[22]);
- out[23] = vqsubq_s16(step2[8], step2[23]);
- out[24] = vqsubq_s16(step2[7], step2[24]);
- out[25] = vqsubq_s16(step2[6], step2[25]);
- out[26] = vqsubq_s16(step2[5], step2[26]);
- out[27] = vqsubq_s16(step2[4], step2[27]);
- out[28] = vqsubq_s16(step2[3], step2[28]);
- out[29] = vqsubq_s16(step2[2], step2[29]);
- out[30] = vqsubq_s16(step2[1], step2[30]);
- out[31] = vqsubq_s16(step2[0], step2[31]);
-}
-
-static INLINE void idct32_low1_new_neon(int16x8_t *in, int16x8_t *out,
- int8_t cos_bit, int bit) {
- (void)bit;
- const int32_t *cospi = cospi_arr(cos_bit);
- int16x8_t step1;
- int32x4_t t32[2];
-
- // stage 1
- // stage 2
- // stage 3
- // stage 4
- // stage 5
-
- t32[0] = vmull_n_s16(vget_low_s16(in[0]), cospi[32]);
- t32[1] = vmull_n_s16(vget_high_s16(in[0]), cospi[32]);
- step1 = vcombine_s16(vrshrn_n_s32(t32[0], INV_COS_BIT),
- vrshrn_n_s32(t32[1], INV_COS_BIT));
-
- // stage 6
- // stage 7
- // stage 8
- // stage 9
-
- out[0] = step1;
- out[1] = step1;
- out[2] = step1;
- out[3] = step1;
- out[4] = step1;
- out[5] = step1;
- out[6] = step1;
- out[7] = step1;
- out[8] = step1;
- out[9] = step1;
- out[10] = step1;
- out[11] = step1;
- out[12] = step1;
- out[13] = step1;
- out[14] = step1;
- out[15] = step1;
- out[16] = step1;
- out[17] = step1;
- out[18] = step1;
- out[19] = step1;
- out[20] = step1;
- out[21] = step1;
- out[22] = step1;
- out[23] = step1;
- out[24] = step1;
- out[25] = step1;
- out[26] = step1;
- out[27] = step1;
- out[28] = step1;
- out[29] = step1;
- out[30] = step1;
- out[31] = step1;
-}
-
-static INLINE void idct32_low8_new_neon(int16x8_t *in, int16x8_t *out,
- int8_t cos_bit, int bit) {
- (void)bit;
- const int32_t *cospi = cospi_arr(cos_bit);
- int16x8_t step1[32], step2[32];
- int32x4_t t32[16];
- const int16x4_t c0 =
- create_s16x4_neon((int16_t *)(cospi + 8), (int16_t *)(cospi + 56),
- (int16_t *)(cospi + 40), (int16_t *)(cospi + 24));
- const int16x4_t c1 =
- create_s16x4_neon((int16_t *)(cospi + 32), (int16_t *)(cospi + 32),
- (int16_t *)(cospi + 16), (int16_t *)(cospi + 48));
-
- // stage 1
- // stage 2
-
- step2[0] = in[0];
- step2[4] = in[4];
- step2[8] = in[2];
- step2[12] = in[6];
-
- btf_16_neon(in[1], cospi[62], cospi[2], &step2[16], &step2[31]);
- btf_16_neon(in[7], -cospi[50], cospi[14], &step2[19], &step2[28]);
- btf_16_neon(in[5], cospi[54], cospi[10], &step2[20], &step2[27]);
- btf_16_neon(in[3], -cospi[58], cospi[6], &step2[23], &step2[24]);
-
- // stage 3
- step1[0] = step2[0];
- step1[4] = step2[4];
-
- btf_16_neon(step2[8], cospi[60], cospi[4], &step1[8], &step1[15]);
- btf_16_neon(step2[12], -cospi[52], cospi[12], &step1[11], &step1[12]);
-
- step1[16] = step2[16];
- step1[17] = step2[16];
- step1[18] = step2[19];
- step1[19] = step2[19];
- step1[20] = step2[20];
- step1[21] = step2[20];
- step1[22] = step2[23];
- step1[23] = step2[23];
- step1[24] = step2[24];
- step1[25] = step2[24];
- step1[26] = step2[27];
- step1[27] = step2[27];
- step1[28] = step2[28];
- step1[29] = step2[28];
- step1[30] = step2[31];
- step1[31] = step2[31];
-
- // stage 4
-
- btf_16_neon(step1[4], cospi[56], cospi[8], &step2[4], &step2[7]);
- btf_16_lane_0_1_neon(step1[30], step1[17], c0, &step2[30], &step2[17]);
- btf_16_lane_1_0_neon(vnegq_s16(step1[18]), vnegq_s16(step1[29]), c0,
- &step2[18], &step2[29]);
- btf_16_lane_2_3_neon(step1[26], step1[21], c0, &step2[26], &step2[21]);
- btf_16_lane_3_2_neon(vnegq_s16(step1[22]), vnegq_s16(step1[25]), c0,
- &step2[22], &step2[25]);
-
- step2[0] = step1[0];
- step2[8] = step1[8];
- step2[9] = step1[8];
- step2[10] = step1[11];
- step2[11] = step1[11];
- step2[12] = step1[12];
- step2[13] = step1[12];
- step2[14] = step1[15];
- step2[15] = step1[15];
- step2[16] = step1[16];
- step2[19] = step1[19];
- step2[20] = step1[20];
- step2[23] = step1[23];
- step2[24] = step1[24];
- step2[27] = step1[27];
- step2[28] = step1[28];
- step2[31] = step1[31];
-
- // stage 5
-
- t32[0] = vmull_n_s16(vget_low_s16(step2[0]), cospi[32]);
- t32[1] = vmull_n_s16(vget_high_s16(step2[0]), cospi[32]);
- step1[0] = vcombine_s16(vrshrn_n_s32(t32[0], INV_COS_BIT),
- vrshrn_n_s32(t32[1], INV_COS_BIT));
-
- btf_16_lane_2_3_neon(step2[14], step2[9], c1, &step1[14], &step1[9]);
- btf_16_lane_3_2_neon(vnegq_s16(step2[10]), vnegq_s16(step2[13]), c1,
- &step1[10], &step1[13]);
-
- step1[4] = step2[4];
- step1[5] = step2[4];
- step1[6] = step2[7];
- step1[7] = step2[7];
- step1[8] = step2[8];
- step1[11] = step2[11];
- step1[12] = step2[12];
- step1[15] = step2[15];
- step1[16] = vqaddq_s16(step2[16], step2[19]);
- step1[17] = vqaddq_s16(step2[17], step2[18]);
- step1[18] = vqsubq_s16(step2[17], step2[18]);
- step1[19] = vqsubq_s16(step2[16], step2[19]);
- step1[20] = vqsubq_s16(step2[23], step2[20]);
- step1[21] = vqsubq_s16(step2[22], step2[21]);
- step1[22] = vqaddq_s16(step2[22], step2[21]);
- step1[23] = vqaddq_s16(step2[23], step2[20]);
- step1[24] = vqaddq_s16(step2[24], step2[27]);
- step1[25] = vqaddq_s16(step2[25], step2[26]);
- step1[26] = vqsubq_s16(step2[25], step2[26]);
- step1[27] = vqsubq_s16(step2[24], step2[27]);
- step1[28] = vqsubq_s16(step2[31], step2[28]);
- step1[29] = vqsubq_s16(step2[30], step2[29]);
- step1[30] = vqaddq_s16(step2[30], step2[29]);
- step1[31] = vqaddq_s16(step2[31], step2[28]);
-
- // stage 6
-
- btf_16_lane_0_1_neon(step1[6], step1[5], c1, &step2[6], &step2[5]);
- btf_16_lane_2_3_neon(step1[29], step1[18], c1, &step2[29], &step2[18]);
- btf_16_lane_2_3_neon(step1[28], step1[19], c1, &step2[28], &step2[19]);
- btf_16_lane_3_2_neon(vnegq_s16(step1[20]), vnegq_s16(step1[27]), c1,
- &step2[20], &step2[27]);
- btf_16_lane_3_2_neon(vnegq_s16(step1[21]), vnegq_s16(step1[26]), c1,
- &step2[21], &step2[26]);
-
- step2[0] = step1[0];
- step2[1] = step1[0];
- step2[2] = step1[0];
- step2[3] = step1[0];
- step2[4] = step1[4];
- step2[7] = step1[7];
- step2[8] = vqaddq_s16(step1[8], step1[11]);
- step2[9] = vqaddq_s16(step1[9], step1[10]);
- step2[10] = vqsubq_s16(step1[9], step1[10]);
- step2[11] = vqsubq_s16(step1[8], step1[11]);
- step2[12] = vqsubq_s16(step1[15], step1[12]);
- step2[13] = vqsubq_s16(step1[14], step1[13]);
- step2[14] = vqaddq_s16(step1[14], step1[13]);
- step2[15] = vqaddq_s16(step1[15], step1[12]);
- step2[16] = step1[16];
- step2[17] = step1[17];
- step2[22] = step1[22];
- step2[23] = step1[23];
- step2[24] = step1[24];
- step2[25] = step1[25];
- step2[30] = step1[30];
- step2[31] = step1[31];
-
- // stage 7
-
- btf_16_lane_0_1_neon(step2[13], step2[10], c1, &step1[13], &step1[10]);
- btf_16_lane_0_1_neon(step2[12], step2[11], c1, &step1[12], &step1[11]);
-
- step1[0] = vqaddq_s16(step2[0], step2[7]);
- step1[1] = vqaddq_s16(step2[1], step2[6]);
- step1[2] = vqaddq_s16(step2[2], step2[5]);
- step1[3] = vqaddq_s16(step2[3], step2[4]);
- step1[4] = vqsubq_s16(step2[3], step2[4]);
- step1[5] = vqsubq_s16(step2[2], step2[5]);
- step1[6] = vqsubq_s16(step2[1], step2[6]);
- step1[7] = vqsubq_s16(step2[0], step2[7]);
- step1[8] = step2[8];
- step1[9] = step2[9];
- step1[14] = step2[14];
- step1[15] = step2[15];
- step1[16] = vqaddq_s16(step2[16], step2[23]);
- step1[17] = vqaddq_s16(step2[17], step2[22]);
- step1[18] = vqaddq_s16(step2[18], step2[21]);
- step1[19] = vqaddq_s16(step2[19], step2[20]);
- step1[20] = vqsubq_s16(step2[19], step2[20]);
- step1[21] = vqsubq_s16(step2[18], step2[21]);
- step1[22] = vqsubq_s16(step2[17], step2[22]);
- step1[23] = vqsubq_s16(step2[16], step2[23]);
- step1[24] = vqsubq_s16(step2[31], step2[24]);
- step1[25] = vqsubq_s16(step2[30], step2[25]);
- step1[26] = vqsubq_s16(step2[29], step2[26]);
- step1[27] = vqsubq_s16(step2[28], step2[27]);
- step1[28] = vqaddq_s16(step2[27], step2[28]);
- step1[29] = vqaddq_s16(step2[26], step2[29]);
- step1[30] = vqaddq_s16(step2[25], step2[30]);
- step1[31] = vqaddq_s16(step2[24], step2[31]);
-
- // stage 8
-
- btf_16_lane_0_1_neon(step1[27], step1[20], c1, &step2[27], &step2[20]);
- btf_16_lane_0_1_neon(step1[26], step1[21], c1, &step2[26], &step2[21]);
- btf_16_lane_0_1_neon(step1[25], step1[22], c1, &step2[25], &step2[22]);
- btf_16_lane_0_1_neon(step1[24], step1[23], c1, &step2[24], &step2[23]);
-
- step2[0] = vqaddq_s16(step1[0], step1[15]);
- step2[1] = vqaddq_s16(step1[1], step1[14]);
- step2[2] = vqaddq_s16(step1[2], step1[13]);
- step2[3] = vqaddq_s16(step1[3], step1[12]);
- step2[4] = vqaddq_s16(step1[4], step1[11]);
- step2[5] = vqaddq_s16(step1[5], step1[10]);
- step2[6] = vqaddq_s16(step1[6], step1[9]);
- step2[7] = vqaddq_s16(step1[7], step1[8]);
- step2[8] = vqsubq_s16(step1[7], step1[8]);
- step2[9] = vqsubq_s16(step1[6], step1[9]);
- step2[10] = vqsubq_s16(step1[5], step1[10]);
- step2[11] = vqsubq_s16(step1[4], step1[11]);
- step2[12] = vqsubq_s16(step1[3], step1[12]);
- step2[13] = vqsubq_s16(step1[2], step1[13]);
- step2[14] = vqsubq_s16(step1[1], step1[14]);
- step2[15] = vqsubq_s16(step1[0], step1[15]);
- step2[16] = step1[16];
- step2[17] = step1[17];
- step2[18] = step1[18];
- step2[19] = step1[19];
- step2[28] = step1[28];
- step2[29] = step1[29];
- step2[30] = step1[30];
- step2[31] = step1[31];
-
- // stage 9
-
- out[0] = vqaddq_s16(step2[0], step2[31]);
- out[1] = vqaddq_s16(step2[1], step2[30]);
- out[2] = vqaddq_s16(step2[2], step2[29]);
- out[3] = vqaddq_s16(step2[3], step2[28]);
- out[4] = vqaddq_s16(step2[4], step2[27]);
- out[5] = vqaddq_s16(step2[5], step2[26]);
- out[6] = vqaddq_s16(step2[6], step2[25]);
- out[7] = vqaddq_s16(step2[7], step2[24]);
- out[8] = vqaddq_s16(step2[8], step2[23]);
- out[9] = vqaddq_s16(step2[9], step2[22]);
- out[10] = vqaddq_s16(step2[10], step2[21]);
- out[11] = vqaddq_s16(step2[11], step2[20]);
- out[12] = vqaddq_s16(step2[12], step2[19]);
- out[13] = vqaddq_s16(step2[13], step2[18]);
- out[14] = vqaddq_s16(step2[14], step2[17]);
- out[15] = vqaddq_s16(step2[15], step2[16]);
- out[16] = vqsubq_s16(step2[15], step2[16]);
- out[17] = vqsubq_s16(step2[14], step2[17]);
- out[18] = vqsubq_s16(step2[13], step2[18]);
- out[19] = vqsubq_s16(step2[12], step2[19]);
- out[20] = vqsubq_s16(step2[11], step2[20]);
- out[21] = vqsubq_s16(step2[10], step2[21]);
- out[22] = vqsubq_s16(step2[9], step2[22]);
- out[23] = vqsubq_s16(step2[8], step2[23]);
- out[24] = vqsubq_s16(step2[7], step2[24]);
- out[25] = vqsubq_s16(step2[6], step2[25]);
- out[26] = vqsubq_s16(step2[5], step2[26]);
- out[27] = vqsubq_s16(step2[4], step2[27]);
- out[28] = vqsubq_s16(step2[3], step2[28]);
- out[29] = vqsubq_s16(step2[2], step2[29]);
- out[30] = vqsubq_s16(step2[1], step2[30]);
- out[31] = vqsubq_s16(step2[0], step2[31]);
-}
-
-static INLINE void idct32_low16_new_neon(int16x8_t *in, int16x8_t *out,
- int8_t cos_bit, int bit) {
- (void)bit;
- const int32_t *cospi = cospi_arr(cos_bit);
- int16x8_t step1[32], step2[32];
- int32x4_t t32[16];
- const int16x4_t c0 =
- create_s16x4_neon((int16_t *)(cospi + 8), (int16_t *)(cospi + 56),
- (int16_t *)(cospi + 40), (int16_t *)(cospi + 24));
- const int16x4_t c1 =
- create_s16x4_neon((int16_t *)(cospi + 32), (int16_t *)(cospi + 32),
- (int16_t *)(cospi + 16), (int16_t *)(cospi + 48));
-
- // stage 1
- // stage 2
-
- btf_16_neon(in[1], cospi[62], cospi[2], &step2[16], &step2[31]);
- btf_16_neon(in[15], -cospi[34], cospi[30], &step2[17], &step2[30]);
- btf_16_neon(in[9], cospi[46], cospi[18], &step2[18], &step2[29]);
- btf_16_neon(in[7], -cospi[50], cospi[14], &step2[19], &step2[28]);
- btf_16_neon(in[5], cospi[54], cospi[10], &step2[20], &step2[27]);
- btf_16_neon(in[11], -cospi[42], cospi[22], &step2[21], &step2[26]);
- btf_16_neon(in[13], cospi[38], cospi[26], &step2[22], &step2[25]);
- btf_16_neon(in[3], -cospi[58], cospi[6], &step2[23], &step2[24]);
-
- step2[0] = in[0];
- step2[2] = in[8];
- step2[4] = in[4];
- step2[6] = in[12];
- step2[8] = in[2];
- step2[10] = in[10];
- step2[12] = in[6];
- step2[14] = in[14];
-
- // stage 3
-
- btf_16_neon(step2[8], cospi[60], cospi[4], &step1[8], &step1[15]);
- btf_16_neon(step2[14], -cospi[36], cospi[28], &step1[9], &step1[14]);
- btf_16_neon(step2[10], cospi[44], cospi[20], &step1[10], &step1[13]);
- btf_16_neon(step2[12], -cospi[52], cospi[12], &step1[11], &step1[12]);
-
- step1[0] = step2[0];
- step1[2] = step2[2];
- step1[4] = step2[4];
- step1[6] = step2[6];
- step1[16] = vqaddq_s16(step2[16], step2[17]);
- step1[17] = vqsubq_s16(step2[16], step2[17]);
- step1[18] = vqsubq_s16(step2[19], step2[18]);
- step1[19] = vqaddq_s16(step2[19], step2[18]);
- step1[20] = vqaddq_s16(step2[20], step2[21]);
- step1[21] = vqsubq_s16(step2[20], step2[21]);
- step1[22] = vqsubq_s16(step2[23], step2[22]);
- step1[23] = vqaddq_s16(step2[23], step2[22]);
- step1[24] = vqaddq_s16(step2[24], step2[25]);
- step1[25] = vqsubq_s16(step2[24], step2[25]);
- step1[26] = vqsubq_s16(step2[27], step2[26]);
- step1[27] = vqaddq_s16(step2[27], step2[26]);
- step1[28] = vqaddq_s16(step2[28], step2[29]);
- step1[29] = vqsubq_s16(step2[28], step2[29]);
- step1[30] = vqsubq_s16(step2[31], step2[30]);
- step1[31] = vqaddq_s16(step2[31], step2[30]);
-
- // stage 4
-
- btf_16_neon(step1[4], cospi[56], cospi[8], &step2[4], &step2[7]);
- btf_16_neon(step1[6], -cospi[40], cospi[24], &step2[5], &step2[6]);
- btf_16_lane_0_1_neon(step1[30], step1[17], c0, &step2[30], &step2[17]);
- btf_16_lane_1_0_neon(vnegq_s16(step1[18]), vnegq_s16(step1[29]), c0,
- &step2[18], &step2[29]);
- btf_16_lane_2_3_neon(step1[26], step1[21], c0, &step2[26], &step2[21]);
- btf_16_lane_3_2_neon(vnegq_s16(step1[22]), vnegq_s16(step1[25]), c0,
- &step2[22], &step2[25]);
-
- step2[0] = step1[0];
- step2[2] = step1[2];
- step2[8] = vqaddq_s16(step1[8], step1[9]);
- step2[9] = vqsubq_s16(step1[8], step1[9]);
- step2[10] = vqsubq_s16(step1[11], step1[10]);
- step2[11] = vqaddq_s16(step1[11], step1[10]);
- step2[12] = vqaddq_s16(step1[12], step1[13]);
- step2[13] = vqsubq_s16(step1[12], step1[13]);
- step2[14] = vqsubq_s16(step1[15], step1[14]);
- step2[15] = vqaddq_s16(step1[15], step1[14]);
- step2[16] = step1[16];
- step2[19] = step1[19];
- step2[20] = step1[20];
- step2[23] = step1[23];
- step2[24] = step1[24];
- step2[27] = step1[27];
- step2[28] = step1[28];
- step2[31] = step1[31];
-
- // stage 5
-
- t32[0] = vmull_n_s16(vget_low_s16(step2[0]), cospi[32]);
- t32[1] = vmull_n_s16(vget_high_s16(step2[0]), cospi[32]);
-
- step1[0] = vcombine_s16(vrshrn_n_s32(t32[0], INV_COS_BIT),
- vrshrn_n_s32(t32[1], INV_COS_BIT));
-
- btf_16_neon(step2[2], cospi[48], cospi[16], &step1[2], &step1[3]);
- btf_16_lane_2_3_neon(step2[14], step2[9], c1, &step1[14], &step1[9]);
- btf_16_lane_3_2_neon(vnegq_s16(step2[10]), vnegq_s16(step2[13]), c1,
- &step1[10], &step1[13]);
-
- step1[4] = vqaddq_s16(step2[4], step2[5]);
- step1[5] = vqsubq_s16(step2[4], step2[5]);
- step1[6] = vqsubq_s16(step2[7], step2[6]);
- step1[7] = vqaddq_s16(step2[7], step2[6]);
- step1[8] = step2[8];
- step1[11] = step2[11];
- step1[12] = step2[12];
- step1[15] = step2[15];
- step1[16] = vqaddq_s16(step2[16], step2[19]);
- step1[17] = vqaddq_s16(step2[17], step2[18]);
- step1[18] = vqsubq_s16(step2[17], step2[18]);
- step1[19] = vqsubq_s16(step2[16], step2[19]);
- step1[20] = vqsubq_s16(step2[23], step2[20]);
- step1[21] = vqsubq_s16(step2[22], step2[21]);
- step1[22] = vqaddq_s16(step2[22], step2[21]);
- step1[23] = vqaddq_s16(step2[23], step2[20]);
- step1[24] = vqaddq_s16(step2[24], step2[27]);
- step1[25] = vqaddq_s16(step2[25], step2[26]);
- step1[26] = vqsubq_s16(step2[25], step2[26]);
- step1[27] = vqsubq_s16(step2[24], step2[27]);
- step1[28] = vqsubq_s16(step2[31], step2[28]);
- step1[29] = vqsubq_s16(step2[30], step2[29]);
- step1[30] = vqaddq_s16(step2[30], step2[29]);
- step1[31] = vqaddq_s16(step2[31], step2[28]);
-
- // stage 6
-
- btf_16_lane_0_1_neon(step1[6], step1[5], c1, &step2[6], &step2[5]);
- btf_16_lane_2_3_neon(step1[29], step1[18], c1, &step2[29], &step2[18]);
- btf_16_lane_2_3_neon(step1[28], step1[19], c1, &step2[28], &step2[19]);
- btf_16_lane_3_2_neon(vnegq_s16(step1[20]), vnegq_s16(step1[27]), c1,
- &step2[20], &step2[27]);
- btf_16_lane_3_2_neon(vnegq_s16(step1[21]), vnegq_s16(step1[26]), c1,
- &step2[21], &step2[26]);
-
- step2[0] = vqaddq_s16(step1[0], step1[3]);
- step2[1] = vqaddq_s16(step1[0], step1[2]);
- step2[2] = vqsubq_s16(step1[0], step1[2]);
- step2[3] = vqsubq_s16(step1[0], step1[3]);
- step2[4] = step1[4];
- step2[7] = step1[7];
- step2[8] = vqaddq_s16(step1[8], step1[11]);
- step2[9] = vqaddq_s16(step1[9], step1[10]);
- step2[10] = vqsubq_s16(step1[9], step1[10]);
- step2[11] = vqsubq_s16(step1[8], step1[11]);
- step2[12] = vqsubq_s16(step1[15], step1[12]);
- step2[13] = vqsubq_s16(step1[14], step1[13]);
- step2[14] = vqaddq_s16(step1[14], step1[13]);
- step2[15] = vqaddq_s16(step1[15], step1[12]);
- step2[16] = step1[16];
- step2[17] = step1[17];
- step2[22] = step1[22];
- step2[23] = step1[23];
- step2[24] = step1[24];
- step2[25] = step1[25];
- step2[30] = step1[30];
- step2[31] = step1[31];
-
- // stage 7
-
- btf_16_lane_0_1_neon(step2[13], step2[10], c1, &step1[13], &step1[10]);
- btf_16_lane_0_1_neon(step2[12], step2[11], c1, &step1[12], &step1[11]);
-
- step1[0] = vqaddq_s16(step2[0], step2[7]);
- step1[1] = vqaddq_s16(step2[1], step2[6]);
- step1[2] = vqaddq_s16(step2[2], step2[5]);
- step1[3] = vqaddq_s16(step2[3], step2[4]);
- step1[4] = vqsubq_s16(step2[3], step2[4]);
- step1[5] = vqsubq_s16(step2[2], step2[5]);
- step1[6] = vqsubq_s16(step2[1], step2[6]);
- step1[7] = vqsubq_s16(step2[0], step2[7]);
- step1[8] = step2[8];
- step1[9] = step2[9];
- step1[14] = step2[14];
- step1[15] = step2[15];
- step1[16] = vqaddq_s16(step2[16], step2[23]);
- step1[17] = vqaddq_s16(step2[17], step2[22]);
- step1[18] = vqaddq_s16(step2[18], step2[21]);
- step1[19] = vqaddq_s16(step2[19], step2[20]);
- step1[20] = vqsubq_s16(step2[19], step2[20]);
- step1[21] = vqsubq_s16(step2[18], step2[21]);
- step1[22] = vqsubq_s16(step2[17], step2[22]);
- step1[23] = vqsubq_s16(step2[16], step2[23]);
- step1[24] = vqsubq_s16(step2[31], step2[24]);
- step1[25] = vqsubq_s16(step2[30], step2[25]);
- step1[26] = vqsubq_s16(step2[29], step2[26]);
- step1[27] = vqsubq_s16(step2[28], step2[27]);
- step1[28] = vqaddq_s16(step2[27], step2[28]);
- step1[29] = vqaddq_s16(step2[26], step2[29]);
- step1[30] = vqaddq_s16(step2[25], step2[30]);
- step1[31] = vqaddq_s16(step2[24], step2[31]);
-
- // stage 8
-
- btf_16_lane_0_1_neon(step1[27], step1[20], c1, &step2[27], &step2[20]);
- btf_16_lane_0_1_neon(step1[26], step1[21], c1, &step2[26], &step2[21]);
- btf_16_lane_0_1_neon(step1[25], step1[22], c1, &step2[25], &step2[22]);
- btf_16_lane_0_1_neon(step1[24], step1[23], c1, &step2[24], &step2[23]);
-
- step2[0] = vqaddq_s16(step1[0], step1[15]);
- step2[1] = vqaddq_s16(step1[1], step1[14]);
- step2[2] = vqaddq_s16(step1[2], step1[13]);
- step2[3] = vqaddq_s16(step1[3], step1[12]);
- step2[4] = vqaddq_s16(step1[4], step1[11]);
- step2[5] = vqaddq_s16(step1[5], step1[10]);
- step2[6] = vqaddq_s16(step1[6], step1[9]);
- step2[7] = vqaddq_s16(step1[7], step1[8]);
- step2[8] = vqsubq_s16(step1[7], step1[8]);
- step2[9] = vqsubq_s16(step1[6], step1[9]);
- step2[10] = vqsubq_s16(step1[5], step1[10]);
- step2[11] = vqsubq_s16(step1[4], step1[11]);
- step2[12] = vqsubq_s16(step1[3], step1[12]);
- step2[13] = vqsubq_s16(step1[2], step1[13]);
- step2[14] = vqsubq_s16(step1[1], step1[14]);
- step2[15] = vqsubq_s16(step1[0], step1[15]);
- step2[16] = step1[16];
- step2[17] = step1[17];
- step2[18] = step1[18];
- step2[19] = step1[19];
- step2[28] = step1[28];
- step2[29] = step1[29];
- step2[30] = step1[30];
- step2[31] = step1[31];
-
- // stage 9
-
- out[0] = vqaddq_s16(step2[0], step2[31]);
- out[1] = vqaddq_s16(step2[1], step2[30]);
- out[2] = vqaddq_s16(step2[2], step2[29]);
- out[3] = vqaddq_s16(step2[3], step2[28]);
- out[4] = vqaddq_s16(step2[4], step2[27]);
- out[5] = vqaddq_s16(step2[5], step2[26]);
- out[6] = vqaddq_s16(step2[6], step2[25]);
- out[7] = vqaddq_s16(step2[7], step2[24]);
- out[8] = vqaddq_s16(step2[8], step2[23]);
- out[9] = vqaddq_s16(step2[9], step2[22]);
- out[10] = vqaddq_s16(step2[10], step2[21]);
- out[11] = vqaddq_s16(step2[11], step2[20]);
- out[12] = vqaddq_s16(step2[12], step2[19]);
- out[13] = vqaddq_s16(step2[13], step2[18]);
- out[14] = vqaddq_s16(step2[14], step2[17]);
- out[15] = vqaddq_s16(step2[15], step2[16]);
- out[16] = vqsubq_s16(step2[15], step2[16]);
- out[17] = vqsubq_s16(step2[14], step2[17]);
- out[18] = vqsubq_s16(step2[13], step2[18]);
- out[19] = vqsubq_s16(step2[12], step2[19]);
- out[20] = vqsubq_s16(step2[11], step2[20]);
- out[21] = vqsubq_s16(step2[10], step2[21]);
- out[22] = vqsubq_s16(step2[9], step2[22]);
- out[23] = vqsubq_s16(step2[8], step2[23]);
- out[24] = vqsubq_s16(step2[7], step2[24]);
- out[25] = vqsubq_s16(step2[6], step2[25]);
- out[26] = vqsubq_s16(step2[5], step2[26]);
- out[27] = vqsubq_s16(step2[4], step2[27]);
- out[28] = vqsubq_s16(step2[3], step2[28]);
- out[29] = vqsubq_s16(step2[2], step2[29]);
- out[30] = vqsubq_s16(step2[1], step2[30]);
- out[31] = vqsubq_s16(step2[0], step2[31]);
-}
-
-// Functions for blocks with eob at DC and within
-// topleft 8x8, 16x16, 32x32 corner
-static const transform_1d_neon
- lowbd_txfm_all_1d_zeros_w8_arr[TX_SIZES][ITX_TYPES_1D][4] = {
- {
- { av1_idct4_new, av1_idct4_new, NULL, NULL },
- { av1_iadst4_new, av1_iadst4_new, NULL, NULL },
- { av1_iidentity4_c, av1_iidentity4_c, NULL, NULL },
- },
- { { av1_idct8_new, av1_idct8_new, NULL, NULL },
- { av1_iadst8_new, av1_iadst8_new, NULL, NULL },
- { av1_iidentity8_c, av1_iidentity8_c, NULL, NULL } },
- {
- { av1_idct16_new, av1_idct16_new, av1_idct16_new, NULL },
- { av1_iadst16_new, av1_iadst16_new, av1_iadst16_new, NULL },
- { av1_iidentity16_c, av1_iidentity16_c, av1_iidentity16_c, NULL },
- },
- { { av1_idct32_new, av1_idct32_new, av1_idct32_new, av1_idct32_new },
- { NULL, NULL, NULL, NULL },
- { av1_iidentity32_c, av1_iidentity32_c, av1_iidentity32_c,
- av1_iidentity32_c } },
- { { av1_idct64_new, av1_idct64_new, av1_idct64_new, av1_idct64_new },
- { NULL, NULL, NULL, NULL },
- { NULL, NULL, NULL, NULL } }
- };
-
-static const transform_neon
- lowbd_txfm_all_1d_zeros_w_arr[TX_SIZES][ITX_TYPES_1D][4] = {
- {
- { NULL, NULL, NULL, NULL },
- { NULL, NULL, NULL, NULL },
- { NULL, NULL, NULL, NULL },
- },
- { { idct8_low1_new_neon, idct8_new_neon, NULL, NULL },
- { iadst8_low1_new_neon, iadst8_new_neon, NULL, NULL },
- { identity8_new_neon, identity8_new_neon, NULL, NULL } },
- {
- { idct16_low1_new_neon, idct16_low8_new_neon, idct16_new_neon, NULL },
- { iadst16_low1_new_neon, iadst16_low8_new_neon, iadst16_new_neon,
- NULL },
- { identity16_new_neon, identity16_new_neon, identity16_new_neon,
- NULL },
- },
- { { idct32_low1_new_neon, idct32_low8_new_neon, idct32_low16_new_neon,
- idct32_new_neon },
- { NULL, NULL, NULL, NULL },
- { identity32_new_neon, identity32_new_neon, identity32_new_neon,
- identity32_new_neon } },
- { { NULL, NULL, NULL, NULL },
- { NULL, NULL, NULL, NULL },
- { NULL, NULL, NULL, NULL } }
- };
-
-static INLINE void lowbd_inv_txfm2d_add_wxh_idtx_neon(
- const int32_t *input, uint8_t *output, int stride, TX_TYPE tx_type,
- TX_SIZE tx_size, int eob) {
- DECLARE_ALIGNED(32, int, txfm_buf[32 * 32 + 32 + 32]);
- int32_t *temp_in = txfm_buf;
-
- int eobx, eoby;
- get_eobx_eoby_scan_default(&eobx, &eoby, tx_size, eob);
- const int8_t *shift = inv_txfm_shift_ls[tx_size];
- const int txw_idx = get_txw_idx(tx_size);
- const int txh_idx = get_txh_idx(tx_size);
- const int cos_bit_col = inv_cos_bit_col[txw_idx][txh_idx];
- const int cos_bit_row = inv_cos_bit_row[txw_idx][txh_idx];
- const int txfm_size_col = tx_size_wide[tx_size];
- const int txfm_size_row = tx_size_high[tx_size];
- const int buf_size_nonzero_h_div8 = (eoby + 8) >> 3;
-
- const int rect_type = get_rect_tx_log_ratio(txfm_size_col, txfm_size_row);
- const int buf_offset = AOMMAX(txfm_size_row, txfm_size_col);
-
- int32_t *temp_out = temp_in + buf_offset;
- int32_t *buf = temp_out + buf_offset;
- int32_t *buf_ptr = buf;
- const int8_t stage_range[MAX_TXFM_STAGE_NUM] = { 16 };
- int r, bd = 8;
-
- const int fun_idx_x = lowbd_txfm_all_1d_zeros_idx[eobx];
- const int fun_idx_y = lowbd_txfm_all_1d_zeros_idx[eoby];
- const transform_1d_neon row_txfm =
- lowbd_txfm_all_1d_zeros_w8_arr[txw_idx][hitx_1d_tab[tx_type]][fun_idx_x];
- const transform_1d_neon col_txfm =
- lowbd_txfm_all_1d_zeros_w8_arr[txh_idx][vitx_1d_tab[tx_type]][fun_idx_y];
-
- assert(col_txfm != NULL);
- assert(row_txfm != NULL);
-
- // row tx
- int row_start = (buf_size_nonzero_h_div8 * 8);
- for (int i = 0; i < row_start; i++) {
- if (abs(rect_type) == 1) {
- for (int j = 0; j < txfm_size_col; j++)
- temp_in[j] = round_shift((int64_t)input[j] * NewInvSqrt2, NewSqrt2Bits);
- row_txfm(temp_in, buf_ptr, cos_bit_row, stage_range);
- } else {
- row_txfm(input, buf_ptr, cos_bit_row, stage_range);
- }
- av1_round_shift_array(buf_ptr, txfm_size_col, -shift[0]);
- input += txfm_size_col;
- buf_ptr += txfm_size_col;
- }
-
- // Doing memset for the rows which are not processed in row transform.
- memset(buf_ptr, 0,
- sizeof(int32_t) * txfm_size_col * (txfm_size_row - row_start));
-
- // col tx
- for (int c = 0; c < txfm_size_col; c++) {
- for (r = 0; r < txfm_size_row; ++r) temp_in[r] = buf[r * txfm_size_col + c];
-
- col_txfm(temp_in, temp_out, cos_bit_col, stage_range);
- av1_round_shift_array(temp_out, txfm_size_row, -shift[1]);
-
- for (r = 0; r < txfm_size_row; ++r) {
- output[r * stride + c] =
- highbd_clip_pixel_add(output[r * stride + c], temp_out[r], bd);
- }
- }
-}
-
-static INLINE void lowbd_inv_txfm2d_add_idtx_neon(const int32_t *input,
- uint8_t *output, int stride,
- TX_TYPE tx_type,
- TX_SIZE tx_size, int eob) {
- int16x8_t a[32 * 4];
- int16x8_t b[32 * 4];
- int eobx, eoby;
- get_eobx_eoby_scan_default(&eobx, &eoby, tx_size, eob);
- const int8_t *shift = inv_txfm_shift_ls[tx_size];
- const int txw_idx = get_txw_idx(tx_size);
- const int txh_idx = get_txh_idx(tx_size);
- const int cos_bit_col = inv_cos_bit_col[txw_idx][txh_idx];
- const int cos_bit_row = inv_cos_bit_row[txw_idx][txh_idx];
- const int txfm_size_col = tx_size_wide[tx_size];
- const int txfm_size_row = tx_size_high[tx_size];
- lowbd_inv_txfm2d_memset_neon(&a[0], (txfm_size_col * (txfm_size_row) >> 3),
- 0);
- lowbd_inv_txfm2d_memset_neon(&b[0], (txfm_size_col * (txfm_size_row) >> 3),
- 0);
- const int buf_size_w_div8 = txfm_size_col >> 3;
- const int rect_type = get_rect_tx_log_ratio(txfm_size_col, txfm_size_row);
- const int buf_size_nonzero_h_div8 = (eoby + 8) >> 3;
- const int buf_size_nonzero_w_div8 = (eobx + 8) >> 3;
- const int fun_idx_x = lowbd_txfm_all_1d_zeros_idx[eobx];
- const int fun_idx_y = lowbd_txfm_all_1d_zeros_idx[eoby];
- const int32_t *input_1;
- int temp_b = 0;
- const transform_neon row_txfm =
- lowbd_txfm_all_1d_zeros_w_arr[txw_idx][hitx_1d_tab[tx_type]][fun_idx_x];
- const transform_neon col_txfm =
- lowbd_txfm_all_1d_zeros_w_arr[txh_idx][vitx_1d_tab[tx_type]][fun_idx_y];
-
- assert(col_txfm != NULL);
- assert(row_txfm != NULL);
-
- for (int i = 0; i < buf_size_nonzero_h_div8; i++) {
- input_1 = input;
- for (int j = 0; j < buf_size_nonzero_w_div8; ++j) {
- int k = j * 8 + i * txfm_size_col;
- load_buffer_32bit_to_16bit_neon(input_1, &a[k], txfm_size_col);
- transpose_s16_8x8q(&a[k], &a[k]);
- input_1 += 8;
- }
- input += (txfm_size_col * 8);
- if (abs(rect_type) == 1) {
- int y = i * txfm_size_col;
- round_shift_for_rect(&a[y], &a[y], txfm_size_col);
- }
- row_txfm(&a[i * txfm_size_col], &a[i * txfm_size_col], cos_bit_row, 0);
- av1_round_shift_array_16_neon(&a[i * txfm_size_col], txfm_size_col,
- -shift[0]);
- for (int j = 0; j < buf_size_w_div8; ++j) {
- int k = j * 8 + i * txfm_size_col;
- transpose_s16_8x8q(&a[k], &b[temp_b + txfm_size_row * j]);
- }
- temp_b += 8;
- }
- for (int j = 0; j < buf_size_w_div8; ++j) {
- col_txfm(&b[j * txfm_size_row], &b[j * txfm_size_row], cos_bit_col, 0);
- av1_round_shift_array_16_neon(&b[j * txfm_size_row], txfm_size_row,
- -shift[1]);
- }
- if (txfm_size_col >= 16) {
- for (int i = 0; i < (txfm_size_col >> 4); i++) {
- lowbd_add_flip_buffer_16xn_neon(
- &b[i * txfm_size_row * 2], output + 16 * i, stride, 0, txfm_size_row);
- }
- } else if (txfm_size_col == 8) {
- lowbd_add_flip_buffer_8xn_neon(b, output, stride, 0, txfm_size_row);
- }
-}
-
-static INLINE void lowbd_inv_txfm2d_add_v_wxh_identity_neon(
- const int32_t *input, uint8_t *output, int stride, TX_TYPE tx_type,
- TX_SIZE tx_size, int eob) {
- DECLARE_ALIGNED(32, int, txfm_buf[32 * 32 + 32 + 32]);
- int32_t *temp_in = txfm_buf;
-
- int eobx, eoby;
- get_eobx_eoby_scan_v_identity(&eobx, &eoby, tx_size, eob);
- const int8_t *shift = inv_txfm_shift_ls[tx_size];
- const int txw_idx = get_txw_idx(tx_size);
- const int txh_idx = get_txh_idx(tx_size);
- const int cos_bit_col = inv_cos_bit_col[txw_idx][txh_idx];
- const int cos_bit_row = inv_cos_bit_row[txw_idx][txh_idx];
- const int txfm_size_col = tx_size_wide[tx_size];
- const int txfm_size_row = tx_size_high[tx_size];
- const int buf_size_nonzero_h_div8 = (eoby + 8) >> 3;
-
- const int rect_type = get_rect_tx_log_ratio(txfm_size_col, txfm_size_row);
- const int buf_offset = AOMMAX(txfm_size_row, txfm_size_col);
-
- int32_t *temp_out = temp_in + buf_offset;
- int32_t *buf = temp_out + buf_offset;
- int32_t *buf_ptr = buf;
- const int8_t stage_range[MAX_TXFM_STAGE_NUM] = { 16 };
- int r, bd = 8;
-
- const int fun_idx_x = lowbd_txfm_all_1d_zeros_idx[eobx];
- const int fun_idx_y = lowbd_txfm_all_1d_zeros_idx[eoby];
- const transform_1d_neon row_txfm =
- lowbd_txfm_all_1d_zeros_w8_arr[txw_idx][hitx_1d_tab[tx_type]][fun_idx_x];
- const transform_1d_neon col_txfm =
- lowbd_txfm_all_1d_zeros_w8_arr[txh_idx][vitx_1d_tab[tx_type]][fun_idx_y];
-
- assert(col_txfm != NULL);
- assert(row_txfm != NULL);
- int ud_flip, lr_flip;
- get_flip_cfg(tx_type, &ud_flip, &lr_flip);
-
- // row tx
- int row_start = (buf_size_nonzero_h_div8 * 8);
- for (int i = 0; i < row_start; i++) {
- if (abs(rect_type) == 1) {
- for (int j = 0; j < txfm_size_col; j++)
- temp_in[j] = round_shift((int64_t)input[j] * NewInvSqrt2, NewSqrt2Bits);
- row_txfm(temp_in, buf_ptr, cos_bit_row, stage_range);
- } else {
- row_txfm(input, buf_ptr, cos_bit_row, stage_range);
- }
- av1_round_shift_array(buf_ptr, txfm_size_col, -shift[0]);
- input += txfm_size_col;
- buf_ptr += txfm_size_col;
- }
- // Doing memset for the rows which are not processed in row transform.
- memset(buf_ptr, 0,
- sizeof(int32_t) * txfm_size_col * (txfm_size_row - row_start));
-
- // col tx
- for (int c = 0; c < txfm_size_col; c++) {
- if (lr_flip == 0) {
- for (r = 0; r < txfm_size_row; ++r)
- temp_in[r] = buf[r * txfm_size_col + c];
- } else {
- // flip left right
- for (r = 0; r < txfm_size_row; ++r)
- temp_in[r] = buf[r * txfm_size_col + (txfm_size_col - c - 1)];
- }
- col_txfm(temp_in, temp_out, cos_bit_col, stage_range);
- av1_round_shift_array(temp_out, txfm_size_row, -shift[1]);
-
- if (ud_flip == 0) {
- for (r = 0; r < txfm_size_row; ++r) {
- output[r * stride + c] =
- highbd_clip_pixel_add(output[r * stride + c], temp_out[r], bd);
- }
- } else {
- // flip upside down
- for (r = 0; r < txfm_size_row; ++r) {
- output[r * stride + c] = highbd_clip_pixel_add(
- output[r * stride + c], temp_out[txfm_size_row - r - 1], bd);
- }
- }
- }
-}
-
-static INLINE void lowbd_inv_txfm2d_add_v_identity_neon(
- const int32_t *input, uint8_t *output, int stride, TX_TYPE tx_type,
- TX_SIZE tx_size, int eob) {
- int16x8_t a[16 * 2];
- int16x8_t b[16 * 2];
- int eobx, eoby, ud_flip, lr_flip;
- get_eobx_eoby_scan_v_identity(&eobx, &eoby, tx_size, eob);
- const int8_t *shift = inv_txfm_shift_ls[tx_size];
- const int txw_idx = get_txw_idx(tx_size);
- const int txh_idx = get_txh_idx(tx_size);
- const int cos_bit_col = inv_cos_bit_col[txw_idx][txh_idx];
- const int cos_bit_row = inv_cos_bit_row[txw_idx][txh_idx];
- const int txfm_size_col = tx_size_wide[tx_size];
- const int txfm_size_row = tx_size_high[tx_size];
- lowbd_inv_txfm2d_memset_neon(&b[0], (txfm_size_col * (txfm_size_row) >> 3),
- 0);
- const int rect_type = get_rect_tx_log_ratio(txfm_size_col, txfm_size_row);
- const int buf_size_w_div8 = txfm_size_col >> 3;
- const int buf_size_nonzero_h_div8 = (eoby + 8) >> 3;
- const int buf_size_nonzero_w_div8 = (eobx + 8) >> 3;
- const int fun_idx_x = lowbd_txfm_all_1d_zeros_idx[eobx];
- const int fun_idx_y = lowbd_txfm_all_1d_zeros_idx[eoby];
- const int32_t *input_1;
- int temp_b = 0;
- const transform_neon row_txfm =
- lowbd_txfm_all_1d_zeros_w_arr[txw_idx][hitx_1d_tab[tx_type]][fun_idx_x];
- const transform_neon col_txfm =
- lowbd_txfm_all_1d_zeros_w_arr[txh_idx][vitx_1d_tab[tx_type]][fun_idx_y];
-
- assert(col_txfm != NULL);
- assert(row_txfm != NULL);
-
- get_flip_cfg(tx_type, &ud_flip, &lr_flip);
-
- for (int i = 0; i < buf_size_nonzero_h_div8; i++) {
- input_1 = input;
- for (int j = 0; j < buf_size_nonzero_w_div8; ++j) {
- int k = j * 8 + i * txfm_size_col;
- load_buffer_32bit_to_16bit_neon(input_1, &a[k], txfm_size_col);
- transpose_s16_8x8q(&a[k], &a[k]);
- input_1 += 8;
- }
- input += (txfm_size_col * 8);
- if (abs(rect_type) == 1) {
- int y = i * txfm_size_col;
- round_shift_for_rect(&a[y], &a[y], txfm_size_col);
- }
- row_txfm(&a[i * txfm_size_col], &a[i * txfm_size_col], cos_bit_row, 0);
- av1_round_shift_array_16_neon(&a[i * txfm_size_col], txfm_size_col,
- -shift[0]);
- if (lr_flip == 1) {
- for (int j = 0; j < buf_size_w_div8; ++j) {
- int k = j * 8 + i * txfm_size_col;
- flip_buf_ud_neon(&a[k], 8);
- transpose_s16_8x8q(
- &a[k], &b[temp_b + txfm_size_row * (buf_size_w_div8 - 1 - j)]);
- }
- temp_b += 8;
- } else {
- for (int j = 0; j < buf_size_w_div8; ++j) {
- int k = j * 8 + i * txfm_size_col;
- transpose_s16_8x8q(&a[k], &b[temp_b + txfm_size_row * j]);
- }
- temp_b += 8;
- }
- }
- for (int j = 0; j < buf_size_w_div8; ++j) {
- col_txfm(&b[j * txfm_size_row], &b[j * txfm_size_row], cos_bit_col, 0);
- av1_round_shift_array_16_neon(&b[j * txfm_size_row], txfm_size_row,
- -shift[1]);
- }
- if (txfm_size_col >= 16) {
- for (int i = 0; i < (txfm_size_col >> 4); i++) {
- lowbd_add_flip_buffer_16xn_neon(
- &b[i * txfm_size_row * 2], output + 16 * i, stride, 0, txfm_size_row);
- }
- } else if (txfm_size_col == 8) {
- lowbd_add_flip_buffer_8xn_neon(b, output, stride, 0, txfm_size_row);
- }
-}
-
-static INLINE void lowbd_inv_txfm2d_add_h_wxh_identity_neon(
- const int32_t *input, uint8_t *output, int stride, TX_TYPE tx_type,
- TX_SIZE tx_size, int eob) {
- DECLARE_ALIGNED(32, int, txfm_buf[32 * 32 + 32 + 32]);
- int32_t *temp_in = txfm_buf;
-
- int eobx, eoby;
- get_eobx_eoby_scan_h_identity(&eobx, &eoby, tx_size, eob);
- const int8_t *shift = inv_txfm_shift_ls[tx_size];
- const int txw_idx = get_txw_idx(tx_size);
- const int txh_idx = get_txh_idx(tx_size);
- const int cos_bit_col = inv_cos_bit_col[txw_idx][txh_idx];
- const int cos_bit_row = inv_cos_bit_row[txw_idx][txh_idx];
- const int txfm_size_col = tx_size_wide[tx_size];
- const int txfm_size_row = tx_size_high[tx_size];
- const int buf_size_nonzero_h_div8 = (eoby + 8) >> 3;
-
- const int rect_type = get_rect_tx_log_ratio(txfm_size_col, txfm_size_row);
- const int buf_offset = AOMMAX(txfm_size_row, txfm_size_col);
-
- int32_t *temp_out = temp_in + buf_offset;
- int32_t *buf = temp_out + buf_offset;
- int32_t *buf_ptr = buf;
- const int8_t stage_range[MAX_TXFM_STAGE_NUM] = { 16 };
- int r, bd = 8;
-
- const int fun_idx_x = lowbd_txfm_all_1d_zeros_idx[eobx];
- const int fun_idx_y = lowbd_txfm_all_1d_zeros_idx[eoby];
- const transform_1d_neon row_txfm =
- lowbd_txfm_all_1d_zeros_w8_arr[txw_idx][hitx_1d_tab[tx_type]][fun_idx_x];
- const transform_1d_neon col_txfm =
- lowbd_txfm_all_1d_zeros_w8_arr[txh_idx][vitx_1d_tab[tx_type]][fun_idx_y];
-
- assert(col_txfm != NULL);
- assert(row_txfm != NULL);
- int ud_flip, lr_flip;
- get_flip_cfg(tx_type, &ud_flip, &lr_flip);
-
- // row tx
- int row_start = (buf_size_nonzero_h_div8 * 8);
- for (int i = 0; i < row_start; i++) {
- if (abs(rect_type) == 1) {
- for (int j = 0; j < txfm_size_col; j++)
- temp_in[j] = round_shift((int64_t)input[j] * NewInvSqrt2, NewSqrt2Bits);
- row_txfm(temp_in, buf_ptr, cos_bit_row, stage_range);
- } else {
- row_txfm(input, buf_ptr, cos_bit_row, stage_range);
- }
- av1_round_shift_array(buf_ptr, txfm_size_col, -shift[0]);
- input += txfm_size_col;
- buf_ptr += txfm_size_col;
- }
- // Doing memset for the rows which are not processed in row transform.
- memset(buf_ptr, 0,
- sizeof(int32_t) * txfm_size_col * (txfm_size_row - row_start));
-
- // col tx
- for (int c = 0; c < txfm_size_col; c++) {
- if (lr_flip == 0) {
- for (r = 0; r < txfm_size_row; ++r)
- temp_in[r] = buf[r * txfm_size_col + c];
- } else {
- // flip left right
- for (r = 0; r < txfm_size_row; ++r)
- temp_in[r] = buf[r * txfm_size_col + (txfm_size_col - c - 1)];
- }
- col_txfm(temp_in, temp_out, cos_bit_col, stage_range);
- av1_round_shift_array(temp_out, txfm_size_row, -shift[1]);
-
- if (ud_flip == 0) {
- for (r = 0; r < txfm_size_row; ++r) {
- output[r * stride + c] =
- highbd_clip_pixel_add(output[r * stride + c], temp_out[r], bd);
- }
- } else {
- // flip upside down
- for (r = 0; r < txfm_size_row; ++r) {
- output[r * stride + c] = highbd_clip_pixel_add(
- output[r * stride + c], temp_out[txfm_size_row - r - 1], bd);
- }
- }
- }
-}
-
-static INLINE void lowbd_inv_txfm2d_add_h_identity_neon(
- const int32_t *input, uint8_t *output, int stride, TX_TYPE tx_type,
- TX_SIZE tx_size, int eob) {
- int16x8_t a[16 * 2];
- int16x8_t b[16 * 2];
- int eobx, eoby, ud_flip, lr_flip;
- get_eobx_eoby_scan_h_identity(&eobx, &eoby, tx_size, eob);
- const int8_t *shift = inv_txfm_shift_ls[tx_size];
- const int txw_idx = get_txw_idx(tx_size);
- const int txh_idx = get_txh_idx(tx_size);
- const int cos_bit_col = inv_cos_bit_col[txw_idx][txh_idx];
- const int cos_bit_row = inv_cos_bit_row[txw_idx][txh_idx];
- const int txfm_size_col = tx_size_wide[tx_size];
- const int txfm_size_row = tx_size_high[tx_size];
- lowbd_inv_txfm2d_memset_neon(&a[0], (txfm_size_col * (txfm_size_row) >> 3),
- 0);
- const int buf_size_w_div8 = txfm_size_col >> 3;
- const int rect_type = get_rect_tx_log_ratio(txfm_size_col, txfm_size_row);
- const int buf_size_nonzero_h_div8 = (eoby + 8) >> 3;
- const int buf_size_nonzero_w_div8 = (eobx + 8) >> 3;
- const int fun_idx_x = lowbd_txfm_all_1d_zeros_idx[eobx];
- const int fun_idx_y = lowbd_txfm_all_1d_zeros_idx[eoby];
- const int32_t *input_1;
- int temp_b = 0;
- const transform_neon row_txfm =
- lowbd_txfm_all_1d_zeros_w_arr[txw_idx][hitx_1d_tab[tx_type]][fun_idx_x];
- const transform_neon col_txfm =
- lowbd_txfm_all_1d_zeros_w_arr[txh_idx][vitx_1d_tab[tx_type]][fun_idx_y];
-
- assert(col_txfm != NULL);
- assert(row_txfm != NULL);
-
- get_flip_cfg(tx_type, &ud_flip, &lr_flip);
-
- for (int i = 0; i < buf_size_nonzero_h_div8; i++) {
- input_1 = input;
- for (int j = 0; j < buf_size_nonzero_w_div8; ++j) {
- int k = j * 8 + i * txfm_size_col;
- load_buffer_32bit_to_16bit_neon(input_1, &a[k], txfm_size_col);
- transpose_s16_8x8q(&a[k], &a[k]);
- input_1 += 8;
- }
- input += (txfm_size_col * 8);
- if (abs(rect_type) == 1) {
- int y = i * txfm_size_col;
- round_shift_for_rect(&a[y], &a[y], txfm_size_col);
- }
- row_txfm(&a[i * txfm_size_col], &a[i * txfm_size_col], cos_bit_row, 0);
- av1_round_shift_array_16_neon(&a[i * txfm_size_col], txfm_size_col,
- -shift[0]);
- for (int j = 0; j < buf_size_w_div8; ++j) {
- int k = j * 8 + i * txfm_size_col;
- transpose_s16_8x8q(&a[k], &b[temp_b + txfm_size_row * j]);
- }
- temp_b += 8;
- }
- for (int j = 0; j < buf_size_w_div8; ++j) {
- col_txfm(&b[j * txfm_size_row], &b[j * txfm_size_row], cos_bit_col, 0);
- av1_round_shift_array_16_neon(&b[j * txfm_size_row], txfm_size_row,
- -shift[1]);
- }
- if (txfm_size_col >= 16) {
- for (int i = 0; i < (txfm_size_col >> 4); i++) {
- lowbd_add_flip_buffer_16xn_neon(&b[i * txfm_size_row * 2],
- output + 16 * i, stride, ud_flip,
- txfm_size_row);
- }
- } else if (txfm_size_col == 8) {
- lowbd_add_flip_buffer_8xn_neon(b, output, stride, ud_flip, txfm_size_row);
- }
-}
-
-static INLINE void lowbd_inv_txfm2d_add_4x4_neon(const int32_t *input,
- uint8_t *output, int stride,
- TX_TYPE tx_type,
- TX_SIZE tx_size, int eob) {
- (void)eob;
- DECLARE_ALIGNED(32, int, txfm_buf[4 * 4 + 8 + 8]);
- int32_t *temp_in = txfm_buf;
-
- const int8_t *shift = inv_txfm_shift_ls[tx_size];
- const int txw_idx = get_txw_idx(tx_size);
- const int txh_idx = get_txh_idx(tx_size);
- const int cos_bit_row = inv_cos_bit_row[txw_idx][txh_idx];
- const int cos_bit_col = inv_cos_bit_col[txw_idx][txh_idx];
- const int txfm_size_col = tx_size_wide[tx_size];
- const int txfm_size_row = tx_size_high[tx_size];
- const int buf_offset = AOMMAX(txfm_size_row, txfm_size_col);
- int32_t *temp_out = temp_in + buf_offset;
- int32_t *buf = temp_out + buf_offset;
- int32_t *buf_ptr = buf;
- const int8_t stage_range[MAX_TXFM_STAGE_NUM] = { 16 };
- int r, bd = 8;
- const transform_1d_neon row_txfm =
- lowbd_txfm_all_1d_arr[txw_idx][hitx_1d_tab[tx_type]];
- const transform_1d_neon col_txfm =
- lowbd_txfm_all_1d_arr[txh_idx][vitx_1d_tab[tx_type]];
-
- int ud_flip, lr_flip;
- get_flip_cfg(tx_type, &ud_flip, &lr_flip);
-
- for (int i = 0; i < txfm_size_row; i++) {
- row_txfm(input, buf_ptr, cos_bit_row, stage_range);
-
- input += txfm_size_col;
- buf_ptr += txfm_size_col;
- }
-
- for (int c = 0; c < txfm_size_col; ++c) {
- if (lr_flip == 0) {
- for (r = 0; r < txfm_size_row; ++r)
- temp_in[r] = buf[r * txfm_size_col + c];
- } else {
- // flip left right
- for (r = 0; r < txfm_size_row; ++r)
- temp_in[r] = buf[r * txfm_size_col + (txfm_size_col - c - 1)];
- }
- col_txfm(temp_in, temp_out, cos_bit_col, stage_range);
- av1_round_shift_array(temp_out, txfm_size_row, -shift[1]);
-
- if (ud_flip == 0) {
- for (r = 0; r < txfm_size_row; ++r) {
- output[r * stride + c] =
- highbd_clip_pixel_add(output[r * stride + c], temp_out[r], bd);
- }
- } else {
- // flip upside down
- for (r = 0; r < txfm_size_row; ++r) {
- output[r * stride + c] = highbd_clip_pixel_add(
- output[r * stride + c], temp_out[txfm_size_row - r - 1], bd);
- }
- }
- }
-}
-
-void lowbd_inv_txfm2d_add_4x8_neon(const int32_t *input, uint8_t *output,
- int stride, TX_TYPE tx_type, TX_SIZE tx_size,
- int eob) {
- (void)eob;
- DECLARE_ALIGNED(32, int, txfm_buf[4 * 8 + 8 + 8]);
- int32_t *temp_in = txfm_buf;
-
- const int8_t *shift = inv_txfm_shift_ls[tx_size];
- const int txw_idx = get_txw_idx(tx_size);
- const int txh_idx = get_txh_idx(tx_size);
- const int cos_bit_row = inv_cos_bit_row[txw_idx][txh_idx];
- const int cos_bit_col = inv_cos_bit_col[txw_idx][txh_idx];
- const int txfm_size_col = tx_size_wide[tx_size];
- const int txfm_size_row = tx_size_high[tx_size];
- const int buf_offset = AOMMAX(txfm_size_row, txfm_size_col);
- int32_t *temp_out = temp_in + buf_offset;
- int32_t *buf = temp_out + buf_offset;
- int32_t *buf_ptr = buf;
- const int8_t stage_range[MAX_TXFM_STAGE_NUM] = { 16 };
- int r, bd = 8;
- const transform_1d_neon row_txfm =
- lowbd_txfm_all_1d_arr[txw_idx][hitx_1d_tab[tx_type]];
- const transform_1d_neon col_txfm =
- lowbd_txfm_all_1d_arr[txh_idx][vitx_1d_tab[tx_type]];
-
- int ud_flip, lr_flip;
- get_flip_cfg(tx_type, &ud_flip, &lr_flip);
-
- for (int i = 0; i < txfm_size_row; i++) {
- for (int j = 0; j < txfm_size_col; j++)
- temp_in[j] = round_shift((int64_t)input[j] * NewInvSqrt2, NewSqrt2Bits);
-
- row_txfm(temp_in, buf_ptr, cos_bit_row, stage_range);
- input += txfm_size_col;
- buf_ptr += txfm_size_col;
- }
-
- for (int c = 0; c < txfm_size_col; ++c) {
- if (lr_flip == 0) {
- for (r = 0; r < txfm_size_row; ++r)
- temp_in[r] = buf[r * txfm_size_col + c];
- } else {
- // flip left right
- for (r = 0; r < txfm_size_row; ++r)
- temp_in[r] = buf[r * txfm_size_col + (txfm_size_col - c - 1)];
- }
- col_txfm(temp_in, temp_out, cos_bit_col, stage_range);
- av1_round_shift_array(temp_out, txfm_size_row, -shift[1]);
-
- if (ud_flip == 0) {
- for (r = 0; r < txfm_size_row; ++r) {
- output[r * stride + c] =
- highbd_clip_pixel_add(output[r * stride + c], temp_out[r], bd);
- }
- } else {
- // flip upside down
- for (r = 0; r < txfm_size_row; ++r) {
- output[r * stride + c] = highbd_clip_pixel_add(
- output[r * stride + c], temp_out[txfm_size_row - r - 1], bd);
- }
- }
- }
-}
-
-void lowbd_inv_txfm2d_add_8x4_neon(const int32_t *input, uint8_t *output,
- int stride, TX_TYPE tx_type, TX_SIZE tx_size,
- int eob) {
- (void)eob;
- DECLARE_ALIGNED(32, int, txfm_buf[8 * 4 + 8 + 8]);
- int32_t *temp_in = txfm_buf;
-
- const int8_t *shift = inv_txfm_shift_ls[tx_size];
- const int txw_idx = get_txw_idx(tx_size);
- const int txh_idx = get_txh_idx(tx_size);
- const int cos_bit_row = inv_cos_bit_row[txw_idx][txh_idx];
- const int cos_bit_col = inv_cos_bit_col[txw_idx][txh_idx];
- const int txfm_size_col = tx_size_wide[tx_size];
- const int txfm_size_row = tx_size_high[tx_size];
- const int buf_offset = AOMMAX(txfm_size_row, txfm_size_col);
- int32_t *temp_out = temp_in + buf_offset;
- int32_t *buf = temp_out + buf_offset;
- int32_t *buf_ptr = buf;
- const int8_t stage_range[MAX_TXFM_STAGE_NUM] = { 16 };
- int r, bd = 8;
- const transform_1d_neon row_txfm =
- lowbd_txfm_all_1d_arr[txw_idx][hitx_1d_tab[tx_type]];
- const transform_1d_neon col_txfm =
- lowbd_txfm_all_1d_arr[txh_idx][vitx_1d_tab[tx_type]];
-
- int ud_flip, lr_flip;
- get_flip_cfg(tx_type, &ud_flip, &lr_flip);
-
- for (int i = 0; i < txfm_size_row; i++) {
- for (int j = 0; j < txfm_size_col; j++)
- temp_in[j] = round_shift((int64_t)input[j] * NewInvSqrt2, NewSqrt2Bits);
-
- row_txfm(temp_in, buf_ptr, cos_bit_row, stage_range);
- input += txfm_size_col;
- buf_ptr += txfm_size_col;
- }
-
- for (int c = 0; c < txfm_size_col; ++c) {
- if (lr_flip == 0) {
- for (r = 0; r < txfm_size_row; ++r)
- temp_in[r] = buf[r * txfm_size_col + c];
- } else {
- // flip left right
- for (r = 0; r < txfm_size_row; ++r)
- temp_in[r] = buf[r * txfm_size_col + (txfm_size_col - c - 1)];
- }
- col_txfm(temp_in, temp_out, cos_bit_col, stage_range);
- av1_round_shift_array(temp_out, txfm_size_row, -shift[1]);
-
- if (ud_flip == 0) {
- for (r = 0; r < txfm_size_row; ++r) {
- output[r * stride + c] =
- highbd_clip_pixel_add(output[r * stride + c], temp_out[r], bd);
- }
- } else {
- // flip upside down
- for (r = 0; r < txfm_size_row; ++r) {
- output[r * stride + c] = highbd_clip_pixel_add(
- output[r * stride + c], temp_out[txfm_size_row - r - 1], bd);
- }
- }
- }
-}
-
-void lowbd_inv_txfm2d_add_4x16_neon(const int32_t *input, uint8_t *output,
- int stride, TX_TYPE tx_type,
- TX_SIZE tx_size, int eob) {
- (void)eob;
- DECLARE_ALIGNED(32, int, txfm_buf[4 * 16 + 16 + 16]);
- int32_t *temp_in = txfm_buf;
-
- const int8_t *shift = inv_txfm_shift_ls[tx_size];
- const int txw_idx = get_txw_idx(tx_size);
- const int txh_idx = get_txh_idx(tx_size);
- const int cos_bit_row = inv_cos_bit_row[txw_idx][txh_idx];
- const int cos_bit_col = inv_cos_bit_col[txw_idx][txh_idx];
- const int txfm_size_col = tx_size_wide[tx_size];
- const int txfm_size_row = tx_size_high[tx_size];
- const int buf_offset = AOMMAX(txfm_size_row, txfm_size_col);
- int32_t *temp_out = temp_in + buf_offset;
- int32_t *buf = temp_out + buf_offset;
- int32_t *buf_ptr = buf;
- const int8_t stage_range[MAX_TXFM_STAGE_NUM] = { 16 };
- int r, bd = 8;
- const transform_1d_neon row_txfm =
- lowbd_txfm_all_1d_arr[txw_idx][hitx_1d_tab[tx_type]];
- const transform_1d_neon col_txfm =
- lowbd_txfm_all_1d_arr[txh_idx][vitx_1d_tab[tx_type]];
-
- int ud_flip, lr_flip;
- get_flip_cfg(tx_type, &ud_flip, &lr_flip);
-
- for (int i = 0; i < txfm_size_row; i++) {
- row_txfm(input, buf_ptr, cos_bit_row, stage_range);
- av1_round_shift_array(buf_ptr, txfm_size_col, -shift[0]);
- input += txfm_size_col;
- buf_ptr += txfm_size_col;
- }
-
- for (int c = 0; c < txfm_size_col; ++c) {
- if (lr_flip == 0) {
- for (r = 0; r < txfm_size_row; ++r)
- temp_in[r] = buf[r * txfm_size_col + c];
- } else {
- // flip left right
- for (r = 0; r < txfm_size_row; ++r)
- temp_in[r] = buf[r * txfm_size_col + (txfm_size_col - c - 1)];
- }
- col_txfm(temp_in, temp_out, cos_bit_col, stage_range);
- av1_round_shift_array(temp_out, txfm_size_row, -shift[1]);
-
- if (ud_flip == 0) {
- for (r = 0; r < txfm_size_row; ++r) {
- output[r * stride + c] =
- highbd_clip_pixel_add(output[r * stride + c], temp_out[r], bd);
- }
- } else {
- // flip upside down
- for (r = 0; r < txfm_size_row; ++r) {
- output[r * stride + c] = highbd_clip_pixel_add(
- output[r * stride + c], temp_out[txfm_size_row - r - 1], bd);
- }
- }
- }
-}
-
-void lowbd_inv_txfm2d_add_16x4_neon(const int32_t *input, uint8_t *output,
- int stride, TX_TYPE tx_type,
- TX_SIZE tx_size, int eob) {
- (void)eob;
-
- DECLARE_ALIGNED(32, int, txfm_buf[16 * 4 + 16 + 16]);
- int32_t *temp_in = txfm_buf;
-
- const int8_t *shift = inv_txfm_shift_ls[tx_size];
- const int txw_idx = get_txw_idx(tx_size);
- const int txh_idx = get_txh_idx(tx_size);
- const int cos_bit_row = inv_cos_bit_row[txw_idx][txh_idx];
- const int cos_bit_col = inv_cos_bit_col[txw_idx][txh_idx];
- const int txfm_size_col = tx_size_wide[tx_size];
- const int txfm_size_row = tx_size_high[tx_size];
- const int buf_offset = AOMMAX(txfm_size_row, txfm_size_col);
- int32_t *temp_out = temp_in + buf_offset;
- int32_t *buf = temp_out + buf_offset;
- int32_t *buf_ptr = buf;
- const int8_t stage_range[MAX_TXFM_STAGE_NUM] = { 16 };
- int r, bd = 8;
- const transform_1d_neon row_txfm =
- lowbd_txfm_all_1d_arr[txw_idx][hitx_1d_tab[tx_type]];
- const transform_1d_neon col_txfm =
- lowbd_txfm_all_1d_arr[txh_idx][vitx_1d_tab[tx_type]];
-
- int ud_flip, lr_flip;
- get_flip_cfg(tx_type, &ud_flip, &lr_flip);
-
- for (int i = 0; i < txfm_size_row; i++) {
- row_txfm(input, buf_ptr, cos_bit_row, stage_range);
- av1_round_shift_array(buf_ptr, txfm_size_col, -shift[0]);
- input += txfm_size_col;
- buf_ptr += txfm_size_col;
- }
-
- for (int c = 0; c < txfm_size_col; ++c) {
- if (lr_flip == 0) {
- for (r = 0; r < txfm_size_row; ++r)
- temp_in[r] = buf[r * txfm_size_col + c];
- } else {
- // flip left right
- for (r = 0; r < txfm_size_row; ++r)
- temp_in[r] = buf[r * txfm_size_col + (txfm_size_col - c - 1)];
- }
- col_txfm(temp_in, temp_out, cos_bit_col, stage_range);
- av1_round_shift_array(temp_out, txfm_size_row, -shift[1]);
-
- if (ud_flip == 0) {
- for (r = 0; r < txfm_size_row; ++r) {
- output[r * stride + c] =
- highbd_clip_pixel_add(output[r * stride + c], temp_out[r], bd);
- }
- } else {
- // flip upside down
- for (r = 0; r < txfm_size_row; ++r) {
- output[r * stride + c] = highbd_clip_pixel_add(
- output[r * stride + c], temp_out[txfm_size_row - r - 1], bd);
- }
- }
- }
-}
-
-static INLINE void lowbd_inv_txfm2d_add_wxh_no_identity_neon(
- const int32_t *input, uint8_t *output, int stride, TX_TYPE tx_type,
- TX_SIZE tx_size, int eob) {
- DECLARE_ALIGNED(32, int, txfm_buf[64 * 64 + 64 + 64]);
- int32_t *temp_in = txfm_buf;
-
- int eobx, eoby, ud_flip, lr_flip, row_start;
- get_eobx_eoby_scan_default(&eobx, &eoby, tx_size, eob);
- const int8_t *shift = inv_txfm_shift_ls[tx_size];
- const int txw_idx = get_txw_idx(tx_size);
- const int txh_idx = get_txh_idx(tx_size);
- const int cos_bit_col = inv_cos_bit_col[txw_idx][txh_idx];
- const int cos_bit_row = inv_cos_bit_row[txw_idx][txh_idx];
- const int txfm_size_col = tx_size_wide[tx_size];
- const int txfm_size_row = tx_size_high[tx_size];
- const int buf_size_nonzero_h_div8 = (eoby + 8) >> 3;
- const int rect_type = get_rect_tx_log_ratio(txfm_size_col, txfm_size_row);
- const int buf_offset = AOMMAX(txfm_size_row, txfm_size_col);
-
- int32_t *temp_out = temp_in + buf_offset;
- int32_t *buf = temp_out + buf_offset;
- int32_t *buf_ptr = buf;
- const int8_t stage_range[MAX_TXFM_STAGE_NUM] = { 16 };
- const int bd = 8;
- int r;
-
- const int fun_idx_x = lowbd_txfm_all_1d_zeros_idx[eobx];
- const int fun_idx_y = lowbd_txfm_all_1d_zeros_idx[eoby];
- const transform_1d_neon row_txfm =
- lowbd_txfm_all_1d_zeros_w8_arr[txw_idx][hitx_1d_tab[tx_type]][fun_idx_x];
- const transform_1d_neon col_txfm =
- lowbd_txfm_all_1d_zeros_w8_arr[txh_idx][vitx_1d_tab[tx_type]][fun_idx_y];
-
- assert(col_txfm != NULL);
- assert(row_txfm != NULL);
-
- get_flip_cfg(tx_type, &ud_flip, &lr_flip);
- row_start = (buf_size_nonzero_h_div8 << 3);
-
- for (int i = 0; i < row_start; i++) {
- if (abs(rect_type) == 1) {
- for (int j = 0; j < txfm_size_col; j++)
- temp_in[j] = round_shift((int64_t)input[j] * NewInvSqrt2, NewSqrt2Bits);
- row_txfm(temp_in, buf_ptr, cos_bit_row, stage_range);
- } else {
- row_txfm(input, buf_ptr, cos_bit_row, stage_range);
- }
- av1_round_shift_array(buf_ptr, txfm_size_col, -shift[0]);
- input += txfm_size_col;
- buf_ptr += txfm_size_col;
- }
-
- // Doing memset for the rows which are not processed in row transform.
- memset(buf_ptr, 0,
- sizeof(int32_t) * txfm_size_col * (txfm_size_row - row_start));
-
- for (int c = 0; c < txfm_size_col; c++) {
- if (lr_flip == 0) {
- for (r = 0; r < txfm_size_row; ++r)
- temp_in[r] = buf[r * txfm_size_col + c];
- } else {
- // flip left right
- for (r = 0; r < txfm_size_row; ++r)
- temp_in[r] = buf[r * txfm_size_col + (txfm_size_col - c - 1)];
- }
- col_txfm(temp_in, temp_out, cos_bit_col, stage_range);
- av1_round_shift_array(temp_out, txfm_size_row, -shift[1]);
-
- if (ud_flip == 0) {
- for (r = 0; r < txfm_size_row; ++r) {
- output[r * stride + c] =
- highbd_clip_pixel_add(output[r * stride + c], temp_out[r], bd);
- }
- } else {
- // flip upside down
- for (r = 0; r < txfm_size_row; ++r) {
- output[r * stride + c] = highbd_clip_pixel_add(
- output[r * stride + c], temp_out[txfm_size_row - r - 1], bd);
- }
- }
- }
-}
-
-static INLINE void lowbd_inv_txfm2d_add_no_identity_neon(
- const int32_t *input, uint8_t *output, int stride, TX_TYPE tx_type,
- TX_SIZE tx_size, int eob) {
- int16x8_t a[64 * 8];
- int16x8_t b[64 * 8];
- int eobx, eoby, ud_flip, lr_flip;
- get_eobx_eoby_scan_default(&eobx, &eoby, tx_size, eob);
- const int8_t *shift = inv_txfm_shift_ls[tx_size];
- const int txw_idx = get_txw_idx(tx_size);
- const int txh_idx = get_txh_idx(tx_size);
- const int cos_bit_col = inv_cos_bit_col[txw_idx][txh_idx];
- const int cos_bit_row = inv_cos_bit_row[txw_idx][txh_idx];
- const int txfm_size_col = tx_size_wide[tx_size];
- const int txfm_size_row = tx_size_high[tx_size];
- const int rect_type = get_rect_tx_log_ratio(txfm_size_col, txfm_size_row);
- const int buf_size_w_div8 = txfm_size_col >> 3;
- const int buf_size_nonzero_h_div8 = (eoby + 8) >> 3;
- const int buf_size_nonzero_w_div8 = (eobx + 8) >> 3;
- const int fun_idx_x = lowbd_txfm_all_1d_zeros_idx[eobx];
- const int fun_idx_y = lowbd_txfm_all_1d_zeros_idx[eoby];
- const int32_t *input_1;
- int temp_b = 0;
-
- const transform_neon row_txfm =
- lowbd_txfm_all_1d_zeros_w_arr[txw_idx][hitx_1d_tab[tx_type]][fun_idx_x];
- const transform_neon col_txfm =
- lowbd_txfm_all_1d_zeros_w_arr[txh_idx][vitx_1d_tab[tx_type]][fun_idx_y];
-
- assert(col_txfm != NULL);
- assert(row_txfm != NULL);
-
- get_flip_cfg(tx_type, &ud_flip, &lr_flip);
-
- for (int i = 0; i < buf_size_nonzero_h_div8; i++) {
- input_1 = input;
- for (int j = 0; j < buf_size_nonzero_w_div8; ++j) {
- int k = j * 8 + i * txfm_size_col;
- load_buffer_32bit_to_16bit_neon(input_1, &a[k], txfm_size_col);
- transpose_s16_8x8q(&a[k], &a[k]);
- input_1 += 8;
- }
- input += (txfm_size_col * 8);
- if (abs(rect_type) == 1) {
- int y = i * txfm_size_col;
- round_shift_for_rect(&a[y], &a[y], txfm_size_col);
- }
- row_txfm(&a[i * txfm_size_col], &a[i * txfm_size_col], cos_bit_row, 0);
- av1_round_shift_array_16_neon(&a[i * txfm_size_col], txfm_size_col,
- -shift[0]);
- if (lr_flip == 1) {
- for (int j = 0; j < buf_size_w_div8; ++j) {
- int k = j * 8 + i * txfm_size_col;
- flip_buf_ud_neon(&a[k], 8);
- transpose_s16_8x8q(
- &a[k], &b[temp_b + txfm_size_row * (buf_size_w_div8 - 1 - j)]);
- }
- temp_b += 8;
- } else {
- for (int j = 0; j < buf_size_w_div8; ++j) {
- int k = j * 8 + i * txfm_size_col;
- transpose_s16_8x8q(&a[k], &b[temp_b + txfm_size_row * j]);
- }
- temp_b += 8;
- }
- }
- for (int j = 0; j < buf_size_w_div8; ++j) {
- col_txfm(&b[j * txfm_size_row], &b[j * txfm_size_row], cos_bit_col, 0);
- av1_round_shift_array_16_neon(&b[j * txfm_size_row], txfm_size_row,
- -shift[1]);
- }
-
- if (txfm_size_col >= 16) {
- for (int i = 0; i < (txfm_size_col >> 4); i++) {
- lowbd_add_flip_buffer_16xn_neon(&b[i * txfm_size_row * 2],
- output + 16 * i, stride, ud_flip,
- txfm_size_row);
- }
- } else if (txfm_size_col == 8) {
- lowbd_add_flip_buffer_8xn_neon(b, output, stride, ud_flip, txfm_size_row);
- }
-}
-
-static INLINE void lowbd_inv_txfm2d_add_wxh_universe_neon(
- const int32_t *input, uint8_t *output, int stride, TX_TYPE tx_type,
- TX_SIZE tx_size, int eob) {
- switch (tx_type) {
- case IDTX:
- lowbd_inv_txfm2d_add_wxh_idtx_neon(input, output, stride, tx_type,
- tx_size, eob);
- break;
-
- case H_DCT:
- case H_ADST:
- case H_FLIPADST:
- lowbd_inv_txfm2d_add_v_wxh_identity_neon(input, output, stride, tx_type,
- tx_size, eob);
- break;
-
- case V_DCT:
- case V_ADST:
- case V_FLIPADST:
- lowbd_inv_txfm2d_add_h_wxh_identity_neon(input, output, stride, tx_type,
- tx_size, eob);
- break;
-
- default:
- lowbd_inv_txfm2d_add_wxh_no_identity_neon(input, output, stride, tx_type,
- tx_size, eob);
- break;
- }
-}
-
-static INLINE void lowbd_inv_txfm2d_add_universe_neon(
- const int32_t *input, uint8_t *output, int stride, TX_TYPE tx_type,
- TX_SIZE tx_size, int eob) {
- switch (tx_type) {
- case IDTX:
- lowbd_inv_txfm2d_add_idtx_neon(input, output, stride, tx_type, tx_size,
- eob);
- break;
-
- case H_DCT:
- case H_ADST:
- case H_FLIPADST:
- lowbd_inv_txfm2d_add_v_identity_neon(input, output, stride, tx_type,
- tx_size, eob);
- break;
-
- case V_DCT:
- case V_ADST:
- case V_FLIPADST:
- lowbd_inv_txfm2d_add_h_identity_neon(input, output, stride, tx_type,
- tx_size, eob);
- break;
-
- default:
- lowbd_inv_txfm2d_add_no_identity_neon(input, output, stride, tx_type,
- tx_size, eob);
- break;
- }
-}
-
-void av1_lowbd_inv_txfm2d_add_neon(const int32_t *input, uint8_t *output,
- int stride, TX_TYPE tx_type, TX_SIZE tx_size,
- int eob) {
- int row;
- switch (tx_size) {
- case TX_4X4:
- lowbd_inv_txfm2d_add_4x4_neon(input, output, stride, tx_type, tx_size,
- eob);
- break;
-
- case TX_4X8:
- lowbd_inv_txfm2d_add_4x8_neon(input, output, stride, tx_type, tx_size,
- eob);
- break;
-
- case TX_8X4:
- lowbd_inv_txfm2d_add_8x4_neon(input, output, stride, tx_type, tx_size,
- eob);
- break;
-
- case TX_4X16:
- lowbd_inv_txfm2d_add_4x16_neon(input, output, stride, tx_type, tx_size,
- eob);
- break;
-
- case TX_16X4:
- lowbd_inv_txfm2d_add_16x4_neon(input, output, stride, tx_type, tx_size,
- eob);
- break;
-
- case TX_16X64: {
- lowbd_inv_txfm2d_add_wxh_universe_neon(input, output, stride, tx_type,
- tx_size, eob);
- } break;
-
- case TX_64X16: {
- int32_t mod_input[64 * 16];
- for (row = 0; row < 16; ++row) {
- memcpy(mod_input + row * 64, input + row * 32, 32 * sizeof(*mod_input));
- memset(mod_input + row * 64 + 32, 0, 32 * sizeof(*mod_input));
- }
- lowbd_inv_txfm2d_add_wxh_universe_neon(mod_input, output, stride, tx_type,
- tx_size, eob);
- } break;
-
- case TX_32X64: {
- lowbd_inv_txfm2d_add_wxh_universe_neon(input, output, stride, tx_type,
- tx_size, eob);
- } break;
-
- case TX_64X32: {
- int32_t mod_input[64 * 32];
- for (row = 0; row < 32; ++row) {
- memcpy(mod_input + row * 64, input + row * 32, 32 * sizeof(*mod_input));
- memset(mod_input + row * 64 + 32, 0, 32 * sizeof(*mod_input));
- }
- lowbd_inv_txfm2d_add_wxh_universe_neon(mod_input, output, stride, tx_type,
- tx_size, eob);
- } break;
-
- case TX_64X64: {
- int32_t mod_input[64 * 64];
- for (row = 0; row < 32; ++row) {
- memcpy(mod_input + row * 64, input + row * 32, 32 * sizeof(*mod_input));
- memset(mod_input + row * 64 + 32, 0, 32 * sizeof(*mod_input));
- }
- lowbd_inv_txfm2d_add_wxh_universe_neon(mod_input, output, stride, tx_type,
- tx_size, eob);
- } break;
-
- default:
- lowbd_inv_txfm2d_add_universe_neon(input, output, stride, tx_type,
- tx_size, eob);
- break;
- }
-}
-void av1_inv_txfm_add_neon(const tran_low_t *dqcoeff, uint8_t *dst, int stride,
- const TxfmParam *txfm_param) {
- const TX_TYPE tx_type = txfm_param->tx_type;
- if (!txfm_param->lossless) {
- av1_lowbd_inv_txfm2d_add_neon(dqcoeff, dst, stride, tx_type,
- txfm_param->tx_size, txfm_param->eob);
- } else {
- av1_inv_txfm_add_c(dqcoeff, dst, stride, txfm_param);
- }
-}
diff --git a/third_party/aom/av1/common/arm/av1_inv_txfm_neon.h b/third_party/aom/av1/common/arm/av1_inv_txfm_neon.h
deleted file mode 100644
index 9ec658291..000000000
--- a/third_party/aom/av1/common/arm/av1_inv_txfm_neon.h
+++ /dev/null
@@ -1,154 +0,0 @@
-/*
- * Copyright (c) 2018, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-#ifndef AOM_AV1_COMMON_ARM_AV1_INV_TXFM_NEON_H_
-#define AOM_AV1_COMMON_ARM_AV1_INV_TXFM_NEON_H_
-
-#include "config/aom_config.h"
-#include "config/av1_rtcd.h"
-
-#include "aom/aom_integer.h"
-#include "av1/common/enums.h"
-#include "av1/common/av1_inv_txfm1d.h"
-#include "av1/common/av1_inv_txfm1d_cfg.h"
-#include "av1/common/av1_txfm.h"
-
-typedef void (*transform_1d_neon)(const int32_t *input, int32_t *output,
- const int8_t cos_bit,
- const int8_t *stage_ptr);
-typedef void (*transform_neon)(int16x8_t *input, int16x8_t *output,
- int8_t cos_bit, int bit);
-
-DECLARE_ALIGNED(16, static const int16_t, av1_eob_to_eobxy_8x8_default[8]) = {
- 0x0707, 0x0707, 0x0707, 0x0707, 0x0707, 0x0707, 0x0707, 0x0707,
-};
-
-DECLARE_ALIGNED(16, static const int16_t,
- av1_eob_to_eobxy_16x16_default[16]) = {
- 0x0707, 0x0707, 0x0f0f, 0x0f0f, 0x0f0f, 0x0f0f, 0x0f0f, 0x0f0f,
- 0x0f0f, 0x0f0f, 0x0f0f, 0x0f0f, 0x0f0f, 0x0f0f, 0x0f0f, 0x0f0f,
-};
-
-DECLARE_ALIGNED(16, static const int16_t,
- av1_eob_to_eobxy_32x32_default[32]) = {
- 0x0707, 0x0f0f, 0x0f0f, 0x0f0f, 0x1f1f, 0x1f1f, 0x1f1f, 0x1f1f,
- 0x1f1f, 0x1f1f, 0x1f1f, 0x1f1f, 0x1f1f, 0x1f1f, 0x1f1f, 0x1f1f,
- 0x1f1f, 0x1f1f, 0x1f1f, 0x1f1f, 0x1f1f, 0x1f1f, 0x1f1f, 0x1f1f,
- 0x1f1f, 0x1f1f, 0x1f1f, 0x1f1f, 0x1f1f, 0x1f1f, 0x1f1f, 0x1f1f,
-};
-
-DECLARE_ALIGNED(16, static const int16_t, av1_eob_to_eobxy_8x16_default[16]) = {
- 0x0707, 0x0707, 0x0707, 0x0707, 0x0707, 0x0f07, 0x0f07, 0x0f07,
- 0x0f07, 0x0f07, 0x0f07, 0x0f07, 0x0f07, 0x0f07, 0x0f07, 0x0f07,
-};
-
-DECLARE_ALIGNED(16, static const int16_t, av1_eob_to_eobxy_16x8_default[8]) = {
- 0x0707, 0x0707, 0x070f, 0x070f, 0x070f, 0x070f, 0x070f, 0x070f,
-};
-
-DECLARE_ALIGNED(16, static const int16_t,
- av1_eob_to_eobxy_16x32_default[32]) = {
- 0x0707, 0x0707, 0x0f0f, 0x0f0f, 0x0f0f, 0x0f0f, 0x0f0f, 0x0f0f,
- 0x0f0f, 0x1f0f, 0x1f0f, 0x1f0f, 0x1f0f, 0x1f0f, 0x1f0f, 0x1f0f,
- 0x1f0f, 0x1f0f, 0x1f0f, 0x1f0f, 0x1f0f, 0x1f0f, 0x1f0f, 0x1f0f,
- 0x1f0f, 0x1f0f, 0x1f0f, 0x1f0f, 0x1f0f, 0x1f0f, 0x1f0f, 0x1f0f,
-};
-
-DECLARE_ALIGNED(16, static const int16_t,
- av1_eob_to_eobxy_32x16_default[16]) = {
- 0x0707, 0x0f0f, 0x0f0f, 0x0f0f, 0x0f1f, 0x0f1f, 0x0f1f, 0x0f1f,
- 0x0f1f, 0x0f1f, 0x0f1f, 0x0f1f, 0x0f1f, 0x0f1f, 0x0f1f, 0x0f1f,
-};
-
-DECLARE_ALIGNED(16, static const int16_t, av1_eob_to_eobxy_8x32_default[32]) = {
- 0x0707, 0x0707, 0x0707, 0x0707, 0x0707, 0x0f07, 0x0f07, 0x0f07,
- 0x0f07, 0x0f07, 0x0f07, 0x0f07, 0x0f07, 0x1f07, 0x1f07, 0x1f07,
- 0x1f07, 0x1f07, 0x1f07, 0x1f07, 0x1f07, 0x1f07, 0x1f07, 0x1f07,
- 0x1f07, 0x1f07, 0x1f07, 0x1f07, 0x1f07, 0x1f07, 0x1f07, 0x1f07,
-};
-
-DECLARE_ALIGNED(16, static const int16_t, av1_eob_to_eobxy_32x8_default[8]) = {
- 0x0707, 0x070f, 0x070f, 0x071f, 0x071f, 0x071f, 0x071f, 0x071f,
-};
-
-DECLARE_ALIGNED(16, static const int16_t *,
- av1_eob_to_eobxy_default[TX_SIZES_ALL]) = {
- NULL,
- av1_eob_to_eobxy_8x8_default,
- av1_eob_to_eobxy_16x16_default,
- av1_eob_to_eobxy_32x32_default,
- av1_eob_to_eobxy_32x32_default,
- NULL,
- NULL,
- av1_eob_to_eobxy_8x16_default,
- av1_eob_to_eobxy_16x8_default,
- av1_eob_to_eobxy_16x32_default,
- av1_eob_to_eobxy_32x16_default,
- av1_eob_to_eobxy_32x32_default,
- av1_eob_to_eobxy_32x32_default,
- NULL,
- NULL,
- av1_eob_to_eobxy_8x32_default,
- av1_eob_to_eobxy_32x8_default,
- av1_eob_to_eobxy_16x32_default,
- av1_eob_to_eobxy_32x16_default,
-};
-
-static const int lowbd_txfm_all_1d_zeros_idx[32] = {
- 0, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2,
- 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
-};
-
-// Transform block width in log2 for eob (size of 64 map to 32)
-static const int tx_size_wide_log2_eob[TX_SIZES_ALL] = {
- 2, 3, 4, 5, 5, 2, 3, 3, 4, 4, 5, 5, 5, 2, 4, 3, 5, 4, 5,
-};
-
-static int eob_fill[32] = {
- 0, 7, 7, 7, 7, 7, 7, 7, 15, 15, 15, 15, 15, 15, 15, 15,
- 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31,
-};
-
-static INLINE void get_eobx_eoby_scan_default(int *eobx, int *eoby,
- TX_SIZE tx_size, int eob) {
- if (eob == 1) {
- *eobx = 0;
- *eoby = 0;
- return;
- }
-
- const int tx_w_log2 = tx_size_wide_log2_eob[tx_size];
- const int eob_row = (eob - 1) >> tx_w_log2;
- const int eobxy = av1_eob_to_eobxy_default[tx_size][eob_row];
- *eobx = eobxy & 0xFF;
- *eoby = eobxy >> 8;
-}
-
-static INLINE void get_eobx_eoby_scan_v_identity(int *eobx, int *eoby,
- TX_SIZE tx_size, int eob) {
- eob -= 1;
- const int txfm_size_row = tx_size_high[tx_size];
- const int eoby_max = AOMMIN(32, txfm_size_row) - 1;
- *eobx = eob / (eoby_max + 1);
- *eoby = (eob >= eoby_max) ? eoby_max : eob_fill[eob];
-}
-
-static INLINE void get_eobx_eoby_scan_h_identity(int *eobx, int *eoby,
- TX_SIZE tx_size, int eob) {
- eob -= 1;
- const int txfm_size_col = tx_size_wide[tx_size];
- const int eobx_max = AOMMIN(32, txfm_size_col) - 1;
- *eobx = (eob >= eobx_max) ? eobx_max : eob_fill[eob];
- const int temp_eoby = eob / (eobx_max + 1);
- assert(temp_eoby < 32);
- *eoby = eob_fill[temp_eoby];
-}
-
-#endif // AOM_AV1_COMMON_ARM_AV1_INV_TXFM_NEON_H_
diff --git a/third_party/aom/av1/common/arm/av1_txfm_neon.c b/third_party/aom/av1/common/arm/av1_txfm_neon.c
deleted file mode 100644
index de3c54724..000000000
--- a/third_party/aom/av1/common/arm/av1_txfm_neon.c
+++ /dev/null
@@ -1,28 +0,0 @@
-/*
- *
- * Copyright (c) 2018, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-#include <arm_neon.h>
-#include <assert.h>
-
-#include "aom_ports/mem.h"
-#include "av1/common/arm/mem_neon.h"
-
-void av1_round_shift_array_neon(int32_t *arr, int size, int bit) {
- assert(!(size % 4));
- if (!bit) return;
- const int32x4_t dup_bits_n_32x4 = vdupq_n_s32((int32_t)(-bit));
- for (int i = 0; i < size; i += 4) {
- int32x4_t tmp_q_s32 = vld1q_s32(arr);
- tmp_q_s32 = vrshlq_s32(tmp_q_s32, dup_bits_n_32x4);
- vst1q_s32(arr, tmp_q_s32);
- arr += 4;
- }
-}
diff --git a/third_party/aom/av1/common/arm/blend_a64_hmask_neon.c b/third_party/aom/av1/common/arm/blend_a64_hmask_neon.c
deleted file mode 100644
index 7134f183e..000000000
--- a/third_party/aom/av1/common/arm/blend_a64_hmask_neon.c
+++ /dev/null
@@ -1,134 +0,0 @@
-/*
- *
- * Copyright (c) 2018, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#include <arm_neon.h>
-#include <assert.h>
-
-#include "aom/aom_integer.h"
-#include "aom_dsp/blend.h"
-#include "aom_ports/mem.h"
-#include "av1/common/arm/mem_neon.h"
-#include "aom_dsp/aom_dsp_common.h"
-#include "config/aom_dsp_rtcd.h"
-
-void aom_blend_a64_hmask_neon(uint8_t *dst, uint32_t dst_stride,
- const uint8_t *src0, uint32_t src0_stride,
- const uint8_t *src1, uint32_t src1_stride,
- const uint8_t *mask, int w, int h) {
- assert(IMPLIES(src0 == dst, src0_stride == dst_stride));
- assert(IMPLIES(src1 == dst, src1_stride == dst_stride));
-
- assert(h >= 2);
- assert(w >= 2);
- assert(IS_POWER_OF_TWO(h));
- assert(IS_POWER_OF_TWO(w));
- uint8x8_t tmp0, tmp1;
- uint8x16_t res_q;
- uint16x8_t res, res_low, res_high;
- uint32x2_t tmp0_32 = vdup_n_u32(0), tmp1_32 = vdup_n_u32(0);
- uint16x4_t tmp0_16 = vdup_n_u16(0), tmp1_16 = vdup_n_u16(0);
- const uint8x8_t vdup_64 = vdup_n_u8((uint8_t)64);
-
- if (w >= 16) {
- const uint8x16_t vdup_64_q = vdupq_n_u8((uint8_t)64);
- for (int i = 0; i < h; ++i) {
- for (int j = 0; j < w; j += 16) {
- __builtin_prefetch(src0);
- __builtin_prefetch(src1);
- const uint8x16_t tmp0_q = vld1q_u8(src0);
- const uint8x16_t tmp1_q = vld1q_u8(src1);
- const uint8x16_t m_q = vld1q_u8(mask);
- const uint8x16_t max_minus_m_q = vsubq_u8(vdup_64_q, m_q);
- res_low = vmull_u8(vget_low_u8(m_q), vget_low_u8(tmp0_q));
- res_low =
- vmlal_u8(res_low, vget_low_u8(max_minus_m_q), vget_low_u8(tmp1_q));
- res_high = vmull_u8(vget_high_u8(m_q), vget_high_u8(tmp0_q));
- res_high = vmlal_u8(res_high, vget_high_u8(max_minus_m_q),
- vget_high_u8(tmp1_q));
- res_q = vcombine_u8(vrshrn_n_u16(res_low, AOM_BLEND_A64_ROUND_BITS),
- vrshrn_n_u16(res_high, AOM_BLEND_A64_ROUND_BITS));
- vst1q_u8(dst, res_q);
- src0 += 16;
- src1 += 16;
- dst += 16;
- mask += 16;
- }
- src0 += src0_stride - w;
- src1 += src1_stride - w;
- dst += dst_stride - w;
- mask -= w;
- }
- } else if (w == 8) {
- const uint8x8_t m = vld1_u8(mask);
- const uint8x8_t max_minus_m = vsub_u8(vdup_64, m);
- for (int i = 0; i < h; ++i) {
- __builtin_prefetch(src0);
- __builtin_prefetch(src1);
- tmp0 = vld1_u8(src0);
- tmp1 = vld1_u8(src1);
- res = vmull_u8(m, tmp0);
- res = vmlal_u8(res, max_minus_m, tmp1);
- vst1_u8(dst, vrshrn_n_u16(res, AOM_BLEND_A64_ROUND_BITS));
- src0 += src0_stride;
- src1 += src1_stride;
- dst += dst_stride;
- }
- } else if (w == 4) {
- const uint8x8_t m = vreinterpret_u8_u32(vld1_dup_u32((uint32_t *)mask));
- const uint8x8_t max_minus_m = vsub_u8(vdup_64, m);
- for (int i = 0; i < h; i += 2) {
- __builtin_prefetch(src0 + 0 * src0_stride);
- __builtin_prefetch(src0 + 1 * src0_stride);
- __builtin_prefetch(src1 + 0 * src1_stride);
- __builtin_prefetch(src1 + 1 * src1_stride);
- load_unaligned_u8_4x2(src0, src0_stride, &tmp0_32);
- tmp0 = vreinterpret_u8_u32(tmp0_32);
- load_unaligned_u8_4x2(src1, src1_stride, &tmp1_32);
- tmp1 = vreinterpret_u8_u32(tmp1_32);
- res = vmull_u8(m, tmp0);
- res = vmlal_u8(res, max_minus_m, tmp1);
- vst1_lane_u32(
- (uint32_t *)(dst + (0 * dst_stride)),
- vreinterpret_u32_u8(vrshrn_n_u16(res, AOM_BLEND_A64_ROUND_BITS)), 0);
- vst1_lane_u32(
- (uint32_t *)(dst + (1 * dst_stride)),
- vreinterpret_u32_u8(vrshrn_n_u16(res, AOM_BLEND_A64_ROUND_BITS)), 1);
- src0 += (2 * src0_stride);
- src1 += (2 * src1_stride);
- dst += (2 * dst_stride);
- }
- } else if (w == 2) {
- const uint8x8_t m = vreinterpret_u8_u16(vld1_dup_u16((uint16_t *)mask));
- const uint8x8_t max_minus_m = vsub_u8(vdup_64, m);
- for (int i = 0; i < h; i += 2) {
- __builtin_prefetch(src0 + 0 * src0_stride);
- __builtin_prefetch(src0 + 1 * src0_stride);
- __builtin_prefetch(src1 + 0 * src1_stride);
- __builtin_prefetch(src1 + 1 * src1_stride);
- load_unaligned_u8_2x2(src0, src0_stride, &tmp0_16);
- tmp0 = vreinterpret_u8_u16(tmp0_16);
- load_unaligned_u8_2x2(src1, src1_stride, &tmp1_16);
- tmp1 = vreinterpret_u8_u16(tmp1_16);
- res = vmull_u8(m, tmp0);
- res = vmlal_u8(res, max_minus_m, tmp1);
- vst1_lane_u16(
- (uint16_t *)(dst + (0 * dst_stride)),
- vreinterpret_u16_u8(vrshrn_n_u16(res, AOM_BLEND_A64_ROUND_BITS)), 0);
- vst1_lane_u16(
- (uint16_t *)(dst + (1 * dst_stride)),
- vreinterpret_u16_u8(vrshrn_n_u16(res, AOM_BLEND_A64_ROUND_BITS)), 1);
- src0 += (2 * src0_stride);
- src1 += (2 * src1_stride);
- dst += (2 * dst_stride);
- }
- }
-}
diff --git a/third_party/aom/av1/common/arm/blend_a64_vmask_neon.c b/third_party/aom/av1/common/arm/blend_a64_vmask_neon.c
deleted file mode 100644
index 194e94c8c..000000000
--- a/third_party/aom/av1/common/arm/blend_a64_vmask_neon.c
+++ /dev/null
@@ -1,141 +0,0 @@
-/*
- *
- * Copyright (c) 2018, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#include <arm_neon.h>
-#include <assert.h>
-
-#include "aom/aom_integer.h"
-#include "aom_dsp/blend.h"
-#include "aom_ports/mem.h"
-#include "av1/common/arm/mem_neon.h"
-#include "aom_dsp/aom_dsp_common.h"
-#include "config/aom_dsp_rtcd.h"
-
-void aom_blend_a64_vmask_neon(uint8_t *dst, uint32_t dst_stride,
- const uint8_t *src0, uint32_t src0_stride,
- const uint8_t *src1, uint32_t src1_stride,
- const uint8_t *mask, int w, int h) {
- uint8x8_t tmp0, tmp1;
- uint8x16_t tmp0_q, tmp1_q, res_q;
- uint16x8_t res, res_low, res_high;
- uint32x2_t tmp0_32 = vdup_n_u32(0), tmp1_32 = vdup_n_u32(0);
- uint16x4_t tmp0_16 = vdup_n_u16(0), tmp1_16 = vdup_n_u16(0);
- assert(IMPLIES(src0 == dst, src0_stride == dst_stride));
- assert(IMPLIES(src1 == dst, src1_stride == dst_stride));
-
- assert(h >= 2);
- assert(w >= 2);
- assert(IS_POWER_OF_TWO(h));
- assert(IS_POWER_OF_TWO(w));
-
- if (w >= 16) {
- for (int i = 0; i < h; ++i) {
- const uint8x8_t m = vdup_n_u8((uint8_t)mask[i]);
- const uint8x8_t max_minus_m = vdup_n_u8(64 - (uint8_t)mask[i]);
- for (int j = 0; j < w; j += 16) {
- __builtin_prefetch(src0);
- __builtin_prefetch(src1);
- tmp0_q = vld1q_u8(src0);
- tmp1_q = vld1q_u8(src1);
- res_low = vmull_u8(m, vget_low_u8(tmp0_q));
- res_low = vmlal_u8(res_low, max_minus_m, vget_low_u8(tmp1_q));
- res_high = vmull_u8(m, vget_high_u8(tmp0_q));
- res_high = vmlal_u8(res_high, max_minus_m, vget_high_u8(tmp1_q));
- res_q = vcombine_u8(vrshrn_n_u16(res_low, AOM_BLEND_A64_ROUND_BITS),
- vrshrn_n_u16(res_high, AOM_BLEND_A64_ROUND_BITS));
- vst1q_u8(dst, res_q);
- src0 += 16;
- src1 += 16;
- dst += 16;
- }
- src0 += src0_stride - w;
- src1 += src1_stride - w;
- dst += dst_stride - w;
- }
- } else if (w == 8) {
- for (int i = 0; i < h; ++i) {
- __builtin_prefetch(src0);
- __builtin_prefetch(src1);
- const uint8x8_t m = vdup_n_u8((uint8_t)mask[i]);
- const uint8x8_t max_minus_m = vdup_n_u8(64 - (uint8_t)mask[i]);
- tmp0 = vld1_u8(src0);
- tmp1 = vld1_u8(src1);
- res = vmull_u8(m, tmp0);
- res = vmlal_u8(res, max_minus_m, tmp1);
- vst1_u8(dst, vrshrn_n_u16(res, AOM_BLEND_A64_ROUND_BITS));
- src0 += src0_stride;
- src1 += src1_stride;
- dst += dst_stride;
- }
- } else if (w == 4) {
- for (int i = 0; i < h; i += 2) {
- __builtin_prefetch(src0 + 0 * src0_stride);
- __builtin_prefetch(src0 + 1 * src0_stride);
- __builtin_prefetch(src1 + 0 * src1_stride);
- __builtin_prefetch(src1 + 1 * src1_stride);
- const uint16x4_t m1 = vdup_n_u16((uint16_t)mask[i]);
- const uint16x4_t m2 = vdup_n_u16((uint16_t)mask[i + 1]);
- const uint8x8_t m = vmovn_u16(vcombine_u16(m1, m2));
- const uint16x4_t max_minus_m1 = vdup_n_u16(64 - (uint16_t)mask[i]);
- const uint16x4_t max_minus_m2 = vdup_n_u16(64 - (uint16_t)mask[i + 1]);
- const uint8x8_t max_minus_m =
- vmovn_u16(vcombine_u16(max_minus_m1, max_minus_m2));
- load_unaligned_u8_4x2(src0, src0_stride, &tmp0_32);
- tmp0 = vreinterpret_u8_u32(tmp0_32);
- load_unaligned_u8_4x2(src1, src1_stride, &tmp1_32);
- tmp1 = vreinterpret_u8_u32(tmp1_32);
- res = vmull_u8(m, tmp0);
- res = vmlal_u8(res, max_minus_m, tmp1);
- vst1_lane_u32(
- (uint32_t *)(dst + (0 * dst_stride)),
- vreinterpret_u32_u8(vrshrn_n_u16(res, AOM_BLEND_A64_ROUND_BITS)), 0);
- vst1_lane_u32(
- (uint32_t *)(dst + (1 * dst_stride)),
- vreinterpret_u32_u8(vrshrn_n_u16(res, AOM_BLEND_A64_ROUND_BITS)), 1);
- src0 += (2 * src0_stride);
- src1 += (2 * src1_stride);
- dst += (2 * dst_stride);
- }
- } else if (w == 2) {
- for (int i = 0; i < h; i += 2) {
- __builtin_prefetch(src0 + 0 * src0_stride);
- __builtin_prefetch(src0 + 1 * src0_stride);
- __builtin_prefetch(src1 + 0 * src1_stride);
- __builtin_prefetch(src1 + 1 * src1_stride);
- const uint8x8_t m1 = vdup_n_u8(mask[i]);
- const uint8x8_t m2 = vdup_n_u8(mask[i + 1]);
- const uint16x4x2_t m_trn =
- vtrn_u16(vreinterpret_u16_u8(m1), vreinterpret_u16_u8(m2));
- const uint8x8_t m = vreinterpret_u8_u16(m_trn.val[0]);
- const uint8x8_t max_minus_m1 = vdup_n_u8(64 - mask[i]);
- const uint8x8_t max_minus_m2 = vdup_n_u8(64 - mask[i + 1]);
- const uint16x4x2_t max_minus_m_trn = vtrn_u16(
- vreinterpret_u16_u8(max_minus_m1), vreinterpret_u16_u8(max_minus_m2));
- const uint8x8_t max_minus_m = vreinterpret_u8_u16(max_minus_m_trn.val[0]);
- load_unaligned_u8_2x2(src0, src0_stride, &tmp0_16);
- tmp0 = vreinterpret_u8_u16(tmp0_16);
- load_unaligned_u8_2x2(src1, src1_stride, &tmp1_16);
- tmp1 = vreinterpret_u8_u16(tmp1_16);
- res = vmull_u8(m, tmp0);
- res = vmlal_u8(res, max_minus_m, tmp1);
- vst1_lane_u16(
- (uint16_t *)(dst + (0 * dst_stride)),
- vreinterpret_u16_u8(vrshrn_n_u16(res, AOM_BLEND_A64_ROUND_BITS)), 0);
- vst1_lane_u16(
- (uint16_t *)(dst + (1 * dst_stride)),
- vreinterpret_u16_u8(vrshrn_n_u16(res, AOM_BLEND_A64_ROUND_BITS)), 1);
- src0 += (2 * src0_stride);
- src1 += (2 * src1_stride);
- dst += (2 * dst_stride);
- }
- }
-}
diff --git a/third_party/aom/av1/common/arm/cfl_neon.c b/third_party/aom/av1/common/arm/cfl_neon.c
deleted file mode 100644
index 39025b5e5..000000000
--- a/third_party/aom/av1/common/arm/cfl_neon.c
+++ /dev/null
@@ -1,584 +0,0 @@
-/*
- * Copyright (c) 2017, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-#include <arm_neon.h>
-
-#include "config/av1_rtcd.h"
-
-#include "av1/common/cfl.h"
-
-static INLINE void vldsubstq_s16(int16_t *dst, const uint16_t *src, int offset,
- int16x8_t sub) {
- vst1q_s16(dst + offset,
- vsubq_s16(vreinterpretq_s16_u16(vld1q_u16(src + offset)), sub));
-}
-
-static INLINE uint16x8_t vldaddq_u16(const uint16_t *buf, size_t offset) {
- return vaddq_u16(vld1q_u16(buf), vld1q_u16(buf + offset));
-}
-
-// Load half of a vector and duplicated in other half
-static INLINE uint8x8_t vldh_dup_u8(const uint8_t *ptr) {
- return vreinterpret_u8_u32(vld1_dup_u32((const uint32_t *)ptr));
-}
-
-// Store half of a vector.
-static INLINE void vsth_u16(uint16_t *ptr, uint16x4_t val) {
- *((uint32_t *)ptr) = vreinterpret_u32_u16(val)[0];
-}
-
-// Store half of a vector.
-static INLINE void vsth_u8(uint8_t *ptr, uint8x8_t val) {
- *((uint32_t *)ptr) = vreinterpret_u32_u8(val)[0];
-}
-
-static void cfl_luma_subsampling_420_lbd_neon(const uint8_t *input,
- int input_stride,
- uint16_t *pred_buf_q3, int width,
- int height) {
- const uint16_t *end = pred_buf_q3 + (height >> 1) * CFL_BUF_LINE;
- const int luma_stride = input_stride << 1;
- do {
- if (width == 4) {
- const uint16x4_t top = vpaddl_u8(vldh_dup_u8(input));
- const uint16x4_t sum = vpadal_u8(top, vldh_dup_u8(input + input_stride));
- vsth_u16(pred_buf_q3, vshl_n_u16(sum, 1));
- } else if (width == 8) {
- const uint16x4_t top = vpaddl_u8(vld1_u8(input));
- const uint16x4_t sum = vpadal_u8(top, vld1_u8(input + input_stride));
- vst1_u16(pred_buf_q3, vshl_n_u16(sum, 1));
- } else if (width == 16) {
- const uint16x8_t top = vpaddlq_u8(vld1q_u8(input));
- const uint16x8_t sum = vpadalq_u8(top, vld1q_u8(input + input_stride));
- vst1q_u16(pred_buf_q3, vshlq_n_u16(sum, 1));
- } else {
- const uint8x8x4_t top = vld4_u8(input);
- const uint8x8x4_t bot = vld4_u8(input + input_stride);
- // equivalent to a vpaddlq_u8 (because vld4q interleaves)
- const uint16x8_t top_0 = vaddl_u8(top.val[0], top.val[1]);
- // equivalent to a vpaddlq_u8 (because vld4q interleaves)
- const uint16x8_t bot_0 = vaddl_u8(bot.val[0], bot.val[1]);
- // equivalent to a vpaddlq_u8 (because vld4q interleaves)
- const uint16x8_t top_1 = vaddl_u8(top.val[2], top.val[3]);
- // equivalent to a vpaddlq_u8 (because vld4q interleaves)
- const uint16x8_t bot_1 = vaddl_u8(bot.val[2], bot.val[3]);
- uint16x8x2_t sum;
- sum.val[0] = vshlq_n_u16(vaddq_u16(top_0, bot_0), 1);
- sum.val[1] = vshlq_n_u16(vaddq_u16(top_1, bot_1), 1);
- vst2q_u16(pred_buf_q3, sum);
- }
- input += luma_stride;
- } while ((pred_buf_q3 += CFL_BUF_LINE) < end);
-}
-
-static void cfl_luma_subsampling_422_lbd_neon(const uint8_t *input,
- int input_stride,
- uint16_t *pred_buf_q3, int width,
- int height) {
- const uint16_t *end = pred_buf_q3 + height * CFL_BUF_LINE;
- do {
- if (width == 4) {
- const uint16x4_t top = vpaddl_u8(vldh_dup_u8(input));
- vsth_u16(pred_buf_q3, vshl_n_u16(top, 2));
- } else if (width == 8) {
- const uint16x4_t top = vpaddl_u8(vld1_u8(input));
- vst1_u16(pred_buf_q3, vshl_n_u16(top, 2));
- } else if (width == 16) {
- const uint16x8_t top = vpaddlq_u8(vld1q_u8(input));
- vst1q_u16(pred_buf_q3, vshlq_n_u16(top, 2));
- } else {
- const uint8x8x4_t top = vld4_u8(input);
- uint16x8x2_t sum;
- // vaddl_u8 is equivalent to a vpaddlq_u8 (because vld4q interleaves)
- sum.val[0] = vshlq_n_u16(vaddl_u8(top.val[0], top.val[1]), 2);
- sum.val[1] = vshlq_n_u16(vaddl_u8(top.val[2], top.val[3]), 2);
- vst2q_u16(pred_buf_q3, sum);
- }
- input += input_stride;
- } while ((pred_buf_q3 += CFL_BUF_LINE) < end);
-}
-
-static void cfl_luma_subsampling_444_lbd_neon(const uint8_t *input,
- int input_stride,
- uint16_t *pred_buf_q3, int width,
- int height) {
- const uint16_t *end = pred_buf_q3 + height * CFL_BUF_LINE;
- do {
- if (width == 4) {
- const uint16x8_t top = vshll_n_u8(vldh_dup_u8(input), 3);
- vst1_u16(pred_buf_q3, vget_low_u16(top));
- } else if (width == 8) {
- const uint16x8_t top = vshll_n_u8(vld1_u8(input), 3);
- vst1q_u16(pred_buf_q3, top);
- } else {
- const uint8x16_t top = vld1q_u8(input);
- vst1q_u16(pred_buf_q3, vshll_n_u8(vget_low_u8(top), 3));
- vst1q_u16(pred_buf_q3 + 8, vshll_n_u8(vget_high_u8(top), 3));
- if (width == 32) {
- const uint8x16_t next_top = vld1q_u8(input + 16);
- vst1q_u16(pred_buf_q3 + 16, vshll_n_u8(vget_low_u8(next_top), 3));
- vst1q_u16(pred_buf_q3 + 24, vshll_n_u8(vget_high_u8(next_top), 3));
- }
- }
- input += input_stride;
- } while ((pred_buf_q3 += CFL_BUF_LINE) < end);
-}
-
-#ifndef __aarch64__
-uint16x8_t vpaddq_u16(uint16x8_t a, uint16x8_t b) {
- return vcombine_u16(vpadd_u16(vget_low_u16(a), vget_high_u16(a)),
- vpadd_u16(vget_low_u16(b), vget_high_u16(b)));
-}
-#endif
-
-static void cfl_luma_subsampling_420_hbd_neon(const uint16_t *input,
- int input_stride,
- uint16_t *pred_buf_q3, int width,
- int height) {
- const uint16_t *end = pred_buf_q3 + (height >> 1) * CFL_BUF_LINE;
- const int luma_stride = input_stride << 1;
- do {
- if (width == 4) {
- const uint16x4_t top = vld1_u16(input);
- const uint16x4_t bot = vld1_u16(input + input_stride);
- const uint16x4_t sum = vadd_u16(top, bot);
- const uint16x4_t hsum = vpadd_u16(sum, sum);
- vsth_u16(pred_buf_q3, vshl_n_u16(hsum, 1));
- } else if (width < 32) {
- const uint16x8_t top = vld1q_u16(input);
- const uint16x8_t bot = vld1q_u16(input + input_stride);
- const uint16x8_t sum = vaddq_u16(top, bot);
- if (width == 8) {
- const uint16x4_t hsum = vget_low_u16(vpaddq_u16(sum, sum));
- vst1_u16(pred_buf_q3, vshl_n_u16(hsum, 1));
- } else {
- const uint16x8_t top_1 = vld1q_u16(input + 8);
- const uint16x8_t bot_1 = vld1q_u16(input + 8 + input_stride);
- const uint16x8_t sum_1 = vaddq_u16(top_1, bot_1);
- const uint16x8_t hsum = vpaddq_u16(sum, sum_1);
- vst1q_u16(pred_buf_q3, vshlq_n_u16(hsum, 1));
- }
- } else {
- const uint16x8x4_t top = vld4q_u16(input);
- const uint16x8x4_t bot = vld4q_u16(input + input_stride);
- // equivalent to a vpaddq_u16 (because vld4q interleaves)
- const uint16x8_t top_0 = vaddq_u16(top.val[0], top.val[1]);
- // equivalent to a vpaddq_u16 (because vld4q interleaves)
- const uint16x8_t bot_0 = vaddq_u16(bot.val[0], bot.val[1]);
- // equivalent to a vpaddq_u16 (because vld4q interleaves)
- const uint16x8_t top_1 = vaddq_u16(top.val[2], top.val[3]);
- // equivalent to a vpaddq_u16 (because vld4q interleaves)
- const uint16x8_t bot_1 = vaddq_u16(bot.val[2], bot.val[3]);
- uint16x8x2_t sum;
- sum.val[0] = vshlq_n_u16(vaddq_u16(top_0, bot_0), 1);
- sum.val[1] = vshlq_n_u16(vaddq_u16(top_1, bot_1), 1);
- vst2q_u16(pred_buf_q3, sum);
- }
- input += luma_stride;
- } while ((pred_buf_q3 += CFL_BUF_LINE) < end);
-}
-
-static void cfl_luma_subsampling_422_hbd_neon(const uint16_t *input,
- int input_stride,
- uint16_t *pred_buf_q3, int width,
- int height) {
- const uint16_t *end = pred_buf_q3 + height * CFL_BUF_LINE;
- do {
- if (width == 4) {
- const uint16x4_t top = vld1_u16(input);
- const uint16x4_t hsum = vpadd_u16(top, top);
- vsth_u16(pred_buf_q3, vshl_n_u16(hsum, 2));
- } else if (width == 8) {
- const uint16x4x2_t top = vld2_u16(input);
- // equivalent to a vpadd_u16 (because vld2 interleaves)
- const uint16x4_t hsum = vadd_u16(top.val[0], top.val[1]);
- vst1_u16(pred_buf_q3, vshl_n_u16(hsum, 2));
- } else if (width == 16) {
- const uint16x8x2_t top = vld2q_u16(input);
- // equivalent to a vpaddq_u16 (because vld2q interleaves)
- const uint16x8_t hsum = vaddq_u16(top.val[0], top.val[1]);
- vst1q_u16(pred_buf_q3, vshlq_n_u16(hsum, 2));
- } else {
- const uint16x8x4_t top = vld4q_u16(input);
- // equivalent to a vpaddq_u16 (because vld4q interleaves)
- const uint16x8_t hsum_0 = vaddq_u16(top.val[0], top.val[1]);
- // equivalent to a vpaddq_u16 (because vld4q interleaves)
- const uint16x8_t hsum_1 = vaddq_u16(top.val[2], top.val[3]);
- uint16x8x2_t result = { { vshlq_n_u16(hsum_0, 2),
- vshlq_n_u16(hsum_1, 2) } };
- vst2q_u16(pred_buf_q3, result);
- }
- input += input_stride;
- } while ((pred_buf_q3 += CFL_BUF_LINE) < end);
-}
-
-static void cfl_luma_subsampling_444_hbd_neon(const uint16_t *input,
- int input_stride,
- uint16_t *pred_buf_q3, int width,
- int height) {
- const uint16_t *end = pred_buf_q3 + height * CFL_BUF_LINE;
- do {
- if (width == 4) {
- const uint16x4_t top = vld1_u16(input);
- vst1_u16(pred_buf_q3, vshl_n_u16(top, 3));
- } else if (width == 8) {
- const uint16x8_t top = vld1q_u16(input);
- vst1q_u16(pred_buf_q3, vshlq_n_u16(top, 3));
- } else if (width == 16) {
- uint16x8x2_t top = vld2q_u16(input);
- top.val[0] = vshlq_n_u16(top.val[0], 3);
- top.val[1] = vshlq_n_u16(top.val[1], 3);
- vst2q_u16(pred_buf_q3, top);
- } else {
- uint16x8x4_t top = vld4q_u16(input);
- top.val[0] = vshlq_n_u16(top.val[0], 3);
- top.val[1] = vshlq_n_u16(top.val[1], 3);
- top.val[2] = vshlq_n_u16(top.val[2], 3);
- top.val[3] = vshlq_n_u16(top.val[3], 3);
- vst4q_u16(pred_buf_q3, top);
- }
- input += input_stride;
- } while ((pred_buf_q3 += CFL_BUF_LINE) < end);
-}
-
-CFL_GET_SUBSAMPLE_FUNCTION(neon)
-
-static INLINE void subtract_average_neon(const uint16_t *src, int16_t *dst,
- int width, int height,
- int round_offset,
- const int num_pel_log2) {
- const uint16_t *const end = src + height * CFL_BUF_LINE;
-
- // Round offset is not needed, because NEON will handle the rounding.
- (void)round_offset;
-
- // To optimize the use of the CPU pipeline, we process 4 rows per iteration
- const int step = 4 * CFL_BUF_LINE;
-
- // At this stage, the prediction buffer contains scaled reconstructed luma
- // pixels, which are positive integer and only require 15 bits. By using
- // unsigned integer for the sum, we can do one addition operation inside 16
- // bits (8 lanes) before having to convert to 32 bits (4 lanes).
- const uint16_t *sum_buf = src;
- uint32x4_t sum_32x4 = { 0, 0, 0, 0 };
- do {
- // For all widths, we load, add and combine the data so it fits in 4 lanes.
- if (width == 4) {
- const uint16x4_t a0 =
- vadd_u16(vld1_u16(sum_buf), vld1_u16(sum_buf + CFL_BUF_LINE));
- const uint16x4_t a1 = vadd_u16(vld1_u16(sum_buf + 2 * CFL_BUF_LINE),
- vld1_u16(sum_buf + 3 * CFL_BUF_LINE));
- sum_32x4 = vaddq_u32(sum_32x4, vaddl_u16(a0, a1));
- } else if (width == 8) {
- const uint16x8_t a0 = vldaddq_u16(sum_buf, CFL_BUF_LINE);
- const uint16x8_t a1 =
- vldaddq_u16(sum_buf + 2 * CFL_BUF_LINE, CFL_BUF_LINE);
- sum_32x4 = vpadalq_u16(sum_32x4, a0);
- sum_32x4 = vpadalq_u16(sum_32x4, a1);
- } else {
- const uint16x8_t row0 = vldaddq_u16(sum_buf, 8);
- const uint16x8_t row1 = vldaddq_u16(sum_buf + CFL_BUF_LINE, 8);
- const uint16x8_t row2 = vldaddq_u16(sum_buf + 2 * CFL_BUF_LINE, 8);
- const uint16x8_t row3 = vldaddq_u16(sum_buf + 3 * CFL_BUF_LINE, 8);
- sum_32x4 = vpadalq_u16(sum_32x4, row0);
- sum_32x4 = vpadalq_u16(sum_32x4, row1);
- sum_32x4 = vpadalq_u16(sum_32x4, row2);
- sum_32x4 = vpadalq_u16(sum_32x4, row3);
-
- if (width == 32) {
- const uint16x8_t row0_1 = vldaddq_u16(sum_buf + 16, 8);
- const uint16x8_t row1_1 = vldaddq_u16(sum_buf + CFL_BUF_LINE + 16, 8);
- const uint16x8_t row2_1 =
- vldaddq_u16(sum_buf + 2 * CFL_BUF_LINE + 16, 8);
- const uint16x8_t row3_1 =
- vldaddq_u16(sum_buf + 3 * CFL_BUF_LINE + 16, 8);
-
- sum_32x4 = vpadalq_u16(sum_32x4, row0_1);
- sum_32x4 = vpadalq_u16(sum_32x4, row1_1);
- sum_32x4 = vpadalq_u16(sum_32x4, row2_1);
- sum_32x4 = vpadalq_u16(sum_32x4, row3_1);
- }
- }
- sum_buf += step;
- } while (sum_buf < end);
-
- // Permute and add in such a way that each lane contains the block sum.
- // [A+C+B+D, B+D+A+C, C+A+D+B, D+B+C+A]
-#ifdef __aarch64__
- sum_32x4 = vpaddq_u32(sum_32x4, sum_32x4);
- sum_32x4 = vpaddq_u32(sum_32x4, sum_32x4);
-#else
- uint32x4_t flip =
- vcombine_u32(vget_high_u32(sum_32x4), vget_low_u32(sum_32x4));
- sum_32x4 = vaddq_u32(sum_32x4, flip);
- sum_32x4 = vaddq_u32(sum_32x4, vrev64q_u32(sum_32x4));
-#endif
-
- // Computing the average could be done using scalars, but getting off the NEON
- // engine introduces latency, so we use vqrshrn.
- int16x4_t avg_16x4;
- // Constant propagation makes for some ugly code.
- switch (num_pel_log2) {
- case 4: avg_16x4 = vreinterpret_s16_u16(vqrshrn_n_u32(sum_32x4, 4)); break;
- case 5: avg_16x4 = vreinterpret_s16_u16(vqrshrn_n_u32(sum_32x4, 5)); break;
- case 6: avg_16x4 = vreinterpret_s16_u16(vqrshrn_n_u32(sum_32x4, 6)); break;
- case 7: avg_16x4 = vreinterpret_s16_u16(vqrshrn_n_u32(sum_32x4, 7)); break;
- case 8: avg_16x4 = vreinterpret_s16_u16(vqrshrn_n_u32(sum_32x4, 8)); break;
- case 9: avg_16x4 = vreinterpret_s16_u16(vqrshrn_n_u32(sum_32x4, 9)); break;
- case 10:
- avg_16x4 = vreinterpret_s16_u16(vqrshrn_n_u32(sum_32x4, 10));
- break;
- default: assert(0);
- }
-
- if (width == 4) {
- do {
- vst1_s16(dst, vsub_s16(vreinterpret_s16_u16(vld1_u16(src)), avg_16x4));
- src += CFL_BUF_LINE;
- dst += CFL_BUF_LINE;
- } while (src < end);
- } else {
- const int16x8_t avg_16x8 = vcombine_s16(avg_16x4, avg_16x4);
- do {
- vldsubstq_s16(dst, src, 0, avg_16x8);
- vldsubstq_s16(dst, src, CFL_BUF_LINE, avg_16x8);
- vldsubstq_s16(dst, src, 2 * CFL_BUF_LINE, avg_16x8);
- vldsubstq_s16(dst, src, 3 * CFL_BUF_LINE, avg_16x8);
-
- if (width > 8) {
- vldsubstq_s16(dst, src, 8, avg_16x8);
- vldsubstq_s16(dst, src, 8 + CFL_BUF_LINE, avg_16x8);
- vldsubstq_s16(dst, src, 8 + 2 * CFL_BUF_LINE, avg_16x8);
- vldsubstq_s16(dst, src, 8 + 3 * CFL_BUF_LINE, avg_16x8);
- }
- if (width == 32) {
- vldsubstq_s16(dst, src, 16, avg_16x8);
- vldsubstq_s16(dst, src, 16 + CFL_BUF_LINE, avg_16x8);
- vldsubstq_s16(dst, src, 16 + 2 * CFL_BUF_LINE, avg_16x8);
- vldsubstq_s16(dst, src, 16 + 3 * CFL_BUF_LINE, avg_16x8);
- vldsubstq_s16(dst, src, 24, avg_16x8);
- vldsubstq_s16(dst, src, 24 + CFL_BUF_LINE, avg_16x8);
- vldsubstq_s16(dst, src, 24 + 2 * CFL_BUF_LINE, avg_16x8);
- vldsubstq_s16(dst, src, 24 + 3 * CFL_BUF_LINE, avg_16x8);
- }
- src += step;
- dst += step;
- } while (src < end);
- }
-}
-
-CFL_SUB_AVG_FN(neon)
-
-// Saturating negate 16-bit integers in a when the corresponding signed 16-bit
-// integer in b is negative.
-// Notes:
-// * Negating INT16_MIN results in INT16_MIN. However, this cannot occur in
-// practice, as scaled_luma is the multiplication of two absolute values.
-// * In the Intel equivalent, elements in a are zeroed out when the
-// corresponding elements in b are zero. Because vsign is used twice in a
-// row, with b in the first call becoming a in the second call, there's no
-// impact from not zeroing out.
-static int16x4_t vsign_s16(int16x4_t a, int16x4_t b) {
- const int16x4_t mask = vshr_n_s16(b, 15);
- return veor_s16(vadd_s16(a, mask), mask);
-}
-
-// Saturating negate 16-bit integers in a when the corresponding signed 16-bit
-// integer in b is negative.
-// Notes:
-// * Negating INT16_MIN results in INT16_MIN. However, this cannot occur in
-// practice, as scaled_luma is the multiplication of two absolute values.
-// * In the Intel equivalent, elements in a are zeroed out when the
-// corresponding elements in b are zero. Because vsignq is used twice in a
-// row, with b in the first call becoming a in the second call, there's no
-// impact from not zeroing out.
-static int16x8_t vsignq_s16(int16x8_t a, int16x8_t b) {
- const int16x8_t mask = vshrq_n_s16(b, 15);
- return veorq_s16(vaddq_s16(a, mask), mask);
-}
-
-static INLINE int16x4_t predict_w4(const int16_t *pred_buf_q3,
- int16x4_t alpha_sign, int abs_alpha_q12,
- int16x4_t dc) {
- const int16x4_t ac_q3 = vld1_s16(pred_buf_q3);
- const int16x4_t ac_sign = veor_s16(alpha_sign, ac_q3);
- int16x4_t scaled_luma = vqrdmulh_n_s16(vabs_s16(ac_q3), abs_alpha_q12);
- return vadd_s16(vsign_s16(scaled_luma, ac_sign), dc);
-}
-
-static INLINE int16x8_t predict_w8(const int16_t *pred_buf_q3,
- int16x8_t alpha_sign, int abs_alpha_q12,
- int16x8_t dc) {
- const int16x8_t ac_q3 = vld1q_s16(pred_buf_q3);
- const int16x8_t ac_sign = veorq_s16(alpha_sign, ac_q3);
- int16x8_t scaled_luma = vqrdmulhq_n_s16(vabsq_s16(ac_q3), abs_alpha_q12);
- return vaddq_s16(vsignq_s16(scaled_luma, ac_sign), dc);
-}
-
-static INLINE int16x8x2_t predict_w16(const int16_t *pred_buf_q3,
- int16x8_t alpha_sign, int abs_alpha_q12,
- int16x8_t dc) {
- // vld2q_s16 interleaves, which is not useful for prediction. vst1q_s16_x2
- // does not interleave, but is not currently available in the compilier used
- // by the AOM build system.
- const int16x8x2_t ac_q3 = vld2q_s16(pred_buf_q3);
- const int16x8_t ac_sign_0 = veorq_s16(alpha_sign, ac_q3.val[0]);
- const int16x8_t ac_sign_1 = veorq_s16(alpha_sign, ac_q3.val[1]);
- const int16x8_t scaled_luma_0 =
- vqrdmulhq_n_s16(vabsq_s16(ac_q3.val[0]), abs_alpha_q12);
- const int16x8_t scaled_luma_1 =
- vqrdmulhq_n_s16(vabsq_s16(ac_q3.val[1]), abs_alpha_q12);
- int16x8x2_t result;
- result.val[0] = vaddq_s16(vsignq_s16(scaled_luma_0, ac_sign_0), dc);
- result.val[1] = vaddq_s16(vsignq_s16(scaled_luma_1, ac_sign_1), dc);
- return result;
-}
-
-static INLINE int16x8x4_t predict_w32(const int16_t *pred_buf_q3,
- int16x8_t alpha_sign, int abs_alpha_q12,
- int16x8_t dc) {
- // vld4q_s16 interleaves, which is not useful for prediction. vst1q_s16_x4
- // does not interleave, but is not currently available in the compilier used
- // by the AOM build system.
- const int16x8x4_t ac_q3 = vld4q_s16(pred_buf_q3);
- const int16x8_t ac_sign_0 = veorq_s16(alpha_sign, ac_q3.val[0]);
- const int16x8_t ac_sign_1 = veorq_s16(alpha_sign, ac_q3.val[1]);
- const int16x8_t ac_sign_2 = veorq_s16(alpha_sign, ac_q3.val[2]);
- const int16x8_t ac_sign_3 = veorq_s16(alpha_sign, ac_q3.val[3]);
- const int16x8_t scaled_luma_0 =
- vqrdmulhq_n_s16(vabsq_s16(ac_q3.val[0]), abs_alpha_q12);
- const int16x8_t scaled_luma_1 =
- vqrdmulhq_n_s16(vabsq_s16(ac_q3.val[1]), abs_alpha_q12);
- const int16x8_t scaled_luma_2 =
- vqrdmulhq_n_s16(vabsq_s16(ac_q3.val[2]), abs_alpha_q12);
- const int16x8_t scaled_luma_3 =
- vqrdmulhq_n_s16(vabsq_s16(ac_q3.val[3]), abs_alpha_q12);
- int16x8x4_t result;
- result.val[0] = vaddq_s16(vsignq_s16(scaled_luma_0, ac_sign_0), dc);
- result.val[1] = vaddq_s16(vsignq_s16(scaled_luma_1, ac_sign_1), dc);
- result.val[2] = vaddq_s16(vsignq_s16(scaled_luma_2, ac_sign_2), dc);
- result.val[3] = vaddq_s16(vsignq_s16(scaled_luma_3, ac_sign_3), dc);
- return result;
-}
-
-static INLINE void cfl_predict_lbd_neon(const int16_t *pred_buf_q3,
- uint8_t *dst, int dst_stride,
- int alpha_q3, int width, int height) {
- const int16_t abs_alpha_q12 = abs(alpha_q3) << 9;
- const int16_t *const end = pred_buf_q3 + height * CFL_BUF_LINE;
- if (width == 4) {
- const int16x4_t alpha_sign = vdup_n_s16(alpha_q3);
- const int16x4_t dc = vdup_n_s16(*dst);
- do {
- const int16x4_t pred =
- predict_w4(pred_buf_q3, alpha_sign, abs_alpha_q12, dc);
- vsth_u8(dst, vqmovun_s16(vcombine_s16(pred, pred)));
- dst += dst_stride;
- } while ((pred_buf_q3 += CFL_BUF_LINE) < end);
- } else {
- const int16x8_t alpha_sign = vdupq_n_s16(alpha_q3);
- const int16x8_t dc = vdupq_n_s16(*dst);
- do {
- if (width == 8) {
- vst1_u8(dst, vqmovun_s16(predict_w8(pred_buf_q3, alpha_sign,
- abs_alpha_q12, dc)));
- } else if (width == 16) {
- const int16x8x2_t pred =
- predict_w16(pred_buf_q3, alpha_sign, abs_alpha_q12, dc);
- const uint8x8x2_t predun = { { vqmovun_s16(pred.val[0]),
- vqmovun_s16(pred.val[1]) } };
- vst2_u8(dst, predun);
- } else {
- const int16x8x4_t pred =
- predict_w32(pred_buf_q3, alpha_sign, abs_alpha_q12, dc);
- const uint8x8x4_t predun = {
- { vqmovun_s16(pred.val[0]), vqmovun_s16(pred.val[1]),
- vqmovun_s16(pred.val[2]), vqmovun_s16(pred.val[3]) }
- };
- vst4_u8(dst, predun);
- }
- dst += dst_stride;
- } while ((pred_buf_q3 += CFL_BUF_LINE) < end);
- }
-}
-
-CFL_PREDICT_FN(neon, lbd)
-
-static INLINE uint16x4_t clamp_s16(int16x4_t a, int16x4_t max) {
- return vreinterpret_u16_s16(vmax_s16(vmin_s16(a, max), vdup_n_s16(0)));
-}
-
-static INLINE uint16x8_t clampq_s16(int16x8_t a, int16x8_t max) {
- return vreinterpretq_u16_s16(vmaxq_s16(vminq_s16(a, max), vdupq_n_s16(0)));
-}
-
-static INLINE uint16x8x2_t clamp2q_s16(int16x8x2_t a, int16x8_t max) {
- uint16x8x2_t result;
- result.val[0] = vreinterpretq_u16_s16(
- vmaxq_s16(vminq_s16(a.val[0], max), vdupq_n_s16(0)));
- result.val[1] = vreinterpretq_u16_s16(
- vmaxq_s16(vminq_s16(a.val[1], max), vdupq_n_s16(0)));
- return result;
-}
-
-static INLINE uint16x8x4_t clamp4q_s16(int16x8x4_t a, int16x8_t max) {
- uint16x8x4_t result;
- result.val[0] = vreinterpretq_u16_s16(
- vmaxq_s16(vminq_s16(a.val[0], max), vdupq_n_s16(0)));
- result.val[1] = vreinterpretq_u16_s16(
- vmaxq_s16(vminq_s16(a.val[1], max), vdupq_n_s16(0)));
- result.val[2] = vreinterpretq_u16_s16(
- vmaxq_s16(vminq_s16(a.val[2], max), vdupq_n_s16(0)));
- result.val[3] = vreinterpretq_u16_s16(
- vmaxq_s16(vminq_s16(a.val[3], max), vdupq_n_s16(0)));
- return result;
-}
-
-static INLINE void cfl_predict_hbd_neon(const int16_t *pred_buf_q3,
- uint16_t *dst, int dst_stride,
- int alpha_q3, int bd, int width,
- int height) {
- const int max = (1 << bd) - 1;
- const int16_t abs_alpha_q12 = abs(alpha_q3) << 9;
- const int16_t *const end = pred_buf_q3 + height * CFL_BUF_LINE;
- if (width == 4) {
- const int16x4_t alpha_sign = vdup_n_s16(alpha_q3);
- const int16x4_t dc = vdup_n_s16(*dst);
- const int16x4_t max_16x4 = vdup_n_s16(max);
- do {
- const int16x4_t scaled_luma =
- predict_w4(pred_buf_q3, alpha_sign, abs_alpha_q12, dc);
- vst1_u16(dst, clamp_s16(scaled_luma, max_16x4));
- dst += dst_stride;
- } while ((pred_buf_q3 += CFL_BUF_LINE) < end);
- } else {
- const int16x8_t alpha_sign = vdupq_n_s16(alpha_q3);
- const int16x8_t dc = vdupq_n_s16(*dst);
- const int16x8_t max_16x8 = vdupq_n_s16(max);
- do {
- if (width == 8) {
- const int16x8_t pred =
- predict_w8(pred_buf_q3, alpha_sign, abs_alpha_q12, dc);
- vst1q_u16(dst, clampq_s16(pred, max_16x8));
- } else if (width == 16) {
- const int16x8x2_t pred =
- predict_w16(pred_buf_q3, alpha_sign, abs_alpha_q12, dc);
- vst2q_u16(dst, clamp2q_s16(pred, max_16x8));
- } else {
- const int16x8x4_t pred =
- predict_w32(pred_buf_q3, alpha_sign, abs_alpha_q12, dc);
- vst4q_u16(dst, clamp4q_s16(pred, max_16x8));
- }
- dst += dst_stride;
- } while ((pred_buf_q3 += CFL_BUF_LINE) < end);
- }
-}
-
-CFL_PREDICT_FN(neon, hbd)
diff --git a/third_party/aom/av1/common/arm/convolve_neon.c b/third_party/aom/av1/common/arm/convolve_neon.c
deleted file mode 100644
index d0c4f8ff6..000000000
--- a/third_party/aom/av1/common/arm/convolve_neon.c
+++ /dev/null
@@ -1,1455 +0,0 @@
-/*
- *
- * Copyright (c) 2018, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#include <assert.h>
-#include <arm_neon.h>
-
-#include "config/av1_rtcd.h"
-
-#include "aom_dsp/aom_dsp_common.h"
-#include "aom_ports/mem.h"
-#include "av1/common/convolve.h"
-#include "av1/common/filter.h"
-#include "av1/common/arm/convolve_neon.h"
-#include "av1/common/arm/mem_neon.h"
-#include "av1/common/arm/transpose_neon.h"
-
-static INLINE int16x4_t convolve8_4x4(const int16x4_t s0, const int16x4_t s1,
- const int16x4_t s2, const int16x4_t s3,
- const int16x4_t s4, const int16x4_t s5,
- const int16x4_t s6, const int16x4_t s7,
- const int16_t *filter) {
- int16x4_t sum;
-
- sum = vmul_n_s16(s0, filter[0]);
- sum = vmla_n_s16(sum, s1, filter[1]);
- sum = vmla_n_s16(sum, s2, filter[2]);
- sum = vmla_n_s16(sum, s5, filter[5]);
- sum = vmla_n_s16(sum, s6, filter[6]);
- sum = vmla_n_s16(sum, s7, filter[7]);
- /* filter[3] can take a max value of 128. So the max value of the result :
- * 128*255 + sum > 16 bits
- */
- sum = vqadd_s16(sum, vmul_n_s16(s3, filter[3]));
- sum = vqadd_s16(sum, vmul_n_s16(s4, filter[4]));
-
- return sum;
-}
-
-static INLINE uint8x8_t convolve8_horiz_8x8(
- const int16x8_t s0, const int16x8_t s1, const int16x8_t s2,
- const int16x8_t s3, const int16x8_t s4, const int16x8_t s5,
- const int16x8_t s6, const int16x8_t s7, const int16_t *filter,
- const int16x8_t shift_round_0, const int16x8_t shift_by_bits) {
- int16x8_t sum;
-
- sum = vmulq_n_s16(s0, filter[0]);
- sum = vmlaq_n_s16(sum, s1, filter[1]);
- sum = vmlaq_n_s16(sum, s2, filter[2]);
- sum = vmlaq_n_s16(sum, s5, filter[5]);
- sum = vmlaq_n_s16(sum, s6, filter[6]);
- sum = vmlaq_n_s16(sum, s7, filter[7]);
- /* filter[3] can take a max value of 128. So the max value of the result :
- * 128*255 + sum > 16 bits
- */
- sum = vqaddq_s16(sum, vmulq_n_s16(s3, filter[3]));
- sum = vqaddq_s16(sum, vmulq_n_s16(s4, filter[4]));
-
- sum = vqrshlq_s16(sum, shift_round_0);
- sum = vqrshlq_s16(sum, shift_by_bits);
-
- return vqmovun_s16(sum);
-}
-
-#if !defined(__aarch64__)
-static INLINE uint8x8_t convolve8_horiz_4x1(
- const int16x4_t s0, const int16x4_t s1, const int16x4_t s2,
- const int16x4_t s3, const int16x4_t s4, const int16x4_t s5,
- const int16x4_t s6, const int16x4_t s7, const int16_t *filter,
- const int16x4_t shift_round_0, const int16x4_t shift_by_bits) {
- int16x4_t sum;
-
- sum = vmul_n_s16(s0, filter[0]);
- sum = vmla_n_s16(sum, s1, filter[1]);
- sum = vmla_n_s16(sum, s2, filter[2]);
- sum = vmla_n_s16(sum, s5, filter[5]);
- sum = vmla_n_s16(sum, s6, filter[6]);
- sum = vmla_n_s16(sum, s7, filter[7]);
- /* filter[3] can take a max value of 128. So the max value of the result :
- * 128*255 + sum > 16 bits
- */
- sum = vqadd_s16(sum, vmul_n_s16(s3, filter[3]));
- sum = vqadd_s16(sum, vmul_n_s16(s4, filter[4]));
-
- sum = vqrshl_s16(sum, shift_round_0);
- sum = vqrshl_s16(sum, shift_by_bits);
-
- return vqmovun_s16(vcombine_s16(sum, sum));
-}
-#endif // !defined(__arch64__)
-
-static INLINE uint8x8_t convolve8_vert_8x4(
- const int16x8_t s0, const int16x8_t s1, const int16x8_t s2,
- const int16x8_t s3, const int16x8_t s4, const int16x8_t s5,
- const int16x8_t s6, const int16x8_t s7, const int16_t *filter) {
- int16x8_t sum;
-
- sum = vmulq_n_s16(s0, filter[0]);
- sum = vmlaq_n_s16(sum, s1, filter[1]);
- sum = vmlaq_n_s16(sum, s2, filter[2]);
- sum = vmlaq_n_s16(sum, s5, filter[5]);
- sum = vmlaq_n_s16(sum, s6, filter[6]);
- sum = vmlaq_n_s16(sum, s7, filter[7]);
- /* filter[3] can take a max value of 128. So the max value of the result :
- * 128*255 + sum > 16 bits
- */
- sum = vqaddq_s16(sum, vmulq_n_s16(s3, filter[3]));
- sum = vqaddq_s16(sum, vmulq_n_s16(s4, filter[4]));
-
- return vqrshrun_n_s16(sum, FILTER_BITS);
-}
-
-static INLINE uint16x4_t convolve8_vert_4x4_s32(
- const int16x4_t s0, const int16x4_t s1, const int16x4_t s2,
- const int16x4_t s3, const int16x4_t s4, const int16x4_t s5,
- const int16x4_t s6, const int16x4_t s7, const int16_t *y_filter,
- const int32x4_t round_shift_vec, const int32x4_t offset_const,
- const int32x4_t sub_const_vec) {
- int32x4_t sum0;
- uint16x4_t res;
- const int32x4_t zero = vdupq_n_s32(0);
-
- sum0 = vmull_n_s16(s0, y_filter[0]);
- sum0 = vmlal_n_s16(sum0, s1, y_filter[1]);
- sum0 = vmlal_n_s16(sum0, s2, y_filter[2]);
- sum0 = vmlal_n_s16(sum0, s3, y_filter[3]);
- sum0 = vmlal_n_s16(sum0, s4, y_filter[4]);
- sum0 = vmlal_n_s16(sum0, s5, y_filter[5]);
- sum0 = vmlal_n_s16(sum0, s6, y_filter[6]);
- sum0 = vmlal_n_s16(sum0, s7, y_filter[7]);
-
- sum0 = vaddq_s32(sum0, offset_const);
- sum0 = vqrshlq_s32(sum0, round_shift_vec);
- sum0 = vsubq_s32(sum0, sub_const_vec);
- sum0 = vmaxq_s32(sum0, zero);
-
- res = vmovn_u32(vreinterpretq_u32_s32(sum0));
-
- return res;
-}
-
-static INLINE uint8x8_t convolve8_vert_8x4_s32(
- const int16x8_t s0, const int16x8_t s1, const int16x8_t s2,
- const int16x8_t s3, const int16x8_t s4, const int16x8_t s5,
- const int16x8_t s6, const int16x8_t s7, const int16_t *y_filter,
- const int32x4_t round_shift_vec, const int32x4_t offset_const,
- const int32x4_t sub_const_vec, const int16x8_t vec_round_bits) {
- int32x4_t sum0, sum1;
- uint16x8_t res;
- const int32x4_t zero = vdupq_n_s32(0);
-
- sum0 = vmull_n_s16(vget_low_s16(s0), y_filter[0]);
- sum0 = vmlal_n_s16(sum0, vget_low_s16(s1), y_filter[1]);
- sum0 = vmlal_n_s16(sum0, vget_low_s16(s2), y_filter[2]);
- sum0 = vmlal_n_s16(sum0, vget_low_s16(s3), y_filter[3]);
- sum0 = vmlal_n_s16(sum0, vget_low_s16(s4), y_filter[4]);
- sum0 = vmlal_n_s16(sum0, vget_low_s16(s5), y_filter[5]);
- sum0 = vmlal_n_s16(sum0, vget_low_s16(s6), y_filter[6]);
- sum0 = vmlal_n_s16(sum0, vget_low_s16(s7), y_filter[7]);
-
- sum1 = vmull_n_s16(vget_high_s16(s0), y_filter[0]);
- sum1 = vmlal_n_s16(sum1, vget_high_s16(s1), y_filter[1]);
- sum1 = vmlal_n_s16(sum1, vget_high_s16(s2), y_filter[2]);
- sum1 = vmlal_n_s16(sum1, vget_high_s16(s3), y_filter[3]);
- sum1 = vmlal_n_s16(sum1, vget_high_s16(s4), y_filter[4]);
- sum1 = vmlal_n_s16(sum1, vget_high_s16(s5), y_filter[5]);
- sum1 = vmlal_n_s16(sum1, vget_high_s16(s6), y_filter[6]);
- sum1 = vmlal_n_s16(sum1, vget_high_s16(s7), y_filter[7]);
-
- sum0 = vaddq_s32(sum0, offset_const);
- sum1 = vaddq_s32(sum1, offset_const);
- sum0 = vqrshlq_s32(sum0, round_shift_vec);
- sum1 = vqrshlq_s32(sum1, round_shift_vec);
- sum0 = vsubq_s32(sum0, sub_const_vec);
- sum1 = vsubq_s32(sum1, sub_const_vec);
- sum0 = vmaxq_s32(sum0, zero);
- sum1 = vmaxq_s32(sum1, zero);
- res = vcombine_u16(vqmovn_u32(vreinterpretq_u32_s32(sum0)),
- vqmovn_u32(vreinterpretq_u32_s32(sum1)));
-
- res = vqrshlq_u16(res, vec_round_bits);
-
- return vqmovn_u16(res);
-}
-
-void av1_convolve_x_sr_neon(const uint8_t *src, int src_stride, uint8_t *dst,
- int dst_stride, int w, int h,
- const InterpFilterParams *filter_params_x,
- const InterpFilterParams *filter_params_y,
- const int subpel_x_q4, const int subpel_y_q4,
- ConvolveParams *conv_params) {
- const uint8_t horiz_offset = filter_params_x->taps / 2 - 1;
- const int8_t bits = FILTER_BITS - conv_params->round_0;
-
- (void)subpel_y_q4;
- (void)conv_params;
- (void)filter_params_y;
-
- uint8x8_t t0;
-#if defined(__aarch64__)
- uint8x8_t t1, t2, t3;
-#endif
-
- assert(bits >= 0);
- assert((FILTER_BITS - conv_params->round_1) >= 0 ||
- ((conv_params->round_0 + conv_params->round_1) == 2 * FILTER_BITS));
-
- const int16_t *x_filter = av1_get_interp_filter_subpel_kernel(
- filter_params_x, subpel_x_q4 & SUBPEL_MASK);
-
- const int16x8_t shift_round_0 = vdupq_n_s16(-conv_params->round_0);
- const int16x8_t shift_by_bits = vdupq_n_s16(-bits);
-
- src -= horiz_offset;
-#if defined(__aarch64__)
- if (h == 4) {
- uint8x8_t d01, d23;
- int16x4_t s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, s10, d0, d1, d2, d3;
- int16x8_t d01_temp, d23_temp;
-
- __builtin_prefetch(src + 0 * src_stride);
- __builtin_prefetch(src + 1 * src_stride);
- __builtin_prefetch(src + 2 * src_stride);
- __builtin_prefetch(src + 3 * src_stride);
-
- load_u8_8x4(src, src_stride, &t0, &t1, &t2, &t3);
- transpose_u8_8x4(&t0, &t1, &t2, &t3);
-
- s0 = vget_low_s16(vreinterpretq_s16_u16(vmovl_u8(t0)));
- s1 = vget_low_s16(vreinterpretq_s16_u16(vmovl_u8(t1)));
- s2 = vget_low_s16(vreinterpretq_s16_u16(vmovl_u8(t2)));
- s3 = vget_low_s16(vreinterpretq_s16_u16(vmovl_u8(t3)));
- s4 = vget_high_s16(vreinterpretq_s16_u16(vmovl_u8(t0)));
- s5 = vget_high_s16(vreinterpretq_s16_u16(vmovl_u8(t1)));
- s6 = vget_high_s16(vreinterpretq_s16_u16(vmovl_u8(t2)));
- __builtin_prefetch(dst + 0 * dst_stride);
- __builtin_prefetch(dst + 1 * dst_stride);
- __builtin_prefetch(dst + 2 * dst_stride);
- __builtin_prefetch(dst + 3 * dst_stride);
- src += 7;
-
- do {
- load_u8_8x4(src, src_stride, &t0, &t1, &t2, &t3);
- transpose_u8_8x4(&t0, &t1, &t2, &t3);
-
- s7 = vget_low_s16(vreinterpretq_s16_u16(vmovl_u8(t0)));
- s8 = vget_low_s16(vreinterpretq_s16_u16(vmovl_u8(t1)));
- s9 = vget_low_s16(vreinterpretq_s16_u16(vmovl_u8(t2)));
- s10 = vget_low_s16(vreinterpretq_s16_u16(vmovl_u8(t3)));
-
- d0 = convolve8_4x4(s0, s1, s2, s3, s4, s5, s6, s7, x_filter);
-
- d1 = convolve8_4x4(s1, s2, s3, s4, s5, s6, s7, s8, x_filter);
-
- d2 = convolve8_4x4(s2, s3, s4, s5, s6, s7, s8, s9, x_filter);
-
- d3 = convolve8_4x4(s3, s4, s5, s6, s7, s8, s9, s10, x_filter);
-
- d01_temp = vqrshlq_s16(vcombine_s16(d0, d1), shift_round_0);
- d23_temp = vqrshlq_s16(vcombine_s16(d2, d3), shift_round_0);
-
- d01_temp = vqrshlq_s16(d01_temp, shift_by_bits);
- d23_temp = vqrshlq_s16(d23_temp, shift_by_bits);
-
- d01 = vqmovun_s16(d01_temp);
- d23 = vqmovun_s16(d23_temp);
-
- transpose_u8_4x4(&d01, &d23);
-
- if (w != 2) {
- vst1_lane_u32((uint32_t *)(dst + 0 * dst_stride), // 00 01 02 03
- vreinterpret_u32_u8(d01), 0);
- vst1_lane_u32((uint32_t *)(dst + 1 * dst_stride), // 10 11 12 13
- vreinterpret_u32_u8(d23), 0);
- vst1_lane_u32((uint32_t *)(dst + 2 * dst_stride), // 20 21 22 23
- vreinterpret_u32_u8(d01), 1);
- vst1_lane_u32((uint32_t *)(dst + 3 * dst_stride), // 30 31 32 33
- vreinterpret_u32_u8(d23), 1);
- } else {
- vst1_lane_u16((uint16_t *)(dst + 0 * dst_stride), // 00 01
- vreinterpret_u16_u8(d01), 0);
- vst1_lane_u16((uint16_t *)(dst + 1 * dst_stride), // 10 11
- vreinterpret_u16_u8(d23), 0);
- vst1_lane_u16((uint16_t *)(dst + 2 * dst_stride), // 20 21
- vreinterpret_u16_u8(d01), 2);
- vst1_lane_u16((uint16_t *)(dst + 3 * dst_stride), // 30 31
- vreinterpret_u16_u8(d23), 2);
- }
-
- s0 = s4;
- s1 = s5;
- s2 = s6;
- s3 = s7;
- s4 = s8;
- s5 = s9;
- s6 = s10;
- src += 4;
- dst += 4;
- w -= 4;
- } while (w > 0);
- } else {
-#endif
- int width;
- const uint8_t *s;
- int16x8_t s0, s1, s2, s3, s4, s5, s6, s7;
-
-#if defined(__aarch64__)
- int16x8_t s8, s9, s10;
- uint8x8_t t4, t5, t6, t7;
-#endif
-
- if (w <= 4) {
-#if defined(__aarch64__)
- do {
- load_u8_8x8(src, src_stride, &t0, &t1, &t2, &t3, &t4, &t5, &t6, &t7);
- transpose_u8_8x8(&t0, &t1, &t2, &t3, &t4, &t5, &t6, &t7);
- s0 = vreinterpretq_s16_u16(vmovl_u8(t0));
- s1 = vreinterpretq_s16_u16(vmovl_u8(t1));
- s2 = vreinterpretq_s16_u16(vmovl_u8(t2));
- s3 = vreinterpretq_s16_u16(vmovl_u8(t3));
- s4 = vreinterpretq_s16_u16(vmovl_u8(t4));
- s5 = vreinterpretq_s16_u16(vmovl_u8(t5));
- s6 = vreinterpretq_s16_u16(vmovl_u8(t6));
-
- load_u8_8x8(src + 7, src_stride, &t0, &t1, &t2, &t3, &t4, &t5, &t6,
- &t7);
- src += 8 * src_stride;
- __builtin_prefetch(dst + 0 * dst_stride);
- __builtin_prefetch(dst + 1 * dst_stride);
- __builtin_prefetch(dst + 2 * dst_stride);
- __builtin_prefetch(dst + 3 * dst_stride);
- __builtin_prefetch(dst + 4 * dst_stride);
- __builtin_prefetch(dst + 5 * dst_stride);
- __builtin_prefetch(dst + 6 * dst_stride);
- __builtin_prefetch(dst + 7 * dst_stride);
-
- transpose_u8_4x8(&t0, &t1, &t2, &t3, t4, t5, t6, t7);
-
- s7 = vreinterpretq_s16_u16(vmovl_u8(t0));
- s8 = vreinterpretq_s16_u16(vmovl_u8(t1));
- s9 = vreinterpretq_s16_u16(vmovl_u8(t2));
- s10 = vreinterpretq_s16_u16(vmovl_u8(t3));
-
- __builtin_prefetch(src + 0 * src_stride);
- __builtin_prefetch(src + 1 * src_stride);
- __builtin_prefetch(src + 2 * src_stride);
- __builtin_prefetch(src + 3 * src_stride);
- __builtin_prefetch(src + 4 * src_stride);
- __builtin_prefetch(src + 5 * src_stride);
- __builtin_prefetch(src + 6 * src_stride);
- __builtin_prefetch(src + 7 * src_stride);
- t0 = convolve8_horiz_8x8(s0, s1, s2, s3, s4, s5, s6, s7, x_filter,
- shift_round_0, shift_by_bits);
- t1 = convolve8_horiz_8x8(s1, s2, s3, s4, s5, s6, s7, s8, x_filter,
- shift_round_0, shift_by_bits);
- t2 = convolve8_horiz_8x8(s2, s3, s4, s5, s6, s7, s8, s9, x_filter,
- shift_round_0, shift_by_bits);
- t3 = convolve8_horiz_8x8(s3, s4, s5, s6, s7, s8, s9, s10, x_filter,
- shift_round_0, shift_by_bits);
-
- transpose_u8_8x4(&t0, &t1, &t2, &t3);
-
- if ((w == 4) && (h > 4)) {
- vst1_lane_u32((uint32_t *)dst, vreinterpret_u32_u8(t0),
- 0); // 00 01 02 03
- dst += dst_stride;
- vst1_lane_u32((uint32_t *)dst, vreinterpret_u32_u8(t1),
- 0); // 10 11 12 13
- dst += dst_stride;
- vst1_lane_u32((uint32_t *)dst, vreinterpret_u32_u8(t2),
- 0); // 20 21 22 23
- dst += dst_stride;
- vst1_lane_u32((uint32_t *)dst, vreinterpret_u32_u8(t3),
- 0); // 30 31 32 33
- dst += dst_stride;
- vst1_lane_u32((uint32_t *)dst, vreinterpret_u32_u8(t0),
- 1); // 40 41 42 43
- dst += dst_stride;
- vst1_lane_u32((uint32_t *)dst, vreinterpret_u32_u8(t1),
- 1); // 50 51 52 53
- dst += dst_stride;
- vst1_lane_u32((uint32_t *)dst, vreinterpret_u32_u8(t2),
- 1); // 60 61 62 63
- dst += dst_stride;
- vst1_lane_u32((uint32_t *)dst, vreinterpret_u32_u8(t3),
- 1); // 70 71 72 73
- dst += dst_stride;
- } else if ((w == 4) && (h == 2)) {
- vst1_lane_u32((uint32_t *)dst, vreinterpret_u32_u8(t0),
- 0); // 00 01 02 03
- dst += dst_stride;
- vst1_lane_u32((uint32_t *)dst, vreinterpret_u32_u8(t1),
- 0); // 10 11 12 13
- dst += dst_stride;
- } else if ((w == 2) && (h > 4)) {
- vst1_lane_u16((uint16_t *)dst, vreinterpret_u16_u8(t0), 0); // 00 01
- dst += dst_stride;
- vst1_lane_u16((uint16_t *)dst, vreinterpret_u16_u8(t1), 0); // 10 11
- dst += dst_stride;
- vst1_lane_u16((uint16_t *)dst, vreinterpret_u16_u8(t2), 0); // 20 21
- dst += dst_stride;
- vst1_lane_u16((uint16_t *)dst, vreinterpret_u16_u8(t3), 0); // 30 31
- dst += dst_stride;
- vst1_lane_u16((uint16_t *)dst, vreinterpret_u16_u8(t0), 2); // 40 41
- dst += dst_stride;
- vst1_lane_u16((uint16_t *)dst, vreinterpret_u16_u8(t1), 2); // 50 51
- dst += dst_stride;
- vst1_lane_u16((uint16_t *)dst, vreinterpret_u16_u8(t2), 2); // 60 61
- dst += dst_stride;
- vst1_lane_u16((uint16_t *)dst, vreinterpret_u16_u8(t3), 2); // 70 71
- dst += dst_stride;
- } else if ((w == 2) && (h == 2)) {
- vst1_lane_u16((uint16_t *)dst, vreinterpret_u16_u8(t0), 0); // 00 01
- dst += dst_stride;
- vst1_lane_u16((uint16_t *)dst, vreinterpret_u16_u8(t1), 0); // 10 11
- dst += dst_stride;
- }
- h -= 8;
- } while (h > 0);
-#else
- int16x8_t tt0;
- int16x4_t x0, x1, x2, x3, x4, x5, x6, x7;
- const int16x4_t shift_round_0_low = vget_low_s16(shift_round_0);
- const int16x4_t shift_by_bits_low = vget_low_s16(shift_by_bits);
- do {
- t0 = vld1_u8(src); // a0 a1 a2 a3 a4 a5 a6 a7
- tt0 = vreinterpretq_s16_u16(vmovl_u8(t0));
- x0 = vget_low_s16(tt0); // a0 a1 a2 a3
- x4 = vget_high_s16(tt0); // a4 a5 a6 a7
-
- t0 = vld1_u8(src + 8); // a8 a9 a10 a11 a12 a13 a14 a15
- tt0 = vreinterpretq_s16_u16(vmovl_u8(t0));
- x7 = vget_low_s16(tt0); // a8 a9 a10 a11
-
- x1 = vext_s16(x0, x4, 1); // a1 a2 a3 a4
- x2 = vext_s16(x0, x4, 2); // a2 a3 a4 a5
- x3 = vext_s16(x0, x4, 3); // a3 a4 a5 a6
- x5 = vext_s16(x4, x7, 1); // a5 a6 a7 a8
- x6 = vext_s16(x4, x7, 2); // a6 a7 a8 a9
- x7 = vext_s16(x4, x7, 3); // a7 a8 a9 a10
-
- src += src_stride;
-
- t0 = convolve8_horiz_4x1(x0, x1, x2, x3, x4, x5, x6, x7, x_filter,
- shift_round_0_low, shift_by_bits_low);
-
- if (w == 4) {
- vst1_lane_u32((uint32_t *)dst, vreinterpret_u32_u8(t0),
- 0); // 00 01 02 03
- dst += dst_stride;
- } else if (w == 2) {
- vst1_lane_u16((uint16_t *)dst, vreinterpret_u16_u8(t0), 0); // 00 01
- dst += dst_stride;
- }
- h -= 1;
- } while (h > 0);
-#endif
- } else {
- uint8_t *d;
- int16x8_t s11;
-#if defined(__aarch64__)
- int16x8_t s12, s13, s14;
- do {
- __builtin_prefetch(src + 0 * src_stride);
- __builtin_prefetch(src + 1 * src_stride);
- __builtin_prefetch(src + 2 * src_stride);
- __builtin_prefetch(src + 3 * src_stride);
- __builtin_prefetch(src + 4 * src_stride);
- __builtin_prefetch(src + 5 * src_stride);
- __builtin_prefetch(src + 6 * src_stride);
- __builtin_prefetch(src + 7 * src_stride);
- load_u8_8x8(src, src_stride, &t0, &t1, &t2, &t3, &t4, &t5, &t6, &t7);
- transpose_u8_8x8(&t0, &t1, &t2, &t3, &t4, &t5, &t6, &t7);
- s0 = vreinterpretq_s16_u16(vmovl_u8(t0));
- s1 = vreinterpretq_s16_u16(vmovl_u8(t1));
- s2 = vreinterpretq_s16_u16(vmovl_u8(t2));
- s3 = vreinterpretq_s16_u16(vmovl_u8(t3));
- s4 = vreinterpretq_s16_u16(vmovl_u8(t4));
- s5 = vreinterpretq_s16_u16(vmovl_u8(t5));
- s6 = vreinterpretq_s16_u16(vmovl_u8(t6));
-
- width = w;
- s = src + 7;
- d = dst;
- __builtin_prefetch(dst + 0 * dst_stride);
- __builtin_prefetch(dst + 1 * dst_stride);
- __builtin_prefetch(dst + 2 * dst_stride);
- __builtin_prefetch(dst + 3 * dst_stride);
- __builtin_prefetch(dst + 4 * dst_stride);
- __builtin_prefetch(dst + 5 * dst_stride);
- __builtin_prefetch(dst + 6 * dst_stride);
- __builtin_prefetch(dst + 7 * dst_stride);
-
- do {
- load_u8_8x8(s, src_stride, &t0, &t1, &t2, &t3, &t4, &t5, &t6, &t7);
- transpose_u8_8x8(&t0, &t1, &t2, &t3, &t4, &t5, &t6, &t7);
- s7 = vreinterpretq_s16_u16(vmovl_u8(t0));
- s8 = vreinterpretq_s16_u16(vmovl_u8(t1));
- s9 = vreinterpretq_s16_u16(vmovl_u8(t2));
- s10 = vreinterpretq_s16_u16(vmovl_u8(t3));
- s11 = vreinterpretq_s16_u16(vmovl_u8(t4));
- s12 = vreinterpretq_s16_u16(vmovl_u8(t5));
- s13 = vreinterpretq_s16_u16(vmovl_u8(t6));
- s14 = vreinterpretq_s16_u16(vmovl_u8(t7));
-
- t0 = convolve8_horiz_8x8(s0, s1, s2, s3, s4, s5, s6, s7, x_filter,
- shift_round_0, shift_by_bits);
-
- t1 = convolve8_horiz_8x8(s1, s2, s3, s4, s5, s6, s7, s8, x_filter,
- shift_round_0, shift_by_bits);
-
- t2 = convolve8_horiz_8x8(s2, s3, s4, s5, s6, s7, s8, s9, x_filter,
- shift_round_0, shift_by_bits);
-
- t3 = convolve8_horiz_8x8(s3, s4, s5, s6, s7, s8, s9, s10, x_filter,
- shift_round_0, shift_by_bits);
-
- t4 = convolve8_horiz_8x8(s4, s5, s6, s7, s8, s9, s10, s11, x_filter,
- shift_round_0, shift_by_bits);
-
- t5 = convolve8_horiz_8x8(s5, s6, s7, s8, s9, s10, s11, s12, x_filter,
- shift_round_0, shift_by_bits);
-
- t6 = convolve8_horiz_8x8(s6, s7, s8, s9, s10, s11, s12, s13, x_filter,
- shift_round_0, shift_by_bits);
-
- t7 = convolve8_horiz_8x8(s7, s8, s9, s10, s11, s12, s13, s14,
- x_filter, shift_round_0, shift_by_bits);
-
- transpose_u8_8x8(&t0, &t1, &t2, &t3, &t4, &t5, &t6, &t7);
- if (h != 2) {
- store_u8_8x8(d, dst_stride, t0, t1, t2, t3, t4, t5, t6, t7);
- } else {
- store_row2_u8_8x8(d, dst_stride, t0, t1);
- }
- s0 = s8;
- s1 = s9;
- s2 = s10;
- s3 = s11;
- s4 = s12;
- s5 = s13;
- s6 = s14;
- s += 8;
- d += 8;
- width -= 8;
- } while (width > 0);
- src += 8 * src_stride;
- dst += 8 * dst_stride;
- h -= 8;
- } while (h > 0);
-#else
- do {
- t0 = vld1_u8(src); // a0 a1 a2 a3 a4 a5 a6 a7
- s0 = vreinterpretq_s16_u16(vmovl_u8(t0));
-
- width = w;
- s = src + 8;
- d = dst;
- __builtin_prefetch(dst);
-
- do {
- t0 = vld1_u8(s); // a8 a9 a10 a11 a12 a13 a14 a15
- s7 = vreinterpretq_s16_u16(vmovl_u8(t0));
- s11 = s0;
- s0 = s7;
-
- s1 = vextq_s16(s11, s7, 1); // a1 a2 a3 a4 a5 a6 a7 a8
- s2 = vextq_s16(s11, s7, 2); // a2 a3 a4 a5 a6 a7 a8 a9
- s3 = vextq_s16(s11, s7, 3); // a3 a4 a5 a6 a7 a8 a9 a10
- s4 = vextq_s16(s11, s7, 4); // a4 a5 a6 a7 a8 a9 a10 a11
- s5 = vextq_s16(s11, s7, 5); // a5 a6 a7 a8 a9 a10 a11 a12
- s6 = vextq_s16(s11, s7, 6); // a6 a7 a8 a9 a10 a11 a12 a13
- s7 = vextq_s16(s11, s7, 7); // a7 a8 a9 a10 a11 a12 a13 a14
-
- t0 = convolve8_horiz_8x8(s11, s1, s2, s3, s4, s5, s6, s7, x_filter,
- shift_round_0, shift_by_bits);
- vst1_u8(d, t0);
-
- s += 8;
- d += 8;
- width -= 8;
- } while (width > 0);
- src += src_stride;
- dst += dst_stride;
- h -= 1;
- } while (h > 0);
-#endif
- }
-#if defined(__aarch64__)
- }
-#endif
-}
-
-void av1_convolve_y_sr_neon(const uint8_t *src, int src_stride, uint8_t *dst,
- int dst_stride, int w, int h,
- const InterpFilterParams *filter_params_x,
- const InterpFilterParams *filter_params_y,
- const int subpel_x_q4, const int subpel_y_q4,
- ConvolveParams *conv_params) {
- const int vert_offset = filter_params_y->taps / 2 - 1;
-
- src -= vert_offset * src_stride;
-
- (void)filter_params_x;
- (void)subpel_x_q4;
- (void)conv_params;
-
- assert(conv_params->round_0 <= FILTER_BITS);
- assert(((conv_params->round_0 + conv_params->round_1) <= (FILTER_BITS + 1)) ||
- ((conv_params->round_0 + conv_params->round_1) == (2 * FILTER_BITS)));
-
- const int16_t *y_filter = av1_get_interp_filter_subpel_kernel(
- filter_params_y, subpel_y_q4 & SUBPEL_MASK);
-
- if (w <= 4) {
- uint8x8_t d01;
- int16x4_t s0, s1, s2, s3, s4, s5, s6, s7, d0;
-#if defined(__aarch64__)
- uint8x8_t d23;
- int16x4_t s8, s9, s10, d1, d2, d3;
-#endif
- s0 = vreinterpret_s16_u16(vget_low_u16(vmovl_u8(vld1_u8(src))));
- src += src_stride;
- s1 = vreinterpret_s16_u16(vget_low_u16(vmovl_u8(vld1_u8(src))));
- src += src_stride;
- s2 = vreinterpret_s16_u16(vget_low_u16(vmovl_u8(vld1_u8(src))));
- src += src_stride;
- s3 = vreinterpret_s16_u16(vget_low_u16(vmovl_u8(vld1_u8(src))));
- src += src_stride;
- s4 = vreinterpret_s16_u16(vget_low_u16(vmovl_u8(vld1_u8(src))));
- src += src_stride;
- s5 = vreinterpret_s16_u16(vget_low_u16(vmovl_u8(vld1_u8(src))));
- src += src_stride;
- s6 = vreinterpret_s16_u16(vget_low_u16(vmovl_u8(vld1_u8(src))));
- src += src_stride;
-
- do {
- s7 = vreinterpret_s16_u16(vget_low_u16(vmovl_u8(vld1_u8(src))));
- src += src_stride;
-#if defined(__aarch64__)
- s8 = vreinterpret_s16_u16(vget_low_u16(vmovl_u8(vld1_u8(src))));
- src += src_stride;
- s9 = vreinterpret_s16_u16(vget_low_u16(vmovl_u8(vld1_u8(src))));
- src += src_stride;
- s10 = vreinterpret_s16_u16(vget_low_u16(vmovl_u8(vld1_u8(src))));
- src += src_stride;
-
- __builtin_prefetch(dst + 0 * dst_stride);
- __builtin_prefetch(dst + 1 * dst_stride);
- __builtin_prefetch(dst + 2 * dst_stride);
- __builtin_prefetch(dst + 3 * dst_stride);
- __builtin_prefetch(src + 0 * src_stride);
- __builtin_prefetch(src + 1 * src_stride);
- __builtin_prefetch(src + 2 * src_stride);
- __builtin_prefetch(src + 3 * src_stride);
- d0 = convolve8_4x4(s0, s1, s2, s3, s4, s5, s6, s7, y_filter);
- d1 = convolve8_4x4(s1, s2, s3, s4, s5, s6, s7, s8, y_filter);
- d2 = convolve8_4x4(s2, s3, s4, s5, s6, s7, s8, s9, y_filter);
- d3 = convolve8_4x4(s3, s4, s5, s6, s7, s8, s9, s10, y_filter);
-
- d01 = vqrshrun_n_s16(vcombine_s16(d0, d1), FILTER_BITS);
- d23 = vqrshrun_n_s16(vcombine_s16(d2, d3), FILTER_BITS);
- if ((w == 4) && (h != 2)) {
- vst1_lane_u32((uint32_t *)dst, vreinterpret_u32_u8(d01),
- 0); // 00 01 02 03
- dst += dst_stride;
- vst1_lane_u32((uint32_t *)dst, vreinterpret_u32_u8(d01),
- 1); // 10 11 12 13
- dst += dst_stride;
- vst1_lane_u32((uint32_t *)dst, vreinterpret_u32_u8(d23),
- 0); // 20 21 22 23
- dst += dst_stride;
- vst1_lane_u32((uint32_t *)dst, vreinterpret_u32_u8(d23),
- 1); // 30 31 32 33
- dst += dst_stride;
- } else if ((w == 4) && (h == 2)) {
- vst1_lane_u32((uint32_t *)dst, vreinterpret_u32_u8(d01),
- 0); // 00 01 02 03
- dst += dst_stride;
- vst1_lane_u32((uint32_t *)dst, vreinterpret_u32_u8(d01),
- 1); // 10 11 12 13
- dst += dst_stride;
- } else if ((w == 2) && (h != 2)) {
- vst1_lane_u16((uint16_t *)dst, vreinterpret_u16_u8(d01), 0); // 00 01
- dst += dst_stride;
- vst1_lane_u16((uint16_t *)dst, vreinterpret_u16_u8(d01), 2); // 10 11
- dst += dst_stride;
- vst1_lane_u16((uint16_t *)dst, vreinterpret_u16_u8(d23), 0); // 20 21
- dst += dst_stride;
- vst1_lane_u16((uint16_t *)dst, vreinterpret_u16_u8(d23), 2); // 30 31
- dst += dst_stride;
- } else if ((w == 2) && (h == 2)) {
- vst1_lane_u16((uint16_t *)dst, vreinterpret_u16_u8(d01), 0); // 00 01
- dst += dst_stride;
- vst1_lane_u16((uint16_t *)dst, vreinterpret_u16_u8(d01), 2); // 10 11
- dst += dst_stride;
- }
- s0 = s4;
- s1 = s5;
- s2 = s6;
- s3 = s7;
- s4 = s8;
- s5 = s9;
- s6 = s10;
- h -= 4;
-#else
- __builtin_prefetch(dst + 0 * dst_stride);
- __builtin_prefetch(src + 0 * src_stride);
-
- d0 = convolve8_4x4(s0, s1, s2, s3, s4, s5, s6, s7, y_filter);
-
- d01 = vqrshrun_n_s16(vcombine_s16(d0, d0), FILTER_BITS);
-
- if (w == 4) {
- vst1_lane_u32((uint32_t *)dst, vreinterpret_u32_u8(d01), 0);
- dst += dst_stride;
- } else if (w == 2) {
- vst1_lane_u16((uint16_t *)dst, vreinterpret_u16_u8(d01), 0);
- dst += dst_stride;
- }
- s0 = s1;
- s1 = s2;
- s2 = s3;
- s3 = s4;
- s4 = s5;
- s5 = s6;
- s6 = s7;
- h -= 1;
-#endif
- } while (h > 0);
- } else {
- int height;
- const uint8_t *s;
- uint8_t *d;
- uint8x8_t t0;
- int16x8_t s0, s1, s2, s3, s4, s5, s6, s7;
-#if defined(__aarch64__)
- uint8x8_t t1, t2, t3;
- int16x8_t s8, s9, s10;
-#endif
- do {
- __builtin_prefetch(src + 0 * src_stride);
- __builtin_prefetch(src + 1 * src_stride);
- __builtin_prefetch(src + 2 * src_stride);
- __builtin_prefetch(src + 3 * src_stride);
- __builtin_prefetch(src + 4 * src_stride);
- __builtin_prefetch(src + 5 * src_stride);
- __builtin_prefetch(src + 6 * src_stride);
- s = src;
- s0 = vreinterpretq_s16_u16(vmovl_u8(vld1_u8(s)));
- s += src_stride;
- s1 = vreinterpretq_s16_u16(vmovl_u8(vld1_u8(s)));
- s += src_stride;
- s2 = vreinterpretq_s16_u16(vmovl_u8(vld1_u8(s)));
- s += src_stride;
- s3 = vreinterpretq_s16_u16(vmovl_u8(vld1_u8(s)));
- s += src_stride;
- s4 = vreinterpretq_s16_u16(vmovl_u8(vld1_u8(s)));
- s += src_stride;
- s5 = vreinterpretq_s16_u16(vmovl_u8(vld1_u8(s)));
- s += src_stride;
- s6 = vreinterpretq_s16_u16(vmovl_u8(vld1_u8(s)));
- s += src_stride;
- d = dst;
- height = h;
-
- do {
- s7 = vreinterpretq_s16_u16(vmovl_u8(vld1_u8(s)));
- s += src_stride;
-#if defined(__aarch64__)
- s8 = vreinterpretq_s16_u16(vmovl_u8(vld1_u8(s)));
- s += src_stride;
- s9 = vreinterpretq_s16_u16(vmovl_u8(vld1_u8(s)));
- s += src_stride;
- s10 = vreinterpretq_s16_u16(vmovl_u8(vld1_u8(s)));
- s += src_stride;
-
- __builtin_prefetch(d + 0 * dst_stride);
- __builtin_prefetch(d + 1 * dst_stride);
- __builtin_prefetch(d + 2 * dst_stride);
- __builtin_prefetch(d + 3 * dst_stride);
- __builtin_prefetch(s + 0 * src_stride);
- __builtin_prefetch(s + 1 * src_stride);
- __builtin_prefetch(s + 2 * src_stride);
- __builtin_prefetch(s + 3 * src_stride);
- t0 = convolve8_vert_8x4(s0, s1, s2, s3, s4, s5, s6, s7, y_filter);
- t1 = convolve8_vert_8x4(s1, s2, s3, s4, s5, s6, s7, s8, y_filter);
- t2 = convolve8_vert_8x4(s2, s3, s4, s5, s6, s7, s8, s9, y_filter);
- t3 = convolve8_vert_8x4(s3, s4, s5, s6, s7, s8, s9, s10, y_filter);
- if (h != 2) {
- vst1_u8(d, t0);
- d += dst_stride;
- vst1_u8(d, t1);
- d += dst_stride;
- vst1_u8(d, t2);
- d += dst_stride;
- vst1_u8(d, t3);
- d += dst_stride;
- } else {
- vst1_u8(d, t0);
- d += dst_stride;
- vst1_u8(d, t1);
- d += dst_stride;
- }
- s0 = s4;
- s1 = s5;
- s2 = s6;
- s3 = s7;
- s4 = s8;
- s5 = s9;
- s6 = s10;
- height -= 4;
-#else
- __builtin_prefetch(d);
- __builtin_prefetch(s);
-
- t0 = convolve8_vert_8x4(s0, s1, s2, s3, s4, s5, s6, s7, y_filter);
-
- vst1_u8(d, t0);
- d += dst_stride;
-
- s0 = s1;
- s1 = s2;
- s2 = s3;
- s3 = s4;
- s4 = s5;
- s5 = s6;
- s6 = s7;
- height -= 1;
-#endif
- } while (height > 0);
- src += 8;
- dst += 8;
- w -= 8;
- } while (w > 0);
- }
-}
-
-void av1_convolve_2d_sr_neon(const uint8_t *src, int src_stride, uint8_t *dst,
- int dst_stride, int w, int h,
- const InterpFilterParams *filter_params_x,
- const InterpFilterParams *filter_params_y,
- const int subpel_x_q4, const int subpel_y_q4,
- ConvolveParams *conv_params) {
- int im_dst_stride;
- int width, height;
- uint8x8_t t0;
-#if defined(__aarch64__)
- uint8x8_t t1, t2, t3, t4, t5, t6, t7;
-#endif
-
- DECLARE_ALIGNED(16, int16_t,
- im_block[(MAX_SB_SIZE + HORIZ_EXTRA_ROWS) * MAX_SB_SIZE]);
-
- const int bd = 8;
- const int im_h = h + filter_params_y->taps - 1;
- const int im_stride = MAX_SB_SIZE;
- const int vert_offset = filter_params_y->taps / 2 - 1;
- const int horiz_offset = filter_params_x->taps / 2 - 1;
-
- const uint8_t *src_ptr = src - vert_offset * src_stride - horiz_offset;
- const uint8_t *s;
- int16_t *dst_ptr;
-
- dst_ptr = im_block;
- im_dst_stride = im_stride;
- height = im_h;
- width = w;
-
- const int16_t round_bits =
- FILTER_BITS * 2 - conv_params->round_0 - conv_params->round_1;
- const int16x8_t vec_round_bits = vdupq_n_s16(-round_bits);
- const int offset_bits = bd + 2 * FILTER_BITS - conv_params->round_0;
- const int16_t *x_filter = av1_get_interp_filter_subpel_kernel(
- filter_params_x, subpel_x_q4 & SUBPEL_MASK);
-
- int16_t x_filter_tmp[8];
- int16x8_t filter_x_coef = vld1q_s16(x_filter);
-
- // filter coeffs are even, so downshifting by 1 to reduce intermediate
- // precision requirements.
- filter_x_coef = vshrq_n_s16(filter_x_coef, 1);
- vst1q_s16(&x_filter_tmp[0], filter_x_coef);
-
- assert(conv_params->round_0 > 0);
-
- if (w <= 4) {
- int16x4_t s0, s1, s2, s3, s4, s5, s6, s7, d0;
-#if defined(__aarch64__)
- int16x4_t s8, s9, s10, d1, d2, d3;
-#endif
-
- const int16x4_t horiz_const = vdup_n_s16((1 << (bd + FILTER_BITS - 2)));
- const int16x4_t shift_round_0 = vdup_n_s16(-(conv_params->round_0 - 1));
-
- do {
- s = src_ptr;
-
-#if defined(__aarch64__)
- __builtin_prefetch(s + 0 * src_stride);
- __builtin_prefetch(s + 1 * src_stride);
- __builtin_prefetch(s + 2 * src_stride);
- __builtin_prefetch(s + 3 * src_stride);
-
- load_u8_8x4(s, src_stride, &t0, &t1, &t2, &t3);
- transpose_u8_8x4(&t0, &t1, &t2, &t3);
-
- s0 = vget_low_s16(vreinterpretq_s16_u16(vmovl_u8(t0)));
- s1 = vget_low_s16(vreinterpretq_s16_u16(vmovl_u8(t1)));
- s2 = vget_low_s16(vreinterpretq_s16_u16(vmovl_u8(t2)));
- s3 = vget_low_s16(vreinterpretq_s16_u16(vmovl_u8(t3)));
- s4 = vget_high_s16(vreinterpretq_s16_u16(vmovl_u8(t0)));
- s5 = vget_high_s16(vreinterpretq_s16_u16(vmovl_u8(t1)));
- s6 = vget_high_s16(vreinterpretq_s16_u16(vmovl_u8(t2)));
-
- __builtin_prefetch(dst_ptr + 0 * im_dst_stride);
- __builtin_prefetch(dst_ptr + 1 * im_dst_stride);
- __builtin_prefetch(dst_ptr + 2 * im_dst_stride);
- __builtin_prefetch(dst_ptr + 3 * im_dst_stride);
- s += 7;
-
- load_u8_8x4(s, src_stride, &t0, &t1, &t2, &t3);
- transpose_u8_8x4(&t0, &t1, &t2, &t3);
-
- s7 = vget_low_s16(vreinterpretq_s16_u16(vmovl_u8(t0)));
- s8 = vget_low_s16(vreinterpretq_s16_u16(vmovl_u8(t1)));
- s9 = vget_low_s16(vreinterpretq_s16_u16(vmovl_u8(t2)));
- s10 = vget_low_s16(vreinterpretq_s16_u16(vmovl_u8(t3)));
-
- d0 = convolve8_4x4_s16(s0, s1, s2, s3, s4, s5, s6, s7, x_filter_tmp,
- horiz_const, shift_round_0);
- d1 = convolve8_4x4_s16(s1, s2, s3, s4, s5, s6, s7, s8, x_filter_tmp,
- horiz_const, shift_round_0);
- d2 = convolve8_4x4_s16(s2, s3, s4, s5, s6, s7, s8, s9, x_filter_tmp,
- horiz_const, shift_round_0);
- d3 = convolve8_4x4_s16(s3, s4, s5, s6, s7, s8, s9, s10, x_filter_tmp,
- horiz_const, shift_round_0);
-
- transpose_s16_4x4d(&d0, &d1, &d2, &d3);
- if (w == 4) {
- vst1_s16((dst_ptr + 0 * im_dst_stride), d0);
- vst1_s16((dst_ptr + 1 * im_dst_stride), d1);
- vst1_s16((dst_ptr + 2 * im_dst_stride), d2);
- vst1_s16((dst_ptr + 3 * im_dst_stride), d3);
- } else if (w == 2) {
- vst1_lane_u32((uint32_t *)(dst_ptr + 0 * im_dst_stride),
- vreinterpret_u32_s16(d0), 0);
- vst1_lane_u32((uint32_t *)(dst_ptr + 1 * im_dst_stride),
- vreinterpret_u32_s16(d1), 0);
- vst1_lane_u32((uint32_t *)(dst_ptr + 2 * im_dst_stride),
- vreinterpret_u32_s16(d2), 0);
- vst1_lane_u32((uint32_t *)(dst_ptr + 3 * im_dst_stride),
- vreinterpret_u32_s16(d3), 0);
- }
- src_ptr += 4 * src_stride;
- dst_ptr += 4 * im_dst_stride;
- height -= 4;
-#else
- int16x8_t tt0;
-
- __builtin_prefetch(s);
-
- t0 = vld1_u8(s); // a0 a1 a2 a3 a4 a5 a6 a7
- tt0 = vreinterpretq_s16_u16(vmovl_u8(t0));
- s0 = vget_low_s16(tt0);
- s4 = vget_high_s16(tt0);
-
- __builtin_prefetch(dst_ptr);
- s += 8;
-
- t0 = vld1_u8(s); // a8 a9 a10 a11 a12 a13 a14 a15
- s7 = vget_low_s16(vreinterpretq_s16_u16(vmovl_u8(t0)));
-
- s1 = vext_s16(s0, s4, 1); // a1 a2 a3 a4
- s2 = vext_s16(s0, s4, 2); // a2 a3 a4 a5
- s3 = vext_s16(s0, s4, 3); // a3 a4 a5 a6
- s5 = vext_s16(s4, s7, 1); // a5 a6 a7 a8
- s6 = vext_s16(s4, s7, 2); // a6 a7 a8 a9
- s7 = vext_s16(s4, s7, 3); // a7 a8 a9 a10
-
- d0 = convolve8_4x4_s16(s0, s1, s2, s3, s4, s5, s6, s7, x_filter_tmp,
- horiz_const, shift_round_0);
-
- if (w == 4) {
- vst1_s16(dst_ptr, d0);
- dst_ptr += im_dst_stride;
- } else if (w == 2) {
- vst1_lane_u32((uint32_t *)dst_ptr, vreinterpret_u32_s16(d0), 0);
- dst_ptr += im_dst_stride;
- }
-
- src_ptr += src_stride;
- height -= 1;
-#endif
- } while (height > 0);
- } else {
- int16_t *d_tmp;
- int16x8_t s0, s1, s2, s3, s4, s5, s6, s7, res0;
-#if defined(__aarch64__)
- int16x8_t s8, s9, s10, res1, res2, res3, res4, res5, res6, res7;
- int16x8_t s11, s12, s13, s14;
-#endif
-
- const int16x8_t horiz_const = vdupq_n_s16((1 << (bd + FILTER_BITS - 2)));
- const int16x8_t shift_round_0 = vdupq_n_s16(-(conv_params->round_0 - 1));
-
-#if defined(__aarch64__)
- do {
- __builtin_prefetch(src_ptr + 0 * src_stride);
- __builtin_prefetch(src_ptr + 1 * src_stride);
- __builtin_prefetch(src_ptr + 2 * src_stride);
- __builtin_prefetch(src_ptr + 3 * src_stride);
- __builtin_prefetch(src_ptr + 4 * src_stride);
- __builtin_prefetch(src_ptr + 5 * src_stride);
- __builtin_prefetch(src_ptr + 6 * src_stride);
- __builtin_prefetch(src_ptr + 7 * src_stride);
-
- load_u8_8x8(src_ptr, src_stride, &t0, &t1, &t2, &t3, &t4, &t5, &t6, &t7);
-
- transpose_u8_8x8(&t0, &t1, &t2, &t3, &t4, &t5, &t6, &t7);
-
- s0 = vreinterpretq_s16_u16(vmovl_u8(t0));
- s1 = vreinterpretq_s16_u16(vmovl_u8(t1));
- s2 = vreinterpretq_s16_u16(vmovl_u8(t2));
- s3 = vreinterpretq_s16_u16(vmovl_u8(t3));
- s4 = vreinterpretq_s16_u16(vmovl_u8(t4));
- s5 = vreinterpretq_s16_u16(vmovl_u8(t5));
- s6 = vreinterpretq_s16_u16(vmovl_u8(t6));
-
- width = w;
- s = src_ptr + 7;
- d_tmp = dst_ptr;
-
- __builtin_prefetch(dst_ptr + 0 * im_dst_stride);
- __builtin_prefetch(dst_ptr + 1 * im_dst_stride);
- __builtin_prefetch(dst_ptr + 2 * im_dst_stride);
- __builtin_prefetch(dst_ptr + 3 * im_dst_stride);
- __builtin_prefetch(dst_ptr + 4 * im_dst_stride);
- __builtin_prefetch(dst_ptr + 5 * im_dst_stride);
- __builtin_prefetch(dst_ptr + 6 * im_dst_stride);
- __builtin_prefetch(dst_ptr + 7 * im_dst_stride);
-
- do {
- load_u8_8x8(s, src_stride, &t0, &t1, &t2, &t3, &t4, &t5, &t6, &t7);
-
- transpose_u8_8x8(&t0, &t1, &t2, &t3, &t4, &t5, &t6, &t7);
-
- s7 = vreinterpretq_s16_u16(vmovl_u8(t0));
- s8 = vreinterpretq_s16_u16(vmovl_u8(t1));
- s9 = vreinterpretq_s16_u16(vmovl_u8(t2));
- s10 = vreinterpretq_s16_u16(vmovl_u8(t3));
- s11 = vreinterpretq_s16_u16(vmovl_u8(t4));
- s12 = vreinterpretq_s16_u16(vmovl_u8(t5));
- s13 = vreinterpretq_s16_u16(vmovl_u8(t6));
- s14 = vreinterpretq_s16_u16(vmovl_u8(t7));
-
- res0 = convolve8_8x8_s16(s0, s1, s2, s3, s4, s5, s6, s7, x_filter_tmp,
- horiz_const, shift_round_0);
- res1 = convolve8_8x8_s16(s1, s2, s3, s4, s5, s6, s7, s8, x_filter_tmp,
- horiz_const, shift_round_0);
- res2 = convolve8_8x8_s16(s2, s3, s4, s5, s6, s7, s8, s9, x_filter_tmp,
- horiz_const, shift_round_0);
- res3 = convolve8_8x8_s16(s3, s4, s5, s6, s7, s8, s9, s10, x_filter_tmp,
- horiz_const, shift_round_0);
- res4 = convolve8_8x8_s16(s4, s5, s6, s7, s8, s9, s10, s11, x_filter_tmp,
- horiz_const, shift_round_0);
- res5 = convolve8_8x8_s16(s5, s6, s7, s8, s9, s10, s11, s12,
- x_filter_tmp, horiz_const, shift_round_0);
- res6 = convolve8_8x8_s16(s6, s7, s8, s9, s10, s11, s12, s13,
- x_filter_tmp, horiz_const, shift_round_0);
- res7 = convolve8_8x8_s16(s7, s8, s9, s10, s11, s12, s13, s14,
- x_filter_tmp, horiz_const, shift_round_0);
-
- transpose_s16_8x8(&res0, &res1, &res2, &res3, &res4, &res5, &res6,
- &res7);
-
- store_s16_8x8(d_tmp, im_dst_stride, res0, res1, res2, res3, res4, res5,
- res6, res7);
-
- s0 = s8;
- s1 = s9;
- s2 = s10;
- s3 = s11;
- s4 = s12;
- s5 = s13;
- s6 = s14;
- s += 8;
- d_tmp += 8;
- width -= 8;
- } while (width > 0);
- src_ptr += 8 * src_stride;
- dst_ptr += 8 * im_dst_stride;
- height -= 8;
- } while (height > 0);
-#else
- do {
- t0 = vld1_u8(src_ptr);
- s0 = vreinterpretq_s16_u16(vmovl_u8(t0)); // a0 a1 a2 a3 a4 a5 a6 a7
-
- width = w;
- s = src_ptr + 8;
- d_tmp = dst_ptr;
-
- __builtin_prefetch(dst_ptr);
-
- do {
- t0 = vld1_u8(s); // a8 a9 a10 a11 a12 a13 a14 a15
- s7 = vreinterpretq_s16_u16(vmovl_u8(t0));
- int16x8_t sum = s0;
- s0 = s7;
-
- s1 = vextq_s16(sum, s7, 1); // a1 a2 a3 a4 a5 a6 a7 a8
- s2 = vextq_s16(sum, s7, 2); // a2 a3 a4 a5 a6 a7 a8 a9
- s3 = vextq_s16(sum, s7, 3); // a3 a4 a5 a6 a7 a8 a9 a10
- s4 = vextq_s16(sum, s7, 4); // a4 a5 a6 a7 a8 a9 a10 a11
- s5 = vextq_s16(sum, s7, 5); // a5 a6 a7 a8 a9 a10 a11 a12
- s6 = vextq_s16(sum, s7, 6); // a6 a7 a8 a9 a10 a11 a12 a13
- s7 = vextq_s16(sum, s7, 7); // a7 a8 a9 a10 a11 a12 a13 a14
-
- res0 = convolve8_8x8_s16(sum, s1, s2, s3, s4, s5, s6, s7, x_filter_tmp,
- horiz_const, shift_round_0);
-
- vst1q_s16(d_tmp, res0);
-
- s += 8;
- d_tmp += 8;
- width -= 8;
- } while (width > 0);
- src_ptr += src_stride;
- dst_ptr += im_dst_stride;
- height -= 1;
- } while (height > 0);
-#endif
- }
-
- // vertical
- {
- uint8_t *dst_u8_ptr, *d_u8;
- int16_t *v_src_ptr, *v_s;
-
- const int32_t sub_const = (1 << (offset_bits - conv_params->round_1)) +
- (1 << (offset_bits - conv_params->round_1 - 1));
- const int16_t *y_filter = av1_get_interp_filter_subpel_kernel(
- filter_params_y, subpel_y_q4 & SUBPEL_MASK);
-
- const int32x4_t round_shift_vec = vdupq_n_s32(-(conv_params->round_1));
- const int32x4_t offset_const = vdupq_n_s32(1 << offset_bits);
- const int32x4_t sub_const_vec = vdupq_n_s32(sub_const);
-
- src_stride = im_stride;
- v_src_ptr = im_block;
- dst_u8_ptr = dst;
-
- height = h;
- width = w;
-
- if (width <= 4) {
- int16x4_t s0, s1, s2, s3, s4, s5, s6, s7;
- uint16x4_t d0;
- uint16x8_t dd0;
- uint8x8_t d01;
-
-#if defined(__aarch64__)
- int16x4_t s8, s9, s10;
- uint16x4_t d1, d2, d3;
- uint16x8_t dd1;
- uint8x8_t d23;
-#endif
-
- d_u8 = dst_u8_ptr;
- v_s = v_src_ptr;
-
- __builtin_prefetch(v_s + 0 * im_stride);
- __builtin_prefetch(v_s + 1 * im_stride);
- __builtin_prefetch(v_s + 2 * im_stride);
- __builtin_prefetch(v_s + 3 * im_stride);
- __builtin_prefetch(v_s + 4 * im_stride);
- __builtin_prefetch(v_s + 5 * im_stride);
- __builtin_prefetch(v_s + 6 * im_stride);
- __builtin_prefetch(v_s + 7 * im_stride);
-
- load_s16_4x8(v_s, im_stride, &s0, &s1, &s2, &s3, &s4, &s5, &s6, &s7);
- v_s += (7 * im_stride);
-
- do {
-#if defined(__aarch64__)
- load_s16_4x4(v_s, im_stride, &s7, &s8, &s9, &s10);
- v_s += (im_stride << 2);
-
- __builtin_prefetch(d_u8 + 0 * dst_stride);
- __builtin_prefetch(d_u8 + 1 * dst_stride);
- __builtin_prefetch(d_u8 + 2 * dst_stride);
- __builtin_prefetch(d_u8 + 3 * dst_stride);
-
- d0 = convolve8_vert_4x4_s32(s0, s1, s2, s3, s4, s5, s6, s7, y_filter,
- round_shift_vec, offset_const,
- sub_const_vec);
- d1 = convolve8_vert_4x4_s32(s1, s2, s3, s4, s5, s6, s7, s8, y_filter,
- round_shift_vec, offset_const,
- sub_const_vec);
- d2 = convolve8_vert_4x4_s32(s2, s3, s4, s5, s6, s7, s8, s9, y_filter,
- round_shift_vec, offset_const,
- sub_const_vec);
- d3 = convolve8_vert_4x4_s32(s3, s4, s5, s6, s7, s8, s9, s10, y_filter,
- round_shift_vec, offset_const,
- sub_const_vec);
-
- dd0 = vqrshlq_u16(vcombine_u16(d0, d1), vec_round_bits);
- dd1 = vqrshlq_u16(vcombine_u16(d2, d3), vec_round_bits);
-
- d01 = vqmovn_u16(dd0);
- d23 = vqmovn_u16(dd1);
-
- if ((w == 4) && (h != 2)) {
- vst1_lane_u32((uint32_t *)d_u8, vreinterpret_u32_u8(d01),
- 0); // 00 01 02 03
- d_u8 += dst_stride;
- vst1_lane_u32((uint32_t *)d_u8, vreinterpret_u32_u8(d01),
- 1); // 10 11 12 13
- d_u8 += dst_stride;
- vst1_lane_u32((uint32_t *)d_u8, vreinterpret_u32_u8(d23),
- 0); // 20 21 22 23
- d_u8 += dst_stride;
- vst1_lane_u32((uint32_t *)d_u8, vreinterpret_u32_u8(d23),
- 1); // 30 31 32 33
- d_u8 += dst_stride;
- } else if ((w == 2) && (h != 2)) {
- vst1_lane_u16((uint16_t *)d_u8, vreinterpret_u16_u8(d01),
- 0); // 00 01
- d_u8 += dst_stride;
- vst1_lane_u16((uint16_t *)d_u8, vreinterpret_u16_u8(d01),
- 2); // 10 11
- d_u8 += dst_stride;
- vst1_lane_u16((uint16_t *)d_u8, vreinterpret_u16_u8(d23),
- 0); // 20 21
- d_u8 += dst_stride;
- vst1_lane_u16((uint16_t *)d_u8, vreinterpret_u16_u8(d23),
- 2); // 30 31
- d_u8 += dst_stride;
- } else if ((w == 4) && (h == 2)) {
- vst1_lane_u32((uint32_t *)d_u8, vreinterpret_u32_u8(d01),
- 0); // 00 01 02 03
- d_u8 += dst_stride;
- vst1_lane_u32((uint32_t *)d_u8, vreinterpret_u32_u8(d01),
- 1); // 10 11 12 13
- d_u8 += dst_stride;
- } else if ((w == 2) && (h == 2)) {
- vst1_lane_u16((uint16_t *)d_u8, vreinterpret_u16_u8(d01),
- 0); // 00 01
- d_u8 += dst_stride;
- vst1_lane_u16((uint16_t *)d_u8, vreinterpret_u16_u8(d01),
- 2); // 10 11
- d_u8 += dst_stride;
- }
-
- s0 = s4;
- s1 = s5;
- s2 = s6;
- s3 = s7;
- s4 = s8;
- s5 = s9;
- s6 = s10;
- height -= 4;
-#else
- s7 = vld1_s16(v_s);
- v_s += im_stride;
-
- __builtin_prefetch(d_u8 + 0 * dst_stride);
-
- d0 = convolve8_vert_4x4_s32(s0, s1, s2, s3, s4, s5, s6, s7, y_filter,
- round_shift_vec, offset_const,
- sub_const_vec);
-
- dd0 = vqrshlq_u16(vcombine_u16(d0, d0), vec_round_bits);
- d01 = vqmovn_u16(dd0);
-
- if (w == 4) {
- vst1_lane_u32((uint32_t *)d_u8, vreinterpret_u32_u8(d01),
- 0); // 00 01 02 03
- d_u8 += dst_stride;
-
- } else if (w == 2) {
- vst1_lane_u16((uint16_t *)d_u8, vreinterpret_u16_u8(d01),
- 0); // 00 01
- d_u8 += dst_stride;
- }
-
- s0 = s1;
- s1 = s2;
- s2 = s3;
- s3 = s4;
- s4 = s5;
- s5 = s6;
- s6 = s7;
- height -= 1;
-#endif
- } while (height > 0);
- } else {
- // if width is a multiple of 8 & height is a multiple of 4
- int16x8_t s0, s1, s2, s3, s4, s5, s6, s7;
- uint8x8_t res0;
-#if defined(__aarch64__)
- int16x8_t s8, s9, s10;
- uint8x8_t res1, res2, res3;
-#endif
-
- do {
- __builtin_prefetch(v_src_ptr + 0 * im_stride);
- __builtin_prefetch(v_src_ptr + 1 * im_stride);
- __builtin_prefetch(v_src_ptr + 2 * im_stride);
- __builtin_prefetch(v_src_ptr + 3 * im_stride);
- __builtin_prefetch(v_src_ptr + 4 * im_stride);
- __builtin_prefetch(v_src_ptr + 5 * im_stride);
- __builtin_prefetch(v_src_ptr + 6 * im_stride);
- __builtin_prefetch(v_src_ptr + 7 * im_stride);
-
- v_s = v_src_ptr;
- load_s16_8x8(v_s, im_stride, &s0, &s1, &s2, &s3, &s4, &s5, &s6, &s7);
- v_s += (7 * im_stride);
-
- d_u8 = dst_u8_ptr;
- height = h;
-
- do {
-#if defined(__aarch64__)
- load_s16_8x4(v_s, im_stride, &s7, &s8, &s9, &s10);
- v_s += (im_stride << 2);
-
- __builtin_prefetch(d_u8 + 4 * dst_stride);
- __builtin_prefetch(d_u8 + 5 * dst_stride);
- __builtin_prefetch(d_u8 + 6 * dst_stride);
- __builtin_prefetch(d_u8 + 7 * dst_stride);
-
- res0 = convolve8_vert_8x4_s32(s0, s1, s2, s3, s4, s5, s6, s7,
- y_filter, round_shift_vec, offset_const,
- sub_const_vec, vec_round_bits);
- res1 = convolve8_vert_8x4_s32(s1, s2, s3, s4, s5, s6, s7, s8,
- y_filter, round_shift_vec, offset_const,
- sub_const_vec, vec_round_bits);
- res2 = convolve8_vert_8x4_s32(s2, s3, s4, s5, s6, s7, s8, s9,
- y_filter, round_shift_vec, offset_const,
- sub_const_vec, vec_round_bits);
- res3 = convolve8_vert_8x4_s32(s3, s4, s5, s6, s7, s8, s9, s10,
- y_filter, round_shift_vec, offset_const,
- sub_const_vec, vec_round_bits);
-
- if (h != 2) {
- vst1_u8(d_u8, res0);
- d_u8 += dst_stride;
- vst1_u8(d_u8, res1);
- d_u8 += dst_stride;
- vst1_u8(d_u8, res2);
- d_u8 += dst_stride;
- vst1_u8(d_u8, res3);
- d_u8 += dst_stride;
- } else {
- vst1_u8(d_u8, res0);
- d_u8 += dst_stride;
- vst1_u8(d_u8, res1);
- d_u8 += dst_stride;
- }
- s0 = s4;
- s1 = s5;
- s2 = s6;
- s3 = s7;
- s4 = s8;
- s5 = s9;
- s6 = s10;
- height -= 4;
-#else
- s7 = vld1q_s16(v_s);
- v_s += im_stride;
-
- __builtin_prefetch(d_u8 + 0 * dst_stride);
-
- res0 = convolve8_vert_8x4_s32(s0, s1, s2, s3, s4, s5, s6, s7,
- y_filter, round_shift_vec, offset_const,
- sub_const_vec, vec_round_bits);
-
- vst1_u8(d_u8, res0);
- d_u8 += dst_stride;
-
- s0 = s1;
- s1 = s2;
- s2 = s3;
- s3 = s4;
- s4 = s5;
- s5 = s6;
- s6 = s7;
- height -= 1;
-#endif
- } while (height > 0);
- v_src_ptr += 8;
- dst_u8_ptr += 8;
- w -= 8;
- } while (w > 0);
- }
- }
-}
-void av1_convolve_2d_copy_sr_neon(const uint8_t *src, int src_stride,
- uint8_t *dst, int dst_stride, int w, int h,
- const InterpFilterParams *filter_params_x,
- const InterpFilterParams *filter_params_y,
- const int subpel_x_q4, const int subpel_y_q4,
- ConvolveParams *conv_params) {
- (void)filter_params_x;
- (void)filter_params_y;
- (void)subpel_x_q4;
- (void)subpel_y_q4;
- (void)conv_params;
-
- const uint8_t *src1;
- uint8_t *dst1;
- int y;
-
- if (!(w & 0x0F)) {
- for (y = 0; y < h; ++y) {
- src1 = src;
- dst1 = dst;
- for (int x = 0; x < (w >> 4); ++x) {
- vst1q_u8(dst1, vld1q_u8(src1));
- src1 += 16;
- dst1 += 16;
- }
- src += src_stride;
- dst += dst_stride;
- }
- } else if (!(w & 0x07)) {
- for (y = 0; y < h; ++y) {
- vst1_u8(dst, vld1_u8(src));
- src += src_stride;
- dst += dst_stride;
- }
- } else if (!(w & 0x03)) {
- for (y = 0; y < h; ++y) {
- vst1_lane_u32((uint32_t *)(dst), vreinterpret_u32_u8(vld1_u8(src)), 0);
- src += src_stride;
- dst += dst_stride;
- }
- } else if (!(w & 0x01)) {
- for (y = 0; y < h; ++y) {
- vst1_lane_u16((uint16_t *)(dst), vreinterpret_u16_u8(vld1_u8(src)), 0);
- src += src_stride;
- dst += dst_stride;
- }
- }
-}
diff --git a/third_party/aom/av1/common/arm/convolve_neon.h b/third_party/aom/av1/common/arm/convolve_neon.h
deleted file mode 100644
index f382984f2..000000000
--- a/third_party/aom/av1/common/arm/convolve_neon.h
+++ /dev/null
@@ -1,228 +0,0 @@
-/*
- * Copyright (c) 2018, Alliance for Open Media. All Rights Reserved.
- *
- * Use of this source code is governed by a BSD-style license
- * that can be found in the LICENSE file in the root of the source
- * tree. An additional intellectual property rights grant can be found
- * in the file PATENTS. All contributing project authors may
- * be found in the AUTHORS file in the root of the source tree.
- */
-
-#ifndef AOM_AV1_COMMON_ARM_CONVOLVE_NEON_H_
-#define AOM_AV1_COMMON_ARM_CONVOLVE_NEON_H_
-
-#include <arm_neon.h>
-
-#define HORIZ_EXTRA_ROWS ((SUBPEL_TAPS + 7) & ~0x07)
-
-static INLINE uint8x8_t wiener_convolve8_vert_4x8(
- const int16x8_t s0, const int16x8_t s1, const int16x8_t s2,
- const int16x8_t s3, const int16x8_t s4, const int16x8_t s5,
- const int16x8_t s6, int16_t *filter_y, const int bd,
- const int round1_bits) {
- int16x8_t ss0, ss1, ss2;
- int32x4_t sum0, sum1;
- uint16x4_t tmp0, tmp1;
- uint16x8_t tmp;
- uint8x8_t res;
-
- const int32_t round_const = (1 << (bd + round1_bits - 1));
- const int32x4_t round_bits = vdupq_n_s32(-round1_bits);
- const int32x4_t zero = vdupq_n_s32(0);
- const int32x4_t round_vec = vdupq_n_s32(round_const);
-
- ss0 = vaddq_s16(s0, s6);
- ss1 = vaddq_s16(s1, s5);
- ss2 = vaddq_s16(s2, s4);
-
- sum0 = vmull_n_s16(vget_low_s16(ss0), filter_y[0]);
- sum0 = vmlal_n_s16(sum0, vget_low_s16(ss1), filter_y[1]);
- sum0 = vmlal_n_s16(sum0, vget_low_s16(ss2), filter_y[2]);
- sum0 = vmlal_n_s16(sum0, vget_low_s16(s3), filter_y[3]);
-
- sum1 = vmull_n_s16(vget_high_s16(ss0), filter_y[0]);
- sum1 = vmlal_n_s16(sum1, vget_high_s16(ss1), filter_y[1]);
- sum1 = vmlal_n_s16(sum1, vget_high_s16(ss2), filter_y[2]);
- sum1 = vmlal_n_s16(sum1, vget_high_s16(s3), filter_y[3]);
-
- sum0 = vsubq_s32(sum0, round_vec);
- sum1 = vsubq_s32(sum1, round_vec);
-
- /* right shift & rounding */
- sum0 = vrshlq_s32(sum0, round_bits);
- sum1 = vrshlq_s32(sum1, round_bits);
-
- sum0 = vmaxq_s32(sum0, zero);
- sum1 = vmaxq_s32(sum1, zero);
-
- /* from int32x4_t to uint8x8_t */
- tmp0 = vqmovn_u32(vreinterpretq_u32_s32(sum0));
- tmp1 = vqmovn_u32(vreinterpretq_u32_s32(sum1));
- tmp = vcombine_u16(tmp0, tmp1);
- res = vqmovn_u16(tmp);
-
- return res;
-}
-
-static INLINE uint16x8_t wiener_convolve8_horiz_8x8(
- const int16x8_t s0, const int16x8_t s1, const int16x8_t s2,
- const int16x8_t s3, int16_t *filter_x, const int bd,
- const int round0_bits) {
- int16x8_t sum;
- uint16x8_t res;
- int32x4_t sum_0, sum_1;
- int32x4_t s3_0, s3_1;
- const int32_t round_const_0 = (1 << (bd + FILTER_BITS - 1));
- const int32_t round_const_1 = (1 << ((bd) + 1 + FILTER_BITS - round0_bits));
-
- /* for the purpose of right shift by { conv_params->round_0 } */
- const int32x4_t round_bits = vdupq_n_s32(-round0_bits);
-
- const int32x4_t round_vec_0 = vdupq_n_s32(round_const_0);
- const int32x4_t round_vec_1 = vdupq_n_s32(round_const_1);
-
- sum = vmulq_n_s16(s0, filter_x[0]);
- sum = vmlaq_n_s16(sum, s1, filter_x[1]);
- sum = vmlaq_n_s16(sum, s2, filter_x[2]);
-
- /* sum from 16x8 to 2 32x4 registers */
- sum_0 = vmovl_s16(vget_low_s16(sum));
- sum_1 = vmovl_s16(vget_high_s16(sum));
-
- /* s[3]*128 -- and filter coef max can be 128
- * then max value possible = 128*128*255 exceeding 16 bit
- */
-
- s3_0 = vmull_n_s16(vget_low_s16(s3), filter_x[3]);
- s3_1 = vmull_n_s16(vget_high_s16(s3), filter_x[3]);
- sum_0 = vaddq_s32(sum_0, s3_0);
- sum_1 = vaddq_s32(sum_1, s3_1);
-
- /* Add the constant value */
- sum_0 = vaddq_s32(sum_0, round_vec_0);
- sum_1 = vaddq_s32(sum_1, round_vec_0);
-
- /* right shift & rounding & saturating */
- sum_0 = vqrshlq_s32(sum_0, round_bits);
- sum_1 = vqrshlq_s32(sum_1, round_bits);
-
- /* Clipping to max value */
- sum_0 = vminq_s32(sum_0, round_vec_1);
- sum_1 = vminq_s32(sum_1, round_vec_1);
-
- res = vcombine_u16(vqmovun_s32(sum_0), vqmovun_s32(sum_1));
- return res;
-}
-
-static INLINE uint16x4_t wiener_convolve8_horiz_4x8(
- const int16x4_t s0, const int16x4_t s1, const int16x4_t s2,
- const int16x4_t s3, const int16x4_t s4, const int16x4_t s5,
- const int16x4_t s6, int16_t *filter_x, const int bd,
- const int round0_bits) {
- uint16x4_t res;
- int32x4_t sum_0, s3_0;
- int16x4_t sum, temp0, temp1, temp2;
-
- const int32_t round_const_0 = (1 << (bd + FILTER_BITS - 1));
- const int32_t round_const_1 = (1 << ((bd) + 1 + FILTER_BITS - round0_bits));
- const int32x4_t round_bits = vdupq_n_s32(-round0_bits);
- const int32x4_t zero = vdupq_n_s32(0);
- const int32x4_t round_vec_0 = vdupq_n_s32(round_const_0);
- const int32x4_t round_vec_1 = vdupq_n_s32(round_const_1);
-
- temp0 = vadd_s16(s0, s6);
- temp1 = vadd_s16(s1, s5);
- temp2 = vadd_s16(s2, s4);
-
- sum = vmul_n_s16(temp0, filter_x[0]);
- sum = vmla_n_s16(sum, temp1, filter_x[1]);
- sum = vmla_n_s16(sum, temp2, filter_x[2]);
- sum_0 = vmovl_s16(sum);
-
- /* s[3]*128 -- and filter coff max can be 128.
- * then max value possible = 128*128*255 Therefore, 32 bits are required to
- * hold the result.
- */
- s3_0 = vmull_n_s16(s3, filter_x[3]);
- sum_0 = vaddq_s32(sum_0, s3_0);
-
- sum_0 = vaddq_s32(sum_0, round_vec_0);
- sum_0 = vrshlq_s32(sum_0, round_bits);
-
- sum_0 = vmaxq_s32(sum_0, zero);
- sum_0 = vminq_s32(sum_0, round_vec_1);
- res = vqmovun_s32(sum_0);
- return res;
-}
-
-static INLINE int16x8_t
-convolve8_8x8_s16(const int16x8_t s0, const int16x8_t s1, const int16x8_t s2,
- const int16x8_t s3, const int16x8_t s4, const int16x8_t s5,
- const int16x8_t s6, const int16x8_t s7, const int16_t *filter,
- const int16x8_t horiz_const, const int16x8_t shift_round_0) {
- int16x8_t sum;
- int16x8_t res;
-
- sum = horiz_const;
- sum = vmlaq_n_s16(sum, s0, filter[0]);
- sum = vmlaq_n_s16(sum, s1, filter[1]);
- sum = vmlaq_n_s16(sum, s2, filter[2]);
- sum = vmlaq_n_s16(sum, s3, filter[3]);
- sum = vmlaq_n_s16(sum, s4, filter[4]);
- sum = vmlaq_n_s16(sum, s5, filter[5]);
- sum = vmlaq_n_s16(sum, s6, filter[6]);
- sum = vmlaq_n_s16(sum, s7, filter[7]);
-
- res = vqrshlq_s16(sum, shift_round_0);
-
- return res;
-}
-
-static INLINE int16x4_t
-convolve8_4x4_s16(const int16x4_t s0, const int16x4_t s1, const int16x4_t s2,
- const int16x4_t s3, const int16x4_t s4, const int16x4_t s5,
- const int16x4_t s6, const int16x4_t s7, const int16_t *filter,
- const int16x4_t horiz_const, const int16x4_t shift_round_0) {
- int16x4_t sum;
- sum = horiz_const;
- sum = vmla_n_s16(sum, s0, filter[0]);
- sum = vmla_n_s16(sum, s1, filter[1]);
- sum = vmla_n_s16(sum, s2, filter[2]);
- sum = vmla_n_s16(sum, s3, filter[3]);
- sum = vmla_n_s16(sum, s4, filter[4]);
- sum = vmla_n_s16(sum, s5, filter[5]);
- sum = vmla_n_s16(sum, s6, filter[6]);
- sum = vmla_n_s16(sum, s7, filter[7]);
-
- sum = vqrshl_s16(sum, shift_round_0);
-
- return sum;
-}
-
-static INLINE uint16x4_t convolve8_4x4_s32(
- const int16x4_t s0, const int16x4_t s1, const int16x4_t s2,
- const int16x4_t s3, const int16x4_t s4, const int16x4_t s5,
- const int16x4_t s6, const int16x4_t s7, const int16_t *y_filter,
- const int32x4_t round_shift_vec, const int32x4_t offset_const) {
- int32x4_t sum0;
- uint16x4_t res;
- const int32x4_t zero = vdupq_n_s32(0);
-
- sum0 = vmull_n_s16(s0, y_filter[0]);
- sum0 = vmlal_n_s16(sum0, s1, y_filter[1]);
- sum0 = vmlal_n_s16(sum0, s2, y_filter[2]);
- sum0 = vmlal_n_s16(sum0, s3, y_filter[3]);
- sum0 = vmlal_n_s16(sum0, s4, y_filter[4]);
- sum0 = vmlal_n_s16(sum0, s5, y_filter[5]);
- sum0 = vmlal_n_s16(sum0, s6, y_filter[6]);
- sum0 = vmlal_n_s16(sum0, s7, y_filter[7]);
-
- sum0 = vaddq_s32(sum0, offset_const);
- sum0 = vqrshlq_s32(sum0, round_shift_vec);
- sum0 = vmaxq_s32(sum0, zero);
- res = vmovn_u32(vreinterpretq_u32_s32(sum0));
-
- return res;
-}
-
-#endif // AOM_AV1_COMMON_ARM_CONVOLVE_NEON_H_
diff --git a/third_party/aom/av1/common/arm/jnt_convolve_neon.c b/third_party/aom/av1/common/arm/jnt_convolve_neon.c
deleted file mode 100644
index e5674ef7c..000000000
--- a/third_party/aom/av1/common/arm/jnt_convolve_neon.c
+++ /dev/null
@@ -1,1740 +0,0 @@
-/*
- * Copyright (c) 2018, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#include <arm_neon.h>
-#include <assert.h>
-
-#include "config/aom_config.h"
-#include "config/av1_rtcd.h"
-
-#include "aom_dsp/txfm_common.h"
-#include "aom_ports/mem.h"
-#include "av1/common/common.h"
-#include "av1/common/arm/convolve_neon.h"
-#include "av1/common/arm/mem_neon.h"
-#include "av1/common/arm/transpose_neon.h"
-
-#if !defined(__aarch64__)
-static INLINE void compute_avg_4x1(uint16x4_t res0, uint16x4_t d0,
- const uint16_t fwd_offset,
- const uint16_t bck_offset,
- const int16x4_t sub_const_vec,
- const int16_t round_bits,
- const int use_jnt_comp_avg, uint8x8_t *t0) {
- int16x4_t tmp0;
- uint16x4_t tmp_u0;
- uint32x4_t sum0;
- int32x4_t dst0;
- int16x8_t tmp4;
-
- if (use_jnt_comp_avg) {
- const int32x4_t round_bits_vec = vdupq_n_s32((int32_t)(-round_bits));
-
- sum0 = vmull_n_u16(res0, fwd_offset);
- sum0 = vmlal_n_u16(sum0, d0, bck_offset);
-
- sum0 = vshrq_n_u32(sum0, DIST_PRECISION_BITS);
-
- dst0 = vsubq_s32(vreinterpretq_s32_u32(sum0), vmovl_s16(sub_const_vec));
-
- dst0 = vqrshlq_s32(dst0, round_bits_vec);
-
- tmp0 = vqmovn_s32(dst0);
- tmp4 = vcombine_s16(tmp0, tmp0);
-
- *t0 = vqmovun_s16(tmp4);
- } else {
- const int16x4_t round_bits_vec = vdup_n_s16(-round_bits);
- tmp_u0 = vhadd_u16(res0, d0);
-
- tmp0 = vsub_s16(vreinterpret_s16_u16(tmp_u0), sub_const_vec);
-
- tmp0 = vqrshl_s16(tmp0, round_bits_vec);
-
- tmp4 = vcombine_s16(tmp0, tmp0);
-
- *t0 = vqmovun_s16(tmp4);
- }
-}
-
-static INLINE void compute_avg_8x1(uint16x8_t res0, uint16x8_t d0,
- const uint16_t fwd_offset,
- const uint16_t bck_offset,
- const int16x4_t sub_const,
- const int16_t round_bits,
- const int use_jnt_comp_avg, uint8x8_t *t0) {
- int16x4_t tmp0, tmp2;
- int16x8_t f0;
- uint32x4_t sum0, sum2;
- int32x4_t dst0, dst2;
-
- uint16x8_t tmp_u0;
-
- if (use_jnt_comp_avg) {
- const int32x4_t sub_const_vec = vmovl_s16(sub_const);
- const int32x4_t round_bits_vec = vdupq_n_s32(-(int32_t)round_bits);
-
- sum0 = vmull_n_u16(vget_low_u16(res0), fwd_offset);
- sum0 = vmlal_n_u16(sum0, vget_low_u16(d0), bck_offset);
- sum0 = vshrq_n_u32(sum0, DIST_PRECISION_BITS);
-
- sum2 = vmull_n_u16(vget_high_u16(res0), fwd_offset);
- sum2 = vmlal_n_u16(sum2, vget_high_u16(d0), bck_offset);
- sum2 = vshrq_n_u32(sum2, DIST_PRECISION_BITS);
-
- dst0 = vsubq_s32(vreinterpretq_s32_u32(sum0), sub_const_vec);
- dst2 = vsubq_s32(vreinterpretq_s32_u32(sum2), sub_const_vec);
-
- dst0 = vqrshlq_s32(dst0, round_bits_vec);
- dst2 = vqrshlq_s32(dst2, round_bits_vec);
-
- tmp0 = vqmovn_s32(dst0);
- tmp2 = vqmovn_s32(dst2);
-
- f0 = vcombine_s16(tmp0, tmp2);
-
- *t0 = vqmovun_s16(f0);
-
- } else {
- const int16x8_t sub_const_vec = vcombine_s16(sub_const, sub_const);
- const int16x8_t round_bits_vec = vdupq_n_s16(-round_bits);
-
- tmp_u0 = vhaddq_u16(res0, d0);
-
- f0 = vsubq_s16(vreinterpretq_s16_u16(tmp_u0), sub_const_vec);
-
- f0 = vqrshlq_s16(f0, round_bits_vec);
-
- *t0 = vqmovun_s16(f0);
- }
-}
-#endif // !defined(__arch64__)
-
-static INLINE void compute_avg_4x4(
- uint16x4_t res0, uint16x4_t res1, uint16x4_t res2, uint16x4_t res3,
- uint16x4_t d0, uint16x4_t d1, uint16x4_t d2, uint16x4_t d3,
- const uint16_t fwd_offset, const uint16_t bck_offset,
- const int16x4_t sub_const_vec, const int16_t round_bits,
- const int use_jnt_comp_avg, uint8x8_t *t0, uint8x8_t *t1) {
- int16x4_t tmp0, tmp1, tmp2, tmp3;
- uint16x4_t tmp_u0, tmp_u1, tmp_u2, tmp_u3;
- uint32x4_t sum0, sum1, sum2, sum3;
-
- int32x4_t dst0, dst1, dst2, dst3;
- int16x8_t tmp4, tmp5;
- const int16x8_t zero = vdupq_n_s16(0);
-
- if (use_jnt_comp_avg) {
- const int32x4_t round_bits_vec = vdupq_n_s32((int32_t)(-round_bits));
- const int32x4_t const_vec = vmovl_s16(sub_const_vec);
-
- sum0 = vmull_n_u16(res0, fwd_offset);
- sum0 = vmlal_n_u16(sum0, d0, bck_offset);
- sum1 = vmull_n_u16(res1, fwd_offset);
- sum1 = vmlal_n_u16(sum1, d1, bck_offset);
- sum2 = vmull_n_u16(res2, fwd_offset);
- sum2 = vmlal_n_u16(sum2, d2, bck_offset);
- sum3 = vmull_n_u16(res3, fwd_offset);
- sum3 = vmlal_n_u16(sum3, d3, bck_offset);
-
- sum0 = vshrq_n_u32(sum0, DIST_PRECISION_BITS);
- sum1 = vshrq_n_u32(sum1, DIST_PRECISION_BITS);
- sum2 = vshrq_n_u32(sum2, DIST_PRECISION_BITS);
- sum3 = vshrq_n_u32(sum3, DIST_PRECISION_BITS);
-
- dst0 = vsubq_s32(vreinterpretq_s32_u32(sum0), const_vec);
- dst1 = vsubq_s32(vreinterpretq_s32_u32(sum1), const_vec);
- dst2 = vsubq_s32(vreinterpretq_s32_u32(sum2), const_vec);
- dst3 = vsubq_s32(vreinterpretq_s32_u32(sum3), const_vec);
-
- dst0 = vqrshlq_s32(dst0, round_bits_vec);
- dst1 = vqrshlq_s32(dst1, round_bits_vec);
- dst2 = vqrshlq_s32(dst2, round_bits_vec);
- dst3 = vqrshlq_s32(dst3, round_bits_vec);
-
- tmp0 = vqmovn_s32(dst0);
- tmp1 = vqmovn_s32(dst1);
- tmp2 = vqmovn_s32(dst2);
- tmp3 = vqmovn_s32(dst3);
- tmp4 = vcombine_s16(tmp0, tmp1);
- tmp5 = vcombine_s16(tmp2, tmp3);
- tmp4 = vmaxq_s16(tmp4, zero);
- tmp5 = vmaxq_s16(tmp5, zero);
-
- *t0 = vqmovn_u16(vreinterpretq_u16_s16(tmp4));
- *t1 = vqmovn_u16(vreinterpretq_u16_s16(tmp5));
- } else {
- const int16x4_t round_bits_vec = vdup_n_s16(-round_bits);
- tmp_u0 = vhadd_u16(res0, d0);
- tmp_u1 = vhadd_u16(res1, d1);
- tmp_u2 = vhadd_u16(res2, d2);
- tmp_u3 = vhadd_u16(res3, d3);
-
- tmp0 = vsub_s16(vreinterpret_s16_u16(tmp_u0), sub_const_vec);
- tmp1 = vsub_s16(vreinterpret_s16_u16(tmp_u1), sub_const_vec);
- tmp2 = vsub_s16(vreinterpret_s16_u16(tmp_u2), sub_const_vec);
- tmp3 = vsub_s16(vreinterpret_s16_u16(tmp_u3), sub_const_vec);
-
- tmp0 = vqrshl_s16(tmp0, round_bits_vec);
- tmp1 = vqrshl_s16(tmp1, round_bits_vec);
- tmp2 = vqrshl_s16(tmp2, round_bits_vec);
- tmp3 = vqrshl_s16(tmp3, round_bits_vec);
-
- tmp4 = vcombine_s16(tmp0, tmp1);
- tmp5 = vcombine_s16(tmp2, tmp3);
- tmp4 = vmaxq_s16(tmp4, zero);
- tmp5 = vmaxq_s16(tmp5, zero);
-
- *t0 = vqmovn_u16(vreinterpretq_u16_s16(tmp4));
- *t1 = vqmovn_u16(vreinterpretq_u16_s16(tmp5));
- }
-}
-
-static INLINE void compute_avg_8x4(
- uint16x8_t res0, uint16x8_t res1, uint16x8_t res2, uint16x8_t res3,
- uint16x8_t d0, uint16x8_t d1, uint16x8_t d2, uint16x8_t d3,
- const uint16_t fwd_offset, const uint16_t bck_offset,
- const int16x4_t sub_const, const int16_t round_bits,
- const int use_jnt_comp_avg, uint8x8_t *t0, uint8x8_t *t1, uint8x8_t *t2,
- uint8x8_t *t3) {
- int16x4_t tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7;
- int16x8_t f0, f1, f2, f3;
- uint32x4_t sum0, sum1, sum2, sum3;
- uint32x4_t sum4, sum5, sum6, sum7;
- int32x4_t dst0, dst1, dst2, dst3;
- int32x4_t dst4, dst5, dst6, dst7;
- uint16x8_t tmp_u0, tmp_u1, tmp_u2, tmp_u3;
- const int16x8_t zero = vdupq_n_s16(0);
-
- if (use_jnt_comp_avg) {
- const int32x4_t sub_const_vec = vmovl_s16(sub_const);
- const int32x4_t round_bits_vec = vdupq_n_s32(-(int32_t)round_bits);
-
- sum0 = vmull_n_u16(vget_low_u16(res0), fwd_offset);
- sum0 = vmlal_n_u16(sum0, vget_low_u16(d0), bck_offset);
- sum1 = vmull_n_u16(vget_low_u16(res1), fwd_offset);
- sum1 = vmlal_n_u16(sum1, vget_low_u16(d1), bck_offset);
- sum0 = vshrq_n_u32(sum0, DIST_PRECISION_BITS);
- sum1 = vshrq_n_u32(sum1, DIST_PRECISION_BITS);
-
- sum2 = vmull_n_u16(vget_high_u16(res0), fwd_offset);
- sum2 = vmlal_n_u16(sum2, vget_high_u16(d0), bck_offset);
- sum3 = vmull_n_u16(vget_high_u16(res1), fwd_offset);
- sum3 = vmlal_n_u16(sum3, vget_high_u16(d1), bck_offset);
- sum2 = vshrq_n_u32(sum2, DIST_PRECISION_BITS);
- sum3 = vshrq_n_u32(sum3, DIST_PRECISION_BITS);
-
- sum4 = vmull_n_u16(vget_low_u16(res2), fwd_offset);
- sum4 = vmlal_n_u16(sum4, vget_low_u16(d2), bck_offset);
- sum5 = vmull_n_u16(vget_low_u16(res3), fwd_offset);
- sum5 = vmlal_n_u16(sum5, vget_low_u16(d3), bck_offset);
- sum4 = vshrq_n_u32(sum4, DIST_PRECISION_BITS);
- sum5 = vshrq_n_u32(sum5, DIST_PRECISION_BITS);
-
- sum6 = vmull_n_u16(vget_high_u16(res2), fwd_offset);
- sum6 = vmlal_n_u16(sum6, vget_high_u16(d2), bck_offset);
- sum7 = vmull_n_u16(vget_high_u16(res3), fwd_offset);
- sum7 = vmlal_n_u16(sum7, vget_high_u16(d3), bck_offset);
- sum6 = vshrq_n_u32(sum6, DIST_PRECISION_BITS);
- sum7 = vshrq_n_u32(sum7, DIST_PRECISION_BITS);
-
- dst0 = vsubq_s32(vreinterpretq_s32_u32(sum0), sub_const_vec);
- dst1 = vsubq_s32(vreinterpretq_s32_u32(sum1), sub_const_vec);
- dst2 = vsubq_s32(vreinterpretq_s32_u32(sum2), sub_const_vec);
- dst3 = vsubq_s32(vreinterpretq_s32_u32(sum3), sub_const_vec);
- dst4 = vsubq_s32(vreinterpretq_s32_u32(sum4), sub_const_vec);
- dst5 = vsubq_s32(vreinterpretq_s32_u32(sum5), sub_const_vec);
- dst6 = vsubq_s32(vreinterpretq_s32_u32(sum6), sub_const_vec);
- dst7 = vsubq_s32(vreinterpretq_s32_u32(sum7), sub_const_vec);
-
- dst0 = vqrshlq_s32(dst0, round_bits_vec);
- dst1 = vqrshlq_s32(dst1, round_bits_vec);
- dst2 = vqrshlq_s32(dst2, round_bits_vec);
- dst3 = vqrshlq_s32(dst3, round_bits_vec);
- dst4 = vqrshlq_s32(dst4, round_bits_vec);
- dst5 = vqrshlq_s32(dst5, round_bits_vec);
- dst6 = vqrshlq_s32(dst6, round_bits_vec);
- dst7 = vqrshlq_s32(dst7, round_bits_vec);
-
- tmp0 = vqmovn_s32(dst0);
- tmp1 = vqmovn_s32(dst1);
- tmp2 = vqmovn_s32(dst2);
- tmp3 = vqmovn_s32(dst3);
- tmp4 = vqmovn_s32(dst4);
- tmp5 = vqmovn_s32(dst5);
- tmp6 = vqmovn_s32(dst6);
- tmp7 = vqmovn_s32(dst7);
-
- f0 = vcombine_s16(tmp0, tmp2);
- f1 = vcombine_s16(tmp1, tmp3);
- f2 = vcombine_s16(tmp4, tmp6);
- f3 = vcombine_s16(tmp5, tmp7);
-
- f0 = vmaxq_s16(f0, zero);
- f1 = vmaxq_s16(f1, zero);
- f2 = vmaxq_s16(f2, zero);
- f3 = vmaxq_s16(f3, zero);
-
- *t0 = vqmovn_u16(vreinterpretq_u16_s16(f0));
- *t1 = vqmovn_u16(vreinterpretq_u16_s16(f1));
- *t2 = vqmovn_u16(vreinterpretq_u16_s16(f2));
- *t3 = vqmovn_u16(vreinterpretq_u16_s16(f3));
-
- } else {
- const int16x8_t sub_const_vec = vcombine_s16(sub_const, sub_const);
- const int16x8_t round_bits_vec = vdupq_n_s16(-round_bits);
-
- tmp_u0 = vhaddq_u16(res0, d0);
- tmp_u1 = vhaddq_u16(res1, d1);
- tmp_u2 = vhaddq_u16(res2, d2);
- tmp_u3 = vhaddq_u16(res3, d3);
-
- f0 = vsubq_s16(vreinterpretq_s16_u16(tmp_u0), sub_const_vec);
- f1 = vsubq_s16(vreinterpretq_s16_u16(tmp_u1), sub_const_vec);
- f2 = vsubq_s16(vreinterpretq_s16_u16(tmp_u2), sub_const_vec);
- f3 = vsubq_s16(vreinterpretq_s16_u16(tmp_u3), sub_const_vec);
-
- f0 = vqrshlq_s16(f0, round_bits_vec);
- f1 = vqrshlq_s16(f1, round_bits_vec);
- f2 = vqrshlq_s16(f2, round_bits_vec);
- f3 = vqrshlq_s16(f3, round_bits_vec);
-
- f0 = vmaxq_s16(f0, zero);
- f1 = vmaxq_s16(f1, zero);
- f2 = vmaxq_s16(f2, zero);
- f3 = vmaxq_s16(f3, zero);
-
- *t0 = vqmovn_u16(vreinterpretq_u16_s16(f0));
- *t1 = vqmovn_u16(vreinterpretq_u16_s16(f1));
- *t2 = vqmovn_u16(vreinterpretq_u16_s16(f2));
- *t3 = vqmovn_u16(vreinterpretq_u16_s16(f3));
- }
-}
-/*
- * Horizontal pass of the two-stage (2D) compound convolution.
- *
- * Runs the 8-input convolve helpers (convolve8_4x4_s16 / convolve8_8x8_s16)
- * along each row of `src` with the coefficients in `x_filter_tmp` and stores
- * the 16-bit results to `im_block`.
- *
- *   src, src_stride      source pixels and their row pitch
- *   im_block, im_stride  destination for the 16-bit rows and its row pitch
- *   x_filter_tmp         coefficients handed unchanged to the convolve helpers
- *   im_h                 number of rows to produce
- *   w                    number of columns to produce; w == 4 selects the
- *                        4-wide path, any other value is processed 8 columns
- *                        at a time
- *   round_0              rounding amount; handed to the helpers negated
- *                        (presumably so that their shift acts as a right
- *                        shift; TODO confirm against convolve8_*_s16)
- *
- * On __aarch64__ the work is done in transposed tiles of 4 rows (w == 4) or
- * 8 rows (otherwise); other builds process one row per iteration.
- *
- * NOTE: the __aarch64__ loops decrement `height` by 4 or 8 until it is no
- * longer positive, so when im_h is not a multiple of that step, rows beyond
- * im_h are also read from `src` and written to `im_block`.
- */
-static INLINE void jnt_convolve_2d_horiz_neon(
- const uint8_t *src, int src_stride, int16_t *im_block, const int im_stride,
- int16_t *x_filter_tmp, const int im_h, int w, const int round_0) {
- const int bd = 8; // fixed at 8 here (presumably the sample bit depth)
- const uint8_t *s;
- int16_t *dst_ptr;
- int dst_stride;
- int width, height;
-
- dst_ptr = im_block;
- dst_stride = im_stride;
- height = im_h;
- width = w;
-
- if (w == 4) {
- int16x4_t s0, s1, s2, s3, s4, s5, s6, s7, d0;
- int16x8_t tt0;
- uint8x8_t t0;
-
- const int16x4_t horiz_const = vdup_n_s16((1 << (bd + FILTER_BITS - 2)));
- const int16x4_t shift_round_0 = vdup_n_s16(-(round_0));
-
-#if defined(__aarch64__)
- int16x4_t s8, s9, s10, d1, d2, d3;
- int16x8_t tt1, tt2, tt3;
- uint8x8_t t1, t2, t3;
-#endif
- do {
- s = src;
- __builtin_prefetch(s + 0 * src_stride);
-#if defined(__aarch64__)
- __builtin_prefetch(s + 1 * src_stride);
- __builtin_prefetch(s + 2 * src_stride);
- __builtin_prefetch(s + 3 * src_stride);
-
- load_u8_8x4(s, src_stride, &t0, &t1, &t2, &t3);
- transpose_u8_8x4(&t0, &t1, &t2, &t3);
- tt0 = vreinterpretq_s16_u16(vmovl_u8(t0));
- tt1 = vreinterpretq_s16_u16(vmovl_u8(t1));
- tt2 = vreinterpretq_s16_u16(vmovl_u8(t2));
- tt3 = vreinterpretq_s16_u16(vmovl_u8(t3));
- s0 = vget_low_s16(tt0);
- s1 = vget_low_s16(tt1);
- s2 = vget_low_s16(tt2);
- s3 = vget_low_s16(tt3);
- s4 = vget_high_s16(tt0);
- s5 = vget_high_s16(tt1);
- s6 = vget_high_s16(tt2);
- __builtin_prefetch(dst_ptr + 0 * dst_stride);
- __builtin_prefetch(dst_ptr + 1 * dst_stride);
- __builtin_prefetch(dst_ptr + 2 * dst_stride);
- __builtin_prefetch(dst_ptr + 3 * dst_stride);
- s += 7; // s0..s6 are filled; the next load supplies s7..s10
-
- load_u8_8x4(s, src_stride, &t0, &t1, &t2, &t3);
- transpose_u8_8x4(&t0, &t1, &t2, &t3);
- tt0 = vreinterpretq_s16_u16(vmovl_u8(t0));
- tt1 = vreinterpretq_s16_u16(vmovl_u8(t1));
- tt2 = vreinterpretq_s16_u16(vmovl_u8(t2));
- tt3 = vreinterpretq_s16_u16(vmovl_u8(t3));
- s7 = vget_low_s16(tt0);
- s8 = vget_low_s16(tt1);
- s9 = vget_low_s16(tt2);
- s10 = vget_low_s16(tt3);
-
- d0 = convolve8_4x4_s16(s0, s1, s2, s3, s4, s5, s6, s7, x_filter_tmp,
- horiz_const, shift_round_0);
- d1 = convolve8_4x4_s16(s1, s2, s3, s4, s5, s6, s7, s8, x_filter_tmp,
- horiz_const, shift_round_0);
- d2 = convolve8_4x4_s16(s2, s3, s4, s5, s6, s7, s8, s9, x_filter_tmp,
- horiz_const, shift_round_0);
- d3 = convolve8_4x4_s16(s3, s4, s5, s6, s7, s8, s9, s10, x_filter_tmp,
- horiz_const, shift_round_0);
-
- transpose_s16_4x4d(&d0, &d1, &d2, &d3); // back to one vector per output row
-
- vst1_s16((dst_ptr + 0 * dst_stride), d0);
- vst1_s16((dst_ptr + 1 * dst_stride), d1);
- vst1_s16((dst_ptr + 2 * dst_stride), d2);
- vst1_s16((dst_ptr + 3 * dst_stride), d3);
-
- src += 4 * src_stride;
- dst_ptr += 4 * dst_stride;
- height -= 4; // four rows are produced per iteration
-#else
- t0 = vld1_u8(s); // a0 a1 a2 a3 a4 a5 a6 a7
- tt0 = vreinterpretq_s16_u16(vmovl_u8(t0)); // a0 a1 a2 a3 a4 a5 a6 a7
- s0 = vget_low_s16(tt0); // a0 a1 a2 a3
- s4 = vget_high_s16(tt0); // a4 a5 a6 a7
- __builtin_prefetch(dst_ptr);
- s += 8;
- t0 = vld1_u8(s); // a8 a9 a10 a11
-
- // a8 a9 a10 a11
- s7 = vget_low_s16(vreinterpretq_s16_u16(vmovl_u8(t0)));
-
- s1 = vext_s16(s0, s4, 1); // a1 a2 a3 a4
- s2 = vext_s16(s0, s4, 2); // a2 a3 a4 a5
- s3 = vext_s16(s0, s4, 3); // a3 a4 a5 a6
- s5 = vext_s16(s4, s7, 1); // a5 a6 a7 a8
- s6 = vext_s16(s4, s7, 2); // a6 a7 a8 a9
- s7 = vext_s16(s4, s7, 3); // a7 a8 a9 a10
-
- d0 = convolve8_4x4_s16(s0, s1, s2, s3, s4, s5, s6, s7, x_filter_tmp,
- horiz_const, shift_round_0);
-
- vst1_s16(dst_ptr, d0);
-
- src += src_stride;
- dst_ptr += dst_stride;
- height -= 1; // one row is produced per iteration
-#endif
- } while (height > 0);
- } else {
- int16_t *d_tmp;
- int16x8_t s0, s1, s2, s3, s4, s5, s6, s7;
- int16x8_t res0;
- uint8x8_t t0;
-
- const int16x8_t horiz_const = vdupq_n_s16((1 << (bd + FILTER_BITS - 2)));
- const int16x8_t shift_round_0 = vdupq_n_s16(-(round_0));
- do {
-#if defined(__aarch64__)
- uint8x8_t t1, t2, t3, t4, t5, t6, t7;
- int16x8_t s8, s9, s10, s11, s12, s13, s14;
- int16x8_t res1, res2, res3, res4, res5, res6, res7;
- __builtin_prefetch(src + 0 * src_stride);
- __builtin_prefetch(src + 1 * src_stride);
- __builtin_prefetch(src + 2 * src_stride);
- __builtin_prefetch(src + 3 * src_stride);
- __builtin_prefetch(src + 4 * src_stride);
- __builtin_prefetch(src + 5 * src_stride);
- __builtin_prefetch(src + 6 * src_stride);
- __builtin_prefetch(src + 7 * src_stride);
- load_u8_8x8(src, src_stride, &t0, &t1, &t2, &t3, &t4, &t5, &t6, &t7);
- transpose_u8_8x8(&t0, &t1, &t2, &t3, &t4, &t5, &t6, &t7);
- s0 = vreinterpretq_s16_u16(vmovl_u8(t0));
- s1 = vreinterpretq_s16_u16(vmovl_u8(t1));
- s2 = vreinterpretq_s16_u16(vmovl_u8(t2));
- s3 = vreinterpretq_s16_u16(vmovl_u8(t3));
- s4 = vreinterpretq_s16_u16(vmovl_u8(t4));
- s5 = vreinterpretq_s16_u16(vmovl_u8(t5));
- s6 = vreinterpretq_s16_u16(vmovl_u8(t6));
-
- width = w;
- s = src + 7; // s0..s6 are filled; the inner loop loads s7..s14
- d_tmp = dst_ptr;
- __builtin_prefetch(dst_ptr + 0 * dst_stride);
- __builtin_prefetch(dst_ptr + 1 * dst_stride);
- __builtin_prefetch(dst_ptr + 2 * dst_stride);
- __builtin_prefetch(dst_ptr + 3 * dst_stride);
- __builtin_prefetch(dst_ptr + 4 * dst_stride);
- __builtin_prefetch(dst_ptr + 5 * dst_stride);
- __builtin_prefetch(dst_ptr + 6 * dst_stride);
- __builtin_prefetch(dst_ptr + 7 * dst_stride);
-
- do {
- load_u8_8x8(s, src_stride, &t0, &t1, &t2, &t3, &t4, &t5, &t6, &t7);
- transpose_u8_8x8(&t0, &t1, &t2, &t3, &t4, &t5, &t6, &t7);
- s7 = vreinterpretq_s16_u16(vmovl_u8(t0));
- s8 = vreinterpretq_s16_u16(vmovl_u8(t1));
- s9 = vreinterpretq_s16_u16(vmovl_u8(t2));
- s10 = vreinterpretq_s16_u16(vmovl_u8(t3));
- s11 = vreinterpretq_s16_u16(vmovl_u8(t4));
- s12 = vreinterpretq_s16_u16(vmovl_u8(t5));
- s13 = vreinterpretq_s16_u16(vmovl_u8(t6));
- s14 = vreinterpretq_s16_u16(vmovl_u8(t7));
-
- res0 = convolve8_8x8_s16(s0, s1, s2, s3, s4, s5, s6, s7, x_filter_tmp,
- horiz_const, shift_round_0);
- res1 = convolve8_8x8_s16(s1, s2, s3, s4, s5, s6, s7, s8, x_filter_tmp,
- horiz_const, shift_round_0);
- res2 = convolve8_8x8_s16(s2, s3, s4, s5, s6, s7, s8, s9, x_filter_tmp,
- horiz_const, shift_round_0);
- res3 = convolve8_8x8_s16(s3, s4, s5, s6, s7, s8, s9, s10, x_filter_tmp,
- horiz_const, shift_round_0);
- res4 = convolve8_8x8_s16(s4, s5, s6, s7, s8, s9, s10, s11, x_filter_tmp,
- horiz_const, shift_round_0);
- res5 = convolve8_8x8_s16(s5, s6, s7, s8, s9, s10, s11, s12,
- x_filter_tmp, horiz_const, shift_round_0);
- res6 = convolve8_8x8_s16(s6, s7, s8, s9, s10, s11, s12, s13,
- x_filter_tmp, horiz_const, shift_round_0);
- res7 = convolve8_8x8_s16(s7, s8, s9, s10, s11, s12, s13, s14,
- x_filter_tmp, horiz_const, shift_round_0);
-
- transpose_s16_8x8(&res0, &res1, &res2, &res3, &res4, &res5, &res6,
- &res7);
-
- store_s16_8x8(d_tmp, dst_stride, res0, res1, res2, res3, res4, res5,
- res6, res7);
- s0 = s8; // carry the last seven vectors over to the next 8-column step
- s1 = s9;
- s2 = s10;
- s3 = s11;
- s4 = s12;
- s5 = s13;
- s6 = s14;
- s += 8;
- d_tmp += 8;
- width -= 8;
- } while (width > 0);
- src += 8 * src_stride;
- dst_ptr += 8 * dst_stride;
- height -= 8; // eight rows are produced per outer iteration
-#else
- int16x8_t temp_0;
- t0 = vld1_u8(src);
- s0 = vreinterpretq_s16_u16(vmovl_u8(t0)); // a0 a1 a2 a3 a4 a5 a6 a7
-
- width = w;
- s = src + 8;
- d_tmp = dst_ptr;
- __builtin_prefetch(dst_ptr);
-
- do {
- t0 = vld1_u8(s); // a8 a9 a10 a11 a12 a13 a14 a15
- s7 = vreinterpretq_s16_u16(vmovl_u8(t0));
- temp_0 = s0; // current 8 samples, used as the first convolve input
- s0 = s7; // the 8 samples just loaded start the next iteration
-
- s1 = vextq_s16(temp_0, s7, 1); // a1 a2 a3 a4 a5 a6 a7 a8
- s2 = vextq_s16(temp_0, s7, 2); // a2 a3 a4 a5 a6 a7 a8 a9
- s3 = vextq_s16(temp_0, s7, 3); // a3 a4 a5 a6 a7 a8 a9 a10
- s4 = vextq_s16(temp_0, s7, 4); // a4 a5 a6 a7 a8 a9 a10 a11
- s5 = vextq_s16(temp_0, s7, 5); // a5 a6 a7 a8 a9 a10 a11 a12
- s6 = vextq_s16(temp_0, s7, 6); // a6 a7 a8 a9 a10 a11 a12 a13
- s7 = vextq_s16(temp_0, s7, 7); // a7 a8 a9 a10 a11 a12 a13 a14
-
- res0 = convolve8_8x8_s16(temp_0, s1, s2, s3, s4, s5, s6, s7,
- x_filter_tmp, horiz_const, shift_round_0);
- vst1q_s16(d_tmp, res0);
-
- s += 8;
- d_tmp += 8;
- width -= 8;
- } while (width > 0);
- src += src_stride;
- dst_ptr += dst_stride;
- height -= 1;
-#endif
- } while (height > 0);
- }
-}
-/*
- * Vertical pass of the two-stage (2D) compound convolution.
- *
- * Reads the 16-bit rows in `im_block`, runs convolve8_4x4_s32 down each
- * column with `y_filter`, and then, depending on conv_params->do_average:
- *   zero:     stores the 16-bit results to conv_params->dst;
- *   non-zero: loads the values already in conv_params->dst, combines them
- *             with the new results through compute_avg_4x4 / compute_avg_4x1
- *             and stores the resulting bytes to `dst8`.
- *
- *   im_block, im_stride  16-bit input rows and their row pitch
- *   dst8, dst8_stride    8-bit output, written only when do_average is set
- *   conv_params          supplies dst, dst_stride, round_0, round_1,
- *                        fwd_offset, bck_offset, do_average and
- *                        use_jnt_comp_avg
- *   y_filter             coefficients handed unchanged to convolve8_4x4_s32
- *   h, w                 output size; the outer loop consumes w in steps of 4
- *                        columns, the inner loop walks the rows (4 per
- *                        iteration on __aarch64__, 1 otherwise)
- *
- * `offset_bits` and `offset` are computed from the same expression.
- */
-static INLINE void jnt_convolve_2d_vert_neon(
- int16_t *im_block, const int im_stride, uint8_t *dst8, int dst8_stride,
- ConvolveParams *conv_params, const int16_t *y_filter, int h, int w) {
- uint8_t *dst_u8_ptr, *d_u8;
- CONV_BUF_TYPE *dst_ptr, *dst;
- int16_t *src_ptr, *s;
- uint16_t *d;
-
- const int bd = 8; // fixed at 8 here (presumably the sample bit depth)
- int height;
- int dst_stride = conv_params->dst_stride;
- const int offset_bits = bd + 2 * FILTER_BITS - conv_params->round_0;
- const int16_t sub_const = (1 << (offset_bits - conv_params->round_1)) +
- (1 << (offset_bits - conv_params->round_1 - 1));
-
- const int16_t round_bits =
- 2 * FILTER_BITS - conv_params->round_0 - conv_params->round_1;
- const int offset = bd + 2 * FILTER_BITS - conv_params->round_0;
- const int32x4_t round_shift_vec = vdupq_n_s32(-(conv_params->round_1));
- const int32x4_t offset_const = vdupq_n_s32(1 << offset);
- const int16x4_t sub_const_vec = vdup_n_s16(sub_const);
- const uint16_t fwd_offset = conv_params->fwd_offset;
- const uint16_t bck_offset = conv_params->bck_offset;
- const int do_average = conv_params->do_average;
- const int use_jnt_comp_avg = conv_params->use_jnt_comp_avg;
-
- int16x4_t s0, s1, s2, s3, s4, s5, s6, s7;
- uint16x4_t res4, d0;
- uint8x8_t t0;
-
-#if defined(__aarch64__)
- int16x4_t s8, s9, s10;
- uint16x4_t res5, res6, res7, d1, d2, d3;
- uint8x8_t t1;
-#endif
-
- dst = conv_params->dst;
- src_ptr = im_block;
- dst_u8_ptr = dst8;
- dst_ptr = dst;
- height = h; // also re-assigned at the top of every column strip below
-
- do {
- d = dst_ptr;
- d_u8 = dst_u8_ptr;
- s = src_ptr;
- height = h;
-
- __builtin_prefetch(s + 0 * im_stride);
- __builtin_prefetch(s + 1 * im_stride);
- __builtin_prefetch(s + 2 * im_stride);
- __builtin_prefetch(s + 3 * im_stride);
- __builtin_prefetch(s + 4 * im_stride);
- __builtin_prefetch(s + 5 * im_stride);
- __builtin_prefetch(s + 6 * im_stride);
- __builtin_prefetch(s + 7 * im_stride);
-
- load_s16_4x8(s, im_stride, &s0, &s1, &s2, &s3, &s4, &s5, &s6, &s7);
- s += (7 * im_stride); // s0..s6 stay primed; s7 is loaded again inside the loop
-
- do {
-#if defined(__aarch64__)
- load_s16_4x4(s, im_stride, &s7, &s8, &s9, &s10);
- s += (im_stride << 2);
-
- __builtin_prefetch(d + 0 * dst_stride);
- __builtin_prefetch(d + 1 * dst_stride);
- __builtin_prefetch(d + 2 * dst_stride);
- __builtin_prefetch(d + 3 * dst_stride);
-
- __builtin_prefetch(d_u8 + 4 * dst8_stride);
- __builtin_prefetch(d_u8 + 5 * dst8_stride);
- __builtin_prefetch(d_u8 + 6 * dst8_stride);
- __builtin_prefetch(d_u8 + 7 * dst8_stride);
-
- d0 = convolve8_4x4_s32(s0, s1, s2, s3, s4, s5, s6, s7, y_filter,
- round_shift_vec, offset_const);
- d1 = convolve8_4x4_s32(s1, s2, s3, s4, s5, s6, s7, s8, y_filter,
- round_shift_vec, offset_const);
- d2 = convolve8_4x4_s32(s2, s3, s4, s5, s6, s7, s8, s9, y_filter,
- round_shift_vec, offset_const);
- d3 = convolve8_4x4_s32(s3, s4, s5, s6, s7, s8, s9, s10, y_filter,
- round_shift_vec, offset_const);
-
- if (do_average) {
- load_u16_4x4(d, dst_stride, &res4, &res5, &res6, &res7);
- d += (dst_stride << 2);
-
- compute_avg_4x4(res4, res5, res6, res7, d0, d1, d2, d3, fwd_offset,
- bck_offset, sub_const_vec, round_bits, use_jnt_comp_avg,
- &t0, &t1);
-
- vst1_lane_u32((uint32_t *)d_u8, vreinterpret_u32_u8(t0), 0); // 4 bytes per row; t0 and t1 carry two rows each
- d_u8 += dst8_stride;
- vst1_lane_u32((uint32_t *)d_u8, vreinterpret_u32_u8(t0), 1);
- d_u8 += dst8_stride;
- vst1_lane_u32((uint32_t *)d_u8, vreinterpret_u32_u8(t1), 0);
- d_u8 += dst8_stride;
- vst1_lane_u32((uint32_t *)d_u8, vreinterpret_u32_u8(t1), 1);
- d_u8 += dst8_stride;
-
- } else {
- store_u16_4x4(d, dst_stride, d0, d1, d2, d3); // dst8 is not touched on this branch
- d += (dst_stride << 2);
- }
- s0 = s4; // slide the seven-row history down by four rows
- s1 = s5;
- s2 = s6;
- s3 = s7;
- s4 = s8;
- s5 = s9;
- s6 = s10;
- height -= 4;
-#else
- s7 = vld1_s16(s);
- s += (im_stride);
-
- __builtin_prefetch(d + 0 * dst_stride);
- __builtin_prefetch(d_u8 + 0 * dst8_stride);
-
- d0 = convolve8_4x4_s32(s0, s1, s2, s3, s4, s5, s6, s7, y_filter,
- round_shift_vec, offset_const);
-
- if (do_average) {
- res4 = vld1_u16(d);
- d += (dst_stride);
-
- compute_avg_4x1(res4, d0, fwd_offset, bck_offset, sub_const_vec,
- round_bits, use_jnt_comp_avg, &t0);
-
- vst1_lane_u32((uint32_t *)d_u8, vreinterpret_u32_u8(t0), 0);
- d_u8 += dst8_stride;
-
- } else {
- vst1_u16(d, d0);
- d += (dst_stride);
- }
- s0 = s1; // slide the seven-row history down by one row
- s1 = s2;
- s2 = s3;
- s3 = s4;
- s4 = s5;
- s5 = s6;
- s6 = s7;
- height--;
-#endif
- } while (height > 0);
- src_ptr += 4; // move on to the next strip of 4 columns
- dst_ptr += 4;
- dst_u8_ptr += 4;
- w -= 4;
- } while (w > 0);
-}
-/*
- * Two-stage compound convolution: a horizontal pass into a stack buffer
- * followed by a vertical pass over that buffer.
- *
- *   src, src_stride     source pixels and their row pitch
- *   dst8, dst8_stride   8-bit destination, forwarded to the vertical pass
- *   w, h                block size; both are asserted to be multiples of 4
- *   filter_params_x/y   filter descriptions; their `taps` fields set the
- *                       source offsets and the number of intermediate rows
- *   subpel_x_q4/y_q4    masked with SUBPEL_MASK to select the kernels
- *   conv_params         supplies round_0 here and is forwarded to the
- *                       vertical pass
- *
- * The x kernel is loaded as 8 coefficients with vld1q_s16, so this assumes
- * the kernel returned for filter_params_x has at least 8 entries
- * (TODO confirm against av1_get_interp_filter_subpel_kernel).
- */
-void av1_jnt_convolve_2d_neon(const uint8_t *src, int src_stride, uint8_t *dst8,
- int dst8_stride, int w, int h,
- const InterpFilterParams *filter_params_x,
- const InterpFilterParams *filter_params_y,
- const int subpel_x_q4, const int subpel_y_q4,
- ConvolveParams *conv_params) {
- assert(!(w % 4));
- assert(!(h % 4));
-
- DECLARE_ALIGNED(16, int16_t,
- im_block[(MAX_SB_SIZE + HORIZ_EXTRA_ROWS) * MAX_SB_SIZE]);
-
- const int im_h = h + filter_params_y->taps - 1; // rows the horizontal pass must produce
- const int im_stride = MAX_SB_SIZE;
- const int vert_offset = filter_params_y->taps / 2 - 1;
- const int horiz_offset = filter_params_x->taps / 2 - 1;
- const int round_0 = conv_params->round_0 - 1; // one less because the x coefficients are halved below
- const uint8_t *src_ptr = src - vert_offset * src_stride - horiz_offset; // step back vert_offset rows and horiz_offset columns
- const int16_t *x_filter = av1_get_interp_filter_subpel_kernel(
- filter_params_x, subpel_x_q4 & SUBPEL_MASK);
- const int16_t *y_filter = av1_get_interp_filter_subpel_kernel(
- filter_params_y, subpel_y_q4 & SUBPEL_MASK);
-
- int16_t x_filter_tmp[8];
- int16x8_t filter_x_coef = vld1q_s16(x_filter);
-
- // filter coeffs are even, so downshifting by 1 to reduce intermediate
- // precision requirements.
- filter_x_coef = vshrq_n_s16(filter_x_coef, 1);
- vst1q_s16(&x_filter_tmp[0], filter_x_coef);
-
- jnt_convolve_2d_horiz_neon(src_ptr, src_stride, im_block, im_stride,
- x_filter_tmp, im_h, w, round_0);
-
- jnt_convolve_2d_vert_neon(im_block, im_stride, dst8, dst8_stride, conv_params,
- y_filter, h, w); // y_filter is used at full precision (not halved)
-}
-/*
- * Compound "copy" path: no filter kernel is applied. Every source byte is
- * widened to 16 bits, shifted by `bits` (vshl with a non-negative count is a
- * left shift) and has round_offset added. The result is then either
- *   - stored to conv_params->dst (do_average == 0), or
- *   - combined with the values already in conv_params->dst through
- *     compute_avg_8x4 / compute_avg_4x4 and stored as bytes to `dst8`.
- *
- *   src, src_stride     source pixels and their row pitch
- *   dst8, dst8_stride   8-bit destination, written only when do_average is set
- *   w, h                block size; rows are consumed in groups of 4
- *                       (h >> 2 iterations), so any h % 4 remainder is not
- *                       processed
- *   filter_params_x/y, subpel_x_q4/y_q4   unused
- *   conv_params         supplies dst, dst_stride, round_0, round_1,
- *                       fwd_offset, bck_offset, do_average, use_jnt_comp_avg
- *
- * When w is a multiple of 8 the block is covered in 8x4 tiles. Otherwise,
- * when w is a multiple of 4, only the first 4 columns of each row group are
- * processed (there is no loop over x on that branch). Any other w does
- * nothing.
- */
-void av1_jnt_convolve_2d_copy_neon(const uint8_t *src, int src_stride,
- uint8_t *dst8, int dst8_stride, int w, int h,
- const InterpFilterParams *filter_params_x,
- const InterpFilterParams *filter_params_y,
- const int subpel_x_q4, const int subpel_y_q4,
- ConvolveParams *conv_params) {
- uint8x8_t res0_8, res1_8, res2_8, res3_8, tmp_shift0, tmp_shift1, tmp_shift2,
- tmp_shift3;
- uint16x8_t res_q0, res_q1, res_q2, res_q3, tmp_q0, tmp_q1, tmp_q2, tmp_q3;
- uint16x4_t tmp4, tmp5, tmp6, tmp7, res4, res5, res6, res7;
- const uint8_t *src1, *src2;
- uint8_t *dst8_1;
- CONV_BUF_TYPE *dst = conv_params->dst, *dst_1, *dst_2;
- const int dst_stride = conv_params->dst_stride;
- int x, y;
- const int16_t bits =
- FILTER_BITS * 2 - conv_params->round_1 - conv_params->round_0;
- const int bd = 8; // fixed at 8 here (presumably the sample bit depth)
- const int offset_bits = bd + 2 * FILTER_BITS - conv_params->round_0;
- const int round_offset = (1 << (offset_bits - conv_params->round_1)) +
- (1 << (offset_bits - conv_params->round_1 - 1));
- const int16x4_t sub_const_vec = vdup_n_s16((int16_t)round_offset);
- const uint16x8_t dup_round_offset16x8 = vdupq_n_u16((uint16_t)round_offset);
- const int16x4_t dup_bits16x4 = vdup_n_s16(bits);
- const int16x8_t dup_bits16x8 = vdupq_n_s16(bits);
-
- (void)filter_params_x;
- (void)filter_params_y;
- (void)subpel_x_q4;
- (void)subpel_y_q4;
-
- if (!(w & 0x07)) { // width is a multiple of 8: 8x4 tiles
- for (y = 0; y < (h >> 2); ++y) {
- src1 = src;
- dst8_1 = dst8;
- dst_1 = dst;
- for (x = 0; x < (w >> 3); ++x) {
- src2 = src1;
- load_u8_8x4(src2, src_stride, &res0_8, &res1_8, &res2_8, &res3_8);
-
- res_q0 = vaddq_u16(vshlq_u16(vmovl_u8(res0_8), dup_bits16x8),
- dup_round_offset16x8);
- res_q1 = vaddq_u16(vshlq_u16(vmovl_u8(res1_8), dup_bits16x8),
- dup_round_offset16x8);
- res_q2 = vaddq_u16(vshlq_u16(vmovl_u8(res2_8), dup_bits16x8),
- dup_round_offset16x8);
- res_q3 = vaddq_u16(vshlq_u16(vmovl_u8(res3_8), dup_bits16x8),
- dup_round_offset16x8);
-
- if (conv_params->do_average) {
- dst_2 = dst_1;
- load_u16_8x4(dst_2, dst_stride, &tmp_q0, &tmp_q1, &tmp_q2, &tmp_q3);
-
- compute_avg_8x4(tmp_q0, tmp_q1, tmp_q2, tmp_q3, res_q0, res_q1,
- res_q2, res_q3, conv_params->fwd_offset,
- conv_params->bck_offset, sub_const_vec, bits,
- conv_params->use_jnt_comp_avg, &tmp_shift0,
- &tmp_shift1, &tmp_shift2, &tmp_shift3);
-
- vst1_u8(dst8_1 + (0 * dst8_stride), tmp_shift0);
- vst1_u8(dst8_1 + (1 * dst8_stride), tmp_shift1);
- vst1_u8(dst8_1 + (2 * dst8_stride), tmp_shift2);
- vst1_u8(dst8_1 + (3 * dst8_stride), tmp_shift3);
-
- } else {
- vst1q_u16(dst_1 + (0 * dst_stride), res_q0);
- vst1q_u16(dst_1 + (1 * dst_stride), res_q1);
- vst1q_u16(dst_1 + (2 * dst_stride), res_q2);
- vst1q_u16(dst_1 + (3 * dst_stride), res_q3);
- }
- src1 = src1 + 8;
- dst_1 = dst_1 + 8;
- dst8_1 = dst8_1 + 8;
- }
- src += src_stride * 4;
- dst8 += dst8_stride * 4;
- dst += dst_stride * 4;
- }
- } else if (!(w & 0x03)) { // width is a multiple of 4 but not of 8
- for (y = 0; y < (h >> 2); ++y) {
- src1 = src;
- dst8_1 = dst8;
- dst_1 = dst;
-
- load_u8_8x4(src1, src_stride, &res0_8, &res1_8, &res2_8, &res3_8); // only the low 4 lanes of each row are used below
-
- res4 = vadd_u16(vshl_u16(vget_low_u16(vmovl_u8(res0_8)), dup_bits16x4),
- vreinterpret_u16_s16(sub_const_vec));
- res5 = vadd_u16(vshl_u16(vget_low_u16(vmovl_u8(res1_8)), dup_bits16x4),
- vreinterpret_u16_s16(sub_const_vec));
- res6 = vadd_u16(vshl_u16(vget_low_u16(vmovl_u8(res2_8)), dup_bits16x4),
- vreinterpret_u16_s16(sub_const_vec));
- res7 = vadd_u16(vshl_u16(vget_low_u16(vmovl_u8(res3_8)), dup_bits16x4),
- vreinterpret_u16_s16(sub_const_vec));
- if (conv_params->do_average) {
- load_u16_4x4(dst_1, dst_stride, &tmp4, &tmp5, &tmp6, &tmp7);
-
- compute_avg_4x4(tmp4, tmp5, tmp6, tmp7, res4, res5, res6, res7,
- conv_params->fwd_offset, conv_params->bck_offset,
- sub_const_vec, bits, conv_params->use_jnt_comp_avg,
- &tmp_shift0, &tmp_shift1);
-
- vst1_lane_u32((uint32_t *)(dst8_1), vreinterpret_u32_u8(tmp_shift0), 0); // 4 bytes per row; each vector carries two rows
- dst8_1 += dst8_stride;
- vst1_lane_u32((uint32_t *)(dst8_1), vreinterpret_u32_u8(tmp_shift0), 1);
- dst8_1 += dst8_stride;
- vst1_lane_u32((uint32_t *)(dst8_1), vreinterpret_u32_u8(tmp_shift1), 0);
- dst8_1 += dst8_stride;
- vst1_lane_u32((uint32_t *)(dst8_1), vreinterpret_u32_u8(tmp_shift1), 1);
-
- } else {
- vst1_u16(dst_1, res4);
- dst_1 += dst_stride;
- vst1_u16(dst_1, res5);
- dst_1 += dst_stride;
- vst1_u16(dst_1, res6);
- dst_1 += dst_stride;
- vst1_u16(dst_1, res7);
- }
- src += src_stride * 4;
- dst += dst_stride * 4;
- dst8 += dst8_stride * 4;
- }
- }
-}
-/*
- * Horizontal-only compound convolution.
- *
- * Runs the 8-input convolve helpers (convolve8_4x4_s16 / convolve8_8x8_s16)
- * along each row of `src` with the halved x kernel, shifts the result with
- * vrshl by `bits` (FILTER_BITS - round_1), and adds round_offset. The value
- * is then either
- *   - stored to conv_params->dst (do_average == 0), or
- *   - combined with the values already in conv_params->dst through the
- *     compute_avg_* helpers and stored as bytes to `dst8`.
- *
- *   src, src_stride     source pixels and their row pitch
- *   dst8, dst8_stride   8-bit destination, written only when do_average is set
- *   w, h                block size; both are asserted to be multiples of 4
- *   filter_params_x     its `taps` field sets the horizontal source offset
- *   subpel_x_q4         masked with SUBPEL_MASK to select the x kernel
- *   filter_params_y, subpel_y_q4   unused
- *   conv_params         supplies dst, dst_stride, round_0, round_1,
- *                       fwd_offset, bck_offset, do_average, use_jnt_comp_avg
- *
- * Path selection: when w == 4 or h == 4 the block is processed in units of
- * 4 columns (4x4 tiles on __aarch64__, one row otherwise); every other size
- * is processed in units of 8 columns (8x8 tiles on __aarch64__, one row
- * otherwise).
- *
- * The x kernel is loaded as 8 coefficients with vld1q_s16, so this assumes
- * the kernel returned for filter_params_x has at least 8 entries
- * (TODO confirm against av1_get_interp_filter_subpel_kernel).
- */
-void av1_jnt_convolve_x_neon(const uint8_t *src, int src_stride, uint8_t *dst8,
- int dst8_stride, int w, int h,
- const InterpFilterParams *filter_params_x,
- const InterpFilterParams *filter_params_y,
- const int subpel_x_q4, const int subpel_y_q4,
- ConvolveParams *conv_params) {
- assert(!(w % 4));
- assert(!(h % 4));
-
- CONV_BUF_TYPE *dst = conv_params->dst;
- int dst_stride = conv_params->dst_stride;
- const int horiz_offset = filter_params_x->taps / 2 - 1;
- const int bits = FILTER_BITS - conv_params->round_1; // shift count applied after the convolution
- const int bd = 8; // fixed at 8 here (presumably the sample bit depth)
- const int offset_bits = bd + 2 * FILTER_BITS - conv_params->round_0;
- const int round_offset = (1 << (offset_bits - conv_params->round_1)) +
- (1 << (offset_bits - conv_params->round_1 - 1));
- const int round_bits =
- 2 * FILTER_BITS - conv_params->round_0 - conv_params->round_1;
- const uint16_t fwd_offset = conv_params->fwd_offset;
- const uint16_t bck_offset = conv_params->bck_offset;
- const int use_jnt_comp_avg = conv_params->use_jnt_comp_avg;
-
- (void)filter_params_y;
- (void)subpel_y_q4;
-
- // horizontal filter
- const int16_t *x_filter = av1_get_interp_filter_subpel_kernel(
- filter_params_x, subpel_x_q4 & SUBPEL_MASK);
-
- const uint8_t *src_ptr = src - horiz_offset; // step back horiz_offset columns
-
- int16_t x_filter_tmp[8];
- int16x8_t filter_x_coef = vld1q_s16(x_filter);
-
- // filter coeffs are even, so downshifting by 1 to reduce intermediate
- // precision requirements.
- filter_x_coef = vshrq_n_s16(filter_x_coef, 1);
- vst1q_s16(&x_filter_tmp[0], filter_x_coef);
-
- const uint8_t *s;
- uint8_t *d_u8;
- uint8_t *dst_u8_ptr;
- CONV_BUF_TYPE *d, *dst_ptr;
- int width, height;
- uint8x8_t t0;
-#if defined(__aarch64__)
- uint8x8_t t1, t2, t3, t4, t5, t6, t7;
-#endif
- s = src_ptr;
- dst_ptr = dst;
- dst_u8_ptr = dst8;
- width = w;
- height = h;
-
- if ((w == 4) || (h == 4)) { // narrow or short block: work in units of 4 columns
- int16x4_t s0, s1, s2, s3, s4, s5, s6, s7, d0;
- int16x8_t tt0;
- uint16x4_t res4;
-#if defined(__aarch64__)
- int16x4_t s8, s9, s10, d1, d2, d3;
- int16x8_t tt1, tt2, tt3;
- uint16x4_t res5, res6, res7;
- uint32x2_t tu0 = vdup_n_u32(0), tu1 = vdup_n_u32(0);
- int16x8_t u0, u1;
-#else
- int16x4_t temp_0;
-#endif
- const int16x4_t zero = vdup_n_s16(0);
- const int16x4_t round_offset_vec = vdup_n_s16(round_offset);
- const int16x4_t shift_round_0 = vdup_n_s16(-conv_params->round_0 + 1); // i.e. -(round_0 - 1); the coefficients were halved above
- const int16x4_t horiz_const = vdup_n_s16(bits); // despite its name, this is the vrshl shift count
- do {
- s = src_ptr;
- d = dst_ptr;
- d_u8 = dst_u8_ptr;
- width = w;
- __builtin_prefetch(s + 0 * src_stride);
-#if defined(__aarch64__)
- __builtin_prefetch(s + 1 * src_stride);
- __builtin_prefetch(s + 2 * src_stride);
- __builtin_prefetch(s + 3 * src_stride);
-
- load_u8_8x4(s, src_stride, &t0, &t1, &t2, &t3);
- transpose_u8_8x4(&t0, &t1, &t2, &t3);
- tt0 = vreinterpretq_s16_u16(vmovl_u8(t0));
- tt1 = vreinterpretq_s16_u16(vmovl_u8(t1));
- tt2 = vreinterpretq_s16_u16(vmovl_u8(t2));
- tt3 = vreinterpretq_s16_u16(vmovl_u8(t3));
- s0 = vget_low_s16(tt0);
- s1 = vget_low_s16(tt1);
- s2 = vget_low_s16(tt2);
- s3 = vget_low_s16(tt3);
- s4 = vget_high_s16(tt0);
- s5 = vget_high_s16(tt1);
- s6 = vget_high_s16(tt2);
- __builtin_prefetch(d + 0 * dst_stride);
- __builtin_prefetch(d + 1 * dst_stride);
- __builtin_prefetch(d + 2 * dst_stride);
- __builtin_prefetch(d + 3 * dst_stride);
- s += 7; // s0..s6 are filled; the inner loop loads s7..s10
- do {
- load_unaligned_u8_4x4(s, src_stride, &tu0, &tu1);
- t0 = vreinterpret_u8_u32(tu0);
- t1 = vreinterpret_u8_u32(tu1);
-
- transpose_u8_4x4(&t0, &t1);
- u0 = vreinterpretq_s16_u16(vmovl_u8(t0));
- u1 = vreinterpretq_s16_u16(vmovl_u8(t1));
-
- s7 = vget_low_s16(u0);
- s8 = vget_low_s16(u1);
- s9 = vget_high_s16(u0);
- s10 = vget_high_s16(u1);
-
- d0 = convolve8_4x4_s16(s0, s1, s2, s3, s4, s5, s6, s7, x_filter_tmp,
- zero, shift_round_0);
- d0 = vrshl_s16(d0, horiz_const);
- d0 = vadd_s16(d0, round_offset_vec);
- d1 = convolve8_4x4_s16(s1, s2, s3, s4, s5, s6, s7, s8, x_filter_tmp,
- zero, shift_round_0);
- d1 = vrshl_s16(d1, horiz_const);
- d1 = vadd_s16(d1, round_offset_vec);
- d2 = convolve8_4x4_s16(s2, s3, s4, s5, s6, s7, s8, s9, x_filter_tmp,
- zero, shift_round_0);
- d2 = vrshl_s16(d2, horiz_const);
- d2 = vadd_s16(d2, round_offset_vec);
- d3 = convolve8_4x4_s16(s3, s4, s5, s6, s7, s8, s9, s10, x_filter_tmp,
- zero, shift_round_0);
- d3 = vrshl_s16(d3, horiz_const);
- d3 = vadd_s16(d3, round_offset_vec);
-
- transpose_s16_4x4d(&d0, &d1, &d2, &d3); // back to one vector per output row
-
- if (conv_params->do_average) {
- __builtin_prefetch(d + 0 * dst_stride);
- __builtin_prefetch(d + 1 * dst_stride);
- __builtin_prefetch(d + 2 * dst_stride);
- __builtin_prefetch(d + 3 * dst_stride);
-
- __builtin_prefetch(d_u8 + 0 * dst8_stride);
- __builtin_prefetch(d_u8 + 1 * dst8_stride);
- __builtin_prefetch(d_u8 + 2 * dst8_stride);
- __builtin_prefetch(d_u8 + 3 * dst8_stride);
-
- load_u16_4x4(d, dst_stride, &res4, &res5, &res6, &res7);
-
- compute_avg_4x4(res4, res5, res6, res7, vreinterpret_u16_s16(d0),
- vreinterpret_u16_s16(d1), vreinterpret_u16_s16(d2),
- vreinterpret_u16_s16(d3), fwd_offset, bck_offset,
- round_offset_vec, round_bits, use_jnt_comp_avg, &t0,
- &t1);
-
- vst1_lane_u32((uint32_t *)d_u8, vreinterpret_u32_u8(t0),
- 0); // 00 01 02 03
- vst1_lane_u32((uint32_t *)(d_u8 + dst8_stride),
- vreinterpret_u32_u8(t0),
- 1); // 10 11 12 13
- vst1_lane_u32((uint32_t *)(d_u8 + 2 * dst8_stride),
- vreinterpret_u32_u8(t1),
- 0); // 20 21 22 23
- vst1_lane_u32((uint32_t *)(d_u8 + 3 * dst8_stride),
- vreinterpret_u32_u8(t1),
- 1); // 30 31 32 33
- } else {
- store_u16_4x4(d, dst_stride, vreinterpret_u16_s16(d0),
- vreinterpret_u16_s16(d1), vreinterpret_u16_s16(d2),
- vreinterpret_u16_s16(d3));
- }
-
- s0 = s4; // slide the seven-column history right by four columns
- s1 = s5;
- s2 = s6;
- s3 = s7;
- s4 = s8;
- s5 = s9;
- s6 = s10;
-
- s += 4;
- width -= 4;
- d += 4;
- d_u8 += 4;
- } while (width > 0);
- src_ptr += (src_stride << 2);
- dst_ptr += (dst_stride << 2);
- dst_u8_ptr += (dst8_stride << 2);
- height -= 4;
-#else
- t0 = vld1_u8(s); // a0 a1 a2 a3 a4 a5 a6 a7
- tt0 = vreinterpretq_s16_u16(vmovl_u8(t0)); // a0 a1 a2 a3 a4 a5 a6 a7
- s0 = vget_low_s16(tt0); // a0 a1 a2 a3
- s4 = vget_high_s16(tt0); // a4 a5 a6 a7
- __builtin_prefetch(d);
-
- s += 8;
- do {
- t0 = vld1_u8(s); // a8 a9 a10 a11
-
- // a8 a9 a10 a11
- s7 = vget_low_s16(vreinterpretq_s16_u16(vmovl_u8(t0)));
- temp_0 = s7; // keep the freshly loaded samples; s7 is overwritten below
- s1 = vext_s16(s0, s4, 1); // a1 a2 a3 a4
- s2 = vext_s16(s0, s4, 2); // a2 a3 a4 a5
- s3 = vext_s16(s0, s4, 3); // a3 a4 a5 a6
- s5 = vext_s16(s4, s7, 1); // a5 a6 a7 a8
- s6 = vext_s16(s4, s7, 2); // a6 a7 a8 a9
- s7 = vext_s16(s4, s7, 3); // a7 a8 a9 a10
-
- d0 = convolve8_4x4_s16(s0, s1, s2, s3, s4, s5, s6, s7, x_filter_tmp,
- zero, shift_round_0);
- d0 = vrshl_s16(d0, horiz_const);
- d0 = vadd_s16(d0, round_offset_vec);
- s0 = s4; // advance the window by four columns
- s4 = temp_0;
- if (conv_params->do_average) {
- __builtin_prefetch(d);
- __builtin_prefetch(d_u8);
-
- res4 = vld1_u16(d);
-
- compute_avg_4x1(res4, vreinterpret_u16_s16(d0), fwd_offset,
- bck_offset, round_offset_vec, round_bits,
- use_jnt_comp_avg, &t0);
-
- vst1_lane_u32((uint32_t *)d_u8, vreinterpret_u32_u8(t0),
- 0); // 00 01 02 03
- } else {
- vst1_u16(d, vreinterpret_u16_s16(d0));
- }
-
- s += 4;
- width -= 4;
- d += 4;
- d_u8 += 4;
- } while (width > 0);
- src_ptr += (src_stride);
- dst_ptr += (dst_stride);
- dst_u8_ptr += (dst8_stride);
- height--;
-#endif
- } while (height > 0);
- } else {
- CONV_BUF_TYPE *d_tmp;
- uint8_t *d_u8_tmp;
- int16x8_t s0, s1, s2, s3, s4, s5, s6, s7;
- int16x8_t res0;
- uint16x8_t res8;
- const int16x8_t round_offset128 = vdupq_n_s16(round_offset);
- const int16x4_t round_offset64 = vdup_n_s16(round_offset);
- const int16x8_t shift_round_0 = vdupq_n_s16(-conv_params->round_0 + 1); // i.e. -(round_0 - 1); the coefficients were halved above
- const int16x8_t horiz_const = vdupq_n_s16(bits); // despite its name, this is the vrshlq shift count
- const int16x8_t zero = vdupq_n_s16(0);
-
- d = dst_ptr = dst;
- d_u8 = dst_u8_ptr = dst8;
- do {
-#if defined(__aarch64__)
- int16x8_t s11, s12, s13, s14;
- int16x8_t s8, s9, s10;
- int16x8_t res1, res2, res3, res4, res5, res6, res7;
- uint16x8_t res9, res10, res11;
- __builtin_prefetch(src_ptr + 0 * src_stride);
- __builtin_prefetch(src_ptr + 1 * src_stride);
- __builtin_prefetch(src_ptr + 2 * src_stride);
- __builtin_prefetch(src_ptr + 3 * src_stride);
- __builtin_prefetch(src_ptr + 4 * src_stride);
- __builtin_prefetch(src_ptr + 5 * src_stride);
- __builtin_prefetch(src_ptr + 6 * src_stride);
- __builtin_prefetch(src_ptr + 7 * src_stride);
- load_u8_8x8(src_ptr, src_stride, &t0, &t1, &t2, &t3, &t4, &t5, &t6, &t7);
- transpose_u8_8x8(&t0, &t1, &t2, &t3, &t4, &t5, &t6, &t7);
- s0 = vreinterpretq_s16_u16(vmovl_u8(t0));
- s1 = vreinterpretq_s16_u16(vmovl_u8(t1));
- s2 = vreinterpretq_s16_u16(vmovl_u8(t2));
- s3 = vreinterpretq_s16_u16(vmovl_u8(t3));
- s4 = vreinterpretq_s16_u16(vmovl_u8(t4));
- s5 = vreinterpretq_s16_u16(vmovl_u8(t5));
- s6 = vreinterpretq_s16_u16(vmovl_u8(t6));
-
- width = w;
- s = src_ptr + 7; // s0..s6 are filled; the inner loop loads s7..s14
- d = dst_ptr;
- d_u8_tmp = dst_u8_ptr;
-
- __builtin_prefetch(dst_ptr + 0 * dst_stride);
- __builtin_prefetch(dst_ptr + 1 * dst_stride);
- __builtin_prefetch(dst_ptr + 2 * dst_stride);
- __builtin_prefetch(dst_ptr + 3 * dst_stride);
- __builtin_prefetch(dst_ptr + 4 * dst_stride);
- __builtin_prefetch(dst_ptr + 5 * dst_stride);
- __builtin_prefetch(dst_ptr + 6 * dst_stride);
- __builtin_prefetch(dst_ptr + 7 * dst_stride);
-
- do {
- d_u8 = d_u8_tmp; // d_u8 / d_tmp walk down the rows of the current 8x8 tile
- d_tmp = d;
-
- load_u8_8x8(s, src_stride, &t0, &t1, &t2, &t3, &t4, &t5, &t6, &t7);
- transpose_u8_8x8(&t0, &t1, &t2, &t3, &t4, &t5, &t6, &t7);
- s7 = vreinterpretq_s16_u16(vmovl_u8(t0));
- s8 = vreinterpretq_s16_u16(vmovl_u8(t1));
- s9 = vreinterpretq_s16_u16(vmovl_u8(t2));
- s10 = vreinterpretq_s16_u16(vmovl_u8(t3));
- s11 = vreinterpretq_s16_u16(vmovl_u8(t4));
- s12 = vreinterpretq_s16_u16(vmovl_u8(t5));
- s13 = vreinterpretq_s16_u16(vmovl_u8(t6));
- s14 = vreinterpretq_s16_u16(vmovl_u8(t7));
-
- res0 = convolve8_8x8_s16(s0, s1, s2, s3, s4, s5, s6, s7, x_filter_tmp,
- zero, shift_round_0);
-
- res0 = vrshlq_s16(res0, horiz_const);
- res0 = vaddq_s16(res0, round_offset128);
-
- res1 = convolve8_8x8_s16(s1, s2, s3, s4, s5, s6, s7, s8, x_filter_tmp,
- zero, shift_round_0);
- res1 = vrshlq_s16(res1, horiz_const);
- res1 = vaddq_s16(res1, round_offset128);
- res2 = convolve8_8x8_s16(s2, s3, s4, s5, s6, s7, s8, s9, x_filter_tmp,
- zero, shift_round_0);
- res2 = vrshlq_s16(res2, horiz_const);
- res2 = vaddq_s16(res2, round_offset128);
- res3 = convolve8_8x8_s16(s3, s4, s5, s6, s7, s8, s9, s10, x_filter_tmp,
- zero, shift_round_0);
- res3 = vrshlq_s16(res3, horiz_const);
- res3 = vaddq_s16(res3, round_offset128);
- res4 = convolve8_8x8_s16(s4, s5, s6, s7, s8, s9, s10, s11, x_filter_tmp,
- zero, shift_round_0);
- res4 = vrshlq_s16(res4, horiz_const);
- res4 = vaddq_s16(res4, round_offset128);
- res5 = convolve8_8x8_s16(s5, s6, s7, s8, s9, s10, s11, s12,
- x_filter_tmp, zero, shift_round_0);
- res5 = vrshlq_s16(res5, horiz_const);
- res5 = vaddq_s16(res5, round_offset128);
- res6 = convolve8_8x8_s16(s6, s7, s8, s9, s10, s11, s12, s13,
- x_filter_tmp, zero, shift_round_0);
- res6 = vrshlq_s16(res6, horiz_const);
- res6 = vaddq_s16(res6, round_offset128);
- res7 = convolve8_8x8_s16(s7, s8, s9, s10, s11, s12, s13, s14,
- x_filter_tmp, zero, shift_round_0);
- res7 = vrshlq_s16(res7, horiz_const);
- res7 = vaddq_s16(res7, round_offset128);
-
- transpose_s16_8x8(&res0, &res1, &res2, &res3, &res4, &res5, &res6,
- &res7);
-
- if (conv_params->do_average) {
- load_u16_8x4(d_tmp, dst_stride, &res8, &res9, &res10, &res11); // rows 0..3 of the tile
- d_tmp += (dst_stride << 2);
-
- compute_avg_8x4(
- res8, res9, res10, res11, vreinterpretq_u16_s16(res0),
- vreinterpretq_u16_s16(res1), vreinterpretq_u16_s16(res2),
- vreinterpretq_u16_s16(res3), fwd_offset, bck_offset,
- round_offset64, round_bits, use_jnt_comp_avg, &t0, &t1, &t2, &t3);
-
- store_u8_8x4(d_u8, dst8_stride, t0, t1, t2, t3);
- d_u8 += (dst8_stride << 2);
-
- load_u16_8x4(d_tmp, dst_stride, &res8, &res9, &res10, &res11); // rows 4..7 of the tile
- d_tmp += (dst_stride << 2);
-
- compute_avg_8x4(
- res8, res9, res10, res11, vreinterpretq_u16_s16(res4),
- vreinterpretq_u16_s16(res5), vreinterpretq_u16_s16(res6),
- vreinterpretq_u16_s16(res7), fwd_offset, bck_offset,
- round_offset64, round_bits, use_jnt_comp_avg, &t0, &t1, &t2, &t3);
-
- store_u8_8x4(d_u8, dst8_stride, t0, t1, t2, t3);
- d_u8 += (dst8_stride << 2);
- } else {
- store_u16_8x8(
- d_tmp, dst_stride, vreinterpretq_u16_s16(res0),
- vreinterpretq_u16_s16(res1), vreinterpretq_u16_s16(res2),
- vreinterpretq_u16_s16(res3), vreinterpretq_u16_s16(res4),
- vreinterpretq_u16_s16(res5), vreinterpretq_u16_s16(res6),
- vreinterpretq_u16_s16(res7));
- d_tmp += (dst_stride << 3);
- }
-
- s0 = s8; // carry the last seven vectors over to the next 8-column step
- s1 = s9;
- s2 = s10;
- s3 = s11;
- s4 = s12;
- s5 = s13;
- s6 = s14;
- s += 8;
- d += 8;
- width -= 8;
- d_u8_tmp += 8;
- } while (width > 0);
- src_ptr += 8 * src_stride;
- dst_ptr += 8 * dst_stride;
- dst_u8_ptr += 8 * dst8_stride;
- height -= 8;
-#else
- int16x8_t temp_0;
- __builtin_prefetch(src_ptr);
- t0 = vld1_u8(src_ptr);
- s0 = vreinterpretq_s16_u16(vmovl_u8(t0)); // a0 a1 a2 a3 a4 a5 a6 a7
-
- width = w;
- s = src_ptr + 8;
- d = dst_ptr;
- d_u8_tmp = dst_u8_ptr;
-
- __builtin_prefetch(dst_ptr);
-
- do {
- d_u8 = d_u8_tmp;
- d_tmp = d;
-
- t0 = vld1_u8(s); // a8 a9 a10 a11 a12 a13 a14 a15
- s7 = vreinterpretq_s16_u16(vmovl_u8(t0));
- temp_0 = s0; // current 8 samples, used as the first convolve input
- s0 = s7; // the 8 samples just loaded start the next iteration
-
- s1 = vextq_s16(temp_0, s7, 1); // a1 a2 a3 a4 a5 a6 a7 a8
- s2 = vextq_s16(temp_0, s7, 2); // a2 a3 a4 a5 a6 a7 a8 a9
- s3 = vextq_s16(temp_0, s7, 3); // a3 a4 a5 a6 a7 a8 a9 a10
- s4 = vextq_s16(temp_0, s7, 4); // a4 a5 a6 a7 a8 a9 a10 a11
- s5 = vextq_s16(temp_0, s7, 5); // a5 a6 a7 a8 a9 a10 a11 a12
- s6 = vextq_s16(temp_0, s7, 6); // a6 a7 a8 a9 a10 a11 a12 a13
- s7 = vextq_s16(temp_0, s7, 7); // a7 a8 a9 a10 a11 a12 a13 a14
-
- res0 = convolve8_8x8_s16(temp_0, s1, s2, s3, s4, s5, s6, s7,
- x_filter_tmp, zero, shift_round_0);
-
- res0 = vrshlq_s16(res0, horiz_const);
- res0 = vaddq_s16(res0, round_offset128);
-
- if (conv_params->do_average) {
- res8 = vld1q_u16(d_tmp);
- d_tmp += (dst_stride);
-
- compute_avg_8x1(res8, vreinterpretq_u16_s16(res0), fwd_offset,
- bck_offset, round_offset64, round_bits,
- use_jnt_comp_avg, &t0);
-
- vst1_u8(d_u8, t0);
- d_u8 += (dst8_stride);
- } else {
- vst1q_u16(d_tmp, vreinterpretq_u16_s16(res0));
- d_tmp += (dst_stride);
- }
-
- s += 8;
- d += 8;
- width -= 8;
- d_u8_tmp += 8;
- } while (width > 0);
- src_ptr += src_stride;
- dst_ptr += dst_stride;
- dst_u8_ptr += dst8_stride;
- height--;
-#endif
- } while (height > 0);
- }
-}
-
-void av1_jnt_convolve_y_neon(const uint8_t *src, int src_stride, uint8_t *dst8,
- int dst8_stride, int w, int h,
- const InterpFilterParams *filter_params_x,
- const InterpFilterParams *filter_params_y,
- const int subpel_x_q4, const int subpel_y_q4,
- ConvolveParams *conv_params) {
- assert(!(w % 4));
- assert(!(h % 4));
-
- CONV_BUF_TYPE *dst = conv_params->dst;
- const int dst_stride = conv_params->dst_stride;
- const int vert_offset = filter_params_y->taps / 2 - 1;
- const int bits = FILTER_BITS - conv_params->round_0;
- const int bd = 8;
- const int offset_bits = bd + 2 * FILTER_BITS - conv_params->round_0;
- const int round_offset = (1 << (offset_bits - conv_params->round_1)) +
- (1 << (offset_bits - conv_params->round_1 - 1));
- const int round_bits =
- 2 * FILTER_BITS - conv_params->round_0 - conv_params->round_1;
- const uint16_t fwd_offset = conv_params->fwd_offset;
- const uint16_t bck_offset = conv_params->bck_offset;
- const int use_jnt_comp_avg = conv_params->use_jnt_comp_avg;
- const int shift_value = (conv_params->round_1 - 1 - bits);
-
- (void)filter_params_x;
- (void)subpel_x_q4;
-
- // vertical filter
- const int16_t *y_filter = av1_get_interp_filter_subpel_kernel(
- filter_params_y, subpel_y_q4 & SUBPEL_MASK);
-
- const uint8_t *src_ptr = src - (vert_offset * src_stride);
-
- int16_t y_filter_tmp[8];
- int16x8_t filter_y_coef = vld1q_s16(y_filter);
-
- // filter coeffs are even, so downshifting by 1 to reduce intermediate
- // precision requirements.
- filter_y_coef = vshrq_n_s16(filter_y_coef, 1);
- vst1q_s16(&y_filter_tmp[0], filter_y_coef);
-
- const uint8_t *s;
- uint8_t *d_u8;
- uint8_t *dst_u8_ptr;
- CONV_BUF_TYPE *d, *dst_ptr;
- int width, height;
-
- s = src_ptr;
- dst_ptr = dst;
- dst_u8_ptr = dst8;
- width = w;
- height = h;
-
- // used to get rid of multiplication = (vertical filter output sum) *
- // (1<<bits).
- assert((conv_params->round_1 - 2) >= bits);
-
- if ((w == 4) || (h == 4)) {
- int16x4_t s0, s1, s2, s3, s4, s5, s6, s7, d0;
- uint16x4_t res4;
- uint32x2_t tu0 = vdup_n_u32(0), tu1 = vdup_n_u32(0), tu2 = vdup_n_u32(0),
- tu3 = vdup_n_u32(0);
- int16x8_t u0, u1, u2, u3;
- uint8x8_t t0;
-
-#if defined(__aarch64__)
- int16x4_t s8, s9, s10, d1, d2, d3;
- uint16x4_t res5, res6, res7;
- uint8x8_t t1;
-#endif
- const int16x4_t round_offset64 = vdup_n_s16(round_offset);
- const int16x4_t shift_vec = vdup_n_s16(-shift_value);
- const int16x4_t zero = vdup_n_s16(0);
-
- do {
- s = src_ptr;
- d = dst_ptr;
- d_u8 = dst_u8_ptr;
- height = h;
- __builtin_prefetch(s + 0 * src_stride);
- __builtin_prefetch(s + 1 * src_stride);
- __builtin_prefetch(s + 2 * src_stride);
- __builtin_prefetch(s + 3 * src_stride);
-
- load_unaligned_u8_4x8(s, src_stride, &tu0, &tu1, &tu2, &tu3);
-
- u0 = vreinterpretq_s16_u16(vmovl_u8(vreinterpret_u8_u32(tu0)));
- u1 = vreinterpretq_s16_u16(vmovl_u8(vreinterpret_u8_u32(tu1)));
- u2 = vreinterpretq_s16_u16(vmovl_u8(vreinterpret_u8_u32(tu2)));
- u3 = vreinterpretq_s16_u16(vmovl_u8(vreinterpret_u8_u32(tu3)));
-
- s0 = vget_low_s16(u0);
- s1 = vget_high_s16(u0);
- s2 = vget_low_s16(u1);
- s3 = vget_high_s16(u1);
- s4 = vget_low_s16(u2);
- s5 = vget_high_s16(u2);
- s6 = vget_low_s16(u3);
-
- __builtin_prefetch(d + 0 * dst_stride);
- __builtin_prefetch(d + 1 * dst_stride);
- __builtin_prefetch(d + 2 * dst_stride);
- __builtin_prefetch(d + 3 * dst_stride);
-
- s += (7 * src_stride);
- do {
-#if defined(__aarch64__)
- load_unaligned_u8_4x4(s, src_stride, &tu0, &tu1);
-
- u0 = vreinterpretq_s16_u16(vmovl_u8(vreinterpret_u8_u32(tu0)));
- u1 = vreinterpretq_s16_u16(vmovl_u8(vreinterpret_u8_u32(tu1)));
-
- s7 = vget_low_s16(u0);
- s8 = vget_high_s16(u0);
- s9 = vget_low_s16(u1);
- s10 = vget_high_s16(u1);
-
- d0 = convolve8_4x4_s16(s0, s1, s2, s3, s4, s5, s6, s7, y_filter_tmp,
- zero, shift_vec);
- d0 = vadd_s16(d0, round_offset64);
- d1 = convolve8_4x4_s16(s1, s2, s3, s4, s5, s6, s7, s8, y_filter_tmp,
- zero, shift_vec);
- d1 = vadd_s16(d1, round_offset64);
- d2 = convolve8_4x4_s16(s2, s3, s4, s5, s6, s7, s8, s9, y_filter_tmp,
- zero, shift_vec);
- d2 = vadd_s16(d2, round_offset64);
- d3 = convolve8_4x4_s16(s3, s4, s5, s6, s7, s8, s9, s10, y_filter_tmp,
- zero, shift_vec);
- d3 = vadd_s16(d3, round_offset64);
-
- if (conv_params->do_average) {
- __builtin_prefetch(d + 0 * dst_stride);
- __builtin_prefetch(d + 1 * dst_stride);
- __builtin_prefetch(d + 2 * dst_stride);
- __builtin_prefetch(d + 3 * dst_stride);
-
- __builtin_prefetch(d_u8 + 0 * dst8_stride);
- __builtin_prefetch(d_u8 + 1 * dst8_stride);
- __builtin_prefetch(d_u8 + 2 * dst8_stride);
- __builtin_prefetch(d_u8 + 3 * dst8_stride);
-
- load_u16_4x4(d, dst_stride, &res4, &res5, &res6, &res7);
- d += (dst_stride << 2);
-
- compute_avg_4x4(res4, res5, res6, res7, vreinterpret_u16_s16(d0),
- vreinterpret_u16_s16(d1), vreinterpret_u16_s16(d2),
- vreinterpret_u16_s16(d3), fwd_offset, bck_offset,
- round_offset64, round_bits, use_jnt_comp_avg, &t0,
- &t1);
-
- vst1_lane_u32((uint32_t *)d_u8, vreinterpret_u32_u8(t0), 0);
- d_u8 += dst8_stride;
- vst1_lane_u32((uint32_t *)d_u8, vreinterpret_u32_u8(t0), 1);
- d_u8 += dst8_stride;
- vst1_lane_u32((uint32_t *)d_u8, vreinterpret_u32_u8(t1), 0);
- d_u8 += dst8_stride;
- vst1_lane_u32((uint32_t *)d_u8, vreinterpret_u32_u8(t1), 1);
- d_u8 += dst8_stride;
- } else {
- store_u16_4x4(d, dst_stride, vreinterpret_u16_s16(d0),
- vreinterpret_u16_s16(d1), vreinterpret_u16_s16(d2),
- vreinterpret_u16_s16(d3));
- d += (dst_stride << 2);
- }
-
- s0 = s4;
- s1 = s5;
- s2 = s6;
- s3 = s7;
- s4 = s8;
- s5 = s9;
- s6 = s10;
-
- s += (src_stride << 2);
- height -= 4;
-#else
- load_unaligned_u8_4x1(s, src_stride, &tu0);
- u0 = vreinterpretq_s16_u16(vmovl_u8(vreinterpret_u8_u32(tu0)));
- s7 = vget_low_s16(u0);
-
- d0 = convolve8_4x4_s16(s0, s1, s2, s3, s4, s5, s6, s7, y_filter_tmp,
- zero, shift_vec);
-
- d0 = vadd_s16(d0, round_offset64);
-
- if (conv_params->do_average) {
- __builtin_prefetch(d);
-
- res4 = vld1_u16(d);
- d += (dst_stride);
-
- compute_avg_4x1(res4, vreinterpret_u16_s16(d0), fwd_offset,
- bck_offset, round_offset64, round_bits,
- use_jnt_comp_avg, &t0);
-
- vst1_lane_u32((uint32_t *)d_u8, vreinterpret_u32_u8(t0), 0);
- d_u8 += dst8_stride;
- } else {
- vst1_u16(d, vreinterpret_u16_s16(d0));
- d += (dst_stride);
- }
-
- s0 = s1;
- s1 = s2;
- s2 = s3;
- s3 = s4;
- s4 = s5;
- s5 = s6;
- s6 = s7;
-
- s += (src_stride);
- height--;
-#endif
- } while (height > 0);
- src_ptr += 4;
- dst_ptr += 4;
- dst_u8_ptr += 4;
- width -= 4;
- } while (width > 0);
- } else {
- CONV_BUF_TYPE *d_tmp;
- int16x8_t s0, s1, s2, s3, s4, s5, s6, s7;
- int16x8_t res0;
- uint16x8_t res8;
- uint8x8_t t0, t1, t2, t3, t4, t5, t6, t7;
- const int16x8_t round_offset128 = vdupq_n_s16(round_offset);
- const int16x8_t shift_vec = vdupq_n_s16(-shift_value);
- const int16x4_t round_offset64 = vdup_n_s16(round_offset);
- const int16x8_t zero = vdupq_n_s16(0);
-#if defined(__aarch64__)
- int16x8_t s8, s9, s10, s11, s12, s13, s14;
- int16x8_t res1, res2, res3, res4, res5, res6, res7;
- uint16x8_t res10, res11, res9;
-#endif
- dst_ptr = dst;
- dst_u8_ptr = dst8;
- do {
- __builtin_prefetch(src_ptr + 0 * src_stride);
- __builtin_prefetch(src_ptr + 1 * src_stride);
- __builtin_prefetch(src_ptr + 2 * src_stride);
- __builtin_prefetch(src_ptr + 3 * src_stride);
- __builtin_prefetch(src_ptr + 4 * src_stride);
- __builtin_prefetch(src_ptr + 5 * src_stride);
- __builtin_prefetch(src_ptr + 6 * src_stride);
- __builtin_prefetch(src_ptr + 7 * src_stride);
- load_u8_8x8(src_ptr, src_stride, &t0, &t1, &t2, &t3, &t4, &t5, &t6, &t7);
-
- s0 = vreinterpretq_s16_u16(vmovl_u8(t0));
- s1 = vreinterpretq_s16_u16(vmovl_u8(t1));
- s2 = vreinterpretq_s16_u16(vmovl_u8(t2));
- s3 = vreinterpretq_s16_u16(vmovl_u8(t3));
- s4 = vreinterpretq_s16_u16(vmovl_u8(t4));
- s5 = vreinterpretq_s16_u16(vmovl_u8(t5));
- s6 = vreinterpretq_s16_u16(vmovl_u8(t6));
-
- height = h;
- s = src_ptr + (7 * src_stride);
- d_tmp = dst_ptr;
- d_u8 = dst_u8_ptr;
-
- do {
-#if defined(__aarch64__)
- load_u8_8x8(s, src_stride, &t0, &t1, &t2, &t3, &t4, &t5, &t6, &t7);
-
- s7 = vreinterpretq_s16_u16(vmovl_u8(t0));
- s8 = vreinterpretq_s16_u16(vmovl_u8(t1));
- s9 = vreinterpretq_s16_u16(vmovl_u8(t2));
- s10 = vreinterpretq_s16_u16(vmovl_u8(t3));
- s11 = vreinterpretq_s16_u16(vmovl_u8(t4));
- s12 = vreinterpretq_s16_u16(vmovl_u8(t5));
- s13 = vreinterpretq_s16_u16(vmovl_u8(t6));
- s14 = vreinterpretq_s16_u16(vmovl_u8(t7));
-
- __builtin_prefetch(dst_ptr + 0 * dst_stride);
- __builtin_prefetch(dst_ptr + 1 * dst_stride);
- __builtin_prefetch(dst_ptr + 2 * dst_stride);
- __builtin_prefetch(dst_ptr + 3 * dst_stride);
-
- res0 = convolve8_8x8_s16(s0, s1, s2, s3, s4, s5, s6, s7, y_filter_tmp,
- zero, shift_vec);
- res0 = vaddq_s16(res0, round_offset128);
- res1 = convolve8_8x8_s16(s1, s2, s3, s4, s5, s6, s7, s8, y_filter_tmp,
- zero, shift_vec);
- res1 = vaddq_s16(res1, round_offset128);
- res2 = convolve8_8x8_s16(s2, s3, s4, s5, s6, s7, s8, s9, y_filter_tmp,
- zero, shift_vec);
- res2 = vaddq_s16(res2, round_offset128);
- res3 = convolve8_8x8_s16(s3, s4, s5, s6, s7, s8, s9, s10, y_filter_tmp,
- zero, shift_vec);
- res3 = vaddq_s16(res3, round_offset128);
- res4 = convolve8_8x8_s16(s4, s5, s6, s7, s8, s9, s10, s11, y_filter_tmp,
- zero, shift_vec);
- res4 = vaddq_s16(res4, round_offset128);
- res5 = convolve8_8x8_s16(s5, s6, s7, s8, s9, s10, s11, s12,
- y_filter_tmp, zero, shift_vec);
- res5 = vaddq_s16(res5, round_offset128);
- res6 = convolve8_8x8_s16(s6, s7, s8, s9, s10, s11, s12, s13,
- y_filter_tmp, zero, shift_vec);
- res6 = vaddq_s16(res6, round_offset128);
- res7 = convolve8_8x8_s16(s7, s8, s9, s10, s11, s12, s13, s14,
- y_filter_tmp, zero, shift_vec);
- res7 = vaddq_s16(res7, round_offset128);
-
- if (conv_params->do_average) {
- __builtin_prefetch(d_tmp + 0 * dst8_stride);
- __builtin_prefetch(d_tmp + 1 * dst8_stride);
- __builtin_prefetch(d_tmp + 2 * dst8_stride);
- __builtin_prefetch(d_tmp + 3 * dst8_stride);
-
- load_u16_8x4(d_tmp, dst_stride, &res8, &res9, &res10, &res11);
- d_tmp += (dst_stride << 2);
-
- compute_avg_8x4(
- res8, res9, res10, res11, vreinterpretq_u16_s16(res0),
- vreinterpretq_u16_s16(res1), vreinterpretq_u16_s16(res2),
- vreinterpretq_u16_s16(res3), fwd_offset, bck_offset,
- round_offset64, round_bits, use_jnt_comp_avg, &t0, &t1, &t2, &t3);
-
- store_u8_8x4(d_u8, dst8_stride, t0, t1, t2, t3);
- d_u8 += (dst8_stride << 2);
-
- load_u16_8x4(d_tmp, dst_stride, &res8, &res9, &res10, &res11);
- d_tmp += (dst_stride << 2);
-
- compute_avg_8x4(
- res8, res9, res10, res11, vreinterpretq_u16_s16(res4),
- vreinterpretq_u16_s16(res5), vreinterpretq_u16_s16(res6),
- vreinterpretq_u16_s16(res7), fwd_offset, bck_offset,
- round_offset64, round_bits, use_jnt_comp_avg, &t0, &t1, &t2, &t3);
-
- store_u8_8x4(d_u8, dst8_stride, t0, t1, t2, t3);
- d_u8 += (dst8_stride << 2);
- } else {
- store_u16_8x8(
- d_tmp, dst_stride, vreinterpretq_u16_s16(res0),
- vreinterpretq_u16_s16(res1), vreinterpretq_u16_s16(res2),
- vreinterpretq_u16_s16(res3), vreinterpretq_u16_s16(res4),
- vreinterpretq_u16_s16(res5), vreinterpretq_u16_s16(res6),
- vreinterpretq_u16_s16(res7));
- d_tmp += (dst_stride << 3);
- }
-
- s0 = s8;
- s1 = s9;
- s2 = s10;
- s3 = s11;
- s4 = s12;
- s5 = s13;
- s6 = s14;
- s += (8 * src_stride);
- height -= 8;
-#else
- s7 = vreinterpretq_s16_u16(vmovl_u8(vld1_u8(s)));
-
- __builtin_prefetch(dst_ptr);
-
- res0 = convolve8_8x8_s16(s0, s1, s2, s3, s4, s5, s6, s7, y_filter_tmp,
- zero, shift_vec);
- res0 = vaddq_s16(res0, round_offset128);
-
- s0 = s1;
- s1 = s2;
- s2 = s3;
- s3 = s4;
- s4 = s5;
- s5 = s6;
- s6 = s7;
-
- if (conv_params->do_average) {
- __builtin_prefetch(d_tmp);
-
- res8 = vld1q_u16(d_tmp);
- d_tmp += (dst_stride);
-
- compute_avg_8x1(res8, vreinterpretq_u16_s16(res0), fwd_offset,
- bck_offset, round_offset64, round_bits,
- use_jnt_comp_avg, &t0);
-
- vst1_u8(d_u8, t0);
- d_u8 += (dst8_stride);
- } else {
- vst1q_u16(d_tmp, vreinterpretq_u16_s16(res0));
- d_tmp += dst_stride;
- }
-
- s += (src_stride);
- height--;
-#endif
- } while (height > 0);
- src_ptr += 8;
- dst_ptr += 8;
- dst_u8_ptr += 8;
- width -= 8;
- } while (width > 0);
- }
-}
diff --git a/third_party/aom/av1/common/arm/mem_neon.h b/third_party/aom/av1/common/arm/mem_neon.h
deleted file mode 100644
index c4ae2e784..000000000
--- a/third_party/aom/av1/common/arm/mem_neon.h
+++ /dev/null
@@ -1,494 +0,0 @@
-/*
- * Copyright (c) 2018, Alliance for Open Media. All Rights Reserved.
- *
- * Use of this source code is governed by a BSD-style license
- * that can be found in the LICENSE file in the root of the source
- * tree. An additional intellectual property rights grant can be found
- * in the file PATENTS. All contributing project authors may
- * be found in the AUTHORS file in the root of the source tree.
- */
-
-#ifndef AOM_AV1_COMMON_ARM_MEM_NEON_H_
-#define AOM_AV1_COMMON_ARM_MEM_NEON_H_
-
-#include <arm_neon.h>
-#include <string.h>
-
-static INLINE void store_row2_u8_8x8(uint8_t *s, int p, const uint8x8_t s0,
- const uint8x8_t s1) {
- vst1_u8(s, s0);
- s += p;
- vst1_u8(s, s1);
- s += p;
-}
-
-/* These intrinsics require immediate values, so we must use #defines
- to enforce that. */
-#define load_u8_4x1(s, s0, lane) \
- do { \
- *(s0) = vreinterpret_u8_u32( \
- vld1_lane_u32((uint32_t *)(s), vreinterpret_u32_u8(*(s0)), lane)); \
- } while (0)
-
-static INLINE void load_u8_8x8(const uint8_t *s, ptrdiff_t p,
- uint8x8_t *const s0, uint8x8_t *const s1,
- uint8x8_t *const s2, uint8x8_t *const s3,
- uint8x8_t *const s4, uint8x8_t *const s5,
- uint8x8_t *const s6, uint8x8_t *const s7) {
- *s0 = vld1_u8(s);
- s += p;
- *s1 = vld1_u8(s);
- s += p;
- *s2 = vld1_u8(s);
- s += p;
- *s3 = vld1_u8(s);
- s += p;
- *s4 = vld1_u8(s);
- s += p;
- *s5 = vld1_u8(s);
- s += p;
- *s6 = vld1_u8(s);
- s += p;
- *s7 = vld1_u8(s);
-}
-
-static INLINE void load_u8_8x16(const uint8_t *s, ptrdiff_t p,
- uint8x16_t *const s0, uint8x16_t *const s1,
- uint8x16_t *const s2, uint8x16_t *const s3) {
- *s0 = vld1q_u8(s);
- s += p;
- *s1 = vld1q_u8(s);
- s += p;
- *s2 = vld1q_u8(s);
- s += p;
- *s3 = vld1q_u8(s);
-}
-
-static INLINE void load_u8_8x4(const uint8_t *s, const ptrdiff_t p,
- uint8x8_t *const s0, uint8x8_t *const s1,
- uint8x8_t *const s2, uint8x8_t *const s3) {
- *s0 = vld1_u8(s);
- s += p;
- *s1 = vld1_u8(s);
- s += p;
- *s2 = vld1_u8(s);
- s += p;
- *s3 = vld1_u8(s);
-}
-
-static INLINE void load_u16_4x4(const uint16_t *s, const ptrdiff_t p,
- uint16x4_t *const s0, uint16x4_t *const s1,
- uint16x4_t *const s2, uint16x4_t *const s3) {
- *s0 = vld1_u16(s);
- s += p;
- *s1 = vld1_u16(s);
- s += p;
- *s2 = vld1_u16(s);
- s += p;
- *s3 = vld1_u16(s);
- s += p;
-}
-
-static INLINE void load_u16_8x4(const uint16_t *s, const ptrdiff_t p,
- uint16x8_t *const s0, uint16x8_t *const s1,
- uint16x8_t *const s2, uint16x8_t *const s3) {
- *s0 = vld1q_u16(s);
- s += p;
- *s1 = vld1q_u16(s);
- s += p;
- *s2 = vld1q_u16(s);
- s += p;
- *s3 = vld1q_u16(s);
- s += p;
-}
-
-static INLINE void load_s16_4x8(const int16_t *s, ptrdiff_t p,
- int16x4_t *const s0, int16x4_t *const s1,
- int16x4_t *const s2, int16x4_t *const s3,
- int16x4_t *const s4, int16x4_t *const s5,
- int16x4_t *const s6, int16x4_t *const s7) {
- *s0 = vld1_s16(s);
- s += p;
- *s1 = vld1_s16(s);
- s += p;
- *s2 = vld1_s16(s);
- s += p;
- *s3 = vld1_s16(s);
- s += p;
- *s4 = vld1_s16(s);
- s += p;
- *s5 = vld1_s16(s);
- s += p;
- *s6 = vld1_s16(s);
- s += p;
- *s7 = vld1_s16(s);
-}
-
-static INLINE void load_s16_4x4(const int16_t *s, ptrdiff_t p,
- int16x4_t *const s0, int16x4_t *const s1,
- int16x4_t *const s2, int16x4_t *const s3) {
- *s0 = vld1_s16(s);
- s += p;
- *s1 = vld1_s16(s);
- s += p;
- *s2 = vld1_s16(s);
- s += p;
- *s3 = vld1_s16(s);
-}
-
-/* These intrinsics require immediate values, so we must use #defines
- to enforce that. */
-#define store_u8_4x1(s, s0, lane) \
- do { \
- vst1_lane_u32((uint32_t *)(s), vreinterpret_u32_u8(s0), lane); \
- } while (0)
-
-static INLINE void store_u8_8x8(uint8_t *s, ptrdiff_t p, const uint8x8_t s0,
- const uint8x8_t s1, const uint8x8_t s2,
- const uint8x8_t s3, const uint8x8_t s4,
- const uint8x8_t s5, const uint8x8_t s6,
- const uint8x8_t s7) {
- vst1_u8(s, s0);
- s += p;
- vst1_u8(s, s1);
- s += p;
- vst1_u8(s, s2);
- s += p;
- vst1_u8(s, s3);
- s += p;
- vst1_u8(s, s4);
- s += p;
- vst1_u8(s, s5);
- s += p;
- vst1_u8(s, s6);
- s += p;
- vst1_u8(s, s7);
-}
-
-static INLINE void store_u8_8x4(uint8_t *s, ptrdiff_t p, const uint8x8_t s0,
- const uint8x8_t s1, const uint8x8_t s2,
- const uint8x8_t s3) {
- vst1_u8(s, s0);
- s += p;
- vst1_u8(s, s1);
- s += p;
- vst1_u8(s, s2);
- s += p;
- vst1_u8(s, s3);
-}
-
-static INLINE void store_u8_8x16(uint8_t *s, ptrdiff_t p, const uint8x16_t s0,
- const uint8x16_t s1, const uint8x16_t s2,
- const uint8x16_t s3) {
- vst1q_u8(s, s0);
- s += p;
- vst1q_u8(s, s1);
- s += p;
- vst1q_u8(s, s2);
- s += p;
- vst1q_u8(s, s3);
-}
-
-static INLINE void store_u16_8x8(uint16_t *s, ptrdiff_t dst_stride,
- const uint16x8_t s0, const uint16x8_t s1,
- const uint16x8_t s2, const uint16x8_t s3,
- const uint16x8_t s4, const uint16x8_t s5,
- const uint16x8_t s6, const uint16x8_t s7) {
- vst1q_u16(s, s0);
- s += dst_stride;
- vst1q_u16(s, s1);
- s += dst_stride;
- vst1q_u16(s, s2);
- s += dst_stride;
- vst1q_u16(s, s3);
- s += dst_stride;
- vst1q_u16(s, s4);
- s += dst_stride;
- vst1q_u16(s, s5);
- s += dst_stride;
- vst1q_u16(s, s6);
- s += dst_stride;
- vst1q_u16(s, s7);
-}
-
-static INLINE void store_u16_4x4(uint16_t *s, ptrdiff_t dst_stride,
- const uint16x4_t s0, const uint16x4_t s1,
- const uint16x4_t s2, const uint16x4_t s3) {
- vst1_u16(s, s0);
- s += dst_stride;
- vst1_u16(s, s1);
- s += dst_stride;
- vst1_u16(s, s2);
- s += dst_stride;
- vst1_u16(s, s3);
-}
-
-static INLINE void store_u16_8x4(uint16_t *s, ptrdiff_t dst_stride,
- const uint16x8_t s0, const uint16x8_t s1,
- const uint16x8_t s2, const uint16x8_t s3) {
- vst1q_u16(s, s0);
- s += dst_stride;
- vst1q_u16(s, s1);
- s += dst_stride;
- vst1q_u16(s, s2);
- s += dst_stride;
- vst1q_u16(s, s3);
-}
-
-static INLINE void store_s16_8x8(int16_t *s, ptrdiff_t dst_stride,
- const int16x8_t s0, const int16x8_t s1,
- const int16x8_t s2, const int16x8_t s3,
- const int16x8_t s4, const int16x8_t s5,
- const int16x8_t s6, const int16x8_t s7) {
- vst1q_s16(s, s0);
- s += dst_stride;
- vst1q_s16(s, s1);
- s += dst_stride;
- vst1q_s16(s, s2);
- s += dst_stride;
- vst1q_s16(s, s3);
- s += dst_stride;
- vst1q_s16(s, s4);
- s += dst_stride;
- vst1q_s16(s, s5);
- s += dst_stride;
- vst1q_s16(s, s6);
- s += dst_stride;
- vst1q_s16(s, s7);
-}
-
-static INLINE void store_s16_4x4(int16_t *s, ptrdiff_t dst_stride,
- const int16x4_t s0, const int16x4_t s1,
- const int16x4_t s2, const int16x4_t s3) {
- vst1_s16(s, s0);
- s += dst_stride;
- vst1_s16(s, s1);
- s += dst_stride;
- vst1_s16(s, s2);
- s += dst_stride;
- vst1_s16(s, s3);
-}
-
-static INLINE void store_s16_8x4(int16_t *s, ptrdiff_t dst_stride,
- const int16x8_t s0, const int16x8_t s1,
- const int16x8_t s2, const int16x8_t s3) {
- vst1q_s16(s, s0);
- s += dst_stride;
- vst1q_s16(s, s1);
- s += dst_stride;
- vst1q_s16(s, s2);
- s += dst_stride;
- vst1q_s16(s, s3);
-}
-
-static INLINE void load_s16_8x8(const int16_t *s, ptrdiff_t p,
- int16x8_t *const s0, int16x8_t *const s1,
- int16x8_t *const s2, int16x8_t *const s3,
- int16x8_t *const s4, int16x8_t *const s5,
- int16x8_t *const s6, int16x8_t *const s7) {
- *s0 = vld1q_s16(s);
- s += p;
- *s1 = vld1q_s16(s);
- s += p;
- *s2 = vld1q_s16(s);
- s += p;
- *s3 = vld1q_s16(s);
- s += p;
- *s4 = vld1q_s16(s);
- s += p;
- *s5 = vld1q_s16(s);
- s += p;
- *s6 = vld1q_s16(s);
- s += p;
- *s7 = vld1q_s16(s);
-}
-
-static INLINE void load_s16_8x4(const int16_t *s, ptrdiff_t p,
- int16x8_t *const s0, int16x8_t *const s1,
- int16x8_t *const s2, int16x8_t *const s3) {
- *s0 = vld1q_s16(s);
- s += p;
- *s1 = vld1q_s16(s);
- s += p;
- *s2 = vld1q_s16(s);
- s += p;
- *s3 = vld1q_s16(s);
-}
-
-static INLINE void load_unaligned_u8_4x8(const uint8_t *buf, int stride,
- uint32x2_t *tu0, uint32x2_t *tu1,
- uint32x2_t *tu2, uint32x2_t *tu3) {
- uint32_t a;
-
- memcpy(&a, buf, 4);
- buf += stride;
- *tu0 = vset_lane_u32(a, *tu0, 0);
- memcpy(&a, buf, 4);
- buf += stride;
- *tu0 = vset_lane_u32(a, *tu0, 1);
- memcpy(&a, buf, 4);
- buf += stride;
- *tu1 = vset_lane_u32(a, *tu1, 0);
- memcpy(&a, buf, 4);
- buf += stride;
- *tu1 = vset_lane_u32(a, *tu1, 1);
- memcpy(&a, buf, 4);
- buf += stride;
- *tu2 = vset_lane_u32(a, *tu2, 0);
- memcpy(&a, buf, 4);
- buf += stride;
- *tu2 = vset_lane_u32(a, *tu2, 1);
- memcpy(&a, buf, 4);
- buf += stride;
- *tu3 = vset_lane_u32(a, *tu3, 0);
- memcpy(&a, buf, 4);
- *tu3 = vset_lane_u32(a, *tu3, 1);
-}
-
-static INLINE void load_unaligned_u8_4x4(const uint8_t *buf, int stride,
- uint32x2_t *tu0, uint32x2_t *tu1) {
- uint32_t a;
-
- memcpy(&a, buf, 4);
- buf += stride;
- *tu0 = vset_lane_u32(a, *tu0, 0);
- memcpy(&a, buf, 4);
- buf += stride;
- *tu0 = vset_lane_u32(a, *tu0, 1);
- memcpy(&a, buf, 4);
- buf += stride;
- *tu1 = vset_lane_u32(a, *tu1, 0);
- memcpy(&a, buf, 4);
- *tu1 = vset_lane_u32(a, *tu1, 1);
-}
-
-static INLINE void load_unaligned_u8_4x1(const uint8_t *buf, int stride,
- uint32x2_t *tu0) {
- uint32_t a;
-
- memcpy(&a, buf, 4);
- buf += stride;
- *tu0 = vset_lane_u32(a, *tu0, 0);
-}
-
-static INLINE void load_unaligned_u8_4x2(const uint8_t *buf, int stride,
- uint32x2_t *tu0) {
- uint32_t a;
-
- memcpy(&a, buf, 4);
- buf += stride;
- *tu0 = vset_lane_u32(a, *tu0, 0);
- memcpy(&a, buf, 4);
- buf += stride;
- *tu0 = vset_lane_u32(a, *tu0, 1);
-}
-
-static INLINE void load_unaligned_u8_2x2(const uint8_t *buf, int stride,
- uint16x4_t *tu0) {
- uint16_t a;
-
- memcpy(&a, buf, 2);
- buf += stride;
- *tu0 = vset_lane_u16(a, *tu0, 0);
- memcpy(&a, buf, 2);
- buf += stride;
- *tu0 = vset_lane_u16(a, *tu0, 1);
-}
-
-static INLINE void load_u8_16x8(const uint8_t *s, ptrdiff_t p,
- uint8x16_t *const s0, uint8x16_t *const s1,
- uint8x16_t *const s2, uint8x16_t *const s3,
- uint8x16_t *const s4, uint8x16_t *const s5,
- uint8x16_t *const s6, uint8x16_t *const s7) {
- *s0 = vld1q_u8(s);
- s += p;
- *s1 = vld1q_u8(s);
- s += p;
- *s2 = vld1q_u8(s);
- s += p;
- *s3 = vld1q_u8(s);
- s += p;
- *s4 = vld1q_u8(s);
- s += p;
- *s5 = vld1q_u8(s);
- s += p;
- *s6 = vld1q_u8(s);
- s += p;
- *s7 = vld1q_u8(s);
-}
-
-static INLINE void load_u8_16x4(const uint8_t *s, ptrdiff_t p,
- uint8x16_t *const s0, uint8x16_t *const s1,
- uint8x16_t *const s2, uint8x16_t *const s3) {
- *s0 = vld1q_u8(s);
- s += p;
- *s1 = vld1q_u8(s);
- s += p;
- *s2 = vld1q_u8(s);
- s += p;
- *s3 = vld1q_u8(s);
-}
-
-static INLINE void load_unaligned_u16_4x4(const uint16_t *buf, uint32_t stride,
- uint64x2_t *tu0, uint64x2_t *tu1) {
- uint64_t a;
-
- memcpy(&a, buf, 8);
- buf += stride;
- *tu0 = vsetq_lane_u64(a, *tu0, 0);
- memcpy(&a, buf, 8);
- buf += stride;
- *tu0 = vsetq_lane_u64(a, *tu0, 1);
- memcpy(&a, buf, 8);
- buf += stride;
- *tu1 = vsetq_lane_u64(a, *tu1, 0);
- memcpy(&a, buf, 8);
- *tu1 = vsetq_lane_u64(a, *tu1, 1);
-}
-
-static INLINE void load_s32_4x4(int32_t *s, int32_t p, int32x4_t *s1,
- int32x4_t *s2, int32x4_t *s3, int32x4_t *s4) {
- *s1 = vld1q_s32(s);
- s += p;
- *s2 = vld1q_s32(s);
- s += p;
- *s3 = vld1q_s32(s);
- s += p;
- *s4 = vld1q_s32(s);
-}
-
-static INLINE void store_s32_4x4(int32_t *s, int32_t p, int32x4_t s1,
- int32x4_t s2, int32x4_t s3, int32x4_t s4) {
- vst1q_s32(s, s1);
- s += p;
- vst1q_s32(s, s2);
- s += p;
- vst1q_s32(s, s3);
- s += p;
- vst1q_s32(s, s4);
-}
-
-static INLINE void load_u32_4x4(uint32_t *s, int32_t p, uint32x4_t *s1,
- uint32x4_t *s2, uint32x4_t *s3,
- uint32x4_t *s4) {
- *s1 = vld1q_u32(s);
- s += p;
- *s2 = vld1q_u32(s);
- s += p;
- *s3 = vld1q_u32(s);
- s += p;
- *s4 = vld1q_u32(s);
-}
-
-static INLINE void store_u32_4x4(uint32_t *s, int32_t p, uint32x4_t s1,
- uint32x4_t s2, uint32x4_t s3, uint32x4_t s4) {
- vst1q_u32(s, s1);
- s += p;
- vst1q_u32(s, s2);
- s += p;
- vst1q_u32(s, s3);
- s += p;
- vst1q_u32(s, s4);
-}
-
-#endif // AOM_AV1_COMMON_ARM_MEM_NEON_H_
diff --git a/third_party/aom/av1/common/arm/reconinter_neon.c b/third_party/aom/av1/common/arm/reconinter_neon.c
deleted file mode 100644
index 44e064195..000000000
--- a/third_party/aom/av1/common/arm/reconinter_neon.c
+++ /dev/null
@@ -1,86 +0,0 @@
-/*
- *
- * Copyright (c) 2018, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#include <arm_neon.h>
-#include <assert.h>
-
-#include "aom/aom_integer.h"
-#include "aom_dsp/blend.h"
-#include "aom_ports/mem.h"
-#include "av1/common/arm/mem_neon.h"
-#include "av1/common/blockd.h"
-#include "config/av1_rtcd.h"
-
-void av1_build_compound_diffwtd_mask_d16_neon(
- uint8_t *mask, DIFFWTD_MASK_TYPE mask_type, const CONV_BUF_TYPE *src0,
- int src0_stride, const CONV_BUF_TYPE *src1, int src1_stride, int h, int w,
- ConvolveParams *conv_params, int bd) {
- assert(h >= 4);
- assert(w >= 4);
- assert((mask_type == DIFFWTD_38_INV) || (mask_type == DIFFWTD_38));
- const int round =
- 2 * FILTER_BITS - conv_params->round_0 - conv_params->round_1 + (bd - 8);
- uint16x8_t diff_q, tmp0, tmp1;
- uint8x8_t diff_d, diff_select;
- const CONV_BUF_TYPE *src0_1, *src1_1;
- const int16x8_t dup_round = vdupq_n_s16((int16_t)(-round));
- const uint8x8_t dup_38 = vdup_n_u8(38);
- const uint8x8_t dup_64 = vdup_n_u8(AOM_BLEND_A64_MAX_ALPHA);
- if (mask_type == DIFFWTD_38) {
- diff_select = vdup_n_u8(255);
- } else {
- diff_select = vdup_n_u8(0);
- }
- if (w >= 8) {
- for (int i = 0; i < h; ++i) {
- src0_1 = src0;
- src1_1 = src1;
- for (int j = 0; j < w; j += 8) {
- __builtin_prefetch(src0_1);
- __builtin_prefetch(src1_1);
- diff_q = vabdq_u16(vld1q_u16(src0_1), vld1q_u16(src1_1));
- diff_q = vrshlq_u16(diff_q, dup_round);
- diff_d = vshrn_n_u16(diff_q, DIFF_FACTOR_LOG2);
- diff_d = vmin_u8(vadd_u8(diff_d, dup_38), dup_64);
- diff_d = vbsl_u8(diff_select, diff_d, vsub_u8(dup_64, diff_d));
- vst1_u8(mask, diff_d);
- src0_1 += 8;
- src1_1 += 8;
- mask += 8;
- }
- src0 += src0_stride;
- src1 += src1_stride;
- }
- } else if (w == 4) {
- for (int i = 0; i < h; i += 2) {
- src0_1 = src0;
- src1_1 = src1;
- __builtin_prefetch(src0_1 + 0 * src0_stride);
- __builtin_prefetch(src0_1 + 1 * src0_stride);
- __builtin_prefetch(src1_1 + 0 * src1_stride);
- __builtin_prefetch(src1_1 + 1 * src1_stride);
- tmp0 = vcombine_u16(vld1_u16(src0_1 + (0 * src0_stride)),
- vld1_u16(src0_1 + (1 * src0_stride)));
- tmp1 = vcombine_u16(vld1_u16(src1_1 + (0 * src1_stride)),
- vld1_u16(src1_1 + (1 * src1_stride)));
- diff_q = vabdq_u16(tmp0, tmp1);
- diff_q = vrshlq_u16(diff_q, dup_round);
- diff_d = vshrn_n_u16(diff_q, DIFF_FACTOR_LOG2);
- diff_d = vmin_u8(vadd_u8(diff_d, dup_38), dup_64);
- diff_d = vbsl_u8(diff_select, diff_d, vsub_u8(dup_64, diff_d));
- vst1_u8(mask, diff_d);
- src0 += src0_stride * 2;
- src1 += src1_stride * 2;
- mask += w * 2;
- }
- }
-}
diff --git a/third_party/aom/av1/common/arm/selfguided_neon.c b/third_party/aom/av1/common/arm/selfguided_neon.c
deleted file mode 100644
index b3a37c4cb..000000000
--- a/third_party/aom/av1/common/arm/selfguided_neon.c
+++ /dev/null
@@ -1,1508 +0,0 @@
-/*
- * Copyright (c) 2018, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#include <arm_neon.h>
-#include <assert.h>
-
-#include "config/aom_config.h"
-#include "config/av1_rtcd.h"
-
-#include "aom_dsp/aom_dsp_common.h"
-#include "aom_dsp/txfm_common.h"
-#include "aom_mem/aom_mem.h"
-#include "aom_ports/mem.h"
-#include "av1/common/common.h"
-#include "av1/common/onyxc_int.h"
-#include "av1/common/resize.h"
-#include "av1/common/restoration.h"
-#include "av1/common/arm/mem_neon.h"
-#include "av1/common/arm/transpose_neon.h"
-
-// Constants used for right shift in final_filter calculation.
-#define NB_EVEN 5
-#define NB_ODD 4
-
-static INLINE void calc_ab_fast_internal_common(
- uint32x4_t s0, uint32x4_t s1, uint32x4_t s2, uint32x4_t s3, uint32x4_t s4,
- uint32x4_t s5, uint32x4_t s6, uint32x4_t s7, int32x4_t sr4, int32x4_t sr5,
- int32x4_t sr6, int32x4_t sr7, uint32x4_t const_n_val, uint32x4_t s_vec,
- uint32x4_t const_val, uint32x4_t one_by_n_minus_1_vec,
- uint16x4_t sgrproj_sgr, int32_t *src1, uint16_t *dst_A16, int32_t *src2,
- const int buf_stride) {
- uint32x4_t q0, q1, q2, q3;
- uint32x4_t p0, p1, p2, p3;
- uint16x4_t d0, d1, d2, d3;
-
- s0 = vmulq_u32(s0, const_n_val);
- s1 = vmulq_u32(s1, const_n_val);
- s2 = vmulq_u32(s2, const_n_val);
- s3 = vmulq_u32(s3, const_n_val);
-
- q0 = vmulq_u32(s4, s4);
- q1 = vmulq_u32(s5, s5);
- q2 = vmulq_u32(s6, s6);
- q3 = vmulq_u32(s7, s7);
-
- p0 = vcleq_u32(q0, s0);
- p1 = vcleq_u32(q1, s1);
- p2 = vcleq_u32(q2, s2);
- p3 = vcleq_u32(q3, s3);
-
- q0 = vsubq_u32(s0, q0);
- q1 = vsubq_u32(s1, q1);
- q2 = vsubq_u32(s2, q2);
- q3 = vsubq_u32(s3, q3);
-
- p0 = vandq_u32(p0, q0);
- p1 = vandq_u32(p1, q1);
- p2 = vandq_u32(p2, q2);
- p3 = vandq_u32(p3, q3);
-
- p0 = vmulq_u32(p0, s_vec);
- p1 = vmulq_u32(p1, s_vec);
- p2 = vmulq_u32(p2, s_vec);
- p3 = vmulq_u32(p3, s_vec);
-
- p0 = vrshrq_n_u32(p0, SGRPROJ_MTABLE_BITS);
- p1 = vrshrq_n_u32(p1, SGRPROJ_MTABLE_BITS);
- p2 = vrshrq_n_u32(p2, SGRPROJ_MTABLE_BITS);
- p3 = vrshrq_n_u32(p3, SGRPROJ_MTABLE_BITS);
-
- p0 = vminq_u32(p0, const_val);
- p1 = vminq_u32(p1, const_val);
- p2 = vminq_u32(p2, const_val);
- p3 = vminq_u32(p3, const_val);
-
- {
- store_u32_4x4((uint32_t *)src1, buf_stride, p0, p1, p2, p3);
-
- for (int x = 0; x < 4; x++) {
- for (int y = 0; y < 4; y++) {
- dst_A16[x * buf_stride + y] = x_by_xplus1[src1[x * buf_stride + y]];
- }
- }
- load_u16_4x4(dst_A16, buf_stride, &d0, &d1, &d2, &d3);
- }
- p0 = vsubl_u16(sgrproj_sgr, d0);
- p1 = vsubl_u16(sgrproj_sgr, d1);
- p2 = vsubl_u16(sgrproj_sgr, d2);
- p3 = vsubl_u16(sgrproj_sgr, d3);
-
- s4 = vmulq_u32(vreinterpretq_u32_s32(sr4), one_by_n_minus_1_vec);
- s5 = vmulq_u32(vreinterpretq_u32_s32(sr5), one_by_n_minus_1_vec);
- s6 = vmulq_u32(vreinterpretq_u32_s32(sr6), one_by_n_minus_1_vec);
- s7 = vmulq_u32(vreinterpretq_u32_s32(sr7), one_by_n_minus_1_vec);
-
- s4 = vmulq_u32(s4, p0);
- s5 = vmulq_u32(s5, p1);
- s6 = vmulq_u32(s6, p2);
- s7 = vmulq_u32(s7, p3);
-
- p0 = vrshrq_n_u32(s4, SGRPROJ_RECIP_BITS);
- p1 = vrshrq_n_u32(s5, SGRPROJ_RECIP_BITS);
- p2 = vrshrq_n_u32(s6, SGRPROJ_RECIP_BITS);
- p3 = vrshrq_n_u32(s7, SGRPROJ_RECIP_BITS);
-
- store_s32_4x4(src2, buf_stride, vreinterpretq_s32_u32(p0),
- vreinterpretq_s32_u32(p1), vreinterpretq_s32_u32(p2),
- vreinterpretq_s32_u32(p3));
-}
-static INLINE void calc_ab_internal_common(
- uint32x4_t s0, uint32x4_t s1, uint32x4_t s2, uint32x4_t s3, uint32x4_t s4,
- uint32x4_t s5, uint32x4_t s6, uint32x4_t s7, uint16x8_t s16_0,
- uint16x8_t s16_1, uint16x8_t s16_2, uint16x8_t s16_3, uint16x8_t s16_4,
- uint16x8_t s16_5, uint16x8_t s16_6, uint16x8_t s16_7,
- uint32x4_t const_n_val, uint32x4_t s_vec, uint32x4_t const_val,
- uint16x4_t one_by_n_minus_1_vec, uint16x8_t sgrproj_sgr, int32_t *src1,
- uint16_t *dst_A16, int32_t *dst2, const int buf_stride) {
- uint16x4_t d0, d1, d2, d3, d4, d5, d6, d7;
- uint32x4_t q0, q1, q2, q3, q4, q5, q6, q7;
- uint32x4_t p0, p1, p2, p3, p4, p5, p6, p7;
-
- s0 = vmulq_u32(s0, const_n_val);
- s1 = vmulq_u32(s1, const_n_val);
- s2 = vmulq_u32(s2, const_n_val);
- s3 = vmulq_u32(s3, const_n_val);
- s4 = vmulq_u32(s4, const_n_val);
- s5 = vmulq_u32(s5, const_n_val);
- s6 = vmulq_u32(s6, const_n_val);
- s7 = vmulq_u32(s7, const_n_val);
-
- d0 = vget_low_u16(s16_4);
- d1 = vget_low_u16(s16_5);
- d2 = vget_low_u16(s16_6);
- d3 = vget_low_u16(s16_7);
- d4 = vget_high_u16(s16_4);
- d5 = vget_high_u16(s16_5);
- d6 = vget_high_u16(s16_6);
- d7 = vget_high_u16(s16_7);
-
- q0 = vmull_u16(d0, d0);
- q1 = vmull_u16(d1, d1);
- q2 = vmull_u16(d2, d2);
- q3 = vmull_u16(d3, d3);
- q4 = vmull_u16(d4, d4);
- q5 = vmull_u16(d5, d5);
- q6 = vmull_u16(d6, d6);
- q7 = vmull_u16(d7, d7);
-
- p0 = vcleq_u32(q0, s0);
- p1 = vcleq_u32(q1, s1);
- p2 = vcleq_u32(q2, s2);
- p3 = vcleq_u32(q3, s3);
- p4 = vcleq_u32(q4, s4);
- p5 = vcleq_u32(q5, s5);
- p6 = vcleq_u32(q6, s6);
- p7 = vcleq_u32(q7, s7);
-
- q0 = vsubq_u32(s0, q0);
- q1 = vsubq_u32(s1, q1);
- q2 = vsubq_u32(s2, q2);
- q3 = vsubq_u32(s3, q3);
- q4 = vsubq_u32(s4, q4);
- q5 = vsubq_u32(s5, q5);
- q6 = vsubq_u32(s6, q6);
- q7 = vsubq_u32(s7, q7);
-
- p0 = vandq_u32(p0, q0);
- p1 = vandq_u32(p1, q1);
- p2 = vandq_u32(p2, q2);
- p3 = vandq_u32(p3, q3);
- p4 = vandq_u32(p4, q4);
- p5 = vandq_u32(p5, q5);
- p6 = vandq_u32(p6, q6);
- p7 = vandq_u32(p7, q7);
-
- p0 = vmulq_u32(p0, s_vec);
- p1 = vmulq_u32(p1, s_vec);
- p2 = vmulq_u32(p2, s_vec);
- p3 = vmulq_u32(p3, s_vec);
- p4 = vmulq_u32(p4, s_vec);
- p5 = vmulq_u32(p5, s_vec);
- p6 = vmulq_u32(p6, s_vec);
- p7 = vmulq_u32(p7, s_vec);
-
- p0 = vrshrq_n_u32(p0, SGRPROJ_MTABLE_BITS);
- p1 = vrshrq_n_u32(p1, SGRPROJ_MTABLE_BITS);
- p2 = vrshrq_n_u32(p2, SGRPROJ_MTABLE_BITS);
- p3 = vrshrq_n_u32(p3, SGRPROJ_MTABLE_BITS);
- p4 = vrshrq_n_u32(p4, SGRPROJ_MTABLE_BITS);
- p5 = vrshrq_n_u32(p5, SGRPROJ_MTABLE_BITS);
- p6 = vrshrq_n_u32(p6, SGRPROJ_MTABLE_BITS);
- p7 = vrshrq_n_u32(p7, SGRPROJ_MTABLE_BITS);
-
- p0 = vminq_u32(p0, const_val);
- p1 = vminq_u32(p1, const_val);
- p2 = vminq_u32(p2, const_val);
- p3 = vminq_u32(p3, const_val);
- p4 = vminq_u32(p4, const_val);
- p5 = vminq_u32(p5, const_val);
- p6 = vminq_u32(p6, const_val);
- p7 = vminq_u32(p7, const_val);
-
- {
- store_u32_4x4((uint32_t *)src1, buf_stride, p0, p1, p2, p3);
- store_u32_4x4((uint32_t *)src1 + 4, buf_stride, p4, p5, p6, p7);
-
- for (int x = 0; x < 4; x++) {
- for (int y = 0; y < 8; y++) {
- dst_A16[x * buf_stride + y] = x_by_xplus1[src1[x * buf_stride + y]];
- }
- }
- load_u16_8x4(dst_A16, buf_stride, &s16_4, &s16_5, &s16_6, &s16_7);
- }
-
- s16_4 = vsubq_u16(sgrproj_sgr, s16_4);
- s16_5 = vsubq_u16(sgrproj_sgr, s16_5);
- s16_6 = vsubq_u16(sgrproj_sgr, s16_6);
- s16_7 = vsubq_u16(sgrproj_sgr, s16_7);
-
- s0 = vmull_u16(vget_low_u16(s16_0), one_by_n_minus_1_vec);
- s1 = vmull_u16(vget_low_u16(s16_1), one_by_n_minus_1_vec);
- s2 = vmull_u16(vget_low_u16(s16_2), one_by_n_minus_1_vec);
- s3 = vmull_u16(vget_low_u16(s16_3), one_by_n_minus_1_vec);
- s4 = vmull_u16(vget_high_u16(s16_0), one_by_n_minus_1_vec);
- s5 = vmull_u16(vget_high_u16(s16_1), one_by_n_minus_1_vec);
- s6 = vmull_u16(vget_high_u16(s16_2), one_by_n_minus_1_vec);
- s7 = vmull_u16(vget_high_u16(s16_3), one_by_n_minus_1_vec);
-
- s0 = vmulq_u32(s0, vmovl_u16(vget_low_u16(s16_4)));
- s1 = vmulq_u32(s1, vmovl_u16(vget_low_u16(s16_5)));
- s2 = vmulq_u32(s2, vmovl_u16(vget_low_u16(s16_6)));
- s3 = vmulq_u32(s3, vmovl_u16(vget_low_u16(s16_7)));
- s4 = vmulq_u32(s4, vmovl_u16(vget_high_u16(s16_4)));
- s5 = vmulq_u32(s5, vmovl_u16(vget_high_u16(s16_5)));
- s6 = vmulq_u32(s6, vmovl_u16(vget_high_u16(s16_6)));
- s7 = vmulq_u32(s7, vmovl_u16(vget_high_u16(s16_7)));
-
- p0 = vrshrq_n_u32(s0, SGRPROJ_RECIP_BITS);
- p1 = vrshrq_n_u32(s1, SGRPROJ_RECIP_BITS);
- p2 = vrshrq_n_u32(s2, SGRPROJ_RECIP_BITS);
- p3 = vrshrq_n_u32(s3, SGRPROJ_RECIP_BITS);
- p4 = vrshrq_n_u32(s4, SGRPROJ_RECIP_BITS);
- p5 = vrshrq_n_u32(s5, SGRPROJ_RECIP_BITS);
- p6 = vrshrq_n_u32(s6, SGRPROJ_RECIP_BITS);
- p7 = vrshrq_n_u32(s7, SGRPROJ_RECIP_BITS);
-
- store_s32_4x4(dst2, buf_stride, vreinterpretq_s32_u32(p0),
- vreinterpretq_s32_u32(p1), vreinterpretq_s32_u32(p2),
- vreinterpretq_s32_u32(p3));
- store_s32_4x4(dst2 + 4, buf_stride, vreinterpretq_s32_u32(p4),
- vreinterpretq_s32_u32(p5), vreinterpretq_s32_u32(p6),
- vreinterpretq_s32_u32(p7));
-}
-
-static INLINE void boxsum2_square_sum_calc(
- int16x4_t t1, int16x4_t t2, int16x4_t t3, int16x4_t t4, int16x4_t t5,
- int16x4_t t6, int16x4_t t7, int16x4_t t8, int16x4_t t9, int16x4_t t10,
- int16x4_t t11, int32x4_t *r0, int32x4_t *r1, int32x4_t *r2, int32x4_t *r3) {
- int32x4_t d1, d2, d3, d4, d5, d6, d7, d8, d9, d10, d11;
- int32x4_t r12, r34, r67, r89, r1011;
- int32x4_t r345, r6789, r789;
-
- d1 = vmull_s16(t1, t1);
- d2 = vmull_s16(t2, t2);
- d3 = vmull_s16(t3, t3);
- d4 = vmull_s16(t4, t4);
- d5 = vmull_s16(t5, t5);
- d6 = vmull_s16(t6, t6);
- d7 = vmull_s16(t7, t7);
- d8 = vmull_s16(t8, t8);
- d9 = vmull_s16(t9, t9);
- d10 = vmull_s16(t10, t10);
- d11 = vmull_s16(t11, t11);
-
- r12 = vaddq_s32(d1, d2);
- r34 = vaddq_s32(d3, d4);
- r67 = vaddq_s32(d6, d7);
- r89 = vaddq_s32(d8, d9);
- r1011 = vaddq_s32(d10, d11);
- r345 = vaddq_s32(r34, d5);
- r6789 = vaddq_s32(r67, r89);
- r789 = vsubq_s32(r6789, d6);
- *r0 = vaddq_s32(r12, r345);
- *r1 = vaddq_s32(r67, r345);
- *r2 = vaddq_s32(d5, r6789);
- *r3 = vaddq_s32(r789, r1011);
-}
-
-static INLINE void boxsum2(int16_t *src, const int src_stride, int16_t *dst16,
- int32_t *dst32, int32_t *dst2, const int dst_stride,
- const int width, const int height) {
- assert(width > 2 * SGRPROJ_BORDER_HORZ);
- assert(height > 2 * SGRPROJ_BORDER_VERT);
-
- int16_t *dst1_16_ptr, *src_ptr;
- int32_t *dst2_ptr;
- int h, w, count = 0;
- const int dst_stride_2 = (dst_stride << 1);
- const int dst_stride_8 = (dst_stride << 3);
-
- dst1_16_ptr = dst16;
- dst2_ptr = dst2;
- src_ptr = src;
- w = width;
- {
- int16x8_t t1, t2, t3, t4, t5, t6, t7;
- int16x8_t t8, t9, t10, t11, t12;
-
- int16x8_t q12345, q56789, q34567, q7891011;
- int16x8_t q12, q34, q67, q89, q1011;
- int16x8_t q345, q6789, q789;
-
- int32x4_t r12345, r56789, r34567, r7891011;
-
- do {
- h = height;
- dst1_16_ptr = dst16 + (count << 3);
- dst2_ptr = dst2 + (count << 3);
- src_ptr = src + (count << 3);
-
- dst1_16_ptr += dst_stride_2;
- dst2_ptr += dst_stride_2;
- do {
- load_s16_8x4(src_ptr, src_stride, &t1, &t2, &t3, &t4);
- src_ptr += 4 * src_stride;
- load_s16_8x4(src_ptr, src_stride, &t5, &t6, &t7, &t8);
- src_ptr += 4 * src_stride;
- load_s16_8x4(src_ptr, src_stride, &t9, &t10, &t11, &t12);
-
- q12 = vaddq_s16(t1, t2);
- q34 = vaddq_s16(t3, t4);
- q67 = vaddq_s16(t6, t7);
- q89 = vaddq_s16(t8, t9);
- q1011 = vaddq_s16(t10, t11);
- q345 = vaddq_s16(q34, t5);
- q6789 = vaddq_s16(q67, q89);
- q789 = vaddq_s16(q89, t7);
- q12345 = vaddq_s16(q12, q345);
- q34567 = vaddq_s16(q67, q345);
- q56789 = vaddq_s16(t5, q6789);
- q7891011 = vaddq_s16(q789, q1011);
-
- store_s16_8x4(dst1_16_ptr, dst_stride_2, q12345, q34567, q56789,
- q7891011);
- dst1_16_ptr += dst_stride_8;
-
- boxsum2_square_sum_calc(
- vget_low_s16(t1), vget_low_s16(t2), vget_low_s16(t3),
- vget_low_s16(t4), vget_low_s16(t5), vget_low_s16(t6),
- vget_low_s16(t7), vget_low_s16(t8), vget_low_s16(t9),
- vget_low_s16(t10), vget_low_s16(t11), &r12345, &r34567, &r56789,
- &r7891011);
-
- store_s32_4x4(dst2_ptr, dst_stride_2, r12345, r34567, r56789, r7891011);
-
- boxsum2_square_sum_calc(
- vget_high_s16(t1), vget_high_s16(t2), vget_high_s16(t3),
- vget_high_s16(t4), vget_high_s16(t5), vget_high_s16(t6),
- vget_high_s16(t7), vget_high_s16(t8), vget_high_s16(t9),
- vget_high_s16(t10), vget_high_s16(t11), &r12345, &r34567, &r56789,
- &r7891011);
-
- store_s32_4x4(dst2_ptr + 4, dst_stride_2, r12345, r34567, r56789,
- r7891011);
- dst2_ptr += (dst_stride_8);
- h -= 8;
- } while (h > 0);
- w -= 8;
- count++;
- } while (w > 0);
- }
-
- {
- int16x4_t s1, s2, s3, s4, s5, s6, s7, s8;
- int32x4_t d1, d2, d3, d4, d5, d6, d7, d8;
- int32x4_t q12345, q34567, q23456, q45678;
- int32x4_t q23, q45, q67;
- int32x4_t q2345, q4567;
-
- int32x4_t r12345, r34567, r23456, r45678;
- int32x4_t r23, r45, r67;
- int32x4_t r2345, r4567;
-
- int32_t *src2_ptr, *dst1_32_ptr;
- int16_t *src1_ptr;
- count = 0;
- h = height;
- do {
- dst1_32_ptr = dst32 + count * dst_stride_8 + (dst_stride_2);
- dst2_ptr = dst2 + count * dst_stride_8 + (dst_stride_2);
- src1_ptr = dst16 + count * dst_stride_8 + (dst_stride_2);
- src2_ptr = dst2 + count * dst_stride_8 + (dst_stride_2);
- w = width;
-
- dst1_32_ptr += 2;
- dst2_ptr += 2;
- load_s16_4x4(src1_ptr, dst_stride_2, &s1, &s2, &s3, &s4);
- transpose_s16_4x4d(&s1, &s2, &s3, &s4);
- load_s32_4x4(src2_ptr, dst_stride_2, &d1, &d2, &d3, &d4);
- transpose_s32_4x4(&d1, &d2, &d3, &d4);
- do {
- src1_ptr += 4;
- src2_ptr += 4;
- load_s16_4x4(src1_ptr, dst_stride_2, &s5, &s6, &s7, &s8);
- transpose_s16_4x4d(&s5, &s6, &s7, &s8);
- load_s32_4x4(src2_ptr, dst_stride_2, &d5, &d6, &d7, &d8);
- transpose_s32_4x4(&d5, &d6, &d7, &d8);
- q23 = vaddl_s16(s2, s3);
- q45 = vaddl_s16(s4, s5);
- q67 = vaddl_s16(s6, s7);
- q2345 = vaddq_s32(q23, q45);
- q4567 = vaddq_s32(q45, q67);
- q12345 = vaddq_s32(vmovl_s16(s1), q2345);
- q23456 = vaddq_s32(q2345, vmovl_s16(s6));
- q34567 = vaddq_s32(q4567, vmovl_s16(s3));
- q45678 = vaddq_s32(q4567, vmovl_s16(s8));
-
- transpose_s32_4x4(&q12345, &q23456, &q34567, &q45678);
- store_s32_4x4(dst1_32_ptr, dst_stride_2, q12345, q23456, q34567,
- q45678);
- dst1_32_ptr += 4;
- s1 = s5;
- s2 = s6;
- s3 = s7;
- s4 = s8;
-
- r23 = vaddq_s32(d2, d3);
- r45 = vaddq_s32(d4, d5);
- r67 = vaddq_s32(d6, d7);
- r2345 = vaddq_s32(r23, r45);
- r4567 = vaddq_s32(r45, r67);
- r12345 = vaddq_s32(d1, r2345);
- r23456 = vaddq_s32(r2345, d6);
- r34567 = vaddq_s32(r4567, d3);
- r45678 = vaddq_s32(r4567, d8);
-
- transpose_s32_4x4(&r12345, &r23456, &r34567, &r45678);
- store_s32_4x4(dst2_ptr, dst_stride_2, r12345, r23456, r34567, r45678);
- dst2_ptr += 4;
- d1 = d5;
- d2 = d6;
- d3 = d7;
- d4 = d8;
- w -= 4;
- } while (w > 0);
- h -= 8;
- count++;
- } while (h > 0);
- }
-}
-
-static INLINE void calc_ab_internal_lbd(int32_t *A, uint16_t *A16,
- uint16_t *B16, int32_t *B,
- const int buf_stride, const int width,
- const int height, const int r,
- const int s, const int ht_inc) {
- int32_t *src1, *dst2, count = 0;
- uint16_t *dst_A16, *src2;
- const uint32_t n = (2 * r + 1) * (2 * r + 1);
- const uint32x4_t const_n_val = vdupq_n_u32(n);
- const uint16x8_t sgrproj_sgr = vdupq_n_u16(SGRPROJ_SGR);
- const uint16x4_t one_by_n_minus_1_vec = vdup_n_u16(one_by_x[n - 1]);
- const uint32x4_t const_val = vdupq_n_u32(255);
-
- uint16x8_t s16_0, s16_1, s16_2, s16_3, s16_4, s16_5, s16_6, s16_7;
-
- uint32x4_t s0, s1, s2, s3, s4, s5, s6, s7;
-
- const uint32x4_t s_vec = vdupq_n_u32(s);
- int w, h = height;
-
- do {
- dst_A16 = A16 + (count << 2) * buf_stride;
- src1 = A + (count << 2) * buf_stride;
- src2 = B16 + (count << 2) * buf_stride;
- dst2 = B + (count << 2) * buf_stride;
- w = width;
- do {
- load_u32_4x4((uint32_t *)src1, buf_stride, &s0, &s1, &s2, &s3);
- load_u32_4x4((uint32_t *)src1 + 4, buf_stride, &s4, &s5, &s6, &s7);
- load_u16_8x4(src2, buf_stride, &s16_0, &s16_1, &s16_2, &s16_3);
-
- s16_4 = s16_0;
- s16_5 = s16_1;
- s16_6 = s16_2;
- s16_7 = s16_3;
-
- calc_ab_internal_common(
- s0, s1, s2, s3, s4, s5, s6, s7, s16_0, s16_1, s16_2, s16_3, s16_4,
- s16_5, s16_6, s16_7, const_n_val, s_vec, const_val,
- one_by_n_minus_1_vec, sgrproj_sgr, src1, dst_A16, dst2, buf_stride);
-
- w -= 8;
- dst2 += 8;
- src1 += 8;
- src2 += 8;
- dst_A16 += 8;
- } while (w > 0);
- count++;
- h -= (ht_inc * 4);
- } while (h > 0);
-}
-
-static INLINE void calc_ab_internal_hbd(int32_t *A, uint16_t *A16,
- uint16_t *B16, int32_t *B,
- const int buf_stride, const int width,
- const int height, const int bit_depth,
- const int r, const int s,
- const int ht_inc) {
- int32_t *src1, *dst2, count = 0;
- uint16_t *dst_A16, *src2;
- const uint32_t n = (2 * r + 1) * (2 * r + 1);
- const int16x8_t bd_min_2_vec = vdupq_n_s16(-(bit_depth - 8));
- const int32x4_t bd_min_1_vec = vdupq_n_s32(-((bit_depth - 8) << 1));
- const uint32x4_t const_n_val = vdupq_n_u32(n);
- const uint16x8_t sgrproj_sgr = vdupq_n_u16(SGRPROJ_SGR);
- const uint16x4_t one_by_n_minus_1_vec = vdup_n_u16(one_by_x[n - 1]);
- const uint32x4_t const_val = vdupq_n_u32(255);
-
- int32x4_t sr0, sr1, sr2, sr3, sr4, sr5, sr6, sr7;
- uint16x8_t s16_0, s16_1, s16_2, s16_3;
- uint16x8_t s16_4, s16_5, s16_6, s16_7;
- uint32x4_t s0, s1, s2, s3, s4, s5, s6, s7;
-
- const uint32x4_t s_vec = vdupq_n_u32(s);
- int w, h = height;
-
- do {
- src1 = A + (count << 2) * buf_stride;
- src2 = B16 + (count << 2) * buf_stride;
- dst2 = B + (count << 2) * buf_stride;
- dst_A16 = A16 + (count << 2) * buf_stride;
- w = width;
- do {
- load_s32_4x4(src1, buf_stride, &sr0, &sr1, &sr2, &sr3);
- load_s32_4x4(src1 + 4, buf_stride, &sr4, &sr5, &sr6, &sr7);
- load_u16_8x4(src2, buf_stride, &s16_0, &s16_1, &s16_2, &s16_3);
-
- s0 = vrshlq_u32(vreinterpretq_u32_s32(sr0), bd_min_1_vec);
- s1 = vrshlq_u32(vreinterpretq_u32_s32(sr1), bd_min_1_vec);
- s2 = vrshlq_u32(vreinterpretq_u32_s32(sr2), bd_min_1_vec);
- s3 = vrshlq_u32(vreinterpretq_u32_s32(sr3), bd_min_1_vec);
- s4 = vrshlq_u32(vreinterpretq_u32_s32(sr4), bd_min_1_vec);
- s5 = vrshlq_u32(vreinterpretq_u32_s32(sr5), bd_min_1_vec);
- s6 = vrshlq_u32(vreinterpretq_u32_s32(sr6), bd_min_1_vec);
- s7 = vrshlq_u32(vreinterpretq_u32_s32(sr7), bd_min_1_vec);
-
- s16_4 = vrshlq_u16(s16_0, bd_min_2_vec);
- s16_5 = vrshlq_u16(s16_1, bd_min_2_vec);
- s16_6 = vrshlq_u16(s16_2, bd_min_2_vec);
- s16_7 = vrshlq_u16(s16_3, bd_min_2_vec);
-
- calc_ab_internal_common(
- s0, s1, s2, s3, s4, s5, s6, s7, s16_0, s16_1, s16_2, s16_3, s16_4,
- s16_5, s16_6, s16_7, const_n_val, s_vec, const_val,
- one_by_n_minus_1_vec, sgrproj_sgr, src1, dst_A16, dst2, buf_stride);
-
- w -= 8;
- dst2 += 8;
- src1 += 8;
- src2 += 8;
- dst_A16 += 8;
- } while (w > 0);
- count++;
- h -= (ht_inc * 4);
- } while (h > 0);
-}
-
-static INLINE void calc_ab_fast_internal_lbd(int32_t *A, uint16_t *A16,
- int32_t *B, const int buf_stride,
- const int width, const int height,
- const int r, const int s,
- const int ht_inc) {
- int32_t *src1, *src2, count = 0;
- uint16_t *dst_A16;
- const uint32_t n = (2 * r + 1) * (2 * r + 1);
- const uint32x4_t const_n_val = vdupq_n_u32(n);
- const uint16x4_t sgrproj_sgr = vdup_n_u16(SGRPROJ_SGR);
- const uint32x4_t one_by_n_minus_1_vec = vdupq_n_u32(one_by_x[n - 1]);
- const uint32x4_t const_val = vdupq_n_u32(255);
-
- int32x4_t sr0, sr1, sr2, sr3, sr4, sr5, sr6, sr7;
- uint32x4_t s0, s1, s2, s3, s4, s5, s6, s7;
-
- const uint32x4_t s_vec = vdupq_n_u32(s);
- int w, h = height;
-
- do {
- src1 = A + (count << 2) * buf_stride;
- src2 = B + (count << 2) * buf_stride;
- dst_A16 = A16 + (count << 2) * buf_stride;
- w = width;
- do {
- load_s32_4x4(src1, buf_stride, &sr0, &sr1, &sr2, &sr3);
- load_s32_4x4(src2, buf_stride, &sr4, &sr5, &sr6, &sr7);
-
- s0 = vreinterpretq_u32_s32(sr0);
- s1 = vreinterpretq_u32_s32(sr1);
- s2 = vreinterpretq_u32_s32(sr2);
- s3 = vreinterpretq_u32_s32(sr3);
- s4 = vreinterpretq_u32_s32(sr4);
- s5 = vreinterpretq_u32_s32(sr5);
- s6 = vreinterpretq_u32_s32(sr6);
- s7 = vreinterpretq_u32_s32(sr7);
-
- calc_ab_fast_internal_common(s0, s1, s2, s3, s4, s5, s6, s7, sr4, sr5,
- sr6, sr7, const_n_val, s_vec, const_val,
- one_by_n_minus_1_vec, sgrproj_sgr, src1,
- dst_A16, src2, buf_stride);
-
- w -= 4;
- src1 += 4;
- src2 += 4;
- dst_A16 += 4;
- } while (w > 0);
- count++;
- h -= (ht_inc * 4);
- } while (h > 0);
-}
-
-static INLINE void calc_ab_fast_internal_hbd(int32_t *A, uint16_t *A16,
- int32_t *B, const int buf_stride,
- const int width, const int height,
- const int bit_depth, const int r,
- const int s, const int ht_inc) {
- int32_t *src1, *src2, count = 0;
- uint16_t *dst_A16;
- const uint32_t n = (2 * r + 1) * (2 * r + 1);
- const int32x4_t bd_min_2_vec = vdupq_n_s32(-(bit_depth - 8));
- const int32x4_t bd_min_1_vec = vdupq_n_s32(-((bit_depth - 8) << 1));
- const uint32x4_t const_n_val = vdupq_n_u32(n);
- const uint16x4_t sgrproj_sgr = vdup_n_u16(SGRPROJ_SGR);
- const uint32x4_t one_by_n_minus_1_vec = vdupq_n_u32(one_by_x[n - 1]);
- const uint32x4_t const_val = vdupq_n_u32(255);
-
- int32x4_t sr0, sr1, sr2, sr3, sr4, sr5, sr6, sr7;
- uint32x4_t s0, s1, s2, s3, s4, s5, s6, s7;
-
- const uint32x4_t s_vec = vdupq_n_u32(s);
- int w, h = height;
-
- do {
- src1 = A + (count << 2) * buf_stride;
- src2 = B + (count << 2) * buf_stride;
- dst_A16 = A16 + (count << 2) * buf_stride;
- w = width;
- do {
- load_s32_4x4(src1, buf_stride, &sr0, &sr1, &sr2, &sr3);
- load_s32_4x4(src2, buf_stride, &sr4, &sr5, &sr6, &sr7);
-
- s0 = vrshlq_u32(vreinterpretq_u32_s32(sr0), bd_min_1_vec);
- s1 = vrshlq_u32(vreinterpretq_u32_s32(sr1), bd_min_1_vec);
- s2 = vrshlq_u32(vreinterpretq_u32_s32(sr2), bd_min_1_vec);
- s3 = vrshlq_u32(vreinterpretq_u32_s32(sr3), bd_min_1_vec);
- s4 = vrshlq_u32(vreinterpretq_u32_s32(sr4), bd_min_2_vec);
- s5 = vrshlq_u32(vreinterpretq_u32_s32(sr5), bd_min_2_vec);
- s6 = vrshlq_u32(vreinterpretq_u32_s32(sr6), bd_min_2_vec);
- s7 = vrshlq_u32(vreinterpretq_u32_s32(sr7), bd_min_2_vec);
-
- calc_ab_fast_internal_common(s0, s1, s2, s3, s4, s5, s6, s7, sr4, sr5,
- sr6, sr7, const_n_val, s_vec, const_val,
- one_by_n_minus_1_vec, sgrproj_sgr, src1,
- dst_A16, src2, buf_stride);
-
- w -= 4;
- src1 += 4;
- src2 += 4;
- dst_A16 += 4;
- } while (w > 0);
- count++;
- h -= (ht_inc * 4);
- } while (h > 0);
-}
-
-static INLINE void boxsum1(int16_t *src, const int src_stride, uint16_t *dst1,
- int32_t *dst2, const int dst_stride, const int width,
- const int height) {
- assert(width > 2 * SGRPROJ_BORDER_HORZ);
- assert(height > 2 * SGRPROJ_BORDER_VERT);
-
- int16_t *src_ptr;
- int32_t *dst2_ptr;
- uint16_t *dst1_ptr;
- int h, w, count = 0;
-
- w = width;
- {
- int16x8_t s1, s2, s3, s4, s5, s6, s7, s8;
- int16x8_t q23, q34, q56, q234, q345, q456, q567;
- int32x4_t r23, r56, r345, r456, r567, r78, r678;
- int32x4_t r4_low, r4_high, r34_low, r34_high, r234_low, r234_high;
- int32x4_t r2, r3, r5, r6, r7, r8;
- int16x8_t q678, q78;
-
- do {
- dst1_ptr = dst1 + (count << 3);
- dst2_ptr = dst2 + (count << 3);
- src_ptr = src + (count << 3);
- h = height;
-
- load_s16_8x4(src_ptr, src_stride, &s1, &s2, &s3, &s4);
- src_ptr += 4 * src_stride;
-
- q23 = vaddq_s16(s2, s3);
- q234 = vaddq_s16(q23, s4);
- q34 = vaddq_s16(s3, s4);
- dst1_ptr += (dst_stride << 1);
-
- r2 = vmull_s16(vget_low_s16(s2), vget_low_s16(s2));
- r3 = vmull_s16(vget_low_s16(s3), vget_low_s16(s3));
- r4_low = vmull_s16(vget_low_s16(s4), vget_low_s16(s4));
- r23 = vaddq_s32(r2, r3);
- r234_low = vaddq_s32(r23, r4_low);
- r34_low = vaddq_s32(r3, r4_low);
-
- r2 = vmull_s16(vget_high_s16(s2), vget_high_s16(s2));
- r3 = vmull_s16(vget_high_s16(s3), vget_high_s16(s3));
- r4_high = vmull_s16(vget_high_s16(s4), vget_high_s16(s4));
- r23 = vaddq_s32(r2, r3);
- r234_high = vaddq_s32(r23, r4_high);
- r34_high = vaddq_s32(r3, r4_high);
-
- dst2_ptr += (dst_stride << 1);
-
- do {
- load_s16_8x4(src_ptr, src_stride, &s5, &s6, &s7, &s8);
- src_ptr += 4 * src_stride;
-
- q345 = vaddq_s16(s5, q34);
- q56 = vaddq_s16(s5, s6);
- q456 = vaddq_s16(s4, q56);
- q567 = vaddq_s16(s7, q56);
- q78 = vaddq_s16(s7, s8);
- q678 = vaddq_s16(s6, q78);
-
- store_s16_8x4((int16_t *)dst1_ptr, dst_stride, q234, q345, q456, q567);
- dst1_ptr += (dst_stride << 2);
-
- s4 = s8;
- q34 = q78;
- q234 = q678;
-
- r5 = vmull_s16(vget_low_s16(s5), vget_low_s16(s5));
- r6 = vmull_s16(vget_low_s16(s6), vget_low_s16(s6));
- r7 = vmull_s16(vget_low_s16(s7), vget_low_s16(s7));
- r8 = vmull_s16(vget_low_s16(s8), vget_low_s16(s8));
-
- r345 = vaddq_s32(r5, r34_low);
- r56 = vaddq_s32(r5, r6);
- r456 = vaddq_s32(r4_low, r56);
- r567 = vaddq_s32(r7, r56);
- r78 = vaddq_s32(r7, r8);
- r678 = vaddq_s32(r6, r78);
- store_s32_4x4(dst2_ptr, dst_stride, r234_low, r345, r456, r567);
-
- r4_low = r8;
- r34_low = r78;
- r234_low = r678;
-
- r5 = vmull_s16(vget_high_s16(s5), vget_high_s16(s5));
- r6 = vmull_s16(vget_high_s16(s6), vget_high_s16(s6));
- r7 = vmull_s16(vget_high_s16(s7), vget_high_s16(s7));
- r8 = vmull_s16(vget_high_s16(s8), vget_high_s16(s8));
-
- r345 = vaddq_s32(r5, r34_high);
- r56 = vaddq_s32(r5, r6);
- r456 = vaddq_s32(r4_high, r56);
- r567 = vaddq_s32(r7, r56);
- r78 = vaddq_s32(r7, r8);
- r678 = vaddq_s32(r6, r78);
- store_s32_4x4((dst2_ptr + 4), dst_stride, r234_high, r345, r456, r567);
- dst2_ptr += (dst_stride << 2);
-
- r4_high = r8;
- r34_high = r78;
- r234_high = r678;
-
- h -= 4;
- } while (h > 0);
- w -= 8;
- count++;
- } while (w > 0);
- }
-
- {
- int16x4_t d1, d2, d3, d4, d5, d6, d7, d8;
- int16x4_t q23, q34, q56, q234, q345, q456, q567;
- int32x4_t r23, r56, r234, r345, r456, r567, r34, r78, r678;
- int32x4_t r1, r2, r3, r4, r5, r6, r7, r8;
- int16x4_t q678, q78;
-
- int32_t *src2_ptr;
- uint16_t *src1_ptr;
- count = 0;
- h = height;
- w = width;
- do {
- dst1_ptr = dst1 + (count << 2) * dst_stride;
- dst2_ptr = dst2 + (count << 2) * dst_stride;
- src1_ptr = dst1 + (count << 2) * dst_stride;
- src2_ptr = dst2 + (count << 2) * dst_stride;
- w = width;
-
- load_s16_4x4((int16_t *)src1_ptr, dst_stride, &d1, &d2, &d3, &d4);
- transpose_s16_4x4d(&d1, &d2, &d3, &d4);
- load_s32_4x4(src2_ptr, dst_stride, &r1, &r2, &r3, &r4);
- transpose_s32_4x4(&r1, &r2, &r3, &r4);
- src1_ptr += 4;
- src2_ptr += 4;
-
- q23 = vadd_s16(d2, d3);
- q234 = vadd_s16(q23, d4);
- q34 = vadd_s16(d3, d4);
- dst1_ptr += 2;
- r23 = vaddq_s32(r2, r3);
- r234 = vaddq_s32(r23, r4);
- r34 = vaddq_s32(r3, r4);
- dst2_ptr += 2;
-
- do {
- load_s16_4x4((int16_t *)src1_ptr, dst_stride, &d5, &d6, &d7, &d8);
- transpose_s16_4x4d(&d5, &d6, &d7, &d8);
- load_s32_4x4(src2_ptr, dst_stride, &r5, &r6, &r7, &r8);
- transpose_s32_4x4(&r5, &r6, &r7, &r8);
- src1_ptr += 4;
- src2_ptr += 4;
-
- q345 = vadd_s16(d5, q34);
- q56 = vadd_s16(d5, d6);
- q456 = vadd_s16(d4, q56);
- q567 = vadd_s16(d7, q56);
- q78 = vadd_s16(d7, d8);
- q678 = vadd_s16(d6, q78);
- transpose_s16_4x4d(&q234, &q345, &q456, &q567);
- store_s16_4x4((int16_t *)dst1_ptr, dst_stride, q234, q345, q456, q567);
- dst1_ptr += 4;
-
- d4 = d8;
- q34 = q78;
- q234 = q678;
-
- r345 = vaddq_s32(r5, r34);
- r56 = vaddq_s32(r5, r6);
- r456 = vaddq_s32(r4, r56);
- r567 = vaddq_s32(r7, r56);
- r78 = vaddq_s32(r7, r8);
- r678 = vaddq_s32(r6, r78);
- transpose_s32_4x4(&r234, &r345, &r456, &r567);
- store_s32_4x4(dst2_ptr, dst_stride, r234, r345, r456, r567);
- dst2_ptr += 4;
-
- r4 = r8;
- r34 = r78;
- r234 = r678;
- w -= 4;
- } while (w > 0);
- h -= 4;
- count++;
- } while (h > 0);
- }
-}
-
-static INLINE int32x4_t cross_sum_inp_s32(int32_t *buf, int buf_stride) {
- int32x4_t xtr, xt, xtl, xl, x, xr, xbr, xb, xbl;
- int32x4_t fours, threes, res;
-
- xtl = vld1q_s32(buf - buf_stride - 1);
- xt = vld1q_s32(buf - buf_stride);
- xtr = vld1q_s32(buf - buf_stride + 1);
- xl = vld1q_s32(buf - 1);
- x = vld1q_s32(buf);
- xr = vld1q_s32(buf + 1);
- xbl = vld1q_s32(buf + buf_stride - 1);
- xb = vld1q_s32(buf + buf_stride);
- xbr = vld1q_s32(buf + buf_stride + 1);
-
- fours = vaddq_s32(xl, vaddq_s32(xt, vaddq_s32(xr, vaddq_s32(xb, x))));
- threes = vaddq_s32(xtl, vaddq_s32(xtr, vaddq_s32(xbr, xbl)));
- res = vsubq_s32(vshlq_n_s32(vaddq_s32(fours, threes), 2), threes);
- return res;
-}
-
-static INLINE void cross_sum_inp_u16(uint16_t *buf, int buf_stride,
- int32x4_t *a0, int32x4_t *a1) {
- uint16x8_t xtr, xt, xtl, xl, x, xr, xbr, xb, xbl;
- uint16x8_t r0, r1;
-
- xtl = vld1q_u16(buf - buf_stride - 1);
- xt = vld1q_u16(buf - buf_stride);
- xtr = vld1q_u16(buf - buf_stride + 1);
- xl = vld1q_u16(buf - 1);
- x = vld1q_u16(buf);
- xr = vld1q_u16(buf + 1);
- xbl = vld1q_u16(buf + buf_stride - 1);
- xb = vld1q_u16(buf + buf_stride);
- xbr = vld1q_u16(buf + buf_stride + 1);
-
- xb = vaddq_u16(xb, x);
- xt = vaddq_u16(xt, xr);
- xl = vaddq_u16(xl, xb);
- xl = vaddq_u16(xl, xt);
-
- r0 = vshlq_n_u16(xl, 2);
-
- xbl = vaddq_u16(xbl, xbr);
- xtl = vaddq_u16(xtl, xtr);
- xtl = vaddq_u16(xtl, xbl);
-
- r1 = vshlq_n_u16(xtl, 2);
- r1 = vsubq_u16(r1, xtl);
-
- *a0 = vreinterpretq_s32_u32(
- vaddq_u32(vmovl_u16(vget_low_u16(r0)), vmovl_u16(vget_low_u16(r1))));
- *a1 = vreinterpretq_s32_u32(
- vaddq_u32(vmovl_u16(vget_high_u16(r0)), vmovl_u16(vget_high_u16(r1))));
-}
-
-static INLINE int32x4_t cross_sum_fast_even_row(int32_t *buf, int buf_stride) {
- int32x4_t xtr, xt, xtl, xbr, xb, xbl;
- int32x4_t fives, sixes, fives_plus_sixes;
-
- xtl = vld1q_s32(buf - buf_stride - 1);
- xt = vld1q_s32(buf - buf_stride);
- xtr = vld1q_s32(buf - buf_stride + 1);
- xbl = vld1q_s32(buf + buf_stride - 1);
- xb = vld1q_s32(buf + buf_stride);
- xbr = vld1q_s32(buf + buf_stride + 1);
-
- fives = vaddq_s32(xtl, vaddq_s32(xtr, vaddq_s32(xbr, xbl)));
- sixes = vaddq_s32(xt, xb);
- fives_plus_sixes = vaddq_s32(fives, sixes);
-
- return vaddq_s32(
- vaddq_s32(vshlq_n_s32(fives_plus_sixes, 2), fives_plus_sixes), sixes);
-}
-
-static INLINE void cross_sum_fast_even_row_inp16(uint16_t *buf, int buf_stride,
- int32x4_t *a0, int32x4_t *a1) {
- uint16x8_t xtr, xt, xtl, xbr, xb, xbl, xb0;
-
- xtl = vld1q_u16(buf - buf_stride - 1);
- xt = vld1q_u16(buf - buf_stride);
- xtr = vld1q_u16(buf - buf_stride + 1);
- xbl = vld1q_u16(buf + buf_stride - 1);
- xb = vld1q_u16(buf + buf_stride);
- xbr = vld1q_u16(buf + buf_stride + 1);
-
- xbr = vaddq_u16(xbr, xbl);
- xtr = vaddq_u16(xtr, xtl);
- xbr = vaddq_u16(xbr, xtr);
- xtl = vshlq_n_u16(xbr, 2);
- xbr = vaddq_u16(xtl, xbr);
-
- xb = vaddq_u16(xb, xt);
- xb0 = vshlq_n_u16(xb, 1);
- xb = vshlq_n_u16(xb, 2);
- xb = vaddq_u16(xb, xb0);
-
- *a0 = vreinterpretq_s32_u32(
- vaddq_u32(vmovl_u16(vget_low_u16(xbr)), vmovl_u16(vget_low_u16(xb))));
- *a1 = vreinterpretq_s32_u32(
- vaddq_u32(vmovl_u16(vget_high_u16(xbr)), vmovl_u16(vget_high_u16(xb))));
-}
-
-static INLINE int32x4_t cross_sum_fast_odd_row(int32_t *buf) {
- int32x4_t xl, x, xr;
- int32x4_t fives, sixes, fives_plus_sixes;
-
- xl = vld1q_s32(buf - 1);
- x = vld1q_s32(buf);
- xr = vld1q_s32(buf + 1);
- fives = vaddq_s32(xl, xr);
- sixes = x;
- fives_plus_sixes = vaddq_s32(fives, sixes);
-
- return vaddq_s32(
- vaddq_s32(vshlq_n_s32(fives_plus_sixes, 2), fives_plus_sixes), sixes);
-}
-
-static INLINE void cross_sum_fast_odd_row_inp16(uint16_t *buf, int32x4_t *a0,
- int32x4_t *a1) {
- uint16x8_t xl, x, xr;
- uint16x8_t x0;
-
- xl = vld1q_u16(buf - 1);
- x = vld1q_u16(buf);
- xr = vld1q_u16(buf + 1);
- xl = vaddq_u16(xl, xr);
- x0 = vshlq_n_u16(xl, 2);
- xl = vaddq_u16(xl, x0);
-
- x0 = vshlq_n_u16(x, 1);
- x = vshlq_n_u16(x, 2);
- x = vaddq_u16(x, x0);
-
- *a0 = vreinterpretq_s32_u32(
- vaddq_u32(vmovl_u16(vget_low_u16(xl)), vmovl_u16(vget_low_u16(x))));
- *a1 = vreinterpretq_s32_u32(
- vaddq_u32(vmovl_u16(vget_high_u16(xl)), vmovl_u16(vget_high_u16(x))));
-}
-
-static void final_filter_fast_internal(uint16_t *A, int32_t *B,
- const int buf_stride, int16_t *src,
- const int src_stride, int32_t *dst,
- const int dst_stride, const int width,
- const int height) {
- int16x8_t s0;
- int32_t *B_tmp, *dst_ptr;
- uint16_t *A_tmp;
- int16_t *src_ptr;
- int32x4_t a_res0, a_res1, b_res0, b_res1;
- int w, h, count = 0;
- assert(SGRPROJ_SGR_BITS == 8);
- assert(SGRPROJ_RST_BITS == 4);
-
- A_tmp = A;
- B_tmp = B;
- src_ptr = src;
- dst_ptr = dst;
- h = height;
- do {
- A_tmp = (A + count * buf_stride);
- B_tmp = (B + count * buf_stride);
- src_ptr = (src + count * src_stride);
- dst_ptr = (dst + count * dst_stride);
- w = width;
- if (!(count & 1)) {
- do {
- s0 = vld1q_s16(src_ptr);
- cross_sum_fast_even_row_inp16(A_tmp, buf_stride, &a_res0, &a_res1);
- a_res0 = vmulq_s32(vmovl_s16(vget_low_s16(s0)), a_res0);
- a_res1 = vmulq_s32(vmovl_s16(vget_high_s16(s0)), a_res1);
-
- b_res0 = cross_sum_fast_even_row(B_tmp, buf_stride);
- b_res1 = cross_sum_fast_even_row(B_tmp + 4, buf_stride);
- a_res0 = vaddq_s32(a_res0, b_res0);
- a_res1 = vaddq_s32(a_res1, b_res1);
-
- a_res0 =
- vrshrq_n_s32(a_res0, SGRPROJ_SGR_BITS + NB_EVEN - SGRPROJ_RST_BITS);
- a_res1 =
- vrshrq_n_s32(a_res1, SGRPROJ_SGR_BITS + NB_EVEN - SGRPROJ_RST_BITS);
-
- vst1q_s32(dst_ptr, a_res0);
- vst1q_s32(dst_ptr + 4, a_res1);
-
- A_tmp += 8;
- B_tmp += 8;
- src_ptr += 8;
- dst_ptr += 8;
- w -= 8;
- } while (w > 0);
- } else {
- do {
- s0 = vld1q_s16(src_ptr);
- cross_sum_fast_odd_row_inp16(A_tmp, &a_res0, &a_res1);
- a_res0 = vmulq_s32(vmovl_s16(vget_low_s16(s0)), a_res0);
- a_res1 = vmulq_s32(vmovl_s16(vget_high_s16(s0)), a_res1);
-
- b_res0 = cross_sum_fast_odd_row(B_tmp);
- b_res1 = cross_sum_fast_odd_row(B_tmp + 4);
- a_res0 = vaddq_s32(a_res0, b_res0);
- a_res1 = vaddq_s32(a_res1, b_res1);
-
- a_res0 =
- vrshrq_n_s32(a_res0, SGRPROJ_SGR_BITS + NB_ODD - SGRPROJ_RST_BITS);
- a_res1 =
- vrshrq_n_s32(a_res1, SGRPROJ_SGR_BITS + NB_ODD - SGRPROJ_RST_BITS);
-
- vst1q_s32(dst_ptr, a_res0);
- vst1q_s32(dst_ptr + 4, a_res1);
-
- A_tmp += 8;
- B_tmp += 8;
- src_ptr += 8;
- dst_ptr += 8;
- w -= 8;
- } while (w > 0);
- }
- count++;
- h -= 1;
- } while (h > 0);
-}
-
-void final_filter_internal(uint16_t *A, int32_t *B, const int buf_stride,
- int16_t *src, const int src_stride, int32_t *dst,
- const int dst_stride, const int width,
- const int height) {
- int16x8_t s0;
- int32_t *B_tmp, *dst_ptr;
- uint16_t *A_tmp;
- int16_t *src_ptr;
- int32x4_t a_res0, a_res1, b_res0, b_res1;
- int w, h, count = 0;
-
- assert(SGRPROJ_SGR_BITS == 8);
- assert(SGRPROJ_RST_BITS == 4);
- h = height;
-
- do {
- A_tmp = (A + count * buf_stride);
- B_tmp = (B + count * buf_stride);
- src_ptr = (src + count * src_stride);
- dst_ptr = (dst + count * dst_stride);
- w = width;
- do {
- s0 = vld1q_s16(src_ptr);
- cross_sum_inp_u16(A_tmp, buf_stride, &a_res0, &a_res1);
- a_res0 = vmulq_s32(vmovl_s16(vget_low_s16(s0)), a_res0);
- a_res1 = vmulq_s32(vmovl_s16(vget_high_s16(s0)), a_res1);
-
- b_res0 = cross_sum_inp_s32(B_tmp, buf_stride);
- b_res1 = cross_sum_inp_s32(B_tmp + 4, buf_stride);
- a_res0 = vaddq_s32(a_res0, b_res0);
- a_res1 = vaddq_s32(a_res1, b_res1);
-
- a_res0 =
- vrshrq_n_s32(a_res0, SGRPROJ_SGR_BITS + NB_EVEN - SGRPROJ_RST_BITS);
- a_res1 =
- vrshrq_n_s32(a_res1, SGRPROJ_SGR_BITS + NB_EVEN - SGRPROJ_RST_BITS);
- vst1q_s32(dst_ptr, a_res0);
- vst1q_s32(dst_ptr + 4, a_res1);
-
- A_tmp += 8;
- B_tmp += 8;
- src_ptr += 8;
- dst_ptr += 8;
- w -= 8;
- } while (w > 0);
- count++;
- h -= 1;
- } while (h > 0);
-}
-
-static INLINE void restoration_fast_internal(uint16_t *dgd16, int width,
- int height, int dgd_stride,
- int32_t *dst, int dst_stride,
- int bit_depth, int sgr_params_idx,
- int radius_idx) {
- const sgr_params_type *const params = &sgr_params[sgr_params_idx];
- const int r = params->r[radius_idx];
- const int width_ext = width + 2 * SGRPROJ_BORDER_HORZ;
- const int height_ext = height + 2 * SGRPROJ_BORDER_VERT;
-
- const int buf_stride = ((width_ext + 3) & ~3) + 16;
- int32_t A_[RESTORATION_PROC_UNIT_PELS];
- uint16_t A16_[RESTORATION_PROC_UNIT_PELS];
- int32_t B_[RESTORATION_PROC_UNIT_PELS];
- int32_t *square_sum_buf = A_;
- int32_t *sum_buf = B_;
- uint16_t *tmp16_buf = A16_;
-
- assert(r <= MAX_RADIUS && "Need MAX_RADIUS >= r");
- assert(r <= SGRPROJ_BORDER_VERT - 1 && r <= SGRPROJ_BORDER_HORZ - 1 &&
- "Need SGRPROJ_BORDER_* >= r+1");
-
- assert(radius_idx == 0);
- assert(r == 2);
-
- // input(dgd16) is 16bit.
- // sum of pixels 1st stage output will be in 16bit(tmp16_buf). End output is
- // kept in 32bit [sum_buf]. sum of squares output is kept in 32bit
- // buffer(square_sum_buf).
- boxsum2((int16_t *)(dgd16 - dgd_stride * SGRPROJ_BORDER_VERT -
- SGRPROJ_BORDER_HORZ),
- dgd_stride, (int16_t *)tmp16_buf, sum_buf, square_sum_buf, buf_stride,
- width_ext, height_ext);
-
- square_sum_buf += SGRPROJ_BORDER_VERT * buf_stride + SGRPROJ_BORDER_HORZ;
- sum_buf += SGRPROJ_BORDER_VERT * buf_stride + SGRPROJ_BORDER_HORZ;
- tmp16_buf += SGRPROJ_BORDER_VERT * buf_stride + SGRPROJ_BORDER_HORZ;
-
- // Calculation of a, b. a output is in 16bit tmp_buf which is in range of
- // [1, 256] for all bit depths. b output is kept in 32bit buffer.
-
- if (8 == bit_depth) {
- calc_ab_fast_internal_lbd(
- (square_sum_buf - buf_stride - 1), (tmp16_buf - buf_stride - 1),
- (sum_buf - buf_stride - 1), buf_stride * 2, width + 2, height + 2, r,
- params->s[radius_idx], 2);
- } else {
- calc_ab_fast_internal_hbd(
- (square_sum_buf - buf_stride - 1), (tmp16_buf - buf_stride - 1),
- (sum_buf - buf_stride - 1), buf_stride * 2, width + 2, height + 2,
- bit_depth, r, params->s[radius_idx], 2);
- }
- final_filter_fast_internal(tmp16_buf, sum_buf, buf_stride, (int16_t *)dgd16,
- dgd_stride, dst, dst_stride, width, height);
-}
-
-static INLINE void restoration_internal(uint16_t *dgd16, int width, int height,
- int dgd_stride, int32_t *dst,
- int dst_stride, int bit_depth,
- int sgr_params_idx, int radius_idx) {
- const sgr_params_type *const params = &sgr_params[sgr_params_idx];
- const int r = params->r[radius_idx];
- const int width_ext = width + 2 * SGRPROJ_BORDER_HORZ;
- const int height_ext = height + 2 * SGRPROJ_BORDER_VERT;
-
- int buf_stride = ((width_ext + 3) & ~3) + 16;
- int32_t A_[RESTORATION_PROC_UNIT_PELS];
- uint16_t A16_[RESTORATION_PROC_UNIT_PELS];
- uint16_t B16_[RESTORATION_PROC_UNIT_PELS];
- int32_t B_[RESTORATION_PROC_UNIT_PELS];
- int32_t *square_sum_buf = A_;
- uint16_t *sum_buf = B16_;
- uint16_t *A16 = A16_;
- int32_t *B = B_;
-
- assert(r <= MAX_RADIUS && "Need MAX_RADIUS >= r");
- assert(r <= SGRPROJ_BORDER_VERT - 1 && r <= SGRPROJ_BORDER_HORZ - 1 &&
- "Need SGRPROJ_BORDER_* >= r+1");
-
- assert(radius_idx == 1);
- assert(r == 1);
-
- // input(dgd16) is 16bit.
- // sum of pixels output will be in 16bit(sum_buf).
- // sum of squares output is kept in 32bit buffer(square_sum_buf).
- boxsum1((int16_t *)(dgd16 - dgd_stride * SGRPROJ_BORDER_VERT -
- SGRPROJ_BORDER_HORZ),
- dgd_stride, sum_buf, square_sum_buf, buf_stride, width_ext,
- height_ext);
-
- square_sum_buf += SGRPROJ_BORDER_VERT * buf_stride + SGRPROJ_BORDER_HORZ;
- B += SGRPROJ_BORDER_VERT * buf_stride + SGRPROJ_BORDER_HORZ;
- A16 += SGRPROJ_BORDER_VERT * buf_stride + SGRPROJ_BORDER_HORZ;
- sum_buf += SGRPROJ_BORDER_VERT * buf_stride + SGRPROJ_BORDER_HORZ;
-
- // Calculation of a, b. a output is in 16bit tmp_buf which is in range of
- // [1, 256] for all bit depths. b output is kept in 32bit buffer.
- if (8 == bit_depth) {
- calc_ab_internal_lbd((square_sum_buf - buf_stride - 1),
- (A16 - buf_stride - 1), (sum_buf - buf_stride - 1),
- (B - buf_stride - 1), buf_stride, width + 2,
- height + 2, r, params->s[radius_idx], 1);
- } else {
- calc_ab_internal_hbd((square_sum_buf - buf_stride - 1),
- (A16 - buf_stride - 1), (sum_buf - buf_stride - 1),
- (B - buf_stride - 1), buf_stride, width + 2,
- height + 2, bit_depth, r, params->s[radius_idx], 1);
- }
- final_filter_internal(A16, B, buf_stride, (int16_t *)dgd16, dgd_stride, dst,
- dst_stride, width, height);
-}
-
-static INLINE void src_convert_u8_to_u16(const uint8_t *src,
- const int src_stride, uint16_t *dst,
- const int dst_stride, const int width,
- const int height) {
- const uint8_t *src_ptr;
- uint16_t *dst_ptr;
- int h, w, count = 0;
-
- uint8x8_t t1, t2, t3, t4;
- uint16x8_t s1, s2, s3, s4;
- h = height;
- do {
- src_ptr = src + (count << 2) * src_stride;
- dst_ptr = dst + (count << 2) * dst_stride;
- w = width;
- if (w >= 7) {
- do {
- load_u8_8x4(src_ptr, src_stride, &t1, &t2, &t3, &t4);
- s1 = vmovl_u8(t1);
- s2 = vmovl_u8(t2);
- s3 = vmovl_u8(t3);
- s4 = vmovl_u8(t4);
- store_u16_8x4(dst_ptr, dst_stride, s1, s2, s3, s4);
-
- src_ptr += 8;
- dst_ptr += 8;
- w -= 8;
- } while (w > 7);
- }
-
- for (int y = 0; y < w; y++) {
- dst_ptr[y] = src_ptr[y];
- dst_ptr[y + 1 * dst_stride] = src_ptr[y + 1 * src_stride];
- dst_ptr[y + 2 * dst_stride] = src_ptr[y + 2 * src_stride];
- dst_ptr[y + 3 * dst_stride] = src_ptr[y + 3 * src_stride];
- }
- count++;
- h -= 4;
- } while (h > 3);
-
- src_ptr = src + (count << 2) * src_stride;
- dst_ptr = dst + (count << 2) * dst_stride;
- for (int x = 0; x < h; x++) {
- for (int y = 0; y < width; y++) {
- dst_ptr[y + x * dst_stride] = src_ptr[y + x * src_stride];
- }
- }
-}
-
-static INLINE void src_convert_hbd_copy(const uint16_t *src, int src_stride,
- uint16_t *dst, const int dst_stride,
- int width, int height) {
- const uint16_t *src_ptr;
- uint16_t *dst_ptr;
- int h, w, count = 0;
- uint16x8_t s1, s2, s3, s4;
-
- h = height;
- do {
- src_ptr = src + (count << 2) * src_stride;
- dst_ptr = dst + (count << 2) * dst_stride;
- w = width;
- do {
- load_u16_8x4(src_ptr, src_stride, &s1, &s2, &s3, &s4);
- store_u16_8x4(dst_ptr, dst_stride, s1, s2, s3, s4);
- src_ptr += 8;
- dst_ptr += 8;
- w -= 8;
- } while (w > 7);
-
- for (int y = 0; y < w; y++) {
- dst_ptr[y] = src_ptr[y];
- dst_ptr[y + 1 * dst_stride] = src_ptr[y + 1 * src_stride];
- dst_ptr[y + 2 * dst_stride] = src_ptr[y + 2 * src_stride];
- dst_ptr[y + 3 * dst_stride] = src_ptr[y + 3 * src_stride];
- }
- count++;
- h -= 4;
- } while (h > 3);
-
- src_ptr = src + (count << 2) * src_stride;
- dst_ptr = dst + (count << 2) * dst_stride;
-
- for (int x = 0; x < h; x++) {
- memcpy((dst_ptr + x * dst_stride), (src_ptr + x * src_stride),
- sizeof(uint16_t) * width);
- }
-}
-
-int av1_selfguided_restoration_neon(const uint8_t *dat8, int width, int height,
- int stride, int32_t *flt0, int32_t *flt1,
- int flt_stride, int sgr_params_idx,
- int bit_depth, int highbd) {
- const sgr_params_type *const params = &sgr_params[sgr_params_idx];
- assert(!(params->r[0] == 0 && params->r[1] == 0));
-
- uint16_t dgd16_[RESTORATION_PROC_UNIT_PELS];
- const int dgd16_stride = width + 2 * SGRPROJ_BORDER_HORZ;
- uint16_t *dgd16 =
- dgd16_ + dgd16_stride * SGRPROJ_BORDER_VERT + SGRPROJ_BORDER_HORZ;
- const int width_ext = width + 2 * SGRPROJ_BORDER_HORZ;
- const int height_ext = height + 2 * SGRPROJ_BORDER_VERT;
- const int dgd_stride = stride;
-
- if (highbd) {
- const uint16_t *dgd16_tmp = CONVERT_TO_SHORTPTR(dat8);
- src_convert_hbd_copy(
- dgd16_tmp - SGRPROJ_BORDER_VERT * dgd_stride - SGRPROJ_BORDER_HORZ,
- dgd_stride,
- dgd16 - SGRPROJ_BORDER_VERT * dgd16_stride - SGRPROJ_BORDER_HORZ,
- dgd16_stride, width_ext, height_ext);
- } else {
- src_convert_u8_to_u16(
- dat8 - SGRPROJ_BORDER_VERT * dgd_stride - SGRPROJ_BORDER_HORZ,
- dgd_stride,
- dgd16 - SGRPROJ_BORDER_VERT * dgd16_stride - SGRPROJ_BORDER_HORZ,
- dgd16_stride, width_ext, height_ext);
- }
-
- if (params->r[0] > 0)
- restoration_fast_internal(dgd16, width, height, dgd16_stride, flt0,
- flt_stride, bit_depth, sgr_params_idx, 0);
- if (params->r[1] > 0)
- restoration_internal(dgd16, width, height, dgd16_stride, flt1, flt_stride,
- bit_depth, sgr_params_idx, 1);
- return 0;
-}
-
-void apply_selfguided_restoration_neon(const uint8_t *dat8, int width,
- int height, int stride, int eps,
- const int *xqd, uint8_t *dst8,
- int dst_stride, int32_t *tmpbuf,
- int bit_depth, int highbd) {
- int32_t *flt0 = tmpbuf;
- int32_t *flt1 = flt0 + RESTORATION_UNITPELS_MAX;
- assert(width * height <= RESTORATION_UNITPELS_MAX);
- uint16_t dgd16_[RESTORATION_PROC_UNIT_PELS];
- const int dgd16_stride = width + 2 * SGRPROJ_BORDER_HORZ;
- uint16_t *dgd16 =
- dgd16_ + dgd16_stride * SGRPROJ_BORDER_VERT + SGRPROJ_BORDER_HORZ;
- const int width_ext = width + 2 * SGRPROJ_BORDER_HORZ;
- const int height_ext = height + 2 * SGRPROJ_BORDER_VERT;
- const int dgd_stride = stride;
- const sgr_params_type *const params = &sgr_params[eps];
- int xq[2];
-
- assert(!(params->r[0] == 0 && params->r[1] == 0));
-
- if (highbd) {
- const uint16_t *dgd16_tmp = CONVERT_TO_SHORTPTR(dat8);
- src_convert_hbd_copy(
- dgd16_tmp - SGRPROJ_BORDER_VERT * dgd_stride - SGRPROJ_BORDER_HORZ,
- dgd_stride,
- dgd16 - SGRPROJ_BORDER_VERT * dgd16_stride - SGRPROJ_BORDER_HORZ,
- dgd16_stride, width_ext, height_ext);
- } else {
- src_convert_u8_to_u16(
- dat8 - SGRPROJ_BORDER_VERT * dgd_stride - SGRPROJ_BORDER_HORZ,
- dgd_stride,
- dgd16 - SGRPROJ_BORDER_VERT * dgd16_stride - SGRPROJ_BORDER_HORZ,
- dgd16_stride, width_ext, height_ext);
- }
-
- if (params->r[0] > 0)
- restoration_fast_internal(dgd16, width, height, dgd16_stride, flt0, width,
- bit_depth, eps, 0);
- if (params->r[1] > 0)
- restoration_internal(dgd16, width, height, dgd16_stride, flt1, width,
- bit_depth, eps, 1);
-
- decode_xq(xqd, xq, params);
-
- {
- int16_t *src_ptr;
- uint8_t *dst_ptr;
- uint16_t *dst16_ptr;
- int16x4_t d0, d4;
- int16x8_t r0, s0;
- uint16x8_t r4;
- int32x4_t u0, u4, v0, v4, f00, f10;
- uint8x8_t t0;
- int count = 0, w = width, h = height, rc = 0;
-
- const int32x4_t xq0_vec = vdupq_n_s32(xq[0]);
- const int32x4_t xq1_vec = vdupq_n_s32(xq[1]);
- const int16x8_t zero = vdupq_n_s16(0);
- const uint16x8_t max = vdupq_n_u16((1 << bit_depth) - 1);
- uint16_t *dst16 = CONVERT_TO_SHORTPTR(dst8);
- dst_ptr = dst8;
- src_ptr = (int16_t *)dgd16;
- do {
- w = width;
- count = 0;
- dst_ptr = dst8 + rc * dst_stride;
- dst16_ptr = dst16 + rc * dst_stride;
- do {
- s0 = vld1q_s16(src_ptr + count);
-
- u0 = vshll_n_s16(vget_low_s16(s0), SGRPROJ_RST_BITS);
- u4 = vshll_n_s16(vget_high_s16(s0), SGRPROJ_RST_BITS);
-
- v0 = vshlq_n_s32(u0, SGRPROJ_PRJ_BITS);
- v4 = vshlq_n_s32(u4, SGRPROJ_PRJ_BITS);
-
- if (params->r[0] > 0) {
- f00 = vld1q_s32(flt0 + count);
- f10 = vld1q_s32(flt0 + count + 4);
-
- f00 = vsubq_s32(f00, u0);
- f10 = vsubq_s32(f10, u4);
-
- v0 = vmlaq_s32(v0, xq0_vec, f00);
- v4 = vmlaq_s32(v4, xq0_vec, f10);
- }
-
- if (params->r[1] > 0) {
- f00 = vld1q_s32(flt1 + count);
- f10 = vld1q_s32(flt1 + count + 4);
-
- f00 = vsubq_s32(f00, u0);
- f10 = vsubq_s32(f10, u4);
-
- v0 = vmlaq_s32(v0, xq1_vec, f00);
- v4 = vmlaq_s32(v4, xq1_vec, f10);
- }
-
- d0 = vqrshrn_n_s32(v0, SGRPROJ_PRJ_BITS + SGRPROJ_RST_BITS);
- d4 = vqrshrn_n_s32(v4, SGRPROJ_PRJ_BITS + SGRPROJ_RST_BITS);
-
- r0 = vcombine_s16(d0, d4);
-
- r4 = vreinterpretq_u16_s16(vmaxq_s16(r0, zero));
-
- if (highbd) {
- r4 = vminq_u16(r4, max);
- vst1q_u16(dst16_ptr, r4);
- } else {
- t0 = vqmovn_u16(r4);
- vst1_u8(dst_ptr, t0);
- }
- w -= 8;
- count += 8;
- dst_ptr += 8;
- dst16_ptr += 8;
- } while (w > 0);
-
- src_ptr += dgd16_stride;
- flt1 += width;
- flt0 += width;
- rc++;
- h--;
- } while (h > 0);
- }
-}
diff --git a/third_party/aom/av1/common/arm/transpose_neon.h b/third_party/aom/av1/common/arm/transpose_neon.h
deleted file mode 100644
index 8a3d9f07f..000000000
--- a/third_party/aom/av1/common/arm/transpose_neon.h
+++ /dev/null
@@ -1,537 +0,0 @@
-/*
- * Copyright (c) 2018, Alliance for Open Media. All Rights Reserved.
- *
- * Use of this source code is governed by a BSD-style license
- * that can be found in the LICENSE file in the root of the source
- * tree. An additional intellectual property rights grant can be found
- * in the file PATENTS. All contributing project authors may
- * be found in the AUTHORS file in the root of the source tree.
- */
-
-#ifndef AOM_AV1_COMMON_ARM_TRANSPOSE_NEON_H_
-#define AOM_AV1_COMMON_ARM_TRANSPOSE_NEON_H_
-
-#include <arm_neon.h>
-
-static INLINE void transpose_u8_8x8(uint8x8_t *a0, uint8x8_t *a1, uint8x8_t *a2,
- uint8x8_t *a3, uint8x8_t *a4, uint8x8_t *a5,
- uint8x8_t *a6, uint8x8_t *a7) {
- // Swap 8 bit elements. Goes from:
- // a0: 00 01 02 03 04 05 06 07
- // a1: 10 11 12 13 14 15 16 17
- // a2: 20 21 22 23 24 25 26 27
- // a3: 30 31 32 33 34 35 36 37
- // a4: 40 41 42 43 44 45 46 47
- // a5: 50 51 52 53 54 55 56 57
- // a6: 60 61 62 63 64 65 66 67
- // a7: 70 71 72 73 74 75 76 77
- // to:
- // b0.val[0]: 00 10 02 12 04 14 06 16 40 50 42 52 44 54 46 56
- // b0.val[1]: 01 11 03 13 05 15 07 17 41 51 43 53 45 55 47 57
- // b1.val[0]: 20 30 22 32 24 34 26 36 60 70 62 72 64 74 66 76
- // b1.val[1]: 21 31 23 33 25 35 27 37 61 71 63 73 65 75 67 77
-
- const uint8x16x2_t b0 =
- vtrnq_u8(vcombine_u8(*a0, *a4), vcombine_u8(*a1, *a5));
- const uint8x16x2_t b1 =
- vtrnq_u8(vcombine_u8(*a2, *a6), vcombine_u8(*a3, *a7));
-
- // Swap 16 bit elements resulting in:
- // c0.val[0]: 00 10 20 30 04 14 24 34 40 50 60 70 44 54 64 74
- // c0.val[1]: 02 12 22 32 06 16 26 36 42 52 62 72 46 56 66 76
- // c1.val[0]: 01 11 21 31 05 15 25 35 41 51 61 71 45 55 65 75
- // c1.val[1]: 03 13 23 33 07 17 27 37 43 53 63 73 47 57 67 77
-
- const uint16x8x2_t c0 = vtrnq_u16(vreinterpretq_u16_u8(b0.val[0]),
- vreinterpretq_u16_u8(b1.val[0]));
- const uint16x8x2_t c1 = vtrnq_u16(vreinterpretq_u16_u8(b0.val[1]),
- vreinterpretq_u16_u8(b1.val[1]));
-
- // Unzip 32 bit elements resulting in:
- // d0.val[0]: 00 10 20 30 40 50 60 70 01 11 21 31 41 51 61 71
- // d0.val[1]: 04 14 24 34 44 54 64 74 05 15 25 35 45 55 65 75
- // d1.val[0]: 02 12 22 32 42 52 62 72 03 13 23 33 43 53 63 73
- // d1.val[1]: 06 16 26 36 46 56 66 76 07 17 27 37 47 57 67 77
- const uint32x4x2_t d0 = vuzpq_u32(vreinterpretq_u32_u16(c0.val[0]),
- vreinterpretq_u32_u16(c1.val[0]));
- const uint32x4x2_t d1 = vuzpq_u32(vreinterpretq_u32_u16(c0.val[1]),
- vreinterpretq_u32_u16(c1.val[1]));
-
- *a0 = vreinterpret_u8_u32(vget_low_u32(d0.val[0]));
- *a1 = vreinterpret_u8_u32(vget_high_u32(d0.val[0]));
- *a2 = vreinterpret_u8_u32(vget_low_u32(d1.val[0]));
- *a3 = vreinterpret_u8_u32(vget_high_u32(d1.val[0]));
- *a4 = vreinterpret_u8_u32(vget_low_u32(d0.val[1]));
- *a5 = vreinterpret_u8_u32(vget_high_u32(d0.val[1]));
- *a6 = vreinterpret_u8_u32(vget_low_u32(d1.val[1]));
- *a7 = vreinterpret_u8_u32(vget_high_u32(d1.val[1]));
-}
-
-static INLINE void transpose_u8_8x4(uint8x8_t *a0, uint8x8_t *a1, uint8x8_t *a2,
- uint8x8_t *a3) {
- // Swap 8 bit elements. Goes from:
- // a0: 00 01 02 03 04 05 06 07
- // a1: 10 11 12 13 14 15 16 17
- // a2: 20 21 22 23 24 25 26 27
- // a3: 30 31 32 33 34 35 36 37
- // to:
- // b0.val[0]: 00 10 02 12 04 14 06 16
- // b0.val[1]: 01 11 03 13 05 15 07 17
- // b1.val[0]: 20 30 22 32 24 34 26 36
- // b1.val[1]: 21 31 23 33 25 35 27 37
-
- const uint8x8x2_t b0 = vtrn_u8(*a0, *a1);
- const uint8x8x2_t b1 = vtrn_u8(*a2, *a3);
-
- // Swap 16 bit elements resulting in:
- // c0.val[0]: 00 10 20 30 04 14 24 34
- // c0.val[1]: 02 12 22 32 06 16 26 36
- // c1.val[0]: 01 11 21 31 05 15 25 35
- // c1.val[1]: 03 13 23 33 07 17 27 37
-
- const uint16x4x2_t c0 =
- vtrn_u16(vreinterpret_u16_u8(b0.val[0]), vreinterpret_u16_u8(b1.val[0]));
- const uint16x4x2_t c1 =
- vtrn_u16(vreinterpret_u16_u8(b0.val[1]), vreinterpret_u16_u8(b1.val[1]));
-
- *a0 = vreinterpret_u8_u16(c0.val[0]);
- *a1 = vreinterpret_u8_u16(c1.val[0]);
- *a2 = vreinterpret_u8_u16(c0.val[1]);
- *a3 = vreinterpret_u8_u16(c1.val[1]);
-}
-
-static INLINE void transpose_u8_4x4(uint8x8_t *a0, uint8x8_t *a1) {
- // Swap 16 bit elements. Goes from:
- // a0: 00 01 02 03 10 11 12 13
- // a1: 20 21 22 23 30 31 32 33
- // to:
- // b0.val[0]: 00 01 20 21 10 11 30 31
- // b0.val[1]: 02 03 22 23 12 13 32 33
-
- const uint16x4x2_t b0 =
- vtrn_u16(vreinterpret_u16_u8(*a0), vreinterpret_u16_u8(*a1));
-
- // Swap 32 bit elements resulting in:
- // c0.val[0]: 00 01 20 21 02 03 22 23
- // c0.val[1]: 10 11 30 31 12 13 32 33
-
- const uint32x2x2_t c0 = vtrn_u32(vreinterpret_u32_u16(b0.val[0]),
- vreinterpret_u32_u16(b0.val[1]));
-
- // Swap 8 bit elements resulting in:
- // d0.val[0]: 00 10 20 30 02 12 22 32
- // d0.val[1]: 01 11 21 31 03 13 23 33
-
- const uint8x8x2_t d0 =
- vtrn_u8(vreinterpret_u8_u32(c0.val[0]), vreinterpret_u8_u32(c0.val[1]));
-
- *a0 = d0.val[0];
- *a1 = d0.val[1];
-}
-
-static INLINE void transpose_u8_4x8(uint8x8_t *a0, uint8x8_t *a1, uint8x8_t *a2,
- uint8x8_t *a3, const uint8x8_t a4,
- const uint8x8_t a5, const uint8x8_t a6,
- const uint8x8_t a7) {
- // Swap 32 bit elements. Goes from:
- // a0: 00 01 02 03 XX XX XX XX
- // a1: 10 11 12 13 XX XX XX XX
- // a2: 20 21 22 23 XX XX XX XX
- // a3; 30 31 32 33 XX XX XX XX
- // a4: 40 41 42 43 XX XX XX XX
- // a5: 50 51 52 53 XX XX XX XX
- // a6: 60 61 62 63 XX XX XX XX
- // a7: 70 71 72 73 XX XX XX XX
- // to:
- // b0.val[0]: 00 01 02 03 40 41 42 43
- // b1.val[0]: 10 11 12 13 50 51 52 53
- // b2.val[0]: 20 21 22 23 60 61 62 63
- // b3.val[0]: 30 31 32 33 70 71 72 73
-
- const uint32x2x2_t b0 =
- vtrn_u32(vreinterpret_u32_u8(*a0), vreinterpret_u32_u8(a4));
- const uint32x2x2_t b1 =
- vtrn_u32(vreinterpret_u32_u8(*a1), vreinterpret_u32_u8(a5));
- const uint32x2x2_t b2 =
- vtrn_u32(vreinterpret_u32_u8(*a2), vreinterpret_u32_u8(a6));
- const uint32x2x2_t b3 =
- vtrn_u32(vreinterpret_u32_u8(*a3), vreinterpret_u32_u8(a7));
-
- // Swap 16 bit elements resulting in:
- // c0.val[0]: 00 01 20 21 40 41 60 61
- // c0.val[1]: 02 03 22 23 42 43 62 63
- // c1.val[0]: 10 11 30 31 50 51 70 71
- // c1.val[1]: 12 13 32 33 52 53 72 73
-
- const uint16x4x2_t c0 = vtrn_u16(vreinterpret_u16_u32(b0.val[0]),
- vreinterpret_u16_u32(b2.val[0]));
- const uint16x4x2_t c1 = vtrn_u16(vreinterpret_u16_u32(b1.val[0]),
- vreinterpret_u16_u32(b3.val[0]));
-
- // Swap 8 bit elements resulting in:
- // d0.val[0]: 00 10 20 30 40 50 60 70
- // d0.val[1]: 01 11 21 31 41 51 61 71
- // d1.val[0]: 02 12 22 32 42 52 62 72
- // d1.val[1]: 03 13 23 33 43 53 63 73
-
- const uint8x8x2_t d0 =
- vtrn_u8(vreinterpret_u8_u16(c0.val[0]), vreinterpret_u8_u16(c1.val[0]));
- const uint8x8x2_t d1 =
- vtrn_u8(vreinterpret_u8_u16(c0.val[1]), vreinterpret_u8_u16(c1.val[1]));
-
- *a0 = d0.val[0];
- *a1 = d0.val[1];
- *a2 = d1.val[0];
- *a3 = d1.val[1];
-}
-
-static INLINE void transpose_u16_4x8(uint16x4_t *a0, uint16x4_t *a1,
- uint16x4_t *a2, uint16x4_t *a3,
- uint16x4_t *a4, uint16x4_t *a5,
- uint16x4_t *a6, uint16x4_t *a7,
- uint16x8_t *o0, uint16x8_t *o1,
- uint16x8_t *o2, uint16x8_t *o3) {
- // Swap 16 bit elements. Goes from:
- // a0: 00 01 02 03
- // a1: 10 11 12 13
- // a2: 20 21 22 23
- // a3: 30 31 32 33
- // a4: 40 41 42 43
- // a5: 50 51 52 53
- // a6: 60 61 62 63
- // a7: 70 71 72 73
- // to:
- // b0.val[0]: 00 10 02 12
- // b0.val[1]: 01 11 03 13
- // b1.val[0]: 20 30 22 32
- // b1.val[1]: 21 31 23 33
- // b2.val[0]: 40 50 42 52
- // b2.val[1]: 41 51 43 53
- // b3.val[0]: 60 70 62 72
- // b3.val[1]: 61 71 63 73
-
- uint16x4x2_t b0 = vtrn_u16(*a0, *a1);
- uint16x4x2_t b1 = vtrn_u16(*a2, *a3);
- uint16x4x2_t b2 = vtrn_u16(*a4, *a5);
- uint16x4x2_t b3 = vtrn_u16(*a6, *a7);
-
- // Swap 32 bit elements resulting in:
- // c0.val[0]: 00 10 20 30
- // c0.val[1]: 02 12 22 32
- // c1.val[0]: 01 11 21 31
- // c1.val[1]: 03 13 23 33
- // c2.val[0]: 40 50 60 70
- // c2.val[1]: 42 52 62 72
- // c3.val[0]: 41 51 61 71
- // c3.val[1]: 43 53 63 73
-
- uint32x2x2_t c0 = vtrn_u32(vreinterpret_u32_u16(b0.val[0]),
- vreinterpret_u32_u16(b1.val[0]));
- uint32x2x2_t c1 = vtrn_u32(vreinterpret_u32_u16(b0.val[1]),
- vreinterpret_u32_u16(b1.val[1]));
- uint32x2x2_t c2 = vtrn_u32(vreinterpret_u32_u16(b2.val[0]),
- vreinterpret_u32_u16(b3.val[0]));
- uint32x2x2_t c3 = vtrn_u32(vreinterpret_u32_u16(b2.val[1]),
- vreinterpret_u32_u16(b3.val[1]));
-
- // Swap 64 bit elements resulting in:
- // o0: 00 10 20 30 40 50 60 70
- // o1: 01 11 21 31 41 51 61 71
- // o2: 02 12 22 32 42 52 62 72
- // o3: 03 13 23 33 43 53 63 73
-
- *o0 = vcombine_u16(vreinterpret_u16_u32(c0.val[0]),
- vreinterpret_u16_u32(c2.val[0]));
- *o1 = vcombine_u16(vreinterpret_u16_u32(c1.val[0]),
- vreinterpret_u16_u32(c3.val[0]));
- *o2 = vcombine_u16(vreinterpret_u16_u32(c0.val[1]),
- vreinterpret_u16_u32(c2.val[1]));
- *o3 = vcombine_u16(vreinterpret_u16_u32(c1.val[1]),
- vreinterpret_u16_u32(c3.val[1]));
-}
-
-static INLINE void transpose_u16_8x8(uint16x8_t *a0, uint16x8_t *a1,
- uint16x8_t *a2, uint16x8_t *a3,
- uint16x8_t *a4, uint16x8_t *a5,
- uint16x8_t *a6, uint16x8_t *a7) {
- // Swap 16 bit elements. Goes from:
- // a0: 00 01 02 03 04 05 06 07
- // a1: 10 11 12 13 14 15 16 17
- // a2: 20 21 22 23 24 25 26 27
- // a3: 30 31 32 33 34 35 36 37
- // a4: 40 41 42 43 44 45 46 47
- // a5: 50 51 52 53 54 55 56 57
- // a6: 60 61 62 63 64 65 66 67
- // a7: 70 71 72 73 74 75 76 77
- // to:
- // b0.val[0]: 00 10 02 12 04 14 06 16
- // b0.val[1]: 01 11 03 13 05 15 07 17
- // b1.val[0]: 20 30 22 32 24 34 26 36
- // b1.val[1]: 21 31 23 33 25 35 27 37
- // b2.val[0]: 40 50 42 52 44 54 46 56
- // b2.val[1]: 41 51 43 53 45 55 47 57
- // b3.val[0]: 60 70 62 72 64 74 66 76
- // b3.val[1]: 61 71 63 73 65 75 67 77
-
- const uint16x8x2_t b0 = vtrnq_u16(*a0, *a1);
- const uint16x8x2_t b1 = vtrnq_u16(*a2, *a3);
- const uint16x8x2_t b2 = vtrnq_u16(*a4, *a5);
- const uint16x8x2_t b3 = vtrnq_u16(*a6, *a7);
-
- // Swap 32 bit elements resulting in:
- // c0.val[0]: 00 10 20 30 04 14 24 34
- // c0.val[1]: 02 12 22 32 06 16 26 36
- // c1.val[0]: 01 11 21 31 05 15 25 35
- // c1.val[1]: 03 13 23 33 07 17 27 37
- // c2.val[0]: 40 50 60 70 44 54 64 74
- // c2.val[1]: 42 52 62 72 46 56 66 76
- // c3.val[0]: 41 51 61 71 45 55 65 75
- // c3.val[1]: 43 53 63 73 47 57 67 77
-
- const uint32x4x2_t c0 = vtrnq_u32(vreinterpretq_u32_u16(b0.val[0]),
- vreinterpretq_u32_u16(b1.val[0]));
- const uint32x4x2_t c1 = vtrnq_u32(vreinterpretq_u32_u16(b0.val[1]),
- vreinterpretq_u32_u16(b1.val[1]));
- const uint32x4x2_t c2 = vtrnq_u32(vreinterpretq_u32_u16(b2.val[0]),
- vreinterpretq_u32_u16(b3.val[0]));
- const uint32x4x2_t c3 = vtrnq_u32(vreinterpretq_u32_u16(b2.val[1]),
- vreinterpretq_u32_u16(b3.val[1]));
-
- *a0 = vcombine_u16(vget_low_u16(vreinterpretq_u16_u32(c0.val[0])),
- vget_low_u16(vreinterpretq_u16_u32(c2.val[0])));
- *a4 = vcombine_u16(vget_high_u16(vreinterpretq_u16_u32(c0.val[0])),
- vget_high_u16(vreinterpretq_u16_u32(c2.val[0])));
-
- *a2 = vcombine_u16(vget_low_u16(vreinterpretq_u16_u32(c0.val[1])),
- vget_low_u16(vreinterpretq_u16_u32(c2.val[1])));
- *a6 = vcombine_u16(vget_high_u16(vreinterpretq_u16_u32(c0.val[1])),
- vget_high_u16(vreinterpretq_u16_u32(c2.val[1])));
-
- *a1 = vcombine_u16(vget_low_u16(vreinterpretq_u16_u32(c1.val[0])),
- vget_low_u16(vreinterpretq_u16_u32(c3.val[0])));
- *a5 = vcombine_u16(vget_high_u16(vreinterpretq_u16_u32(c1.val[0])),
- vget_high_u16(vreinterpretq_u16_u32(c3.val[0])));
-
- *a3 = vcombine_u16(vget_low_u16(vreinterpretq_u16_u32(c1.val[1])),
- vget_low_u16(vreinterpretq_u16_u32(c3.val[1])));
- *a7 = vcombine_u16(vget_high_u16(vreinterpretq_u16_u32(c1.val[1])),
- vget_high_u16(vreinterpretq_u16_u32(c3.val[1])));
-}
-
-static INLINE void transpose_s16_8x8(int16x8_t *a0, int16x8_t *a1,
- int16x8_t *a2, int16x8_t *a3,
- int16x8_t *a4, int16x8_t *a5,
- int16x8_t *a6, int16x8_t *a7) {
- // Swap 16 bit elements. Goes from:
- // a0: 00 01 02 03 04 05 06 07
- // a1: 10 11 12 13 14 15 16 17
- // a2: 20 21 22 23 24 25 26 27
- // a3: 30 31 32 33 34 35 36 37
- // a4: 40 41 42 43 44 45 46 47
- // a5: 50 51 52 53 54 55 56 57
- // a6: 60 61 62 63 64 65 66 67
- // a7: 70 71 72 73 74 75 76 77
- // to:
- // b0.val[0]: 00 10 02 12 04 14 06 16
- // b0.val[1]: 01 11 03 13 05 15 07 17
- // b1.val[0]: 20 30 22 32 24 34 26 36
- // b1.val[1]: 21 31 23 33 25 35 27 37
- // b2.val[0]: 40 50 42 52 44 54 46 56
- // b2.val[1]: 41 51 43 53 45 55 47 57
- // b3.val[0]: 60 70 62 72 64 74 66 76
- // b3.val[1]: 61 71 63 73 65 75 67 77
-
- const int16x8x2_t b0 = vtrnq_s16(*a0, *a1);
- const int16x8x2_t b1 = vtrnq_s16(*a2, *a3);
- const int16x8x2_t b2 = vtrnq_s16(*a4, *a5);
- const int16x8x2_t b3 = vtrnq_s16(*a6, *a7);
-
- // Swap 32 bit elements resulting in:
- // c0.val[0]: 00 10 20 30 04 14 24 34
- // c0.val[1]: 02 12 22 32 06 16 26 36
- // c1.val[0]: 01 11 21 31 05 15 25 35
- // c1.val[1]: 03 13 23 33 07 17 27 37
- // c2.val[0]: 40 50 60 70 44 54 64 74
- // c2.val[1]: 42 52 62 72 46 56 66 76
- // c3.val[0]: 41 51 61 71 45 55 65 75
- // c3.val[1]: 43 53 63 73 47 57 67 77
-
- const int32x4x2_t c0 = vtrnq_s32(vreinterpretq_s32_s16(b0.val[0]),
- vreinterpretq_s32_s16(b1.val[0]));
- const int32x4x2_t c1 = vtrnq_s32(vreinterpretq_s32_s16(b0.val[1]),
- vreinterpretq_s32_s16(b1.val[1]));
- const int32x4x2_t c2 = vtrnq_s32(vreinterpretq_s32_s16(b2.val[0]),
- vreinterpretq_s32_s16(b3.val[0]));
- const int32x4x2_t c3 = vtrnq_s32(vreinterpretq_s32_s16(b2.val[1]),
- vreinterpretq_s32_s16(b3.val[1]));
-
- *a0 = vcombine_s16(vget_low_s16(vreinterpretq_s16_s32(c0.val[0])),
- vget_low_s16(vreinterpretq_s16_s32(c2.val[0])));
- *a4 = vcombine_s16(vget_high_s16(vreinterpretq_s16_s32(c0.val[0])),
- vget_high_s16(vreinterpretq_s16_s32(c2.val[0])));
-
- *a2 = vcombine_s16(vget_low_s16(vreinterpretq_s16_s32(c0.val[1])),
- vget_low_s16(vreinterpretq_s16_s32(c2.val[1])));
- *a6 = vcombine_s16(vget_high_s16(vreinterpretq_s16_s32(c0.val[1])),
- vget_high_s16(vreinterpretq_s16_s32(c2.val[1])));
-
- *a1 = vcombine_s16(vget_low_s16(vreinterpretq_s16_s32(c1.val[0])),
- vget_low_s16(vreinterpretq_s16_s32(c3.val[0])));
- *a5 = vcombine_s16(vget_high_s16(vreinterpretq_s16_s32(c1.val[0])),
- vget_high_s16(vreinterpretq_s16_s32(c3.val[0])));
-
- *a3 = vcombine_s16(vget_low_s16(vreinterpretq_s16_s32(c1.val[1])),
- vget_low_s16(vreinterpretq_s16_s32(c3.val[1])));
- *a7 = vcombine_s16(vget_high_s16(vreinterpretq_s16_s32(c1.val[1])),
- vget_high_s16(vreinterpretq_s16_s32(c3.val[1])));
-}
-
-static INLINE int16x8x2_t vpx_vtrnq_s64_to_s16(int32x4_t a0, int32x4_t a1) {
- int16x8x2_t b0;
- b0.val[0] = vcombine_s16(vreinterpret_s16_s32(vget_low_s32(a0)),
- vreinterpret_s16_s32(vget_low_s32(a1)));
- b0.val[1] = vcombine_s16(vreinterpret_s16_s32(vget_high_s32(a0)),
- vreinterpret_s16_s32(vget_high_s32(a1)));
- return b0;
-}
-
-static INLINE void transpose_s16_8x8q(int16x8_t *a0, int16x8_t *out) {
- // Swap 16 bit elements. Goes from:
- // a0: 00 01 02 03 04 05 06 07
- // a1: 10 11 12 13 14 15 16 17
- // a2: 20 21 22 23 24 25 26 27
- // a3: 30 31 32 33 34 35 36 37
- // a4: 40 41 42 43 44 45 46 47
- // a5: 50 51 52 53 54 55 56 57
- // a6: 60 61 62 63 64 65 66 67
- // a7: 70 71 72 73 74 75 76 77
- // to:
- // b0.val[0]: 00 10 02 12 04 14 06 16
- // b0.val[1]: 01 11 03 13 05 15 07 17
- // b1.val[0]: 20 30 22 32 24 34 26 36
- // b1.val[1]: 21 31 23 33 25 35 27 37
- // b2.val[0]: 40 50 42 52 44 54 46 56
- // b2.val[1]: 41 51 43 53 45 55 47 57
- // b3.val[0]: 60 70 62 72 64 74 66 76
- // b3.val[1]: 61 71 63 73 65 75 67 77
-
- const int16x8x2_t b0 = vtrnq_s16(*a0, *(a0 + 1));
- const int16x8x2_t b1 = vtrnq_s16(*(a0 + 2), *(a0 + 3));
- const int16x8x2_t b2 = vtrnq_s16(*(a0 + 4), *(a0 + 5));
- const int16x8x2_t b3 = vtrnq_s16(*(a0 + 6), *(a0 + 7));
-
- // Swap 32 bit elements resulting in:
- // c0.val[0]: 00 10 20 30 04 14 24 34
- // c0.val[1]: 02 12 22 32 06 16 26 36
- // c1.val[0]: 01 11 21 31 05 15 25 35
- // c1.val[1]: 03 13 23 33 07 17 27 37
- // c2.val[0]: 40 50 60 70 44 54 64 74
- // c2.val[1]: 42 52 62 72 46 56 66 76
- // c3.val[0]: 41 51 61 71 45 55 65 75
- // c3.val[1]: 43 53 63 73 47 57 67 77
-
- const int32x4x2_t c0 = vtrnq_s32(vreinterpretq_s32_s16(b0.val[0]),
- vreinterpretq_s32_s16(b1.val[0]));
- const int32x4x2_t c1 = vtrnq_s32(vreinterpretq_s32_s16(b0.val[1]),
- vreinterpretq_s32_s16(b1.val[1]));
- const int32x4x2_t c2 = vtrnq_s32(vreinterpretq_s32_s16(b2.val[0]),
- vreinterpretq_s32_s16(b3.val[0]));
- const int32x4x2_t c3 = vtrnq_s32(vreinterpretq_s32_s16(b2.val[1]),
- vreinterpretq_s32_s16(b3.val[1]));
-
- // Swap 64 bit elements resulting in:
- // d0.val[0]: 00 10 20 30 40 50 60 70
- // d0.val[1]: 04 14 24 34 44 54 64 74
- // d1.val[0]: 01 11 21 31 41 51 61 71
- // d1.val[1]: 05 15 25 35 45 55 65 75
- // d2.val[0]: 02 12 22 32 42 52 62 72
- // d2.val[1]: 06 16 26 36 46 56 66 76
- // d3.val[0]: 03 13 23 33 43 53 63 73
- // d3.val[1]: 07 17 27 37 47 57 67 77
- const int16x8x2_t d0 = vpx_vtrnq_s64_to_s16(c0.val[0], c2.val[0]);
- const int16x8x2_t d1 = vpx_vtrnq_s64_to_s16(c1.val[0], c3.val[0]);
- const int16x8x2_t d2 = vpx_vtrnq_s64_to_s16(c0.val[1], c2.val[1]);
- const int16x8x2_t d3 = vpx_vtrnq_s64_to_s16(c1.val[1], c3.val[1]);
-
- *out = d0.val[0];
- *(out + 1) = d1.val[0];
- *(out + 2) = d2.val[0];
- *(out + 3) = d3.val[0];
- *(out + 4) = d0.val[1];
- *(out + 5) = d1.val[1];
- *(out + 6) = d2.val[1];
- *(out + 7) = d3.val[1];
-}
-
-static INLINE void transpose_s16_4x4d(int16x4_t *a0, int16x4_t *a1,
- int16x4_t *a2, int16x4_t *a3) {
- // Swap 16 bit elements. Goes from:
- // a0: 00 01 02 03
- // a1: 10 11 12 13
- // a2: 20 21 22 23
- // a3: 30 31 32 33
- // to:
- // b0.val[0]: 00 10 02 12
- // b0.val[1]: 01 11 03 13
- // b1.val[0]: 20 30 22 32
- // b1.val[1]: 21 31 23 33
-
- const int16x4x2_t b0 = vtrn_s16(*a0, *a1);
- const int16x4x2_t b1 = vtrn_s16(*a2, *a3);
-
- // Swap 32 bit elements resulting in:
- // c0.val[0]: 00 10 20 30
- // c0.val[1]: 02 12 22 32
- // c1.val[0]: 01 11 21 31
- // c1.val[1]: 03 13 23 33
-
- const int32x2x2_t c0 = vtrn_s32(vreinterpret_s32_s16(b0.val[0]),
- vreinterpret_s32_s16(b1.val[0]));
- const int32x2x2_t c1 = vtrn_s32(vreinterpret_s32_s16(b0.val[1]),
- vreinterpret_s32_s16(b1.val[1]));
-
- *a0 = vreinterpret_s16_s32(c0.val[0]);
- *a1 = vreinterpret_s16_s32(c1.val[0]);
- *a2 = vreinterpret_s16_s32(c0.val[1]);
- *a3 = vreinterpret_s16_s32(c1.val[1]);
-}
-
-static INLINE int32x4x2_t aom_vtrnq_s64_to_s32(int32x4_t a0, int32x4_t a1) {
- int32x4x2_t b0;
- b0.val[0] = vcombine_s32(vget_low_s32(a0), vget_low_s32(a1));
- b0.val[1] = vcombine_s32(vget_high_s32(a0), vget_high_s32(a1));
- return b0;
-}
-
-static INLINE void transpose_s32_4x4(int32x4_t *a0, int32x4_t *a1,
- int32x4_t *a2, int32x4_t *a3) {
- // Swap 32 bit elements. Goes from:
- // a0: 00 01 02 03
- // a1: 10 11 12 13
- // a2: 20 21 22 23
- // a3: 30 31 32 33
- // to:
- // b0.val[0]: 00 10 02 12
- // b0.val[1]: 01 11 03 13
- // b1.val[0]: 20 30 22 32
- // b1.val[1]: 21 31 23 33
-
- const int32x4x2_t b0 = vtrnq_s32(*a0, *a1);
- const int32x4x2_t b1 = vtrnq_s32(*a2, *a3);
-
- // Swap 64 bit elements resulting in:
- // c0.val[0]: 00 10 20 30
- // c0.val[1]: 02 12 22 32
- // c1.val[0]: 01 11 21 31
- // c1.val[1]: 03 13 23 33
-
- const int32x4x2_t c0 = aom_vtrnq_s64_to_s32(b0.val[0], b1.val[0]);
- const int32x4x2_t c1 = aom_vtrnq_s64_to_s32(b0.val[1], b1.val[1]);
-
- *a0 = c0.val[0];
- *a1 = c1.val[0];
- *a2 = c0.val[1];
- *a3 = c1.val[1];
-}
-
-#endif // AOM_AV1_COMMON_ARM_TRANSPOSE_NEON_H_
diff --git a/third_party/aom/av1/common/arm/warp_plane_neon.c b/third_party/aom/av1/common/arm/warp_plane_neon.c
deleted file mode 100644
index 7f02d42a7..000000000
--- a/third_party/aom/av1/common/arm/warp_plane_neon.c
+++ /dev/null
@@ -1,714 +0,0 @@
-/*
- * Copyright (c) 2018, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#include <assert.h>
-#include <arm_neon.h>
-#include <memory.h>
-#include <math.h>
-
-#include "aom_dsp/aom_dsp_common.h"
-#include "aom_ports/mem.h"
-#include "config/av1_rtcd.h"
-#include "av1/common/warped_motion.h"
-#include "av1/common/scale.h"
-
-/* This is a modified version of 'warped_filter' from warped_motion.c:
- * Each coefficient is stored in 8 bits instead of 16 bits
- * The coefficients are rearranged in the column order 0, 2, 4, 6, 1, 3, 5, 7
-
- This is done in order to avoid overflow: Since the tap with the largest
- coefficient could be any of taps 2, 3, 4 or 5, we can't use the summation
- order ((0 + 1) + (4 + 5)) + ((2 + 3) + (6 + 7)) used in the regular
- convolve functions.
-
- Instead, we use the summation order
- ((0 + 2) + (4 + 6)) + ((1 + 3) + (5 + 7)).
- The rearrangement of coefficients in this table is so that we can get the
- coefficients into the correct order more quickly.
-*/
-/* clang-format off */
-DECLARE_ALIGNED(8, static const int8_t,
- filter_8bit_neon[WARPEDPIXEL_PREC_SHIFTS * 3 + 1][8]) = {
-#if WARPEDPIXEL_PREC_BITS == 6
- // [-1, 0)
- { 0, 127, 0, 0, 0, 1, 0, 0}, { 0, 127, 0, 0, -1, 2, 0, 0},
- { 1, 127, -1, 0, -3, 4, 0, 0}, { 1, 126, -2, 0, -4, 6, 1, 0},
- { 1, 126, -3, 0, -5, 8, 1, 0}, { 1, 125, -4, 0, -6, 11, 1, 0},
- { 1, 124, -4, 0, -7, 13, 1, 0}, { 2, 123, -5, 0, -8, 15, 1, 0},
- { 2, 122, -6, 0, -9, 18, 1, 0}, { 2, 121, -6, 0, -10, 20, 1, 0},
- { 2, 120, -7, 0, -11, 22, 2, 0}, { 2, 119, -8, 0, -12, 25, 2, 0},
- { 3, 117, -8, 0, -13, 27, 2, 0}, { 3, 116, -9, 0, -13, 29, 2, 0},
- { 3, 114, -10, 0, -14, 32, 3, 0}, { 3, 113, -10, 0, -15, 35, 2, 0},
- { 3, 111, -11, 0, -15, 37, 3, 0}, { 3, 109, -11, 0, -16, 40, 3, 0},
- { 3, 108, -12, 0, -16, 42, 3, 0}, { 4, 106, -13, 0, -17, 45, 3, 0},
- { 4, 104, -13, 0, -17, 47, 3, 0}, { 4, 102, -14, 0, -17, 50, 3, 0},
- { 4, 100, -14, 0, -17, 52, 3, 0}, { 4, 98, -15, 0, -18, 55, 4, 0},
- { 4, 96, -15, 0, -18, 58, 3, 0}, { 4, 94, -16, 0, -18, 60, 4, 0},
- { 4, 91, -16, 0, -18, 63, 4, 0}, { 4, 89, -16, 0, -18, 65, 4, 0},
- { 4, 87, -17, 0, -18, 68, 4, 0}, { 4, 85, -17, 0, -18, 70, 4, 0},
- { 4, 82, -17, 0, -18, 73, 4, 0}, { 4, 80, -17, 0, -18, 75, 4, 0},
- { 4, 78, -18, 0, -18, 78, 4, 0}, { 4, 75, -18, 0, -17, 80, 4, 0},
- { 4, 73, -18, 0, -17, 82, 4, 0}, { 4, 70, -18, 0, -17, 85, 4, 0},
- { 4, 68, -18, 0, -17, 87, 4, 0}, { 4, 65, -18, 0, -16, 89, 4, 0},
- { 4, 63, -18, 0, -16, 91, 4, 0}, { 4, 60, -18, 0, -16, 94, 4, 0},
- { 3, 58, -18, 0, -15, 96, 4, 0}, { 4, 55, -18, 0, -15, 98, 4, 0},
- { 3, 52, -17, 0, -14, 100, 4, 0}, { 3, 50, -17, 0, -14, 102, 4, 0},
- { 3, 47, -17, 0, -13, 104, 4, 0}, { 3, 45, -17, 0, -13, 106, 4, 0},
- { 3, 42, -16, 0, -12, 108, 3, 0}, { 3, 40, -16, 0, -11, 109, 3, 0},
- { 3, 37, -15, 0, -11, 111, 3, 0}, { 2, 35, -15, 0, -10, 113, 3, 0},
- { 3, 32, -14, 0, -10, 114, 3, 0}, { 2, 29, -13, 0, -9, 116, 3, 0},
- { 2, 27, -13, 0, -8, 117, 3, 0}, { 2, 25, -12, 0, -8, 119, 2, 0},
- { 2, 22, -11, 0, -7, 120, 2, 0}, { 1, 20, -10, 0, -6, 121, 2, 0},
- { 1, 18, -9, 0, -6, 122, 2, 0}, { 1, 15, -8, 0, -5, 123, 2, 0},
- { 1, 13, -7, 0, -4, 124, 1, 0}, { 1, 11, -6, 0, -4, 125, 1, 0},
- { 1, 8, -5, 0, -3, 126, 1, 0}, { 1, 6, -4, 0, -2, 126, 1, 0},
- { 0, 4, -3, 0, -1, 127, 1, 0}, { 0, 2, -1, 0, 0, 127, 0, 0},
- // [0, 1)
- { 0, 0, 1, 0, 0, 127, 0, 0}, { 0, -1, 2, 0, 0, 127, 0, 0},
- { 0, -3, 4, 1, 1, 127, -2, 0}, { 0, -5, 6, 1, 1, 127, -2, 0},
- { 0, -6, 8, 1, 2, 126, -3, 0}, {-1, -7, 11, 2, 2, 126, -4, -1},
- {-1, -8, 13, 2, 3, 125, -5, -1}, {-1, -10, 16, 3, 3, 124, -6, -1},
- {-1, -11, 18, 3, 4, 123, -7, -1}, {-1, -12, 20, 3, 4, 122, -7, -1},
- {-1, -13, 23, 3, 4, 121, -8, -1}, {-2, -14, 25, 4, 5, 120, -9, -1},
- {-1, -15, 27, 4, 5, 119, -10, -1}, {-1, -16, 30, 4, 5, 118, -11, -1},
- {-2, -17, 33, 5, 6, 116, -12, -1}, {-2, -17, 35, 5, 6, 114, -12, -1},
- {-2, -18, 38, 5, 6, 113, -13, -1}, {-2, -19, 41, 6, 7, 111, -14, -2},
- {-2, -19, 43, 6, 7, 110, -15, -2}, {-2, -20, 46, 6, 7, 108, -15, -2},
- {-2, -20, 49, 6, 7, 106, -16, -2}, {-2, -21, 51, 7, 7, 104, -16, -2},
- {-2, -21, 54, 7, 7, 102, -17, -2}, {-2, -21, 56, 7, 8, 100, -18, -2},
- {-2, -22, 59, 7, 8, 98, -18, -2}, {-2, -22, 62, 7, 8, 96, -19, -2},
- {-2, -22, 64, 7, 8, 94, -19, -2}, {-2, -22, 67, 8, 8, 91, -20, -2},
- {-2, -22, 69, 8, 8, 89, -20, -2}, {-2, -22, 72, 8, 8, 87, -21, -2},
- {-2, -21, 74, 8, 8, 84, -21, -2}, {-2, -22, 77, 8, 8, 82, -21, -2},
- {-2, -21, 79, 8, 8, 79, -21, -2}, {-2, -21, 82, 8, 8, 77, -22, -2},
- {-2, -21, 84, 8, 8, 74, -21, -2}, {-2, -21, 87, 8, 8, 72, -22, -2},
- {-2, -20, 89, 8, 8, 69, -22, -2}, {-2, -20, 91, 8, 8, 67, -22, -2},
- {-2, -19, 94, 8, 7, 64, -22, -2}, {-2, -19, 96, 8, 7, 62, -22, -2},
- {-2, -18, 98, 8, 7, 59, -22, -2}, {-2, -18, 100, 8, 7, 56, -21, -2},
- {-2, -17, 102, 7, 7, 54, -21, -2}, {-2, -16, 104, 7, 7, 51, -21, -2},
- {-2, -16, 106, 7, 6, 49, -20, -2}, {-2, -15, 108, 7, 6, 46, -20, -2},
- {-2, -15, 110, 7, 6, 43, -19, -2}, {-2, -14, 111, 7, 6, 41, -19, -2},
- {-1, -13, 113, 6, 5, 38, -18, -2}, {-1, -12, 114, 6, 5, 35, -17, -2},
- {-1, -12, 116, 6, 5, 33, -17, -2}, {-1, -11, 118, 5, 4, 30, -16, -1},
- {-1, -10, 119, 5, 4, 27, -15, -1}, {-1, -9, 120, 5, 4, 25, -14, -2},
- {-1, -8, 121, 4, 3, 23, -13, -1}, {-1, -7, 122, 4, 3, 20, -12, -1},
- {-1, -7, 123, 4, 3, 18, -11, -1}, {-1, -6, 124, 3, 3, 16, -10, -1},
- {-1, -5, 125, 3, 2, 13, -8, -1}, {-1, -4, 126, 2, 2, 11, -7, -1},
- { 0, -3, 126, 2, 1, 8, -6, 0}, { 0, -2, 127, 1, 1, 6, -5, 0},
- { 0, -2, 127, 1, 1, 4, -3, 0}, { 0, 0, 127, 0, 0, 2, -1, 0},
- // [1, 2)
- { 0, 0, 127, 0, 0, 1, 0, 0}, { 0, 0, 127, 0, 0, -1, 2, 0},
- { 0, 1, 127, -1, 0, -3, 4, 0}, { 0, 1, 126, -2, 0, -4, 6, 1},
- { 0, 1, 126, -3, 0, -5, 8, 1}, { 0, 1, 125, -4, 0, -6, 11, 1},
- { 0, 1, 124, -4, 0, -7, 13, 1}, { 0, 2, 123, -5, 0, -8, 15, 1},
- { 0, 2, 122, -6, 0, -9, 18, 1}, { 0, 2, 121, -6, 0, -10, 20, 1},
- { 0, 2, 120, -7, 0, -11, 22, 2}, { 0, 2, 119, -8, 0, -12, 25, 2},
- { 0, 3, 117, -8, 0, -13, 27, 2}, { 0, 3, 116, -9, 0, -13, 29, 2},
- { 0, 3, 114, -10, 0, -14, 32, 3}, { 0, 3, 113, -10, 0, -15, 35, 2},
- { 0, 3, 111, -11, 0, -15, 37, 3}, { 0, 3, 109, -11, 0, -16, 40, 3},
- { 0, 3, 108, -12, 0, -16, 42, 3}, { 0, 4, 106, -13, 0, -17, 45, 3},
- { 0, 4, 104, -13, 0, -17, 47, 3}, { 0, 4, 102, -14, 0, -17, 50, 3},
- { 0, 4, 100, -14, 0, -17, 52, 3}, { 0, 4, 98, -15, 0, -18, 55, 4},
- { 0, 4, 96, -15, 0, -18, 58, 3}, { 0, 4, 94, -16, 0, -18, 60, 4},
- { 0, 4, 91, -16, 0, -18, 63, 4}, { 0, 4, 89, -16, 0, -18, 65, 4},
- { 0, 4, 87, -17, 0, -18, 68, 4}, { 0, 4, 85, -17, 0, -18, 70, 4},
- { 0, 4, 82, -17, 0, -18, 73, 4}, { 0, 4, 80, -17, 0, -18, 75, 4},
- { 0, 4, 78, -18, 0, -18, 78, 4}, { 0, 4, 75, -18, 0, -17, 80, 4},
- { 0, 4, 73, -18, 0, -17, 82, 4}, { 0, 4, 70, -18, 0, -17, 85, 4},
- { 0, 4, 68, -18, 0, -17, 87, 4}, { 0, 4, 65, -18, 0, -16, 89, 4},
- { 0, 4, 63, -18, 0, -16, 91, 4}, { 0, 4, 60, -18, 0, -16, 94, 4},
- { 0, 3, 58, -18, 0, -15, 96, 4}, { 0, 4, 55, -18, 0, -15, 98, 4},
- { 0, 3, 52, -17, 0, -14, 100, 4}, { 0, 3, 50, -17, 0, -14, 102, 4},
- { 0, 3, 47, -17, 0, -13, 104, 4}, { 0, 3, 45, -17, 0, -13, 106, 4},
- { 0, 3, 42, -16, 0, -12, 108, 3}, { 0, 3, 40, -16, 0, -11, 109, 3},
- { 0, 3, 37, -15, 0, -11, 111, 3}, { 0, 2, 35, -15, 0, -10, 113, 3},
- { 0, 3, 32, -14, 0, -10, 114, 3}, { 0, 2, 29, -13, 0, -9, 116, 3},
- { 0, 2, 27, -13, 0, -8, 117, 3}, { 0, 2, 25, -12, 0, -8, 119, 2},
- { 0, 2, 22, -11, 0, -7, 120, 2}, { 0, 1, 20, -10, 0, -6, 121, 2},
- { 0, 1, 18, -9, 0, -6, 122, 2}, { 0, 1, 15, -8, 0, -5, 123, 2},
- { 0, 1, 13, -7, 0, -4, 124, 1}, { 0, 1, 11, -6, 0, -4, 125, 1},
- { 0, 1, 8, -5, 0, -3, 126, 1}, { 0, 1, 6, -4, 0, -2, 126, 1},
- { 0, 0, 4, -3, 0, -1, 127, 1}, { 0, 0, 2, -1, 0, 0, 127, 0},
- // dummy (replicate row index 191)
- { 0, 0, 2, -1, 0, 0, 127, 0},
-
-#else
- // [-1, 0)
- { 0, 127, 0, 0, 0, 1, 0, 0}, { 1, 127, -1, 0, -3, 4, 0, 0},
- { 1, 126, -3, 0, -5, 8, 1, 0}, { 1, 124, -4, 0, -7, 13, 1, 0},
- { 2, 122, -6, 0, -9, 18, 1, 0}, { 2, 120, -7, 0, -11, 22, 2, 0},
- { 3, 117, -8, 0, -13, 27, 2, 0}, { 3, 114, -10, 0, -14, 32, 3, 0},
- { 3, 111, -11, 0, -15, 37, 3, 0}, { 3, 108, -12, 0, -16, 42, 3, 0},
- { 4, 104, -13, 0, -17, 47, 3, 0}, { 4, 100, -14, 0, -17, 52, 3, 0},
- { 4, 96, -15, 0, -18, 58, 3, 0}, { 4, 91, -16, 0, -18, 63, 4, 0},
- { 4, 87, -17, 0, -18, 68, 4, 0}, { 4, 82, -17, 0, -18, 73, 4, 0},
- { 4, 78, -18, 0, -18, 78, 4, 0}, { 4, 73, -18, 0, -17, 82, 4, 0},
- { 4, 68, -18, 0, -17, 87, 4, 0}, { 4, 63, -18, 0, -16, 91, 4, 0},
- { 3, 58, -18, 0, -15, 96, 4, 0}, { 3, 52, -17, 0, -14, 100, 4, 0},
- { 3, 47, -17, 0, -13, 104, 4, 0}, { 3, 42, -16, 0, -12, 108, 3, 0},
- { 3, 37, -15, 0, -11, 111, 3, 0}, { 3, 32, -14, 0, -10, 114, 3, 0},
- { 2, 27, -13, 0, -8, 117, 3, 0}, { 2, 22, -11, 0, -7, 120, 2, 0},
- { 1, 18, -9, 0, -6, 122, 2, 0}, { 1, 13, -7, 0, -4, 124, 1, 0},
- { 1, 8, -5, 0, -3, 126, 1, 0}, { 0, 4, -3, 0, -1, 127, 1, 0},
- // [0, 1)
- { 0, 0, 1, 0, 0, 127, 0, 0}, { 0, -3, 4, 1, 1, 127, -2, 0},
- { 0, -6, 8, 1, 2, 126, -3, 0}, {-1, -8, 13, 2, 3, 125, -5, -1},
- {-1, -11, 18, 3, 4, 123, -7, -1}, {-1, -13, 23, 3, 4, 121, -8, -1},
- {-1, -15, 27, 4, 5, 119, -10, -1}, {-2, -17, 33, 5, 6, 116, -12, -1},
- {-2, -18, 38, 5, 6, 113, -13, -1}, {-2, -19, 43, 6, 7, 110, -15, -2},
- {-2, -20, 49, 6, 7, 106, -16, -2}, {-2, -21, 54, 7, 7, 102, -17, -2},
- {-2, -22, 59, 7, 8, 98, -18, -2}, {-2, -22, 64, 7, 8, 94, -19, -2},
- {-2, -22, 69, 8, 8, 89, -20, -2}, {-2, -21, 74, 8, 8, 84, -21, -2},
- {-2, -21, 79, 8, 8, 79, -21, -2}, {-2, -21, 84, 8, 8, 74, -21, -2},
- {-2, -20, 89, 8, 8, 69, -22, -2}, {-2, -19, 94, 8, 7, 64, -22, -2},
- {-2, -18, 98, 8, 7, 59, -22, -2}, {-2, -17, 102, 7, 7, 54, -21, -2},
- {-2, -16, 106, 7, 6, 49, -20, -2}, {-2, -15, 110, 7, 6, 43, -19, -2},
- {-1, -13, 113, 6, 5, 38, -18, -2}, {-1, -12, 116, 6, 5, 33, -17, -2},
- {-1, -10, 119, 5, 4, 27, -15, -1}, {-1, -8, 121, 4, 3, 23, -13, -1},
- {-1, -7, 123, 4, 3, 18, -11, -1}, {-1, -5, 125, 3, 2, 13, -8, -1},
- { 0, -3, 126, 2, 1, 8, -6, 0}, { 0, -2, 127, 1, 1, 4, -3, 0},
- // [1, 2)
- { 0, 0, 127, 0, 0, 1, 0, 0}, { 0, 1, 127, -1, 0, -3, 4, 0},
- { 0, 1, 126, -3, 0, -5, 8, 1}, { 0, 1, 124, -4, 0, -7, 13, 1},
- { 0, 2, 122, -6, 0, -9, 18, 1}, { 0, 2, 120, -7, 0, -11, 22, 2},
- { 0, 3, 117, -8, 0, -13, 27, 2}, { 0, 3, 114, -10, 0, -14, 32, 3},
- { 0, 3, 111, -11, 0, -15, 37, 3}, { 0, 3, 108, -12, 0, -16, 42, 3},
- { 0, 4, 104, -13, 0, -17, 47, 3}, { 0, 4, 100, -14, 0, -17, 52, 3},
- { 0, 4, 96, -15, 0, -18, 58, 3}, { 0, 4, 91, -16, 0, -18, 63, 4},
- { 0, 4, 87, -17, 0, -18, 68, 4}, { 0, 4, 82, -17, 0, -18, 73, 4},
- { 0, 4, 78, -18, 0, -18, 78, 4}, { 0, 4, 73, -18, 0, -17, 82, 4},
- { 0, 4, 68, -18, 0, -17, 87, 4}, { 0, 4, 63, -18, 0, -16, 91, 4},
- { 0, 3, 58, -18, 0, -15, 96, 4}, { 0, 3, 52, -17, 0, -14, 100, 4},
- { 0, 3, 47, -17, 0, -13, 104, 4}, { 0, 3, 42, -16, 0, -12, 108, 3},
- { 0, 3, 37, -15, 0, -11, 111, 3}, { 0, 3, 32, -14, 0, -10, 114, 3},
- { 0, 2, 27, -13, 0, -8, 117, 3}, { 0, 2, 22, -11, 0, -7, 120, 2},
- { 0, 1, 18, -9, 0, -6, 122, 2}, { 0, 1, 13, -7, 0, -4, 124, 1},
- { 0, 1, 8, -5, 0, -3, 126, 1}, { 0, 0, 4, -3, 0, -1, 127, 1},
- // dummy (replicate row index 95)
- { 0, 0, 4, -3, 0, -1, 127, 1},
-#endif // WARPEDPIXEL_PREC_BITS == 6
-};
-/* clang-format on */
-
-static INLINE void convolve(int32x2x2_t x0, int32x2x2_t x1, uint8x8_t src_0,
- uint8x8_t src_1, int16x4_t *res) {
- int16x8_t coeff_0, coeff_1;
- int16x8_t pix_0, pix_1;
-
- coeff_0 = vcombine_s16(vreinterpret_s16_s32(x0.val[0]),
- vreinterpret_s16_s32(x1.val[0]));
- coeff_1 = vcombine_s16(vreinterpret_s16_s32(x0.val[1]),
- vreinterpret_s16_s32(x1.val[1]));
-
- pix_0 = vreinterpretq_s16_u16(vmovl_u8(src_0));
- pix_0 = vmulq_s16(coeff_0, pix_0);
-
- pix_1 = vreinterpretq_s16_u16(vmovl_u8(src_1));
- pix_0 = vmlaq_s16(pix_0, coeff_1, pix_1);
-
- *res = vpadd_s16(vget_low_s16(pix_0), vget_high_s16(pix_0));
-}
-
-static INLINE void horizontal_filter_neon(uint8x16_t src_1, uint8x16_t src_2,
- uint8x16_t src_3, uint8x16_t src_4,
- int16x8_t *tmp_dst, int sx, int alpha,
- int k, const int offset_bits_horiz,
- const int reduce_bits_horiz) {
- const uint8x16_t mask = { 255, 0, 255, 0, 255, 0, 255, 0,
- 255, 0, 255, 0, 255, 0, 255, 0 };
- const int32x4_t add_const = vdupq_n_s32((int32_t)(1 << offset_bits_horiz));
- const int16x8_t shift = vdupq_n_s16(-(int16_t)reduce_bits_horiz);
-
- int16x8_t f0, f1, f2, f3, f4, f5, f6, f7;
- int32x2x2_t b0, b1;
- uint8x8_t src_1_low, src_2_low, src_3_low, src_4_low, src_5_low, src_6_low;
- int32x4_t tmp_res_low, tmp_res_high;
- uint16x8_t res;
- int16x4_t res_0246_even, res_0246_odd, res_1357_even, res_1357_odd;
-
- uint8x16_t tmp_0 = vandq_u8(src_1, mask);
- uint8x16_t tmp_1 = vandq_u8(src_2, mask);
- uint8x16_t tmp_2 = vandq_u8(src_3, mask);
- uint8x16_t tmp_3 = vandq_u8(src_4, mask);
-
- tmp_2 = vextq_u8(tmp_0, tmp_0, 1);
- tmp_3 = vextq_u8(tmp_1, tmp_1, 1);
-
- src_1 = vaddq_u8(tmp_0, tmp_2);
- src_2 = vaddq_u8(tmp_1, tmp_3);
-
- src_1_low = vget_low_u8(src_1);
- src_2_low = vget_low_u8(src_2);
- src_3_low = vget_low_u8(vextq_u8(src_1, src_1, 4));
- src_4_low = vget_low_u8(vextq_u8(src_2, src_2, 4));
- src_5_low = vget_low_u8(vextq_u8(src_1, src_1, 2));
- src_6_low = vget_low_u8(vextq_u8(src_1, src_1, 6));
-
- // Loading the 8 filter taps
- f0 = vmovl_s8(
- vld1_s8(filter_8bit_neon[(sx + 0 * alpha) >> WARPEDDIFF_PREC_BITS]));
- f1 = vmovl_s8(
- vld1_s8(filter_8bit_neon[(sx + 1 * alpha) >> WARPEDDIFF_PREC_BITS]));
- f2 = vmovl_s8(
- vld1_s8(filter_8bit_neon[(sx + 2 * alpha) >> WARPEDDIFF_PREC_BITS]));
- f3 = vmovl_s8(
- vld1_s8(filter_8bit_neon[(sx + 3 * alpha) >> WARPEDDIFF_PREC_BITS]));
- f4 = vmovl_s8(
- vld1_s8(filter_8bit_neon[(sx + 4 * alpha) >> WARPEDDIFF_PREC_BITS]));
- f5 = vmovl_s8(
- vld1_s8(filter_8bit_neon[(sx + 5 * alpha) >> WARPEDDIFF_PREC_BITS]));
- f6 = vmovl_s8(
- vld1_s8(filter_8bit_neon[(sx + 6 * alpha) >> WARPEDDIFF_PREC_BITS]));
- f7 = vmovl_s8(
- vld1_s8(filter_8bit_neon[(sx + 7 * alpha) >> WARPEDDIFF_PREC_BITS]));
-
- b0 = vtrn_s32(vreinterpret_s32_s16(vget_low_s16(f0)),
- vreinterpret_s32_s16(vget_low_s16(f2)));
- b1 = vtrn_s32(vreinterpret_s32_s16(vget_low_s16(f4)),
- vreinterpret_s32_s16(vget_low_s16(f6)));
- convolve(b0, b1, src_1_low, src_3_low, &res_0246_even);
-
- b0 = vtrn_s32(vreinterpret_s32_s16(vget_low_s16(f1)),
- vreinterpret_s32_s16(vget_low_s16(f3)));
- b1 = vtrn_s32(vreinterpret_s32_s16(vget_low_s16(f5)),
- vreinterpret_s32_s16(vget_low_s16(f7)));
- convolve(b0, b1, src_2_low, src_4_low, &res_0246_odd);
-
- b0 = vtrn_s32(vreinterpret_s32_s16(vget_high_s16(f0)),
- vreinterpret_s32_s16(vget_high_s16(f2)));
- b1 = vtrn_s32(vreinterpret_s32_s16(vget_high_s16(f4)),
- vreinterpret_s32_s16(vget_high_s16(f6)));
- convolve(b0, b1, src_2_low, src_4_low, &res_1357_even);
-
- b0 = vtrn_s32(vreinterpret_s32_s16(vget_high_s16(f1)),
- vreinterpret_s32_s16(vget_high_s16(f3)));
- b1 = vtrn_s32(vreinterpret_s32_s16(vget_high_s16(f5)),
- vreinterpret_s32_s16(vget_high_s16(f7)));
- convolve(b0, b1, src_5_low, src_6_low, &res_1357_odd);
-
- tmp_res_low = vaddl_s16(res_0246_even, res_1357_even);
- tmp_res_high = vaddl_s16(res_0246_odd, res_1357_odd);
-
- tmp_res_low = vaddq_s32(tmp_res_low, add_const);
- tmp_res_high = vaddq_s32(tmp_res_high, add_const);
-
- res = vcombine_u16(vqmovun_s32(tmp_res_low), vqmovun_s32(tmp_res_high));
- res = vqrshlq_u16(res, shift);
-
- tmp_dst[k + 7] = vreinterpretq_s16_u16(res);
-}
-
-static INLINE void vertical_filter_neon(const int16x8_t *src,
- int32x4_t *res_low, int32x4_t *res_high,
- int sy, int gamma) {
- int16x4_t src_0, src_1, fltr_0, fltr_1;
- int32x4_t res_0, res_1;
- int32x2_t res_0_im, res_1_im;
- int32x4_t res_even, res_odd, im_res_0, im_res_1;
-
- int16x8_t f0, f1, f2, f3, f4, f5, f6, f7;
- int16x8x2_t b0, b1, b2, b3;
- int32x4x2_t c0, c1, c2, c3;
- int32x4x2_t d0, d1, d2, d3;
-
- b0 = vtrnq_s16(src[0], src[1]);
- b1 = vtrnq_s16(src[2], src[3]);
- b2 = vtrnq_s16(src[4], src[5]);
- b3 = vtrnq_s16(src[6], src[7]);
-
- c0 = vtrnq_s32(vreinterpretq_s32_s16(b0.val[0]),
- vreinterpretq_s32_s16(b0.val[1]));
- c1 = vtrnq_s32(vreinterpretq_s32_s16(b1.val[0]),
- vreinterpretq_s32_s16(b1.val[1]));
- c2 = vtrnq_s32(vreinterpretq_s32_s16(b2.val[0]),
- vreinterpretq_s32_s16(b2.val[1]));
- c3 = vtrnq_s32(vreinterpretq_s32_s16(b3.val[0]),
- vreinterpretq_s32_s16(b3.val[1]));
-
- f0 = vld1q_s16(
- (int16_t *)(warped_filter + ((sy + 0 * gamma) >> WARPEDDIFF_PREC_BITS)));
- f1 = vld1q_s16(
- (int16_t *)(warped_filter + ((sy + 1 * gamma) >> WARPEDDIFF_PREC_BITS)));
- f2 = vld1q_s16(
- (int16_t *)(warped_filter + ((sy + 2 * gamma) >> WARPEDDIFF_PREC_BITS)));
- f3 = vld1q_s16(
- (int16_t *)(warped_filter + ((sy + 3 * gamma) >> WARPEDDIFF_PREC_BITS)));
- f4 = vld1q_s16(
- (int16_t *)(warped_filter + ((sy + 4 * gamma) >> WARPEDDIFF_PREC_BITS)));
- f5 = vld1q_s16(
- (int16_t *)(warped_filter + ((sy + 5 * gamma) >> WARPEDDIFF_PREC_BITS)));
- f6 = vld1q_s16(
- (int16_t *)(warped_filter + ((sy + 6 * gamma) >> WARPEDDIFF_PREC_BITS)));
- f7 = vld1q_s16(
- (int16_t *)(warped_filter + ((sy + 7 * gamma) >> WARPEDDIFF_PREC_BITS)));
-
- d0 = vtrnq_s32(vreinterpretq_s32_s16(f0), vreinterpretq_s32_s16(f2));
- d1 = vtrnq_s32(vreinterpretq_s32_s16(f4), vreinterpretq_s32_s16(f6));
- d2 = vtrnq_s32(vreinterpretq_s32_s16(f1), vreinterpretq_s32_s16(f3));
- d3 = vtrnq_s32(vreinterpretq_s32_s16(f5), vreinterpretq_s32_s16(f7));
-
- // row:0,1 even_col:0,2
- src_0 = vget_low_s16(vreinterpretq_s16_s32(c0.val[0]));
- fltr_0 = vget_low_s16(vreinterpretq_s16_s32(d0.val[0]));
- res_0 = vmull_s16(src_0, fltr_0);
-
- // row:0,1,2,3 even_col:0,2
- src_0 = vget_low_s16(vreinterpretq_s16_s32(c1.val[0]));
- fltr_0 = vget_low_s16(vreinterpretq_s16_s32(d0.val[1]));
- res_0 = vmlal_s16(res_0, src_0, fltr_0);
- res_0_im = vpadd_s32(vget_low_s32(res_0), vget_high_s32(res_0));
-
- // row:0,1 even_col:4,6
- src_1 = vget_low_s16(vreinterpretq_s16_s32(c0.val[1]));
- fltr_1 = vget_low_s16(vreinterpretq_s16_s32(d1.val[0]));
- res_1 = vmull_s16(src_1, fltr_1);
-
- // row:0,1,2,3 even_col:4,6
- src_1 = vget_low_s16(vreinterpretq_s16_s32(c1.val[1]));
- fltr_1 = vget_low_s16(vreinterpretq_s16_s32(d1.val[1]));
- res_1 = vmlal_s16(res_1, src_1, fltr_1);
- res_1_im = vpadd_s32(vget_low_s32(res_1), vget_high_s32(res_1));
-
- // row:0,1,2,3 even_col:0,2,4,6
- im_res_0 = vcombine_s32(res_0_im, res_1_im);
-
- // row:4,5 even_col:0,2
- src_0 = vget_low_s16(vreinterpretq_s16_s32(c2.val[0]));
- fltr_0 = vget_high_s16(vreinterpretq_s16_s32(d0.val[0]));
- res_0 = vmull_s16(src_0, fltr_0);
-
- // row:4,5,6,7 even_col:0,2
- src_0 = vget_low_s16(vreinterpretq_s16_s32(c3.val[0]));
- fltr_0 = vget_high_s16(vreinterpretq_s16_s32(d0.val[1]));
- res_0 = vmlal_s16(res_0, src_0, fltr_0);
- res_0_im = vpadd_s32(vget_low_s32(res_0), vget_high_s32(res_0));
-
- // row:4,5 even_col:4,6
- src_1 = vget_low_s16(vreinterpretq_s16_s32(c2.val[1]));
- fltr_1 = vget_high_s16(vreinterpretq_s16_s32(d1.val[0]));
- res_1 = vmull_s16(src_1, fltr_1);
-
- // row:4,5,6,7 even_col:4,6
- src_1 = vget_low_s16(vreinterpretq_s16_s32(c3.val[1]));
- fltr_1 = vget_high_s16(vreinterpretq_s16_s32(d1.val[1]));
- res_1 = vmlal_s16(res_1, src_1, fltr_1);
- res_1_im = vpadd_s32(vget_low_s32(res_1), vget_high_s32(res_1));
-
- // row:4,5,6,7 even_col:0,2,4,6
- im_res_1 = vcombine_s32(res_0_im, res_1_im);
-
- // row:0-7 even_col:0,2,4,6
- res_even = vaddq_s32(im_res_0, im_res_1);
-
- // row:0,1 odd_col:1,3
- src_0 = vget_high_s16(vreinterpretq_s16_s32(c0.val[0]));
- fltr_0 = vget_low_s16(vreinterpretq_s16_s32(d2.val[0]));
- res_0 = vmull_s16(src_0, fltr_0);
-
- // row:0,1,2,3 odd_col:1,3
- src_0 = vget_high_s16(vreinterpretq_s16_s32(c1.val[0]));
- fltr_0 = vget_low_s16(vreinterpretq_s16_s32(d2.val[1]));
- res_0 = vmlal_s16(res_0, src_0, fltr_0);
- res_0_im = vpadd_s32(vget_low_s32(res_0), vget_high_s32(res_0));
-
- // row:0,1 odd_col:5,7
- src_1 = vget_high_s16(vreinterpretq_s16_s32(c0.val[1]));
- fltr_1 = vget_low_s16(vreinterpretq_s16_s32(d3.val[0]));
- res_1 = vmull_s16(src_1, fltr_1);
-
- // row:0,1,2,3 odd_col:5,7
- src_1 = vget_high_s16(vreinterpretq_s16_s32(c1.val[1]));
- fltr_1 = vget_low_s16(vreinterpretq_s16_s32(d3.val[1]));
- res_1 = vmlal_s16(res_1, src_1, fltr_1);
- res_1_im = vpadd_s32(vget_low_s32(res_1), vget_high_s32(res_1));
-
- // row:0,1,2,3 odd_col:1,3,5,7
- im_res_0 = vcombine_s32(res_0_im, res_1_im);
-
- // row:4,5 odd_col:1,3
- src_0 = vget_high_s16(vreinterpretq_s16_s32(c2.val[0]));
- fltr_0 = vget_high_s16(vreinterpretq_s16_s32(d2.val[0]));
- res_0 = vmull_s16(src_0, fltr_0);
-
- // row:4,5,6,7 odd_col:1,3
- src_0 = vget_high_s16(vreinterpretq_s16_s32(c3.val[0]));
- fltr_0 = vget_high_s16(vreinterpretq_s16_s32(d2.val[1]));
- res_0 = vmlal_s16(res_0, src_0, fltr_0);
- res_0_im = vpadd_s32(vget_low_s32(res_0), vget_high_s32(res_0));
-
- // row:4,5 odd_col:5,7
- src_1 = vget_high_s16(vreinterpretq_s16_s32(c2.val[1]));
- fltr_1 = vget_high_s16(vreinterpretq_s16_s32(d3.val[0]));
- res_1 = vmull_s16(src_1, fltr_1);
-
- // row:4,5,6,7 odd_col:5,7
- src_1 = vget_high_s16(vreinterpretq_s16_s32(c3.val[1]));
- fltr_1 = vget_high_s16(vreinterpretq_s16_s32(d3.val[1]));
- res_1 = vmlal_s16(res_1, src_1, fltr_1);
- res_1_im = vpadd_s32(vget_low_s32(res_1), vget_high_s32(res_1));
-
- // row:4,5,6,7 odd_col:1,3,5,7
- im_res_1 = vcombine_s32(res_0_im, res_1_im);
-
- // row:0-7 odd_col:1,3,5,7
- res_odd = vaddq_s32(im_res_0, im_res_1);
-
- // reordering as 0 1 2 3 | 4 5 6 7
- c0 = vtrnq_s32(res_even, res_odd);
-
- // Final store
- *res_low = vcombine_s32(vget_low_s32(c0.val[0]), vget_low_s32(c0.val[1]));
- *res_high = vcombine_s32(vget_high_s32(c0.val[0]), vget_high_s32(c0.val[1]));
-}
-
-void av1_warp_affine_neon(const int32_t *mat, const uint8_t *ref, int width,
- int height, int stride, uint8_t *pred, int p_col,
- int p_row, int p_width, int p_height, int p_stride,
- int subsampling_x, int subsampling_y,
- ConvolveParams *conv_params, int16_t alpha,
- int16_t beta, int16_t gamma, int16_t delta) {
- int16x8_t tmp[15];
- const int bd = 8;
- const int w0 = conv_params->fwd_offset;
- const int w1 = conv_params->bck_offset;
- const int32x4_t fwd = vdupq_n_s32((int32_t)w0);
- const int32x4_t bwd = vdupq_n_s32((int32_t)w1);
- const int16x8_t sub_constant = vdupq_n_s16((1 << (bd - 1)) + (1 << bd));
-
- int limit = 0;
- uint8x16_t vec_dup, mask_val;
- int32x4_t res_lo, res_hi;
- int16x8_t result_final;
- uint8x16_t src_1, src_2, src_3, src_4;
- uint8x16_t indx_vec = {
- 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
- };
- uint8x16_t cmp_vec;
-
- const int reduce_bits_horiz = conv_params->round_0;
- const int reduce_bits_vert = conv_params->is_compound
- ? conv_params->round_1
- : 2 * FILTER_BITS - reduce_bits_horiz;
- const int32x4_t shift_vert = vdupq_n_s32(-(int32_t)reduce_bits_vert);
- const int offset_bits_horiz = bd + FILTER_BITS - 1;
-
- assert(IMPLIES(conv_params->is_compound, conv_params->dst != NULL));
-
- const int offset_bits_vert = bd + 2 * FILTER_BITS - reduce_bits_horiz;
- int32x4_t add_const_vert = vdupq_n_s32((int32_t)(1 << offset_bits_vert));
- const int round_bits =
- 2 * FILTER_BITS - conv_params->round_0 - conv_params->round_1;
- const int16x4_t round_bits_vec = vdup_n_s16(-(int16_t)round_bits);
- const int offset_bits = bd + 2 * FILTER_BITS - conv_params->round_0;
- const int16x4_t res_sub_const =
- vdup_n_s16(-((1 << (offset_bits - conv_params->round_1)) +
- (1 << (offset_bits - conv_params->round_1 - 1))));
- int k;
-
- assert(IMPLIES(conv_params->do_average, conv_params->is_compound));
-
- for (int i = 0; i < p_height; i += 8) {
- for (int j = 0; j < p_width; j += 8) {
- const int32_t src_x = (p_col + j + 4) << subsampling_x;
- const int32_t src_y = (p_row + i + 4) << subsampling_y;
- const int32_t dst_x = mat[2] * src_x + mat[3] * src_y + mat[0];
- const int32_t dst_y = mat[4] * src_x + mat[5] * src_y + mat[1];
- const int32_t x4 = dst_x >> subsampling_x;
- const int32_t y4 = dst_y >> subsampling_y;
-
- int32_t ix4 = x4 >> WARPEDMODEL_PREC_BITS;
- int32_t sx4 = x4 & ((1 << WARPEDMODEL_PREC_BITS) - 1);
- int32_t iy4 = y4 >> WARPEDMODEL_PREC_BITS;
- int32_t sy4 = y4 & ((1 << WARPEDMODEL_PREC_BITS) - 1);
-
- sx4 += alpha * (-4) + beta * (-4) + (1 << (WARPEDDIFF_PREC_BITS - 1)) +
- (WARPEDPIXEL_PREC_SHIFTS << WARPEDDIFF_PREC_BITS);
- sy4 += gamma * (-4) + delta * (-4) + (1 << (WARPEDDIFF_PREC_BITS - 1)) +
- (WARPEDPIXEL_PREC_SHIFTS << WARPEDDIFF_PREC_BITS);
-
- sx4 &= ~((1 << WARP_PARAM_REDUCE_BITS) - 1);
- sy4 &= ~((1 << WARP_PARAM_REDUCE_BITS) - 1);
- // horizontal
- if (ix4 <= -7) {
- for (k = -7; k < AOMMIN(8, p_height - i); ++k) {
- int iy = iy4 + k;
- if (iy < 0)
- iy = 0;
- else if (iy > height - 1)
- iy = height - 1;
- int16_t dup_val =
- (1 << (bd + FILTER_BITS - reduce_bits_horiz - 1)) +
- ref[iy * stride] * (1 << (FILTER_BITS - reduce_bits_horiz));
-
- tmp[k + 7] = vdupq_n_s16(dup_val);
- }
- } else if (ix4 >= width + 6) {
- for (k = -7; k < AOMMIN(8, p_height - i); ++k) {
- int iy = iy4 + k;
- if (iy < 0)
- iy = 0;
- else if (iy > height - 1)
- iy = height - 1;
- int16_t dup_val = (1 << (bd + FILTER_BITS - reduce_bits_horiz - 1)) +
- ref[iy * stride + (width - 1)] *
- (1 << (FILTER_BITS - reduce_bits_horiz));
- tmp[k + 7] = vdupq_n_s16(dup_val);
- }
- } else if (((ix4 - 7) < 0) || ((ix4 + 9) > width)) {
- const int out_of_boundary_left = -(ix4 - 6);
- const int out_of_boundary_right = (ix4 + 8) - width;
-
- for (k = -7; k < AOMMIN(8, p_height - i); ++k) {
- int iy = iy4 + k;
- if (iy < 0)
- iy = 0;
- else if (iy > height - 1)
- iy = height - 1;
- int sx = sx4 + beta * (k + 4);
-
- const uint8_t *src = ref + iy * stride + ix4 - 7;
- src_1 = vld1q_u8(src);
-
- if (out_of_boundary_left >= 0) {
- limit = out_of_boundary_left + 1;
- cmp_vec = vdupq_n_u8(out_of_boundary_left);
- vec_dup = vdupq_n_u8(*(src + limit));
- mask_val = vcleq_u8(indx_vec, cmp_vec);
- src_1 = vbslq_u8(mask_val, vec_dup, src_1);
- }
- if (out_of_boundary_right >= 0) {
- limit = 15 - (out_of_boundary_right + 1);
- cmp_vec = vdupq_n_u8(15 - out_of_boundary_right);
- vec_dup = vdupq_n_u8(*(src + limit));
- mask_val = vcgeq_u8(indx_vec, cmp_vec);
- src_1 = vbslq_u8(mask_val, vec_dup, src_1);
- }
- src_2 = vextq_u8(src_1, src_1, 1);
- src_3 = vextq_u8(src_2, src_2, 1);
- src_4 = vextq_u8(src_3, src_3, 1);
-
- horizontal_filter_neon(src_1, src_2, src_3, src_4, tmp, sx, alpha, k,
- offset_bits_horiz, reduce_bits_horiz);
- }
- } else {
- for (k = -7; k < AOMMIN(8, p_height - i); ++k) {
- int iy = iy4 + k;
- if (iy < 0)
- iy = 0;
- else if (iy > height - 1)
- iy = height - 1;
- int sx = sx4 + beta * (k + 4);
-
- const uint8_t *src = ref + iy * stride + ix4 - 7;
- src_1 = vld1q_u8(src);
- src_2 = vextq_u8(src_1, src_1, 1);
- src_3 = vextq_u8(src_2, src_2, 1);
- src_4 = vextq_u8(src_3, src_3, 1);
-
- horizontal_filter_neon(src_1, src_2, src_3, src_4, tmp, sx, alpha, k,
- offset_bits_horiz, reduce_bits_horiz);
- }
- }
-
- // vertical
- for (k = -4; k < AOMMIN(4, p_height - i - 4); ++k) {
- int sy = sy4 + delta * (k + 4);
-
- const int16x8_t *v_src = tmp + (k + 4);
-
- vertical_filter_neon(v_src, &res_lo, &res_hi, sy, gamma);
-
- res_lo = vaddq_s32(res_lo, add_const_vert);
- res_hi = vaddq_s32(res_hi, add_const_vert);
-
- if (conv_params->is_compound) {
- uint16_t *const p =
- (uint16_t *)&conv_params
- ->dst[(i + k + 4) * conv_params->dst_stride + j];
-
- res_lo = vrshlq_s32(res_lo, shift_vert);
- if (conv_params->do_average) {
- uint8_t *const dst8 = &pred[(i + k + 4) * p_stride + j];
- uint16x4_t tmp16_lo = vld1_u16(p);
- int32x4_t tmp32_lo = vreinterpretq_s32_u32(vmovl_u16(tmp16_lo));
- int16x4_t tmp16_low;
- if (conv_params->use_jnt_comp_avg) {
- res_lo = vmulq_s32(res_lo, bwd);
- tmp32_lo = vmulq_s32(tmp32_lo, fwd);
- tmp32_lo = vaddq_s32(tmp32_lo, res_lo);
- tmp16_low = vshrn_n_s32(tmp32_lo, DIST_PRECISION_BITS);
- } else {
- tmp32_lo = vaddq_s32(tmp32_lo, res_lo);
- tmp16_low = vshrn_n_s32(tmp32_lo, 1);
- }
- int16x4_t res_low = vadd_s16(tmp16_low, res_sub_const);
- res_low = vqrshl_s16(res_low, round_bits_vec);
- int16x8_t final_res_low = vcombine_s16(res_low, res_low);
- uint8x8_t res_8_low = vqmovun_s16(final_res_low);
-
- vst1_lane_u32((uint32_t *)dst8, vreinterpret_u32_u8(res_8_low), 0);
- } else {
- uint16x4_t res_u16_low = vqmovun_s32(res_lo);
- vst1_u16(p, res_u16_low);
- }
- if (p_width > 4) {
- uint16_t *const p4 =
- (uint16_t *)&conv_params
- ->dst[(i + k + 4) * conv_params->dst_stride + j + 4];
-
- res_hi = vrshlq_s32(res_hi, shift_vert);
- if (conv_params->do_average) {
- uint8_t *const dst8_4 = &pred[(i + k + 4) * p_stride + j + 4];
-
- uint16x4_t tmp16_hi = vld1_u16(p4);
- int32x4_t tmp32_hi = vreinterpretq_s32_u32(vmovl_u16(tmp16_hi));
- int16x4_t tmp16_high;
- if (conv_params->use_jnt_comp_avg) {
- res_hi = vmulq_s32(res_hi, bwd);
- tmp32_hi = vmulq_s32(tmp32_hi, fwd);
- tmp32_hi = vaddq_s32(tmp32_hi, res_hi);
- tmp16_high = vshrn_n_s32(tmp32_hi, DIST_PRECISION_BITS);
- } else {
- tmp32_hi = vaddq_s32(tmp32_hi, res_hi);
- tmp16_high = vshrn_n_s32(tmp32_hi, 1);
- }
- int16x4_t res_high = vadd_s16(tmp16_high, res_sub_const);
- res_high = vqrshl_s16(res_high, round_bits_vec);
- int16x8_t final_res_high = vcombine_s16(res_high, res_high);
- uint8x8_t res_8_high = vqmovun_s16(final_res_high);
-
- vst1_lane_u32((uint32_t *)dst8_4, vreinterpret_u32_u8(res_8_high),
- 0);
- } else {
- uint16x4_t res_u16_high = vqmovun_s32(res_hi);
- vst1_u16(p4, res_u16_high);
- }
- }
- } else {
- res_lo = vrshlq_s32(res_lo, shift_vert);
- res_hi = vrshlq_s32(res_hi, shift_vert);
-
- result_final = vcombine_s16(vmovn_s32(res_lo), vmovn_s32(res_hi));
- result_final = vsubq_s16(result_final, sub_constant);
-
- uint8_t *const p = (uint8_t *)&pred[(i + k + 4) * p_stride + j];
- uint8x8_t val = vqmovun_s16(result_final);
-
- if (p_width == 4) {
- vst1_lane_u32((uint32_t *)p, vreinterpret_u32_u8(val), 0);
- } else {
- vst1_u8(p, val);
- }
- }
- }
- }
- }
-}
diff --git a/third_party/aom/av1/common/arm/wiener_convolve_neon.c b/third_party/aom/av1/common/arm/wiener_convolve_neon.c
deleted file mode 100644
index a9bb5bcf0..000000000
--- a/third_party/aom/av1/common/arm/wiener_convolve_neon.c
+++ /dev/null
@@ -1,530 +0,0 @@
-/*
- * Copyright (c) 2018, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#include <arm_neon.h>
-#include <assert.h>
-
-#include "config/aom_config.h"
-#include "config/av1_rtcd.h"
-
-#include "aom_dsp/txfm_common.h"
-#include "aom_ports/mem.h"
-#include "av1/common/common.h"
-#include "av1/common/arm/convolve_neon.h"
-#include "av1/common/arm/mem_neon.h"
-#include "av1/common/arm/transpose_neon.h"
-
-/* Wiener filter 2D
- Apply horizontal filter and store in a temporary buffer. When applying
- vertical filter, overwrite the original pixel values.
- */
-void av1_wiener_convolve_add_src_neon(const uint8_t *src, ptrdiff_t src_stride,
- uint8_t *dst, ptrdiff_t dst_stride,
- const int16_t *filter_x, int x_step_q4,
- const int16_t *filter_y, int y_step_q4,
- int w, int h,
- const ConvolveParams *conv_params) {
- uint16_t *d_tmp;
- uint8_t *d;
- const uint8_t *src_ptr, *s_tmp;
- uint16_t *dst_ptr;
- (void)x_step_q4;
- (void)y_step_q4;
-
- int width, height;
- const int bd = 8;
- const int intermediate_height = h + SUBPEL_TAPS - 1;
- const int center_tap = ((SUBPEL_TAPS - 1) / 2);
- int16_t filter_x_tmp[7], filter_y_tmp[7];
-
- DECLARE_ALIGNED(16, uint16_t,
- temp[(MAX_SB_SIZE + HORIZ_EXTRA_ROWS) * MAX_SB_SIZE]);
-
- assert(x_step_q4 == 16 && y_step_q4 == 16);
- assert(!(w % 8));
-
- assert(w <= MAX_SB_SIZE);
- assert(h <= MAX_SB_SIZE);
-
- assert(filter_x[7] == 0);
- assert(filter_y[7] == 0);
-
- /* assumption of horizontal filtering output will not exceed 15 bit.
- ((bd) + 1 + FILTER_BITS - conv_params->round_0) <= 15
- 16 - conv_params->round_0 <= 15 -- (conv_params->round_0) >= 1
- */
- assert((conv_params->round_0) >= 1);
-
- memcpy(&filter_x_tmp[0], filter_x, sizeof(*filter_x) * FILTER_BITS);
- memcpy(&filter_y_tmp[0], filter_y, sizeof(*filter_y) * FILTER_BITS);
-
- filter_x_tmp[3] += (1 << FILTER_BITS);
- filter_y_tmp[3] += (1 << FILTER_BITS);
-
- s_tmp = src - center_tap * src_stride - center_tap;
- dst_ptr = temp;
- src_ptr = s_tmp;
- height = intermediate_height;
-
- /* if height is a multiple of 8 */
- if (!(h & 7)) {
- int16x8_t res0, res1, res2, res3;
- uint16x8_t res4;
- uint8x8_t t0, t1, t2, t3, t4, t5, t6, t7;
-#if defined(__aarch64__)
- uint16x8_t res5, res6, res7, res8, res9, res10, res11;
- uint8x8_t t8, t9, t10, t11, t12, t13, t14;
-
- do {
- const uint8_t *s;
-
- __builtin_prefetch(src_ptr + 0 * src_stride);
- __builtin_prefetch(src_ptr + 1 * src_stride);
- __builtin_prefetch(src_ptr + 2 * src_stride);
- __builtin_prefetch(src_ptr + 3 * src_stride);
- __builtin_prefetch(src_ptr + 4 * src_stride);
- __builtin_prefetch(src_ptr + 5 * src_stride);
- __builtin_prefetch(src_ptr + 6 * src_stride);
- __builtin_prefetch(src_ptr + 7 * src_stride);
-
- load_u8_8x8(src_ptr, src_stride, &t0, &t1, &t2, &t3, &t4, &t5, &t6, &t7);
- transpose_u8_8x8(&t0, &t1, &t2, &t3, &t4, &t5, &t6, &t7);
-
- s = src_ptr + 7;
- d_tmp = dst_ptr;
- width = w;
-
- __builtin_prefetch(dst_ptr + 0 * dst_stride);
- __builtin_prefetch(dst_ptr + 1 * dst_stride);
- __builtin_prefetch(dst_ptr + 2 * dst_stride);
- __builtin_prefetch(dst_ptr + 3 * dst_stride);
- __builtin_prefetch(dst_ptr + 4 * dst_stride);
- __builtin_prefetch(dst_ptr + 5 * dst_stride);
- __builtin_prefetch(dst_ptr + 6 * dst_stride);
- __builtin_prefetch(dst_ptr + 7 * dst_stride);
-
- do {
- load_u8_8x8(s, src_stride, &t7, &t8, &t9, &t10, &t11, &t12, &t13, &t14);
- transpose_u8_8x8(&t7, &t8, &t9, &t10, &t11, &t12, &t13, &t14);
-
- res0 = vreinterpretq_s16_u16(vaddl_u8(t0, t6));
- res1 = vreinterpretq_s16_u16(vaddl_u8(t1, t5));
- res2 = vreinterpretq_s16_u16(vaddl_u8(t2, t4));
- res3 = vreinterpretq_s16_u16(vmovl_u8(t3));
- res4 = wiener_convolve8_horiz_8x8(res0, res1, res2, res3, filter_x_tmp,
- bd, conv_params->round_0);
-
- res0 = vreinterpretq_s16_u16(vaddl_u8(t1, t7));
- res1 = vreinterpretq_s16_u16(vaddl_u8(t2, t6));
- res2 = vreinterpretq_s16_u16(vaddl_u8(t3, t5));
- res3 = vreinterpretq_s16_u16(vmovl_u8(t4));
- res5 = wiener_convolve8_horiz_8x8(res0, res1, res2, res3, filter_x_tmp,
- bd, conv_params->round_0);
-
- res0 = vreinterpretq_s16_u16(vaddl_u8(t2, t8));
- res1 = vreinterpretq_s16_u16(vaddl_u8(t3, t7));
- res2 = vreinterpretq_s16_u16(vaddl_u8(t4, t6));
- res3 = vreinterpretq_s16_u16(vmovl_u8(t5));
- res6 = wiener_convolve8_horiz_8x8(res0, res1, res2, res3, filter_x_tmp,
- bd, conv_params->round_0);
-
- res0 = vreinterpretq_s16_u16(vaddl_u8(t3, t9));
- res1 = vreinterpretq_s16_u16(vaddl_u8(t4, t8));
- res2 = vreinterpretq_s16_u16(vaddl_u8(t5, t7));
- res3 = vreinterpretq_s16_u16(vmovl_u8(t6));
- res7 = wiener_convolve8_horiz_8x8(res0, res1, res2, res3, filter_x_tmp,
- bd, conv_params->round_0);
-
- res0 = vreinterpretq_s16_u16(vaddl_u8(t4, t10));
- res1 = vreinterpretq_s16_u16(vaddl_u8(t5, t9));
- res2 = vreinterpretq_s16_u16(vaddl_u8(t6, t8));
- res3 = vreinterpretq_s16_u16(vmovl_u8(t7));
- res8 = wiener_convolve8_horiz_8x8(res0, res1, res2, res3, filter_x_tmp,
- bd, conv_params->round_0);
-
- res0 = vreinterpretq_s16_u16(vaddl_u8(t5, t11));
- res1 = vreinterpretq_s16_u16(vaddl_u8(t6, t10));
- res2 = vreinterpretq_s16_u16(vaddl_u8(t7, t9));
- res3 = vreinterpretq_s16_u16(vmovl_u8(t8));
- res9 = wiener_convolve8_horiz_8x8(res0, res1, res2, res3, filter_x_tmp,
- bd, conv_params->round_0);
-
- res0 = vreinterpretq_s16_u16(vaddl_u8(t6, t12));
- res1 = vreinterpretq_s16_u16(vaddl_u8(t7, t11));
- res2 = vreinterpretq_s16_u16(vaddl_u8(t8, t10));
- res3 = vreinterpretq_s16_u16(vmovl_u8(t9));
- res10 = wiener_convolve8_horiz_8x8(res0, res1, res2, res3, filter_x_tmp,
- bd, conv_params->round_0);
-
- res0 = vreinterpretq_s16_u16(vaddl_u8(t7, t13));
- res1 = vreinterpretq_s16_u16(vaddl_u8(t8, t12));
- res2 = vreinterpretq_s16_u16(vaddl_u8(t9, t11));
- res3 = vreinterpretq_s16_u16(vmovl_u8(t10));
- res11 = wiener_convolve8_horiz_8x8(res0, res1, res2, res3, filter_x_tmp,
- bd, conv_params->round_0);
-
- transpose_u16_8x8(&res4, &res5, &res6, &res7, &res8, &res9, &res10,
- &res11);
- store_u16_8x8(d_tmp, MAX_SB_SIZE, res4, res5, res6, res7, res8, res9,
- res10, res11);
-
- t0 = t8;
- t1 = t9;
- t2 = t10;
- t3 = t11;
- t4 = t12;
- t5 = t13;
- t6 = t14;
- s += 8;
- d_tmp += 8;
- width -= 8;
- } while (width > 0);
- src_ptr += 8 * src_stride;
- dst_ptr += 8 * MAX_SB_SIZE;
- height -= 8;
- } while (height > 0);
-#else
- uint8x8_t temp_0;
-
- do {
- const uint8_t *s;
-
- __builtin_prefetch(src_ptr);
-
- t0 = vld1_u8(src_ptr); // a0 a1 a2 a3 a4 a5 a6 a7
- s = src_ptr + 8;
- d_tmp = dst_ptr;
- width = w;
-
- __builtin_prefetch(dst_ptr);
-
- do {
- t7 = vld1_u8(s); // a8 a9 a10 a11 a12 a13 a14 a15
- temp_0 = t0;
- t0 = t7;
-
- t1 = vext_u8(temp_0, t7, 1); // a1 a2 a3 a4 a5 a6 a7 a8
- t2 = vext_u8(temp_0, t7, 2); // a2 a3 a4 a5 a6 a7 a8 a9
- t3 = vext_u8(temp_0, t7, 3); // a3 a4 a5 a6 a7 a8 a9 a10
- t4 = vext_u8(temp_0, t7, 4); // a4 a5 a6 a7 a8 a9 a10 a11
- t5 = vext_u8(temp_0, t7, 5); // a5 a6 a7 a8 a9 a10 a11 a12
- t6 = vext_u8(temp_0, t7, 6); // a6 a7 a8 a9 a10 a11 a12 a13
- t7 = vext_u8(temp_0, t7, 7); // a7 a8 a9 a10 a11 a12 a13 a14
-
- res0 = vreinterpretq_s16_u16(vaddl_u8(temp_0, t6));
- res1 = vreinterpretq_s16_u16(vaddl_u8(t1, t5));
- res2 = vreinterpretq_s16_u16(vaddl_u8(t2, t4));
- res3 = vreinterpretq_s16_u16(vmovl_u8(t3));
- res4 = wiener_convolve8_horiz_8x8(res0, res1, res2, res3, filter_x_tmp,
- bd, conv_params->round_0);
-
- vst1q_u16(d_tmp, res4);
-
- s += 8;
- d_tmp += 8;
- width -= 8;
- } while (width > 0);
- src_ptr += src_stride;
- dst_ptr += MAX_SB_SIZE;
- height--;
- } while (height > 0);
-#endif
- } else {
- /*if height is a multiple of 4*/
- const uint8_t *s;
- int16x8_t tt0, tt1, tt2, tt3;
- uint16x8_t d0;
- uint8x8_t t0, t1, t2, t3;
-
-#if defined(__aarch64__)
- uint16x4_t res0, res1, res2, res3, res4, res5, res6, res7;
- uint16x8_t d1, d2, d3;
- int16x4_t s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, s10;
- int16x4_t s11, s12, s13, s14;
- do {
- __builtin_prefetch(src_ptr + 0 * src_stride);
- __builtin_prefetch(src_ptr + 1 * src_stride);
- __builtin_prefetch(src_ptr + 2 * src_stride);
- __builtin_prefetch(src_ptr + 3 * src_stride);
-
- load_u8_8x4(src_ptr, src_stride, &t0, &t1, &t2, &t3); /*8x4*/
- transpose_u8_8x4(&t0, &t1, &t2,
- &t3); /*first 8 pixels of 4 rows transposed-- 4x8*/
-
- tt0 = vreinterpretq_s16_u16(vmovl_u8(t0));
- tt1 = vreinterpretq_s16_u16(vmovl_u8(t1));
- tt2 = vreinterpretq_s16_u16(vmovl_u8(t2));
- tt3 = vreinterpretq_s16_u16(vmovl_u8(t3));
-
- s0 = vget_low_s16(tt0); /*pa0 pb0 pc0 pd0 -- pixel_a0*/
- s1 = vget_low_s16(tt1); /*pa1 pb1 pc1 pd1 */
- s2 = vget_low_s16(tt2); /*pa2 pb2 pc2 pd2 */
- s3 = vget_low_s16(tt3); /*pa3 pb3 pc3 pd3 */
- s4 = vget_high_s16(tt0); /*pa4 pb4 pc4 pd4 */
- s5 = vget_high_s16(tt1); /*pa5 pb5 pc5 pd5 */
- s6 = vget_high_s16(tt2); /*pa6 pb6 pc6 pd6 */
-
- __builtin_prefetch(dst_ptr + 0 * dst_stride);
- __builtin_prefetch(dst_ptr + 1 * dst_stride);
- __builtin_prefetch(dst_ptr + 2 * dst_stride);
- __builtin_prefetch(dst_ptr + 3 * dst_stride);
-
- s = src_ptr + 7;
- d_tmp = dst_ptr;
- width = w;
-
- do {
- load_u8_8x4(s, src_stride, &t0, &t1, &t2, &t3); /*8x4*/
- transpose_u8_8x4(&t0, &t1, &t2, &t3);
-
- tt0 = vreinterpretq_s16_u16(vmovl_u8(t0));
- tt1 = vreinterpretq_s16_u16(vmovl_u8(t1));
- tt2 = vreinterpretq_s16_u16(vmovl_u8(t2));
- tt3 = vreinterpretq_s16_u16(vmovl_u8(t3));
-
- s7 = vget_low_s16(tt0); /*pa7 pb7 pc7 pd7 */ /*4x8*/
- s8 = vget_low_s16(tt1); /*pa8 pb8 pc8 pd8 */
- s9 = vget_low_s16(tt2); /*pa9 pb9 pc9 pd9 */
- s10 = vget_low_s16(tt3); /*pa10 pb10 pc10 pd10 */
- s11 = vget_high_s16(tt0); /*pa11 pb11 pc11 pd11 */
- s12 = vget_high_s16(tt1); /*pa12 pb12 pc12 pd12 */
- s13 = vget_high_s16(tt2); /*pa13 pb13 pc13 pd13 */
- s14 = vget_high_s16(tt3); /*pa14 pb14 pc14 pd14 */
-
- res0 = wiener_convolve8_horiz_4x8(
- s0, s1, s2, s3, s4, s5, s6, filter_x_tmp, bd, conv_params->round_0);
- res1 = wiener_convolve8_horiz_4x8(
- s1, s2, s3, s4, s5, s6, s7, filter_x_tmp, bd, conv_params->round_0);
- res2 = wiener_convolve8_horiz_4x8(
- s2, s3, s4, s5, s6, s7, s8, filter_x_tmp, bd, conv_params->round_0);
- res3 = wiener_convolve8_horiz_4x8(
- s3, s4, s5, s6, s7, s8, s9, filter_x_tmp, bd, conv_params->round_0);
- res4 =
- wiener_convolve8_horiz_4x8(s4, s5, s6, s7, s8, s9, s10,
- filter_x_tmp, bd, conv_params->round_0);
- res5 =
- wiener_convolve8_horiz_4x8(s5, s6, s7, s8, s9, s10, s11,
- filter_x_tmp, bd, conv_params->round_0);
- res6 =
- wiener_convolve8_horiz_4x8(s6, s7, s8, s9, s10, s11, s12,
- filter_x_tmp, bd, conv_params->round_0);
- res7 =
- wiener_convolve8_horiz_4x8(s7, s8, s9, s10, s11, s12, s13,
- filter_x_tmp, bd, conv_params->round_0);
-
- transpose_u16_4x8(&res0, &res1, &res2, &res3, &res4, &res5, &res6,
- &res7, &d0, &d1, &d2, &d3);
-
- store_u16_8x4(d_tmp, MAX_SB_SIZE, d0, d1, d2, d3);
-
- s0 = s8;
- s1 = s9;
- s2 = s10;
- s3 = s11;
- s4 = s12;
- s5 = s13;
- s6 = s14;
- s += 8;
- d_tmp += 8;
- width -= 8;
- } while (width > 0);
-
- src_ptr += 4 * src_stride;
- dst_ptr += 4 * MAX_SB_SIZE;
- height -= 4;
- } while (height > 0);
-#else
- uint8x8_t temp_0, t4, t5, t6, t7;
-
- do {
- __builtin_prefetch(src_ptr);
-
- t0 = vld1_u8(src_ptr); // a0 a1 a2 a3 a4 a5 a6 a7
-
- __builtin_prefetch(dst_ptr);
-
- s = src_ptr + 8;
- d_tmp = dst_ptr;
- width = w;
-
- do {
- t7 = vld1_u8(s); // a8 a9 a10 a11 a12 a13 a14 a15
- temp_0 = t0;
- t0 = t7;
-
- t1 = vext_u8(temp_0, t7, 1); // a1 a2 a3 a4 a5 a6 a7 a8
- t2 = vext_u8(temp_0, t7, 2); // a2 a3 a4 a5 a6 a7 a8 a9
- t3 = vext_u8(temp_0, t7, 3); // a3 a4 a5 a6 a7 a8 a9 a10
- t4 = vext_u8(temp_0, t7, 4); // a4 a5 a6 a7 a8 a9 a10 a11
- t5 = vext_u8(temp_0, t7, 5); // a5 a6 a7 a8 a9 a10 a11 a12
- t6 = vext_u8(temp_0, t7, 6); // a6 a7 a8 a9 a10 a11 a12 a13
- t7 = vext_u8(temp_0, t7, 7); // a7 a8 a9 a10 a11 a12 a13 a14
-
- tt0 = vreinterpretq_s16_u16(vaddl_u8(temp_0, t6));
- tt1 = vreinterpretq_s16_u16(vaddl_u8(t1, t5));
- tt2 = vreinterpretq_s16_u16(vaddl_u8(t2, t4));
- tt3 = vreinterpretq_s16_u16(vmovl_u8(t3));
- d0 = wiener_convolve8_horiz_8x8(tt0, tt1, tt2, tt3, filter_x_tmp, bd,
- conv_params->round_0);
-
- vst1q_u16(d_tmp, d0);
-
- s += 8;
- d_tmp += 8;
- width -= 8;
- } while (width > 0);
-
- src_ptr += src_stride;
- dst_ptr += MAX_SB_SIZE;
- height -= 1;
- } while (height > 0);
-#endif
- }
-
- {
- int16x8_t s0, s1, s2, s3, s4, s5, s6, s7;
- uint8x8_t t0;
-#if defined(__aarch64__)
- int16x8_t s8, s9, s10;
- uint8x8_t t1, t2, t3;
-#endif
- int16_t *src_tmp_ptr, *s;
- uint8_t *dst_tmp_ptr;
- height = h;
- width = w;
- src_tmp_ptr = (int16_t *)temp;
- dst_tmp_ptr = dst;
- src_stride = MAX_SB_SIZE;
-
- do {
- s = src_tmp_ptr;
- s0 = vld1q_s16(s);
- s += src_stride;
- s1 = vld1q_s16(s);
- s += src_stride;
- s2 = vld1q_s16(s);
- s += src_stride;
- s3 = vld1q_s16(s);
- s += src_stride;
- s4 = vld1q_s16(s);
- s += src_stride;
- s5 = vld1q_s16(s);
- s += src_stride;
- s6 = vld1q_s16(s);
- s += src_stride;
- d = dst_tmp_ptr;
- height = h;
-
-#if defined(__aarch64__)
- do {
- __builtin_prefetch(dst_tmp_ptr + 0 * dst_stride);
- __builtin_prefetch(dst_tmp_ptr + 1 * dst_stride);
- __builtin_prefetch(dst_tmp_ptr + 2 * dst_stride);
- __builtin_prefetch(dst_tmp_ptr + 3 * dst_stride);
-
- s7 = vld1q_s16(s);
- s += src_stride;
- s8 = vld1q_s16(s);
- s += src_stride;
- s9 = vld1q_s16(s);
- s += src_stride;
- s10 = vld1q_s16(s);
- s += src_stride;
-
- t0 = wiener_convolve8_vert_4x8(s0, s1, s2, s3, s4, s5, s6, filter_y_tmp,
- bd, conv_params->round_1);
- t1 = wiener_convolve8_vert_4x8(s1, s2, s3, s4, s5, s6, s7, filter_y_tmp,
- bd, conv_params->round_1);
- t2 = wiener_convolve8_vert_4x8(s2, s3, s4, s5, s6, s7, s8, filter_y_tmp,
- bd, conv_params->round_1);
- t3 = wiener_convolve8_vert_4x8(s3, s4, s5, s6, s7, s8, s9, filter_y_tmp,
- bd, conv_params->round_1);
-
- vst1_u8(d, t0);
- d += dst_stride;
- vst1_u8(d, t1);
- d += dst_stride;
- vst1_u8(d, t2);
- d += dst_stride;
- vst1_u8(d, t3);
- d += dst_stride;
-
- s0 = s4;
- s1 = s5;
- s2 = s6;
- s3 = s7;
- s4 = s8;
- s5 = s9;
- s6 = s10;
- height -= 4;
- } while (height > 3);
-
- if (height != 0) {
- __builtin_prefetch(dst_tmp_ptr + 0 * dst_stride);
- __builtin_prefetch(dst_tmp_ptr + 1 * dst_stride);
-
- do {
- s7 = vld1q_s16(s);
- s += src_stride;
-
- t0 =
- wiener_convolve8_vert_4x8(s0, s1, s2, s3, s4, s5, s6,
- filter_y_tmp, bd, conv_params->round_1);
- vst1_u8(d, t0);
- d += dst_stride;
-
- s0 = s1;
- s1 = s2;
- s2 = s3;
- s3 = s4;
- s4 = s5;
- s5 = s6;
- s6 = s7;
- height -= 1;
- } while (height > 0);
- }
-
- src_tmp_ptr += 8;
- dst_tmp_ptr += 8;
-
- w -= 8;
- } while (w > 0);
-#else
- do {
- __builtin_prefetch(dst_tmp_ptr + 0 * dst_stride);
-
- s7 = vld1q_s16(s);
- s += src_stride;
-
- t0 = wiener_convolve8_vert_4x8(s0, s1, s2, s3, s4, s5, s6, filter_y_tmp,
- bd, conv_params->round_1);
-
- vst1_u8(d, t0);
- d += dst_stride;
-
- s0 = s1;
- s1 = s2;
- s2 = s3;
- s3 = s4;
- s4 = s5;
- s5 = s6;
- s6 = s7;
- height -= 1;
- } while (height > 0);
-
- src_tmp_ptr += 8;
- dst_tmp_ptr += 8;
-
- w -= 8;
- } while (w > 0);
-#endif
- }
-}
diff --git a/third_party/aom/av1/common/av1_inv_txfm1d.c b/third_party/aom/av1/common/av1_inv_txfm1d.c
deleted file mode 100644
index 7ef2d6d7f..000000000
--- a/third_party/aom/av1/common/av1_inv_txfm1d.c
+++ /dev/null
@@ -1,1846 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#include <stdlib.h>
-#include "av1/common/av1_inv_txfm1d.h"
-#include "av1/common/av1_txfm.h"
-
-// TODO(angiebird): Make 1-d txfm functions static
-//
-
-void av1_idct4_new(const int32_t *input, int32_t *output, int8_t cos_bit,
- const int8_t *stage_range) {
- assert(output != input);
- const int32_t size = 4;
- const int32_t *cospi = cospi_arr(cos_bit);
-
- int32_t stage = 0;
- int32_t *bf0, *bf1;
- int32_t step[4];
-
- // stage 0;
-
- // stage 1;
- stage++;
- bf1 = output;
- bf1[0] = input[0];
- bf1[1] = input[2];
- bf1[2] = input[1];
- bf1[3] = input[3];
- av1_range_check_buf(stage, input, bf1, size, stage_range[stage]);
-
- // stage 2
- stage++;
- bf0 = output;
- bf1 = step;
- bf1[0] = half_btf(cospi[32], bf0[0], cospi[32], bf0[1], cos_bit);
- bf1[1] = half_btf(cospi[32], bf0[0], -cospi[32], bf0[1], cos_bit);
- bf1[2] = half_btf(cospi[48], bf0[2], -cospi[16], bf0[3], cos_bit);
- bf1[3] = half_btf(cospi[16], bf0[2], cospi[48], bf0[3], cos_bit);
- av1_range_check_buf(stage, input, bf1, size, stage_range[stage]);
-
- // stage 3
- stage++;
- bf0 = step;
- bf1 = output;
- bf1[0] = clamp_value(bf0[0] + bf0[3], stage_range[stage]);
- bf1[1] = clamp_value(bf0[1] + bf0[2], stage_range[stage]);
- bf1[2] = clamp_value(bf0[1] - bf0[2], stage_range[stage]);
- bf1[3] = clamp_value(bf0[0] - bf0[3], stage_range[stage]);
-}
-
-void av1_idct8_new(const int32_t *input, int32_t *output, int8_t cos_bit,
- const int8_t *stage_range) {
- assert(output != input);
- const int32_t size = 8;
- const int32_t *cospi = cospi_arr(cos_bit);
-
- int32_t stage = 0;
- int32_t *bf0, *bf1;
- int32_t step[8];
-
- // stage 0;
-
- // stage 1;
- stage++;
- bf1 = output;
- bf1[0] = input[0];
- bf1[1] = input[4];
- bf1[2] = input[2];
- bf1[3] = input[6];
- bf1[4] = input[1];
- bf1[5] = input[5];
- bf1[6] = input[3];
- bf1[7] = input[7];
- av1_range_check_buf(stage, input, bf1, size, stage_range[stage]);
-
- // stage 2
- stage++;
- bf0 = output;
- bf1 = step;
- bf1[0] = bf0[0];
- bf1[1] = bf0[1];
- bf1[2] = bf0[2];
- bf1[3] = bf0[3];
- bf1[4] = half_btf(cospi[56], bf0[4], -cospi[8], bf0[7], cos_bit);
- bf1[5] = half_btf(cospi[24], bf0[5], -cospi[40], bf0[6], cos_bit);
- bf1[6] = half_btf(cospi[40], bf0[5], cospi[24], bf0[6], cos_bit);
- bf1[7] = half_btf(cospi[8], bf0[4], cospi[56], bf0[7], cos_bit);
- av1_range_check_buf(stage, input, bf1, size, stage_range[stage]);
-
- // stage 3
- stage++;
- bf0 = step;
- bf1 = output;
- bf1[0] = half_btf(cospi[32], bf0[0], cospi[32], bf0[1], cos_bit);
- bf1[1] = half_btf(cospi[32], bf0[0], -cospi[32], bf0[1], cos_bit);
- bf1[2] = half_btf(cospi[48], bf0[2], -cospi[16], bf0[3], cos_bit);
- bf1[3] = half_btf(cospi[16], bf0[2], cospi[48], bf0[3], cos_bit);
- bf1[4] = clamp_value(bf0[4] + bf0[5], stage_range[stage]);
- bf1[5] = clamp_value(bf0[4] - bf0[5], stage_range[stage]);
- bf1[6] = clamp_value(-bf0[6] + bf0[7], stage_range[stage]);
- bf1[7] = clamp_value(bf0[6] + bf0[7], stage_range[stage]);
- av1_range_check_buf(stage, input, bf1, size, stage_range[stage]);
-
- // stage 4
- stage++;
- bf0 = output;
- bf1 = step;
- bf1[0] = clamp_value(bf0[0] + bf0[3], stage_range[stage]);
- bf1[1] = clamp_value(bf0[1] + bf0[2], stage_range[stage]);
- bf1[2] = clamp_value(bf0[1] - bf0[2], stage_range[stage]);
- bf1[3] = clamp_value(bf0[0] - bf0[3], stage_range[stage]);
- bf1[4] = bf0[4];
- bf1[5] = half_btf(-cospi[32], bf0[5], cospi[32], bf0[6], cos_bit);
- bf1[6] = half_btf(cospi[32], bf0[5], cospi[32], bf0[6], cos_bit);
- bf1[7] = bf0[7];
- av1_range_check_buf(stage, input, bf1, size, stage_range[stage]);
-
- // stage 5
- stage++;
- bf0 = step;
- bf1 = output;
- bf1[0] = clamp_value(bf0[0] + bf0[7], stage_range[stage]);
- bf1[1] = clamp_value(bf0[1] + bf0[6], stage_range[stage]);
- bf1[2] = clamp_value(bf0[2] + bf0[5], stage_range[stage]);
- bf1[3] = clamp_value(bf0[3] + bf0[4], stage_range[stage]);
- bf1[4] = clamp_value(bf0[3] - bf0[4], stage_range[stage]);
- bf1[5] = clamp_value(bf0[2] - bf0[5], stage_range[stage]);
- bf1[6] = clamp_value(bf0[1] - bf0[6], stage_range[stage]);
- bf1[7] = clamp_value(bf0[0] - bf0[7], stage_range[stage]);
-}
-
-void av1_idct16_new(const int32_t *input, int32_t *output, int8_t cos_bit,
- const int8_t *stage_range) {
- assert(output != input);
- const int32_t size = 16;
- const int32_t *cospi = cospi_arr(cos_bit);
-
- int32_t stage = 0;
- int32_t *bf0, *bf1;
- int32_t step[16];
-
- // stage 0;
-
- // stage 1;
- stage++;
- bf1 = output;
- bf1[0] = input[0];
- bf1[1] = input[8];
- bf1[2] = input[4];
- bf1[3] = input[12];
- bf1[4] = input[2];
- bf1[5] = input[10];
- bf1[6] = input[6];
- bf1[7] = input[14];
- bf1[8] = input[1];
- bf1[9] = input[9];
- bf1[10] = input[5];
- bf1[11] = input[13];
- bf1[12] = input[3];
- bf1[13] = input[11];
- bf1[14] = input[7];
- bf1[15] = input[15];
- av1_range_check_buf(stage, input, bf1, size, stage_range[stage]);
-
- // stage 2
- stage++;
- bf0 = output;
- bf1 = step;
- bf1[0] = bf0[0];
- bf1[1] = bf0[1];
- bf1[2] = bf0[2];
- bf1[3] = bf0[3];
- bf1[4] = bf0[4];
- bf1[5] = bf0[5];
- bf1[6] = bf0[6];
- bf1[7] = bf0[7];
- bf1[8] = half_btf(cospi[60], bf0[8], -cospi[4], bf0[15], cos_bit);
- bf1[9] = half_btf(cospi[28], bf0[9], -cospi[36], bf0[14], cos_bit);
- bf1[10] = half_btf(cospi[44], bf0[10], -cospi[20], bf0[13], cos_bit);
- bf1[11] = half_btf(cospi[12], bf0[11], -cospi[52], bf0[12], cos_bit);
- bf1[12] = half_btf(cospi[52], bf0[11], cospi[12], bf0[12], cos_bit);
- bf1[13] = half_btf(cospi[20], bf0[10], cospi[44], bf0[13], cos_bit);
- bf1[14] = half_btf(cospi[36], bf0[9], cospi[28], bf0[14], cos_bit);
- bf1[15] = half_btf(cospi[4], bf0[8], cospi[60], bf0[15], cos_bit);
- av1_range_check_buf(stage, input, bf1, size, stage_range[stage]);
-
- // stage 3
- stage++;
- bf0 = step;
- bf1 = output;
- bf1[0] = bf0[0];
- bf1[1] = bf0[1];
- bf1[2] = bf0[2];
- bf1[3] = bf0[3];
- bf1[4] = half_btf(cospi[56], bf0[4], -cospi[8], bf0[7], cos_bit);
- bf1[5] = half_btf(cospi[24], bf0[5], -cospi[40], bf0[6], cos_bit);
- bf1[6] = half_btf(cospi[40], bf0[5], cospi[24], bf0[6], cos_bit);
- bf1[7] = half_btf(cospi[8], bf0[4], cospi[56], bf0[7], cos_bit);
- bf1[8] = clamp_value(bf0[8] + bf0[9], stage_range[stage]);
- bf1[9] = clamp_value(bf0[8] - bf0[9], stage_range[stage]);
- bf1[10] = clamp_value(-bf0[10] + bf0[11], stage_range[stage]);
- bf1[11] = clamp_value(bf0[10] + bf0[11], stage_range[stage]);
- bf1[12] = clamp_value(bf0[12] + bf0[13], stage_range[stage]);
- bf1[13] = clamp_value(bf0[12] - bf0[13], stage_range[stage]);
- bf1[14] = clamp_value(-bf0[14] + bf0[15], stage_range[stage]);
- bf1[15] = clamp_value(bf0[14] + bf0[15], stage_range[stage]);
- av1_range_check_buf(stage, input, bf1, size, stage_range[stage]);
-
- // stage 4
- stage++;
- bf0 = output;
- bf1 = step;
- bf1[0] = half_btf(cospi[32], bf0[0], cospi[32], bf0[1], cos_bit);
- bf1[1] = half_btf(cospi[32], bf0[0], -cospi[32], bf0[1], cos_bit);
- bf1[2] = half_btf(cospi[48], bf0[2], -cospi[16], bf0[3], cos_bit);
- bf1[3] = half_btf(cospi[16], bf0[2], cospi[48], bf0[3], cos_bit);
- bf1[4] = clamp_value(bf0[4] + bf0[5], stage_range[stage]);
- bf1[5] = clamp_value(bf0[4] - bf0[5], stage_range[stage]);
- bf1[6] = clamp_value(-bf0[6] + bf0[7], stage_range[stage]);
- bf1[7] = clamp_value(bf0[6] + bf0[7], stage_range[stage]);
- bf1[8] = bf0[8];
- bf1[9] = half_btf(-cospi[16], bf0[9], cospi[48], bf0[14], cos_bit);
- bf1[10] = half_btf(-cospi[48], bf0[10], -cospi[16], bf0[13], cos_bit);
- bf1[11] = bf0[11];
- bf1[12] = bf0[12];
- bf1[13] = half_btf(-cospi[16], bf0[10], cospi[48], bf0[13], cos_bit);
- bf1[14] = half_btf(cospi[48], bf0[9], cospi[16], bf0[14], cos_bit);
- bf1[15] = bf0[15];
- av1_range_check_buf(stage, input, bf1, size, stage_range[stage]);
-
- // stage 5
- stage++;
- bf0 = step;
- bf1 = output;
- bf1[0] = clamp_value(bf0[0] + bf0[3], stage_range[stage]);
- bf1[1] = clamp_value(bf0[1] + bf0[2], stage_range[stage]);
- bf1[2] = clamp_value(bf0[1] - bf0[2], stage_range[stage]);
- bf1[3] = clamp_value(bf0[0] - bf0[3], stage_range[stage]);
- bf1[4] = bf0[4];
- bf1[5] = half_btf(-cospi[32], bf0[5], cospi[32], bf0[6], cos_bit);
- bf1[6] = half_btf(cospi[32], bf0[5], cospi[32], bf0[6], cos_bit);
- bf1[7] = bf0[7];
- bf1[8] = clamp_value(bf0[8] + bf0[11], stage_range[stage]);
- bf1[9] = clamp_value(bf0[9] + bf0[10], stage_range[stage]);
- bf1[10] = clamp_value(bf0[9] - bf0[10], stage_range[stage]);
- bf1[11] = clamp_value(bf0[8] - bf0[11], stage_range[stage]);
- bf1[12] = clamp_value(-bf0[12] + bf0[15], stage_range[stage]);
- bf1[13] = clamp_value(-bf0[13] + bf0[14], stage_range[stage]);
- bf1[14] = clamp_value(bf0[13] + bf0[14], stage_range[stage]);
- bf1[15] = clamp_value(bf0[12] + bf0[15], stage_range[stage]);
- av1_range_check_buf(stage, input, bf1, size, stage_range[stage]);
-
- // stage 6
- stage++;
- bf0 = output;
- bf1 = step;
- bf1[0] = clamp_value(bf0[0] + bf0[7], stage_range[stage]);
- bf1[1] = clamp_value(bf0[1] + bf0[6], stage_range[stage]);
- bf1[2] = clamp_value(bf0[2] + bf0[5], stage_range[stage]);
- bf1[3] = clamp_value(bf0[3] + bf0[4], stage_range[stage]);
- bf1[4] = clamp_value(bf0[3] - bf0[4], stage_range[stage]);
- bf1[5] = clamp_value(bf0[2] - bf0[5], stage_range[stage]);
- bf1[6] = clamp_value(bf0[1] - bf0[6], stage_range[stage]);
- bf1[7] = clamp_value(bf0[0] - bf0[7], stage_range[stage]);
- bf1[8] = bf0[8];
- bf1[9] = bf0[9];
- bf1[10] = half_btf(-cospi[32], bf0[10], cospi[32], bf0[13], cos_bit);
- bf1[11] = half_btf(-cospi[32], bf0[11], cospi[32], bf0[12], cos_bit);
- bf1[12] = half_btf(cospi[32], bf0[11], cospi[32], bf0[12], cos_bit);
- bf1[13] = half_btf(cospi[32], bf0[10], cospi[32], bf0[13], cos_bit);
- bf1[14] = bf0[14];
- bf1[15] = bf0[15];
- av1_range_check_buf(stage, input, bf1, size, stage_range[stage]);
-
- // stage 7
- stage++;
- bf0 = step;
- bf1 = output;
- bf1[0] = clamp_value(bf0[0] + bf0[15], stage_range[stage]);
- bf1[1] = clamp_value(bf0[1] + bf0[14], stage_range[stage]);
- bf1[2] = clamp_value(bf0[2] + bf0[13], stage_range[stage]);
- bf1[3] = clamp_value(bf0[3] + bf0[12], stage_range[stage]);
- bf1[4] = clamp_value(bf0[4] + bf0[11], stage_range[stage]);
- bf1[5] = clamp_value(bf0[5] + bf0[10], stage_range[stage]);
- bf1[6] = clamp_value(bf0[6] + bf0[9], stage_range[stage]);
- bf1[7] = clamp_value(bf0[7] + bf0[8], stage_range[stage]);
- bf1[8] = clamp_value(bf0[7] - bf0[8], stage_range[stage]);
- bf1[9] = clamp_value(bf0[6] - bf0[9], stage_range[stage]);
- bf1[10] = clamp_value(bf0[5] - bf0[10], stage_range[stage]);
- bf1[11] = clamp_value(bf0[4] - bf0[11], stage_range[stage]);
- bf1[12] = clamp_value(bf0[3] - bf0[12], stage_range[stage]);
- bf1[13] = clamp_value(bf0[2] - bf0[13], stage_range[stage]);
- bf1[14] = clamp_value(bf0[1] - bf0[14], stage_range[stage]);
- bf1[15] = clamp_value(bf0[0] - bf0[15], stage_range[stage]);
-}
-
-void av1_idct32_new(const int32_t *input, int32_t *output, int8_t cos_bit,
- const int8_t *stage_range) {
- assert(output != input);
- const int32_t size = 32;
- const int32_t *cospi = cospi_arr(cos_bit);
-
- int32_t stage = 0;
- int32_t *bf0, *bf1;
- int32_t step[32];
-
- // stage 0;
-
- // stage 1;
- stage++;
- bf1 = output;
- bf1[0] = input[0];
- bf1[1] = input[16];
- bf1[2] = input[8];
- bf1[3] = input[24];
- bf1[4] = input[4];
- bf1[5] = input[20];
- bf1[6] = input[12];
- bf1[7] = input[28];
- bf1[8] = input[2];
- bf1[9] = input[18];
- bf1[10] = input[10];
- bf1[11] = input[26];
- bf1[12] = input[6];
- bf1[13] = input[22];
- bf1[14] = input[14];
- bf1[15] = input[30];
- bf1[16] = input[1];
- bf1[17] = input[17];
- bf1[18] = input[9];
- bf1[19] = input[25];
- bf1[20] = input[5];
- bf1[21] = input[21];
- bf1[22] = input[13];
- bf1[23] = input[29];
- bf1[24] = input[3];
- bf1[25] = input[19];
- bf1[26] = input[11];
- bf1[27] = input[27];
- bf1[28] = input[7];
- bf1[29] = input[23];
- bf1[30] = input[15];
- bf1[31] = input[31];
- av1_range_check_buf(stage, input, bf1, size, stage_range[stage]);
-
- // stage 2
- stage++;
- bf0 = output;
- bf1 = step;
- bf1[0] = bf0[0];
- bf1[1] = bf0[1];
- bf1[2] = bf0[2];
- bf1[3] = bf0[3];
- bf1[4] = bf0[4];
- bf1[5] = bf0[5];
- bf1[6] = bf0[6];
- bf1[7] = bf0[7];
- bf1[8] = bf0[8];
- bf1[9] = bf0[9];
- bf1[10] = bf0[10];
- bf1[11] = bf0[11];
- bf1[12] = bf0[12];
- bf1[13] = bf0[13];
- bf1[14] = bf0[14];
- bf1[15] = bf0[15];
- bf1[16] = half_btf(cospi[62], bf0[16], -cospi[2], bf0[31], cos_bit);
- bf1[17] = half_btf(cospi[30], bf0[17], -cospi[34], bf0[30], cos_bit);
- bf1[18] = half_btf(cospi[46], bf0[18], -cospi[18], bf0[29], cos_bit);
- bf1[19] = half_btf(cospi[14], bf0[19], -cospi[50], bf0[28], cos_bit);
- bf1[20] = half_btf(cospi[54], bf0[20], -cospi[10], bf0[27], cos_bit);
- bf1[21] = half_btf(cospi[22], bf0[21], -cospi[42], bf0[26], cos_bit);
- bf1[22] = half_btf(cospi[38], bf0[22], -cospi[26], bf0[25], cos_bit);
- bf1[23] = half_btf(cospi[6], bf0[23], -cospi[58], bf0[24], cos_bit);
- bf1[24] = half_btf(cospi[58], bf0[23], cospi[6], bf0[24], cos_bit);
- bf1[25] = half_btf(cospi[26], bf0[22], cospi[38], bf0[25], cos_bit);
- bf1[26] = half_btf(cospi[42], bf0[21], cospi[22], bf0[26], cos_bit);
- bf1[27] = half_btf(cospi[10], bf0[20], cospi[54], bf0[27], cos_bit);
- bf1[28] = half_btf(cospi[50], bf0[19], cospi[14], bf0[28], cos_bit);
- bf1[29] = half_btf(cospi[18], bf0[18], cospi[46], bf0[29], cos_bit);
- bf1[30] = half_btf(cospi[34], bf0[17], cospi[30], bf0[30], cos_bit);
- bf1[31] = half_btf(cospi[2], bf0[16], cospi[62], bf0[31], cos_bit);
- av1_range_check_buf(stage, input, bf1, size, stage_range[stage]);
-
- // stage 3
- stage++;
- bf0 = step;
- bf1 = output;
- bf1[0] = bf0[0];
- bf1[1] = bf0[1];
- bf1[2] = bf0[2];
- bf1[3] = bf0[3];
- bf1[4] = bf0[4];
- bf1[5] = bf0[5];
- bf1[6] = bf0[6];
- bf1[7] = bf0[7];
- bf1[8] = half_btf(cospi[60], bf0[8], -cospi[4], bf0[15], cos_bit);
- bf1[9] = half_btf(cospi[28], bf0[9], -cospi[36], bf0[14], cos_bit);
- bf1[10] = half_btf(cospi[44], bf0[10], -cospi[20], bf0[13], cos_bit);
- bf1[11] = half_btf(cospi[12], bf0[11], -cospi[52], bf0[12], cos_bit);
- bf1[12] = half_btf(cospi[52], bf0[11], cospi[12], bf0[12], cos_bit);
- bf1[13] = half_btf(cospi[20], bf0[10], cospi[44], bf0[13], cos_bit);
- bf1[14] = half_btf(cospi[36], bf0[9], cospi[28], bf0[14], cos_bit);
- bf1[15] = half_btf(cospi[4], bf0[8], cospi[60], bf0[15], cos_bit);
- bf1[16] = clamp_value(bf0[16] + bf0[17], stage_range[stage]);
- bf1[17] = clamp_value(bf0[16] - bf0[17], stage_range[stage]);
- bf1[18] = clamp_value(-bf0[18] + bf0[19], stage_range[stage]);
- bf1[19] = clamp_value(bf0[18] + bf0[19], stage_range[stage]);
- bf1[20] = clamp_value(bf0[20] + bf0[21], stage_range[stage]);
- bf1[21] = clamp_value(bf0[20] - bf0[21], stage_range[stage]);
- bf1[22] = clamp_value(-bf0[22] + bf0[23], stage_range[stage]);
- bf1[23] = clamp_value(bf0[22] + bf0[23], stage_range[stage]);
- bf1[24] = clamp_value(bf0[24] + bf0[25], stage_range[stage]);
- bf1[25] = clamp_value(bf0[24] - bf0[25], stage_range[stage]);
- bf1[26] = clamp_value(-bf0[26] + bf0[27], stage_range[stage]);
- bf1[27] = clamp_value(bf0[26] + bf0[27], stage_range[stage]);
- bf1[28] = clamp_value(bf0[28] + bf0[29], stage_range[stage]);
- bf1[29] = clamp_value(bf0[28] - bf0[29], stage_range[stage]);
- bf1[30] = clamp_value(-bf0[30] + bf0[31], stage_range[stage]);
- bf1[31] = clamp_value(bf0[30] + bf0[31], stage_range[stage]);
- av1_range_check_buf(stage, input, bf1, size, stage_range[stage]);
-
- // stage 4
- stage++;
- bf0 = output;
- bf1 = step;
- bf1[0] = bf0[0];
- bf1[1] = bf0[1];
- bf1[2] = bf0[2];
- bf1[3] = bf0[3];
- bf1[4] = half_btf(cospi[56], bf0[4], -cospi[8], bf0[7], cos_bit);
- bf1[5] = half_btf(cospi[24], bf0[5], -cospi[40], bf0[6], cos_bit);
- bf1[6] = half_btf(cospi[40], bf0[5], cospi[24], bf0[6], cos_bit);
- bf1[7] = half_btf(cospi[8], bf0[4], cospi[56], bf0[7], cos_bit);
- bf1[8] = clamp_value(bf0[8] + bf0[9], stage_range[stage]);
- bf1[9] = clamp_value(bf0[8] - bf0[9], stage_range[stage]);
- bf1[10] = clamp_value(-bf0[10] + bf0[11], stage_range[stage]);
- bf1[11] = clamp_value(bf0[10] + bf0[11], stage_range[stage]);
- bf1[12] = clamp_value(bf0[12] + bf0[13], stage_range[stage]);
- bf1[13] = clamp_value(bf0[12] - bf0[13], stage_range[stage]);
- bf1[14] = clamp_value(-bf0[14] + bf0[15], stage_range[stage]);
- bf1[15] = clamp_value(bf0[14] + bf0[15], stage_range[stage]);
- bf1[16] = bf0[16];
- bf1[17] = half_btf(-cospi[8], bf0[17], cospi[56], bf0[30], cos_bit);
- bf1[18] = half_btf(-cospi[56], bf0[18], -cospi[8], bf0[29], cos_bit);
- bf1[19] = bf0[19];
- bf1[20] = bf0[20];
- bf1[21] = half_btf(-cospi[40], bf0[21], cospi[24], bf0[26], cos_bit);
- bf1[22] = half_btf(-cospi[24], bf0[22], -cospi[40], bf0[25], cos_bit);
- bf1[23] = bf0[23];
- bf1[24] = bf0[24];
- bf1[25] = half_btf(-cospi[40], bf0[22], cospi[24], bf0[25], cos_bit);
- bf1[26] = half_btf(cospi[24], bf0[21], cospi[40], bf0[26], cos_bit);
- bf1[27] = bf0[27];
- bf1[28] = bf0[28];
- bf1[29] = half_btf(-cospi[8], bf0[18], cospi[56], bf0[29], cos_bit);
- bf1[30] = half_btf(cospi[56], bf0[17], cospi[8], bf0[30], cos_bit);
- bf1[31] = bf0[31];
- av1_range_check_buf(stage, input, bf1, size, stage_range[stage]);
-
- // stage 5
- stage++;
- bf0 = step;
- bf1 = output;
- bf1[0] = half_btf(cospi[32], bf0[0], cospi[32], bf0[1], cos_bit);
- bf1[1] = half_btf(cospi[32], bf0[0], -cospi[32], bf0[1], cos_bit);
- bf1[2] = half_btf(cospi[48], bf0[2], -cospi[16], bf0[3], cos_bit);
- bf1[3] = half_btf(cospi[16], bf0[2], cospi[48], bf0[3], cos_bit);
- bf1[4] = clamp_value(bf0[4] + bf0[5], stage_range[stage]);
- bf1[5] = clamp_value(bf0[4] - bf0[5], stage_range[stage]);
- bf1[6] = clamp_value(-bf0[6] + bf0[7], stage_range[stage]);
- bf1[7] = clamp_value(bf0[6] + bf0[7], stage_range[stage]);
- bf1[8] = bf0[8];
- bf1[9] = half_btf(-cospi[16], bf0[9], cospi[48], bf0[14], cos_bit);
- bf1[10] = half_btf(-cospi[48], bf0[10], -cospi[16], bf0[13], cos_bit);
- bf1[11] = bf0[11];
- bf1[12] = bf0[12];
- bf1[13] = half_btf(-cospi[16], bf0[10], cospi[48], bf0[13], cos_bit);
- bf1[14] = half_btf(cospi[48], bf0[9], cospi[16], bf0[14], cos_bit);
- bf1[15] = bf0[15];
- bf1[16] = clamp_value(bf0[16] + bf0[19], stage_range[stage]);
- bf1[17] = clamp_value(bf0[17] + bf0[18], stage_range[stage]);
- bf1[18] = clamp_value(bf0[17] - bf0[18], stage_range[stage]);
- bf1[19] = clamp_value(bf0[16] - bf0[19], stage_range[stage]);
- bf1[20] = clamp_value(-bf0[20] + bf0[23], stage_range[stage]);
- bf1[21] = clamp_value(-bf0[21] + bf0[22], stage_range[stage]);
- bf1[22] = clamp_value(bf0[21] + bf0[22], stage_range[stage]);
- bf1[23] = clamp_value(bf0[20] + bf0[23], stage_range[stage]);
- bf1[24] = clamp_value(bf0[24] + bf0[27], stage_range[stage]);
- bf1[25] = clamp_value(bf0[25] + bf0[26], stage_range[stage]);
- bf1[26] = clamp_value(bf0[25] - bf0[26], stage_range[stage]);
- bf1[27] = clamp_value(bf0[24] - bf0[27], stage_range[stage]);
- bf1[28] = clamp_value(-bf0[28] + bf0[31], stage_range[stage]);
- bf1[29] = clamp_value(-bf0[29] + bf0[30], stage_range[stage]);
- bf1[30] = clamp_value(bf0[29] + bf0[30], stage_range[stage]);
- bf1[31] = clamp_value(bf0[28] + bf0[31], stage_range[stage]);
- av1_range_check_buf(stage, input, bf1, size, stage_range[stage]);
-
- // stage 6
- stage++;
- bf0 = output;
- bf1 = step;
- bf1[0] = clamp_value(bf0[0] + bf0[3], stage_range[stage]);
- bf1[1] = clamp_value(bf0[1] + bf0[2], stage_range[stage]);
- bf1[2] = clamp_value(bf0[1] - bf0[2], stage_range[stage]);
- bf1[3] = clamp_value(bf0[0] - bf0[3], stage_range[stage]);
- bf1[4] = bf0[4];
- bf1[5] = half_btf(-cospi[32], bf0[5], cospi[32], bf0[6], cos_bit);
- bf1[6] = half_btf(cospi[32], bf0[5], cospi[32], bf0[6], cos_bit);
- bf1[7] = bf0[7];
- bf1[8] = clamp_value(bf0[8] + bf0[11], stage_range[stage]);
- bf1[9] = clamp_value(bf0[9] + bf0[10], stage_range[stage]);
- bf1[10] = clamp_value(bf0[9] - bf0[10], stage_range[stage]);
- bf1[11] = clamp_value(bf0[8] - bf0[11], stage_range[stage]);
- bf1[12] = clamp_value(-bf0[12] + bf0[15], stage_range[stage]);
- bf1[13] = clamp_value(-bf0[13] + bf0[14], stage_range[stage]);
- bf1[14] = clamp_value(bf0[13] + bf0[14], stage_range[stage]);
- bf1[15] = clamp_value(bf0[12] + bf0[15], stage_range[stage]);
- bf1[16] = bf0[16];
- bf1[17] = bf0[17];
- bf1[18] = half_btf(-cospi[16], bf0[18], cospi[48], bf0[29], cos_bit);
- bf1[19] = half_btf(-cospi[16], bf0[19], cospi[48], bf0[28], cos_bit);
- bf1[20] = half_btf(-cospi[48], bf0[20], -cospi[16], bf0[27], cos_bit);
- bf1[21] = half_btf(-cospi[48], bf0[21], -cospi[16], bf0[26], cos_bit);
- bf1[22] = bf0[22];
- bf1[23] = bf0[23];
- bf1[24] = bf0[24];
- bf1[25] = bf0[25];
- bf1[26] = half_btf(-cospi[16], bf0[21], cospi[48], bf0[26], cos_bit);
- bf1[27] = half_btf(-cospi[16], bf0[20], cospi[48], bf0[27], cos_bit);
- bf1[28] = half_btf(cospi[48], bf0[19], cospi[16], bf0[28], cos_bit);
- bf1[29] = half_btf(cospi[48], bf0[18], cospi[16], bf0[29], cos_bit);
- bf1[30] = bf0[30];
- bf1[31] = bf0[31];
- av1_range_check_buf(stage, input, bf1, size, stage_range[stage]);
-
- // stage 7
- stage++;
- bf0 = step;
- bf1 = output;
- bf1[0] = clamp_value(bf0[0] + bf0[7], stage_range[stage]);
- bf1[1] = clamp_value(bf0[1] + bf0[6], stage_range[stage]);
- bf1[2] = clamp_value(bf0[2] + bf0[5], stage_range[stage]);
- bf1[3] = clamp_value(bf0[3] + bf0[4], stage_range[stage]);
- bf1[4] = clamp_value(bf0[3] - bf0[4], stage_range[stage]);
- bf1[5] = clamp_value(bf0[2] - bf0[5], stage_range[stage]);
- bf1[6] = clamp_value(bf0[1] - bf0[6], stage_range[stage]);
- bf1[7] = clamp_value(bf0[0] - bf0[7], stage_range[stage]);
- bf1[8] = bf0[8];
- bf1[9] = bf0[9];
- bf1[10] = half_btf(-cospi[32], bf0[10], cospi[32], bf0[13], cos_bit);
- bf1[11] = half_btf(-cospi[32], bf0[11], cospi[32], bf0[12], cos_bit);
- bf1[12] = half_btf(cospi[32], bf0[11], cospi[32], bf0[12], cos_bit);
- bf1[13] = half_btf(cospi[32], bf0[10], cospi[32], bf0[13], cos_bit);
- bf1[14] = bf0[14];
- bf1[15] = bf0[15];
- bf1[16] = clamp_value(bf0[16] + bf0[23], stage_range[stage]);
- bf1[17] = clamp_value(bf0[17] + bf0[22], stage_range[stage]);
- bf1[18] = clamp_value(bf0[18] + bf0[21], stage_range[stage]);
- bf1[19] = clamp_value(bf0[19] + bf0[20], stage_range[stage]);
- bf1[20] = clamp_value(bf0[19] - bf0[20], stage_range[stage]);
- bf1[21] = clamp_value(bf0[18] - bf0[21], stage_range[stage]);
- bf1[22] = clamp_value(bf0[17] - bf0[22], stage_range[stage]);
- bf1[23] = clamp_value(bf0[16] - bf0[23], stage_range[stage]);
- bf1[24] = clamp_value(-bf0[24] + bf0[31], stage_range[stage]);
- bf1[25] = clamp_value(-bf0[25] + bf0[30], stage_range[stage]);
- bf1[26] = clamp_value(-bf0[26] + bf0[29], stage_range[stage]);
- bf1[27] = clamp_value(-bf0[27] + bf0[28], stage_range[stage]);
- bf1[28] = clamp_value(bf0[27] + bf0[28], stage_range[stage]);
- bf1[29] = clamp_value(bf0[26] + bf0[29], stage_range[stage]);
- bf1[30] = clamp_value(bf0[25] + bf0[30], stage_range[stage]);
- bf1[31] = clamp_value(bf0[24] + bf0[31], stage_range[stage]);
- av1_range_check_buf(stage, input, bf1, size, stage_range[stage]);
-
- // stage 8
- stage++;
- bf0 = output;
- bf1 = step;
- bf1[0] = clamp_value(bf0[0] + bf0[15], stage_range[stage]);
- bf1[1] = clamp_value(bf0[1] + bf0[14], stage_range[stage]);
- bf1[2] = clamp_value(bf0[2] + bf0[13], stage_range[stage]);
- bf1[3] = clamp_value(bf0[3] + bf0[12], stage_range[stage]);
- bf1[4] = clamp_value(bf0[4] + bf0[11], stage_range[stage]);
- bf1[5] = clamp_value(bf0[5] + bf0[10], stage_range[stage]);
- bf1[6] = clamp_value(bf0[6] + bf0[9], stage_range[stage]);
- bf1[7] = clamp_value(bf0[7] + bf0[8], stage_range[stage]);
- bf1[8] = clamp_value(bf0[7] - bf0[8], stage_range[stage]);
- bf1[9] = clamp_value(bf0[6] - bf0[9], stage_range[stage]);
- bf1[10] = clamp_value(bf0[5] - bf0[10], stage_range[stage]);
- bf1[11] = clamp_value(bf0[4] - bf0[11], stage_range[stage]);
- bf1[12] = clamp_value(bf0[3] - bf0[12], stage_range[stage]);
- bf1[13] = clamp_value(bf0[2] - bf0[13], stage_range[stage]);
- bf1[14] = clamp_value(bf0[1] - bf0[14], stage_range[stage]);
- bf1[15] = clamp_value(bf0[0] - bf0[15], stage_range[stage]);
- bf1[16] = bf0[16];
- bf1[17] = bf0[17];
- bf1[18] = bf0[18];
- bf1[19] = bf0[19];
- bf1[20] = half_btf(-cospi[32], bf0[20], cospi[32], bf0[27], cos_bit);
- bf1[21] = half_btf(-cospi[32], bf0[21], cospi[32], bf0[26], cos_bit);
- bf1[22] = half_btf(-cospi[32], bf0[22], cospi[32], bf0[25], cos_bit);
- bf1[23] = half_btf(-cospi[32], bf0[23], cospi[32], bf0[24], cos_bit);
- bf1[24] = half_btf(cospi[32], bf0[23], cospi[32], bf0[24], cos_bit);
- bf1[25] = half_btf(cospi[32], bf0[22], cospi[32], bf0[25], cos_bit);
- bf1[26] = half_btf(cospi[32], bf0[21], cospi[32], bf0[26], cos_bit);
- bf1[27] = half_btf(cospi[32], bf0[20], cospi[32], bf0[27], cos_bit);
- bf1[28] = bf0[28];
- bf1[29] = bf0[29];
- bf1[30] = bf0[30];
- bf1[31] = bf0[31];
- av1_range_check_buf(stage, input, bf1, size, stage_range[stage]);
-
- // stage 9
- stage++;
- bf0 = step;
- bf1 = output;
- bf1[0] = clamp_value(bf0[0] + bf0[31], stage_range[stage]);
- bf1[1] = clamp_value(bf0[1] + bf0[30], stage_range[stage]);
- bf1[2] = clamp_value(bf0[2] + bf0[29], stage_range[stage]);
- bf1[3] = clamp_value(bf0[3] + bf0[28], stage_range[stage]);
- bf1[4] = clamp_value(bf0[4] + bf0[27], stage_range[stage]);
- bf1[5] = clamp_value(bf0[5] + bf0[26], stage_range[stage]);
- bf1[6] = clamp_value(bf0[6] + bf0[25], stage_range[stage]);
- bf1[7] = clamp_value(bf0[7] + bf0[24], stage_range[stage]);
- bf1[8] = clamp_value(bf0[8] + bf0[23], stage_range[stage]);
- bf1[9] = clamp_value(bf0[9] + bf0[22], stage_range[stage]);
- bf1[10] = clamp_value(bf0[10] + bf0[21], stage_range[stage]);
- bf1[11] = clamp_value(bf0[11] + bf0[20], stage_range[stage]);
- bf1[12] = clamp_value(bf0[12] + bf0[19], stage_range[stage]);
- bf1[13] = clamp_value(bf0[13] + bf0[18], stage_range[stage]);
- bf1[14] = clamp_value(bf0[14] + bf0[17], stage_range[stage]);
- bf1[15] = clamp_value(bf0[15] + bf0[16], stage_range[stage]);
- bf1[16] = clamp_value(bf0[15] - bf0[16], stage_range[stage]);
- bf1[17] = clamp_value(bf0[14] - bf0[17], stage_range[stage]);
- bf1[18] = clamp_value(bf0[13] - bf0[18], stage_range[stage]);
- bf1[19] = clamp_value(bf0[12] - bf0[19], stage_range[stage]);
- bf1[20] = clamp_value(bf0[11] - bf0[20], stage_range[stage]);
- bf1[21] = clamp_value(bf0[10] - bf0[21], stage_range[stage]);
- bf1[22] = clamp_value(bf0[9] - bf0[22], stage_range[stage]);
- bf1[23] = clamp_value(bf0[8] - bf0[23], stage_range[stage]);
- bf1[24] = clamp_value(bf0[7] - bf0[24], stage_range[stage]);
- bf1[25] = clamp_value(bf0[6] - bf0[25], stage_range[stage]);
- bf1[26] = clamp_value(bf0[5] - bf0[26], stage_range[stage]);
- bf1[27] = clamp_value(bf0[4] - bf0[27], stage_range[stage]);
- bf1[28] = clamp_value(bf0[3] - bf0[28], stage_range[stage]);
- bf1[29] = clamp_value(bf0[2] - bf0[29], stage_range[stage]);
- bf1[30] = clamp_value(bf0[1] - bf0[30], stage_range[stage]);
- bf1[31] = clamp_value(bf0[0] - bf0[31], stage_range[stage]);
-}
-
-void av1_iadst4_new(const int32_t *input, int32_t *output, int8_t cos_bit,
- const int8_t *stage_range) {
- int bit = cos_bit;
- const int32_t *sinpi = sinpi_arr(bit);
- int32_t s0, s1, s2, s3, s4, s5, s6, s7;
-
- int32_t x0 = input[0];
- int32_t x1 = input[1];
- int32_t x2 = input[2];
- int32_t x3 = input[3];
-
- if (!(x0 | x1 | x2 | x3)) {
- output[0] = output[1] = output[2] = output[3] = 0;
- return;
- }
-
- assert(sinpi[1] + sinpi[2] == sinpi[4]);
-
- // stage 1
- s0 = range_check_value(sinpi[1] * x0, stage_range[1] + bit);
- s1 = range_check_value(sinpi[2] * x0, stage_range[1] + bit);
- s2 = range_check_value(sinpi[3] * x1, stage_range[1] + bit);
- s3 = range_check_value(sinpi[4] * x2, stage_range[1] + bit);
- s4 = range_check_value(sinpi[1] * x2, stage_range[1] + bit);
- s5 = range_check_value(sinpi[2] * x3, stage_range[1] + bit);
- s6 = range_check_value(sinpi[4] * x3, stage_range[1] + bit);
-
- // stage 2
- // NOTICE: (x0 - x2) here may use one extra bit compared to the
- // opt_range_row/col specified in av1_gen_inv_stage_range()
- s7 = range_check_value((x0 - x2) + x3, stage_range[2]);
-
- // stage 3
- s0 = range_check_value(s0 + s3, stage_range[3] + bit);
- s1 = range_check_value(s1 - s4, stage_range[3] + bit);
- s3 = range_check_value(s2, stage_range[3] + bit);
- s2 = range_check_value(sinpi[3] * s7, stage_range[3] + bit);
-
- // stage 4
- s0 = range_check_value(s0 + s5, stage_range[4] + bit);
- s1 = range_check_value(s1 - s6, stage_range[4] + bit);
-
- // stage 5
- x0 = range_check_value(s0 + s3, stage_range[5] + bit);
- x1 = range_check_value(s1 + s3, stage_range[5] + bit);
- x2 = range_check_value(s2, stage_range[5] + bit);
- x3 = range_check_value(s0 + s1, stage_range[5] + bit);
-
- // stage 6
- x3 = range_check_value(x3 - s3, stage_range[6] + bit);
-
- output[0] = round_shift(x0, bit);
- output[1] = round_shift(x1, bit);
- output[2] = round_shift(x2, bit);
- output[3] = round_shift(x3, bit);
-}
-
-void av1_iadst8_new(const int32_t *input, int32_t *output, int8_t cos_bit,
- const int8_t *stage_range) {
- assert(output != input);
- const int32_t size = 8;
- const int32_t *cospi = cospi_arr(cos_bit);
-
- int32_t stage = 0;
- int32_t *bf0, *bf1;
- int32_t step[8];
-
- // stage 0;
-
- // stage 1;
- stage++;
- bf1 = output;
- bf1[0] = input[7];
- bf1[1] = input[0];
- bf1[2] = input[5];
- bf1[3] = input[2];
- bf1[4] = input[3];
- bf1[5] = input[4];
- bf1[6] = input[1];
- bf1[7] = input[6];
- av1_range_check_buf(stage, input, bf1, size, stage_range[stage]);
-
- // stage 2
- stage++;
- bf0 = output;
- bf1 = step;
- bf1[0] = half_btf(cospi[4], bf0[0], cospi[60], bf0[1], cos_bit);
- bf1[1] = half_btf(cospi[60], bf0[0], -cospi[4], bf0[1], cos_bit);
- bf1[2] = half_btf(cospi[20], bf0[2], cospi[44], bf0[3], cos_bit);
- bf1[3] = half_btf(cospi[44], bf0[2], -cospi[20], bf0[3], cos_bit);
- bf1[4] = half_btf(cospi[36], bf0[4], cospi[28], bf0[5], cos_bit);
- bf1[5] = half_btf(cospi[28], bf0[4], -cospi[36], bf0[5], cos_bit);
- bf1[6] = half_btf(cospi[52], bf0[6], cospi[12], bf0[7], cos_bit);
- bf1[7] = half_btf(cospi[12], bf0[6], -cospi[52], bf0[7], cos_bit);
- av1_range_check_buf(stage, input, bf1, size, stage_range[stage]);
-
- // stage 3
- stage++;
- bf0 = step;
- bf1 = output;
- bf1[0] = clamp_value(bf0[0] + bf0[4], stage_range[stage]);
- bf1[1] = clamp_value(bf0[1] + bf0[5], stage_range[stage]);
- bf1[2] = clamp_value(bf0[2] + bf0[6], stage_range[stage]);
- bf1[3] = clamp_value(bf0[3] + bf0[7], stage_range[stage]);
- bf1[4] = clamp_value(bf0[0] - bf0[4], stage_range[stage]);
- bf1[5] = clamp_value(bf0[1] - bf0[5], stage_range[stage]);
- bf1[6] = clamp_value(bf0[2] - bf0[6], stage_range[stage]);
- bf1[7] = clamp_value(bf0[3] - bf0[7], stage_range[stage]);
- av1_range_check_buf(stage, input, bf1, size, stage_range[stage]);
-
- // stage 4
- stage++;
- bf0 = output;
- bf1 = step;
- bf1[0] = bf0[0];
- bf1[1] = bf0[1];
- bf1[2] = bf0[2];
- bf1[3] = bf0[3];
- bf1[4] = half_btf(cospi[16], bf0[4], cospi[48], bf0[5], cos_bit);
- bf1[5] = half_btf(cospi[48], bf0[4], -cospi[16], bf0[5], cos_bit);
- bf1[6] = half_btf(-cospi[48], bf0[6], cospi[16], bf0[7], cos_bit);
- bf1[7] = half_btf(cospi[16], bf0[6], cospi[48], bf0[7], cos_bit);
- av1_range_check_buf(stage, input, bf1, size, stage_range[stage]);
-
- // stage 5
- stage++;
- bf0 = step;
- bf1 = output;
- bf1[0] = clamp_value(bf0[0] + bf0[2], stage_range[stage]);
- bf1[1] = clamp_value(bf0[1] + bf0[3], stage_range[stage]);
- bf1[2] = clamp_value(bf0[0] - bf0[2], stage_range[stage]);
- bf1[3] = clamp_value(bf0[1] - bf0[3], stage_range[stage]);
- bf1[4] = clamp_value(bf0[4] + bf0[6], stage_range[stage]);
- bf1[5] = clamp_value(bf0[5] + bf0[7], stage_range[stage]);
- bf1[6] = clamp_value(bf0[4] - bf0[6], stage_range[stage]);
- bf1[7] = clamp_value(bf0[5] - bf0[7], stage_range[stage]);
- av1_range_check_buf(stage, input, bf1, size, stage_range[stage]);
-
- // stage 6
- stage++;
- bf0 = output;
- bf1 = step;
- bf1[0] = bf0[0];
- bf1[1] = bf0[1];
- bf1[2] = half_btf(cospi[32], bf0[2], cospi[32], bf0[3], cos_bit);
- bf1[3] = half_btf(cospi[32], bf0[2], -cospi[32], bf0[3], cos_bit);
- bf1[4] = bf0[4];
- bf1[5] = bf0[5];
- bf1[6] = half_btf(cospi[32], bf0[6], cospi[32], bf0[7], cos_bit);
- bf1[7] = half_btf(cospi[32], bf0[6], -cospi[32], bf0[7], cos_bit);
- av1_range_check_buf(stage, input, bf1, size, stage_range[stage]);
-
- // stage 7
- stage++;
- bf0 = step;
- bf1 = output;
- bf1[0] = bf0[0];
- bf1[1] = -bf0[4];
- bf1[2] = bf0[6];
- bf1[3] = -bf0[2];
- bf1[4] = bf0[3];
- bf1[5] = -bf0[7];
- bf1[6] = bf0[5];
- bf1[7] = -bf0[1];
-}
-
-void av1_iadst16_new(const int32_t *input, int32_t *output, int8_t cos_bit,
- const int8_t *stage_range) {
- assert(output != input);
- const int32_t size = 16;
- const int32_t *cospi = cospi_arr(cos_bit);
-
- int32_t stage = 0;
- int32_t *bf0, *bf1;
- int32_t step[16];
-
- // stage 0;
-
- // stage 1;
- stage++;
- bf1 = output;
- bf1[0] = input[15];
- bf1[1] = input[0];
- bf1[2] = input[13];
- bf1[3] = input[2];
- bf1[4] = input[11];
- bf1[5] = input[4];
- bf1[6] = input[9];
- bf1[7] = input[6];
- bf1[8] = input[7];
- bf1[9] = input[8];
- bf1[10] = input[5];
- bf1[11] = input[10];
- bf1[12] = input[3];
- bf1[13] = input[12];
- bf1[14] = input[1];
- bf1[15] = input[14];
- av1_range_check_buf(stage, input, bf1, size, stage_range[stage]);
-
- // stage 2
- stage++;
- bf0 = output;
- bf1 = step;
- bf1[0] = half_btf(cospi[2], bf0[0], cospi[62], bf0[1], cos_bit);
- bf1[1] = half_btf(cospi[62], bf0[0], -cospi[2], bf0[1], cos_bit);
- bf1[2] = half_btf(cospi[10], bf0[2], cospi[54], bf0[3], cos_bit);
- bf1[3] = half_btf(cospi[54], bf0[2], -cospi[10], bf0[3], cos_bit);
- bf1[4] = half_btf(cospi[18], bf0[4], cospi[46], bf0[5], cos_bit);
- bf1[5] = half_btf(cospi[46], bf0[4], -cospi[18], bf0[5], cos_bit);
- bf1[6] = half_btf(cospi[26], bf0[6], cospi[38], bf0[7], cos_bit);
- bf1[7] = half_btf(cospi[38], bf0[6], -cospi[26], bf0[7], cos_bit);
- bf1[8] = half_btf(cospi[34], bf0[8], cospi[30], bf0[9], cos_bit);
- bf1[9] = half_btf(cospi[30], bf0[8], -cospi[34], bf0[9], cos_bit);
- bf1[10] = half_btf(cospi[42], bf0[10], cospi[22], bf0[11], cos_bit);
- bf1[11] = half_btf(cospi[22], bf0[10], -cospi[42], bf0[11], cos_bit);
- bf1[12] = half_btf(cospi[50], bf0[12], cospi[14], bf0[13], cos_bit);
- bf1[13] = half_btf(cospi[14], bf0[12], -cospi[50], bf0[13], cos_bit);
- bf1[14] = half_btf(cospi[58], bf0[14], cospi[6], bf0[15], cos_bit);
- bf1[15] = half_btf(cospi[6], bf0[14], -cospi[58], bf0[15], cos_bit);
- av1_range_check_buf(stage, input, bf1, size, stage_range[stage]);
-
- // stage 3
- stage++;
- bf0 = step;
- bf1 = output;
- bf1[0] = clamp_value(bf0[0] + bf0[8], stage_range[stage]);
- bf1[1] = clamp_value(bf0[1] + bf0[9], stage_range[stage]);
- bf1[2] = clamp_value(bf0[2] + bf0[10], stage_range[stage]);
- bf1[3] = clamp_value(bf0[3] + bf0[11], stage_range[stage]);
- bf1[4] = clamp_value(bf0[4] + bf0[12], stage_range[stage]);
- bf1[5] = clamp_value(bf0[5] + bf0[13], stage_range[stage]);
- bf1[6] = clamp_value(bf0[6] + bf0[14], stage_range[stage]);
- bf1[7] = clamp_value(bf0[7] + bf0[15], stage_range[stage]);
- bf1[8] = clamp_value(bf0[0] - bf0[8], stage_range[stage]);
- bf1[9] = clamp_value(bf0[1] - bf0[9], stage_range[stage]);
- bf1[10] = clamp_value(bf0[2] - bf0[10], stage_range[stage]);
- bf1[11] = clamp_value(bf0[3] - bf0[11], stage_range[stage]);
- bf1[12] = clamp_value(bf0[4] - bf0[12], stage_range[stage]);
- bf1[13] = clamp_value(bf0[5] - bf0[13], stage_range[stage]);
- bf1[14] = clamp_value(bf0[6] - bf0[14], stage_range[stage]);
- bf1[15] = clamp_value(bf0[7] - bf0[15], stage_range[stage]);
- av1_range_check_buf(stage, input, bf1, size, stage_range[stage]);
-
- // stage 4
- stage++;
- bf0 = output;
- bf1 = step;
- bf1[0] = bf0[0];
- bf1[1] = bf0[1];
- bf1[2] = bf0[2];
- bf1[3] = bf0[3];
- bf1[4] = bf0[4];
- bf1[5] = bf0[5];
- bf1[6] = bf0[6];
- bf1[7] = bf0[7];
- bf1[8] = half_btf(cospi[8], bf0[8], cospi[56], bf0[9], cos_bit);
- bf1[9] = half_btf(cospi[56], bf0[8], -cospi[8], bf0[9], cos_bit);
- bf1[10] = half_btf(cospi[40], bf0[10], cospi[24], bf0[11], cos_bit);
- bf1[11] = half_btf(cospi[24], bf0[10], -cospi[40], bf0[11], cos_bit);
- bf1[12] = half_btf(-cospi[56], bf0[12], cospi[8], bf0[13], cos_bit);
- bf1[13] = half_btf(cospi[8], bf0[12], cospi[56], bf0[13], cos_bit);
- bf1[14] = half_btf(-cospi[24], bf0[14], cospi[40], bf0[15], cos_bit);
- bf1[15] = half_btf(cospi[40], bf0[14], cospi[24], bf0[15], cos_bit);
- av1_range_check_buf(stage, input, bf1, size, stage_range[stage]);
-
- // stage 5
- stage++;
- bf0 = step;
- bf1 = output;
- bf1[0] = clamp_value(bf0[0] + bf0[4], stage_range[stage]);
- bf1[1] = clamp_value(bf0[1] + bf0[5], stage_range[stage]);
- bf1[2] = clamp_value(bf0[2] + bf0[6], stage_range[stage]);
- bf1[3] = clamp_value(bf0[3] + bf0[7], stage_range[stage]);
- bf1[4] = clamp_value(bf0[0] - bf0[4], stage_range[stage]);
- bf1[5] = clamp_value(bf0[1] - bf0[5], stage_range[stage]);
- bf1[6] = clamp_value(bf0[2] - bf0[6], stage_range[stage]);
- bf1[7] = clamp_value(bf0[3] - bf0[7], stage_range[stage]);
- bf1[8] = clamp_value(bf0[8] + bf0[12], stage_range[stage]);
- bf1[9] = clamp_value(bf0[9] + bf0[13], stage_range[stage]);
- bf1[10] = clamp_value(bf0[10] + bf0[14], stage_range[stage]);
- bf1[11] = clamp_value(bf0[11] + bf0[15], stage_range[stage]);
- bf1[12] = clamp_value(bf0[8] - bf0[12], stage_range[stage]);
- bf1[13] = clamp_value(bf0[9] - bf0[13], stage_range[stage]);
- bf1[14] = clamp_value(bf0[10] - bf0[14], stage_range[stage]);
- bf1[15] = clamp_value(bf0[11] - bf0[15], stage_range[stage]);
- av1_range_check_buf(stage, input, bf1, size, stage_range[stage]);
-
- // stage 6
- stage++;
- bf0 = output;
- bf1 = step;
- bf1[0] = bf0[0];
- bf1[1] = bf0[1];
- bf1[2] = bf0[2];
- bf1[3] = bf0[3];
- bf1[4] = half_btf(cospi[16], bf0[4], cospi[48], bf0[5], cos_bit);
- bf1[5] = half_btf(cospi[48], bf0[4], -cospi[16], bf0[5], cos_bit);
- bf1[6] = half_btf(-cospi[48], bf0[6], cospi[16], bf0[7], cos_bit);
- bf1[7] = half_btf(cospi[16], bf0[6], cospi[48], bf0[7], cos_bit);
- bf1[8] = bf0[8];
- bf1[9] = bf0[9];
- bf1[10] = bf0[10];
- bf1[11] = bf0[11];
- bf1[12] = half_btf(cospi[16], bf0[12], cospi[48], bf0[13], cos_bit);
- bf1[13] = half_btf(cospi[48], bf0[12], -cospi[16], bf0[13], cos_bit);
- bf1[14] = half_btf(-cospi[48], bf0[14], cospi[16], bf0[15], cos_bit);
- bf1[15] = half_btf(cospi[16], bf0[14], cospi[48], bf0[15], cos_bit);
- av1_range_check_buf(stage, input, bf1, size, stage_range[stage]);
-
- // stage 7
- stage++;
- bf0 = step;
- bf1 = output;
- bf1[0] = clamp_value(bf0[0] + bf0[2], stage_range[stage]);
- bf1[1] = clamp_value(bf0[1] + bf0[3], stage_range[stage]);
- bf1[2] = clamp_value(bf0[0] - bf0[2], stage_range[stage]);
- bf1[3] = clamp_value(bf0[1] - bf0[3], stage_range[stage]);
- bf1[4] = clamp_value(bf0[4] + bf0[6], stage_range[stage]);
- bf1[5] = clamp_value(bf0[5] + bf0[7], stage_range[stage]);
- bf1[6] = clamp_value(bf0[4] - bf0[6], stage_range[stage]);
- bf1[7] = clamp_value(bf0[5] - bf0[7], stage_range[stage]);
- bf1[8] = clamp_value(bf0[8] + bf0[10], stage_range[stage]);
- bf1[9] = clamp_value(bf0[9] + bf0[11], stage_range[stage]);
- bf1[10] = clamp_value(bf0[8] - bf0[10], stage_range[stage]);
- bf1[11] = clamp_value(bf0[9] - bf0[11], stage_range[stage]);
- bf1[12] = clamp_value(bf0[12] + bf0[14], stage_range[stage]);
- bf1[13] = clamp_value(bf0[13] + bf0[15], stage_range[stage]);
- bf1[14] = clamp_value(bf0[12] - bf0[14], stage_range[stage]);
- bf1[15] = clamp_value(bf0[13] - bf0[15], stage_range[stage]);
- av1_range_check_buf(stage, input, bf1, size, stage_range[stage]);
-
- // stage 8
- stage++;
- bf0 = output;
- bf1 = step;
- bf1[0] = bf0[0];
- bf1[1] = bf0[1];
- bf1[2] = half_btf(cospi[32], bf0[2], cospi[32], bf0[3], cos_bit);
- bf1[3] = half_btf(cospi[32], bf0[2], -cospi[32], bf0[3], cos_bit);
- bf1[4] = bf0[4];
- bf1[5] = bf0[5];
- bf1[6] = half_btf(cospi[32], bf0[6], cospi[32], bf0[7], cos_bit);
- bf1[7] = half_btf(cospi[32], bf0[6], -cospi[32], bf0[7], cos_bit);
- bf1[8] = bf0[8];
- bf1[9] = bf0[9];
- bf1[10] = half_btf(cospi[32], bf0[10], cospi[32], bf0[11], cos_bit);
- bf1[11] = half_btf(cospi[32], bf0[10], -cospi[32], bf0[11], cos_bit);
- bf1[12] = bf0[12];
- bf1[13] = bf0[13];
- bf1[14] = half_btf(cospi[32], bf0[14], cospi[32], bf0[15], cos_bit);
- bf1[15] = half_btf(cospi[32], bf0[14], -cospi[32], bf0[15], cos_bit);
- av1_range_check_buf(stage, input, bf1, size, stage_range[stage]);
-
- // stage 9
- stage++;
- bf0 = step;
- bf1 = output;
- bf1[0] = bf0[0];
- bf1[1] = -bf0[8];
- bf1[2] = bf0[12];
- bf1[3] = -bf0[4];
- bf1[4] = bf0[6];
- bf1[5] = -bf0[14];
- bf1[6] = bf0[10];
- bf1[7] = -bf0[2];
- bf1[8] = bf0[3];
- bf1[9] = -bf0[11];
- bf1[10] = bf0[15];
- bf1[11] = -bf0[7];
- bf1[12] = bf0[5];
- bf1[13] = -bf0[13];
- bf1[14] = bf0[9];
- bf1[15] = -bf0[1];
-}
-
-void av1_iidentity4_c(const int32_t *input, int32_t *output, int8_t cos_bit,
- const int8_t *stage_range) {
- (void)cos_bit;
- (void)stage_range;
- for (int i = 0; i < 4; ++i) {
- output[i] = round_shift((int64_t)NewSqrt2 * input[i], NewSqrt2Bits);
- }
- assert(stage_range[0] + NewSqrt2Bits <= 32);
-}
-
-void av1_iidentity8_c(const int32_t *input, int32_t *output, int8_t cos_bit,
- const int8_t *stage_range) {
- (void)cos_bit;
- (void)stage_range;
- for (int i = 0; i < 8; ++i) output[i] = (int32_t)((int64_t)input[i] * 2);
-}
-
-void av1_iidentity16_c(const int32_t *input, int32_t *output, int8_t cos_bit,
- const int8_t *stage_range) {
- (void)cos_bit;
- (void)stage_range;
- for (int i = 0; i < 16; ++i)
- output[i] = round_shift((int64_t)NewSqrt2 * 2 * input[i], NewSqrt2Bits);
- assert(stage_range[0] + NewSqrt2Bits <= 32);
-}
-
-void av1_iidentity32_c(const int32_t *input, int32_t *output, int8_t cos_bit,
- const int8_t *stage_range) {
- (void)cos_bit;
- (void)stage_range;
- for (int i = 0; i < 32; ++i) output[i] = (int32_t)((int64_t)input[i] * 4);
-}
-
-void av1_idct64_new(const int32_t *input, int32_t *output, int8_t cos_bit,
- const int8_t *stage_range) {
- assert(output != input);
- const int32_t size = 64;
- const int32_t *cospi = cospi_arr(cos_bit);
-
- int32_t stage = 0;
- int32_t *bf0, *bf1;
- int32_t step[64];
-
- // stage 0;
-
- // stage 1;
- stage++;
- bf1 = output;
- bf1[0] = input[0];
- bf1[1] = input[32];
- bf1[2] = input[16];
- bf1[3] = input[48];
- bf1[4] = input[8];
- bf1[5] = input[40];
- bf1[6] = input[24];
- bf1[7] = input[56];
- bf1[8] = input[4];
- bf1[9] = input[36];
- bf1[10] = input[20];
- bf1[11] = input[52];
- bf1[12] = input[12];
- bf1[13] = input[44];
- bf1[14] = input[28];
- bf1[15] = input[60];
- bf1[16] = input[2];
- bf1[17] = input[34];
- bf1[18] = input[18];
- bf1[19] = input[50];
- bf1[20] = input[10];
- bf1[21] = input[42];
- bf1[22] = input[26];
- bf1[23] = input[58];
- bf1[24] = input[6];
- bf1[25] = input[38];
- bf1[26] = input[22];
- bf1[27] = input[54];
- bf1[28] = input[14];
- bf1[29] = input[46];
- bf1[30] = input[30];
- bf1[31] = input[62];
- bf1[32] = input[1];
- bf1[33] = input[33];
- bf1[34] = input[17];
- bf1[35] = input[49];
- bf1[36] = input[9];
- bf1[37] = input[41];
- bf1[38] = input[25];
- bf1[39] = input[57];
- bf1[40] = input[5];
- bf1[41] = input[37];
- bf1[42] = input[21];
- bf1[43] = input[53];
- bf1[44] = input[13];
- bf1[45] = input[45];
- bf1[46] = input[29];
- bf1[47] = input[61];
- bf1[48] = input[3];
- bf1[49] = input[35];
- bf1[50] = input[19];
- bf1[51] = input[51];
- bf1[52] = input[11];
- bf1[53] = input[43];
- bf1[54] = input[27];
- bf1[55] = input[59];
- bf1[56] = input[7];
- bf1[57] = input[39];
- bf1[58] = input[23];
- bf1[59] = input[55];
- bf1[60] = input[15];
- bf1[61] = input[47];
- bf1[62] = input[31];
- bf1[63] = input[63];
- av1_range_check_buf(stage, input, bf1, size, stage_range[stage]);
-
- // stage 2
- stage++;
- bf0 = output;
- bf1 = step;
- bf1[0] = bf0[0];
- bf1[1] = bf0[1];
- bf1[2] = bf0[2];
- bf1[3] = bf0[3];
- bf1[4] = bf0[4];
- bf1[5] = bf0[5];
- bf1[6] = bf0[6];
- bf1[7] = bf0[7];
- bf1[8] = bf0[8];
- bf1[9] = bf0[9];
- bf1[10] = bf0[10];
- bf1[11] = bf0[11];
- bf1[12] = bf0[12];
- bf1[13] = bf0[13];
- bf1[14] = bf0[14];
- bf1[15] = bf0[15];
- bf1[16] = bf0[16];
- bf1[17] = bf0[17];
- bf1[18] = bf0[18];
- bf1[19] = bf0[19];
- bf1[20] = bf0[20];
- bf1[21] = bf0[21];
- bf1[22] = bf0[22];
- bf1[23] = bf0[23];
- bf1[24] = bf0[24];
- bf1[25] = bf0[25];
- bf1[26] = bf0[26];
- bf1[27] = bf0[27];
- bf1[28] = bf0[28];
- bf1[29] = bf0[29];
- bf1[30] = bf0[30];
- bf1[31] = bf0[31];
- bf1[32] = half_btf(cospi[63], bf0[32], -cospi[1], bf0[63], cos_bit);
- bf1[33] = half_btf(cospi[31], bf0[33], -cospi[33], bf0[62], cos_bit);
- bf1[34] = half_btf(cospi[47], bf0[34], -cospi[17], bf0[61], cos_bit);
- bf1[35] = half_btf(cospi[15], bf0[35], -cospi[49], bf0[60], cos_bit);
- bf1[36] = half_btf(cospi[55], bf0[36], -cospi[9], bf0[59], cos_bit);
- bf1[37] = half_btf(cospi[23], bf0[37], -cospi[41], bf0[58], cos_bit);
- bf1[38] = half_btf(cospi[39], bf0[38], -cospi[25], bf0[57], cos_bit);
- bf1[39] = half_btf(cospi[7], bf0[39], -cospi[57], bf0[56], cos_bit);
- bf1[40] = half_btf(cospi[59], bf0[40], -cospi[5], bf0[55], cos_bit);
- bf1[41] = half_btf(cospi[27], bf0[41], -cospi[37], bf0[54], cos_bit);
- bf1[42] = half_btf(cospi[43], bf0[42], -cospi[21], bf0[53], cos_bit);
- bf1[43] = half_btf(cospi[11], bf0[43], -cospi[53], bf0[52], cos_bit);
- bf1[44] = half_btf(cospi[51], bf0[44], -cospi[13], bf0[51], cos_bit);
- bf1[45] = half_btf(cospi[19], bf0[45], -cospi[45], bf0[50], cos_bit);
- bf1[46] = half_btf(cospi[35], bf0[46], -cospi[29], bf0[49], cos_bit);
- bf1[47] = half_btf(cospi[3], bf0[47], -cospi[61], bf0[48], cos_bit);
- bf1[48] = half_btf(cospi[61], bf0[47], cospi[3], bf0[48], cos_bit);
- bf1[49] = half_btf(cospi[29], bf0[46], cospi[35], bf0[49], cos_bit);
- bf1[50] = half_btf(cospi[45], bf0[45], cospi[19], bf0[50], cos_bit);
- bf1[51] = half_btf(cospi[13], bf0[44], cospi[51], bf0[51], cos_bit);
- bf1[52] = half_btf(cospi[53], bf0[43], cospi[11], bf0[52], cos_bit);
- bf1[53] = half_btf(cospi[21], bf0[42], cospi[43], bf0[53], cos_bit);
- bf1[54] = half_btf(cospi[37], bf0[41], cospi[27], bf0[54], cos_bit);
- bf1[55] = half_btf(cospi[5], bf0[40], cospi[59], bf0[55], cos_bit);
- bf1[56] = half_btf(cospi[57], bf0[39], cospi[7], bf0[56], cos_bit);
- bf1[57] = half_btf(cospi[25], bf0[38], cospi[39], bf0[57], cos_bit);
- bf1[58] = half_btf(cospi[41], bf0[37], cospi[23], bf0[58], cos_bit);
- bf1[59] = half_btf(cospi[9], bf0[36], cospi[55], bf0[59], cos_bit);
- bf1[60] = half_btf(cospi[49], bf0[35], cospi[15], bf0[60], cos_bit);
- bf1[61] = half_btf(cospi[17], bf0[34], cospi[47], bf0[61], cos_bit);
- bf1[62] = half_btf(cospi[33], bf0[33], cospi[31], bf0[62], cos_bit);
- bf1[63] = half_btf(cospi[1], bf0[32], cospi[63], bf0[63], cos_bit);
- av1_range_check_buf(stage, input, bf1, size, stage_range[stage]);
-
- // stage 3
- stage++;
- bf0 = step;
- bf1 = output;
- bf1[0] = bf0[0];
- bf1[1] = bf0[1];
- bf1[2] = bf0[2];
- bf1[3] = bf0[3];
- bf1[4] = bf0[4];
- bf1[5] = bf0[5];
- bf1[6] = bf0[6];
- bf1[7] = bf0[7];
- bf1[8] = bf0[8];
- bf1[9] = bf0[9];
- bf1[10] = bf0[10];
- bf1[11] = bf0[11];
- bf1[12] = bf0[12];
- bf1[13] = bf0[13];
- bf1[14] = bf0[14];
- bf1[15] = bf0[15];
- bf1[16] = half_btf(cospi[62], bf0[16], -cospi[2], bf0[31], cos_bit);
- bf1[17] = half_btf(cospi[30], bf0[17], -cospi[34], bf0[30], cos_bit);
- bf1[18] = half_btf(cospi[46], bf0[18], -cospi[18], bf0[29], cos_bit);
- bf1[19] = half_btf(cospi[14], bf0[19], -cospi[50], bf0[28], cos_bit);
- bf1[20] = half_btf(cospi[54], bf0[20], -cospi[10], bf0[27], cos_bit);
- bf1[21] = half_btf(cospi[22], bf0[21], -cospi[42], bf0[26], cos_bit);
- bf1[22] = half_btf(cospi[38], bf0[22], -cospi[26], bf0[25], cos_bit);
- bf1[23] = half_btf(cospi[6], bf0[23], -cospi[58], bf0[24], cos_bit);
- bf1[24] = half_btf(cospi[58], bf0[23], cospi[6], bf0[24], cos_bit);
- bf1[25] = half_btf(cospi[26], bf0[22], cospi[38], bf0[25], cos_bit);
- bf1[26] = half_btf(cospi[42], bf0[21], cospi[22], bf0[26], cos_bit);
- bf1[27] = half_btf(cospi[10], bf0[20], cospi[54], bf0[27], cos_bit);
- bf1[28] = half_btf(cospi[50], bf0[19], cospi[14], bf0[28], cos_bit);
- bf1[29] = half_btf(cospi[18], bf0[18], cospi[46], bf0[29], cos_bit);
- bf1[30] = half_btf(cospi[34], bf0[17], cospi[30], bf0[30], cos_bit);
- bf1[31] = half_btf(cospi[2], bf0[16], cospi[62], bf0[31], cos_bit);
- bf1[32] = clamp_value(bf0[32] + bf0[33], stage_range[stage]);
- bf1[33] = clamp_value(bf0[32] - bf0[33], stage_range[stage]);
- bf1[34] = clamp_value(-bf0[34] + bf0[35], stage_range[stage]);
- bf1[35] = clamp_value(bf0[34] + bf0[35], stage_range[stage]);
- bf1[36] = clamp_value(bf0[36] + bf0[37], stage_range[stage]);
- bf1[37] = clamp_value(bf0[36] - bf0[37], stage_range[stage]);
- bf1[38] = clamp_value(-bf0[38] + bf0[39], stage_range[stage]);
- bf1[39] = clamp_value(bf0[38] + bf0[39], stage_range[stage]);
- bf1[40] = clamp_value(bf0[40] + bf0[41], stage_range[stage]);
- bf1[41] = clamp_value(bf0[40] - bf0[41], stage_range[stage]);
- bf1[42] = clamp_value(-bf0[42] + bf0[43], stage_range[stage]);
- bf1[43] = clamp_value(bf0[42] + bf0[43], stage_range[stage]);
- bf1[44] = clamp_value(bf0[44] + bf0[45], stage_range[stage]);
- bf1[45] = clamp_value(bf0[44] - bf0[45], stage_range[stage]);
- bf1[46] = clamp_value(-bf0[46] + bf0[47], stage_range[stage]);
- bf1[47] = clamp_value(bf0[46] + bf0[47], stage_range[stage]);
- bf1[48] = clamp_value(bf0[48] + bf0[49], stage_range[stage]);
- bf1[49] = clamp_value(bf0[48] - bf0[49], stage_range[stage]);
- bf1[50] = clamp_value(-bf0[50] + bf0[51], stage_range[stage]);
- bf1[51] = clamp_value(bf0[50] + bf0[51], stage_range[stage]);
- bf1[52] = clamp_value(bf0[52] + bf0[53], stage_range[stage]);
- bf1[53] = clamp_value(bf0[52] - bf0[53], stage_range[stage]);
- bf1[54] = clamp_value(-bf0[54] + bf0[55], stage_range[stage]);
- bf1[55] = clamp_value(bf0[54] + bf0[55], stage_range[stage]);
- bf1[56] = clamp_value(bf0[56] + bf0[57], stage_range[stage]);
- bf1[57] = clamp_value(bf0[56] - bf0[57], stage_range[stage]);
- bf1[58] = clamp_value(-bf0[58] + bf0[59], stage_range[stage]);
- bf1[59] = clamp_value(bf0[58] + bf0[59], stage_range[stage]);
- bf1[60] = clamp_value(bf0[60] + bf0[61], stage_range[stage]);
- bf1[61] = clamp_value(bf0[60] - bf0[61], stage_range[stage]);
- bf1[62] = clamp_value(-bf0[62] + bf0[63], stage_range[stage]);
- bf1[63] = clamp_value(bf0[62] + bf0[63], stage_range[stage]);
- av1_range_check_buf(stage, input, bf1, size, stage_range[stage]);
-
- // stage 4
- stage++;
- bf0 = output;
- bf1 = step;
- bf1[0] = bf0[0];
- bf1[1] = bf0[1];
- bf1[2] = bf0[2];
- bf1[3] = bf0[3];
- bf1[4] = bf0[4];
- bf1[5] = bf0[5];
- bf1[6] = bf0[6];
- bf1[7] = bf0[7];
- bf1[8] = half_btf(cospi[60], bf0[8], -cospi[4], bf0[15], cos_bit);
- bf1[9] = half_btf(cospi[28], bf0[9], -cospi[36], bf0[14], cos_bit);
- bf1[10] = half_btf(cospi[44], bf0[10], -cospi[20], bf0[13], cos_bit);
- bf1[11] = half_btf(cospi[12], bf0[11], -cospi[52], bf0[12], cos_bit);
- bf1[12] = half_btf(cospi[52], bf0[11], cospi[12], bf0[12], cos_bit);
- bf1[13] = half_btf(cospi[20], bf0[10], cospi[44], bf0[13], cos_bit);
- bf1[14] = half_btf(cospi[36], bf0[9], cospi[28], bf0[14], cos_bit);
- bf1[15] = half_btf(cospi[4], bf0[8], cospi[60], bf0[15], cos_bit);
- bf1[16] = clamp_value(bf0[16] + bf0[17], stage_range[stage]);
- bf1[17] = clamp_value(bf0[16] - bf0[17], stage_range[stage]);
- bf1[18] = clamp_value(-bf0[18] + bf0[19], stage_range[stage]);
- bf1[19] = clamp_value(bf0[18] + bf0[19], stage_range[stage]);
- bf1[20] = clamp_value(bf0[20] + bf0[21], stage_range[stage]);
- bf1[21] = clamp_value(bf0[20] - bf0[21], stage_range[stage]);
- bf1[22] = clamp_value(-bf0[22] + bf0[23], stage_range[stage]);
- bf1[23] = clamp_value(bf0[22] + bf0[23], stage_range[stage]);
- bf1[24] = clamp_value(bf0[24] + bf0[25], stage_range[stage]);
- bf1[25] = clamp_value(bf0[24] - bf0[25], stage_range[stage]);
- bf1[26] = clamp_value(-bf0[26] + bf0[27], stage_range[stage]);
- bf1[27] = clamp_value(bf0[26] + bf0[27], stage_range[stage]);
- bf1[28] = clamp_value(bf0[28] + bf0[29], stage_range[stage]);
- bf1[29] = clamp_value(bf0[28] - bf0[29], stage_range[stage]);
- bf1[30] = clamp_value(-bf0[30] + bf0[31], stage_range[stage]);
- bf1[31] = clamp_value(bf0[30] + bf0[31], stage_range[stage]);
- bf1[32] = bf0[32];
- bf1[33] = half_btf(-cospi[4], bf0[33], cospi[60], bf0[62], cos_bit);
- bf1[34] = half_btf(-cospi[60], bf0[34], -cospi[4], bf0[61], cos_bit);
- bf1[35] = bf0[35];
- bf1[36] = bf0[36];
- bf1[37] = half_btf(-cospi[36], bf0[37], cospi[28], bf0[58], cos_bit);
- bf1[38] = half_btf(-cospi[28], bf0[38], -cospi[36], bf0[57], cos_bit);
- bf1[39] = bf0[39];
- bf1[40] = bf0[40];
- bf1[41] = half_btf(-cospi[20], bf0[41], cospi[44], bf0[54], cos_bit);
- bf1[42] = half_btf(-cospi[44], bf0[42], -cospi[20], bf0[53], cos_bit);
- bf1[43] = bf0[43];
- bf1[44] = bf0[44];
- bf1[45] = half_btf(-cospi[52], bf0[45], cospi[12], bf0[50], cos_bit);
- bf1[46] = half_btf(-cospi[12], bf0[46], -cospi[52], bf0[49], cos_bit);
- bf1[47] = bf0[47];
- bf1[48] = bf0[48];
- bf1[49] = half_btf(-cospi[52], bf0[46], cospi[12], bf0[49], cos_bit);
- bf1[50] = half_btf(cospi[12], bf0[45], cospi[52], bf0[50], cos_bit);
- bf1[51] = bf0[51];
- bf1[52] = bf0[52];
- bf1[53] = half_btf(-cospi[20], bf0[42], cospi[44], bf0[53], cos_bit);
- bf1[54] = half_btf(cospi[44], bf0[41], cospi[20], bf0[54], cos_bit);
- bf1[55] = bf0[55];
- bf1[56] = bf0[56];
- bf1[57] = half_btf(-cospi[36], bf0[38], cospi[28], bf0[57], cos_bit);
- bf1[58] = half_btf(cospi[28], bf0[37], cospi[36], bf0[58], cos_bit);
- bf1[59] = bf0[59];
- bf1[60] = bf0[60];
- bf1[61] = half_btf(-cospi[4], bf0[34], cospi[60], bf0[61], cos_bit);
- bf1[62] = half_btf(cospi[60], bf0[33], cospi[4], bf0[62], cos_bit);
- bf1[63] = bf0[63];
- av1_range_check_buf(stage, input, bf1, size, stage_range[stage]);
-
- // stage 5
- stage++;
- bf0 = step;
- bf1 = output;
- bf1[0] = bf0[0];
- bf1[1] = bf0[1];
- bf1[2] = bf0[2];
- bf1[3] = bf0[3];
- bf1[4] = half_btf(cospi[56], bf0[4], -cospi[8], bf0[7], cos_bit);
- bf1[5] = half_btf(cospi[24], bf0[5], -cospi[40], bf0[6], cos_bit);
- bf1[6] = half_btf(cospi[40], bf0[5], cospi[24], bf0[6], cos_bit);
- bf1[7] = half_btf(cospi[8], bf0[4], cospi[56], bf0[7], cos_bit);
- bf1[8] = clamp_value(bf0[8] + bf0[9], stage_range[stage]);
- bf1[9] = clamp_value(bf0[8] - bf0[9], stage_range[stage]);
- bf1[10] = clamp_value(-bf0[10] + bf0[11], stage_range[stage]);
- bf1[11] = clamp_value(bf0[10] + bf0[11], stage_range[stage]);
- bf1[12] = clamp_value(bf0[12] + bf0[13], stage_range[stage]);
- bf1[13] = clamp_value(bf0[12] - bf0[13], stage_range[stage]);
- bf1[14] = clamp_value(-bf0[14] + bf0[15], stage_range[stage]);
- bf1[15] = clamp_value(bf0[14] + bf0[15], stage_range[stage]);
- bf1[16] = bf0[16];
- bf1[17] = half_btf(-cospi[8], bf0[17], cospi[56], bf0[30], cos_bit);
- bf1[18] = half_btf(-cospi[56], bf0[18], -cospi[8], bf0[29], cos_bit);
- bf1[19] = bf0[19];
- bf1[20] = bf0[20];
- bf1[21] = half_btf(-cospi[40], bf0[21], cospi[24], bf0[26], cos_bit);
- bf1[22] = half_btf(-cospi[24], bf0[22], -cospi[40], bf0[25], cos_bit);
- bf1[23] = bf0[23];
- bf1[24] = bf0[24];
- bf1[25] = half_btf(-cospi[40], bf0[22], cospi[24], bf0[25], cos_bit);
- bf1[26] = half_btf(cospi[24], bf0[21], cospi[40], bf0[26], cos_bit);
- bf1[27] = bf0[27];
- bf1[28] = bf0[28];
- bf1[29] = half_btf(-cospi[8], bf0[18], cospi[56], bf0[29], cos_bit);
- bf1[30] = half_btf(cospi[56], bf0[17], cospi[8], bf0[30], cos_bit);
- bf1[31] = bf0[31];
- bf1[32] = clamp_value(bf0[32] + bf0[35], stage_range[stage]);
- bf1[33] = clamp_value(bf0[33] + bf0[34], stage_range[stage]);
- bf1[34] = clamp_value(bf0[33] - bf0[34], stage_range[stage]);
- bf1[35] = clamp_value(bf0[32] - bf0[35], stage_range[stage]);
- bf1[36] = clamp_value(-bf0[36] + bf0[39], stage_range[stage]);
- bf1[37] = clamp_value(-bf0[37] + bf0[38], stage_range[stage]);
- bf1[38] = clamp_value(bf0[37] + bf0[38], stage_range[stage]);
- bf1[39] = clamp_value(bf0[36] + bf0[39], stage_range[stage]);
- bf1[40] = clamp_value(bf0[40] + bf0[43], stage_range[stage]);
- bf1[41] = clamp_value(bf0[41] + bf0[42], stage_range[stage]);
- bf1[42] = clamp_value(bf0[41] - bf0[42], stage_range[stage]);
- bf1[43] = clamp_value(bf0[40] - bf0[43], stage_range[stage]);
- bf1[44] = clamp_value(-bf0[44] + bf0[47], stage_range[stage]);
- bf1[45] = clamp_value(-bf0[45] + bf0[46], stage_range[stage]);
- bf1[46] = clamp_value(bf0[45] + bf0[46], stage_range[stage]);
- bf1[47] = clamp_value(bf0[44] + bf0[47], stage_range[stage]);
- bf1[48] = clamp_value(bf0[48] + bf0[51], stage_range[stage]);
- bf1[49] = clamp_value(bf0[49] + bf0[50], stage_range[stage]);
- bf1[50] = clamp_value(bf0[49] - bf0[50], stage_range[stage]);
- bf1[51] = clamp_value(bf0[48] - bf0[51], stage_range[stage]);
- bf1[52] = clamp_value(-bf0[52] + bf0[55], stage_range[stage]);
- bf1[53] = clamp_value(-bf0[53] + bf0[54], stage_range[stage]);
- bf1[54] = clamp_value(bf0[53] + bf0[54], stage_range[stage]);
- bf1[55] = clamp_value(bf0[52] + bf0[55], stage_range[stage]);
- bf1[56] = clamp_value(bf0[56] + bf0[59], stage_range[stage]);
- bf1[57] = clamp_value(bf0[57] + bf0[58], stage_range[stage]);
- bf1[58] = clamp_value(bf0[57] - bf0[58], stage_range[stage]);
- bf1[59] = clamp_value(bf0[56] - bf0[59], stage_range[stage]);
- bf1[60] = clamp_value(-bf0[60] + bf0[63], stage_range[stage]);
- bf1[61] = clamp_value(-bf0[61] + bf0[62], stage_range[stage]);
- bf1[62] = clamp_value(bf0[61] + bf0[62], stage_range[stage]);
- bf1[63] = clamp_value(bf0[60] + bf0[63], stage_range[stage]);
- av1_range_check_buf(stage, input, bf1, size, stage_range[stage]);
-
- // stage 6
- stage++;
- bf0 = output;
- bf1 = step;
- bf1[0] = half_btf(cospi[32], bf0[0], cospi[32], bf0[1], cos_bit);
- bf1[1] = half_btf(cospi[32], bf0[0], -cospi[32], bf0[1], cos_bit);
- bf1[2] = half_btf(cospi[48], bf0[2], -cospi[16], bf0[3], cos_bit);
- bf1[3] = half_btf(cospi[16], bf0[2], cospi[48], bf0[3], cos_bit);
- bf1[4] = clamp_value(bf0[4] + bf0[5], stage_range[stage]);
- bf1[5] = clamp_value(bf0[4] - bf0[5], stage_range[stage]);
- bf1[6] = clamp_value(-bf0[6] + bf0[7], stage_range[stage]);
- bf1[7] = clamp_value(bf0[6] + bf0[7], stage_range[stage]);
- bf1[8] = bf0[8];
- bf1[9] = half_btf(-cospi[16], bf0[9], cospi[48], bf0[14], cos_bit);
- bf1[10] = half_btf(-cospi[48], bf0[10], -cospi[16], bf0[13], cos_bit);
- bf1[11] = bf0[11];
- bf1[12] = bf0[12];
- bf1[13] = half_btf(-cospi[16], bf0[10], cospi[48], bf0[13], cos_bit);
- bf1[14] = half_btf(cospi[48], bf0[9], cospi[16], bf0[14], cos_bit);
- bf1[15] = bf0[15];
- bf1[16] = clamp_value(bf0[16] + bf0[19], stage_range[stage]);
- bf1[17] = clamp_value(bf0[17] + bf0[18], stage_range[stage]);
- bf1[18] = clamp_value(bf0[17] - bf0[18], stage_range[stage]);
- bf1[19] = clamp_value(bf0[16] - bf0[19], stage_range[stage]);
- bf1[20] = clamp_value(-bf0[20] + bf0[23], stage_range[stage]);
- bf1[21] = clamp_value(-bf0[21] + bf0[22], stage_range[stage]);
- bf1[22] = clamp_value(bf0[21] + bf0[22], stage_range[stage]);
- bf1[23] = clamp_value(bf0[20] + bf0[23], stage_range[stage]);
- bf1[24] = clamp_value(bf0[24] + bf0[27], stage_range[stage]);
- bf1[25] = clamp_value(bf0[25] + bf0[26], stage_range[stage]);
- bf1[26] = clamp_value(bf0[25] - bf0[26], stage_range[stage]);
- bf1[27] = clamp_value(bf0[24] - bf0[27], stage_range[stage]);
- bf1[28] = clamp_value(-bf0[28] + bf0[31], stage_range[stage]);
- bf1[29] = clamp_value(-bf0[29] + bf0[30], stage_range[stage]);
- bf1[30] = clamp_value(bf0[29] + bf0[30], stage_range[stage]);
- bf1[31] = clamp_value(bf0[28] + bf0[31], stage_range[stage]);
- bf1[32] = bf0[32];
- bf1[33] = bf0[33];
- bf1[34] = half_btf(-cospi[8], bf0[34], cospi[56], bf0[61], cos_bit);
- bf1[35] = half_btf(-cospi[8], bf0[35], cospi[56], bf0[60], cos_bit);
- bf1[36] = half_btf(-cospi[56], bf0[36], -cospi[8], bf0[59], cos_bit);
- bf1[37] = half_btf(-cospi[56], bf0[37], -cospi[8], bf0[58], cos_bit);
- bf1[38] = bf0[38];
- bf1[39] = bf0[39];
- bf1[40] = bf0[40];
- bf1[41] = bf0[41];
- bf1[42] = half_btf(-cospi[40], bf0[42], cospi[24], bf0[53], cos_bit);
- bf1[43] = half_btf(-cospi[40], bf0[43], cospi[24], bf0[52], cos_bit);
- bf1[44] = half_btf(-cospi[24], bf0[44], -cospi[40], bf0[51], cos_bit);
- bf1[45] = half_btf(-cospi[24], bf0[45], -cospi[40], bf0[50], cos_bit);
- bf1[46] = bf0[46];
- bf1[47] = bf0[47];
- bf1[48] = bf0[48];
- bf1[49] = bf0[49];
- bf1[50] = half_btf(-cospi[40], bf0[45], cospi[24], bf0[50], cos_bit);
- bf1[51] = half_btf(-cospi[40], bf0[44], cospi[24], bf0[51], cos_bit);
- bf1[52] = half_btf(cospi[24], bf0[43], cospi[40], bf0[52], cos_bit);
- bf1[53] = half_btf(cospi[24], bf0[42], cospi[40], bf0[53], cos_bit);
- bf1[54] = bf0[54];
- bf1[55] = bf0[55];
- bf1[56] = bf0[56];
- bf1[57] = bf0[57];
- bf1[58] = half_btf(-cospi[8], bf0[37], cospi[56], bf0[58], cos_bit);
- bf1[59] = half_btf(-cospi[8], bf0[36], cospi[56], bf0[59], cos_bit);
- bf1[60] = half_btf(cospi[56], bf0[35], cospi[8], bf0[60], cos_bit);
- bf1[61] = half_btf(cospi[56], bf0[34], cospi[8], bf0[61], cos_bit);
- bf1[62] = bf0[62];
- bf1[63] = bf0[63];
- av1_range_check_buf(stage, input, bf1, size, stage_range[stage]);
-
- // stage 7
- stage++;
- bf0 = step;
- bf1 = output;
- bf1[0] = clamp_value(bf0[0] + bf0[3], stage_range[stage]);
- bf1[1] = clamp_value(bf0[1] + bf0[2], stage_range[stage]);
- bf1[2] = clamp_value(bf0[1] - bf0[2], stage_range[stage]);
- bf1[3] = clamp_value(bf0[0] - bf0[3], stage_range[stage]);
- bf1[4] = bf0[4];
- bf1[5] = half_btf(-cospi[32], bf0[5], cospi[32], bf0[6], cos_bit);
- bf1[6] = half_btf(cospi[32], bf0[5], cospi[32], bf0[6], cos_bit);
- bf1[7] = bf0[7];
- bf1[8] = clamp_value(bf0[8] + bf0[11], stage_range[stage]);
- bf1[9] = clamp_value(bf0[9] + bf0[10], stage_range[stage]);
- bf1[10] = clamp_value(bf0[9] - bf0[10], stage_range[stage]);
- bf1[11] = clamp_value(bf0[8] - bf0[11], stage_range[stage]);
- bf1[12] = clamp_value(-bf0[12] + bf0[15], stage_range[stage]);
- bf1[13] = clamp_value(-bf0[13] + bf0[14], stage_range[stage]);
- bf1[14] = clamp_value(bf0[13] + bf0[14], stage_range[stage]);
- bf1[15] = clamp_value(bf0[12] + bf0[15], stage_range[stage]);
- bf1[16] = bf0[16];
- bf1[17] = bf0[17];
- bf1[18] = half_btf(-cospi[16], bf0[18], cospi[48], bf0[29], cos_bit);
- bf1[19] = half_btf(-cospi[16], bf0[19], cospi[48], bf0[28], cos_bit);
- bf1[20] = half_btf(-cospi[48], bf0[20], -cospi[16], bf0[27], cos_bit);
- bf1[21] = half_btf(-cospi[48], bf0[21], -cospi[16], bf0[26], cos_bit);
- bf1[22] = bf0[22];
- bf1[23] = bf0[23];
- bf1[24] = bf0[24];
- bf1[25] = bf0[25];
- bf1[26] = half_btf(-cospi[16], bf0[21], cospi[48], bf0[26], cos_bit);
- bf1[27] = half_btf(-cospi[16], bf0[20], cospi[48], bf0[27], cos_bit);
- bf1[28] = half_btf(cospi[48], bf0[19], cospi[16], bf0[28], cos_bit);
- bf1[29] = half_btf(cospi[48], bf0[18], cospi[16], bf0[29], cos_bit);
- bf1[30] = bf0[30];
- bf1[31] = bf0[31];
- bf1[32] = clamp_value(bf0[32] + bf0[39], stage_range[stage]);
- bf1[33] = clamp_value(bf0[33] + bf0[38], stage_range[stage]);
- bf1[34] = clamp_value(bf0[34] + bf0[37], stage_range[stage]);
- bf1[35] = clamp_value(bf0[35] + bf0[36], stage_range[stage]);
- bf1[36] = clamp_value(bf0[35] - bf0[36], stage_range[stage]);
- bf1[37] = clamp_value(bf0[34] - bf0[37], stage_range[stage]);
- bf1[38] = clamp_value(bf0[33] - bf0[38], stage_range[stage]);
- bf1[39] = clamp_value(bf0[32] - bf0[39], stage_range[stage]);
- bf1[40] = clamp_value(-bf0[40] + bf0[47], stage_range[stage]);
- bf1[41] = clamp_value(-bf0[41] + bf0[46], stage_range[stage]);
- bf1[42] = clamp_value(-bf0[42] + bf0[45], stage_range[stage]);
- bf1[43] = clamp_value(-bf0[43] + bf0[44], stage_range[stage]);
- bf1[44] = clamp_value(bf0[43] + bf0[44], stage_range[stage]);
- bf1[45] = clamp_value(bf0[42] + bf0[45], stage_range[stage]);
- bf1[46] = clamp_value(bf0[41] + bf0[46], stage_range[stage]);
- bf1[47] = clamp_value(bf0[40] + bf0[47], stage_range[stage]);
- bf1[48] = clamp_value(bf0[48] + bf0[55], stage_range[stage]);
- bf1[49] = clamp_value(bf0[49] + bf0[54], stage_range[stage]);
- bf1[50] = clamp_value(bf0[50] + bf0[53], stage_range[stage]);
- bf1[51] = clamp_value(bf0[51] + bf0[52], stage_range[stage]);
- bf1[52] = clamp_value(bf0[51] - bf0[52], stage_range[stage]);
- bf1[53] = clamp_value(bf0[50] - bf0[53], stage_range[stage]);
- bf1[54] = clamp_value(bf0[49] - bf0[54], stage_range[stage]);
- bf1[55] = clamp_value(bf0[48] - bf0[55], stage_range[stage]);
- bf1[56] = clamp_value(-bf0[56] + bf0[63], stage_range[stage]);
- bf1[57] = clamp_value(-bf0[57] + bf0[62], stage_range[stage]);
- bf1[58] = clamp_value(-bf0[58] + bf0[61], stage_range[stage]);
- bf1[59] = clamp_value(-bf0[59] + bf0[60], stage_range[stage]);
- bf1[60] = clamp_value(bf0[59] + bf0[60], stage_range[stage]);
- bf1[61] = clamp_value(bf0[58] + bf0[61], stage_range[stage]);
- bf1[62] = clamp_value(bf0[57] + bf0[62], stage_range[stage]);
- bf1[63] = clamp_value(bf0[56] + bf0[63], stage_range[stage]);
- av1_range_check_buf(stage, input, bf1, size, stage_range[stage]);
-
- // stage 8
- stage++;
- bf0 = output;
- bf1 = step;
- bf1[0] = clamp_value(bf0[0] + bf0[7], stage_range[stage]);
- bf1[1] = clamp_value(bf0[1] + bf0[6], stage_range[stage]);
- bf1[2] = clamp_value(bf0[2] + bf0[5], stage_range[stage]);
- bf1[3] = clamp_value(bf0[3] + bf0[4], stage_range[stage]);
- bf1[4] = clamp_value(bf0[3] - bf0[4], stage_range[stage]);
- bf1[5] = clamp_value(bf0[2] - bf0[5], stage_range[stage]);
- bf1[6] = clamp_value(bf0[1] - bf0[6], stage_range[stage]);
- bf1[7] = clamp_value(bf0[0] - bf0[7], stage_range[stage]);
- bf1[8] = bf0[8];
- bf1[9] = bf0[9];
- bf1[10] = half_btf(-cospi[32], bf0[10], cospi[32], bf0[13], cos_bit);
- bf1[11] = half_btf(-cospi[32], bf0[11], cospi[32], bf0[12], cos_bit);
- bf1[12] = half_btf(cospi[32], bf0[11], cospi[32], bf0[12], cos_bit);
- bf1[13] = half_btf(cospi[32], bf0[10], cospi[32], bf0[13], cos_bit);
- bf1[14] = bf0[14];
- bf1[15] = bf0[15];
- bf1[16] = clamp_value(bf0[16] + bf0[23], stage_range[stage]);
- bf1[17] = clamp_value(bf0[17] + bf0[22], stage_range[stage]);
- bf1[18] = clamp_value(bf0[18] + bf0[21], stage_range[stage]);
- bf1[19] = clamp_value(bf0[19] + bf0[20], stage_range[stage]);
- bf1[20] = clamp_value(bf0[19] - bf0[20], stage_range[stage]);
- bf1[21] = clamp_value(bf0[18] - bf0[21], stage_range[stage]);
- bf1[22] = clamp_value(bf0[17] - bf0[22], stage_range[stage]);
- bf1[23] = clamp_value(bf0[16] - bf0[23], stage_range[stage]);
- bf1[24] = clamp_value(-bf0[24] + bf0[31], stage_range[stage]);
- bf1[25] = clamp_value(-bf0[25] + bf0[30], stage_range[stage]);
- bf1[26] = clamp_value(-bf0[26] + bf0[29], stage_range[stage]);
- bf1[27] = clamp_value(-bf0[27] + bf0[28], stage_range[stage]);
- bf1[28] = clamp_value(bf0[27] + bf0[28], stage_range[stage]);
- bf1[29] = clamp_value(bf0[26] + bf0[29], stage_range[stage]);
- bf1[30] = clamp_value(bf0[25] + bf0[30], stage_range[stage]);
- bf1[31] = clamp_value(bf0[24] + bf0[31], stage_range[stage]);
- bf1[32] = bf0[32];
- bf1[33] = bf0[33];
- bf1[34] = bf0[34];
- bf1[35] = bf0[35];
- bf1[36] = half_btf(-cospi[16], bf0[36], cospi[48], bf0[59], cos_bit);
- bf1[37] = half_btf(-cospi[16], bf0[37], cospi[48], bf0[58], cos_bit);
- bf1[38] = half_btf(-cospi[16], bf0[38], cospi[48], bf0[57], cos_bit);
- bf1[39] = half_btf(-cospi[16], bf0[39], cospi[48], bf0[56], cos_bit);
- bf1[40] = half_btf(-cospi[48], bf0[40], -cospi[16], bf0[55], cos_bit);
- bf1[41] = half_btf(-cospi[48], bf0[41], -cospi[16], bf0[54], cos_bit);
- bf1[42] = half_btf(-cospi[48], bf0[42], -cospi[16], bf0[53], cos_bit);
- bf1[43] = half_btf(-cospi[48], bf0[43], -cospi[16], bf0[52], cos_bit);
- bf1[44] = bf0[44];
- bf1[45] = bf0[45];
- bf1[46] = bf0[46];
- bf1[47] = bf0[47];
- bf1[48] = bf0[48];
- bf1[49] = bf0[49];
- bf1[50] = bf0[50];
- bf1[51] = bf0[51];
- bf1[52] = half_btf(-cospi[16], bf0[43], cospi[48], bf0[52], cos_bit);
- bf1[53] = half_btf(-cospi[16], bf0[42], cospi[48], bf0[53], cos_bit);
- bf1[54] = half_btf(-cospi[16], bf0[41], cospi[48], bf0[54], cos_bit);
- bf1[55] = half_btf(-cospi[16], bf0[40], cospi[48], bf0[55], cos_bit);
- bf1[56] = half_btf(cospi[48], bf0[39], cospi[16], bf0[56], cos_bit);
- bf1[57] = half_btf(cospi[48], bf0[38], cospi[16], bf0[57], cos_bit);
- bf1[58] = half_btf(cospi[48], bf0[37], cospi[16], bf0[58], cos_bit);
- bf1[59] = half_btf(cospi[48], bf0[36], cospi[16], bf0[59], cos_bit);
- bf1[60] = bf0[60];
- bf1[61] = bf0[61];
- bf1[62] = bf0[62];
- bf1[63] = bf0[63];
- av1_range_check_buf(stage, input, bf1, size, stage_range[stage]);
-
- // stage 9
- stage++;
- bf0 = step;
- bf1 = output;
- bf1[0] = clamp_value(bf0[0] + bf0[15], stage_range[stage]);
- bf1[1] = clamp_value(bf0[1] + bf0[14], stage_range[stage]);
- bf1[2] = clamp_value(bf0[2] + bf0[13], stage_range[stage]);
- bf1[3] = clamp_value(bf0[3] + bf0[12], stage_range[stage]);
- bf1[4] = clamp_value(bf0[4] + bf0[11], stage_range[stage]);
- bf1[5] = clamp_value(bf0[5] + bf0[10], stage_range[stage]);
- bf1[6] = clamp_value(bf0[6] + bf0[9], stage_range[stage]);
- bf1[7] = clamp_value(bf0[7] + bf0[8], stage_range[stage]);
- bf1[8] = clamp_value(bf0[7] - bf0[8], stage_range[stage]);
- bf1[9] = clamp_value(bf0[6] - bf0[9], stage_range[stage]);
- bf1[10] = clamp_value(bf0[5] - bf0[10], stage_range[stage]);
- bf1[11] = clamp_value(bf0[4] - bf0[11], stage_range[stage]);
- bf1[12] = clamp_value(bf0[3] - bf0[12], stage_range[stage]);
- bf1[13] = clamp_value(bf0[2] - bf0[13], stage_range[stage]);
- bf1[14] = clamp_value(bf0[1] - bf0[14], stage_range[stage]);
- bf1[15] = clamp_value(bf0[0] - bf0[15], stage_range[stage]);
- bf1[16] = bf0[16];
- bf1[17] = bf0[17];
- bf1[18] = bf0[18];
- bf1[19] = bf0[19];
- bf1[20] = half_btf(-cospi[32], bf0[20], cospi[32], bf0[27], cos_bit);
- bf1[21] = half_btf(-cospi[32], bf0[21], cospi[32], bf0[26], cos_bit);
- bf1[22] = half_btf(-cospi[32], bf0[22], cospi[32], bf0[25], cos_bit);
- bf1[23] = half_btf(-cospi[32], bf0[23], cospi[32], bf0[24], cos_bit);
- bf1[24] = half_btf(cospi[32], bf0[23], cospi[32], bf0[24], cos_bit);
- bf1[25] = half_btf(cospi[32], bf0[22], cospi[32], bf0[25], cos_bit);
- bf1[26] = half_btf(cospi[32], bf0[21], cospi[32], bf0[26], cos_bit);
- bf1[27] = half_btf(cospi[32], bf0[20], cospi[32], bf0[27], cos_bit);
- bf1[28] = bf0[28];
- bf1[29] = bf0[29];
- bf1[30] = bf0[30];
- bf1[31] = bf0[31];
- bf1[32] = clamp_value(bf0[32] + bf0[47], stage_range[stage]);
- bf1[33] = clamp_value(bf0[33] + bf0[46], stage_range[stage]);
- bf1[34] = clamp_value(bf0[34] + bf0[45], stage_range[stage]);
- bf1[35] = clamp_value(bf0[35] + bf0[44], stage_range[stage]);
- bf1[36] = clamp_value(bf0[36] + bf0[43], stage_range[stage]);
- bf1[37] = clamp_value(bf0[37] + bf0[42], stage_range[stage]);
- bf1[38] = clamp_value(bf0[38] + bf0[41], stage_range[stage]);
- bf1[39] = clamp_value(bf0[39] + bf0[40], stage_range[stage]);
- bf1[40] = clamp_value(bf0[39] - bf0[40], stage_range[stage]);
- bf1[41] = clamp_value(bf0[38] - bf0[41], stage_range[stage]);
- bf1[42] = clamp_value(bf0[37] - bf0[42], stage_range[stage]);
- bf1[43] = clamp_value(bf0[36] - bf0[43], stage_range[stage]);
- bf1[44] = clamp_value(bf0[35] - bf0[44], stage_range[stage]);
- bf1[45] = clamp_value(bf0[34] - bf0[45], stage_range[stage]);
- bf1[46] = clamp_value(bf0[33] - bf0[46], stage_range[stage]);
- bf1[47] = clamp_value(bf0[32] - bf0[47], stage_range[stage]);
- bf1[48] = clamp_value(-bf0[48] + bf0[63], stage_range[stage]);
- bf1[49] = clamp_value(-bf0[49] + bf0[62], stage_range[stage]);
- bf1[50] = clamp_value(-bf0[50] + bf0[61], stage_range[stage]);
- bf1[51] = clamp_value(-bf0[51] + bf0[60], stage_range[stage]);
- bf1[52] = clamp_value(-bf0[52] + bf0[59], stage_range[stage]);
- bf1[53] = clamp_value(-bf0[53] + bf0[58], stage_range[stage]);
- bf1[54] = clamp_value(-bf0[54] + bf0[57], stage_range[stage]);
- bf1[55] = clamp_value(-bf0[55] + bf0[56], stage_range[stage]);
- bf1[56] = clamp_value(bf0[55] + bf0[56], stage_range[stage]);
- bf1[57] = clamp_value(bf0[54] + bf0[57], stage_range[stage]);
- bf1[58] = clamp_value(bf0[53] + bf0[58], stage_range[stage]);
- bf1[59] = clamp_value(bf0[52] + bf0[59], stage_range[stage]);
- bf1[60] = clamp_value(bf0[51] + bf0[60], stage_range[stage]);
- bf1[61] = clamp_value(bf0[50] + bf0[61], stage_range[stage]);
- bf1[62] = clamp_value(bf0[49] + bf0[62], stage_range[stage]);
- bf1[63] = clamp_value(bf0[48] + bf0[63], stage_range[stage]);
- av1_range_check_buf(stage, input, bf1, size, stage_range[stage]);
-
- // stage 10
- stage++;
- bf0 = output;
- bf1 = step;
- bf1[0] = clamp_value(bf0[0] + bf0[31], stage_range[stage]);
- bf1[1] = clamp_value(bf0[1] + bf0[30], stage_range[stage]);
- bf1[2] = clamp_value(bf0[2] + bf0[29], stage_range[stage]);
- bf1[3] = clamp_value(bf0[3] + bf0[28], stage_range[stage]);
- bf1[4] = clamp_value(bf0[4] + bf0[27], stage_range[stage]);
- bf1[5] = clamp_value(bf0[5] + bf0[26], stage_range[stage]);
- bf1[6] = clamp_value(bf0[6] + bf0[25], stage_range[stage]);
- bf1[7] = clamp_value(bf0[7] + bf0[24], stage_range[stage]);
- bf1[8] = clamp_value(bf0[8] + bf0[23], stage_range[stage]);
- bf1[9] = clamp_value(bf0[9] + bf0[22], stage_range[stage]);
- bf1[10] = clamp_value(bf0[10] + bf0[21], stage_range[stage]);
- bf1[11] = clamp_value(bf0[11] + bf0[20], stage_range[stage]);
- bf1[12] = clamp_value(bf0[12] + bf0[19], stage_range[stage]);
- bf1[13] = clamp_value(bf0[13] + bf0[18], stage_range[stage]);
- bf1[14] = clamp_value(bf0[14] + bf0[17], stage_range[stage]);
- bf1[15] = clamp_value(bf0[15] + bf0[16], stage_range[stage]);
- bf1[16] = clamp_value(bf0[15] - bf0[16], stage_range[stage]);
- bf1[17] = clamp_value(bf0[14] - bf0[17], stage_range[stage]);
- bf1[18] = clamp_value(bf0[13] - bf0[18], stage_range[stage]);
- bf1[19] = clamp_value(bf0[12] - bf0[19], stage_range[stage]);
- bf1[20] = clamp_value(bf0[11] - bf0[20], stage_range[stage]);
- bf1[21] = clamp_value(bf0[10] - bf0[21], stage_range[stage]);
- bf1[22] = clamp_value(bf0[9] - bf0[22], stage_range[stage]);
- bf1[23] = clamp_value(bf0[8] - bf0[23], stage_range[stage]);
- bf1[24] = clamp_value(bf0[7] - bf0[24], stage_range[stage]);
- bf1[25] = clamp_value(bf0[6] - bf0[25], stage_range[stage]);
- bf1[26] = clamp_value(bf0[5] - bf0[26], stage_range[stage]);
- bf1[27] = clamp_value(bf0[4] - bf0[27], stage_range[stage]);
- bf1[28] = clamp_value(bf0[3] - bf0[28], stage_range[stage]);
- bf1[29] = clamp_value(bf0[2] - bf0[29], stage_range[stage]);
- bf1[30] = clamp_value(bf0[1] - bf0[30], stage_range[stage]);
- bf1[31] = clamp_value(bf0[0] - bf0[31], stage_range[stage]);
- bf1[32] = bf0[32];
- bf1[33] = bf0[33];
- bf1[34] = bf0[34];
- bf1[35] = bf0[35];
- bf1[36] = bf0[36];
- bf1[37] = bf0[37];
- bf1[38] = bf0[38];
- bf1[39] = bf0[39];
- bf1[40] = half_btf(-cospi[32], bf0[40], cospi[32], bf0[55], cos_bit);
- bf1[41] = half_btf(-cospi[32], bf0[41], cospi[32], bf0[54], cos_bit);
- bf1[42] = half_btf(-cospi[32], bf0[42], cospi[32], bf0[53], cos_bit);
- bf1[43] = half_btf(-cospi[32], bf0[43], cospi[32], bf0[52], cos_bit);
- bf1[44] = half_btf(-cospi[32], bf0[44], cospi[32], bf0[51], cos_bit);
- bf1[45] = half_btf(-cospi[32], bf0[45], cospi[32], bf0[50], cos_bit);
- bf1[46] = half_btf(-cospi[32], bf0[46], cospi[32], bf0[49], cos_bit);
- bf1[47] = half_btf(-cospi[32], bf0[47], cospi[32], bf0[48], cos_bit);
- bf1[48] = half_btf(cospi[32], bf0[47], cospi[32], bf0[48], cos_bit);
- bf1[49] = half_btf(cospi[32], bf0[46], cospi[32], bf0[49], cos_bit);
- bf1[50] = half_btf(cospi[32], bf0[45], cospi[32], bf0[50], cos_bit);
- bf1[51] = half_btf(cospi[32], bf0[44], cospi[32], bf0[51], cos_bit);
- bf1[52] = half_btf(cospi[32], bf0[43], cospi[32], bf0[52], cos_bit);
- bf1[53] = half_btf(cospi[32], bf0[42], cospi[32], bf0[53], cos_bit);
- bf1[54] = half_btf(cospi[32], bf0[41], cospi[32], bf0[54], cos_bit);
- bf1[55] = half_btf(cospi[32], bf0[40], cospi[32], bf0[55], cos_bit);
- bf1[56] = bf0[56];
- bf1[57] = bf0[57];
- bf1[58] = bf0[58];
- bf1[59] = bf0[59];
- bf1[60] = bf0[60];
- bf1[61] = bf0[61];
- bf1[62] = bf0[62];
- bf1[63] = bf0[63];
- av1_range_check_buf(stage, input, bf1, size, stage_range[stage]);
-
- // stage 11
- stage++;
- bf0 = step;
- bf1 = output;
- bf1[0] = clamp_value(bf0[0] + bf0[63], stage_range[stage]);
- bf1[1] = clamp_value(bf0[1] + bf0[62], stage_range[stage]);
- bf1[2] = clamp_value(bf0[2] + bf0[61], stage_range[stage]);
- bf1[3] = clamp_value(bf0[3] + bf0[60], stage_range[stage]);
- bf1[4] = clamp_value(bf0[4] + bf0[59], stage_range[stage]);
- bf1[5] = clamp_value(bf0[5] + bf0[58], stage_range[stage]);
- bf1[6] = clamp_value(bf0[6] + bf0[57], stage_range[stage]);
- bf1[7] = clamp_value(bf0[7] + bf0[56], stage_range[stage]);
- bf1[8] = clamp_value(bf0[8] + bf0[55], stage_range[stage]);
- bf1[9] = clamp_value(bf0[9] + bf0[54], stage_range[stage]);
- bf1[10] = clamp_value(bf0[10] + bf0[53], stage_range[stage]);
- bf1[11] = clamp_value(bf0[11] + bf0[52], stage_range[stage]);
- bf1[12] = clamp_value(bf0[12] + bf0[51], stage_range[stage]);
- bf1[13] = clamp_value(bf0[13] + bf0[50], stage_range[stage]);
- bf1[14] = clamp_value(bf0[14] + bf0[49], stage_range[stage]);
- bf1[15] = clamp_value(bf0[15] + bf0[48], stage_range[stage]);
- bf1[16] = clamp_value(bf0[16] + bf0[47], stage_range[stage]);
- bf1[17] = clamp_value(bf0[17] + bf0[46], stage_range[stage]);
- bf1[18] = clamp_value(bf0[18] + bf0[45], stage_range[stage]);
- bf1[19] = clamp_value(bf0[19] + bf0[44], stage_range[stage]);
- bf1[20] = clamp_value(bf0[20] + bf0[43], stage_range[stage]);
- bf1[21] = clamp_value(bf0[21] + bf0[42], stage_range[stage]);
- bf1[22] = clamp_value(bf0[22] + bf0[41], stage_range[stage]);
- bf1[23] = clamp_value(bf0[23] + bf0[40], stage_range[stage]);
- bf1[24] = clamp_value(bf0[24] + bf0[39], stage_range[stage]);
- bf1[25] = clamp_value(bf0[25] + bf0[38], stage_range[stage]);
- bf1[26] = clamp_value(bf0[26] + bf0[37], stage_range[stage]);
- bf1[27] = clamp_value(bf0[27] + bf0[36], stage_range[stage]);
- bf1[28] = clamp_value(bf0[28] + bf0[35], stage_range[stage]);
- bf1[29] = clamp_value(bf0[29] + bf0[34], stage_range[stage]);
- bf1[30] = clamp_value(bf0[30] + bf0[33], stage_range[stage]);
- bf1[31] = clamp_value(bf0[31] + bf0[32], stage_range[stage]);
- bf1[32] = clamp_value(bf0[31] - bf0[32], stage_range[stage]);
- bf1[33] = clamp_value(bf0[30] - bf0[33], stage_range[stage]);
- bf1[34] = clamp_value(bf0[29] - bf0[34], stage_range[stage]);
- bf1[35] = clamp_value(bf0[28] - bf0[35], stage_range[stage]);
- bf1[36] = clamp_value(bf0[27] - bf0[36], stage_range[stage]);
- bf1[37] = clamp_value(bf0[26] - bf0[37], stage_range[stage]);
- bf1[38] = clamp_value(bf0[25] - bf0[38], stage_range[stage]);
- bf1[39] = clamp_value(bf0[24] - bf0[39], stage_range[stage]);
- bf1[40] = clamp_value(bf0[23] - bf0[40], stage_range[stage]);
- bf1[41] = clamp_value(bf0[22] - bf0[41], stage_range[stage]);
- bf1[42] = clamp_value(bf0[21] - bf0[42], stage_range[stage]);
- bf1[43] = clamp_value(bf0[20] - bf0[43], stage_range[stage]);
- bf1[44] = clamp_value(bf0[19] - bf0[44], stage_range[stage]);
- bf1[45] = clamp_value(bf0[18] - bf0[45], stage_range[stage]);
- bf1[46] = clamp_value(bf0[17] - bf0[46], stage_range[stage]);
- bf1[47] = clamp_value(bf0[16] - bf0[47], stage_range[stage]);
- bf1[48] = clamp_value(bf0[15] - bf0[48], stage_range[stage]);
- bf1[49] = clamp_value(bf0[14] - bf0[49], stage_range[stage]);
- bf1[50] = clamp_value(bf0[13] - bf0[50], stage_range[stage]);
- bf1[51] = clamp_value(bf0[12] - bf0[51], stage_range[stage]);
- bf1[52] = clamp_value(bf0[11] - bf0[52], stage_range[stage]);
- bf1[53] = clamp_value(bf0[10] - bf0[53], stage_range[stage]);
- bf1[54] = clamp_value(bf0[9] - bf0[54], stage_range[stage]);
- bf1[55] = clamp_value(bf0[8] - bf0[55], stage_range[stage]);
- bf1[56] = clamp_value(bf0[7] - bf0[56], stage_range[stage]);
- bf1[57] = clamp_value(bf0[6] - bf0[57], stage_range[stage]);
- bf1[58] = clamp_value(bf0[5] - bf0[58], stage_range[stage]);
- bf1[59] = clamp_value(bf0[4] - bf0[59], stage_range[stage]);
- bf1[60] = clamp_value(bf0[3] - bf0[60], stage_range[stage]);
- bf1[61] = clamp_value(bf0[2] - bf0[61], stage_range[stage]);
- bf1[62] = clamp_value(bf0[1] - bf0[62], stage_range[stage]);
- bf1[63] = clamp_value(bf0[0] - bf0[63], stage_range[stage]);
-}
diff --git a/third_party/aom/av1/common/av1_inv_txfm1d.h b/third_party/aom/av1/common/av1_inv_txfm1d.h
deleted file mode 100644
index c31c019aa..000000000
--- a/third_party/aom/av1/common/av1_inv_txfm1d.h
+++ /dev/null
@@ -1,61 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#ifndef AOM_AV1_COMMON_AV1_INV_TXFM1D_H_
-#define AOM_AV1_COMMON_AV1_INV_TXFM1D_H_
-
-#include "av1/common/av1_txfm.h"
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-// Saturates |value| to the range of a signed integer that is |bit| bits wide,
-// i.e. [-2^(bit-1), 2^(bit-1) - 1]. A non-positive |bit| means "no valid
-// clamp width" and |value| is returned unchanged.
-static INLINE int32_t clamp_value(int32_t value, int8_t bit) {
-  if (bit > 0) {
-    const int64_t limit = 1LL << (bit - 1);
-    return (int32_t)clamp64(value, -limit, limit - 1);
-  }
-  return value;  // Do nothing for invalid clamp bit.
-}
-
-// Runs clamp_value() with the same |bit| width over the first |size| entries
-// of |buf|, overwriting each entry in place.
-static INLINE void clamp_buf(int32_t *buf, int32_t size, int8_t bit) {
-  int idx = 0;
-  while (idx < size) {
-    buf[idx] = clamp_value(buf[idx], bit);
-    ++idx;
-  }
-}
-
-// 1-D inverse transform entry points (DCT, ADST and identity variants).
-// All of them share one signature: |input| is read, |output| is written,
-// |cos_bit| and |stage_range| are passed through to the implementation.
-// NOTE(review): |stage_range| appears to be indexed by butterfly stage and
-// used as the clamp bit-width of that stage - confirm in av1_inv_txfm1d.c.
-void av1_idct4_new(const int32_t *input, int32_t *output, int8_t cos_bit,
- const int8_t *stage_range);
-void av1_idct8_new(const int32_t *input, int32_t *output, int8_t cos_bit,
- const int8_t *stage_range);
-void av1_idct16_new(const int32_t *input, int32_t *output, int8_t cos_bit,
- const int8_t *stage_range);
-void av1_idct32_new(const int32_t *input, int32_t *output, int8_t cos_bit,
- const int8_t *stage_range);
-void av1_idct64_new(const int32_t *input, int32_t *output, int8_t cos_bit,
- const int8_t *stage_range);
-void av1_iadst4_new(const int32_t *input, int32_t *output, int8_t cos_bit,
- const int8_t *stage_range);
-void av1_iadst8_new(const int32_t *input, int32_t *output, int8_t cos_bit,
- const int8_t *stage_range);
-void av1_iadst16_new(const int32_t *input, int32_t *output, int8_t cos_bit,
- const int8_t *stage_range);
-void av1_iidentity4_c(const int32_t *input, int32_t *output, int8_t cos_bit,
- const int8_t *stage_range);
-void av1_iidentity8_c(const int32_t *input, int32_t *output, int8_t cos_bit,
- const int8_t *stage_range);
-void av1_iidentity16_c(const int32_t *input, int32_t *output, int8_t cos_bit,
- const int8_t *stage_range);
-void av1_iidentity32_c(const int32_t *input, int32_t *output, int8_t cos_bit,
- const int8_t *stage_range);
-
-#ifdef __cplusplus
-}
-#endif
-
-#endif // AOM_AV1_COMMON_AV1_INV_TXFM1D_H_
diff --git a/third_party/aom/av1/common/av1_inv_txfm1d_cfg.h b/third_party/aom/av1/common/av1_inv_txfm1d_cfg.h
deleted file mode 100644
index 7d80a0099..000000000
--- a/third_party/aom/av1/common/av1_inv_txfm1d_cfg.h
+++ /dev/null
@@ -1,47 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#ifndef AOM_AV1_COMMON_AV1_INV_TXFM1D_CFG_H_
-#define AOM_AV1_COMMON_AV1_INV_TXFM1D_CFG_H_
-#include "av1/common/av1_inv_txfm1d.h"
-
-// Starting range per transform size, one entry per TX_SIZE (see the size
-// noted next to each value). Each entry is the sum of fwd_shift_## for that
-// size; av1_gen_inv_stage_range() adds it to the bit depth when computing
-// the range it asserts against.
-static const int8_t inv_start_range[TX_SIZES_ALL] = {
- 5, // 4x4 transform
- 6, // 8x8 transform
- 7, // 16x16 transform
- 7, // 32x32 transform
- 7, // 64x64 transform
- 5, // 4x8 transform
- 5, // 8x4 transform
- 6, // 8x16 transform
- 6, // 16x8 transform
- 6, // 16x32 transform
- 6, // 32x16 transform
- 6, // 32x64 transform
- 6, // 64x32 transform
- 6, // 4x16 transform
- 6, // 16x4 transform
- 7, // 8x32 transform
- 7, // 32x8 transform
- 7, // 16x64 transform
- 7, // 64x16 transform
-};
-
-// One pointer per TX_SIZE to that size's inverse shift table.
-extern const int8_t *inv_txfm_shift_ls[TX_SIZES_ALL];
-
-// Values in both inv_cos_bit_col and inv_cos_bit_row are always 12
-// for each valid row and col combination
-#define INV_COS_BIT 12
-extern const int8_t inv_cos_bit_col[5 /*row*/][5 /*col*/];
-extern const int8_t inv_cos_bit_row[5 /*row*/][5 /*col*/];
-
-#endif // AOM_AV1_COMMON_AV1_INV_TXFM1D_CFG_H_
diff --git a/third_party/aom/av1/common/av1_inv_txfm2d.c b/third_party/aom/av1/common/av1_inv_txfm2d.c
deleted file mode 100644
index 4e6944314..000000000
--- a/third_party/aom/av1/common/av1_inv_txfm2d.c
+++ /dev/null
@@ -1,505 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#include "config/aom_dsp_rtcd.h"
-#include "config/av1_rtcd.h"
-
-#include "av1/common/enums.h"
-#include "av1/common/av1_txfm.h"
-#include "av1/common/av1_inv_txfm1d.h"
-#include "av1/common/av1_inv_txfm1d_cfg.h"
-
-// Inverse 4x4 Walsh-Hadamard transform of the 16 coefficients in |input|.
-// The result is accumulated into the destination block through
-// highbd_clip_pixel_add(). |dest8| is converted with CONVERT_TO_SHORTPTR and
-// then accessed as uint16_t samples; |stride| is the distance between
-// destination rows in samples.
-void av1_highbd_iwht4x4_16_add_c(const tran_low_t *input, uint8_t *dest8,
- int stride, int bd) {
- /* 4-point reversible, orthonormal inverse Walsh-Hadamard in 3.5 adds,
- 0.5 shifts per pixel. */
- int i;
- tran_low_t output[16];
- tran_low_t a1, b1, c1, d1, e1;
- const tran_low_t *ip = input;
- tran_low_t *op = output;
- uint16_t *dest = CONVERT_TO_SHORTPTR(dest8);
-
- // First pass: each iteration consumes 4 consecutive coefficients, scales
- // them down by UNIT_QUANT_SHIFT and stores 4 results in |output|.
- for (i = 0; i < 4; i++) {
- a1 = ip[0] >> UNIT_QUANT_SHIFT;
- c1 = ip[1] >> UNIT_QUANT_SHIFT;
- d1 = ip[2] >> UNIT_QUANT_SHIFT;
- b1 = ip[3] >> UNIT_QUANT_SHIFT;
- a1 += c1;
- d1 -= b1;
- e1 = (a1 - d1) >> 1;
- b1 = e1 - b1;
- c1 = e1 - c1;
- a1 -= b1;
- d1 += c1;
-
- op[0] = a1;
- op[1] = b1;
- op[2] = c1;
- op[3] = d1;
- ip += 4;
- op += 4;
- }
-
- // Second pass: reads |output| with a stride of 4 (the other dimension),
- // applies the same lifting steps without the quantizer shift and adds the
- // four results to one destination column.
- ip = output;
- for (i = 0; i < 4; i++) {
- a1 = ip[4 * 0];
- c1 = ip[4 * 1];
- d1 = ip[4 * 2];
- b1 = ip[4 * 3];
- a1 += c1;
- d1 -= b1;
- e1 = (a1 - d1) >> 1;
- b1 = e1 - b1;
- c1 = e1 - c1;
- a1 -= b1;
- d1 += c1;
-
- range_check_value(a1, bd + 1);
- range_check_value(b1, bd + 1);
- range_check_value(c1, bd + 1);
- range_check_value(d1, bd + 1);
-
- dest[stride * 0] = highbd_clip_pixel_add(dest[stride * 0], a1, bd);
- dest[stride * 1] = highbd_clip_pixel_add(dest[stride * 1], b1, bd);
- dest[stride * 2] = highbd_clip_pixel_add(dest[stride * 2], c1, bd);
- dest[stride * 3] = highbd_clip_pixel_add(dest[stride * 3], d1, bd);
-
- ip++;
- dest++;
- }
-}
-
-// DC-only variant of the inverse 4x4 Walsh-Hadamard transform: only in[0] is
-// read. The value is split once into (a1, e1) for the first dimension, then
-// each of the four intermediate values is split again and added to one
-// destination column through highbd_clip_pixel_add(). |dest8| is converted
-// with CONVERT_TO_SHORTPTR and accessed as uint16_t samples.
-void av1_highbd_iwht4x4_1_add_c(const tran_low_t *in, uint8_t *dest8,
-                                int dest_stride, int bd) {
-  int i;
-  tran_low_t a1, e1;
-  tran_low_t tmp[4];
-  const tran_low_t *ip = in;
-  tran_low_t *op = tmp;
-  uint16_t *dest = CONVERT_TO_SHORTPTR(dest8);
-  // Note: |bd| is used below, so no "(void)bd" unused-marker is needed.
-
-  // First dimension: tmp = { a1 - e1, e1, e1, e1 } with e1 = a1 >> 1.
-  a1 = ip[0] >> UNIT_QUANT_SHIFT;
-  e1 = a1 >> 1;
-  a1 -= e1;
-  op[0] = a1;
-  op[1] = op[2] = op[3] = e1;
-
-  // Second dimension: one destination column per intermediate value.
-  ip = tmp;
-  for (i = 0; i < 4; i++) {
-    e1 = ip[0] >> 1;
-    a1 = ip[0] - e1;
-    dest[dest_stride * 0] =
-        highbd_clip_pixel_add(dest[dest_stride * 0], a1, bd);
-    dest[dest_stride * 1] =
-        highbd_clip_pixel_add(dest[dest_stride * 1], e1, bd);
-    dest[dest_stride * 2] =
-        highbd_clip_pixel_add(dest[dest_stride * 2], e1, bd);
-    dest[dest_stride * 3] =
-        highbd_clip_pixel_add(dest[dest_stride * 3], e1, bd);
-    ip++;
-    dest++;
-  }
-}
-
-// Maps a 1-D transform type to the function implementing its inverse.
-// An unknown type trips the assert and yields NULL.
-static INLINE TxfmFunc inv_txfm_type_to_func(TXFM_TYPE txfm_type) {
-  TxfmFunc func = NULL;
-  switch (txfm_type) {
-    case TXFM_TYPE_DCT4: func = av1_idct4_new; break;
-    case TXFM_TYPE_DCT8: func = av1_idct8_new; break;
-    case TXFM_TYPE_DCT16: func = av1_idct16_new; break;
-    case TXFM_TYPE_DCT32: func = av1_idct32_new; break;
-    case TXFM_TYPE_DCT64: func = av1_idct64_new; break;
-    case TXFM_TYPE_ADST4: func = av1_iadst4_new; break;
-    case TXFM_TYPE_ADST8: func = av1_iadst8_new; break;
-    case TXFM_TYPE_ADST16: func = av1_iadst16_new; break;
-    case TXFM_TYPE_IDENTITY4: func = av1_iidentity4_c; break;
-    case TXFM_TYPE_IDENTITY8: func = av1_iidentity8_c; break;
-    case TXFM_TYPE_IDENTITY16: func = av1_iidentity16_c; break;
-    case TXFM_TYPE_IDENTITY32: func = av1_iidentity32_c; break;
-    default: assert(0); break;
-  }
-  return func;
-}
-
-// Two-entry shift tables, one per transform size. inv_txfm2d_add_c() passes
-// -shift[0] to av1_round_shift_array() after each row transform and
-// -shift[1] after each column transform.
-static const int8_t inv_shift_4x4[2] = { 0, -4 };
-static const int8_t inv_shift_8x8[2] = { -1, -4 };
-static const int8_t inv_shift_16x16[2] = { -2, -4 };
-static const int8_t inv_shift_32x32[2] = { -2, -4 };
-static const int8_t inv_shift_64x64[2] = { -2, -4 };
-static const int8_t inv_shift_4x8[2] = { 0, -4 };
-static const int8_t inv_shift_8x4[2] = { 0, -4 };
-static const int8_t inv_shift_8x16[2] = { -1, -4 };
-static const int8_t inv_shift_16x8[2] = { -1, -4 };
-static const int8_t inv_shift_16x32[2] = { -1, -4 };
-static const int8_t inv_shift_32x16[2] = { -1, -4 };
-static const int8_t inv_shift_32x64[2] = { -1, -4 };
-static const int8_t inv_shift_64x32[2] = { -1, -4 };
-static const int8_t inv_shift_4x16[2] = { -1, -4 };
-static const int8_t inv_shift_16x4[2] = { -1, -4 };
-static const int8_t inv_shift_8x32[2] = { -2, -4 };
-static const int8_t inv_shift_32x8[2] = { -2, -4 };
-static const int8_t inv_shift_16x64[2] = { -2, -4 };
-static const int8_t inv_shift_64x16[2] = { -2, -4 };
-
-// Shift table lookup; av1_get_inv_txfm_cfg() indexes it with a TX_SIZE.
-const int8_t *inv_txfm_shift_ls[TX_SIZES_ALL] = {
- inv_shift_4x4, inv_shift_8x8, inv_shift_16x16, inv_shift_32x32,
- inv_shift_64x64, inv_shift_4x8, inv_shift_8x4, inv_shift_8x16,
- inv_shift_16x8, inv_shift_16x32, inv_shift_32x16, inv_shift_32x64,
- inv_shift_64x32, inv_shift_4x16, inv_shift_16x4, inv_shift_8x32,
- inv_shift_32x8, inv_shift_16x64, inv_shift_64x16,
-};
-
-// Cos-bit tables, indexed [txw_idx][txh_idx] by av1_get_inv_txfm_cfg().
-// NOTE(review): the 0 entries presumably mark width/height combinations that
-// are not valid transform sizes - confirm.
-/* clang-format off */
-const int8_t inv_cos_bit_col[MAX_TXWH_IDX] // txw_idx
- [MAX_TXWH_IDX] = { // txh_idx
- { INV_COS_BIT, INV_COS_BIT, INV_COS_BIT, 0, 0 },
- { INV_COS_BIT, INV_COS_BIT, INV_COS_BIT, INV_COS_BIT, 0 },
- { INV_COS_BIT, INV_COS_BIT, INV_COS_BIT, INV_COS_BIT, INV_COS_BIT },
- { 0, INV_COS_BIT, INV_COS_BIT, INV_COS_BIT, INV_COS_BIT },
- { 0, 0, INV_COS_BIT, INV_COS_BIT, INV_COS_BIT }
- };
-
-const int8_t inv_cos_bit_row[MAX_TXWH_IDX] // txw_idx
- [MAX_TXWH_IDX] = { // txh_idx
- { INV_COS_BIT, INV_COS_BIT, INV_COS_BIT, 0, 0 },
- { INV_COS_BIT, INV_COS_BIT, INV_COS_BIT, INV_COS_BIT, 0 },
- { INV_COS_BIT, INV_COS_BIT, INV_COS_BIT, INV_COS_BIT, INV_COS_BIT },
- { 0, INV_COS_BIT, INV_COS_BIT, INV_COS_BIT, INV_COS_BIT },
- { 0, 0, INV_COS_BIT, INV_COS_BIT, INV_COS_BIT }
- };
-/* clang-format on */
-
-// Stage ranges copied into the cfg when a pass uses TXFM_TYPE_ADST4; only
-// stage 1 is non-zero.
-const int8_t iadst4_range[7] = { 0, 1, 0, 0, 0, 0, 0 };
-
-// Fills |cfg| with what the 2-D inverse transform needs for the given
-// |tx_type| / |tx_size| pair: the flip configuration (via set_flip_cfg()),
-// the shift table, the cos bits, the 1-D transform type of each pass, the
-// number of stages of each pass and the per-stage ranges (all zero except
-// for ADST4, which receives iadst4_range).
-// |cfg| must not be NULL.
-void av1_get_inv_txfm_cfg(TX_TYPE tx_type, TX_SIZE tx_size,
-                          TXFM_2D_FLIP_CFG *cfg) {
-  assert(cfg != NULL);
-  cfg->tx_size = tx_size;
-  av1_zero(cfg->stage_range_col);
-  av1_zero(cfg->stage_range_row);
-  // A single call is enough; issuing the identical call a second time (as
-  // was done before) adds nothing.
-  set_flip_cfg(tx_type, cfg);
-  const TX_TYPE_1D tx_type_1d_col = vtx_tab[tx_type];
-  const TX_TYPE_1D tx_type_1d_row = htx_tab[tx_type];
-  cfg->shift = inv_txfm_shift_ls[tx_size];
-  const int txw_idx = get_txw_idx(tx_size);
-  const int txh_idx = get_txh_idx(tx_size);
-  cfg->cos_bit_col = inv_cos_bit_col[txw_idx][txh_idx];
-  cfg->cos_bit_row = inv_cos_bit_row[txw_idx][txh_idx];
-  // The column transform runs along the height, hence txh_idx here and
-  // txw_idx for the row transform below.
-  cfg->txfm_type_col = av1_txfm_type_ls[txh_idx][tx_type_1d_col];
-  if (cfg->txfm_type_col == TXFM_TYPE_ADST4) {
-    memcpy(cfg->stage_range_col, iadst4_range, sizeof(iadst4_range));
-  }
-  cfg->txfm_type_row = av1_txfm_type_ls[txw_idx][tx_type_1d_row];
-  if (cfg->txfm_type_row == TXFM_TYPE_ADST4) {
-    memcpy(cfg->stage_range_row, iadst4_range, sizeof(iadst4_range));
-  }
-  cfg->stage_num_col = av1_txfm_stage_num_list[cfg->txfm_type_col];
-  cfg->stage_num_row = av1_txfm_stage_num_list[cfg->txfm_type_row];
-}
-
-// Writes the per-stage clamp widths for the row and column passes of the
-// inverse transform into |stage_range_row| / |stage_range_col|. Every stage
-// of a pass receives the same width, chosen from the bit depth |bd|
-// (8, 10 or 12). The "real" range of each stage is computed only so that
-// assert() can check it fits.
-void av1_gen_inv_stage_range(int8_t *stage_range_col, int8_t *stage_range_row,
-                             const TXFM_2D_FLIP_CFG *cfg, TX_SIZE tx_size,
-                             int bd) {
-  const int fwd_shift = inv_start_range[tx_size];
-  const int8_t *shift = cfg->shift;
-  int8_t opt_range_row, opt_range_col;
-  switch (bd) {
-    case 8:
-      opt_range_row = 16;
-      opt_range_col = 16;
-      break;
-    case 10:
-      opt_range_row = 18;
-      opt_range_col = 16;
-      break;
-    default:
-      assert(bd == 12);
-      opt_range_row = 20;
-      opt_range_col = 18;
-      break;
-  }
-
-  // Row pass. The extra "< MAX_TXFM_STAGE_NUM" bound mutes an array bounds
-  // warning.
-  for (int stage = 0;
-       stage < cfg->stage_num_row && stage < MAX_TXFM_STAGE_NUM; ++stage) {
-    const int real_range_row =
-        cfg->stage_range_row[stage] + fwd_shift + bd + 1;
-    (void)real_range_row;
-    // The adst4 may use 1 extra bit on top of opt_range_row at stage 1, so
-    // opt_range_row >= real_range_row is not required to hold there.
-    assert((cfg->txfm_type_row == TXFM_TYPE_ADST4 && stage == 1) ||
-           opt_range_row >= real_range_row);
-    stage_range_row[stage] = opt_range_row;
-  }
-
-  // Column pass. Same bound trick as above; the row-pass shift is already
-  // applied, so shift[0] enters the range.
-  for (int stage = 0;
-       stage < cfg->stage_num_col && stage < MAX_TXFM_STAGE_NUM; ++stage) {
-    const int real_range_col =
-        cfg->stage_range_col[stage] + fwd_shift + shift[0] + bd + 1;
-    (void)real_range_col;
-    // The adst4 may use 1 extra bit on top of opt_range_col at stage 1, so
-    // opt_range_col >= real_range_col is not required to hold there.
-    assert((cfg->txfm_type_col == TXFM_TYPE_ADST4 && stage == 1) ||
-           opt_range_col >= real_range_col);
-    stage_range_col[stage] = opt_range_col;
-  }
-}
-
-// Generic 2-D inverse transform: runs the 1-D row transform over every row
-// of |input|, then the 1-D column transform over every column of the
-// intermediate block, and adds the result to |output| through
-// highbd_clip_pixel_add(). Width and height come from cfg->tx_size; |tx_size|
-// is only forwarded to av1_gen_inv_stage_range().
-//
-// |txfm_buf| is scratch memory of at least
-//   rows * cols + 2 * AOMMAX(rows, cols)
-// entries, laid out as [line input][line output][rows * cols block].
-static INLINE void inv_txfm2d_add_c(const int32_t *input, uint16_t *output,
-                                    int stride, TXFM_2D_FLIP_CFG *cfg,
-                                    int32_t *txfm_buf, TX_SIZE tx_size,
-                                    int bd) {
-  const int num_cols = tx_size_wide[cfg->tx_size];
-  const int num_rows = tx_size_high[cfg->tx_size];
-  const int8_t *shift = cfg->shift;
-  const int rect_type = get_rect_tx_log_ratio(num_cols, num_rows);
-  int8_t stage_range_row[MAX_TXFM_STAGE_NUM];
-  int8_t stage_range_col[MAX_TXFM_STAGE_NUM];
-  assert(cfg->stage_num_row <= MAX_TXFM_STAGE_NUM);
-  assert(cfg->stage_num_col <= MAX_TXFM_STAGE_NUM);
-  av1_gen_inv_stage_range(stage_range_col, stage_range_row, cfg, tx_size, bd);
-
-  const int8_t cos_bit_col = cfg->cos_bit_col;
-  const int8_t cos_bit_row = cfg->cos_bit_row;
-  const TxfmFunc col_txfm = inv_txfm_type_to_func(cfg->txfm_type_col);
-  const TxfmFunc row_txfm = inv_txfm_type_to_func(cfg->txfm_type_row);
-
-  // Carve the scratch buffer into two line buffers and the 2-D block.
-  const int line_len = AOMMAX(num_rows, num_cols);
-  int32_t *const line_in = txfm_buf;
-  int32_t *const line_out = line_in + line_len;
-  int32_t *const block = line_out + line_len;
-
-  // Inputs are pre-scaled by NewInvSqrt2 when abs(rect_type) == 1.
-  const int scale_input = (abs(rect_type) == 1);
-
-  // Rows
-  for (int r = 0; r < num_rows; ++r) {
-    const int32_t *const src_row = input + r * num_cols;
-    int32_t *const dst_row = block + r * num_cols;
-    if (scale_input) {
-      for (int c = 0; c < num_cols; ++c) {
-        line_in[c] =
-            round_shift((int64_t)src_row[c] * NewInvSqrt2, NewSqrt2Bits);
-      }
-    } else {
-      for (int c = 0; c < num_cols; ++c) {
-        line_in[c] = src_row[c];
-      }
-    }
-    clamp_buf(line_in, num_cols, bd + 8);
-    row_txfm(line_in, dst_row, cos_bit_row, stage_range_row);
-    av1_round_shift_array(dst_row, num_cols, -shift[0]);
-  }
-
-  // Columns
-  for (int c = 0; c < num_cols; ++c) {
-    // With lr_flip set, output column c is fed from the mirrored column.
-    const int src_col = (cfg->lr_flip == 0) ? c : (num_cols - c - 1);
-    for (int r = 0; r < num_rows; ++r) {
-      line_in[r] = block[r * num_cols + src_col];
-    }
-    clamp_buf(line_in, num_rows, AOMMAX(bd + 6, 16));
-    col_txfm(line_in, line_out, cos_bit_col, stage_range_col);
-    av1_round_shift_array(line_out, num_rows, -shift[1]);
-    for (int r = 0; r < num_rows; ++r) {
-      // With ud_flip set, the transformed column is written upside down.
-      const int src_r = (cfg->ud_flip == 0) ? r : (num_rows - r - 1);
-      uint16_t *const px = &output[r * stride + c];
-      *px = highbd_clip_pixel_add(*px, line_out[src_r], bd);
-    }
-  }
-}
-
-// Convenience front end for inv_txfm2d_add_c(): derives the transform
-// configuration for |tx_type| / |tx_size| on the stack and runs the generic
-// 2-D inverse transform with it.
-static INLINE void inv_txfm2d_add_facade(const int32_t *input, uint16_t *output,
-                                         int stride, int32_t *txfm_buf,
-                                         TX_TYPE tx_type, TX_SIZE tx_size,
-                                         int bd) {
-  TXFM_2D_FLIP_CFG flip_cfg;
-  av1_get_inv_txfm_cfg(tx_type, tx_size, &flip_cfg);
-  // |tx_size| is passed on separately because av1_gen_inv_stage_range() looks
-  // up the forward shift sum by transform size.
-  inv_txfm2d_add_c(input, output, stride, &flip_cfg, txfm_buf, tx_size, bd);
-}
-
-// Fixed-size entry points. Each one reserves the scratch buffer required by
-// inv_txfm2d_add_c() (w * h + 2 * max(w, h) entries) and forwards to
-// inv_txfm2d_add_facade() with the matching TX_SIZE. The scratch buffer is
-// declared as int32_t because that is the element type the facade expects.
-void av1_inv_txfm2d_add_4x8_c(const int32_t *input, uint16_t *output,
-                              int stride, TX_TYPE tx_type, int bd) {
-  DECLARE_ALIGNED(32, int32_t, txfm_buf[4 * 8 + 8 + 8]);
-  inv_txfm2d_add_facade(input, output, stride, txfm_buf, tx_type, TX_4X8, bd);
-}
-
-void av1_inv_txfm2d_add_8x4_c(const int32_t *input, uint16_t *output,
-                              int stride, TX_TYPE tx_type, int bd) {
-  DECLARE_ALIGNED(32, int32_t, txfm_buf[8 * 4 + 8 + 8]);
-  inv_txfm2d_add_facade(input, output, stride, txfm_buf, tx_type, TX_8X4, bd);
-}
-
-void av1_inv_txfm2d_add_8x16_c(const int32_t *input, uint16_t *output,
-                               int stride, TX_TYPE tx_type, int bd) {
-  DECLARE_ALIGNED(32, int32_t, txfm_buf[8 * 16 + 16 + 16]);
-  inv_txfm2d_add_facade(input, output, stride, txfm_buf, tx_type, TX_8X16, bd);
-}
-
-void av1_inv_txfm2d_add_16x8_c(const int32_t *input, uint16_t *output,
-                               int stride, TX_TYPE tx_type, int bd) {
-  DECLARE_ALIGNED(32, int32_t, txfm_buf[16 * 8 + 16 + 16]);
-  inv_txfm2d_add_facade(input, output, stride, txfm_buf, tx_type, TX_16X8, bd);
-}
-
-void av1_inv_txfm2d_add_16x32_c(const int32_t *input, uint16_t *output,
-                                int stride, TX_TYPE tx_type, int bd) {
-  DECLARE_ALIGNED(32, int32_t, txfm_buf[16 * 32 + 32 + 32]);
-  inv_txfm2d_add_facade(input, output, stride, txfm_buf, tx_type, TX_16X32, bd);
-}
-
-void av1_inv_txfm2d_add_32x16_c(const int32_t *input, uint16_t *output,
-                                int stride, TX_TYPE tx_type, int bd) {
-  DECLARE_ALIGNED(32, int32_t, txfm_buf[32 * 16 + 32 + 32]);
-  inv_txfm2d_add_facade(input, output, stride, txfm_buf, tx_type, TX_32X16, bd);
-}
-
-void av1_inv_txfm2d_add_4x4_c(const int32_t *input, uint16_t *output,
-                              int stride, TX_TYPE tx_type, int bd) {
-  DECLARE_ALIGNED(32, int32_t, txfm_buf[4 * 4 + 4 + 4]);
-  inv_txfm2d_add_facade(input, output, stride, txfm_buf, tx_type, TX_4X4, bd);
-}
-
-void av1_inv_txfm2d_add_8x8_c(const int32_t *input, uint16_t *output,
-                              int stride, TX_TYPE tx_type, int bd) {
-  DECLARE_ALIGNED(32, int32_t, txfm_buf[8 * 8 + 8 + 8]);
-  inv_txfm2d_add_facade(input, output, stride, txfm_buf, tx_type, TX_8X8, bd);
-}
-
-void av1_inv_txfm2d_add_16x16_c(const int32_t *input, uint16_t *output,
-                                int stride, TX_TYPE tx_type, int bd) {
-  DECLARE_ALIGNED(32, int32_t, txfm_buf[16 * 16 + 16 + 16]);
-  inv_txfm2d_add_facade(input, output, stride, txfm_buf, tx_type, TX_16X16, bd);
-}
-
-void av1_inv_txfm2d_add_32x32_c(const int32_t *input, uint16_t *output,
-                                int stride, TX_TYPE tx_type, int bd) {
-  DECLARE_ALIGNED(32, int32_t, txfm_buf[32 * 32 + 32 + 32]);
-  inv_txfm2d_add_facade(input, output, stride, txfm_buf, tx_type, TX_32X32, bd);
-}
-
-// 64x64 inverse transform. Only the top-left 32x32 coefficients are supplied
-// in |input| (row stride 32); they are expanded into a zero-padded 64x64
-// block before the generic transform runs. The scratch buffer is declared as
-// int32_t because that is the element type inv_txfm2d_add_facade() expects.
-void av1_inv_txfm2d_add_64x64_c(const int32_t *input, uint16_t *output,
-                                int stride, TX_TYPE tx_type, int bd) {
-  // TODO(urvang): Can the same array be reused, instead of using a new array?
-  // Remap 32x32 input into a modified 64x64 by:
-  // - Copying over these values in top-left 32x32 locations.
-  // - Setting the rest of the locations to 0.
-  int32_t mod_input[64 * 64];
-  for (int row = 0; row < 32; ++row) {
-    memcpy(mod_input + row * 64, input + row * 32, 32 * sizeof(*mod_input));
-    memset(mod_input + row * 64 + 32, 0, 32 * sizeof(*mod_input));
-  }
-  // Rows 32..63 are entirely zero.
-  memset(mod_input + 32 * 64, 0, 32 * 64 * sizeof(*mod_input));
-  DECLARE_ALIGNED(32, int32_t, txfm_buf[64 * 64 + 64 + 64]);
-  inv_txfm2d_add_facade(mod_input, output, stride, txfm_buf, tx_type, TX_64X64,
-                        bd);
-}
-
-// 64x32 (64 wide, 32 high) inverse transform. |input| holds the left 32
-// columns of each of the 32 rows (row stride 32); the right half of every
-// row is zero-filled. The scratch buffer is declared as int32_t because that
-// is the element type inv_txfm2d_add_facade() expects.
-void av1_inv_txfm2d_add_64x32_c(const int32_t *input, uint16_t *output,
-                                int stride, TX_TYPE tx_type, int bd) {
-  // Remap 32x32 input into a modified 64x32 by:
-  // - Copying over these values in top-left 32x32 locations.
-  // - Setting the rest of the locations to 0.
-  int32_t mod_input[64 * 32];
-  for (int row = 0; row < 32; ++row) {
-    memcpy(mod_input + row * 64, input + row * 32, 32 * sizeof(*mod_input));
-    memset(mod_input + row * 64 + 32, 0, 32 * sizeof(*mod_input));
-  }
-  DECLARE_ALIGNED(32, int32_t, txfm_buf[64 * 32 + 64 + 64]);
-  inv_txfm2d_add_facade(mod_input, output, stride, txfm_buf, tx_type, TX_64X32,
-                        bd);
-}
-
-// 32x64 (32 wide, 64 high) inverse transform. |input| holds the top 32 rows
-// (32 * 32 values, contiguous); the bottom 32 rows are zero-filled. The
-// scratch buffer is declared as int32_t because that is the element type
-// inv_txfm2d_add_facade() expects.
-void av1_inv_txfm2d_add_32x64_c(const int32_t *input, uint16_t *output,
-                                int stride, TX_TYPE tx_type, int bd) {
-  // Remap 32x32 input into a modified 32x64 input by:
-  // - Copying over these values in top-left 32x32 locations.
-  // - Setting the rest of the locations to 0.
-  int32_t mod_input[32 * 64];
-  memcpy(mod_input, input, 32 * 32 * sizeof(*mod_input));
-  memset(mod_input + 32 * 32, 0, 32 * 32 * sizeof(*mod_input));
-  DECLARE_ALIGNED(32, int32_t, txfm_buf[64 * 32 + 64 + 64]);
-  inv_txfm2d_add_facade(mod_input, output, stride, txfm_buf, tx_type, TX_32X64,
-                        bd);
-}
-
-// 16x64 (16 wide, 64 high) inverse transform. |input| holds the top 32 rows
-// (16 * 32 values, contiguous); the bottom 32 rows are zero-filled. The
-// scratch buffer is declared as int32_t because that is the element type
-// inv_txfm2d_add_facade() expects.
-void av1_inv_txfm2d_add_16x64_c(const int32_t *input, uint16_t *output,
-                                int stride, TX_TYPE tx_type, int bd) {
-  // Remap 16x32 input into a modified 16x64 input by:
-  // - Copying over these values in top-left 16x32 locations.
-  // - Setting the rest of the locations to 0.
-  int32_t mod_input[16 * 64];
-  memcpy(mod_input, input, 16 * 32 * sizeof(*mod_input));
-  memset(mod_input + 16 * 32, 0, 16 * 32 * sizeof(*mod_input));
-  DECLARE_ALIGNED(32, int32_t, txfm_buf[16 * 64 + 64 + 64]);
-  inv_txfm2d_add_facade(mod_input, output, stride, txfm_buf, tx_type, TX_16X64,
-                        bd);
-}
-
-// 64x16 (64 wide, 16 high) inverse transform. |input| holds the left 32
-// columns of each of the 16 rows (row stride 32); the right half of every
-// row is zero-filled. The scratch buffer is declared as int32_t because that
-// is the element type inv_txfm2d_add_facade() expects.
-void av1_inv_txfm2d_add_64x16_c(const int32_t *input, uint16_t *output,
-                                int stride, TX_TYPE tx_type, int bd) {
-  // Remap 32x16 input into a modified 64x16 by:
-  // - Copying over these values in top-left 32x16 locations.
-  // - Setting the rest of the locations to 0.
-  int32_t mod_input[64 * 16];
-  for (int row = 0; row < 16; ++row) {
-    memcpy(mod_input + row * 64, input + row * 32, 32 * sizeof(*mod_input));
-    memset(mod_input + row * 64 + 32, 0, 32 * sizeof(*mod_input));
-  }
-  DECLARE_ALIGNED(32, int32_t, txfm_buf[16 * 64 + 64 + 64]);
-  inv_txfm2d_add_facade(mod_input, output, stride, txfm_buf, tx_type, TX_64X16,
-                        bd);
-}
-
-// Fixed-size entry points for the 4:1 / 1:4 shapes. Each one reserves the
-// scratch buffer required by inv_txfm2d_add_c() (w * h + 2 * max(w, h)
-// entries) and forwards to inv_txfm2d_add_facade() with the matching
-// TX_SIZE. The scratch buffer is declared as int32_t because that is the
-// element type the facade expects.
-void av1_inv_txfm2d_add_4x16_c(const int32_t *input, uint16_t *output,
-                               int stride, TX_TYPE tx_type, int bd) {
-  DECLARE_ALIGNED(32, int32_t, txfm_buf[4 * 16 + 16 + 16]);
-  inv_txfm2d_add_facade(input, output, stride, txfm_buf, tx_type, TX_4X16, bd);
-}
-
-void av1_inv_txfm2d_add_16x4_c(const int32_t *input, uint16_t *output,
-                               int stride, TX_TYPE tx_type, int bd) {
-  DECLARE_ALIGNED(32, int32_t, txfm_buf[4 * 16 + 16 + 16]);
-  inv_txfm2d_add_facade(input, output, stride, txfm_buf, tx_type, TX_16X4, bd);
-}
-
-void av1_inv_txfm2d_add_8x32_c(const int32_t *input, uint16_t *output,
-                               int stride, TX_TYPE tx_type, int bd) {
-  DECLARE_ALIGNED(32, int32_t, txfm_buf[8 * 32 + 32 + 32]);
-  inv_txfm2d_add_facade(input, output, stride, txfm_buf, tx_type, TX_8X32, bd);
-}
-
-void av1_inv_txfm2d_add_32x8_c(const int32_t *input, uint16_t *output,
-                               int stride, TX_TYPE tx_type, int bd) {
-  DECLARE_ALIGNED(32, int32_t, txfm_buf[8 * 32 + 32 + 32]);
-  inv_txfm2d_add_facade(input, output, stride, txfm_buf, tx_type, TX_32X8, bd);
-}
diff --git a/third_party/aom/av1/common/av1_loopfilter.c b/third_party/aom/av1/common/av1_loopfilter.c
deleted file mode 100644
index 537d8dfe9..000000000
--- a/third_party/aom/av1/common/av1_loopfilter.c
+++ /dev/null
@@ -1,2377 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#include <math.h>
-
-#include "config/aom_config.h"
-#include "config/aom_dsp_rtcd.h"
-
-#include "aom_dsp/aom_dsp_common.h"
-#include "aom_mem/aom_mem.h"
-#include "aom_ports/mem.h"
-#include "av1/common/av1_loopfilter.h"
-#include "av1/common/onyxc_int.h"
-#include "av1/common/reconinter.h"
-#include "av1/common/seg_common.h"
-
-// Segment feature used for the loop-filter level, one row per plane and two
-// entries per row.
-// NOTE(review): the second index presumably is an EDGE_DIR (0 = vertical,
-// 1 = horizontal), matching the _Y_V / _Y_H order - confirm at the use site.
-static const SEG_LVL_FEATURES seg_lvl_lf_lut[MAX_MB_PLANE][2] = {
- { SEG_LVL_ALT_LF_Y_V, SEG_LVL_ALT_LF_Y_H },
- { SEG_LVL_ALT_LF_U, SEG_LVL_ALT_LF_U },
- { SEG_LVL_ALT_LF_V, SEG_LVL_ALT_LF_V }
-};
-
-// Delta loop-filter id with the same [plane][2] layout as seg_lvl_lf_lut.
-static const int delta_lf_id_lut[MAX_MB_PLANE][2] = {
- { 0, 1 }, { 2, 2 }, { 3, 3 }
-};
-
-// Direction of the edge being filtered.
-typedef enum EDGE_DIR { VERT_EDGE = 0, HORZ_EDGE = 1, NUM_EDGE_DIRS } EDGE_DIR;
-
-// One 0/1 entry per prediction mode, grouped as noted on each row.
-// NOTE(review): presumably indexed by the prediction mode enum - confirm.
-static const int mode_lf_lut[] = {
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // INTRA_MODES
- 1, 1, 0, 1, // INTER_MODES (GLOBALMV == 0)
- 1, 1, 1, 1, 1, 1, 0, 1 // INTER_COMPOUND_MODES (GLOBAL_GLOBALMV == 0)
-};
-
-#if LOOP_FILTER_BITMASK
-// 256 bit masks (64x64 / 4x4) for left transform size for Y plane.
-// We use 4 uint64_t to represent the 256 bit.
-// Each 1 represents a position where we should apply a loop filter
-// across the left border of an 4x4 block boundary.
-//
-// In the case of TX_8X8 (shown low-order byte first) we end up with
-// a mask that looks like this (-- and | are used for better view)
-//
-// 10101010|10101010
-// 10101010|10101010
-// 10101010|10101010
-// 10101010|10101010
-// 10101010|10101010
-// 10101010|10101010
-// 10101010|10101010
-// 10101010|10101010
-// -----------------
-// 10101010|10101010
-// 10101010|10101010
-// 10101010|10101010
-// 10101010|10101010
-// 10101010|10101010
-// 10101010|10101010
-// 10101010|10101010
-// 10101010|10101010
-//
-// A loopfilter should be applied to every other 4x4 horizontally.
-
-// 256 bit masks (64x64 / 4x4) for above transform size for Y plane.
-// We use 4 uint64_t to represent the 256 bit.
-// Each 1 represents a position where we should apply a loop filter
-// across the top border of an 4x4 block boundary.
-//
-// In the case of TX_8X8 (shown low-order byte first) we end up with
-// a mask that looks like this
-//
-// 11111111|11111111
-// 00000000|00000000
-// 11111111|11111111
-// 00000000|00000000
-// 11111111|11111111
-// 00000000|00000000
-// 11111111|11111111
-// 00000000|00000000
-// -----------------
-// 11111111|11111111
-// 00000000|00000000
-// 11111111|11111111
-// 00000000|00000000
-// 11111111|11111111
-// 00000000|00000000
-// 11111111|11111111
-// 00000000|00000000
-//
-// A loopfilter should be applied to every other 4x4 vertically.
-
-// Per-transform-size lookup tables with one entry per block size
-// (BLOCK_SIZES_ALL entries each).
-// NOTE(review): non-negative values presumably are offsets into the filter
-// mask tables for that transform size, and -1 presumably marks a block size
-// that cannot use that transform size - confirm at the use sites.
-const int mask_id_table_tx_4x4[BLOCK_SIZES_ALL] = {
- 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, -1, -1, -1, 13, 14, 15, 16, 17, 18
-};
-
-const int mask_id_table_tx_8x8[BLOCK_SIZES_ALL] = {
- -1, -1, -1, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, -1, -1, -1, -1, -1, 10, 11, 12, 13
-};
-
-const int mask_id_table_tx_16x16[BLOCK_SIZES_ALL] = {
- -1, -1, -1, -1, -1, -1, 0, 1, 2, 3, 4, 5, 6, -1, -1, -1, -1, -1, -1, -1, 7, 8
-};
-
-const int mask_id_table_tx_32x32[BLOCK_SIZES_ALL] = { -1, -1, -1, -1, -1, -1,
- -1, -1, -1, 0, 1, 2,
- 3, -1, -1, -1, -1, -1,
- -1, -1, -1, -1 };
-
-const FilterMask left_mask_univariant_reordered[67] = {
- // TX_4X4
- { { 0x0000000000000001ULL, 0x0000000000000000ULL, 0x0000000000000000ULL,
- 0x0000000000000000ULL } }, // block size 4X4, TX_4X4
- { { 0x0000000000010001ULL, 0x0000000000000000ULL, 0x0000000000000000ULL,
- 0x0000000000000000ULL } }, // block size 4X8, TX_4X4
- { { 0x0000000000000003ULL, 0x0000000000000000ULL, 0x0000000000000000ULL,
- 0x0000000000000000ULL } }, // block size 8X4, TX_4X4
- { { 0x0000000000030003ULL, 0x0000000000000000ULL, 0x0000000000000000ULL,
- 0x0000000000000000ULL } }, // block size 8X8, TX_4X4
- { { 0x0003000300030003ULL, 0x0000000000000000ULL, 0x0000000000000000ULL,
- 0x0000000000000000ULL } }, // block size 8X16, TX_4X4
- { { 0x00000000000f000fULL, 0x0000000000000000ULL, 0x0000000000000000ULL,
- 0x0000000000000000ULL } }, // block size 16X8, TX_4X4
- { { 0x000f000f000f000fULL, 0x0000000000000000ULL, 0x0000000000000000ULL,
- 0x0000000000000000ULL } }, // block size 16X16, TX_4X4
- { { 0x000f000f000f000fULL, 0x000f000f000f000fULL, 0x0000000000000000ULL,
- 0x0000000000000000ULL } }, // block size 16X32, TX_4X4
- { { 0x00ff00ff00ff00ffULL, 0x0000000000000000ULL, 0x0000000000000000ULL,
- 0x0000000000000000ULL } }, // block size 32X16, TX_4X4
- { { 0x00ff00ff00ff00ffULL, 0x00ff00ff00ff00ffULL, 0x0000000000000000ULL,
- 0x0000000000000000ULL } }, // block size 32X32, TX_4X4
- { { 0x00ff00ff00ff00ffULL, 0x00ff00ff00ff00ffULL, 0x00ff00ff00ff00ffULL,
- 0x00ff00ff00ff00ffULL } }, // block size 32X64, TX_4X4
- { { 0xffffffffffffffffULL, 0xffffffffffffffffULL, 0x0000000000000000ULL,
- 0x0000000000000000ULL } }, // block size 64X32, TX_4X4
- { { 0xffffffffffffffffULL, 0xffffffffffffffffULL, 0xffffffffffffffffULL,
- 0xffffffffffffffffULL } }, // block size 64X64, TX_4X4
- { { 0x0001000100010001ULL, 0x0000000000000000ULL, 0x0000000000000000ULL,
- 0x0000000000000000ULL } }, // block size 4X16, TX_4X4
- { { 0x000000000000000fULL, 0x0000000000000000ULL, 0x0000000000000000ULL,
- 0x0000000000000000ULL } }, // block size 16X4, TX_4X4
- { { 0x0003000300030003ULL, 0x0003000300030003ULL, 0x0000000000000000ULL,
- 0x0000000000000000ULL } }, // block size 8X32, TX_4X4
- { { 0x0000000000ff00ffULL, 0x0000000000000000ULL, 0x0000000000000000ULL,
- 0x0000000000000000ULL } }, // block size 32X8, TX_4X4
- { { 0x000f000f000f000fULL, 0x000f000f000f000fULL, 0x000f000f000f000fULL,
- 0x000f000f000f000fULL } }, // block size 16X64, TX_4X4
- { { 0xffffffffffffffffULL, 0x0000000000000000ULL, 0x0000000000000000ULL,
- 0x0000000000000000ULL } }, // block size 64X16, TX_4X4
- // TX_8X8
- { { 0x0000000000010001ULL, 0x0000000000000000ULL, 0x0000000000000000ULL,
- 0x0000000000000000ULL } }, // block size 8X8, TX_8X8
- { { 0x0001000100010001ULL, 0x0000000000000000ULL, 0x0000000000000000ULL,
- 0x0000000000000000ULL } }, // block size 8X16, TX_8X8
- { { 0x0000000000050005ULL, 0x0000000000000000ULL, 0x0000000000000000ULL,
- 0x0000000000000000ULL } }, // block size 16X8, TX_8X8
- { { 0x0005000500050005ULL, 0x0000000000000000ULL, 0x0000000000000000ULL,
- 0x0000000000000000ULL } }, // block size 16X16, TX_8X8
- { { 0x0005000500050005ULL, 0x0005000500050005ULL, 0x0000000000000000ULL,
- 0x0000000000000000ULL } }, // block size 16X32, TX_8X8
- { { 0x0055005500550055ULL, 0x0000000000000000ULL, 0x0000000000000000ULL,
- 0x0000000000000000ULL } }, // block size 32X16, TX_8X8
- { { 0x0055005500550055ULL, 0x0055005500550055ULL, 0x0000000000000000ULL,
- 0x0000000000000000ULL } }, // block size 32X32, TX_8X8
- { { 0x0055005500550055ULL, 0x0055005500550055ULL, 0x0055005500550055ULL,
- 0x0055005500550055ULL } }, // block size 32X64, TX_8X8
- { { 0x5555555555555555ULL, 0x5555555555555555ULL, 0x0000000000000000ULL,
- 0x0000000000000000ULL } }, // block size 64X32, TX_8X8
- { { 0x5555555555555555ULL, 0x5555555555555555ULL, 0x5555555555555555ULL,
- 0x5555555555555555ULL } }, // block size 64X64, TX_8X8
- { { 0x0001000100010001ULL, 0x0001000100010001ULL, 0x0000000000000000ULL,
- 0x0000000000000000ULL } }, // block size 8X32, TX_8X8
- { { 0x0000000000550055ULL, 0x0000000000000000ULL, 0x0000000000000000ULL,
- 0x0000000000000000ULL } }, // block size 32X8, TX_8X8
- { { 0x0005000500050005ULL, 0x0005000500050005ULL, 0x0005000500050005ULL,
- 0x0005000500050005ULL } }, // block size 16X64, TX_8X8
- { { 0x5555555555555555ULL, 0x0000000000000000ULL, 0x0000000000000000ULL,
- 0x0000000000000000ULL } }, // block size 64X16, TX_8X8
- // TX_16X16
- { { 0x0001000100010001ULL, 0x0000000000000000ULL, 0x0000000000000000ULL,
- 0x0000000000000000ULL } }, // block size 16X16, TX_16X16
- { { 0x0001000100010001ULL, 0x0001000100010001ULL, 0x0000000000000000ULL,
- 0x0000000000000000ULL } }, // block size 16X32, TX_16X16
- { { 0x0011001100110011ULL, 0x0000000000000000ULL, 0x0000000000000000ULL,
- 0x0000000000000000ULL } }, // block size 32X16, TX_16X16
- { { 0x0011001100110011ULL, 0x0011001100110011ULL, 0x0000000000000000ULL,
- 0x0000000000000000ULL } }, // block size 32X32, TX_16X16
- { { 0x0011001100110011ULL, 0x0011001100110011ULL, 0x0011001100110011ULL,
- 0x0011001100110011ULL } }, // block size 32X64, TX_16X16
- { { 0x1111111111111111ULL, 0x1111111111111111ULL, 0x0000000000000000ULL,
- 0x0000000000000000ULL } }, // block size 64X32, TX_16X16
- { { 0x1111111111111111ULL, 0x1111111111111111ULL, 0x1111111111111111ULL,
- 0x1111111111111111ULL } }, // block size 64X64, TX_16X16
- { { 0x0001000100010001ULL, 0x0001000100010001ULL, 0x0001000100010001ULL,
- 0x0001000100010001ULL } }, // block size 16X64, TX_16X16
- { { 0x1111111111111111ULL, 0x0000000000000000ULL, 0x0000000000000000ULL,
- 0x0000000000000000ULL } }, // block size 64X16, TX_16X16
- // TX_32X32
- { { 0x0001000100010001ULL, 0x0001000100010001ULL, 0x0000000000000000ULL,
- 0x0000000000000000ULL } }, // block size 32X32, TX_32X32
- { { 0x0101010101010101ULL, 0x0101010101010101ULL, 0x0101010101010101ULL,
- 0x0101010101010101ULL } }, // block size 32X64, TX_32X32
- { { 0x0101010101010101ULL, 0x0101010101010101ULL, 0x0000000000000000ULL,
- 0x0000000000000000ULL } }, // block size 64X32, TX_32X32
- { { 0x0101010101010101ULL, 0x0101010101010101ULL, 0x0101010101010101ULL,
- 0x0101010101010101ULL } }, // block size 64X64, TX_32X32
- // TX_64X64
- { { 0x0001000100010001ULL, 0x0001000100010001ULL, 0x0001000100010001ULL,
- 0x0001000100010001ULL } }, // block size 64X64, TX_64X64
- // 2:1, 1:2 transform sizes.
- { { 0x0000000000010001ULL, 0x0000000000000000ULL, 0x0000000000000000ULL,
- 0x0000000000000000ULL } }, // block size 4X8, TX_4X8
- { { 0x0001000100010001ULL, 0x0000000000000000ULL, 0x0000000000000000ULL,
- 0x0000000000000000ULL } }, // block size 4X16, TX_4X8
- { { 0x0000000000000001ULL, 0x0000000000000000ULL, 0x0000000000000000ULL,
- 0x0000000000000000ULL } }, // block size 8X4, TX_8X4
- { { 0x0000000000000005ULL, 0x0000000000000000ULL, 0x0000000000000000ULL,
- 0x0000000000000000ULL } }, // block size 16X4, TX_8X4
- { { 0x0001000100010001ULL, 0x0000000000000000ULL, 0x0000000000000000ULL,
- 0x0000000000000000ULL } }, // block size 8X16, TX_8X16
- { { 0x0001000100010001ULL, 0x0001000100010001ULL, 0x0000000000000000ULL,
- 0x0000000000000000ULL } }, // block size 8X32, TX_8X16
- { { 0x0000000000010001ULL, 0x0000000000000000ULL, 0x0000000000000000ULL,
- 0x0000000000000000ULL } }, // block size 16X8, TX_16X8
- { { 0x0000000000110011ULL, 0x0000000000000000ULL, 0x0000000000000000ULL,
- 0x0000000000000000ULL } }, // block size 32X8, TX_16X8
- { { 0x0001000100010001ULL, 0x0001000100010001ULL, 0x0000000000000000ULL,
- 0x0000000000000000ULL } }, // block size 16X32, TX_16X32
- { { 0x0001000100010001ULL, 0x0001000100010001ULL, 0x0001000100010001ULL,
- 0x0001000100010001ULL } }, // block size 16X64, TX_16X32
- { { 0x0001000100010001ULL, 0x0000000000000000ULL, 0x0000000000000000ULL,
- 0x0000000000000000ULL } }, // block size 32X16, TX_32X16
- { { 0x0101010101010101ULL, 0x0000000000000000ULL, 0x0000000000000000ULL,
- 0x0000000000000000ULL } }, // block size 64X16, TX_32X16
- { { 0x0001000100010001ULL, 0x0001000100010001ULL, 0x0001000100010001ULL,
- 0x0001000100010001ULL } }, // block size 32X64, TX_32X64
- { { 0x0001000100010001ULL, 0x0001000100010001ULL, 0x0000000000000000ULL,
- 0x0000000000000000ULL } }, // block size 64X32, TX_64X32
- // 4:1, 1:4 transform sizes.
- { { 0x0001000100010001ULL, 0x0000000000000000ULL, 0x0000000000000000ULL,
- 0x0000000000000000ULL } }, // block size 4X16, TX_4X16
- { { 0x0000000000000001ULL, 0x0000000000000000ULL, 0x0000000000000000ULL,
- 0x0000000000000000ULL } }, // block size 16X4, TX_16X4
- { { 0x0001000100010001ULL, 0x0001000100010001ULL, 0x0000000000000000ULL,
- 0x0000000000000000ULL } }, // block size 8X32, TX_8X32
- { { 0x0000000000010001ULL, 0x0000000000000000ULL, 0x0000000000000000ULL,
- 0x0000000000000000ULL } }, // block size 32X8, TX_32X8
- { { 0x0001000100010001ULL, 0x0001000100010001ULL, 0x0001000100010001ULL,
- 0x0001000100010001ULL } }, // block size 16X64, TX_16X64
- { { 0x0001000100010001ULL, 0x0000000000000000ULL, 0x0000000000000000ULL,
- 0x0000000000000000ULL } }, // block size 64X16, TX_64X16
-};
-
-// Table of 67 FilterMask initializers, one per (block size, transform size)
-// combination listed in the trailing comment of each entry. Every entry
-// supplies four 64-bit words. The entries are grouped by transform size:
-// square sizes first (TX_4X4 .. TX_64X64), then the 2:1 / 1:2 rectangular
-// sizes, then the 4:1 / 1:4 rectangular sizes.
-// NOTE(review): the bit layout presumably matches get_index_shift() (16 bits
-// per row of 4x4 units, four rows per 64-bit word) -- confirm with the users
-// of this table.
-const FilterMask above_mask_univariant_reordered[67] = {
- // TX_4X4
- { { 0x0000000000000001ULL, 0x0000000000000000ULL, 0x0000000000000000ULL,
- 0x0000000000000000ULL } }, // block size 4X4, TX_4X4
- { { 0x0000000000010001ULL, 0x0000000000000000ULL, 0x0000000000000000ULL,
- 0x0000000000000000ULL } }, // block size 4X8, TX_4X4
- { { 0x0000000000000003ULL, 0x0000000000000000ULL, 0x0000000000000000ULL,
- 0x0000000000000000ULL } }, // block size 8X4, TX_4X4
- { { 0x0000000000030003ULL, 0x0000000000000000ULL, 0x0000000000000000ULL,
- 0x0000000000000000ULL } }, // block size 8X8, TX_4X4
- { { 0x0003000300030003ULL, 0x0000000000000000ULL, 0x0000000000000000ULL,
- 0x0000000000000000ULL } }, // block size 8X16, TX_4X4
- { { 0x00000000000f000fULL, 0x0000000000000000ULL, 0x0000000000000000ULL,
- 0x0000000000000000ULL } }, // block size 16X8, TX_4X4
- { { 0x000f000f000f000fULL, 0x0000000000000000ULL, 0x0000000000000000ULL,
- 0x0000000000000000ULL } }, // block size 16X16, TX_4X4
- { { 0x000f000f000f000fULL, 0x000f000f000f000fULL, 0x0000000000000000ULL,
- 0x0000000000000000ULL } }, // block size 16X32, TX_4X4
- { { 0x00ff00ff00ff00ffULL, 0x0000000000000000ULL, 0x0000000000000000ULL,
- 0x0000000000000000ULL } }, // block size 32X16, TX_4X4
- { { 0x00ff00ff00ff00ffULL, 0x00ff00ff00ff00ffULL, 0x0000000000000000ULL,
- 0x0000000000000000ULL } }, // block size 32X32, TX_4X4
- { { 0x00ff00ff00ff00ffULL, 0x00ff00ff00ff00ffULL, 0x00ff00ff00ff00ffULL,
- 0x00ff00ff00ff00ffULL } }, // block size 32X64, TX_4X4
- { { 0xffffffffffffffffULL, 0xffffffffffffffffULL, 0x0000000000000000ULL,
- 0x0000000000000000ULL } }, // block size 64X32, TX_4X4
- { { 0xffffffffffffffffULL, 0xffffffffffffffffULL, 0xffffffffffffffffULL,
- 0xffffffffffffffffULL } }, // block size 64X64, TX_4X4
- { { 0x0001000100010001ULL, 0x0000000000000000ULL, 0x0000000000000000ULL,
- 0x0000000000000000ULL } }, // block size 4X16, TX_4X4
- { { 0x000000000000000fULL, 0x0000000000000000ULL, 0x0000000000000000ULL,
- 0x0000000000000000ULL } }, // block size 16X4, TX_4X4
- { { 0x0003000300030003ULL, 0x0003000300030003ULL, 0x0000000000000000ULL,
- 0x0000000000000000ULL } }, // block size 8X32, TX_4X4
- { { 0x0000000000ff00ffULL, 0x0000000000000000ULL, 0x0000000000000000ULL,
- 0x0000000000000000ULL } }, // block size 32X8, TX_4X4
- { { 0x000f000f000f000fULL, 0x000f000f000f000fULL, 0x000f000f000f000fULL,
- 0x000f000f000f000fULL } }, // block size 16X64, TX_4X4
- { { 0xffffffffffffffffULL, 0x0000000000000000ULL, 0x0000000000000000ULL,
- 0x0000000000000000ULL } }, // block size 64X16, TX_4X4
- // TX_8X8
- { { 0x0000000000000003ULL, 0x0000000000000000ULL, 0x0000000000000000ULL,
- 0x0000000000000000ULL } }, // block size 8X8, TX_8X8
- { { 0x0000000300000003ULL, 0x0000000000000000ULL, 0x0000000000000000ULL,
- 0x0000000000000000ULL } }, // block size 8X16, TX_8X8
- { { 0x000000000000000fULL, 0x0000000000000000ULL, 0x0000000000000000ULL,
- 0x0000000000000000ULL } }, // block size 16X8, TX_8X8
- { { 0x0000000f0000000fULL, 0x0000000000000000ULL, 0x0000000000000000ULL,
- 0x0000000000000000ULL } }, // block size 16X16, TX_8X8
- { { 0x0000000f0000000fULL, 0x0000000f0000000fULL, 0x0000000000000000ULL,
- 0x0000000000000000ULL } }, // block size 16X32, TX_8X8
- { { 0x000000ff000000ffULL, 0x0000000000000000ULL, 0x0000000000000000ULL,
- 0x0000000000000000ULL } }, // block size 32X16, TX_8X8
- { { 0x000000ff000000ffULL, 0x000000ff000000ffULL, 0x0000000000000000ULL,
- 0x0000000000000000ULL } }, // block size 32X32, TX_8X8
- { { 0x000000ff000000ffULL, 0x000000ff000000ffULL, 0x000000ff000000ffULL,
- 0x000000ff000000ffULL } }, // block size 32X64, TX_8X8
- { { 0x0000ffff0000ffffULL, 0x0000ffff0000ffffULL, 0x0000000000000000ULL,
- 0x0000000000000000ULL } }, // block size 64X32, TX_8X8
- { { 0x0000ffff0000ffffULL, 0x0000ffff0000ffffULL, 0x0000ffff0000ffffULL,
- 0x0000ffff0000ffffULL } }, // block size 64X64, TX_8X8
- { { 0x0000000300000003ULL, 0x0000000300000003ULL, 0x0000000000000000ULL,
- 0x0000000000000000ULL } }, // block size 8X32, TX_8X8
- { { 0x00000000000000ffULL, 0x0000000000000000ULL, 0x0000000000000000ULL,
- 0x0000000000000000ULL } }, // block size 32X8, TX_8X8
- { { 0x0000000f0000000fULL, 0x0000000f0000000fULL, 0x0000000f0000000fULL,
- 0x0000000f0000000fULL } }, // block size 16X64, TX_8X8
- { { 0x0000ffff0000ffffULL, 0x0000000000000000ULL, 0x0000000000000000ULL,
- 0x0000000000000000ULL } }, // block size 64X16, TX_8X8
- // TX_16X16
- { { 0x000000000000000fULL, 0x0000000000000000ULL, 0x0000000000000000ULL,
- 0x0000000000000000ULL } }, // block size 16X16, TX_16X16
- { { 0x000000000000000fULL, 0x000000000000000fULL, 0x0000000000000000ULL,
- 0x0000000000000000ULL } }, // block size 16X32, TX_16X16
- { { 0x00000000000000ffULL, 0x0000000000000000ULL, 0x0000000000000000ULL,
- 0x0000000000000000ULL } }, // block size 32X16, TX_16X16
- { { 0x00000000000000ffULL, 0x00000000000000ffULL, 0x0000000000000000ULL,
- 0x0000000000000000ULL } }, // block size 32X32, TX_16X16
- { { 0x00000000000000ffULL, 0x00000000000000ffULL, 0x00000000000000ffULL,
- 0x00000000000000ffULL } }, // block size 32X64, TX_16X16
- { { 0x000000000000ffffULL, 0x000000000000ffffULL, 0x0000000000000000ULL,
- 0x0000000000000000ULL } }, // block size 64X32, TX_16X16
- { { 0x000000000000ffffULL, 0x000000000000ffffULL, 0x000000000000ffffULL,
- 0x000000000000ffffULL } }, // block size 64X64, TX_16X16
- { { 0x000000000000000fULL, 0x000000000000000fULL, 0x000000000000000fULL,
- 0x000000000000000fULL } }, // block size 16X64, TX_16X16
- { { 0x000000000000ffffULL, 0x0000000000000000ULL, 0x0000000000000000ULL,
- 0x0000000000000000ULL } }, // block size 64X16, TX_16X16
- // TX_32X32
- { { 0x00000000000000ffULL, 0x0000000000000000ULL, 0x0000000000000000ULL,
- 0x0000000000000000ULL } }, // block size 32X32, TX_32X32
- { { 0x00000000000000ffULL, 0x0000000000000000ULL, 0x00000000000000ffULL,
- 0x0000000000000000ULL } }, // block size 32X64, TX_32X32
- { { 0x000000000000ffffULL, 0x0000000000000000ULL, 0x0000000000000000ULL,
- 0x0000000000000000ULL } }, // block size 64X32, TX_32X32
- { { 0x000000000000ffffULL, 0x0000000000000000ULL, 0x000000000000ffffULL,
- 0x0000000000000000ULL } }, // block size 64X64, TX_32X32
- // TX_64X64
- { { 0x000000000000ffffULL, 0x0000000000000000ULL, 0x0000000000000000ULL,
- 0x0000000000000000ULL } }, // block size 64X64, TX_64X64
- // 2:1, 1:2 transform sizes.
- { { 0x0000000000000001ULL, 0x0000000000000000ULL, 0x0000000000000000ULL,
- 0x0000000000000000ULL } }, // block size 4X8, TX_4X8
- { { 0x0000000100000001ULL, 0x0000000000000000ULL, 0x0000000000000000ULL,
- 0x0000000000000000ULL } }, // block size 4X16, TX_4X8
- { { 0x0000000000000003ULL, 0x0000000000000000ULL, 0x0000000000000000ULL,
- 0x0000000000000000ULL } }, // block size 8X4, TX_8X4
- { { 0x000000000000000fULL, 0x0000000000000000ULL, 0x0000000000000000ULL,
- 0x0000000000000000ULL } }, // block size 16X4, TX_8X4
- { { 0x0000000000000003ULL, 0x0000000000000000ULL, 0x0000000000000000ULL,
- 0x0000000000000000ULL } }, // block size 8X16, TX_8X16
- { { 0x0000000000000003ULL, 0x0000000000000003ULL, 0x0000000000000000ULL,
- 0x0000000000000000ULL } }, // block size 8X32, TX_8X16
- { { 0x000000000000000fULL, 0x0000000000000000ULL, 0x0000000000000000ULL,
- 0x0000000000000000ULL } }, // block size 16X8, TX_16X8
- { { 0x00000000000000ffULL, 0x0000000000000000ULL, 0x0000000000000000ULL,
- 0x0000000000000000ULL } }, // block size 32X8, TX_16X8
- { { 0x000000000000000fULL, 0x0000000000000000ULL, 0x0000000000000000ULL,
- 0x0000000000000000ULL } }, // block size 16X32, TX_16X32
- { { 0x000000000000000fULL, 0x0000000000000000ULL, 0x000000000000000fULL,
- 0x0000000000000000ULL } }, // block size 16X64, TX_16X32
- { { 0x00000000000000ffULL, 0x0000000000000000ULL, 0x0000000000000000ULL,
- 0x0000000000000000ULL } }, // block size 32X16, TX_32X16
- { { 0x000000000000ffffULL, 0x0000000000000000ULL, 0x0000000000000000ULL,
- 0x0000000000000000ULL } }, // block size 64X16, TX_32X16
- { { 0x00000000000000ffULL, 0x0000000000000000ULL, 0x0000000000000000ULL,
- 0x0000000000000000ULL } }, // block size 32X64, TX_32X64
- { { 0x000000000000ffffULL, 0x0000000000000000ULL, 0x0000000000000000ULL,
- 0x0000000000000000ULL } }, // block size 64X32, TX_64X32
- // 4:1, 1:4 transform sizes.
- { { 0x0000000000000001ULL, 0x0000000000000000ULL, 0x0000000000000000ULL,
- 0x0000000000000000ULL } }, // block size 4X16, TX_4X16
- { { 0x000000000000000fULL, 0x0000000000000000ULL, 0x0000000000000000ULL,
- 0x0000000000000000ULL } }, // block size 16X4, TX_16X4
- { { 0x0000000000000003ULL, 0x0000000000000000ULL, 0x0000000000000000ULL,
- 0x0000000000000000ULL } }, // block size 8X32, TX_8X32
- { { 0x00000000000000ffULL, 0x0000000000000000ULL, 0x0000000000000000ULL,
- 0x0000000000000000ULL } }, // block size 32X8, TX_32X8
- { { 0x000000000000000fULL, 0x0000000000000000ULL, 0x0000000000000000ULL,
- 0x0000000000000000ULL } }, // block size 16X64, TX_16X64
- { { 0x000000000000ffffULL, 0x0000000000000000ULL, 0x0000000000000000ULL,
- 0x0000000000000000ULL } }, // block size 64X16, TX_64X16
-};
-
-// Returns the entry of cm->lf.lfm that covers mode-info position
-// (mi_row, mi_col). Both coordinates are scaled down by MIN_MIB_SIZE_LOG2 and
-// combined into a row-major offset using cm->lf.lfm_stride.
-// cm->lf.lfm must already be allocated (asserted).
-LoopFilterMask *get_loop_filter_mask(const AV1_COMMON *const cm, int mi_row,
-                                     int mi_col) {
-  assert(cm->lf.lfm != NULL);
-  const int lfm_row = mi_row >> MIN_MIB_SIZE_LOG2;  // 64x64
-  const int lfm_col = mi_col >> MIN_MIB_SIZE_LOG2;
-  return cm->lf.lfm + (lfm_row * cm->lf.lfm_stride + lfm_col);
-}
-
-// Pointer to a filter routine working on an 8-bit sample buffer |s| with one
-// blimit/limit/thresh parameter set.
-// NOTE(review): |p| is presumably the buffer pitch/stride -- confirm against
-// the functions assigned to this type.
-typedef void (*LpfFunc)(uint8_t *s, int p, const uint8_t *blimit,
- const uint8_t *limit, const uint8_t *thresh);
-
-// Same as LpfFunc but takes two blimit/limit/thresh parameter sets
-// (suffixes 0 and 1).
-typedef void (*LpfDualFunc)(uint8_t *s, int p, const uint8_t *blimit0,
- const uint8_t *limit0, const uint8_t *thresh0,
- const uint8_t *blimit1, const uint8_t *limit1,
- const uint8_t *thresh1);
-
-// Counterpart of LpfFunc for 16-bit sample buffers, with an extra |bd|
-// argument (presumably the bit depth -- confirm).
-typedef void (*HbdLpfFunc)(uint16_t *s, int p, const uint8_t *blimit,
- const uint8_t *limit, const uint8_t *thresh, int bd);
-
-// Counterpart of LpfDualFunc for 16-bit sample buffers, with an extra |bd|
-// argument (presumably the bit depth -- confirm).
-typedef void (*HbdLpfDualFunc)(uint16_t *s, int p, const uint8_t *blimit0,
- const uint8_t *limit0, const uint8_t *thresh0,
- const uint8_t *blimit1, const uint8_t *limit1,
- const uint8_t *thresh1, int bd);
-#endif // LOOP_FILTER_BITMASK
-
-// Fills lfi->lfthr[level].lim and lfi->lfthr[level].mblim for every filter
-// level in [0, MAX_LOOP_FILTER], based on |sharpness_lvl|. Each table entry is
-// SIMD_WIDTH bytes wide and is filled with a single repeated value.
-static void update_sharpness(loop_filter_info_n *lfi, int sharpness_lvl) {
-  // The inside limit is the level shifted right by 0, 1 or 2 bits:
-  // one bit once sharpness is above 0 and another once it is above 4.
-  const int shift = (sharpness_lvl > 0) + (sharpness_lvl > 4);
-  // Upper bound on the inside limit; only applied for non-zero sharpness.
-  const int sharp_cap = 9 - sharpness_lvl;
-
-  for (int level = 0; level <= MAX_LOOP_FILTER; ++level) {
-    int inside_limit = level >> shift;
-
-    if (sharpness_lvl > 0 && inside_limit > sharp_cap) inside_limit = sharp_cap;
-    // The inside limit is never allowed to drop below 1.
-    if (inside_limit < 1) inside_limit = 1;
-
-    const int edge_limit = 2 * (level + 2) + inside_limit;
-    memset(lfi->lfthr[level].lim, inside_limit, SIMD_WIDTH);
-    memset(lfi->lfthr[level].mblim, edge_limit, SIMD_WIDTH);
-  }
-}
-
-// Returns the loop filter level for block |mbmi| in |plane| (0, 1 or 2) for
-// edge direction |dir_idx|.
-//
-// When cm->delta_lf_present_flag is zero the value is read from the
-// precomputed table lfi_n->lvl. Otherwise it is derived here: the block's
-// delta is added to the plane's frame-level value, then the segment feature
-// data and the reference/mode deltas are applied, clamping to
-// [0, MAX_LOOP_FILTER] after each stage.
-uint8_t get_filter_level(const AV1_COMMON *cm, const loop_filter_info_n *lfi_n,
-                         const int dir_idx, int plane,
-                         const MB_MODE_INFO *mbmi) {
-  // |plane| indexes lookup tables on both paths below, so validate it before
-  // the first lookup rather than after it.
-  assert(plane >= 0 && plane <= 2);
-  const int segment_id = mbmi->segment_id;
-
-  if (!cm->delta_lf_present_flag) {
-    return lfi_n->lvl[plane][segment_id][dir_idx][mbmi->ref_frame[0]]
-                     [mode_lf_lut[mbmi->mode]];
-  }
-
-  // Per-block delta: either one delta per (plane, direction) or a single
-  // delta shared by all of them.
-  int delta_lf;
-  if (cm->delta_lf_multi) {
-    const int delta_lf_idx = delta_lf_id_lut[plane][dir_idx];
-    delta_lf = mbmi->delta_lf[delta_lf_idx];
-  } else {
-    delta_lf = mbmi->delta_lf_from_base;
-  }
-
-  // Frame-level value for this plane; only luma has one per direction.
-  int base_level;
-  if (plane == 0)
-    base_level = cm->lf.filter_level[dir_idx];
-  else if (plane == 1)
-    base_level = cm->lf.filter_level_u;
-  else
-    base_level = cm->lf.filter_level_v;
-
-  int lvl_seg = clamp(delta_lf + base_level, 0, MAX_LOOP_FILTER);
-
-  const int seg_lf_feature_id = seg_lvl_lf_lut[plane][dir_idx];
-  if (segfeature_active(&cm->seg, segment_id, seg_lf_feature_id)) {
-    const int data = get_segdata(&cm->seg, segment_id, seg_lf_feature_id);
-    lvl_seg = clamp(lvl_seg + data, 0, MAX_LOOP_FILTER);
-  }
-
-  if (cm->lf.mode_ref_delta_enabled) {
-    // Deltas are doubled once the level reaches 32 (scale is 1 << (lvl >> 5)).
-    const int scale = 1 << (lvl_seg >> 5);
-    lvl_seg += cm->lf.ref_deltas[mbmi->ref_frame[0]] * scale;
-    // The mode delta is only added when ref_frame[0] is above INTRA_FRAME.
-    if (mbmi->ref_frame[0] > INTRA_FRAME)
-      lvl_seg += cm->lf.mode_deltas[mode_lf_lut[mbmi->mode]] * scale;
-    lvl_seg = clamp(lvl_seg, 0, MAX_LOOP_FILTER);
-  }
-  return lvl_seg;
-}
-
-// One-time loop filter setup for |cm|: sets combine_vert_horz_lf, fills the
-// sharpness-dependent limit tables and fills the hev_thr table of every
-// filter level.
-void av1_loop_filter_init(AV1_COMMON *cm) {
-  assert(MB_MODE_COUNT == NELEMENTS(mode_lf_lut));
-  loop_filter_info_n *const lf_info = &cm->lf_info;
-  struct loopfilter *const lf = &cm->lf;
-
-  lf->combine_vert_horz_lf = 1;
-
-  // Limits that depend on the current sharpness level.
-  update_sharpness(lf_info, lf->sharpness_level);
-
-  // The hev threshold is the filter level divided by 16, replicated across
-  // SIMD_WIDTH bytes.
-  for (int level = 0; level <= MAX_LOOP_FILTER; ++level) {
-    memset(lf_info->lfthr[level].hev_thr, level >> 4, SIMD_WIDTH);
-  }
-}
-
-// Update the loop filter for the current frame: refreshes the sharpness
-// limits and fills cm->lf_info.lvl for planes [plane_start, plane_end).
-// This should be called before loop_filter_rows(),
-// av1_loop_filter_frame() calls this function directly.
-void av1_loop_filter_frame_init(AV1_COMMON *cm, int plane_start,
-                                int plane_end) {
-  loop_filter_info_n *const lfi = &cm->lf_info;
-  struct loopfilter *const lf = &cm->lf;
-  const struct segmentation *const seg = &cm->seg;
-  // base_lvl[dir][plane]: frame-level filter strength. Only luma has a
-  // separate value per edge direction; chroma reuses one value for both.
-  int base_lvl[2][MAX_MB_PLANE];
-
-  // update sharpness limits
-  update_sharpness(lfi, lf->sharpness_level);
-
-  base_lvl[0][0] = lf->filter_level[0];
-  base_lvl[1][0] = lf->filter_level[1];
-  base_lvl[0][1] = base_lvl[1][1] = lf->filter_level_u;
-  base_lvl[0][2] = base_lvl[1][2] = lf->filter_level_v;
-
-  for (int plane = plane_start; plane < plane_end; ++plane) {
-    if (plane == 0) {
-      // Both luma levels zero: stop, leaving the remaining planes untouched.
-      if (!base_lvl[0][0] && !base_lvl[1][0]) break;
-    } else if (plane == 1 || plane == 2) {
-      // A chroma plane with a zero level is skipped on its own.
-      if (!base_lvl[0][plane]) continue;
-    }
-
-    for (int seg_id = 0; seg_id < MAX_SEGMENTS; ++seg_id) {
-      for (int dir = 0; dir < 2; ++dir) {
-        int lvl_seg = base_lvl[dir][plane];
-        assert(plane >= 0 && plane <= 2);
-        const int seg_lf_feature_id = seg_lvl_lf_lut[plane][dir];
-        if (segfeature_active(seg, seg_id, seg_lf_feature_id)) {
-          const int data = get_segdata(&cm->seg, seg_id, seg_lf_feature_id);
-          lvl_seg = clamp(lvl_seg + data, 0, MAX_LOOP_FILTER);
-        }
-
-        if (!lf->mode_ref_delta_enabled) {
-          // we could get rid of this if we assume that deltas are set to
-          // zero when not in use; encoder always uses deltas
-          memset(lfi->lvl[plane][seg_id][dir], lvl_seg,
-                 sizeof(lfi->lvl[plane][seg_id][dir]));
-          continue;
-        }
-
-        // Multiplier for the deltas: 1 while the level is in 0..31,
-        // 2 for 32..63.
-        const int scale = 1 << (lvl_seg >> 5);
-
-        const int intra_lvl = lvl_seg + lf->ref_deltas[INTRA_FRAME] * scale;
-        lfi->lvl[plane][seg_id][dir][INTRA_FRAME][0] =
-            clamp(intra_lvl, 0, MAX_LOOP_FILTER);
-
-        for (int ref = LAST_FRAME; ref < REF_FRAMES; ++ref) {
-          const int ref_lvl = lvl_seg + lf->ref_deltas[ref] * scale;
-          for (int mode = 0; mode < MAX_MODE_LF_DELTAS; ++mode) {
-            const int inter_lvl = ref_lvl + lf->mode_deltas[mode] * scale;
-            lfi->lvl[plane][seg_id][dir][ref][mode] =
-                clamp(inter_lvl, 0, MAX_LOOP_FILTER);
-          }
-        }
-      }
-    }
-  }
-}
-
-#if LOOP_FILTER_BITMASK
-// A 64x64 block contains 256 4x4 units, so one bit per unit needs 256 bits.
-// They are stored as uint64_t bitmask[4]: each 64-bit word covers 4 rows of
-// 16 units (16 = 1 << stride_log2, with stride_log2 = 4).
-//
-// Given a position (mi_col, mi_row) in 4x4 units inside the 64x64 block, this
-// function stores in *index which of the four words holds the position
-// (mi_row / 4) and returns the bit offset within that word:
-//   ((mi_row % 4) << stride_log2) | mi_col
-int get_index_shift(int mi_col, int mi_row, int *index) {
-  const int word = mi_row >> 2;         // four rows per 64-bit word
-  const int row_in_word = mi_row & 3;   // row inside that word
-  *index = word;
-  return (row_in_word << 4) | mi_col;   // 16 units per row
-}
-
-// Debug-only consistency check: for each of the four 64-bit words, asserts
-// that no bit is set in the masks of two different square transform sizes
-// (TX_4X4, TX_8X8, TX_16X16, TX_32X32, TX_64X64). |lfm| is indexed by
-// transform size. Does nothing when NDEBUG is defined.
-static void check_mask(const FilterMask *lfm) {
-#ifndef NDEBUG
-  static const TX_SIZE kSquareTx[] = { TX_4X4, TX_8X8, TX_16X16, TX_32X32,
-                                       TX_64X64 };
-  const int num_sizes = (int)(sizeof(kSquareTx) / sizeof(kSquareTx[0]));
-  for (int word = 0; word < 4; ++word) {
-    // Every unordered pair of distinct sizes must be disjoint.
-    for (int a = 0; a < num_sizes; ++a) {
-      for (int b = a + 1; b < num_sizes; ++b) {
-        assert(!(lfm[kSquareTx[a]].bits[word] & lfm[kSquareTx[b]].bits[word]));
-      }
-    }
-  }
-#else
-  (void)lfm;
-#endif
-}
-
-// Runs check_mask() on the left and above masks of |plane| (0 selects the
-// *_y masks, 1 the *_u masks, anything else the *_v masks), asserting that
-// two different loop filters are never applied at the same position.
-static void check_loop_filter_masks(const LoopFilterMask *lfm, int plane) {
-  const FilterMask *left;
-  const FilterMask *above;
-
-  switch (plane) {
-    case 0:
-      left = lfm->left_y;
-      above = lfm->above_y;
-      break;
-    case 1:
-      left = lfm->left_u;
-      above = lfm->above_u;
-      break;
-    default:
-      left = lfm->left_v;
-      above = lfm->above_v;
-      break;
-  }
-
-  check_mask(left);
-  check_mask(above);
-}
-
-// ORs the four 64-bit words of |mask| into the mask of |lfm| selected by edge
-// direction (VERT_EDGE -> left_*, otherwise above_*), |plane| (0 -> y,
-// 1 -> u, 2 -> v) and |sqr_tx_size|. Any other plane value leaves |lfm|
-// unmodified (and trips the assert when it is greater than 2).
-static void update_masks(EDGE_DIR dir, int plane, uint64_t *mask,
-                         TX_SIZE sqr_tx_size, LoopFilterMask *lfm) {
-  const int is_vert = (dir == VERT_EDGE);
-  FilterMask *plane_masks = NULL;
-
-  switch (plane) {
-    case 0: plane_masks = is_vert ? lfm->left_y : lfm->above_y; break;
-    case 1: plane_masks = is_vert ? lfm->left_u : lfm->above_u; break;
-    case 2: plane_masks = is_vert ? lfm->left_v : lfm->above_v; break;
-    default:
-      assert(plane <= 2);
-      return;
-  }
-
-  FilterMask *const dst = &plane_masks[sqr_tx_size];
-  for (int i = 0; i < 4; ++i) dst->bits[i] |= mask[i];
-}
-
-// Returns 1 when the edge at (mi_row, mi_col) must not be filtered because it
-// lies outside the frame or on the frame's first column (VERT_EDGE) / first
-// row (HORZ_EDGE); returns 0 otherwise.
-static int is_frame_boundary(AV1_COMMON *const cm, int plane, int mi_row,
-                             int mi_col, int ssx, int ssy, EDGE_DIR dir) {
-  const int row_px = mi_row << MI_SIZE_LOG2;
-  const int col_px = mi_col << MI_SIZE_LOG2;
-  const int subsampled_chroma = plane && (ssx || ssy);
-
-  // Outside-the-frame test. Subsampled chroma uses a looser (strictly
-  // greater) comparison along each subsampled dimension.
-  if (!subsampled_chroma) {
-    if (row_px >= cm->height || col_px >= cm->width) return 1;
-  } else if (ssx && ssy) {  // format 420
-    if (row_px > cm->height || col_px > cm->width) return 1;
-  } else if (ssx) {  // format 422
-    if (row_px >= cm->height || col_px > cm->width) return 1;
-  }
-
-  // Coordinate along the filtering direction, and whether the plane is
-  // subsampled in that direction.
-  int pos = (dir == VERT_EDGE) ? mi_col : mi_row;
-  const int dir_subsampled = (dir == VERT_EDGE) ? ssx : ssy;
-
-  // chroma sub8x8 block uses bottom/right mi of co-located 8x8 luma block.
-  // So if mi_col == 1, it is actually the frame boundary.
-  if (plane != 0 && dir_subsampled) pos &= 0x0FFFFFFE;
-
-  return pos == 0;
-}
-
-// For the transform block of size |tx_size| at mode-info position
-// (mi_row, mi_col) in |plane|, and for both edge directions (VERT_EDGE then
-// HORZ_EDGE):
-//   1. stores the block's filter level into the lfl_* arrays of the
-//      LoopFilterMask for every visited position inside the frame, and
-//   2. sets one bit (via update_masks()) for every visited position whose
-//      edge towards the previous (left / above) block needs filtering.
-// |ssx| / |ssy| are the subsampling shifts of |plane|.
-static void setup_masks(AV1_COMMON *const cm, int mi_row, int mi_col, int plane,
- int ssx, int ssy, TX_SIZE tx_size) {
- // Note: the mask entry and the x / y offsets are derived from the incoming
- // mi_row / mi_col, before they are OR-ed with ssy / ssx inside the loop.
- LoopFilterMask *lfm = get_loop_filter_mask(cm, mi_row, mi_col);
- const int x = (mi_col << (MI_SIZE_LOG2 - ssx));
- const int y = (mi_row << (MI_SIZE_LOG2 - ssy));
- // decide whether current vertical/horizontal edge needs loop filtering
- for (EDGE_DIR dir = VERT_EDGE; dir <= HORZ_EDGE; ++dir) {
- // chroma sub8x8 block uses bottom/right mi of co-located 8x8 luma block.
- // (These ORs modify the parameters; repeating them on the second
- // iteration leaves the values unchanged.)
- mi_row |= ssy;
- mi_col |= ssx;
-
- MB_MODE_INFO **mi = cm->mi_grid_visible + mi_row * cm->mi_stride + mi_col;
- const MB_MODE_INFO *const mbmi = mi[0];
- const int curr_skip = mbmi->skip && is_inter_block(mbmi);
- const BLOCK_SIZE bsize = mbmi->sb_type;
- const BLOCK_SIZE bsizec = scale_chroma_bsize(bsize, ssx, ssy);
- const BLOCK_SIZE plane_bsize = ss_size_lookup[bsizec][ssx][ssy];
- const uint8_t level = get_filter_level(cm, &cm->lf_info, dir, plane, mbmi);
- // x (or y) is on a coding block border when it is a multiple of the
- // block width (or height); the "- 1" mask tests that.
- const int prediction_masks = dir == VERT_EDGE
- ? block_size_wide[plane_bsize] - 1
- : block_size_high[plane_bsize] - 1;
- const int is_coding_block_border =
- dir == VERT_EDGE ? !(x & prediction_masks) : !(y & prediction_masks);
-
- // TODO(chengchen): step can be optimized.
- const int row_step = mi_size_high[TX_4X4] << ssy;
- const int col_step = mi_size_wide[TX_4X4] << ssx;
- // For VERT_EDGE only one column step is visited over the full transform
- // height; for HORZ_EDGE only one row step over the full transform width.
- const int mi_height =
- dir == VERT_EDGE ? tx_size_high_unit[tx_size] << ssy : row_step;
- const int mi_width =
- dir == VERT_EDGE ? col_step : tx_size_wide_unit[tx_size] << ssx;
-
- // assign filter levels
- for (int r = mi_row; r < mi_row + mi_height; r += row_step) {
- for (int c = mi_col; c < mi_col + mi_width; c += col_step) {
- // do not filter frame boundary
- // Note: when chroma planes' size are half of luma plane,
- // chroma plane mi corresponds to even position.
- // If frame size is not even, we still need to filter this chroma
- // position. Therefore the boundary condition check needs to be
- // separated to two cases.
- if (plane && (ssx || ssy)) {
- if (ssx && ssy) { // format 420
- if ((r << MI_SIZE_LOG2) > cm->height ||
- (c << MI_SIZE_LOG2) > cm->width)
- continue;
- } else if (ssx) { // format 422
- if ((r << MI_SIZE_LOG2) >= cm->height ||
- (c << MI_SIZE_LOG2) > cm->width)
- continue;
- }
- } else {
- if ((r << MI_SIZE_LOG2) >= cm->height ||
- (c << MI_SIZE_LOG2) >= cm->width)
- continue;
- }
-
- // Position relative to the 64x64 unit covered by |lfm|.
- const int row = r % MI_SIZE_64X64;
- const int col = c % MI_SIZE_64X64;
- // Luma keeps separate level arrays per direction; each chroma plane
- // has a single array written for both directions.
- if (plane == 0) {
- if (dir == VERT_EDGE)
- lfm->lfl_y_ver[row][col] = level;
- else
- lfm->lfl_y_hor[row][col] = level;
- } else if (plane == 1) {
- lfm->lfl_u[row][col] = level;
- } else {
- lfm->lfl_v[row][col] = level;
- }
- }
- }
-
- // Second pass over the same positions: set the edge bits.
- for (int r = mi_row; r < mi_row + mi_height; r += row_step) {
- for (int c = mi_col; c < mi_col + mi_width; c += col_step) {
- // do not filter frame boundary
- if (is_frame_boundary(cm, plane, r, c, ssx, ssy, dir)) continue;
-
- uint64_t mask[4] = { 0 };
- // Neighbour on the other side of the edge: one step to the left for
- // VERT_EDGE, one step up for HORZ_EDGE.
- const int prev_row = dir == VERT_EDGE ? r : r - (1 << ssy);
- const int prev_col = dir == VERT_EDGE ? c - (1 << ssx) : c;
- MB_MODE_INFO **mi_prev =
- cm->mi_grid_visible + prev_row * cm->mi_stride + prev_col;
- const MB_MODE_INFO *const mbmi_prev = mi_prev[0];
- const int prev_skip = mbmi_prev->skip && is_inter_block(mbmi_prev);
- const uint8_t level_prev =
- get_filter_level(cm, &cm->lf_info, dir, plane, mbmi_prev);
- // Filter when either side has a non-zero level, unless both sides are
- // skipped inter blocks and the edge is not a coding block border.
- const int is_edge =
- (level || level_prev) &&
- (!curr_skip || !prev_skip || is_coding_block_border);
-
- if (is_edge) {
- const TX_SIZE prev_tx_size =
- plane ? av1_get_max_uv_txsize(mbmi_prev->sb_type, ssx, ssy)
- : mbmi_prev->tx_size;
- // Use the smaller of the two transform sizes across the edge,
- // capped at TX_16X16.
- TX_SIZE min_tx_size = (dir == VERT_EDGE)
- ? AOMMIN(txsize_horz_map[tx_size],
- txsize_horz_map[prev_tx_size])
- : AOMMIN(txsize_vert_map[tx_size],
- txsize_vert_map[prev_tx_size]);
- min_tx_size = AOMMIN(min_tx_size, TX_16X16);
- assert(min_tx_size < TX_SIZES);
- const int row = r % MI_SIZE_64X64;
- const int col = c % MI_SIZE_64X64;
- int index = 0;
- const int shift = get_index_shift(col, row, &index);
- assert(index < 4 && index >= 0);
- mask[index] |= ((uint64_t)1 << shift);
- // set mask on corresponding bit
- update_masks(dir, plane, mask, min_tx_size, lfm);
- }
- }
- }
- }
-}
-
-// Sets up the loop filter masks for the transform block at offset
-// (blk_row, blk_col) inside the coding block at (mi_row, mi_col).
-//
-// Chroma planes call setup_masks() directly with |tx_size|. For luma the
-// actual transform size is looked up; when it differs from |tx_size| the
-// block is split with sub_tx_size_map and each part is handled recursively.
-// Blocks whose top-left corner lies outside the frame are ignored.
-static void setup_tx_block_mask(AV1_COMMON *const cm, int mi_row, int mi_col,
-                                int blk_row, int blk_col,
-                                BLOCK_SIZE plane_bsize, TX_SIZE tx_size,
-                                int plane, int ssx, int ssy) {
-  blk_row <<= ssy;
-  blk_col <<= ssx;
-  if (((mi_row + blk_row) << MI_SIZE_LOG2) >= cm->height ||
-      ((mi_col + blk_col) << MI_SIZE_LOG2) >= cm->width)
-    return;
-
-  // U/V plane, tx_size is always the largest size
-  if (plane) {
-    assert(tx_size_wide[tx_size] <= 32 && tx_size_high[tx_size] <= 32);
-    setup_masks(cm, mi_row + blk_row, mi_col + blk_col, plane, ssx, ssy,
-                tx_size);
-    return;
-  }
-
-  // Only the Y plane reaches this point, so no further checks on |plane| are
-  // needed below.
-  MB_MODE_INFO **mi = cm->mi_grid_visible + mi_row * cm->mi_stride + mi_col;
-  const MB_MODE_INFO *const mbmi = mi[0];
-
-  // For Y plane:
-  // If intra block, tx size is univariant.
-  // If inter block, tx size follows inter_tx_size.
-  TX_SIZE plane_tx_size;
-  if (is_inter_block(mbmi)) {
-    if (mbmi->skip) {
-      // TODO(chengchen): change av1_get_transform_size() to be consistant.
-      // plane_tx_size = get_max_rect_tx_size(plane_bsize);
-      plane_tx_size = mbmi->tx_size;
-    } else {
-      plane_tx_size = mbmi->inter_tx_size[av1_get_txb_size_index(
-          plane_bsize, blk_row, blk_col)];
-    }
-  } else {
-    // Intra: read the size from the mode info at this transform block's own
-    // position.
-    MB_MODE_INFO **mi_this = cm->mi_grid_visible +
-                             (mi_row + blk_row) * cm->mi_stride + mi_col +
-                             blk_col;
-    const MB_MODE_INFO *const mbmi_this = mi_this[0];
-    plane_tx_size = mbmi_this->tx_size;
-  }
-
-  assert(txsize_to_bsize[plane_tx_size] <= plane_bsize);
-
-  if (plane_tx_size == tx_size) {
-    setup_masks(cm, mi_row + blk_row, mi_col + blk_col, plane, ssx, ssy,
-                tx_size);
-    return;
-  }
-
-  // The actual transform differs from |tx_size|: split and recurse.
-  const TX_SIZE sub_txs = sub_tx_size_map[tx_size];
-  const int bsw = tx_size_wide_unit[sub_txs];
-  const int bsh = tx_size_high_unit[sub_txs];
-  for (int row = 0; row < tx_size_high_unit[tx_size]; row += bsh) {
-    for (int col = 0; col < tx_size_wide_unit[tx_size]; col += bsw) {
-      const int offsetr = blk_row + row;
-      const int offsetc = blk_col + col;
-      setup_tx_block_mask(cm, mi_row, mi_col, offsetr, offsetc, plane_bsize,
-                          sub_txs, plane, ssx, ssy);
-    }
-  }
-}
-
-// Sets up the loop filter masks of |plane| for the coding block that owns
-// mode-info position (mi_row, mi_col). The block is walked in units no larger
-// than a (subsampled) 64x64 area, and each unit is walked in steps of the
-// largest transform size allowed for the block; every step is handed to
-// setup_tx_block_mask().
-static void setup_fix_block_mask(AV1_COMMON *const cm, int mi_row, int mi_col,
-                                 int plane, int ssx, int ssy) {
-  // For subsampled planes the mode info is read at the odd row/column
-  // (mi_row | ssy, mi_col | ssx).
-  MB_MODE_INFO **mi =
-      cm->mi_grid_visible + (mi_row | ssy) * cm->mi_stride + (mi_col | ssx);
-  const MB_MODE_INFO *const mbmi = mi[0];
-
-  const BLOCK_SIZE bsize = mbmi->sb_type;
-  const BLOCK_SIZE bsizec = scale_chroma_bsize(bsize, ssx, ssy);
-  const BLOCK_SIZE plane_bsize = ss_size_lookup[bsizec][ssx][ssy];
-
-  const int block_width = mi_size_wide[plane_bsize];
-  const int block_height = mi_size_high[plane_bsize];
-
-  TX_SIZE max_txsize = max_txsize_rect_lookup[plane_bsize];
-  // The decoder is designed so that it can process 64x64 luma pixels at a
-  // time. If this is a chroma plane with subsampling and bsize corresponds to
-  // a subsampled BLOCK_128X128 then the lookup above will give TX_64X64. That
-  // mustn't be used for the subsampled plane (because it would be bigger than
-  // a 64x64 luma block) so we round down to TX_32X32.
-  if (plane && txsize_sqr_up_map[max_txsize] == TX_64X64) {
-    if (max_txsize == TX_16X64)
-      max_txsize = TX_16X32;
-    else if (max_txsize == TX_64X16)
-      max_txsize = TX_32X16;
-    else
-      max_txsize = TX_32X32;
-  }
-
-  // Step sizes (in 4x4 units) of one maximal transform block. Widths are
-  // scaled with the width log2 and heights with the height log2, matching the
-  // mu_blocks_* computation below.
-  const BLOCK_SIZE txb_size = txsize_to_bsize[max_txsize];
-  const int bw = block_size_wide[txb_size] >> tx_size_wide_log2[0];
-  const int bh = block_size_high[txb_size] >> tx_size_high_log2[0];
-  const BLOCK_SIZE max_unit_bsize = ss_size_lookup[BLOCK_64X64][ssx][ssy];
-  int mu_blocks_wide = block_size_wide[max_unit_bsize] >> tx_size_wide_log2[0];
-  int mu_blocks_high = block_size_high[max_unit_bsize] >> tx_size_high_log2[0];
-
-  mu_blocks_wide = AOMMIN(block_width, mu_blocks_wide);
-  mu_blocks_high = AOMMIN(block_height, mu_blocks_high);
-
-  // Y: Largest tx_size is 64x64, while superblock size can be 128x128.
-  // Here we ensure that setup_tx_block_mask process at most a 64x64 block.
-  // U/V: largest tx size is 32x32.
-  for (int idy = 0; idy < block_height; idy += mu_blocks_high) {
-    for (int idx = 0; idx < block_width; idx += mu_blocks_wide) {
-      // Clip the current unit to the block's extent.
-      const int unit_height = AOMMIN(mu_blocks_high + idy, block_height);
-      const int unit_width = AOMMIN(mu_blocks_wide + idx, block_width);
-      for (int blk_row = idy; blk_row < unit_height; blk_row += bh) {
-        for (int blk_col = idx; blk_col < unit_width; blk_col += bw) {
-          setup_tx_block_mask(cm, mi_row, mi_col, blk_row, blk_col, plane_bsize,
-                              max_txsize, plane, ssx, ssy);
-        }
-      }
-    }
-  }
-}
-
-// Expands the partition tree rooted at (mi_row, mi_col) for block size bsize
-// and calls setup_fix_block_mask() for each resulting coding block.
-// PARTITION_SPLIT recurses with the partition's sub-size; every other
-// partition type is expanded in place. Blocks that start at or beyond
-// cm->height / cm->width are ignored.
-static void setup_block_mask(AV1_COMMON *const cm, int mi_row, int mi_col,
-                             BLOCK_SIZE bsize, int plane, int ssx, int ssy) {
-  if ((mi_row << MI_SIZE_LOG2) >= cm->height) return;
-  if ((mi_col << MI_SIZE_LOG2) >= cm->width) return;
-
-  const PARTITION_TYPE partition = get_partition(cm, mi_row, mi_col, bsize);
-  const BLOCK_SIZE subsize = get_partition_subsize(bsize, partition);
-  const int half = mi_size_wide[bsize] / 2;
-  const int quarter = mi_size_wide[bsize] / 4;
-  const int mid_row = mi_row + half;
-  const int mid_col = mi_col + half;
-
-  // With either subsampling flag set, the second half in each direction is
-  // only visited when bsize is above BLOCK_8X8.
-  const int allow_sub8x8 = (ssx || ssy) ? bsize > BLOCK_8X8 : 1;
-  const int lower_ok =
-      ((mid_row << MI_SIZE_LOG2) < cm->height) & allow_sub8x8;
-  const int right_ok = ((mid_col << MI_SIZE_LOG2) < cm->width) & allow_sub8x8;
-
-  switch (partition) {
-    case PARTITION_NONE:
-      setup_fix_block_mask(cm, mi_row, mi_col, plane, ssx, ssy);
-      break;
-
-    case PARTITION_HORZ:
-      setup_fix_block_mask(cm, mi_row, mi_col, plane, ssx, ssy);
-      if (lower_ok) {
-        setup_fix_block_mask(cm, mid_row, mi_col, plane, ssx, ssy);
-      }
-      break;
-
-    case PARTITION_VERT:
-      setup_fix_block_mask(cm, mi_row, mi_col, plane, ssx, ssy);
-      if (right_ok) {
-        setup_fix_block_mask(cm, mi_row, mid_col, plane, ssx, ssy);
-      }
-      break;
-
-    case PARTITION_SPLIT:
-      // Quadrant order: top-left, top-right, bottom-left, bottom-right.
-      setup_block_mask(cm, mi_row, mi_col, subsize, plane, ssx, ssy);
-      if (right_ok) {
-        setup_block_mask(cm, mi_row, mid_col, subsize, plane, ssx, ssy);
-      }
-      if (lower_ok) {
-        setup_block_mask(cm, mid_row, mi_col, subsize, plane, ssx, ssy);
-      }
-      if (right_ok && lower_ok) {
-        setup_block_mask(cm, mid_row, mid_col, subsize, plane, ssx, ssy);
-      }
-      break;
-
-    case PARTITION_HORZ_A:
-      setup_fix_block_mask(cm, mi_row, mi_col, plane, ssx, ssy);
-      if (right_ok) {
-        setup_fix_block_mask(cm, mi_row, mid_col, plane, ssx, ssy);
-      }
-      if (lower_ok) {
-        setup_fix_block_mask(cm, mid_row, mi_col, plane, ssx, ssy);
-      }
-      break;
-
-    case PARTITION_HORZ_B:
-      setup_fix_block_mask(cm, mi_row, mi_col, plane, ssx, ssy);
-      if (lower_ok) {
-        setup_fix_block_mask(cm, mid_row, mi_col, plane, ssx, ssy);
-      }
-      if (right_ok && lower_ok) {
-        setup_fix_block_mask(cm, mid_row, mid_col, plane, ssx, ssy);
-      }
-      break;
-
-    case PARTITION_VERT_A:
-      setup_fix_block_mask(cm, mi_row, mi_col, plane, ssx, ssy);
-      if (lower_ok) {
-        setup_fix_block_mask(cm, mid_row, mi_col, plane, ssx, ssy);
-      }
-      if (right_ok) {
-        setup_fix_block_mask(cm, mi_row, mid_col, plane, ssx, ssy);
-      }
-      break;
-
-    case PARTITION_VERT_B:
-      setup_fix_block_mask(cm, mi_row, mi_col, plane, ssx, ssy);
-      if (right_ok) {
-        setup_fix_block_mask(cm, mi_row, mid_col, plane, ssx, ssy);
-      }
-      if (lower_ok) {
-        setup_fix_block_mask(cm, mid_row, mid_col, plane, ssx, ssy);
-      }
-      break;
-
-    case PARTITION_HORZ_4: {
-      int strip_row = mi_row;
-      for (int n = 0; n < 4; ++n, strip_row += quarter) {
-        if (n > 0 && (strip_row << MI_SIZE_LOG2) >= cm->height) break;
-        // For plane != 0 and BLOCK_16X16 the odd strips are skipped.
-        if (plane && bsize == BLOCK_16X16 && (n & 0x01)) continue;
-
-        setup_fix_block_mask(cm, strip_row, mi_col, plane, ssx, ssy);
-      }
-      break;
-    }
-
-    case PARTITION_VERT_4: {
-      int strip_col = mi_col;
-      for (int n = 0; n < 4; ++n, strip_col += quarter) {
-        if (n > 0 && strip_col >= cm->mi_cols) break;
-        // For plane != 0 and BLOCK_16X16 the odd strips are skipped.
-        if (plane && bsize == BLOCK_16X16 && (n & 0x01)) continue;
-
-        setup_fix_block_mask(cm, mi_row, strip_col, plane, ssx, ssy);
-      }
-      break;
-    }
-
-    default: assert(0);
-  }
-}
-
-// TODO(chengchen): the masks are not needed for lossless coding, but with
-// segmentation enabled each segment has its own lossless setting.
-//
-// Builds the loop-filter bitmasks of one plane for the superblock at
-// (mi_row, mi_col) in three passes over its 64x64 mask units:
-//   1. zero the plane's left/above masks and filter-level arrays,
-//   2. populate them through setup_block_mask(),
-//   3. fold the TX_32X32 (and, for plane 0, TX_64X64) edge bits into the
-//      TX_16X16 masks and clear the larger ones.
-// Units at or beyond row_end / col_end, or outside the frame, are skipped.
-// The function returns early if get_loop_filter_mask() yields NULL.
-void av1_setup_bitmask(AV1_COMMON *const cm, int mi_row, int mi_col, int plane,
-                       int subsampling_x, int subsampling_y, int row_end,
-                       int col_end) {
-  const int units_per_side = cm->seq_params.mib_size >> MIN_MIB_SIZE_LOG2;
-
-  // Pass 1: clear this plane's state in every covered mask unit.
-  for (int uy = 0; uy < units_per_side; ++uy) {
-    for (int ux = 0; ux < units_per_side; ++ux) {
-      const int row = mi_row + uy * MI_SIZE_64X64;
-      const int col = mi_col + ux * MI_SIZE_64X64;
-      if (row >= row_end || col >= col_end ||
-          (row << MI_SIZE_LOG2) >= cm->height ||
-          (col << MI_SIZE_LOG2) >= cm->width) {
-        continue;
-      }
-
-      LoopFilterMask *const unit = get_loop_filter_mask(cm, row, col);
-      if (unit == NULL) return;
-
-      switch (plane) {
-        case 0:
-          av1_zero(unit->left_y);
-          av1_zero(unit->above_y);
-          av1_zero(unit->lfl_y_ver);
-          av1_zero(unit->lfl_y_hor);
-          break;
-        case 1:
-          av1_zero(unit->left_u);
-          av1_zero(unit->above_u);
-          av1_zero(unit->lfl_u);
-          break;
-        default:
-          av1_zero(unit->left_v);
-          av1_zero(unit->above_v);
-          av1_zero(unit->lfl_v);
-          break;
-      }
-    }
-  }
-
-  // Pass 2: set up the bitmask for the whole superblock.
-  setup_block_mask(cm, mi_row, mi_col, cm->seq_params.sb_size, plane,
-                   subsampling_x, subsampling_y);
-
-  // Pass 3: let TX_16X16 stand in for the larger transform sizes. Even when
-  // the transform is larger, only the longest filter (length 16) is applied.
-  for (int uy = 0; uy < units_per_side; ++uy) {
-    for (int ux = 0; ux < units_per_side; ++ux) {
-      const int row = mi_row + uy * MI_SIZE_64X64;
-      const int col = mi_col + ux * MI_SIZE_64X64;
-      if (row >= row_end || col >= col_end ||
-          (row << MI_SIZE_LOG2) >= cm->height ||
-          (col << MI_SIZE_LOG2) >= cm->width) {
-        continue;
-      }
-
-      LoopFilterMask *const unit = get_loop_filter_mask(cm, row, col);
-      if (unit == NULL) return;
-
-      // Validate the masks before folding.
-      check_loop_filter_masks(unit, plane);
-
-      switch (plane) {
-        case 0:
-          for (int w = 0; w < 4; ++w) {
-            unit->left_y[TX_16X16].bits[w] |=
-                unit->left_y[TX_32X32].bits[w] | unit->left_y[TX_64X64].bits[w];
-            unit->above_y[TX_16X16].bits[w] |=
-                unit->above_y[TX_32X32].bits[w] |
-                unit->above_y[TX_64X64].bits[w];
-
-            unit->left_y[TX_32X32].bits[w] = 0;
-            unit->left_y[TX_64X64].bits[w] = 0;
-            unit->above_y[TX_32X32].bits[w] = 0;
-            unit->above_y[TX_64X64].bits[w] = 0;
-          }
-          break;
-        case 1:
-          for (int w = 0; w < 4; ++w) {
-            unit->left_u[TX_16X16].bits[w] |= unit->left_u[TX_32X32].bits[w];
-            unit->above_u[TX_16X16].bits[w] |= unit->above_u[TX_32X32].bits[w];
-
-            unit->left_u[TX_32X32].bits[w] = 0;
-            unit->above_u[TX_32X32].bits[w] = 0;
-          }
-          break;
-        default:
-          for (int w = 0; w < 4; ++w) {
-            unit->left_v[TX_16X16].bits[w] |= unit->left_v[TX_32X32].bits[w];
-            unit->above_v[TX_16X16].bits[w] |= unit->above_v[TX_32X32].bits[w];
-
-            unit->left_v[TX_32X32].bits[w] = 0;
-            unit->above_v[TX_32X32].bits[w] = 0;
-          }
-          break;
-      }
-
-      // Validate again after folding.
-      check_loop_filter_masks(unit, plane);
-    }
-  }
-}
-
-// Filters vertical edges of two 4-pixel-high rows in a single sweep.
-// Bit 0 of every mask describes the edge at the current position of s; the
-// *_0 masks and lfl belong to the row at s, the *_1 masks and lfl2 to the row
-// 4 * pitch further into the buffer. Each position advances s by 4 and
-// shifts all masks by (1 << subsampling_factor). When both rows need the same
-// filter class the dual kernel is used, otherwise the single-row kernel runs
-// on whichever row has the bit set. The 16x16, 8x8 and 4x4 classes are each
-// checked independently at every position.
-static void filter_selectively_vert_row2(
-    int subsampling_factor, uint8_t *s, int pitch, int plane,
-    uint64_t mask_16x16_0, uint64_t mask_8x8_0, uint64_t mask_4x4_0,
-    uint64_t mask_16x16_1, uint64_t mask_8x8_1, uint64_t mask_4x4_1,
-    const loop_filter_info_n *lfi_n, uint8_t *lfl, uint8_t *lfl2) {
-  const int step = 1 << subsampling_factor;
-  // For plane != 0 the 6-tap kernel replaces both the 14- and 8-tap ones.
-  LpfFunc single16 = plane ? aom_lpf_vertical_6 : aom_lpf_vertical_14;
-  LpfFunc single8 = plane ? aom_lpf_vertical_6 : aom_lpf_vertical_8;
-  uint64_t pending = mask_16x16_0 | mask_8x8_0 | mask_4x4_0 | mask_16x16_1 |
-                     mask_8x8_1 | mask_4x4_1;
-
-  while (pending) {
-    const loop_filter_thresh *const thr0 = lfi_n->lfthr + *lfl;
-    const loop_filter_thresh *const thr1 = lfi_n->lfthr + *lfl2;
-
-    if (pending & 1) {
-      // Selector values: 1 = first row only, 2 = second row only, 3 = both.
-      switch ((int)(mask_16x16_0 & 1) | ((int)(mask_16x16_1 & 1) << 1)) {
-        case 3:
-          if (plane) {
-            aom_lpf_vertical_6_dual(s, pitch, thr0->mblim, thr0->lim,
-                                    thr0->hev_thr, thr1->mblim, thr1->lim,
-                                    thr1->hev_thr);
-          } else {
-            aom_lpf_vertical_14_dual(s, pitch, thr0->mblim, thr0->lim,
-                                     thr0->hev_thr, thr1->mblim, thr1->lim,
-                                     thr1->hev_thr);
-          }
-          break;
-        case 1:
-          single16(s, pitch, thr0->mblim, thr0->lim, thr0->hev_thr);
-          break;
-        case 2:
-          single16(s + 4 * pitch, pitch, thr1->mblim, thr1->lim,
-                   thr1->hev_thr);
-          break;
-        default: break;
-      }
-
-      switch ((int)(mask_8x8_0 & 1) | ((int)(mask_8x8_1 & 1) << 1)) {
-        case 3:
-          if (plane) {
-            aom_lpf_vertical_6_dual(s, pitch, thr0->mblim, thr0->lim,
-                                    thr0->hev_thr, thr1->mblim, thr1->lim,
-                                    thr1->hev_thr);
-          } else {
-            aom_lpf_vertical_8_dual(s, pitch, thr0->mblim, thr0->lim,
-                                    thr0->hev_thr, thr1->mblim, thr1->lim,
-                                    thr1->hev_thr);
-          }
-          break;
-        case 1:
-          single8(s, pitch, thr0->mblim, thr0->lim, thr0->hev_thr);
-          break;
-        case 2:
-          single8(s + 4 * pitch, pitch, thr1->mblim, thr1->lim,
-                  thr1->hev_thr);
-          break;
-        default: break;
-      }
-
-      switch ((int)(mask_4x4_0 & 1) | ((int)(mask_4x4_1 & 1) << 1)) {
-        case 3:
-          aom_lpf_vertical_4_dual(s, pitch, thr0->mblim, thr0->lim,
-                                  thr0->hev_thr, thr1->mblim, thr1->lim,
-                                  thr1->hev_thr);
-          break;
-        case 1:
-          aom_lpf_vertical_4(s, pitch, thr0->mblim, thr0->lim, thr0->hev_thr);
-          break;
-        case 2:
-          aom_lpf_vertical_4(s + 4 * pitch, pitch, thr1->mblim, thr1->lim,
-                             thr1->hev_thr);
-          break;
-        default: break;
-      }
-    }
-
-    // Move on to the next edge position in both rows.
-    s += 4;
-    lfl += step;
-    lfl2 += step;
-    mask_16x16_0 >>= step;
-    mask_8x8_0 >>= step;
-    mask_4x4_0 >>= step;
-    mask_16x16_1 >>= step;
-    mask_8x8_1 >>= step;
-    mask_4x4_1 >>= step;
-    pending >>= step;
-  }
-}
-
-// High-bitdepth counterpart of filter_selectively_vert_row2(): filters the
-// vertical edges of two 4-pixel-high rows of 16-bit samples in one sweep.
-// Bit 0 of every mask describes the edge at the current position of s; the
-// *_0 masks and lfl belong to the row at s, the *_1 masks and lfl2 to the row
-// 4 * pitch further on. bd is passed through to every kernel.
-static void highbd_filter_selectively_vert_row2(
-    int subsampling_factor, uint16_t *s, int pitch, int plane,
-    uint64_t mask_16x16_0, uint64_t mask_8x8_0, uint64_t mask_4x4_0,
-    uint64_t mask_16x16_1, uint64_t mask_8x8_1, uint64_t mask_4x4_1,
-    const loop_filter_info_n *lfi_n, uint8_t *lfl, uint8_t *lfl2, int bd) {
-  const int step = 1 << subsampling_factor;
-  // For plane != 0 the 6-tap kernel replaces both the 14- and 8-tap ones.
-  HbdLpfFunc single16 =
-      plane ? aom_highbd_lpf_vertical_6 : aom_highbd_lpf_vertical_14;
-  HbdLpfFunc single8 =
-      plane ? aom_highbd_lpf_vertical_6 : aom_highbd_lpf_vertical_8;
-  uint64_t pending = mask_16x16_0 | mask_8x8_0 | mask_4x4_0 | mask_16x16_1 |
-                     mask_8x8_1 | mask_4x4_1;
-
-  while (pending) {
-    const loop_filter_thresh *const thr0 = lfi_n->lfthr + *lfl;
-    const loop_filter_thresh *const thr1 = lfi_n->lfthr + *lfl2;
-
-    if (pending & 1) {
-      // Selector values: 1 = first row only, 2 = second row only, 3 = both.
-      switch ((int)(mask_16x16_0 & 1) | ((int)(mask_16x16_1 & 1) << 1)) {
-        case 3:
-          if (plane) {
-            aom_highbd_lpf_vertical_6_dual(s, pitch, thr0->mblim, thr0->lim,
-                                           thr0->hev_thr, thr1->mblim,
-                                           thr1->lim, thr1->hev_thr, bd);
-          } else {
-            aom_highbd_lpf_vertical_14_dual(s, pitch, thr0->mblim, thr0->lim,
-                                            thr0->hev_thr, thr1->mblim,
-                                            thr1->lim, thr1->hev_thr, bd);
-          }
-          break;
-        case 1:
-          single16(s, pitch, thr0->mblim, thr0->lim, thr0->hev_thr, bd);
-          break;
-        case 2:
-          single16(s + 4 * pitch, pitch, thr1->mblim, thr1->lim,
-                   thr1->hev_thr, bd);
-          break;
-        default: break;
-      }
-
-      switch ((int)(mask_8x8_0 & 1) | ((int)(mask_8x8_1 & 1) << 1)) {
-        case 3:
-          if (plane) {
-            aom_highbd_lpf_vertical_6_dual(s, pitch, thr0->mblim, thr0->lim,
-                                           thr0->hev_thr, thr1->mblim,
-                                           thr1->lim, thr1->hev_thr, bd);
-          } else {
-            aom_highbd_lpf_vertical_8_dual(s, pitch, thr0->mblim, thr0->lim,
-                                           thr0->hev_thr, thr1->mblim,
-                                           thr1->lim, thr1->hev_thr, bd);
-          }
-          break;
-        case 1:
-          single8(s, pitch, thr0->mblim, thr0->lim, thr0->hev_thr, bd);
-          break;
-        case 2:
-          single8(s + 4 * pitch, pitch, thr1->mblim, thr1->lim, thr1->hev_thr,
-                  bd);
-          break;
-        default: break;
-      }
-
-      switch ((int)(mask_4x4_0 & 1) | ((int)(mask_4x4_1 & 1) << 1)) {
-        case 3:
-          aom_highbd_lpf_vertical_4_dual(s, pitch, thr0->mblim, thr0->lim,
-                                         thr0->hev_thr, thr1->mblim, thr1->lim,
-                                         thr1->hev_thr, bd);
-          break;
-        case 1:
-          aom_highbd_lpf_vertical_4(s, pitch, thr0->mblim, thr0->lim,
-                                    thr0->hev_thr, bd);
-          break;
-        case 2:
-          aom_highbd_lpf_vertical_4(s + 4 * pitch, pitch, thr1->mblim,
-                                    thr1->lim, thr1->hev_thr, bd);
-          break;
-        default: break;
-      }
-    }
-
-    // Move on to the next edge position in both rows.
-    s += 4;
-    lfl += step;
-    lfl2 += step;
-    mask_16x16_0 >>= step;
-    mask_8x8_0 >>= step;
-    mask_4x4_0 >>= step;
-    mask_16x16_1 >>= step;
-    mask_8x8_1 >>= step;
-    mask_4x4_1 >>= step;
-    pending >>= step;
-  }
-}
-
-// Filters the horizontal edges of one row of 4-pixel-wide positions.
-// Bit 0 of each mask describes the position at s. At most one filter class
-// is applied per position, tried in the order 16x16, 8x8, 4x4. When the
-// current and the following position (bit pattern 3, or 5 with subsampling)
-// share a class, the dual kernel handles both and the walk advances by two
-// positions instead of one.
-static void filter_selectively_horiz(uint8_t *s, int pitch, int plane,
-                                     int subsampling, uint64_t mask_16x16,
-                                     uint64_t mask_8x8, uint64_t mask_4x4,
-                                     const loop_filter_info_n *lfi_n,
-                                     const uint8_t *lfl) {
-  const int step = 1 << subsampling;
-  const unsigned int pair_bits = subsampling ? 5 : 3;
-  // For plane != 0 the 6-tap kernel replaces both the 14- and 8-tap ones.
-  LpfFunc single16 = plane ? aom_lpf_horizontal_6 : aom_lpf_horizontal_14;
-  LpfFunc single8 = plane ? aom_lpf_horizontal_6 : aom_lpf_horizontal_8;
-  uint64_t pending = mask_16x16 | mask_8x8 | mask_4x4;
-
-  while (pending) {
-    const loop_filter_thresh *const cur = lfi_n->lfthr + *lfl;
-    // Thresholds of the following position, needed by the dual kernels.
-    const loop_filter_thresh *const nxt = lfi_n->lfthr + lfl[step];
-    int blocks = 1;
-
-    if (pending & 1) {
-      if (mask_16x16 & 1) {
-        if ((mask_16x16 & pair_bits) != pair_bits) {
-          single16(s, pitch, cur->mblim, cur->lim, cur->hev_thr);
-        } else {
-          if (plane) {
-            aom_lpf_horizontal_6_dual(s, pitch, cur->mblim, cur->lim,
-                                      cur->hev_thr, nxt->mblim, nxt->lim,
-                                      nxt->hev_thr);
-          } else {
-            aom_lpf_horizontal_14_dual(s, pitch, cur->mblim, cur->lim,
-                                       cur->hev_thr, nxt->mblim, nxt->lim,
-                                       nxt->hev_thr);
-          }
-          blocks = 2;
-        }
-      } else if (mask_8x8 & 1) {
-        if ((mask_8x8 & pair_bits) != pair_bits) {
-          single8(s, pitch, cur->mblim, cur->lim, cur->hev_thr);
-        } else {
-          if (plane) {
-            aom_lpf_horizontal_6_dual(s, pitch, cur->mblim, cur->lim,
-                                      cur->hev_thr, nxt->mblim, nxt->lim,
-                                      nxt->hev_thr);
-          } else {
-            aom_lpf_horizontal_8_dual(s, pitch, cur->mblim, cur->lim,
-                                      cur->hev_thr, nxt->mblim, nxt->lim,
-                                      nxt->hev_thr);
-          }
-          blocks = 2;
-        }
-      } else if (mask_4x4 & 1) {
-        if ((mask_4x4 & pair_bits) != pair_bits) {
-          aom_lpf_horizontal_4(s, pitch, cur->mblim, cur->lim, cur->hev_thr);
-        } else {
-          aom_lpf_horizontal_4_dual(s, pitch, cur->mblim, cur->lim,
-                                    cur->hev_thr, nxt->mblim, nxt->lim,
-                                    nxt->hev_thr);
-          blocks = 2;
-        }
-      }
-    }
-
-    // Skip over every position that was just handled.
-    const int consumed = step * blocks;
-    s += 4 * blocks;
-    lfl += consumed;
-    mask_16x16 >>= consumed;
-    mask_8x8 >>= consumed;
-    mask_4x4 >>= consumed;
-    pending >>= consumed;
-  }
-}
-
-// High-bitdepth counterpart of filter_selectively_horiz(): filters the
-// horizontal edges of one row of 4-pixel-wide positions of 16-bit samples.
-// At most one filter class is applied per position (16x16, then 8x8, then
-// 4x4); a dual kernel covers two positions at once when the current and the
-// following position share a class. bd is passed through to every kernel.
-static void highbd_filter_selectively_horiz(
-    uint16_t *s, int pitch, int plane, int subsampling, uint64_t mask_16x16,
-    uint64_t mask_8x8, uint64_t mask_4x4, const loop_filter_info_n *lfi_n,
-    uint8_t *lfl, int bd) {
-  const int step = 1 << subsampling;
-  const unsigned int pair_bits = subsampling ? 5 : 3;
-  // For plane != 0 the 6-tap kernel replaces both the 14- and 8-tap ones.
-  HbdLpfFunc single16 =
-      plane ? aom_highbd_lpf_horizontal_6 : aom_highbd_lpf_horizontal_14;
-  HbdLpfFunc single8 =
-      plane ? aom_highbd_lpf_horizontal_6 : aom_highbd_lpf_horizontal_8;
-  uint64_t pending = mask_16x16 | mask_8x8 | mask_4x4;
-
-  while (pending) {
-    const loop_filter_thresh *const cur = lfi_n->lfthr + *lfl;
-    // Thresholds of the following position, needed by the dual kernels.
-    const loop_filter_thresh *const nxt = lfi_n->lfthr + lfl[step];
-    int blocks = 1;
-
-    if (pending & 1) {
-      if (mask_16x16 & 1) {
-        if ((mask_16x16 & pair_bits) != pair_bits) {
-          single16(s, pitch, cur->mblim, cur->lim, cur->hev_thr, bd);
-        } else {
-          if (plane) {
-            aom_highbd_lpf_horizontal_6_dual(s, pitch, cur->mblim, cur->lim,
-                                             cur->hev_thr, nxt->mblim,
-                                             nxt->lim, nxt->hev_thr, bd);
-          } else {
-            aom_highbd_lpf_horizontal_14_dual(s, pitch, cur->mblim, cur->lim,
-                                              cur->hev_thr, nxt->mblim,
-                                              nxt->lim, nxt->hev_thr, bd);
-          }
-          blocks = 2;
-        }
-      } else if (mask_8x8 & 1) {
-        if ((mask_8x8 & pair_bits) != pair_bits) {
-          single8(s, pitch, cur->mblim, cur->lim, cur->hev_thr, bd);
-        } else {
-          if (plane) {
-            aom_highbd_lpf_horizontal_6_dual(s, pitch, cur->mblim, cur->lim,
-                                             cur->hev_thr, nxt->mblim,
-                                             nxt->lim, nxt->hev_thr, bd);
-          } else {
-            aom_highbd_lpf_horizontal_8_dual(s, pitch, cur->mblim, cur->lim,
-                                             cur->hev_thr, nxt->mblim,
-                                             nxt->lim, nxt->hev_thr, bd);
-          }
-          blocks = 2;
-        }
-      } else if (mask_4x4 & 1) {
-        if ((mask_4x4 & pair_bits) != pair_bits) {
-          aom_highbd_lpf_horizontal_4(s, pitch, cur->mblim, cur->lim,
-                                      cur->hev_thr, bd);
-        } else {
-          aom_highbd_lpf_horizontal_4_dual(s, pitch, cur->mblim, cur->lim,
-                                           cur->hev_thr, nxt->mblim, nxt->lim,
-                                           nxt->hev_thr, bd);
-          blocks = 2;
-        }
-      }
-    }
-
-    // Skip over every position that was just handled.
-    const int consumed = step * blocks;
-    s += 4 * blocks;
-    lfl += consumed;
-    mask_16x16 >>= consumed;
-    mask_8x8 >>= consumed;
-    mask_4x4 >>= consumed;
-    pending >>= consumed;
-  }
-}
-/*
- * Scans one plane row by row (in mi units) and records, in the left_y /
- * left_u / left_v bitmasks of each LoopFilterMask, which vertical transform
- * edges are to be filtered.
- *
- * Within each 64-wide mask unit the walk jumps from transform unit to
- * transform unit. An edge bit is set when the unit is not at column 0 of the
- * plane, the current or previous filter level is non-zero, and either side
- * is not skipped or the position is flagged in lfm->is_vert_border. The bit
- * is stored under the smaller of the two neighbouring transform sizes,
- * capped at TX_16X16.
- *
- * cm        - passed to get_loop_filter_mask() to find the mask unit.
- * plane_ptr - supplies subsampling_x/y and the dst.width/dst.height bounds.
- * plane     - 0, 1 or 2; any other value asserts and returns.
- *
- * NOTE(review): the get_loop_filter_mask() result is dereferenced without
- * the NULL check that av1_setup_bitmask() performs - confirm it cannot be
- * NULL here.
- */
-void av1_build_bitmask_vert_info(
- AV1_COMMON *const cm, const struct macroblockd_plane *const plane_ptr,
- int plane) {
- const int subsampling_x = plane_ptr->subsampling_x;
- const int subsampling_y = plane_ptr->subsampling_y;
- const int row_step = (MI_SIZE >> MI_SIZE_LOG2);
- const int is_uv = plane > 0;
- TX_SIZE tx_size = TX_16X16, prev_tx_size = TX_16X16; // tx_size keeps its previous value when no size bit matches below
- uint8_t level, prev_level = 1;
- int skip, prev_skip = 0;
- int is_coding_block_border;
-
- for (int r = 0; (r << MI_SIZE_LOG2) < plane_ptr->dst.height; r += row_step) {
- const int mi_row = r << subsampling_y;
- const int row = mi_row % MI_SIZE_64X64; // row inside the 64x64 mask unit
- int index = 0;
- const int shift = get_index_shift(0, row, &index); // called with column 0; also writes index
-
- for (int c = 0; (c << MI_SIZE_LOG2) < plane_ptr->dst.width;
- c += (tx_size_wide_unit[TX_64X64] >> subsampling_x)) { // one mask unit per step
- const int mi_col = c << subsampling_x;
- LoopFilterMask *lfm = get_loop_filter_mask(cm, mi_row, mi_col);
-
- for (int col_in_unit = 0;
- col_in_unit < (tx_size_wide_unit[TX_64X64] >> subsampling_x);) { // advanced at the bottom by the transform width
- const int x = (c + col_in_unit) << MI_SIZE_LOG2;
- if (x >= plane_ptr->dst.width) break; // past the right edge of the plane
- const int col = col_in_unit << subsampling_x;
- const uint64_t mask = ((uint64_t)1 << (shift | col)); // col is OR-ed into the row's shift
- skip = lfm->skip.bits[index] & mask;
- is_coding_block_border = lfm->is_vert_border.bits[index] & mask;
- switch (plane) {
- case 0: level = lfm->lfl_y_ver[row][col]; break;
- case 1: level = lfm->lfl_u[row][col]; break;
- case 2: level = lfm->lfl_v[row][col]; break;
- default: assert(plane >= 0 && plane <= 2); return;
- }
- for (TX_SIZE ts = TX_4X4; ts <= TX_64X64; ++ts) { // smallest size whose bit is set wins
- if (is_uv && ts == TX_64X64) continue; // TX_64X64 is never looked up for planes 1 and 2
- if (lfm->tx_size_ver[is_uv][ts].bits[index] & mask) {
- tx_size = ts;
- break;
- }
- }
- if ((c + col_in_unit > 0) && (level || prev_level) && // column 0 has no left neighbour, so prev_* are not consulted there
- (!prev_skip || !skip || is_coding_block_border)) {
- const TX_SIZE min_tx_size =
- AOMMIN(TX_16X16, AOMMIN(tx_size, prev_tx_size));
- const int tmp_row = (mi_row | subsampling_y) % MI_SIZE_64X64;
- const int tmp_col = (col | subsampling_x) % MI_SIZE_64X64;
- const int shift_1 = get_index_shift(tmp_col, tmp_row, &index); // NOTE(review): overwrites index, which later iterations reuse - confirm it cannot change
- const uint64_t mask_1 = ((uint64_t)1 << shift_1);
- switch (plane) {
- case 0: lfm->left_y[min_tx_size].bits[index] |= mask_1; break;
- case 1: lfm->left_u[min_tx_size].bits[index] |= mask_1; break;
- case 2: lfm->left_v[min_tx_size].bits[index] |= mask_1; break;
- default: assert(plane >= 0 && plane <= 2); return;
- }
- }
-
- // remember this unit's state for the next edge decision
- prev_level = level;
- prev_skip = skip;
- prev_tx_size = tx_size;
- // advance by the width of the transform unit just examined
- col_in_unit += tx_size_wide_unit[tx_size];
- }
- }
- }
-}
-/*
- * Scans one plane column by column (in mi units) and records, in the
- * above_y / above_u / above_v bitmasks of each LoopFilterMask, which
- * horizontal transform edges are to be filtered.
- *
- * Within each 64-high mask unit the walk jumps from transform unit to
- * transform unit. An edge bit is set when the unit is not at row 0 of the
- * plane, the current or previous filter level is non-zero, and either side
- * is not skipped or the position is flagged in lfm->is_horz_border. The bit
- * is stored under the smaller of the two neighbouring transform sizes,
- * capped at TX_16X16.
- *
- * cm        - passed to get_loop_filter_mask() to find the mask unit.
- * plane_ptr - supplies subsampling_x/y and the dst.width/dst.height bounds.
- * plane     - 0, 1 or 2; any other value asserts and returns.
- *
- * NOTE(review): the get_loop_filter_mask() result is dereferenced without
- * the NULL check that av1_setup_bitmask() performs - confirm it cannot be
- * NULL here.
- */
-void av1_build_bitmask_horz_info(
- AV1_COMMON *const cm, const struct macroblockd_plane *const plane_ptr,
- int plane) {
- const int subsampling_x = plane_ptr->subsampling_x;
- const int subsampling_y = plane_ptr->subsampling_y;
- const int col_step = (MI_SIZE >> MI_SIZE_LOG2);
- const int is_uv = plane > 0;
- TX_SIZE tx_size = TX_16X16, prev_tx_size = TX_16X16; // tx_size keeps its previous value when no size bit matches below
- uint8_t level, prev_level = 1;
- int skip, prev_skip = 0;
- int is_coding_block_border;
-
- for (int c = 0; (c << MI_SIZE_LOG2) < plane_ptr->dst.width; c += col_step) {
- const int mi_col = c << subsampling_x;
- const int col = mi_col % MI_SIZE_64X64; // column inside the 64x64 mask unit
-
- for (int r = 0; (r << MI_SIZE_LOG2) < plane_ptr->dst.height;
- r += (tx_size_high_unit[TX_64X64] >> subsampling_y)) { // one mask unit per step
- const int mi_row = r << subsampling_y;
- LoopFilterMask *lfm = get_loop_filter_mask(cm, mi_row, mi_col);
-
- for (int r_in_unit = 0;
- r_in_unit < (tx_size_high_unit[TX_64X64] >> subsampling_y);) { // advanced at the bottom by the transform height
- const int y = (r + r_in_unit) << MI_SIZE_LOG2;
- if (y >= plane_ptr->dst.height) break; // past the bottom edge of the plane
- const int row = r_in_unit << subsampling_y;
- int index = 0;
- const int shift = get_index_shift(col, row, &index); // also writes index
- const uint64_t mask = ((uint64_t)1 << shift);
- skip = lfm->skip.bits[index] & mask;
- is_coding_block_border = lfm->is_horz_border.bits[index] & mask;
- switch (plane) {
- case 0: level = lfm->lfl_y_hor[row][col]; break;
- case 1: level = lfm->lfl_u[row][col]; break;
- case 2: level = lfm->lfl_v[row][col]; break;
- default: assert(plane >= 0 && plane <= 2); return;
- }
- for (TX_SIZE ts = TX_4X4; ts <= TX_64X64; ++ts) { // smallest size whose bit is set wins
- if (is_uv && ts == TX_64X64) continue; // TX_64X64 is never looked up for planes 1 and 2
- if (lfm->tx_size_hor[is_uv][ts].bits[index] & mask) {
- tx_size = ts;
- break;
- }
- }
- if ((r + r_in_unit > 0) && (level || prev_level) && // row 0 has no neighbour above, so prev_* are not consulted there
- (!prev_skip || !skip || is_coding_block_border)) {
- const TX_SIZE min_tx_size =
- AOMMIN(TX_16X16, AOMMIN(tx_size, prev_tx_size));
- const int tmp_row = (row | subsampling_y) % MI_SIZE_64X64;
- const int tmp_col = (mi_col | subsampling_x) % MI_SIZE_64X64;
- const int shift_1 = get_index_shift(tmp_col, tmp_row, &index); // rewrites index for the store below
- const uint64_t mask_1 = ((uint64_t)1 << shift_1);
-
- switch (plane) {
- case 0: lfm->above_y[min_tx_size].bits[index] |= mask_1; break;
- case 1: lfm->above_u[min_tx_size].bits[index] |= mask_1; break;
- case 2: lfm->above_v[min_tx_size].bits[index] |= mask_1; break;
- default: assert(plane >= 0 && plane <= 2); return;
- }
- }
-
- // remember this unit's state for the next edge decision
- prev_level = level;
- prev_skip = skip;
- prev_tx_size = tx_size;
- // advance by the height of the transform unit just examined
- r_in_unit += tx_size_high_unit[tx_size];
- }
- }
- }
-}
-
-// Runs the vertical-edge filters over the mask unit that contains
-// (mi_row, mi_col) for plane pl, two mask rows per iteration. The 16-bit
-// slices for both rows are cut out of the left_* bitmasks and handed, with
-// the matching filter-level rows, to the 8-bit or high-bitdepth row filter
-// depending on cm->seq_params.use_highbitdepth. plane_ptr->dst.buf is left
-// unchanged on return. pl must be 0, 1 or 2; other values assert and return.
-void av1_filter_block_plane_bitmask_vert(
-    AV1_COMMON *const cm, struct macroblockd_plane *const plane_ptr, int pl,
-    int mi_row, int mi_col) {
-  struct buf_2d *const dst = &plane_ptr->dst;
-  const int ssx = plane_ptr->subsampling_x;
-  const int ssy = plane_ptr->subsampling_y;
-  const uint64_t row_bits = 0xffff;
-  const int next_row = 1 << ssy;
-  const int pair_step = 2 << ssy;
-  const int pair_stride = (dst->stride << MI_SIZE_LOG2) << 1;
-  // Walk the rows with a private cursor so dst->buf itself never moves.
-  uint8_t *cursor = dst->buf;
-  LoopFilterMask *lfm = get_loop_filter_mask(cm, mi_row, mi_col);
-  assert(lfm);
-
-  for (int r = 0;
-       ((mi_row + r) << MI_SIZE_LOG2) < cm->height && r < MI_SIZE_64X64;
-       r += pair_step, cursor += pair_stride) {
-    const int row = r | ssy;
-    const int row_below = row + next_row;
-    const int col = ssx;
-    int index = 0;
-    const int shift = get_index_shift(col, row, &index);
-    int index_below = 0;
-    const int shift_below = get_index_shift(col, row_below, &index_below);
-    uint64_t bits16 = 0;
-    uint64_t bits8 = 0;
-    uint64_t bits4 = 0;
-    uint8_t *lfl;
-    uint8_t *lfl2;
-
-    switch (pl) {
-      case 0:
-        bits16 = lfm->left_y[TX_16X16].bits[index];
-        bits8 = lfm->left_y[TX_8X8].bits[index];
-        bits4 = lfm->left_y[TX_4X4].bits[index];
-        lfl = &lfm->lfl_y_ver[row][col];
-        lfl2 = &lfm->lfl_y_ver[row_below][col];
-        break;
-      case 1:
-        bits16 = lfm->left_u[TX_16X16].bits[index];
-        bits8 = lfm->left_u[TX_8X8].bits[index];
-        bits4 = lfm->left_u[TX_4X4].bits[index];
-        lfl = &lfm->lfl_u[row][col];
-        lfl2 = &lfm->lfl_u[row_below][col];
-        break;
-      case 2:
-        bits16 = lfm->left_v[TX_16X16].bits[index];
-        bits8 = lfm->left_v[TX_8X8].bits[index];
-        bits4 = lfm->left_v[TX_4X4].bits[index];
-        lfl = &lfm->lfl_v[row][col];
-        lfl2 = &lfm->lfl_v[row_below][col];
-        break;
-      default: assert(pl >= 0 && pl <= 2); return;
-    }
-
-    // Both rows are read from the same 64-bit word, at different shifts.
-    const uint64_t top16 = (bits16 >> shift) & row_bits;
-    const uint64_t top8 = (bits8 >> shift) & row_bits;
-    const uint64_t top4 = (bits4 >> shift) & row_bits;
-    const uint64_t bot16 = (bits16 >> shift_below) & row_bits;
-    const uint64_t bot8 = (bits8 >> shift_below) & row_bits;
-    const uint64_t bot4 = (bits4 >> shift_below) & row_bits;
-
-    if (cm->seq_params.use_highbitdepth) {
-      highbd_filter_selectively_vert_row2(
-          ssx, CONVERT_TO_SHORTPTR(cursor), dst->stride, pl, top16, top8, top4,
-          bot16, bot8, bot4, &cm->lf_info, lfl, lfl2,
-          (int)cm->seq_params.bit_depth);
-    } else {
-      filter_selectively_vert_row2(ssx, cursor, dst->stride, pl, top16, top8,
-                                   top4, bot16, bot8, bot4, &cm->lf_info, lfl,
-                                   lfl2);
-    }
-  }
-}
-
-// Runs the horizontal-edge filters over the mask unit that contains
-// (mi_row, mi_col) for plane pl, one mask row per iteration. The row with
-// mi_row + r == 0 is skipped. For every other row the 16-bit slice of the
-// above_* bitmasks is handed, with the matching filter-level row, to the
-// 8-bit or high-bitdepth row filter. plane_ptr->dst.buf is left unchanged on
-// return. pl must be 0, 1 or 2; other values assert and return.
-void av1_filter_block_plane_bitmask_horz(
-    AV1_COMMON *const cm, struct macroblockd_plane *const plane_ptr, int pl,
-    int mi_row, int mi_col) {
-  struct buf_2d *const dst = &plane_ptr->dst;
-  const int ssx = plane_ptr->subsampling_x;
-  const int ssy = plane_ptr->subsampling_y;
-  const uint64_t row_bits = 0xffff;
-  const int row_step = 1 << ssy;
-  const int row_stride = dst->stride << MI_SIZE_LOG2;
-  // Walk the rows with a private cursor so dst->buf itself never moves.
-  uint8_t *cursor = dst->buf;
-  LoopFilterMask *lfm = get_loop_filter_mask(cm, mi_row, mi_col);
-  assert(lfm);
-
-  for (int r = 0;
-       ((mi_row + r) << MI_SIZE_LOG2) < cm->height && r < MI_SIZE_64X64;
-       r += row_step, cursor += row_stride) {
-    // The first row of the frame is not filtered.
-    if (mi_row + r == 0) continue;
-
-    const int row = r | ssy;
-    const int col = ssx;
-    int index = 0;
-    const int shift = get_index_shift(col, row, &index);
-    uint64_t bits16 = 0;
-    uint64_t bits8 = 0;
-    uint64_t bits4 = 0;
-    uint8_t *lfl;
-
-    switch (pl) {
-      case 0:
-        bits16 = lfm->above_y[TX_16X16].bits[index];
-        bits8 = lfm->above_y[TX_8X8].bits[index];
-        bits4 = lfm->above_y[TX_4X4].bits[index];
-        lfl = &lfm->lfl_y_hor[row][col];
-        break;
-      case 1:
-        bits16 = lfm->above_u[TX_16X16].bits[index];
-        bits8 = lfm->above_u[TX_8X8].bits[index];
-        bits4 = lfm->above_u[TX_4X4].bits[index];
-        lfl = &lfm->lfl_u[row][col];
-        break;
-      case 2:
-        bits16 = lfm->above_v[TX_16X16].bits[index];
-        bits8 = lfm->above_v[TX_8X8].bits[index];
-        bits4 = lfm->above_v[TX_4X4].bits[index];
-        lfl = &lfm->lfl_v[row][col];
-        break;
-      default: assert(pl >= 0 && pl <= 2); return;
-    }
-
-    bits16 = (bits16 >> shift) & row_bits;
-    bits8 = (bits8 >> shift) & row_bits;
-    bits4 = (bits4 >> shift) & row_bits;
-
-    if (cm->seq_params.use_highbitdepth) {
-      highbd_filter_selectively_horiz(CONVERT_TO_SHORTPTR(cursor), dst->stride,
-                                      pl, ssx, bits16, bits8, bits4,
-                                      &cm->lf_info, lfl,
-                                      (int)cm->seq_params.bit_depth);
-    } else {
-      filter_selectively_horiz(cursor, dst->stride, pl, ssx, bits16, bits8,
-                               bits4, &cm->lf_info, lfl);
-    }
-  }
-}
-
-// Runs the vertical-edge filters over the superblock at (mi_row, mi_col) for
-// plane pl. Rows are processed two at a time; within a row pair, each
-// 64-wide mask unit inside the frame is looked up and filtered separately.
-// plane_ptr->dst.buf is advanced by 2 * MI_SIZE * stride per processed row
-// pair and is NOT restored on return. pl must be 0, 1 or 2; other values
-// assert and return.
-void av1_filter_block_plane_ver(AV1_COMMON *const cm,
-                                struct macroblockd_plane *const plane_ptr,
-                                int pl, int mi_row, int mi_col) {
-  struct buf_2d *const dst = &plane_ptr->dst;
-  const int ssx = plane_ptr->subsampling_x;
-  const int ssy = plane_ptr->subsampling_y;
-  const uint64_t row_bits = 0xffff;
-  const int next_row = 1 << ssy;
-  const int pair_step = 2 << ssy;
-
-  for (int r = 0; r < cm->seq_params.mib_size; r += pair_step) {
-    const int cur_mi_row = mi_row + r;
-    if ((cur_mi_row << MI_SIZE_LOG2) >= cm->height) break;
-
-    for (int c = 0; c < cm->seq_params.mib_size; c += MI_SIZE_64X64) {
-      const int cur_mi_col = mi_col + c;
-      if ((cur_mi_col << MI_SIZE_LOG2) >= cm->width) break;
-
-      // Start of this mask unit within the current row pair.
-      uint8_t *const unit_buf = dst->buf + ((c << MI_SIZE_LOG2) >> ssx);
-      LoopFilterMask *lfm = get_loop_filter_mask(cm, cur_mi_row, cur_mi_col);
-      assert(lfm);
-
-      const int row = (cur_mi_row | ssy) % MI_SIZE_64X64;
-      const int col = (cur_mi_col | ssx) % MI_SIZE_64X64;
-      int index = 0;
-      const int shift = get_index_shift(col, row, &index);
-      // The second row is read from the same bitmask word as the first,
-      // only at its own shift.
-      const int row_below = row + next_row;
-      int index_below = 0;
-      const int shift_below = get_index_shift(col, row_below, &index_below);
-      uint64_t bits16 = 0;
-      uint64_t bits8 = 0;
-      uint64_t bits4 = 0;
-      uint8_t *lfl;
-      uint8_t *lfl2;
-
-      switch (pl) {
-        case 0:
-          bits16 = lfm->left_y[TX_16X16].bits[index];
-          bits8 = lfm->left_y[TX_8X8].bits[index];
-          bits4 = lfm->left_y[TX_4X4].bits[index];
-          lfl = &lfm->lfl_y_ver[row][col];
-          lfl2 = &lfm->lfl_y_ver[row_below][col];
-          break;
-        case 1:
-          bits16 = lfm->left_u[TX_16X16].bits[index];
-          bits8 = lfm->left_u[TX_8X8].bits[index];
-          bits4 = lfm->left_u[TX_4X4].bits[index];
-          lfl = &lfm->lfl_u[row][col];
-          lfl2 = &lfm->lfl_u[row_below][col];
-          break;
-        case 2:
-          bits16 = lfm->left_v[TX_16X16].bits[index];
-          bits8 = lfm->left_v[TX_8X8].bits[index];
-          bits4 = lfm->left_v[TX_4X4].bits[index];
-          lfl = &lfm->lfl_v[row][col];
-          lfl2 = &lfm->lfl_v[row_below][col];
-          break;
-        default: assert(pl >= 0 && pl <= 2); return;
-      }
-
-      const uint64_t top16 = (bits16 >> shift) & row_bits;
-      const uint64_t top8 = (bits8 >> shift) & row_bits;
-      const uint64_t top4 = (bits4 >> shift) & row_bits;
-      const uint64_t bot16 = (bits16 >> shift_below) & row_bits;
-      const uint64_t bot8 = (bits8 >> shift_below) & row_bits;
-      const uint64_t bot4 = (bits4 >> shift_below) & row_bits;
-
-      if (cm->seq_params.use_highbitdepth) {
-        highbd_filter_selectively_vert_row2(
-            ssx, CONVERT_TO_SHORTPTR(unit_buf), dst->stride, pl, top16, top8,
-            top4, bot16, bot8, bot4, &cm->lf_info, lfl, lfl2,
-            (int)cm->seq_params.bit_depth);
-      } else {
-        filter_selectively_vert_row2(ssx, unit_buf, dst->stride, pl, top16,
-                                     top8, top4, bot16, bot8, bot4,
-                                     &cm->lf_info, lfl, lfl2);
-      }
-    }
-
-    // Step the plane buffer down to the next pair of rows.
-    dst->buf += 2 * MI_SIZE * dst->stride;
-  }
-}
-
-// Runs the horizontal-edge filters over the superblock at (mi_row, mi_col)
-// for plane pl, one row at a time; within a row, each 64-wide mask unit
-// inside the frame is looked up and filtered separately. The row with
-// mi_row + r == 0 is not filtered. plane_ptr->dst.buf is advanced by
-// MI_SIZE * stride per processed row and is NOT restored on return.
-// pl must be 0, 1 or 2; other values assert and return.
-void av1_filter_block_plane_hor(AV1_COMMON *const cm,
-                                struct macroblockd_plane *const plane_ptr,
-                                int pl, int mi_row, int mi_col) {
-  struct buf_2d *const dst = &plane_ptr->dst;
-  const int ssx = plane_ptr->subsampling_x;
-  const int ssy = plane_ptr->subsampling_y;
-  const uint64_t row_bits = 0xffff;
-  const int row_step = 1 << ssy;
-
-  for (int r = 0; r < cm->seq_params.mib_size; r += row_step) {
-    const int cur_mi_row = mi_row + r;
-    if ((cur_mi_row << MI_SIZE_LOG2) >= cm->height) break;
-
-    for (int c = 0; c < cm->seq_params.mib_size; c += MI_SIZE_64X64) {
-      const int cur_mi_col = mi_col + c;
-      if ((cur_mi_col << MI_SIZE_LOG2) >= cm->width) break;
-
-      // The first row of the frame is not filtered.
-      if (cur_mi_row == 0) continue;
-
-      // Start of this mask unit within the current row.
-      uint8_t *const unit_buf = dst->buf + ((c << MI_SIZE_LOG2) >> ssx);
-      LoopFilterMask *lfm = get_loop_filter_mask(cm, cur_mi_row, cur_mi_col);
-      assert(lfm);
-
-      const int row = (cur_mi_row | ssy) % MI_SIZE_64X64;
-      const int col = (cur_mi_col | ssx) % MI_SIZE_64X64;
-      int index = 0;
-      const int shift = get_index_shift(col, row, &index);
-      uint64_t bits16 = 0;
-      uint64_t bits8 = 0;
-      uint64_t bits4 = 0;
-      uint8_t *lfl;
-
-      switch (pl) {
-        case 0:
-          bits16 = lfm->above_y[TX_16X16].bits[index];
-          bits8 = lfm->above_y[TX_8X8].bits[index];
-          bits4 = lfm->above_y[TX_4X4].bits[index];
-          lfl = &lfm->lfl_y_hor[row][col];
-          break;
-        case 1:
-          bits16 = lfm->above_u[TX_16X16].bits[index];
-          bits8 = lfm->above_u[TX_8X8].bits[index];
-          bits4 = lfm->above_u[TX_4X4].bits[index];
-          lfl = &lfm->lfl_u[row][col];
-          break;
-        case 2:
-          bits16 = lfm->above_v[TX_16X16].bits[index];
-          bits8 = lfm->above_v[TX_8X8].bits[index];
-          bits4 = lfm->above_v[TX_4X4].bits[index];
-          lfl = &lfm->lfl_v[row][col];
-          break;
-        default: assert(pl >= 0 && pl <= 2); return;
-      }
-
-      bits16 = (bits16 >> shift) & row_bits;
-      bits8 = (bits8 >> shift) & row_bits;
-      bits4 = (bits4 >> shift) & row_bits;
-
-      if (cm->seq_params.use_highbitdepth) {
-        highbd_filter_selectively_horiz(CONVERT_TO_SHORTPTR(unit_buf),
-                                        dst->stride, pl, ssx, bits16, bits8,
-                                        bits4, &cm->lf_info, lfl,
-                                        (int)cm->seq_params.bit_depth);
-      } else {
-        filter_selectively_horiz(unit_buf, dst->stride, pl, ssx, bits16, bits8,
-                                 bits4, &cm->lf_info, lfl);
-      }
-    }
-
-    // Step the plane buffer down to the next row.
-    dst->buf += MI_SIZE * dst->stride;
-  }
-}
-#endif // LOOP_FILTER_BITMASK
-
-// Returns the transform size that applies to the edge at (mi_row, mi_col) of
-// the given plane, reduced to the dimension across the edge direction.
-// Segments flagged in xd->lossless always yield TX_4X4 (xd may be NULL, in
-// which case the lossless check is skipped). mbmi must not be NULL.
-static TX_SIZE get_transform_size(const MACROBLOCKD *const xd,
-                                  const MB_MODE_INFO *const mbmi,
-                                  const EDGE_DIR edge_dir, const int mi_row,
-                                  const int mi_col, const int plane,
-                                  const struct macroblockd_plane *plane_ptr) {
-  assert(mbmi != NULL);
-  if (xd && xd->lossless[mbmi->segment_id]) return TX_4X4;
-
-  const int is_luma = (plane == AOM_PLANE_Y);
-  TX_SIZE tx_size;
-  if (is_luma) {
-    tx_size = mbmi->tx_size;
-  } else {
-    tx_size = av1_get_max_uv_txsize(mbmi->sb_type, plane_ptr->subsampling_x,
-                                    plane_ptr->subsampling_y);
-  }
-  assert(tx_size < TX_SIZES_ALL);
-
-  // Non-skipped inter blocks in luma look the size up in inter_tx_size[],
-  // indexed by the position inside the block.
-  if (is_luma && is_inter_block(mbmi) && !mbmi->skip) {
-    const BLOCK_SIZE sb_type = mbmi->sb_type;
-    const int row_in_blk = mi_row & (mi_size_high[sb_type] - 1);
-    const int col_in_blk = mi_col & (mi_size_wide[sb_type] - 1);
-    const int txb_idx = av1_get_txb_size_index(sb_type, row_in_blk, col_in_blk);
-    const TX_SIZE inter_size = mbmi->inter_tx_size[txb_idx];
-    assert(inter_size < TX_SIZES_ALL);
-    tx_size = inter_size;
-  }
-
-  // Chroma and non-square transforms have to be mapped to the size along one
-  // direction: a vertical edge is filtered horizontally, so it uses the
-  // horizontal map, and a horizontal edge uses the vertical map.
-  if (edge_dir == VERT_EDGE) return txsize_horz_map[tx_size];
-  return txsize_vert_map[tx_size];
-}
-
-typedef struct AV1_DEBLOCKING_PARAMETERS {
- // length of the filter applied to the outer edge
- uint32_t filter_length;
- // deblocking limits
- const uint8_t *lim;
- const uint8_t *mblim;
- const uint8_t *hev_thr;
-} AV1_DEBLOCKING_PARAMETERS;
-
-// Return TX_SIZE from get_transform_size(), so it is plane and direction
-// awared
-static TX_SIZE set_lpf_parameters(
- AV1_DEBLOCKING_PARAMETERS *const params, const ptrdiff_t mode_step,
- const AV1_COMMON *const cm, const MACROBLOCKD *const xd,
- const EDGE_DIR edge_dir, const uint32_t x, const uint32_t y,
- const int plane, const struct macroblockd_plane *const plane_ptr) {
- // reset to initial values
- params->filter_length = 0;
-
- // no deblocking is required
- const uint32_t width = plane_ptr->dst.width;
- const uint32_t height = plane_ptr->dst.height;
- if ((width <= x) || (height <= y)) {
- // just return the smallest transform unit size
- return TX_4X4;
- }
-
- const uint32_t scale_horz = plane_ptr->subsampling_x;
- const uint32_t scale_vert = plane_ptr->subsampling_y;
- // for sub8x8 block, chroma prediction mode is obtained from the bottom/right
- // mi structure of the co-located 8x8 luma block. so for chroma plane, mi_row
- // and mi_col should map to the bottom/right mi structure, i.e, both mi_row
- // and mi_col should be odd number for chroma plane.
- const int mi_row = scale_vert | ((y << scale_vert) >> MI_SIZE_LOG2);
- const int mi_col = scale_horz | ((x << scale_horz) >> MI_SIZE_LOG2);
- MB_MODE_INFO **mi = cm->mi_grid_visible + mi_row * cm->mi_stride + mi_col;
- const MB_MODE_INFO *mbmi = mi[0];
- // If current mbmi is not correctly setup, return an invalid value to stop
- // filtering. One example is that if this tile is not coded, then its mbmi
- // it not set up.
- if (mbmi == NULL) return TX_INVALID;
-
- const TX_SIZE ts =
- get_transform_size(xd, mi[0], edge_dir, mi_row, mi_col, plane, plane_ptr);
-
- {
- const uint32_t coord = (VERT_EDGE == edge_dir) ? (x) : (y);
- const uint32_t transform_masks =
- edge_dir == VERT_EDGE ? tx_size_wide[ts] - 1 : tx_size_high[ts] - 1;
- const int32_t tu_edge = (coord & transform_masks) ? (0) : (1);
-
- if (!tu_edge) return ts;
-
- // prepare outer edge parameters. deblock the edge if it's an edge of a TU
- {
- const uint32_t curr_level =
- get_filter_level(cm, &cm->lf_info, edge_dir, plane, mbmi);
- const int curr_skipped = mbmi->skip && is_inter_block(mbmi);
- uint32_t level = curr_level;
- if (coord) {
- {
- const MB_MODE_INFO *const mi_prev = *(mi - mode_step);
- if (mi_prev == NULL) return TX_INVALID;
- const int pv_row =
- (VERT_EDGE == edge_dir) ? (mi_row) : (mi_row - (1 << scale_vert));
- const int pv_col =
- (VERT_EDGE == edge_dir) ? (mi_col - (1 << scale_horz)) : (mi_col);
- const TX_SIZE pv_ts = get_transform_size(
- xd, mi_prev, edge_dir, pv_row, pv_col, plane, plane_ptr);
-
- const uint32_t pv_lvl =
- get_filter_level(cm, &cm->lf_info, edge_dir, plane, mi_prev);
-
- const int pv_skip = mi_prev->skip && is_inter_block(mi_prev);
- const BLOCK_SIZE bsize =
- get_plane_block_size(mbmi->sb_type, plane_ptr->subsampling_x,
- plane_ptr->subsampling_y);
- const int prediction_masks = edge_dir == VERT_EDGE
- ? block_size_wide[bsize] - 1
- : block_size_high[bsize] - 1;
- const int32_t pu_edge = !(coord & prediction_masks);
- // if the current and the previous blocks are skipped,
- // deblock the edge if the edge belongs to a PU's edge only.
- if ((curr_level || pv_lvl) &&
- (!pv_skip || !curr_skipped || pu_edge)) {
- const TX_SIZE min_ts = AOMMIN(ts, pv_ts);
- if (TX_4X4 >= min_ts) {
- params->filter_length = 4;
- } else if (TX_8X8 == min_ts) {
- if (plane != 0)
- params->filter_length = 6;
- else
- params->filter_length = 8;
- } else {
- params->filter_length = 14;
- // No wide filtering for chroma plane
- if (plane != 0) {
- params->filter_length = 6;
- }
- }
-
- // update the level if the current block is skipped,
- // but the previous one is not
- level = (curr_level) ? (curr_level) : (pv_lvl);
- }
- }
- }
- // prepare common parameters
- if (params->filter_length) {
- const loop_filter_thresh *const limits = cm->lf_info.lfthr + level;
- params->lim = limits->lim;
- params->mblim = limits->mblim;
- params->hev_thr = limits->hev_thr;
- }
- }
- }
-
- return ts;
-}
-
-void av1_filter_block_plane_vert(const AV1_COMMON *const cm,
- const MACROBLOCKD *const xd, const int plane,
- const MACROBLOCKD_PLANE *const plane_ptr,
- const uint32_t mi_row, const uint32_t mi_col) {
- const int row_step = MI_SIZE >> MI_SIZE_LOG2;
- const uint32_t scale_horz = plane_ptr->subsampling_x;
- const uint32_t scale_vert = plane_ptr->subsampling_y;
- uint8_t *const dst_ptr = plane_ptr->dst.buf;
- const int dst_stride = plane_ptr->dst.stride;
- const int y_range = (MAX_MIB_SIZE >> scale_vert);
- const int x_range = (MAX_MIB_SIZE >> scale_horz);
- const int use_highbitdepth = cm->seq_params.use_highbitdepth;
- const aom_bit_depth_t bit_depth = cm->seq_params.bit_depth;
- for (int y = 0; y < y_range; y += row_step) {
- uint8_t *p = dst_ptr + y * MI_SIZE * dst_stride;
- for (int x = 0; x < x_range;) {
- // inner loop always filter vertical edges in a MI block. If MI size
- // is 8x8, it will filter the vertical edge aligned with a 8x8 block.
- // If 4x4 trasnform is used, it will then filter the internal edge
- // aligned with a 4x4 block
- const uint32_t curr_x = ((mi_col * MI_SIZE) >> scale_horz) + x * MI_SIZE;
- const uint32_t curr_y = ((mi_row * MI_SIZE) >> scale_vert) + y * MI_SIZE;
- uint32_t advance_units;
- TX_SIZE tx_size;
- AV1_DEBLOCKING_PARAMETERS params;
- memset(&params, 0, sizeof(params));
-
- tx_size =
- set_lpf_parameters(&params, ((ptrdiff_t)1 << scale_horz), cm, xd,
- VERT_EDGE, curr_x, curr_y, plane, plane_ptr);
- if (tx_size == TX_INVALID) {
- params.filter_length = 0;
- tx_size = TX_4X4;
- }
-
- switch (params.filter_length) {
- // apply 4-tap filtering
- case 4:
- if (use_highbitdepth)
- aom_highbd_lpf_vertical_4(CONVERT_TO_SHORTPTR(p), dst_stride,
- params.mblim, params.lim, params.hev_thr,
- bit_depth);
- else
- aom_lpf_vertical_4(p, dst_stride, params.mblim, params.lim,
- params.hev_thr);
- break;
- case 6: // apply 6-tap filter for chroma plane only
- assert(plane != 0);
- if (use_highbitdepth)
- aom_highbd_lpf_vertical_6(CONVERT_TO_SHORTPTR(p), dst_stride,
- params.mblim, params.lim, params.hev_thr,
- bit_depth);
- else
- aom_lpf_vertical_6(p, dst_stride, params.mblim, params.lim,
- params.hev_thr);
- break;
- // apply 8-tap filtering
- case 8:
- if (use_highbitdepth)
- aom_highbd_lpf_vertical_8(CONVERT_TO_SHORTPTR(p), dst_stride,
- params.mblim, params.lim, params.hev_thr,
- bit_depth);
- else
- aom_lpf_vertical_8(p, dst_stride, params.mblim, params.lim,
- params.hev_thr);
- break;
- // apply 14-tap filtering
- case 14:
- if (use_highbitdepth)
- aom_highbd_lpf_vertical_14(CONVERT_TO_SHORTPTR(p), dst_stride,
- params.mblim, params.lim, params.hev_thr,
- bit_depth);
- else
- aom_lpf_vertical_14(p, dst_stride, params.mblim, params.lim,
- params.hev_thr);
- break;
- // no filtering
- default: break;
- }
- // advance the destination pointer
- advance_units = tx_size_wide_unit[tx_size];
- x += advance_units;
- p += advance_units * MI_SIZE;
- }
- }
-}
-
-void av1_filter_block_plane_horz(const AV1_COMMON *const cm,
- const MACROBLOCKD *const xd, const int plane,
- const MACROBLOCKD_PLANE *const plane_ptr,
- const uint32_t mi_row, const uint32_t mi_col) {
- const int col_step = MI_SIZE >> MI_SIZE_LOG2;
- const uint32_t scale_horz = plane_ptr->subsampling_x;
- const uint32_t scale_vert = plane_ptr->subsampling_y;
- uint8_t *const dst_ptr = plane_ptr->dst.buf;
- const int dst_stride = plane_ptr->dst.stride;
- const int y_range = (MAX_MIB_SIZE >> scale_vert);
- const int x_range = (MAX_MIB_SIZE >> scale_horz);
- const int use_highbitdepth = cm->seq_params.use_highbitdepth;
- const aom_bit_depth_t bit_depth = cm->seq_params.bit_depth;
- for (int x = 0; x < x_range; x += col_step) {
- uint8_t *p = dst_ptr + x * MI_SIZE;
- for (int y = 0; y < y_range;) {
- // inner loop always filter vertical edges in a MI block. If MI size
- // is 8x8, it will first filter the vertical edge aligned with a 8x8
- // block. If 4x4 trasnform is used, it will then filter the internal
- // edge aligned with a 4x4 block
- const uint32_t curr_x = ((mi_col * MI_SIZE) >> scale_horz) + x * MI_SIZE;
- const uint32_t curr_y = ((mi_row * MI_SIZE) >> scale_vert) + y * MI_SIZE;
- uint32_t advance_units;
- TX_SIZE tx_size;
- AV1_DEBLOCKING_PARAMETERS params;
- memset(&params, 0, sizeof(params));
-
- tx_size =
- set_lpf_parameters(&params, (cm->mi_stride << scale_vert), cm, xd,
- HORZ_EDGE, curr_x, curr_y, plane, plane_ptr);
- if (tx_size == TX_INVALID) {
- params.filter_length = 0;
- tx_size = TX_4X4;
- }
-
- switch (params.filter_length) {
- // apply 4-tap filtering
- case 4:
- if (use_highbitdepth)
- aom_highbd_lpf_horizontal_4(CONVERT_TO_SHORTPTR(p), dst_stride,
- params.mblim, params.lim,
- params.hev_thr, bit_depth);
- else
- aom_lpf_horizontal_4(p, dst_stride, params.mblim, params.lim,
- params.hev_thr);
- break;
- // apply 6-tap filtering
- case 6:
- assert(plane != 0);
- if (use_highbitdepth)
- aom_highbd_lpf_horizontal_6(CONVERT_TO_SHORTPTR(p), dst_stride,
- params.mblim, params.lim,
- params.hev_thr, bit_depth);
- else
- aom_lpf_horizontal_6(p, dst_stride, params.mblim, params.lim,
- params.hev_thr);
- break;
- // apply 8-tap filtering
- case 8:
- if (use_highbitdepth)
- aom_highbd_lpf_horizontal_8(CONVERT_TO_SHORTPTR(p), dst_stride,
- params.mblim, params.lim,
- params.hev_thr, bit_depth);
- else
- aom_lpf_horizontal_8(p, dst_stride, params.mblim, params.lim,
- params.hev_thr);
- break;
- // apply 14-tap filtering
- case 14:
- if (use_highbitdepth)
- aom_highbd_lpf_horizontal_14(CONVERT_TO_SHORTPTR(p), dst_stride,
- params.mblim, params.lim,
- params.hev_thr, bit_depth);
- else
- aom_lpf_horizontal_14(p, dst_stride, params.mblim, params.lim,
- params.hev_thr);
- break;
- // no filtering
- default: break;
- }
-
- // advance the destination pointer
- advance_units = tx_size_high_unit[tx_size];
- y += advance_units;
- p += advance_units * dst_stride * MI_SIZE;
- }
- }
-}
-
-static void loop_filter_rows(YV12_BUFFER_CONFIG *frame_buffer, AV1_COMMON *cm,
- MACROBLOCKD *xd, int start, int stop,
-#if LOOP_FILTER_BITMASK
- int is_decoding,
-#endif
- int plane_start, int plane_end) {
- struct macroblockd_plane *pd = xd->plane;
- const int col_start = 0;
- const int col_end = cm->mi_cols;
- int mi_row, mi_col;
- int plane;
-
-#if LOOP_FILTER_BITMASK
- if (is_decoding) {
- for (plane = plane_start; plane < plane_end; plane++) {
- if (plane == 0 && !(cm->lf.filter_level[0]) && !(cm->lf.filter_level[1]))
- break;
- else if (plane == 1 && !(cm->lf.filter_level_u))
- continue;
- else if (plane == 2 && !(cm->lf.filter_level_v))
- continue;
-
- av1_setup_dst_planes(pd, cm->seq_params.sb_size, frame_buffer, 0, 0,
- plane, plane + 1);
- av1_build_bitmask_vert_info(cm, &pd[plane], plane);
- av1_build_bitmask_horz_info(cm, &pd[plane], plane);
-
- // apply loop filtering which only goes through buffer once
- for (mi_row = start; mi_row < stop; mi_row += MI_SIZE_64X64) {
- for (mi_col = col_start; mi_col < col_end; mi_col += MI_SIZE_64X64) {
- av1_setup_dst_planes(pd, MI_SIZE_64X64, frame_buffer, mi_row, mi_col,
- plane, plane + 1);
- av1_filter_block_plane_bitmask_vert(cm, &pd[plane], plane, mi_row,
- mi_col);
- if (mi_col - MI_SIZE_64X64 >= 0) {
- av1_setup_dst_planes(pd, MI_SIZE_64X64, frame_buffer, mi_row,
- mi_col - MI_SIZE_64X64, plane, plane + 1);
- av1_filter_block_plane_bitmask_horz(cm, &pd[plane], plane, mi_row,
- mi_col - MI_SIZE_64X64);
- }
- }
- av1_setup_dst_planes(pd, MI_SIZE_64X64, frame_buffer, mi_row,
- mi_col - MI_SIZE_64X64, plane, plane + 1);
- av1_filter_block_plane_bitmask_horz(cm, &pd[plane], plane, mi_row,
- mi_col - MI_SIZE_64X64);
- }
- }
- return;
- }
-#endif
-
- for (plane = plane_start; plane < plane_end; plane++) {
- if (plane == 0 && !(cm->lf.filter_level[0]) && !(cm->lf.filter_level[1]))
- break;
- else if (plane == 1 && !(cm->lf.filter_level_u))
- continue;
- else if (plane == 2 && !(cm->lf.filter_level_v))
- continue;
-
-#if LOOP_FILTER_BITMASK
- // filter all vertical edges every superblock (could be 128x128 or 64x64)
- for (mi_row = start; mi_row < stop; mi_row += cm->seq_params.mib_size) {
- for (mi_col = col_start; mi_col < col_end;
- mi_col += cm->seq_params.mib_size) {
- av1_setup_dst_planes(pd, cm->seq_params.sb_size, frame_buffer, mi_row,
- mi_col, plane, plane + 1);
-
- av1_setup_bitmask(cm, mi_row, mi_col, plane, pd[plane].subsampling_x,
- pd[plane].subsampling_y, stop, col_end);
- av1_filter_block_plane_ver(cm, &pd[plane], plane, mi_row, mi_col);
- }
- }
-
- // filter all horizontal edges every superblock
- for (mi_row = start; mi_row < stop; mi_row += cm->seq_params.mib_size) {
- for (mi_col = col_start; mi_col < col_end;
- mi_col += cm->seq_params.mib_size) {
- av1_setup_dst_planes(pd, cm->seq_params.sb_size, frame_buffer, mi_row,
- mi_col, plane, plane + 1);
-
- av1_filter_block_plane_hor(cm, &pd[plane], plane, mi_row, mi_col);
- }
- }
-#else
- if (cm->lf.combine_vert_horz_lf) {
- // filter all vertical and horizontal edges in every 128x128 super block
- for (mi_row = start; mi_row < stop; mi_row += MAX_MIB_SIZE) {
- for (mi_col = col_start; mi_col < col_end; mi_col += MAX_MIB_SIZE) {
- // filter vertical edges
- av1_setup_dst_planes(pd, cm->seq_params.sb_size, frame_buffer, mi_row,
- mi_col, plane, plane + 1);
- av1_filter_block_plane_vert(cm, xd, plane, &pd[plane], mi_row,
- mi_col);
- // filter horizontal edges
- if (mi_col - MAX_MIB_SIZE >= 0) {
- av1_setup_dst_planes(pd, cm->seq_params.sb_size, frame_buffer,
- mi_row, mi_col - MAX_MIB_SIZE, plane,
- plane + 1);
- av1_filter_block_plane_horz(cm, xd, plane, &pd[plane], mi_row,
- mi_col - MAX_MIB_SIZE);
- }
- }
- // filter horizontal edges
- av1_setup_dst_planes(pd, cm->seq_params.sb_size, frame_buffer, mi_row,
- mi_col - MAX_MIB_SIZE, plane, plane + 1);
- av1_filter_block_plane_horz(cm, xd, plane, &pd[plane], mi_row,
- mi_col - MAX_MIB_SIZE);
- }
- } else {
- // filter all vertical edges in every 128x128 super block
- for (mi_row = start; mi_row < stop; mi_row += MAX_MIB_SIZE) {
- for (mi_col = col_start; mi_col < col_end; mi_col += MAX_MIB_SIZE) {
- av1_setup_dst_planes(pd, cm->seq_params.sb_size, frame_buffer, mi_row,
- mi_col, plane, plane + 1);
- av1_filter_block_plane_vert(cm, xd, plane, &pd[plane], mi_row,
- mi_col);
- }
- }
-
- // filter all horizontal edges in every 128x128 super block
- for (mi_row = start; mi_row < stop; mi_row += MAX_MIB_SIZE) {
- for (mi_col = col_start; mi_col < col_end; mi_col += MAX_MIB_SIZE) {
- av1_setup_dst_planes(pd, cm->seq_params.sb_size, frame_buffer, mi_row,
- mi_col, plane, plane + 1);
- av1_filter_block_plane_horz(cm, xd, plane, &pd[plane], mi_row,
- mi_col);
- }
- }
- }
-#endif // LOOP_FILTER_BITMASK
- }
-}
-
-void av1_loop_filter_frame(YV12_BUFFER_CONFIG *frame, AV1_COMMON *cm,
- MACROBLOCKD *xd,
-#if LOOP_FILTER_BITMASK
- int is_decoding,
-#endif
- int plane_start, int plane_end, int partial_frame) {
- int start_mi_row, end_mi_row, mi_rows_to_filter;
-
- start_mi_row = 0;
- mi_rows_to_filter = cm->mi_rows;
- if (partial_frame && cm->mi_rows > 8) {
- start_mi_row = cm->mi_rows >> 1;
- start_mi_row &= 0xfffffff8;
- mi_rows_to_filter = AOMMAX(cm->mi_rows / 8, 8);
- }
- end_mi_row = start_mi_row + mi_rows_to_filter;
- av1_loop_filter_frame_init(cm, plane_start, plane_end);
- loop_filter_rows(frame, cm, xd, start_mi_row, end_mi_row,
-#if LOOP_FILTER_BITMASK
- is_decoding,
-#endif
- plane_start, plane_end);
-}
diff --git a/third_party/aom/av1/common/av1_loopfilter.h b/third_party/aom/av1/common/av1_loopfilter.h
deleted file mode 100644
index 80ac61178..000000000
--- a/third_party/aom/av1/common/av1_loopfilter.h
+++ /dev/null
@@ -1,227 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#ifndef AOM_AV1_COMMON_AV1_LOOPFILTER_H_
-#define AOM_AV1_COMMON_AV1_LOOPFILTER_H_
-
-#include "config/aom_config.h"
-
-#include "aom_ports/mem.h"
-#include "av1/common/blockd.h"
-#include "av1/common/seg_common.h"
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-#define MAX_LOOP_FILTER 63
-#define MAX_SHARPNESS 7
-
-#define SIMD_WIDTH 16
-
-enum lf_path {
- LF_PATH_420,
- LF_PATH_444,
- LF_PATH_SLOW,
-};
-
-#if LOOP_FILTER_BITMASK
-typedef struct {
- uint64_t bits[4];
-} FilterMask;
-
-// This structure holds bit masks for all 4x4 blocks in a 64x64 region.
-// Each 1 bit represents a position in which we want to apply the loop filter.
-// For Y plane, 4x4 in 64x64 requires 16x16 = 256 bit, therefore we use 4
-// uint64_t; For U, V plane, for 420 format, plane size is 32x32, thus we use
-// a uint64_t to represent bitmask.
-// Left_ entries refer to whether we apply a filter on the border to the
-// left of the block. Above_ entries refer to whether or not to apply a
-// filter on the above border.
-// Since each transform is accompanied by a potentially different type of
-// loop filter there is a different entry in the array for each transform size.
-typedef struct {
- FilterMask left_y[TX_SIZES];
- FilterMask above_y[TX_SIZES];
- FilterMask left_u[TX_SIZES];
- FilterMask above_u[TX_SIZES];
- FilterMask left_v[TX_SIZES];
- FilterMask above_v[TX_SIZES];
-
- // Y plane vertical edge and horizontal edge filter level
- uint8_t lfl_y_hor[MI_SIZE_64X64][MI_SIZE_64X64];
- uint8_t lfl_y_ver[MI_SIZE_64X64][MI_SIZE_64X64];
-
- // U plane filter level
- uint8_t lfl_u[MI_SIZE_64X64][MI_SIZE_64X64];
-
- // V plane filter level
- uint8_t lfl_v[MI_SIZE_64X64][MI_SIZE_64X64];
-
- // other info
- FilterMask skip;
- FilterMask is_vert_border;
- FilterMask is_horz_border;
- // Y or UV planes, 5 tx sizes: 4x4, 8x8, 16x16, 32x32, 64x64
- FilterMask tx_size_ver[2][5];
- FilterMask tx_size_hor[2][5];
-} LoopFilterMask;
-#endif // LOOP_FILTER_BITMASK
-
-struct loopfilter {
- int filter_level[2];
- int filter_level_u;
- int filter_level_v;
-
- int sharpness_level;
-
- uint8_t mode_ref_delta_enabled;
- uint8_t mode_ref_delta_update;
-
- // 0 = Intra, Last, Last2+Last3,
- // GF, BRF, ARF2, ARF
- int8_t ref_deltas[REF_FRAMES];
-
- // 0 = ZERO_MV, MV
- int8_t mode_deltas[MAX_MODE_LF_DELTAS];
-
- int combine_vert_horz_lf;
-
-#if LOOP_FILTER_BITMASK
- LoopFilterMask *lfm;
- size_t lfm_num;
- int lfm_stride;
-#endif // LOOP_FILTER_BITMASK
-};
-
-// Need to align this structure so when it is declared and
-// passed it can be loaded into vector registers.
-typedef struct {
- DECLARE_ALIGNED(SIMD_WIDTH, uint8_t, mblim[SIMD_WIDTH]);
- DECLARE_ALIGNED(SIMD_WIDTH, uint8_t, lim[SIMD_WIDTH]);
- DECLARE_ALIGNED(SIMD_WIDTH, uint8_t, hev_thr[SIMD_WIDTH]);
-} loop_filter_thresh;
-
-typedef struct {
- loop_filter_thresh lfthr[MAX_LOOP_FILTER + 1];
- uint8_t lvl[MAX_MB_PLANE][MAX_SEGMENTS][2][REF_FRAMES][MAX_MODE_LF_DELTAS];
-} loop_filter_info_n;
-
-/* assorted loopfilter functions which get used elsewhere */
-struct AV1Common;
-struct macroblockd;
-struct AV1LfSyncData;
-
-void av1_loop_filter_init(struct AV1Common *cm);
-
-void av1_loop_filter_frame_init(struct AV1Common *cm, int plane_start,
- int plane_end);
-
-#if LOOP_FILTER_BITMASK
-void av1_loop_filter_frame(YV12_BUFFER_CONFIG *frame, struct AV1Common *cm,
- struct macroblockd *mbd, int is_decoding,
- int plane_start, int plane_end, int partial_frame);
-#else
-void av1_loop_filter_frame(YV12_BUFFER_CONFIG *frame, struct AV1Common *cm,
- struct macroblockd *mbd, int plane_start,
- int plane_end, int partial_frame);
-#endif
-
-void av1_filter_block_plane_vert(const struct AV1Common *const cm,
- const MACROBLOCKD *const xd, const int plane,
- const MACROBLOCKD_PLANE *const plane_ptr,
- const uint32_t mi_row, const uint32_t mi_col);
-
-void av1_filter_block_plane_horz(const struct AV1Common *const cm,
- const MACROBLOCKD *const xd, const int plane,
- const MACROBLOCKD_PLANE *const plane_ptr,
- const uint32_t mi_row, const uint32_t mi_col);
-
-typedef struct LoopFilterWorkerData {
- YV12_BUFFER_CONFIG *frame_buffer;
- struct AV1Common *cm;
- struct macroblockd_plane planes[MAX_MB_PLANE];
- // TODO(Ranjit): When the filter functions are modified to use xd->lossless
- // add lossless as a member here.
- MACROBLOCKD *xd;
-} LFWorkerData;
-
-uint8_t get_filter_level(const struct AV1Common *cm,
- const loop_filter_info_n *lfi_n, const int dir_idx,
- int plane, const MB_MODE_INFO *mbmi);
-#if LOOP_FILTER_BITMASK
-void av1_setup_bitmask(struct AV1Common *const cm, int mi_row, int mi_col,
- int plane, int subsampling_x, int subsampling_y,
- int row_end, int col_end);
-
-void av1_filter_block_plane_ver(struct AV1Common *const cm,
- struct macroblockd_plane *const plane_ptr,
- int pl, int mi_row, int mi_col);
-
-void av1_filter_block_plane_hor(struct AV1Common *const cm,
- struct macroblockd_plane *const plane, int pl,
- int mi_row, int mi_col);
-LoopFilterMask *get_loop_filter_mask(const struct AV1Common *const cm,
- int mi_row, int mi_col);
-int get_index_shift(int mi_col, int mi_row, int *index);
-
-static const FilterMask left_txform_mask[TX_SIZES] = {
- { { 0x0000000000000001ULL, // TX_4X4,
- 0x0000000000000000ULL, 0x0000000000000000ULL, 0x0000000000000000ULL } },
-
- { { 0x0000000000010001ULL, // TX_8X8,
- 0x0000000000000000ULL, 0x0000000000000000ULL, 0x0000000000000000ULL } },
-
- { { 0x0001000100010001ULL, // TX_16X16,
- 0x0000000000000000ULL, 0x0000000000000000ULL, 0x0000000000000000ULL } },
-
- { { 0x0001000100010001ULL, // TX_32X32,
- 0x0001000100010001ULL, 0x0000000000000000ULL, 0x0000000000000000ULL } },
-
- { { 0x0001000100010001ULL, // TX_64X64,
- 0x0001000100010001ULL, 0x0001000100010001ULL, 0x0001000100010001ULL } },
-};
-
-static const uint64_t above_txform_mask[2][TX_SIZES] = {
- {
- 0x0000000000000001ULL, // TX_4X4
- 0x0000000000000003ULL, // TX_8X8
- 0x000000000000000fULL, // TX_16X16
- 0x00000000000000ffULL, // TX_32X32
- 0x000000000000ffffULL, // TX_64X64
- },
- {
- 0x0000000000000001ULL, // TX_4X4
- 0x0000000000000005ULL, // TX_8X8
- 0x0000000000000055ULL, // TX_16X16
- 0x0000000000005555ULL, // TX_32X32
- 0x0000000055555555ULL, // TX_64X64
- },
-};
-
-extern const int mask_id_table_tx_4x4[BLOCK_SIZES_ALL];
-
-extern const int mask_id_table_tx_8x8[BLOCK_SIZES_ALL];
-
-extern const int mask_id_table_tx_16x16[BLOCK_SIZES_ALL];
-
-extern const int mask_id_table_tx_32x32[BLOCK_SIZES_ALL];
-
-extern const FilterMask left_mask_univariant_reordered[67];
-
-extern const FilterMask above_mask_univariant_reordered[67];
-#endif
-
-#ifdef __cplusplus
-} // extern "C"
-#endif
-
-#endif // AOM_AV1_COMMON_AV1_LOOPFILTER_H_
diff --git a/third_party/aom/av1/common/av1_rtcd.c b/third_party/aom/av1/common/av1_rtcd.c
deleted file mode 100644
index a77a4d254..000000000
--- a/third_party/aom/av1/common/av1_rtcd.c
+++ /dev/null
@@ -1,22 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-#include "config/aom_config.h"
-
-#define RTCD_C
-#include "config/av1_rtcd.h"
-
-#include "aom_ports/aom_once.h"
-
-void av1_rtcd() {
- // TODO(JBB): Remove this aom_once, by insuring that both the encoder and
- // decoder setup functions are protected by aom_once();
- aom_once(setup_rtcd_internal);
-}
diff --git a/third_party/aom/av1/common/av1_rtcd_defs.pl b/third_party/aom/av1/common/av1_rtcd_defs.pl
deleted file mode 100755
index dee1f1c79..000000000
--- a/third_party/aom/av1/common/av1_rtcd_defs.pl
+++ /dev/null
@@ -1,398 +0,0 @@
-##
-## Copyright (c) 2017, Alliance for Open Media. All rights reserved
-##
-## This source code is subject to the terms of the BSD 2 Clause License and
-## the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
-## was not distributed with this source code in the LICENSE file, you can
-## obtain it at www.aomedia.org/license/software. If the Alliance for Open
-## Media Patent License 1.0 was not distributed with this source code in the
-## PATENTS file, you can obtain it at www.aomedia.org/license/patent.
-##
-sub av1_common_forward_decls() {
-print <<EOF
-/*
- * AV1
- */
-
-#include "aom/aom_integer.h"
-#include "aom_dsp/txfm_common.h"
-#include "av1/common/common.h"
-#include "av1/common/enums.h"
-#include "av1/common/quant_common.h"
-#include "av1/common/filter.h"
-#include "av1/common/convolve.h"
-#include "av1/common/av1_txfm.h"
-#include "av1/common/odintrin.h"
-#include "av1/common/restoration.h"
-
-struct macroblockd;
-
-/* Encoder forward decls */
-struct macroblock;
-struct txfm_param;
-struct aom_variance_vtable;
-struct search_site_config;
-struct yv12_buffer_config;
-
-/* Function pointers return by CfL functions */
-typedef void (*cfl_subsample_lbd_fn)(const uint8_t *input, int input_stride,
- uint16_t *output_q3);
-
-typedef void (*cfl_subsample_hbd_fn)(const uint16_t *input, int input_stride,
- uint16_t *output_q3);
-
-typedef void (*cfl_subtract_average_fn)(const uint16_t *src, int16_t *dst);
-
-typedef void (*cfl_predict_lbd_fn)(const int16_t *src, uint8_t *dst,
- int dst_stride, int alpha_q3);
-
-typedef void (*cfl_predict_hbd_fn)(const int16_t *src, uint16_t *dst,
- int dst_stride, int alpha_q3, int bd);
-EOF
-}
-forward_decls qw/av1_common_forward_decls/;
-
-# functions that are 64 bit only.
-$mmx_x86_64 = $sse2_x86_64 = $ssse3_x86_64 = $avx_x86_64 = $avx2_x86_64 = '';
-if ($opts{arch} eq "x86_64") {
- $mmx_x86_64 = 'mmx';
- $sse2_x86_64 = 'sse2';
- $ssse3_x86_64 = 'ssse3';
- $avx_x86_64 = 'avx';
- $avx2_x86_64 = 'avx2';
-}
-
-add_proto qw/void av1_convolve_horiz_rs/, "const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const int16_t *x_filters, int x0_qn, int x_step_qn";
-specialize qw/av1_convolve_horiz_rs sse4_1/;
-
-add_proto qw/void av1_highbd_convolve_horiz_rs/, "const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const int16_t *x_filters, int x0_qn, int x_step_qn, int bd";
-specialize qw/av1_highbd_convolve_horiz_rs sse4_1/;
-
-add_proto qw/void av1_wiener_convolve_add_src/, "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h, const ConvolveParams *conv_params";
-
-add_proto qw/void av1_highbd_wiener_convolve_add_src/, "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h, const ConvolveParams *conv_params, int bps";
-
-specialize qw/av1_wiener_convolve_add_src sse2 avx2 neon/;
-specialize qw/av1_highbd_wiener_convolve_add_src ssse3/;
-specialize qw/av1_highbd_wiener_convolve_add_src avx2/;
-
-
-# directional intra predictor functions
-add_proto qw/void av1_dr_prediction_z1/, "uint8_t *dst, ptrdiff_t stride, int bw, int bh, const uint8_t *above, const uint8_t *left, int upsample_above, int dx, int dy";
-add_proto qw/void av1_dr_prediction_z2/, "uint8_t *dst, ptrdiff_t stride, int bw, int bh, const uint8_t *above, const uint8_t *left, int upsample_above, int upsample_left, int dx, int dy";
-add_proto qw/void av1_dr_prediction_z3/, "uint8_t *dst, ptrdiff_t stride, int bw, int bh, const uint8_t *above, const uint8_t *left, int upsample_left, int dx, int dy";
-
-# FILTER_INTRA predictor functions
-add_proto qw/void av1_filter_intra_predictor/, "uint8_t *dst, ptrdiff_t stride, TX_SIZE tx_size, const uint8_t *above, const uint8_t *left, int mode";
-specialize qw/av1_filter_intra_predictor sse4_1/;
-
-# High bitdepth functions
-
-#
-# Sub Pixel Filters
-#
-add_proto qw/void av1_highbd_convolve_copy/, "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h, int bps";
-
-add_proto qw/void av1_highbd_convolve_avg/, "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h, int bps";
-
-add_proto qw/void av1_highbd_convolve8/, "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h, int bps";
-specialize qw/av1_highbd_convolve8/, "$sse2_x86_64";
-
-add_proto qw/void av1_highbd_convolve8_horiz/, "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h, int bps";
-specialize qw/av1_highbd_convolve8_horiz/, "$sse2_x86_64";
-
-add_proto qw/void av1_highbd_convolve8_vert/, "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h, int bps";
-specialize qw/av1_highbd_convolve8_vert/, "$sse2_x86_64";
-
-#inv txfm
-add_proto qw/void av1_inv_txfm_add/, "const tran_low_t *dqcoeff, uint8_t *dst, int stride, const TxfmParam *txfm_param";
-specialize qw/av1_inv_txfm_add ssse3 avx2 neon/;
-
-add_proto qw/void av1_highbd_inv_txfm_add/, "const tran_low_t *dqcoeff, uint8_t *dst, int stride, const TxfmParam *txfm_param";
-specialize qw/av1_highbd_inv_txfm_add sse4_1 avx2/;
-
-add_proto qw/void av1_highbd_inv_txfm_add_4x4/, "const tran_low_t *dqcoeff, uint8_t *dst, int stride, const TxfmParam *txfm_param";
-specialize qw/av1_highbd_inv_txfm_add_4x4 sse4_1/;
-add_proto qw/void av1_highbd_inv_txfm_add_8x8/, "const tran_low_t *dqcoeff, uint8_t *dst, int stride, const TxfmParam *txfm_param";
-specialize qw/av1_highbd_inv_txfm_add_8x8 sse4_1/;
-add_proto qw/void av1_highbd_inv_txfm_add_16x8/, "const tran_low_t *dqcoeff, uint8_t *dst, int stride, const TxfmParam *txfm_param";
-specialize qw/av1_highbd_inv_txfm_add_16x8 sse4_1/;
-add_proto qw/void av1_highbd_inv_txfm_add_8x16/, "const tran_low_t *dqcoeff, uint8_t *dst, int stride, const TxfmParam *txfm_param";
-specialize qw/av1_highbd_inv_txfm_add_8x16 sse4_1/;
-add_proto qw/void av1_highbd_inv_txfm_add_16x16/, "const tran_low_t *dqcoeff, uint8_t *dst, int stride, const TxfmParam *txfm_param";
-specialize qw/av1_highbd_inv_txfm_add_16x16 sse4_1/;
-add_proto qw/void av1_highbd_inv_txfm_add_32x32/, "const tran_low_t *dqcoeff, uint8_t *dst, int stride, const TxfmParam *txfm_param";
-specialize qw/av1_highbd_inv_txfm_add_32x32 sse4_1 avx2/;
-
-add_proto qw/void av1_highbd_iwht4x4_1_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, int bd";
-add_proto qw/void av1_highbd_iwht4x4_16_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, int bd";
-
-add_proto qw/void av1_inv_txfm2d_add_4x8/, "const int32_t *input, uint16_t *output, int stride, TX_TYPE tx_type, int bd";
-add_proto qw/void av1_inv_txfm2d_add_8x4/, "const int32_t *input, uint16_t *output, int stride, TX_TYPE tx_type, int bd";
-add_proto qw/void av1_inv_txfm2d_add_8x16/, "const int32_t *input, uint16_t *output, int stride, TX_TYPE tx_type, int bd";
-add_proto qw/void av1_inv_txfm2d_add_16x8/, "const int32_t *input, uint16_t *output, int stride, TX_TYPE tx_type, int bd";
-add_proto qw/void av1_inv_txfm2d_add_16x32/, "const int32_t *input, uint16_t *output, int stride, TX_TYPE tx_type, int bd";
-add_proto qw/void av1_inv_txfm2d_add_32x16/, "const int32_t *input, uint16_t *output, int stride, TX_TYPE tx_type, int bd";
-add_proto qw/void av1_inv_txfm2d_add_4x4/, "const int32_t *input, uint16_t *output, int stride, TX_TYPE tx_type, int bd";
-specialize qw/av1_inv_txfm2d_add_4x4 sse4_1/;
-add_proto qw/void av1_inv_txfm2d_add_8x8/, "const int32_t *input, uint16_t *output, int stride, TX_TYPE tx_type, int bd";
-specialize qw/av1_inv_txfm2d_add_8x8 sse4_1/;
-add_proto qw/void av1_inv_txfm2d_add_16x16/, "const int32_t *input, uint16_t *output, int stride, TX_TYPE tx_type, int bd";
-add_proto qw/void av1_inv_txfm2d_add_32x32/, "const int32_t *input, uint16_t *output, int stride, TX_TYPE tx_type, int bd";
-
-add_proto qw/void av1_inv_txfm2d_add_64x64/, "const int32_t *input, uint16_t *output, int stride, TX_TYPE tx_type, int bd";
-add_proto qw/void av1_inv_txfm2d_add_32x64/, "const int32_t *input, uint16_t *output, int stride, TX_TYPE tx_type, int bd";
-add_proto qw/void av1_inv_txfm2d_add_64x32/, "const int32_t *input, uint16_t *output, int stride, TX_TYPE tx_type, int bd";
-add_proto qw/void av1_inv_txfm2d_add_16x64/, "const int32_t *input, uint16_t *output, int stride, TX_TYPE tx_type, int bd";
-add_proto qw/void av1_inv_txfm2d_add_64x16/, "const int32_t *input, uint16_t *output, int stride, TX_TYPE tx_type, int bd";
-
-add_proto qw/void av1_inv_txfm2d_add_4x16/, "const int32_t *input, uint16_t *output, int stride, TX_TYPE tx_type, int bd";
-add_proto qw/void av1_inv_txfm2d_add_16x4/, "const int32_t *input, uint16_t *output, int stride, TX_TYPE tx_type, int bd";
-add_proto qw/void av1_inv_txfm2d_add_8x32/, "const int32_t *input, uint16_t *output, int stride, TX_TYPE tx_type, int bd";
-add_proto qw/void av1_inv_txfm2d_add_32x8/, "const int32_t *input, uint16_t *output, int stride, TX_TYPE tx_type, int bd";
-
-# directional intra predictor functions
-add_proto qw/void av1_highbd_dr_prediction_z1/, "uint16_t *dst, ptrdiff_t stride, int bw, int bh, const uint16_t *above, const uint16_t *left, int upsample_above, int dx, int dy, int bd";
-add_proto qw/void av1_highbd_dr_prediction_z2/, "uint16_t *dst, ptrdiff_t stride, int bw, int bh, const uint16_t *above, const uint16_t *left, int upsample_above, int upsample_left, int dx, int dy, int bd";
-add_proto qw/void av1_highbd_dr_prediction_z3/, "uint16_t *dst, ptrdiff_t stride, int bw, int bh, const uint16_t *above, const uint16_t *left, int upsample_left, int dx, int dy, int bd";
-
-# build compound seg mask functions
-add_proto qw/void av1_build_compound_diffwtd_mask/, "uint8_t *mask, DIFFWTD_MASK_TYPE mask_type, const uint8_t *src0, int src0_stride, const uint8_t *src1, int src1_stride, int h, int w";
-specialize qw/av1_build_compound_diffwtd_mask sse4_1 avx2/;
-
-add_proto qw/void av1_build_compound_diffwtd_mask_highbd/, "uint8_t *mask, DIFFWTD_MASK_TYPE mask_type, const uint8_t *src0, int src0_stride, const uint8_t *src1, int src1_stride, int h, int w, int bd";
-specialize qw/av1_build_compound_diffwtd_mask_highbd ssse3 avx2/;
-
-add_proto qw/void av1_build_compound_diffwtd_mask_d16/, "uint8_t *mask, DIFFWTD_MASK_TYPE mask_type, const CONV_BUF_TYPE *src0, int src0_stride, const CONV_BUF_TYPE *src1, int src1_stride, int h, int w, ConvolveParams *conv_params, int bd";
-specialize qw/av1_build_compound_diffwtd_mask_d16 sse4_1 avx2 neon/;
-
-#
-# Encoder functions below this point.
-#
-if (aom_config("CONFIG_AV1_ENCODER") eq "yes") {
-
- # ENCODEMB INVOKE
-
- # the transform coefficients are held in 32-bit
- # values, so the assembler code for av1_block_error can no longer be used.
- add_proto qw/int64_t av1_block_error/, "const tran_low_t *coeff, const tran_low_t *dqcoeff, intptr_t block_size, int64_t *ssz";
- specialize qw/av1_block_error avx2/;
-
- add_proto qw/void av1_quantize_fp/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan";
- specialize qw/av1_quantize_fp sse2 avx2/;
-
- add_proto qw/void av1_quantize_fp_32x32/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan";
- specialize qw/av1_quantize_fp_32x32 avx2/;
-
- add_proto qw/void av1_quantize_fp_64x64/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan";
- specialize qw/av1_quantize_fp_64x64 avx2/;
-
- # fdct functions
-
- add_proto qw/void av1_fwht4x4/, "const int16_t *input, tran_low_t *output, int stride";
-
- #fwd txfm
- add_proto qw/void av1_lowbd_fwd_txfm/, "const int16_t *src_diff, tran_low_t *coeff, int diff_stride, TxfmParam *txfm_param";
- specialize qw/av1_lowbd_fwd_txfm sse2 sse4_1 avx2/;
-
- add_proto qw/void av1_fwd_txfm2d_4x8/, "const int16_t *input, int32_t *output, int stride, TX_TYPE tx_type, int bd";
- add_proto qw/void av1_fwd_txfm2d_8x4/, "const int16_t *input, int32_t *output, int stride, TX_TYPE tx_type, int bd";
- add_proto qw/void av1_fwd_txfm2d_8x16/, "const int16_t *input, int32_t *output, int stride, TX_TYPE tx_type, int bd";
- specialize qw/av1_fwd_txfm2d_8x16 sse4_1/;
- add_proto qw/void av1_fwd_txfm2d_16x8/, "const int16_t *input, int32_t *output, int stride, TX_TYPE tx_type, int bd";
- specialize qw/av1_fwd_txfm2d_16x8 sse4_1/;
- add_proto qw/void av1_fwd_txfm2d_16x32/, "const int16_t *input, int32_t *output, int stride, TX_TYPE tx_type, int bd";
- add_proto qw/void av1_fwd_txfm2d_32x16/, "const int16_t *input, int32_t *output, int stride, TX_TYPE tx_type, int bd";
- add_proto qw/void av1_fwd_txfm2d_4x16/, "const int16_t *input, int32_t *output, int stride, TX_TYPE tx_type, int bd";
- add_proto qw/void av1_fwd_txfm2d_16x4/, "const int16_t *input, int32_t *output, int stride, TX_TYPE tx_type, int bd";
- add_proto qw/void av1_fwd_txfm2d_8x32/, "const int16_t *input, int32_t *output, int stride, TX_TYPE tx_type, int bd";
- add_proto qw/void av1_fwd_txfm2d_32x8/, "const int16_t *input, int32_t *output, int stride, TX_TYPE tx_type, int bd";
- add_proto qw/void av1_fwd_txfm2d_4x4/, "const int16_t *input, int32_t *output, int stride, TX_TYPE tx_type, int bd";
- specialize qw/av1_fwd_txfm2d_4x4 sse4_1/;
- add_proto qw/void av1_fwd_txfm2d_8x8/, "const int16_t *input, int32_t *output, int stride, TX_TYPE tx_type, int bd";
- specialize qw/av1_fwd_txfm2d_8x8 sse4_1/;
- add_proto qw/void av1_fwd_txfm2d_16x16/, "const int16_t *input, int32_t *output, int stride, TX_TYPE tx_type, int bd";
- specialize qw/av1_fwd_txfm2d_16x16 sse4_1/;
- add_proto qw/void av1_fwd_txfm2d_32x32/, "const int16_t *input, int32_t *output, int stride, TX_TYPE tx_type, int bd";
- specialize qw/av1_fwd_txfm2d_32x32 sse4_1/;
-
- add_proto qw/void av1_fwd_txfm2d_64x64/, "const int16_t *input, int32_t *output, int stride, TX_TYPE tx_type, int bd";
- specialize qw/av1_fwd_txfm2d_64x64 sse4_1/;
- add_proto qw/void av1_fwd_txfm2d_32x64/, "const int16_t *input, int32_t *output, int stride, TX_TYPE tx_type, int bd";
- add_proto qw/void av1_fwd_txfm2d_64x32/, "const int16_t *input, int32_t *output, int stride, TX_TYPE tx_type, int bd";
- add_proto qw/void av1_fwd_txfm2d_16x64/, "const int16_t *input, int32_t *output, int stride, TX_TYPE tx_type, int bd";
- add_proto qw/void av1_fwd_txfm2d_64x16/, "const int16_t *input, int32_t *output, int stride, TX_TYPE tx_type, int bd";
-
- #
- # Motion search
- #
- add_proto qw/int av1_diamond_search_sad/, "struct macroblock *x, const struct search_site_config *cfg, MV *ref_mv, MV *best_mv, int search_param, int sad_per_bit, int *num00, const struct aom_variance_vtable *fn_ptr, const MV *center_mv";
-
- add_proto qw/int av1_full_range_search/, "const struct macroblock *x, const struct search_site_config *cfg, MV *ref_mv, MV *best_mv, int search_param, int sad_per_bit, int *num00, const struct aom_variance_vtable *fn_ptr, const MV *center_mv";
-
- add_proto qw/void av1_temporal_filter_apply/, "uint8_t *frame1, unsigned int stride, uint8_t *frame2, unsigned int block_width, unsigned int block_height, int strength, int filter_weight, unsigned int *accumulator, uint16_t *count";
- specialize qw/av1_temporal_filter_apply sse2 msa/;
-
- add_proto qw/void av1_quantize_b/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan, const qm_val_t * qm_ptr, const qm_val_t * iqm_ptr, int log_scale";
-
- # ENCODEMB INVOKE
-
- add_proto qw/int64_t av1_highbd_block_error/, "const tran_low_t *coeff, const tran_low_t *dqcoeff, intptr_t block_size, int64_t *ssz, int bd";
- specialize qw/av1_highbd_block_error sse2/;
-
- add_proto qw/void av1_highbd_temporal_filter_apply/, "uint8_t *frame1, unsigned int stride, uint8_t *frame2, unsigned int block_width, unsigned int block_height, int strength, int filter_weight, unsigned int *accumulator, uint16_t *count";
-
- add_proto qw/void av1_highbd_quantize_fp/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan, int log_scale";
- specialize qw/av1_highbd_quantize_fp sse4_1 avx2/;
-
- add_proto qw/void av1_highbd_fwht4x4/, "const int16_t *input, tran_low_t *output, int stride";
-
- # End av1_high encoder functions
-
- # txb
- add_proto qw/void av1_get_nz_map_contexts/, "const uint8_t *const levels, const int16_t *const scan, const uint16_t eob, const TX_SIZE tx_size, const TX_CLASS tx_class, int8_t *const coeff_contexts";
- specialize qw/av1_get_nz_map_contexts sse2/;
- add_proto qw/void av1_txb_init_levels/, "const tran_low_t *const coeff, const int width, const int height, uint8_t *const levels";
- specialize qw/av1_txb_init_levels sse4_1 avx2/;
-
- add_proto qw/uint64_t av1_wedge_sse_from_residuals/, "const int16_t *r1, const int16_t *d, const uint8_t *m, int N";
- specialize qw/av1_wedge_sse_from_residuals sse2 avx2/;
- add_proto qw/int av1_wedge_sign_from_residuals/, "const int16_t *ds, const uint8_t *m, int N, int64_t limit";
- specialize qw/av1_wedge_sign_from_residuals sse2 avx2/;
- add_proto qw/void av1_wedge_compute_delta_squares/, "int16_t *d, const int16_t *a, const int16_t *b, int N";
- specialize qw/av1_wedge_compute_delta_squares sse2 avx2/;
-
- # hash
- add_proto qw/uint32_t av1_get_crc32c_value/, "void *crc_calculator, uint8_t *p, int length";
- specialize qw/av1_get_crc32c_value sse4_2/;
-
- add_proto qw/void av1_compute_stats/, "int wiener_win, const uint8_t *dgd8, const uint8_t *src8, int h_start, int h_end, int v_start, int v_end, int dgd_stride, int src_stride, double *M, double *H";
- specialize qw/av1_compute_stats sse4_1 avx2/;
-
- add_proto qw/int64_t av1_lowbd_pixel_proj_error/, " const uint8_t *src8, int width, int height, int src_stride, const uint8_t *dat8, int dat_stride, int32_t *flt0, int flt0_stride, int32_t *flt1, int flt1_stride, int xq[2], const sgr_params_type *params";
- specialize qw/av1_lowbd_pixel_proj_error sse4_1 avx2/;
-}
-# end encoder functions
-
-# Deringing Functions
-
-add_proto qw/int cdef_find_dir/, "const uint16_t *img, int stride, int32_t *var, int coeff_shift";
-add_proto qw/void cdef_filter_block/, "uint8_t *dst8, uint16_t *dst16, int dstride, const uint16_t *in, int pri_strength, int sec_strength, int dir, int pri_damping, int sec_damping, int bsize, int max, int coeff_shift";
-
-add_proto qw/void copy_rect8_8bit_to_16bit/, "uint16_t *dst, int dstride, const uint8_t *src, int sstride, int v, int h";
-add_proto qw/void copy_rect8_16bit_to_16bit/, "uint16_t *dst, int dstride, const uint16_t *src, int sstride, int v, int h";
-
-# VS compiling for 32 bit targets does not support vector types in
-# structs as arguments, which makes the v256 type of the intrinsics
-# hard to support, so optimizations for this target are disabled.
-if ($opts{config} !~ /libs-x86-win32-vs.*/) {
- specialize qw/cdef_find_dir sse2 ssse3 sse4_1 avx2 neon/;
- specialize qw/cdef_filter_block sse2 ssse3 sse4_1 avx2 neon/;
- specialize qw/copy_rect8_8bit_to_16bit sse2 ssse3 sse4_1 avx2 neon/;
- specialize qw/copy_rect8_16bit_to_16bit sse2 ssse3 sse4_1 avx2 neon/;
-}
-
-# WARPED_MOTION / GLOBAL_MOTION functions
-
-add_proto qw/void av1_warp_affine/, "const int32_t *mat, const uint8_t *ref, int width, int height, int stride, uint8_t *pred, int p_col, int p_row, int p_width, int p_height, int p_stride, int subsampling_x, int subsampling_y, ConvolveParams *conv_params, int16_t alpha, int16_t beta, int16_t gamma, int16_t delta";
-specialize qw/av1_warp_affine sse4_1 neon/;
-
-add_proto qw/void av1_highbd_warp_affine/, "const int32_t *mat, const uint16_t *ref, int width, int height, int stride, uint16_t *pred, int p_col, int p_row, int p_width, int p_height, int p_stride, int subsampling_x, int subsampling_y, int bd, ConvolveParams *conv_params, int16_t alpha, int16_t beta, int16_t gamma, int16_t delta";
-specialize qw/av1_highbd_warp_affine sse4_1/;
-
-if (aom_config("CONFIG_AV1_ENCODER") eq "yes") {
- add_proto qw/double compute_cross_correlation/, "unsigned char *im1, int stride1, int x1, int y1, unsigned char *im2, int stride2, int x2, int y2";
- specialize qw/compute_cross_correlation sse4_1/;
-}
-
-# LOOP_RESTORATION functions
-
-add_proto qw/void apply_selfguided_restoration/, "const uint8_t *dat, int width, int height, int stride, int eps, const int *xqd, uint8_t *dst, int dst_stride, int32_t *tmpbuf, int bit_depth, int highbd";
-specialize qw/apply_selfguided_restoration sse4_1 avx2 neon/;
-
-add_proto qw/int av1_selfguided_restoration/, "const uint8_t *dgd8, int width, int height,
- int dgd_stride, int32_t *flt0, int32_t *flt1, int flt_stride,
- int sgr_params_idx, int bit_depth, int highbd";
-specialize qw/av1_selfguided_restoration sse4_1 avx2 neon/;
-
-# CONVOLVE_ROUND/COMPOUND_ROUND functions
-
-add_proto qw/void av1_convolve_2d_sr/, "const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params";
-add_proto qw/void av1_convolve_2d_copy_sr/, "const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params";
-add_proto qw/void av1_convolve_x_sr/, "const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params";
-add_proto qw/void av1_convolve_y_sr/, "const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params";
-add_proto qw/void av1_jnt_convolve_2d/, "const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params";
-add_proto qw/void av1_jnt_convolve_2d_copy/, "const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params";
-add_proto qw/void av1_jnt_convolve_x/, "const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params";
-add_proto qw/void av1_jnt_convolve_y/, "const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params";
-add_proto qw/void av1_highbd_convolve_2d_copy_sr/, "const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd";
-add_proto qw/void av1_highbd_convolve_2d_sr/, "const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd";
-add_proto qw/void av1_highbd_convolve_x_sr/, "const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd";
-add_proto qw/void av1_highbd_convolve_y_sr/, "const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd";
-add_proto qw/void av1_highbd_jnt_convolve_2d/, "const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd";
-add_proto qw/void av1_highbd_jnt_convolve_x/, "const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd";
-add_proto qw/void av1_highbd_jnt_convolve_y/, "const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd";
-add_proto qw/void av1_highbd_jnt_convolve_2d_copy/, "const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd";
-
- add_proto qw/void av1_convolve_2d_scale/, "const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_qn, const int x_step_qn, const int subpel_y_q4, const int y_step_qn, ConvolveParams *conv_params";
- add_proto qw/void av1_highbd_convolve_2d_scale/, "const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int x_step_qn, const int subpel_y_q4, const int y_step_qn, ConvolveParams *conv_params, int bd";
-
- specialize qw/av1_convolve_2d_sr sse2 avx2 neon/;
- specialize qw/av1_convolve_2d_copy_sr sse2 avx2 neon/;
- specialize qw/av1_convolve_x_sr sse2 avx2 neon/;
- specialize qw/av1_convolve_y_sr sse2 avx2 neon/;
- specialize qw/av1_convolve_2d_scale sse4_1/;
- specialize qw/av1_jnt_convolve_2d ssse3 avx2 neon/;
- specialize qw/av1_jnt_convolve_2d_copy sse2 avx2 neon/;
- specialize qw/av1_jnt_convolve_x sse2 avx2 neon/;
- specialize qw/av1_jnt_convolve_y sse2 avx2 neon/;
- specialize qw/av1_highbd_convolve_2d_copy_sr sse2 avx2/;
- specialize qw/av1_highbd_convolve_2d_sr ssse3 avx2/;
- specialize qw/av1_highbd_convolve_x_sr ssse3 avx2/;
- specialize qw/av1_highbd_convolve_y_sr ssse3 avx2/;
- specialize qw/av1_highbd_convolve_2d_scale sse4_1/;
- specialize qw/av1_highbd_jnt_convolve_2d sse4_1 avx2/;
- specialize qw/av1_highbd_jnt_convolve_x sse4_1 avx2/;
- specialize qw/av1_highbd_jnt_convolve_y sse4_1 avx2/;
- specialize qw/av1_highbd_jnt_convolve_2d_copy sse4_1 avx2/;
-
-# INTRA_EDGE functions
-add_proto qw/void av1_filter_intra_edge/, "uint8_t *p, int sz, int strength";
-specialize qw/av1_filter_intra_edge sse4_1/;
-add_proto qw/void av1_upsample_intra_edge/, "uint8_t *p, int sz";
-specialize qw/av1_upsample_intra_edge sse4_1/;
-
-add_proto qw/void av1_filter_intra_edge_high/, "uint16_t *p, int sz, int strength";
-specialize qw/av1_filter_intra_edge_high sse4_1/;
-add_proto qw/void av1_upsample_intra_edge_high/, "uint16_t *p, int sz, int bd";
-specialize qw/av1_upsample_intra_edge_high sse4_1/;
-
-# CFL
-add_proto qw/cfl_subtract_average_fn get_subtract_average_fn/, "TX_SIZE tx_size";
-specialize qw/get_subtract_average_fn sse2 avx2 neon vsx/;
-
-add_proto qw/cfl_subsample_lbd_fn cfl_get_luma_subsampling_420_lbd/, "TX_SIZE tx_size";
-specialize qw/cfl_get_luma_subsampling_420_lbd ssse3 avx2 neon/;
-
-add_proto qw/cfl_subsample_lbd_fn cfl_get_luma_subsampling_422_lbd/, "TX_SIZE tx_size";
-specialize qw/cfl_get_luma_subsampling_422_lbd ssse3 avx2 neon/;
-
-add_proto qw/cfl_subsample_lbd_fn cfl_get_luma_subsampling_444_lbd/, "TX_SIZE tx_size";
-specialize qw/cfl_get_luma_subsampling_444_lbd ssse3 avx2 neon/;
-
-add_proto qw/cfl_subsample_hbd_fn cfl_get_luma_subsampling_420_hbd/, "TX_SIZE tx_size";
-specialize qw/cfl_get_luma_subsampling_420_hbd ssse3 avx2 neon/;
-
-add_proto qw/cfl_subsample_hbd_fn cfl_get_luma_subsampling_422_hbd/, "TX_SIZE tx_size";
-specialize qw/cfl_get_luma_subsampling_422_hbd ssse3 avx2 neon/;
-
-add_proto qw/cfl_subsample_hbd_fn cfl_get_luma_subsampling_444_hbd/, "TX_SIZE tx_size";
-specialize qw/cfl_get_luma_subsampling_444_hbd ssse3 avx2 neon/;
-
-add_proto qw/cfl_predict_lbd_fn get_predict_lbd_fn/, "TX_SIZE tx_size";
-specialize qw/get_predict_lbd_fn ssse3 avx2 neon/;
-
-add_proto qw/cfl_predict_hbd_fn get_predict_hbd_fn/, "TX_SIZE tx_size";
-specialize qw/get_predict_hbd_fn ssse3 avx2 neon/;
-
-1;
diff --git a/third_party/aom/av1/common/av1_txfm.c b/third_party/aom/av1/common/av1_txfm.c
deleted file mode 100644
index bb70eab70..000000000
--- a/third_party/aom/av1/common/av1_txfm.c
+++ /dev/null
@@ -1,160 +0,0 @@
-/*
- * Copyright (c) 2017, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#include "config/aom_dsp_rtcd.h"
-
-#include "av1/common/av1_txfm.h"
-
-// av1_cospi_arr[i][j] = (int)round(cos(M_PI*j/128) * (1<<(cos_bit_min+i)));
-const int32_t av1_cospi_arr_data[7][64] = {
- { 1024, 1024, 1023, 1021, 1019, 1016, 1013, 1009, 1004, 999, 993, 987, 980,
- 972, 964, 955, 946, 936, 926, 915, 903, 891, 878, 865, 851, 837,
- 822, 807, 792, 775, 759, 742, 724, 706, 688, 669, 650, 630, 610,
- 590, 569, 548, 526, 505, 483, 460, 438, 415, 392, 369, 345, 321,
- 297, 273, 249, 224, 200, 175, 150, 125, 100, 75, 50, 25 },
- { 2048, 2047, 2046, 2042, 2038, 2033, 2026, 2018, 2009, 1998, 1987,
- 1974, 1960, 1945, 1928, 1911, 1892, 1872, 1851, 1829, 1806, 1782,
- 1757, 1730, 1703, 1674, 1645, 1615, 1583, 1551, 1517, 1483, 1448,
- 1412, 1375, 1338, 1299, 1260, 1220, 1179, 1138, 1096, 1053, 1009,
- 965, 921, 876, 830, 784, 737, 690, 642, 595, 546, 498,
- 449, 400, 350, 301, 251, 201, 151, 100, 50 },
- { 4096, 4095, 4091, 4085, 4076, 4065, 4052, 4036, 4017, 3996, 3973,
- 3948, 3920, 3889, 3857, 3822, 3784, 3745, 3703, 3659, 3612, 3564,
- 3513, 3461, 3406, 3349, 3290, 3229, 3166, 3102, 3035, 2967, 2896,
- 2824, 2751, 2675, 2598, 2520, 2440, 2359, 2276, 2191, 2106, 2019,
- 1931, 1842, 1751, 1660, 1567, 1474, 1380, 1285, 1189, 1092, 995,
- 897, 799, 700, 601, 501, 401, 301, 201, 101 },
- { 8192, 8190, 8182, 8170, 8153, 8130, 8103, 8071, 8035, 7993, 7946,
- 7895, 7839, 7779, 7713, 7643, 7568, 7489, 7405, 7317, 7225, 7128,
- 7027, 6921, 6811, 6698, 6580, 6458, 6333, 6203, 6070, 5933, 5793,
- 5649, 5501, 5351, 5197, 5040, 4880, 4717, 4551, 4383, 4212, 4038,
- 3862, 3683, 3503, 3320, 3135, 2948, 2760, 2570, 2378, 2185, 1990,
- 1795, 1598, 1401, 1202, 1003, 803, 603, 402, 201 },
- { 16384, 16379, 16364, 16340, 16305, 16261, 16207, 16143, 16069, 15986, 15893,
- 15791, 15679, 15557, 15426, 15286, 15137, 14978, 14811, 14635, 14449, 14256,
- 14053, 13842, 13623, 13395, 13160, 12916, 12665, 12406, 12140, 11866, 11585,
- 11297, 11003, 10702, 10394, 10080, 9760, 9434, 9102, 8765, 8423, 8076,
- 7723, 7366, 7005, 6639, 6270, 5897, 5520, 5139, 4756, 4370, 3981,
- 3590, 3196, 2801, 2404, 2006, 1606, 1205, 804, 402 },
- { 32768, 32758, 32729, 32679, 32610, 32522, 32413, 32286, 32138, 31972, 31786,
- 31581, 31357, 31114, 30853, 30572, 30274, 29957, 29622, 29269, 28899, 28511,
- 28106, 27684, 27246, 26791, 26320, 25833, 25330, 24812, 24279, 23732, 23170,
- 22595, 22006, 21403, 20788, 20160, 19520, 18868, 18205, 17531, 16846, 16151,
- 15447, 14733, 14010, 13279, 12540, 11793, 11039, 10279, 9512, 8740, 7962,
- 7180, 6393, 5602, 4808, 4011, 3212, 2411, 1608, 804 },
- { 65536, 65516, 65457, 65358, 65220, 65043, 64827, 64571, 64277, 63944, 63572,
- 63162, 62714, 62228, 61705, 61145, 60547, 59914, 59244, 58538, 57798, 57022,
- 56212, 55368, 54491, 53581, 52639, 51665, 50660, 49624, 48559, 47464, 46341,
- 45190, 44011, 42806, 41576, 40320, 39040, 37736, 36410, 35062, 33692, 32303,
- 30893, 29466, 28020, 26558, 25080, 23586, 22078, 20557, 19024, 17479, 15924,
- 14359, 12785, 11204, 9616, 8022, 6424, 4821, 3216, 1608 }
-};
-
-// av1_sinpi_arr_data[i][j] = (int)round((sqrt(2) * sin(j*Pi/9) * 2 / 3) * (1
-// << (cos_bit_min + i))) modified so that elements j=1,2 sum to element j=4.
-const int32_t av1_sinpi_arr_data[7][5] = {
- { 0, 330, 621, 836, 951 }, { 0, 660, 1241, 1672, 1901 },
- { 0, 1321, 2482, 3344, 3803 }, { 0, 2642, 4964, 6689, 7606 },
- { 0, 5283, 9929, 13377, 15212 }, { 0, 10566, 19858, 26755, 30424 },
- { 0, 21133, 39716, 53510, 60849 }
-};
-
-void av1_round_shift_array_c(int32_t *arr, int size, int bit) {
- int i;
- if (bit == 0) {
- return;
- } else {
- if (bit > 0) {
- for (i = 0; i < size; i++) {
- arr[i] = round_shift(arr[i], bit);
- }
- } else {
- for (i = 0; i < size; i++) {
- arr[i] = (int32_t)clamp64(((int64_t)1 << (-bit)) * arr[i], INT32_MIN,
- INT32_MAX);
- }
- }
- }
-}
-
-const TXFM_TYPE av1_txfm_type_ls[5][TX_TYPES_1D] = {
- { TXFM_TYPE_DCT4, TXFM_TYPE_ADST4, TXFM_TYPE_ADST4, TXFM_TYPE_IDENTITY4 },
- { TXFM_TYPE_DCT8, TXFM_TYPE_ADST8, TXFM_TYPE_ADST8, TXFM_TYPE_IDENTITY8 },
- { TXFM_TYPE_DCT16, TXFM_TYPE_ADST16, TXFM_TYPE_ADST16, TXFM_TYPE_IDENTITY16 },
- { TXFM_TYPE_DCT32, TXFM_TYPE_INVALID, TXFM_TYPE_INVALID,
- TXFM_TYPE_IDENTITY32 },
- { TXFM_TYPE_DCT64, TXFM_TYPE_INVALID, TXFM_TYPE_INVALID, TXFM_TYPE_INVALID }
-};
-
-const int8_t av1_txfm_stage_num_list[TXFM_TYPES] = {
- 4, // TXFM_TYPE_DCT4
- 6, // TXFM_TYPE_DCT8
- 8, // TXFM_TYPE_DCT16
- 10, // TXFM_TYPE_DCT32
- 12, // TXFM_TYPE_DCT64
- 7, // TXFM_TYPE_ADST4
- 8, // TXFM_TYPE_ADST8
- 10, // TXFM_TYPE_ADST16
- 1, // TXFM_TYPE_IDENTITY4
- 1, // TXFM_TYPE_IDENTITY8
- 1, // TXFM_TYPE_IDENTITY16
- 1, // TXFM_TYPE_IDENTITY32
-};
-
-void av1_range_check_buf(int32_t stage, const int32_t *input,
- const int32_t *buf, int32_t size, int8_t bit) {
-#if CONFIG_COEFFICIENT_RANGE_CHECKING
- const int64_t max_value = (1LL << (bit - 1)) - 1;
- const int64_t min_value = -(1LL << (bit - 1));
-
- int in_range = 1;
-
- for (int i = 0; i < size; ++i) {
- if (buf[i] < min_value || buf[i] > max_value) {
- in_range = 0;
- }
- }
-
- if (!in_range) {
- fprintf(stderr, "Error: coeffs contain out-of-range values\n");
- fprintf(stderr, "size: %d\n", size);
- fprintf(stderr, "stage: %d\n", stage);
- fprintf(stderr, "allowed range: [%" PRId64 ";%" PRId64 "]\n", min_value,
- max_value);
-
- fprintf(stderr, "coeffs: ");
-
- fprintf(stderr, "[");
- for (int j = 0; j < size; j++) {
- if (j > 0) fprintf(stderr, ", ");
- fprintf(stderr, "%d", input[j]);
- }
- fprintf(stderr, "]\n");
-
- fprintf(stderr, " buf: ");
-
- fprintf(stderr, "[");
- for (int j = 0; j < size; j++) {
- if (j > 0) fprintf(stderr, ", ");
- fprintf(stderr, "%d", buf[j]);
- }
- fprintf(stderr, "]\n\n");
- }
-
- assert(in_range);
-#else
- (void)stage;
- (void)input;
- (void)buf;
- (void)size;
- (void)bit;
-#endif
-}
diff --git a/third_party/aom/av1/common/av1_txfm.h b/third_party/aom/av1/common/av1_txfm.h
deleted file mode 100644
index 59d64ca4a..000000000
--- a/third_party/aom/av1/common/av1_txfm.h
+++ /dev/null
@@ -1,232 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#ifndef AOM_AV1_COMMON_AV1_TXFM_H_
-#define AOM_AV1_COMMON_AV1_TXFM_H_
-
-#include <assert.h>
-#include <math.h>
-#include <stdio.h>
-
-#include "config/aom_config.h"
-
-#include "av1/common/enums.h"
-#include "av1/common/blockd.h"
-#include "aom/aom_integer.h"
-#include "aom_dsp/aom_dsp_common.h"
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-#if !defined(DO_RANGE_CHECK_CLAMP)
-#define DO_RANGE_CHECK_CLAMP 0
-#endif
-
-extern const int32_t av1_cospi_arr_data[7][64];
-extern const int32_t av1_sinpi_arr_data[7][5];
-
-#define MAX_TXFM_STAGE_NUM 12
-
-static const int cos_bit_min = 10;
-static const int cos_bit_max = 16;
-
-#define NewSqrt2Bits ((int32_t)12)
-// 2^12 * sqrt(2)
-static const int32_t NewSqrt2 = 5793;
-// 2^12 / sqrt(2)
-static const int32_t NewInvSqrt2 = 2896;
-
-static INLINE const int32_t *cospi_arr(int n) {
- return av1_cospi_arr_data[n - cos_bit_min];
-}
-
-static INLINE const int32_t *sinpi_arr(int n) {
- return av1_sinpi_arr_data[n - cos_bit_min];
-}
-
-static INLINE int32_t range_check_value(int32_t value, int8_t bit) {
-#if CONFIG_COEFFICIENT_RANGE_CHECKING
- const int64_t max_value = (1LL << (bit - 1)) - 1;
- const int64_t min_value = -(1LL << (bit - 1));
- if (value < min_value || value > max_value) {
- fprintf(stderr, "coeff out of bit range, value: %d bit %d\n", value, bit);
- assert(0);
- }
-#endif // CONFIG_COEFFICIENT_RANGE_CHECKING
-#if DO_RANGE_CHECK_CLAMP
- bit = AOMMIN(bit, 31);
- return clamp(value, -(1 << (bit - 1)), (1 << (bit - 1)) - 1);
-#endif // DO_RANGE_CHECK_CLAMP
- (void)bit;
- return value;
-}
-
-static INLINE int32_t round_shift(int64_t value, int bit) {
- assert(bit >= 1);
- return (int32_t)((value + (1ll << (bit - 1))) >> bit);
-}
-
-static INLINE int32_t half_btf(int32_t w0, int32_t in0, int32_t w1, int32_t in1,
- int bit) {
- int64_t result_64 = (int64_t)(w0 * in0) + (int64_t)(w1 * in1);
- int64_t intermediate = result_64 + (1LL << (bit - 1));
- // NOTE(david.barker): The value 'result_64' may not necessarily fit
- // into 32 bits. However, the result of this function is nominally
- // ROUND_POWER_OF_TWO_64(result_64, bit)
- // and that is required to fit into stage_range[stage] many bits
- // (checked by range_check_buf()).
- //
- // Here we've unpacked that rounding operation, and it can be shown
- // that the value of 'intermediate' here *does* fit into 32 bits
- // for any conformant bitstream.
- // The upshot is that, if you do all this calculation using
- // wrapping 32-bit arithmetic instead of (non-wrapping) 64-bit arithmetic,
- // then you'll still get the correct result.
- // To provide a check on this logic, we assert that 'intermediate'
- // would fit into an int32 if range checking is enabled.
-#if CONFIG_COEFFICIENT_RANGE_CHECKING
- assert(intermediate >= INT32_MIN && intermediate <= INT32_MAX);
-#endif
- return (int32_t)(intermediate >> bit);
-}
-
-static INLINE uint16_t highbd_clip_pixel_add(uint16_t dest, tran_high_t trans,
- int bd) {
- return clip_pixel_highbd(dest + (int)trans, bd);
-}
-
-typedef void (*TxfmFunc)(const int32_t *input, int32_t *output, int8_t cos_bit,
- const int8_t *stage_range);
-
-typedef void (*FwdTxfm2dFunc)(const int16_t *input, int32_t *output, int stride,
- TX_TYPE tx_type, int bd);
-
-typedef enum TXFM_TYPE {
- TXFM_TYPE_DCT4,
- TXFM_TYPE_DCT8,
- TXFM_TYPE_DCT16,
- TXFM_TYPE_DCT32,
- TXFM_TYPE_DCT64,
- TXFM_TYPE_ADST4,
- TXFM_TYPE_ADST8,
- TXFM_TYPE_ADST16,
- TXFM_TYPE_IDENTITY4,
- TXFM_TYPE_IDENTITY8,
- TXFM_TYPE_IDENTITY16,
- TXFM_TYPE_IDENTITY32,
- TXFM_TYPES,
- TXFM_TYPE_INVALID,
-} TXFM_TYPE;
-
-typedef struct TXFM_2D_FLIP_CFG {
- TX_SIZE tx_size;
- int ud_flip; // flip upside down
- int lr_flip; // flip left to right
- const int8_t *shift;
- int8_t cos_bit_col;
- int8_t cos_bit_row;
- int8_t stage_range_col[MAX_TXFM_STAGE_NUM];
- int8_t stage_range_row[MAX_TXFM_STAGE_NUM];
- TXFM_TYPE txfm_type_col;
- TXFM_TYPE txfm_type_row;
- int stage_num_col;
- int stage_num_row;
-} TXFM_2D_FLIP_CFG;
-
-static INLINE void get_flip_cfg(TX_TYPE tx_type, int *ud_flip, int *lr_flip) {
- switch (tx_type) {
- case DCT_DCT:
- case ADST_DCT:
- case DCT_ADST:
- case ADST_ADST:
- *ud_flip = 0;
- *lr_flip = 0;
- break;
- case IDTX:
- case V_DCT:
- case H_DCT:
- case V_ADST:
- case H_ADST:
- *ud_flip = 0;
- *lr_flip = 0;
- break;
- case FLIPADST_DCT:
- case FLIPADST_ADST:
- case V_FLIPADST:
- *ud_flip = 1;
- *lr_flip = 0;
- break;
- case DCT_FLIPADST:
- case ADST_FLIPADST:
- case H_FLIPADST:
- *ud_flip = 0;
- *lr_flip = 1;
- break;
- case FLIPADST_FLIPADST:
- *ud_flip = 1;
- *lr_flip = 1;
- break;
- default:
- *ud_flip = 0;
- *lr_flip = 0;
- assert(0);
- }
-}
-
-static INLINE void set_flip_cfg(TX_TYPE tx_type, TXFM_2D_FLIP_CFG *cfg) {
- get_flip_cfg(tx_type, &cfg->ud_flip, &cfg->lr_flip);
-}
-
-// Utility function that returns the log of the ratio of the col and row
-// sizes.
-static INLINE int get_rect_tx_log_ratio(int col, int row) {
- if (col == row) return 0;
- if (col > row) {
- if (col == row * 2) return 1;
- if (col == row * 4) return 2;
- assert(0 && "Unsupported transform size");
- } else {
- if (row == col * 2) return -1;
- if (row == col * 4) return -2;
- assert(0 && "Unsupported transform size");
- }
- return 0; // Invalid
-}
-
-void av1_gen_fwd_stage_range(int8_t *stage_range_col, int8_t *stage_range_row,
- const TXFM_2D_FLIP_CFG *cfg, int bd);
-
-void av1_gen_inv_stage_range(int8_t *stage_range_col, int8_t *stage_range_row,
- const TXFM_2D_FLIP_CFG *cfg, TX_SIZE tx_size,
- int bd);
-
-void av1_get_fwd_txfm_cfg(TX_TYPE tx_type, TX_SIZE tx_size,
- TXFM_2D_FLIP_CFG *cfg);
-void av1_get_inv_txfm_cfg(TX_TYPE tx_type, TX_SIZE tx_size,
- TXFM_2D_FLIP_CFG *cfg);
-extern const TXFM_TYPE av1_txfm_type_ls[5][TX_TYPES_1D];
-extern const int8_t av1_txfm_stage_num_list[TXFM_TYPES];
-static INLINE int get_txw_idx(TX_SIZE tx_size) {
- return tx_size_wide_log2[tx_size] - tx_size_wide_log2[0];
-}
-static INLINE int get_txh_idx(TX_SIZE tx_size) {
- return tx_size_high_log2[tx_size] - tx_size_high_log2[0];
-}
-
-void av1_range_check_buf(int32_t stage, const int32_t *input,
- const int32_t *buf, int32_t size, int8_t bit);
-#define MAX_TXWH_IDX 5
-#ifdef __cplusplus
-}
-#endif // __cplusplus
-
-#endif // AOM_AV1_COMMON_AV1_TXFM_H_
diff --git a/third_party/aom/av1/common/blockd.c b/third_party/aom/av1/common/blockd.c
deleted file mode 100644
index 2e796b656..000000000
--- a/third_party/aom/av1/common/blockd.c
+++ /dev/null
@@ -1,140 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#include <math.h>
-
-#include "aom_ports/system_state.h"
-
-#include "av1/common/blockd.h"
-#include "av1/common/onyxc_int.h"
-
-PREDICTION_MODE av1_left_block_mode(const MB_MODE_INFO *left_mi) {
- if (!left_mi) return DC_PRED;
- assert(!is_inter_block(left_mi) || is_intrabc_block(left_mi));
- return left_mi->mode;
-}
-
-PREDICTION_MODE av1_above_block_mode(const MB_MODE_INFO *above_mi) {
- if (!above_mi) return DC_PRED;
- assert(!is_inter_block(above_mi) || is_intrabc_block(above_mi));
- return above_mi->mode;
-}
-
-void av1_set_contexts(const MACROBLOCKD *xd, struct macroblockd_plane *pd,
- int plane, BLOCK_SIZE plane_bsize, TX_SIZE tx_size,
- int has_eob, int aoff, int loff) {
- ENTROPY_CONTEXT *const a = pd->above_context + aoff;
- ENTROPY_CONTEXT *const l = pd->left_context + loff;
- const int txs_wide = tx_size_wide_unit[tx_size];
- const int txs_high = tx_size_high_unit[tx_size];
-
- // above
- if (has_eob && xd->mb_to_right_edge < 0) {
- const int blocks_wide = max_block_wide(xd, plane_bsize, plane);
- const int above_contexts = AOMMIN(txs_wide, blocks_wide - aoff);
- memset(a, has_eob, sizeof(*a) * above_contexts);
- memset(a + above_contexts, 0, sizeof(*a) * (txs_wide - above_contexts));
- } else {
- memset(a, has_eob, sizeof(*a) * txs_wide);
- }
-
- // left
- if (has_eob && xd->mb_to_bottom_edge < 0) {
- const int blocks_high = max_block_high(xd, plane_bsize, plane);
- const int left_contexts = AOMMIN(txs_high, blocks_high - loff);
- memset(l, has_eob, sizeof(*l) * left_contexts);
- memset(l + left_contexts, 0, sizeof(*l) * (txs_high - left_contexts));
- } else {
- memset(l, has_eob, sizeof(*l) * txs_high);
- }
-}
-void av1_reset_skip_context(MACROBLOCKD *xd, int mi_row, int mi_col,
- BLOCK_SIZE bsize, const int num_planes) {
- int i;
- int nplanes;
- int chroma_ref;
- chroma_ref =
- is_chroma_reference(mi_row, mi_col, bsize, xd->plane[1].subsampling_x,
- xd->plane[1].subsampling_y);
- nplanes = 1 + (num_planes - 1) * chroma_ref;
- for (i = 0; i < nplanes; i++) {
- struct macroblockd_plane *const pd = &xd->plane[i];
- const BLOCK_SIZE plane_bsize =
- get_plane_block_size(bsize, pd->subsampling_x, pd->subsampling_y);
- const int txs_wide = block_size_wide[plane_bsize] >> tx_size_wide_log2[0];
- const int txs_high = block_size_high[plane_bsize] >> tx_size_high_log2[0];
- memset(pd->above_context, 0, sizeof(ENTROPY_CONTEXT) * txs_wide);
- memset(pd->left_context, 0, sizeof(ENTROPY_CONTEXT) * txs_high);
- }
-}
-
-void av1_reset_loop_filter_delta(MACROBLOCKD *xd, int num_planes) {
- xd->delta_lf_from_base = 0;
- const int frame_lf_count =
- num_planes > 1 ? FRAME_LF_COUNT : FRAME_LF_COUNT - 2;
- for (int lf_id = 0; lf_id < frame_lf_count; ++lf_id) xd->delta_lf[lf_id] = 0;
-}
-
-void av1_reset_loop_restoration(MACROBLOCKD *xd, const int num_planes) {
- for (int p = 0; p < num_planes; ++p) {
- set_default_wiener(xd->wiener_info + p);
- set_default_sgrproj(xd->sgrproj_info + p);
- }
-}
-
-void av1_setup_block_planes(MACROBLOCKD *xd, int ss_x, int ss_y,
- const int num_planes) {
- int i;
-
- for (i = 0; i < num_planes; i++) {
- xd->plane[i].plane_type = get_plane_type(i);
- xd->plane[i].subsampling_x = i ? ss_x : 0;
- xd->plane[i].subsampling_y = i ? ss_y : 0;
- }
- for (i = num_planes; i < MAX_MB_PLANE; i++) {
- xd->plane[i].subsampling_x = 1;
- xd->plane[i].subsampling_y = 1;
- }
-}
-
-const int16_t dr_intra_derivative[90] = {
- // More evenly spread out angles and limited to 10-bit
- // Values that are 0 will never be used
- // Approx angle
- 0, 0, 0, //
- 1023, 0, 0, // 3, ...
- 547, 0, 0, // 6, ...
- 372, 0, 0, 0, 0, // 9, ...
- 273, 0, 0, // 14, ...
- 215, 0, 0, // 17, ...
- 178, 0, 0, // 20, ...
- 151, 0, 0, // 23, ... (113 & 203 are base angles)
- 132, 0, 0, // 26, ...
- 116, 0, 0, // 29, ...
- 102, 0, 0, 0, // 32, ...
- 90, 0, 0, // 36, ...
- 80, 0, 0, // 39, ...
- 71, 0, 0, // 42, ...
- 64, 0, 0, // 45, ... (45 & 135 are base angles)
- 57, 0, 0, // 48, ...
- 51, 0, 0, // 51, ...
- 45, 0, 0, 0, // 54, ...
- 40, 0, 0, // 58, ...
- 35, 0, 0, // 61, ...
- 31, 0, 0, // 64, ...
- 27, 0, 0, // 67, ... (67 & 157 are base angles)
- 23, 0, 0, // 70, ...
- 19, 0, 0, // 73, ...
- 15, 0, 0, 0, 0, // 76, ...
- 11, 0, 0, // 81, ...
- 7, 0, 0, // 84, ...
- 3, 0, 0, // 87, ...
-};
diff --git a/third_party/aom/av1/common/blockd.h b/third_party/aom/av1/common/blockd.h
deleted file mode 100644
index a2311c1b0..000000000
--- a/third_party/aom/av1/common/blockd.h
+++ /dev/null
@@ -1,1176 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#ifndef AOM_AV1_COMMON_BLOCKD_H_
-#define AOM_AV1_COMMON_BLOCKD_H_
-
-#include "config/aom_config.h"
-
-#include "aom_dsp/aom_dsp_common.h"
-#include "aom_ports/mem.h"
-#include "aom_scale/yv12config.h"
-
-#include "av1/common/common_data.h"
-#include "av1/common/quant_common.h"
-#include "av1/common/entropy.h"
-#include "av1/common/entropymode.h"
-#include "av1/common/mv.h"
-#include "av1/common/scale.h"
-#include "av1/common/seg_common.h"
-#include "av1/common/tile_common.h"
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-#define USE_B_QUANT_NO_TRELLIS 1
-
-#define MAX_MB_PLANE 3
-
-#define MAX_DIFFWTD_MASK_BITS 1
-
-// DIFFWTD_MASK_TYPES should not surpass 1 << MAX_DIFFWTD_MASK_BITS
-typedef enum ATTRIBUTE_PACKED {
- DIFFWTD_38 = 0,
- DIFFWTD_38_INV,
- DIFFWTD_MASK_TYPES,
-} DIFFWTD_MASK_TYPE;
-
-typedef enum ATTRIBUTE_PACKED {
- KEY_FRAME = 0,
- INTER_FRAME = 1,
- INTRA_ONLY_FRAME = 2, // replaces intra-only
- S_FRAME = 3,
- FRAME_TYPES,
-} FRAME_TYPE;
-
-static INLINE int is_comp_ref_allowed(BLOCK_SIZE bsize) {
- return AOMMIN(block_size_wide[bsize], block_size_high[bsize]) >= 8;
-}
-
-static INLINE int is_inter_mode(PREDICTION_MODE mode) {
- return mode >= INTER_MODE_START && mode < INTER_MODE_END;
-}
-
-typedef struct {
- uint8_t *plane[MAX_MB_PLANE];
- int stride[MAX_MB_PLANE];
-} BUFFER_SET;
-
-static INLINE int is_inter_singleref_mode(PREDICTION_MODE mode) {
- return mode >= SINGLE_INTER_MODE_START && mode < SINGLE_INTER_MODE_END;
-}
-static INLINE int is_inter_compound_mode(PREDICTION_MODE mode) {
- return mode >= COMP_INTER_MODE_START && mode < COMP_INTER_MODE_END;
-}
-
-static INLINE PREDICTION_MODE compound_ref0_mode(PREDICTION_MODE mode) {
- static PREDICTION_MODE lut[] = {
- MB_MODE_COUNT, // DC_PRED
- MB_MODE_COUNT, // V_PRED
- MB_MODE_COUNT, // H_PRED
- MB_MODE_COUNT, // D45_PRED
- MB_MODE_COUNT, // D135_PRED
- MB_MODE_COUNT, // D113_PRED
- MB_MODE_COUNT, // D157_PRED
- MB_MODE_COUNT, // D203_PRED
- MB_MODE_COUNT, // D67_PRED
- MB_MODE_COUNT, // SMOOTH_PRED
- MB_MODE_COUNT, // SMOOTH_V_PRED
- MB_MODE_COUNT, // SMOOTH_H_PRED
- MB_MODE_COUNT, // PAETH_PRED
- MB_MODE_COUNT, // NEARESTMV
- MB_MODE_COUNT, // NEARMV
- MB_MODE_COUNT, // GLOBALMV
- MB_MODE_COUNT, // NEWMV
- NEARESTMV, // NEAREST_NEARESTMV
- NEARMV, // NEAR_NEARMV
- NEARESTMV, // NEAREST_NEWMV
- NEWMV, // NEW_NEARESTMV
- NEARMV, // NEAR_NEWMV
- NEWMV, // NEW_NEARMV
- GLOBALMV, // GLOBAL_GLOBALMV
- NEWMV, // NEW_NEWMV
- };
- assert(NELEMENTS(lut) == MB_MODE_COUNT);
- assert(is_inter_compound_mode(mode));
- return lut[mode];
-}
-
-static INLINE PREDICTION_MODE compound_ref1_mode(PREDICTION_MODE mode) {
- static PREDICTION_MODE lut[] = {
- MB_MODE_COUNT, // DC_PRED
- MB_MODE_COUNT, // V_PRED
- MB_MODE_COUNT, // H_PRED
- MB_MODE_COUNT, // D45_PRED
- MB_MODE_COUNT, // D135_PRED
- MB_MODE_COUNT, // D113_PRED
- MB_MODE_COUNT, // D157_PRED
- MB_MODE_COUNT, // D203_PRED
- MB_MODE_COUNT, // D67_PRED
- MB_MODE_COUNT, // SMOOTH_PRED
- MB_MODE_COUNT, // SMOOTH_V_PRED
- MB_MODE_COUNT, // SMOOTH_H_PRED
- MB_MODE_COUNT, // PAETH_PRED
- MB_MODE_COUNT, // NEARESTMV
- MB_MODE_COUNT, // NEARMV
- MB_MODE_COUNT, // GLOBALMV
- MB_MODE_COUNT, // NEWMV
- NEARESTMV, // NEAREST_NEARESTMV
- NEARMV, // NEAR_NEARMV
- NEWMV, // NEAREST_NEWMV
- NEARESTMV, // NEW_NEARESTMV
- NEWMV, // NEAR_NEWMV
- NEARMV, // NEW_NEARMV
- GLOBALMV, // GLOBAL_GLOBALMV
- NEWMV, // NEW_NEWMV
- };
- assert(NELEMENTS(lut) == MB_MODE_COUNT);
- assert(is_inter_compound_mode(mode));
- return lut[mode];
-}
-
-static INLINE int have_nearmv_in_inter_mode(PREDICTION_MODE mode) {
- return (mode == NEARMV || mode == NEAR_NEARMV || mode == NEAR_NEWMV ||
- mode == NEW_NEARMV);
-}
-
-static INLINE int have_newmv_in_inter_mode(PREDICTION_MODE mode) {
- return (mode == NEWMV || mode == NEW_NEWMV || mode == NEAREST_NEWMV ||
- mode == NEW_NEARESTMV || mode == NEAR_NEWMV || mode == NEW_NEARMV);
-}
-
-static INLINE int is_masked_compound_type(COMPOUND_TYPE type) {
- return (type == COMPOUND_WEDGE || type == COMPOUND_DIFFWTD);
-}
-
-/* For keyframes, intra block modes are predicted by the (already decoded)
- modes for the Y blocks to the left and above us; for interframes, there
- is a single probability table. */
-
-typedef int8_t MV_REFERENCE_FRAME;
-
-typedef struct {
- // Number of base colors for Y (0) and UV (1)
- uint8_t palette_size[2];
- // Value of base colors for Y, U, and V
- uint16_t palette_colors[3 * PALETTE_MAX_SIZE];
-} PALETTE_MODE_INFO;
-
-typedef struct {
- uint8_t use_filter_intra;
- FILTER_INTRA_MODE filter_intra_mode;
-} FILTER_INTRA_MODE_INFO;
-
-static const PREDICTION_MODE fimode_to_intradir[FILTER_INTRA_MODES] = {
- DC_PRED, V_PRED, H_PRED, D157_PRED, DC_PRED
-};
-
-#if CONFIG_RD_DEBUG
-#define TXB_COEFF_COST_MAP_SIZE (MAX_MIB_SIZE)
-#endif
-
-typedef struct RD_STATS {
- int rate;
- int64_t dist;
- // Please be careful of using rdcost, it's not guaranteed to be set all the
- // time.
- // TODO(angiebird): Create a set of functions to manipulate the RD_STATS. In
- // these functions, make sure rdcost is always up-to-date according to
- // rate/dist.
- int64_t rdcost;
- int64_t sse;
- int skip; // sse should equal to dist when skip == 1
- int64_t ref_rdcost;
- int zero_rate;
- uint8_t invalid_rate;
-#if CONFIG_RD_DEBUG
- int txb_coeff_cost[MAX_MB_PLANE];
- int txb_coeff_cost_map[MAX_MB_PLANE][TXB_COEFF_COST_MAP_SIZE]
- [TXB_COEFF_COST_MAP_SIZE];
-#endif // CONFIG_RD_DEBUG
-} RD_STATS;
-
-// This struct is used to group function args that are commonly
-// sent together in functions related to interinter compound modes
-typedef struct {
- int wedge_index;
- int wedge_sign;
- DIFFWTD_MASK_TYPE mask_type;
- uint8_t *seg_mask;
- COMPOUND_TYPE type;
-} INTERINTER_COMPOUND_DATA;
-
-#define INTER_TX_SIZE_BUF_LEN 16
-#define TXK_TYPE_BUF_LEN 64
-// This structure now relates to 4x4 block regions.
-typedef struct MB_MODE_INFO {
- // Common for both INTER and INTRA blocks
- BLOCK_SIZE sb_type;
- PREDICTION_MODE mode;
- TX_SIZE tx_size;
- uint8_t inter_tx_size[INTER_TX_SIZE_BUF_LEN];
- int8_t skip;
- int8_t skip_mode;
- int8_t segment_id;
- int8_t seg_id_predicted; // valid only when temporal_update is enabled
-
- // Only for INTRA blocks
- UV_PREDICTION_MODE uv_mode;
-
- PALETTE_MODE_INFO palette_mode_info;
- uint8_t use_intrabc;
-
- // Only for INTER blocks
- InterpFilters interp_filters;
- MV_REFERENCE_FRAME ref_frame[2];
-
- TX_TYPE txk_type[TXK_TYPE_BUF_LEN];
-
- FILTER_INTRA_MODE_INFO filter_intra_mode_info;
-
- // The actual prediction angle is the base angle + (angle_delta * step).
- int8_t angle_delta[PLANE_TYPES];
-
- // interintra members
- INTERINTRA_MODE interintra_mode;
- // TODO(debargha): Consolidate these flags
- int use_wedge_interintra;
- int interintra_wedge_index;
- int interintra_wedge_sign;
- // interinter members
- INTERINTER_COMPOUND_DATA interinter_comp;
- MOTION_MODE motion_mode;
- int overlappable_neighbors[2];
- int_mv mv[2];
- uint8_t ref_mv_idx;
- PARTITION_TYPE partition;
- /* deringing gain *per-superblock* */
- int8_t cdef_strength;
- int current_qindex;
- int delta_lf_from_base;
- int delta_lf[FRAME_LF_COUNT];
-#if CONFIG_RD_DEBUG
- RD_STATS rd_stats;
- int mi_row;
- int mi_col;
-#endif
- int num_proj_ref;
- WarpedMotionParams wm_params;
-
- // Index of the alpha Cb and alpha Cr combination
- int cfl_alpha_idx;
- // Joint sign of alpha Cb and alpha Cr
- int cfl_alpha_signs;
-
- int compound_idx;
- int comp_group_idx;
-} MB_MODE_INFO;
-
-static INLINE int is_intrabc_block(const MB_MODE_INFO *mbmi) {
- return mbmi->use_intrabc;
-}
-
-static INLINE PREDICTION_MODE get_uv_mode(UV_PREDICTION_MODE mode) {
- assert(mode < UV_INTRA_MODES);
- static const PREDICTION_MODE uv2y[] = {
- DC_PRED, // UV_DC_PRED
- V_PRED, // UV_V_PRED
- H_PRED, // UV_H_PRED
- D45_PRED, // UV_D45_PRED
- D135_PRED, // UV_D135_PRED
- D113_PRED, // UV_D113_PRED
- D157_PRED, // UV_D157_PRED
- D203_PRED, // UV_D203_PRED
- D67_PRED, // UV_D67_PRED
- SMOOTH_PRED, // UV_SMOOTH_PRED
- SMOOTH_V_PRED, // UV_SMOOTH_V_PRED
- SMOOTH_H_PRED, // UV_SMOOTH_H_PRED
- PAETH_PRED, // UV_PAETH_PRED
- DC_PRED, // UV_CFL_PRED
- INTRA_INVALID, // UV_INTRA_MODES
- INTRA_INVALID, // UV_MODE_INVALID
- };
- return uv2y[mode];
-}
-
-static INLINE int is_inter_block(const MB_MODE_INFO *mbmi) {
- return is_intrabc_block(mbmi) || mbmi->ref_frame[0] > INTRA_FRAME;
-}
-
-static INLINE int has_second_ref(const MB_MODE_INFO *mbmi) {
- return mbmi->ref_frame[1] > INTRA_FRAME;
-}
-
-static INLINE int has_uni_comp_refs(const MB_MODE_INFO *mbmi) {
- return has_second_ref(mbmi) && (!((mbmi->ref_frame[0] >= BWDREF_FRAME) ^
- (mbmi->ref_frame[1] >= BWDREF_FRAME)));
-}
-
-static INLINE MV_REFERENCE_FRAME comp_ref0(int ref_idx) {
- static const MV_REFERENCE_FRAME lut[] = {
- LAST_FRAME, // LAST_LAST2_FRAMES,
- LAST_FRAME, // LAST_LAST3_FRAMES,
- LAST_FRAME, // LAST_GOLDEN_FRAMES,
- BWDREF_FRAME, // BWDREF_ALTREF_FRAMES,
- LAST2_FRAME, // LAST2_LAST3_FRAMES
- LAST2_FRAME, // LAST2_GOLDEN_FRAMES,
- LAST3_FRAME, // LAST3_GOLDEN_FRAMES,
- BWDREF_FRAME, // BWDREF_ALTREF2_FRAMES,
- ALTREF2_FRAME, // ALTREF2_ALTREF_FRAMES,
- };
- assert(NELEMENTS(lut) == TOTAL_UNIDIR_COMP_REFS);
- return lut[ref_idx];
-}
-
-static INLINE MV_REFERENCE_FRAME comp_ref1(int ref_idx) {
- static const MV_REFERENCE_FRAME lut[] = {
- LAST2_FRAME, // LAST_LAST2_FRAMES,
- LAST3_FRAME, // LAST_LAST3_FRAMES,
- GOLDEN_FRAME, // LAST_GOLDEN_FRAMES,
- ALTREF_FRAME, // BWDREF_ALTREF_FRAMES,
- LAST3_FRAME, // LAST2_LAST3_FRAMES
- GOLDEN_FRAME, // LAST2_GOLDEN_FRAMES,
- GOLDEN_FRAME, // LAST3_GOLDEN_FRAMES,
- ALTREF2_FRAME, // BWDREF_ALTREF2_FRAMES,
- ALTREF_FRAME, // ALTREF2_ALTREF_FRAMES,
- };
- assert(NELEMENTS(lut) == TOTAL_UNIDIR_COMP_REFS);
- return lut[ref_idx];
-}
-
-PREDICTION_MODE av1_left_block_mode(const MB_MODE_INFO *left_mi);
-
-PREDICTION_MODE av1_above_block_mode(const MB_MODE_INFO *above_mi);
-
-static INLINE int is_global_mv_block(const MB_MODE_INFO *const mbmi,
- TransformationType type) {
- const PREDICTION_MODE mode = mbmi->mode;
- const BLOCK_SIZE bsize = mbmi->sb_type;
- const int block_size_allowed =
- AOMMIN(block_size_wide[bsize], block_size_high[bsize]) >= 8;
- return (mode == GLOBALMV || mode == GLOBAL_GLOBALMV) && type > TRANSLATION &&
- block_size_allowed;
-}
-
-#if CONFIG_MISMATCH_DEBUG
-static INLINE void mi_to_pixel_loc(int *pixel_c, int *pixel_r, int mi_col,
- int mi_row, int tx_blk_col, int tx_blk_row,
- int subsampling_x, int subsampling_y) {
- *pixel_c = ((mi_col >> subsampling_x) << MI_SIZE_LOG2) +
- (tx_blk_col << tx_size_wide_log2[0]);
- *pixel_r = ((mi_row >> subsampling_y) << MI_SIZE_LOG2) +
- (tx_blk_row << tx_size_high_log2[0]);
-}
-#endif
-
-enum ATTRIBUTE_PACKED mv_precision { MV_PRECISION_Q3, MV_PRECISION_Q4 };
-
-struct buf_2d {
- uint8_t *buf;
- uint8_t *buf0;
- int width;
- int height;
- int stride;
-};
-
-typedef struct eob_info {
- uint16_t eob;
- uint16_t max_scan_line;
-} eob_info;
-
-typedef struct {
- DECLARE_ALIGNED(32, tran_low_t, dqcoeff[MAX_MB_PLANE][MAX_SB_SQUARE]);
- eob_info eob_data[MAX_MB_PLANE]
- [MAX_SB_SQUARE / (TX_SIZE_W_MIN * TX_SIZE_H_MIN)];
- DECLARE_ALIGNED(16, uint8_t, color_index_map[2][MAX_SB_SQUARE]);
-} CB_BUFFER;
-
-typedef struct macroblockd_plane {
- tran_low_t *dqcoeff;
- tran_low_t *dqcoeff_block;
- eob_info *eob_data;
- PLANE_TYPE plane_type;
- int subsampling_x;
- int subsampling_y;
- struct buf_2d dst;
- struct buf_2d pre[2];
- ENTROPY_CONTEXT *above_context;
- ENTROPY_CONTEXT *left_context;
-
- // The dequantizers below are true dequntizers used only in the
- // dequantization process. They have the same coefficient
- // shift/scale as TX.
- int16_t seg_dequant_QTX[MAX_SEGMENTS][2];
- uint8_t *color_index_map;
-
- // block size in pixels
- uint8_t width, height;
-
- qm_val_t *seg_iqmatrix[MAX_SEGMENTS][TX_SIZES_ALL];
- qm_val_t *seg_qmatrix[MAX_SEGMENTS][TX_SIZES_ALL];
-
- // the 'dequantizers' below are not literal dequantizer values.
- // They're used by encoder RDO to generate ad-hoc lambda values.
- // They use a hardwired Q3 coeff shift and do not necessarily match
- // the TX scale in use.
- const int16_t *dequant_Q3;
-} MACROBLOCKD_PLANE;
-
-#define BLOCK_OFFSET(x, i) \
- ((x) + (i) * (1 << (tx_size_wide_log2[0] + tx_size_high_log2[0])))
-
-typedef struct RefBuffer {
- int idx; // frame buf idx
- int map_idx; // frame map idx
- YV12_BUFFER_CONFIG *buf;
- struct scale_factors sf;
-} RefBuffer;
-
-typedef struct {
- DECLARE_ALIGNED(16, InterpKernel, vfilter);
- DECLARE_ALIGNED(16, InterpKernel, hfilter);
-} WienerInfo;
-
-typedef struct {
- int ep;
- int xqd[2];
-} SgrprojInfo;
-
-#if CONFIG_DEBUG
-#define CFL_SUB8X8_VAL_MI_SIZE (4)
-#define CFL_SUB8X8_VAL_MI_SQUARE \
- (CFL_SUB8X8_VAL_MI_SIZE * CFL_SUB8X8_VAL_MI_SIZE)
-#endif // CONFIG_DEBUG
-#define CFL_MAX_BLOCK_SIZE (BLOCK_32X32)
-#define CFL_BUF_LINE (32)
-#define CFL_BUF_LINE_I128 (CFL_BUF_LINE >> 3)
-#define CFL_BUF_LINE_I256 (CFL_BUF_LINE >> 4)
-#define CFL_BUF_SQUARE (CFL_BUF_LINE * CFL_BUF_LINE)
-typedef struct cfl_ctx {
- // Q3 reconstructed luma pixels (only Q2 is required, but Q3 is used to avoid
- // shifts)
- uint16_t recon_buf_q3[CFL_BUF_SQUARE];
- // Q3 AC contributions (reconstructed luma pixels - tx block avg)
- int16_t ac_buf_q3[CFL_BUF_SQUARE];
-
- // Cache the DC_PRED when performing RDO, so it does not have to be recomputed
- // for every scaling parameter
- int dc_pred_is_cached[CFL_PRED_PLANES];
- // The DC_PRED cache is disable when decoding
- int use_dc_pred_cache;
- // Only cache the first row of the DC_PRED
- int16_t dc_pred_cache[CFL_PRED_PLANES][CFL_BUF_LINE];
-
- // Height and width currently used in the CfL prediction buffer.
- int buf_height, buf_width;
-
- int are_parameters_computed;
-
- // Chroma subsampling
- int subsampling_x, subsampling_y;
-
- int mi_row, mi_col;
-
- // Whether the reconstructed luma pixels need to be stored
- int store_y;
-
-#if CONFIG_DEBUG
- int rate;
-#endif // CONFIG_DEBUG
-
- int is_chroma_reference;
-} CFL_CTX;
-
-typedef struct jnt_comp_params {
- int use_jnt_comp_avg;
- int fwd_offset;
- int bck_offset;
-} JNT_COMP_PARAMS;
-
-// Most/all of the pointers are mere pointers to actual arrays are allocated
-// elsewhere. This is mostly for coding convenience.
-typedef struct macroblockd {
- struct macroblockd_plane plane[MAX_MB_PLANE];
-
- TileInfo tile;
-
- int mi_stride;
-
- MB_MODE_INFO **mi;
- MB_MODE_INFO *left_mbmi;
- MB_MODE_INFO *above_mbmi;
- MB_MODE_INFO *chroma_left_mbmi;
- MB_MODE_INFO *chroma_above_mbmi;
-
- int up_available;
- int left_available;
- int chroma_up_available;
- int chroma_left_available;
-
- /* Distance of MB away from frame edges in subpixels (1/8th pixel) */
- int mb_to_left_edge;
- int mb_to_right_edge;
- int mb_to_top_edge;
- int mb_to_bottom_edge;
-
- /* pointers to reference frames */
- const RefBuffer *block_refs[2];
-
- /* pointer to current frame */
- const YV12_BUFFER_CONFIG *cur_buf;
-
- ENTROPY_CONTEXT *above_context[MAX_MB_PLANE];
- ENTROPY_CONTEXT left_context[MAX_MB_PLANE][MAX_MIB_SIZE];
-
- PARTITION_CONTEXT *above_seg_context;
- PARTITION_CONTEXT left_seg_context[MAX_MIB_SIZE];
-
- TXFM_CONTEXT *above_txfm_context;
- TXFM_CONTEXT *left_txfm_context;
- TXFM_CONTEXT left_txfm_context_buffer[MAX_MIB_SIZE];
-
- WienerInfo wiener_info[MAX_MB_PLANE];
- SgrprojInfo sgrproj_info[MAX_MB_PLANE];
-
- // block dimension in the unit of mode_info.
- uint8_t n4_w, n4_h;
-
- uint8_t ref_mv_count[MODE_CTX_REF_FRAMES];
- CANDIDATE_MV ref_mv_stack[MODE_CTX_REF_FRAMES][MAX_REF_MV_STACK_SIZE];
- uint8_t is_sec_rect;
-
- // Counts of each reference frame in the above and left neighboring blocks.
- // NOTE: Take into account both single and comp references.
- uint8_t neighbors_ref_counts[REF_FRAMES];
-
- FRAME_CONTEXT *tile_ctx;
- /* Bit depth: 8, 10, 12 */
- int bd;
-
- int qindex[MAX_SEGMENTS];
- int lossless[MAX_SEGMENTS];
- int corrupted;
- int cur_frame_force_integer_mv;
- // same with that in AV1_COMMON
- struct aom_internal_error_info *error_info;
- const WarpedMotionParams *global_motion;
- int delta_qindex;
- int current_qindex;
- // Since actual frame level loop filtering level value is not available
- // at the beginning of the tile (only available during actual filtering)
- // at encoder side.we record the delta_lf (against the frame level loop
- // filtering level) and code the delta between previous superblock's delta
- // lf and current delta lf. It is equivalent to the delta between previous
- // superblock's actual lf and current lf.
- int delta_lf_from_base;
- // For this experiment, we have four frame filter levels for different plane
- // and direction. So, to support the per superblock update, we need to add
- // a few more params as below.
- // 0: delta loop filter level for y plane vertical
- // 1: delta loop filter level for y plane horizontal
- // 2: delta loop filter level for u plane
- // 3: delta loop filter level for v plane
- // To make it consistent with the reference to each filter level in segment,
- // we need to -1, since
- // SEG_LVL_ALT_LF_Y_V = 1;
- // SEG_LVL_ALT_LF_Y_H = 2;
- // SEG_LVL_ALT_LF_U = 3;
- // SEG_LVL_ALT_LF_V = 4;
- int delta_lf[FRAME_LF_COUNT];
- int cdef_preset[4];
-
- DECLARE_ALIGNED(16, uint8_t, seg_mask[2 * MAX_SB_SQUARE]);
- uint8_t *mc_buf[2];
- CFL_CTX cfl;
-
- JNT_COMP_PARAMS jcp_param;
-
- uint16_t cb_offset[MAX_MB_PLANE];
- uint16_t txb_offset[MAX_MB_PLANE];
- uint16_t color_index_map_offset[2];
-
- CONV_BUF_TYPE *tmp_conv_dst;
- uint8_t *tmp_obmc_bufs[2];
-} MACROBLOCKD;
-
-static INLINE int get_bitdepth_data_path_index(const MACROBLOCKD *xd) {
- return xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH ? 1 : 0;
-}
-
-static INLINE uint8_t *get_buf_by_bd(const MACROBLOCKD *xd, uint8_t *buf16) {
- return (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH)
- ? CONVERT_TO_BYTEPTR(buf16)
- : buf16;
-}
-
-static INLINE int get_sqr_bsize_idx(BLOCK_SIZE bsize) {
- switch (bsize) {
- case BLOCK_4X4: return 0;
- case BLOCK_8X8: return 1;
- case BLOCK_16X16: return 2;
- case BLOCK_32X32: return 3;
- case BLOCK_64X64: return 4;
- case BLOCK_128X128: return 5;
- default: return SQR_BLOCK_SIZES;
- }
-}
-
-// For a square block size 'bsize', returns the size of the sub-blocks used by
-// the given partition type. If the partition produces sub-blocks of different
-// sizes, then the function returns the largest sub-block size.
-// Implements the Partition_Subsize lookup table in the spec (Section 9.3.
-// Conversion tables).
-// Note: the input block size should be square.
-// Otherwise it's considered invalid.
-static INLINE BLOCK_SIZE get_partition_subsize(BLOCK_SIZE bsize,
- PARTITION_TYPE partition) {
- if (partition == PARTITION_INVALID) {
- return BLOCK_INVALID;
- } else {
- const int sqr_bsize_idx = get_sqr_bsize_idx(bsize);
- return sqr_bsize_idx >= SQR_BLOCK_SIZES
- ? BLOCK_INVALID
- : subsize_lookup[partition][sqr_bsize_idx];
- }
-}
-
-static TX_TYPE intra_mode_to_tx_type(const MB_MODE_INFO *mbmi,
- PLANE_TYPE plane_type) {
- static const TX_TYPE _intra_mode_to_tx_type[INTRA_MODES] = {
- DCT_DCT, // DC
- ADST_DCT, // V
- DCT_ADST, // H
- DCT_DCT, // D45
- ADST_ADST, // D135
- ADST_DCT, // D117
- DCT_ADST, // D153
- DCT_ADST, // D207
- ADST_DCT, // D63
- ADST_ADST, // SMOOTH
- ADST_DCT, // SMOOTH_V
- DCT_ADST, // SMOOTH_H
- ADST_ADST, // PAETH
- };
- const PREDICTION_MODE mode =
- (plane_type == PLANE_TYPE_Y) ? mbmi->mode : get_uv_mode(mbmi->uv_mode);
- assert(mode < INTRA_MODES);
- return _intra_mode_to_tx_type[mode];
-}
-
-static INLINE int is_rect_tx(TX_SIZE tx_size) { return tx_size >= TX_SIZES; }
-
-static INLINE int block_signals_txsize(BLOCK_SIZE bsize) {
- return bsize > BLOCK_4X4;
-}
-
-// Number of transform types in each set type
-static const int av1_num_ext_tx_set[EXT_TX_SET_TYPES] = {
- 1, 2, 5, 7, 12, 16,
-};
-
-static const int av1_ext_tx_used[EXT_TX_SET_TYPES][TX_TYPES] = {
- { 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 },
- { 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0 },
- { 1, 1, 1, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0 },
- { 1, 1, 1, 1, 0, 0, 0, 0, 0, 1, 1, 1, 0, 0, 0, 0 },
- { 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0 },
- { 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 },
-};
-
-static const uint16_t av1_ext_tx_used_flag[EXT_TX_SET_TYPES] = {
- 0x0001, // 0000 0000 0000 0001
- 0x0201, // 0000 0010 0000 0001
- 0x020F, // 0000 0010 0000 1111
- 0x0E0F, // 0000 1110 0000 1111
- 0x0FFF, // 0000 1111 1111 1111
- 0xFFFF, // 1111 1111 1111 1111
-};
-
-static INLINE TxSetType av1_get_ext_tx_set_type(TX_SIZE tx_size, int is_inter,
- int use_reduced_set) {
- const TX_SIZE tx_size_sqr_up = txsize_sqr_up_map[tx_size];
- if (tx_size_sqr_up > TX_32X32) return EXT_TX_SET_DCTONLY;
- if (tx_size_sqr_up == TX_32X32)
- return is_inter ? EXT_TX_SET_DCT_IDTX : EXT_TX_SET_DCTONLY;
- if (use_reduced_set)
- return is_inter ? EXT_TX_SET_DCT_IDTX : EXT_TX_SET_DTT4_IDTX;
- const TX_SIZE tx_size_sqr = txsize_sqr_map[tx_size];
- if (is_inter) {
- return (tx_size_sqr == TX_16X16 ? EXT_TX_SET_DTT9_IDTX_1DDCT
- : EXT_TX_SET_ALL16);
- } else {
- return (tx_size_sqr == TX_16X16 ? EXT_TX_SET_DTT4_IDTX
- : EXT_TX_SET_DTT4_IDTX_1DDCT);
- }
-}
-
-// Maps tx set types to the indices.
-static const int ext_tx_set_index[2][EXT_TX_SET_TYPES] = {
- { // Intra
- 0, -1, 2, 1, -1, -1 },
- { // Inter
- 0, 3, -1, -1, 2, 1 },
-};
-
-static INLINE int get_ext_tx_set(TX_SIZE tx_size, int is_inter,
- int use_reduced_set) {
- const TxSetType set_type =
- av1_get_ext_tx_set_type(tx_size, is_inter, use_reduced_set);
- return ext_tx_set_index[is_inter][set_type];
-}
-
-static INLINE int get_ext_tx_types(TX_SIZE tx_size, int is_inter,
- int use_reduced_set) {
- const int set_type =
- av1_get_ext_tx_set_type(tx_size, is_inter, use_reduced_set);
- return av1_num_ext_tx_set[set_type];
-}
-
-#define TXSIZEMAX(t1, t2) (tx_size_2d[(t1)] >= tx_size_2d[(t2)] ? (t1) : (t2))
-#define TXSIZEMIN(t1, t2) (tx_size_2d[(t1)] <= tx_size_2d[(t2)] ? (t1) : (t2))
-
-static INLINE TX_SIZE tx_size_from_tx_mode(BLOCK_SIZE bsize, TX_MODE tx_mode) {
- const TX_SIZE largest_tx_size = tx_mode_to_biggest_tx_size[tx_mode];
- const TX_SIZE max_rect_tx_size = max_txsize_rect_lookup[bsize];
- if (bsize == BLOCK_4X4)
- return AOMMIN(max_txsize_lookup[bsize], largest_tx_size);
- if (txsize_sqr_map[max_rect_tx_size] <= largest_tx_size)
- return max_rect_tx_size;
- else
- return largest_tx_size;
-}
-
-extern const int16_t dr_intra_derivative[90];
-static const uint8_t mode_to_angle_map[] = {
- 0, 90, 180, 45, 135, 113, 157, 203, 67, 0, 0, 0, 0,
-};
-
-// Converts block_index for given transform size to index of the block in raster
-// order.
-static INLINE int av1_block_index_to_raster_order(TX_SIZE tx_size,
- int block_idx) {
- // For transform size 4x8, the possible block_idx values are 0 & 2, because
- // block_idx values are incremented in steps of size 'tx_width_unit x
- // tx_height_unit'. But, for this transform size, block_idx = 2 corresponds to
- // block number 1 in raster order, inside an 8x8 MI block.
- // For any other transform size, the two indices are equivalent.
- return (tx_size == TX_4X8 && block_idx == 2) ? 1 : block_idx;
-}
-
-// Inverse of above function.
-// Note: only implemented for transform sizes 4x4, 4x8 and 8x4 right now.
-static INLINE int av1_raster_order_to_block_index(TX_SIZE tx_size,
- int raster_order) {
- assert(tx_size == TX_4X4 || tx_size == TX_4X8 || tx_size == TX_8X4);
- // We ensure that block indices are 0 & 2 if tx size is 4x8 or 8x4.
- return (tx_size == TX_4X4) ? raster_order : (raster_order > 0) ? 2 : 0;
-}
-
-static INLINE TX_TYPE get_default_tx_type(PLANE_TYPE plane_type,
- const MACROBLOCKD *xd,
- TX_SIZE tx_size) {
- const MB_MODE_INFO *const mbmi = xd->mi[0];
-
- if (is_inter_block(mbmi) || plane_type != PLANE_TYPE_Y ||
- xd->lossless[mbmi->segment_id] || tx_size >= TX_32X32)
- return DCT_DCT;
-
- return intra_mode_to_tx_type(mbmi, plane_type);
-}
-
-// Implements the get_plane_residual_size() function in the spec (Section
-// 5.11.38. Get plane residual size function).
-static INLINE BLOCK_SIZE get_plane_block_size(BLOCK_SIZE bsize,
- int subsampling_x,
- int subsampling_y) {
- if (bsize == BLOCK_INVALID) return BLOCK_INVALID;
- return ss_size_lookup[bsize][subsampling_x][subsampling_y];
-}
-
-static INLINE int av1_get_txb_size_index(BLOCK_SIZE bsize, int blk_row,
- int blk_col) {
- TX_SIZE txs = max_txsize_rect_lookup[bsize];
- for (int level = 0; level < MAX_VARTX_DEPTH - 1; ++level)
- txs = sub_tx_size_map[txs];
- const int tx_w_log2 = tx_size_wide_log2[txs] - MI_SIZE_LOG2;
- const int tx_h_log2 = tx_size_high_log2[txs] - MI_SIZE_LOG2;
- const int bw_log2 = mi_size_wide_log2[bsize];
- const int stride_log2 = bw_log2 - tx_w_log2;
- const int index =
- ((blk_row >> tx_h_log2) << stride_log2) + (blk_col >> tx_w_log2);
- assert(index < INTER_TX_SIZE_BUF_LEN);
- return index;
-}
-
-static INLINE int av1_get_txk_type_index(BLOCK_SIZE bsize, int blk_row,
- int blk_col) {
- TX_SIZE txs = max_txsize_rect_lookup[bsize];
- for (int level = 0; level < MAX_VARTX_DEPTH; ++level)
- txs = sub_tx_size_map[txs];
- const int tx_w_log2 = tx_size_wide_log2[txs] - MI_SIZE_LOG2;
- const int tx_h_log2 = tx_size_high_log2[txs] - MI_SIZE_LOG2;
- const int bw_uint_log2 = mi_size_wide_log2[bsize];
- const int stride_log2 = bw_uint_log2 - tx_w_log2;
- const int index =
- ((blk_row >> tx_h_log2) << stride_log2) + (blk_col >> tx_w_log2);
- assert(index < TXK_TYPE_BUF_LEN);
- return index;
-}
-
-static INLINE void update_txk_array(TX_TYPE *txk_type, BLOCK_SIZE bsize,
- int blk_row, int blk_col, TX_SIZE tx_size,
- TX_TYPE tx_type) {
- const int txk_type_idx = av1_get_txk_type_index(bsize, blk_row, blk_col);
- txk_type[txk_type_idx] = tx_type;
-
- const int txw = tx_size_wide_unit[tx_size];
- const int txh = tx_size_high_unit[tx_size];
- // The 16x16 unit is due to the constraint from tx_64x64 which sets the
- // maximum tx size for chroma as 32x32. Coupled with 4x1 transform block
- // size, the constraint takes effect in 32x16 / 16x32 size too. To solve
- // the intricacy, cover all the 16x16 units inside a 64 level transform.
- if (txw == tx_size_wide_unit[TX_64X64] ||
- txh == tx_size_high_unit[TX_64X64]) {
- const int tx_unit = tx_size_wide_unit[TX_16X16];
- for (int idy = 0; idy < txh; idy += tx_unit) {
- for (int idx = 0; idx < txw; idx += tx_unit) {
- const int this_index =
- av1_get_txk_type_index(bsize, blk_row + idy, blk_col + idx);
- txk_type[this_index] = tx_type;
- }
- }
- }
-}
-
-static INLINE TX_TYPE av1_get_tx_type(PLANE_TYPE plane_type,
- const MACROBLOCKD *xd, int blk_row,
- int blk_col, TX_SIZE tx_size,
- int reduced_tx_set) {
- const MB_MODE_INFO *const mbmi = xd->mi[0];
- const struct macroblockd_plane *const pd = &xd->plane[plane_type];
- const TxSetType tx_set_type =
- av1_get_ext_tx_set_type(tx_size, is_inter_block(mbmi), reduced_tx_set);
-
- TX_TYPE tx_type;
- if (xd->lossless[mbmi->segment_id] || txsize_sqr_up_map[tx_size] > TX_32X32) {
- tx_type = DCT_DCT;
- } else {
- if (plane_type == PLANE_TYPE_Y) {
- const int txk_type_idx =
- av1_get_txk_type_index(mbmi->sb_type, blk_row, blk_col);
- tx_type = mbmi->txk_type[txk_type_idx];
- } else if (is_inter_block(mbmi)) {
- // scale back to y plane's coordinate
- blk_row <<= pd->subsampling_y;
- blk_col <<= pd->subsampling_x;
- const int txk_type_idx =
- av1_get_txk_type_index(mbmi->sb_type, blk_row, blk_col);
- tx_type = mbmi->txk_type[txk_type_idx];
- } else {
- // In intra mode, uv planes don't share the same prediction mode as y
- // plane, so the tx_type should not be shared
- tx_type = intra_mode_to_tx_type(mbmi, PLANE_TYPE_UV);
- }
- }
- assert(tx_type < TX_TYPES);
- if (!av1_ext_tx_used[tx_set_type][tx_type]) return DCT_DCT;
- return tx_type;
-}
-
-void av1_setup_block_planes(MACROBLOCKD *xd, int ss_x, int ss_y,
- const int num_planes);
-
-static INLINE int bsize_to_max_depth(BLOCK_SIZE bsize) {
- TX_SIZE tx_size = max_txsize_rect_lookup[bsize];
- int depth = 0;
- while (depth < MAX_TX_DEPTH && tx_size != TX_4X4) {
- depth++;
- tx_size = sub_tx_size_map[tx_size];
- }
- return depth;
-}
-
-static INLINE int bsize_to_tx_size_cat(BLOCK_SIZE bsize) {
- TX_SIZE tx_size = max_txsize_rect_lookup[bsize];
- assert(tx_size != TX_4X4);
- int depth = 0;
- while (tx_size != TX_4X4) {
- depth++;
- tx_size = sub_tx_size_map[tx_size];
- assert(depth < 10);
- }
- assert(depth <= MAX_TX_CATS);
- return depth - 1;
-}
-
-static INLINE TX_SIZE depth_to_tx_size(int depth, BLOCK_SIZE bsize) {
- TX_SIZE max_tx_size = max_txsize_rect_lookup[bsize];
- TX_SIZE tx_size = max_tx_size;
- for (int d = 0; d < depth; ++d) tx_size = sub_tx_size_map[tx_size];
- return tx_size;
-}
-
-static INLINE TX_SIZE av1_get_adjusted_tx_size(TX_SIZE tx_size) {
- switch (tx_size) {
- case TX_64X64:
- case TX_64X32:
- case TX_32X64: return TX_32X32;
- case TX_64X16: return TX_32X16;
- case TX_16X64: return TX_16X32;
- default: return tx_size;
- }
-}
-
-static INLINE TX_SIZE av1_get_max_uv_txsize(BLOCK_SIZE bsize, int subsampling_x,
- int subsampling_y) {
- const BLOCK_SIZE plane_bsize =
- get_plane_block_size(bsize, subsampling_x, subsampling_y);
- assert(plane_bsize < BLOCK_SIZES_ALL);
- const TX_SIZE uv_tx = max_txsize_rect_lookup[plane_bsize];
- return av1_get_adjusted_tx_size(uv_tx);
-}
-
-static INLINE TX_SIZE av1_get_tx_size(int plane, const MACROBLOCKD *xd) {
- const MB_MODE_INFO *mbmi = xd->mi[0];
- if (xd->lossless[mbmi->segment_id]) return TX_4X4;
- if (plane == 0) return mbmi->tx_size;
- const MACROBLOCKD_PLANE *pd = &xd->plane[plane];
- return av1_get_max_uv_txsize(mbmi->sb_type, pd->subsampling_x,
- pd->subsampling_y);
-}
-
-void av1_reset_skip_context(MACROBLOCKD *xd, int mi_row, int mi_col,
- BLOCK_SIZE bsize, const int num_planes);
-
-void av1_reset_loop_filter_delta(MACROBLOCKD *xd, int num_planes);
-
-void av1_reset_loop_restoration(MACROBLOCKD *xd, const int num_planes);
-
-typedef void (*foreach_transformed_block_visitor)(int plane, int block,
- int blk_row, int blk_col,
- BLOCK_SIZE plane_bsize,
- TX_SIZE tx_size, void *arg);
-
-void av1_set_contexts(const MACROBLOCKD *xd, struct macroblockd_plane *pd,
- int plane, BLOCK_SIZE plane_bsize, TX_SIZE tx_size,
- int has_eob, int aoff, int loff);
-
-#define MAX_INTERINTRA_SB_SQUARE 32 * 32
-static INLINE int is_interintra_mode(const MB_MODE_INFO *mbmi) {
- return (mbmi->ref_frame[0] > INTRA_FRAME &&
- mbmi->ref_frame[1] == INTRA_FRAME);
-}
-
-static INLINE int is_interintra_allowed_bsize(const BLOCK_SIZE bsize) {
- return (bsize >= BLOCK_8X8) && (bsize <= BLOCK_32X32);
-}
-
-static INLINE int is_interintra_allowed_mode(const PREDICTION_MODE mode) {
- return (mode >= SINGLE_INTER_MODE_START) && (mode < SINGLE_INTER_MODE_END);
-}
-
-static INLINE int is_interintra_allowed_ref(const MV_REFERENCE_FRAME rf[2]) {
- return (rf[0] > INTRA_FRAME) && (rf[1] <= INTRA_FRAME);
-}
-
-static INLINE int is_interintra_allowed(const MB_MODE_INFO *mbmi) {
- return is_interintra_allowed_bsize(mbmi->sb_type) &&
- is_interintra_allowed_mode(mbmi->mode) &&
- is_interintra_allowed_ref(mbmi->ref_frame);
-}
-
-static INLINE int is_interintra_allowed_bsize_group(int group) {
- int i;
- for (i = 0; i < BLOCK_SIZES_ALL; i++) {
- if (size_group_lookup[i] == group &&
- is_interintra_allowed_bsize((BLOCK_SIZE)i)) {
- return 1;
- }
- }
- return 0;
-}
-
-static INLINE int is_interintra_pred(const MB_MODE_INFO *mbmi) {
- return mbmi->ref_frame[0] > INTRA_FRAME &&
- mbmi->ref_frame[1] == INTRA_FRAME && is_interintra_allowed(mbmi);
-}
-
-static INLINE int get_vartx_max_txsize(const MACROBLOCKD *xd, BLOCK_SIZE bsize,
- int plane) {
- if (xd->lossless[xd->mi[0]->segment_id]) return TX_4X4;
- const TX_SIZE max_txsize = max_txsize_rect_lookup[bsize];
- if (plane == 0) return max_txsize; // luma
- return av1_get_adjusted_tx_size(max_txsize); // chroma
-}
-
-static INLINE int is_motion_variation_allowed_bsize(BLOCK_SIZE bsize) {
- return AOMMIN(block_size_wide[bsize], block_size_high[bsize]) >= 8;
-}
-
-static INLINE int is_motion_variation_allowed_compound(
- const MB_MODE_INFO *mbmi) {
- if (!has_second_ref(mbmi))
- return 1;
- else
- return 0;
-}
-
-// input: log2 of length, 0(4), 1(8), ...
-static const int max_neighbor_obmc[6] = { 0, 1, 2, 3, 4, 4 };
-
-static INLINE int check_num_overlappable_neighbors(const MB_MODE_INFO *mbmi) {
- return !(mbmi->overlappable_neighbors[0] == 0 &&
- mbmi->overlappable_neighbors[1] == 0);
-}
-
-static INLINE MOTION_MODE
-motion_mode_allowed(const WarpedMotionParams *gm_params, const MACROBLOCKD *xd,
- const MB_MODE_INFO *mbmi, int allow_warped_motion) {
- if (xd->cur_frame_force_integer_mv == 0) {
- const TransformationType gm_type = gm_params[mbmi->ref_frame[0]].wmtype;
- if (is_global_mv_block(mbmi, gm_type)) return SIMPLE_TRANSLATION;
- }
- if (is_motion_variation_allowed_bsize(mbmi->sb_type) &&
- is_inter_mode(mbmi->mode) && mbmi->ref_frame[1] != INTRA_FRAME &&
- is_motion_variation_allowed_compound(mbmi)) {
- if (!check_num_overlappable_neighbors(mbmi)) return SIMPLE_TRANSLATION;
- assert(!has_second_ref(mbmi));
- if (mbmi->num_proj_ref >= 1 &&
- (allow_warped_motion && !av1_is_scaled(&(xd->block_refs[0]->sf)))) {
- if (xd->cur_frame_force_integer_mv) {
- return OBMC_CAUSAL;
- }
- return WARPED_CAUSAL;
- }
- return OBMC_CAUSAL;
- } else {
- return SIMPLE_TRANSLATION;
- }
-}
-
-static INLINE void assert_motion_mode_valid(MOTION_MODE mode,
- const WarpedMotionParams *gm_params,
- const MACROBLOCKD *xd,
- const MB_MODE_INFO *mbmi,
- int allow_warped_motion) {
- const MOTION_MODE last_motion_mode_allowed =
- motion_mode_allowed(gm_params, xd, mbmi, allow_warped_motion);
-
- // Check that the input mode is not illegal
- if (last_motion_mode_allowed < mode)
- assert(0 && "Illegal motion mode selected");
-}
-
-static INLINE int is_neighbor_overlappable(const MB_MODE_INFO *mbmi) {
- return (is_inter_block(mbmi));
-}
-
-static INLINE int av1_allow_palette(int allow_screen_content_tools,
- BLOCK_SIZE sb_type) {
- return allow_screen_content_tools && block_size_wide[sb_type] <= 64 &&
- block_size_high[sb_type] <= 64 && sb_type >= BLOCK_8X8;
-}
-
-// Returns sub-sampled dimensions of the given block.
-// The output values for 'rows_within_bounds' and 'cols_within_bounds' will
-// differ from 'height' and 'width' when part of the block is outside the
-// right
-// and/or bottom image boundary.
-static INLINE void av1_get_block_dimensions(BLOCK_SIZE bsize, int plane,
- const MACROBLOCKD *xd, int *width,
- int *height,
- int *rows_within_bounds,
- int *cols_within_bounds) {
- const int block_height = block_size_high[bsize];
- const int block_width = block_size_wide[bsize];
- const int block_rows = (xd->mb_to_bottom_edge >= 0)
- ? block_height
- : (xd->mb_to_bottom_edge >> 3) + block_height;
- const int block_cols = (xd->mb_to_right_edge >= 0)
- ? block_width
- : (xd->mb_to_right_edge >> 3) + block_width;
- const struct macroblockd_plane *const pd = &xd->plane[plane];
- assert(IMPLIES(plane == PLANE_TYPE_Y, pd->subsampling_x == 0));
- assert(IMPLIES(plane == PLANE_TYPE_Y, pd->subsampling_y == 0));
- assert(block_width >= block_cols);
- assert(block_height >= block_rows);
- const int plane_block_width = block_width >> pd->subsampling_x;
- const int plane_block_height = block_height >> pd->subsampling_y;
- // Special handling for chroma sub8x8.
- const int is_chroma_sub8_x = plane > 0 && plane_block_width < 4;
- const int is_chroma_sub8_y = plane > 0 && plane_block_height < 4;
- if (width) *width = plane_block_width + 2 * is_chroma_sub8_x;
- if (height) *height = plane_block_height + 2 * is_chroma_sub8_y;
- if (rows_within_bounds) {
- *rows_within_bounds =
- (block_rows >> pd->subsampling_y) + 2 * is_chroma_sub8_y;
- }
- if (cols_within_bounds) {
- *cols_within_bounds =
- (block_cols >> pd->subsampling_x) + 2 * is_chroma_sub8_x;
- }
-}
-
-/* clang-format off */
-typedef aom_cdf_prob (*MapCdf)[PALETTE_COLOR_INDEX_CONTEXTS]
- [CDF_SIZE(PALETTE_COLORS)];
-typedef const int (*ColorCost)[PALETTE_SIZES][PALETTE_COLOR_INDEX_CONTEXTS]
- [PALETTE_COLORS];
-/* clang-format on */
-
-typedef struct {
- int rows;
- int cols;
- int n_colors;
- int plane_width;
- int plane_height;
- uint8_t *color_map;
- MapCdf map_cdf;
- ColorCost color_cost;
-} Av1ColorMapParam;
-
-static INLINE int is_nontrans_global_motion(const MACROBLOCKD *xd,
- const MB_MODE_INFO *mbmi) {
- int ref;
-
- // First check if all modes are GLOBALMV
- if (mbmi->mode != GLOBALMV && mbmi->mode != GLOBAL_GLOBALMV) return 0;
-
- if (AOMMIN(mi_size_wide[mbmi->sb_type], mi_size_high[mbmi->sb_type]) < 2)
- return 0;
-
- // Now check if all global motion is non translational
- for (ref = 0; ref < 1 + has_second_ref(mbmi); ++ref) {
- if (xd->global_motion[mbmi->ref_frame[ref]].wmtype == TRANSLATION) return 0;
- }
- return 1;
-}
-
-static INLINE PLANE_TYPE get_plane_type(int plane) {
- return (plane == 0) ? PLANE_TYPE_Y : PLANE_TYPE_UV;
-}
-
-static INLINE int av1_get_max_eob(TX_SIZE tx_size) {
- if (tx_size == TX_64X64 || tx_size == TX_64X32 || tx_size == TX_32X64) {
- return 1024;
- }
- if (tx_size == TX_16X64 || tx_size == TX_64X16) {
- return 512;
- }
- return tx_size_2d[tx_size];
-}
-
-#ifdef __cplusplus
-} // extern "C"
-#endif
-
-#endif // AOM_AV1_COMMON_BLOCKD_H_
diff --git a/third_party/aom/av1/common/cdef.c b/third_party/aom/av1/common/cdef.c
deleted file mode 100644
index e9e2b0e42..000000000
--- a/third_party/aom/av1/common/cdef.c
+++ /dev/null
@@ -1,403 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#include <assert.h>
-#include <math.h>
-#include <string.h>
-
-#include "config/aom_scale_rtcd.h"
-
-#include "aom/aom_integer.h"
-#include "av1/common/cdef.h"
-#include "av1/common/cdef_block.h"
-#include "av1/common/onyxc_int.h"
-#include "av1/common/reconinter.h"
-
-int sb_all_skip(const AV1_COMMON *const cm, int mi_row, int mi_col) {
- int maxc, maxr;
- int skip = 1;
- maxc = cm->mi_cols - mi_col;
- maxr = cm->mi_rows - mi_row;
-
- maxr = AOMMIN(maxr, MI_SIZE_64X64);
- maxc = AOMMIN(maxc, MI_SIZE_64X64);
-
- for (int r = 0; r < maxr; r++) {
- for (int c = 0; c < maxc; c++) {
- skip =
- skip &&
- cm->mi_grid_visible[(mi_row + r) * cm->mi_stride + mi_col + c]->skip;
- }
- }
- return skip;
-}
-
-static int is_8x8_block_skip(MB_MODE_INFO **grid, int mi_row, int mi_col,
- int mi_stride) {
- int is_skip = 1;
- for (int r = 0; r < mi_size_high[BLOCK_8X8]; ++r)
- for (int c = 0; c < mi_size_wide[BLOCK_8X8]; ++c)
- is_skip &= grid[(mi_row + r) * mi_stride + (mi_col + c)]->skip;
-
- return is_skip;
-}
-
-int sb_compute_cdef_list(const AV1_COMMON *const cm, int mi_row, int mi_col,
- cdef_list *dlist, BLOCK_SIZE bs) {
- MB_MODE_INFO **grid = cm->mi_grid_visible;
- int maxc = cm->mi_cols - mi_col;
- int maxr = cm->mi_rows - mi_row;
-
- if (bs == BLOCK_128X128 || bs == BLOCK_128X64)
- maxc = AOMMIN(maxc, MI_SIZE_128X128);
- else
- maxc = AOMMIN(maxc, MI_SIZE_64X64);
- if (bs == BLOCK_128X128 || bs == BLOCK_64X128)
- maxr = AOMMIN(maxr, MI_SIZE_128X128);
- else
- maxr = AOMMIN(maxr, MI_SIZE_64X64);
-
- const int r_step = mi_size_high[BLOCK_8X8];
- const int c_step = mi_size_wide[BLOCK_8X8];
- const int r_shift = (r_step == 2);
- const int c_shift = (c_step == 2);
-
- assert(r_step == 1 || r_step == 2);
- assert(c_step == 1 || c_step == 2);
-
- int count = 0;
-
- for (int r = 0; r < maxr; r += r_step) {
- for (int c = 0; c < maxc; c += c_step) {
- if (!is_8x8_block_skip(grid, mi_row + r, mi_col + c, cm->mi_stride)) {
- dlist[count].by = r >> r_shift;
- dlist[count].bx = c >> c_shift;
- dlist[count].skip = 0;
- count++;
- }
- }
- }
- return count;
-}
-
-void copy_rect8_8bit_to_16bit_c(uint16_t *dst, int dstride, const uint8_t *src,
- int sstride, int v, int h) {
- for (int i = 0; i < v; i++) {
- for (int j = 0; j < h; j++) {
- dst[i * dstride + j] = src[i * sstride + j];
- }
- }
-}
-
-void copy_rect8_16bit_to_16bit_c(uint16_t *dst, int dstride,
- const uint16_t *src, int sstride, int v,
- int h) {
- for (int i = 0; i < v; i++) {
- for (int j = 0; j < h; j++) {
- dst[i * dstride + j] = src[i * sstride + j];
- }
- }
-}
-
-static void copy_sb8_16(AOM_UNUSED AV1_COMMON *cm, uint16_t *dst, int dstride,
- const uint8_t *src, int src_voffset, int src_hoffset,
- int sstride, int vsize, int hsize) {
- if (cm->seq_params.use_highbitdepth) {
- const uint16_t *base =
- &CONVERT_TO_SHORTPTR(src)[src_voffset * sstride + src_hoffset];
- copy_rect8_16bit_to_16bit(dst, dstride, base, sstride, vsize, hsize);
- } else {
- const uint8_t *base = &src[src_voffset * sstride + src_hoffset];
- copy_rect8_8bit_to_16bit(dst, dstride, base, sstride, vsize, hsize);
- }
-}
-
-static INLINE void fill_rect(uint16_t *dst, int dstride, int v, int h,
- uint16_t x) {
- for (int i = 0; i < v; i++) {
- for (int j = 0; j < h; j++) {
- dst[i * dstride + j] = x;
- }
- }
-}
-
-static INLINE void copy_rect(uint16_t *dst, int dstride, const uint16_t *src,
- int sstride, int v, int h) {
- for (int i = 0; i < v; i++) {
- for (int j = 0; j < h; j++) {
- dst[i * dstride + j] = src[i * sstride + j];
- }
- }
-}
-
-void av1_cdef_frame(YV12_BUFFER_CONFIG *frame, AV1_COMMON *cm,
- MACROBLOCKD *xd) {
- const int num_planes = av1_num_planes(cm);
- DECLARE_ALIGNED(16, uint16_t, src[CDEF_INBUF_SIZE]);
- uint16_t *linebuf[3];
- uint16_t *colbuf[3];
- cdef_list dlist[MI_SIZE_64X64 * MI_SIZE_64X64];
- unsigned char *row_cdef, *prev_row_cdef, *curr_row_cdef;
- int cdef_count;
- int dir[CDEF_NBLOCKS][CDEF_NBLOCKS] = { { 0 } };
- int var[CDEF_NBLOCKS][CDEF_NBLOCKS] = { { 0 } };
- int mi_wide_l2[3];
- int mi_high_l2[3];
- int xdec[3];
- int ydec[3];
- int coeff_shift = AOMMAX(cm->seq_params.bit_depth - 8, 0);
- const int nvfb = (cm->mi_rows + MI_SIZE_64X64 - 1) / MI_SIZE_64X64;
- const int nhfb = (cm->mi_cols + MI_SIZE_64X64 - 1) / MI_SIZE_64X64;
- av1_setup_dst_planes(xd->plane, cm->seq_params.sb_size, frame, 0, 0, 0,
- num_planes);
- row_cdef = aom_malloc(sizeof(*row_cdef) * (nhfb + 2) * 2);
- memset(row_cdef, 1, sizeof(*row_cdef) * (nhfb + 2) * 2);
- prev_row_cdef = row_cdef + 1;
- curr_row_cdef = prev_row_cdef + nhfb + 2;
- for (int pli = 0; pli < num_planes; pli++) {
- xdec[pli] = xd->plane[pli].subsampling_x;
- ydec[pli] = xd->plane[pli].subsampling_y;
- mi_wide_l2[pli] = MI_SIZE_LOG2 - xd->plane[pli].subsampling_x;
- mi_high_l2[pli] = MI_SIZE_LOG2 - xd->plane[pli].subsampling_y;
- }
- const int stride = (cm->mi_cols << MI_SIZE_LOG2) + 2 * CDEF_HBORDER;
- for (int pli = 0; pli < num_planes; pli++) {
- linebuf[pli] = aom_malloc(sizeof(*linebuf) * CDEF_VBORDER * stride);
- colbuf[pli] =
- aom_malloc(sizeof(*colbuf) *
- ((CDEF_BLOCKSIZE << mi_high_l2[pli]) + 2 * CDEF_VBORDER) *
- CDEF_HBORDER);
- }
- for (int fbr = 0; fbr < nvfb; fbr++) {
- for (int pli = 0; pli < num_planes; pli++) {
- const int block_height =
- (MI_SIZE_64X64 << mi_high_l2[pli]) + 2 * CDEF_VBORDER;
- fill_rect(colbuf[pli], CDEF_HBORDER, block_height, CDEF_HBORDER,
- CDEF_VERY_LARGE);
- }
- int cdef_left = 1;
- for (int fbc = 0; fbc < nhfb; fbc++) {
- int level, sec_strength;
- int uv_level, uv_sec_strength;
- int nhb, nvb;
- int cstart = 0;
- curr_row_cdef[fbc] = 0;
- if (cm->mi_grid_visible[MI_SIZE_64X64 * fbr * cm->mi_stride +
- MI_SIZE_64X64 * fbc] == NULL ||
- cm->mi_grid_visible[MI_SIZE_64X64 * fbr * cm->mi_stride +
- MI_SIZE_64X64 * fbc]
- ->cdef_strength == -1) {
- cdef_left = 0;
- continue;
- }
- if (!cdef_left) cstart = -CDEF_HBORDER;
- nhb = AOMMIN(MI_SIZE_64X64, cm->mi_cols - MI_SIZE_64X64 * fbc);
- nvb = AOMMIN(MI_SIZE_64X64, cm->mi_rows - MI_SIZE_64X64 * fbr);
- int frame_top, frame_left, frame_bottom, frame_right;
-
- int mi_row = MI_SIZE_64X64 * fbr;
- int mi_col = MI_SIZE_64X64 * fbc;
- // for the current filter block, it's top left corner mi structure (mi_tl)
- // is first accessed to check whether the top and left boundaries are
- // frame boundaries. Then bottom-left and top-right mi structures are
- // accessed to check whether the bottom and right boundaries
- // (respectively) are frame boundaries.
- //
- // Note that we can't just check the bottom-right mi structure - eg. if
- // we're at the right-hand edge of the frame but not the bottom, then
- // the bottom-right mi is NULL but the bottom-left is not.
- frame_top = (mi_row == 0) ? 1 : 0;
- frame_left = (mi_col == 0) ? 1 : 0;
-
- if (fbr != nvfb - 1)
- frame_bottom = (mi_row + MI_SIZE_64X64 == cm->mi_rows) ? 1 : 0;
- else
- frame_bottom = 1;
-
- if (fbc != nhfb - 1)
- frame_right = (mi_col + MI_SIZE_64X64 == cm->mi_cols) ? 1 : 0;
- else
- frame_right = 1;
-
- const int mbmi_cdef_strength =
- cm->mi_grid_visible[MI_SIZE_64X64 * fbr * cm->mi_stride +
- MI_SIZE_64X64 * fbc]
- ->cdef_strength;
- level = cm->cdef_strengths[mbmi_cdef_strength] / CDEF_SEC_STRENGTHS;
- sec_strength =
- cm->cdef_strengths[mbmi_cdef_strength] % CDEF_SEC_STRENGTHS;
- sec_strength += sec_strength == 3;
- uv_level = cm->cdef_uv_strengths[mbmi_cdef_strength] / CDEF_SEC_STRENGTHS;
- uv_sec_strength =
- cm->cdef_uv_strengths[mbmi_cdef_strength] % CDEF_SEC_STRENGTHS;
- uv_sec_strength += uv_sec_strength == 3;
- if ((level == 0 && sec_strength == 0 && uv_level == 0 &&
- uv_sec_strength == 0) ||
- (cdef_count = sb_compute_cdef_list(cm, fbr * MI_SIZE_64X64,
- fbc * MI_SIZE_64X64, dlist,
- BLOCK_64X64)) == 0) {
- cdef_left = 0;
- continue;
- }
-
- curr_row_cdef[fbc] = 1;
- for (int pli = 0; pli < num_planes; pli++) {
- int coffset;
- int rend, cend;
- int pri_damping = cm->cdef_pri_damping;
- int sec_damping = cm->cdef_sec_damping;
- int hsize = nhb << mi_wide_l2[pli];
- int vsize = nvb << mi_high_l2[pli];
-
- if (pli) {
- level = uv_level;
- sec_strength = uv_sec_strength;
- }
-
- if (fbc == nhfb - 1)
- cend = hsize;
- else
- cend = hsize + CDEF_HBORDER;
-
- if (fbr == nvfb - 1)
- rend = vsize;
- else
- rend = vsize + CDEF_VBORDER;
-
- coffset = fbc * MI_SIZE_64X64 << mi_wide_l2[pli];
- if (fbc == nhfb - 1) {
- /* On the last superblock column, fill in the right border with
- CDEF_VERY_LARGE to avoid filtering with the outside. */
- fill_rect(&src[cend + CDEF_HBORDER], CDEF_BSTRIDE,
- rend + CDEF_VBORDER, hsize + CDEF_HBORDER - cend,
- CDEF_VERY_LARGE);
- }
- if (fbr == nvfb - 1) {
- /* On the last superblock row, fill in the bottom border with
- CDEF_VERY_LARGE to avoid filtering with the outside. */
- fill_rect(&src[(rend + CDEF_VBORDER) * CDEF_BSTRIDE], CDEF_BSTRIDE,
- CDEF_VBORDER, hsize + 2 * CDEF_HBORDER, CDEF_VERY_LARGE);
- }
- /* Copy in the pixels we need from the current superblock for
- deringing.*/
- copy_sb8_16(cm,
- &src[CDEF_VBORDER * CDEF_BSTRIDE + CDEF_HBORDER + cstart],
- CDEF_BSTRIDE, xd->plane[pli].dst.buf,
- (MI_SIZE_64X64 << mi_high_l2[pli]) * fbr, coffset + cstart,
- xd->plane[pli].dst.stride, rend, cend - cstart);
- if (!prev_row_cdef[fbc]) {
- copy_sb8_16(cm, &src[CDEF_HBORDER], CDEF_BSTRIDE,
- xd->plane[pli].dst.buf,
- (MI_SIZE_64X64 << mi_high_l2[pli]) * fbr - CDEF_VBORDER,
- coffset, xd->plane[pli].dst.stride, CDEF_VBORDER, hsize);
- } else if (fbr > 0) {
- copy_rect(&src[CDEF_HBORDER], CDEF_BSTRIDE, &linebuf[pli][coffset],
- stride, CDEF_VBORDER, hsize);
- } else {
- fill_rect(&src[CDEF_HBORDER], CDEF_BSTRIDE, CDEF_VBORDER, hsize,
- CDEF_VERY_LARGE);
- }
- if (!prev_row_cdef[fbc - 1]) {
- copy_sb8_16(cm, src, CDEF_BSTRIDE, xd->plane[pli].dst.buf,
- (MI_SIZE_64X64 << mi_high_l2[pli]) * fbr - CDEF_VBORDER,
- coffset - CDEF_HBORDER, xd->plane[pli].dst.stride,
- CDEF_VBORDER, CDEF_HBORDER);
- } else if (fbr > 0 && fbc > 0) {
- copy_rect(src, CDEF_BSTRIDE, &linebuf[pli][coffset - CDEF_HBORDER],
- stride, CDEF_VBORDER, CDEF_HBORDER);
- } else {
- fill_rect(src, CDEF_BSTRIDE, CDEF_VBORDER, CDEF_HBORDER,
- CDEF_VERY_LARGE);
- }
- if (!prev_row_cdef[fbc + 1]) {
- copy_sb8_16(cm, &src[CDEF_HBORDER + (nhb << mi_wide_l2[pli])],
- CDEF_BSTRIDE, xd->plane[pli].dst.buf,
- (MI_SIZE_64X64 << mi_high_l2[pli]) * fbr - CDEF_VBORDER,
- coffset + hsize, xd->plane[pli].dst.stride, CDEF_VBORDER,
- CDEF_HBORDER);
- } else if (fbr > 0 && fbc < nhfb - 1) {
- copy_rect(&src[hsize + CDEF_HBORDER], CDEF_BSTRIDE,
- &linebuf[pli][coffset + hsize], stride, CDEF_VBORDER,
- CDEF_HBORDER);
- } else {
- fill_rect(&src[hsize + CDEF_HBORDER], CDEF_BSTRIDE, CDEF_VBORDER,
- CDEF_HBORDER, CDEF_VERY_LARGE);
- }
- if (cdef_left) {
- /* If we deringed the superblock on the left then we need to copy in
- saved pixels. */
- copy_rect(src, CDEF_BSTRIDE, colbuf[pli], CDEF_HBORDER,
- rend + CDEF_VBORDER, CDEF_HBORDER);
- }
- /* Saving pixels in case we need to dering the superblock on the
- right. */
- copy_rect(colbuf[pli], CDEF_HBORDER, src + hsize, CDEF_BSTRIDE,
- rend + CDEF_VBORDER, CDEF_HBORDER);
- copy_sb8_16(
- cm, &linebuf[pli][coffset], stride, xd->plane[pli].dst.buf,
- (MI_SIZE_64X64 << mi_high_l2[pli]) * (fbr + 1) - CDEF_VBORDER,
- coffset, xd->plane[pli].dst.stride, CDEF_VBORDER, hsize);
-
- if (frame_top) {
- fill_rect(src, CDEF_BSTRIDE, CDEF_VBORDER, hsize + 2 * CDEF_HBORDER,
- CDEF_VERY_LARGE);
- }
- if (frame_left) {
- fill_rect(src, CDEF_BSTRIDE, vsize + 2 * CDEF_VBORDER, CDEF_HBORDER,
- CDEF_VERY_LARGE);
- }
- if (frame_bottom) {
- fill_rect(&src[(vsize + CDEF_VBORDER) * CDEF_BSTRIDE], CDEF_BSTRIDE,
- CDEF_VBORDER, hsize + 2 * CDEF_HBORDER, CDEF_VERY_LARGE);
- }
- if (frame_right) {
- fill_rect(&src[hsize + CDEF_HBORDER], CDEF_BSTRIDE,
- vsize + 2 * CDEF_VBORDER, CDEF_HBORDER, CDEF_VERY_LARGE);
- }
-
- if (cm->seq_params.use_highbitdepth) {
- cdef_filter_fb(
- NULL,
- &CONVERT_TO_SHORTPTR(
- xd->plane[pli]
- .dst.buf)[xd->plane[pli].dst.stride *
- (MI_SIZE_64X64 * fbr << mi_high_l2[pli]) +
- (fbc * MI_SIZE_64X64 << mi_wide_l2[pli])],
- xd->plane[pli].dst.stride,
- &src[CDEF_VBORDER * CDEF_BSTRIDE + CDEF_HBORDER], xdec[pli],
- ydec[pli], dir, NULL, var, pli, dlist, cdef_count, level,
- sec_strength, pri_damping, sec_damping, coeff_shift);
- } else {
- cdef_filter_fb(
- &xd->plane[pli]
- .dst.buf[xd->plane[pli].dst.stride *
- (MI_SIZE_64X64 * fbr << mi_high_l2[pli]) +
- (fbc * MI_SIZE_64X64 << mi_wide_l2[pli])],
- NULL, xd->plane[pli].dst.stride,
- &src[CDEF_VBORDER * CDEF_BSTRIDE + CDEF_HBORDER], xdec[pli],
- ydec[pli], dir, NULL, var, pli, dlist, cdef_count, level,
- sec_strength, pri_damping, sec_damping, coeff_shift);
- }
- }
- cdef_left = 1;
- }
- {
- unsigned char *tmp = prev_row_cdef;
- prev_row_cdef = curr_row_cdef;
- curr_row_cdef = tmp;
- }
- }
- aom_free(row_cdef);
- for (int pli = 0; pli < num_planes; pli++) {
- aom_free(linebuf[pli]);
- aom_free(colbuf[pli]);
- }
-}
diff --git a/third_party/aom/av1/common/cdef.h b/third_party/aom/av1/common/cdef.h
deleted file mode 100644
index 3b2eac8a5..000000000
--- a/third_party/aom/av1/common/cdef.h
+++ /dev/null
@@ -1,51 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-#ifndef AOM_AV1_COMMON_CDEF_H_
-#define AOM_AV1_COMMON_CDEF_H_
-
-#define CDEF_STRENGTH_BITS 6
-
-#define CDEF_PRI_STRENGTHS 16
-#define CDEF_SEC_STRENGTHS 4
-
-#include "config/aom_config.h"
-
-#include "aom/aom_integer.h"
-#include "aom_ports/mem.h"
-#include "av1/common/cdef_block.h"
-#include "av1/common/onyxc_int.h"
-
-static INLINE int sign(int i) { return i < 0 ? -1 : 1; }
-
-static INLINE int constrain(int diff, int threshold, int damping) {
- if (!threshold) return 0;
-
- const int shift = AOMMAX(0, damping - get_msb(threshold));
- return sign(diff) *
- AOMMIN(abs(diff), AOMMAX(0, threshold - (abs(diff) >> shift)));
-}
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-int sb_all_skip(const AV1_COMMON *const cm, int mi_row, int mi_col);
-int sb_compute_cdef_list(const AV1_COMMON *const cm, int mi_row, int mi_col,
- cdef_list *dlist, BLOCK_SIZE bsize);
-void av1_cdef_frame(YV12_BUFFER_CONFIG *frame, AV1_COMMON *cm, MACROBLOCKD *xd);
-
-void av1_cdef_search(YV12_BUFFER_CONFIG *frame, const YV12_BUFFER_CONFIG *ref,
- AV1_COMMON *cm, MACROBLOCKD *xd, int fast);
-
-#ifdef __cplusplus
-} // extern "C"
-#endif
-#endif // AOM_AV1_COMMON_CDEF_H_
diff --git a/third_party/aom/av1/common/cdef_block.c b/third_party/aom/av1/common/cdef_block.c
deleted file mode 100644
index df1de89be..000000000
--- a/third_party/aom/av1/common/cdef_block.c
+++ /dev/null
@@ -1,257 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#include <math.h>
-#include <stdlib.h>
-
-#include "config/aom_dsp_rtcd.h"
-#include "config/av1_rtcd.h"
-
-#include "av1/common/cdef.h"
-
-/* Generated from gen_filter_tables.c. */
-DECLARE_ALIGNED(16, const int, cdef_directions[8][2]) = {
- { -1 * CDEF_BSTRIDE + 1, -2 * CDEF_BSTRIDE + 2 },
- { 0 * CDEF_BSTRIDE + 1, -1 * CDEF_BSTRIDE + 2 },
- { 0 * CDEF_BSTRIDE + 1, 0 * CDEF_BSTRIDE + 2 },
- { 0 * CDEF_BSTRIDE + 1, 1 * CDEF_BSTRIDE + 2 },
- { 1 * CDEF_BSTRIDE + 1, 2 * CDEF_BSTRIDE + 2 },
- { 1 * CDEF_BSTRIDE + 0, 2 * CDEF_BSTRIDE + 1 },
- { 1 * CDEF_BSTRIDE + 0, 2 * CDEF_BSTRIDE + 0 },
- { 1 * CDEF_BSTRIDE + 0, 2 * CDEF_BSTRIDE - 1 }
-};
-
-/* Detect direction. 0 means 45-degree up-right, 2 is horizontal, and so on.
- The search minimizes the weighted variance along all the lines in a
- particular direction, i.e. the squared error between the input and a
- "predicted" block where each pixel is replaced by the average along a line
- in a particular direction. Since each direction have the same sum(x^2) term,
- that term is never computed. See Section 2, step 2, of:
- http://jmvalin.ca/notes/intra_paint.pdf */
-int cdef_find_dir_c(const uint16_t *img, int stride, int32_t *var,
- int coeff_shift) {
- int i;
- int32_t cost[8] = { 0 };
- int partial[8][15] = { { 0 } };
- int32_t best_cost = 0;
- int best_dir = 0;
- /* Instead of dividing by n between 2 and 8, we multiply by 3*5*7*8/n.
- The output is then 840 times larger, but we don't care for finding
- the max. */
- static const int div_table[] = { 0, 840, 420, 280, 210, 168, 140, 120, 105 };
- for (i = 0; i < 8; i++) {
- int j;
- for (j = 0; j < 8; j++) {
- int x;
- /* We subtract 128 here to reduce the maximum range of the squared
- partial sums. */
- x = (img[i * stride + j] >> coeff_shift) - 128;
- partial[0][i + j] += x;
- partial[1][i + j / 2] += x;
- partial[2][i] += x;
- partial[3][3 + i - j / 2] += x;
- partial[4][7 + i - j] += x;
- partial[5][3 - i / 2 + j] += x;
- partial[6][j] += x;
- partial[7][i / 2 + j] += x;
- }
- }
- for (i = 0; i < 8; i++) {
- cost[2] += partial[2][i] * partial[2][i];
- cost[6] += partial[6][i] * partial[6][i];
- }
- cost[2] *= div_table[8];
- cost[6] *= div_table[8];
- for (i = 0; i < 7; i++) {
- cost[0] += (partial[0][i] * partial[0][i] +
- partial[0][14 - i] * partial[0][14 - i]) *
- div_table[i + 1];
- cost[4] += (partial[4][i] * partial[4][i] +
- partial[4][14 - i] * partial[4][14 - i]) *
- div_table[i + 1];
- }
- cost[0] += partial[0][7] * partial[0][7] * div_table[8];
- cost[4] += partial[4][7] * partial[4][7] * div_table[8];
- for (i = 1; i < 8; i += 2) {
- int j;
- for (j = 0; j < 4 + 1; j++) {
- cost[i] += partial[i][3 + j] * partial[i][3 + j];
- }
- cost[i] *= div_table[8];
- for (j = 0; j < 4 - 1; j++) {
- cost[i] += (partial[i][j] * partial[i][j] +
- partial[i][10 - j] * partial[i][10 - j]) *
- div_table[2 * j + 2];
- }
- }
- for (i = 0; i < 8; i++) {
- if (cost[i] > best_cost) {
- best_cost = cost[i];
- best_dir = i;
- }
- }
- /* Difference between the optimal variance and the variance along the
- orthogonal direction. Again, the sum(x^2) terms cancel out. */
- *var = best_cost - cost[(best_dir + 4) & 7];
- /* We'd normally divide by 840, but dividing by 1024 is close enough
- for what we're going to do with this. */
- *var >>= 10;
- return best_dir;
-}
-
-const int cdef_pri_taps[2][2] = { { 4, 2 }, { 3, 3 } };
-const int cdef_sec_taps[2][2] = { { 2, 1 }, { 2, 1 } };
-
-/* Smooth in the direction detected. */
-void cdef_filter_block_c(uint8_t *dst8, uint16_t *dst16, int dstride,
- const uint16_t *in, int pri_strength, int sec_strength,
- int dir, int pri_damping, int sec_damping, int bsize,
- AOM_UNUSED int max_unused, int coeff_shift) {
- int i, j, k;
- const int s = CDEF_BSTRIDE;
- const int *pri_taps = cdef_pri_taps[(pri_strength >> coeff_shift) & 1];
- const int *sec_taps = cdef_sec_taps[(pri_strength >> coeff_shift) & 1];
- for (i = 0; i < 4 << (bsize == BLOCK_8X8 || bsize == BLOCK_4X8); i++) {
- for (j = 0; j < 4 << (bsize == BLOCK_8X8 || bsize == BLOCK_8X4); j++) {
- int16_t sum = 0;
- int16_t y;
- int16_t x = in[i * s + j];
- int max = x;
- int min = x;
- for (k = 0; k < 2; k++) {
- int16_t p0 = in[i * s + j + cdef_directions[dir][k]];
- int16_t p1 = in[i * s + j - cdef_directions[dir][k]];
- sum += pri_taps[k] * constrain(p0 - x, pri_strength, pri_damping);
- sum += pri_taps[k] * constrain(p1 - x, pri_strength, pri_damping);
- if (p0 != CDEF_VERY_LARGE) max = AOMMAX(p0, max);
- if (p1 != CDEF_VERY_LARGE) max = AOMMAX(p1, max);
- min = AOMMIN(p0, min);
- min = AOMMIN(p1, min);
- int16_t s0 = in[i * s + j + cdef_directions[(dir + 2) & 7][k]];
- int16_t s1 = in[i * s + j - cdef_directions[(dir + 2) & 7][k]];
- int16_t s2 = in[i * s + j + cdef_directions[(dir + 6) & 7][k]];
- int16_t s3 = in[i * s + j - cdef_directions[(dir + 6) & 7][k]];
- if (s0 != CDEF_VERY_LARGE) max = AOMMAX(s0, max);
- if (s1 != CDEF_VERY_LARGE) max = AOMMAX(s1, max);
- if (s2 != CDEF_VERY_LARGE) max = AOMMAX(s2, max);
- if (s3 != CDEF_VERY_LARGE) max = AOMMAX(s3, max);
- min = AOMMIN(s0, min);
- min = AOMMIN(s1, min);
- min = AOMMIN(s2, min);
- min = AOMMIN(s3, min);
- sum += sec_taps[k] * constrain(s0 - x, sec_strength, sec_damping);
- sum += sec_taps[k] * constrain(s1 - x, sec_strength, sec_damping);
- sum += sec_taps[k] * constrain(s2 - x, sec_strength, sec_damping);
- sum += sec_taps[k] * constrain(s3 - x, sec_strength, sec_damping);
- }
- y = clamp((int16_t)x + ((8 + sum - (sum < 0)) >> 4), min, max);
- if (dst8)
- dst8[i * dstride + j] = (uint8_t)y;
- else
- dst16[i * dstride + j] = (uint16_t)y;
- }
- }
-}
-
-/* Compute the primary filter strength for an 8x8 block based on the
- directional variance difference. A high variance difference means
- that we have a highly directional pattern (e.g. a high contrast
- edge), so we can apply more deringing. A low variance means that we
- either have a low contrast edge, or a non-directional texture, so
- we want to be careful not to blur. */
-static INLINE int adjust_strength(int strength, int32_t var) {
- const int i = var >> 6 ? AOMMIN(get_msb(var >> 6), 12) : 0;
- /* We use the variance of 8x8 blocks to adjust the strength. */
- return var ? (strength * (4 + i) + 8) >> 4 : 0;
-}
-
-void cdef_filter_fb(uint8_t *dst8, uint16_t *dst16, int dstride, uint16_t *in,
- int xdec, int ydec, int dir[CDEF_NBLOCKS][CDEF_NBLOCKS],
- int *dirinit, int var[CDEF_NBLOCKS][CDEF_NBLOCKS], int pli,
- cdef_list *dlist, int cdef_count, int level,
- int sec_strength, int pri_damping, int sec_damping,
- int coeff_shift) {
- int bi;
- int bx;
- int by;
- int bsize, bsizex, bsizey;
-
- int pri_strength = level << coeff_shift;
- sec_strength <<= coeff_shift;
- sec_damping += coeff_shift - (pli != AOM_PLANE_Y);
- pri_damping += coeff_shift - (pli != AOM_PLANE_Y);
- bsize =
- ydec ? (xdec ? BLOCK_4X4 : BLOCK_8X4) : (xdec ? BLOCK_4X8 : BLOCK_8X8);
- bsizex = 3 - xdec;
- bsizey = 3 - ydec;
- if (dirinit && pri_strength == 0 && sec_strength == 0) {
- // If we're here, both primary and secondary strengths are 0, and
- // we still haven't written anything to y[] yet, so we just copy
- // the input to y[]. This is necessary only for av1_cdef_search()
- // and only av1_cdef_search() sets dirinit.
- for (bi = 0; bi < cdef_count; bi++) {
- by = dlist[bi].by;
- bx = dlist[bi].bx;
- int iy, ix;
- // TODO(stemidts/jmvalin): SIMD optimisations
- for (iy = 0; iy < 1 << bsizey; iy++)
- for (ix = 0; ix < 1 << bsizex; ix++)
- dst16[(bi << (bsizex + bsizey)) + (iy << bsizex) + ix] =
- in[((by << bsizey) + iy) * CDEF_BSTRIDE + (bx << bsizex) + ix];
- }
- return;
- }
-
- if (pli == 0) {
- if (!dirinit || !*dirinit) {
- for (bi = 0; bi < cdef_count; bi++) {
- by = dlist[bi].by;
- bx = dlist[bi].bx;
- dir[by][bx] = cdef_find_dir(&in[8 * by * CDEF_BSTRIDE + 8 * bx],
- CDEF_BSTRIDE, &var[by][bx], coeff_shift);
- }
- if (dirinit) *dirinit = 1;
- }
- }
- if (pli == 1 && xdec != ydec) {
- for (bi = 0; bi < cdef_count; bi++) {
- static const int conv422[8] = { 7, 0, 2, 4, 5, 6, 6, 6 };
- static const int conv440[8] = { 1, 2, 2, 2, 3, 4, 6, 0 };
- by = dlist[bi].by;
- bx = dlist[bi].bx;
- dir[by][bx] = (xdec ? conv422 : conv440)[dir[by][bx]];
- }
- }
-
- for (bi = 0; bi < cdef_count; bi++) {
- int t = dlist[bi].skip ? 0 : pri_strength;
- int s = dlist[bi].skip ? 0 : sec_strength;
- by = dlist[bi].by;
- bx = dlist[bi].bx;
- if (dst8)
- cdef_filter_block(&dst8[(by << bsizey) * dstride + (bx << bsizex)], NULL,
- dstride,
- &in[(by * CDEF_BSTRIDE << bsizey) + (bx << bsizex)],
- (pli ? t : adjust_strength(t, var[by][bx])), s,
- t ? dir[by][bx] : 0, pri_damping, sec_damping, bsize,
- (256 << coeff_shift) - 1, coeff_shift);
- else
- cdef_filter_block(
- NULL,
- &dst16[dirinit ? bi << (bsizex + bsizey)
- : (by << bsizey) * dstride + (bx << bsizex)],
- dirinit ? 1 << bsizex : dstride,
- &in[(by * CDEF_BSTRIDE << bsizey) + (bx << bsizex)],
- (pli ? t : adjust_strength(t, var[by][bx])), s, t ? dir[by][bx] : 0,
- pri_damping, sec_damping, bsize, (256 << coeff_shift) - 1,
- coeff_shift);
- }
-}
diff --git a/third_party/aom/av1/common/cdef_block.h b/third_party/aom/av1/common/cdef_block.h
deleted file mode 100644
index 6b4452cd6..000000000
--- a/third_party/aom/av1/common/cdef_block.h
+++ /dev/null
@@ -1,59 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#ifndef AOM_AV1_COMMON_CDEF_BLOCK_H_
-#define AOM_AV1_COMMON_CDEF_BLOCK_H_
-
-#include "av1/common/odintrin.h"
-
-#define CDEF_BLOCKSIZE 64
-#define CDEF_BLOCKSIZE_LOG2 6
-#define CDEF_NBLOCKS ((1 << MAX_SB_SIZE_LOG2) / 8)
-#define CDEF_SB_SHIFT (MAX_SB_SIZE_LOG2 - CDEF_BLOCKSIZE_LOG2)
-
-/* We need to buffer three vertical lines. */
-#define CDEF_VBORDER (3)
-/* We only need to buffer three horizontal pixels too, but let's align to
- 16 bytes (8 x 16 bits) to make vectorization easier. */
-#define CDEF_HBORDER (8)
-#define CDEF_BSTRIDE \
- ALIGN_POWER_OF_TWO((1 << MAX_SB_SIZE_LOG2) + 2 * CDEF_HBORDER, 3)
-
-#define CDEF_VERY_LARGE (30000)
-#define CDEF_INBUF_SIZE \
- (CDEF_BSTRIDE * ((1 << MAX_SB_SIZE_LOG2) + 2 * CDEF_VBORDER))
-
-extern const int cdef_pri_taps[2][2];
-extern const int cdef_sec_taps[2][2];
-DECLARE_ALIGNED(16, extern const int, cdef_directions[8][2]);
-
-typedef struct {
- uint8_t by;
- uint8_t bx;
- uint8_t skip;
-} cdef_list;
-
-typedef void (*cdef_filter_block_func)(uint8_t *dst8, uint16_t *dst16,
- int dstride, const uint16_t *in,
- int pri_strength, int sec_strength,
- int dir, int pri_damping,
- int sec_damping, int bsize, int max,
- int coeff_shift);
-void copy_cdef_16bit_to_16bit(uint16_t *dst, int dstride, uint16_t *src,
- cdef_list *dlist, int cdef_count, int bsize);
-
-void cdef_filter_fb(uint8_t *dst8, uint16_t *dst16, int dstride, uint16_t *in,
- int xdec, int ydec, int dir[CDEF_NBLOCKS][CDEF_NBLOCKS],
- int *dirinit, int var[CDEF_NBLOCKS][CDEF_NBLOCKS], int pli,
- cdef_list *dlist, int cdef_count, int level,
- int sec_strength, int pri_damping, int sec_damping,
- int coeff_shift);
-#endif // AOM_AV1_COMMON_CDEF_BLOCK_H_
diff --git a/third_party/aom/av1/common/cdef_block_avx2.c b/third_party/aom/av1/common/cdef_block_avx2.c
deleted file mode 100644
index e2b85b3e2..000000000
--- a/third_party/aom/av1/common/cdef_block_avx2.c
+++ /dev/null
@@ -1,14 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#include "aom_dsp/aom_simd.h"
-#define SIMD_FUNC(name) name##_avx2
-#include "av1/common/cdef_block_simd.h"
diff --git a/third_party/aom/av1/common/cdef_block_neon.c b/third_party/aom/av1/common/cdef_block_neon.c
deleted file mode 100644
index 2d6bc65e3..000000000
--- a/third_party/aom/av1/common/cdef_block_neon.c
+++ /dev/null
@@ -1,14 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#include "aom_dsp/aom_simd.h"
-#define SIMD_FUNC(name) name##_neon
-#include "av1/common/cdef_block_simd.h"
diff --git a/third_party/aom/av1/common/cdef_block_simd.h b/third_party/aom/av1/common/cdef_block_simd.h
deleted file mode 100644
index 14587a023..000000000
--- a/third_party/aom/av1/common/cdef_block_simd.h
+++ /dev/null
@@ -1,920 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#ifndef AOM_AV1_COMMON_CDEF_BLOCK_SIMD_H_
-#define AOM_AV1_COMMON_CDEF_BLOCK_SIMD_H_
-
-#include "config/av1_rtcd.h"
-
-#include "av1/common/cdef_block.h"
-
-/* partial A is a 16-bit vector of the form:
- [x8 x7 x6 x5 x4 x3 x2 x1] and partial B has the form:
- [0 y1 y2 y3 y4 y5 y6 y7].
- This function computes (x1^2+y1^2)*C1 + (x2^2+y2^2)*C2 + ...
- (x7^2+y2^7)*C7 + (x8^2+0^2)*C8 where the C1..C8 constants are in const1
- and const2. */
-static INLINE v128 fold_mul_and_sum(v128 partiala, v128 partialb, v128 const1,
- v128 const2) {
- v128 tmp;
- /* Reverse partial B. */
- partialb = v128_shuffle_8(
- partialb, v128_from_32(0x0f0e0100, 0x03020504, 0x07060908, 0x0b0a0d0c));
- /* Interleave the x and y values of identical indices and pair x8 with 0. */
- tmp = partiala;
- partiala = v128_ziplo_16(partialb, partiala);
- partialb = v128_ziphi_16(partialb, tmp);
- /* Square and add the corresponding x and y values. */
- partiala = v128_madd_s16(partiala, partiala);
- partialb = v128_madd_s16(partialb, partialb);
- /* Multiply by constant. */
- partiala = v128_mullo_s32(partiala, const1);
- partialb = v128_mullo_s32(partialb, const2);
- /* Sum all results. */
- partiala = v128_add_32(partiala, partialb);
- return partiala;
-}
-
-static INLINE v128 hsum4(v128 x0, v128 x1, v128 x2, v128 x3) {
- v128 t0, t1, t2, t3;
- t0 = v128_ziplo_32(x1, x0);
- t1 = v128_ziplo_32(x3, x2);
- t2 = v128_ziphi_32(x1, x0);
- t3 = v128_ziphi_32(x3, x2);
- x0 = v128_ziplo_64(t1, t0);
- x1 = v128_ziphi_64(t1, t0);
- x2 = v128_ziplo_64(t3, t2);
- x3 = v128_ziphi_64(t3, t2);
- return v128_add_32(v128_add_32(x0, x1), v128_add_32(x2, x3));
-}
-
-/* Computes cost for directions 0, 5, 6 and 7. We can call this function again
- to compute the remaining directions. */
-static INLINE v128 compute_directions(v128 lines[8], int32_t tmp_cost1[4]) {
- v128 partial4a, partial4b, partial5a, partial5b, partial7a, partial7b;
- v128 partial6;
- v128 tmp;
- /* Partial sums for lines 0 and 1. */
- partial4a = v128_shl_n_byte(lines[0], 14);
- partial4b = v128_shr_n_byte(lines[0], 2);
- partial4a = v128_add_16(partial4a, v128_shl_n_byte(lines[1], 12));
- partial4b = v128_add_16(partial4b, v128_shr_n_byte(lines[1], 4));
- tmp = v128_add_16(lines[0], lines[1]);
- partial5a = v128_shl_n_byte(tmp, 10);
- partial5b = v128_shr_n_byte(tmp, 6);
- partial7a = v128_shl_n_byte(tmp, 4);
- partial7b = v128_shr_n_byte(tmp, 12);
- partial6 = tmp;
-
- /* Partial sums for lines 2 and 3. */
- partial4a = v128_add_16(partial4a, v128_shl_n_byte(lines[2], 10));
- partial4b = v128_add_16(partial4b, v128_shr_n_byte(lines[2], 6));
- partial4a = v128_add_16(partial4a, v128_shl_n_byte(lines[3], 8));
- partial4b = v128_add_16(partial4b, v128_shr_n_byte(lines[3], 8));
- tmp = v128_add_16(lines[2], lines[3]);
- partial5a = v128_add_16(partial5a, v128_shl_n_byte(tmp, 8));
- partial5b = v128_add_16(partial5b, v128_shr_n_byte(tmp, 8));
- partial7a = v128_add_16(partial7a, v128_shl_n_byte(tmp, 6));
- partial7b = v128_add_16(partial7b, v128_shr_n_byte(tmp, 10));
- partial6 = v128_add_16(partial6, tmp);
-
- /* Partial sums for lines 4 and 5. */
- partial4a = v128_add_16(partial4a, v128_shl_n_byte(lines[4], 6));
- partial4b = v128_add_16(partial4b, v128_shr_n_byte(lines[4], 10));
- partial4a = v128_add_16(partial4a, v128_shl_n_byte(lines[5], 4));
- partial4b = v128_add_16(partial4b, v128_shr_n_byte(lines[5], 12));
- tmp = v128_add_16(lines[4], lines[5]);
- partial5a = v128_add_16(partial5a, v128_shl_n_byte(tmp, 6));
- partial5b = v128_add_16(partial5b, v128_shr_n_byte(tmp, 10));
- partial7a = v128_add_16(partial7a, v128_shl_n_byte(tmp, 8));
- partial7b = v128_add_16(partial7b, v128_shr_n_byte(tmp, 8));
- partial6 = v128_add_16(partial6, tmp);
-
- /* Partial sums for lines 6 and 7. */
- partial4a = v128_add_16(partial4a, v128_shl_n_byte(lines[6], 2));
- partial4b = v128_add_16(partial4b, v128_shr_n_byte(lines[6], 14));
- partial4a = v128_add_16(partial4a, lines[7]);
- tmp = v128_add_16(lines[6], lines[7]);
- partial5a = v128_add_16(partial5a, v128_shl_n_byte(tmp, 4));
- partial5b = v128_add_16(partial5b, v128_shr_n_byte(tmp, 12));
- partial7a = v128_add_16(partial7a, v128_shl_n_byte(tmp, 10));
- partial7b = v128_add_16(partial7b, v128_shr_n_byte(tmp, 6));
- partial6 = v128_add_16(partial6, tmp);
-
- /* Compute costs in terms of partial sums. */
- partial4a =
- fold_mul_and_sum(partial4a, partial4b, v128_from_32(210, 280, 420, 840),
- v128_from_32(105, 120, 140, 168));
- partial7a =
- fold_mul_and_sum(partial7a, partial7b, v128_from_32(210, 420, 0, 0),
- v128_from_32(105, 105, 105, 140));
- partial5a =
- fold_mul_and_sum(partial5a, partial5b, v128_from_32(210, 420, 0, 0),
- v128_from_32(105, 105, 105, 140));
- partial6 = v128_madd_s16(partial6, partial6);
- partial6 = v128_mullo_s32(partial6, v128_dup_32(105));
-
- partial4a = hsum4(partial4a, partial5a, partial6, partial7a);
- v128_store_unaligned(tmp_cost1, partial4a);
- return partial4a;
-}
-
-/* transpose and reverse the order of the lines -- equivalent to a 90-degree
- counter-clockwise rotation of the pixels. */
-static INLINE void array_reverse_transpose_8x8(v128 *in, v128 *res) {
- const v128 tr0_0 = v128_ziplo_16(in[1], in[0]);
- const v128 tr0_1 = v128_ziplo_16(in[3], in[2]);
- const v128 tr0_2 = v128_ziphi_16(in[1], in[0]);
- const v128 tr0_3 = v128_ziphi_16(in[3], in[2]);
- const v128 tr0_4 = v128_ziplo_16(in[5], in[4]);
- const v128 tr0_5 = v128_ziplo_16(in[7], in[6]);
- const v128 tr0_6 = v128_ziphi_16(in[5], in[4]);
- const v128 tr0_7 = v128_ziphi_16(in[7], in[6]);
-
- const v128 tr1_0 = v128_ziplo_32(tr0_1, tr0_0);
- const v128 tr1_1 = v128_ziplo_32(tr0_5, tr0_4);
- const v128 tr1_2 = v128_ziphi_32(tr0_1, tr0_0);
- const v128 tr1_3 = v128_ziphi_32(tr0_5, tr0_4);
- const v128 tr1_4 = v128_ziplo_32(tr0_3, tr0_2);
- const v128 tr1_5 = v128_ziplo_32(tr0_7, tr0_6);
- const v128 tr1_6 = v128_ziphi_32(tr0_3, tr0_2);
- const v128 tr1_7 = v128_ziphi_32(tr0_7, tr0_6);
-
- res[7] = v128_ziplo_64(tr1_1, tr1_0);
- res[6] = v128_ziphi_64(tr1_1, tr1_0);
- res[5] = v128_ziplo_64(tr1_3, tr1_2);
- res[4] = v128_ziphi_64(tr1_3, tr1_2);
- res[3] = v128_ziplo_64(tr1_5, tr1_4);
- res[2] = v128_ziphi_64(tr1_5, tr1_4);
- res[1] = v128_ziplo_64(tr1_7, tr1_6);
- res[0] = v128_ziphi_64(tr1_7, tr1_6);
-}
-
-int SIMD_FUNC(cdef_find_dir)(const uint16_t *img, int stride, int32_t *var,
- int coeff_shift) {
- int i;
- int32_t cost[8];
- int32_t best_cost = 0;
- int best_dir = 0;
- v128 lines[8];
- for (i = 0; i < 8; i++) {
- lines[i] = v128_load_unaligned(&img[i * stride]);
- lines[i] =
- v128_sub_16(v128_shr_s16(lines[i], coeff_shift), v128_dup_16(128));
- }
-
- /* Compute "mostly vertical" directions. */
- v128 dir47 = compute_directions(lines, cost + 4);
-
- array_reverse_transpose_8x8(lines, lines);
-
- /* Compute "mostly horizontal" directions. */
- v128 dir03 = compute_directions(lines, cost);
-
- v128 max = v128_max_s32(dir03, dir47);
- max = v128_max_s32(max, v128_align(max, max, 8));
- max = v128_max_s32(max, v128_align(max, max, 4));
- best_cost = v128_low_u32(max);
- v128 t =
- v128_pack_s32_s16(v128_cmpeq_32(max, dir47), v128_cmpeq_32(max, dir03));
- best_dir = v128_movemask_8(v128_pack_s16_s8(t, t));
- best_dir = get_msb(best_dir ^ (best_dir - 1)); // Count trailing zeros
-
- /* Difference between the optimal variance and the variance along the
- orthogonal direction. Again, the sum(x^2) terms cancel out. */
- *var = best_cost - cost[(best_dir + 4) & 7];
- /* We'd normally divide by 840, but dividing by 1024 is close enough
- for what we're going to do with this. */
- *var >>= 10;
- return best_dir;
-}
-
-// sign(a-b) * min(abs(a-b), max(0, threshold - (abs(a-b) >> adjdamp)))
-SIMD_INLINE v256 constrain16(v256 a, v256 b, unsigned int threshold,
- unsigned int adjdamp) {
- v256 diff = v256_sub_16(a, b);
- const v256 sign = v256_shr_n_s16(diff, 15);
- diff = v256_abs_s16(diff);
- const v256 s =
- v256_ssub_u16(v256_dup_16(threshold), v256_shr_u16(diff, adjdamp));
- return v256_xor(v256_add_16(sign, v256_min_s16(diff, s)), sign);
-}
-
-// sign(a - b) * min(abs(a - b), max(0, strength - (abs(a - b) >> adjdamp)))
-SIMD_INLINE v128 constrain(v256 a, v256 b, unsigned int strength,
- unsigned int adjdamp) {
- const v256 diff16 = v256_sub_16(a, b);
- v128 diff = v128_pack_s16_s8(v256_high_v128(diff16), v256_low_v128(diff16));
- const v128 sign = v128_cmplt_s8(diff, v128_zero());
- diff = v128_abs_s8(diff);
- return v128_xor(
- v128_add_8(sign,
- v128_min_u8(diff, v128_ssub_u8(v128_dup_8(strength),
- v128_shr_u8(diff, adjdamp)))),
- sign);
-}
-
-void SIMD_FUNC(cdef_filter_block_4x4_8)(uint8_t *dst, int dstride,
- const uint16_t *in, int pri_strength,
- int sec_strength, int dir,
- int pri_damping, int sec_damping,
- AOM_UNUSED int max_unused,
- int coeff_shift) {
- v128 p0, p1, p2, p3;
- v256 sum, row, tap, res;
- v256 max, min, large = v256_dup_16(CDEF_VERY_LARGE);
- int po1 = cdef_directions[dir][0];
- int po2 = cdef_directions[dir][1];
- int s1o1 = cdef_directions[(dir + 2) & 7][0];
- int s1o2 = cdef_directions[(dir + 2) & 7][1];
- int s2o1 = cdef_directions[(dir + 6) & 7][0];
- int s2o2 = cdef_directions[(dir + 6) & 7][1];
-
- const int *pri_taps = cdef_pri_taps[(pri_strength >> coeff_shift) & 1];
- const int *sec_taps = cdef_sec_taps[(pri_strength >> coeff_shift) & 1];
-
- if (pri_strength)
- pri_damping = AOMMAX(0, pri_damping - get_msb(pri_strength));
- if (sec_strength)
- sec_damping = AOMMAX(0, sec_damping - get_msb(sec_strength));
-
- sum = v256_zero();
- row = v256_from_v64(v64_load_aligned(&in[0 * CDEF_BSTRIDE]),
- v64_load_aligned(&in[1 * CDEF_BSTRIDE]),
- v64_load_aligned(&in[2 * CDEF_BSTRIDE]),
- v64_load_aligned(&in[3 * CDEF_BSTRIDE]));
- max = min = row;
-
- if (pri_strength) {
- // Primary near taps
- tap = v256_from_v64(v64_load_unaligned(&in[0 * CDEF_BSTRIDE + po1]),
- v64_load_unaligned(&in[1 * CDEF_BSTRIDE + po1]),
- v64_load_unaligned(&in[2 * CDEF_BSTRIDE + po1]),
- v64_load_unaligned(&in[3 * CDEF_BSTRIDE + po1]));
- max = v256_max_s16(max, v256_andn(tap, v256_cmpeq_16(tap, large)));
- min = v256_min_s16(min, tap);
- p0 = constrain(tap, row, pri_strength, pri_damping);
- tap = v256_from_v64(v64_load_unaligned(&in[0 * CDEF_BSTRIDE - po1]),
- v64_load_unaligned(&in[1 * CDEF_BSTRIDE - po1]),
- v64_load_unaligned(&in[2 * CDEF_BSTRIDE - po1]),
- v64_load_unaligned(&in[3 * CDEF_BSTRIDE - po1]));
- max = v256_max_s16(max, v256_andn(tap, v256_cmpeq_16(tap, large)));
- min = v256_min_s16(min, tap);
- p1 = constrain(tap, row, pri_strength, pri_damping);
-
- // sum += pri_taps[0] * (p0 + p1)
- sum = v256_add_16(sum, v256_madd_us8(v256_dup_8(pri_taps[0]),
- v256_from_v128(v128_ziphi_8(p0, p1),
- v128_ziplo_8(p0, p1))));
-
- // Primary far taps
- tap = v256_from_v64(v64_load_unaligned(&in[0 * CDEF_BSTRIDE + po2]),
- v64_load_unaligned(&in[1 * CDEF_BSTRIDE + po2]),
- v64_load_unaligned(&in[2 * CDEF_BSTRIDE + po2]),
- v64_load_unaligned(&in[3 * CDEF_BSTRIDE + po2]));
- max = v256_max_s16(max, v256_andn(tap, v256_cmpeq_16(tap, large)));
- min = v256_min_s16(min, tap);
- p0 = constrain(tap, row, pri_strength, pri_damping);
- tap = v256_from_v64(v64_load_unaligned(&in[0 * CDEF_BSTRIDE - po2]),
- v64_load_unaligned(&in[1 * CDEF_BSTRIDE - po2]),
- v64_load_unaligned(&in[2 * CDEF_BSTRIDE - po2]),
- v64_load_unaligned(&in[3 * CDEF_BSTRIDE - po2]));
- max = v256_max_s16(max, v256_andn(tap, v256_cmpeq_16(tap, large)));
- min = v256_min_s16(min, tap);
- p1 = constrain(tap, row, pri_strength, pri_damping);
-
- // sum += pri_taps[1] * (p0 + p1)
- sum = v256_add_16(sum, v256_madd_us8(v256_dup_8(pri_taps[1]),
- v256_from_v128(v128_ziphi_8(p0, p1),
- v128_ziplo_8(p0, p1))));
- }
-
- if (sec_strength) {
- // Secondary near taps
- tap = v256_from_v64(v64_load_unaligned(&in[0 * CDEF_BSTRIDE + s1o1]),
- v64_load_unaligned(&in[1 * CDEF_BSTRIDE + s1o1]),
- v64_load_unaligned(&in[2 * CDEF_BSTRIDE + s1o1]),
- v64_load_unaligned(&in[3 * CDEF_BSTRIDE + s1o1]));
- max = v256_max_s16(max, v256_andn(tap, v256_cmpeq_16(tap, large)));
- min = v256_min_s16(min, tap);
- p0 = constrain(tap, row, sec_strength, sec_damping);
- tap = v256_from_v64(v64_load_unaligned(&in[0 * CDEF_BSTRIDE - s1o1]),
- v64_load_unaligned(&in[1 * CDEF_BSTRIDE - s1o1]),
- v64_load_unaligned(&in[2 * CDEF_BSTRIDE - s1o1]),
- v64_load_unaligned(&in[3 * CDEF_BSTRIDE - s1o1]));
- max = v256_max_s16(max, v256_andn(tap, v256_cmpeq_16(tap, large)));
- min = v256_min_s16(min, tap);
- p1 = constrain(tap, row, sec_strength, sec_damping);
- tap = v256_from_v64(v64_load_unaligned(&in[0 * CDEF_BSTRIDE + s2o1]),
- v64_load_unaligned(&in[1 * CDEF_BSTRIDE + s2o1]),
- v64_load_unaligned(&in[2 * CDEF_BSTRIDE + s2o1]),
- v64_load_unaligned(&in[3 * CDEF_BSTRIDE + s2o1]));
- max = v256_max_s16(max, v256_andn(tap, v256_cmpeq_16(tap, large)));
- min = v256_min_s16(min, tap);
- p2 = constrain(tap, row, sec_strength, sec_damping);
- tap = v256_from_v64(v64_load_unaligned(&in[0 * CDEF_BSTRIDE - s2o1]),
- v64_load_unaligned(&in[1 * CDEF_BSTRIDE - s2o1]),
- v64_load_unaligned(&in[2 * CDEF_BSTRIDE - s2o1]),
- v64_load_unaligned(&in[3 * CDEF_BSTRIDE - s2o1]));
- max = v256_max_s16(max, v256_andn(tap, v256_cmpeq_16(tap, large)));
- min = v256_min_s16(min, tap);
- p3 = constrain(tap, row, sec_strength, sec_damping);
-
- // sum += sec_taps[0] * (p0 + p1 + p2 + p3)
- p0 = v128_add_8(p0, p1);
- p2 = v128_add_8(p2, p3);
- sum = v256_add_16(sum, v256_madd_us8(v256_dup_8(sec_taps[0]),
- v256_from_v128(v128_ziphi_8(p0, p2),
- v128_ziplo_8(p0, p2))));
-
- // Secondary far taps
- tap = v256_from_v64(v64_load_unaligned(&in[0 * CDEF_BSTRIDE + s1o2]),
- v64_load_unaligned(&in[1 * CDEF_BSTRIDE + s1o2]),
- v64_load_unaligned(&in[2 * CDEF_BSTRIDE + s1o2]),
- v64_load_unaligned(&in[3 * CDEF_BSTRIDE + s1o2]));
- max = v256_max_s16(max, v256_andn(tap, v256_cmpeq_16(tap, large)));
- min = v256_min_s16(min, tap);
- p0 = constrain(tap, row, sec_strength, sec_damping);
- tap = v256_from_v64(v64_load_unaligned(&in[0 * CDEF_BSTRIDE - s1o2]),
- v64_load_unaligned(&in[1 * CDEF_BSTRIDE - s1o2]),
- v64_load_unaligned(&in[2 * CDEF_BSTRIDE - s1o2]),
- v64_load_unaligned(&in[3 * CDEF_BSTRIDE - s1o2]));
- max = v256_max_s16(max, v256_andn(tap, v256_cmpeq_16(tap, large)));
- min = v256_min_s16(min, tap);
- p1 = constrain(tap, row, sec_strength, sec_damping);
- tap = v256_from_v64(v64_load_unaligned(&in[0 * CDEF_BSTRIDE + s2o2]),
- v64_load_unaligned(&in[1 * CDEF_BSTRIDE + s2o2]),
- v64_load_unaligned(&in[2 * CDEF_BSTRIDE + s2o2]),
- v64_load_unaligned(&in[3 * CDEF_BSTRIDE + s2o2]));
- max = v256_max_s16(max, v256_andn(tap, v256_cmpeq_16(tap, large)));
- min = v256_min_s16(min, tap);
- p2 = constrain(tap, row, sec_strength, sec_damping);
- tap = v256_from_v64(v64_load_unaligned(&in[0 * CDEF_BSTRIDE - s2o2]),
- v64_load_unaligned(&in[1 * CDEF_BSTRIDE - s2o2]),
- v64_load_unaligned(&in[2 * CDEF_BSTRIDE - s2o2]),
- v64_load_unaligned(&in[3 * CDEF_BSTRIDE - s2o2]));
- max = v256_max_s16(max, v256_andn(tap, v256_cmpeq_16(tap, large)));
- min = v256_min_s16(min, tap);
- p3 = constrain(tap, row, sec_strength, sec_damping);
-
- // sum += sec_taps[1] * (p0 + p1 + p2 + p3)
- p0 = v128_add_8(p0, p1);
- p2 = v128_add_8(p2, p3);
-
- sum = v256_add_16(sum, v256_madd_us8(v256_dup_8(sec_taps[1]),
- v256_from_v128(v128_ziphi_8(p0, p2),
- v128_ziplo_8(p0, p2))));
- }
-
- // res = row + ((sum - (sum < 0) + 8) >> 4)
- sum = v256_add_16(sum, v256_cmplt_s16(sum, v256_zero()));
- res = v256_add_16(sum, v256_dup_16(8));
- res = v256_shr_n_s16(res, 4);
- res = v256_add_16(row, res);
- res = v256_min_s16(v256_max_s16(res, min), max);
- res = v256_pack_s16_u8(res, res);
-
- p0 = v256_low_v128(res);
- u32_store_aligned(&dst[0 * dstride], v64_high_u32(v128_high_v64(p0)));
- u32_store_aligned(&dst[1 * dstride], v64_low_u32(v128_high_v64(p0)));
- u32_store_aligned(&dst[2 * dstride], v64_high_u32(v128_low_v64(p0)));
- u32_store_aligned(&dst[3 * dstride], v64_low_u32(v128_low_v64(p0)));
-}
-
-void SIMD_FUNC(cdef_filter_block_8x8_8)(uint8_t *dst, int dstride,
- const uint16_t *in, int pri_strength,
- int sec_strength, int dir,
- int pri_damping, int sec_damping,
- AOM_UNUSED int max_unused,
- int coeff_shift) {
- int i;
- v128 p0, p1, p2, p3;
- v256 sum, row, res, tap;
- v256 max, min, large = v256_dup_16(CDEF_VERY_LARGE);
- int po1 = cdef_directions[dir][0];
- int po2 = cdef_directions[dir][1];
- int s1o1 = cdef_directions[(dir + 2) & 7][0];
- int s1o2 = cdef_directions[(dir + 2) & 7][1];
- int s2o1 = cdef_directions[(dir + 6) & 7][0];
- int s2o2 = cdef_directions[(dir + 6) & 7][1];
-
- const int *pri_taps = cdef_pri_taps[(pri_strength >> coeff_shift) & 1];
- const int *sec_taps = cdef_sec_taps[(pri_strength >> coeff_shift) & 1];
-
- if (pri_strength)
- pri_damping = AOMMAX(0, pri_damping - get_msb(pri_strength));
- if (sec_strength)
- sec_damping = AOMMAX(0, sec_damping - get_msb(sec_strength));
- for (i = 0; i < 8; i += 2) {
- sum = v256_zero();
- row = v256_from_v128(v128_load_aligned(&in[i * CDEF_BSTRIDE]),
- v128_load_aligned(&in[(i + 1) * CDEF_BSTRIDE]));
-
- max = min = row;
- // Primary near taps
- tap =
- v256_from_v128(v128_load_unaligned(&in[i * CDEF_BSTRIDE + po1]),
- v128_load_unaligned(&in[(i + 1) * CDEF_BSTRIDE + po1]));
- max = v256_max_s16(max, v256_andn(tap, v256_cmpeq_16(tap, large)));
- min = v256_min_s16(min, tap);
- p0 = constrain(tap, row, pri_strength, pri_damping);
- tap =
- v256_from_v128(v128_load_unaligned(&in[i * CDEF_BSTRIDE - po1]),
- v128_load_unaligned(&in[(i + 1) * CDEF_BSTRIDE - po1]));
- max = v256_max_s16(max, v256_andn(tap, v256_cmpeq_16(tap, large)));
- min = v256_min_s16(min, tap);
- p1 = constrain(tap, row, pri_strength, pri_damping);
-
- // sum += pri_taps[0] * (p0 + p1)
- sum = v256_add_16(sum, v256_madd_us8(v256_dup_8(pri_taps[0]),
- v256_from_v128(v128_ziphi_8(p0, p1),
- v128_ziplo_8(p0, p1))));
-
- // Primary far taps
- tap =
- v256_from_v128(v128_load_unaligned(&in[i * CDEF_BSTRIDE + po2]),
- v128_load_unaligned(&in[(i + 1) * CDEF_BSTRIDE + po2]));
- max = v256_max_s16(max, v256_andn(tap, v256_cmpeq_16(tap, large)));
- min = v256_min_s16(min, tap);
- p0 = constrain(tap, row, pri_strength, pri_damping);
- tap =
- v256_from_v128(v128_load_unaligned(&in[i * CDEF_BSTRIDE - po2]),
- v128_load_unaligned(&in[(i + 1) * CDEF_BSTRIDE - po2]));
- max = v256_max_s16(max, v256_andn(tap, v256_cmpeq_16(tap, large)));
- min = v256_min_s16(min, tap);
- p1 = constrain(tap, row, pri_strength, pri_damping);
-
- // sum += pri_taps[1] * (p0 + p1)
- sum = v256_add_16(sum, v256_madd_us8(v256_dup_8(pri_taps[1]),
- v256_from_v128(v128_ziphi_8(p0, p1),
- v128_ziplo_8(p0, p1))));
-
- // Secondary near taps
- tap =
- v256_from_v128(v128_load_unaligned(&in[i * CDEF_BSTRIDE + s1o1]),
- v128_load_unaligned(&in[(i + 1) * CDEF_BSTRIDE + s1o1]));
- max = v256_max_s16(max, v256_andn(tap, v256_cmpeq_16(tap, large)));
- min = v256_min_s16(min, tap);
- p0 = constrain(tap, row, sec_strength, sec_damping);
- tap =
- v256_from_v128(v128_load_unaligned(&in[i * CDEF_BSTRIDE - s1o1]),
- v128_load_unaligned(&in[(i + 1) * CDEF_BSTRIDE - s1o1]));
- max = v256_max_s16(max, v256_andn(tap, v256_cmpeq_16(tap, large)));
- min = v256_min_s16(min, tap);
- p1 = constrain(tap, row, sec_strength, sec_damping);
- tap =
- v256_from_v128(v128_load_unaligned(&in[i * CDEF_BSTRIDE + s2o1]),
- v128_load_unaligned(&in[(i + 1) * CDEF_BSTRIDE + s2o1]));
- max = v256_max_s16(max, v256_andn(tap, v256_cmpeq_16(tap, large)));
- min = v256_min_s16(min, tap);
- p2 = constrain(tap, row, sec_strength, sec_damping);
- tap =
- v256_from_v128(v128_load_unaligned(&in[i * CDEF_BSTRIDE - s2o1]),
- v128_load_unaligned(&in[(i + 1) * CDEF_BSTRIDE - s2o1]));
- max = v256_max_s16(max, v256_andn(tap, v256_cmpeq_16(tap, large)));
- min = v256_min_s16(min, tap);
- p3 = constrain(tap, row, sec_strength, sec_damping);
-
- // sum += sec_taps[0] * (p0 + p1 + p2 + p3)
- p0 = v128_add_8(p0, p1);
- p2 = v128_add_8(p2, p3);
- sum = v256_add_16(sum, v256_madd_us8(v256_dup_8(sec_taps[0]),
- v256_from_v128(v128_ziphi_8(p0, p2),
- v128_ziplo_8(p0, p2))));
-
- // Secondary far taps
- tap =
- v256_from_v128(v128_load_unaligned(&in[i * CDEF_BSTRIDE + s1o2]),
- v128_load_unaligned(&in[(i + 1) * CDEF_BSTRIDE + s1o2]));
- max = v256_max_s16(max, v256_andn(tap, v256_cmpeq_16(tap, large)));
- min = v256_min_s16(min, tap);
- p0 = constrain(tap, row, sec_strength, sec_damping);
- tap =
- v256_from_v128(v128_load_unaligned(&in[i * CDEF_BSTRIDE - s1o2]),
- v128_load_unaligned(&in[(i + 1) * CDEF_BSTRIDE - s1o2]));
- max = v256_max_s16(max, v256_andn(tap, v256_cmpeq_16(tap, large)));
- min = v256_min_s16(min, tap);
- p1 = constrain(tap, row, sec_strength, sec_damping);
- tap =
- v256_from_v128(v128_load_unaligned(&in[i * CDEF_BSTRIDE + s2o2]),
- v128_load_unaligned(&in[(i + 1) * CDEF_BSTRIDE + s2o2]));
- max = v256_max_s16(max, v256_andn(tap, v256_cmpeq_16(tap, large)));
- min = v256_min_s16(min, tap);
- p2 = constrain(tap, row, sec_strength, sec_damping);
- tap =
- v256_from_v128(v128_load_unaligned(&in[i * CDEF_BSTRIDE - s2o2]),
- v128_load_unaligned(&in[(i + 1) * CDEF_BSTRIDE - s2o2]));
- max = v256_max_s16(max, v256_andn(tap, v256_cmpeq_16(tap, large)));
- min = v256_min_s16(min, tap);
- p3 = constrain(tap, row, sec_strength, sec_damping);
-
- // sum += sec_taps[1] * (p0 + p1 + p2 + p3)
- p0 = v128_add_8(p0, p1);
- p2 = v128_add_8(p2, p3);
- sum = v256_add_16(sum, v256_madd_us8(v256_dup_8(sec_taps[1]),
- v256_from_v128(v128_ziphi_8(p0, p2),
- v128_ziplo_8(p0, p2))));
-
- // res = row + ((sum - (sum < 0) + 8) >> 4)
- sum = v256_add_16(sum, v256_cmplt_s16(sum, v256_zero()));
- res = v256_add_16(sum, v256_dup_16(8));
- res = v256_shr_n_s16(res, 4);
- res = v256_add_16(row, res);
- res = v256_min_s16(v256_max_s16(res, min), max);
- res = v256_pack_s16_u8(res, res);
-
- p0 = v256_low_v128(res);
- v64_store_aligned(&dst[i * dstride], v128_high_v64(p0));
- v64_store_aligned(&dst[(i + 1) * dstride], v128_low_v64(p0));
- }
-}
-
-void SIMD_FUNC(cdef_filter_block_4x4_16)(uint16_t *dst, int dstride,
- const uint16_t *in, int pri_strength,
- int sec_strength, int dir,
- int pri_damping, int sec_damping,
- AOM_UNUSED int max_unused,
- int coeff_shift) {
- int i;
- v256 p0, p1, p2, p3, sum, row, res;
- v256 max, min, large = v256_dup_16(CDEF_VERY_LARGE);
- int po1 = cdef_directions[dir][0];
- int po2 = cdef_directions[dir][1];
- int s1o1 = cdef_directions[(dir + 2) & 7][0];
- int s1o2 = cdef_directions[(dir + 2) & 7][1];
- int s2o1 = cdef_directions[(dir + 6) & 7][0];
- int s2o2 = cdef_directions[(dir + 6) & 7][1];
-
- const int *pri_taps = cdef_pri_taps[(pri_strength >> coeff_shift) & 1];
- const int *sec_taps = cdef_sec_taps[(pri_strength >> coeff_shift) & 1];
-
- if (pri_strength)
- pri_damping = AOMMAX(0, pri_damping - get_msb(pri_strength));
- if (sec_strength)
- sec_damping = AOMMAX(0, sec_damping - get_msb(sec_strength));
- for (i = 0; i < 4; i += 4) {
- sum = v256_zero();
- row = v256_from_v64(v64_load_aligned(&in[i * CDEF_BSTRIDE]),
- v64_load_aligned(&in[(i + 1) * CDEF_BSTRIDE]),
- v64_load_aligned(&in[(i + 2) * CDEF_BSTRIDE]),
- v64_load_aligned(&in[(i + 3) * CDEF_BSTRIDE]));
- min = max = row;
-
- // Primary near taps
- p0 = v256_from_v64(v64_load_unaligned(&in[i * CDEF_BSTRIDE + po1]),
- v64_load_unaligned(&in[(i + 1) * CDEF_BSTRIDE + po1]),
- v64_load_unaligned(&in[(i + 2) * CDEF_BSTRIDE + po1]),
- v64_load_unaligned(&in[(i + 3) * CDEF_BSTRIDE + po1]));
- p1 = v256_from_v64(v64_load_unaligned(&in[i * CDEF_BSTRIDE - po1]),
- v64_load_unaligned(&in[(i + 1) * CDEF_BSTRIDE - po1]),
- v64_load_unaligned(&in[(i + 2) * CDEF_BSTRIDE - po1]),
- v64_load_unaligned(&in[(i + 3) * CDEF_BSTRIDE - po1]));
- max =
- v256_max_s16(v256_max_s16(max, v256_andn(p0, v256_cmpeq_16(p0, large))),
- v256_andn(p1, v256_cmpeq_16(p1, large)));
- min = v256_min_s16(v256_min_s16(min, p0), p1);
- p0 = constrain16(p0, row, pri_strength, pri_damping);
- p1 = constrain16(p1, row, pri_strength, pri_damping);
-
- // sum += pri_taps[0] * (p0 + p1)
- sum = v256_add_16(
- sum, v256_mullo_s16(v256_dup_16(pri_taps[0]), v256_add_16(p0, p1)));
-
- // Primary far taps
- p0 = v256_from_v64(v64_load_unaligned(&in[i * CDEF_BSTRIDE + po2]),
- v64_load_unaligned(&in[(i + 1) * CDEF_BSTRIDE + po2]),
- v64_load_unaligned(&in[(i + 2) * CDEF_BSTRIDE + po2]),
- v64_load_unaligned(&in[(i + 3) * CDEF_BSTRIDE + po2]));
- p1 = v256_from_v64(v64_load_unaligned(&in[i * CDEF_BSTRIDE - po2]),
- v64_load_unaligned(&in[(i + 1) * CDEF_BSTRIDE - po2]),
- v64_load_unaligned(&in[(i + 2) * CDEF_BSTRIDE - po2]),
- v64_load_unaligned(&in[(i + 3) * CDEF_BSTRIDE - po2]));
- max =
- v256_max_s16(v256_max_s16(max, v256_andn(p0, v256_cmpeq_16(p0, large))),
- v256_andn(p1, v256_cmpeq_16(p1, large)));
- min = v256_min_s16(v256_min_s16(min, p0), p1);
- p0 = constrain16(p0, row, pri_strength, pri_damping);
- p1 = constrain16(p1, row, pri_strength, pri_damping);
-
- // sum += pri_taps[1] * (p0 + p1)
- sum = v256_add_16(
- sum, v256_mullo_s16(v256_dup_16(pri_taps[1]), v256_add_16(p0, p1)));
-
- // Secondary near taps
- p0 = v256_from_v64(v64_load_unaligned(&in[i * CDEF_BSTRIDE + s1o1]),
- v64_load_unaligned(&in[(i + 1) * CDEF_BSTRIDE + s1o1]),
- v64_load_unaligned(&in[(i + 2) * CDEF_BSTRIDE + s1o1]),
- v64_load_unaligned(&in[(i + 3) * CDEF_BSTRIDE + s1o1]));
- p1 = v256_from_v64(v64_load_unaligned(&in[i * CDEF_BSTRIDE - s1o1]),
- v64_load_unaligned(&in[(i + 1) * CDEF_BSTRIDE - s1o1]),
- v64_load_unaligned(&in[(i + 2) * CDEF_BSTRIDE - s1o1]),
- v64_load_unaligned(&in[(i + 3) * CDEF_BSTRIDE - s1o1]));
- p2 = v256_from_v64(v64_load_unaligned(&in[i * CDEF_BSTRIDE + s2o1]),
- v64_load_unaligned(&in[(i + 1) * CDEF_BSTRIDE + s2o1]),
- v64_load_unaligned(&in[(i + 2) * CDEF_BSTRIDE + s2o1]),
- v64_load_unaligned(&in[(i + 3) * CDEF_BSTRIDE + s2o1]));
- p3 = v256_from_v64(v64_load_unaligned(&in[i * CDEF_BSTRIDE - s2o1]),
- v64_load_unaligned(&in[(i + 1) * CDEF_BSTRIDE - s2o1]),
- v64_load_unaligned(&in[(i + 2) * CDEF_BSTRIDE - s2o1]),
- v64_load_unaligned(&in[(i + 3) * CDEF_BSTRIDE - s2o1]));
- max =
- v256_max_s16(v256_max_s16(max, v256_andn(p0, v256_cmpeq_16(p0, large))),
- v256_andn(p1, v256_cmpeq_16(p1, large)));
- max =
- v256_max_s16(v256_max_s16(max, v256_andn(p2, v256_cmpeq_16(p2, large))),
- v256_andn(p3, v256_cmpeq_16(p3, large)));
- min = v256_min_s16(
- v256_min_s16(v256_min_s16(v256_min_s16(min, p0), p1), p2), p3);
- p0 = constrain16(p0, row, sec_strength, sec_damping);
- p1 = constrain16(p1, row, sec_strength, sec_damping);
- p2 = constrain16(p2, row, sec_strength, sec_damping);
- p3 = constrain16(p3, row, sec_strength, sec_damping);
-
- // sum += sec_taps[0] * (p0 + p1 + p2 + p3)
- sum = v256_add_16(sum, v256_mullo_s16(v256_dup_16(sec_taps[0]),
- v256_add_16(v256_add_16(p0, p1),
- v256_add_16(p2, p3))));
-
- // Secondary far taps
- p0 = v256_from_v64(v64_load_unaligned(&in[i * CDEF_BSTRIDE + s1o2]),
- v64_load_unaligned(&in[(i + 1) * CDEF_BSTRIDE + s1o2]),
- v64_load_unaligned(&in[(i + 2) * CDEF_BSTRIDE + s1o2]),
- v64_load_unaligned(&in[(i + 3) * CDEF_BSTRIDE + s1o2]));
- p1 = v256_from_v64(v64_load_unaligned(&in[i * CDEF_BSTRIDE - s1o2]),
- v64_load_unaligned(&in[(i + 1) * CDEF_BSTRIDE - s1o2]),
- v64_load_unaligned(&in[(i + 2) * CDEF_BSTRIDE - s1o2]),
- v64_load_unaligned(&in[(i + 3) * CDEF_BSTRIDE - s1o2]));
- p2 = v256_from_v64(v64_load_unaligned(&in[i * CDEF_BSTRIDE + s2o2]),
- v64_load_unaligned(&in[(i + 1) * CDEF_BSTRIDE + s2o2]),
- v64_load_unaligned(&in[(i + 2) * CDEF_BSTRIDE + s2o2]),
- v64_load_unaligned(&in[(i + 3) * CDEF_BSTRIDE + s2o2]));
- p3 = v256_from_v64(v64_load_unaligned(&in[i * CDEF_BSTRIDE - s2o2]),
- v64_load_unaligned(&in[(i + 1) * CDEF_BSTRIDE - s2o2]),
- v64_load_unaligned(&in[(i + 2) * CDEF_BSTRIDE - s2o2]),
- v64_load_unaligned(&in[(i + 3) * CDEF_BSTRIDE - s2o2]));
- max =
- v256_max_s16(v256_max_s16(max, v256_andn(p0, v256_cmpeq_16(p0, large))),
- v256_andn(p1, v256_cmpeq_16(p1, large)));
- max =
- v256_max_s16(v256_max_s16(max, v256_andn(p2, v256_cmpeq_16(p2, large))),
- v256_andn(p3, v256_cmpeq_16(p3, large)));
- min = v256_min_s16(
- v256_min_s16(v256_min_s16(v256_min_s16(min, p0), p1), p2), p3);
- p0 = constrain16(p0, row, sec_strength, sec_damping);
- p1 = constrain16(p1, row, sec_strength, sec_damping);
- p2 = constrain16(p2, row, sec_strength, sec_damping);
- p3 = constrain16(p3, row, sec_strength, sec_damping);
-
- // sum += sec_taps[1] * (p0 + p1 + p2 + p3)
- sum = v256_add_16(sum, v256_mullo_s16(v256_dup_16(sec_taps[1]),
- v256_add_16(v256_add_16(p0, p1),
- v256_add_16(p2, p3))));
-
- // res = row + ((sum - (sum < 0) + 8) >> 4)
- sum = v256_add_16(sum, v256_cmplt_s16(sum, v256_zero()));
- res = v256_add_16(sum, v256_dup_16(8));
- res = v256_shr_n_s16(res, 4);
- res = v256_add_16(row, res);
- res = v256_min_s16(v256_max_s16(res, min), max);
-
- v64_store_aligned(&dst[i * dstride], v128_high_v64(v256_high_v128(res)));
- v64_store_aligned(&dst[(i + 1) * dstride],
- v128_low_v64(v256_high_v128(res)));
- v64_store_aligned(&dst[(i + 2) * dstride],
- v128_high_v64(v256_low_v128(res)));
- v64_store_aligned(&dst[(i + 3) * dstride],
- v128_low_v64(v256_low_v128(res)));
- }
-}
-
-void SIMD_FUNC(cdef_filter_block_8x8_16)(uint16_t *dst, int dstride,
- const uint16_t *in, int pri_strength,
- int sec_strength, int dir,
- int pri_damping, int sec_damping,
- AOM_UNUSED int max_unused,
- int coeff_shift) {
- int i;
- v256 sum, p0, p1, p2, p3, row, res;
- v256 max, min, large = v256_dup_16(CDEF_VERY_LARGE);
- int po1 = cdef_directions[dir][0];
- int po2 = cdef_directions[dir][1];
- int s1o1 = cdef_directions[(dir + 2) & 7][0];
- int s1o2 = cdef_directions[(dir + 2) & 7][1];
- int s2o1 = cdef_directions[(dir + 6) & 7][0];
- int s2o2 = cdef_directions[(dir + 6) & 7][1];
-
- const int *pri_taps = cdef_pri_taps[(pri_strength >> coeff_shift) & 1];
- const int *sec_taps = cdef_sec_taps[(pri_strength >> coeff_shift) & 1];
-
- if (pri_strength)
- pri_damping = AOMMAX(0, pri_damping - get_msb(pri_strength));
- if (sec_strength)
- sec_damping = AOMMAX(0, sec_damping - get_msb(sec_strength));
-
- for (i = 0; i < 8; i += 2) {
- sum = v256_zero();
- row = v256_from_v128(v128_load_aligned(&in[i * CDEF_BSTRIDE]),
- v128_load_aligned(&in[(i + 1) * CDEF_BSTRIDE]));
-
- min = max = row;
- // Primary near taps
- p0 = v256_from_v128(v128_load_unaligned(&in[i * CDEF_BSTRIDE + po1]),
- v128_load_unaligned(&in[(i + 1) * CDEF_BSTRIDE + po1]));
- p1 = v256_from_v128(v128_load_unaligned(&in[i * CDEF_BSTRIDE - po1]),
- v128_load_unaligned(&in[(i + 1) * CDEF_BSTRIDE - po1]));
- max =
- v256_max_s16(v256_max_s16(max, v256_andn(p0, v256_cmpeq_16(p0, large))),
- v256_andn(p1, v256_cmpeq_16(p1, large)));
- min = v256_min_s16(v256_min_s16(min, p0), p1);
- p0 = constrain16(p0, row, pri_strength, pri_damping);
- p1 = constrain16(p1, row, pri_strength, pri_damping);
-
- // sum += pri_taps[0] * (p0 + p1)
- sum = v256_add_16(
- sum, v256_mullo_s16(v256_dup_16(pri_taps[0]), v256_add_16(p0, p1)));
-
- // Primary far taps
- p0 = v256_from_v128(v128_load_unaligned(&in[i * CDEF_BSTRIDE + po2]),
- v128_load_unaligned(&in[(i + 1) * CDEF_BSTRIDE + po2]));
- p1 = v256_from_v128(v128_load_unaligned(&in[i * CDEF_BSTRIDE - po2]),
- v128_load_unaligned(&in[(i + 1) * CDEF_BSTRIDE - po2]));
- max =
- v256_max_s16(v256_max_s16(max, v256_andn(p0, v256_cmpeq_16(p0, large))),
- v256_andn(p1, v256_cmpeq_16(p1, large)));
- min = v256_min_s16(v256_min_s16(min, p0), p1);
- p0 = constrain16(p0, row, pri_strength, pri_damping);
- p1 = constrain16(p1, row, pri_strength, pri_damping);
-
- // sum += pri_taps[1] * (p0 + p1)
- sum = v256_add_16(
- sum, v256_mullo_s16(v256_dup_16(pri_taps[1]), v256_add_16(p0, p1)));
-
- // Secondary near taps
- p0 =
- v256_from_v128(v128_load_unaligned(&in[i * CDEF_BSTRIDE + s1o1]),
- v128_load_unaligned(&in[(i + 1) * CDEF_BSTRIDE + s1o1]));
- p1 =
- v256_from_v128(v128_load_unaligned(&in[i * CDEF_BSTRIDE - s1o1]),
- v128_load_unaligned(&in[(i + 1) * CDEF_BSTRIDE - s1o1]));
- p2 =
- v256_from_v128(v128_load_unaligned(&in[i * CDEF_BSTRIDE + s2o1]),
- v128_load_unaligned(&in[(i + 1) * CDEF_BSTRIDE + s2o1]));
- p3 =
- v256_from_v128(v128_load_unaligned(&in[i * CDEF_BSTRIDE - s2o1]),
- v128_load_unaligned(&in[(i + 1) * CDEF_BSTRIDE - s2o1]));
- max =
- v256_max_s16(v256_max_s16(max, v256_andn(p0, v256_cmpeq_16(p0, large))),
- v256_andn(p1, v256_cmpeq_16(p1, large)));
- max =
- v256_max_s16(v256_max_s16(max, v256_andn(p2, v256_cmpeq_16(p2, large))),
- v256_andn(p3, v256_cmpeq_16(p3, large)));
- min = v256_min_s16(
- v256_min_s16(v256_min_s16(v256_min_s16(min, p0), p1), p2), p3);
- p0 = constrain16(p0, row, sec_strength, sec_damping);
- p1 = constrain16(p1, row, sec_strength, sec_damping);
- p2 = constrain16(p2, row, sec_strength, sec_damping);
- p3 = constrain16(p3, row, sec_strength, sec_damping);
-
- // sum += sec_taps[0] * (p0 + p1 + p2 + p3)
- sum = v256_add_16(sum, v256_mullo_s16(v256_dup_16(sec_taps[0]),
- v256_add_16(v256_add_16(p0, p1),
- v256_add_16(p2, p3))));
-
- // Secondary far taps
- p0 =
- v256_from_v128(v128_load_unaligned(&in[i * CDEF_BSTRIDE + s1o2]),
- v128_load_unaligned(&in[(i + 1) * CDEF_BSTRIDE + s1o2]));
- p1 =
- v256_from_v128(v128_load_unaligned(&in[i * CDEF_BSTRIDE - s1o2]),
- v128_load_unaligned(&in[(i + 1) * CDEF_BSTRIDE - s1o2]));
- p2 =
- v256_from_v128(v128_load_unaligned(&in[i * CDEF_BSTRIDE + s2o2]),
- v128_load_unaligned(&in[(i + 1) * CDEF_BSTRIDE + s2o2]));
- p3 =
- v256_from_v128(v128_load_unaligned(&in[i * CDEF_BSTRIDE - s2o2]),
- v128_load_unaligned(&in[(i + 1) * CDEF_BSTRIDE - s2o2]));
- max =
- v256_max_s16(v256_max_s16(max, v256_andn(p0, v256_cmpeq_16(p0, large))),
- v256_andn(p1, v256_cmpeq_16(p1, large)));
- max =
- v256_max_s16(v256_max_s16(max, v256_andn(p2, v256_cmpeq_16(p2, large))),
- v256_andn(p3, v256_cmpeq_16(p3, large)));
- min = v256_min_s16(
- v256_min_s16(v256_min_s16(v256_min_s16(min, p0), p1), p2), p3);
- p0 = constrain16(p0, row, sec_strength, sec_damping);
- p1 = constrain16(p1, row, sec_strength, sec_damping);
- p2 = constrain16(p2, row, sec_strength, sec_damping);
- p3 = constrain16(p3, row, sec_strength, sec_damping);
-
- // sum += sec_taps[1] * (p0 + p1 + p2 + p3)
- sum = v256_add_16(sum, v256_mullo_s16(v256_dup_16(sec_taps[1]),
- v256_add_16(v256_add_16(p0, p1),
- v256_add_16(p2, p3))));
-
- // res = row + ((sum - (sum < 0) + 8) >> 4)
- sum = v256_add_16(sum, v256_cmplt_s16(sum, v256_zero()));
- res = v256_add_16(sum, v256_dup_16(8));
- res = v256_shr_n_s16(res, 4);
- res = v256_add_16(row, res);
- res = v256_min_s16(v256_max_s16(res, min), max);
- v128_store_unaligned(&dst[i * dstride], v256_high_v128(res));
- v128_store_unaligned(&dst[(i + 1) * dstride], v256_low_v128(res));
- }
-}
-
-void SIMD_FUNC(cdef_filter_block)(uint8_t *dst8, uint16_t *dst16, int dstride,
- const uint16_t *in, int pri_strength,
- int sec_strength, int dir, int pri_damping,
- int sec_damping, int bsize, int max,
- int coeff_shift) {
- if (dst8) {
- if (bsize == BLOCK_8X8) {
- SIMD_FUNC(cdef_filter_block_8x8_8)
- (dst8, dstride, in, pri_strength, sec_strength, dir, pri_damping,
- sec_damping, max, coeff_shift);
- } else if (bsize == BLOCK_4X8) {
- SIMD_FUNC(cdef_filter_block_4x4_8)
- (dst8, dstride, in, pri_strength, sec_strength, dir, pri_damping,
- sec_damping, max, coeff_shift);
- SIMD_FUNC(cdef_filter_block_4x4_8)
- (dst8 + 4 * dstride, dstride, in + 4 * CDEF_BSTRIDE, pri_strength,
- sec_strength, dir, pri_damping, sec_damping, max, coeff_shift);
- } else if (bsize == BLOCK_8X4) {
- SIMD_FUNC(cdef_filter_block_4x4_8)
- (dst8, dstride, in, pri_strength, sec_strength, dir, pri_damping,
- sec_damping, max, coeff_shift);
- SIMD_FUNC(cdef_filter_block_4x4_8)
- (dst8 + 4, dstride, in + 4, pri_strength, sec_strength, dir, pri_damping,
- sec_damping, max, coeff_shift);
- } else {
- SIMD_FUNC(cdef_filter_block_4x4_8)
- (dst8, dstride, in, pri_strength, sec_strength, dir, pri_damping,
- sec_damping, max, coeff_shift);
- }
- } else {
- if (bsize == BLOCK_8X8) {
- SIMD_FUNC(cdef_filter_block_8x8_16)
- (dst16, dstride, in, pri_strength, sec_strength, dir, pri_damping,
- sec_damping, max, coeff_shift);
- } else if (bsize == BLOCK_4X8) {
- SIMD_FUNC(cdef_filter_block_4x4_16)
- (dst16, dstride, in, pri_strength, sec_strength, dir, pri_damping,
- sec_damping, max, coeff_shift);
- SIMD_FUNC(cdef_filter_block_4x4_16)
- (dst16 + 4 * dstride, dstride, in + 4 * CDEF_BSTRIDE, pri_strength,
- sec_strength, dir, pri_damping, sec_damping, max, coeff_shift);
- } else if (bsize == BLOCK_8X4) {
- SIMD_FUNC(cdef_filter_block_4x4_16)
- (dst16, dstride, in, pri_strength, sec_strength, dir, pri_damping,
- sec_damping, max, coeff_shift);
- SIMD_FUNC(cdef_filter_block_4x4_16)
- (dst16 + 4, dstride, in + 4, pri_strength, sec_strength, dir, pri_damping,
- sec_damping, max, coeff_shift);
- } else {
- assert(bsize == BLOCK_4X4);
- SIMD_FUNC(cdef_filter_block_4x4_16)
- (dst16, dstride, in, pri_strength, sec_strength, dir, pri_damping,
- sec_damping, max, coeff_shift);
- }
- }
-}
-
-void SIMD_FUNC(copy_rect8_8bit_to_16bit)(uint16_t *dst, int dstride,
- const uint8_t *src, int sstride, int v,
- int h) {
- int i, j;
- for (i = 0; i < v; i++) {
- for (j = 0; j < (h & ~0x7); j += 8) {
- v64 row = v64_load_unaligned(&src[i * sstride + j]);
- v128_store_unaligned(&dst[i * dstride + j], v128_unpack_u8_s16(row));
- }
- for (; j < h; j++) {
- dst[i * dstride + j] = src[i * sstride + j];
- }
- }
-}
-
-void SIMD_FUNC(copy_rect8_16bit_to_16bit)(uint16_t *dst, int dstride,
- const uint16_t *src, int sstride,
- int v, int h) {
- int i, j;
- for (i = 0; i < v; i++) {
- for (j = 0; j < (h & ~0x7); j += 8) {
- v128 row = v128_load_unaligned(&src[i * sstride + j]);
- v128_store_unaligned(&dst[i * dstride + j], row);
- }
- for (; j < h; j++) {
- dst[i * dstride + j] = src[i * sstride + j];
- }
- }
-}
-
-#endif // AOM_AV1_COMMON_CDEF_BLOCK_SIMD_H_
diff --git a/third_party/aom/av1/common/cdef_block_sse2.c b/third_party/aom/av1/common/cdef_block_sse2.c
deleted file mode 100644
index 73f115d17..000000000
--- a/third_party/aom/av1/common/cdef_block_sse2.c
+++ /dev/null
@@ -1,14 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#include "aom_dsp/aom_simd.h"
-#define SIMD_FUNC(name) name##_sse2
-#include "av1/common/cdef_block_simd.h"
diff --git a/third_party/aom/av1/common/cdef_block_sse4.c b/third_party/aom/av1/common/cdef_block_sse4.c
deleted file mode 100644
index 349329af6..000000000
--- a/third_party/aom/av1/common/cdef_block_sse4.c
+++ /dev/null
@@ -1,14 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#include "aom_dsp/aom_simd.h"
-#define SIMD_FUNC(name) name##_sse4_1
-#include "av1/common/cdef_block_simd.h"
diff --git a/third_party/aom/av1/common/cdef_block_ssse3.c b/third_party/aom/av1/common/cdef_block_ssse3.c
deleted file mode 100644
index 3a93b150f..000000000
--- a/third_party/aom/av1/common/cdef_block_ssse3.c
+++ /dev/null
@@ -1,14 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#include "aom_dsp/aom_simd.h"
-#define SIMD_FUNC(name) name##_ssse3
-#include "av1/common/cdef_block_simd.h"
diff --git a/third_party/aom/av1/common/cfl.c b/third_party/aom/av1/common/cfl.c
deleted file mode 100644
index ccc59b4eb..000000000
--- a/third_party/aom/av1/common/cfl.c
+++ /dev/null
@@ -1,448 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#include "av1/common/cfl.h"
-#include "av1/common/common_data.h"
-#include "av1/common/onyxc_int.h"
-
-#include "config/av1_rtcd.h"
-
-void cfl_init(CFL_CTX *cfl, const SequenceHeader *seq_params) {
- assert(block_size_wide[CFL_MAX_BLOCK_SIZE] == CFL_BUF_LINE);
- assert(block_size_high[CFL_MAX_BLOCK_SIZE] == CFL_BUF_LINE);
-
- memset(&cfl->recon_buf_q3, 0, sizeof(cfl->recon_buf_q3));
- memset(&cfl->ac_buf_q3, 0, sizeof(cfl->ac_buf_q3));
- cfl->subsampling_x = seq_params->subsampling_x;
- cfl->subsampling_y = seq_params->subsampling_y;
- cfl->are_parameters_computed = 0;
- cfl->store_y = 0;
- // The DC_PRED cache is disabled by default and is only enabled in
- // cfl_rd_pick_alpha
- cfl->use_dc_pred_cache = 0;
- cfl->dc_pred_is_cached[CFL_PRED_U] = 0;
- cfl->dc_pred_is_cached[CFL_PRED_V] = 0;
-}
-
-void cfl_store_dc_pred(MACROBLOCKD *const xd, const uint8_t *input,
- CFL_PRED_TYPE pred_plane, int width) {
- assert(pred_plane < CFL_PRED_PLANES);
- assert(width <= CFL_BUF_LINE);
-
- if (get_bitdepth_data_path_index(xd)) {
- uint16_t *const input_16 = CONVERT_TO_SHORTPTR(input);
- memcpy(xd->cfl.dc_pred_cache[pred_plane], input_16, width << 1);
- return;
- }
-
- memcpy(xd->cfl.dc_pred_cache[pred_plane], input, width);
-}
-
-static void cfl_load_dc_pred_lbd(const int16_t *dc_pred_cache, uint8_t *dst,
- int dst_stride, int width, int height) {
- for (int j = 0; j < height; j++) {
- memcpy(dst, dc_pred_cache, width);
- dst += dst_stride;
- }
-}
-
-static void cfl_load_dc_pred_hbd(const int16_t *dc_pred_cache, uint16_t *dst,
- int dst_stride, int width, int height) {
- const size_t num_bytes = width << 1;
- for (int j = 0; j < height; j++) {
- memcpy(dst, dc_pred_cache, num_bytes);
- dst += dst_stride;
- }
-}
-void cfl_load_dc_pred(MACROBLOCKD *const xd, uint8_t *dst, int dst_stride,
- TX_SIZE tx_size, CFL_PRED_TYPE pred_plane) {
- const int width = tx_size_wide[tx_size];
- const int height = tx_size_high[tx_size];
- assert(pred_plane < CFL_PRED_PLANES);
- assert(width <= CFL_BUF_LINE);
- assert(height <= CFL_BUF_LINE);
- if (get_bitdepth_data_path_index(xd)) {
- uint16_t *dst_16 = CONVERT_TO_SHORTPTR(dst);
- cfl_load_dc_pred_hbd(xd->cfl.dc_pred_cache[pred_plane], dst_16, dst_stride,
- width, height);
- return;
- }
- cfl_load_dc_pred_lbd(xd->cfl.dc_pred_cache[pred_plane], dst, dst_stride,
- width, height);
-}
-
-// Due to frame boundary issues, it is possible that the total area covered by
-// chroma exceeds that of luma. When this happens, we fill the missing pixels by
-// repeating the last columns and/or rows.
-static INLINE void cfl_pad(CFL_CTX *cfl, int width, int height) {
- const int diff_width = width - cfl->buf_width;
- const int diff_height = height - cfl->buf_height;
-
- if (diff_width > 0) {
- const int min_height = height - diff_height;
- uint16_t *recon_buf_q3 = cfl->recon_buf_q3 + (width - diff_width);
- for (int j = 0; j < min_height; j++) {
- const uint16_t last_pixel = recon_buf_q3[-1];
- assert(recon_buf_q3 + diff_width <= cfl->recon_buf_q3 + CFL_BUF_SQUARE);
- for (int i = 0; i < diff_width; i++) {
- recon_buf_q3[i] = last_pixel;
- }
- recon_buf_q3 += CFL_BUF_LINE;
- }
- cfl->buf_width = width;
- }
- if (diff_height > 0) {
- uint16_t *recon_buf_q3 =
- cfl->recon_buf_q3 + ((height - diff_height) * CFL_BUF_LINE);
- for (int j = 0; j < diff_height; j++) {
- const uint16_t *last_row_q3 = recon_buf_q3 - CFL_BUF_LINE;
- assert(recon_buf_q3 + width <= cfl->recon_buf_q3 + CFL_BUF_SQUARE);
- for (int i = 0; i < width; i++) {
- recon_buf_q3[i] = last_row_q3[i];
- }
- recon_buf_q3 += CFL_BUF_LINE;
- }
- cfl->buf_height = height;
- }
-}
-
-static void subtract_average_c(const uint16_t *src, int16_t *dst, int width,
- int height, int round_offset, int num_pel_log2) {
- int sum = round_offset;
- const uint16_t *recon = src;
- for (int j = 0; j < height; j++) {
- for (int i = 0; i < width; i++) {
- sum += recon[i];
- }
- recon += CFL_BUF_LINE;
- }
- const int avg = sum >> num_pel_log2;
- for (int j = 0; j < height; j++) {
- for (int i = 0; i < width; i++) {
- dst[i] = src[i] - avg;
- }
- src += CFL_BUF_LINE;
- dst += CFL_BUF_LINE;
- }
-}
-
-CFL_SUB_AVG_FN(c)
-
-static INLINE int cfl_idx_to_alpha(int alpha_idx, int joint_sign,
- CFL_PRED_TYPE pred_type) {
- const int alpha_sign = (pred_type == CFL_PRED_U) ? CFL_SIGN_U(joint_sign)
- : CFL_SIGN_V(joint_sign);
- if (alpha_sign == CFL_SIGN_ZERO) return 0;
- const int abs_alpha_q3 =
- (pred_type == CFL_PRED_U) ? CFL_IDX_U(alpha_idx) : CFL_IDX_V(alpha_idx);
- return (alpha_sign == CFL_SIGN_POS) ? abs_alpha_q3 + 1 : -abs_alpha_q3 - 1;
-}
-
-static INLINE void cfl_predict_lbd_c(const int16_t *ac_buf_q3, uint8_t *dst,
- int dst_stride, int alpha_q3, int width,
- int height) {
- for (int j = 0; j < height; j++) {
- for (int i = 0; i < width; i++) {
- dst[i] = clip_pixel(get_scaled_luma_q0(alpha_q3, ac_buf_q3[i]) + dst[i]);
- }
- dst += dst_stride;
- ac_buf_q3 += CFL_BUF_LINE;
- }
-}
-
-// Null function used for invalid tx_sizes
-void cfl_predict_lbd_null(const int16_t *ac_buf_q3, uint8_t *dst,
- int dst_stride, int alpha_q3) {
- (void)ac_buf_q3;
- (void)dst;
- (void)dst_stride;
- (void)alpha_q3;
- assert(0);
-}
-
-CFL_PREDICT_FN(c, lbd)
-
-void cfl_predict_hbd_c(const int16_t *ac_buf_q3, uint16_t *dst, int dst_stride,
- int alpha_q3, int bit_depth, int width, int height) {
- for (int j = 0; j < height; j++) {
- for (int i = 0; i < width; i++) {
- dst[i] = clip_pixel_highbd(
- get_scaled_luma_q0(alpha_q3, ac_buf_q3[i]) + dst[i], bit_depth);
- }
- dst += dst_stride;
- ac_buf_q3 += CFL_BUF_LINE;
- }
-}
-
-// Null function used for invalid tx_sizes
-void cfl_predict_hbd_null(const int16_t *ac_buf_q3, uint16_t *dst,
- int dst_stride, int alpha_q3, int bd) {
- (void)ac_buf_q3;
- (void)dst;
- (void)dst_stride;
- (void)alpha_q3;
- (void)bd;
- assert(0);
-}
-
-CFL_PREDICT_FN(c, hbd)
-
-static void cfl_compute_parameters(MACROBLOCKD *const xd, TX_SIZE tx_size) {
- CFL_CTX *const cfl = &xd->cfl;
- // Do not call cfl_compute_parameters multiple time on the same values.
- assert(cfl->are_parameters_computed == 0);
-
- cfl_pad(cfl, tx_size_wide[tx_size], tx_size_high[tx_size]);
- get_subtract_average_fn(tx_size)(cfl->recon_buf_q3, cfl->ac_buf_q3);
- cfl->are_parameters_computed = 1;
-}
-
-void cfl_predict_block(MACROBLOCKD *const xd, uint8_t *dst, int dst_stride,
- TX_SIZE tx_size, int plane) {
- CFL_CTX *const cfl = &xd->cfl;
- MB_MODE_INFO *mbmi = xd->mi[0];
- assert(is_cfl_allowed(xd));
-
- if (!cfl->are_parameters_computed) cfl_compute_parameters(xd, tx_size);
-
- const int alpha_q3 =
- cfl_idx_to_alpha(mbmi->cfl_alpha_idx, mbmi->cfl_alpha_signs, plane - 1);
- assert((tx_size_high[tx_size] - 1) * CFL_BUF_LINE + tx_size_wide[tx_size] <=
- CFL_BUF_SQUARE);
- if (get_bitdepth_data_path_index(xd)) {
- uint16_t *dst_16 = CONVERT_TO_SHORTPTR(dst);
- get_predict_hbd_fn(tx_size)(cfl->ac_buf_q3, dst_16, dst_stride, alpha_q3,
- xd->bd);
- return;
- }
- get_predict_lbd_fn(tx_size)(cfl->ac_buf_q3, dst, dst_stride, alpha_q3);
-}
-
-// Null function used for invalid tx_sizes
-void cfl_subsample_lbd_null(const uint8_t *input, int input_stride,
- uint16_t *output_q3) {
- (void)input;
- (void)input_stride;
- (void)output_q3;
- assert(0);
-}
-
-// Null function used for invalid tx_sizes
-void cfl_subsample_hbd_null(const uint16_t *input, int input_stride,
- uint16_t *output_q3) {
- (void)input;
- (void)input_stride;
- (void)output_q3;
- assert(0);
-}
-
-static void cfl_luma_subsampling_420_lbd_c(const uint8_t *input,
- int input_stride,
- uint16_t *output_q3, int width,
- int height) {
- for (int j = 0; j < height; j += 2) {
- for (int i = 0; i < width; i += 2) {
- const int bot = i + input_stride;
- output_q3[i >> 1] =
- (input[i] + input[i + 1] + input[bot] + input[bot + 1]) << 1;
- }
- input += input_stride << 1;
- output_q3 += CFL_BUF_LINE;
- }
-}
-
-static void cfl_luma_subsampling_422_lbd_c(const uint8_t *input,
- int input_stride,
- uint16_t *output_q3, int width,
- int height) {
- assert((height - 1) * CFL_BUF_LINE + width <= CFL_BUF_SQUARE);
- for (int j = 0; j < height; j++) {
- for (int i = 0; i < width; i += 2) {
- output_q3[i >> 1] = (input[i] + input[i + 1]) << 2;
- }
- input += input_stride;
- output_q3 += CFL_BUF_LINE;
- }
-}
-
-static void cfl_luma_subsampling_444_lbd_c(const uint8_t *input,
- int input_stride,
- uint16_t *output_q3, int width,
- int height) {
- assert((height - 1) * CFL_BUF_LINE + width <= CFL_BUF_SQUARE);
- for (int j = 0; j < height; j++) {
- for (int i = 0; i < width; i++) {
- output_q3[i] = input[i] << 3;
- }
- input += input_stride;
- output_q3 += CFL_BUF_LINE;
- }
-}
-
-static void cfl_luma_subsampling_420_hbd_c(const uint16_t *input,
- int input_stride,
- uint16_t *output_q3, int width,
- int height) {
- for (int j = 0; j < height; j += 2) {
- for (int i = 0; i < width; i += 2) {
- const int bot = i + input_stride;
- output_q3[i >> 1] =
- (input[i] + input[i + 1] + input[bot] + input[bot + 1]) << 1;
- }
- input += input_stride << 1;
- output_q3 += CFL_BUF_LINE;
- }
-}
-
-static void cfl_luma_subsampling_422_hbd_c(const uint16_t *input,
- int input_stride,
- uint16_t *output_q3, int width,
- int height) {
- assert((height - 1) * CFL_BUF_LINE + width <= CFL_BUF_SQUARE);
- for (int j = 0; j < height; j++) {
- for (int i = 0; i < width; i += 2) {
- output_q3[i >> 1] = (input[i] + input[i + 1]) << 2;
- }
- input += input_stride;
- output_q3 += CFL_BUF_LINE;
- }
-}
-
-static void cfl_luma_subsampling_444_hbd_c(const uint16_t *input,
- int input_stride,
- uint16_t *output_q3, int width,
- int height) {
- assert((height - 1) * CFL_BUF_LINE + width <= CFL_BUF_SQUARE);
- for (int j = 0; j < height; j++) {
- for (int i = 0; i < width; i++) {
- output_q3[i] = input[i] << 3;
- }
- input += input_stride;
- output_q3 += CFL_BUF_LINE;
- }
-}
-
-CFL_GET_SUBSAMPLE_FUNCTION(c)
-
-static INLINE cfl_subsample_hbd_fn cfl_subsampling_hbd(TX_SIZE tx_size,
- int sub_x, int sub_y) {
- if (sub_x == 1) {
- if (sub_y == 1) {
- return cfl_get_luma_subsampling_420_hbd(tx_size);
- }
- return cfl_get_luma_subsampling_422_hbd(tx_size);
- }
- return cfl_get_luma_subsampling_444_hbd(tx_size);
-}
-
-static INLINE cfl_subsample_lbd_fn cfl_subsampling_lbd(TX_SIZE tx_size,
- int sub_x, int sub_y) {
- if (sub_x == 1) {
- if (sub_y == 1) {
- return cfl_get_luma_subsampling_420_lbd(tx_size);
- }
- return cfl_get_luma_subsampling_422_lbd(tx_size);
- }
- return cfl_get_luma_subsampling_444_lbd(tx_size);
-}
-
-static void cfl_store(CFL_CTX *cfl, const uint8_t *input, int input_stride,
- int row, int col, TX_SIZE tx_size, int use_hbd) {
- const int width = tx_size_wide[tx_size];
- const int height = tx_size_high[tx_size];
- const int tx_off_log2 = tx_size_wide_log2[0];
- const int sub_x = cfl->subsampling_x;
- const int sub_y = cfl->subsampling_y;
- const int store_row = row << (tx_off_log2 - sub_y);
- const int store_col = col << (tx_off_log2 - sub_x);
- const int store_height = height >> sub_y;
- const int store_width = width >> sub_x;
-
- // Invalidate current parameters
- cfl->are_parameters_computed = 0;
-
- // Store the surface of the pixel buffer that was written to, this way we
- // can manage chroma overrun (e.g. when the chroma surfaces goes beyond the
- // frame boundary)
- if (col == 0 && row == 0) {
- cfl->buf_width = store_width;
- cfl->buf_height = store_height;
- } else {
- cfl->buf_width = OD_MAXI(store_col + store_width, cfl->buf_width);
- cfl->buf_height = OD_MAXI(store_row + store_height, cfl->buf_height);
- }
-
- // Check that we will remain inside the pixel buffer.
- assert(store_row + store_height <= CFL_BUF_LINE);
- assert(store_col + store_width <= CFL_BUF_LINE);
-
- // Store the input into the CfL pixel buffer
- uint16_t *recon_buf_q3 =
- cfl->recon_buf_q3 + (store_row * CFL_BUF_LINE + store_col);
-
- if (use_hbd) {
- cfl_subsampling_hbd(tx_size, sub_x, sub_y)(CONVERT_TO_SHORTPTR(input),
- input_stride, recon_buf_q3);
- } else {
- cfl_subsampling_lbd(tx_size, sub_x, sub_y)(input, input_stride,
- recon_buf_q3);
- }
-}
-
-// Adjust the row and column of blocks smaller than 8X8, as chroma-referenced
-// and non-chroma-referenced blocks are stored together in the CfL buffer.
-static INLINE void sub8x8_adjust_offset(const CFL_CTX *cfl, int *row_out,
- int *col_out) {
- // Increment row index for bottom: 8x4, 16x4 or both bottom 4x4s.
- if ((cfl->mi_row & 0x01) && cfl->subsampling_y) {
- assert(*row_out == 0);
- (*row_out)++;
- }
-
- // Increment col index for right: 4x8, 4x16 or both right 4x4s.
- if ((cfl->mi_col & 0x01) && cfl->subsampling_x) {
- assert(*col_out == 0);
- (*col_out)++;
- }
-}
-
-void cfl_store_tx(MACROBLOCKD *const xd, int row, int col, TX_SIZE tx_size,
- BLOCK_SIZE bsize) {
- CFL_CTX *const cfl = &xd->cfl;
- struct macroblockd_plane *const pd = &xd->plane[AOM_PLANE_Y];
- uint8_t *dst =
- &pd->dst.buf[(row * pd->dst.stride + col) << tx_size_wide_log2[0]];
-
- if (block_size_high[bsize] == 4 || block_size_wide[bsize] == 4) {
- // Only dimensions of size 4 can have an odd offset.
- assert(!((col & 1) && tx_size_wide[tx_size] != 4));
- assert(!((row & 1) && tx_size_high[tx_size] != 4));
- sub8x8_adjust_offset(cfl, &row, &col);
- }
- cfl_store(cfl, dst, pd->dst.stride, row, col, tx_size,
- get_bitdepth_data_path_index(xd));
-}
-
-void cfl_store_block(MACROBLOCKD *const xd, BLOCK_SIZE bsize, TX_SIZE tx_size) {
- CFL_CTX *const cfl = &xd->cfl;
- struct macroblockd_plane *const pd = &xd->plane[AOM_PLANE_Y];
- int row = 0;
- int col = 0;
-
- if (block_size_high[bsize] == 4 || block_size_wide[bsize] == 4) {
- sub8x8_adjust_offset(cfl, &row, &col);
- }
- const int width = max_intra_block_width(xd, bsize, AOM_PLANE_Y, tx_size);
- const int height = max_intra_block_height(xd, bsize, AOM_PLANE_Y, tx_size);
- tx_size = get_tx_size(width, height);
- cfl_store(cfl, pd->dst.buf, pd->dst.stride, row, col, tx_size,
- get_bitdepth_data_path_index(xd));
-}
diff --git a/third_party/aom/av1/common/cfl.h b/third_party/aom/av1/common/cfl.h
deleted file mode 100644
index d627891bf..000000000
--- a/third_party/aom/av1/common/cfl.h
+++ /dev/null
@@ -1,302 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#ifndef AOM_AV1_COMMON_CFL_H_
-#define AOM_AV1_COMMON_CFL_H_
-
-#include "av1/common/blockd.h"
-#include "av1/common/onyxc_int.h"
-
-// Can we use CfL for the current block?
-static INLINE CFL_ALLOWED_TYPE is_cfl_allowed(const MACROBLOCKD *xd) {
- const MB_MODE_INFO *mbmi = xd->mi[0];
- const BLOCK_SIZE bsize = mbmi->sb_type;
- assert(bsize < BLOCK_SIZES_ALL);
- if (xd->lossless[mbmi->segment_id]) {
- // In lossless, CfL is available when the partition size is equal to the
- // transform size.
- const int ssx = xd->plane[AOM_PLANE_U].subsampling_x;
- const int ssy = xd->plane[AOM_PLANE_U].subsampling_y;
- const int plane_bsize = get_plane_block_size(bsize, ssx, ssy);
- return (CFL_ALLOWED_TYPE)(plane_bsize == BLOCK_4X4);
- }
- // Spec: CfL is available to luma partitions lesser than or equal to 32x32
- return (CFL_ALLOWED_TYPE)(block_size_wide[bsize] <= 32 &&
- block_size_high[bsize] <= 32);
-}
-
-// Do we need to save the luma pixels from the current block,
-// for a possible future CfL prediction?
-static INLINE CFL_ALLOWED_TYPE store_cfl_required(const AV1_COMMON *cm,
- const MACROBLOCKD *xd) {
- const MB_MODE_INFO *mbmi = xd->mi[0];
-
- if (cm->seq_params.monochrome) return CFL_DISALLOWED;
-
- if (!xd->cfl.is_chroma_reference) {
- // For non-chroma-reference blocks, we should always store the luma pixels,
- // in case the corresponding chroma-reference block uses CfL.
- // Note that this can only happen for block sizes which are <8 on
- // their shortest side, as otherwise they would be chroma reference
- // blocks.
- return CFL_ALLOWED;
- }
-
- // If this block has chroma information, we know whether we're
- // actually going to perform a CfL prediction
- return (CFL_ALLOWED_TYPE)(!is_inter_block(mbmi) &&
- mbmi->uv_mode == UV_CFL_PRED);
-}
-
-static INLINE int get_scaled_luma_q0(int alpha_q3, int16_t pred_buf_q3) {
- int scaled_luma_q6 = alpha_q3 * pred_buf_q3;
- return ROUND_POWER_OF_TWO_SIGNED(scaled_luma_q6, 6);
-}
-
-static INLINE CFL_PRED_TYPE get_cfl_pred_type(PLANE_TYPE plane) {
- assert(plane > 0);
- return (CFL_PRED_TYPE)(plane - 1);
-}
-
-void cfl_predict_block(MACROBLOCKD *const xd, uint8_t *dst, int dst_stride,
- TX_SIZE tx_size, int plane);
-
-void cfl_store_block(MACROBLOCKD *const xd, BLOCK_SIZE bsize, TX_SIZE tx_size);
-
-void cfl_store_tx(MACROBLOCKD *const xd, int row, int col, TX_SIZE tx_size,
- BLOCK_SIZE bsize);
-
-void cfl_store_dc_pred(MACROBLOCKD *const xd, const uint8_t *input,
- CFL_PRED_TYPE pred_plane, int width);
-
-void cfl_load_dc_pred(MACROBLOCKD *const xd, uint8_t *dst, int dst_stride,
- TX_SIZE tx_size, CFL_PRED_TYPE pred_plane);
-
-// Null function used for invalid tx_sizes
-void cfl_subsample_lbd_null(const uint8_t *input, int input_stride,
- uint16_t *output_q3);
-
-// Null function used for invalid tx_sizes
-void cfl_subsample_hbd_null(const uint16_t *input, int input_stride,
- uint16_t *output_q3);
-
-// Allows the CFL_SUBSAMPLE function to switch types depending on the bitdepth.
-#define CFL_lbd_TYPE uint8_t *cfl_type
-#define CFL_hbd_TYPE uint16_t *cfl_type
-
-// Declare a size-specific wrapper for the size-generic function. The compiler
-// will inline the size generic function in here, the advantage is that the size
-// will be constant allowing for loop unrolling and other constant propagated
-// goodness.
-#define CFL_SUBSAMPLE(arch, sub, bd, width, height) \
- void subsample_##bd##_##sub##_##width##x##height##_##arch( \
- const CFL_##bd##_TYPE, int input_stride, uint16_t *output_q3) { \
- cfl_luma_subsampling_##sub##_##bd##_##arch(cfl_type, input_stride, \
- output_q3, width, height); \
- }
-
-// Declare size-specific wrappers for all valid CfL sizes.
-#define CFL_SUBSAMPLE_FUNCTIONS(arch, sub, bd) \
- CFL_SUBSAMPLE(arch, sub, bd, 4, 4) \
- CFL_SUBSAMPLE(arch, sub, bd, 8, 8) \
- CFL_SUBSAMPLE(arch, sub, bd, 16, 16) \
- CFL_SUBSAMPLE(arch, sub, bd, 32, 32) \
- CFL_SUBSAMPLE(arch, sub, bd, 4, 8) \
- CFL_SUBSAMPLE(arch, sub, bd, 8, 4) \
- CFL_SUBSAMPLE(arch, sub, bd, 8, 16) \
- CFL_SUBSAMPLE(arch, sub, bd, 16, 8) \
- CFL_SUBSAMPLE(arch, sub, bd, 16, 32) \
- CFL_SUBSAMPLE(arch, sub, bd, 32, 16) \
- CFL_SUBSAMPLE(arch, sub, bd, 4, 16) \
- CFL_SUBSAMPLE(arch, sub, bd, 16, 4) \
- CFL_SUBSAMPLE(arch, sub, bd, 8, 32) \
- CFL_SUBSAMPLE(arch, sub, bd, 32, 8) \
- cfl_subsample_##bd##_fn cfl_get_luma_subsampling_##sub##_##bd##_##arch( \
- TX_SIZE tx_size) { \
- CFL_SUBSAMPLE_FUNCTION_ARRAY(arch, sub, bd) \
- return subfn_##sub[tx_size]; \
- }
-
-// Declare an architecture-specific array of function pointers for size-specific
-// wrappers.
-#define CFL_SUBSAMPLE_FUNCTION_ARRAY(arch, sub, bd) \
- static const cfl_subsample_##bd##_fn subfn_##sub[TX_SIZES_ALL] = { \
- subsample_##bd##_##sub##_4x4_##arch, /* 4x4 */ \
- subsample_##bd##_##sub##_8x8_##arch, /* 8x8 */ \
- subsample_##bd##_##sub##_16x16_##arch, /* 16x16 */ \
- subsample_##bd##_##sub##_32x32_##arch, /* 32x32 */ \
- cfl_subsample_##bd##_null, /* 64x64 (invalid CFL size) */ \
- subsample_##bd##_##sub##_4x8_##arch, /* 4x8 */ \
- subsample_##bd##_##sub##_8x4_##arch, /* 8x4 */ \
- subsample_##bd##_##sub##_8x16_##arch, /* 8x16 */ \
- subsample_##bd##_##sub##_16x8_##arch, /* 16x8 */ \
- subsample_##bd##_##sub##_16x32_##arch, /* 16x32 */ \
- subsample_##bd##_##sub##_32x16_##arch, /* 32x16 */ \
- cfl_subsample_##bd##_null, /* 32x64 (invalid CFL size) */ \
- cfl_subsample_##bd##_null, /* 64x32 (invalid CFL size) */ \
- subsample_##bd##_##sub##_4x16_##arch, /* 4x16 */ \
- subsample_##bd##_##sub##_16x4_##arch, /* 16x4 */ \
- subsample_##bd##_##sub##_8x32_##arch, /* 8x32 */ \
- subsample_##bd##_##sub##_32x8_##arch, /* 32x8 */ \
- cfl_subsample_##bd##_null, /* 16x64 (invalid CFL size) */ \
- cfl_subsample_##bd##_null, /* 64x16 (invalid CFL size) */ \
- };
-
-// The RTCD script does not support passing in an array, so we wrap it in this
-// function.
-#define CFL_GET_SUBSAMPLE_FUNCTION(arch) \
- CFL_SUBSAMPLE_FUNCTIONS(arch, 420, lbd) \
- CFL_SUBSAMPLE_FUNCTIONS(arch, 422, lbd) \
- CFL_SUBSAMPLE_FUNCTIONS(arch, 444, lbd) \
- CFL_SUBSAMPLE_FUNCTIONS(arch, 420, hbd) \
- CFL_SUBSAMPLE_FUNCTIONS(arch, 422, hbd) \
- CFL_SUBSAMPLE_FUNCTIONS(arch, 444, hbd)
-
-// Null function used for invalid tx_sizes
-static INLINE void cfl_subtract_average_null(const uint16_t *src,
- int16_t *dst) {
- (void)dst;
- (void)src;
- assert(0);
-}
-
-// Declare a size-specific wrapper for the size-generic function. The compiler
-// will inline the size generic function in here, the advantage is that the size
-// will be constant allowing for loop unrolling and other constant propagated
-// goodness.
-#define CFL_SUB_AVG_X(arch, width, height, round_offset, num_pel_log2) \
- void subtract_average_##width##x##height##_##arch(const uint16_t *src, \
- int16_t *dst) { \
- subtract_average_##arch(src, dst, width, height, round_offset, \
- num_pel_log2); \
- }
-
-// Declare size-specific wrappers for all valid CfL sizes.
-#define CFL_SUB_AVG_FN(arch) \
- CFL_SUB_AVG_X(arch, 4, 4, 8, 4) \
- CFL_SUB_AVG_X(arch, 4, 8, 16, 5) \
- CFL_SUB_AVG_X(arch, 4, 16, 32, 6) \
- CFL_SUB_AVG_X(arch, 8, 4, 16, 5) \
- CFL_SUB_AVG_X(arch, 8, 8, 32, 6) \
- CFL_SUB_AVG_X(arch, 8, 16, 64, 7) \
- CFL_SUB_AVG_X(arch, 8, 32, 128, 8) \
- CFL_SUB_AVG_X(arch, 16, 4, 32, 6) \
- CFL_SUB_AVG_X(arch, 16, 8, 64, 7) \
- CFL_SUB_AVG_X(arch, 16, 16, 128, 8) \
- CFL_SUB_AVG_X(arch, 16, 32, 256, 9) \
- CFL_SUB_AVG_X(arch, 32, 8, 128, 8) \
- CFL_SUB_AVG_X(arch, 32, 16, 256, 9) \
- CFL_SUB_AVG_X(arch, 32, 32, 512, 10) \
- cfl_subtract_average_fn get_subtract_average_fn_##arch(TX_SIZE tx_size) { \
- static const cfl_subtract_average_fn sub_avg[TX_SIZES_ALL] = { \
- subtract_average_4x4_##arch, /* 4x4 */ \
- subtract_average_8x8_##arch, /* 8x8 */ \
- subtract_average_16x16_##arch, /* 16x16 */ \
- subtract_average_32x32_##arch, /* 32x32 */ \
- cfl_subtract_average_null, /* 64x64 (invalid CFL size) */ \
- subtract_average_4x8_##arch, /* 4x8 */ \
- subtract_average_8x4_##arch, /* 8x4 */ \
- subtract_average_8x16_##arch, /* 8x16 */ \
- subtract_average_16x8_##arch, /* 16x8 */ \
- subtract_average_16x32_##arch, /* 16x32 */ \
- subtract_average_32x16_##arch, /* 32x16 */ \
- cfl_subtract_average_null, /* 32x64 (invalid CFL size) */ \
- cfl_subtract_average_null, /* 64x32 (invalid CFL size) */ \
- subtract_average_4x16_##arch, /* 4x16 (invalid CFL size) */ \
- subtract_average_16x4_##arch, /* 16x4 (invalid CFL size) */ \
- subtract_average_8x32_##arch, /* 8x32 (invalid CFL size) */ \
- subtract_average_32x8_##arch, /* 32x8 (invalid CFL size) */ \
- cfl_subtract_average_null, /* 16x64 (invalid CFL size) */ \
- cfl_subtract_average_null, /* 64x16 (invalid CFL size) */ \
- }; \
- /* Modulo TX_SIZES_ALL to ensure that an attacker won't be able to */ \
- /* index the function pointer array out of bounds. */ \
- return sub_avg[tx_size % TX_SIZES_ALL]; \
- }
-
-// For VSX SIMD optimization, the C versions of width == 4 subtract are
-// faster than the VSX. As such, the VSX code calls the C versions.
-void subtract_average_4x4_c(const uint16_t *src, int16_t *dst);
-void subtract_average_4x8_c(const uint16_t *src, int16_t *dst);
-void subtract_average_4x16_c(const uint16_t *src, int16_t *dst);
-
-#define CFL_PREDICT_lbd(arch, width, height) \
- void predict_lbd_##width##x##height##_##arch(const int16_t *pred_buf_q3, \
- uint8_t *dst, int dst_stride, \
- int alpha_q3) { \
- cfl_predict_lbd_##arch(pred_buf_q3, dst, dst_stride, alpha_q3, width, \
- height); \
- }
-
-#define CFL_PREDICT_hbd(arch, width, height) \
- void predict_hbd_##width##x##height##_##arch(const int16_t *pred_buf_q3, \
- uint16_t *dst, int dst_stride, \
- int alpha_q3, int bd) { \
- cfl_predict_hbd_##arch(pred_buf_q3, dst, dst_stride, alpha_q3, bd, width, \
- height); \
- }
-
-// This wrapper exists because clang format does not like calling macros with
-// lowercase letters.
-#define CFL_PREDICT_X(arch, width, height, bd) \
- CFL_PREDICT_##bd(arch, width, height)
-
-// Null function used for invalid tx_sizes
-void cfl_predict_lbd_null(const int16_t *pred_buf_q3, uint8_t *dst,
- int dst_stride, int alpha_q3);
-
-// Null function used for invalid tx_sizes
-void cfl_predict_hbd_null(const int16_t *pred_buf_q3, uint16_t *dst,
- int dst_stride, int alpha_q3, int bd);
-
-#define CFL_PREDICT_FN(arch, bd) \
- CFL_PREDICT_X(arch, 4, 4, bd) \
- CFL_PREDICT_X(arch, 4, 8, bd) \
- CFL_PREDICT_X(arch, 4, 16, bd) \
- CFL_PREDICT_X(arch, 8, 4, bd) \
- CFL_PREDICT_X(arch, 8, 8, bd) \
- CFL_PREDICT_X(arch, 8, 16, bd) \
- CFL_PREDICT_X(arch, 8, 32, bd) \
- CFL_PREDICT_X(arch, 16, 4, bd) \
- CFL_PREDICT_X(arch, 16, 8, bd) \
- CFL_PREDICT_X(arch, 16, 16, bd) \
- CFL_PREDICT_X(arch, 16, 32, bd) \
- CFL_PREDICT_X(arch, 32, 8, bd) \
- CFL_PREDICT_X(arch, 32, 16, bd) \
- CFL_PREDICT_X(arch, 32, 32, bd) \
- cfl_predict_##bd##_fn get_predict_##bd##_fn_##arch(TX_SIZE tx_size) { \
- static const cfl_predict_##bd##_fn pred[TX_SIZES_ALL] = { \
- predict_##bd##_4x4_##arch, /* 4x4 */ \
- predict_##bd##_8x8_##arch, /* 8x8 */ \
- predict_##bd##_16x16_##arch, /* 16x16 */ \
- predict_##bd##_32x32_##arch, /* 32x32 */ \
- cfl_predict_##bd##_null, /* 64x64 (invalid CFL size) */ \
- predict_##bd##_4x8_##arch, /* 4x8 */ \
- predict_##bd##_8x4_##arch, /* 8x4 */ \
- predict_##bd##_8x16_##arch, /* 8x16 */ \
- predict_##bd##_16x8_##arch, /* 16x8 */ \
- predict_##bd##_16x32_##arch, /* 16x32 */ \
- predict_##bd##_32x16_##arch, /* 32x16 */ \
- cfl_predict_##bd##_null, /* 32x64 (invalid CFL size) */ \
- cfl_predict_##bd##_null, /* 64x32 (invalid CFL size) */ \
- predict_##bd##_4x16_##arch, /* 4x16 */ \
- predict_##bd##_16x4_##arch, /* 16x4 */ \
- predict_##bd##_8x32_##arch, /* 8x32 */ \
- predict_##bd##_32x8_##arch, /* 32x8 */ \
- cfl_predict_##bd##_null, /* 16x64 (invalid CFL size) */ \
- cfl_predict_##bd##_null, /* 64x16 (invalid CFL size) */ \
- }; \
- /* Modulo TX_SIZES_ALL to ensure that an attacker won't be able to */ \
- /* index the function pointer array out of bounds. */ \
- return pred[tx_size % TX_SIZES_ALL]; \
- }
-
-#endif // AOM_AV1_COMMON_CFL_H_
diff --git a/third_party/aom/av1/common/common.h b/third_party/aom/av1/common/common.h
deleted file mode 100644
index bed6083db..000000000
--- a/third_party/aom/av1/common/common.h
+++ /dev/null
@@ -1,63 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#ifndef AOM_AV1_COMMON_COMMON_H_
-#define AOM_AV1_COMMON_COMMON_H_
-
-/* Interface header for common constant data structures and lookup tables */
-
-#include <assert.h>
-
-#include "aom_dsp/aom_dsp_common.h"
-#include "aom_mem/aom_mem.h"
-#include "aom/aom_integer.h"
-#include "aom_ports/bitops.h"
-#include "config/aom_config.h"
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-#define PI 3.141592653589793238462643383279502884
-
-// Only need this for fixed-size arrays, for structs just assign.
-#define av1_copy(dest, src) \
- { \
- assert(sizeof(dest) == sizeof(src)); \
- memcpy(dest, src, sizeof(src)); \
- }
-
-// Use this for variably-sized arrays.
-#define av1_copy_array(dest, src, n) \
- { \
- assert(sizeof(*(dest)) == sizeof(*(src))); \
- memcpy(dest, src, n * sizeof(*(src))); \
- }
-
-#define av1_zero(dest) memset(&(dest), 0, sizeof(dest))
-#define av1_zero_array(dest, n) memset(dest, 0, n * sizeof(*(dest)))
-
-static INLINE int get_unsigned_bits(unsigned int num_values) {
- return num_values > 0 ? get_msb(num_values) + 1 : 0;
-}
-
-#define CHECK_MEM_ERROR(cm, lval, expr) \
- AOM_CHECK_MEM_ERROR(&cm->error, lval, expr)
-
-#define AOM_FRAME_MARKER 0x2
-
-#define AV1_MIN_TILE_SIZE_BYTES 1
-
-#ifdef __cplusplus
-} // extern "C"
-#endif
-
-#endif // AOM_AV1_COMMON_COMMON_H_
diff --git a/third_party/aom/av1/common/common_data.h b/third_party/aom/av1/common/common_data.h
deleted file mode 100644
index 46e455fdb..000000000
--- a/third_party/aom/av1/common/common_data.h
+++ /dev/null
@@ -1,446 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#ifndef AOM_AV1_COMMON_COMMON_DATA_H_
-#define AOM_AV1_COMMON_COMMON_DATA_H_
-
-#include "av1/common/enums.h"
-#include "aom/aom_integer.h"
-#include "aom_dsp/aom_dsp_common.h"
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-// Log 2 conversion lookup tables in units of mode info (4x4).
-// The Mi_Width_Log2 table in the spec (Section 9.3. Conversion tables).
-static const uint8_t mi_size_wide_log2[BLOCK_SIZES_ALL] = {
- 0, 0, 1, 1, 1, 2, 2, 2, 3, 3, 3, 4, 4, 4, 5, 5, 0, 2, 1, 3, 2, 4
-};
-// The Mi_Height_Log2 table in the spec (Section 9.3. Conversion tables).
-static const uint8_t mi_size_high_log2[BLOCK_SIZES_ALL] = {
- 0, 1, 0, 1, 2, 1, 2, 3, 2, 3, 4, 3, 4, 5, 4, 5, 2, 0, 3, 1, 4, 2
-};
-
-// Width/height lookup tables in units of mode info (4x4).
-// The Num_4x4_Blocks_Wide table in the spec (Section 9.3. Conversion tables).
-static const uint8_t mi_size_wide[BLOCK_SIZES_ALL] = {
- 1, 1, 2, 2, 2, 4, 4, 4, 8, 8, 8, 16, 16, 16, 32, 32, 1, 4, 2, 8, 4, 16
-};
-
-// The Num_4x4_Blocks_High table in the spec (Section 9.3. Conversion tables).
-static const uint8_t mi_size_high[BLOCK_SIZES_ALL] = {
- 1, 2, 1, 2, 4, 2, 4, 8, 4, 8, 16, 8, 16, 32, 16, 32, 4, 1, 8, 2, 16, 4
-};
-
-// Width/height lookup tables in units of samples.
-// The Block_Width table in the spec (Section 9.3. Conversion tables).
-static const uint8_t block_size_wide[BLOCK_SIZES_ALL] = {
- 4, 4, 8, 8, 8, 16, 16, 16, 32, 32, 32,
- 64, 64, 64, 128, 128, 4, 16, 8, 32, 16, 64
-};
-
-// The Block_Height table in the spec (Section 9.3. Conversion tables).
-static const uint8_t block_size_high[BLOCK_SIZES_ALL] = {
- 4, 8, 4, 8, 16, 8, 16, 32, 16, 32, 64,
- 32, 64, 128, 64, 128, 16, 4, 32, 8, 64, 16
-};
-
-// Maps a block size to a context.
-// The Size_Group table in the spec (Section 9.3. Conversion tables).
-// AOMMIN(3, AOMMIN(mi_size_wide_log2(bsize), mi_size_high_log2(bsize)))
-static const uint8_t size_group_lookup[BLOCK_SIZES_ALL] = {
- 0, 0, 0, 1, 1, 1, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 0, 0, 1, 1, 2, 2
-};
-
-static const uint8_t num_pels_log2_lookup[BLOCK_SIZES_ALL] = {
- 4, 5, 5, 6, 7, 7, 8, 9, 9, 10, 11, 11, 12, 13, 13, 14, 6, 6, 8, 8, 10, 10
-};
-
-// A compressed version of the Partition_Subsize table in the spec (9.3.
-// Conversion tables), for square block sizes only.
-/* clang-format off */
-static const BLOCK_SIZE subsize_lookup[EXT_PARTITION_TYPES][SQR_BLOCK_SIZES] = {
- { // PARTITION_NONE
- BLOCK_4X4, BLOCK_8X8, BLOCK_16X16,
- BLOCK_32X32, BLOCK_64X64, BLOCK_128X128
- }, { // PARTITION_HORZ
- BLOCK_INVALID, BLOCK_8X4, BLOCK_16X8,
- BLOCK_32X16, BLOCK_64X32, BLOCK_128X64
- }, { // PARTITION_VERT
- BLOCK_INVALID, BLOCK_4X8, BLOCK_8X16,
- BLOCK_16X32, BLOCK_32X64, BLOCK_64X128
- }, { // PARTITION_SPLIT
- BLOCK_INVALID, BLOCK_4X4, BLOCK_8X8,
- BLOCK_16X16, BLOCK_32X32, BLOCK_64X64
- }, { // PARTITION_HORZ_A
- BLOCK_INVALID, BLOCK_8X4, BLOCK_16X8,
- BLOCK_32X16, BLOCK_64X32, BLOCK_128X64
- }, { // PARTITION_HORZ_B
- BLOCK_INVALID, BLOCK_8X4, BLOCK_16X8,
- BLOCK_32X16, BLOCK_64X32, BLOCK_128X64
- }, { // PARTITION_VERT_A
- BLOCK_INVALID, BLOCK_4X8, BLOCK_8X16,
- BLOCK_16X32, BLOCK_32X64, BLOCK_64X128
- }, { // PARTITION_VERT_B
- BLOCK_INVALID, BLOCK_4X8, BLOCK_8X16,
- BLOCK_16X32, BLOCK_32X64, BLOCK_64X128
- }, { // PARTITION_HORZ_4
- BLOCK_INVALID, BLOCK_INVALID, BLOCK_16X4,
- BLOCK_32X8, BLOCK_64X16, BLOCK_INVALID
- }, { // PARTITION_VERT_4
- BLOCK_INVALID, BLOCK_INVALID, BLOCK_4X16,
- BLOCK_8X32, BLOCK_16X64, BLOCK_INVALID
- }
-};
-
-static const TX_SIZE max_txsize_lookup[BLOCK_SIZES_ALL] = {
- // 4X4
- TX_4X4,
- // 4X8, 8X4, 8X8
- TX_4X4, TX_4X4, TX_8X8,
- // 8X16, 16X8, 16X16
- TX_8X8, TX_8X8, TX_16X16,
- // 16X32, 32X16, 32X32
- TX_16X16, TX_16X16, TX_32X32,
- // 32X64, 64X32,
- TX_32X32, TX_32X32,
- // 64X64
- TX_64X64,
- // 64x128, 128x64, 128x128
- TX_64X64, TX_64X64, TX_64X64,
- // 4x16, 16x4, 8x32
- TX_4X4, TX_4X4, TX_8X8,
- // 32x8, 16x64 64x16
- TX_8X8, TX_16X16, TX_16X16
-};
-
-static const TX_SIZE max_txsize_rect_lookup[BLOCK_SIZES_ALL] = {
- // 4X4
- TX_4X4,
- // 4X8, 8X4, 8X8
- TX_4X8, TX_8X4, TX_8X8,
- // 8X16, 16X8, 16X16
- TX_8X16, TX_16X8, TX_16X16,
- // 16X32, 32X16, 32X32
- TX_16X32, TX_32X16, TX_32X32,
- // 32X64, 64X32,
- TX_32X64, TX_64X32,
- // 64X64
- TX_64X64,
- // 64x128, 128x64, 128x128
- TX_64X64, TX_64X64, TX_64X64,
- // 4x16, 16x4,
- TX_4X16, TX_16X4,
- // 8x32, 32x8
- TX_8X32, TX_32X8,
- // 16x64, 64x16
- TX_16X64, TX_64X16
-};
-
-static const TX_TYPE_1D vtx_tab[TX_TYPES] = {
- DCT_1D, ADST_1D, DCT_1D, ADST_1D,
- FLIPADST_1D, DCT_1D, FLIPADST_1D, ADST_1D, FLIPADST_1D, IDTX_1D,
- DCT_1D, IDTX_1D, ADST_1D, IDTX_1D, FLIPADST_1D, IDTX_1D,
-};
-
-static const TX_TYPE_1D htx_tab[TX_TYPES] = {
- DCT_1D, DCT_1D, ADST_1D, ADST_1D,
- DCT_1D, FLIPADST_1D, FLIPADST_1D, FLIPADST_1D, ADST_1D, IDTX_1D,
- IDTX_1D, DCT_1D, IDTX_1D, ADST_1D, IDTX_1D, FLIPADST_1D,
-};
-
-#define TXSIZE_CAT_INVALID (-1)
-
-/* clang-format on */
-
-static const TX_SIZE sub_tx_size_map[TX_SIZES_ALL] = {
- TX_4X4, // TX_4X4
- TX_4X4, // TX_8X8
- TX_8X8, // TX_16X16
- TX_16X16, // TX_32X32
- TX_32X32, // TX_64X64
- TX_4X4, // TX_4X8
- TX_4X4, // TX_8X4
- TX_8X8, // TX_8X16
- TX_8X8, // TX_16X8
- TX_16X16, // TX_16X32
- TX_16X16, // TX_32X16
- TX_32X32, // TX_32X64
- TX_32X32, // TX_64X32
- TX_4X8, // TX_4X16
- TX_8X4, // TX_16X4
- TX_8X16, // TX_8X32
- TX_16X8, // TX_32X8
- TX_16X32, // TX_16X64
- TX_32X16, // TX_64X16
-};
-
-static const TX_SIZE txsize_horz_map[TX_SIZES_ALL] = {
- TX_4X4, // TX_4X4
- TX_8X8, // TX_8X8
- TX_16X16, // TX_16X16
- TX_32X32, // TX_32X32
- TX_64X64, // TX_64X64
- TX_4X4, // TX_4X8
- TX_8X8, // TX_8X4
- TX_8X8, // TX_8X16
- TX_16X16, // TX_16X8
- TX_16X16, // TX_16X32
- TX_32X32, // TX_32X16
- TX_32X32, // TX_32X64
- TX_64X64, // TX_64X32
- TX_4X4, // TX_4X16
- TX_16X16, // TX_16X4
- TX_8X8, // TX_8X32
- TX_32X32, // TX_32X8
- TX_16X16, // TX_16X64
- TX_64X64, // TX_64X16
-};
-
-static const TX_SIZE txsize_vert_map[TX_SIZES_ALL] = {
- TX_4X4, // TX_4X4
- TX_8X8, // TX_8X8
- TX_16X16, // TX_16X16
- TX_32X32, // TX_32X32
- TX_64X64, // TX_64X64
- TX_8X8, // TX_4X8
- TX_4X4, // TX_8X4
- TX_16X16, // TX_8X16
- TX_8X8, // TX_16X8
- TX_32X32, // TX_16X32
- TX_16X16, // TX_32X16
- TX_64X64, // TX_32X64
- TX_32X32, // TX_64X32
- TX_16X16, // TX_4X16
- TX_4X4, // TX_16X4
- TX_32X32, // TX_8X32
- TX_8X8, // TX_32X8
- TX_64X64, // TX_16X64
- TX_16X16, // TX_64X16
-};
-
-#define TX_SIZE_W_MIN 4
-
-// Transform block width in pixels
-static const int tx_size_wide[TX_SIZES_ALL] = {
- 4, 8, 16, 32, 64, 4, 8, 8, 16, 16, 32, 32, 64, 4, 16, 8, 32, 16, 64,
-};
-
-#define TX_SIZE_H_MIN 4
-
-// Transform block height in pixels
-static const int tx_size_high[TX_SIZES_ALL] = {
- 4, 8, 16, 32, 64, 8, 4, 16, 8, 32, 16, 64, 32, 16, 4, 32, 8, 64, 16,
-};
-
-// Transform block width in unit
-static const int tx_size_wide_unit[TX_SIZES_ALL] = {
- 1, 2, 4, 8, 16, 1, 2, 2, 4, 4, 8, 8, 16, 1, 4, 2, 8, 4, 16,
-};
-
-// Transform block height in unit
-static const int tx_size_high_unit[TX_SIZES_ALL] = {
- 1, 2, 4, 8, 16, 2, 1, 4, 2, 8, 4, 16, 8, 4, 1, 8, 2, 16, 4,
-};
-
-// Transform block width in log2
-static const int tx_size_wide_log2[TX_SIZES_ALL] = {
- 2, 3, 4, 5, 6, 2, 3, 3, 4, 4, 5, 5, 6, 2, 4, 3, 5, 4, 6,
-};
-
-// Transform block height in log2
-static const int tx_size_high_log2[TX_SIZES_ALL] = {
- 2, 3, 4, 5, 6, 3, 2, 4, 3, 5, 4, 6, 5, 4, 2, 5, 3, 6, 4,
-};
-
-static const int tx_size_2d[TX_SIZES_ALL + 1] = {
- 16, 64, 256, 1024, 4096, 32, 32, 128, 128, 512,
- 512, 2048, 2048, 64, 64, 256, 256, 1024, 1024,
-};
-
-static const BLOCK_SIZE txsize_to_bsize[TX_SIZES_ALL] = {
- BLOCK_4X4, // TX_4X4
- BLOCK_8X8, // TX_8X8
- BLOCK_16X16, // TX_16X16
- BLOCK_32X32, // TX_32X32
- BLOCK_64X64, // TX_64X64
- BLOCK_4X8, // TX_4X8
- BLOCK_8X4, // TX_8X4
- BLOCK_8X16, // TX_8X16
- BLOCK_16X8, // TX_16X8
- BLOCK_16X32, // TX_16X32
- BLOCK_32X16, // TX_32X16
- BLOCK_32X64, // TX_32X64
- BLOCK_64X32, // TX_64X32
- BLOCK_4X16, // TX_4X16
- BLOCK_16X4, // TX_16X4
- BLOCK_8X32, // TX_8X32
- BLOCK_32X8, // TX_32X8
- BLOCK_16X64, // TX_16X64
- BLOCK_64X16, // TX_64X16
-};
-
-static const TX_SIZE txsize_sqr_map[TX_SIZES_ALL] = {
- TX_4X4, // TX_4X4
- TX_8X8, // TX_8X8
- TX_16X16, // TX_16X16
- TX_32X32, // TX_32X32
- TX_64X64, // TX_64X64
- TX_4X4, // TX_4X8
- TX_4X4, // TX_8X4
- TX_8X8, // TX_8X16
- TX_8X8, // TX_16X8
- TX_16X16, // TX_16X32
- TX_16X16, // TX_32X16
- TX_32X32, // TX_32X64
- TX_32X32, // TX_64X32
- TX_4X4, // TX_4X16
- TX_4X4, // TX_16X4
- TX_8X8, // TX_8X32
- TX_8X8, // TX_32X8
- TX_16X16, // TX_16X64
- TX_16X16, // TX_64X16
-};
-
-static const TX_SIZE txsize_sqr_up_map[TX_SIZES_ALL] = {
- TX_4X4, // TX_4X4
- TX_8X8, // TX_8X8
- TX_16X16, // TX_16X16
- TX_32X32, // TX_32X32
- TX_64X64, // TX_64X64
- TX_8X8, // TX_4X8
- TX_8X8, // TX_8X4
- TX_16X16, // TX_8X16
- TX_16X16, // TX_16X8
- TX_32X32, // TX_16X32
- TX_32X32, // TX_32X16
- TX_64X64, // TX_32X64
- TX_64X64, // TX_64X32
- TX_16X16, // TX_4X16
- TX_16X16, // TX_16X4
- TX_32X32, // TX_8X32
- TX_32X32, // TX_32X8
- TX_64X64, // TX_16X64
- TX_64X64, // TX_64X16
-};
-
-static const int8_t txsize_log2_minus4[TX_SIZES_ALL] = {
- 0, // TX_4X4
- 2, // TX_8X8
- 4, // TX_16X16
- 6, // TX_32X32
- 6, // TX_64X64
- 1, // TX_4X8
- 1, // TX_8X4
- 3, // TX_8X16
- 3, // TX_16X8
- 5, // TX_16X32
- 5, // TX_32X16
- 6, // TX_32X64
- 6, // TX_64X32
- 2, // TX_4X16
- 2, // TX_16X4
- 4, // TX_8X32
- 4, // TX_32X8
- 5, // TX_16X64
- 5, // TX_64X16
-};
-
-/* clang-format off */
-static const TX_SIZE tx_mode_to_biggest_tx_size[TX_MODES] = {
- TX_4X4, // ONLY_4X4
- TX_64X64, // TX_MODE_LARGEST
- TX_64X64, // TX_MODE_SELECT
-};
-
-// The Subsampled_Size table in the spec (Section 5.11.38. Get plane residual
-// size function).
-static const BLOCK_SIZE ss_size_lookup[BLOCK_SIZES_ALL][2][2] = {
- // ss_x == 0 ss_x == 0 ss_x == 1 ss_x == 1
- // ss_y == 0 ss_y == 1 ss_y == 0 ss_y == 1
- { { BLOCK_4X4, BLOCK_4X4 }, { BLOCK_4X4, BLOCK_4X4 } },
- { { BLOCK_4X8, BLOCK_4X4 }, { BLOCK_INVALID, BLOCK_4X4 } },
- { { BLOCK_8X4, BLOCK_INVALID }, { BLOCK_4X4, BLOCK_4X4 } },
- { { BLOCK_8X8, BLOCK_8X4 }, { BLOCK_4X8, BLOCK_4X4 } },
- { { BLOCK_8X16, BLOCK_8X8 }, { BLOCK_INVALID, BLOCK_4X8 } },
- { { BLOCK_16X8, BLOCK_INVALID }, { BLOCK_8X8, BLOCK_8X4 } },
- { { BLOCK_16X16, BLOCK_16X8 }, { BLOCK_8X16, BLOCK_8X8 } },
- { { BLOCK_16X32, BLOCK_16X16 }, { BLOCK_INVALID, BLOCK_8X16 } },
- { { BLOCK_32X16, BLOCK_INVALID }, { BLOCK_16X16, BLOCK_16X8 } },
- { { BLOCK_32X32, BLOCK_32X16 }, { BLOCK_16X32, BLOCK_16X16 } },
- { { BLOCK_32X64, BLOCK_32X32 }, { BLOCK_INVALID, BLOCK_16X32 } },
- { { BLOCK_64X32, BLOCK_INVALID }, { BLOCK_32X32, BLOCK_32X16 } },
- { { BLOCK_64X64, BLOCK_64X32 }, { BLOCK_32X64, BLOCK_32X32 } },
- { { BLOCK_64X128, BLOCK_64X64 }, { BLOCK_INVALID, BLOCK_32X64 } },
- { { BLOCK_128X64, BLOCK_INVALID }, { BLOCK_64X64, BLOCK_64X32 } },
- { { BLOCK_128X128, BLOCK_128X64 }, { BLOCK_64X128, BLOCK_64X64 } },
- { { BLOCK_4X16, BLOCK_4X8 }, { BLOCK_INVALID, BLOCK_4X8 } },
- { { BLOCK_16X4, BLOCK_INVALID }, { BLOCK_8X4, BLOCK_8X4 } },
- { { BLOCK_8X32, BLOCK_8X16 }, { BLOCK_INVALID, BLOCK_4X16 } },
- { { BLOCK_32X8, BLOCK_INVALID }, { BLOCK_16X8, BLOCK_16X4 } },
- { { BLOCK_16X64, BLOCK_16X32 }, { BLOCK_INVALID, BLOCK_8X32 } },
- { { BLOCK_64X16, BLOCK_INVALID }, { BLOCK_32X16, BLOCK_32X8 } }
-};
-/* clang-format on */
-
-// Generates 5 bit field in which each bit set to 1 represents
-// a blocksize partition 11111 means we split 128x128, 64x64, 32x32, 16x16
-// and 8x8. 10000 means we just split the 128x128 to 64x64
-/* clang-format off */
-static const struct {
- PARTITION_CONTEXT above;
- PARTITION_CONTEXT left;
-} partition_context_lookup[BLOCK_SIZES_ALL] = {
- { 31, 31 }, // 4X4 - {0b11111, 0b11111}
- { 31, 30 }, // 4X8 - {0b11111, 0b11110}
- { 30, 31 }, // 8X4 - {0b11110, 0b11111}
- { 30, 30 }, // 8X8 - {0b11110, 0b11110}
- { 30, 28 }, // 8X16 - {0b11110, 0b11100}
- { 28, 30 }, // 16X8 - {0b11100, 0b11110}
- { 28, 28 }, // 16X16 - {0b11100, 0b11100}
- { 28, 24 }, // 16X32 - {0b11100, 0b11000}
- { 24, 28 }, // 32X16 - {0b11000, 0b11100}
- { 24, 24 }, // 32X32 - {0b11000, 0b11000}
- { 24, 16 }, // 32X64 - {0b11000, 0b10000}
- { 16, 24 }, // 64X32 - {0b10000, 0b11000}
- { 16, 16 }, // 64X64 - {0b10000, 0b10000}
- { 16, 0 }, // 64X128- {0b10000, 0b00000}
- { 0, 16 }, // 128X64- {0b00000, 0b10000}
- { 0, 0 }, // 128X128-{0b00000, 0b00000}
- { 31, 28 }, // 4X16 - {0b11111, 0b11100}
- { 28, 31 }, // 16X4 - {0b11100, 0b11111}
- { 30, 24 }, // 8X32 - {0b11110, 0b11000}
- { 24, 30 }, // 32X8 - {0b11000, 0b11110}
- { 28, 16 }, // 16X64 - {0b11100, 0b10000}
- { 16, 28 }, // 64X16 - {0b10000, 0b11100}
-};
-/* clang-format on */
-
-static const int intra_mode_context[INTRA_MODES] = {
- 0, 1, 2, 3, 4, 4, 4, 4, 3, 0, 1, 2, 0,
-};
-
-// Note: this is also used in unit tests. So whenever one changes the table,
-// the unit tests need to be changed accordingly.
-static const int quant_dist_weight[4][2] = {
- { 2, 3 }, { 2, 5 }, { 2, 7 }, { 1, MAX_FRAME_DISTANCE }
-};
-static const int quant_dist_lookup_table[2][4][2] = {
- { { 9, 7 }, { 11, 5 }, { 12, 4 }, { 13, 3 } },
- { { 7, 9 }, { 5, 11 }, { 4, 12 }, { 3, 13 } },
-};
-
-#ifdef __cplusplus
-} // extern "C"
-#endif
-
-#endif // AOM_AV1_COMMON_COMMON_DATA_H_
diff --git a/third_party/aom/av1/common/convolve.c b/third_party/aom/av1/common/convolve.c
deleted file mode 100644
index 1f11126fc..000000000
--- a/third_party/aom/av1/common/convolve.c
+++ /dev/null
@@ -1,1295 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#include <assert.h>
-#include <string.h>
-
-#include "config/aom_dsp_rtcd.h"
-#include "config/av1_rtcd.h"
-
-#include "av1/common/blockd.h"
-#include "av1/common/convolve.h"
-#include "av1/common/filter.h"
-#include "av1/common/onyxc_int.h"
-#include "av1/common/resize.h"
-#include "aom_dsp/aom_dsp_common.h"
-#include "aom_ports/mem.h"
-
-void av1_convolve_horiz_rs_c(const uint8_t *src, int src_stride, uint8_t *dst,
- int dst_stride, int w, int h,
- const int16_t *x_filters, int x0_qn,
- int x_step_qn) {
- src -= UPSCALE_NORMATIVE_TAPS / 2 - 1;
- for (int y = 0; y < h; ++y) {
- int x_qn = x0_qn;
- for (int x = 0; x < w; ++x) {
- const uint8_t *const src_x = &src[x_qn >> RS_SCALE_SUBPEL_BITS];
- const int x_filter_idx =
- (x_qn & RS_SCALE_SUBPEL_MASK) >> RS_SCALE_EXTRA_BITS;
- assert(x_filter_idx <= RS_SUBPEL_MASK);
- const int16_t *const x_filter =
- &x_filters[x_filter_idx * UPSCALE_NORMATIVE_TAPS];
- int sum = 0;
- for (int k = 0; k < UPSCALE_NORMATIVE_TAPS; ++k)
- sum += src_x[k] * x_filter[k];
- dst[x] = clip_pixel(ROUND_POWER_OF_TWO(sum, FILTER_BITS));
- x_qn += x_step_qn;
- }
- src += src_stride;
- dst += dst_stride;
- }
-}
-
-void av1_highbd_convolve_horiz_rs_c(const uint16_t *src, int src_stride,
- uint16_t *dst, int dst_stride, int w, int h,
- const int16_t *x_filters, int x0_qn,
- int x_step_qn, int bd) {
- src -= UPSCALE_NORMATIVE_TAPS / 2 - 1;
- for (int y = 0; y < h; ++y) {
- int x_qn = x0_qn;
- for (int x = 0; x < w; ++x) {
- const uint16_t *const src_x = &src[x_qn >> RS_SCALE_SUBPEL_BITS];
- const int x_filter_idx =
- (x_qn & RS_SCALE_SUBPEL_MASK) >> RS_SCALE_EXTRA_BITS;
- assert(x_filter_idx <= RS_SUBPEL_MASK);
- const int16_t *const x_filter =
- &x_filters[x_filter_idx * UPSCALE_NORMATIVE_TAPS];
- int sum = 0;
- for (int k = 0; k < UPSCALE_NORMATIVE_TAPS; ++k)
- sum += src_x[k] * x_filter[k];
- dst[x] = clip_pixel_highbd(ROUND_POWER_OF_TWO(sum, FILTER_BITS), bd);
- x_qn += x_step_qn;
- }
- src += src_stride;
- dst += dst_stride;
- }
-}
-
-void av1_convolve_2d_sr_c(const uint8_t *src, int src_stride, uint8_t *dst,
- int dst_stride, int w, int h,
- const InterpFilterParams *filter_params_x,
- const InterpFilterParams *filter_params_y,
- const int subpel_x_q4, const int subpel_y_q4,
- ConvolveParams *conv_params) {
- int16_t im_block[(MAX_SB_SIZE + MAX_FILTER_TAP - 1) * MAX_SB_SIZE];
- int im_h = h + filter_params_y->taps - 1;
- int im_stride = w;
- const int fo_vert = filter_params_y->taps / 2 - 1;
- const int fo_horiz = filter_params_x->taps / 2 - 1;
- const int bd = 8;
- const int bits =
- FILTER_BITS * 2 - conv_params->round_0 - conv_params->round_1;
-
- // horizontal filter
- const uint8_t *src_horiz = src - fo_vert * src_stride;
- const int16_t *x_filter = av1_get_interp_filter_subpel_kernel(
- filter_params_x, subpel_x_q4 & SUBPEL_MASK);
- for (int y = 0; y < im_h; ++y) {
- for (int x = 0; x < w; ++x) {
- int32_t sum = (1 << (bd + FILTER_BITS - 1));
- for (int k = 0; k < filter_params_x->taps; ++k) {
- sum += x_filter[k] * src_horiz[y * src_stride + x - fo_horiz + k];
- }
- assert(0 <= sum && sum < (1 << (bd + FILTER_BITS + 1)));
- im_block[y * im_stride + x] =
- (int16_t)ROUND_POWER_OF_TWO(sum, conv_params->round_0);
- }
- }
-
- // vertical filter
- int16_t *src_vert = im_block + fo_vert * im_stride;
- const int16_t *y_filter = av1_get_interp_filter_subpel_kernel(
- filter_params_y, subpel_y_q4 & SUBPEL_MASK);
- const int offset_bits = bd + 2 * FILTER_BITS - conv_params->round_0;
- for (int y = 0; y < h; ++y) {
- for (int x = 0; x < w; ++x) {
- int32_t sum = 1 << offset_bits;
- for (int k = 0; k < filter_params_y->taps; ++k) {
- sum += y_filter[k] * src_vert[(y - fo_vert + k) * im_stride + x];
- }
- assert(0 <= sum && sum < (1 << (offset_bits + 2)));
- int16_t res = ROUND_POWER_OF_TWO(sum, conv_params->round_1) -
- ((1 << (offset_bits - conv_params->round_1)) +
- (1 << (offset_bits - conv_params->round_1 - 1)));
- dst[y * dst_stride + x] = clip_pixel(ROUND_POWER_OF_TWO(res, bits));
- }
- }
-}
-
-void av1_convolve_y_sr_c(const uint8_t *src, int src_stride, uint8_t *dst,
- int dst_stride, int w, int h,
- const InterpFilterParams *filter_params_x,
- const InterpFilterParams *filter_params_y,
- const int subpel_x_q4, const int subpel_y_q4,
- ConvolveParams *conv_params) {
- const int fo_vert = filter_params_y->taps / 2 - 1;
- (void)filter_params_x;
- (void)subpel_x_q4;
- (void)conv_params;
-
- assert(conv_params->round_0 <= FILTER_BITS);
- assert(((conv_params->round_0 + conv_params->round_1) <= (FILTER_BITS + 1)) ||
- ((conv_params->round_0 + conv_params->round_1) == (2 * FILTER_BITS)));
-
- // vertical filter
- const int16_t *y_filter = av1_get_interp_filter_subpel_kernel(
- filter_params_y, subpel_y_q4 & SUBPEL_MASK);
- for (int y = 0; y < h; ++y) {
- for (int x = 0; x < w; ++x) {
- int32_t res = 0;
- for (int k = 0; k < filter_params_y->taps; ++k) {
- res += y_filter[k] * src[(y - fo_vert + k) * src_stride + x];
- }
- dst[y * dst_stride + x] =
- clip_pixel(ROUND_POWER_OF_TWO(res, FILTER_BITS));
- }
- }
-}
-
-void av1_convolve_x_sr_c(const uint8_t *src, int src_stride, uint8_t *dst,
- int dst_stride, int w, int h,
- const InterpFilterParams *filter_params_x,
- const InterpFilterParams *filter_params_y,
- const int subpel_x_q4, const int subpel_y_q4,
- ConvolveParams *conv_params) {
- const int fo_horiz = filter_params_x->taps / 2 - 1;
- const int bits = FILTER_BITS - conv_params->round_0;
- (void)filter_params_y;
- (void)subpel_y_q4;
- (void)conv_params;
-
- assert(bits >= 0);
- assert((FILTER_BITS - conv_params->round_1) >= 0 ||
- ((conv_params->round_0 + conv_params->round_1) == 2 * FILTER_BITS));
-
- // horizontal filter
- const int16_t *x_filter = av1_get_interp_filter_subpel_kernel(
- filter_params_x, subpel_x_q4 & SUBPEL_MASK);
-
- for (int y = 0; y < h; ++y) {
- for (int x = 0; x < w; ++x) {
- int32_t res = 0;
- for (int k = 0; k < filter_params_x->taps; ++k) {
- res += x_filter[k] * src[y * src_stride + x - fo_horiz + k];
- }
- res = ROUND_POWER_OF_TWO(res, conv_params->round_0);
- dst[y * dst_stride + x] = clip_pixel(ROUND_POWER_OF_TWO(res, bits));
- }
- }
-}
-
-void av1_convolve_2d_copy_sr_c(const uint8_t *src, int src_stride, uint8_t *dst,
- int dst_stride, int w, int h,
- const InterpFilterParams *filter_params_x,
- const InterpFilterParams *filter_params_y,
- const int subpel_x_q4, const int subpel_y_q4,
- ConvolveParams *conv_params) {
- (void)filter_params_x;
- (void)filter_params_y;
- (void)subpel_x_q4;
- (void)subpel_y_q4;
- (void)conv_params;
-
- for (int y = 0; y < h; ++y) {
- memcpy(dst + y * dst_stride, src + y * src_stride, w * sizeof(src[0]));
- }
-}
-
-void av1_jnt_convolve_2d_c(const uint8_t *src, int src_stride, uint8_t *dst8,
- int dst8_stride, int w, int h,
- const InterpFilterParams *filter_params_x,
- const InterpFilterParams *filter_params_y,
- const int subpel_x_q4, const int subpel_y_q4,
- ConvolveParams *conv_params) {
- CONV_BUF_TYPE *dst = conv_params->dst;
- int dst_stride = conv_params->dst_stride;
- int16_t im_block[(MAX_SB_SIZE + MAX_FILTER_TAP - 1) * MAX_SB_SIZE];
- int im_h = h + filter_params_y->taps - 1;
- int im_stride = w;
- const int fo_vert = filter_params_y->taps / 2 - 1;
- const int fo_horiz = filter_params_x->taps / 2 - 1;
- const int bd = 8;
- const int round_bits =
- 2 * FILTER_BITS - conv_params->round_0 - conv_params->round_1;
-
- // horizontal filter
- const uint8_t *src_horiz = src - fo_vert * src_stride;
- const int16_t *x_filter = av1_get_interp_filter_subpel_kernel(
- filter_params_x, subpel_x_q4 & SUBPEL_MASK);
- for (int y = 0; y < im_h; ++y) {
- for (int x = 0; x < w; ++x) {
- int32_t sum = (1 << (bd + FILTER_BITS - 1));
- for (int k = 0; k < filter_params_x->taps; ++k) {
- sum += x_filter[k] * src_horiz[y * src_stride + x - fo_horiz + k];
- }
- assert(0 <= sum && sum < (1 << (bd + FILTER_BITS + 1)));
- im_block[y * im_stride + x] =
- (int16_t)ROUND_POWER_OF_TWO(sum, conv_params->round_0);
- }
- }
-
- // vertical filter
- int16_t *src_vert = im_block + fo_vert * im_stride;
- const int16_t *y_filter = av1_get_interp_filter_subpel_kernel(
- filter_params_y, subpel_y_q4 & SUBPEL_MASK);
- const int offset_bits = bd + 2 * FILTER_BITS - conv_params->round_0;
- for (int y = 0; y < h; ++y) {
- for (int x = 0; x < w; ++x) {
- int32_t sum = 1 << offset_bits;
- for (int k = 0; k < filter_params_y->taps; ++k) {
- sum += y_filter[k] * src_vert[(y - fo_vert + k) * im_stride + x];
- }
- assert(0 <= sum && sum < (1 << (offset_bits + 2)));
- CONV_BUF_TYPE res = ROUND_POWER_OF_TWO(sum, conv_params->round_1);
- if (conv_params->do_average) {
- int32_t tmp = dst[y * dst_stride + x];
- if (conv_params->use_jnt_comp_avg) {
- tmp = tmp * conv_params->fwd_offset + res * conv_params->bck_offset;
- tmp = tmp >> DIST_PRECISION_BITS;
- } else {
- tmp += res;
- tmp = tmp >> 1;
- }
- tmp -= (1 << (offset_bits - conv_params->round_1)) +
- (1 << (offset_bits - conv_params->round_1 - 1));
- dst8[y * dst8_stride + x] =
- clip_pixel(ROUND_POWER_OF_TWO(tmp, round_bits));
- } else {
- dst[y * dst_stride + x] = res;
- }
- }
- }
-}
-
-void av1_jnt_convolve_y_c(const uint8_t *src, int src_stride, uint8_t *dst8,
- int dst8_stride, int w, int h,
- const InterpFilterParams *filter_params_x,
- const InterpFilterParams *filter_params_y,
- const int subpel_x_q4, const int subpel_y_q4,
- ConvolveParams *conv_params) {
- CONV_BUF_TYPE *dst = conv_params->dst;
- int dst_stride = conv_params->dst_stride;
- const int fo_vert = filter_params_y->taps / 2 - 1;
- const int bits = FILTER_BITS - conv_params->round_0;
- const int bd = 8;
- const int offset_bits = bd + 2 * FILTER_BITS - conv_params->round_0;
- const int round_offset = (1 << (offset_bits - conv_params->round_1)) +
- (1 << (offset_bits - conv_params->round_1 - 1));
- const int round_bits =
- 2 * FILTER_BITS - conv_params->round_0 - conv_params->round_1;
- (void)filter_params_x;
- (void)subpel_x_q4;
-
- // vertical filter
- const int16_t *y_filter = av1_get_interp_filter_subpel_kernel(
- filter_params_y, subpel_y_q4 & SUBPEL_MASK);
- for (int y = 0; y < h; ++y) {
- for (int x = 0; x < w; ++x) {
- int32_t res = 0;
- for (int k = 0; k < filter_params_y->taps; ++k) {
- res += y_filter[k] * src[(y - fo_vert + k) * src_stride + x];
- }
- res *= (1 << bits);
- res = ROUND_POWER_OF_TWO(res, conv_params->round_1) + round_offset;
-
- if (conv_params->do_average) {
- int32_t tmp = dst[y * dst_stride + x];
- if (conv_params->use_jnt_comp_avg) {
- tmp = tmp * conv_params->fwd_offset + res * conv_params->bck_offset;
- tmp = tmp >> DIST_PRECISION_BITS;
- } else {
- tmp += res;
- tmp = tmp >> 1;
- }
- tmp -= round_offset;
- dst8[y * dst8_stride + x] =
- clip_pixel(ROUND_POWER_OF_TWO(tmp, round_bits));
- } else {
- dst[y * dst_stride + x] = res;
- }
- }
- }
-}
-
-void av1_jnt_convolve_x_c(const uint8_t *src, int src_stride, uint8_t *dst8,
- int dst8_stride, int w, int h,
- const InterpFilterParams *filter_params_x,
- const InterpFilterParams *filter_params_y,
- const int subpel_x_q4, const int subpel_y_q4,
- ConvolveParams *conv_params) {
- CONV_BUF_TYPE *dst = conv_params->dst;
- int dst_stride = conv_params->dst_stride;
- const int fo_horiz = filter_params_x->taps / 2 - 1;
- const int bits = FILTER_BITS - conv_params->round_1;
- const int bd = 8;
- const int offset_bits = bd + 2 * FILTER_BITS - conv_params->round_0;
- const int round_offset = (1 << (offset_bits - conv_params->round_1)) +
- (1 << (offset_bits - conv_params->round_1 - 1));
- const int round_bits =
- 2 * FILTER_BITS - conv_params->round_0 - conv_params->round_1;
- (void)filter_params_y;
- (void)subpel_y_q4;
-
- // horizontal filter
- const int16_t *x_filter = av1_get_interp_filter_subpel_kernel(
- filter_params_x, subpel_x_q4 & SUBPEL_MASK);
- for (int y = 0; y < h; ++y) {
- for (int x = 0; x < w; ++x) {
- int32_t res = 0;
- for (int k = 0; k < filter_params_x->taps; ++k) {
- res += x_filter[k] * src[y * src_stride + x - fo_horiz + k];
- }
- res = (1 << bits) * ROUND_POWER_OF_TWO(res, conv_params->round_0);
- res += round_offset;
-
- if (conv_params->do_average) {
- int32_t tmp = dst[y * dst_stride + x];
- if (conv_params->use_jnt_comp_avg) {
- tmp = tmp * conv_params->fwd_offset + res * conv_params->bck_offset;
- tmp = tmp >> DIST_PRECISION_BITS;
- } else {
- tmp += res;
- tmp = tmp >> 1;
- }
- tmp -= round_offset;
- dst8[y * dst8_stride + x] =
- clip_pixel(ROUND_POWER_OF_TWO(tmp, round_bits));
- } else {
- dst[y * dst_stride + x] = res;
- }
- }
- }
-}
-
-void av1_jnt_convolve_2d_copy_c(const uint8_t *src, int src_stride,
- uint8_t *dst8, int dst8_stride, int w, int h,
- const InterpFilterParams *filter_params_x,
- const InterpFilterParams *filter_params_y,
- const int subpel_x_q4, const int subpel_y_q4,
- ConvolveParams *conv_params) {
- CONV_BUF_TYPE *dst = conv_params->dst;
- int dst_stride = conv_params->dst_stride;
- const int bits =
- FILTER_BITS * 2 - conv_params->round_1 - conv_params->round_0;
- const int bd = 8;
- const int offset_bits = bd + 2 * FILTER_BITS - conv_params->round_0;
- const int round_offset = (1 << (offset_bits - conv_params->round_1)) +
- (1 << (offset_bits - conv_params->round_1 - 1));
- (void)filter_params_x;
- (void)filter_params_y;
- (void)subpel_x_q4;
- (void)subpel_y_q4;
-
- for (int y = 0; y < h; ++y) {
- for (int x = 0; x < w; ++x) {
- CONV_BUF_TYPE res = src[y * src_stride + x] << bits;
- res += round_offset;
-
- if (conv_params->do_average) {
- int32_t tmp = dst[y * dst_stride + x];
- if (conv_params->use_jnt_comp_avg) {
- tmp = tmp * conv_params->fwd_offset + res * conv_params->bck_offset;
- tmp = tmp >> DIST_PRECISION_BITS;
- } else {
- tmp += res;
- tmp = tmp >> 1;
- }
- tmp -= round_offset;
- dst8[y * dst8_stride + x] = clip_pixel(ROUND_POWER_OF_TWO(tmp, bits));
- } else {
- dst[y * dst_stride + x] = res;
- }
- }
- }
-}
-
-void av1_convolve_2d_scale_c(const uint8_t *src, int src_stride, uint8_t *dst8,
- int dst8_stride, int w, int h,
- const InterpFilterParams *filter_params_x,
- const InterpFilterParams *filter_params_y,
- const int subpel_x_qn, const int x_step_qn,
- const int subpel_y_qn, const int y_step_qn,
- ConvolveParams *conv_params) {
- int16_t im_block[(2 * MAX_SB_SIZE + MAX_FILTER_TAP) * MAX_SB_SIZE];
- int im_h = (((h - 1) * y_step_qn + subpel_y_qn) >> SCALE_SUBPEL_BITS) +
- filter_params_y->taps;
- CONV_BUF_TYPE *dst16 = conv_params->dst;
- const int dst16_stride = conv_params->dst_stride;
- const int bits =
- FILTER_BITS * 2 - conv_params->round_0 - conv_params->round_1;
- assert(bits >= 0);
- int im_stride = w;
- const int fo_vert = filter_params_y->taps / 2 - 1;
- const int fo_horiz = filter_params_x->taps / 2 - 1;
- const int bd = 8;
-
- // horizontal filter
- const uint8_t *src_horiz = src - fo_vert * src_stride;
- for (int y = 0; y < im_h; ++y) {
- int x_qn = subpel_x_qn;
- for (int x = 0; x < w; ++x, x_qn += x_step_qn) {
- const uint8_t *const src_x = &src_horiz[(x_qn >> SCALE_SUBPEL_BITS)];
- const int x_filter_idx = (x_qn & SCALE_SUBPEL_MASK) >> SCALE_EXTRA_BITS;
- assert(x_filter_idx < SUBPEL_SHIFTS);
- const int16_t *x_filter =
- av1_get_interp_filter_subpel_kernel(filter_params_x, x_filter_idx);
- int32_t sum = (1 << (bd + FILTER_BITS - 1));
- for (int k = 0; k < filter_params_x->taps; ++k) {
- sum += x_filter[k] * src_x[k - fo_horiz];
- }
- assert(0 <= sum && sum < (1 << (bd + FILTER_BITS + 1)));
- im_block[y * im_stride + x] =
- (int16_t)ROUND_POWER_OF_TWO(sum, conv_params->round_0);
- }
- src_horiz += src_stride;
- }
-
- // vertical filter
- int16_t *src_vert = im_block + fo_vert * im_stride;
- const int offset_bits = bd + 2 * FILTER_BITS - conv_params->round_0;
- for (int x = 0; x < w; ++x) {
- int y_qn = subpel_y_qn;
- for (int y = 0; y < h; ++y, y_qn += y_step_qn) {
- const int16_t *src_y = &src_vert[(y_qn >> SCALE_SUBPEL_BITS) * im_stride];
- const int y_filter_idx = (y_qn & SCALE_SUBPEL_MASK) >> SCALE_EXTRA_BITS;
- assert(y_filter_idx < SUBPEL_SHIFTS);
- const int16_t *y_filter =
- av1_get_interp_filter_subpel_kernel(filter_params_y, y_filter_idx);
- int32_t sum = 1 << offset_bits;
- for (int k = 0; k < filter_params_y->taps; ++k) {
- sum += y_filter[k] * src_y[(k - fo_vert) * im_stride];
- }
- assert(0 <= sum && sum < (1 << (offset_bits + 2)));
- CONV_BUF_TYPE res = ROUND_POWER_OF_TWO(sum, conv_params->round_1);
- if (conv_params->is_compound) {
- if (conv_params->do_average) {
- int32_t tmp = dst16[y * dst16_stride + x];
- if (conv_params->use_jnt_comp_avg) {
- tmp = tmp * conv_params->fwd_offset + res * conv_params->bck_offset;
- tmp = tmp >> DIST_PRECISION_BITS;
- } else {
- tmp += res;
- tmp = tmp >> 1;
- }
- /* Subtract round offset and convolve round */
- tmp = tmp - ((1 << (offset_bits - conv_params->round_1)) +
- (1 << (offset_bits - conv_params->round_1 - 1)));
- dst8[y * dst8_stride + x] = clip_pixel(ROUND_POWER_OF_TWO(tmp, bits));
- } else {
- dst16[y * dst16_stride + x] = res;
- }
- } else {
- /* Subtract round offset and convolve round */
- int32_t tmp = res - ((1 << (offset_bits - conv_params->round_1)) +
- (1 << (offset_bits - conv_params->round_1 - 1)));
- dst8[y * dst8_stride + x] = clip_pixel(ROUND_POWER_OF_TWO(tmp, bits));
- }
- }
- src_vert++;
- }
-}
-
-static void convolve_2d_scale_wrapper(
- const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w,
- int h, const InterpFilterParams *filter_params_x,
- const InterpFilterParams *filter_params_y, const int subpel_x_qn,
- const int x_step_qn, const int subpel_y_qn, const int y_step_qn,
- ConvolveParams *conv_params) {
- if (conv_params->is_compound) {
- assert(conv_params->dst != NULL);
- }
- av1_convolve_2d_scale(src, src_stride, dst, dst_stride, w, h, filter_params_x,
- filter_params_y, subpel_x_qn, x_step_qn, subpel_y_qn,
- y_step_qn, conv_params);
-}
-
-// TODO(huisu@google.com): bilinear filtering only needs 2 taps in general. So
-// we may create optimized code to do 2-tap filtering for all bilinear filtering
-// usages, not just IntraBC.
-static void convolve_2d_for_intrabc(const uint8_t *src, int src_stride,
- uint8_t *dst, int dst_stride, int w, int h,
- int subpel_x_q4, int subpel_y_q4,
- ConvolveParams *conv_params) {
- const InterpFilterParams *filter_params_x =
- subpel_x_q4 ? &av1_intrabc_filter_params : NULL;
- const InterpFilterParams *filter_params_y =
- subpel_y_q4 ? &av1_intrabc_filter_params : NULL;
- if (subpel_x_q4 != 0 && subpel_y_q4 != 0) {
- av1_convolve_2d_sr_c(src, src_stride, dst, dst_stride, w, h,
- filter_params_x, filter_params_y, 0, 0, conv_params);
- } else if (subpel_x_q4 != 0) {
- av1_convolve_x_sr_c(src, src_stride, dst, dst_stride, w, h, filter_params_x,
- filter_params_y, 0, 0, conv_params);
- } else {
- av1_convolve_y_sr_c(src, src_stride, dst, dst_stride, w, h, filter_params_x,
- filter_params_y, 0, 0, conv_params);
- }
-}
-
-void av1_convolve_2d_facade(const uint8_t *src, int src_stride, uint8_t *dst,
- int dst_stride, int w, int h,
- InterpFilters interp_filters, const int subpel_x_q4,
- int x_step_q4, const int subpel_y_q4, int y_step_q4,
- int scaled, ConvolveParams *conv_params,
- const struct scale_factors *sf, int is_intrabc) {
- assert(IMPLIES(is_intrabc, !scaled));
- (void)x_step_q4;
- (void)y_step_q4;
- (void)dst;
- (void)dst_stride;
-
- if (is_intrabc && (subpel_x_q4 != 0 || subpel_y_q4 != 0)) {
- convolve_2d_for_intrabc(src, src_stride, dst, dst_stride, w, h, subpel_x_q4,
- subpel_y_q4, conv_params);
- return;
- }
-
- InterpFilter filter_x = 0;
- InterpFilter filter_y = 0;
- const int need_filter_params_x = (subpel_x_q4 != 0) | scaled;
- const int need_filter_params_y = (subpel_y_q4 != 0) | scaled;
- if (need_filter_params_x)
- filter_x = av1_extract_interp_filter(interp_filters, 1);
- if (need_filter_params_y)
- filter_y = av1_extract_interp_filter(interp_filters, 0);
- const InterpFilterParams *filter_params_x =
- need_filter_params_x
- ? av1_get_interp_filter_params_with_block_size(filter_x, w)
- : NULL;
- const InterpFilterParams *filter_params_y =
- need_filter_params_y
- ? av1_get_interp_filter_params_with_block_size(filter_y, h)
- : NULL;
-
- if (scaled) {
- convolve_2d_scale_wrapper(src, src_stride, dst, dst_stride, w, h,
- filter_params_x, filter_params_y, subpel_x_q4,
- x_step_q4, subpel_y_q4, y_step_q4, conv_params);
- } else {
- sf->convolve[subpel_x_q4 != 0][subpel_y_q4 != 0][conv_params->is_compound](
- src, src_stride, dst, dst_stride, w, h, filter_params_x,
- filter_params_y, subpel_x_q4, subpel_y_q4, conv_params);
- }
-}
-
-void av1_highbd_convolve_2d_copy_sr_c(
- const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w,
- int h, const InterpFilterParams *filter_params_x,
- const InterpFilterParams *filter_params_y, const int subpel_x_q4,
- const int subpel_y_q4, ConvolveParams *conv_params, int bd) {
- (void)filter_params_x;
- (void)filter_params_y;
- (void)subpel_x_q4;
- (void)subpel_y_q4;
- (void)conv_params;
- (void)bd;
-
- for (int y = 0; y < h; ++y) {
- memcpy(dst + y * dst_stride, src + y * src_stride, w * sizeof(src[0]));
- }
-}
-
-void av1_highbd_convolve_x_sr_c(const uint16_t *src, int src_stride,
- uint16_t *dst, int dst_stride, int w, int h,
- const InterpFilterParams *filter_params_x,
- const InterpFilterParams *filter_params_y,
- const int subpel_x_q4, const int subpel_y_q4,
- ConvolveParams *conv_params, int bd) {
- const int fo_horiz = filter_params_x->taps / 2 - 1;
- const int bits = FILTER_BITS - conv_params->round_0;
- (void)filter_params_y;
- (void)subpel_y_q4;
-
- assert(bits >= 0);
- assert((FILTER_BITS - conv_params->round_1) >= 0 ||
- ((conv_params->round_0 + conv_params->round_1) == 2 * FILTER_BITS));
-
- // horizontal filter
- const int16_t *x_filter = av1_get_interp_filter_subpel_kernel(
- filter_params_x, subpel_x_q4 & SUBPEL_MASK);
- for (int y = 0; y < h; ++y) {
- for (int x = 0; x < w; ++x) {
- int32_t res = 0;
- for (int k = 0; k < filter_params_x->taps; ++k) {
- res += x_filter[k] * src[y * src_stride + x - fo_horiz + k];
- }
- res = ROUND_POWER_OF_TWO(res, conv_params->round_0);
- dst[y * dst_stride + x] =
- clip_pixel_highbd(ROUND_POWER_OF_TWO(res, bits), bd);
- }
- }
-}
-
-void av1_highbd_convolve_y_sr_c(const uint16_t *src, int src_stride,
- uint16_t *dst, int dst_stride, int w, int h,
- const InterpFilterParams *filter_params_x,
- const InterpFilterParams *filter_params_y,
- const int subpel_x_q4, const int subpel_y_q4,
- ConvolveParams *conv_params, int bd) {
- const int fo_vert = filter_params_y->taps / 2 - 1;
- (void)filter_params_x;
- (void)subpel_x_q4;
- (void)conv_params;
-
- assert(conv_params->round_0 <= FILTER_BITS);
- assert(((conv_params->round_0 + conv_params->round_1) <= (FILTER_BITS + 1)) ||
- ((conv_params->round_0 + conv_params->round_1) == (2 * FILTER_BITS)));
- // vertical filter
- const int16_t *y_filter = av1_get_interp_filter_subpel_kernel(
- filter_params_y, subpel_y_q4 & SUBPEL_MASK);
- for (int y = 0; y < h; ++y) {
- for (int x = 0; x < w; ++x) {
- int32_t res = 0;
- for (int k = 0; k < filter_params_y->taps; ++k) {
- res += y_filter[k] * src[(y - fo_vert + k) * src_stride + x];
- }
- dst[y * dst_stride + x] =
- clip_pixel_highbd(ROUND_POWER_OF_TWO(res, FILTER_BITS), bd);
- }
- }
-}
-
-void av1_highbd_convolve_2d_sr_c(const uint16_t *src, int src_stride,
- uint16_t *dst, int dst_stride, int w, int h,
- const InterpFilterParams *filter_params_x,
- const InterpFilterParams *filter_params_y,
- const int subpel_x_q4, const int subpel_y_q4,
- ConvolveParams *conv_params, int bd) {
- int16_t im_block[(MAX_SB_SIZE + MAX_FILTER_TAP - 1) * MAX_SB_SIZE];
- int im_h = h + filter_params_y->taps - 1;
- int im_stride = w;
- const int fo_vert = filter_params_y->taps / 2 - 1;
- const int fo_horiz = filter_params_x->taps / 2 - 1;
- const int bits =
- FILTER_BITS * 2 - conv_params->round_0 - conv_params->round_1;
- assert(bits >= 0);
-
- // horizontal filter
- const uint16_t *src_horiz = src - fo_vert * src_stride;
- const int16_t *x_filter = av1_get_interp_filter_subpel_kernel(
- filter_params_x, subpel_x_q4 & SUBPEL_MASK);
- for (int y = 0; y < im_h; ++y) {
- for (int x = 0; x < w; ++x) {
- int32_t sum = (1 << (bd + FILTER_BITS - 1));
- for (int k = 0; k < filter_params_x->taps; ++k) {
- sum += x_filter[k] * src_horiz[y * src_stride + x - fo_horiz + k];
- }
- assert(0 <= sum && sum < (1 << (bd + FILTER_BITS + 1)));
- im_block[y * im_stride + x] =
- ROUND_POWER_OF_TWO(sum, conv_params->round_0);
- }
- }
-
- // vertical filter
- int16_t *src_vert = im_block + fo_vert * im_stride;
- const int16_t *y_filter = av1_get_interp_filter_subpel_kernel(
- filter_params_y, subpel_y_q4 & SUBPEL_MASK);
- const int offset_bits = bd + 2 * FILTER_BITS - conv_params->round_0;
- for (int y = 0; y < h; ++y) {
- for (int x = 0; x < w; ++x) {
- int32_t sum = 1 << offset_bits;
- for (int k = 0; k < filter_params_y->taps; ++k) {
- sum += y_filter[k] * src_vert[(y - fo_vert + k) * im_stride + x];
- }
- assert(0 <= sum && sum < (1 << (offset_bits + 2)));
- int32_t res = ROUND_POWER_OF_TWO(sum, conv_params->round_1) -
- ((1 << (offset_bits - conv_params->round_1)) +
- (1 << (offset_bits - conv_params->round_1 - 1)));
- dst[y * dst_stride + x] =
- clip_pixel_highbd(ROUND_POWER_OF_TWO(res, bits), bd);
- }
- }
-}
-
-void av1_highbd_jnt_convolve_2d_c(const uint16_t *src, int src_stride,
- uint16_t *dst16, int dst16_stride, int w,
- int h,
- const InterpFilterParams *filter_params_x,
- const InterpFilterParams *filter_params_y,
- const int subpel_x_q4, const int subpel_y_q4,
- ConvolveParams *conv_params, int bd) {
- int x, y, k;
- int16_t im_block[(MAX_SB_SIZE + MAX_FILTER_TAP - 1) * MAX_SB_SIZE];
- CONV_BUF_TYPE *dst = conv_params->dst;
- int dst_stride = conv_params->dst_stride;
- int im_h = h + filter_params_y->taps - 1;
- int im_stride = w;
- const int fo_vert = filter_params_y->taps / 2 - 1;
- const int fo_horiz = filter_params_x->taps / 2 - 1;
- const int round_bits =
- 2 * FILTER_BITS - conv_params->round_0 - conv_params->round_1;
- assert(round_bits >= 0);
-
- // horizontal filter
- const uint16_t *src_horiz = src - fo_vert * src_stride;
- const int16_t *x_filter = av1_get_interp_filter_subpel_kernel(
- filter_params_x, subpel_x_q4 & SUBPEL_MASK);
- for (y = 0; y < im_h; ++y) {
- for (x = 0; x < w; ++x) {
- int32_t sum = (1 << (bd + FILTER_BITS - 1));
- for (k = 0; k < filter_params_x->taps; ++k) {
- sum += x_filter[k] * src_horiz[y * src_stride + x - fo_horiz + k];
- }
- assert(0 <= sum && sum < (1 << (bd + FILTER_BITS + 1)));
- (void)bd;
- im_block[y * im_stride + x] =
- (int16_t)ROUND_POWER_OF_TWO(sum, conv_params->round_0);
- }
- }
-
- // vertical filter
- int16_t *src_vert = im_block + fo_vert * im_stride;
- const int offset_bits = bd + 2 * FILTER_BITS - conv_params->round_0;
- const int16_t *y_filter = av1_get_interp_filter_subpel_kernel(
- filter_params_y, subpel_y_q4 & SUBPEL_MASK);
- for (y = 0; y < h; ++y) {
- for (x = 0; x < w; ++x) {
- int32_t sum = 1 << offset_bits;
- for (k = 0; k < filter_params_y->taps; ++k) {
- sum += y_filter[k] * src_vert[(y - fo_vert + k) * im_stride + x];
- }
- assert(0 <= sum && sum < (1 << (offset_bits + 2)));
- CONV_BUF_TYPE res = ROUND_POWER_OF_TWO(sum, conv_params->round_1);
- if (conv_params->do_average) {
- int32_t tmp = dst[y * dst_stride + x];
- if (conv_params->use_jnt_comp_avg) {
- tmp = tmp * conv_params->fwd_offset + res * conv_params->bck_offset;
- tmp = tmp >> DIST_PRECISION_BITS;
- } else {
- tmp += res;
- tmp = tmp >> 1;
- }
- tmp -= (1 << (offset_bits - conv_params->round_1)) +
- (1 << (offset_bits - conv_params->round_1 - 1));
- dst16[y * dst16_stride + x] =
- clip_pixel_highbd(ROUND_POWER_OF_TWO(tmp, round_bits), bd);
- } else {
- dst[y * dst_stride + x] = res;
- }
- }
- }
-}
-
-void av1_highbd_jnt_convolve_x_c(const uint16_t *src, int src_stride,
- uint16_t *dst16, int dst16_stride, int w,
- int h,
- const InterpFilterParams *filter_params_x,
- const InterpFilterParams *filter_params_y,
- const int subpel_x_q4, const int subpel_y_q4,
- ConvolveParams *conv_params, int bd) {
- CONV_BUF_TYPE *dst = conv_params->dst;
- int dst_stride = conv_params->dst_stride;
- const int fo_horiz = filter_params_x->taps / 2 - 1;
- const int bits = FILTER_BITS - conv_params->round_1;
- const int offset_bits = bd + 2 * FILTER_BITS - conv_params->round_0;
- const int round_offset = (1 << (offset_bits - conv_params->round_1)) +
- (1 << (offset_bits - conv_params->round_1 - 1));
- const int round_bits =
- 2 * FILTER_BITS - conv_params->round_0 - conv_params->round_1;
- assert(round_bits >= 0);
- (void)filter_params_y;
- (void)subpel_y_q4;
- assert(bits >= 0);
- // horizontal filter
- const int16_t *x_filter = av1_get_interp_filter_subpel_kernel(
- filter_params_x, subpel_x_q4 & SUBPEL_MASK);
- for (int y = 0; y < h; ++y) {
- for (int x = 0; x < w; ++x) {
- int32_t res = 0;
- for (int k = 0; k < filter_params_x->taps; ++k) {
- res += x_filter[k] * src[y * src_stride + x - fo_horiz + k];
- }
- res = (1 << bits) * ROUND_POWER_OF_TWO(res, conv_params->round_0);
- res += round_offset;
-
- if (conv_params->do_average) {
- int32_t tmp = dst[y * dst_stride + x];
- if (conv_params->use_jnt_comp_avg) {
- tmp = tmp * conv_params->fwd_offset + res * conv_params->bck_offset;
- tmp = tmp >> DIST_PRECISION_BITS;
- } else {
- tmp += res;
- tmp = tmp >> 1;
- }
- tmp -= round_offset;
- dst16[y * dst16_stride + x] =
- clip_pixel_highbd(ROUND_POWER_OF_TWO(tmp, round_bits), bd);
- } else {
- dst[y * dst_stride + x] = res;
- }
- }
- }
-}
-
-void av1_highbd_jnt_convolve_y_c(const uint16_t *src, int src_stride,
- uint16_t *dst16, int dst16_stride, int w,
- int h,
- const InterpFilterParams *filter_params_x,
- const InterpFilterParams *filter_params_y,
- const int subpel_x_q4, const int subpel_y_q4,
- ConvolveParams *conv_params, int bd) {
- CONV_BUF_TYPE *dst = conv_params->dst;
- int dst_stride = conv_params->dst_stride;
- const int fo_vert = filter_params_y->taps / 2 - 1;
- const int bits = FILTER_BITS - conv_params->round_0;
- const int offset_bits = bd + 2 * FILTER_BITS - conv_params->round_0;
- const int round_offset = (1 << (offset_bits - conv_params->round_1)) +
- (1 << (offset_bits - conv_params->round_1 - 1));
- const int round_bits =
- 2 * FILTER_BITS - conv_params->round_0 - conv_params->round_1;
- assert(round_bits >= 0);
- (void)filter_params_x;
- (void)subpel_x_q4;
- assert(bits >= 0);
- // vertical filter
- const int16_t *y_filter = av1_get_interp_filter_subpel_kernel(
- filter_params_y, subpel_y_q4 & SUBPEL_MASK);
- for (int y = 0; y < h; ++y) {
- for (int x = 0; x < w; ++x) {
- int32_t res = 0;
- for (int k = 0; k < filter_params_y->taps; ++k) {
- res += y_filter[k] * src[(y - fo_vert + k) * src_stride + x];
- }
- res *= (1 << bits);
- res = ROUND_POWER_OF_TWO(res, conv_params->round_1) + round_offset;
-
- if (conv_params->do_average) {
- int32_t tmp = dst[y * dst_stride + x];
- if (conv_params->use_jnt_comp_avg) {
- tmp = tmp * conv_params->fwd_offset + res * conv_params->bck_offset;
- tmp = tmp >> DIST_PRECISION_BITS;
- } else {
- tmp += res;
- tmp = tmp >> 1;
- }
- tmp -= round_offset;
- dst16[y * dst16_stride + x] =
- clip_pixel_highbd(ROUND_POWER_OF_TWO(tmp, round_bits), bd);
- } else {
- dst[y * dst_stride + x] = res;
- }
- }
- }
-}
-
-void av1_highbd_jnt_convolve_2d_copy_c(
- const uint16_t *src, int src_stride, uint16_t *dst16, int dst16_stride,
- int w, int h, const InterpFilterParams *filter_params_x,
- const InterpFilterParams *filter_params_y, const int subpel_x_q4,
- const int subpel_y_q4, ConvolveParams *conv_params, int bd) {
- CONV_BUF_TYPE *dst = conv_params->dst;
- int dst_stride = conv_params->dst_stride;
- const int bits =
- FILTER_BITS * 2 - conv_params->round_1 - conv_params->round_0;
- const int offset_bits = bd + 2 * FILTER_BITS - conv_params->round_0;
- const int round_offset = (1 << (offset_bits - conv_params->round_1)) +
- (1 << (offset_bits - conv_params->round_1 - 1));
- assert(bits >= 0);
- (void)filter_params_x;
- (void)filter_params_y;
- (void)subpel_x_q4;
- (void)subpel_y_q4;
-
- for (int y = 0; y < h; ++y) {
- for (int x = 0; x < w; ++x) {
- CONV_BUF_TYPE res = src[y * src_stride + x] << bits;
- res += round_offset;
- if (conv_params->do_average) {
- int32_t tmp = dst[y * dst_stride + x];
- if (conv_params->use_jnt_comp_avg) {
- tmp = tmp * conv_params->fwd_offset + res * conv_params->bck_offset;
- tmp = tmp >> DIST_PRECISION_BITS;
- } else {
- tmp += res;
- tmp = tmp >> 1;
- }
- tmp -= round_offset;
- dst16[y * dst16_stride + x] =
- clip_pixel_highbd(ROUND_POWER_OF_TWO(tmp, bits), bd);
- } else {
- dst[y * dst_stride + x] = res;
- }
- }
- }
-}
-
-void av1_highbd_convolve_2d_scale_c(const uint16_t *src, int src_stride,
- uint16_t *dst, int dst_stride, int w, int h,
- const InterpFilterParams *filter_params_x,
- const InterpFilterParams *filter_params_y,
- const int subpel_x_qn, const int x_step_qn,
- const int subpel_y_qn, const int y_step_qn,
- ConvolveParams *conv_params, int bd) {
- int16_t im_block[(2 * MAX_SB_SIZE + MAX_FILTER_TAP) * MAX_SB_SIZE];
- int im_h = (((h - 1) * y_step_qn + subpel_y_qn) >> SCALE_SUBPEL_BITS) +
- filter_params_y->taps;
- int im_stride = w;
- const int fo_vert = filter_params_y->taps / 2 - 1;
- const int fo_horiz = filter_params_x->taps / 2 - 1;
- CONV_BUF_TYPE *dst16 = conv_params->dst;
- const int dst16_stride = conv_params->dst_stride;
- const int bits =
- FILTER_BITS * 2 - conv_params->round_0 - conv_params->round_1;
- assert(bits >= 0);
- // horizontal filter
- const uint16_t *src_horiz = src - fo_vert * src_stride;
- for (int y = 0; y < im_h; ++y) {
- int x_qn = subpel_x_qn;
- for (int x = 0; x < w; ++x, x_qn += x_step_qn) {
- const uint16_t *const src_x = &src_horiz[(x_qn >> SCALE_SUBPEL_BITS)];
- const int x_filter_idx = (x_qn & SCALE_SUBPEL_MASK) >> SCALE_EXTRA_BITS;
- assert(x_filter_idx < SUBPEL_SHIFTS);
- const int16_t *x_filter =
- av1_get_interp_filter_subpel_kernel(filter_params_x, x_filter_idx);
- int32_t sum = (1 << (bd + FILTER_BITS - 1));
- for (int k = 0; k < filter_params_x->taps; ++k) {
- sum += x_filter[k] * src_x[k - fo_horiz];
- }
- assert(0 <= sum && sum < (1 << (bd + FILTER_BITS + 1)));
- im_block[y * im_stride + x] =
- (int16_t)ROUND_POWER_OF_TWO(sum, conv_params->round_0);
- }
- src_horiz += src_stride;
- }
-
- // vertical filter
- int16_t *src_vert = im_block + fo_vert * im_stride;
- const int offset_bits = bd + 2 * FILTER_BITS - conv_params->round_0;
- for (int x = 0; x < w; ++x) {
- int y_qn = subpel_y_qn;
- for (int y = 0; y < h; ++y, y_qn += y_step_qn) {
- const int16_t *src_y = &src_vert[(y_qn >> SCALE_SUBPEL_BITS) * im_stride];
- const int y_filter_idx = (y_qn & SCALE_SUBPEL_MASK) >> SCALE_EXTRA_BITS;
- assert(y_filter_idx < SUBPEL_SHIFTS);
- const int16_t *y_filter =
- av1_get_interp_filter_subpel_kernel(filter_params_y, y_filter_idx);
- int32_t sum = 1 << offset_bits;
- for (int k = 0; k < filter_params_y->taps; ++k) {
- sum += y_filter[k] * src_y[(k - fo_vert) * im_stride];
- }
- assert(0 <= sum && sum < (1 << (offset_bits + 2)));
- CONV_BUF_TYPE res = ROUND_POWER_OF_TWO(sum, conv_params->round_1);
- if (conv_params->is_compound) {
- if (conv_params->do_average) {
- int32_t tmp = dst16[y * dst16_stride + x];
- if (conv_params->use_jnt_comp_avg) {
- tmp = tmp * conv_params->fwd_offset + res * conv_params->bck_offset;
- tmp = tmp >> DIST_PRECISION_BITS;
- } else {
- tmp += res;
- tmp = tmp >> 1;
- }
- /* Subtract round offset and convolve round */
- tmp = tmp - ((1 << (offset_bits - conv_params->round_1)) +
- (1 << (offset_bits - conv_params->round_1 - 1)));
- dst[y * dst_stride + x] =
- clip_pixel_highbd(ROUND_POWER_OF_TWO(tmp, bits), bd);
- } else {
- dst16[y * dst16_stride + x] = res;
- }
- } else {
- /* Subtract round offset and convolve round */
- int32_t tmp = res - ((1 << (offset_bits - conv_params->round_1)) +
- (1 << (offset_bits - conv_params->round_1 - 1)));
- dst[y * dst_stride + x] =
- clip_pixel_highbd(ROUND_POWER_OF_TWO(tmp, bits), bd);
- }
- }
- src_vert++;
- }
-}
-
-static void highbd_convolve_2d_for_intrabc(const uint16_t *src, int src_stride,
- uint16_t *dst, int dst_stride, int w,
- int h, int subpel_x_q4,
- int subpel_y_q4,
- ConvolveParams *conv_params,
- int bd) {
- const InterpFilterParams *filter_params_x =
- subpel_x_q4 ? &av1_intrabc_filter_params : NULL;
- const InterpFilterParams *filter_params_y =
- subpel_y_q4 ? &av1_intrabc_filter_params : NULL;
- if (subpel_x_q4 != 0 && subpel_y_q4 != 0) {
- av1_highbd_convolve_2d_sr_c(src, src_stride, dst, dst_stride, w, h,
- filter_params_x, filter_params_y, 0, 0,
- conv_params, bd);
- } else if (subpel_x_q4 != 0) {
- av1_highbd_convolve_x_sr_c(src, src_stride, dst, dst_stride, w, h,
- filter_params_x, filter_params_y, 0, 0,
- conv_params, bd);
- } else {
- av1_highbd_convolve_y_sr_c(src, src_stride, dst, dst_stride, w, h,
- filter_params_x, filter_params_y, 0, 0,
- conv_params, bd);
- }
-}
-
-void av1_highbd_convolve_2d_facade(const uint8_t *src8, int src_stride,
- uint8_t *dst8, int dst_stride, int w, int h,
- InterpFilters interp_filters,
- const int subpel_x_q4, int x_step_q4,
- const int subpel_y_q4, int y_step_q4,
- int scaled, ConvolveParams *conv_params,
- const struct scale_factors *sf,
- int is_intrabc, int bd) {
- assert(IMPLIES(is_intrabc, !scaled));
- (void)x_step_q4;
- (void)y_step_q4;
- (void)dst_stride;
- const uint16_t *src = CONVERT_TO_SHORTPTR(src8);
-
- if (is_intrabc && (subpel_x_q4 != 0 || subpel_y_q4 != 0)) {
- uint16_t *dst = CONVERT_TO_SHORTPTR(dst8);
- highbd_convolve_2d_for_intrabc(src, src_stride, dst, dst_stride, w, h,
- subpel_x_q4, subpel_y_q4, conv_params, bd);
- return;
- }
-
- InterpFilter filter_x = 0;
- InterpFilter filter_y = 0;
- const int need_filter_params_x = (subpel_x_q4 != 0) | scaled;
- const int need_filter_params_y = (subpel_y_q4 != 0) | scaled;
- if (need_filter_params_x)
- filter_x = av1_extract_interp_filter(interp_filters, 1);
- if (need_filter_params_y)
- filter_y = av1_extract_interp_filter(interp_filters, 0);
- const InterpFilterParams *filter_params_x =
- need_filter_params_x
- ? av1_get_interp_filter_params_with_block_size(filter_x, w)
- : NULL;
- const InterpFilterParams *filter_params_y =
- need_filter_params_y
- ? av1_get_interp_filter_params_with_block_size(filter_y, h)
- : NULL;
-
- if (scaled) {
- uint16_t *dst = CONVERT_TO_SHORTPTR(dst8);
- if (conv_params->is_compound) {
- assert(conv_params->dst != NULL);
- }
- av1_highbd_convolve_2d_scale(src, src_stride, dst, dst_stride, w, h,
- filter_params_x, filter_params_y, subpel_x_q4,
- x_step_q4, subpel_y_q4, y_step_q4, conv_params,
- bd);
- } else {
- uint16_t *dst = CONVERT_TO_SHORTPTR(dst8);
-
- sf->highbd_convolve[subpel_x_q4 != 0][subpel_y_q4 !=
- 0][conv_params->is_compound](
- src, src_stride, dst, dst_stride, w, h, filter_params_x,
- filter_params_y, subpel_x_q4, subpel_y_q4, conv_params, bd);
- }
-}
-
-// Note: Fixed size intermediate buffers, place limits on parameters
-// of some functions. 2d filtering proceeds in 2 steps:
-// (1) Interpolate horizontally into an intermediate buffer, temp.
-// (2) Interpolate temp vertically to derive the sub-pixel result.
-// Deriving the maximum number of rows in the temp buffer (135):
-// --Smallest scaling factor is x1/2 ==> y_step_q4 = 32 (Normative).
-// --Largest block size is 128x128 pixels.
-// --128 rows in the downscaled frame span a distance of (128 - 1) * 32 in the
-// original frame (in 1/16th pixel units).
-// --Must round-up because block may be located at sub-pixel position.
-// --Require an additional SUBPEL_TAPS rows for the 8-tap filter tails.
-// --((128 - 1) * 32 + 15) >> 4 + 8 = 263.
-#define WIENER_MAX_EXT_SIZE 263
-
-static INLINE int horz_scalar_product(const uint8_t *a, const int16_t *b) {
- int sum = 0;
- for (int k = 0; k < SUBPEL_TAPS; ++k) sum += a[k] * b[k];
- return sum;
-}
-
-static INLINE int highbd_horz_scalar_product(const uint16_t *a,
- const int16_t *b) {
- int sum = 0;
- for (int k = 0; k < SUBPEL_TAPS; ++k) sum += a[k] * b[k];
- return sum;
-}
-
-static INLINE int highbd_vert_scalar_product(const uint16_t *a,
- ptrdiff_t a_stride,
- const int16_t *b) {
- int sum = 0;
- for (int k = 0; k < SUBPEL_TAPS; ++k) sum += a[k * a_stride] * b[k];
- return sum;
-}
-
-static const InterpKernel *get_filter_base(const int16_t *filter) {
- // NOTE: This assumes that the filter table is 256-byte aligned.
- // TODO(agrange) Modify to make independent of table alignment.
- return (const InterpKernel *)(((intptr_t)filter) & ~((intptr_t)0xFF));
-}
-
-static int get_filter_offset(const int16_t *f, const InterpKernel *base) {
- return (int)((const InterpKernel *)(intptr_t)f - base);
-}
-
-static void convolve_add_src_horiz_hip(const uint8_t *src, ptrdiff_t src_stride,
- uint16_t *dst, ptrdiff_t dst_stride,
- const InterpKernel *x_filters, int x0_q4,
- int x_step_q4, int w, int h,
- int round0_bits) {
- const int bd = 8;
- src -= SUBPEL_TAPS / 2 - 1;
- for (int y = 0; y < h; ++y) {
- int x_q4 = x0_q4;
- for (int x = 0; x < w; ++x) {
- const uint8_t *const src_x = &src[x_q4 >> SUBPEL_BITS];
- const int16_t *const x_filter = x_filters[x_q4 & SUBPEL_MASK];
- const int rounding = ((int)src_x[SUBPEL_TAPS / 2 - 1] << FILTER_BITS) +
- (1 << (bd + FILTER_BITS - 1));
- const int sum = horz_scalar_product(src_x, x_filter) + rounding;
- dst[x] = (uint16_t)clamp(ROUND_POWER_OF_TWO(sum, round0_bits), 0,
- WIENER_CLAMP_LIMIT(round0_bits, bd) - 1);
- x_q4 += x_step_q4;
- }
- src += src_stride;
- dst += dst_stride;
- }
-}
-
-static void convolve_add_src_vert_hip(const uint16_t *src, ptrdiff_t src_stride,
- uint8_t *dst, ptrdiff_t dst_stride,
- const InterpKernel *y_filters, int y0_q4,
- int y_step_q4, int w, int h,
- int round1_bits) {
- const int bd = 8;
- src -= src_stride * (SUBPEL_TAPS / 2 - 1);
-
- for (int x = 0; x < w; ++x) {
- int y_q4 = y0_q4;
- for (int y = 0; y < h; ++y) {
- const uint16_t *src_y = &src[(y_q4 >> SUBPEL_BITS) * src_stride];
- const int16_t *const y_filter = y_filters[y_q4 & SUBPEL_MASK];
- const int rounding =
- ((int)src_y[(SUBPEL_TAPS / 2 - 1) * src_stride] << FILTER_BITS) -
- (1 << (bd + round1_bits - 1));
- const int sum =
- highbd_vert_scalar_product(src_y, src_stride, y_filter) + rounding;
- dst[y * dst_stride] = clip_pixel(ROUND_POWER_OF_TWO(sum, round1_bits));
- y_q4 += y_step_q4;
- }
- ++src;
- ++dst;
- }
-}
-
-void av1_wiener_convolve_add_src_c(const uint8_t *src, ptrdiff_t src_stride,
- uint8_t *dst, ptrdiff_t dst_stride,
- const int16_t *filter_x, int x_step_q4,
- const int16_t *filter_y, int y_step_q4,
- int w, int h,
- const ConvolveParams *conv_params) {
- const InterpKernel *const filters_x = get_filter_base(filter_x);
- const int x0_q4 = get_filter_offset(filter_x, filters_x);
-
- const InterpKernel *const filters_y = get_filter_base(filter_y);
- const int y0_q4 = get_filter_offset(filter_y, filters_y);
-
- uint16_t temp[WIENER_MAX_EXT_SIZE * MAX_SB_SIZE];
- const int intermediate_height =
- (((h - 1) * y_step_q4 + y0_q4) >> SUBPEL_BITS) + SUBPEL_TAPS - 1;
- memset(temp + (intermediate_height * MAX_SB_SIZE), 0, MAX_SB_SIZE);
-
- assert(w <= MAX_SB_SIZE);
- assert(h <= MAX_SB_SIZE);
- assert(y_step_q4 <= 32);
- assert(x_step_q4 <= 32);
-
- convolve_add_src_horiz_hip(src - src_stride * (SUBPEL_TAPS / 2 - 1),
- src_stride, temp, MAX_SB_SIZE, filters_x, x0_q4,
- x_step_q4, w, intermediate_height,
- conv_params->round_0);
- convolve_add_src_vert_hip(temp + MAX_SB_SIZE * (SUBPEL_TAPS / 2 - 1),
- MAX_SB_SIZE, dst, dst_stride, filters_y, y0_q4,
- y_step_q4, w, h, conv_params->round_1);
-}
-
-static void highbd_convolve_add_src_horiz_hip(
- const uint8_t *src8, ptrdiff_t src_stride, uint16_t *dst,
- ptrdiff_t dst_stride, const InterpKernel *x_filters, int x0_q4,
- int x_step_q4, int w, int h, int round0_bits, int bd) {
- const int extraprec_clamp_limit = WIENER_CLAMP_LIMIT(round0_bits, bd);
- uint16_t *src = CONVERT_TO_SHORTPTR(src8);
- src -= SUBPEL_TAPS / 2 - 1;
- for (int y = 0; y < h; ++y) {
- int x_q4 = x0_q4;
- for (int x = 0; x < w; ++x) {
- const uint16_t *const src_x = &src[x_q4 >> SUBPEL_BITS];
- const int16_t *const x_filter = x_filters[x_q4 & SUBPEL_MASK];
- const int rounding = ((int)src_x[SUBPEL_TAPS / 2 - 1] << FILTER_BITS) +
- (1 << (bd + FILTER_BITS - 1));
- const int sum = highbd_horz_scalar_product(src_x, x_filter) + rounding;
- dst[x] = (uint16_t)clamp(ROUND_POWER_OF_TWO(sum, round0_bits), 0,
- extraprec_clamp_limit - 1);
- x_q4 += x_step_q4;
- }
- src += src_stride;
- dst += dst_stride;
- }
-}
-
-static void highbd_convolve_add_src_vert_hip(
- const uint16_t *src, ptrdiff_t src_stride, uint8_t *dst8,
- ptrdiff_t dst_stride, const InterpKernel *y_filters, int y0_q4,
- int y_step_q4, int w, int h, int round1_bits, int bd) {
- uint16_t *dst = CONVERT_TO_SHORTPTR(dst8);
- src -= src_stride * (SUBPEL_TAPS / 2 - 1);
- for (int x = 0; x < w; ++x) {
- int y_q4 = y0_q4;
- for (int y = 0; y < h; ++y) {
- const uint16_t *src_y = &src[(y_q4 >> SUBPEL_BITS) * src_stride];
- const int16_t *const y_filter = y_filters[y_q4 & SUBPEL_MASK];
- const int rounding =
- ((int)src_y[(SUBPEL_TAPS / 2 - 1) * src_stride] << FILTER_BITS) -
- (1 << (bd + round1_bits - 1));
- const int sum =
- highbd_vert_scalar_product(src_y, src_stride, y_filter) + rounding;
- dst[y * dst_stride] =
- clip_pixel_highbd(ROUND_POWER_OF_TWO(sum, round1_bits), bd);
- y_q4 += y_step_q4;
- }
- ++src;
- ++dst;
- }
-}
-
-void av1_highbd_wiener_convolve_add_src_c(
- const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst,
- ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4,
- const int16_t *filter_y, int y_step_q4, int w, int h,
- const ConvolveParams *conv_params, int bd) {
- const InterpKernel *const filters_x = get_filter_base(filter_x);
- const int x0_q4 = get_filter_offset(filter_x, filters_x);
-
- const InterpKernel *const filters_y = get_filter_base(filter_y);
- const int y0_q4 = get_filter_offset(filter_y, filters_y);
-
- uint16_t temp[WIENER_MAX_EXT_SIZE * MAX_SB_SIZE];
- const int intermediate_height =
- (((h - 1) * y_step_q4 + y0_q4) >> SUBPEL_BITS) + SUBPEL_TAPS;
-
- assert(w <= MAX_SB_SIZE);
- assert(h <= MAX_SB_SIZE);
- assert(y_step_q4 <= 32);
- assert(x_step_q4 <= 32);
- assert(bd + FILTER_BITS - conv_params->round_0 + 2 <= 16);
-
- highbd_convolve_add_src_horiz_hip(src - src_stride * (SUBPEL_TAPS / 2 - 1),
- src_stride, temp, MAX_SB_SIZE, filters_x,
- x0_q4, x_step_q4, w, intermediate_height,
- conv_params->round_0, bd);
- highbd_convolve_add_src_vert_hip(
- temp + MAX_SB_SIZE * (SUBPEL_TAPS / 2 - 1), MAX_SB_SIZE, dst, dst_stride,
- filters_y, y0_q4, y_step_q4, w, h, conv_params->round_1, bd);
-}
diff --git a/third_party/aom/av1/common/convolve.h b/third_party/aom/av1/common/convolve.h
deleted file mode 100644
index 4109dd843..000000000
--- a/third_party/aom/av1/common/convolve.h
+++ /dev/null
@@ -1,125 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#ifndef AOM_AV1_COMMON_CONVOLVE_H_
-#define AOM_AV1_COMMON_CONVOLVE_H_
-#include "av1/common/filter.h"
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-typedef uint16_t CONV_BUF_TYPE;
-typedef struct ConvolveParams {
- int do_average;
- CONV_BUF_TYPE *dst;
- int dst_stride;
- int round_0;
- int round_1;
- int plane;
- int is_compound;
- int use_jnt_comp_avg;
- int fwd_offset;
- int bck_offset;
-} ConvolveParams;
-
-#define ROUND0_BITS 3
-#define COMPOUND_ROUND1_BITS 7
-#define WIENER_ROUND0_BITS 3
-
-#define WIENER_CLAMP_LIMIT(r0, bd) (1 << ((bd) + 1 + FILTER_BITS - r0))
-
-typedef void (*aom_convolve_fn_t)(const uint8_t *src, int src_stride,
- uint8_t *dst, int dst_stride, int w, int h,
- const InterpFilterParams *filter_params_x,
- const InterpFilterParams *filter_params_y,
- const int subpel_x_q4, const int subpel_y_q4,
- ConvolveParams *conv_params);
-
-typedef void (*aom_highbd_convolve_fn_t)(
- const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w,
- int h, const InterpFilterParams *filter_params_x,
- const InterpFilterParams *filter_params_y, const int subpel_x_q4,
- const int subpel_y_q4, ConvolveParams *conv_params, int bd);
-
-struct AV1Common;
-struct scale_factors;
-
-void av1_convolve_2d_facade(const uint8_t *src, int src_stride, uint8_t *dst,
- int dst_stride, int w, int h,
- InterpFilters interp_filters, const int subpel_x_q4,
- int x_step_q4, const int subpel_y_q4, int y_step_q4,
- int scaled, ConvolveParams *conv_params,
- const struct scale_factors *sf, int is_intrabc);
-
-static INLINE ConvolveParams get_conv_params_no_round(int do_average, int plane,
- CONV_BUF_TYPE *dst,
- int dst_stride,
- int is_compound, int bd) {
- ConvolveParams conv_params;
- conv_params.do_average = do_average;
- assert(IMPLIES(do_average, is_compound));
- conv_params.is_compound = is_compound;
- conv_params.round_0 = ROUND0_BITS;
- conv_params.round_1 = is_compound ? COMPOUND_ROUND1_BITS
- : 2 * FILTER_BITS - conv_params.round_0;
- const int intbufrange = bd + FILTER_BITS - conv_params.round_0 + 2;
- assert(IMPLIES(bd < 12, intbufrange <= 16));
- if (intbufrange > 16) {
- conv_params.round_0 += intbufrange - 16;
- if (!is_compound) conv_params.round_1 -= intbufrange - 16;
- }
- // TODO(yunqing): The following dst should only be valid while
- // is_compound = 1;
- conv_params.dst = dst;
- conv_params.dst_stride = dst_stride;
- conv_params.plane = plane;
- return conv_params;
-}
-
-static INLINE ConvolveParams get_conv_params(int do_average, int plane,
- int bd) {
- return get_conv_params_no_round(do_average, plane, NULL, 0, 0, bd);
-}
-
-static INLINE ConvolveParams get_conv_params_wiener(int bd) {
- ConvolveParams conv_params;
- (void)bd;
- conv_params.do_average = 0;
- conv_params.is_compound = 0;
- conv_params.round_0 = WIENER_ROUND0_BITS;
- conv_params.round_1 = 2 * FILTER_BITS - conv_params.round_0;
- const int intbufrange = bd + FILTER_BITS - conv_params.round_0 + 2;
- assert(IMPLIES(bd < 12, intbufrange <= 16));
- if (intbufrange > 16) {
- conv_params.round_0 += intbufrange - 16;
- conv_params.round_1 -= intbufrange - 16;
- }
- conv_params.dst = NULL;
- conv_params.dst_stride = 0;
- conv_params.plane = 0;
- return conv_params;
-}
-
-void av1_highbd_convolve_2d_facade(const uint8_t *src8, int src_stride,
- uint8_t *dst, int dst_stride, int w, int h,
- InterpFilters interp_filters,
- const int subpel_x_q4, int x_step_q4,
- const int subpel_y_q4, int y_step_q4,
- int scaled, ConvolveParams *conv_params,
- const struct scale_factors *sf,
- int is_intrabc, int bd);
-
-#ifdef __cplusplus
-} // extern "C"
-#endif
-
-#endif // AOM_AV1_COMMON_CONVOLVE_H_
diff --git a/third_party/aom/av1/common/debugmodes.c b/third_party/aom/av1/common/debugmodes.c
deleted file mode 100644
index 868f341b5..000000000
--- a/third_party/aom/av1/common/debugmodes.c
+++ /dev/null
@@ -1,107 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#include <stdio.h>
-
-#include "av1/common/blockd.h"
-#include "av1/common/enums.h"
-#include "av1/common/onyxc_int.h"
-
-/* Writes `str` to `f`, immediately followed by a one-line summary of the
- * current frame: its frame counter, show flag and base q index.
- */
-static void log_frame_info(AV1_COMMON *cm, const char *str, FILE *f) {
-  fputs(str, f);
-  fprintf(f, "(Frame %d, Show:%d, Q:%d): \n", cm->current_video_frame,
-          cm->show_frame, cm->base_qindex);
-}
-/* Dumps one byte-sized MB_MODE_INFO member for every entry of the visible
- * mode-info grid.
- *
- * `member_offset` is the byte offset of the member inside MB_MODE_INFO
- * (callers pass offsetof()); the single char stored there is printed as a
- * decimal integer.  A frame header line is written first, and every output
- * row starts with the first character of `descriptor`.
- */
-static void print_mi_data(AV1_COMMON *cm, FILE *file, const char *descriptor,
-                          size_t member_offset) {
-  MB_MODE_INFO **entry = cm->mi_grid_visible;
-  const int num_rows = cm->mi_rows;
-  const int num_cols = cm->mi_cols;
-  const char row_tag = descriptor[0];
-
-  log_frame_info(cm, descriptor, file);
-  for (int r = 0; r < num_rows; ++r) {
-    fprintf(file, "%c ", row_tag);
-    for (int c = 0; c < num_cols; ++c, ++entry) {
-      const char *record = (const char *)entry[0];
-      fprintf(file, "%2d ", record[member_offset]);
-    }
-    fprintf(file, "\n");
-    // Having walked num_cols entries, skip a further MAX_MIB_SIZE slots to
-    // reach the start of the next row.
-    entry += MAX_MIB_SIZE;
-  }
-  fprintf(file, "\n");
-}
-
-// Appends a text dump of the current frame's mode info to the file named
-// `file` (opened in append mode).
-//
-// Written in order: the sb_type, mode, ref_frame[0], tx_size and uv_mode
-// members of every visible MB_MODE_INFO (via print_mi_data()), then the skip
-// flag of every entry, then the first motion vector of every entry as
-// "row:col".
-//
-// If the file cannot be opened the function returns without writing anything.
-void av1_print_modes_and_motion_vectors(AV1_COMMON *cm, const char *file) {
-  int mi_row;
-  int mi_col;
-  FILE *mvs = fopen(file, "a");
-  MB_MODE_INFO **mi = cm->mi_grid_visible;
-  int rows = cm->mi_rows;
-  int cols = cm->mi_cols;
-
-  // fopen() returns NULL on failure; handing a NULL stream to fprintf() or
-  // fclose() is undefined behaviour, so a debug dump that cannot be opened is
-  // simply skipped.
-  if (mvs == NULL) return;
-
-  print_mi_data(cm, mvs, "Partitions:", offsetof(MB_MODE_INFO, sb_type));
-  print_mi_data(cm, mvs, "Modes:", offsetof(MB_MODE_INFO, mode));
-  print_mi_data(cm, mvs, "Ref frame:", offsetof(MB_MODE_INFO, ref_frame[0]));
-  print_mi_data(cm, mvs, "Transform:", offsetof(MB_MODE_INFO, tx_size));
-  print_mi_data(cm, mvs, "UV Modes:", offsetof(MB_MODE_INFO, uv_mode));
-
-  // Output skip information.  `mi` still points at the start of the grid
-  // here because print_mi_data() walks its own copy of the pointer.
-  log_frame_info(cm, "Skips:", mvs);
-  for (mi_row = 0; mi_row < rows; mi_row++) {
-    fprintf(mvs, "S ");
-    for (mi_col = 0; mi_col < cols; mi_col++) {
-      fprintf(mvs, "%2d ", mi[0]->skip);
-      mi++;
-    }
-    fprintf(mvs, "\n");
-    // NOTE(review): each row advances by cols + MAX_MIB_SIZE entries in
-    // total; confirm that this equals the stride of mi_grid_visible.
-    mi += MAX_MIB_SIZE;
-  }
-  fprintf(mvs, "\n");
-
-  // Output motion vectors, restarting from the first grid entry.
-  log_frame_info(cm, "Vectors ", mvs);
-  mi = cm->mi_grid_visible;
-  for (mi_row = 0; mi_row < rows; mi_row++) {
-    fprintf(mvs, "V ");
-    for (mi_col = 0; mi_col < cols; mi_col++) {
-      fprintf(mvs, "%4d:%4d ", mi[0]->mv[0].as_mv.row, mi[0]->mv[0].as_mv.col);
-      mi++;
-    }
-    fprintf(mvs, "\n");
-    mi += MAX_MIB_SIZE;
-  }
-  fprintf(mvs, "\n");
-
-  fclose(mvs);
-}
-
-// Writes the `size` bytes starting at `data` to the file named `filename`,
-// replacing any previous content (the file is opened with mode "w").
-//
-// If the file cannot be opened nothing is written.  A `size` of zero or less
-// leaves an empty file instead of being converted to a huge unsigned byte
-// count.
-void av1_print_uncompressed_frame_header(const uint8_t *data, int size,
-                                         const char *filename) {
-  FILE *hdrFile = fopen(filename, "w");
-  // fopen() returns NULL on failure; fwrite()/fclose() on NULL is undefined.
-  if (hdrFile == NULL) return;
-  // fwrite() takes the element size first and the element count second.
-  if (size > 0) fwrite(data, sizeof(uint8_t), (size_t)size, hdrFile);
-  fclose(hdrFile);
-}
-
-// Dumps the whole FRAME_CONTEXT `fc` to the file named `filename` as a
-// sequence of decimal numbers separated by spaces, treating the structure as
-// an array of sizeof(FRAME_CONTEXT) / sizeof(uint16_t) 16-bit values.
-// Any previous file content is replaced (mode "w").  If the file cannot be
-// opened nothing is written.
-void av1_print_frame_contexts(const FRAME_CONTEXT *fc, const char *filename) {
-  FILE *fcFile = fopen(filename, "w");
-  // fopen() returns NULL on failure; fprintf()/fclose() on NULL is undefined.
-  if (fcFile == NULL) return;
-
-  // Keep the const qualifier of `fc`: the dump only reads the context.
-  const uint16_t *fcp = (const uint16_t *)fc;
-  const unsigned int n_contexts = sizeof(FRAME_CONTEXT) / sizeof(uint16_t);
-  unsigned int i;
-
-  for (i = 0; i < n_contexts; ++i) fprintf(fcFile, "%d ", fcp[i]);
-  fclose(fcFile);
-}
diff --git a/third_party/aom/av1/common/entropy.c b/third_party/aom/av1/common/entropy.c
deleted file mode 100644
index 4f95ef69b..000000000
--- a/third_party/aom/av1/common/entropy.c
+++ /dev/null
@@ -1,178 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#include "config/aom_config.h"
-
-#include "aom/aom_integer.h"
-#include "aom_mem/aom_mem.h"
-#include "av1/common/blockd.h"
-#include "av1/common/entropy.h"
-#include "av1/common/entropymode.h"
-#include "av1/common/onyxc_int.h"
-#include "av1/common/scan.h"
-#include "av1/common/token_cdfs.h"
-#include "av1/common/txb_common.h"
-
-// Maps a q value to one of four contexts:
-//   q <= 20 -> 0,  q <= 60 -> 1,  q <= 120 -> 2,  otherwise 3.
-static int get_q_ctx(int q) {
-  static const int upper_bounds[3] = { 20, 60, 120 };
-  int ctx = 0;
-  // Step past every bucket whose inclusive upper bound is below q.
-  while (ctx < 3 && q > upper_bounds[ctx]) ++ctx;
-  return ctx;
-}
-
-// Loads the default CDF tables into cm->fc.  The table set is chosen by the
-// q context that get_q_ctx() derives from cm->base_qindex; with
-// CONFIG_ENTROPY_STATS enabled that context is also stored in
-// cm->coef_cdf_category.
-void av1_default_coef_probs(AV1_COMMON *cm) {
-  const int q_ctx = get_q_ctx(cm->base_qindex);
-#if CONFIG_ENTROPY_STATS
-  cm->coef_cdf_category = q_ctx;
-#endif
-
-  av1_copy(cm->fc->txb_skip_cdf, av1_default_txb_skip_cdfs[q_ctx]);
-  av1_copy(cm->fc->eob_extra_cdf, av1_default_eob_extra_cdfs[q_ctx]);
-  av1_copy(cm->fc->dc_sign_cdf, av1_default_dc_sign_cdfs[q_ctx]);
-  av1_copy(cm->fc->coeff_br_cdf, av1_default_coeff_lps_multi_cdfs[q_ctx]);
-  av1_copy(cm->fc->coeff_base_cdf, av1_default_coeff_base_multi_cdfs[q_ctx]);
-  av1_copy(cm->fc->coeff_base_eob_cdf,
-           av1_default_coeff_base_eob_multi_cdfs[q_ctx]);
-
-  // One end-of-block flag table per size from 16 up to 1024.
-  av1_copy(cm->fc->eob_flag_cdf16, av1_default_eob_multi16_cdfs[q_ctx]);
-  av1_copy(cm->fc->eob_flag_cdf32, av1_default_eob_multi32_cdfs[q_ctx]);
-  av1_copy(cm->fc->eob_flag_cdf64, av1_default_eob_multi64_cdfs[q_ctx]);
-  av1_copy(cm->fc->eob_flag_cdf128, av1_default_eob_multi128_cdfs[q_ctx]);
-  av1_copy(cm->fc->eob_flag_cdf256, av1_default_eob_multi256_cdfs[q_ctx]);
-  av1_copy(cm->fc->eob_flag_cdf512, av1_default_eob_multi512_cdfs[q_ctx]);
-  av1_copy(cm->fc->eob_flag_cdf1024, av1_default_eob_multi1024_cdfs[q_ctx]);
-}
-
-// Zeroes the element at index `nsymbs` inside each of `num_cdfs` CDFs that
-// are stored back to back in `cdf_ptr`, `cdf_stride` elements apart.
-static void reset_cdf_symbol_counter(aom_cdf_prob *cdf_ptr, int num_cdfs,
-                                     int cdf_stride, int nsymbs) {
-  int counter_pos = nsymbs;
-  for (int done = 0; done < num_cdfs; ++done) {
-    cdf_ptr[counter_pos] = 0;
-    counter_pos += cdf_stride;
-  }
-}
-
-// Zeroes the element at index `nsymbs` of every CDF contained in the array
-// `cname`, for the common case where consecutive CDFs are CDF_SIZE(nsymbs)
-// elements apart.
-#define RESET_CDF_COUNTER(cname, nsymbs) \
- RESET_CDF_COUNTER_STRIDE(cname, nsymbs, CDF_SIZE(nsymbs))
-
-// As RESET_CDF_COUNTER, but with an explicit distance `cdf_stride` (in
-// elements) between consecutive CDFs, for tables whose rows are allocated
-// wider than the number of symbols actually in use.
-// `cname` must be a real array, not a pointer: the number of CDFs is derived
-// from sizeof(cname).  Note that the (int) cast binds to sizeof(cname) only;
-// the quotient is converted back to int by the initialisation.
-#define RESET_CDF_COUNTER_STRIDE(cname, nsymbs, cdf_stride) \
- do { \
- aom_cdf_prob *cdf_ptr = (aom_cdf_prob *)cname; \
- int array_size = (int)sizeof(cname) / sizeof(aom_cdf_prob); \
- int num_cdfs = array_size / cdf_stride; \
- reset_cdf_symbol_counter(cdf_ptr, num_cdfs, cdf_stride, nsymbs); \
- } while (0)
-
-// Clears the counter slot of every CDF held in an nmv_context: the joints CDF
-// and, for each of the two entries of comps[], the class, fractional, sign,
-// high-precision, class0 and bits CDFs.
-static void reset_nmv_counter(nmv_context *nmv) {
-  RESET_CDF_COUNTER(nmv->joints_cdf, 4);
-  for (int comp = 0; comp < 2; ++comp) {
-    RESET_CDF_COUNTER(nmv->comps[comp].classes_cdf, MV_CLASSES);
-    RESET_CDF_COUNTER(nmv->comps[comp].class0_fp_cdf, MV_FP_SIZE);
-    RESET_CDF_COUNTER(nmv->comps[comp].fp_cdf, MV_FP_SIZE);
-    // Two-symbol CDFs.
-    RESET_CDF_COUNTER(nmv->comps[comp].sign_cdf, 2);
-    RESET_CDF_COUNTER(nmv->comps[comp].class0_hp_cdf, 2);
-    RESET_CDF_COUNTER(nmv->comps[comp].hp_cdf, 2);
-    RESET_CDF_COUNTER(nmv->comps[comp].class0_cdf, CLASS0_SIZE);
-    RESET_CDF_COUNTER(nmv->comps[comp].bits_cdf, 2);
-  }
-}
-
-// Zeroes the counter slot (the element at index nsymbs) of every CDF stored
-// in the frame context `fc`.  Each line names one CDF array together with the
-// number of symbols its CDFs hold; the _STRIDE form is used where a table's
-// rows are allocated for more symbols than a particular entry uses.
-void av1_reset_cdf_symbol_counters(FRAME_CONTEXT *fc) {
- RESET_CDF_COUNTER(fc->txb_skip_cdf, 2);
- RESET_CDF_COUNTER(fc->eob_extra_cdf, 2);
- RESET_CDF_COUNTER(fc->dc_sign_cdf, 2);
- RESET_CDF_COUNTER(fc->eob_flag_cdf16, 5);
- RESET_CDF_COUNTER(fc->eob_flag_cdf32, 6);
- RESET_CDF_COUNTER(fc->eob_flag_cdf64, 7);
- RESET_CDF_COUNTER(fc->eob_flag_cdf128, 8);
- RESET_CDF_COUNTER(fc->eob_flag_cdf256, 9);
- RESET_CDF_COUNTER(fc->eob_flag_cdf512, 10);
- RESET_CDF_COUNTER(fc->eob_flag_cdf1024, 11);
- RESET_CDF_COUNTER(fc->coeff_base_eob_cdf, 3);
- RESET_CDF_COUNTER(fc->coeff_base_cdf, 4);
- RESET_CDF_COUNTER(fc->coeff_br_cdf, BR_CDF_SIZE);
- RESET_CDF_COUNTER(fc->newmv_cdf, 2);
- RESET_CDF_COUNTER(fc->zeromv_cdf, 2);
- RESET_CDF_COUNTER(fc->refmv_cdf, 2);
- RESET_CDF_COUNTER(fc->drl_cdf, 2);
- RESET_CDF_COUNTER(fc->inter_compound_mode_cdf, INTER_COMPOUND_MODES);
- RESET_CDF_COUNTER(fc->compound_type_cdf, COMPOUND_TYPES - 1);
- RESET_CDF_COUNTER(fc->wedge_idx_cdf, 16);
- RESET_CDF_COUNTER(fc->interintra_cdf, 2);
- RESET_CDF_COUNTER(fc->wedge_interintra_cdf, 2);
- RESET_CDF_COUNTER(fc->interintra_mode_cdf, INTERINTRA_MODES);
- RESET_CDF_COUNTER(fc->motion_mode_cdf, MOTION_MODES);
- RESET_CDF_COUNTER(fc->obmc_cdf, 2);
- RESET_CDF_COUNTER(fc->palette_y_size_cdf, PALETTE_SIZES);
- RESET_CDF_COUNTER(fc->palette_uv_size_cdf, PALETTE_SIZES);
- // Palette colour-index CDFs: entry j holds j + PALETTE_MIN_SIZE symbols, but
- // every row is laid out with the stride of a PALETTE_COLORS-symbol CDF.
- for (int j = 0; j < PALETTE_SIZES; j++) {
- int nsymbs = j + PALETTE_MIN_SIZE;
- RESET_CDF_COUNTER_STRIDE(fc->palette_y_color_index_cdf[j], nsymbs,
- CDF_SIZE(PALETTE_COLORS));
- RESET_CDF_COUNTER_STRIDE(fc->palette_uv_color_index_cdf[j], nsymbs,
- CDF_SIZE(PALETTE_COLORS));
- }
- RESET_CDF_COUNTER(fc->palette_y_mode_cdf, 2);
- RESET_CDF_COUNTER(fc->palette_uv_mode_cdf, 2);
- RESET_CDF_COUNTER(fc->comp_inter_cdf, 2);
- RESET_CDF_COUNTER(fc->single_ref_cdf, 2);
- RESET_CDF_COUNTER(fc->comp_ref_type_cdf, 2);
- RESET_CDF_COUNTER(fc->uni_comp_ref_cdf, 2);
- RESET_CDF_COUNTER(fc->comp_ref_cdf, 2);
- RESET_CDF_COUNTER(fc->comp_bwdref_cdf, 2);
- RESET_CDF_COUNTER(fc->txfm_partition_cdf, 2);
- RESET_CDF_COUNTER(fc->compound_index_cdf, 2);
- RESET_CDF_COUNTER(fc->comp_group_idx_cdf, 2);
- RESET_CDF_COUNTER(fc->skip_mode_cdfs, 2);
- RESET_CDF_COUNTER(fc->skip_cdfs, 2);
- RESET_CDF_COUNTER(fc->intra_inter_cdf, 2);
- // Two nmv_context members are reset with the same helper.
- reset_nmv_counter(&fc->nmvc);
- reset_nmv_counter(&fc->ndvc);
- RESET_CDF_COUNTER(fc->intrabc_cdf, 2);
- RESET_CDF_COUNTER(fc->seg.tree_cdf, MAX_SEGMENTS);
- RESET_CDF_COUNTER(fc->seg.pred_cdf, 2);
- RESET_CDF_COUNTER(fc->seg.spatial_pred_seg_cdf, MAX_SEGMENTS);
- RESET_CDF_COUNTER(fc->filter_intra_cdfs, 2);
- RESET_CDF_COUNTER(fc->filter_intra_mode_cdf, FILTER_INTRA_MODES);
- RESET_CDF_COUNTER(fc->switchable_restore_cdf, RESTORE_SWITCHABLE_TYPES);
- RESET_CDF_COUNTER(fc->wiener_restore_cdf, 2);
- RESET_CDF_COUNTER(fc->sgrproj_restore_cdf, 2);
- RESET_CDF_COUNTER(fc->y_mode_cdf, INTRA_MODES);
- // uv_mode_cdf[0] uses one symbol fewer than uv_mode_cdf[1] while sharing the
- // row width of a UV_INTRA_MODES-symbol CDF.
- RESET_CDF_COUNTER_STRIDE(fc->uv_mode_cdf[0], UV_INTRA_MODES - 1,
- CDF_SIZE(UV_INTRA_MODES));
- RESET_CDF_COUNTER(fc->uv_mode_cdf[1], UV_INTRA_MODES);
- // Partition CDFs: contexts 0-3 hold 4 symbols, contexts 4-15 hold 10 and the
- // remaining contexts hold 8; all rows are CDF_SIZE(10) elements wide.
- for (int i = 0; i < PARTITION_CONTEXTS; i++) {
- if (i < 4) {
- RESET_CDF_COUNTER_STRIDE(fc->partition_cdf[i], 4, CDF_SIZE(10));
- } else if (i < 16) {
- RESET_CDF_COUNTER(fc->partition_cdf[i], 10);
- } else {
- RESET_CDF_COUNTER_STRIDE(fc->partition_cdf[i], 8, CDF_SIZE(10));
- }
- }
- RESET_CDF_COUNTER(fc->switchable_interp_cdf, SWITCHABLE_FILTERS);
- RESET_CDF_COUNTER(fc->kf_y_cdf, INTRA_MODES);
- RESET_CDF_COUNTER(fc->angle_delta_cdf, 2 * MAX_ANGLE_DELTA + 1);
- // tx_size_cdf[0] holds MAX_TX_DEPTH symbols; entries 1-3 hold one more.
- RESET_CDF_COUNTER_STRIDE(fc->tx_size_cdf[0], MAX_TX_DEPTH,
- CDF_SIZE(MAX_TX_DEPTH + 1));
- RESET_CDF_COUNTER(fc->tx_size_cdf[1], MAX_TX_DEPTH + 1);
- RESET_CDF_COUNTER(fc->tx_size_cdf[2], MAX_TX_DEPTH + 1);
- RESET_CDF_COUNTER(fc->tx_size_cdf[3], MAX_TX_DEPTH + 1);
- RESET_CDF_COUNTER(fc->delta_q_cdf, DELTA_Q_PROBS + 1);
- RESET_CDF_COUNTER(fc->delta_lf_cdf, DELTA_LF_PROBS + 1);
- for (int i = 0; i < FRAME_LF_COUNT; i++) {
- RESET_CDF_COUNTER(fc->delta_lf_multi_cdf[i], DELTA_LF_PROBS + 1);
- }
- // Extended transform-type CDFs: each set holds a different number of
- // symbols (7 and 5 for the intra sets, 16, 12 and 2 for the inter sets) in
- // rows that are CDF_SIZE(TX_TYPES) wide.  Index 0 of both tables is not
- // reset here.
- RESET_CDF_COUNTER_STRIDE(fc->intra_ext_tx_cdf[1], 7, CDF_SIZE(TX_TYPES));
- RESET_CDF_COUNTER_STRIDE(fc->intra_ext_tx_cdf[2], 5, CDF_SIZE(TX_TYPES));
- RESET_CDF_COUNTER_STRIDE(fc->inter_ext_tx_cdf[1], 16, CDF_SIZE(TX_TYPES));
- RESET_CDF_COUNTER_STRIDE(fc->inter_ext_tx_cdf[2], 12, CDF_SIZE(TX_TYPES));
- RESET_CDF_COUNTER_STRIDE(fc->inter_ext_tx_cdf[3], 2, CDF_SIZE(TX_TYPES));
- RESET_CDF_COUNTER(fc->cfl_sign_cdf, CFL_JOINT_SIGNS);
- RESET_CDF_COUNTER(fc->cfl_alpha_cdf, CFL_ALPHABET_SIZE);
-}
diff --git a/third_party/aom/av1/common/entropy.h b/third_party/aom/av1/common/entropy.h
deleted file mode 100644
index 991692c2f..000000000
--- a/third_party/aom/av1/common/entropy.h
+++ /dev/null
@@ -1,181 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#ifndef AOM_AV1_COMMON_ENTROPY_H_
-#define AOM_AV1_COMMON_ENTROPY_H_
-
-#include "config/aom_config.h"
-
-#include "aom/aom_integer.h"
-#include "aom_dsp/prob.h"
-
-#include "av1/common/common.h"
-#include "av1/common/common_data.h"
-#include "av1/common/enums.h"
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-#define TOKEN_CDF_Q_CTXS 4
-
-#define TXB_SKIP_CONTEXTS 13
-
-#define EOB_COEF_CONTEXTS 9
-
-#define SIG_COEF_CONTEXTS_2D 26
-#define SIG_COEF_CONTEXTS_1D 16
-#define SIG_COEF_CONTEXTS_EOB 4
-#define SIG_COEF_CONTEXTS (SIG_COEF_CONTEXTS_2D + SIG_COEF_CONTEXTS_1D)
-
-#define COEFF_BASE_CONTEXTS (SIG_COEF_CONTEXTS)
-#define DC_SIGN_CONTEXTS 3
-
-#define BR_TMP_OFFSET 12
-#define BR_REF_CAT 4
-#define LEVEL_CONTEXTS 21
-
-#define NUM_BASE_LEVELS 2
-
-#define BR_CDF_SIZE (4)
-#define COEFF_BASE_RANGE (4 * (BR_CDF_SIZE - 1))
-
-#define COEFF_CONTEXT_BITS 6
-#define COEFF_CONTEXT_MASK ((1 << COEFF_CONTEXT_BITS) - 1)
-#define MAX_BASE_BR_RANGE (COEFF_BASE_RANGE + NUM_BASE_LEVELS + 1)
-
-#define BASE_CONTEXT_POSITION_NUM 12
-
-// Transform class identifiers.  The three real classes take the values 0..2;
-// TX_CLASSES (3) is their count rather than a class of its own.
-// NOTE(review): judging by the names, 2D versus horizontal-only and
-// vertical-only transforms - confirm against the users of this enum.
-typedef enum TX_CLASS {
- TX_CLASS_2D = 0,
- TX_CLASS_HORIZ = 1,
- TX_CLASS_VERT = 2,
- TX_CLASSES = 3,
-} TX_CLASS;
-
-#define DCT_MAX_VALUE 16384
-#define DCT_MAX_VALUE_HIGH10 65536
-#define DCT_MAX_VALUE_HIGH12 262144
-
-/* Coefficients are predicted via a 3-dimensional probability table indexed on
- * REF_TYPES, COEF_BANDS and COEF_CONTEXTS. */
-#define REF_TYPES 2 // intra=0, inter=1
-
-struct AV1Common;
-struct frame_contexts;
-void av1_reset_cdf_symbol_counters(struct frame_contexts *fc);
-void av1_default_coef_probs(struct AV1Common *cm);
-
-struct frame_contexts;
-
-typedef char ENTROPY_CONTEXT;
-
-// Returns how many of the two contexts are non-zero (0, 1 or 2).
-static INLINE int combine_entropy_contexts(ENTROPY_CONTEXT a,
-                                           ENTROPY_CONTEXT b) {
-  int nonzero = 0;
-  if (a) ++nonzero;
-  if (b) ++nonzero;
-  return nonzero;
-}
-
-// Returns 1 when any of the first `count` entries of `ctx` is non-zero and 0
-// otherwise (including count == 0).  The entries are read one at a time, so
-// `ctx` needs no particular alignment and is only ever accessed through its
-// own element type.
-static INLINE int entropy_ctx_any_nonzero(const ENTROPY_CONTEXT *ctx,
-                                          int count) {
-  int seen = 0;
-  for (int i = 0; i < count; ++i) seen |= (ctx[i] != 0);
-  return seen;
-}
-
-// Computes the combined entropy context (0, 1 or 2) for a transform block.
-//
-// `a` and `l` point at the above and left context arrays.  For each of them a
-// run of entries is examined whose length depends on the transform size:
-// width / 4 entries of `a` and height / 4 entries of `l` (TX_8X16, for
-// example, looks at 2 entries of `a` and 4 entries of `l`).  A side counts as
-// set when any examined entry is non-zero, and the result is the number of
-// set sides.
-//
-// The previous implementation read the runs through uint16_t / uint32_t /
-// uint64_t pointer casts, which requires suitably aligned pointers and
-// violates the effective-type rules for a char array.  The byte-wise test
-// here yields the same result without those requirements.
-//
-// An unknown tx_size trips the assert; when asserts are compiled out, 0 is
-// returned.
-static INLINE int get_entropy_context(TX_SIZE tx_size, const ENTROPY_CONTEXT *a,
-                                      const ENTROPY_CONTEXT *l) {
-  int above_count = 0, left_count = 0;
-
-  switch (tx_size) {
-    case TX_4X4:
-      above_count = 1;
-      left_count = 1;
-      break;
-    case TX_4X8:
-      above_count = 1;
-      left_count = 2;
-      break;
-    case TX_8X4:
-      above_count = 2;
-      left_count = 1;
-      break;
-    case TX_8X16:
-      above_count = 2;
-      left_count = 4;
-      break;
-    case TX_16X8:
-      above_count = 4;
-      left_count = 2;
-      break;
-    case TX_16X32:
-      above_count = 4;
-      left_count = 8;
-      break;
-    case TX_32X16:
-      above_count = 8;
-      left_count = 4;
-      break;
-    case TX_8X8:
-      above_count = 2;
-      left_count = 2;
-      break;
-    case TX_16X16:
-      above_count = 4;
-      left_count = 4;
-      break;
-    case TX_32X32:
-      above_count = 8;
-      left_count = 8;
-      break;
-    case TX_64X64:
-      above_count = 16;
-      left_count = 16;
-      break;
-    case TX_32X64:
-      above_count = 8;
-      left_count = 16;
-      break;
-    case TX_64X32:
-      above_count = 16;
-      left_count = 8;
-      break;
-    case TX_4X16:
-      above_count = 1;
-      left_count = 4;
-      break;
-    case TX_16X4:
-      above_count = 4;
-      left_count = 1;
-      break;
-    case TX_8X32:
-      above_count = 2;
-      left_count = 8;
-      break;
-    case TX_32X8:
-      above_count = 8;
-      left_count = 2;
-      break;
-    case TX_16X64:
-      above_count = 4;
-      left_count = 16;
-      break;
-    case TX_64X16:
-      above_count = 16;
-      left_count = 4;
-      break;
-    default: assert(0 && "Invalid transform size."); break;
-  }
-
-  const ENTROPY_CONTEXT above_ec =
-      (ENTROPY_CONTEXT)entropy_ctx_any_nonzero(a, above_count);
-  const ENTROPY_CONTEXT left_ec =
-      (ENTROPY_CONTEXT)entropy_ctx_any_nonzero(l, left_count);
-  return combine_entropy_contexts(above_ec, left_ec);
-}
-
-// Returns the average, rounded up, of the txsize_sqr_map and
-// txsize_sqr_up_map entries for `txsize`, cast back to TX_SIZE.
-static INLINE TX_SIZE get_txsize_entropy_ctx(TX_SIZE txsize) {
-  const int sqr = txsize_sqr_map[txsize];
-  const int sqr_up = txsize_sqr_up_map[txsize];
-  return (TX_SIZE)((sqr + sqr_up + 1) >> 1);
-}
-
-#ifdef __cplusplus
-} // extern "C"
-#endif
-
-#endif // AOM_AV1_COMMON_ENTROPY_H_
diff --git a/third_party/aom/av1/common/entropymode.c b/third_party/aom/av1/common/entropymode.c
deleted file mode 100644
index 41dc30ddb..000000000
--- a/third_party/aom/av1/common/entropymode.c
+++ /dev/null
@@ -1,1103 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#include "aom_mem/aom_mem.h"
-
-#include "av1/common/reconinter.h"
-#include "av1/common/scan.h"
-#include "av1/common/onyxc_int.h"
-#include "av1/common/seg_common.h"
-#include "av1/common/txb_common.h"
-
-// Default CDF table indexed by two context values, each in
-// [0, KF_MODE_CONTEXTS); the initialiser supplies a 5 x 5 grid of rows.  Every
-// row is written with AOM_CDF13 and lists 12 increasing values.
-// NOTE(review): going by the name, this covers the luma intra mode on key
-// frames, with the two contexts coming from neighbouring blocks - confirm
-// against the code that reads kf_y_cdf.
-static const aom_cdf_prob
- default_kf_y_mode_cdf[KF_MODE_CONTEXTS][KF_MODE_CONTEXTS][CDF_SIZE(
- INTRA_MODES)] = {
- { { AOM_CDF13(15588, 17027, 19338, 20218, 20682, 21110, 21825, 23244,
- 24189, 28165, 29093, 30466) },
- { AOM_CDF13(12016, 18066, 19516, 20303, 20719, 21444, 21888, 23032,
- 24434, 28658, 30172, 31409) },
- { AOM_CDF13(10052, 10771, 22296, 22788, 23055, 23239, 24133, 25620,
- 26160, 29336, 29929, 31567) },
- { AOM_CDF13(14091, 15406, 16442, 18808, 19136, 19546, 19998, 22096,
- 24746, 29585, 30958, 32462) },
- { AOM_CDF13(12122, 13265, 15603, 16501, 18609, 20033, 22391, 25583,
- 26437, 30261, 31073, 32475) } },
- { { AOM_CDF13(10023, 19585, 20848, 21440, 21832, 22760, 23089, 24023,
- 25381, 29014, 30482, 31436) },
- { AOM_CDF13(5983, 24099, 24560, 24886, 25066, 25795, 25913, 26423,
- 27610, 29905, 31276, 31794) },
- { AOM_CDF13(7444, 12781, 20177, 20728, 21077, 21607, 22170, 23405,
- 24469, 27915, 29090, 30492) },
- { AOM_CDF13(8537, 14689, 15432, 17087, 17408, 18172, 18408, 19825,
- 24649, 29153, 31096, 32210) },
- { AOM_CDF13(7543, 14231, 15496, 16195, 17905, 20717, 21984, 24516,
- 26001, 29675, 30981, 31994) } },
- { { AOM_CDF13(12613, 13591, 21383, 22004, 22312, 22577, 23401, 25055,
- 25729, 29538, 30305, 32077) },
- { AOM_CDF13(9687, 13470, 18506, 19230, 19604, 20147, 20695, 22062,
- 23219, 27743, 29211, 30907) },
- { AOM_CDF13(6183, 6505, 26024, 26252, 26366, 26434, 27082, 28354, 28555,
- 30467, 30794, 32086) },
- { AOM_CDF13(10718, 11734, 14954, 17224, 17565, 17924, 18561, 21523,
- 23878, 28975, 30287, 32252) },
- { AOM_CDF13(9194, 9858, 16501, 17263, 18424, 19171, 21563, 25961, 26561,
- 30072, 30737, 32463) } },
- { { AOM_CDF13(12602, 14399, 15488, 18381, 18778, 19315, 19724, 21419,
- 25060, 29696, 30917, 32409) },
- { AOM_CDF13(8203, 13821, 14524, 17105, 17439, 18131, 18404, 19468,
- 25225, 29485, 31158, 32342) },
- { AOM_CDF13(8451, 9731, 15004, 17643, 18012, 18425, 19070, 21538, 24605,
- 29118, 30078, 32018) },
- { AOM_CDF13(7714, 9048, 9516, 16667, 16817, 16994, 17153, 18767, 26743,
- 30389, 31536, 32528) },
- { AOM_CDF13(8843, 10280, 11496, 15317, 16652, 17943, 19108, 22718,
- 25769, 29953, 30983, 32485) } },
- { { AOM_CDF13(12578, 13671, 15979, 16834, 19075, 20913, 22989, 25449,
- 26219, 30214, 31150, 32477) },
- { AOM_CDF13(9563, 13626, 15080, 15892, 17756, 20863, 22207, 24236,
- 25380, 29653, 31143, 32277) },
- { AOM_CDF13(8356, 8901, 17616, 18256, 19350, 20106, 22598, 25947, 26466,
- 29900, 30523, 32261) },
- { AOM_CDF13(10835, 11815, 13124, 16042, 17018, 18039, 18947, 22753,
- 24615, 29489, 30883, 32482) },
- { AOM_CDF13(7618, 8288, 9859, 10509, 15386, 18657, 22903, 28776, 29180,
- 31355, 31802, 32593) } }
- };
-
-// Default angle-delta CDFs, one row per directional mode (8 rows supplied).
-// Each row is sized for 2 * MAX_ANGLE_DELTA + 1 symbols and is written with
-// AOM_CDF7, listing 6 increasing values.
-static const aom_cdf_prob default_angle_delta_cdf[DIRECTIONAL_MODES][CDF_SIZE(
- 2 * MAX_ANGLE_DELTA + 1)] = {
- { AOM_CDF7(2180, 5032, 7567, 22776, 26989, 30217) },
- { AOM_CDF7(2301, 5608, 8801, 23487, 26974, 30330) },
- { AOM_CDF7(3780, 11018, 13699, 19354, 23083, 31286) },
- { AOM_CDF7(4581, 11226, 15147, 17138, 21834, 28397) },
- { AOM_CDF7(1737, 10927, 14509, 19588, 22745, 28823) },
- { AOM_CDF7(2664, 10176, 12485, 17650, 21600, 30495) },
- { AOM_CDF7(2240, 11096, 15453, 20341, 22561, 28917) },
- { AOM_CDF7(3605, 10428, 12459, 17676, 21244, 30655) }
-};
-
-// Default CDF table with one row per block-size group (4 rows supplied), each
-// sized for INTRA_MODES symbols and written with AOM_CDF13.
-// NOTE(review): the name suggests the luma intra mode on inter frames -
-// confirm against the code that reads y_mode_cdf.
-static const aom_cdf_prob default_if_y_mode_cdf[BLOCK_SIZE_GROUPS][CDF_SIZE(
- INTRA_MODES)] = { { AOM_CDF13(22801, 23489, 24293, 24756, 25601, 26123,
- 26606, 27418, 27945, 29228, 29685, 30349) },
- { AOM_CDF13(18673, 19845, 22631, 23318, 23950, 24649,
- 25527, 27364, 28152, 29701, 29984, 30852) },
- { AOM_CDF13(19770, 20979, 23396, 23939, 24241, 24654,
- 25136, 27073, 27830, 29360, 29730, 30659) },
- { AOM_CDF13(20155, 21301, 22838, 23178, 23261, 23533,
- 23703, 24804, 25352, 26575, 27016, 28049) } };
-
-// Default chroma (UV) mode CDFs, indexed by [CFL_ALLOWED_TYPES][INTRA_MODES].
-// The first group is written with AOM_CDF13 and the second with AOM_CDF14,
-// i.e. the first group uses one symbol fewer while both share rows sized for
-// UV_INTRA_MODES symbols.  Each group supplies 13 rows.
-// NOTE(review): presumably the first index says whether CfL is allowed and
-// the second is the luma mode - confirm against the reader of uv_mode_cdf.
-static const aom_cdf_prob
- default_uv_mode_cdf[CFL_ALLOWED_TYPES][INTRA_MODES][CDF_SIZE(
- UV_INTRA_MODES)] = {
- { { AOM_CDF13(22631, 24152, 25378, 25661, 25986, 26520, 27055, 27923,
- 28244, 30059, 30941, 31961) },
- { AOM_CDF13(9513, 26881, 26973, 27046, 27118, 27664, 27739, 27824,
- 28359, 29505, 29800, 31796) },
- { AOM_CDF13(9845, 9915, 28663, 28704, 28757, 28780, 29198, 29822, 29854,
- 30764, 31777, 32029) },
- { AOM_CDF13(13639, 13897, 14171, 25331, 25606, 25727, 25953, 27148,
- 28577, 30612, 31355, 32493) },
- { AOM_CDF13(9764, 9835, 9930, 9954, 25386, 27053, 27958, 28148, 28243,
- 31101, 31744, 32363) },
- { AOM_CDF13(11825, 13589, 13677, 13720, 15048, 29213, 29301, 29458,
- 29711, 31161, 31441, 32550) },
- { AOM_CDF13(14175, 14399, 16608, 16821, 17718, 17775, 28551, 30200,
- 30245, 31837, 32342, 32667) },
- { AOM_CDF13(12885, 13038, 14978, 15590, 15673, 15748, 16176, 29128,
- 29267, 30643, 31961, 32461) },
- { AOM_CDF13(12026, 13661, 13874, 15305, 15490, 15726, 15995, 16273,
- 28443, 30388, 30767, 32416) },
- { AOM_CDF13(19052, 19840, 20579, 20916, 21150, 21467, 21885, 22719,
- 23174, 28861, 30379, 32175) },
- { AOM_CDF13(18627, 19649, 20974, 21219, 21492, 21816, 22199, 23119,
- 23527, 27053, 31397, 32148) },
- { AOM_CDF13(17026, 19004, 19997, 20339, 20586, 21103, 21349, 21907,
- 22482, 25896, 26541, 31819) },
- { AOM_CDF13(12124, 13759, 14959, 14992, 15007, 15051, 15078, 15166,
- 15255, 15753, 16039, 16606) } },
- { { AOM_CDF14(10407, 11208, 12900, 13181, 13823, 14175, 14899, 15656,
- 15986, 20086, 20995, 22455, 24212) },
- { AOM_CDF14(4532, 19780, 20057, 20215, 20428, 21071, 21199, 21451,
- 22099, 24228, 24693, 27032, 29472) },
- { AOM_CDF14(5273, 5379, 20177, 20270, 20385, 20439, 20949, 21695, 21774,
- 23138, 24256, 24703, 26679) },
- { AOM_CDF14(6740, 7167, 7662, 14152, 14536, 14785, 15034, 16741, 18371,
- 21520, 22206, 23389, 24182) },
- { AOM_CDF14(4987, 5368, 5928, 6068, 19114, 20315, 21857, 22253, 22411,
- 24911, 25380, 26027, 26376) },
- { AOM_CDF14(5370, 6889, 7247, 7393, 9498, 21114, 21402, 21753, 21981,
- 24780, 25386, 26517, 27176) },
- { AOM_CDF14(4816, 4961, 7204, 7326, 8765, 8930, 20169, 20682, 20803,
- 23188, 23763, 24455, 24940) },
- { AOM_CDF14(6608, 6740, 8529, 9049, 9257, 9356, 9735, 18827, 19059,
- 22336, 23204, 23964, 24793) },
- { AOM_CDF14(5998, 7419, 7781, 8933, 9255, 9549, 9753, 10417, 18898,
- 22494, 23139, 24764, 25989) },
- { AOM_CDF14(10660, 11298, 12550, 12957, 13322, 13624, 14040, 15004,
- 15534, 20714, 21789, 23443, 24861) },
- { AOM_CDF14(10522, 11530, 12552, 12963, 13378, 13779, 14245, 15235,
- 15902, 20102, 22696, 23774, 25838) },
- { AOM_CDF14(10099, 10691, 12639, 13049, 13386, 13665, 14125, 15163,
- 15636, 19676, 20474, 23519, 25208) },
- { AOM_CDF14(3144, 5087, 7382, 7504, 7593, 7690, 7801, 8064, 8232, 9248,
- 9875, 10521, 29048) } }
- };
-
-// Default partition CDFs, one row per partition context (20 rows supplied).
-// Rows are sized for EXT_PARTITION_TYPES symbols, but the first 4 rows are
-// written with AOM_CDF4, the next 12 with AOM_CDF10 and the last 4 with
-// AOM_CDF8, so the number of symbols in use depends on the context.
-static const aom_cdf_prob default_partition_cdf[PARTITION_CONTEXTS][CDF_SIZE(
- EXT_PARTITION_TYPES)] = {
- { AOM_CDF4(19132, 25510, 30392) },
- { AOM_CDF4(13928, 19855, 28540) },
- { AOM_CDF4(12522, 23679, 28629) },
- { AOM_CDF4(9896, 18783, 25853) },
- { AOM_CDF10(15597, 20929, 24571, 26706, 27664, 28821, 29601, 30571, 31902) },
- { AOM_CDF10(7925, 11043, 16785, 22470, 23971, 25043, 26651, 28701, 29834) },
- { AOM_CDF10(5414, 13269, 15111, 20488, 22360, 24500, 25537, 26336, 32117) },
- { AOM_CDF10(2662, 6362, 8614, 20860, 23053, 24778, 26436, 27829, 31171) },
- { AOM_CDF10(18462, 20920, 23124, 27647, 28227, 29049, 29519, 30178, 31544) },
- { AOM_CDF10(7689, 9060, 12056, 24992, 25660, 26182, 26951, 28041, 29052) },
- { AOM_CDF10(6015, 9009, 10062, 24544, 25409, 26545, 27071, 27526, 32047) },
- { AOM_CDF10(1394, 2208, 2796, 28614, 29061, 29466, 29840, 30185, 31899) },
- { AOM_CDF10(20137, 21547, 23078, 29566, 29837, 30261, 30524, 30892, 31724) },
- { AOM_CDF10(6732, 7490, 9497, 27944, 28250, 28515, 28969, 29630, 30104) },
- { AOM_CDF10(5945, 7663, 8348, 28683, 29117, 29749, 30064, 30298, 32238) },
- { AOM_CDF10(870, 1212, 1487, 31198, 31394, 31574, 31743, 31881, 32332) },
- { AOM_CDF8(27899, 28219, 28529, 32484, 32539, 32619, 32639) },
- { AOM_CDF8(6607, 6990, 8268, 32060, 32219, 32338, 32371) },
- { AOM_CDF8(5429, 6676, 7122, 32027, 32227, 32531, 32582) },
- { AOM_CDF8(711, 966, 1172, 32448, 32538, 32617, 32664) },
-};
-
-// Default intra extended transform-type CDFs, indexed by
-// [EXT_TX_SETS_INTRA][EXT_TX_SIZES][INTRA_MODES].  Three sets are supplied,
-// each with 4 size entries of 13 rows:
-//   set 0 - every row is { 0 };
-//   set 1 - rows written with AOM_CDF7;
-//   set 2 - rows written with AOM_CDF5.
-// All rows are sized for TX_TYPES symbols regardless of how many are in use.
-// Several size entries repeat one evenly spaced row for every mode
-// (NOTE(review): these look like uniform placeholders for combinations that
-// are never coded - confirm).
-static const aom_cdf_prob default_intra_ext_tx_cdf
- [EXT_TX_SETS_INTRA][EXT_TX_SIZES][INTRA_MODES][CDF_SIZE(TX_TYPES)] = {
- {
- {
- { 0 },
- { 0 },
- { 0 },
- { 0 },
- { 0 },
- { 0 },
- { 0 },
- { 0 },
- { 0 },
- { 0 },
- { 0 },
- { 0 },
- { 0 },
- },
- {
- { 0 },
- { 0 },
- { 0 },
- { 0 },
- { 0 },
- { 0 },
- { 0 },
- { 0 },
- { 0 },
- { 0 },
- { 0 },
- { 0 },
- { 0 },
- },
- {
- { 0 },
- { 0 },
- { 0 },
- { 0 },
- { 0 },
- { 0 },
- { 0 },
- { 0 },
- { 0 },
- { 0 },
- { 0 },
- { 0 },
- { 0 },
- },
- {
- { 0 },
- { 0 },
- { 0 },
- { 0 },
- { 0 },
- { 0 },
- { 0 },
- { 0 },
- { 0 },
- { 0 },
- { 0 },
- { 0 },
- { 0 },
- },
- },
- {
- {
- { AOM_CDF7(1535, 8035, 9461, 12751, 23467, 27825) },
- { AOM_CDF7(564, 3335, 9709, 10870, 18143, 28094) },
- { AOM_CDF7(672, 3247, 3676, 11982, 19415, 23127) },
- { AOM_CDF7(5279, 13885, 15487, 18044, 23527, 30252) },
- { AOM_CDF7(4423, 6074, 7985, 10416, 25693, 29298) },
- { AOM_CDF7(1486, 4241, 9460, 10662, 16456, 27694) },
- { AOM_CDF7(439, 2838, 3522, 6737, 18058, 23754) },
- { AOM_CDF7(1190, 4233, 4855, 11670, 20281, 24377) },
- { AOM_CDF7(1045, 4312, 8647, 10159, 18644, 29335) },
- { AOM_CDF7(202, 3734, 4747, 7298, 17127, 24016) },
- { AOM_CDF7(447, 4312, 6819, 8884, 16010, 23858) },
- { AOM_CDF7(277, 4369, 5255, 8905, 16465, 22271) },
- { AOM_CDF7(3409, 5436, 10599, 15599, 19687, 24040) },
- },
- {
- { AOM_CDF7(1870, 13742, 14530, 16498, 23770, 27698) },
- { AOM_CDF7(326, 8796, 14632, 15079, 19272, 27486) },
- { AOM_CDF7(484, 7576, 7712, 14443, 19159, 22591) },
- { AOM_CDF7(1126, 15340, 15895, 17023, 20896, 30279) },
- { AOM_CDF7(655, 4854, 5249, 5913, 22099, 27138) },
- { AOM_CDF7(1299, 6458, 8885, 9290, 14851, 25497) },
- { AOM_CDF7(311, 5295, 5552, 6885, 16107, 22672) },
- { AOM_CDF7(883, 8059, 8270, 11258, 17289, 21549) },
- { AOM_CDF7(741, 7580, 9318, 10345, 16688, 29046) },
- { AOM_CDF7(110, 7406, 7915, 9195, 16041, 23329) },
- { AOM_CDF7(363, 7974, 9357, 10673, 15629, 24474) },
- { AOM_CDF7(153, 7647, 8112, 9936, 15307, 19996) },
- { AOM_CDF7(3511, 6332, 11165, 15335, 19323, 23594) },
- },
- {
- { AOM_CDF7(4681, 9362, 14043, 18725, 23406, 28087) },
- { AOM_CDF7(4681, 9362, 14043, 18725, 23406, 28087) },
- { AOM_CDF7(4681, 9362, 14043, 18725, 23406, 28087) },
- { AOM_CDF7(4681, 9362, 14043, 18725, 23406, 28087) },
- { AOM_CDF7(4681, 9362, 14043, 18725, 23406, 28087) },
- { AOM_CDF7(4681, 9362, 14043, 18725, 23406, 28087) },
- { AOM_CDF7(4681, 9362, 14043, 18725, 23406, 28087) },
- { AOM_CDF7(4681, 9362, 14043, 18725, 23406, 28087) },
- { AOM_CDF7(4681, 9362, 14043, 18725, 23406, 28087) },
- { AOM_CDF7(4681, 9362, 14043, 18725, 23406, 28087) },
- { AOM_CDF7(4681, 9362, 14043, 18725, 23406, 28087) },
- { AOM_CDF7(4681, 9362, 14043, 18725, 23406, 28087) },
- { AOM_CDF7(4681, 9362, 14043, 18725, 23406, 28087) },
- },
- {
- { AOM_CDF7(4681, 9362, 14043, 18725, 23406, 28087) },
- { AOM_CDF7(4681, 9362, 14043, 18725, 23406, 28087) },
- { AOM_CDF7(4681, 9362, 14043, 18725, 23406, 28087) },
- { AOM_CDF7(4681, 9362, 14043, 18725, 23406, 28087) },
- { AOM_CDF7(4681, 9362, 14043, 18725, 23406, 28087) },
- { AOM_CDF7(4681, 9362, 14043, 18725, 23406, 28087) },
- { AOM_CDF7(4681, 9362, 14043, 18725, 23406, 28087) },
- { AOM_CDF7(4681, 9362, 14043, 18725, 23406, 28087) },
- { AOM_CDF7(4681, 9362, 14043, 18725, 23406, 28087) },
- { AOM_CDF7(4681, 9362, 14043, 18725, 23406, 28087) },
- { AOM_CDF7(4681, 9362, 14043, 18725, 23406, 28087) },
- { AOM_CDF7(4681, 9362, 14043, 18725, 23406, 28087) },
- { AOM_CDF7(4681, 9362, 14043, 18725, 23406, 28087) },
- },
- },
- {
- {
- { AOM_CDF5(6554, 13107, 19661, 26214) },
- { AOM_CDF5(6554, 13107, 19661, 26214) },
- { AOM_CDF5(6554, 13107, 19661, 26214) },
- { AOM_CDF5(6554, 13107, 19661, 26214) },
- { AOM_CDF5(6554, 13107, 19661, 26214) },
- { AOM_CDF5(6554, 13107, 19661, 26214) },
- { AOM_CDF5(6554, 13107, 19661, 26214) },
- { AOM_CDF5(6554, 13107, 19661, 26214) },
- { AOM_CDF5(6554, 13107, 19661, 26214) },
- { AOM_CDF5(6554, 13107, 19661, 26214) },
- { AOM_CDF5(6554, 13107, 19661, 26214) },
- { AOM_CDF5(6554, 13107, 19661, 26214) },
- { AOM_CDF5(6554, 13107, 19661, 26214) },
- },
- {
- { AOM_CDF5(6554, 13107, 19661, 26214) },
- { AOM_CDF5(6554, 13107, 19661, 26214) },
- { AOM_CDF5(6554, 13107, 19661, 26214) },
- { AOM_CDF5(6554, 13107, 19661, 26214) },
- { AOM_CDF5(6554, 13107, 19661, 26214) },
- { AOM_CDF5(6554, 13107, 19661, 26214) },
- { AOM_CDF5(6554, 13107, 19661, 26214) },
- { AOM_CDF5(6554, 13107, 19661, 26214) },
- { AOM_CDF5(6554, 13107, 19661, 26214) },
- { AOM_CDF5(6554, 13107, 19661, 26214) },
- { AOM_CDF5(6554, 13107, 19661, 26214) },
- { AOM_CDF5(6554, 13107, 19661, 26214) },
- { AOM_CDF5(6554, 13107, 19661, 26214) },
- },
- {
- { AOM_CDF5(1127, 12814, 22772, 27483) },
- { AOM_CDF5(145, 6761, 11980, 26667) },
- { AOM_CDF5(362, 5887, 11678, 16725) },
- { AOM_CDF5(385, 15213, 18587, 30693) },
- { AOM_CDF5(25, 2914, 23134, 27903) },
- { AOM_CDF5(60, 4470, 11749, 23991) },
- { AOM_CDF5(37, 3332, 14511, 21448) },
- { AOM_CDF5(157, 6320, 13036, 17439) },
- { AOM_CDF5(119, 6719, 12906, 29396) },
- { AOM_CDF5(47, 5537, 12576, 21499) },
- { AOM_CDF5(269, 6076, 11258, 23115) },
- { AOM_CDF5(83, 5615, 12001, 17228) },
- { AOM_CDF5(1968, 5556, 12023, 18547) },
- },
- {
- { AOM_CDF5(6554, 13107, 19661, 26214) },
- { AOM_CDF5(6554, 13107, 19661, 26214) },
- { AOM_CDF5(6554, 13107, 19661, 26214) },
- { AOM_CDF5(6554, 13107, 19661, 26214) },
- { AOM_CDF5(6554, 13107, 19661, 26214) },
- { AOM_CDF5(6554, 13107, 19661, 26214) },
- { AOM_CDF5(6554, 13107, 19661, 26214) },
- { AOM_CDF5(6554, 13107, 19661, 26214) },
- { AOM_CDF5(6554, 13107, 19661, 26214) },
- { AOM_CDF5(6554, 13107, 19661, 26214) },
- { AOM_CDF5(6554, 13107, 19661, 26214) },
- { AOM_CDF5(6554, 13107, 19661, 26214) },
- { AOM_CDF5(6554, 13107, 19661, 26214) },
- },
- },
- };
-
-// Default inter extended transform-type CDFs, indexed by
-// [EXT_TX_SETS_INTER][EXT_TX_SIZES].  Four sets of 4 rows are supplied:
-//   set 0 - every row is { 0 };
-//   set 1 - rows written with AOM_CDF16;
-//   set 2 - rows written with AOM_CDF12;
-//   set 3 - rows written with AOM_CDF2.
-// All rows are sized for TX_TYPES symbols regardless of how many are in use.
-static const aom_cdf_prob
- default_inter_ext_tx_cdf[EXT_TX_SETS_INTER][EXT_TX_SIZES][CDF_SIZE(
- TX_TYPES)] = {
- {
- { 0 },
- { 0 },
- { 0 },
- { 0 },
- },
- {
- { AOM_CDF16(4458, 5560, 7695, 9709, 13330, 14789, 17537, 20266, 21504,
- 22848, 23934, 25474, 27727, 28915, 30631) },
- { AOM_CDF16(1645, 2573, 4778, 5711, 7807, 8622, 10522, 15357, 17674,
- 20408, 22517, 25010, 27116, 28856, 30749) },
- { AOM_CDF16(2048, 4096, 6144, 8192, 10240, 12288, 14336, 16384, 18432,
- 20480, 22528, 24576, 26624, 28672, 30720) },
- { AOM_CDF16(2048, 4096, 6144, 8192, 10240, 12288, 14336, 16384, 18432,
- 20480, 22528, 24576, 26624, 28672, 30720) },
- },
- {
- { AOM_CDF12(2731, 5461, 8192, 10923, 13653, 16384, 19115, 21845,
- 24576, 27307, 30037) },
- { AOM_CDF12(2731, 5461, 8192, 10923, 13653, 16384, 19115, 21845,
- 24576, 27307, 30037) },
- { AOM_CDF12(770, 2421, 5225, 12907, 15819, 18927, 21561, 24089, 26595,
- 28526, 30529) },
- { AOM_CDF12(2731, 5461, 8192, 10923, 13653, 16384, 19115, 21845,
- 24576, 27307, 30037) },
- },
- {
- { AOM_CDF2(16384) },
- { AOM_CDF2(4167) },
- { AOM_CDF2(1998) },
- { AOM_CDF2(748) },
- },
- };
-
-static const aom_cdf_prob default_cfl_sign_cdf[CDF_SIZE(CFL_JOINT_SIGNS)] = {
- AOM_CDF8(1418, 2123, 13340, 18405, 26972, 28343, 32294)
-};
-
-static const aom_cdf_prob
- default_cfl_alpha_cdf[CFL_ALPHA_CONTEXTS][CDF_SIZE(CFL_ALPHABET_SIZE)] = {
- { AOM_CDF16(7637, 20719, 31401, 32481, 32657, 32688, 32692, 32696, 32700,
- 32704, 32708, 32712, 32716, 32720, 32724) },
- { AOM_CDF16(14365, 23603, 28135, 31168, 32167, 32395, 32487, 32573, 32620,
- 32647, 32668, 32672, 32676, 32680, 32684) },
- { AOM_CDF16(11532, 22380, 28445, 31360, 32349, 32523, 32584, 32649, 32673,
- 32677, 32681, 32685, 32689, 32693, 32697) },
- { AOM_CDF16(26990, 31402, 32282, 32571, 32692, 32696, 32700, 32704, 32708,
- 32712, 32716, 32720, 32724, 32728, 32732) },
- { AOM_CDF16(17248, 26058, 28904, 30608, 31305, 31877, 32126, 32321, 32394,
- 32464, 32516, 32560, 32576, 32593, 32622) },
- { AOM_CDF16(14738, 21678, 25779, 27901, 29024, 30302, 30980, 31843, 32144,
- 32413, 32520, 32594, 32622, 32656, 32660) }
- };
-
-static const aom_cdf_prob
- default_switchable_interp_cdf[SWITCHABLE_FILTER_CONTEXTS][CDF_SIZE(
- SWITCHABLE_FILTERS)] = {
- { AOM_CDF3(31935, 32720) }, { AOM_CDF3(5568, 32719) },
- { AOM_CDF3(422, 2938) }, { AOM_CDF3(28244, 32608) },
- { AOM_CDF3(31206, 31953) }, { AOM_CDF3(4862, 32121) },
- { AOM_CDF3(770, 1152) }, { AOM_CDF3(20889, 25637) },
- { AOM_CDF3(31910, 32724) }, { AOM_CDF3(4120, 32712) },
- { AOM_CDF3(305, 2247) }, { AOM_CDF3(27403, 32636) },
- { AOM_CDF3(31022, 32009) }, { AOM_CDF3(2963, 32093) },
- { AOM_CDF3(601, 943) }, { AOM_CDF3(14969, 21398) }
- };
-
-static const aom_cdf_prob default_newmv_cdf[NEWMV_MODE_CONTEXTS][CDF_SIZE(2)] =
- { { AOM_CDF2(24035) }, { AOM_CDF2(16630) }, { AOM_CDF2(15339) },
- { AOM_CDF2(8386) }, { AOM_CDF2(12222) }, { AOM_CDF2(4676) } };
-
-static const aom_cdf_prob default_zeromv_cdf[GLOBALMV_MODE_CONTEXTS][CDF_SIZE(
- 2)] = { { AOM_CDF2(2175) }, { AOM_CDF2(1054) } };
-
-static const aom_cdf_prob default_refmv_cdf[REFMV_MODE_CONTEXTS][CDF_SIZE(2)] =
- { { AOM_CDF2(23974) }, { AOM_CDF2(24188) }, { AOM_CDF2(17848) },
- { AOM_CDF2(28622) }, { AOM_CDF2(24312) }, { AOM_CDF2(19923) } };
-
-static const aom_cdf_prob default_drl_cdf[DRL_MODE_CONTEXTS][CDF_SIZE(2)] = {
- { AOM_CDF2(13104) }, { AOM_CDF2(24560) }, { AOM_CDF2(18945) }
-};
-
-static const aom_cdf_prob
- default_inter_compound_mode_cdf[INTER_MODE_CONTEXTS][CDF_SIZE(
- INTER_COMPOUND_MODES)] = {
- { AOM_CDF8(7760, 13823, 15808, 17641, 19156, 20666, 26891) },
- { AOM_CDF8(10730, 19452, 21145, 22749, 24039, 25131, 28724) },
- { AOM_CDF8(10664, 20221, 21588, 22906, 24295, 25387, 28436) },
- { AOM_CDF8(13298, 16984, 20471, 24182, 25067, 25736, 26422) },
- { AOM_CDF8(18904, 23325, 25242, 27432, 27898, 28258, 30758) },
- { AOM_CDF8(10725, 17454, 20124, 22820, 24195, 25168, 26046) },
- { AOM_CDF8(17125, 24273, 25814, 27492, 28214, 28704, 30592) },
- { AOM_CDF8(13046, 23214, 24505, 25942, 27435, 28442, 29330) }
- };
-
-static const aom_cdf_prob default_interintra_cdf[BLOCK_SIZE_GROUPS][CDF_SIZE(
- 2)] = { { AOM_CDF2(16384) },
- { AOM_CDF2(26887) },
- { AOM_CDF2(27597) },
- { AOM_CDF2(30237) } };
-
-static const aom_cdf_prob
- default_interintra_mode_cdf[BLOCK_SIZE_GROUPS][CDF_SIZE(INTERINTRA_MODES)] =
- { { AOM_CDF4(8192, 16384, 24576) },
- { AOM_CDF4(1875, 11082, 27332) },
- { AOM_CDF4(2473, 9996, 26388) },
- { AOM_CDF4(4238, 11537, 25926) } };
-
-static const aom_cdf_prob
- default_wedge_interintra_cdf[BLOCK_SIZES_ALL][CDF_SIZE(2)] = {
- { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, { AOM_CDF2(16384) },
- { AOM_CDF2(20036) }, { AOM_CDF2(24957) }, { AOM_CDF2(26704) },
- { AOM_CDF2(27530) }, { AOM_CDF2(29564) }, { AOM_CDF2(29444) },
- { AOM_CDF2(26872) }, { AOM_CDF2(16384) }, { AOM_CDF2(16384) },
- { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, { AOM_CDF2(16384) },
- { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, { AOM_CDF2(16384) },
- { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, { AOM_CDF2(16384) },
- { AOM_CDF2(16384) }
- };
-
-static const aom_cdf_prob
- default_compound_type_cdf[BLOCK_SIZES_ALL][CDF_SIZE(COMPOUND_TYPES - 1)] = {
- { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, { AOM_CDF2(16384) },
- { AOM_CDF2(23431) }, { AOM_CDF2(13171) }, { AOM_CDF2(11470) },
- { AOM_CDF2(9770) }, { AOM_CDF2(9100) }, { AOM_CDF2(8233) },
- { AOM_CDF2(6172) }, { AOM_CDF2(16384) }, { AOM_CDF2(16384) },
- { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, { AOM_CDF2(16384) },
- { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, { AOM_CDF2(16384) },
- { AOM_CDF2(11820) }, { AOM_CDF2(7701) }, { AOM_CDF2(16384) },
- { AOM_CDF2(16384) }
- };
-
-static const aom_cdf_prob default_wedge_idx_cdf[BLOCK_SIZES_ALL][CDF_SIZE(16)] =
- { { AOM_CDF16(2048, 4096, 6144, 8192, 10240, 12288, 14336, 16384, 18432,
- 20480, 22528, 24576, 26624, 28672, 30720) },
- { AOM_CDF16(2048, 4096, 6144, 8192, 10240, 12288, 14336, 16384, 18432,
- 20480, 22528, 24576, 26624, 28672, 30720) },
- { AOM_CDF16(2048, 4096, 6144, 8192, 10240, 12288, 14336, 16384, 18432,
- 20480, 22528, 24576, 26624, 28672, 30720) },
- { AOM_CDF16(2438, 4440, 6599, 8663, 11005, 12874, 15751, 18094, 20359,
- 22362, 24127, 25702, 27752, 29450, 31171) },
- { AOM_CDF16(806, 3266, 6005, 6738, 7218, 7367, 7771, 14588, 16323, 17367,
- 18452, 19422, 22839, 26127, 29629) },
- { AOM_CDF16(2779, 3738, 4683, 7213, 7775, 8017, 8655, 14357, 17939, 21332,
- 24520, 27470, 29456, 30529, 31656) },
- { AOM_CDF16(1684, 3625, 5675, 7108, 9302, 11274, 14429, 17144, 19163,
- 20961, 22884, 24471, 26719, 28714, 30877) },
- { AOM_CDF16(1142, 3491, 6277, 7314, 8089, 8355, 9023, 13624, 15369, 16730,
- 18114, 19313, 22521, 26012, 29550) },
- { AOM_CDF16(2742, 4195, 5727, 8035, 8980, 9336, 10146, 14124, 17270,
- 20533, 23434, 25972, 27944, 29570, 31416) },
- { AOM_CDF16(1727, 3948, 6101, 7796, 9841, 12344, 15766, 18944, 20638,
- 22038, 23963, 25311, 26988, 28766, 31012) },
- { AOM_CDF16(2048, 4096, 6144, 8192, 10240, 12288, 14336, 16384, 18432,
- 20480, 22528, 24576, 26624, 28672, 30720) },
- { AOM_CDF16(2048, 4096, 6144, 8192, 10240, 12288, 14336, 16384, 18432,
- 20480, 22528, 24576, 26624, 28672, 30720) },
- { AOM_CDF16(2048, 4096, 6144, 8192, 10240, 12288, 14336, 16384, 18432,
- 20480, 22528, 24576, 26624, 28672, 30720) },
- { AOM_CDF16(2048, 4096, 6144, 8192, 10240, 12288, 14336, 16384, 18432,
- 20480, 22528, 24576, 26624, 28672, 30720) },
- { AOM_CDF16(2048, 4096, 6144, 8192, 10240, 12288, 14336, 16384, 18432,
- 20480, 22528, 24576, 26624, 28672, 30720) },
- { AOM_CDF16(2048, 4096, 6144, 8192, 10240, 12288, 14336, 16384, 18432,
- 20480, 22528, 24576, 26624, 28672, 30720) },
- { AOM_CDF16(2048, 4096, 6144, 8192, 10240, 12288, 14336, 16384, 18432,
- 20480, 22528, 24576, 26624, 28672, 30720) },
- { AOM_CDF16(2048, 4096, 6144, 8192, 10240, 12288, 14336, 16384, 18432,
- 20480, 22528, 24576, 26624, 28672, 30720) },
- { AOM_CDF16(154, 987, 1925, 2051, 2088, 2111, 2151, 23033, 23703, 24284,
- 24985, 25684, 27259, 28883, 30911) },
- { AOM_CDF16(1135, 1322, 1493, 2635, 2696, 2737, 2770, 21016, 22935, 25057,
- 27251, 29173, 30089, 30960, 31933) },
- { AOM_CDF16(2048, 4096, 6144, 8192, 10240, 12288, 14336, 16384, 18432,
- 20480, 22528, 24576, 26624, 28672, 30720) },
- { AOM_CDF16(2048, 4096, 6144, 8192, 10240, 12288, 14336, 16384, 18432,
- 20480, 22528, 24576, 26624, 28672, 30720) } };
-
-static const aom_cdf_prob default_motion_mode_cdf[BLOCK_SIZES_ALL][CDF_SIZE(
- MOTION_MODES)] = { { AOM_CDF3(10923, 21845) }, { AOM_CDF3(10923, 21845) },
- { AOM_CDF3(10923, 21845) }, { AOM_CDF3(7651, 24760) },
- { AOM_CDF3(4738, 24765) }, { AOM_CDF3(5391, 25528) },
- { AOM_CDF3(19419, 26810) }, { AOM_CDF3(5123, 23606) },
- { AOM_CDF3(11606, 24308) }, { AOM_CDF3(26260, 29116) },
- { AOM_CDF3(20360, 28062) }, { AOM_CDF3(21679, 26830) },
- { AOM_CDF3(29516, 30701) }, { AOM_CDF3(28898, 30397) },
- { AOM_CDF3(30878, 31335) }, { AOM_CDF3(32507, 32558) },
- { AOM_CDF3(10923, 21845) }, { AOM_CDF3(10923, 21845) },
- { AOM_CDF3(28799, 31390) }, { AOM_CDF3(26431, 30774) },
- { AOM_CDF3(28973, 31594) }, { AOM_CDF3(29742, 31203) } };
-
-static const aom_cdf_prob default_obmc_cdf[BLOCK_SIZES_ALL][CDF_SIZE(2)] = {
- { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, { AOM_CDF2(16384) },
- { AOM_CDF2(10437) }, { AOM_CDF2(9371) }, { AOM_CDF2(9301) },
- { AOM_CDF2(17432) }, { AOM_CDF2(14423) }, { AOM_CDF2(15142) },
- { AOM_CDF2(25817) }, { AOM_CDF2(22823) }, { AOM_CDF2(22083) },
- { AOM_CDF2(30128) }, { AOM_CDF2(31014) }, { AOM_CDF2(31560) },
- { AOM_CDF2(32638) }, { AOM_CDF2(16384) }, { AOM_CDF2(16384) },
- { AOM_CDF2(23664) }, { AOM_CDF2(20901) }, { AOM_CDF2(24008) },
- { AOM_CDF2(26879) }
-};
-
-static const aom_cdf_prob default_intra_inter_cdf[INTRA_INTER_CONTEXTS]
- [CDF_SIZE(2)] = {
- { AOM_CDF2(806) },
- { AOM_CDF2(16662) },
- { AOM_CDF2(20186) },
- { AOM_CDF2(26538) }
- };
-
-static const aom_cdf_prob default_comp_inter_cdf[COMP_INTER_CONTEXTS][CDF_SIZE(
- 2)] = { { AOM_CDF2(26828) },
- { AOM_CDF2(24035) },
- { AOM_CDF2(12031) },
- { AOM_CDF2(10640) },
- { AOM_CDF2(2901) } };
-
-static const aom_cdf_prob default_comp_ref_type_cdf[COMP_REF_TYPE_CONTEXTS]
- [CDF_SIZE(2)] = {
- { AOM_CDF2(1198) },
- { AOM_CDF2(2070) },
- { AOM_CDF2(9166) },
- { AOM_CDF2(7499) },
- { AOM_CDF2(22475) }
- };
-
-static const aom_cdf_prob
- default_uni_comp_ref_cdf[UNI_COMP_REF_CONTEXTS][UNIDIR_COMP_REFS -
- 1][CDF_SIZE(2)] = {
- { { AOM_CDF2(5284) }, { AOM_CDF2(3865) }, { AOM_CDF2(3128) } },
- { { AOM_CDF2(23152) }, { AOM_CDF2(14173) }, { AOM_CDF2(15270) } },
- { { AOM_CDF2(31774) }, { AOM_CDF2(25120) }, { AOM_CDF2(26710) } }
- };
-
-static const aom_cdf_prob default_single_ref_cdf[REF_CONTEXTS][SINGLE_REFS - 1]
- [CDF_SIZE(2)] = {
- { { AOM_CDF2(4897) },
- { AOM_CDF2(1555) },
- { AOM_CDF2(4236) },
- { AOM_CDF2(8650) },
- { AOM_CDF2(904) },
- { AOM_CDF2(1444) } },
- { { AOM_CDF2(16973) },
- { AOM_CDF2(16751) },
- { AOM_CDF2(19647) },
- { AOM_CDF2(24773) },
- { AOM_CDF2(11014) },
- { AOM_CDF2(15087) } },
- { { AOM_CDF2(29744) },
- { AOM_CDF2(30279) },
- { AOM_CDF2(31194) },
- { AOM_CDF2(31895) },
- { AOM_CDF2(26875) },
- { AOM_CDF2(30304) } }
- };
-
-static const aom_cdf_prob
- default_comp_ref_cdf[REF_CONTEXTS][FWD_REFS - 1][CDF_SIZE(2)] = {
- { { AOM_CDF2(4946) }, { AOM_CDF2(9468) }, { AOM_CDF2(1503) } },
- { { AOM_CDF2(19891) }, { AOM_CDF2(22441) }, { AOM_CDF2(15160) } },
- { { AOM_CDF2(30731) }, { AOM_CDF2(31059) }, { AOM_CDF2(27544) } }
- };
-
-static const aom_cdf_prob
- default_comp_bwdref_cdf[REF_CONTEXTS][BWD_REFS - 1][CDF_SIZE(2)] = {
- { { AOM_CDF2(2235) }, { AOM_CDF2(1423) } },
- { { AOM_CDF2(17182) }, { AOM_CDF2(15175) } },
- { { AOM_CDF2(30606) }, { AOM_CDF2(30489) } }
- };
-
-static const aom_cdf_prob
- default_palette_y_size_cdf[PALATTE_BSIZE_CTXS][CDF_SIZE(PALETTE_SIZES)] = {
- { AOM_CDF7(7952, 13000, 18149, 21478, 25527, 29241) },
- { AOM_CDF7(7139, 11421, 16195, 19544, 23666, 28073) },
- { AOM_CDF7(7788, 12741, 17325, 20500, 24315, 28530) },
- { AOM_CDF7(8271, 14064, 18246, 21564, 25071, 28533) },
- { AOM_CDF7(12725, 19180, 21863, 24839, 27535, 30120) },
- { AOM_CDF7(9711, 14888, 16923, 21052, 25661, 27875) },
- { AOM_CDF7(14940, 20797, 21678, 24186, 27033, 28999) }
- };
-
-static const aom_cdf_prob
- default_palette_uv_size_cdf[PALATTE_BSIZE_CTXS][CDF_SIZE(PALETTE_SIZES)] = {
- { AOM_CDF7(8713, 19979, 27128, 29609, 31331, 32272) },
- { AOM_CDF7(5839, 15573, 23581, 26947, 29848, 31700) },
- { AOM_CDF7(4426, 11260, 17999, 21483, 25863, 29430) },
- { AOM_CDF7(3228, 9464, 14993, 18089, 22523, 27420) },
- { AOM_CDF7(3768, 8886, 13091, 17852, 22495, 27207) },
- { AOM_CDF7(2464, 8451, 12861, 21632, 25525, 28555) },
- { AOM_CDF7(1269, 5435, 10433, 18963, 21700, 25865) }
- };
-
-static const aom_cdf_prob default_palette_y_mode_cdf
- [PALATTE_BSIZE_CTXS][PALETTE_Y_MODE_CONTEXTS][CDF_SIZE(2)] = {
- { { AOM_CDF2(31676) }, { AOM_CDF2(3419) }, { AOM_CDF2(1261) } },
- { { AOM_CDF2(31912) }, { AOM_CDF2(2859) }, { AOM_CDF2(980) } },
- { { AOM_CDF2(31823) }, { AOM_CDF2(3400) }, { AOM_CDF2(781) } },
- { { AOM_CDF2(32030) }, { AOM_CDF2(3561) }, { AOM_CDF2(904) } },
- { { AOM_CDF2(32309) }, { AOM_CDF2(7337) }, { AOM_CDF2(1462) } },
- { { AOM_CDF2(32265) }, { AOM_CDF2(4015) }, { AOM_CDF2(1521) } },
- { { AOM_CDF2(32450) }, { AOM_CDF2(7946) }, { AOM_CDF2(129) } }
- };
-
-static const aom_cdf_prob
- default_palette_uv_mode_cdf[PALETTE_UV_MODE_CONTEXTS][CDF_SIZE(2)] = {
- { AOM_CDF2(32461) }, { AOM_CDF2(21488) }
- };
-
-static const aom_cdf_prob default_palette_y_color_index_cdf
- [PALETTE_SIZES][PALETTE_COLOR_INDEX_CONTEXTS][CDF_SIZE(PALETTE_COLORS)] = {
- {
- { AOM_CDF2(28710) },
- { AOM_CDF2(16384) },
- { AOM_CDF2(10553) },
- { AOM_CDF2(27036) },
- { AOM_CDF2(31603) },
- },
- {
- { AOM_CDF3(27877, 30490) },
- { AOM_CDF3(11532, 25697) },
- { AOM_CDF3(6544, 30234) },
- { AOM_CDF3(23018, 28072) },
- { AOM_CDF3(31915, 32385) },
- },
- {
- { AOM_CDF4(25572, 28046, 30045) },
- { AOM_CDF4(9478, 21590, 27256) },
- { AOM_CDF4(7248, 26837, 29824) },
- { AOM_CDF4(19167, 24486, 28349) },
- { AOM_CDF4(31400, 31825, 32250) },
- },
- {
- { AOM_CDF5(24779, 26955, 28576, 30282) },
- { AOM_CDF5(8669, 20364, 24073, 28093) },
- { AOM_CDF5(4255, 27565, 29377, 31067) },
- { AOM_CDF5(19864, 23674, 26716, 29530) },
- { AOM_CDF5(31646, 31893, 32147, 32426) },
- },
- {
- { AOM_CDF6(23132, 25407, 26970, 28435, 30073) },
- { AOM_CDF6(7443, 17242, 20717, 24762, 27982) },
- { AOM_CDF6(6300, 24862, 26944, 28784, 30671) },
- { AOM_CDF6(18916, 22895, 25267, 27435, 29652) },
- { AOM_CDF6(31270, 31550, 31808, 32059, 32353) },
- },
- {
- { AOM_CDF7(23105, 25199, 26464, 27684, 28931, 30318) },
- { AOM_CDF7(6950, 15447, 18952, 22681, 25567, 28563) },
- { AOM_CDF7(7560, 23474, 25490, 27203, 28921, 30708) },
- { AOM_CDF7(18544, 22373, 24457, 26195, 28119, 30045) },
- { AOM_CDF7(31198, 31451, 31670, 31882, 32123, 32391) },
- },
- {
- { AOM_CDF8(21689, 23883, 25163, 26352, 27506, 28827, 30195) },
- { AOM_CDF8(6892, 15385, 17840, 21606, 24287, 26753, 29204) },
- { AOM_CDF8(5651, 23182, 25042, 26518, 27982, 29392, 30900) },
- { AOM_CDF8(19349, 22578, 24418, 25994, 27524, 29031, 30448) },
- { AOM_CDF8(31028, 31270, 31504, 31705, 31927, 32153, 32392) },
- },
- };
-
-static const aom_cdf_prob default_palette_uv_color_index_cdf
- [PALETTE_SIZES][PALETTE_COLOR_INDEX_CONTEXTS][CDF_SIZE(PALETTE_COLORS)] = {
- {
- { AOM_CDF2(29089) },
- { AOM_CDF2(16384) },
- { AOM_CDF2(8713) },
- { AOM_CDF2(29257) },
- { AOM_CDF2(31610) },
- },
- {
- { AOM_CDF3(25257, 29145) },
- { AOM_CDF3(12287, 27293) },
- { AOM_CDF3(7033, 27960) },
- { AOM_CDF3(20145, 25405) },
- { AOM_CDF3(30608, 31639) },
- },
- {
- { AOM_CDF4(24210, 27175, 29903) },
- { AOM_CDF4(9888, 22386, 27214) },
- { AOM_CDF4(5901, 26053, 29293) },
- { AOM_CDF4(18318, 22152, 28333) },
- { AOM_CDF4(30459, 31136, 31926) },
- },
- {
- { AOM_CDF5(22980, 25479, 27781, 29986) },
- { AOM_CDF5(8413, 21408, 24859, 28874) },
- { AOM_CDF5(2257, 29449, 30594, 31598) },
- { AOM_CDF5(19189, 21202, 25915, 28620) },
- { AOM_CDF5(31844, 32044, 32281, 32518) },
- },
- {
- { AOM_CDF6(22217, 24567, 26637, 28683, 30548) },
- { AOM_CDF6(7307, 16406, 19636, 24632, 28424) },
- { AOM_CDF6(4441, 25064, 26879, 28942, 30919) },
- { AOM_CDF6(17210, 20528, 23319, 26750, 29582) },
- { AOM_CDF6(30674, 30953, 31396, 31735, 32207) },
- },
- {
- { AOM_CDF7(21239, 23168, 25044, 26962, 28705, 30506) },
- { AOM_CDF7(6545, 15012, 18004, 21817, 25503, 28701) },
- { AOM_CDF7(3448, 26295, 27437, 28704, 30126, 31442) },
- { AOM_CDF7(15889, 18323, 21704, 24698, 26976, 29690) },
- { AOM_CDF7(30988, 31204, 31479, 31734, 31983, 32325) },
- },
- {
- { AOM_CDF8(21442, 23288, 24758, 26246, 27649, 28980, 30563) },
- { AOM_CDF8(5863, 14933, 17552, 20668, 23683, 26411, 29273) },
- { AOM_CDF8(3415, 25810, 26877, 27990, 29223, 30394, 31618) },
- { AOM_CDF8(17965, 20084, 22232, 23974, 26274, 28402, 30390) },
- { AOM_CDF8(31190, 31329, 31516, 31679, 31825, 32026, 32322) },
- },
- };
-
-static const aom_cdf_prob
- default_txfm_partition_cdf[TXFM_PARTITION_CONTEXTS][CDF_SIZE(2)] = {
- { AOM_CDF2(28581) }, { AOM_CDF2(23846) }, { AOM_CDF2(20847) },
- { AOM_CDF2(24315) }, { AOM_CDF2(18196) }, { AOM_CDF2(12133) },
- { AOM_CDF2(18791) }, { AOM_CDF2(10887) }, { AOM_CDF2(11005) },
- { AOM_CDF2(27179) }, { AOM_CDF2(20004) }, { AOM_CDF2(11281) },
- { AOM_CDF2(26549) }, { AOM_CDF2(19308) }, { AOM_CDF2(14224) },
- { AOM_CDF2(28015) }, { AOM_CDF2(21546) }, { AOM_CDF2(14400) },
- { AOM_CDF2(28165) }, { AOM_CDF2(22401) }, { AOM_CDF2(16088) }
- };
-
-static const aom_cdf_prob default_skip_cdfs[SKIP_CONTEXTS][CDF_SIZE(2)] = {
- { AOM_CDF2(31671) }, { AOM_CDF2(16515) }, { AOM_CDF2(4576) }
-};
-
-static const aom_cdf_prob default_skip_mode_cdfs[SKIP_MODE_CONTEXTS][CDF_SIZE(
- 2)] = { { AOM_CDF2(32621) }, { AOM_CDF2(20708) }, { AOM_CDF2(8127) } };
-
-static const aom_cdf_prob
- default_compound_idx_cdfs[COMP_INDEX_CONTEXTS][CDF_SIZE(2)] = {
- { AOM_CDF2(18244) }, { AOM_CDF2(12865) }, { AOM_CDF2(7053) },
- { AOM_CDF2(13259) }, { AOM_CDF2(9334) }, { AOM_CDF2(4644) }
- };
-
-static const aom_cdf_prob
- default_comp_group_idx_cdfs[COMP_GROUP_IDX_CONTEXTS][CDF_SIZE(2)] = {
- { AOM_CDF2(26607) }, { AOM_CDF2(22891) }, { AOM_CDF2(18840) },
- { AOM_CDF2(24594) }, { AOM_CDF2(19934) }, { AOM_CDF2(22674) }
- };
-
-static const aom_cdf_prob default_intrabc_cdf[CDF_SIZE(2)] = { AOM_CDF2(
- 30531) };
-
-static const aom_cdf_prob default_filter_intra_mode_cdf[CDF_SIZE(
- FILTER_INTRA_MODES)] = { AOM_CDF5(8949, 12776, 17211, 29558) };
-
-static const aom_cdf_prob default_filter_intra_cdfs[BLOCK_SIZES_ALL][CDF_SIZE(
- 2)] = { { AOM_CDF2(4621) }, { AOM_CDF2(6743) }, { AOM_CDF2(5893) },
- { AOM_CDF2(7866) }, { AOM_CDF2(12551) }, { AOM_CDF2(9394) },
- { AOM_CDF2(12408) }, { AOM_CDF2(14301) }, { AOM_CDF2(12756) },
- { AOM_CDF2(22343) }, { AOM_CDF2(16384) }, { AOM_CDF2(16384) },
- { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, { AOM_CDF2(16384) },
- { AOM_CDF2(16384) }, { AOM_CDF2(12770) }, { AOM_CDF2(10368) },
- { AOM_CDF2(20229) }, { AOM_CDF2(18101) }, { AOM_CDF2(16384) },
- { AOM_CDF2(16384) } };
-
-static const aom_cdf_prob default_switchable_restore_cdf[CDF_SIZE(
- RESTORE_SWITCHABLE_TYPES)] = { AOM_CDF3(9413, 22581) };
-
-static const aom_cdf_prob default_wiener_restore_cdf[CDF_SIZE(2)] = { AOM_CDF2(
- 11570) };
-
-static const aom_cdf_prob default_sgrproj_restore_cdf[CDF_SIZE(2)] = { AOM_CDF2(
- 16855) };
-
-static const aom_cdf_prob default_delta_q_cdf[CDF_SIZE(DELTA_Q_PROBS + 1)] = {
- AOM_CDF4(28160, 32120, 32677)
-};
-
-static const aom_cdf_prob default_delta_lf_multi_cdf[FRAME_LF_COUNT][CDF_SIZE(
- DELTA_LF_PROBS + 1)] = { { AOM_CDF4(28160, 32120, 32677) },
- { AOM_CDF4(28160, 32120, 32677) },
- { AOM_CDF4(28160, 32120, 32677) },
- { AOM_CDF4(28160, 32120, 32677) } };
-static const aom_cdf_prob default_delta_lf_cdf[CDF_SIZE(DELTA_LF_PROBS + 1)] = {
- AOM_CDF4(28160, 32120, 32677)
-};
-
-// FIXME(someone) need real defaults here
-static const aom_cdf_prob default_seg_tree_cdf[CDF_SIZE(MAX_SEGMENTS)] = {
- AOM_CDF8(4096, 8192, 12288, 16384, 20480, 24576, 28672)
-};
-
-static const aom_cdf_prob
- default_segment_pred_cdf[SEG_TEMPORAL_PRED_CTXS][CDF_SIZE(2)] = {
- { AOM_CDF2(128 * 128) }, { AOM_CDF2(128 * 128) }, { AOM_CDF2(128 * 128) }
- };
-
-static const aom_cdf_prob
- default_spatial_pred_seg_tree_cdf[SPATIAL_PREDICTION_PROBS][CDF_SIZE(
- MAX_SEGMENTS)] = {
- {
- AOM_CDF8(5622, 7893, 16093, 18233, 27809, 28373, 32533),
- },
- {
- AOM_CDF8(14274, 18230, 22557, 24935, 29980, 30851, 32344),
- },
- {
- AOM_CDF8(27527, 28487, 28723, 28890, 32397, 32647, 32679),
- },
- };
-
-static const aom_cdf_prob default_tx_size_cdf[MAX_TX_CATS][TX_SIZE_CONTEXTS]
- [CDF_SIZE(MAX_TX_DEPTH + 1)] = {
- { { AOM_CDF2(19968) },
- { AOM_CDF2(19968) },
- { AOM_CDF2(24320) } },
- { { AOM_CDF3(12272, 30172) },
- { AOM_CDF3(12272, 30172) },
- { AOM_CDF3(18677, 30848) } },
- { { AOM_CDF3(12986, 15180) },
- { AOM_CDF3(12986, 15180) },
- { AOM_CDF3(24302, 25602) } },
- { { AOM_CDF3(5782, 11475) },
- { AOM_CDF3(5782, 11475) },
- { AOM_CDF3(16803, 22759) } },
- };
-
-#define MAX_COLOR_CONTEXT_HASH 8
-// Maps the neighbor-score hash computed in
-// av1_get_palette_color_index_context() (scores[0] * 1 + scores[1] * 2 +
-// scores[2] * 2, using the three largest scores) to a color index context.
-// Only hashes 2, 5, 6, 7 and 8 have valid entries; they map to contexts
-// 0, 4, 3, 2 and 1 respectively.
-// Negative values are invalid
-static const int palette_color_index_context_lookup[MAX_COLOR_CONTEXT_HASH +
- 1] = { -1, -1, 0, -1, -1,
- 4, 3, 2, 1 };
-
-#define NUM_PALETTE_NEIGHBORS 3 // left, top-left and top.
-// Derives the entropy-coding context for the palette color index at (r, c).
-//
-// The left, top-left and top neighbors (when they exist) vote for their color
-// with weights 2, 1 and 2. The colors are then partially ordered so that the
-// NUM_PALETTE_NEIGHBORS highest-scoring ones come first, and the top scores
-// are hashed into a context via palette_color_index_context_lookup.
-//
-//   color_map    - color indices, addressed as color_map[row * stride + col].
-//   r, c         - position of the current pixel; must not be (0, 0).
-//   palette_size - number of colors in use (<= PALETTE_MAX_SIZE).
-//   color_order  - out: colors ordered by score; all PALETTE_MAX_SIZE entries
-//                  are written.
-//   color_idx    - out, may be NULL: position of the current pixel's color
-//                  within color_order.
-//
-// Returns the context, in [0, PALETTE_COLOR_INDEX_CONTEXTS).
-int av1_get_palette_color_index_context(const uint8_t *color_map, int stride,
-                                        int r, int c, int palette_size,
-                                        uint8_t *color_order, int *color_idx) {
-  assert(palette_size <= PALETTE_MAX_SIZE);
-  assert(r > 0 || c > 0);
-
-  const int has_left = c >= 1;
-  const int has_above = r >= 1;
-  const int pos = r * stride + c;
-
-  // Neighbor colors in the order left, top-left, top; -1 marks "missing".
-  const int neighbor_color[NUM_PALETTE_NEIGHBORS] = {
-    has_left ? color_map[pos - 1] : -1,
-    (has_left && has_above) ? color_map[pos - stride - 1] : -1,
-    has_above ? color_map[pos - stride] : -1,
-  };
-
-  // The +10 below should not be needed. But we get a warning "array subscript
-  // is above array bounds [-Werror=array-bounds]" without it, possibly due to
-  // this (or similar) bug: https://gcc.gnu.org/bugzilla/show_bug.cgi?id=59124
-  int scores[PALETTE_MAX_SIZE + 10] = { 0 };
-  static const int weights[NUM_PALETTE_NEIGHBORS] = { 2, 1, 2 };
-  for (int n = 0; n < NUM_PALETTE_NEIGHBORS; ++n) {
-    if (neighbor_color[n] < 0) continue;
-    scores[neighbor_color[n]] += weights[n];
-  }
-
-  // Start from the identity ordering, tracked in both directions.
-  int inverse_color_order[PALETTE_MAX_SIZE];
-  for (int n = 0; n < PALETTE_MAX_SIZE; ++n) {
-    color_order[n] = n;
-    inverse_color_order[n] = n;
-  }
-
-  // Bring the NUM_PALETTE_NEIGHBORS largest scores to the front, largest
-  // first. The winner is inserted at 'rank' and everything between 'rank' and
-  // its old slot slides down by one, so the remaining entries keep their
-  // relative order.
-  for (int rank = 0; rank < NUM_PALETTE_NEIGHBORS; ++rank) {
-    int best = rank;
-    for (int cand = rank + 1; cand < palette_size; ++cand) {
-      if (scores[cand] > scores[best]) best = cand;
-    }
-    if (best == rank) continue;
-
-    const int best_score = scores[best];
-    const uint8_t best_color = color_order[best];
-    for (int k = best; k > rank; --k) {
-      scores[k] = scores[k - 1];
-      color_order[k] = color_order[k - 1];
-      inverse_color_order[color_order[k]] = k;
-    }
-    scores[rank] = best_score;
-    color_order[rank] = best_color;
-    inverse_color_order[best_color] = rank;
-  }
-
-  if (color_idx != NULL) *color_idx = inverse_color_order[color_map[pos]];
-
-  // Fold the top scores into a single hash value.
-  static const int hash_multipliers[NUM_PALETTE_NEIGHBORS] = { 1, 2, 2 };
-  int ctx_hash = 0;
-  for (int n = 0; n < NUM_PALETTE_NEIGHBORS; ++n) {
-    ctx_hash += scores[n] * hash_multipliers[n];
-  }
-  assert(ctx_hash > 0);
-  assert(ctx_hash <= MAX_COLOR_CONTEXT_HASH);
-
-  // Translate the hash into the context.
-  const int ctx = palette_color_index_context_lookup[ctx_hash];
-  assert(ctx >= 0);
-  assert(ctx < PALETTE_COLOR_INDEX_CONTEXTS);
-  return ctx;
-}
-#undef NUM_PALETTE_NEIGHBORS
-#undef MAX_COLOR_CONTEXT_HASH
-
-// Fills the mode-related CDF members of *fc from the file-local default_*
-// tables. Each av1_copy() targets a different member, so the grouping below
-// is purely for readability.
-static void init_mode_probs(FRAME_CONTEXT *fc) {
-  // Intra prediction modes and intra tools.
-  av1_copy(fc->kf_y_cdf, default_kf_y_mode_cdf);
-  av1_copy(fc->y_mode_cdf, default_if_y_mode_cdf);
-  av1_copy(fc->uv_mode_cdf, default_uv_mode_cdf);
-  av1_copy(fc->angle_delta_cdf, default_angle_delta_cdf);
-  av1_copy(fc->filter_intra_cdfs, default_filter_intra_cdfs);
-  av1_copy(fc->filter_intra_mode_cdf, default_filter_intra_mode_cdf);
-  av1_copy(fc->cfl_sign_cdf, default_cfl_sign_cdf);
-  av1_copy(fc->cfl_alpha_cdf, default_cfl_alpha_cdf);
-  av1_copy(fc->intrabc_cdf, default_intrabc_cdf);
-
-  // Palette.
-  av1_copy(fc->palette_y_size_cdf, default_palette_y_size_cdf);
-  av1_copy(fc->palette_uv_size_cdf, default_palette_uv_size_cdf);
-  av1_copy(fc->palette_y_color_index_cdf, default_palette_y_color_index_cdf);
-  av1_copy(fc->palette_uv_color_index_cdf, default_palette_uv_color_index_cdf);
-  av1_copy(fc->palette_y_mode_cdf, default_palette_y_mode_cdf);
-  av1_copy(fc->palette_uv_mode_cdf, default_palette_uv_mode_cdf);
-
-  // Partitioning and transforms.
-  av1_copy(fc->partition_cdf, default_partition_cdf);
-  av1_copy(fc->tx_size_cdf, default_tx_size_cdf);
-  av1_copy(fc->txfm_partition_cdf, default_txfm_partition_cdf);
-  av1_copy(fc->intra_ext_tx_cdf, default_intra_ext_tx_cdf);
-  av1_copy(fc->inter_ext_tx_cdf, default_inter_ext_tx_cdf);
-
-  // Skip flags and intra/inter decision.
-  av1_copy(fc->skip_mode_cdfs, default_skip_mode_cdfs);
-  av1_copy(fc->skip_cdfs, default_skip_cdfs);
-  av1_copy(fc->intra_inter_cdf, default_intra_inter_cdf);
-
-  // Reference frame selection.
-  av1_copy(fc->comp_inter_cdf, default_comp_inter_cdf);
-  av1_copy(fc->comp_ref_type_cdf, default_comp_ref_type_cdf);
-  av1_copy(fc->uni_comp_ref_cdf, default_uni_comp_ref_cdf);
-  av1_copy(fc->single_ref_cdf, default_single_ref_cdf);
-  av1_copy(fc->comp_ref_cdf, default_comp_ref_cdf);
-  av1_copy(fc->comp_bwdref_cdf, default_comp_bwdref_cdf);
-
-  // Inter prediction modes and interpolation filter.
-  av1_copy(fc->newmv_cdf, default_newmv_cdf);
-  av1_copy(fc->zeromv_cdf, default_zeromv_cdf);
-  av1_copy(fc->refmv_cdf, default_refmv_cdf);
-  av1_copy(fc->drl_cdf, default_drl_cdf);
-  av1_copy(fc->inter_compound_mode_cdf, default_inter_compound_mode_cdf);
-  av1_copy(fc->switchable_interp_cdf, default_switchable_interp_cdf);
-
-  // Motion modes, compound prediction, wedge and inter-intra.
-  av1_copy(fc->motion_mode_cdf, default_motion_mode_cdf);
-  av1_copy(fc->obmc_cdf, default_obmc_cdf);
-  av1_copy(fc->compound_type_cdf, default_compound_type_cdf);
-  av1_copy(fc->compound_index_cdf, default_compound_idx_cdfs);
-  av1_copy(fc->comp_group_idx_cdf, default_comp_group_idx_cdfs);
-  av1_copy(fc->wedge_idx_cdf, default_wedge_idx_cdf);
-  av1_copy(fc->interintra_cdf, default_interintra_cdf);
-  av1_copy(fc->wedge_interintra_cdf, default_wedge_interintra_cdf);
-  av1_copy(fc->interintra_mode_cdf, default_interintra_mode_cdf);
-
-  // Segmentation. The spatial prediction table is copied one row at a time.
-  av1_copy(fc->seg.pred_cdf, default_segment_pred_cdf);
-  av1_copy(fc->seg.tree_cdf, default_seg_tree_cdf);
-  for (int row = 0; row < SPATIAL_PREDICTION_PROBS; row++) {
-    av1_copy(fc->seg.spatial_pred_seg_cdf[row],
-             default_spatial_pred_seg_tree_cdf[row]);
-  }
-
-  // Loop restoration.
-  av1_copy(fc->switchable_restore_cdf, default_switchable_restore_cdf);
-  av1_copy(fc->wiener_restore_cdf, default_wiener_restore_cdf);
-  av1_copy(fc->sgrproj_restore_cdf, default_sgrproj_restore_cdf);
-
-  // Delta quantizer / delta loop filter.
-  av1_copy(fc->delta_q_cdf, default_delta_q_cdf);
-  av1_copy(fc->delta_lf_cdf, default_delta_lf_cdf);
-  av1_copy(fc->delta_lf_multi_cdf, default_delta_lf_multi_cdf);
-}
-
-// Writes the default per-reference-frame deltas into ref_deltas (must be
-// non-NULL and indexable by every frame constant used below):
-// INTRA gets +1, LAST and the frames that mirror it get 0, and
-// GOLDEN / ALTREF2 / ALTREF get -1.
-void av1_set_default_ref_deltas(int8_t *ref_deltas) {
-  assert(ref_deltas != NULL);
-
-  ref_deltas[INTRA_FRAME] = 1;
-
-  // LAST is the baseline; LAST2, LAST3 and BWDREF take whatever LAST holds.
-  ref_deltas[LAST_FRAME] = 0;
-  const int8_t last_delta = ref_deltas[LAST_FRAME];
-  ref_deltas[LAST2_FRAME] = last_delta;
-  ref_deltas[LAST3_FRAME] = last_delta;
-  ref_deltas[BWDREF_FRAME] = last_delta;
-
-  const int8_t negative_delta = -1;
-  ref_deltas[GOLDEN_FRAME] = negative_delta;
-  ref_deltas[ALTREF2_FRAME] = negative_delta;
-  ref_deltas[ALTREF_FRAME] = negative_delta;
-}
-
-// Resets both mode delta entries (indices 0 and 1) to zero.
-// mode_deltas must be non-NULL and hold at least two elements.
-void av1_set_default_mode_deltas(int8_t *mode_deltas) {
-  assert(mode_deltas != NULL);
-
-  for (int idx = 0; idx < 2; ++idx) {
-    mode_deltas[idx] = 0;
-  }
-}
-
-// Puts the loop filter's delta state into its default configuration: the
-// ref/mode delta arrays are reset to their defaults and both the "enabled"
-// and "update" flags are set.
-static void set_default_lf_deltas(struct loopfilter *lf) {
-  av1_set_default_ref_deltas(lf->ref_deltas);
-  av1_set_default_mode_deltas(lf->mode_deltas);
-
-  lf->mode_ref_delta_update = 1;
-  lf->mode_ref_delta_enabled = 1;
-}
-
-// Snapshots *cm->fc into the FRAME_CONTEXT_DEFAULTS slot of
-// cm->frame_contexts, a special slot that is not associated with any
-// reference buffer, so that cm->pre_fc can be set up correctly later.
-// When cm->large_scale_tile is set, every one of the FRAME_CONTEXTS slots
-// receives the same snapshot.
-//
-// Must ONLY be called once cm->fc holds default probabilities, either via
-// av1_setup_past_independence or after initializing them manually.
-void av1_setup_frame_contexts(AV1_COMMON *cm) {
-  cm->frame_contexts[FRAME_CONTEXT_DEFAULTS] = *cm->fc;
-
-  if (!cm->large_scale_tile) return;
-
-  for (int slot = 0; slot < FRAME_CONTEXTS; ++slot) {
-    cm->frame_contexts[slot] = *cm->fc;
-  }
-}
-
-// Resets the state in *cm that would otherwise carry over from earlier
-// frames: segmentation features and the current segment map, loop filter
-// deltas, and all entropy contexts (which are reloaded from the defaults and
-// then stored via av1_setup_frame_contexts). For intra-only frames the
-// previous mode-info array is also zeroed when it exists.
-void av1_setup_past_independence(AV1_COMMON *cm) {
-  // Segment features go back to the default state: all disabled, data 0,
-  // delta coding.
-  av1_clearall_segfeatures(&cm->seg);
-
-  // Point at the current frame's segment map and zero it if there is one.
-  cm->current_frame_seg_map = cm->cur_frame->seg_map;
-  if (cm->current_frame_seg_map) {
-    memset(cm->current_frame_seg_map, 0, (cm->mi_rows * cm->mi_cols));
-  }
-
-  // Default ref/mode deltas, for the frame buffer and for the loop filter.
-  av1_set_default_ref_deltas(cm->cur_frame->ref_deltas);
-  av1_set_default_mode_deltas(cm->cur_frame->mode_deltas);
-  set_default_lf_deltas(&cm->lf);
-
-  // Reload the default probabilities, then mark the context initialized
-  // before it is stored by av1_setup_frame_contexts().
-  av1_default_coef_probs(cm);
-  init_mode_probs(cm->fc);
-  av1_init_mv_probs(cm);
-  av1_init_lv_map(cm);
-  cm->fc->initialized = 1;
-  av1_setup_frame_contexts(cm);
-
-  // prev_mip will only be allocated in encoder.
-  if (!frame_is_intra_only(cm) || !cm->prev_mip) return;
-
-  memset(cm->prev_mip, 0, cm->mi_stride * cm->mi_rows * sizeof(*cm->prev_mip));
-}
diff --git a/third_party/aom/av1/common/entropymode.h b/third_party/aom/av1/common/entropymode.h
deleted file mode 100644
index 7047f34d2..000000000
--- a/third_party/aom/av1/common/entropymode.h
+++ /dev/null
@@ -1,212 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#ifndef AOM_AV1_COMMON_ENTROPYMODE_H_
-#define AOM_AV1_COMMON_ENTROPYMODE_H_
-
-#include "av1/common/entropy.h"
-#include "av1/common/entropymv.h"
-#include "av1/common/filter.h"
-#include "av1/common/seg_common.h"
-#include "aom_dsp/aom_filter.h"
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-#define BLOCK_SIZE_GROUPS 4
-
-#define TX_SIZE_CONTEXTS 3
-
-#define INTER_OFFSET(mode) ((mode)-NEARESTMV)
-#define INTER_COMPOUND_OFFSET(mode) (uint8_t)((mode)-NEAREST_NEARESTMV)
-
-// Number of possible contexts for a color index.
-// As can be seen from av1_get_palette_color_index_context(), the possible
-// contexts are (2,0,0), (2,2,1), (3,2,0), (4,1,0), (5,0,0). These are mapped to
-// a value from 0 to 4 using 'palette_color_index_context_lookup' table.
-#define PALETTE_COLOR_INDEX_CONTEXTS 5
-
-// Palette Y mode context for a block is determined by number of neighboring
-// blocks (top and/or left) using a palette for Y plane. So, possible Y mode'
-// context values are:
-// 0 if neither left nor top block uses palette for Y plane,
-// 1 if exactly one of left or top block uses palette for Y plane, and
-// 2 if both left and top blocks use palette for Y plane.
-#define PALETTE_Y_MODE_CONTEXTS 3
-
-// Palette UV mode context for a block is determined by whether this block uses
-// palette for the Y plane. So, possible values are:
-// 0 if this block doesn't use palette for Y plane.
-// 1 if this block uses palette for Y plane (i.e. Y palette size > 0).
-#define PALETTE_UV_MODE_CONTEXTS 2
-
-// Map the number of pixels in a block size to a context
-// 64(BLOCK_8X8, BLOCK_4x16, BLOCK_16X4) -> 0
-// 128(BLOCK_8X16, BLOCK_16x8) -> 1
-// ...
-// 4096(BLOCK_64X64) -> 6
-#define PALATTE_BSIZE_CTXS 7
-
-#define KF_MODE_CONTEXTS 5
-
-struct AV1Common;
-
-typedef struct {
- const int16_t *scan;
- const int16_t *iscan;
- const int16_t *neighbors;
-} SCAN_ORDER;
-
-typedef struct frame_contexts {
- aom_cdf_prob txb_skip_cdf[TX_SIZES][TXB_SKIP_CONTEXTS][CDF_SIZE(2)];
- aom_cdf_prob eob_extra_cdf[TX_SIZES][PLANE_TYPES][EOB_COEF_CONTEXTS]
- [CDF_SIZE(2)];
- aom_cdf_prob dc_sign_cdf[PLANE_TYPES][DC_SIGN_CONTEXTS][CDF_SIZE(2)];
- aom_cdf_prob eob_flag_cdf16[PLANE_TYPES][2][CDF_SIZE(5)];
- aom_cdf_prob eob_flag_cdf32[PLANE_TYPES][2][CDF_SIZE(6)];
- aom_cdf_prob eob_flag_cdf64[PLANE_TYPES][2][CDF_SIZE(7)];
- aom_cdf_prob eob_flag_cdf128[PLANE_TYPES][2][CDF_SIZE(8)];
- aom_cdf_prob eob_flag_cdf256[PLANE_TYPES][2][CDF_SIZE(9)];
- aom_cdf_prob eob_flag_cdf512[PLANE_TYPES][2][CDF_SIZE(10)];
- aom_cdf_prob eob_flag_cdf1024[PLANE_TYPES][2][CDF_SIZE(11)];
- aom_cdf_prob coeff_base_eob_cdf[TX_SIZES][PLANE_TYPES][SIG_COEF_CONTEXTS_EOB]
- [CDF_SIZE(3)];
- aom_cdf_prob coeff_base_cdf[TX_SIZES][PLANE_TYPES][SIG_COEF_CONTEXTS]
- [CDF_SIZE(4)];
- aom_cdf_prob coeff_br_cdf[TX_SIZES][PLANE_TYPES][LEVEL_CONTEXTS]
- [CDF_SIZE(BR_CDF_SIZE)];
-
- aom_cdf_prob newmv_cdf[NEWMV_MODE_CONTEXTS][CDF_SIZE(2)];
- aom_cdf_prob zeromv_cdf[GLOBALMV_MODE_CONTEXTS][CDF_SIZE(2)];
- aom_cdf_prob refmv_cdf[REFMV_MODE_CONTEXTS][CDF_SIZE(2)];
- aom_cdf_prob drl_cdf[DRL_MODE_CONTEXTS][CDF_SIZE(2)];
-
- aom_cdf_prob inter_compound_mode_cdf[INTER_MODE_CONTEXTS]
- [CDF_SIZE(INTER_COMPOUND_MODES)];
- aom_cdf_prob compound_type_cdf[BLOCK_SIZES_ALL][CDF_SIZE(COMPOUND_TYPES - 1)];
- aom_cdf_prob wedge_idx_cdf[BLOCK_SIZES_ALL][CDF_SIZE(16)];
- aom_cdf_prob interintra_cdf[BLOCK_SIZE_GROUPS][CDF_SIZE(2)];
- aom_cdf_prob wedge_interintra_cdf[BLOCK_SIZES_ALL][CDF_SIZE(2)];
- aom_cdf_prob interintra_mode_cdf[BLOCK_SIZE_GROUPS]
- [CDF_SIZE(INTERINTRA_MODES)];
- aom_cdf_prob motion_mode_cdf[BLOCK_SIZES_ALL][CDF_SIZE(MOTION_MODES)];
- aom_cdf_prob obmc_cdf[BLOCK_SIZES_ALL][CDF_SIZE(2)];
- aom_cdf_prob palette_y_size_cdf[PALATTE_BSIZE_CTXS][CDF_SIZE(PALETTE_SIZES)];
- aom_cdf_prob palette_uv_size_cdf[PALATTE_BSIZE_CTXS][CDF_SIZE(PALETTE_SIZES)];
- aom_cdf_prob palette_y_color_index_cdf[PALETTE_SIZES]
- [PALETTE_COLOR_INDEX_CONTEXTS]
- [CDF_SIZE(PALETTE_COLORS)];
- aom_cdf_prob palette_uv_color_index_cdf[PALETTE_SIZES]
- [PALETTE_COLOR_INDEX_CONTEXTS]
- [CDF_SIZE(PALETTE_COLORS)];
- aom_cdf_prob palette_y_mode_cdf[PALATTE_BSIZE_CTXS][PALETTE_Y_MODE_CONTEXTS]
- [CDF_SIZE(2)];
- aom_cdf_prob palette_uv_mode_cdf[PALETTE_UV_MODE_CONTEXTS][CDF_SIZE(2)];
- aom_cdf_prob comp_inter_cdf[COMP_INTER_CONTEXTS][CDF_SIZE(2)];
- aom_cdf_prob single_ref_cdf[REF_CONTEXTS][SINGLE_REFS - 1][CDF_SIZE(2)];
- aom_cdf_prob comp_ref_type_cdf[COMP_REF_TYPE_CONTEXTS][CDF_SIZE(2)];
- aom_cdf_prob uni_comp_ref_cdf[UNI_COMP_REF_CONTEXTS][UNIDIR_COMP_REFS - 1]
- [CDF_SIZE(2)];
- aom_cdf_prob comp_ref_cdf[REF_CONTEXTS][FWD_REFS - 1][CDF_SIZE(2)];
- aom_cdf_prob comp_bwdref_cdf[REF_CONTEXTS][BWD_REFS - 1][CDF_SIZE(2)];
- aom_cdf_prob txfm_partition_cdf[TXFM_PARTITION_CONTEXTS][CDF_SIZE(2)];
- aom_cdf_prob compound_index_cdf[COMP_INDEX_CONTEXTS][CDF_SIZE(2)];
- aom_cdf_prob comp_group_idx_cdf[COMP_GROUP_IDX_CONTEXTS][CDF_SIZE(2)];
- aom_cdf_prob skip_mode_cdfs[SKIP_CONTEXTS][CDF_SIZE(2)];
- aom_cdf_prob skip_cdfs[SKIP_CONTEXTS][CDF_SIZE(2)];
- aom_cdf_prob intra_inter_cdf[INTRA_INTER_CONTEXTS][CDF_SIZE(2)];
- nmv_context nmvc;
- nmv_context ndvc;
- aom_cdf_prob intrabc_cdf[CDF_SIZE(2)];
- struct segmentation_probs seg;
- aom_cdf_prob filter_intra_cdfs[BLOCK_SIZES_ALL][CDF_SIZE(2)];
- aom_cdf_prob filter_intra_mode_cdf[CDF_SIZE(FILTER_INTRA_MODES)];
- aom_cdf_prob switchable_restore_cdf[CDF_SIZE(RESTORE_SWITCHABLE_TYPES)];
- aom_cdf_prob wiener_restore_cdf[CDF_SIZE(2)];
- aom_cdf_prob sgrproj_restore_cdf[CDF_SIZE(2)];
- aom_cdf_prob y_mode_cdf[BLOCK_SIZE_GROUPS][CDF_SIZE(INTRA_MODES)];
- aom_cdf_prob uv_mode_cdf[CFL_ALLOWED_TYPES][INTRA_MODES]
- [CDF_SIZE(UV_INTRA_MODES)];
- aom_cdf_prob partition_cdf[PARTITION_CONTEXTS][CDF_SIZE(EXT_PARTITION_TYPES)];
- aom_cdf_prob switchable_interp_cdf[SWITCHABLE_FILTER_CONTEXTS]
- [CDF_SIZE(SWITCHABLE_FILTERS)];
- /* kf_y_cdf is discarded after use, so does not require persistent storage.
- However, we keep it with the other CDFs in this struct since it needs to
- be copied to each tile to support parallelism just like the others.
- */
- aom_cdf_prob kf_y_cdf[KF_MODE_CONTEXTS][KF_MODE_CONTEXTS]
- [CDF_SIZE(INTRA_MODES)];
-
- aom_cdf_prob angle_delta_cdf[DIRECTIONAL_MODES]
- [CDF_SIZE(2 * MAX_ANGLE_DELTA + 1)];
-
- aom_cdf_prob tx_size_cdf[MAX_TX_CATS][TX_SIZE_CONTEXTS]
- [CDF_SIZE(MAX_TX_DEPTH + 1)];
- aom_cdf_prob delta_q_cdf[CDF_SIZE(DELTA_Q_PROBS + 1)];
- aom_cdf_prob delta_lf_multi_cdf[FRAME_LF_COUNT][CDF_SIZE(DELTA_LF_PROBS + 1)];
- aom_cdf_prob delta_lf_cdf[CDF_SIZE(DELTA_LF_PROBS + 1)];
- aom_cdf_prob intra_ext_tx_cdf[EXT_TX_SETS_INTRA][EXT_TX_SIZES][INTRA_MODES]
- [CDF_SIZE(TX_TYPES)];
- aom_cdf_prob inter_ext_tx_cdf[EXT_TX_SETS_INTER][EXT_TX_SIZES]
- [CDF_SIZE(TX_TYPES)];
- aom_cdf_prob cfl_sign_cdf[CDF_SIZE(CFL_JOINT_SIGNS)];
- aom_cdf_prob cfl_alpha_cdf[CFL_ALPHA_CONTEXTS][CDF_SIZE(CFL_ALPHABET_SIZE)];
- int initialized;
-} FRAME_CONTEXT;
-
-static const int av1_ext_tx_ind[EXT_TX_SET_TYPES][TX_TYPES] = {
- { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 },
- { 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 },
- { 1, 3, 4, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 },
- { 1, 5, 6, 4, 0, 0, 0, 0, 0, 0, 2, 3, 0, 0, 0, 0 },
- { 3, 4, 5, 8, 6, 7, 9, 10, 11, 0, 1, 2, 0, 0, 0, 0 },
- { 7, 8, 9, 12, 10, 11, 13, 14, 15, 0, 1, 2, 3, 4, 5, 6 },
-};
-
-static const int av1_ext_tx_inv[EXT_TX_SET_TYPES][TX_TYPES] = {
- { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 },
- { 9, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 },
- { 9, 0, 3, 1, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 },
- { 9, 0, 10, 11, 3, 1, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0 },
- { 9, 10, 11, 0, 1, 2, 4, 5, 3, 6, 7, 8, 0, 0, 0, 0 },
- { 9, 10, 11, 12, 13, 14, 15, 0, 1, 2, 4, 5, 3, 6, 7, 8 },
-};
-
-void av1_set_default_ref_deltas(int8_t *ref_deltas);
-void av1_set_default_mode_deltas(int8_t *mode_deltas);
-void av1_setup_frame_contexts(struct AV1Common *cm);
-void av1_setup_past_independence(struct AV1Common *cm);
-
-// Returns (int)ceil(log2(n)).
-// NOTE: This implementation only works for n <= 2^30.
-static INLINE int av1_ceil_log2(int n) {
- if (n < 2) return 0;
- int i = 1, p = 2;
- while (p < n) {
- i++;
- p = p << 1;
- }
- return i;
-}
-
-// Returns the context for palette color index at row 'r' and column 'c',
-// along with the 'color_order' of neighbors and the 'color_idx'.
-// The 'color_map' is a 2D array with the given 'stride'.
-int av1_get_palette_color_index_context(const uint8_t *color_map, int stride,
- int r, int c, int palette_size,
- uint8_t *color_order, int *color_idx);
-
-#ifdef __cplusplus
-} // extern "C"
-#endif
-
-#endif // AOM_AV1_COMMON_ENTROPYMODE_H_
diff --git a/third_party/aom/av1/common/entropymv.c b/third_party/aom/av1/common/entropymv.c
deleted file mode 100644
index 491337387..000000000
--- a/third_party/aom/av1/common/entropymv.c
+++ /dev/null
@@ -1,67 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#include "av1/common/onyxc_int.h"
-#include "av1/common/entropymv.h"
-
-static const nmv_context default_nmv_context = {
- { AOM_CDF4(4096, 11264, 19328) }, // joints_cdf
- { {
- // Vertical component
- { AOM_CDF11(28672, 30976, 31858, 32320, 32551, 32656, 32740, 32757,
- 32762, 32767) }, // class_cdf // fp
- { { AOM_CDF4(16384, 24576, 26624) },
- { AOM_CDF4(12288, 21248, 24128) } }, // class0_fp_cdf
- { AOM_CDF4(8192, 17408, 21248) }, // fp_cdf
- { AOM_CDF2(128 * 128) }, // sign_cdf
- { AOM_CDF2(160 * 128) }, // class0_hp_cdf
- { AOM_CDF2(128 * 128) }, // hp_cdf
- { AOM_CDF2(216 * 128) }, // class0_cdf
- { { AOM_CDF2(128 * 136) },
- { AOM_CDF2(128 * 140) },
- { AOM_CDF2(128 * 148) },
- { AOM_CDF2(128 * 160) },
- { AOM_CDF2(128 * 176) },
- { AOM_CDF2(128 * 192) },
- { AOM_CDF2(128 * 224) },
- { AOM_CDF2(128 * 234) },
- { AOM_CDF2(128 * 234) },
- { AOM_CDF2(128 * 240) } }, // bits_cdf
- },
- {
- // Horizontal component
- { AOM_CDF11(28672, 30976, 31858, 32320, 32551, 32656, 32740, 32757,
- 32762, 32767) }, // class_cdf // fp
- { { AOM_CDF4(16384, 24576, 26624) },
- { AOM_CDF4(12288, 21248, 24128) } }, // class0_fp_cdf
- { AOM_CDF4(8192, 17408, 21248) }, // fp_cdf
- { AOM_CDF2(128 * 128) }, // sign_cdf
- { AOM_CDF2(160 * 128) }, // class0_hp_cdf
- { AOM_CDF2(128 * 128) }, // hp_cdf
- { AOM_CDF2(216 * 128) }, // class0_cdf
- { { AOM_CDF2(128 * 136) },
- { AOM_CDF2(128 * 140) },
- { AOM_CDF2(128 * 148) },
- { AOM_CDF2(128 * 160) },
- { AOM_CDF2(128 * 176) },
- { AOM_CDF2(128 * 192) },
- { AOM_CDF2(128 * 224) },
- { AOM_CDF2(128 * 234) },
- { AOM_CDF2(128 * 234) },
- { AOM_CDF2(128 * 240) } }, // bits_cdf
- } },
-};
-
-void av1_init_mv_probs(AV1_COMMON *cm) {
- // NB: this sets CDFs too
- cm->fc->nmvc = default_nmv_context;
- cm->fc->ndvc = default_nmv_context;
-}
diff --git a/third_party/aom/av1/common/entropymv.h b/third_party/aom/av1/common/entropymv.h
deleted file mode 100644
index fa818a2c1..000000000
--- a/third_party/aom/av1/common/entropymv.h
+++ /dev/null
@@ -1,104 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#ifndef AOM_AV1_COMMON_ENTROPYMV_H_
-#define AOM_AV1_COMMON_ENTROPYMV_H_
-
-#include "config/aom_config.h"
-
-#include "aom_dsp/prob.h"
-
-#include "av1/common/mv.h"
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-struct AV1Common;
-
-void av1_init_mv_probs(struct AV1Common *cm);
-
-#define MV_UPDATE_PROB 252
-
-/* Symbols for coding which components are zero jointly */
-#define MV_JOINTS 4
-typedef enum {
- MV_JOINT_ZERO = 0, /* Zero vector */
- MV_JOINT_HNZVZ = 1, /* Vert zero, hor nonzero */
- MV_JOINT_HZVNZ = 2, /* Hor zero, vert nonzero */
- MV_JOINT_HNZVNZ = 3, /* Both components nonzero */
-} MV_JOINT_TYPE;
-
-static INLINE int mv_joint_vertical(MV_JOINT_TYPE type) {
- return type == MV_JOINT_HZVNZ || type == MV_JOINT_HNZVNZ;
-}
-
-static INLINE int mv_joint_horizontal(MV_JOINT_TYPE type) {
- return type == MV_JOINT_HNZVZ || type == MV_JOINT_HNZVNZ;
-}
-
-/* Symbols for coding magnitude class of nonzero components */
-#define MV_CLASSES 11
-typedef enum {
- MV_CLASS_0 = 0, /* (0, 2] integer pel */
- MV_CLASS_1 = 1, /* (2, 4] integer pel */
- MV_CLASS_2 = 2, /* (4, 8] integer pel */
- MV_CLASS_3 = 3, /* (8, 16] integer pel */
- MV_CLASS_4 = 4, /* (16, 32] integer pel */
- MV_CLASS_5 = 5, /* (32, 64] integer pel */
- MV_CLASS_6 = 6, /* (64, 128] integer pel */
- MV_CLASS_7 = 7, /* (128, 256] integer pel */
- MV_CLASS_8 = 8, /* (256, 512] integer pel */
- MV_CLASS_9 = 9, /* (512, 1024] integer pel */
- MV_CLASS_10 = 10, /* (1024,2048] integer pel */
-} MV_CLASS_TYPE;
-
-#define CLASS0_BITS 1 /* bits at integer precision for class 0 */
-#define CLASS0_SIZE (1 << CLASS0_BITS)
-#define MV_OFFSET_BITS (MV_CLASSES + CLASS0_BITS - 2)
-#define MV_BITS_CONTEXTS 6
-#define MV_FP_SIZE 4
-
-#define MV_MAX_BITS (MV_CLASSES + CLASS0_BITS + 2)
-#define MV_MAX ((1 << MV_MAX_BITS) - 1)
-#define MV_VALS ((MV_MAX << 1) + 1)
-
-#define MV_IN_USE_BITS 14
-#define MV_UPP (1 << MV_IN_USE_BITS)
-#define MV_LOW (-(1 << MV_IN_USE_BITS))
-
-typedef struct {
- aom_cdf_prob classes_cdf[CDF_SIZE(MV_CLASSES)];
- aom_cdf_prob class0_fp_cdf[CLASS0_SIZE][CDF_SIZE(MV_FP_SIZE)];
- aom_cdf_prob fp_cdf[CDF_SIZE(MV_FP_SIZE)];
- aom_cdf_prob sign_cdf[CDF_SIZE(2)];
- aom_cdf_prob class0_hp_cdf[CDF_SIZE(2)];
- aom_cdf_prob hp_cdf[CDF_SIZE(2)];
- aom_cdf_prob class0_cdf[CDF_SIZE(CLASS0_SIZE)];
- aom_cdf_prob bits_cdf[MV_OFFSET_BITS][CDF_SIZE(2)];
-} nmv_component;
-
-typedef struct {
- aom_cdf_prob joints_cdf[CDF_SIZE(MV_JOINTS)];
- nmv_component comps[2];
-} nmv_context;
-
-typedef enum {
- MV_SUBPEL_NONE = -1,
- MV_SUBPEL_LOW_PRECISION = 0,
- MV_SUBPEL_HIGH_PRECISION,
-} MvSubpelPrecision;
-
-#ifdef __cplusplus
-} // extern "C"
-#endif
-
-#endif // AOM_AV1_COMMON_ENTROPYMV_H_
diff --git a/third_party/aom/av1/common/enums.h b/third_party/aom/av1/common/enums.h
deleted file mode 100644
index 869c06ef2..000000000
--- a/third_party/aom/av1/common/enums.h
+++ /dev/null
@@ -1,619 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#ifndef AOM_AV1_COMMON_ENUMS_H_
-#define AOM_AV1_COMMON_ENUMS_H_
-
-#include "config/aom_config.h"
-
-#include "aom/aom_codec.h"
-#include "aom/aom_integer.h"
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-#undef MAX_SB_SIZE
-
-// Max superblock size
-#define MAX_SB_SIZE_LOG2 7
-#define MAX_SB_SIZE (1 << MAX_SB_SIZE_LOG2)
-#define MAX_SB_SQUARE (MAX_SB_SIZE * MAX_SB_SIZE)
-
-// Min superblock size
-#define MIN_SB_SIZE_LOG2 6
-
-// Pixels per Mode Info (MI) unit
-#define MI_SIZE_LOG2 2
-#define MI_SIZE (1 << MI_SIZE_LOG2)
-
-// MI-units per max superblock (MI Block - MIB)
-#define MAX_MIB_SIZE_LOG2 (MAX_SB_SIZE_LOG2 - MI_SIZE_LOG2)
-#define MAX_MIB_SIZE (1 << MAX_MIB_SIZE_LOG2)
-
-// MI-units per min superblock
-#define MIN_MIB_SIZE_LOG2 (MIN_SB_SIZE_LOG2 - MI_SIZE_LOG2)
-
-// Mask to extract MI offset within max MIB
-#define MAX_MIB_MASK (MAX_MIB_SIZE - 1)
-
-// Maximum number of tile rows and tile columns
-#define MAX_TILE_ROWS 64
-#define MAX_TILE_COLS 64
-
-#define MAX_VARTX_DEPTH 2
-
-#define MI_SIZE_64X64 (64 >> MI_SIZE_LOG2)
-#define MI_SIZE_128X128 (128 >> MI_SIZE_LOG2)
-
-#define MAX_PALETTE_SQUARE (64 * 64)
-// Maximum number of colors in a palette.
-#define PALETTE_MAX_SIZE 8
-// Minimum number of colors in a palette.
-#define PALETTE_MIN_SIZE 2
-
-#define FRAME_OFFSET_BITS 5
-#define MAX_FRAME_DISTANCE ((1 << FRAME_OFFSET_BITS) - 1)
-
-#define REF_FRAMES_LOG2 3
-#define REF_FRAMES (1 << REF_FRAMES_LOG2)
-
-// 4 scratch frames for the new frames to support a maximum of 4 cores decoding
-// in parallel, 3 for scaled references on the encoder.
-// TODO(hkuang): Add ondemand frame buffers instead of hardcoding the number
-// of framebuffers.
-// TODO(jkoleszar): These 3 extra references could probably come from the
-// normal reference pool.
-#define FRAME_BUFFERS (REF_FRAMES + 7)
-
-// 4 frame filter levels: y plane vertical, y plane horizontal,
-// u plane, and v plane
-#define FRAME_LF_COUNT 4
-#define DEFAULT_DELTA_LF_MULTI 0
-#define MAX_MODE_LF_DELTAS 2
-
-#define DIST_PRECISION_BITS 4
-#define DIST_PRECISION (1 << DIST_PRECISION_BITS) // 16
-
-// TODO(chengchen): Temporal flag serve as experimental flag for WIP
-// bitmask construction.
-// Shall be removed when bitmask code is completely checkedin
-#define LOOP_FILTER_BITMASK 0
-
-#define PROFILE_BITS 3
-// The following three profiles are currently defined.
-// Profile 0. 8-bit and 10-bit 4:2:0 and 4:0:0 only.
-// Profile 1. 8-bit and 10-bit 4:4:4
-// Profile 2. 8-bit and 10-bit 4:2:2
-// 12-bit 4:0:0, 4:2:2 and 4:4:4
-// Since we have three bits for the profiles, it can be extended later.
-typedef enum BITSTREAM_PROFILE {
- PROFILE_0,
- PROFILE_1,
- PROFILE_2,
- MAX_PROFILES,
-} BITSTREAM_PROFILE;
-
-#define LEVEL_MAJOR_BITS 3
-#define LEVEL_MINOR_BITS 2
-#define LEVEL_BITS (LEVEL_MAJOR_BITS + LEVEL_MINOR_BITS)
-
-#define LEVEL_MAJOR_MIN 2
-#define LEVEL_MAJOR_MAX ((1 << LEVEL_MAJOR_BITS) - 1 + LEVEL_MAJOR_MIN)
-#define LEVEL_MINOR_MIN 0
-#define LEVEL_MINOR_MAX ((1 << LEVEL_MINOR_BITS) - 1)
-
-#define OP_POINTS_CNT_MINUS_1_BITS 5
-#define OP_POINTS_IDC_BITS 12
-
-// Note: Some enums use the attribute 'packed' to use smallest possible integer
-// type, so that we can save memory when they are used in structs/arrays.
-
-typedef enum ATTRIBUTE_PACKED {
- BLOCK_4X4,
- BLOCK_4X8,
- BLOCK_8X4,
- BLOCK_8X8,
- BLOCK_8X16,
- BLOCK_16X8,
- BLOCK_16X16,
- BLOCK_16X32,
- BLOCK_32X16,
- BLOCK_32X32,
- BLOCK_32X64,
- BLOCK_64X32,
- BLOCK_64X64,
- BLOCK_64X128,
- BLOCK_128X64,
- BLOCK_128X128,
- BLOCK_4X16,
- BLOCK_16X4,
- BLOCK_8X32,
- BLOCK_32X8,
- BLOCK_16X64,
- BLOCK_64X16,
- BLOCK_SIZES_ALL,
- BLOCK_SIZES = BLOCK_4X16,
- BLOCK_INVALID = 255,
- BLOCK_LARGEST = (BLOCK_SIZES - 1)
-} BLOCK_SIZE;
-
-// 4X4, 8X8, 16X16, 32X32, 64X64, 128X128
-#define SQR_BLOCK_SIZES 6
-
-typedef enum ATTRIBUTE_PACKED {
- PARTITION_NONE,
- PARTITION_HORZ,
- PARTITION_VERT,
- PARTITION_SPLIT,
- PARTITION_HORZ_A, // HORZ split and the top partition is split again
- PARTITION_HORZ_B, // HORZ split and the bottom partition is split again
- PARTITION_VERT_A, // VERT split and the left partition is split again
- PARTITION_VERT_B, // VERT split and the right partition is split again
- PARTITION_HORZ_4, // 4:1 horizontal partition
- PARTITION_VERT_4, // 4:1 vertical partition
- EXT_PARTITION_TYPES,
- PARTITION_TYPES = PARTITION_SPLIT + 1,
- PARTITION_INVALID = 255
-} PARTITION_TYPE;
-
-typedef char PARTITION_CONTEXT;
-#define PARTITION_PLOFFSET 4 // number of probability models per block size
-#define PARTITION_BLOCK_SIZES 5
-#define PARTITION_CONTEXTS (PARTITION_BLOCK_SIZES * PARTITION_PLOFFSET)
-
-// block transform size
-#if defined(_MSC_VER)
-typedef uint8_t TX_SIZE;
-enum ATTRIBUTE_PACKED {
-#else
-typedef enum ATTRIBUTE_PACKED {
-#endif
- TX_4X4, // 4x4 transform
- TX_8X8, // 8x8 transform
- TX_16X16, // 16x16 transform
- TX_32X32, // 32x32 transform
- TX_64X64, // 64x64 transform
- TX_4X8, // 4x8 transform
- TX_8X4, // 8x4 transform
- TX_8X16, // 8x16 transform
- TX_16X8, // 16x8 transform
- TX_16X32, // 16x32 transform
- TX_32X16, // 32x16 transform
- TX_32X64, // 32x64 transform
- TX_64X32, // 64x32 transform
- TX_4X16, // 4x16 transform
- TX_16X4, // 16x4 transform
- TX_8X32, // 8x32 transform
- TX_32X8, // 32x8 transform
- TX_16X64, // 16x64 transform
- TX_64X16, // 64x16 transform
- TX_SIZES_ALL, // Includes rectangular transforms
- TX_SIZES = TX_4X8, // Does NOT include rectangular transforms
- TX_SIZES_LARGEST = TX_64X64,
- TX_INVALID = 255 // Invalid transform size
-#if defined(_MSC_VER)
-};
-#else
-} TX_SIZE;
-#endif
-
-#define TX_SIZE_LUMA_MIN (TX_4X4)
-/* We don't need to code a transform size unless the allowed size is at least
- one more than the minimum. */
-#define TX_SIZE_CTX_MIN (TX_SIZE_LUMA_MIN + 1)
-
-// Maximum tx_size categories
-#define MAX_TX_CATS (TX_SIZES - TX_SIZE_CTX_MIN)
-#define MAX_TX_DEPTH 2
-
-#define MAX_TX_SIZE_LOG2 (6)
-#define MAX_TX_SIZE (1 << MAX_TX_SIZE_LOG2)
-#define MIN_TX_SIZE_LOG2 2
-#define MIN_TX_SIZE (1 << MIN_TX_SIZE_LOG2)
-#define MAX_TX_SQUARE (MAX_TX_SIZE * MAX_TX_SIZE)
-
-// Pad 4 extra columns to remove horizontal availability check.
-#define TX_PAD_HOR_LOG2 2
-#define TX_PAD_HOR 4
-// Pad 6 extra rows (2 on top and 4 on bottom) to remove vertical availability
-// check.
-#define TX_PAD_TOP 2
-#define TX_PAD_BOTTOM 4
-#define TX_PAD_VER (TX_PAD_TOP + TX_PAD_BOTTOM)
-// Pad 16 extra bytes to avoid reading overflow in SIMD optimization.
-#define TX_PAD_END 16
-#define TX_PAD_2D ((32 + TX_PAD_HOR) * (32 + TX_PAD_VER) + TX_PAD_END)
-
-// Number of maxium size transform blocks in the maximum size superblock
-#define MAX_TX_BLOCKS_IN_MAX_SB_LOG2 ((MAX_SB_SIZE_LOG2 - MAX_TX_SIZE_LOG2) * 2)
-#define MAX_TX_BLOCKS_IN_MAX_SB (1 << MAX_TX_BLOCKS_IN_MAX_SB_LOG2)
-
-// frame transform mode
-typedef enum ATTRIBUTE_PACKED {
- ONLY_4X4, // use only 4x4 transform
- TX_MODE_LARGEST, // transform size is the largest possible for pu size
- TX_MODE_SELECT, // transform specified for each block
- TX_MODES,
-} TX_MODE;
-
-// 1D tx types
-typedef enum ATTRIBUTE_PACKED {
- DCT_1D,
- ADST_1D,
- FLIPADST_1D,
- IDTX_1D,
- TX_TYPES_1D,
-} TX_TYPE_1D;
-
-typedef enum ATTRIBUTE_PACKED {
- DCT_DCT, // DCT in both horizontal and vertical
- ADST_DCT, // ADST in vertical, DCT in horizontal
- DCT_ADST, // DCT in vertical, ADST in horizontal
- ADST_ADST, // ADST in both directions
- FLIPADST_DCT,
- DCT_FLIPADST,
- FLIPADST_FLIPADST,
- ADST_FLIPADST,
- FLIPADST_ADST,
- IDTX,
- V_DCT,
- H_DCT,
- V_ADST,
- H_ADST,
- V_FLIPADST,
- H_FLIPADST,
- TX_TYPES,
-} TX_TYPE;
-
-typedef enum ATTRIBUTE_PACKED {
- REG_REG,
- REG_SMOOTH,
- REG_SHARP,
- SMOOTH_REG,
- SMOOTH_SMOOTH,
- SMOOTH_SHARP,
- SHARP_REG,
- SHARP_SMOOTH,
- SHARP_SHARP,
-} DUAL_FILTER_TYPE;
-
-typedef enum ATTRIBUTE_PACKED {
- // DCT only
- EXT_TX_SET_DCTONLY,
- // DCT + Identity only
- EXT_TX_SET_DCT_IDTX,
- // Discrete Trig transforms w/o flip (4) + Identity (1)
- EXT_TX_SET_DTT4_IDTX,
- // Discrete Trig transforms w/o flip (4) + Identity (1) + 1D Hor/vert DCT (2)
- EXT_TX_SET_DTT4_IDTX_1DDCT,
- // Discrete Trig transforms w/ flip (9) + Identity (1) + 1D Hor/Ver DCT (2)
- EXT_TX_SET_DTT9_IDTX_1DDCT,
- // Discrete Trig transforms w/ flip (9) + Identity (1) + 1D Hor/Ver (6)
- EXT_TX_SET_ALL16,
- EXT_TX_SET_TYPES
-} TxSetType;
-
-#define IS_2D_TRANSFORM(tx_type) (tx_type < IDTX)
-
-#define EXT_TX_SIZES 4 // number of sizes that use extended transforms
-#define EXT_TX_SETS_INTER 4 // Sets of transform selections for INTER
-#define EXT_TX_SETS_INTRA 3 // Sets of transform selections for INTRA
-
-typedef enum ATTRIBUTE_PACKED {
- AOM_LAST_FLAG = 1 << 0,
- AOM_LAST2_FLAG = 1 << 1,
- AOM_LAST3_FLAG = 1 << 2,
- AOM_GOLD_FLAG = 1 << 3,
- AOM_BWD_FLAG = 1 << 4,
- AOM_ALT2_FLAG = 1 << 5,
- AOM_ALT_FLAG = 1 << 6,
- AOM_REFFRAME_ALL = (1 << 7) - 1
-} AOM_REFFRAME;
-
-typedef enum ATTRIBUTE_PACKED {
- UNIDIR_COMP_REFERENCE,
- BIDIR_COMP_REFERENCE,
- COMP_REFERENCE_TYPES,
-} COMP_REFERENCE_TYPE;
-
-typedef enum ATTRIBUTE_PACKED {
- PLANE_TYPE_Y,
- PLANE_TYPE_UV,
- PLANE_TYPES
-} PLANE_TYPE;
-
-#define CFL_ALPHABET_SIZE_LOG2 4
-#define CFL_ALPHABET_SIZE (1 << CFL_ALPHABET_SIZE_LOG2)
-#define CFL_MAGS_SIZE ((2 << CFL_ALPHABET_SIZE_LOG2) + 1)
-#define CFL_IDX_U(idx) (idx >> CFL_ALPHABET_SIZE_LOG2)
-#define CFL_IDX_V(idx) (idx & (CFL_ALPHABET_SIZE - 1))
-
-typedef enum ATTRIBUTE_PACKED {
- CFL_PRED_U,
- CFL_PRED_V,
- CFL_PRED_PLANES
-} CFL_PRED_TYPE;
-
-typedef enum ATTRIBUTE_PACKED {
- CFL_SIGN_ZERO,
- CFL_SIGN_NEG,
- CFL_SIGN_POS,
- CFL_SIGNS
-} CFL_SIGN_TYPE;
-
-typedef enum ATTRIBUTE_PACKED {
- CFL_DISALLOWED,
- CFL_ALLOWED,
- CFL_ALLOWED_TYPES
-} CFL_ALLOWED_TYPE;
-
-// CFL_SIGN_ZERO,CFL_SIGN_ZERO is invalid
-#define CFL_JOINT_SIGNS (CFL_SIGNS * CFL_SIGNS - 1)
-// CFL_SIGN_U is equivalent to (js + 1) / 3 for js in 0 to 8
-#define CFL_SIGN_U(js) (((js + 1) * 11) >> 5)
-// CFL_SIGN_V is equivalent to (js + 1) % 3 for js in 0 to 8
-#define CFL_SIGN_V(js) ((js + 1) - CFL_SIGNS * CFL_SIGN_U(js))
-
-// There is no context when the alpha for a given plane is zero.
-// So there are 2 fewer contexts than joint signs.
-#define CFL_ALPHA_CONTEXTS (CFL_JOINT_SIGNS + 1 - CFL_SIGNS)
-#define CFL_CONTEXT_U(js) (js + 1 - CFL_SIGNS)
-// Also, the contexts are symmetric under swapping the planes.
-#define CFL_CONTEXT_V(js) \
- (CFL_SIGN_V(js) * CFL_SIGNS + CFL_SIGN_U(js) - CFL_SIGNS)
-
-typedef enum ATTRIBUTE_PACKED {
- PALETTE_MAP,
- COLOR_MAP_TYPES,
-} COLOR_MAP_TYPE;
-
-typedef enum ATTRIBUTE_PACKED {
- TWO_COLORS,
- THREE_COLORS,
- FOUR_COLORS,
- FIVE_COLORS,
- SIX_COLORS,
- SEVEN_COLORS,
- EIGHT_COLORS,
- PALETTE_SIZES
-} PALETTE_SIZE;
-
-typedef enum ATTRIBUTE_PACKED {
- PALETTE_COLOR_ONE,
- PALETTE_COLOR_TWO,
- PALETTE_COLOR_THREE,
- PALETTE_COLOR_FOUR,
- PALETTE_COLOR_FIVE,
- PALETTE_COLOR_SIX,
- PALETTE_COLOR_SEVEN,
- PALETTE_COLOR_EIGHT,
- PALETTE_COLORS
-} PALETTE_COLOR;
-
-// Note: All directional predictors must be between V_PRED and D67_PRED (both
-// inclusive).
-typedef enum ATTRIBUTE_PACKED {
- DC_PRED, // Average of above and left pixels
- V_PRED, // Vertical
- H_PRED, // Horizontal
- D45_PRED, // Directional 45 degree
- D135_PRED, // Directional 135 degree
- D113_PRED, // Directional 113 degree
- D157_PRED, // Directional 157 degree
- D203_PRED, // Directional 203 degree
- D67_PRED, // Directional 67 degree
- SMOOTH_PRED, // Combination of horizontal and vertical interpolation
- SMOOTH_V_PRED, // Vertical interpolation
- SMOOTH_H_PRED, // Horizontal interpolation
- PAETH_PRED, // Predict from the direction of smallest gradient
- NEARESTMV,
- NEARMV,
- GLOBALMV,
- NEWMV,
- // Compound ref compound modes
- NEAREST_NEARESTMV,
- NEAR_NEARMV,
- NEAREST_NEWMV,
- NEW_NEARESTMV,
- NEAR_NEWMV,
- NEW_NEARMV,
- GLOBAL_GLOBALMV,
- NEW_NEWMV,
- MB_MODE_COUNT,
- INTRA_MODE_START = DC_PRED,
- INTRA_MODE_END = NEARESTMV,
- INTRA_MODE_NUM = INTRA_MODE_END - INTRA_MODE_START,
- SINGLE_INTER_MODE_START = NEARESTMV,
- SINGLE_INTER_MODE_END = NEAREST_NEARESTMV,
- SINGLE_INTER_MODE_NUM = SINGLE_INTER_MODE_END - SINGLE_INTER_MODE_START,
- COMP_INTER_MODE_START = NEAREST_NEARESTMV,
- COMP_INTER_MODE_END = MB_MODE_COUNT,
- COMP_INTER_MODE_NUM = COMP_INTER_MODE_END - COMP_INTER_MODE_START,
- INTER_MODE_START = NEARESTMV,
- INTER_MODE_END = MB_MODE_COUNT,
- INTRA_MODES = PAETH_PRED + 1, // PAETH_PRED has to be the last intra mode.
- INTRA_INVALID = MB_MODE_COUNT // For uv_mode in inter blocks
-} PREDICTION_MODE;
-
-// TODO(ltrudeau) Do we really want to pack this?
-// TODO(ltrudeau) Do we match with PREDICTION_MODE?
-typedef enum ATTRIBUTE_PACKED {
- UV_DC_PRED, // Average of above and left pixels
- UV_V_PRED, // Vertical
- UV_H_PRED, // Horizontal
- UV_D45_PRED, // Directional 45 degree
- UV_D135_PRED, // Directional 135 degree
- UV_D113_PRED, // Directional 113 degree
- UV_D157_PRED, // Directional 157 degree
- UV_D203_PRED, // Directional 203 degree
- UV_D67_PRED, // Directional 67 degree
- UV_SMOOTH_PRED, // Combination of horizontal and vertical interpolation
- UV_SMOOTH_V_PRED, // Vertical interpolation
- UV_SMOOTH_H_PRED, // Horizontal interpolation
- UV_PAETH_PRED, // Predict from the direction of smallest gradient
- UV_CFL_PRED, // Chroma-from-Luma
- UV_INTRA_MODES,
- UV_MODE_INVALID, // For uv_mode in inter blocks
-} UV_PREDICTION_MODE;
-
-typedef enum ATTRIBUTE_PACKED {
- SIMPLE_TRANSLATION,
- OBMC_CAUSAL, // 2-sided OBMC
- WARPED_CAUSAL, // 2-sided WARPED
- MOTION_MODES
-} MOTION_MODE;
-
-typedef enum ATTRIBUTE_PACKED {
- II_DC_PRED,
- II_V_PRED,
- II_H_PRED,
- II_SMOOTH_PRED,
- INTERINTRA_MODES
-} INTERINTRA_MODE;
-
-typedef enum ATTRIBUTE_PACKED {
- COMPOUND_AVERAGE,
- COMPOUND_WEDGE,
- COMPOUND_DIFFWTD,
- COMPOUND_TYPES,
-} COMPOUND_TYPE;
-
-typedef enum ATTRIBUTE_PACKED {
- FILTER_DC_PRED,
- FILTER_V_PRED,
- FILTER_H_PRED,
- FILTER_D157_PRED,
- FILTER_PAETH_PRED,
- FILTER_INTRA_MODES,
-} FILTER_INTRA_MODE;
-
-#define DIRECTIONAL_MODES 8
-#define MAX_ANGLE_DELTA 3
-#define ANGLE_STEP 3
-
-#define INTER_MODES (1 + NEWMV - NEARESTMV)
-
-#define INTER_COMPOUND_MODES (1 + NEW_NEWMV - NEAREST_NEARESTMV)
-
-#define SKIP_CONTEXTS 3
-#define SKIP_MODE_CONTEXTS 3
-
-#define COMP_INDEX_CONTEXTS 6
-#define COMP_GROUP_IDX_CONTEXTS 6
-
-#define NMV_CONTEXTS 3
-
-#define NEWMV_MODE_CONTEXTS 6
-#define GLOBALMV_MODE_CONTEXTS 2
-#define REFMV_MODE_CONTEXTS 6
-#define DRL_MODE_CONTEXTS 3
-
-#define GLOBALMV_OFFSET 3
-#define REFMV_OFFSET 4
-
-#define NEWMV_CTX_MASK ((1 << GLOBALMV_OFFSET) - 1)
-#define GLOBALMV_CTX_MASK ((1 << (REFMV_OFFSET - GLOBALMV_OFFSET)) - 1)
-#define REFMV_CTX_MASK ((1 << (8 - REFMV_OFFSET)) - 1)
-
-#define COMP_NEWMV_CTXS 5
-#define INTER_MODE_CONTEXTS 8
-
-#define DELTA_Q_SMALL 3
-#define DELTA_Q_PROBS (DELTA_Q_SMALL)
-#define DEFAULT_DELTA_Q_RES 4
-#define DELTA_LF_SMALL 3
-#define DELTA_LF_PROBS (DELTA_LF_SMALL)
-#define DEFAULT_DELTA_LF_RES 2
-
-/* Segment Feature Masks */
-#define MAX_MV_REF_CANDIDATES 2
-
-#define MAX_REF_MV_STACK_SIZE 8
-#define REF_CAT_LEVEL 640
-
-#define INTRA_INTER_CONTEXTS 4
-#define COMP_INTER_CONTEXTS 5
-#define REF_CONTEXTS 3
-
-#define COMP_REF_TYPE_CONTEXTS 5
-#define UNI_COMP_REF_CONTEXTS 3
-
-#define TXFM_PARTITION_CONTEXTS ((TX_SIZES - TX_8X8) * 6 - 3)
-typedef uint8_t TXFM_CONTEXT;
-
-#define NONE_FRAME -1
-#define INTRA_FRAME 0
-#define LAST_FRAME 1
-#define LAST2_FRAME 2
-#define LAST3_FRAME 3
-#define GOLDEN_FRAME 4
-#define BWDREF_FRAME 5
-#define ALTREF2_FRAME 6
-#define ALTREF_FRAME 7
-#define EXTREF_FRAME REF_FRAMES
-#define LAST_REF_FRAMES (LAST3_FRAME - LAST_FRAME + 1)
-
-#define INTER_REFS_PER_FRAME (ALTREF_FRAME - LAST_FRAME + 1)
-
-#define FWD_REFS (GOLDEN_FRAME - LAST_FRAME + 1)
-#define FWD_RF_OFFSET(ref) (ref - LAST_FRAME)
-#define BWD_REFS (ALTREF_FRAME - BWDREF_FRAME + 1)
-#define BWD_RF_OFFSET(ref) (ref - BWDREF_FRAME)
-
-#define SINGLE_REFS (FWD_REFS + BWD_REFS)
-
-typedef enum ATTRIBUTE_PACKED {
- LAST_LAST2_FRAMES, // { LAST_FRAME, LAST2_FRAME }
- LAST_LAST3_FRAMES, // { LAST_FRAME, LAST3_FRAME }
- LAST_GOLDEN_FRAMES, // { LAST_FRAME, GOLDEN_FRAME }
- BWDREF_ALTREF_FRAMES, // { BWDREF_FRAME, ALTREF_FRAME }
- LAST2_LAST3_FRAMES, // { LAST2_FRAME, LAST3_FRAME }
- LAST2_GOLDEN_FRAMES, // { LAST2_FRAME, GOLDEN_FRAME }
- LAST3_GOLDEN_FRAMES, // { LAST3_FRAME, GOLDEN_FRAME }
- BWDREF_ALTREF2_FRAMES, // { BWDREF_FRAME, ALTREF2_FRAME }
- ALTREF2_ALTREF_FRAMES, // { ALTREF2_FRAME, ALTREF_FRAME }
- TOTAL_UNIDIR_COMP_REFS,
- // NOTE: UNIDIR_COMP_REFS is the number of uni-directional reference pairs
- // that are explicitly signaled.
- UNIDIR_COMP_REFS = BWDREF_ALTREF_FRAMES + 1,
-} UNIDIR_COMP_REF;
-
-#define TOTAL_COMP_REFS (FWD_REFS * BWD_REFS + TOTAL_UNIDIR_COMP_REFS)
-
-#define COMP_REFS (FWD_REFS * BWD_REFS + UNIDIR_COMP_REFS)
-
-// NOTE: A limited number of unidirectional reference pairs can be signalled for
-// compound prediction. The use of skip mode, on the other hand, makes it
-// possible to have a reference pair not listed for explicit signaling.
-#define MODE_CTX_REF_FRAMES (REF_FRAMES + TOTAL_COMP_REFS)
-
-typedef enum ATTRIBUTE_PACKED {
- RESTORE_NONE,
- RESTORE_WIENER,
- RESTORE_SGRPROJ,
- RESTORE_SWITCHABLE,
- RESTORE_SWITCHABLE_TYPES = RESTORE_SWITCHABLE,
- RESTORE_TYPES = 4,
-} RestorationType;
-
-#define SUPERRES_SCALE_BITS 3
-#define SUPERRES_SCALE_DENOMINATOR_MIN (SCALE_NUMERATOR + 1)
-
-// In large_scale_tile coding, external references are used.
-#define MAX_EXTERNAL_REFERENCES 128
-#define MAX_TILES 512
-
-#ifdef __cplusplus
-} // extern "C"
-#endif
-
-#endif // AOM_AV1_COMMON_ENUMS_H_
diff --git a/third_party/aom/av1/common/filter.h b/third_party/aom/av1/common/filter.h
deleted file mode 100644
index 571422d11..000000000
--- a/third_party/aom/av1/common/filter.h
+++ /dev/null
@@ -1,214 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#ifndef AOM_AV1_COMMON_FILTER_H_
-#define AOM_AV1_COMMON_FILTER_H_
-
-#include <assert.h>
-
-#include "config/aom_config.h"
-
-#include "aom/aom_integer.h"
-#include "aom_dsp/aom_filter.h"
-#include "aom_ports/mem.h"
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-#define MAX_FILTER_TAP 8
-
-typedef enum ATTRIBUTE_PACKED {
- EIGHTTAP_REGULAR,
- EIGHTTAP_SMOOTH,
- MULTITAP_SHARP,
- BILINEAR,
- INTERP_FILTERS_ALL,
- SWITCHABLE_FILTERS = BILINEAR,
- SWITCHABLE = SWITCHABLE_FILTERS + 1, /* the last switchable one */
- EXTRA_FILTERS = INTERP_FILTERS_ALL - SWITCHABLE_FILTERS,
-} InterpFilter;
-
-// With CONFIG_DUAL_FILTER, pack two InterpFilter's into a uint32_t: since
-// there are at most 10 filters, we can use 16 bits for each and have more than
-// enough space. This reduces argument passing and unifies the operation of
-// setting a (pair of) filters.
-//
-// Without CONFIG_DUAL_FILTER,
-typedef uint32_t InterpFilters;
-static INLINE InterpFilter av1_extract_interp_filter(InterpFilters filters,
- int x_filter) {
- return (InterpFilter)((filters >> (x_filter ? 16 : 0)) & 0xf);
-}
-
-static INLINE InterpFilters av1_make_interp_filters(InterpFilter y_filter,
- InterpFilter x_filter) {
- uint16_t y16 = y_filter & 0xf;
- uint16_t x16 = x_filter & 0xf;
- return y16 | ((uint32_t)x16 << 16);
-}
-
-static INLINE InterpFilters av1_broadcast_interp_filter(InterpFilter filter) {
- return av1_make_interp_filters(filter, filter);
-}
-
-static INLINE InterpFilter av1_unswitchable_filter(InterpFilter filter) {
- return filter == SWITCHABLE ? EIGHTTAP_REGULAR : filter;
-}
-
-/* (1 << LOG_SWITCHABLE_FILTERS) > SWITCHABLE_FILTERS */
-#define LOG_SWITCHABLE_FILTERS 2
-
-#define MAX_SUBPEL_TAPS 12
-#define SWITCHABLE_FILTER_CONTEXTS ((SWITCHABLE_FILTERS + 1) * 4)
-#define INTER_FILTER_COMP_OFFSET (SWITCHABLE_FILTERS + 1)
-#define INTER_FILTER_DIR_OFFSET ((SWITCHABLE_FILTERS + 1) * 2)
-
-typedef struct InterpFilterParams {
- const int16_t *filter_ptr;
- uint16_t taps;
- uint16_t subpel_shifts;
- InterpFilter interp_filter;
-} InterpFilterParams;
-
-DECLARE_ALIGNED(256, static const InterpKernel,
- av1_bilinear_filters[SUBPEL_SHIFTS]) = {
- { 0, 0, 0, 128, 0, 0, 0, 0 }, { 0, 0, 0, 120, 8, 0, 0, 0 },
- { 0, 0, 0, 112, 16, 0, 0, 0 }, { 0, 0, 0, 104, 24, 0, 0, 0 },
- { 0, 0, 0, 96, 32, 0, 0, 0 }, { 0, 0, 0, 88, 40, 0, 0, 0 },
- { 0, 0, 0, 80, 48, 0, 0, 0 }, { 0, 0, 0, 72, 56, 0, 0, 0 },
- { 0, 0, 0, 64, 64, 0, 0, 0 }, { 0, 0, 0, 56, 72, 0, 0, 0 },
- { 0, 0, 0, 48, 80, 0, 0, 0 }, { 0, 0, 0, 40, 88, 0, 0, 0 },
- { 0, 0, 0, 32, 96, 0, 0, 0 }, { 0, 0, 0, 24, 104, 0, 0, 0 },
- { 0, 0, 0, 16, 112, 0, 0, 0 }, { 0, 0, 0, 8, 120, 0, 0, 0 }
-};
-
-DECLARE_ALIGNED(256, static const InterpKernel,
- av1_sub_pel_filters_8[SUBPEL_SHIFTS]) = {
- { 0, 0, 0, 128, 0, 0, 0, 0 }, { 0, 2, -6, 126, 8, -2, 0, 0 },
- { 0, 2, -10, 122, 18, -4, 0, 0 }, { 0, 2, -12, 116, 28, -8, 2, 0 },
- { 0, 2, -14, 110, 38, -10, 2, 0 }, { 0, 2, -14, 102, 48, -12, 2, 0 },
- { 0, 2, -16, 94, 58, -12, 2, 0 }, { 0, 2, -14, 84, 66, -12, 2, 0 },
- { 0, 2, -14, 76, 76, -14, 2, 0 }, { 0, 2, -12, 66, 84, -14, 2, 0 },
- { 0, 2, -12, 58, 94, -16, 2, 0 }, { 0, 2, -12, 48, 102, -14, 2, 0 },
- { 0, 2, -10, 38, 110, -14, 2, 0 }, { 0, 2, -8, 28, 116, -12, 2, 0 },
- { 0, 0, -4, 18, 122, -10, 2, 0 }, { 0, 0, -2, 8, 126, -6, 2, 0 }
-};
-
-DECLARE_ALIGNED(256, static const InterpKernel,
- av1_sub_pel_filters_8sharp[SUBPEL_SHIFTS]) = {
- { 0, 0, 0, 128, 0, 0, 0, 0 }, { -2, 2, -6, 126, 8, -2, 2, 0 },
- { -2, 6, -12, 124, 16, -6, 4, -2 }, { -2, 8, -18, 120, 26, -10, 6, -2 },
- { -4, 10, -22, 116, 38, -14, 6, -2 }, { -4, 10, -22, 108, 48, -18, 8, -2 },
- { -4, 10, -24, 100, 60, -20, 8, -2 }, { -4, 10, -24, 90, 70, -22, 10, -2 },
- { -4, 12, -24, 80, 80, -24, 12, -4 }, { -2, 10, -22, 70, 90, -24, 10, -4 },
- { -2, 8, -20, 60, 100, -24, 10, -4 }, { -2, 8, -18, 48, 108, -22, 10, -4 },
- { -2, 6, -14, 38, 116, -22, 10, -4 }, { -2, 6, -10, 26, 120, -18, 8, -2 },
- { -2, 4, -6, 16, 124, -12, 6, -2 }, { 0, 2, -2, 8, 126, -6, 2, -2 }
-};
-
-DECLARE_ALIGNED(256, static const InterpKernel,
- av1_sub_pel_filters_8smooth[SUBPEL_SHIFTS]) = {
- { 0, 0, 0, 128, 0, 0, 0, 0 }, { 0, 2, 28, 62, 34, 2, 0, 0 },
- { 0, 0, 26, 62, 36, 4, 0, 0 }, { 0, 0, 22, 62, 40, 4, 0, 0 },
- { 0, 0, 20, 60, 42, 6, 0, 0 }, { 0, 0, 18, 58, 44, 8, 0, 0 },
- { 0, 0, 16, 56, 46, 10, 0, 0 }, { 0, -2, 16, 54, 48, 12, 0, 0 },
- { 0, -2, 14, 52, 52, 14, -2, 0 }, { 0, 0, 12, 48, 54, 16, -2, 0 },
- { 0, 0, 10, 46, 56, 16, 0, 0 }, { 0, 0, 8, 44, 58, 18, 0, 0 },
- { 0, 0, 6, 42, 60, 20, 0, 0 }, { 0, 0, 4, 40, 62, 22, 0, 0 },
- { 0, 0, 4, 36, 62, 26, 0, 0 }, { 0, 0, 2, 34, 62, 28, 2, 0 }
-};
-
-static const InterpFilterParams
- av1_interp_filter_params_list[SWITCHABLE_FILTERS + 1] = {
- { (const int16_t *)av1_sub_pel_filters_8, SUBPEL_TAPS, SUBPEL_SHIFTS,
- EIGHTTAP_REGULAR },
- { (const int16_t *)av1_sub_pel_filters_8smooth, SUBPEL_TAPS,
- SUBPEL_SHIFTS, EIGHTTAP_SMOOTH },
- { (const int16_t *)av1_sub_pel_filters_8sharp, SUBPEL_TAPS, SUBPEL_SHIFTS,
- MULTITAP_SHARP },
- { (const int16_t *)av1_bilinear_filters, SUBPEL_TAPS, SUBPEL_SHIFTS,
- BILINEAR }
- };
-
-// A special 2-tap bilinear filter for IntraBC chroma. IntraBC uses full pixel
-// MV for luma. If sub-sampling exists, chroma may possibly use half-pel MV.
-DECLARE_ALIGNED(256, static const int16_t, av1_intrabc_bilinear_filter[2]) = {
- 64,
- 64,
-};
-
-static const InterpFilterParams av1_intrabc_filter_params = {
- av1_intrabc_bilinear_filter, 2, 0, BILINEAR
-};
-
-DECLARE_ALIGNED(256, static const InterpKernel,
- av1_sub_pel_filters_4[SUBPEL_SHIFTS]) = {
- { 0, 0, 0, 128, 0, 0, 0, 0 }, { 0, 0, -4, 126, 8, -2, 0, 0 },
- { 0, 0, -8, 122, 18, -4, 0, 0 }, { 0, 0, -10, 116, 28, -6, 0, 0 },
- { 0, 0, -12, 110, 38, -8, 0, 0 }, { 0, 0, -12, 102, 48, -10, 0, 0 },
- { 0, 0, -14, 94, 58, -10, 0, 0 }, { 0, 0, -12, 84, 66, -10, 0, 0 },
- { 0, 0, -12, 76, 76, -12, 0, 0 }, { 0, 0, -10, 66, 84, -12, 0, 0 },
- { 0, 0, -10, 58, 94, -14, 0, 0 }, { 0, 0, -10, 48, 102, -12, 0, 0 },
- { 0, 0, -8, 38, 110, -12, 0, 0 }, { 0, 0, -6, 28, 116, -10, 0, 0 },
- { 0, 0, -4, 18, 122, -8, 0, 0 }, { 0, 0, -2, 8, 126, -4, 0, 0 }
-};
-DECLARE_ALIGNED(256, static const InterpKernel,
- av1_sub_pel_filters_4smooth[SUBPEL_SHIFTS]) = {
- { 0, 0, 0, 128, 0, 0, 0, 0 }, { 0, 0, 30, 62, 34, 2, 0, 0 },
- { 0, 0, 26, 62, 36, 4, 0, 0 }, { 0, 0, 22, 62, 40, 4, 0, 0 },
- { 0, 0, 20, 60, 42, 6, 0, 0 }, { 0, 0, 18, 58, 44, 8, 0, 0 },
- { 0, 0, 16, 56, 46, 10, 0, 0 }, { 0, 0, 14, 54, 48, 12, 0, 0 },
- { 0, 0, 12, 52, 52, 12, 0, 0 }, { 0, 0, 12, 48, 54, 14, 0, 0 },
- { 0, 0, 10, 46, 56, 16, 0, 0 }, { 0, 0, 8, 44, 58, 18, 0, 0 },
- { 0, 0, 6, 42, 60, 20, 0, 0 }, { 0, 0, 4, 40, 62, 22, 0, 0 },
- { 0, 0, 4, 36, 62, 26, 0, 0 }, { 0, 0, 2, 34, 62, 30, 0, 0 }
-};
-
-// For w<=4, MULTITAP_SHARP is the same as EIGHTTAP_REGULAR
-static const InterpFilterParams av1_interp_4tap[SWITCHABLE_FILTERS + 1] = {
- { (const int16_t *)av1_sub_pel_filters_4, SUBPEL_TAPS, SUBPEL_SHIFTS,
- EIGHTTAP_REGULAR },
- { (const int16_t *)av1_sub_pel_filters_4smooth, SUBPEL_TAPS, SUBPEL_SHIFTS,
- EIGHTTAP_SMOOTH },
- { (const int16_t *)av1_sub_pel_filters_4, SUBPEL_TAPS, SUBPEL_SHIFTS,
- EIGHTTAP_REGULAR },
- { (const int16_t *)av1_bilinear_filters, SUBPEL_TAPS, SUBPEL_SHIFTS,
- BILINEAR },
-};
-
-static INLINE const InterpFilterParams *
-av1_get_interp_filter_params_with_block_size(const InterpFilter interp_filter,
- const int w) {
- if (w <= 4) return &av1_interp_4tap[interp_filter];
- return &av1_interp_filter_params_list[interp_filter];
-}
-
-static INLINE const InterpFilterParams *av1_get_4tap_interp_filter_params(
- const InterpFilter interp_filter) {
- return &av1_interp_4tap[interp_filter];
-}
-
-static INLINE const int16_t *av1_get_interp_filter_kernel(
- const InterpFilter interp_filter) {
- return av1_interp_filter_params_list[interp_filter].filter_ptr;
-}
-
-static INLINE const int16_t *av1_get_interp_filter_subpel_kernel(
- const InterpFilterParams *const filter_params, const int subpel) {
- return filter_params->filter_ptr + filter_params->taps * subpel;
-}
-
-#ifdef __cplusplus
-} // extern "C"
-#endif
-
-#endif // AOM_AV1_COMMON_FILTER_H_
diff --git a/third_party/aom/av1/common/frame_buffers.c b/third_party/aom/av1/common/frame_buffers.c
deleted file mode 100644
index fd6c4bc79..000000000
--- a/third_party/aom/av1/common/frame_buffers.c
+++ /dev/null
@@ -1,91 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#include <assert.h>
-
-#include "av1/common/frame_buffers.h"
-#include "aom_mem/aom_mem.h"
-
-int av1_alloc_internal_frame_buffers(InternalFrameBufferList *list) {
- assert(list != NULL);
- av1_free_internal_frame_buffers(list);
-
- list->num_internal_frame_buffers =
- AOM_MAXIMUM_REF_BUFFERS + AOM_MAXIMUM_WORK_BUFFERS;
- list->int_fb = (InternalFrameBuffer *)aom_calloc(
- list->num_internal_frame_buffers, sizeof(*list->int_fb));
- return (list->int_fb == NULL);
-}
-
-void av1_free_internal_frame_buffers(InternalFrameBufferList *list) {
- int i;
-
- assert(list != NULL);
-
- for (i = 0; i < list->num_internal_frame_buffers; ++i) {
- aom_free(list->int_fb[i].data);
- list->int_fb[i].data = NULL;
- }
- aom_free(list->int_fb);
- list->int_fb = NULL;
-}
-
-void av1_zero_unused_internal_frame_buffers(InternalFrameBufferList *list) {
- int i;
-
- assert(list != NULL);
-
- for (i = 0; i < list->num_internal_frame_buffers; ++i) {
- if (list->int_fb[i].data && !list->int_fb[i].in_use)
- memset(list->int_fb[i].data, 0, list->int_fb[i].size);
- }
-}
-
-int av1_get_frame_buffer(void *cb_priv, size_t min_size,
- aom_codec_frame_buffer_t *fb) {
- int i;
- InternalFrameBufferList *const int_fb_list =
- (InternalFrameBufferList *)cb_priv;
- if (int_fb_list == NULL) return -1;
-
- // Find a free frame buffer.
- for (i = 0; i < int_fb_list->num_internal_frame_buffers; ++i) {
- if (!int_fb_list->int_fb[i].in_use) break;
- }
-
- if (i == int_fb_list->num_internal_frame_buffers) return -1;
-
- if (int_fb_list->int_fb[i].size < min_size) {
- aom_free(int_fb_list->int_fb[i].data);
- // The data must be zeroed to fix a valgrind error from the C loop filter
- // due to access uninitialized memory in frame border. It could be
- // skipped if border were totally removed.
- int_fb_list->int_fb[i].data = (uint8_t *)aom_calloc(1, min_size);
- if (!int_fb_list->int_fb[i].data) return -1;
- int_fb_list->int_fb[i].size = min_size;
- }
-
- fb->data = int_fb_list->int_fb[i].data;
- fb->size = int_fb_list->int_fb[i].size;
- int_fb_list->int_fb[i].in_use = 1;
-
- // Set the frame buffer's private data to point at the internal frame buffer.
- fb->priv = &int_fb_list->int_fb[i];
- return 0;
-}
-
-int av1_release_frame_buffer(void *cb_priv, aom_codec_frame_buffer_t *fb) {
- InternalFrameBuffer *const int_fb = (InternalFrameBuffer *)fb->priv;
- (void)cb_priv;
- if (int_fb) int_fb->in_use = 0;
- fb->priv = NULL;
- return 0;
-}
diff --git a/third_party/aom/av1/common/frame_buffers.h b/third_party/aom/av1/common/frame_buffers.h
deleted file mode 100644
index 16188e51c..000000000
--- a/third_party/aom/av1/common/frame_buffers.h
+++ /dev/null
@@ -1,60 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#ifndef AOM_AV1_COMMON_FRAME_BUFFERS_H_
-#define AOM_AV1_COMMON_FRAME_BUFFERS_H_
-
-#include "aom/aom_frame_buffer.h"
-#include "aom/aom_integer.h"
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-typedef struct InternalFrameBuffer {
- uint8_t *data;
- size_t size;
- int in_use;
-} InternalFrameBuffer;
-
-typedef struct InternalFrameBufferList {
- int num_internal_frame_buffers;
- InternalFrameBuffer *int_fb;
-} InternalFrameBufferList;
-
-// Initializes |list|. Returns 0 on success.
-int av1_alloc_internal_frame_buffers(InternalFrameBufferList *list);
-
-// Free any data allocated to the frame buffers.
-void av1_free_internal_frame_buffers(InternalFrameBufferList *list);
-
-// Zeros all unused internal frame buffers. In particular, this zeros the
-// frame borders. Call this function after a sequence header change to
-// re-initialize the frame borders for the different width, height, or bit
-// depth.
-void av1_zero_unused_internal_frame_buffers(InternalFrameBufferList *list);
-
-// Callback used by libaom to request an external frame buffer. |cb_priv|
-// Callback private data, which points to an InternalFrameBufferList.
-// |min_size| is the minimum size in bytes needed to decode the next frame.
-// |fb| pointer to the frame buffer.
-int av1_get_frame_buffer(void *cb_priv, size_t min_size,
- aom_codec_frame_buffer_t *fb);
-
-// Callback used by libaom when there are no references to the frame buffer.
-// |cb_priv| is not used. |fb| pointer to the frame buffer.
-int av1_release_frame_buffer(void *cb_priv, aom_codec_frame_buffer_t *fb);
-
-#ifdef __cplusplus
-} // extern "C"
-#endif
-
-#endif // AOM_AV1_COMMON_FRAME_BUFFERS_H_
diff --git a/third_party/aom/av1/common/idct.c b/third_party/aom/av1/common/idct.c
deleted file mode 100644
index 2c1cb9827..000000000
--- a/third_party/aom/av1/common/idct.c
+++ /dev/null
@@ -1,322 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#include <math.h>
-
-#include "config/aom_dsp_rtcd.h"
-#include "config/av1_rtcd.h"
-
-#include "aom_ports/mem.h"
-#include "av1/common/av1_inv_txfm1d_cfg.h"
-#include "av1/common/av1_txfm.h"
-#include "av1/common/blockd.h"
-#include "av1/common/enums.h"
-#include "av1/common/idct.h"
-
-int av1_get_tx_scale(const TX_SIZE tx_size) {
- const int pels = tx_size_2d[tx_size];
- // Largest possible pels is 4096 (64x64).
- return (pels > 256) + (pels > 1024);
-}
-
-// NOTE: The implementation of all inverses need to be aware of the fact
-// that input and output could be the same buffer.
-
-// idct
-void av1_highbd_iwht4x4_add(const tran_low_t *input, uint8_t *dest, int stride,
- int eob, int bd) {
- if (eob > 1)
- av1_highbd_iwht4x4_16_add(input, dest, stride, bd);
- else
- av1_highbd_iwht4x4_1_add(input, dest, stride, bd);
-}
-
-void av1_highbd_inv_txfm_add_4x4_c(const tran_low_t *input, uint8_t *dest,
- int stride, const TxfmParam *txfm_param) {
- assert(av1_ext_tx_used[txfm_param->tx_set_type][txfm_param->tx_type]);
- int eob = txfm_param->eob;
- int bd = txfm_param->bd;
- int lossless = txfm_param->lossless;
- const int32_t *src = cast_to_int32(input);
- const TX_TYPE tx_type = txfm_param->tx_type;
- if (lossless) {
- assert(tx_type == DCT_DCT);
- av1_highbd_iwht4x4_add(input, dest, stride, eob, bd);
- return;
- }
-
- av1_inv_txfm2d_add_4x4_c(src, CONVERT_TO_SHORTPTR(dest), stride, tx_type, bd);
-}
-
-void av1_highbd_inv_txfm_add_4x8(const tran_low_t *input, uint8_t *dest,
- int stride, const TxfmParam *txfm_param) {
- assert(av1_ext_tx_used[txfm_param->tx_set_type][txfm_param->tx_type]);
- const int32_t *src = cast_to_int32(input);
- av1_inv_txfm2d_add_4x8_c(src, CONVERT_TO_SHORTPTR(dest), stride,
- txfm_param->tx_type, txfm_param->bd);
-}
-
-void av1_highbd_inv_txfm_add_8x4(const tran_low_t *input, uint8_t *dest,
- int stride, const TxfmParam *txfm_param) {
- assert(av1_ext_tx_used[txfm_param->tx_set_type][txfm_param->tx_type]);
- const int32_t *src = cast_to_int32(input);
- av1_inv_txfm2d_add_8x4_c(src, CONVERT_TO_SHORTPTR(dest), stride,
- txfm_param->tx_type, txfm_param->bd);
-}
-
-void av1_highbd_inv_txfm_add_16x32(const tran_low_t *input, uint8_t *dest,
- int stride, const TxfmParam *txfm_param) {
- const int32_t *src = cast_to_int32(input);
- av1_inv_txfm2d_add_16x32_c(src, CONVERT_TO_SHORTPTR(dest), stride,
- txfm_param->tx_type, txfm_param->bd);
-}
-
-void av1_highbd_inv_txfm_add_32x16(const tran_low_t *input, uint8_t *dest,
- int stride, const TxfmParam *txfm_param) {
- const int32_t *src = cast_to_int32(input);
- av1_inv_txfm2d_add_32x16_c(src, CONVERT_TO_SHORTPTR(dest), stride,
- txfm_param->tx_type, txfm_param->bd);
-}
-
-void av1_highbd_inv_txfm_add_16x4(const tran_low_t *input, uint8_t *dest,
- int stride, const TxfmParam *txfm_param) {
- const int32_t *src = cast_to_int32(input);
- av1_inv_txfm2d_add_16x4_c(src, CONVERT_TO_SHORTPTR(dest), stride,
- txfm_param->tx_type, txfm_param->bd);
-}
-
-void av1_highbd_inv_txfm_add_4x16(const tran_low_t *input, uint8_t *dest,
- int stride, const TxfmParam *txfm_param) {
- const int32_t *src = cast_to_int32(input);
- av1_inv_txfm2d_add_4x16_c(src, CONVERT_TO_SHORTPTR(dest), stride,
- txfm_param->tx_type, txfm_param->bd);
-}
-
-void av1_highbd_inv_txfm_add_32x8(const tran_low_t *input, uint8_t *dest,
- int stride, const TxfmParam *txfm_param) {
- const int32_t *src = cast_to_int32(input);
- av1_inv_txfm2d_add_32x8_c(src, CONVERT_TO_SHORTPTR(dest), stride,
- txfm_param->tx_type, txfm_param->bd);
-}
-
-void av1_highbd_inv_txfm_add_8x32(const tran_low_t *input, uint8_t *dest,
- int stride, const TxfmParam *txfm_param) {
- const int32_t *src = cast_to_int32(input);
- av1_inv_txfm2d_add_8x32_c(src, CONVERT_TO_SHORTPTR(dest), stride,
- txfm_param->tx_type, txfm_param->bd);
-}
-
-void av1_highbd_inv_txfm_add_32x64(const tran_low_t *input, uint8_t *dest,
- int stride, const TxfmParam *txfm_param) {
- const int32_t *src = cast_to_int32(input);
- av1_inv_txfm2d_add_32x64_c(src, CONVERT_TO_SHORTPTR(dest), stride,
- txfm_param->tx_type, txfm_param->bd);
-}
-
-void av1_highbd_inv_txfm_add_64x32(const tran_low_t *input, uint8_t *dest,
- int stride, const TxfmParam *txfm_param) {
- const int32_t *src = cast_to_int32(input);
- av1_inv_txfm2d_add_64x32_c(src, CONVERT_TO_SHORTPTR(dest), stride,
- txfm_param->tx_type, txfm_param->bd);
-}
-
-void av1_highbd_inv_txfm_add_16x64(const tran_low_t *input, uint8_t *dest,
- int stride, const TxfmParam *txfm_param) {
- const int32_t *src = cast_to_int32(input);
- av1_inv_txfm2d_add_16x64_c(src, CONVERT_TO_SHORTPTR(dest), stride,
- txfm_param->tx_type, txfm_param->bd);
-}
-
-void av1_highbd_inv_txfm_add_64x16(const tran_low_t *input, uint8_t *dest,
- int stride, const TxfmParam *txfm_param) {
- const int32_t *src = cast_to_int32(input);
- av1_inv_txfm2d_add_64x16_c(src, CONVERT_TO_SHORTPTR(dest), stride,
- txfm_param->tx_type, txfm_param->bd);
-}
-
-void av1_highbd_inv_txfm_add_8x8_c(const tran_low_t *input, uint8_t *dest,
- int stride, const TxfmParam *txfm_param) {
- int bd = txfm_param->bd;
- const TX_TYPE tx_type = txfm_param->tx_type;
- const int32_t *src = cast_to_int32(input);
-
- av1_inv_txfm2d_add_8x8_c(src, CONVERT_TO_SHORTPTR(dest), stride, tx_type, bd);
-}
-
-void av1_highbd_inv_txfm_add_16x16_c(const tran_low_t *input, uint8_t *dest,
- int stride, const TxfmParam *txfm_param) {
- int bd = txfm_param->bd;
- const TX_TYPE tx_type = txfm_param->tx_type;
- const int32_t *src = cast_to_int32(input);
-
- av1_inv_txfm2d_add_16x16_c(src, CONVERT_TO_SHORTPTR(dest), stride, tx_type,
- bd);
-}
-
-void av1_highbd_inv_txfm_add_8x16_c(const tran_low_t *input, uint8_t *dest,
- int stride, const TxfmParam *txfm_param) {
- const int32_t *src = cast_to_int32(input);
- av1_inv_txfm2d_add_8x16_c(src, CONVERT_TO_SHORTPTR(dest), stride,
- txfm_param->tx_type, txfm_param->bd);
-}
-
-void av1_highbd_inv_txfm_add_16x8_c(const tran_low_t *input, uint8_t *dest,
- int stride, const TxfmParam *txfm_param) {
- const int32_t *src = cast_to_int32(input);
- av1_inv_txfm2d_add_16x8_c(src, CONVERT_TO_SHORTPTR(dest), stride,
- txfm_param->tx_type, txfm_param->bd);
-}
-
-void av1_highbd_inv_txfm_add_32x32_c(const tran_low_t *input, uint8_t *dest,
- int stride, const TxfmParam *txfm_param) {
- const int bd = txfm_param->bd;
- const TX_TYPE tx_type = txfm_param->tx_type;
- const int32_t *src = cast_to_int32(input);
-
- av1_inv_txfm2d_add_32x32_c(src, CONVERT_TO_SHORTPTR(dest), stride, tx_type,
- bd);
-}
-
-void av1_highbd_inv_txfm_add_64x64_c(const tran_low_t *input, uint8_t *dest,
- int stride, const TxfmParam *txfm_param) {
- const int bd = txfm_param->bd;
- const TX_TYPE tx_type = txfm_param->tx_type;
- const int32_t *src = cast_to_int32(input);
- assert(tx_type == DCT_DCT);
- av1_inv_txfm2d_add_64x64_c(src, CONVERT_TO_SHORTPTR(dest), stride, tx_type,
- bd);
-}
-
-static void init_txfm_param(const MACROBLOCKD *xd, int plane, TX_SIZE tx_size,
- TX_TYPE tx_type, int eob, int reduced_tx_set,
- TxfmParam *txfm_param) {
- (void)plane;
- txfm_param->tx_type = tx_type;
- txfm_param->tx_size = tx_size;
- txfm_param->eob = eob;
- txfm_param->lossless = xd->lossless[xd->mi[0]->segment_id];
- txfm_param->bd = xd->bd;
- txfm_param->is_hbd = get_bitdepth_data_path_index(xd);
- txfm_param->tx_set_type = av1_get_ext_tx_set_type(
- txfm_param->tx_size, is_inter_block(xd->mi[0]), reduced_tx_set);
-}
-
-void av1_highbd_inv_txfm_add_c(const tran_low_t *input, uint8_t *dest,
- int stride, const TxfmParam *txfm_param) {
- assert(av1_ext_tx_used[txfm_param->tx_set_type][txfm_param->tx_type]);
- const TX_SIZE tx_size = txfm_param->tx_size;
- switch (tx_size) {
- case TX_32X32:
- av1_highbd_inv_txfm_add_32x32_c(input, dest, stride, txfm_param);
- break;
- case TX_16X16:
- av1_highbd_inv_txfm_add_16x16_c(input, dest, stride, txfm_param);
- break;
- case TX_8X8:
- av1_highbd_inv_txfm_add_8x8_c(input, dest, stride, txfm_param);
- break;
- case TX_4X8:
- av1_highbd_inv_txfm_add_4x8(input, dest, stride, txfm_param);
- break;
- case TX_8X4:
- av1_highbd_inv_txfm_add_8x4(input, dest, stride, txfm_param);
- break;
- case TX_8X16:
- av1_highbd_inv_txfm_add_8x16_c(input, dest, stride, txfm_param);
- break;
- case TX_16X8:
- av1_highbd_inv_txfm_add_16x8_c(input, dest, stride, txfm_param);
- break;
- case TX_16X32:
- av1_highbd_inv_txfm_add_16x32(input, dest, stride, txfm_param);
- break;
- case TX_32X16:
- av1_highbd_inv_txfm_add_32x16(input, dest, stride, txfm_param);
- break;
- case TX_64X64:
- av1_highbd_inv_txfm_add_64x64_c(input, dest, stride, txfm_param);
- break;
- case TX_32X64:
- av1_highbd_inv_txfm_add_32x64(input, dest, stride, txfm_param);
- break;
- case TX_64X32:
- av1_highbd_inv_txfm_add_64x32(input, dest, stride, txfm_param);
- break;
- case TX_16X64:
- av1_highbd_inv_txfm_add_16x64(input, dest, stride, txfm_param);
- break;
- case TX_64X16:
- av1_highbd_inv_txfm_add_64x16(input, dest, stride, txfm_param);
- break;
- case TX_4X4:
- // this is like av1_short_idct4x4 but has a special case around eob<=1
- // which is significant (not just an optimization) for the lossless
- // case.
- av1_highbd_inv_txfm_add_4x4_c(input, dest, stride, txfm_param);
- break;
- case TX_16X4:
- av1_highbd_inv_txfm_add_16x4(input, dest, stride, txfm_param);
- break;
- case TX_4X16:
- av1_highbd_inv_txfm_add_4x16(input, dest, stride, txfm_param);
- break;
- case TX_8X32:
- av1_highbd_inv_txfm_add_8x32(input, dest, stride, txfm_param);
- break;
- case TX_32X8:
- av1_highbd_inv_txfm_add_32x8(input, dest, stride, txfm_param);
- break;
- default: assert(0 && "Invalid transform size"); break;
- }
-}
-
-void av1_inv_txfm_add_c(const tran_low_t *dqcoeff, uint8_t *dst, int stride,
- const TxfmParam *txfm_param) {
- const TX_SIZE tx_size = txfm_param->tx_size;
- DECLARE_ALIGNED(32, uint16_t, tmp[MAX_TX_SQUARE]);
- int tmp_stride = MAX_TX_SIZE;
- int w = tx_size_wide[tx_size];
- int h = tx_size_high[tx_size];
- for (int r = 0; r < h; ++r) {
- for (int c = 0; c < w; ++c) {
- tmp[r * tmp_stride + c] = dst[r * stride + c];
- }
- }
-
- av1_highbd_inv_txfm_add(dqcoeff, CONVERT_TO_BYTEPTR(tmp), tmp_stride,
- txfm_param);
-
- for (int r = 0; r < h; ++r) {
- for (int c = 0; c < w; ++c) {
- dst[r * stride + c] = (uint8_t)tmp[r * tmp_stride + c];
- }
- }
-}
-
-void av1_inverse_transform_block(const MACROBLOCKD *xd,
- const tran_low_t *dqcoeff, int plane,
- TX_TYPE tx_type, TX_SIZE tx_size, uint8_t *dst,
- int stride, int eob, int reduced_tx_set) {
- if (!eob) return;
-
- assert(eob <= av1_get_max_eob(tx_size));
-
- TxfmParam txfm_param;
- init_txfm_param(xd, plane, tx_size, tx_type, eob, reduced_tx_set,
- &txfm_param);
- assert(av1_ext_tx_used[txfm_param.tx_set_type][txfm_param.tx_type]);
-
- if (txfm_param.is_hbd) {
- av1_highbd_inv_txfm_add(dqcoeff, dst, stride, &txfm_param);
- } else {
- av1_inv_txfm_add(dqcoeff, dst, stride, &txfm_param);
- }
-}
diff --git a/third_party/aom/av1/common/idct.h b/third_party/aom/av1/common/idct.h
deleted file mode 100644
index d9454e73f..000000000
--- a/third_party/aom/av1/common/idct.h
+++ /dev/null
@@ -1,67 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#ifndef AOM_AV1_COMMON_IDCT_H_
-#define AOM_AV1_COMMON_IDCT_H_
-
-#include "config/aom_config.h"
-
-#include "av1/common/blockd.h"
-#include "av1/common/common.h"
-#include "av1/common/enums.h"
-#include "aom_dsp/txfm_common.h"
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-typedef void (*transform_1d)(const tran_low_t *, tran_low_t *);
-
-typedef struct {
- transform_1d cols, rows; // vertical and horizontal
-} transform_2d;
-
-#define MAX_TX_SCALE 1
-int av1_get_tx_scale(const TX_SIZE tx_size);
-
-void av1_inverse_transform_block(const MACROBLOCKD *xd,
- const tran_low_t *dqcoeff, int plane,
- TX_TYPE tx_type, TX_SIZE tx_size, uint8_t *dst,
- int stride, int eob, int reduced_tx_set);
-void av1_highbd_iwht4x4_add(const tran_low_t *input, uint8_t *dest, int stride,
- int eob, int bd);
-
-static INLINE const int32_t *cast_to_int32(const tran_low_t *input) {
- assert(sizeof(int32_t) == sizeof(tran_low_t));
- return (const int32_t *)input;
-}
-
-typedef void(highbd_inv_txfm_add)(const tran_low_t *input, uint8_t *dest,
- int stride, const TxfmParam *param);
-
-highbd_inv_txfm_add av1_highbd_inv_txfm_add_4x8;
-highbd_inv_txfm_add av1_highbd_inv_txfm_add_8x4;
-highbd_inv_txfm_add av1_highbd_inv_txfm_add_16x32;
-highbd_inv_txfm_add av1_highbd_inv_txfm_add_32x16;
-highbd_inv_txfm_add av1_highbd_inv_txfm_add_32x64;
-highbd_inv_txfm_add av1_highbd_inv_txfm_add_64x32;
-highbd_inv_txfm_add av1_highbd_inv_txfm_add_16x64;
-highbd_inv_txfm_add av1_highbd_inv_txfm_add_64x16;
-highbd_inv_txfm_add av1_highbd_inv_txfm_add_16x4;
-highbd_inv_txfm_add av1_highbd_inv_txfm_add_4x16;
-highbd_inv_txfm_add av1_highbd_inv_txfm_add_8x32;
-highbd_inv_txfm_add av1_highbd_inv_txfm_add_32x8;
-
-#ifdef __cplusplus
-} // extern "C"
-#endif
-
-#endif // AOM_AV1_COMMON_IDCT_H_
diff --git a/third_party/aom/av1/common/mv.h b/third_party/aom/av1/common/mv.h
deleted file mode 100644
index 5b0225192..000000000
--- a/third_party/aom/av1/common/mv.h
+++ /dev/null
@@ -1,301 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#ifndef AOM_AV1_COMMON_MV_H_
-#define AOM_AV1_COMMON_MV_H_
-
-#include "av1/common/common.h"
-#include "av1/common/common_data.h"
-#include "aom_dsp/aom_filter.h"
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-#define INVALID_MV 0x80008000
-
-typedef struct mv {
- int16_t row;
- int16_t col;
-} MV;
-
-static const MV kZeroMv = { 0, 0 };
-
-typedef union int_mv {
- uint32_t as_int;
- MV as_mv;
-} int_mv; /* facilitates faster equality tests and copies */
-
-typedef struct mv32 {
- int32_t row;
- int32_t col;
-} MV32;
-
-// Bits of precision used for the model
-#define WARPEDMODEL_PREC_BITS 16
-#define WARPEDMODEL_ROW3HOMO_PREC_BITS 16
-
-#define WARPEDMODEL_TRANS_CLAMP (128 << WARPEDMODEL_PREC_BITS)
-#define WARPEDMODEL_NONDIAGAFFINE_CLAMP (1 << (WARPEDMODEL_PREC_BITS - 3))
-#define WARPEDMODEL_ROW3HOMO_CLAMP (1 << (WARPEDMODEL_PREC_BITS - 2))
-
-// Bits of subpel precision for warped interpolation
-#define WARPEDPIXEL_PREC_BITS 6
-#define WARPEDPIXEL_PREC_SHIFTS (1 << WARPEDPIXEL_PREC_BITS)
-
-#define WARP_PARAM_REDUCE_BITS 6
-
-#define WARPEDDIFF_PREC_BITS (WARPEDMODEL_PREC_BITS - WARPEDPIXEL_PREC_BITS)
-
-/* clang-format off */
-typedef enum ATTRIBUTE_PACKED {
- IDENTITY = 0, // identity transformation, 0-parameter
- TRANSLATION = 1, // translational motion 2-parameter
- ROTZOOM = 2, // simplified affine with rotation + zoom only, 4-parameter
- AFFINE = 3, // affine, 6-parameter
- TRANS_TYPES,
-} TransformationType;
-/* clang-format on */
-
-// Number of types used for global motion (must be >= 3 and <= TRANS_TYPES)
-// The following can be useful:
-// GLOBAL_TRANS_TYPES 3 - up to rotation-zoom
-// GLOBAL_TRANS_TYPES 4 - up to affine
-// GLOBAL_TRANS_TYPES 6 - up to hor/ver trapezoids
-// GLOBAL_TRANS_TYPES 7 - up to full homography
-#define GLOBAL_TRANS_TYPES 4
-
-typedef struct {
- int global_warp_allowed;
- int local_warp_allowed;
-} WarpTypesAllowed;
-
-// number of parameters used by each transformation in TransformationTypes
-static const int trans_model_params[TRANS_TYPES] = { 0, 2, 4, 6 };
-
-// The order of values in the wmmat matrix below is best described
-// by the homography:
-// [x' (m2 m3 m0 [x
-// z . y' = m4 m5 m1 * y
-// 1] m6 m7 1) 1]
-typedef struct {
- TransformationType wmtype;
- int32_t wmmat[8];
- int16_t alpha, beta, gamma, delta;
- int8_t invalid;
-} WarpedMotionParams;
-
-/* clang-format off */
-static const WarpedMotionParams default_warp_params = {
- IDENTITY,
- { 0, 0, (1 << WARPEDMODEL_PREC_BITS), 0, 0, (1 << WARPEDMODEL_PREC_BITS), 0,
- 0 },
- 0, 0, 0, 0,
- 0,
-};
-/* clang-format on */
-
-// The following constants describe the various precisions
-// of different parameters in the global motion experiment.
-//
-// Given the general homography:
-// [x' (a b c [x
-// z . y' = d e f * y
-// 1] g h i) 1]
-//
-// Constants using the name ALPHA here are related to parameters
-// a, b, d, e. Constants using the name TRANS are related
-// to parameters c and f.
-//
-// Anything ending in PREC_BITS is the number of bits of precision
-// to maintain when converting from double to integer.
-//
-// The ABS parameters are used to create an upper and lower bound
-// for each parameter. In other words, after a parameter is integerized
-// it is clamped between -(1 << ABS_XXX_BITS) and (1 << ABS_XXX_BITS).
-//
-// XXX_PREC_DIFF and XXX_DECODE_FACTOR
-// are computed once here to prevent repetitive
-// computation on the decoder side. These are
-// to allow the global motion parameters to be encoded in a lower
-// precision than the warped model precision. This means that they
-// need to be changed to warped precision when they are decoded.
-//
-// XX_MIN, XX_MAX are also computed to avoid repeated computation
-
-#define SUBEXPFIN_K 3
-#define GM_TRANS_PREC_BITS 6
-#define GM_ABS_TRANS_BITS 12
-#define GM_ABS_TRANS_ONLY_BITS (GM_ABS_TRANS_BITS - GM_TRANS_PREC_BITS + 3)
-#define GM_TRANS_PREC_DIFF (WARPEDMODEL_PREC_BITS - GM_TRANS_PREC_BITS)
-#define GM_TRANS_ONLY_PREC_DIFF (WARPEDMODEL_PREC_BITS - 3)
-#define GM_TRANS_DECODE_FACTOR (1 << GM_TRANS_PREC_DIFF)
-#define GM_TRANS_ONLY_DECODE_FACTOR (1 << GM_TRANS_ONLY_PREC_DIFF)
-
-#define GM_ALPHA_PREC_BITS 15
-#define GM_ABS_ALPHA_BITS 12
-#define GM_ALPHA_PREC_DIFF (WARPEDMODEL_PREC_BITS - GM_ALPHA_PREC_BITS)
-#define GM_ALPHA_DECODE_FACTOR (1 << GM_ALPHA_PREC_DIFF)
-
-#define GM_ROW3HOMO_PREC_BITS 16
-#define GM_ABS_ROW3HOMO_BITS 11
-#define GM_ROW3HOMO_PREC_DIFF \
- (WARPEDMODEL_ROW3HOMO_PREC_BITS - GM_ROW3HOMO_PREC_BITS)
-#define GM_ROW3HOMO_DECODE_FACTOR (1 << GM_ROW3HOMO_PREC_DIFF)
-
-#define GM_TRANS_MAX (1 << GM_ABS_TRANS_BITS)
-#define GM_ALPHA_MAX (1 << GM_ABS_ALPHA_BITS)
-#define GM_ROW3HOMO_MAX (1 << GM_ABS_ROW3HOMO_BITS)
-
-#define GM_TRANS_MIN -GM_TRANS_MAX
-#define GM_ALPHA_MIN -GM_ALPHA_MAX
-#define GM_ROW3HOMO_MIN -GM_ROW3HOMO_MAX
-
-static INLINE int block_center_x(int mi_col, BLOCK_SIZE bs) {
- const int bw = block_size_wide[bs];
- return mi_col * MI_SIZE + bw / 2 - 1;
-}
-
-static INLINE int block_center_y(int mi_row, BLOCK_SIZE bs) {
- const int bh = block_size_high[bs];
- return mi_row * MI_SIZE + bh / 2 - 1;
-}
-
-static INLINE int convert_to_trans_prec(int allow_hp, int coor) {
- if (allow_hp)
- return ROUND_POWER_OF_TWO_SIGNED(coor, WARPEDMODEL_PREC_BITS - 3);
- else
- return ROUND_POWER_OF_TWO_SIGNED(coor, WARPEDMODEL_PREC_BITS - 2) * 2;
-}
-static INLINE void integer_mv_precision(MV *mv) {
- int mod = (mv->row % 8);
- if (mod != 0) {
- mv->row -= mod;
- if (abs(mod) > 4) {
- if (mod > 0) {
- mv->row += 8;
- } else {
- mv->row -= 8;
- }
- }
- }
-
- mod = (mv->col % 8);
- if (mod != 0) {
- mv->col -= mod;
- if (abs(mod) > 4) {
- if (mod > 0) {
- mv->col += 8;
- } else {
- mv->col -= 8;
- }
- }
- }
-}
-// Convert a global motion vector into a motion vector at the centre of the
-// given block.
-//
-// The resulting motion vector will have three fractional bits of precision. If
-// allow_hp is zero, the bottom bit will always be zero. If CONFIG_AMVR and
-// is_integer is true, the bottom three bits will be zero (so the motion vector
-// represents an integer)
-static INLINE int_mv gm_get_motion_vector(const WarpedMotionParams *gm,
- int allow_hp, BLOCK_SIZE bsize,
- int mi_col, int mi_row,
- int is_integer) {
- int_mv res;
-
- if (gm->wmtype == IDENTITY) {
- res.as_int = 0;
- return res;
- }
-
- const int32_t *mat = gm->wmmat;
- int x, y, tx, ty;
-
- if (gm->wmtype == TRANSLATION) {
- // All global motion vectors are stored with WARPEDMODEL_PREC_BITS (16)
- // bits of fractional precision. The offset for a translation is stored in
- // entries 0 and 1. For translations, all but the top three (two if
- // cm->allow_high_precision_mv is false) fractional bits are always zero.
- //
- // After the right shifts, there are 3 fractional bits of precision. If
- // allow_hp is false, the bottom bit is always zero (so we don't need a
- // call to convert_to_trans_prec here)
- res.as_mv.row = gm->wmmat[0] >> GM_TRANS_ONLY_PREC_DIFF;
- res.as_mv.col = gm->wmmat[1] >> GM_TRANS_ONLY_PREC_DIFF;
- assert(IMPLIES(1 & (res.as_mv.row | res.as_mv.col), allow_hp));
- if (is_integer) {
- integer_mv_precision(&res.as_mv);
- }
- return res;
- }
-
- x = block_center_x(mi_col, bsize);
- y = block_center_y(mi_row, bsize);
-
- if (gm->wmtype == ROTZOOM) {
- assert(gm->wmmat[5] == gm->wmmat[2]);
- assert(gm->wmmat[4] == -gm->wmmat[3]);
- }
-
- const int xc =
- (mat[2] - (1 << WARPEDMODEL_PREC_BITS)) * x + mat[3] * y + mat[0];
- const int yc =
- mat[4] * x + (mat[5] - (1 << WARPEDMODEL_PREC_BITS)) * y + mat[1];
- tx = convert_to_trans_prec(allow_hp, xc);
- ty = convert_to_trans_prec(allow_hp, yc);
-
- res.as_mv.row = ty;
- res.as_mv.col = tx;
-
- if (is_integer) {
- integer_mv_precision(&res.as_mv);
- }
- return res;
-}
-
-static INLINE TransformationType get_gmtype(const WarpedMotionParams *gm) {
- if (gm->wmmat[5] == (1 << WARPEDMODEL_PREC_BITS) && !gm->wmmat[4] &&
- gm->wmmat[2] == (1 << WARPEDMODEL_PREC_BITS) && !gm->wmmat[3]) {
- return ((!gm->wmmat[1] && !gm->wmmat[0]) ? IDENTITY : TRANSLATION);
- }
- if (gm->wmmat[2] == gm->wmmat[5] && gm->wmmat[3] == -gm->wmmat[4])
- return ROTZOOM;
- else
- return AFFINE;
-}
-
-typedef struct candidate_mv {
- int_mv this_mv;
- int_mv comp_mv;
- int weight;
-} CANDIDATE_MV;
-
-static INLINE int is_zero_mv(const MV *mv) {
- return *((const uint32_t *)mv) == 0;
-}
-
-static INLINE int is_equal_mv(const MV *a, const MV *b) {
- return *((const uint32_t *)a) == *((const uint32_t *)b);
-}
-
-static INLINE void clamp_mv(MV *mv, int min_col, int max_col, int min_row,
- int max_row) {
- mv->col = clamp(mv->col, min_col, max_col);
- mv->row = clamp(mv->row, min_row, max_row);
-}
-
-#ifdef __cplusplus
-} // extern "C"
-#endif
-
-#endif // AOM_AV1_COMMON_MV_H_
diff --git a/third_party/aom/av1/common/mvref_common.c b/third_party/aom/av1/common/mvref_common.c
deleted file mode 100644
index 7f24ab4e6..000000000
--- a/third_party/aom/av1/common/mvref_common.c
+++ /dev/null
@@ -1,1523 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#include <stdlib.h>
-
-#include "av1/common/mvref_common.h"
-#include "av1/common/warped_motion.h"
-
-// Although we assign 32 bit integers, all the values are strictly under 14
-// bits.
-static int div_mult[32] = { 0, 16384, 8192, 5461, 4096, 3276, 2730, 2340,
- 2048, 1820, 1638, 1489, 1365, 1260, 1170, 1092,
- 1024, 963, 910, 862, 819, 780, 744, 712,
- 682, 655, 630, 606, 585, 564, 546, 528 };
-
-// TODO(jingning): Consider the use of lookup table for (num / den)
-// altogether.
-static void get_mv_projection(MV *output, MV ref, int num, int den) {
- den = AOMMIN(den, MAX_FRAME_DISTANCE);
- num = num > 0 ? AOMMIN(num, MAX_FRAME_DISTANCE)
- : AOMMAX(num, -MAX_FRAME_DISTANCE);
- const int mv_row =
- ROUND_POWER_OF_TWO_SIGNED(ref.row * num * div_mult[den], 14);
- const int mv_col =
- ROUND_POWER_OF_TWO_SIGNED(ref.col * num * div_mult[den], 14);
- const int clamp_max = MV_UPP - 1;
- const int clamp_min = MV_LOW + 1;
- output->row = (int16_t)clamp(mv_row, clamp_min, clamp_max);
- output->col = (int16_t)clamp(mv_col, clamp_min, clamp_max);
-}
-
-void av1_copy_frame_mvs(const AV1_COMMON *const cm,
- const MB_MODE_INFO *const mi, int mi_row, int mi_col,
- int x_mis, int y_mis) {
- const int frame_mvs_stride = ROUND_POWER_OF_TWO(cm->mi_cols, 1);
- MV_REF *frame_mvs =
- cm->cur_frame->mvs + (mi_row >> 1) * frame_mvs_stride + (mi_col >> 1);
- x_mis = ROUND_POWER_OF_TWO(x_mis, 1);
- y_mis = ROUND_POWER_OF_TWO(y_mis, 1);
- int w, h;
-
- for (h = 0; h < y_mis; h++) {
- MV_REF *mv = frame_mvs;
- for (w = 0; w < x_mis; w++) {
- mv->ref_frame = NONE_FRAME;
- mv->mv.as_int = 0;
-
- for (int idx = 0; idx < 2; ++idx) {
- MV_REFERENCE_FRAME ref_frame = mi->ref_frame[idx];
- if (ref_frame > INTRA_FRAME) {
- int8_t ref_idx = cm->ref_frame_side[ref_frame];
- if (ref_idx) continue;
- if ((abs(mi->mv[idx].as_mv.row) > REFMVS_LIMIT) ||
- (abs(mi->mv[idx].as_mv.col) > REFMVS_LIMIT))
- continue;
- mv->ref_frame = ref_frame;
- mv->mv.as_int = mi->mv[idx].as_int;
- }
- }
- mv++;
- }
- frame_mvs += frame_mvs_stride;
- }
-}
-
-static void add_ref_mv_candidate(
- const MB_MODE_INFO *const candidate, const MV_REFERENCE_FRAME rf[2],
- uint8_t *refmv_count, uint8_t *ref_match_count, uint8_t *newmv_count,
- CANDIDATE_MV *ref_mv_stack, int_mv *gm_mv_candidates,
- const WarpedMotionParams *gm_params, int col, int weight) {
- if (!is_inter_block(candidate)) return; // for intrabc
- int index = 0, ref;
- assert(weight % 2 == 0);
-
- if (rf[1] == NONE_FRAME) {
- // single reference frame
- for (ref = 0; ref < 2; ++ref) {
- if (candidate->ref_frame[ref] == rf[0]) {
- int_mv this_refmv;
- if (is_global_mv_block(candidate, gm_params[rf[0]].wmtype))
- this_refmv = gm_mv_candidates[0];
- else
- this_refmv = get_sub_block_mv(candidate, ref, col);
-
- for (index = 0; index < *refmv_count; ++index)
- if (ref_mv_stack[index].this_mv.as_int == this_refmv.as_int) break;
-
- if (index < *refmv_count) ref_mv_stack[index].weight += weight;
-
- // Add a new item to the list.
- if (index == *refmv_count && *refmv_count < MAX_REF_MV_STACK_SIZE) {
- ref_mv_stack[index].this_mv = this_refmv;
- ref_mv_stack[index].weight = weight;
- ++(*refmv_count);
- }
- if (have_newmv_in_inter_mode(candidate->mode)) ++*newmv_count;
- ++*ref_match_count;
- }
- }
- } else {
- // compound reference frame
- if (candidate->ref_frame[0] == rf[0] && candidate->ref_frame[1] == rf[1]) {
- int_mv this_refmv[2];
-
- for (ref = 0; ref < 2; ++ref) {
- if (is_global_mv_block(candidate, gm_params[rf[ref]].wmtype))
- this_refmv[ref] = gm_mv_candidates[ref];
- else
- this_refmv[ref] = get_sub_block_mv(candidate, ref, col);
- }
-
- for (index = 0; index < *refmv_count; ++index)
- if ((ref_mv_stack[index].this_mv.as_int == this_refmv[0].as_int) &&
- (ref_mv_stack[index].comp_mv.as_int == this_refmv[1].as_int))
- break;
-
- if (index < *refmv_count) ref_mv_stack[index].weight += weight;
-
- // Add a new item to the list.
- if (index == *refmv_count && *refmv_count < MAX_REF_MV_STACK_SIZE) {
- ref_mv_stack[index].this_mv = this_refmv[0];
- ref_mv_stack[index].comp_mv = this_refmv[1];
- ref_mv_stack[index].weight = weight;
- ++(*refmv_count);
- }
- if (have_newmv_in_inter_mode(candidate->mode)) ++*newmv_count;
- ++*ref_match_count;
- }
- }
-}
-
-static void scan_row_mbmi(const AV1_COMMON *cm, const MACROBLOCKD *xd,
- int mi_row, int mi_col,
- const MV_REFERENCE_FRAME rf[2], int row_offset,
- CANDIDATE_MV *ref_mv_stack, uint8_t *refmv_count,
- uint8_t *ref_match_count, uint8_t *newmv_count,
- int_mv *gm_mv_candidates, int max_row_offset,
- int *processed_rows) {
- int end_mi = AOMMIN(xd->n4_w, cm->mi_cols - mi_col);
- end_mi = AOMMIN(end_mi, mi_size_wide[BLOCK_64X64]);
- const int n8_w_8 = mi_size_wide[BLOCK_8X8];
- const int n8_w_16 = mi_size_wide[BLOCK_16X16];
- int i;
- int col_offset = 0;
- // TODO(jingning): Revisit this part after cb4x4 is stable.
- if (abs(row_offset) > 1) {
- col_offset = 1;
- if ((mi_col & 0x01) && xd->n4_w < n8_w_8) --col_offset;
- }
- const int use_step_16 = (xd->n4_w >= 16);
- MB_MODE_INFO **const candidate_mi0 = xd->mi + row_offset * xd->mi_stride;
- (void)mi_row;
-
- for (i = 0; i < end_mi;) {
- const MB_MODE_INFO *const candidate = candidate_mi0[col_offset + i];
- const int candidate_bsize = candidate->sb_type;
- const int n4_w = mi_size_wide[candidate_bsize];
- int len = AOMMIN(xd->n4_w, n4_w);
- if (use_step_16)
- len = AOMMAX(n8_w_16, len);
- else if (abs(row_offset) > 1)
- len = AOMMAX(len, n8_w_8);
-
- int weight = 2;
- if (xd->n4_w >= n8_w_8 && xd->n4_w <= n4_w) {
- int inc = AOMMIN(-max_row_offset + row_offset + 1,
- mi_size_high[candidate_bsize]);
- // Obtain range used in weight calculation.
- weight = AOMMAX(weight, inc);
- // Update processed rows.
- *processed_rows = inc - row_offset - 1;
- }
-
- add_ref_mv_candidate(candidate, rf, refmv_count, ref_match_count,
- newmv_count, ref_mv_stack, gm_mv_candidates,
- cm->global_motion, col_offset + i, len * weight);
-
- i += len;
- }
-}
-
-static void scan_col_mbmi(const AV1_COMMON *cm, const MACROBLOCKD *xd,
- int mi_row, int mi_col,
- const MV_REFERENCE_FRAME rf[2], int col_offset,
- CANDIDATE_MV *ref_mv_stack, uint8_t *refmv_count,
- uint8_t *ref_match_count, uint8_t *newmv_count,
- int_mv *gm_mv_candidates, int max_col_offset,
- int *processed_cols) {
- int end_mi = AOMMIN(xd->n4_h, cm->mi_rows - mi_row);
- end_mi = AOMMIN(end_mi, mi_size_high[BLOCK_64X64]);
- const int n8_h_8 = mi_size_high[BLOCK_8X8];
- const int n8_h_16 = mi_size_high[BLOCK_16X16];
- int i;
- int row_offset = 0;
- if (abs(col_offset) > 1) {
- row_offset = 1;
- if ((mi_row & 0x01) && xd->n4_h < n8_h_8) --row_offset;
- }
- const int use_step_16 = (xd->n4_h >= 16);
- (void)mi_col;
-
- for (i = 0; i < end_mi;) {
- const MB_MODE_INFO *const candidate =
- xd->mi[(row_offset + i) * xd->mi_stride + col_offset];
- const int candidate_bsize = candidate->sb_type;
- const int n4_h = mi_size_high[candidate_bsize];
- int len = AOMMIN(xd->n4_h, n4_h);
- if (use_step_16)
- len = AOMMAX(n8_h_16, len);
- else if (abs(col_offset) > 1)
- len = AOMMAX(len, n8_h_8);
-
- int weight = 2;
- if (xd->n4_h >= n8_h_8 && xd->n4_h <= n4_h) {
- int inc = AOMMIN(-max_col_offset + col_offset + 1,
- mi_size_wide[candidate_bsize]);
- // Obtain range used in weight calculation.
- weight = AOMMAX(weight, inc);
- // Update processed cols.
- *processed_cols = inc - col_offset - 1;
- }
-
- add_ref_mv_candidate(candidate, rf, refmv_count, ref_match_count,
- newmv_count, ref_mv_stack, gm_mv_candidates,
- cm->global_motion, col_offset, len * weight);
-
- i += len;
- }
-}
-
-static void scan_blk_mbmi(const AV1_COMMON *cm, const MACROBLOCKD *xd,
- const int mi_row, const int mi_col,
- const MV_REFERENCE_FRAME rf[2], int row_offset,
- int col_offset, CANDIDATE_MV *ref_mv_stack,
- uint8_t *ref_match_count, uint8_t *newmv_count,
- int_mv *gm_mv_candidates,
- uint8_t refmv_count[MODE_CTX_REF_FRAMES]) {
- const TileInfo *const tile = &xd->tile;
- POSITION mi_pos;
-
- mi_pos.row = row_offset;
- mi_pos.col = col_offset;
-
- if (is_inside(tile, mi_col, mi_row, &mi_pos)) {
- const MB_MODE_INFO *const candidate =
- xd->mi[mi_pos.row * xd->mi_stride + mi_pos.col];
- const int len = mi_size_wide[BLOCK_8X8];
-
- add_ref_mv_candidate(candidate, rf, refmv_count, ref_match_count,
- newmv_count, ref_mv_stack, gm_mv_candidates,
- cm->global_motion, mi_pos.col, 2 * len);
- } // Analyze a single 8x8 block motion information.
-}
-
-static int has_top_right(const AV1_COMMON *cm, const MACROBLOCKD *xd,
- int mi_row, int mi_col, int bs) {
- const int sb_mi_size = mi_size_wide[cm->seq_params.sb_size];
- const int mask_row = mi_row & (sb_mi_size - 1);
- const int mask_col = mi_col & (sb_mi_size - 1);
-
- if (bs > mi_size_wide[BLOCK_64X64]) return 0;
-
- // In a split partition all apart from the bottom right has a top right
- int has_tr = !((mask_row & bs) && (mask_col & bs));
-
- // bs > 0 and bs is a power of 2
- assert(bs > 0 && !(bs & (bs - 1)));
-
- // For each 4x4 group of blocks, when the bottom right is decoded the blocks
- // to the right have not been decoded therefore the bottom right does
- // not have a top right
- while (bs < sb_mi_size) {
- if (mask_col & bs) {
- if ((mask_col & (2 * bs)) && (mask_row & (2 * bs))) {
- has_tr = 0;
- break;
- }
- } else {
- break;
- }
- bs <<= 1;
- }
-
- // The left hand of two vertical rectangles always has a top right (as the
- // block above will have been decoded)
- if (xd->n4_w < xd->n4_h)
- if (!xd->is_sec_rect) has_tr = 1;
-
- // The bottom of two horizontal rectangles never has a top right (as the block
- // to the right won't have been decoded)
- if (xd->n4_w > xd->n4_h)
- if (xd->is_sec_rect) has_tr = 0;
-
- // The bottom left square of a Vertical A (in the old format) does
- // not have a top right as it is decoded before the right hand
- // rectangle of the partition
- if (xd->mi[0]->partition == PARTITION_VERT_A) {
- if (xd->n4_w == xd->n4_h)
- if (mask_row & bs) has_tr = 0;
- }
-
- return has_tr;
-}
-
-static int check_sb_border(const int mi_row, const int mi_col,
- const int row_offset, const int col_offset) {
- const int sb_mi_size = mi_size_wide[BLOCK_64X64];
- const int row = mi_row & (sb_mi_size - 1);
- const int col = mi_col & (sb_mi_size - 1);
-
- if (row + row_offset < 0 || row + row_offset >= sb_mi_size ||
- col + col_offset < 0 || col + col_offset >= sb_mi_size)
- return 0;
-
- return 1;
-}
-
-static int add_tpl_ref_mv(const AV1_COMMON *cm, const MACROBLOCKD *xd,
- int mi_row, int mi_col, MV_REFERENCE_FRAME ref_frame,
- int blk_row, int blk_col, int_mv *gm_mv_candidates,
- uint8_t refmv_count[MODE_CTX_REF_FRAMES],
- CANDIDATE_MV ref_mv_stacks[][MAX_REF_MV_STACK_SIZE],
- int16_t *mode_context) {
- POSITION mi_pos;
- int idx;
- const int weight_unit = 1; // mi_size_wide[BLOCK_8X8];
-
- mi_pos.row = (mi_row & 0x01) ? blk_row : blk_row + 1;
- mi_pos.col = (mi_col & 0x01) ? blk_col : blk_col + 1;
-
- if (!is_inside(&xd->tile, mi_col, mi_row, &mi_pos)) return 0;
-
- const TPL_MV_REF *prev_frame_mvs =
- cm->tpl_mvs + ((mi_row + mi_pos.row) >> 1) * (cm->mi_stride >> 1) +
- ((mi_col + mi_pos.col) >> 1);
-
- MV_REFERENCE_FRAME rf[2];
- av1_set_ref_frame(rf, ref_frame);
-
- if (rf[1] == NONE_FRAME) {
- int cur_frame_index = cm->cur_frame->cur_frame_offset;
- int buf_idx_0 = cm->frame_refs[FWD_RF_OFFSET(rf[0])].idx;
- int frame0_index = cm->buffer_pool->frame_bufs[buf_idx_0].cur_frame_offset;
- int cur_offset_0 = get_relative_dist(cm, cur_frame_index, frame0_index);
- CANDIDATE_MV *ref_mv_stack = ref_mv_stacks[rf[0]];
-
- if (prev_frame_mvs->mfmv0.as_int != INVALID_MV) {
- int_mv this_refmv;
-
- get_mv_projection(&this_refmv.as_mv, prev_frame_mvs->mfmv0.as_mv,
- cur_offset_0, prev_frame_mvs->ref_frame_offset);
- lower_mv_precision(&this_refmv.as_mv, cm->allow_high_precision_mv,
- cm->cur_frame_force_integer_mv);
-
- if (blk_row == 0 && blk_col == 0)
- if (abs(this_refmv.as_mv.row - gm_mv_candidates[0].as_mv.row) >= 16 ||
- abs(this_refmv.as_mv.col - gm_mv_candidates[0].as_mv.col) >= 16)
- mode_context[ref_frame] |= (1 << GLOBALMV_OFFSET);
-
- for (idx = 0; idx < refmv_count[rf[0]]; ++idx)
- if (this_refmv.as_int == ref_mv_stack[idx].this_mv.as_int) break;
-
- if (idx < refmv_count[rf[0]]) ref_mv_stack[idx].weight += 2 * weight_unit;
-
- if (idx == refmv_count[rf[0]] &&
- refmv_count[rf[0]] < MAX_REF_MV_STACK_SIZE) {
- ref_mv_stack[idx].this_mv.as_int = this_refmv.as_int;
- ref_mv_stack[idx].weight = 2 * weight_unit;
- ++(refmv_count[rf[0]]);
- }
- return 1;
- }
- } else {
- // Process compound inter mode
- int cur_frame_index = cm->cur_frame->cur_frame_offset;
- int buf_idx_0 = cm->frame_refs[FWD_RF_OFFSET(rf[0])].idx;
- int frame0_index = cm->buffer_pool->frame_bufs[buf_idx_0].cur_frame_offset;
-
- int cur_offset_0 = get_relative_dist(cm, cur_frame_index, frame0_index);
- int buf_idx_1 = cm->frame_refs[FWD_RF_OFFSET(rf[1])].idx;
- int frame1_index = cm->buffer_pool->frame_bufs[buf_idx_1].cur_frame_offset;
- int cur_offset_1 = get_relative_dist(cm, cur_frame_index, frame1_index);
- CANDIDATE_MV *ref_mv_stack = ref_mv_stacks[ref_frame];
-
- if (prev_frame_mvs->mfmv0.as_int != INVALID_MV) {
- int_mv this_refmv;
- int_mv comp_refmv;
- get_mv_projection(&this_refmv.as_mv, prev_frame_mvs->mfmv0.as_mv,
- cur_offset_0, prev_frame_mvs->ref_frame_offset);
- get_mv_projection(&comp_refmv.as_mv, prev_frame_mvs->mfmv0.as_mv,
- cur_offset_1, prev_frame_mvs->ref_frame_offset);
-
- lower_mv_precision(&this_refmv.as_mv, cm->allow_high_precision_mv,
- cm->cur_frame_force_integer_mv);
- lower_mv_precision(&comp_refmv.as_mv, cm->allow_high_precision_mv,
- cm->cur_frame_force_integer_mv);
-
- if (blk_row == 0 && blk_col == 0)
- if (abs(this_refmv.as_mv.row - gm_mv_candidates[0].as_mv.row) >= 16 ||
- abs(this_refmv.as_mv.col - gm_mv_candidates[0].as_mv.col) >= 16 ||
- abs(comp_refmv.as_mv.row - gm_mv_candidates[1].as_mv.row) >= 16 ||
- abs(comp_refmv.as_mv.col - gm_mv_candidates[1].as_mv.col) >= 16)
- mode_context[ref_frame] |= (1 << GLOBALMV_OFFSET);
-
- for (idx = 0; idx < refmv_count[ref_frame]; ++idx)
- if (this_refmv.as_int == ref_mv_stack[idx].this_mv.as_int &&
- comp_refmv.as_int == ref_mv_stack[idx].comp_mv.as_int)
- break;
-
- if (idx < refmv_count[ref_frame])
- ref_mv_stack[idx].weight += 2 * weight_unit;
-
- if (idx == refmv_count[ref_frame] &&
- refmv_count[ref_frame] < MAX_REF_MV_STACK_SIZE) {
- ref_mv_stack[idx].this_mv.as_int = this_refmv.as_int;
- ref_mv_stack[idx].comp_mv.as_int = comp_refmv.as_int;
- ref_mv_stack[idx].weight = 2 * weight_unit;
- ++(refmv_count[ref_frame]);
- }
- return 1;
- }
- }
- return 0;
-}
-
-static void process_compound_ref_mv_candidate(
- const MB_MODE_INFO *const candidate, const AV1_COMMON *const cm,
- const MV_REFERENCE_FRAME *const rf, int_mv ref_id[2][2],
- int ref_id_count[2], int_mv ref_diff[2][2], int ref_diff_count[2]) {
- for (int rf_idx = 0; rf_idx < 2; ++rf_idx) {
- MV_REFERENCE_FRAME can_rf = candidate->ref_frame[rf_idx];
-
- for (int cmp_idx = 0; cmp_idx < 2; ++cmp_idx) {
- if (can_rf == rf[cmp_idx] && ref_id_count[cmp_idx] < 2) {
- ref_id[cmp_idx][ref_id_count[cmp_idx]] = candidate->mv[rf_idx];
- ++ref_id_count[cmp_idx];
- } else if (can_rf > INTRA_FRAME && ref_diff_count[cmp_idx] < 2) {
- int_mv this_mv = candidate->mv[rf_idx];
- if (cm->ref_frame_sign_bias[can_rf] !=
- cm->ref_frame_sign_bias[rf[cmp_idx]]) {
- this_mv.as_mv.row = -this_mv.as_mv.row;
- this_mv.as_mv.col = -this_mv.as_mv.col;
- }
- ref_diff[cmp_idx][ref_diff_count[cmp_idx]] = this_mv;
- ++ref_diff_count[cmp_idx];
- }
- }
- }
-}
-
-static void process_single_ref_mv_candidate(
- const MB_MODE_INFO *const candidate, const AV1_COMMON *const cm,
- MV_REFERENCE_FRAME ref_frame, uint8_t refmv_count[MODE_CTX_REF_FRAMES],
- CANDIDATE_MV ref_mv_stack[][MAX_REF_MV_STACK_SIZE]) {
- for (int rf_idx = 0; rf_idx < 2; ++rf_idx) {
- if (candidate->ref_frame[rf_idx] > INTRA_FRAME) {
- int_mv this_mv = candidate->mv[rf_idx];
- if (cm->ref_frame_sign_bias[candidate->ref_frame[rf_idx]] !=
- cm->ref_frame_sign_bias[ref_frame]) {
- this_mv.as_mv.row = -this_mv.as_mv.row;
- this_mv.as_mv.col = -this_mv.as_mv.col;
- }
- int stack_idx;
- for (stack_idx = 0; stack_idx < refmv_count[ref_frame]; ++stack_idx) {
- const int_mv stack_mv = ref_mv_stack[ref_frame][stack_idx].this_mv;
- if (this_mv.as_int == stack_mv.as_int) break;
- }
-
- if (stack_idx == refmv_count[ref_frame]) {
- ref_mv_stack[ref_frame][stack_idx].this_mv = this_mv;
-
- // TODO(jingning): Set an arbitrary small number here. The weight
- // doesn't matter as long as it is properly initialized.
- ref_mv_stack[ref_frame][stack_idx].weight = 2;
- ++refmv_count[ref_frame];
- }
- }
- }
-}
-
-static void setup_ref_mv_list(
- const AV1_COMMON *cm, const MACROBLOCKD *xd, MV_REFERENCE_FRAME ref_frame,
- uint8_t refmv_count[MODE_CTX_REF_FRAMES],
- CANDIDATE_MV ref_mv_stack[][MAX_REF_MV_STACK_SIZE],
- int_mv mv_ref_list[][MAX_MV_REF_CANDIDATES], int_mv *gm_mv_candidates,
- int mi_row, int mi_col, int16_t *mode_context) {
- const int bs = AOMMAX(xd->n4_w, xd->n4_h);
- const int has_tr = has_top_right(cm, xd, mi_row, mi_col, bs);
- MV_REFERENCE_FRAME rf[2];
-
- const TileInfo *const tile = &xd->tile;
- int max_row_offset = 0, max_col_offset = 0;
- const int row_adj = (xd->n4_h < mi_size_high[BLOCK_8X8]) && (mi_row & 0x01);
- const int col_adj = (xd->n4_w < mi_size_wide[BLOCK_8X8]) && (mi_col & 0x01);
- int processed_rows = 0;
- int processed_cols = 0;
-
- av1_set_ref_frame(rf, ref_frame);
- mode_context[ref_frame] = 0;
- refmv_count[ref_frame] = 0;
-
- // Find valid maximum row/col offset.
- if (xd->up_available) {
- max_row_offset = -(MVREF_ROW_COLS << 1) + row_adj;
-
- if (xd->n4_h < mi_size_high[BLOCK_8X8])
- max_row_offset = -(2 << 1) + row_adj;
-
- max_row_offset = find_valid_row_offset(tile, mi_row, max_row_offset);
- }
-
- if (xd->left_available) {
- max_col_offset = -(MVREF_ROW_COLS << 1) + col_adj;
-
- if (xd->n4_w < mi_size_wide[BLOCK_8X8])
- max_col_offset = -(2 << 1) + col_adj;
-
- max_col_offset = find_valid_col_offset(tile, mi_col, max_col_offset);
- }
-
- uint8_t col_match_count = 0;
- uint8_t row_match_count = 0;
- uint8_t newmv_count = 0;
-
- // Scan the first above row mode info. row_offset = -1;
- if (abs(max_row_offset) >= 1)
- scan_row_mbmi(cm, xd, mi_row, mi_col, rf, -1, ref_mv_stack[ref_frame],
- &refmv_count[ref_frame], &row_match_count, &newmv_count,
- gm_mv_candidates, max_row_offset, &processed_rows);
- // Scan the first left column mode info. col_offset = -1;
- if (abs(max_col_offset) >= 1)
- scan_col_mbmi(cm, xd, mi_row, mi_col, rf, -1, ref_mv_stack[ref_frame],
- &refmv_count[ref_frame], &col_match_count, &newmv_count,
- gm_mv_candidates, max_col_offset, &processed_cols);
- // Check top-right boundary
- if (has_tr)
- scan_blk_mbmi(cm, xd, mi_row, mi_col, rf, -1, xd->n4_w,
- ref_mv_stack[ref_frame], &row_match_count, &newmv_count,
- gm_mv_candidates, &refmv_count[ref_frame]);
-
- const uint8_t nearest_match = (row_match_count > 0) + (col_match_count > 0);
- const uint8_t nearest_refmv_count = refmv_count[ref_frame];
-
- // TODO(yunqing): for comp_search, do it for all 3 cases.
- for (int idx = 0; idx < nearest_refmv_count; ++idx)
- ref_mv_stack[ref_frame][idx].weight += REF_CAT_LEVEL;
-
- if (cm->allow_ref_frame_mvs) {
- int is_available = 0;
- const int voffset = AOMMAX(mi_size_high[BLOCK_8X8], xd->n4_h);
- const int hoffset = AOMMAX(mi_size_wide[BLOCK_8X8], xd->n4_w);
- const int blk_row_end = AOMMIN(xd->n4_h, mi_size_high[BLOCK_64X64]);
- const int blk_col_end = AOMMIN(xd->n4_w, mi_size_wide[BLOCK_64X64]);
-
- const int tpl_sample_pos[3][2] = {
- { voffset, -2 },
- { voffset, hoffset },
- { voffset - 2, hoffset },
- };
- const int allow_extension = (xd->n4_h >= mi_size_high[BLOCK_8X8]) &&
- (xd->n4_h < mi_size_high[BLOCK_64X64]) &&
- (xd->n4_w >= mi_size_wide[BLOCK_8X8]) &&
- (xd->n4_w < mi_size_wide[BLOCK_64X64]);
-
- const int step_h = (xd->n4_h >= mi_size_high[BLOCK_64X64])
- ? mi_size_high[BLOCK_16X16]
- : mi_size_high[BLOCK_8X8];
- const int step_w = (xd->n4_w >= mi_size_wide[BLOCK_64X64])
- ? mi_size_wide[BLOCK_16X16]
- : mi_size_wide[BLOCK_8X8];
-
- for (int blk_row = 0; blk_row < blk_row_end; blk_row += step_h) {
- for (int blk_col = 0; blk_col < blk_col_end; blk_col += step_w) {
- int ret = add_tpl_ref_mv(cm, xd, mi_row, mi_col, ref_frame, blk_row,
- blk_col, gm_mv_candidates, refmv_count,
- ref_mv_stack, mode_context);
- if (blk_row == 0 && blk_col == 0) is_available = ret;
- }
- }
-
- if (is_available == 0) mode_context[ref_frame] |= (1 << GLOBALMV_OFFSET);
-
- for (int i = 0; i < 3 && allow_extension; ++i) {
- const int blk_row = tpl_sample_pos[i][0];
- const int blk_col = tpl_sample_pos[i][1];
-
- if (!check_sb_border(mi_row, mi_col, blk_row, blk_col)) continue;
- add_tpl_ref_mv(cm, xd, mi_row, mi_col, ref_frame, blk_row, blk_col,
- gm_mv_candidates, refmv_count, ref_mv_stack, mode_context);
- }
- }
-
- uint8_t dummy_newmv_count = 0;
-
- // Scan the second outer area.
- scan_blk_mbmi(cm, xd, mi_row, mi_col, rf, -1, -1, ref_mv_stack[ref_frame],
- &row_match_count, &dummy_newmv_count, gm_mv_candidates,
- &refmv_count[ref_frame]);
-
- for (int idx = 2; idx <= MVREF_ROW_COLS; ++idx) {
- const int row_offset = -(idx << 1) + 1 + row_adj;
- const int col_offset = -(idx << 1) + 1 + col_adj;
-
- if (abs(row_offset) <= abs(max_row_offset) &&
- abs(row_offset) > processed_rows)
- scan_row_mbmi(cm, xd, mi_row, mi_col, rf, row_offset,
- ref_mv_stack[ref_frame], &refmv_count[ref_frame],
- &row_match_count, &dummy_newmv_count, gm_mv_candidates,
- max_row_offset, &processed_rows);
-
- if (abs(col_offset) <= abs(max_col_offset) &&
- abs(col_offset) > processed_cols)
- scan_col_mbmi(cm, xd, mi_row, mi_col, rf, col_offset,
- ref_mv_stack[ref_frame], &refmv_count[ref_frame],
- &col_match_count, &dummy_newmv_count, gm_mv_candidates,
- max_col_offset, &processed_cols);
- }
-
- const uint8_t ref_match_count = (row_match_count > 0) + (col_match_count > 0);
-
- switch (nearest_match) {
- case 0:
- mode_context[ref_frame] |= 0;
- if (ref_match_count >= 1) mode_context[ref_frame] |= 1;
- if (ref_match_count == 1)
- mode_context[ref_frame] |= (1 << REFMV_OFFSET);
- else if (ref_match_count >= 2)
- mode_context[ref_frame] |= (2 << REFMV_OFFSET);
- break;
- case 1:
- mode_context[ref_frame] |= (newmv_count > 0) ? 2 : 3;
- if (ref_match_count == 1)
- mode_context[ref_frame] |= (3 << REFMV_OFFSET);
- else if (ref_match_count >= 2)
- mode_context[ref_frame] |= (4 << REFMV_OFFSET);
- break;
- case 2:
- default:
- if (newmv_count >= 1)
- mode_context[ref_frame] |= 4;
- else
- mode_context[ref_frame] |= 5;
-
- mode_context[ref_frame] |= (5 << REFMV_OFFSET);
- break;
- }
-
- // Rank the likelihood and assign nearest and near mvs.
- int len = nearest_refmv_count;
- while (len > 0) {
- int nr_len = 0;
- for (int idx = 1; idx < len; ++idx) {
- if (ref_mv_stack[ref_frame][idx - 1].weight <
- ref_mv_stack[ref_frame][idx].weight) {
- CANDIDATE_MV tmp_mv = ref_mv_stack[ref_frame][idx - 1];
- ref_mv_stack[ref_frame][idx - 1] = ref_mv_stack[ref_frame][idx];
- ref_mv_stack[ref_frame][idx] = tmp_mv;
- nr_len = idx;
- }
- }
- len = nr_len;
- }
-
- len = refmv_count[ref_frame];
- while (len > nearest_refmv_count) {
- int nr_len = nearest_refmv_count;
- for (int idx = nearest_refmv_count + 1; idx < len; ++idx) {
- if (ref_mv_stack[ref_frame][idx - 1].weight <
- ref_mv_stack[ref_frame][idx].weight) {
- CANDIDATE_MV tmp_mv = ref_mv_stack[ref_frame][idx - 1];
- ref_mv_stack[ref_frame][idx - 1] = ref_mv_stack[ref_frame][idx];
- ref_mv_stack[ref_frame][idx] = tmp_mv;
- nr_len = idx;
- }
- }
- len = nr_len;
- }
-
- if (rf[1] > NONE_FRAME) {
- // TODO(jingning, yunqing): Refactor and consolidate the compound and
- // single reference frame modes. Reduce unnecessary redundancy.
- if (refmv_count[ref_frame] < MAX_MV_REF_CANDIDATES) {
- int_mv ref_id[2][2], ref_diff[2][2];
- int ref_id_count[2] = { 0 }, ref_diff_count[2] = { 0 };
-
- int mi_width = AOMMIN(mi_size_wide[BLOCK_64X64], xd->n4_w);
- mi_width = AOMMIN(mi_width, cm->mi_cols - mi_col);
- int mi_height = AOMMIN(mi_size_high[BLOCK_64X64], xd->n4_h);
- mi_height = AOMMIN(mi_height, cm->mi_rows - mi_row);
- int mi_size = AOMMIN(mi_width, mi_height);
-
- for (int idx = 0; abs(max_row_offset) >= 1 && idx < mi_size;) {
- const MB_MODE_INFO *const candidate = xd->mi[-xd->mi_stride + idx];
- process_compound_ref_mv_candidate(
- candidate, cm, rf, ref_id, ref_id_count, ref_diff, ref_diff_count);
- idx += mi_size_wide[candidate->sb_type];
- }
-
- for (int idx = 0; abs(max_col_offset) >= 1 && idx < mi_size;) {
- const MB_MODE_INFO *const candidate = xd->mi[idx * xd->mi_stride - 1];
- process_compound_ref_mv_candidate(
- candidate, cm, rf, ref_id, ref_id_count, ref_diff, ref_diff_count);
- idx += mi_size_high[candidate->sb_type];
- }
-
- // Build up the compound mv predictor
- int_mv comp_list[3][2];
-
- for (int idx = 0; idx < 2; ++idx) {
- int comp_idx = 0;
- for (int list_idx = 0; list_idx < ref_id_count[idx] && comp_idx < 2;
- ++list_idx, ++comp_idx)
- comp_list[comp_idx][idx] = ref_id[idx][list_idx];
- for (int list_idx = 0; list_idx < ref_diff_count[idx] && comp_idx < 2;
- ++list_idx, ++comp_idx)
- comp_list[comp_idx][idx] = ref_diff[idx][list_idx];
- for (; comp_idx < 3; ++comp_idx)
- comp_list[comp_idx][idx] = gm_mv_candidates[idx];
- }
-
- if (refmv_count[ref_frame]) {
- assert(refmv_count[ref_frame] == 1);
- if (comp_list[0][0].as_int ==
- ref_mv_stack[ref_frame][0].this_mv.as_int &&
- comp_list[0][1].as_int ==
- ref_mv_stack[ref_frame][0].comp_mv.as_int) {
- ref_mv_stack[ref_frame][refmv_count[ref_frame]].this_mv =
- comp_list[1][0];
- ref_mv_stack[ref_frame][refmv_count[ref_frame]].comp_mv =
- comp_list[1][1];
- } else {
- ref_mv_stack[ref_frame][refmv_count[ref_frame]].this_mv =
- comp_list[0][0];
- ref_mv_stack[ref_frame][refmv_count[ref_frame]].comp_mv =
- comp_list[0][1];
- }
- ref_mv_stack[ref_frame][refmv_count[ref_frame]].weight = 2;
- ++refmv_count[ref_frame];
- } else {
- for (int idx = 0; idx < MAX_MV_REF_CANDIDATES; ++idx) {
- ref_mv_stack[ref_frame][refmv_count[ref_frame]].this_mv =
- comp_list[idx][0];
- ref_mv_stack[ref_frame][refmv_count[ref_frame]].comp_mv =
- comp_list[idx][1];
- ref_mv_stack[ref_frame][refmv_count[ref_frame]].weight = 2;
- ++refmv_count[ref_frame];
- }
- }
- }
-
- assert(refmv_count[ref_frame] >= 2);
-
- for (int idx = 0; idx < refmv_count[ref_frame]; ++idx) {
- clamp_mv_ref(&ref_mv_stack[ref_frame][idx].this_mv.as_mv,
- xd->n4_w << MI_SIZE_LOG2, xd->n4_h << MI_SIZE_LOG2, xd);
- clamp_mv_ref(&ref_mv_stack[ref_frame][idx].comp_mv.as_mv,
- xd->n4_w << MI_SIZE_LOG2, xd->n4_h << MI_SIZE_LOG2, xd);
- }
- } else {
- // Handle single reference frame extension
- int mi_width = AOMMIN(mi_size_wide[BLOCK_64X64], xd->n4_w);
- mi_width = AOMMIN(mi_width, cm->mi_cols - mi_col);
- int mi_height = AOMMIN(mi_size_high[BLOCK_64X64], xd->n4_h);
- mi_height = AOMMIN(mi_height, cm->mi_rows - mi_row);
- int mi_size = AOMMIN(mi_width, mi_height);
-
- for (int idx = 0; abs(max_row_offset) >= 1 && idx < mi_size &&
- refmv_count[ref_frame] < MAX_MV_REF_CANDIDATES;) {
- const MB_MODE_INFO *const candidate = xd->mi[-xd->mi_stride + idx];
- process_single_ref_mv_candidate(candidate, cm, ref_frame, refmv_count,
- ref_mv_stack);
- idx += mi_size_wide[candidate->sb_type];
- }
-
- for (int idx = 0; abs(max_col_offset) >= 1 && idx < mi_size &&
- refmv_count[ref_frame] < MAX_MV_REF_CANDIDATES;) {
- const MB_MODE_INFO *const candidate = xd->mi[idx * xd->mi_stride - 1];
- process_single_ref_mv_candidate(candidate, cm, ref_frame, refmv_count,
- ref_mv_stack);
- idx += mi_size_high[candidate->sb_type];
- }
-
- for (int idx = 0; idx < refmv_count[ref_frame]; ++idx) {
- clamp_mv_ref(&ref_mv_stack[ref_frame][idx].this_mv.as_mv,
- xd->n4_w << MI_SIZE_LOG2, xd->n4_h << MI_SIZE_LOG2, xd);
- }
-
- if (mv_ref_list != NULL) {
- for (int idx = refmv_count[ref_frame]; idx < MAX_MV_REF_CANDIDATES; ++idx)
- mv_ref_list[rf[0]][idx].as_int = gm_mv_candidates[0].as_int;
-
- for (int idx = 0;
- idx < AOMMIN(MAX_MV_REF_CANDIDATES, refmv_count[ref_frame]); ++idx) {
- mv_ref_list[rf[0]][idx].as_int =
- ref_mv_stack[ref_frame][idx].this_mv.as_int;
- }
- }
- }
-}
-
-// Derives the global-motion candidate(s) for ref_frame and hands them to
-// setup_ref_mv_list(), which fills ref_mv_count / ref_mv_stack / mv_ref_list /
-// mode_context for the block at (mi_row, mi_col).
-// When ref_frame is below REF_FRAMES, global_mvs[ref_frame] is also written:
-// INVALID_MV for INTRA_FRAME, the global motion vector otherwise.
-void av1_find_mv_refs(const AV1_COMMON *cm, const MACROBLOCKD *xd,
-                      MB_MODE_INFO *mi, MV_REFERENCE_FRAME ref_frame,
-                      uint8_t ref_mv_count[MODE_CTX_REF_FRAMES],
-                      CANDIDATE_MV ref_mv_stack[][MAX_REF_MV_STACK_SIZE],
-                      int_mv mv_ref_list[][MAX_MV_REF_CANDIDATES],
-                      int_mv *global_mvs, int mi_row, int mi_col,
-                      int16_t *mode_context) {
-  const BLOCK_SIZE block_size = mi->sb_type;
-  MV_REFERENCE_FRAME ref_pair[2];
-  int_mv gm_candidates[2];
-
-  // Split ref_frame into its (first, second) reference frames.
-  av1_set_ref_frame(ref_pair, ref_frame);
-
-  if (ref_frame < REF_FRAMES) {
-    if (ref_frame == INTRA_FRAME) {
-      global_mvs[ref_frame].as_int = INVALID_MV;
-    } else {
-      global_mvs[ref_frame] = gm_get_motion_vector(
-          &cm->global_motion[ref_frame], cm->allow_high_precision_mv,
-          block_size, mi_col, mi_row, cm->cur_frame_force_integer_mv);
-    }
-  }
-
-  // Both candidates default to zero; intra blocks keep that default, and the
-  // second candidate keeps it when there is no second reference.
-  gm_candidates[0].as_int = 0;
-  gm_candidates[1].as_int = 0;
-  if (ref_frame != INTRA_FRAME) {
-    gm_candidates[0].as_int =
-        gm_get_motion_vector(&cm->global_motion[ref_pair[0]],
-                             cm->allow_high_precision_mv, block_size, mi_col,
-                             mi_row, cm->cur_frame_force_integer_mv)
-            .as_int;
-    if (ref_pair[1] != NONE_FRAME) {
-      gm_candidates[1].as_int =
-          gm_get_motion_vector(&cm->global_motion[ref_pair[1]],
-                               cm->allow_high_precision_mv, block_size, mi_col,
-                               mi_row, cm->cur_frame_force_integer_mv)
-              .as_int;
-    }
-  }
-
-  setup_ref_mv_list(cm, xd, ref_frame, ref_mv_count, ref_mv_stack, mv_ref_list,
-                    gm_candidates, mi_row, mi_col, mode_context);
-}
-
-// Lowers the precision of the first MAX_MV_REF_CANDIDATES entries of mvlist
-// in place (per allow_hp / is_integer), then reports entry 0 as the nearest
-// MV and entry 1 as the near MV.
-void av1_find_best_ref_mvs(int allow_hp, int_mv *mvlist, int_mv *nearest_mv,
-                           int_mv *near_mv, int is_integer) {
-  for (int_mv *cand = mvlist; cand != mvlist + MAX_MV_REF_CANDIDATES; ++cand)
-    lower_mv_precision(&cand->as_mv, allow_hp, is_integer);
-
-  *nearest_mv = mvlist[0];
-  *near_mv = mvlist[1];
-}
-
-// Stores the current frame's offset in its frame buffer and, for every
-// reference LAST_FRAME..ALTREF_FRAME that has a valid buffer index, copies
-// that buffer's frame offset into cur_frame->ref_frame_offset[].
-void av1_setup_frame_buf_refs(AV1_COMMON *cm) {
-  cm->cur_frame->cur_frame_offset = cm->frame_offset;
-
-  for (int slot = 0; slot <= ALTREF_FRAME - LAST_FRAME; ++slot) {
-    const int buf_idx = cm->frame_refs[slot].idx;
-    if (buf_idx < 0) continue;  // no buffer behind this reference
-
-    cm->cur_frame->ref_frame_offset[slot] =
-        cm->buffer_pool->frame_bufs[buf_idx].cur_frame_offset;
-  }
-}
-
-// Sets cm->ref_frame_sign_bias[] for LAST_FRAME..ALTREF_FRAME: 1 when order
-// hints are enabled, the reference has a valid buffer, and that buffer's frame
-// offset is after the current frame's; 0 in every other case.
-void av1_setup_frame_sign_bias(AV1_COMMON *cm) {
-  for (int ref = LAST_FRAME; ref <= ALTREF_FRAME; ++ref) {
-    const int buf_idx = cm->frame_refs[ref - LAST_FRAME].idx;
-    int sign_bias = 0;
-
-    if (cm->seq_params.enable_order_hint && buf_idx != INVALID_IDX) {
-      const int ref_order =
-          cm->buffer_pool->frame_bufs[buf_idx].cur_frame_offset;
-      sign_bias = get_relative_dist(cm, ref_order, (int)cm->frame_offset) > 0;
-    }
-
-    cm->ref_frame_sign_bias[ref] = sign_bias;
-  }
-}
-
-// Extra search range (before the >> 3 applied below) allowed around the
-// 8-aligned group that contains the source block.
-#define MAX_OFFSET_WIDTH 64
-#define MAX_OFFSET_HEIGHT 0
-
-// Applies the projected motion vector mv to block (blk_row, blk_col) and
-// writes the resulting position to *mi_r / *mi_c.
-// The displacement is added when sign_bias != 1 and subtracted when
-// sign_bias == 1.
-// Returns 1 on success. Returns 0 (outputs untouched) when the target lies
-// outside [0, mi_rows >> 1) x [0, mi_cols >> 1) or outside the window allowed
-// around the source block's 8-aligned group.
-static int get_block_position(AV1_COMMON *cm, int *mi_r, int *mi_c, int blk_row,
-                              int blk_col, MV mv, int sign_bias) {
-  const int shift = 4 + MI_SIZE_LOG2;
-  const int row_base = (blk_row >> 3) << 3;
-  const int col_base = (blk_col >> 3) << 3;
-
-  // Shift the magnitude and restore the sign so negative components round
-  // toward zero instead of toward minus infinity.
-  int row_delta = (mv.row >= 0) ? (mv.row >> shift) : -((-mv.row) >> shift);
-  int col_delta = (mv.col >= 0) ? (mv.col >> shift) : -((-mv.col) >> shift);
-
-  if (sign_bias == 1) {
-    row_delta = -row_delta;
-    col_delta = -col_delta;
-  }
-
-  const int row = blk_row + row_delta;
-  const int col = blk_col + col_delta;
-
-  const int inside_frame = row >= 0 && row < (cm->mi_rows >> 1) && col >= 0 &&
-                           col < (cm->mi_cols >> 1);
-  if (!inside_frame) return 0;
-
-  const int row_slack = MAX_OFFSET_HEIGHT >> 3;
-  const int col_slack = MAX_OFFSET_WIDTH >> 3;
-  const int inside_window =
-      row >= row_base - row_slack && row < row_base + 8 + row_slack &&
-      col >= col_base - col_slack && col < col_base + 8 + col_slack;
-  if (!inside_window) return 0;
-
-  *mi_r = row;
-  *mi_c = col;
-  return 1;
-}
-
-// Note: motion_field_projection() takes the motion vectors stored with one of
-// the current frame's reference frames and projects them onto the current
-// frame. To keep the terms apart:
-//   - the "start frame" is that reference frame of the current frame;
-//   - the "reference frames" are the start frame's own reference frames;
-//   - ref_offset[] holds the frame distance between the start frame and each
-//     of its reference frames.
-// dir == 2 negates the start-to-current distance, and (dir >> 1) is passed to
-// get_block_position() as the sign bias.
-// Returns 0 when the start frame has no buffer, is intra-only, or differs
-// from the current frame in mi_rows / mi_cols; returns 1 otherwise (even if
-// no vector ended up being projected).
-static int motion_field_projection(AV1_COMMON *cm,
-                                   MV_REFERENCE_FRAME start_frame, int dir) {
-  TPL_MV_REF *tpl_mvs_base = cm->tpl_mvs;
-  int ref_offset[REF_FRAMES] = { 0 };
-
-  const int start_frame_idx = cm->frame_refs[FWD_RF_OFFSET(start_frame)].idx;
-  if (start_frame_idx < 0) return 0;
-
-  if (cm->buffer_pool->frame_bufs[start_frame_idx].intra_only) return 0;
-
-  if (cm->buffer_pool->frame_bufs[start_frame_idx].mi_rows != cm->mi_rows ||
-      cm->buffer_pool->frame_bufs[start_frame_idx].mi_cols != cm->mi_cols)
-    return 0;
-
-  const int start_frame_offset =
-      cm->buffer_pool->frame_bufs[start_frame_idx].cur_frame_offset;
-  const unsigned int *const ref_frame_offsets =
-      &cm->buffer_pool->frame_bufs[start_frame_idx].ref_frame_offset[0];
-  const int cur_frame_offset = cm->cur_frame->cur_frame_offset;
-  int start_to_current_frame_offset =
-      get_relative_dist(cm, start_frame_offset, cur_frame_offset);
-
-  for (MV_REFERENCE_FRAME rf = LAST_FRAME; rf <= INTER_REFS_PER_FRAME; ++rf) {
-    ref_offset[rf] = get_relative_dist(cm, start_frame_offset,
-                                       ref_frame_offsets[rf - LAST_FRAME]);
-  }
-
-  if (dir == 2) start_to_current_frame_offset = -start_to_current_frame_offset;
-
-  // This distance is the same for every block. When it is out of range no
-  // vector can be projected, so skip the scan; the result is still 1, exactly
-  // as if the scan had run and rejected every block.
-  if (abs(start_to_current_frame_offset) > MAX_FRAME_DISTANCE) return 1;
-
-  MV_REF *mv_ref_base = cm->buffer_pool->frame_bufs[start_frame_idx].mvs;
-  const int mvs_rows = (cm->mi_rows + 1) >> 1;
-  const int mvs_cols = (cm->mi_cols + 1) >> 1;
-
-  for (int blk_row = 0; blk_row < mvs_rows; ++blk_row) {
-    for (int blk_col = 0; blk_col < mvs_cols; ++blk_col) {
-      MV_REF *mv_ref = &mv_ref_base[blk_row * mvs_cols + blk_col];
-      MV fwd_mv = mv_ref->mv.as_mv;
-
-      // Only inter-coded entries carry a motion vector to project.
-      if (mv_ref->ref_frame > INTRA_FRAME) {
-        int_mv this_mv;
-        int mi_r, mi_c;
-        const int ref_frame_offset = ref_offset[mv_ref->ref_frame];
-
-        // The stored vector must span a positive, bounded frame distance.
-        int pos_valid = abs(ref_frame_offset) <= MAX_FRAME_DISTANCE &&
-                        ref_frame_offset > 0;
-
-        if (pos_valid) {
-          get_mv_projection(&this_mv.as_mv, fwd_mv,
-                            start_to_current_frame_offset, ref_frame_offset);
-          pos_valid = get_block_position(cm, &mi_r, &mi_c, blk_row, blk_col,
-                                         this_mv.as_mv, dir >> 1);
-        }
-
-        if (pos_valid) {
-          const int mi_offset = mi_r * (cm->mi_stride >> 1) + mi_c;
-
-          // The original (unscaled) vector is stored at the projected spot.
-          tpl_mvs_base[mi_offset].mfmv0.as_mv.row = fwd_mv.row;
-          tpl_mvs_base[mi_offset].mfmv0.as_mv.col = fwd_mv.col;
-          tpl_mvs_base[mi_offset].ref_frame_offset = ref_frame_offset;
-        }
-      }
-    }
-  }
-
-  return 1;
-}
-/* Resets cm->ref_frame_side and, when order hints are enabled, clears the
- * projected motion field in cm->tpl_mvs and refills it by calling
- * motion_field_projection() for LAST, BWDREF, ALTREF2, ALTREF and LAST2 (in
- * that order), each subject to the conditions checked below. */
-void av1_setup_motion_field(AV1_COMMON *cm) {
- memset(cm->ref_frame_side, 0, sizeof(cm->ref_frame_side));
- if (!cm->seq_params.enable_order_hint) return;
- // Mark every projected-MV slot as empty before projecting.
- TPL_MV_REF *tpl_mvs_base = cm->tpl_mvs;
- int size = ((cm->mi_rows + MAX_MIB_SIZE) >> 1) * (cm->mi_stride >> 1);
- for (int idx = 0; idx < size; ++idx) {
- tpl_mvs_base[idx].mfmv0.as_int = INVALID_MV;
- tpl_mvs_base[idx].ref_frame_offset = 0;
- }
-
- const int cur_order_hint = cm->cur_frame->cur_frame_offset;
- RefCntBuffer *const frame_bufs = cm->buffer_pool->frame_bufs;
-
- int ref_buf_idx[INTER_REFS_PER_FRAME];
- int ref_order_hint[INTER_REFS_PER_FRAME];
- /* Cache each reference's buffer index and order hint, and classify it:
- * ref_frame_side becomes 1 when the reference is later than the current
- * frame, -1 when it has the same order hint, and stays 0 otherwise. */
- for (int ref_frame = LAST_FRAME; ref_frame <= ALTREF_FRAME; ref_frame++) {
- const int ref_idx = ref_frame - LAST_FRAME;
- const int buf_idx = cm->frame_refs[ref_idx].idx;
- int order_hint = 0;  // used as-is when the reference has no buffer
-
- if (buf_idx >= 0) order_hint = frame_bufs[buf_idx].cur_frame_offset;
-
- ref_buf_idx[ref_idx] = buf_idx;
- ref_order_hint[ref_idx] = order_hint;
-
- if (get_relative_dist(cm, order_hint, cur_order_hint) > 0)
- cm->ref_frame_side[ref_frame] = 1;
- else if (order_hint == cur_order_hint)
- cm->ref_frame_side[ref_frame] = -1;
- }
- /* ref_stamp counts down as projections are made; the ALTREF and LAST2
- * steps below are skipped once it has dropped below 0. */
- int ref_stamp = MFMV_STACK_SIZE - 1;
- /* LAST_FRAME: not projected when LAST's own ALTREF order hint equals
- * GOLDEN's order hint; ref_stamp is decremented either way. */
- if (ref_buf_idx[LAST_FRAME - LAST_FRAME] >= 0) {
- const int alt_of_lst_order_hint =
- frame_bufs[ref_buf_idx[LAST_FRAME - LAST_FRAME]]
- .ref_frame_offset[ALTREF_FRAME - LAST_FRAME];
-
- const int is_lst_overlay =
- (alt_of_lst_order_hint == ref_order_hint[GOLDEN_FRAME - LAST_FRAME]);
- if (!is_lst_overlay) motion_field_projection(cm, LAST_FRAME, 2);
- --ref_stamp;
- }
- // BWDREF, ALTREF2 and ALTREF are projected only when later than the current frame.
- if (get_relative_dist(cm, ref_order_hint[BWDREF_FRAME - LAST_FRAME],
- cur_order_hint) > 0) {
- if (motion_field_projection(cm, BWDREF_FRAME, 0)) --ref_stamp;
- }
-
- if (get_relative_dist(cm, ref_order_hint[ALTREF2_FRAME - LAST_FRAME],
- cur_order_hint) > 0) {
- if (motion_field_projection(cm, ALTREF2_FRAME, 0)) --ref_stamp;
- }
-
- if (get_relative_dist(cm, ref_order_hint[ALTREF_FRAME - LAST_FRAME],
- cur_order_hint) > 0 &&
- ref_stamp >= 0)
- if (motion_field_projection(cm, ALTREF_FRAME, 0)) --ref_stamp;
- // LAST2 is tried last, and only if it has a buffer and ref_stamp allows it.
- if (ref_stamp >= 0 && ref_buf_idx[LAST2_FRAME - LAST_FRAME] >= 0)
- if (motion_field_projection(cm, LAST2_FRAME, 2)) --ref_stamp;
-}
-
-// Writes one sample for neighbour block mbmi:
-//   pts[0..1]       = (x, y) of a point inside the neighbour, times 8;
-//   pts_inref[0..1] = the same point displaced by the neighbour's mv[0].
-// row_offset / col_offset locate the neighbour in MI_SIZE units; sign_r /
-// sign_c choose on which side of that offset the half-block step is taken.
-static INLINE void record_samples(MB_MODE_INFO *mbmi, int *pts, int *pts_inref,
-                                  int row_offset, int sign_r, int col_offset,
-                                  int sign_c) {
-  const int nb_w = block_size_wide[mbmi->sb_type];
-  const int nb_h = block_size_high[mbmi->sb_type];
-
-  // Offset to the neighbour, plus or minus half of its (at least MI_SIZE)
-  // dimension, minus one.
-  const int center_x =
-      col_offset * MI_SIZE + sign_c * AOMMAX(nb_w, MI_SIZE) / 2 - 1;
-  const int center_y =
-      row_offset * MI_SIZE + sign_r * AOMMAX(nb_h, MI_SIZE) / 2 - 1;
-
-  const int x8 = center_x * 8;
-  const int y8 = center_y * 8;
-
-  pts[0] = x8;
-  pts[1] = y8;
-  pts_inref[0] = x8 + mbmi->mv[0].as_mv.col;
-  pts_inref[1] = y8 + mbmi->mv[0].as_mv.row;
-}
-
-// Filters the len samples in pts / pts_inref by how far each sample's motion
-// (pts_inref - pts) deviates from mv, measured as |dx| + |dy|.
-// Samples deviating by more than clamp(max(block width, block height), 16,
-// 112) are discarded; survivors from the back of the arrays are moved into
-// the freed slots, so the kept samples occupy the first `return value`
-// entries.
-// Returns the number of kept samples, or 1 when none qualifies (the arrays
-// are then left untouched, so sample 0 is used).
-int selectSamples(MV *mv, int *pts, int *pts_inref, int len, BLOCK_SIZE bsize) {
-  const int blk_w = block_size_wide[bsize];
-  const int blk_h = block_size_high[bsize];
-  const int max_dev = clamp(AOMMAX(blk_w, blk_h), 16, 112);
-  int dev[SAMPLES_ARRAY_SIZE] = { 0 };
-  int kept = 0;
-  assert(len <= LEAST_SQUARES_SAMPLES_MAX);
-
-  // Measure every sample; -1 marks a discarded one.
-  for (int s = 0; s < len; ++s) {
-    const int dx = pts_inref[2 * s] - pts[2 * s] - mv->col;
-    const int dy = pts_inref[2 * s + 1] - pts[2 * s + 1] - mv->row;
-    const int dist = abs(dx) + abs(dy);
-
-    if (dist > max_dev) {
-      dev[s] = -1;
-    } else {
-      dev[s] = dist;
-      ++kept;
-    }
-  }
-
-  // Keep at least 1 sample.
-  if (kept == 0) return 1;
-
-  // Compact: fill each hole found from the front with a survivor taken from
-  // the back, until the two cursors cross.
-  int head = 0;
-  int tail = len - 1;
-  for (int holes = len - kept; holes > 0; --holes) {
-    while (dev[head] != -1) ++head;
-    while (dev[tail] == -1) --tail;
-    assert(head != tail);
-    if (head > tail) break;
-
-    dev[head] = dev[tail];
-    pts[2 * head] = pts[2 * tail];
-    pts[2 * head + 1] = pts[2 * tail + 1];
-    pts_inref[2 * head] = pts_inref[2 * tail];
-    pts_inref[2 * head + 1] = pts_inref[2 * tail + 1];
-    ++head;
-    --tail;
-  }
-
-  return kept;
-}
-
-/* Collects samples from the above, left, top-left and top-right neighbours
- * whose first reference frame matches the current block's and that have no
- * second reference. Each sample is a neighbour block's centre point relative
- * to the top-left pixel of the current block (pts) and the same point
- * displaced by that neighbour's motion vector (pts_inref), both at 1/8-pel
- * precision. */
-// Returns the number of samples stored, at most LEAST_SQUARES_SAMPLES_MAX.
-// pts / pts_inref receive (x, y) pairs, i.e. two ints per sample.
-int findSamples(const AV1_COMMON *cm, MACROBLOCKD *xd, int mi_row, int mi_col,
- int *pts, int *pts_inref) {
- MB_MODE_INFO *const mbmi0 = xd->mi[0];
- int ref_frame = mbmi0->ref_frame[0];
- int up_available = xd->up_available;
- int left_available = xd->left_available;
- int i, mi_step = 1, np = 0;
-
- const TileInfo *const tile = &xd->tile;
- int do_tl = 1;  // scan the top-left neighbour unless cleared below
- int do_tr = 1;  // scan the top-right neighbour unless cleared below
-
- // scan the nearest above rows
- if (up_available) {
- int mi_row_offset = -1;
- MB_MODE_INFO *mbmi = xd->mi[mi_row_offset * xd->mi_stride];
- uint8_t n4_w = mi_size_wide[mbmi->sb_type];
-
- if (xd->n4_w <= n4_w) {
- // Handle "current block width <= above block width" case.
- int col_offset = -mi_col % n4_w;
- // The single above block also covers the top-left / top-right position.
- if (col_offset < 0) do_tl = 0;
- if (col_offset + n4_w > xd->n4_w) do_tr = 0;
-
- if (mbmi->ref_frame[0] == ref_frame && mbmi->ref_frame[1] == NONE_FRAME) {
- record_samples(mbmi, pts, pts_inref, 0, -1, col_offset, 1);
- pts += 2;
- pts_inref += 2;
- np++;
- if (np >= LEAST_SQUARES_SAMPLES_MAX) return LEAST_SQUARES_SAMPLES_MAX;
- }
- } else {
- // Handle "current block width > above block width" case.
- for (i = 0; i < AOMMIN(xd->n4_w, cm->mi_cols - mi_col); i += mi_step) {
- int mi_col_offset = i;
- mbmi = xd->mi[mi_col_offset + mi_row_offset * xd->mi_stride];
- n4_w = mi_size_wide[mbmi->sb_type];
- mi_step = AOMMIN(xd->n4_w, n4_w);  // advance by one above block
-
- if (mbmi->ref_frame[0] == ref_frame &&
- mbmi->ref_frame[1] == NONE_FRAME) {
- record_samples(mbmi, pts, pts_inref, 0, -1, i, 1);
- pts += 2;
- pts_inref += 2;
- np++;
- if (np >= LEAST_SQUARES_SAMPLES_MAX) return LEAST_SQUARES_SAMPLES_MAX;
- }
- }
- }
- }
- assert(np <= LEAST_SQUARES_SAMPLES_MAX);
-
- // scan the nearest left columns
- if (left_available) {
- int mi_col_offset = -1;
-
- MB_MODE_INFO *mbmi = xd->mi[mi_col_offset];
- uint8_t n4_h = mi_size_high[mbmi->sb_type];
-
- if (xd->n4_h <= n4_h) {
- // Handle "current block height <= left block height" case.
- int row_offset = -mi_row % n4_h;
- // The single left block also covers the top-left position.
- if (row_offset < 0) do_tl = 0;
-
- if (mbmi->ref_frame[0] == ref_frame && mbmi->ref_frame[1] == NONE_FRAME) {
- record_samples(mbmi, pts, pts_inref, row_offset, 1, 0, -1);
- pts += 2;
- pts_inref += 2;
- np++;
- if (np >= LEAST_SQUARES_SAMPLES_MAX) return LEAST_SQUARES_SAMPLES_MAX;
- }
- } else {
- // Handle "current block height > left block height" case.
- for (i = 0; i < AOMMIN(xd->n4_h, cm->mi_rows - mi_row); i += mi_step) {
- int mi_row_offset = i;
- mbmi = xd->mi[mi_col_offset + mi_row_offset * xd->mi_stride];
- n4_h = mi_size_high[mbmi->sb_type];
- mi_step = AOMMIN(xd->n4_h, n4_h);  // advance by one left block
-
- if (mbmi->ref_frame[0] == ref_frame &&
- mbmi->ref_frame[1] == NONE_FRAME) {
- record_samples(mbmi, pts, pts_inref, i, 1, 0, -1);
- pts += 2;
- pts_inref += 2;
- np++;
- if (np >= LEAST_SQUARES_SAMPLES_MAX) return LEAST_SQUARES_SAMPLES_MAX;
- }
- }
- }
- }
- assert(np <= LEAST_SQUARES_SAMPLES_MAX);
-
- // Top-left block
- if (do_tl && left_available && up_available) {
- int mi_row_offset = -1;
- int mi_col_offset = -1;
-
- MB_MODE_INFO *mbmi = xd->mi[mi_col_offset + mi_row_offset * xd->mi_stride];
-
- if (mbmi->ref_frame[0] == ref_frame && mbmi->ref_frame[1] == NONE_FRAME) {
- record_samples(mbmi, pts, pts_inref, 0, -1, 0, -1);
- pts += 2;
- pts_inref += 2;
- np++;
- if (np >= LEAST_SQUARES_SAMPLES_MAX) return LEAST_SQUARES_SAMPLES_MAX;
- }
- }
- assert(np <= LEAST_SQUARES_SAMPLES_MAX);
-
- // Top-right block
- if (do_tr &&
- has_top_right(cm, xd, mi_row, mi_col, AOMMAX(xd->n4_w, xd->n4_h))) {
- POSITION trb_pos = { -1, xd->n4_w };
-
- if (is_inside(tile, mi_col, mi_row, &trb_pos)) {
- int mi_row_offset = -1;
- int mi_col_offset = xd->n4_w;
-
- MB_MODE_INFO *mbmi =
- xd->mi[mi_col_offset + mi_row_offset * xd->mi_stride];
-
- if (mbmi->ref_frame[0] == ref_frame && mbmi->ref_frame[1] == NONE_FRAME) {
- record_samples(mbmi, pts, pts_inref, 0, -1, xd->n4_w, 1);
- np++;  // last possible sample, so pts / pts_inref are not advanced
- if (np >= LEAST_SQUARES_SAMPLES_MAX) return LEAST_SQUARES_SAMPLES_MAX;
- }
- }
- }
- assert(np <= LEAST_SQUARES_SAMPLES_MAX);
-
- return np;
-}
-/* Decides whether skip mode may be used for the current frame and, if so,
- * which two entries of cm->frame_refs it uses. Sets cm->is_skip_mode_allowed
- * and cm->ref_frame_idx_0 / cm->ref_frame_idx_1 (lower index first). Skip
- * mode stays disabled when order hints are off, the frame is intra-only, or
- * cm->reference_mode is SINGLE_REFERENCE. */
-void av1_setup_skip_mode_allowed(AV1_COMMON *cm) {
- cm->is_skip_mode_allowed = 0;
- cm->ref_frame_idx_0 = cm->ref_frame_idx_1 = INVALID_IDX;
-
- if (!cm->seq_params.enable_order_hint || frame_is_intra_only(cm) ||
- cm->reference_mode == SINGLE_REFERENCE)
- return;
-
- RefCntBuffer *const frame_bufs = cm->buffer_pool->frame_bufs;
- const int cur_frame_offset = cm->frame_offset;
- int ref_frame_offset[2] = { -1, INT_MAX };  // sentinels: nothing found yet
- int ref_idx[2] = { INVALID_IDX, INVALID_IDX };
-
- // Identify the nearest forward and backward references.
- for (int i = 0; i < INTER_REFS_PER_FRAME; ++i) {
- const int buf_idx = cm->frame_refs[i].idx;
- if (buf_idx == INVALID_IDX) continue;
-
- const int ref_offset = frame_bufs[buf_idx].cur_frame_offset;
- if (get_relative_dist(cm, ref_offset, cur_frame_offset) < 0) {
- // Forward reference (before the current frame): keep the latest one.
- if (ref_frame_offset[0] == -1 ||
- get_relative_dist(cm, ref_offset, ref_frame_offset[0]) > 0) {
- ref_frame_offset[0] = ref_offset;
- ref_idx[0] = i;
- }
- } else if (get_relative_dist(cm, ref_offset, cur_frame_offset) > 0) {
- // Backward reference (after the current frame): keep the earliest one.
- if (ref_frame_offset[1] == INT_MAX ||
- get_relative_dist(cm, ref_offset, ref_frame_offset[1]) < 0) {
- ref_frame_offset[1] = ref_offset;
- ref_idx[1] = i;
- }
- }
- }
-
- if (ref_idx[0] != INVALID_IDX && ref_idx[1] != INVALID_IDX) {
- // == Bi-directional prediction ==
- cm->is_skip_mode_allowed = 1;
- cm->ref_frame_idx_0 = AOMMIN(ref_idx[0], ref_idx[1]);
- cm->ref_frame_idx_1 = AOMMAX(ref_idx[0], ref_idx[1]);
- } else if (ref_idx[0] != INVALID_IDX && ref_idx[1] == INVALID_IDX) {
- // == Forward prediction only ==
- // Identify the second nearest forward reference.
- ref_frame_offset[1] = -1;  // slot 1 is reused for the second forward ref
- for (int i = 0; i < INTER_REFS_PER_FRAME; ++i) {
- const int buf_idx = cm->frame_refs[i].idx;
- if (buf_idx == INVALID_IDX) continue;
-
- const int ref_offset = frame_bufs[buf_idx].cur_frame_offset;
- if ((ref_frame_offset[0] != -1 &&
- get_relative_dist(cm, ref_offset, ref_frame_offset[0]) < 0) &&
- (ref_frame_offset[1] == -1 ||
- get_relative_dist(cm, ref_offset, ref_frame_offset[1]) > 0)) {
- // Second closest forward reference
- ref_frame_offset[1] = ref_offset;
- ref_idx[1] = i;
- }
- }
- if (ref_frame_offset[1] != -1) {
- cm->is_skip_mode_allowed = 1;
- cm->ref_frame_idx_0 = AOMMIN(ref_idx[0], ref_idx[1]);
- cm->ref_frame_idx_1 = AOMMAX(ref_idx[0], ref_idx[1]);
- }
- }
-}
-
-typedef struct {
- int map_idx; // index into cm->ref_frame_map
- int buf_idx; // frame buffer index read from cm->ref_frame_map[map_idx]
- int sort_idx; // sort key derived from the frame offset; -1 if not usable
-} REF_FRAME_INFO;
-
-// qsort() comparator for REF_FRAME_INFO: orders by ascending sort_idx and
-// breaks ties by ascending map_idx. Returns -1, 0 or 1.
-// The arguments are cast to pointer-to-const so the const qualifier that
-// qsort() puts on them is not discarded.
-static int compare_ref_frame_info(const void *arg_a, const void *arg_b) {
-  const REF_FRAME_INFO *const lhs = (const REF_FRAME_INFO *)arg_a;
-  const REF_FRAME_INFO *const rhs = (const REF_FRAME_INFO *)arg_b;
-
-  if (lhs->sort_idx != rhs->sort_idx)
-    return (lhs->sort_idx < rhs->sort_idx) ? -1 : 1;
-
-  if (lhs->map_idx != rhs->map_idx)
-    return (lhs->map_idx < rhs->map_idx) ? -1 : 1;
-
-  return 0;
-}
-
-// Points reference slot frame_idx of cm->frame_refs at the frame described
-// by ref_info: copies its map index and buffer index, and sets the slot's buf
-// pointer to that frame buffer's image buffer.
-// frame_idx must lie in [0, INTER_REFS_PER_FRAME).
-static void set_ref_frame_info(AV1_COMMON *const cm, int frame_idx,
-                               REF_FRAME_INFO *ref_info) {
-  assert(frame_idx >= 0 && frame_idx < INTER_REFS_PER_FRAME);
-
-  cm->frame_refs[frame_idx].map_idx = ref_info->map_idx;
-  cm->frame_refs[frame_idx].idx = ref_info->buf_idx;
-  cm->frame_refs[frame_idx].buf =
-      &cm->buffer_pool->frame_bufs[ref_info->buf_idx].buf;
-}
-/* Fills cm->frame_refs[] for all INTER_REFS_PER_FRAME references, given only
- * the ref_frame_map indices of LAST_FRAME (lst_map_idx) and GOLDEN_FRAME
- * (gld_map_idx). The other references are chosen after sorting the
- * REF_FRAMES map entries by their order-hint distance from the current
- * frame. Order hints must be enabled (asserted). Reports
- * AOM_CODEC_CORRUPT_FRAME through aom_internal_error() when LAST or GOLDEN
- * is unusable or is not strictly earlier than the current frame. */
-void av1_set_frame_refs(AV1_COMMON *const cm, int lst_map_idx,
- int gld_map_idx) {
- BufferPool *const pool = cm->buffer_pool;
- RefCntBuffer *const frame_bufs = pool->frame_bufs;
-
- int lst_frame_sort_idx = -1;
- int gld_frame_sort_idx = -1;
-
- assert(cm->seq_params.enable_order_hint);
- assert(cm->seq_params.order_hint_bits_minus_1 >= 0);
- const int cur_frame_offset = (int)cm->frame_offset;
- const int cur_frame_sort_idx = 1 << cm->seq_params.order_hint_bits_minus_1;
-
- REF_FRAME_INFO ref_frame_info[REF_FRAMES];
- int ref_flag_list[INTER_REFS_PER_FRAME] = { 0, 0, 0, 0, 0, 0, 0 };
- // Give every ref_frame_map slot a sort key; -1 marks a slot that is skipped.
- for (int i = 0; i < REF_FRAMES; ++i) {
- const int map_idx = i;
-
- ref_frame_info[i].map_idx = map_idx;
- ref_frame_info[i].sort_idx = -1;
-
- const int buf_idx = cm->ref_frame_map[map_idx];
- ref_frame_info[i].buf_idx = buf_idx;
-
- if (buf_idx < 0 || buf_idx >= FRAME_BUFFERS) continue;
- // TODO(zoeliu@google.com): To verify the checking on ref_count.
- if (frame_bufs[buf_idx].ref_count <= 0) continue;
-
- const int offset = (int)frame_bufs[buf_idx].cur_frame_offset;
- ref_frame_info[i].sort_idx =
- (offset == -1) ? -1
- : cur_frame_sort_idx +
- get_relative_dist(cm, offset, cur_frame_offset);
- assert(ref_frame_info[i].sort_idx >= -1);
-
- if (map_idx == lst_map_idx) lst_frame_sort_idx = ref_frame_info[i].sort_idx;
- if (map_idx == gld_map_idx) gld_frame_sort_idx = ref_frame_info[i].sort_idx;
- }
-
- // Confirm both LAST_FRAME and GOLDEN_FRAME are valid forward reference
- // frames.
- if (lst_frame_sort_idx == -1 || lst_frame_sort_idx >= cur_frame_sort_idx) {
- aom_internal_error(&cm->error, AOM_CODEC_CORRUPT_FRAME,
- "Inter frame requests a look-ahead frame as LAST");
- }
- if (gld_frame_sort_idx == -1 || gld_frame_sort_idx >= cur_frame_sort_idx) {
- aom_internal_error(&cm->error, AOM_CODEC_CORRUPT_FRAME,
- "Inter frame requests a look-ahead frame as GOLDEN");
- }
-
- // Sort ref frames based on their frame_offset values.
- qsort(ref_frame_info, REF_FRAMES, sizeof(REF_FRAME_INFO),
- compare_ref_frame_info);
-
- // Identify forward and backward reference frames.
- // Forward reference: offset < cur_frame_offset
- // Backward reference: offset >= cur_frame_offset
- int fwd_start_idx = 0, fwd_end_idx = REF_FRAMES - 1;
- // After sorting, skipped entries (-1) come first, then forward, then backward.
- for (int i = 0; i < REF_FRAMES; i++) {
- if (ref_frame_info[i].sort_idx == -1) {
- fwd_start_idx++;
- continue;
- }
-
- if (ref_frame_info[i].sort_idx >= cur_frame_sort_idx) {
- fwd_end_idx = i - 1;
- break;
- }
- }
-
- int bwd_start_idx = fwd_end_idx + 1;
- int bwd_end_idx = REF_FRAMES - 1;
-
- // === Backward Reference Frames ===
-
- // == ALTREF_FRAME ==
- if (bwd_start_idx <= bwd_end_idx) {
- set_ref_frame_info(cm, ALTREF_FRAME - LAST_FRAME,
- &ref_frame_info[bwd_end_idx]);  // largest sort key
- ref_flag_list[ALTREF_FRAME - LAST_FRAME] = 1;
- bwd_end_idx--;
- }
-
- // == BWDREF_FRAME ==
- if (bwd_start_idx <= bwd_end_idx) {
- set_ref_frame_info(cm, BWDREF_FRAME - LAST_FRAME,
- &ref_frame_info[bwd_start_idx]);  // smallest backward sort key
- ref_flag_list[BWDREF_FRAME - LAST_FRAME] = 1;
- bwd_start_idx++;
- }
-
- // == ALTREF2_FRAME ==
- if (bwd_start_idx <= bwd_end_idx) {
- set_ref_frame_info(cm, ALTREF2_FRAME - LAST_FRAME,
- &ref_frame_info[bwd_start_idx]);
- ref_flag_list[ALTREF2_FRAME - LAST_FRAME] = 1;
- }
-
- // === Forward Reference Frames ===
-
- for (int i = fwd_start_idx; i <= fwd_end_idx; ++i) {
- // == LAST_FRAME ==
- if (ref_frame_info[i].map_idx == lst_map_idx) {
- set_ref_frame_info(cm, LAST_FRAME - LAST_FRAME, &ref_frame_info[i]);
- ref_flag_list[LAST_FRAME - LAST_FRAME] = 1;
- }
-
- // == GOLDEN_FRAME ==
- if (ref_frame_info[i].map_idx == gld_map_idx) {
- set_ref_frame_info(cm, GOLDEN_FRAME - LAST_FRAME, &ref_frame_info[i]);
- ref_flag_list[GOLDEN_FRAME - LAST_FRAME] = 1;
- }
- }
-
- assert(ref_flag_list[LAST_FRAME - LAST_FRAME] == 1 &&
- ref_flag_list[GOLDEN_FRAME - LAST_FRAME] == 1);
-
- // == LAST2_FRAME ==
- // == LAST3_FRAME ==
- // == BWDREF_FRAME ==
- // == ALTREF2_FRAME ==
- // == ALTREF_FRAME ==
-
- // Set up the reference frames in the anti-chronological order.
- static const MV_REFERENCE_FRAME ref_frame_list[INTER_REFS_PER_FRAME - 2] = {
- LAST2_FRAME, LAST3_FRAME, BWDREF_FRAME, ALTREF2_FRAME, ALTREF_FRAME
- };
-
- int ref_idx;
- for (ref_idx = 0; ref_idx < (INTER_REFS_PER_FRAME - 2); ref_idx++) {
- const MV_REFERENCE_FRAME ref_frame = ref_frame_list[ref_idx];
-
- if (ref_flag_list[ref_frame - LAST_FRAME] == 1) continue;
- // Skip forward entries already taken by LAST or GOLDEN.
- while (fwd_start_idx <= fwd_end_idx &&
- (ref_frame_info[fwd_end_idx].map_idx == lst_map_idx ||
- ref_frame_info[fwd_end_idx].map_idx == gld_map_idx)) {
- fwd_end_idx--;
- }
- if (fwd_start_idx > fwd_end_idx) break;  // no forward entries left
-
- set_ref_frame_info(cm, ref_frame - LAST_FRAME,
- &ref_frame_info[fwd_end_idx]);
- ref_flag_list[ref_frame - LAST_FRAME] = 1;
-
- fwd_end_idx--;
- }
-
- // Assign all the remaining frame(s), if any, to the earliest reference frame.
- for (; ref_idx < (INTER_REFS_PER_FRAME - 2); ref_idx++) {
- const MV_REFERENCE_FRAME ref_frame = ref_frame_list[ref_idx];
- if (ref_flag_list[ref_frame - LAST_FRAME] == 1) continue;
- set_ref_frame_info(cm, ref_frame - LAST_FRAME,
- &ref_frame_info[fwd_start_idx]);
- ref_flag_list[ref_frame - LAST_FRAME] = 1;
- }
-
- for (int i = 0; i < INTER_REFS_PER_FRAME; i++) {
- assert(ref_flag_list[i] == 1);
- }
-}
diff --git a/third_party/aom/av1/common/mvref_common.h b/third_party/aom/av1/common/mvref_common.h
deleted file mode 100644
index 83f7a1ac0..000000000
--- a/third_party/aom/av1/common/mvref_common.h
+++ /dev/null
@@ -1,361 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-#ifndef AOM_AV1_COMMON_MVREF_COMMON_H_
-#define AOM_AV1_COMMON_MVREF_COMMON_H_
-
-#include "av1/common/onyxc_int.h"
-#include "av1/common/blockd.h"
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-#define MVREF_ROW_COLS 3
-
-// Set the upper limit of the motion vector component magnitude.
-// This would make a motion vector fit in 26 bits. Plus 3 bits for the
-// reference frame index. A tuple of motion vector can hence be stored within
-// 32 bit range for efficient load/store operations.
-#define REFMVS_LIMIT ((1 << 12) - 1)
-
-typedef struct position {
- int row;
- int col;
-} POSITION;
-
-// clamp_mv_ref
-#define MV_BORDER (16 << 3) // Allow 16 pels in 1/8th pel units
-
-static INLINE int get_relative_dist(const AV1_COMMON *cm, int a, int b) {
- if (!cm->seq_params.enable_order_hint) return 0;
-
- const int bits = cm->seq_params.order_hint_bits_minus_1 + 1;
-
- assert(bits >= 1);
- assert(a >= 0 && a < (1 << bits));
- assert(b >= 0 && b < (1 << bits));
-
- int diff = a - b;
- const int m = 1 << (bits - 1);
- diff = (diff & (m - 1)) - (diff & m);
- return diff;
-}
-
-static INLINE void clamp_mv_ref(MV *mv, int bw, int bh, const MACROBLOCKD *xd) {
- clamp_mv(mv, xd->mb_to_left_edge - bw * 8 - MV_BORDER,
- xd->mb_to_right_edge + bw * 8 + MV_BORDER,
- xd->mb_to_top_edge - bh * 8 - MV_BORDER,
- xd->mb_to_bottom_edge + bh * 8 + MV_BORDER);
-}
-
-// This function returns either the appropriate sub block or block's mv
-// on whether the block_size < 8x8 and we have check_sub_blocks set.
-static INLINE int_mv get_sub_block_mv(const MB_MODE_INFO *candidate,
- int which_mv, int search_col) {
- (void)search_col;
- return candidate->mv[which_mv];
-}
-
-static INLINE int_mv get_sub_block_pred_mv(const MB_MODE_INFO *candidate,
- int which_mv, int search_col) {
- (void)search_col;
- return candidate->mv[which_mv];
-}
-
-// Performs mv sign inversion if indicated by the reference frame combination.
-static INLINE int_mv scale_mv(const MB_MODE_INFO *mbmi, int ref,
- const MV_REFERENCE_FRAME this_ref_frame,
- const int *ref_sign_bias) {
- int_mv mv = mbmi->mv[ref];
- if (ref_sign_bias[mbmi->ref_frame[ref]] != ref_sign_bias[this_ref_frame]) {
- mv.as_mv.row *= -1;
- mv.as_mv.col *= -1;
- }
- return mv;
-}
-
-// Checks that the given mi_row, mi_col and search point
-// are inside the borders of the tile.
-static INLINE int is_inside(const TileInfo *const tile, int mi_col, int mi_row,
- const POSITION *mi_pos) {
- return !(mi_row + mi_pos->row < tile->mi_row_start ||
- mi_col + mi_pos->col < tile->mi_col_start ||
- mi_row + mi_pos->row >= tile->mi_row_end ||
- mi_col + mi_pos->col >= tile->mi_col_end);
-}
-
-static INLINE int find_valid_row_offset(const TileInfo *const tile, int mi_row,
- int row_offset) {
- return clamp(row_offset, tile->mi_row_start - mi_row,
- tile->mi_row_end - mi_row - 1);
-}
-
-static INLINE int find_valid_col_offset(const TileInfo *const tile, int mi_col,
- int col_offset) {
- return clamp(col_offset, tile->mi_col_start - mi_col,
- tile->mi_col_end - mi_col - 1);
-}
-
-static INLINE void lower_mv_precision(MV *mv, int allow_hp, int is_integer) {
- if (is_integer) {
- integer_mv_precision(mv);
- } else {
- if (!allow_hp) {
- if (mv->row & 1) mv->row += (mv->row > 0 ? -1 : 1);
- if (mv->col & 1) mv->col += (mv->col > 0 ? -1 : 1);
- }
- }
-}
-
-static INLINE int8_t get_uni_comp_ref_idx(const MV_REFERENCE_FRAME *const rf) {
- // Single ref pred
- if (rf[1] <= INTRA_FRAME) return -1;
-
- // Bi-directional comp ref pred
- if ((rf[0] < BWDREF_FRAME) && (rf[1] >= BWDREF_FRAME)) return -1;
-
- for (int8_t ref_idx = 0; ref_idx < TOTAL_UNIDIR_COMP_REFS; ++ref_idx) {
- if (rf[0] == comp_ref0(ref_idx) && rf[1] == comp_ref1(ref_idx))
- return ref_idx;
- }
- return -1;
-}
-
-static INLINE int8_t av1_ref_frame_type(const MV_REFERENCE_FRAME *const rf) {
- if (rf[1] > INTRA_FRAME) {
- const int8_t uni_comp_ref_idx = get_uni_comp_ref_idx(rf);
- if (uni_comp_ref_idx >= 0) {
- assert((REF_FRAMES + FWD_REFS * BWD_REFS + uni_comp_ref_idx) <
- MODE_CTX_REF_FRAMES);
- return REF_FRAMES + FWD_REFS * BWD_REFS + uni_comp_ref_idx;
- } else {
- return REF_FRAMES + FWD_RF_OFFSET(rf[0]) +
- BWD_RF_OFFSET(rf[1]) * FWD_REFS;
- }
- }
-
- return rf[0];
-}
-
-// clang-format off
-static MV_REFERENCE_FRAME ref_frame_map[TOTAL_COMP_REFS][2] = {
- { LAST_FRAME, BWDREF_FRAME }, { LAST2_FRAME, BWDREF_FRAME },
- { LAST3_FRAME, BWDREF_FRAME }, { GOLDEN_FRAME, BWDREF_FRAME },
-
- { LAST_FRAME, ALTREF2_FRAME }, { LAST2_FRAME, ALTREF2_FRAME },
- { LAST3_FRAME, ALTREF2_FRAME }, { GOLDEN_FRAME, ALTREF2_FRAME },
-
- { LAST_FRAME, ALTREF_FRAME }, { LAST2_FRAME, ALTREF_FRAME },
- { LAST3_FRAME, ALTREF_FRAME }, { GOLDEN_FRAME, ALTREF_FRAME },
-
- { LAST_FRAME, LAST2_FRAME }, { LAST_FRAME, LAST3_FRAME },
- { LAST_FRAME, GOLDEN_FRAME }, { BWDREF_FRAME, ALTREF_FRAME },
-
- // NOTE: Following reference frame pairs are not supported to be explicitly
- // signalled, but they are possibly chosen by the use of skip_mode,
- // which may use the most recent one-sided reference frame pair.
- { LAST2_FRAME, LAST3_FRAME }, { LAST2_FRAME, GOLDEN_FRAME },
- { LAST3_FRAME, GOLDEN_FRAME }, {BWDREF_FRAME, ALTREF2_FRAME},
- { ALTREF2_FRAME, ALTREF_FRAME }
-};
-// clang-format on
-
-static INLINE void av1_set_ref_frame(MV_REFERENCE_FRAME *rf,
- int8_t ref_frame_type) {
- if (ref_frame_type >= REF_FRAMES) {
- rf[0] = ref_frame_map[ref_frame_type - REF_FRAMES][0];
- rf[1] = ref_frame_map[ref_frame_type - REF_FRAMES][1];
- } else {
- rf[0] = ref_frame_type;
- rf[1] = NONE_FRAME;
- assert(ref_frame_type > NONE_FRAME);
- }
-}
-
-static uint16_t compound_mode_ctx_map[3][COMP_NEWMV_CTXS] = {
- { 0, 1, 1, 1, 1 },
- { 1, 2, 3, 4, 4 },
- { 4, 4, 5, 6, 7 },
-};
-
-static INLINE int16_t av1_mode_context_analyzer(
- const int16_t *const mode_context, const MV_REFERENCE_FRAME *const rf) {
- const int8_t ref_frame = av1_ref_frame_type(rf);
-
- if (rf[1] <= INTRA_FRAME) return mode_context[ref_frame];
-
- const int16_t newmv_ctx = mode_context[ref_frame] & NEWMV_CTX_MASK;
- const int16_t refmv_ctx =
- (mode_context[ref_frame] >> REFMV_OFFSET) & REFMV_CTX_MASK;
-
- const int16_t comp_ctx = compound_mode_ctx_map[refmv_ctx >> 1][AOMMIN(
- newmv_ctx, COMP_NEWMV_CTXS - 1)];
- return comp_ctx;
-}
-
-static INLINE uint8_t av1_drl_ctx(const CANDIDATE_MV *ref_mv_stack,
- int ref_idx) {
- if (ref_mv_stack[ref_idx].weight >= REF_CAT_LEVEL &&
- ref_mv_stack[ref_idx + 1].weight >= REF_CAT_LEVEL)
- return 0;
-
- if (ref_mv_stack[ref_idx].weight >= REF_CAT_LEVEL &&
- ref_mv_stack[ref_idx + 1].weight < REF_CAT_LEVEL)
- return 1;
-
- if (ref_mv_stack[ref_idx].weight < REF_CAT_LEVEL &&
- ref_mv_stack[ref_idx + 1].weight < REF_CAT_LEVEL)
- return 2;
-
- return 0;
-}
-
-void av1_setup_frame_buf_refs(AV1_COMMON *cm);
-void av1_setup_frame_sign_bias(AV1_COMMON *cm);
-void av1_setup_skip_mode_allowed(AV1_COMMON *cm);
-void av1_setup_motion_field(AV1_COMMON *cm);
-void av1_set_frame_refs(AV1_COMMON *const cm, int lst_map_idx, int gld_map_idx);
-
-static INLINE void av1_collect_neighbors_ref_counts(MACROBLOCKD *const xd) {
- av1_zero(xd->neighbors_ref_counts);
-
- uint8_t *const ref_counts = xd->neighbors_ref_counts;
-
- const MB_MODE_INFO *const above_mbmi = xd->above_mbmi;
- const MB_MODE_INFO *const left_mbmi = xd->left_mbmi;
- const int above_in_image = xd->up_available;
- const int left_in_image = xd->left_available;
-
- // Above neighbor
- if (above_in_image && is_inter_block(above_mbmi)) {
- ref_counts[above_mbmi->ref_frame[0]]++;
- if (has_second_ref(above_mbmi)) {
- ref_counts[above_mbmi->ref_frame[1]]++;
- }
- }
-
- // Left neighbor
- if (left_in_image && is_inter_block(left_mbmi)) {
- ref_counts[left_mbmi->ref_frame[0]]++;
- if (has_second_ref(left_mbmi)) {
- ref_counts[left_mbmi->ref_frame[1]]++;
- }
- }
-}
-
-void av1_copy_frame_mvs(const AV1_COMMON *const cm,
- const MB_MODE_INFO *const mi, int mi_row, int mi_col,
- int x_mis, int y_mis);
-
-void av1_find_mv_refs(const AV1_COMMON *cm, const MACROBLOCKD *xd,
- MB_MODE_INFO *mi, MV_REFERENCE_FRAME ref_frame,
- uint8_t ref_mv_count[MODE_CTX_REF_FRAMES],
- CANDIDATE_MV ref_mv_stack[][MAX_REF_MV_STACK_SIZE],
- int_mv mv_ref_list[][MAX_MV_REF_CANDIDATES],
- int_mv *global_mvs, int mi_row, int mi_col,
- int16_t *mode_context);
-
-// check a list of motion vectors by sad score using a number rows of pixels
-// above and a number cols of pixels in the left to select the one with best
-// score to use as ref motion vector
-void av1_find_best_ref_mvs(int allow_hp, int_mv *mvlist, int_mv *nearest_mv,
- int_mv *near_mv, int is_integer);
-
-int selectSamples(MV *mv, int *pts, int *pts_inref, int len, BLOCK_SIZE bsize);
-int findSamples(const AV1_COMMON *cm, MACROBLOCKD *xd, int mi_row, int mi_col,
- int *pts, int *pts_inref);
-
-#define INTRABC_DELAY_PIXELS 256 // Delay of 256 pixels
-#define INTRABC_DELAY_SB64 (INTRABC_DELAY_PIXELS / 64)
-
-static INLINE void av1_find_ref_dv(int_mv *ref_dv, const TileInfo *const tile,
- int mib_size, int mi_row, int mi_col) {
- (void)mi_col;
- if (mi_row - mib_size < tile->mi_row_start) {
- ref_dv->as_mv.row = 0;
- ref_dv->as_mv.col = -MI_SIZE * mib_size - INTRABC_DELAY_PIXELS;
- } else {
- ref_dv->as_mv.row = -MI_SIZE * mib_size;
- ref_dv->as_mv.col = 0;
- }
- ref_dv->as_mv.row *= 8;
- ref_dv->as_mv.col *= 8;
-}
-
-static INLINE int av1_is_dv_valid(const MV dv, const AV1_COMMON *cm,
- const MACROBLOCKD *xd, int mi_row, int mi_col,
- BLOCK_SIZE bsize, int mib_size_log2) {
- const int bw = block_size_wide[bsize];
- const int bh = block_size_high[bsize];
- const int SCALE_PX_TO_MV = 8;
- // Disallow subpixel for now
- // SUBPEL_MASK is not the correct scale
- if (((dv.row & (SCALE_PX_TO_MV - 1)) || (dv.col & (SCALE_PX_TO_MV - 1))))
- return 0;
-
- const TileInfo *const tile = &xd->tile;
- // Is the source top-left inside the current tile?
- const int src_top_edge = mi_row * MI_SIZE * SCALE_PX_TO_MV + dv.row;
- const int tile_top_edge = tile->mi_row_start * MI_SIZE * SCALE_PX_TO_MV;
- if (src_top_edge < tile_top_edge) return 0;
- const int src_left_edge = mi_col * MI_SIZE * SCALE_PX_TO_MV + dv.col;
- const int tile_left_edge = tile->mi_col_start * MI_SIZE * SCALE_PX_TO_MV;
- if (src_left_edge < tile_left_edge) return 0;
- // Is the bottom right inside the current tile?
- const int src_bottom_edge = (mi_row * MI_SIZE + bh) * SCALE_PX_TO_MV + dv.row;
- const int tile_bottom_edge = tile->mi_row_end * MI_SIZE * SCALE_PX_TO_MV;
- if (src_bottom_edge > tile_bottom_edge) return 0;
- const int src_right_edge = (mi_col * MI_SIZE + bw) * SCALE_PX_TO_MV + dv.col;
- const int tile_right_edge = tile->mi_col_end * MI_SIZE * SCALE_PX_TO_MV;
- if (src_right_edge > tile_right_edge) return 0;
-
- // Special case for sub 8x8 chroma cases, to prevent referring to chroma
- // pixels outside current tile.
- for (int plane = 1; plane < av1_num_planes(cm); ++plane) {
- const struct macroblockd_plane *const pd = &xd->plane[plane];
- if (is_chroma_reference(mi_row, mi_col, bsize, pd->subsampling_x,
- pd->subsampling_y)) {
- if (bw < 8 && pd->subsampling_x)
- if (src_left_edge < tile_left_edge + 4 * SCALE_PX_TO_MV) return 0;
- if (bh < 8 && pd->subsampling_y)
- if (src_top_edge < tile_top_edge + 4 * SCALE_PX_TO_MV) return 0;
- }
- }
-
- // Is the bottom right within an already coded SB? Also consider additional
- // constraints to facilitate HW decoder.
- const int max_mib_size = 1 << mib_size_log2;
- const int active_sb_row = mi_row >> mib_size_log2;
- const int active_sb64_col = (mi_col * MI_SIZE) >> 6;
- const int sb_size = max_mib_size * MI_SIZE;
- const int src_sb_row = ((src_bottom_edge >> 3) - 1) / sb_size;
- const int src_sb64_col = ((src_right_edge >> 3) - 1) >> 6;
- const int total_sb64_per_row =
- ((tile->mi_col_end - tile->mi_col_start - 1) >> 4) + 1;
- const int active_sb64 = active_sb_row * total_sb64_per_row + active_sb64_col;
- const int src_sb64 = src_sb_row * total_sb64_per_row + src_sb64_col;
- if (src_sb64 >= active_sb64 - INTRABC_DELAY_SB64) return 0;
-
- // Wavefront constraint: use only top left area of frame for reference.
- const int gradient = 1 + INTRABC_DELAY_SB64 + (sb_size > 64);
- const int wf_offset = gradient * (active_sb_row - src_sb_row);
- if (src_sb_row > active_sb_row ||
- src_sb64_col >= active_sb64_col - INTRABC_DELAY_SB64 + wf_offset)
- return 0;
-
- return 1;
-}
-
-#ifdef __cplusplus
-} // extern "C"
-#endif
-
-#endif // AOM_AV1_COMMON_MVREF_COMMON_H_
diff --git a/third_party/aom/av1/common/obmc.h b/third_party/aom/av1/common/obmc.h
deleted file mode 100644
index 1c90cd93f..000000000
--- a/third_party/aom/av1/common/obmc.h
+++ /dev/null
@@ -1,91 +0,0 @@
-/*
- * Copyright (c) 2017, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#ifndef AOM_AV1_COMMON_OBMC_H_
-#define AOM_AV1_COMMON_OBMC_H_
-
-typedef void (*overlappable_nb_visitor_t)(MACROBLOCKD *xd, int rel_mi_pos,
- uint8_t nb_mi_size,
- MB_MODE_INFO *nb_mi, void *fun_ctxt,
- const int num_planes);
-
-static INLINE void foreach_overlappable_nb_above(const AV1_COMMON *cm,
- MACROBLOCKD *xd, int mi_col,
- int nb_max,
- overlappable_nb_visitor_t fun,
- void *fun_ctxt) {
- const int num_planes = av1_num_planes(cm);
- if (!xd->up_available) return;
-
- int nb_count = 0;
-
- // prev_row_mi points into the mi array, starting at the beginning of the
- // previous row.
- MB_MODE_INFO **prev_row_mi = xd->mi - mi_col - 1 * xd->mi_stride;
- const int end_col = AOMMIN(mi_col + xd->n4_w, cm->mi_cols);
- uint8_t mi_step;
- for (int above_mi_col = mi_col; above_mi_col < end_col && nb_count < nb_max;
- above_mi_col += mi_step) {
- MB_MODE_INFO **above_mi = prev_row_mi + above_mi_col;
- mi_step =
- AOMMIN(mi_size_wide[above_mi[0]->sb_type], mi_size_wide[BLOCK_64X64]);
- // If we're considering a block with width 4, it should be treated as
- // half of a pair of blocks with chroma information in the second. Move
- // above_mi_col back to the start of the pair if needed, set above_mbmi
- // to point at the block with chroma information, and set mi_step to 2 to
- // step over the entire pair at the end of the iteration.
- if (mi_step == 1) {
- above_mi_col &= ~1;
- above_mi = prev_row_mi + above_mi_col + 1;
- mi_step = 2;
- }
- if (is_neighbor_overlappable(*above_mi)) {
- ++nb_count;
- fun(xd, above_mi_col - mi_col, AOMMIN(xd->n4_w, mi_step), *above_mi,
- fun_ctxt, num_planes);
- }
- }
-}
-
-static INLINE void foreach_overlappable_nb_left(const AV1_COMMON *cm,
- MACROBLOCKD *xd, int mi_row,
- int nb_max,
- overlappable_nb_visitor_t fun,
- void *fun_ctxt) {
- const int num_planes = av1_num_planes(cm);
- if (!xd->left_available) return;
-
- int nb_count = 0;
-
- // prev_col_mi points into the mi array, starting at the top of the
- // previous column
- MB_MODE_INFO **prev_col_mi = xd->mi - 1 - mi_row * xd->mi_stride;
- const int end_row = AOMMIN(mi_row + xd->n4_h, cm->mi_rows);
- uint8_t mi_step;
- for (int left_mi_row = mi_row; left_mi_row < end_row && nb_count < nb_max;
- left_mi_row += mi_step) {
- MB_MODE_INFO **left_mi = prev_col_mi + left_mi_row * xd->mi_stride;
- mi_step =
- AOMMIN(mi_size_high[left_mi[0]->sb_type], mi_size_high[BLOCK_64X64]);
- if (mi_step == 1) {
- left_mi_row &= ~1;
- left_mi = prev_col_mi + (left_mi_row + 1) * xd->mi_stride;
- mi_step = 2;
- }
- if (is_neighbor_overlappable(*left_mi)) {
- ++nb_count;
- fun(xd, left_mi_row - mi_row, AOMMIN(xd->n4_h, mi_step), *left_mi,
- fun_ctxt, num_planes);
- }
- }
-}
-
-#endif // AOM_AV1_COMMON_OBMC_H_
diff --git a/third_party/aom/av1/common/obu_util.c b/third_party/aom/av1/common/obu_util.c
deleted file mode 100644
index 823b700b1..000000000
--- a/third_party/aom/av1/common/obu_util.c
+++ /dev/null
@@ -1,147 +0,0 @@
-/*
- * Copyright (c) 2018, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-#include "av1/common/obu_util.h"
-
-#include "aom_dsp/bitreader_buffer.h"
-
-// Returns 1 when OBU type is valid, and 0 otherwise.
-static int valid_obu_type(int obu_type) {
- int valid_type = 0;
- switch (obu_type) {
- case OBU_SEQUENCE_HEADER:
- case OBU_TEMPORAL_DELIMITER:
- case OBU_FRAME_HEADER:
- case OBU_TILE_GROUP:
- case OBU_METADATA:
- case OBU_FRAME:
- case OBU_REDUNDANT_FRAME_HEADER:
- case OBU_TILE_LIST:
- case OBU_PADDING: valid_type = 1; break;
- default: break;
- }
- return valid_type;
-}
-
-static aom_codec_err_t read_obu_size(const uint8_t *data,
- size_t bytes_available,
- size_t *const obu_size,
- size_t *const length_field_size) {
- uint64_t u_obu_size = 0;
- if (aom_uleb_decode(data, bytes_available, &u_obu_size, length_field_size) !=
- 0) {
- return AOM_CODEC_CORRUPT_FRAME;
- }
-
- if (u_obu_size > UINT32_MAX) return AOM_CODEC_CORRUPT_FRAME;
- *obu_size = (size_t)u_obu_size;
- return AOM_CODEC_OK;
-}
-
-// Parses OBU header and stores values in 'header'.
-static aom_codec_err_t read_obu_header(struct aom_read_bit_buffer *rb,
- int is_annexb, ObuHeader *header) {
- if (!rb || !header) return AOM_CODEC_INVALID_PARAM;
-
- const ptrdiff_t bit_buffer_byte_length = rb->bit_buffer_end - rb->bit_buffer;
- if (bit_buffer_byte_length < 1) return AOM_CODEC_CORRUPT_FRAME;
-
- header->size = 1;
-
- if (aom_rb_read_bit(rb) != 0) {
- // Forbidden bit. Must not be set.
- return AOM_CODEC_CORRUPT_FRAME;
- }
-
- header->type = (OBU_TYPE)aom_rb_read_literal(rb, 4);
-
- if (!valid_obu_type(header->type)) return AOM_CODEC_CORRUPT_FRAME;
-
- header->has_extension = aom_rb_read_bit(rb);
- header->has_size_field = aom_rb_read_bit(rb);
-
- if (!header->has_size_field && !is_annexb) {
- // section 5 obu streams must have obu_size field set.
- return AOM_CODEC_UNSUP_BITSTREAM;
- }
-
- if (aom_rb_read_bit(rb) != 0) {
- // obu_reserved_1bit must be set to 0.
- return AOM_CODEC_CORRUPT_FRAME;
- }
-
- if (header->has_extension) {
- if (bit_buffer_byte_length == 1) return AOM_CODEC_CORRUPT_FRAME;
-
- header->size += 1;
- header->temporal_layer_id = aom_rb_read_literal(rb, 3);
- header->spatial_layer_id = aom_rb_read_literal(rb, 2);
- if (aom_rb_read_literal(rb, 3) != 0) {
- // extension_header_reserved_3bits must be set to 0.
- return AOM_CODEC_CORRUPT_FRAME;
- }
- }
-
- return AOM_CODEC_OK;
-}
-
-aom_codec_err_t aom_read_obu_header(uint8_t *buffer, size_t buffer_length,
- size_t *consumed, ObuHeader *header,
- int is_annexb) {
- if (buffer_length < 1 || !consumed || !header) return AOM_CODEC_INVALID_PARAM;
-
- // TODO(tomfinegan): Set the error handler here and throughout this file, and
- // confirm parsing work done via aom_read_bit_buffer is successful.
- struct aom_read_bit_buffer rb = { buffer, buffer + buffer_length, 0, NULL,
- NULL };
- aom_codec_err_t parse_result = read_obu_header(&rb, is_annexb, header);
- if (parse_result == AOM_CODEC_OK) *consumed = header->size;
- return parse_result;
-}
-
-aom_codec_err_t aom_read_obu_header_and_size(const uint8_t *data,
- size_t bytes_available,
- int is_annexb,
- ObuHeader *obu_header,
- size_t *const payload_size,
- size_t *const bytes_read) {
- size_t length_field_size = 0, obu_size = 0;
- aom_codec_err_t status;
-
- if (is_annexb) {
- // Size field comes before the OBU header, and includes the OBU header
- status =
- read_obu_size(data, bytes_available, &obu_size, &length_field_size);
-
- if (status != AOM_CODEC_OK) return status;
- }
-
- struct aom_read_bit_buffer rb = { data + length_field_size,
- data + bytes_available, 0, NULL, NULL };
-
- status = read_obu_header(&rb, is_annexb, obu_header);
- if (status != AOM_CODEC_OK) return status;
-
- if (is_annexb) {
- // Derive the payload size from the data we've already read
- if (obu_size < obu_header->size) return AOM_CODEC_CORRUPT_FRAME;
-
- *payload_size = obu_size - obu_header->size;
- } else {
- // Size field comes after the OBU header, and is just the payload size
- status = read_obu_size(data + obu_header->size,
- bytes_available - obu_header->size, payload_size,
- &length_field_size);
- if (status != AOM_CODEC_OK) return status;
- }
-
- *bytes_read = length_field_size + obu_header->size;
- return AOM_CODEC_OK;
-}
diff --git a/third_party/aom/av1/common/obu_util.h b/third_party/aom/av1/common/obu_util.h
deleted file mode 100644
index 7c56904c8..000000000
--- a/third_party/aom/av1/common/obu_util.h
+++ /dev/null
@@ -1,47 +0,0 @@
-/*
- * Copyright (c) 2018, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-#ifndef AOM_AV1_COMMON_OBU_UTIL_H_
-#define AOM_AV1_COMMON_OBU_UTIL_H_
-
-#include "aom/aom_codec.h"
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-typedef struct {
- size_t size; // Size (1 or 2 bytes) of the OBU header (including the
- // optional OBU extension header) in the bitstream.
- OBU_TYPE type;
- int has_size_field;
- int has_extension;
- // The following fields come from the OBU extension header and therefore are
- // only used if has_extension is true.
- int temporal_layer_id;
- int spatial_layer_id;
-} ObuHeader;
-
-aom_codec_err_t aom_read_obu_header(uint8_t *buffer, size_t buffer_length,
- size_t *consumed, ObuHeader *header,
- int is_annexb);
-
-aom_codec_err_t aom_read_obu_header_and_size(const uint8_t *data,
- size_t bytes_available,
- int is_annexb,
- ObuHeader *obu_header,
- size_t *const payload_size,
- size_t *const bytes_read);
-
-#ifdef __cplusplus
-} // extern "C"
-#endif
-
-#endif // AOM_AV1_COMMON_OBU_UTIL_H_
diff --git a/third_party/aom/av1/common/odintrin.c b/third_party/aom/av1/common/odintrin.c
deleted file mode 100644
index 7584b2e52..000000000
--- a/third_party/aom/av1/common/odintrin.c
+++ /dev/null
@@ -1,541 +0,0 @@
-/*
- * Copyright (c) 2001-2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-/* clang-format off */
-
-#include "av1/common/odintrin.h"
-
-/*Constants for use with OD_DIVU_SMALL().
- See \cite{Rob05} for details on computing these constants.
- @INPROCEEDINGS{Rob05,
- author="Arch D. Robison",
- title="{N}-bit Unsigned Division via {N}-bit Multiply-Add",
- booktitle="Proc. of the 17th IEEE Symposium on Computer Arithmetic
- (ARITH'05)",
- pages="131--139",
- address="Cape Cod, MA",
- month=Jun,
- year=2005
- }*/
-uint32_t OD_DIVU_SMALL_CONSTS[OD_DIVU_DMAX][2] = {
- { 0xFFFFFFFF, 0xFFFFFFFF }, { 0xFFFFFFFF, 0xFFFFFFFF },
- { 0xAAAAAAAB, 0 }, { 0xFFFFFFFF, 0xFFFFFFFF },
- { 0xCCCCCCCD, 0 }, { 0xAAAAAAAB, 0 },
- { 0x92492492, 0x92492492 }, { 0xFFFFFFFF, 0xFFFFFFFF },
- { 0xE38E38E4, 0 }, { 0xCCCCCCCD, 0 },
- { 0xBA2E8BA3, 0 }, { 0xAAAAAAAB, 0 },
- { 0x9D89D89E, 0 }, { 0x92492492, 0x92492492 },
- { 0x88888889, 0 }, { 0xFFFFFFFF, 0xFFFFFFFF },
- { 0xF0F0F0F1, 0 }, { 0xE38E38E4, 0 },
- { 0xD79435E5, 0xD79435E5 }, { 0xCCCCCCCD, 0 },
- { 0xC30C30C3, 0xC30C30C3 }, { 0xBA2E8BA3, 0 },
- { 0xB21642C9, 0 }, { 0xAAAAAAAB, 0 },
- { 0xA3D70A3E, 0 }, { 0x9D89D89E, 0 },
- { 0x97B425ED, 0x97B425ED }, { 0x92492492, 0x92492492 },
- { 0x8D3DCB09, 0 }, { 0x88888889, 0 },
- { 0x84210842, 0x84210842 }, { 0xFFFFFFFF, 0xFFFFFFFF },
- { 0xF83E0F84, 0 }, { 0xF0F0F0F1, 0 },
- { 0xEA0EA0EA, 0xEA0EA0EA }, { 0xE38E38E4, 0 },
- { 0xDD67C8A6, 0xDD67C8A6 }, { 0xD79435E5, 0xD79435E5 },
- { 0xD20D20D2, 0xD20D20D2 }, { 0xCCCCCCCD, 0 },
- { 0xC7CE0C7D, 0 }, { 0xC30C30C3, 0xC30C30C3 },
- { 0xBE82FA0C, 0 }, { 0xBA2E8BA3, 0 },
- { 0xB60B60B6, 0xB60B60B6 }, { 0xB21642C9, 0 },
- { 0xAE4C415D, 0 }, { 0xAAAAAAAB, 0 },
- { 0xA72F053A, 0 }, { 0xA3D70A3E, 0 },
- { 0xA0A0A0A1, 0 }, { 0x9D89D89E, 0 },
- { 0x9A90E7D9, 0x9A90E7D9 }, { 0x97B425ED, 0x97B425ED },
- { 0x94F2094F, 0x94F2094F }, { 0x92492492, 0x92492492 },
- { 0x8FB823EE, 0x8FB823EE }, { 0x8D3DCB09, 0 },
- { 0x8AD8F2FC, 0 }, { 0x88888889, 0 },
- { 0x864B8A7E, 0 }, { 0x84210842, 0x84210842 },
- { 0x82082082, 0x82082082 }, { 0xFFFFFFFF, 0xFFFFFFFF },
- { 0xFC0FC0FD, 0 }, { 0xF83E0F84, 0 },
- { 0xF4898D60, 0 }, { 0xF0F0F0F1, 0 },
- { 0xED7303B6, 0 }, { 0xEA0EA0EA, 0xEA0EA0EA },
- { 0xE6C2B449, 0 }, { 0xE38E38E4, 0 },
- { 0xE070381C, 0xE070381C }, { 0xDD67C8A6, 0xDD67C8A6 },
- { 0xDA740DA8, 0 }, { 0xD79435E5, 0xD79435E5 },
- { 0xD4C77B04, 0 }, { 0xD20D20D2, 0xD20D20D2 },
- { 0xCF6474A9, 0 }, { 0xCCCCCCCD, 0 },
- { 0xCA4587E7, 0 }, { 0xC7CE0C7D, 0 },
- { 0xC565C87C, 0 }, { 0xC30C30C3, 0xC30C30C3 },
- { 0xC0C0C0C1, 0 }, { 0xBE82FA0C, 0 },
- { 0xBC52640C, 0 }, { 0xBA2E8BA3, 0 },
- { 0xB81702E1, 0 }, { 0xB60B60B6, 0xB60B60B6 },
- { 0xB40B40B4, 0xB40B40B4 }, { 0xB21642C9, 0 },
- { 0xB02C0B03, 0 }, { 0xAE4C415D, 0 },
- { 0xAC769184, 0xAC769184 }, { 0xAAAAAAAB, 0 },
- { 0xA8E83F57, 0xA8E83F57 }, { 0xA72F053A, 0 },
- { 0xA57EB503, 0 }, { 0xA3D70A3E, 0 },
- { 0xA237C32B, 0xA237C32B }, { 0xA0A0A0A1, 0 },
- { 0x9F1165E7, 0x9F1165E7 }, { 0x9D89D89E, 0 },
- { 0x9C09C09C, 0x9C09C09C }, { 0x9A90E7D9, 0x9A90E7D9 },
- { 0x991F1A51, 0x991F1A51 }, { 0x97B425ED, 0x97B425ED },
- { 0x964FDA6C, 0x964FDA6C }, { 0x94F2094F, 0x94F2094F },
- { 0x939A85C4, 0x939A85C4 }, { 0x92492492, 0x92492492 },
- { 0x90FDBC09, 0x90FDBC09 }, { 0x8FB823EE, 0x8FB823EE },
- { 0x8E78356D, 0x8E78356D }, { 0x8D3DCB09, 0 },
- { 0x8C08C08C, 0x8C08C08C }, { 0x8AD8F2FC, 0 },
- { 0x89AE408A, 0 }, { 0x88888889, 0 },
- { 0x8767AB5F, 0x8767AB5F }, { 0x864B8A7E, 0 },
- { 0x85340853, 0x85340853 }, { 0x84210842, 0x84210842 },
- { 0x83126E98, 0 }, { 0x82082082, 0x82082082 },
- { 0x81020408, 0x81020408 }, { 0xFFFFFFFF, 0xFFFFFFFF },
- { 0xFE03F810, 0 }, { 0xFC0FC0FD, 0 },
- { 0xFA232CF3, 0 }, { 0xF83E0F84, 0 },
- { 0xF6603D99, 0 }, { 0xF4898D60, 0 },
- { 0xF2B9D649, 0 }, { 0xF0F0F0F1, 0 },
- { 0xEF2EB720, 0 }, { 0xED7303B6, 0 },
- { 0xEBBDB2A6, 0 }, { 0xEA0EA0EA, 0xEA0EA0EA },
- { 0xE865AC7C, 0 }, { 0xE6C2B449, 0 },
- { 0xE525982B, 0 }, { 0xE38E38E4, 0 },
- { 0xE1FC780F, 0 }, { 0xE070381C, 0xE070381C },
- { 0xDEE95C4D, 0 }, { 0xDD67C8A6, 0xDD67C8A6 },
- { 0xDBEB61EF, 0 }, { 0xDA740DA8, 0 },
- { 0xD901B204, 0 }, { 0xD79435E5, 0xD79435E5 },
- { 0xD62B80D7, 0 }, { 0xD4C77B04, 0 },
- { 0xD3680D37, 0 }, { 0xD20D20D2, 0xD20D20D2 },
- { 0xD0B69FCC, 0 }, { 0xCF6474A9, 0 },
- { 0xCE168A77, 0xCE168A77 }, { 0xCCCCCCCD, 0 },
- { 0xCB8727C1, 0 }, { 0xCA4587E7, 0 },
- { 0xC907DA4F, 0 }, { 0xC7CE0C7D, 0 },
- { 0xC6980C6A, 0 }, { 0xC565C87C, 0 },
- { 0xC4372F86, 0 }, { 0xC30C30C3, 0xC30C30C3 },
- { 0xC1E4BBD6, 0 }, { 0xC0C0C0C1, 0 },
- { 0xBFA02FE8, 0xBFA02FE8 }, { 0xBE82FA0C, 0 },
- { 0xBD691047, 0xBD691047 }, { 0xBC52640C, 0 },
- { 0xBB3EE722, 0 }, { 0xBA2E8BA3, 0 },
- { 0xB92143FA, 0xB92143FA }, { 0xB81702E1, 0 },
- { 0xB70FBB5A, 0xB70FBB5A }, { 0xB60B60B6, 0xB60B60B6 },
- { 0xB509E68B, 0 }, { 0xB40B40B4, 0xB40B40B4 },
- { 0xB30F6353, 0 }, { 0xB21642C9, 0 },
- { 0xB11FD3B8, 0xB11FD3B8 }, { 0xB02C0B03, 0 },
- { 0xAF3ADDC7, 0 }, { 0xAE4C415D, 0 },
- { 0xAD602B58, 0xAD602B58 }, { 0xAC769184, 0xAC769184 },
- { 0xAB8F69E3, 0 }, { 0xAAAAAAAB, 0 },
- { 0xA9C84A48, 0 }, { 0xA8E83F57, 0xA8E83F57 },
- { 0xA80A80A8, 0xA80A80A8 }, { 0xA72F053A, 0 },
- { 0xA655C439, 0xA655C439 }, { 0xA57EB503, 0 },
- { 0xA4A9CF1E, 0 }, { 0xA3D70A3E, 0 },
- { 0xA3065E40, 0 }, { 0xA237C32B, 0xA237C32B },
- { 0xA16B312F, 0 }, { 0xA0A0A0A1, 0 },
- { 0x9FD809FE, 0 }, { 0x9F1165E7, 0x9F1165E7 },
- { 0x9E4CAD24, 0 }, { 0x9D89D89E, 0 },
- { 0x9CC8E161, 0 }, { 0x9C09C09C, 0x9C09C09C },
- { 0x9B4C6F9F, 0 }, { 0x9A90E7D9, 0x9A90E7D9 },
- { 0x99D722DB, 0 }, { 0x991F1A51, 0x991F1A51 },
- { 0x9868C80A, 0 }, { 0x97B425ED, 0x97B425ED },
- { 0x97012E02, 0x97012E02 }, { 0x964FDA6C, 0x964FDA6C },
- { 0x95A02568, 0x95A02568 }, { 0x94F2094F, 0x94F2094F },
- { 0x94458094, 0x94458094 }, { 0x939A85C4, 0x939A85C4 },
- { 0x92F11384, 0x92F11384 }, { 0x92492492, 0x92492492 },
- { 0x91A2B3C5, 0 }, { 0x90FDBC09, 0x90FDBC09 },
- { 0x905A3863, 0x905A3863 }, { 0x8FB823EE, 0x8FB823EE },
- { 0x8F1779DA, 0 }, { 0x8E78356D, 0x8E78356D },
- { 0x8DDA5202, 0x8DDA5202 }, { 0x8D3DCB09, 0 },
- { 0x8CA29C04, 0x8CA29C04 }, { 0x8C08C08C, 0x8C08C08C },
- { 0x8B70344A, 0x8B70344A }, { 0x8AD8F2FC, 0 },
- { 0x8A42F870, 0x8A42F870 }, { 0x89AE408A, 0 },
- { 0x891AC73B, 0 }, { 0x88888889, 0 },
- { 0x87F78088, 0 }, { 0x8767AB5F, 0x8767AB5F },
- { 0x86D90545, 0 }, { 0x864B8A7E, 0 },
- { 0x85BF3761, 0x85BF3761 }, { 0x85340853, 0x85340853 },
- { 0x84A9F9C8, 0x84A9F9C8 }, { 0x84210842, 0x84210842 },
- { 0x83993052, 0x83993052 }, { 0x83126E98, 0 },
- { 0x828CBFBF, 0 }, { 0x82082082, 0x82082082 },
- { 0x81848DA9, 0 }, { 0x81020408, 0x81020408 },
- { 0x80808081, 0 }, { 0xFFFFFFFF, 0xFFFFFFFF },
- { 0xFF00FF01, 0 }, { 0xFE03F810, 0 },
- { 0xFD08E551, 0 }, { 0xFC0FC0FD, 0 },
- { 0xFB188566, 0 }, { 0xFA232CF3, 0 },
- { 0xF92FB222, 0 }, { 0xF83E0F84, 0 },
- { 0xF74E3FC3, 0 }, { 0xF6603D99, 0 },
- { 0xF57403D6, 0 }, { 0xF4898D60, 0 },
- { 0xF3A0D52D, 0 }, { 0xF2B9D649, 0 },
- { 0xF1D48BCF, 0 }, { 0xF0F0F0F1, 0 },
- { 0xF00F00F0, 0xF00F00F0 }, { 0xEF2EB720, 0 },
- { 0xEE500EE5, 0xEE500EE5 }, { 0xED7303B6, 0 },
- { 0xEC979119, 0 }, { 0xEBBDB2A6, 0 },
- { 0xEAE56404, 0 }, { 0xEA0EA0EA, 0xEA0EA0EA },
- { 0xE9396520, 0 }, { 0xE865AC7C, 0 },
- { 0xE79372E3, 0 }, { 0xE6C2B449, 0 },
- { 0xE5F36CB0, 0xE5F36CB0 }, { 0xE525982B, 0 },
- { 0xE45932D8, 0 }, { 0xE38E38E4, 0 },
- { 0xE2C4A689, 0 }, { 0xE1FC780F, 0 },
- { 0xE135A9CA, 0 }, { 0xE070381C, 0xE070381C },
- { 0xDFAC1F75, 0 }, { 0xDEE95C4D, 0 },
- { 0xDE27EB2D, 0 }, { 0xDD67C8A6, 0xDD67C8A6 },
- { 0xDCA8F159, 0 }, { 0xDBEB61EF, 0 },
- { 0xDB2F171E, 0 }, { 0xDA740DA8, 0 },
- { 0xD9BA4257, 0 }, { 0xD901B204, 0 },
- { 0xD84A598F, 0 }, { 0xD79435E5, 0xD79435E5 },
- { 0xD6DF43FD, 0 }, { 0xD62B80D7, 0 },
- { 0xD578E97D, 0 }, { 0xD4C77B04, 0 },
- { 0xD417328A, 0 }, { 0xD3680D37, 0 },
- { 0xD2BA083C, 0 }, { 0xD20D20D2, 0xD20D20D2 },
- { 0xD161543E, 0xD161543E }, { 0xD0B69FCC, 0 },
- { 0xD00D00D0, 0xD00D00D0 }, { 0xCF6474A9, 0 },
- { 0xCEBCF8BC, 0 }, { 0xCE168A77, 0xCE168A77 },
- { 0xCD712753, 0 }, { 0xCCCCCCCD, 0 },
- { 0xCC29786D, 0 }, { 0xCB8727C1, 0 },
- { 0xCAE5D85F, 0xCAE5D85F }, { 0xCA4587E7, 0 },
- { 0xC9A633FD, 0 }, { 0xC907DA4F, 0 },
- { 0xC86A7890, 0xC86A7890 }, { 0xC7CE0C7D, 0 },
- { 0xC73293D8, 0 }, { 0xC6980C6A, 0 },
- { 0xC5FE7403, 0xC5FE7403 }, { 0xC565C87C, 0 },
- { 0xC4CE07B0, 0xC4CE07B0 }, { 0xC4372F86, 0 },
- { 0xC3A13DE6, 0xC3A13DE6 }, { 0xC30C30C3, 0xC30C30C3 },
- { 0xC2780614, 0 }, { 0xC1E4BBD6, 0 },
- { 0xC152500C, 0xC152500C }, { 0xC0C0C0C1, 0 },
- { 0xC0300C03, 0xC0300C03 }, { 0xBFA02FE8, 0xBFA02FE8 },
- { 0xBF112A8B, 0 }, { 0xBE82FA0C, 0 },
- { 0xBDF59C92, 0 }, { 0xBD691047, 0xBD691047 },
- { 0xBCDD535E, 0 }, { 0xBC52640C, 0 },
- { 0xBBC8408D, 0 }, { 0xBB3EE722, 0 },
- { 0xBAB65610, 0xBAB65610 }, { 0xBA2E8BA3, 0 },
- { 0xB9A7862A, 0xB9A7862A }, { 0xB92143FA, 0xB92143FA },
- { 0xB89BC36D, 0 }, { 0xB81702E1, 0 },
- { 0xB79300B8, 0 }, { 0xB70FBB5A, 0xB70FBB5A },
- { 0xB68D3134, 0xB68D3134 }, { 0xB60B60B6, 0xB60B60B6 },
- { 0xB58A4855, 0xB58A4855 }, { 0xB509E68B, 0 },
- { 0xB48A39D4, 0xB48A39D4 }, { 0xB40B40B4, 0xB40B40B4 },
- { 0xB38CF9B0, 0xB38CF9B0 }, { 0xB30F6353, 0 },
- { 0xB2927C2A, 0 }, { 0xB21642C9, 0 },
- { 0xB19AB5C5, 0 }, { 0xB11FD3B8, 0xB11FD3B8 },
- { 0xB0A59B42, 0 }, { 0xB02C0B03, 0 },
- { 0xAFB321A1, 0xAFB321A1 }, { 0xAF3ADDC7, 0 },
- { 0xAEC33E20, 0 }, { 0xAE4C415D, 0 },
- { 0xADD5E632, 0xADD5E632 }, { 0xAD602B58, 0xAD602B58 },
- { 0xACEB0F89, 0xACEB0F89 }, { 0xAC769184, 0xAC769184 },
- { 0xAC02B00B, 0 }, { 0xAB8F69E3, 0 },
- { 0xAB1CBDD4, 0 }, { 0xAAAAAAAB, 0 },
- { 0xAA392F36, 0 }, { 0xA9C84A48, 0 },
- { 0xA957FAB5, 0xA957FAB5 }, { 0xA8E83F57, 0xA8E83F57 },
- { 0xA8791709, 0 }, { 0xA80A80A8, 0xA80A80A8 },
- { 0xA79C7B17, 0 }, { 0xA72F053A, 0 },
- { 0xA6C21DF7, 0 }, { 0xA655C439, 0xA655C439 },
- { 0xA5E9F6ED, 0xA5E9F6ED }, { 0xA57EB503, 0 },
- { 0xA513FD6C, 0 }, { 0xA4A9CF1E, 0 },
- { 0xA4402910, 0xA4402910 }, { 0xA3D70A3E, 0 },
- { 0xA36E71A3, 0 }, { 0xA3065E40, 0 },
- { 0xA29ECF16, 0xA29ECF16 }, { 0xA237C32B, 0xA237C32B },
- { 0xA1D13986, 0 }, { 0xA16B312F, 0 },
- { 0xA105A933, 0 }, { 0xA0A0A0A1, 0 },
- { 0xA03C1689, 0 }, { 0x9FD809FE, 0 },
- { 0x9F747A15, 0x9F747A15 }, { 0x9F1165E7, 0x9F1165E7 },
- { 0x9EAECC8D, 0x9EAECC8D }, { 0x9E4CAD24, 0 },
- { 0x9DEB06C9, 0x9DEB06C9 }, { 0x9D89D89E, 0 },
- { 0x9D2921C4, 0 }, { 0x9CC8E161, 0 },
- { 0x9C69169B, 0x9C69169B }, { 0x9C09C09C, 0x9C09C09C },
- { 0x9BAADE8E, 0x9BAADE8E }, { 0x9B4C6F9F, 0 },
- { 0x9AEE72FD, 0 }, { 0x9A90E7D9, 0x9A90E7D9 },
- { 0x9A33CD67, 0x9A33CD67 }, { 0x99D722DB, 0 },
- { 0x997AE76B, 0x997AE76B }, { 0x991F1A51, 0x991F1A51 },
- { 0x98C3BAC7, 0x98C3BAC7 }, { 0x9868C80A, 0 },
- { 0x980E4156, 0x980E4156 }, { 0x97B425ED, 0x97B425ED },
- { 0x975A7510, 0 }, { 0x97012E02, 0x97012E02 },
- { 0x96A8500A, 0 }, { 0x964FDA6C, 0x964FDA6C },
- { 0x95F7CC73, 0 }, { 0x95A02568, 0x95A02568 },
- { 0x9548E498, 0 }, { 0x94F2094F, 0x94F2094F },
- { 0x949B92DE, 0 }, { 0x94458094, 0x94458094 },
- { 0x93EFD1C5, 0x93EFD1C5 }, { 0x939A85C4, 0x939A85C4 },
- { 0x93459BE7, 0 }, { 0x92F11384, 0x92F11384 },
- { 0x929CEBF5, 0 }, { 0x92492492, 0x92492492 },
- { 0x91F5BCB9, 0 }, { 0x91A2B3C5, 0 },
- { 0x91500915, 0x91500915 }, { 0x90FDBC09, 0x90FDBC09 },
- { 0x90ABCC02, 0x90ABCC02 }, { 0x905A3863, 0x905A3863 },
- { 0x90090090, 0x90090090 }, { 0x8FB823EE, 0x8FB823EE },
- { 0x8F67A1E4, 0 }, { 0x8F1779DA, 0 },
- { 0x8EC7AB3A, 0 }, { 0x8E78356D, 0x8E78356D },
- { 0x8E2917E1, 0 }, { 0x8DDA5202, 0x8DDA5202 },
- { 0x8D8BE340, 0 }, { 0x8D3DCB09, 0 },
- { 0x8CF008CF, 0x8CF008CF }, { 0x8CA29C04, 0x8CA29C04 },
- { 0x8C55841D, 0 }, { 0x8C08C08C, 0x8C08C08C },
- { 0x8BBC50C9, 0 }, { 0x8B70344A, 0x8B70344A },
- { 0x8B246A88, 0 }, { 0x8AD8F2FC, 0 },
- { 0x8A8DCD20, 0 }, { 0x8A42F870, 0x8A42F870 },
- { 0x89F8746A, 0 }, { 0x89AE408A, 0 },
- { 0x89645C4F, 0x89645C4F }, { 0x891AC73B, 0 },
- { 0x88D180CD, 0x88D180CD }, { 0x88888889, 0 },
- { 0x883FDDF0, 0x883FDDF0 }, { 0x87F78088, 0 },
- { 0x87AF6FD6, 0 }, { 0x8767AB5F, 0x8767AB5F },
- { 0x872032AC, 0x872032AC }, { 0x86D90545, 0 },
- { 0x869222B2, 0 }, { 0x864B8A7E, 0 },
- { 0x86053C34, 0x86053C34 }, { 0x85BF3761, 0x85BF3761 },
- { 0x85797B91, 0x85797B91 }, { 0x85340853, 0x85340853 },
- { 0x84EEDD36, 0 }, { 0x84A9F9C8, 0x84A9F9C8 },
- { 0x84655D9C, 0 }, { 0x84210842, 0x84210842 },
- { 0x83DCF94E, 0 }, { 0x83993052, 0x83993052 },
- { 0x8355ACE4, 0 }, { 0x83126E98, 0 },
- { 0x82CF7504, 0 }, { 0x828CBFBF, 0 },
- { 0x824A4E61, 0 }, { 0x82082082, 0x82082082 },
- { 0x81C635BC, 0x81C635BC }, { 0x81848DA9, 0 },
- { 0x814327E4, 0 }, { 0x81020408, 0x81020408 },
- { 0x80C121B3, 0 }, { 0x80808081, 0 },
- { 0x80402010, 0x80402010 }, { 0xFFFFFFFF, 0xFFFFFFFF },
- { 0xFF803FE1, 0 }, { 0xFF00FF01, 0 },
- { 0xFE823CA6, 0 }, { 0xFE03F810, 0 },
- { 0xFD863087, 0 }, { 0xFD08E551, 0 },
- { 0xFC8C15B5, 0 }, { 0xFC0FC0FD, 0 },
- { 0xFB93E673, 0 }, { 0xFB188566, 0 },
- { 0xFA9D9D20, 0 }, { 0xFA232CF3, 0 },
- { 0xF9A9342D, 0 }, { 0xF92FB222, 0 },
- { 0xF8B6A622, 0xF8B6A622 }, { 0xF83E0F84, 0 },
- { 0xF7C5ED9D, 0 }, { 0xF74E3FC3, 0 },
- { 0xF6D7054E, 0 }, { 0xF6603D99, 0 },
- { 0xF5E9E7FD, 0 }, { 0xF57403D6, 0 },
- { 0xF4FE9083, 0 }, { 0xF4898D60, 0 },
- { 0xF414F9CE, 0 }, { 0xF3A0D52D, 0 },
- { 0xF32D1EE0, 0 }, { 0xF2B9D649, 0 },
- { 0xF246FACC, 0 }, { 0xF1D48BCF, 0 },
- { 0xF16288B9, 0 }, { 0xF0F0F0F1, 0 },
- { 0xF07FC3E0, 0xF07FC3E0 }, { 0xF00F00F0, 0xF00F00F0 },
- { 0xEF9EA78C, 0 }, { 0xEF2EB720, 0 },
- { 0xEEBF2F19, 0 }, { 0xEE500EE5, 0xEE500EE5 },
- { 0xEDE155F4, 0 }, { 0xED7303B6, 0 },
- { 0xED05179C, 0xED05179C }, { 0xEC979119, 0 },
- { 0xEC2A6FA0, 0xEC2A6FA0 }, { 0xEBBDB2A6, 0 },
- { 0xEB5159A0, 0 }, { 0xEAE56404, 0 },
- { 0xEA79D14A, 0 }, { 0xEA0EA0EA, 0xEA0EA0EA },
- { 0xE9A3D25E, 0xE9A3D25E }, { 0xE9396520, 0 },
- { 0xE8CF58AB, 0 }, { 0xE865AC7C, 0 },
- { 0xE7FC600F, 0 }, { 0xE79372E3, 0 },
- { 0xE72AE476, 0 }, { 0xE6C2B449, 0 },
- { 0xE65AE1DC, 0 }, { 0xE5F36CB0, 0xE5F36CB0 },
- { 0xE58C544A, 0 }, { 0xE525982B, 0 },
- { 0xE4BF37D9, 0 }, { 0xE45932D8, 0 },
- { 0xE3F388AF, 0 }, { 0xE38E38E4, 0 },
- { 0xE32942FF, 0 }, { 0xE2C4A689, 0 },
- { 0xE260630B, 0 }, { 0xE1FC780F, 0 },
- { 0xE198E520, 0 }, { 0xE135A9CA, 0 },
- { 0xE0D2C59A, 0 }, { 0xE070381C, 0xE070381C },
- { 0xE00E00E0, 0xE00E00E0 }, { 0xDFAC1F75, 0 },
- { 0xDF4A9369, 0 }, { 0xDEE95C4D, 0 },
- { 0xDE8879B3, 0 }, { 0xDE27EB2D, 0 },
- { 0xDDC7B04D, 0 }, { 0xDD67C8A6, 0xDD67C8A6 },
- { 0xDD0833CE, 0 }, { 0xDCA8F159, 0 },
- { 0xDC4A00DD, 0 }, { 0xDBEB61EF, 0 },
- { 0xDB8D1428, 0 }, { 0xDB2F171E, 0 },
- { 0xDAD16A6B, 0 }, { 0xDA740DA8, 0 },
- { 0xDA17006D, 0xDA17006D }, { 0xD9BA4257, 0 },
- { 0xD95DD300, 0 }, { 0xD901B204, 0 },
- { 0xD8A5DEFF, 0 }, { 0xD84A598F, 0 },
- { 0xD7EF2152, 0 }, { 0xD79435E5, 0xD79435E5 },
- { 0xD73996E9, 0 }, { 0xD6DF43FD, 0 },
- { 0xD6853CC1, 0 }, { 0xD62B80D7, 0 },
- { 0xD5D20FDF, 0 }, { 0xD578E97D, 0 },
- { 0xD5200D52, 0xD5200D52 }, { 0xD4C77B04, 0 },
- { 0xD46F3235, 0 }, { 0xD417328A, 0 },
- { 0xD3BF7BA9, 0 }, { 0xD3680D37, 0 },
- { 0xD310E6DB, 0 }, { 0xD2BA083C, 0 },
- { 0xD2637101, 0 }, { 0xD20D20D2, 0xD20D20D2 },
- { 0xD1B71759, 0 }, { 0xD161543E, 0xD161543E },
- { 0xD10BD72C, 0 }, { 0xD0B69FCC, 0 },
- { 0xD061ADCA, 0 }, { 0xD00D00D0, 0xD00D00D0 },
- { 0xCFB8988C, 0 }, { 0xCF6474A9, 0 },
- { 0xCF1094D4, 0 }, { 0xCEBCF8BC, 0 },
- { 0xCE69A00D, 0 }, { 0xCE168A77, 0xCE168A77 },
- { 0xCDC3B7A9, 0xCDC3B7A9 }, { 0xCD712753, 0 },
- { 0xCD1ED924, 0 }, { 0xCCCCCCCD, 0 },
- { 0xCC7B0200, 0 }, { 0xCC29786D, 0 },
- { 0xCBD82FC7, 0 }, { 0xCB8727C1, 0 },
- { 0xCB36600D, 0 }, { 0xCAE5D85F, 0xCAE5D85F },
- { 0xCA95906C, 0 }, { 0xCA4587E7, 0 },
- { 0xC9F5BE86, 0 }, { 0xC9A633FD, 0 },
- { 0xC956E803, 0xC956E803 }, { 0xC907DA4F, 0 },
- { 0xC8B90A96, 0 }, { 0xC86A7890, 0xC86A7890 },
- { 0xC81C23F5, 0xC81C23F5 }, { 0xC7CE0C7D, 0 },
- { 0xC78031E0, 0xC78031E0 }, { 0xC73293D8, 0 },
- { 0xC6E5321D, 0 }, { 0xC6980C6A, 0 },
- { 0xC64B2278, 0xC64B2278 }, { 0xC5FE7403, 0xC5FE7403 },
- { 0xC5B200C6, 0 }, { 0xC565C87C, 0 },
- { 0xC519CAE0, 0xC519CAE0 }, { 0xC4CE07B0, 0xC4CE07B0 },
- { 0xC4827EA8, 0xC4827EA8 }, { 0xC4372F86, 0 },
- { 0xC3EC1A06, 0 }, { 0xC3A13DE6, 0xC3A13DE6 },
- { 0xC3569AE6, 0 }, { 0xC30C30C3, 0xC30C30C3 },
- { 0xC2C1FF3E, 0 }, { 0xC2780614, 0 },
- { 0xC22E4507, 0 }, { 0xC1E4BBD6, 0 },
- { 0xC19B6A42, 0 }, { 0xC152500C, 0xC152500C },
- { 0xC1096CF6, 0 }, { 0xC0C0C0C1, 0 },
- { 0xC0784B2F, 0 }, { 0xC0300C03, 0xC0300C03 },
- { 0xBFE80300, 0 }, { 0xBFA02FE8, 0xBFA02FE8 },
- { 0xBF589280, 0 }, { 0xBF112A8B, 0 },
- { 0xBEC9F7CE, 0 }, { 0xBE82FA0C, 0 },
- { 0xBE3C310C, 0 }, { 0xBDF59C92, 0 },
- { 0xBDAF3C64, 0 }, { 0xBD691047, 0xBD691047 },
- { 0xBD231803, 0 }, { 0xBCDD535E, 0 },
- { 0xBC97C21E, 0xBC97C21E }, { 0xBC52640C, 0 },
- { 0xBC0D38EE, 0xBC0D38EE }, { 0xBBC8408D, 0 },
- { 0xBB837AB1, 0 }, { 0xBB3EE722, 0 },
- { 0xBAFA85A9, 0xBAFA85A9 }, { 0xBAB65610, 0xBAB65610 },
- { 0xBA725820, 0xBA725820 }, { 0xBA2E8BA3, 0 },
- { 0xB9EAF063, 0 }, { 0xB9A7862A, 0xB9A7862A },
- { 0xB9644CC4, 0 }, { 0xB92143FA, 0xB92143FA },
- { 0xB8DE6B9A, 0 }, { 0xB89BC36D, 0 },
- { 0xB8594B41, 0 }, { 0xB81702E1, 0 },
- { 0xB7D4EA19, 0xB7D4EA19 }, { 0xB79300B8, 0 },
- { 0xB7514689, 0 }, { 0xB70FBB5A, 0xB70FBB5A },
- { 0xB6CE5EF9, 0xB6CE5EF9 }, { 0xB68D3134, 0xB68D3134 },
- { 0xB64C31D9, 0 }, { 0xB60B60B6, 0xB60B60B6 },
- { 0xB5CABD9B, 0 }, { 0xB58A4855, 0xB58A4855 },
- { 0xB54A00B5, 0xB54A00B5 }, { 0xB509E68B, 0 },
- { 0xB4C9F9A5, 0 }, { 0xB48A39D4, 0xB48A39D4 },
- { 0xB44AA6E9, 0xB44AA6E9 }, { 0xB40B40B4, 0xB40B40B4 },
- { 0xB3CC0706, 0 }, { 0xB38CF9B0, 0xB38CF9B0 },
- { 0xB34E1884, 0 }, { 0xB30F6353, 0 },
- { 0xB2D0D9EF, 0 }, { 0xB2927C2A, 0 },
- { 0xB25449D7, 0 }, { 0xB21642C9, 0 },
- { 0xB1D866D1, 0xB1D866D1 }, { 0xB19AB5C5, 0 },
- { 0xB15D2F76, 0 }, { 0xB11FD3B8, 0xB11FD3B8 },
- { 0xB0E2A260, 0xB0E2A260 }, { 0xB0A59B42, 0 },
- { 0xB068BE31, 0 }, { 0xB02C0B03, 0 },
- { 0xAFEF818C, 0 }, { 0xAFB321A1, 0xAFB321A1 },
- { 0xAF76EB19, 0 }, { 0xAF3ADDC7, 0 },
- { 0xAEFEF982, 0 }, { 0xAEC33E20, 0 },
- { 0xAE87AB76, 0xAE87AB76 }, { 0xAE4C415D, 0 },
- { 0xAE10FFA9, 0 }, { 0xADD5E632, 0xADD5E632 },
- { 0xAD9AF4D0, 0 }, { 0xAD602B58, 0xAD602B58 },
- { 0xAD2589A4, 0 }, { 0xACEB0F89, 0xACEB0F89 },
- { 0xACB0BCE1, 0xACB0BCE1 }, { 0xAC769184, 0xAC769184 },
- { 0xAC3C8D4A, 0 }, { 0xAC02B00B, 0 },
- { 0xABC8F9A0, 0xABC8F9A0 }, { 0xAB8F69E3, 0 },
- { 0xAB5600AC, 0 }, { 0xAB1CBDD4, 0 },
- { 0xAAE3A136, 0 }, { 0xAAAAAAAB, 0 },
- { 0xAA71DA0D, 0 }, { 0xAA392F36, 0 },
- { 0xAA00AA01, 0 }, { 0xA9C84A48, 0 },
- { 0xA9900FE6, 0 }, { 0xA957FAB5, 0xA957FAB5 },
- { 0xA9200A92, 0xA9200A92 }, { 0xA8E83F57, 0xA8E83F57 },
- { 0xA8B098E0, 0xA8B098E0 }, { 0xA8791709, 0 },
- { 0xA841B9AD, 0 }, { 0xA80A80A8, 0xA80A80A8 },
- { 0xA7D36BD8, 0 }, { 0xA79C7B17, 0 },
- { 0xA765AE44, 0 }, { 0xA72F053A, 0 },
- { 0xA6F87FD6, 0xA6F87FD6 }, { 0xA6C21DF7, 0 },
- { 0xA68BDF79, 0 }, { 0xA655C439, 0xA655C439 },
- { 0xA61FCC16, 0xA61FCC16 }, { 0xA5E9F6ED, 0xA5E9F6ED },
- { 0xA5B4449D, 0 }, { 0xA57EB503, 0 },
- { 0xA54947FE, 0 }, { 0xA513FD6C, 0 },
- { 0xA4DED52C, 0xA4DED52C }, { 0xA4A9CF1E, 0 },
- { 0xA474EB1F, 0xA474EB1F }, { 0xA4402910, 0xA4402910 },
- { 0xA40B88D0, 0 }, { 0xA3D70A3E, 0 },
- { 0xA3A2AD39, 0xA3A2AD39 }, { 0xA36E71A3, 0 },
- { 0xA33A575A, 0xA33A575A }, { 0xA3065E40, 0 },
- { 0xA2D28634, 0 }, { 0xA29ECF16, 0xA29ECF16 },
- { 0xA26B38C9, 0 }, { 0xA237C32B, 0xA237C32B },
- { 0xA2046E1F, 0xA2046E1F }, { 0xA1D13986, 0 },
- { 0xA19E2540, 0 }, { 0xA16B312F, 0 },
- { 0xA1385D35, 0 }, { 0xA105A933, 0 },
- { 0xA0D3150C, 0 }, { 0xA0A0A0A1, 0 },
- { 0xA06E4BD4, 0xA06E4BD4 }, { 0xA03C1689, 0 },
- { 0xA00A00A0, 0xA00A00A0 }, { 0x9FD809FE, 0 },
- { 0x9FA63284, 0 }, { 0x9F747A15, 0x9F747A15 },
- { 0x9F42E095, 0x9F42E095 }, { 0x9F1165E7, 0x9F1165E7 },
- { 0x9EE009EE, 0x9EE009EE }, { 0x9EAECC8D, 0x9EAECC8D },
- { 0x9E7DADA9, 0 }, { 0x9E4CAD24, 0 },
- { 0x9E1BCAE3, 0 }, { 0x9DEB06C9, 0x9DEB06C9 },
- { 0x9DBA60BB, 0x9DBA60BB }, { 0x9D89D89E, 0 },
- { 0x9D596E54, 0x9D596E54 }, { 0x9D2921C4, 0 },
- { 0x9CF8F2D1, 0x9CF8F2D1 }, { 0x9CC8E161, 0 },
- { 0x9C98ED58, 0 }, { 0x9C69169B, 0x9C69169B },
- { 0x9C395D10, 0x9C395D10 }, { 0x9C09C09C, 0x9C09C09C },
- { 0x9BDA4124, 0x9BDA4124 }, { 0x9BAADE8E, 0x9BAADE8E },
- { 0x9B7B98C0, 0 }, { 0x9B4C6F9F, 0 },
- { 0x9B1D6311, 0x9B1D6311 }, { 0x9AEE72FD, 0 },
- { 0x9ABF9F48, 0x9ABF9F48 }, { 0x9A90E7D9, 0x9A90E7D9 },
- { 0x9A624C97, 0 }, { 0x9A33CD67, 0x9A33CD67 },
- { 0x9A056A31, 0 }, { 0x99D722DB, 0 },
- { 0x99A8F74C, 0 }, { 0x997AE76B, 0x997AE76B },
- { 0x994CF320, 0x994CF320 }, { 0x991F1A51, 0x991F1A51 },
- { 0x98F15CE7, 0 }, { 0x98C3BAC7, 0x98C3BAC7 },
- { 0x989633DB, 0x989633DB }, { 0x9868C80A, 0 },
- { 0x983B773B, 0 }, { 0x980E4156, 0x980E4156 },
- { 0x97E12644, 0x97E12644 }, { 0x97B425ED, 0x97B425ED },
- { 0x97874039, 0 }, { 0x975A7510, 0 },
- { 0x972DC45B, 0 }, { 0x97012E02, 0x97012E02 },
- { 0x96D4B1EF, 0 }, { 0x96A8500A, 0 },
- { 0x967C083B, 0 }, { 0x964FDA6C, 0x964FDA6C },
- { 0x9623C686, 0x9623C686 }, { 0x95F7CC73, 0 },
- { 0x95CBEC1B, 0 }, { 0x95A02568, 0x95A02568 },
- { 0x95747844, 0 }, { 0x9548E498, 0 },
- { 0x951D6A4E, 0 }, { 0x94F2094F, 0x94F2094F },
- { 0x94C6C187, 0 }, { 0x949B92DE, 0 },
- { 0x94707D3F, 0 }, { 0x94458094, 0x94458094 },
- { 0x941A9CC8, 0x941A9CC8 }, { 0x93EFD1C5, 0x93EFD1C5 },
- { 0x93C51F76, 0 }, { 0x939A85C4, 0x939A85C4 },
- { 0x9370049C, 0 }, { 0x93459BE7, 0 },
- { 0x931B4B91, 0 }, { 0x92F11384, 0x92F11384 },
- { 0x92C6F3AC, 0x92C6F3AC }, { 0x929CEBF5, 0 },
- { 0x9272FC48, 0x9272FC48 }, { 0x92492492, 0x92492492 },
- { 0x921F64BF, 0 }, { 0x91F5BCB9, 0 },
- { 0x91CC2C6C, 0x91CC2C6C }, { 0x91A2B3C5, 0 },
- { 0x917952AF, 0 }, { 0x91500915, 0x91500915 },
- { 0x9126D6E5, 0 }, { 0x90FDBC09, 0x90FDBC09 },
- { 0x90D4B86F, 0 }, { 0x90ABCC02, 0x90ABCC02 },
- { 0x9082F6B0, 0 }, { 0x905A3863, 0x905A3863 },
- { 0x9031910A, 0 }, { 0x90090090, 0x90090090 },
- { 0x8FE086E3, 0 }, { 0x8FB823EE, 0x8FB823EE },
- { 0x8F8FD7A0, 0 }, { 0x8F67A1E4, 0 },
- { 0x8F3F82A8, 0x8F3F82A8 }, { 0x8F1779DA, 0 },
- { 0x8EEF8766, 0 }, { 0x8EC7AB3A, 0 },
- { 0x8E9FE542, 0x8E9FE542 }, { 0x8E78356D, 0x8E78356D },
- { 0x8E509BA8, 0x8E509BA8 }, { 0x8E2917E1, 0 },
- { 0x8E01AA05, 0 }, { 0x8DDA5202, 0x8DDA5202 },
- { 0x8DB30FC6, 0x8DB30FC6 }, { 0x8D8BE340, 0 },
- { 0x8D64CC5C, 0 }, { 0x8D3DCB09, 0 },
- { 0x8D16DF35, 0x8D16DF35 }, { 0x8CF008CF, 0x8CF008CF },
- { 0x8CC947C5, 0 }, { 0x8CA29C04, 0x8CA29C04 },
- { 0x8C7C057D, 0 }, { 0x8C55841D, 0 },
- { 0x8C2F17D2, 0x8C2F17D2 }, { 0x8C08C08C, 0x8C08C08C },
- { 0x8BE27E39, 0x8BE27E39 }, { 0x8BBC50C9, 0 },
- { 0x8B963829, 0x8B963829 }, { 0x8B70344A, 0x8B70344A },
- { 0x8B4A451A, 0 }, { 0x8B246A88, 0 },
- { 0x8AFEA483, 0x8AFEA483 }, { 0x8AD8F2FC, 0 },
- { 0x8AB355E0, 0x8AB355E0 }, { 0x8A8DCD20, 0 },
- { 0x8A6858AB, 0 }, { 0x8A42F870, 0x8A42F870 },
- { 0x8A1DAC60, 0x8A1DAC60 }, { 0x89F8746A, 0 },
- { 0x89D3507D, 0 }, { 0x89AE408A, 0 },
- { 0x89894480, 0 }, { 0x89645C4F, 0x89645C4F },
- { 0x893F87E8, 0x893F87E8 }, { 0x891AC73B, 0 },
- { 0x88F61A37, 0x88F61A37 }, { 0x88D180CD, 0x88D180CD },
- { 0x88ACFAEE, 0 }, { 0x88888889, 0 },
- { 0x8864298F, 0 }, { 0x883FDDF0, 0x883FDDF0 },
- { 0x881BA59E, 0 }, { 0x87F78088, 0 },
- { 0x87D36EA0, 0 }, { 0x87AF6FD6, 0 },
- { 0x878B841B, 0 }, { 0x8767AB5F, 0x8767AB5F },
- { 0x8743E595, 0 }, { 0x872032AC, 0x872032AC },
- { 0x86FC9296, 0x86FC9296 }, { 0x86D90545, 0 },
- { 0x86B58AA8, 0 }, { 0x869222B2, 0 },
- { 0x866ECD53, 0x866ECD53 }, { 0x864B8A7E, 0 },
- { 0x86285A23, 0x86285A23 }, { 0x86053C34, 0x86053C34 },
- { 0x85E230A3, 0x85E230A3 }, { 0x85BF3761, 0x85BF3761 },
- { 0x859C5060, 0x859C5060 }, { 0x85797B91, 0x85797B91 },
- { 0x8556B8E7, 0x8556B8E7 }, { 0x85340853, 0x85340853 },
- { 0x851169C7, 0x851169C7 }, { 0x84EEDD36, 0 },
- { 0x84CC6290, 0 }, { 0x84A9F9C8, 0x84A9F9C8 },
- { 0x8487A2D1, 0 }, { 0x84655D9C, 0 },
- { 0x84432A1B, 0x84432A1B }, { 0x84210842, 0x84210842 },
- { 0x83FEF802, 0x83FEF802 }, { 0x83DCF94E, 0 },
- { 0x83BB0C18, 0 }, { 0x83993052, 0x83993052 },
- { 0x837765F0, 0x837765F0 }, { 0x8355ACE4, 0 },
- { 0x83340520, 0x83340520 }, { 0x83126E98, 0 },
- { 0x82F0E93D, 0x82F0E93D }, { 0x82CF7504, 0 },
- { 0x82AE11DE, 0 }, { 0x828CBFBF, 0 },
- { 0x826B7E99, 0x826B7E99 }, { 0x824A4E61, 0 },
- { 0x82292F08, 0 }, { 0x82082082, 0x82082082 },
- { 0x81E722C2, 0x81E722C2 }, { 0x81C635BC, 0x81C635BC },
- { 0x81A55963, 0 }, { 0x81848DA9, 0 },
- { 0x8163D283, 0 }, { 0x814327E4, 0 },
- { 0x81228DBF, 0 }, { 0x81020408, 0x81020408 },
- { 0x80E18AB3, 0 }, { 0x80C121B3, 0 },
- { 0x80A0C8FB, 0x80A0C8FB }, { 0x80808081, 0 },
- { 0x80604836, 0x80604836 }, { 0x80402010, 0x80402010 },
- { 0x80200802, 0x80200802 }, { 0xFFFFFFFF, 0xFFFFFFFF }
-};
diff --git a/third_party/aom/av1/common/odintrin.h b/third_party/aom/av1/common/odintrin.h
deleted file mode 100644
index e1db0f44d..000000000
--- a/third_party/aom/av1/common/odintrin.h
+++ /dev/null
@@ -1,96 +0,0 @@
-/*
- * Copyright (c) 2001-2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-/* clang-format off */
-
-#ifndef AOM_AV1_COMMON_ODINTRIN_H_
-#define AOM_AV1_COMMON_ODINTRIN_H_
-
-#include <stdlib.h>
-#include <string.h>
-
-#include "aom/aom_integer.h"
-#include "aom_dsp/aom_dsp_common.h"
-#include "aom_ports/bitops.h"
-#include "av1/common/enums.h"
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-typedef int od_coeff;
-
-#define OD_DIVU_DMAX (1024)
-
-extern uint32_t OD_DIVU_SMALL_CONSTS[OD_DIVU_DMAX][2];
-
-/*Translate unsigned division by small divisors into multiplications.*/
-#define OD_DIVU_SMALL(_x, _d) \
- ((uint32_t)((OD_DIVU_SMALL_CONSTS[(_d)-1][0] * (uint64_t)(_x) + \
- OD_DIVU_SMALL_CONSTS[(_d)-1][1]) >> \
- 32) >> \
- (OD_ILOG_NZ(_d) - 1))
-
-#define OD_DIVU(_x, _d) \
- (((_d) < OD_DIVU_DMAX) ? (OD_DIVU_SMALL((_x), (_d))) : ((_x) / (_d)))
-
-#define OD_MINI AOMMIN
-#define OD_MAXI AOMMAX
-#define OD_CLAMPI(min, val, max) (OD_MAXI(min, OD_MINI(val, max)))
-
-/*Integer logarithm (base 2) of a nonzero unsigned 32-bit integer.
- OD_ILOG_NZ(x) = (int)floor(log2(x)) + 1.*/
-#define OD_ILOG_NZ(x) (1 + get_msb(x))
-
-/*Enable special features for gcc and compatible compilers.*/
-#if defined(__GNUC__) && defined(__GNUC_MINOR__) && defined(__GNUC_PATCHLEVEL__)
-#define OD_GNUC_PREREQ(maj, min, pat) \
- ((__GNUC__ << 16) + (__GNUC_MINOR__ << 8) + __GNUC_PATCHLEVEL__ >= \
- ((maj) << 16) + ((min) << 8) + pat) // NOLINT
-#else
-#define OD_GNUC_PREREQ(maj, min, pat) (0)
-#endif
-
-#if OD_GNUC_PREREQ(3, 4, 0)
-#define OD_WARN_UNUSED_RESULT __attribute__((__warn_unused_result__))
-#else
-#define OD_WARN_UNUSED_RESULT
-#endif
-
-#if OD_GNUC_PREREQ(3, 4, 0)
-#define OD_ARG_NONNULL(x) __attribute__((__nonnull__(x)))
-#else
-#define OD_ARG_NONNULL(x)
-#endif
-
-/** Copy n elements of memory from src to dst. The 0* term provides
- compile-time type checking */
-#if !defined(OVERRIDE_OD_COPY)
-#define OD_COPY(dst, src, n) \
- (memcpy((dst), (src), sizeof(*(dst)) * (n) + 0 * ((dst) - (src))))
-#endif
-
-/** Copy n elements of memory from src to dst, allowing overlapping regions.
- The 0* term provides compile-time type checking */
-#if !defined(OVERRIDE_OD_MOVE)
-# define OD_MOVE(dst, src, n) \
- (memmove((dst), (src), sizeof(*(dst))*(n) + 0*((dst) - (src)) ))
-#endif
-
-/*All of these macros should expect floats as arguments.*/
-# define OD_SIGNMASK(a) (-((a) < 0))
-# define OD_FLIPSIGNI(a, b) (((a) + OD_SIGNMASK(b)) ^ OD_SIGNMASK(b))
-
-#ifdef __cplusplus
-} // extern "C"
-#endif
-
-#endif // AOM_AV1_COMMON_ODINTRIN_H_
diff --git a/third_party/aom/av1/common/onyxc_int.h b/third_party/aom/av1/common/onyxc_int.h
deleted file mode 100644
index ff011c89e..000000000
--- a/third_party/aom/av1/common/onyxc_int.h
+++ /dev/null
@@ -1,1342 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#ifndef AOM_AV1_COMMON_ONYXC_INT_H_
-#define AOM_AV1_COMMON_ONYXC_INT_H_
-
-#include "config/aom_config.h"
-#include "config/av1_rtcd.h"
-
-#include "aom/internal/aom_codec_internal.h"
-#include "aom_util/aom_thread.h"
-#include "av1/common/alloccommon.h"
-#include "av1/common/av1_loopfilter.h"
-#include "av1/common/entropy.h"
-#include "av1/common/entropymode.h"
-#include "av1/common/entropymv.h"
-#include "av1/common/enums.h"
-#include "av1/common/frame_buffers.h"
-#include "av1/common/mv.h"
-#include "av1/common/quant_common.h"
-#include "av1/common/restoration.h"
-#include "av1/common/tile_common.h"
-#include "av1/common/timing.h"
-#include "av1/common/odintrin.h"
-#include "av1/encoder/hash_motion.h"
-#include "aom_dsp/grain_synthesis.h"
-#include "aom_dsp/grain_table.h"
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-#if defined(__clang__) && defined(__has_warning)
-#if __has_feature(cxx_attributes) && __has_warning("-Wimplicit-fallthrough")
-#define AOM_FALLTHROUGH_INTENDED [[clang::fallthrough]] // NOLINT
-#endif
-#elif defined(__GNUC__) && __GNUC__ >= 7
-#define AOM_FALLTHROUGH_INTENDED __attribute__((fallthrough)) // NOLINT
-#endif
-
-#ifndef AOM_FALLTHROUGH_INTENDED
-#define AOM_FALLTHROUGH_INTENDED \
- do { \
- } while (0)
-#endif
-
-#define CDEF_MAX_STRENGTHS 16
-
-/* Constant values while waiting for the sequence header */
-#define FRAME_ID_LENGTH 15
-#define DELTA_FRAME_ID_LENGTH 14
-
-#define FRAME_CONTEXTS (FRAME_BUFFERS + 1)
-// Extra frame context which is always kept at default values
-#define FRAME_CONTEXT_DEFAULTS (FRAME_CONTEXTS - 1)
-#define PRIMARY_REF_BITS 3
-#define PRIMARY_REF_NONE 7
-
-#define NUM_PING_PONG_BUFFERS 2
-
-#define MAX_NUM_TEMPORAL_LAYERS 8
-#define MAX_NUM_SPATIAL_LAYERS 4
-/* clang-format off */
-// clang-format seems to think this is a pointer dereference and not a
-// multiplication.
-#define MAX_NUM_OPERATING_POINTS \
- MAX_NUM_TEMPORAL_LAYERS * MAX_NUM_SPATIAL_LAYERS
-/* clang-format on*/
-
-// TODO(jingning): Turning this on to set up transform coefficient
-// processing timer.
-#define TXCOEFF_TIMER 0
-#define TXCOEFF_COST_TIMER 0
-
-typedef enum {
- SINGLE_REFERENCE = 0,
- COMPOUND_REFERENCE = 1,
- REFERENCE_MODE_SELECT = 2,
- REFERENCE_MODES = 3,
-} REFERENCE_MODE;
-
-typedef enum {
- /**
- * Frame context updates are disabled
- */
- REFRESH_FRAME_CONTEXT_DISABLED,
- /**
- * Update frame context to values resulting from backward probability
- * updates based on entropy/counts in the decoded frame
- */
- REFRESH_FRAME_CONTEXT_BACKWARD,
-} REFRESH_FRAME_CONTEXT_MODE;
-
-#define MFMV_STACK_SIZE 3
-typedef struct {
- int_mv mfmv0;
- uint8_t ref_frame_offset;
-} TPL_MV_REF;
-
-typedef struct {
- int_mv mv;
- MV_REFERENCE_FRAME ref_frame;
-} MV_REF;
-
-typedef struct {
- int ref_count;
-
- unsigned int cur_frame_offset;
- unsigned int ref_frame_offset[INTER_REFS_PER_FRAME];
-
- MV_REF *mvs;
- uint8_t *seg_map;
- struct segmentation seg;
- int mi_rows;
- int mi_cols;
- // Width and height give the size of the buffer (before any upscaling, unlike
- // the sizes that can be derived from the buf structure)
- int width;
- int height;
- WarpedMotionParams global_motion[REF_FRAMES];
- int showable_frame; // frame can be used as show existing frame in future
- int film_grain_params_present;
- aom_film_grain_t film_grain_params;
- aom_codec_frame_buffer_t raw_frame_buffer;
- YV12_BUFFER_CONFIG buf;
- hash_table hash_table;
- uint8_t intra_only;
- FRAME_TYPE frame_type;
- // The Following variables will only be used in frame parallel decode.
-
- // frame_worker_owner indicates which FrameWorker owns this buffer. NULL means
- // that no FrameWorker owns, or is decoding, this buffer.
- AVxWorker *frame_worker_owner;
-
- // row and col indicate which position frame has been decoded to in real
- // pixel unit. They are reset to -1 when decoding begins and set to INT_MAX
- // when the frame is fully decoded.
- int row;
- int col;
-
- // Inter frame reference frame delta for loop filter
- int8_t ref_deltas[REF_FRAMES];
-
- // 0 = ZERO_MV, MV
- int8_t mode_deltas[MAX_MODE_LF_DELTAS];
-} RefCntBuffer;
-
-typedef struct BufferPool {
-// Protect BufferPool from being accessed by several FrameWorkers at
-// the same time during frame parallel decode.
-// TODO(hkuang): Try to use atomic variable instead of locking the whole pool.
-#if CONFIG_MULTITHREAD
- pthread_mutex_t pool_mutex;
-#endif
-
- // Private data associated with the frame buffer callbacks.
- void *cb_priv;
-
- aom_get_frame_buffer_cb_fn_t get_fb_cb;
- aom_release_frame_buffer_cb_fn_t release_fb_cb;
-
- RefCntBuffer frame_bufs[FRAME_BUFFERS];
-
- // Frame buffers allocated internally by the codec.
- InternalFrameBufferList int_frame_buffers;
-} BufferPool;
-
-typedef struct {
- int base_ctx_table[2 /*row*/][2 /*col*/][3 /*sig_map*/]
- [BASE_CONTEXT_POSITION_NUM + 1];
-} LV_MAP_CTX_TABLE;
-typedef int BASE_CTX_TABLE[2 /*col*/][3 /*sig_map*/]
- [BASE_CONTEXT_POSITION_NUM + 1];
-
-typedef struct BitstreamLevel {
- uint8_t major;
- uint8_t minor;
-} BitstreamLevel;
-
-// Sequence header structure.
-// Note: All syntax elements of sequence_header_obu that need to be
-// bit-identical across multiple sequence headers must be part of this struct,
-// so that consistency is checked by are_seq_headers_consistent() function.
-typedef struct SequenceHeader {
- int num_bits_width;
- int num_bits_height;
- int max_frame_width;
- int max_frame_height;
- int frame_id_numbers_present_flag;
- int frame_id_length;
- int delta_frame_id_length;
- BLOCK_SIZE sb_size; // Size of the superblock used for this frame
- int mib_size; // Size of the superblock in units of MI blocks
- int mib_size_log2; // Log 2 of above.
- int order_hint_bits_minus_1;
- int force_screen_content_tools; // 0 - force off
- // 1 - force on
- // 2 - adaptive
- int force_integer_mv; // 0 - Not to force. MV can be in 1/4 or 1/8
- // 1 - force to integer
- // 2 - adaptive
- int still_picture; // Video is a single frame still picture
- int reduced_still_picture_hdr; // Use reduced header for still picture
- int enable_filter_intra; // enables/disables filterintra
- int enable_intra_edge_filter; // enables/disables corner/edge/upsampling
- int enable_interintra_compound; // enables/disables interintra_compound
- int enable_masked_compound; // enables/disables masked compound
- int enable_dual_filter; // 0 - disable dual interpolation filter
- // 1 - enable vert/horiz filter selection
- int enable_order_hint; // 0 - disable order hint, and related tools
- // jnt_comp, ref_frame_mvs, frame_sign_bias
- // if 0, enable_jnt_comp and
- // enable_ref_frame_mvs must be set zs 0.
- int enable_jnt_comp; // 0 - disable joint compound modes
- // 1 - enable it
- int enable_ref_frame_mvs; // 0 - disable ref frame mvs
- // 1 - enable it
- int enable_warped_motion; // 0 - disable warped motion for sequence
- // 1 - enable it for the sequence
- int enable_superres; // 0 - Disable superres for the sequence, and disable
- // transmitting per-frame superres enabled flag.
- // 1 - Enable superres for the sequence, and also
- // enable per-frame flag to denote if superres is
- // enabled for that frame.
- int enable_cdef; // To turn on/off CDEF
- int enable_restoration; // To turn on/off loop restoration
- BITSTREAM_PROFILE profile;
-
- // Operating point info.
- int operating_points_cnt_minus_1;
- int operating_point_idc[MAX_NUM_OPERATING_POINTS];
- int display_model_info_present_flag;
- int decoder_model_info_present_flag;
- BitstreamLevel level[MAX_NUM_OPERATING_POINTS];
- uint8_t tier[MAX_NUM_OPERATING_POINTS]; // seq_tier in the spec. One bit: 0
- // or 1.
-
- // Color config.
- aom_bit_depth_t bit_depth; // AOM_BITS_8 in profile 0 or 1,
- // AOM_BITS_10 or AOM_BITS_12 in profile 2 or 3.
- int use_highbitdepth; // If true, we need to use 16bit frame buffers.
- int monochrome; // Monochorme video
- aom_color_primaries_t color_primaries;
- aom_transfer_characteristics_t transfer_characteristics;
- aom_matrix_coefficients_t matrix_coefficients;
- int color_range;
- int subsampling_x; // Chroma subsampling for x
- int subsampling_y; // Chroma subsampling for y
- aom_chroma_sample_position_t chroma_sample_position;
- int separate_uv_delta_q;
-
- int film_grain_params_present;
-} SequenceHeader;
-
-typedef struct AV1Common {
- struct aom_internal_error_info error;
- int width;
- int height;
- int render_width;
- int render_height;
- int last_width;
- int last_height;
- int timing_info_present;
- aom_timing_info_t timing_info;
- int buffer_removal_time_present;
- aom_dec_model_info_t buffer_model;
- aom_dec_model_op_parameters_t op_params[MAX_NUM_OPERATING_POINTS + 1];
- aom_op_timing_info_t op_frame_timing[MAX_NUM_OPERATING_POINTS + 1];
- uint32_t frame_presentation_time;
-
- int largest_tile_id;
- size_t largest_tile_size;
- int context_update_tile_id;
-
- // Scale of the current frame with respect to itself.
- struct scale_factors sf_identity;
-
- YV12_BUFFER_CONFIG *frame_to_show;
- RefCntBuffer *prev_frame;
-
- // TODO(hkuang): Combine this with cur_buf in macroblockd.
- RefCntBuffer *cur_frame;
-
- int ref_frame_map[REF_FRAMES]; /* maps fb_idx to reference slot */
-
- // Prepare ref_frame_map for the next frame.
- // Only used in frame parallel decode.
- int next_ref_frame_map[REF_FRAMES];
-
- // TODO(jkoleszar): could expand active_ref_idx to 4, with 0 as intra, and
- // roll new_fb_idx into it.
-
- // Each Inter frame can reference INTER_REFS_PER_FRAME buffers
- RefBuffer frame_refs[INTER_REFS_PER_FRAME];
- int is_skip_mode_allowed;
- int skip_mode_flag;
- int ref_frame_idx_0;
- int ref_frame_idx_1;
-
- int new_fb_idx;
-
- FRAME_TYPE last_frame_type; /* last frame's frame type for motion search.*/
- FRAME_TYPE frame_type;
-
- int show_frame;
- int showable_frame; // frame can be used as show existing frame in future
- int last_show_frame;
- int show_existing_frame;
- // Flag for a frame used as a reference - not written to the bitstream
- int is_reference_frame;
- int reset_decoder_state;
-
- // Flag signaling that the frame is encoded using only INTRA modes.
- uint8_t intra_only;
- uint8_t last_intra_only;
- uint8_t disable_cdf_update;
- int allow_high_precision_mv;
- int cur_frame_force_integer_mv; // 0 the default in AOM, 1 only integer
-
- int allow_screen_content_tools;
- int allow_intrabc;
- int allow_warped_motion;
-
- // MBs, mb_rows/cols is in 16-pixel units; mi_rows/cols is in
- // MB_MODE_INFO (8-pixel) units.
- int MBs;
- int mb_rows, mi_rows;
- int mb_cols, mi_cols;
- int mi_stride;
-
- /* profile settings */
- TX_MODE tx_mode;
-
-#if CONFIG_ENTROPY_STATS
- int coef_cdf_category;
-#endif
-
- int base_qindex;
- int y_dc_delta_q;
- int u_dc_delta_q;
- int v_dc_delta_q;
- int u_ac_delta_q;
- int v_ac_delta_q;
-
- // The dequantizers below are true dequntizers used only in the
- // dequantization process. They have the same coefficient
- // shift/scale as TX.
- int16_t y_dequant_QTX[MAX_SEGMENTS][2];
- int16_t u_dequant_QTX[MAX_SEGMENTS][2];
- int16_t v_dequant_QTX[MAX_SEGMENTS][2];
-
- // Global quant matrix tables
- const qm_val_t *giqmatrix[NUM_QM_LEVELS][3][TX_SIZES_ALL];
- const qm_val_t *gqmatrix[NUM_QM_LEVELS][3][TX_SIZES_ALL];
-
- // Local quant matrix tables for each frame
- const qm_val_t *y_iqmatrix[MAX_SEGMENTS][TX_SIZES_ALL];
- const qm_val_t *u_iqmatrix[MAX_SEGMENTS][TX_SIZES_ALL];
- const qm_val_t *v_iqmatrix[MAX_SEGMENTS][TX_SIZES_ALL];
-
- // Encoder
- int using_qmatrix;
- int qm_y;
- int qm_u;
- int qm_v;
- int min_qmlevel;
- int max_qmlevel;
-
- /* We allocate a MB_MODE_INFO struct for each macroblock, together with
- an extra row on top and column on the left to simplify prediction. */
- int mi_alloc_size;
- MB_MODE_INFO *mip; /* Base of allocated array */
- MB_MODE_INFO *mi; /* Corresponds to upper left visible macroblock */
-
- // TODO(agrange): Move prev_mi into encoder structure.
- // prev_mip and prev_mi will only be allocated in encoder.
- MB_MODE_INFO *prev_mip; /* MB_MODE_INFO array 'mip' from last decoded frame */
- MB_MODE_INFO *prev_mi; /* 'mi' from last frame (points into prev_mip) */
-
- // Separate mi functions between encoder and decoder.
- int (*alloc_mi)(struct AV1Common *cm, int mi_size);
- void (*free_mi)(struct AV1Common *cm);
- void (*setup_mi)(struct AV1Common *cm);
-
- // Grid of pointers to 8x8 MB_MODE_INFO structs. Any 8x8 not in the visible
- // area will be NULL.
- MB_MODE_INFO **mi_grid_base;
- MB_MODE_INFO **mi_grid_visible;
- MB_MODE_INFO **prev_mi_grid_base;
- MB_MODE_INFO **prev_mi_grid_visible;
-
- // Whether to use previous frames' motion vectors for prediction.
- int allow_ref_frame_mvs;
-
- uint8_t *last_frame_seg_map;
- uint8_t *current_frame_seg_map;
- int seg_map_alloc_size;
-
- InterpFilter interp_filter;
-
- int switchable_motion_mode;
-
- loop_filter_info_n lf_info;
- // The denominator of the superres scale; the numerator is fixed.
- uint8_t superres_scale_denominator;
- int superres_upscaled_width;
- int superres_upscaled_height;
- RestorationInfo rst_info[MAX_MB_PLANE];
-
- // rst_end_stripe[i] is one more than the index of the bottom stripe
- // for tile row i.
- int rst_end_stripe[MAX_TILE_ROWS];
-
- // Pointer to a scratch buffer used by self-guided restoration
- int32_t *rst_tmpbuf;
- RestorationLineBuffers *rlbs;
-
- // Output of loop restoration
- YV12_BUFFER_CONFIG rst_frame;
-
- // Flag signaling how frame contexts should be updated at the end of
- // a frame decode
- REFRESH_FRAME_CONTEXT_MODE refresh_frame_context;
-
- int ref_frame_sign_bias[REF_FRAMES]; /* Two state 0, 1 */
-
- struct loopfilter lf;
- struct segmentation seg;
- int coded_lossless; // frame is fully lossless at the coded resolution.
- int all_lossless; // frame is fully lossless at the upscaled resolution.
-
- int reduced_tx_set_used;
-
- // Context probabilities for reference frame prediction
- MV_REFERENCE_FRAME comp_fwd_ref[FWD_REFS];
- MV_REFERENCE_FRAME comp_bwd_ref[BWD_REFS];
- REFERENCE_MODE reference_mode;
-
- FRAME_CONTEXT *fc; /* this frame entropy */
- FRAME_CONTEXT *frame_contexts; // FRAME_CONTEXTS
- unsigned int frame_context_idx; /* Context to use/update */
- int fb_of_context_type[REF_FRAMES];
- int primary_ref_frame;
-
- unsigned int frame_offset;
-
- unsigned int current_video_frame;
-
- aom_bit_depth_t dequant_bit_depth; // bit_depth of current dequantizer
-
- int error_resilient_mode;
- int force_primary_ref_none;
-
- int tile_cols, tile_rows;
- int last_tile_cols, last_tile_rows;
-
- int max_tile_width_sb;
- int min_log2_tile_cols;
- int max_log2_tile_cols;
- int max_log2_tile_rows;
- int min_log2_tile_rows;
- int min_log2_tiles;
- int max_tile_height_sb;
- int uniform_tile_spacing_flag;
- int log2_tile_cols; // only valid for uniform tiles
- int log2_tile_rows; // only valid for uniform tiles
- int tile_col_start_sb[MAX_TILE_COLS + 1]; // valid for 0 <= i <= tile_cols
- int tile_row_start_sb[MAX_TILE_ROWS + 1]; // valid for 0 <= i <= tile_rows
- int tile_width, tile_height; // In MI units
-
- unsigned int large_scale_tile;
- unsigned int single_tile_decoding;
-
- int byte_alignment;
- int skip_loop_filter;
- int skip_film_grain;
-
- // Private data associated with the frame buffer callbacks.
- void *cb_priv;
- aom_get_frame_buffer_cb_fn_t get_fb_cb;
- aom_release_frame_buffer_cb_fn_t release_fb_cb;
-
- // Handles memory for the codec.
- InternalFrameBufferList int_frame_buffers;
-
- // External BufferPool passed from outside.
- BufferPool *buffer_pool;
-
- PARTITION_CONTEXT **above_seg_context;
- ENTROPY_CONTEXT **above_context[MAX_MB_PLANE];
- TXFM_CONTEXT **above_txfm_context;
- WarpedMotionParams global_motion[REF_FRAMES];
- aom_film_grain_t film_grain_params;
-
- int cdef_pri_damping;
- int cdef_sec_damping;
- int nb_cdef_strengths;
- int cdef_strengths[CDEF_MAX_STRENGTHS];
- int cdef_uv_strengths[CDEF_MAX_STRENGTHS];
- int cdef_bits;
-
- int delta_q_present_flag;
- // Resolution of delta quant
- int delta_q_res;
- int delta_lf_present_flag;
- // Resolution of delta lf level
- int delta_lf_res;
- // This is a flag for number of deltas of loop filter level
- // 0: use 1 delta, for y_vertical, y_horizontal, u, and v
- // 1: use separate deltas for each filter level
- int delta_lf_multi;
- int num_tg;
- SequenceHeader seq_params;
- int current_frame_id;
- int ref_frame_id[REF_FRAMES];
- int valid_for_referencing[REF_FRAMES];
- int invalid_delta_frame_id_minus_1;
- LV_MAP_CTX_TABLE coeff_ctx_table;
- TPL_MV_REF *tpl_mvs;
- int tpl_mvs_mem_size;
- // TODO(jingning): This can be combined with sign_bias later.
- int8_t ref_frame_side[REF_FRAMES];
-
- int is_annexb;
-
- int frame_refs_short_signaling;
- int temporal_layer_id;
- int spatial_layer_id;
- unsigned int number_temporal_layers;
- unsigned int number_spatial_layers;
- int num_allocated_above_context_mi_col;
- int num_allocated_above_contexts;
- int num_allocated_above_context_planes;
-
-#if TXCOEFF_TIMER
- int64_t cum_txcoeff_timer;
- int64_t txcoeff_timer;
- int txb_count;
-#endif
-
-#if TXCOEFF_COST_TIMER
- int64_t cum_txcoeff_cost_timer;
- int64_t txcoeff_cost_timer;
- int64_t txcoeff_cost_count;
-#endif
- const cfg_options_t *options;
-} AV1_COMMON;
-
-// TODO(hkuang): Don't need to lock the whole pool after implementing atomic
-// frame reference count.
-static void lock_buffer_pool(BufferPool *const pool) {
-#if CONFIG_MULTITHREAD
- pthread_mutex_lock(&pool->pool_mutex);
-#else
- (void)pool;
-#endif
-}
-
-static void unlock_buffer_pool(BufferPool *const pool) {
-#if CONFIG_MULTITHREAD
- pthread_mutex_unlock(&pool->pool_mutex);
-#else
- (void)pool;
-#endif
-}
-
-static INLINE YV12_BUFFER_CONFIG *get_ref_frame(AV1_COMMON *cm, int index) {
- if (index < 0 || index >= REF_FRAMES) return NULL;
- if (cm->ref_frame_map[index] < 0) return NULL;
- assert(cm->ref_frame_map[index] < FRAME_BUFFERS);
- return &cm->buffer_pool->frame_bufs[cm->ref_frame_map[index]].buf;
-}
-
-static INLINE YV12_BUFFER_CONFIG *get_frame_new_buffer(
- const AV1_COMMON *const cm) {
- return &cm->buffer_pool->frame_bufs[cm->new_fb_idx].buf;
-}
-
-static INLINE int get_free_fb(AV1_COMMON *cm) {
- RefCntBuffer *const frame_bufs = cm->buffer_pool->frame_bufs;
- int i;
-
- lock_buffer_pool(cm->buffer_pool);
- for (i = 0; i < FRAME_BUFFERS; ++i)
- if (frame_bufs[i].ref_count == 0) break;
-
- if (i != FRAME_BUFFERS) {
- if (frame_bufs[i].buf.use_external_reference_buffers) {
- // If this frame buffer's y_buffer, u_buffer, and v_buffer point to the
- // external reference buffers. Restore the buffer pointers to point to the
- // internally allocated memory.
- YV12_BUFFER_CONFIG *ybf = &frame_bufs[i].buf;
- ybf->y_buffer = ybf->store_buf_adr[0];
- ybf->u_buffer = ybf->store_buf_adr[1];
- ybf->v_buffer = ybf->store_buf_adr[2];
- ybf->use_external_reference_buffers = 0;
- }
-
- frame_bufs[i].ref_count = 1;
- } else {
- // Reset i to be INVALID_IDX to indicate no free buffer found.
- i = INVALID_IDX;
- }
-
- unlock_buffer_pool(cm->buffer_pool);
- return i;
-}
-
-static INLINE void ref_cnt_fb(RefCntBuffer *bufs, int *idx, int new_idx) {
- const int ref_index = *idx;
-
- if (ref_index >= 0 && bufs[ref_index].ref_count > 0)
- bufs[ref_index].ref_count--;
-
- *idx = new_idx;
-
- bufs[new_idx].ref_count++;
-}
-
-static INLINE int frame_is_intra_only(const AV1_COMMON *const cm) {
- return cm->frame_type == KEY_FRAME || cm->intra_only;
-}
-
-static INLINE int frame_is_sframe(const AV1_COMMON *cm) {
- return cm->frame_type == S_FRAME;
-}
-
-static INLINE RefCntBuffer *get_prev_frame(const AV1_COMMON *const cm) {
- if (cm->primary_ref_frame == PRIMARY_REF_NONE ||
- cm->frame_refs[cm->primary_ref_frame].idx == INVALID_IDX) {
- return NULL;
- } else {
- return &cm->buffer_pool
- ->frame_bufs[cm->frame_refs[cm->primary_ref_frame].idx];
- }
-}
-
-// Returns 1 if this frame might allow mvs from some reference frame.
-static INLINE int frame_might_allow_ref_frame_mvs(const AV1_COMMON *cm) {
- return !cm->error_resilient_mode && cm->seq_params.enable_ref_frame_mvs &&
- cm->seq_params.enable_order_hint && !frame_is_intra_only(cm);
-}
-
-// Returns 1 if this frame might use warped_motion
-static INLINE int frame_might_allow_warped_motion(const AV1_COMMON *cm) {
- return !cm->error_resilient_mode && !frame_is_intra_only(cm) &&
- cm->seq_params.enable_warped_motion;
-}
-
-static INLINE void ensure_mv_buffer(RefCntBuffer *buf, AV1_COMMON *cm) {
- const int buf_rows = buf->mi_rows;
- const int buf_cols = buf->mi_cols;
-
- if (buf->mvs == NULL || buf_rows != cm->mi_rows || buf_cols != cm->mi_cols) {
- aom_free(buf->mvs);
- buf->mi_rows = cm->mi_rows;
- buf->mi_cols = cm->mi_cols;
- CHECK_MEM_ERROR(cm, buf->mvs,
- (MV_REF *)aom_calloc(
- ((cm->mi_rows + 1) >> 1) * ((cm->mi_cols + 1) >> 1),
- sizeof(*buf->mvs)));
- aom_free(buf->seg_map);
- CHECK_MEM_ERROR(cm, buf->seg_map,
- (uint8_t *)aom_calloc(cm->mi_rows * cm->mi_cols,
- sizeof(*buf->seg_map)));
- }
-
- const int mem_size =
- ((cm->mi_rows + MAX_MIB_SIZE) >> 1) * (cm->mi_stride >> 1);
- int realloc = cm->tpl_mvs == NULL;
- if (cm->tpl_mvs) realloc |= cm->tpl_mvs_mem_size < mem_size;
-
- if (realloc) {
- aom_free(cm->tpl_mvs);
- CHECK_MEM_ERROR(cm, cm->tpl_mvs,
- (TPL_MV_REF *)aom_calloc(mem_size, sizeof(*cm->tpl_mvs)));
- cm->tpl_mvs_mem_size = mem_size;
- }
-}
-
-void cfl_init(CFL_CTX *cfl, const SequenceHeader *seq_params);
-
-static INLINE int av1_num_planes(const AV1_COMMON *cm) {
- return cm->seq_params.monochrome ? 1 : MAX_MB_PLANE;
-}
-
-static INLINE void av1_init_above_context(AV1_COMMON *cm, MACROBLOCKD *xd,
- const int tile_row) {
- const int num_planes = av1_num_planes(cm);
- for (int i = 0; i < num_planes; ++i) {
- xd->above_context[i] = cm->above_context[i][tile_row];
- }
- xd->above_seg_context = cm->above_seg_context[tile_row];
- xd->above_txfm_context = cm->above_txfm_context[tile_row];
-}
-
-static INLINE void av1_init_macroblockd(AV1_COMMON *cm, MACROBLOCKD *xd,
- tran_low_t *dqcoeff) {
- const int num_planes = av1_num_planes(cm);
- for (int i = 0; i < num_planes; ++i) {
- xd->plane[i].dqcoeff = dqcoeff;
-
- if (xd->plane[i].plane_type == PLANE_TYPE_Y) {
- memcpy(xd->plane[i].seg_dequant_QTX, cm->y_dequant_QTX,
- sizeof(cm->y_dequant_QTX));
- memcpy(xd->plane[i].seg_iqmatrix, cm->y_iqmatrix, sizeof(cm->y_iqmatrix));
-
- } else {
- if (i == AOM_PLANE_U) {
- memcpy(xd->plane[i].seg_dequant_QTX, cm->u_dequant_QTX,
- sizeof(cm->u_dequant_QTX));
- memcpy(xd->plane[i].seg_iqmatrix, cm->u_iqmatrix,
- sizeof(cm->u_iqmatrix));
- } else {
- memcpy(xd->plane[i].seg_dequant_QTX, cm->v_dequant_QTX,
- sizeof(cm->v_dequant_QTX));
- memcpy(xd->plane[i].seg_iqmatrix, cm->v_iqmatrix,
- sizeof(cm->v_iqmatrix));
- }
- }
- }
- xd->mi_stride = cm->mi_stride;
- xd->error_info = &cm->error;
- cfl_init(&xd->cfl, &cm->seq_params);
-}
-
-static INLINE void set_skip_context(MACROBLOCKD *xd, int mi_row, int mi_col,
- const int num_planes) {
- int i;
- int row_offset = mi_row;
- int col_offset = mi_col;
- for (i = 0; i < num_planes; ++i) {
- struct macroblockd_plane *const pd = &xd->plane[i];
- // Offset the buffer pointer
- const BLOCK_SIZE bsize = xd->mi[0]->sb_type;
- if (pd->subsampling_y && (mi_row & 0x01) && (mi_size_high[bsize] == 1))
- row_offset = mi_row - 1;
- if (pd->subsampling_x && (mi_col & 0x01) && (mi_size_wide[bsize] == 1))
- col_offset = mi_col - 1;
- int above_idx = col_offset;
- int left_idx = row_offset & MAX_MIB_MASK;
- pd->above_context = &xd->above_context[i][above_idx >> pd->subsampling_x];
- pd->left_context = &xd->left_context[i][left_idx >> pd->subsampling_y];
- }
-}
-
-static INLINE int calc_mi_size(int len) {
- // len is in mi units. Align to a multiple of SBs.
- return ALIGN_POWER_OF_TWO(len, MAX_MIB_SIZE_LOG2);
-}
-
-static INLINE void set_plane_n4(MACROBLOCKD *const xd, int bw, int bh,
- const int num_planes) {
- int i;
- for (i = 0; i < num_planes; i++) {
- xd->plane[i].width = (bw * MI_SIZE) >> xd->plane[i].subsampling_x;
- xd->plane[i].height = (bh * MI_SIZE) >> xd->plane[i].subsampling_y;
-
- xd->plane[i].width = AOMMAX(xd->plane[i].width, 4);
- xd->plane[i].height = AOMMAX(xd->plane[i].height, 4);
- }
-}
-
-static INLINE void set_mi_row_col(MACROBLOCKD *xd, const TileInfo *const tile,
- int mi_row, int bh, int mi_col, int bw,
- int mi_rows, int mi_cols) {
- xd->mb_to_top_edge = -((mi_row * MI_SIZE) * 8);
- xd->mb_to_bottom_edge = ((mi_rows - bh - mi_row) * MI_SIZE) * 8;
- xd->mb_to_left_edge = -((mi_col * MI_SIZE) * 8);
- xd->mb_to_right_edge = ((mi_cols - bw - mi_col) * MI_SIZE) * 8;
-
- // Are edges available for intra prediction?
- xd->up_available = (mi_row > tile->mi_row_start);
-
- const int ss_x = xd->plane[1].subsampling_x;
- const int ss_y = xd->plane[1].subsampling_y;
-
- xd->left_available = (mi_col > tile->mi_col_start);
- xd->chroma_up_available = xd->up_available;
- xd->chroma_left_available = xd->left_available;
- if (ss_x && bw < mi_size_wide[BLOCK_8X8])
- xd->chroma_left_available = (mi_col - 1) > tile->mi_col_start;
- if (ss_y && bh < mi_size_high[BLOCK_8X8])
- xd->chroma_up_available = (mi_row - 1) > tile->mi_row_start;
- if (xd->up_available) {
- xd->above_mbmi = xd->mi[-xd->mi_stride];
- } else {
- xd->above_mbmi = NULL;
- }
-
- if (xd->left_available) {
- xd->left_mbmi = xd->mi[-1];
- } else {
- xd->left_mbmi = NULL;
- }
-
- const int chroma_ref = ((mi_row & 0x01) || !(bh & 0x01) || !ss_y) &&
- ((mi_col & 0x01) || !(bw & 0x01) || !ss_x);
- if (chroma_ref) {
- // To help calculate the "above" and "left" chroma blocks, note that the
- // current block may cover multiple luma blocks (eg, if partitioned into
- // 4x4 luma blocks).
- // First, find the top-left-most luma block covered by this chroma block
- MB_MODE_INFO **base_mi =
- &xd->mi[-(mi_row & ss_y) * xd->mi_stride - (mi_col & ss_x)];
-
- // Then, we consider the luma region covered by the left or above 4x4 chroma
- // prediction. We want to point to the chroma reference block in that
- // region, which is the bottom-right-most mi unit.
- // This leads to the following offsets:
- MB_MODE_INFO *chroma_above_mi =
- xd->chroma_up_available ? base_mi[-xd->mi_stride + ss_x] : NULL;
- xd->chroma_above_mbmi = chroma_above_mi;
-
- MB_MODE_INFO *chroma_left_mi =
- xd->chroma_left_available ? base_mi[ss_y * xd->mi_stride - 1] : NULL;
- xd->chroma_left_mbmi = chroma_left_mi;
- }
-
- xd->n4_h = bh;
- xd->n4_w = bw;
- xd->is_sec_rect = 0;
- if (xd->n4_w < xd->n4_h) {
- // Only mark is_sec_rect as 1 for the last block.
- // For PARTITION_VERT_4, it would be (0, 0, 0, 1);
- // For other partitions, it would be (0, 1).
- if (!((mi_col + xd->n4_w) & (xd->n4_h - 1))) xd->is_sec_rect = 1;
- }
-
- if (xd->n4_w > xd->n4_h)
- if (mi_row & (xd->n4_w - 1)) xd->is_sec_rect = 1;
-}
-
-static INLINE aom_cdf_prob *get_y_mode_cdf(FRAME_CONTEXT *tile_ctx,
- const MB_MODE_INFO *above_mi,
- const MB_MODE_INFO *left_mi) {
- const PREDICTION_MODE above = av1_above_block_mode(above_mi);
- const PREDICTION_MODE left = av1_left_block_mode(left_mi);
- const int above_ctx = intra_mode_context[above];
- const int left_ctx = intra_mode_context[left];
- return tile_ctx->kf_y_cdf[above_ctx][left_ctx];
-}
-
-static INLINE void update_partition_context(MACROBLOCKD *xd, int mi_row,
- int mi_col, BLOCK_SIZE subsize,
- BLOCK_SIZE bsize) {
- PARTITION_CONTEXT *const above_ctx = xd->above_seg_context + mi_col;
- PARTITION_CONTEXT *const left_ctx =
- xd->left_seg_context + (mi_row & MAX_MIB_MASK);
-
- const int bw = mi_size_wide[bsize];
- const int bh = mi_size_high[bsize];
- memset(above_ctx, partition_context_lookup[subsize].above, bw);
- memset(left_ctx, partition_context_lookup[subsize].left, bh);
-}
-
-static INLINE int is_chroma_reference(int mi_row, int mi_col, BLOCK_SIZE bsize,
- int subsampling_x, int subsampling_y) {
- const int bw = mi_size_wide[bsize];
- const int bh = mi_size_high[bsize];
- int ref_pos = ((mi_row & 0x01) || !(bh & 0x01) || !subsampling_y) &&
- ((mi_col & 0x01) || !(bw & 0x01) || !subsampling_x);
- return ref_pos;
-}
-
-static INLINE BLOCK_SIZE scale_chroma_bsize(BLOCK_SIZE bsize, int subsampling_x,
- int subsampling_y) {
- BLOCK_SIZE bs = bsize;
- switch (bsize) {
- case BLOCK_4X4:
- if (subsampling_x == 1 && subsampling_y == 1)
- bs = BLOCK_8X8;
- else if (subsampling_x == 1)
- bs = BLOCK_8X4;
- else if (subsampling_y == 1)
- bs = BLOCK_4X8;
- break;
- case BLOCK_4X8:
- if (subsampling_x == 1 && subsampling_y == 1)
- bs = BLOCK_8X8;
- else if (subsampling_x == 1)
- bs = BLOCK_8X8;
- else if (subsampling_y == 1)
- bs = BLOCK_4X8;
- break;
- case BLOCK_8X4:
- if (subsampling_x == 1 && subsampling_y == 1)
- bs = BLOCK_8X8;
- else if (subsampling_x == 1)
- bs = BLOCK_8X4;
- else if (subsampling_y == 1)
- bs = BLOCK_8X8;
- break;
- case BLOCK_4X16:
- if (subsampling_x == 1 && subsampling_y == 1)
- bs = BLOCK_8X16;
- else if (subsampling_x == 1)
- bs = BLOCK_8X16;
- else if (subsampling_y == 1)
- bs = BLOCK_4X16;
- break;
- case BLOCK_16X4:
- if (subsampling_x == 1 && subsampling_y == 1)
- bs = BLOCK_16X8;
- else if (subsampling_x == 1)
- bs = BLOCK_16X4;
- else if (subsampling_y == 1)
- bs = BLOCK_16X8;
- break;
- default: break;
- }
- return bs;
-}
-
-static INLINE aom_cdf_prob cdf_element_prob(const aom_cdf_prob *cdf,
- size_t element) {
- assert(cdf != NULL);
- return (element > 0 ? cdf[element - 1] : CDF_PROB_TOP) - cdf[element];
-}
-
-static INLINE void partition_gather_horz_alike(aom_cdf_prob *out,
- const aom_cdf_prob *const in,
- BLOCK_SIZE bsize) {
- (void)bsize;
- out[0] = CDF_PROB_TOP;
- out[0] -= cdf_element_prob(in, PARTITION_HORZ);
- out[0] -= cdf_element_prob(in, PARTITION_SPLIT);
- out[0] -= cdf_element_prob(in, PARTITION_HORZ_A);
- out[0] -= cdf_element_prob(in, PARTITION_HORZ_B);
- out[0] -= cdf_element_prob(in, PARTITION_VERT_A);
- if (bsize != BLOCK_128X128) out[0] -= cdf_element_prob(in, PARTITION_HORZ_4);
- out[0] = AOM_ICDF(out[0]);
- out[1] = AOM_ICDF(CDF_PROB_TOP);
-}
-
-static INLINE void partition_gather_vert_alike(aom_cdf_prob *out,
- const aom_cdf_prob *const in,
- BLOCK_SIZE bsize) {
- (void)bsize;
- out[0] = CDF_PROB_TOP;
- out[0] -= cdf_element_prob(in, PARTITION_VERT);
- out[0] -= cdf_element_prob(in, PARTITION_SPLIT);
- out[0] -= cdf_element_prob(in, PARTITION_HORZ_A);
- out[0] -= cdf_element_prob(in, PARTITION_VERT_A);
- out[0] -= cdf_element_prob(in, PARTITION_VERT_B);
- if (bsize != BLOCK_128X128) out[0] -= cdf_element_prob(in, PARTITION_VERT_4);
- out[0] = AOM_ICDF(out[0]);
- out[1] = AOM_ICDF(CDF_PROB_TOP);
-}
-
-static INLINE void update_ext_partition_context(MACROBLOCKD *xd, int mi_row,
- int mi_col, BLOCK_SIZE subsize,
- BLOCK_SIZE bsize,
- PARTITION_TYPE partition) {
- if (bsize >= BLOCK_8X8) {
- const int hbs = mi_size_wide[bsize] / 2;
- BLOCK_SIZE bsize2 = get_partition_subsize(bsize, PARTITION_SPLIT);
- switch (partition) {
- case PARTITION_SPLIT:
- if (bsize != BLOCK_8X8) break;
- AOM_FALLTHROUGH_INTENDED;
- case PARTITION_NONE:
- case PARTITION_HORZ:
- case PARTITION_VERT:
- case PARTITION_HORZ_4:
- case PARTITION_VERT_4:
- update_partition_context(xd, mi_row, mi_col, subsize, bsize);
- break;
- case PARTITION_HORZ_A:
- update_partition_context(xd, mi_row, mi_col, bsize2, subsize);
- update_partition_context(xd, mi_row + hbs, mi_col, subsize, subsize);
- break;
- case PARTITION_HORZ_B:
- update_partition_context(xd, mi_row, mi_col, subsize, subsize);
- update_partition_context(xd, mi_row + hbs, mi_col, bsize2, subsize);
- break;
- case PARTITION_VERT_A:
- update_partition_context(xd, mi_row, mi_col, bsize2, subsize);
- update_partition_context(xd, mi_row, mi_col + hbs, subsize, subsize);
- break;
- case PARTITION_VERT_B:
- update_partition_context(xd, mi_row, mi_col, subsize, subsize);
- update_partition_context(xd, mi_row, mi_col + hbs, bsize2, subsize);
- break;
- default: assert(0 && "Invalid partition type");
- }
- }
-}
-
-static INLINE int partition_plane_context(const MACROBLOCKD *xd, int mi_row,
- int mi_col, BLOCK_SIZE bsize) {
- const PARTITION_CONTEXT *above_ctx = xd->above_seg_context + mi_col;
- const PARTITION_CONTEXT *left_ctx =
- xd->left_seg_context + (mi_row & MAX_MIB_MASK);
- // Minimum partition point is 8x8. Offset the bsl accordingly.
- const int bsl = mi_size_wide_log2[bsize] - mi_size_wide_log2[BLOCK_8X8];
- int above = (*above_ctx >> bsl) & 1, left = (*left_ctx >> bsl) & 1;
-
- assert(mi_size_wide_log2[bsize] == mi_size_high_log2[bsize]);
- assert(bsl >= 0);
-
- return (left * 2 + above) + bsl * PARTITION_PLOFFSET;
-}
-
-// Return the number of elements in the partition CDF when
-// partitioning the (square) block with luma block size of bsize.
-static INLINE int partition_cdf_length(BLOCK_SIZE bsize) {
- if (bsize <= BLOCK_8X8)
- return PARTITION_TYPES;
- else if (bsize == BLOCK_128X128)
- return EXT_PARTITION_TYPES - 2;
- else
- return EXT_PARTITION_TYPES;
-}
-
-static INLINE int max_block_wide(const MACROBLOCKD *xd, BLOCK_SIZE bsize,
- int plane) {
- int max_blocks_wide = block_size_wide[bsize];
- const struct macroblockd_plane *const pd = &xd->plane[plane];
-
- if (xd->mb_to_right_edge < 0)
- max_blocks_wide += xd->mb_to_right_edge >> (3 + pd->subsampling_x);
-
- // Scale the width in the transform block unit.
- return max_blocks_wide >> tx_size_wide_log2[0];
-}
-
-static INLINE int max_block_high(const MACROBLOCKD *xd, BLOCK_SIZE bsize,
- int plane) {
- int max_blocks_high = block_size_high[bsize];
- const struct macroblockd_plane *const pd = &xd->plane[plane];
-
- if (xd->mb_to_bottom_edge < 0)
- max_blocks_high += xd->mb_to_bottom_edge >> (3 + pd->subsampling_y);
-
- // Scale the height in the transform block unit.
- return max_blocks_high >> tx_size_high_log2[0];
-}
-
-static INLINE int max_intra_block_width(const MACROBLOCKD *xd,
- BLOCK_SIZE plane_bsize, int plane,
- TX_SIZE tx_size) {
- const int max_blocks_wide = max_block_wide(xd, plane_bsize, plane)
- << tx_size_wide_log2[0];
- return ALIGN_POWER_OF_TWO(max_blocks_wide, tx_size_wide_log2[tx_size]);
-}
-
-static INLINE int max_intra_block_height(const MACROBLOCKD *xd,
- BLOCK_SIZE plane_bsize, int plane,
- TX_SIZE tx_size) {
- const int max_blocks_high = max_block_high(xd, plane_bsize, plane)
- << tx_size_high_log2[0];
- return ALIGN_POWER_OF_TWO(max_blocks_high, tx_size_high_log2[tx_size]);
-}
-
-static INLINE void av1_zero_above_context(AV1_COMMON *const cm, const MACROBLOCKD *xd,
- int mi_col_start, int mi_col_end, const int tile_row) {
- const SequenceHeader *const seq_params = &cm->seq_params;
- const int num_planes = av1_num_planes(cm);
- const int width = mi_col_end - mi_col_start;
- const int aligned_width =
- ALIGN_POWER_OF_TWO(width, seq_params->mib_size_log2);
-
- const int offset_y = mi_col_start;
- const int width_y = aligned_width;
- const int offset_uv = offset_y >> seq_params->subsampling_x;
- const int width_uv = width_y >> seq_params->subsampling_x;
-
- av1_zero_array(cm->above_context[0][tile_row] + offset_y, width_y);
- if (num_planes > 1) {
- if (cm->above_context[1][tile_row] && cm->above_context[2][tile_row]) {
- av1_zero_array(cm->above_context[1][tile_row] + offset_uv, width_uv);
- av1_zero_array(cm->above_context[2][tile_row] + offset_uv, width_uv);
- } else {
- aom_internal_error(xd->error_info, AOM_CODEC_CORRUPT_FRAME,
- "Invalid value of planes");
- }
- }
-
- av1_zero_array(cm->above_seg_context[tile_row] + mi_col_start, aligned_width);
-
- memset(cm->above_txfm_context[tile_row] + mi_col_start,
- tx_size_wide[TX_SIZES_LARGEST],
- aligned_width * sizeof(TXFM_CONTEXT));
-}
-
-static INLINE void av1_zero_left_context(MACROBLOCKD *const xd) {
- av1_zero(xd->left_context);
- av1_zero(xd->left_seg_context);
-
- memset(xd->left_txfm_context_buffer, tx_size_high[TX_SIZES_LARGEST],
- sizeof(xd->left_txfm_context_buffer));
-}
-
-// Disable array-bounds checks as the TX_SIZE enum contains values larger than
-// TX_SIZES_ALL (TX_INVALID) which make extending the array as a workaround
-// infeasible. The assert is enough for static analysis and this or other tools
-// asan, valgrind would catch oob access at runtime.
-#if defined(__GNUC__) && __GNUC__ >= 4
-#pragma GCC diagnostic ignored "-Warray-bounds"
-#endif
-
-#if defined(__GNUC__) && __GNUC__ >= 4
-#pragma GCC diagnostic warning "-Warray-bounds"
-#endif
-
-static INLINE void set_txfm_ctx(TXFM_CONTEXT *txfm_ctx, uint8_t txs, int len) {
- int i;
- for (i = 0; i < len; ++i) txfm_ctx[i] = txs;
-}
-
-static INLINE void set_txfm_ctxs(TX_SIZE tx_size, int n4_w, int n4_h, int skip,
- const MACROBLOCKD *xd) {
- uint8_t bw = tx_size_wide[tx_size];
- uint8_t bh = tx_size_high[tx_size];
-
- if (skip) {
- bw = n4_w * MI_SIZE;
- bh = n4_h * MI_SIZE;
- }
-
- set_txfm_ctx(xd->above_txfm_context, bw, n4_w);
- set_txfm_ctx(xd->left_txfm_context, bh, n4_h);
-}
-
-static INLINE void txfm_partition_update(TXFM_CONTEXT *above_ctx,
- TXFM_CONTEXT *left_ctx,
- TX_SIZE tx_size, TX_SIZE txb_size) {
- BLOCK_SIZE bsize = txsize_to_bsize[txb_size];
- int bh = mi_size_high[bsize];
- int bw = mi_size_wide[bsize];
- uint8_t txw = tx_size_wide[tx_size];
- uint8_t txh = tx_size_high[tx_size];
- int i;
- for (i = 0; i < bh; ++i) left_ctx[i] = txh;
- for (i = 0; i < bw; ++i) above_ctx[i] = txw;
-}
-
-static INLINE TX_SIZE get_sqr_tx_size(int tx_dim) {
- switch (tx_dim) {
- case 128:
- case 64: return TX_64X64; break;
- case 32: return TX_32X32; break;
- case 16: return TX_16X16; break;
- case 8: return TX_8X8; break;
- default: return TX_4X4;
- }
-}
-
-static INLINE TX_SIZE get_tx_size(int width, int height) {
- if (width == height) {
- return get_sqr_tx_size(width);
- }
- if (width < height) {
- if (width + width == height) {
- switch (width) {
- case 4: return TX_4X8; break;
- case 8: return TX_8X16; break;
- case 16: return TX_16X32; break;
- case 32: return TX_32X64; break;
- }
- } else {
- switch (width) {
- case 4: return TX_4X16; break;
- case 8: return TX_8X32; break;
- case 16: return TX_16X64; break;
- }
- }
- } else {
- if (height + height == width) {
- switch (height) {
- case 4: return TX_8X4; break;
- case 8: return TX_16X8; break;
- case 16: return TX_32X16; break;
- case 32: return TX_64X32; break;
- }
- } else {
- switch (height) {
- case 4: return TX_16X4; break;
- case 8: return TX_32X8; break;
- case 16: return TX_64X16; break;
- }
- }
- }
- assert(0);
- return TX_4X4;
-}
-
-static INLINE int txfm_partition_context(TXFM_CONTEXT *above_ctx,
- TXFM_CONTEXT *left_ctx,
- BLOCK_SIZE bsize, TX_SIZE tx_size) {
- const uint8_t txw = tx_size_wide[tx_size];
- const uint8_t txh = tx_size_high[tx_size];
- const int above = *above_ctx < txw;
- const int left = *left_ctx < txh;
- int category = TXFM_PARTITION_CONTEXTS;
-
- // dummy return, not used by others.
- if (tx_size <= TX_4X4) return 0;
-
- TX_SIZE max_tx_size =
- get_sqr_tx_size(AOMMAX(block_size_wide[bsize], block_size_high[bsize]));
-
- if (max_tx_size >= TX_8X8) {
- category =
- (txsize_sqr_up_map[tx_size] != max_tx_size && max_tx_size > TX_8X8) +
- (TX_SIZES - 1 - max_tx_size) * 2;
- }
- assert(category != TXFM_PARTITION_CONTEXTS);
- return category * 3 + above + left;
-}
-
-// Compute the next partition in the direction of the sb_type stored in the mi
-// array, starting with bsize.
-static INLINE PARTITION_TYPE get_partition(const AV1_COMMON *const cm,
- int mi_row, int mi_col,
- BLOCK_SIZE bsize) {
- if (mi_row >= cm->mi_rows || mi_col >= cm->mi_cols) return PARTITION_INVALID;
-
- const int offset = mi_row * cm->mi_stride + mi_col;
- MB_MODE_INFO **mi = cm->mi_grid_visible + offset;
- const BLOCK_SIZE subsize = mi[0]->sb_type;
-
- if (subsize == bsize) return PARTITION_NONE;
-
- const int bhigh = mi_size_high[bsize];
- const int bwide = mi_size_wide[bsize];
- const int sshigh = mi_size_high[subsize];
- const int sswide = mi_size_wide[subsize];
-
- if (bsize > BLOCK_8X8 && mi_row + bwide / 2 < cm->mi_rows &&
- mi_col + bhigh / 2 < cm->mi_cols) {
- // In this case, the block might be using an extended partition
- // type.
- const MB_MODE_INFO *const mbmi_right = mi[bwide / 2];
- const MB_MODE_INFO *const mbmi_below = mi[bhigh / 2 * cm->mi_stride];
-
- if (sswide == bwide) {
- // Smaller height but same width. Is PARTITION_HORZ_4, PARTITION_HORZ or
- // PARTITION_HORZ_B. To distinguish the latter two, check if the lower
- // half was split.
- if (sshigh * 4 == bhigh) return PARTITION_HORZ_4;
- assert(sshigh * 2 == bhigh);
-
- if (mbmi_below->sb_type == subsize)
- return PARTITION_HORZ;
- else
- return PARTITION_HORZ_B;
- } else if (sshigh == bhigh) {
- // Smaller width but same height. Is PARTITION_VERT_4, PARTITION_VERT or
- // PARTITION_VERT_B. To distinguish the latter two, check if the right
- // half was split.
- if (sswide * 4 == bwide) return PARTITION_VERT_4;
- assert(sswide * 2 == bhigh);
-
- if (mbmi_right->sb_type == subsize)
- return PARTITION_VERT;
- else
- return PARTITION_VERT_B;
- } else {
- // Smaller width and smaller height. Might be PARTITION_SPLIT or could be
- // PARTITION_HORZ_A or PARTITION_VERT_A. If subsize isn't halved in both
- // dimensions, we immediately know this is a split (which will recurse to
- // get to subsize). Otherwise look down and to the right. With
- // PARTITION_VERT_A, the right block will have height bhigh; with
- // PARTITION_HORZ_A, the lower block with have width bwide. Otherwise
- // it's PARTITION_SPLIT.
- if (sswide * 2 != bwide || sshigh * 2 != bhigh) return PARTITION_SPLIT;
-
- if (mi_size_wide[mbmi_below->sb_type] == bwide) return PARTITION_HORZ_A;
- if (mi_size_high[mbmi_right->sb_type] == bhigh) return PARTITION_VERT_A;
-
- return PARTITION_SPLIT;
- }
- }
- const int vert_split = sswide < bwide;
- const int horz_split = sshigh < bhigh;
- const int split_idx = (vert_split << 1) | horz_split;
- assert(split_idx != 0);
-
- static const PARTITION_TYPE base_partitions[4] = {
- PARTITION_INVALID, PARTITION_HORZ, PARTITION_VERT, PARTITION_SPLIT
- };
-
- return base_partitions[split_idx];
-}
-
-static INLINE void set_use_reference_buffer(AV1_COMMON *const cm, int use) {
- cm->seq_params.frame_id_numbers_present_flag = use;
-}
-
-static INLINE void set_sb_size(SequenceHeader *const seq_params,
- BLOCK_SIZE sb_size) {
- seq_params->sb_size = sb_size;
- seq_params->mib_size = mi_size_wide[seq_params->sb_size];
- seq_params->mib_size_log2 = mi_size_wide_log2[seq_params->sb_size];
-}
-
-// Returns true if the frame is fully lossless at the coded resolution.
-// Note: If super-resolution is used, such a frame will still NOT be lossless at
-// the upscaled resolution.
-static INLINE int is_coded_lossless(const AV1_COMMON *cm,
- const MACROBLOCKD *xd) {
- int coded_lossless = 1;
- if (cm->seg.enabled) {
- for (int i = 0; i < MAX_SEGMENTS; ++i) {
- if (!xd->lossless[i]) {
- coded_lossless = 0;
- break;
- }
- }
- } else {
- coded_lossless = xd->lossless[0];
- }
- return coded_lossless;
-}
-
-static INLINE int is_valid_seq_level_idx(uint8_t seq_level_idx) {
- return seq_level_idx < 24 || seq_level_idx == 31;
-}
-
-static INLINE uint8_t major_minor_to_seq_level_idx(BitstreamLevel bl) {
- assert(bl.major >= LEVEL_MAJOR_MIN && bl.major <= LEVEL_MAJOR_MAX);
- // Since bl.minor is unsigned a comparison will return a warning:
- // comparison is always true due to limited range of data type
- assert(LEVEL_MINOR_MIN == 0);
- assert(bl.minor <= LEVEL_MINOR_MAX);
- return ((bl.major - LEVEL_MAJOR_MIN) << LEVEL_MINOR_BITS) + bl.minor;
-}
-
-#ifdef __cplusplus
-} // extern "C"
-#endif
-
-#endif // AOM_AV1_COMMON_ONYXC_INT_H_
diff --git a/third_party/aom/av1/common/ppc/cfl_ppc.c b/third_party/aom/av1/common/ppc/cfl_ppc.c
deleted file mode 100644
index 026a07809..000000000
--- a/third_party/aom/av1/common/ppc/cfl_ppc.c
+++ /dev/null
@@ -1,152 +0,0 @@
-/*
- * Copyright (c) 2017, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#include <altivec.h>
-
-#include "config/av1_rtcd.h"
-
-#include "av1/common/cfl.h"
-
-#define OFF_0 0
-#define OFF_1 16
-#define OFF_2 32
-#define OFF_3 48
-#define CFL_BUF_LINE_BYTES 64
-#define CFL_LINE_1 64
-#define CFL_LINE_2 128
-#define CFL_LINE_3 192
-
-typedef vector signed char int8x16_t; // NOLINT(runtime/int)
-typedef vector unsigned char uint8x16_t; // NOLINT(runtime/int)
-typedef vector signed short int16x8_t; // NOLINT(runtime/int)
-typedef vector unsigned short uint16x8_t; // NOLINT(runtime/int)
-typedef vector signed int int32x4_t; // NOLINT(runtime/int)
-typedef vector unsigned int uint32x4_t; // NOLINT(runtime/int)
-typedef vector unsigned long long uint64x2_t; // NOLINT(runtime/int)
-
-static INLINE void subtract_average_vsx(const uint16_t *src_ptr, int16_t *dst,
- int width, int height, int round_offset,
- int num_pel_log2) {
- // int16_t *dst = dst_ptr;
- const int16_t *dst_end = dst + height * CFL_BUF_LINE;
- const int16_t *sum_buf = (const int16_t *)src_ptr;
- const int16_t *end = sum_buf + height * CFL_BUF_LINE;
- const uint32x4_t div_shift = vec_splats((uint32_t)num_pel_log2);
- const uint8x16_t mask_64 = { 0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F,
- 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07 };
- const uint8x16_t mask_32 = { 0x14, 0x15, 0x16, 0x17, 0x00, 0x01, 0x02, 0x03,
- 0x1C, 0x1D, 0x1E, 0x1F, 0x08, 0x09, 0x0A, 0x0B };
-
- int32x4_t sum_32x4_0 = { 0, 0, 0, round_offset };
- int32x4_t sum_32x4_1 = { 0, 0, 0, 0 };
- do {
- sum_32x4_0 = vec_sum4s(vec_vsx_ld(OFF_0, sum_buf), sum_32x4_0);
- sum_32x4_1 = vec_sum4s(vec_vsx_ld(OFF_0 + CFL_LINE_1, sum_buf), sum_32x4_1);
- if (width >= 16) {
- sum_32x4_0 = vec_sum4s(vec_vsx_ld(OFF_1, sum_buf), sum_32x4_0);
- sum_32x4_1 =
- vec_sum4s(vec_vsx_ld(OFF_1 + CFL_LINE_1, sum_buf), sum_32x4_1);
- }
- if (width == 32) {
- sum_32x4_0 = vec_sum4s(vec_vsx_ld(OFF_2, sum_buf), sum_32x4_0);
- sum_32x4_1 =
- vec_sum4s(vec_vsx_ld(OFF_2 + CFL_LINE_1, sum_buf), sum_32x4_1);
- sum_32x4_0 = vec_sum4s(vec_vsx_ld(OFF_3, sum_buf), sum_32x4_0);
- sum_32x4_1 =
- vec_sum4s(vec_vsx_ld(OFF_3 + CFL_LINE_1, sum_buf), sum_32x4_1);
- }
- } while ((sum_buf += (CFL_BUF_LINE * 2)) < end);
- int32x4_t sum_32x4 = vec_add(sum_32x4_0, sum_32x4_1);
-
- const int32x4_t perm_64 = vec_perm(sum_32x4, sum_32x4, mask_64);
- sum_32x4 = vec_add(sum_32x4, perm_64);
- const int32x4_t perm_32 = vec_perm(sum_32x4, sum_32x4, mask_32);
- sum_32x4 = vec_add(sum_32x4, perm_32);
- const int32x4_t avg = vec_sr(sum_32x4, div_shift);
- const int16x8_t vec_avg = vec_pack(avg, avg);
- do {
- vec_vsx_st(vec_sub(vec_vsx_ld(OFF_0, dst), vec_avg), OFF_0, dst);
- vec_vsx_st(vec_sub(vec_vsx_ld(OFF_0 + CFL_LINE_1, dst), vec_avg),
- OFF_0 + CFL_BUF_LINE_BYTES, dst);
- vec_vsx_st(vec_sub(vec_vsx_ld(OFF_0 + CFL_LINE_2, dst), vec_avg),
- OFF_0 + CFL_LINE_2, dst);
- vec_vsx_st(vec_sub(vec_vsx_ld(OFF_0 + CFL_LINE_3, dst), vec_avg),
- OFF_0 + CFL_LINE_3, dst);
- if (width >= 16) {
- vec_vsx_st(vec_sub(vec_vsx_ld(OFF_1, dst), vec_avg), OFF_1, dst);
- vec_vsx_st(vec_sub(vec_vsx_ld(OFF_1 + CFL_LINE_1, dst), vec_avg),
- OFF_1 + CFL_LINE_1, dst);
- vec_vsx_st(vec_sub(vec_vsx_ld(OFF_1 + CFL_LINE_2, dst), vec_avg),
- OFF_1 + CFL_LINE_2, dst);
- vec_vsx_st(vec_sub(vec_vsx_ld(OFF_1 + CFL_LINE_3, dst), vec_avg),
- OFF_1 + CFL_LINE_3, dst);
- }
- if (width == 32) {
- vec_vsx_st(vec_sub(vec_vsx_ld(OFF_2, dst), vec_avg), OFF_2, dst);
- vec_vsx_st(vec_sub(vec_vsx_ld(OFF_2 + CFL_LINE_1, dst), vec_avg),
- OFF_2 + CFL_LINE_1, dst);
- vec_vsx_st(vec_sub(vec_vsx_ld(OFF_2 + CFL_LINE_2, dst), vec_avg),
- OFF_2 + CFL_LINE_2, dst);
- vec_vsx_st(vec_sub(vec_vsx_ld(OFF_2 + CFL_LINE_3, dst), vec_avg),
- OFF_2 + CFL_LINE_3, dst);
-
- vec_vsx_st(vec_sub(vec_vsx_ld(OFF_3, dst), vec_avg), OFF_3, dst);
- vec_vsx_st(vec_sub(vec_vsx_ld(OFF_3 + CFL_LINE_1, dst), vec_avg),
- OFF_3 + CFL_LINE_1, dst);
- vec_vsx_st(vec_sub(vec_vsx_ld(OFF_3 + CFL_LINE_2, dst), vec_avg),
- OFF_3 + CFL_LINE_2, dst);
- vec_vsx_st(vec_sub(vec_vsx_ld(OFF_3 + CFL_LINE_3, dst), vec_avg),
- OFF_3 + CFL_LINE_3, dst);
- }
- } while ((dst += CFL_BUF_LINE * 4) < dst_end);
-}
-
-// Declare wrappers for VSX sizes
-CFL_SUB_AVG_X(vsx, 8, 4, 16, 5)
-CFL_SUB_AVG_X(vsx, 8, 8, 32, 6)
-CFL_SUB_AVG_X(vsx, 8, 16, 64, 7)
-CFL_SUB_AVG_X(vsx, 8, 32, 128, 8)
-CFL_SUB_AVG_X(vsx, 16, 4, 32, 6)
-CFL_SUB_AVG_X(vsx, 16, 8, 64, 7)
-CFL_SUB_AVG_X(vsx, 16, 16, 128, 8)
-CFL_SUB_AVG_X(vsx, 16, 32, 256, 9)
-CFL_SUB_AVG_X(vsx, 32, 8, 128, 8)
-CFL_SUB_AVG_X(vsx, 32, 16, 256, 9)
-CFL_SUB_AVG_X(vsx, 32, 32, 512, 10)
-
-// Based on observation, for small blocks VSX does not outperform C (no 64bit
-// load and store intrinsics). So we call the C code for block widths 4.
-cfl_subtract_average_fn get_subtract_average_fn_vsx(TX_SIZE tx_size) {
- static const cfl_subtract_average_fn sub_avg[TX_SIZES_ALL] = {
- subtract_average_4x4_c, /* 4x4 */
- subtract_average_8x8_vsx, /* 8x8 */
- subtract_average_16x16_vsx, /* 16x16 */
- subtract_average_32x32_vsx, /* 32x32 */
- cfl_subtract_average_null, /* 64x64 (invalid CFL size) */
- subtract_average_4x8_c, /* 4x8 */
- subtract_average_8x4_vsx, /* 8x4 */
- subtract_average_8x16_vsx, /* 8x16 */
- subtract_average_16x8_vsx, /* 16x8 */
- subtract_average_16x32_vsx, /* 16x32 */
- subtract_average_32x16_vsx, /* 32x16 */
- cfl_subtract_average_null, /* 32x64 (invalid CFL size) */
- cfl_subtract_average_null, /* 64x32 (invalid CFL size) */
- subtract_average_4x16_c, /* 4x16 */
- subtract_average_16x4_vsx, /* 16x4 */
- subtract_average_8x32_vsx, /* 8x32 */
- subtract_average_32x8_vsx, /* 32x8 */
- cfl_subtract_average_null, /* 16x64 (invalid CFL size) */
- cfl_subtract_average_null, /* 64x16 (invalid CFL size) */
- };
- // Modulo TX_SIZES_ALL to ensure that an attacker won't be able to
- // index the function pointer array out of bounds.
- return sub_avg[tx_size % TX_SIZES_ALL];
-}
diff --git a/third_party/aom/av1/common/pred_common.c b/third_party/aom/av1/common/pred_common.c
deleted file mode 100644
index 5952441d1..000000000
--- a/third_party/aom/av1/common/pred_common.c
+++ /dev/null
@@ -1,501 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#include "av1/common/common.h"
-#include "av1/common/pred_common.h"
-#include "av1/common/reconinter.h"
-#include "av1/common/reconintra.h"
-#include "av1/common/seg_common.h"
-
-// Returns a context number for the given MB prediction signal
-static InterpFilter get_ref_filter_type(const MB_MODE_INFO *ref_mbmi,
- const MACROBLOCKD *xd, int dir,
- MV_REFERENCE_FRAME ref_frame) {
- (void)xd;
-
- return ((ref_mbmi->ref_frame[0] == ref_frame ||
- ref_mbmi->ref_frame[1] == ref_frame)
- ? av1_extract_interp_filter(ref_mbmi->interp_filters, dir & 0x01)
- : SWITCHABLE_FILTERS);
-}
-
-int av1_get_pred_context_switchable_interp(const MACROBLOCKD *xd, int dir) {
- const MB_MODE_INFO *const mbmi = xd->mi[0];
- const int ctx_offset =
- (mbmi->ref_frame[1] > INTRA_FRAME) * INTER_FILTER_COMP_OFFSET;
- assert(dir == 0 || dir == 1);
- const MV_REFERENCE_FRAME ref_frame = mbmi->ref_frame[0];
- // Note:
- // The mode info data structure has a one element border above and to the
- // left of the entries corresponding to real macroblocks.
- // The prediction flags in these dummy entries are initialized to 0.
- int filter_type_ctx = ctx_offset + (dir & 0x01) * INTER_FILTER_DIR_OFFSET;
- int left_type = SWITCHABLE_FILTERS;
- int above_type = SWITCHABLE_FILTERS;
-
- if (xd->left_available)
- left_type = get_ref_filter_type(xd->mi[-1], xd, dir, ref_frame);
-
- if (xd->up_available)
- above_type =
- get_ref_filter_type(xd->mi[-xd->mi_stride], xd, dir, ref_frame);
-
- if (left_type == above_type) {
- filter_type_ctx += left_type;
- } else if (left_type == SWITCHABLE_FILTERS) {
- assert(above_type != SWITCHABLE_FILTERS);
- filter_type_ctx += above_type;
- } else if (above_type == SWITCHABLE_FILTERS) {
- assert(left_type != SWITCHABLE_FILTERS);
- filter_type_ctx += left_type;
- } else {
- filter_type_ctx += SWITCHABLE_FILTERS;
- }
-
- return filter_type_ctx;
-}
-
-static void palette_add_to_cache(uint16_t *cache, int *n, uint16_t val) {
- // Do not add an already existing value
- if (*n > 0 && val == cache[*n - 1]) return;
-
- cache[(*n)++] = val;
-}
-
-int av1_get_palette_cache(const MACROBLOCKD *const xd, int plane,
- uint16_t *cache) {
- const int row = -xd->mb_to_top_edge >> 3;
- // Do not refer to above SB row when on SB boundary.
- const MB_MODE_INFO *const above_mi =
- (row % (1 << MIN_SB_SIZE_LOG2)) ? xd->above_mbmi : NULL;
- const MB_MODE_INFO *const left_mi = xd->left_mbmi;
- int above_n = 0, left_n = 0;
- if (above_mi) above_n = above_mi->palette_mode_info.palette_size[plane != 0];
- if (left_mi) left_n = left_mi->palette_mode_info.palette_size[plane != 0];
- if (above_n == 0 && left_n == 0) return 0;
- int above_idx = plane * PALETTE_MAX_SIZE;
- int left_idx = plane * PALETTE_MAX_SIZE;
- int n = 0;
- const uint16_t *above_colors =
- above_mi ? above_mi->palette_mode_info.palette_colors : NULL;
- const uint16_t *left_colors =
- left_mi ? left_mi->palette_mode_info.palette_colors : NULL;
- // Merge the sorted lists of base colors from above and left to get
- // combined sorted color cache.
- while (above_n > 0 && left_n > 0) {
- uint16_t v_above = above_colors[above_idx];
- uint16_t v_left = left_colors[left_idx];
- if (v_left < v_above) {
- palette_add_to_cache(cache, &n, v_left);
- ++left_idx, --left_n;
- } else {
- palette_add_to_cache(cache, &n, v_above);
- ++above_idx, --above_n;
- if (v_left == v_above) ++left_idx, --left_n;
- }
- }
- while (above_n-- > 0) {
- uint16_t val = above_colors[above_idx++];
- palette_add_to_cache(cache, &n, val);
- }
- while (left_n-- > 0) {
- uint16_t val = left_colors[left_idx++];
- palette_add_to_cache(cache, &n, val);
- }
- assert(n <= 2 * PALETTE_MAX_SIZE);
- return n;
-}
-
-// The mode info data structure has a one element border above and to the
-// left of the entries corresponding to real macroblocks.
-// The prediction flags in these dummy entries are initialized to 0.
-// 0 - inter/inter, inter/--, --/inter, --/--
-// 1 - intra/inter, inter/intra
-// 2 - intra/--, --/intra
-// 3 - intra/intra
-int av1_get_intra_inter_context(const MACROBLOCKD *xd) {
- const MB_MODE_INFO *const above_mbmi = xd->above_mbmi;
- const MB_MODE_INFO *const left_mbmi = xd->left_mbmi;
- const int has_above = xd->up_available;
- const int has_left = xd->left_available;
-
- if (has_above && has_left) { // both edges available
- const int above_intra = !is_inter_block(above_mbmi);
- const int left_intra = !is_inter_block(left_mbmi);
- return left_intra && above_intra ? 3 : left_intra || above_intra;
- } else if (has_above || has_left) { // one edge available
- return 2 * !is_inter_block(has_above ? above_mbmi : left_mbmi);
- } else {
- return 0;
- }
-}
-
-#define CHECK_BACKWARD_REFS(ref_frame) \
- (((ref_frame) >= BWDREF_FRAME) && ((ref_frame) <= ALTREF_FRAME))
-#define IS_BACKWARD_REF_FRAME(ref_frame) CHECK_BACKWARD_REFS(ref_frame)
-
-int av1_get_reference_mode_context(const MACROBLOCKD *xd) {
- int ctx;
- const MB_MODE_INFO *const above_mbmi = xd->above_mbmi;
- const MB_MODE_INFO *const left_mbmi = xd->left_mbmi;
- const int has_above = xd->up_available;
- const int has_left = xd->left_available;
-
- // Note:
- // The mode info data structure has a one element border above and to the
- // left of the entries corresponding to real macroblocks.
- // The prediction flags in these dummy entries are initialized to 0.
- if (has_above && has_left) { // both edges available
- if (!has_second_ref(above_mbmi) && !has_second_ref(left_mbmi))
- // neither edge uses comp pred (0/1)
- ctx = IS_BACKWARD_REF_FRAME(above_mbmi->ref_frame[0]) ^
- IS_BACKWARD_REF_FRAME(left_mbmi->ref_frame[0]);
- else if (!has_second_ref(above_mbmi))
- // one of two edges uses comp pred (2/3)
- ctx = 2 + (IS_BACKWARD_REF_FRAME(above_mbmi->ref_frame[0]) ||
- !is_inter_block(above_mbmi));
- else if (!has_second_ref(left_mbmi))
- // one of two edges uses comp pred (2/3)
- ctx = 2 + (IS_BACKWARD_REF_FRAME(left_mbmi->ref_frame[0]) ||
- !is_inter_block(left_mbmi));
- else // both edges use comp pred (4)
- ctx = 4;
- } else if (has_above || has_left) { // one edge available
- const MB_MODE_INFO *edge_mbmi = has_above ? above_mbmi : left_mbmi;
-
- if (!has_second_ref(edge_mbmi))
- // edge does not use comp pred (0/1)
- ctx = IS_BACKWARD_REF_FRAME(edge_mbmi->ref_frame[0]);
- else
- // edge uses comp pred (3)
- ctx = 3;
- } else { // no edges available (1)
- ctx = 1;
- }
- assert(ctx >= 0 && ctx < COMP_INTER_CONTEXTS);
- return ctx;
-}
-
-int av1_get_comp_reference_type_context(const MACROBLOCKD *xd) {
- int pred_context;
- const MB_MODE_INFO *const above_mbmi = xd->above_mbmi;
- const MB_MODE_INFO *const left_mbmi = xd->left_mbmi;
- const int above_in_image = xd->up_available;
- const int left_in_image = xd->left_available;
-
- if (above_in_image && left_in_image) { // both edges available
- const int above_intra = !is_inter_block(above_mbmi);
- const int left_intra = !is_inter_block(left_mbmi);
-
- if (above_intra && left_intra) { // intra/intra
- pred_context = 2;
- } else if (above_intra || left_intra) { // intra/inter
- const MB_MODE_INFO *inter_mbmi = above_intra ? left_mbmi : above_mbmi;
-
- if (!has_second_ref(inter_mbmi)) // single pred
- pred_context = 2;
- else // comp pred
- pred_context = 1 + 2 * has_uni_comp_refs(inter_mbmi);
- } else { // inter/inter
- const int a_sg = !has_second_ref(above_mbmi);
- const int l_sg = !has_second_ref(left_mbmi);
- const MV_REFERENCE_FRAME frfa = above_mbmi->ref_frame[0];
- const MV_REFERENCE_FRAME frfl = left_mbmi->ref_frame[0];
-
- if (a_sg && l_sg) { // single/single
- pred_context = 1 + 2 * (!(IS_BACKWARD_REF_FRAME(frfa) ^
- IS_BACKWARD_REF_FRAME(frfl)));
- } else if (l_sg || a_sg) { // single/comp
- const int uni_rfc =
- a_sg ? has_uni_comp_refs(left_mbmi) : has_uni_comp_refs(above_mbmi);
-
- if (!uni_rfc) // comp bidir
- pred_context = 1;
- else // comp unidir
- pred_context = 3 + (!(IS_BACKWARD_REF_FRAME(frfa) ^
- IS_BACKWARD_REF_FRAME(frfl)));
- } else { // comp/comp
- const int a_uni_rfc = has_uni_comp_refs(above_mbmi);
- const int l_uni_rfc = has_uni_comp_refs(left_mbmi);
-
- if (!a_uni_rfc && !l_uni_rfc) // bidir/bidir
- pred_context = 0;
- else if (!a_uni_rfc || !l_uni_rfc) // unidir/bidir
- pred_context = 2;
- else // unidir/unidir
- pred_context =
- 3 + (!((frfa == BWDREF_FRAME) ^ (frfl == BWDREF_FRAME)));
- }
- }
- } else if (above_in_image || left_in_image) { // one edge available
- const MB_MODE_INFO *edge_mbmi = above_in_image ? above_mbmi : left_mbmi;
-
- if (!is_inter_block(edge_mbmi)) { // intra
- pred_context = 2;
- } else { // inter
- if (!has_second_ref(edge_mbmi)) // single pred
- pred_context = 2;
- else // comp pred
- pred_context = 4 * has_uni_comp_refs(edge_mbmi);
- }
- } else { // no edges available
- pred_context = 2;
- }
-
- assert(pred_context >= 0 && pred_context < COMP_REF_TYPE_CONTEXTS);
- return pred_context;
-}
-
-// Returns a context number for the given MB prediction signal
-//
-// Signal the uni-directional compound reference frame pair as either
-// (BWDREF, ALTREF), or (LAST, LAST2) / (LAST, LAST3) / (LAST, GOLDEN),
-// conditioning on the pair is known as uni-directional.
-//
-// 3 contexts: Voting is used to compare the count of forward references with
-// that of backward references from the spatial neighbors.
-int av1_get_pred_context_uni_comp_ref_p(const MACROBLOCKD *xd) {
- const uint8_t *const ref_counts = &xd->neighbors_ref_counts[0];
-
- // Count of forward references (L, L2, L3, or G)
- const int frf_count = ref_counts[LAST_FRAME] + ref_counts[LAST2_FRAME] +
- ref_counts[LAST3_FRAME] + ref_counts[GOLDEN_FRAME];
- // Count of backward references (B or A)
- const int brf_count = ref_counts[BWDREF_FRAME] + ref_counts[ALTREF2_FRAME] +
- ref_counts[ALTREF_FRAME];
-
- const int pred_context =
- (frf_count == brf_count) ? 1 : ((frf_count < brf_count) ? 0 : 2);
-
- assert(pred_context >= 0 && pred_context < UNI_COMP_REF_CONTEXTS);
- return pred_context;
-}
-
-// Returns a context number for the given MB prediction signal
-//
-// Signal the uni-directional compound reference frame pair as
-// either (LAST, LAST2), or (LAST, LAST3) / (LAST, GOLDEN),
-// conditioning on the pair is known as one of the above three.
-//
-// 3 contexts: Voting is used to compare the count of LAST2_FRAME with the
-// total count of LAST3/GOLDEN from the spatial neighbors.
-int av1_get_pred_context_uni_comp_ref_p1(const MACROBLOCKD *xd) {
- const uint8_t *const ref_counts = &xd->neighbors_ref_counts[0];
-
- // Count of LAST2
- const int last2_count = ref_counts[LAST2_FRAME];
- // Count of LAST3 or GOLDEN
- const int last3_or_gld_count =
- ref_counts[LAST3_FRAME] + ref_counts[GOLDEN_FRAME];
-
- const int pred_context = (last2_count == last3_or_gld_count)
- ? 1
- : ((last2_count < last3_or_gld_count) ? 0 : 2);
-
- assert(pred_context >= 0 && pred_context < UNI_COMP_REF_CONTEXTS);
- return pred_context;
-}
-
-// Returns a context number for the given MB prediction signal
-//
-// Signal the uni-directional compound reference frame pair as
-// either (LAST, LAST3) or (LAST, GOLDEN),
-// conditioning on the pair is known as one of the above two.
-//
-// 3 contexts: Voting is used to compare the count of LAST3_FRAME with the
-// total count of GOLDEN_FRAME from the spatial neighbors.
-int av1_get_pred_context_uni_comp_ref_p2(const MACROBLOCKD *xd) {
- const uint8_t *const ref_counts = &xd->neighbors_ref_counts[0];
-
- // Count of LAST3
- const int last3_count = ref_counts[LAST3_FRAME];
- // Count of GOLDEN
- const int gld_count = ref_counts[GOLDEN_FRAME];
-
- const int pred_context =
- (last3_count == gld_count) ? 1 : ((last3_count < gld_count) ? 0 : 2);
-
- assert(pred_context >= 0 && pred_context < UNI_COMP_REF_CONTEXTS);
- return pred_context;
-}
-
-// == Common context functions for both comp and single ref ==
-//
-// Obtain contexts to signal a reference frame to be either LAST/LAST2 or
-// LAST3/GOLDEN.
-static int get_pred_context_ll2_or_l3gld(const MACROBLOCKD *xd) {
- const uint8_t *const ref_counts = &xd->neighbors_ref_counts[0];
-
- // Count of LAST + LAST2
- const int last_last2_count = ref_counts[LAST_FRAME] + ref_counts[LAST2_FRAME];
- // Count of LAST3 + GOLDEN
- const int last3_gld_count =
- ref_counts[LAST3_FRAME] + ref_counts[GOLDEN_FRAME];
-
- const int pred_context = (last_last2_count == last3_gld_count)
- ? 1
- : ((last_last2_count < last3_gld_count) ? 0 : 2);
-
- assert(pred_context >= 0 && pred_context < REF_CONTEXTS);
- return pred_context;
-}
-
-// Obtain contexts to signal a reference frame to be either LAST or LAST2.
-static int get_pred_context_last_or_last2(const MACROBLOCKD *xd) {
- const uint8_t *const ref_counts = &xd->neighbors_ref_counts[0];
-
- // Count of LAST
- const int last_count = ref_counts[LAST_FRAME];
- // Count of LAST2
- const int last2_count = ref_counts[LAST2_FRAME];
-
- const int pred_context =
- (last_count == last2_count) ? 1 : ((last_count < last2_count) ? 0 : 2);
-
- assert(pred_context >= 0 && pred_context < REF_CONTEXTS);
- return pred_context;
-}
-
-// Obtain contexts to signal a reference frame to be either LAST3 or GOLDEN.
-static int get_pred_context_last3_or_gld(const MACROBLOCKD *xd) {
- const uint8_t *const ref_counts = &xd->neighbors_ref_counts[0];
-
- // Count of LAST3
- const int last3_count = ref_counts[LAST3_FRAME];
- // Count of GOLDEN
- const int gld_count = ref_counts[GOLDEN_FRAME];
-
- const int pred_context =
- (last3_count == gld_count) ? 1 : ((last3_count < gld_count) ? 0 : 2);
-
- assert(pred_context >= 0 && pred_context < REF_CONTEXTS);
- return pred_context;
-}
-
-// Obtain contexts to signal a reference frame be either BWDREF/ALTREF2, or
-// ALTREF.
-static int get_pred_context_brfarf2_or_arf(const MACROBLOCKD *xd) {
- const uint8_t *const ref_counts = &xd->neighbors_ref_counts[0];
-
- // Counts of BWDREF, ALTREF2, or ALTREF frames (B, A2, or A)
- const int brfarf2_count =
- ref_counts[BWDREF_FRAME] + ref_counts[ALTREF2_FRAME];
- const int arf_count = ref_counts[ALTREF_FRAME];
-
- const int pred_context =
- (brfarf2_count == arf_count) ? 1 : ((brfarf2_count < arf_count) ? 0 : 2);
-
- assert(pred_context >= 0 && pred_context < REF_CONTEXTS);
- return pred_context;
-}
-
-// Obtain contexts to signal a reference frame be either BWDREF or ALTREF2.
-static int get_pred_context_brf_or_arf2(const MACROBLOCKD *xd) {
- const uint8_t *const ref_counts = &xd->neighbors_ref_counts[0];
-
- // Count of BWDREF frames (B)
- const int brf_count = ref_counts[BWDREF_FRAME];
- // Count of ALTREF2 frames (A2)
- const int arf2_count = ref_counts[ALTREF2_FRAME];
-
- const int pred_context =
- (brf_count == arf2_count) ? 1 : ((brf_count < arf2_count) ? 0 : 2);
-
- assert(pred_context >= 0 && pred_context < REF_CONTEXTS);
- return pred_context;
-}
-
-// == Context functions for comp ref ==
-//
-// Returns a context number for the given MB prediction signal
-// Signal the first reference frame for a compound mode be either
-// GOLDEN/LAST3, or LAST/LAST2.
-int av1_get_pred_context_comp_ref_p(const MACROBLOCKD *xd) {
- return get_pred_context_ll2_or_l3gld(xd);
-}
-
-// Returns a context number for the given MB prediction signal
-// Signal the first reference frame for a compound mode be LAST,
-// conditioning on that it is known either LAST/LAST2.
-int av1_get_pred_context_comp_ref_p1(const MACROBLOCKD *xd) {
- return get_pred_context_last_or_last2(xd);
-}
-
-// Returns a context number for the given MB prediction signal
-// Signal the first reference frame for a compound mode be GOLDEN,
-// conditioning on that it is known either GOLDEN or LAST3.
-int av1_get_pred_context_comp_ref_p2(const MACROBLOCKD *xd) {
- return get_pred_context_last3_or_gld(xd);
-}
-
-// Signal the 2nd reference frame for a compound mode be either
-// ALTREF, or ALTREF2/BWDREF.
-int av1_get_pred_context_comp_bwdref_p(const MACROBLOCKD *xd) {
- return get_pred_context_brfarf2_or_arf(xd);
-}
-
-// Signal the 2nd reference frame for a compound mode be either
-// ALTREF2 or BWDREF.
-int av1_get_pred_context_comp_bwdref_p1(const MACROBLOCKD *xd) {
- return get_pred_context_brf_or_arf2(xd);
-}
-
-// == Context functions for single ref ==
-//
-// For the bit to signal whether the single reference is a forward reference
-// frame or a backward reference frame.
-int av1_get_pred_context_single_ref_p1(const MACROBLOCKD *xd) {
- const uint8_t *const ref_counts = &xd->neighbors_ref_counts[0];
-
- // Count of forward reference frames
- const int fwd_count = ref_counts[LAST_FRAME] + ref_counts[LAST2_FRAME] +
- ref_counts[LAST3_FRAME] + ref_counts[GOLDEN_FRAME];
- // Count of backward reference frames
- const int bwd_count = ref_counts[BWDREF_FRAME] + ref_counts[ALTREF2_FRAME] +
- ref_counts[ALTREF_FRAME];
-
- const int pred_context =
- (fwd_count == bwd_count) ? 1 : ((fwd_count < bwd_count) ? 0 : 2);
-
- assert(pred_context >= 0 && pred_context < REF_CONTEXTS);
- return pred_context;
-}
-
-// For the bit to signal whether the single reference is ALTREF_FRAME or
-// non-ALTREF backward reference frame, knowing that it shall be either of
-// these 2 choices.
-int av1_get_pred_context_single_ref_p2(const MACROBLOCKD *xd) {
- return get_pred_context_brfarf2_or_arf(xd);
-}
-
-// For the bit to signal whether the single reference is LAST3/GOLDEN or
-// LAST2/LAST, knowing that it shall be either of these 2 choices.
-int av1_get_pred_context_single_ref_p3(const MACROBLOCKD *xd) {
- return get_pred_context_ll2_or_l3gld(xd);
-}
-
-// For the bit to signal whether the single reference is LAST2_FRAME or
-// LAST_FRAME, knowing that it shall be either of these 2 choices.
-int av1_get_pred_context_single_ref_p4(const MACROBLOCKD *xd) {
- return get_pred_context_last_or_last2(xd);
-}
-
-// For the bit to signal whether the single reference is GOLDEN_FRAME or
-// LAST3_FRAME, knowing that it shall be either of these 2 choices.
-int av1_get_pred_context_single_ref_p5(const MACROBLOCKD *xd) {
- return get_pred_context_last3_or_gld(xd);
-}
-
-// For the bit to signal whether the single reference is ALTREF2_FRAME or
-// BWDREF_FRAME, knowing that it shall be either of these 2 choices.
-int av1_get_pred_context_single_ref_p6(const MACROBLOCKD *xd) {
- return get_pred_context_brf_or_arf2(xd);
-}
diff --git a/third_party/aom/av1/common/pred_common.h b/third_party/aom/av1/common/pred_common.h
deleted file mode 100644
index 6dba2322d..000000000
--- a/third_party/aom/av1/common/pred_common.h
+++ /dev/null
@@ -1,360 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#ifndef AOM_AV1_COMMON_PRED_COMMON_H_
-#define AOM_AV1_COMMON_PRED_COMMON_H_
-
-#include "av1/common/blockd.h"
-#include "av1/common/mvref_common.h"
-#include "av1/common/onyxc_int.h"
-#include "aom_dsp/aom_dsp_common.h"
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-static INLINE int get_segment_id(const AV1_COMMON *const cm,
- const uint8_t *segment_ids, BLOCK_SIZE bsize,
- int mi_row, int mi_col) {
- const int mi_offset = mi_row * cm->mi_cols + mi_col;
- const int bw = mi_size_wide[bsize];
- const int bh = mi_size_high[bsize];
- const int xmis = AOMMIN(cm->mi_cols - mi_col, bw);
- const int ymis = AOMMIN(cm->mi_rows - mi_row, bh);
- int x, y, segment_id = MAX_SEGMENTS;
-
- for (y = 0; y < ymis; ++y)
- for (x = 0; x < xmis; ++x)
- segment_id =
- AOMMIN(segment_id, segment_ids[mi_offset + y * cm->mi_cols + x]);
-
- assert(segment_id >= 0 && segment_id < MAX_SEGMENTS);
- return segment_id;
-}
-
-static INLINE int av1_get_spatial_seg_pred(const AV1_COMMON *const cm,
- const MACROBLOCKD *const xd,
- int mi_row, int mi_col,
- int *cdf_index) {
- int prev_ul = -1; // top left segment_id
- int prev_l = -1; // left segment_id
- int prev_u = -1; // top segment_id
- if ((xd->up_available) && (xd->left_available)) {
- prev_ul = get_segment_id(cm, cm->current_frame_seg_map, BLOCK_4X4,
- mi_row - 1, mi_col - 1);
- }
- if (xd->up_available) {
- prev_u = get_segment_id(cm, cm->current_frame_seg_map, BLOCK_4X4,
- mi_row - 1, mi_col - 0);
- }
- if (xd->left_available) {
- prev_l = get_segment_id(cm, cm->current_frame_seg_map, BLOCK_4X4,
- mi_row - 0, mi_col - 1);
- }
-
- // Pick CDF index based on number of matching/out-of-bounds segment IDs.
- if (prev_ul < 0 || prev_u < 0 || prev_l < 0) /* Edge case */
- *cdf_index = 0;
- else if ((prev_ul == prev_u) && (prev_ul == prev_l))
- *cdf_index = 2;
- else if ((prev_ul == prev_u) || (prev_ul == prev_l) || (prev_u == prev_l))
- *cdf_index = 1;
- else
- *cdf_index = 0;
-
- // If 2 or more are identical returns that as predictor, otherwise prev_l.
- if (prev_u == -1) // edge case
- return prev_l == -1 ? 0 : prev_l;
- if (prev_l == -1) // edge case
- return prev_u;
- return (prev_ul == prev_u) ? prev_u : prev_l;
-}
-
-static INLINE int av1_get_pred_context_seg_id(const MACROBLOCKD *xd) {
- const MB_MODE_INFO *const above_mi = xd->above_mbmi;
- const MB_MODE_INFO *const left_mi = xd->left_mbmi;
- const int above_sip = (above_mi != NULL) ? above_mi->seg_id_predicted : 0;
- const int left_sip = (left_mi != NULL) ? left_mi->seg_id_predicted : 0;
-
- return above_sip + left_sip;
-}
-
-static INLINE int get_comp_index_context(const AV1_COMMON *cm,
- const MACROBLOCKD *xd) {
- MB_MODE_INFO *mbmi = xd->mi[0];
- int bck_idx = cm->frame_refs[mbmi->ref_frame[0] - LAST_FRAME].idx;
- int fwd_idx = cm->frame_refs[mbmi->ref_frame[1] - LAST_FRAME].idx;
- int bck_frame_index = 0, fwd_frame_index = 0;
- int cur_frame_index = cm->cur_frame->cur_frame_offset;
-
- if (bck_idx >= 0)
- bck_frame_index = cm->buffer_pool->frame_bufs[bck_idx].cur_frame_offset;
-
- if (fwd_idx >= 0)
- fwd_frame_index = cm->buffer_pool->frame_bufs[fwd_idx].cur_frame_offset;
- int fwd = abs(get_relative_dist(cm, fwd_frame_index, cur_frame_index));
- int bck = abs(get_relative_dist(cm, cur_frame_index, bck_frame_index));
-
- const MB_MODE_INFO *const above_mi = xd->above_mbmi;
- const MB_MODE_INFO *const left_mi = xd->left_mbmi;
-
- int above_ctx = 0, left_ctx = 0;
- const int offset = (fwd == bck);
-
- if (above_mi) {
- if (has_second_ref(above_mi))
- above_ctx = above_mi->compound_idx;
- else if (above_mi->ref_frame[0] == ALTREF_FRAME)
- above_ctx = 1;
- }
-
- if (left_mi) {
- if (has_second_ref(left_mi))
- left_ctx = left_mi->compound_idx;
- else if (left_mi->ref_frame[0] == ALTREF_FRAME)
- left_ctx = 1;
- }
-
- return above_ctx + left_ctx + 3 * offset;
-}
-
-static INLINE int get_comp_group_idx_context(const MACROBLOCKD *xd) {
- const MB_MODE_INFO *const above_mi = xd->above_mbmi;
- const MB_MODE_INFO *const left_mi = xd->left_mbmi;
- int above_ctx = 0, left_ctx = 0;
-
- if (above_mi) {
- if (has_second_ref(above_mi))
- above_ctx = above_mi->comp_group_idx;
- else if (above_mi->ref_frame[0] == ALTREF_FRAME)
- above_ctx = 3;
- }
- if (left_mi) {
- if (has_second_ref(left_mi))
- left_ctx = left_mi->comp_group_idx;
- else if (left_mi->ref_frame[0] == ALTREF_FRAME)
- left_ctx = 3;
- }
-
- return AOMMIN(5, above_ctx + left_ctx);
-}
-
-static INLINE aom_cdf_prob *av1_get_pred_cdf_seg_id(
- struct segmentation_probs *segp, const MACROBLOCKD *xd) {
- return segp->pred_cdf[av1_get_pred_context_seg_id(xd)];
-}
-
-static INLINE int av1_get_skip_mode_context(const MACROBLOCKD *xd) {
- const MB_MODE_INFO *const above_mi = xd->above_mbmi;
- const MB_MODE_INFO *const left_mi = xd->left_mbmi;
- const int above_skip_mode = above_mi ? above_mi->skip_mode : 0;
- const int left_skip_mode = left_mi ? left_mi->skip_mode : 0;
- return above_skip_mode + left_skip_mode;
-}
-
-static INLINE int av1_get_skip_context(const MACROBLOCKD *xd) {
- const MB_MODE_INFO *const above_mi = xd->above_mbmi;
- const MB_MODE_INFO *const left_mi = xd->left_mbmi;
- const int above_skip = above_mi ? above_mi->skip : 0;
- const int left_skip = left_mi ? left_mi->skip : 0;
- return above_skip + left_skip;
-}
-
-int av1_get_pred_context_switchable_interp(const MACROBLOCKD *xd, int dir);
-
-// Get a list of palette base colors that are used in the above and left blocks,
-// referred to as "color cache". The return value is the number of colors in the
-// cache (<= 2 * PALETTE_MAX_SIZE). The color values are stored in "cache"
-// in ascending order.
-int av1_get_palette_cache(const MACROBLOCKD *const xd, int plane,
- uint16_t *cache);
-
-static INLINE int av1_get_palette_bsize_ctx(BLOCK_SIZE bsize) {
- return num_pels_log2_lookup[bsize] - num_pels_log2_lookup[BLOCK_8X8];
-}
-
-static INLINE int av1_get_palette_mode_ctx(const MACROBLOCKD *xd) {
- const MB_MODE_INFO *const above_mi = xd->above_mbmi;
- const MB_MODE_INFO *const left_mi = xd->left_mbmi;
- int ctx = 0;
- if (above_mi) ctx += (above_mi->palette_mode_info.palette_size[0] > 0);
- if (left_mi) ctx += (left_mi->palette_mode_info.palette_size[0] > 0);
- return ctx;
-}
-
-int av1_get_intra_inter_context(const MACROBLOCKD *xd);
-
-int av1_get_reference_mode_context(const MACROBLOCKD *xd);
-
-static INLINE aom_cdf_prob *av1_get_reference_mode_cdf(const MACROBLOCKD *xd) {
- return xd->tile_ctx->comp_inter_cdf[av1_get_reference_mode_context(xd)];
-}
-
-int av1_get_comp_reference_type_context(const MACROBLOCKD *xd);
-
-// == Uni-directional contexts ==
-
-int av1_get_pred_context_uni_comp_ref_p(const MACROBLOCKD *xd);
-
-int av1_get_pred_context_uni_comp_ref_p1(const MACROBLOCKD *xd);
-
-int av1_get_pred_context_uni_comp_ref_p2(const MACROBLOCKD *xd);
-
-static INLINE aom_cdf_prob *av1_get_comp_reference_type_cdf(
- const MACROBLOCKD *xd) {
- const int pred_context = av1_get_comp_reference_type_context(xd);
- return xd->tile_ctx->comp_ref_type_cdf[pred_context];
-}
-
-static INLINE aom_cdf_prob *av1_get_pred_cdf_uni_comp_ref_p(
- const MACROBLOCKD *xd) {
- const int pred_context = av1_get_pred_context_uni_comp_ref_p(xd);
- return xd->tile_ctx->uni_comp_ref_cdf[pred_context][0];
-}
-
-static INLINE aom_cdf_prob *av1_get_pred_cdf_uni_comp_ref_p1(
- const MACROBLOCKD *xd) {
- const int pred_context = av1_get_pred_context_uni_comp_ref_p1(xd);
- return xd->tile_ctx->uni_comp_ref_cdf[pred_context][1];
-}
-
-static INLINE aom_cdf_prob *av1_get_pred_cdf_uni_comp_ref_p2(
- const MACROBLOCKD *xd) {
- const int pred_context = av1_get_pred_context_uni_comp_ref_p2(xd);
- return xd->tile_ctx->uni_comp_ref_cdf[pred_context][2];
-}
-
-// == Bi-directional contexts ==
-
-int av1_get_pred_context_comp_ref_p(const MACROBLOCKD *xd);
-
-int av1_get_pred_context_comp_ref_p1(const MACROBLOCKD *xd);
-
-int av1_get_pred_context_comp_ref_p2(const MACROBLOCKD *xd);
-
-int av1_get_pred_context_comp_bwdref_p(const MACROBLOCKD *xd);
-
-int av1_get_pred_context_comp_bwdref_p1(const MACROBLOCKD *xd);
-
-static INLINE aom_cdf_prob *av1_get_pred_cdf_comp_ref_p(const MACROBLOCKD *xd) {
- const int pred_context = av1_get_pred_context_comp_ref_p(xd);
- return xd->tile_ctx->comp_ref_cdf[pred_context][0];
-}
-
-static INLINE aom_cdf_prob *av1_get_pred_cdf_comp_ref_p1(
- const MACROBLOCKD *xd) {
- const int pred_context = av1_get_pred_context_comp_ref_p1(xd);
- return xd->tile_ctx->comp_ref_cdf[pred_context][1];
-}
-
-static INLINE aom_cdf_prob *av1_get_pred_cdf_comp_ref_p2(
- const MACROBLOCKD *xd) {
- const int pred_context = av1_get_pred_context_comp_ref_p2(xd);
- return xd->tile_ctx->comp_ref_cdf[pred_context][2];
-}
-
-static INLINE aom_cdf_prob *av1_get_pred_cdf_comp_bwdref_p(
- const MACROBLOCKD *xd) {
- const int pred_context = av1_get_pred_context_comp_bwdref_p(xd);
- return xd->tile_ctx->comp_bwdref_cdf[pred_context][0];
-}
-
-static INLINE aom_cdf_prob *av1_get_pred_cdf_comp_bwdref_p1(
- const MACROBLOCKD *xd) {
- const int pred_context = av1_get_pred_context_comp_bwdref_p1(xd);
- return xd->tile_ctx->comp_bwdref_cdf[pred_context][1];
-}
-
-// == Single contexts ==
-
-int av1_get_pred_context_single_ref_p1(const MACROBLOCKD *xd);
-
-int av1_get_pred_context_single_ref_p2(const MACROBLOCKD *xd);
-
-int av1_get_pred_context_single_ref_p3(const MACROBLOCKD *xd);
-
-int av1_get_pred_context_single_ref_p4(const MACROBLOCKD *xd);
-
-int av1_get_pred_context_single_ref_p5(const MACROBLOCKD *xd);
-
-int av1_get_pred_context_single_ref_p6(const MACROBLOCKD *xd);
-
-static INLINE aom_cdf_prob *av1_get_pred_cdf_single_ref_p1(
- const MACROBLOCKD *xd) {
- return xd->tile_ctx
- ->single_ref_cdf[av1_get_pred_context_single_ref_p1(xd)][0];
-}
-static INLINE aom_cdf_prob *av1_get_pred_cdf_single_ref_p2(
- const MACROBLOCKD *xd) {
- return xd->tile_ctx
- ->single_ref_cdf[av1_get_pred_context_single_ref_p2(xd)][1];
-}
-static INLINE aom_cdf_prob *av1_get_pred_cdf_single_ref_p3(
- const MACROBLOCKD *xd) {
- return xd->tile_ctx
- ->single_ref_cdf[av1_get_pred_context_single_ref_p3(xd)][2];
-}
-static INLINE aom_cdf_prob *av1_get_pred_cdf_single_ref_p4(
- const MACROBLOCKD *xd) {
- return xd->tile_ctx
- ->single_ref_cdf[av1_get_pred_context_single_ref_p4(xd)][3];
-}
-static INLINE aom_cdf_prob *av1_get_pred_cdf_single_ref_p5(
- const MACROBLOCKD *xd) {
- return xd->tile_ctx
- ->single_ref_cdf[av1_get_pred_context_single_ref_p5(xd)][4];
-}
-static INLINE aom_cdf_prob *av1_get_pred_cdf_single_ref_p6(
- const MACROBLOCKD *xd) {
- return xd->tile_ctx
- ->single_ref_cdf[av1_get_pred_context_single_ref_p6(xd)][5];
-}
-
-// Returns a context number for the given MB prediction signal
-// The mode info data structure has a one element border above and to the
-// left of the entries corresponding to real blocks.
-// The prediction flags in these dummy entries are initialized to 0.
-static INLINE int get_tx_size_context(const MACROBLOCKD *xd) {
- const MB_MODE_INFO *mbmi = xd->mi[0];
- const MB_MODE_INFO *const above_mbmi = xd->above_mbmi;
- const MB_MODE_INFO *const left_mbmi = xd->left_mbmi;
- const TX_SIZE max_tx_size = max_txsize_rect_lookup[mbmi->sb_type];
- const int max_tx_wide = tx_size_wide[max_tx_size];
- const int max_tx_high = tx_size_high[max_tx_size];
- const int has_above = xd->up_available;
- const int has_left = xd->left_available;
-
- int above = xd->above_txfm_context[0] >= max_tx_wide;
- int left = xd->left_txfm_context[0] >= max_tx_high;
-
- if (has_above)
- if (is_inter_block(above_mbmi))
- above = block_size_wide[above_mbmi->sb_type] >= max_tx_wide;
-
- if (has_left)
- if (is_inter_block(left_mbmi))
- left = block_size_high[left_mbmi->sb_type] >= max_tx_high;
-
- if (has_above && has_left)
- return (above + left);
- else if (has_above)
- return above;
- else if (has_left)
- return left;
- else
- return 0;
-}
-
-#ifdef __cplusplus
-} // extern "C"
-#endif
-
-#endif // AOM_AV1_COMMON_PRED_COMMON_H_
diff --git a/third_party/aom/av1/common/quant_common.c b/third_party/aom/av1/common/quant_common.c
deleted file mode 100644
index 0e14da7a3..000000000
--- a/third_party/aom/av1/common/quant_common.c
+++ /dev/null
@@ -1,13676 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#include "av1/common/common.h"
-#include "av1/common/onyxc_int.h"
-#include "av1/common/entropy.h"
-#include "av1/common/quant_common.h"
-#include "av1/common/seg_common.h"
-#include "av1/common/blockd.h"
-
-static const int16_t dc_qlookup_Q3[QINDEX_RANGE] = {
- 4, 8, 8, 9, 10, 11, 12, 12, 13, 14, 15, 16, 17, 18,
- 19, 19, 20, 21, 22, 23, 24, 25, 26, 26, 27, 28, 29, 30,
- 31, 32, 32, 33, 34, 35, 36, 37, 38, 38, 39, 40, 41, 42,
- 43, 43, 44, 45, 46, 47, 48, 48, 49, 50, 51, 52, 53, 53,
- 54, 55, 56, 57, 57, 58, 59, 60, 61, 62, 62, 63, 64, 65,
- 66, 66, 67, 68, 69, 70, 70, 71, 72, 73, 74, 74, 75, 76,
- 77, 78, 78, 79, 80, 81, 81, 82, 83, 84, 85, 85, 87, 88,
- 90, 92, 93, 95, 96, 98, 99, 101, 102, 104, 105, 107, 108, 110,
- 111, 113, 114, 116, 117, 118, 120, 121, 123, 125, 127, 129, 131, 134,
- 136, 138, 140, 142, 144, 146, 148, 150, 152, 154, 156, 158, 161, 164,
- 166, 169, 172, 174, 177, 180, 182, 185, 187, 190, 192, 195, 199, 202,
- 205, 208, 211, 214, 217, 220, 223, 226, 230, 233, 237, 240, 243, 247,
- 250, 253, 257, 261, 265, 269, 272, 276, 280, 284, 288, 292, 296, 300,
- 304, 309, 313, 317, 322, 326, 330, 335, 340, 344, 349, 354, 359, 364,
- 369, 374, 379, 384, 389, 395, 400, 406, 411, 417, 423, 429, 435, 441,
- 447, 454, 461, 467, 475, 482, 489, 497, 505, 513, 522, 530, 539, 549,
- 559, 569, 579, 590, 602, 614, 626, 640, 654, 668, 684, 700, 717, 736,
- 755, 775, 796, 819, 843, 869, 896, 925, 955, 988, 1022, 1058, 1098, 1139,
- 1184, 1232, 1282, 1336,
-};
-
-static const int16_t dc_qlookup_10_Q3[QINDEX_RANGE] = {
- 4, 9, 10, 13, 15, 17, 20, 22, 25, 28, 31, 34, 37,
- 40, 43, 47, 50, 53, 57, 60, 64, 68, 71, 75, 78, 82,
- 86, 90, 93, 97, 101, 105, 109, 113, 116, 120, 124, 128, 132,
- 136, 140, 143, 147, 151, 155, 159, 163, 166, 170, 174, 178, 182,
- 185, 189, 193, 197, 200, 204, 208, 212, 215, 219, 223, 226, 230,
- 233, 237, 241, 244, 248, 251, 255, 259, 262, 266, 269, 273, 276,
- 280, 283, 287, 290, 293, 297, 300, 304, 307, 310, 314, 317, 321,
- 324, 327, 331, 334, 337, 343, 350, 356, 362, 369, 375, 381, 387,
- 394, 400, 406, 412, 418, 424, 430, 436, 442, 448, 454, 460, 466,
- 472, 478, 484, 490, 499, 507, 516, 525, 533, 542, 550, 559, 567,
- 576, 584, 592, 601, 609, 617, 625, 634, 644, 655, 666, 676, 687,
- 698, 708, 718, 729, 739, 749, 759, 770, 782, 795, 807, 819, 831,
- 844, 856, 868, 880, 891, 906, 920, 933, 947, 961, 975, 988, 1001,
- 1015, 1030, 1045, 1061, 1076, 1090, 1105, 1120, 1137, 1153, 1170, 1186, 1202,
- 1218, 1236, 1253, 1271, 1288, 1306, 1323, 1342, 1361, 1379, 1398, 1416, 1436,
- 1456, 1476, 1496, 1516, 1537, 1559, 1580, 1601, 1624, 1647, 1670, 1692, 1717,
- 1741, 1766, 1791, 1817, 1844, 1871, 1900, 1929, 1958, 1990, 2021, 2054, 2088,
- 2123, 2159, 2197, 2236, 2276, 2319, 2363, 2410, 2458, 2508, 2561, 2616, 2675,
- 2737, 2802, 2871, 2944, 3020, 3102, 3188, 3280, 3375, 3478, 3586, 3702, 3823,
- 3953, 4089, 4236, 4394, 4559, 4737, 4929, 5130, 5347,
-};
-
-static const int16_t dc_qlookup_12_Q3[QINDEX_RANGE] = {
- 4, 12, 18, 25, 33, 41, 50, 60, 70, 80, 91,
- 103, 115, 127, 140, 153, 166, 180, 194, 208, 222, 237,
- 251, 266, 281, 296, 312, 327, 343, 358, 374, 390, 405,
- 421, 437, 453, 469, 484, 500, 516, 532, 548, 564, 580,
- 596, 611, 627, 643, 659, 674, 690, 706, 721, 737, 752,
- 768, 783, 798, 814, 829, 844, 859, 874, 889, 904, 919,
- 934, 949, 964, 978, 993, 1008, 1022, 1037, 1051, 1065, 1080,
- 1094, 1108, 1122, 1136, 1151, 1165, 1179, 1192, 1206, 1220, 1234,
- 1248, 1261, 1275, 1288, 1302, 1315, 1329, 1342, 1368, 1393, 1419,
- 1444, 1469, 1494, 1519, 1544, 1569, 1594, 1618, 1643, 1668, 1692,
- 1717, 1741, 1765, 1789, 1814, 1838, 1862, 1885, 1909, 1933, 1957,
- 1992, 2027, 2061, 2096, 2130, 2165, 2199, 2233, 2267, 2300, 2334,
- 2367, 2400, 2434, 2467, 2499, 2532, 2575, 2618, 2661, 2704, 2746,
- 2788, 2830, 2872, 2913, 2954, 2995, 3036, 3076, 3127, 3177, 3226,
- 3275, 3324, 3373, 3421, 3469, 3517, 3565, 3621, 3677, 3733, 3788,
- 3843, 3897, 3951, 4005, 4058, 4119, 4181, 4241, 4301, 4361, 4420,
- 4479, 4546, 4612, 4677, 4742, 4807, 4871, 4942, 5013, 5083, 5153,
- 5222, 5291, 5367, 5442, 5517, 5591, 5665, 5745, 5825, 5905, 5984,
- 6063, 6149, 6234, 6319, 6404, 6495, 6587, 6678, 6769, 6867, 6966,
- 7064, 7163, 7269, 7376, 7483, 7599, 7715, 7832, 7958, 8085, 8214,
- 8352, 8492, 8635, 8788, 8945, 9104, 9275, 9450, 9639, 9832, 10031,
- 10245, 10465, 10702, 10946, 11210, 11482, 11776, 12081, 12409, 12750, 13118,
- 13501, 13913, 14343, 14807, 15290, 15812, 16356, 16943, 17575, 18237, 18949,
- 19718, 20521, 21387,
-};
-
-static const int16_t ac_qlookup_Q3[QINDEX_RANGE] = {
- 4, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19,
- 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32,
- 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45,
- 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58,
- 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71,
- 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84,
- 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97,
- 98, 99, 100, 101, 102, 104, 106, 108, 110, 112, 114, 116, 118,
- 120, 122, 124, 126, 128, 130, 132, 134, 136, 138, 140, 142, 144,
- 146, 148, 150, 152, 155, 158, 161, 164, 167, 170, 173, 176, 179,
- 182, 185, 188, 191, 194, 197, 200, 203, 207, 211, 215, 219, 223,
- 227, 231, 235, 239, 243, 247, 251, 255, 260, 265, 270, 275, 280,
- 285, 290, 295, 300, 305, 311, 317, 323, 329, 335, 341, 347, 353,
- 359, 366, 373, 380, 387, 394, 401, 408, 416, 424, 432, 440, 448,
- 456, 465, 474, 483, 492, 501, 510, 520, 530, 540, 550, 560, 571,
- 582, 593, 604, 615, 627, 639, 651, 663, 676, 689, 702, 715, 729,
- 743, 757, 771, 786, 801, 816, 832, 848, 864, 881, 898, 915, 933,
- 951, 969, 988, 1007, 1026, 1046, 1066, 1087, 1108, 1129, 1151, 1173, 1196,
- 1219, 1243, 1267, 1292, 1317, 1343, 1369, 1396, 1423, 1451, 1479, 1508, 1537,
- 1567, 1597, 1628, 1660, 1692, 1725, 1759, 1793, 1828,
-};
-
-static const int16_t ac_qlookup_10_Q3[QINDEX_RANGE] = {
- 4, 9, 11, 13, 16, 18, 21, 24, 27, 30, 33, 37, 40,
- 44, 48, 51, 55, 59, 63, 67, 71, 75, 79, 83, 88, 92,
- 96, 100, 105, 109, 114, 118, 122, 127, 131, 136, 140, 145, 149,
- 154, 158, 163, 168, 172, 177, 181, 186, 190, 195, 199, 204, 208,
- 213, 217, 222, 226, 231, 235, 240, 244, 249, 253, 258, 262, 267,
- 271, 275, 280, 284, 289, 293, 297, 302, 306, 311, 315, 319, 324,
- 328, 332, 337, 341, 345, 349, 354, 358, 362, 367, 371, 375, 379,
- 384, 388, 392, 396, 401, 409, 417, 425, 433, 441, 449, 458, 466,
- 474, 482, 490, 498, 506, 514, 523, 531, 539, 547, 555, 563, 571,
- 579, 588, 596, 604, 616, 628, 640, 652, 664, 676, 688, 700, 713,
- 725, 737, 749, 761, 773, 785, 797, 809, 825, 841, 857, 873, 889,
- 905, 922, 938, 954, 970, 986, 1002, 1018, 1038, 1058, 1078, 1098, 1118,
- 1138, 1158, 1178, 1198, 1218, 1242, 1266, 1290, 1314, 1338, 1362, 1386, 1411,
- 1435, 1463, 1491, 1519, 1547, 1575, 1603, 1631, 1663, 1695, 1727, 1759, 1791,
- 1823, 1859, 1895, 1931, 1967, 2003, 2039, 2079, 2119, 2159, 2199, 2239, 2283,
- 2327, 2371, 2415, 2459, 2507, 2555, 2603, 2651, 2703, 2755, 2807, 2859, 2915,
- 2971, 3027, 3083, 3143, 3203, 3263, 3327, 3391, 3455, 3523, 3591, 3659, 3731,
- 3803, 3876, 3952, 4028, 4104, 4184, 4264, 4348, 4432, 4516, 4604, 4692, 4784,
- 4876, 4972, 5068, 5168, 5268, 5372, 5476, 5584, 5692, 5804, 5916, 6032, 6148,
- 6268, 6388, 6512, 6640, 6768, 6900, 7036, 7172, 7312,
-};
-
-static const int16_t ac_qlookup_12_Q3[QINDEX_RANGE] = {
- 4, 13, 19, 27, 35, 44, 54, 64, 75, 87, 99,
- 112, 126, 139, 154, 168, 183, 199, 214, 230, 247, 263,
- 280, 297, 314, 331, 349, 366, 384, 402, 420, 438, 456,
- 475, 493, 511, 530, 548, 567, 586, 604, 623, 642, 660,
- 679, 698, 716, 735, 753, 772, 791, 809, 828, 846, 865,
- 884, 902, 920, 939, 957, 976, 994, 1012, 1030, 1049, 1067,
- 1085, 1103, 1121, 1139, 1157, 1175, 1193, 1211, 1229, 1246, 1264,
- 1282, 1299, 1317, 1335, 1352, 1370, 1387, 1405, 1422, 1440, 1457,
- 1474, 1491, 1509, 1526, 1543, 1560, 1577, 1595, 1627, 1660, 1693,
- 1725, 1758, 1791, 1824, 1856, 1889, 1922, 1954, 1987, 2020, 2052,
- 2085, 2118, 2150, 2183, 2216, 2248, 2281, 2313, 2346, 2378, 2411,
- 2459, 2508, 2556, 2605, 2653, 2701, 2750, 2798, 2847, 2895, 2943,
- 2992, 3040, 3088, 3137, 3185, 3234, 3298, 3362, 3426, 3491, 3555,
- 3619, 3684, 3748, 3812, 3876, 3941, 4005, 4069, 4149, 4230, 4310,
- 4390, 4470, 4550, 4631, 4711, 4791, 4871, 4967, 5064, 5160, 5256,
- 5352, 5448, 5544, 5641, 5737, 5849, 5961, 6073, 6185, 6297, 6410,
- 6522, 6650, 6778, 6906, 7034, 7162, 7290, 7435, 7579, 7723, 7867,
- 8011, 8155, 8315, 8475, 8635, 8795, 8956, 9132, 9308, 9484, 9660,
- 9836, 10028, 10220, 10412, 10604, 10812, 11020, 11228, 11437, 11661, 11885,
- 12109, 12333, 12573, 12813, 13053, 13309, 13565, 13821, 14093, 14365, 14637,
- 14925, 15213, 15502, 15806, 16110, 16414, 16734, 17054, 17390, 17726, 18062,
- 18414, 18766, 19134, 19502, 19886, 20270, 20670, 21070, 21486, 21902, 22334,
- 22766, 23214, 23662, 24126, 24590, 25070, 25551, 26047, 26559, 27071, 27599,
- 28143, 28687, 29247,
-};
-
-// Coefficient scaling and quantization with AV1 TX are tailored to
-// the AV1 TX transforms. Regardless of the bit-depth of the input,
-// the transform stages scale the coefficient values up by a factor of
-// 8 (3 bits) over the scale of the pixel values. Thus, for 8-bit
-// input, the coefficients have effectively 11 bits of scale depth
-// (8+3), 10-bit input pixels result in 13-bit coefficient depth
-// (10+3) and 12-bit pixels yield 15-bit (12+3) coefficient depth.
-// All quantizers are built using this invariant of x8, 3-bit scaling,
-// thus the Q3 suffix.
-
-// A partial exception to this rule is large transforms; to avoid
-// overflow, TX blocks with > 256 pels (>16x16) are scaled only
-// 4-times unity (2 bits) over the pixel depth, and TX blocks with
-// over 1024 pixels (>32x32) are scaled up only 2x unity (1 bit).
-// This descaling is found via av1_tx_get_scale(). Thus, 16x32, 32x16
-// and 32x32 transforms actually return Q2 coefficients, and 32x64,
-// 64x32 and 64x64 transforms return Q1 coefficients. However, the
-// quantizers are de-scaled down on-the-fly by the same amount
-// (av1_tx_get_scale()) during quantization, and as such the
-// dequantized/decoded coefficients, even for large TX blocks, are always
-// effectively Q3. Meanwhile, quantized/coded coefficients are Q0
-// because Qn quantizers are applied to Qn tx coefficients.
-
-// Note that encoder decision making (which uses the quantizer to
-// generate several bespoke lamdas for RDO and other heuristics)
-// expects quantizers to be larger for higher-bitdepth input. In
-// addition, the minimum allowable quantizer is 4; smaller values will
-// underflow to 0 in the actual quantization routines.
-
-int16_t av1_dc_quant_Q3(int qindex, int delta, aom_bit_depth_t bit_depth) {
- switch (bit_depth) {
- case AOM_BITS_8: return dc_qlookup_Q3[clamp(qindex + delta, 0, MAXQ)];
- case AOM_BITS_10: return dc_qlookup_10_Q3[clamp(qindex + delta, 0, MAXQ)];
- case AOM_BITS_12: return dc_qlookup_12_Q3[clamp(qindex + delta, 0, MAXQ)];
- default:
- assert(0 && "bit_depth should be AOM_BITS_8, AOM_BITS_10 or AOM_BITS_12");
- return -1;
- }
-}
-
-int16_t av1_ac_quant_Q3(int qindex, int delta, aom_bit_depth_t bit_depth) {
- switch (bit_depth) {
- case AOM_BITS_8: return ac_qlookup_Q3[clamp(qindex + delta, 0, MAXQ)];
- case AOM_BITS_10: return ac_qlookup_10_Q3[clamp(qindex + delta, 0, MAXQ)];
- case AOM_BITS_12: return ac_qlookup_12_Q3[clamp(qindex + delta, 0, MAXQ)];
- default:
- assert(0 && "bit_depth should be AOM_BITS_8, AOM_BITS_10 or AOM_BITS_12");
- return -1;
- }
-}
-
-// In AV1 TX, the coefficients are always scaled up a factor of 8 (3
-// bits), so QTX == Q3.
-
-int16_t av1_dc_quant_QTX(int qindex, int delta, aom_bit_depth_t bit_depth) {
- return av1_dc_quant_Q3(qindex, delta, bit_depth);
-}
-
-int16_t av1_ac_quant_QTX(int qindex, int delta, aom_bit_depth_t bit_depth) {
- return av1_ac_quant_Q3(qindex, delta, bit_depth);
-}
-
-int av1_get_qindex(const struct segmentation *seg, int segment_id,
- int base_qindex) {
- if (segfeature_active(seg, segment_id, SEG_LVL_ALT_Q)) {
- const int data = get_segdata(seg, segment_id, SEG_LVL_ALT_Q);
- const int seg_qindex = base_qindex + data;
- return clamp(seg_qindex, 0, MAXQ);
- } else {
- return base_qindex;
- }
-}
-
-const qm_val_t *av1_iqmatrix(AV1_COMMON *cm, int qmlevel, int plane,
- TX_SIZE tx_size) {
- return &cm->giqmatrix[qmlevel][plane][tx_size][0];
-}
-const qm_val_t *av1_qmatrix(AV1_COMMON *cm, int qmlevel, int plane,
- TX_SIZE tx_size) {
- return &cm->gqmatrix[qmlevel][plane][tx_size][0];
-}
-
-#define QM_TOTAL_SIZE 3344
-static const qm_val_t wt_matrix_ref[NUM_QM_LEVELS][2][QM_TOTAL_SIZE];
-static const qm_val_t iwt_matrix_ref[NUM_QM_LEVELS][2][QM_TOTAL_SIZE];
-
-void av1_qm_init(AV1_COMMON *cm) {
- const int num_planes = av1_num_planes(cm);
- int q, c, t;
- int current;
- for (q = 0; q < NUM_QM_LEVELS; ++q) {
- for (c = 0; c < num_planes; ++c) {
- current = 0;
- for (t = 0; t < TX_SIZES_ALL; ++t) {
- const int size = tx_size_2d[t];
- const int qm_tx_size = av1_get_adjusted_tx_size(t);
- if (q == NUM_QM_LEVELS - 1) {
- cm->gqmatrix[q][c][t] = NULL;
- cm->giqmatrix[q][c][t] = NULL;
- } else if (t != qm_tx_size) { // Reuse matrices for 'qm_tx_size'
- cm->gqmatrix[q][c][t] = cm->gqmatrix[q][c][qm_tx_size];
- cm->giqmatrix[q][c][t] = cm->giqmatrix[q][c][qm_tx_size];
- } else {
- assert(current + size <= QM_TOTAL_SIZE);
- cm->gqmatrix[q][c][t] = &wt_matrix_ref[q][c >= 1][current];
- cm->giqmatrix[q][c][t] = &iwt_matrix_ref[q][c >= 1][current];
- current += size;
- }
- }
- }
- }
-}
-
-/* Provide 16 sets of quantization matrices for chroma and luma
- and each TX size. Matrices for different TX sizes are in fact
- sub-sampled from the 32x32 and 16x16 sizes, but explicitly
- defined here for convenience. Intra and inter matrix sets are the
- same but changing DEFAULT_QM_INTER_OFFSET from zero allows
- for different matrices for inter and intra blocks in the same
- frame.
- Matrices for different QM levels have been rescaled in the
- frequency domain according to different nominal viewing
- distances.
- */
-static const qm_val_t iwt_matrix_ref[NUM_QM_LEVELS][2][QM_TOTAL_SIZE] = {
- {
- { /* Luma */
- /* Size 4x4 */
- 32, 43, 73, 97, 43, 67, 94, 110, 73, 94, 137, 150, 97, 110, 150, 200,
- /* Size 8x8 */
- 32, 32, 38, 51, 68, 84, 95, 109, 32, 35, 40, 49, 63, 76, 89, 102, 38,
- 40, 54, 65, 78, 91, 98, 106, 51, 49, 65, 82, 97, 111, 113, 121, 68, 63,
- 78, 97, 117, 134, 138, 142, 84, 76, 91, 111, 134, 152, 159, 168, 95, 89,
- 98, 113, 138, 159, 183, 199, 109, 102, 106, 121, 142, 168, 199, 220,
- /* Size 16x16 */
- 32, 31, 31, 34, 36, 44, 48, 59, 65, 80, 83, 91, 97, 104, 111, 119, 31,
- 32, 32, 33, 34, 41, 44, 54, 59, 72, 75, 83, 90, 97, 104, 112, 31, 32,
- 33, 35, 36, 42, 45, 54, 59, 71, 74, 81, 86, 93, 100, 107, 34, 33, 35,
- 39, 42, 47, 51, 58, 63, 74, 76, 81, 84, 90, 97, 105, 36, 34, 36, 42, 48,
- 54, 57, 64, 68, 79, 81, 88, 91, 96, 102, 105, 44, 41, 42, 47, 54, 63,
- 67, 75, 79, 90, 92, 95, 100, 102, 109, 112, 48, 44, 45, 51, 57, 67, 71,
- 80, 85, 96, 99, 107, 108, 111, 117, 120, 59, 54, 54, 58, 64, 75, 80, 92,
- 98, 110, 113, 115, 116, 122, 125, 130, 65, 59, 59, 63, 68, 79, 85, 98,
- 105, 118, 121, 127, 130, 134, 135, 140, 80, 72, 71, 74, 79, 90, 96, 110,
- 118, 134, 137, 140, 143, 144, 146, 152, 83, 75, 74, 76, 81, 92, 99, 113,
- 121, 137, 140, 151, 152, 155, 158, 165, 91, 83, 81, 81, 88, 95, 107,
- 115, 127, 140, 151, 159, 166, 169, 173, 179, 97, 90, 86, 84, 91, 100,
- 108, 116, 130, 143, 152, 166, 174, 182, 189, 193, 104, 97, 93, 90, 96,
- 102, 111, 122, 134, 144, 155, 169, 182, 191, 200, 210, 111, 104, 100,
- 97, 102, 109, 117, 125, 135, 146, 158, 173, 189, 200, 210, 220, 119,
- 112, 107, 105, 105, 112, 120, 130, 140, 152, 165, 179, 193, 210, 220,
- 231,
- /* Size 32x32 */
- 32, 31, 31, 31, 31, 32, 34, 35, 36, 39, 44, 46, 48, 54, 59, 62, 65, 71,
- 80, 81, 83, 88, 91, 94, 97, 101, 104, 107, 111, 115, 119, 123, 31, 32,
- 32, 32, 32, 32, 34, 34, 35, 38, 42, 44, 46, 51, 56, 59, 62, 68, 76, 77,
- 78, 84, 86, 89, 92, 95, 99, 102, 105, 109, 113, 116, 31, 32, 32, 32, 32,
- 32, 33, 34, 34, 37, 41, 42, 44, 49, 54, 56, 59, 65, 72, 73, 75, 80, 83,
- 86, 90, 93, 97, 101, 104, 108, 112, 116, 31, 32, 32, 32, 33, 33, 34, 35,
- 35, 38, 41, 43, 45, 49, 54, 56, 59, 64, 72, 73, 74, 79, 82, 85, 88, 91,
- 94, 97, 101, 104, 107, 111, 31, 32, 32, 33, 33, 34, 35, 36, 36, 39, 42,
- 44, 45, 50, 54, 56, 59, 64, 71, 72, 74, 78, 81, 84, 86, 89, 93, 96, 100,
- 104, 107, 111, 32, 32, 32, 33, 34, 35, 37, 37, 38, 40, 42, 44, 46, 49,
- 53, 55, 58, 63, 69, 70, 72, 76, 79, 82, 85, 89, 93, 96, 99, 102, 106,
- 109, 34, 34, 33, 34, 35, 37, 39, 41, 42, 45, 47, 49, 51, 54, 58, 60, 63,
- 68, 74, 75, 76, 80, 81, 82, 84, 87, 90, 93, 97, 101, 105, 110, 35, 34,
- 34, 35, 36, 37, 41, 43, 45, 47, 50, 52, 53, 57, 61, 63, 65, 70, 76, 77,
- 79, 82, 84, 86, 89, 91, 92, 93, 96, 100, 103, 107, 36, 35, 34, 35, 36,
- 38, 42, 45, 48, 50, 54, 55, 57, 60, 64, 66, 68, 73, 79, 80, 81, 85, 88,
- 90, 91, 93, 96, 99, 102, 103, 105, 107, 39, 38, 37, 38, 39, 40, 45, 47,
- 50, 54, 58, 59, 61, 65, 69, 71, 73, 78, 84, 85, 86, 91, 92, 92, 95, 98,
- 100, 101, 103, 106, 110, 114, 44, 42, 41, 41, 42, 42, 47, 50, 54, 58,
- 63, 65, 67, 71, 75, 77, 79, 84, 90, 91, 92, 95, 95, 97, 100, 101, 102,
- 105, 109, 111, 112, 114, 46, 44, 42, 43, 44, 44, 49, 52, 55, 59, 65, 67,
- 69, 74, 78, 80, 82, 87, 93, 94, 95, 98, 100, 103, 102, 105, 108, 110,
- 111, 113, 117, 121, 48, 46, 44, 45, 45, 46, 51, 53, 57, 61, 67, 69, 71,
- 76, 80, 83, 85, 90, 96, 97, 99, 103, 107, 105, 108, 111, 111, 113, 117,
- 119, 120, 122, 54, 51, 49, 49, 50, 49, 54, 57, 60, 65, 71, 74, 76, 82,
- 87, 89, 92, 97, 104, 105, 106, 111, 110, 111, 114, 113, 116, 120, 120,
- 121, 125, 130, 59, 56, 54, 54, 54, 53, 58, 61, 64, 69, 75, 78, 80, 87,
- 92, 95, 98, 103, 110, 111, 113, 115, 115, 119, 116, 120, 122, 122, 125,
- 129, 130, 130, 62, 59, 56, 56, 56, 55, 60, 63, 66, 71, 77, 80, 83, 89,
- 95, 98, 101, 107, 114, 115, 117, 119, 123, 121, 125, 126, 125, 129, 131,
- 131, 135, 140, 65, 62, 59, 59, 59, 58, 63, 65, 68, 73, 79, 82, 85, 92,
- 98, 101, 105, 111, 118, 119, 121, 126, 127, 128, 130, 130, 134, 133,
- 135, 140, 140, 140, 71, 68, 65, 64, 64, 63, 68, 70, 73, 78, 84, 87, 90,
- 97, 103, 107, 111, 117, 125, 126, 128, 134, 132, 136, 133, 138, 137,
- 140, 143, 142, 145, 150, 80, 76, 72, 72, 71, 69, 74, 76, 79, 84, 90, 93,
- 96, 104, 110, 114, 118, 125, 134, 135, 137, 139, 140, 139, 143, 142,
- 144, 146, 146, 151, 152, 151, 81, 77, 73, 73, 72, 70, 75, 77, 80, 85,
- 91, 94, 97, 105, 111, 115, 119, 126, 135, 137, 138, 144, 147, 146, 148,
- 149, 151, 150, 156, 155, 157, 163, 83, 78, 75, 74, 74, 72, 76, 79, 81,
- 86, 92, 95, 99, 106, 113, 117, 121, 128, 137, 138, 140, 147, 151, 156,
- 152, 157, 155, 161, 158, 162, 165, 164, 88, 84, 80, 79, 78, 76, 80, 82,
- 85, 91, 95, 98, 103, 111, 115, 119, 126, 134, 139, 144, 147, 152, 154,
- 158, 163, 159, 165, 163, 168, 168, 169, 176, 91, 86, 83, 82, 81, 79, 81,
- 84, 88, 92, 95, 100, 107, 110, 115, 123, 127, 132, 140, 147, 151, 154,
- 159, 161, 166, 171, 169, 173, 173, 176, 179, 177, 94, 89, 86, 85, 84,
- 82, 82, 86, 90, 92, 97, 103, 105, 111, 119, 121, 128, 136, 139, 146,
- 156, 158, 161, 166, 168, 174, 179, 178, 180, 183, 183, 190, 97, 92, 90,
- 88, 86, 85, 84, 89, 91, 95, 100, 102, 108, 114, 116, 125, 130, 133, 143,
- 148, 152, 163, 166, 168, 174, 176, 182, 187, 189, 188, 193, 191, 101,
- 95, 93, 91, 89, 89, 87, 91, 93, 98, 101, 105, 111, 113, 120, 126, 130,
- 138, 142, 149, 157, 159, 171, 174, 176, 183, 184, 191, 195, 199, 197,
- 204, 104, 99, 97, 94, 93, 93, 90, 92, 96, 100, 102, 108, 111, 116, 122,
- 125, 134, 137, 144, 151, 155, 165, 169, 179, 182, 184, 191, 193, 200,
- 204, 210, 206, 107, 102, 101, 97, 96, 96, 93, 93, 99, 101, 105, 110,
- 113, 120, 122, 129, 133, 140, 146, 150, 161, 163, 173, 178, 187, 191,
- 193, 200, 202, 210, 214, 222, 111, 105, 104, 101, 100, 99, 97, 96, 102,
- 103, 109, 111, 117, 120, 125, 131, 135, 143, 146, 156, 158, 168, 173,
- 180, 189, 195, 200, 202, 210, 212, 220, 224, 115, 109, 108, 104, 104,
- 102, 101, 100, 103, 106, 111, 113, 119, 121, 129, 131, 140, 142, 151,
- 155, 162, 168, 176, 183, 188, 199, 204, 210, 212, 220, 222, 230, 119,
- 113, 112, 107, 107, 106, 105, 103, 105, 110, 112, 117, 120, 125, 130,
- 135, 140, 145, 152, 157, 165, 169, 179, 183, 193, 197, 210, 214, 220,
- 222, 231, 232, 123, 116, 116, 111, 111, 109, 110, 107, 107, 114, 114,
- 121, 122, 130, 130, 140, 140, 150, 151, 163, 164, 176, 177, 190, 191,
- 204, 206, 222, 224, 230, 232, 242,
- /* Size 4x8 */
- 32, 42, 75, 91, 33, 42, 69, 86, 37, 58, 84, 91, 49, 71, 103, 110, 65,
- 84, 125, 128, 80, 97, 142, 152, 91, 100, 145, 178, 104, 112, 146, 190,
- /* Size 8x4 */
- 32, 33, 37, 49, 65, 80, 91, 104, 42, 42, 58, 71, 84, 97, 100, 112, 75,
- 69, 84, 103, 125, 142, 145, 146, 91, 86, 91, 110, 128, 152, 178, 190,
- /* Size 8x16 */
- 32, 32, 36, 53, 65, 87, 93, 99, 31, 33, 34, 49, 59, 78, 86, 93, 32, 34,
- 36, 50, 59, 77, 82, 89, 34, 37, 42, 54, 63, 79, 80, 88, 36, 38, 48, 60,
- 68, 84, 86, 90, 44, 43, 53, 71, 79, 95, 94, 97, 48, 46, 56, 76, 85, 102,
- 105, 105, 58, 54, 63, 87, 98, 116, 112, 115, 65, 58, 68, 92, 105, 124,
- 122, 124, 79, 70, 79, 104, 118, 141, 135, 135, 82, 72, 81, 106, 121,
- 144, 149, 146, 91, 80, 88, 106, 130, 148, 162, 159, 97, 86, 94, 107,
- 128, 157, 167, 171, 103, 93, 98, 114, 131, 150, 174, 186, 110, 100, 101,
- 117, 138, 161, 183, 193, 118, 107, 105, 118, 136, 157, 182, 203,
- /* Size 16x8 */
- 32, 31, 32, 34, 36, 44, 48, 58, 65, 79, 82, 91, 97, 103, 110, 118, 32,
- 33, 34, 37, 38, 43, 46, 54, 58, 70, 72, 80, 86, 93, 100, 107, 36, 34,
- 36, 42, 48, 53, 56, 63, 68, 79, 81, 88, 94, 98, 101, 105, 53, 49, 50,
- 54, 60, 71, 76, 87, 92, 104, 106, 106, 107, 114, 117, 118, 65, 59, 59,
- 63, 68, 79, 85, 98, 105, 118, 121, 130, 128, 131, 138, 136, 87, 78, 77,
- 79, 84, 95, 102, 116, 124, 141, 144, 148, 157, 150, 161, 157, 93, 86,
- 82, 80, 86, 94, 105, 112, 122, 135, 149, 162, 167, 174, 183, 182, 99,
- 93, 89, 88, 90, 97, 105, 115, 124, 135, 146, 159, 171, 186, 193, 203,
- /* Size 16x32 */
- 32, 31, 32, 34, 36, 44, 53, 59, 65, 79, 87, 90, 93, 96, 99, 102, 31, 32,
- 32, 34, 35, 42, 51, 56, 62, 75, 82, 85, 88, 91, 94, 97, 31, 32, 33, 33,
- 34, 41, 49, 54, 59, 72, 78, 82, 86, 90, 93, 97, 31, 32, 33, 34, 35, 41,
- 49, 54, 59, 71, 78, 81, 84, 87, 90, 93, 32, 32, 34, 35, 36, 42, 50, 54,
- 59, 71, 77, 80, 82, 86, 89, 93, 32, 33, 35, 37, 38, 42, 49, 53, 58, 69,
- 75, 78, 82, 86, 89, 92, 34, 34, 37, 39, 42, 48, 54, 58, 63, 73, 79, 78,
- 80, 83, 88, 92, 35, 34, 37, 41, 45, 50, 57, 61, 65, 76, 82, 83, 84, 84,
- 87, 90, 36, 34, 38, 43, 48, 54, 60, 64, 68, 78, 84, 87, 86, 89, 90, 90,
- 39, 37, 40, 45, 50, 58, 65, 69, 73, 84, 89, 89, 91, 91, 93, 96, 44, 41,
- 43, 48, 53, 63, 71, 75, 79, 90, 95, 93, 94, 95, 97, 97, 46, 43, 44, 49,
- 55, 65, 73, 78, 82, 93, 98, 100, 98, 100, 99, 103, 48, 45, 46, 51, 56,
- 67, 76, 80, 85, 96, 102, 102, 105, 102, 105, 104, 53, 49, 50, 54, 60,
- 71, 82, 87, 92, 103, 109, 107, 107, 110, 107, 111, 58, 54, 54, 58, 63,
- 75, 87, 92, 98, 110, 116, 115, 112, 111, 115, 112, 61, 57, 56, 60, 66,
- 77, 89, 95, 101, 114, 120, 118, 119, 118, 116, 120, 65, 60, 58, 63, 68,
- 79, 92, 98, 105, 118, 124, 123, 122, 123, 124, 121, 71, 65, 63, 68, 73,
- 84, 97, 103, 111, 125, 132, 132, 130, 128, 127, 130, 79, 72, 70, 74, 79,
- 90, 104, 110, 118, 133, 141, 136, 135, 135, 135, 131, 81, 74, 71, 75,
- 80, 91, 105, 112, 119, 135, 142, 140, 140, 138, 139, 142, 82, 75, 72,
- 76, 81, 92, 106, 113, 121, 136, 144, 151, 149, 149, 146, 143, 88, 80,
- 77, 80, 85, 97, 108, 115, 126, 142, 149, 153, 153, 152, 152, 154, 91,
- 83, 80, 81, 88, 100, 106, 114, 130, 142, 148, 155, 162, 160, 159, 155,
- 94, 85, 83, 82, 91, 100, 105, 118, 131, 137, 153, 160, 165, 167, 166,
- 168, 97, 88, 86, 85, 94, 100, 107, 123, 128, 140, 157, 161, 167, 173,
- 171, 169, 100, 91, 89, 87, 97, 100, 111, 121, 127, 145, 152, 164, 173,
- 178, 182, 181, 103, 94, 93, 90, 98, 101, 114, 120, 131, 144, 150, 170,
- 174, 180, 186, 183, 107, 97, 96, 93, 100, 104, 117, 119, 136, 142, 155,
- 168, 177, 187, 191, 198, 110, 101, 100, 97, 101, 108, 117, 123, 138,
- 141, 161, 165, 183, 188, 193, 200, 114, 104, 104, 100, 103, 112, 117,
- 127, 137, 146, 159, 167, 185, 190, 201, 206, 118, 108, 107, 103, 105,
- 115, 118, 131, 136, 151, 157, 172, 182, 197, 203, 208, 122, 111, 111,
- 107, 107, 119, 119, 136, 136, 156, 156, 178, 179, 203, 204, 217,
- /* Size 32x16 */
- 32, 31, 31, 31, 32, 32, 34, 35, 36, 39, 44, 46, 48, 53, 58, 61, 65, 71,
- 79, 81, 82, 88, 91, 94, 97, 100, 103, 107, 110, 114, 118, 122, 31, 32,
- 32, 32, 32, 33, 34, 34, 34, 37, 41, 43, 45, 49, 54, 57, 60, 65, 72, 74,
- 75, 80, 83, 85, 88, 91, 94, 97, 101, 104, 108, 111, 32, 32, 33, 33, 34,
- 35, 37, 37, 38, 40, 43, 44, 46, 50, 54, 56, 58, 63, 70, 71, 72, 77, 80,
- 83, 86, 89, 93, 96, 100, 104, 107, 111, 34, 34, 33, 34, 35, 37, 39, 41,
- 43, 45, 48, 49, 51, 54, 58, 60, 63, 68, 74, 75, 76, 80, 81, 82, 85, 87,
- 90, 93, 97, 100, 103, 107, 36, 35, 34, 35, 36, 38, 42, 45, 48, 50, 53,
- 55, 56, 60, 63, 66, 68, 73, 79, 80, 81, 85, 88, 91, 94, 97, 98, 100,
- 101, 103, 105, 107, 44, 42, 41, 41, 42, 42, 48, 50, 54, 58, 63, 65, 67,
- 71, 75, 77, 79, 84, 90, 91, 92, 97, 100, 100, 100, 100, 101, 104, 108,
- 112, 115, 119, 53, 51, 49, 49, 50, 49, 54, 57, 60, 65, 71, 73, 76, 82,
- 87, 89, 92, 97, 104, 105, 106, 108, 106, 105, 107, 111, 114, 117, 117,
- 117, 118, 119, 59, 56, 54, 54, 54, 53, 58, 61, 64, 69, 75, 78, 80, 87,
- 92, 95, 98, 103, 110, 112, 113, 115, 114, 118, 123, 121, 120, 119, 123,
- 127, 131, 136, 65, 62, 59, 59, 59, 58, 63, 65, 68, 73, 79, 82, 85, 92,
- 98, 101, 105, 111, 118, 119, 121, 126, 130, 131, 128, 127, 131, 136,
- 138, 137, 136, 136, 79, 75, 72, 71, 71, 69, 73, 76, 78, 84, 90, 93, 96,
- 103, 110, 114, 118, 125, 133, 135, 136, 142, 142, 137, 140, 145, 144,
- 142, 141, 146, 151, 156, 87, 82, 78, 78, 77, 75, 79, 82, 84, 89, 95, 98,
- 102, 109, 116, 120, 124, 132, 141, 142, 144, 149, 148, 153, 157, 152,
- 150, 155, 161, 159, 157, 156, 90, 85, 82, 81, 80, 78, 78, 83, 87, 89,
- 93, 100, 102, 107, 115, 118, 123, 132, 136, 140, 151, 153, 155, 160,
- 161, 164, 170, 168, 165, 167, 172, 178, 93, 88, 86, 84, 82, 82, 80, 84,
- 86, 91, 94, 98, 105, 107, 112, 119, 122, 130, 135, 140, 149, 153, 162,
- 165, 167, 173, 174, 177, 183, 185, 182, 179, 96, 91, 90, 87, 86, 86, 83,
- 84, 89, 91, 95, 100, 102, 110, 111, 118, 123, 128, 135, 138, 149, 152,
- 160, 167, 173, 178, 180, 187, 188, 190, 197, 203, 99, 94, 93, 90, 89,
- 89, 88, 87, 90, 93, 97, 99, 105, 107, 115, 116, 124, 127, 135, 139, 146,
- 152, 159, 166, 171, 182, 186, 191, 193, 201, 203, 204, 102, 97, 97, 93,
- 93, 92, 92, 90, 90, 96, 97, 103, 104, 111, 112, 120, 121, 130, 131, 142,
- 143, 154, 155, 168, 169, 181, 183, 198, 200, 206, 208, 217,
- /* Size 4x16 */
- 31, 44, 79, 96, 32, 41, 72, 90, 32, 42, 71, 86, 34, 48, 73, 83, 34, 54,
- 78, 89, 41, 63, 90, 95, 45, 67, 96, 102, 54, 75, 110, 111, 60, 79, 118,
- 123, 72, 90, 133, 135, 75, 92, 136, 149, 83, 100, 142, 160, 88, 100,
- 140, 173, 94, 101, 144, 180, 101, 108, 141, 188, 108, 115, 151, 197,
- /* Size 16x4 */
- 31, 32, 32, 34, 34, 41, 45, 54, 60, 72, 75, 83, 88, 94, 101, 108, 44,
- 41, 42, 48, 54, 63, 67, 75, 79, 90, 92, 100, 100, 101, 108, 115, 79, 72,
- 71, 73, 78, 90, 96, 110, 118, 133, 136, 142, 140, 144, 141, 151, 96, 90,
- 86, 83, 89, 95, 102, 111, 123, 135, 149, 160, 173, 180, 188, 197,
- /* Size 8x32 */
- 32, 32, 36, 53, 65, 87, 93, 99, 31, 32, 35, 51, 62, 82, 88, 94, 31, 33,
- 34, 49, 59, 78, 86, 93, 31, 33, 35, 49, 59, 78, 84, 90, 32, 34, 36, 50,
- 59, 77, 82, 89, 32, 35, 38, 49, 58, 75, 82, 89, 34, 37, 42, 54, 63, 79,
- 80, 88, 35, 37, 45, 57, 65, 82, 84, 87, 36, 38, 48, 60, 68, 84, 86, 90,
- 39, 40, 50, 65, 73, 89, 91, 93, 44, 43, 53, 71, 79, 95, 94, 97, 46, 44,
- 55, 73, 82, 98, 98, 99, 48, 46, 56, 76, 85, 102, 105, 105, 53, 50, 60,
- 82, 92, 109, 107, 107, 58, 54, 63, 87, 98, 116, 112, 115, 61, 56, 66,
- 89, 101, 120, 119, 116, 65, 58, 68, 92, 105, 124, 122, 124, 71, 63, 73,
- 97, 111, 132, 130, 127, 79, 70, 79, 104, 118, 141, 135, 135, 81, 71, 80,
- 105, 119, 142, 140, 139, 82, 72, 81, 106, 121, 144, 149, 146, 88, 77,
- 85, 108, 126, 149, 153, 152, 91, 80, 88, 106, 130, 148, 162, 159, 94,
- 83, 91, 105, 131, 153, 165, 166, 97, 86, 94, 107, 128, 157, 167, 171,
- 100, 89, 97, 111, 127, 152, 173, 182, 103, 93, 98, 114, 131, 150, 174,
- 186, 107, 96, 100, 117, 136, 155, 177, 191, 110, 100, 101, 117, 138,
- 161, 183, 193, 114, 104, 103, 117, 137, 159, 185, 201, 118, 107, 105,
- 118, 136, 157, 182, 203, 122, 111, 107, 119, 136, 156, 179, 204,
- /* Size 32x8 */
- 32, 31, 31, 31, 32, 32, 34, 35, 36, 39, 44, 46, 48, 53, 58, 61, 65, 71,
- 79, 81, 82, 88, 91, 94, 97, 100, 103, 107, 110, 114, 118, 122, 32, 32,
- 33, 33, 34, 35, 37, 37, 38, 40, 43, 44, 46, 50, 54, 56, 58, 63, 70, 71,
- 72, 77, 80, 83, 86, 89, 93, 96, 100, 104, 107, 111, 36, 35, 34, 35, 36,
- 38, 42, 45, 48, 50, 53, 55, 56, 60, 63, 66, 68, 73, 79, 80, 81, 85, 88,
- 91, 94, 97, 98, 100, 101, 103, 105, 107, 53, 51, 49, 49, 50, 49, 54, 57,
- 60, 65, 71, 73, 76, 82, 87, 89, 92, 97, 104, 105, 106, 108, 106, 105,
- 107, 111, 114, 117, 117, 117, 118, 119, 65, 62, 59, 59, 59, 58, 63, 65,
- 68, 73, 79, 82, 85, 92, 98, 101, 105, 111, 118, 119, 121, 126, 130, 131,
- 128, 127, 131, 136, 138, 137, 136, 136, 87, 82, 78, 78, 77, 75, 79, 82,
- 84, 89, 95, 98, 102, 109, 116, 120, 124, 132, 141, 142, 144, 149, 148,
- 153, 157, 152, 150, 155, 161, 159, 157, 156, 93, 88, 86, 84, 82, 82, 80,
- 84, 86, 91, 94, 98, 105, 107, 112, 119, 122, 130, 135, 140, 149, 153,
- 162, 165, 167, 173, 174, 177, 183, 185, 182, 179, 99, 94, 93, 90, 89,
- 89, 88, 87, 90, 93, 97, 99, 105, 107, 115, 116, 124, 127, 135, 139, 146,
- 152, 159, 166, 171, 182, 186, 191, 193, 201, 203, 204 },
- { /* Chroma */
- /* Size 4x4 */
- 35, 46, 57, 66, 46, 60, 69, 71, 57, 69, 90, 90, 66, 71, 90, 109,
- /* Size 8x8 */
- 31, 38, 47, 50, 57, 63, 67, 71, 38, 47, 46, 47, 52, 57, 62, 67, 47, 46,
- 54, 57, 61, 66, 67, 68, 50, 47, 57, 66, 72, 77, 75, 75, 57, 52, 61, 72,
- 82, 88, 86, 84, 63, 57, 66, 77, 88, 96, 95, 95, 67, 62, 67, 75, 86, 95,
- 104, 107, 71, 67, 68, 75, 84, 95, 107, 113,
- /* Size 16x16 */
- 32, 30, 33, 41, 49, 49, 50, 54, 57, 63, 65, 68, 70, 72, 74, 76, 30, 32,
- 35, 42, 46, 45, 46, 49, 52, 57, 58, 62, 64, 67, 70, 72, 33, 35, 39, 45,
- 47, 45, 46, 49, 51, 56, 57, 60, 62, 64, 66, 69, 41, 42, 45, 48, 50, 49,
- 50, 52, 53, 57, 58, 59, 60, 61, 64, 67, 49, 46, 47, 50, 53, 53, 54, 55,
- 56, 60, 61, 64, 64, 65, 66, 66, 49, 45, 45, 49, 53, 58, 60, 62, 63, 67,
- 68, 67, 69, 68, 70, 70, 50, 46, 46, 50, 54, 60, 61, 65, 67, 71, 71, 74,
- 73, 73, 74, 74, 54, 49, 49, 52, 55, 62, 65, 71, 73, 78, 79, 78, 77, 78,
- 78, 78, 57, 52, 51, 53, 56, 63, 67, 73, 76, 82, 83, 84, 84, 84, 82, 83,
- 63, 57, 56, 57, 60, 67, 71, 78, 82, 89, 90, 90, 89, 88, 87, 88, 65, 58,
- 57, 58, 61, 68, 71, 79, 83, 90, 91, 94, 93, 93, 92, 93, 68, 62, 60, 59,
- 64, 67, 74, 78, 84, 90, 94, 98, 99, 98, 98, 98, 70, 64, 62, 60, 64, 69,
- 73, 77, 84, 89, 93, 99, 102, 103, 104, 104, 72, 67, 64, 61, 65, 68, 73,
- 78, 84, 88, 93, 98, 103, 106, 108, 109, 74, 70, 66, 64, 66, 70, 74, 78,
- 82, 87, 92, 98, 104, 108, 111, 112, 76, 72, 69, 67, 66, 70, 74, 78, 83,
- 88, 93, 98, 104, 109, 112, 116,
- /* Size 32x32 */
- 32, 31, 30, 32, 33, 36, 41, 45, 49, 48, 49, 50, 50, 52, 54, 56, 57, 60,
- 63, 64, 65, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 78, 31, 31, 31, 33,
- 34, 38, 42, 45, 47, 47, 47, 47, 48, 50, 52, 53, 54, 57, 60, 61, 61, 63,
- 64, 65, 66, 67, 68, 69, 70, 71, 72, 74, 30, 31, 32, 33, 35, 40, 42, 44,
- 46, 45, 45, 45, 46, 47, 49, 51, 52, 54, 57, 58, 58, 61, 62, 63, 64, 66,
- 67, 68, 70, 71, 72, 74, 32, 33, 33, 35, 37, 41, 43, 45, 47, 46, 45, 46,
- 46, 47, 49, 50, 51, 54, 57, 57, 58, 60, 61, 62, 63, 64, 65, 66, 67, 68,
- 69, 70, 33, 34, 35, 37, 39, 43, 45, 46, 47, 46, 45, 46, 46, 47, 49, 50,
- 51, 53, 56, 57, 57, 59, 60, 61, 62, 63, 64, 65, 66, 68, 69, 70, 36, 38,
- 40, 41, 43, 47, 47, 47, 48, 46, 45, 46, 46, 47, 48, 49, 50, 52, 54, 55,
- 55, 57, 58, 59, 61, 62, 64, 65, 66, 67, 68, 69, 41, 42, 42, 43, 45, 47,
- 48, 49, 50, 49, 49, 49, 50, 50, 52, 52, 53, 55, 57, 58, 58, 60, 59, 59,
- 60, 61, 61, 63, 64, 66, 67, 69, 45, 45, 44, 45, 46, 47, 49, 50, 51, 51,
- 51, 51, 52, 52, 53, 54, 55, 57, 59, 59, 60, 61, 61, 62, 63, 63, 63, 63,
- 63, 64, 65, 66, 49, 47, 46, 47, 47, 48, 50, 51, 53, 53, 53, 54, 54, 54,
- 55, 56, 56, 58, 60, 61, 61, 63, 64, 64, 64, 64, 65, 66, 66, 66, 66, 66,
- 48, 47, 45, 46, 46, 46, 49, 51, 53, 54, 55, 56, 56, 57, 58, 59, 60, 61,
- 63, 64, 64, 66, 66, 65, 66, 67, 67, 67, 67, 68, 69, 70, 49, 47, 45, 45,
- 45, 45, 49, 51, 53, 55, 58, 59, 60, 61, 62, 63, 63, 65, 67, 67, 68, 69,
- 67, 68, 69, 68, 68, 69, 70, 70, 70, 70, 50, 47, 45, 46, 46, 46, 49, 51,
- 54, 56, 59, 60, 60, 62, 64, 64, 65, 67, 69, 69, 70, 70, 71, 71, 70, 70,
- 71, 71, 71, 71, 72, 74, 50, 48, 46, 46, 46, 46, 50, 52, 54, 56, 60, 60,
- 61, 63, 65, 66, 67, 68, 71, 71, 71, 73, 74, 72, 73, 74, 73, 73, 74, 74,
- 74, 74, 52, 50, 47, 47, 47, 47, 50, 52, 54, 57, 61, 62, 63, 66, 68, 69,
- 70, 72, 75, 75, 75, 77, 75, 75, 76, 75, 75, 76, 75, 75, 76, 77, 54, 52,
- 49, 49, 49, 48, 52, 53, 55, 58, 62, 64, 65, 68, 71, 72, 73, 75, 78, 78,
- 79, 79, 78, 79, 77, 78, 78, 77, 78, 79, 78, 78, 56, 53, 51, 50, 50, 49,
- 52, 54, 56, 59, 63, 64, 66, 69, 72, 73, 75, 77, 80, 80, 81, 81, 82, 80,
- 81, 81, 79, 81, 80, 79, 81, 82, 57, 54, 52, 51, 51, 50, 53, 55, 56, 60,
- 63, 65, 67, 70, 73, 75, 76, 79, 82, 82, 83, 85, 84, 83, 84, 83, 84, 82,
- 82, 84, 83, 82, 60, 57, 54, 54, 53, 52, 55, 57, 58, 61, 65, 67, 68, 72,
- 75, 77, 79, 82, 85, 85, 86, 88, 86, 87, 85, 86, 85, 85, 86, 84, 85, 86,
- 63, 60, 57, 57, 56, 54, 57, 59, 60, 63, 67, 69, 71, 75, 78, 80, 82, 85,
- 89, 89, 90, 90, 90, 89, 89, 88, 88, 88, 87, 88, 88, 87, 64, 61, 58, 57,
- 57, 55, 58, 59, 61, 64, 67, 69, 71, 75, 78, 80, 82, 85, 89, 90, 91, 92,
- 93, 92, 92, 91, 91, 90, 91, 90, 90, 92, 65, 61, 58, 58, 57, 55, 58, 60,
- 61, 64, 68, 70, 71, 75, 79, 81, 83, 86, 90, 91, 91, 94, 94, 96, 93, 94,
- 93, 94, 92, 93, 93, 92, 67, 63, 61, 60, 59, 57, 60, 61, 63, 66, 69, 70,
- 73, 77, 79, 81, 85, 88, 90, 92, 94, 96, 96, 97, 98, 95, 97, 95, 96, 95,
- 95, 96, 68, 64, 62, 61, 60, 58, 59, 61, 64, 66, 67, 71, 74, 75, 78, 82,
- 84, 86, 90, 93, 94, 96, 98, 98, 99, 100, 98, 99, 98, 98, 98, 97, 69, 65,
- 63, 62, 61, 59, 59, 62, 64, 65, 68, 71, 72, 75, 79, 80, 83, 87, 89, 92,
- 96, 97, 98, 100, 100, 101, 102, 101, 101, 101, 100, 102, 70, 66, 64, 63,
- 62, 61, 60, 63, 64, 66, 69, 70, 73, 76, 77, 81, 84, 85, 89, 92, 93, 98,
- 99, 100, 102, 102, 103, 104, 104, 103, 104, 102, 71, 67, 66, 64, 63, 62,
- 61, 63, 64, 67, 68, 70, 74, 75, 78, 81, 83, 86, 88, 91, 94, 95, 100,
- 101, 102, 104, 104, 105, 106, 107, 105, 107, 72, 68, 67, 65, 64, 64, 61,
- 63, 65, 67, 68, 71, 73, 75, 78, 79, 84, 85, 88, 91, 93, 97, 98, 102,
- 103, 104, 106, 106, 108, 108, 109, 107, 73, 69, 68, 66, 65, 65, 63, 63,
- 66, 67, 69, 71, 73, 76, 77, 81, 82, 85, 88, 90, 94, 95, 99, 101, 104,
- 105, 106, 109, 108, 110, 111, 112, 74, 70, 70, 67, 66, 66, 64, 63, 66,
- 67, 70, 71, 74, 75, 78, 80, 82, 86, 87, 91, 92, 96, 98, 101, 104, 106,
- 108, 108, 111, 111, 112, 113, 75, 71, 71, 68, 68, 67, 66, 64, 66, 68,
- 70, 71, 74, 75, 79, 79, 84, 84, 88, 90, 93, 95, 98, 101, 103, 107, 108,
- 110, 111, 113, 113, 115, 76, 72, 72, 69, 69, 68, 67, 65, 66, 69, 70, 72,
- 74, 76, 78, 81, 83, 85, 88, 90, 93, 95, 98, 100, 104, 105, 109, 111,
- 112, 113, 116, 115, 78, 74, 74, 70, 70, 69, 69, 66, 66, 70, 70, 74, 74,
- 77, 78, 82, 82, 86, 87, 92, 92, 96, 97, 102, 102, 107, 107, 112, 113,
- 115, 115, 118,
- /* Size 4x8 */
- 31, 47, 60, 66, 40, 45, 54, 61, 46, 56, 64, 64, 48, 61, 75, 73, 54, 65,
- 85, 82, 61, 69, 92, 92, 64, 68, 90, 102, 68, 71, 87, 105,
- /* Size 8x4 */
- 31, 40, 46, 48, 54, 61, 64, 68, 47, 45, 56, 61, 65, 69, 68, 71, 60, 54,
- 64, 75, 85, 92, 90, 87, 66, 61, 64, 73, 82, 92, 102, 105,
- /* Size 8x16 */
- 32, 37, 48, 52, 57, 66, 68, 71, 30, 40, 46, 48, 52, 60, 63, 66, 33, 43,
- 47, 47, 51, 59, 60, 63, 42, 47, 50, 50, 53, 60, 59, 62, 49, 48, 53, 54,
- 57, 62, 62, 62, 49, 46, 53, 61, 64, 69, 66, 66, 50, 46, 54, 64, 67, 73,
- 72, 70, 54, 49, 55, 68, 73, 80, 76, 75, 57, 50, 56, 70, 76, 84, 80, 79,
- 63, 55, 60, 75, 82, 92, 87, 84, 64, 56, 61, 75, 83, 93, 93, 89, 68, 59,
- 64, 74, 86, 94, 98, 94, 70, 62, 66, 73, 83, 96, 99, 98, 72, 64, 66, 75,
- 83, 92, 101, 104, 74, 67, 66, 74, 84, 94, 103, 106, 76, 69, 67, 73, 82,
- 91, 101, 109,
- /* Size 16x8 */
- 32, 30, 33, 42, 49, 49, 50, 54, 57, 63, 64, 68, 70, 72, 74, 76, 37, 40,
- 43, 47, 48, 46, 46, 49, 50, 55, 56, 59, 62, 64, 67, 69, 48, 46, 47, 50,
- 53, 53, 54, 55, 56, 60, 61, 64, 66, 66, 66, 67, 52, 48, 47, 50, 54, 61,
- 64, 68, 70, 75, 75, 74, 73, 75, 74, 73, 57, 52, 51, 53, 57, 64, 67, 73,
- 76, 82, 83, 86, 83, 83, 84, 82, 66, 60, 59, 60, 62, 69, 73, 80, 84, 92,
- 93, 94, 96, 92, 94, 91, 68, 63, 60, 59, 62, 66, 72, 76, 80, 87, 93, 98,
- 99, 101, 103, 101, 71, 66, 63, 62, 62, 66, 70, 75, 79, 84, 89, 94, 98,
- 104, 106, 109,
- /* Size 16x32 */
- 32, 31, 37, 42, 48, 49, 52, 54, 57, 63, 66, 67, 68, 69, 71, 72, 31, 31,
- 38, 42, 47, 47, 50, 52, 54, 60, 63, 64, 65, 66, 67, 68, 30, 32, 40, 42,
- 46, 45, 48, 50, 52, 57, 60, 62, 63, 65, 66, 68, 32, 34, 41, 44, 46, 45,
- 48, 49, 51, 57, 59, 61, 62, 63, 64, 65, 33, 36, 43, 45, 47, 46, 47, 49,
- 51, 56, 59, 60, 60, 62, 63, 65, 37, 40, 47, 47, 47, 45, 47, 48, 50, 54,
- 57, 58, 60, 61, 62, 63, 42, 43, 47, 48, 50, 49, 50, 52, 53, 57, 60, 58,
- 59, 60, 62, 63, 45, 44, 47, 49, 51, 51, 52, 54, 55, 59, 61, 61, 61, 60,
- 61, 61, 49, 46, 48, 50, 53, 53, 54, 55, 57, 60, 62, 63, 62, 63, 62, 62,
- 48, 46, 47, 50, 53, 56, 57, 59, 60, 64, 66, 65, 65, 64, 64, 65, 49, 45,
- 46, 49, 53, 58, 61, 62, 64, 67, 69, 67, 66, 66, 66, 65, 49, 46, 46, 49,
- 53, 59, 62, 64, 65, 69, 71, 70, 68, 68, 67, 68, 50, 46, 46, 50, 54, 59,
- 64, 65, 67, 71, 73, 72, 72, 70, 70, 69, 52, 48, 47, 50, 54, 61, 66, 68,
- 71, 75, 77, 74, 73, 73, 71, 72, 54, 50, 49, 52, 55, 62, 68, 71, 73, 78,
- 80, 78, 76, 74, 75, 73, 55, 51, 49, 52, 56, 63, 69, 72, 75, 80, 82, 80,
- 79, 78, 76, 77, 57, 52, 50, 53, 56, 64, 70, 73, 76, 82, 84, 82, 80, 80,
- 79, 77, 60, 54, 52, 55, 58, 65, 72, 75, 79, 85, 88, 86, 84, 82, 81, 81,
- 63, 57, 55, 58, 60, 67, 75, 78, 82, 89, 92, 88, 87, 85, 84, 81, 64, 58,
- 55, 58, 61, 68, 75, 78, 82, 89, 92, 90, 89, 87, 86, 86, 64, 59, 56, 58,
- 61, 68, 75, 79, 83, 90, 93, 95, 93, 91, 89, 87, 67, 61, 58, 60, 63, 69,
- 76, 79, 85, 92, 95, 96, 94, 92, 91, 91, 68, 62, 59, 60, 64, 71, 74, 78,
- 86, 91, 94, 96, 98, 96, 94, 91, 69, 62, 60, 60, 65, 70, 72, 79, 85, 88,
- 95, 98, 99, 98, 97, 96, 70, 63, 62, 60, 66, 69, 73, 81, 83, 89, 96, 97,
- 99, 101, 98, 97, 71, 64, 63, 61, 67, 68, 74, 79, 82, 90, 93, 98, 102,
- 102, 102, 101, 72, 65, 64, 62, 66, 68, 75, 78, 83, 89, 92, 100, 101,
- 103, 104, 102, 73, 66, 65, 63, 66, 69, 75, 76, 84, 87, 93, 98, 102, 105,
- 106, 107, 74, 67, 67, 64, 66, 70, 74, 77, 84, 86, 94, 96, 103, 105, 106,
- 107, 75, 68, 68, 65, 66, 71, 74, 78, 83, 87, 93, 96, 103, 105, 109, 109,
- 76, 69, 69, 66, 67, 72, 73, 80, 82, 88, 91, 97, 101, 107, 109, 110, 77,
- 70, 70, 67, 67, 73, 73, 81, 81, 90, 90, 99, 99, 108, 108, 113,
- /* Size 32x16 */
- 32, 31, 30, 32, 33, 37, 42, 45, 49, 48, 49, 49, 50, 52, 54, 55, 57, 60,
- 63, 64, 64, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 31, 31, 32, 34,
- 36, 40, 43, 44, 46, 46, 45, 46, 46, 48, 50, 51, 52, 54, 57, 58, 59, 61,
- 62, 62, 63, 64, 65, 66, 67, 68, 69, 70, 37, 38, 40, 41, 43, 47, 47, 47,
- 48, 47, 46, 46, 46, 47, 49, 49, 50, 52, 55, 55, 56, 58, 59, 60, 62, 63,
- 64, 65, 67, 68, 69, 70, 42, 42, 42, 44, 45, 47, 48, 49, 50, 50, 49, 49,
- 50, 50, 52, 52, 53, 55, 58, 58, 58, 60, 60, 60, 60, 61, 62, 63, 64, 65,
- 66, 67, 48, 47, 46, 46, 47, 47, 50, 51, 53, 53, 53, 53, 54, 54, 55, 56,
- 56, 58, 60, 61, 61, 63, 64, 65, 66, 67, 66, 66, 66, 66, 67, 67, 49, 47,
- 45, 45, 46, 45, 49, 51, 53, 56, 58, 59, 59, 61, 62, 63, 64, 65, 67, 68,
- 68, 69, 71, 70, 69, 68, 68, 69, 70, 71, 72, 73, 52, 50, 48, 48, 47, 47,
- 50, 52, 54, 57, 61, 62, 64, 66, 68, 69, 70, 72, 75, 75, 75, 76, 74, 72,
- 73, 74, 75, 75, 74, 74, 73, 73, 54, 52, 50, 49, 49, 48, 52, 54, 55, 59,
- 62, 64, 65, 68, 71, 72, 73, 75, 78, 78, 79, 79, 78, 79, 81, 79, 78, 76,
- 77, 78, 80, 81, 57, 54, 52, 51, 51, 50, 53, 55, 57, 60, 64, 65, 67, 71,
- 73, 75, 76, 79, 82, 82, 83, 85, 86, 85, 83, 82, 83, 84, 84, 83, 82, 81,
- 63, 60, 57, 57, 56, 54, 57, 59, 60, 64, 67, 69, 71, 75, 78, 80, 82, 85,
- 89, 89, 90, 92, 91, 88, 89, 90, 89, 87, 86, 87, 88, 90, 66, 63, 60, 59,
- 59, 57, 60, 61, 62, 66, 69, 71, 73, 77, 80, 82, 84, 88, 92, 92, 93, 95,
- 94, 95, 96, 93, 92, 93, 94, 93, 91, 90, 67, 64, 62, 61, 60, 58, 58, 61,
- 63, 65, 67, 70, 72, 74, 78, 80, 82, 86, 88, 90, 95, 96, 96, 98, 97, 98,
- 100, 98, 96, 96, 97, 99, 68, 65, 63, 62, 60, 60, 59, 61, 62, 65, 66, 68,
- 72, 73, 76, 79, 80, 84, 87, 89, 93, 94, 98, 99, 99, 102, 101, 102, 103,
- 103, 101, 99, 69, 66, 65, 63, 62, 61, 60, 60, 63, 64, 66, 68, 70, 73,
- 74, 78, 80, 82, 85, 87, 91, 92, 96, 98, 101, 102, 103, 105, 105, 105,
- 107, 108, 71, 67, 66, 64, 63, 62, 62, 61, 62, 64, 66, 67, 70, 71, 75,
- 76, 79, 81, 84, 86, 89, 91, 94, 97, 98, 102, 104, 106, 106, 109, 109,
- 108, 72, 68, 68, 65, 65, 63, 63, 61, 62, 65, 65, 68, 69, 72, 73, 77, 77,
- 81, 81, 86, 87, 91, 91, 96, 97, 101, 102, 107, 107, 109, 110, 113,
- /* Size 4x16 */
- 31, 49, 63, 69, 32, 45, 57, 65, 36, 46, 56, 62, 43, 49, 57, 60, 46, 53,
- 60, 63, 45, 58, 67, 66, 46, 59, 71, 70, 50, 62, 78, 74, 52, 64, 82, 80,
- 57, 67, 89, 85, 59, 68, 90, 91, 62, 71, 91, 96, 63, 69, 89, 101, 65, 68,
- 89, 103, 67, 70, 86, 105, 69, 72, 88, 107,
- /* Size 16x4 */
- 31, 32, 36, 43, 46, 45, 46, 50, 52, 57, 59, 62, 63, 65, 67, 69, 49, 45,
- 46, 49, 53, 58, 59, 62, 64, 67, 68, 71, 69, 68, 70, 72, 63, 57, 56, 57,
- 60, 67, 71, 78, 82, 89, 90, 91, 89, 89, 86, 88, 69, 65, 62, 60, 63, 66,
- 70, 74, 80, 85, 91, 96, 101, 103, 105, 107,
- /* Size 8x32 */
- 32, 37, 48, 52, 57, 66, 68, 71, 31, 38, 47, 50, 54, 63, 65, 67, 30, 40,
- 46, 48, 52, 60, 63, 66, 32, 41, 46, 48, 51, 59, 62, 64, 33, 43, 47, 47,
- 51, 59, 60, 63, 37, 47, 47, 47, 50, 57, 60, 62, 42, 47, 50, 50, 53, 60,
- 59, 62, 45, 47, 51, 52, 55, 61, 61, 61, 49, 48, 53, 54, 57, 62, 62, 62,
- 48, 47, 53, 57, 60, 66, 65, 64, 49, 46, 53, 61, 64, 69, 66, 66, 49, 46,
- 53, 62, 65, 71, 68, 67, 50, 46, 54, 64, 67, 73, 72, 70, 52, 47, 54, 66,
- 71, 77, 73, 71, 54, 49, 55, 68, 73, 80, 76, 75, 55, 49, 56, 69, 75, 82,
- 79, 76, 57, 50, 56, 70, 76, 84, 80, 79, 60, 52, 58, 72, 79, 88, 84, 81,
- 63, 55, 60, 75, 82, 92, 87, 84, 64, 55, 61, 75, 82, 92, 89, 86, 64, 56,
- 61, 75, 83, 93, 93, 89, 67, 58, 63, 76, 85, 95, 94, 91, 68, 59, 64, 74,
- 86, 94, 98, 94, 69, 60, 65, 72, 85, 95, 99, 97, 70, 62, 66, 73, 83, 96,
- 99, 98, 71, 63, 67, 74, 82, 93, 102, 102, 72, 64, 66, 75, 83, 92, 101,
- 104, 73, 65, 66, 75, 84, 93, 102, 106, 74, 67, 66, 74, 84, 94, 103, 106,
- 75, 68, 66, 74, 83, 93, 103, 109, 76, 69, 67, 73, 82, 91, 101, 109, 77,
- 70, 67, 73, 81, 90, 99, 108,
- /* Size 32x8 */
- 32, 31, 30, 32, 33, 37, 42, 45, 49, 48, 49, 49, 50, 52, 54, 55, 57, 60,
- 63, 64, 64, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 37, 38, 40, 41,
- 43, 47, 47, 47, 48, 47, 46, 46, 46, 47, 49, 49, 50, 52, 55, 55, 56, 58,
- 59, 60, 62, 63, 64, 65, 67, 68, 69, 70, 48, 47, 46, 46, 47, 47, 50, 51,
- 53, 53, 53, 53, 54, 54, 55, 56, 56, 58, 60, 61, 61, 63, 64, 65, 66, 67,
- 66, 66, 66, 66, 67, 67, 52, 50, 48, 48, 47, 47, 50, 52, 54, 57, 61, 62,
- 64, 66, 68, 69, 70, 72, 75, 75, 75, 76, 74, 72, 73, 74, 75, 75, 74, 74,
- 73, 73, 57, 54, 52, 51, 51, 50, 53, 55, 57, 60, 64, 65, 67, 71, 73, 75,
- 76, 79, 82, 82, 83, 85, 86, 85, 83, 82, 83, 84, 84, 83, 82, 81, 66, 63,
- 60, 59, 59, 57, 60, 61, 62, 66, 69, 71, 73, 77, 80, 82, 84, 88, 92, 92,
- 93, 95, 94, 95, 96, 93, 92, 93, 94, 93, 91, 90, 68, 65, 63, 62, 60, 60,
- 59, 61, 62, 65, 66, 68, 72, 73, 76, 79, 80, 84, 87, 89, 93, 94, 98, 99,
- 99, 102, 101, 102, 103, 103, 101, 99, 71, 67, 66, 64, 63, 62, 62, 61,
- 62, 64, 66, 67, 70, 71, 75, 76, 79, 81, 84, 86, 89, 91, 94, 97, 98, 102,
- 104, 106, 106, 109, 109, 108 },
- },
- {
- { /* Luma */
- /* Size 4x4 */
- 32, 41, 69, 92, 41, 63, 88, 103, 69, 88, 127, 140, 92, 103, 140, 184,
- /* Size 8x8 */
- 32, 32, 37, 47, 62, 78, 90, 102, 32, 35, 39, 46, 58, 72, 84, 96, 37, 39,
- 51, 60, 71, 84, 93, 100, 47, 46, 60, 73, 87, 100, 106, 113, 62, 58, 71,
- 87, 105, 121, 129, 132, 78, 72, 84, 100, 121, 140, 148, 155, 90, 84, 93,
- 106, 129, 148, 169, 183, 102, 96, 100, 113, 132, 155, 183, 201,
- /* Size 16x16 */
- 32, 31, 31, 32, 36, 39, 47, 54, 61, 71, 80, 86, 92, 98, 104, 111, 31,
- 32, 32, 33, 34, 37, 44, 50, 56, 65, 73, 79, 85, 91, 98, 105, 31, 32, 33,
- 34, 36, 39, 45, 50, 56, 64, 71, 77, 82, 88, 94, 100, 32, 33, 34, 36, 40,
- 42, 47, 51, 57, 65, 71, 76, 80, 85, 91, 98, 36, 34, 36, 40, 48, 50, 56,
- 60, 65, 73, 79, 84, 86, 90, 95, 98, 39, 37, 39, 42, 50, 54, 60, 65, 70,
- 78, 84, 89, 95, 96, 102, 105, 47, 44, 45, 47, 56, 60, 69, 75, 81, 89,
- 95, 100, 102, 104, 109, 112, 54, 50, 50, 51, 60, 65, 75, 82, 89, 97,
- 104, 109, 110, 114, 117, 121, 61, 56, 56, 57, 65, 70, 81, 89, 97, 106,
- 113, 119, 122, 126, 125, 130, 71, 65, 64, 65, 73, 78, 89, 97, 106, 117,
- 125, 131, 134, 134, 136, 141, 80, 73, 71, 71, 79, 84, 95, 104, 113, 125,
- 134, 140, 142, 145, 146, 152, 86, 79, 77, 76, 84, 89, 100, 109, 119,
- 131, 140, 147, 154, 157, 160, 165, 92, 85, 82, 80, 86, 95, 102, 110,
- 122, 134, 142, 154, 162, 168, 174, 178, 98, 91, 88, 85, 90, 96, 104,
- 114, 126, 134, 145, 157, 168, 176, 184, 193, 104, 98, 94, 91, 95, 102,
- 109, 117, 125, 136, 146, 160, 174, 184, 193, 201, 111, 105, 100, 98, 98,
- 105, 112, 121, 130, 141, 152, 165, 178, 193, 201, 210,
- /* Size 32x32 */
- 32, 31, 31, 31, 31, 32, 32, 34, 36, 38, 39, 44, 47, 49, 54, 59, 61, 65,
- 71, 76, 80, 83, 86, 89, 92, 95, 98, 101, 104, 108, 111, 114, 31, 32, 32,
- 32, 32, 32, 33, 34, 35, 37, 38, 42, 45, 47, 51, 56, 58, 62, 68, 72, 76,
- 78, 82, 85, 88, 90, 93, 96, 99, 102, 105, 109, 31, 32, 32, 32, 32, 32,
- 33, 33, 34, 36, 37, 41, 44, 46, 50, 54, 56, 60, 65, 70, 73, 76, 79, 82,
- 85, 88, 91, 95, 98, 101, 105, 109, 31, 32, 32, 32, 32, 33, 33, 34, 35,
- 36, 38, 41, 44, 45, 49, 54, 56, 59, 65, 69, 72, 75, 78, 81, 84, 86, 89,
- 92, 95, 98, 101, 104, 31, 32, 32, 32, 33, 34, 34, 35, 36, 38, 39, 42,
- 45, 46, 50, 54, 56, 59, 64, 68, 71, 74, 77, 79, 82, 85, 88, 91, 94, 97,
- 100, 104, 32, 32, 32, 33, 34, 35, 36, 37, 38, 39, 40, 42, 45, 46, 49,
- 53, 55, 58, 63, 66, 69, 72, 74, 78, 81, 84, 87, 90, 93, 96, 99, 102, 32,
- 33, 33, 33, 34, 36, 36, 38, 40, 41, 42, 44, 47, 48, 51, 55, 57, 60, 65,
- 68, 71, 73, 76, 78, 80, 82, 85, 88, 91, 95, 98, 102, 34, 34, 33, 34, 35,
- 37, 38, 39, 42, 44, 45, 47, 50, 51, 54, 58, 60, 63, 68, 71, 74, 76, 79,
- 82, 85, 86, 87, 88, 90, 93, 96, 99, 36, 35, 34, 35, 36, 38, 40, 42, 48,
- 50, 50, 54, 56, 57, 60, 64, 65, 68, 73, 76, 79, 81, 84, 86, 86, 88, 90,
- 93, 95, 97, 98, 100, 38, 37, 36, 36, 38, 39, 41, 44, 50, 51, 52, 56, 58,
- 60, 63, 67, 68, 71, 76, 79, 82, 84, 87, 87, 90, 93, 94, 95, 96, 100,
- 103, 106, 39, 38, 37, 38, 39, 40, 42, 45, 50, 52, 54, 58, 60, 62, 65,
- 69, 70, 73, 78, 81, 84, 86, 89, 92, 95, 95, 96, 99, 102, 104, 105, 106,
- 44, 42, 41, 41, 42, 42, 44, 47, 54, 56, 58, 63, 66, 68, 71, 75, 77, 79,
- 84, 88, 90, 92, 95, 97, 97, 99, 102, 103, 103, 106, 109, 113, 47, 45,
- 44, 44, 45, 45, 47, 50, 56, 58, 60, 66, 69, 71, 75, 79, 81, 84, 89, 92,
- 95, 97, 100, 100, 102, 105, 104, 106, 109, 111, 112, 113, 49, 47, 46,
- 45, 46, 46, 48, 51, 57, 60, 62, 68, 71, 73, 77, 81, 83, 87, 92, 95, 98,
- 100, 103, 105, 107, 106, 109, 112, 112, 113, 117, 120, 54, 51, 50, 49,
- 50, 49, 51, 54, 60, 63, 65, 71, 75, 77, 82, 87, 89, 92, 97, 101, 104,
- 106, 109, 112, 110, 113, 114, 114, 117, 121, 121, 121, 59, 56, 54, 54,
- 54, 53, 55, 58, 64, 67, 69, 75, 79, 81, 87, 92, 94, 98, 103, 107, 110,
- 113, 116, 114, 117, 118, 117, 121, 122, 122, 125, 129, 61, 58, 56, 56,
- 56, 55, 57, 60, 65, 68, 70, 77, 81, 83, 89, 94, 97, 101, 106, 110, 113,
- 116, 119, 120, 122, 121, 126, 124, 125, 130, 130, 130, 65, 62, 60, 59,
- 59, 58, 60, 63, 68, 71, 73, 79, 84, 87, 92, 98, 101, 105, 111, 115, 118,
- 121, 124, 128, 125, 129, 128, 131, 133, 132, 135, 139, 71, 68, 65, 65,
- 64, 63, 65, 68, 73, 76, 78, 84, 89, 92, 97, 103, 106, 111, 117, 122,
- 125, 128, 131, 131, 134, 132, 134, 136, 136, 140, 141, 140, 76, 72, 70,
- 69, 68, 66, 68, 71, 76, 79, 81, 88, 92, 95, 101, 107, 110, 115, 122,
- 127, 130, 133, 136, 136, 138, 139, 141, 140, 145, 143, 146, 151, 80, 76,
- 73, 72, 71, 69, 71, 74, 79, 82, 84, 90, 95, 98, 104, 110, 113, 118, 125,
- 130, 134, 137, 140, 146, 142, 146, 145, 149, 146, 150, 152, 151, 83, 78,
- 76, 75, 74, 72, 73, 76, 81, 84, 86, 92, 97, 100, 106, 113, 116, 121,
- 128, 133, 137, 140, 144, 147, 152, 148, 154, 151, 156, 155, 156, 162,
- 86, 82, 79, 78, 77, 74, 76, 79, 84, 87, 89, 95, 100, 103, 109, 116, 119,
- 124, 131, 136, 140, 144, 147, 150, 154, 159, 157, 160, 160, 162, 165,
- 162, 89, 85, 82, 81, 79, 78, 78, 82, 86, 87, 92, 97, 100, 105, 112, 114,
- 120, 128, 131, 136, 146, 147, 150, 155, 156, 161, 166, 165, 167, 169,
- 169, 175, 92, 88, 85, 84, 82, 81, 80, 85, 86, 90, 95, 97, 102, 107, 110,
- 117, 122, 125, 134, 138, 142, 152, 154, 156, 162, 163, 168, 173, 174,
- 174, 178, 176, 95, 90, 88, 86, 85, 84, 82, 86, 88, 93, 95, 99, 105, 106,
- 113, 118, 121, 129, 132, 139, 146, 148, 159, 161, 163, 169, 170, 176,
- 180, 183, 181, 187, 98, 93, 91, 89, 88, 87, 85, 87, 90, 94, 96, 102,
- 104, 109, 114, 117, 126, 128, 134, 141, 145, 154, 157, 166, 168, 170,
- 176, 178, 184, 188, 193, 188, 101, 96, 95, 92, 91, 90, 88, 88, 93, 95,
- 99, 103, 106, 112, 114, 121, 124, 131, 136, 140, 149, 151, 160, 165,
- 173, 176, 178, 184, 186, 192, 196, 203, 104, 99, 98, 95, 94, 93, 91, 90,
- 95, 96, 102, 103, 109, 112, 117, 122, 125, 133, 136, 145, 146, 156, 160,
- 167, 174, 180, 184, 186, 193, 194, 201, 204, 108, 102, 101, 98, 97, 96,
- 95, 93, 97, 100, 104, 106, 111, 113, 121, 122, 130, 132, 140, 143, 150,
- 155, 162, 169, 174, 183, 188, 192, 194, 201, 202, 210, 111, 105, 105,
- 101, 100, 99, 98, 96, 98, 103, 105, 109, 112, 117, 121, 125, 130, 135,
- 141, 146, 152, 156, 165, 169, 178, 181, 193, 196, 201, 202, 210, 211,
- 114, 109, 109, 104, 104, 102, 102, 99, 100, 106, 106, 113, 113, 120,
- 121, 129, 130, 139, 140, 151, 151, 162, 162, 175, 176, 187, 188, 203,
- 204, 210, 211, 219,
- /* Size 4x8 */
- 32, 42, 69, 88, 33, 42, 64, 83, 36, 56, 77, 88, 46, 67, 93, 105, 60, 79,
- 112, 122, 75, 92, 130, 144, 86, 95, 136, 167, 98, 105, 136, 177,
- /* Size 8x4 */
- 32, 33, 36, 46, 60, 75, 86, 98, 42, 42, 56, 67, 79, 92, 95, 105, 69, 64,
- 77, 93, 112, 130, 136, 136, 88, 83, 88, 105, 122, 144, 167, 177,
- /* Size 8x16 */
- 32, 32, 36, 47, 65, 79, 90, 96, 31, 32, 35, 44, 60, 72, 84, 90, 32, 34,
- 36, 45, 59, 71, 80, 87, 32, 35, 40, 47, 60, 71, 78, 85, 36, 37, 48, 56,
- 68, 78, 83, 87, 39, 40, 50, 60, 73, 84, 91, 94, 47, 45, 56, 69, 84, 95,
- 101, 101, 53, 50, 60, 75, 92, 103, 108, 110, 61, 56, 65, 81, 100, 113,
- 116, 118, 71, 64, 73, 89, 111, 125, 129, 129, 79, 70, 79, 95, 118, 133,
- 142, 138, 86, 76, 84, 100, 124, 140, 153, 150, 92, 82, 89, 101, 121,
- 148, 157, 161, 98, 88, 93, 108, 124, 141, 163, 174, 104, 94, 95, 110,
- 129, 151, 171, 181, 110, 100, 98, 111, 127, 147, 169, 188,
- /* Size 16x8 */
- 32, 31, 32, 32, 36, 39, 47, 53, 61, 71, 79, 86, 92, 98, 104, 110, 32,
- 32, 34, 35, 37, 40, 45, 50, 56, 64, 70, 76, 82, 88, 94, 100, 36, 35, 36,
- 40, 48, 50, 56, 60, 65, 73, 79, 84, 89, 93, 95, 98, 47, 44, 45, 47, 56,
- 60, 69, 75, 81, 89, 95, 100, 101, 108, 110, 111, 65, 60, 59, 60, 68, 73,
- 84, 92, 100, 111, 118, 124, 121, 124, 129, 127, 79, 72, 71, 71, 78, 84,
- 95, 103, 113, 125, 133, 140, 148, 141, 151, 147, 90, 84, 80, 78, 83, 91,
- 101, 108, 116, 129, 142, 153, 157, 163, 171, 169, 96, 90, 87, 85, 87,
- 94, 101, 110, 118, 129, 138, 150, 161, 174, 181, 188,
- /* Size 16x32 */
- 32, 31, 32, 32, 36, 44, 47, 53, 65, 73, 79, 87, 90, 93, 96, 99, 31, 32,
- 32, 33, 35, 42, 45, 51, 62, 69, 75, 83, 86, 88, 91, 94, 31, 32, 32, 33,
- 35, 41, 44, 49, 60, 67, 72, 80, 84, 87, 90, 94, 31, 32, 33, 33, 35, 41,
- 44, 49, 59, 66, 71, 79, 82, 84, 87, 90, 32, 32, 34, 34, 36, 42, 45, 50,
- 59, 65, 71, 78, 80, 83, 87, 90, 32, 33, 35, 36, 38, 42, 45, 49, 58, 64,
- 69, 76, 80, 83, 86, 88, 32, 33, 35, 36, 40, 44, 47, 51, 60, 66, 71, 76,
- 78, 81, 85, 89, 34, 34, 36, 38, 42, 48, 50, 54, 63, 69, 73, 80, 82, 81,
- 84, 86, 36, 34, 37, 40, 48, 54, 56, 60, 68, 74, 78, 84, 83, 86, 87, 87,
- 38, 36, 39, 41, 49, 56, 58, 63, 71, 77, 81, 86, 88, 88, 90, 93, 39, 37,
- 40, 42, 50, 58, 60, 65, 73, 79, 84, 90, 91, 92, 94, 93, 44, 41, 42, 45,
- 53, 63, 66, 71, 79, 85, 90, 96, 94, 96, 96, 99, 47, 44, 45, 47, 56, 66,
- 69, 75, 84, 90, 95, 99, 101, 98, 101, 99, 49, 46, 47, 48, 57, 67, 71,
- 77, 86, 93, 97, 103, 103, 105, 102, 106, 53, 49, 50, 51, 60, 71, 75, 82,
- 92, 99, 103, 111, 108, 107, 110, 107, 58, 54, 54, 55, 63, 75, 79, 87,
- 98, 105, 110, 114, 114, 113, 111, 115, 61, 56, 56, 57, 65, 77, 81, 89,
- 100, 107, 113, 118, 116, 117, 118, 116, 65, 60, 59, 60, 68, 79, 84, 92,
- 105, 112, 118, 126, 124, 122, 121, 124, 71, 65, 64, 65, 73, 84, 89, 97,
- 111, 119, 125, 130, 129, 129, 129, 125, 76, 69, 68, 69, 76, 88, 92, 101,
- 115, 123, 130, 134, 134, 131, 132, 135, 79, 72, 70, 71, 79, 90, 95, 104,
- 118, 127, 133, 143, 142, 141, 138, 136, 82, 75, 73, 74, 81, 92, 97, 106,
- 121, 130, 136, 146, 145, 144, 144, 145, 86, 78, 76, 77, 84, 95, 100,
- 109, 124, 133, 140, 147, 153, 151, 150, 146, 89, 81, 79, 78, 87, 95, 99,
- 112, 124, 130, 145, 152, 156, 157, 156, 158, 92, 84, 82, 80, 89, 95,
- 101, 116, 121, 132, 148, 151, 157, 163, 161, 159, 95, 86, 85, 83, 92,
- 95, 105, 114, 120, 136, 143, 155, 163, 167, 171, 170, 98, 89, 88, 85,
- 93, 95, 108, 113, 124, 136, 141, 160, 163, 169, 174, 171, 101, 92, 91,
- 88, 94, 98, 110, 112, 128, 133, 146, 158, 166, 175, 179, 185, 104, 95,
- 94, 91, 95, 101, 110, 115, 129, 132, 151, 154, 171, 175, 181, 186, 107,
- 98, 97, 94, 96, 105, 110, 119, 128, 136, 149, 156, 173, 177, 188, 192,
- 110, 101, 100, 97, 98, 108, 111, 123, 127, 141, 147, 161, 169, 183, 188,
- 193, 114, 104, 104, 100, 100, 111, 111, 126, 127, 145, 145, 166, 166,
- 189, 190, 201,
- /* Size 32x16 */
- 32, 31, 31, 31, 32, 32, 32, 34, 36, 38, 39, 44, 47, 49, 53, 58, 61, 65,
- 71, 76, 79, 82, 86, 89, 92, 95, 98, 101, 104, 107, 110, 114, 31, 32, 32,
- 32, 32, 33, 33, 34, 34, 36, 37, 41, 44, 46, 49, 54, 56, 60, 65, 69, 72,
- 75, 78, 81, 84, 86, 89, 92, 95, 98, 101, 104, 32, 32, 32, 33, 34, 35,
- 35, 36, 37, 39, 40, 42, 45, 47, 50, 54, 56, 59, 64, 68, 70, 73, 76, 79,
- 82, 85, 88, 91, 94, 97, 100, 104, 32, 33, 33, 33, 34, 36, 36, 38, 40,
- 41, 42, 45, 47, 48, 51, 55, 57, 60, 65, 69, 71, 74, 77, 78, 80, 83, 85,
- 88, 91, 94, 97, 100, 36, 35, 35, 35, 36, 38, 40, 42, 48, 49, 50, 53, 56,
- 57, 60, 63, 65, 68, 73, 76, 79, 81, 84, 87, 89, 92, 93, 94, 95, 96, 98,
- 100, 44, 42, 41, 41, 42, 42, 44, 48, 54, 56, 58, 63, 66, 67, 71, 75, 77,
- 79, 84, 88, 90, 92, 95, 95, 95, 95, 95, 98, 101, 105, 108, 111, 47, 45,
- 44, 44, 45, 45, 47, 50, 56, 58, 60, 66, 69, 71, 75, 79, 81, 84, 89, 92,
- 95, 97, 100, 99, 101, 105, 108, 110, 110, 110, 111, 111, 53, 51, 49, 49,
- 50, 49, 51, 54, 60, 63, 65, 71, 75, 77, 82, 87, 89, 92, 97, 101, 104,
- 106, 109, 112, 116, 114, 113, 112, 115, 119, 123, 126, 65, 62, 60, 59,
- 59, 58, 60, 63, 68, 71, 73, 79, 84, 86, 92, 98, 100, 105, 111, 115, 118,
- 121, 124, 124, 121, 120, 124, 128, 129, 128, 127, 127, 73, 69, 67, 66,
- 65, 64, 66, 69, 74, 77, 79, 85, 90, 93, 99, 105, 107, 112, 119, 123,
- 127, 130, 133, 130, 132, 136, 136, 133, 132, 136, 141, 145, 79, 75, 72,
- 71, 71, 69, 71, 73, 78, 81, 84, 90, 95, 97, 103, 110, 113, 118, 125,
- 130, 133, 136, 140, 145, 148, 143, 141, 146, 151, 149, 147, 145, 87, 83,
- 80, 79, 78, 76, 76, 80, 84, 86, 90, 96, 99, 103, 111, 114, 118, 126,
- 130, 134, 143, 146, 147, 152, 151, 155, 160, 158, 154, 156, 161, 166,
- 90, 86, 84, 82, 80, 80, 78, 82, 83, 88, 91, 94, 101, 103, 108, 114, 116,
- 124, 129, 134, 142, 145, 153, 156, 157, 163, 163, 166, 171, 173, 169,
- 166, 93, 88, 87, 84, 83, 83, 81, 81, 86, 88, 92, 96, 98, 105, 107, 113,
- 117, 122, 129, 131, 141, 144, 151, 157, 163, 167, 169, 175, 175, 177,
- 183, 189, 96, 91, 90, 87, 87, 86, 85, 84, 87, 90, 94, 96, 101, 102, 110,
- 111, 118, 121, 129, 132, 138, 144, 150, 156, 161, 171, 174, 179, 181,
- 188, 188, 190, 99, 94, 94, 90, 90, 88, 89, 86, 87, 93, 93, 99, 99, 106,
- 107, 115, 116, 124, 125, 135, 136, 145, 146, 158, 159, 170, 171, 185,
- 186, 192, 193, 201,
- /* Size 4x16 */
- 31, 44, 73, 93, 32, 41, 67, 87, 32, 42, 65, 83, 33, 44, 66, 81, 34, 54,
- 74, 86, 37, 58, 79, 92, 44, 66, 90, 98, 49, 71, 99, 107, 56, 77, 107,
- 117, 65, 84, 119, 129, 72, 90, 127, 141, 78, 95, 133, 151, 84, 95, 132,
- 163, 89, 95, 136, 169, 95, 101, 132, 175, 101, 108, 141, 183,
- /* Size 16x4 */
- 31, 32, 32, 33, 34, 37, 44, 49, 56, 65, 72, 78, 84, 89, 95, 101, 44, 41,
- 42, 44, 54, 58, 66, 71, 77, 84, 90, 95, 95, 95, 101, 108, 73, 67, 65,
- 66, 74, 79, 90, 99, 107, 119, 127, 133, 132, 136, 132, 141, 93, 87, 83,
- 81, 86, 92, 98, 107, 117, 129, 141, 151, 163, 169, 175, 183,
- /* Size 8x32 */
- 32, 32, 36, 47, 65, 79, 90, 96, 31, 32, 35, 45, 62, 75, 86, 91, 31, 32,
- 35, 44, 60, 72, 84, 90, 31, 33, 35, 44, 59, 71, 82, 87, 32, 34, 36, 45,
- 59, 71, 80, 87, 32, 35, 38, 45, 58, 69, 80, 86, 32, 35, 40, 47, 60, 71,
- 78, 85, 34, 36, 42, 50, 63, 73, 82, 84, 36, 37, 48, 56, 68, 78, 83, 87,
- 38, 39, 49, 58, 71, 81, 88, 90, 39, 40, 50, 60, 73, 84, 91, 94, 44, 42,
- 53, 66, 79, 90, 94, 96, 47, 45, 56, 69, 84, 95, 101, 101, 49, 47, 57,
- 71, 86, 97, 103, 102, 53, 50, 60, 75, 92, 103, 108, 110, 58, 54, 63, 79,
- 98, 110, 114, 111, 61, 56, 65, 81, 100, 113, 116, 118, 65, 59, 68, 84,
- 105, 118, 124, 121, 71, 64, 73, 89, 111, 125, 129, 129, 76, 68, 76, 92,
- 115, 130, 134, 132, 79, 70, 79, 95, 118, 133, 142, 138, 82, 73, 81, 97,
- 121, 136, 145, 144, 86, 76, 84, 100, 124, 140, 153, 150, 89, 79, 87, 99,
- 124, 145, 156, 156, 92, 82, 89, 101, 121, 148, 157, 161, 95, 85, 92,
- 105, 120, 143, 163, 171, 98, 88, 93, 108, 124, 141, 163, 174, 101, 91,
- 94, 110, 128, 146, 166, 179, 104, 94, 95, 110, 129, 151, 171, 181, 107,
- 97, 96, 110, 128, 149, 173, 188, 110, 100, 98, 111, 127, 147, 169, 188,
- 114, 104, 100, 111, 127, 145, 166, 190,
- /* Size 32x8 */
- 32, 31, 31, 31, 32, 32, 32, 34, 36, 38, 39, 44, 47, 49, 53, 58, 61, 65,
- 71, 76, 79, 82, 86, 89, 92, 95, 98, 101, 104, 107, 110, 114, 32, 32, 32,
- 33, 34, 35, 35, 36, 37, 39, 40, 42, 45, 47, 50, 54, 56, 59, 64, 68, 70,
- 73, 76, 79, 82, 85, 88, 91, 94, 97, 100, 104, 36, 35, 35, 35, 36, 38,
- 40, 42, 48, 49, 50, 53, 56, 57, 60, 63, 65, 68, 73, 76, 79, 81, 84, 87,
- 89, 92, 93, 94, 95, 96, 98, 100, 47, 45, 44, 44, 45, 45, 47, 50, 56, 58,
- 60, 66, 69, 71, 75, 79, 81, 84, 89, 92, 95, 97, 100, 99, 101, 105, 108,
- 110, 110, 110, 111, 111, 65, 62, 60, 59, 59, 58, 60, 63, 68, 71, 73, 79,
- 84, 86, 92, 98, 100, 105, 111, 115, 118, 121, 124, 124, 121, 120, 124,
- 128, 129, 128, 127, 127, 79, 75, 72, 71, 71, 69, 71, 73, 78, 81, 84, 90,
- 95, 97, 103, 110, 113, 118, 125, 130, 133, 136, 140, 145, 148, 143, 141,
- 146, 151, 149, 147, 145, 90, 86, 84, 82, 80, 80, 78, 82, 83, 88, 91, 94,
- 101, 103, 108, 114, 116, 124, 129, 134, 142, 145, 153, 156, 157, 163,
- 163, 166, 171, 173, 169, 166, 96, 91, 90, 87, 87, 86, 85, 84, 87, 90,
- 94, 96, 101, 102, 110, 111, 118, 121, 129, 132, 138, 144, 150, 156, 161,
- 171, 174, 179, 181, 188, 188, 190 },
- { /* Chroma */
- /* Size 4x4 */
- 33, 45, 56, 64, 45, 58, 66, 69, 56, 66, 86, 87, 64, 69, 87, 105,
- /* Size 8x8 */
- 31, 38, 47, 48, 54, 61, 66, 69, 38, 47, 47, 46, 50, 55, 61, 65, 47, 47,
- 53, 55, 58, 63, 65, 66, 48, 46, 55, 62, 67, 72, 73, 73, 54, 50, 58, 67,
- 76, 83, 84, 82, 61, 55, 63, 72, 83, 91, 92, 92, 66, 61, 65, 73, 84, 92,
- 101, 103, 69, 65, 66, 73, 82, 92, 103, 109,
- /* Size 16x16 */
- 32, 30, 33, 38, 49, 48, 50, 52, 55, 60, 63, 66, 68, 70, 72, 74, 30, 31,
- 35, 41, 46, 46, 46, 48, 51, 55, 58, 60, 63, 65, 68, 70, 33, 35, 39, 44,
- 47, 46, 46, 47, 50, 53, 56, 58, 60, 62, 65, 67, 38, 41, 44, 47, 49, 48,
- 47, 48, 50, 53, 55, 58, 58, 60, 62, 65, 49, 46, 47, 49, 53, 53, 54, 54,
- 56, 58, 60, 62, 62, 63, 64, 64, 48, 46, 46, 48, 53, 54, 56, 57, 59, 61,
- 63, 65, 67, 66, 68, 68, 50, 46, 46, 47, 54, 56, 61, 63, 65, 68, 70, 72,
- 71, 71, 72, 72, 52, 48, 47, 48, 54, 57, 63, 66, 69, 72, 75, 76, 75, 76,
- 76, 76, 55, 51, 50, 50, 56, 59, 65, 69, 73, 77, 79, 81, 81, 81, 80, 80,
- 60, 55, 53, 53, 58, 61, 68, 72, 77, 82, 85, 87, 87, 85, 84, 85, 63, 58,
- 56, 55, 60, 63, 70, 75, 79, 85, 89, 91, 91, 90, 89, 90, 66, 60, 58, 58,
- 62, 65, 72, 76, 81, 87, 91, 94, 96, 95, 95, 95, 68, 63, 60, 58, 62, 67,
- 71, 75, 81, 87, 91, 96, 99, 100, 100, 100, 70, 65, 62, 60, 63, 66, 71,
- 76, 81, 85, 90, 95, 100, 103, 104, 105, 72, 68, 65, 62, 64, 68, 72, 76,
- 80, 84, 89, 95, 100, 104, 107, 108, 74, 70, 67, 65, 64, 68, 72, 76, 80,
- 85, 90, 95, 100, 105, 108, 111,
- /* Size 32x32 */
- 32, 31, 30, 31, 33, 36, 38, 41, 49, 49, 48, 49, 50, 51, 52, 54, 55, 57,
- 60, 62, 63, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 31, 31, 31, 32,
- 34, 38, 40, 42, 47, 47, 47, 47, 48, 48, 50, 52, 53, 54, 57, 59, 60, 61,
- 63, 64, 65, 66, 67, 67, 68, 69, 70, 71, 30, 31, 31, 32, 35, 39, 41, 42,
- 46, 46, 46, 45, 46, 47, 48, 50, 51, 52, 55, 57, 58, 59, 60, 62, 63, 64,
- 65, 67, 68, 69, 70, 71, 31, 32, 32, 33, 36, 40, 41, 43, 46, 46, 45, 45,
- 46, 46, 47, 49, 50, 51, 54, 56, 57, 58, 59, 61, 62, 63, 63, 64, 65, 66,
- 67, 68, 33, 34, 35, 36, 39, 43, 44, 45, 47, 46, 46, 45, 46, 47, 47, 49,
- 50, 51, 53, 55, 56, 57, 58, 59, 60, 61, 62, 63, 65, 66, 67, 68, 36, 38,
- 39, 40, 43, 47, 47, 47, 48, 47, 46, 45, 46, 46, 47, 48, 49, 50, 52, 53,
- 54, 55, 56, 58, 59, 61, 62, 63, 64, 65, 66, 66, 38, 40, 41, 41, 44, 47,
- 47, 48, 49, 48, 48, 47, 47, 47, 48, 49, 50, 51, 53, 54, 55, 56, 58, 58,
- 58, 59, 60, 61, 62, 64, 65, 66, 41, 42, 42, 43, 45, 47, 48, 48, 50, 50,
- 49, 49, 50, 50, 50, 52, 52, 53, 55, 56, 57, 58, 59, 60, 61, 61, 61, 61,
- 62, 63, 63, 64, 49, 47, 46, 46, 47, 48, 49, 50, 53, 53, 53, 53, 54, 54,
- 54, 55, 56, 56, 58, 59, 60, 61, 62, 63, 62, 62, 63, 64, 64, 64, 64, 64,
- 49, 47, 46, 46, 46, 47, 48, 50, 53, 53, 54, 55, 55, 55, 56, 57, 58, 58,
- 60, 61, 62, 63, 64, 64, 64, 65, 65, 65, 65, 66, 67, 68, 48, 47, 46, 45,
- 46, 46, 48, 49, 53, 54, 54, 55, 56, 56, 57, 58, 59, 60, 61, 63, 63, 64,
- 65, 66, 67, 66, 66, 67, 68, 68, 68, 68, 49, 47, 45, 45, 45, 45, 47, 49,
- 53, 55, 55, 58, 59, 60, 61, 62, 63, 63, 65, 66, 67, 68, 69, 69, 68, 68,
- 69, 69, 69, 69, 70, 71, 50, 48, 46, 46, 46, 46, 47, 50, 54, 55, 56, 59,
- 61, 61, 63, 64, 65, 66, 68, 69, 70, 71, 72, 71, 71, 72, 71, 71, 72, 72,
- 72, 71, 51, 48, 47, 46, 47, 46, 47, 50, 54, 55, 56, 60, 61, 62, 64, 66,
- 66, 67, 69, 70, 71, 72, 73, 73, 74, 73, 73, 74, 73, 73, 74, 75, 52, 50,
- 48, 47, 47, 47, 48, 50, 54, 56, 57, 61, 63, 64, 66, 68, 69, 70, 72, 74,
- 75, 75, 76, 77, 75, 76, 76, 75, 76, 77, 76, 75, 54, 52, 50, 49, 49, 48,
- 49, 52, 55, 57, 58, 62, 64, 66, 68, 71, 72, 73, 75, 77, 78, 79, 80, 78,
- 79, 78, 77, 78, 78, 77, 78, 79, 55, 53, 51, 50, 50, 49, 50, 52, 56, 58,
- 59, 63, 65, 66, 69, 72, 73, 74, 77, 78, 79, 80, 81, 81, 81, 80, 81, 80,
- 80, 81, 80, 79, 57, 54, 52, 51, 51, 50, 51, 53, 56, 58, 60, 63, 66, 67,
- 70, 73, 74, 76, 79, 80, 82, 83, 84, 85, 83, 84, 83, 83, 83, 82, 82, 83,
- 60, 57, 55, 54, 53, 52, 53, 55, 58, 60, 61, 65, 68, 69, 72, 75, 77, 79,
- 82, 84, 85, 86, 87, 86, 87, 85, 85, 85, 84, 86, 85, 84, 62, 59, 57, 56,
- 55, 53, 54, 56, 59, 61, 63, 66, 69, 70, 74, 77, 78, 80, 84, 86, 87, 88,
- 90, 89, 89, 88, 88, 87, 88, 87, 87, 88, 63, 60, 58, 57, 56, 54, 55, 57,
- 60, 62, 63, 67, 70, 71, 75, 78, 79, 82, 85, 87, 89, 90, 91, 93, 91, 91,
- 90, 91, 89, 90, 90, 89, 65, 61, 59, 58, 57, 55, 56, 58, 61, 63, 64, 68,
- 71, 72, 75, 79, 80, 83, 86, 88, 90, 91, 93, 94, 95, 92, 94, 92, 93, 92,
- 91, 93, 66, 63, 60, 59, 58, 56, 58, 59, 62, 64, 65, 69, 72, 73, 76, 80,
- 81, 84, 87, 90, 91, 93, 94, 95, 96, 97, 95, 95, 95, 95, 95, 93, 67, 64,
- 62, 61, 59, 58, 58, 60, 63, 64, 66, 69, 71, 73, 77, 78, 81, 85, 86, 89,
- 93, 94, 95, 97, 97, 98, 99, 97, 97, 97, 96, 98, 68, 65, 63, 62, 60, 59,
- 58, 61, 62, 64, 67, 68, 71, 74, 75, 79, 81, 83, 87, 89, 91, 95, 96, 97,
- 99, 98, 100, 100, 100, 99, 100, 98, 69, 66, 64, 63, 61, 61, 59, 61, 62,
- 65, 66, 68, 72, 73, 76, 78, 80, 84, 85, 88, 91, 92, 97, 98, 98, 101,
- 100, 102, 102, 103, 101, 102, 70, 67, 65, 63, 62, 62, 60, 61, 63, 65,
- 66, 69, 71, 73, 76, 77, 81, 83, 85, 88, 90, 94, 95, 99, 100, 100, 103,
- 102, 104, 104, 105, 103, 71, 67, 67, 64, 63, 63, 61, 61, 64, 65, 67, 69,
- 71, 74, 75, 78, 80, 83, 85, 87, 91, 92, 95, 97, 100, 102, 102, 105, 104,
- 106, 106, 108, 72, 68, 68, 65, 65, 64, 62, 62, 64, 65, 68, 69, 72, 73,
- 76, 78, 80, 83, 84, 88, 89, 93, 95, 97, 100, 102, 104, 104, 107, 106,
- 108, 108, 73, 69, 69, 66, 66, 65, 64, 63, 64, 66, 68, 69, 72, 73, 77,
- 77, 81, 82, 86, 87, 90, 92, 95, 97, 99, 103, 104, 106, 106, 109, 108,
- 110, 74, 70, 70, 67, 67, 66, 65, 63, 64, 67, 68, 70, 72, 74, 76, 78, 80,
- 82, 85, 87, 90, 91, 95, 96, 100, 101, 105, 106, 108, 108, 111, 110, 75,
- 71, 71, 68, 68, 66, 66, 64, 64, 68, 68, 71, 71, 75, 75, 79, 79, 83, 84,
- 88, 89, 93, 93, 98, 98, 102, 103, 108, 108, 110, 110, 113,
- /* Size 4x8 */
- 31, 47, 57, 65, 40, 45, 52, 61, 46, 55, 61, 63, 47, 60, 70, 72, 52, 64,
- 79, 81, 59, 68, 87, 90, 63, 66, 88, 99, 66, 69, 85, 102,
- /* Size 8x4 */
- 31, 40, 46, 47, 52, 59, 63, 66, 47, 45, 55, 60, 64, 68, 66, 69, 57, 52,
- 61, 70, 79, 87, 88, 85, 65, 61, 63, 72, 81, 90, 99, 102,
- /* Size 8x16 */
- 32, 35, 48, 50, 57, 63, 68, 70, 30, 38, 46, 46, 52, 58, 63, 65, 33, 41,
- 47, 46, 51, 56, 60, 63, 39, 46, 48, 47, 51, 55, 58, 61, 49, 48, 53, 54,
- 57, 60, 61, 61, 48, 46, 53, 56, 60, 64, 65, 65, 50, 46, 54, 61, 66, 70,
- 71, 69, 52, 47, 54, 63, 71, 75, 75, 74, 55, 49, 56, 65, 74, 79, 79, 78,
- 60, 53, 58, 68, 79, 85, 85, 82, 63, 55, 60, 70, 82, 89, 91, 87, 66, 58,
- 62, 72, 84, 91, 95, 91, 68, 60, 64, 71, 81, 94, 97, 96, 70, 62, 65, 73,
- 81, 89, 98, 101, 72, 65, 65, 72, 82, 92, 100, 103, 74, 67, 65, 71, 79,
- 89, 98, 105,
- /* Size 16x8 */
- 32, 30, 33, 39, 49, 48, 50, 52, 55, 60, 63, 66, 68, 70, 72, 74, 35, 38,
- 41, 46, 48, 46, 46, 47, 49, 53, 55, 58, 60, 62, 65, 67, 48, 46, 47, 48,
- 53, 53, 54, 54, 56, 58, 60, 62, 64, 65, 65, 65, 50, 46, 46, 47, 54, 56,
- 61, 63, 65, 68, 70, 72, 71, 73, 72, 71, 57, 52, 51, 51, 57, 60, 66, 71,
- 74, 79, 82, 84, 81, 81, 82, 79, 63, 58, 56, 55, 60, 64, 70, 75, 79, 85,
- 89, 91, 94, 89, 92, 89, 68, 63, 60, 58, 61, 65, 71, 75, 79, 85, 91, 95,
- 97, 98, 100, 98, 70, 65, 63, 61, 61, 65, 69, 74, 78, 82, 87, 91, 96,
- 101, 103, 105,
- /* Size 16x32 */
- 32, 31, 35, 38, 48, 49, 50, 52, 57, 61, 63, 67, 68, 69, 70, 71, 31, 31,
- 37, 40, 47, 47, 48, 50, 54, 57, 60, 63, 64, 65, 66, 67, 30, 32, 38, 40,
- 46, 45, 46, 48, 52, 55, 58, 61, 63, 64, 65, 67, 31, 33, 38, 41, 46, 45,
- 46, 48, 52, 55, 57, 60, 61, 62, 63, 64, 33, 36, 41, 44, 47, 46, 46, 47,
- 51, 54, 56, 59, 60, 61, 63, 64, 37, 40, 45, 47, 47, 45, 46, 47, 50, 52,
- 54, 57, 59, 61, 62, 62, 39, 41, 46, 47, 48, 47, 47, 48, 51, 54, 55, 57,
- 58, 59, 61, 62, 42, 43, 46, 48, 50, 49, 50, 50, 53, 56, 57, 60, 60, 59,
- 60, 60, 49, 46, 48, 49, 53, 53, 54, 54, 57, 59, 60, 63, 61, 62, 61, 61,
- 48, 46, 47, 48, 53, 55, 55, 56, 58, 61, 62, 64, 64, 63, 63, 64, 48, 46,
- 46, 48, 53, 56, 56, 57, 60, 62, 64, 66, 65, 65, 65, 64, 49, 45, 45, 47,
- 53, 58, 59, 61, 64, 66, 67, 69, 67, 67, 66, 67, 50, 46, 46, 48, 54, 59,
- 61, 63, 66, 68, 70, 71, 71, 68, 69, 67, 51, 47, 47, 48, 54, 60, 61, 64,
- 68, 70, 71, 73, 72, 72, 70, 71, 52, 48, 47, 48, 54, 61, 63, 66, 71, 73,
- 75, 77, 75, 73, 74, 71, 54, 50, 49, 50, 55, 62, 65, 68, 73, 76, 78, 79,
- 78, 76, 74, 75, 55, 51, 49, 50, 56, 63, 65, 69, 74, 77, 79, 81, 79, 78,
- 78, 75, 57, 52, 50, 51, 56, 64, 66, 70, 76, 79, 82, 85, 83, 81, 79, 79,
- 60, 54, 53, 53, 58, 65, 68, 72, 79, 82, 85, 87, 85, 84, 82, 80, 62, 56,
- 54, 55, 60, 66, 69, 74, 81, 84, 87, 88, 87, 85, 84, 84, 63, 57, 55, 56,
- 60, 67, 70, 75, 82, 86, 89, 92, 91, 89, 87, 84, 64, 59, 56, 57, 61, 68,
- 71, 75, 83, 87, 90, 93, 92, 90, 89, 89, 66, 60, 58, 58, 62, 69, 72, 76,
- 84, 88, 91, 94, 95, 93, 91, 89, 67, 61, 59, 58, 63, 68, 71, 78, 83, 86,
- 93, 96, 96, 96, 94, 94, 68, 62, 60, 59, 64, 67, 71, 79, 81, 86, 94, 95,
- 97, 98, 96, 94, 69, 63, 61, 60, 65, 66, 72, 77, 80, 88, 91, 96, 99, 99,
- 100, 98, 70, 64, 62, 60, 65, 66, 73, 76, 81, 87, 89, 97, 98, 100, 101,
- 99, 71, 65, 64, 61, 65, 67, 73, 74, 82, 85, 90, 95, 99, 102, 103, 104,
- 72, 65, 65, 62, 65, 68, 72, 75, 82, 83, 92, 93, 100, 102, 103, 104, 73,
- 66, 66, 63, 65, 69, 72, 76, 81, 85, 90, 93, 100, 102, 105, 106, 74, 67,
- 67, 64, 65, 70, 71, 77, 79, 86, 89, 94, 98, 103, 105, 106, 75, 68, 68,
- 65, 65, 71, 71, 78, 78, 87, 87, 96, 96, 105, 105, 109,
- /* Size 32x16 */
- 32, 31, 30, 31, 33, 37, 39, 42, 49, 48, 48, 49, 50, 51, 52, 54, 55, 57,
- 60, 62, 63, 64, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 31, 31, 32, 33,
- 36, 40, 41, 43, 46, 46, 46, 45, 46, 47, 48, 50, 51, 52, 54, 56, 57, 59,
- 60, 61, 62, 63, 64, 65, 65, 66, 67, 68, 35, 37, 38, 38, 41, 45, 46, 46,
- 48, 47, 46, 45, 46, 47, 47, 49, 49, 50, 53, 54, 55, 56, 58, 59, 60, 61,
- 62, 64, 65, 66, 67, 68, 38, 40, 40, 41, 44, 47, 47, 48, 49, 48, 48, 47,
- 48, 48, 48, 50, 50, 51, 53, 55, 56, 57, 58, 58, 59, 60, 60, 61, 62, 63,
- 64, 65, 48, 47, 46, 46, 47, 47, 48, 50, 53, 53, 53, 53, 54, 54, 54, 55,
- 56, 56, 58, 60, 60, 61, 62, 63, 64, 65, 65, 65, 65, 65, 65, 65, 49, 47,
- 45, 45, 46, 45, 47, 49, 53, 55, 56, 58, 59, 60, 61, 62, 63, 64, 65, 66,
- 67, 68, 69, 68, 67, 66, 66, 67, 68, 69, 70, 71, 50, 48, 46, 46, 46, 46,
- 47, 50, 54, 55, 56, 59, 61, 61, 63, 65, 65, 66, 68, 69, 70, 71, 72, 71,
- 71, 72, 73, 73, 72, 72, 71, 71, 52, 50, 48, 48, 47, 47, 48, 50, 54, 56,
- 57, 61, 63, 64, 66, 68, 69, 70, 72, 74, 75, 75, 76, 78, 79, 77, 76, 74,
- 75, 76, 77, 78, 57, 54, 52, 52, 51, 50, 51, 53, 57, 58, 60, 64, 66, 68,
- 71, 73, 74, 76, 79, 81, 82, 83, 84, 83, 81, 80, 81, 82, 82, 81, 79, 78,
- 61, 57, 55, 55, 54, 52, 54, 56, 59, 61, 62, 66, 68, 70, 73, 76, 77, 79,
- 82, 84, 86, 87, 88, 86, 86, 88, 87, 85, 83, 85, 86, 87, 63, 60, 58, 57,
- 56, 54, 55, 57, 60, 62, 64, 67, 70, 71, 75, 78, 79, 82, 85, 87, 89, 90,
- 91, 93, 94, 91, 89, 90, 92, 90, 89, 87, 67, 63, 61, 60, 59, 57, 57, 60,
- 63, 64, 66, 69, 71, 73, 77, 79, 81, 85, 87, 88, 92, 93, 94, 96, 95, 96,
- 97, 95, 93, 93, 94, 96, 68, 64, 63, 61, 60, 59, 58, 60, 61, 64, 65, 67,
- 71, 72, 75, 78, 79, 83, 85, 87, 91, 92, 95, 96, 97, 99, 98, 99, 100,
- 100, 98, 96, 69, 65, 64, 62, 61, 61, 59, 59, 62, 63, 65, 67, 68, 72, 73,
- 76, 78, 81, 84, 85, 89, 90, 93, 96, 98, 99, 100, 102, 102, 102, 103,
- 105, 70, 66, 65, 63, 63, 62, 61, 60, 61, 63, 65, 66, 69, 70, 74, 74, 78,
- 79, 82, 84, 87, 89, 91, 94, 96, 100, 101, 103, 103, 105, 105, 105, 71,
- 67, 67, 64, 64, 62, 62, 60, 61, 64, 64, 67, 67, 71, 71, 75, 75, 79, 80,
- 84, 84, 89, 89, 94, 94, 98, 99, 104, 104, 106, 106, 109,
- /* Size 4x16 */
- 31, 49, 61, 69, 32, 45, 55, 64, 36, 46, 54, 61, 41, 47, 54, 59, 46, 53,
- 59, 62, 46, 56, 62, 65, 46, 59, 68, 68, 48, 61, 73, 73, 51, 63, 77, 78,
- 54, 65, 82, 84, 57, 67, 86, 89, 60, 69, 88, 93, 62, 67, 86, 98, 64, 66,
- 87, 100, 65, 68, 83, 102, 67, 70, 86, 103,
- /* Size 16x4 */
- 31, 32, 36, 41, 46, 46, 46, 48, 51, 54, 57, 60, 62, 64, 65, 67, 49, 45,
- 46, 47, 53, 56, 59, 61, 63, 65, 67, 69, 67, 66, 68, 70, 61, 55, 54, 54,
- 59, 62, 68, 73, 77, 82, 86, 88, 86, 87, 83, 86, 69, 64, 61, 59, 62, 65,
- 68, 73, 78, 84, 89, 93, 98, 100, 102, 103,
- /* Size 8x32 */
- 32, 35, 48, 50, 57, 63, 68, 70, 31, 37, 47, 48, 54, 60, 64, 66, 30, 38,
- 46, 46, 52, 58, 63, 65, 31, 38, 46, 46, 52, 57, 61, 63, 33, 41, 47, 46,
- 51, 56, 60, 63, 37, 45, 47, 46, 50, 54, 59, 62, 39, 46, 48, 47, 51, 55,
- 58, 61, 42, 46, 50, 50, 53, 57, 60, 60, 49, 48, 53, 54, 57, 60, 61, 61,
- 48, 47, 53, 55, 58, 62, 64, 63, 48, 46, 53, 56, 60, 64, 65, 65, 49, 45,
- 53, 59, 64, 67, 67, 66, 50, 46, 54, 61, 66, 70, 71, 69, 51, 47, 54, 61,
- 68, 71, 72, 70, 52, 47, 54, 63, 71, 75, 75, 74, 54, 49, 55, 65, 73, 78,
- 78, 74, 55, 49, 56, 65, 74, 79, 79, 78, 57, 50, 56, 66, 76, 82, 83, 79,
- 60, 53, 58, 68, 79, 85, 85, 82, 62, 54, 60, 69, 81, 87, 87, 84, 63, 55,
- 60, 70, 82, 89, 91, 87, 64, 56, 61, 71, 83, 90, 92, 89, 66, 58, 62, 72,
- 84, 91, 95, 91, 67, 59, 63, 71, 83, 93, 96, 94, 68, 60, 64, 71, 81, 94,
- 97, 96, 69, 61, 65, 72, 80, 91, 99, 100, 70, 62, 65, 73, 81, 89, 98,
- 101, 71, 64, 65, 73, 82, 90, 99, 103, 72, 65, 65, 72, 82, 92, 100, 103,
- 73, 66, 65, 72, 81, 90, 100, 105, 74, 67, 65, 71, 79, 89, 98, 105, 75,
- 68, 65, 71, 78, 87, 96, 105,
- /* Size 32x8 */
- 32, 31, 30, 31, 33, 37, 39, 42, 49, 48, 48, 49, 50, 51, 52, 54, 55, 57,
- 60, 62, 63, 64, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 35, 37, 38, 38,
- 41, 45, 46, 46, 48, 47, 46, 45, 46, 47, 47, 49, 49, 50, 53, 54, 55, 56,
- 58, 59, 60, 61, 62, 64, 65, 66, 67, 68, 48, 47, 46, 46, 47, 47, 48, 50,
- 53, 53, 53, 53, 54, 54, 54, 55, 56, 56, 58, 60, 60, 61, 62, 63, 64, 65,
- 65, 65, 65, 65, 65, 65, 50, 48, 46, 46, 46, 46, 47, 50, 54, 55, 56, 59,
- 61, 61, 63, 65, 65, 66, 68, 69, 70, 71, 72, 71, 71, 72, 73, 73, 72, 72,
- 71, 71, 57, 54, 52, 52, 51, 50, 51, 53, 57, 58, 60, 64, 66, 68, 71, 73,
- 74, 76, 79, 81, 82, 83, 84, 83, 81, 80, 81, 82, 82, 81, 79, 78, 63, 60,
- 58, 57, 56, 54, 55, 57, 60, 62, 64, 67, 70, 71, 75, 78, 79, 82, 85, 87,
- 89, 90, 91, 93, 94, 91, 89, 90, 92, 90, 89, 87, 68, 64, 63, 61, 60, 59,
- 58, 60, 61, 64, 65, 67, 71, 72, 75, 78, 79, 83, 85, 87, 91, 92, 95, 96,
- 97, 99, 98, 99, 100, 100, 98, 96, 70, 66, 65, 63, 63, 62, 61, 60, 61,
- 63, 65, 66, 69, 70, 74, 74, 78, 79, 82, 84, 87, 89, 91, 94, 96, 100,
- 101, 103, 103, 105, 105, 105 },
- },
- {
- { /* Luma */
- /* Size 4x4 */
- 32, 38, 63, 86, 38, 56, 78, 97, 63, 78, 113, 130, 86, 97, 130, 169,
- /* Size 8x8 */
- 32, 32, 35, 46, 57, 76, 85, 96, 32, 34, 37, 45, 54, 70, 79, 90, 35, 37,
- 48, 56, 64, 79, 87, 93, 46, 45, 56, 70, 80, 96, 100, 105, 57, 54, 64,
- 80, 93, 111, 121, 122, 76, 70, 79, 96, 111, 134, 138, 144, 85, 79, 87,
- 100, 121, 138, 156, 168, 96, 90, 93, 105, 122, 144, 168, 184,
- /* Size 16x16 */
- 32, 31, 31, 32, 34, 39, 44, 49, 58, 65, 71, 81, 87, 93, 98, 104, 31, 32,
- 32, 32, 34, 38, 41, 46, 54, 60, 66, 75, 81, 86, 92, 98, 31, 32, 33, 34,
- 36, 39, 42, 46, 53, 59, 64, 73, 78, 83, 88, 94, 32, 32, 34, 35, 37, 40,
- 42, 46, 52, 58, 63, 71, 75, 80, 86, 92, 34, 34, 36, 37, 42, 47, 50, 53,
- 59, 65, 70, 77, 82, 85, 89, 92, 39, 38, 39, 40, 47, 54, 58, 62, 68, 73,
- 78, 85, 90, 90, 96, 98, 44, 41, 42, 42, 50, 58, 63, 68, 74, 79, 84, 91,
- 96, 98, 102, 104, 49, 46, 46, 46, 53, 62, 68, 73, 81, 87, 92, 99, 103,
- 107, 109, 112, 58, 54, 53, 52, 59, 68, 74, 81, 90, 97, 102, 110, 114,
- 118, 117, 121, 65, 60, 59, 58, 65, 73, 79, 87, 97, 105, 111, 120, 125,
- 125, 126, 130, 71, 66, 64, 63, 70, 78, 84, 92, 102, 111, 117, 127, 133,
- 134, 136, 141, 81, 75, 73, 71, 77, 85, 91, 99, 110, 120, 127, 137, 143,
- 145, 148, 152, 87, 81, 78, 75, 82, 90, 96, 103, 114, 125, 133, 143, 150,
- 156, 160, 163, 93, 86, 83, 80, 85, 90, 98, 107, 118, 125, 134, 145, 156,
- 163, 169, 177, 98, 92, 88, 86, 89, 96, 102, 109, 117, 126, 136, 148,
- 160, 169, 176, 184, 104, 98, 94, 92, 92, 98, 104, 112, 121, 130, 141,
- 152, 163, 177, 184, 191,
- /* Size 32x32 */
- 32, 31, 31, 31, 31, 32, 32, 34, 34, 36, 39, 41, 44, 48, 49, 54, 58, 59,
- 65, 69, 71, 80, 81, 83, 87, 90, 93, 95, 98, 101, 104, 107, 31, 32, 32,
- 32, 32, 32, 32, 34, 34, 35, 38, 39, 42, 46, 47, 51, 55, 57, 62, 66, 68,
- 76, 77, 78, 83, 85, 88, 90, 93, 96, 99, 101, 31, 32, 32, 32, 32, 32, 32,
- 33, 34, 34, 38, 39, 41, 45, 46, 50, 54, 55, 60, 64, 66, 73, 75, 76, 81,
- 83, 86, 89, 92, 95, 98, 101, 31, 32, 32, 32, 32, 32, 32, 33, 34, 34, 37,
- 38, 41, 44, 45, 49, 53, 54, 59, 63, 65, 72, 74, 75, 79, 81, 84, 86, 89,
- 91, 94, 97, 31, 32, 32, 32, 33, 33, 34, 35, 36, 36, 39, 40, 42, 45, 46,
- 50, 53, 54, 59, 63, 64, 71, 73, 74, 78, 80, 83, 85, 88, 91, 94, 97, 32,
- 32, 32, 32, 33, 34, 34, 36, 36, 37, 40, 40, 42, 45, 46, 49, 53, 54, 58,
- 62, 63, 70, 72, 73, 77, 79, 82, 85, 87, 90, 92, 95, 32, 32, 32, 32, 34,
- 34, 35, 37, 37, 38, 40, 41, 42, 45, 46, 49, 52, 54, 58, 61, 63, 69, 71,
- 72, 75, 78, 80, 83, 86, 89, 92, 95, 34, 34, 33, 33, 35, 36, 37, 39, 41,
- 42, 45, 46, 47, 50, 51, 54, 57, 59, 63, 66, 68, 74, 75, 76, 80, 81, 82,
- 83, 85, 87, 90, 93, 34, 34, 34, 34, 36, 36, 37, 41, 42, 45, 47, 48, 50,
- 53, 53, 56, 59, 61, 65, 68, 70, 76, 77, 78, 82, 83, 85, 88, 89, 90, 92,
- 93, 36, 35, 34, 34, 36, 37, 38, 42, 45, 48, 50, 51, 54, 56, 57, 60, 63,
- 64, 68, 71, 73, 79, 80, 81, 85, 87, 89, 89, 90, 93, 96, 99, 39, 38, 38,
- 37, 39, 40, 40, 45, 47, 50, 54, 55, 58, 61, 62, 65, 68, 69, 73, 76, 78,
- 84, 85, 86, 90, 89, 90, 93, 96, 97, 98, 99, 41, 39, 39, 38, 40, 40, 41,
- 46, 48, 51, 55, 56, 59, 62, 63, 67, 70, 71, 75, 78, 80, 86, 87, 88, 91,
- 93, 96, 97, 97, 99, 102, 105, 44, 42, 41, 41, 42, 42, 42, 47, 50, 54,
- 58, 59, 63, 66, 68, 71, 74, 75, 79, 83, 84, 90, 91, 92, 96, 98, 98, 99,
- 102, 104, 104, 105, 48, 46, 45, 44, 45, 45, 45, 50, 53, 56, 61, 62, 66,
- 70, 71, 76, 79, 80, 85, 88, 90, 96, 97, 98, 101, 100, 102, 105, 105,
- 105, 109, 112, 49, 47, 46, 45, 46, 46, 46, 51, 53, 57, 62, 63, 68, 71,
- 73, 77, 81, 82, 87, 90, 92, 98, 99, 100, 103, 106, 107, 106, 109, 112,
- 112, 112, 54, 51, 50, 49, 50, 49, 49, 54, 56, 60, 65, 67, 71, 76, 77,
- 82, 86, 87, 92, 96, 97, 104, 105, 106, 110, 110, 109, 113, 114, 113,
- 116, 120, 58, 55, 54, 53, 53, 53, 52, 57, 59, 63, 68, 70, 74, 79, 81,
- 86, 90, 91, 97, 100, 102, 109, 110, 111, 114, 114, 118, 116, 117, 121,
- 121, 120, 59, 57, 55, 54, 54, 54, 54, 59, 61, 64, 69, 71, 75, 80, 82,
- 87, 91, 93, 99, 102, 104, 111, 112, 113, 117, 121, 120, 122, 124, 122,
- 125, 129, 65, 62, 60, 59, 59, 58, 58, 63, 65, 68, 73, 75, 79, 85, 87,
- 92, 97, 99, 105, 109, 111, 118, 120, 121, 125, 124, 125, 127, 126, 130,
- 130, 129, 69, 66, 64, 63, 63, 62, 61, 66, 68, 71, 76, 78, 83, 88, 90,
- 96, 100, 102, 109, 113, 115, 123, 125, 126, 129, 130, 131, 130, 134,
- 133, 135, 139, 71, 68, 66, 65, 64, 63, 63, 68, 70, 73, 78, 80, 84, 90,
- 92, 97, 102, 104, 111, 115, 117, 125, 127, 128, 133, 136, 134, 139, 136,
- 139, 141, 140, 80, 76, 73, 72, 71, 70, 69, 74, 76, 79, 84, 86, 90, 96,
- 98, 104, 109, 111, 118, 123, 125, 134, 136, 137, 142, 138, 143, 140,
- 144, 144, 144, 149, 81, 77, 75, 74, 73, 72, 71, 75, 77, 80, 85, 87, 91,
- 97, 99, 105, 110, 112, 120, 125, 127, 136, 137, 139, 143, 148, 145, 148,
- 148, 150, 152, 149, 83, 78, 76, 75, 74, 73, 72, 76, 78, 81, 86, 88, 92,
- 98, 100, 106, 111, 113, 121, 126, 128, 137, 139, 140, 145, 149, 153,
- 153, 154, 155, 155, 161, 87, 83, 81, 79, 78, 77, 75, 80, 82, 85, 90, 91,
- 96, 101, 103, 110, 114, 117, 125, 129, 133, 142, 143, 145, 150, 151,
- 156, 159, 160, 160, 163, 161, 90, 85, 83, 81, 80, 79, 78, 81, 83, 87,
- 89, 93, 98, 100, 106, 110, 114, 121, 124, 130, 136, 138, 148, 149, 151,
- 156, 157, 162, 166, 168, 166, 172, 93, 88, 86, 84, 83, 82, 80, 82, 85,
- 89, 90, 96, 98, 102, 107, 109, 118, 120, 125, 131, 134, 143, 145, 153,
- 156, 157, 163, 164, 169, 172, 177, 172, 95, 90, 89, 86, 85, 85, 83, 83,
- 88, 89, 93, 97, 99, 105, 106, 113, 116, 122, 127, 130, 139, 140, 148,
- 153, 159, 162, 164, 169, 170, 176, 179, 185, 98, 93, 92, 89, 88, 87, 86,
- 85, 89, 90, 96, 97, 102, 105, 109, 114, 117, 124, 126, 134, 136, 144,
- 148, 154, 160, 166, 169, 170, 176, 177, 184, 186, 101, 96, 95, 91, 91,
- 90, 89, 87, 90, 93, 97, 99, 104, 105, 112, 113, 121, 122, 130, 133, 139,
- 144, 150, 155, 160, 168, 172, 176, 177, 184, 185, 191, 104, 99, 98, 94,
- 94, 92, 92, 90, 92, 96, 98, 102, 104, 109, 112, 116, 121, 125, 130, 135,
- 141, 144, 152, 155, 163, 166, 177, 179, 184, 185, 191, 192, 107, 101,
- 101, 97, 97, 95, 95, 93, 93, 99, 99, 105, 105, 112, 112, 120, 120, 129,
- 129, 139, 140, 149, 149, 161, 161, 172, 172, 185, 186, 191, 192, 199,
- /* Size 4x8 */
- 32, 38, 62, 86, 32, 40, 58, 80, 34, 51, 68, 85, 44, 61, 85, 101, 54, 69,
- 98, 117, 72, 84, 118, 136, 82, 89, 129, 157, 92, 98, 127, 165,
- /* Size 8x4 */
- 32, 32, 34, 44, 54, 72, 82, 92, 38, 40, 51, 61, 69, 84, 89, 98, 62, 58,
- 68, 85, 98, 118, 129, 127, 86, 80, 85, 101, 117, 136, 157, 165,
- /* Size 8x16 */
- 32, 32, 36, 44, 58, 79, 88, 93, 31, 32, 35, 41, 54, 73, 81, 88, 32, 33,
- 36, 42, 53, 71, 78, 84, 32, 34, 38, 42, 52, 69, 76, 82, 34, 36, 44, 50,
- 59, 75, 81, 84, 39, 39, 50, 58, 68, 84, 88, 90, 44, 42, 53, 63, 74, 90,
- 97, 97, 49, 46, 57, 67, 81, 97, 104, 105, 57, 53, 63, 74, 90, 108, 111,
- 113, 65, 59, 68, 79, 97, 118, 123, 122, 71, 64, 73, 84, 102, 125, 135,
- 131, 81, 72, 80, 91, 110, 135, 145, 141, 87, 77, 85, 96, 114, 140, 148,
- 151, 92, 83, 88, 102, 117, 133, 153, 163, 98, 88, 89, 103, 121, 141,
- 160, 169, 103, 94, 92, 103, 119, 137, 158, 175,
- /* Size 16x8 */
- 32, 31, 32, 32, 34, 39, 44, 49, 57, 65, 71, 81, 87, 92, 98, 103, 32, 32,
- 33, 34, 36, 39, 42, 46, 53, 59, 64, 72, 77, 83, 88, 94, 36, 35, 36, 38,
- 44, 50, 53, 57, 63, 68, 73, 80, 85, 88, 89, 92, 44, 41, 42, 42, 50, 58,
- 63, 67, 74, 79, 84, 91, 96, 102, 103, 103, 58, 54, 53, 52, 59, 68, 74,
- 81, 90, 97, 102, 110, 114, 117, 121, 119, 79, 73, 71, 69, 75, 84, 90,
- 97, 108, 118, 125, 135, 140, 133, 141, 137, 88, 81, 78, 76, 81, 88, 97,
- 104, 111, 123, 135, 145, 148, 153, 160, 158, 93, 88, 84, 82, 84, 90, 97,
- 105, 113, 122, 131, 141, 151, 163, 169, 175,
- /* Size 16x32 */
- 32, 31, 32, 32, 36, 39, 44, 53, 58, 65, 79, 81, 88, 90, 93, 96, 31, 32,
- 32, 32, 35, 38, 42, 51, 55, 62, 75, 77, 83, 86, 88, 91, 31, 32, 32, 32,
- 35, 38, 41, 50, 54, 60, 73, 75, 81, 84, 88, 91, 31, 32, 32, 33, 34, 37,
- 41, 49, 53, 59, 72, 74, 79, 82, 84, 87, 32, 32, 33, 34, 36, 39, 42, 50,
- 53, 59, 71, 72, 78, 81, 84, 87, 32, 32, 34, 34, 37, 40, 42, 49, 53, 58,
- 70, 71, 77, 80, 83, 85, 32, 33, 34, 35, 38, 40, 42, 49, 52, 58, 69, 70,
- 76, 78, 82, 86, 34, 34, 35, 37, 42, 45, 48, 54, 57, 63, 73, 75, 79, 79,
- 81, 83, 34, 34, 36, 37, 44, 47, 50, 56, 59, 65, 75, 77, 81, 83, 84, 84,
- 36, 34, 37, 38, 48, 51, 54, 60, 63, 68, 78, 80, 85, 85, 86, 89, 39, 37,
- 39, 40, 50, 54, 58, 65, 68, 73, 84, 85, 88, 89, 90, 89, 40, 38, 40, 41,
- 51, 55, 59, 67, 70, 75, 85, 87, 91, 92, 92, 95, 44, 41, 42, 43, 53, 58,
- 63, 71, 74, 79, 90, 91, 97, 94, 97, 95, 47, 44, 45, 46, 56, 61, 66, 75,
- 79, 85, 95, 97, 99, 101, 98, 102, 49, 46, 46, 47, 57, 62, 67, 77, 81,
- 86, 97, 99, 104, 102, 105, 102, 53, 49, 50, 50, 60, 65, 71, 82, 86, 92,
- 103, 105, 109, 108, 106, 110, 57, 53, 53, 53, 63, 68, 74, 86, 90, 97,
- 108, 110, 111, 112, 113, 110, 59, 54, 54, 54, 64, 69, 75, 87, 91, 98,
- 111, 112, 119, 117, 115, 118, 65, 60, 59, 58, 68, 73, 79, 92, 97, 105,
- 118, 119, 123, 123, 122, 119, 69, 63, 62, 62, 71, 76, 83, 96, 100, 109,
- 122, 124, 127, 125, 125, 128, 71, 65, 64, 63, 73, 78, 84, 97, 102, 111,
- 125, 127, 135, 134, 131, 129, 79, 72, 71, 70, 79, 84, 90, 104, 109, 118,
- 133, 135, 137, 136, 136, 137, 81, 74, 72, 71, 80, 85, 91, 105, 110, 120,
- 135, 137, 145, 143, 141, 138, 82, 75, 73, 72, 81, 86, 92, 106, 111, 121,
- 136, 139, 147, 148, 147, 149, 87, 79, 77, 76, 85, 90, 96, 110, 114, 125,
- 140, 143, 148, 154, 151, 149, 90, 82, 80, 78, 87, 89, 99, 108, 113, 129,
- 135, 146, 153, 157, 160, 159, 92, 84, 83, 81, 88, 90, 102, 106, 117,
- 128, 133, 150, 153, 158, 163, 160, 95, 87, 85, 83, 88, 92, 103, 105,
- 120, 125, 137, 148, 155, 164, 168, 173, 98, 89, 88, 85, 89, 95, 103,
- 108, 121, 124, 141, 144, 160, 164, 169, 174, 100, 92, 91, 88, 90, 98,
- 103, 111, 120, 127, 139, 146, 161, 165, 175, 179, 103, 94, 94, 90, 92,
- 101, 103, 114, 119, 131, 137, 150, 158, 170, 175, 180, 106, 97, 97, 93,
- 93, 104, 104, 118, 118, 135, 135, 154, 155, 175, 176, 187,
- /* Size 32x16 */
- 32, 31, 31, 31, 32, 32, 32, 34, 34, 36, 39, 40, 44, 47, 49, 53, 57, 59,
- 65, 69, 71, 79, 81, 82, 87, 90, 92, 95, 98, 100, 103, 106, 31, 32, 32,
- 32, 32, 32, 33, 34, 34, 34, 37, 38, 41, 44, 46, 49, 53, 54, 60, 63, 65,
- 72, 74, 75, 79, 82, 84, 87, 89, 92, 94, 97, 32, 32, 32, 32, 33, 34, 34,
- 35, 36, 37, 39, 40, 42, 45, 46, 50, 53, 54, 59, 62, 64, 71, 72, 73, 77,
- 80, 83, 85, 88, 91, 94, 97, 32, 32, 32, 33, 34, 34, 35, 37, 37, 38, 40,
- 41, 43, 46, 47, 50, 53, 54, 58, 62, 63, 70, 71, 72, 76, 78, 81, 83, 85,
- 88, 90, 93, 36, 35, 35, 34, 36, 37, 38, 42, 44, 48, 50, 51, 53, 56, 57,
- 60, 63, 64, 68, 71, 73, 79, 80, 81, 85, 87, 88, 88, 89, 90, 92, 93, 39,
- 38, 38, 37, 39, 40, 40, 45, 47, 51, 54, 55, 58, 61, 62, 65, 68, 69, 73,
- 76, 78, 84, 85, 86, 90, 89, 90, 92, 95, 98, 101, 104, 44, 42, 41, 41,
- 42, 42, 42, 48, 50, 54, 58, 59, 63, 66, 67, 71, 74, 75, 79, 83, 84, 90,
- 91, 92, 96, 99, 102, 103, 103, 103, 103, 104, 53, 51, 50, 49, 50, 49,
- 49, 54, 56, 60, 65, 67, 71, 75, 77, 82, 86, 87, 92, 96, 97, 104, 105,
- 106, 110, 108, 106, 105, 108, 111, 114, 118, 58, 55, 54, 53, 53, 53, 52,
- 57, 59, 63, 68, 70, 74, 79, 81, 86, 90, 91, 97, 100, 102, 109, 110, 111,
- 114, 113, 117, 120, 121, 120, 119, 118, 65, 62, 60, 59, 59, 58, 58, 63,
- 65, 68, 73, 75, 79, 85, 86, 92, 97, 98, 105, 109, 111, 118, 120, 121,
- 125, 129, 128, 125, 124, 127, 131, 135, 79, 75, 73, 72, 71, 70, 69, 73,
- 75, 78, 84, 85, 90, 95, 97, 103, 108, 111, 118, 122, 125, 133, 135, 136,
- 140, 135, 133, 137, 141, 139, 137, 135, 81, 77, 75, 74, 72, 71, 70, 75,
- 77, 80, 85, 87, 91, 97, 99, 105, 110, 112, 119, 124, 127, 135, 137, 139,
- 143, 146, 150, 148, 144, 146, 150, 154, 88, 83, 81, 79, 78, 77, 76, 79,
- 81, 85, 88, 91, 97, 99, 104, 109, 111, 119, 123, 127, 135, 137, 145,
- 147, 148, 153, 153, 155, 160, 161, 158, 155, 90, 86, 84, 82, 81, 80, 78,
- 79, 83, 85, 89, 92, 94, 101, 102, 108, 112, 117, 123, 125, 134, 136,
- 143, 148, 154, 157, 158, 164, 164, 165, 170, 175, 93, 88, 88, 84, 84,
- 83, 82, 81, 84, 86, 90, 92, 97, 98, 105, 106, 113, 115, 122, 125, 131,
- 136, 141, 147, 151, 160, 163, 168, 169, 175, 175, 176, 96, 91, 91, 87,
- 87, 85, 86, 83, 84, 89, 89, 95, 95, 102, 102, 110, 110, 118, 119, 128,
- 129, 137, 138, 149, 149, 159, 160, 173, 174, 179, 180, 187,
- /* Size 4x16 */
- 31, 39, 65, 90, 32, 38, 60, 84, 32, 39, 59, 81, 33, 40, 58, 78, 34, 47,
- 65, 83, 37, 54, 73, 89, 41, 58, 79, 94, 46, 62, 86, 102, 53, 68, 97,
- 112, 60, 73, 105, 123, 65, 78, 111, 134, 74, 85, 120, 143, 79, 90, 125,
- 154, 84, 90, 128, 158, 89, 95, 124, 164, 94, 101, 131, 170,
- /* Size 16x4 */
- 31, 32, 32, 33, 34, 37, 41, 46, 53, 60, 65, 74, 79, 84, 89, 94, 39, 38,
- 39, 40, 47, 54, 58, 62, 68, 73, 78, 85, 90, 90, 95, 101, 65, 60, 59, 58,
- 65, 73, 79, 86, 97, 105, 111, 120, 125, 128, 124, 131, 90, 84, 81, 78,
- 83, 89, 94, 102, 112, 123, 134, 143, 154, 158, 164, 170,
- /* Size 8x32 */
- 32, 32, 36, 44, 58, 79, 88, 93, 31, 32, 35, 42, 55, 75, 83, 88, 31, 32,
- 35, 41, 54, 73, 81, 88, 31, 32, 34, 41, 53, 72, 79, 84, 32, 33, 36, 42,
- 53, 71, 78, 84, 32, 34, 37, 42, 53, 70, 77, 83, 32, 34, 38, 42, 52, 69,
- 76, 82, 34, 35, 42, 48, 57, 73, 79, 81, 34, 36, 44, 50, 59, 75, 81, 84,
- 36, 37, 48, 54, 63, 78, 85, 86, 39, 39, 50, 58, 68, 84, 88, 90, 40, 40,
- 51, 59, 70, 85, 91, 92, 44, 42, 53, 63, 74, 90, 97, 97, 47, 45, 56, 66,
- 79, 95, 99, 98, 49, 46, 57, 67, 81, 97, 104, 105, 53, 50, 60, 71, 86,
- 103, 109, 106, 57, 53, 63, 74, 90, 108, 111, 113, 59, 54, 64, 75, 91,
- 111, 119, 115, 65, 59, 68, 79, 97, 118, 123, 122, 69, 62, 71, 83, 100,
- 122, 127, 125, 71, 64, 73, 84, 102, 125, 135, 131, 79, 71, 79, 90, 109,
- 133, 137, 136, 81, 72, 80, 91, 110, 135, 145, 141, 82, 73, 81, 92, 111,
- 136, 147, 147, 87, 77, 85, 96, 114, 140, 148, 151, 90, 80, 87, 99, 113,
- 135, 153, 160, 92, 83, 88, 102, 117, 133, 153, 163, 95, 85, 88, 103,
- 120, 137, 155, 168, 98, 88, 89, 103, 121, 141, 160, 169, 100, 91, 90,
- 103, 120, 139, 161, 175, 103, 94, 92, 103, 119, 137, 158, 175, 106, 97,
- 93, 104, 118, 135, 155, 176,
- /* Size 32x8 */
- 32, 31, 31, 31, 32, 32, 32, 34, 34, 36, 39, 40, 44, 47, 49, 53, 57, 59,
- 65, 69, 71, 79, 81, 82, 87, 90, 92, 95, 98, 100, 103, 106, 32, 32, 32,
- 32, 33, 34, 34, 35, 36, 37, 39, 40, 42, 45, 46, 50, 53, 54, 59, 62, 64,
- 71, 72, 73, 77, 80, 83, 85, 88, 91, 94, 97, 36, 35, 35, 34, 36, 37, 38,
- 42, 44, 48, 50, 51, 53, 56, 57, 60, 63, 64, 68, 71, 73, 79, 80, 81, 85,
- 87, 88, 88, 89, 90, 92, 93, 44, 42, 41, 41, 42, 42, 42, 48, 50, 54, 58,
- 59, 63, 66, 67, 71, 74, 75, 79, 83, 84, 90, 91, 92, 96, 99, 102, 103,
- 103, 103, 103, 104, 58, 55, 54, 53, 53, 53, 52, 57, 59, 63, 68, 70, 74,
- 79, 81, 86, 90, 91, 97, 100, 102, 109, 110, 111, 114, 113, 117, 120,
- 121, 120, 119, 118, 79, 75, 73, 72, 71, 70, 69, 73, 75, 78, 84, 85, 90,
- 95, 97, 103, 108, 111, 118, 122, 125, 133, 135, 136, 140, 135, 133, 137,
- 141, 139, 137, 135, 88, 83, 81, 79, 78, 77, 76, 79, 81, 85, 88, 91, 97,
- 99, 104, 109, 111, 119, 123, 127, 135, 137, 145, 147, 148, 153, 153,
- 155, 160, 161, 158, 155, 93, 88, 88, 84, 84, 83, 82, 81, 84, 86, 90, 92,
- 97, 98, 105, 106, 113, 115, 122, 125, 131, 136, 141, 147, 151, 160, 163,
- 168, 169, 175, 175, 176 },
- { /* Chroma */
- /* Size 4x4 */
- 32, 45, 53, 63, 45, 55, 62, 67, 53, 62, 80, 84, 63, 67, 84, 101,
- /* Size 8x8 */
- 31, 36, 47, 48, 52, 60, 64, 67, 36, 43, 47, 46, 49, 55, 59, 63, 47, 47,
- 53, 54, 55, 60, 63, 64, 48, 46, 54, 61, 65, 70, 71, 71, 52, 49, 55, 65,
- 71, 78, 81, 79, 60, 55, 60, 70, 78, 89, 89, 89, 64, 59, 63, 71, 81, 89,
- 97, 99, 67, 63, 64, 71, 79, 89, 99, 104,
- /* Size 16x16 */
- 32, 30, 33, 36, 44, 48, 49, 51, 54, 57, 60, 64, 67, 68, 70, 72, 30, 31,
- 35, 39, 44, 46, 46, 47, 50, 53, 55, 59, 61, 64, 66, 68, 33, 35, 39, 43,
- 46, 46, 45, 47, 49, 51, 53, 57, 59, 61, 63, 65, 36, 39, 43, 47, 47, 46,
- 45, 46, 48, 50, 52, 55, 57, 58, 61, 63, 44, 44, 46, 47, 50, 51, 51, 51,
- 53, 54, 56, 59, 61, 61, 63, 62, 48, 46, 46, 46, 51, 54, 55, 56, 58, 60,
- 61, 64, 65, 64, 66, 66, 49, 46, 45, 45, 51, 55, 58, 60, 62, 63, 65, 68,
- 69, 69, 69, 69, 51, 47, 47, 46, 51, 56, 60, 62, 65, 67, 69, 72, 73, 74,
- 73, 73, 54, 50, 49, 48, 53, 58, 62, 65, 70, 73, 75, 78, 79, 79, 77, 77,
- 57, 53, 51, 50, 54, 60, 63, 67, 73, 76, 79, 82, 84, 83, 82, 82, 60, 55,
- 53, 52, 56, 61, 65, 69, 75, 79, 82, 86, 88, 87, 86, 87, 64, 59, 57, 55,
- 59, 64, 68, 72, 78, 82, 86, 90, 93, 92, 91, 92, 67, 61, 59, 57, 61, 65,
- 69, 73, 79, 84, 88, 93, 95, 96, 96, 96, 68, 64, 61, 58, 61, 64, 69, 74,
- 79, 83, 87, 92, 96, 99, 100, 101, 70, 66, 63, 61, 63, 66, 69, 73, 77,
- 82, 86, 91, 96, 100, 103, 104, 72, 68, 65, 63, 62, 66, 69, 73, 77, 82,
- 87, 92, 96, 101, 104, 106,
- /* Size 32x32 */
- 32, 31, 30, 30, 33, 35, 36, 41, 44, 49, 48, 48, 49, 50, 51, 52, 54, 55,
- 57, 59, 60, 63, 64, 65, 67, 68, 68, 69, 70, 71, 72, 73, 31, 31, 31, 31,
- 34, 36, 38, 42, 44, 47, 47, 47, 47, 48, 48, 50, 51, 52, 54, 56, 57, 60,
- 61, 61, 63, 64, 65, 66, 67, 67, 68, 69, 30, 31, 31, 31, 35, 37, 39, 42,
- 44, 47, 46, 46, 46, 47, 47, 48, 50, 51, 53, 54, 55, 58, 59, 60, 61, 63,
- 64, 65, 66, 67, 68, 69, 30, 31, 31, 32, 35, 37, 40, 42, 44, 46, 45, 45,
- 45, 46, 46, 47, 49, 50, 52, 53, 54, 57, 58, 58, 60, 61, 62, 63, 63, 64,
- 65, 66, 33, 34, 35, 35, 39, 41, 43, 45, 46, 47, 46, 46, 45, 46, 47, 47,
- 49, 49, 51, 53, 53, 56, 57, 57, 59, 60, 61, 62, 63, 64, 65, 66, 35, 36,
- 37, 37, 41, 43, 45, 46, 46, 47, 46, 46, 45, 46, 46, 47, 48, 49, 50, 52,
- 53, 55, 56, 56, 58, 59, 60, 61, 62, 63, 64, 64, 36, 38, 39, 40, 43, 45,
- 47, 47, 47, 48, 46, 46, 45, 46, 46, 47, 48, 48, 50, 51, 52, 54, 55, 55,
- 57, 58, 58, 59, 61, 62, 63, 64, 41, 42, 42, 42, 45, 46, 47, 48, 49, 50,
- 49, 49, 49, 50, 50, 50, 51, 52, 53, 54, 55, 57, 58, 58, 60, 60, 59, 59,
- 60, 61, 61, 62, 44, 44, 44, 44, 46, 46, 47, 49, 50, 51, 51, 51, 51, 51,
- 51, 52, 53, 53, 54, 56, 56, 59, 59, 59, 61, 61, 61, 62, 63, 62, 62, 62,
- 49, 47, 47, 46, 47, 47, 48, 50, 51, 53, 53, 53, 53, 54, 54, 54, 55, 55,
- 56, 58, 58, 60, 61, 61, 63, 63, 64, 63, 63, 64, 65, 66, 48, 47, 46, 45,
- 46, 46, 46, 49, 51, 53, 54, 54, 55, 56, 56, 57, 58, 59, 60, 61, 61, 63,
- 64, 64, 65, 65, 64, 65, 66, 66, 66, 66, 48, 47, 46, 45, 46, 46, 46, 49,
- 51, 53, 54, 55, 56, 57, 57, 58, 59, 60, 61, 62, 63, 65, 65, 65, 66, 67,
- 68, 67, 67, 67, 68, 69, 49, 47, 46, 45, 45, 45, 45, 49, 51, 53, 55, 56,
- 58, 59, 60, 61, 62, 62, 63, 65, 65, 67, 68, 68, 69, 70, 69, 69, 69, 70,
- 69, 69, 50, 48, 47, 46, 46, 46, 46, 50, 51, 54, 56, 57, 59, 61, 62, 63,
- 64, 65, 66, 68, 68, 70, 71, 71, 72, 71, 71, 72, 71, 71, 71, 72, 51, 48,
- 47, 46, 47, 46, 46, 50, 51, 54, 56, 57, 60, 62, 62, 64, 65, 66, 67, 69,
- 69, 71, 72, 72, 73, 74, 74, 72, 73, 74, 73, 73, 52, 50, 48, 47, 47, 47,
- 47, 50, 52, 54, 57, 58, 61, 63, 64, 66, 68, 68, 70, 72, 72, 75, 75, 75,
- 77, 76, 75, 76, 76, 74, 75, 76, 54, 51, 50, 49, 49, 48, 48, 51, 53, 55,
- 58, 59, 62, 64, 65, 68, 70, 70, 73, 74, 75, 77, 78, 78, 79, 78, 79, 78,
- 77, 78, 77, 77, 55, 52, 51, 50, 49, 49, 48, 52, 53, 55, 59, 60, 62, 65,
- 66, 68, 70, 71, 73, 75, 76, 78, 79, 79, 80, 81, 80, 80, 81, 79, 79, 81,
- 57, 54, 53, 52, 51, 50, 50, 53, 54, 56, 60, 61, 63, 66, 67, 70, 73, 73,
- 76, 78, 79, 82, 82, 83, 84, 83, 83, 83, 82, 83, 82, 81, 59, 56, 54, 53,
- 53, 52, 51, 54, 56, 58, 61, 62, 65, 68, 69, 72, 74, 75, 78, 80, 81, 84,
- 85, 85, 86, 86, 86, 84, 85, 84, 84, 85, 60, 57, 55, 54, 53, 53, 52, 55,
- 56, 58, 61, 63, 65, 68, 69, 72, 75, 76, 79, 81, 82, 85, 86, 86, 88, 88,
- 87, 88, 86, 87, 87, 85, 63, 60, 58, 57, 56, 55, 54, 57, 59, 60, 63, 65,
- 67, 70, 71, 75, 77, 78, 82, 84, 85, 89, 89, 90, 92, 89, 91, 89, 90, 89,
- 88, 89, 64, 61, 59, 58, 57, 56, 55, 58, 59, 61, 64, 65, 68, 71, 72, 75,
- 78, 79, 82, 85, 86, 89, 90, 91, 93, 94, 92, 92, 91, 91, 92, 90, 65, 61,
- 60, 58, 57, 56, 55, 58, 59, 61, 64, 65, 68, 71, 72, 75, 78, 79, 83, 85,
- 86, 90, 91, 91, 93, 94, 95, 94, 94, 94, 93, 94, 67, 63, 61, 60, 59, 58,
- 57, 60, 61, 63, 65, 66, 69, 72, 73, 77, 79, 80, 84, 86, 88, 92, 93, 93,
- 95, 95, 96, 97, 96, 95, 96, 94, 68, 64, 63, 61, 60, 59, 58, 60, 61, 63,
- 65, 67, 70, 71, 74, 76, 78, 81, 83, 86, 88, 89, 94, 94, 95, 97, 97, 98,
- 99, 99, 97, 99, 68, 65, 64, 62, 61, 60, 58, 59, 61, 64, 64, 68, 69, 71,
- 74, 75, 79, 80, 83, 86, 87, 91, 92, 95, 96, 97, 99, 99, 100, 100, 101,
- 99, 69, 66, 65, 63, 62, 61, 59, 59, 62, 63, 65, 67, 69, 72, 72, 76, 78,
- 80, 83, 84, 88, 89, 92, 94, 97, 98, 99, 101, 100, 102, 102, 104, 70, 67,
- 66, 63, 63, 62, 61, 60, 63, 63, 66, 67, 69, 71, 73, 76, 77, 81, 82, 85,
- 86, 90, 91, 94, 96, 99, 100, 100, 103, 102, 104, 104, 71, 67, 67, 64,
- 64, 63, 62, 61, 62, 64, 66, 67, 70, 71, 74, 74, 78, 79, 83, 84, 87, 89,
- 91, 94, 95, 99, 100, 102, 102, 104, 104, 106, 72, 68, 68, 65, 65, 64,
- 63, 61, 62, 65, 66, 68, 69, 71, 73, 75, 77, 79, 82, 84, 87, 88, 92, 93,
- 96, 97, 101, 102, 104, 104, 106, 106, 73, 69, 69, 66, 66, 64, 64, 62,
- 62, 66, 66, 69, 69, 72, 73, 76, 77, 81, 81, 85, 85, 89, 90, 94, 94, 99,
- 99, 104, 104, 106, 106, 108,
- /* Size 4x8 */
- 31, 47, 54, 64, 38, 46, 50, 60, 46, 53, 57, 62, 46, 56, 66, 71, 50, 59,
- 74, 79, 57, 64, 82, 88, 61, 65, 85, 97, 65, 67, 82, 99,
- /* Size 8x4 */
- 31, 38, 46, 46, 50, 57, 61, 65, 47, 46, 53, 56, 59, 64, 65, 67, 54, 50,
- 57, 66, 74, 82, 85, 82, 64, 60, 62, 71, 79, 88, 97, 99,
- /* Size 8x16 */
- 32, 34, 48, 49, 54, 63, 67, 69, 31, 36, 46, 46, 50, 58, 62, 65, 33, 40,
- 47, 46, 49, 56, 59, 62, 37, 44, 47, 45, 48, 54, 57, 60, 44, 46, 51, 51,
- 53, 59, 60, 61, 48, 46, 53, 56, 58, 64, 64, 64, 49, 45, 53, 58, 62, 67,
- 70, 68, 51, 47, 54, 60, 65, 71, 73, 72, 54, 49, 55, 62, 70, 77, 77, 76,
- 57, 51, 56, 64, 73, 82, 83, 81, 60, 53, 58, 65, 75, 85, 89, 85, 64, 57,
- 61, 68, 78, 89, 93, 89, 66, 59, 63, 69, 79, 91, 94, 93, 68, 61, 63, 71,
- 79, 87, 96, 98, 70, 63, 63, 70, 80, 89, 97, 100, 72, 65, 63, 69, 77, 86,
- 95, 102,
- /* Size 16x8 */
- 32, 31, 33, 37, 44, 48, 49, 51, 54, 57, 60, 64, 66, 68, 70, 72, 34, 36,
- 40, 44, 46, 46, 45, 47, 49, 51, 53, 57, 59, 61, 63, 65, 48, 46, 47, 47,
- 51, 53, 53, 54, 55, 56, 58, 61, 63, 63, 63, 63, 49, 46, 46, 45, 51, 56,
- 58, 60, 62, 64, 65, 68, 69, 71, 70, 69, 54, 50, 49, 48, 53, 58, 62, 65,
- 70, 73, 75, 78, 79, 79, 80, 77, 63, 58, 56, 54, 59, 64, 67, 71, 77, 82,
- 85, 89, 91, 87, 89, 86, 67, 62, 59, 57, 60, 64, 70, 73, 77, 83, 89, 93,
- 94, 96, 97, 95, 69, 65, 62, 60, 61, 64, 68, 72, 76, 81, 85, 89, 93, 98,
- 100, 102,
- /* Size 16x32 */
- 32, 31, 34, 37, 48, 48, 49, 52, 54, 57, 63, 64, 67, 68, 69, 69, 31, 31,
- 35, 38, 47, 47, 47, 50, 51, 54, 60, 61, 63, 64, 65, 66, 31, 32, 36, 39,
- 46, 46, 46, 48, 50, 53, 58, 59, 62, 63, 65, 66, 30, 32, 36, 40, 46, 45,
- 45, 48, 49, 52, 57, 58, 60, 61, 62, 63, 33, 36, 40, 43, 47, 46, 46, 47,
- 49, 51, 56, 57, 59, 60, 62, 63, 35, 38, 42, 45, 47, 46, 45, 47, 48, 50,
- 55, 56, 58, 60, 61, 61, 37, 40, 44, 47, 47, 46, 45, 47, 48, 50, 54, 55,
- 57, 58, 60, 61, 42, 43, 45, 47, 50, 50, 49, 50, 51, 53, 57, 58, 59, 58,
- 59, 59, 44, 44, 46, 47, 51, 51, 51, 52, 53, 54, 59, 59, 60, 61, 61, 60,
- 49, 46, 47, 48, 53, 53, 53, 54, 55, 57, 60, 61, 63, 62, 62, 63, 48, 46,
- 46, 47, 53, 54, 56, 57, 58, 60, 64, 64, 64, 64, 64, 63, 48, 45, 46, 46,
- 53, 55, 56, 58, 59, 61, 65, 65, 66, 66, 65, 66, 49, 45, 45, 46, 53, 56,
- 58, 61, 62, 64, 67, 68, 70, 67, 68, 66, 50, 46, 46, 46, 54, 56, 59, 63,
- 65, 66, 70, 71, 70, 71, 68, 70, 51, 47, 47, 47, 54, 57, 60, 64, 65, 68,
- 71, 72, 73, 71, 72, 70, 52, 48, 47, 47, 54, 57, 61, 66, 68, 71, 75, 75,
- 76, 75, 73, 73, 54, 49, 49, 48, 55, 58, 62, 68, 70, 73, 77, 78, 77, 77,
- 76, 74, 54, 50, 49, 49, 55, 59, 62, 68, 70, 74, 78, 79, 81, 79, 77, 78,
- 57, 52, 51, 50, 56, 60, 64, 70, 73, 76, 82, 82, 83, 82, 81, 78, 59, 54,
- 52, 52, 58, 61, 65, 72, 74, 78, 84, 85, 85, 83, 82, 82, 60, 54, 53, 52,
- 58, 62, 65, 72, 75, 79, 85, 86, 89, 87, 85, 82, 63, 57, 56, 55, 60, 64,
- 67, 75, 77, 82, 89, 90, 90, 88, 87, 86, 64, 58, 57, 55, 61, 64, 68, 75,
- 78, 82, 89, 90, 93, 91, 89, 87, 64, 59, 57, 56, 61, 65, 68, 75, 78, 83,
- 90, 91, 94, 93, 92, 91, 66, 60, 59, 57, 63, 66, 69, 77, 79, 84, 91, 93,
- 94, 95, 93, 91, 67, 61, 60, 58, 63, 65, 70, 75, 78, 85, 88, 93, 96, 97,
- 97, 95, 68, 62, 61, 59, 63, 64, 71, 74, 79, 84, 87, 94, 96, 97, 98, 96,
- 69, 63, 62, 60, 63, 65, 71, 72, 80, 82, 88, 93, 96, 99, 100, 101, 70,
- 64, 63, 60, 63, 66, 70, 73, 80, 81, 89, 90, 97, 99, 100, 101, 71, 65,
- 64, 61, 63, 67, 70, 74, 78, 82, 88, 90, 97, 99, 102, 103, 72, 65, 65,
- 62, 63, 68, 69, 75, 77, 83, 86, 92, 95, 100, 102, 103, 73, 66, 66, 63,
- 63, 69, 69, 76, 76, 84, 84, 93, 93, 101, 101, 105,
- /* Size 32x16 */
- 32, 31, 31, 30, 33, 35, 37, 42, 44, 49, 48, 48, 49, 50, 51, 52, 54, 54,
- 57, 59, 60, 63, 64, 64, 66, 67, 68, 69, 70, 71, 72, 73, 31, 31, 32, 32,
- 36, 38, 40, 43, 44, 46, 46, 45, 45, 46, 47, 48, 49, 50, 52, 54, 54, 57,
- 58, 59, 60, 61, 62, 63, 64, 65, 65, 66, 34, 35, 36, 36, 40, 42, 44, 45,
- 46, 47, 46, 46, 45, 46, 47, 47, 49, 49, 51, 52, 53, 56, 57, 57, 59, 60,
- 61, 62, 63, 64, 65, 66, 37, 38, 39, 40, 43, 45, 47, 47, 47, 48, 47, 46,
- 46, 46, 47, 47, 48, 49, 50, 52, 52, 55, 55, 56, 57, 58, 59, 60, 60, 61,
- 62, 63, 48, 47, 46, 46, 47, 47, 47, 50, 51, 53, 53, 53, 53, 54, 54, 54,
- 55, 55, 56, 58, 58, 60, 61, 61, 63, 63, 63, 63, 63, 63, 63, 63, 48, 47,
- 46, 45, 46, 46, 46, 50, 51, 53, 54, 55, 56, 56, 57, 57, 58, 59, 60, 61,
- 62, 64, 64, 65, 66, 65, 64, 65, 66, 67, 68, 69, 49, 47, 46, 45, 46, 45,
- 45, 49, 51, 53, 56, 56, 58, 59, 60, 61, 62, 62, 64, 65, 65, 67, 68, 68,
- 69, 70, 71, 71, 70, 70, 69, 69, 52, 50, 48, 48, 47, 47, 47, 50, 52, 54,
- 57, 58, 61, 63, 64, 66, 68, 68, 70, 72, 72, 75, 75, 75, 77, 75, 74, 72,
- 73, 74, 75, 76, 54, 51, 50, 49, 49, 48, 48, 51, 53, 55, 58, 59, 62, 65,
- 65, 68, 70, 70, 73, 74, 75, 77, 78, 78, 79, 78, 79, 80, 80, 78, 77, 76,
- 57, 54, 53, 52, 51, 50, 50, 53, 54, 57, 60, 61, 64, 66, 68, 71, 73, 74,
- 76, 78, 79, 82, 82, 83, 84, 85, 84, 82, 81, 82, 83, 84, 63, 60, 58, 57,
- 56, 55, 54, 57, 59, 60, 64, 65, 67, 70, 71, 75, 77, 78, 82, 84, 85, 89,
- 89, 90, 91, 88, 87, 88, 89, 88, 86, 84, 64, 61, 59, 58, 57, 56, 55, 58,
- 59, 61, 64, 65, 68, 71, 72, 75, 78, 79, 82, 85, 86, 90, 90, 91, 93, 93,
- 94, 93, 90, 90, 92, 93, 67, 63, 62, 60, 59, 58, 57, 59, 60, 63, 64, 66,
- 70, 70, 73, 76, 77, 81, 83, 85, 89, 90, 93, 94, 94, 96, 96, 96, 97, 97,
- 95, 93, 68, 64, 63, 61, 60, 60, 58, 58, 61, 62, 64, 66, 67, 71, 71, 75,
- 77, 79, 82, 83, 87, 88, 91, 93, 95, 97, 97, 99, 99, 99, 100, 101, 69,
- 65, 65, 62, 62, 61, 60, 59, 61, 62, 64, 65, 68, 68, 72, 73, 76, 77, 81,
- 82, 85, 87, 89, 92, 93, 97, 98, 100, 100, 102, 102, 101, 69, 66, 66, 63,
- 63, 61, 61, 59, 60, 63, 63, 66, 66, 70, 70, 73, 74, 78, 78, 82, 82, 86,
- 87, 91, 91, 95, 96, 101, 101, 103, 103, 105,
- /* Size 4x16 */
- 31, 48, 57, 68, 32, 46, 53, 63, 36, 46, 51, 60, 40, 46, 50, 58, 44, 51,
- 54, 61, 46, 54, 60, 64, 45, 56, 64, 67, 47, 57, 68, 71, 49, 58, 73, 77,
- 52, 60, 76, 82, 54, 62, 79, 87, 58, 64, 82, 91, 60, 66, 84, 95, 62, 64,
- 84, 97, 64, 66, 81, 99, 65, 68, 83, 100,
- /* Size 16x4 */
- 31, 32, 36, 40, 44, 46, 45, 47, 49, 52, 54, 58, 60, 62, 64, 65, 48, 46,
- 46, 46, 51, 54, 56, 57, 58, 60, 62, 64, 66, 64, 66, 68, 57, 53, 51, 50,
- 54, 60, 64, 68, 73, 76, 79, 82, 84, 84, 81, 83, 68, 63, 60, 58, 61, 64,
- 67, 71, 77, 82, 87, 91, 95, 97, 99, 100,
- /* Size 8x32 */
- 32, 34, 48, 49, 54, 63, 67, 69, 31, 35, 47, 47, 51, 60, 63, 65, 31, 36,
- 46, 46, 50, 58, 62, 65, 30, 36, 46, 45, 49, 57, 60, 62, 33, 40, 47, 46,
- 49, 56, 59, 62, 35, 42, 47, 45, 48, 55, 58, 61, 37, 44, 47, 45, 48, 54,
- 57, 60, 42, 45, 50, 49, 51, 57, 59, 59, 44, 46, 51, 51, 53, 59, 60, 61,
- 49, 47, 53, 53, 55, 60, 63, 62, 48, 46, 53, 56, 58, 64, 64, 64, 48, 46,
- 53, 56, 59, 65, 66, 65, 49, 45, 53, 58, 62, 67, 70, 68, 50, 46, 54, 59,
- 65, 70, 70, 68, 51, 47, 54, 60, 65, 71, 73, 72, 52, 47, 54, 61, 68, 75,
- 76, 73, 54, 49, 55, 62, 70, 77, 77, 76, 54, 49, 55, 62, 70, 78, 81, 77,
- 57, 51, 56, 64, 73, 82, 83, 81, 59, 52, 58, 65, 74, 84, 85, 82, 60, 53,
- 58, 65, 75, 85, 89, 85, 63, 56, 60, 67, 77, 89, 90, 87, 64, 57, 61, 68,
- 78, 89, 93, 89, 64, 57, 61, 68, 78, 90, 94, 92, 66, 59, 63, 69, 79, 91,
- 94, 93, 67, 60, 63, 70, 78, 88, 96, 97, 68, 61, 63, 71, 79, 87, 96, 98,
- 69, 62, 63, 71, 80, 88, 96, 100, 70, 63, 63, 70, 80, 89, 97, 100, 71,
- 64, 63, 70, 78, 88, 97, 102, 72, 65, 63, 69, 77, 86, 95, 102, 73, 66,
- 63, 69, 76, 84, 93, 101,
- /* Size 32x8 */
- 32, 31, 31, 30, 33, 35, 37, 42, 44, 49, 48, 48, 49, 50, 51, 52, 54, 54,
- 57, 59, 60, 63, 64, 64, 66, 67, 68, 69, 70, 71, 72, 73, 34, 35, 36, 36,
- 40, 42, 44, 45, 46, 47, 46, 46, 45, 46, 47, 47, 49, 49, 51, 52, 53, 56,
- 57, 57, 59, 60, 61, 62, 63, 64, 65, 66, 48, 47, 46, 46, 47, 47, 47, 50,
- 51, 53, 53, 53, 53, 54, 54, 54, 55, 55, 56, 58, 58, 60, 61, 61, 63, 63,
- 63, 63, 63, 63, 63, 63, 49, 47, 46, 45, 46, 45, 45, 49, 51, 53, 56, 56,
- 58, 59, 60, 61, 62, 62, 64, 65, 65, 67, 68, 68, 69, 70, 71, 71, 70, 70,
- 69, 69, 54, 51, 50, 49, 49, 48, 48, 51, 53, 55, 58, 59, 62, 65, 65, 68,
- 70, 70, 73, 74, 75, 77, 78, 78, 79, 78, 79, 80, 80, 78, 77, 76, 63, 60,
- 58, 57, 56, 55, 54, 57, 59, 60, 64, 65, 67, 70, 71, 75, 77, 78, 82, 84,
- 85, 89, 89, 90, 91, 88, 87, 88, 89, 88, 86, 84, 67, 63, 62, 60, 59, 58,
- 57, 59, 60, 63, 64, 66, 70, 70, 73, 76, 77, 81, 83, 85, 89, 90, 93, 94,
- 94, 96, 96, 96, 97, 97, 95, 93, 69, 65, 65, 62, 62, 61, 60, 59, 61, 62,
- 64, 65, 68, 68, 72, 73, 76, 77, 81, 82, 85, 87, 89, 92, 93, 97, 98, 100,
- 100, 102, 102, 101 },
- },
- {
- { /* Luma */
- /* Size 4x4 */
- 32, 37, 58, 81, 37, 54, 72, 91, 58, 72, 102, 121, 81, 91, 121, 156,
- /* Size 8x8 */
- 32, 32, 35, 42, 53, 68, 78, 90, 32, 33, 36, 42, 51, 64, 74, 84, 35, 36,
- 46, 52, 60, 72, 80, 87, 42, 42, 52, 63, 73, 84, 92, 98, 53, 51, 60, 73,
- 86, 100, 109, 114, 68, 64, 72, 84, 100, 117, 128, 133, 78, 74, 80, 92,
- 109, 128, 140, 155, 90, 84, 87, 98, 114, 133, 155, 168,
- /* Size 16x16 */
- 32, 31, 31, 32, 34, 36, 41, 47, 54, 59, 65, 74, 82, 87, 92, 97, 31, 32,
- 32, 32, 34, 35, 39, 45, 50, 55, 61, 69, 76, 81, 87, 92, 31, 32, 33, 33,
- 35, 36, 40, 44, 49, 54, 59, 67, 73, 78, 83, 88, 32, 32, 33, 35, 37, 38,
- 41, 45, 49, 53, 58, 65, 71, 75, 80, 86, 34, 34, 35, 37, 39, 42, 46, 50,
- 54, 58, 63, 70, 76, 80, 84, 85, 36, 35, 36, 38, 42, 48, 52, 56, 60, 64,
- 68, 75, 80, 85, 90, 91, 41, 39, 40, 41, 46, 52, 57, 62, 67, 71, 75, 83,
- 88, 92, 95, 97, 47, 45, 44, 45, 50, 56, 62, 69, 75, 79, 84, 91, 97, 100,
- 102, 104, 54, 50, 49, 49, 54, 60, 67, 75, 82, 87, 92, 100, 106, 110,
- 109, 112, 59, 55, 54, 53, 58, 64, 71, 79, 87, 92, 98, 106, 112, 117,
- 117, 121, 65, 61, 59, 58, 63, 68, 75, 84, 92, 98, 105, 114, 120, 125,
- 126, 130, 74, 69, 67, 65, 70, 75, 83, 91, 100, 106, 114, 123, 131, 135,
- 137, 140, 82, 76, 73, 71, 76, 80, 88, 97, 106, 112, 120, 131, 139, 144,
- 148, 150, 87, 81, 78, 75, 80, 85, 92, 100, 110, 117, 125, 135, 144, 150,
- 155, 162, 92, 87, 83, 80, 84, 90, 95, 102, 109, 117, 126, 137, 148, 155,
- 162, 168, 97, 92, 88, 86, 85, 91, 97, 104, 112, 121, 130, 140, 150, 162,
- 168, 174,
- /* Size 32x32 */
- 32, 31, 31, 31, 31, 31, 32, 32, 34, 35, 36, 39, 41, 44, 47, 48, 54, 56,
- 59, 64, 65, 71, 74, 80, 82, 83, 87, 90, 92, 95, 97, 100, 31, 32, 32, 32,
- 32, 32, 32, 33, 34, 35, 35, 38, 40, 42, 45, 46, 51, 53, 56, 61, 62, 68,
- 71, 76, 78, 78, 83, 85, 88, 90, 92, 95, 31, 32, 32, 32, 32, 32, 32, 33,
- 34, 34, 35, 38, 39, 42, 45, 45, 50, 52, 55, 60, 61, 67, 69, 74, 76, 77,
- 81, 84, 87, 89, 92, 95, 31, 32, 32, 32, 32, 32, 32, 33, 33, 34, 34, 37,
- 38, 41, 44, 44, 49, 51, 54, 58, 59, 65, 68, 72, 74, 75, 79, 81, 84, 86,
- 88, 90, 31, 32, 32, 32, 33, 33, 33, 34, 35, 36, 36, 39, 40, 42, 44, 45,
- 49, 51, 54, 58, 59, 64, 67, 71, 73, 74, 78, 80, 83, 85, 88, 90, 31, 32,
- 32, 32, 33, 33, 34, 34, 35, 36, 36, 39, 40, 42, 45, 45, 50, 51, 54, 58,
- 59, 64, 67, 71, 73, 74, 78, 80, 82, 84, 86, 89, 32, 32, 32, 32, 33, 34,
- 35, 36, 37, 38, 38, 40, 41, 42, 45, 46, 49, 51, 53, 57, 58, 63, 65, 69,
- 71, 72, 75, 78, 80, 83, 86, 89, 32, 33, 33, 33, 34, 34, 36, 36, 38, 39,
- 40, 42, 43, 44, 47, 47, 51, 53, 55, 59, 60, 65, 67, 71, 73, 73, 77, 78,
- 80, 82, 84, 86, 34, 34, 34, 33, 35, 35, 37, 38, 39, 42, 42, 45, 46, 47,
- 50, 51, 54, 56, 58, 62, 63, 68, 70, 74, 76, 76, 80, 82, 84, 85, 85, 86,
- 35, 35, 34, 34, 36, 36, 38, 39, 42, 46, 47, 49, 50, 52, 55, 55, 59, 60,
- 62, 66, 67, 72, 74, 78, 79, 80, 83, 84, 85, 87, 90, 92, 36, 35, 35, 34,
- 36, 36, 38, 40, 42, 47, 48, 50, 52, 54, 56, 57, 60, 61, 64, 67, 68, 73,
- 75, 79, 80, 81, 85, 87, 90, 91, 91, 92, 39, 38, 38, 37, 39, 39, 40, 42,
- 45, 49, 50, 54, 55, 58, 60, 61, 65, 66, 69, 72, 73, 78, 80, 84, 86, 86,
- 90, 91, 91, 92, 95, 97, 41, 40, 39, 38, 40, 40, 41, 43, 46, 50, 52, 55,
- 57, 60, 62, 63, 67, 69, 71, 75, 75, 80, 83, 86, 88, 89, 92, 93, 95, 97,
- 97, 98, 44, 42, 42, 41, 42, 42, 42, 44, 47, 52, 54, 58, 60, 63, 66, 67,
- 71, 73, 75, 79, 79, 84, 86, 90, 92, 92, 96, 98, 98, 98, 101, 104, 47,
- 45, 45, 44, 44, 45, 45, 47, 50, 55, 56, 60, 62, 66, 69, 70, 75, 77, 79,
- 83, 84, 89, 91, 95, 97, 97, 100, 99, 102, 105, 104, 104, 48, 46, 45, 44,
- 45, 45, 46, 47, 51, 55, 57, 61, 63, 67, 70, 71, 76, 78, 80, 84, 85, 90,
- 93, 96, 98, 99, 102, 106, 106, 105, 108, 111, 54, 51, 50, 49, 49, 50,
- 49, 51, 54, 59, 60, 65, 67, 71, 75, 76, 82, 84, 87, 91, 92, 97, 100,
- 104, 106, 106, 110, 108, 109, 112, 112, 111, 56, 53, 52, 51, 51, 51, 51,
- 53, 56, 60, 61, 66, 69, 73, 77, 78, 84, 86, 89, 93, 94, 100, 102, 106,
- 108, 109, 112, 113, 115, 114, 116, 119, 59, 56, 55, 54, 54, 54, 53, 55,
- 58, 62, 64, 69, 71, 75, 79, 80, 87, 89, 92, 97, 98, 103, 106, 110, 112,
- 113, 117, 118, 117, 121, 121, 119, 64, 61, 60, 58, 58, 58, 57, 59, 62,
- 66, 67, 72, 75, 79, 83, 84, 91, 93, 97, 102, 103, 109, 112, 116, 118,
- 119, 122, 121, 125, 123, 125, 128, 65, 62, 61, 59, 59, 59, 58, 60, 63,
- 67, 68, 73, 75, 79, 84, 85, 92, 94, 98, 103, 105, 111, 114, 118, 120,
- 121, 125, 129, 126, 129, 130, 129, 71, 68, 67, 65, 64, 64, 63, 65, 68,
- 72, 73, 78, 80, 84, 89, 90, 97, 100, 103, 109, 111, 117, 120, 125, 127,
- 128, 133, 130, 134, 133, 133, 137, 74, 71, 69, 68, 67, 67, 65, 67, 70,
- 74, 75, 80, 83, 86, 91, 93, 100, 102, 106, 112, 114, 120, 123, 128, 131,
- 131, 135, 137, 137, 138, 140, 137, 80, 76, 74, 72, 71, 71, 69, 71, 74,
- 78, 79, 84, 86, 90, 95, 96, 104, 106, 110, 116, 118, 125, 128, 134, 136,
- 137, 142, 141, 142, 143, 143, 147, 82, 78, 76, 74, 73, 73, 71, 73, 76,
- 79, 80, 86, 88, 92, 97, 98, 106, 108, 112, 118, 120, 127, 131, 136, 139,
- 139, 144, 147, 148, 147, 150, 148, 83, 78, 77, 75, 74, 74, 72, 73, 76,
- 80, 81, 86, 89, 92, 97, 99, 106, 109, 113, 119, 121, 128, 131, 137, 139,
- 140, 145, 150, 152, 155, 152, 157, 87, 83, 81, 79, 78, 78, 75, 77, 80,
- 83, 85, 90, 92, 96, 100, 102, 110, 112, 117, 122, 125, 133, 135, 142,
- 144, 145, 150, 151, 155, 158, 162, 158, 90, 85, 84, 81, 80, 80, 78, 78,
- 82, 84, 87, 91, 93, 98, 99, 106, 108, 113, 118, 121, 129, 130, 137, 141,
- 147, 150, 151, 156, 156, 161, 164, 169, 92, 88, 87, 84, 83, 82, 80, 80,
- 84, 85, 90, 91, 95, 98, 102, 106, 109, 115, 117, 125, 126, 134, 137,
- 142, 148, 152, 155, 156, 162, 162, 168, 170, 95, 90, 89, 86, 85, 84, 83,
- 82, 85, 87, 91, 92, 97, 98, 105, 105, 112, 114, 121, 123, 129, 133, 138,
- 143, 147, 155, 158, 161, 162, 168, 168, 174, 97, 92, 92, 88, 88, 86, 86,
- 84, 85, 90, 91, 95, 97, 101, 104, 108, 112, 116, 121, 125, 130, 133,
- 140, 143, 150, 152, 162, 164, 168, 168, 174, 175, 100, 95, 95, 90, 90,
- 89, 89, 86, 86, 92, 92, 97, 98, 104, 104, 111, 111, 119, 119, 128, 129,
- 137, 137, 147, 148, 157, 158, 169, 170, 174, 175, 181,
- /* Size 4x8 */
- 32, 35, 59, 83, 32, 36, 57, 78, 34, 47, 65, 82, 41, 53, 78, 97, 51, 61,
- 92, 111, 65, 73, 108, 129, 75, 81, 117, 148, 86, 92, 119, 154,
- /* Size 8x4 */
- 32, 32, 34, 41, 51, 65, 75, 86, 35, 36, 47, 53, 61, 73, 81, 92, 59, 57,
- 65, 78, 92, 108, 117, 119, 83, 78, 82, 97, 111, 129, 148, 154,
- /* Size 8x16 */
- 32, 31, 35, 44, 53, 65, 82, 90, 31, 32, 34, 41, 50, 61, 76, 85, 31, 33,
- 35, 42, 49, 59, 73, 81, 32, 34, 37, 42, 49, 58, 71, 79, 34, 35, 41, 48,
- 54, 63, 76, 81, 36, 36, 46, 54, 60, 68, 80, 87, 41, 40, 49, 60, 67, 76,
- 88, 93, 47, 44, 53, 66, 75, 84, 97, 101, 53, 50, 57, 71, 82, 92, 106,
- 108, 58, 54, 61, 75, 87, 98, 112, 116, 65, 59, 66, 79, 92, 105, 120,
- 124, 74, 67, 73, 86, 100, 113, 131, 134, 82, 73, 79, 92, 105, 120, 139,
- 142, 87, 78, 83, 96, 110, 125, 144, 153, 92, 83, 84, 97, 114, 132, 150,
- 157, 97, 88, 86, 97, 111, 128, 147, 163,
- /* Size 16x8 */
- 32, 31, 31, 32, 34, 36, 41, 47, 53, 58, 65, 74, 82, 87, 92, 97, 31, 32,
- 33, 34, 35, 36, 40, 44, 50, 54, 59, 67, 73, 78, 83, 88, 35, 34, 35, 37,
- 41, 46, 49, 53, 57, 61, 66, 73, 79, 83, 84, 86, 44, 41, 42, 42, 48, 54,
- 60, 66, 71, 75, 79, 86, 92, 96, 97, 97, 53, 50, 49, 49, 54, 60, 67, 75,
- 82, 87, 92, 100, 105, 110, 114, 111, 65, 61, 59, 58, 63, 68, 76, 84, 92,
- 98, 105, 113, 120, 125, 132, 128, 82, 76, 73, 71, 76, 80, 88, 97, 106,
- 112, 120, 131, 139, 144, 150, 147, 90, 85, 81, 79, 81, 87, 93, 101, 108,
- 116, 124, 134, 142, 153, 157, 163,
- /* Size 16x32 */
- 32, 31, 31, 32, 35, 36, 44, 47, 53, 62, 65, 79, 82, 88, 90, 93, 31, 32,
- 32, 32, 35, 35, 42, 45, 51, 59, 62, 75, 78, 83, 86, 88, 31, 32, 32, 32,
- 34, 35, 41, 45, 50, 58, 61, 74, 76, 82, 85, 88, 31, 32, 32, 33, 34, 34,
- 41, 44, 49, 57, 59, 72, 74, 79, 82, 84, 31, 32, 33, 34, 35, 36, 42, 44,
- 49, 57, 59, 71, 73, 79, 81, 84, 32, 32, 33, 34, 36, 36, 42, 45, 50, 57,
- 59, 71, 73, 78, 80, 82, 32, 33, 34, 35, 37, 38, 42, 45, 49, 56, 58, 69,
- 71, 76, 79, 83, 32, 33, 34, 36, 39, 40, 44, 47, 51, 58, 60, 71, 73, 76,
- 78, 80, 34, 34, 35, 37, 41, 42, 48, 50, 54, 61, 63, 73, 76, 81, 81, 80,
- 35, 34, 36, 38, 45, 47, 52, 55, 59, 65, 67, 77, 79, 82, 83, 86, 36, 34,
- 36, 38, 46, 48, 54, 56, 60, 66, 68, 78, 80, 85, 87, 86, 39, 37, 39, 40,
- 48, 50, 58, 60, 65, 71, 73, 84, 86, 89, 88, 91, 41, 39, 40, 41, 49, 51,
- 60, 62, 67, 74, 76, 86, 88, 91, 93, 91, 44, 41, 42, 43, 51, 53, 63, 66,
- 71, 78, 79, 90, 92, 97, 94, 97, 47, 44, 44, 45, 53, 56, 66, 69, 75, 82,
- 84, 95, 97, 98, 101, 98, 48, 45, 45, 46, 54, 56, 67, 70, 76, 83, 85, 96,
- 98, 104, 101, 105, 53, 49, 50, 50, 57, 60, 71, 75, 82, 90, 92, 103, 106,
- 107, 108, 105, 55, 51, 51, 51, 59, 61, 72, 77, 84, 92, 94, 106, 108,
- 111, 110, 112, 58, 54, 54, 54, 61, 63, 75, 79, 87, 95, 98, 110, 112,
- 117, 116, 113, 63, 58, 58, 57, 65, 67, 78, 83, 91, 100, 103, 116, 118,
- 119, 119, 121, 65, 60, 59, 58, 66, 68, 79, 84, 92, 102, 105, 118, 120,
- 127, 124, 122, 71, 65, 64, 63, 71, 73, 84, 89, 97, 108, 111, 125, 127,
- 129, 129, 130, 74, 68, 67, 66, 73, 75, 86, 91, 100, 110, 113, 128, 131,
- 135, 134, 130, 79, 72, 71, 70, 77, 79, 90, 95, 104, 115, 118, 133, 136,
- 140, 139, 140, 82, 75, 73, 72, 79, 81, 92, 97, 105, 117, 120, 136, 139,
- 145, 142, 140, 82, 75, 74, 72, 79, 81, 92, 97, 106, 117, 121, 136, 139,
- 148, 150, 149, 87, 79, 78, 76, 83, 85, 96, 100, 110, 120, 125, 141, 144,
- 148, 153, 150, 89, 82, 81, 78, 83, 87, 97, 99, 113, 118, 128, 139, 145,
- 153, 157, 161, 92, 84, 83, 80, 84, 89, 97, 101, 114, 116, 132, 135, 150,
- 153, 157, 162, 94, 86, 85, 82, 85, 92, 97, 104, 112, 119, 130, 136, 151,
- 154, 163, 166, 97, 88, 88, 85, 86, 94, 97, 107, 111, 123, 128, 140, 147,
- 159, 163, 167, 99, 91, 91, 87, 87, 97, 97, 110, 110, 126, 126, 144, 144,
- 163, 163, 173,
- /* Size 32x16 */
- 32, 31, 31, 31, 31, 32, 32, 32, 34, 35, 36, 39, 41, 44, 47, 48, 53, 55,
- 58, 63, 65, 71, 74, 79, 82, 82, 87, 89, 92, 94, 97, 99, 31, 32, 32, 32,
- 32, 32, 33, 33, 34, 34, 34, 37, 39, 41, 44, 45, 49, 51, 54, 58, 60, 65,
- 68, 72, 75, 75, 79, 82, 84, 86, 88, 91, 31, 32, 32, 32, 33, 33, 34, 34,
- 35, 36, 36, 39, 40, 42, 44, 45, 50, 51, 54, 58, 59, 64, 67, 71, 73, 74,
- 78, 81, 83, 85, 88, 91, 32, 32, 32, 33, 34, 34, 35, 36, 37, 38, 38, 40,
- 41, 43, 45, 46, 50, 51, 54, 57, 58, 63, 66, 70, 72, 72, 76, 78, 80, 82,
- 85, 87, 35, 35, 34, 34, 35, 36, 37, 39, 41, 45, 46, 48, 49, 51, 53, 54,
- 57, 59, 61, 65, 66, 71, 73, 77, 79, 79, 83, 83, 84, 85, 86, 87, 36, 35,
- 35, 34, 36, 36, 38, 40, 42, 47, 48, 50, 51, 53, 56, 56, 60, 61, 63, 67,
- 68, 73, 75, 79, 81, 81, 85, 87, 89, 92, 94, 97, 44, 42, 41, 41, 42, 42,
- 42, 44, 48, 52, 54, 58, 60, 63, 66, 67, 71, 72, 75, 78, 79, 84, 86, 90,
- 92, 92, 96, 97, 97, 97, 97, 97, 47, 45, 45, 44, 44, 45, 45, 47, 50, 55,
- 56, 60, 62, 66, 69, 70, 75, 77, 79, 83, 84, 89, 91, 95, 97, 97, 100, 99,
- 101, 104, 107, 110, 53, 51, 50, 49, 49, 50, 49, 51, 54, 59, 60, 65, 67,
- 71, 75, 76, 82, 84, 87, 91, 92, 97, 100, 104, 105, 106, 110, 113, 114,
- 112, 111, 110, 62, 59, 58, 57, 57, 57, 56, 58, 61, 65, 66, 71, 74, 78,
- 82, 83, 90, 92, 95, 100, 102, 108, 110, 115, 117, 117, 120, 118, 116,
- 119, 123, 126, 65, 62, 61, 59, 59, 59, 58, 60, 63, 67, 68, 73, 76, 79,
- 84, 85, 92, 94, 98, 103, 105, 111, 113, 118, 120, 121, 125, 128, 132,
- 130, 128, 126, 79, 75, 74, 72, 71, 71, 69, 71, 73, 77, 78, 84, 86, 90,
- 95, 96, 103, 106, 110, 116, 118, 125, 128, 133, 136, 136, 141, 139, 135,
- 136, 140, 144, 82, 78, 76, 74, 73, 73, 71, 73, 76, 79, 80, 86, 88, 92,
- 97, 98, 106, 108, 112, 118, 120, 127, 131, 136, 139, 139, 144, 145, 150,
- 151, 147, 144, 88, 83, 82, 79, 79, 78, 76, 76, 81, 82, 85, 89, 91, 97,
- 98, 104, 107, 111, 117, 119, 127, 129, 135, 140, 145, 148, 148, 153,
- 153, 154, 159, 163, 90, 86, 85, 82, 81, 80, 79, 78, 81, 83, 87, 88, 93,
- 94, 101, 101, 108, 110, 116, 119, 124, 129, 134, 139, 142, 150, 153,
- 157, 157, 163, 163, 163, 93, 88, 88, 84, 84, 82, 83, 80, 80, 86, 86, 91,
- 91, 97, 98, 105, 105, 112, 113, 121, 122, 130, 130, 140, 140, 149, 150,
- 161, 162, 166, 167, 173,
- /* Size 4x16 */
- 31, 36, 62, 88, 32, 35, 58, 82, 32, 36, 57, 79, 33, 38, 56, 76, 34, 42,
- 61, 81, 34, 48, 66, 85, 39, 51, 74, 91, 44, 56, 82, 98, 49, 60, 90, 107,
- 54, 63, 95, 117, 60, 68, 102, 127, 68, 75, 110, 135, 75, 81, 117, 145,
- 79, 85, 120, 148, 84, 89, 116, 153, 88, 94, 123, 159,
- /* Size 16x4 */
- 31, 32, 32, 33, 34, 34, 39, 44, 49, 54, 60, 68, 75, 79, 84, 88, 36, 35,
- 36, 38, 42, 48, 51, 56, 60, 63, 68, 75, 81, 85, 89, 94, 62, 58, 57, 56,
- 61, 66, 74, 82, 90, 95, 102, 110, 117, 120, 116, 123, 88, 82, 79, 76,
- 81, 85, 91, 98, 107, 117, 127, 135, 145, 148, 153, 159,
- /* Size 8x32 */
- 32, 31, 35, 44, 53, 65, 82, 90, 31, 32, 35, 42, 51, 62, 78, 86, 31, 32,
- 34, 41, 50, 61, 76, 85, 31, 32, 34, 41, 49, 59, 74, 82, 31, 33, 35, 42,
- 49, 59, 73, 81, 32, 33, 36, 42, 50, 59, 73, 80, 32, 34, 37, 42, 49, 58,
- 71, 79, 32, 34, 39, 44, 51, 60, 73, 78, 34, 35, 41, 48, 54, 63, 76, 81,
- 35, 36, 45, 52, 59, 67, 79, 83, 36, 36, 46, 54, 60, 68, 80, 87, 39, 39,
- 48, 58, 65, 73, 86, 88, 41, 40, 49, 60, 67, 76, 88, 93, 44, 42, 51, 63,
- 71, 79, 92, 94, 47, 44, 53, 66, 75, 84, 97, 101, 48, 45, 54, 67, 76, 85,
- 98, 101, 53, 50, 57, 71, 82, 92, 106, 108, 55, 51, 59, 72, 84, 94, 108,
- 110, 58, 54, 61, 75, 87, 98, 112, 116, 63, 58, 65, 78, 91, 103, 118,
- 119, 65, 59, 66, 79, 92, 105, 120, 124, 71, 64, 71, 84, 97, 111, 127,
- 129, 74, 67, 73, 86, 100, 113, 131, 134, 79, 71, 77, 90, 104, 118, 136,
- 139, 82, 73, 79, 92, 105, 120, 139, 142, 82, 74, 79, 92, 106, 121, 139,
- 150, 87, 78, 83, 96, 110, 125, 144, 153, 89, 81, 83, 97, 113, 128, 145,
- 157, 92, 83, 84, 97, 114, 132, 150, 157, 94, 85, 85, 97, 112, 130, 151,
- 163, 97, 88, 86, 97, 111, 128, 147, 163, 99, 91, 87, 97, 110, 126, 144,
- 163,
- /* Size 32x8 */
- 32, 31, 31, 31, 31, 32, 32, 32, 34, 35, 36, 39, 41, 44, 47, 48, 53, 55,
- 58, 63, 65, 71, 74, 79, 82, 82, 87, 89, 92, 94, 97, 99, 31, 32, 32, 32,
- 33, 33, 34, 34, 35, 36, 36, 39, 40, 42, 44, 45, 50, 51, 54, 58, 59, 64,
- 67, 71, 73, 74, 78, 81, 83, 85, 88, 91, 35, 35, 34, 34, 35, 36, 37, 39,
- 41, 45, 46, 48, 49, 51, 53, 54, 57, 59, 61, 65, 66, 71, 73, 77, 79, 79,
- 83, 83, 84, 85, 86, 87, 44, 42, 41, 41, 42, 42, 42, 44, 48, 52, 54, 58,
- 60, 63, 66, 67, 71, 72, 75, 78, 79, 84, 86, 90, 92, 92, 96, 97, 97, 97,
- 97, 97, 53, 51, 50, 49, 49, 50, 49, 51, 54, 59, 60, 65, 67, 71, 75, 76,
- 82, 84, 87, 91, 92, 97, 100, 104, 105, 106, 110, 113, 114, 112, 111,
- 110, 65, 62, 61, 59, 59, 59, 58, 60, 63, 67, 68, 73, 76, 79, 84, 85, 92,
- 94, 98, 103, 105, 111, 113, 118, 120, 121, 125, 128, 132, 130, 128, 126,
- 82, 78, 76, 74, 73, 73, 71, 73, 76, 79, 80, 86, 88, 92, 97, 98, 106,
- 108, 112, 118, 120, 127, 131, 136, 139, 139, 144, 145, 150, 151, 147,
- 144, 90, 86, 85, 82, 81, 80, 79, 78, 81, 83, 87, 88, 93, 94, 101, 101,
- 108, 110, 116, 119, 124, 129, 134, 139, 142, 150, 153, 157, 157, 163,
- 163, 163 },
- { /* Chroma */
- /* Size 4x4 */
- 32, 45, 51, 61, 45, 54, 59, 65, 51, 59, 75, 81, 61, 65, 81, 97,
- /* Size 8x8 */
- 31, 34, 46, 47, 50, 57, 61, 65, 34, 39, 47, 45, 48, 53, 57, 61, 46, 47,
- 52, 52, 54, 58, 61, 62, 47, 45, 52, 58, 62, 65, 68, 68, 50, 48, 54, 62,
- 68, 73, 77, 76, 57, 53, 58, 65, 73, 82, 86, 86, 61, 57, 61, 68, 77, 86,
- 91, 95, 65, 61, 62, 68, 76, 86, 95, 100,
- /* Size 16x16 */
- 32, 31, 33, 36, 41, 49, 49, 50, 52, 54, 57, 61, 64, 67, 68, 70, 31, 31,
- 34, 39, 42, 47, 46, 47, 49, 51, 53, 57, 60, 62, 64, 66, 33, 34, 37, 42,
- 44, 47, 46, 46, 47, 49, 51, 55, 57, 59, 61, 63, 36, 39, 42, 47, 47, 48,
- 46, 46, 47, 48, 50, 53, 55, 57, 59, 61, 41, 42, 44, 47, 48, 50, 49, 50,
- 50, 52, 53, 56, 58, 60, 61, 60, 49, 47, 47, 48, 50, 53, 53, 54, 54, 55,
- 56, 59, 61, 63, 64, 64, 49, 46, 46, 46, 49, 53, 55, 57, 59, 60, 61, 64,
- 66, 67, 67, 67, 50, 47, 46, 46, 50, 54, 57, 61, 63, 64, 66, 69, 70, 72,
- 71, 71, 52, 49, 47, 47, 50, 54, 59, 63, 66, 68, 70, 73, 75, 77, 75, 75,
- 54, 51, 49, 48, 52, 55, 60, 64, 68, 71, 73, 76, 79, 80, 79, 79, 57, 53,
- 51, 50, 53, 56, 61, 66, 70, 73, 76, 80, 82, 84, 83, 84, 61, 57, 55, 53,
- 56, 59, 64, 69, 73, 76, 80, 84, 87, 89, 88, 88, 64, 60, 57, 55, 58, 61,
- 66, 70, 75, 79, 82, 87, 91, 93, 93, 93, 67, 62, 59, 57, 60, 63, 67, 72,
- 77, 80, 84, 89, 93, 95, 96, 97, 68, 64, 61, 59, 61, 64, 67, 71, 75, 79,
- 83, 88, 93, 96, 99, 100, 70, 66, 63, 61, 60, 64, 67, 71, 75, 79, 84, 88,
- 93, 97, 100, 102,
- /* Size 32x32 */
- 32, 31, 31, 30, 33, 33, 36, 38, 41, 47, 49, 48, 49, 49, 50, 50, 52, 53,
- 54, 56, 57, 60, 61, 63, 64, 65, 67, 67, 68, 69, 70, 71, 31, 31, 31, 31,
- 34, 34, 38, 40, 42, 46, 47, 47, 47, 47, 48, 48, 50, 50, 52, 54, 54, 57,
- 58, 60, 61, 61, 63, 64, 65, 65, 66, 67, 31, 31, 31, 31, 34, 35, 39, 40,
- 42, 46, 47, 46, 46, 46, 47, 47, 49, 50, 51, 53, 53, 56, 57, 59, 60, 60,
- 62, 63, 64, 65, 66, 67, 30, 31, 31, 32, 34, 35, 40, 41, 42, 45, 46, 45,
- 45, 45, 46, 46, 47, 48, 49, 51, 52, 54, 55, 57, 58, 58, 60, 61, 62, 62,
- 63, 64, 33, 34, 34, 34, 37, 38, 42, 43, 44, 46, 47, 46, 46, 45, 46, 46,
- 47, 48, 49, 51, 51, 53, 55, 56, 57, 57, 59, 60, 61, 62, 63, 64, 33, 34,
- 35, 35, 38, 39, 43, 44, 45, 47, 47, 46, 46, 45, 46, 46, 47, 48, 49, 51,
- 51, 53, 54, 56, 57, 57, 59, 60, 60, 61, 62, 62, 36, 38, 39, 40, 42, 43,
- 47, 47, 47, 47, 48, 46, 46, 45, 46, 46, 47, 47, 48, 49, 50, 52, 53, 54,
- 55, 55, 57, 58, 59, 60, 61, 62, 38, 40, 40, 41, 43, 44, 47, 47, 48, 48,
- 49, 48, 47, 47, 47, 47, 48, 49, 49, 51, 51, 53, 54, 55, 56, 56, 58, 58,
- 58, 59, 60, 60, 41, 42, 42, 42, 44, 45, 47, 48, 48, 50, 50, 49, 49, 49,
- 50, 50, 50, 51, 52, 53, 53, 55, 56, 57, 58, 58, 60, 61, 61, 61, 60, 60,
- 47, 46, 46, 45, 46, 47, 47, 48, 50, 52, 52, 52, 52, 52, 53, 53, 53, 54,
- 55, 55, 56, 58, 58, 60, 60, 61, 62, 61, 61, 62, 63, 64, 49, 47, 47, 46,
- 47, 47, 48, 49, 50, 52, 53, 53, 53, 53, 54, 54, 54, 55, 55, 56, 56, 58,
- 59, 60, 61, 61, 63, 63, 64, 64, 64, 64, 48, 47, 46, 45, 46, 46, 46, 48,
- 49, 52, 53, 54, 55, 55, 56, 56, 57, 58, 58, 59, 60, 61, 62, 63, 64, 64,
- 66, 65, 65, 65, 66, 67, 49, 47, 46, 45, 46, 46, 46, 47, 49, 52, 53, 55,
- 55, 57, 57, 58, 59, 59, 60, 61, 61, 63, 64, 65, 66, 66, 67, 67, 67, 68,
- 67, 67, 49, 47, 46, 45, 45, 45, 45, 47, 49, 52, 53, 55, 57, 58, 59, 60,
- 61, 62, 62, 63, 63, 65, 66, 67, 68, 68, 69, 70, 69, 68, 69, 70, 50, 48,
- 47, 46, 46, 46, 46, 47, 50, 53, 54, 56, 57, 59, 61, 61, 63, 64, 64, 66,
- 66, 68, 69, 70, 70, 71, 72, 70, 71, 72, 71, 70, 50, 48, 47, 46, 46, 46,
- 46, 47, 50, 53, 54, 56, 58, 60, 61, 61, 63, 64, 65, 66, 67, 68, 69, 71,
- 71, 71, 73, 74, 73, 72, 73, 74, 52, 50, 49, 47, 47, 47, 47, 48, 50, 53,
- 54, 57, 59, 61, 63, 63, 66, 67, 68, 70, 70, 72, 73, 75, 75, 75, 77, 75,
- 75, 76, 75, 74, 53, 50, 50, 48, 48, 48, 47, 49, 51, 54, 55, 58, 59, 62,
- 64, 64, 67, 68, 69, 71, 71, 73, 74, 76, 77, 77, 78, 78, 78, 76, 77, 78,
- 54, 52, 51, 49, 49, 49, 48, 49, 52, 55, 55, 58, 60, 62, 64, 65, 68, 69,
- 71, 73, 73, 75, 76, 78, 79, 79, 80, 80, 79, 80, 79, 78, 56, 54, 53, 51,
- 51, 51, 49, 51, 53, 55, 56, 59, 61, 63, 66, 66, 70, 71, 73, 75, 76, 78,
- 79, 81, 82, 82, 83, 81, 83, 81, 81, 82, 57, 54, 53, 52, 51, 51, 50, 51,
- 53, 56, 56, 60, 61, 63, 66, 67, 70, 71, 73, 76, 76, 79, 80, 82, 82, 83,
- 84, 85, 83, 84, 84, 82, 60, 57, 56, 54, 53, 53, 52, 53, 55, 58, 58, 61,
- 63, 65, 68, 68, 72, 73, 75, 78, 79, 82, 83, 85, 86, 86, 88, 86, 87, 86,
- 85, 86, 61, 58, 57, 55, 55, 54, 53, 54, 56, 58, 59, 62, 64, 66, 69, 69,
- 73, 74, 76, 79, 80, 83, 84, 86, 87, 88, 89, 89, 88, 88, 88, 86, 63, 60,
- 59, 57, 56, 56, 54, 55, 57, 60, 60, 63, 65, 67, 70, 71, 75, 76, 78, 81,
- 82, 85, 86, 89, 90, 90, 92, 91, 91, 90, 89, 91, 64, 61, 60, 58, 57, 57,
- 55, 56, 58, 60, 61, 64, 66, 68, 70, 71, 75, 77, 79, 82, 82, 86, 87, 90,
- 91, 91, 93, 93, 93, 92, 93, 91, 65, 61, 60, 58, 57, 57, 55, 56, 58, 61,
- 61, 64, 66, 68, 71, 71, 75, 77, 79, 82, 83, 86, 88, 90, 91, 91, 93, 94,
- 95, 95, 93, 95, 67, 63, 62, 60, 59, 59, 57, 58, 60, 62, 63, 66, 67, 69,
- 72, 73, 77, 78, 80, 83, 84, 88, 89, 92, 93, 93, 95, 95, 96, 96, 97, 95,
- 67, 64, 63, 61, 60, 60, 58, 58, 61, 61, 63, 65, 67, 70, 70, 74, 75, 78,
- 80, 81, 85, 86, 89, 91, 93, 94, 95, 97, 97, 98, 98, 100, 68, 65, 64, 62,
- 61, 60, 59, 58, 61, 61, 64, 65, 67, 69, 71, 73, 75, 78, 79, 83, 83, 87,
- 88, 91, 93, 95, 96, 97, 99, 98, 100, 100, 69, 65, 65, 62, 62, 61, 60,
- 59, 61, 62, 64, 65, 68, 68, 72, 72, 76, 76, 80, 81, 84, 86, 88, 90, 92,
- 95, 96, 98, 98, 100, 100, 101, 70, 66, 66, 63, 63, 62, 61, 60, 60, 63,
- 64, 66, 67, 69, 71, 73, 75, 77, 79, 81, 84, 85, 88, 89, 93, 93, 97, 98,
- 100, 100, 102, 101, 71, 67, 67, 64, 64, 62, 62, 60, 60, 64, 64, 67, 67,
- 70, 70, 74, 74, 78, 78, 82, 82, 86, 86, 91, 91, 95, 95, 100, 100, 101,
- 101, 104,
- /* Size 4x8 */
- 31, 47, 53, 63, 36, 47, 50, 59, 46, 52, 55, 61, 45, 53, 63, 70, 49, 55,
- 71, 77, 54, 58, 77, 86, 59, 61, 81, 94, 63, 65, 80, 95,
- /* Size 8x4 */
- 31, 36, 46, 45, 49, 54, 59, 63, 47, 47, 52, 53, 55, 58, 61, 65, 53, 50,
- 55, 63, 71, 77, 81, 80, 63, 59, 61, 70, 77, 86, 94, 95,
- /* Size 8x16 */
- 32, 33, 45, 49, 52, 57, 64, 68, 31, 34, 45, 46, 49, 53, 60, 64, 33, 37,
- 46, 45, 47, 51, 57, 61, 37, 43, 47, 45, 47, 50, 55, 59, 42, 44, 49, 49,
- 50, 53, 58, 60, 49, 47, 52, 53, 54, 57, 61, 63, 48, 46, 51, 57, 59, 61,
- 66, 67, 50, 46, 52, 59, 63, 66, 71, 71, 52, 47, 53, 61, 66, 71, 75, 74,
- 54, 49, 54, 62, 68, 73, 79, 79, 57, 51, 55, 64, 70, 76, 83, 83, 61, 55,
- 58, 66, 73, 80, 87, 87, 64, 57, 60, 68, 75, 83, 91, 91, 66, 59, 61, 69,
- 77, 84, 93, 95, 68, 61, 61, 68, 77, 86, 94, 97, 70, 63, 61, 67, 75, 83,
- 92, 98,
- /* Size 16x8 */
- 32, 31, 33, 37, 42, 49, 48, 50, 52, 54, 57, 61, 64, 66, 68, 70, 33, 34,
- 37, 43, 44, 47, 46, 46, 47, 49, 51, 55, 57, 59, 61, 63, 45, 45, 46, 47,
- 49, 52, 51, 52, 53, 54, 55, 58, 60, 61, 61, 61, 49, 46, 45, 45, 49, 53,
- 57, 59, 61, 62, 64, 66, 68, 69, 68, 67, 52, 49, 47, 47, 50, 54, 59, 63,
- 66, 68, 70, 73, 75, 77, 77, 75, 57, 53, 51, 50, 53, 57, 61, 66, 71, 73,
- 76, 80, 83, 84, 86, 83, 64, 60, 57, 55, 58, 61, 66, 71, 75, 79, 83, 87,
- 91, 93, 94, 92, 68, 64, 61, 59, 60, 63, 67, 71, 74, 79, 83, 87, 91, 95,
- 97, 98,
- /* Size 16x32 */
- 32, 31, 33, 37, 45, 48, 49, 50, 52, 56, 57, 63, 64, 67, 68, 68, 31, 31,
- 34, 38, 45, 47, 47, 48, 50, 53, 54, 60, 61, 63, 64, 65, 31, 32, 34, 39,
- 45, 46, 46, 47, 49, 52, 53, 59, 60, 62, 64, 65, 30, 32, 35, 40, 44, 46,
- 45, 46, 48, 51, 52, 57, 58, 60, 61, 62, 33, 35, 37, 42, 46, 47, 45, 46,
- 47, 50, 51, 56, 57, 60, 61, 62, 33, 36, 38, 43, 46, 47, 46, 46, 47, 50,
- 51, 56, 57, 59, 60, 60, 37, 40, 43, 47, 47, 47, 45, 46, 47, 49, 50, 54,
- 55, 57, 59, 61, 39, 41, 43, 47, 48, 48, 47, 47, 48, 50, 51, 55, 56, 57,
- 58, 59, 42, 43, 44, 47, 49, 50, 49, 50, 50, 53, 53, 57, 58, 60, 60, 59,
- 47, 46, 46, 48, 51, 52, 53, 53, 53, 55, 56, 60, 61, 61, 61, 62, 49, 46,
- 47, 48, 52, 53, 53, 54, 54, 56, 57, 60, 61, 63, 63, 62, 48, 46, 46, 47,
- 51, 53, 56, 56, 57, 59, 60, 64, 64, 65, 64, 65, 48, 45, 46, 46, 51, 53,
- 57, 57, 59, 61, 61, 65, 66, 66, 67, 65, 49, 45, 45, 46, 51, 53, 58, 59,
- 61, 63, 64, 67, 68, 70, 67, 68, 50, 46, 46, 46, 52, 54, 59, 61, 63, 65,
- 66, 70, 71, 70, 71, 68, 50, 46, 46, 46, 52, 54, 59, 61, 64, 66, 67, 71,
- 71, 73, 71, 72, 52, 48, 47, 47, 53, 54, 61, 63, 66, 70, 71, 75, 75, 75,
- 74, 72, 53, 49, 48, 48, 53, 55, 61, 64, 67, 71, 72, 76, 77, 77, 75, 76,
- 54, 50, 49, 49, 54, 55, 62, 65, 68, 72, 73, 78, 79, 80, 79, 76, 56, 51,
- 51, 50, 55, 56, 63, 66, 70, 74, 76, 81, 82, 81, 80, 80, 57, 52, 51, 50,
- 55, 56, 64, 66, 70, 75, 76, 82, 83, 85, 83, 80, 60, 54, 54, 52, 57, 58,
- 65, 68, 72, 77, 79, 85, 86, 86, 85, 84, 61, 56, 55, 53, 58, 59, 66, 69,
- 73, 79, 80, 86, 87, 89, 87, 84, 63, 57, 56, 55, 59, 60, 67, 70, 75, 80,
- 82, 89, 90, 91, 89, 89, 64, 58, 57, 56, 60, 61, 68, 71, 75, 81, 83, 90,
- 91, 93, 91, 89, 64, 59, 58, 56, 60, 61, 68, 71, 75, 81, 83, 90, 91, 94,
- 94, 93, 66, 60, 59, 57, 61, 63, 69, 72, 77, 82, 84, 92, 93, 94, 95, 93,
- 67, 61, 60, 58, 61, 63, 69, 70, 78, 80, 85, 90, 93, 96, 97, 97, 68, 62,
- 61, 59, 61, 64, 68, 71, 77, 79, 86, 88, 94, 96, 97, 98, 69, 63, 62, 59,
- 61, 65, 68, 72, 76, 80, 85, 88, 94, 95, 99, 99, 70, 63, 63, 60, 61, 66,
- 67, 73, 75, 81, 83, 89, 92, 97, 98, 99, 70, 64, 64, 61, 61, 67, 67, 74,
- 74, 82, 82, 90, 90, 98, 98, 102,
- /* Size 32x16 */
- 32, 31, 31, 30, 33, 33, 37, 39, 42, 47, 49, 48, 48, 49, 50, 50, 52, 53,
- 54, 56, 57, 60, 61, 63, 64, 64, 66, 67, 68, 69, 70, 70, 31, 31, 32, 32,
- 35, 36, 40, 41, 43, 46, 46, 46, 45, 45, 46, 46, 48, 49, 50, 51, 52, 54,
- 56, 57, 58, 59, 60, 61, 62, 63, 63, 64, 33, 34, 34, 35, 37, 38, 43, 43,
- 44, 46, 47, 46, 46, 45, 46, 46, 47, 48, 49, 51, 51, 54, 55, 56, 57, 58,
- 59, 60, 61, 62, 63, 64, 37, 38, 39, 40, 42, 43, 47, 47, 47, 48, 48, 47,
- 46, 46, 46, 46, 47, 48, 49, 50, 50, 52, 53, 55, 56, 56, 57, 58, 59, 59,
- 60, 61, 45, 45, 45, 44, 46, 46, 47, 48, 49, 51, 52, 51, 51, 51, 52, 52,
- 53, 53, 54, 55, 55, 57, 58, 59, 60, 60, 61, 61, 61, 61, 61, 61, 48, 47,
- 46, 46, 47, 47, 47, 48, 50, 52, 53, 53, 53, 53, 54, 54, 54, 55, 55, 56,
- 56, 58, 59, 60, 61, 61, 63, 63, 64, 65, 66, 67, 49, 47, 46, 45, 45, 46,
- 45, 47, 49, 53, 53, 56, 57, 58, 59, 59, 61, 61, 62, 63, 64, 65, 66, 67,
- 68, 68, 69, 69, 68, 68, 67, 67, 50, 48, 47, 46, 46, 46, 46, 47, 50, 53,
- 54, 56, 57, 59, 61, 61, 63, 64, 65, 66, 66, 68, 69, 70, 71, 71, 72, 70,
- 71, 72, 73, 74, 52, 50, 49, 48, 47, 47, 47, 48, 50, 53, 54, 57, 59, 61,
- 63, 64, 66, 67, 68, 70, 70, 72, 73, 75, 75, 75, 77, 78, 77, 76, 75, 74,
- 56, 53, 52, 51, 50, 50, 49, 50, 53, 55, 56, 59, 61, 63, 65, 66, 70, 71,
- 72, 74, 75, 77, 79, 80, 81, 81, 82, 80, 79, 80, 81, 82, 57, 54, 53, 52,
- 51, 51, 50, 51, 53, 56, 57, 60, 61, 64, 66, 67, 71, 72, 73, 76, 76, 79,
- 80, 82, 83, 83, 84, 85, 86, 85, 83, 82, 63, 60, 59, 57, 56, 56, 54, 55,
- 57, 60, 60, 64, 65, 67, 70, 71, 75, 76, 78, 81, 82, 85, 86, 89, 90, 90,
- 92, 90, 88, 88, 89, 90, 64, 61, 60, 58, 57, 57, 55, 56, 58, 61, 61, 64,
- 66, 68, 71, 71, 75, 77, 79, 82, 83, 86, 87, 90, 91, 91, 93, 93, 94, 94,
- 92, 90, 67, 63, 62, 60, 60, 59, 57, 57, 60, 61, 63, 65, 66, 70, 70, 73,
- 75, 77, 80, 81, 85, 86, 89, 91, 93, 94, 94, 96, 96, 95, 97, 98, 68, 64,
- 64, 61, 61, 60, 59, 58, 60, 61, 63, 64, 67, 67, 71, 71, 74, 75, 79, 80,
- 83, 85, 87, 89, 91, 94, 95, 97, 97, 99, 98, 98, 68, 65, 65, 62, 62, 60,
- 61, 59, 59, 62, 62, 65, 65, 68, 68, 72, 72, 76, 76, 80, 80, 84, 84, 89,
- 89, 93, 93, 97, 98, 99, 99, 102,
- /* Size 4x16 */
- 31, 48, 56, 67, 32, 46, 52, 62, 35, 47, 50, 60, 40, 47, 49, 57, 43, 50,
- 53, 60, 46, 53, 56, 63, 45, 53, 61, 66, 46, 54, 65, 70, 48, 54, 70, 75,
- 50, 55, 72, 80, 52, 56, 75, 85, 56, 59, 79, 89, 58, 61, 81, 93, 60, 63,
- 82, 94, 62, 64, 79, 96, 63, 66, 81, 97,
- /* Size 16x4 */
- 31, 32, 35, 40, 43, 46, 45, 46, 48, 50, 52, 56, 58, 60, 62, 63, 48, 46,
- 47, 47, 50, 53, 53, 54, 54, 55, 56, 59, 61, 63, 64, 66, 56, 52, 50, 49,
- 53, 56, 61, 65, 70, 72, 75, 79, 81, 82, 79, 81, 67, 62, 60, 57, 60, 63,
- 66, 70, 75, 80, 85, 89, 93, 94, 96, 97,
- /* Size 8x32 */
- 32, 33, 45, 49, 52, 57, 64, 68, 31, 34, 45, 47, 50, 54, 61, 64, 31, 34,
- 45, 46, 49, 53, 60, 64, 30, 35, 44, 45, 48, 52, 58, 61, 33, 37, 46, 45,
- 47, 51, 57, 61, 33, 38, 46, 46, 47, 51, 57, 60, 37, 43, 47, 45, 47, 50,
- 55, 59, 39, 43, 48, 47, 48, 51, 56, 58, 42, 44, 49, 49, 50, 53, 58, 60,
- 47, 46, 51, 53, 53, 56, 61, 61, 49, 47, 52, 53, 54, 57, 61, 63, 48, 46,
- 51, 56, 57, 60, 64, 64, 48, 46, 51, 57, 59, 61, 66, 67, 49, 45, 51, 58,
- 61, 64, 68, 67, 50, 46, 52, 59, 63, 66, 71, 71, 50, 46, 52, 59, 64, 67,
- 71, 71, 52, 47, 53, 61, 66, 71, 75, 74, 53, 48, 53, 61, 67, 72, 77, 75,
- 54, 49, 54, 62, 68, 73, 79, 79, 56, 51, 55, 63, 70, 76, 82, 80, 57, 51,
- 55, 64, 70, 76, 83, 83, 60, 54, 57, 65, 72, 79, 86, 85, 61, 55, 58, 66,
- 73, 80, 87, 87, 63, 56, 59, 67, 75, 82, 90, 89, 64, 57, 60, 68, 75, 83,
- 91, 91, 64, 58, 60, 68, 75, 83, 91, 94, 66, 59, 61, 69, 77, 84, 93, 95,
- 67, 60, 61, 69, 78, 85, 93, 97, 68, 61, 61, 68, 77, 86, 94, 97, 69, 62,
- 61, 68, 76, 85, 94, 99, 70, 63, 61, 67, 75, 83, 92, 98, 70, 64, 61, 67,
- 74, 82, 90, 98,
- /* Size 32x8 */
- 32, 31, 31, 30, 33, 33, 37, 39, 42, 47, 49, 48, 48, 49, 50, 50, 52, 53,
- 54, 56, 57, 60, 61, 63, 64, 64, 66, 67, 68, 69, 70, 70, 33, 34, 34, 35,
- 37, 38, 43, 43, 44, 46, 47, 46, 46, 45, 46, 46, 47, 48, 49, 51, 51, 54,
- 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 45, 45, 45, 44, 46, 46, 47, 48,
- 49, 51, 52, 51, 51, 51, 52, 52, 53, 53, 54, 55, 55, 57, 58, 59, 60, 60,
- 61, 61, 61, 61, 61, 61, 49, 47, 46, 45, 45, 46, 45, 47, 49, 53, 53, 56,
- 57, 58, 59, 59, 61, 61, 62, 63, 64, 65, 66, 67, 68, 68, 69, 69, 68, 68,
- 67, 67, 52, 50, 49, 48, 47, 47, 47, 48, 50, 53, 54, 57, 59, 61, 63, 64,
- 66, 67, 68, 70, 70, 72, 73, 75, 75, 75, 77, 78, 77, 76, 75, 74, 57, 54,
- 53, 52, 51, 51, 50, 51, 53, 56, 57, 60, 61, 64, 66, 67, 71, 72, 73, 76,
- 76, 79, 80, 82, 83, 83, 84, 85, 86, 85, 83, 82, 64, 61, 60, 58, 57, 57,
- 55, 56, 58, 61, 61, 64, 66, 68, 71, 71, 75, 77, 79, 82, 83, 86, 87, 90,
- 91, 91, 93, 93, 94, 94, 92, 90, 68, 64, 64, 61, 61, 60, 59, 58, 60, 61,
- 63, 64, 67, 67, 71, 71, 74, 75, 79, 80, 83, 85, 87, 89, 91, 94, 95, 97,
- 97, 99, 98, 98 },
- },
- {
- { /* Luma */
- /* Size 4x4 */
- 32, 34, 53, 75, 34, 49, 64, 81, 53, 64, 91, 112, 75, 81, 112, 140,
- /* Size 8x8 */
- 32, 32, 34, 39, 50, 62, 76, 84, 32, 33, 35, 40, 48, 59, 71, 79, 34, 35,
- 39, 46, 53, 63, 74, 81, 39, 40, 46, 56, 65, 75, 86, 92, 50, 48, 53, 65,
- 78, 90, 101, 106, 62, 59, 63, 75, 90, 105, 118, 123, 76, 71, 74, 86,
- 101, 118, 134, 142, 84, 79, 81, 92, 106, 123, 142, 153,
- /* Size 16x16 */
- 32, 31, 31, 32, 33, 36, 39, 44, 48, 54, 59, 66, 74, 81, 86, 91, 31, 32,
- 32, 32, 33, 35, 38, 42, 46, 51, 56, 63, 70, 77, 81, 86, 31, 32, 32, 33,
- 34, 35, 38, 41, 45, 49, 54, 60, 67, 73, 77, 82, 32, 32, 33, 34, 36, 37,
- 40, 42, 45, 49, 53, 59, 66, 71, 75, 80, 33, 33, 34, 36, 38, 42, 44, 46,
- 50, 53, 57, 63, 69, 74, 78, 80, 36, 35, 35, 37, 42, 48, 50, 54, 57, 60,
- 64, 69, 75, 80, 84, 85, 39, 38, 38, 40, 44, 50, 54, 58, 61, 65, 69, 74,
- 80, 85, 89, 91, 44, 42, 41, 42, 46, 54, 58, 63, 67, 71, 75, 80, 86, 91,
- 95, 97, 48, 46, 45, 45, 50, 57, 61, 67, 71, 76, 80, 86, 93, 98, 101,
- 104, 54, 51, 49, 49, 53, 60, 65, 71, 76, 82, 87, 93, 100, 105, 109, 112,
- 59, 56, 54, 53, 57, 64, 69, 75, 80, 87, 92, 99, 106, 112, 116, 120, 66,
- 63, 60, 59, 63, 69, 74, 80, 86, 93, 99, 107, 115, 121, 125, 129, 74, 70,
- 67, 66, 69, 75, 80, 86, 93, 100, 106, 115, 123, 130, 135, 138, 81, 77,
- 73, 71, 74, 80, 85, 91, 98, 105, 112, 121, 130, 137, 142, 148, 86, 81,
- 77, 75, 78, 84, 89, 95, 101, 109, 116, 125, 135, 142, 147, 153, 91, 86,
- 82, 80, 80, 85, 91, 97, 104, 112, 120, 129, 138, 148, 153, 159,
- /* Size 32x32 */
- 32, 31, 31, 31, 31, 31, 32, 32, 33, 34, 36, 36, 39, 41, 44, 46, 48, 52,
- 54, 58, 59, 65, 66, 71, 74, 80, 81, 83, 86, 89, 91, 93, 31, 32, 32, 32,
- 32, 32, 32, 32, 33, 34, 35, 35, 38, 39, 42, 44, 46, 50, 51, 56, 56, 62,
- 63, 68, 71, 76, 77, 78, 82, 84, 86, 88, 31, 32, 32, 32, 32, 32, 32, 32,
- 33, 34, 35, 35, 38, 39, 42, 44, 46, 49, 51, 55, 56, 61, 63, 67, 70, 75,
- 77, 78, 81, 84, 86, 88, 31, 32, 32, 32, 32, 32, 32, 32, 33, 33, 34, 34,
- 37, 38, 41, 42, 44, 48, 49, 53, 54, 59, 60, 65, 68, 72, 74, 75, 78, 80,
- 82, 84, 31, 32, 32, 32, 32, 33, 33, 33, 34, 34, 35, 35, 38, 39, 41, 43,
- 45, 48, 49, 53, 54, 59, 60, 65, 67, 72, 73, 74, 77, 80, 82, 84, 31, 32,
- 32, 32, 33, 33, 33, 34, 35, 35, 36, 36, 39, 40, 42, 44, 45, 48, 50, 53,
- 54, 59, 60, 64, 67, 71, 73, 74, 77, 79, 81, 83, 32, 32, 32, 32, 33, 33,
- 34, 35, 36, 36, 37, 38, 40, 40, 42, 44, 45, 48, 49, 53, 53, 58, 59, 63,
- 66, 70, 71, 72, 75, 78, 80, 83, 32, 32, 32, 32, 33, 34, 35, 35, 36, 37,
- 38, 38, 40, 41, 42, 44, 46, 48, 49, 53, 53, 58, 59, 63, 65, 69, 71, 72,
- 74, 77, 79, 80, 33, 33, 33, 33, 34, 35, 36, 36, 38, 39, 42, 42, 44, 45,
- 46, 48, 50, 52, 53, 57, 57, 62, 63, 67, 69, 73, 74, 75, 78, 79, 80, 81,
- 34, 34, 34, 33, 34, 35, 36, 37, 39, 39, 42, 43, 45, 46, 47, 49, 51, 53,
- 54, 58, 58, 63, 64, 68, 70, 74, 75, 76, 79, 81, 84, 86, 36, 35, 35, 34,
- 35, 36, 37, 38, 42, 42, 48, 48, 50, 51, 54, 55, 57, 59, 60, 63, 64, 68,
- 69, 73, 75, 79, 80, 81, 84, 85, 85, 86, 36, 35, 35, 34, 35, 36, 38, 38,
- 42, 43, 48, 49, 51, 52, 54, 55, 57, 59, 60, 64, 64, 68, 69, 73, 75, 79,
- 80, 81, 84, 86, 88, 91, 39, 38, 38, 37, 38, 39, 40, 40, 44, 45, 50, 51,
- 54, 55, 58, 59, 61, 64, 65, 68, 69, 73, 74, 78, 80, 84, 85, 86, 89, 91,
- 91, 91, 41, 39, 39, 38, 39, 40, 40, 41, 45, 46, 51, 52, 55, 56, 59, 61,
- 63, 65, 67, 70, 70, 75, 76, 80, 82, 86, 87, 88, 91, 92, 94, 96, 44, 42,
- 42, 41, 41, 42, 42, 42, 46, 47, 54, 54, 58, 59, 63, 65, 67, 70, 71, 75,
- 75, 79, 80, 84, 86, 90, 91, 92, 95, 97, 97, 97, 46, 44, 44, 42, 43, 44,
- 44, 44, 48, 49, 55, 55, 59, 61, 65, 67, 69, 72, 74, 77, 78, 82, 83, 87,
- 89, 93, 94, 95, 98, 98, 100, 103, 48, 46, 46, 44, 45, 45, 45, 46, 50,
- 51, 57, 57, 61, 63, 67, 69, 71, 74, 76, 80, 80, 85, 86, 90, 93, 96, 98,
- 99, 101, 104, 104, 103, 52, 50, 49, 48, 48, 48, 48, 48, 52, 53, 59, 59,
- 64, 65, 70, 72, 74, 78, 80, 84, 85, 90, 91, 95, 97, 101, 103, 104, 106,
- 106, 107, 110, 54, 51, 51, 49, 49, 50, 49, 49, 53, 54, 60, 60, 65, 67,
- 71, 74, 76, 80, 82, 86, 87, 92, 93, 97, 100, 104, 105, 106, 109, 112,
- 112, 110, 58, 56, 55, 53, 53, 53, 53, 53, 57, 58, 63, 64, 68, 70, 75,
- 77, 80, 84, 86, 91, 91, 97, 98, 103, 105, 110, 111, 112, 115, 114, 115,
- 118, 59, 56, 56, 54, 54, 54, 53, 53, 57, 58, 64, 64, 69, 70, 75, 78, 80,
- 85, 87, 91, 92, 98, 99, 103, 106, 110, 112, 113, 116, 119, 120, 119, 65,
- 62, 61, 59, 59, 59, 58, 58, 62, 63, 68, 68, 73, 75, 79, 82, 85, 90, 92,
- 97, 98, 105, 106, 111, 114, 118, 120, 121, 124, 123, 123, 126, 66, 63,
- 63, 60, 60, 60, 59, 59, 63, 64, 69, 69, 74, 76, 80, 83, 86, 91, 93, 98,
- 99, 106, 107, 112, 115, 119, 121, 122, 125, 128, 129, 126, 71, 68, 67,
- 65, 65, 64, 63, 63, 67, 68, 73, 73, 78, 80, 84, 87, 90, 95, 97, 103,
- 103, 111, 112, 117, 120, 125, 127, 128, 131, 132, 132, 135, 74, 71, 70,
- 68, 67, 67, 66, 65, 69, 70, 75, 75, 80, 82, 86, 89, 93, 97, 100, 105,
- 106, 114, 115, 120, 123, 128, 130, 131, 135, 135, 138, 136, 80, 76, 75,
- 72, 72, 71, 70, 69, 73, 74, 79, 79, 84, 86, 90, 93, 96, 101, 104, 110,
- 110, 118, 119, 125, 128, 134, 136, 137, 140, 142, 140, 144, 81, 77, 77,
- 74, 73, 73, 71, 71, 74, 75, 80, 80, 85, 87, 91, 94, 98, 103, 105, 111,
- 112, 120, 121, 127, 130, 136, 137, 139, 142, 145, 148, 144, 83, 78, 78,
- 75, 74, 74, 72, 72, 75, 76, 81, 81, 86, 88, 92, 95, 99, 104, 106, 112,
- 113, 121, 122, 128, 131, 137, 139, 140, 144, 148, 150, 155, 86, 82, 81,
- 78, 77, 77, 75, 74, 78, 79, 84, 84, 89, 91, 95, 98, 101, 106, 109, 115,
- 116, 124, 125, 131, 135, 140, 142, 144, 147, 149, 153, 155, 89, 84, 84,
- 80, 80, 79, 78, 77, 79, 81, 85, 86, 91, 92, 97, 98, 104, 106, 112, 114,
- 119, 123, 128, 132, 135, 142, 145, 148, 149, 153, 154, 159, 91, 86, 86,
- 82, 82, 81, 80, 79, 80, 84, 85, 88, 91, 94, 97, 100, 104, 107, 112, 115,
- 120, 123, 129, 132, 138, 140, 148, 150, 153, 154, 159, 159, 93, 88, 88,
- 84, 84, 83, 83, 80, 81, 86, 86, 91, 91, 96, 97, 103, 103, 110, 110, 118,
- 119, 126, 126, 135, 136, 144, 144, 155, 155, 159, 159, 164,
- /* Size 4x8 */
- 32, 35, 51, 77, 32, 36, 50, 72, 34, 42, 54, 75, 38, 51, 67, 87, 48, 59,
- 80, 103, 60, 68, 92, 119, 72, 79, 104, 135, 81, 86, 112, 144,
- /* Size 8x4 */
- 32, 32, 34, 38, 48, 60, 72, 81, 35, 36, 42, 51, 59, 68, 79, 86, 51, 50,
- 54, 67, 80, 92, 104, 112, 77, 72, 75, 87, 103, 119, 135, 144,
- /* Size 8x16 */
- 32, 31, 33, 40, 51, 65, 79, 87, 31, 32, 33, 39, 49, 61, 74, 82, 31, 32,
- 34, 38, 47, 59, 71, 79, 32, 33, 36, 40, 48, 58, 69, 77, 33, 34, 38, 44,
- 52, 62, 72, 78, 36, 35, 42, 51, 58, 68, 78, 84, 39, 38, 44, 54, 63, 73,
- 84, 89, 44, 41, 46, 59, 69, 79, 90, 96, 48, 45, 50, 62, 74, 85, 96, 103,
- 53, 49, 53, 66, 79, 92, 103, 111, 58, 54, 57, 70, 84, 98, 110, 118, 66,
- 60, 63, 75, 90, 106, 119, 126, 74, 67, 69, 81, 97, 113, 128, 134, 81,
- 73, 75, 86, 102, 120, 135, 143, 86, 78, 78, 90, 106, 124, 140, 147, 91,
- 82, 80, 90, 103, 119, 137, 151,
- /* Size 16x8 */
- 32, 31, 31, 32, 33, 36, 39, 44, 48, 53, 58, 66, 74, 81, 86, 91, 31, 32,
- 32, 33, 34, 35, 38, 41, 45, 49, 54, 60, 67, 73, 78, 82, 33, 33, 34, 36,
- 38, 42, 44, 46, 50, 53, 57, 63, 69, 75, 78, 80, 40, 39, 38, 40, 44, 51,
- 54, 59, 62, 66, 70, 75, 81, 86, 90, 90, 51, 49, 47, 48, 52, 58, 63, 69,
- 74, 79, 84, 90, 97, 102, 106, 103, 65, 61, 59, 58, 62, 68, 73, 79, 85,
- 92, 98, 106, 113, 120, 124, 119, 79, 74, 71, 69, 72, 78, 84, 90, 96,
- 103, 110, 119, 128, 135, 140, 137, 87, 82, 79, 77, 78, 84, 89, 96, 103,
- 111, 118, 126, 134, 143, 147, 151,
- /* Size 16x32 */
- 32, 31, 31, 32, 33, 36, 40, 44, 51, 53, 65, 66, 79, 81, 87, 90, 31, 32,
- 32, 32, 33, 35, 39, 42, 49, 51, 62, 63, 75, 77, 83, 85, 31, 32, 32, 32,
- 33, 35, 39, 42, 49, 51, 61, 62, 74, 76, 82, 85, 31, 32, 32, 33, 33, 34,
- 38, 41, 47, 49, 59, 60, 72, 74, 79, 81, 31, 32, 32, 33, 34, 35, 38, 41,
- 47, 49, 59, 60, 71, 73, 79, 81, 32, 32, 33, 34, 35, 36, 39, 42, 48, 50,
- 59, 60, 71, 72, 78, 80, 32, 32, 33, 35, 36, 37, 40, 42, 48, 49, 58, 59,
- 69, 71, 77, 80, 32, 33, 33, 35, 36, 38, 41, 42, 48, 49, 58, 59, 69, 70,
- 75, 77, 33, 33, 34, 36, 38, 41, 44, 46, 52, 53, 62, 63, 72, 74, 78, 78,
- 34, 34, 34, 37, 39, 42, 45, 48, 53, 54, 63, 64, 73, 75, 80, 83, 36, 34,
- 35, 38, 42, 48, 51, 54, 58, 60, 68, 69, 78, 80, 84, 83, 36, 35, 35, 38,
- 42, 48, 51, 54, 59, 60, 68, 69, 79, 80, 85, 87, 39, 37, 38, 40, 44, 50,
- 54, 58, 63, 65, 73, 74, 84, 85, 89, 88, 40, 38, 39, 41, 45, 51, 56, 59,
- 65, 67, 75, 76, 85, 87, 90, 93, 44, 41, 41, 43, 46, 53, 59, 63, 69, 71,
- 79, 80, 90, 91, 96, 93, 46, 43, 43, 44, 48, 55, 60, 65, 72, 73, 82, 83,
- 93, 94, 97, 100, 48, 45, 45, 46, 50, 56, 62, 67, 74, 76, 85, 86, 96, 98,
- 103, 100, 52, 48, 48, 49, 52, 59, 65, 70, 78, 80, 90, 91, 101, 103, 105,
- 107, 53, 49, 49, 50, 53, 60, 66, 71, 79, 82, 92, 93, 103, 105, 111, 107,
- 58, 53, 53, 53, 57, 63, 69, 74, 83, 86, 97, 98, 109, 111, 113, 115, 58,
- 54, 54, 54, 57, 63, 70, 75, 84, 87, 98, 99, 110, 112, 118, 115, 65, 60,
- 59, 58, 62, 68, 74, 79, 89, 92, 105, 106, 118, 119, 122, 123, 66, 61,
- 60, 59, 63, 69, 75, 80, 90, 93, 106, 107, 119, 121, 126, 123, 71, 65,
- 65, 63, 67, 73, 79, 84, 94, 97, 111, 112, 125, 127, 131, 132, 74, 68,
- 67, 66, 69, 75, 81, 86, 97, 100, 113, 115, 128, 130, 134, 132, 79, 72,
- 72, 70, 73, 79, 85, 90, 101, 104, 118, 119, 133, 135, 141, 140, 81, 74,
- 73, 71, 75, 80, 86, 91, 102, 105, 120, 121, 135, 137, 143, 140, 82, 75,
- 74, 72, 75, 81, 87, 92, 103, 106, 121, 122, 136, 139, 147, 151, 86, 78,
- 78, 75, 78, 84, 90, 95, 106, 109, 124, 125, 140, 142, 147, 151, 88, 81,
- 80, 77, 80, 86, 90, 98, 105, 112, 122, 127, 140, 144, 152, 155, 91, 83,
- 82, 79, 80, 88, 90, 100, 103, 114, 119, 130, 137, 148, 151, 155, 93, 85,
- 85, 81, 81, 90, 90, 102, 103, 117, 117, 134, 134, 151, 152, 160,
- /* Size 32x16 */
- 32, 31, 31, 31, 31, 32, 32, 32, 33, 34, 36, 36, 39, 40, 44, 46, 48, 52,
- 53, 58, 58, 65, 66, 71, 74, 79, 81, 82, 86, 88, 91, 93, 31, 32, 32, 32,
- 32, 32, 32, 33, 33, 34, 34, 35, 37, 38, 41, 43, 45, 48, 49, 53, 54, 60,
- 61, 65, 68, 72, 74, 75, 78, 81, 83, 85, 31, 32, 32, 32, 32, 33, 33, 33,
- 34, 34, 35, 35, 38, 39, 41, 43, 45, 48, 49, 53, 54, 59, 60, 65, 67, 72,
- 73, 74, 78, 80, 82, 85, 32, 32, 32, 33, 33, 34, 35, 35, 36, 37, 38, 38,
- 40, 41, 43, 44, 46, 49, 50, 53, 54, 58, 59, 63, 66, 70, 71, 72, 75, 77,
- 79, 81, 33, 33, 33, 33, 34, 35, 36, 36, 38, 39, 42, 42, 44, 45, 46, 48,
- 50, 52, 53, 57, 57, 62, 63, 67, 69, 73, 75, 75, 78, 80, 80, 81, 36, 35,
- 35, 34, 35, 36, 37, 38, 41, 42, 48, 48, 50, 51, 53, 55, 56, 59, 60, 63,
- 63, 68, 69, 73, 75, 79, 80, 81, 84, 86, 88, 90, 40, 39, 39, 38, 38, 39,
- 40, 41, 44, 45, 51, 51, 54, 56, 59, 60, 62, 65, 66, 69, 70, 74, 75, 79,
- 81, 85, 86, 87, 90, 90, 90, 90, 44, 42, 42, 41, 41, 42, 42, 42, 46, 48,
- 54, 54, 58, 59, 63, 65, 67, 70, 71, 74, 75, 79, 80, 84, 86, 90, 91, 92,
- 95, 98, 100, 102, 51, 49, 49, 47, 47, 48, 48, 48, 52, 53, 58, 59, 63,
- 65, 69, 72, 74, 78, 79, 83, 84, 89, 90, 94, 97, 101, 102, 103, 106, 105,
- 103, 103, 53, 51, 51, 49, 49, 50, 49, 49, 53, 54, 60, 60, 65, 67, 71,
- 73, 76, 80, 82, 86, 87, 92, 93, 97, 100, 104, 105, 106, 109, 112, 114,
- 117, 65, 62, 61, 59, 59, 59, 58, 58, 62, 63, 68, 68, 73, 75, 79, 82, 85,
- 90, 92, 97, 98, 105, 106, 111, 113, 118, 120, 121, 124, 122, 119, 117,
- 66, 63, 62, 60, 60, 60, 59, 59, 63, 64, 69, 69, 74, 76, 80, 83, 86, 91,
- 93, 98, 99, 106, 107, 112, 115, 119, 121, 122, 125, 127, 130, 134, 79,
- 75, 74, 72, 71, 71, 69, 69, 72, 73, 78, 79, 84, 85, 90, 93, 96, 101,
- 103, 109, 110, 118, 119, 125, 128, 133, 135, 136, 140, 140, 137, 134,
- 81, 77, 76, 74, 73, 72, 71, 70, 74, 75, 80, 80, 85, 87, 91, 94, 98, 103,
- 105, 111, 112, 119, 121, 127, 130, 135, 137, 139, 142, 144, 148, 151,
- 87, 83, 82, 79, 79, 78, 77, 75, 78, 80, 84, 85, 89, 90, 96, 97, 103,
- 105, 111, 113, 118, 122, 126, 131, 134, 141, 143, 147, 147, 152, 151,
- 152, 90, 85, 85, 81, 81, 80, 80, 77, 78, 83, 83, 87, 88, 93, 93, 100,
- 100, 107, 107, 115, 115, 123, 123, 132, 132, 140, 140, 151, 151, 155,
- 155, 160,
- /* Size 4x16 */
- 31, 36, 53, 81, 32, 35, 51, 76, 32, 35, 49, 73, 32, 37, 49, 71, 33, 41,
- 53, 74, 34, 48, 60, 80, 37, 50, 65, 85, 41, 53, 71, 91, 45, 56, 76, 98,
- 49, 60, 82, 105, 54, 63, 87, 112, 61, 69, 93, 121, 68, 75, 100, 130, 74,
- 80, 105, 137, 78, 84, 109, 142, 83, 88, 114, 148,
- /* Size 16x4 */
- 31, 32, 32, 32, 33, 34, 37, 41, 45, 49, 54, 61, 68, 74, 78, 83, 36, 35,
- 35, 37, 41, 48, 50, 53, 56, 60, 63, 69, 75, 80, 84, 88, 53, 51, 49, 49,
- 53, 60, 65, 71, 76, 82, 87, 93, 100, 105, 109, 114, 81, 76, 73, 71, 74,
- 80, 85, 91, 98, 105, 112, 121, 130, 137, 142, 148,
- /* Size 8x32 */
- 32, 31, 33, 40, 51, 65, 79, 87, 31, 32, 33, 39, 49, 62, 75, 83, 31, 32,
- 33, 39, 49, 61, 74, 82, 31, 32, 33, 38, 47, 59, 72, 79, 31, 32, 34, 38,
- 47, 59, 71, 79, 32, 33, 35, 39, 48, 59, 71, 78, 32, 33, 36, 40, 48, 58,
- 69, 77, 32, 33, 36, 41, 48, 58, 69, 75, 33, 34, 38, 44, 52, 62, 72, 78,
- 34, 34, 39, 45, 53, 63, 73, 80, 36, 35, 42, 51, 58, 68, 78, 84, 36, 35,
- 42, 51, 59, 68, 79, 85, 39, 38, 44, 54, 63, 73, 84, 89, 40, 39, 45, 56,
- 65, 75, 85, 90, 44, 41, 46, 59, 69, 79, 90, 96, 46, 43, 48, 60, 72, 82,
- 93, 97, 48, 45, 50, 62, 74, 85, 96, 103, 52, 48, 52, 65, 78, 90, 101,
- 105, 53, 49, 53, 66, 79, 92, 103, 111, 58, 53, 57, 69, 83, 97, 109, 113,
- 58, 54, 57, 70, 84, 98, 110, 118, 65, 59, 62, 74, 89, 105, 118, 122, 66,
- 60, 63, 75, 90, 106, 119, 126, 71, 65, 67, 79, 94, 111, 125, 131, 74,
- 67, 69, 81, 97, 113, 128, 134, 79, 72, 73, 85, 101, 118, 133, 141, 81,
- 73, 75, 86, 102, 120, 135, 143, 82, 74, 75, 87, 103, 121, 136, 147, 86,
- 78, 78, 90, 106, 124, 140, 147, 88, 80, 80, 90, 105, 122, 140, 152, 91,
- 82, 80, 90, 103, 119, 137, 151, 93, 85, 81, 90, 103, 117, 134, 152,
- /* Size 32x8 */
- 32, 31, 31, 31, 31, 32, 32, 32, 33, 34, 36, 36, 39, 40, 44, 46, 48, 52,
- 53, 58, 58, 65, 66, 71, 74, 79, 81, 82, 86, 88, 91, 93, 31, 32, 32, 32,
- 32, 33, 33, 33, 34, 34, 35, 35, 38, 39, 41, 43, 45, 48, 49, 53, 54, 59,
- 60, 65, 67, 72, 73, 74, 78, 80, 82, 85, 33, 33, 33, 33, 34, 35, 36, 36,
- 38, 39, 42, 42, 44, 45, 46, 48, 50, 52, 53, 57, 57, 62, 63, 67, 69, 73,
- 75, 75, 78, 80, 80, 81, 40, 39, 39, 38, 38, 39, 40, 41, 44, 45, 51, 51,
- 54, 56, 59, 60, 62, 65, 66, 69, 70, 74, 75, 79, 81, 85, 86, 87, 90, 90,
- 90, 90, 51, 49, 49, 47, 47, 48, 48, 48, 52, 53, 58, 59, 63, 65, 69, 72,
- 74, 78, 79, 83, 84, 89, 90, 94, 97, 101, 102, 103, 106, 105, 103, 103,
- 65, 62, 61, 59, 59, 59, 58, 58, 62, 63, 68, 68, 73, 75, 79, 82, 85, 90,
- 92, 97, 98, 105, 106, 111, 113, 118, 120, 121, 124, 122, 119, 117, 79,
- 75, 74, 72, 71, 71, 69, 69, 72, 73, 78, 79, 84, 85, 90, 93, 96, 101,
- 103, 109, 110, 118, 119, 125, 128, 133, 135, 136, 140, 140, 137, 134,
- 87, 83, 82, 79, 79, 78, 77, 75, 78, 80, 84, 85, 89, 90, 96, 97, 103,
- 105, 111, 113, 118, 122, 126, 131, 134, 141, 143, 147, 147, 152, 151,
- 152 },
- { /* Chroma */
- /* Size 4x4 */
- 32, 46, 49, 58, 46, 53, 55, 62, 49, 55, 70, 78, 58, 62, 78, 91,
- /* Size 8x8 */
- 31, 34, 42, 47, 49, 54, 60, 64, 34, 39, 45, 46, 47, 51, 56, 59, 42, 45,
- 48, 49, 50, 53, 57, 60, 47, 46, 49, 55, 58, 61, 65, 66, 49, 47, 50, 58,
- 65, 69, 73, 74, 54, 51, 53, 61, 69, 76, 82, 83, 60, 56, 57, 65, 73, 82,
- 89, 92, 64, 59, 60, 66, 74, 83, 92, 96,
- /* Size 16x16 */
- 32, 31, 31, 35, 40, 49, 48, 49, 50, 52, 54, 57, 61, 64, 66, 68, 31, 31,
- 32, 37, 41, 47, 47, 46, 48, 49, 51, 54, 57, 60, 62, 64, 31, 32, 34, 39,
- 43, 46, 46, 45, 46, 47, 49, 52, 55, 57, 59, 61, 35, 37, 39, 44, 46, 47,
- 46, 45, 46, 47, 48, 51, 53, 56, 57, 59, 40, 41, 43, 46, 48, 50, 49, 48,
- 49, 49, 51, 53, 55, 57, 59, 59, 49, 47, 46, 47, 50, 53, 53, 53, 54, 54,
- 55, 57, 59, 61, 62, 62, 48, 47, 46, 46, 49, 53, 54, 55, 56, 57, 58, 60,
- 62, 64, 65, 65, 49, 46, 45, 45, 48, 53, 55, 58, 60, 61, 62, 64, 66, 68,
- 69, 69, 50, 48, 46, 46, 49, 54, 56, 60, 61, 63, 65, 67, 69, 71, 72, 72,
- 52, 49, 47, 47, 49, 54, 57, 61, 63, 66, 68, 71, 73, 75, 76, 77, 54, 51,
- 49, 48, 51, 55, 58, 62, 65, 68, 71, 74, 76, 78, 80, 81, 57, 54, 52, 51,
- 53, 57, 60, 64, 67, 71, 74, 77, 80, 83, 84, 85, 61, 57, 55, 53, 55, 59,
- 62, 66, 69, 73, 76, 80, 84, 87, 89, 89, 64, 60, 57, 56, 57, 61, 64, 68,
- 71, 75, 78, 83, 87, 90, 92, 94, 66, 62, 59, 57, 59, 62, 65, 69, 72, 76,
- 80, 84, 89, 92, 94, 96, 68, 64, 61, 59, 59, 62, 65, 69, 72, 77, 81, 85,
- 89, 94, 96, 98,
- /* Size 32x32 */
- 32, 31, 31, 30, 31, 33, 35, 36, 40, 41, 49, 49, 48, 48, 49, 50, 50, 52,
- 52, 54, 54, 57, 57, 60, 61, 63, 64, 65, 66, 67, 68, 69, 31, 31, 31, 31,
- 32, 34, 37, 38, 41, 42, 47, 47, 47, 47, 47, 47, 48, 49, 50, 52, 52, 54,
- 55, 57, 58, 60, 61, 61, 63, 64, 64, 65, 31, 31, 31, 31, 32, 35, 37, 39,
- 41, 42, 47, 47, 47, 46, 46, 47, 48, 49, 49, 51, 51, 54, 54, 56, 57, 59,
- 60, 61, 62, 63, 64, 65, 30, 31, 31, 32, 33, 35, 38, 40, 42, 42, 46, 46,
- 45, 45, 45, 45, 46, 47, 47, 49, 49, 52, 52, 54, 55, 57, 58, 58, 60, 61,
- 61, 62, 31, 32, 32, 33, 34, 37, 39, 41, 43, 43, 46, 46, 46, 45, 45, 46,
- 46, 47, 47, 49, 49, 51, 52, 54, 55, 57, 57, 58, 59, 60, 61, 62, 33, 34,
- 35, 35, 37, 39, 41, 43, 44, 45, 47, 47, 46, 46, 45, 46, 46, 47, 47, 49,
- 49, 51, 51, 53, 54, 56, 57, 57, 58, 59, 60, 61, 35, 37, 37, 38, 39, 41,
- 44, 46, 46, 46, 47, 47, 46, 46, 45, 46, 46, 47, 47, 48, 48, 50, 51, 52,
- 53, 55, 56, 56, 57, 58, 59, 61, 36, 38, 39, 40, 41, 43, 46, 47, 47, 47,
- 48, 47, 46, 46, 45, 46, 46, 46, 47, 48, 48, 50, 50, 52, 53, 54, 55, 55,
- 56, 57, 58, 58, 40, 41, 41, 42, 43, 44, 46, 47, 48, 48, 50, 49, 49, 49,
- 48, 49, 49, 49, 49, 51, 51, 52, 53, 54, 55, 57, 57, 58, 59, 59, 59, 59,
- 41, 42, 42, 42, 43, 45, 46, 47, 48, 48, 50, 50, 49, 49, 49, 49, 50, 50,
- 50, 52, 52, 53, 53, 55, 56, 57, 58, 58, 59, 60, 61, 62, 49, 47, 47, 46,
- 46, 47, 47, 48, 50, 50, 53, 53, 53, 53, 53, 54, 54, 54, 54, 55, 55, 56,
- 57, 58, 59, 60, 61, 61, 62, 62, 62, 62, 49, 47, 47, 46, 46, 47, 47, 47,
- 49, 50, 53, 53, 53, 53, 54, 54, 54, 54, 54, 55, 56, 57, 57, 59, 59, 61,
- 61, 62, 63, 63, 64, 65, 48, 47, 47, 45, 46, 46, 46, 46, 49, 49, 53, 53,
- 54, 54, 55, 56, 56, 57, 57, 58, 58, 60, 60, 61, 62, 63, 64, 64, 65, 66,
- 65, 65, 48, 47, 46, 45, 45, 46, 46, 46, 49, 49, 53, 53, 54, 55, 56, 57,
- 57, 58, 58, 59, 60, 61, 61, 63, 63, 65, 65, 65, 66, 66, 67, 68, 49, 47,
- 46, 45, 45, 45, 45, 45, 48, 49, 53, 54, 55, 56, 58, 59, 60, 61, 61, 62,
- 62, 63, 64, 65, 66, 67, 68, 68, 69, 70, 69, 68, 50, 47, 47, 45, 46, 46,
- 46, 46, 49, 49, 54, 54, 56, 57, 59, 60, 60, 62, 62, 63, 64, 65, 65, 67,
- 68, 69, 69, 70, 70, 70, 71, 71, 50, 48, 48, 46, 46, 46, 46, 46, 49, 50,
- 54, 54, 56, 57, 60, 60, 61, 63, 63, 65, 65, 67, 67, 68, 69, 71, 71, 71,
- 72, 73, 72, 71, 52, 49, 49, 47, 47, 47, 47, 46, 49, 50, 54, 54, 57, 58,
- 61, 62, 63, 65, 65, 67, 67, 69, 70, 71, 72, 73, 74, 74, 75, 74, 74, 75,
- 52, 50, 49, 47, 47, 47, 47, 47, 49, 50, 54, 54, 57, 58, 61, 62, 63, 65,
- 66, 68, 68, 70, 71, 72, 73, 75, 75, 75, 76, 77, 77, 75, 54, 52, 51, 49,
- 49, 49, 48, 48, 51, 52, 55, 55, 58, 59, 62, 63, 65, 67, 68, 70, 70, 73,
- 73, 75, 76, 78, 78, 78, 79, 78, 78, 79, 54, 52, 51, 49, 49, 49, 48, 48,
- 51, 52, 55, 56, 58, 60, 62, 64, 65, 67, 68, 70, 71, 73, 74, 75, 76, 78,
- 78, 79, 80, 81, 81, 79, 57, 54, 54, 52, 51, 51, 50, 50, 52, 53, 56, 57,
- 60, 61, 63, 65, 67, 69, 70, 73, 73, 76, 77, 79, 80, 82, 82, 83, 84, 83,
- 82, 83, 57, 55, 54, 52, 52, 51, 51, 50, 53, 53, 57, 57, 60, 61, 64, 65,
- 67, 70, 71, 73, 74, 77, 77, 79, 80, 82, 83, 83, 84, 85, 85, 83, 60, 57,
- 56, 54, 54, 53, 52, 52, 54, 55, 58, 59, 61, 63, 65, 67, 68, 71, 72, 75,
- 75, 79, 79, 82, 83, 85, 86, 86, 87, 87, 86, 87, 61, 58, 57, 55, 55, 54,
- 53, 53, 55, 56, 59, 59, 62, 63, 66, 68, 69, 72, 73, 76, 76, 80, 80, 83,
- 84, 86, 87, 88, 89, 89, 89, 87, 63, 60, 59, 57, 57, 56, 55, 54, 57, 57,
- 60, 61, 63, 65, 67, 69, 71, 73, 75, 78, 78, 82, 82, 85, 86, 89, 89, 90,
- 91, 92, 90, 91, 64, 61, 60, 58, 57, 57, 56, 55, 57, 58, 61, 61, 64, 65,
- 68, 69, 71, 74, 75, 78, 78, 82, 83, 86, 87, 89, 90, 91, 92, 93, 94, 91,
- 65, 61, 61, 58, 58, 57, 56, 55, 58, 58, 61, 62, 64, 65, 68, 70, 71, 74,
- 75, 78, 79, 83, 83, 86, 88, 90, 91, 91, 93, 94, 94, 96, 66, 63, 62, 60,
- 59, 58, 57, 56, 59, 59, 62, 63, 65, 66, 69, 70, 72, 75, 76, 79, 80, 84,
- 84, 87, 89, 91, 92, 93, 94, 94, 96, 96, 67, 64, 63, 61, 60, 59, 58, 57,
- 59, 60, 62, 63, 66, 66, 70, 70, 73, 74, 77, 78, 81, 83, 85, 87, 89, 92,
- 93, 94, 94, 96, 96, 97, 68, 64, 64, 61, 61, 60, 59, 58, 59, 61, 62, 64,
- 65, 67, 69, 71, 72, 74, 77, 78, 81, 82, 85, 86, 89, 90, 94, 94, 96, 96,
- 98, 97, 69, 65, 65, 62, 62, 61, 61, 58, 59, 62, 62, 65, 65, 68, 68, 71,
- 71, 75, 75, 79, 79, 83, 83, 87, 87, 91, 91, 96, 96, 97, 97, 99,
- /* Size 4x8 */
- 31, 47, 50, 61, 36, 47, 47, 57, 43, 50, 50, 58, 45, 53, 58, 65, 47, 54,
- 66, 74, 52, 56, 70, 82, 57, 60, 75, 90, 61, 63, 77, 93,
- /* Size 8x4 */
- 31, 36, 43, 45, 47, 52, 57, 61, 47, 47, 50, 53, 54, 56, 60, 63, 50, 47,
- 50, 58, 66, 70, 75, 77, 61, 57, 58, 65, 74, 82, 90, 93,
- /* Size 8x16 */
- 32, 32, 40, 49, 51, 57, 63, 67, 31, 33, 41, 47, 49, 54, 59, 63, 31, 35,
- 43, 46, 47, 51, 57, 60, 35, 39, 46, 46, 47, 50, 55, 58, 41, 43, 48, 49,
- 49, 52, 57, 59, 49, 47, 50, 53, 54, 57, 60, 62, 48, 46, 49, 54, 57, 60,
- 64, 65, 49, 45, 48, 56, 61, 64, 67, 69, 50, 46, 49, 57, 63, 67, 71, 73,
- 52, 48, 50, 58, 65, 71, 75, 77, 54, 50, 51, 59, 67, 73, 78, 81, 57, 52,
- 53, 61, 69, 77, 82, 85, 61, 55, 56, 63, 72, 80, 86, 88, 64, 58, 58, 65,
- 73, 82, 89, 92, 66, 59, 59, 66, 75, 84, 91, 94, 68, 61, 59, 65, 72, 81,
- 89, 95,
- /* Size 16x8 */
- 32, 31, 31, 35, 41, 49, 48, 49, 50, 52, 54, 57, 61, 64, 66, 68, 32, 33,
- 35, 39, 43, 47, 46, 45, 46, 48, 50, 52, 55, 58, 59, 61, 40, 41, 43, 46,
- 48, 50, 49, 48, 49, 50, 51, 53, 56, 58, 59, 59, 49, 47, 46, 46, 49, 53,
- 54, 56, 57, 58, 59, 61, 63, 65, 66, 65, 51, 49, 47, 47, 49, 54, 57, 61,
- 63, 65, 67, 69, 72, 73, 75, 72, 57, 54, 51, 50, 52, 57, 60, 64, 67, 71,
- 73, 77, 80, 82, 84, 81, 63, 59, 57, 55, 57, 60, 64, 67, 71, 75, 78, 82,
- 86, 89, 91, 89, 67, 63, 60, 58, 59, 62, 65, 69, 73, 77, 81, 85, 88, 92,
- 94, 95,
- /* Size 16x32 */
- 32, 31, 32, 37, 40, 48, 49, 49, 51, 52, 57, 58, 63, 64, 67, 67, 31, 31,
- 33, 38, 41, 47, 47, 47, 49, 50, 54, 55, 60, 61, 63, 64, 31, 31, 33, 38,
- 41, 47, 47, 47, 49, 49, 54, 54, 59, 60, 63, 64, 30, 32, 33, 40, 42, 46,
- 45, 45, 47, 48, 52, 52, 57, 58, 60, 61, 31, 33, 35, 41, 43, 46, 46, 45,
- 47, 48, 51, 52, 57, 57, 60, 61, 33, 36, 37, 43, 44, 47, 46, 46, 47, 47,
- 51, 52, 56, 57, 59, 60, 35, 38, 39, 45, 46, 47, 46, 45, 47, 47, 50, 51,
- 55, 56, 58, 60, 37, 40, 41, 47, 47, 47, 46, 45, 46, 47, 50, 50, 54, 55,
- 57, 58, 41, 42, 43, 47, 48, 49, 49, 48, 49, 50, 52, 53, 57, 57, 59, 58,
- 42, 43, 43, 47, 48, 50, 49, 49, 50, 50, 53, 54, 57, 58, 60, 61, 49, 46,
- 47, 48, 50, 53, 53, 53, 54, 54, 57, 57, 60, 61, 62, 61, 49, 46, 47, 48,
- 50, 53, 53, 54, 54, 55, 57, 57, 61, 61, 63, 64, 48, 46, 46, 47, 49, 53,
- 54, 56, 57, 57, 60, 60, 64, 64, 65, 64, 48, 45, 46, 46, 49, 53, 55, 56,
- 58, 58, 61, 61, 65, 65, 66, 67, 49, 45, 45, 46, 48, 53, 56, 58, 61, 61,
- 64, 64, 67, 68, 69, 67, 49, 46, 46, 46, 49, 53, 57, 59, 62, 62, 65, 66,
- 69, 69, 70, 70, 50, 46, 46, 46, 49, 54, 57, 59, 63, 64, 67, 67, 71, 71,
- 73, 71, 51, 47, 47, 47, 49, 54, 58, 61, 64, 66, 69, 70, 73, 74, 74, 74,
- 52, 48, 48, 47, 50, 54, 58, 61, 65, 66, 71, 71, 75, 75, 77, 74, 54, 50,
- 49, 48, 51, 55, 59, 62, 67, 68, 73, 73, 77, 78, 78, 78, 54, 50, 50, 49,
- 51, 55, 59, 62, 67, 68, 73, 74, 78, 78, 81, 78, 57, 52, 52, 50, 52, 56,
- 60, 64, 69, 70, 76, 77, 82, 82, 83, 82, 57, 52, 52, 51, 53, 57, 61, 64,
- 69, 71, 77, 77, 82, 83, 85, 82, 60, 54, 54, 52, 55, 58, 62, 65, 71, 72,
- 79, 79, 85, 86, 87, 86, 61, 56, 55, 53, 56, 59, 63, 66, 72, 73, 80, 81,
- 86, 87, 88, 86, 63, 57, 57, 55, 57, 60, 64, 67, 73, 75, 82, 82, 89, 90,
- 92, 90, 64, 58, 58, 55, 58, 61, 65, 68, 73, 75, 82, 83, 89, 90, 92, 90,
- 64, 59, 58, 56, 58, 61, 65, 68, 74, 75, 83, 83, 90, 91, 94, 95, 66, 60,
- 59, 57, 59, 62, 66, 69, 75, 76, 84, 85, 91, 92, 94, 95, 67, 61, 60, 58,
- 59, 63, 66, 70, 74, 77, 82, 85, 91, 93, 96, 96, 68, 62, 61, 58, 59, 64,
- 65, 71, 72, 78, 81, 86, 89, 94, 95, 96, 68, 62, 62, 59, 59, 65, 65, 71,
- 71, 79, 79, 87, 87, 95, 95, 98,
- /* Size 32x16 */
- 32, 31, 31, 30, 31, 33, 35, 37, 41, 42, 49, 49, 48, 48, 49, 49, 50, 51,
- 52, 54, 54, 57, 57, 60, 61, 63, 64, 64, 66, 67, 68, 68, 31, 31, 31, 32,
- 33, 36, 38, 40, 42, 43, 46, 46, 46, 45, 45, 46, 46, 47, 48, 50, 50, 52,
- 52, 54, 56, 57, 58, 59, 60, 61, 62, 62, 32, 33, 33, 33, 35, 37, 39, 41,
- 43, 43, 47, 47, 46, 46, 45, 46, 46, 47, 48, 49, 50, 52, 52, 54, 55, 57,
- 58, 58, 59, 60, 61, 62, 37, 38, 38, 40, 41, 43, 45, 47, 47, 47, 48, 48,
- 47, 46, 46, 46, 46, 47, 47, 48, 49, 50, 51, 52, 53, 55, 55, 56, 57, 58,
- 58, 59, 40, 41, 41, 42, 43, 44, 46, 47, 48, 48, 50, 50, 49, 49, 48, 49,
- 49, 49, 50, 51, 51, 52, 53, 55, 56, 57, 58, 58, 59, 59, 59, 59, 48, 47,
- 47, 46, 46, 47, 47, 47, 49, 50, 53, 53, 53, 53, 53, 53, 54, 54, 54, 55,
- 55, 56, 57, 58, 59, 60, 61, 61, 62, 63, 64, 65, 49, 47, 47, 45, 46, 46,
- 46, 46, 49, 49, 53, 53, 54, 55, 56, 57, 57, 58, 58, 59, 59, 60, 61, 62,
- 63, 64, 65, 65, 66, 66, 65, 65, 49, 47, 47, 45, 45, 46, 45, 45, 48, 49,
- 53, 54, 56, 56, 58, 59, 59, 61, 61, 62, 62, 64, 64, 65, 66, 67, 68, 68,
- 69, 70, 71, 71, 51, 49, 49, 47, 47, 47, 47, 46, 49, 50, 54, 54, 57, 58,
- 61, 62, 63, 64, 65, 67, 67, 69, 69, 71, 72, 73, 73, 74, 75, 74, 72, 71,
- 52, 50, 49, 48, 48, 47, 47, 47, 50, 50, 54, 55, 57, 58, 61, 62, 64, 66,
- 66, 68, 68, 70, 71, 72, 73, 75, 75, 75, 76, 77, 78, 79, 57, 54, 54, 52,
- 51, 51, 50, 50, 52, 53, 57, 57, 60, 61, 64, 65, 67, 69, 71, 73, 73, 76,
- 77, 79, 80, 82, 82, 83, 84, 82, 81, 79, 58, 55, 54, 52, 52, 52, 51, 50,
- 53, 54, 57, 57, 60, 61, 64, 66, 67, 70, 71, 73, 74, 77, 77, 79, 81, 82,
- 83, 83, 85, 85, 86, 87, 63, 60, 59, 57, 57, 56, 55, 54, 57, 57, 60, 61,
- 64, 65, 67, 69, 71, 73, 75, 77, 78, 82, 82, 85, 86, 89, 89, 90, 91, 91,
- 89, 87, 64, 61, 60, 58, 57, 57, 56, 55, 57, 58, 61, 61, 64, 65, 68, 69,
- 71, 74, 75, 78, 78, 82, 83, 86, 87, 90, 90, 91, 92, 93, 94, 95, 67, 63,
- 63, 60, 60, 59, 58, 57, 59, 60, 62, 63, 65, 66, 69, 70, 73, 74, 77, 78,
- 81, 83, 85, 87, 88, 92, 92, 94, 94, 96, 95, 95, 67, 64, 64, 61, 61, 60,
- 60, 58, 58, 61, 61, 64, 64, 67, 67, 70, 71, 74, 74, 78, 78, 82, 82, 86,
- 86, 90, 90, 95, 95, 96, 96, 98,
- /* Size 4x16 */
- 31, 48, 52, 64, 31, 47, 49, 60, 33, 46, 48, 57, 38, 47, 47, 56, 42, 49,
- 50, 57, 46, 53, 54, 61, 46, 53, 57, 64, 45, 53, 61, 68, 46, 54, 64, 71,
- 48, 54, 66, 75, 50, 55, 68, 78, 52, 57, 71, 83, 56, 59, 73, 87, 58, 61,
- 75, 90, 60, 62, 76, 92, 62, 64, 78, 94,
- /* Size 16x4 */
- 31, 31, 33, 38, 42, 46, 46, 45, 46, 48, 50, 52, 56, 58, 60, 62, 48, 47,
- 46, 47, 49, 53, 53, 53, 54, 54, 55, 57, 59, 61, 62, 64, 52, 49, 48, 47,
- 50, 54, 57, 61, 64, 66, 68, 71, 73, 75, 76, 78, 64, 60, 57, 56, 57, 61,
- 64, 68, 71, 75, 78, 83, 87, 90, 92, 94,
- /* Size 8x32 */
- 32, 32, 40, 49, 51, 57, 63, 67, 31, 33, 41, 47, 49, 54, 60, 63, 31, 33,
- 41, 47, 49, 54, 59, 63, 30, 33, 42, 45, 47, 52, 57, 60, 31, 35, 43, 46,
- 47, 51, 57, 60, 33, 37, 44, 46, 47, 51, 56, 59, 35, 39, 46, 46, 47, 50,
- 55, 58, 37, 41, 47, 46, 46, 50, 54, 57, 41, 43, 48, 49, 49, 52, 57, 59,
- 42, 43, 48, 49, 50, 53, 57, 60, 49, 47, 50, 53, 54, 57, 60, 62, 49, 47,
- 50, 53, 54, 57, 61, 63, 48, 46, 49, 54, 57, 60, 64, 65, 48, 46, 49, 55,
- 58, 61, 65, 66, 49, 45, 48, 56, 61, 64, 67, 69, 49, 46, 49, 57, 62, 65,
- 69, 70, 50, 46, 49, 57, 63, 67, 71, 73, 51, 47, 49, 58, 64, 69, 73, 74,
- 52, 48, 50, 58, 65, 71, 75, 77, 54, 49, 51, 59, 67, 73, 77, 78, 54, 50,
- 51, 59, 67, 73, 78, 81, 57, 52, 52, 60, 69, 76, 82, 83, 57, 52, 53, 61,
- 69, 77, 82, 85, 60, 54, 55, 62, 71, 79, 85, 87, 61, 55, 56, 63, 72, 80,
- 86, 88, 63, 57, 57, 64, 73, 82, 89, 92, 64, 58, 58, 65, 73, 82, 89, 92,
- 64, 58, 58, 65, 74, 83, 90, 94, 66, 59, 59, 66, 75, 84, 91, 94, 67, 60,
- 59, 66, 74, 82, 91, 96, 68, 61, 59, 65, 72, 81, 89, 95, 68, 62, 59, 65,
- 71, 79, 87, 95,
- /* Size 32x8 */
- 32, 31, 31, 30, 31, 33, 35, 37, 41, 42, 49, 49, 48, 48, 49, 49, 50, 51,
- 52, 54, 54, 57, 57, 60, 61, 63, 64, 64, 66, 67, 68, 68, 32, 33, 33, 33,
- 35, 37, 39, 41, 43, 43, 47, 47, 46, 46, 45, 46, 46, 47, 48, 49, 50, 52,
- 52, 54, 55, 57, 58, 58, 59, 60, 61, 62, 40, 41, 41, 42, 43, 44, 46, 47,
- 48, 48, 50, 50, 49, 49, 48, 49, 49, 49, 50, 51, 51, 52, 53, 55, 56, 57,
- 58, 58, 59, 59, 59, 59, 49, 47, 47, 45, 46, 46, 46, 46, 49, 49, 53, 53,
- 54, 55, 56, 57, 57, 58, 58, 59, 59, 60, 61, 62, 63, 64, 65, 65, 66, 66,
- 65, 65, 51, 49, 49, 47, 47, 47, 47, 46, 49, 50, 54, 54, 57, 58, 61, 62,
- 63, 64, 65, 67, 67, 69, 69, 71, 72, 73, 73, 74, 75, 74, 72, 71, 57, 54,
- 54, 52, 51, 51, 50, 50, 52, 53, 57, 57, 60, 61, 64, 65, 67, 69, 71, 73,
- 73, 76, 77, 79, 80, 82, 82, 83, 84, 82, 81, 79, 63, 60, 59, 57, 57, 56,
- 55, 54, 57, 57, 60, 61, 64, 65, 67, 69, 71, 73, 75, 77, 78, 82, 82, 85,
- 86, 89, 89, 90, 91, 91, 89, 87, 67, 63, 63, 60, 60, 59, 58, 57, 59, 60,
- 62, 63, 65, 66, 69, 70, 73, 74, 77, 78, 81, 83, 85, 87, 88, 92, 92, 94,
- 94, 96, 95, 95 },
- },
- {
- { /* Luma */
- /* Size 4x4 */
- 32, 34, 49, 72, 34, 48, 60, 79, 49, 60, 82, 104, 72, 79, 104, 134,
- /* Size 8x8 */
- 32, 32, 34, 38, 46, 56, 68, 78, 32, 33, 35, 39, 45, 54, 64, 74, 34, 35,
- 39, 45, 51, 58, 68, 76, 38, 39, 45, 54, 61, 69, 78, 86, 46, 45, 51, 61,
- 71, 80, 90, 99, 56, 54, 58, 69, 80, 92, 103, 113, 68, 64, 68, 78, 90,
- 103, 117, 128, 78, 74, 76, 86, 99, 113, 128, 140,
- /* Size 16x16 */
- 32, 31, 31, 31, 32, 34, 36, 39, 44, 48, 54, 59, 65, 71, 80, 83, 31, 32,
- 32, 32, 32, 34, 35, 38, 42, 46, 51, 56, 62, 68, 76, 78, 31, 32, 32, 32,
- 32, 33, 34, 37, 41, 44, 49, 54, 59, 65, 72, 75, 31, 32, 32, 33, 34, 35,
- 36, 39, 42, 45, 50, 54, 59, 64, 71, 74, 32, 32, 32, 34, 35, 37, 38, 40,
- 42, 46, 49, 53, 58, 63, 69, 72, 34, 34, 33, 35, 37, 39, 42, 45, 47, 51,
- 54, 58, 63, 68, 74, 76, 36, 35, 34, 36, 38, 42, 48, 50, 54, 57, 60, 64,
- 68, 73, 79, 81, 39, 38, 37, 39, 40, 45, 50, 54, 58, 61, 65, 69, 73, 78,
- 84, 86, 44, 42, 41, 42, 42, 47, 54, 58, 63, 67, 71, 75, 79, 84, 90, 92,
- 48, 46, 44, 45, 46, 51, 57, 61, 67, 71, 76, 80, 85, 90, 96, 99, 54, 51,
- 49, 50, 49, 54, 60, 65, 71, 76, 82, 87, 92, 97, 104, 106, 59, 56, 54,
- 54, 53, 58, 64, 69, 75, 80, 87, 92, 98, 103, 110, 113, 65, 62, 59, 59,
- 58, 63, 68, 73, 79, 85, 92, 98, 105, 111, 118, 121, 71, 68, 65, 64, 63,
- 68, 73, 78, 84, 90, 97, 103, 111, 117, 125, 128, 80, 76, 72, 71, 69, 74,
- 79, 84, 90, 96, 104, 110, 118, 125, 134, 137, 83, 78, 75, 74, 72, 76,
- 81, 86, 92, 99, 106, 113, 121, 128, 137, 140,
- /* Size 32x32 */
- 32, 31, 31, 31, 31, 31, 31, 32, 32, 34, 34, 36, 36, 39, 39, 44, 44, 48,
- 48, 54, 54, 59, 59, 65, 65, 71, 71, 80, 80, 83, 83, 87, 31, 32, 32, 32,
- 32, 32, 32, 32, 32, 34, 34, 35, 35, 38, 38, 42, 42, 46, 46, 51, 51, 56,
- 56, 62, 62, 68, 68, 76, 76, 78, 78, 83, 31, 32, 32, 32, 32, 32, 32, 32,
- 32, 34, 34, 35, 35, 38, 38, 42, 42, 46, 46, 51, 51, 56, 56, 62, 62, 68,
- 68, 76, 76, 78, 78, 83, 31, 32, 32, 32, 32, 32, 32, 32, 32, 33, 33, 34,
- 34, 37, 37, 41, 41, 44, 44, 49, 49, 54, 54, 59, 59, 65, 65, 72, 72, 75,
- 75, 79, 31, 32, 32, 32, 32, 32, 32, 32, 32, 33, 33, 34, 34, 37, 37, 41,
- 41, 44, 44, 49, 49, 54, 54, 59, 59, 65, 65, 72, 72, 75, 75, 79, 31, 32,
- 32, 32, 32, 33, 33, 34, 34, 35, 35, 36, 36, 39, 39, 42, 42, 45, 45, 50,
- 50, 54, 54, 59, 59, 64, 64, 71, 71, 74, 74, 77, 31, 32, 32, 32, 32, 33,
- 33, 34, 34, 35, 35, 36, 36, 39, 39, 42, 42, 45, 45, 50, 50, 54, 54, 59,
- 59, 64, 64, 71, 71, 74, 74, 77, 32, 32, 32, 32, 32, 34, 34, 35, 35, 37,
- 37, 38, 38, 40, 40, 42, 42, 46, 46, 49, 49, 53, 53, 58, 58, 63, 63, 69,
- 69, 72, 72, 75, 32, 32, 32, 32, 32, 34, 34, 35, 35, 37, 37, 38, 38, 40,
- 40, 42, 42, 46, 46, 49, 49, 53, 53, 58, 58, 63, 63, 69, 69, 72, 72, 75,
- 34, 34, 34, 33, 33, 35, 35, 37, 37, 39, 39, 42, 42, 45, 45, 47, 47, 51,
- 51, 54, 54, 58, 58, 63, 63, 68, 68, 74, 74, 76, 76, 80, 34, 34, 34, 33,
- 33, 35, 35, 37, 37, 39, 39, 42, 42, 45, 45, 47, 47, 51, 51, 54, 54, 58,
- 58, 63, 63, 68, 68, 74, 74, 76, 76, 80, 36, 35, 35, 34, 34, 36, 36, 38,
- 38, 42, 42, 48, 48, 50, 50, 54, 54, 57, 57, 60, 60, 64, 64, 68, 68, 73,
- 73, 79, 79, 81, 81, 84, 36, 35, 35, 34, 34, 36, 36, 38, 38, 42, 42, 48,
- 48, 50, 50, 54, 54, 57, 57, 60, 60, 64, 64, 68, 68, 73, 73, 79, 79, 81,
- 81, 84, 39, 38, 38, 37, 37, 39, 39, 40, 40, 45, 45, 50, 50, 54, 54, 58,
- 58, 61, 61, 65, 65, 69, 69, 73, 73, 78, 78, 84, 84, 86, 86, 90, 39, 38,
- 38, 37, 37, 39, 39, 40, 40, 45, 45, 50, 50, 54, 54, 58, 58, 61, 61, 65,
- 65, 69, 69, 73, 73, 78, 78, 84, 84, 86, 86, 90, 44, 42, 42, 41, 41, 42,
- 42, 42, 42, 47, 47, 54, 54, 58, 58, 63, 63, 67, 67, 71, 71, 75, 75, 79,
- 79, 84, 84, 90, 90, 92, 92, 96, 44, 42, 42, 41, 41, 42, 42, 42, 42, 47,
- 47, 54, 54, 58, 58, 63, 63, 67, 67, 71, 71, 75, 75, 79, 79, 84, 84, 90,
- 90, 92, 92, 96, 48, 46, 46, 44, 44, 45, 45, 46, 46, 51, 51, 57, 57, 61,
- 61, 67, 67, 71, 71, 76, 76, 80, 80, 85, 85, 90, 90, 96, 96, 99, 99, 102,
- 48, 46, 46, 44, 44, 45, 45, 46, 46, 51, 51, 57, 57, 61, 61, 67, 67, 71,
- 71, 76, 76, 80, 80, 85, 85, 90, 90, 96, 96, 99, 99, 102, 54, 51, 51, 49,
- 49, 50, 50, 49, 49, 54, 54, 60, 60, 65, 65, 71, 71, 76, 76, 82, 82, 87,
- 87, 92, 92, 97, 97, 104, 104, 106, 106, 109, 54, 51, 51, 49, 49, 50, 50,
- 49, 49, 54, 54, 60, 60, 65, 65, 71, 71, 76, 76, 82, 82, 87, 87, 92, 92,
- 97, 97, 104, 104, 106, 106, 109, 59, 56, 56, 54, 54, 54, 54, 53, 53, 58,
- 58, 64, 64, 69, 69, 75, 75, 80, 80, 87, 87, 92, 92, 98, 98, 103, 103,
- 110, 110, 113, 113, 116, 59, 56, 56, 54, 54, 54, 54, 53, 53, 58, 58, 64,
- 64, 69, 69, 75, 75, 80, 80, 87, 87, 92, 92, 98, 98, 103, 103, 110, 110,
- 113, 113, 116, 65, 62, 62, 59, 59, 59, 59, 58, 58, 63, 63, 68, 68, 73,
- 73, 79, 79, 85, 85, 92, 92, 98, 98, 105, 105, 111, 111, 118, 118, 121,
- 121, 124, 65, 62, 62, 59, 59, 59, 59, 58, 58, 63, 63, 68, 68, 73, 73,
- 79, 79, 85, 85, 92, 92, 98, 98, 105, 105, 111, 111, 118, 118, 121, 121,
- 124, 71, 68, 68, 65, 65, 64, 64, 63, 63, 68, 68, 73, 73, 78, 78, 84, 84,
- 90, 90, 97, 97, 103, 103, 111, 111, 117, 117, 125, 125, 128, 128, 132,
- 71, 68, 68, 65, 65, 64, 64, 63, 63, 68, 68, 73, 73, 78, 78, 84, 84, 90,
- 90, 97, 97, 103, 103, 111, 111, 117, 117, 125, 125, 128, 128, 132, 80,
- 76, 76, 72, 72, 71, 71, 69, 69, 74, 74, 79, 79, 84, 84, 90, 90, 96, 96,
- 104, 104, 110, 110, 118, 118, 125, 125, 134, 134, 137, 137, 141, 80, 76,
- 76, 72, 72, 71, 71, 69, 69, 74, 74, 79, 79, 84, 84, 90, 90, 96, 96, 104,
- 104, 110, 110, 118, 118, 125, 125, 134, 134, 137, 137, 141, 83, 78, 78,
- 75, 75, 74, 74, 72, 72, 76, 76, 81, 81, 86, 86, 92, 92, 99, 99, 106,
- 106, 113, 113, 121, 121, 128, 128, 137, 137, 140, 140, 144, 83, 78, 78,
- 75, 75, 74, 74, 72, 72, 76, 76, 81, 81, 86, 86, 92, 92, 99, 99, 106,
- 106, 113, 113, 121, 121, 128, 128, 137, 137, 140, 140, 144, 87, 83, 83,
- 79, 79, 77, 77, 75, 75, 80, 80, 84, 84, 90, 90, 96, 96, 102, 102, 109,
- 109, 116, 116, 124, 124, 132, 132, 141, 141, 144, 144, 149,
- /* Size 4x8 */
- 32, 35, 51, 75, 32, 36, 50, 71, 34, 42, 54, 73, 37, 50, 65, 84, 45, 56,
- 76, 96, 54, 63, 87, 110, 65, 73, 97, 125, 75, 81, 106, 136,
- /* Size 8x4 */
- 32, 32, 34, 37, 45, 54, 65, 75, 35, 36, 42, 50, 56, 63, 73, 81, 51, 50,
- 54, 65, 76, 87, 97, 106, 75, 71, 73, 84, 96, 110, 125, 136,
- /* Size 8x16 */
- 32, 31, 32, 36, 44, 53, 65, 79, 31, 32, 32, 35, 42, 51, 62, 75, 31, 32,
- 33, 34, 41, 49, 59, 72, 32, 32, 34, 36, 42, 50, 59, 71, 32, 33, 35, 38,
- 42, 49, 58, 69, 34, 34, 37, 42, 48, 54, 63, 73, 36, 34, 38, 48, 54, 60,
- 68, 78, 39, 37, 40, 50, 58, 65, 73, 84, 44, 41, 43, 53, 63, 71, 79, 90,
- 48, 45, 46, 56, 67, 76, 85, 96, 53, 49, 50, 60, 71, 82, 92, 103, 58, 54,
- 54, 63, 75, 87, 98, 110, 65, 60, 58, 68, 79, 92, 105, 118, 71, 65, 63,
- 73, 84, 97, 111, 125, 79, 72, 70, 79, 90, 104, 118, 133, 82, 75, 72, 81,
- 92, 106, 121, 136,
- /* Size 16x8 */
- 32, 31, 31, 32, 32, 34, 36, 39, 44, 48, 53, 58, 65, 71, 79, 82, 31, 32,
- 32, 32, 33, 34, 34, 37, 41, 45, 49, 54, 60, 65, 72, 75, 32, 32, 33, 34,
- 35, 37, 38, 40, 43, 46, 50, 54, 58, 63, 70, 72, 36, 35, 34, 36, 38, 42,
- 48, 50, 53, 56, 60, 63, 68, 73, 79, 81, 44, 42, 41, 42, 42, 48, 54, 58,
- 63, 67, 71, 75, 79, 84, 90, 92, 53, 51, 49, 50, 49, 54, 60, 65, 71, 76,
- 82, 87, 92, 97, 104, 106, 65, 62, 59, 59, 58, 63, 68, 73, 79, 85, 92,
- 98, 105, 111, 118, 121, 79, 75, 72, 71, 69, 73, 78, 84, 90, 96, 103,
- 110, 118, 125, 133, 136,
- /* Size 16x32 */
- 32, 31, 31, 32, 32, 36, 36, 44, 44, 53, 53, 65, 65, 79, 79, 87, 31, 32,
- 32, 32, 32, 35, 35, 42, 42, 51, 51, 62, 62, 75, 75, 82, 31, 32, 32, 32,
- 32, 35, 35, 42, 42, 51, 51, 62, 62, 75, 75, 82, 31, 32, 32, 33, 33, 34,
- 34, 41, 41, 49, 49, 59, 59, 72, 72, 78, 31, 32, 32, 33, 33, 34, 34, 41,
- 41, 49, 49, 59, 59, 72, 72, 78, 32, 32, 32, 34, 34, 36, 36, 42, 42, 50,
- 50, 59, 59, 71, 71, 77, 32, 32, 32, 34, 34, 36, 36, 42, 42, 50, 50, 59,
- 59, 71, 71, 77, 32, 33, 33, 35, 35, 38, 38, 42, 42, 49, 49, 58, 58, 69,
- 69, 75, 32, 33, 33, 35, 35, 38, 38, 42, 42, 49, 49, 58, 58, 69, 69, 75,
- 34, 34, 34, 37, 37, 42, 42, 48, 48, 54, 54, 63, 63, 73, 73, 79, 34, 34,
- 34, 37, 37, 42, 42, 48, 48, 54, 54, 63, 63, 73, 73, 79, 36, 34, 34, 38,
- 38, 48, 48, 54, 54, 60, 60, 68, 68, 78, 78, 84, 36, 34, 34, 38, 38, 48,
- 48, 54, 54, 60, 60, 68, 68, 78, 78, 84, 39, 37, 37, 40, 40, 50, 50, 58,
- 58, 65, 65, 73, 73, 84, 84, 89, 39, 37, 37, 40, 40, 50, 50, 58, 58, 65,
- 65, 73, 73, 84, 84, 89, 44, 41, 41, 43, 43, 53, 53, 63, 63, 71, 71, 79,
- 79, 90, 90, 95, 44, 41, 41, 43, 43, 53, 53, 63, 63, 71, 71, 79, 79, 90,
- 90, 95, 48, 45, 45, 46, 46, 56, 56, 67, 67, 76, 76, 85, 85, 96, 96, 102,
- 48, 45, 45, 46, 46, 56, 56, 67, 67, 76, 76, 85, 85, 96, 96, 102, 53, 49,
- 49, 50, 50, 60, 60, 71, 71, 82, 82, 92, 92, 103, 103, 109, 53, 49, 49,
- 50, 50, 60, 60, 71, 71, 82, 82, 92, 92, 103, 103, 109, 58, 54, 54, 54,
- 54, 63, 63, 75, 75, 87, 87, 98, 98, 110, 110, 116, 58, 54, 54, 54, 54,
- 63, 63, 75, 75, 87, 87, 98, 98, 110, 110, 116, 65, 60, 60, 58, 58, 68,
- 68, 79, 79, 92, 92, 105, 105, 118, 118, 124, 65, 60, 60, 58, 58, 68, 68,
- 79, 79, 92, 92, 105, 105, 118, 118, 124, 71, 65, 65, 63, 63, 73, 73, 84,
- 84, 97, 97, 111, 111, 125, 125, 132, 71, 65, 65, 63, 63, 73, 73, 84, 84,
- 97, 97, 111, 111, 125, 125, 132, 79, 72, 72, 70, 70, 79, 79, 90, 90,
- 104, 104, 118, 118, 133, 133, 141, 79, 72, 72, 70, 70, 79, 79, 90, 90,
- 104, 104, 118, 118, 133, 133, 141, 82, 75, 75, 72, 72, 81, 81, 92, 92,
- 106, 106, 121, 121, 136, 136, 144, 82, 75, 75, 72, 72, 81, 81, 92, 92,
- 106, 106, 121, 121, 136, 136, 144, 87, 79, 79, 76, 76, 84, 84, 96, 96,
- 109, 109, 124, 124, 141, 141, 149,
- /* Size 32x16 */
- 32, 31, 31, 31, 31, 32, 32, 32, 32, 34, 34, 36, 36, 39, 39, 44, 44, 48,
- 48, 53, 53, 58, 58, 65, 65, 71, 71, 79, 79, 82, 82, 87, 31, 32, 32, 32,
- 32, 32, 32, 33, 33, 34, 34, 34, 34, 37, 37, 41, 41, 45, 45, 49, 49, 54,
- 54, 60, 60, 65, 65, 72, 72, 75, 75, 79, 31, 32, 32, 32, 32, 32, 32, 33,
- 33, 34, 34, 34, 34, 37, 37, 41, 41, 45, 45, 49, 49, 54, 54, 60, 60, 65,
- 65, 72, 72, 75, 75, 79, 32, 32, 32, 33, 33, 34, 34, 35, 35, 37, 37, 38,
- 38, 40, 40, 43, 43, 46, 46, 50, 50, 54, 54, 58, 58, 63, 63, 70, 70, 72,
- 72, 76, 32, 32, 32, 33, 33, 34, 34, 35, 35, 37, 37, 38, 38, 40, 40, 43,
- 43, 46, 46, 50, 50, 54, 54, 58, 58, 63, 63, 70, 70, 72, 72, 76, 36, 35,
- 35, 34, 34, 36, 36, 38, 38, 42, 42, 48, 48, 50, 50, 53, 53, 56, 56, 60,
- 60, 63, 63, 68, 68, 73, 73, 79, 79, 81, 81, 84, 36, 35, 35, 34, 34, 36,
- 36, 38, 38, 42, 42, 48, 48, 50, 50, 53, 53, 56, 56, 60, 60, 63, 63, 68,
- 68, 73, 73, 79, 79, 81, 81, 84, 44, 42, 42, 41, 41, 42, 42, 42, 42, 48,
- 48, 54, 54, 58, 58, 63, 63, 67, 67, 71, 71, 75, 75, 79, 79, 84, 84, 90,
- 90, 92, 92, 96, 44, 42, 42, 41, 41, 42, 42, 42, 42, 48, 48, 54, 54, 58,
- 58, 63, 63, 67, 67, 71, 71, 75, 75, 79, 79, 84, 84, 90, 90, 92, 92, 96,
- 53, 51, 51, 49, 49, 50, 50, 49, 49, 54, 54, 60, 60, 65, 65, 71, 71, 76,
- 76, 82, 82, 87, 87, 92, 92, 97, 97, 104, 104, 106, 106, 109, 53, 51, 51,
- 49, 49, 50, 50, 49, 49, 54, 54, 60, 60, 65, 65, 71, 71, 76, 76, 82, 82,
- 87, 87, 92, 92, 97, 97, 104, 104, 106, 106, 109, 65, 62, 62, 59, 59, 59,
- 59, 58, 58, 63, 63, 68, 68, 73, 73, 79, 79, 85, 85, 92, 92, 98, 98, 105,
- 105, 111, 111, 118, 118, 121, 121, 124, 65, 62, 62, 59, 59, 59, 59, 58,
- 58, 63, 63, 68, 68, 73, 73, 79, 79, 85, 85, 92, 92, 98, 98, 105, 105,
- 111, 111, 118, 118, 121, 121, 124, 79, 75, 75, 72, 72, 71, 71, 69, 69,
- 73, 73, 78, 78, 84, 84, 90, 90, 96, 96, 103, 103, 110, 110, 118, 118,
- 125, 125, 133, 133, 136, 136, 141, 79, 75, 75, 72, 72, 71, 71, 69, 69,
- 73, 73, 78, 78, 84, 84, 90, 90, 96, 96, 103, 103, 110, 110, 118, 118,
- 125, 125, 133, 133, 136, 136, 141, 87, 82, 82, 78, 78, 77, 77, 75, 75,
- 79, 79, 84, 84, 89, 89, 95, 95, 102, 102, 109, 109, 116, 116, 124, 124,
- 132, 132, 141, 141, 144, 144, 149,
- /* Size 4x16 */
- 31, 36, 53, 79, 32, 35, 51, 75, 32, 34, 49, 72, 32, 36, 50, 71, 33, 38,
- 49, 69, 34, 42, 54, 73, 34, 48, 60, 78, 37, 50, 65, 84, 41, 53, 71, 90,
- 45, 56, 76, 96, 49, 60, 82, 103, 54, 63, 87, 110, 60, 68, 92, 118, 65,
- 73, 97, 125, 72, 79, 104, 133, 75, 81, 106, 136,
- /* Size 16x4 */
- 31, 32, 32, 32, 33, 34, 34, 37, 41, 45, 49, 54, 60, 65, 72, 75, 36, 35,
- 34, 36, 38, 42, 48, 50, 53, 56, 60, 63, 68, 73, 79, 81, 53, 51, 49, 50,
- 49, 54, 60, 65, 71, 76, 82, 87, 92, 97, 104, 106, 79, 75, 72, 71, 69,
- 73, 78, 84, 90, 96, 103, 110, 118, 125, 133, 136,
- /* Size 8x32 */
- 32, 31, 32, 36, 44, 53, 65, 79, 31, 32, 32, 35, 42, 51, 62, 75, 31, 32,
- 32, 35, 42, 51, 62, 75, 31, 32, 33, 34, 41, 49, 59, 72, 31, 32, 33, 34,
- 41, 49, 59, 72, 32, 32, 34, 36, 42, 50, 59, 71, 32, 32, 34, 36, 42, 50,
- 59, 71, 32, 33, 35, 38, 42, 49, 58, 69, 32, 33, 35, 38, 42, 49, 58, 69,
- 34, 34, 37, 42, 48, 54, 63, 73, 34, 34, 37, 42, 48, 54, 63, 73, 36, 34,
- 38, 48, 54, 60, 68, 78, 36, 34, 38, 48, 54, 60, 68, 78, 39, 37, 40, 50,
- 58, 65, 73, 84, 39, 37, 40, 50, 58, 65, 73, 84, 44, 41, 43, 53, 63, 71,
- 79, 90, 44, 41, 43, 53, 63, 71, 79, 90, 48, 45, 46, 56, 67, 76, 85, 96,
- 48, 45, 46, 56, 67, 76, 85, 96, 53, 49, 50, 60, 71, 82, 92, 103, 53, 49,
- 50, 60, 71, 82, 92, 103, 58, 54, 54, 63, 75, 87, 98, 110, 58, 54, 54,
- 63, 75, 87, 98, 110, 65, 60, 58, 68, 79, 92, 105, 118, 65, 60, 58, 68,
- 79, 92, 105, 118, 71, 65, 63, 73, 84, 97, 111, 125, 71, 65, 63, 73, 84,
- 97, 111, 125, 79, 72, 70, 79, 90, 104, 118, 133, 79, 72, 70, 79, 90,
- 104, 118, 133, 82, 75, 72, 81, 92, 106, 121, 136, 82, 75, 72, 81, 92,
- 106, 121, 136, 87, 79, 76, 84, 96, 109, 124, 141,
- /* Size 32x8 */
- 32, 31, 31, 31, 31, 32, 32, 32, 32, 34, 34, 36, 36, 39, 39, 44, 44, 48,
- 48, 53, 53, 58, 58, 65, 65, 71, 71, 79, 79, 82, 82, 87, 31, 32, 32, 32,
- 32, 32, 32, 33, 33, 34, 34, 34, 34, 37, 37, 41, 41, 45, 45, 49, 49, 54,
- 54, 60, 60, 65, 65, 72, 72, 75, 75, 79, 32, 32, 32, 33, 33, 34, 34, 35,
- 35, 37, 37, 38, 38, 40, 40, 43, 43, 46, 46, 50, 50, 54, 54, 58, 58, 63,
- 63, 70, 70, 72, 72, 76, 36, 35, 35, 34, 34, 36, 36, 38, 38, 42, 42, 48,
- 48, 50, 50, 53, 53, 56, 56, 60, 60, 63, 63, 68, 68, 73, 73, 79, 79, 81,
- 81, 84, 44, 42, 42, 41, 41, 42, 42, 42, 42, 48, 48, 54, 54, 58, 58, 63,
- 63, 67, 67, 71, 71, 75, 75, 79, 79, 84, 84, 90, 90, 92, 92, 96, 53, 51,
- 51, 49, 49, 50, 50, 49, 49, 54, 54, 60, 60, 65, 65, 71, 71, 76, 76, 82,
- 82, 87, 87, 92, 92, 97, 97, 104, 104, 106, 106, 109, 65, 62, 62, 59, 59,
- 59, 59, 58, 58, 63, 63, 68, 68, 73, 73, 79, 79, 85, 85, 92, 92, 98, 98,
- 105, 105, 111, 111, 118, 118, 121, 121, 124, 79, 75, 75, 72, 72, 71, 71,
- 69, 69, 73, 73, 78, 78, 84, 84, 90, 90, 96, 96, 103, 103, 110, 110, 118,
- 118, 125, 125, 133, 133, 136, 136, 141 },
- { /* Chroma */
- /* Size 4x4 */
- 32, 46, 47, 57, 46, 53, 54, 60, 47, 54, 66, 75, 57, 60, 75, 89,
- /* Size 8x8 */
- 31, 34, 42, 47, 48, 52, 57, 61, 34, 39, 45, 46, 46, 49, 53, 57, 42, 45,
- 48, 49, 50, 52, 55, 58, 47, 46, 49, 54, 56, 58, 61, 64, 48, 46, 50, 56,
- 61, 65, 68, 71, 52, 49, 52, 58, 65, 71, 75, 79, 57, 53, 55, 61, 68, 75,
- 82, 86, 61, 57, 58, 64, 71, 79, 86, 91,
- /* Size 16x16 */
- 32, 31, 30, 33, 36, 41, 49, 48, 49, 50, 52, 54, 57, 60, 63, 65, 31, 31,
- 31, 34, 38, 42, 47, 47, 47, 48, 50, 52, 54, 57, 60, 61, 30, 31, 32, 35,
- 40, 42, 46, 45, 45, 46, 47, 49, 52, 54, 57, 58, 33, 34, 35, 39, 43, 45,
- 47, 46, 45, 46, 47, 49, 51, 53, 56, 57, 36, 38, 40, 43, 47, 47, 48, 46,
- 45, 46, 47, 48, 50, 52, 54, 55, 41, 42, 42, 45, 47, 48, 50, 49, 49, 50,
- 50, 52, 53, 55, 57, 58, 49, 47, 46, 47, 48, 50, 53, 53, 53, 54, 54, 55,
- 56, 58, 60, 61, 48, 47, 45, 46, 46, 49, 53, 54, 55, 56, 57, 58, 60, 61,
- 63, 64, 49, 47, 45, 45, 45, 49, 53, 55, 58, 60, 61, 62, 63, 65, 67, 68,
- 50, 48, 46, 46, 46, 50, 54, 56, 60, 61, 63, 65, 67, 68, 71, 71, 52, 50,
- 47, 47, 47, 50, 54, 57, 61, 63, 66, 68, 70, 72, 75, 75, 54, 52, 49, 49,
- 48, 52, 55, 58, 62, 65, 68, 71, 73, 75, 78, 79, 57, 54, 52, 51, 50, 53,
- 56, 60, 63, 67, 70, 73, 76, 79, 82, 83, 60, 57, 54, 53, 52, 55, 58, 61,
- 65, 68, 72, 75, 79, 82, 85, 86, 63, 60, 57, 56, 54, 57, 60, 63, 67, 71,
- 75, 78, 82, 85, 89, 90, 65, 61, 58, 57, 55, 58, 61, 64, 68, 71, 75, 79,
- 83, 86, 90, 91,
- /* Size 32x32 */
- 32, 31, 31, 30, 30, 33, 33, 36, 36, 41, 41, 49, 49, 48, 48, 49, 49, 50,
- 50, 52, 52, 54, 54, 57, 57, 60, 60, 63, 63, 65, 65, 67, 31, 31, 31, 31,
- 31, 34, 34, 38, 38, 42, 42, 47, 47, 47, 47, 47, 47, 48, 48, 50, 50, 52,
- 52, 54, 54, 57, 57, 60, 60, 61, 61, 63, 31, 31, 31, 31, 31, 34, 34, 38,
- 38, 42, 42, 47, 47, 47, 47, 47, 47, 48, 48, 50, 50, 52, 52, 54, 54, 57,
- 57, 60, 60, 61, 61, 63, 30, 31, 31, 32, 32, 35, 35, 40, 40, 42, 42, 46,
- 46, 45, 45, 45, 45, 46, 46, 47, 47, 49, 49, 52, 52, 54, 54, 57, 57, 58,
- 58, 60, 30, 31, 31, 32, 32, 35, 35, 40, 40, 42, 42, 46, 46, 45, 45, 45,
- 45, 46, 46, 47, 47, 49, 49, 52, 52, 54, 54, 57, 57, 58, 58, 60, 33, 34,
- 34, 35, 35, 39, 39, 43, 43, 45, 45, 47, 47, 46, 46, 45, 45, 46, 46, 47,
- 47, 49, 49, 51, 51, 53, 53, 56, 56, 57, 57, 59, 33, 34, 34, 35, 35, 39,
- 39, 43, 43, 45, 45, 47, 47, 46, 46, 45, 45, 46, 46, 47, 47, 49, 49, 51,
- 51, 53, 53, 56, 56, 57, 57, 59, 36, 38, 38, 40, 40, 43, 43, 47, 47, 47,
- 47, 48, 48, 46, 46, 45, 45, 46, 46, 47, 47, 48, 48, 50, 50, 52, 52, 54,
- 54, 55, 55, 57, 36, 38, 38, 40, 40, 43, 43, 47, 47, 47, 47, 48, 48, 46,
- 46, 45, 45, 46, 46, 47, 47, 48, 48, 50, 50, 52, 52, 54, 54, 55, 55, 57,
- 41, 42, 42, 42, 42, 45, 45, 47, 47, 48, 48, 50, 50, 49, 49, 49, 49, 50,
- 50, 50, 50, 52, 52, 53, 53, 55, 55, 57, 57, 58, 58, 60, 41, 42, 42, 42,
- 42, 45, 45, 47, 47, 48, 48, 50, 50, 49, 49, 49, 49, 50, 50, 50, 50, 52,
- 52, 53, 53, 55, 55, 57, 57, 58, 58, 60, 49, 47, 47, 46, 46, 47, 47, 48,
- 48, 50, 50, 53, 53, 53, 53, 53, 53, 54, 54, 54, 54, 55, 55, 56, 56, 58,
- 58, 60, 60, 61, 61, 62, 49, 47, 47, 46, 46, 47, 47, 48, 48, 50, 50, 53,
- 53, 53, 53, 53, 53, 54, 54, 54, 54, 55, 55, 56, 56, 58, 58, 60, 60, 61,
- 61, 62, 48, 47, 47, 45, 45, 46, 46, 46, 46, 49, 49, 53, 53, 54, 54, 55,
- 55, 56, 56, 57, 57, 58, 58, 60, 60, 61, 61, 63, 63, 64, 64, 66, 48, 47,
- 47, 45, 45, 46, 46, 46, 46, 49, 49, 53, 53, 54, 54, 55, 55, 56, 56, 57,
- 57, 58, 58, 60, 60, 61, 61, 63, 63, 64, 64, 66, 49, 47, 47, 45, 45, 45,
- 45, 45, 45, 49, 49, 53, 53, 55, 55, 58, 58, 60, 60, 61, 61, 62, 62, 63,
- 63, 65, 65, 67, 67, 68, 68, 69, 49, 47, 47, 45, 45, 45, 45, 45, 45, 49,
- 49, 53, 53, 55, 55, 58, 58, 60, 60, 61, 61, 62, 62, 63, 63, 65, 65, 67,
- 67, 68, 68, 69, 50, 48, 48, 46, 46, 46, 46, 46, 46, 50, 50, 54, 54, 56,
- 56, 60, 60, 61, 61, 63, 63, 65, 65, 67, 67, 68, 68, 71, 71, 71, 71, 72,
- 50, 48, 48, 46, 46, 46, 46, 46, 46, 50, 50, 54, 54, 56, 56, 60, 60, 61,
- 61, 63, 63, 65, 65, 67, 67, 68, 68, 71, 71, 71, 71, 72, 52, 50, 50, 47,
- 47, 47, 47, 47, 47, 50, 50, 54, 54, 57, 57, 61, 61, 63, 63, 66, 66, 68,
- 68, 70, 70, 72, 72, 75, 75, 75, 75, 76, 52, 50, 50, 47, 47, 47, 47, 47,
- 47, 50, 50, 54, 54, 57, 57, 61, 61, 63, 63, 66, 66, 68, 68, 70, 70, 72,
- 72, 75, 75, 75, 75, 76, 54, 52, 52, 49, 49, 49, 49, 48, 48, 52, 52, 55,
- 55, 58, 58, 62, 62, 65, 65, 68, 68, 71, 71, 73, 73, 75, 75, 78, 78, 79,
- 79, 80, 54, 52, 52, 49, 49, 49, 49, 48, 48, 52, 52, 55, 55, 58, 58, 62,
- 62, 65, 65, 68, 68, 71, 71, 73, 73, 75, 75, 78, 78, 79, 79, 80, 57, 54,
- 54, 52, 52, 51, 51, 50, 50, 53, 53, 56, 56, 60, 60, 63, 63, 67, 67, 70,
- 70, 73, 73, 76, 76, 79, 79, 82, 82, 83, 83, 84, 57, 54, 54, 52, 52, 51,
- 51, 50, 50, 53, 53, 56, 56, 60, 60, 63, 63, 67, 67, 70, 70, 73, 73, 76,
- 76, 79, 79, 82, 82, 83, 83, 84, 60, 57, 57, 54, 54, 53, 53, 52, 52, 55,
- 55, 58, 58, 61, 61, 65, 65, 68, 68, 72, 72, 75, 75, 79, 79, 82, 82, 85,
- 85, 86, 86, 88, 60, 57, 57, 54, 54, 53, 53, 52, 52, 55, 55, 58, 58, 61,
- 61, 65, 65, 68, 68, 72, 72, 75, 75, 79, 79, 82, 82, 85, 85, 86, 86, 88,
- 63, 60, 60, 57, 57, 56, 56, 54, 54, 57, 57, 60, 60, 63, 63, 67, 67, 71,
- 71, 75, 75, 78, 78, 82, 82, 85, 85, 89, 89, 90, 90, 92, 63, 60, 60, 57,
- 57, 56, 56, 54, 54, 57, 57, 60, 60, 63, 63, 67, 67, 71, 71, 75, 75, 78,
- 78, 82, 82, 85, 85, 89, 89, 90, 90, 92, 65, 61, 61, 58, 58, 57, 57, 55,
- 55, 58, 58, 61, 61, 64, 64, 68, 68, 71, 71, 75, 75, 79, 79, 83, 83, 86,
- 86, 90, 90, 91, 91, 93, 65, 61, 61, 58, 58, 57, 57, 55, 55, 58, 58, 61,
- 61, 64, 64, 68, 68, 71, 71, 75, 75, 79, 79, 83, 83, 86, 86, 90, 90, 91,
- 91, 93, 67, 63, 63, 60, 60, 59, 59, 57, 57, 60, 60, 62, 62, 66, 66, 69,
- 69, 72, 72, 76, 76, 80, 80, 84, 84, 88, 88, 92, 92, 93, 93, 95,
- /* Size 4x8 */
- 31, 47, 50, 60, 36, 47, 47, 56, 43, 50, 50, 57, 46, 53, 57, 64, 46, 54,
- 64, 71, 50, 55, 68, 78, 54, 58, 72, 85, 59, 61, 75, 90,
- /* Size 8x4 */
- 31, 36, 43, 46, 46, 50, 54, 59, 47, 47, 50, 53, 54, 55, 58, 61, 50, 47,
- 50, 57, 64, 68, 72, 75, 60, 56, 57, 64, 71, 78, 85, 90,
- /* Size 8x16 */
- 32, 31, 37, 48, 49, 52, 57, 63, 31, 31, 38, 47, 47, 50, 54, 60, 30, 32,
- 40, 46, 45, 48, 52, 57, 33, 36, 43, 47, 46, 47, 51, 56, 37, 40, 47, 47,
- 45, 47, 50, 54, 42, 43, 47, 50, 49, 50, 53, 57, 49, 46, 48, 53, 53, 54,
- 57, 60, 48, 46, 47, 53, 56, 57, 60, 64, 49, 45, 46, 53, 58, 61, 64, 67,
- 50, 46, 46, 54, 59, 64, 67, 71, 52, 48, 47, 54, 61, 66, 71, 75, 54, 50,
- 49, 55, 62, 68, 73, 78, 57, 52, 50, 56, 64, 70, 76, 82, 60, 54, 52, 58,
- 65, 72, 79, 85, 63, 57, 55, 60, 67, 75, 82, 89, 64, 59, 56, 61, 68, 75,
- 83, 90,
- /* Size 16x8 */
- 32, 31, 30, 33, 37, 42, 49, 48, 49, 50, 52, 54, 57, 60, 63, 64, 31, 31,
- 32, 36, 40, 43, 46, 46, 45, 46, 48, 50, 52, 54, 57, 59, 37, 38, 40, 43,
- 47, 47, 48, 47, 46, 46, 47, 49, 50, 52, 55, 56, 48, 47, 46, 47, 47, 50,
- 53, 53, 53, 54, 54, 55, 56, 58, 60, 61, 49, 47, 45, 46, 45, 49, 53, 56,
- 58, 59, 61, 62, 64, 65, 67, 68, 52, 50, 48, 47, 47, 50, 54, 57, 61, 64,
- 66, 68, 70, 72, 75, 75, 57, 54, 52, 51, 50, 53, 57, 60, 64, 67, 71, 73,
- 76, 79, 82, 83, 63, 60, 57, 56, 54, 57, 60, 64, 67, 71, 75, 78, 82, 85,
- 89, 90,
- /* Size 16x32 */
- 32, 31, 31, 37, 37, 48, 48, 49, 49, 52, 52, 57, 57, 63, 63, 66, 31, 31,
- 31, 38, 38, 47, 47, 47, 47, 50, 50, 54, 54, 60, 60, 63, 31, 31, 31, 38,
- 38, 47, 47, 47, 47, 50, 50, 54, 54, 60, 60, 63, 30, 32, 32, 40, 40, 46,
- 46, 45, 45, 48, 48, 52, 52, 57, 57, 60, 30, 32, 32, 40, 40, 46, 46, 45,
- 45, 48, 48, 52, 52, 57, 57, 60, 33, 36, 36, 43, 43, 47, 47, 46, 46, 47,
- 47, 51, 51, 56, 56, 59, 33, 36, 36, 43, 43, 47, 47, 46, 46, 47, 47, 51,
- 51, 56, 56, 59, 37, 40, 40, 47, 47, 47, 47, 45, 45, 47, 47, 50, 50, 54,
- 54, 57, 37, 40, 40, 47, 47, 47, 47, 45, 45, 47, 47, 50, 50, 54, 54, 57,
- 42, 43, 43, 47, 47, 50, 50, 49, 49, 50, 50, 53, 53, 57, 57, 60, 42, 43,
- 43, 47, 47, 50, 50, 49, 49, 50, 50, 53, 53, 57, 57, 60, 49, 46, 46, 48,
- 48, 53, 53, 53, 53, 54, 54, 57, 57, 60, 60, 62, 49, 46, 46, 48, 48, 53,
- 53, 53, 53, 54, 54, 57, 57, 60, 60, 62, 48, 46, 46, 47, 47, 53, 53, 56,
- 56, 57, 57, 60, 60, 64, 64, 66, 48, 46, 46, 47, 47, 53, 53, 56, 56, 57,
- 57, 60, 60, 64, 64, 66, 49, 45, 45, 46, 46, 53, 53, 58, 58, 61, 61, 64,
- 64, 67, 67, 69, 49, 45, 45, 46, 46, 53, 53, 58, 58, 61, 61, 64, 64, 67,
- 67, 69, 50, 46, 46, 46, 46, 54, 54, 59, 59, 64, 64, 67, 67, 71, 71, 73,
- 50, 46, 46, 46, 46, 54, 54, 59, 59, 64, 64, 67, 67, 71, 71, 73, 52, 48,
- 48, 47, 47, 54, 54, 61, 61, 66, 66, 71, 71, 75, 75, 77, 52, 48, 48, 47,
- 47, 54, 54, 61, 61, 66, 66, 71, 71, 75, 75, 77, 54, 50, 50, 49, 49, 55,
- 55, 62, 62, 68, 68, 73, 73, 78, 78, 80, 54, 50, 50, 49, 49, 55, 55, 62,
- 62, 68, 68, 73, 73, 78, 78, 80, 57, 52, 52, 50, 50, 56, 56, 64, 64, 70,
- 70, 76, 76, 82, 82, 84, 57, 52, 52, 50, 50, 56, 56, 64, 64, 70, 70, 76,
- 76, 82, 82, 84, 60, 54, 54, 52, 52, 58, 58, 65, 65, 72, 72, 79, 79, 85,
- 85, 88, 60, 54, 54, 52, 52, 58, 58, 65, 65, 72, 72, 79, 79, 85, 85, 88,
- 63, 57, 57, 55, 55, 60, 60, 67, 67, 75, 75, 82, 82, 89, 89, 92, 63, 57,
- 57, 55, 55, 60, 60, 67, 67, 75, 75, 82, 82, 89, 89, 92, 64, 59, 59, 56,
- 56, 61, 61, 68, 68, 75, 75, 83, 83, 90, 90, 93, 64, 59, 59, 56, 56, 61,
- 61, 68, 68, 75, 75, 83, 83, 90, 90, 93, 66, 60, 60, 57, 57, 63, 63, 69,
- 69, 77, 77, 84, 84, 92, 92, 95,
- /* Size 32x16 */
- 32, 31, 31, 30, 30, 33, 33, 37, 37, 42, 42, 49, 49, 48, 48, 49, 49, 50,
- 50, 52, 52, 54, 54, 57, 57, 60, 60, 63, 63, 64, 64, 66, 31, 31, 31, 32,
- 32, 36, 36, 40, 40, 43, 43, 46, 46, 46, 46, 45, 45, 46, 46, 48, 48, 50,
- 50, 52, 52, 54, 54, 57, 57, 59, 59, 60, 31, 31, 31, 32, 32, 36, 36, 40,
- 40, 43, 43, 46, 46, 46, 46, 45, 45, 46, 46, 48, 48, 50, 50, 52, 52, 54,
- 54, 57, 57, 59, 59, 60, 37, 38, 38, 40, 40, 43, 43, 47, 47, 47, 47, 48,
- 48, 47, 47, 46, 46, 46, 46, 47, 47, 49, 49, 50, 50, 52, 52, 55, 55, 56,
- 56, 57, 37, 38, 38, 40, 40, 43, 43, 47, 47, 47, 47, 48, 48, 47, 47, 46,
- 46, 46, 46, 47, 47, 49, 49, 50, 50, 52, 52, 55, 55, 56, 56, 57, 48, 47,
- 47, 46, 46, 47, 47, 47, 47, 50, 50, 53, 53, 53, 53, 53, 53, 54, 54, 54,
- 54, 55, 55, 56, 56, 58, 58, 60, 60, 61, 61, 63, 48, 47, 47, 46, 46, 47,
- 47, 47, 47, 50, 50, 53, 53, 53, 53, 53, 53, 54, 54, 54, 54, 55, 55, 56,
- 56, 58, 58, 60, 60, 61, 61, 63, 49, 47, 47, 45, 45, 46, 46, 45, 45, 49,
- 49, 53, 53, 56, 56, 58, 58, 59, 59, 61, 61, 62, 62, 64, 64, 65, 65, 67,
- 67, 68, 68, 69, 49, 47, 47, 45, 45, 46, 46, 45, 45, 49, 49, 53, 53, 56,
- 56, 58, 58, 59, 59, 61, 61, 62, 62, 64, 64, 65, 65, 67, 67, 68, 68, 69,
- 52, 50, 50, 48, 48, 47, 47, 47, 47, 50, 50, 54, 54, 57, 57, 61, 61, 64,
- 64, 66, 66, 68, 68, 70, 70, 72, 72, 75, 75, 75, 75, 77, 52, 50, 50, 48,
- 48, 47, 47, 47, 47, 50, 50, 54, 54, 57, 57, 61, 61, 64, 64, 66, 66, 68,
- 68, 70, 70, 72, 72, 75, 75, 75, 75, 77, 57, 54, 54, 52, 52, 51, 51, 50,
- 50, 53, 53, 57, 57, 60, 60, 64, 64, 67, 67, 71, 71, 73, 73, 76, 76, 79,
- 79, 82, 82, 83, 83, 84, 57, 54, 54, 52, 52, 51, 51, 50, 50, 53, 53, 57,
- 57, 60, 60, 64, 64, 67, 67, 71, 71, 73, 73, 76, 76, 79, 79, 82, 82, 83,
- 83, 84, 63, 60, 60, 57, 57, 56, 56, 54, 54, 57, 57, 60, 60, 64, 64, 67,
- 67, 71, 71, 75, 75, 78, 78, 82, 82, 85, 85, 89, 89, 90, 90, 92, 63, 60,
- 60, 57, 57, 56, 56, 54, 54, 57, 57, 60, 60, 64, 64, 67, 67, 71, 71, 75,
- 75, 78, 78, 82, 82, 85, 85, 89, 89, 90, 90, 92, 66, 63, 63, 60, 60, 59,
- 59, 57, 57, 60, 60, 62, 62, 66, 66, 69, 69, 73, 73, 77, 77, 80, 80, 84,
- 84, 88, 88, 92, 92, 93, 93, 95,
- /* Size 4x16 */
- 31, 48, 52, 63, 31, 47, 50, 60, 32, 46, 48, 57, 36, 47, 47, 56, 40, 47,
- 47, 54, 43, 50, 50, 57, 46, 53, 54, 60, 46, 53, 57, 64, 45, 53, 61, 67,
- 46, 54, 64, 71, 48, 54, 66, 75, 50, 55, 68, 78, 52, 56, 70, 82, 54, 58,
- 72, 85, 57, 60, 75, 89, 59, 61, 75, 90,
- /* Size 16x4 */
- 31, 31, 32, 36, 40, 43, 46, 46, 45, 46, 48, 50, 52, 54, 57, 59, 48, 47,
- 46, 47, 47, 50, 53, 53, 53, 54, 54, 55, 56, 58, 60, 61, 52, 50, 48, 47,
- 47, 50, 54, 57, 61, 64, 66, 68, 70, 72, 75, 75, 63, 60, 57, 56, 54, 57,
- 60, 64, 67, 71, 75, 78, 82, 85, 89, 90,
- /* Size 8x32 */
- 32, 31, 37, 48, 49, 52, 57, 63, 31, 31, 38, 47, 47, 50, 54, 60, 31, 31,
- 38, 47, 47, 50, 54, 60, 30, 32, 40, 46, 45, 48, 52, 57, 30, 32, 40, 46,
- 45, 48, 52, 57, 33, 36, 43, 47, 46, 47, 51, 56, 33, 36, 43, 47, 46, 47,
- 51, 56, 37, 40, 47, 47, 45, 47, 50, 54, 37, 40, 47, 47, 45, 47, 50, 54,
- 42, 43, 47, 50, 49, 50, 53, 57, 42, 43, 47, 50, 49, 50, 53, 57, 49, 46,
- 48, 53, 53, 54, 57, 60, 49, 46, 48, 53, 53, 54, 57, 60, 48, 46, 47, 53,
- 56, 57, 60, 64, 48, 46, 47, 53, 56, 57, 60, 64, 49, 45, 46, 53, 58, 61,
- 64, 67, 49, 45, 46, 53, 58, 61, 64, 67, 50, 46, 46, 54, 59, 64, 67, 71,
- 50, 46, 46, 54, 59, 64, 67, 71, 52, 48, 47, 54, 61, 66, 71, 75, 52, 48,
- 47, 54, 61, 66, 71, 75, 54, 50, 49, 55, 62, 68, 73, 78, 54, 50, 49, 55,
- 62, 68, 73, 78, 57, 52, 50, 56, 64, 70, 76, 82, 57, 52, 50, 56, 64, 70,
- 76, 82, 60, 54, 52, 58, 65, 72, 79, 85, 60, 54, 52, 58, 65, 72, 79, 85,
- 63, 57, 55, 60, 67, 75, 82, 89, 63, 57, 55, 60, 67, 75, 82, 89, 64, 59,
- 56, 61, 68, 75, 83, 90, 64, 59, 56, 61, 68, 75, 83, 90, 66, 60, 57, 63,
- 69, 77, 84, 92,
- /* Size 32x8 */
- 32, 31, 31, 30, 30, 33, 33, 37, 37, 42, 42, 49, 49, 48, 48, 49, 49, 50,
- 50, 52, 52, 54, 54, 57, 57, 60, 60, 63, 63, 64, 64, 66, 31, 31, 31, 32,
- 32, 36, 36, 40, 40, 43, 43, 46, 46, 46, 46, 45, 45, 46, 46, 48, 48, 50,
- 50, 52, 52, 54, 54, 57, 57, 59, 59, 60, 37, 38, 38, 40, 40, 43, 43, 47,
- 47, 47, 47, 48, 48, 47, 47, 46, 46, 46, 46, 47, 47, 49, 49, 50, 50, 52,
- 52, 55, 55, 56, 56, 57, 48, 47, 47, 46, 46, 47, 47, 47, 47, 50, 50, 53,
- 53, 53, 53, 53, 53, 54, 54, 54, 54, 55, 55, 56, 56, 58, 58, 60, 60, 61,
- 61, 63, 49, 47, 47, 45, 45, 46, 46, 45, 45, 49, 49, 53, 53, 56, 56, 58,
- 58, 59, 59, 61, 61, 62, 62, 64, 64, 65, 65, 67, 67, 68, 68, 69, 52, 50,
- 50, 48, 48, 47, 47, 47, 47, 50, 50, 54, 54, 57, 57, 61, 61, 64, 64, 66,
- 66, 68, 68, 70, 70, 72, 72, 75, 75, 75, 75, 77, 57, 54, 54, 52, 52, 51,
- 51, 50, 50, 53, 53, 57, 57, 60, 60, 64, 64, 67, 67, 71, 71, 73, 73, 76,
- 76, 79, 79, 82, 82, 83, 83, 84, 63, 60, 60, 57, 57, 56, 56, 54, 54, 57,
- 57, 60, 60, 64, 64, 67, 67, 71, 71, 75, 75, 78, 78, 82, 82, 85, 85, 89,
- 89, 90, 90, 92 },
- },
- {
- { /* Luma */
- /* Size 4x4 */
- 32, 33, 45, 62, 33, 39, 51, 64, 45, 51, 71, 87, 62, 64, 87, 108,
- /* Size 8x8 */
- 31, 32, 32, 35, 42, 51, 59, 69, 32, 32, 33, 35, 41, 49, 56, 65, 32, 33,
- 35, 38, 43, 49, 56, 64, 35, 35, 38, 48, 54, 59, 66, 73, 42, 41, 43, 54,
- 63, 71, 77, 85, 51, 49, 49, 59, 71, 81, 89, 97, 59, 56, 56, 66, 77, 89,
- 98, 108, 69, 65, 64, 73, 85, 97, 108, 119,
- /* Size 16x16 */
- 32, 31, 31, 31, 32, 34, 35, 38, 41, 45, 48, 54, 59, 65, 71, 80, 31, 32,
- 32, 32, 32, 34, 35, 37, 40, 43, 46, 51, 56, 62, 68, 76, 31, 32, 32, 32,
- 32, 33, 34, 36, 38, 41, 44, 49, 54, 59, 65, 72, 31, 32, 32, 33, 34, 35,
- 36, 38, 40, 42, 45, 50, 54, 59, 64, 71, 32, 32, 32, 34, 35, 37, 38, 39,
- 41, 43, 46, 49, 53, 58, 63, 69, 34, 34, 33, 35, 37, 39, 42, 44, 46, 48,
- 51, 54, 58, 63, 68, 74, 35, 35, 34, 36, 38, 42, 46, 48, 50, 53, 55, 59,
- 62, 67, 72, 78, 38, 37, 36, 38, 39, 44, 48, 51, 54, 57, 59, 63, 67, 71,
- 76, 82, 41, 40, 38, 40, 41, 46, 50, 54, 57, 60, 63, 67, 71, 75, 80, 86,
- 45, 43, 41, 42, 43, 48, 53, 57, 60, 65, 68, 72, 76, 81, 85, 91, 48, 46,
- 44, 45, 46, 51, 55, 59, 63, 68, 71, 76, 80, 85, 90, 96, 54, 51, 49, 50,
- 49, 54, 59, 63, 67, 72, 76, 82, 87, 92, 97, 104, 59, 56, 54, 54, 53, 58,
- 62, 67, 71, 76, 80, 87, 92, 98, 103, 110, 65, 62, 59, 59, 58, 63, 67,
- 71, 75, 81, 85, 92, 98, 105, 111, 118, 71, 68, 65, 64, 63, 68, 72, 76,
- 80, 85, 90, 97, 103, 111, 117, 125, 80, 76, 72, 71, 69, 74, 78, 82, 86,
- 91, 96, 104, 110, 118, 125, 134,
- /* Size 32x32 */
- 32, 31, 31, 31, 31, 31, 31, 32, 32, 32, 34, 34, 35, 36, 38, 39, 41, 44,
- 45, 48, 48, 53, 54, 57, 59, 62, 65, 67, 71, 72, 80, 80, 31, 31, 32, 32,
- 32, 32, 32, 32, 32, 32, 34, 34, 35, 35, 37, 38, 40, 42, 43, 46, 46, 51,
- 52, 55, 56, 59, 62, 64, 68, 69, 76, 76, 31, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 34, 34, 35, 35, 37, 38, 40, 42, 43, 46, 46, 51, 51, 55, 56, 59,
- 62, 64, 68, 69, 76, 76, 31, 32, 32, 32, 32, 32, 32, 32, 32, 32, 33, 33,
- 34, 34, 36, 38, 39, 41, 42, 45, 45, 49, 50, 53, 54, 57, 60, 62, 66, 66,
- 73, 73, 31, 32, 32, 32, 32, 32, 32, 32, 32, 33, 33, 33, 34, 34, 36, 37,
- 38, 41, 41, 44, 44, 49, 49, 52, 54, 56, 59, 61, 65, 65, 72, 72, 31, 32,
- 32, 32, 32, 32, 33, 33, 33, 33, 34, 34, 35, 35, 37, 38, 39, 41, 42, 45,
- 45, 49, 49, 52, 54, 56, 59, 61, 64, 65, 72, 72, 31, 32, 32, 32, 32, 33,
- 33, 33, 34, 34, 35, 35, 36, 36, 38, 39, 40, 42, 42, 45, 45, 49, 50, 52,
- 54, 56, 59, 60, 64, 65, 71, 71, 32, 32, 32, 32, 32, 33, 33, 34, 34, 34,
- 35, 35, 36, 37, 38, 39, 40, 42, 43, 45, 45, 49, 49, 52, 54, 56, 59, 60,
- 64, 64, 70, 70, 32, 32, 32, 32, 32, 33, 34, 34, 35, 35, 37, 37, 38, 38,
- 39, 40, 41, 42, 43, 46, 46, 49, 49, 52, 53, 55, 58, 59, 63, 63, 69, 69,
- 32, 32, 32, 32, 33, 33, 34, 34, 35, 35, 37, 37, 38, 38, 40, 41, 41, 43,
- 43, 46, 46, 49, 50, 52, 54, 56, 58, 60, 63, 64, 70, 70, 34, 34, 34, 33,
- 33, 34, 35, 35, 37, 37, 39, 39, 42, 42, 44, 45, 46, 47, 48, 51, 51, 54,
- 54, 57, 58, 60, 63, 64, 68, 68, 74, 74, 34, 34, 34, 33, 33, 34, 35, 35,
- 37, 37, 39, 39, 42, 42, 44, 45, 46, 47, 48, 51, 51, 54, 54, 57, 58, 60,
- 63, 64, 68, 68, 74, 74, 35, 35, 35, 34, 34, 35, 36, 36, 38, 38, 42, 42,
- 46, 47, 48, 49, 50, 52, 53, 55, 55, 58, 59, 61, 62, 64, 67, 68, 72, 72,
- 78, 78, 36, 35, 35, 34, 34, 35, 36, 37, 38, 38, 42, 42, 47, 48, 50, 50,
- 52, 54, 54, 57, 57, 59, 60, 62, 64, 66, 68, 69, 73, 73, 79, 79, 38, 37,
- 37, 36, 36, 37, 38, 38, 39, 40, 44, 44, 48, 50, 51, 52, 54, 56, 57, 59,
- 59, 62, 63, 65, 67, 69, 71, 72, 76, 76, 82, 82, 39, 38, 38, 38, 37, 38,
- 39, 39, 40, 41, 45, 45, 49, 50, 52, 54, 55, 58, 58, 61, 61, 64, 65, 67,
- 69, 71, 73, 74, 78, 78, 84, 84, 41, 40, 40, 39, 38, 39, 40, 40, 41, 41,
- 46, 46, 50, 52, 54, 55, 57, 60, 60, 63, 63, 67, 67, 70, 71, 73, 75, 77,
- 80, 81, 86, 86, 44, 42, 42, 41, 41, 41, 42, 42, 42, 43, 47, 47, 52, 54,
- 56, 58, 60, 63, 64, 67, 67, 71, 71, 74, 75, 77, 79, 81, 84, 85, 90, 90,
- 45, 43, 43, 42, 41, 42, 42, 43, 43, 43, 48, 48, 53, 54, 57, 58, 60, 64,
- 65, 68, 68, 72, 72, 75, 76, 78, 81, 82, 85, 86, 91, 91, 48, 46, 46, 45,
- 44, 45, 45, 45, 46, 46, 51, 51, 55, 57, 59, 61, 63, 67, 68, 71, 71, 75,
- 76, 79, 80, 83, 85, 87, 90, 91, 96, 96, 48, 46, 46, 45, 44, 45, 45, 45,
- 46, 46, 51, 51, 55, 57, 59, 61, 63, 67, 68, 71, 71, 75, 76, 79, 80, 83,
- 85, 87, 90, 91, 96, 96, 53, 51, 51, 49, 49, 49, 49, 49, 49, 49, 54, 54,
- 58, 59, 62, 64, 67, 71, 72, 75, 75, 81, 81, 85, 86, 89, 91, 93, 97, 97,
- 103, 103, 54, 52, 51, 50, 49, 49, 50, 49, 49, 50, 54, 54, 59, 60, 63,
- 65, 67, 71, 72, 76, 76, 81, 82, 85, 87, 89, 92, 94, 97, 98, 104, 104,
- 57, 55, 55, 53, 52, 52, 52, 52, 52, 52, 57, 57, 61, 62, 65, 67, 70, 74,
- 75, 79, 79, 85, 85, 89, 90, 93, 96, 98, 102, 102, 108, 108, 59, 56, 56,
- 54, 54, 54, 54, 54, 53, 54, 58, 58, 62, 64, 67, 69, 71, 75, 76, 80, 80,
- 86, 87, 90, 92, 95, 98, 99, 103, 104, 110, 110, 62, 59, 59, 57, 56, 56,
- 56, 56, 55, 56, 60, 60, 64, 66, 69, 71, 73, 77, 78, 83, 83, 89, 89, 93,
- 95, 98, 101, 103, 107, 108, 114, 114, 65, 62, 62, 60, 59, 59, 59, 59,
- 58, 58, 63, 63, 67, 68, 71, 73, 75, 79, 81, 85, 85, 91, 92, 96, 98, 101,
- 105, 106, 111, 111, 118, 118, 67, 64, 64, 62, 61, 61, 60, 60, 59, 60,
- 64, 64, 68, 69, 72, 74, 77, 81, 82, 87, 87, 93, 94, 98, 99, 103, 106,
- 108, 113, 113, 120, 120, 71, 68, 68, 66, 65, 64, 64, 64, 63, 63, 68, 68,
- 72, 73, 76, 78, 80, 84, 85, 90, 90, 97, 97, 102, 103, 107, 111, 113,
- 117, 118, 125, 125, 72, 69, 69, 66, 65, 65, 65, 64, 63, 64, 68, 68, 72,
- 73, 76, 78, 81, 85, 86, 91, 91, 97, 98, 102, 104, 108, 111, 113, 118,
- 119, 126, 126, 80, 76, 76, 73, 72, 72, 71, 70, 69, 70, 74, 74, 78, 79,
- 82, 84, 86, 90, 91, 96, 96, 103, 104, 108, 110, 114, 118, 120, 125, 126,
- 134, 134, 80, 76, 76, 73, 72, 72, 71, 70, 69, 70, 74, 74, 78, 79, 82,
- 84, 86, 90, 91, 96, 96, 103, 104, 108, 110, 114, 118, 120, 125, 126,
- 134, 134,
- /* Size 4x8 */
- 32, 34, 43, 62, 32, 34, 42, 59, 33, 37, 44, 58, 35, 43, 54, 68, 41, 48,
- 64, 79, 49, 54, 71, 91, 57, 60, 78, 101, 66, 68, 86, 111,
- /* Size 8x4 */
- 32, 32, 33, 35, 41, 49, 57, 66, 34, 34, 37, 43, 48, 54, 60, 68, 43, 42,
- 44, 54, 64, 71, 78, 86, 62, 59, 58, 68, 79, 91, 101, 111,
- /* Size 8x16 */
- 32, 31, 32, 36, 44, 53, 62, 73, 31, 32, 32, 35, 42, 51, 59, 69, 31, 32,
- 33, 34, 41, 49, 57, 66, 32, 32, 34, 36, 42, 50, 57, 65, 32, 33, 35, 38,
- 42, 49, 56, 64, 34, 34, 37, 42, 48, 54, 61, 69, 35, 34, 38, 47, 52, 59,
- 65, 73, 38, 36, 40, 49, 56, 63, 69, 77, 41, 39, 41, 51, 60, 67, 74, 81,
- 44, 42, 43, 54, 64, 72, 79, 86, 48, 45, 46, 56, 67, 76, 83, 91, 53, 49,
- 50, 60, 71, 82, 90, 99, 58, 54, 54, 63, 75, 87, 95, 105, 65, 60, 58, 68,
- 79, 92, 102, 112, 71, 65, 63, 73, 84, 97, 108, 119, 79, 72, 70, 79, 90,
- 104, 115, 127,
- /* Size 16x8 */
- 32, 31, 31, 32, 32, 34, 35, 38, 41, 44, 48, 53, 58, 65, 71, 79, 31, 32,
- 32, 32, 33, 34, 34, 36, 39, 42, 45, 49, 54, 60, 65, 72, 32, 32, 33, 34,
- 35, 37, 38, 40, 41, 43, 46, 50, 54, 58, 63, 70, 36, 35, 34, 36, 38, 42,
- 47, 49, 51, 54, 56, 60, 63, 68, 73, 79, 44, 42, 41, 42, 42, 48, 52, 56,
- 60, 64, 67, 71, 75, 79, 84, 90, 53, 51, 49, 50, 49, 54, 59, 63, 67, 72,
- 76, 82, 87, 92, 97, 104, 62, 59, 57, 57, 56, 61, 65, 69, 74, 79, 83, 90,
- 95, 102, 108, 115, 73, 69, 66, 65, 64, 69, 73, 77, 81, 86, 91, 99, 105,
- 112, 119, 127,
- /* Size 16x32 */
- 32, 31, 31, 32, 32, 34, 36, 38, 44, 44, 53, 53, 62, 65, 73, 79, 31, 32,
- 32, 32, 32, 34, 35, 37, 42, 43, 51, 51, 60, 62, 70, 75, 31, 32, 32, 32,
- 32, 34, 35, 37, 42, 43, 51, 51, 59, 62, 69, 75, 31, 32, 32, 32, 32, 33,
- 35, 36, 41, 42, 50, 50, 58, 60, 67, 73, 31, 32, 32, 32, 33, 33, 34, 36,
- 41, 41, 49, 49, 57, 59, 66, 72, 31, 32, 32, 33, 33, 34, 35, 37, 41, 42,
- 49, 49, 57, 59, 66, 71, 32, 32, 32, 33, 34, 35, 36, 38, 42, 43, 50, 50,
- 57, 59, 65, 71, 32, 32, 32, 34, 34, 35, 37, 38, 42, 43, 49, 49, 56, 59,
- 65, 70, 32, 32, 33, 34, 35, 37, 38, 39, 42, 43, 49, 49, 56, 58, 64, 69,
- 32, 33, 33, 34, 35, 37, 39, 40, 43, 44, 50, 50, 56, 58, 64, 69, 34, 34,
- 34, 36, 37, 39, 42, 44, 48, 48, 54, 54, 61, 63, 69, 73, 34, 34, 34, 36,
- 37, 39, 42, 44, 48, 48, 54, 54, 61, 63, 69, 73, 35, 34, 34, 37, 38, 42,
- 47, 48, 52, 53, 59, 59, 65, 67, 73, 77, 36, 35, 34, 37, 38, 43, 48, 49,
- 54, 54, 60, 60, 66, 68, 74, 78, 38, 36, 36, 38, 40, 44, 49, 51, 56, 57,
- 63, 63, 69, 71, 77, 81, 39, 38, 37, 40, 40, 45, 50, 52, 58, 58, 65, 65,
- 71, 73, 79, 84, 41, 39, 39, 41, 41, 46, 51, 54, 60, 60, 67, 67, 74, 76,
- 81, 86, 44, 41, 41, 42, 43, 48, 53, 56, 63, 64, 71, 71, 78, 79, 85, 90,
- 44, 42, 42, 43, 43, 48, 54, 56, 64, 64, 72, 72, 79, 81, 86, 91, 48, 45,
- 45, 46, 46, 51, 56, 59, 67, 67, 76, 76, 83, 85, 91, 96, 48, 45, 45, 46,
- 46, 51, 56, 59, 67, 67, 76, 76, 83, 85, 91, 96, 53, 49, 49, 49, 49, 54,
- 59, 62, 71, 71, 81, 81, 89, 91, 98, 103, 53, 50, 49, 50, 50, 54, 60, 63,
- 71, 72, 82, 82, 90, 92, 99, 103, 57, 53, 52, 52, 52, 57, 62, 65, 74, 75,
- 85, 85, 94, 96, 103, 108, 58, 54, 54, 54, 54, 58, 63, 67, 75, 76, 87,
- 87, 95, 98, 105, 110, 61, 57, 57, 56, 56, 60, 66, 69, 77, 78, 89, 89,
- 98, 101, 108, 114, 65, 60, 60, 59, 58, 63, 68, 71, 79, 80, 92, 92, 102,
- 105, 112, 118, 67, 62, 61, 60, 60, 64, 69, 72, 81, 82, 94, 94, 103, 106,
- 114, 120, 71, 66, 65, 64, 63, 68, 73, 76, 84, 85, 97, 97, 108, 111, 119,
- 125, 72, 66, 66, 64, 64, 68, 73, 76, 85, 86, 98, 98, 108, 111, 119, 125,
- 79, 73, 72, 71, 70, 74, 79, 82, 90, 91, 104, 104, 115, 118, 127, 133,
- 79, 73, 72, 71, 70, 74, 79, 82, 90, 91, 104, 104, 115, 118, 127, 133,
- /* Size 32x16 */
- 32, 31, 31, 31, 31, 31, 32, 32, 32, 32, 34, 34, 35, 36, 38, 39, 41, 44,
- 44, 48, 48, 53, 53, 57, 58, 61, 65, 67, 71, 72, 79, 79, 31, 32, 32, 32,
- 32, 32, 32, 32, 32, 33, 34, 34, 34, 35, 36, 38, 39, 41, 42, 45, 45, 49,
- 50, 53, 54, 57, 60, 62, 66, 66, 73, 73, 31, 32, 32, 32, 32, 32, 32, 32,
- 33, 33, 34, 34, 34, 34, 36, 37, 39, 41, 42, 45, 45, 49, 49, 52, 54, 57,
- 60, 61, 65, 66, 72, 72, 32, 32, 32, 32, 32, 33, 33, 34, 34, 34, 36, 36,
- 37, 37, 38, 40, 41, 42, 43, 46, 46, 49, 50, 52, 54, 56, 59, 60, 64, 64,
- 71, 71, 32, 32, 32, 32, 33, 33, 34, 34, 35, 35, 37, 37, 38, 38, 40, 40,
- 41, 43, 43, 46, 46, 49, 50, 52, 54, 56, 58, 60, 63, 64, 70, 70, 34, 34,
- 34, 33, 33, 34, 35, 35, 37, 37, 39, 39, 42, 43, 44, 45, 46, 48, 48, 51,
- 51, 54, 54, 57, 58, 60, 63, 64, 68, 68, 74, 74, 36, 35, 35, 35, 34, 35,
- 36, 37, 38, 39, 42, 42, 47, 48, 49, 50, 51, 53, 54, 56, 56, 59, 60, 62,
- 63, 66, 68, 69, 73, 73, 79, 79, 38, 37, 37, 36, 36, 37, 38, 38, 39, 40,
- 44, 44, 48, 49, 51, 52, 54, 56, 56, 59, 59, 62, 63, 65, 67, 69, 71, 72,
- 76, 76, 82, 82, 44, 42, 42, 41, 41, 41, 42, 42, 42, 43, 48, 48, 52, 54,
- 56, 58, 60, 63, 64, 67, 67, 71, 71, 74, 75, 77, 79, 81, 84, 85, 90, 90,
- 44, 43, 43, 42, 41, 42, 43, 43, 43, 44, 48, 48, 53, 54, 57, 58, 60, 64,
- 64, 67, 67, 71, 72, 75, 76, 78, 80, 82, 85, 86, 91, 91, 53, 51, 51, 50,
- 49, 49, 50, 49, 49, 50, 54, 54, 59, 60, 63, 65, 67, 71, 72, 76, 76, 81,
- 82, 85, 87, 89, 92, 94, 97, 98, 104, 104, 53, 51, 51, 50, 49, 49, 50,
- 49, 49, 50, 54, 54, 59, 60, 63, 65, 67, 71, 72, 76, 76, 81, 82, 85, 87,
- 89, 92, 94, 97, 98, 104, 104, 62, 60, 59, 58, 57, 57, 57, 56, 56, 56,
- 61, 61, 65, 66, 69, 71, 74, 78, 79, 83, 83, 89, 90, 94, 95, 98, 102,
- 103, 108, 108, 115, 115, 65, 62, 62, 60, 59, 59, 59, 59, 58, 58, 63, 63,
- 67, 68, 71, 73, 76, 79, 81, 85, 85, 91, 92, 96, 98, 101, 105, 106, 111,
- 111, 118, 118, 73, 70, 69, 67, 66, 66, 65, 65, 64, 64, 69, 69, 73, 74,
- 77, 79, 81, 85, 86, 91, 91, 98, 99, 103, 105, 108, 112, 114, 119, 119,
- 127, 127, 79, 75, 75, 73, 72, 71, 71, 70, 69, 69, 73, 73, 77, 78, 81,
- 84, 86, 90, 91, 96, 96, 103, 103, 108, 110, 114, 118, 120, 125, 125,
- 133, 133,
- /* Size 4x16 */
- 31, 34, 44, 65, 32, 34, 43, 62, 32, 33, 41, 59, 32, 35, 43, 59, 32, 37,
- 43, 58, 34, 39, 48, 63, 34, 42, 53, 67, 36, 44, 57, 71, 39, 46, 60, 76,
- 42, 48, 64, 81, 45, 51, 67, 85, 50, 54, 72, 92, 54, 58, 76, 98, 60, 63,
- 80, 105, 66, 68, 85, 111, 73, 74, 91, 118,
- /* Size 16x4 */
- 31, 32, 32, 32, 32, 34, 34, 36, 39, 42, 45, 50, 54, 60, 66, 73, 34, 34,
- 33, 35, 37, 39, 42, 44, 46, 48, 51, 54, 58, 63, 68, 74, 44, 43, 41, 43,
- 43, 48, 53, 57, 60, 64, 67, 72, 76, 80, 85, 91, 65, 62, 59, 59, 58, 63,
- 67, 71, 76, 81, 85, 92, 98, 105, 111, 118,
- /* Size 8x32 */
- 32, 31, 32, 36, 44, 53, 62, 73, 31, 32, 32, 35, 42, 51, 60, 70, 31, 32,
- 32, 35, 42, 51, 59, 69, 31, 32, 32, 35, 41, 50, 58, 67, 31, 32, 33, 34,
- 41, 49, 57, 66, 31, 32, 33, 35, 41, 49, 57, 66, 32, 32, 34, 36, 42, 50,
- 57, 65, 32, 32, 34, 37, 42, 49, 56, 65, 32, 33, 35, 38, 42, 49, 56, 64,
- 32, 33, 35, 39, 43, 50, 56, 64, 34, 34, 37, 42, 48, 54, 61, 69, 34, 34,
- 37, 42, 48, 54, 61, 69, 35, 34, 38, 47, 52, 59, 65, 73, 36, 34, 38, 48,
- 54, 60, 66, 74, 38, 36, 40, 49, 56, 63, 69, 77, 39, 37, 40, 50, 58, 65,
- 71, 79, 41, 39, 41, 51, 60, 67, 74, 81, 44, 41, 43, 53, 63, 71, 78, 85,
- 44, 42, 43, 54, 64, 72, 79, 86, 48, 45, 46, 56, 67, 76, 83, 91, 48, 45,
- 46, 56, 67, 76, 83, 91, 53, 49, 49, 59, 71, 81, 89, 98, 53, 49, 50, 60,
- 71, 82, 90, 99, 57, 52, 52, 62, 74, 85, 94, 103, 58, 54, 54, 63, 75, 87,
- 95, 105, 61, 57, 56, 66, 77, 89, 98, 108, 65, 60, 58, 68, 79, 92, 102,
- 112, 67, 61, 60, 69, 81, 94, 103, 114, 71, 65, 63, 73, 84, 97, 108, 119,
- 72, 66, 64, 73, 85, 98, 108, 119, 79, 72, 70, 79, 90, 104, 115, 127, 79,
- 72, 70, 79, 90, 104, 115, 127,
- /* Size 32x8 */
- 32, 31, 31, 31, 31, 31, 32, 32, 32, 32, 34, 34, 35, 36, 38, 39, 41, 44,
- 44, 48, 48, 53, 53, 57, 58, 61, 65, 67, 71, 72, 79, 79, 31, 32, 32, 32,
- 32, 32, 32, 32, 33, 33, 34, 34, 34, 34, 36, 37, 39, 41, 42, 45, 45, 49,
- 49, 52, 54, 57, 60, 61, 65, 66, 72, 72, 32, 32, 32, 32, 33, 33, 34, 34,
- 35, 35, 37, 37, 38, 38, 40, 40, 41, 43, 43, 46, 46, 49, 50, 52, 54, 56,
- 58, 60, 63, 64, 70, 70, 36, 35, 35, 35, 34, 35, 36, 37, 38, 39, 42, 42,
- 47, 48, 49, 50, 51, 53, 54, 56, 56, 59, 60, 62, 63, 66, 68, 69, 73, 73,
- 79, 79, 44, 42, 42, 41, 41, 41, 42, 42, 42, 43, 48, 48, 52, 54, 56, 58,
- 60, 63, 64, 67, 67, 71, 71, 74, 75, 77, 79, 81, 84, 85, 90, 90, 53, 51,
- 51, 50, 49, 49, 50, 49, 49, 50, 54, 54, 59, 60, 63, 65, 67, 71, 72, 76,
- 76, 81, 82, 85, 87, 89, 92, 94, 97, 98, 104, 104, 62, 60, 59, 58, 57,
- 57, 57, 56, 56, 56, 61, 61, 65, 66, 69, 71, 74, 78, 79, 83, 83, 89, 90,
- 94, 95, 98, 102, 103, 108, 108, 115, 115, 73, 70, 69, 67, 66, 66, 65,
- 65, 64, 64, 69, 69, 73, 74, 77, 79, 81, 85, 86, 91, 91, 98, 99, 103,
- 105, 108, 112, 114, 119, 119, 127, 127 },
- { /* Chroma */
- /* Size 4x4 */
- 31, 42, 47, 53, 42, 48, 50, 54, 47, 50, 61, 67, 53, 54, 67, 78,
- /* Size 8x8 */
- 31, 32, 38, 48, 47, 50, 53, 57, 32, 35, 42, 47, 45, 47, 50, 54, 38, 42,
- 47, 48, 45, 47, 49, 52, 48, 47, 48, 53, 53, 54, 56, 58, 47, 45, 45, 53,
- 58, 61, 63, 65, 50, 47, 47, 54, 61, 66, 69, 72, 53, 50, 49, 56, 63, 69,
- 73, 77, 57, 54, 52, 58, 65, 72, 77, 82,
- /* Size 16x16 */
- 32, 31, 30, 33, 36, 41, 47, 49, 49, 49, 50, 52, 54, 57, 60, 63, 31, 31,
- 31, 34, 38, 42, 46, 47, 47, 47, 48, 50, 52, 54, 57, 60, 30, 31, 32, 35,
- 40, 42, 45, 46, 45, 45, 46, 47, 49, 52, 54, 57, 33, 34, 35, 39, 43, 45,
- 47, 46, 46, 45, 46, 47, 49, 51, 53, 56, 36, 38, 40, 43, 47, 47, 47, 47,
- 46, 45, 46, 47, 48, 50, 52, 54, 41, 42, 42, 45, 47, 48, 50, 50, 49, 49,
- 50, 50, 52, 53, 55, 57, 47, 46, 45, 47, 47, 50, 52, 52, 52, 52, 53, 53,
- 55, 56, 58, 60, 49, 47, 46, 46, 47, 50, 52, 53, 54, 55, 55, 56, 57, 58,
- 60, 62, 49, 47, 45, 46, 46, 49, 52, 54, 55, 57, 58, 59, 60, 61, 63, 65,
- 49, 47, 45, 45, 45, 49, 52, 55, 57, 59, 60, 61, 63, 64, 66, 68, 50, 48,
- 46, 46, 46, 50, 53, 55, 58, 60, 61, 63, 65, 67, 68, 71, 52, 50, 47, 47,
- 47, 50, 53, 56, 59, 61, 63, 66, 68, 70, 72, 75, 54, 52, 49, 49, 48, 52,
- 55, 57, 60, 63, 65, 68, 71, 73, 75, 78, 57, 54, 52, 51, 50, 53, 56, 58,
- 61, 64, 67, 70, 73, 76, 79, 82, 60, 57, 54, 53, 52, 55, 58, 60, 63, 66,
- 68, 72, 75, 79, 82, 85, 63, 60, 57, 56, 54, 57, 60, 62, 65, 68, 71, 75,
- 78, 82, 85, 89,
- /* Size 32x32 */
- 32, 31, 31, 30, 30, 32, 33, 34, 36, 37, 41, 41, 47, 49, 49, 48, 49, 49,
- 49, 50, 50, 52, 52, 54, 54, 56, 57, 58, 60, 60, 63, 63, 31, 31, 31, 31,
- 31, 32, 34, 35, 38, 38, 42, 42, 46, 48, 47, 47, 47, 47, 47, 48, 48, 50,
- 50, 51, 52, 53, 54, 55, 57, 57, 60, 60, 31, 31, 31, 31, 31, 33, 34, 35,
- 38, 39, 42, 42, 46, 47, 47, 47, 47, 47, 47, 48, 48, 49, 50, 51, 52, 53,
- 54, 55, 57, 57, 60, 60, 30, 31, 31, 31, 31, 33, 35, 36, 39, 40, 42, 42,
- 46, 47, 46, 46, 46, 45, 46, 47, 47, 48, 48, 50, 50, 51, 52, 53, 55, 55,
- 58, 58, 30, 31, 31, 31, 32, 33, 35, 36, 40, 40, 42, 42, 45, 46, 46, 45,
- 45, 45, 45, 46, 46, 47, 47, 49, 49, 51, 52, 52, 54, 54, 57, 57, 32, 32,
- 33, 33, 33, 35, 37, 38, 41, 42, 43, 43, 46, 47, 46, 46, 45, 45, 45, 46,
- 46, 47, 47, 49, 49, 50, 51, 52, 54, 54, 57, 57, 33, 34, 34, 35, 35, 37,
- 39, 40, 43, 43, 45, 45, 47, 47, 46, 46, 46, 45, 45, 46, 46, 47, 47, 49,
- 49, 50, 51, 52, 53, 54, 56, 56, 34, 35, 35, 36, 36, 38, 40, 41, 44, 44,
- 45, 45, 47, 47, 47, 46, 46, 45, 45, 46, 46, 47, 47, 48, 49, 50, 51, 51,
- 53, 53, 55, 55, 36, 38, 38, 39, 40, 41, 43, 44, 47, 47, 47, 47, 47, 48,
- 47, 46, 46, 45, 45, 46, 46, 46, 47, 48, 48, 49, 50, 50, 52, 52, 54, 54,
- 37, 38, 39, 40, 40, 42, 43, 44, 47, 47, 47, 47, 48, 48, 47, 47, 46, 45,
- 46, 46, 46, 47, 47, 48, 48, 49, 50, 51, 52, 52, 55, 55, 41, 42, 42, 42,
- 42, 43, 45, 45, 47, 47, 48, 48, 50, 50, 50, 49, 49, 49, 49, 50, 50, 50,
- 50, 51, 52, 52, 53, 54, 55, 55, 57, 57, 41, 42, 42, 42, 42, 43, 45, 45,
- 47, 47, 48, 48, 50, 50, 50, 49, 49, 49, 49, 50, 50, 50, 50, 51, 52, 52,
- 53, 54, 55, 55, 57, 57, 47, 46, 46, 46, 45, 46, 47, 47, 47, 48, 50, 50,
- 52, 52, 52, 52, 52, 52, 52, 53, 53, 53, 53, 54, 55, 55, 56, 56, 58, 58,
- 60, 60, 49, 48, 47, 47, 46, 47, 47, 47, 48, 48, 50, 50, 52, 53, 53, 53,
- 53, 53, 53, 54, 54, 54, 54, 55, 55, 56, 56, 57, 58, 58, 60, 60, 49, 47,
- 47, 46, 46, 46, 46, 47, 47, 47, 50, 50, 52, 53, 53, 54, 54, 55, 55, 55,
- 55, 56, 56, 57, 57, 58, 58, 59, 60, 60, 62, 62, 48, 47, 47, 46, 45, 46,
- 46, 46, 46, 47, 49, 49, 52, 53, 54, 54, 55, 55, 56, 56, 56, 57, 57, 58,
- 58, 59, 60, 60, 61, 62, 63, 63, 49, 47, 47, 46, 45, 45, 46, 46, 46, 46,
- 49, 49, 52, 53, 54, 55, 55, 57, 57, 58, 58, 59, 59, 60, 60, 61, 61, 62,
- 63, 63, 65, 65, 49, 47, 47, 45, 45, 45, 45, 45, 45, 45, 49, 49, 52, 53,
- 55, 55, 57, 58, 59, 60, 60, 61, 61, 62, 62, 63, 63, 64, 65, 65, 67, 67,
- 49, 47, 47, 46, 45, 45, 45, 45, 45, 46, 49, 49, 52, 53, 55, 56, 57, 59,
- 59, 60, 60, 61, 61, 62, 63, 63, 64, 65, 66, 66, 68, 68, 50, 48, 48, 47,
- 46, 46, 46, 46, 46, 46, 50, 50, 53, 54, 55, 56, 58, 60, 60, 61, 61, 63,
- 63, 65, 65, 66, 67, 67, 68, 69, 71, 71, 50, 48, 48, 47, 46, 46, 46, 46,
- 46, 46, 50, 50, 53, 54, 55, 56, 58, 60, 60, 61, 61, 63, 63, 65, 65, 66,
- 67, 67, 68, 69, 71, 71, 52, 50, 49, 48, 47, 47, 47, 47, 46, 47, 50, 50,
- 53, 54, 56, 57, 59, 61, 61, 63, 63, 66, 66, 67, 68, 69, 70, 71, 72, 72,
- 74, 74, 52, 50, 50, 48, 47, 47, 47, 47, 47, 47, 50, 50, 53, 54, 56, 57,
- 59, 61, 61, 63, 63, 66, 66, 68, 68, 69, 70, 71, 72, 73, 75, 75, 54, 51,
- 51, 50, 49, 49, 49, 48, 48, 48, 51, 51, 54, 55, 57, 58, 60, 62, 62, 65,
- 65, 67, 68, 69, 70, 71, 72, 73, 74, 75, 77, 77, 54, 52, 52, 50, 49, 49,
- 49, 49, 48, 48, 52, 52, 55, 55, 57, 58, 60, 62, 63, 65, 65, 68, 68, 70,
- 71, 72, 73, 74, 75, 76, 78, 78, 56, 53, 53, 51, 51, 50, 50, 50, 49, 49,
- 52, 52, 55, 56, 58, 59, 61, 63, 63, 66, 66, 69, 69, 71, 72, 73, 75, 75,
- 77, 77, 80, 80, 57, 54, 54, 52, 52, 51, 51, 51, 50, 50, 53, 53, 56, 56,
- 58, 60, 61, 63, 64, 67, 67, 70, 70, 72, 73, 75, 76, 77, 79, 79, 82, 82,
- 58, 55, 55, 53, 52, 52, 52, 51, 50, 51, 54, 54, 56, 57, 59, 60, 62, 64,
- 65, 67, 67, 71, 71, 73, 74, 75, 77, 78, 80, 80, 83, 83, 60, 57, 57, 55,
- 54, 54, 53, 53, 52, 52, 55, 55, 58, 58, 60, 61, 63, 65, 66, 68, 68, 72,
- 72, 74, 75, 77, 79, 80, 82, 82, 85, 85, 60, 57, 57, 55, 54, 54, 54, 53,
- 52, 52, 55, 55, 58, 58, 60, 62, 63, 65, 66, 69, 69, 72, 73, 75, 76, 77,
- 79, 80, 82, 82, 85, 85, 63, 60, 60, 58, 57, 57, 56, 55, 54, 55, 57, 57,
- 60, 60, 62, 63, 65, 67, 68, 71, 71, 74, 75, 77, 78, 80, 82, 83, 85, 85,
- 89, 89, 63, 60, 60, 58, 57, 57, 56, 55, 54, 55, 57, 57, 60, 60, 62, 63,
- 65, 67, 68, 71, 71, 74, 75, 77, 78, 80, 82, 83, 85, 85, 89, 89,
- /* Size 4x8 */
- 31, 42, 47, 54, 33, 44, 45, 51, 40, 47, 46, 50, 47, 50, 54, 57, 45, 49,
- 59, 64, 48, 50, 61, 70, 51, 52, 63, 75, 55, 55, 66, 79,
- /* Size 8x4 */
- 31, 33, 40, 47, 45, 48, 51, 55, 42, 44, 47, 50, 49, 50, 52, 55, 47, 45,
- 46, 54, 59, 61, 63, 66, 54, 51, 50, 57, 64, 70, 75, 79,
- /* Size 8x16 */
- 32, 31, 37, 48, 49, 52, 56, 61, 31, 31, 38, 47, 47, 50, 53, 57, 30, 32,
- 40, 46, 45, 48, 51, 55, 33, 36, 43, 47, 46, 47, 50, 54, 37, 40, 47, 47,
- 45, 47, 49, 52, 42, 43, 47, 50, 49, 50, 53, 56, 47, 46, 48, 52, 53, 53,
- 55, 58, 48, 46, 47, 53, 55, 56, 58, 61, 48, 45, 46, 53, 57, 59, 61, 63,
- 49, 45, 46, 53, 58, 62, 64, 66, 50, 46, 46, 54, 59, 64, 66, 69, 52, 48,
- 47, 54, 61, 66, 70, 73, 54, 50, 49, 55, 62, 68, 72, 76, 57, 52, 50, 56,
- 64, 70, 75, 79, 60, 54, 52, 58, 65, 72, 77, 82, 63, 57, 55, 60, 67, 75,
- 80, 86,
- /* Size 16x8 */
- 32, 31, 30, 33, 37, 42, 47, 48, 48, 49, 50, 52, 54, 57, 60, 63, 31, 31,
- 32, 36, 40, 43, 46, 46, 45, 45, 46, 48, 50, 52, 54, 57, 37, 38, 40, 43,
- 47, 47, 48, 47, 46, 46, 46, 47, 49, 50, 52, 55, 48, 47, 46, 47, 47, 50,
- 52, 53, 53, 53, 54, 54, 55, 56, 58, 60, 49, 47, 45, 46, 45, 49, 53, 55,
- 57, 58, 59, 61, 62, 64, 65, 67, 52, 50, 48, 47, 47, 50, 53, 56, 59, 62,
- 64, 66, 68, 70, 72, 75, 56, 53, 51, 50, 49, 53, 55, 58, 61, 64, 66, 70,
- 72, 75, 77, 80, 61, 57, 55, 54, 52, 56, 58, 61, 63, 66, 69, 73, 76, 79,
- 82, 86,
- /* Size 16x32 */
- 32, 31, 31, 35, 37, 42, 48, 48, 49, 49, 52, 52, 56, 57, 61, 63, 31, 31,
- 31, 36, 38, 42, 47, 47, 47, 47, 50, 50, 54, 54, 58, 60, 31, 31, 31, 36,
- 38, 42, 47, 47, 47, 47, 50, 50, 53, 54, 57, 60, 30, 32, 32, 37, 39, 42,
- 46, 46, 46, 46, 48, 48, 52, 52, 56, 58, 30, 32, 32, 37, 40, 42, 46, 46,
- 45, 45, 48, 48, 51, 52, 55, 57, 32, 33, 34, 39, 41, 44, 46, 46, 45, 45,
- 48, 48, 51, 51, 54, 57, 33, 35, 36, 40, 43, 45, 47, 46, 46, 46, 47, 47,
- 50, 51, 54, 56, 34, 37, 37, 42, 44, 45, 47, 47, 45, 46, 47, 47, 50, 51,
- 53, 55, 37, 40, 40, 45, 47, 47, 47, 47, 45, 46, 47, 47, 49, 50, 52, 54,
- 37, 40, 40, 45, 47, 47, 48, 47, 46, 46, 47, 47, 49, 50, 53, 55, 42, 43,
- 43, 46, 47, 48, 50, 50, 49, 49, 50, 50, 53, 53, 56, 57, 42, 43, 43, 46,
- 47, 48, 50, 50, 49, 49, 50, 50, 53, 53, 56, 57, 47, 46, 46, 47, 48, 50,
- 52, 52, 53, 53, 53, 53, 55, 56, 58, 60, 49, 47, 46, 47, 48, 50, 53, 53,
- 53, 54, 54, 54, 56, 57, 59, 60, 48, 46, 46, 47, 47, 50, 53, 53, 55, 55,
- 56, 56, 58, 58, 61, 62, 48, 46, 46, 46, 47, 50, 53, 54, 56, 56, 57, 57,
- 59, 60, 62, 64, 48, 46, 45, 46, 46, 49, 53, 54, 57, 57, 59, 59, 61, 61,
- 63, 65, 49, 45, 45, 45, 46, 49, 53, 55, 58, 59, 61, 61, 63, 64, 66, 67,
- 49, 46, 45, 46, 46, 49, 53, 55, 58, 59, 62, 62, 64, 64, 66, 68, 50, 47,
- 46, 46, 46, 50, 54, 55, 59, 60, 64, 64, 66, 67, 69, 71, 50, 47, 46, 46,
- 46, 50, 54, 55, 59, 60, 64, 64, 66, 67, 69, 71, 52, 48, 48, 47, 47, 50,
- 54, 56, 61, 61, 66, 66, 69, 70, 72, 74, 52, 48, 48, 47, 47, 50, 54, 56,
- 61, 61, 66, 66, 70, 71, 73, 75, 53, 50, 49, 48, 48, 51, 55, 57, 62, 62,
- 68, 68, 71, 72, 75, 77, 54, 50, 50, 49, 49, 52, 55, 57, 62, 63, 68, 68,
- 72, 73, 76, 78, 55, 51, 51, 50, 49, 52, 56, 58, 63, 63, 69, 69, 74, 75,
- 78, 80, 57, 52, 52, 51, 50, 53, 56, 58, 64, 64, 70, 70, 75, 76, 79, 82,
- 58, 53, 53, 51, 51, 54, 57, 59, 64, 65, 71, 71, 76, 77, 80, 83, 60, 55,
- 54, 53, 52, 55, 58, 60, 65, 66, 72, 72, 77, 79, 82, 85, 60, 55, 55, 53,
- 53, 55, 59, 60, 65, 66, 73, 73, 78, 79, 83, 85, 63, 58, 57, 56, 55, 58,
- 60, 62, 67, 68, 75, 75, 80, 82, 86, 89, 63, 58, 57, 56, 55, 58, 60, 62,
- 67, 68, 75, 75, 80, 82, 86, 89,
- /* Size 32x16 */
- 32, 31, 31, 30, 30, 32, 33, 34, 37, 37, 42, 42, 47, 49, 48, 48, 48, 49,
- 49, 50, 50, 52, 52, 53, 54, 55, 57, 58, 60, 60, 63, 63, 31, 31, 31, 32,
- 32, 33, 35, 37, 40, 40, 43, 43, 46, 47, 46, 46, 46, 45, 46, 47, 47, 48,
- 48, 50, 50, 51, 52, 53, 55, 55, 58, 58, 31, 31, 31, 32, 32, 34, 36, 37,
- 40, 40, 43, 43, 46, 46, 46, 46, 45, 45, 45, 46, 46, 48, 48, 49, 50, 51,
- 52, 53, 54, 55, 57, 57, 35, 36, 36, 37, 37, 39, 40, 42, 45, 45, 46, 46,
- 47, 47, 47, 46, 46, 45, 46, 46, 46, 47, 47, 48, 49, 50, 51, 51, 53, 53,
- 56, 56, 37, 38, 38, 39, 40, 41, 43, 44, 47, 47, 47, 47, 48, 48, 47, 47,
- 46, 46, 46, 46, 46, 47, 47, 48, 49, 49, 50, 51, 52, 53, 55, 55, 42, 42,
- 42, 42, 42, 44, 45, 45, 47, 47, 48, 48, 50, 50, 50, 50, 49, 49, 49, 50,
- 50, 50, 50, 51, 52, 52, 53, 54, 55, 55, 58, 58, 48, 47, 47, 46, 46, 46,
- 47, 47, 47, 48, 50, 50, 52, 53, 53, 53, 53, 53, 53, 54, 54, 54, 54, 55,
- 55, 56, 56, 57, 58, 59, 60, 60, 48, 47, 47, 46, 46, 46, 46, 47, 47, 47,
- 50, 50, 52, 53, 53, 54, 54, 55, 55, 55, 55, 56, 56, 57, 57, 58, 58, 59,
- 60, 60, 62, 62, 49, 47, 47, 46, 45, 45, 46, 45, 45, 46, 49, 49, 53, 53,
- 55, 56, 57, 58, 58, 59, 59, 61, 61, 62, 62, 63, 64, 64, 65, 65, 67, 67,
- 49, 47, 47, 46, 45, 45, 46, 46, 46, 46, 49, 49, 53, 54, 55, 56, 57, 59,
- 59, 60, 60, 61, 61, 62, 63, 63, 64, 65, 66, 66, 68, 68, 52, 50, 50, 48,
- 48, 48, 47, 47, 47, 47, 50, 50, 53, 54, 56, 57, 59, 61, 62, 64, 64, 66,
- 66, 68, 68, 69, 70, 71, 72, 73, 75, 75, 52, 50, 50, 48, 48, 48, 47, 47,
- 47, 47, 50, 50, 53, 54, 56, 57, 59, 61, 62, 64, 64, 66, 66, 68, 68, 69,
- 70, 71, 72, 73, 75, 75, 56, 54, 53, 52, 51, 51, 50, 50, 49, 49, 53, 53,
- 55, 56, 58, 59, 61, 63, 64, 66, 66, 69, 70, 71, 72, 74, 75, 76, 77, 78,
- 80, 80, 57, 54, 54, 52, 52, 51, 51, 51, 50, 50, 53, 53, 56, 57, 58, 60,
- 61, 64, 64, 67, 67, 70, 71, 72, 73, 75, 76, 77, 79, 79, 82, 82, 61, 58,
- 57, 56, 55, 54, 54, 53, 52, 53, 56, 56, 58, 59, 61, 62, 63, 66, 66, 69,
- 69, 72, 73, 75, 76, 78, 79, 80, 82, 83, 86, 86, 63, 60, 60, 58, 57, 57,
- 56, 55, 54, 55, 57, 57, 60, 60, 62, 64, 65, 67, 68, 71, 71, 74, 75, 77,
- 78, 80, 82, 83, 85, 85, 89, 89,
- /* Size 4x16 */
- 31, 42, 49, 57, 31, 42, 47, 54, 32, 42, 45, 52, 35, 45, 46, 51, 40, 47,
- 46, 50, 43, 48, 49, 53, 46, 50, 53, 56, 46, 50, 55, 58, 46, 49, 57, 61,
- 46, 49, 59, 64, 47, 50, 60, 67, 48, 50, 61, 71, 50, 52, 63, 73, 52, 53,
- 64, 76, 55, 55, 66, 79, 58, 58, 68, 82,
- /* Size 16x4 */
- 31, 31, 32, 35, 40, 43, 46, 46, 46, 46, 47, 48, 50, 52, 55, 58, 42, 42,
- 42, 45, 47, 48, 50, 50, 49, 49, 50, 50, 52, 53, 55, 58, 49, 47, 45, 46,
- 46, 49, 53, 55, 57, 59, 60, 61, 63, 64, 66, 68, 57, 54, 52, 51, 50, 53,
- 56, 58, 61, 64, 67, 71, 73, 76, 79, 82,
- /* Size 8x32 */
- 32, 31, 37, 48, 49, 52, 56, 61, 31, 31, 38, 47, 47, 50, 54, 58, 31, 31,
- 38, 47, 47, 50, 53, 57, 30, 32, 39, 46, 46, 48, 52, 56, 30, 32, 40, 46,
- 45, 48, 51, 55, 32, 34, 41, 46, 45, 48, 51, 54, 33, 36, 43, 47, 46, 47,
- 50, 54, 34, 37, 44, 47, 45, 47, 50, 53, 37, 40, 47, 47, 45, 47, 49, 52,
- 37, 40, 47, 48, 46, 47, 49, 53, 42, 43, 47, 50, 49, 50, 53, 56, 42, 43,
- 47, 50, 49, 50, 53, 56, 47, 46, 48, 52, 53, 53, 55, 58, 49, 46, 48, 53,
- 53, 54, 56, 59, 48, 46, 47, 53, 55, 56, 58, 61, 48, 46, 47, 53, 56, 57,
- 59, 62, 48, 45, 46, 53, 57, 59, 61, 63, 49, 45, 46, 53, 58, 61, 63, 66,
- 49, 45, 46, 53, 58, 62, 64, 66, 50, 46, 46, 54, 59, 64, 66, 69, 50, 46,
- 46, 54, 59, 64, 66, 69, 52, 48, 47, 54, 61, 66, 69, 72, 52, 48, 47, 54,
- 61, 66, 70, 73, 53, 49, 48, 55, 62, 68, 71, 75, 54, 50, 49, 55, 62, 68,
- 72, 76, 55, 51, 49, 56, 63, 69, 74, 78, 57, 52, 50, 56, 64, 70, 75, 79,
- 58, 53, 51, 57, 64, 71, 76, 80, 60, 54, 52, 58, 65, 72, 77, 82, 60, 55,
- 53, 59, 65, 73, 78, 83, 63, 57, 55, 60, 67, 75, 80, 86, 63, 57, 55, 60,
- 67, 75, 80, 86,
- /* Size 32x8 */
- 32, 31, 31, 30, 30, 32, 33, 34, 37, 37, 42, 42, 47, 49, 48, 48, 48, 49,
- 49, 50, 50, 52, 52, 53, 54, 55, 57, 58, 60, 60, 63, 63, 31, 31, 31, 32,
- 32, 34, 36, 37, 40, 40, 43, 43, 46, 46, 46, 46, 45, 45, 45, 46, 46, 48,
- 48, 49, 50, 51, 52, 53, 54, 55, 57, 57, 37, 38, 38, 39, 40, 41, 43, 44,
- 47, 47, 47, 47, 48, 48, 47, 47, 46, 46, 46, 46, 46, 47, 47, 48, 49, 49,
- 50, 51, 52, 53, 55, 55, 48, 47, 47, 46, 46, 46, 47, 47, 47, 48, 50, 50,
- 52, 53, 53, 53, 53, 53, 53, 54, 54, 54, 54, 55, 55, 56, 56, 57, 58, 59,
- 60, 60, 49, 47, 47, 46, 45, 45, 46, 45, 45, 46, 49, 49, 53, 53, 55, 56,
- 57, 58, 58, 59, 59, 61, 61, 62, 62, 63, 64, 64, 65, 65, 67, 67, 52, 50,
- 50, 48, 48, 48, 47, 47, 47, 47, 50, 50, 53, 54, 56, 57, 59, 61, 62, 64,
- 64, 66, 66, 68, 68, 69, 70, 71, 72, 73, 75, 75, 56, 54, 53, 52, 51, 51,
- 50, 50, 49, 49, 53, 53, 55, 56, 58, 59, 61, 63, 64, 66, 66, 69, 70, 71,
- 72, 74, 75, 76, 77, 78, 80, 80, 61, 58, 57, 56, 55, 54, 54, 53, 52, 53,
- 56, 56, 58, 59, 61, 62, 63, 66, 66, 69, 69, 72, 73, 75, 76, 78, 79, 80,
- 82, 83, 86, 86 },
- },
- {
- { /* Luma */
- /* Size 4x4 */
- 32, 33, 42, 55, 33, 38, 46, 57, 42, 46, 63, 75, 55, 57, 75, 92,
- /* Size 8x8 */
- 31, 32, 32, 34, 38, 46, 52, 63, 32, 32, 32, 34, 37, 44, 49, 59, 32, 32,
- 35, 37, 40, 45, 49, 58, 34, 34, 37, 42, 47, 52, 56, 65, 38, 37, 40, 47,
- 54, 60, 65, 73, 46, 44, 45, 52, 60, 69, 75, 84, 52, 49, 49, 56, 65, 75,
- 82, 92, 63, 59, 58, 65, 73, 84, 92, 105,
- /* Size 16x16 */
- 32, 31, 31, 31, 32, 32, 34, 36, 38, 41, 44, 48, 54, 58, 61, 65, 31, 32,
- 32, 32, 32, 32, 34, 35, 38, 40, 42, 46, 51, 55, 58, 62, 31, 32, 32, 32,
- 32, 32, 33, 34, 37, 38, 41, 44, 49, 53, 56, 59, 31, 32, 32, 33, 33, 33,
- 35, 36, 38, 40, 42, 45, 49, 53, 56, 59, 32, 32, 32, 33, 34, 34, 36, 37,
- 39, 40, 42, 45, 49, 53, 55, 59, 32, 32, 32, 33, 34, 35, 37, 38, 40, 41,
- 42, 46, 49, 52, 55, 58, 34, 34, 33, 35, 36, 37, 39, 42, 44, 46, 47, 51,
- 54, 57, 60, 63, 36, 35, 34, 36, 37, 38, 42, 48, 50, 52, 54, 57, 60, 63,
- 65, 68, 38, 38, 37, 38, 39, 40, 44, 50, 52, 54, 57, 60, 64, 67, 69, 72,
- 41, 40, 38, 40, 40, 41, 46, 52, 54, 57, 60, 63, 67, 70, 73, 75, 44, 42,
- 41, 42, 42, 42, 47, 54, 57, 60, 63, 67, 71, 74, 77, 79, 48, 46, 44, 45,
- 45, 46, 51, 57, 60, 63, 67, 71, 76, 79, 82, 85, 54, 51, 49, 49, 49, 49,
- 54, 60, 64, 67, 71, 76, 82, 86, 89, 92, 58, 55, 53, 53, 53, 52, 57, 63,
- 67, 70, 74, 79, 86, 90, 93, 97, 61, 58, 56, 56, 55, 55, 60, 65, 69, 73,
- 77, 82, 89, 93, 97, 101, 65, 62, 59, 59, 59, 58, 63, 68, 72, 75, 79, 85,
- 92, 97, 101, 105,
- /* Size 32x32 */
- 32, 31, 31, 31, 31, 31, 31, 31, 32, 32, 32, 33, 34, 34, 36, 36, 38, 39,
- 41, 44, 44, 47, 48, 50, 54, 54, 58, 59, 61, 65, 65, 70, 31, 31, 31, 32,
- 32, 32, 32, 32, 32, 32, 32, 33, 34, 34, 35, 35, 38, 38, 40, 42, 42, 46,
- 47, 49, 52, 52, 56, 57, 59, 63, 63, 67, 31, 31, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 33, 34, 34, 35, 35, 38, 38, 40, 42, 42, 45, 46, 48, 51, 51,
- 55, 56, 58, 62, 62, 67, 31, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 33,
- 34, 34, 35, 35, 37, 38, 39, 42, 42, 45, 45, 47, 50, 50, 54, 55, 57, 61,
- 61, 65, 31, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 33, 33, 34, 34, 34,
- 37, 37, 38, 41, 41, 44, 44, 46, 49, 49, 53, 54, 56, 59, 59, 64, 31, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 33, 33, 34, 34, 34, 37, 37, 38, 41,
- 41, 44, 44, 46, 49, 49, 53, 54, 56, 59, 59, 64, 31, 32, 32, 32, 32, 32,
- 33, 33, 33, 33, 33, 34, 35, 35, 36, 36, 38, 39, 40, 42, 42, 44, 45, 47,
- 49, 49, 53, 54, 56, 59, 59, 63, 31, 32, 32, 32, 32, 32, 33, 33, 33, 34,
- 34, 35, 35, 36, 36, 36, 38, 39, 40, 42, 42, 45, 45, 47, 50, 50, 53, 54,
- 56, 59, 59, 63, 32, 32, 32, 32, 32, 32, 33, 33, 34, 34, 34, 35, 36, 36,
- 37, 37, 39, 39, 40, 42, 42, 45, 45, 47, 49, 49, 53, 54, 55, 59, 59, 63,
- 32, 32, 32, 32, 32, 32, 33, 34, 34, 35, 35, 36, 37, 37, 38, 38, 40, 40,
- 41, 42, 42, 45, 46, 47, 49, 49, 52, 53, 55, 58, 58, 62, 32, 32, 32, 32,
- 32, 32, 33, 34, 34, 35, 35, 36, 37, 37, 38, 38, 40, 40, 41, 42, 42, 45,
- 46, 47, 49, 49, 52, 53, 55, 58, 58, 62, 33, 33, 33, 33, 33, 33, 34, 35,
- 35, 36, 36, 38, 39, 40, 42, 42, 43, 44, 45, 46, 46, 49, 50, 51, 53, 53,
- 56, 57, 59, 62, 62, 66, 34, 34, 34, 34, 33, 33, 35, 35, 36, 37, 37, 39,
- 39, 41, 42, 42, 44, 45, 46, 47, 47, 50, 51, 52, 54, 54, 57, 58, 60, 63,
- 63, 67, 34, 34, 34, 34, 34, 34, 35, 36, 36, 37, 37, 40, 41, 42, 45, 45,
- 46, 47, 48, 50, 50, 52, 53, 54, 56, 56, 59, 60, 62, 65, 65, 69, 36, 35,
- 35, 35, 34, 34, 36, 36, 37, 38, 38, 42, 42, 45, 48, 48, 50, 50, 52, 54,
- 54, 56, 57, 58, 60, 60, 63, 64, 65, 68, 68, 72, 36, 35, 35, 35, 34, 34,
- 36, 36, 37, 38, 38, 42, 42, 45, 48, 48, 50, 50, 52, 54, 54, 56, 57, 58,
- 60, 60, 63, 64, 65, 68, 68, 72, 38, 38, 38, 37, 37, 37, 38, 38, 39, 40,
- 40, 43, 44, 46, 50, 50, 52, 53, 54, 57, 57, 59, 60, 61, 64, 64, 67, 68,
- 69, 72, 72, 76, 39, 38, 38, 38, 37, 37, 39, 39, 39, 40, 40, 44, 45, 47,
- 50, 50, 53, 54, 55, 58, 58, 60, 61, 62, 65, 65, 68, 69, 70, 73, 73, 77,
- 41, 40, 40, 39, 38, 38, 40, 40, 40, 41, 41, 45, 46, 48, 52, 52, 54, 55,
- 57, 60, 60, 62, 63, 65, 67, 67, 70, 71, 73, 75, 75, 79, 44, 42, 42, 42,
- 41, 41, 42, 42, 42, 42, 42, 46, 47, 50, 54, 54, 57, 58, 60, 63, 63, 66,
- 67, 68, 71, 71, 74, 75, 77, 79, 79, 83, 44, 42, 42, 42, 41, 41, 42, 42,
- 42, 42, 42, 46, 47, 50, 54, 54, 57, 58, 60, 63, 63, 66, 67, 68, 71, 71,
- 74, 75, 77, 79, 79, 83, 47, 46, 45, 45, 44, 44, 44, 45, 45, 45, 45, 49,
- 50, 52, 56, 56, 59, 60, 62, 66, 66, 69, 70, 72, 75, 75, 78, 79, 81, 84,
- 84, 88, 48, 47, 46, 45, 44, 44, 45, 45, 45, 46, 46, 50, 51, 53, 57, 57,
- 60, 61, 63, 67, 67, 70, 71, 73, 76, 76, 79, 80, 82, 85, 85, 89, 50, 49,
- 48, 47, 46, 46, 47, 47, 47, 47, 47, 51, 52, 54, 58, 58, 61, 62, 65, 68,
- 68, 72, 73, 75, 78, 78, 82, 83, 85, 88, 88, 92, 54, 52, 51, 50, 49, 49,
- 49, 50, 49, 49, 49, 53, 54, 56, 60, 60, 64, 65, 67, 71, 71, 75, 76, 78,
- 82, 82, 86, 87, 89, 92, 92, 96, 54, 52, 51, 50, 49, 49, 49, 50, 49, 49,
- 49, 53, 54, 56, 60, 60, 64, 65, 67, 71, 71, 75, 76, 78, 82, 82, 86, 87,
- 89, 92, 92, 96, 58, 56, 55, 54, 53, 53, 53, 53, 53, 52, 52, 56, 57, 59,
- 63, 63, 67, 68, 70, 74, 74, 78, 79, 82, 86, 86, 90, 91, 93, 97, 97, 101,
- 59, 57, 56, 55, 54, 54, 54, 54, 54, 53, 53, 57, 58, 60, 64, 64, 68, 69,
- 71, 75, 75, 79, 80, 83, 87, 87, 91, 92, 94, 98, 98, 102, 61, 59, 58, 57,
- 56, 56, 56, 56, 55, 55, 55, 59, 60, 62, 65, 65, 69, 70, 73, 77, 77, 81,
- 82, 85, 89, 89, 93, 94, 97, 101, 101, 105, 65, 63, 62, 61, 59, 59, 59,
- 59, 59, 58, 58, 62, 63, 65, 68, 68, 72, 73, 75, 79, 79, 84, 85, 88, 92,
- 92, 97, 98, 101, 105, 105, 109, 65, 63, 62, 61, 59, 59, 59, 59, 59, 58,
- 58, 62, 63, 65, 68, 68, 72, 73, 75, 79, 79, 84, 85, 88, 92, 92, 97, 98,
- 101, 105, 105, 109, 70, 67, 67, 65, 64, 64, 63, 63, 63, 62, 62, 66, 67,
- 69, 72, 72, 76, 77, 79, 83, 83, 88, 89, 92, 96, 96, 101, 102, 105, 109,
- 109, 114,
- /* Size 4x8 */
- 32, 32, 42, 56, 32, 33, 41, 53, 32, 35, 42, 52, 34, 37, 50, 59, 38, 40,
- 58, 68, 44, 45, 66, 78, 50, 50, 71, 86, 61, 58, 79, 97,
- /* Size 8x4 */
- 32, 32, 32, 34, 38, 44, 50, 61, 32, 33, 35, 37, 40, 45, 50, 58, 42, 41,
- 42, 50, 58, 66, 71, 79, 56, 53, 52, 59, 68, 78, 86, 97,
- /* Size 8x16 */
- 32, 31, 32, 35, 39, 44, 53, 65, 31, 32, 32, 35, 38, 42, 51, 62, 31, 32,
- 33, 34, 37, 41, 49, 59, 31, 32, 34, 35, 38, 42, 49, 59, 32, 32, 34, 36,
- 39, 42, 49, 58, 32, 33, 35, 37, 40, 42, 49, 58, 34, 34, 37, 41, 44, 48,
- 54, 63, 36, 34, 38, 46, 50, 54, 60, 68, 38, 37, 40, 47, 52, 57, 64, 72,
- 41, 39, 41, 49, 54, 60, 67, 76, 44, 41, 43, 51, 57, 63, 71, 79, 48, 45,
- 46, 54, 60, 67, 76, 85, 53, 49, 50, 57, 64, 71, 82, 92, 57, 53, 53, 60,
- 67, 74, 86, 97, 61, 56, 56, 63, 69, 77, 89, 100, 65, 60, 58, 66, 72, 79,
- 92, 105,
- /* Size 16x8 */
- 32, 31, 31, 31, 32, 32, 34, 36, 38, 41, 44, 48, 53, 57, 61, 65, 31, 32,
- 32, 32, 32, 33, 34, 34, 37, 39, 41, 45, 49, 53, 56, 60, 32, 32, 33, 34,
- 34, 35, 37, 38, 40, 41, 43, 46, 50, 53, 56, 58, 35, 35, 34, 35, 36, 37,
- 41, 46, 47, 49, 51, 54, 57, 60, 63, 66, 39, 38, 37, 38, 39, 40, 44, 50,
- 52, 54, 57, 60, 64, 67, 69, 72, 44, 42, 41, 42, 42, 42, 48, 54, 57, 60,
- 63, 67, 71, 74, 77, 79, 53, 51, 49, 49, 49, 49, 54, 60, 64, 67, 71, 76,
- 82, 86, 89, 92, 65, 62, 59, 59, 58, 58, 63, 68, 72, 76, 79, 85, 92, 97,
- 100, 105,
- /* Size 16x32 */
- 32, 31, 31, 31, 32, 32, 35, 36, 39, 44, 44, 51, 53, 58, 65, 65, 31, 32,
- 32, 32, 32, 32, 35, 35, 38, 42, 42, 49, 52, 56, 63, 63, 31, 32, 32, 32,
- 32, 32, 35, 35, 38, 42, 42, 49, 51, 55, 62, 62, 31, 32, 32, 32, 32, 32,
- 34, 35, 37, 41, 41, 48, 50, 54, 61, 61, 31, 32, 32, 32, 33, 33, 34, 34,
- 37, 41, 41, 47, 49, 53, 59, 59, 31, 32, 32, 32, 33, 33, 34, 34, 37, 41,
- 41, 47, 49, 53, 59, 59, 31, 32, 32, 33, 34, 34, 35, 36, 38, 42, 42, 48,
- 49, 53, 59, 59, 32, 32, 32, 33, 34, 34, 36, 36, 38, 42, 42, 48, 50, 53,
- 59, 59, 32, 32, 32, 33, 34, 34, 36, 37, 39, 42, 42, 48, 49, 53, 58, 58,
- 32, 32, 33, 34, 35, 35, 37, 38, 40, 42, 42, 48, 49, 52, 58, 58, 32, 32,
- 33, 34, 35, 35, 37, 38, 40, 42, 42, 48, 49, 52, 58, 58, 33, 33, 33, 35,
- 36, 36, 40, 41, 43, 46, 46, 52, 53, 56, 62, 62, 34, 34, 34, 35, 37, 37,
- 41, 42, 44, 48, 48, 53, 54, 57, 63, 63, 34, 34, 34, 35, 37, 37, 43, 44,
- 46, 50, 50, 55, 56, 59, 65, 65, 36, 35, 34, 36, 38, 38, 46, 48, 50, 54,
- 54, 58, 60, 63, 68, 68, 36, 35, 34, 36, 38, 38, 46, 48, 50, 54, 54, 58,
- 60, 63, 68, 68, 38, 37, 37, 38, 40, 40, 47, 50, 52, 57, 57, 62, 64, 67,
- 72, 72, 39, 38, 37, 39, 40, 40, 48, 50, 53, 58, 58, 63, 65, 68, 73, 73,
- 41, 39, 39, 40, 41, 41, 49, 51, 54, 60, 60, 66, 67, 70, 76, 76, 44, 41,
- 41, 42, 43, 43, 51, 53, 57, 63, 63, 69, 71, 74, 79, 79, 44, 41, 41, 42,
- 43, 43, 51, 53, 57, 63, 63, 69, 71, 74, 79, 79, 47, 44, 44, 44, 45, 45,
- 53, 56, 59, 66, 66, 73, 75, 78, 84, 84, 48, 45, 45, 45, 46, 46, 54, 56,
- 60, 67, 67, 74, 76, 79, 85, 85, 50, 47, 46, 47, 47, 47, 55, 58, 61, 68,
- 68, 76, 78, 82, 88, 88, 53, 50, 49, 50, 50, 50, 57, 60, 64, 71, 71, 79,
- 82, 86, 92, 92, 53, 50, 49, 50, 50, 50, 57, 60, 64, 71, 71, 79, 82, 86,
- 92, 92, 57, 54, 53, 53, 53, 53, 60, 63, 67, 74, 74, 83, 86, 90, 97, 97,
- 58, 55, 54, 54, 54, 54, 61, 63, 68, 75, 75, 84, 87, 91, 98, 98, 61, 57,
- 56, 56, 56, 56, 63, 65, 69, 77, 77, 86, 89, 93, 100, 100, 65, 61, 60,
- 59, 58, 58, 66, 68, 72, 79, 79, 89, 92, 97, 105, 105, 65, 61, 60, 59,
- 58, 58, 66, 68, 72, 79, 79, 89, 92, 97, 105, 105, 70, 65, 64, 63, 62,
- 62, 70, 72, 76, 83, 83, 93, 96, 101, 109, 109,
- /* Size 32x16 */
- 32, 31, 31, 31, 31, 31, 31, 32, 32, 32, 32, 33, 34, 34, 36, 36, 38, 39,
- 41, 44, 44, 47, 48, 50, 53, 53, 57, 58, 61, 65, 65, 70, 31, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 33, 34, 34, 35, 35, 37, 38, 39, 41, 41, 44,
- 45, 47, 50, 50, 54, 55, 57, 61, 61, 65, 31, 32, 32, 32, 32, 32, 32, 32,
- 32, 33, 33, 33, 34, 34, 34, 34, 37, 37, 39, 41, 41, 44, 45, 46, 49, 49,
- 53, 54, 56, 60, 60, 64, 31, 32, 32, 32, 32, 32, 33, 33, 33, 34, 34, 35,
- 35, 35, 36, 36, 38, 39, 40, 42, 42, 44, 45, 47, 50, 50, 53, 54, 56, 59,
- 59, 63, 32, 32, 32, 32, 33, 33, 34, 34, 34, 35, 35, 36, 37, 37, 38, 38,
- 40, 40, 41, 43, 43, 45, 46, 47, 50, 50, 53, 54, 56, 58, 58, 62, 32, 32,
- 32, 32, 33, 33, 34, 34, 34, 35, 35, 36, 37, 37, 38, 38, 40, 40, 41, 43,
- 43, 45, 46, 47, 50, 50, 53, 54, 56, 58, 58, 62, 35, 35, 35, 34, 34, 34,
- 35, 36, 36, 37, 37, 40, 41, 43, 46, 46, 47, 48, 49, 51, 51, 53, 54, 55,
- 57, 57, 60, 61, 63, 66, 66, 70, 36, 35, 35, 35, 34, 34, 36, 36, 37, 38,
- 38, 41, 42, 44, 48, 48, 50, 50, 51, 53, 53, 56, 56, 58, 60, 60, 63, 63,
- 65, 68, 68, 72, 39, 38, 38, 37, 37, 37, 38, 38, 39, 40, 40, 43, 44, 46,
- 50, 50, 52, 53, 54, 57, 57, 59, 60, 61, 64, 64, 67, 68, 69, 72, 72, 76,
- 44, 42, 42, 41, 41, 41, 42, 42, 42, 42, 42, 46, 48, 50, 54, 54, 57, 58,
- 60, 63, 63, 66, 67, 68, 71, 71, 74, 75, 77, 79, 79, 83, 44, 42, 42, 41,
- 41, 41, 42, 42, 42, 42, 42, 46, 48, 50, 54, 54, 57, 58, 60, 63, 63, 66,
- 67, 68, 71, 71, 74, 75, 77, 79, 79, 83, 51, 49, 49, 48, 47, 47, 48, 48,
- 48, 48, 48, 52, 53, 55, 58, 58, 62, 63, 66, 69, 69, 73, 74, 76, 79, 79,
- 83, 84, 86, 89, 89, 93, 53, 52, 51, 50, 49, 49, 49, 50, 49, 49, 49, 53,
- 54, 56, 60, 60, 64, 65, 67, 71, 71, 75, 76, 78, 82, 82, 86, 87, 89, 92,
- 92, 96, 58, 56, 55, 54, 53, 53, 53, 53, 53, 52, 52, 56, 57, 59, 63, 63,
- 67, 68, 70, 74, 74, 78, 79, 82, 86, 86, 90, 91, 93, 97, 97, 101, 65, 63,
- 62, 61, 59, 59, 59, 59, 58, 58, 58, 62, 63, 65, 68, 68, 72, 73, 76, 79,
- 79, 84, 85, 88, 92, 92, 97, 98, 100, 105, 105, 109, 65, 63, 62, 61, 59,
- 59, 59, 59, 58, 58, 58, 62, 63, 65, 68, 68, 72, 73, 76, 79, 79, 84, 85,
- 88, 92, 92, 97, 98, 100, 105, 105, 109,
- /* Size 4x16 */
- 31, 32, 44, 58, 32, 32, 42, 55, 32, 33, 41, 53, 32, 34, 42, 53, 32, 34,
- 42, 53, 32, 35, 42, 52, 34, 37, 48, 57, 35, 38, 54, 63, 37, 40, 57, 67,
- 39, 41, 60, 70, 41, 43, 63, 74, 45, 46, 67, 79, 50, 50, 71, 86, 54, 53,
- 74, 90, 57, 56, 77, 93, 61, 58, 79, 97,
- /* Size 16x4 */
- 31, 32, 32, 32, 32, 32, 34, 35, 37, 39, 41, 45, 50, 54, 57, 61, 32, 32,
- 33, 34, 34, 35, 37, 38, 40, 41, 43, 46, 50, 53, 56, 58, 44, 42, 41, 42,
- 42, 42, 48, 54, 57, 60, 63, 67, 71, 74, 77, 79, 58, 55, 53, 53, 53, 52,
- 57, 63, 67, 70, 74, 79, 86, 90, 93, 97,
- /* Size 8x32 */
- 32, 31, 32, 35, 39, 44, 53, 65, 31, 32, 32, 35, 38, 42, 52, 63, 31, 32,
- 32, 35, 38, 42, 51, 62, 31, 32, 32, 34, 37, 41, 50, 61, 31, 32, 33, 34,
- 37, 41, 49, 59, 31, 32, 33, 34, 37, 41, 49, 59, 31, 32, 34, 35, 38, 42,
- 49, 59, 32, 32, 34, 36, 38, 42, 50, 59, 32, 32, 34, 36, 39, 42, 49, 58,
- 32, 33, 35, 37, 40, 42, 49, 58, 32, 33, 35, 37, 40, 42, 49, 58, 33, 33,
- 36, 40, 43, 46, 53, 62, 34, 34, 37, 41, 44, 48, 54, 63, 34, 34, 37, 43,
- 46, 50, 56, 65, 36, 34, 38, 46, 50, 54, 60, 68, 36, 34, 38, 46, 50, 54,
- 60, 68, 38, 37, 40, 47, 52, 57, 64, 72, 39, 37, 40, 48, 53, 58, 65, 73,
- 41, 39, 41, 49, 54, 60, 67, 76, 44, 41, 43, 51, 57, 63, 71, 79, 44, 41,
- 43, 51, 57, 63, 71, 79, 47, 44, 45, 53, 59, 66, 75, 84, 48, 45, 46, 54,
- 60, 67, 76, 85, 50, 46, 47, 55, 61, 68, 78, 88, 53, 49, 50, 57, 64, 71,
- 82, 92, 53, 49, 50, 57, 64, 71, 82, 92, 57, 53, 53, 60, 67, 74, 86, 97,
- 58, 54, 54, 61, 68, 75, 87, 98, 61, 56, 56, 63, 69, 77, 89, 100, 65, 60,
- 58, 66, 72, 79, 92, 105, 65, 60, 58, 66, 72, 79, 92, 105, 70, 64, 62,
- 70, 76, 83, 96, 109,
- /* Size 32x8 */
- 32, 31, 31, 31, 31, 31, 31, 32, 32, 32, 32, 33, 34, 34, 36, 36, 38, 39,
- 41, 44, 44, 47, 48, 50, 53, 53, 57, 58, 61, 65, 65, 70, 31, 32, 32, 32,
- 32, 32, 32, 32, 32, 33, 33, 33, 34, 34, 34, 34, 37, 37, 39, 41, 41, 44,
- 45, 46, 49, 49, 53, 54, 56, 60, 60, 64, 32, 32, 32, 32, 33, 33, 34, 34,
- 34, 35, 35, 36, 37, 37, 38, 38, 40, 40, 41, 43, 43, 45, 46, 47, 50, 50,
- 53, 54, 56, 58, 58, 62, 35, 35, 35, 34, 34, 34, 35, 36, 36, 37, 37, 40,
- 41, 43, 46, 46, 47, 48, 49, 51, 51, 53, 54, 55, 57, 57, 60, 61, 63, 66,
- 66, 70, 39, 38, 38, 37, 37, 37, 38, 38, 39, 40, 40, 43, 44, 46, 50, 50,
- 52, 53, 54, 57, 57, 59, 60, 61, 64, 64, 67, 68, 69, 72, 72, 76, 44, 42,
- 42, 41, 41, 41, 42, 42, 42, 42, 42, 46, 48, 50, 54, 54, 57, 58, 60, 63,
- 63, 66, 67, 68, 71, 71, 74, 75, 77, 79, 79, 83, 53, 52, 51, 50, 49, 49,
- 49, 50, 49, 49, 49, 53, 54, 56, 60, 60, 64, 65, 67, 71, 71, 75, 76, 78,
- 82, 82, 86, 87, 89, 92, 92, 96, 65, 63, 62, 61, 59, 59, 59, 59, 58, 58,
- 58, 62, 63, 65, 68, 68, 72, 73, 76, 79, 79, 84, 85, 88, 92, 92, 97, 98,
- 100, 105, 105, 109 },
- { /* Chroma */
- /* Size 4x4 */
- 31, 41, 46, 51, 41, 48, 48, 51, 46, 48, 58, 62, 51, 51, 62, 71,
- /* Size 8x8 */
- 31, 31, 38, 44, 47, 48, 50, 55, 31, 32, 40, 44, 45, 46, 47, 52, 38, 40,
- 47, 47, 46, 46, 47, 50, 44, 44, 47, 50, 51, 51, 52, 54, 47, 45, 46, 51,
- 54, 56, 57, 60, 48, 46, 46, 51, 56, 61, 63, 66, 50, 47, 47, 52, 57, 63,
- 66, 70, 55, 52, 50, 54, 60, 66, 70, 76,
- /* Size 16x16 */
- 32, 31, 30, 33, 34, 36, 41, 49, 48, 49, 49, 50, 52, 54, 55, 57, 31, 31,
- 31, 34, 36, 38, 42, 47, 47, 47, 47, 48, 50, 51, 53, 54, 30, 31, 32, 34,
- 37, 40, 42, 46, 45, 45, 45, 46, 47, 49, 50, 52, 33, 34, 34, 37, 40, 42,
- 44, 47, 46, 46, 45, 46, 47, 49, 50, 51, 34, 36, 37, 40, 42, 45, 46, 47,
- 46, 46, 45, 46, 47, 48, 49, 50, 36, 38, 40, 42, 45, 47, 47, 48, 47, 46,
- 45, 46, 47, 48, 49, 50, 41, 42, 42, 44, 46, 47, 48, 50, 50, 49, 49, 50,
- 50, 51, 52, 53, 49, 47, 46, 47, 47, 48, 50, 53, 53, 53, 53, 54, 54, 55,
- 56, 56, 48, 47, 45, 46, 46, 47, 50, 53, 54, 54, 55, 56, 57, 58, 58, 59,
- 49, 47, 45, 46, 46, 46, 49, 53, 54, 55, 57, 58, 59, 60, 60, 61, 49, 47,
- 45, 45, 45, 45, 49, 53, 55, 57, 58, 60, 61, 62, 63, 63, 50, 48, 46, 46,
- 46, 46, 50, 54, 56, 58, 60, 61, 63, 65, 66, 67, 52, 50, 47, 47, 47, 47,
- 50, 54, 57, 59, 61, 63, 66, 68, 69, 70, 54, 51, 49, 49, 48, 48, 51, 55,
- 58, 60, 62, 65, 68, 70, 71, 73, 55, 53, 50, 50, 49, 49, 52, 56, 58, 60,
- 63, 66, 69, 71, 73, 74, 57, 54, 52, 51, 50, 50, 53, 56, 59, 61, 63, 67,
- 70, 73, 74, 76,
- /* Size 32x32 */
- 32, 31, 31, 31, 30, 30, 33, 33, 34, 36, 36, 40, 41, 44, 49, 49, 48, 48,
- 49, 49, 49, 50, 50, 51, 52, 52, 54, 54, 55, 57, 57, 59, 31, 31, 31, 31,
- 31, 31, 33, 34, 36, 38, 38, 41, 42, 44, 48, 48, 47, 47, 47, 47, 47, 48,
- 49, 49, 50, 50, 52, 52, 53, 55, 55, 57, 31, 31, 31, 31, 31, 31, 34, 34,
- 36, 38, 38, 41, 42, 44, 47, 47, 47, 47, 47, 47, 47, 48, 48, 49, 50, 50,
- 51, 52, 53, 54, 54, 56, 31, 31, 31, 31, 31, 31, 34, 35, 36, 39, 39, 41,
- 42, 44, 47, 47, 46, 46, 46, 46, 46, 47, 47, 48, 49, 49, 50, 51, 52, 53,
- 53, 55, 30, 31, 31, 31, 32, 32, 34, 35, 37, 40, 40, 42, 42, 44, 46, 46,
- 45, 45, 45, 45, 45, 46, 46, 47, 47, 47, 49, 49, 50, 52, 52, 54, 30, 31,
- 31, 31, 32, 32, 34, 35, 37, 40, 40, 42, 42, 44, 46, 46, 45, 45, 45, 45,
- 45, 46, 46, 47, 47, 47, 49, 49, 50, 52, 52, 54, 33, 33, 34, 34, 34, 34,
- 37, 38, 40, 42, 42, 44, 44, 45, 47, 47, 46, 46, 46, 45, 45, 46, 46, 47,
- 47, 47, 49, 49, 50, 51, 51, 53, 33, 34, 34, 35, 35, 35, 38, 39, 40, 43,
- 43, 44, 45, 46, 47, 47, 46, 46, 46, 45, 45, 46, 46, 47, 47, 47, 49, 49,
- 50, 51, 51, 53, 34, 36, 36, 36, 37, 37, 40, 40, 42, 45, 45, 45, 46, 46,
- 47, 47, 46, 46, 46, 45, 45, 46, 46, 47, 47, 47, 48, 49, 49, 50, 50, 52,
- 36, 38, 38, 39, 40, 40, 42, 43, 45, 47, 47, 47, 47, 47, 48, 48, 47, 46,
- 46, 45, 45, 46, 46, 46, 47, 47, 48, 48, 49, 50, 50, 51, 36, 38, 38, 39,
- 40, 40, 42, 43, 45, 47, 47, 47, 47, 47, 48, 48, 47, 46, 46, 45, 45, 46,
- 46, 46, 47, 47, 48, 48, 49, 50, 50, 51, 40, 41, 41, 41, 42, 42, 44, 44,
- 45, 47, 47, 48, 48, 49, 50, 50, 49, 49, 49, 48, 48, 49, 49, 49, 49, 49,
- 51, 51, 51, 52, 52, 54, 41, 42, 42, 42, 42, 42, 44, 45, 46, 47, 47, 48,
- 48, 49, 50, 50, 50, 49, 49, 49, 49, 50, 50, 50, 50, 50, 51, 52, 52, 53,
- 53, 55, 44, 44, 44, 44, 44, 44, 45, 46, 46, 47, 47, 49, 49, 50, 51, 51,
- 51, 51, 51, 51, 51, 51, 51, 51, 52, 52, 53, 53, 54, 54, 54, 56, 49, 48,
- 47, 47, 46, 46, 47, 47, 47, 48, 48, 50, 50, 51, 53, 53, 53, 53, 53, 53,
- 53, 54, 54, 54, 54, 54, 55, 55, 56, 56, 56, 58, 49, 48, 47, 47, 46, 46,
- 47, 47, 47, 48, 48, 50, 50, 51, 53, 53, 53, 53, 53, 53, 53, 54, 54, 54,
- 54, 54, 55, 55, 56, 56, 56, 58, 48, 47, 47, 46, 45, 45, 46, 46, 46, 47,
- 47, 49, 50, 51, 53, 53, 54, 54, 54, 55, 55, 56, 56, 56, 57, 57, 58, 58,
- 58, 59, 59, 60, 48, 47, 47, 46, 45, 45, 46, 46, 46, 46, 46, 49, 49, 51,
- 53, 53, 54, 54, 55, 55, 55, 56, 56, 57, 57, 57, 58, 58, 59, 60, 60, 61,
- 49, 47, 47, 46, 45, 45, 46, 46, 46, 46, 46, 49, 49, 51, 53, 53, 54, 55,
- 55, 57, 57, 57, 58, 58, 59, 59, 60, 60, 60, 61, 61, 63, 49, 47, 47, 46,
- 45, 45, 45, 45, 45, 45, 45, 48, 49, 51, 53, 53, 55, 55, 57, 58, 58, 59,
- 60, 60, 61, 61, 62, 62, 63, 63, 63, 65, 49, 47, 47, 46, 45, 45, 45, 45,
- 45, 45, 45, 48, 49, 51, 53, 53, 55, 55, 57, 58, 58, 59, 60, 60, 61, 61,
- 62, 62, 63, 63, 63, 65, 50, 48, 48, 47, 46, 46, 46, 46, 46, 46, 46, 49,
- 50, 51, 54, 54, 56, 56, 57, 59, 59, 61, 61, 62, 63, 63, 64, 64, 65, 66,
- 66, 67, 50, 49, 48, 47, 46, 46, 46, 46, 46, 46, 46, 49, 50, 51, 54, 54,
- 56, 56, 58, 60, 60, 61, 61, 62, 63, 63, 65, 65, 66, 67, 67, 68, 51, 49,
- 49, 48, 47, 47, 47, 47, 47, 46, 46, 49, 50, 51, 54, 54, 56, 57, 58, 60,
- 60, 62, 62, 63, 65, 65, 66, 66, 67, 68, 68, 70, 52, 50, 50, 49, 47, 47,
- 47, 47, 47, 47, 47, 49, 50, 52, 54, 54, 57, 57, 59, 61, 61, 63, 63, 65,
- 66, 66, 68, 68, 69, 70, 70, 72, 52, 50, 50, 49, 47, 47, 47, 47, 47, 47,
- 47, 49, 50, 52, 54, 54, 57, 57, 59, 61, 61, 63, 63, 65, 66, 66, 68, 68,
- 69, 70, 70, 72, 54, 52, 51, 50, 49, 49, 49, 49, 48, 48, 48, 51, 51, 53,
- 55, 55, 58, 58, 60, 62, 62, 64, 65, 66, 68, 68, 70, 70, 71, 73, 73, 74,
- 54, 52, 52, 51, 49, 49, 49, 49, 49, 48, 48, 51, 52, 53, 55, 55, 58, 58,
- 60, 62, 62, 64, 65, 66, 68, 68, 70, 71, 72, 73, 73, 75, 55, 53, 53, 52,
- 50, 50, 50, 50, 49, 49, 49, 51, 52, 54, 56, 56, 58, 59, 60, 63, 63, 65,
- 66, 67, 69, 69, 71, 72, 73, 74, 74, 76, 57, 55, 54, 53, 52, 52, 51, 51,
- 50, 50, 50, 52, 53, 54, 56, 56, 59, 60, 61, 63, 63, 66, 67, 68, 70, 70,
- 73, 73, 74, 76, 76, 78, 57, 55, 54, 53, 52, 52, 51, 51, 50, 50, 50, 52,
- 53, 54, 56, 56, 59, 60, 61, 63, 63, 66, 67, 68, 70, 70, 73, 73, 74, 76,
- 76, 78, 59, 57, 56, 55, 54, 54, 53, 53, 52, 51, 51, 54, 55, 56, 58, 58,
- 60, 61, 63, 65, 65, 67, 68, 70, 72, 72, 74, 75, 76, 78, 78, 80,
- /* Size 4x8 */
- 31, 38, 47, 52, 32, 40, 45, 49, 39, 47, 45, 48, 44, 47, 51, 53, 46, 47,
- 56, 58, 47, 46, 59, 64, 48, 47, 61, 68, 53, 50, 64, 73,
- /* Size 8x4 */
- 31, 32, 39, 44, 46, 47, 48, 53, 38, 40, 47, 47, 47, 46, 47, 50, 47, 45,
- 45, 51, 56, 59, 61, 64, 52, 49, 48, 53, 58, 64, 68, 73,
- /* Size 8x16 */
- 32, 31, 37, 45, 48, 49, 52, 57, 31, 31, 38, 45, 47, 47, 50, 54, 30, 32,
- 40, 44, 45, 45, 48, 52, 33, 35, 42, 46, 46, 45, 47, 51, 35, 37, 44, 46,
- 46, 45, 47, 51, 37, 40, 47, 47, 47, 45, 47, 50, 42, 43, 47, 49, 50, 49,
- 50, 53, 49, 46, 48, 52, 53, 53, 54, 57, 48, 46, 47, 51, 54, 55, 57, 59,
- 48, 45, 46, 51, 54, 57, 59, 61, 49, 45, 46, 51, 55, 58, 61, 64, 50, 46,
- 46, 52, 56, 59, 64, 67, 52, 48, 47, 53, 57, 61, 66, 71, 54, 49, 48, 54,
- 58, 62, 68, 73, 55, 51, 49, 54, 58, 63, 69, 74, 57, 52, 50, 55, 59, 64,
- 70, 76,
- /* Size 16x8 */
- 32, 31, 30, 33, 35, 37, 42, 49, 48, 48, 49, 50, 52, 54, 55, 57, 31, 31,
- 32, 35, 37, 40, 43, 46, 46, 45, 45, 46, 48, 49, 51, 52, 37, 38, 40, 42,
- 44, 47, 47, 48, 47, 46, 46, 46, 47, 48, 49, 50, 45, 45, 44, 46, 46, 47,
- 49, 52, 51, 51, 51, 52, 53, 54, 54, 55, 48, 47, 45, 46, 46, 47, 50, 53,
- 54, 54, 55, 56, 57, 58, 58, 59, 49, 47, 45, 45, 45, 45, 49, 53, 55, 57,
- 58, 59, 61, 62, 63, 64, 52, 50, 48, 47, 47, 47, 50, 54, 57, 59, 61, 64,
- 66, 68, 69, 70, 57, 54, 52, 51, 51, 50, 53, 57, 59, 61, 64, 67, 71, 73,
- 74, 76,
- /* Size 16x32 */
- 32, 31, 31, 33, 37, 37, 45, 48, 48, 49, 49, 51, 52, 54, 57, 57, 31, 31,
- 31, 34, 38, 38, 45, 47, 47, 47, 47, 50, 50, 52, 55, 55, 31, 31, 31, 34,
- 38, 38, 45, 47, 47, 47, 47, 49, 50, 51, 54, 54, 31, 31, 32, 34, 39, 39,
- 45, 46, 46, 46, 46, 48, 49, 51, 53, 53, 30, 32, 32, 35, 40, 40, 44, 46,
- 45, 45, 45, 47, 48, 49, 52, 52, 30, 32, 32, 35, 40, 40, 44, 46, 45, 45,
- 45, 47, 48, 49, 52, 52, 33, 34, 35, 37, 42, 42, 46, 47, 46, 45, 45, 47,
- 47, 49, 51, 51, 33, 35, 36, 38, 43, 43, 46, 47, 46, 46, 46, 47, 47, 49,
- 51, 51, 35, 37, 37, 40, 44, 44, 46, 47, 46, 45, 45, 47, 47, 48, 51, 51,
- 37, 39, 40, 43, 47, 47, 47, 47, 47, 45, 45, 46, 47, 48, 50, 50, 37, 39,
- 40, 43, 47, 47, 47, 47, 47, 45, 45, 46, 47, 48, 50, 50, 41, 42, 42, 44,
- 47, 47, 49, 49, 49, 48, 48, 49, 50, 51, 52, 52, 42, 42, 43, 44, 47, 47,
- 49, 50, 50, 49, 49, 50, 50, 51, 53, 53, 44, 44, 44, 45, 47, 47, 50, 51,
- 51, 51, 51, 52, 52, 53, 54, 54, 49, 47, 46, 47, 48, 48, 52, 53, 53, 53,
- 53, 54, 54, 55, 57, 57, 49, 47, 46, 47, 48, 48, 52, 53, 53, 53, 53, 54,
- 54, 55, 57, 57, 48, 46, 46, 46, 47, 47, 51, 53, 54, 55, 55, 56, 57, 58,
- 59, 59, 48, 46, 46, 46, 47, 47, 51, 53, 54, 56, 56, 57, 57, 58, 60, 60,
- 48, 46, 45, 46, 46, 46, 51, 53, 54, 57, 57, 58, 59, 60, 61, 61, 49, 46,
- 45, 45, 46, 46, 51, 53, 55, 58, 58, 61, 61, 62, 64, 64, 49, 46, 45, 45,
- 46, 46, 51, 53, 55, 58, 58, 61, 61, 62, 64, 64, 50, 47, 46, 46, 46, 46,
- 52, 54, 56, 59, 59, 62, 63, 64, 66, 66, 50, 47, 46, 46, 46, 46, 52, 54,
- 56, 59, 59, 63, 64, 65, 67, 67, 51, 48, 47, 47, 47, 47, 52, 54, 56, 60,
- 60, 64, 65, 66, 68, 68, 52, 48, 48, 47, 47, 47, 53, 54, 57, 61, 61, 65,
- 66, 68, 71, 71, 52, 48, 48, 47, 47, 47, 53, 54, 57, 61, 61, 65, 66, 68,
- 71, 71, 54, 50, 49, 49, 48, 48, 54, 55, 58, 62, 62, 67, 68, 70, 73, 73,
- 54, 51, 50, 49, 49, 49, 54, 55, 58, 62, 62, 67, 68, 70, 73, 73, 55, 51,
- 51, 50, 49, 49, 54, 56, 58, 63, 63, 68, 69, 71, 74, 74, 57, 53, 52, 51,
- 50, 50, 55, 56, 59, 64, 64, 69, 70, 73, 76, 76, 57, 53, 52, 51, 50, 50,
- 55, 56, 59, 64, 64, 69, 70, 73, 76, 76, 59, 55, 54, 53, 52, 52, 57, 58,
- 61, 65, 65, 70, 72, 74, 78, 78,
- /* Size 32x16 */
- 32, 31, 31, 31, 30, 30, 33, 33, 35, 37, 37, 41, 42, 44, 49, 49, 48, 48,
- 48, 49, 49, 50, 50, 51, 52, 52, 54, 54, 55, 57, 57, 59, 31, 31, 31, 31,
- 32, 32, 34, 35, 37, 39, 39, 42, 42, 44, 47, 47, 46, 46, 46, 46, 46, 47,
- 47, 48, 48, 48, 50, 51, 51, 53, 53, 55, 31, 31, 31, 32, 32, 32, 35, 36,
- 37, 40, 40, 42, 43, 44, 46, 46, 46, 46, 45, 45, 45, 46, 46, 47, 48, 48,
- 49, 50, 51, 52, 52, 54, 33, 34, 34, 34, 35, 35, 37, 38, 40, 43, 43, 44,
- 44, 45, 47, 47, 46, 46, 46, 45, 45, 46, 46, 47, 47, 47, 49, 49, 50, 51,
- 51, 53, 37, 38, 38, 39, 40, 40, 42, 43, 44, 47, 47, 47, 47, 47, 48, 48,
- 47, 47, 46, 46, 46, 46, 46, 47, 47, 47, 48, 49, 49, 50, 50, 52, 37, 38,
- 38, 39, 40, 40, 42, 43, 44, 47, 47, 47, 47, 47, 48, 48, 47, 47, 46, 46,
- 46, 46, 46, 47, 47, 47, 48, 49, 49, 50, 50, 52, 45, 45, 45, 45, 44, 44,
- 46, 46, 46, 47, 47, 49, 49, 50, 52, 52, 51, 51, 51, 51, 51, 52, 52, 52,
- 53, 53, 54, 54, 54, 55, 55, 57, 48, 47, 47, 46, 46, 46, 47, 47, 47, 47,
- 47, 49, 50, 51, 53, 53, 53, 53, 53, 53, 53, 54, 54, 54, 54, 54, 55, 55,
- 56, 56, 56, 58, 48, 47, 47, 46, 45, 45, 46, 46, 46, 47, 47, 49, 50, 51,
- 53, 53, 54, 54, 54, 55, 55, 56, 56, 56, 57, 57, 58, 58, 58, 59, 59, 61,
- 49, 47, 47, 46, 45, 45, 45, 46, 45, 45, 45, 48, 49, 51, 53, 53, 55, 56,
- 57, 58, 58, 59, 59, 60, 61, 61, 62, 62, 63, 64, 64, 65, 49, 47, 47, 46,
- 45, 45, 45, 46, 45, 45, 45, 48, 49, 51, 53, 53, 55, 56, 57, 58, 58, 59,
- 59, 60, 61, 61, 62, 62, 63, 64, 64, 65, 51, 50, 49, 48, 47, 47, 47, 47,
- 47, 46, 46, 49, 50, 52, 54, 54, 56, 57, 58, 61, 61, 62, 63, 64, 65, 65,
- 67, 67, 68, 69, 69, 70, 52, 50, 50, 49, 48, 48, 47, 47, 47, 47, 47, 50,
- 50, 52, 54, 54, 57, 57, 59, 61, 61, 63, 64, 65, 66, 66, 68, 68, 69, 70,
- 70, 72, 54, 52, 51, 51, 49, 49, 49, 49, 48, 48, 48, 51, 51, 53, 55, 55,
- 58, 58, 60, 62, 62, 64, 65, 66, 68, 68, 70, 70, 71, 73, 73, 74, 57, 55,
- 54, 53, 52, 52, 51, 51, 51, 50, 50, 52, 53, 54, 57, 57, 59, 60, 61, 64,
- 64, 66, 67, 68, 71, 71, 73, 73, 74, 76, 76, 78, 57, 55, 54, 53, 52, 52,
- 51, 51, 51, 50, 50, 52, 53, 54, 57, 57, 59, 60, 61, 64, 64, 66, 67, 68,
- 71, 71, 73, 73, 74, 76, 76, 78,
- /* Size 4x16 */
- 31, 37, 49, 54, 31, 38, 47, 51, 32, 40, 45, 49, 34, 42, 45, 49, 37, 44,
- 45, 48, 39, 47, 45, 48, 42, 47, 49, 51, 47, 48, 53, 55, 46, 47, 55, 58,
- 46, 46, 57, 60, 46, 46, 58, 62, 47, 46, 59, 65, 48, 47, 61, 68, 50, 48,
- 62, 70, 51, 49, 63, 71, 53, 50, 64, 73,
- /* Size 16x4 */
- 31, 31, 32, 34, 37, 39, 42, 47, 46, 46, 46, 47, 48, 50, 51, 53, 37, 38,
- 40, 42, 44, 47, 47, 48, 47, 46, 46, 46, 47, 48, 49, 50, 49, 47, 45, 45,
- 45, 45, 49, 53, 55, 57, 58, 59, 61, 62, 63, 64, 54, 51, 49, 49, 48, 48,
- 51, 55, 58, 60, 62, 65, 68, 70, 71, 73,
- /* Size 8x32 */
- 32, 31, 37, 45, 48, 49, 52, 57, 31, 31, 38, 45, 47, 47, 50, 55, 31, 31,
- 38, 45, 47, 47, 50, 54, 31, 32, 39, 45, 46, 46, 49, 53, 30, 32, 40, 44,
- 45, 45, 48, 52, 30, 32, 40, 44, 45, 45, 48, 52, 33, 35, 42, 46, 46, 45,
- 47, 51, 33, 36, 43, 46, 46, 46, 47, 51, 35, 37, 44, 46, 46, 45, 47, 51,
- 37, 40, 47, 47, 47, 45, 47, 50, 37, 40, 47, 47, 47, 45, 47, 50, 41, 42,
- 47, 49, 49, 48, 50, 52, 42, 43, 47, 49, 50, 49, 50, 53, 44, 44, 47, 50,
- 51, 51, 52, 54, 49, 46, 48, 52, 53, 53, 54, 57, 49, 46, 48, 52, 53, 53,
- 54, 57, 48, 46, 47, 51, 54, 55, 57, 59, 48, 46, 47, 51, 54, 56, 57, 60,
- 48, 45, 46, 51, 54, 57, 59, 61, 49, 45, 46, 51, 55, 58, 61, 64, 49, 45,
- 46, 51, 55, 58, 61, 64, 50, 46, 46, 52, 56, 59, 63, 66, 50, 46, 46, 52,
- 56, 59, 64, 67, 51, 47, 47, 52, 56, 60, 65, 68, 52, 48, 47, 53, 57, 61,
- 66, 71, 52, 48, 47, 53, 57, 61, 66, 71, 54, 49, 48, 54, 58, 62, 68, 73,
- 54, 50, 49, 54, 58, 62, 68, 73, 55, 51, 49, 54, 58, 63, 69, 74, 57, 52,
- 50, 55, 59, 64, 70, 76, 57, 52, 50, 55, 59, 64, 70, 76, 59, 54, 52, 57,
- 61, 65, 72, 78,
- /* Size 32x8 */
- 32, 31, 31, 31, 30, 30, 33, 33, 35, 37, 37, 41, 42, 44, 49, 49, 48, 48,
- 48, 49, 49, 50, 50, 51, 52, 52, 54, 54, 55, 57, 57, 59, 31, 31, 31, 32,
- 32, 32, 35, 36, 37, 40, 40, 42, 43, 44, 46, 46, 46, 46, 45, 45, 45, 46,
- 46, 47, 48, 48, 49, 50, 51, 52, 52, 54, 37, 38, 38, 39, 40, 40, 42, 43,
- 44, 47, 47, 47, 47, 47, 48, 48, 47, 47, 46, 46, 46, 46, 46, 47, 47, 47,
- 48, 49, 49, 50, 50, 52, 45, 45, 45, 45, 44, 44, 46, 46, 46, 47, 47, 49,
- 49, 50, 52, 52, 51, 51, 51, 51, 51, 52, 52, 52, 53, 53, 54, 54, 54, 55,
- 55, 57, 48, 47, 47, 46, 45, 45, 46, 46, 46, 47, 47, 49, 50, 51, 53, 53,
- 54, 54, 54, 55, 55, 56, 56, 56, 57, 57, 58, 58, 58, 59, 59, 61, 49, 47,
- 47, 46, 45, 45, 45, 46, 45, 45, 45, 48, 49, 51, 53, 53, 55, 56, 57, 58,
- 58, 59, 59, 60, 61, 61, 62, 62, 63, 64, 64, 65, 52, 50, 50, 49, 48, 48,
- 47, 47, 47, 47, 47, 50, 50, 52, 54, 54, 57, 57, 59, 61, 61, 63, 64, 65,
- 66, 66, 68, 68, 69, 70, 70, 72, 57, 55, 54, 53, 52, 52, 51, 51, 51, 50,
- 50, 52, 53, 54, 57, 57, 59, 60, 61, 64, 64, 66, 67, 68, 71, 71, 73, 73,
- 74, 76, 76, 78 },
- },
- {
- { /* Luma */
- /* Size 4x4 */
- 32, 32, 38, 51, 32, 35, 40, 49, 38, 40, 54, 64, 51, 49, 64, 81,
- /* Size 8x8 */
- 31, 32, 32, 34, 35, 41, 47, 53, 32, 32, 32, 33, 34, 40, 44, 50, 32, 32,
- 34, 35, 37, 41, 45, 51, 34, 33, 35, 39, 42, 47, 51, 55, 35, 34, 37, 42,
- 48, 53, 57, 61, 41, 40, 41, 47, 53, 60, 65, 70, 47, 44, 45, 51, 57, 65,
- 71, 77, 53, 50, 51, 55, 61, 70, 77, 85,
- /* Size 16x16 */
- 32, 31, 31, 31, 31, 32, 32, 34, 36, 38, 39, 44, 47, 49, 54, 59, 31, 32,
- 32, 32, 32, 32, 33, 34, 35, 37, 38, 42, 45, 47, 51, 56, 31, 32, 32, 32,
- 32, 32, 33, 33, 34, 36, 37, 41, 44, 46, 50, 54, 31, 32, 32, 32, 32, 33,
- 33, 34, 35, 36, 38, 41, 44, 45, 49, 54, 31, 32, 32, 32, 33, 34, 34, 35,
- 36, 38, 39, 42, 45, 46, 50, 54, 32, 32, 32, 33, 34, 35, 36, 37, 38, 39,
- 40, 42, 45, 46, 49, 53, 32, 33, 33, 33, 34, 36, 36, 38, 40, 41, 42, 44,
- 47, 48, 51, 55, 34, 34, 33, 34, 35, 37, 38, 39, 42, 44, 45, 47, 50, 51,
- 54, 58, 36, 35, 34, 35, 36, 38, 40, 42, 48, 50, 50, 54, 56, 57, 60, 64,
- 38, 37, 36, 36, 38, 39, 41, 44, 50, 51, 52, 56, 58, 60, 63, 67, 39, 38,
- 37, 38, 39, 40, 42, 45, 50, 52, 54, 58, 60, 62, 65, 69, 44, 42, 41, 41,
- 42, 42, 44, 47, 54, 56, 58, 63, 66, 68, 71, 75, 47, 45, 44, 44, 45, 45,
- 47, 50, 56, 58, 60, 66, 69, 71, 75, 79, 49, 47, 46, 45, 46, 46, 48, 51,
- 57, 60, 62, 68, 71, 73, 77, 81, 54, 51, 50, 49, 50, 49, 51, 54, 60, 63,
- 65, 71, 75, 77, 82, 87, 59, 56, 54, 54, 54, 53, 55, 58, 64, 67, 69, 75,
- 79, 81, 87, 92,
- /* Size 32x32 */
- 32, 31, 31, 31, 31, 31, 31, 31, 31, 32, 32, 32, 32, 34, 34, 35, 36, 36,
- 38, 39, 39, 42, 44, 44, 47, 48, 49, 53, 54, 55, 59, 59, 31, 31, 31, 31,
- 32, 32, 32, 32, 32, 32, 32, 32, 33, 34, 34, 34, 35, 35, 37, 39, 39, 41,
- 43, 43, 46, 47, 48, 51, 52, 53, 57, 57, 31, 31, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 33, 34, 34, 34, 35, 35, 37, 38, 38, 41, 42, 43, 45, 46,
- 47, 51, 51, 53, 56, 56, 31, 31, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 33, 34, 34, 34, 35, 35, 37, 38, 38, 41, 42, 42, 45, 46, 47, 51, 51, 52,
- 56, 56, 31, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 33, 33, 33, 34,
- 34, 34, 36, 37, 37, 40, 41, 41, 44, 45, 46, 49, 50, 51, 54, 54, 31, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 33, 33, 33, 34, 34, 34, 36, 37,
- 37, 40, 41, 41, 44, 44, 45, 49, 49, 50, 54, 54, 31, 32, 32, 32, 32, 32,
- 32, 32, 32, 33, 33, 33, 33, 34, 34, 34, 35, 35, 36, 38, 38, 40, 41, 41,
- 44, 45, 45, 49, 49, 50, 54, 54, 31, 32, 32, 32, 32, 32, 32, 33, 33, 33,
- 34, 34, 34, 35, 35, 35, 36, 36, 38, 39, 39, 41, 42, 42, 44, 45, 46, 49,
- 50, 51, 54, 54, 31, 32, 32, 32, 32, 32, 32, 33, 33, 33, 34, 34, 34, 35,
- 35, 36, 36, 36, 38, 39, 39, 41, 42, 42, 45, 45, 46, 49, 50, 51, 54, 54,
- 32, 32, 32, 32, 32, 32, 33, 33, 33, 34, 34, 34, 35, 35, 35, 36, 37, 37,
- 38, 39, 39, 41, 42, 42, 45, 45, 46, 49, 49, 51, 54, 54, 32, 32, 32, 32,
- 32, 32, 33, 34, 34, 34, 35, 35, 36, 37, 37, 37, 38, 38, 39, 40, 40, 42,
- 42, 43, 45, 46, 46, 49, 49, 50, 53, 53, 32, 32, 32, 32, 32, 32, 33, 34,
- 34, 34, 35, 35, 36, 37, 37, 37, 38, 38, 39, 40, 40, 42, 42, 43, 45, 46,
- 46, 49, 49, 50, 53, 53, 32, 33, 33, 33, 33, 33, 33, 34, 34, 35, 36, 36,
- 36, 38, 38, 39, 40, 40, 41, 42, 42, 44, 44, 45, 47, 47, 48, 51, 51, 52,
- 55, 55, 34, 34, 34, 34, 33, 33, 34, 35, 35, 35, 37, 37, 38, 39, 39, 41,
- 42, 42, 44, 45, 45, 47, 47, 48, 50, 51, 51, 54, 54, 55, 58, 58, 34, 34,
- 34, 34, 33, 33, 34, 35, 35, 35, 37, 37, 38, 39, 39, 41, 42, 42, 44, 45,
- 45, 47, 47, 48, 50, 51, 51, 54, 54, 55, 58, 58, 35, 34, 34, 34, 34, 34,
- 34, 35, 36, 36, 37, 37, 39, 41, 41, 43, 45, 45, 47, 47, 47, 49, 50, 51,
- 53, 53, 54, 57, 57, 58, 61, 61, 36, 35, 35, 35, 34, 34, 35, 36, 36, 37,
- 38, 38, 40, 42, 42, 45, 48, 48, 50, 50, 50, 53, 54, 54, 56, 57, 57, 59,
- 60, 61, 64, 64, 36, 35, 35, 35, 34, 34, 35, 36, 36, 37, 38, 38, 40, 42,
- 42, 45, 48, 48, 50, 50, 50, 53, 54, 54, 56, 57, 57, 59, 60, 61, 64, 64,
- 38, 37, 37, 37, 36, 36, 36, 38, 38, 38, 39, 39, 41, 44, 44, 47, 50, 50,
- 51, 52, 52, 55, 56, 56, 58, 59, 60, 62, 63, 64, 67, 67, 39, 39, 38, 38,
- 37, 37, 38, 39, 39, 39, 40, 40, 42, 45, 45, 47, 50, 50, 52, 54, 54, 56,
- 58, 58, 60, 61, 62, 64, 65, 66, 69, 69, 39, 39, 38, 38, 37, 37, 38, 39,
- 39, 39, 40, 40, 42, 45, 45, 47, 50, 50, 52, 54, 54, 56, 58, 58, 60, 61,
- 62, 64, 65, 66, 69, 69, 42, 41, 41, 41, 40, 40, 40, 41, 41, 41, 42, 42,
- 44, 47, 47, 49, 53, 53, 55, 56, 56, 60, 61, 62, 64, 65, 66, 69, 69, 70,
- 73, 73, 44, 43, 42, 42, 41, 41, 41, 42, 42, 42, 42, 42, 44, 47, 47, 50,
- 54, 54, 56, 58, 58, 61, 63, 64, 66, 67, 68, 71, 71, 72, 75, 75, 44, 43,
- 43, 42, 41, 41, 41, 42, 42, 42, 43, 43, 45, 48, 48, 51, 54, 54, 56, 58,
- 58, 62, 64, 64, 66, 67, 68, 71, 72, 73, 76, 76, 47, 46, 45, 45, 44, 44,
- 44, 44, 45, 45, 45, 45, 47, 50, 50, 53, 56, 56, 58, 60, 60, 64, 66, 66,
- 69, 70, 71, 74, 75, 76, 79, 79, 48, 47, 46, 46, 45, 44, 45, 45, 45, 45,
- 46, 46, 47, 51, 51, 53, 57, 57, 59, 61, 61, 65, 67, 67, 70, 71, 72, 75,
- 76, 77, 80, 80, 49, 48, 47, 47, 46, 45, 45, 46, 46, 46, 46, 46, 48, 51,
- 51, 54, 57, 57, 60, 62, 62, 66, 68, 68, 71, 72, 73, 77, 77, 78, 81, 81,
- 53, 51, 51, 51, 49, 49, 49, 49, 49, 49, 49, 49, 51, 54, 54, 57, 59, 59,
- 62, 64, 64, 69, 71, 71, 74, 75, 77, 81, 81, 83, 86, 86, 54, 52, 51, 51,
- 50, 49, 49, 50, 50, 49, 49, 49, 51, 54, 54, 57, 60, 60, 63, 65, 65, 69,
- 71, 72, 75, 76, 77, 81, 82, 83, 87, 87, 55, 53, 53, 52, 51, 50, 50, 51,
- 51, 51, 50, 50, 52, 55, 55, 58, 61, 61, 64, 66, 66, 70, 72, 73, 76, 77,
- 78, 83, 83, 85, 88, 88, 59, 57, 56, 56, 54, 54, 54, 54, 54, 54, 53, 53,
- 55, 58, 58, 61, 64, 64, 67, 69, 69, 73, 75, 76, 79, 80, 81, 86, 87, 88,
- 92, 92, 59, 57, 56, 56, 54, 54, 54, 54, 54, 54, 53, 53, 55, 58, 58, 61,
- 64, 64, 67, 69, 69, 73, 75, 76, 79, 80, 81, 86, 87, 88, 92, 92,
- /* Size 4x8 */
- 32, 32, 37, 52, 32, 33, 36, 49, 32, 34, 38, 49, 34, 37, 44, 54, 35, 38,
- 49, 60, 40, 42, 55, 69, 46, 46, 59, 76, 52, 51, 64, 83,
- /* Size 8x4 */
- 32, 32, 32, 34, 35, 40, 46, 52, 32, 33, 34, 37, 38, 42, 46, 51, 37, 36,
- 38, 44, 49, 55, 59, 64, 52, 49, 49, 54, 60, 69, 76, 83,
- /* Size 8x16 */
- 32, 31, 32, 32, 36, 44, 47, 53, 31, 32, 32, 33, 35, 42, 45, 51, 31, 32,
- 32, 33, 35, 41, 44, 49, 31, 32, 33, 33, 35, 41, 44, 49, 32, 32, 34, 34,
- 36, 42, 45, 50, 32, 33, 35, 36, 38, 42, 45, 49, 32, 33, 35, 36, 40, 44,
- 47, 51, 34, 34, 36, 38, 42, 48, 50, 54, 36, 34, 37, 40, 48, 54, 56, 60,
- 38, 36, 39, 41, 49, 56, 58, 63, 39, 37, 40, 42, 50, 58, 60, 65, 44, 41,
- 42, 45, 53, 63, 66, 71, 47, 44, 45, 47, 56, 66, 69, 75, 49, 46, 47, 48,
- 57, 67, 71, 77, 53, 49, 50, 51, 60, 71, 75, 82, 58, 54, 54, 55, 63, 75,
- 79, 87,
- /* Size 16x8 */
- 32, 31, 31, 31, 32, 32, 32, 34, 36, 38, 39, 44, 47, 49, 53, 58, 31, 32,
- 32, 32, 32, 33, 33, 34, 34, 36, 37, 41, 44, 46, 49, 54, 32, 32, 32, 33,
- 34, 35, 35, 36, 37, 39, 40, 42, 45, 47, 50, 54, 32, 33, 33, 33, 34, 36,
- 36, 38, 40, 41, 42, 45, 47, 48, 51, 55, 36, 35, 35, 35, 36, 38, 40, 42,
- 48, 49, 50, 53, 56, 57, 60, 63, 44, 42, 41, 41, 42, 42, 44, 48, 54, 56,
- 58, 63, 66, 67, 71, 75, 47, 45, 44, 44, 45, 45, 47, 50, 56, 58, 60, 66,
- 69, 71, 75, 79, 53, 51, 49, 49, 50, 49, 51, 54, 60, 63, 65, 71, 75, 77,
- 82, 87,
- /* Size 16x32 */
- 32, 31, 31, 31, 32, 32, 32, 35, 36, 38, 44, 44, 47, 53, 53, 59, 31, 32,
- 32, 32, 32, 32, 33, 35, 35, 37, 43, 43, 46, 52, 52, 57, 31, 32, 32, 32,
- 32, 32, 33, 35, 35, 37, 42, 42, 45, 51, 51, 56, 31, 32, 32, 32, 32, 32,
- 33, 35, 35, 37, 42, 42, 45, 51, 51, 56, 31, 32, 32, 32, 32, 32, 33, 34,
- 35, 36, 41, 41, 44, 49, 49, 54, 31, 32, 32, 32, 32, 33, 33, 34, 34, 36,
- 41, 41, 44, 49, 49, 54, 31, 32, 32, 32, 33, 33, 33, 35, 35, 36, 41, 41,
- 44, 49, 49, 54, 32, 32, 32, 32, 33, 34, 34, 36, 36, 38, 42, 42, 45, 49,
- 49, 54, 32, 32, 32, 33, 34, 34, 34, 36, 36, 38, 42, 42, 45, 50, 50, 54,
- 32, 32, 32, 33, 34, 34, 35, 37, 37, 38, 42, 42, 45, 49, 49, 54, 32, 32,
- 33, 33, 35, 35, 36, 38, 38, 39, 42, 42, 45, 49, 49, 53, 32, 32, 33, 33,
- 35, 35, 36, 38, 38, 39, 42, 42, 45, 49, 49, 53, 32, 33, 33, 33, 35, 36,
- 36, 39, 40, 41, 44, 44, 47, 51, 51, 55, 34, 34, 34, 34, 36, 37, 38, 42,
- 42, 44, 48, 48, 50, 54, 54, 58, 34, 34, 34, 34, 36, 37, 38, 42, 42, 44,
- 48, 48, 50, 54, 54, 58, 35, 34, 34, 34, 37, 37, 39, 44, 45, 46, 50, 50,
- 53, 57, 57, 61, 36, 35, 34, 35, 37, 38, 40, 47, 48, 49, 54, 54, 56, 60,
- 60, 64, 36, 35, 34, 35, 37, 38, 40, 47, 48, 49, 54, 54, 56, 60, 60, 64,
- 38, 37, 36, 37, 39, 40, 41, 48, 49, 51, 56, 56, 58, 63, 63, 67, 39, 38,
- 37, 38, 40, 40, 42, 49, 50, 52, 58, 58, 60, 65, 65, 69, 39, 38, 37, 38,
- 40, 40, 42, 49, 50, 52, 58, 58, 60, 65, 65, 69, 42, 40, 40, 40, 42, 42,
- 44, 51, 52, 55, 61, 61, 64, 69, 69, 73, 44, 42, 41, 41, 42, 43, 45, 52,
- 53, 56, 63, 63, 66, 71, 71, 75, 44, 42, 41, 41, 43, 43, 45, 52, 54, 56,
- 63, 63, 66, 72, 72, 76, 47, 45, 44, 44, 45, 45, 47, 54, 56, 58, 66, 66,
- 69, 75, 75, 79, 48, 46, 45, 45, 46, 46, 48, 55, 56, 59, 67, 67, 70, 76,
- 76, 80, 49, 47, 46, 46, 47, 47, 48, 56, 57, 60, 67, 67, 71, 77, 77, 81,
- 53, 50, 49, 49, 49, 49, 51, 58, 59, 62, 71, 71, 74, 81, 81, 86, 53, 51,
- 49, 49, 50, 50, 51, 59, 60, 63, 71, 71, 75, 82, 82, 87, 55, 52, 51, 51,
- 51, 51, 53, 60, 61, 64, 72, 72, 76, 83, 83, 88, 58, 55, 54, 54, 54, 54,
- 55, 62, 63, 67, 75, 75, 79, 87, 87, 92, 58, 55, 54, 54, 54, 54, 55, 62,
- 63, 67, 75, 75, 79, 87, 87, 92,
- /* Size 32x16 */
- 32, 31, 31, 31, 31, 31, 31, 32, 32, 32, 32, 32, 32, 34, 34, 35, 36, 36,
- 38, 39, 39, 42, 44, 44, 47, 48, 49, 53, 53, 55, 58, 58, 31, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 33, 34, 34, 34, 35, 35, 37, 38, 38, 40,
- 42, 42, 45, 46, 47, 50, 51, 52, 55, 55, 31, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 33, 33, 33, 34, 34, 34, 34, 34, 36, 37, 37, 40, 41, 41, 44, 45,
- 46, 49, 49, 51, 54, 54, 31, 32, 32, 32, 32, 32, 32, 32, 33, 33, 33, 33,
- 33, 34, 34, 34, 35, 35, 37, 38, 38, 40, 41, 41, 44, 45, 46, 49, 49, 51,
- 54, 54, 32, 32, 32, 32, 32, 32, 33, 33, 34, 34, 35, 35, 35, 36, 36, 37,
- 37, 37, 39, 40, 40, 42, 42, 43, 45, 46, 47, 49, 50, 51, 54, 54, 32, 32,
- 32, 32, 32, 33, 33, 34, 34, 34, 35, 35, 36, 37, 37, 37, 38, 38, 40, 40,
- 40, 42, 43, 43, 45, 46, 47, 49, 50, 51, 54, 54, 32, 33, 33, 33, 33, 33,
- 33, 34, 34, 35, 36, 36, 36, 38, 38, 39, 40, 40, 41, 42, 42, 44, 45, 45,
- 47, 48, 48, 51, 51, 53, 55, 55, 35, 35, 35, 35, 34, 34, 35, 36, 36, 37,
- 38, 38, 39, 42, 42, 44, 47, 47, 48, 49, 49, 51, 52, 52, 54, 55, 56, 58,
- 59, 60, 62, 62, 36, 35, 35, 35, 35, 34, 35, 36, 36, 37, 38, 38, 40, 42,
- 42, 45, 48, 48, 49, 50, 50, 52, 53, 54, 56, 56, 57, 59, 60, 61, 63, 63,
- 38, 37, 37, 37, 36, 36, 36, 38, 38, 38, 39, 39, 41, 44, 44, 46, 49, 49,
- 51, 52, 52, 55, 56, 56, 58, 59, 60, 62, 63, 64, 67, 67, 44, 43, 42, 42,
- 41, 41, 41, 42, 42, 42, 42, 42, 44, 48, 48, 50, 54, 54, 56, 58, 58, 61,
- 63, 63, 66, 67, 67, 71, 71, 72, 75, 75, 44, 43, 42, 42, 41, 41, 41, 42,
- 42, 42, 42, 42, 44, 48, 48, 50, 54, 54, 56, 58, 58, 61, 63, 63, 66, 67,
- 67, 71, 71, 72, 75, 75, 47, 46, 45, 45, 44, 44, 44, 45, 45, 45, 45, 45,
- 47, 50, 50, 53, 56, 56, 58, 60, 60, 64, 66, 66, 69, 70, 71, 74, 75, 76,
- 79, 79, 53, 52, 51, 51, 49, 49, 49, 49, 50, 49, 49, 49, 51, 54, 54, 57,
- 60, 60, 63, 65, 65, 69, 71, 72, 75, 76, 77, 81, 82, 83, 87, 87, 53, 52,
- 51, 51, 49, 49, 49, 49, 50, 49, 49, 49, 51, 54, 54, 57, 60, 60, 63, 65,
- 65, 69, 71, 72, 75, 76, 77, 81, 82, 83, 87, 87, 59, 57, 56, 56, 54, 54,
- 54, 54, 54, 54, 53, 53, 55, 58, 58, 61, 64, 64, 67, 69, 69, 73, 75, 76,
- 79, 80, 81, 86, 87, 88, 92, 92,
- /* Size 4x16 */
- 31, 32, 38, 53, 32, 32, 37, 51, 32, 32, 36, 49, 32, 33, 36, 49, 32, 34,
- 38, 50, 32, 35, 39, 49, 33, 36, 41, 51, 34, 37, 44, 54, 35, 38, 49, 60,
- 37, 40, 51, 63, 38, 40, 52, 65, 42, 43, 56, 71, 45, 45, 58, 75, 47, 47,
- 60, 77, 51, 50, 63, 82, 55, 54, 67, 87,
- /* Size 16x4 */
- 31, 32, 32, 32, 32, 32, 33, 34, 35, 37, 38, 42, 45, 47, 51, 55, 32, 32,
- 32, 33, 34, 35, 36, 37, 38, 40, 40, 43, 45, 47, 50, 54, 38, 37, 36, 36,
- 38, 39, 41, 44, 49, 51, 52, 56, 58, 60, 63, 67, 53, 51, 49, 49, 50, 49,
- 51, 54, 60, 63, 65, 71, 75, 77, 82, 87,
- /* Size 8x32 */
- 32, 31, 32, 32, 36, 44, 47, 53, 31, 32, 32, 33, 35, 43, 46, 52, 31, 32,
- 32, 33, 35, 42, 45, 51, 31, 32, 32, 33, 35, 42, 45, 51, 31, 32, 32, 33,
- 35, 41, 44, 49, 31, 32, 32, 33, 34, 41, 44, 49, 31, 32, 33, 33, 35, 41,
- 44, 49, 32, 32, 33, 34, 36, 42, 45, 49, 32, 32, 34, 34, 36, 42, 45, 50,
- 32, 32, 34, 35, 37, 42, 45, 49, 32, 33, 35, 36, 38, 42, 45, 49, 32, 33,
- 35, 36, 38, 42, 45, 49, 32, 33, 35, 36, 40, 44, 47, 51, 34, 34, 36, 38,
- 42, 48, 50, 54, 34, 34, 36, 38, 42, 48, 50, 54, 35, 34, 37, 39, 45, 50,
- 53, 57, 36, 34, 37, 40, 48, 54, 56, 60, 36, 34, 37, 40, 48, 54, 56, 60,
- 38, 36, 39, 41, 49, 56, 58, 63, 39, 37, 40, 42, 50, 58, 60, 65, 39, 37,
- 40, 42, 50, 58, 60, 65, 42, 40, 42, 44, 52, 61, 64, 69, 44, 41, 42, 45,
- 53, 63, 66, 71, 44, 41, 43, 45, 54, 63, 66, 72, 47, 44, 45, 47, 56, 66,
- 69, 75, 48, 45, 46, 48, 56, 67, 70, 76, 49, 46, 47, 48, 57, 67, 71, 77,
- 53, 49, 49, 51, 59, 71, 74, 81, 53, 49, 50, 51, 60, 71, 75, 82, 55, 51,
- 51, 53, 61, 72, 76, 83, 58, 54, 54, 55, 63, 75, 79, 87, 58, 54, 54, 55,
- 63, 75, 79, 87,
- /* Size 32x8 */
- 32, 31, 31, 31, 31, 31, 31, 32, 32, 32, 32, 32, 32, 34, 34, 35, 36, 36,
- 38, 39, 39, 42, 44, 44, 47, 48, 49, 53, 53, 55, 58, 58, 31, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 33, 33, 33, 34, 34, 34, 34, 34, 36, 37, 37, 40,
- 41, 41, 44, 45, 46, 49, 49, 51, 54, 54, 32, 32, 32, 32, 32, 32, 33, 33,
- 34, 34, 35, 35, 35, 36, 36, 37, 37, 37, 39, 40, 40, 42, 42, 43, 45, 46,
- 47, 49, 50, 51, 54, 54, 32, 33, 33, 33, 33, 33, 33, 34, 34, 35, 36, 36,
- 36, 38, 38, 39, 40, 40, 41, 42, 42, 44, 45, 45, 47, 48, 48, 51, 51, 53,
- 55, 55, 36, 35, 35, 35, 35, 34, 35, 36, 36, 37, 38, 38, 40, 42, 42, 45,
- 48, 48, 49, 50, 50, 52, 53, 54, 56, 56, 57, 59, 60, 61, 63, 63, 44, 43,
- 42, 42, 41, 41, 41, 42, 42, 42, 42, 42, 44, 48, 48, 50, 54, 54, 56, 58,
- 58, 61, 63, 63, 66, 67, 67, 71, 71, 72, 75, 75, 47, 46, 45, 45, 44, 44,
- 44, 45, 45, 45, 45, 45, 47, 50, 50, 53, 56, 56, 58, 60, 60, 64, 66, 66,
- 69, 70, 71, 74, 75, 76, 79, 79, 53, 52, 51, 51, 49, 49, 49, 49, 50, 49,
- 49, 49, 51, 54, 54, 57, 60, 60, 63, 65, 65, 69, 71, 72, 75, 76, 77, 81,
- 82, 83, 87, 87 },
- { /* Chroma */
- /* Size 4x4 */
- 31, 38, 47, 49, 38, 47, 46, 46, 47, 46, 54, 57, 49, 46, 57, 66,
- /* Size 8x8 */
- 31, 31, 35, 42, 48, 47, 49, 51, 31, 32, 36, 42, 46, 45, 46, 48, 35, 36,
- 41, 45, 47, 45, 46, 48, 42, 42, 45, 48, 50, 49, 50, 51, 48, 46, 47, 50,
- 53, 53, 54, 54, 47, 45, 45, 49, 53, 57, 59, 60, 49, 46, 46, 50, 54, 59,
- 61, 64, 51, 48, 48, 51, 54, 60, 64, 68,
- /* Size 16x16 */
- 32, 31, 30, 31, 33, 36, 38, 41, 49, 49, 48, 49, 50, 51, 52, 54, 31, 31,
- 31, 32, 34, 38, 40, 42, 47, 47, 47, 47, 48, 48, 50, 52, 30, 31, 31, 32,
- 35, 39, 41, 42, 46, 46, 46, 45, 46, 47, 48, 50, 31, 32, 32, 33, 36, 40,
- 41, 43, 46, 46, 45, 45, 46, 46, 47, 49, 33, 34, 35, 36, 39, 43, 44, 45,
- 47, 46, 46, 45, 46, 47, 47, 49, 36, 38, 39, 40, 43, 47, 47, 47, 48, 47,
- 46, 45, 46, 46, 47, 48, 38, 40, 41, 41, 44, 47, 47, 48, 49, 48, 48, 47,
- 47, 47, 48, 49, 41, 42, 42, 43, 45, 47, 48, 48, 50, 50, 49, 49, 50, 50,
- 50, 52, 49, 47, 46, 46, 47, 48, 49, 50, 53, 53, 53, 53, 54, 54, 54, 55,
- 49, 47, 46, 46, 46, 47, 48, 50, 53, 53, 54, 55, 55, 55, 56, 57, 48, 47,
- 46, 45, 46, 46, 48, 49, 53, 54, 54, 55, 56, 56, 57, 58, 49, 47, 45, 45,
- 45, 45, 47, 49, 53, 55, 55, 58, 59, 60, 61, 62, 50, 48, 46, 46, 46, 46,
- 47, 50, 54, 55, 56, 59, 61, 61, 63, 64, 51, 48, 47, 46, 47, 46, 47, 50,
- 54, 55, 56, 60, 61, 62, 64, 66, 52, 50, 48, 47, 47, 47, 48, 50, 54, 56,
- 57, 61, 63, 64, 66, 68, 54, 52, 50, 49, 49, 48, 49, 52, 55, 57, 58, 62,
- 64, 66, 68, 71,
- /* Size 32x32 */
- 32, 31, 31, 31, 30, 30, 31, 33, 33, 34, 36, 36, 38, 41, 41, 45, 49, 49,
- 49, 48, 48, 49, 49, 49, 50, 50, 51, 52, 52, 53, 54, 54, 31, 31, 31, 31,
- 31, 31, 31, 34, 34, 35, 38, 38, 39, 42, 42, 45, 48, 48, 47, 47, 47, 47,
- 47, 47, 49, 49, 49, 50, 50, 51, 53, 53, 31, 31, 31, 31, 31, 31, 32, 34,
- 34, 35, 38, 38, 40, 42, 42, 45, 47, 47, 47, 47, 47, 47, 47, 47, 48, 48,
- 48, 49, 50, 50, 52, 52, 31, 31, 31, 31, 31, 31, 32, 34, 34, 36, 38, 38,
- 40, 42, 42, 45, 47, 47, 47, 47, 47, 47, 46, 47, 48, 48, 48, 49, 49, 50,
- 52, 52, 30, 31, 31, 31, 31, 31, 32, 35, 35, 36, 39, 39, 41, 42, 42, 44,
- 46, 46, 46, 46, 46, 45, 45, 45, 46, 47, 47, 48, 48, 48, 50, 50, 30, 31,
- 31, 31, 31, 32, 32, 35, 35, 36, 40, 40, 41, 42, 42, 44, 46, 46, 46, 45,
- 45, 45, 45, 45, 46, 46, 46, 47, 47, 48, 49, 49, 31, 31, 32, 32, 32, 32,
- 33, 35, 36, 37, 40, 40, 41, 43, 43, 44, 46, 46, 46, 45, 45, 45, 45, 45,
- 46, 46, 46, 47, 47, 48, 49, 49, 33, 34, 34, 34, 35, 35, 35, 38, 38, 40,
- 43, 43, 43, 44, 44, 46, 47, 47, 46, 46, 46, 45, 45, 45, 46, 46, 47, 47,
- 47, 48, 49, 49, 33, 34, 34, 34, 35, 35, 36, 38, 39, 40, 43, 43, 44, 45,
- 45, 46, 47, 47, 46, 46, 46, 45, 45, 45, 46, 46, 47, 47, 47, 48, 49, 49,
- 34, 35, 35, 36, 36, 36, 37, 40, 40, 41, 44, 44, 45, 45, 45, 46, 47, 47,
- 47, 46, 46, 45, 45, 45, 46, 46, 46, 47, 47, 48, 49, 49, 36, 38, 38, 38,
- 39, 40, 40, 43, 43, 44, 47, 47, 47, 47, 47, 47, 48, 48, 47, 46, 46, 45,
- 45, 45, 46, 46, 46, 46, 47, 47, 48, 48, 36, 38, 38, 38, 39, 40, 40, 43,
- 43, 44, 47, 47, 47, 47, 47, 47, 48, 48, 47, 46, 46, 45, 45, 45, 46, 46,
- 46, 46, 47, 47, 48, 48, 38, 39, 40, 40, 41, 41, 41, 43, 44, 45, 47, 47,
- 47, 48, 48, 48, 49, 49, 48, 48, 48, 47, 47, 47, 47, 47, 47, 48, 48, 48,
- 49, 49, 41, 42, 42, 42, 42, 42, 43, 44, 45, 45, 47, 47, 48, 48, 48, 49,
- 50, 50, 50, 49, 49, 49, 49, 49, 50, 50, 50, 50, 50, 51, 52, 52, 41, 42,
- 42, 42, 42, 42, 43, 44, 45, 45, 47, 47, 48, 48, 48, 49, 50, 50, 50, 49,
- 49, 49, 49, 49, 50, 50, 50, 50, 50, 51, 52, 52, 45, 45, 45, 45, 44, 44,
- 44, 46, 46, 46, 47, 47, 48, 49, 49, 50, 51, 51, 51, 51, 51, 51, 51, 51,
- 52, 52, 52, 52, 52, 52, 53, 53, 49, 48, 47, 47, 46, 46, 46, 47, 47, 47,
- 48, 48, 49, 50, 50, 51, 53, 53, 53, 53, 53, 53, 53, 53, 54, 54, 54, 54,
- 54, 54, 55, 55, 49, 48, 47, 47, 46, 46, 46, 47, 47, 47, 48, 48, 49, 50,
- 50, 51, 53, 53, 53, 53, 53, 53, 53, 53, 54, 54, 54, 54, 54, 54, 55, 55,
- 49, 47, 47, 47, 46, 46, 46, 46, 46, 47, 47, 47, 48, 50, 50, 51, 53, 53,
- 53, 54, 54, 54, 55, 55, 55, 55, 55, 56, 56, 56, 57, 57, 48, 47, 47, 47,
- 46, 45, 45, 46, 46, 46, 46, 46, 48, 49, 49, 51, 53, 53, 54, 54, 54, 55,
- 55, 56, 56, 56, 56, 57, 57, 58, 58, 58, 48, 47, 47, 47, 46, 45, 45, 46,
- 46, 46, 46, 46, 48, 49, 49, 51, 53, 53, 54, 54, 54, 55, 55, 56, 56, 56,
- 56, 57, 57, 58, 58, 58, 49, 47, 47, 47, 45, 45, 45, 45, 45, 45, 45, 45,
- 47, 49, 49, 51, 53, 53, 54, 55, 55, 57, 57, 58, 58, 59, 59, 60, 60, 60,
- 61, 61, 49, 47, 47, 46, 45, 45, 45, 45, 45, 45, 45, 45, 47, 49, 49, 51,
- 53, 53, 55, 55, 55, 57, 58, 58, 59, 60, 60, 61, 61, 61, 62, 62, 49, 47,
- 47, 47, 45, 45, 45, 45, 45, 45, 45, 45, 47, 49, 49, 51, 53, 53, 55, 56,
- 56, 58, 58, 59, 59, 60, 60, 61, 61, 62, 63, 63, 50, 49, 48, 48, 46, 46,
- 46, 46, 46, 46, 46, 46, 47, 50, 50, 52, 54, 54, 55, 56, 56, 58, 59, 59,
- 61, 61, 61, 63, 63, 63, 64, 64, 50, 49, 48, 48, 47, 46, 46, 46, 46, 46,
- 46, 46, 47, 50, 50, 52, 54, 54, 55, 56, 56, 59, 60, 60, 61, 61, 62, 63,
- 63, 64, 65, 65, 51, 49, 48, 48, 47, 46, 46, 47, 47, 46, 46, 46, 47, 50,
- 50, 52, 54, 54, 55, 56, 56, 59, 60, 60, 61, 62, 62, 64, 64, 64, 66, 66,
- 52, 50, 49, 49, 48, 47, 47, 47, 47, 47, 46, 46, 48, 50, 50, 52, 54, 54,
- 56, 57, 57, 60, 61, 61, 63, 63, 64, 66, 66, 67, 68, 68, 52, 50, 50, 49,
- 48, 47, 47, 47, 47, 47, 47, 47, 48, 50, 50, 52, 54, 54, 56, 57, 57, 60,
- 61, 61, 63, 63, 64, 66, 66, 67, 68, 68, 53, 51, 50, 50, 48, 48, 48, 48,
- 48, 48, 47, 47, 48, 51, 51, 52, 54, 54, 56, 58, 58, 60, 61, 62, 63, 64,
- 64, 67, 67, 68, 69, 69, 54, 53, 52, 52, 50, 49, 49, 49, 49, 49, 48, 48,
- 49, 52, 52, 53, 55, 55, 57, 58, 58, 61, 62, 63, 64, 65, 66, 68, 68, 69,
- 71, 71, 54, 53, 52, 52, 50, 49, 49, 49, 49, 49, 48, 48, 49, 52, 52, 53,
- 55, 55, 57, 58, 58, 61, 62, 63, 64, 65, 66, 68, 68, 69, 71, 71,
- /* Size 4x8 */
- 31, 38, 47, 50, 31, 40, 46, 48, 36, 44, 47, 47, 42, 47, 50, 50, 47, 48,
- 53, 54, 46, 46, 54, 60, 48, 46, 55, 64, 50, 48, 56, 67,
- /* Size 8x4 */
- 31, 31, 36, 42, 47, 46, 48, 50, 38, 40, 44, 47, 48, 46, 46, 48, 47, 46,
- 47, 50, 53, 54, 55, 56, 50, 48, 47, 50, 54, 60, 64, 67,
- /* Size 8x16 */
- 32, 31, 35, 38, 48, 49, 50, 52, 31, 31, 37, 40, 47, 47, 48, 50, 30, 32,
- 38, 40, 46, 45, 46, 48, 31, 33, 38, 41, 46, 45, 46, 48, 33, 36, 41, 44,
- 47, 46, 46, 47, 37, 40, 45, 47, 47, 45, 46, 47, 39, 41, 46, 47, 48, 47,
- 47, 48, 42, 43, 46, 48, 50, 49, 50, 50, 49, 46, 48, 49, 53, 53, 54, 54,
- 48, 46, 47, 48, 53, 55, 55, 56, 48, 46, 46, 48, 53, 56, 56, 57, 49, 45,
- 45, 47, 53, 58, 59, 61, 50, 46, 46, 48, 54, 59, 61, 63, 51, 47, 47, 48,
- 54, 60, 61, 64, 52, 48, 47, 48, 54, 61, 63, 66, 54, 50, 49, 50, 55, 62,
- 65, 68,
- /* Size 16x8 */
- 32, 31, 30, 31, 33, 37, 39, 42, 49, 48, 48, 49, 50, 51, 52, 54, 31, 31,
- 32, 33, 36, 40, 41, 43, 46, 46, 46, 45, 46, 47, 48, 50, 35, 37, 38, 38,
- 41, 45, 46, 46, 48, 47, 46, 45, 46, 47, 47, 49, 38, 40, 40, 41, 44, 47,
- 47, 48, 49, 48, 48, 47, 48, 48, 48, 50, 48, 47, 46, 46, 47, 47, 48, 50,
- 53, 53, 53, 53, 54, 54, 54, 55, 49, 47, 45, 45, 46, 45, 47, 49, 53, 55,
- 56, 58, 59, 60, 61, 62, 50, 48, 46, 46, 46, 46, 47, 50, 54, 55, 56, 59,
- 61, 61, 63, 65, 52, 50, 48, 48, 47, 47, 48, 50, 54, 56, 57, 61, 63, 64,
- 66, 68,
- /* Size 16x32 */
- 32, 31, 31, 31, 35, 37, 38, 47, 48, 48, 49, 49, 50, 52, 52, 54, 31, 31,
- 31, 32, 36, 38, 39, 46, 47, 47, 48, 48, 49, 50, 50, 53, 31, 31, 31, 32,
- 37, 38, 40, 46, 47, 47, 47, 47, 48, 50, 50, 52, 31, 31, 31, 32, 37, 38,
- 40, 46, 47, 47, 47, 47, 48, 50, 50, 52, 30, 31, 32, 32, 38, 39, 40, 45,
- 46, 46, 45, 45, 46, 48, 48, 50, 30, 31, 32, 33, 38, 40, 41, 45, 46, 46,
- 45, 45, 46, 48, 48, 50, 31, 32, 33, 33, 38, 40, 41, 45, 46, 46, 45, 45,
- 46, 48, 48, 50, 33, 35, 35, 36, 41, 43, 43, 46, 47, 46, 45, 45, 46, 47,
- 47, 49, 33, 35, 36, 36, 41, 43, 44, 46, 47, 46, 46, 46, 46, 47, 47, 49,
- 34, 36, 37, 37, 42, 44, 45, 47, 47, 47, 45, 45, 46, 47, 47, 49, 37, 39,
- 40, 41, 45, 47, 47, 47, 47, 47, 45, 45, 46, 47, 47, 48, 37, 39, 40, 41,
- 45, 47, 47, 47, 47, 47, 45, 45, 46, 47, 47, 48, 39, 40, 41, 42, 46, 47,
- 47, 48, 48, 48, 47, 47, 47, 48, 48, 50, 42, 42, 43, 43, 46, 47, 48, 50,
- 50, 50, 49, 49, 50, 50, 50, 52, 42, 42, 43, 43, 46, 47, 48, 50, 50, 50,
- 49, 49, 50, 50, 50, 52, 45, 45, 44, 45, 47, 47, 48, 51, 51, 51, 51, 51,
- 52, 52, 52, 54, 49, 47, 46, 47, 48, 48, 49, 52, 53, 53, 53, 53, 54, 54,
- 54, 55, 49, 47, 46, 47, 48, 48, 49, 52, 53, 53, 53, 53, 54, 54, 54, 55,
- 48, 47, 46, 46, 47, 47, 48, 52, 53, 53, 55, 55, 55, 56, 56, 57, 48, 46,
- 46, 46, 46, 47, 48, 52, 53, 54, 56, 56, 56, 57, 57, 59, 48, 46, 46, 46,
- 46, 47, 48, 52, 53, 54, 56, 56, 56, 57, 57, 59, 49, 46, 45, 45, 46, 46,
- 47, 52, 53, 54, 57, 57, 58, 60, 60, 61, 49, 46, 45, 45, 45, 46, 47, 52,
- 53, 55, 58, 58, 59, 61, 61, 62, 49, 46, 45, 45, 46, 46, 47, 52, 53, 55,
- 58, 58, 60, 61, 61, 63, 50, 47, 46, 46, 46, 46, 48, 53, 54, 55, 59, 59,
- 61, 63, 63, 65, 50, 48, 46, 46, 46, 46, 48, 53, 54, 55, 59, 59, 61, 64,
- 64, 65, 51, 48, 47, 47, 47, 47, 48, 53, 54, 55, 60, 60, 61, 64, 64, 66,
- 52, 49, 48, 48, 47, 47, 48, 53, 54, 56, 61, 61, 63, 66, 66, 68, 52, 49,
- 48, 48, 47, 47, 48, 53, 54, 56, 61, 61, 63, 66, 66, 68, 53, 50, 48, 48,
- 48, 48, 49, 54, 54, 56, 61, 61, 63, 67, 67, 69, 54, 51, 50, 50, 49, 49,
- 50, 55, 55, 57, 62, 62, 65, 68, 68, 71, 54, 51, 50, 50, 49, 49, 50, 55,
- 55, 57, 62, 62, 65, 68, 68, 71,
- /* Size 32x16 */
- 32, 31, 31, 31, 30, 30, 31, 33, 33, 34, 37, 37, 39, 42, 42, 45, 49, 49,
- 48, 48, 48, 49, 49, 49, 50, 50, 51, 52, 52, 53, 54, 54, 31, 31, 31, 31,
- 31, 31, 32, 35, 35, 36, 39, 39, 40, 42, 42, 45, 47, 47, 47, 46, 46, 46,
- 46, 46, 47, 48, 48, 49, 49, 50, 51, 51, 31, 31, 31, 31, 32, 32, 33, 35,
- 36, 37, 40, 40, 41, 43, 43, 44, 46, 46, 46, 46, 46, 45, 45, 45, 46, 46,
- 47, 48, 48, 48, 50, 50, 31, 32, 32, 32, 32, 33, 33, 36, 36, 37, 41, 41,
- 42, 43, 43, 45, 47, 47, 46, 46, 46, 45, 45, 45, 46, 46, 47, 48, 48, 48,
- 50, 50, 35, 36, 37, 37, 38, 38, 38, 41, 41, 42, 45, 45, 46, 46, 46, 47,
- 48, 48, 47, 46, 46, 46, 45, 46, 46, 46, 47, 47, 47, 48, 49, 49, 37, 38,
- 38, 38, 39, 40, 40, 43, 43, 44, 47, 47, 47, 47, 47, 47, 48, 48, 47, 47,
- 47, 46, 46, 46, 46, 46, 47, 47, 47, 48, 49, 49, 38, 39, 40, 40, 40, 41,
- 41, 43, 44, 45, 47, 47, 47, 48, 48, 48, 49, 49, 48, 48, 48, 47, 47, 47,
- 48, 48, 48, 48, 48, 49, 50, 50, 47, 46, 46, 46, 45, 45, 45, 46, 46, 47,
- 47, 47, 48, 50, 50, 51, 52, 52, 52, 52, 52, 52, 52, 52, 53, 53, 53, 53,
- 53, 54, 55, 55, 48, 47, 47, 47, 46, 46, 46, 47, 47, 47, 47, 47, 48, 50,
- 50, 51, 53, 53, 53, 53, 53, 53, 53, 53, 54, 54, 54, 54, 54, 54, 55, 55,
- 48, 47, 47, 47, 46, 46, 46, 46, 46, 47, 47, 47, 48, 50, 50, 51, 53, 53,
- 53, 54, 54, 54, 55, 55, 55, 55, 55, 56, 56, 56, 57, 57, 49, 48, 47, 47,
- 45, 45, 45, 45, 46, 45, 45, 45, 47, 49, 49, 51, 53, 53, 55, 56, 56, 57,
- 58, 58, 59, 59, 60, 61, 61, 61, 62, 62, 49, 48, 47, 47, 45, 45, 45, 45,
- 46, 45, 45, 45, 47, 49, 49, 51, 53, 53, 55, 56, 56, 57, 58, 58, 59, 59,
- 60, 61, 61, 61, 62, 62, 50, 49, 48, 48, 46, 46, 46, 46, 46, 46, 46, 46,
- 47, 50, 50, 52, 54, 54, 55, 56, 56, 58, 59, 60, 61, 61, 61, 63, 63, 63,
- 65, 65, 52, 50, 50, 50, 48, 48, 48, 47, 47, 47, 47, 47, 48, 50, 50, 52,
- 54, 54, 56, 57, 57, 60, 61, 61, 63, 64, 64, 66, 66, 67, 68, 68, 52, 50,
- 50, 50, 48, 48, 48, 47, 47, 47, 47, 47, 48, 50, 50, 52, 54, 54, 56, 57,
- 57, 60, 61, 61, 63, 64, 64, 66, 66, 67, 68, 68, 54, 53, 52, 52, 50, 50,
- 50, 49, 49, 49, 48, 48, 50, 52, 52, 54, 55, 55, 57, 59, 59, 61, 62, 63,
- 65, 65, 66, 68, 68, 69, 71, 71,
- /* Size 4x16 */
- 31, 37, 48, 52, 31, 38, 47, 50, 31, 39, 46, 48, 32, 40, 46, 48, 35, 43,
- 46, 47, 39, 47, 47, 47, 40, 47, 48, 48, 42, 47, 50, 50, 47, 48, 53, 54,
- 47, 47, 53, 56, 46, 47, 54, 57, 46, 46, 55, 61, 47, 46, 55, 63, 48, 47,
- 55, 64, 49, 47, 56, 66, 51, 49, 57, 68,
- /* Size 16x4 */
- 31, 31, 31, 32, 35, 39, 40, 42, 47, 47, 46, 46, 47, 48, 49, 51, 37, 38,
- 39, 40, 43, 47, 47, 47, 48, 47, 47, 46, 46, 47, 47, 49, 48, 47, 46, 46,
- 46, 47, 48, 50, 53, 53, 54, 55, 55, 55, 56, 57, 52, 50, 48, 48, 47, 47,
- 48, 50, 54, 56, 57, 61, 63, 64, 66, 68,
- /* Size 8x32 */
- 32, 31, 35, 38, 48, 49, 50, 52, 31, 31, 36, 39, 47, 48, 49, 50, 31, 31,
- 37, 40, 47, 47, 48, 50, 31, 31, 37, 40, 47, 47, 48, 50, 30, 32, 38, 40,
- 46, 45, 46, 48, 30, 32, 38, 41, 46, 45, 46, 48, 31, 33, 38, 41, 46, 45,
- 46, 48, 33, 35, 41, 43, 47, 45, 46, 47, 33, 36, 41, 44, 47, 46, 46, 47,
- 34, 37, 42, 45, 47, 45, 46, 47, 37, 40, 45, 47, 47, 45, 46, 47, 37, 40,
- 45, 47, 47, 45, 46, 47, 39, 41, 46, 47, 48, 47, 47, 48, 42, 43, 46, 48,
- 50, 49, 50, 50, 42, 43, 46, 48, 50, 49, 50, 50, 45, 44, 47, 48, 51, 51,
- 52, 52, 49, 46, 48, 49, 53, 53, 54, 54, 49, 46, 48, 49, 53, 53, 54, 54,
- 48, 46, 47, 48, 53, 55, 55, 56, 48, 46, 46, 48, 53, 56, 56, 57, 48, 46,
- 46, 48, 53, 56, 56, 57, 49, 45, 46, 47, 53, 57, 58, 60, 49, 45, 45, 47,
- 53, 58, 59, 61, 49, 45, 46, 47, 53, 58, 60, 61, 50, 46, 46, 48, 54, 59,
- 61, 63, 50, 46, 46, 48, 54, 59, 61, 64, 51, 47, 47, 48, 54, 60, 61, 64,
- 52, 48, 47, 48, 54, 61, 63, 66, 52, 48, 47, 48, 54, 61, 63, 66, 53, 48,
- 48, 49, 54, 61, 63, 67, 54, 50, 49, 50, 55, 62, 65, 68, 54, 50, 49, 50,
- 55, 62, 65, 68,
- /* Size 32x8 */
- 32, 31, 31, 31, 30, 30, 31, 33, 33, 34, 37, 37, 39, 42, 42, 45, 49, 49,
- 48, 48, 48, 49, 49, 49, 50, 50, 51, 52, 52, 53, 54, 54, 31, 31, 31, 31,
- 32, 32, 33, 35, 36, 37, 40, 40, 41, 43, 43, 44, 46, 46, 46, 46, 46, 45,
- 45, 45, 46, 46, 47, 48, 48, 48, 50, 50, 35, 36, 37, 37, 38, 38, 38, 41,
- 41, 42, 45, 45, 46, 46, 46, 47, 48, 48, 47, 46, 46, 46, 45, 46, 46, 46,
- 47, 47, 47, 48, 49, 49, 38, 39, 40, 40, 40, 41, 41, 43, 44, 45, 47, 47,
- 47, 48, 48, 48, 49, 49, 48, 48, 48, 47, 47, 47, 48, 48, 48, 48, 48, 49,
- 50, 50, 48, 47, 47, 47, 46, 46, 46, 47, 47, 47, 47, 47, 48, 50, 50, 51,
- 53, 53, 53, 53, 53, 53, 53, 53, 54, 54, 54, 54, 54, 54, 55, 55, 49, 48,
- 47, 47, 45, 45, 45, 45, 46, 45, 45, 45, 47, 49, 49, 51, 53, 53, 55, 56,
- 56, 57, 58, 58, 59, 59, 60, 61, 61, 61, 62, 62, 50, 49, 48, 48, 46, 46,
- 46, 46, 46, 46, 46, 46, 47, 50, 50, 52, 54, 54, 55, 56, 56, 58, 59, 60,
- 61, 61, 61, 63, 63, 63, 65, 65, 52, 50, 50, 50, 48, 48, 48, 47, 47, 47,
- 47, 47, 48, 50, 50, 52, 54, 54, 56, 57, 57, 60, 61, 61, 63, 64, 64, 66,
- 66, 67, 68, 68 },
- },
- {
- { /* Luma */
- /* Size 4x4 */
- 32, 32, 35, 43, 32, 34, 37, 43, 35, 37, 48, 54, 43, 43, 54, 65,
- /* Size 8x8 */
- 31, 31, 32, 32, 34, 37, 43, 47, 31, 32, 32, 32, 34, 36, 41, 44, 32, 32,
- 33, 34, 35, 38, 42, 45, 32, 32, 34, 35, 37, 39, 42, 46, 34, 34, 35, 37,
- 41, 45, 49, 52, 37, 36, 38, 39, 45, 51, 56, 59, 43, 41, 42, 42, 49, 56,
- 63, 67, 47, 44, 45, 46, 52, 59, 67, 71,
- /* Size 16x16 */
- 32, 31, 31, 31, 31, 31, 32, 32, 34, 35, 36, 39, 41, 44, 47, 48, 31, 32,
- 32, 32, 32, 32, 32, 33, 34, 35, 35, 38, 40, 42, 45, 46, 31, 32, 32, 32,
- 32, 32, 32, 33, 34, 34, 35, 38, 39, 42, 45, 45, 31, 32, 32, 32, 32, 32,
- 32, 33, 33, 34, 34, 37, 38, 41, 44, 44, 31, 32, 32, 32, 33, 33, 33, 34,
- 35, 36, 36, 39, 40, 42, 44, 45, 31, 32, 32, 32, 33, 33, 34, 34, 35, 36,
- 36, 39, 40, 42, 45, 45, 32, 32, 32, 32, 33, 34, 35, 36, 37, 38, 38, 40,
- 41, 42, 45, 46, 32, 33, 33, 33, 34, 34, 36, 36, 38, 39, 40, 42, 43, 44,
- 47, 47, 34, 34, 34, 33, 35, 35, 37, 38, 39, 42, 42, 45, 46, 47, 50, 51,
- 35, 35, 34, 34, 36, 36, 38, 39, 42, 46, 47, 49, 50, 52, 55, 55, 36, 35,
- 35, 34, 36, 36, 38, 40, 42, 47, 48, 50, 52, 54, 56, 57, 39, 38, 38, 37,
- 39, 39, 40, 42, 45, 49, 50, 54, 55, 58, 60, 61, 41, 40, 39, 38, 40, 40,
- 41, 43, 46, 50, 52, 55, 57, 60, 62, 63, 44, 42, 42, 41, 42, 42, 42, 44,
- 47, 52, 54, 58, 60, 63, 66, 67, 47, 45, 45, 44, 44, 45, 45, 47, 50, 55,
- 56, 60, 62, 66, 69, 70, 48, 46, 45, 44, 45, 45, 46, 47, 51, 55, 57, 61,
- 63, 67, 70, 71,
- /* Size 32x32 */
- 32, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 32, 32, 32, 32, 34, 34, 34,
- 35, 36, 36, 38, 39, 39, 41, 44, 44, 45, 47, 48, 48, 51, 31, 31, 31, 31,
- 31, 31, 31, 32, 32, 32, 32, 32, 32, 32, 33, 34, 34, 34, 35, 35, 35, 37,
- 39, 39, 40, 43, 43, 44, 46, 47, 47, 50, 31, 31, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 33, 34, 34, 34, 35, 35, 35, 37, 38, 38, 40, 42,
- 42, 43, 45, 46, 46, 49, 31, 31, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 33, 34, 34, 34, 35, 35, 35, 37, 38, 38, 40, 42, 42, 43, 45, 46,
- 46, 49, 31, 31, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 33, 34,
- 34, 34, 34, 35, 35, 36, 38, 38, 39, 42, 42, 42, 45, 45, 45, 48, 31, 31,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 33, 33, 33, 34, 34, 34,
- 34, 36, 37, 37, 38, 41, 41, 41, 44, 44, 44, 47, 31, 31, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 33, 33, 33, 34, 34, 34, 34, 36, 37, 37,
- 38, 41, 41, 41, 44, 44, 44, 47, 31, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 33, 33, 33, 33, 34, 34, 34, 34, 35, 35, 36, 38, 38, 39, 41, 41, 42,
- 44, 45, 45, 47, 31, 32, 32, 32, 32, 32, 32, 32, 33, 33, 33, 33, 33, 33,
- 34, 35, 35, 35, 36, 36, 36, 37, 39, 39, 40, 42, 42, 42, 44, 45, 45, 48,
- 31, 32, 32, 32, 32, 32, 32, 32, 33, 33, 33, 33, 34, 34, 34, 35, 35, 35,
- 36, 36, 36, 38, 39, 39, 40, 42, 42, 42, 45, 45, 45, 48, 31, 32, 32, 32,
- 32, 32, 32, 32, 33, 33, 33, 33, 34, 34, 34, 35, 35, 35, 36, 36, 36, 38,
- 39, 39, 40, 42, 42, 42, 45, 45, 45, 48, 32, 32, 32, 32, 32, 32, 32, 33,
- 33, 33, 33, 34, 35, 35, 35, 36, 36, 36, 37, 37, 37, 39, 40, 40, 41, 42,
- 42, 43, 45, 45, 45, 48, 32, 32, 32, 32, 32, 32, 32, 33, 33, 34, 34, 35,
- 35, 35, 36, 37, 37, 37, 38, 38, 38, 39, 40, 40, 41, 42, 42, 43, 45, 46,
- 46, 48, 32, 32, 32, 32, 32, 32, 32, 33, 33, 34, 34, 35, 35, 35, 36, 37,
- 37, 37, 38, 38, 38, 39, 40, 40, 41, 42, 42, 43, 45, 46, 46, 48, 32, 33,
- 33, 33, 33, 33, 33, 33, 34, 34, 34, 35, 36, 36, 36, 38, 38, 38, 39, 40,
- 40, 41, 42, 42, 43, 44, 44, 45, 47, 47, 47, 50, 34, 34, 34, 34, 34, 33,
- 33, 34, 35, 35, 35, 36, 37, 37, 38, 39, 39, 40, 42, 42, 42, 44, 45, 45,
- 46, 47, 47, 48, 50, 51, 51, 53, 34, 34, 34, 34, 34, 33, 33, 34, 35, 35,
- 35, 36, 37, 37, 38, 39, 39, 40, 42, 42, 42, 44, 45, 45, 46, 47, 47, 48,
- 50, 51, 51, 53, 34, 34, 34, 34, 34, 34, 34, 34, 35, 35, 35, 36, 37, 37,
- 38, 40, 40, 41, 43, 44, 44, 45, 46, 46, 47, 49, 49, 49, 51, 52, 52, 54,
- 35, 35, 35, 35, 34, 34, 34, 34, 36, 36, 36, 37, 38, 38, 39, 42, 42, 43,
- 46, 47, 47, 48, 49, 49, 50, 52, 52, 53, 55, 55, 55, 57, 36, 35, 35, 35,
- 35, 34, 34, 35, 36, 36, 36, 37, 38, 38, 40, 42, 42, 44, 47, 48, 48, 50,
- 50, 50, 52, 54, 54, 54, 56, 57, 57, 58, 36, 35, 35, 35, 35, 34, 34, 35,
- 36, 36, 36, 37, 38, 38, 40, 42, 42, 44, 47, 48, 48, 50, 50, 50, 52, 54,
- 54, 54, 56, 57, 57, 58, 38, 37, 37, 37, 36, 36, 36, 36, 37, 38, 38, 39,
- 39, 39, 41, 44, 44, 45, 48, 50, 50, 51, 52, 52, 54, 56, 56, 57, 58, 59,
- 59, 61, 39, 39, 38, 38, 38, 37, 37, 38, 39, 39, 39, 40, 40, 40, 42, 45,
- 45, 46, 49, 50, 50, 52, 54, 54, 55, 58, 58, 58, 60, 61, 61, 63, 39, 39,
- 38, 38, 38, 37, 37, 38, 39, 39, 39, 40, 40, 40, 42, 45, 45, 46, 49, 50,
- 50, 52, 54, 54, 55, 58, 58, 58, 60, 61, 61, 63, 41, 40, 40, 40, 39, 38,
- 38, 39, 40, 40, 40, 41, 41, 41, 43, 46, 46, 47, 50, 52, 52, 54, 55, 55,
- 57, 60, 60, 60, 62, 63, 63, 66, 44, 43, 42, 42, 42, 41, 41, 41, 42, 42,
- 42, 42, 42, 42, 44, 47, 47, 49, 52, 54, 54, 56, 58, 58, 60, 63, 63, 64,
- 66, 67, 67, 69, 44, 43, 42, 42, 42, 41, 41, 41, 42, 42, 42, 42, 42, 42,
- 44, 47, 47, 49, 52, 54, 54, 56, 58, 58, 60, 63, 63, 64, 66, 67, 67, 69,
- 45, 44, 43, 43, 42, 41, 41, 42, 42, 42, 42, 43, 43, 43, 45, 48, 48, 49,
- 53, 54, 54, 57, 58, 58, 60, 64, 64, 65, 67, 68, 68, 70, 47, 46, 45, 45,
- 45, 44, 44, 44, 44, 45, 45, 45, 45, 45, 47, 50, 50, 51, 55, 56, 56, 58,
- 60, 60, 62, 66, 66, 67, 69, 70, 70, 73, 48, 47, 46, 46, 45, 44, 44, 45,
- 45, 45, 45, 45, 46, 46, 47, 51, 51, 52, 55, 57, 57, 59, 61, 61, 63, 67,
- 67, 68, 70, 71, 71, 74, 48, 47, 46, 46, 45, 44, 44, 45, 45, 45, 45, 45,
- 46, 46, 47, 51, 51, 52, 55, 57, 57, 59, 61, 61, 63, 67, 67, 68, 70, 71,
- 71, 74, 51, 50, 49, 49, 48, 47, 47, 47, 48, 48, 48, 48, 48, 48, 50, 53,
- 53, 54, 57, 58, 58, 61, 63, 63, 66, 69, 69, 70, 73, 74, 74, 77,
- /* Size 4x8 */
- 31, 32, 35, 43, 32, 33, 34, 41, 32, 34, 36, 42, 32, 35, 38, 42, 34, 37,
- 43, 49, 37, 40, 49, 56, 42, 43, 53, 63, 46, 46, 56, 67,
- /* Size 8x4 */
- 31, 32, 32, 32, 34, 37, 42, 46, 32, 33, 34, 35, 37, 40, 43, 46, 35, 34,
- 36, 38, 43, 49, 53, 56, 43, 41, 42, 42, 49, 56, 63, 67,
- /* Size 8x16 */
- 32, 31, 31, 32, 35, 36, 44, 47, 31, 32, 32, 32, 35, 35, 42, 45, 31, 32,
- 32, 32, 34, 35, 41, 45, 31, 32, 32, 33, 34, 34, 41, 44, 31, 32, 33, 34,
- 35, 36, 42, 44, 32, 32, 33, 34, 36, 36, 42, 45, 32, 33, 34, 35, 37, 38,
- 42, 45, 32, 33, 34, 36, 39, 40, 44, 47, 34, 34, 35, 37, 41, 42, 48, 50,
- 35, 34, 36, 38, 45, 47, 52, 55, 36, 34, 36, 38, 46, 48, 54, 56, 39, 37,
- 39, 40, 48, 50, 58, 60, 41, 39, 40, 41, 49, 51, 60, 62, 44, 41, 42, 43,
- 51, 53, 63, 66, 47, 44, 44, 45, 53, 56, 66, 69, 48, 45, 45, 46, 54, 56,
- 67, 70,
- /* Size 16x8 */
- 32, 31, 31, 31, 31, 32, 32, 32, 34, 35, 36, 39, 41, 44, 47, 48, 31, 32,
- 32, 32, 32, 32, 33, 33, 34, 34, 34, 37, 39, 41, 44, 45, 31, 32, 32, 32,
- 33, 33, 34, 34, 35, 36, 36, 39, 40, 42, 44, 45, 32, 32, 32, 33, 34, 34,
- 35, 36, 37, 38, 38, 40, 41, 43, 45, 46, 35, 35, 34, 34, 35, 36, 37, 39,
- 41, 45, 46, 48, 49, 51, 53, 54, 36, 35, 35, 34, 36, 36, 38, 40, 42, 47,
- 48, 50, 51, 53, 56, 56, 44, 42, 41, 41, 42, 42, 42, 44, 48, 52, 54, 58,
- 60, 63, 66, 67, 47, 45, 45, 44, 44, 45, 45, 47, 50, 55, 56, 60, 62, 66,
- 69, 70,
- /* Size 16x32 */
- 32, 31, 31, 31, 31, 32, 32, 32, 35, 36, 36, 40, 44, 44, 47, 53, 31, 31,
- 32, 32, 32, 32, 32, 33, 35, 35, 35, 39, 43, 43, 46, 52, 31, 32, 32, 32,
- 32, 32, 32, 33, 35, 35, 35, 39, 42, 42, 45, 51, 31, 32, 32, 32, 32, 32,
- 32, 33, 35, 35, 35, 39, 42, 42, 45, 51, 31, 32, 32, 32, 32, 32, 32, 33,
- 34, 35, 35, 39, 41, 41, 45, 50, 31, 32, 32, 32, 32, 33, 33, 33, 34, 34,
- 34, 38, 41, 41, 44, 49, 31, 32, 32, 32, 32, 33, 33, 33, 34, 34, 34, 38,
- 41, 41, 44, 49, 31, 32, 32, 32, 32, 33, 33, 33, 34, 35, 35, 38, 41, 41,
- 44, 49, 31, 32, 32, 32, 33, 34, 34, 34, 35, 36, 36, 39, 42, 42, 44, 49,
- 32, 32, 32, 32, 33, 34, 34, 34, 36, 36, 36, 39, 42, 42, 45, 50, 32, 32,
- 32, 32, 33, 34, 34, 34, 36, 36, 36, 39, 42, 42, 45, 50, 32, 32, 32, 32,
- 33, 35, 35, 35, 37, 37, 37, 40, 42, 42, 45, 49, 32, 32, 33, 33, 34, 35,
- 35, 36, 37, 38, 38, 41, 42, 42, 45, 49, 32, 32, 33, 33, 34, 35, 35, 36,
- 37, 38, 38, 41, 42, 42, 45, 49, 32, 33, 33, 33, 34, 36, 36, 36, 39, 40,
- 40, 42, 44, 44, 47, 51, 34, 34, 34, 34, 35, 37, 37, 38, 41, 42, 42, 45,
- 48, 48, 50, 54, 34, 34, 34, 34, 35, 37, 37, 38, 41, 42, 42, 45, 48, 48,
- 50, 54, 34, 34, 34, 34, 35, 37, 37, 38, 42, 43, 43, 46, 49, 49, 51, 55,
- 35, 35, 34, 34, 36, 38, 38, 39, 45, 47, 47, 50, 52, 52, 55, 59, 36, 35,
- 34, 34, 36, 38, 38, 40, 46, 48, 48, 51, 54, 54, 56, 60, 36, 35, 34, 34,
- 36, 38, 38, 40, 46, 48, 48, 51, 54, 54, 56, 60, 38, 37, 36, 36, 37, 40,
- 40, 41, 47, 49, 49, 53, 56, 56, 58, 63, 39, 38, 37, 37, 39, 40, 40, 42,
- 48, 50, 50, 54, 58, 58, 60, 65, 39, 38, 37, 37, 39, 40, 40, 42, 48, 50,
- 50, 54, 58, 58, 60, 65, 41, 40, 39, 39, 40, 41, 41, 43, 49, 51, 51, 56,
- 60, 60, 62, 67, 44, 42, 41, 41, 42, 43, 43, 45, 51, 53, 53, 59, 63, 63,
- 66, 71, 44, 42, 41, 41, 42, 43, 43, 45, 51, 53, 53, 59, 63, 63, 66, 71,
- 44, 43, 42, 42, 42, 43, 43, 45, 51, 54, 54, 59, 64, 64, 67, 72, 47, 45,
- 44, 44, 44, 45, 45, 47, 53, 56, 56, 61, 66, 66, 69, 75, 48, 46, 45, 45,
- 45, 46, 46, 48, 54, 56, 56, 62, 67, 67, 70, 76, 48, 46, 45, 45, 45, 46,
- 46, 48, 54, 56, 56, 62, 67, 67, 70, 76, 51, 49, 47, 47, 48, 48, 48, 50,
- 56, 58, 58, 64, 69, 69, 73, 79,
- /* Size 32x16 */
- 32, 31, 31, 31, 31, 31, 31, 31, 31, 32, 32, 32, 32, 32, 32, 34, 34, 34,
- 35, 36, 36, 38, 39, 39, 41, 44, 44, 44, 47, 48, 48, 51, 31, 31, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 33, 34, 34, 34, 35, 35, 35, 37,
- 38, 38, 40, 42, 42, 43, 45, 46, 46, 49, 31, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 33, 33, 33, 34, 34, 34, 34, 34, 34, 36, 37, 37, 39, 41,
- 41, 42, 44, 45, 45, 47, 31, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 33, 33, 33, 34, 34, 34, 34, 34, 34, 36, 37, 37, 39, 41, 41, 42, 44, 45,
- 45, 47, 31, 32, 32, 32, 32, 32, 32, 32, 33, 33, 33, 33, 34, 34, 34, 35,
- 35, 35, 36, 36, 36, 37, 39, 39, 40, 42, 42, 42, 44, 45, 45, 48, 32, 32,
- 32, 32, 32, 33, 33, 33, 34, 34, 34, 35, 35, 35, 36, 37, 37, 37, 38, 38,
- 38, 40, 40, 40, 41, 43, 43, 43, 45, 46, 46, 48, 32, 32, 32, 32, 32, 33,
- 33, 33, 34, 34, 34, 35, 35, 35, 36, 37, 37, 37, 38, 38, 38, 40, 40, 40,
- 41, 43, 43, 43, 45, 46, 46, 48, 32, 33, 33, 33, 33, 33, 33, 33, 34, 34,
- 34, 35, 36, 36, 36, 38, 38, 38, 39, 40, 40, 41, 42, 42, 43, 45, 45, 45,
- 47, 48, 48, 50, 35, 35, 35, 35, 34, 34, 34, 34, 35, 36, 36, 37, 37, 37,
- 39, 41, 41, 42, 45, 46, 46, 47, 48, 48, 49, 51, 51, 51, 53, 54, 54, 56,
- 36, 35, 35, 35, 35, 34, 34, 35, 36, 36, 36, 37, 38, 38, 40, 42, 42, 43,
- 47, 48, 48, 49, 50, 50, 51, 53, 53, 54, 56, 56, 56, 58, 36, 35, 35, 35,
- 35, 34, 34, 35, 36, 36, 36, 37, 38, 38, 40, 42, 42, 43, 47, 48, 48, 49,
- 50, 50, 51, 53, 53, 54, 56, 56, 56, 58, 40, 39, 39, 39, 39, 38, 38, 38,
- 39, 39, 39, 40, 41, 41, 42, 45, 45, 46, 50, 51, 51, 53, 54, 54, 56, 59,
- 59, 59, 61, 62, 62, 64, 44, 43, 42, 42, 41, 41, 41, 41, 42, 42, 42, 42,
- 42, 42, 44, 48, 48, 49, 52, 54, 54, 56, 58, 58, 60, 63, 63, 64, 66, 67,
- 67, 69, 44, 43, 42, 42, 41, 41, 41, 41, 42, 42, 42, 42, 42, 42, 44, 48,
- 48, 49, 52, 54, 54, 56, 58, 58, 60, 63, 63, 64, 66, 67, 67, 69, 47, 46,
- 45, 45, 45, 44, 44, 44, 44, 45, 45, 45, 45, 45, 47, 50, 50, 51, 55, 56,
- 56, 58, 60, 60, 62, 66, 66, 67, 69, 70, 70, 73, 53, 52, 51, 51, 50, 49,
- 49, 49, 49, 50, 50, 49, 49, 49, 51, 54, 54, 55, 59, 60, 60, 63, 65, 65,
- 67, 71, 71, 72, 75, 76, 76, 79,
- /* Size 4x16 */
- 31, 32, 36, 44, 32, 32, 35, 42, 32, 32, 35, 41, 32, 33, 34, 41, 32, 34,
- 36, 42, 32, 34, 36, 42, 32, 35, 38, 42, 33, 36, 40, 44, 34, 37, 42, 48,
- 35, 38, 47, 52, 35, 38, 48, 54, 38, 40, 50, 58, 40, 41, 51, 60, 42, 43,
- 53, 63, 45, 45, 56, 66, 46, 46, 56, 67,
- /* Size 16x4 */
- 31, 32, 32, 32, 32, 32, 32, 33, 34, 35, 35, 38, 40, 42, 45, 46, 32, 32,
- 32, 33, 34, 34, 35, 36, 37, 38, 38, 40, 41, 43, 45, 46, 36, 35, 35, 34,
- 36, 36, 38, 40, 42, 47, 48, 50, 51, 53, 56, 56, 44, 42, 41, 41, 42, 42,
- 42, 44, 48, 52, 54, 58, 60, 63, 66, 67,
- /* Size 8x32 */
- 32, 31, 31, 32, 35, 36, 44, 47, 31, 32, 32, 32, 35, 35, 43, 46, 31, 32,
- 32, 32, 35, 35, 42, 45, 31, 32, 32, 32, 35, 35, 42, 45, 31, 32, 32, 32,
- 34, 35, 41, 45, 31, 32, 32, 33, 34, 34, 41, 44, 31, 32, 32, 33, 34, 34,
- 41, 44, 31, 32, 32, 33, 34, 35, 41, 44, 31, 32, 33, 34, 35, 36, 42, 44,
- 32, 32, 33, 34, 36, 36, 42, 45, 32, 32, 33, 34, 36, 36, 42, 45, 32, 32,
- 33, 35, 37, 37, 42, 45, 32, 33, 34, 35, 37, 38, 42, 45, 32, 33, 34, 35,
- 37, 38, 42, 45, 32, 33, 34, 36, 39, 40, 44, 47, 34, 34, 35, 37, 41, 42,
- 48, 50, 34, 34, 35, 37, 41, 42, 48, 50, 34, 34, 35, 37, 42, 43, 49, 51,
- 35, 34, 36, 38, 45, 47, 52, 55, 36, 34, 36, 38, 46, 48, 54, 56, 36, 34,
- 36, 38, 46, 48, 54, 56, 38, 36, 37, 40, 47, 49, 56, 58, 39, 37, 39, 40,
- 48, 50, 58, 60, 39, 37, 39, 40, 48, 50, 58, 60, 41, 39, 40, 41, 49, 51,
- 60, 62, 44, 41, 42, 43, 51, 53, 63, 66, 44, 41, 42, 43, 51, 53, 63, 66,
- 44, 42, 42, 43, 51, 54, 64, 67, 47, 44, 44, 45, 53, 56, 66, 69, 48, 45,
- 45, 46, 54, 56, 67, 70, 48, 45, 45, 46, 54, 56, 67, 70, 51, 47, 48, 48,
- 56, 58, 69, 73,
- /* Size 32x8 */
- 32, 31, 31, 31, 31, 31, 31, 31, 31, 32, 32, 32, 32, 32, 32, 34, 34, 34,
- 35, 36, 36, 38, 39, 39, 41, 44, 44, 44, 47, 48, 48, 51, 31, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 33, 33, 33, 34, 34, 34, 34, 34, 34, 36,
- 37, 37, 39, 41, 41, 42, 44, 45, 45, 47, 31, 32, 32, 32, 32, 32, 32, 32,
- 33, 33, 33, 33, 34, 34, 34, 35, 35, 35, 36, 36, 36, 37, 39, 39, 40, 42,
- 42, 42, 44, 45, 45, 48, 32, 32, 32, 32, 32, 33, 33, 33, 34, 34, 34, 35,
- 35, 35, 36, 37, 37, 37, 38, 38, 38, 40, 40, 40, 41, 43, 43, 43, 45, 46,
- 46, 48, 35, 35, 35, 35, 34, 34, 34, 34, 35, 36, 36, 37, 37, 37, 39, 41,
- 41, 42, 45, 46, 46, 47, 48, 48, 49, 51, 51, 51, 53, 54, 54, 56, 36, 35,
- 35, 35, 35, 34, 34, 35, 36, 36, 36, 37, 38, 38, 40, 42, 42, 43, 47, 48,
- 48, 49, 50, 50, 51, 53, 53, 54, 56, 56, 56, 58, 44, 43, 42, 42, 41, 41,
- 41, 41, 42, 42, 42, 42, 42, 42, 44, 48, 48, 49, 52, 54, 54, 56, 58, 58,
- 60, 63, 63, 64, 66, 67, 67, 69, 47, 46, 45, 45, 45, 44, 44, 44, 44, 45,
- 45, 45, 45, 45, 47, 50, 50, 51, 55, 56, 56, 58, 60, 60, 62, 66, 66, 67,
- 69, 70, 70, 73 },
- { /* Chroma */
- /* Size 4x4 */
- 31, 37, 47, 47, 37, 44, 47, 45, 47, 47, 53, 53, 47, 45, 53, 59,
- /* Size 8x8 */
- 31, 31, 34, 37, 43, 48, 47, 49, 31, 32, 35, 40, 43, 46, 45, 46, 34, 35,
- 39, 43, 45, 46, 45, 46, 37, 40, 43, 47, 47, 47, 45, 46, 43, 43, 45, 47,
- 49, 50, 50, 50, 48, 46, 46, 47, 50, 53, 55, 55, 47, 45, 45, 45, 50, 55,
- 58, 60, 49, 46, 46, 46, 50, 55, 60, 61,
- /* Size 16x16 */
- 32, 31, 31, 30, 33, 33, 36, 38, 41, 47, 49, 48, 49, 49, 50, 50, 31, 31,
- 31, 31, 34, 34, 38, 40, 42, 46, 47, 47, 47, 47, 48, 48, 31, 31, 31, 31,
- 34, 35, 39, 40, 42, 46, 47, 46, 46, 46, 47, 47, 30, 31, 31, 32, 34, 35,
- 40, 41, 42, 45, 46, 45, 45, 45, 46, 46, 33, 34, 34, 34, 37, 38, 42, 43,
- 44, 46, 47, 46, 46, 45, 46, 46, 33, 34, 35, 35, 38, 39, 43, 44, 45, 47,
- 47, 46, 46, 45, 46, 46, 36, 38, 39, 40, 42, 43, 47, 47, 47, 47, 48, 46,
- 46, 45, 46, 46, 38, 40, 40, 41, 43, 44, 47, 47, 48, 48, 49, 48, 47, 47,
- 47, 47, 41, 42, 42, 42, 44, 45, 47, 48, 48, 50, 50, 49, 49, 49, 50, 50,
- 47, 46, 46, 45, 46, 47, 47, 48, 50, 52, 52, 52, 52, 52, 53, 53, 49, 47,
- 47, 46, 47, 47, 48, 49, 50, 52, 53, 53, 53, 53, 54, 54, 48, 47, 46, 45,
- 46, 46, 46, 48, 49, 52, 53, 54, 55, 55, 56, 56, 49, 47, 46, 45, 46, 46,
- 46, 47, 49, 52, 53, 55, 55, 57, 57, 58, 49, 47, 46, 45, 45, 45, 45, 47,
- 49, 52, 53, 55, 57, 58, 59, 60, 50, 48, 47, 46, 46, 46, 46, 47, 50, 53,
- 54, 56, 57, 59, 61, 61, 50, 48, 47, 46, 46, 46, 46, 47, 50, 53, 54, 56,
- 58, 60, 61, 61,
- /* Size 32x32 */
- 32, 31, 31, 31, 31, 30, 30, 31, 33, 33, 33, 35, 36, 36, 38, 41, 41, 43,
- 47, 49, 49, 49, 48, 48, 49, 49, 49, 49, 50, 50, 50, 51, 31, 31, 31, 31,
- 31, 31, 31, 31, 33, 34, 34, 36, 37, 37, 39, 42, 42, 43, 47, 48, 48, 48,
- 47, 47, 47, 47, 47, 48, 49, 49, 49, 50, 31, 31, 31, 31, 31, 31, 31, 32,
- 34, 34, 34, 37, 38, 38, 40, 42, 42, 43, 46, 47, 47, 47, 47, 47, 47, 47,
- 47, 47, 48, 48, 48, 49, 31, 31, 31, 31, 31, 31, 31, 32, 34, 34, 34, 37,
- 38, 38, 40, 42, 42, 43, 46, 47, 47, 47, 47, 47, 47, 47, 47, 47, 48, 48,
- 48, 49, 31, 31, 31, 31, 31, 31, 31, 32, 34, 35, 35, 37, 39, 39, 40, 42,
- 42, 43, 46, 47, 47, 46, 46, 46, 46, 46, 46, 46, 47, 47, 47, 48, 30, 31,
- 31, 31, 31, 32, 32, 32, 34, 35, 35, 38, 40, 40, 41, 42, 42, 43, 45, 46,
- 46, 46, 45, 45, 45, 45, 45, 45, 46, 46, 46, 47, 30, 31, 31, 31, 31, 32,
- 32, 32, 34, 35, 35, 38, 40, 40, 41, 42, 42, 43, 45, 46, 46, 46, 45, 45,
- 45, 45, 45, 45, 46, 46, 46, 47, 31, 31, 32, 32, 32, 32, 32, 33, 35, 36,
- 36, 38, 40, 40, 41, 43, 43, 43, 46, 46, 46, 46, 45, 45, 45, 45, 45, 45,
- 46, 46, 46, 47, 33, 33, 34, 34, 34, 34, 34, 35, 37, 38, 38, 41, 42, 42,
- 43, 44, 44, 45, 46, 47, 47, 46, 46, 46, 46, 45, 45, 45, 46, 46, 46, 47,
- 33, 34, 34, 34, 35, 35, 35, 36, 38, 39, 39, 41, 43, 43, 44, 45, 45, 45,
- 47, 47, 47, 46, 46, 46, 46, 45, 45, 45, 46, 46, 46, 47, 33, 34, 34, 34,
- 35, 35, 35, 36, 38, 39, 39, 41, 43, 43, 44, 45, 45, 45, 47, 47, 47, 46,
- 46, 46, 46, 45, 45, 45, 46, 46, 46, 47, 35, 36, 37, 37, 37, 38, 38, 38,
- 41, 41, 41, 44, 46, 46, 46, 46, 46, 46, 47, 47, 47, 47, 46, 46, 46, 45,
- 45, 45, 46, 46, 46, 47, 36, 37, 38, 38, 39, 40, 40, 40, 42, 43, 43, 46,
- 47, 47, 47, 47, 47, 47, 47, 48, 48, 47, 46, 46, 46, 45, 45, 45, 46, 46,
- 46, 46, 36, 37, 38, 38, 39, 40, 40, 40, 42, 43, 43, 46, 47, 47, 47, 47,
- 47, 47, 47, 48, 48, 47, 46, 46, 46, 45, 45, 45, 46, 46, 46, 46, 38, 39,
- 40, 40, 40, 41, 41, 41, 43, 44, 44, 46, 47, 47, 47, 48, 48, 48, 48, 49,
- 49, 48, 48, 48, 47, 47, 47, 47, 47, 47, 47, 48, 41, 42, 42, 42, 42, 42,
- 42, 43, 44, 45, 45, 46, 47, 47, 48, 48, 48, 49, 50, 50, 50, 50, 49, 49,
- 49, 49, 49, 49, 50, 50, 50, 50, 41, 42, 42, 42, 42, 42, 42, 43, 44, 45,
- 45, 46, 47, 47, 48, 48, 48, 49, 50, 50, 50, 50, 49, 49, 49, 49, 49, 49,
- 50, 50, 50, 50, 43, 43, 43, 43, 43, 43, 43, 43, 45, 45, 45, 46, 47, 47,
- 48, 49, 49, 49, 50, 51, 51, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 51,
- 47, 47, 46, 46, 46, 45, 45, 46, 46, 47, 47, 47, 47, 47, 48, 50, 50, 50,
- 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 53, 53, 53, 53, 49, 48, 47, 47,
- 47, 46, 46, 46, 47, 47, 47, 47, 48, 48, 49, 50, 50, 51, 52, 53, 53, 53,
- 53, 53, 53, 53, 53, 53, 54, 54, 54, 54, 49, 48, 47, 47, 47, 46, 46, 46,
- 47, 47, 47, 47, 48, 48, 49, 50, 50, 51, 52, 53, 53, 53, 53, 53, 53, 53,
- 53, 53, 54, 54, 54, 54, 49, 48, 47, 47, 46, 46, 46, 46, 46, 46, 46, 47,
- 47, 47, 48, 50, 50, 50, 52, 53, 53, 53, 54, 54, 54, 55, 55, 55, 55, 55,
- 55, 56, 48, 47, 47, 47, 46, 45, 45, 45, 46, 46, 46, 46, 46, 46, 48, 49,
- 49, 50, 52, 53, 53, 54, 54, 54, 55, 55, 55, 56, 56, 56, 56, 57, 48, 47,
- 47, 47, 46, 45, 45, 45, 46, 46, 46, 46, 46, 46, 48, 49, 49, 50, 52, 53,
- 53, 54, 54, 54, 55, 55, 55, 56, 56, 56, 56, 57, 49, 47, 47, 47, 46, 45,
- 45, 45, 46, 46, 46, 46, 46, 46, 47, 49, 49, 50, 52, 53, 53, 54, 55, 55,
- 55, 57, 57, 57, 57, 58, 58, 58, 49, 47, 47, 47, 46, 45, 45, 45, 45, 45,
- 45, 45, 45, 45, 47, 49, 49, 50, 52, 53, 53, 55, 55, 55, 57, 58, 58, 59,
- 59, 60, 60, 60, 49, 47, 47, 47, 46, 45, 45, 45, 45, 45, 45, 45, 45, 45,
- 47, 49, 49, 50, 52, 53, 53, 55, 55, 55, 57, 58, 58, 59, 59, 60, 60, 60,
- 49, 48, 47, 47, 46, 45, 45, 45, 45, 45, 45, 45, 45, 45, 47, 49, 49, 50,
- 52, 53, 53, 55, 56, 56, 57, 59, 59, 59, 60, 60, 60, 61, 50, 49, 48, 48,
- 47, 46, 46, 46, 46, 46, 46, 46, 46, 46, 47, 50, 50, 50, 53, 54, 54, 55,
- 56, 56, 57, 59, 59, 60, 61, 61, 61, 62, 50, 49, 48, 48, 47, 46, 46, 46,
- 46, 46, 46, 46, 46, 46, 47, 50, 50, 50, 53, 54, 54, 55, 56, 56, 58, 60,
- 60, 60, 61, 61, 61, 63, 50, 49, 48, 48, 47, 46, 46, 46, 46, 46, 46, 46,
- 46, 46, 47, 50, 50, 50, 53, 54, 54, 55, 56, 56, 58, 60, 60, 60, 61, 61,
- 61, 63, 51, 50, 49, 49, 48, 47, 47, 47, 47, 47, 47, 47, 46, 46, 48, 50,
- 50, 51, 53, 54, 54, 56, 57, 57, 58, 60, 60, 61, 62, 63, 63, 64,
- /* Size 4x8 */
- 31, 38, 47, 48, 31, 40, 46, 45, 35, 43, 47, 46, 39, 47, 47, 45, 43, 47,
- 50, 50, 47, 47, 53, 55, 46, 46, 53, 58, 48, 46, 54, 59,
- /* Size 8x4 */
- 31, 31, 35, 39, 43, 47, 46, 48, 38, 40, 43, 47, 47, 47, 46, 46, 47, 46,
- 47, 47, 50, 53, 53, 54, 48, 45, 46, 45, 50, 55, 58, 59,
- /* Size 8x16 */
- 32, 31, 33, 37, 45, 48, 49, 50, 31, 31, 34, 38, 45, 47, 47, 48, 31, 32,
- 34, 39, 45, 46, 46, 47, 30, 32, 35, 40, 44, 46, 45, 46, 33, 35, 37, 42,
- 46, 47, 45, 46, 33, 36, 38, 43, 46, 47, 46, 46, 37, 40, 43, 47, 47, 47,
- 45, 46, 39, 41, 43, 47, 48, 48, 47, 47, 42, 43, 44, 47, 49, 50, 49, 50,
- 47, 46, 46, 48, 51, 52, 53, 53, 49, 46, 47, 48, 52, 53, 53, 54, 48, 46,
- 46, 47, 51, 53, 56, 56, 48, 45, 46, 46, 51, 53, 57, 57, 49, 45, 45, 46,
- 51, 53, 58, 59, 50, 46, 46, 46, 52, 54, 59, 61, 50, 46, 46, 46, 52, 54,
- 59, 61,
- /* Size 16x8 */
- 32, 31, 31, 30, 33, 33, 37, 39, 42, 47, 49, 48, 48, 49, 50, 50, 31, 31,
- 32, 32, 35, 36, 40, 41, 43, 46, 46, 46, 45, 45, 46, 46, 33, 34, 34, 35,
- 37, 38, 43, 43, 44, 46, 47, 46, 46, 45, 46, 46, 37, 38, 39, 40, 42, 43,
- 47, 47, 47, 48, 48, 47, 46, 46, 46, 46, 45, 45, 45, 44, 46, 46, 47, 48,
- 49, 51, 52, 51, 51, 51, 52, 52, 48, 47, 46, 46, 47, 47, 47, 48, 50, 52,
- 53, 53, 53, 53, 54, 54, 49, 47, 46, 45, 45, 46, 45, 47, 49, 53, 53, 56,
- 57, 58, 59, 59, 50, 48, 47, 46, 46, 46, 46, 47, 50, 53, 54, 56, 57, 59,
- 61, 61,
- /* Size 16x32 */
- 32, 31, 31, 31, 33, 37, 37, 38, 45, 48, 48, 49, 49, 49, 50, 52, 31, 31,
- 31, 31, 33, 38, 38, 39, 45, 47, 47, 48, 48, 48, 49, 51, 31, 31, 31, 31,
- 34, 38, 38, 40, 45, 47, 47, 47, 47, 47, 48, 50, 31, 31, 31, 31, 34, 38,
- 38, 40, 45, 47, 47, 47, 47, 47, 48, 50, 31, 31, 32, 32, 34, 39, 39, 40,
- 45, 46, 46, 46, 46, 46, 47, 49, 30, 31, 32, 32, 35, 40, 40, 41, 44, 46,
- 46, 45, 45, 45, 46, 48, 30, 31, 32, 32, 35, 40, 40, 41, 44, 46, 46, 45,
- 45, 45, 46, 48, 31, 32, 33, 33, 35, 40, 40, 41, 45, 46, 46, 45, 45, 45,
- 46, 48, 33, 34, 35, 35, 37, 42, 42, 43, 46, 47, 47, 46, 45, 45, 46, 47,
- 33, 35, 36, 36, 38, 43, 43, 44, 46, 47, 47, 46, 46, 46, 46, 47, 33, 35,
- 36, 36, 38, 43, 43, 44, 46, 47, 47, 46, 46, 46, 46, 47, 35, 37, 38, 38,
- 41, 45, 45, 46, 47, 47, 47, 46, 45, 45, 46, 47, 37, 39, 40, 40, 43, 47,
- 47, 47, 47, 47, 47, 46, 45, 45, 46, 47, 37, 39, 40, 40, 43, 47, 47, 47,
- 47, 47, 47, 46, 45, 45, 46, 47, 39, 40, 41, 41, 43, 47, 47, 47, 48, 48,
- 48, 47, 47, 47, 47, 48, 42, 42, 43, 43, 44, 47, 47, 48, 49, 50, 50, 49,
- 49, 49, 50, 50, 42, 42, 43, 43, 44, 47, 47, 48, 49, 50, 50, 49, 49, 49,
- 50, 50, 43, 43, 43, 43, 45, 47, 47, 48, 50, 50, 50, 50, 50, 50, 50, 51,
- 47, 46, 46, 46, 46, 48, 48, 48, 51, 52, 52, 52, 53, 53, 53, 53, 49, 47,
- 46, 46, 47, 48, 48, 49, 52, 53, 53, 53, 53, 53, 54, 54, 49, 47, 46, 46,
- 47, 48, 48, 49, 52, 53, 53, 53, 53, 53, 54, 54, 48, 47, 46, 46, 46, 47,
- 47, 48, 52, 53, 53, 54, 55, 55, 55, 56, 48, 47, 46, 46, 46, 47, 47, 48,
- 51, 53, 53, 54, 56, 56, 56, 57, 48, 47, 46, 46, 46, 47, 47, 48, 51, 53,
- 53, 54, 56, 56, 56, 57, 48, 47, 45, 45, 46, 46, 46, 47, 51, 53, 53, 55,
- 57, 57, 57, 59, 49, 46, 45, 45, 45, 46, 46, 47, 51, 53, 53, 56, 58, 58,
- 59, 61, 49, 46, 45, 45, 45, 46, 46, 47, 51, 53, 53, 56, 58, 58, 59, 61,
- 49, 47, 45, 45, 45, 46, 46, 47, 52, 53, 53, 56, 58, 58, 60, 62, 50, 48,
- 46, 46, 46, 46, 46, 48, 52, 54, 54, 57, 59, 59, 61, 63, 50, 48, 46, 46,
- 46, 46, 46, 48, 52, 54, 54, 57, 59, 59, 61, 64, 50, 48, 46, 46, 46, 46,
- 46, 48, 52, 54, 54, 57, 59, 59, 61, 64, 51, 49, 47, 47, 47, 47, 47, 48,
- 52, 54, 54, 58, 60, 60, 62, 65,
- /* Size 32x16 */
- 32, 31, 31, 31, 31, 30, 30, 31, 33, 33, 33, 35, 37, 37, 39, 42, 42, 43,
- 47, 49, 49, 48, 48, 48, 48, 49, 49, 49, 50, 50, 50, 51, 31, 31, 31, 31,
- 31, 31, 31, 32, 34, 35, 35, 37, 39, 39, 40, 42, 42, 43, 46, 47, 47, 47,
- 47, 47, 47, 46, 46, 47, 48, 48, 48, 49, 31, 31, 31, 31, 32, 32, 32, 33,
- 35, 36, 36, 38, 40, 40, 41, 43, 43, 43, 46, 46, 46, 46, 46, 46, 45, 45,
- 45, 45, 46, 46, 46, 47, 31, 31, 31, 31, 32, 32, 32, 33, 35, 36, 36, 38,
- 40, 40, 41, 43, 43, 43, 46, 46, 46, 46, 46, 46, 45, 45, 45, 45, 46, 46,
- 46, 47, 33, 33, 34, 34, 34, 35, 35, 35, 37, 38, 38, 41, 43, 43, 43, 44,
- 44, 45, 46, 47, 47, 46, 46, 46, 46, 45, 45, 45, 46, 46, 46, 47, 37, 38,
- 38, 38, 39, 40, 40, 40, 42, 43, 43, 45, 47, 47, 47, 47, 47, 47, 48, 48,
- 48, 47, 47, 47, 46, 46, 46, 46, 46, 46, 46, 47, 37, 38, 38, 38, 39, 40,
- 40, 40, 42, 43, 43, 45, 47, 47, 47, 47, 47, 47, 48, 48, 48, 47, 47, 47,
- 46, 46, 46, 46, 46, 46, 46, 47, 38, 39, 40, 40, 40, 41, 41, 41, 43, 44,
- 44, 46, 47, 47, 47, 48, 48, 48, 48, 49, 49, 48, 48, 48, 47, 47, 47, 47,
- 48, 48, 48, 48, 45, 45, 45, 45, 45, 44, 44, 45, 46, 46, 46, 47, 47, 47,
- 48, 49, 49, 50, 51, 52, 52, 52, 51, 51, 51, 51, 51, 52, 52, 52, 52, 52,
- 48, 47, 47, 47, 46, 46, 46, 46, 47, 47, 47, 47, 47, 47, 48, 50, 50, 50,
- 52, 53, 53, 53, 53, 53, 53, 53, 53, 53, 54, 54, 54, 54, 48, 47, 47, 47,
- 46, 46, 46, 46, 47, 47, 47, 47, 47, 47, 48, 50, 50, 50, 52, 53, 53, 53,
- 53, 53, 53, 53, 53, 53, 54, 54, 54, 54, 49, 48, 47, 47, 46, 45, 45, 45,
- 46, 46, 46, 46, 46, 46, 47, 49, 49, 50, 52, 53, 53, 54, 54, 54, 55, 56,
- 56, 56, 57, 57, 57, 58, 49, 48, 47, 47, 46, 45, 45, 45, 45, 46, 46, 45,
- 45, 45, 47, 49, 49, 50, 53, 53, 53, 55, 56, 56, 57, 58, 58, 58, 59, 59,
- 59, 60, 49, 48, 47, 47, 46, 45, 45, 45, 45, 46, 46, 45, 45, 45, 47, 49,
- 49, 50, 53, 53, 53, 55, 56, 56, 57, 58, 58, 58, 59, 59, 59, 60, 50, 49,
- 48, 48, 47, 46, 46, 46, 46, 46, 46, 46, 46, 46, 47, 50, 50, 50, 53, 54,
- 54, 55, 56, 56, 57, 59, 59, 60, 61, 61, 61, 62, 52, 51, 50, 50, 49, 48,
- 48, 48, 47, 47, 47, 47, 47, 47, 48, 50, 50, 51, 53, 54, 54, 56, 57, 57,
- 59, 61, 61, 62, 63, 64, 64, 65,
- /* Size 4x16 */
- 31, 37, 48, 49, 31, 38, 47, 47, 31, 39, 46, 46, 31, 40, 46, 45, 34, 42,
- 47, 45, 35, 43, 47, 46, 39, 47, 47, 45, 40, 47, 48, 47, 42, 47, 50, 49,
- 46, 48, 52, 53, 47, 48, 53, 53, 47, 47, 53, 56, 47, 46, 53, 57, 46, 46,
- 53, 58, 48, 46, 54, 59, 48, 46, 54, 59,
- /* Size 16x4 */
- 31, 31, 31, 31, 34, 35, 39, 40, 42, 46, 47, 47, 47, 46, 48, 48, 37, 38,
- 39, 40, 42, 43, 47, 47, 47, 48, 48, 47, 46, 46, 46, 46, 48, 47, 46, 46,
- 47, 47, 47, 48, 50, 52, 53, 53, 53, 53, 54, 54, 49, 47, 46, 45, 45, 46,
- 45, 47, 49, 53, 53, 56, 57, 58, 59, 59,
- /* Size 8x32 */
- 32, 31, 33, 37, 45, 48, 49, 50, 31, 31, 33, 38, 45, 47, 48, 49, 31, 31,
- 34, 38, 45, 47, 47, 48, 31, 31, 34, 38, 45, 47, 47, 48, 31, 32, 34, 39,
- 45, 46, 46, 47, 30, 32, 35, 40, 44, 46, 45, 46, 30, 32, 35, 40, 44, 46,
- 45, 46, 31, 33, 35, 40, 45, 46, 45, 46, 33, 35, 37, 42, 46, 47, 45, 46,
- 33, 36, 38, 43, 46, 47, 46, 46, 33, 36, 38, 43, 46, 47, 46, 46, 35, 38,
- 41, 45, 47, 47, 45, 46, 37, 40, 43, 47, 47, 47, 45, 46, 37, 40, 43, 47,
- 47, 47, 45, 46, 39, 41, 43, 47, 48, 48, 47, 47, 42, 43, 44, 47, 49, 50,
- 49, 50, 42, 43, 44, 47, 49, 50, 49, 50, 43, 43, 45, 47, 50, 50, 50, 50,
- 47, 46, 46, 48, 51, 52, 53, 53, 49, 46, 47, 48, 52, 53, 53, 54, 49, 46,
- 47, 48, 52, 53, 53, 54, 48, 46, 46, 47, 52, 53, 55, 55, 48, 46, 46, 47,
- 51, 53, 56, 56, 48, 46, 46, 47, 51, 53, 56, 56, 48, 45, 46, 46, 51, 53,
- 57, 57, 49, 45, 45, 46, 51, 53, 58, 59, 49, 45, 45, 46, 51, 53, 58, 59,
- 49, 45, 45, 46, 52, 53, 58, 60, 50, 46, 46, 46, 52, 54, 59, 61, 50, 46,
- 46, 46, 52, 54, 59, 61, 50, 46, 46, 46, 52, 54, 59, 61, 51, 47, 47, 47,
- 52, 54, 60, 62,
- /* Size 32x8 */
- 32, 31, 31, 31, 31, 30, 30, 31, 33, 33, 33, 35, 37, 37, 39, 42, 42, 43,
- 47, 49, 49, 48, 48, 48, 48, 49, 49, 49, 50, 50, 50, 51, 31, 31, 31, 31,
- 32, 32, 32, 33, 35, 36, 36, 38, 40, 40, 41, 43, 43, 43, 46, 46, 46, 46,
- 46, 46, 45, 45, 45, 45, 46, 46, 46, 47, 33, 33, 34, 34, 34, 35, 35, 35,
- 37, 38, 38, 41, 43, 43, 43, 44, 44, 45, 46, 47, 47, 46, 46, 46, 46, 45,
- 45, 45, 46, 46, 46, 47, 37, 38, 38, 38, 39, 40, 40, 40, 42, 43, 43, 45,
- 47, 47, 47, 47, 47, 47, 48, 48, 48, 47, 47, 47, 46, 46, 46, 46, 46, 46,
- 46, 47, 45, 45, 45, 45, 45, 44, 44, 45, 46, 46, 46, 47, 47, 47, 48, 49,
- 49, 50, 51, 52, 52, 52, 51, 51, 51, 51, 51, 52, 52, 52, 52, 52, 48, 47,
- 47, 47, 46, 46, 46, 46, 47, 47, 47, 47, 47, 47, 48, 50, 50, 50, 52, 53,
- 53, 53, 53, 53, 53, 53, 53, 53, 54, 54, 54, 54, 49, 48, 47, 47, 46, 45,
- 45, 45, 45, 46, 46, 45, 45, 45, 47, 49, 49, 50, 53, 53, 53, 55, 56, 56,
- 57, 58, 58, 58, 59, 59, 59, 60, 50, 49, 48, 48, 47, 46, 46, 46, 46, 46,
- 46, 46, 46, 46, 47, 50, 50, 50, 53, 54, 54, 55, 56, 56, 57, 59, 59, 60,
- 61, 61, 61, 62 },
- },
- {
- { /* Luma */
- /* Size 4x4 */
- 32, 32, 34, 38, 32, 33, 35, 39, 34, 35, 39, 45, 38, 39, 45, 54,
- /* Size 8x8 */
- 31, 31, 32, 32, 33, 34, 37, 41, 31, 32, 32, 32, 33, 34, 36, 39, 32, 32,
- 32, 33, 34, 35, 37, 40, 32, 32, 33, 34, 35, 36, 38, 41, 33, 33, 34, 35,
- 37, 39, 41, 44, 34, 34, 35, 36, 39, 43, 46, 49, 37, 36, 37, 38, 41, 46,
- 51, 54, 41, 39, 40, 41, 44, 49, 54, 58,
- /* Size 16x16 */
- 32, 31, 31, 31, 31, 31, 31, 32, 32, 34, 34, 36, 36, 39, 39, 44, 31, 32,
- 32, 32, 32, 32, 32, 32, 32, 34, 34, 35, 35, 38, 38, 42, 31, 32, 32, 32,
- 32, 32, 32, 32, 32, 34, 34, 35, 35, 38, 38, 42, 31, 32, 32, 32, 32, 32,
- 32, 32, 32, 33, 33, 34, 34, 37, 37, 41, 31, 32, 32, 32, 32, 32, 32, 32,
- 32, 33, 33, 34, 34, 37, 37, 41, 31, 32, 32, 32, 32, 33, 33, 34, 34, 35,
- 35, 36, 36, 39, 39, 42, 31, 32, 32, 32, 32, 33, 33, 34, 34, 35, 35, 36,
- 36, 39, 39, 42, 32, 32, 32, 32, 32, 34, 34, 35, 35, 37, 37, 38, 38, 40,
- 40, 42, 32, 32, 32, 32, 32, 34, 34, 35, 35, 37, 37, 38, 38, 40, 40, 42,
- 34, 34, 34, 33, 33, 35, 35, 37, 37, 39, 39, 42, 42, 45, 45, 47, 34, 34,
- 34, 33, 33, 35, 35, 37, 37, 39, 39, 42, 42, 45, 45, 47, 36, 35, 35, 34,
- 34, 36, 36, 38, 38, 42, 42, 48, 48, 50, 50, 54, 36, 35, 35, 34, 34, 36,
- 36, 38, 38, 42, 42, 48, 48, 50, 50, 54, 39, 38, 38, 37, 37, 39, 39, 40,
- 40, 45, 45, 50, 50, 54, 54, 58, 39, 38, 38, 37, 37, 39, 39, 40, 40, 45,
- 45, 50, 50, 54, 54, 58, 44, 42, 42, 41, 41, 42, 42, 42, 42, 47, 47, 54,
- 54, 58, 58, 63,
- /* Size 32x32 */
- 32, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 32, 32, 32, 32, 33,
- 34, 34, 34, 35, 36, 36, 36, 37, 39, 39, 39, 41, 44, 44, 31, 31, 31, 31,
- 31, 31, 31, 31, 31, 32, 32, 32, 32, 32, 32, 32, 32, 33, 34, 34, 34, 34,
- 35, 35, 35, 37, 39, 39, 39, 41, 43, 43, 31, 31, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 33, 34, 34, 34, 34, 35, 35, 35, 37,
- 38, 38, 38, 40, 42, 42, 31, 31, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 33, 34, 34, 34, 34, 35, 35, 35, 37, 38, 38, 38, 40,
- 42, 42, 31, 31, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 33, 34, 34, 34, 34, 35, 35, 35, 37, 38, 38, 38, 40, 42, 42, 31, 31,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 33, 34, 34,
- 34, 34, 35, 35, 35, 36, 38, 38, 38, 39, 41, 41, 31, 31, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 33, 33, 33, 33, 34, 34, 34,
- 34, 36, 37, 37, 37, 39, 41, 41, 31, 31, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 33, 33, 33, 33, 34, 34, 34, 34, 36, 37, 37,
- 37, 39, 41, 41, 31, 31, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 33, 33, 33, 33, 34, 34, 34, 34, 36, 37, 37, 37, 39, 41, 41,
- 31, 32, 32, 32, 32, 32, 32, 32, 32, 32, 33, 33, 33, 33, 33, 33, 33, 34,
- 34, 34, 34, 35, 35, 35, 35, 37, 38, 38, 38, 40, 41, 41, 31, 32, 32, 32,
- 32, 32, 32, 32, 32, 33, 33, 33, 33, 33, 34, 34, 34, 34, 35, 35, 35, 36,
- 36, 36, 36, 38, 39, 39, 39, 40, 42, 42, 31, 32, 32, 32, 32, 32, 32, 32,
- 32, 33, 33, 33, 33, 33, 34, 34, 34, 34, 35, 35, 35, 36, 36, 36, 36, 38,
- 39, 39, 39, 40, 42, 42, 31, 32, 32, 32, 32, 32, 32, 32, 32, 33, 33, 33,
- 33, 33, 34, 34, 34, 34, 35, 35, 35, 36, 36, 36, 36, 38, 39, 39, 39, 40,
- 42, 42, 32, 32, 32, 32, 32, 32, 32, 32, 32, 33, 33, 33, 33, 34, 34, 34,
- 34, 35, 36, 36, 36, 36, 37, 37, 37, 38, 40, 40, 40, 41, 42, 42, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 33, 34, 34, 34, 34, 35, 35, 35, 36, 37, 37,
- 37, 37, 38, 38, 38, 39, 40, 40, 40, 41, 42, 42, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 33, 34, 34, 34, 34, 35, 35, 35, 36, 37, 37, 37, 37, 38, 38,
- 38, 39, 40, 40, 40, 41, 42, 42, 32, 32, 32, 32, 32, 32, 32, 32, 32, 33,
- 34, 34, 34, 34, 35, 35, 35, 36, 37, 37, 37, 37, 38, 38, 38, 39, 40, 40,
- 40, 41, 42, 42, 33, 33, 33, 33, 33, 33, 33, 33, 33, 34, 34, 34, 34, 35,
- 36, 36, 36, 37, 38, 38, 38, 39, 40, 40, 40, 41, 42, 42, 42, 44, 45, 45,
- 34, 34, 34, 34, 34, 34, 33, 33, 33, 34, 35, 35, 35, 36, 37, 37, 37, 38,
- 39, 39, 39, 41, 42, 42, 42, 44, 45, 45, 45, 46, 47, 47, 34, 34, 34, 34,
- 34, 34, 33, 33, 33, 34, 35, 35, 35, 36, 37, 37, 37, 38, 39, 39, 39, 41,
- 42, 42, 42, 44, 45, 45, 45, 46, 47, 47, 34, 34, 34, 34, 34, 34, 33, 33,
- 33, 34, 35, 35, 35, 36, 37, 37, 37, 38, 39, 39, 39, 41, 42, 42, 42, 44,
- 45, 45, 45, 46, 47, 47, 35, 34, 34, 34, 34, 34, 34, 34, 34, 35, 36, 36,
- 36, 36, 37, 37, 37, 39, 41, 41, 41, 43, 45, 45, 45, 46, 47, 47, 47, 49,
- 50, 50, 36, 35, 35, 35, 35, 35, 34, 34, 34, 35, 36, 36, 36, 37, 38, 38,
- 38, 40, 42, 42, 42, 45, 48, 48, 48, 49, 50, 50, 50, 52, 54, 54, 36, 35,
- 35, 35, 35, 35, 34, 34, 34, 35, 36, 36, 36, 37, 38, 38, 38, 40, 42, 42,
- 42, 45, 48, 48, 48, 49, 50, 50, 50, 52, 54, 54, 36, 35, 35, 35, 35, 35,
- 34, 34, 34, 35, 36, 36, 36, 37, 38, 38, 38, 40, 42, 42, 42, 45, 48, 48,
- 48, 49, 50, 50, 50, 52, 54, 54, 37, 37, 37, 37, 37, 36, 36, 36, 36, 37,
- 38, 38, 38, 38, 39, 39, 39, 41, 44, 44, 44, 46, 49, 49, 49, 51, 52, 52,
- 52, 54, 56, 56, 39, 39, 38, 38, 38, 38, 37, 37, 37, 38, 39, 39, 39, 40,
- 40, 40, 40, 42, 45, 45, 45, 47, 50, 50, 50, 52, 54, 54, 54, 56, 58, 58,
- 39, 39, 38, 38, 38, 38, 37, 37, 37, 38, 39, 39, 39, 40, 40, 40, 40, 42,
- 45, 45, 45, 47, 50, 50, 50, 52, 54, 54, 54, 56, 58, 58, 39, 39, 38, 38,
- 38, 38, 37, 37, 37, 38, 39, 39, 39, 40, 40, 40, 40, 42, 45, 45, 45, 47,
- 50, 50, 50, 52, 54, 54, 54, 56, 58, 58, 41, 41, 40, 40, 40, 39, 39, 39,
- 39, 40, 40, 40, 40, 41, 41, 41, 41, 44, 46, 46, 46, 49, 52, 52, 52, 54,
- 56, 56, 56, 58, 60, 60, 44, 43, 42, 42, 42, 41, 41, 41, 41, 41, 42, 42,
- 42, 42, 42, 42, 42, 45, 47, 47, 47, 50, 54, 54, 54, 56, 58, 58, 58, 60,
- 63, 63, 44, 43, 42, 42, 42, 41, 41, 41, 41, 41, 42, 42, 42, 42, 42, 42,
- 42, 45, 47, 47, 47, 50, 54, 54, 54, 56, 58, 58, 58, 60, 63, 63,
- /* Size 4x8 */
- 31, 32, 34, 39, 32, 32, 34, 38, 32, 33, 34, 38, 32, 33, 36, 40, 33, 34,
- 38, 42, 34, 36, 41, 47, 37, 38, 44, 52, 40, 40, 46, 56,
- /* Size 8x4 */
- 31, 32, 32, 32, 33, 34, 37, 40, 32, 32, 33, 33, 34, 36, 38, 40, 34, 34,
- 34, 36, 38, 41, 44, 46, 39, 38, 38, 40, 42, 47, 52, 56,
- /* Size 8x16 */
- 32, 31, 31, 32, 32, 36, 36, 44, 31, 32, 32, 32, 32, 35, 35, 42, 31, 32,
- 32, 32, 32, 35, 35, 42, 31, 32, 32, 33, 33, 34, 34, 41, 31, 32, 32, 33,
- 33, 34, 34, 41, 32, 32, 32, 34, 34, 36, 36, 42, 32, 32, 32, 34, 34, 36,
- 36, 42, 32, 33, 33, 35, 35, 38, 38, 42, 32, 33, 33, 35, 35, 38, 38, 42,
- 34, 34, 34, 37, 37, 42, 42, 48, 34, 34, 34, 37, 37, 42, 42, 48, 36, 34,
- 34, 38, 38, 48, 48, 54, 36, 34, 34, 38, 38, 48, 48, 54, 39, 37, 37, 40,
- 40, 50, 50, 58, 39, 37, 37, 40, 40, 50, 50, 58, 44, 41, 41, 43, 43, 53,
- 53, 63,
- /* Size 16x8 */
- 32, 31, 31, 31, 31, 32, 32, 32, 32, 34, 34, 36, 36, 39, 39, 44, 31, 32,
- 32, 32, 32, 32, 32, 33, 33, 34, 34, 34, 34, 37, 37, 41, 31, 32, 32, 32,
- 32, 32, 32, 33, 33, 34, 34, 34, 34, 37, 37, 41, 32, 32, 32, 33, 33, 34,
- 34, 35, 35, 37, 37, 38, 38, 40, 40, 43, 32, 32, 32, 33, 33, 34, 34, 35,
- 35, 37, 37, 38, 38, 40, 40, 43, 36, 35, 35, 34, 34, 36, 36, 38, 38, 42,
- 42, 48, 48, 50, 50, 53, 36, 35, 35, 34, 34, 36, 36, 38, 38, 42, 42, 48,
- 48, 50, 50, 53, 44, 42, 42, 41, 41, 42, 42, 42, 42, 48, 48, 54, 54, 58,
- 58, 63,
- /* Size 16x32 */
- 32, 31, 31, 31, 31, 32, 32, 32, 32, 34, 36, 36, 36, 39, 44, 44, 31, 31,
- 31, 31, 31, 32, 32, 32, 32, 34, 35, 35, 35, 39, 43, 43, 31, 32, 32, 32,
- 32, 32, 32, 32, 32, 34, 35, 35, 35, 38, 42, 42, 31, 32, 32, 32, 32, 32,
- 32, 32, 32, 34, 35, 35, 35, 38, 42, 42, 31, 32, 32, 32, 32, 32, 32, 32,
- 32, 34, 35, 35, 35, 38, 42, 42, 31, 32, 32, 32, 32, 32, 32, 32, 32, 34,
- 35, 35, 35, 38, 41, 41, 31, 32, 32, 32, 32, 32, 33, 33, 33, 33, 34, 34,
- 34, 37, 41, 41, 31, 32, 32, 32, 32, 32, 33, 33, 33, 33, 34, 34, 34, 37,
- 41, 41, 31, 32, 32, 32, 32, 32, 33, 33, 33, 33, 34, 34, 34, 37, 41, 41,
- 31, 32, 32, 32, 32, 33, 33, 33, 33, 34, 35, 35, 35, 38, 41, 41, 32, 32,
- 32, 32, 32, 33, 34, 34, 34, 35, 36, 36, 36, 39, 42, 42, 32, 32, 32, 32,
- 32, 33, 34, 34, 34, 35, 36, 36, 36, 39, 42, 42, 32, 32, 32, 32, 32, 33,
- 34, 34, 34, 35, 36, 36, 36, 39, 42, 42, 32, 32, 32, 32, 32, 33, 34, 34,
- 34, 36, 37, 37, 37, 40, 42, 42, 32, 32, 33, 33, 33, 34, 35, 35, 35, 37,
- 38, 38, 38, 40, 42, 42, 32, 32, 33, 33, 33, 34, 35, 35, 35, 37, 38, 38,
- 38, 40, 42, 42, 32, 32, 33, 33, 33, 34, 35, 35, 35, 37, 38, 38, 38, 40,
- 42, 42, 33, 33, 33, 33, 33, 34, 36, 36, 36, 38, 40, 40, 40, 42, 45, 45,
- 34, 34, 34, 34, 34, 35, 37, 37, 37, 39, 42, 42, 42, 45, 48, 48, 34, 34,
- 34, 34, 34, 35, 37, 37, 37, 39, 42, 42, 42, 45, 48, 48, 34, 34, 34, 34,
- 34, 35, 37, 37, 37, 39, 42, 42, 42, 45, 48, 48, 35, 34, 34, 34, 34, 36,
- 37, 37, 37, 41, 45, 45, 45, 47, 50, 50, 36, 35, 34, 34, 34, 36, 38, 38,
- 38, 43, 48, 48, 48, 51, 54, 54, 36, 35, 34, 34, 34, 36, 38, 38, 38, 43,
- 48, 48, 48, 51, 54, 54, 36, 35, 34, 34, 34, 36, 38, 38, 38, 43, 48, 48,
- 48, 51, 54, 54, 37, 37, 36, 36, 36, 38, 39, 39, 39, 44, 49, 49, 49, 52,
- 56, 56, 39, 38, 37, 37, 37, 39, 40, 40, 40, 45, 50, 50, 50, 54, 58, 58,
- 39, 38, 37, 37, 37, 39, 40, 40, 40, 45, 50, 50, 50, 54, 58, 58, 39, 38,
- 37, 37, 37, 39, 40, 40, 40, 45, 50, 50, 50, 54, 58, 58, 41, 40, 39, 39,
- 39, 40, 42, 42, 42, 46, 52, 52, 52, 56, 60, 60, 44, 42, 41, 41, 41, 42,
- 43, 43, 43, 48, 53, 53, 53, 58, 63, 63, 44, 42, 41, 41, 41, 42, 43, 43,
- 43, 48, 53, 53, 53, 58, 63, 63,
- /* Size 32x16 */
- 32, 31, 31, 31, 31, 31, 31, 31, 31, 31, 32, 32, 32, 32, 32, 32, 32, 33,
- 34, 34, 34, 35, 36, 36, 36, 37, 39, 39, 39, 41, 44, 44, 31, 31, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 33, 34, 34, 34, 34,
- 35, 35, 35, 37, 38, 38, 38, 40, 42, 42, 31, 31, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 33, 33, 33, 33, 34, 34, 34, 34, 34, 34, 34, 36,
- 37, 37, 37, 39, 41, 41, 31, 31, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 33, 33, 33, 33, 34, 34, 34, 34, 34, 34, 34, 36, 37, 37, 37, 39,
- 41, 41, 31, 31, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 33, 33,
- 33, 33, 34, 34, 34, 34, 34, 34, 34, 36, 37, 37, 37, 39, 41, 41, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 33, 33, 33, 33, 33, 34, 34, 34, 34, 35, 35,
- 35, 36, 36, 36, 36, 38, 39, 39, 39, 40, 42, 42, 32, 32, 32, 32, 32, 32,
- 33, 33, 33, 33, 34, 34, 34, 34, 35, 35, 35, 36, 37, 37, 37, 37, 38, 38,
- 38, 39, 40, 40, 40, 42, 43, 43, 32, 32, 32, 32, 32, 32, 33, 33, 33, 33,
- 34, 34, 34, 34, 35, 35, 35, 36, 37, 37, 37, 37, 38, 38, 38, 39, 40, 40,
- 40, 42, 43, 43, 32, 32, 32, 32, 32, 32, 33, 33, 33, 33, 34, 34, 34, 34,
- 35, 35, 35, 36, 37, 37, 37, 37, 38, 38, 38, 39, 40, 40, 40, 42, 43, 43,
- 34, 34, 34, 34, 34, 34, 33, 33, 33, 34, 35, 35, 35, 36, 37, 37, 37, 38,
- 39, 39, 39, 41, 43, 43, 43, 44, 45, 45, 45, 46, 48, 48, 36, 35, 35, 35,
- 35, 35, 34, 34, 34, 35, 36, 36, 36, 37, 38, 38, 38, 40, 42, 42, 42, 45,
- 48, 48, 48, 49, 50, 50, 50, 52, 53, 53, 36, 35, 35, 35, 35, 35, 34, 34,
- 34, 35, 36, 36, 36, 37, 38, 38, 38, 40, 42, 42, 42, 45, 48, 48, 48, 49,
- 50, 50, 50, 52, 53, 53, 36, 35, 35, 35, 35, 35, 34, 34, 34, 35, 36, 36,
- 36, 37, 38, 38, 38, 40, 42, 42, 42, 45, 48, 48, 48, 49, 50, 50, 50, 52,
- 53, 53, 39, 39, 38, 38, 38, 38, 37, 37, 37, 38, 39, 39, 39, 40, 40, 40,
- 40, 42, 45, 45, 45, 47, 51, 51, 51, 52, 54, 54, 54, 56, 58, 58, 44, 43,
- 42, 42, 42, 41, 41, 41, 41, 41, 42, 42, 42, 42, 42, 42, 42, 45, 48, 48,
- 48, 50, 54, 54, 54, 56, 58, 58, 58, 60, 63, 63, 44, 43, 42, 42, 42, 41,
- 41, 41, 41, 41, 42, 42, 42, 42, 42, 42, 42, 45, 48, 48, 48, 50, 54, 54,
- 54, 56, 58, 58, 58, 60, 63, 63,
- /* Size 4x16 */
- 31, 32, 34, 39, 32, 32, 34, 38, 32, 32, 34, 38, 32, 32, 33, 37, 32, 32,
- 33, 37, 32, 33, 35, 39, 32, 33, 35, 39, 32, 34, 37, 40, 32, 34, 37, 40,
- 34, 35, 39, 45, 34, 35, 39, 45, 35, 36, 43, 51, 35, 36, 43, 51, 38, 39,
- 45, 54, 38, 39, 45, 54, 42, 42, 48, 58,
- /* Size 16x4 */
- 31, 32, 32, 32, 32, 32, 32, 32, 32, 34, 34, 35, 35, 38, 38, 42, 32, 32,
- 32, 32, 32, 33, 33, 34, 34, 35, 35, 36, 36, 39, 39, 42, 34, 34, 34, 33,
- 33, 35, 35, 37, 37, 39, 39, 43, 43, 45, 45, 48, 39, 38, 38, 37, 37, 39,
- 39, 40, 40, 45, 45, 51, 51, 54, 54, 58,
- /* Size 8x32 */
- 32, 31, 31, 32, 32, 36, 36, 44, 31, 31, 31, 32, 32, 35, 35, 43, 31, 32,
- 32, 32, 32, 35, 35, 42, 31, 32, 32, 32, 32, 35, 35, 42, 31, 32, 32, 32,
- 32, 35, 35, 42, 31, 32, 32, 32, 32, 35, 35, 41, 31, 32, 32, 33, 33, 34,
- 34, 41, 31, 32, 32, 33, 33, 34, 34, 41, 31, 32, 32, 33, 33, 34, 34, 41,
- 31, 32, 32, 33, 33, 35, 35, 41, 32, 32, 32, 34, 34, 36, 36, 42, 32, 32,
- 32, 34, 34, 36, 36, 42, 32, 32, 32, 34, 34, 36, 36, 42, 32, 32, 32, 34,
- 34, 37, 37, 42, 32, 33, 33, 35, 35, 38, 38, 42, 32, 33, 33, 35, 35, 38,
- 38, 42, 32, 33, 33, 35, 35, 38, 38, 42, 33, 33, 33, 36, 36, 40, 40, 45,
- 34, 34, 34, 37, 37, 42, 42, 48, 34, 34, 34, 37, 37, 42, 42, 48, 34, 34,
- 34, 37, 37, 42, 42, 48, 35, 34, 34, 37, 37, 45, 45, 50, 36, 34, 34, 38,
- 38, 48, 48, 54, 36, 34, 34, 38, 38, 48, 48, 54, 36, 34, 34, 38, 38, 48,
- 48, 54, 37, 36, 36, 39, 39, 49, 49, 56, 39, 37, 37, 40, 40, 50, 50, 58,
- 39, 37, 37, 40, 40, 50, 50, 58, 39, 37, 37, 40, 40, 50, 50, 58, 41, 39,
- 39, 42, 42, 52, 52, 60, 44, 41, 41, 43, 43, 53, 53, 63, 44, 41, 41, 43,
- 43, 53, 53, 63,
- /* Size 32x8 */
- 32, 31, 31, 31, 31, 31, 31, 31, 31, 31, 32, 32, 32, 32, 32, 32, 32, 33,
- 34, 34, 34, 35, 36, 36, 36, 37, 39, 39, 39, 41, 44, 44, 31, 31, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 33, 33, 33, 33, 34, 34, 34, 34,
- 34, 34, 34, 36, 37, 37, 37, 39, 41, 41, 31, 31, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 33, 33, 33, 33, 34, 34, 34, 34, 34, 34, 34, 36,
- 37, 37, 37, 39, 41, 41, 32, 32, 32, 32, 32, 32, 33, 33, 33, 33, 34, 34,
- 34, 34, 35, 35, 35, 36, 37, 37, 37, 37, 38, 38, 38, 39, 40, 40, 40, 42,
- 43, 43, 32, 32, 32, 32, 32, 32, 33, 33, 33, 33, 34, 34, 34, 34, 35, 35,
- 35, 36, 37, 37, 37, 37, 38, 38, 38, 39, 40, 40, 40, 42, 43, 43, 36, 35,
- 35, 35, 35, 35, 34, 34, 34, 35, 36, 36, 36, 37, 38, 38, 38, 40, 42, 42,
- 42, 45, 48, 48, 48, 49, 50, 50, 50, 52, 53, 53, 36, 35, 35, 35, 35, 35,
- 34, 34, 34, 35, 36, 36, 36, 37, 38, 38, 38, 40, 42, 42, 42, 45, 48, 48,
- 48, 49, 50, 50, 50, 52, 53, 53, 44, 43, 42, 42, 42, 41, 41, 41, 41, 41,
- 42, 42, 42, 42, 42, 42, 42, 45, 48, 48, 48, 50, 54, 54, 54, 56, 58, 58,
- 58, 60, 63, 63 },
- { /* Chroma */
- /* Size 4x4 */
- 31, 34, 42, 47, 34, 39, 45, 46, 42, 45, 48, 49, 47, 46, 49, 54,
- /* Size 8x8 */
- 31, 31, 32, 35, 39, 45, 48, 48, 31, 31, 33, 37, 41, 44, 46, 46, 32, 33,
- 35, 39, 42, 45, 46, 45, 35, 37, 39, 43, 45, 47, 47, 46, 39, 41, 42, 45,
- 47, 48, 48, 47, 45, 44, 45, 47, 48, 50, 51, 51, 48, 46, 46, 47, 48, 51,
- 53, 54, 48, 46, 45, 46, 47, 51, 54, 56,
- /* Size 16x16 */
- 32, 31, 31, 30, 30, 33, 33, 36, 36, 41, 41, 49, 49, 48, 48, 49, 31, 31,
- 31, 31, 31, 34, 34, 38, 38, 42, 42, 47, 47, 47, 47, 47, 31, 31, 31, 31,
- 31, 34, 34, 38, 38, 42, 42, 47, 47, 47, 47, 47, 30, 31, 31, 32, 32, 35,
- 35, 40, 40, 42, 42, 46, 46, 45, 45, 45, 30, 31, 31, 32, 32, 35, 35, 40,
- 40, 42, 42, 46, 46, 45, 45, 45, 33, 34, 34, 35, 35, 39, 39, 43, 43, 45,
- 45, 47, 47, 46, 46, 45, 33, 34, 34, 35, 35, 39, 39, 43, 43, 45, 45, 47,
- 47, 46, 46, 45, 36, 38, 38, 40, 40, 43, 43, 47, 47, 47, 47, 48, 48, 46,
- 46, 45, 36, 38, 38, 40, 40, 43, 43, 47, 47, 47, 47, 48, 48, 46, 46, 45,
- 41, 42, 42, 42, 42, 45, 45, 47, 47, 48, 48, 50, 50, 49, 49, 49, 41, 42,
- 42, 42, 42, 45, 45, 47, 47, 48, 48, 50, 50, 49, 49, 49, 49, 47, 47, 46,
- 46, 47, 47, 48, 48, 50, 50, 53, 53, 53, 53, 53, 49, 47, 47, 46, 46, 47,
- 47, 48, 48, 50, 50, 53, 53, 53, 53, 53, 48, 47, 47, 45, 45, 46, 46, 46,
- 46, 49, 49, 53, 53, 54, 54, 55, 48, 47, 47, 45, 45, 46, 46, 46, 46, 49,
- 49, 53, 53, 54, 54, 55, 49, 47, 47, 45, 45, 45, 45, 45, 45, 49, 49, 53,
- 53, 55, 55, 58,
- /* Size 32x32 */
- 32, 31, 31, 31, 31, 31, 30, 30, 30, 32, 33, 33, 33, 35, 36, 36, 36, 39,
- 41, 41, 41, 45, 49, 49, 49, 49, 48, 48, 48, 49, 49, 49, 31, 31, 31, 31,
- 31, 31, 31, 31, 31, 32, 34, 34, 34, 35, 37, 37, 37, 39, 42, 42, 42, 45,
- 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 31, 31, 31, 31, 31, 31, 31, 31,
- 31, 33, 34, 34, 34, 36, 38, 38, 38, 40, 42, 42, 42, 45, 47, 47, 47, 47,
- 47, 47, 47, 47, 47, 47, 31, 31, 31, 31, 31, 31, 31, 31, 31, 33, 34, 34,
- 34, 36, 38, 38, 38, 40, 42, 42, 42, 45, 47, 47, 47, 47, 47, 47, 47, 47,
- 47, 47, 31, 31, 31, 31, 31, 31, 31, 31, 31, 33, 34, 34, 34, 36, 38, 38,
- 38, 40, 42, 42, 42, 45, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 31, 31,
- 31, 31, 31, 31, 31, 31, 31, 33, 35, 35, 35, 37, 39, 39, 39, 41, 42, 42,
- 42, 44, 47, 47, 47, 46, 46, 46, 46, 46, 46, 46, 30, 31, 31, 31, 31, 31,
- 32, 32, 32, 33, 35, 35, 35, 37, 40, 40, 40, 41, 42, 42, 42, 44, 46, 46,
- 46, 46, 45, 45, 45, 45, 45, 45, 30, 31, 31, 31, 31, 31, 32, 32, 32, 33,
- 35, 35, 35, 37, 40, 40, 40, 41, 42, 42, 42, 44, 46, 46, 46, 46, 45, 45,
- 45, 45, 45, 45, 30, 31, 31, 31, 31, 31, 32, 32, 32, 33, 35, 35, 35, 37,
- 40, 40, 40, 41, 42, 42, 42, 44, 46, 46, 46, 46, 45, 45, 45, 45, 45, 45,
- 32, 32, 33, 33, 33, 33, 33, 33, 33, 35, 37, 37, 37, 39, 41, 41, 41, 42,
- 43, 43, 43, 45, 47, 47, 47, 46, 46, 46, 46, 45, 45, 45, 33, 34, 34, 34,
- 34, 35, 35, 35, 35, 37, 39, 39, 39, 41, 43, 43, 43, 44, 45, 45, 45, 46,
- 47, 47, 47, 47, 46, 46, 46, 46, 45, 45, 33, 34, 34, 34, 34, 35, 35, 35,
- 35, 37, 39, 39, 39, 41, 43, 43, 43, 44, 45, 45, 45, 46, 47, 47, 47, 47,
- 46, 46, 46, 46, 45, 45, 33, 34, 34, 34, 34, 35, 35, 35, 35, 37, 39, 39,
- 39, 41, 43, 43, 43, 44, 45, 45, 45, 46, 47, 47, 47, 47, 46, 46, 46, 46,
- 45, 45, 35, 35, 36, 36, 36, 37, 37, 37, 37, 39, 41, 41, 41, 43, 45, 45,
- 45, 45, 46, 46, 46, 47, 47, 47, 47, 47, 46, 46, 46, 46, 45, 45, 36, 37,
- 38, 38, 38, 39, 40, 40, 40, 41, 43, 43, 43, 45, 47, 47, 47, 47, 47, 47,
- 47, 47, 48, 48, 48, 47, 46, 46, 46, 46, 45, 45, 36, 37, 38, 38, 38, 39,
- 40, 40, 40, 41, 43, 43, 43, 45, 47, 47, 47, 47, 47, 47, 47, 47, 48, 48,
- 48, 47, 46, 46, 46, 46, 45, 45, 36, 37, 38, 38, 38, 39, 40, 40, 40, 41,
- 43, 43, 43, 45, 47, 47, 47, 47, 47, 47, 47, 47, 48, 48, 48, 47, 46, 46,
- 46, 46, 45, 45, 39, 39, 40, 40, 40, 41, 41, 41, 41, 42, 44, 44, 44, 45,
- 47, 47, 47, 47, 48, 48, 48, 48, 49, 49, 49, 48, 48, 48, 48, 47, 47, 47,
- 41, 42, 42, 42, 42, 42, 42, 42, 42, 43, 45, 45, 45, 46, 47, 47, 47, 48,
- 48, 48, 48, 49, 50, 50, 50, 50, 49, 49, 49, 49, 49, 49, 41, 42, 42, 42,
- 42, 42, 42, 42, 42, 43, 45, 45, 45, 46, 47, 47, 47, 48, 48, 48, 48, 49,
- 50, 50, 50, 50, 49, 49, 49, 49, 49, 49, 41, 42, 42, 42, 42, 42, 42, 42,
- 42, 43, 45, 45, 45, 46, 47, 47, 47, 48, 48, 48, 48, 49, 50, 50, 50, 50,
- 49, 49, 49, 49, 49, 49, 45, 45, 45, 45, 45, 44, 44, 44, 44, 45, 46, 46,
- 46, 47, 47, 47, 47, 48, 49, 49, 49, 50, 51, 51, 51, 51, 51, 51, 51, 51,
- 51, 51, 49, 48, 47, 47, 47, 47, 46, 46, 46, 47, 47, 47, 47, 47, 48, 48,
- 48, 49, 50, 50, 50, 51, 53, 53, 53, 53, 53, 53, 53, 53, 53, 53, 49, 48,
- 47, 47, 47, 47, 46, 46, 46, 47, 47, 47, 47, 47, 48, 48, 48, 49, 50, 50,
- 50, 51, 53, 53, 53, 53, 53, 53, 53, 53, 53, 53, 49, 48, 47, 47, 47, 47,
- 46, 46, 46, 47, 47, 47, 47, 47, 48, 48, 48, 49, 50, 50, 50, 51, 53, 53,
- 53, 53, 53, 53, 53, 53, 53, 53, 49, 48, 47, 47, 47, 46, 46, 46, 46, 46,
- 47, 47, 47, 47, 47, 47, 47, 48, 50, 50, 50, 51, 53, 53, 53, 53, 53, 53,
- 53, 54, 54, 54, 48, 48, 47, 47, 47, 46, 45, 45, 45, 46, 46, 46, 46, 46,
- 46, 46, 46, 48, 49, 49, 49, 51, 53, 53, 53, 53, 54, 54, 54, 55, 55, 55,
- 48, 48, 47, 47, 47, 46, 45, 45, 45, 46, 46, 46, 46, 46, 46, 46, 46, 48,
- 49, 49, 49, 51, 53, 53, 53, 53, 54, 54, 54, 55, 55, 55, 48, 48, 47, 47,
- 47, 46, 45, 45, 45, 46, 46, 46, 46, 46, 46, 46, 46, 48, 49, 49, 49, 51,
- 53, 53, 53, 53, 54, 54, 54, 55, 55, 55, 49, 48, 47, 47, 47, 46, 45, 45,
- 45, 45, 46, 46, 46, 46, 46, 46, 46, 47, 49, 49, 49, 51, 53, 53, 53, 54,
- 55, 55, 55, 56, 57, 57, 49, 48, 47, 47, 47, 46, 45, 45, 45, 45, 45, 45,
- 45, 45, 45, 45, 45, 47, 49, 49, 49, 51, 53, 53, 53, 54, 55, 55, 55, 57,
- 58, 58, 49, 48, 47, 47, 47, 46, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45,
- 45, 47, 49, 49, 49, 51, 53, 53, 53, 54, 55, 55, 55, 57, 58, 58,
- /* Size 4x8 */
- 31, 34, 42, 48, 31, 35, 42, 46, 33, 37, 44, 46, 36, 41, 46, 46, 40, 44,
- 48, 48, 45, 46, 49, 51, 47, 47, 50, 54, 47, 46, 49, 55,
- /* Size 8x4 */
- 31, 31, 33, 36, 40, 45, 47, 47, 34, 35, 37, 41, 44, 46, 47, 46, 42, 42,
- 44, 46, 48, 49, 50, 49, 48, 46, 46, 46, 48, 51, 54, 55,
- /* Size 8x16 */
- 32, 31, 31, 37, 37, 48, 48, 49, 31, 31, 31, 38, 38, 47, 47, 47, 31, 31,
- 31, 38, 38, 47, 47, 47, 30, 32, 32, 40, 40, 46, 46, 45, 30, 32, 32, 40,
- 40, 46, 46, 45, 33, 36, 36, 43, 43, 47, 47, 46, 33, 36, 36, 43, 43, 47,
- 47, 46, 37, 40, 40, 47, 47, 47, 47, 45, 37, 40, 40, 47, 47, 47, 47, 45,
- 42, 43, 43, 47, 47, 50, 50, 49, 42, 43, 43, 47, 47, 50, 50, 49, 49, 46,
- 46, 48, 48, 53, 53, 53, 49, 46, 46, 48, 48, 53, 53, 53, 48, 46, 46, 47,
- 47, 53, 53, 56, 48, 46, 46, 47, 47, 53, 53, 56, 49, 45, 45, 46, 46, 53,
- 53, 58,
- /* Size 16x8 */
- 32, 31, 31, 30, 30, 33, 33, 37, 37, 42, 42, 49, 49, 48, 48, 49, 31, 31,
- 31, 32, 32, 36, 36, 40, 40, 43, 43, 46, 46, 46, 46, 45, 31, 31, 31, 32,
- 32, 36, 36, 40, 40, 43, 43, 46, 46, 46, 46, 45, 37, 38, 38, 40, 40, 43,
- 43, 47, 47, 47, 47, 48, 48, 47, 47, 46, 37, 38, 38, 40, 40, 43, 43, 47,
- 47, 47, 47, 48, 48, 47, 47, 46, 48, 47, 47, 46, 46, 47, 47, 47, 47, 50,
- 50, 53, 53, 53, 53, 53, 48, 47, 47, 46, 46, 47, 47, 47, 47, 50, 50, 53,
- 53, 53, 53, 53, 49, 47, 47, 45, 45, 46, 46, 45, 45, 49, 49, 53, 53, 56,
- 56, 58,
- /* Size 16x32 */
- 32, 31, 31, 31, 31, 33, 37, 37, 37, 42, 48, 48, 48, 48, 49, 49, 31, 31,
- 31, 31, 31, 34, 37, 37, 37, 42, 47, 47, 47, 48, 48, 48, 31, 31, 31, 31,
- 31, 34, 38, 38, 38, 42, 47, 47, 47, 47, 47, 47, 31, 31, 31, 31, 31, 34,
- 38, 38, 38, 42, 47, 47, 47, 47, 47, 47, 31, 31, 31, 31, 31, 34, 38, 38,
- 38, 42, 47, 47, 47, 47, 47, 47, 31, 31, 32, 32, 32, 35, 39, 39, 39, 42,
- 46, 46, 46, 46, 46, 46, 30, 31, 32, 32, 32, 35, 40, 40, 40, 42, 46, 46,
- 46, 45, 45, 45, 30, 31, 32, 32, 32, 35, 40, 40, 40, 42, 46, 46, 46, 45,
- 45, 45, 30, 31, 32, 32, 32, 35, 40, 40, 40, 42, 46, 46, 46, 45, 45, 45,
- 32, 33, 34, 34, 34, 37, 41, 41, 41, 44, 46, 46, 46, 46, 45, 45, 33, 34,
- 36, 36, 36, 39, 43, 43, 43, 45, 47, 47, 47, 46, 46, 46, 33, 34, 36, 36,
- 36, 39, 43, 43, 43, 45, 47, 47, 47, 46, 46, 46, 33, 34, 36, 36, 36, 39,
- 43, 43, 43, 45, 47, 47, 47, 46, 46, 46, 35, 36, 38, 38, 38, 41, 45, 45,
- 45, 46, 47, 47, 47, 46, 45, 45, 37, 38, 40, 40, 40, 43, 47, 47, 47, 47,
- 47, 47, 47, 46, 45, 45, 37, 38, 40, 40, 40, 43, 47, 47, 47, 47, 47, 47,
- 47, 46, 45, 45, 37, 38, 40, 40, 40, 43, 47, 47, 47, 47, 47, 47, 47, 46,
- 45, 45, 39, 40, 41, 41, 41, 44, 47, 47, 47, 48, 49, 49, 49, 48, 47, 47,
- 42, 42, 43, 43, 43, 45, 47, 47, 47, 48, 50, 50, 50, 50, 49, 49, 42, 42,
- 43, 43, 43, 45, 47, 47, 47, 48, 50, 50, 50, 50, 49, 49, 42, 42, 43, 43,
- 43, 45, 47, 47, 47, 48, 50, 50, 50, 50, 49, 49, 45, 45, 44, 44, 44, 46,
- 47, 47, 47, 49, 51, 51, 51, 51, 51, 51, 49, 48, 46, 46, 46, 47, 48, 48,
- 48, 50, 53, 53, 53, 53, 53, 53, 49, 48, 46, 46, 46, 47, 48, 48, 48, 50,
- 53, 53, 53, 53, 53, 53, 49, 48, 46, 46, 46, 47, 48, 48, 48, 50, 53, 53,
- 53, 53, 53, 53, 48, 47, 46, 46, 46, 47, 47, 47, 47, 50, 53, 53, 53, 54,
- 54, 54, 48, 47, 46, 46, 46, 46, 47, 47, 47, 50, 53, 53, 53, 54, 56, 56,
- 48, 47, 46, 46, 46, 46, 47, 47, 47, 50, 53, 53, 53, 54, 56, 56, 48, 47,
- 46, 46, 46, 46, 47, 47, 47, 50, 53, 53, 53, 54, 56, 56, 48, 47, 45, 45,
- 45, 46, 46, 46, 46, 49, 53, 53, 53, 55, 57, 57, 49, 47, 45, 45, 45, 45,
- 46, 46, 46, 49, 53, 53, 53, 56, 58, 58, 49, 47, 45, 45, 45, 45, 46, 46,
- 46, 49, 53, 53, 53, 56, 58, 58,
- /* Size 32x16 */
- 32, 31, 31, 31, 31, 31, 30, 30, 30, 32, 33, 33, 33, 35, 37, 37, 37, 39,
- 42, 42, 42, 45, 49, 49, 49, 48, 48, 48, 48, 48, 49, 49, 31, 31, 31, 31,
- 31, 31, 31, 31, 31, 33, 34, 34, 34, 36, 38, 38, 38, 40, 42, 42, 42, 45,
- 48, 48, 48, 47, 47, 47, 47, 47, 47, 47, 31, 31, 31, 31, 31, 32, 32, 32,
- 32, 34, 36, 36, 36, 38, 40, 40, 40, 41, 43, 43, 43, 44, 46, 46, 46, 46,
- 46, 46, 46, 45, 45, 45, 31, 31, 31, 31, 31, 32, 32, 32, 32, 34, 36, 36,
- 36, 38, 40, 40, 40, 41, 43, 43, 43, 44, 46, 46, 46, 46, 46, 46, 46, 45,
- 45, 45, 31, 31, 31, 31, 31, 32, 32, 32, 32, 34, 36, 36, 36, 38, 40, 40,
- 40, 41, 43, 43, 43, 44, 46, 46, 46, 46, 46, 46, 46, 45, 45, 45, 33, 34,
- 34, 34, 34, 35, 35, 35, 35, 37, 39, 39, 39, 41, 43, 43, 43, 44, 45, 45,
- 45, 46, 47, 47, 47, 47, 46, 46, 46, 46, 45, 45, 37, 37, 38, 38, 38, 39,
- 40, 40, 40, 41, 43, 43, 43, 45, 47, 47, 47, 47, 47, 47, 47, 47, 48, 48,
- 48, 47, 47, 47, 47, 46, 46, 46, 37, 37, 38, 38, 38, 39, 40, 40, 40, 41,
- 43, 43, 43, 45, 47, 47, 47, 47, 47, 47, 47, 47, 48, 48, 48, 47, 47, 47,
- 47, 46, 46, 46, 37, 37, 38, 38, 38, 39, 40, 40, 40, 41, 43, 43, 43, 45,
- 47, 47, 47, 47, 47, 47, 47, 47, 48, 48, 48, 47, 47, 47, 47, 46, 46, 46,
- 42, 42, 42, 42, 42, 42, 42, 42, 42, 44, 45, 45, 45, 46, 47, 47, 47, 48,
- 48, 48, 48, 49, 50, 50, 50, 50, 50, 50, 50, 49, 49, 49, 48, 47, 47, 47,
- 47, 46, 46, 46, 46, 46, 47, 47, 47, 47, 47, 47, 47, 49, 50, 50, 50, 51,
- 53, 53, 53, 53, 53, 53, 53, 53, 53, 53, 48, 47, 47, 47, 47, 46, 46, 46,
- 46, 46, 47, 47, 47, 47, 47, 47, 47, 49, 50, 50, 50, 51, 53, 53, 53, 53,
- 53, 53, 53, 53, 53, 53, 48, 47, 47, 47, 47, 46, 46, 46, 46, 46, 47, 47,
- 47, 47, 47, 47, 47, 49, 50, 50, 50, 51, 53, 53, 53, 53, 53, 53, 53, 53,
- 53, 53, 48, 48, 47, 47, 47, 46, 45, 45, 45, 46, 46, 46, 46, 46, 46, 46,
- 46, 48, 50, 50, 50, 51, 53, 53, 53, 54, 54, 54, 54, 55, 56, 56, 49, 48,
- 47, 47, 47, 46, 45, 45, 45, 45, 46, 46, 46, 45, 45, 45, 45, 47, 49, 49,
- 49, 51, 53, 53, 53, 54, 56, 56, 56, 57, 58, 58, 49, 48, 47, 47, 47, 46,
- 45, 45, 45, 45, 46, 46, 46, 45, 45, 45, 45, 47, 49, 49, 49, 51, 53, 53,
- 53, 54, 56, 56, 56, 57, 58, 58,
- /* Size 4x16 */
- 31, 33, 42, 48, 31, 34, 42, 47, 31, 34, 42, 47, 31, 35, 42, 45, 31, 35,
- 42, 45, 34, 39, 45, 46, 34, 39, 45, 46, 38, 43, 47, 46, 38, 43, 47, 46,
- 42, 45, 48, 50, 42, 45, 48, 50, 48, 47, 50, 53, 48, 47, 50, 53, 47, 46,
- 50, 54, 47, 46, 50, 54, 47, 45, 49, 56,
- /* Size 16x4 */
- 31, 31, 31, 31, 31, 34, 34, 38, 38, 42, 42, 48, 48, 47, 47, 47, 33, 34,
- 34, 35, 35, 39, 39, 43, 43, 45, 45, 47, 47, 46, 46, 45, 42, 42, 42, 42,
- 42, 45, 45, 47, 47, 48, 48, 50, 50, 50, 50, 49, 48, 47, 47, 45, 45, 46,
- 46, 46, 46, 50, 50, 53, 53, 54, 54, 56,
- /* Size 8x32 */
- 32, 31, 31, 37, 37, 48, 48, 49, 31, 31, 31, 37, 37, 47, 47, 48, 31, 31,
- 31, 38, 38, 47, 47, 47, 31, 31, 31, 38, 38, 47, 47, 47, 31, 31, 31, 38,
- 38, 47, 47, 47, 31, 32, 32, 39, 39, 46, 46, 46, 30, 32, 32, 40, 40, 46,
- 46, 45, 30, 32, 32, 40, 40, 46, 46, 45, 30, 32, 32, 40, 40, 46, 46, 45,
- 32, 34, 34, 41, 41, 46, 46, 45, 33, 36, 36, 43, 43, 47, 47, 46, 33, 36,
- 36, 43, 43, 47, 47, 46, 33, 36, 36, 43, 43, 47, 47, 46, 35, 38, 38, 45,
- 45, 47, 47, 45, 37, 40, 40, 47, 47, 47, 47, 45, 37, 40, 40, 47, 47, 47,
- 47, 45, 37, 40, 40, 47, 47, 47, 47, 45, 39, 41, 41, 47, 47, 49, 49, 47,
- 42, 43, 43, 47, 47, 50, 50, 49, 42, 43, 43, 47, 47, 50, 50, 49, 42, 43,
- 43, 47, 47, 50, 50, 49, 45, 44, 44, 47, 47, 51, 51, 51, 49, 46, 46, 48,
- 48, 53, 53, 53, 49, 46, 46, 48, 48, 53, 53, 53, 49, 46, 46, 48, 48, 53,
- 53, 53, 48, 46, 46, 47, 47, 53, 53, 54, 48, 46, 46, 47, 47, 53, 53, 56,
- 48, 46, 46, 47, 47, 53, 53, 56, 48, 46, 46, 47, 47, 53, 53, 56, 48, 45,
- 45, 46, 46, 53, 53, 57, 49, 45, 45, 46, 46, 53, 53, 58, 49, 45, 45, 46,
- 46, 53, 53, 58,
- /* Size 32x8 */
- 32, 31, 31, 31, 31, 31, 30, 30, 30, 32, 33, 33, 33, 35, 37, 37, 37, 39,
- 42, 42, 42, 45, 49, 49, 49, 48, 48, 48, 48, 48, 49, 49, 31, 31, 31, 31,
- 31, 32, 32, 32, 32, 34, 36, 36, 36, 38, 40, 40, 40, 41, 43, 43, 43, 44,
- 46, 46, 46, 46, 46, 46, 46, 45, 45, 45, 31, 31, 31, 31, 31, 32, 32, 32,
- 32, 34, 36, 36, 36, 38, 40, 40, 40, 41, 43, 43, 43, 44, 46, 46, 46, 46,
- 46, 46, 46, 45, 45, 45, 37, 37, 38, 38, 38, 39, 40, 40, 40, 41, 43, 43,
- 43, 45, 47, 47, 47, 47, 47, 47, 47, 47, 48, 48, 48, 47, 47, 47, 47, 46,
- 46, 46, 37, 37, 38, 38, 38, 39, 40, 40, 40, 41, 43, 43, 43, 45, 47, 47,
- 47, 47, 47, 47, 47, 47, 48, 48, 48, 47, 47, 47, 47, 46, 46, 46, 48, 47,
- 47, 47, 47, 46, 46, 46, 46, 46, 47, 47, 47, 47, 47, 47, 47, 49, 50, 50,
- 50, 51, 53, 53, 53, 53, 53, 53, 53, 53, 53, 53, 48, 47, 47, 47, 47, 46,
- 46, 46, 46, 46, 47, 47, 47, 47, 47, 47, 47, 49, 50, 50, 50, 51, 53, 53,
- 53, 53, 53, 53, 53, 53, 53, 53, 49, 48, 47, 47, 47, 46, 45, 45, 45, 45,
- 46, 46, 46, 45, 45, 45, 45, 47, 49, 49, 49, 51, 53, 53, 53, 54, 56, 56,
- 56, 57, 58, 58 },
- },
- {
- { /* Luma */
- /* Size 4x4 */
- 32, 32, 32, 35, 32, 32, 33, 35, 32, 33, 35, 38, 35, 35, 38, 46,
- /* Size 8x8 */
- 31, 31, 31, 32, 32, 32, 34, 35, 31, 32, 32, 32, 32, 33, 34, 35, 31, 32,
- 32, 32, 32, 33, 33, 34, 32, 32, 32, 33, 34, 34, 35, 36, 32, 32, 32, 34,
- 35, 35, 36, 38, 32, 33, 33, 34, 35, 36, 38, 40, 34, 34, 33, 35, 36, 38,
- 39, 42, 35, 35, 34, 36, 38, 40, 42, 48,
- /* Size 16x16 */
- 32, 31, 31, 31, 31, 31, 31, 31, 32, 32, 32, 33, 34, 34, 36, 36, 31, 31,
- 31, 32, 32, 32, 32, 32, 32, 32, 32, 33, 34, 34, 35, 35, 31, 31, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 33, 34, 34, 35, 35, 31, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 33, 34, 34, 35, 35, 31, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 33, 33, 34, 34, 34, 31, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 33, 33, 34, 34, 34, 31, 32, 32, 32, 32, 32, 33, 33, 33, 33, 33, 34,
- 35, 35, 36, 36, 31, 32, 32, 32, 32, 32, 33, 33, 33, 34, 34, 35, 35, 36,
- 36, 36, 32, 32, 32, 32, 32, 32, 33, 33, 34, 34, 34, 35, 36, 36, 37, 37,
- 32, 32, 32, 32, 32, 32, 33, 34, 34, 35, 35, 36, 37, 37, 38, 38, 32, 32,
- 32, 32, 32, 32, 33, 34, 34, 35, 35, 36, 37, 37, 38, 38, 33, 33, 33, 33,
- 33, 33, 34, 35, 35, 36, 36, 38, 39, 40, 42, 42, 34, 34, 34, 34, 33, 33,
- 35, 35, 36, 37, 37, 39, 39, 41, 42, 42, 34, 34, 34, 34, 34, 34, 35, 36,
- 36, 37, 37, 40, 41, 42, 45, 45, 36, 35, 35, 35, 34, 34, 36, 36, 37, 38,
- 38, 42, 42, 45, 48, 48, 36, 35, 35, 35, 34, 34, 36, 36, 37, 38, 38, 42,
- 42, 45, 48, 48,
- /* Size 32x32 */
- 32, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 32, 32,
- 32, 32, 32, 32, 33, 34, 34, 34, 34, 35, 36, 36, 36, 37, 31, 31, 31, 31,
- 31, 31, 31, 31, 31, 31, 31, 31, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 33, 34, 34, 34, 34, 35, 35, 35, 35, 37, 31, 31, 31, 31, 31, 31, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 33, 33, 34, 34, 34,
- 34, 35, 35, 35, 35, 36, 31, 31, 31, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 33, 33, 34, 34, 34, 34, 35, 35, 35,
- 35, 36, 31, 31, 31, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 33, 33, 34, 34, 34, 34, 35, 35, 35, 35, 36, 31, 31,
- 31, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 33, 33, 34, 34, 34, 34, 35, 35, 35, 35, 36, 31, 31, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 33, 33, 34,
- 34, 34, 34, 34, 35, 35, 35, 36, 31, 31, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 33, 33, 33, 33, 33, 34, 34,
- 34, 34, 34, 35, 31, 31, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 33, 33, 33, 33, 33, 34, 34, 34, 34, 34, 35,
- 31, 31, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 33, 33, 33, 33, 33, 34, 34, 34, 34, 34, 35, 31, 31, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 33,
- 33, 33, 33, 33, 34, 34, 34, 34, 34, 35, 31, 31, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 34, 34, 34, 34,
- 34, 35, 35, 35, 35, 36, 31, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 33,
- 33, 33, 33, 33, 33, 33, 33, 33, 33, 34, 34, 35, 35, 35, 35, 36, 36, 36,
- 36, 37, 31, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 33, 33, 33, 33, 33,
- 33, 34, 34, 34, 34, 34, 35, 35, 35, 35, 36, 36, 36, 36, 36, 37, 31, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 33, 33, 33, 33, 33, 33, 34, 34, 34,
- 34, 34, 35, 35, 35, 35, 36, 36, 36, 36, 36, 37, 31, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 33, 33, 33, 33, 33, 33, 34, 34, 34, 34, 34, 35, 35,
- 35, 35, 36, 36, 36, 36, 36, 37, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 33, 33, 33, 33, 33, 34, 34, 34, 34, 34, 35, 35, 36, 36, 36, 36, 37,
- 37, 37, 37, 38, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 33, 33, 34,
- 34, 34, 34, 35, 35, 35, 35, 35, 36, 36, 36, 36, 37, 37, 38, 38, 38, 39,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 33, 33, 34, 34, 34, 34, 35,
- 35, 35, 35, 36, 36, 37, 37, 37, 37, 38, 38, 38, 38, 39, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 33, 33, 34, 34, 34, 34, 35, 35, 35, 35, 36,
- 36, 37, 37, 37, 37, 38, 38, 38, 38, 39, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 33, 33, 34, 34, 34, 34, 35, 35, 35, 35, 36, 36, 37, 37, 37,
- 37, 38, 38, 38, 38, 39, 32, 32, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33,
- 34, 34, 34, 34, 35, 35, 36, 36, 36, 36, 37, 38, 38, 38, 38, 39, 40, 40,
- 40, 41, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 34, 34, 35, 35, 35,
- 35, 36, 36, 36, 36, 37, 38, 39, 39, 39, 40, 41, 42, 42, 42, 42, 34, 34,
- 34, 34, 34, 34, 34, 33, 33, 33, 33, 34, 35, 35, 35, 35, 36, 36, 37, 37,
- 37, 38, 39, 39, 39, 39, 41, 42, 42, 42, 42, 43, 34, 34, 34, 34, 34, 34,
- 34, 33, 33, 33, 33, 34, 35, 35, 35, 35, 36, 36, 37, 37, 37, 38, 39, 39,
- 39, 39, 41, 42, 42, 42, 42, 43, 34, 34, 34, 34, 34, 34, 34, 33, 33, 33,
- 33, 34, 35, 35, 35, 35, 36, 36, 37, 37, 37, 38, 39, 39, 39, 39, 41, 42,
- 42, 42, 42, 43, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 35, 36,
- 36, 36, 36, 37, 37, 37, 37, 38, 40, 41, 41, 41, 42, 44, 45, 45, 45, 45,
- 35, 35, 35, 35, 35, 35, 34, 34, 34, 34, 34, 35, 36, 36, 36, 36, 37, 37,
- 38, 38, 38, 39, 41, 42, 42, 42, 44, 46, 47, 47, 47, 48, 36, 35, 35, 35,
- 35, 35, 35, 34, 34, 34, 34, 35, 36, 36, 36, 36, 37, 38, 38, 38, 38, 40,
- 42, 42, 42, 42, 45, 47, 48, 48, 48, 49, 36, 35, 35, 35, 35, 35, 35, 34,
- 34, 34, 34, 35, 36, 36, 36, 36, 37, 38, 38, 38, 38, 40, 42, 42, 42, 42,
- 45, 47, 48, 48, 48, 49, 36, 35, 35, 35, 35, 35, 35, 34, 34, 34, 34, 35,
- 36, 36, 36, 36, 37, 38, 38, 38, 38, 40, 42, 42, 42, 42, 45, 47, 48, 48,
- 48, 49, 37, 37, 36, 36, 36, 36, 36, 35, 35, 35, 35, 36, 37, 37, 37, 37,
- 38, 39, 39, 39, 39, 41, 42, 43, 43, 43, 45, 48, 49, 49, 49, 50,
- /* Size 4x8 */
- 31, 31, 32, 35, 32, 32, 32, 35, 32, 32, 33, 34, 32, 32, 34, 36, 32, 33,
- 35, 38, 33, 33, 36, 40, 34, 34, 37, 42, 35, 34, 38, 48,
- /* Size 8x4 */
- 31, 32, 32, 32, 32, 33, 34, 35, 31, 32, 32, 32, 33, 33, 34, 34, 32, 32,
- 33, 34, 35, 36, 37, 38, 35, 35, 34, 36, 38, 40, 42, 48,
- /* Size 8x16 */
- 32, 31, 31, 31, 32, 32, 35, 36, 31, 32, 32, 32, 32, 32, 35, 35, 31, 32,
- 32, 32, 32, 32, 35, 35, 31, 32, 32, 32, 32, 32, 34, 35, 31, 32, 32, 32,
- 33, 33, 34, 34, 31, 32, 32, 32, 33, 33, 34, 34, 31, 32, 32, 33, 34, 34,
- 35, 36, 32, 32, 32, 33, 34, 34, 36, 36, 32, 32, 32, 33, 34, 34, 36, 37,
- 32, 32, 33, 34, 35, 35, 37, 38, 32, 32, 33, 34, 35, 35, 37, 38, 33, 33,
- 33, 35, 36, 36, 40, 41, 34, 34, 34, 35, 37, 37, 41, 42, 34, 34, 34, 35,
- 37, 37, 43, 44, 36, 35, 34, 36, 38, 38, 46, 48, 36, 35, 34, 36, 38, 38,
- 46, 48,
- /* Size 16x8 */
- 32, 31, 31, 31, 31, 31, 31, 32, 32, 32, 32, 33, 34, 34, 36, 36, 31, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 33, 34, 34, 35, 35, 31, 32, 32, 32,
- 32, 32, 32, 32, 32, 33, 33, 33, 34, 34, 34, 34, 31, 32, 32, 32, 32, 32,
- 33, 33, 33, 34, 34, 35, 35, 35, 36, 36, 32, 32, 32, 32, 33, 33, 34, 34,
- 34, 35, 35, 36, 37, 37, 38, 38, 32, 32, 32, 32, 33, 33, 34, 34, 34, 35,
- 35, 36, 37, 37, 38, 38, 35, 35, 35, 34, 34, 34, 35, 36, 36, 37, 37, 40,
- 41, 43, 46, 46, 36, 35, 35, 35, 34, 34, 36, 36, 37, 38, 38, 41, 42, 44,
- 48, 48,
- /* Size 16x32 */
- 32, 31, 31, 31, 31, 31, 31, 32, 32, 32, 32, 33, 35, 36, 36, 36, 31, 31,
- 31, 31, 31, 31, 32, 32, 32, 32, 32, 33, 35, 35, 35, 35, 31, 31, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 33, 35, 35, 35, 35, 31, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 33, 35, 35, 35, 35, 31, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 33, 35, 35, 35, 35, 31, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 33, 35, 35, 35, 35, 31, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 33,
- 34, 35, 35, 35, 31, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 33, 34, 35,
- 35, 35, 31, 32, 32, 32, 32, 32, 32, 32, 33, 33, 33, 33, 34, 34, 34, 34,
- 31, 32, 32, 32, 32, 32, 32, 32, 33, 33, 33, 33, 34, 34, 34, 34, 31, 32,
- 32, 32, 32, 32, 32, 32, 33, 33, 33, 33, 34, 34, 34, 34, 31, 32, 32, 32,
- 32, 32, 33, 33, 33, 33, 33, 34, 35, 35, 35, 35, 31, 32, 32, 32, 32, 32,
- 33, 33, 34, 34, 34, 34, 35, 36, 36, 36, 32, 32, 32, 32, 32, 32, 33, 34,
- 34, 34, 34, 35, 36, 36, 36, 36, 32, 32, 32, 32, 32, 32, 33, 34, 34, 34,
- 34, 35, 36, 36, 36, 36, 32, 32, 32, 32, 32, 32, 33, 34, 34, 34, 34, 35,
- 36, 36, 36, 36, 32, 32, 32, 32, 32, 32, 33, 34, 34, 34, 34, 35, 36, 37,
- 37, 37, 32, 32, 32, 33, 33, 33, 33, 34, 35, 35, 35, 36, 37, 38, 38, 38,
- 32, 32, 32, 33, 33, 33, 34, 35, 35, 35, 35, 36, 37, 38, 38, 38, 32, 32,
- 32, 33, 33, 33, 34, 35, 35, 35, 35, 36, 37, 38, 38, 38, 32, 32, 32, 33,
- 33, 33, 34, 35, 35, 35, 35, 36, 37, 38, 38, 38, 32, 33, 33, 33, 33, 33,
- 34, 35, 36, 36, 36, 37, 39, 40, 40, 40, 33, 33, 33, 33, 33, 33, 35, 36,
- 36, 36, 36, 38, 40, 41, 41, 41, 34, 34, 34, 34, 34, 34, 35, 36, 37, 37,
- 37, 39, 41, 42, 42, 42, 34, 34, 34, 34, 34, 34, 35, 36, 37, 37, 37, 39,
- 41, 42, 42, 42, 34, 34, 34, 34, 34, 34, 35, 36, 37, 37, 37, 39, 41, 42,
- 42, 42, 34, 34, 34, 34, 34, 34, 35, 37, 37, 37, 37, 40, 43, 44, 44, 44,
- 35, 35, 34, 34, 34, 34, 36, 37, 38, 38, 38, 41, 45, 47, 47, 47, 36, 35,
- 35, 34, 34, 34, 36, 37, 38, 38, 38, 42, 46, 48, 48, 48, 36, 35, 35, 34,
- 34, 34, 36, 37, 38, 38, 38, 42, 46, 48, 48, 48, 36, 35, 35, 34, 34, 34,
- 36, 37, 38, 38, 38, 42, 46, 48, 48, 48, 37, 36, 36, 36, 36, 36, 37, 38,
- 39, 39, 39, 42, 46, 49, 49, 49,
- /* Size 32x16 */
- 32, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 33, 34, 34, 34, 34, 35, 36, 36, 36, 37, 31, 31, 31, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 33,
- 33, 34, 34, 34, 34, 35, 35, 35, 35, 36, 31, 31, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 33, 33, 34, 34, 34,
- 34, 34, 35, 35, 35, 36, 31, 31, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 33, 33, 33, 33, 33, 33, 34, 34, 34, 34, 34, 34, 34,
- 34, 36, 31, 31, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 33, 33, 33, 33, 33, 33, 34, 34, 34, 34, 34, 34, 34, 34, 36, 31, 31,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 33, 33, 33,
- 33, 33, 33, 34, 34, 34, 34, 34, 34, 34, 34, 36, 31, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 33, 33, 33, 33, 33, 33, 33, 34, 34, 34, 34, 35, 35,
- 35, 35, 35, 36, 36, 36, 36, 37, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 33, 33, 34, 34, 34, 34, 34, 35, 35, 35, 35, 36, 36, 36, 36, 37, 37,
- 37, 37, 37, 38, 32, 32, 32, 32, 32, 32, 32, 32, 33, 33, 33, 33, 34, 34,
- 34, 34, 34, 35, 35, 35, 35, 36, 36, 37, 37, 37, 37, 38, 38, 38, 38, 39,
- 32, 32, 32, 32, 32, 32, 32, 32, 33, 33, 33, 33, 34, 34, 34, 34, 34, 35,
- 35, 35, 35, 36, 36, 37, 37, 37, 37, 38, 38, 38, 38, 39, 32, 32, 32, 32,
- 32, 32, 32, 32, 33, 33, 33, 33, 34, 34, 34, 34, 34, 35, 35, 35, 35, 36,
- 36, 37, 37, 37, 37, 38, 38, 38, 38, 39, 33, 33, 33, 33, 33, 33, 33, 33,
- 33, 33, 33, 34, 34, 35, 35, 35, 35, 36, 36, 36, 36, 37, 38, 39, 39, 39,
- 40, 41, 42, 42, 42, 42, 35, 35, 35, 35, 35, 35, 34, 34, 34, 34, 34, 35,
- 35, 36, 36, 36, 36, 37, 37, 37, 37, 39, 40, 41, 41, 41, 43, 45, 46, 46,
- 46, 46, 36, 35, 35, 35, 35, 35, 35, 35, 34, 34, 34, 35, 36, 36, 36, 36,
- 37, 38, 38, 38, 38, 40, 41, 42, 42, 42, 44, 47, 48, 48, 48, 49, 36, 35,
- 35, 35, 35, 35, 35, 35, 34, 34, 34, 35, 36, 36, 36, 36, 37, 38, 38, 38,
- 38, 40, 41, 42, 42, 42, 44, 47, 48, 48, 48, 49, 36, 35, 35, 35, 35, 35,
- 35, 35, 34, 34, 34, 35, 36, 36, 36, 36, 37, 38, 38, 38, 38, 40, 41, 42,
- 42, 42, 44, 47, 48, 48, 48, 49,
- /* Size 4x16 */
- 31, 31, 32, 36, 31, 32, 32, 35, 32, 32, 32, 35, 32, 32, 32, 35, 32, 32,
- 33, 34, 32, 32, 33, 34, 32, 32, 34, 36, 32, 32, 34, 36, 32, 32, 34, 37,
- 32, 33, 35, 38, 32, 33, 35, 38, 33, 33, 36, 41, 34, 34, 37, 42, 34, 34,
- 37, 44, 35, 34, 38, 48, 35, 34, 38, 48,
- /* Size 16x4 */
- 31, 31, 32, 32, 32, 32, 32, 32, 32, 32, 32, 33, 34, 34, 35, 35, 31, 32,
- 32, 32, 32, 32, 32, 32, 32, 33, 33, 33, 34, 34, 34, 34, 32, 32, 32, 32,
- 33, 33, 34, 34, 34, 35, 35, 36, 37, 37, 38, 38, 36, 35, 35, 35, 34, 34,
- 36, 36, 37, 38, 38, 41, 42, 44, 48, 48,
- /* Size 8x32 */
- 32, 31, 31, 31, 32, 32, 35, 36, 31, 31, 31, 32, 32, 32, 35, 35, 31, 32,
- 32, 32, 32, 32, 35, 35, 31, 32, 32, 32, 32, 32, 35, 35, 31, 32, 32, 32,
- 32, 32, 35, 35, 31, 32, 32, 32, 32, 32, 35, 35, 31, 32, 32, 32, 32, 32,
- 34, 35, 31, 32, 32, 32, 32, 32, 34, 35, 31, 32, 32, 32, 33, 33, 34, 34,
- 31, 32, 32, 32, 33, 33, 34, 34, 31, 32, 32, 32, 33, 33, 34, 34, 31, 32,
- 32, 33, 33, 33, 35, 35, 31, 32, 32, 33, 34, 34, 35, 36, 32, 32, 32, 33,
- 34, 34, 36, 36, 32, 32, 32, 33, 34, 34, 36, 36, 32, 32, 32, 33, 34, 34,
- 36, 36, 32, 32, 32, 33, 34, 34, 36, 37, 32, 32, 33, 33, 35, 35, 37, 38,
- 32, 32, 33, 34, 35, 35, 37, 38, 32, 32, 33, 34, 35, 35, 37, 38, 32, 32,
- 33, 34, 35, 35, 37, 38, 32, 33, 33, 34, 36, 36, 39, 40, 33, 33, 33, 35,
- 36, 36, 40, 41, 34, 34, 34, 35, 37, 37, 41, 42, 34, 34, 34, 35, 37, 37,
- 41, 42, 34, 34, 34, 35, 37, 37, 41, 42, 34, 34, 34, 35, 37, 37, 43, 44,
- 35, 34, 34, 36, 38, 38, 45, 47, 36, 35, 34, 36, 38, 38, 46, 48, 36, 35,
- 34, 36, 38, 38, 46, 48, 36, 35, 34, 36, 38, 38, 46, 48, 37, 36, 36, 37,
- 39, 39, 46, 49,
- /* Size 32x8 */
- 32, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 33, 34, 34, 34, 34, 35, 36, 36, 36, 37, 31, 31, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 33,
- 33, 34, 34, 34, 34, 34, 35, 35, 35, 36, 31, 31, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 33, 33, 33, 33, 33, 33, 34, 34, 34,
- 34, 34, 34, 34, 34, 36, 31, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 33,
- 33, 33, 33, 33, 33, 33, 34, 34, 34, 34, 35, 35, 35, 35, 35, 36, 36, 36,
- 36, 37, 32, 32, 32, 32, 32, 32, 32, 32, 33, 33, 33, 33, 34, 34, 34, 34,
- 34, 35, 35, 35, 35, 36, 36, 37, 37, 37, 37, 38, 38, 38, 38, 39, 32, 32,
- 32, 32, 32, 32, 32, 32, 33, 33, 33, 33, 34, 34, 34, 34, 34, 35, 35, 35,
- 35, 36, 36, 37, 37, 37, 37, 38, 38, 38, 38, 39, 35, 35, 35, 35, 35, 35,
- 34, 34, 34, 34, 34, 35, 35, 36, 36, 36, 36, 37, 37, 37, 37, 39, 40, 41,
- 41, 41, 43, 45, 46, 46, 46, 46, 36, 35, 35, 35, 35, 35, 35, 35, 34, 34,
- 34, 35, 36, 36, 36, 36, 37, 38, 38, 38, 38, 40, 41, 42, 42, 42, 44, 47,
- 48, 48, 48, 49 },
- { /* Chroma */
- /* Size 4x4 */
- 31, 32, 38, 46, 32, 34, 41, 46, 38, 41, 47, 47, 46, 46, 47, 52,
- /* Size 8x8 */
- 31, 31, 30, 34, 36, 39, 42, 48, 31, 31, 31, 34, 37, 40, 42, 47, 30, 31,
- 32, 35, 39, 41, 42, 46, 34, 34, 35, 39, 42, 44, 45, 47, 36, 37, 39, 42,
- 46, 47, 47, 47, 39, 40, 41, 44, 47, 47, 48, 49, 42, 42, 42, 45, 47, 48,
- 48, 50, 48, 47, 46, 47, 47, 49, 50, 53,
- /* Size 16x16 */
- 32, 31, 31, 31, 30, 30, 33, 33, 34, 36, 36, 40, 41, 44, 49, 49, 31, 31,
- 31, 31, 31, 31, 33, 34, 36, 38, 38, 41, 42, 44, 48, 48, 31, 31, 31, 31,
- 31, 31, 34, 34, 36, 38, 38, 41, 42, 44, 47, 47, 31, 31, 31, 31, 31, 31,
- 34, 35, 36, 39, 39, 41, 42, 44, 47, 47, 30, 31, 31, 31, 32, 32, 34, 35,
- 37, 40, 40, 42, 42, 44, 46, 46, 30, 31, 31, 31, 32, 32, 34, 35, 37, 40,
- 40, 42, 42, 44, 46, 46, 33, 33, 34, 34, 34, 34, 37, 38, 40, 42, 42, 44,
- 44, 45, 47, 47, 33, 34, 34, 35, 35, 35, 38, 39, 40, 43, 43, 44, 45, 46,
- 47, 47, 34, 36, 36, 36, 37, 37, 40, 40, 42, 45, 45, 45, 46, 46, 47, 47,
- 36, 38, 38, 39, 40, 40, 42, 43, 45, 47, 47, 47, 47, 47, 48, 48, 36, 38,
- 38, 39, 40, 40, 42, 43, 45, 47, 47, 47, 47, 47, 48, 48, 40, 41, 41, 41,
- 42, 42, 44, 44, 45, 47, 47, 48, 48, 49, 50, 50, 41, 42, 42, 42, 42, 42,
- 44, 45, 46, 47, 47, 48, 48, 49, 50, 50, 44, 44, 44, 44, 44, 44, 45, 46,
- 46, 47, 47, 49, 49, 50, 51, 51, 49, 48, 47, 47, 46, 46, 47, 47, 47, 48,
- 48, 50, 50, 51, 53, 53, 49, 48, 47, 47, 46, 46, 47, 47, 47, 48, 48, 50,
- 50, 51, 53, 53,
- /* Size 32x32 */
- 32, 31, 31, 31, 31, 31, 31, 30, 30, 30, 30, 31, 33, 33, 33, 33, 34, 36,
- 36, 36, 36, 38, 40, 41, 41, 41, 44, 47, 49, 49, 49, 49, 31, 31, 31, 31,
- 31, 31, 31, 31, 30, 30, 30, 32, 33, 34, 34, 34, 35, 36, 37, 37, 37, 39,
- 41, 42, 42, 42, 44, 47, 48, 48, 48, 48, 31, 31, 31, 31, 31, 31, 31, 31,
- 31, 31, 31, 32, 33, 34, 34, 34, 36, 37, 38, 38, 38, 39, 41, 42, 42, 42,
- 44, 46, 48, 48, 48, 47, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 32,
- 34, 34, 34, 34, 36, 37, 38, 38, 38, 40, 41, 42, 42, 42, 44, 46, 47, 47,
- 47, 47, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 32, 34, 34, 34, 34,
- 36, 37, 38, 38, 38, 40, 41, 42, 42, 42, 44, 46, 47, 47, 47, 47, 31, 31,
- 31, 31, 31, 31, 31, 31, 31, 31, 31, 32, 34, 34, 34, 34, 36, 37, 38, 38,
- 38, 40, 41, 42, 42, 42, 44, 46, 47, 47, 47, 47, 31, 31, 31, 31, 31, 31,
- 31, 31, 31, 31, 31, 33, 34, 35, 35, 35, 36, 38, 39, 39, 39, 40, 41, 42,
- 42, 42, 44, 46, 47, 47, 47, 47, 30, 31, 31, 31, 31, 31, 31, 31, 31, 31,
- 31, 33, 34, 35, 35, 35, 37, 38, 39, 39, 39, 41, 42, 42, 42, 42, 44, 46,
- 46, 46, 46, 46, 30, 30, 31, 31, 31, 31, 31, 31, 32, 32, 32, 33, 34, 35,
- 35, 35, 37, 39, 40, 40, 40, 41, 42, 42, 42, 42, 44, 45, 46, 46, 46, 46,
- 30, 30, 31, 31, 31, 31, 31, 31, 32, 32, 32, 33, 34, 35, 35, 35, 37, 39,
- 40, 40, 40, 41, 42, 42, 42, 42, 44, 45, 46, 46, 46, 46, 30, 30, 31, 31,
- 31, 31, 31, 31, 32, 32, 32, 33, 34, 35, 35, 35, 37, 39, 40, 40, 40, 41,
- 42, 42, 42, 42, 44, 45, 46, 46, 46, 46, 31, 32, 32, 32, 32, 32, 33, 33,
- 33, 33, 33, 34, 36, 37, 37, 37, 38, 40, 41, 41, 41, 42, 43, 43, 43, 43,
- 44, 46, 46, 46, 46, 46, 33, 33, 33, 34, 34, 34, 34, 34, 34, 34, 34, 36,
- 37, 38, 38, 38, 40, 41, 42, 42, 42, 43, 44, 44, 44, 44, 45, 46, 47, 47,
- 47, 46, 33, 34, 34, 34, 34, 34, 35, 35, 35, 35, 35, 37, 38, 39, 39, 39,
- 40, 42, 43, 43, 43, 44, 44, 45, 45, 45, 46, 47, 47, 47, 47, 47, 33, 34,
- 34, 34, 34, 34, 35, 35, 35, 35, 35, 37, 38, 39, 39, 39, 40, 42, 43, 43,
- 43, 44, 44, 45, 45, 45, 46, 47, 47, 47, 47, 47, 33, 34, 34, 34, 34, 34,
- 35, 35, 35, 35, 35, 37, 38, 39, 39, 39, 40, 42, 43, 43, 43, 44, 44, 45,
- 45, 45, 46, 47, 47, 47, 47, 47, 34, 35, 36, 36, 36, 36, 36, 37, 37, 37,
- 37, 38, 40, 40, 40, 40, 42, 44, 45, 45, 45, 45, 45, 46, 46, 46, 46, 47,
- 47, 47, 47, 47, 36, 36, 37, 37, 37, 37, 38, 38, 39, 39, 39, 40, 41, 42,
- 42, 42, 44, 46, 46, 46, 46, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47,
- 36, 37, 38, 38, 38, 38, 39, 39, 40, 40, 40, 41, 42, 43, 43, 43, 45, 46,
- 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 48, 48, 48, 47, 36, 37, 38, 38,
- 38, 38, 39, 39, 40, 40, 40, 41, 42, 43, 43, 43, 45, 46, 47, 47, 47, 47,
- 47, 47, 47, 47, 47, 47, 48, 48, 48, 47, 36, 37, 38, 38, 38, 38, 39, 39,
- 40, 40, 40, 41, 42, 43, 43, 43, 45, 46, 47, 47, 47, 47, 47, 47, 47, 47,
- 47, 47, 48, 48, 48, 47, 38, 39, 39, 40, 40, 40, 40, 41, 41, 41, 41, 42,
- 43, 44, 44, 44, 45, 47, 47, 47, 47, 47, 48, 48, 48, 48, 48, 48, 49, 49,
- 49, 48, 40, 41, 41, 41, 41, 41, 41, 42, 42, 42, 42, 43, 44, 44, 44, 44,
- 45, 47, 47, 47, 47, 48, 48, 48, 48, 48, 49, 49, 50, 50, 50, 49, 41, 42,
- 42, 42, 42, 42, 42, 42, 42, 42, 42, 43, 44, 45, 45, 45, 46, 47, 47, 47,
- 47, 48, 48, 48, 48, 48, 49, 50, 50, 50, 50, 50, 41, 42, 42, 42, 42, 42,
- 42, 42, 42, 42, 42, 43, 44, 45, 45, 45, 46, 47, 47, 47, 47, 48, 48, 48,
- 48, 48, 49, 50, 50, 50, 50, 50, 41, 42, 42, 42, 42, 42, 42, 42, 42, 42,
- 42, 43, 44, 45, 45, 45, 46, 47, 47, 47, 47, 48, 48, 48, 48, 48, 49, 50,
- 50, 50, 50, 50, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 45, 46,
- 46, 46, 46, 47, 47, 47, 47, 48, 49, 49, 49, 49, 50, 51, 51, 51, 51, 51,
- 47, 47, 46, 46, 46, 46, 46, 46, 45, 45, 45, 46, 46, 47, 47, 47, 47, 47,
- 47, 47, 47, 48, 49, 50, 50, 50, 51, 52, 52, 52, 52, 52, 49, 48, 48, 47,
- 47, 47, 47, 46, 46, 46, 46, 46, 47, 47, 47, 47, 47, 47, 48, 48, 48, 49,
- 50, 50, 50, 50, 51, 52, 53, 53, 53, 53, 49, 48, 48, 47, 47, 47, 47, 46,
- 46, 46, 46, 46, 47, 47, 47, 47, 47, 47, 48, 48, 48, 49, 50, 50, 50, 50,
- 51, 52, 53, 53, 53, 53, 49, 48, 48, 47, 47, 47, 47, 46, 46, 46, 46, 46,
- 47, 47, 47, 47, 47, 47, 48, 48, 48, 49, 50, 50, 50, 50, 51, 52, 53, 53,
- 53, 53, 49, 48, 47, 47, 47, 47, 47, 46, 46, 46, 46, 46, 46, 47, 47, 47,
- 47, 47, 47, 47, 47, 48, 49, 50, 50, 50, 51, 52, 53, 53, 53, 53,
- /* Size 4x8 */
- 31, 31, 37, 48, 31, 31, 38, 47, 31, 32, 40, 46, 34, 36, 43, 47, 37, 39,
- 46, 47, 39, 41, 47, 48, 42, 43, 47, 50, 48, 46, 48, 53,
- /* Size 8x4 */
- 31, 31, 31, 34, 37, 39, 42, 48, 31, 31, 32, 36, 39, 41, 43, 46, 37, 38,
- 40, 43, 46, 47, 47, 48, 48, 47, 46, 47, 47, 48, 50, 53,
- /* Size 8x16 */
- 32, 31, 31, 33, 37, 37, 45, 48, 31, 31, 31, 34, 38, 38, 45, 47, 31, 31,
- 31, 34, 38, 38, 45, 47, 31, 31, 32, 34, 39, 39, 45, 46, 30, 32, 32, 35,
- 40, 40, 44, 46, 30, 32, 32, 35, 40, 40, 44, 46, 33, 34, 35, 37, 42, 42,
- 46, 47, 33, 35, 36, 38, 43, 43, 46, 47, 35, 37, 37, 40, 44, 44, 46, 47,
- 37, 39, 40, 43, 47, 47, 47, 47, 37, 39, 40, 43, 47, 47, 47, 47, 41, 42,
- 42, 44, 47, 47, 49, 49, 42, 42, 43, 44, 47, 47, 49, 50, 44, 44, 44, 45,
- 47, 47, 50, 51, 49, 47, 46, 47, 48, 48, 52, 53, 49, 47, 46, 47, 48, 48,
- 52, 53,
- /* Size 16x8 */
- 32, 31, 31, 31, 30, 30, 33, 33, 35, 37, 37, 41, 42, 44, 49, 49, 31, 31,
- 31, 31, 32, 32, 34, 35, 37, 39, 39, 42, 42, 44, 47, 47, 31, 31, 31, 32,
- 32, 32, 35, 36, 37, 40, 40, 42, 43, 44, 46, 46, 33, 34, 34, 34, 35, 35,
- 37, 38, 40, 43, 43, 44, 44, 45, 47, 47, 37, 38, 38, 39, 40, 40, 42, 43,
- 44, 47, 47, 47, 47, 47, 48, 48, 37, 38, 38, 39, 40, 40, 42, 43, 44, 47,
- 47, 47, 47, 47, 48, 48, 45, 45, 45, 45, 44, 44, 46, 46, 46, 47, 47, 49,
- 49, 50, 52, 52, 48, 47, 47, 46, 46, 46, 47, 47, 47, 47, 47, 49, 50, 51,
- 53, 53,
- /* Size 16x32 */
- 32, 31, 31, 31, 31, 31, 33, 35, 37, 37, 37, 40, 45, 48, 48, 48, 31, 31,
- 31, 31, 31, 31, 33, 36, 37, 37, 37, 41, 45, 48, 48, 48, 31, 31, 31, 31,
- 31, 31, 34, 36, 38, 38, 38, 41, 45, 47, 47, 47, 31, 31, 31, 31, 31, 31,
- 34, 37, 38, 38, 38, 41, 45, 47, 47, 47, 31, 31, 31, 31, 31, 31, 34, 37,
- 38, 38, 38, 41, 45, 47, 47, 47, 31, 31, 31, 31, 31, 31, 34, 37, 38, 38,
- 38, 41, 45, 47, 47, 47, 31, 31, 31, 32, 32, 32, 34, 37, 39, 39, 39, 41,
- 45, 46, 46, 46, 30, 31, 31, 32, 32, 32, 34, 38, 39, 39, 39, 42, 44, 46,
- 46, 46, 30, 31, 32, 32, 32, 32, 35, 38, 40, 40, 40, 42, 44, 46, 46, 46,
- 30, 31, 32, 32, 32, 32, 35, 38, 40, 40, 40, 42, 44, 46, 46, 46, 30, 31,
- 32, 32, 32, 32, 35, 38, 40, 40, 40, 42, 44, 46, 46, 46, 31, 32, 33, 33,
- 33, 33, 36, 39, 41, 41, 41, 43, 45, 46, 46, 46, 33, 34, 34, 35, 35, 35,
- 37, 40, 42, 42, 42, 44, 46, 47, 47, 47, 33, 34, 35, 36, 36, 36, 38, 41,
- 43, 43, 43, 44, 46, 47, 47, 47, 33, 34, 35, 36, 36, 36, 38, 41, 43, 43,
- 43, 44, 46, 47, 47, 47, 33, 34, 35, 36, 36, 36, 38, 41, 43, 43, 43, 44,
- 46, 47, 47, 47, 35, 36, 37, 37, 37, 37, 40, 43, 44, 44, 44, 45, 46, 47,
- 47, 47, 36, 37, 38, 39, 39, 39, 42, 44, 46, 46, 46, 47, 47, 47, 47, 47,
- 37, 38, 39, 40, 40, 40, 43, 45, 47, 47, 47, 47, 47, 47, 47, 47, 37, 38,
- 39, 40, 40, 40, 43, 45, 47, 47, 47, 47, 47, 47, 47, 47, 37, 38, 39, 40,
- 40, 40, 43, 45, 47, 47, 47, 47, 47, 47, 47, 47, 39, 39, 40, 41, 41, 41,
- 43, 46, 47, 47, 47, 48, 48, 48, 48, 48, 41, 41, 42, 42, 42, 42, 44, 46,
- 47, 47, 47, 48, 49, 49, 49, 49, 42, 42, 42, 43, 43, 43, 44, 46, 47, 47,
- 47, 48, 49, 50, 50, 50, 42, 42, 42, 43, 43, 43, 44, 46, 47, 47, 47, 48,
- 49, 50, 50, 50, 42, 42, 42, 43, 43, 43, 44, 46, 47, 47, 47, 48, 49, 50,
- 50, 50, 44, 44, 44, 44, 44, 44, 45, 47, 47, 47, 47, 49, 50, 51, 51, 51,
- 47, 46, 46, 46, 46, 46, 46, 47, 48, 48, 48, 49, 51, 52, 52, 52, 49, 48,
- 47, 46, 46, 46, 47, 48, 48, 48, 48, 50, 52, 53, 53, 53, 49, 48, 47, 46,
- 46, 46, 47, 48, 48, 48, 48, 50, 52, 53, 53, 53, 49, 48, 47, 46, 46, 46,
- 47, 48, 48, 48, 48, 50, 52, 53, 53, 53, 49, 48, 47, 46, 46, 46, 47, 47,
- 47, 47, 47, 49, 52, 53, 53, 53,
- /* Size 32x16 */
- 32, 31, 31, 31, 31, 31, 31, 30, 30, 30, 30, 31, 33, 33, 33, 33, 35, 36,
- 37, 37, 37, 39, 41, 42, 42, 42, 44, 47, 49, 49, 49, 49, 31, 31, 31, 31,
- 31, 31, 31, 31, 31, 31, 31, 32, 34, 34, 34, 34, 36, 37, 38, 38, 38, 39,
- 41, 42, 42, 42, 44, 46, 48, 48, 48, 48, 31, 31, 31, 31, 31, 31, 31, 31,
- 32, 32, 32, 33, 34, 35, 35, 35, 37, 38, 39, 39, 39, 40, 42, 42, 42, 42,
- 44, 46, 47, 47, 47, 47, 31, 31, 31, 31, 31, 31, 32, 32, 32, 32, 32, 33,
- 35, 36, 36, 36, 37, 39, 40, 40, 40, 41, 42, 43, 43, 43, 44, 46, 46, 46,
- 46, 46, 31, 31, 31, 31, 31, 31, 32, 32, 32, 32, 32, 33, 35, 36, 36, 36,
- 37, 39, 40, 40, 40, 41, 42, 43, 43, 43, 44, 46, 46, 46, 46, 46, 31, 31,
- 31, 31, 31, 31, 32, 32, 32, 32, 32, 33, 35, 36, 36, 36, 37, 39, 40, 40,
- 40, 41, 42, 43, 43, 43, 44, 46, 46, 46, 46, 46, 33, 33, 34, 34, 34, 34,
- 34, 34, 35, 35, 35, 36, 37, 38, 38, 38, 40, 42, 43, 43, 43, 43, 44, 44,
- 44, 44, 45, 46, 47, 47, 47, 47, 35, 36, 36, 37, 37, 37, 37, 38, 38, 38,
- 38, 39, 40, 41, 41, 41, 43, 44, 45, 45, 45, 46, 46, 46, 46, 46, 47, 47,
- 48, 48, 48, 47, 37, 37, 38, 38, 38, 38, 39, 39, 40, 40, 40, 41, 42, 43,
- 43, 43, 44, 46, 47, 47, 47, 47, 47, 47, 47, 47, 47, 48, 48, 48, 48, 47,
- 37, 37, 38, 38, 38, 38, 39, 39, 40, 40, 40, 41, 42, 43, 43, 43, 44, 46,
- 47, 47, 47, 47, 47, 47, 47, 47, 47, 48, 48, 48, 48, 47, 37, 37, 38, 38,
- 38, 38, 39, 39, 40, 40, 40, 41, 42, 43, 43, 43, 44, 46, 47, 47, 47, 47,
- 47, 47, 47, 47, 47, 48, 48, 48, 48, 47, 40, 41, 41, 41, 41, 41, 41, 42,
- 42, 42, 42, 43, 44, 44, 44, 44, 45, 47, 47, 47, 47, 48, 48, 48, 48, 48,
- 49, 49, 50, 50, 50, 49, 45, 45, 45, 45, 45, 45, 45, 44, 44, 44, 44, 45,
- 46, 46, 46, 46, 46, 47, 47, 47, 47, 48, 49, 49, 49, 49, 50, 51, 52, 52,
- 52, 52, 48, 48, 47, 47, 47, 47, 46, 46, 46, 46, 46, 46, 47, 47, 47, 47,
- 47, 47, 47, 47, 47, 48, 49, 50, 50, 50, 51, 52, 53, 53, 53, 53, 48, 48,
- 47, 47, 47, 47, 46, 46, 46, 46, 46, 46, 47, 47, 47, 47, 47, 47, 47, 47,
- 47, 48, 49, 50, 50, 50, 51, 52, 53, 53, 53, 53, 48, 48, 47, 47, 47, 47,
- 46, 46, 46, 46, 46, 46, 47, 47, 47, 47, 47, 47, 47, 47, 47, 48, 49, 50,
- 50, 50, 51, 52, 53, 53, 53, 53,
- /* Size 4x16 */
- 31, 31, 37, 48, 31, 31, 38, 47, 31, 31, 38, 47, 31, 32, 39, 46, 31, 32,
- 40, 46, 31, 32, 40, 46, 34, 35, 42, 47, 34, 36, 43, 47, 36, 37, 44, 47,
- 38, 40, 47, 47, 38, 40, 47, 47, 41, 42, 47, 49, 42, 43, 47, 50, 44, 44,
- 47, 51, 48, 46, 48, 53, 48, 46, 48, 53,
- /* Size 16x4 */
- 31, 31, 31, 31, 31, 31, 34, 34, 36, 38, 38, 41, 42, 44, 48, 48, 31, 31,
- 31, 32, 32, 32, 35, 36, 37, 40, 40, 42, 43, 44, 46, 46, 37, 38, 38, 39,
- 40, 40, 42, 43, 44, 47, 47, 47, 47, 47, 48, 48, 48, 47, 47, 46, 46, 46,
- 47, 47, 47, 47, 47, 49, 50, 51, 53, 53,
- /* Size 8x32 */
- 32, 31, 31, 33, 37, 37, 45, 48, 31, 31, 31, 33, 37, 37, 45, 48, 31, 31,
- 31, 34, 38, 38, 45, 47, 31, 31, 31, 34, 38, 38, 45, 47, 31, 31, 31, 34,
- 38, 38, 45, 47, 31, 31, 31, 34, 38, 38, 45, 47, 31, 31, 32, 34, 39, 39,
- 45, 46, 30, 31, 32, 34, 39, 39, 44, 46, 30, 32, 32, 35, 40, 40, 44, 46,
- 30, 32, 32, 35, 40, 40, 44, 46, 30, 32, 32, 35, 40, 40, 44, 46, 31, 33,
- 33, 36, 41, 41, 45, 46, 33, 34, 35, 37, 42, 42, 46, 47, 33, 35, 36, 38,
- 43, 43, 46, 47, 33, 35, 36, 38, 43, 43, 46, 47, 33, 35, 36, 38, 43, 43,
- 46, 47, 35, 37, 37, 40, 44, 44, 46, 47, 36, 38, 39, 42, 46, 46, 47, 47,
- 37, 39, 40, 43, 47, 47, 47, 47, 37, 39, 40, 43, 47, 47, 47, 47, 37, 39,
- 40, 43, 47, 47, 47, 47, 39, 40, 41, 43, 47, 47, 48, 48, 41, 42, 42, 44,
- 47, 47, 49, 49, 42, 42, 43, 44, 47, 47, 49, 50, 42, 42, 43, 44, 47, 47,
- 49, 50, 42, 42, 43, 44, 47, 47, 49, 50, 44, 44, 44, 45, 47, 47, 50, 51,
- 47, 46, 46, 46, 48, 48, 51, 52, 49, 47, 46, 47, 48, 48, 52, 53, 49, 47,
- 46, 47, 48, 48, 52, 53, 49, 47, 46, 47, 48, 48, 52, 53, 49, 47, 46, 47,
- 47, 47, 52, 53,
- /* Size 32x8 */
- 32, 31, 31, 31, 31, 31, 31, 30, 30, 30, 30, 31, 33, 33, 33, 33, 35, 36,
- 37, 37, 37, 39, 41, 42, 42, 42, 44, 47, 49, 49, 49, 49, 31, 31, 31, 31,
- 31, 31, 31, 31, 32, 32, 32, 33, 34, 35, 35, 35, 37, 38, 39, 39, 39, 40,
- 42, 42, 42, 42, 44, 46, 47, 47, 47, 47, 31, 31, 31, 31, 31, 31, 32, 32,
- 32, 32, 32, 33, 35, 36, 36, 36, 37, 39, 40, 40, 40, 41, 42, 43, 43, 43,
- 44, 46, 46, 46, 46, 46, 33, 33, 34, 34, 34, 34, 34, 34, 35, 35, 35, 36,
- 37, 38, 38, 38, 40, 42, 43, 43, 43, 43, 44, 44, 44, 44, 45, 46, 47, 47,
- 47, 47, 37, 37, 38, 38, 38, 38, 39, 39, 40, 40, 40, 41, 42, 43, 43, 43,
- 44, 46, 47, 47, 47, 47, 47, 47, 47, 47, 47, 48, 48, 48, 48, 47, 37, 37,
- 38, 38, 38, 38, 39, 39, 40, 40, 40, 41, 42, 43, 43, 43, 44, 46, 47, 47,
- 47, 47, 47, 47, 47, 47, 47, 48, 48, 48, 48, 47, 45, 45, 45, 45, 45, 45,
- 45, 44, 44, 44, 44, 45, 46, 46, 46, 46, 46, 47, 47, 47, 47, 48, 49, 49,
- 49, 49, 50, 51, 52, 52, 52, 52, 48, 48, 47, 47, 47, 47, 46, 46, 46, 46,
- 46, 46, 47, 47, 47, 47, 47, 47, 47, 47, 47, 48, 49, 50, 50, 50, 51, 52,
- 53, 53, 53, 53 },
- },
- {
- { /* Luma */
- /* Size 4x4 */
- 31, 32, 32, 32, 32, 32, 32, 33, 32, 32, 33, 34, 32, 33, 34, 35,
- /* Size 8x8 */
- 31, 31, 31, 31, 32, 32, 32, 33, 31, 32, 32, 32, 32, 32, 32, 33, 31, 32,
- 32, 32, 32, 32, 32, 33, 31, 32, 32, 32, 32, 32, 32, 33, 32, 32, 32, 32,
- 33, 33, 34, 35, 32, 32, 32, 32, 33, 34, 34, 35, 32, 32, 32, 32, 34, 34,
- 35, 36, 33, 33, 33, 33, 35, 35, 36, 38,
- /* Size 16x16 */
- 32, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 32, 32, 32, 32, 34, 31, 31,
- 31, 31, 31, 31, 31, 32, 32, 32, 32, 32, 32, 32, 33, 34, 31, 31, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 33, 34, 31, 31, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 33, 34, 31, 31, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 33, 34, 31, 31, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 33, 33, 31, 31, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 33, 33, 31, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 33, 33, 33,
- 33, 34, 31, 32, 32, 32, 32, 32, 32, 32, 33, 33, 33, 33, 33, 33, 34, 35,
- 31, 32, 32, 32, 32, 32, 32, 32, 33, 33, 33, 33, 34, 34, 34, 35, 31, 32,
- 32, 32, 32, 32, 32, 32, 33, 33, 33, 33, 34, 34, 34, 35, 32, 32, 32, 32,
- 32, 32, 32, 33, 33, 33, 33, 34, 35, 35, 35, 36, 32, 32, 32, 32, 32, 32,
- 32, 33, 33, 34, 34, 35, 35, 35, 36, 37, 32, 32, 32, 32, 32, 32, 32, 33,
- 33, 34, 34, 35, 35, 35, 36, 37, 32, 33, 33, 33, 33, 33, 33, 33, 34, 34,
- 34, 35, 36, 36, 36, 38, 34, 34, 34, 34, 34, 33, 33, 34, 35, 35, 35, 36,
- 37, 37, 38, 39,
- /* Size 32x32 */
- 32, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31,
- 31, 31, 31, 32, 32, 32, 32, 32, 32, 32, 32, 33, 34, 34, 31, 31, 31, 31,
- 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 33, 34, 34, 31, 31, 31, 31, 31, 31, 31, 31,
- 31, 31, 31, 31, 31, 31, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 33, 33, 34, 34, 31, 31, 31, 31, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 33, 33,
- 34, 34, 31, 31, 31, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 33, 33, 34, 34, 31, 31,
- 31, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 33, 33, 34, 34, 31, 31, 31, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 33, 33, 34, 34, 31, 31, 31, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 33, 33, 34, 34, 31, 31, 31, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 33, 33, 34, 34,
- 31, 31, 31, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 33, 33, 33, 33, 31, 31, 31, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 33, 33, 33, 33, 33, 31, 31, 31, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 33, 33, 33, 33, 33, 31, 31, 31, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 33, 33, 33,
- 33, 33, 31, 31, 31, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 33, 33, 33, 33, 33, 31, 31,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 33, 33, 33, 33, 33, 33, 33, 33, 33, 34, 34, 31, 31, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 33, 33, 33, 33, 33, 33, 33, 33,
- 33, 33, 33, 33, 34, 34, 34, 34, 31, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 34,
- 34, 34, 35, 35, 31, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 33, 33, 33, 33, 33, 33, 33, 33, 34, 34, 34, 34, 34, 34, 35, 35, 35,
- 31, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 33, 33, 33,
- 33, 33, 33, 33, 33, 34, 34, 34, 34, 34, 34, 35, 35, 35, 31, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 33, 33, 33, 33, 33, 33, 33,
- 33, 34, 34, 34, 34, 34, 34, 35, 35, 35, 31, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 33, 33, 33, 33, 33, 33, 33, 33, 34, 34, 34,
- 34, 34, 34, 35, 35, 35, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 33, 33, 33, 33, 33, 33, 33, 34, 34, 34, 34, 34, 34, 34, 35, 35,
- 35, 35, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 33, 33,
- 33, 33, 33, 33, 33, 34, 34, 34, 35, 35, 35, 35, 35, 35, 36, 36, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 33, 33, 33, 34, 34, 34,
- 34, 34, 34, 35, 35, 35, 35, 35, 35, 36, 36, 36, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 33, 33, 33, 34, 34, 34, 34, 34, 35, 35,
- 35, 35, 35, 35, 36, 36, 37, 37, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 33, 33, 33, 34, 34, 34, 34, 34, 35, 35, 35, 35, 35, 35,
- 36, 36, 37, 37, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 33, 33, 33, 34, 34, 34, 34, 34, 35, 35, 35, 35, 35, 35, 36, 36, 37, 37,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 33, 33, 33, 33, 33, 33, 34, 34,
- 34, 34, 34, 34, 35, 35, 35, 35, 35, 35, 36, 36, 37, 37, 32, 32, 33, 33,
- 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 34, 34, 34, 34, 34, 34, 35,
- 35, 35, 36, 36, 36, 36, 36, 37, 38, 38, 33, 33, 33, 33, 33, 33, 33, 33,
- 33, 33, 33, 33, 33, 33, 33, 34, 34, 35, 35, 35, 35, 35, 35, 36, 36, 36,
- 36, 36, 37, 38, 38, 38, 34, 34, 34, 34, 34, 34, 34, 34, 34, 33, 33, 33,
- 33, 33, 34, 34, 35, 35, 35, 35, 35, 35, 36, 36, 37, 37, 37, 37, 38, 38,
- 39, 39, 34, 34, 34, 34, 34, 34, 34, 34, 34, 33, 33, 33, 33, 33, 34, 34,
- 35, 35, 35, 35, 35, 35, 36, 36, 37, 37, 37, 37, 38, 38, 39, 39,
- /* Size 4x8 */
- 31, 31, 32, 32, 31, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 33, 32, 32,
- 33, 34, 32, 32, 34, 34, 32, 33, 34, 35, 33, 33, 35, 36,
- /* Size 8x4 */
- 31, 31, 32, 32, 32, 32, 32, 33, 31, 32, 32, 32, 32, 32, 33, 33, 32, 32,
- 32, 32, 33, 34, 34, 35, 32, 32, 32, 33, 34, 34, 35, 36,
- /* Size 8x16 */
- 32, 31, 31, 31, 31, 32, 32, 32, 31, 31, 32, 32, 32, 32, 32, 33, 31, 32,
- 32, 32, 32, 32, 32, 33, 31, 32, 32, 32, 32, 32, 32, 33, 31, 32, 32, 32,
- 32, 32, 32, 33, 31, 32, 32, 32, 32, 33, 33, 33, 31, 32, 32, 32, 32, 33,
- 33, 33, 31, 32, 32, 32, 32, 33, 33, 33, 31, 32, 32, 32, 33, 34, 34, 34,
- 32, 32, 32, 32, 33, 34, 34, 34, 32, 32, 32, 32, 33, 34, 34, 34, 32, 32,
- 32, 32, 33, 35, 35, 35, 32, 32, 33, 33, 34, 35, 35, 36, 32, 32, 33, 33,
- 34, 35, 35, 36, 32, 33, 33, 33, 34, 36, 36, 36, 34, 34, 34, 34, 35, 37,
- 37, 38,
- /* Size 16x8 */
- 32, 31, 31, 31, 31, 31, 31, 31, 31, 32, 32, 32, 32, 32, 32, 34, 31, 31,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 33, 34, 31, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 33, 33, 33, 34, 31, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 33, 33, 33, 34, 31, 32, 32, 32, 32, 32, 32, 32,
- 33, 33, 33, 33, 34, 34, 34, 35, 32, 32, 32, 32, 32, 33, 33, 33, 34, 34,
- 34, 35, 35, 35, 36, 37, 32, 32, 32, 32, 32, 33, 33, 33, 34, 34, 34, 35,
- 35, 35, 36, 37, 32, 33, 33, 33, 33, 33, 33, 33, 34, 34, 34, 35, 36, 36,
- 36, 38,
- /* Size 16x32 */
- 32, 31, 31, 31, 31, 31, 31, 31, 31, 32, 32, 32, 32, 32, 32, 34, 31, 31,
- 31, 31, 31, 31, 31, 31, 32, 32, 32, 32, 32, 32, 33, 34, 31, 31, 31, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 33, 34, 31, 31, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 33, 34, 31, 31, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 33, 34, 31, 31, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 33, 34, 31, 31, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 33, 34, 31, 31, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 33, 34, 31, 31, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 33, 34,
- 31, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 33, 33, 31, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 33, 33, 33, 33, 33, 33, 31, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 33, 33, 33, 33, 33, 33, 31, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 33, 33, 33, 33, 33, 33, 31, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 33, 33, 33, 33, 33, 33, 31, 32, 32, 32, 32, 32, 32, 32, 32, 33,
- 33, 33, 33, 33, 33, 34, 31, 32, 32, 32, 32, 32, 32, 32, 33, 33, 33, 33,
- 33, 33, 34, 34, 31, 32, 32, 32, 32, 32, 32, 32, 33, 33, 34, 34, 34, 34,
- 34, 35, 32, 32, 32, 32, 32, 32, 32, 33, 33, 33, 34, 34, 34, 34, 34, 35,
- 32, 32, 32, 32, 32, 32, 32, 33, 33, 33, 34, 34, 34, 34, 34, 35, 32, 32,
- 32, 32, 32, 32, 32, 33, 33, 33, 34, 34, 34, 34, 34, 35, 32, 32, 32, 32,
- 32, 32, 32, 33, 33, 33, 34, 34, 34, 34, 34, 35, 32, 32, 32, 32, 32, 32,
- 32, 33, 33, 34, 34, 34, 34, 34, 35, 35, 32, 32, 32, 32, 32, 32, 32, 33,
- 33, 34, 35, 35, 35, 35, 35, 36, 32, 32, 32, 32, 33, 33, 33, 33, 33, 34,
- 35, 35, 35, 35, 36, 36, 32, 32, 32, 32, 33, 33, 33, 33, 34, 34, 35, 35,
- 35, 35, 36, 37, 32, 32, 32, 32, 33, 33, 33, 33, 34, 34, 35, 35, 35, 35,
- 36, 37, 32, 32, 32, 32, 33, 33, 33, 33, 34, 34, 35, 35, 35, 35, 36, 37,
- 32, 32, 32, 33, 33, 33, 33, 33, 34, 34, 35, 35, 35, 35, 36, 37, 32, 33,
- 33, 33, 33, 33, 33, 33, 34, 35, 36, 36, 36, 36, 36, 38, 33, 33, 33, 33,
- 33, 33, 33, 34, 34, 35, 36, 36, 36, 36, 37, 38, 34, 34, 34, 34, 34, 34,
- 34, 34, 35, 36, 37, 37, 37, 37, 38, 39, 34, 34, 34, 34, 34, 34, 34, 34,
- 35, 36, 37, 37, 37, 37, 38, 39,
- /* Size 32x16 */
- 32, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 33, 34, 34, 31, 31, 31, 31,
- 31, 31, 31, 31, 31, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 33, 33, 34, 34, 31, 31, 31, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 33, 33, 34, 34, 31, 31, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 33, 33, 33,
- 34, 34, 31, 31, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 33, 33, 33, 33, 33, 33, 33, 34, 34, 31, 31,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 33, 33, 33, 33, 33, 33, 33, 34, 34, 31, 31, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 33,
- 33, 33, 33, 33, 33, 33, 34, 34, 31, 31, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33,
- 33, 34, 34, 34, 31, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 33, 33, 33, 33, 33, 33, 33, 33, 33, 34, 34, 34, 34, 34, 34, 35, 35,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 33, 33, 33, 33,
- 33, 33, 33, 34, 34, 34, 34, 34, 34, 34, 35, 35, 36, 36, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 33, 33, 33, 33, 33, 33, 34, 34, 34, 34, 34, 34,
- 35, 35, 35, 35, 35, 35, 36, 36, 37, 37, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 33, 33, 33, 33, 33, 33, 34, 34, 34, 34, 34, 34, 35, 35, 35, 35,
- 35, 35, 36, 36, 37, 37, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 33, 33,
- 33, 33, 33, 33, 34, 34, 34, 34, 34, 34, 35, 35, 35, 35, 35, 35, 36, 36,
- 37, 37, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 33, 33, 33, 33, 33, 33,
- 34, 34, 34, 34, 34, 34, 35, 35, 35, 35, 35, 35, 36, 36, 37, 37, 32, 33,
- 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 34, 34, 34, 34, 34,
- 34, 35, 35, 36, 36, 36, 36, 36, 36, 37, 38, 38, 34, 34, 34, 34, 34, 34,
- 34, 34, 34, 33, 33, 33, 33, 33, 34, 34, 35, 35, 35, 35, 35, 35, 36, 36,
- 37, 37, 37, 37, 38, 38, 39, 39,
- /* Size 4x16 */
- 31, 31, 32, 32, 31, 32, 32, 32, 31, 32, 32, 32, 31, 32, 32, 32, 31, 32,
- 32, 32, 32, 32, 32, 33, 32, 32, 32, 33, 32, 32, 33, 33, 32, 32, 33, 34,
- 32, 32, 33, 34, 32, 32, 33, 34, 32, 32, 34, 35, 32, 33, 34, 35, 32, 33,
- 34, 35, 33, 33, 35, 36, 34, 34, 36, 37,
- /* Size 16x4 */
- 31, 31, 31, 31, 31, 32, 32, 32, 32, 32, 32, 32, 32, 32, 33, 34, 31, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 33, 33, 33, 34, 32, 32, 32, 32,
- 32, 32, 32, 33, 33, 33, 33, 34, 34, 34, 35, 36, 32, 32, 32, 32, 32, 33,
- 33, 33, 34, 34, 34, 35, 35, 35, 36, 37,
- /* Size 8x32 */
- 32, 31, 31, 31, 31, 32, 32, 32, 31, 31, 31, 31, 32, 32, 32, 33, 31, 31,
- 32, 32, 32, 32, 32, 33, 31, 32, 32, 32, 32, 32, 32, 33, 31, 32, 32, 32,
- 32, 32, 32, 33, 31, 32, 32, 32, 32, 32, 32, 33, 31, 32, 32, 32, 32, 32,
- 32, 33, 31, 32, 32, 32, 32, 32, 32, 33, 31, 32, 32, 32, 32, 32, 32, 33,
- 31, 32, 32, 32, 32, 32, 32, 33, 31, 32, 32, 32, 32, 33, 33, 33, 31, 32,
- 32, 32, 32, 33, 33, 33, 31, 32, 32, 32, 32, 33, 33, 33, 31, 32, 32, 32,
- 32, 33, 33, 33, 31, 32, 32, 32, 32, 33, 33, 33, 31, 32, 32, 32, 33, 33,
- 33, 34, 31, 32, 32, 32, 33, 34, 34, 34, 32, 32, 32, 32, 33, 34, 34, 34,
- 32, 32, 32, 32, 33, 34, 34, 34, 32, 32, 32, 32, 33, 34, 34, 34, 32, 32,
- 32, 32, 33, 34, 34, 34, 32, 32, 32, 32, 33, 34, 34, 35, 32, 32, 32, 32,
- 33, 35, 35, 35, 32, 32, 33, 33, 33, 35, 35, 36, 32, 32, 33, 33, 34, 35,
- 35, 36, 32, 32, 33, 33, 34, 35, 35, 36, 32, 32, 33, 33, 34, 35, 35, 36,
- 32, 32, 33, 33, 34, 35, 35, 36, 32, 33, 33, 33, 34, 36, 36, 36, 33, 33,
- 33, 33, 34, 36, 36, 37, 34, 34, 34, 34, 35, 37, 37, 38, 34, 34, 34, 34,
- 35, 37, 37, 38,
- /* Size 32x8 */
- 32, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 33, 34, 34, 31, 31, 31, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 33, 33, 34, 34, 31, 31, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 33, 33, 33,
- 33, 33, 33, 33, 34, 34, 31, 31, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 33, 33, 33, 33, 33, 33, 33,
- 34, 34, 31, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 33,
- 33, 33, 33, 33, 33, 33, 33, 33, 34, 34, 34, 34, 34, 34, 35, 35, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 33, 33, 33, 33, 33, 33, 34, 34, 34, 34,
- 34, 34, 35, 35, 35, 35, 35, 35, 36, 36, 37, 37, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 33, 33, 33, 33, 33, 33, 34, 34, 34, 34, 34, 34, 35, 35,
- 35, 35, 35, 35, 36, 36, 37, 37, 32, 33, 33, 33, 33, 33, 33, 33, 33, 33,
- 33, 33, 33, 33, 33, 34, 34, 34, 34, 34, 34, 35, 35, 36, 36, 36, 36, 36,
- 36, 37, 38, 38 },
- { /* Chroma */
- /* Size 4x4 */
- 31, 31, 34, 38, 31, 32, 35, 40, 34, 35, 39, 43, 38, 40, 43, 47,
- /* Size 8x8 */
- 31, 31, 31, 30, 34, 35, 37, 40, 31, 31, 31, 31, 34, 35, 38, 41, 31, 31,
- 31, 31, 35, 36, 39, 41, 30, 31, 31, 32, 35, 36, 40, 42, 34, 34, 35, 35,
- 39, 40, 43, 44, 35, 35, 36, 36, 40, 41, 44, 45, 37, 38, 39, 40, 43, 44,
- 47, 47, 40, 41, 41, 42, 44, 45, 47, 48,
- /* Size 16x16 */
- 32, 31, 31, 31, 31, 30, 30, 31, 33, 33, 33, 35, 36, 36, 38, 41, 31, 31,
- 31, 31, 31, 31, 31, 31, 33, 34, 34, 36, 37, 37, 39, 42, 31, 31, 31, 31,
- 31, 31, 31, 32, 34, 34, 34, 37, 38, 38, 40, 42, 31, 31, 31, 31, 31, 31,
- 31, 32, 34, 34, 34, 37, 38, 38, 40, 42, 31, 31, 31, 31, 31, 31, 31, 32,
- 34, 35, 35, 37, 39, 39, 40, 42, 30, 31, 31, 31, 31, 32, 32, 32, 34, 35,
- 35, 38, 40, 40, 41, 42, 30, 31, 31, 31, 31, 32, 32, 32, 34, 35, 35, 38,
- 40, 40, 41, 42, 31, 31, 32, 32, 32, 32, 32, 33, 35, 36, 36, 38, 40, 40,
- 41, 43, 33, 33, 34, 34, 34, 34, 34, 35, 37, 38, 38, 41, 42, 42, 43, 44,
- 33, 34, 34, 34, 35, 35, 35, 36, 38, 39, 39, 41, 43, 43, 44, 45, 33, 34,
- 34, 34, 35, 35, 35, 36, 38, 39, 39, 41, 43, 43, 44, 45, 35, 36, 37, 37,
- 37, 38, 38, 38, 41, 41, 41, 44, 46, 46, 46, 46, 36, 37, 38, 38, 39, 40,
- 40, 40, 42, 43, 43, 46, 47, 47, 47, 47, 36, 37, 38, 38, 39, 40, 40, 40,
- 42, 43, 43, 46, 47, 47, 47, 47, 38, 39, 40, 40, 40, 41, 41, 41, 43, 44,
- 44, 46, 47, 47, 47, 48, 41, 42, 42, 42, 42, 42, 42, 43, 44, 45, 45, 46,
- 47, 47, 48, 48,
- /* Size 32x32 */
- 32, 31, 31, 31, 31, 31, 31, 31, 31, 30, 30, 30, 30, 30, 31, 32, 33, 33,
- 33, 33, 33, 34, 35, 36, 36, 36, 36, 37, 38, 40, 41, 41, 31, 31, 31, 31,
- 31, 31, 31, 31, 31, 31, 30, 30, 30, 30, 31, 32, 33, 34, 34, 34, 34, 35,
- 36, 37, 37, 37, 37, 37, 39, 40, 42, 42, 31, 31, 31, 31, 31, 31, 31, 31,
- 31, 31, 31, 31, 31, 31, 31, 32, 33, 34, 34, 34, 34, 35, 36, 37, 37, 37,
- 37, 38, 39, 40, 42, 42, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31,
- 31, 31, 32, 32, 34, 34, 34, 34, 34, 35, 36, 38, 38, 38, 38, 38, 40, 41,
- 42, 42, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 32, 33,
- 34, 34, 34, 34, 34, 35, 37, 38, 38, 38, 38, 39, 40, 41, 42, 42, 31, 31,
- 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 32, 33, 34, 34, 34, 34,
- 34, 35, 37, 38, 38, 38, 38, 39, 40, 41, 42, 42, 31, 31, 31, 31, 31, 31,
- 31, 31, 31, 31, 31, 31, 31, 31, 32, 33, 34, 34, 34, 34, 34, 35, 37, 38,
- 38, 38, 38, 39, 40, 41, 42, 42, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31,
- 31, 31, 31, 31, 32, 33, 34, 34, 34, 34, 34, 36, 37, 38, 38, 38, 38, 39,
- 40, 41, 42, 42, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31,
- 32, 33, 34, 35, 35, 35, 35, 36, 37, 38, 39, 39, 39, 39, 40, 41, 42, 42,
- 30, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 32, 33, 34, 35,
- 35, 35, 35, 36, 37, 39, 39, 39, 39, 40, 40, 41, 42, 42, 30, 30, 31, 31,
- 31, 31, 31, 31, 31, 31, 32, 32, 32, 32, 32, 33, 34, 35, 35, 35, 35, 36,
- 38, 39, 40, 40, 40, 40, 41, 42, 42, 42, 30, 30, 31, 31, 31, 31, 31, 31,
- 31, 31, 32, 32, 32, 32, 32, 33, 34, 35, 35, 35, 35, 36, 38, 39, 40, 40,
- 40, 40, 41, 42, 42, 42, 30, 30, 31, 31, 31, 31, 31, 31, 31, 31, 32, 32,
- 32, 32, 32, 33, 34, 35, 35, 35, 35, 36, 38, 39, 40, 40, 40, 40, 41, 42,
- 42, 42, 30, 30, 31, 31, 31, 31, 31, 31, 31, 31, 32, 32, 32, 32, 32, 33,
- 34, 35, 35, 35, 35, 36, 38, 39, 40, 40, 40, 40, 41, 42, 42, 42, 31, 31,
- 31, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 33, 34, 35, 36, 36, 36,
- 36, 37, 38, 40, 40, 40, 40, 41, 41, 42, 43, 43, 32, 32, 32, 32, 33, 33,
- 33, 33, 33, 33, 33, 33, 33, 33, 34, 35, 36, 37, 37, 37, 37, 38, 39, 41,
- 41, 41, 41, 42, 42, 43, 43, 43, 33, 33, 33, 34, 34, 34, 34, 34, 34, 34,
- 34, 34, 34, 34, 35, 36, 37, 38, 38, 38, 38, 39, 41, 42, 42, 42, 42, 43,
- 43, 44, 44, 44, 33, 34, 34, 34, 34, 34, 34, 34, 35, 35, 35, 35, 35, 35,
- 36, 37, 38, 39, 39, 39, 39, 40, 41, 43, 43, 43, 43, 43, 44, 44, 45, 45,
- 33, 34, 34, 34, 34, 34, 34, 34, 35, 35, 35, 35, 35, 35, 36, 37, 38, 39,
- 39, 39, 39, 40, 41, 43, 43, 43, 43, 43, 44, 44, 45, 45, 33, 34, 34, 34,
- 34, 34, 34, 34, 35, 35, 35, 35, 35, 35, 36, 37, 38, 39, 39, 39, 39, 40,
- 41, 43, 43, 43, 43, 43, 44, 44, 45, 45, 33, 34, 34, 34, 34, 34, 34, 34,
- 35, 35, 35, 35, 35, 35, 36, 37, 38, 39, 39, 39, 39, 40, 41, 43, 43, 43,
- 43, 43, 44, 44, 45, 45, 34, 35, 35, 35, 35, 35, 35, 36, 36, 36, 36, 36,
- 36, 36, 37, 38, 39, 40, 40, 40, 40, 41, 42, 44, 44, 44, 44, 44, 45, 45,
- 45, 45, 35, 36, 36, 36, 37, 37, 37, 37, 37, 37, 38, 38, 38, 38, 38, 39,
- 41, 41, 41, 41, 41, 42, 44, 45, 46, 46, 46, 46, 46, 46, 46, 46, 36, 37,
- 37, 38, 38, 38, 38, 38, 38, 39, 39, 39, 39, 39, 40, 41, 42, 43, 43, 43,
- 43, 44, 45, 46, 47, 47, 47, 47, 47, 47, 47, 47, 36, 37, 37, 38, 38, 38,
- 38, 38, 39, 39, 40, 40, 40, 40, 40, 41, 42, 43, 43, 43, 43, 44, 46, 47,
- 47, 47, 47, 47, 47, 47, 47, 47, 36, 37, 37, 38, 38, 38, 38, 38, 39, 39,
- 40, 40, 40, 40, 40, 41, 42, 43, 43, 43, 43, 44, 46, 47, 47, 47, 47, 47,
- 47, 47, 47, 47, 36, 37, 37, 38, 38, 38, 38, 38, 39, 39, 40, 40, 40, 40,
- 40, 41, 42, 43, 43, 43, 43, 44, 46, 47, 47, 47, 47, 47, 47, 47, 47, 47,
- 37, 37, 38, 38, 39, 39, 39, 39, 39, 40, 40, 40, 40, 40, 41, 42, 43, 43,
- 43, 43, 43, 44, 46, 47, 47, 47, 47, 47, 47, 47, 47, 47, 38, 39, 39, 40,
- 40, 40, 40, 40, 40, 40, 41, 41, 41, 41, 41, 42, 43, 44, 44, 44, 44, 45,
- 46, 47, 47, 47, 47, 47, 47, 48, 48, 48, 40, 40, 40, 41, 41, 41, 41, 41,
- 41, 41, 42, 42, 42, 42, 42, 43, 44, 44, 44, 44, 44, 45, 46, 47, 47, 47,
- 47, 47, 48, 48, 48, 48, 41, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42,
- 42, 42, 43, 43, 44, 45, 45, 45, 45, 45, 46, 47, 47, 47, 47, 47, 48, 48,
- 48, 48, 41, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 43, 43,
- 44, 45, 45, 45, 45, 45, 46, 47, 47, 47, 47, 47, 48, 48, 48, 48,
- /* Size 4x8 */
- 31, 31, 35, 37, 31, 31, 36, 38, 31, 32, 37, 39, 31, 32, 37, 40, 34, 36,
- 40, 43, 35, 37, 42, 44, 38, 40, 45, 47, 41, 42, 45, 47,
- /* Size 8x4 */
- 31, 31, 31, 31, 34, 35, 38, 41, 31, 31, 32, 32, 36, 37, 40, 42, 35, 36,
- 37, 37, 40, 42, 45, 45, 37, 38, 39, 40, 43, 44, 47, 47,
- /* Size 8x16 */
- 32, 31, 31, 31, 33, 37, 37, 38, 31, 31, 31, 31, 33, 38, 38, 39, 31, 31,
- 31, 31, 34, 38, 38, 40, 31, 31, 31, 31, 34, 38, 38, 40, 31, 31, 32, 32,
- 34, 39, 39, 40, 30, 31, 32, 32, 35, 40, 40, 41, 30, 31, 32, 32, 35, 40,
- 40, 41, 31, 32, 33, 33, 35, 40, 40, 41, 33, 34, 35, 35, 37, 42, 42, 43,
- 33, 35, 36, 36, 38, 43, 43, 44, 33, 35, 36, 36, 38, 43, 43, 44, 35, 37,
- 38, 38, 41, 45, 45, 46, 37, 39, 40, 40, 43, 47, 47, 47, 37, 39, 40, 40,
- 43, 47, 47, 47, 39, 40, 41, 41, 43, 47, 47, 47, 42, 42, 43, 43, 44, 47,
- 47, 48,
- /* Size 16x8 */
- 32, 31, 31, 31, 31, 30, 30, 31, 33, 33, 33, 35, 37, 37, 39, 42, 31, 31,
- 31, 31, 31, 31, 31, 32, 34, 35, 35, 37, 39, 39, 40, 42, 31, 31, 31, 31,
- 32, 32, 32, 33, 35, 36, 36, 38, 40, 40, 41, 43, 31, 31, 31, 31, 32, 32,
- 32, 33, 35, 36, 36, 38, 40, 40, 41, 43, 33, 33, 34, 34, 34, 35, 35, 35,
- 37, 38, 38, 41, 43, 43, 43, 44, 37, 38, 38, 38, 39, 40, 40, 40, 42, 43,
- 43, 45, 47, 47, 47, 47, 37, 38, 38, 38, 39, 40, 40, 40, 42, 43, 43, 45,
- 47, 47, 47, 47, 38, 39, 40, 40, 40, 41, 41, 41, 43, 44, 44, 46, 47, 47,
- 47, 48,
- /* Size 16x32 */
- 32, 31, 31, 31, 31, 31, 31, 31, 33, 35, 37, 37, 37, 37, 38, 42, 31, 31,
- 31, 31, 31, 31, 31, 31, 33, 35, 37, 37, 37, 37, 39, 42, 31, 31, 31, 31,
- 31, 31, 31, 32, 33, 35, 38, 38, 38, 38, 39, 42, 31, 31, 31, 31, 31, 31,
- 31, 32, 34, 36, 38, 38, 38, 38, 40, 42, 31, 31, 31, 31, 31, 31, 31, 32,
- 34, 36, 38, 38, 38, 38, 40, 42, 31, 31, 31, 31, 31, 31, 31, 32, 34, 36,
- 38, 38, 38, 38, 40, 42, 31, 31, 31, 31, 31, 31, 31, 32, 34, 36, 38, 38,
- 38, 38, 40, 42, 31, 31, 31, 31, 31, 31, 31, 32, 34, 36, 38, 38, 38, 38,
- 40, 42, 31, 31, 31, 31, 32, 32, 32, 32, 34, 36, 39, 39, 39, 39, 40, 42,
- 30, 31, 31, 32, 32, 32, 32, 32, 34, 37, 39, 39, 39, 39, 40, 42, 30, 31,
- 31, 32, 32, 32, 32, 33, 35, 37, 40, 40, 40, 40, 41, 42, 30, 31, 31, 32,
- 32, 32, 32, 33, 35, 37, 40, 40, 40, 40, 41, 42, 30, 31, 31, 32, 32, 32,
- 32, 33, 35, 37, 40, 40, 40, 40, 41, 42, 30, 31, 31, 32, 32, 32, 32, 33,
- 35, 37, 40, 40, 40, 40, 41, 42, 31, 31, 32, 32, 33, 33, 33, 33, 35, 38,
- 40, 40, 40, 40, 41, 43, 32, 32, 33, 33, 34, 34, 34, 34, 36, 39, 41, 41,
- 41, 41, 42, 44, 33, 33, 34, 35, 35, 35, 35, 35, 37, 40, 42, 42, 42, 42,
- 43, 44, 33, 34, 35, 35, 36, 36, 36, 36, 38, 40, 43, 43, 43, 43, 44, 45,
- 33, 34, 35, 35, 36, 36, 36, 36, 38, 40, 43, 43, 43, 43, 44, 45, 33, 34,
- 35, 35, 36, 36, 36, 36, 38, 40, 43, 43, 43, 43, 44, 45, 33, 34, 35, 35,
- 36, 36, 36, 36, 38, 40, 43, 43, 43, 43, 44, 45, 34, 35, 36, 37, 37, 37,
- 37, 37, 39, 42, 44, 44, 44, 44, 45, 45, 35, 36, 37, 38, 38, 38, 38, 39,
- 41, 43, 45, 45, 45, 45, 46, 46, 36, 37, 38, 39, 39, 39, 39, 40, 42, 44,
- 47, 47, 47, 47, 47, 47, 37, 38, 39, 40, 40, 40, 40, 41, 43, 45, 47, 47,
- 47, 47, 47, 47, 37, 38, 39, 40, 40, 40, 40, 41, 43, 45, 47, 47, 47, 47,
- 47, 47, 37, 38, 39, 40, 40, 40, 40, 41, 43, 45, 47, 47, 47, 47, 47, 47,
- 37, 38, 39, 40, 40, 40, 40, 41, 43, 45, 47, 47, 47, 47, 47, 47, 39, 39,
- 40, 41, 41, 41, 41, 42, 43, 45, 47, 47, 47, 47, 47, 48, 40, 41, 41, 42,
- 42, 42, 42, 42, 44, 45, 47, 47, 47, 47, 47, 48, 42, 42, 42, 43, 43, 43,
- 43, 43, 44, 46, 47, 47, 47, 47, 48, 48, 42, 42, 42, 43, 43, 43, 43, 43,
- 44, 46, 47, 47, 47, 47, 48, 48,
- /* Size 32x16 */
- 32, 31, 31, 31, 31, 31, 31, 31, 31, 30, 30, 30, 30, 30, 31, 32, 33, 33,
- 33, 33, 33, 34, 35, 36, 37, 37, 37, 37, 39, 40, 42, 42, 31, 31, 31, 31,
- 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 32, 33, 34, 34, 34, 34, 35,
- 36, 37, 38, 38, 38, 38, 39, 41, 42, 42, 31, 31, 31, 31, 31, 31, 31, 31,
- 31, 31, 31, 31, 31, 31, 32, 33, 34, 35, 35, 35, 35, 36, 37, 38, 39, 39,
- 39, 39, 40, 41, 42, 42, 31, 31, 31, 31, 31, 31, 31, 31, 31, 32, 32, 32,
- 32, 32, 32, 33, 35, 35, 35, 35, 35, 37, 38, 39, 40, 40, 40, 40, 41, 42,
- 43, 43, 31, 31, 31, 31, 31, 31, 31, 31, 32, 32, 32, 32, 32, 32, 33, 34,
- 35, 36, 36, 36, 36, 37, 38, 39, 40, 40, 40, 40, 41, 42, 43, 43, 31, 31,
- 31, 31, 31, 31, 31, 31, 32, 32, 32, 32, 32, 32, 33, 34, 35, 36, 36, 36,
- 36, 37, 38, 39, 40, 40, 40, 40, 41, 42, 43, 43, 31, 31, 31, 31, 31, 31,
- 31, 31, 32, 32, 32, 32, 32, 32, 33, 34, 35, 36, 36, 36, 36, 37, 38, 39,
- 40, 40, 40, 40, 41, 42, 43, 43, 31, 31, 32, 32, 32, 32, 32, 32, 32, 32,
- 33, 33, 33, 33, 33, 34, 35, 36, 36, 36, 36, 37, 39, 40, 41, 41, 41, 41,
- 42, 42, 43, 43, 33, 33, 33, 34, 34, 34, 34, 34, 34, 34, 35, 35, 35, 35,
- 35, 36, 37, 38, 38, 38, 38, 39, 41, 42, 43, 43, 43, 43, 43, 44, 44, 44,
- 35, 35, 35, 36, 36, 36, 36, 36, 36, 37, 37, 37, 37, 37, 38, 39, 40, 40,
- 40, 40, 40, 42, 43, 44, 45, 45, 45, 45, 45, 45, 46, 46, 37, 37, 38, 38,
- 38, 38, 38, 38, 39, 39, 40, 40, 40, 40, 40, 41, 42, 43, 43, 43, 43, 44,
- 45, 47, 47, 47, 47, 47, 47, 47, 47, 47, 37, 37, 38, 38, 38, 38, 38, 38,
- 39, 39, 40, 40, 40, 40, 40, 41, 42, 43, 43, 43, 43, 44, 45, 47, 47, 47,
- 47, 47, 47, 47, 47, 47, 37, 37, 38, 38, 38, 38, 38, 38, 39, 39, 40, 40,
- 40, 40, 40, 41, 42, 43, 43, 43, 43, 44, 45, 47, 47, 47, 47, 47, 47, 47,
- 47, 47, 37, 37, 38, 38, 38, 38, 38, 38, 39, 39, 40, 40, 40, 40, 40, 41,
- 42, 43, 43, 43, 43, 44, 45, 47, 47, 47, 47, 47, 47, 47, 47, 47, 38, 39,
- 39, 40, 40, 40, 40, 40, 40, 40, 41, 41, 41, 41, 41, 42, 43, 44, 44, 44,
- 44, 45, 46, 47, 47, 47, 47, 47, 47, 47, 48, 48, 42, 42, 42, 42, 42, 42,
- 42, 42, 42, 42, 42, 42, 42, 42, 43, 44, 44, 45, 45, 45, 45, 45, 46, 47,
- 47, 47, 47, 47, 48, 48, 48, 48,
- /* Size 4x16 */
- 31, 31, 35, 37, 31, 31, 35, 38, 31, 31, 36, 38, 31, 31, 36, 38, 31, 32,
- 36, 39, 31, 32, 37, 40, 31, 32, 37, 40, 31, 33, 38, 40, 33, 35, 40, 42,
- 34, 36, 40, 43, 34, 36, 40, 43, 36, 38, 43, 45, 38, 40, 45, 47, 38, 40,
- 45, 47, 39, 41, 45, 47, 42, 43, 46, 47,
- /* Size 16x4 */
- 31, 31, 31, 31, 31, 31, 31, 31, 33, 34, 34, 36, 38, 38, 39, 42, 31, 31,
- 31, 31, 32, 32, 32, 33, 35, 36, 36, 38, 40, 40, 41, 43, 35, 35, 36, 36,
- 36, 37, 37, 38, 40, 40, 40, 43, 45, 45, 45, 46, 37, 38, 38, 38, 39, 40,
- 40, 40, 42, 43, 43, 45, 47, 47, 47, 47,
- /* Size 8x32 */
- 32, 31, 31, 31, 33, 37, 37, 38, 31, 31, 31, 31, 33, 37, 37, 39, 31, 31,
- 31, 31, 33, 38, 38, 39, 31, 31, 31, 31, 34, 38, 38, 40, 31, 31, 31, 31,
- 34, 38, 38, 40, 31, 31, 31, 31, 34, 38, 38, 40, 31, 31, 31, 31, 34, 38,
- 38, 40, 31, 31, 31, 31, 34, 38, 38, 40, 31, 31, 32, 32, 34, 39, 39, 40,
- 30, 31, 32, 32, 34, 39, 39, 40, 30, 31, 32, 32, 35, 40, 40, 41, 30, 31,
- 32, 32, 35, 40, 40, 41, 30, 31, 32, 32, 35, 40, 40, 41, 30, 31, 32, 32,
- 35, 40, 40, 41, 31, 32, 33, 33, 35, 40, 40, 41, 32, 33, 34, 34, 36, 41,
- 41, 42, 33, 34, 35, 35, 37, 42, 42, 43, 33, 35, 36, 36, 38, 43, 43, 44,
- 33, 35, 36, 36, 38, 43, 43, 44, 33, 35, 36, 36, 38, 43, 43, 44, 33, 35,
- 36, 36, 38, 43, 43, 44, 34, 36, 37, 37, 39, 44, 44, 45, 35, 37, 38, 38,
- 41, 45, 45, 46, 36, 38, 39, 39, 42, 47, 47, 47, 37, 39, 40, 40, 43, 47,
- 47, 47, 37, 39, 40, 40, 43, 47, 47, 47, 37, 39, 40, 40, 43, 47, 47, 47,
- 37, 39, 40, 40, 43, 47, 47, 47, 39, 40, 41, 41, 43, 47, 47, 47, 40, 41,
- 42, 42, 44, 47, 47, 47, 42, 42, 43, 43, 44, 47, 47, 48, 42, 42, 43, 43,
- 44, 47, 47, 48,
- /* Size 32x8 */
- 32, 31, 31, 31, 31, 31, 31, 31, 31, 30, 30, 30, 30, 30, 31, 32, 33, 33,
- 33, 33, 33, 34, 35, 36, 37, 37, 37, 37, 39, 40, 42, 42, 31, 31, 31, 31,
- 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 32, 33, 34, 35, 35, 35, 35, 36,
- 37, 38, 39, 39, 39, 39, 40, 41, 42, 42, 31, 31, 31, 31, 31, 31, 31, 31,
- 32, 32, 32, 32, 32, 32, 33, 34, 35, 36, 36, 36, 36, 37, 38, 39, 40, 40,
- 40, 40, 41, 42, 43, 43, 31, 31, 31, 31, 31, 31, 31, 31, 32, 32, 32, 32,
- 32, 32, 33, 34, 35, 36, 36, 36, 36, 37, 38, 39, 40, 40, 40, 40, 41, 42,
- 43, 43, 33, 33, 33, 34, 34, 34, 34, 34, 34, 34, 35, 35, 35, 35, 35, 36,
- 37, 38, 38, 38, 38, 39, 41, 42, 43, 43, 43, 43, 43, 44, 44, 44, 37, 37,
- 38, 38, 38, 38, 38, 38, 39, 39, 40, 40, 40, 40, 40, 41, 42, 43, 43, 43,
- 43, 44, 45, 47, 47, 47, 47, 47, 47, 47, 47, 47, 37, 37, 38, 38, 38, 38,
- 38, 38, 39, 39, 40, 40, 40, 40, 40, 41, 42, 43, 43, 43, 43, 44, 45, 47,
- 47, 47, 47, 47, 47, 47, 47, 47, 38, 39, 39, 40, 40, 40, 40, 40, 40, 40,
- 41, 41, 41, 41, 41, 42, 43, 44, 44, 44, 44, 45, 46, 47, 47, 47, 47, 47,
- 47, 47, 48, 48 },
- },
- {
- { /* Luma */
- /* Size 4x4 */
- 31, 31, 31, 32, 31, 32, 32, 32, 31, 32, 32, 32, 32, 32, 32, 33,
- /* Size 8x8 */
- 31, 31, 31, 31, 31, 31, 32, 32, 31, 32, 32, 32, 32, 32, 32, 32, 31, 32,
- 32, 32, 32, 32, 32, 32, 31, 32, 32, 32, 32, 32, 32, 32, 31, 32, 32, 32,
- 32, 32, 32, 32, 31, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 33, 33, 32, 32, 32, 32, 32, 32, 33, 33,
- /* Size 16x16 */
- 32, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31,
- 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 32, 32, 32, 32, 31, 31, 31, 31,
- 31, 31, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 31, 31, 31, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 31, 31, 31, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 31, 31, 31, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 31, 31, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 31, 31, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 31, 31, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 31, 31, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 31, 31,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 31, 31, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 33, 33, 33, 33, 31, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 33, 33, 33, 33, 33, 31, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 33, 33, 33, 33, 33, 31, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 33, 33, 33, 33, 33, 31, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 33,
- 33, 33, 33, 33,
- /* Size 32x32 */
- 32, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31,
- 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 32, 31, 31, 31, 31,
- 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31,
- 31, 31, 32, 32, 32, 32, 32, 32, 32, 32, 31, 31, 31, 31, 31, 31, 31, 31,
- 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31,
- 31, 31, 31, 31, 31, 31, 31, 31, 31, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 31, 31,
- 31, 31, 31, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 31, 31, 31, 31, 31, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 31, 31, 31, 31, 31, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 31, 31, 31, 31, 31, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 31, 31, 31, 31, 31, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 31, 31, 31, 31,
- 31, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 31, 31, 31, 31, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 31, 31, 31, 31, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 31, 31, 31, 31, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 31, 31,
- 31, 31, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 31, 31, 31, 31, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 31, 31, 31, 31, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 31, 31, 31, 31, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 31, 31, 31, 31, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 31, 31, 31, 31,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 31, 31, 31, 31, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 31, 31, 31, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 33, 31, 31, 31, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 33, 33, 33, 33, 33, 33, 33, 33, 31, 31,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 33, 33, 33, 33, 33, 33, 33, 33, 33, 31, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 33, 33,
- 33, 33, 33, 33, 33, 33, 33, 33, 31, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 33, 33, 33, 33, 33, 33,
- 33, 33, 33, 33, 31, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33,
- 31, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 31, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 31, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 33, 33, 33, 33,
- 33, 33, 33, 33, 33, 33, 31, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 33, 33, 33, 33, 33, 33, 33, 33,
- 33, 33, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33,
- /* Size 4x8 */
- 31, 31, 31, 32, 31, 32, 32, 32, 31, 32, 32, 32, 31, 32, 32, 32, 31, 32,
- 32, 32, 31, 32, 32, 33, 32, 32, 32, 33, 32, 32, 32, 33,
- /* Size 8x4 */
- 31, 31, 31, 31, 31, 31, 32, 32, 31, 32, 32, 32, 32, 32, 32, 32, 31, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 33, 33, 33,
- /* Size 8x16 */
- 32, 31, 31, 31, 31, 31, 31, 32, 31, 31, 31, 31, 31, 31, 32, 32, 31, 31,
- 32, 32, 32, 32, 32, 32, 31, 32, 32, 32, 32, 32, 32, 32, 31, 32, 32, 32,
- 32, 32, 32, 32, 31, 32, 32, 32, 32, 32, 32, 32, 31, 32, 32, 32, 32, 32,
- 32, 32, 31, 32, 32, 32, 32, 32, 32, 32, 31, 32, 32, 32, 32, 32, 32, 32,
- 31, 32, 32, 32, 32, 32, 32, 32, 31, 32, 32, 32, 32, 32, 32, 32, 31, 32,
- 32, 32, 32, 32, 33, 33, 31, 32, 32, 32, 32, 32, 33, 33, 32, 32, 32, 32,
- 32, 32, 33, 34, 32, 32, 32, 32, 32, 32, 33, 34, 32, 32, 32, 32, 32, 32,
- 33, 34,
- /* Size 16x8 */
- 32, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 32, 32, 32, 31, 31,
- 31, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 31, 31, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 31, 31, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 31, 31, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 31, 31, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 31, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 33,
- 33, 33, 33, 33, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 33, 33, 34,
- 34, 34,
- /* Size 16x32 */
- 32, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 32, 32, 32, 31, 31,
- 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 32, 32, 32, 32, 31, 31, 31, 31,
- 31, 31, 31, 31, 31, 31, 31, 32, 32, 32, 32, 32, 31, 31, 31, 31, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 31, 31, 31, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 31, 31, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 31, 31, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 31, 31, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 31, 31, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 31, 31, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 31, 31,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 31, 31, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 31, 31, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 31, 31, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 31, 31, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 31, 31, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 33, 31, 31, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 33, 31, 31, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 33,
- 31, 31, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 33, 31, 31,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 33, 31, 31, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 33, 31, 31, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 33, 33, 33, 31, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 33, 33, 33, 33, 31, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 33, 33, 33, 33, 31, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 33,
- 33, 33, 33, 34, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 33, 33, 33,
- 34, 34, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 33, 33, 33, 34, 34,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 33, 33, 33, 34, 34, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 33, 33, 33, 34, 34, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 33, 33, 33, 34, 34, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 33, 33, 33, 34, 34, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 33, 33, 33, 34, 34,
- /* Size 32x16 */
- 32, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31,
- 31, 31, 31, 31, 31, 31, 31, 32, 32, 32, 32, 32, 32, 32, 31, 31, 31, 31,
- 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 31, 31, 31, 31, 31, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 31, 31, 31, 31, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 31, 31, 31, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 31, 31,
- 31, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 31, 31, 31, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 31, 31, 31, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 31, 31, 31, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 31, 31, 31, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 31, 31, 31, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 31, 31, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 33, 33,
- 33, 33, 33, 33, 33, 33, 31, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 33, 33, 33, 33, 33, 33, 33, 33,
- 33, 33, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 33, 33, 33, 33, 34, 34, 34, 34, 34, 34, 34, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 33, 33, 33, 33, 33, 33, 33, 33, 33,
- 34, 34, 34, 34, 34, 34, 34, 34,
- /* Size 4x16 */
- 31, 31, 31, 32, 31, 31, 31, 32, 31, 32, 32, 32, 31, 32, 32, 32, 31, 32,
- 32, 32, 31, 32, 32, 32, 31, 32, 32, 32, 31, 32, 32, 32, 31, 32, 32, 32,
- 31, 32, 32, 32, 31, 32, 32, 32, 32, 32, 32, 33, 32, 32, 32, 33, 32, 32,
- 32, 33, 32, 32, 32, 33, 32, 32, 32, 33,
- /* Size 16x4 */
- 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 32, 32, 32, 32, 32, 31, 31,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 31, 31, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 33, 33, 33, 33, 33,
- /* Size 8x32 */
- 32, 31, 31, 31, 31, 31, 31, 32, 31, 31, 31, 31, 31, 31, 32, 32, 31, 31,
- 31, 31, 31, 31, 32, 32, 31, 31, 32, 32, 32, 32, 32, 32, 31, 31, 32, 32,
- 32, 32, 32, 32, 31, 32, 32, 32, 32, 32, 32, 32, 31, 32, 32, 32, 32, 32,
- 32, 32, 31, 32, 32, 32, 32, 32, 32, 32, 31, 32, 32, 32, 32, 32, 32, 32,
- 31, 32, 32, 32, 32, 32, 32, 32, 31, 32, 32, 32, 32, 32, 32, 32, 31, 32,
- 32, 32, 32, 32, 32, 32, 31, 32, 32, 32, 32, 32, 32, 32, 31, 32, 32, 32,
- 32, 32, 32, 32, 31, 32, 32, 32, 32, 32, 32, 32, 31, 32, 32, 32, 32, 32,
- 32, 32, 31, 32, 32, 32, 32, 32, 32, 32, 31, 32, 32, 32, 32, 32, 32, 32,
- 31, 32, 32, 32, 32, 32, 32, 32, 31, 32, 32, 32, 32, 32, 32, 32, 31, 32,
- 32, 32, 32, 32, 32, 32, 31, 32, 32, 32, 32, 32, 32, 33, 31, 32, 32, 32,
- 32, 32, 33, 33, 31, 32, 32, 32, 32, 32, 33, 33, 31, 32, 32, 32, 32, 32,
- 33, 33, 32, 32, 32, 32, 32, 32, 33, 34, 32, 32, 32, 32, 32, 32, 33, 34,
- 32, 32, 32, 32, 32, 32, 33, 34, 32, 32, 32, 32, 32, 32, 33, 34, 32, 32,
- 32, 32, 32, 32, 33, 34, 32, 32, 32, 32, 32, 32, 33, 34, 32, 32, 32, 32,
- 32, 32, 33, 34,
- /* Size 32x8 */
- 32, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31,
- 31, 31, 31, 31, 31, 31, 31, 32, 32, 32, 32, 32, 32, 32, 31, 31, 31, 31,
- 31, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 31, 31, 31, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 31, 31, 31, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 31, 31, 31, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 31, 31,
- 31, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 31, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 33, 33,
- 33, 33, 33, 33, 33, 33, 33, 33, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 33, 33, 33, 33, 34, 34, 34,
- 34, 34, 34, 34 },
- { /* Chroma */
- /* Size 4x4 */
- 31, 31, 31, 34, 31, 31, 31, 35, 31, 31, 32, 35, 34, 35, 35, 39,
- /* Size 8x8 */
- 31, 31, 31, 31, 30, 31, 33, 33, 31, 31, 31, 31, 31, 32, 34, 34, 31, 31,
- 31, 31, 31, 32, 34, 34, 31, 31, 31, 31, 31, 32, 35, 35, 30, 31, 31, 31,
- 32, 32, 35, 35, 31, 32, 32, 32, 32, 33, 36, 36, 33, 34, 34, 35, 35, 36,
- 39, 39, 33, 34, 34, 35, 35, 36, 39, 39,
- /* Size 16x16 */
- 32, 31, 31, 31, 31, 31, 31, 30, 30, 30, 30, 31, 33, 33, 33, 33, 31, 31,
- 31, 31, 31, 31, 31, 31, 30, 30, 30, 32, 33, 34, 34, 34, 31, 31, 31, 31,
- 31, 31, 31, 31, 31, 31, 31, 32, 33, 34, 34, 34, 31, 31, 31, 31, 31, 31,
- 31, 31, 31, 31, 31, 32, 34, 34, 34, 34, 31, 31, 31, 31, 31, 31, 31, 31,
- 31, 31, 31, 32, 34, 34, 34, 34, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31,
- 31, 32, 34, 34, 34, 34, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 33,
- 34, 35, 35, 35, 30, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 33, 34, 35,
- 35, 35, 30, 30, 31, 31, 31, 31, 31, 31, 32, 32, 32, 33, 34, 35, 35, 35,
- 30, 30, 31, 31, 31, 31, 31, 31, 32, 32, 32, 33, 34, 35, 35, 35, 30, 30,
- 31, 31, 31, 31, 31, 31, 32, 32, 32, 33, 34, 35, 35, 35, 31, 32, 32, 32,
- 32, 32, 33, 33, 33, 33, 33, 34, 36, 37, 37, 37, 33, 33, 33, 34, 34, 34,
- 34, 34, 34, 34, 34, 36, 37, 38, 38, 38, 33, 34, 34, 34, 34, 34, 35, 35,
- 35, 35, 35, 37, 38, 39, 39, 39, 33, 34, 34, 34, 34, 34, 35, 35, 35, 35,
- 35, 37, 38, 39, 39, 39, 33, 34, 34, 34, 34, 34, 35, 35, 35, 35, 35, 37,
- 38, 39, 39, 39,
- /* Size 32x32 */
- 32, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 30, 30, 30, 30, 30,
- 30, 30, 30, 31, 31, 32, 33, 33, 33, 33, 33, 33, 33, 34, 31, 31, 31, 31,
- 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 30, 30, 30, 30, 30, 30, 30, 31,
- 31, 32, 33, 33, 33, 33, 33, 33, 33, 34, 31, 31, 31, 31, 31, 31, 31, 31,
- 31, 31, 31, 31, 31, 31, 31, 30, 30, 30, 30, 30, 30, 31, 32, 32, 33, 34,
- 34, 34, 34, 34, 34, 34, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31,
- 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 32, 33, 33, 34, 34, 34, 34, 34,
- 34, 35, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31,
- 31, 31, 31, 31, 31, 31, 32, 33, 33, 34, 34, 34, 34, 34, 34, 35, 31, 31,
- 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31,
- 31, 32, 32, 33, 34, 34, 34, 34, 34, 34, 34, 35, 31, 31, 31, 31, 31, 31,
- 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 32, 32, 33,
- 34, 34, 34, 34, 34, 34, 34, 35, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31,
- 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 32, 32, 33, 34, 34, 34, 34,
- 34, 34, 34, 35, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31,
- 31, 31, 31, 31, 31, 31, 31, 32, 32, 33, 34, 34, 34, 34, 34, 34, 34, 35,
- 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31,
- 31, 31, 31, 32, 32, 33, 34, 34, 34, 34, 34, 34, 34, 35, 31, 31, 31, 31,
- 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 32,
- 32, 33, 34, 34, 34, 34, 34, 34, 34, 35, 31, 31, 31, 31, 31, 31, 31, 31,
- 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 32, 32, 33, 34, 35,
- 35, 35, 35, 35, 35, 35, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31,
- 31, 31, 31, 31, 31, 31, 31, 31, 31, 32, 33, 33, 34, 35, 35, 35, 35, 35,
- 35, 35, 30, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31,
- 31, 31, 31, 31, 31, 32, 33, 33, 34, 35, 35, 35, 35, 35, 35, 36, 30, 30,
- 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31,
- 31, 32, 33, 34, 34, 35, 35, 35, 35, 35, 35, 36, 30, 30, 30, 31, 31, 31,
- 31, 31, 31, 31, 31, 31, 31, 31, 31, 32, 32, 32, 32, 32, 32, 32, 33, 34,
- 34, 35, 35, 35, 35, 35, 35, 36, 30, 30, 30, 31, 31, 31, 31, 31, 31, 31,
- 31, 31, 31, 31, 31, 32, 32, 32, 32, 32, 32, 32, 33, 34, 34, 35, 35, 35,
- 35, 35, 35, 36, 30, 30, 30, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31,
- 31, 32, 32, 32, 32, 32, 32, 32, 33, 34, 34, 35, 35, 35, 35, 35, 35, 36,
- 30, 30, 30, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 32, 32, 32,
- 32, 32, 32, 32, 33, 34, 34, 35, 35, 35, 35, 35, 35, 36, 30, 30, 30, 31,
- 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 32, 32, 32, 32, 32, 32, 32,
- 33, 34, 34, 35, 35, 35, 35, 35, 35, 36, 30, 30, 30, 31, 31, 31, 31, 31,
- 31, 31, 31, 31, 31, 31, 31, 32, 32, 32, 32, 32, 32, 32, 33, 34, 34, 35,
- 35, 35, 35, 35, 35, 36, 31, 31, 31, 31, 31, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 33, 34, 34, 35, 36, 36, 36, 36, 36,
- 36, 37, 31, 31, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 33, 33, 33, 33,
- 33, 33, 33, 33, 33, 34, 34, 35, 36, 37, 37, 37, 37, 37, 37, 37, 32, 32,
- 32, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 34, 34, 34, 34, 34, 34,
- 34, 34, 35, 36, 37, 37, 37, 37, 37, 37, 37, 38, 33, 33, 33, 33, 33, 34,
- 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 35, 36, 37,
- 37, 38, 38, 38, 38, 38, 38, 39, 33, 33, 34, 34, 34, 34, 34, 34, 34, 34,
- 34, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 36, 37, 37, 38, 39, 39, 39,
- 39, 39, 39, 40, 33, 33, 34, 34, 34, 34, 34, 34, 34, 34, 34, 35, 35, 35,
- 35, 35, 35, 35, 35, 35, 35, 36, 37, 37, 38, 39, 39, 39, 39, 39, 39, 40,
- 33, 33, 34, 34, 34, 34, 34, 34, 34, 34, 34, 35, 35, 35, 35, 35, 35, 35,
- 35, 35, 35, 36, 37, 37, 38, 39, 39, 39, 39, 39, 39, 40, 33, 33, 34, 34,
- 34, 34, 34, 34, 34, 34, 34, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 36,
- 37, 37, 38, 39, 39, 39, 39, 39, 39, 40, 33, 33, 34, 34, 34, 34, 34, 34,
- 34, 34, 34, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 36, 37, 37, 38, 39,
- 39, 39, 39, 39, 39, 40, 33, 33, 34, 34, 34, 34, 34, 34, 34, 34, 34, 35,
- 35, 35, 35, 35, 35, 35, 35, 35, 35, 36, 37, 37, 38, 39, 39, 39, 39, 39,
- 39, 40, 34, 34, 34, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 36, 36, 36,
- 36, 36, 36, 36, 36, 37, 37, 38, 39, 40, 40, 40, 40, 40, 40, 40,
- /* Size 4x8 */
- 31, 31, 31, 34, 31, 31, 31, 35, 31, 31, 31, 35, 31, 32, 32, 36, 31, 32,
- 32, 36, 31, 33, 33, 37, 34, 36, 36, 40, 34, 36, 36, 40,
- /* Size 8x4 */
- 31, 31, 31, 31, 31, 31, 34, 34, 31, 31, 31, 32, 32, 33, 36, 36, 31, 31,
- 31, 32, 32, 33, 36, 36, 34, 35, 35, 36, 36, 37, 40, 40,
- /* Size 8x16 */
- 32, 31, 31, 31, 31, 31, 33, 35, 31, 31, 31, 31, 31, 31, 33, 36, 31, 31,
- 31, 31, 31, 31, 34, 36, 31, 31, 31, 31, 31, 31, 34, 37, 31, 31, 31, 31,
- 31, 31, 34, 37, 31, 31, 31, 31, 31, 31, 34, 37, 31, 31, 31, 32, 32, 32,
- 34, 37, 30, 31, 31, 32, 32, 32, 34, 38, 30, 31, 32, 32, 32, 32, 35, 38,
- 30, 31, 32, 32, 32, 32, 35, 38, 30, 31, 32, 32, 32, 32, 35, 38, 31, 32,
- 33, 33, 33, 33, 36, 39, 33, 34, 34, 35, 35, 35, 37, 40, 33, 34, 35, 36,
- 36, 36, 38, 41, 33, 34, 35, 36, 36, 36, 38, 41, 33, 34, 35, 36, 36, 36,
- 38, 41,
- /* Size 16x8 */
- 32, 31, 31, 31, 31, 31, 31, 30, 30, 30, 30, 31, 33, 33, 33, 33, 31, 31,
- 31, 31, 31, 31, 31, 31, 31, 31, 31, 32, 34, 34, 34, 34, 31, 31, 31, 31,
- 31, 31, 31, 31, 32, 32, 32, 33, 34, 35, 35, 35, 31, 31, 31, 31, 31, 31,
- 32, 32, 32, 32, 32, 33, 35, 36, 36, 36, 31, 31, 31, 31, 31, 31, 32, 32,
- 32, 32, 32, 33, 35, 36, 36, 36, 31, 31, 31, 31, 31, 31, 32, 32, 32, 32,
- 32, 33, 35, 36, 36, 36, 33, 33, 34, 34, 34, 34, 34, 34, 35, 35, 35, 36,
- 37, 38, 38, 38, 35, 36, 36, 37, 37, 37, 37, 38, 38, 38, 38, 39, 40, 41,
- 41, 41,
- /* Size 16x32 */
- 32, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 32, 33, 34, 35, 37, 31, 31,
- 31, 31, 31, 31, 31, 31, 31, 31, 31, 32, 33, 34, 35, 37, 31, 31, 31, 31,
- 31, 31, 31, 31, 31, 31, 31, 32, 33, 34, 36, 37, 31, 31, 31, 31, 31, 31,
- 31, 31, 31, 31, 31, 32, 33, 35, 36, 38, 31, 31, 31, 31, 31, 31, 31, 31,
- 31, 31, 31, 32, 34, 35, 36, 38, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31,
- 31, 33, 34, 35, 37, 38, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 33,
- 34, 35, 37, 38, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 33, 34, 35,
- 37, 38, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 33, 34, 35, 37, 38,
- 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 33, 34, 35, 37, 38, 31, 31,
- 31, 31, 31, 31, 31, 31, 31, 31, 31, 33, 34, 35, 37, 38, 31, 31, 31, 31,
- 31, 31, 31, 31, 31, 31, 31, 33, 34, 35, 37, 38, 31, 31, 31, 31, 31, 32,
- 32, 32, 32, 32, 32, 33, 34, 36, 37, 39, 31, 31, 31, 31, 31, 32, 32, 32,
- 32, 32, 32, 33, 34, 36, 37, 39, 30, 31, 31, 31, 31, 32, 32, 32, 32, 32,
- 32, 33, 34, 36, 38, 39, 30, 31, 31, 31, 32, 32, 32, 32, 32, 32, 32, 33,
- 35, 36, 38, 40, 30, 31, 31, 31, 32, 32, 32, 32, 32, 32, 32, 33, 35, 36,
- 38, 40, 30, 31, 31, 31, 32, 32, 32, 32, 32, 32, 32, 33, 35, 36, 38, 40,
- 30, 31, 31, 31, 32, 32, 32, 32, 32, 32, 32, 33, 35, 36, 38, 40, 30, 31,
- 31, 31, 32, 32, 32, 32, 32, 32, 32, 33, 35, 36, 38, 40, 30, 31, 31, 31,
- 32, 32, 32, 32, 32, 32, 32, 33, 35, 36, 38, 40, 31, 31, 31, 32, 32, 33,
- 33, 33, 33, 33, 33, 34, 35, 37, 38, 40, 31, 32, 32, 33, 33, 33, 33, 33,
- 33, 33, 33, 35, 36, 37, 39, 41, 32, 32, 33, 33, 34, 34, 34, 34, 34, 34,
- 34, 35, 37, 38, 40, 41, 33, 33, 34, 34, 34, 35, 35, 35, 35, 35, 35, 36,
- 37, 39, 40, 42, 33, 34, 34, 35, 35, 36, 36, 36, 36, 36, 36, 37, 38, 40,
- 41, 43, 33, 34, 34, 35, 35, 36, 36, 36, 36, 36, 36, 37, 38, 40, 41, 43,
- 33, 34, 34, 35, 35, 36, 36, 36, 36, 36, 36, 37, 38, 40, 41, 43, 33, 34,
- 34, 35, 35, 36, 36, 36, 36, 36, 36, 37, 38, 40, 41, 43, 33, 34, 34, 35,
- 35, 36, 36, 36, 36, 36, 36, 37, 38, 40, 41, 43, 33, 34, 34, 35, 35, 36,
- 36, 36, 36, 36, 36, 37, 38, 40, 41, 43, 34, 34, 35, 35, 36, 36, 36, 36,
- 36, 36, 36, 38, 39, 40, 42, 44,
- /* Size 32x16 */
- 32, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 30, 30, 30, 30,
- 30, 30, 30, 31, 31, 32, 33, 33, 33, 33, 33, 33, 33, 34, 31, 31, 31, 31,
- 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31,
- 32, 32, 33, 34, 34, 34, 34, 34, 34, 34, 31, 31, 31, 31, 31, 31, 31, 31,
- 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 32, 33, 34, 34,
- 34, 34, 34, 34, 34, 35, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31,
- 31, 31, 31, 31, 31, 31, 31, 31, 31, 32, 33, 33, 34, 35, 35, 35, 35, 35,
- 35, 35, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 32,
- 32, 32, 32, 32, 32, 32, 33, 34, 34, 35, 35, 35, 35, 35, 35, 36, 31, 31,
- 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 33, 33, 34, 35, 36, 36, 36, 36, 36, 36, 36, 31, 31, 31, 31, 31, 31,
- 31, 31, 31, 31, 31, 31, 32, 32, 32, 32, 32, 32, 32, 32, 32, 33, 33, 34,
- 35, 36, 36, 36, 36, 36, 36, 36, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31,
- 31, 31, 32, 32, 32, 32, 32, 32, 32, 32, 32, 33, 33, 34, 35, 36, 36, 36,
- 36, 36, 36, 36, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 33, 33, 34, 35, 36, 36, 36, 36, 36, 36, 36,
- 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 33, 33, 34, 35, 36, 36, 36, 36, 36, 36, 36, 31, 31, 31, 31,
- 31, 31, 31, 31, 31, 31, 31, 31, 32, 32, 32, 32, 32, 32, 32, 32, 32, 33,
- 33, 34, 35, 36, 36, 36, 36, 36, 36, 36, 32, 32, 32, 32, 32, 33, 33, 33,
- 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 34, 35, 35, 36, 37,
- 37, 37, 37, 37, 37, 38, 33, 33, 33, 33, 34, 34, 34, 34, 34, 34, 34, 34,
- 34, 34, 34, 35, 35, 35, 35, 35, 35, 35, 36, 37, 37, 38, 38, 38, 38, 38,
- 38, 39, 34, 34, 34, 35, 35, 35, 35, 35, 35, 35, 35, 35, 36, 36, 36, 36,
- 36, 36, 36, 36, 36, 37, 37, 38, 39, 40, 40, 40, 40, 40, 40, 40, 35, 35,
- 36, 36, 36, 37, 37, 37, 37, 37, 37, 37, 37, 37, 38, 38, 38, 38, 38, 38,
- 38, 38, 39, 40, 40, 41, 41, 41, 41, 41, 41, 42, 37, 37, 37, 38, 38, 38,
- 38, 38, 38, 38, 38, 38, 39, 39, 39, 40, 40, 40, 40, 40, 40, 40, 41, 41,
- 42, 43, 43, 43, 43, 43, 43, 44,
- /* Size 4x16 */
- 31, 31, 31, 34, 31, 31, 31, 34, 31, 31, 31, 35, 31, 31, 31, 35, 31, 31,
- 31, 35, 31, 31, 31, 35, 31, 32, 32, 36, 31, 32, 32, 36, 31, 32, 32, 36,
- 31, 32, 32, 36, 31, 32, 32, 36, 32, 33, 33, 37, 33, 35, 35, 39, 34, 36,
- 36, 40, 34, 36, 36, 40, 34, 36, 36, 40,
- /* Size 16x4 */
- 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 32, 33, 34, 34, 34, 31, 31,
- 31, 31, 31, 31, 32, 32, 32, 32, 32, 33, 35, 36, 36, 36, 31, 31, 31, 31,
- 31, 31, 32, 32, 32, 32, 32, 33, 35, 36, 36, 36, 34, 34, 35, 35, 35, 35,
- 36, 36, 36, 36, 36, 37, 39, 40, 40, 40,
- /* Size 8x32 */
- 32, 31, 31, 31, 31, 31, 33, 35, 31, 31, 31, 31, 31, 31, 33, 35, 31, 31,
- 31, 31, 31, 31, 33, 36, 31, 31, 31, 31, 31, 31, 33, 36, 31, 31, 31, 31,
- 31, 31, 34, 36, 31, 31, 31, 31, 31, 31, 34, 37, 31, 31, 31, 31, 31, 31,
- 34, 37, 31, 31, 31, 31, 31, 31, 34, 37, 31, 31, 31, 31, 31, 31, 34, 37,
- 31, 31, 31, 31, 31, 31, 34, 37, 31, 31, 31, 31, 31, 31, 34, 37, 31, 31,
- 31, 31, 31, 31, 34, 37, 31, 31, 31, 32, 32, 32, 34, 37, 31, 31, 31, 32,
- 32, 32, 34, 37, 30, 31, 31, 32, 32, 32, 34, 38, 30, 31, 32, 32, 32, 32,
- 35, 38, 30, 31, 32, 32, 32, 32, 35, 38, 30, 31, 32, 32, 32, 32, 35, 38,
- 30, 31, 32, 32, 32, 32, 35, 38, 30, 31, 32, 32, 32, 32, 35, 38, 30, 31,
- 32, 32, 32, 32, 35, 38, 31, 31, 32, 33, 33, 33, 35, 38, 31, 32, 33, 33,
- 33, 33, 36, 39, 32, 33, 34, 34, 34, 34, 37, 40, 33, 34, 34, 35, 35, 35,
- 37, 40, 33, 34, 35, 36, 36, 36, 38, 41, 33, 34, 35, 36, 36, 36, 38, 41,
- 33, 34, 35, 36, 36, 36, 38, 41, 33, 34, 35, 36, 36, 36, 38, 41, 33, 34,
- 35, 36, 36, 36, 38, 41, 33, 34, 35, 36, 36, 36, 38, 41, 34, 35, 36, 36,
- 36, 36, 39, 42,
- /* Size 32x8 */
- 32, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 30, 30, 30, 30,
- 30, 30, 30, 31, 31, 32, 33, 33, 33, 33, 33, 33, 33, 34, 31, 31, 31, 31,
- 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31,
- 32, 33, 34, 34, 34, 34, 34, 34, 34, 35, 31, 31, 31, 31, 31, 31, 31, 31,
- 31, 31, 31, 31, 31, 31, 31, 32, 32, 32, 32, 32, 32, 32, 33, 34, 34, 35,
- 35, 35, 35, 35, 35, 36, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 33, 33, 34, 35, 36, 36, 36, 36, 36,
- 36, 36, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 33, 33, 34, 35, 36, 36, 36, 36, 36, 36, 36, 31, 31,
- 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 33, 33, 34, 35, 36, 36, 36, 36, 36, 36, 36, 33, 33, 33, 33, 34, 34,
- 34, 34, 34, 34, 34, 34, 34, 34, 34, 35, 35, 35, 35, 35, 35, 35, 36, 37,
- 37, 38, 38, 38, 38, 38, 38, 39, 35, 35, 36, 36, 36, 37, 37, 37, 37, 37,
- 37, 37, 37, 37, 38, 38, 38, 38, 38, 38, 38, 38, 39, 40, 40, 41, 41, 41,
- 41, 41, 41, 42 },
- },
- {
- { /* Luma */
- /* Size 4x4 */
- 31, 31, 31, 31, 31, 32, 32, 32, 31, 32, 32, 32, 31, 32, 32, 32,
- /* Size 8x8 */
- 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31,
- 31, 32, 32, 32, 32, 32, 31, 31, 32, 32, 32, 32, 32, 32, 31, 31, 32, 32,
- 32, 32, 32, 32, 31, 31, 32, 32, 32, 32, 32, 32, 31, 31, 32, 32, 32, 32,
- 32, 32, 31, 31, 32, 32, 32, 32, 32, 32,
- /* Size 16x16 */
- 32, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31,
- 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31,
- 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31,
- 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31,
- 31, 31, 31, 32, 32, 32, 32, 32, 31, 31, 31, 31, 31, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 31, 31, 31, 31, 31, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 31, 31, 31, 31, 31, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 31, 31, 31, 31, 31, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 31, 31, 31, 31, 31, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 31, 31,
- 31, 31, 31, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 31, 31, 31, 31,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 31, 31, 31, 31, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 31, 31, 31, 31, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 31, 31, 31, 31, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 31, 31, 31, 31, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32,
- /* Size 32x32 */
- 32, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31,
- 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31,
- 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31,
- 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31,
- 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31,
- 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31,
- 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31,
- 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31,
- 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31,
- 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31,
- 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31,
- 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31,
- 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31,
- 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 32, 32, 32,
- 32, 32, 32, 32, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31,
- 31, 31, 31, 31, 31, 31, 31, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 31, 31, 31, 31,
- 31, 31, 31, 31, 31, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 31, 31, 31, 31, 31, 31, 31, 31,
- 31, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 31, 31, 31, 31, 31, 31, 31, 31, 31, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 31, 31, 31, 31, 31, 31, 31, 31, 31, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 31, 31,
- 31, 31, 31, 31, 31, 31, 31, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 31, 31, 31, 31, 31, 31,
- 31, 31, 31, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 31, 31, 31, 31, 31, 31, 31, 31, 31, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 31, 31, 31, 31, 31, 31, 31, 31, 31, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 31, 31, 31, 31, 31, 31, 31, 31, 31, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 31, 31, 31, 31,
- 31, 31, 31, 31, 31, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 31, 31, 31, 31, 31, 31, 31, 31,
- 31, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 31, 31, 31, 31, 31, 31, 31, 31, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 31, 31, 31, 31, 31, 31, 31, 31, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 31, 31,
- 31, 31, 31, 31, 31, 31, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 31, 31, 31, 31, 31, 31,
- 31, 31, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 31, 31, 31, 31, 31, 31, 31, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 31, 31, 31, 31, 31, 31, 31, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 31, 31, 31, 31, 31, 31, 31, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 31, 31, 31, 31,
- 31, 31, 31, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 31, 31, 31, 31, 31, 31, 31, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 31, 31, 31, 31, 31, 31, 31, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 31, 31, 31, 31, 31, 31, 31, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- /* Size 4x8 */
- 31, 31, 31, 31, 31, 31, 31, 31, 31, 32, 32, 32, 31, 32, 32, 32, 31, 32,
- 32, 32, 31, 32, 32, 32, 31, 32, 32, 32, 31, 32, 32, 32,
- /* Size 8x4 */
- 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 32, 32, 32, 32, 32, 32, 31, 31,
- 32, 32, 32, 32, 32, 32, 31, 31, 32, 32, 32, 32, 32, 32,
- /* Size 8x16 */
- 32, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31,
- 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 32, 32, 32, 32, 31, 31, 31, 32,
- 32, 32, 32, 32, 31, 31, 32, 32, 32, 32, 32, 32, 31, 31, 32, 32, 32, 32,
- 32, 32, 31, 31, 32, 32, 32, 32, 32, 32, 31, 31, 32, 32, 32, 32, 32, 32,
- 31, 31, 32, 32, 32, 32, 32, 32, 31, 31, 32, 32, 32, 32, 32, 32, 31, 31,
- 32, 32, 32, 32, 32, 32, 31, 31, 32, 32, 32, 32, 32, 32, 31, 31, 32, 32,
- 32, 32, 32, 32, 31, 31, 32, 32, 32, 32, 32, 32, 31, 31, 32, 32, 32, 32,
- 32, 32,
- /* Size 16x8 */
- 32, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31,
- 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31,
- 31, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 31, 31, 31, 31, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 31, 31, 31, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 31, 31, 31, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 31, 31, 31, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 31, 31, 31, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32,
- /* Size 16x32 */
- 32, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31,
- 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31,
- 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31,
- 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31,
- 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31,
- 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 32, 32, 32, 32,
- 32, 32, 32, 32, 31, 31, 31, 31, 31, 31, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 31, 31, 31, 31, 31, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 31, 31, 31, 31, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 31, 31,
- 31, 31, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 31, 31, 31, 31,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 31, 31, 31, 31, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 31, 31, 31, 31, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 31, 31, 31, 31, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 31, 31, 31, 31, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 31, 31, 31, 31, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 31, 31, 31, 31, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 31, 31, 31, 31, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 31, 31,
- 31, 31, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 31, 31, 31, 31,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 31, 31, 31, 31, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 31, 31, 31, 31, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 31, 31, 31, 31, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 31, 31, 31, 31, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 31, 31, 31, 31, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 31, 31, 31, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 31, 31, 31, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 31, 31,
- 31, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 31, 31, 31, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 31, 31, 31, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 31, 31, 31, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32,
- /* Size 32x16 */
- 32, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31,
- 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31,
- 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31,
- 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31,
- 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31,
- 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31,
- 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 32, 32, 32, 32,
- 32, 32, 31, 31, 31, 31, 31, 31, 31, 31, 31, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 31, 31,
- 31, 31, 31, 31, 31, 31, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 31, 31, 31, 31, 31, 31,
- 31, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 31, 31, 31, 31, 31, 31, 31, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 31, 31, 31, 31, 31, 31, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 31, 31, 31, 31, 31, 31, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 31, 31, 31, 31,
- 31, 31, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 31, 31, 31, 31, 31, 31, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 31, 31, 31, 31, 31, 31, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 31, 31, 31, 31, 31, 31, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 31, 31,
- 31, 31, 31, 31, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 31, 31, 31, 31, 31, 31,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32,
- /* Size 4x16 */
- 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 32, 32, 31, 32,
- 32, 32, 31, 32, 32, 32, 31, 32, 32, 32, 31, 32, 32, 32, 31, 32, 32, 32,
- 31, 32, 32, 32, 31, 32, 32, 32, 31, 32, 32, 32, 31, 32, 32, 32, 31, 32,
- 32, 32, 31, 32, 32, 32, 31, 32, 32, 32,
- /* Size 16x4 */
- 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31,
- 31, 31, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 31, 31, 31, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 31, 31, 31, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- /* Size 8x32 */
- 32, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31,
- 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31,
- 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 32, 32,
- 32, 32, 31, 31, 31, 32, 32, 32, 32, 32, 31, 31, 31, 32, 32, 32, 32, 32,
- 31, 31, 32, 32, 32, 32, 32, 32, 31, 31, 32, 32, 32, 32, 32, 32, 31, 31,
- 32, 32, 32, 32, 32, 32, 31, 31, 32, 32, 32, 32, 32, 32, 31, 31, 32, 32,
- 32, 32, 32, 32, 31, 31, 32, 32, 32, 32, 32, 32, 31, 31, 32, 32, 32, 32,
- 32, 32, 31, 31, 32, 32, 32, 32, 32, 32, 31, 31, 32, 32, 32, 32, 32, 32,
- 31, 31, 32, 32, 32, 32, 32, 32, 31, 31, 32, 32, 32, 32, 32, 32, 31, 31,
- 32, 32, 32, 32, 32, 32, 31, 31, 32, 32, 32, 32, 32, 32, 31, 31, 32, 32,
- 32, 32, 32, 32, 31, 31, 32, 32, 32, 32, 32, 32, 31, 31, 32, 32, 32, 32,
- 32, 32, 31, 31, 32, 32, 32, 32, 32, 32, 31, 31, 32, 32, 32, 32, 32, 32,
- 31, 31, 32, 32, 32, 32, 32, 32, 31, 31, 32, 32, 32, 32, 32, 32, 31, 31,
- 32, 32, 32, 32, 32, 32, 31, 31, 32, 32, 32, 32, 32, 32, 31, 31, 32, 32,
- 32, 32, 32, 32,
- /* Size 32x8 */
- 32, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31,
- 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31,
- 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31,
- 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31,
- 31, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 31, 31, 31, 31, 31, 31, 31, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 31, 31, 31, 31, 31, 31, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 31, 31,
- 31, 31, 31, 31, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 31, 31, 31, 31, 31, 31,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 31, 31, 31, 31, 31, 31, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32 },
- { /* Chroma */
- /* Size 4x4 */
- 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31,
- /* Size 8x8 */
- 31, 31, 31, 31, 31, 31, 31, 30, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31,
- 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31,
- 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31,
- 31, 31, 30, 31, 31, 31, 31, 31, 31, 31,
- /* Size 16x16 */
- 32, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 30, 30, 30, 31, 31,
- 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 30, 30, 31, 31, 31, 31,
- 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 30, 31, 31, 31, 31, 31, 31,
- 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31,
- 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31,
- 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31,
- 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31,
- 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31,
- 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31,
- 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31,
- 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31,
- 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 30, 31, 31, 31, 31, 31, 31, 31,
- 31, 31, 31, 31, 31, 31, 31, 31, 30, 30, 31, 31, 31, 31, 31, 31, 31, 31,
- 31, 31, 31, 31, 31, 31, 30, 30, 30, 31, 31, 31, 31, 31, 31, 31, 31, 31,
- 31, 31, 31, 32,
- /* Size 32x32 */
- 32, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31,
- 31, 31, 31, 31, 31, 31, 31, 31, 30, 30, 30, 30, 30, 30, 31, 31, 31, 31,
- 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31,
- 31, 31, 31, 31, 30, 30, 30, 30, 30, 30, 31, 31, 31, 31, 31, 31, 31, 31,
- 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31,
- 31, 30, 30, 30, 30, 30, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31,
- 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 30, 30,
- 30, 30, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31,
- 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 30, 30, 30, 31, 31,
- 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31,
- 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31,
- 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31,
- 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31,
- 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31,
- 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31,
- 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31,
- 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31,
- 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31,
- 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31,
- 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31,
- 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31,
- 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31,
- 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31,
- 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31,
- 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31,
- 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31,
- 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31,
- 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31,
- 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31,
- 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31,
- 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31,
- 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31,
- 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31,
- 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31,
- 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31,
- 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31,
- 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31,
- 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31,
- 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31,
- 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31,
- 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31,
- 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31,
- 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31,
- 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31,
- 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31,
- 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31,
- 31, 31, 31, 31, 30, 30, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31,
- 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31,
- 30, 30, 30, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31,
- 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 30, 30, 30, 30,
- 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31,
- 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 30, 30, 30, 30, 30, 31, 31, 31,
- 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31,
- 31, 31, 31, 31, 31, 31, 30, 30, 30, 30, 30, 31, 31, 31, 31, 31, 31, 31,
- 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31,
- 32, 32, 30, 30, 30, 30, 30, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31,
- 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 32, 32,
- /* Size 4x8 */
- 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31,
- 31, 31, 31, 31, 31, 31, 31, 31, 32, 32, 30, 31, 32, 32,
- /* Size 8x4 */
- 31, 31, 31, 31, 31, 31, 31, 30, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31,
- 31, 31, 31, 31, 32, 32, 31, 31, 31, 31, 31, 31, 32, 32,
- /* Size 8x16 */
- 32, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31,
- 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31,
- 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31,
- 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31,
- 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31,
- 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 32, 32, 32, 31, 31, 31, 31,
- 31, 32, 32, 32, 30, 31, 31, 31, 31, 32, 32, 32, 30, 31, 31, 31, 32, 32,
- 32, 32,
- /* Size 16x8 */
- 32, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 30, 30, 31, 31,
- 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31,
- 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31,
- 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31,
- 31, 31, 31, 31, 31, 31, 31, 32, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31,
- 31, 31, 32, 32, 32, 32, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31,
- 32, 32, 32, 32, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 32, 32,
- 32, 32,
- /* Size 16x32 */
- 32, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31,
- 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31,
- 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31,
- 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31,
- 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31,
- 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31,
- 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31,
- 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31,
- 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31,
- 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31,
- 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31,
- 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31,
- 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31,
- 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31,
- 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31,
- 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31,
- 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31,
- 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31,
- 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31,
- 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31,
- 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31,
- 32, 32, 32, 32, 32, 32, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 32, 32,
- 32, 32, 32, 32, 31, 31, 31, 31, 31, 31, 31, 31, 31, 32, 32, 32, 32, 32,
- 32, 32, 31, 31, 31, 31, 31, 31, 31, 31, 31, 32, 32, 32, 32, 32, 32, 32,
- 30, 31, 31, 31, 31, 31, 31, 31, 31, 32, 32, 32, 32, 32, 32, 32, 30, 31,
- 31, 31, 31, 31, 31, 31, 31, 32, 32, 32, 32, 32, 32, 32, 30, 30, 31, 31,
- 31, 31, 31, 31, 32, 32, 32, 32, 32, 32, 32, 32, 30, 30, 31, 31, 31, 31,
- 31, 31, 32, 32, 32, 32, 32, 32, 32, 32, 30, 30, 31, 31, 31, 31, 31, 31,
- 32, 32, 32, 32, 32, 32, 32, 32,
- /* Size 32x16 */
- 32, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31,
- 31, 31, 31, 31, 31, 31, 31, 31, 31, 30, 30, 30, 30, 30, 31, 31, 31, 31,
- 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31,
- 31, 31, 31, 31, 31, 31, 31, 30, 30, 30, 31, 31, 31, 31, 31, 31, 31, 31,
- 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31,
- 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31,
- 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31,
- 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31,
- 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31,
- 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31,
- 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31,
- 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31,
- 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31,
- 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31,
- 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31,
- 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 32, 32, 32,
- 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31,
- 31, 31, 31, 31, 31, 31, 31, 32, 32, 32, 32, 32, 32, 32, 31, 31, 31, 31,
- 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31,
- 31, 32, 32, 32, 32, 32, 32, 32, 32, 32, 31, 31, 31, 31, 31, 31, 31, 31,
- 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31,
- 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31,
- 31, 31, 31, 31, 31, 31, 31, 32, 32, 32, 32, 32, 32, 32, 32, 32, 31, 31,
- 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31,
- 31, 31, 31, 32, 32, 32, 32, 32, 32, 32, 32, 32, 31, 31, 31, 31, 31, 31,
- 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 32,
- 32, 32, 32, 32, 32, 32, 32, 32,
- /* Size 4x16 */
- 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31,
- 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31,
- 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 32, 31, 31,
- 32, 32, 31, 31, 32, 32, 30, 31, 32, 32,
- /* Size 16x4 */
- 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 30, 31, 31,
- 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31,
- 31, 31, 31, 31, 31, 31, 31, 31, 31, 32, 32, 32, 31, 31, 31, 31, 31, 31,
- 31, 31, 31, 31, 31, 31, 32, 32, 32, 32,
- /* Size 8x32 */
- 32, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31,
- 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31,
- 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31,
- 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31,
- 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31,
- 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31,
- 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31,
- 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31,
- 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31,
- 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31,
- 31, 31, 31, 31, 31, 31, 31, 31, 31, 32, 32, 32, 31, 31, 31, 31, 31, 32,
- 32, 32, 31, 31, 31, 31, 31, 32, 32, 32, 31, 31, 31, 31, 31, 32, 32, 32,
- 30, 31, 31, 31, 31, 32, 32, 32, 30, 31, 31, 31, 31, 32, 32, 32, 30, 31,
- 31, 31, 32, 32, 32, 32, 30, 31, 31, 31, 32, 32, 32, 32, 30, 31, 31, 31,
- 32, 32, 32, 32,
- /* Size 32x8 */
- 32, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31,
- 31, 31, 31, 31, 31, 31, 31, 31, 31, 30, 30, 30, 30, 30, 31, 31, 31, 31,
- 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31,
- 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31,
- 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31,
- 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31,
- 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31,
- 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31,
- 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 32, 32, 32, 31, 31,
- 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31,
- 31, 31, 31, 32, 32, 32, 32, 32, 32, 32, 32, 32, 31, 31, 31, 31, 31, 31,
- 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31,
- 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 32, 32, 32, 32, 32,
- 32, 32, 32, 32 },
- },
- {
- { /* Luma */
- /* Size 4x4 */
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- /* Size 8x8 */
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- /* Size 16x16 */
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32,
- /* Size 32x32 */
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- /* Size 4x8 */
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- /* Size 8x4 */
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- /* Size 8x16 */
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32,
- /* Size 16x8 */
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32,
- /* Size 16x32 */
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32,
- /* Size 32x16 */
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32,
- /* Size 4x16 */
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- /* Size 16x4 */
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- /* Size 8x32 */
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32,
- /* Size 32x8 */
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32 },
- { /* Chroma */
- /* Size 4x4 */
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- /* Size 8x8 */
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- /* Size 16x16 */
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32,
- /* Size 32x32 */
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- /* Size 4x8 */
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- /* Size 8x4 */
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- /* Size 8x16 */
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32,
- /* Size 16x8 */
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32,
- /* Size 16x32 */
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32,
- /* Size 32x16 */
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32,
- /* Size 4x16 */
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- /* Size 16x4 */
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- /* Size 8x32 */
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32,
- /* Size 32x8 */
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32 },
- },
-};
-
-static const qm_val_t wt_matrix_ref[NUM_QM_LEVELS][2][QM_TOTAL_SIZE] = {
- {
- { /* Luma */
- /* Size 4x4 */
- 32, 24, 14, 11, 24, 15, 11, 9, 14, 11, 7, 7, 11, 9, 7, 5,
- /* Size 8x8 */
- 32, 32, 27, 20, 15, 12, 11, 9, 32, 29, 26, 21, 16, 13, 12, 10, 27, 26,
- 19, 16, 13, 11, 10, 10, 20, 21, 16, 12, 11, 9, 9, 8, 15, 16, 13, 11, 9,
- 8, 7, 7, 12, 13, 11, 9, 8, 7, 6, 6, 11, 12, 10, 9, 7, 6, 6, 5, 9, 10,
- 10, 8, 7, 6, 5, 5,
- /* Size 16x16 */
- 32, 33, 33, 30, 28, 23, 21, 17, 16, 13, 12, 11, 11, 10, 9, 9, 33, 32,
- 32, 31, 30, 25, 23, 19, 17, 14, 14, 12, 11, 11, 10, 9, 33, 32, 31, 29,
- 28, 24, 23, 19, 17, 14, 14, 13, 12, 11, 10, 10, 30, 31, 29, 26, 24, 22,
- 20, 18, 16, 14, 13, 13, 12, 11, 11, 10, 28, 30, 28, 24, 21, 19, 18, 16,
- 15, 13, 13, 12, 11, 11, 10, 10, 23, 25, 24, 22, 19, 16, 15, 14, 13, 11,
- 11, 11, 10, 10, 9, 9, 21, 23, 23, 20, 18, 15, 14, 13, 12, 11, 10, 10, 9,
- 9, 9, 9, 17, 19, 19, 18, 16, 14, 13, 11, 10, 9, 9, 9, 9, 8, 8, 8, 16,
- 17, 17, 16, 15, 13, 12, 10, 10, 9, 8, 8, 8, 8, 8, 7, 13, 14, 14, 14, 13,
- 11, 11, 9, 9, 8, 7, 7, 7, 7, 7, 7, 12, 14, 14, 13, 13, 11, 10, 9, 8, 7,
- 7, 7, 7, 7, 6, 6, 11, 12, 13, 13, 12, 11, 10, 9, 8, 7, 7, 6, 6, 6, 6, 6,
- 11, 11, 12, 12, 11, 10, 9, 9, 8, 7, 7, 6, 6, 6, 5, 5, 10, 11, 11, 11,
- 11, 10, 9, 8, 8, 7, 7, 6, 6, 5, 5, 5, 9, 10, 10, 11, 10, 9, 9, 8, 8, 7,
- 6, 6, 5, 5, 5, 5, 9, 9, 10, 10, 10, 9, 9, 8, 7, 7, 6, 6, 5, 5, 5, 4,
- /* Size 32x32 */
- 32, 33, 33, 33, 33, 32, 30, 29, 28, 26, 23, 22, 21, 19, 17, 17, 16, 14,
- 13, 13, 12, 12, 11, 11, 11, 10, 10, 10, 9, 9, 9, 8, 33, 32, 32, 32, 32,
- 32, 30, 30, 29, 27, 24, 23, 22, 20, 18, 17, 17, 15, 13, 13, 13, 12, 12,
- 12, 11, 11, 10, 10, 10, 9, 9, 9, 33, 32, 32, 32, 32, 32, 31, 30, 30, 28,
- 25, 24, 23, 21, 19, 18, 17, 16, 14, 14, 14, 13, 12, 12, 11, 11, 11, 10,
- 10, 9, 9, 9, 33, 32, 32, 32, 31, 31, 30, 29, 29, 27, 25, 24, 23, 21, 19,
- 18, 17, 16, 14, 14, 14, 13, 12, 12, 12, 11, 11, 11, 10, 10, 10, 9, 33,
- 32, 32, 31, 31, 30, 29, 28, 28, 26, 24, 23, 23, 20, 19, 18, 17, 16, 14,
- 14, 14, 13, 13, 12, 12, 12, 11, 11, 10, 10, 10, 9, 32, 32, 32, 31, 30,
- 29, 28, 28, 27, 26, 24, 23, 22, 21, 19, 19, 18, 16, 15, 15, 14, 13, 13,
- 12, 12, 12, 11, 11, 10, 10, 10, 9, 30, 30, 31, 30, 29, 28, 26, 25, 24,
- 23, 22, 21, 20, 19, 18, 17, 16, 15, 14, 14, 13, 13, 13, 12, 12, 12, 11,
- 11, 11, 10, 10, 9, 29, 30, 30, 29, 28, 28, 25, 24, 23, 22, 20, 20, 19,
- 18, 17, 16, 16, 15, 13, 13, 13, 12, 12, 12, 12, 11, 11, 11, 11, 10, 10,
- 10, 28, 29, 30, 29, 28, 27, 24, 23, 21, 20, 19, 19, 18, 17, 16, 16, 15,
- 14, 13, 13, 13, 12, 12, 11, 11, 11, 11, 10, 10, 10, 10, 10, 26, 27, 28,
- 27, 26, 26, 23, 22, 20, 19, 18, 17, 17, 16, 15, 14, 14, 13, 12, 12, 12,
- 11, 11, 11, 11, 10, 10, 10, 10, 10, 9, 9, 23, 24, 25, 25, 24, 24, 22,
- 20, 19, 18, 16, 16, 15, 14, 14, 13, 13, 12, 11, 11, 11, 11, 11, 11, 10,
- 10, 10, 10, 9, 9, 9, 9, 22, 23, 24, 24, 23, 23, 21, 20, 19, 17, 16, 15,
- 15, 14, 13, 13, 12, 12, 11, 11, 11, 10, 10, 10, 10, 10, 9, 9, 9, 9, 9,
- 8, 21, 22, 23, 23, 23, 22, 20, 19, 18, 17, 15, 15, 14, 13, 13, 12, 12,
- 11, 11, 11, 10, 10, 10, 10, 9, 9, 9, 9, 9, 9, 9, 8, 19, 20, 21, 21, 20,
- 21, 19, 18, 17, 16, 14, 14, 13, 12, 12, 12, 11, 11, 10, 10, 10, 9, 9, 9,
- 9, 9, 9, 9, 9, 8, 8, 8, 17, 18, 19, 19, 19, 19, 18, 17, 16, 15, 14, 13,
- 13, 12, 11, 11, 10, 10, 9, 9, 9, 9, 9, 9, 9, 9, 8, 8, 8, 8, 8, 8, 17,
- 17, 18, 18, 18, 19, 17, 16, 16, 14, 13, 13, 12, 12, 11, 10, 10, 10, 9,
- 9, 9, 9, 8, 8, 8, 8, 8, 8, 8, 8, 8, 7, 16, 17, 17, 17, 17, 18, 16, 16,
- 15, 14, 13, 12, 12, 11, 10, 10, 10, 9, 9, 9, 8, 8, 8, 8, 8, 8, 8, 8, 8,
- 7, 7, 7, 14, 15, 16, 16, 16, 16, 15, 15, 14, 13, 12, 12, 11, 11, 10, 10,
- 9, 9, 8, 8, 8, 8, 8, 8, 8, 7, 7, 7, 7, 7, 7, 7, 13, 13, 14, 14, 14, 15,
- 14, 13, 13, 12, 11, 11, 11, 10, 9, 9, 9, 8, 8, 8, 7, 7, 7, 7, 7, 7, 7,
- 7, 7, 7, 7, 7, 13, 13, 14, 14, 14, 15, 14, 13, 13, 12, 11, 11, 11, 10,
- 9, 9, 9, 8, 8, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 6, 12, 13, 14, 14,
- 14, 14, 13, 13, 13, 12, 11, 11, 10, 10, 9, 9, 8, 8, 7, 7, 7, 7, 7, 7, 7,
- 7, 7, 6, 6, 6, 6, 6, 12, 12, 13, 13, 13, 13, 13, 12, 12, 11, 11, 10, 10,
- 9, 9, 9, 8, 8, 7, 7, 7, 7, 7, 6, 6, 6, 6, 6, 6, 6, 6, 6, 11, 12, 12, 12,
- 13, 13, 13, 12, 12, 11, 11, 10, 10, 9, 9, 8, 8, 8, 7, 7, 7, 7, 6, 6, 6,
- 6, 6, 6, 6, 6, 6, 6, 11, 12, 12, 12, 12, 12, 12, 12, 11, 11, 11, 10, 10,
- 9, 9, 8, 8, 8, 7, 7, 7, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 11, 11, 11, 12,
- 12, 12, 12, 12, 11, 11, 10, 10, 9, 9, 9, 8, 8, 8, 7, 7, 7, 6, 6, 6, 6,
- 6, 6, 5, 5, 5, 5, 5, 10, 11, 11, 11, 12, 12, 12, 11, 11, 10, 10, 10, 9,
- 9, 9, 8, 8, 7, 7, 7, 7, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5, 5, 10, 10, 11, 11,
- 11, 11, 11, 11, 11, 10, 10, 9, 9, 9, 8, 8, 8, 7, 7, 7, 7, 6, 6, 6, 6, 6,
- 5, 5, 5, 5, 5, 5, 10, 10, 10, 11, 11, 11, 11, 11, 10, 10, 10, 9, 9, 9,
- 8, 8, 8, 7, 7, 7, 6, 6, 6, 6, 5, 5, 5, 5, 5, 5, 5, 5, 9, 10, 10, 10, 10,
- 10, 11, 11, 10, 10, 9, 9, 9, 9, 8, 8, 8, 7, 7, 7, 6, 6, 6, 6, 5, 5, 5,
- 5, 5, 5, 5, 5, 9, 9, 9, 10, 10, 10, 10, 10, 10, 10, 9, 9, 9, 8, 8, 8, 7,
- 7, 7, 7, 6, 6, 6, 6, 5, 5, 5, 5, 5, 5, 5, 4, 9, 9, 9, 10, 10, 10, 10,
- 10, 10, 9, 9, 9, 9, 8, 8, 8, 7, 7, 7, 7, 6, 6, 6, 6, 5, 5, 5, 5, 5, 5,
- 4, 4, 8, 9, 9, 9, 9, 9, 9, 10, 10, 9, 9, 8, 8, 8, 8, 7, 7, 7, 7, 6, 6,
- 6, 6, 5, 5, 5, 5, 5, 5, 4, 4, 4,
- /* Size 4x8 */
- 32, 24, 14, 11, 31, 24, 15, 12, 28, 18, 12, 11, 21, 14, 10, 9, 16, 12,
- 8, 8, 13, 11, 7, 7, 11, 10, 7, 6, 10, 9, 7, 5,
- /* Size 8x4 */
- 32, 31, 28, 21, 16, 13, 11, 10, 24, 24, 18, 14, 12, 11, 10, 9, 14, 15,
- 12, 10, 8, 7, 7, 7, 11, 12, 11, 9, 8, 7, 6, 5,
- /* Size 8x16 */
- 32, 32, 28, 19, 16, 12, 11, 10, 33, 31, 30, 21, 17, 13, 12, 11, 32, 30,
- 28, 20, 17, 13, 12, 12, 30, 28, 24, 19, 16, 13, 13, 12, 28, 27, 21, 17,
- 15, 12, 12, 11, 23, 24, 19, 14, 13, 11, 11, 11, 21, 22, 18, 13, 12, 10,
- 10, 10, 18, 19, 16, 12, 10, 9, 9, 9, 16, 18, 15, 11, 10, 8, 8, 8, 13,
- 15, 13, 10, 9, 7, 8, 8, 12, 14, 13, 10, 8, 7, 7, 7, 11, 13, 12, 10, 8,
- 7, 6, 6, 11, 12, 11, 10, 8, 7, 6, 6, 10, 11, 10, 9, 8, 7, 6, 6, 9, 10,
- 10, 9, 7, 6, 6, 5, 9, 10, 10, 9, 8, 7, 6, 5,
- /* Size 16x8 */
- 32, 33, 32, 30, 28, 23, 21, 18, 16, 13, 12, 11, 11, 10, 9, 9, 32, 31,
- 30, 28, 27, 24, 22, 19, 18, 15, 14, 13, 12, 11, 10, 10, 28, 30, 28, 24,
- 21, 19, 18, 16, 15, 13, 13, 12, 11, 10, 10, 10, 19, 21, 20, 19, 17, 14,
- 13, 12, 11, 10, 10, 10, 10, 9, 9, 9, 16, 17, 17, 16, 15, 13, 12, 10, 10,
- 9, 8, 8, 8, 8, 7, 8, 12, 13, 13, 13, 12, 11, 10, 9, 8, 7, 7, 7, 7, 7, 6,
- 7, 11, 12, 12, 13, 12, 11, 10, 9, 8, 8, 7, 6, 6, 6, 6, 6, 10, 11, 12,
- 12, 11, 11, 10, 9, 8, 8, 7, 6, 6, 6, 5, 5,
- /* Size 16x32 */
- 32, 33, 32, 30, 28, 23, 19, 17, 16, 13, 12, 11, 11, 11, 10, 10, 33, 32,
- 32, 30, 29, 24, 20, 18, 17, 14, 12, 12, 12, 11, 11, 11, 33, 32, 31, 31,
- 30, 25, 21, 19, 17, 14, 13, 12, 12, 11, 11, 11, 33, 32, 31, 30, 29, 25,
- 21, 19, 17, 14, 13, 13, 12, 12, 11, 11, 32, 32, 30, 29, 28, 24, 20, 19,
- 17, 14, 13, 13, 12, 12, 12, 11, 32, 31, 29, 28, 27, 24, 21, 19, 18, 15,
- 14, 13, 12, 12, 12, 11, 30, 30, 28, 26, 24, 21, 19, 18, 16, 14, 13, 13,
- 13, 12, 12, 11, 29, 30, 28, 25, 23, 20, 18, 17, 16, 13, 12, 12, 12, 12,
- 12, 11, 28, 30, 27, 24, 21, 19, 17, 16, 15, 13, 12, 12, 12, 12, 11, 11,
- 26, 28, 26, 23, 20, 18, 16, 15, 14, 12, 12, 12, 11, 11, 11, 11, 23, 25,
- 24, 21, 19, 16, 14, 14, 13, 11, 11, 11, 11, 11, 11, 11, 22, 24, 23, 21,
- 19, 16, 14, 13, 12, 11, 10, 10, 10, 10, 10, 10, 21, 23, 22, 20, 18, 15,
- 13, 13, 12, 11, 10, 10, 10, 10, 10, 10, 19, 21, 20, 19, 17, 14, 12, 12,
- 11, 10, 9, 10, 10, 9, 10, 9, 18, 19, 19, 18, 16, 14, 12, 11, 10, 9, 9,
- 9, 9, 9, 9, 9, 17, 18, 18, 17, 16, 13, 12, 11, 10, 9, 9, 9, 9, 9, 9, 9,
- 16, 17, 18, 16, 15, 13, 11, 10, 10, 9, 8, 8, 8, 8, 8, 8, 14, 16, 16, 15,
- 14, 12, 11, 10, 9, 8, 8, 8, 8, 8, 8, 8, 13, 14, 15, 14, 13, 11, 10, 9,
- 9, 8, 7, 8, 8, 8, 8, 8, 13, 14, 14, 14, 13, 11, 10, 9, 9, 8, 7, 7, 7, 7,
- 7, 7, 12, 14, 14, 13, 13, 11, 10, 9, 8, 8, 7, 7, 7, 7, 7, 7, 12, 13, 13,
- 13, 12, 11, 9, 9, 8, 7, 7, 7, 7, 7, 7, 7, 11, 12, 13, 13, 12, 10, 10, 9,
- 8, 7, 7, 7, 6, 6, 6, 7, 11, 12, 12, 12, 11, 10, 10, 9, 8, 7, 7, 6, 6, 6,
- 6, 6, 11, 12, 12, 12, 11, 10, 10, 8, 8, 7, 7, 6, 6, 6, 6, 6, 10, 11, 12,
- 12, 11, 10, 9, 8, 8, 7, 7, 6, 6, 6, 6, 6, 10, 11, 11, 11, 10, 10, 9, 9,
- 8, 7, 7, 6, 6, 6, 6, 6, 10, 11, 11, 11, 10, 10, 9, 9, 8, 7, 7, 6, 6, 5,
- 5, 5, 9, 10, 10, 11, 10, 9, 9, 8, 7, 7, 6, 6, 6, 5, 5, 5, 9, 10, 10, 10,
- 10, 9, 9, 8, 7, 7, 6, 6, 6, 5, 5, 5, 9, 9, 10, 10, 10, 9, 9, 8, 8, 7, 7,
- 6, 6, 5, 5, 5, 8, 9, 9, 10, 10, 9, 9, 8, 8, 7, 7, 6, 6, 5, 5, 5,
- /* Size 32x16 */
- 32, 33, 33, 33, 32, 32, 30, 29, 28, 26, 23, 22, 21, 19, 18, 17, 16, 14,
- 13, 13, 12, 12, 11, 11, 11, 10, 10, 10, 9, 9, 9, 8, 33, 32, 32, 32, 32,
- 31, 30, 30, 30, 28, 25, 24, 23, 21, 19, 18, 17, 16, 14, 14, 14, 13, 12,
- 12, 12, 11, 11, 11, 10, 10, 9, 9, 32, 32, 31, 31, 30, 29, 28, 28, 27,
- 26, 24, 23, 22, 20, 19, 18, 18, 16, 15, 14, 14, 13, 13, 12, 12, 12, 11,
- 11, 10, 10, 10, 9, 30, 30, 31, 30, 29, 28, 26, 25, 24, 23, 21, 21, 20,
- 19, 18, 17, 16, 15, 14, 14, 13, 13, 13, 12, 12, 12, 11, 11, 11, 10, 10,
- 10, 28, 29, 30, 29, 28, 27, 24, 23, 21, 20, 19, 19, 18, 17, 16, 16, 15,
- 14, 13, 13, 13, 12, 12, 11, 11, 11, 10, 10, 10, 10, 10, 10, 23, 24, 25,
- 25, 24, 24, 21, 20, 19, 18, 16, 16, 15, 14, 14, 13, 13, 12, 11, 11, 11,
- 11, 10, 10, 10, 10, 10, 10, 9, 9, 9, 9, 19, 20, 21, 21, 20, 21, 19, 18,
- 17, 16, 14, 14, 13, 12, 12, 12, 11, 11, 10, 10, 10, 9, 10, 10, 10, 9, 9,
- 9, 9, 9, 9, 9, 17, 18, 19, 19, 19, 19, 18, 17, 16, 15, 14, 13, 13, 12,
- 11, 11, 10, 10, 9, 9, 9, 9, 9, 9, 8, 8, 9, 9, 8, 8, 8, 8, 16, 17, 17,
- 17, 17, 18, 16, 16, 15, 14, 13, 12, 12, 11, 10, 10, 10, 9, 9, 9, 8, 8,
- 8, 8, 8, 8, 8, 8, 7, 7, 8, 8, 13, 14, 14, 14, 14, 15, 14, 13, 13, 12,
- 11, 11, 11, 10, 9, 9, 9, 8, 8, 8, 8, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
- 12, 12, 13, 13, 13, 14, 13, 12, 12, 12, 11, 10, 10, 9, 9, 9, 8, 8, 7, 7,
- 7, 7, 7, 7, 7, 7, 7, 7, 6, 6, 7, 7, 11, 12, 12, 13, 13, 13, 13, 12, 12,
- 12, 11, 10, 10, 10, 9, 9, 8, 8, 8, 7, 7, 7, 7, 6, 6, 6, 6, 6, 6, 6, 6,
- 6, 11, 12, 12, 12, 12, 12, 13, 12, 12, 11, 11, 10, 10, 10, 9, 9, 8, 8,
- 8, 7, 7, 7, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 11, 11, 11, 12, 12, 12, 12,
- 12, 12, 11, 11, 10, 10, 9, 9, 9, 8, 8, 8, 7, 7, 7, 6, 6, 6, 6, 6, 5, 5,
- 5, 5, 5, 10, 11, 11, 11, 12, 12, 12, 12, 11, 11, 11, 10, 10, 10, 9, 9,
- 8, 8, 8, 7, 7, 7, 6, 6, 6, 6, 6, 5, 5, 5, 5, 5, 10, 11, 11, 11, 11, 11,
- 11, 11, 11, 11, 11, 10, 10, 9, 9, 9, 8, 8, 8, 7, 7, 7, 7, 6, 6, 6, 6, 5,
- 5, 5, 5, 5,
- /* Size 4x16 */
- 33, 23, 13, 11, 32, 25, 14, 11, 32, 24, 14, 12, 30, 21, 14, 12, 30, 19,
- 13, 12, 25, 16, 11, 11, 23, 15, 11, 10, 19, 14, 9, 9, 17, 13, 9, 8, 14,
- 11, 8, 8, 14, 11, 8, 7, 12, 10, 7, 6, 12, 10, 7, 6, 11, 10, 7, 6, 10, 9,
- 7, 5, 9, 9, 7, 5,
- /* Size 16x4 */
- 33, 32, 32, 30, 30, 25, 23, 19, 17, 14, 14, 12, 12, 11, 10, 9, 23, 25,
- 24, 21, 19, 16, 15, 14, 13, 11, 11, 10, 10, 10, 9, 9, 13, 14, 14, 14,
- 13, 11, 11, 9, 9, 8, 8, 7, 7, 7, 7, 7, 11, 11, 12, 12, 12, 11, 10, 9, 8,
- 8, 7, 6, 6, 6, 5, 5,
- /* Size 8x32 */
- 32, 32, 28, 19, 16, 12, 11, 10, 33, 32, 29, 20, 17, 12, 12, 11, 33, 31,
- 30, 21, 17, 13, 12, 11, 33, 31, 29, 21, 17, 13, 12, 11, 32, 30, 28, 20,
- 17, 13, 12, 12, 32, 29, 27, 21, 18, 14, 12, 12, 30, 28, 24, 19, 16, 13,
- 13, 12, 29, 28, 23, 18, 16, 12, 12, 12, 28, 27, 21, 17, 15, 12, 12, 11,
- 26, 26, 20, 16, 14, 12, 11, 11, 23, 24, 19, 14, 13, 11, 11, 11, 22, 23,
- 19, 14, 12, 10, 10, 10, 21, 22, 18, 13, 12, 10, 10, 10, 19, 20, 17, 12,
- 11, 9, 10, 10, 18, 19, 16, 12, 10, 9, 9, 9, 17, 18, 16, 12, 10, 9, 9, 9,
- 16, 18, 15, 11, 10, 8, 8, 8, 14, 16, 14, 11, 9, 8, 8, 8, 13, 15, 13, 10,
- 9, 7, 8, 8, 13, 14, 13, 10, 9, 7, 7, 7, 12, 14, 13, 10, 8, 7, 7, 7, 12,
- 13, 12, 9, 8, 7, 7, 7, 11, 13, 12, 10, 8, 7, 6, 6, 11, 12, 11, 10, 8, 7,
- 6, 6, 11, 12, 11, 10, 8, 7, 6, 6, 10, 12, 11, 9, 8, 7, 6, 6, 10, 11, 10,
- 9, 8, 7, 6, 6, 10, 11, 10, 9, 8, 7, 6, 5, 9, 10, 10, 9, 7, 6, 6, 5, 9,
- 10, 10, 9, 7, 6, 6, 5, 9, 10, 10, 9, 8, 7, 6, 5, 8, 9, 10, 9, 8, 7, 6,
- 5,
- /* Size 32x8 */
- 32, 33, 33, 33, 32, 32, 30, 29, 28, 26, 23, 22, 21, 19, 18, 17, 16, 14,
- 13, 13, 12, 12, 11, 11, 11, 10, 10, 10, 9, 9, 9, 8, 32, 32, 31, 31, 30,
- 29, 28, 28, 27, 26, 24, 23, 22, 20, 19, 18, 18, 16, 15, 14, 14, 13, 13,
- 12, 12, 12, 11, 11, 10, 10, 10, 9, 28, 29, 30, 29, 28, 27, 24, 23, 21,
- 20, 19, 19, 18, 17, 16, 16, 15, 14, 13, 13, 13, 12, 12, 11, 11, 11, 10,
- 10, 10, 10, 10, 10, 19, 20, 21, 21, 20, 21, 19, 18, 17, 16, 14, 14, 13,
- 12, 12, 12, 11, 11, 10, 10, 10, 9, 10, 10, 10, 9, 9, 9, 9, 9, 9, 9, 16,
- 17, 17, 17, 17, 18, 16, 16, 15, 14, 13, 12, 12, 11, 10, 10, 10, 9, 9, 9,
- 8, 8, 8, 8, 8, 8, 8, 8, 7, 7, 8, 8, 12, 12, 13, 13, 13, 14, 13, 12, 12,
- 12, 11, 10, 10, 9, 9, 9, 8, 8, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 6, 6, 7, 7,
- 11, 12, 12, 12, 12, 12, 13, 12, 12, 11, 11, 10, 10, 10, 9, 9, 8, 8, 8,
- 7, 7, 7, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 10, 11, 11, 11, 12, 12, 12, 12,
- 11, 11, 11, 10, 10, 10, 9, 9, 8, 8, 8, 7, 7, 7, 6, 6, 6, 6, 6, 5, 5, 5,
- 5, 5 },
- { /* Chroma */
- /* Size 4x4 */
- 29, 22, 18, 16, 22, 17, 15, 14, 18, 15, 11, 11, 16, 14, 11, 9,
- /* Size 8x8 */
- 33, 27, 22, 20, 18, 16, 15, 14, 27, 22, 22, 22, 20, 18, 17, 15, 22, 22,
- 19, 18, 17, 16, 15, 15, 20, 22, 18, 16, 14, 13, 14, 14, 18, 20, 17, 14,
- 12, 12, 12, 12, 16, 18, 16, 13, 12, 11, 11, 11, 15, 17, 15, 14, 12, 11,
- 10, 10, 14, 15, 15, 14, 12, 11, 10, 9,
- /* Size 16x16 */
- 32, 34, 31, 25, 21, 21, 20, 19, 18, 16, 16, 15, 15, 14, 14, 13, 34, 32,
- 29, 24, 22, 23, 22, 21, 20, 18, 18, 17, 16, 15, 15, 14, 31, 29, 26, 23,
- 22, 23, 22, 21, 20, 18, 18, 17, 17, 16, 16, 15, 25, 24, 23, 21, 20, 21,
- 20, 20, 19, 18, 18, 17, 17, 17, 16, 15, 21, 22, 22, 20, 19, 19, 19, 19,
- 18, 17, 17, 16, 16, 16, 16, 16, 21, 23, 23, 21, 19, 18, 17, 17, 16, 15,
- 15, 15, 15, 15, 15, 15, 20, 22, 22, 20, 19, 17, 17, 16, 15, 14, 14, 14,
- 14, 14, 14, 14, 19, 21, 21, 20, 19, 17, 16, 14, 14, 13, 13, 13, 13, 13,
- 13, 13, 18, 20, 20, 19, 18, 16, 15, 14, 13, 12, 12, 12, 12, 12, 12, 12,
- 16, 18, 18, 18, 17, 15, 14, 13, 12, 12, 11, 11, 12, 12, 12, 12, 16, 18,
- 18, 18, 17, 15, 14, 13, 12, 11, 11, 11, 11, 11, 11, 11, 15, 17, 17, 17,
- 16, 15, 14, 13, 12, 11, 11, 10, 10, 10, 10, 10, 15, 16, 17, 17, 16, 15,
- 14, 13, 12, 12, 11, 10, 10, 10, 10, 10, 14, 15, 16, 17, 16, 15, 14, 13,
- 12, 12, 11, 10, 10, 10, 9, 9, 14, 15, 16, 16, 16, 15, 14, 13, 12, 12,
- 11, 10, 10, 9, 9, 9, 13, 14, 15, 15, 16, 15, 14, 13, 12, 12, 11, 10, 10,
- 9, 9, 9,
- /* Size 32x32 */
- 32, 33, 34, 32, 31, 28, 25, 23, 21, 21, 21, 20, 20, 20, 19, 18, 18, 17,
- 16, 16, 16, 15, 15, 15, 15, 14, 14, 14, 14, 14, 13, 13, 33, 33, 33, 31,
- 30, 27, 24, 23, 22, 22, 22, 22, 21, 20, 20, 19, 19, 18, 17, 17, 17, 16,
- 16, 16, 16, 15, 15, 15, 15, 14, 14, 14, 34, 33, 32, 31, 29, 26, 24, 23,
- 22, 23, 23, 23, 22, 22, 21, 20, 20, 19, 18, 18, 18, 17, 17, 16, 16, 16,
- 15, 15, 15, 14, 14, 14, 32, 31, 31, 29, 28, 25, 24, 23, 22, 22, 23, 22,
- 22, 22, 21, 20, 20, 19, 18, 18, 18, 17, 17, 17, 16, 16, 16, 16, 15, 15,
- 15, 15, 31, 30, 29, 28, 26, 24, 23, 22, 22, 22, 23, 22, 22, 22, 21, 20,
- 20, 19, 18, 18, 18, 17, 17, 17, 17, 16, 16, 16, 16, 15, 15, 15, 28, 27,
- 26, 25, 24, 22, 22, 22, 21, 22, 23, 22, 22, 22, 21, 21, 20, 20, 19, 19,
- 19, 18, 18, 17, 17, 17, 16, 16, 16, 15, 15, 15, 25, 24, 24, 24, 23, 22,
- 21, 21, 20, 21, 21, 21, 20, 20, 20, 20, 19, 19, 18, 18, 18, 17, 17, 17,
- 17, 17, 17, 16, 16, 16, 15, 15, 23, 23, 23, 23, 22, 22, 21, 20, 20, 20,
- 20, 20, 20, 20, 19, 19, 19, 18, 17, 17, 17, 17, 17, 17, 16, 16, 16, 16,
- 16, 16, 16, 16, 21, 22, 22, 22, 22, 21, 20, 20, 19, 19, 19, 19, 19, 19,
- 19, 18, 18, 18, 17, 17, 17, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
- 21, 22, 23, 22, 22, 22, 21, 20, 19, 19, 19, 18, 18, 18, 18, 17, 17, 17,
- 16, 16, 16, 16, 16, 16, 16, 15, 15, 15, 15, 15, 15, 15, 21, 22, 23, 23,
- 23, 23, 21, 20, 19, 19, 18, 17, 17, 17, 17, 16, 16, 16, 15, 15, 15, 15,
- 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 20, 22, 23, 22, 22, 22, 21, 20,
- 19, 18, 17, 17, 17, 17, 16, 16, 16, 15, 15, 15, 15, 15, 14, 14, 15, 15,
- 14, 14, 14, 14, 14, 14, 20, 21, 22, 22, 22, 22, 20, 20, 19, 18, 17, 17,
- 17, 16, 16, 16, 15, 15, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
- 14, 14, 20, 20, 22, 22, 22, 22, 20, 20, 19, 18, 17, 17, 16, 16, 15, 15,
- 15, 14, 14, 14, 14, 13, 14, 14, 13, 14, 14, 13, 14, 14, 13, 13, 19, 20,
- 21, 21, 21, 21, 20, 19, 19, 18, 17, 16, 16, 15, 14, 14, 14, 14, 13, 13,
- 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 18, 19, 20, 20, 20, 21,
- 20, 19, 18, 17, 16, 16, 16, 15, 14, 14, 14, 13, 13, 13, 13, 13, 12, 13,
- 13, 13, 13, 13, 13, 13, 13, 12, 18, 19, 20, 20, 20, 20, 19, 19, 18, 17,
- 16, 16, 15, 15, 14, 14, 13, 13, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12,
- 12, 12, 12, 12, 17, 18, 19, 19, 19, 20, 19, 18, 18, 17, 16, 15, 15, 14,
- 14, 13, 13, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12,
- 16, 17, 18, 18, 18, 19, 18, 17, 17, 16, 15, 15, 14, 14, 13, 13, 12, 12,
- 12, 12, 11, 11, 11, 12, 12, 12, 12, 12, 12, 12, 12, 12, 16, 17, 18, 18,
- 18, 19, 18, 17, 17, 16, 15, 15, 14, 14, 13, 13, 12, 12, 12, 11, 11, 11,
- 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 16, 17, 18, 18, 18, 19, 18, 17,
- 17, 16, 15, 15, 14, 14, 13, 13, 12, 12, 11, 11, 11, 11, 11, 11, 11, 11,
- 11, 11, 11, 11, 11, 11, 15, 16, 17, 17, 17, 18, 17, 17, 16, 16, 15, 15,
- 14, 13, 13, 13, 12, 12, 11, 11, 11, 11, 11, 11, 10, 11, 11, 11, 11, 11,
- 11, 11, 15, 16, 17, 17, 17, 18, 17, 17, 16, 16, 15, 14, 14, 14, 13, 12,
- 12, 12, 11, 11, 11, 11, 10, 10, 10, 10, 10, 10, 10, 10, 10, 11, 15, 16,
- 16, 17, 17, 17, 17, 17, 16, 16, 15, 14, 14, 14, 13, 13, 12, 12, 12, 11,
- 11, 11, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 15, 16, 16, 16, 17, 17,
- 17, 16, 16, 16, 15, 15, 14, 13, 13, 13, 12, 12, 12, 11, 11, 10, 10, 10,
- 10, 10, 10, 10, 10, 10, 10, 10, 14, 15, 16, 16, 16, 17, 17, 16, 16, 15,
- 15, 15, 14, 14, 13, 13, 12, 12, 12, 11, 11, 11, 10, 10, 10, 10, 10, 10,
- 10, 10, 10, 10, 14, 15, 15, 16, 16, 16, 17, 16, 16, 15, 15, 14, 14, 14,
- 13, 13, 12, 12, 12, 11, 11, 11, 10, 10, 10, 10, 10, 10, 9, 9, 9, 10, 14,
- 15, 15, 16, 16, 16, 16, 16, 16, 15, 15, 14, 14, 13, 13, 13, 12, 12, 12,
- 11, 11, 11, 10, 10, 10, 10, 10, 9, 9, 9, 9, 9, 14, 15, 15, 15, 16, 16,
- 16, 16, 16, 15, 15, 14, 14, 14, 13, 13, 12, 12, 12, 11, 11, 11, 10, 10,
- 10, 10, 9, 9, 9, 9, 9, 9, 14, 14, 14, 15, 15, 15, 16, 16, 16, 15, 15,
- 14, 14, 14, 13, 13, 12, 12, 12, 11, 11, 11, 10, 10, 10, 10, 9, 9, 9, 9,
- 9, 9, 13, 14, 14, 15, 15, 15, 15, 16, 16, 15, 15, 14, 14, 13, 13, 13,
- 12, 12, 12, 11, 11, 11, 10, 10, 10, 10, 9, 9, 9, 9, 9, 9, 13, 14, 14,
- 15, 15, 15, 15, 16, 16, 15, 15, 14, 14, 13, 13, 12, 12, 12, 12, 11, 11,
- 11, 11, 10, 10, 10, 10, 9, 9, 9, 9, 9,
- /* Size 4x8 */
- 33, 22, 17, 16, 26, 23, 19, 17, 22, 18, 16, 16, 21, 17, 14, 14, 19, 16,
- 12, 12, 17, 15, 11, 11, 16, 15, 11, 10, 15, 14, 12, 10,
- /* Size 8x4 */
- 33, 26, 22, 21, 19, 17, 16, 15, 22, 23, 18, 17, 16, 15, 15, 14, 17, 19,
- 16, 14, 12, 11, 11, 12, 16, 17, 16, 14, 12, 11, 10, 10,
- /* Size 8x16 */
- 32, 28, 21, 20, 18, 16, 15, 14, 34, 26, 22, 21, 20, 17, 16, 16, 31, 24,
- 22, 22, 20, 17, 17, 16, 24, 22, 20, 20, 19, 17, 17, 17, 21, 21, 19, 19,
- 18, 17, 17, 17, 21, 22, 19, 17, 16, 15, 16, 16, 20, 22, 19, 16, 15, 14,
- 14, 15, 19, 21, 19, 15, 14, 13, 13, 14, 18, 20, 18, 15, 13, 12, 13, 13,
- 16, 19, 17, 14, 12, 11, 12, 12, 16, 18, 17, 14, 12, 11, 11, 12, 15, 17,
- 16, 14, 12, 11, 10, 11, 15, 17, 16, 14, 12, 11, 10, 10, 14, 16, 16, 14,
- 12, 11, 10, 10, 14, 15, 16, 14, 12, 11, 10, 10, 13, 15, 15, 14, 12, 11,
- 10, 9,
- /* Size 16x8 */
- 32, 34, 31, 24, 21, 21, 20, 19, 18, 16, 16, 15, 15, 14, 14, 13, 28, 26,
- 24, 22, 21, 22, 22, 21, 20, 19, 18, 17, 17, 16, 15, 15, 21, 22, 22, 20,
- 19, 19, 19, 19, 18, 17, 17, 16, 16, 16, 16, 15, 20, 21, 22, 20, 19, 17,
- 16, 15, 15, 14, 14, 14, 14, 14, 14, 14, 18, 20, 20, 19, 18, 16, 15, 14,
- 13, 12, 12, 12, 12, 12, 12, 12, 16, 17, 17, 17, 17, 15, 14, 13, 12, 11,
- 11, 11, 11, 11, 11, 11, 15, 16, 17, 17, 17, 16, 14, 13, 13, 12, 11, 10,
- 10, 10, 10, 10, 14, 16, 16, 17, 17, 16, 15, 14, 13, 12, 12, 11, 10, 10,
- 10, 9,
- /* Size 16x32 */
- 32, 33, 28, 24, 21, 21, 20, 19, 18, 16, 16, 15, 15, 15, 14, 14, 33, 33,
- 27, 24, 22, 22, 20, 20, 19, 17, 16, 16, 16, 16, 15, 15, 34, 32, 26, 24,
- 22, 23, 21, 20, 20, 18, 17, 17, 16, 16, 16, 15, 32, 30, 25, 23, 22, 23,
- 21, 21, 20, 18, 17, 17, 17, 16, 16, 16, 31, 28, 24, 23, 22, 22, 22, 21,
- 20, 18, 17, 17, 17, 17, 16, 16, 28, 26, 22, 22, 22, 23, 22, 21, 20, 19,
- 18, 18, 17, 17, 17, 16, 24, 24, 22, 21, 20, 21, 20, 20, 19, 18, 17, 18,
- 17, 17, 17, 16, 23, 23, 22, 21, 20, 20, 20, 19, 19, 17, 17, 17, 17, 17,
- 17, 17, 21, 22, 21, 20, 19, 19, 19, 19, 18, 17, 17, 16, 17, 16, 17, 17,
- 21, 22, 22, 20, 19, 18, 18, 17, 17, 16, 16, 16, 16, 16, 16, 16, 21, 23,
- 22, 21, 19, 18, 17, 17, 16, 15, 15, 15, 16, 16, 16, 16, 21, 22, 22, 21,
- 19, 17, 17, 16, 16, 15, 14, 15, 15, 15, 15, 15, 20, 22, 22, 20, 19, 17,
- 16, 16, 15, 14, 14, 14, 14, 15, 15, 15, 20, 21, 22, 20, 19, 17, 16, 15,
- 14, 14, 13, 14, 14, 14, 14, 14, 19, 20, 21, 20, 19, 17, 15, 14, 14, 13,
- 13, 13, 13, 14, 14, 14, 19, 20, 21, 20, 18, 16, 15, 14, 14, 13, 12, 13,
- 13, 13, 13, 13, 18, 20, 20, 19, 18, 16, 15, 14, 13, 12, 12, 12, 13, 13,
- 13, 13, 17, 19, 20, 19, 18, 16, 14, 14, 13, 12, 12, 12, 12, 12, 13, 13,
- 16, 18, 19, 18, 17, 15, 14, 13, 12, 12, 11, 12, 12, 12, 12, 13, 16, 18,
- 19, 18, 17, 15, 14, 13, 12, 12, 11, 11, 12, 12, 12, 12, 16, 17, 18, 18,
- 17, 15, 14, 13, 12, 11, 11, 11, 11, 11, 12, 12, 15, 17, 18, 17, 16, 15,
- 13, 13, 12, 11, 11, 11, 11, 11, 11, 11, 15, 17, 17, 17, 16, 14, 14, 13,
- 12, 11, 11, 11, 10, 11, 11, 11, 15, 17, 17, 17, 16, 15, 14, 13, 12, 12,
- 11, 10, 10, 10, 11, 11, 15, 16, 17, 17, 16, 15, 14, 13, 12, 12, 11, 11,
- 10, 10, 10, 11, 14, 16, 16, 17, 15, 15, 14, 13, 12, 11, 11, 10, 10, 10,
- 10, 10, 14, 16, 16, 17, 16, 15, 14, 13, 12, 12, 11, 10, 10, 10, 10, 10,
- 14, 16, 16, 16, 16, 15, 14, 13, 12, 12, 11, 10, 10, 10, 10, 10, 14, 15,
- 15, 16, 16, 15, 14, 13, 12, 12, 11, 11, 10, 10, 10, 10, 14, 15, 15, 16,
- 16, 14, 14, 13, 12, 12, 11, 11, 10, 10, 9, 9, 13, 15, 15, 16, 15, 14,
- 14, 13, 12, 12, 11, 11, 10, 10, 9, 9, 13, 15, 15, 15, 15, 14, 14, 13,
- 13, 11, 11, 10, 10, 9, 9, 9,
- /* Size 32x16 */
- 32, 33, 34, 32, 31, 28, 24, 23, 21, 21, 21, 21, 20, 20, 19, 19, 18, 17,
- 16, 16, 16, 15, 15, 15, 15, 14, 14, 14, 14, 14, 13, 13, 33, 33, 32, 30,
- 28, 26, 24, 23, 22, 22, 23, 22, 22, 21, 20, 20, 20, 19, 18, 18, 17, 17,
- 17, 17, 16, 16, 16, 16, 15, 15, 15, 15, 28, 27, 26, 25, 24, 22, 22, 22,
- 21, 22, 22, 22, 22, 22, 21, 21, 20, 20, 19, 19, 18, 18, 17, 17, 17, 16,
- 16, 16, 15, 15, 15, 15, 24, 24, 24, 23, 23, 22, 21, 21, 20, 20, 21, 21,
- 20, 20, 20, 20, 19, 19, 18, 18, 18, 17, 17, 17, 17, 17, 17, 16, 16, 16,
- 16, 15, 21, 22, 22, 22, 22, 22, 20, 20, 19, 19, 19, 19, 19, 19, 19, 18,
- 18, 18, 17, 17, 17, 16, 16, 16, 16, 15, 16, 16, 16, 16, 15, 15, 21, 22,
- 23, 23, 22, 23, 21, 20, 19, 18, 18, 17, 17, 17, 17, 16, 16, 16, 15, 15,
- 15, 15, 14, 15, 15, 15, 15, 15, 15, 14, 14, 14, 20, 20, 21, 21, 22, 22,
- 20, 20, 19, 18, 17, 17, 16, 16, 15, 15, 15, 14, 14, 14, 14, 13, 14, 14,
- 14, 14, 14, 14, 14, 14, 14, 14, 19, 20, 20, 21, 21, 21, 20, 19, 19, 17,
- 17, 16, 16, 15, 14, 14, 14, 14, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
- 13, 13, 13, 13, 18, 19, 20, 20, 20, 20, 19, 19, 18, 17, 16, 16, 15, 14,
- 14, 14, 13, 13, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 13,
- 16, 17, 18, 18, 18, 19, 18, 17, 17, 16, 15, 15, 14, 14, 13, 13, 12, 12,
- 12, 12, 11, 11, 11, 12, 12, 11, 12, 12, 12, 12, 12, 11, 16, 16, 17, 17,
- 17, 18, 17, 17, 17, 16, 15, 14, 14, 13, 13, 12, 12, 12, 11, 11, 11, 11,
- 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 15, 16, 17, 17, 17, 18, 18, 17,
- 16, 16, 15, 15, 14, 14, 13, 13, 12, 12, 12, 11, 11, 11, 11, 10, 11, 10,
- 10, 10, 11, 11, 11, 10, 15, 16, 16, 17, 17, 17, 17, 17, 17, 16, 16, 15,
- 14, 14, 13, 13, 13, 12, 12, 12, 11, 11, 10, 10, 10, 10, 10, 10, 10, 10,
- 10, 10, 15, 16, 16, 16, 17, 17, 17, 17, 16, 16, 16, 15, 15, 14, 14, 13,
- 13, 12, 12, 12, 11, 11, 11, 10, 10, 10, 10, 10, 10, 10, 10, 9, 14, 15,
- 16, 16, 16, 17, 17, 17, 17, 16, 16, 15, 15, 14, 14, 13, 13, 13, 12, 12,
- 12, 11, 11, 11, 10, 10, 10, 10, 10, 9, 9, 9, 14, 15, 15, 16, 16, 16, 16,
- 17, 17, 16, 16, 15, 15, 14, 14, 13, 13, 13, 13, 12, 12, 11, 11, 11, 11,
- 10, 10, 10, 10, 9, 9, 9,
- /* Size 4x16 */
- 33, 21, 16, 15, 32, 23, 18, 16, 28, 22, 18, 17, 24, 21, 18, 17, 22, 19,
- 17, 16, 23, 18, 15, 16, 22, 17, 14, 15, 20, 17, 13, 14, 20, 16, 12, 13,
- 18, 15, 12, 12, 17, 15, 11, 11, 17, 14, 11, 11, 16, 15, 12, 10, 16, 15,
- 12, 10, 15, 15, 12, 10, 15, 14, 12, 10,
- /* Size 16x4 */
- 33, 32, 28, 24, 22, 23, 22, 20, 20, 18, 17, 17, 16, 16, 15, 15, 21, 23,
- 22, 21, 19, 18, 17, 17, 16, 15, 15, 14, 15, 15, 15, 14, 16, 18, 18, 18,
- 17, 15, 14, 13, 12, 12, 11, 11, 12, 12, 12, 12, 15, 16, 17, 17, 16, 16,
- 15, 14, 13, 12, 11, 11, 10, 10, 10, 10,
- /* Size 8x32 */
- 32, 28, 21, 20, 18, 16, 15, 14, 33, 27, 22, 20, 19, 16, 16, 15, 34, 26,
- 22, 21, 20, 17, 16, 16, 32, 25, 22, 21, 20, 17, 17, 16, 31, 24, 22, 22,
- 20, 17, 17, 16, 28, 22, 22, 22, 20, 18, 17, 17, 24, 22, 20, 20, 19, 17,
- 17, 17, 23, 22, 20, 20, 19, 17, 17, 17, 21, 21, 19, 19, 18, 17, 17, 17,
- 21, 22, 19, 18, 17, 16, 16, 16, 21, 22, 19, 17, 16, 15, 16, 16, 21, 22,
- 19, 17, 16, 14, 15, 15, 20, 22, 19, 16, 15, 14, 14, 15, 20, 22, 19, 16,
- 14, 13, 14, 14, 19, 21, 19, 15, 14, 13, 13, 14, 19, 21, 18, 15, 14, 12,
- 13, 13, 18, 20, 18, 15, 13, 12, 13, 13, 17, 20, 18, 14, 13, 12, 12, 13,
- 16, 19, 17, 14, 12, 11, 12, 12, 16, 19, 17, 14, 12, 11, 12, 12, 16, 18,
- 17, 14, 12, 11, 11, 12, 15, 18, 16, 13, 12, 11, 11, 11, 15, 17, 16, 14,
- 12, 11, 10, 11, 15, 17, 16, 14, 12, 11, 10, 11, 15, 17, 16, 14, 12, 11,
- 10, 10, 14, 16, 15, 14, 12, 11, 10, 10, 14, 16, 16, 14, 12, 11, 10, 10,
- 14, 16, 16, 14, 12, 11, 10, 10, 14, 15, 16, 14, 12, 11, 10, 10, 14, 15,
- 16, 14, 12, 11, 10, 9, 13, 15, 15, 14, 12, 11, 10, 9, 13, 15, 15, 14,
- 13, 11, 10, 9,
- /* Size 32x8 */
- 32, 33, 34, 32, 31, 28, 24, 23, 21, 21, 21, 21, 20, 20, 19, 19, 18, 17,
- 16, 16, 16, 15, 15, 15, 15, 14, 14, 14, 14, 14, 13, 13, 28, 27, 26, 25,
- 24, 22, 22, 22, 21, 22, 22, 22, 22, 22, 21, 21, 20, 20, 19, 19, 18, 18,
- 17, 17, 17, 16, 16, 16, 15, 15, 15, 15, 21, 22, 22, 22, 22, 22, 20, 20,
- 19, 19, 19, 19, 19, 19, 19, 18, 18, 18, 17, 17, 17, 16, 16, 16, 16, 15,
- 16, 16, 16, 16, 15, 15, 20, 20, 21, 21, 22, 22, 20, 20, 19, 18, 17, 17,
- 16, 16, 15, 15, 15, 14, 14, 14, 14, 13, 14, 14, 14, 14, 14, 14, 14, 14,
- 14, 14, 18, 19, 20, 20, 20, 20, 19, 19, 18, 17, 16, 16, 15, 14, 14, 14,
- 13, 13, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 13, 16, 16,
- 17, 17, 17, 18, 17, 17, 17, 16, 15, 14, 14, 13, 13, 12, 12, 12, 11, 11,
- 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 15, 16, 16, 17, 17, 17,
- 17, 17, 17, 16, 16, 15, 14, 14, 13, 13, 13, 12, 12, 12, 11, 11, 10, 10,
- 10, 10, 10, 10, 10, 10, 10, 10, 14, 15, 16, 16, 16, 17, 17, 17, 17, 16,
- 16, 15, 15, 14, 14, 13, 13, 13, 12, 12, 12, 11, 11, 11, 10, 10, 10, 10,
- 10, 9, 9, 9 },
- },
- {
- { /* Luma */
- /* Size 4x4 */
- 32, 25, 15, 11, 25, 16, 12, 10, 15, 12, 8, 7, 11, 10, 7, 6,
- /* Size 8x8 */
- 32, 32, 28, 22, 17, 13, 11, 10, 32, 29, 26, 22, 18, 14, 12, 11, 28, 26,
- 20, 17, 14, 12, 11, 10, 22, 22, 17, 14, 12, 10, 10, 9, 17, 18, 14, 12,
- 10, 8, 8, 8, 13, 14, 12, 10, 8, 7, 7, 7, 11, 12, 11, 10, 8, 7, 6, 6, 10,
- 11, 10, 9, 8, 7, 6, 5,
- /* Size 16x16 */
- 32, 33, 33, 32, 28, 26, 22, 19, 17, 14, 13, 12, 11, 10, 10, 9, 33, 32,
- 32, 31, 30, 28, 23, 20, 18, 16, 14, 13, 12, 11, 10, 10, 33, 32, 31, 30,
- 28, 26, 23, 20, 18, 16, 14, 13, 12, 12, 11, 10, 32, 31, 30, 28, 26, 24,
- 22, 20, 18, 16, 14, 13, 13, 12, 11, 10, 28, 30, 28, 26, 21, 20, 18, 17,
- 16, 14, 13, 12, 12, 11, 11, 10, 26, 28, 26, 24, 20, 19, 17, 16, 15, 13,
- 12, 12, 11, 11, 10, 10, 22, 23, 23, 22, 18, 17, 15, 14, 13, 12, 11, 10,
- 10, 10, 9, 9, 19, 20, 20, 20, 17, 16, 14, 12, 12, 11, 10, 9, 9, 9, 9, 8,
- 17, 18, 18, 18, 16, 15, 13, 12, 11, 10, 9, 9, 8, 8, 8, 8, 14, 16, 16,
- 16, 14, 13, 12, 11, 10, 9, 8, 8, 8, 8, 8, 7, 13, 14, 14, 14, 13, 12, 11,
- 10, 9, 8, 8, 7, 7, 7, 7, 7, 12, 13, 13, 13, 12, 12, 10, 9, 9, 8, 7, 7,
- 7, 7, 6, 6, 11, 12, 12, 13, 12, 11, 10, 9, 8, 8, 7, 7, 6, 6, 6, 6, 10,
- 11, 12, 12, 11, 11, 10, 9, 8, 8, 7, 7, 6, 6, 6, 5, 10, 10, 11, 11, 11,
- 10, 9, 9, 8, 8, 7, 6, 6, 6, 5, 5, 9, 10, 10, 10, 10, 10, 9, 8, 8, 7, 7,
- 6, 6, 5, 5, 5,
- /* Size 32x32 */
- 32, 33, 33, 33, 33, 32, 32, 30, 28, 27, 26, 23, 22, 21, 19, 17, 17, 16,
- 14, 13, 13, 12, 12, 12, 11, 11, 10, 10, 10, 9, 9, 9, 33, 32, 32, 32, 32,
- 32, 31, 30, 29, 28, 27, 24, 23, 22, 20, 18, 18, 17, 15, 14, 13, 13, 12,
- 12, 12, 11, 11, 11, 10, 10, 10, 9, 33, 32, 32, 32, 32, 32, 31, 31, 30,
- 28, 28, 25, 23, 22, 20, 19, 18, 17, 16, 15, 14, 13, 13, 12, 12, 12, 11,
- 11, 10, 10, 10, 9, 33, 32, 32, 32, 32, 31, 31, 30, 29, 28, 27, 25, 23,
- 23, 21, 19, 18, 17, 16, 15, 14, 14, 13, 13, 12, 12, 12, 11, 11, 10, 10,
- 10, 33, 32, 32, 32, 31, 30, 30, 29, 28, 27, 26, 24, 23, 22, 20, 19, 18,
- 17, 16, 15, 14, 14, 13, 13, 12, 12, 12, 11, 11, 11, 10, 10, 32, 32, 32,
- 31, 30, 29, 28, 28, 27, 26, 26, 24, 23, 22, 21, 19, 19, 18, 16, 16, 15,
- 14, 14, 13, 13, 12, 12, 11, 11, 11, 10, 10, 32, 31, 31, 31, 30, 28, 28,
- 27, 26, 25, 24, 23, 22, 21, 20, 19, 18, 17, 16, 15, 14, 14, 13, 13, 13,
- 12, 12, 12, 11, 11, 10, 10, 30, 30, 31, 30, 29, 28, 27, 26, 24, 23, 23,
- 22, 20, 20, 19, 18, 17, 16, 15, 14, 14, 13, 13, 12, 12, 12, 12, 12, 11,
- 11, 11, 10, 28, 29, 30, 29, 28, 27, 26, 24, 21, 20, 20, 19, 18, 18, 17,
- 16, 16, 15, 14, 13, 13, 13, 12, 12, 12, 12, 11, 11, 11, 11, 10, 10, 27,
- 28, 28, 28, 27, 26, 25, 23, 20, 20, 20, 18, 18, 17, 16, 15, 15, 14, 13,
- 13, 12, 12, 12, 12, 11, 11, 11, 11, 11, 10, 10, 10, 26, 27, 28, 27, 26,
- 26, 24, 23, 20, 20, 19, 18, 17, 17, 16, 15, 15, 14, 13, 13, 12, 12, 12,
- 11, 11, 11, 11, 10, 10, 10, 10, 10, 23, 24, 25, 25, 24, 24, 23, 22, 19,
- 18, 18, 16, 16, 15, 14, 14, 13, 13, 12, 12, 11, 11, 11, 11, 11, 10, 10,
- 10, 10, 10, 9, 9, 22, 23, 23, 23, 23, 23, 22, 20, 18, 18, 17, 16, 15,
- 14, 14, 13, 13, 12, 12, 11, 11, 11, 10, 10, 10, 10, 10, 10, 9, 9, 9, 9,
- 21, 22, 22, 23, 22, 22, 21, 20, 18, 17, 17, 15, 14, 14, 13, 13, 12, 12,
- 11, 11, 10, 10, 10, 10, 10, 10, 9, 9, 9, 9, 9, 9, 19, 20, 20, 21, 20,
- 21, 20, 19, 17, 16, 16, 14, 14, 13, 12, 12, 12, 11, 11, 10, 10, 10, 9,
- 9, 9, 9, 9, 9, 9, 8, 8, 8, 17, 18, 19, 19, 19, 19, 19, 18, 16, 15, 15,
- 14, 13, 13, 12, 11, 11, 10, 10, 10, 9, 9, 9, 9, 9, 9, 9, 8, 8, 8, 8, 8,
- 17, 18, 18, 18, 18, 19, 18, 17, 16, 15, 15, 13, 13, 12, 12, 11, 11, 10,
- 10, 9, 9, 9, 9, 9, 8, 8, 8, 8, 8, 8, 8, 8, 16, 17, 17, 17, 17, 18, 17,
- 16, 15, 14, 14, 13, 12, 12, 11, 10, 10, 10, 9, 9, 9, 8, 8, 8, 8, 8, 8,
- 8, 8, 8, 8, 7, 14, 15, 16, 16, 16, 16, 16, 15, 14, 13, 13, 12, 12, 11,
- 11, 10, 10, 9, 9, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 7, 7, 7, 13, 14, 15, 15,
- 15, 16, 15, 14, 13, 13, 13, 12, 11, 11, 10, 10, 9, 9, 8, 8, 8, 8, 8, 8,
- 7, 7, 7, 7, 7, 7, 7, 7, 13, 13, 14, 14, 14, 15, 14, 14, 13, 12, 12, 11,
- 11, 10, 10, 9, 9, 9, 8, 8, 8, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 12, 13,
- 13, 14, 14, 14, 14, 13, 13, 12, 12, 11, 11, 10, 10, 9, 9, 8, 8, 8, 7, 7,
- 7, 7, 7, 7, 7, 7, 7, 7, 7, 6, 12, 12, 13, 13, 13, 14, 13, 13, 12, 12,
- 12, 11, 10, 10, 9, 9, 9, 8, 8, 8, 7, 7, 7, 7, 7, 6, 7, 6, 6, 6, 6, 6,
- 12, 12, 12, 13, 13, 13, 13, 12, 12, 12, 11, 11, 10, 10, 9, 9, 9, 8, 8,
- 8, 7, 7, 7, 7, 7, 6, 6, 6, 6, 6, 6, 6, 11, 12, 12, 12, 12, 13, 13, 12,
- 12, 11, 11, 11, 10, 10, 9, 9, 8, 8, 8, 7, 7, 7, 7, 7, 6, 6, 6, 6, 6, 6,
- 6, 6, 11, 11, 12, 12, 12, 12, 12, 12, 12, 11, 11, 10, 10, 10, 9, 9, 8,
- 8, 8, 7, 7, 7, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 10, 11, 11, 12, 12, 12, 12,
- 12, 11, 11, 11, 10, 10, 9, 9, 9, 8, 8, 8, 7, 7, 7, 7, 6, 6, 6, 6, 6, 6,
- 5, 5, 5, 10, 11, 11, 11, 11, 11, 12, 12, 11, 11, 10, 10, 10, 9, 9, 8, 8,
- 8, 8, 7, 7, 7, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 10, 10, 10, 11, 11, 11, 11,
- 11, 11, 11, 10, 10, 9, 9, 9, 8, 8, 8, 8, 7, 7, 7, 6, 6, 6, 6, 6, 6, 5,
- 5, 5, 5, 9, 10, 10, 10, 11, 11, 11, 11, 11, 10, 10, 10, 9, 9, 8, 8, 8,
- 8, 7, 7, 7, 7, 6, 6, 6, 6, 5, 5, 5, 5, 5, 5, 9, 10, 10, 10, 10, 10, 10,
- 11, 10, 10, 10, 9, 9, 9, 8, 8, 8, 8, 7, 7, 7, 7, 6, 6, 6, 6, 5, 5, 5, 5,
- 5, 5, 9, 9, 9, 10, 10, 10, 10, 10, 10, 10, 10, 9, 9, 9, 8, 8, 8, 7, 7,
- 7, 7, 6, 6, 6, 6, 5, 5, 5, 5, 5, 5, 5,
- /* Size 4x8 */
- 32, 24, 15, 12, 31, 24, 16, 12, 28, 18, 13, 12, 22, 15, 11, 10, 17, 13,
- 9, 8, 14, 11, 8, 7, 12, 11, 8, 6, 10, 10, 8, 6,
- /* Size 8x4 */
- 32, 31, 28, 22, 17, 14, 12, 10, 24, 24, 18, 15, 13, 11, 11, 10, 15, 16,
- 13, 11, 9, 8, 8, 8, 12, 12, 12, 10, 8, 7, 6, 6,
- /* Size 8x16 */
- 32, 32, 28, 22, 16, 13, 11, 11, 33, 32, 29, 23, 17, 14, 12, 11, 32, 30,
- 28, 23, 17, 14, 13, 12, 32, 29, 26, 22, 17, 14, 13, 12, 28, 28, 21, 18,
- 15, 13, 12, 12, 26, 26, 20, 17, 14, 12, 11, 11, 22, 23, 18, 15, 12, 11,
- 10, 10, 19, 20, 17, 14, 11, 10, 9, 9, 17, 18, 16, 13, 10, 9, 9, 9, 14,
- 16, 14, 12, 9, 8, 8, 8, 13, 15, 13, 11, 9, 8, 7, 7, 12, 13, 12, 10, 8,
- 7, 7, 7, 11, 12, 12, 10, 8, 7, 7, 6, 10, 12, 11, 9, 8, 7, 6, 6, 10, 11,
- 11, 9, 8, 7, 6, 6, 9, 10, 10, 9, 8, 7, 6, 5,
- /* Size 16x8 */
- 32, 33, 32, 32, 28, 26, 22, 19, 17, 14, 13, 12, 11, 10, 10, 9, 32, 32,
- 30, 29, 28, 26, 23, 20, 18, 16, 15, 13, 12, 12, 11, 10, 28, 29, 28, 26,
- 21, 20, 18, 17, 16, 14, 13, 12, 12, 11, 11, 10, 22, 23, 23, 22, 18, 17,
- 15, 14, 13, 12, 11, 10, 10, 9, 9, 9, 16, 17, 17, 17, 15, 14, 12, 11, 10,
- 9, 9, 8, 8, 8, 8, 8, 13, 14, 14, 14, 13, 12, 11, 10, 9, 8, 8, 7, 7, 7,
- 7, 7, 11, 12, 13, 13, 12, 11, 10, 9, 9, 8, 7, 7, 7, 6, 6, 6, 11, 11, 12,
- 12, 12, 11, 10, 9, 9, 8, 7, 7, 6, 6, 6, 5,
- /* Size 16x32 */
- 32, 33, 32, 32, 28, 23, 22, 19, 16, 14, 13, 12, 11, 11, 11, 10, 33, 32,
- 32, 31, 29, 24, 23, 20, 17, 15, 14, 12, 12, 12, 11, 11, 33, 32, 32, 31,
- 29, 25, 23, 21, 17, 15, 14, 13, 12, 12, 11, 11, 33, 32, 31, 31, 29, 25,
- 23, 21, 17, 16, 14, 13, 12, 12, 12, 11, 32, 32, 30, 30, 28, 24, 23, 20,
- 17, 16, 14, 13, 13, 12, 12, 11, 32, 31, 29, 28, 27, 24, 23, 21, 18, 16,
- 15, 13, 13, 12, 12, 12, 32, 31, 29, 28, 26, 23, 22, 20, 17, 16, 14, 13,
- 13, 13, 12, 12, 30, 30, 28, 27, 24, 21, 20, 19, 16, 15, 14, 13, 12, 13,
- 12, 12, 28, 30, 28, 26, 21, 19, 18, 17, 15, 14, 13, 12, 12, 12, 12, 12,
- 27, 28, 26, 25, 21, 18, 18, 16, 14, 13, 13, 12, 12, 12, 11, 11, 26, 28,
- 26, 24, 20, 18, 17, 16, 14, 13, 12, 11, 11, 11, 11, 11, 23, 25, 24, 23,
- 19, 16, 16, 14, 13, 12, 11, 11, 11, 11, 11, 10, 22, 23, 23, 22, 18, 16,
- 15, 14, 12, 11, 11, 10, 10, 10, 10, 10, 21, 22, 22, 21, 18, 15, 14, 13,
- 12, 11, 11, 10, 10, 10, 10, 10, 19, 21, 20, 20, 17, 14, 14, 12, 11, 10,
- 10, 9, 9, 10, 9, 10, 18, 19, 19, 19, 16, 14, 13, 12, 10, 10, 9, 9, 9, 9,
- 9, 9, 17, 18, 18, 18, 16, 13, 13, 12, 10, 10, 9, 9, 9, 9, 9, 9, 16, 17,
- 17, 17, 15, 13, 12, 11, 10, 9, 9, 8, 8, 8, 8, 8, 14, 16, 16, 16, 14, 12,
- 12, 11, 9, 9, 8, 8, 8, 8, 8, 8, 13, 15, 15, 15, 13, 12, 11, 10, 9, 8, 8,
- 8, 8, 8, 8, 8, 13, 14, 15, 14, 13, 11, 11, 10, 9, 8, 8, 7, 7, 7, 7, 8,
- 12, 14, 14, 14, 13, 11, 11, 10, 8, 8, 8, 7, 7, 7, 7, 7, 12, 13, 13, 13,
- 12, 11, 10, 9, 8, 8, 7, 7, 7, 7, 7, 7, 12, 13, 13, 13, 12, 11, 10, 9, 8,
- 8, 7, 7, 7, 7, 7, 6, 11, 12, 12, 13, 12, 11, 10, 9, 8, 8, 7, 7, 7, 6, 6,
- 6, 11, 12, 12, 12, 11, 11, 10, 9, 9, 8, 7, 7, 6, 6, 6, 6, 10, 12, 12,
- 12, 11, 11, 9, 9, 8, 8, 7, 6, 6, 6, 6, 6, 10, 11, 11, 12, 11, 10, 9, 9,
- 8, 8, 7, 6, 6, 6, 6, 6, 10, 11, 11, 11, 11, 10, 9, 9, 8, 8, 7, 7, 6, 6,
- 6, 6, 10, 10, 11, 11, 11, 10, 9, 9, 8, 8, 7, 7, 6, 6, 5, 5, 9, 10, 10,
- 11, 10, 9, 9, 8, 8, 7, 7, 6, 6, 6, 5, 5, 9, 10, 10, 10, 10, 9, 9, 8, 8,
- 7, 7, 6, 6, 5, 5, 5,
- /* Size 32x16 */
- 32, 33, 33, 33, 32, 32, 32, 30, 28, 27, 26, 23, 22, 21, 19, 18, 17, 16,
- 14, 13, 13, 12, 12, 12, 11, 11, 10, 10, 10, 10, 9, 9, 33, 32, 32, 32,
- 32, 31, 31, 30, 30, 28, 28, 25, 23, 22, 21, 19, 18, 17, 16, 15, 14, 14,
- 13, 13, 12, 12, 12, 11, 11, 10, 10, 10, 32, 32, 32, 31, 30, 29, 29, 28,
- 28, 26, 26, 24, 23, 22, 20, 19, 18, 17, 16, 15, 15, 14, 13, 13, 12, 12,
- 12, 11, 11, 11, 10, 10, 32, 31, 31, 31, 30, 28, 28, 27, 26, 25, 24, 23,
- 22, 21, 20, 19, 18, 17, 16, 15, 14, 14, 13, 13, 13, 12, 12, 12, 11, 11,
- 11, 10, 28, 29, 29, 29, 28, 27, 26, 24, 21, 21, 20, 19, 18, 18, 17, 16,
- 16, 15, 14, 13, 13, 13, 12, 12, 12, 11, 11, 11, 11, 11, 10, 10, 23, 24,
- 25, 25, 24, 24, 23, 21, 19, 18, 18, 16, 16, 15, 14, 14, 13, 13, 12, 12,
- 11, 11, 11, 11, 11, 11, 11, 10, 10, 10, 9, 9, 22, 23, 23, 23, 23, 23,
- 22, 20, 18, 18, 17, 16, 15, 14, 14, 13, 13, 12, 12, 11, 11, 11, 10, 10,
- 10, 10, 9, 9, 9, 9, 9, 9, 19, 20, 21, 21, 20, 21, 20, 19, 17, 16, 16,
- 14, 14, 13, 12, 12, 12, 11, 11, 10, 10, 10, 9, 9, 9, 9, 9, 9, 9, 9, 8,
- 8, 16, 17, 17, 17, 17, 18, 17, 16, 15, 14, 14, 13, 12, 12, 11, 10, 10,
- 10, 9, 9, 9, 8, 8, 8, 8, 9, 8, 8, 8, 8, 8, 8, 14, 15, 15, 16, 16, 16,
- 16, 15, 14, 13, 13, 12, 11, 11, 10, 10, 10, 9, 9, 8, 8, 8, 8, 8, 8, 8,
- 8, 8, 8, 8, 7, 7, 13, 14, 14, 14, 14, 15, 14, 14, 13, 13, 12, 11, 11,
- 11, 10, 9, 9, 9, 8, 8, 8, 8, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 12, 12, 13,
- 13, 13, 13, 13, 13, 12, 12, 11, 11, 10, 10, 9, 9, 9, 8, 8, 8, 7, 7, 7,
- 7, 7, 7, 6, 6, 7, 7, 6, 6, 11, 12, 12, 12, 13, 13, 13, 12, 12, 12, 11,
- 11, 10, 10, 9, 9, 9, 8, 8, 8, 7, 7, 7, 7, 7, 6, 6, 6, 6, 6, 6, 6, 11,
- 12, 12, 12, 12, 12, 13, 13, 12, 12, 11, 11, 10, 10, 10, 9, 9, 8, 8, 8,
- 7, 7, 7, 7, 6, 6, 6, 6, 6, 6, 6, 5, 11, 11, 11, 12, 12, 12, 12, 12, 12,
- 11, 11, 11, 10, 10, 9, 9, 9, 8, 8, 8, 7, 7, 7, 7, 6, 6, 6, 6, 6, 5, 5,
- 5, 10, 11, 11, 11, 11, 12, 12, 12, 12, 11, 11, 10, 10, 10, 10, 9, 9, 8,
- 8, 8, 8, 7, 7, 6, 6, 6, 6, 6, 6, 5, 5, 5,
- /* Size 4x16 */
- 33, 23, 14, 11, 32, 25, 15, 12, 32, 24, 16, 12, 31, 23, 16, 13, 30, 19,
- 14, 12, 28, 18, 13, 11, 23, 16, 11, 10, 21, 14, 10, 10, 18, 13, 10, 9,
- 16, 12, 9, 8, 14, 11, 8, 7, 13, 11, 8, 7, 12, 11, 8, 6, 12, 11, 8, 6,
- 11, 10, 8, 6, 10, 9, 7, 6,
- /* Size 16x4 */
- 33, 32, 32, 31, 30, 28, 23, 21, 18, 16, 14, 13, 12, 12, 11, 10, 23, 25,
- 24, 23, 19, 18, 16, 14, 13, 12, 11, 11, 11, 11, 10, 9, 14, 15, 16, 16,
- 14, 13, 11, 10, 10, 9, 8, 8, 8, 8, 8, 7, 11, 12, 12, 13, 12, 11, 10, 10,
- 9, 8, 7, 7, 6, 6, 6, 6,
- /* Size 8x32 */
- 32, 32, 28, 22, 16, 13, 11, 11, 33, 32, 29, 23, 17, 14, 12, 11, 33, 32,
- 29, 23, 17, 14, 12, 11, 33, 31, 29, 23, 17, 14, 12, 12, 32, 30, 28, 23,
- 17, 14, 13, 12, 32, 29, 27, 23, 18, 15, 13, 12, 32, 29, 26, 22, 17, 14,
- 13, 12, 30, 28, 24, 20, 16, 14, 12, 12, 28, 28, 21, 18, 15, 13, 12, 12,
- 27, 26, 21, 18, 14, 13, 12, 11, 26, 26, 20, 17, 14, 12, 11, 11, 23, 24,
- 19, 16, 13, 11, 11, 11, 22, 23, 18, 15, 12, 11, 10, 10, 21, 22, 18, 14,
- 12, 11, 10, 10, 19, 20, 17, 14, 11, 10, 9, 9, 18, 19, 16, 13, 10, 9, 9,
- 9, 17, 18, 16, 13, 10, 9, 9, 9, 16, 17, 15, 12, 10, 9, 8, 8, 14, 16, 14,
- 12, 9, 8, 8, 8, 13, 15, 13, 11, 9, 8, 8, 8, 13, 15, 13, 11, 9, 8, 7, 7,
- 12, 14, 13, 11, 8, 8, 7, 7, 12, 13, 12, 10, 8, 7, 7, 7, 12, 13, 12, 10,
- 8, 7, 7, 7, 11, 12, 12, 10, 8, 7, 7, 6, 11, 12, 11, 10, 9, 7, 6, 6, 10,
- 12, 11, 9, 8, 7, 6, 6, 10, 11, 11, 9, 8, 7, 6, 6, 10, 11, 11, 9, 8, 7,
- 6, 6, 10, 11, 11, 9, 8, 7, 6, 5, 9, 10, 10, 9, 8, 7, 6, 5, 9, 10, 10, 9,
- 8, 7, 6, 5,
- /* Size 32x8 */
- 32, 33, 33, 33, 32, 32, 32, 30, 28, 27, 26, 23, 22, 21, 19, 18, 17, 16,
- 14, 13, 13, 12, 12, 12, 11, 11, 10, 10, 10, 10, 9, 9, 32, 32, 32, 31,
- 30, 29, 29, 28, 28, 26, 26, 24, 23, 22, 20, 19, 18, 17, 16, 15, 15, 14,
- 13, 13, 12, 12, 12, 11, 11, 11, 10, 10, 28, 29, 29, 29, 28, 27, 26, 24,
- 21, 21, 20, 19, 18, 18, 17, 16, 16, 15, 14, 13, 13, 13, 12, 12, 12, 11,
- 11, 11, 11, 11, 10, 10, 22, 23, 23, 23, 23, 23, 22, 20, 18, 18, 17, 16,
- 15, 14, 14, 13, 13, 12, 12, 11, 11, 11, 10, 10, 10, 10, 9, 9, 9, 9, 9,
- 9, 16, 17, 17, 17, 17, 18, 17, 16, 15, 14, 14, 13, 12, 12, 11, 10, 10,
- 10, 9, 9, 9, 8, 8, 8, 8, 9, 8, 8, 8, 8, 8, 8, 13, 14, 14, 14, 14, 15,
- 14, 14, 13, 13, 12, 11, 11, 11, 10, 9, 9, 9, 8, 8, 8, 8, 7, 7, 7, 7, 7,
- 7, 7, 7, 7, 7, 11, 12, 12, 12, 13, 13, 13, 12, 12, 12, 11, 11, 10, 10,
- 9, 9, 9, 8, 8, 8, 7, 7, 7, 7, 7, 6, 6, 6, 6, 6, 6, 6, 11, 11, 11, 12,
- 12, 12, 12, 12, 12, 11, 11, 11, 10, 10, 9, 9, 9, 8, 8, 8, 7, 7, 7, 7, 6,
- 6, 6, 6, 6, 5, 5, 5 },
- { /* Chroma */
- /* Size 4x4 */
- 31, 23, 18, 16, 23, 18, 16, 15, 18, 16, 12, 12, 16, 15, 12, 10,
- /* Size 8x8 */
- 33, 27, 22, 21, 19, 17, 16, 15, 27, 22, 22, 22, 20, 19, 17, 16, 22, 22,
- 19, 19, 18, 16, 16, 16, 21, 22, 19, 17, 15, 14, 14, 14, 19, 20, 18, 15,
- 13, 12, 12, 12, 17, 19, 16, 14, 12, 11, 11, 11, 16, 17, 16, 14, 12, 11,
- 10, 10, 15, 16, 16, 14, 12, 11, 10, 9,
- /* Size 16x16 */
- 32, 34, 31, 27, 21, 21, 20, 20, 19, 17, 16, 16, 15, 15, 14, 14, 34, 33,
- 29, 25, 22, 22, 22, 21, 20, 19, 18, 17, 16, 16, 15, 15, 31, 29, 26, 23,
- 22, 22, 22, 22, 20, 19, 18, 18, 17, 17, 16, 15, 27, 25, 23, 22, 21, 21,
- 22, 21, 20, 19, 19, 18, 18, 17, 17, 16, 21, 22, 22, 21, 19, 19, 19, 19,
- 18, 18, 17, 17, 17, 16, 16, 16, 21, 22, 22, 21, 19, 19, 18, 18, 17, 17,
- 16, 16, 15, 16, 15, 15, 20, 22, 22, 22, 19, 18, 17, 16, 16, 15, 15, 14,
- 14, 14, 14, 14, 20, 21, 22, 21, 19, 18, 16, 16, 15, 14, 14, 13, 14, 13,
- 13, 13, 19, 20, 20, 20, 18, 17, 16, 15, 14, 13, 13, 13, 13, 13, 13, 13,
- 17, 19, 19, 19, 18, 17, 15, 14, 13, 12, 12, 12, 12, 12, 12, 12, 16, 18,
- 18, 19, 17, 16, 15, 14, 13, 12, 12, 11, 11, 11, 12, 11, 16, 17, 18, 18,
- 17, 16, 14, 13, 13, 12, 11, 11, 11, 11, 11, 11, 15, 16, 17, 18, 17, 15,
- 14, 14, 13, 12, 11, 11, 10, 10, 10, 10, 15, 16, 17, 17, 16, 16, 14, 13,
- 13, 12, 11, 11, 10, 10, 10, 10, 14, 15, 16, 17, 16, 15, 14, 13, 13, 12,
- 12, 11, 10, 10, 10, 9, 14, 15, 15, 16, 16, 15, 14, 13, 13, 12, 11, 11,
- 10, 10, 9, 9,
- /* Size 32x32 */
- 32, 33, 34, 33, 31, 28, 27, 25, 21, 21, 21, 21, 20, 20, 20, 19, 19, 18,
- 17, 17, 16, 16, 16, 15, 15, 15, 15, 14, 14, 14, 14, 14, 33, 33, 33, 32,
- 30, 27, 26, 24, 22, 22, 22, 22, 21, 21, 20, 20, 19, 19, 18, 17, 17, 17,
- 16, 16, 16, 16, 15, 15, 15, 15, 15, 14, 34, 33, 33, 32, 29, 26, 25, 24,
- 22, 22, 22, 23, 22, 22, 21, 20, 20, 20, 19, 18, 18, 17, 17, 17, 16, 16,
- 16, 15, 15, 15, 15, 14, 33, 32, 32, 31, 28, 26, 25, 24, 22, 22, 23, 23,
- 22, 22, 22, 21, 20, 20, 19, 18, 18, 18, 17, 17, 17, 16, 16, 16, 16, 16,
- 15, 15, 31, 30, 29, 28, 26, 24, 23, 23, 22, 22, 22, 23, 22, 22, 22, 21,
- 20, 20, 19, 19, 18, 18, 18, 17, 17, 17, 17, 16, 16, 16, 15, 15, 28, 27,
- 26, 26, 24, 22, 22, 22, 21, 22, 22, 23, 22, 22, 22, 21, 21, 20, 20, 19,
- 19, 19, 18, 18, 17, 17, 17, 16, 16, 16, 16, 16, 27, 26, 25, 25, 23, 22,
- 22, 21, 21, 21, 21, 22, 22, 22, 21, 21, 20, 20, 19, 19, 19, 18, 18, 18,
- 18, 17, 17, 17, 17, 16, 16, 16, 25, 24, 24, 24, 23, 22, 21, 21, 20, 20,
- 21, 21, 20, 20, 20, 20, 20, 19, 19, 18, 18, 18, 17, 17, 17, 17, 17, 17,
- 17, 16, 16, 16, 21, 22, 22, 22, 22, 21, 21, 20, 19, 19, 19, 19, 19, 19,
- 19, 19, 18, 18, 18, 17, 17, 17, 17, 16, 17, 17, 16, 16, 16, 16, 16, 16,
- 21, 22, 22, 22, 22, 22, 21, 20, 19, 19, 19, 19, 19, 19, 18, 18, 18, 18,
- 17, 17, 17, 16, 16, 16, 16, 16, 16, 16, 16, 16, 15, 15, 21, 22, 22, 23,
- 22, 22, 21, 21, 19, 19, 19, 19, 18, 18, 18, 18, 17, 17, 17, 16, 16, 16,
- 16, 16, 15, 16, 16, 15, 15, 15, 15, 15, 21, 22, 23, 23, 23, 23, 22, 21,
- 19, 19, 19, 18, 17, 17, 17, 17, 16, 16, 16, 16, 15, 15, 15, 15, 15, 15,
- 15, 15, 15, 15, 15, 14, 20, 21, 22, 22, 22, 22, 22, 20, 19, 19, 18, 17,
- 17, 17, 16, 16, 16, 16, 15, 15, 15, 14, 14, 14, 14, 14, 14, 14, 14, 14,
- 14, 14, 20, 21, 22, 22, 22, 22, 22, 20, 19, 19, 18, 17, 17, 17, 16, 16,
- 16, 15, 15, 15, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 20, 20,
- 21, 22, 22, 22, 21, 20, 19, 18, 18, 17, 16, 16, 16, 15, 15, 15, 14, 14,
- 14, 14, 13, 13, 14, 13, 13, 14, 13, 13, 13, 14, 19, 20, 20, 21, 21, 21,
- 21, 20, 19, 18, 18, 17, 16, 16, 15, 14, 14, 14, 14, 13, 13, 13, 13, 13,
- 13, 13, 13, 13, 13, 13, 13, 13, 19, 19, 20, 20, 20, 21, 20, 20, 18, 18,
- 17, 16, 16, 16, 15, 14, 14, 14, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
- 13, 13, 13, 13, 18, 19, 20, 20, 20, 20, 20, 19, 18, 18, 17, 16, 16, 15,
- 15, 14, 14, 13, 13, 13, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12,
- 17, 18, 19, 19, 19, 20, 19, 19, 18, 17, 17, 16, 15, 15, 14, 14, 13, 13,
- 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 17, 17, 18, 18,
- 19, 19, 19, 18, 17, 17, 16, 16, 15, 15, 14, 13, 13, 13, 12, 12, 12, 12,
- 11, 12, 12, 12, 12, 12, 12, 12, 12, 12, 16, 17, 18, 18, 18, 19, 19, 18,
- 17, 17, 16, 15, 15, 14, 14, 13, 13, 12, 12, 12, 12, 11, 11, 11, 11, 11,
- 11, 11, 12, 11, 11, 12, 16, 17, 17, 18, 18, 19, 18, 18, 17, 16, 16, 15,
- 14, 14, 14, 13, 13, 12, 12, 12, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11,
- 11, 11, 16, 16, 17, 17, 18, 18, 18, 17, 17, 16, 16, 15, 14, 14, 13, 13,
- 13, 12, 12, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 15, 16,
- 17, 17, 17, 18, 18, 17, 16, 16, 16, 15, 14, 14, 13, 13, 13, 12, 12, 12,
- 11, 11, 11, 11, 11, 10, 10, 11, 11, 11, 11, 10, 15, 16, 16, 17, 17, 17,
- 18, 17, 17, 16, 15, 15, 14, 14, 14, 13, 13, 12, 12, 12, 11, 11, 11, 11,
- 10, 10, 10, 10, 10, 10, 10, 10, 15, 16, 16, 16, 17, 17, 17, 17, 17, 16,
- 16, 15, 14, 14, 13, 13, 13, 12, 12, 12, 11, 11, 11, 10, 10, 10, 10, 10,
- 10, 10, 10, 10, 15, 15, 16, 16, 17, 17, 17, 17, 16, 16, 16, 15, 14, 14,
- 13, 13, 13, 12, 12, 12, 11, 11, 11, 10, 10, 10, 10, 10, 10, 10, 10, 10,
- 14, 15, 15, 16, 16, 16, 17, 17, 16, 16, 15, 15, 14, 14, 14, 13, 13, 12,
- 12, 12, 11, 11, 11, 11, 10, 10, 10, 10, 10, 10, 10, 9, 14, 15, 15, 16,
- 16, 16, 17, 17, 16, 16, 15, 15, 14, 14, 13, 13, 13, 12, 12, 12, 12, 11,
- 11, 11, 10, 10, 10, 10, 10, 10, 9, 9, 14, 15, 15, 16, 16, 16, 16, 16,
- 16, 16, 15, 15, 14, 14, 13, 13, 13, 12, 12, 12, 11, 11, 11, 11, 10, 10,
- 10, 10, 10, 9, 9, 9, 14, 15, 15, 15, 15, 16, 16, 16, 16, 15, 15, 15, 14,
- 14, 13, 13, 13, 12, 12, 12, 11, 11, 11, 11, 10, 10, 10, 10, 9, 9, 9, 9,
- 14, 14, 14, 15, 15, 16, 16, 16, 16, 15, 15, 14, 14, 14, 14, 13, 13, 12,
- 12, 12, 12, 11, 11, 10, 10, 10, 10, 9, 9, 9, 9, 9,
- /* Size 4x8 */
- 33, 22, 18, 16, 26, 23, 20, 17, 22, 19, 17, 16, 22, 17, 15, 14, 20, 16,
- 13, 13, 17, 15, 12, 11, 16, 16, 12, 10, 16, 15, 12, 10,
- /* Size 8x4 */
- 33, 26, 22, 22, 20, 17, 16, 16, 22, 23, 19, 17, 16, 15, 16, 15, 18, 20,
- 17, 15, 13, 12, 12, 12, 16, 17, 16, 14, 13, 11, 10, 10,
- /* Size 8x16 */
- 32, 29, 21, 20, 18, 16, 15, 15, 34, 27, 22, 22, 20, 18, 16, 16, 31, 25,
- 22, 22, 20, 18, 17, 16, 26, 22, 21, 22, 20, 19, 18, 17, 21, 21, 19, 19,
- 18, 17, 17, 17, 21, 22, 19, 18, 17, 16, 16, 16, 20, 22, 19, 17, 16, 15,
- 14, 15, 20, 22, 19, 16, 14, 14, 14, 14, 19, 21, 18, 16, 14, 13, 13, 13,
- 17, 19, 18, 15, 13, 12, 12, 12, 16, 19, 17, 15, 12, 12, 11, 12, 16, 18,
- 17, 14, 12, 11, 11, 11, 15, 17, 16, 14, 13, 11, 11, 11, 15, 17, 16, 14,
- 13, 12, 10, 10, 14, 16, 16, 14, 12, 11, 10, 10, 14, 15, 16, 14, 13, 12,
- 10, 10,
- /* Size 16x8 */
- 32, 34, 31, 26, 21, 21, 20, 20, 19, 17, 16, 16, 15, 15, 14, 14, 29, 27,
- 25, 22, 21, 22, 22, 22, 21, 19, 19, 18, 17, 17, 16, 15, 21, 22, 22, 21,
- 19, 19, 19, 19, 18, 18, 17, 17, 16, 16, 16, 16, 20, 22, 22, 22, 19, 18,
- 17, 16, 16, 15, 15, 14, 14, 14, 14, 14, 18, 20, 20, 20, 18, 17, 16, 14,
- 14, 13, 12, 12, 13, 13, 12, 13, 16, 18, 18, 19, 17, 16, 15, 14, 13, 12,
- 12, 11, 11, 12, 11, 12, 15, 16, 17, 18, 17, 16, 14, 14, 13, 12, 11, 11,
- 11, 10, 10, 10, 15, 16, 16, 17, 17, 16, 15, 14, 13, 12, 12, 11, 11, 10,
- 10, 10,
- /* Size 16x32 */
- 32, 33, 29, 27, 21, 21, 20, 20, 18, 17, 16, 15, 15, 15, 15, 14, 33, 33,
- 28, 26, 22, 22, 21, 20, 19, 18, 17, 16, 16, 16, 16, 15, 34, 32, 27, 26,
- 22, 23, 22, 21, 20, 19, 18, 17, 16, 16, 16, 15, 33, 31, 27, 25, 22, 23,
- 22, 21, 20, 19, 18, 17, 17, 17, 16, 16, 31, 28, 25, 23, 22, 22, 22, 22,
- 20, 19, 18, 17, 17, 17, 16, 16, 28, 26, 23, 22, 22, 23, 22, 22, 20, 20,
- 19, 18, 17, 17, 17, 17, 26, 25, 22, 22, 21, 22, 22, 21, 20, 19, 19, 18,
- 18, 17, 17, 17, 24, 24, 22, 21, 20, 21, 20, 20, 19, 18, 18, 17, 17, 17,
- 17, 17, 21, 22, 21, 21, 19, 19, 19, 19, 18, 17, 17, 16, 17, 17, 17, 17,
- 21, 22, 22, 21, 19, 19, 19, 18, 18, 17, 17, 16, 16, 16, 16, 16, 21, 22,
- 22, 21, 19, 18, 18, 18, 17, 17, 16, 16, 16, 16, 16, 16, 21, 23, 23, 22,
- 19, 18, 17, 17, 16, 16, 15, 15, 15, 15, 16, 15, 20, 22, 22, 21, 19, 17,
- 17, 16, 16, 15, 15, 14, 14, 15, 15, 15, 20, 22, 22, 21, 19, 17, 17, 16,
- 15, 15, 14, 14, 14, 14, 15, 14, 20, 21, 22, 21, 19, 17, 16, 16, 14, 14,
- 14, 13, 14, 14, 14, 14, 19, 20, 21, 20, 19, 17, 16, 15, 14, 13, 13, 13,
- 13, 13, 14, 14, 19, 20, 21, 20, 18, 16, 16, 15, 14, 13, 13, 13, 13, 13,
- 13, 14, 18, 20, 20, 20, 18, 16, 16, 15, 13, 13, 12, 12, 12, 13, 13, 13,
- 17, 19, 19, 19, 18, 16, 15, 14, 13, 12, 12, 12, 12, 12, 12, 13, 17, 18,
- 19, 19, 17, 16, 15, 14, 13, 12, 12, 12, 12, 12, 12, 12, 16, 18, 19, 18,
- 17, 15, 15, 14, 12, 12, 12, 11, 11, 12, 12, 12, 16, 17, 18, 18, 17, 15,
- 14, 14, 12, 12, 11, 11, 11, 11, 12, 12, 16, 17, 18, 18, 17, 15, 14, 13,
- 12, 12, 11, 11, 11, 11, 11, 12, 15, 17, 17, 18, 16, 15, 14, 13, 12, 12,
- 11, 11, 11, 11, 11, 11, 15, 17, 17, 17, 16, 15, 14, 13, 13, 12, 11, 11,
- 11, 10, 11, 11, 15, 16, 17, 17, 16, 16, 14, 13, 13, 12, 11, 11, 10, 10,
- 10, 10, 15, 16, 17, 17, 16, 16, 14, 13, 13, 12, 12, 11, 10, 10, 10, 10,
- 14, 16, 16, 17, 16, 15, 14, 14, 12, 12, 11, 11, 10, 10, 10, 10, 14, 16,
- 16, 17, 16, 15, 14, 14, 12, 12, 11, 11, 10, 10, 10, 10, 14, 16, 16, 16,
- 16, 15, 14, 13, 13, 12, 11, 11, 10, 10, 10, 10, 14, 15, 15, 16, 16, 15,
- 14, 13, 13, 12, 12, 11, 10, 10, 10, 10, 14, 15, 15, 16, 16, 14, 14, 13,
- 13, 12, 12, 11, 11, 10, 10, 9,
- /* Size 32x16 */
- 32, 33, 34, 33, 31, 28, 26, 24, 21, 21, 21, 21, 20, 20, 20, 19, 19, 18,
- 17, 17, 16, 16, 16, 15, 15, 15, 15, 14, 14, 14, 14, 14, 33, 33, 32, 31,
- 28, 26, 25, 24, 22, 22, 22, 23, 22, 22, 21, 20, 20, 20, 19, 18, 18, 17,
- 17, 17, 17, 16, 16, 16, 16, 16, 15, 15, 29, 28, 27, 27, 25, 23, 22, 22,
- 21, 22, 22, 23, 22, 22, 22, 21, 21, 20, 19, 19, 19, 18, 18, 17, 17, 17,
- 17, 16, 16, 16, 15, 15, 27, 26, 26, 25, 23, 22, 22, 21, 21, 21, 21, 22,
- 21, 21, 21, 20, 20, 20, 19, 19, 18, 18, 18, 18, 17, 17, 17, 17, 17, 16,
- 16, 16, 21, 22, 22, 22, 22, 22, 21, 20, 19, 19, 19, 19, 19, 19, 19, 19,
- 18, 18, 18, 17, 17, 17, 17, 16, 16, 16, 16, 16, 16, 16, 16, 16, 21, 22,
- 23, 23, 22, 23, 22, 21, 19, 19, 18, 18, 17, 17, 17, 17, 16, 16, 16, 16,
- 15, 15, 15, 15, 15, 16, 16, 15, 15, 15, 15, 14, 20, 21, 22, 22, 22, 22,
- 22, 20, 19, 19, 18, 17, 17, 17, 16, 16, 16, 16, 15, 15, 15, 14, 14, 14,
- 14, 14, 14, 14, 14, 14, 14, 14, 20, 20, 21, 21, 22, 22, 21, 20, 19, 18,
- 18, 17, 16, 16, 16, 15, 15, 15, 14, 14, 14, 14, 13, 13, 13, 13, 13, 14,
- 14, 13, 13, 13, 18, 19, 20, 20, 20, 20, 20, 19, 18, 18, 17, 16, 16, 15,
- 14, 14, 14, 13, 13, 13, 12, 12, 12, 12, 13, 13, 13, 12, 12, 13, 13, 13,
- 17, 18, 19, 19, 19, 20, 19, 18, 17, 17, 17, 16, 15, 15, 14, 13, 13, 13,
- 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 16, 17, 18, 18,
- 18, 19, 19, 18, 17, 17, 16, 15, 15, 14, 14, 13, 13, 12, 12, 12, 12, 11,
- 11, 11, 11, 11, 12, 11, 11, 11, 12, 12, 15, 16, 17, 17, 17, 18, 18, 17,
- 16, 16, 16, 15, 14, 14, 13, 13, 13, 12, 12, 12, 11, 11, 11, 11, 11, 11,
- 11, 11, 11, 11, 11, 11, 15, 16, 16, 17, 17, 17, 18, 17, 17, 16, 16, 15,
- 14, 14, 14, 13, 13, 12, 12, 12, 11, 11, 11, 11, 11, 10, 10, 10, 10, 10,
- 10, 11, 15, 16, 16, 17, 17, 17, 17, 17, 17, 16, 16, 15, 15, 14, 14, 13,
- 13, 13, 12, 12, 12, 11, 11, 11, 10, 10, 10, 10, 10, 10, 10, 10, 15, 16,
- 16, 16, 16, 17, 17, 17, 17, 16, 16, 16, 15, 15, 14, 14, 13, 13, 12, 12,
- 12, 12, 11, 11, 11, 10, 10, 10, 10, 10, 10, 10, 14, 15, 15, 16, 16, 17,
- 17, 17, 17, 16, 16, 15, 15, 14, 14, 14, 14, 13, 13, 12, 12, 12, 12, 11,
- 11, 10, 10, 10, 10, 10, 10, 9,
- /* Size 4x16 */
- 33, 21, 17, 15, 32, 23, 19, 16, 28, 22, 19, 17, 25, 22, 19, 17, 22, 19,
- 17, 17, 22, 18, 17, 16, 22, 17, 15, 15, 21, 17, 14, 14, 20, 16, 13, 13,
- 19, 16, 12, 12, 18, 15, 12, 12, 17, 15, 12, 11, 17, 15, 12, 10, 16, 16,
- 12, 10, 16, 15, 12, 10, 15, 15, 12, 10,
- /* Size 16x4 */
- 33, 32, 28, 25, 22, 22, 22, 21, 20, 19, 18, 17, 17, 16, 16, 15, 21, 23,
- 22, 22, 19, 18, 17, 17, 16, 16, 15, 15, 15, 16, 15, 15, 17, 19, 19, 19,
- 17, 17, 15, 14, 13, 12, 12, 12, 12, 12, 12, 12, 15, 16, 17, 17, 17, 16,
- 15, 14, 13, 12, 12, 11, 10, 10, 10, 10,
- /* Size 8x32 */
- 32, 29, 21, 20, 18, 16, 15, 15, 33, 28, 22, 21, 19, 17, 16, 16, 34, 27,
- 22, 22, 20, 18, 16, 16, 33, 27, 22, 22, 20, 18, 17, 16, 31, 25, 22, 22,
- 20, 18, 17, 16, 28, 23, 22, 22, 20, 19, 17, 17, 26, 22, 21, 22, 20, 19,
- 18, 17, 24, 22, 20, 20, 19, 18, 17, 17, 21, 21, 19, 19, 18, 17, 17, 17,
- 21, 22, 19, 19, 18, 17, 16, 16, 21, 22, 19, 18, 17, 16, 16, 16, 21, 23,
- 19, 17, 16, 15, 15, 16, 20, 22, 19, 17, 16, 15, 14, 15, 20, 22, 19, 17,
- 15, 14, 14, 15, 20, 22, 19, 16, 14, 14, 14, 14, 19, 21, 19, 16, 14, 13,
- 13, 14, 19, 21, 18, 16, 14, 13, 13, 13, 18, 20, 18, 16, 13, 12, 12, 13,
- 17, 19, 18, 15, 13, 12, 12, 12, 17, 19, 17, 15, 13, 12, 12, 12, 16, 19,
- 17, 15, 12, 12, 11, 12, 16, 18, 17, 14, 12, 11, 11, 12, 16, 18, 17, 14,
- 12, 11, 11, 11, 15, 17, 16, 14, 12, 11, 11, 11, 15, 17, 16, 14, 13, 11,
- 11, 11, 15, 17, 16, 14, 13, 11, 10, 10, 15, 17, 16, 14, 13, 12, 10, 10,
- 14, 16, 16, 14, 12, 11, 10, 10, 14, 16, 16, 14, 12, 11, 10, 10, 14, 16,
- 16, 14, 13, 11, 10, 10, 14, 15, 16, 14, 13, 12, 10, 10, 14, 15, 16, 14,
- 13, 12, 11, 10,
- /* Size 32x8 */
- 32, 33, 34, 33, 31, 28, 26, 24, 21, 21, 21, 21, 20, 20, 20, 19, 19, 18,
- 17, 17, 16, 16, 16, 15, 15, 15, 15, 14, 14, 14, 14, 14, 29, 28, 27, 27,
- 25, 23, 22, 22, 21, 22, 22, 23, 22, 22, 22, 21, 21, 20, 19, 19, 19, 18,
- 18, 17, 17, 17, 17, 16, 16, 16, 15, 15, 21, 22, 22, 22, 22, 22, 21, 20,
- 19, 19, 19, 19, 19, 19, 19, 19, 18, 18, 18, 17, 17, 17, 17, 16, 16, 16,
- 16, 16, 16, 16, 16, 16, 20, 21, 22, 22, 22, 22, 22, 20, 19, 19, 18, 17,
- 17, 17, 16, 16, 16, 16, 15, 15, 15, 14, 14, 14, 14, 14, 14, 14, 14, 14,
- 14, 14, 18, 19, 20, 20, 20, 20, 20, 19, 18, 18, 17, 16, 16, 15, 14, 14,
- 14, 13, 13, 13, 12, 12, 12, 12, 13, 13, 13, 12, 12, 13, 13, 13, 16, 17,
- 18, 18, 18, 19, 19, 18, 17, 17, 16, 15, 15, 14, 14, 13, 13, 12, 12, 12,
- 12, 11, 11, 11, 11, 11, 12, 11, 11, 11, 12, 12, 15, 16, 16, 17, 17, 17,
- 18, 17, 17, 16, 16, 15, 14, 14, 14, 13, 13, 12, 12, 12, 11, 11, 11, 11,
- 11, 10, 10, 10, 10, 10, 10, 11, 15, 16, 16, 16, 16, 17, 17, 17, 17, 16,
- 16, 16, 15, 15, 14, 14, 13, 13, 12, 12, 12, 12, 11, 11, 11, 10, 10, 10,
- 10, 10, 10, 10 },
- },
- {
- { /* Luma */
- /* Size 4x4 */
- 32, 27, 16, 12, 27, 18, 13, 11, 16, 13, 9, 8, 12, 11, 8, 6,
- /* Size 8x8 */
- 32, 32, 29, 22, 18, 13, 12, 11, 32, 30, 28, 23, 19, 15, 13, 11, 29, 28,
- 21, 18, 16, 13, 12, 11, 22, 23, 18, 15, 13, 11, 10, 10, 18, 19, 16, 13,
- 11, 9, 8, 8, 13, 15, 13, 11, 9, 8, 7, 7, 12, 13, 12, 10, 8, 7, 7, 6, 11,
- 11, 11, 10, 8, 7, 6, 6,
- /* Size 16x16 */
- 32, 33, 33, 32, 30, 26, 23, 21, 18, 16, 14, 13, 12, 11, 10, 10, 33, 32,
- 32, 32, 30, 27, 25, 22, 19, 17, 16, 14, 13, 12, 11, 10, 33, 32, 31, 30,
- 28, 26, 24, 22, 19, 17, 16, 14, 13, 12, 12, 11, 32, 32, 30, 29, 28, 26,
- 24, 22, 20, 18, 16, 14, 14, 13, 12, 11, 30, 30, 28, 28, 24, 22, 20, 19,
- 17, 16, 15, 13, 12, 12, 12, 11, 26, 27, 26, 26, 22, 19, 18, 17, 15, 14,
- 13, 12, 11, 11, 11, 10, 23, 25, 24, 24, 20, 18, 16, 15, 14, 13, 12, 11,
- 11, 10, 10, 10, 21, 22, 22, 22, 19, 17, 15, 14, 13, 12, 11, 10, 10, 10,
- 9, 9, 18, 19, 19, 20, 17, 15, 14, 13, 11, 11, 10, 9, 9, 9, 9, 8, 16, 17,
- 17, 18, 16, 14, 13, 12, 11, 10, 9, 9, 8, 8, 8, 8, 14, 16, 16, 16, 15,
- 13, 12, 11, 10, 9, 9, 8, 8, 8, 8, 7, 13, 14, 14, 14, 13, 12, 11, 10, 9,
- 9, 8, 7, 7, 7, 7, 7, 12, 13, 13, 14, 12, 11, 11, 10, 9, 8, 8, 7, 7, 7,
- 6, 6, 11, 12, 12, 13, 12, 11, 10, 10, 9, 8, 8, 7, 7, 6, 6, 6, 10, 11,
- 12, 12, 12, 11, 10, 9, 9, 8, 8, 7, 6, 6, 6, 6, 10, 10, 11, 11, 11, 10,
- 10, 9, 8, 8, 7, 7, 6, 6, 6, 5,
- /* Size 32x32 */
- 32, 33, 33, 33, 33, 32, 32, 30, 30, 28, 26, 25, 23, 21, 21, 19, 18, 17,
- 16, 15, 14, 13, 13, 12, 12, 11, 11, 11, 10, 10, 10, 10, 33, 32, 32, 32,
- 32, 32, 32, 30, 30, 29, 27, 26, 24, 22, 22, 20, 19, 18, 17, 16, 15, 13,
- 13, 13, 12, 12, 12, 11, 11, 11, 10, 10, 33, 32, 32, 32, 32, 32, 32, 31,
- 30, 30, 27, 26, 25, 23, 22, 20, 19, 19, 17, 16, 16, 14, 14, 13, 13, 12,
- 12, 12, 11, 11, 10, 10, 33, 32, 32, 32, 32, 32, 32, 31, 30, 30, 28, 27,
- 25, 23, 23, 21, 19, 19, 17, 16, 16, 14, 14, 14, 13, 13, 12, 12, 12, 11,
- 11, 11, 33, 32, 32, 32, 31, 31, 30, 29, 28, 28, 26, 26, 24, 23, 22, 20,
- 19, 19, 17, 16, 16, 14, 14, 14, 13, 13, 12, 12, 12, 11, 11, 11, 32, 32,
- 32, 32, 31, 30, 30, 28, 28, 28, 26, 26, 24, 23, 22, 21, 19, 19, 18, 17,
- 16, 15, 14, 14, 13, 13, 12, 12, 12, 11, 11, 11, 32, 32, 32, 32, 30, 30,
- 29, 28, 28, 27, 26, 25, 24, 23, 22, 21, 20, 19, 18, 17, 16, 15, 14, 14,
- 14, 13, 13, 12, 12, 12, 11, 11, 30, 30, 31, 31, 29, 28, 28, 26, 25, 24,
- 23, 22, 22, 20, 20, 19, 18, 17, 16, 16, 15, 14, 14, 13, 13, 13, 12, 12,
- 12, 12, 11, 11, 30, 30, 30, 30, 28, 28, 28, 25, 24, 23, 22, 21, 20, 19,
- 19, 18, 17, 17, 16, 15, 15, 13, 13, 13, 12, 12, 12, 12, 12, 11, 11, 11,
- 28, 29, 30, 30, 28, 28, 27, 24, 23, 21, 20, 20, 19, 18, 18, 17, 16, 16,
- 15, 14, 14, 13, 13, 13, 12, 12, 12, 12, 11, 11, 11, 10, 26, 27, 27, 28,
- 26, 26, 26, 23, 22, 20, 19, 19, 18, 17, 17, 16, 15, 15, 14, 13, 13, 12,
- 12, 12, 11, 12, 11, 11, 11, 11, 10, 10, 25, 26, 26, 27, 26, 26, 25, 22,
- 21, 20, 19, 18, 17, 17, 16, 15, 15, 14, 14, 13, 13, 12, 12, 12, 11, 11,
- 11, 11, 11, 10, 10, 10, 23, 24, 25, 25, 24, 24, 24, 22, 20, 19, 18, 17,
- 16, 16, 15, 14, 14, 14, 13, 12, 12, 11, 11, 11, 11, 10, 10, 10, 10, 10,
- 10, 10, 21, 22, 23, 23, 23, 23, 23, 20, 19, 18, 17, 17, 16, 15, 14, 13,
- 13, 13, 12, 12, 11, 11, 11, 10, 10, 10, 10, 10, 10, 10, 9, 9, 21, 22,
- 22, 23, 22, 22, 22, 20, 19, 18, 17, 16, 15, 14, 14, 13, 13, 12, 12, 11,
- 11, 10, 10, 10, 10, 10, 10, 10, 9, 9, 9, 9, 19, 20, 20, 21, 20, 21, 21,
- 19, 18, 17, 16, 15, 14, 13, 13, 12, 12, 12, 11, 11, 11, 10, 10, 10, 9,
- 9, 9, 9, 9, 9, 9, 9, 18, 19, 19, 19, 19, 19, 20, 18, 17, 16, 15, 15, 14,
- 13, 13, 12, 11, 11, 11, 10, 10, 9, 9, 9, 9, 9, 9, 9, 9, 8, 8, 9, 17, 18,
- 19, 19, 19, 19, 19, 17, 17, 16, 15, 14, 14, 13, 12, 12, 11, 11, 10, 10,
- 10, 9, 9, 9, 9, 8, 9, 8, 8, 8, 8, 8, 16, 17, 17, 17, 17, 18, 18, 16, 16,
- 15, 14, 14, 13, 12, 12, 11, 11, 10, 10, 9, 9, 9, 9, 8, 8, 8, 8, 8, 8, 8,
- 8, 8, 15, 16, 16, 16, 16, 17, 17, 16, 15, 14, 13, 13, 12, 12, 11, 11,
- 10, 10, 9, 9, 9, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 7, 14, 15, 16, 16, 16,
- 16, 16, 15, 15, 14, 13, 13, 12, 11, 11, 11, 10, 10, 9, 9, 9, 8, 8, 8, 8,
- 8, 8, 7, 8, 7, 7, 7, 13, 13, 14, 14, 14, 15, 15, 14, 13, 13, 12, 12, 11,
- 11, 10, 10, 9, 9, 9, 8, 8, 8, 8, 7, 7, 7, 7, 7, 7, 7, 7, 7, 13, 13, 14,
- 14, 14, 14, 14, 14, 13, 13, 12, 12, 11, 11, 10, 10, 9, 9, 9, 8, 8, 8, 7,
- 7, 7, 7, 7, 7, 7, 7, 7, 7, 12, 13, 13, 14, 14, 14, 14, 13, 13, 13, 12,
- 12, 11, 10, 10, 10, 9, 9, 8, 8, 8, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 6, 12,
- 12, 13, 13, 13, 13, 14, 13, 12, 12, 11, 11, 11, 10, 10, 9, 9, 9, 8, 8,
- 8, 7, 7, 7, 7, 7, 7, 6, 6, 6, 6, 6, 11, 12, 12, 13, 13, 13, 13, 13, 12,
- 12, 12, 11, 10, 10, 10, 9, 9, 8, 8, 8, 8, 7, 7, 7, 7, 7, 7, 6, 6, 6, 6,
- 6, 11, 12, 12, 12, 12, 12, 13, 12, 12, 12, 11, 11, 10, 10, 10, 9, 9, 9,
- 8, 8, 8, 7, 7, 7, 7, 7, 6, 6, 6, 6, 6, 6, 11, 11, 12, 12, 12, 12, 12,
- 12, 12, 12, 11, 11, 10, 10, 10, 9, 9, 8, 8, 8, 7, 7, 7, 7, 6, 6, 6, 6,
- 6, 6, 6, 6, 10, 11, 11, 12, 12, 12, 12, 12, 12, 11, 11, 11, 10, 10, 9,
- 9, 9, 8, 8, 8, 8, 7, 7, 7, 6, 6, 6, 6, 6, 6, 6, 6, 10, 11, 11, 11, 11,
- 11, 12, 12, 11, 11, 11, 10, 10, 10, 9, 9, 8, 8, 8, 8, 7, 7, 7, 7, 6, 6,
- 6, 6, 6, 6, 6, 5, 10, 10, 10, 11, 11, 11, 11, 11, 11, 11, 10, 10, 10, 9,
- 9, 9, 8, 8, 8, 8, 7, 7, 7, 7, 6, 6, 6, 6, 6, 6, 5, 5, 10, 10, 10, 11,
- 11, 11, 11, 11, 11, 10, 10, 10, 10, 9, 9, 9, 9, 8, 8, 7, 7, 7, 7, 6, 6,
- 6, 6, 6, 6, 5, 5, 5,
- /* Size 4x8 */
- 32, 27, 17, 12, 32, 26, 18, 13, 30, 20, 15, 12, 23, 17, 12, 10, 19, 15,
- 10, 9, 14, 12, 9, 8, 12, 12, 8, 7, 11, 10, 8, 6,
- /* Size 8x4 */
- 32, 32, 30, 23, 19, 14, 12, 11, 27, 26, 20, 17, 15, 12, 12, 10, 17, 18,
- 15, 12, 10, 9, 8, 8, 12, 13, 12, 10, 9, 8, 7, 6,
- /* Size 8x16 */
- 32, 32, 28, 23, 18, 13, 12, 11, 33, 32, 29, 25, 19, 14, 13, 12, 32, 31,
- 28, 24, 19, 14, 13, 12, 32, 30, 27, 24, 20, 15, 13, 12, 30, 28, 23, 20,
- 17, 14, 13, 12, 26, 26, 20, 18, 15, 12, 12, 11, 23, 24, 19, 16, 14, 11,
- 11, 11, 21, 22, 18, 15, 13, 11, 10, 10, 18, 19, 16, 14, 11, 9, 9, 9, 16,
- 17, 15, 13, 11, 9, 8, 8, 14, 16, 14, 12, 10, 8, 8, 8, 13, 14, 13, 11, 9,
- 8, 7, 7, 12, 13, 12, 11, 9, 7, 7, 7, 11, 12, 12, 10, 9, 8, 7, 6, 10, 12,
- 12, 10, 8, 7, 6, 6, 10, 11, 11, 10, 9, 7, 6, 6,
- /* Size 16x8 */
- 32, 33, 32, 32, 30, 26, 23, 21, 18, 16, 14, 13, 12, 11, 10, 10, 32, 32,
- 31, 30, 28, 26, 24, 22, 19, 17, 16, 14, 13, 12, 12, 11, 28, 29, 28, 27,
- 23, 20, 19, 18, 16, 15, 14, 13, 12, 12, 12, 11, 23, 25, 24, 24, 20, 18,
- 16, 15, 14, 13, 12, 11, 11, 10, 10, 10, 18, 19, 19, 20, 17, 15, 14, 13,
- 11, 11, 10, 9, 9, 9, 8, 9, 13, 14, 14, 15, 14, 12, 11, 11, 9, 9, 8, 8,
- 7, 8, 7, 7, 12, 13, 13, 13, 13, 12, 11, 10, 9, 8, 8, 7, 7, 7, 6, 6, 11,
- 12, 12, 12, 12, 11, 11, 10, 9, 8, 8, 7, 7, 6, 6, 6,
- /* Size 16x32 */
- 32, 33, 32, 32, 28, 26, 23, 19, 18, 16, 13, 13, 12, 11, 11, 11, 33, 32,
- 32, 32, 29, 27, 24, 20, 19, 17, 14, 13, 12, 12, 12, 11, 33, 32, 32, 32,
- 29, 27, 25, 20, 19, 17, 14, 14, 13, 12, 12, 11, 33, 32, 32, 31, 30, 28,
- 25, 21, 19, 17, 14, 14, 13, 12, 12, 12, 32, 32, 31, 30, 28, 26, 24, 20,
- 19, 17, 14, 14, 13, 13, 12, 12, 32, 32, 30, 30, 28, 26, 24, 21, 19, 18,
- 15, 14, 13, 13, 12, 12, 32, 31, 30, 29, 27, 26, 24, 21, 20, 18, 15, 15,
- 13, 13, 12, 12, 30, 30, 29, 28, 24, 23, 21, 19, 18, 16, 14, 14, 13, 13,
- 13, 12, 30, 30, 28, 28, 23, 22, 20, 18, 17, 16, 14, 13, 13, 12, 12, 12,
- 28, 30, 28, 27, 21, 20, 19, 17, 16, 15, 13, 13, 12, 12, 12, 12, 26, 28,
- 26, 26, 20, 19, 18, 16, 15, 14, 12, 12, 12, 12, 11, 12, 26, 27, 26, 25,
- 20, 19, 17, 15, 15, 14, 12, 12, 11, 11, 11, 11, 23, 25, 24, 24, 19, 18,
- 16, 14, 14, 13, 11, 11, 11, 11, 11, 11, 22, 23, 23, 22, 18, 17, 16, 14,
- 13, 12, 11, 11, 10, 10, 10, 10, 21, 22, 22, 22, 18, 17, 15, 13, 13, 12,
- 11, 10, 10, 10, 10, 10, 19, 21, 20, 20, 17, 16, 14, 12, 12, 11, 10, 10,
- 9, 9, 10, 9, 18, 19, 19, 19, 16, 15, 14, 12, 11, 11, 9, 9, 9, 9, 9, 9,
- 17, 19, 19, 19, 16, 15, 14, 12, 11, 10, 9, 9, 9, 9, 9, 9, 16, 17, 17,
- 18, 15, 14, 13, 11, 11, 10, 9, 9, 8, 8, 8, 9, 15, 16, 17, 17, 14, 13,
- 12, 11, 10, 9, 8, 8, 8, 8, 8, 8, 14, 16, 16, 16, 14, 13, 12, 11, 10, 9,
- 8, 8, 8, 8, 8, 8, 13, 14, 14, 15, 13, 12, 11, 10, 9, 9, 8, 8, 7, 8, 8,
- 7, 13, 14, 14, 14, 13, 12, 11, 10, 9, 9, 8, 7, 7, 7, 7, 7, 12, 14, 14,
- 14, 13, 12, 11, 10, 9, 8, 8, 7, 7, 7, 7, 7, 12, 13, 13, 13, 12, 11, 11,
- 9, 9, 8, 7, 7, 7, 7, 7, 7, 11, 12, 13, 13, 12, 12, 10, 9, 9, 8, 8, 7, 7,
- 7, 6, 6, 11, 12, 12, 13, 12, 11, 10, 10, 9, 8, 8, 7, 7, 6, 6, 6, 11, 12,
- 12, 12, 12, 11, 10, 10, 9, 8, 7, 7, 7, 6, 6, 6, 10, 12, 12, 12, 12, 11,
- 10, 9, 8, 8, 7, 7, 6, 6, 6, 6, 10, 11, 11, 12, 11, 10, 10, 9, 9, 8, 7,
- 7, 6, 6, 6, 6, 10, 11, 11, 11, 11, 10, 10, 9, 9, 8, 7, 7, 6, 6, 6, 6,
- 10, 11, 11, 11, 11, 10, 10, 9, 9, 8, 8, 7, 7, 6, 6, 5,
- /* Size 32x16 */
- 32, 33, 33, 33, 32, 32, 32, 30, 30, 28, 26, 26, 23, 22, 21, 19, 18, 17,
- 16, 15, 14, 13, 13, 12, 12, 11, 11, 11, 10, 10, 10, 10, 33, 32, 32, 32,
- 32, 32, 31, 30, 30, 30, 28, 27, 25, 23, 22, 21, 19, 19, 17, 16, 16, 14,
- 14, 14, 13, 12, 12, 12, 12, 11, 11, 11, 32, 32, 32, 32, 31, 30, 30, 29,
- 28, 28, 26, 26, 24, 23, 22, 20, 19, 19, 17, 17, 16, 14, 14, 14, 13, 13,
- 12, 12, 12, 11, 11, 11, 32, 32, 32, 31, 30, 30, 29, 28, 28, 27, 26, 25,
- 24, 22, 22, 20, 19, 19, 18, 17, 16, 15, 14, 14, 13, 13, 13, 12, 12, 12,
- 11, 11, 28, 29, 29, 30, 28, 28, 27, 24, 23, 21, 20, 20, 19, 18, 18, 17,
- 16, 16, 15, 14, 14, 13, 13, 13, 12, 12, 12, 12, 12, 11, 11, 11, 26, 27,
- 27, 28, 26, 26, 26, 23, 22, 20, 19, 19, 18, 17, 17, 16, 15, 15, 14, 13,
- 13, 12, 12, 12, 11, 12, 11, 11, 11, 10, 10, 10, 23, 24, 25, 25, 24, 24,
- 24, 21, 20, 19, 18, 17, 16, 16, 15, 14, 14, 14, 13, 12, 12, 11, 11, 11,
- 11, 10, 10, 10, 10, 10, 10, 10, 19, 20, 20, 21, 20, 21, 21, 19, 18, 17,
- 16, 15, 14, 14, 13, 12, 12, 12, 11, 11, 11, 10, 10, 10, 9, 9, 10, 10, 9,
- 9, 9, 9, 18, 19, 19, 19, 19, 19, 20, 18, 17, 16, 15, 15, 14, 13, 13, 12,
- 11, 11, 11, 10, 10, 9, 9, 9, 9, 9, 9, 9, 8, 9, 9, 9, 16, 17, 17, 17, 17,
- 18, 18, 16, 16, 15, 14, 14, 13, 12, 12, 11, 11, 10, 10, 9, 9, 9, 9, 8,
- 8, 8, 8, 8, 8, 8, 8, 8, 13, 14, 14, 14, 14, 15, 15, 14, 14, 13, 12, 12,
- 11, 11, 11, 10, 9, 9, 9, 8, 8, 8, 8, 8, 7, 8, 8, 7, 7, 7, 7, 8, 13, 13,
- 14, 14, 14, 14, 15, 14, 13, 13, 12, 12, 11, 11, 10, 10, 9, 9, 9, 8, 8,
- 8, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 12, 12, 13, 13, 13, 13, 13, 13, 13, 12,
- 12, 11, 11, 10, 10, 9, 9, 9, 8, 8, 8, 7, 7, 7, 7, 7, 7, 7, 6, 6, 6, 7,
- 11, 12, 12, 12, 13, 13, 13, 13, 12, 12, 12, 11, 11, 10, 10, 9, 9, 9, 8,
- 8, 8, 8, 7, 7, 7, 7, 6, 6, 6, 6, 6, 6, 11, 12, 12, 12, 12, 12, 12, 13,
- 12, 12, 11, 11, 11, 10, 10, 10, 9, 9, 8, 8, 8, 8, 7, 7, 7, 6, 6, 6, 6,
- 6, 6, 6, 11, 11, 11, 12, 12, 12, 12, 12, 12, 12, 12, 11, 11, 10, 10, 9,
- 9, 9, 9, 8, 8, 7, 7, 7, 7, 6, 6, 6, 6, 6, 6, 5,
- /* Size 4x16 */
- 33, 26, 16, 11, 32, 27, 17, 12, 32, 26, 17, 13, 31, 26, 18, 13, 30, 22,
- 16, 12, 28, 19, 14, 12, 25, 18, 13, 11, 22, 17, 12, 10, 19, 15, 11, 9,
- 17, 14, 10, 8, 16, 13, 9, 8, 14, 12, 9, 7, 13, 11, 8, 7, 12, 11, 8, 6,
- 12, 11, 8, 6, 11, 10, 8, 6,
- /* Size 16x4 */
- 33, 32, 32, 31, 30, 28, 25, 22, 19, 17, 16, 14, 13, 12, 12, 11, 26, 27,
- 26, 26, 22, 19, 18, 17, 15, 14, 13, 12, 11, 11, 11, 10, 16, 17, 17, 18,
- 16, 14, 13, 12, 11, 10, 9, 9, 8, 8, 8, 8, 11, 12, 13, 13, 12, 12, 11,
- 10, 9, 8, 8, 7, 7, 6, 6, 6,
- /* Size 8x32 */
- 32, 32, 28, 23, 18, 13, 12, 11, 33, 32, 29, 24, 19, 14, 12, 12, 33, 32,
- 29, 25, 19, 14, 13, 12, 33, 32, 30, 25, 19, 14, 13, 12, 32, 31, 28, 24,
- 19, 14, 13, 12, 32, 30, 28, 24, 19, 15, 13, 12, 32, 30, 27, 24, 20, 15,
- 13, 12, 30, 29, 24, 21, 18, 14, 13, 13, 30, 28, 23, 20, 17, 14, 13, 12,
- 28, 28, 21, 19, 16, 13, 12, 12, 26, 26, 20, 18, 15, 12, 12, 11, 26, 26,
- 20, 17, 15, 12, 11, 11, 23, 24, 19, 16, 14, 11, 11, 11, 22, 23, 18, 16,
- 13, 11, 10, 10, 21, 22, 18, 15, 13, 11, 10, 10, 19, 20, 17, 14, 12, 10,
- 9, 10, 18, 19, 16, 14, 11, 9, 9, 9, 17, 19, 16, 14, 11, 9, 9, 9, 16, 17,
- 15, 13, 11, 9, 8, 8, 15, 17, 14, 12, 10, 8, 8, 8, 14, 16, 14, 12, 10, 8,
- 8, 8, 13, 14, 13, 11, 9, 8, 7, 8, 13, 14, 13, 11, 9, 8, 7, 7, 12, 14,
- 13, 11, 9, 8, 7, 7, 12, 13, 12, 11, 9, 7, 7, 7, 11, 13, 12, 10, 9, 8, 7,
- 6, 11, 12, 12, 10, 9, 8, 7, 6, 11, 12, 12, 10, 9, 7, 7, 6, 10, 12, 12,
- 10, 8, 7, 6, 6, 10, 11, 11, 10, 9, 7, 6, 6, 10, 11, 11, 10, 9, 7, 6, 6,
- 10, 11, 11, 10, 9, 8, 7, 6,
- /* Size 32x8 */
- 32, 33, 33, 33, 32, 32, 32, 30, 30, 28, 26, 26, 23, 22, 21, 19, 18, 17,
- 16, 15, 14, 13, 13, 12, 12, 11, 11, 11, 10, 10, 10, 10, 32, 32, 32, 32,
- 31, 30, 30, 29, 28, 28, 26, 26, 24, 23, 22, 20, 19, 19, 17, 17, 16, 14,
- 14, 14, 13, 13, 12, 12, 12, 11, 11, 11, 28, 29, 29, 30, 28, 28, 27, 24,
- 23, 21, 20, 20, 19, 18, 18, 17, 16, 16, 15, 14, 14, 13, 13, 13, 12, 12,
- 12, 12, 12, 11, 11, 11, 23, 24, 25, 25, 24, 24, 24, 21, 20, 19, 18, 17,
- 16, 16, 15, 14, 14, 14, 13, 12, 12, 11, 11, 11, 11, 10, 10, 10, 10, 10,
- 10, 10, 18, 19, 19, 19, 19, 19, 20, 18, 17, 16, 15, 15, 14, 13, 13, 12,
- 11, 11, 11, 10, 10, 9, 9, 9, 9, 9, 9, 9, 8, 9, 9, 9, 13, 14, 14, 14, 14,
- 15, 15, 14, 14, 13, 12, 12, 11, 11, 11, 10, 9, 9, 9, 8, 8, 8, 8, 8, 7,
- 8, 8, 7, 7, 7, 7, 8, 12, 12, 13, 13, 13, 13, 13, 13, 13, 12, 12, 11, 11,
- 10, 10, 9, 9, 9, 8, 8, 8, 7, 7, 7, 7, 7, 7, 7, 6, 6, 6, 7, 11, 12, 12,
- 12, 12, 12, 12, 13, 12, 12, 11, 11, 11, 10, 10, 10, 9, 9, 8, 8, 8, 8, 7,
- 7, 7, 6, 6, 6, 6, 6, 6, 6 },
- { /* Chroma */
- /* Size 4x4 */
- 32, 23, 19, 16, 23, 19, 17, 15, 19, 17, 13, 12, 16, 15, 12, 10,
- /* Size 8x8 */
- 33, 28, 22, 21, 20, 17, 16, 15, 28, 24, 22, 22, 21, 19, 17, 16, 22, 22,
- 19, 19, 19, 17, 16, 16, 21, 22, 19, 17, 16, 15, 14, 14, 20, 21, 19, 16,
- 14, 13, 13, 13, 17, 19, 17, 15, 13, 12, 12, 12, 16, 17, 16, 14, 13, 12,
- 11, 10, 15, 16, 16, 14, 13, 12, 10, 10,
- /* Size 16x16 */
- 32, 34, 31, 28, 23, 21, 21, 20, 19, 18, 17, 16, 15, 15, 15, 14, 34, 33,
- 29, 26, 23, 22, 22, 22, 20, 19, 19, 17, 17, 16, 16, 15, 31, 29, 26, 24,
- 22, 22, 23, 22, 21, 20, 19, 18, 17, 17, 16, 16, 28, 26, 24, 22, 22, 22,
- 23, 22, 21, 20, 20, 19, 18, 18, 17, 16, 23, 23, 22, 22, 20, 20, 20, 20,
- 19, 19, 18, 17, 17, 17, 16, 17, 21, 22, 22, 22, 20, 19, 19, 18, 18, 17,
- 17, 16, 16, 16, 16, 16, 21, 22, 23, 23, 20, 19, 18, 17, 17, 16, 16, 15,
- 15, 15, 15, 15, 20, 22, 22, 22, 20, 18, 17, 17, 16, 15, 15, 14, 14, 14,
- 14, 14, 19, 20, 21, 21, 19, 18, 17, 16, 15, 14, 14, 13, 13, 13, 13, 13,
- 18, 19, 20, 20, 19, 17, 16, 15, 14, 13, 13, 12, 12, 12, 12, 12, 17, 19,
- 19, 20, 18, 17, 16, 15, 14, 13, 12, 12, 12, 12, 12, 12, 16, 17, 18, 19,
- 17, 16, 15, 14, 13, 12, 12, 11, 11, 11, 11, 11, 15, 17, 17, 18, 17, 16,
- 15, 14, 13, 12, 12, 11, 11, 11, 11, 11, 15, 16, 17, 18, 17, 16, 15, 14,
- 13, 12, 12, 11, 11, 10, 10, 10, 15, 16, 16, 17, 16, 16, 15, 14, 13, 12,
- 12, 11, 11, 10, 10, 10, 14, 15, 16, 16, 17, 16, 15, 14, 13, 12, 12, 11,
- 11, 10, 10, 10,
- /* Size 32x32 */
- 32, 33, 34, 34, 31, 29, 28, 25, 23, 21, 21, 21, 21, 20, 20, 20, 19, 19,
- 18, 17, 17, 16, 16, 16, 15, 15, 15, 15, 15, 14, 14, 14, 33, 33, 33, 33,
- 30, 28, 27, 24, 23, 22, 22, 22, 22, 21, 21, 20, 20, 20, 19, 18, 18, 17,
- 17, 17, 16, 16, 16, 16, 15, 15, 15, 15, 34, 33, 33, 33, 29, 28, 26, 24,
- 23, 22, 22, 22, 22, 22, 22, 21, 20, 20, 19, 19, 19, 18, 17, 17, 17, 16,
- 16, 16, 16, 15, 15, 15, 34, 33, 33, 32, 29, 28, 26, 24, 23, 22, 23, 23,
- 23, 22, 22, 22, 21, 20, 20, 19, 19, 18, 18, 18, 17, 17, 17, 16, 16, 16,
- 16, 16, 31, 30, 29, 29, 26, 25, 24, 23, 22, 22, 22, 22, 23, 22, 22, 22,
- 21, 21, 20, 19, 19, 18, 18, 18, 17, 17, 17, 17, 16, 16, 16, 16, 29, 28,
- 28, 28, 25, 24, 23, 22, 22, 22, 22, 22, 23, 22, 22, 22, 21, 21, 20, 20,
- 19, 19, 18, 18, 18, 17, 17, 17, 17, 16, 16, 16, 28, 27, 26, 26, 24, 23,
- 22, 22, 22, 21, 22, 22, 23, 22, 22, 22, 21, 21, 20, 20, 20, 19, 19, 19,
- 18, 18, 18, 17, 17, 17, 16, 16, 25, 24, 24, 24, 23, 22, 22, 21, 21, 20,
- 21, 21, 21, 20, 20, 20, 20, 20, 19, 19, 19, 18, 18, 18, 17, 17, 17, 17,
- 17, 17, 17, 17, 23, 23, 23, 23, 22, 22, 22, 21, 20, 20, 20, 20, 20, 20,
- 20, 20, 19, 19, 19, 18, 18, 17, 17, 17, 17, 17, 17, 17, 16, 17, 17, 17,
- 21, 22, 22, 22, 22, 22, 21, 20, 20, 19, 19, 19, 19, 19, 19, 19, 19, 19,
- 18, 18, 18, 17, 17, 17, 16, 16, 16, 16, 16, 16, 16, 16, 21, 22, 22, 23,
- 22, 22, 22, 21, 20, 19, 19, 19, 19, 18, 18, 18, 18, 17, 17, 17, 17, 16,
- 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 21, 22, 22, 23, 22, 22, 22, 21,
- 20, 19, 19, 19, 18, 18, 18, 18, 17, 17, 17, 17, 16, 16, 16, 16, 16, 15,
- 15, 15, 15, 15, 15, 15, 21, 22, 22, 23, 23, 23, 23, 21, 20, 19, 19, 18,
- 18, 17, 17, 17, 17, 17, 16, 16, 16, 15, 15, 15, 15, 15, 15, 15, 15, 15,
- 15, 15, 20, 21, 22, 22, 22, 22, 22, 20, 20, 19, 18, 18, 17, 17, 17, 16,
- 16, 16, 16, 15, 15, 15, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 20, 21,
- 22, 22, 22, 22, 22, 20, 20, 19, 18, 18, 17, 17, 17, 16, 16, 16, 15, 15,
- 15, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 20, 20, 21, 22, 22, 22,
- 22, 20, 20, 19, 18, 18, 17, 16, 16, 16, 15, 15, 15, 14, 14, 14, 14, 14,
- 13, 13, 14, 13, 13, 14, 14, 13, 19, 20, 20, 21, 21, 21, 21, 20, 19, 19,
- 18, 17, 17, 16, 16, 15, 15, 15, 14, 14, 14, 13, 13, 13, 13, 13, 13, 13,
- 13, 13, 13, 13, 19, 20, 20, 20, 21, 21, 21, 20, 19, 19, 17, 17, 17, 16,
- 16, 15, 15, 14, 14, 14, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
- 18, 19, 19, 20, 20, 20, 20, 19, 19, 18, 17, 17, 16, 16, 15, 15, 14, 14,
- 13, 13, 13, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 13, 17, 18, 19, 19,
- 19, 20, 20, 19, 18, 18, 17, 17, 16, 15, 15, 14, 14, 14, 13, 13, 13, 12,
- 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 17, 18, 19, 19, 19, 19, 20, 19,
- 18, 18, 17, 16, 16, 15, 15, 14, 14, 13, 13, 13, 12, 12, 12, 12, 12, 12,
- 12, 12, 12, 12, 12, 12, 16, 17, 18, 18, 18, 19, 19, 18, 17, 17, 16, 16,
- 15, 15, 14, 14, 13, 13, 12, 12, 12, 12, 12, 11, 11, 12, 11, 12, 11, 12,
- 12, 12, 16, 17, 17, 18, 18, 18, 19, 18, 17, 17, 16, 16, 15, 14, 14, 14,
- 13, 13, 12, 12, 12, 12, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 16, 17,
- 17, 18, 18, 18, 19, 18, 17, 17, 16, 16, 15, 14, 14, 14, 13, 13, 12, 12,
- 12, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 15, 16, 17, 17, 17, 18,
- 18, 17, 17, 16, 16, 16, 15, 14, 14, 13, 13, 13, 12, 12, 12, 11, 11, 11,
- 11, 11, 11, 11, 11, 11, 11, 11, 15, 16, 16, 17, 17, 17, 18, 17, 17, 16,
- 16, 15, 15, 14, 14, 13, 13, 13, 12, 12, 12, 12, 11, 11, 11, 11, 11, 10,
- 10, 10, 11, 10, 15, 16, 16, 17, 17, 17, 18, 17, 17, 16, 16, 15, 15, 14,
- 14, 14, 13, 13, 12, 12, 12, 11, 11, 11, 11, 11, 10, 10, 10, 10, 10, 10,
- 15, 16, 16, 16, 17, 17, 17, 17, 17, 16, 16, 15, 15, 14, 14, 13, 13, 13,
- 12, 12, 12, 12, 11, 11, 11, 10, 10, 10, 10, 10, 10, 10, 15, 15, 16, 16,
- 16, 17, 17, 17, 16, 16, 16, 15, 15, 14, 14, 13, 13, 13, 12, 12, 12, 11,
- 11, 11, 11, 10, 10, 10, 10, 10, 10, 10, 14, 15, 15, 16, 16, 16, 17, 17,
- 17, 16, 16, 15, 15, 14, 14, 14, 13, 13, 12, 12, 12, 12, 11, 11, 11, 10,
- 10, 10, 10, 10, 10, 10, 14, 15, 15, 16, 16, 16, 16, 17, 17, 16, 16, 15,
- 15, 14, 14, 14, 13, 13, 12, 12, 12, 12, 11, 11, 11, 11, 10, 10, 10, 10,
- 10, 10, 14, 15, 15, 16, 16, 16, 16, 17, 17, 16, 16, 15, 15, 14, 14, 13,
- 13, 13, 13, 12, 12, 12, 11, 11, 11, 10, 10, 10, 10, 10, 10, 9,
- /* Size 4x8 */
- 33, 22, 19, 16, 27, 22, 20, 17, 22, 19, 18, 17, 22, 18, 16, 14, 20, 17,
- 14, 13, 18, 16, 12, 12, 17, 16, 12, 11, 16, 15, 12, 10,
- /* Size 8x4 */
- 33, 27, 22, 22, 20, 18, 17, 16, 22, 22, 19, 18, 17, 16, 16, 15, 19, 20,
- 18, 16, 14, 12, 12, 12, 16, 17, 17, 14, 13, 12, 11, 10,
- /* Size 8x16 */
- 32, 30, 21, 21, 19, 16, 15, 15, 33, 28, 22, 22, 20, 18, 17, 16, 31, 26,
- 22, 22, 21, 18, 17, 17, 28, 23, 22, 23, 21, 19, 18, 17, 23, 22, 20, 20,
- 19, 17, 17, 17, 21, 22, 19, 18, 18, 16, 16, 16, 21, 23, 19, 18, 17, 15,
- 15, 15, 20, 22, 19, 17, 16, 14, 14, 14, 19, 21, 19, 17, 15, 13, 13, 13,
- 18, 20, 18, 16, 14, 12, 12, 13, 17, 19, 18, 16, 14, 12, 12, 12, 16, 18,
- 17, 15, 13, 12, 11, 12, 16, 17, 16, 15, 13, 11, 11, 11, 15, 17, 16, 14,
- 13, 12, 11, 10, 15, 16, 16, 15, 13, 12, 11, 10, 14, 16, 16, 15, 13, 12,
- 11, 10,
- /* Size 16x8 */
- 32, 33, 31, 28, 23, 21, 21, 20, 19, 18, 17, 16, 16, 15, 15, 14, 30, 28,
- 26, 23, 22, 22, 23, 22, 21, 20, 19, 18, 17, 17, 16, 16, 21, 22, 22, 22,
- 20, 19, 19, 19, 19, 18, 18, 17, 16, 16, 16, 16, 21, 22, 22, 23, 20, 18,
- 18, 17, 17, 16, 16, 15, 15, 14, 15, 15, 19, 20, 21, 21, 19, 18, 17, 16,
- 15, 14, 14, 13, 13, 13, 13, 13, 16, 18, 18, 19, 17, 16, 15, 14, 13, 12,
- 12, 12, 11, 12, 12, 12, 15, 17, 17, 18, 17, 16, 15, 14, 13, 12, 12, 11,
- 11, 11, 11, 11, 15, 16, 17, 17, 17, 16, 15, 14, 13, 13, 12, 12, 11, 10,
- 10, 10,
- /* Size 16x32 */
- 32, 33, 30, 28, 21, 21, 21, 20, 19, 18, 16, 16, 15, 15, 15, 15, 33, 33,
- 29, 27, 22, 22, 22, 20, 20, 19, 17, 17, 16, 16, 16, 16, 33, 32, 28, 26,
- 22, 22, 22, 21, 20, 19, 18, 17, 17, 16, 16, 16, 34, 32, 28, 26, 22, 23,
- 23, 21, 21, 20, 18, 18, 17, 17, 17, 16, 31, 28, 26, 24, 22, 22, 22, 22,
- 21, 20, 18, 18, 17, 17, 17, 16, 29, 27, 24, 23, 22, 22, 23, 22, 21, 20,
- 19, 18, 18, 17, 17, 17, 28, 26, 23, 22, 22, 22, 23, 22, 21, 20, 19, 19,
- 18, 18, 17, 17, 24, 24, 23, 22, 20, 20, 21, 20, 20, 19, 18, 18, 17, 18,
- 17, 17, 23, 23, 22, 22, 20, 20, 20, 20, 19, 19, 17, 17, 17, 17, 17, 17,
- 21, 22, 22, 21, 19, 19, 19, 19, 19, 18, 17, 17, 16, 17, 17, 16, 21, 22,
- 22, 22, 19, 19, 18, 18, 18, 17, 16, 16, 16, 16, 16, 16, 21, 23, 22, 22,
- 19, 19, 18, 18, 17, 17, 16, 16, 16, 16, 16, 16, 21, 23, 23, 22, 19, 18,
- 18, 17, 17, 16, 15, 15, 15, 15, 15, 16, 20, 22, 22, 22, 19, 18, 17, 16,
- 16, 16, 15, 14, 15, 14, 15, 15, 20, 22, 22, 22, 19, 18, 17, 16, 16, 15,
- 14, 14, 14, 14, 14, 15, 20, 21, 22, 22, 19, 18, 17, 16, 15, 14, 14, 14,
- 13, 14, 14, 14, 19, 21, 21, 21, 19, 18, 17, 15, 15, 14, 13, 13, 13, 13,
- 13, 14, 19, 20, 21, 21, 19, 17, 17, 15, 15, 14, 13, 13, 13, 13, 13, 13,
- 18, 20, 20, 20, 18, 17, 16, 15, 14, 13, 12, 12, 12, 12, 13, 13, 17, 19,
- 20, 20, 18, 17, 16, 14, 14, 13, 12, 12, 12, 12, 12, 12, 17, 19, 19, 20,
- 18, 17, 16, 14, 14, 13, 12, 12, 12, 12, 12, 12, 16, 18, 18, 19, 17, 16,
- 15, 14, 13, 12, 12, 11, 11, 12, 12, 12, 16, 18, 18, 19, 17, 16, 15, 14,
- 13, 12, 12, 11, 11, 11, 12, 12, 16, 17, 18, 18, 17, 16, 15, 14, 13, 12,
- 11, 11, 11, 11, 11, 11, 16, 17, 17, 18, 16, 16, 15, 13, 13, 12, 11, 11,
- 11, 11, 11, 11, 15, 17, 17, 18, 16, 16, 15, 14, 13, 12, 12, 11, 11, 11,
- 11, 11, 15, 17, 17, 17, 16, 16, 14, 14, 13, 12, 12, 11, 11, 11, 10, 11,
- 15, 16, 17, 17, 16, 16, 14, 14, 13, 12, 12, 11, 11, 10, 10, 10, 15, 16,
- 16, 17, 16, 16, 15, 14, 13, 13, 12, 11, 11, 10, 10, 10, 14, 16, 16, 17,
- 16, 15, 15, 14, 13, 12, 12, 11, 11, 10, 10, 10, 14, 16, 16, 17, 16, 15,
- 15, 14, 13, 12, 12, 11, 11, 10, 10, 10, 14, 16, 16, 16, 16, 15, 15, 13,
- 13, 12, 12, 11, 11, 10, 10, 10,
- /* Size 32x16 */
- 32, 33, 33, 34, 31, 29, 28, 24, 23, 21, 21, 21, 21, 20, 20, 20, 19, 19,
- 18, 17, 17, 16, 16, 16, 16, 15, 15, 15, 15, 14, 14, 14, 33, 33, 32, 32,
- 28, 27, 26, 24, 23, 22, 22, 23, 23, 22, 22, 21, 21, 20, 20, 19, 19, 18,
- 18, 17, 17, 17, 17, 16, 16, 16, 16, 16, 30, 29, 28, 28, 26, 24, 23, 23,
- 22, 22, 22, 22, 23, 22, 22, 22, 21, 21, 20, 20, 19, 18, 18, 18, 17, 17,
- 17, 17, 16, 16, 16, 16, 28, 27, 26, 26, 24, 23, 22, 22, 22, 21, 22, 22,
- 22, 22, 22, 22, 21, 21, 20, 20, 20, 19, 19, 18, 18, 18, 17, 17, 17, 17,
- 17, 16, 21, 22, 22, 22, 22, 22, 22, 20, 20, 19, 19, 19, 19, 19, 19, 19,
- 19, 19, 18, 18, 18, 17, 17, 17, 16, 16, 16, 16, 16, 16, 16, 16, 21, 22,
- 22, 23, 22, 22, 22, 20, 20, 19, 19, 19, 18, 18, 18, 18, 18, 17, 17, 17,
- 17, 16, 16, 16, 16, 16, 16, 16, 16, 15, 15, 15, 21, 22, 22, 23, 22, 23,
- 23, 21, 20, 19, 18, 18, 18, 17, 17, 17, 17, 17, 16, 16, 16, 15, 15, 15,
- 15, 15, 14, 14, 15, 15, 15, 15, 20, 20, 21, 21, 22, 22, 22, 20, 20, 19,
- 18, 18, 17, 16, 16, 16, 15, 15, 15, 14, 14, 14, 14, 14, 13, 14, 14, 14,
- 14, 14, 14, 13, 19, 20, 20, 21, 21, 21, 21, 20, 19, 19, 18, 17, 17, 16,
- 16, 15, 15, 15, 14, 14, 14, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
- 18, 19, 19, 20, 20, 20, 20, 19, 19, 18, 17, 17, 16, 16, 15, 14, 14, 14,
- 13, 13, 13, 12, 12, 12, 12, 12, 12, 12, 13, 12, 12, 12, 16, 17, 18, 18,
- 18, 19, 19, 18, 17, 17, 16, 16, 15, 15, 14, 14, 13, 13, 12, 12, 12, 12,
- 12, 11, 11, 12, 12, 12, 12, 12, 12, 12, 16, 17, 17, 18, 18, 18, 19, 18,
- 17, 17, 16, 16, 15, 14, 14, 14, 13, 13, 12, 12, 12, 11, 11, 11, 11, 11,
- 11, 11, 11, 11, 11, 11, 15, 16, 17, 17, 17, 18, 18, 17, 17, 16, 16, 16,
- 15, 15, 14, 13, 13, 13, 12, 12, 12, 11, 11, 11, 11, 11, 11, 11, 11, 11,
- 11, 11, 15, 16, 16, 17, 17, 17, 18, 18, 17, 17, 16, 16, 15, 14, 14, 14,
- 13, 13, 12, 12, 12, 12, 11, 11, 11, 11, 11, 10, 10, 10, 10, 10, 15, 16,
- 16, 17, 17, 17, 17, 17, 17, 17, 16, 16, 15, 15, 14, 14, 13, 13, 13, 12,
- 12, 12, 12, 11, 11, 11, 10, 10, 10, 10, 10, 10, 15, 16, 16, 16, 16, 17,
- 17, 17, 17, 16, 16, 16, 16, 15, 15, 14, 14, 13, 13, 12, 12, 12, 12, 11,
- 11, 11, 11, 10, 10, 10, 10, 10,
- /* Size 4x16 */
- 33, 21, 18, 15, 32, 22, 19, 16, 28, 22, 20, 17, 26, 22, 20, 18, 23, 20,
- 19, 17, 22, 19, 17, 16, 23, 18, 16, 15, 22, 18, 15, 14, 21, 18, 14, 13,
- 20, 17, 13, 12, 19, 17, 13, 12, 18, 16, 12, 11, 17, 16, 12, 11, 17, 16,
- 12, 11, 16, 16, 13, 10, 16, 15, 12, 10,
- /* Size 16x4 */
- 33, 32, 28, 26, 23, 22, 23, 22, 21, 20, 19, 18, 17, 17, 16, 16, 21, 22,
- 22, 22, 20, 19, 18, 18, 18, 17, 17, 16, 16, 16, 16, 15, 18, 19, 20, 20,
- 19, 17, 16, 15, 14, 13, 13, 12, 12, 12, 13, 12, 15, 16, 17, 18, 17, 16,
- 15, 14, 13, 12, 12, 11, 11, 11, 10, 10,
- /* Size 8x32 */
- 32, 30, 21, 21, 19, 16, 15, 15, 33, 29, 22, 22, 20, 17, 16, 16, 33, 28,
- 22, 22, 20, 18, 17, 16, 34, 28, 22, 23, 21, 18, 17, 17, 31, 26, 22, 22,
- 21, 18, 17, 17, 29, 24, 22, 23, 21, 19, 18, 17, 28, 23, 22, 23, 21, 19,
- 18, 17, 24, 23, 20, 21, 20, 18, 17, 17, 23, 22, 20, 20, 19, 17, 17, 17,
- 21, 22, 19, 19, 19, 17, 16, 17, 21, 22, 19, 18, 18, 16, 16, 16, 21, 22,
- 19, 18, 17, 16, 16, 16, 21, 23, 19, 18, 17, 15, 15, 15, 20, 22, 19, 17,
- 16, 15, 15, 15, 20, 22, 19, 17, 16, 14, 14, 14, 20, 22, 19, 17, 15, 14,
- 13, 14, 19, 21, 19, 17, 15, 13, 13, 13, 19, 21, 19, 17, 15, 13, 13, 13,
- 18, 20, 18, 16, 14, 12, 12, 13, 17, 20, 18, 16, 14, 12, 12, 12, 17, 19,
- 18, 16, 14, 12, 12, 12, 16, 18, 17, 15, 13, 12, 11, 12, 16, 18, 17, 15,
- 13, 12, 11, 12, 16, 18, 17, 15, 13, 11, 11, 11, 16, 17, 16, 15, 13, 11,
- 11, 11, 15, 17, 16, 15, 13, 12, 11, 11, 15, 17, 16, 14, 13, 12, 11, 10,
- 15, 17, 16, 14, 13, 12, 11, 10, 15, 16, 16, 15, 13, 12, 11, 10, 14, 16,
- 16, 15, 13, 12, 11, 10, 14, 16, 16, 15, 13, 12, 11, 10, 14, 16, 16, 15,
- 13, 12, 11, 10,
- /* Size 32x8 */
- 32, 33, 33, 34, 31, 29, 28, 24, 23, 21, 21, 21, 21, 20, 20, 20, 19, 19,
- 18, 17, 17, 16, 16, 16, 16, 15, 15, 15, 15, 14, 14, 14, 30, 29, 28, 28,
- 26, 24, 23, 23, 22, 22, 22, 22, 23, 22, 22, 22, 21, 21, 20, 20, 19, 18,
- 18, 18, 17, 17, 17, 17, 16, 16, 16, 16, 21, 22, 22, 22, 22, 22, 22, 20,
- 20, 19, 19, 19, 19, 19, 19, 19, 19, 19, 18, 18, 18, 17, 17, 17, 16, 16,
- 16, 16, 16, 16, 16, 16, 21, 22, 22, 23, 22, 23, 23, 21, 20, 19, 18, 18,
- 18, 17, 17, 17, 17, 17, 16, 16, 16, 15, 15, 15, 15, 15, 14, 14, 15, 15,
- 15, 15, 19, 20, 20, 21, 21, 21, 21, 20, 19, 19, 18, 17, 17, 16, 16, 15,
- 15, 15, 14, 14, 14, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 16, 17,
- 18, 18, 18, 19, 19, 18, 17, 17, 16, 16, 15, 15, 14, 14, 13, 13, 12, 12,
- 12, 12, 12, 11, 11, 12, 12, 12, 12, 12, 12, 12, 15, 16, 17, 17, 17, 18,
- 18, 17, 17, 16, 16, 16, 15, 15, 14, 13, 13, 13, 12, 12, 12, 11, 11, 11,
- 11, 11, 11, 11, 11, 11, 11, 11, 15, 16, 16, 17, 17, 17, 17, 17, 17, 17,
- 16, 16, 15, 15, 14, 14, 13, 13, 13, 12, 12, 12, 12, 11, 11, 11, 10, 10,
- 10, 10, 10, 10 },
- },
- {
- { /* Luma */
- /* Size 4x4 */
- 32, 28, 18, 13, 28, 19, 14, 11, 18, 14, 10, 8, 13, 11, 8, 7,
- /* Size 8x8 */
- 32, 32, 29, 24, 19, 15, 13, 11, 32, 31, 28, 24, 20, 16, 14, 12, 29, 28,
- 22, 20, 17, 14, 13, 12, 24, 24, 20, 16, 14, 12, 11, 10, 19, 20, 17, 14,
- 12, 10, 9, 9, 15, 16, 14, 12, 10, 9, 8, 8, 13, 14, 13, 11, 9, 8, 7, 7,
- 11, 12, 12, 10, 9, 8, 7, 6,
- /* Size 16x16 */
- 32, 33, 33, 32, 30, 28, 25, 22, 19, 17, 16, 14, 12, 12, 11, 11, 33, 32,
- 32, 32, 30, 29, 26, 23, 20, 19, 17, 15, 13, 13, 12, 11, 33, 32, 31, 31,
- 29, 28, 26, 23, 21, 19, 17, 15, 14, 13, 12, 12, 32, 32, 31, 29, 28, 27,
- 25, 23, 21, 19, 18, 16, 14, 14, 13, 12, 30, 30, 29, 28, 26, 24, 22, 20,
- 19, 18, 16, 15, 13, 13, 12, 12, 28, 29, 28, 27, 24, 21, 20, 18, 17, 16,
- 15, 14, 13, 12, 11, 11, 25, 26, 26, 25, 22, 20, 18, 17, 15, 14, 14, 12,
- 12, 11, 11, 11, 22, 23, 23, 23, 20, 18, 17, 15, 14, 13, 12, 11, 11, 10,
- 10, 10, 19, 20, 21, 21, 19, 17, 15, 14, 12, 12, 11, 10, 10, 9, 9, 9, 17,
- 19, 19, 19, 18, 16, 14, 13, 12, 11, 10, 10, 9, 9, 9, 8, 16, 17, 17, 18,
- 16, 15, 14, 12, 11, 10, 10, 9, 9, 8, 8, 8, 14, 15, 15, 16, 15, 14, 12,
- 11, 10, 10, 9, 8, 8, 8, 7, 7, 12, 13, 14, 14, 13, 13, 12, 11, 10, 9, 9,
- 8, 7, 7, 7, 7, 12, 13, 13, 14, 13, 12, 11, 10, 9, 9, 8, 8, 7, 7, 7, 6,
- 11, 12, 12, 13, 12, 11, 11, 10, 9, 9, 8, 7, 7, 7, 6, 6, 11, 11, 12, 12,
- 12, 11, 11, 10, 9, 8, 8, 7, 7, 6, 6, 6,
- /* Size 32x32 */
- 32, 33, 33, 33, 33, 33, 32, 32, 30, 29, 28, 26, 25, 23, 22, 21, 19, 18,
- 17, 16, 16, 14, 14, 13, 12, 12, 12, 11, 11, 11, 11, 10, 33, 32, 32, 32,
- 32, 32, 32, 31, 30, 29, 29, 27, 26, 24, 23, 22, 20, 19, 18, 17, 17, 15,
- 14, 13, 13, 13, 12, 12, 12, 11, 11, 11, 33, 32, 32, 32, 32, 32, 32, 31,
- 30, 30, 29, 27, 26, 24, 23, 23, 20, 20, 19, 17, 17, 15, 15, 14, 13, 13,
- 13, 12, 12, 12, 11, 11, 33, 32, 32, 32, 32, 32, 32, 31, 31, 30, 30, 28,
- 27, 25, 23, 23, 21, 20, 19, 18, 17, 16, 15, 14, 14, 14, 13, 13, 12, 12,
- 12, 11, 33, 32, 32, 32, 31, 31, 31, 30, 29, 28, 28, 26, 26, 24, 23, 23,
- 21, 20, 19, 18, 17, 16, 15, 14, 14, 14, 13, 13, 12, 12, 12, 11, 33, 32,
- 32, 32, 31, 31, 30, 30, 29, 28, 28, 26, 26, 24, 23, 23, 20, 20, 19, 18,
- 17, 16, 15, 14, 14, 14, 13, 13, 12, 12, 12, 12, 32, 32, 32, 32, 31, 30,
- 29, 28, 28, 27, 27, 26, 25, 24, 23, 22, 21, 20, 19, 18, 18, 16, 16, 15,
- 14, 14, 14, 13, 13, 12, 12, 12, 32, 31, 31, 31, 30, 30, 28, 28, 27, 26,
- 26, 24, 24, 23, 22, 22, 20, 19, 19, 17, 17, 16, 15, 14, 14, 14, 13, 13,
- 13, 12, 12, 12, 30, 30, 30, 31, 29, 29, 28, 27, 26, 24, 24, 23, 22, 22,
- 20, 20, 19, 18, 18, 17, 16, 15, 15, 14, 13, 13, 13, 12, 12, 12, 12, 12,
- 29, 29, 30, 30, 28, 28, 27, 26, 24, 22, 22, 21, 20, 20, 19, 19, 17, 17,
- 17, 16, 15, 14, 14, 13, 13, 13, 12, 12, 12, 12, 11, 11, 28, 29, 29, 30,
- 28, 28, 27, 26, 24, 22, 21, 20, 20, 19, 18, 18, 17, 17, 16, 15, 15, 14,
- 14, 13, 13, 13, 12, 12, 11, 11, 11, 11, 26, 27, 27, 28, 26, 26, 26, 24,
- 23, 21, 20, 19, 19, 18, 17, 17, 16, 16, 15, 14, 14, 13, 13, 12, 12, 12,
- 11, 11, 11, 11, 11, 11, 25, 26, 26, 27, 26, 26, 25, 24, 22, 20, 20, 19,
- 18, 17, 17, 16, 15, 15, 14, 14, 14, 13, 12, 12, 12, 12, 11, 11, 11, 11,
- 11, 10, 23, 24, 24, 25, 24, 24, 24, 23, 22, 20, 19, 18, 17, 16, 16, 15,
- 14, 14, 14, 13, 13, 12, 12, 11, 11, 11, 11, 10, 10, 10, 10, 10, 22, 23,
- 23, 23, 23, 23, 23, 22, 20, 19, 18, 17, 17, 16, 15, 15, 14, 13, 13, 12,
- 12, 12, 11, 11, 11, 11, 10, 10, 10, 10, 10, 10, 21, 22, 23, 23, 23, 23,
- 22, 22, 20, 19, 18, 17, 16, 15, 15, 14, 13, 13, 13, 12, 12, 11, 11, 11,
- 10, 10, 10, 10, 10, 10, 9, 9, 19, 20, 20, 21, 21, 20, 21, 20, 19, 17,
- 17, 16, 15, 14, 14, 13, 12, 12, 12, 11, 11, 11, 10, 10, 10, 10, 9, 9, 9,
- 9, 9, 9, 18, 19, 20, 20, 20, 20, 20, 19, 18, 17, 17, 16, 15, 14, 13, 13,
- 12, 12, 12, 11, 11, 10, 10, 10, 9, 9, 9, 9, 9, 9, 9, 9, 17, 18, 19, 19,
- 19, 19, 19, 19, 18, 17, 16, 15, 14, 14, 13, 13, 12, 12, 11, 11, 10, 10,
- 10, 9, 9, 9, 9, 9, 9, 8, 8, 9, 16, 17, 17, 18, 18, 18, 18, 17, 17, 16,
- 15, 14, 14, 13, 12, 12, 11, 11, 11, 10, 10, 9, 9, 9, 9, 9, 8, 8, 8, 8,
- 8, 8, 16, 17, 17, 17, 17, 17, 18, 17, 16, 15, 15, 14, 14, 13, 12, 12,
- 11, 11, 10, 10, 10, 9, 9, 9, 9, 8, 8, 8, 8, 8, 8, 8, 14, 15, 15, 16, 16,
- 16, 16, 16, 15, 14, 14, 13, 13, 12, 12, 11, 11, 10, 10, 9, 9, 9, 9, 8,
- 8, 8, 8, 8, 8, 8, 8, 7, 14, 14, 15, 15, 15, 15, 16, 15, 15, 14, 14, 13,
- 12, 12, 11, 11, 10, 10, 10, 9, 9, 9, 8, 8, 8, 8, 8, 7, 7, 7, 7, 7, 13,
- 13, 14, 14, 14, 14, 15, 14, 14, 13, 13, 12, 12, 11, 11, 11, 10, 10, 9,
- 9, 9, 8, 8, 8, 8, 7, 7, 7, 7, 7, 7, 7, 12, 13, 13, 14, 14, 14, 14, 14,
- 13, 13, 13, 12, 12, 11, 11, 10, 10, 9, 9, 9, 9, 8, 8, 8, 7, 7, 7, 7, 7,
- 7, 7, 7, 12, 13, 13, 14, 14, 14, 14, 14, 13, 13, 13, 12, 12, 11, 11, 10,
- 10, 9, 9, 9, 8, 8, 8, 7, 7, 7, 7, 7, 7, 7, 7, 7, 12, 12, 13, 13, 13, 13,
- 14, 13, 13, 12, 12, 11, 11, 11, 10, 10, 9, 9, 9, 8, 8, 8, 8, 7, 7, 7, 7,
- 7, 7, 6, 6, 6, 11, 12, 12, 13, 13, 13, 13, 13, 12, 12, 12, 11, 11, 10,
- 10, 10, 9, 9, 9, 8, 8, 8, 7, 7, 7, 7, 7, 7, 7, 6, 6, 6, 11, 12, 12, 12,
- 12, 12, 13, 13, 12, 12, 11, 11, 11, 10, 10, 10, 9, 9, 9, 8, 8, 8, 7, 7,
- 7, 7, 7, 7, 6, 6, 6, 6, 11, 11, 12, 12, 12, 12, 12, 12, 12, 12, 11, 11,
- 11, 10, 10, 10, 9, 9, 8, 8, 8, 8, 7, 7, 7, 7, 6, 6, 6, 6, 6, 6, 11, 11,
- 11, 12, 12, 12, 12, 12, 12, 11, 11, 11, 11, 10, 10, 9, 9, 9, 8, 8, 8, 8,
- 7, 7, 7, 7, 6, 6, 6, 6, 6, 6, 10, 11, 11, 11, 11, 12, 12, 12, 12, 11,
- 11, 11, 10, 10, 10, 9, 9, 9, 9, 8, 8, 7, 7, 7, 7, 7, 6, 6, 6, 6, 6, 6,
- /* Size 4x8 */
- 32, 29, 17, 12, 32, 28, 18, 13, 30, 22, 16, 12, 25, 19, 13, 11, 20, 17,
- 11, 9, 16, 14, 9, 8, 14, 13, 9, 7, 12, 11, 9, 7,
- /* Size 8x4 */
- 32, 32, 30, 25, 20, 16, 14, 12, 29, 28, 22, 19, 17, 14, 13, 11, 17, 18,
- 16, 13, 11, 9, 9, 9, 12, 13, 12, 11, 9, 8, 7, 7,
- /* Size 8x16 */
- 32, 33, 29, 23, 19, 16, 12, 11, 33, 32, 30, 25, 20, 17, 13, 12, 33, 31,
- 29, 24, 21, 17, 14, 13, 32, 30, 28, 24, 21, 18, 14, 13, 30, 29, 25, 21,
- 19, 16, 13, 13, 28, 28, 22, 19, 17, 15, 13, 12, 25, 26, 21, 17, 15, 13,
- 12, 11, 22, 23, 19, 16, 14, 12, 11, 10, 19, 20, 18, 14, 12, 11, 10, 9,
- 18, 19, 17, 14, 12, 10, 9, 9, 16, 17, 16, 13, 11, 10, 9, 8, 14, 15, 14,
- 12, 10, 9, 8, 8, 12, 14, 13, 11, 10, 9, 7, 7, 12, 13, 12, 11, 9, 8, 7,
- 7, 11, 12, 12, 11, 9, 8, 7, 7, 11, 12, 12, 11, 9, 8, 7, 6,
- /* Size 16x8 */
- 32, 33, 33, 32, 30, 28, 25, 22, 19, 18, 16, 14, 12, 12, 11, 11, 33, 32,
- 31, 30, 29, 28, 26, 23, 20, 19, 17, 15, 14, 13, 12, 12, 29, 30, 29, 28,
- 25, 22, 21, 19, 18, 17, 16, 14, 13, 12, 12, 12, 23, 25, 24, 24, 21, 19,
- 17, 16, 14, 14, 13, 12, 11, 11, 11, 11, 19, 20, 21, 21, 19, 17, 15, 14,
- 12, 12, 11, 10, 10, 9, 9, 9, 16, 17, 17, 18, 16, 15, 13, 12, 11, 10, 10,
- 9, 9, 8, 8, 8, 12, 13, 14, 14, 13, 13, 12, 11, 10, 9, 9, 8, 7, 7, 7, 7,
- 11, 12, 13, 13, 13, 12, 11, 10, 9, 9, 8, 8, 7, 7, 7, 6,
- /* Size 16x32 */
- 32, 33, 33, 32, 29, 28, 23, 22, 19, 17, 16, 13, 12, 12, 11, 11, 33, 32,
- 32, 32, 29, 29, 24, 23, 20, 17, 17, 14, 13, 12, 12, 12, 33, 32, 32, 32,
- 30, 29, 25, 23, 20, 18, 17, 14, 13, 12, 12, 12, 33, 32, 32, 31, 30, 30,
- 25, 23, 21, 18, 17, 14, 14, 13, 12, 12, 33, 32, 31, 30, 29, 28, 24, 23,
- 21, 18, 17, 14, 14, 13, 13, 12, 32, 32, 31, 30, 28, 28, 24, 23, 20, 18,
- 17, 14, 14, 13, 13, 12, 32, 31, 30, 29, 28, 27, 24, 23, 21, 18, 18, 15,
- 14, 13, 13, 12, 32, 31, 30, 28, 26, 26, 23, 22, 20, 18, 17, 14, 14, 13,
- 13, 13, 30, 30, 29, 28, 25, 24, 21, 20, 19, 17, 16, 14, 13, 13, 13, 13,
- 29, 30, 28, 27, 23, 22, 20, 19, 17, 16, 15, 13, 13, 12, 12, 12, 28, 30,
- 28, 27, 22, 21, 19, 18, 17, 16, 15, 13, 13, 12, 12, 12, 26, 28, 26, 26,
- 21, 20, 18, 17, 16, 14, 14, 12, 12, 12, 12, 11, 25, 26, 26, 25, 21, 20,
- 17, 17, 15, 14, 13, 12, 12, 11, 11, 11, 23, 25, 24, 24, 20, 19, 16, 16,
- 14, 13, 13, 11, 11, 11, 11, 11, 22, 23, 23, 23, 19, 18, 16, 15, 14, 12,
- 12, 11, 11, 10, 10, 10, 21, 23, 23, 22, 19, 18, 15, 15, 13, 12, 12, 11,
- 10, 10, 10, 10, 19, 21, 20, 20, 18, 17, 14, 14, 12, 11, 11, 10, 10, 10,
- 9, 10, 19, 20, 20, 20, 17, 17, 14, 13, 12, 11, 11, 10, 9, 9, 9, 9, 18,
- 19, 19, 19, 17, 16, 14, 13, 12, 11, 10, 9, 9, 9, 9, 9, 16, 18, 18, 18,
- 16, 15, 13, 12, 11, 10, 10, 9, 9, 9, 9, 8, 16, 17, 17, 18, 16, 15, 13,
- 12, 11, 10, 10, 9, 9, 8, 8, 8, 14, 16, 16, 16, 14, 14, 12, 12, 11, 9, 9,
- 8, 8, 8, 8, 8, 14, 15, 15, 16, 14, 14, 12, 11, 10, 9, 9, 8, 8, 8, 8, 8,
- 13, 14, 14, 15, 13, 13, 11, 11, 10, 9, 9, 8, 8, 7, 7, 7, 12, 14, 14, 14,
- 13, 13, 11, 11, 10, 9, 9, 8, 7, 7, 7, 7, 12, 14, 14, 14, 13, 13, 11, 11,
- 10, 9, 8, 8, 7, 7, 7, 7, 12, 13, 13, 13, 12, 12, 11, 10, 9, 9, 8, 7, 7,
- 7, 7, 7, 12, 12, 13, 13, 12, 12, 11, 10, 9, 9, 8, 7, 7, 7, 7, 6, 11, 12,
- 12, 13, 12, 12, 11, 10, 9, 9, 8, 8, 7, 7, 7, 6, 11, 12, 12, 12, 12, 11,
- 11, 10, 9, 9, 8, 8, 7, 7, 6, 6, 11, 12, 12, 12, 12, 11, 11, 10, 9, 8, 8,
- 7, 7, 6, 6, 6, 10, 11, 11, 12, 12, 11, 11, 9, 9, 8, 8, 7, 7, 6, 6, 6,
- /* Size 32x16 */
- 32, 33, 33, 33, 33, 32, 32, 32, 30, 29, 28, 26, 25, 23, 22, 21, 19, 19,
- 18, 16, 16, 14, 14, 13, 12, 12, 12, 12, 11, 11, 11, 10, 33, 32, 32, 32,
- 32, 32, 31, 31, 30, 30, 30, 28, 26, 25, 23, 23, 21, 20, 19, 18, 17, 16,
- 15, 14, 14, 14, 13, 12, 12, 12, 12, 11, 33, 32, 32, 32, 31, 31, 30, 30,
- 29, 28, 28, 26, 26, 24, 23, 23, 20, 20, 19, 18, 17, 16, 15, 14, 14, 14,
- 13, 13, 12, 12, 12, 11, 32, 32, 32, 31, 30, 30, 29, 28, 28, 27, 27, 26,
- 25, 24, 23, 22, 20, 20, 19, 18, 18, 16, 16, 15, 14, 14, 13, 13, 13, 12,
- 12, 12, 29, 29, 30, 30, 29, 28, 28, 26, 25, 23, 22, 21, 21, 20, 19, 19,
- 18, 17, 17, 16, 16, 14, 14, 13, 13, 13, 12, 12, 12, 12, 12, 12, 28, 29,
- 29, 30, 28, 28, 27, 26, 24, 22, 21, 20, 20, 19, 18, 18, 17, 17, 16, 15,
- 15, 14, 14, 13, 13, 13, 12, 12, 12, 11, 11, 11, 23, 24, 25, 25, 24, 24,
- 24, 23, 21, 20, 19, 18, 17, 16, 16, 15, 14, 14, 14, 13, 13, 12, 12, 11,
- 11, 11, 11, 11, 11, 11, 11, 11, 22, 23, 23, 23, 23, 23, 23, 22, 20, 19,
- 18, 17, 17, 16, 15, 15, 14, 13, 13, 12, 12, 12, 11, 11, 11, 11, 10, 10,
- 10, 10, 10, 9, 19, 20, 20, 21, 21, 20, 21, 20, 19, 17, 17, 16, 15, 14,
- 14, 13, 12, 12, 12, 11, 11, 11, 10, 10, 10, 10, 9, 9, 9, 9, 9, 9, 17,
- 17, 18, 18, 18, 18, 18, 18, 17, 16, 16, 14, 14, 13, 12, 12, 11, 11, 11,
- 10, 10, 9, 9, 9, 9, 9, 9, 9, 9, 9, 8, 8, 16, 17, 17, 17, 17, 17, 18, 17,
- 16, 15, 15, 14, 13, 13, 12, 12, 11, 11, 10, 10, 10, 9, 9, 9, 9, 8, 8, 8,
- 8, 8, 8, 8, 13, 14, 14, 14, 14, 14, 15, 14, 14, 13, 13, 12, 12, 11, 11,
- 11, 10, 10, 9, 9, 9, 8, 8, 8, 8, 8, 7, 7, 8, 8, 7, 7, 12, 13, 13, 14,
- 14, 14, 14, 14, 13, 13, 13, 12, 12, 11, 11, 10, 10, 9, 9, 9, 9, 8, 8, 8,
- 7, 7, 7, 7, 7, 7, 7, 7, 12, 12, 12, 13, 13, 13, 13, 13, 13, 12, 12, 12,
- 11, 11, 10, 10, 10, 9, 9, 9, 8, 8, 8, 7, 7, 7, 7, 7, 7, 7, 6, 6, 11, 12,
- 12, 12, 13, 13, 13, 13, 13, 12, 12, 12, 11, 11, 10, 10, 9, 9, 9, 9, 8,
- 8, 8, 7, 7, 7, 7, 7, 7, 6, 6, 6, 11, 12, 12, 12, 12, 12, 12, 13, 13, 12,
- 12, 11, 11, 11, 10, 10, 10, 9, 9, 8, 8, 8, 8, 7, 7, 7, 7, 6, 6, 6, 6, 6,
- /* Size 4x16 */
- 33, 28, 17, 12, 32, 29, 18, 12, 32, 28, 18, 13, 31, 27, 18, 13, 30, 24,
- 17, 13, 30, 21, 16, 12, 26, 20, 14, 11, 23, 18, 12, 10, 21, 17, 11, 10,
- 19, 16, 11, 9, 17, 15, 10, 8, 15, 14, 9, 8, 14, 13, 9, 7, 13, 12, 9, 7,
- 12, 12, 9, 7, 12, 11, 8, 6,
- /* Size 16x4 */
- 33, 32, 32, 31, 30, 30, 26, 23, 21, 19, 17, 15, 14, 13, 12, 12, 28, 29,
- 28, 27, 24, 21, 20, 18, 17, 16, 15, 14, 13, 12, 12, 11, 17, 18, 18, 18,
- 17, 16, 14, 12, 11, 11, 10, 9, 9, 9, 9, 8, 12, 12, 13, 13, 13, 12, 11,
- 10, 10, 9, 8, 8, 7, 7, 7, 6,
- /* Size 8x32 */
- 32, 33, 29, 23, 19, 16, 12, 11, 33, 32, 29, 24, 20, 17, 13, 12, 33, 32,
- 30, 25, 20, 17, 13, 12, 33, 32, 30, 25, 21, 17, 14, 12, 33, 31, 29, 24,
- 21, 17, 14, 13, 32, 31, 28, 24, 20, 17, 14, 13, 32, 30, 28, 24, 21, 18,
- 14, 13, 32, 30, 26, 23, 20, 17, 14, 13, 30, 29, 25, 21, 19, 16, 13, 13,
- 29, 28, 23, 20, 17, 15, 13, 12, 28, 28, 22, 19, 17, 15, 13, 12, 26, 26,
- 21, 18, 16, 14, 12, 12, 25, 26, 21, 17, 15, 13, 12, 11, 23, 24, 20, 16,
- 14, 13, 11, 11, 22, 23, 19, 16, 14, 12, 11, 10, 21, 23, 19, 15, 13, 12,
- 10, 10, 19, 20, 18, 14, 12, 11, 10, 9, 19, 20, 17, 14, 12, 11, 9, 9, 18,
- 19, 17, 14, 12, 10, 9, 9, 16, 18, 16, 13, 11, 10, 9, 9, 16, 17, 16, 13,
- 11, 10, 9, 8, 14, 16, 14, 12, 11, 9, 8, 8, 14, 15, 14, 12, 10, 9, 8, 8,
- 13, 14, 13, 11, 10, 9, 8, 7, 12, 14, 13, 11, 10, 9, 7, 7, 12, 14, 13,
- 11, 10, 8, 7, 7, 12, 13, 12, 11, 9, 8, 7, 7, 12, 13, 12, 11, 9, 8, 7, 7,
- 11, 12, 12, 11, 9, 8, 7, 7, 11, 12, 12, 11, 9, 8, 7, 6, 11, 12, 12, 11,
- 9, 8, 7, 6, 10, 11, 12, 11, 9, 8, 7, 6,
- /* Size 32x8 */
- 32, 33, 33, 33, 33, 32, 32, 32, 30, 29, 28, 26, 25, 23, 22, 21, 19, 19,
- 18, 16, 16, 14, 14, 13, 12, 12, 12, 12, 11, 11, 11, 10, 33, 32, 32, 32,
- 31, 31, 30, 30, 29, 28, 28, 26, 26, 24, 23, 23, 20, 20, 19, 18, 17, 16,
- 15, 14, 14, 14, 13, 13, 12, 12, 12, 11, 29, 29, 30, 30, 29, 28, 28, 26,
- 25, 23, 22, 21, 21, 20, 19, 19, 18, 17, 17, 16, 16, 14, 14, 13, 13, 13,
- 12, 12, 12, 12, 12, 12, 23, 24, 25, 25, 24, 24, 24, 23, 21, 20, 19, 18,
- 17, 16, 16, 15, 14, 14, 14, 13, 13, 12, 12, 11, 11, 11, 11, 11, 11, 11,
- 11, 11, 19, 20, 20, 21, 21, 20, 21, 20, 19, 17, 17, 16, 15, 14, 14, 13,
- 12, 12, 12, 11, 11, 11, 10, 10, 10, 10, 9, 9, 9, 9, 9, 9, 16, 17, 17,
- 17, 17, 17, 18, 17, 16, 15, 15, 14, 13, 13, 12, 12, 11, 11, 10, 10, 10,
- 9, 9, 9, 9, 8, 8, 8, 8, 8, 8, 8, 12, 13, 13, 14, 14, 14, 14, 14, 13, 13,
- 13, 12, 12, 11, 11, 10, 10, 9, 9, 9, 9, 8, 8, 8, 7, 7, 7, 7, 7, 7, 7, 7,
- 11, 12, 12, 12, 13, 13, 13, 13, 13, 12, 12, 12, 11, 11, 10, 10, 9, 9, 9,
- 9, 8, 8, 8, 7, 7, 7, 7, 7, 7, 6, 6, 6 },
- { /* Chroma */
- /* Size 4x4 */
- 32, 23, 20, 17, 23, 19, 17, 16, 20, 17, 14, 13, 17, 16, 13, 11,
- /* Size 8x8 */
- 33, 30, 22, 22, 20, 18, 17, 16, 30, 26, 22, 23, 21, 19, 18, 17, 22, 22,
- 20, 20, 19, 18, 17, 17, 22, 23, 20, 18, 17, 16, 15, 15, 20, 21, 19, 17,
- 15, 14, 13, 13, 18, 19, 18, 16, 14, 12, 12, 12, 17, 18, 17, 15, 13, 12,
- 11, 11, 16, 17, 17, 15, 13, 12, 11, 10,
- /* Size 16x16 */
- 32, 33, 31, 28, 25, 21, 21, 20, 20, 19, 18, 17, 16, 15, 15, 15, 33, 33,
- 30, 26, 24, 22, 22, 22, 21, 20, 19, 18, 17, 17, 16, 16, 31, 30, 28, 24,
- 23, 22, 22, 22, 22, 21, 20, 19, 18, 17, 17, 16, 28, 26, 24, 22, 22, 21,
- 22, 22, 22, 21, 20, 19, 19, 18, 17, 17, 25, 24, 23, 22, 21, 20, 21, 20,
- 20, 20, 19, 18, 18, 17, 17, 17, 21, 22, 22, 21, 20, 19, 19, 19, 19, 19,
- 18, 17, 17, 16, 16, 16, 21, 22, 22, 22, 21, 19, 19, 18, 17, 17, 17, 16,
- 16, 15, 15, 15, 20, 22, 22, 22, 20, 19, 18, 17, 16, 16, 16, 15, 15, 14,
- 14, 14, 20, 21, 22, 22, 20, 19, 17, 16, 16, 15, 15, 14, 14, 13, 14, 14,
- 19, 20, 21, 21, 20, 19, 17, 16, 15, 14, 14, 13, 13, 13, 13, 13, 18, 19,
- 20, 20, 19, 18, 17, 16, 15, 14, 13, 13, 12, 12, 12, 12, 17, 18, 19, 19,
- 18, 17, 16, 15, 14, 13, 13, 12, 12, 12, 12, 12, 16, 17, 18, 19, 18, 17,
- 16, 15, 14, 13, 12, 12, 11, 11, 11, 11, 15, 17, 17, 18, 17, 16, 15, 14,
- 13, 13, 12, 12, 11, 11, 11, 11, 15, 16, 17, 17, 17, 16, 15, 14, 14, 13,
- 12, 12, 11, 11, 10, 10, 15, 16, 16, 17, 17, 16, 15, 14, 14, 13, 12, 12,
- 11, 11, 10, 10,
- /* Size 32x32 */
- 32, 33, 33, 34, 31, 31, 28, 27, 25, 22, 21, 21, 21, 21, 20, 20, 20, 19,
- 19, 18, 18, 17, 17, 16, 16, 16, 15, 15, 15, 15, 15, 14, 33, 33, 33, 33,
- 30, 30, 27, 26, 24, 22, 22, 22, 22, 22, 21, 21, 20, 20, 20, 19, 19, 18,
- 18, 17, 17, 17, 16, 16, 16, 16, 16, 15, 33, 33, 33, 33, 30, 29, 26, 26,
- 24, 22, 22, 22, 22, 22, 22, 22, 21, 20, 20, 19, 19, 18, 18, 17, 17, 17,
- 17, 16, 16, 16, 16, 15, 34, 33, 33, 32, 30, 29, 26, 25, 24, 23, 22, 23,
- 23, 23, 22, 22, 22, 21, 21, 20, 20, 19, 19, 18, 18, 18, 17, 17, 17, 17,
- 16, 16, 31, 30, 30, 30, 28, 27, 24, 24, 23, 22, 22, 22, 22, 23, 22, 22,
- 22, 21, 21, 20, 20, 19, 19, 18, 18, 18, 17, 17, 17, 17, 16, 16, 31, 30,
- 29, 29, 27, 26, 24, 23, 23, 22, 22, 22, 22, 23, 22, 22, 22, 21, 21, 20,
- 20, 19, 19, 18, 18, 18, 17, 17, 17, 17, 17, 17, 28, 27, 26, 26, 24, 24,
- 22, 22, 22, 22, 21, 22, 22, 23, 22, 22, 22, 22, 21, 21, 20, 20, 19, 19,
- 19, 19, 18, 18, 17, 17, 17, 17, 27, 26, 26, 25, 24, 23, 22, 22, 21, 21,
- 21, 21, 22, 22, 22, 22, 21, 21, 21, 20, 20, 19, 19, 19, 18, 18, 18, 18,
- 18, 17, 17, 17, 25, 24, 24, 24, 23, 23, 22, 21, 21, 20, 20, 21, 21, 21,
- 20, 20, 20, 20, 20, 19, 19, 19, 18, 18, 18, 18, 17, 17, 17, 17, 17, 17,
- 22, 22, 22, 23, 22, 22, 22, 21, 20, 20, 20, 20, 20, 20, 19, 19, 19, 19,
- 19, 19, 18, 18, 18, 17, 17, 17, 17, 17, 17, 17, 16, 16, 21, 22, 22, 22,
- 22, 22, 21, 21, 20, 20, 19, 19, 19, 19, 19, 19, 19, 19, 19, 18, 18, 18,
- 17, 17, 17, 17, 16, 16, 16, 16, 16, 16, 21, 22, 22, 23, 22, 22, 22, 21,
- 21, 20, 19, 19, 19, 19, 18, 18, 18, 18, 18, 17, 17, 17, 17, 16, 16, 16,
- 16, 16, 16, 16, 16, 15, 21, 22, 22, 23, 22, 22, 22, 22, 21, 20, 19, 19,
- 19, 18, 18, 18, 17, 17, 17, 17, 17, 16, 16, 16, 16, 16, 15, 15, 15, 15,
- 15, 15, 21, 22, 22, 23, 23, 23, 23, 22, 21, 20, 19, 19, 18, 18, 17, 17,
- 17, 17, 17, 16, 16, 16, 16, 15, 15, 15, 15, 15, 15, 15, 15, 15, 20, 21,
- 22, 22, 22, 22, 22, 22, 20, 19, 19, 18, 18, 17, 17, 17, 16, 16, 16, 16,
- 16, 15, 15, 15, 15, 14, 14, 15, 14, 14, 14, 15, 20, 21, 22, 22, 22, 22,
- 22, 22, 20, 19, 19, 18, 18, 17, 17, 17, 16, 16, 16, 16, 15, 15, 15, 14,
- 14, 14, 14, 14, 14, 14, 14, 14, 20, 20, 21, 22, 22, 22, 22, 21, 20, 19,
- 19, 18, 17, 17, 16, 16, 16, 15, 15, 15, 15, 14, 14, 14, 14, 14, 13, 14,
- 14, 13, 14, 14, 19, 20, 20, 21, 21, 21, 22, 21, 20, 19, 19, 18, 17, 17,
- 16, 16, 15, 15, 15, 14, 14, 14, 14, 13, 13, 13, 13, 13, 13, 13, 13, 13,
- 19, 20, 20, 21, 21, 21, 21, 21, 20, 19, 19, 18, 17, 17, 16, 16, 15, 15,
- 14, 14, 14, 14, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 18, 19, 19, 20,
- 20, 20, 21, 20, 19, 19, 18, 17, 17, 16, 16, 16, 15, 14, 14, 14, 13, 13,
- 13, 13, 12, 12, 12, 13, 12, 13, 13, 12, 18, 19, 19, 20, 20, 20, 20, 20,
- 19, 18, 18, 17, 17, 16, 16, 15, 15, 14, 14, 13, 13, 13, 13, 12, 12, 12,
- 12, 12, 12, 12, 12, 12, 17, 18, 18, 19, 19, 19, 20, 19, 19, 18, 18, 17,
- 16, 16, 15, 15, 14, 14, 14, 13, 13, 12, 12, 12, 12, 12, 12, 12, 12, 12,
- 12, 12, 17, 18, 18, 19, 19, 19, 19, 19, 18, 18, 17, 17, 16, 16, 15, 15,
- 14, 14, 13, 13, 13, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 16, 17,
- 17, 18, 18, 18, 19, 19, 18, 17, 17, 16, 16, 15, 15, 14, 14, 13, 13, 13,
- 12, 12, 12, 12, 11, 11, 11, 11, 11, 11, 12, 11, 16, 17, 17, 18, 18, 18,
- 19, 18, 18, 17, 17, 16, 16, 15, 15, 14, 14, 13, 13, 12, 12, 12, 12, 11,
- 11, 11, 11, 11, 11, 11, 11, 11, 16, 17, 17, 18, 18, 18, 19, 18, 18, 17,
- 17, 16, 16, 15, 14, 14, 14, 13, 13, 12, 12, 12, 12, 11, 11, 11, 11, 11,
- 11, 11, 11, 11, 15, 16, 17, 17, 17, 17, 18, 18, 17, 17, 16, 16, 15, 15,
- 14, 14, 13, 13, 13, 12, 12, 12, 12, 11, 11, 11, 11, 11, 11, 11, 11, 11,
- 15, 16, 16, 17, 17, 17, 18, 18, 17, 17, 16, 16, 15, 15, 15, 14, 14, 13,
- 13, 13, 12, 12, 12, 11, 11, 11, 11, 11, 11, 10, 10, 10, 15, 16, 16, 17,
- 17, 17, 17, 18, 17, 17, 16, 16, 15, 15, 14, 14, 14, 13, 13, 12, 12, 12,
- 12, 11, 11, 11, 11, 11, 10, 10, 10, 10, 15, 16, 16, 17, 17, 17, 17, 17,
- 17, 17, 16, 16, 15, 15, 14, 14, 13, 13, 13, 13, 12, 12, 12, 11, 11, 11,
- 11, 10, 10, 10, 10, 10, 15, 16, 16, 16, 16, 17, 17, 17, 17, 16, 16, 16,
- 15, 15, 14, 14, 14, 13, 13, 13, 12, 12, 12, 12, 11, 11, 11, 10, 10, 10,
- 10, 10, 14, 15, 15, 16, 16, 17, 17, 17, 17, 16, 16, 15, 15, 15, 15, 14,
- 14, 13, 13, 12, 12, 12, 12, 11, 11, 11, 11, 10, 10, 10, 10, 10,
- /* Size 4x8 */
- 33, 22, 19, 16, 28, 22, 20, 17, 22, 20, 19, 17, 23, 19, 16, 15, 21, 19,
- 14, 13, 19, 18, 13, 12, 17, 17, 13, 11, 16, 16, 13, 11,
- /* Size 8x4 */
- 33, 28, 22, 23, 21, 19, 17, 16, 22, 22, 20, 19, 19, 18, 17, 16, 19, 20,
- 19, 16, 14, 13, 13, 13, 16, 17, 17, 15, 13, 12, 11, 11,
- /* Size 8x16 */
- 32, 31, 23, 21, 20, 18, 16, 15, 33, 30, 23, 22, 21, 19, 17, 16, 31, 28,
- 22, 23, 22, 20, 18, 17, 28, 24, 22, 23, 22, 20, 19, 17, 24, 23, 21, 21,
- 20, 19, 18, 17, 21, 22, 20, 19, 19, 18, 17, 16, 21, 22, 20, 18, 17, 17,
- 16, 15, 20, 22, 20, 17, 16, 16, 14, 14, 20, 22, 19, 17, 16, 14, 14, 14,
- 19, 21, 19, 17, 15, 14, 13, 13, 18, 20, 19, 16, 15, 13, 12, 12, 17, 19,
- 18, 16, 14, 13, 12, 12, 16, 18, 17, 15, 14, 12, 11, 11, 16, 17, 17, 15,
- 13, 12, 11, 11, 15, 17, 17, 15, 13, 12, 11, 11, 15, 16, 17, 15, 14, 12,
- 11, 10,
- /* Size 16x8 */
- 32, 33, 31, 28, 24, 21, 21, 20, 20, 19, 18, 17, 16, 16, 15, 15, 31, 30,
- 28, 24, 23, 22, 22, 22, 22, 21, 20, 19, 18, 17, 17, 16, 23, 23, 22, 22,
- 21, 20, 20, 20, 19, 19, 19, 18, 17, 17, 17, 17, 21, 22, 23, 23, 21, 19,
- 18, 17, 17, 17, 16, 16, 15, 15, 15, 15, 20, 21, 22, 22, 20, 19, 17, 16,
- 16, 15, 15, 14, 14, 13, 13, 14, 18, 19, 20, 20, 19, 18, 17, 16, 14, 14,
- 13, 13, 12, 12, 12, 12, 16, 17, 18, 19, 18, 17, 16, 14, 14, 13, 12, 12,
- 11, 11, 11, 11, 15, 16, 17, 17, 17, 16, 15, 14, 14, 13, 12, 12, 11, 11,
- 11, 10,
- /* Size 16x32 */
- 32, 33, 31, 28, 23, 21, 21, 20, 20, 18, 18, 16, 16, 15, 15, 15, 33, 33,
- 30, 27, 23, 22, 22, 21, 20, 19, 19, 17, 17, 16, 16, 16, 33, 32, 30, 26,
- 23, 22, 22, 22, 21, 20, 19, 17, 17, 17, 16, 16, 34, 32, 29, 26, 23, 22,
- 23, 22, 21, 20, 20, 18, 18, 17, 17, 17, 31, 29, 28, 24, 22, 22, 23, 22,
- 22, 20, 20, 18, 18, 17, 17, 17, 31, 28, 27, 24, 22, 22, 22, 22, 22, 20,
- 20, 18, 18, 17, 17, 17, 28, 26, 24, 22, 22, 22, 23, 22, 22, 21, 20, 19,
- 19, 18, 17, 17, 26, 25, 24, 22, 21, 21, 22, 22, 21, 20, 20, 19, 18, 18,
- 18, 17, 24, 24, 23, 22, 21, 20, 21, 20, 20, 19, 19, 18, 18, 17, 17, 17,
- 22, 22, 22, 21, 20, 20, 19, 19, 19, 19, 18, 17, 17, 17, 17, 17, 21, 22,
- 22, 21, 20, 19, 19, 19, 19, 18, 18, 17, 17, 16, 16, 17, 21, 22, 22, 22,
- 20, 19, 18, 18, 18, 17, 17, 16, 16, 16, 16, 16, 21, 23, 22, 22, 20, 19,
- 18, 18, 17, 17, 17, 16, 16, 16, 15, 16, 21, 23, 23, 22, 20, 19, 18, 17,
- 17, 16, 16, 15, 15, 15, 15, 15, 20, 22, 22, 22, 20, 19, 17, 17, 16, 16,
- 16, 15, 14, 15, 14, 15, 20, 22, 22, 22, 20, 19, 17, 17, 16, 16, 15, 14,
- 14, 14, 14, 14, 20, 21, 22, 22, 19, 19, 17, 16, 16, 15, 14, 14, 14, 14,
- 14, 14, 19, 21, 21, 21, 19, 19, 17, 16, 15, 14, 14, 13, 13, 13, 14, 13,
- 19, 20, 21, 21, 19, 19, 17, 16, 15, 14, 14, 13, 13, 13, 13, 13, 18, 20,
- 20, 20, 19, 18, 16, 16, 15, 14, 13, 13, 12, 13, 13, 13, 18, 20, 20, 20,
- 19, 18, 16, 16, 15, 14, 13, 12, 12, 12, 12, 13, 17, 19, 19, 20, 18, 18,
- 16, 15, 14, 13, 13, 12, 12, 12, 12, 12, 17, 18, 19, 19, 18, 17, 16, 15,
- 14, 13, 13, 12, 12, 12, 12, 12, 16, 18, 18, 19, 17, 17, 15, 15, 14, 13,
- 12, 12, 11, 11, 12, 12, 16, 18, 18, 18, 17, 17, 15, 14, 14, 13, 12, 11,
- 11, 11, 11, 12, 16, 17, 18, 18, 17, 17, 15, 14, 14, 13, 12, 11, 11, 11,
- 11, 11, 16, 17, 17, 18, 17, 16, 15, 14, 13, 12, 12, 11, 11, 11, 11, 11,
- 15, 17, 17, 18, 17, 16, 15, 15, 13, 13, 12, 11, 11, 11, 11, 11, 15, 17,
- 17, 17, 17, 16, 15, 14, 13, 13, 12, 12, 11, 11, 11, 10, 15, 16, 17, 17,
- 17, 16, 15, 14, 13, 13, 12, 12, 11, 11, 10, 10, 15, 16, 16, 17, 17, 16,
- 15, 14, 14, 13, 12, 12, 11, 11, 10, 10, 15, 16, 16, 17, 17, 15, 15, 14,
- 14, 12, 12, 11, 11, 10, 10, 10,
- /* Size 32x16 */
- 32, 33, 33, 34, 31, 31, 28, 26, 24, 22, 21, 21, 21, 21, 20, 20, 20, 19,
- 19, 18, 18, 17, 17, 16, 16, 16, 16, 15, 15, 15, 15, 15, 33, 33, 32, 32,
- 29, 28, 26, 25, 24, 22, 22, 22, 23, 23, 22, 22, 21, 21, 20, 20, 20, 19,
- 18, 18, 18, 17, 17, 17, 17, 16, 16, 16, 31, 30, 30, 29, 28, 27, 24, 24,
- 23, 22, 22, 22, 22, 23, 22, 22, 22, 21, 21, 20, 20, 19, 19, 18, 18, 18,
- 17, 17, 17, 17, 16, 16, 28, 27, 26, 26, 24, 24, 22, 22, 22, 21, 21, 22,
- 22, 22, 22, 22, 22, 21, 21, 20, 20, 20, 19, 19, 18, 18, 18, 18, 17, 17,
- 17, 17, 23, 23, 23, 23, 22, 22, 22, 21, 21, 20, 20, 20, 20, 20, 20, 20,
- 19, 19, 19, 19, 19, 18, 18, 17, 17, 17, 17, 17, 17, 17, 17, 17, 21, 22,
- 22, 22, 22, 22, 22, 21, 20, 20, 19, 19, 19, 19, 19, 19, 19, 19, 19, 18,
- 18, 18, 17, 17, 17, 17, 16, 16, 16, 16, 16, 15, 21, 22, 22, 23, 23, 22,
- 23, 22, 21, 19, 19, 18, 18, 18, 17, 17, 17, 17, 17, 16, 16, 16, 16, 15,
- 15, 15, 15, 15, 15, 15, 15, 15, 20, 21, 22, 22, 22, 22, 22, 22, 20, 19,
- 19, 18, 18, 17, 17, 17, 16, 16, 16, 16, 16, 15, 15, 15, 14, 14, 14, 15,
- 14, 14, 14, 14, 20, 20, 21, 21, 22, 22, 22, 21, 20, 19, 19, 18, 17, 17,
- 16, 16, 16, 15, 15, 15, 15, 14, 14, 14, 14, 14, 13, 13, 13, 13, 14, 14,
- 18, 19, 20, 20, 20, 20, 21, 20, 19, 19, 18, 17, 17, 16, 16, 16, 15, 14,
- 14, 14, 14, 13, 13, 13, 13, 13, 12, 13, 13, 13, 13, 12, 18, 19, 19, 20,
- 20, 20, 20, 20, 19, 18, 18, 17, 17, 16, 16, 15, 14, 14, 14, 13, 13, 13,
- 13, 12, 12, 12, 12, 12, 12, 12, 12, 12, 16, 17, 17, 18, 18, 18, 19, 19,
- 18, 17, 17, 16, 16, 15, 15, 14, 14, 13, 13, 13, 12, 12, 12, 12, 11, 11,
- 11, 11, 12, 12, 12, 11, 16, 17, 17, 18, 18, 18, 19, 18, 18, 17, 17, 16,
- 16, 15, 14, 14, 14, 13, 13, 12, 12, 12, 12, 11, 11, 11, 11, 11, 11, 11,
- 11, 11, 15, 16, 17, 17, 17, 17, 18, 18, 17, 17, 16, 16, 16, 15, 15, 14,
- 14, 13, 13, 13, 12, 12, 12, 11, 11, 11, 11, 11, 11, 11, 11, 10, 15, 16,
- 16, 17, 17, 17, 17, 18, 17, 17, 16, 16, 15, 15, 14, 14, 14, 14, 13, 13,
- 12, 12, 12, 12, 11, 11, 11, 11, 11, 10, 10, 10, 15, 16, 16, 17, 17, 17,
- 17, 17, 17, 17, 17, 16, 16, 15, 15, 14, 14, 13, 13, 13, 13, 12, 12, 12,
- 12, 11, 11, 11, 10, 10, 10, 10,
- /* Size 4x16 */
- 33, 21, 18, 15, 32, 22, 20, 17, 29, 22, 20, 17, 26, 22, 21, 18, 24, 20,
- 19, 17, 22, 19, 18, 16, 23, 19, 17, 16, 22, 19, 16, 15, 21, 19, 15, 14,
- 20, 19, 14, 13, 20, 18, 14, 12, 18, 17, 13, 12, 18, 17, 13, 11, 17, 16,
- 12, 11, 17, 16, 13, 11, 16, 16, 13, 11,
- /* Size 16x4 */
- 33, 32, 29, 26, 24, 22, 23, 22, 21, 20, 20, 18, 18, 17, 17, 16, 21, 22,
- 22, 22, 20, 19, 19, 19, 19, 19, 18, 17, 17, 16, 16, 16, 18, 20, 20, 21,
- 19, 18, 17, 16, 15, 14, 14, 13, 13, 12, 13, 13, 15, 17, 17, 18, 17, 16,
- 16, 15, 14, 13, 12, 12, 11, 11, 11, 11,
- /* Size 8x32 */
- 32, 31, 23, 21, 20, 18, 16, 15, 33, 30, 23, 22, 20, 19, 17, 16, 33, 30,
- 23, 22, 21, 19, 17, 16, 34, 29, 23, 23, 21, 20, 18, 17, 31, 28, 22, 23,
- 22, 20, 18, 17, 31, 27, 22, 22, 22, 20, 18, 17, 28, 24, 22, 23, 22, 20,
- 19, 17, 26, 24, 21, 22, 21, 20, 18, 18, 24, 23, 21, 21, 20, 19, 18, 17,
- 22, 22, 20, 19, 19, 18, 17, 17, 21, 22, 20, 19, 19, 18, 17, 16, 21, 22,
- 20, 18, 18, 17, 16, 16, 21, 22, 20, 18, 17, 17, 16, 15, 21, 23, 20, 18,
- 17, 16, 15, 15, 20, 22, 20, 17, 16, 16, 14, 14, 20, 22, 20, 17, 16, 15,
- 14, 14, 20, 22, 19, 17, 16, 14, 14, 14, 19, 21, 19, 17, 15, 14, 13, 14,
- 19, 21, 19, 17, 15, 14, 13, 13, 18, 20, 19, 16, 15, 13, 12, 13, 18, 20,
- 19, 16, 15, 13, 12, 12, 17, 19, 18, 16, 14, 13, 12, 12, 17, 19, 18, 16,
- 14, 13, 12, 12, 16, 18, 17, 15, 14, 12, 11, 12, 16, 18, 17, 15, 14, 12,
- 11, 11, 16, 18, 17, 15, 14, 12, 11, 11, 16, 17, 17, 15, 13, 12, 11, 11,
- 15, 17, 17, 15, 13, 12, 11, 11, 15, 17, 17, 15, 13, 12, 11, 11, 15, 17,
- 17, 15, 13, 12, 11, 10, 15, 16, 17, 15, 14, 12, 11, 10, 15, 16, 17, 15,
- 14, 12, 11, 10,
- /* Size 32x8 */
- 32, 33, 33, 34, 31, 31, 28, 26, 24, 22, 21, 21, 21, 21, 20, 20, 20, 19,
- 19, 18, 18, 17, 17, 16, 16, 16, 16, 15, 15, 15, 15, 15, 31, 30, 30, 29,
- 28, 27, 24, 24, 23, 22, 22, 22, 22, 23, 22, 22, 22, 21, 21, 20, 20, 19,
- 19, 18, 18, 18, 17, 17, 17, 17, 16, 16, 23, 23, 23, 23, 22, 22, 22, 21,
- 21, 20, 20, 20, 20, 20, 20, 20, 19, 19, 19, 19, 19, 18, 18, 17, 17, 17,
- 17, 17, 17, 17, 17, 17, 21, 22, 22, 23, 23, 22, 23, 22, 21, 19, 19, 18,
- 18, 18, 17, 17, 17, 17, 17, 16, 16, 16, 16, 15, 15, 15, 15, 15, 15, 15,
- 15, 15, 20, 20, 21, 21, 22, 22, 22, 21, 20, 19, 19, 18, 17, 17, 16, 16,
- 16, 15, 15, 15, 15, 14, 14, 14, 14, 14, 13, 13, 13, 13, 14, 14, 18, 19,
- 19, 20, 20, 20, 20, 20, 19, 18, 18, 17, 17, 16, 16, 15, 14, 14, 14, 13,
- 13, 13, 13, 12, 12, 12, 12, 12, 12, 12, 12, 12, 16, 17, 17, 18, 18, 18,
- 19, 18, 18, 17, 17, 16, 16, 15, 14, 14, 14, 13, 13, 12, 12, 12, 12, 11,
- 11, 11, 11, 11, 11, 11, 11, 11, 15, 16, 16, 17, 17, 17, 17, 18, 17, 17,
- 16, 16, 15, 15, 14, 14, 14, 14, 13, 13, 12, 12, 12, 12, 11, 11, 11, 11,
- 11, 10, 10, 10 },
- },
- {
- { /* Luma */
- /* Size 4x4 */
- 32, 30, 19, 14, 30, 21, 16, 13, 19, 16, 11, 9, 14, 13, 9, 7,
- /* Size 8x8 */
- 32, 32, 30, 26, 20, 17, 13, 12, 32, 31, 29, 26, 21, 17, 14, 13, 30, 29,
- 26, 22, 19, 16, 14, 13, 26, 26, 22, 18, 16, 14, 12, 11, 20, 21, 19, 16,
- 13, 11, 10, 10, 17, 17, 16, 14, 11, 10, 9, 8, 13, 14, 14, 12, 10, 9, 8,
- 7, 12, 13, 13, 11, 10, 8, 7, 7,
- /* Size 16x16 */
- 32, 33, 33, 32, 31, 28, 26, 23, 21, 19, 17, 16, 14, 13, 12, 11, 33, 32,
- 32, 32, 31, 29, 27, 24, 22, 20, 18, 16, 15, 13, 13, 12, 33, 32, 32, 31,
- 30, 29, 27, 25, 23, 21, 19, 17, 15, 14, 13, 12, 32, 32, 31, 30, 28, 28,
- 26, 24, 23, 21, 19, 17, 16, 14, 14, 13, 31, 31, 30, 28, 27, 24, 23, 22,
- 20, 19, 18, 16, 15, 14, 13, 13, 28, 29, 29, 28, 24, 21, 20, 19, 18, 17,
- 16, 15, 14, 13, 12, 12, 26, 27, 27, 26, 23, 20, 19, 18, 17, 16, 15, 14,
- 13, 12, 12, 11, 23, 24, 25, 24, 22, 19, 18, 16, 15, 14, 14, 13, 12, 11,
- 11, 11, 21, 22, 23, 23, 20, 18, 17, 15, 14, 13, 13, 12, 11, 10, 10, 10,
- 19, 20, 21, 21, 19, 17, 16, 14, 13, 12, 12, 11, 10, 10, 9, 9, 17, 18,
- 19, 19, 18, 16, 15, 14, 13, 12, 11, 10, 10, 9, 9, 9, 16, 16, 17, 17, 16,
- 15, 14, 13, 12, 11, 10, 10, 9, 8, 8, 8, 14, 15, 15, 16, 15, 14, 13, 12,
- 11, 10, 10, 9, 8, 8, 8, 7, 13, 13, 14, 14, 14, 13, 12, 11, 10, 10, 9, 8,
- 8, 7, 7, 7, 12, 13, 13, 14, 13, 12, 12, 11, 10, 9, 9, 8, 8, 7, 7, 7, 11,
- 12, 12, 13, 13, 12, 11, 11, 10, 9, 9, 8, 7, 7, 7, 6,
- /* Size 32x32 */
- 32, 33, 33, 33, 33, 33, 32, 32, 31, 30, 28, 28, 26, 25, 23, 22, 21, 20,
- 19, 18, 17, 16, 16, 14, 14, 13, 13, 12, 12, 12, 11, 11, 33, 32, 32, 32,
- 32, 32, 32, 32, 31, 30, 29, 29, 27, 26, 24, 23, 22, 20, 20, 18, 18, 17,
- 16, 15, 14, 13, 13, 13, 12, 12, 12, 12, 33, 32, 32, 32, 32, 32, 32, 32,
- 31, 30, 29, 29, 27, 26, 24, 23, 22, 21, 20, 19, 18, 17, 16, 15, 15, 14,
- 13, 13, 13, 12, 12, 12, 33, 32, 32, 32, 32, 32, 32, 32, 31, 31, 30, 30,
- 28, 27, 25, 24, 23, 21, 21, 19, 19, 17, 17, 16, 15, 14, 14, 14, 13, 13,
- 12, 12, 33, 32, 32, 32, 32, 31, 31, 31, 30, 30, 29, 29, 27, 26, 25, 24,
- 23, 21, 21, 19, 19, 17, 17, 16, 15, 14, 14, 14, 13, 13, 12, 12, 33, 32,
- 32, 32, 31, 31, 31, 30, 29, 29, 28, 28, 26, 26, 24, 23, 23, 21, 20, 19,
- 19, 17, 17, 16, 15, 14, 14, 14, 13, 13, 13, 12, 32, 32, 32, 32, 31, 31,
- 30, 29, 28, 28, 28, 27, 26, 26, 24, 23, 23, 21, 21, 19, 19, 18, 17, 16,
- 16, 15, 14, 14, 14, 13, 13, 12, 32, 32, 32, 32, 31, 30, 29, 29, 28, 28,
- 27, 27, 26, 25, 24, 23, 22, 21, 21, 19, 19, 18, 17, 16, 16, 15, 14, 14,
- 14, 13, 13, 13, 31, 31, 31, 31, 30, 29, 28, 28, 27, 26, 24, 24, 23, 23,
- 22, 21, 20, 20, 19, 18, 18, 17, 16, 15, 15, 14, 14, 14, 13, 13, 13, 13,
- 30, 30, 30, 31, 30, 29, 28, 28, 26, 26, 24, 24, 23, 22, 22, 21, 20, 19,
- 19, 18, 18, 16, 16, 15, 15, 14, 14, 13, 13, 13, 12, 12, 28, 29, 29, 30,
- 29, 28, 28, 27, 24, 24, 21, 21, 20, 20, 19, 19, 18, 17, 17, 16, 16, 15,
- 15, 14, 14, 13, 13, 13, 12, 12, 12, 12, 28, 29, 29, 30, 29, 28, 27, 27,
- 24, 24, 21, 21, 20, 20, 19, 19, 18, 17, 17, 16, 16, 15, 15, 14, 14, 13,
- 13, 13, 12, 12, 12, 11, 26, 27, 27, 28, 27, 26, 26, 26, 23, 23, 20, 20,
- 19, 19, 18, 17, 17, 16, 16, 15, 15, 14, 14, 13, 13, 12, 12, 12, 12, 11,
- 11, 11, 25, 26, 26, 27, 26, 26, 26, 25, 23, 22, 20, 20, 19, 18, 17, 17,
- 16, 16, 15, 15, 15, 14, 13, 13, 12, 12, 12, 12, 11, 11, 11, 11, 23, 24,
- 24, 25, 25, 24, 24, 24, 22, 22, 19, 19, 18, 17, 16, 16, 15, 15, 14, 14,
- 14, 13, 13, 12, 12, 11, 11, 11, 11, 11, 11, 11, 22, 23, 23, 24, 24, 23,
- 23, 23, 21, 21, 19, 19, 17, 17, 16, 15, 15, 14, 14, 13, 13, 12, 12, 12,
- 12, 11, 11, 11, 10, 10, 10, 10, 21, 22, 22, 23, 23, 23, 23, 22, 20, 20,
- 18, 18, 17, 16, 15, 15, 14, 14, 13, 13, 13, 12, 12, 11, 11, 11, 10, 10,
- 10, 10, 10, 10, 20, 20, 21, 21, 21, 21, 21, 21, 20, 19, 17, 17, 16, 16,
- 15, 14, 14, 13, 13, 12, 12, 11, 11, 11, 11, 10, 10, 10, 10, 10, 10, 9,
- 19, 20, 20, 21, 21, 20, 21, 21, 19, 19, 17, 17, 16, 15, 14, 14, 13, 13,
- 12, 12, 12, 11, 11, 11, 10, 10, 10, 10, 9, 9, 9, 9, 18, 18, 19, 19, 19,
- 19, 19, 19, 18, 18, 16, 16, 15, 15, 14, 13, 13, 12, 12, 11, 11, 11, 10,
- 10, 10, 9, 9, 9, 9, 9, 9, 9, 17, 18, 18, 19, 19, 19, 19, 19, 18, 18, 16,
- 16, 15, 15, 14, 13, 13, 12, 12, 11, 11, 10, 10, 10, 10, 9, 9, 9, 9, 9,
- 9, 9, 16, 17, 17, 17, 17, 17, 18, 18, 17, 16, 15, 15, 14, 14, 13, 12,
- 12, 11, 11, 11, 10, 10, 10, 9, 9, 9, 9, 8, 8, 8, 8, 8, 16, 16, 16, 17,
- 17, 17, 17, 17, 16, 16, 15, 15, 14, 13, 13, 12, 12, 11, 11, 10, 10, 10,
- 10, 9, 9, 9, 8, 8, 8, 8, 8, 8, 14, 15, 15, 16, 16, 16, 16, 16, 15, 15,
- 14, 14, 13, 13, 12, 12, 11, 11, 11, 10, 10, 9, 9, 9, 9, 8, 8, 8, 8, 8,
- 8, 8, 14, 14, 15, 15, 15, 15, 16, 16, 15, 15, 14, 14, 13, 12, 12, 12,
- 11, 11, 10, 10, 10, 9, 9, 9, 8, 8, 8, 8, 8, 8, 7, 8, 13, 13, 14, 14, 14,
- 14, 15, 15, 14, 14, 13, 13, 12, 12, 11, 11, 11, 10, 10, 9, 9, 9, 9, 8,
- 8, 8, 8, 7, 7, 7, 7, 7, 13, 13, 13, 14, 14, 14, 14, 14, 14, 14, 13, 13,
- 12, 12, 11, 11, 10, 10, 10, 9, 9, 9, 8, 8, 8, 8, 7, 7, 7, 7, 7, 7, 12,
- 13, 13, 14, 14, 14, 14, 14, 14, 13, 13, 13, 12, 12, 11, 11, 10, 10, 10,
- 9, 9, 8, 8, 8, 8, 7, 7, 7, 7, 7, 7, 7, 12, 12, 13, 13, 13, 13, 14, 14,
- 13, 13, 12, 12, 12, 11, 11, 10, 10, 10, 9, 9, 9, 8, 8, 8, 8, 7, 7, 7, 7,
- 7, 7, 7, 12, 12, 12, 13, 13, 13, 13, 13, 13, 13, 12, 12, 11, 11, 11, 10,
- 10, 10, 9, 9, 9, 8, 8, 8, 8, 7, 7, 7, 7, 7, 7, 6, 11, 12, 12, 12, 12,
- 13, 13, 13, 13, 12, 12, 12, 11, 11, 11, 10, 10, 10, 9, 9, 9, 8, 8, 8, 7,
- 7, 7, 7, 7, 7, 6, 6, 11, 12, 12, 12, 12, 12, 12, 13, 13, 12, 12, 11, 11,
- 11, 11, 10, 10, 9, 9, 9, 9, 8, 8, 8, 8, 7, 7, 7, 7, 6, 6, 6,
- /* Size 4x8 */
- 32, 29, 20, 13, 32, 28, 20, 14, 30, 24, 19, 14, 27, 20, 15, 12, 21, 17,
- 13, 10, 17, 15, 11, 9, 14, 13, 10, 8, 13, 12, 9, 7,
- /* Size 8x4 */
- 32, 32, 30, 27, 21, 17, 14, 13, 29, 28, 24, 20, 17, 15, 13, 12, 20, 20,
- 19, 15, 13, 11, 10, 9, 13, 14, 14, 12, 10, 9, 8, 7,
- /* Size 8x16 */
- 32, 33, 31, 26, 20, 16, 13, 12, 33, 32, 31, 26, 21, 17, 14, 12, 33, 32,
- 30, 27, 22, 17, 14, 13, 32, 31, 28, 26, 21, 18, 15, 13, 31, 30, 27, 23,
- 20, 17, 14, 13, 28, 29, 24, 20, 18, 15, 13, 12, 26, 27, 23, 19, 16, 14,
- 12, 12, 23, 25, 22, 17, 15, 13, 11, 11, 21, 23, 20, 17, 14, 12, 11, 10,
- 19, 21, 19, 16, 13, 11, 10, 9, 18, 19, 18, 15, 12, 10, 9, 9, 16, 17, 16,
- 14, 11, 10, 9, 8, 14, 15, 15, 13, 11, 9, 8, 8, 13, 14, 14, 12, 10, 9, 8,
- 7, 12, 13, 13, 11, 10, 8, 7, 7, 11, 12, 13, 11, 10, 9, 7, 7,
- /* Size 16x8 */
- 32, 33, 33, 32, 31, 28, 26, 23, 21, 19, 18, 16, 14, 13, 12, 11, 33, 32,
- 32, 31, 30, 29, 27, 25, 23, 21, 19, 17, 15, 14, 13, 12, 31, 31, 30, 28,
- 27, 24, 23, 22, 20, 19, 18, 16, 15, 14, 13, 13, 26, 26, 27, 26, 23, 20,
- 19, 17, 17, 16, 15, 14, 13, 12, 11, 11, 20, 21, 22, 21, 20, 18, 16, 15,
- 14, 13, 12, 11, 11, 10, 10, 10, 16, 17, 17, 18, 17, 15, 14, 13, 12, 11,
- 10, 10, 9, 9, 8, 9, 13, 14, 14, 15, 14, 13, 12, 11, 11, 10, 9, 9, 8, 8,
- 7, 7, 12, 12, 13, 13, 13, 12, 12, 11, 10, 9, 9, 8, 8, 7, 7, 7,
- /* Size 16x32 */
- 32, 33, 33, 32, 31, 28, 26, 23, 20, 19, 16, 16, 13, 13, 12, 11, 33, 32,
- 32, 32, 31, 29, 26, 24, 21, 20, 17, 16, 14, 13, 12, 12, 33, 32, 32, 32,
- 31, 29, 26, 24, 21, 20, 17, 17, 14, 13, 12, 12, 33, 32, 32, 31, 31, 30,
- 27, 25, 22, 21, 17, 17, 14, 14, 13, 13, 33, 32, 32, 31, 30, 29, 27, 25,
- 22, 21, 17, 17, 14, 14, 13, 13, 32, 32, 31, 30, 29, 28, 26, 24, 21, 20,
- 17, 17, 14, 14, 13, 13, 32, 32, 31, 29, 28, 28, 26, 24, 21, 21, 18, 17,
- 15, 14, 13, 13, 32, 31, 31, 29, 28, 27, 25, 24, 21, 21, 18, 17, 15, 15,
- 14, 13, 31, 31, 30, 28, 27, 25, 23, 22, 20, 19, 17, 16, 14, 14, 13, 13,
- 30, 30, 30, 28, 26, 24, 23, 21, 19, 19, 16, 16, 14, 14, 13, 12, 28, 30,
- 29, 27, 24, 21, 20, 19, 18, 17, 15, 15, 13, 13, 12, 12, 28, 29, 29, 27,
- 24, 21, 20, 19, 17, 17, 15, 15, 13, 13, 12, 12, 26, 28, 27, 26, 23, 20,
- 19, 18, 16, 16, 14, 14, 12, 12, 12, 12, 26, 27, 26, 25, 23, 20, 18, 17,
- 16, 15, 14, 13, 12, 12, 11, 11, 23, 25, 25, 24, 22, 19, 17, 16, 15, 14,
- 13, 13, 11, 11, 11, 11, 22, 24, 24, 23, 21, 19, 17, 16, 14, 14, 12, 12,
- 11, 11, 11, 10, 21, 23, 23, 22, 20, 18, 17, 15, 14, 13, 12, 12, 11, 10,
- 10, 10, 20, 21, 21, 21, 20, 17, 16, 15, 13, 13, 11, 11, 10, 10, 10, 10,
- 19, 21, 21, 20, 19, 17, 16, 14, 13, 12, 11, 11, 10, 10, 9, 10, 18, 19,
- 19, 19, 18, 16, 15, 14, 12, 12, 11, 10, 9, 9, 9, 9, 18, 19, 19, 19, 18,
- 16, 15, 14, 12, 12, 10, 10, 9, 9, 9, 9, 16, 17, 17, 18, 17, 15, 14, 13,
- 12, 11, 10, 10, 9, 9, 8, 8, 16, 17, 17, 17, 16, 15, 14, 13, 11, 11, 10,
- 10, 9, 8, 8, 8, 14, 16, 16, 16, 15, 14, 13, 12, 11, 11, 9, 9, 8, 8, 8,
- 8, 14, 15, 15, 16, 15, 14, 13, 12, 11, 10, 9, 9, 8, 8, 8, 8, 13, 14, 14,
- 15, 14, 13, 12, 11, 10, 10, 9, 9, 8, 8, 7, 7, 13, 14, 14, 14, 14, 13,
- 12, 11, 10, 10, 9, 8, 8, 7, 7, 7, 12, 14, 14, 14, 14, 13, 12, 11, 10,
- 10, 8, 8, 8, 7, 7, 7, 12, 13, 13, 14, 13, 12, 11, 11, 10, 9, 8, 8, 7, 7,
- 7, 7, 12, 13, 13, 13, 13, 12, 11, 10, 10, 9, 8, 8, 7, 7, 7, 7, 11, 12,
- 12, 13, 13, 12, 11, 10, 10, 9, 9, 8, 7, 7, 7, 7, 11, 12, 12, 13, 13, 11,
- 11, 10, 10, 9, 9, 8, 8, 7, 7, 6,
- /* Size 32x16 */
- 32, 33, 33, 33, 33, 32, 32, 32, 31, 30, 28, 28, 26, 26, 23, 22, 21, 20,
- 19, 18, 18, 16, 16, 14, 14, 13, 13, 12, 12, 12, 11, 11, 33, 32, 32, 32,
- 32, 32, 32, 31, 31, 30, 30, 29, 28, 27, 25, 24, 23, 21, 21, 19, 19, 17,
- 17, 16, 15, 14, 14, 14, 13, 13, 12, 12, 33, 32, 32, 32, 32, 31, 31, 31,
- 30, 30, 29, 29, 27, 26, 25, 24, 23, 21, 21, 19, 19, 17, 17, 16, 15, 14,
- 14, 14, 13, 13, 12, 12, 32, 32, 32, 31, 31, 30, 29, 29, 28, 28, 27, 27,
- 26, 25, 24, 23, 22, 21, 20, 19, 19, 18, 17, 16, 16, 15, 14, 14, 14, 13,
- 13, 13, 31, 31, 31, 31, 30, 29, 28, 28, 27, 26, 24, 24, 23, 23, 22, 21,
- 20, 20, 19, 18, 18, 17, 16, 15, 15, 14, 14, 14, 13, 13, 13, 13, 28, 29,
- 29, 30, 29, 28, 28, 27, 25, 24, 21, 21, 20, 20, 19, 19, 18, 17, 17, 16,
- 16, 15, 15, 14, 14, 13, 13, 13, 12, 12, 12, 11, 26, 26, 26, 27, 27, 26,
- 26, 25, 23, 23, 20, 20, 19, 18, 17, 17, 17, 16, 16, 15, 15, 14, 14, 13,
- 13, 12, 12, 12, 11, 11, 11, 11, 23, 24, 24, 25, 25, 24, 24, 24, 22, 21,
- 19, 19, 18, 17, 16, 16, 15, 15, 14, 14, 14, 13, 13, 12, 12, 11, 11, 11,
- 11, 10, 10, 10, 20, 21, 21, 22, 22, 21, 21, 21, 20, 19, 18, 17, 16, 16,
- 15, 14, 14, 13, 13, 12, 12, 12, 11, 11, 11, 10, 10, 10, 10, 10, 10, 10,
- 19, 20, 20, 21, 21, 20, 21, 21, 19, 19, 17, 17, 16, 15, 14, 14, 13, 13,
- 12, 12, 12, 11, 11, 11, 10, 10, 10, 10, 9, 9, 9, 9, 16, 17, 17, 17, 17,
- 17, 18, 18, 17, 16, 15, 15, 14, 14, 13, 12, 12, 11, 11, 11, 10, 10, 10,
- 9, 9, 9, 9, 8, 8, 8, 9, 9, 16, 16, 17, 17, 17, 17, 17, 17, 16, 16, 15,
- 15, 14, 13, 13, 12, 12, 11, 11, 10, 10, 10, 10, 9, 9, 9, 8, 8, 8, 8, 8,
- 8, 13, 14, 14, 14, 14, 14, 15, 15, 14, 14, 13, 13, 12, 12, 11, 11, 11,
- 10, 10, 9, 9, 9, 9, 8, 8, 8, 8, 8, 7, 7, 7, 8, 13, 13, 13, 14, 14, 14,
- 14, 15, 14, 14, 13, 13, 12, 12, 11, 11, 10, 10, 10, 9, 9, 9, 8, 8, 8, 8,
- 7, 7, 7, 7, 7, 7, 12, 12, 12, 13, 13, 13, 13, 14, 13, 13, 12, 12, 12,
- 11, 11, 11, 10, 10, 9, 9, 9, 8, 8, 8, 8, 7, 7, 7, 7, 7, 7, 7, 11, 12,
- 12, 13, 13, 13, 13, 13, 13, 12, 12, 12, 12, 11, 11, 10, 10, 10, 10, 9,
- 9, 8, 8, 8, 8, 7, 7, 7, 7, 7, 7, 6,
- /* Size 4x16 */
- 33, 28, 19, 13, 32, 29, 20, 13, 32, 29, 21, 14, 32, 28, 21, 14, 31, 25,
- 19, 14, 30, 21, 17, 13, 28, 20, 16, 12, 25, 19, 14, 11, 23, 18, 13, 10,
- 21, 17, 12, 10, 19, 16, 12, 9, 17, 15, 11, 8, 15, 14, 10, 8, 14, 13, 10,
- 7, 13, 12, 9, 7, 12, 12, 9, 7,
- /* Size 16x4 */
- 33, 32, 32, 32, 31, 30, 28, 25, 23, 21, 19, 17, 15, 14, 13, 12, 28, 29,
- 29, 28, 25, 21, 20, 19, 18, 17, 16, 15, 14, 13, 12, 12, 19, 20, 21, 21,
- 19, 17, 16, 14, 13, 12, 12, 11, 10, 10, 9, 9, 13, 13, 14, 14, 14, 13,
- 12, 11, 10, 10, 9, 8, 8, 7, 7, 7,
- /* Size 8x32 */
- 32, 33, 31, 26, 20, 16, 13, 12, 33, 32, 31, 26, 21, 17, 14, 12, 33, 32,
- 31, 26, 21, 17, 14, 12, 33, 32, 31, 27, 22, 17, 14, 13, 33, 32, 30, 27,
- 22, 17, 14, 13, 32, 31, 29, 26, 21, 17, 14, 13, 32, 31, 28, 26, 21, 18,
- 15, 13, 32, 31, 28, 25, 21, 18, 15, 14, 31, 30, 27, 23, 20, 17, 14, 13,
- 30, 30, 26, 23, 19, 16, 14, 13, 28, 29, 24, 20, 18, 15, 13, 12, 28, 29,
- 24, 20, 17, 15, 13, 12, 26, 27, 23, 19, 16, 14, 12, 12, 26, 26, 23, 18,
- 16, 14, 12, 11, 23, 25, 22, 17, 15, 13, 11, 11, 22, 24, 21, 17, 14, 12,
- 11, 11, 21, 23, 20, 17, 14, 12, 11, 10, 20, 21, 20, 16, 13, 11, 10, 10,
- 19, 21, 19, 16, 13, 11, 10, 9, 18, 19, 18, 15, 12, 11, 9, 9, 18, 19, 18,
- 15, 12, 10, 9, 9, 16, 17, 17, 14, 12, 10, 9, 8, 16, 17, 16, 14, 11, 10,
- 9, 8, 14, 16, 15, 13, 11, 9, 8, 8, 14, 15, 15, 13, 11, 9, 8, 8, 13, 14,
- 14, 12, 10, 9, 8, 7, 13, 14, 14, 12, 10, 9, 8, 7, 12, 14, 14, 12, 10, 8,
- 8, 7, 12, 13, 13, 11, 10, 8, 7, 7, 12, 13, 13, 11, 10, 8, 7, 7, 11, 12,
- 13, 11, 10, 9, 7, 7, 11, 12, 13, 11, 10, 9, 8, 7,
- /* Size 32x8 */
- 32, 33, 33, 33, 33, 32, 32, 32, 31, 30, 28, 28, 26, 26, 23, 22, 21, 20,
- 19, 18, 18, 16, 16, 14, 14, 13, 13, 12, 12, 12, 11, 11, 33, 32, 32, 32,
- 32, 31, 31, 31, 30, 30, 29, 29, 27, 26, 25, 24, 23, 21, 21, 19, 19, 17,
- 17, 16, 15, 14, 14, 14, 13, 13, 12, 12, 31, 31, 31, 31, 30, 29, 28, 28,
- 27, 26, 24, 24, 23, 23, 22, 21, 20, 20, 19, 18, 18, 17, 16, 15, 15, 14,
- 14, 14, 13, 13, 13, 13, 26, 26, 26, 27, 27, 26, 26, 25, 23, 23, 20, 20,
- 19, 18, 17, 17, 17, 16, 16, 15, 15, 14, 14, 13, 13, 12, 12, 12, 11, 11,
- 11, 11, 20, 21, 21, 22, 22, 21, 21, 21, 20, 19, 18, 17, 16, 16, 15, 14,
- 14, 13, 13, 12, 12, 12, 11, 11, 11, 10, 10, 10, 10, 10, 10, 10, 16, 17,
- 17, 17, 17, 17, 18, 18, 17, 16, 15, 15, 14, 14, 13, 12, 12, 11, 11, 11,
- 10, 10, 10, 9, 9, 9, 9, 8, 8, 8, 9, 9, 13, 14, 14, 14, 14, 14, 15, 15,
- 14, 14, 13, 13, 12, 12, 11, 11, 11, 10, 10, 9, 9, 9, 9, 8, 8, 8, 8, 8,
- 7, 7, 7, 8, 12, 12, 12, 13, 13, 13, 13, 14, 13, 13, 12, 12, 12, 11, 11,
- 11, 10, 10, 9, 9, 9, 8, 8, 8, 8, 7, 7, 7, 7, 7, 7, 7 },
- { /* Chroma */
- /* Size 4x4 */
- 32, 22, 21, 18, 22, 19, 19, 17, 21, 19, 15, 13, 18, 17, 13, 11,
- /* Size 8x8 */
- 33, 30, 24, 22, 21, 19, 17, 16, 30, 26, 23, 22, 22, 20, 18, 17, 24, 23,
- 21, 21, 20, 19, 18, 17, 22, 22, 21, 19, 18, 17, 16, 16, 21, 22, 20, 18,
- 16, 15, 14, 14, 19, 20, 19, 17, 15, 13, 12, 12, 17, 18, 18, 16, 14, 12,
- 12, 11, 16, 17, 17, 16, 14, 12, 11, 11,
- /* Size 16x16 */
- 32, 33, 33, 29, 26, 21, 21, 21, 20, 20, 19, 18, 17, 16, 16, 15, 33, 33,
- 32, 28, 25, 22, 22, 22, 21, 21, 20, 19, 18, 17, 17, 16, 33, 32, 30, 26,
- 24, 22, 22, 23, 22, 22, 21, 20, 19, 18, 17, 17, 29, 28, 26, 23, 22, 22,
- 22, 23, 22, 22, 21, 20, 19, 18, 18, 17, 26, 25, 24, 22, 21, 20, 21, 21,
- 21, 21, 20, 19, 19, 18, 17, 17, 21, 22, 22, 22, 20, 19, 19, 19, 19, 19,
- 19, 18, 17, 17, 17, 17, 21, 22, 22, 22, 21, 19, 19, 19, 18, 18, 18, 17,
- 17, 16, 16, 16, 21, 22, 23, 23, 21, 19, 19, 18, 17, 17, 17, 16, 16, 15,
- 15, 15, 20, 21, 22, 22, 21, 19, 18, 17, 17, 16, 16, 15, 15, 14, 14, 14,
- 20, 21, 22, 22, 21, 19, 18, 17, 16, 16, 15, 14, 14, 14, 13, 13, 19, 20,
- 21, 21, 20, 19, 18, 17, 16, 15, 14, 14, 13, 13, 13, 13, 18, 19, 20, 20,
- 19, 18, 17, 16, 15, 14, 14, 13, 13, 12, 12, 12, 17, 18, 19, 19, 19, 17,
- 17, 16, 15, 14, 13, 13, 12, 12, 12, 12, 16, 17, 18, 18, 18, 17, 16, 15,
- 14, 14, 13, 12, 12, 11, 11, 11, 16, 17, 17, 18, 17, 17, 16, 15, 14, 13,
- 13, 12, 12, 11, 11, 11, 15, 16, 17, 17, 17, 17, 16, 15, 14, 13, 13, 12,
- 12, 11, 11, 10,
- /* Size 32x32 */
- 32, 33, 33, 34, 33, 31, 29, 28, 26, 25, 21, 21, 21, 21, 21, 20, 20, 20,
- 20, 19, 19, 18, 18, 17, 17, 16, 16, 16, 16, 15, 15, 15, 33, 33, 33, 33,
- 32, 30, 28, 27, 25, 24, 22, 22, 22, 22, 22, 22, 21, 21, 20, 20, 20, 19,
- 19, 18, 18, 17, 17, 17, 16, 16, 16, 16, 33, 33, 33, 33, 32, 29, 28, 26,
- 25, 24, 22, 22, 22, 22, 22, 22, 21, 21, 21, 20, 20, 19, 19, 18, 18, 17,
- 17, 17, 17, 16, 16, 16, 34, 33, 33, 32, 31, 29, 27, 26, 24, 24, 22, 22,
- 23, 23, 23, 23, 22, 22, 22, 21, 21, 20, 20, 19, 19, 18, 18, 18, 17, 17,
- 17, 17, 33, 32, 32, 31, 30, 28, 26, 25, 24, 24, 22, 22, 22, 23, 23, 22,
- 22, 22, 22, 21, 21, 20, 20, 19, 19, 18, 18, 18, 17, 17, 17, 17, 31, 30,
- 29, 29, 28, 26, 25, 24, 23, 23, 22, 22, 22, 22, 23, 22, 22, 22, 22, 21,
- 21, 20, 20, 19, 19, 18, 18, 18, 18, 17, 17, 17, 29, 28, 28, 27, 26, 25,
- 23, 22, 22, 22, 22, 22, 22, 22, 23, 22, 22, 22, 22, 21, 21, 20, 20, 20,
- 19, 19, 18, 18, 18, 18, 17, 17, 28, 27, 26, 26, 25, 24, 22, 22, 22, 22,
- 21, 22, 22, 22, 23, 22, 22, 22, 22, 21, 21, 20, 20, 20, 19, 19, 19, 19,
- 18, 18, 18, 18, 26, 25, 25, 24, 24, 23, 22, 22, 21, 21, 20, 21, 21, 21,
- 21, 21, 21, 21, 21, 20, 20, 20, 19, 19, 19, 18, 18, 18, 17, 17, 17, 17,
- 25, 24, 24, 24, 24, 23, 22, 22, 21, 21, 20, 20, 21, 21, 21, 21, 20, 20,
- 20, 20, 20, 19, 19, 19, 18, 18, 18, 18, 17, 17, 17, 17, 21, 22, 22, 22,
- 22, 22, 22, 21, 20, 20, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 18,
- 18, 18, 17, 17, 17, 17, 17, 17, 17, 17, 21, 22, 22, 22, 22, 22, 22, 22,
- 21, 20, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 18, 18, 18, 17, 17, 17,
- 17, 17, 16, 16, 16, 16, 21, 22, 22, 23, 22, 22, 22, 22, 21, 21, 19, 19,
- 19, 19, 19, 18, 18, 18, 18, 18, 18, 17, 17, 17, 17, 16, 16, 16, 16, 16,
- 16, 16, 21, 22, 22, 23, 23, 22, 22, 22, 21, 21, 19, 19, 19, 19, 18, 18,
- 18, 18, 18, 17, 17, 17, 17, 16, 16, 16, 16, 16, 16, 16, 15, 15, 21, 22,
- 22, 23, 23, 23, 23, 23, 21, 21, 19, 19, 19, 18, 18, 17, 17, 17, 17, 17,
- 17, 16, 16, 16, 16, 15, 15, 15, 15, 15, 15, 15, 20, 22, 22, 23, 22, 22,
- 22, 22, 21, 21, 19, 19, 18, 18, 17, 17, 17, 17, 17, 16, 16, 16, 16, 15,
- 15, 15, 15, 15, 15, 15, 14, 14, 20, 21, 21, 22, 22, 22, 22, 22, 21, 20,
- 19, 19, 18, 18, 17, 17, 17, 16, 16, 16, 16, 15, 15, 15, 15, 14, 14, 14,
- 14, 14, 14, 14, 20, 21, 21, 22, 22, 22, 22, 22, 21, 20, 19, 19, 18, 18,
- 17, 17, 16, 16, 16, 15, 15, 15, 15, 14, 14, 14, 14, 14, 14, 14, 14, 14,
- 20, 20, 21, 22, 22, 22, 22, 22, 21, 20, 19, 19, 18, 18, 17, 17, 16, 16,
- 16, 15, 15, 15, 14, 14, 14, 14, 14, 14, 13, 13, 13, 14, 19, 20, 20, 21,
- 21, 21, 21, 21, 20, 20, 19, 19, 18, 17, 17, 16, 16, 15, 15, 15, 15, 14,
- 14, 14, 13, 13, 13, 13, 13, 13, 13, 13, 19, 20, 20, 21, 21, 21, 21, 21,
- 20, 20, 19, 18, 18, 17, 17, 16, 16, 15, 15, 15, 14, 14, 14, 14, 13, 13,
- 13, 13, 13, 13, 13, 13, 18, 19, 19, 20, 20, 20, 20, 20, 20, 19, 18, 18,
- 17, 17, 16, 16, 15, 15, 15, 14, 14, 13, 13, 13, 13, 12, 12, 12, 12, 12,
- 12, 12, 18, 19, 19, 20, 20, 20, 20, 20, 19, 19, 18, 18, 17, 17, 16, 16,
- 15, 15, 14, 14, 14, 13, 13, 13, 13, 12, 12, 12, 12, 12, 12, 12, 17, 18,
- 18, 19, 19, 19, 20, 20, 19, 19, 18, 17, 17, 16, 16, 15, 15, 14, 14, 14,
- 14, 13, 13, 12, 12, 12, 12, 12, 12, 12, 12, 12, 17, 18, 18, 19, 19, 19,
- 19, 19, 19, 18, 17, 17, 17, 16, 16, 15, 15, 14, 14, 13, 13, 13, 13, 12,
- 12, 12, 12, 12, 12, 12, 12, 12, 16, 17, 17, 18, 18, 18, 19, 19, 18, 18,
- 17, 17, 16, 16, 15, 15, 14, 14, 14, 13, 13, 12, 12, 12, 12, 12, 12, 11,
- 11, 11, 11, 11, 16, 17, 17, 18, 18, 18, 18, 19, 18, 18, 17, 17, 16, 16,
- 15, 15, 14, 14, 14, 13, 13, 12, 12, 12, 12, 12, 11, 11, 11, 11, 11, 11,
- 16, 17, 17, 18, 18, 18, 18, 19, 18, 18, 17, 17, 16, 16, 15, 15, 14, 14,
- 14, 13, 13, 12, 12, 12, 12, 11, 11, 11, 11, 11, 11, 11, 16, 16, 17, 17,
- 17, 18, 18, 18, 17, 17, 17, 16, 16, 16, 15, 15, 14, 14, 13, 13, 13, 12,
- 12, 12, 12, 11, 11, 11, 11, 11, 11, 11, 15, 16, 16, 17, 17, 17, 18, 18,
- 17, 17, 17, 16, 16, 16, 15, 15, 14, 14, 13, 13, 13, 12, 12, 12, 12, 11,
- 11, 11, 11, 11, 11, 11, 15, 16, 16, 17, 17, 17, 17, 18, 17, 17, 17, 16,
- 16, 15, 15, 14, 14, 14, 13, 13, 13, 12, 12, 12, 12, 11, 11, 11, 11, 11,
- 10, 11, 15, 16, 16, 17, 17, 17, 17, 18, 17, 17, 17, 16, 16, 15, 15, 14,
- 14, 14, 14, 13, 13, 12, 12, 12, 12, 11, 11, 11, 11, 11, 11, 10,
- /* Size 4x8 */
- 33, 22, 20, 17, 28, 22, 22, 18, 24, 20, 20, 18, 23, 19, 18, 16, 22, 19,
- 16, 14, 20, 18, 15, 12, 18, 17, 14, 11, 17, 16, 13, 11,
- /* Size 8x4 */
- 33, 28, 24, 23, 22, 20, 18, 17, 22, 22, 20, 19, 19, 18, 17, 16, 20, 22,
- 20, 18, 16, 15, 14, 13, 17, 18, 18, 16, 14, 12, 11, 11,
- /* Size 8x16 */
- 32, 32, 26, 21, 20, 18, 16, 15, 33, 31, 25, 22, 21, 19, 17, 16, 33, 29,
- 24, 22, 22, 20, 18, 17, 29, 26, 22, 22, 22, 20, 19, 18, 25, 24, 21, 21,
- 21, 20, 18, 17, 21, 22, 20, 19, 19, 18, 17, 17, 21, 22, 21, 19, 18, 17,
- 16, 16, 21, 23, 21, 18, 17, 16, 15, 15, 20, 22, 21, 18, 16, 15, 14, 14,
- 20, 21, 20, 18, 16, 14, 14, 13, 19, 20, 20, 17, 15, 14, 13, 13, 18, 20,
- 19, 17, 15, 13, 12, 12, 17, 19, 18, 16, 14, 13, 12, 12, 16, 18, 18, 16,
- 14, 12, 12, 11, 16, 17, 17, 16, 14, 12, 11, 11, 15, 17, 17, 16, 14, 13,
- 12, 11,
- /* Size 16x8 */
- 32, 33, 33, 29, 25, 21, 21, 21, 20, 20, 19, 18, 17, 16, 16, 15, 32, 31,
- 29, 26, 24, 22, 22, 23, 22, 21, 20, 20, 19, 18, 17, 17, 26, 25, 24, 22,
- 21, 20, 21, 21, 21, 20, 20, 19, 18, 18, 17, 17, 21, 22, 22, 22, 21, 19,
- 19, 18, 18, 18, 17, 17, 16, 16, 16, 16, 20, 21, 22, 22, 21, 19, 18, 17,
- 16, 16, 15, 15, 14, 14, 14, 14, 18, 19, 20, 20, 20, 18, 17, 16, 15, 14,
- 14, 13, 13, 12, 12, 13, 16, 17, 18, 19, 18, 17, 16, 15, 14, 14, 13, 12,
- 12, 12, 11, 12, 15, 16, 17, 18, 17, 17, 16, 15, 14, 13, 13, 12, 12, 11,
- 11, 11,
- /* Size 16x32 */
- 32, 33, 32, 28, 26, 21, 21, 21, 20, 20, 18, 18, 16, 16, 15, 15, 33, 33,
- 31, 27, 25, 22, 22, 22, 21, 20, 19, 19, 17, 17, 16, 16, 33, 33, 31, 27,
- 25, 22, 22, 22, 21, 21, 19, 19, 17, 17, 16, 16, 34, 32, 31, 26, 24, 22,
- 23, 23, 22, 21, 20, 20, 18, 18, 17, 17, 33, 31, 29, 25, 24, 22, 22, 23,
- 22, 21, 20, 20, 18, 18, 17, 17, 31, 28, 28, 24, 23, 22, 22, 22, 22, 22,
- 20, 20, 18, 18, 17, 17, 29, 27, 26, 23, 22, 22, 22, 23, 22, 22, 20, 20,
- 19, 18, 18, 17, 28, 26, 25, 22, 22, 22, 22, 23, 22, 22, 20, 20, 19, 19,
- 18, 18, 25, 24, 24, 22, 21, 21, 21, 21, 21, 20, 20, 19, 18, 18, 17, 18,
- 24, 24, 24, 22, 21, 20, 21, 21, 20, 20, 19, 19, 18, 18, 17, 17, 21, 22,
- 22, 21, 20, 19, 19, 19, 19, 19, 18, 18, 17, 17, 17, 17, 21, 22, 22, 21,
- 20, 19, 19, 19, 19, 19, 18, 18, 17, 17, 16, 16, 21, 22, 22, 22, 21, 19,
- 19, 18, 18, 18, 17, 17, 16, 16, 16, 16, 21, 23, 22, 22, 21, 19, 19, 18,
- 18, 18, 17, 17, 16, 16, 16, 15, 21, 23, 23, 22, 21, 19, 18, 18, 17, 17,
- 16, 16, 15, 15, 15, 15, 21, 22, 22, 22, 21, 19, 18, 17, 17, 17, 16, 16,
- 15, 15, 15, 15, 20, 22, 22, 22, 21, 19, 18, 17, 16, 16, 15, 15, 14, 14,
- 14, 14, 20, 22, 22, 22, 21, 19, 18, 17, 16, 16, 15, 15, 14, 14, 14, 14,
- 20, 21, 21, 22, 20, 19, 18, 17, 16, 16, 14, 14, 14, 14, 13, 14, 19, 20,
- 21, 21, 20, 19, 17, 17, 15, 15, 14, 14, 13, 13, 13, 13, 19, 20, 20, 21,
- 20, 19, 17, 17, 15, 15, 14, 14, 13, 13, 13, 13, 18, 20, 20, 20, 20, 18,
- 17, 16, 15, 15, 13, 13, 12, 12, 12, 12, 18, 20, 20, 20, 19, 18, 17, 16,
- 15, 14, 13, 13, 12, 12, 12, 12, 17, 19, 19, 20, 19, 18, 17, 16, 14, 14,
- 13, 13, 12, 12, 12, 12, 17, 18, 19, 19, 18, 17, 16, 16, 14, 14, 13, 13,
- 12, 12, 12, 12, 16, 18, 18, 19, 18, 17, 16, 15, 14, 14, 12, 12, 12, 11,
- 11, 11, 16, 18, 18, 19, 18, 17, 16, 15, 14, 14, 12, 12, 12, 11, 11, 11,
- 16, 17, 18, 18, 18, 17, 16, 15, 14, 14, 12, 12, 11, 11, 11, 11, 16, 17,
- 17, 18, 17, 17, 16, 15, 14, 13, 12, 12, 11, 11, 11, 11, 15, 17, 17, 18,
- 17, 16, 16, 15, 14, 13, 12, 12, 11, 11, 11, 11, 15, 17, 17, 18, 17, 16,
- 16, 14, 14, 13, 13, 12, 12, 11, 11, 11, 15, 17, 17, 17, 17, 16, 16, 14,
- 14, 13, 13, 12, 12, 11, 11, 10,
- /* Size 32x16 */
- 32, 33, 33, 34, 33, 31, 29, 28, 25, 24, 21, 21, 21, 21, 21, 21, 20, 20,
- 20, 19, 19, 18, 18, 17, 17, 16, 16, 16, 16, 15, 15, 15, 33, 33, 33, 32,
- 31, 28, 27, 26, 24, 24, 22, 22, 22, 23, 23, 22, 22, 22, 21, 20, 20, 20,
- 20, 19, 18, 18, 18, 17, 17, 17, 17, 17, 32, 31, 31, 31, 29, 28, 26, 25,
- 24, 24, 22, 22, 22, 22, 23, 22, 22, 22, 21, 21, 20, 20, 20, 19, 19, 18,
- 18, 18, 17, 17, 17, 17, 28, 27, 27, 26, 25, 24, 23, 22, 22, 22, 21, 21,
- 22, 22, 22, 22, 22, 22, 22, 21, 21, 20, 20, 20, 19, 19, 19, 18, 18, 18,
- 18, 17, 26, 25, 25, 24, 24, 23, 22, 22, 21, 21, 20, 20, 21, 21, 21, 21,
- 21, 21, 20, 20, 20, 20, 19, 19, 18, 18, 18, 18, 17, 17, 17, 17, 21, 22,
- 22, 22, 22, 22, 22, 22, 21, 20, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19,
- 19, 18, 18, 18, 17, 17, 17, 17, 17, 16, 16, 16, 21, 22, 22, 23, 22, 22,
- 22, 22, 21, 21, 19, 19, 19, 19, 18, 18, 18, 18, 18, 17, 17, 17, 17, 17,
- 16, 16, 16, 16, 16, 16, 16, 16, 21, 22, 22, 23, 23, 22, 23, 23, 21, 21,
- 19, 19, 18, 18, 18, 17, 17, 17, 17, 17, 17, 16, 16, 16, 16, 15, 15, 15,
- 15, 15, 14, 14, 20, 21, 21, 22, 22, 22, 22, 22, 21, 20, 19, 19, 18, 18,
- 17, 17, 16, 16, 16, 15, 15, 15, 15, 14, 14, 14, 14, 14, 14, 14, 14, 14,
- 20, 20, 21, 21, 21, 22, 22, 22, 20, 20, 19, 19, 18, 18, 17, 17, 16, 16,
- 16, 15, 15, 15, 14, 14, 14, 14, 14, 14, 13, 13, 13, 13, 18, 19, 19, 20,
- 20, 20, 20, 20, 20, 19, 18, 18, 17, 17, 16, 16, 15, 15, 14, 14, 14, 13,
- 13, 13, 13, 12, 12, 12, 12, 12, 13, 13, 18, 19, 19, 20, 20, 20, 20, 20,
- 19, 19, 18, 18, 17, 17, 16, 16, 15, 15, 14, 14, 14, 13, 13, 13, 13, 12,
- 12, 12, 12, 12, 12, 12, 16, 17, 17, 18, 18, 18, 19, 19, 18, 18, 17, 17,
- 16, 16, 15, 15, 14, 14, 14, 13, 13, 12, 12, 12, 12, 12, 12, 11, 11, 11,
- 12, 12, 16, 17, 17, 18, 18, 18, 18, 19, 18, 18, 17, 17, 16, 16, 15, 15,
- 14, 14, 14, 13, 13, 12, 12, 12, 12, 11, 11, 11, 11, 11, 11, 11, 15, 16,
- 16, 17, 17, 17, 18, 18, 17, 17, 17, 16, 16, 16, 15, 15, 14, 14, 13, 13,
- 13, 12, 12, 12, 12, 11, 11, 11, 11, 11, 11, 11, 15, 16, 16, 17, 17, 17,
- 17, 18, 18, 17, 17, 16, 16, 15, 15, 15, 14, 14, 14, 13, 13, 12, 12, 12,
- 12, 11, 11, 11, 11, 11, 11, 10,
- /* Size 4x16 */
- 33, 21, 20, 16, 33, 22, 21, 17, 31, 22, 21, 18, 27, 22, 22, 18, 24, 21,
- 20, 18, 22, 19, 19, 17, 22, 19, 18, 16, 23, 19, 17, 15, 22, 19, 16, 14,
- 21, 19, 16, 14, 20, 19, 15, 13, 20, 18, 14, 12, 18, 17, 14, 12, 18, 17,
- 14, 11, 17, 17, 13, 11, 17, 16, 13, 11,
- /* Size 16x4 */
- 33, 33, 31, 27, 24, 22, 22, 23, 22, 21, 20, 20, 18, 18, 17, 17, 21, 22,
- 22, 22, 21, 19, 19, 19, 19, 19, 19, 18, 17, 17, 17, 16, 20, 21, 21, 22,
- 20, 19, 18, 17, 16, 16, 15, 14, 14, 14, 13, 13, 16, 17, 18, 18, 18, 17,
- 16, 15, 14, 14, 13, 12, 12, 11, 11, 11,
- /* Size 8x32 */
- 32, 32, 26, 21, 20, 18, 16, 15, 33, 31, 25, 22, 21, 19, 17, 16, 33, 31,
- 25, 22, 21, 19, 17, 16, 34, 31, 24, 23, 22, 20, 18, 17, 33, 29, 24, 22,
- 22, 20, 18, 17, 31, 28, 23, 22, 22, 20, 18, 17, 29, 26, 22, 22, 22, 20,
- 19, 18, 28, 25, 22, 22, 22, 20, 19, 18, 25, 24, 21, 21, 21, 20, 18, 17,
- 24, 24, 21, 21, 20, 19, 18, 17, 21, 22, 20, 19, 19, 18, 17, 17, 21, 22,
- 20, 19, 19, 18, 17, 16, 21, 22, 21, 19, 18, 17, 16, 16, 21, 22, 21, 19,
- 18, 17, 16, 16, 21, 23, 21, 18, 17, 16, 15, 15, 21, 22, 21, 18, 17, 16,
- 15, 15, 20, 22, 21, 18, 16, 15, 14, 14, 20, 22, 21, 18, 16, 15, 14, 14,
- 20, 21, 20, 18, 16, 14, 14, 13, 19, 21, 20, 17, 15, 14, 13, 13, 19, 20,
- 20, 17, 15, 14, 13, 13, 18, 20, 20, 17, 15, 13, 12, 12, 18, 20, 19, 17,
- 15, 13, 12, 12, 17, 19, 19, 17, 14, 13, 12, 12, 17, 19, 18, 16, 14, 13,
- 12, 12, 16, 18, 18, 16, 14, 12, 12, 11, 16, 18, 18, 16, 14, 12, 12, 11,
- 16, 18, 18, 16, 14, 12, 11, 11, 16, 17, 17, 16, 14, 12, 11, 11, 15, 17,
- 17, 16, 14, 12, 11, 11, 15, 17, 17, 16, 14, 13, 12, 11, 15, 17, 17, 16,
- 14, 13, 12, 11,
- /* Size 32x8 */
- 32, 33, 33, 34, 33, 31, 29, 28, 25, 24, 21, 21, 21, 21, 21, 21, 20, 20,
- 20, 19, 19, 18, 18, 17, 17, 16, 16, 16, 16, 15, 15, 15, 32, 31, 31, 31,
- 29, 28, 26, 25, 24, 24, 22, 22, 22, 22, 23, 22, 22, 22, 21, 21, 20, 20,
- 20, 19, 19, 18, 18, 18, 17, 17, 17, 17, 26, 25, 25, 24, 24, 23, 22, 22,
- 21, 21, 20, 20, 21, 21, 21, 21, 21, 21, 20, 20, 20, 20, 19, 19, 18, 18,
- 18, 18, 17, 17, 17, 17, 21, 22, 22, 23, 22, 22, 22, 22, 21, 21, 19, 19,
- 19, 19, 18, 18, 18, 18, 18, 17, 17, 17, 17, 17, 16, 16, 16, 16, 16, 16,
- 16, 16, 20, 21, 21, 22, 22, 22, 22, 22, 21, 20, 19, 19, 18, 18, 17, 17,
- 16, 16, 16, 15, 15, 15, 15, 14, 14, 14, 14, 14, 14, 14, 14, 14, 18, 19,
- 19, 20, 20, 20, 20, 20, 20, 19, 18, 18, 17, 17, 16, 16, 15, 15, 14, 14,
- 14, 13, 13, 13, 13, 12, 12, 12, 12, 12, 13, 13, 16, 17, 17, 18, 18, 18,
- 19, 19, 18, 18, 17, 17, 16, 16, 15, 15, 14, 14, 14, 13, 13, 12, 12, 12,
- 12, 12, 12, 11, 11, 11, 12, 12, 15, 16, 16, 17, 17, 17, 18, 18, 17, 17,
- 17, 16, 16, 16, 15, 15, 14, 14, 13, 13, 13, 12, 12, 12, 12, 11, 11, 11,
- 11, 11, 11, 11 },
- },
- {
- { /* Luma */
- /* Size 4x4 */
- 32, 30, 21, 14, 30, 21, 17, 13, 21, 17, 12, 10, 14, 13, 10, 8,
- /* Size 8x8 */
- 32, 32, 30, 27, 22, 18, 15, 13, 32, 31, 29, 26, 23, 19, 16, 14, 30, 29,
- 26, 23, 20, 18, 15, 13, 27, 26, 23, 19, 17, 15, 13, 12, 22, 23, 20, 17,
- 14, 13, 11, 10, 18, 19, 18, 15, 13, 11, 10, 9, 15, 16, 15, 13, 11, 10,
- 9, 8, 13, 14, 13, 12, 10, 9, 8, 7,
- /* Size 16x16 */
- 32, 33, 33, 33, 32, 30, 28, 26, 23, 21, 19, 17, 16, 14, 13, 12, 33, 32,
- 32, 32, 32, 30, 29, 27, 24, 22, 20, 18, 17, 15, 13, 13, 33, 32, 32, 32,
- 32, 31, 30, 28, 25, 23, 21, 19, 17, 16, 14, 14, 33, 32, 32, 31, 30, 29,
- 28, 26, 24, 23, 20, 19, 17, 16, 14, 14, 32, 32, 32, 30, 29, 28, 27, 26,
- 24, 22, 21, 19, 18, 16, 15, 14, 30, 30, 31, 29, 28, 26, 24, 23, 22, 20,
- 19, 18, 16, 15, 14, 13, 28, 29, 30, 28, 27, 24, 21, 20, 19, 18, 17, 16,
- 15, 14, 13, 13, 26, 27, 28, 26, 26, 23, 20, 19, 18, 17, 16, 15, 14, 13,
- 12, 12, 23, 24, 25, 24, 24, 22, 19, 18, 16, 15, 14, 14, 13, 12, 11, 11,
- 21, 22, 23, 23, 22, 20, 18, 17, 15, 14, 13, 13, 12, 11, 11, 10, 19, 20,
- 21, 20, 21, 19, 17, 16, 14, 13, 12, 12, 11, 11, 10, 10, 17, 18, 19, 19,
- 19, 18, 16, 15, 14, 13, 12, 11, 10, 10, 9, 9, 16, 17, 17, 17, 18, 16,
- 15, 14, 13, 12, 11, 10, 10, 9, 9, 8, 14, 15, 16, 16, 16, 15, 14, 13, 12,
- 11, 11, 10, 9, 9, 8, 8, 13, 13, 14, 14, 15, 14, 13, 12, 11, 11, 10, 9,
- 9, 8, 8, 7, 12, 13, 14, 14, 14, 13, 13, 12, 11, 10, 10, 9, 8, 8, 7, 7,
- /* Size 32x32 */
- 32, 33, 33, 33, 33, 33, 33, 32, 32, 30, 30, 28, 28, 26, 26, 23, 23, 21,
- 21, 19, 19, 17, 17, 16, 16, 14, 14, 13, 13, 12, 12, 12, 33, 32, 32, 32,
- 32, 32, 32, 32, 32, 30, 30, 29, 29, 27, 27, 24, 24, 22, 22, 20, 20, 18,
- 18, 17, 17, 15, 15, 13, 13, 13, 13, 12, 33, 32, 32, 32, 32, 32, 32, 32,
- 32, 30, 30, 29, 29, 27, 27, 24, 24, 22, 22, 20, 20, 18, 18, 17, 17, 15,
- 15, 13, 13, 13, 13, 12, 33, 32, 32, 32, 32, 32, 32, 32, 32, 31, 31, 30,
- 30, 28, 28, 25, 25, 23, 23, 21, 21, 19, 19, 17, 17, 16, 16, 14, 14, 14,
- 14, 13, 33, 32, 32, 32, 32, 32, 32, 32, 32, 31, 31, 30, 30, 28, 28, 25,
- 25, 23, 23, 21, 21, 19, 19, 17, 17, 16, 16, 14, 14, 14, 14, 13, 33, 32,
- 32, 32, 32, 31, 31, 30, 30, 29, 29, 28, 28, 26, 26, 24, 24, 23, 23, 20,
- 20, 19, 19, 17, 17, 16, 16, 14, 14, 14, 14, 13, 33, 32, 32, 32, 32, 31,
- 31, 30, 30, 29, 29, 28, 28, 26, 26, 24, 24, 23, 23, 20, 20, 19, 19, 17,
- 17, 16, 16, 14, 14, 14, 14, 13, 32, 32, 32, 32, 32, 30, 30, 29, 29, 28,
- 28, 27, 27, 26, 26, 24, 24, 22, 22, 21, 21, 19, 19, 18, 18, 16, 16, 15,
- 15, 14, 14, 14, 32, 32, 32, 32, 32, 30, 30, 29, 29, 28, 28, 27, 27, 26,
- 26, 24, 24, 22, 22, 21, 21, 19, 19, 18, 18, 16, 16, 15, 15, 14, 14, 14,
- 30, 30, 30, 31, 31, 29, 29, 28, 28, 26, 26, 24, 24, 23, 23, 22, 22, 20,
- 20, 19, 19, 18, 18, 16, 16, 15, 15, 14, 14, 13, 13, 13, 30, 30, 30, 31,
- 31, 29, 29, 28, 28, 26, 26, 24, 24, 23, 23, 22, 22, 20, 20, 19, 19, 18,
- 18, 16, 16, 15, 15, 14, 14, 13, 13, 13, 28, 29, 29, 30, 30, 28, 28, 27,
- 27, 24, 24, 21, 21, 20, 20, 19, 19, 18, 18, 17, 17, 16, 16, 15, 15, 14,
- 14, 13, 13, 13, 13, 12, 28, 29, 29, 30, 30, 28, 28, 27, 27, 24, 24, 21,
- 21, 20, 20, 19, 19, 18, 18, 17, 17, 16, 16, 15, 15, 14, 14, 13, 13, 13,
- 13, 12, 26, 27, 27, 28, 28, 26, 26, 26, 26, 23, 23, 20, 20, 19, 19, 18,
- 18, 17, 17, 16, 16, 15, 15, 14, 14, 13, 13, 12, 12, 12, 12, 11, 26, 27,
- 27, 28, 28, 26, 26, 26, 26, 23, 23, 20, 20, 19, 19, 18, 18, 17, 17, 16,
- 16, 15, 15, 14, 14, 13, 13, 12, 12, 12, 12, 11, 23, 24, 24, 25, 25, 24,
- 24, 24, 24, 22, 22, 19, 19, 18, 18, 16, 16, 15, 15, 14, 14, 14, 14, 13,
- 13, 12, 12, 11, 11, 11, 11, 11, 23, 24, 24, 25, 25, 24, 24, 24, 24, 22,
- 22, 19, 19, 18, 18, 16, 16, 15, 15, 14, 14, 14, 14, 13, 13, 12, 12, 11,
- 11, 11, 11, 11, 21, 22, 22, 23, 23, 23, 23, 22, 22, 20, 20, 18, 18, 17,
- 17, 15, 15, 14, 14, 13, 13, 13, 13, 12, 12, 11, 11, 11, 11, 10, 10, 10,
- 21, 22, 22, 23, 23, 23, 23, 22, 22, 20, 20, 18, 18, 17, 17, 15, 15, 14,
- 14, 13, 13, 13, 13, 12, 12, 11, 11, 11, 11, 10, 10, 10, 19, 20, 20, 21,
- 21, 20, 20, 21, 21, 19, 19, 17, 17, 16, 16, 14, 14, 13, 13, 12, 12, 12,
- 12, 11, 11, 11, 11, 10, 10, 10, 10, 9, 19, 20, 20, 21, 21, 20, 20, 21,
- 21, 19, 19, 17, 17, 16, 16, 14, 14, 13, 13, 12, 12, 12, 12, 11, 11, 11,
- 11, 10, 10, 10, 10, 9, 17, 18, 18, 19, 19, 19, 19, 19, 19, 18, 18, 16,
- 16, 15, 15, 14, 14, 13, 13, 12, 12, 11, 11, 10, 10, 10, 10, 9, 9, 9, 9,
- 9, 17, 18, 18, 19, 19, 19, 19, 19, 19, 18, 18, 16, 16, 15, 15, 14, 14,
- 13, 13, 12, 12, 11, 11, 10, 10, 10, 10, 9, 9, 9, 9, 9, 16, 17, 17, 17,
- 17, 17, 17, 18, 18, 16, 16, 15, 15, 14, 14, 13, 13, 12, 12, 11, 11, 10,
- 10, 10, 10, 9, 9, 9, 9, 8, 8, 8, 16, 17, 17, 17, 17, 17, 17, 18, 18, 16,
- 16, 15, 15, 14, 14, 13, 13, 12, 12, 11, 11, 10, 10, 10, 10, 9, 9, 9, 9,
- 8, 8, 8, 14, 15, 15, 16, 16, 16, 16, 16, 16, 15, 15, 14, 14, 13, 13, 12,
- 12, 11, 11, 11, 11, 10, 10, 9, 9, 9, 9, 8, 8, 8, 8, 8, 14, 15, 15, 16,
- 16, 16, 16, 16, 16, 15, 15, 14, 14, 13, 13, 12, 12, 11, 11, 11, 11, 10,
- 10, 9, 9, 9, 9, 8, 8, 8, 8, 8, 13, 13, 13, 14, 14, 14, 14, 15, 15, 14,
- 14, 13, 13, 12, 12, 11, 11, 11, 11, 10, 10, 9, 9, 9, 9, 8, 8, 8, 8, 7,
- 7, 7, 13, 13, 13, 14, 14, 14, 14, 15, 15, 14, 14, 13, 13, 12, 12, 11,
- 11, 11, 11, 10, 10, 9, 9, 9, 9, 8, 8, 8, 8, 7, 7, 7, 12, 13, 13, 14, 14,
- 14, 14, 14, 14, 13, 13, 13, 13, 12, 12, 11, 11, 10, 10, 10, 10, 9, 9, 8,
- 8, 8, 8, 7, 7, 7, 7, 7, 12, 13, 13, 14, 14, 14, 14, 14, 14, 13, 13, 13,
- 13, 12, 12, 11, 11, 10, 10, 10, 10, 9, 9, 8, 8, 8, 8, 7, 7, 7, 7, 7, 12,
- 12, 12, 13, 13, 13, 13, 14, 14, 13, 13, 12, 12, 11, 11, 11, 11, 10, 10,
- 9, 9, 9, 9, 8, 8, 8, 8, 7, 7, 7, 7, 7,
- /* Size 4x8 */
- 32, 29, 20, 14, 32, 28, 20, 14, 30, 24, 19, 14, 28, 20, 16, 12, 23, 18,
- 13, 11, 19, 16, 12, 9, 16, 14, 11, 8, 14, 13, 10, 8,
- /* Size 8x4 */
- 32, 32, 30, 28, 23, 19, 16, 14, 29, 28, 24, 20, 18, 16, 14, 13, 20, 20,
- 19, 16, 13, 12, 11, 10, 14, 14, 14, 12, 11, 9, 8, 8,
- /* Size 8x16 */
- 32, 33, 32, 28, 23, 19, 16, 13, 33, 32, 32, 29, 24, 20, 17, 14, 33, 32,
- 31, 30, 25, 21, 17, 14, 32, 32, 30, 28, 24, 20, 17, 14, 32, 31, 29, 27,
- 24, 21, 18, 15, 30, 30, 28, 24, 21, 19, 16, 14, 28, 30, 27, 21, 19, 17,
- 15, 13, 26, 28, 26, 20, 18, 16, 14, 12, 23, 25, 24, 19, 16, 14, 13, 11,
- 21, 23, 22, 18, 15, 13, 12, 11, 19, 21, 20, 17, 14, 12, 11, 10, 18, 19,
- 19, 16, 14, 12, 10, 9, 16, 17, 18, 15, 13, 11, 10, 9, 14, 16, 16, 14,
- 12, 11, 9, 8, 13, 14, 15, 13, 11, 10, 9, 8, 12, 14, 14, 13, 11, 10, 8,
- 8,
- /* Size 16x8 */
- 32, 33, 33, 32, 32, 30, 28, 26, 23, 21, 19, 18, 16, 14, 13, 12, 33, 32,
- 32, 32, 31, 30, 30, 28, 25, 23, 21, 19, 17, 16, 14, 14, 32, 32, 31, 30,
- 29, 28, 27, 26, 24, 22, 20, 19, 18, 16, 15, 14, 28, 29, 30, 28, 27, 24,
- 21, 20, 19, 18, 17, 16, 15, 14, 13, 13, 23, 24, 25, 24, 24, 21, 19, 18,
- 16, 15, 14, 14, 13, 12, 11, 11, 19, 20, 21, 20, 21, 19, 17, 16, 14, 13,
- 12, 12, 11, 11, 10, 10, 16, 17, 17, 17, 18, 16, 15, 14, 13, 12, 11, 10,
- 10, 9, 9, 8, 13, 14, 14, 14, 15, 14, 13, 12, 11, 11, 10, 9, 9, 8, 8, 8,
- /* Size 16x32 */
- 32, 33, 33, 32, 32, 28, 28, 23, 23, 19, 19, 16, 16, 13, 13, 12, 33, 32,
- 32, 32, 32, 29, 29, 24, 24, 20, 20, 17, 17, 14, 14, 12, 33, 32, 32, 32,
- 32, 29, 29, 24, 24, 20, 20, 17, 17, 14, 14, 12, 33, 32, 32, 31, 31, 30,
- 30, 25, 25, 21, 21, 17, 17, 14, 14, 13, 33, 32, 32, 31, 31, 30, 30, 25,
- 25, 21, 21, 17, 17, 14, 14, 13, 32, 32, 32, 30, 30, 28, 28, 24, 24, 20,
- 20, 17, 17, 14, 14, 13, 32, 32, 32, 30, 30, 28, 28, 24, 24, 20, 20, 17,
- 17, 14, 14, 13, 32, 31, 31, 29, 29, 27, 27, 24, 24, 21, 21, 18, 18, 15,
- 15, 14, 32, 31, 31, 29, 29, 27, 27, 24, 24, 21, 21, 18, 18, 15, 15, 14,
- 30, 30, 30, 28, 28, 24, 24, 21, 21, 19, 19, 16, 16, 14, 14, 13, 30, 30,
- 30, 28, 28, 24, 24, 21, 21, 19, 19, 16, 16, 14, 14, 13, 28, 30, 30, 27,
- 27, 21, 21, 19, 19, 17, 17, 15, 15, 13, 13, 12, 28, 30, 30, 27, 27, 21,
- 21, 19, 19, 17, 17, 15, 15, 13, 13, 12, 26, 28, 28, 26, 26, 20, 20, 18,
- 18, 16, 16, 14, 14, 12, 12, 12, 26, 28, 28, 26, 26, 20, 20, 18, 18, 16,
- 16, 14, 14, 12, 12, 12, 23, 25, 25, 24, 24, 19, 19, 16, 16, 14, 14, 13,
- 13, 11, 11, 11, 23, 25, 25, 24, 24, 19, 19, 16, 16, 14, 14, 13, 13, 11,
- 11, 11, 21, 23, 23, 22, 22, 18, 18, 15, 15, 13, 13, 12, 12, 11, 11, 10,
- 21, 23, 23, 22, 22, 18, 18, 15, 15, 13, 13, 12, 12, 11, 11, 10, 19, 21,
- 21, 20, 20, 17, 17, 14, 14, 12, 12, 11, 11, 10, 10, 9, 19, 21, 21, 20,
- 20, 17, 17, 14, 14, 12, 12, 11, 11, 10, 10, 9, 18, 19, 19, 19, 19, 16,
- 16, 14, 14, 12, 12, 10, 10, 9, 9, 9, 18, 19, 19, 19, 19, 16, 16, 14, 14,
- 12, 12, 10, 10, 9, 9, 9, 16, 17, 17, 18, 18, 15, 15, 13, 13, 11, 11, 10,
- 10, 9, 9, 8, 16, 17, 17, 18, 18, 15, 15, 13, 13, 11, 11, 10, 10, 9, 9,
- 8, 14, 16, 16, 16, 16, 14, 14, 12, 12, 11, 11, 9, 9, 8, 8, 8, 14, 16,
- 16, 16, 16, 14, 14, 12, 12, 11, 11, 9, 9, 8, 8, 8, 13, 14, 14, 15, 15,
- 13, 13, 11, 11, 10, 10, 9, 9, 8, 8, 7, 13, 14, 14, 15, 15, 13, 13, 11,
- 11, 10, 10, 9, 9, 8, 8, 7, 12, 14, 14, 14, 14, 13, 13, 11, 11, 10, 10,
- 8, 8, 8, 8, 7, 12, 14, 14, 14, 14, 13, 13, 11, 11, 10, 10, 8, 8, 8, 8,
- 7, 12, 13, 13, 13, 13, 12, 12, 11, 11, 9, 9, 8, 8, 7, 7, 7,
- /* Size 32x16 */
- 32, 33, 33, 33, 33, 32, 32, 32, 32, 30, 30, 28, 28, 26, 26, 23, 23, 21,
- 21, 19, 19, 18, 18, 16, 16, 14, 14, 13, 13, 12, 12, 12, 33, 32, 32, 32,
- 32, 32, 32, 31, 31, 30, 30, 30, 30, 28, 28, 25, 25, 23, 23, 21, 21, 19,
- 19, 17, 17, 16, 16, 14, 14, 14, 14, 13, 33, 32, 32, 32, 32, 32, 32, 31,
- 31, 30, 30, 30, 30, 28, 28, 25, 25, 23, 23, 21, 21, 19, 19, 17, 17, 16,
- 16, 14, 14, 14, 14, 13, 32, 32, 32, 31, 31, 30, 30, 29, 29, 28, 28, 27,
- 27, 26, 26, 24, 24, 22, 22, 20, 20, 19, 19, 18, 18, 16, 16, 15, 15, 14,
- 14, 13, 32, 32, 32, 31, 31, 30, 30, 29, 29, 28, 28, 27, 27, 26, 26, 24,
- 24, 22, 22, 20, 20, 19, 19, 18, 18, 16, 16, 15, 15, 14, 14, 13, 28, 29,
- 29, 30, 30, 28, 28, 27, 27, 24, 24, 21, 21, 20, 20, 19, 19, 18, 18, 17,
- 17, 16, 16, 15, 15, 14, 14, 13, 13, 13, 13, 12, 28, 29, 29, 30, 30, 28,
- 28, 27, 27, 24, 24, 21, 21, 20, 20, 19, 19, 18, 18, 17, 17, 16, 16, 15,
- 15, 14, 14, 13, 13, 13, 13, 12, 23, 24, 24, 25, 25, 24, 24, 24, 24, 21,
- 21, 19, 19, 18, 18, 16, 16, 15, 15, 14, 14, 14, 14, 13, 13, 12, 12, 11,
- 11, 11, 11, 11, 23, 24, 24, 25, 25, 24, 24, 24, 24, 21, 21, 19, 19, 18,
- 18, 16, 16, 15, 15, 14, 14, 14, 14, 13, 13, 12, 12, 11, 11, 11, 11, 11,
- 19, 20, 20, 21, 21, 20, 20, 21, 21, 19, 19, 17, 17, 16, 16, 14, 14, 13,
- 13, 12, 12, 12, 12, 11, 11, 11, 11, 10, 10, 10, 10, 9, 19, 20, 20, 21,
- 21, 20, 20, 21, 21, 19, 19, 17, 17, 16, 16, 14, 14, 13, 13, 12, 12, 12,
- 12, 11, 11, 11, 11, 10, 10, 10, 10, 9, 16, 17, 17, 17, 17, 17, 17, 18,
- 18, 16, 16, 15, 15, 14, 14, 13, 13, 12, 12, 11, 11, 10, 10, 10, 10, 9,
- 9, 9, 9, 8, 8, 8, 16, 17, 17, 17, 17, 17, 17, 18, 18, 16, 16, 15, 15,
- 14, 14, 13, 13, 12, 12, 11, 11, 10, 10, 10, 10, 9, 9, 9, 9, 8, 8, 8, 13,
- 14, 14, 14, 14, 14, 14, 15, 15, 14, 14, 13, 13, 12, 12, 11, 11, 11, 11,
- 10, 10, 9, 9, 9, 9, 8, 8, 8, 8, 8, 8, 7, 13, 14, 14, 14, 14, 14, 14, 15,
- 15, 14, 14, 13, 13, 12, 12, 11, 11, 11, 11, 10, 10, 9, 9, 9, 9, 8, 8, 8,
- 8, 8, 8, 7, 12, 12, 12, 13, 13, 13, 13, 14, 14, 13, 13, 12, 12, 12, 12,
- 11, 11, 10, 10, 9, 9, 9, 9, 8, 8, 8, 8, 7, 7, 7, 7, 7,
- /* Size 4x16 */
- 33, 28, 19, 13, 32, 29, 20, 14, 32, 30, 21, 14, 32, 28, 20, 14, 31, 27,
- 21, 15, 30, 24, 19, 14, 30, 21, 17, 13, 28, 20, 16, 12, 25, 19, 14, 11,
- 23, 18, 13, 11, 21, 17, 12, 10, 19, 16, 12, 9, 17, 15, 11, 9, 16, 14,
- 11, 8, 14, 13, 10, 8, 14, 13, 10, 8,
- /* Size 16x4 */
- 33, 32, 32, 32, 31, 30, 30, 28, 25, 23, 21, 19, 17, 16, 14, 14, 28, 29,
- 30, 28, 27, 24, 21, 20, 19, 18, 17, 16, 15, 14, 13, 13, 19, 20, 21, 20,
- 21, 19, 17, 16, 14, 13, 12, 12, 11, 11, 10, 10, 13, 14, 14, 14, 15, 14,
- 13, 12, 11, 11, 10, 9, 9, 8, 8, 8,
- /* Size 8x32 */
- 32, 33, 32, 28, 23, 19, 16, 13, 33, 32, 32, 29, 24, 20, 17, 14, 33, 32,
- 32, 29, 24, 20, 17, 14, 33, 32, 31, 30, 25, 21, 17, 14, 33, 32, 31, 30,
- 25, 21, 17, 14, 32, 32, 30, 28, 24, 20, 17, 14, 32, 32, 30, 28, 24, 20,
- 17, 14, 32, 31, 29, 27, 24, 21, 18, 15, 32, 31, 29, 27, 24, 21, 18, 15,
- 30, 30, 28, 24, 21, 19, 16, 14, 30, 30, 28, 24, 21, 19, 16, 14, 28, 30,
- 27, 21, 19, 17, 15, 13, 28, 30, 27, 21, 19, 17, 15, 13, 26, 28, 26, 20,
- 18, 16, 14, 12, 26, 28, 26, 20, 18, 16, 14, 12, 23, 25, 24, 19, 16, 14,
- 13, 11, 23, 25, 24, 19, 16, 14, 13, 11, 21, 23, 22, 18, 15, 13, 12, 11,
- 21, 23, 22, 18, 15, 13, 12, 11, 19, 21, 20, 17, 14, 12, 11, 10, 19, 21,
- 20, 17, 14, 12, 11, 10, 18, 19, 19, 16, 14, 12, 10, 9, 18, 19, 19, 16,
- 14, 12, 10, 9, 16, 17, 18, 15, 13, 11, 10, 9, 16, 17, 18, 15, 13, 11,
- 10, 9, 14, 16, 16, 14, 12, 11, 9, 8, 14, 16, 16, 14, 12, 11, 9, 8, 13,
- 14, 15, 13, 11, 10, 9, 8, 13, 14, 15, 13, 11, 10, 9, 8, 12, 14, 14, 13,
- 11, 10, 8, 8, 12, 14, 14, 13, 11, 10, 8, 8, 12, 13, 13, 12, 11, 9, 8, 7,
- /* Size 32x8 */
- 32, 33, 33, 33, 33, 32, 32, 32, 32, 30, 30, 28, 28, 26, 26, 23, 23, 21,
- 21, 19, 19, 18, 18, 16, 16, 14, 14, 13, 13, 12, 12, 12, 33, 32, 32, 32,
- 32, 32, 32, 31, 31, 30, 30, 30, 30, 28, 28, 25, 25, 23, 23, 21, 21, 19,
- 19, 17, 17, 16, 16, 14, 14, 14, 14, 13, 32, 32, 32, 31, 31, 30, 30, 29,
- 29, 28, 28, 27, 27, 26, 26, 24, 24, 22, 22, 20, 20, 19, 19, 18, 18, 16,
- 16, 15, 15, 14, 14, 13, 28, 29, 29, 30, 30, 28, 28, 27, 27, 24, 24, 21,
- 21, 20, 20, 19, 19, 18, 18, 17, 17, 16, 16, 15, 15, 14, 14, 13, 13, 13,
- 13, 12, 23, 24, 24, 25, 25, 24, 24, 24, 24, 21, 21, 19, 19, 18, 18, 16,
- 16, 15, 15, 14, 14, 14, 14, 13, 13, 12, 12, 11, 11, 11, 11, 11, 19, 20,
- 20, 21, 21, 20, 20, 21, 21, 19, 19, 17, 17, 16, 16, 14, 14, 13, 13, 12,
- 12, 12, 12, 11, 11, 11, 11, 10, 10, 10, 10, 9, 16, 17, 17, 17, 17, 17,
- 17, 18, 18, 16, 16, 15, 15, 14, 14, 13, 13, 12, 12, 11, 11, 10, 10, 10,
- 10, 9, 9, 9, 9, 8, 8, 8, 13, 14, 14, 14, 14, 14, 14, 15, 15, 14, 14, 13,
- 13, 12, 12, 11, 11, 11, 11, 10, 10, 9, 9, 9, 9, 8, 8, 8, 8, 8, 8, 7 },
- { /* Chroma */
- /* Size 4x4 */
- 32, 22, 22, 18, 22, 19, 19, 17, 22, 19, 16, 14, 18, 17, 14, 12,
- /* Size 8x8 */
- 33, 30, 24, 22, 21, 20, 18, 17, 30, 26, 23, 22, 22, 21, 19, 18, 24, 23,
- 21, 21, 20, 20, 19, 18, 22, 22, 21, 19, 18, 18, 17, 16, 21, 22, 20, 18,
- 17, 16, 15, 14, 20, 21, 20, 18, 16, 14, 14, 13, 18, 19, 19, 17, 15, 14,
- 12, 12, 17, 18, 18, 16, 14, 13, 12, 11,
- /* Size 16x16 */
- 32, 33, 34, 31, 28, 25, 21, 21, 21, 20, 20, 19, 18, 17, 16, 16, 33, 33,
- 33, 30, 27, 24, 22, 22, 22, 21, 20, 20, 19, 18, 17, 17, 34, 33, 32, 29,
- 26, 24, 22, 23, 23, 22, 22, 21, 20, 19, 18, 18, 31, 30, 29, 26, 24, 23,
- 22, 22, 23, 22, 22, 21, 20, 19, 18, 18, 28, 27, 26, 24, 22, 22, 21, 22,
- 23, 22, 22, 21, 20, 20, 19, 19, 25, 24, 24, 23, 22, 21, 20, 21, 21, 20,
- 20, 20, 19, 19, 18, 18, 21, 22, 22, 22, 21, 20, 19, 19, 19, 19, 19, 19,
- 18, 18, 17, 17, 21, 22, 23, 22, 22, 21, 19, 19, 19, 18, 18, 18, 17, 17,
- 16, 16, 21, 22, 23, 23, 23, 21, 19, 19, 18, 17, 17, 17, 16, 16, 15, 15,
- 20, 21, 22, 22, 22, 20, 19, 18, 17, 17, 16, 16, 15, 15, 14, 14, 20, 20,
- 22, 22, 22, 20, 19, 18, 17, 16, 16, 15, 15, 14, 14, 14, 19, 20, 21, 21,
- 21, 20, 19, 18, 17, 16, 15, 14, 14, 14, 13, 13, 18, 19, 20, 20, 20, 19,
- 18, 17, 16, 15, 15, 14, 13, 13, 12, 12, 17, 18, 19, 19, 20, 19, 18, 17,
- 16, 15, 14, 14, 13, 12, 12, 12, 16, 17, 18, 18, 19, 18, 17, 16, 15, 14,
- 14, 13, 12, 12, 12, 11, 16, 17, 18, 18, 19, 18, 17, 16, 15, 14, 14, 13,
- 12, 12, 11, 11,
- /* Size 32x32 */
- 32, 33, 33, 34, 34, 31, 31, 28, 28, 25, 25, 21, 21, 21, 21, 21, 21, 20,
- 20, 20, 20, 19, 19, 18, 18, 17, 17, 16, 16, 16, 16, 15, 33, 33, 33, 33,
- 33, 30, 30, 27, 27, 24, 24, 22, 22, 22, 22, 22, 22, 21, 21, 20, 20, 20,
- 20, 19, 19, 18, 18, 17, 17, 17, 17, 16, 33, 33, 33, 33, 33, 30, 30, 27,
- 27, 24, 24, 22, 22, 22, 22, 22, 22, 21, 21, 20, 20, 20, 20, 19, 19, 18,
- 18, 17, 17, 17, 17, 16, 34, 33, 33, 32, 32, 29, 29, 26, 26, 24, 24, 22,
- 22, 23, 23, 23, 23, 22, 22, 22, 22, 21, 21, 20, 20, 19, 19, 18, 18, 18,
- 18, 17, 34, 33, 33, 32, 32, 29, 29, 26, 26, 24, 24, 22, 22, 23, 23, 23,
- 23, 22, 22, 22, 22, 21, 21, 20, 20, 19, 19, 18, 18, 18, 18, 17, 31, 30,
- 30, 29, 29, 26, 26, 24, 24, 23, 23, 22, 22, 22, 22, 23, 23, 22, 22, 22,
- 22, 21, 21, 20, 20, 19, 19, 18, 18, 18, 18, 17, 31, 30, 30, 29, 29, 26,
- 26, 24, 24, 23, 23, 22, 22, 22, 22, 23, 23, 22, 22, 22, 22, 21, 21, 20,
- 20, 19, 19, 18, 18, 18, 18, 17, 28, 27, 27, 26, 26, 24, 24, 22, 22, 22,
- 22, 21, 21, 22, 22, 23, 23, 22, 22, 22, 22, 21, 21, 20, 20, 20, 20, 19,
- 19, 19, 19, 18, 28, 27, 27, 26, 26, 24, 24, 22, 22, 22, 22, 21, 21, 22,
- 22, 23, 23, 22, 22, 22, 22, 21, 21, 20, 20, 20, 20, 19, 19, 19, 19, 18,
- 25, 24, 24, 24, 24, 23, 23, 22, 22, 21, 21, 20, 20, 21, 21, 21, 21, 20,
- 20, 20, 20, 20, 20, 19, 19, 19, 19, 18, 18, 18, 18, 17, 25, 24, 24, 24,
- 24, 23, 23, 22, 22, 21, 21, 20, 20, 21, 21, 21, 21, 20, 20, 20, 20, 20,
- 20, 19, 19, 19, 19, 18, 18, 18, 18, 17, 21, 22, 22, 22, 22, 22, 22, 21,
- 21, 20, 20, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 18, 18, 18,
- 18, 17, 17, 17, 17, 17, 21, 22, 22, 22, 22, 22, 22, 21, 21, 20, 20, 19,
- 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 18, 18, 18, 18, 17, 17, 17,
- 17, 17, 21, 22, 22, 23, 23, 22, 22, 22, 22, 21, 21, 19, 19, 19, 19, 19,
- 19, 18, 18, 18, 18, 18, 18, 17, 17, 17, 17, 16, 16, 16, 16, 16, 21, 22,
- 22, 23, 23, 22, 22, 22, 22, 21, 21, 19, 19, 19, 19, 19, 19, 18, 18, 18,
- 18, 18, 18, 17, 17, 17, 17, 16, 16, 16, 16, 16, 21, 22, 22, 23, 23, 23,
- 23, 23, 23, 21, 21, 19, 19, 19, 19, 18, 18, 17, 17, 17, 17, 17, 17, 16,
- 16, 16, 16, 15, 15, 15, 15, 15, 21, 22, 22, 23, 23, 23, 23, 23, 23, 21,
- 21, 19, 19, 19, 19, 18, 18, 17, 17, 17, 17, 17, 17, 16, 16, 16, 16, 15,
- 15, 15, 15, 15, 20, 21, 21, 22, 22, 22, 22, 22, 22, 20, 20, 19, 19, 18,
- 18, 17, 17, 17, 17, 16, 16, 16, 16, 15, 15, 15, 15, 14, 14, 14, 14, 14,
- 20, 21, 21, 22, 22, 22, 22, 22, 22, 20, 20, 19, 19, 18, 18, 17, 17, 17,
- 17, 16, 16, 16, 16, 15, 15, 15, 15, 14, 14, 14, 14, 14, 20, 20, 20, 22,
- 22, 22, 22, 22, 22, 20, 20, 19, 19, 18, 18, 17, 17, 16, 16, 16, 16, 15,
- 15, 15, 15, 14, 14, 14, 14, 14, 14, 13, 20, 20, 20, 22, 22, 22, 22, 22,
- 22, 20, 20, 19, 19, 18, 18, 17, 17, 16, 16, 16, 16, 15, 15, 15, 15, 14,
- 14, 14, 14, 14, 14, 13, 19, 20, 20, 21, 21, 21, 21, 21, 21, 20, 20, 19,
- 19, 18, 18, 17, 17, 16, 16, 15, 15, 14, 14, 14, 14, 14, 14, 13, 13, 13,
- 13, 13, 19, 20, 20, 21, 21, 21, 21, 21, 21, 20, 20, 19, 19, 18, 18, 17,
- 17, 16, 16, 15, 15, 14, 14, 14, 14, 14, 14, 13, 13, 13, 13, 13, 18, 19,
- 19, 20, 20, 20, 20, 20, 20, 19, 19, 18, 18, 17, 17, 16, 16, 15, 15, 15,
- 15, 14, 14, 13, 13, 13, 13, 12, 12, 12, 12, 12, 18, 19, 19, 20, 20, 20,
- 20, 20, 20, 19, 19, 18, 18, 17, 17, 16, 16, 15, 15, 15, 15, 14, 14, 13,
- 13, 13, 13, 12, 12, 12, 12, 12, 17, 18, 18, 19, 19, 19, 19, 20, 20, 19,
- 19, 18, 18, 17, 17, 16, 16, 15, 15, 14, 14, 14, 14, 13, 13, 12, 12, 12,
- 12, 12, 12, 12, 17, 18, 18, 19, 19, 19, 19, 20, 20, 19, 19, 18, 18, 17,
- 17, 16, 16, 15, 15, 14, 14, 14, 14, 13, 13, 12, 12, 12, 12, 12, 12, 12,
- 16, 17, 17, 18, 18, 18, 18, 19, 19, 18, 18, 17, 17, 16, 16, 15, 15, 14,
- 14, 14, 14, 13, 13, 12, 12, 12, 12, 12, 12, 11, 11, 11, 16, 17, 17, 18,
- 18, 18, 18, 19, 19, 18, 18, 17, 17, 16, 16, 15, 15, 14, 14, 14, 14, 13,
- 13, 12, 12, 12, 12, 12, 12, 11, 11, 11, 16, 17, 17, 18, 18, 18, 18, 19,
- 19, 18, 18, 17, 17, 16, 16, 15, 15, 14, 14, 14, 14, 13, 13, 12, 12, 12,
- 12, 11, 11, 11, 11, 11, 16, 17, 17, 18, 18, 18, 18, 19, 19, 18, 18, 17,
- 17, 16, 16, 15, 15, 14, 14, 14, 14, 13, 13, 12, 12, 12, 12, 11, 11, 11,
- 11, 11, 15, 16, 16, 17, 17, 17, 17, 18, 18, 17, 17, 17, 17, 16, 16, 15,
- 15, 14, 14, 13, 13, 13, 13, 12, 12, 12, 12, 11, 11, 11, 11, 11,
- /* Size 4x8 */
- 33, 22, 20, 17, 28, 22, 22, 18, 24, 20, 20, 18, 22, 19, 18, 16, 22, 19,
- 16, 14, 20, 19, 15, 13, 19, 18, 14, 12, 17, 17, 14, 11,
- /* Size 8x4 */
- 33, 28, 24, 22, 22, 20, 19, 17, 22, 22, 20, 19, 19, 19, 18, 17, 20, 22,
- 20, 18, 16, 15, 14, 14, 17, 18, 18, 16, 14, 13, 12, 11,
- /* Size 8x16 */
- 32, 33, 28, 21, 21, 20, 18, 16, 33, 33, 27, 22, 22, 20, 19, 17, 34, 32,
- 26, 22, 23, 21, 20, 18, 31, 28, 24, 22, 22, 22, 20, 18, 28, 26, 22, 22,
- 23, 22, 20, 19, 24, 24, 22, 20, 21, 20, 19, 18, 21, 22, 21, 19, 19, 19,
- 18, 17, 21, 22, 22, 19, 18, 18, 17, 16, 21, 23, 22, 19, 18, 17, 16, 15,
- 20, 22, 22, 19, 17, 16, 15, 14, 20, 21, 22, 19, 17, 16, 14, 14, 19, 20,
- 21, 19, 17, 15, 14, 13, 18, 20, 20, 18, 16, 15, 13, 12, 17, 19, 20, 18,
- 16, 14, 13, 12, 16, 18, 19, 17, 15, 14, 12, 12, 16, 17, 18, 17, 15, 14,
- 12, 11,
- /* Size 16x8 */
- 32, 33, 34, 31, 28, 24, 21, 21, 21, 20, 20, 19, 18, 17, 16, 16, 33, 33,
- 32, 28, 26, 24, 22, 22, 23, 22, 21, 20, 20, 19, 18, 17, 28, 27, 26, 24,
- 22, 22, 21, 22, 22, 22, 22, 21, 20, 20, 19, 18, 21, 22, 22, 22, 22, 20,
- 19, 19, 19, 19, 19, 19, 18, 18, 17, 17, 21, 22, 23, 22, 23, 21, 19, 18,
- 18, 17, 17, 17, 16, 16, 15, 15, 20, 20, 21, 22, 22, 20, 19, 18, 17, 16,
- 16, 15, 15, 14, 14, 14, 18, 19, 20, 20, 20, 19, 18, 17, 16, 15, 14, 14,
- 13, 13, 12, 12, 16, 17, 18, 18, 19, 18, 17, 16, 15, 14, 14, 13, 12, 12,
- 12, 11,
- /* Size 16x32 */
- 32, 33, 33, 28, 28, 21, 21, 21, 21, 20, 20, 18, 18, 16, 16, 16, 33, 33,
- 33, 27, 27, 22, 22, 22, 22, 20, 20, 19, 19, 17, 17, 16, 33, 33, 33, 27,
- 27, 22, 22, 22, 22, 20, 20, 19, 19, 17, 17, 16, 34, 32, 32, 26, 26, 22,
- 22, 23, 23, 21, 21, 20, 20, 18, 18, 17, 34, 32, 32, 26, 26, 22, 22, 23,
- 23, 21, 21, 20, 20, 18, 18, 17, 31, 28, 28, 24, 24, 22, 22, 22, 22, 22,
- 22, 20, 20, 18, 18, 17, 31, 28, 28, 24, 24, 22, 22, 22, 22, 22, 22, 20,
- 20, 18, 18, 17, 28, 26, 26, 22, 22, 22, 22, 23, 23, 22, 22, 20, 20, 19,
- 19, 18, 28, 26, 26, 22, 22, 22, 22, 23, 23, 22, 22, 20, 20, 19, 19, 18,
- 24, 24, 24, 22, 22, 20, 20, 21, 21, 20, 20, 19, 19, 18, 18, 17, 24, 24,
- 24, 22, 22, 20, 20, 21, 21, 20, 20, 19, 19, 18, 18, 17, 21, 22, 22, 21,
- 21, 19, 19, 19, 19, 19, 19, 18, 18, 17, 17, 17, 21, 22, 22, 21, 21, 19,
- 19, 19, 19, 19, 19, 18, 18, 17, 17, 17, 21, 22, 22, 22, 22, 19, 19, 18,
- 18, 18, 18, 17, 17, 16, 16, 16, 21, 22, 22, 22, 22, 19, 19, 18, 18, 18,
- 18, 17, 17, 16, 16, 16, 21, 23, 23, 22, 22, 19, 19, 18, 18, 17, 17, 16,
- 16, 15, 15, 15, 21, 23, 23, 22, 22, 19, 19, 18, 18, 17, 17, 16, 16, 15,
- 15, 15, 20, 22, 22, 22, 22, 19, 19, 17, 17, 16, 16, 15, 15, 14, 14, 14,
- 20, 22, 22, 22, 22, 19, 19, 17, 17, 16, 16, 15, 15, 14, 14, 14, 20, 21,
- 21, 22, 22, 19, 19, 17, 17, 16, 16, 14, 14, 14, 14, 13, 20, 21, 21, 22,
- 22, 19, 19, 17, 17, 16, 16, 14, 14, 14, 14, 13, 19, 20, 20, 21, 21, 19,
- 19, 17, 17, 15, 15, 14, 14, 13, 13, 13, 19, 20, 20, 21, 21, 19, 19, 17,
- 17, 15, 15, 14, 14, 13, 13, 13, 18, 20, 20, 20, 20, 18, 18, 16, 16, 15,
- 15, 13, 13, 12, 12, 12, 18, 20, 20, 20, 20, 18, 18, 16, 16, 15, 15, 13,
- 13, 12, 12, 12, 17, 19, 19, 20, 20, 18, 18, 16, 16, 14, 14, 13, 13, 12,
- 12, 12, 17, 19, 19, 20, 20, 18, 18, 16, 16, 14, 14, 13, 13, 12, 12, 12,
- 16, 18, 18, 19, 19, 17, 17, 15, 15, 14, 14, 12, 12, 12, 12, 11, 16, 18,
- 18, 19, 19, 17, 17, 15, 15, 14, 14, 12, 12, 12, 12, 11, 16, 17, 17, 18,
- 18, 17, 17, 15, 15, 14, 14, 12, 12, 11, 11, 11, 16, 17, 17, 18, 18, 17,
- 17, 15, 15, 14, 14, 12, 12, 11, 11, 11, 16, 17, 17, 18, 18, 16, 16, 15,
- 15, 13, 13, 12, 12, 11, 11, 11,
- /* Size 32x16 */
- 32, 33, 33, 34, 34, 31, 31, 28, 28, 24, 24, 21, 21, 21, 21, 21, 21, 20,
- 20, 20, 20, 19, 19, 18, 18, 17, 17, 16, 16, 16, 16, 16, 33, 33, 33, 32,
- 32, 28, 28, 26, 26, 24, 24, 22, 22, 22, 22, 23, 23, 22, 22, 21, 21, 20,
- 20, 20, 20, 19, 19, 18, 18, 17, 17, 17, 33, 33, 33, 32, 32, 28, 28, 26,
- 26, 24, 24, 22, 22, 22, 22, 23, 23, 22, 22, 21, 21, 20, 20, 20, 20, 19,
- 19, 18, 18, 17, 17, 17, 28, 27, 27, 26, 26, 24, 24, 22, 22, 22, 22, 21,
- 21, 22, 22, 22, 22, 22, 22, 22, 22, 21, 21, 20, 20, 20, 20, 19, 19, 18,
- 18, 18, 28, 27, 27, 26, 26, 24, 24, 22, 22, 22, 22, 21, 21, 22, 22, 22,
- 22, 22, 22, 22, 22, 21, 21, 20, 20, 20, 20, 19, 19, 18, 18, 18, 21, 22,
- 22, 22, 22, 22, 22, 22, 22, 20, 20, 19, 19, 19, 19, 19, 19, 19, 19, 19,
- 19, 19, 19, 18, 18, 18, 18, 17, 17, 17, 17, 16, 21, 22, 22, 22, 22, 22,
- 22, 22, 22, 20, 20, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 18,
- 18, 18, 18, 17, 17, 17, 17, 16, 21, 22, 22, 23, 23, 22, 22, 23, 23, 21,
- 21, 19, 19, 18, 18, 18, 18, 17, 17, 17, 17, 17, 17, 16, 16, 16, 16, 15,
- 15, 15, 15, 15, 21, 22, 22, 23, 23, 22, 22, 23, 23, 21, 21, 19, 19, 18,
- 18, 18, 18, 17, 17, 17, 17, 17, 17, 16, 16, 16, 16, 15, 15, 15, 15, 15,
- 20, 20, 20, 21, 21, 22, 22, 22, 22, 20, 20, 19, 19, 18, 18, 17, 17, 16,
- 16, 16, 16, 15, 15, 15, 15, 14, 14, 14, 14, 14, 14, 13, 20, 20, 20, 21,
- 21, 22, 22, 22, 22, 20, 20, 19, 19, 18, 18, 17, 17, 16, 16, 16, 16, 15,
- 15, 15, 15, 14, 14, 14, 14, 14, 14, 13, 18, 19, 19, 20, 20, 20, 20, 20,
- 20, 19, 19, 18, 18, 17, 17, 16, 16, 15, 15, 14, 14, 14, 14, 13, 13, 13,
- 13, 12, 12, 12, 12, 12, 18, 19, 19, 20, 20, 20, 20, 20, 20, 19, 19, 18,
- 18, 17, 17, 16, 16, 15, 15, 14, 14, 14, 14, 13, 13, 13, 13, 12, 12, 12,
- 12, 12, 16, 17, 17, 18, 18, 18, 18, 19, 19, 18, 18, 17, 17, 16, 16, 15,
- 15, 14, 14, 14, 14, 13, 13, 12, 12, 12, 12, 12, 12, 11, 11, 11, 16, 17,
- 17, 18, 18, 18, 18, 19, 19, 18, 18, 17, 17, 16, 16, 15, 15, 14, 14, 14,
- 14, 13, 13, 12, 12, 12, 12, 12, 12, 11, 11, 11, 16, 16, 16, 17, 17, 17,
- 17, 18, 18, 17, 17, 17, 17, 16, 16, 15, 15, 14, 14, 13, 13, 13, 13, 12,
- 12, 12, 12, 11, 11, 11, 11, 11,
- /* Size 4x16 */
- 33, 21, 20, 16, 33, 22, 20, 17, 32, 22, 21, 18, 28, 22, 22, 18, 26, 22,
- 22, 19, 24, 20, 20, 18, 22, 19, 19, 17, 22, 19, 18, 16, 23, 19, 17, 15,
- 22, 19, 16, 14, 21, 19, 16, 14, 20, 19, 15, 13, 20, 18, 15, 12, 19, 18,
- 14, 12, 18, 17, 14, 12, 17, 17, 14, 11,
- /* Size 16x4 */
- 33, 33, 32, 28, 26, 24, 22, 22, 23, 22, 21, 20, 20, 19, 18, 17, 21, 22,
- 22, 22, 22, 20, 19, 19, 19, 19, 19, 19, 18, 18, 17, 17, 20, 20, 21, 22,
- 22, 20, 19, 18, 17, 16, 16, 15, 15, 14, 14, 14, 16, 17, 18, 18, 19, 18,
- 17, 16, 15, 14, 14, 13, 12, 12, 12, 11,
- /* Size 8x32 */
- 32, 33, 28, 21, 21, 20, 18, 16, 33, 33, 27, 22, 22, 20, 19, 17, 33, 33,
- 27, 22, 22, 20, 19, 17, 34, 32, 26, 22, 23, 21, 20, 18, 34, 32, 26, 22,
- 23, 21, 20, 18, 31, 28, 24, 22, 22, 22, 20, 18, 31, 28, 24, 22, 22, 22,
- 20, 18, 28, 26, 22, 22, 23, 22, 20, 19, 28, 26, 22, 22, 23, 22, 20, 19,
- 24, 24, 22, 20, 21, 20, 19, 18, 24, 24, 22, 20, 21, 20, 19, 18, 21, 22,
- 21, 19, 19, 19, 18, 17, 21, 22, 21, 19, 19, 19, 18, 17, 21, 22, 22, 19,
- 18, 18, 17, 16, 21, 22, 22, 19, 18, 18, 17, 16, 21, 23, 22, 19, 18, 17,
- 16, 15, 21, 23, 22, 19, 18, 17, 16, 15, 20, 22, 22, 19, 17, 16, 15, 14,
- 20, 22, 22, 19, 17, 16, 15, 14, 20, 21, 22, 19, 17, 16, 14, 14, 20, 21,
- 22, 19, 17, 16, 14, 14, 19, 20, 21, 19, 17, 15, 14, 13, 19, 20, 21, 19,
- 17, 15, 14, 13, 18, 20, 20, 18, 16, 15, 13, 12, 18, 20, 20, 18, 16, 15,
- 13, 12, 17, 19, 20, 18, 16, 14, 13, 12, 17, 19, 20, 18, 16, 14, 13, 12,
- 16, 18, 19, 17, 15, 14, 12, 12, 16, 18, 19, 17, 15, 14, 12, 12, 16, 17,
- 18, 17, 15, 14, 12, 11, 16, 17, 18, 17, 15, 14, 12, 11, 16, 17, 18, 16,
- 15, 13, 12, 11,
- /* Size 32x8 */
- 32, 33, 33, 34, 34, 31, 31, 28, 28, 24, 24, 21, 21, 21, 21, 21, 21, 20,
- 20, 20, 20, 19, 19, 18, 18, 17, 17, 16, 16, 16, 16, 16, 33, 33, 33, 32,
- 32, 28, 28, 26, 26, 24, 24, 22, 22, 22, 22, 23, 23, 22, 22, 21, 21, 20,
- 20, 20, 20, 19, 19, 18, 18, 17, 17, 17, 28, 27, 27, 26, 26, 24, 24, 22,
- 22, 22, 22, 21, 21, 22, 22, 22, 22, 22, 22, 22, 22, 21, 21, 20, 20, 20,
- 20, 19, 19, 18, 18, 18, 21, 22, 22, 22, 22, 22, 22, 22, 22, 20, 20, 19,
- 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 18, 18, 18, 18, 17, 17, 17,
- 17, 16, 21, 22, 22, 23, 23, 22, 22, 23, 23, 21, 21, 19, 19, 18, 18, 18,
- 18, 17, 17, 17, 17, 17, 17, 16, 16, 16, 16, 15, 15, 15, 15, 15, 20, 20,
- 20, 21, 21, 22, 22, 22, 22, 20, 20, 19, 19, 18, 18, 17, 17, 16, 16, 16,
- 16, 15, 15, 15, 15, 14, 14, 14, 14, 14, 14, 13, 18, 19, 19, 20, 20, 20,
- 20, 20, 20, 19, 19, 18, 18, 17, 17, 16, 16, 15, 15, 14, 14, 14, 14, 13,
- 13, 13, 13, 12, 12, 12, 12, 12, 16, 17, 17, 18, 18, 18, 18, 19, 19, 18,
- 18, 17, 17, 16, 16, 15, 15, 14, 14, 14, 14, 13, 13, 12, 12, 12, 12, 12,
- 12, 11, 11, 11 },
- },
- {
- { /* Luma */
- /* Size 4x4 */
- 32, 31, 23, 17, 31, 26, 20, 16, 23, 20, 14, 12, 17, 16, 12, 9,
- /* Size 8x8 */
- 33, 32, 32, 29, 24, 20, 17, 15, 32, 32, 31, 29, 25, 21, 18, 16, 32, 31,
- 29, 27, 24, 21, 18, 16, 29, 29, 27, 21, 19, 17, 16, 14, 24, 25, 24, 19,
- 16, 14, 13, 12, 20, 21, 21, 17, 14, 13, 12, 11, 17, 18, 18, 16, 13, 12,
- 10, 9, 15, 16, 16, 14, 12, 11, 9, 9,
- /* Size 16x16 */
- 32, 33, 33, 33, 32, 30, 29, 27, 25, 23, 21, 19, 17, 16, 14, 13, 33, 32,
- 32, 32, 32, 30, 29, 28, 26, 24, 22, 20, 18, 17, 15, 13, 33, 32, 32, 32,
- 32, 31, 30, 28, 27, 25, 23, 21, 19, 17, 16, 14, 33, 32, 32, 31, 30, 29,
- 28, 27, 26, 24, 23, 20, 19, 17, 16, 14, 32, 32, 32, 30, 29, 28, 27, 26,
- 25, 24, 22, 21, 19, 18, 16, 15, 30, 30, 31, 29, 28, 26, 24, 23, 22, 21,
- 20, 19, 18, 16, 15, 14, 29, 29, 30, 28, 27, 24, 22, 21, 20, 19, 19, 17,
- 17, 15, 14, 13, 27, 28, 28, 27, 26, 23, 21, 20, 19, 18, 17, 16, 15, 14,
- 13, 12, 25, 26, 27, 26, 25, 22, 20, 19, 18, 17, 16, 15, 14, 14, 13, 12,
- 23, 24, 25, 24, 24, 21, 19, 18, 17, 16, 15, 14, 13, 13, 12, 11, 21, 22,
- 23, 23, 22, 20, 19, 17, 16, 15, 14, 13, 13, 12, 11, 11, 19, 20, 21, 20,
- 21, 19, 17, 16, 15, 14, 13, 12, 12, 11, 11, 10, 17, 18, 19, 19, 19, 18,
- 17, 15, 14, 13, 13, 12, 11, 10, 10, 9, 16, 17, 17, 17, 18, 16, 15, 14,
- 14, 13, 12, 11, 10, 10, 9, 9, 14, 15, 16, 16, 16, 15, 14, 13, 13, 12,
- 11, 11, 10, 9, 9, 8, 13, 13, 14, 14, 15, 14, 13, 12, 12, 11, 11, 10, 9,
- 9, 8, 8,
- /* Size 32x32 */
- 32, 33, 33, 33, 33, 33, 33, 32, 32, 32, 30, 30, 29, 28, 27, 26, 25, 23,
- 23, 21, 21, 19, 19, 18, 17, 17, 16, 15, 14, 14, 13, 13, 33, 33, 32, 32,
- 32, 32, 32, 32, 32, 32, 30, 30, 29, 29, 28, 27, 26, 24, 24, 22, 22, 20,
- 20, 19, 18, 17, 17, 16, 15, 15, 13, 13, 33, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 30, 30, 29, 29, 28, 27, 26, 24, 24, 22, 22, 20, 20, 19, 18, 17,
- 17, 16, 15, 15, 13, 13, 33, 32, 32, 32, 32, 32, 32, 32, 32, 32, 31, 31,
- 30, 30, 28, 27, 26, 25, 24, 23, 23, 21, 20, 19, 19, 18, 17, 17, 16, 16,
- 14, 14, 33, 32, 32, 32, 32, 32, 32, 32, 32, 31, 31, 31, 30, 30, 28, 28,
- 27, 25, 25, 23, 23, 21, 21, 20, 19, 18, 17, 17, 16, 16, 14, 14, 33, 32,
- 32, 32, 32, 32, 31, 31, 31, 31, 30, 30, 29, 29, 28, 27, 26, 25, 24, 23,
- 23, 21, 21, 20, 19, 18, 17, 17, 16, 16, 14, 14, 33, 32, 32, 32, 32, 31,
- 31, 31, 30, 30, 29, 29, 28, 28, 27, 26, 26, 24, 24, 23, 23, 21, 20, 20,
- 19, 18, 17, 17, 16, 16, 14, 14, 32, 32, 32, 32, 32, 31, 31, 30, 30, 30,
- 29, 29, 28, 28, 27, 26, 26, 24, 24, 23, 23, 21, 21, 20, 19, 18, 17, 17,
- 16, 16, 15, 15, 32, 32, 32, 32, 32, 31, 30, 30, 29, 29, 28, 28, 27, 27,
- 26, 26, 25, 24, 24, 22, 22, 21, 21, 20, 19, 19, 18, 17, 16, 16, 15, 15,
- 32, 32, 32, 32, 31, 31, 30, 30, 29, 29, 28, 28, 27, 27, 26, 25, 25, 24,
- 24, 22, 22, 21, 20, 20, 19, 18, 18, 17, 16, 16, 15, 15, 30, 30, 30, 31,
- 31, 30, 29, 29, 28, 28, 26, 26, 24, 24, 23, 23, 22, 22, 21, 20, 20, 19,
- 19, 18, 18, 17, 16, 16, 15, 15, 14, 14, 30, 30, 30, 31, 31, 30, 29, 29,
- 28, 28, 26, 26, 24, 24, 23, 23, 22, 22, 21, 20, 20, 19, 19, 18, 18, 17,
- 16, 16, 15, 15, 14, 14, 29, 29, 29, 30, 30, 29, 28, 28, 27, 27, 24, 24,
- 22, 22, 21, 21, 20, 20, 19, 19, 19, 18, 17, 17, 17, 16, 15, 15, 14, 14,
- 13, 13, 28, 29, 29, 30, 30, 29, 28, 28, 27, 27, 24, 24, 22, 21, 20, 20,
- 20, 19, 19, 18, 18, 17, 17, 17, 16, 16, 15, 15, 14, 14, 13, 13, 27, 28,
- 28, 28, 28, 28, 27, 27, 26, 26, 23, 23, 21, 20, 20, 20, 19, 18, 18, 17,
- 17, 17, 16, 16, 15, 15, 14, 14, 13, 13, 12, 12, 26, 27, 27, 27, 28, 27,
- 26, 26, 26, 25, 23, 23, 21, 20, 20, 19, 19, 18, 18, 17, 17, 16, 16, 15,
- 15, 14, 14, 14, 13, 13, 12, 12, 25, 26, 26, 26, 27, 26, 26, 26, 25, 25,
- 22, 22, 20, 20, 19, 19, 18, 17, 17, 16, 16, 15, 15, 15, 14, 14, 14, 13,
- 13, 13, 12, 12, 23, 24, 24, 25, 25, 25, 24, 24, 24, 24, 22, 22, 20, 19,
- 18, 18, 17, 16, 16, 15, 15, 14, 14, 14, 14, 13, 13, 13, 12, 12, 11, 11,
- 23, 24, 24, 24, 25, 24, 24, 24, 24, 24, 21, 21, 19, 19, 18, 18, 17, 16,
- 16, 15, 15, 14, 14, 14, 13, 13, 13, 12, 12, 12, 11, 11, 21, 22, 22, 23,
- 23, 23, 23, 23, 22, 22, 20, 20, 19, 18, 17, 17, 16, 15, 15, 14, 14, 14,
- 13, 13, 13, 12, 12, 12, 11, 11, 11, 11, 21, 22, 22, 23, 23, 23, 23, 23,
- 22, 22, 20, 20, 19, 18, 17, 17, 16, 15, 15, 14, 14, 14, 13, 13, 13, 12,
- 12, 12, 11, 11, 11, 11, 19, 20, 20, 21, 21, 21, 21, 21, 21, 21, 19, 19,
- 18, 17, 17, 16, 15, 14, 14, 14, 14, 13, 13, 12, 12, 12, 11, 11, 11, 11,
- 10, 10, 19, 20, 20, 20, 21, 21, 20, 21, 21, 20, 19, 19, 17, 17, 16, 16,
- 15, 14, 14, 13, 13, 13, 12, 12, 12, 12, 11, 11, 11, 10, 10, 10, 18, 19,
- 19, 19, 20, 20, 20, 20, 20, 20, 18, 18, 17, 17, 16, 15, 15, 14, 14, 13,
- 13, 12, 12, 12, 11, 11, 11, 10, 10, 10, 9, 9, 17, 18, 18, 19, 19, 19,
- 19, 19, 19, 19, 18, 18, 17, 16, 15, 15, 14, 14, 13, 13, 13, 12, 12, 11,
- 11, 11, 10, 10, 10, 10, 9, 9, 17, 17, 17, 18, 18, 18, 18, 18, 19, 18,
- 17, 17, 16, 16, 15, 14, 14, 13, 13, 12, 12, 12, 12, 11, 11, 10, 10, 10,
- 10, 9, 9, 9, 16, 17, 17, 17, 17, 17, 17, 17, 18, 18, 16, 16, 15, 15, 14,
- 14, 14, 13, 13, 12, 12, 11, 11, 11, 10, 10, 10, 10, 9, 9, 9, 9, 15, 16,
- 16, 17, 17, 17, 17, 17, 17, 17, 16, 16, 15, 15, 14, 14, 13, 13, 12, 12,
- 12, 11, 11, 10, 10, 10, 10, 9, 9, 9, 9, 9, 14, 15, 15, 16, 16, 16, 16,
- 16, 16, 16, 15, 15, 14, 14, 13, 13, 13, 12, 12, 11, 11, 11, 11, 10, 10,
- 10, 9, 9, 9, 9, 8, 8, 14, 15, 15, 16, 16, 16, 16, 16, 16, 16, 15, 15,
- 14, 14, 13, 13, 13, 12, 12, 11, 11, 11, 10, 10, 10, 9, 9, 9, 9, 9, 8, 8,
- 13, 13, 13, 14, 14, 14, 14, 15, 15, 15, 14, 14, 13, 13, 12, 12, 12, 11,
- 11, 11, 11, 10, 10, 9, 9, 9, 9, 9, 8, 8, 8, 8, 13, 13, 13, 14, 14, 14,
- 14, 15, 15, 15, 14, 14, 13, 13, 12, 12, 12, 11, 11, 11, 11, 10, 10, 9,
- 9, 9, 9, 9, 8, 8, 8, 8,
- /* Size 4x8 */
- 32, 30, 24, 17, 32, 30, 24, 17, 31, 28, 23, 18, 29, 24, 19, 15, 25, 21,
- 16, 13, 21, 19, 14, 11, 18, 17, 13, 10, 16, 15, 12, 9,
- /* Size 8x4 */
- 32, 32, 31, 29, 25, 21, 18, 16, 30, 30, 28, 24, 21, 19, 17, 15, 24, 24,
- 23, 19, 16, 14, 13, 12, 17, 17, 18, 15, 13, 11, 10, 9,
- /* Size 8x16 */
- 32, 33, 32, 28, 23, 19, 17, 14, 33, 32, 32, 29, 24, 20, 17, 15, 33, 32,
- 31, 30, 25, 21, 18, 16, 32, 32, 30, 28, 24, 20, 18, 16, 32, 31, 29, 27,
- 24, 21, 18, 16, 30, 30, 28, 24, 21, 19, 17, 15, 29, 30, 27, 22, 20, 17,
- 16, 14, 27, 28, 26, 21, 18, 16, 15, 13, 25, 26, 25, 20, 17, 15, 14, 13,
- 23, 24, 24, 19, 16, 14, 13, 12, 21, 23, 22, 18, 15, 13, 12, 11, 19, 21,
- 20, 17, 14, 12, 11, 10, 18, 19, 19, 16, 14, 12, 11, 10, 16, 17, 18, 15,
- 13, 11, 10, 9, 14, 16, 16, 14, 12, 11, 9, 9, 13, 14, 15, 13, 11, 10, 9,
- 8,
- /* Size 16x8 */
- 32, 33, 33, 32, 32, 30, 29, 27, 25, 23, 21, 19, 18, 16, 14, 13, 33, 32,
- 32, 32, 31, 30, 30, 28, 26, 24, 23, 21, 19, 17, 16, 14, 32, 32, 31, 30,
- 29, 28, 27, 26, 25, 24, 22, 20, 19, 18, 16, 15, 28, 29, 30, 28, 27, 24,
- 22, 21, 20, 19, 18, 17, 16, 15, 14, 13, 23, 24, 25, 24, 24, 21, 20, 18,
- 17, 16, 15, 14, 14, 13, 12, 11, 19, 20, 21, 20, 21, 19, 17, 16, 15, 14,
- 13, 12, 12, 11, 11, 10, 17, 17, 18, 18, 18, 17, 16, 15, 14, 13, 12, 11,
- 11, 10, 9, 9, 14, 15, 16, 16, 16, 15, 14, 13, 13, 12, 11, 10, 10, 9, 9,
- 8,
- /* Size 16x32 */
- 32, 33, 33, 32, 32, 30, 28, 27, 23, 23, 19, 19, 17, 16, 14, 13, 33, 32,
- 32, 32, 32, 30, 29, 28, 24, 24, 20, 20, 17, 17, 15, 14, 33, 32, 32, 32,
- 32, 30, 29, 28, 24, 24, 20, 20, 17, 17, 15, 14, 33, 32, 32, 32, 32, 31,
- 29, 28, 25, 24, 20, 20, 18, 17, 15, 14, 33, 32, 32, 32, 31, 31, 30, 28,
- 25, 25, 21, 21, 18, 17, 16, 14, 33, 32, 32, 31, 31, 30, 29, 28, 25, 24,
- 21, 21, 18, 17, 16, 14, 32, 32, 32, 31, 30, 29, 28, 27, 24, 24, 20, 20,
- 18, 17, 16, 14, 32, 32, 32, 30, 30, 29, 28, 27, 24, 24, 21, 21, 18, 17,
- 16, 15, 32, 32, 31, 30, 29, 28, 27, 26, 24, 24, 21, 21, 18, 18, 16, 15,
- 32, 31, 31, 30, 29, 28, 26, 26, 24, 23, 20, 20, 18, 18, 16, 15, 30, 30,
- 30, 28, 28, 26, 24, 23, 21, 21, 19, 19, 17, 16, 15, 14, 30, 30, 30, 28,
- 28, 26, 24, 23, 21, 21, 19, 19, 17, 16, 15, 14, 29, 30, 30, 28, 27, 24,
- 22, 21, 20, 19, 17, 17, 16, 15, 14, 13, 28, 29, 30, 28, 27, 24, 21, 21,
- 19, 19, 17, 17, 16, 15, 14, 13, 27, 28, 28, 27, 26, 23, 21, 20, 18, 18,
- 16, 16, 15, 14, 13, 13, 26, 27, 28, 26, 26, 23, 20, 20, 18, 18, 16, 16,
- 14, 14, 13, 12, 25, 26, 26, 25, 25, 22, 20, 19, 17, 17, 15, 15, 14, 13,
- 13, 12, 23, 25, 25, 24, 24, 21, 19, 18, 16, 16, 14, 14, 13, 13, 12, 11,
- 23, 24, 24, 24, 24, 21, 19, 18, 16, 16, 14, 14, 13, 13, 12, 11, 21, 23,
- 23, 22, 22, 20, 18, 17, 15, 15, 13, 13, 12, 12, 11, 11, 21, 23, 23, 22,
- 22, 20, 18, 17, 15, 15, 13, 13, 12, 12, 11, 11, 19, 21, 21, 21, 21, 19,
- 17, 17, 14, 14, 13, 13, 12, 11, 10, 10, 19, 20, 21, 20, 20, 19, 17, 16,
- 14, 14, 12, 12, 11, 11, 10, 10, 18, 19, 20, 20, 20, 18, 17, 16, 14, 14,
- 12, 12, 11, 11, 10, 9, 18, 19, 19, 19, 19, 18, 16, 15, 14, 13, 12, 12,
- 11, 10, 10, 9, 17, 18, 18, 18, 18, 17, 16, 15, 13, 13, 12, 12, 10, 10,
- 9, 9, 16, 17, 17, 17, 18, 16, 15, 14, 13, 13, 11, 11, 10, 10, 9, 9, 15,
- 17, 17, 17, 17, 16, 15, 14, 13, 12, 11, 11, 10, 10, 9, 9, 14, 16, 16,
- 16, 16, 15, 14, 13, 12, 12, 11, 11, 9, 9, 9, 8, 14, 16, 16, 16, 16, 15,
- 14, 13, 12, 12, 10, 10, 9, 9, 9, 8, 13, 14, 14, 14, 15, 14, 13, 12, 11,
- 11, 10, 10, 9, 9, 8, 8, 13, 14, 14, 14, 15, 14, 13, 12, 11, 11, 10, 10,
- 9, 9, 8, 8,
- /* Size 32x16 */
- 32, 33, 33, 33, 33, 33, 32, 32, 32, 32, 30, 30, 29, 28, 27, 26, 25, 23,
- 23, 21, 21, 19, 19, 18, 18, 17, 16, 15, 14, 14, 13, 13, 33, 32, 32, 32,
- 32, 32, 32, 32, 32, 31, 30, 30, 30, 29, 28, 27, 26, 25, 24, 23, 23, 21,
- 20, 19, 19, 18, 17, 17, 16, 16, 14, 14, 33, 32, 32, 32, 32, 32, 32, 32,
- 31, 31, 30, 30, 30, 30, 28, 28, 26, 25, 24, 23, 23, 21, 21, 20, 19, 18,
- 17, 17, 16, 16, 14, 14, 32, 32, 32, 32, 32, 31, 31, 30, 30, 30, 28, 28,
- 28, 28, 27, 26, 25, 24, 24, 22, 22, 21, 20, 20, 19, 18, 17, 17, 16, 16,
- 14, 14, 32, 32, 32, 32, 31, 31, 30, 30, 29, 29, 28, 28, 27, 27, 26, 26,
- 25, 24, 24, 22, 22, 21, 20, 20, 19, 18, 18, 17, 16, 16, 15, 15, 30, 30,
- 30, 31, 31, 30, 29, 29, 28, 28, 26, 26, 24, 24, 23, 23, 22, 21, 21, 20,
- 20, 19, 19, 18, 18, 17, 16, 16, 15, 15, 14, 14, 28, 29, 29, 29, 30, 29,
- 28, 28, 27, 26, 24, 24, 22, 21, 21, 20, 20, 19, 19, 18, 18, 17, 17, 17,
- 16, 16, 15, 15, 14, 14, 13, 13, 27, 28, 28, 28, 28, 28, 27, 27, 26, 26,
- 23, 23, 21, 21, 20, 20, 19, 18, 18, 17, 17, 17, 16, 16, 15, 15, 14, 14,
- 13, 13, 12, 12, 23, 24, 24, 25, 25, 25, 24, 24, 24, 24, 21, 21, 20, 19,
- 18, 18, 17, 16, 16, 15, 15, 14, 14, 14, 14, 13, 13, 13, 12, 12, 11, 11,
- 23, 24, 24, 24, 25, 24, 24, 24, 24, 23, 21, 21, 19, 19, 18, 18, 17, 16,
- 16, 15, 15, 14, 14, 14, 13, 13, 13, 12, 12, 12, 11, 11, 19, 20, 20, 20,
- 21, 21, 20, 21, 21, 20, 19, 19, 17, 17, 16, 16, 15, 14, 14, 13, 13, 13,
- 12, 12, 12, 12, 11, 11, 11, 10, 10, 10, 19, 20, 20, 20, 21, 21, 20, 21,
- 21, 20, 19, 19, 17, 17, 16, 16, 15, 14, 14, 13, 13, 13, 12, 12, 12, 12,
- 11, 11, 11, 10, 10, 10, 17, 17, 17, 18, 18, 18, 18, 18, 18, 18, 17, 17,
- 16, 16, 15, 14, 14, 13, 13, 12, 12, 12, 11, 11, 11, 10, 10, 10, 9, 9, 9,
- 9, 16, 17, 17, 17, 17, 17, 17, 17, 18, 18, 16, 16, 15, 15, 14, 14, 13,
- 13, 13, 12, 12, 11, 11, 11, 10, 10, 10, 10, 9, 9, 9, 9, 14, 15, 15, 15,
- 16, 16, 16, 16, 16, 16, 15, 15, 14, 14, 13, 13, 13, 12, 12, 11, 11, 10,
- 10, 10, 10, 9, 9, 9, 9, 9, 8, 8, 13, 14, 14, 14, 14, 14, 14, 15, 15, 15,
- 14, 14, 13, 13, 13, 12, 12, 11, 11, 11, 11, 10, 10, 9, 9, 9, 9, 9, 8, 8,
- 8, 8,
- /* Size 4x16 */
- 33, 30, 23, 16, 32, 30, 24, 17, 32, 31, 25, 17, 32, 29, 24, 17, 32, 28,
- 24, 18, 30, 26, 21, 16, 30, 24, 19, 15, 28, 23, 18, 14, 26, 22, 17, 13,
- 24, 21, 16, 13, 23, 20, 15, 12, 20, 19, 14, 11, 19, 18, 13, 10, 17, 16,
- 13, 10, 16, 15, 12, 9, 14, 14, 11, 9,
- /* Size 16x4 */
- 33, 32, 32, 32, 32, 30, 30, 28, 26, 24, 23, 20, 19, 17, 16, 14, 30, 30,
- 31, 29, 28, 26, 24, 23, 22, 21, 20, 19, 18, 16, 15, 14, 23, 24, 25, 24,
- 24, 21, 19, 18, 17, 16, 15, 14, 13, 13, 12, 11, 16, 17, 17, 17, 18, 16,
- 15, 14, 13, 13, 12, 11, 10, 10, 9, 9,
- /* Size 8x32 */
- 32, 33, 32, 28, 23, 19, 17, 14, 33, 32, 32, 29, 24, 20, 17, 15, 33, 32,
- 32, 29, 24, 20, 17, 15, 33, 32, 32, 29, 25, 20, 18, 15, 33, 32, 31, 30,
- 25, 21, 18, 16, 33, 32, 31, 29, 25, 21, 18, 16, 32, 32, 30, 28, 24, 20,
- 18, 16, 32, 32, 30, 28, 24, 21, 18, 16, 32, 31, 29, 27, 24, 21, 18, 16,
- 32, 31, 29, 26, 24, 20, 18, 16, 30, 30, 28, 24, 21, 19, 17, 15, 30, 30,
- 28, 24, 21, 19, 17, 15, 29, 30, 27, 22, 20, 17, 16, 14, 28, 30, 27, 21,
- 19, 17, 16, 14, 27, 28, 26, 21, 18, 16, 15, 13, 26, 28, 26, 20, 18, 16,
- 14, 13, 25, 26, 25, 20, 17, 15, 14, 13, 23, 25, 24, 19, 16, 14, 13, 12,
- 23, 24, 24, 19, 16, 14, 13, 12, 21, 23, 22, 18, 15, 13, 12, 11, 21, 23,
- 22, 18, 15, 13, 12, 11, 19, 21, 21, 17, 14, 13, 12, 10, 19, 21, 20, 17,
- 14, 12, 11, 10, 18, 20, 20, 17, 14, 12, 11, 10, 18, 19, 19, 16, 14, 12,
- 11, 10, 17, 18, 18, 16, 13, 12, 10, 9, 16, 17, 18, 15, 13, 11, 10, 9,
- 15, 17, 17, 15, 13, 11, 10, 9, 14, 16, 16, 14, 12, 11, 9, 9, 14, 16, 16,
- 14, 12, 10, 9, 9, 13, 14, 15, 13, 11, 10, 9, 8, 13, 14, 15, 13, 11, 10,
- 9, 8,
- /* Size 32x8 */
- 32, 33, 33, 33, 33, 33, 32, 32, 32, 32, 30, 30, 29, 28, 27, 26, 25, 23,
- 23, 21, 21, 19, 19, 18, 18, 17, 16, 15, 14, 14, 13, 13, 33, 32, 32, 32,
- 32, 32, 32, 32, 31, 31, 30, 30, 30, 30, 28, 28, 26, 25, 24, 23, 23, 21,
- 21, 20, 19, 18, 17, 17, 16, 16, 14, 14, 32, 32, 32, 32, 31, 31, 30, 30,
- 29, 29, 28, 28, 27, 27, 26, 26, 25, 24, 24, 22, 22, 21, 20, 20, 19, 18,
- 18, 17, 16, 16, 15, 15, 28, 29, 29, 29, 30, 29, 28, 28, 27, 26, 24, 24,
- 22, 21, 21, 20, 20, 19, 19, 18, 18, 17, 17, 17, 16, 16, 15, 15, 14, 14,
- 13, 13, 23, 24, 24, 25, 25, 25, 24, 24, 24, 24, 21, 21, 20, 19, 18, 18,
- 17, 16, 16, 15, 15, 14, 14, 14, 14, 13, 13, 13, 12, 12, 11, 11, 19, 20,
- 20, 20, 21, 21, 20, 21, 21, 20, 19, 19, 17, 17, 16, 16, 15, 14, 14, 13,
- 13, 13, 12, 12, 12, 12, 11, 11, 11, 10, 10, 10, 17, 17, 17, 18, 18, 18,
- 18, 18, 18, 18, 17, 17, 16, 16, 15, 14, 14, 13, 13, 12, 12, 12, 11, 11,
- 11, 10, 10, 10, 9, 9, 9, 9, 14, 15, 15, 15, 16, 16, 16, 16, 16, 16, 15,
- 15, 14, 14, 13, 13, 13, 12, 12, 11, 11, 10, 10, 10, 10, 9, 9, 9, 9, 9,
- 8, 8 },
- { /* Chroma */
- /* Size 4x4 */
- 33, 24, 22, 19, 24, 21, 20, 19, 22, 20, 17, 15, 19, 19, 15, 13,
- /* Size 8x8 */
- 33, 32, 27, 21, 22, 20, 19, 18, 32, 29, 24, 22, 23, 22, 20, 19, 27, 24,
- 22, 21, 23, 22, 21, 20, 21, 22, 21, 19, 19, 19, 18, 18, 22, 23, 23, 19,
- 18, 17, 16, 16, 20, 22, 22, 19, 17, 16, 15, 14, 19, 20, 21, 18, 16, 15,
- 14, 13, 18, 19, 20, 18, 16, 14, 13, 12,
- /* Size 16x16 */
- 32, 33, 34, 31, 28, 25, 22, 21, 21, 21, 20, 20, 19, 18, 17, 16, 33, 33,
- 33, 30, 27, 24, 22, 22, 22, 22, 21, 20, 20, 19, 18, 17, 34, 33, 32, 29,
- 26, 24, 23, 22, 23, 23, 22, 22, 21, 20, 19, 18, 31, 30, 29, 26, 24, 23,
- 22, 22, 22, 23, 22, 22, 21, 20, 19, 18, 28, 27, 26, 24, 22, 22, 22, 22,
- 22, 23, 22, 22, 21, 20, 20, 19, 25, 24, 24, 23, 22, 21, 20, 20, 21, 21,
- 20, 20, 20, 19, 19, 18, 22, 22, 23, 22, 22, 20, 20, 20, 20, 20, 19, 19,
- 19, 18, 18, 17, 21, 22, 22, 22, 22, 20, 20, 19, 19, 19, 19, 18, 18, 18,
- 17, 17, 21, 22, 23, 22, 22, 21, 20, 19, 19, 18, 18, 17, 17, 17, 16, 16,
- 21, 22, 23, 23, 23, 21, 20, 19, 18, 17, 17, 17, 16, 16, 16, 15, 20, 21,
- 22, 22, 22, 20, 19, 19, 18, 17, 17, 16, 16, 15, 15, 14, 20, 20, 22, 22,
- 22, 20, 19, 18, 17, 17, 16, 16, 15, 15, 14, 14, 19, 20, 21, 21, 21, 20,
- 19, 18, 17, 16, 16, 15, 14, 14, 14, 13, 18, 19, 20, 20, 20, 19, 18, 18,
- 17, 16, 15, 15, 14, 13, 13, 12, 17, 18, 19, 19, 20, 19, 18, 17, 16, 16,
- 15, 14, 14, 13, 12, 12, 16, 17, 18, 18, 19, 18, 17, 17, 16, 15, 14, 14,
- 13, 12, 12, 12,
- /* Size 32x32 */
- 32, 33, 33, 34, 34, 32, 31, 30, 28, 28, 25, 25, 22, 21, 21, 21, 21, 21,
- 21, 20, 20, 20, 20, 19, 19, 18, 18, 18, 17, 17, 16, 16, 33, 33, 33, 33,
- 33, 32, 30, 29, 27, 27, 24, 24, 22, 21, 22, 22, 22, 22, 22, 21, 21, 20,
- 20, 20, 20, 19, 19, 19, 18, 18, 17, 17, 33, 33, 33, 33, 33, 31, 30, 29,
- 27, 26, 24, 24, 22, 22, 22, 22, 22, 22, 22, 21, 21, 21, 20, 20, 20, 19,
- 19, 19, 18, 18, 17, 17, 34, 33, 33, 33, 33, 31, 29, 28, 26, 26, 24, 24,
- 22, 22, 22, 22, 22, 23, 22, 22, 22, 21, 21, 20, 20, 20, 20, 19, 19, 19,
- 18, 18, 34, 33, 33, 33, 32, 31, 29, 28, 26, 26, 24, 24, 23, 22, 22, 23,
- 23, 23, 23, 22, 22, 22, 22, 21, 21, 20, 20, 20, 19, 19, 18, 18, 32, 32,
- 31, 31, 31, 29, 28, 27, 25, 24, 24, 24, 22, 22, 22, 22, 23, 23, 23, 22,
- 22, 22, 22, 21, 21, 20, 20, 20, 19, 19, 18, 18, 31, 30, 30, 29, 29, 28,
- 26, 26, 24, 24, 23, 23, 22, 22, 22, 22, 22, 23, 23, 22, 22, 22, 22, 21,
- 21, 20, 20, 20, 19, 19, 18, 18, 30, 29, 29, 28, 28, 27, 26, 25, 23, 23,
- 23, 23, 22, 22, 22, 22, 22, 23, 23, 22, 22, 22, 22, 21, 21, 20, 20, 20,
- 19, 19, 19, 19, 28, 27, 27, 26, 26, 25, 24, 23, 22, 22, 22, 22, 22, 21,
- 22, 22, 22, 23, 23, 22, 22, 22, 22, 21, 21, 21, 20, 20, 20, 20, 19, 19,
- 28, 27, 26, 26, 26, 24, 24, 23, 22, 22, 22, 22, 21, 21, 22, 22, 22, 23,
- 22, 22, 22, 22, 22, 21, 21, 21, 20, 20, 20, 20, 19, 19, 25, 24, 24, 24,
- 24, 24, 23, 23, 22, 22, 21, 21, 20, 20, 20, 21, 21, 21, 21, 20, 20, 20,
- 20, 20, 20, 20, 19, 19, 19, 19, 18, 18, 25, 24, 24, 24, 24, 24, 23, 23,
- 22, 22, 21, 21, 20, 20, 20, 21, 21, 21, 21, 20, 20, 20, 20, 20, 20, 20,
- 19, 19, 19, 19, 18, 18, 22, 22, 22, 22, 23, 22, 22, 22, 22, 21, 20, 20,
- 20, 20, 20, 20, 20, 20, 20, 19, 19, 19, 19, 19, 19, 19, 18, 18, 18, 18,
- 17, 17, 21, 21, 22, 22, 22, 22, 22, 22, 21, 21, 20, 20, 20, 19, 19, 19,
- 19, 19, 19, 19, 19, 19, 19, 19, 19, 18, 18, 18, 18, 18, 17, 17, 21, 22,
- 22, 22, 22, 22, 22, 22, 22, 22, 20, 20, 20, 19, 19, 19, 19, 19, 19, 19,
- 19, 18, 18, 18, 18, 18, 18, 17, 17, 17, 17, 17, 21, 22, 22, 22, 23, 22,
- 22, 22, 22, 22, 21, 21, 20, 19, 19, 19, 19, 19, 18, 18, 18, 18, 18, 18,
- 18, 17, 17, 17, 17, 17, 16, 16, 21, 22, 22, 22, 23, 23, 22, 22, 22, 22,
- 21, 21, 20, 19, 19, 19, 19, 18, 18, 18, 18, 17, 17, 17, 17, 17, 17, 17,
- 16, 16, 16, 16, 21, 22, 22, 23, 23, 23, 23, 23, 23, 23, 21, 21, 20, 19,
- 19, 19, 18, 18, 17, 17, 17, 17, 17, 17, 17, 16, 16, 16, 16, 16, 15, 15,
- 21, 22, 22, 22, 23, 23, 23, 23, 23, 22, 21, 21, 20, 19, 19, 18, 18, 17,
- 17, 17, 17, 17, 17, 17, 16, 16, 16, 16, 16, 16, 15, 15, 20, 21, 21, 22,
- 22, 22, 22, 22, 22, 22, 20, 20, 19, 19, 19, 18, 18, 17, 17, 17, 17, 16,
- 16, 16, 16, 16, 15, 15, 15, 15, 14, 14, 20, 21, 21, 22, 22, 22, 22, 22,
- 22, 22, 20, 20, 19, 19, 19, 18, 18, 17, 17, 17, 17, 16, 16, 16, 16, 16,
- 15, 15, 15, 15, 14, 14, 20, 20, 21, 21, 22, 22, 22, 22, 22, 22, 20, 20,
- 19, 19, 18, 18, 17, 17, 17, 16, 16, 16, 16, 15, 15, 15, 15, 14, 14, 14,
- 14, 14, 20, 20, 20, 21, 22, 22, 22, 22, 22, 22, 20, 20, 19, 19, 18, 18,
- 17, 17, 17, 16, 16, 16, 16, 15, 15, 15, 15, 14, 14, 14, 14, 14, 19, 20,
- 20, 20, 21, 21, 21, 21, 21, 21, 20, 20, 19, 19, 18, 18, 17, 17, 17, 16,
- 16, 15, 15, 15, 15, 14, 14, 14, 14, 14, 13, 13, 19, 20, 20, 20, 21, 21,
- 21, 21, 21, 21, 20, 20, 19, 19, 18, 18, 17, 17, 16, 16, 16, 15, 15, 15,
- 14, 14, 14, 14, 14, 13, 13, 13, 18, 19, 19, 20, 20, 20, 20, 20, 21, 21,
- 20, 20, 19, 18, 18, 17, 17, 16, 16, 16, 16, 15, 15, 14, 14, 14, 14, 14,
- 13, 13, 13, 13, 18, 19, 19, 20, 20, 20, 20, 20, 20, 20, 19, 19, 18, 18,
- 18, 17, 17, 16, 16, 15, 15, 15, 15, 14, 14, 14, 13, 13, 13, 13, 12, 12,
- 18, 19, 19, 19, 20, 20, 20, 20, 20, 20, 19, 19, 18, 18, 17, 17, 17, 16,
- 16, 15, 15, 14, 14, 14, 14, 14, 13, 13, 13, 13, 12, 12, 17, 18, 18, 19,
- 19, 19, 19, 19, 20, 20, 19, 19, 18, 18, 17, 17, 16, 16, 16, 15, 15, 14,
- 14, 14, 14, 13, 13, 13, 12, 12, 12, 12, 17, 18, 18, 19, 19, 19, 19, 19,
- 20, 20, 19, 19, 18, 18, 17, 17, 16, 16, 16, 15, 15, 14, 14, 14, 13, 13,
- 13, 13, 12, 12, 12, 12, 16, 17, 17, 18, 18, 18, 18, 19, 19, 19, 18, 18,
- 17, 17, 17, 16, 16, 15, 15, 14, 14, 14, 14, 13, 13, 13, 12, 12, 12, 12,
- 12, 12, 16, 17, 17, 18, 18, 18, 18, 19, 19, 19, 18, 18, 17, 17, 17, 16,
- 16, 15, 15, 14, 14, 14, 14, 13, 13, 13, 12, 12, 12, 12, 12, 12,
- /* Size 4x8 */
- 33, 24, 22, 19, 31, 23, 23, 20, 26, 22, 22, 20, 22, 20, 19, 18, 23, 21,
- 17, 16, 21, 20, 17, 15, 20, 20, 16, 14, 19, 19, 16, 13,
- /* Size 8x4 */
- 33, 31, 26, 22, 23, 21, 20, 19, 24, 23, 22, 20, 21, 20, 20, 19, 22, 23,
- 22, 19, 17, 17, 16, 16, 19, 20, 20, 18, 16, 15, 14, 13,
- /* Size 8x16 */
- 32, 33, 28, 21, 21, 20, 18, 17, 33, 33, 27, 22, 22, 20, 19, 18, 34, 32,
- 26, 22, 23, 21, 20, 19, 31, 28, 24, 22, 22, 22, 20, 19, 28, 26, 22, 22,
- 23, 22, 21, 20, 24, 24, 22, 20, 21, 20, 19, 18, 22, 22, 21, 20, 19, 19,
- 19, 18, 21, 22, 22, 19, 19, 18, 18, 17, 21, 23, 22, 19, 18, 17, 17, 16,
- 21, 23, 22, 19, 18, 17, 16, 16, 20, 22, 22, 19, 17, 16, 16, 15, 20, 21,
- 22, 19, 17, 16, 15, 14, 19, 20, 21, 19, 17, 15, 14, 13, 18, 20, 20, 18,
- 16, 15, 14, 13, 17, 19, 20, 18, 16, 14, 13, 12, 16, 18, 19, 17, 15, 14,
- 13, 12,
- /* Size 16x8 */
- 32, 33, 34, 31, 28, 24, 22, 21, 21, 21, 20, 20, 19, 18, 17, 16, 33, 33,
- 32, 28, 26, 24, 22, 22, 23, 23, 22, 21, 20, 20, 19, 18, 28, 27, 26, 24,
- 22, 22, 21, 22, 22, 22, 22, 22, 21, 20, 20, 19, 21, 22, 22, 22, 22, 20,
- 20, 19, 19, 19, 19, 19, 19, 18, 18, 17, 21, 22, 23, 22, 23, 21, 19, 19,
- 18, 18, 17, 17, 17, 16, 16, 15, 20, 20, 21, 22, 22, 20, 19, 18, 17, 17,
- 16, 16, 15, 15, 14, 14, 18, 19, 20, 20, 21, 19, 19, 18, 17, 16, 16, 15,
- 14, 14, 13, 13, 17, 18, 19, 19, 20, 18, 18, 17, 16, 16, 15, 14, 13, 13,
- 12, 12,
- /* Size 16x32 */
- 32, 33, 33, 29, 28, 24, 21, 21, 21, 21, 20, 20, 18, 18, 17, 16, 33, 33,
- 33, 28, 27, 24, 22, 22, 22, 22, 20, 20, 19, 19, 18, 17, 33, 33, 33, 28,
- 27, 24, 22, 22, 22, 22, 20, 20, 19, 19, 18, 17, 34, 32, 32, 28, 26, 24,
- 22, 22, 22, 22, 21, 21, 20, 20, 18, 18, 34, 32, 32, 28, 26, 24, 22, 22,
- 23, 23, 21, 21, 20, 20, 19, 18, 32, 31, 30, 26, 25, 23, 22, 22, 23, 23,
- 21, 21, 20, 20, 19, 18, 31, 29, 28, 26, 24, 23, 22, 22, 22, 22, 22, 22,
- 20, 20, 19, 18, 30, 28, 28, 24, 23, 23, 22, 22, 23, 22, 22, 22, 20, 20,
- 19, 19, 28, 26, 26, 23, 22, 22, 22, 22, 23, 22, 22, 22, 21, 20, 20, 19,
- 28, 26, 26, 23, 22, 22, 21, 22, 22, 22, 22, 22, 21, 20, 19, 19, 24, 24,
- 24, 22, 22, 21, 20, 20, 21, 21, 20, 20, 19, 19, 18, 18, 24, 24, 24, 22,
- 22, 21, 20, 20, 21, 21, 20, 20, 19, 19, 18, 18, 22, 22, 22, 22, 21, 20,
- 20, 20, 19, 19, 19, 19, 19, 18, 18, 17, 21, 22, 22, 22, 21, 20, 19, 19,
- 19, 19, 19, 19, 18, 18, 17, 17, 21, 22, 22, 22, 22, 20, 19, 19, 19, 19,
- 18, 18, 18, 18, 17, 17, 21, 22, 22, 22, 22, 20, 19, 19, 18, 18, 18, 18,
- 17, 17, 17, 16, 21, 22, 23, 22, 22, 21, 19, 19, 18, 18, 17, 17, 17, 17,
- 16, 16, 21, 23, 23, 23, 22, 21, 19, 19, 18, 17, 17, 17, 16, 16, 16, 15,
- 21, 22, 23, 22, 22, 21, 19, 19, 18, 17, 17, 17, 16, 16, 16, 15, 20, 22,
- 22, 22, 22, 20, 19, 19, 17, 17, 16, 16, 16, 15, 15, 14, 20, 22, 22, 22,
- 22, 20, 19, 19, 17, 17, 16, 16, 16, 15, 15, 14, 20, 21, 21, 22, 22, 20,
- 19, 18, 17, 17, 16, 16, 15, 15, 14, 14, 20, 21, 21, 22, 22, 20, 19, 18,
- 17, 17, 16, 16, 15, 14, 14, 14, 19, 20, 21, 21, 21, 20, 19, 18, 17, 17,
- 15, 15, 14, 14, 14, 13, 19, 20, 20, 21, 21, 20, 19, 18, 17, 16, 15, 15,
- 14, 14, 13, 13, 19, 20, 20, 20, 21, 20, 18, 18, 16, 16, 15, 15, 14, 14,
- 13, 13, 18, 20, 20, 20, 20, 19, 18, 18, 16, 16, 15, 15, 14, 13, 13, 12,
- 18, 19, 19, 20, 20, 19, 18, 17, 16, 16, 14, 14, 13, 13, 13, 12, 17, 19,
- 19, 19, 20, 19, 18, 17, 16, 16, 14, 14, 13, 13, 12, 12, 17, 19, 19, 19,
- 19, 19, 17, 17, 16, 16, 14, 14, 13, 13, 12, 12, 16, 18, 18, 18, 19, 18,
- 17, 17, 15, 15, 14, 14, 13, 12, 12, 12, 16, 18, 18, 18, 19, 18, 17, 17,
- 15, 15, 14, 14, 13, 12, 12, 12,
- /* Size 32x16 */
- 32, 33, 33, 34, 34, 32, 31, 30, 28, 28, 24, 24, 22, 21, 21, 21, 21, 21,
- 21, 20, 20, 20, 20, 19, 19, 19, 18, 18, 17, 17, 16, 16, 33, 33, 33, 32,
- 32, 31, 29, 28, 26, 26, 24, 24, 22, 22, 22, 22, 22, 23, 22, 22, 22, 21,
- 21, 20, 20, 20, 20, 19, 19, 19, 18, 18, 33, 33, 33, 32, 32, 30, 28, 28,
- 26, 26, 24, 24, 22, 22, 22, 22, 23, 23, 23, 22, 22, 21, 21, 21, 20, 20,
- 20, 19, 19, 19, 18, 18, 29, 28, 28, 28, 28, 26, 26, 24, 23, 23, 22, 22,
- 22, 22, 22, 22, 22, 23, 22, 22, 22, 22, 22, 21, 21, 20, 20, 20, 19, 19,
- 18, 18, 28, 27, 27, 26, 26, 25, 24, 23, 22, 22, 22, 22, 21, 21, 22, 22,
- 22, 22, 22, 22, 22, 22, 22, 21, 21, 21, 20, 20, 20, 19, 19, 19, 24, 24,
- 24, 24, 24, 23, 23, 23, 22, 22, 21, 21, 20, 20, 20, 20, 21, 21, 21, 20,
- 20, 20, 20, 20, 20, 20, 19, 19, 19, 19, 18, 18, 21, 22, 22, 22, 22, 22,
- 22, 22, 22, 21, 20, 20, 20, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19,
- 19, 18, 18, 18, 18, 17, 17, 17, 21, 22, 22, 22, 22, 22, 22, 22, 22, 22,
- 20, 20, 20, 19, 19, 19, 19, 19, 19, 19, 19, 18, 18, 18, 18, 18, 18, 17,
- 17, 17, 17, 17, 21, 22, 22, 22, 23, 23, 22, 23, 23, 22, 21, 21, 19, 19,
- 19, 18, 18, 18, 18, 17, 17, 17, 17, 17, 17, 16, 16, 16, 16, 16, 15, 15,
- 21, 22, 22, 22, 23, 23, 22, 22, 22, 22, 21, 21, 19, 19, 19, 18, 18, 17,
- 17, 17, 17, 17, 17, 17, 16, 16, 16, 16, 16, 16, 15, 15, 20, 20, 20, 21,
- 21, 21, 22, 22, 22, 22, 20, 20, 19, 19, 18, 18, 17, 17, 17, 16, 16, 16,
- 16, 15, 15, 15, 15, 14, 14, 14, 14, 14, 20, 20, 20, 21, 21, 21, 22, 22,
- 22, 22, 20, 20, 19, 19, 18, 18, 17, 17, 17, 16, 16, 16, 16, 15, 15, 15,
- 15, 14, 14, 14, 14, 14, 18, 19, 19, 20, 20, 20, 20, 20, 21, 21, 19, 19,
- 19, 18, 18, 17, 17, 16, 16, 16, 16, 15, 15, 14, 14, 14, 14, 13, 13, 13,
- 13, 13, 18, 19, 19, 20, 20, 20, 20, 20, 20, 20, 19, 19, 18, 18, 18, 17,
- 17, 16, 16, 15, 15, 15, 14, 14, 14, 14, 13, 13, 13, 13, 12, 12, 17, 18,
- 18, 18, 19, 19, 19, 19, 20, 19, 18, 18, 18, 17, 17, 17, 16, 16, 16, 15,
- 15, 14, 14, 14, 13, 13, 13, 13, 12, 12, 12, 12, 16, 17, 17, 18, 18, 18,
- 18, 19, 19, 19, 18, 18, 17, 17, 17, 16, 16, 15, 15, 14, 14, 14, 14, 13,
- 13, 13, 12, 12, 12, 12, 12, 12,
- /* Size 4x16 */
- 33, 24, 21, 18, 33, 24, 22, 19, 32, 24, 23, 20, 29, 23, 22, 20, 26, 22,
- 22, 20, 24, 21, 21, 19, 22, 20, 19, 18, 22, 20, 19, 18, 22, 21, 18, 17,
- 22, 21, 17, 16, 22, 20, 17, 15, 21, 20, 17, 14, 20, 20, 16, 14, 20, 19,
- 16, 13, 19, 19, 16, 13, 18, 18, 15, 12,
- /* Size 16x4 */
- 33, 33, 32, 29, 26, 24, 22, 22, 22, 22, 22, 21, 20, 20, 19, 18, 24, 24,
- 24, 23, 22, 21, 20, 20, 21, 21, 20, 20, 20, 19, 19, 18, 21, 22, 23, 22,
- 22, 21, 19, 19, 18, 17, 17, 17, 16, 16, 16, 15, 18, 19, 20, 20, 20, 19,
- 18, 18, 17, 16, 15, 14, 14, 13, 13, 12,
- /* Size 8x32 */
- 32, 33, 28, 21, 21, 20, 18, 17, 33, 33, 27, 22, 22, 20, 19, 18, 33, 33,
- 27, 22, 22, 20, 19, 18, 34, 32, 26, 22, 22, 21, 20, 18, 34, 32, 26, 22,
- 23, 21, 20, 19, 32, 30, 25, 22, 23, 21, 20, 19, 31, 28, 24, 22, 22, 22,
- 20, 19, 30, 28, 23, 22, 23, 22, 20, 19, 28, 26, 22, 22, 23, 22, 21, 20,
- 28, 26, 22, 21, 22, 22, 21, 19, 24, 24, 22, 20, 21, 20, 19, 18, 24, 24,
- 22, 20, 21, 20, 19, 18, 22, 22, 21, 20, 19, 19, 19, 18, 21, 22, 21, 19,
- 19, 19, 18, 17, 21, 22, 22, 19, 19, 18, 18, 17, 21, 22, 22, 19, 18, 18,
- 17, 17, 21, 23, 22, 19, 18, 17, 17, 16, 21, 23, 22, 19, 18, 17, 16, 16,
- 21, 23, 22, 19, 18, 17, 16, 16, 20, 22, 22, 19, 17, 16, 16, 15, 20, 22,
- 22, 19, 17, 16, 16, 15, 20, 21, 22, 19, 17, 16, 15, 14, 20, 21, 22, 19,
- 17, 16, 15, 14, 19, 21, 21, 19, 17, 15, 14, 14, 19, 20, 21, 19, 17, 15,
- 14, 13, 19, 20, 21, 18, 16, 15, 14, 13, 18, 20, 20, 18, 16, 15, 14, 13,
- 18, 19, 20, 18, 16, 14, 13, 13, 17, 19, 20, 18, 16, 14, 13, 12, 17, 19,
- 19, 17, 16, 14, 13, 12, 16, 18, 19, 17, 15, 14, 13, 12, 16, 18, 19, 17,
- 15, 14, 13, 12,
- /* Size 32x8 */
- 32, 33, 33, 34, 34, 32, 31, 30, 28, 28, 24, 24, 22, 21, 21, 21, 21, 21,
- 21, 20, 20, 20, 20, 19, 19, 19, 18, 18, 17, 17, 16, 16, 33, 33, 33, 32,
- 32, 30, 28, 28, 26, 26, 24, 24, 22, 22, 22, 22, 23, 23, 23, 22, 22, 21,
- 21, 21, 20, 20, 20, 19, 19, 19, 18, 18, 28, 27, 27, 26, 26, 25, 24, 23,
- 22, 22, 22, 22, 21, 21, 22, 22, 22, 22, 22, 22, 22, 22, 22, 21, 21, 21,
- 20, 20, 20, 19, 19, 19, 21, 22, 22, 22, 22, 22, 22, 22, 22, 21, 20, 20,
- 20, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 18, 18, 18, 18, 17,
- 17, 17, 21, 22, 22, 22, 23, 23, 22, 23, 23, 22, 21, 21, 19, 19, 19, 18,
- 18, 18, 18, 17, 17, 17, 17, 17, 17, 16, 16, 16, 16, 16, 15, 15, 20, 20,
- 20, 21, 21, 21, 22, 22, 22, 22, 20, 20, 19, 19, 18, 18, 17, 17, 17, 16,
- 16, 16, 16, 15, 15, 15, 15, 14, 14, 14, 14, 14, 18, 19, 19, 20, 20, 20,
- 20, 20, 21, 21, 19, 19, 19, 18, 18, 17, 17, 16, 16, 16, 16, 15, 15, 14,
- 14, 14, 14, 13, 13, 13, 13, 13, 17, 18, 18, 18, 19, 19, 19, 19, 20, 19,
- 18, 18, 18, 17, 17, 17, 16, 16, 16, 15, 15, 14, 14, 14, 13, 13, 13, 13,
- 12, 12, 12, 12 },
- },
- {
- { /* Luma */
- /* Size 4x4 */
- 32, 31, 24, 19, 31, 27, 22, 18, 24, 22, 16, 14, 19, 18, 14, 11,
- /* Size 8x8 */
- 33, 32, 32, 30, 27, 22, 20, 16, 32, 32, 32, 30, 28, 23, 21, 17, 32, 32,
- 29, 28, 26, 23, 21, 18, 30, 30, 28, 24, 22, 20, 18, 16, 27, 28, 26, 22,
- 19, 17, 16, 14, 22, 23, 23, 20, 17, 15, 14, 12, 20, 21, 21, 18, 16, 14,
- 12, 11, 16, 17, 18, 16, 14, 12, 11, 10,
- /* Size 16x16 */
- 32, 33, 33, 33, 32, 32, 30, 28, 27, 25, 23, 21, 19, 18, 17, 16, 33, 32,
- 32, 32, 32, 32, 30, 29, 27, 26, 24, 22, 20, 19, 18, 17, 33, 32, 32, 32,
- 32, 32, 31, 30, 28, 27, 25, 23, 21, 19, 18, 17, 33, 32, 32, 31, 31, 31,
- 29, 28, 27, 26, 24, 23, 21, 19, 18, 17, 32, 32, 32, 31, 30, 30, 28, 28,
- 26, 26, 24, 23, 21, 19, 19, 17, 32, 32, 32, 31, 30, 29, 28, 27, 26, 25,
- 24, 22, 21, 20, 19, 18, 30, 30, 31, 29, 28, 28, 26, 24, 23, 22, 22, 20,
- 19, 18, 17, 16, 28, 29, 30, 28, 28, 27, 24, 21, 20, 20, 19, 18, 17, 16,
- 16, 15, 27, 27, 28, 27, 26, 26, 23, 20, 20, 19, 18, 17, 16, 15, 15, 14,
- 25, 26, 27, 26, 26, 25, 22, 20, 19, 18, 17, 16, 15, 15, 14, 14, 23, 24,
- 25, 24, 24, 24, 22, 19, 18, 17, 16, 15, 14, 14, 13, 13, 21, 22, 23, 23,
- 23, 22, 20, 18, 17, 16, 15, 14, 13, 13, 12, 12, 19, 20, 21, 21, 21, 21,
- 19, 17, 16, 15, 14, 13, 12, 12, 12, 11, 18, 19, 19, 19, 19, 20, 18, 16,
- 15, 15, 14, 13, 12, 11, 11, 11, 17, 18, 18, 18, 19, 19, 17, 16, 15, 14,
- 13, 12, 12, 11, 11, 10, 16, 17, 17, 17, 17, 18, 16, 15, 14, 14, 13, 12,
- 11, 11, 10, 10,
- /* Size 32x32 */
- 32, 33, 33, 33, 33, 33, 33, 33, 32, 32, 32, 31, 30, 30, 28, 28, 27, 26,
- 25, 23, 23, 22, 21, 20, 19, 19, 18, 17, 17, 16, 16, 15, 33, 33, 33, 32,
- 32, 32, 32, 32, 32, 32, 32, 31, 30, 30, 29, 29, 27, 27, 26, 24, 24, 22,
- 22, 21, 20, 20, 18, 18, 17, 16, 16, 15, 33, 33, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 31, 30, 30, 29, 29, 27, 27, 26, 24, 24, 23, 22, 21, 20, 20,
- 19, 18, 18, 17, 17, 15, 33, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 31,
- 30, 30, 29, 29, 28, 27, 26, 24, 24, 23, 23, 22, 20, 20, 19, 19, 18, 17,
- 17, 16, 33, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 31, 31, 30, 30, 30,
- 28, 28, 27, 25, 25, 23, 23, 22, 21, 21, 19, 19, 18, 17, 17, 16, 33, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 31, 31, 30, 30, 30, 28, 28, 27, 25,
- 25, 23, 23, 22, 21, 21, 19, 19, 18, 17, 17, 16, 33, 32, 32, 32, 32, 32,
- 31, 31, 31, 31, 31, 30, 29, 29, 28, 28, 27, 26, 26, 24, 24, 23, 23, 22,
- 21, 21, 19, 19, 18, 17, 17, 16, 33, 32, 32, 32, 32, 32, 31, 31, 31, 30,
- 30, 29, 29, 28, 28, 28, 27, 26, 26, 24, 24, 23, 23, 22, 20, 20, 19, 19,
- 18, 17, 17, 16, 32, 32, 32, 32, 32, 32, 31, 31, 30, 30, 30, 29, 28, 28,
- 28, 28, 26, 26, 26, 24, 24, 23, 23, 22, 21, 21, 19, 19, 19, 17, 17, 16,
- 32, 32, 32, 32, 32, 32, 31, 30, 30, 29, 29, 28, 28, 28, 27, 27, 26, 26,
- 25, 24, 24, 23, 22, 22, 21, 21, 20, 19, 19, 18, 18, 17, 32, 32, 32, 32,
- 32, 32, 31, 30, 30, 29, 29, 28, 28, 28, 27, 27, 26, 26, 25, 24, 24, 23,
- 22, 22, 21, 21, 20, 19, 19, 18, 18, 17, 31, 31, 31, 31, 31, 31, 30, 29,
- 29, 28, 28, 27, 26, 26, 24, 24, 24, 23, 23, 22, 22, 21, 20, 20, 19, 19,
- 18, 18, 17, 17, 17, 16, 30, 30, 30, 30, 31, 31, 29, 29, 28, 28, 28, 26,
- 26, 25, 24, 24, 23, 23, 22, 22, 22, 20, 20, 20, 19, 19, 18, 18, 17, 16,
- 16, 15, 30, 30, 30, 30, 30, 30, 29, 28, 28, 28, 28, 26, 25, 24, 23, 23,
- 22, 22, 21, 20, 20, 20, 19, 19, 18, 18, 17, 17, 17, 16, 16, 15, 28, 29,
- 29, 29, 30, 30, 28, 28, 28, 27, 27, 24, 24, 23, 21, 21, 20, 20, 20, 19,
- 19, 18, 18, 18, 17, 17, 16, 16, 16, 15, 15, 14, 28, 29, 29, 29, 30, 30,
- 28, 28, 28, 27, 27, 24, 24, 23, 21, 21, 20, 20, 20, 19, 19, 18, 18, 18,
- 17, 17, 16, 16, 16, 15, 15, 14, 27, 27, 27, 28, 28, 28, 27, 27, 26, 26,
- 26, 24, 23, 22, 20, 20, 20, 19, 19, 18, 18, 17, 17, 17, 16, 16, 15, 15,
- 15, 14, 14, 13, 26, 27, 27, 27, 28, 28, 26, 26, 26, 26, 26, 23, 23, 22,
- 20, 20, 19, 19, 19, 18, 18, 17, 17, 17, 16, 16, 15, 15, 15, 14, 14, 13,
- 25, 26, 26, 26, 27, 27, 26, 26, 26, 25, 25, 23, 22, 21, 20, 20, 19, 19,
- 18, 17, 17, 17, 16, 16, 15, 15, 15, 14, 14, 14, 14, 13, 23, 24, 24, 24,
- 25, 25, 24, 24, 24, 24, 24, 22, 22, 20, 19, 19, 18, 18, 17, 16, 16, 16,
- 15, 15, 14, 14, 14, 14, 13, 13, 13, 12, 23, 24, 24, 24, 25, 25, 24, 24,
- 24, 24, 24, 22, 22, 20, 19, 19, 18, 18, 17, 16, 16, 16, 15, 15, 14, 14,
- 14, 14, 13, 13, 13, 12, 22, 22, 23, 23, 23, 23, 23, 23, 23, 23, 23, 21,
- 20, 20, 18, 18, 17, 17, 17, 16, 16, 15, 15, 14, 14, 14, 13, 13, 13, 12,
- 12, 12, 21, 22, 22, 23, 23, 23, 23, 23, 23, 22, 22, 20, 20, 19, 18, 18,
- 17, 17, 16, 15, 15, 15, 14, 14, 13, 13, 13, 13, 12, 12, 12, 12, 20, 21,
- 21, 22, 22, 22, 22, 22, 22, 22, 22, 20, 20, 19, 18, 18, 17, 17, 16, 15,
- 15, 14, 14, 14, 13, 13, 12, 12, 12, 12, 12, 11, 19, 20, 20, 20, 21, 21,
- 21, 20, 21, 21, 21, 19, 19, 18, 17, 17, 16, 16, 15, 14, 14, 14, 13, 13,
- 12, 12, 12, 12, 12, 11, 11, 11, 19, 20, 20, 20, 21, 21, 21, 20, 21, 21,
- 21, 19, 19, 18, 17, 17, 16, 16, 15, 14, 14, 14, 13, 13, 12, 12, 12, 12,
- 12, 11, 11, 11, 18, 18, 19, 19, 19, 19, 19, 19, 19, 20, 20, 18, 18, 17,
- 16, 16, 15, 15, 15, 14, 14, 13, 13, 12, 12, 12, 11, 11, 11, 11, 11, 10,
- 17, 18, 18, 19, 19, 19, 19, 19, 19, 19, 19, 18, 18, 17, 16, 16, 15, 15,
- 14, 14, 14, 13, 13, 12, 12, 12, 11, 11, 11, 10, 10, 10, 17, 17, 18, 18,
- 18, 18, 18, 18, 19, 19, 19, 17, 17, 17, 16, 16, 15, 15, 14, 13, 13, 13,
- 12, 12, 12, 12, 11, 11, 11, 10, 10, 10, 16, 16, 17, 17, 17, 17, 17, 17,
- 17, 18, 18, 17, 16, 16, 15, 15, 14, 14, 14, 13, 13, 12, 12, 12, 11, 11,
- 11, 10, 10, 10, 10, 9, 16, 16, 17, 17, 17, 17, 17, 17, 17, 18, 18, 17,
- 16, 16, 15, 15, 14, 14, 14, 13, 13, 12, 12, 12, 11, 11, 11, 10, 10, 10,
- 10, 9, 15, 15, 15, 16, 16, 16, 16, 16, 16, 17, 17, 16, 15, 15, 14, 14,
- 13, 13, 13, 12, 12, 12, 12, 11, 11, 11, 10, 10, 10, 9, 9, 9,
- /* Size 4x8 */
- 32, 32, 24, 18, 32, 31, 25, 19, 32, 29, 24, 20, 30, 28, 20, 17, 27, 26,
- 18, 15, 23, 23, 16, 13, 20, 20, 14, 12, 17, 18, 13, 11,
- /* Size 8x4 */
- 32, 32, 32, 30, 27, 23, 20, 17, 32, 31, 29, 28, 26, 23, 20, 18, 24, 25,
- 24, 20, 18, 16, 14, 13, 18, 19, 20, 17, 15, 13, 12, 11,
- /* Size 8x16 */
- 32, 33, 32, 29, 26, 23, 19, 16, 33, 32, 32, 29, 27, 24, 20, 17, 33, 32,
- 31, 30, 28, 25, 21, 17, 33, 32, 30, 29, 27, 24, 21, 17, 32, 32, 30, 28,
- 26, 24, 21, 18, 32, 31, 29, 28, 26, 24, 21, 18, 30, 30, 28, 25, 23, 21,
- 19, 16, 28, 30, 27, 22, 20, 19, 17, 15, 27, 28, 26, 22, 20, 18, 16, 14,
- 25, 26, 25, 21, 19, 17, 15, 13, 23, 25, 24, 20, 18, 16, 14, 13, 21, 23,
- 22, 19, 17, 15, 13, 12, 19, 21, 20, 18, 16, 14, 12, 11, 18, 19, 19, 17,
- 15, 14, 12, 11, 17, 18, 18, 16, 15, 13, 12, 10, 16, 17, 18, 16, 14, 13,
- 11, 10,
- /* Size 16x8 */
- 32, 33, 33, 33, 32, 32, 30, 28, 27, 25, 23, 21, 19, 18, 17, 16, 33, 32,
- 32, 32, 32, 31, 30, 30, 28, 26, 25, 23, 21, 19, 18, 17, 32, 32, 31, 30,
- 30, 29, 28, 27, 26, 25, 24, 22, 20, 19, 18, 18, 29, 29, 30, 29, 28, 28,
- 25, 22, 22, 21, 20, 19, 18, 17, 16, 16, 26, 27, 28, 27, 26, 26, 23, 20,
- 20, 19, 18, 17, 16, 15, 15, 14, 23, 24, 25, 24, 24, 24, 21, 19, 18, 17,
- 16, 15, 14, 14, 13, 13, 19, 20, 21, 21, 21, 21, 19, 17, 16, 15, 14, 13,
- 12, 12, 12, 11, 16, 17, 17, 17, 18, 18, 16, 15, 14, 13, 13, 12, 11, 11,
- 10, 10,
- /* Size 16x32 */
- 32, 33, 33, 33, 32, 32, 29, 28, 26, 23, 23, 20, 19, 18, 16, 16, 33, 32,
- 32, 32, 32, 32, 29, 29, 27, 24, 24, 21, 20, 18, 16, 16, 33, 32, 32, 32,
- 32, 32, 29, 29, 27, 24, 24, 21, 20, 19, 17, 17, 33, 32, 32, 32, 32, 32,
- 30, 29, 28, 25, 25, 21, 20, 19, 17, 17, 33, 32, 32, 32, 31, 31, 30, 30,
- 28, 25, 25, 22, 21, 19, 17, 17, 33, 32, 32, 32, 31, 31, 30, 30, 28, 25,
- 25, 22, 21, 19, 17, 17, 33, 32, 32, 31, 30, 30, 29, 28, 27, 24, 24, 21,
- 21, 19, 17, 17, 32, 32, 32, 31, 30, 30, 28, 28, 27, 24, 24, 21, 20, 19,
- 17, 17, 32, 32, 32, 31, 30, 30, 28, 28, 26, 24, 24, 21, 21, 19, 18, 18,
- 32, 32, 31, 30, 29, 29, 28, 27, 26, 24, 24, 21, 21, 20, 18, 18, 32, 32,
- 31, 30, 29, 29, 28, 27, 26, 24, 24, 21, 21, 20, 18, 18, 31, 31, 31, 29,
- 28, 28, 26, 25, 24, 22, 22, 20, 19, 18, 17, 17, 30, 30, 30, 29, 28, 28,
- 25, 24, 23, 21, 21, 19, 19, 18, 16, 16, 30, 30, 30, 29, 28, 28, 24, 23,
- 22, 20, 20, 19, 18, 17, 16, 16, 28, 29, 30, 28, 27, 27, 22, 21, 20, 19,
- 19, 18, 17, 16, 15, 15, 28, 29, 30, 28, 27, 27, 22, 21, 20, 19, 19, 18,
- 17, 16, 15, 15, 27, 28, 28, 27, 26, 26, 22, 20, 20, 18, 18, 17, 16, 15,
- 14, 14, 26, 27, 28, 26, 26, 26, 21, 20, 19, 18, 18, 16, 16, 15, 14, 14,
- 25, 26, 26, 26, 25, 25, 21, 20, 19, 17, 17, 16, 15, 15, 13, 13, 23, 25,
- 25, 24, 24, 24, 20, 19, 18, 16, 16, 15, 14, 14, 13, 13, 23, 25, 25, 24,
- 24, 24, 20, 19, 18, 16, 16, 15, 14, 14, 13, 13, 22, 23, 23, 23, 23, 23,
- 19, 18, 17, 16, 16, 14, 14, 13, 12, 12, 21, 23, 23, 23, 22, 22, 19, 18,
- 17, 15, 15, 14, 13, 13, 12, 12, 20, 22, 22, 22, 22, 22, 19, 18, 17, 15,
- 15, 13, 13, 12, 12, 12, 19, 20, 21, 20, 20, 20, 18, 17, 16, 14, 14, 13,
- 12, 12, 11, 11, 19, 20, 21, 20, 20, 20, 18, 17, 16, 14, 14, 13, 12, 12,
- 11, 11, 18, 19, 19, 19, 19, 19, 17, 16, 15, 14, 14, 12, 12, 11, 11, 11,
- 18, 19, 19, 19, 19, 19, 17, 16, 15, 14, 14, 12, 12, 11, 10, 10, 17, 18,
- 18, 18, 18, 18, 16, 16, 15, 13, 13, 12, 12, 11, 10, 10, 16, 17, 17, 17,
- 18, 18, 16, 15, 14, 13, 13, 12, 11, 11, 10, 10, 16, 17, 17, 17, 18, 18,
- 16, 15, 14, 13, 13, 12, 11, 11, 10, 10, 15, 16, 16, 16, 17, 17, 15, 14,
- 13, 12, 12, 11, 11, 10, 9, 9,
- /* Size 32x16 */
- 32, 33, 33, 33, 33, 33, 33, 32, 32, 32, 32, 31, 30, 30, 28, 28, 27, 26,
- 25, 23, 23, 22, 21, 20, 19, 19, 18, 18, 17, 16, 16, 15, 33, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 31, 30, 30, 29, 29, 28, 27, 26, 25, 25, 23,
- 23, 22, 20, 20, 19, 19, 18, 17, 17, 16, 33, 32, 32, 32, 32, 32, 32, 32,
- 32, 31, 31, 31, 30, 30, 30, 30, 28, 28, 26, 25, 25, 23, 23, 22, 21, 21,
- 19, 19, 18, 17, 17, 16, 33, 32, 32, 32, 32, 32, 31, 31, 31, 30, 30, 29,
- 29, 29, 28, 28, 27, 26, 26, 24, 24, 23, 23, 22, 20, 20, 19, 19, 18, 17,
- 17, 16, 32, 32, 32, 32, 31, 31, 30, 30, 30, 29, 29, 28, 28, 28, 27, 27,
- 26, 26, 25, 24, 24, 23, 22, 22, 20, 20, 19, 19, 18, 18, 18, 17, 32, 32,
- 32, 32, 31, 31, 30, 30, 30, 29, 29, 28, 28, 28, 27, 27, 26, 26, 25, 24,
- 24, 23, 22, 22, 20, 20, 19, 19, 18, 18, 18, 17, 29, 29, 29, 30, 30, 30,
- 29, 28, 28, 28, 28, 26, 25, 24, 22, 22, 22, 21, 21, 20, 20, 19, 19, 19,
- 18, 18, 17, 17, 16, 16, 16, 15, 28, 29, 29, 29, 30, 30, 28, 28, 28, 27,
- 27, 25, 24, 23, 21, 21, 20, 20, 20, 19, 19, 18, 18, 18, 17, 17, 16, 16,
- 16, 15, 15, 14, 26, 27, 27, 28, 28, 28, 27, 27, 26, 26, 26, 24, 23, 22,
- 20, 20, 20, 19, 19, 18, 18, 17, 17, 17, 16, 16, 15, 15, 15, 14, 14, 13,
- 23, 24, 24, 25, 25, 25, 24, 24, 24, 24, 24, 22, 21, 20, 19, 19, 18, 18,
- 17, 16, 16, 16, 15, 15, 14, 14, 14, 14, 13, 13, 13, 12, 23, 24, 24, 25,
- 25, 25, 24, 24, 24, 24, 24, 22, 21, 20, 19, 19, 18, 18, 17, 16, 16, 16,
- 15, 15, 14, 14, 14, 14, 13, 13, 13, 12, 20, 21, 21, 21, 22, 22, 21, 21,
- 21, 21, 21, 20, 19, 19, 18, 18, 17, 16, 16, 15, 15, 14, 14, 13, 13, 13,
- 12, 12, 12, 12, 12, 11, 19, 20, 20, 20, 21, 21, 21, 20, 21, 21, 21, 19,
- 19, 18, 17, 17, 16, 16, 15, 14, 14, 14, 13, 13, 12, 12, 12, 12, 12, 11,
- 11, 11, 18, 18, 19, 19, 19, 19, 19, 19, 19, 20, 20, 18, 18, 17, 16, 16,
- 15, 15, 15, 14, 14, 13, 13, 12, 12, 12, 11, 11, 11, 11, 11, 10, 16, 16,
- 17, 17, 17, 17, 17, 17, 18, 18, 18, 17, 16, 16, 15, 15, 14, 14, 13, 13,
- 13, 12, 12, 12, 11, 11, 11, 10, 10, 10, 10, 9, 16, 16, 17, 17, 17, 17,
- 17, 17, 18, 18, 18, 17, 16, 16, 15, 15, 14, 14, 13, 13, 13, 12, 12, 12,
- 11, 11, 11, 10, 10, 10, 10, 9,
- /* Size 4x16 */
- 33, 32, 23, 18, 32, 32, 24, 19, 32, 31, 25, 19, 32, 30, 24, 19, 32, 30,
- 24, 19, 32, 29, 24, 20, 30, 28, 21, 18, 29, 27, 19, 16, 28, 26, 18, 15,
- 26, 25, 17, 15, 25, 24, 16, 14, 23, 22, 15, 13, 20, 20, 14, 12, 19, 19,
- 14, 11, 18, 18, 13, 11, 17, 18, 13, 11,
- /* Size 16x4 */
- 33, 32, 32, 32, 32, 32, 30, 29, 28, 26, 25, 23, 20, 19, 18, 17, 32, 32,
- 31, 30, 30, 29, 28, 27, 26, 25, 24, 22, 20, 19, 18, 18, 23, 24, 25, 24,
- 24, 24, 21, 19, 18, 17, 16, 15, 14, 14, 13, 13, 18, 19, 19, 19, 19, 20,
- 18, 16, 15, 15, 14, 13, 12, 11, 11, 11,
- /* Size 8x32 */
- 32, 33, 32, 29, 26, 23, 19, 16, 33, 32, 32, 29, 27, 24, 20, 16, 33, 32,
- 32, 29, 27, 24, 20, 17, 33, 32, 32, 30, 28, 25, 20, 17, 33, 32, 31, 30,
- 28, 25, 21, 17, 33, 32, 31, 30, 28, 25, 21, 17, 33, 32, 30, 29, 27, 24,
- 21, 17, 32, 32, 30, 28, 27, 24, 20, 17, 32, 32, 30, 28, 26, 24, 21, 18,
- 32, 31, 29, 28, 26, 24, 21, 18, 32, 31, 29, 28, 26, 24, 21, 18, 31, 31,
- 28, 26, 24, 22, 19, 17, 30, 30, 28, 25, 23, 21, 19, 16, 30, 30, 28, 24,
- 22, 20, 18, 16, 28, 30, 27, 22, 20, 19, 17, 15, 28, 30, 27, 22, 20, 19,
- 17, 15, 27, 28, 26, 22, 20, 18, 16, 14, 26, 28, 26, 21, 19, 18, 16, 14,
- 25, 26, 25, 21, 19, 17, 15, 13, 23, 25, 24, 20, 18, 16, 14, 13, 23, 25,
- 24, 20, 18, 16, 14, 13, 22, 23, 23, 19, 17, 16, 14, 12, 21, 23, 22, 19,
- 17, 15, 13, 12, 20, 22, 22, 19, 17, 15, 13, 12, 19, 21, 20, 18, 16, 14,
- 12, 11, 19, 21, 20, 18, 16, 14, 12, 11, 18, 19, 19, 17, 15, 14, 12, 11,
- 18, 19, 19, 17, 15, 14, 12, 10, 17, 18, 18, 16, 15, 13, 12, 10, 16, 17,
- 18, 16, 14, 13, 11, 10, 16, 17, 18, 16, 14, 13, 11, 10, 15, 16, 17, 15,
- 13, 12, 11, 9,
- /* Size 32x8 */
- 32, 33, 33, 33, 33, 33, 33, 32, 32, 32, 32, 31, 30, 30, 28, 28, 27, 26,
- 25, 23, 23, 22, 21, 20, 19, 19, 18, 18, 17, 16, 16, 15, 33, 32, 32, 32,
- 32, 32, 32, 32, 32, 31, 31, 31, 30, 30, 30, 30, 28, 28, 26, 25, 25, 23,
- 23, 22, 21, 21, 19, 19, 18, 17, 17, 16, 32, 32, 32, 32, 31, 31, 30, 30,
- 30, 29, 29, 28, 28, 28, 27, 27, 26, 26, 25, 24, 24, 23, 22, 22, 20, 20,
- 19, 19, 18, 18, 18, 17, 29, 29, 29, 30, 30, 30, 29, 28, 28, 28, 28, 26,
- 25, 24, 22, 22, 22, 21, 21, 20, 20, 19, 19, 19, 18, 18, 17, 17, 16, 16,
- 16, 15, 26, 27, 27, 28, 28, 28, 27, 27, 26, 26, 26, 24, 23, 22, 20, 20,
- 20, 19, 19, 18, 18, 17, 17, 17, 16, 16, 15, 15, 15, 14, 14, 13, 23, 24,
- 24, 25, 25, 25, 24, 24, 24, 24, 24, 22, 21, 20, 19, 19, 18, 18, 17, 16,
- 16, 16, 15, 15, 14, 14, 14, 14, 13, 13, 13, 12, 19, 20, 20, 20, 21, 21,
- 21, 20, 21, 21, 21, 19, 19, 18, 17, 17, 16, 16, 15, 14, 14, 14, 13, 13,
- 12, 12, 12, 12, 12, 11, 11, 11, 16, 16, 17, 17, 17, 17, 17, 17, 18, 18,
- 18, 17, 16, 16, 15, 15, 14, 14, 13, 13, 13, 12, 12, 12, 11, 11, 11, 10,
- 10, 10, 10, 9 },
- { /* Chroma */
- /* Size 4x4 */
- 33, 25, 22, 20, 25, 21, 21, 20, 22, 21, 18, 17, 20, 20, 17, 14,
- /* Size 8x8 */
- 33, 33, 27, 23, 22, 21, 20, 19, 33, 32, 26, 23, 23, 22, 22, 20, 27, 26,
- 22, 22, 22, 22, 22, 20, 23, 23, 22, 20, 20, 20, 20, 19, 22, 23, 22, 20,
- 19, 18, 18, 17, 21, 22, 22, 20, 18, 17, 16, 16, 20, 22, 22, 20, 18, 16,
- 16, 15, 19, 20, 20, 19, 17, 16, 15, 13,
- /* Size 16x16 */
- 32, 33, 34, 31, 30, 28, 25, 21, 21, 21, 21, 20, 20, 19, 19, 18, 33, 33,
- 33, 30, 28, 27, 24, 22, 22, 22, 22, 21, 20, 20, 19, 19, 34, 33, 32, 30,
- 28, 26, 24, 22, 23, 23, 23, 22, 22, 21, 20, 20, 31, 30, 30, 28, 26, 24,
- 23, 22, 22, 22, 23, 22, 22, 21, 20, 20, 30, 28, 28, 26, 24, 23, 22, 22,
- 22, 22, 23, 22, 22, 21, 21, 20, 28, 27, 26, 24, 23, 22, 22, 21, 22, 22,
- 23, 22, 22, 21, 21, 20, 25, 24, 24, 23, 22, 22, 21, 20, 20, 21, 21, 20,
- 20, 20, 20, 19, 21, 22, 22, 22, 22, 21, 20, 19, 19, 19, 19, 19, 19, 19,
- 18, 18, 21, 22, 23, 22, 22, 22, 20, 19, 19, 19, 19, 18, 18, 18, 18, 17,
- 21, 22, 23, 22, 22, 22, 21, 19, 19, 19, 18, 18, 17, 17, 17, 17, 21, 22,
- 23, 23, 23, 23, 21, 19, 19, 18, 18, 17, 17, 17, 16, 16, 20, 21, 22, 22,
- 22, 22, 20, 19, 18, 18, 17, 17, 16, 16, 16, 15, 20, 20, 22, 22, 22, 22,
- 20, 19, 18, 17, 17, 16, 16, 15, 15, 15, 19, 20, 21, 21, 21, 21, 20, 19,
- 18, 17, 17, 16, 15, 15, 14, 14, 19, 19, 20, 20, 21, 21, 20, 18, 18, 17,
- 16, 16, 15, 14, 14, 14, 18, 19, 20, 20, 20, 20, 19, 18, 17, 17, 16, 15,
- 15, 14, 14, 13,
- /* Size 32x32 */
- 32, 33, 33, 33, 34, 34, 31, 31, 30, 28, 28, 26, 25, 23, 21, 21, 21, 21,
- 21, 21, 21, 20, 20, 20, 20, 20, 19, 19, 19, 18, 18, 17, 33, 33, 33, 33,
- 33, 33, 31, 30, 28, 27, 27, 25, 24, 23, 21, 21, 22, 22, 22, 22, 22, 21,
- 21, 21, 20, 20, 20, 20, 19, 19, 19, 18, 33, 33, 33, 33, 33, 33, 30, 30,
- 28, 27, 27, 25, 24, 23, 22, 22, 22, 22, 22, 22, 22, 21, 21, 21, 20, 20,
- 20, 20, 19, 19, 19, 18, 33, 33, 33, 33, 33, 33, 30, 29, 28, 26, 26, 25,
- 24, 23, 22, 22, 22, 22, 22, 22, 22, 22, 22, 21, 21, 21, 20, 20, 20, 19,
- 19, 19, 34, 33, 33, 33, 32, 32, 30, 29, 28, 26, 26, 24, 24, 23, 22, 22,
- 23, 23, 23, 23, 23, 22, 22, 22, 22, 22, 21, 21, 20, 20, 20, 19, 34, 33,
- 33, 33, 32, 32, 30, 29, 28, 26, 26, 24, 24, 23, 22, 22, 23, 23, 23, 23,
- 23, 22, 22, 22, 22, 22, 21, 21, 20, 20, 20, 19, 31, 31, 30, 30, 30, 30,
- 28, 27, 26, 24, 24, 23, 23, 23, 22, 22, 22, 22, 22, 23, 23, 22, 22, 22,
- 22, 22, 21, 21, 20, 20, 20, 19, 31, 30, 30, 29, 29, 29, 27, 26, 26, 24,
- 24, 23, 23, 22, 22, 22, 22, 22, 22, 23, 23, 22, 22, 22, 22, 22, 21, 21,
- 20, 20, 20, 19, 30, 28, 28, 28, 28, 28, 26, 26, 24, 23, 23, 23, 22, 22,
- 22, 22, 22, 22, 22, 23, 23, 22, 22, 22, 22, 22, 21, 21, 21, 20, 20, 20,
- 28, 27, 27, 26, 26, 26, 24, 24, 23, 22, 22, 22, 22, 22, 21, 21, 22, 22,
- 22, 23, 23, 22, 22, 22, 22, 22, 21, 21, 21, 20, 20, 20, 28, 27, 27, 26,
- 26, 26, 24, 24, 23, 22, 22, 22, 22, 22, 21, 21, 22, 22, 22, 23, 23, 22,
- 22, 22, 22, 22, 21, 21, 21, 20, 20, 20, 26, 25, 25, 25, 24, 24, 23, 23,
- 23, 22, 22, 21, 21, 21, 20, 20, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
- 20, 20, 20, 20, 20, 19, 25, 24, 24, 24, 24, 24, 23, 23, 22, 22, 22, 21,
- 21, 21, 20, 20, 20, 21, 21, 21, 21, 20, 20, 20, 20, 20, 20, 20, 20, 19,
- 19, 19, 23, 23, 23, 23, 23, 23, 23, 22, 22, 22, 22, 21, 21, 20, 20, 20,
- 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 19, 19, 19, 19, 19, 18, 21, 21,
- 22, 22, 22, 22, 22, 22, 22, 21, 21, 20, 20, 20, 19, 19, 19, 19, 19, 19,
- 19, 19, 19, 19, 19, 19, 19, 19, 18, 18, 18, 18, 21, 21, 22, 22, 22, 22,
- 22, 22, 22, 21, 21, 20, 20, 20, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19,
- 19, 19, 19, 19, 18, 18, 18, 18, 21, 22, 22, 22, 23, 23, 22, 22, 22, 22,
- 22, 21, 20, 20, 19, 19, 19, 19, 19, 19, 19, 18, 18, 18, 18, 18, 18, 18,
- 18, 17, 17, 17, 21, 22, 22, 22, 23, 23, 22, 22, 22, 22, 22, 21, 21, 20,
- 19, 19, 19, 19, 19, 19, 19, 18, 18, 18, 18, 18, 18, 18, 17, 17, 17, 17,
- 21, 22, 22, 22, 23, 23, 22, 22, 22, 22, 22, 21, 21, 20, 19, 19, 19, 19,
- 19, 18, 18, 18, 18, 18, 17, 17, 17, 17, 17, 17, 17, 16, 21, 22, 22, 22,
- 23, 23, 23, 23, 23, 23, 23, 21, 21, 20, 19, 19, 19, 19, 18, 18, 18, 17,
- 17, 17, 17, 17, 17, 17, 16, 16, 16, 16, 21, 22, 22, 22, 23, 23, 23, 23,
- 23, 23, 23, 21, 21, 20, 19, 19, 19, 19, 18, 18, 18, 17, 17, 17, 17, 17,
- 17, 17, 16, 16, 16, 16, 20, 21, 21, 22, 22, 22, 22, 22, 22, 22, 22, 21,
- 20, 20, 19, 19, 18, 18, 18, 17, 17, 17, 17, 17, 16, 16, 16, 16, 16, 16,
- 16, 15, 20, 21, 21, 22, 22, 22, 22, 22, 22, 22, 22, 21, 20, 20, 19, 19,
- 18, 18, 18, 17, 17, 17, 17, 17, 16, 16, 16, 16, 16, 15, 15, 15, 20, 21,
- 21, 21, 22, 22, 22, 22, 22, 22, 22, 21, 20, 20, 19, 19, 18, 18, 18, 17,
- 17, 17, 17, 16, 16, 16, 16, 16, 15, 15, 15, 15, 20, 20, 20, 21, 22, 22,
- 22, 22, 22, 22, 22, 21, 20, 20, 19, 19, 18, 18, 17, 17, 17, 16, 16, 16,
- 16, 16, 15, 15, 15, 15, 15, 14, 20, 20, 20, 21, 22, 22, 22, 22, 22, 22,
- 22, 21, 20, 20, 19, 19, 18, 18, 17, 17, 17, 16, 16, 16, 16, 16, 15, 15,
- 15, 15, 15, 14, 19, 20, 20, 20, 21, 21, 21, 21, 21, 21, 21, 20, 20, 19,
- 19, 19, 18, 18, 17, 17, 17, 16, 16, 16, 15, 15, 15, 15, 14, 14, 14, 14,
- 19, 20, 20, 20, 21, 21, 21, 21, 21, 21, 21, 20, 20, 19, 19, 19, 18, 18,
- 17, 17, 17, 16, 16, 16, 15, 15, 15, 14, 14, 14, 14, 14, 19, 19, 19, 20,
- 20, 20, 20, 20, 21, 21, 21, 20, 20, 19, 18, 18, 18, 17, 17, 16, 16, 16,
- 16, 15, 15, 15, 14, 14, 14, 14, 14, 13, 18, 19, 19, 19, 20, 20, 20, 20,
- 20, 20, 20, 20, 19, 19, 18, 18, 17, 17, 17, 16, 16, 16, 15, 15, 15, 15,
- 14, 14, 14, 13, 13, 13, 18, 19, 19, 19, 20, 20, 20, 20, 20, 20, 20, 20,
- 19, 19, 18, 18, 17, 17, 17, 16, 16, 16, 15, 15, 15, 15, 14, 14, 14, 13,
- 13, 13, 17, 18, 18, 19, 19, 19, 19, 19, 20, 20, 20, 19, 19, 18, 18, 18,
- 17, 17, 16, 16, 16, 15, 15, 15, 14, 14, 14, 14, 13, 13, 13, 13,
- /* Size 4x8 */
- 33, 27, 22, 20, 32, 26, 23, 21, 26, 22, 23, 21, 23, 22, 20, 19, 22, 22,
- 18, 18, 22, 22, 17, 16, 21, 22, 17, 15, 19, 20, 16, 14,
- /* Size 8x4 */
- 33, 32, 26, 23, 22, 22, 21, 19, 27, 26, 22, 22, 22, 22, 22, 20, 22, 23,
- 23, 20, 18, 17, 17, 16, 20, 21, 21, 19, 18, 16, 15, 14,
- /* Size 8x16 */
- 32, 33, 28, 23, 21, 21, 20, 18, 33, 33, 27, 23, 22, 22, 20, 19, 34, 32,
- 26, 23, 23, 23, 21, 20, 31, 29, 24, 22, 22, 23, 22, 20, 29, 28, 23, 22,
- 22, 23, 22, 20, 28, 26, 22, 22, 22, 23, 22, 20, 24, 24, 22, 21, 20, 21,
- 20, 19, 21, 22, 21, 20, 19, 19, 19, 18, 21, 22, 22, 20, 19, 19, 18, 17,
- 21, 23, 22, 20, 19, 18, 17, 17, 21, 23, 22, 20, 19, 18, 17, 16, 20, 22,
- 22, 20, 18, 17, 16, 15, 20, 21, 22, 19, 18, 17, 16, 14, 19, 21, 21, 19,
- 18, 17, 15, 14, 19, 20, 21, 19, 18, 16, 15, 14, 18, 20, 20, 19, 17, 16,
- 15, 13,
- /* Size 16x8 */
- 32, 33, 34, 31, 29, 28, 24, 21, 21, 21, 21, 20, 20, 19, 19, 18, 33, 33,
- 32, 29, 28, 26, 24, 22, 22, 23, 23, 22, 21, 21, 20, 20, 28, 27, 26, 24,
- 23, 22, 22, 21, 22, 22, 22, 22, 22, 21, 21, 20, 23, 23, 23, 22, 22, 22,
- 21, 20, 20, 20, 20, 20, 19, 19, 19, 19, 21, 22, 23, 22, 22, 22, 20, 19,
- 19, 19, 19, 18, 18, 18, 18, 17, 21, 22, 23, 23, 23, 23, 21, 19, 19, 18,
- 18, 17, 17, 17, 16, 16, 20, 20, 21, 22, 22, 22, 20, 19, 18, 17, 17, 16,
- 16, 15, 15, 15, 18, 19, 20, 20, 20, 20, 19, 18, 17, 17, 16, 15, 14, 14,
- 14, 13,
- /* Size 16x32 */
- 32, 33, 33, 31, 28, 28, 23, 21, 21, 21, 21, 20, 20, 19, 18, 18, 33, 33,
- 33, 30, 27, 27, 23, 22, 22, 22, 22, 20, 20, 20, 19, 19, 33, 33, 33, 30,
- 27, 27, 23, 22, 22, 22, 22, 21, 20, 20, 19, 19, 33, 33, 32, 30, 26, 26,
- 23, 22, 22, 22, 22, 21, 21, 20, 19, 19, 34, 32, 32, 29, 26, 26, 23, 22,
- 23, 23, 23, 22, 21, 21, 20, 20, 34, 32, 32, 29, 26, 26, 23, 22, 23, 23,
- 23, 22, 21, 21, 20, 20, 31, 30, 29, 28, 24, 24, 22, 22, 22, 23, 23, 22,
- 22, 21, 20, 20, 31, 29, 28, 27, 24, 24, 22, 22, 22, 22, 22, 22, 22, 21,
- 20, 20, 29, 28, 28, 26, 23, 23, 22, 22, 22, 23, 23, 22, 22, 21, 20, 20,
- 28, 26, 26, 24, 22, 22, 22, 22, 22, 23, 23, 22, 22, 21, 20, 20, 28, 26,
- 26, 24, 22, 22, 22, 22, 22, 23, 23, 22, 22, 21, 20, 20, 25, 24, 24, 23,
- 22, 22, 21, 21, 21, 21, 21, 21, 20, 20, 20, 20, 24, 24, 24, 23, 22, 22,
- 21, 20, 20, 21, 21, 20, 20, 20, 19, 19, 23, 23, 23, 23, 22, 22, 20, 20,
- 20, 20, 20, 20, 20, 19, 19, 19, 21, 22, 22, 22, 21, 21, 20, 19, 19, 19,
- 19, 19, 19, 19, 18, 18, 21, 22, 22, 22, 21, 21, 20, 19, 19, 19, 19, 19,
- 19, 19, 18, 18, 21, 22, 22, 22, 22, 22, 20, 19, 19, 19, 19, 18, 18, 18,
- 17, 17, 21, 22, 22, 22, 22, 22, 20, 19, 19, 18, 18, 18, 18, 18, 17, 17,
- 21, 22, 23, 22, 22, 22, 20, 19, 19, 18, 18, 18, 17, 17, 17, 17, 21, 22,
- 23, 23, 22, 22, 20, 19, 19, 18, 18, 17, 17, 17, 16, 16, 21, 22, 23, 23,
- 22, 22, 20, 19, 19, 18, 18, 17, 17, 17, 16, 16, 20, 22, 22, 22, 22, 22,
- 20, 19, 18, 17, 17, 17, 16, 16, 16, 16, 20, 22, 22, 22, 22, 22, 20, 19,
- 18, 17, 17, 16, 16, 16, 15, 15, 20, 21, 22, 22, 22, 22, 20, 19, 18, 17,
- 17, 16, 16, 16, 15, 15, 20, 21, 21, 22, 22, 22, 19, 19, 18, 17, 17, 16,
- 16, 15, 14, 14, 20, 21, 21, 22, 22, 22, 19, 19, 18, 17, 17, 16, 16, 15,
- 14, 14, 19, 20, 21, 21, 21, 21, 19, 19, 18, 17, 17, 15, 15, 15, 14, 14,
- 19, 20, 20, 21, 21, 21, 19, 19, 18, 17, 17, 15, 15, 15, 14, 14, 19, 20,
- 20, 20, 21, 21, 19, 18, 18, 16, 16, 15, 15, 14, 14, 14, 18, 19, 20, 20,
- 20, 20, 19, 18, 17, 16, 16, 15, 15, 14, 13, 13, 18, 19, 20, 20, 20, 20,
- 19, 18, 17, 16, 16, 15, 15, 14, 13, 13, 17, 19, 19, 19, 20, 20, 18, 18,
- 17, 16, 16, 15, 14, 14, 13, 13,
- /* Size 32x16 */
- 32, 33, 33, 33, 34, 34, 31, 31, 29, 28, 28, 25, 24, 23, 21, 21, 21, 21,
- 21, 21, 21, 20, 20, 20, 20, 20, 19, 19, 19, 18, 18, 17, 33, 33, 33, 33,
- 32, 32, 30, 29, 28, 26, 26, 24, 24, 23, 22, 22, 22, 22, 22, 22, 22, 22,
- 22, 21, 21, 21, 20, 20, 20, 19, 19, 19, 33, 33, 33, 32, 32, 32, 29, 28,
- 28, 26, 26, 24, 24, 23, 22, 22, 22, 22, 23, 23, 23, 22, 22, 22, 21, 21,
- 21, 20, 20, 20, 20, 19, 31, 30, 30, 30, 29, 29, 28, 27, 26, 24, 24, 23,
- 23, 23, 22, 22, 22, 22, 22, 23, 23, 22, 22, 22, 22, 22, 21, 21, 20, 20,
- 20, 19, 28, 27, 27, 26, 26, 26, 24, 24, 23, 22, 22, 22, 22, 22, 21, 21,
- 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 21, 21, 21, 20, 20, 20, 28, 27,
- 27, 26, 26, 26, 24, 24, 23, 22, 22, 22, 22, 22, 21, 21, 22, 22, 22, 22,
- 22, 22, 22, 22, 22, 22, 21, 21, 21, 20, 20, 20, 23, 23, 23, 23, 23, 23,
- 22, 22, 22, 22, 22, 21, 21, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20,
- 19, 19, 19, 19, 19, 19, 19, 18, 21, 22, 22, 22, 22, 22, 22, 22, 22, 22,
- 22, 21, 20, 20, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19,
- 18, 18, 18, 18, 21, 22, 22, 22, 23, 23, 22, 22, 22, 22, 22, 21, 20, 20,
- 19, 19, 19, 19, 19, 19, 19, 18, 18, 18, 18, 18, 18, 18, 18, 17, 17, 17,
- 21, 22, 22, 22, 23, 23, 23, 22, 23, 23, 23, 21, 21, 20, 19, 19, 19, 18,
- 18, 18, 18, 17, 17, 17, 17, 17, 17, 17, 16, 16, 16, 16, 21, 22, 22, 22,
- 23, 23, 23, 22, 23, 23, 23, 21, 21, 20, 19, 19, 19, 18, 18, 18, 18, 17,
- 17, 17, 17, 17, 17, 17, 16, 16, 16, 16, 20, 20, 21, 21, 22, 22, 22, 22,
- 22, 22, 22, 21, 20, 20, 19, 19, 18, 18, 18, 17, 17, 17, 16, 16, 16, 16,
- 15, 15, 15, 15, 15, 15, 20, 20, 20, 21, 21, 21, 22, 22, 22, 22, 22, 20,
- 20, 20, 19, 19, 18, 18, 17, 17, 17, 16, 16, 16, 16, 16, 15, 15, 15, 15,
- 15, 14, 19, 20, 20, 20, 21, 21, 21, 21, 21, 21, 21, 20, 20, 19, 19, 19,
- 18, 18, 17, 17, 17, 16, 16, 16, 15, 15, 15, 15, 14, 14, 14, 14, 18, 19,
- 19, 19, 20, 20, 20, 20, 20, 20, 20, 20, 19, 19, 18, 18, 17, 17, 17, 16,
- 16, 16, 15, 15, 14, 14, 14, 14, 14, 13, 13, 13, 18, 19, 19, 19, 20, 20,
- 20, 20, 20, 20, 20, 20, 19, 19, 18, 18, 17, 17, 17, 16, 16, 16, 15, 15,
- 14, 14, 14, 14, 14, 13, 13, 13,
- /* Size 4x16 */
- 33, 28, 21, 19, 33, 27, 22, 20, 32, 26, 23, 21, 30, 24, 23, 21, 28, 23,
- 23, 21, 26, 22, 23, 21, 24, 22, 21, 20, 22, 21, 19, 19, 22, 22, 19, 18,
- 22, 22, 18, 17, 22, 22, 18, 17, 22, 22, 17, 16, 21, 22, 17, 15, 20, 21,
- 17, 15, 20, 21, 16, 14, 19, 20, 16, 14,
- /* Size 16x4 */
- 33, 33, 32, 30, 28, 26, 24, 22, 22, 22, 22, 22, 21, 20, 20, 19, 28, 27,
- 26, 24, 23, 22, 22, 21, 22, 22, 22, 22, 22, 21, 21, 20, 21, 22, 23, 23,
- 23, 23, 21, 19, 19, 18, 18, 17, 17, 17, 16, 16, 19, 20, 21, 21, 21, 21,
- 20, 19, 18, 17, 17, 16, 15, 15, 14, 14,
- /* Size 8x32 */
- 32, 33, 28, 23, 21, 21, 20, 18, 33, 33, 27, 23, 22, 22, 20, 19, 33, 33,
- 27, 23, 22, 22, 20, 19, 33, 32, 26, 23, 22, 22, 21, 19, 34, 32, 26, 23,
- 23, 23, 21, 20, 34, 32, 26, 23, 23, 23, 21, 20, 31, 29, 24, 22, 22, 23,
- 22, 20, 31, 28, 24, 22, 22, 22, 22, 20, 29, 28, 23, 22, 22, 23, 22, 20,
- 28, 26, 22, 22, 22, 23, 22, 20, 28, 26, 22, 22, 22, 23, 22, 20, 25, 24,
- 22, 21, 21, 21, 20, 20, 24, 24, 22, 21, 20, 21, 20, 19, 23, 23, 22, 20,
- 20, 20, 20, 19, 21, 22, 21, 20, 19, 19, 19, 18, 21, 22, 21, 20, 19, 19,
- 19, 18, 21, 22, 22, 20, 19, 19, 18, 17, 21, 22, 22, 20, 19, 18, 18, 17,
- 21, 23, 22, 20, 19, 18, 17, 17, 21, 23, 22, 20, 19, 18, 17, 16, 21, 23,
- 22, 20, 19, 18, 17, 16, 20, 22, 22, 20, 18, 17, 16, 16, 20, 22, 22, 20,
- 18, 17, 16, 15, 20, 22, 22, 20, 18, 17, 16, 15, 20, 21, 22, 19, 18, 17,
- 16, 14, 20, 21, 22, 19, 18, 17, 16, 14, 19, 21, 21, 19, 18, 17, 15, 14,
- 19, 20, 21, 19, 18, 17, 15, 14, 19, 20, 21, 19, 18, 16, 15, 14, 18, 20,
- 20, 19, 17, 16, 15, 13, 18, 20, 20, 19, 17, 16, 15, 13, 17, 19, 20, 18,
- 17, 16, 14, 13,
- /* Size 32x8 */
- 32, 33, 33, 33, 34, 34, 31, 31, 29, 28, 28, 25, 24, 23, 21, 21, 21, 21,
- 21, 21, 21, 20, 20, 20, 20, 20, 19, 19, 19, 18, 18, 17, 33, 33, 33, 32,
- 32, 32, 29, 28, 28, 26, 26, 24, 24, 23, 22, 22, 22, 22, 23, 23, 23, 22,
- 22, 22, 21, 21, 21, 20, 20, 20, 20, 19, 28, 27, 27, 26, 26, 26, 24, 24,
- 23, 22, 22, 22, 22, 22, 21, 21, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22,
- 21, 21, 21, 20, 20, 20, 23, 23, 23, 23, 23, 23, 22, 22, 22, 22, 22, 21,
- 21, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 19, 19, 19, 19, 19, 19,
- 19, 18, 21, 22, 22, 22, 23, 23, 22, 22, 22, 22, 22, 21, 20, 20, 19, 19,
- 19, 19, 19, 19, 19, 18, 18, 18, 18, 18, 18, 18, 18, 17, 17, 17, 21, 22,
- 22, 22, 23, 23, 23, 22, 23, 23, 23, 21, 21, 20, 19, 19, 19, 18, 18, 18,
- 18, 17, 17, 17, 17, 17, 17, 17, 16, 16, 16, 16, 20, 20, 20, 21, 21, 21,
- 22, 22, 22, 22, 22, 20, 20, 20, 19, 19, 18, 18, 17, 17, 17, 16, 16, 16,
- 16, 16, 15, 15, 15, 15, 15, 14, 18, 19, 19, 19, 20, 20, 20, 20, 20, 20,
- 20, 20, 19, 19, 18, 18, 17, 17, 17, 16, 16, 16, 15, 15, 14, 14, 14, 14,
- 14, 13, 13, 13 },
- },
- {
- { /* Luma */
- /* Size 4x4 */
- 32, 32, 27, 20, 32, 29, 26, 21, 27, 26, 19, 16, 20, 21, 16, 13,
- /* Size 8x8 */
- 33, 32, 32, 30, 29, 25, 22, 19, 32, 32, 32, 31, 30, 26, 23, 20, 32, 32,
- 30, 29, 28, 25, 23, 20, 30, 31, 29, 26, 24, 22, 20, 19, 29, 30, 28, 24,
- 21, 19, 18, 17, 25, 26, 25, 22, 19, 17, 16, 15, 22, 23, 23, 20, 18, 16,
- 14, 13, 19, 20, 20, 19, 17, 15, 13, 12,
- /* Size 16x16 */
- 32, 33, 33, 33, 33, 32, 32, 30, 28, 27, 26, 23, 22, 21, 19, 17, 33, 32,
- 32, 32, 32, 32, 31, 30, 29, 28, 27, 24, 23, 22, 20, 18, 33, 32, 32, 32,
- 32, 32, 31, 31, 30, 28, 28, 25, 23, 22, 20, 19, 33, 32, 32, 32, 32, 31,
- 31, 30, 29, 28, 27, 25, 23, 23, 21, 19, 33, 32, 32, 32, 31, 30, 30, 29,
- 28, 27, 26, 24, 23, 22, 20, 19, 32, 32, 32, 31, 30, 29, 28, 28, 27, 26,
- 26, 24, 23, 22, 21, 19, 32, 31, 31, 31, 30, 28, 28, 27, 26, 25, 24, 23,
- 22, 21, 20, 19, 30, 30, 31, 30, 29, 28, 27, 26, 24, 23, 23, 22, 20, 20,
- 19, 18, 28, 29, 30, 29, 28, 27, 26, 24, 21, 20, 20, 19, 18, 18, 17, 16,
- 27, 28, 28, 28, 27, 26, 25, 23, 20, 20, 20, 18, 18, 17, 16, 15, 26, 27,
- 28, 27, 26, 26, 24, 23, 20, 20, 19, 18, 17, 17, 16, 15, 23, 24, 25, 25,
- 24, 24, 23, 22, 19, 18, 18, 16, 16, 15, 14, 14, 22, 23, 23, 23, 23, 23,
- 22, 20, 18, 18, 17, 16, 15, 14, 14, 13, 21, 22, 22, 23, 22, 22, 21, 20,
- 18, 17, 17, 15, 14, 14, 13, 13, 19, 20, 20, 21, 20, 21, 20, 19, 17, 16,
- 16, 14, 14, 13, 12, 12, 17, 18, 19, 19, 19, 19, 19, 18, 16, 15, 15, 14,
- 13, 13, 12, 11,
- /* Size 32x32 */
- 32, 33, 33, 33, 33, 33, 33, 33, 33, 32, 32, 32, 32, 30, 30, 29, 28, 28,
- 27, 26, 26, 24, 23, 23, 22, 21, 21, 19, 19, 19, 17, 17, 33, 33, 33, 33,
- 32, 32, 32, 32, 32, 32, 32, 32, 31, 30, 30, 30, 29, 29, 28, 26, 26, 25,
- 24, 24, 22, 22, 21, 20, 20, 19, 18, 18, 33, 33, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 31, 30, 30, 30, 29, 29, 28, 27, 27, 25, 24, 24, 23, 22,
- 22, 20, 20, 19, 18, 18, 33, 33, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 31, 30, 30, 30, 29, 29, 28, 27, 27, 25, 24, 24, 23, 22, 22, 20, 20, 20,
- 18, 18, 33, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 31, 31, 31, 30,
- 30, 30, 28, 28, 28, 26, 25, 25, 23, 23, 22, 21, 20, 20, 19, 19, 33, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 31, 31, 31, 30, 30, 30, 28, 28,
- 28, 26, 25, 25, 23, 23, 23, 21, 21, 20, 19, 19, 33, 32, 32, 32, 32, 32,
- 32, 32, 32, 31, 31, 31, 31, 30, 30, 30, 29, 29, 28, 27, 27, 26, 25, 25,
- 23, 23, 23, 21, 21, 20, 19, 19, 33, 32, 32, 32, 32, 32, 32, 31, 31, 31,
- 30, 30, 30, 29, 29, 29, 28, 28, 27, 26, 26, 25, 24, 24, 23, 23, 22, 21,
- 20, 20, 19, 19, 33, 32, 32, 32, 32, 32, 32, 31, 31, 31, 30, 30, 30, 29,
- 29, 28, 28, 28, 27, 26, 26, 25, 24, 24, 23, 23, 22, 21, 20, 20, 19, 19,
- 32, 32, 32, 32, 32, 32, 31, 31, 31, 30, 30, 30, 29, 29, 29, 28, 28, 28,
- 27, 26, 26, 25, 24, 24, 23, 23, 22, 21, 21, 20, 19, 19, 32, 32, 32, 32,
- 32, 32, 31, 30, 30, 30, 29, 29, 28, 28, 28, 28, 27, 27, 26, 26, 26, 24,
- 24, 24, 23, 22, 22, 21, 21, 20, 19, 19, 32, 32, 32, 32, 32, 32, 31, 30,
- 30, 30, 29, 29, 28, 28, 28, 28, 27, 27, 26, 26, 26, 24, 24, 24, 23, 22,
- 22, 21, 21, 20, 19, 19, 32, 31, 31, 31, 31, 31, 31, 30, 30, 29, 28, 28,
- 28, 27, 27, 26, 26, 26, 25, 24, 24, 23, 23, 23, 22, 22, 21, 20, 20, 20,
- 19, 19, 30, 30, 30, 30, 31, 31, 30, 29, 29, 29, 28, 28, 27, 26, 26, 25,
- 24, 24, 23, 23, 23, 22, 22, 21, 20, 20, 20, 19, 19, 19, 18, 18, 30, 30,
- 30, 30, 31, 31, 30, 29, 29, 29, 28, 28, 27, 26, 26, 25, 24, 24, 23, 23,
- 23, 22, 22, 21, 20, 20, 20, 19, 19, 19, 18, 18, 29, 30, 30, 30, 30, 30,
- 30, 29, 28, 28, 28, 28, 26, 25, 25, 24, 23, 23, 22, 22, 22, 21, 20, 20,
- 19, 19, 19, 18, 18, 18, 17, 17, 28, 29, 29, 29, 30, 30, 29, 28, 28, 28,
- 27, 27, 26, 24, 24, 23, 21, 21, 20, 20, 20, 19, 19, 19, 18, 18, 18, 17,
- 17, 17, 16, 16, 28, 29, 29, 29, 30, 30, 29, 28, 28, 28, 27, 27, 26, 24,
- 24, 23, 21, 21, 20, 20, 20, 19, 19, 19, 18, 18, 18, 17, 17, 17, 16, 16,
- 27, 28, 28, 28, 28, 28, 28, 27, 27, 27, 26, 26, 25, 23, 23, 22, 20, 20,
- 20, 20, 20, 19, 18, 18, 18, 17, 17, 17, 16, 16, 15, 15, 26, 26, 27, 27,
- 28, 28, 27, 26, 26, 26, 26, 26, 24, 23, 23, 22, 20, 20, 20, 19, 19, 18,
- 18, 18, 17, 17, 17, 16, 16, 16, 15, 15, 26, 26, 27, 27, 28, 28, 27, 26,
- 26, 26, 26, 26, 24, 23, 23, 22, 20, 20, 20, 19, 19, 18, 18, 18, 17, 17,
- 17, 16, 16, 16, 15, 15, 24, 25, 25, 25, 26, 26, 26, 25, 25, 25, 24, 24,
- 23, 22, 22, 21, 19, 19, 19, 18, 18, 17, 17, 17, 16, 16, 16, 15, 15, 15,
- 14, 14, 23, 24, 24, 24, 25, 25, 25, 24, 24, 24, 24, 24, 23, 22, 22, 20,
- 19, 19, 18, 18, 18, 17, 16, 16, 16, 15, 15, 14, 14, 14, 14, 14, 23, 24,
- 24, 24, 25, 25, 25, 24, 24, 24, 24, 24, 23, 21, 21, 20, 19, 19, 18, 18,
- 18, 17, 16, 16, 16, 15, 15, 14, 14, 14, 13, 13, 22, 22, 23, 23, 23, 23,
- 23, 23, 23, 23, 23, 23, 22, 20, 20, 19, 18, 18, 18, 17, 17, 16, 16, 16,
- 15, 15, 14, 14, 14, 13, 13, 13, 21, 22, 22, 22, 23, 23, 23, 23, 23, 23,
- 22, 22, 22, 20, 20, 19, 18, 18, 17, 17, 17, 16, 15, 15, 15, 14, 14, 14,
- 13, 13, 13, 13, 21, 21, 22, 22, 22, 23, 23, 22, 22, 22, 22, 22, 21, 20,
- 20, 19, 18, 18, 17, 17, 17, 16, 15, 15, 14, 14, 14, 13, 13, 13, 13, 13,
- 19, 20, 20, 20, 21, 21, 21, 21, 21, 21, 21, 21, 20, 19, 19, 18, 17, 17,
- 17, 16, 16, 15, 14, 14, 14, 14, 13, 13, 13, 12, 12, 12, 19, 20, 20, 20,
- 20, 21, 21, 20, 20, 21, 21, 21, 20, 19, 19, 18, 17, 17, 16, 16, 16, 15,
- 14, 14, 14, 13, 13, 13, 12, 12, 12, 12, 19, 19, 19, 20, 20, 20, 20, 20,
- 20, 20, 20, 20, 20, 19, 19, 18, 17, 17, 16, 16, 16, 15, 14, 14, 13, 13,
- 13, 12, 12, 12, 12, 12, 17, 18, 18, 18, 19, 19, 19, 19, 19, 19, 19, 19,
- 19, 18, 18, 17, 16, 16, 15, 15, 15, 14, 14, 13, 13, 13, 13, 12, 12, 12,
- 11, 11, 17, 18, 18, 18, 19, 19, 19, 19, 19, 19, 19, 19, 19, 18, 18, 17,
- 16, 16, 15, 15, 15, 14, 14, 13, 13, 13, 13, 12, 12, 12, 11, 11,
- /* Size 4x8 */
- 32, 32, 28, 20, 32, 31, 28, 21, 32, 30, 27, 21, 30, 28, 23, 19, 29, 27,
- 21, 17, 26, 24, 19, 15, 22, 22, 17, 13, 20, 20, 16, 12,
- /* Size 8x4 */
- 32, 32, 32, 30, 29, 26, 22, 20, 32, 31, 30, 28, 27, 24, 22, 20, 28, 28,
- 27, 23, 21, 19, 17, 16, 20, 21, 21, 19, 17, 15, 13, 12,
- /* Size 8x16 */
- 32, 33, 32, 32, 28, 23, 22, 19, 33, 32, 32, 31, 29, 24, 23, 20, 33, 32,
- 32, 31, 29, 25, 23, 21, 33, 32, 31, 31, 29, 25, 23, 21, 32, 32, 30, 30,
- 28, 24, 23, 20, 32, 31, 29, 28, 27, 24, 23, 21, 32, 31, 29, 28, 26, 23,
- 22, 20, 30, 30, 28, 27, 24, 21, 20, 19, 28, 30, 28, 26, 21, 19, 18, 17,
- 27, 28, 26, 25, 21, 18, 18, 16, 26, 28, 26, 24, 20, 18, 17, 16, 23, 25,
- 24, 23, 19, 16, 16, 14, 22, 23, 23, 22, 18, 16, 15, 14, 21, 22, 22, 21,
- 18, 15, 14, 13, 19, 21, 20, 20, 17, 14, 14, 12, 18, 19, 19, 19, 16, 14,
- 13, 12,
- /* Size 16x8 */
- 32, 33, 33, 33, 32, 32, 32, 30, 28, 27, 26, 23, 22, 21, 19, 18, 33, 32,
- 32, 32, 32, 31, 31, 30, 30, 28, 28, 25, 23, 22, 21, 19, 32, 32, 32, 31,
- 30, 29, 29, 28, 28, 26, 26, 24, 23, 22, 20, 19, 32, 31, 31, 31, 30, 28,
- 28, 27, 26, 25, 24, 23, 22, 21, 20, 19, 28, 29, 29, 29, 28, 27, 26, 24,
- 21, 21, 20, 19, 18, 18, 17, 16, 23, 24, 25, 25, 24, 24, 23, 21, 19, 18,
- 18, 16, 16, 15, 14, 14, 22, 23, 23, 23, 23, 23, 22, 20, 18, 18, 17, 16,
- 15, 14, 14, 13, 19, 20, 21, 21, 20, 21, 20, 19, 17, 16, 16, 14, 14, 13,
- 12, 12,
- /* Size 16x32 */
- 32, 33, 33, 33, 32, 32, 32, 29, 28, 27, 23, 23, 22, 19, 19, 17, 33, 32,
- 32, 32, 32, 32, 31, 29, 29, 28, 24, 24, 22, 20, 20, 18, 33, 32, 32, 32,
- 32, 32, 31, 29, 29, 28, 24, 24, 23, 20, 20, 18, 33, 32, 32, 32, 32, 32,
- 31, 29, 29, 28, 24, 24, 23, 20, 20, 18, 33, 32, 32, 32, 32, 32, 31, 30,
- 29, 28, 25, 25, 23, 21, 21, 19, 33, 32, 32, 32, 32, 31, 31, 30, 30, 28,
- 25, 25, 23, 21, 21, 19, 33, 32, 32, 32, 31, 31, 31, 29, 29, 28, 25, 25,
- 23, 21, 21, 19, 32, 32, 32, 32, 31, 30, 30, 28, 28, 27, 24, 24, 23, 21,
- 21, 19, 32, 32, 32, 31, 30, 30, 30, 28, 28, 27, 24, 24, 23, 20, 20, 19,
- 32, 32, 32, 31, 30, 30, 29, 28, 28, 27, 24, 24, 23, 21, 21, 19, 32, 32,
- 31, 31, 29, 29, 28, 27, 27, 26, 24, 24, 23, 21, 21, 19, 32, 32, 31, 31,
- 29, 29, 28, 27, 27, 26, 24, 24, 23, 21, 21, 19, 32, 31, 31, 31, 29, 28,
- 28, 26, 26, 25, 23, 23, 22, 20, 20, 19, 30, 30, 30, 30, 28, 28, 27, 24,
- 24, 23, 21, 21, 20, 19, 19, 18, 30, 30, 30, 30, 28, 28, 27, 24, 24, 23,
- 21, 21, 20, 19, 19, 18, 29, 30, 30, 30, 28, 28, 26, 23, 23, 22, 20, 20,
- 19, 18, 18, 17, 28, 29, 30, 29, 28, 27, 26, 22, 21, 21, 19, 19, 18, 17,
- 17, 16, 28, 29, 30, 29, 28, 27, 26, 22, 21, 21, 19, 19, 18, 17, 17, 16,
- 27, 28, 28, 28, 26, 26, 25, 21, 21, 20, 18, 18, 18, 16, 16, 15, 26, 27,
- 28, 27, 26, 26, 24, 21, 20, 20, 18, 18, 17, 16, 16, 15, 26, 27, 28, 27,
- 26, 26, 24, 21, 20, 20, 18, 18, 17, 16, 16, 15, 24, 26, 26, 26, 24, 24,
- 23, 20, 20, 19, 17, 17, 16, 15, 15, 14, 23, 24, 25, 25, 24, 24, 23, 20,
- 19, 18, 16, 16, 16, 14, 14, 14, 23, 24, 25, 25, 24, 24, 23, 20, 19, 18,
- 16, 16, 16, 14, 14, 13, 22, 23, 23, 23, 23, 23, 22, 19, 18, 18, 16, 16,
- 15, 14, 14, 13, 21, 22, 23, 23, 22, 22, 21, 19, 18, 17, 15, 15, 15, 13,
- 13, 13, 21, 22, 22, 22, 22, 22, 21, 18, 18, 17, 15, 15, 14, 13, 13, 13,
- 19, 20, 21, 21, 21, 21, 20, 18, 17, 17, 14, 14, 14, 13, 13, 12, 19, 20,
- 21, 21, 20, 20, 20, 17, 17, 16, 14, 14, 14, 12, 12, 12, 19, 20, 20, 20,
- 20, 20, 19, 17, 17, 16, 14, 14, 13, 12, 12, 12, 18, 19, 19, 19, 19, 19,
- 19, 17, 16, 15, 14, 14, 13, 12, 12, 11, 18, 19, 19, 19, 19, 19, 19, 17,
- 16, 15, 14, 14, 13, 12, 12, 11,
- /* Size 32x16 */
- 32, 33, 33, 33, 33, 33, 33, 32, 32, 32, 32, 32, 32, 30, 30, 29, 28, 28,
- 27, 26, 26, 24, 23, 23, 22, 21, 21, 19, 19, 19, 18, 18, 33, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 31, 30, 30, 30, 29, 29, 28, 27, 27, 26,
- 24, 24, 23, 22, 22, 20, 20, 20, 19, 19, 33, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 31, 31, 31, 30, 30, 30, 30, 30, 28, 28, 28, 26, 25, 25, 23, 23,
- 22, 21, 21, 20, 19, 19, 33, 32, 32, 32, 32, 32, 32, 32, 31, 31, 31, 31,
- 31, 30, 30, 30, 29, 29, 28, 27, 27, 26, 25, 25, 23, 23, 22, 21, 21, 20,
- 19, 19, 32, 32, 32, 32, 32, 32, 31, 31, 30, 30, 29, 29, 29, 28, 28, 28,
- 28, 28, 26, 26, 26, 24, 24, 24, 23, 22, 22, 21, 20, 20, 19, 19, 32, 32,
- 32, 32, 32, 31, 31, 30, 30, 30, 29, 29, 28, 28, 28, 28, 27, 27, 26, 26,
- 26, 24, 24, 24, 23, 22, 22, 21, 20, 20, 19, 19, 32, 31, 31, 31, 31, 31,
- 31, 30, 30, 29, 28, 28, 28, 27, 27, 26, 26, 26, 25, 24, 24, 23, 23, 23,
- 22, 21, 21, 20, 20, 19, 19, 19, 29, 29, 29, 29, 30, 30, 29, 28, 28, 28,
- 27, 27, 26, 24, 24, 23, 22, 22, 21, 21, 21, 20, 20, 20, 19, 19, 18, 18,
- 17, 17, 17, 17, 28, 29, 29, 29, 29, 30, 29, 28, 28, 28, 27, 27, 26, 24,
- 24, 23, 21, 21, 21, 20, 20, 20, 19, 19, 18, 18, 18, 17, 17, 17, 16, 16,
- 27, 28, 28, 28, 28, 28, 28, 27, 27, 27, 26, 26, 25, 23, 23, 22, 21, 21,
- 20, 20, 20, 19, 18, 18, 18, 17, 17, 17, 16, 16, 15, 15, 23, 24, 24, 24,
- 25, 25, 25, 24, 24, 24, 24, 24, 23, 21, 21, 20, 19, 19, 18, 18, 18, 17,
- 16, 16, 16, 15, 15, 14, 14, 14, 14, 14, 23, 24, 24, 24, 25, 25, 25, 24,
- 24, 24, 24, 24, 23, 21, 21, 20, 19, 19, 18, 18, 18, 17, 16, 16, 16, 15,
- 15, 14, 14, 14, 14, 14, 22, 22, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23,
- 22, 20, 20, 19, 18, 18, 18, 17, 17, 16, 16, 16, 15, 15, 14, 14, 14, 13,
- 13, 13, 19, 20, 20, 20, 21, 21, 21, 21, 20, 21, 21, 21, 20, 19, 19, 18,
- 17, 17, 16, 16, 16, 15, 14, 14, 14, 13, 13, 13, 12, 12, 12, 12, 19, 20,
- 20, 20, 21, 21, 21, 21, 20, 21, 21, 21, 20, 19, 19, 18, 17, 17, 16, 16,
- 16, 15, 14, 14, 14, 13, 13, 13, 12, 12, 12, 12, 17, 18, 18, 18, 19, 19,
- 19, 19, 19, 19, 19, 19, 19, 18, 18, 17, 16, 16, 15, 15, 15, 14, 14, 13,
- 13, 13, 13, 12, 12, 12, 11, 11,
- /* Size 4x16 */
- 33, 32, 27, 19, 32, 32, 28, 20, 32, 32, 28, 21, 32, 31, 28, 21, 32, 30,
- 27, 20, 32, 29, 26, 21, 31, 28, 25, 20, 30, 28, 23, 19, 29, 27, 21, 17,
- 28, 26, 20, 16, 27, 26, 20, 16, 24, 24, 18, 14, 23, 23, 18, 14, 22, 22,
- 17, 13, 20, 20, 16, 12, 19, 19, 15, 12,
- /* Size 16x4 */
- 33, 32, 32, 32, 32, 32, 31, 30, 29, 28, 27, 24, 23, 22, 20, 19, 32, 32,
- 32, 31, 30, 29, 28, 28, 27, 26, 26, 24, 23, 22, 20, 19, 27, 28, 28, 28,
- 27, 26, 25, 23, 21, 20, 20, 18, 18, 17, 16, 15, 19, 20, 21, 21, 20, 21,
- 20, 19, 17, 16, 16, 14, 14, 13, 12, 12,
- /* Size 8x32 */
- 32, 33, 32, 32, 28, 23, 22, 19, 33, 32, 32, 31, 29, 24, 22, 20, 33, 32,
- 32, 31, 29, 24, 23, 20, 33, 32, 32, 31, 29, 24, 23, 20, 33, 32, 32, 31,
- 29, 25, 23, 21, 33, 32, 32, 31, 30, 25, 23, 21, 33, 32, 31, 31, 29, 25,
- 23, 21, 32, 32, 31, 30, 28, 24, 23, 21, 32, 32, 30, 30, 28, 24, 23, 20,
- 32, 32, 30, 29, 28, 24, 23, 21, 32, 31, 29, 28, 27, 24, 23, 21, 32, 31,
- 29, 28, 27, 24, 23, 21, 32, 31, 29, 28, 26, 23, 22, 20, 30, 30, 28, 27,
- 24, 21, 20, 19, 30, 30, 28, 27, 24, 21, 20, 19, 29, 30, 28, 26, 23, 20,
- 19, 18, 28, 30, 28, 26, 21, 19, 18, 17, 28, 30, 28, 26, 21, 19, 18, 17,
- 27, 28, 26, 25, 21, 18, 18, 16, 26, 28, 26, 24, 20, 18, 17, 16, 26, 28,
- 26, 24, 20, 18, 17, 16, 24, 26, 24, 23, 20, 17, 16, 15, 23, 25, 24, 23,
- 19, 16, 16, 14, 23, 25, 24, 23, 19, 16, 16, 14, 22, 23, 23, 22, 18, 16,
- 15, 14, 21, 23, 22, 21, 18, 15, 15, 13, 21, 22, 22, 21, 18, 15, 14, 13,
- 19, 21, 21, 20, 17, 14, 14, 13, 19, 21, 20, 20, 17, 14, 14, 12, 19, 20,
- 20, 19, 17, 14, 13, 12, 18, 19, 19, 19, 16, 14, 13, 12, 18, 19, 19, 19,
- 16, 14, 13, 12,
- /* Size 32x8 */
- 32, 33, 33, 33, 33, 33, 33, 32, 32, 32, 32, 32, 32, 30, 30, 29, 28, 28,
- 27, 26, 26, 24, 23, 23, 22, 21, 21, 19, 19, 19, 18, 18, 33, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 31, 31, 31, 30, 30, 30, 30, 30, 28, 28, 28, 26,
- 25, 25, 23, 23, 22, 21, 21, 20, 19, 19, 32, 32, 32, 32, 32, 32, 31, 31,
- 30, 30, 29, 29, 29, 28, 28, 28, 28, 28, 26, 26, 26, 24, 24, 24, 23, 22,
- 22, 21, 20, 20, 19, 19, 32, 31, 31, 31, 31, 31, 31, 30, 30, 29, 28, 28,
- 28, 27, 27, 26, 26, 26, 25, 24, 24, 23, 23, 23, 22, 21, 21, 20, 20, 19,
- 19, 19, 28, 29, 29, 29, 29, 30, 29, 28, 28, 28, 27, 27, 26, 24, 24, 23,
- 21, 21, 21, 20, 20, 20, 19, 19, 18, 18, 18, 17, 17, 17, 16, 16, 23, 24,
- 24, 24, 25, 25, 25, 24, 24, 24, 24, 24, 23, 21, 21, 20, 19, 19, 18, 18,
- 18, 17, 16, 16, 16, 15, 15, 14, 14, 14, 14, 14, 22, 22, 23, 23, 23, 23,
- 23, 23, 23, 23, 23, 23, 22, 20, 20, 19, 18, 18, 18, 17, 17, 16, 16, 16,
- 15, 15, 14, 14, 14, 13, 13, 13, 19, 20, 20, 20, 21, 21, 21, 21, 20, 21,
- 21, 21, 20, 19, 19, 18, 17, 17, 16, 16, 16, 15, 14, 14, 14, 13, 13, 13,
- 12, 12, 12, 12 },
- { /* Chroma */
- /* Size 4x4 */
- 33, 27, 22, 21, 27, 22, 22, 22, 22, 22, 19, 18, 21, 22, 18, 16,
- /* Size 8x8 */
- 33, 33, 29, 24, 21, 22, 21, 20, 33, 32, 28, 24, 22, 23, 22, 21, 29, 28,
- 25, 23, 22, 23, 22, 21, 24, 24, 23, 21, 20, 21, 20, 20, 21, 22, 22, 20,
- 19, 19, 19, 19, 22, 23, 23, 21, 19, 18, 17, 17, 21, 22, 22, 20, 19, 17,
- 17, 16, 20, 21, 21, 20, 19, 17, 16, 15,
- /* Size 16x16 */
- 32, 33, 34, 33, 31, 28, 27, 25, 21, 21, 21, 21, 20, 20, 20, 19, 33, 33,
- 33, 32, 30, 27, 26, 24, 22, 22, 22, 22, 21, 21, 20, 20, 34, 33, 33, 32,
- 29, 26, 25, 24, 22, 22, 22, 23, 22, 22, 21, 20, 33, 32, 32, 31, 28, 26,
- 25, 24, 22, 22, 23, 23, 22, 22, 22, 21, 31, 30, 29, 28, 26, 24, 23, 23,
- 22, 22, 22, 23, 22, 22, 22, 21, 28, 27, 26, 26, 24, 22, 22, 22, 21, 22,
- 22, 23, 22, 22, 22, 21, 27, 26, 25, 25, 23, 22, 22, 21, 21, 21, 21, 22,
- 22, 22, 21, 21, 25, 24, 24, 24, 23, 22, 21, 21, 20, 20, 21, 21, 20, 20,
- 20, 20, 21, 22, 22, 22, 22, 21, 21, 20, 19, 19, 19, 19, 19, 19, 19, 19,
- 21, 22, 22, 22, 22, 22, 21, 20, 19, 19, 19, 19, 19, 19, 18, 18, 21, 22,
- 22, 23, 22, 22, 21, 21, 19, 19, 19, 19, 18, 18, 18, 18, 21, 22, 23, 23,
- 23, 23, 22, 21, 19, 19, 19, 18, 17, 17, 17, 17, 20, 21, 22, 22, 22, 22,
- 22, 20, 19, 19, 18, 17, 17, 17, 16, 16, 20, 21, 22, 22, 22, 22, 22, 20,
- 19, 19, 18, 17, 17, 17, 16, 16, 20, 20, 21, 22, 22, 22, 21, 20, 19, 18,
- 18, 17, 16, 16, 16, 15, 19, 20, 20, 21, 21, 21, 21, 20, 19, 18, 18, 17,
- 16, 16, 15, 14,
- /* Size 32x32 */
- 32, 33, 33, 33, 34, 34, 33, 31, 31, 30, 28, 28, 27, 25, 25, 23, 21, 21,
- 21, 21, 21, 21, 21, 21, 20, 20, 20, 20, 20, 19, 19, 19, 33, 33, 33, 33,
- 33, 33, 33, 30, 30, 29, 27, 27, 26, 24, 24, 23, 21, 21, 22, 22, 22, 22,
- 22, 22, 21, 21, 21, 20, 20, 20, 19, 19, 33, 33, 33, 33, 33, 33, 32, 30,
- 30, 29, 27, 27, 26, 24, 24, 23, 22, 22, 22, 22, 22, 22, 22, 22, 21, 21,
- 21, 21, 20, 20, 20, 20, 33, 33, 33, 33, 33, 33, 32, 30, 30, 28, 27, 27,
- 26, 24, 24, 23, 22, 22, 22, 22, 22, 22, 22, 22, 21, 21, 21, 21, 21, 20,
- 20, 20, 34, 33, 33, 33, 33, 33, 32, 29, 29, 28, 26, 26, 25, 24, 24, 23,
- 22, 22, 22, 22, 22, 23, 23, 23, 22, 22, 22, 21, 21, 21, 20, 20, 34, 33,
- 33, 33, 33, 32, 32, 29, 29, 28, 26, 26, 25, 24, 24, 23, 22, 22, 22, 23,
- 23, 23, 23, 23, 22, 22, 22, 22, 22, 21, 21, 21, 33, 33, 32, 32, 32, 32,
- 31, 29, 28, 28, 26, 26, 25, 24, 24, 23, 22, 22, 22, 23, 23, 23, 23, 23,
- 22, 22, 22, 22, 22, 21, 21, 21, 31, 30, 30, 30, 29, 29, 29, 27, 27, 26,
- 24, 24, 24, 23, 23, 22, 22, 22, 22, 22, 22, 23, 23, 23, 22, 22, 22, 22,
- 22, 21, 21, 21, 31, 30, 30, 30, 29, 29, 28, 27, 26, 26, 24, 24, 23, 23,
- 23, 22, 22, 22, 22, 22, 22, 23, 23, 23, 22, 22, 22, 22, 22, 21, 21, 21,
- 30, 29, 29, 28, 28, 28, 28, 26, 26, 25, 23, 23, 23, 23, 23, 22, 22, 22,
- 22, 22, 22, 23, 23, 23, 22, 22, 22, 22, 22, 21, 21, 21, 28, 27, 27, 27,
- 26, 26, 26, 24, 24, 23, 22, 22, 22, 22, 22, 22, 21, 21, 22, 22, 22, 23,
- 23, 23, 22, 22, 22, 22, 22, 22, 21, 21, 28, 27, 27, 27, 26, 26, 26, 24,
- 24, 23, 22, 22, 22, 22, 22, 22, 21, 21, 22, 22, 22, 23, 23, 23, 22, 22,
- 22, 22, 22, 22, 21, 21, 27, 26, 26, 26, 25, 25, 25, 24, 23, 23, 22, 22,
- 22, 21, 21, 21, 21, 21, 21, 21, 21, 22, 22, 22, 22, 22, 22, 21, 21, 21,
- 21, 21, 25, 24, 24, 24, 24, 24, 24, 23, 23, 23, 22, 22, 21, 21, 21, 21,
- 20, 20, 20, 21, 21, 21, 21, 21, 20, 20, 20, 20, 20, 20, 20, 20, 25, 24,
- 24, 24, 24, 24, 24, 23, 23, 23, 22, 22, 21, 21, 21, 21, 20, 20, 20, 21,
- 21, 21, 21, 21, 20, 20, 20, 20, 20, 20, 20, 20, 23, 23, 23, 23, 23, 23,
- 23, 22, 22, 22, 22, 22, 21, 21, 21, 20, 20, 20, 20, 20, 20, 20, 20, 20,
- 20, 20, 20, 20, 20, 20, 19, 19, 21, 21, 22, 22, 22, 22, 22, 22, 22, 22,
- 21, 21, 21, 20, 20, 20, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19,
- 19, 19, 19, 19, 21, 21, 22, 22, 22, 22, 22, 22, 22, 22, 21, 21, 21, 20,
- 20, 20, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19,
- 21, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 21, 20, 20, 20, 19, 19,
- 19, 19, 19, 19, 19, 19, 19, 19, 19, 18, 18, 18, 18, 18, 21, 22, 22, 22,
- 22, 23, 23, 22, 22, 22, 22, 22, 21, 21, 21, 20, 19, 19, 19, 19, 19, 19,
- 19, 18, 18, 18, 18, 18, 18, 18, 18, 18, 21, 22, 22, 22, 22, 23, 23, 22,
- 22, 22, 22, 22, 21, 21, 21, 20, 19, 19, 19, 19, 19, 19, 19, 18, 18, 18,
- 18, 18, 18, 18, 18, 18, 21, 22, 22, 22, 23, 23, 23, 23, 23, 23, 23, 23,
- 22, 21, 21, 20, 19, 19, 19, 19, 19, 18, 18, 18, 18, 17, 17, 17, 17, 17,
- 17, 17, 21, 22, 22, 22, 23, 23, 23, 23, 23, 23, 23, 23, 22, 21, 21, 20,
- 19, 19, 19, 19, 19, 18, 18, 18, 17, 17, 17, 17, 17, 17, 17, 17, 21, 22,
- 22, 22, 23, 23, 23, 23, 23, 23, 23, 23, 22, 21, 21, 20, 19, 19, 19, 18,
- 18, 18, 18, 17, 17, 17, 17, 17, 17, 17, 16, 16, 20, 21, 21, 21, 22, 22,
- 22, 22, 22, 22, 22, 22, 22, 20, 20, 20, 19, 19, 19, 18, 18, 18, 17, 17,
- 17, 17, 17, 16, 16, 16, 16, 16, 20, 21, 21, 21, 22, 22, 22, 22, 22, 22,
- 22, 22, 22, 20, 20, 20, 19, 19, 19, 18, 18, 17, 17, 17, 17, 17, 17, 16,
- 16, 16, 16, 16, 20, 21, 21, 21, 22, 22, 22, 22, 22, 22, 22, 22, 22, 20,
- 20, 20, 19, 19, 19, 18, 18, 17, 17, 17, 17, 17, 17, 16, 16, 16, 16, 16,
- 20, 20, 21, 21, 21, 22, 22, 22, 22, 22, 22, 22, 21, 20, 20, 20, 19, 19,
- 18, 18, 18, 17, 17, 17, 16, 16, 16, 16, 16, 15, 15, 15, 20, 20, 20, 21,
- 21, 22, 22, 22, 22, 22, 22, 22, 21, 20, 20, 20, 19, 19, 18, 18, 18, 17,
- 17, 17, 16, 16, 16, 16, 16, 15, 15, 15, 19, 20, 20, 20, 21, 21, 21, 21,
- 21, 21, 22, 22, 21, 20, 20, 20, 19, 19, 18, 18, 18, 17, 17, 17, 16, 16,
- 16, 15, 15, 15, 15, 15, 19, 19, 20, 20, 20, 21, 21, 21, 21, 21, 21, 21,
- 21, 20, 20, 19, 19, 19, 18, 18, 18, 17, 17, 16, 16, 16, 16, 15, 15, 15,
- 14, 14, 19, 19, 20, 20, 20, 21, 21, 21, 21, 21, 21, 21, 21, 20, 20, 19,
- 19, 19, 18, 18, 18, 17, 17, 16, 16, 16, 16, 15, 15, 15, 14, 14,
- /* Size 4x8 */
- 33, 27, 22, 20, 33, 26, 22, 21, 28, 23, 22, 22, 24, 22, 20, 20, 22, 21,
- 19, 19, 22, 22, 19, 17, 21, 22, 19, 16, 20, 21, 18, 15,
- /* Size 8x4 */
- 33, 33, 28, 24, 22, 22, 21, 20, 27, 26, 23, 22, 21, 22, 22, 21, 22, 22,
- 22, 20, 19, 19, 19, 18, 20, 21, 22, 20, 19, 17, 16, 15,
- /* Size 8x16 */
- 32, 33, 29, 27, 21, 21, 20, 20, 33, 33, 28, 26, 22, 22, 21, 20, 34, 32,
- 27, 26, 22, 23, 22, 21, 33, 31, 27, 25, 22, 23, 22, 21, 31, 28, 25, 23,
- 22, 22, 22, 22, 28, 26, 23, 22, 22, 23, 22, 22, 26, 25, 22, 22, 21, 22,
- 22, 21, 24, 24, 22, 21, 20, 21, 20, 20, 21, 22, 21, 21, 19, 19, 19, 19,
- 21, 22, 22, 21, 19, 19, 19, 18, 21, 22, 22, 21, 19, 18, 18, 18, 21, 23,
- 23, 22, 19, 18, 17, 17, 20, 22, 22, 21, 19, 17, 17, 16, 20, 22, 22, 21,
- 19, 17, 17, 16, 20, 21, 22, 21, 19, 17, 16, 16, 19, 20, 21, 20, 19, 17,
- 16, 15,
- /* Size 16x8 */
- 32, 33, 34, 33, 31, 28, 26, 24, 21, 21, 21, 21, 20, 20, 20, 19, 33, 33,
- 32, 31, 28, 26, 25, 24, 22, 22, 22, 23, 22, 22, 21, 20, 29, 28, 27, 27,
- 25, 23, 22, 22, 21, 22, 22, 23, 22, 22, 22, 21, 27, 26, 26, 25, 23, 22,
- 22, 21, 21, 21, 21, 22, 21, 21, 21, 20, 21, 22, 22, 22, 22, 22, 21, 20,
- 19, 19, 19, 19, 19, 19, 19, 19, 21, 22, 23, 23, 22, 23, 22, 21, 19, 19,
- 18, 18, 17, 17, 17, 17, 20, 21, 22, 22, 22, 22, 22, 20, 19, 19, 18, 17,
- 17, 17, 16, 16, 20, 20, 21, 21, 22, 22, 21, 20, 19, 18, 18, 17, 16, 16,
- 16, 15,
- /* Size 16x32 */
- 32, 33, 33, 33, 29, 28, 27, 22, 21, 21, 21, 21, 20, 20, 20, 19, 33, 33,
- 33, 32, 28, 27, 26, 22, 22, 22, 21, 21, 21, 20, 20, 19, 33, 33, 33, 32,
- 28, 27, 26, 22, 22, 22, 22, 22, 21, 20, 20, 20, 33, 33, 33, 32, 28, 27,
- 26, 22, 22, 22, 22, 22, 21, 20, 20, 20, 34, 33, 32, 32, 27, 26, 26, 23,
- 22, 22, 23, 23, 22, 21, 21, 20, 34, 33, 32, 31, 27, 26, 25, 23, 22, 22,
- 23, 23, 22, 21, 21, 20, 33, 32, 31, 31, 27, 26, 25, 23, 22, 22, 23, 23,
- 22, 21, 21, 20, 31, 29, 29, 28, 25, 24, 24, 22, 22, 22, 23, 23, 22, 22,
- 22, 21, 31, 29, 28, 28, 25, 24, 23, 22, 22, 22, 22, 22, 22, 22, 22, 21,
- 30, 28, 28, 28, 24, 23, 23, 22, 22, 22, 23, 23, 22, 22, 22, 21, 28, 26,
- 26, 25, 23, 22, 22, 22, 22, 22, 23, 23, 22, 22, 22, 21, 28, 26, 26, 25,
- 23, 22, 22, 22, 22, 22, 23, 23, 22, 22, 22, 21, 26, 26, 25, 24, 22, 22,
- 22, 21, 21, 21, 22, 22, 22, 21, 21, 20, 24, 24, 24, 24, 22, 22, 21, 20,
- 20, 20, 21, 21, 20, 20, 20, 20, 24, 24, 24, 24, 22, 22, 21, 20, 20, 20,
- 21, 21, 20, 20, 20, 20, 23, 23, 23, 23, 22, 22, 21, 20, 20, 20, 20, 20,
- 20, 20, 20, 19, 21, 22, 22, 22, 21, 21, 21, 20, 19, 19, 19, 19, 19, 19,
- 19, 19, 21, 22, 22, 22, 21, 21, 21, 20, 19, 19, 19, 19, 19, 19, 19, 19,
- 21, 22, 22, 22, 22, 22, 21, 20, 19, 19, 19, 19, 19, 18, 18, 18, 21, 22,
- 22, 22, 22, 22, 21, 20, 19, 19, 18, 18, 18, 18, 18, 17, 21, 22, 22, 22,
- 22, 22, 21, 20, 19, 19, 18, 18, 18, 18, 18, 17, 21, 22, 23, 23, 22, 22,
- 22, 20, 19, 19, 18, 18, 18, 17, 17, 17, 21, 22, 23, 23, 23, 22, 22, 20,
- 19, 19, 18, 18, 17, 17, 17, 17, 21, 22, 23, 23, 22, 22, 22, 20, 19, 19,
- 18, 18, 17, 17, 17, 16, 20, 22, 22, 22, 22, 22, 21, 19, 19, 19, 17, 17,
- 17, 16, 16, 16, 20, 21, 22, 22, 22, 22, 21, 19, 19, 19, 17, 17, 17, 16,
- 16, 16, 20, 21, 22, 22, 22, 22, 21, 19, 19, 19, 17, 17, 17, 16, 16, 16,
- 20, 21, 21, 21, 22, 22, 21, 19, 19, 18, 17, 17, 16, 16, 16, 15, 20, 21,
- 21, 21, 22, 22, 21, 19, 19, 18, 17, 17, 16, 16, 16, 15, 19, 20, 21, 21,
- 21, 21, 21, 19, 19, 18, 17, 17, 16, 15, 15, 15, 19, 20, 20, 20, 21, 21,
- 20, 19, 19, 18, 17, 17, 16, 15, 15, 14, 19, 20, 20, 20, 21, 21, 20, 19,
- 19, 18, 17, 17, 16, 15, 15, 14,
- /* Size 32x16 */
- 32, 33, 33, 33, 34, 34, 33, 31, 31, 30, 28, 28, 26, 24, 24, 23, 21, 21,
- 21, 21, 21, 21, 21, 21, 20, 20, 20, 20, 20, 19, 19, 19, 33, 33, 33, 33,
- 33, 33, 32, 29, 29, 28, 26, 26, 26, 24, 24, 23, 22, 22, 22, 22, 22, 22,
- 22, 22, 22, 21, 21, 21, 21, 20, 20, 20, 33, 33, 33, 33, 32, 32, 31, 29,
- 28, 28, 26, 26, 25, 24, 24, 23, 22, 22, 22, 22, 22, 23, 23, 23, 22, 22,
- 22, 21, 21, 21, 20, 20, 33, 32, 32, 32, 32, 31, 31, 28, 28, 28, 25, 25,
- 24, 24, 24, 23, 22, 22, 22, 22, 22, 23, 23, 23, 22, 22, 22, 21, 21, 21,
- 20, 20, 29, 28, 28, 28, 27, 27, 27, 25, 25, 24, 23, 23, 22, 22, 22, 22,
- 21, 21, 22, 22, 22, 22, 23, 22, 22, 22, 22, 22, 22, 21, 21, 21, 28, 27,
- 27, 27, 26, 26, 26, 24, 24, 23, 22, 22, 22, 22, 22, 22, 21, 21, 22, 22,
- 22, 22, 22, 22, 22, 22, 22, 22, 22, 21, 21, 21, 27, 26, 26, 26, 26, 25,
- 25, 24, 23, 23, 22, 22, 22, 21, 21, 21, 21, 21, 21, 21, 21, 22, 22, 22,
- 21, 21, 21, 21, 21, 21, 20, 20, 22, 22, 22, 22, 23, 23, 23, 22, 22, 22,
- 22, 22, 21, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 19, 19, 19, 19,
- 19, 19, 19, 19, 21, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 21, 20,
- 20, 20, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19,
- 21, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 21, 20, 20, 20, 19, 19,
- 19, 19, 19, 19, 19, 19, 19, 19, 19, 18, 18, 18, 18, 18, 21, 21, 22, 22,
- 23, 23, 23, 23, 22, 23, 23, 23, 22, 21, 21, 20, 19, 19, 19, 18, 18, 18,
- 18, 18, 17, 17, 17, 17, 17, 17, 17, 17, 21, 21, 22, 22, 23, 23, 23, 23,
- 22, 23, 23, 23, 22, 21, 21, 20, 19, 19, 19, 18, 18, 18, 18, 18, 17, 17,
- 17, 17, 17, 17, 17, 17, 20, 21, 21, 21, 22, 22, 22, 22, 22, 22, 22, 22,
- 22, 20, 20, 20, 19, 19, 19, 18, 18, 18, 17, 17, 17, 17, 17, 16, 16, 16,
- 16, 16, 20, 20, 20, 20, 21, 21, 21, 22, 22, 22, 22, 22, 21, 20, 20, 20,
- 19, 19, 18, 18, 18, 17, 17, 17, 16, 16, 16, 16, 16, 15, 15, 15, 20, 20,
- 20, 20, 21, 21, 21, 22, 22, 22, 22, 22, 21, 20, 20, 20, 19, 19, 18, 18,
- 18, 17, 17, 17, 16, 16, 16, 16, 16, 15, 15, 15, 19, 19, 20, 20, 20, 20,
- 20, 21, 21, 21, 21, 21, 20, 20, 20, 19, 19, 19, 18, 17, 17, 17, 17, 16,
- 16, 16, 16, 15, 15, 15, 14, 14,
- /* Size 4x16 */
- 33, 28, 21, 20, 33, 27, 22, 20, 33, 26, 22, 21, 32, 26, 22, 21, 29, 24,
- 22, 22, 26, 22, 22, 22, 26, 22, 21, 21, 24, 22, 20, 20, 22, 21, 19, 19,
- 22, 22, 19, 18, 22, 22, 19, 18, 22, 22, 19, 17, 22, 22, 19, 16, 21, 22,
- 19, 16, 21, 22, 18, 16, 20, 21, 18, 15,
- /* Size 16x4 */
- 33, 33, 33, 32, 29, 26, 26, 24, 22, 22, 22, 22, 22, 21, 21, 20, 28, 27,
- 26, 26, 24, 22, 22, 22, 21, 22, 22, 22, 22, 22, 22, 21, 21, 22, 22, 22,
- 22, 22, 21, 20, 19, 19, 19, 19, 19, 19, 18, 18, 20, 20, 21, 21, 22, 22,
- 21, 20, 19, 18, 18, 17, 16, 16, 16, 15,
- /* Size 8x32 */
- 32, 33, 29, 27, 21, 21, 20, 20, 33, 33, 28, 26, 22, 21, 21, 20, 33, 33,
- 28, 26, 22, 22, 21, 20, 33, 33, 28, 26, 22, 22, 21, 20, 34, 32, 27, 26,
- 22, 23, 22, 21, 34, 32, 27, 25, 22, 23, 22, 21, 33, 31, 27, 25, 22, 23,
- 22, 21, 31, 29, 25, 24, 22, 23, 22, 22, 31, 28, 25, 23, 22, 22, 22, 22,
- 30, 28, 24, 23, 22, 23, 22, 22, 28, 26, 23, 22, 22, 23, 22, 22, 28, 26,
- 23, 22, 22, 23, 22, 22, 26, 25, 22, 22, 21, 22, 22, 21, 24, 24, 22, 21,
- 20, 21, 20, 20, 24, 24, 22, 21, 20, 21, 20, 20, 23, 23, 22, 21, 20, 20,
- 20, 20, 21, 22, 21, 21, 19, 19, 19, 19, 21, 22, 21, 21, 19, 19, 19, 19,
- 21, 22, 22, 21, 19, 19, 19, 18, 21, 22, 22, 21, 19, 18, 18, 18, 21, 22,
- 22, 21, 19, 18, 18, 18, 21, 23, 22, 22, 19, 18, 18, 17, 21, 23, 23, 22,
- 19, 18, 17, 17, 21, 23, 22, 22, 19, 18, 17, 17, 20, 22, 22, 21, 19, 17,
- 17, 16, 20, 22, 22, 21, 19, 17, 17, 16, 20, 22, 22, 21, 19, 17, 17, 16,
- 20, 21, 22, 21, 19, 17, 16, 16, 20, 21, 22, 21, 19, 17, 16, 16, 19, 21,
- 21, 21, 19, 17, 16, 15, 19, 20, 21, 20, 19, 17, 16, 15, 19, 20, 21, 20,
- 19, 17, 16, 15,
- /* Size 32x8 */
- 32, 33, 33, 33, 34, 34, 33, 31, 31, 30, 28, 28, 26, 24, 24, 23, 21, 21,
- 21, 21, 21, 21, 21, 21, 20, 20, 20, 20, 20, 19, 19, 19, 33, 33, 33, 33,
- 32, 32, 31, 29, 28, 28, 26, 26, 25, 24, 24, 23, 22, 22, 22, 22, 22, 23,
- 23, 23, 22, 22, 22, 21, 21, 21, 20, 20, 29, 28, 28, 28, 27, 27, 27, 25,
- 25, 24, 23, 23, 22, 22, 22, 22, 21, 21, 22, 22, 22, 22, 23, 22, 22, 22,
- 22, 22, 22, 21, 21, 21, 27, 26, 26, 26, 26, 25, 25, 24, 23, 23, 22, 22,
- 22, 21, 21, 21, 21, 21, 21, 21, 21, 22, 22, 22, 21, 21, 21, 21, 21, 21,
- 20, 20, 21, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 21, 20, 20, 20,
- 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 21, 21,
- 22, 22, 23, 23, 23, 23, 22, 23, 23, 23, 22, 21, 21, 20, 19, 19, 19, 18,
- 18, 18, 18, 18, 17, 17, 17, 17, 17, 17, 17, 17, 20, 21, 21, 21, 22, 22,
- 22, 22, 22, 22, 22, 22, 22, 20, 20, 20, 19, 19, 19, 18, 18, 18, 17, 17,
- 17, 17, 17, 16, 16, 16, 16, 16, 20, 20, 20, 20, 21, 21, 21, 22, 22, 22,
- 22, 22, 21, 20, 20, 20, 19, 19, 18, 18, 18, 17, 17, 17, 16, 16, 16, 16,
- 16, 15, 15, 15 },
- },
- {
- { /* Luma */
- /* Size 4x4 */
- 32, 32, 29, 24, 32, 30, 28, 24, 29, 28, 21, 19, 24, 24, 19, 16,
- /* Size 8x8 */
- 33, 33, 32, 32, 30, 28, 24, 22, 33, 32, 32, 32, 30, 28, 25, 23, 32, 32,
- 31, 30, 29, 27, 24, 23, 32, 32, 30, 29, 28, 26, 24, 22, 30, 30, 29, 28,
- 25, 23, 21, 20, 28, 28, 27, 26, 23, 20, 18, 17, 24, 25, 24, 24, 21, 18,
- 16, 15, 22, 23, 23, 22, 20, 17, 15, 14,
- /* Size 16x16 */
- 32, 33, 33, 33, 33, 33, 32, 32, 30, 29, 28, 26, 25, 23, 22, 21, 33, 32,
- 32, 32, 32, 32, 32, 31, 30, 29, 29, 27, 26, 24, 23, 22, 33, 32, 32, 32,
- 32, 32, 32, 31, 30, 30, 29, 27, 26, 24, 23, 23, 33, 32, 32, 32, 32, 32,
- 32, 31, 31, 30, 30, 28, 27, 25, 23, 23, 33, 32, 32, 32, 31, 31, 31, 30,
- 29, 28, 28, 26, 26, 24, 23, 23, 33, 32, 32, 32, 31, 31, 30, 30, 29, 28,
- 28, 26, 26, 24, 23, 23, 32, 32, 32, 32, 31, 30, 29, 28, 28, 27, 27, 26,
- 25, 24, 23, 22, 32, 31, 31, 31, 30, 30, 28, 28, 27, 26, 26, 24, 24, 23,
- 22, 22, 30, 30, 30, 31, 29, 29, 28, 27, 26, 24, 24, 23, 22, 22, 20, 20,
- 29, 29, 30, 30, 28, 28, 27, 26, 24, 22, 22, 21, 20, 20, 19, 19, 28, 29,
- 29, 30, 28, 28, 27, 26, 24, 22, 21, 20, 20, 19, 18, 18, 26, 27, 27, 28,
- 26, 26, 26, 24, 23, 21, 20, 19, 19, 18, 17, 17, 25, 26, 26, 27, 26, 26,
- 25, 24, 22, 20, 20, 19, 18, 17, 17, 16, 23, 24, 24, 25, 24, 24, 24, 23,
- 22, 20, 19, 18, 17, 16, 16, 15, 22, 23, 23, 23, 23, 23, 23, 22, 20, 19,
- 18, 17, 17, 16, 15, 15, 21, 22, 23, 23, 23, 23, 22, 22, 20, 19, 18, 17,
- 16, 15, 15, 14,
- /* Size 32x32 */
- 32, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 32, 32, 32, 32, 30, 30, 30,
- 29, 28, 28, 27, 26, 26, 25, 23, 23, 23, 22, 21, 21, 20, 33, 33, 33, 33,
- 33, 33, 33, 32, 32, 32, 32, 32, 32, 32, 31, 30, 30, 30, 29, 29, 29, 28,
- 26, 26, 26, 24, 24, 23, 22, 22, 22, 20, 33, 33, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 31, 30, 30, 30, 29, 29, 29, 28, 27, 27, 26, 24,
- 24, 24, 23, 22, 22, 21, 33, 33, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 31, 30, 30, 30, 29, 29, 29, 28, 27, 27, 26, 24, 24, 24, 23, 22,
- 22, 21, 33, 33, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 31, 30,
- 30, 30, 30, 29, 29, 28, 27, 27, 26, 24, 24, 24, 23, 23, 23, 21, 33, 33,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 31, 31, 31, 30, 30, 30,
- 30, 28, 28, 28, 27, 25, 25, 25, 23, 23, 23, 22, 33, 33, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 31, 31, 31, 30, 30, 30, 30, 28, 28, 28,
- 27, 25, 25, 25, 23, 23, 23, 22, 33, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 31, 31, 31, 31, 30, 30, 30, 30, 29, 29, 28, 27, 27, 26, 25, 25, 24,
- 23, 23, 23, 22, 33, 32, 32, 32, 32, 32, 32, 32, 31, 31, 31, 31, 31, 31,
- 30, 29, 29, 29, 28, 28, 28, 28, 26, 26, 26, 24, 24, 24, 23, 23, 23, 21,
- 33, 32, 32, 32, 32, 32, 32, 32, 31, 31, 31, 31, 30, 30, 30, 29, 29, 29,
- 28, 28, 28, 27, 26, 26, 26, 24, 24, 24, 23, 23, 23, 21, 33, 32, 32, 32,
- 32, 32, 32, 32, 31, 31, 31, 31, 30, 30, 30, 29, 29, 29, 28, 28, 28, 27,
- 26, 26, 26, 24, 24, 24, 23, 23, 23, 21, 32, 32, 32, 32, 32, 32, 32, 31,
- 31, 31, 31, 30, 29, 29, 29, 28, 28, 28, 28, 28, 28, 26, 26, 26, 25, 24,
- 24, 24, 23, 23, 23, 21, 32, 32, 32, 32, 32, 32, 32, 31, 31, 30, 30, 29,
- 29, 29, 28, 28, 28, 28, 27, 27, 27, 26, 26, 26, 25, 24, 24, 24, 23, 22,
- 22, 21, 32, 32, 32, 32, 32, 32, 32, 31, 31, 30, 30, 29, 29, 29, 28, 28,
- 28, 28, 27, 27, 27, 26, 26, 26, 25, 24, 24, 24, 23, 22, 22, 21, 32, 31,
- 31, 31, 31, 31, 31, 31, 30, 30, 30, 29, 28, 28, 28, 27, 27, 27, 26, 26,
- 26, 25, 24, 24, 24, 23, 23, 23, 22, 22, 22, 20, 30, 30, 30, 30, 30, 31,
- 31, 30, 29, 29, 29, 28, 28, 28, 27, 26, 26, 26, 24, 24, 24, 23, 23, 23,
- 22, 22, 22, 21, 20, 20, 20, 19, 30, 30, 30, 30, 30, 31, 31, 30, 29, 29,
- 29, 28, 28, 28, 27, 26, 26, 26, 24, 24, 24, 23, 23, 23, 22, 22, 22, 21,
- 20, 20, 20, 19, 30, 30, 30, 30, 30, 30, 30, 30, 29, 29, 29, 28, 28, 28,
- 27, 26, 26, 25, 24, 23, 23, 23, 22, 22, 22, 21, 21, 21, 20, 20, 20, 19,
- 29, 29, 29, 29, 30, 30, 30, 30, 28, 28, 28, 28, 27, 27, 26, 24, 24, 24,
- 22, 22, 22, 21, 21, 21, 20, 20, 20, 19, 19, 19, 19, 18, 28, 29, 29, 29,
- 29, 30, 30, 29, 28, 28, 28, 28, 27, 27, 26, 24, 24, 23, 22, 21, 21, 20,
- 20, 20, 20, 19, 19, 19, 18, 18, 18, 18, 28, 29, 29, 29, 29, 30, 30, 29,
- 28, 28, 28, 28, 27, 27, 26, 24, 24, 23, 22, 21, 21, 20, 20, 20, 20, 19,
- 19, 19, 18, 18, 18, 18, 27, 28, 28, 28, 28, 28, 28, 28, 28, 27, 27, 26,
- 26, 26, 25, 23, 23, 23, 21, 20, 20, 20, 20, 20, 19, 18, 18, 18, 18, 17,
- 17, 17, 26, 26, 27, 27, 27, 28, 28, 27, 26, 26, 26, 26, 26, 26, 24, 23,
- 23, 22, 21, 20, 20, 20, 19, 19, 19, 18, 18, 18, 17, 17, 17, 16, 26, 26,
- 27, 27, 27, 28, 28, 27, 26, 26, 26, 26, 26, 26, 24, 23, 23, 22, 21, 20,
- 20, 20, 19, 19, 19, 18, 18, 18, 17, 17, 17, 16, 25, 26, 26, 26, 26, 27,
- 27, 26, 26, 26, 26, 25, 25, 25, 24, 22, 22, 22, 20, 20, 20, 19, 19, 19,
- 18, 17, 17, 17, 17, 16, 16, 16, 23, 24, 24, 24, 24, 25, 25, 25, 24, 24,
- 24, 24, 24, 24, 23, 22, 22, 21, 20, 19, 19, 18, 18, 18, 17, 16, 16, 16,
- 16, 15, 15, 15, 23, 24, 24, 24, 24, 25, 25, 25, 24, 24, 24, 24, 24, 24,
- 23, 22, 22, 21, 20, 19, 19, 18, 18, 18, 17, 16, 16, 16, 16, 15, 15, 15,
- 23, 23, 24, 24, 24, 25, 25, 24, 24, 24, 24, 24, 24, 24, 23, 21, 21, 21,
- 19, 19, 19, 18, 18, 18, 17, 16, 16, 16, 15, 15, 15, 15, 22, 22, 23, 23,
- 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 22, 20, 20, 20, 19, 18, 18, 18,
- 17, 17, 17, 16, 16, 15, 15, 15, 15, 14, 21, 22, 22, 22, 23, 23, 23, 23,
- 23, 23, 23, 23, 22, 22, 22, 20, 20, 20, 19, 18, 18, 17, 17, 17, 16, 15,
- 15, 15, 15, 14, 14, 14, 21, 22, 22, 22, 23, 23, 23, 23, 23, 23, 23, 23,
- 22, 22, 22, 20, 20, 20, 19, 18, 18, 17, 17, 17, 16, 15, 15, 15, 15, 14,
- 14, 14, 20, 20, 21, 21, 21, 22, 22, 22, 21, 21, 21, 21, 21, 21, 20, 19,
- 19, 19, 18, 18, 18, 17, 16, 16, 16, 15, 15, 15, 14, 14, 14, 13,
- /* Size 4x8 */
- 33, 32, 29, 24, 32, 31, 30, 25, 32, 30, 28, 24, 32, 29, 27, 24, 30, 28,
- 24, 21, 28, 26, 21, 18, 24, 24, 19, 16, 22, 22, 18, 15,
- /* Size 8x4 */
- 33, 32, 32, 32, 30, 28, 24, 22, 32, 31, 30, 29, 28, 26, 24, 22, 29, 30,
- 28, 27, 24, 21, 19, 18, 24, 25, 24, 24, 21, 18, 16, 15,
- /* Size 8x16 */
- 32, 33, 33, 32, 29, 28, 23, 22, 33, 32, 32, 32, 29, 29, 24, 23, 33, 32,
- 32, 32, 30, 29, 25, 23, 33, 32, 32, 31, 30, 30, 25, 23, 33, 32, 31, 30,
- 29, 28, 24, 23, 32, 32, 31, 30, 28, 28, 24, 23, 32, 31, 30, 29, 28, 27,
- 24, 23, 32, 31, 30, 28, 26, 26, 23, 22, 30, 30, 29, 28, 25, 24, 21, 20,
- 29, 30, 28, 27, 23, 22, 20, 19, 28, 30, 28, 27, 22, 21, 19, 18, 26, 28,
- 26, 26, 21, 20, 18, 17, 25, 26, 26, 25, 21, 20, 17, 17, 23, 25, 24, 24,
- 20, 19, 16, 16, 22, 23, 23, 23, 19, 18, 16, 15, 21, 23, 23, 22, 19, 18,
- 15, 15,
- /* Size 16x8 */
- 32, 33, 33, 33, 33, 32, 32, 32, 30, 29, 28, 26, 25, 23, 22, 21, 33, 32,
- 32, 32, 32, 32, 31, 31, 30, 30, 30, 28, 26, 25, 23, 23, 33, 32, 32, 32,
- 31, 31, 30, 30, 29, 28, 28, 26, 26, 24, 23, 23, 32, 32, 32, 31, 30, 30,
- 29, 28, 28, 27, 27, 26, 25, 24, 23, 22, 29, 29, 30, 30, 29, 28, 28, 26,
- 25, 23, 22, 21, 21, 20, 19, 19, 28, 29, 29, 30, 28, 28, 27, 26, 24, 22,
- 21, 20, 20, 19, 18, 18, 23, 24, 25, 25, 24, 24, 24, 23, 21, 20, 19, 18,
- 17, 16, 16, 15, 22, 23, 23, 23, 23, 23, 23, 22, 20, 19, 18, 17, 17, 16,
- 15, 15,
- /* Size 16x32 */
- 32, 33, 33, 33, 33, 32, 32, 32, 29, 28, 28, 26, 23, 23, 22, 19, 33, 33,
- 32, 32, 32, 32, 32, 31, 29, 29, 29, 26, 24, 24, 22, 20, 33, 32, 32, 32,
- 32, 32, 32, 31, 29, 29, 29, 26, 24, 24, 23, 20, 33, 32, 32, 32, 32, 32,
- 32, 31, 29, 29, 29, 26, 24, 24, 23, 20, 33, 32, 32, 32, 32, 32, 32, 31,
- 30, 29, 29, 26, 25, 25, 23, 20, 33, 32, 32, 32, 32, 31, 31, 31, 30, 30,
- 30, 27, 25, 25, 23, 21, 33, 32, 32, 32, 32, 31, 31, 31, 30, 30, 30, 27,
- 25, 25, 23, 21, 33, 32, 32, 32, 32, 31, 31, 31, 30, 29, 29, 27, 25, 25,
- 23, 21, 33, 32, 32, 32, 31, 30, 30, 30, 29, 28, 28, 26, 24, 24, 23, 21,
- 32, 32, 32, 32, 31, 30, 30, 30, 28, 28, 28, 26, 24, 24, 23, 20, 32, 32,
- 32, 32, 31, 30, 30, 30, 28, 28, 28, 26, 24, 24, 23, 20, 32, 32, 32, 32,
- 31, 29, 29, 29, 28, 28, 28, 26, 24, 24, 23, 21, 32, 32, 31, 31, 30, 29,
- 29, 28, 28, 27, 27, 25, 24, 24, 23, 21, 32, 32, 31, 31, 30, 29, 29, 28,
- 28, 27, 27, 25, 24, 24, 23, 21, 32, 31, 31, 31, 30, 28, 28, 28, 26, 26,
- 26, 24, 23, 23, 22, 20, 30, 30, 30, 30, 29, 28, 28, 27, 25, 24, 24, 23,
- 21, 21, 20, 19, 30, 30, 30, 30, 29, 28, 28, 27, 25, 24, 24, 23, 21, 21,
- 20, 19, 30, 30, 30, 30, 29, 28, 28, 27, 24, 24, 24, 22, 21, 21, 20, 19,
- 29, 29, 30, 30, 28, 27, 27, 26, 23, 22, 22, 20, 20, 20, 19, 17, 28, 29,
- 30, 30, 28, 27, 27, 26, 22, 21, 21, 20, 19, 19, 18, 17, 28, 29, 30, 30,
- 28, 27, 27, 26, 22, 21, 21, 20, 19, 19, 18, 17, 27, 28, 28, 28, 28, 26,
- 26, 25, 22, 21, 21, 19, 18, 18, 18, 16, 26, 27, 28, 28, 26, 26, 26, 24,
- 21, 20, 20, 19, 18, 18, 17, 16, 26, 27, 28, 28, 26, 26, 26, 24, 21, 20,
- 20, 19, 18, 18, 17, 16, 25, 26, 26, 26, 26, 25, 25, 24, 21, 20, 20, 18,
- 17, 17, 17, 15, 23, 24, 25, 25, 24, 24, 24, 23, 20, 19, 19, 17, 16, 16,
- 16, 14, 23, 24, 25, 25, 24, 24, 24, 23, 20, 19, 19, 17, 16, 16, 16, 14,
- 23, 24, 24, 24, 24, 24, 24, 23, 20, 19, 19, 17, 16, 16, 15, 14, 22, 23,
- 23, 23, 23, 23, 23, 22, 19, 18, 18, 17, 16, 16, 15, 14, 21, 22, 23, 23,
- 23, 22, 22, 21, 19, 18, 18, 17, 15, 15, 15, 13, 21, 22, 23, 23, 23, 22,
- 22, 21, 19, 18, 18, 17, 15, 15, 15, 13, 20, 21, 22, 22, 21, 21, 21, 20,
- 18, 18, 18, 16, 15, 15, 14, 13,
- /* Size 32x16 */
- 32, 33, 33, 33, 33, 33, 33, 33, 33, 32, 32, 32, 32, 32, 32, 30, 30, 30,
- 29, 28, 28, 27, 26, 26, 25, 23, 23, 23, 22, 21, 21, 20, 33, 33, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 31, 30, 30, 30, 29, 29, 29, 28,
- 27, 27, 26, 24, 24, 24, 23, 22, 22, 21, 33, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 31, 31, 31, 30, 30, 30, 30, 30, 30, 28, 28, 28, 26, 25,
- 25, 24, 23, 23, 23, 22, 33, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 31, 31, 31, 30, 30, 30, 30, 30, 30, 28, 28, 28, 26, 25, 25, 24, 23, 23,
- 23, 22, 33, 32, 32, 32, 32, 32, 32, 32, 31, 31, 31, 31, 30, 30, 30, 29,
- 29, 29, 28, 28, 28, 28, 26, 26, 26, 24, 24, 24, 23, 23, 23, 21, 32, 32,
- 32, 32, 32, 31, 31, 31, 30, 30, 30, 29, 29, 29, 28, 28, 28, 28, 27, 27,
- 27, 26, 26, 26, 25, 24, 24, 24, 23, 22, 22, 21, 32, 32, 32, 32, 32, 31,
- 31, 31, 30, 30, 30, 29, 29, 29, 28, 28, 28, 28, 27, 27, 27, 26, 26, 26,
- 25, 24, 24, 24, 23, 22, 22, 21, 32, 31, 31, 31, 31, 31, 31, 31, 30, 30,
- 30, 29, 28, 28, 28, 27, 27, 27, 26, 26, 26, 25, 24, 24, 24, 23, 23, 23,
- 22, 21, 21, 20, 29, 29, 29, 29, 30, 30, 30, 30, 29, 28, 28, 28, 28, 28,
- 26, 25, 25, 24, 23, 22, 22, 22, 21, 21, 21, 20, 20, 20, 19, 19, 19, 18,
- 28, 29, 29, 29, 29, 30, 30, 29, 28, 28, 28, 28, 27, 27, 26, 24, 24, 24,
- 22, 21, 21, 21, 20, 20, 20, 19, 19, 19, 18, 18, 18, 18, 28, 29, 29, 29,
- 29, 30, 30, 29, 28, 28, 28, 28, 27, 27, 26, 24, 24, 24, 22, 21, 21, 21,
- 20, 20, 20, 19, 19, 19, 18, 18, 18, 18, 26, 26, 26, 26, 26, 27, 27, 27,
- 26, 26, 26, 26, 25, 25, 24, 23, 23, 22, 20, 20, 20, 19, 19, 19, 18, 17,
- 17, 17, 17, 17, 17, 16, 23, 24, 24, 24, 25, 25, 25, 25, 24, 24, 24, 24,
- 24, 24, 23, 21, 21, 21, 20, 19, 19, 18, 18, 18, 17, 16, 16, 16, 16, 15,
- 15, 15, 23, 24, 24, 24, 25, 25, 25, 25, 24, 24, 24, 24, 24, 24, 23, 21,
- 21, 21, 20, 19, 19, 18, 18, 18, 17, 16, 16, 16, 16, 15, 15, 15, 22, 22,
- 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 22, 20, 20, 20, 19, 18,
- 18, 18, 17, 17, 17, 16, 16, 15, 15, 15, 15, 14, 19, 20, 20, 20, 20, 21,
- 21, 21, 21, 20, 20, 21, 21, 21, 20, 19, 19, 19, 17, 17, 17, 16, 16, 16,
- 15, 14, 14, 14, 14, 13, 13, 13,
- /* Size 4x16 */
- 33, 32, 28, 23, 32, 32, 29, 24, 32, 32, 29, 25, 32, 31, 30, 25, 32, 30,
- 28, 24, 32, 30, 28, 24, 32, 29, 27, 24, 31, 28, 26, 23, 30, 28, 24, 21,
- 29, 27, 22, 20, 29, 27, 21, 19, 27, 26, 20, 18, 26, 25, 20, 17, 24, 24,
- 19, 16, 23, 23, 18, 16, 22, 22, 18, 15,
- /* Size 16x4 */
- 33, 32, 32, 32, 32, 32, 32, 31, 30, 29, 29, 27, 26, 24, 23, 22, 32, 32,
- 32, 31, 30, 30, 29, 28, 28, 27, 27, 26, 25, 24, 23, 22, 28, 29, 29, 30,
- 28, 28, 27, 26, 24, 22, 21, 20, 20, 19, 18, 18, 23, 24, 25, 25, 24, 24,
- 24, 23, 21, 20, 19, 18, 17, 16, 16, 15,
- /* Size 8x32 */
- 32, 33, 33, 32, 29, 28, 23, 22, 33, 32, 32, 32, 29, 29, 24, 22, 33, 32,
- 32, 32, 29, 29, 24, 23, 33, 32, 32, 32, 29, 29, 24, 23, 33, 32, 32, 32,
- 30, 29, 25, 23, 33, 32, 32, 31, 30, 30, 25, 23, 33, 32, 32, 31, 30, 30,
- 25, 23, 33, 32, 32, 31, 30, 29, 25, 23, 33, 32, 31, 30, 29, 28, 24, 23,
- 32, 32, 31, 30, 28, 28, 24, 23, 32, 32, 31, 30, 28, 28, 24, 23, 32, 32,
- 31, 29, 28, 28, 24, 23, 32, 31, 30, 29, 28, 27, 24, 23, 32, 31, 30, 29,
- 28, 27, 24, 23, 32, 31, 30, 28, 26, 26, 23, 22, 30, 30, 29, 28, 25, 24,
- 21, 20, 30, 30, 29, 28, 25, 24, 21, 20, 30, 30, 29, 28, 24, 24, 21, 20,
- 29, 30, 28, 27, 23, 22, 20, 19, 28, 30, 28, 27, 22, 21, 19, 18, 28, 30,
- 28, 27, 22, 21, 19, 18, 27, 28, 28, 26, 22, 21, 18, 18, 26, 28, 26, 26,
- 21, 20, 18, 17, 26, 28, 26, 26, 21, 20, 18, 17, 25, 26, 26, 25, 21, 20,
- 17, 17, 23, 25, 24, 24, 20, 19, 16, 16, 23, 25, 24, 24, 20, 19, 16, 16,
- 23, 24, 24, 24, 20, 19, 16, 15, 22, 23, 23, 23, 19, 18, 16, 15, 21, 23,
- 23, 22, 19, 18, 15, 15, 21, 23, 23, 22, 19, 18, 15, 15, 20, 22, 21, 21,
- 18, 18, 15, 14,
- /* Size 32x8 */
- 32, 33, 33, 33, 33, 33, 33, 33, 33, 32, 32, 32, 32, 32, 32, 30, 30, 30,
- 29, 28, 28, 27, 26, 26, 25, 23, 23, 23, 22, 21, 21, 20, 33, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 31, 31, 31, 30, 30, 30, 30, 30, 30, 28,
- 28, 28, 26, 25, 25, 24, 23, 23, 23, 22, 33, 32, 32, 32, 32, 32, 32, 32,
- 31, 31, 31, 31, 30, 30, 30, 29, 29, 29, 28, 28, 28, 28, 26, 26, 26, 24,
- 24, 24, 23, 23, 23, 21, 32, 32, 32, 32, 32, 31, 31, 31, 30, 30, 30, 29,
- 29, 29, 28, 28, 28, 28, 27, 27, 27, 26, 26, 26, 25, 24, 24, 24, 23, 22,
- 22, 21, 29, 29, 29, 29, 30, 30, 30, 30, 29, 28, 28, 28, 28, 28, 26, 25,
- 25, 24, 23, 22, 22, 22, 21, 21, 21, 20, 20, 20, 19, 19, 19, 18, 28, 29,
- 29, 29, 29, 30, 30, 29, 28, 28, 28, 28, 27, 27, 26, 24, 24, 24, 22, 21,
- 21, 21, 20, 20, 20, 19, 19, 19, 18, 18, 18, 18, 23, 24, 24, 24, 25, 25,
- 25, 25, 24, 24, 24, 24, 24, 24, 23, 21, 21, 21, 20, 19, 19, 18, 18, 18,
- 17, 16, 16, 16, 16, 15, 15, 15, 22, 22, 23, 23, 23, 23, 23, 23, 23, 23,
- 23, 23, 23, 23, 22, 20, 20, 20, 19, 18, 18, 18, 17, 17, 17, 16, 16, 15,
- 15, 15, 15, 14 },
- { /* Chroma */
- /* Size 4x4 */
- 33, 28, 22, 22, 28, 23, 22, 23, 22, 22, 19, 19, 22, 23, 19, 17,
- /* Size 8x8 */
- 33, 33, 30, 28, 24, 21, 22, 21, 33, 32, 29, 26, 24, 22, 23, 22, 30, 29,
- 26, 24, 23, 22, 23, 22, 28, 26, 24, 22, 22, 22, 23, 22, 24, 24, 23, 22,
- 21, 20, 20, 20, 21, 22, 22, 22, 20, 19, 19, 19, 22, 23, 23, 23, 20, 19,
- 18, 17, 21, 22, 22, 22, 20, 19, 17, 17,
- /* Size 16x16 */
- 32, 33, 33, 34, 31, 31, 28, 27, 25, 22, 21, 21, 21, 21, 20, 20, 33, 33,
- 33, 33, 30, 30, 27, 26, 24, 22, 22, 22, 22, 22, 21, 21, 33, 33, 33, 33,
- 30, 29, 26, 26, 24, 22, 22, 22, 22, 22, 22, 22, 34, 33, 33, 32, 30, 29,
- 26, 25, 24, 23, 22, 23, 23, 23, 22, 22, 31, 30, 30, 30, 28, 27, 24, 24,
- 23, 22, 22, 22, 22, 23, 22, 22, 31, 30, 29, 29, 27, 26, 24, 23, 23, 22,
- 22, 22, 22, 23, 22, 22, 28, 27, 26, 26, 24, 24, 22, 22, 22, 22, 21, 22,
- 22, 23, 22, 22, 27, 26, 26, 25, 24, 23, 22, 22, 21, 21, 21, 21, 22, 22,
- 22, 22, 25, 24, 24, 24, 23, 23, 22, 21, 21, 20, 20, 21, 21, 21, 20, 20,
- 22, 22, 22, 23, 22, 22, 22, 21, 20, 20, 20, 20, 20, 20, 19, 19, 21, 22,
- 22, 22, 22, 22, 21, 21, 20, 20, 19, 19, 19, 19, 19, 19, 21, 22, 22, 23,
- 22, 22, 22, 21, 21, 20, 19, 19, 19, 19, 18, 18, 21, 22, 22, 23, 22, 22,
- 22, 22, 21, 20, 19, 19, 19, 18, 18, 18, 21, 22, 22, 23, 23, 23, 23, 22,
- 21, 20, 19, 19, 18, 18, 17, 17, 20, 21, 22, 22, 22, 22, 22, 22, 20, 19,
- 19, 18, 18, 17, 17, 17, 20, 21, 22, 22, 22, 22, 22, 22, 20, 19, 19, 18,
- 18, 17, 17, 17,
- /* Size 32x32 */
- 32, 33, 33, 33, 33, 34, 34, 33, 31, 31, 31, 29, 28, 28, 27, 25, 25, 24,
- 22, 21, 21, 21, 21, 21, 21, 21, 21, 21, 20, 20, 20, 20, 33, 33, 33, 33,
- 33, 33, 33, 33, 31, 30, 30, 28, 28, 28, 26, 24, 24, 24, 22, 21, 21, 21,
- 22, 22, 22, 22, 22, 21, 21, 21, 21, 20, 33, 33, 33, 33, 33, 33, 33, 32,
- 30, 30, 30, 28, 27, 27, 26, 24, 24, 24, 22, 22, 22, 22, 22, 22, 22, 22,
- 22, 22, 21, 21, 21, 21, 33, 33, 33, 33, 33, 33, 33, 32, 30, 30, 30, 28,
- 27, 27, 26, 24, 24, 24, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 21, 21,
- 21, 21, 33, 33, 33, 33, 33, 33, 33, 32, 30, 29, 29, 28, 26, 26, 26, 24,
- 24, 24, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 21, 34, 33,
- 33, 33, 33, 32, 32, 32, 30, 29, 29, 27, 26, 26, 25, 24, 24, 24, 23, 22,
- 22, 22, 23, 23, 23, 23, 23, 23, 22, 22, 22, 22, 34, 33, 33, 33, 33, 32,
- 32, 32, 30, 29, 29, 27, 26, 26, 25, 24, 24, 24, 23, 22, 22, 22, 23, 23,
- 23, 23, 23, 23, 22, 22, 22, 22, 33, 33, 32, 32, 32, 32, 32, 31, 29, 28,
- 28, 27, 26, 26, 25, 24, 24, 24, 22, 22, 22, 22, 23, 23, 23, 23, 23, 23,
- 22, 22, 22, 22, 31, 31, 30, 30, 30, 30, 30, 29, 28, 27, 27, 25, 24, 24,
- 24, 23, 23, 23, 22, 22, 22, 22, 22, 22, 22, 23, 23, 23, 22, 22, 22, 22,
- 31, 30, 30, 30, 29, 29, 29, 28, 27, 26, 26, 25, 24, 24, 23, 23, 23, 23,
- 22, 22, 22, 22, 22, 22, 22, 23, 23, 23, 22, 22, 22, 22, 31, 30, 30, 30,
- 29, 29, 29, 28, 27, 26, 26, 25, 24, 24, 23, 23, 23, 23, 22, 22, 22, 22,
- 22, 22, 22, 23, 23, 23, 22, 22, 22, 22, 29, 28, 28, 28, 28, 27, 27, 27,
- 25, 25, 25, 23, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 23,
- 23, 23, 22, 22, 22, 22, 28, 28, 27, 27, 26, 26, 26, 26, 24, 24, 24, 22,
- 22, 22, 22, 22, 22, 22, 22, 21, 21, 22, 22, 22, 22, 23, 23, 23, 22, 22,
- 22, 22, 28, 28, 27, 27, 26, 26, 26, 26, 24, 24, 24, 22, 22, 22, 22, 22,
- 22, 22, 22, 21, 21, 22, 22, 22, 22, 23, 23, 23, 22, 22, 22, 22, 27, 26,
- 26, 26, 26, 25, 25, 25, 24, 23, 23, 22, 22, 22, 22, 21, 21, 21, 21, 21,
- 21, 21, 21, 21, 22, 22, 22, 22, 22, 22, 22, 21, 25, 24, 24, 24, 24, 24,
- 24, 24, 23, 23, 23, 22, 22, 22, 21, 21, 21, 21, 20, 20, 20, 20, 21, 21,
- 21, 21, 21, 21, 20, 20, 20, 20, 25, 24, 24, 24, 24, 24, 24, 24, 23, 23,
- 23, 22, 22, 22, 21, 21, 21, 21, 20, 20, 20, 20, 21, 21, 21, 21, 21, 21,
- 20, 20, 20, 20, 24, 24, 24, 24, 24, 24, 24, 24, 23, 23, 23, 22, 22, 22,
- 21, 21, 21, 21, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20,
- 22, 22, 22, 22, 22, 23, 23, 22, 22, 22, 22, 22, 22, 22, 21, 20, 20, 20,
- 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 19, 19, 19, 19, 21, 21, 22, 22,
- 22, 22, 22, 22, 22, 22, 22, 22, 21, 21, 21, 20, 20, 20, 20, 19, 19, 19,
- 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 21, 21, 22, 22, 22, 22, 22, 22,
- 22, 22, 22, 22, 21, 21, 21, 20, 20, 20, 20, 19, 19, 19, 19, 19, 19, 19,
- 19, 19, 19, 19, 19, 19, 21, 21, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22,
- 22, 22, 21, 20, 20, 20, 20, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19,
- 19, 18, 21, 22, 22, 22, 22, 23, 23, 23, 22, 22, 22, 22, 22, 22, 21, 21,
- 21, 20, 20, 19, 19, 19, 19, 19, 19, 19, 19, 18, 18, 18, 18, 18, 21, 22,
- 22, 22, 22, 23, 23, 23, 22, 22, 22, 22, 22, 22, 21, 21, 21, 20, 20, 19,
- 19, 19, 19, 19, 19, 19, 19, 18, 18, 18, 18, 18, 21, 22, 22, 22, 22, 23,
- 23, 23, 22, 22, 22, 22, 22, 22, 22, 21, 21, 20, 20, 19, 19, 19, 19, 19,
- 19, 18, 18, 18, 18, 18, 18, 18, 21, 22, 22, 22, 22, 23, 23, 23, 23, 23,
- 23, 23, 23, 23, 22, 21, 21, 20, 20, 19, 19, 19, 19, 19, 18, 18, 18, 17,
- 17, 17, 17, 17, 21, 22, 22, 22, 22, 23, 23, 23, 23, 23, 23, 23, 23, 23,
- 22, 21, 21, 20, 20, 19, 19, 19, 19, 19, 18, 18, 18, 17, 17, 17, 17, 17,
- 21, 21, 22, 22, 22, 23, 23, 23, 23, 23, 23, 23, 23, 23, 22, 21, 21, 20,
- 20, 19, 19, 19, 18, 18, 18, 17, 17, 17, 17, 17, 17, 17, 20, 21, 21, 21,
- 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 20, 20, 20, 19, 19, 19, 19,
- 18, 18, 18, 17, 17, 17, 17, 17, 17, 17, 20, 21, 21, 21, 22, 22, 22, 22,
- 22, 22, 22, 22, 22, 22, 22, 20, 20, 20, 19, 19, 19, 19, 18, 18, 18, 17,
- 17, 17, 17, 17, 17, 16, 20, 21, 21, 21, 22, 22, 22, 22, 22, 22, 22, 22,
- 22, 22, 22, 20, 20, 20, 19, 19, 19, 19, 18, 18, 18, 17, 17, 17, 17, 17,
- 17, 16, 20, 20, 21, 21, 21, 22, 22, 22, 22, 22, 22, 22, 22, 22, 21, 20,
- 20, 20, 19, 19, 19, 18, 18, 18, 18, 17, 17, 17, 17, 16, 16, 16,
- /* Size 4x8 */
- 33, 27, 22, 21, 33, 26, 22, 23, 29, 24, 22, 22, 26, 22, 22, 23, 24, 22,
- 20, 20, 22, 22, 19, 19, 22, 22, 19, 18, 21, 22, 19, 17,
- /* Size 8x4 */
- 33, 33, 29, 26, 24, 22, 22, 21, 27, 26, 24, 22, 22, 22, 22, 22, 22, 22,
- 22, 22, 20, 19, 19, 19, 21, 23, 22, 23, 20, 19, 18, 17,
- /* Size 8x16 */
- 32, 33, 31, 28, 23, 21, 21, 20, 33, 33, 30, 27, 23, 22, 22, 21, 33, 32,
- 30, 26, 23, 22, 22, 22, 34, 32, 29, 26, 23, 22, 23, 22, 31, 29, 28, 24,
- 22, 22, 23, 22, 31, 28, 27, 24, 22, 22, 22, 22, 28, 26, 24, 22, 22, 22,
- 23, 22, 26, 25, 24, 22, 21, 21, 22, 22, 24, 24, 23, 22, 21, 20, 21, 20,
- 22, 22, 22, 21, 20, 20, 19, 19, 21, 22, 22, 21, 20, 19, 19, 19, 21, 22,
- 22, 22, 20, 19, 18, 18, 21, 23, 22, 22, 20, 19, 18, 18, 21, 23, 23, 22,
- 20, 19, 18, 17, 20, 22, 22, 22, 20, 19, 17, 17, 20, 22, 22, 22, 20, 19,
- 17, 17,
- /* Size 16x8 */
- 32, 33, 33, 34, 31, 31, 28, 26, 24, 22, 21, 21, 21, 21, 20, 20, 33, 33,
- 32, 32, 29, 28, 26, 25, 24, 22, 22, 22, 23, 23, 22, 22, 31, 30, 30, 29,
- 28, 27, 24, 24, 23, 22, 22, 22, 22, 23, 22, 22, 28, 27, 26, 26, 24, 24,
- 22, 22, 22, 21, 21, 22, 22, 22, 22, 22, 23, 23, 23, 23, 22, 22, 22, 21,
- 21, 20, 20, 20, 20, 20, 20, 20, 21, 22, 22, 22, 22, 22, 22, 21, 20, 20,
- 19, 19, 19, 19, 19, 19, 21, 22, 22, 23, 23, 22, 23, 22, 21, 19, 19, 18,
- 18, 18, 17, 17, 20, 21, 22, 22, 22, 22, 22, 22, 20, 19, 19, 18, 18, 17,
- 17, 17,
- /* Size 16x32 */
- 32, 33, 33, 33, 31, 28, 28, 27, 23, 21, 21, 21, 21, 21, 20, 20, 33, 33,
- 33, 33, 31, 27, 27, 26, 23, 22, 22, 21, 21, 21, 21, 20, 33, 33, 33, 33,
- 30, 27, 27, 26, 23, 22, 22, 22, 22, 22, 21, 20, 33, 33, 33, 33, 30, 27,
- 27, 26, 23, 22, 22, 22, 22, 22, 21, 20, 33, 33, 32, 32, 30, 26, 26, 26,
- 23, 22, 22, 22, 22, 22, 22, 21, 34, 33, 32, 32, 29, 26, 26, 25, 23, 22,
- 22, 23, 23, 23, 22, 21, 34, 33, 32, 32, 29, 26, 26, 25, 23, 22, 22, 23,
- 23, 23, 22, 21, 33, 32, 31, 31, 29, 26, 26, 25, 23, 22, 22, 23, 23, 23,
- 22, 21, 31, 30, 29, 29, 28, 24, 24, 24, 22, 22, 22, 22, 23, 23, 22, 22,
- 31, 29, 28, 28, 27, 24, 24, 23, 22, 22, 22, 22, 22, 22, 22, 22, 31, 29,
- 28, 28, 27, 24, 24, 23, 22, 22, 22, 22, 22, 22, 22, 22, 29, 28, 27, 27,
- 25, 23, 23, 22, 22, 22, 22, 22, 23, 23, 22, 22, 28, 26, 26, 26, 24, 22,
- 22, 22, 22, 22, 22, 22, 23, 23, 22, 22, 28, 26, 26, 26, 24, 22, 22, 22,
- 22, 22, 22, 22, 23, 23, 22, 22, 26, 26, 25, 25, 24, 22, 22, 22, 21, 21,
- 21, 22, 22, 22, 22, 21, 24, 24, 24, 24, 23, 22, 22, 21, 21, 20, 20, 21,
- 21, 21, 20, 20, 24, 24, 24, 24, 23, 22, 22, 21, 21, 20, 20, 21, 21, 21,
- 20, 20, 24, 24, 24, 24, 23, 22, 22, 21, 20, 20, 20, 20, 20, 20, 20, 20,
- 22, 22, 22, 22, 22, 21, 21, 21, 20, 20, 20, 20, 19, 19, 19, 19, 21, 22,
- 22, 22, 22, 21, 21, 21, 20, 19, 19, 19, 19, 19, 19, 19, 21, 22, 22, 22,
- 22, 21, 21, 21, 20, 19, 19, 19, 19, 19, 19, 19, 21, 22, 22, 22, 22, 22,
- 22, 21, 20, 19, 19, 19, 19, 19, 19, 18, 21, 22, 22, 22, 22, 22, 22, 21,
- 20, 19, 19, 19, 18, 18, 18, 18, 21, 22, 22, 22, 22, 22, 22, 21, 20, 19,
- 19, 19, 18, 18, 18, 18, 21, 22, 23, 23, 22, 22, 22, 22, 20, 19, 19, 19,
- 18, 18, 18, 17, 21, 22, 23, 23, 23, 22, 22, 22, 20, 19, 19, 18, 18, 18,
- 17, 17, 21, 22, 23, 23, 23, 22, 22, 22, 20, 19, 19, 18, 18, 18, 17, 17,
- 21, 22, 23, 23, 23, 22, 22, 22, 20, 19, 19, 18, 18, 18, 17, 17, 20, 21,
- 22, 22, 22, 22, 22, 21, 20, 19, 19, 18, 17, 17, 17, 16, 20, 21, 22, 22,
- 22, 22, 22, 21, 20, 19, 19, 18, 17, 17, 17, 16, 20, 21, 22, 22, 22, 22,
- 22, 21, 20, 19, 19, 18, 17, 17, 17, 16, 20, 21, 22, 22, 22, 22, 22, 21,
- 20, 19, 19, 18, 17, 17, 17, 16,
- /* Size 32x16 */
- 32, 33, 33, 33, 33, 34, 34, 33, 31, 31, 31, 29, 28, 28, 26, 24, 24, 24,
- 22, 21, 21, 21, 21, 21, 21, 21, 21, 21, 20, 20, 20, 20, 33, 33, 33, 33,
- 33, 33, 33, 32, 30, 29, 29, 28, 26, 26, 26, 24, 24, 24, 22, 22, 22, 22,
- 22, 22, 22, 22, 22, 22, 21, 21, 21, 21, 33, 33, 33, 33, 32, 32, 32, 31,
- 29, 28, 28, 27, 26, 26, 25, 24, 24, 24, 22, 22, 22, 22, 22, 22, 23, 23,
- 23, 23, 22, 22, 22, 22, 33, 33, 33, 33, 32, 32, 32, 31, 29, 28, 28, 27,
- 26, 26, 25, 24, 24, 24, 22, 22, 22, 22, 22, 22, 23, 23, 23, 23, 22, 22,
- 22, 22, 31, 31, 30, 30, 30, 29, 29, 29, 28, 27, 27, 25, 24, 24, 24, 23,
- 23, 23, 22, 22, 22, 22, 22, 22, 22, 23, 23, 23, 22, 22, 22, 22, 28, 27,
- 27, 27, 26, 26, 26, 26, 24, 24, 24, 23, 22, 22, 22, 22, 22, 22, 21, 21,
- 21, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 28, 27, 27, 27, 26, 26,
- 26, 26, 24, 24, 24, 23, 22, 22, 22, 22, 22, 22, 21, 21, 21, 22, 22, 22,
- 22, 22, 22, 22, 22, 22, 22, 22, 27, 26, 26, 26, 26, 25, 25, 25, 24, 23,
- 23, 22, 22, 22, 22, 21, 21, 21, 21, 21, 21, 21, 21, 21, 22, 22, 22, 22,
- 21, 21, 21, 21, 23, 23, 23, 23, 23, 23, 23, 23, 22, 22, 22, 22, 22, 22,
- 21, 21, 21, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20,
- 21, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 21, 20, 20, 20,
- 20, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 21, 22, 22, 22,
- 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 21, 20, 20, 20, 20, 19, 19, 19,
- 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 21, 21, 22, 22, 22, 23, 23, 23,
- 22, 22, 22, 22, 22, 22, 22, 21, 21, 20, 20, 19, 19, 19, 19, 19, 19, 18,
- 18, 18, 18, 18, 18, 18, 21, 21, 22, 22, 22, 23, 23, 23, 23, 22, 22, 23,
- 23, 23, 22, 21, 21, 20, 19, 19, 19, 19, 18, 18, 18, 18, 18, 18, 17, 17,
- 17, 17, 21, 21, 22, 22, 22, 23, 23, 23, 23, 22, 22, 23, 23, 23, 22, 21,
- 21, 20, 19, 19, 19, 19, 18, 18, 18, 18, 18, 18, 17, 17, 17, 17, 20, 21,
- 21, 21, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 20, 20, 20, 19, 19,
- 19, 19, 18, 18, 18, 17, 17, 17, 17, 17, 17, 17, 20, 20, 20, 20, 21, 21,
- 21, 21, 22, 22, 22, 22, 22, 22, 21, 20, 20, 20, 19, 19, 19, 18, 18, 18,
- 17, 17, 17, 17, 16, 16, 16, 16,
- /* Size 4x16 */
- 33, 28, 21, 21, 33, 27, 22, 22, 33, 26, 22, 22, 33, 26, 22, 23, 30, 24,
- 22, 23, 29, 24, 22, 22, 26, 22, 22, 23, 26, 22, 21, 22, 24, 22, 20, 21,
- 22, 21, 20, 19, 22, 21, 19, 19, 22, 22, 19, 18, 22, 22, 19, 18, 22, 22,
- 19, 18, 21, 22, 19, 17, 21, 22, 19, 17,
- /* Size 16x4 */
- 33, 33, 33, 33, 30, 29, 26, 26, 24, 22, 22, 22, 22, 22, 21, 21, 28, 27,
- 26, 26, 24, 24, 22, 22, 22, 21, 21, 22, 22, 22, 22, 22, 21, 22, 22, 22,
- 22, 22, 22, 21, 20, 20, 19, 19, 19, 19, 19, 19, 21, 22, 22, 23, 23, 22,
- 23, 22, 21, 19, 19, 18, 18, 18, 17, 17,
- /* Size 8x32 */
- 32, 33, 31, 28, 23, 21, 21, 20, 33, 33, 31, 27, 23, 22, 21, 21, 33, 33,
- 30, 27, 23, 22, 22, 21, 33, 33, 30, 27, 23, 22, 22, 21, 33, 32, 30, 26,
- 23, 22, 22, 22, 34, 32, 29, 26, 23, 22, 23, 22, 34, 32, 29, 26, 23, 22,
- 23, 22, 33, 31, 29, 26, 23, 22, 23, 22, 31, 29, 28, 24, 22, 22, 23, 22,
- 31, 28, 27, 24, 22, 22, 22, 22, 31, 28, 27, 24, 22, 22, 22, 22, 29, 27,
- 25, 23, 22, 22, 23, 22, 28, 26, 24, 22, 22, 22, 23, 22, 28, 26, 24, 22,
- 22, 22, 23, 22, 26, 25, 24, 22, 21, 21, 22, 22, 24, 24, 23, 22, 21, 20,
- 21, 20, 24, 24, 23, 22, 21, 20, 21, 20, 24, 24, 23, 22, 20, 20, 20, 20,
- 22, 22, 22, 21, 20, 20, 19, 19, 21, 22, 22, 21, 20, 19, 19, 19, 21, 22,
- 22, 21, 20, 19, 19, 19, 21, 22, 22, 22, 20, 19, 19, 19, 21, 22, 22, 22,
- 20, 19, 18, 18, 21, 22, 22, 22, 20, 19, 18, 18, 21, 23, 22, 22, 20, 19,
- 18, 18, 21, 23, 23, 22, 20, 19, 18, 17, 21, 23, 23, 22, 20, 19, 18, 17,
- 21, 23, 23, 22, 20, 19, 18, 17, 20, 22, 22, 22, 20, 19, 17, 17, 20, 22,
- 22, 22, 20, 19, 17, 17, 20, 22, 22, 22, 20, 19, 17, 17, 20, 22, 22, 22,
- 20, 19, 17, 17,
- /* Size 32x8 */
- 32, 33, 33, 33, 33, 34, 34, 33, 31, 31, 31, 29, 28, 28, 26, 24, 24, 24,
- 22, 21, 21, 21, 21, 21, 21, 21, 21, 21, 20, 20, 20, 20, 33, 33, 33, 33,
- 32, 32, 32, 31, 29, 28, 28, 27, 26, 26, 25, 24, 24, 24, 22, 22, 22, 22,
- 22, 22, 23, 23, 23, 23, 22, 22, 22, 22, 31, 31, 30, 30, 30, 29, 29, 29,
- 28, 27, 27, 25, 24, 24, 24, 23, 23, 23, 22, 22, 22, 22, 22, 22, 22, 23,
- 23, 23, 22, 22, 22, 22, 28, 27, 27, 27, 26, 26, 26, 26, 24, 24, 24, 23,
- 22, 22, 22, 22, 22, 22, 21, 21, 21, 22, 22, 22, 22, 22, 22, 22, 22, 22,
- 22, 22, 23, 23, 23, 23, 23, 23, 23, 23, 22, 22, 22, 22, 22, 22, 21, 21,
- 21, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 21, 22,
- 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 21, 20, 20, 20, 20, 19,
- 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 21, 21, 22, 22, 22, 23,
- 23, 23, 23, 22, 22, 23, 23, 23, 22, 21, 21, 20, 19, 19, 19, 19, 18, 18,
- 18, 18, 18, 18, 17, 17, 17, 17, 20, 21, 21, 21, 22, 22, 22, 22, 22, 22,
- 22, 22, 22, 22, 22, 20, 20, 20, 19, 19, 19, 19, 18, 18, 18, 17, 17, 17,
- 17, 17, 17, 17 },
- },
- {
- { /* Luma */
- /* Size 4x4 */
- 32, 32, 30, 27, 32, 31, 29, 26, 30, 29, 26, 23, 27, 26, 23, 19,
- /* Size 8x8 */
- 33, 33, 32, 32, 31, 30, 28, 25, 33, 32, 32, 32, 31, 30, 28, 26, 32, 32,
- 32, 31, 30, 29, 28, 26, 32, 32, 31, 30, 29, 28, 27, 25, 31, 31, 30, 29,
- 28, 26, 25, 23, 30, 30, 29, 28, 26, 24, 22, 21, 28, 28, 28, 27, 25, 22,
- 20, 19, 25, 26, 26, 25, 23, 21, 19, 18,
- /* Size 16x16 */
- 32, 33, 33, 33, 33, 33, 33, 32, 32, 30, 30, 28, 28, 26, 26, 23, 33, 32,
- 32, 32, 32, 32, 32, 32, 32, 30, 30, 29, 29, 27, 27, 24, 33, 32, 32, 32,
- 32, 32, 32, 32, 32, 30, 30, 29, 29, 27, 27, 24, 33, 32, 32, 32, 32, 32,
- 32, 32, 32, 31, 31, 30, 30, 28, 28, 25, 33, 32, 32, 32, 32, 32, 32, 32,
- 32, 31, 31, 30, 30, 28, 28, 25, 33, 32, 32, 32, 32, 31, 31, 30, 30, 29,
- 29, 28, 28, 26, 26, 24, 33, 32, 32, 32, 32, 31, 31, 30, 30, 29, 29, 28,
- 28, 26, 26, 24, 32, 32, 32, 32, 32, 30, 30, 29, 29, 28, 28, 27, 27, 26,
- 26, 24, 32, 32, 32, 32, 32, 30, 30, 29, 29, 28, 28, 27, 27, 26, 26, 24,
- 30, 30, 30, 31, 31, 29, 29, 28, 28, 26, 26, 24, 24, 23, 23, 22, 30, 30,
- 30, 31, 31, 29, 29, 28, 28, 26, 26, 24, 24, 23, 23, 22, 28, 29, 29, 30,
- 30, 28, 28, 27, 27, 24, 24, 21, 21, 20, 20, 19, 28, 29, 29, 30, 30, 28,
- 28, 27, 27, 24, 24, 21, 21, 20, 20, 19, 26, 27, 27, 28, 28, 26, 26, 26,
- 26, 23, 23, 20, 20, 19, 19, 18, 26, 27, 27, 28, 28, 26, 26, 26, 26, 23,
- 23, 20, 20, 19, 19, 18, 23, 24, 24, 25, 25, 24, 24, 24, 24, 22, 22, 19,
- 19, 18, 18, 16,
- /* Size 32x32 */
- 32, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 32, 32, 32, 32, 31,
- 30, 30, 30, 29, 28, 28, 28, 28, 26, 26, 26, 25, 23, 23, 33, 33, 33, 33,
- 33, 33, 33, 33, 33, 32, 32, 32, 32, 32, 32, 32, 32, 31, 30, 30, 30, 30,
- 29, 29, 29, 28, 26, 26, 26, 25, 24, 24, 33, 33, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 31, 30, 30, 30, 30, 29, 29, 29, 28,
- 27, 27, 27, 26, 24, 24, 33, 33, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 31, 30, 30, 30, 30, 29, 29, 29, 28, 27, 27, 27, 26,
- 24, 24, 33, 33, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 31, 30, 30, 30, 30, 29, 29, 29, 28, 27, 27, 27, 26, 24, 24, 33, 33,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 31, 30, 30,
- 30, 30, 29, 29, 29, 28, 27, 27, 27, 26, 25, 25, 33, 33, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 31, 31, 31, 31, 30, 30, 30,
- 30, 28, 28, 28, 28, 26, 25, 25, 33, 33, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 31, 31, 31, 31, 30, 30, 30, 30, 28, 28, 28,
- 28, 26, 25, 25, 33, 33, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 31, 31, 31, 31, 30, 30, 30, 30, 28, 28, 28, 28, 26, 25, 25,
- 33, 32, 32, 32, 32, 32, 32, 32, 32, 32, 31, 31, 31, 31, 31, 31, 31, 30,
- 30, 30, 30, 29, 29, 29, 29, 28, 27, 27, 27, 26, 25, 25, 33, 32, 32, 32,
- 32, 32, 32, 32, 32, 31, 31, 31, 31, 31, 30, 30, 30, 30, 29, 29, 29, 28,
- 28, 28, 28, 27, 26, 26, 26, 26, 24, 24, 33, 32, 32, 32, 32, 32, 32, 32,
- 32, 31, 31, 31, 31, 31, 30, 30, 30, 30, 29, 29, 29, 28, 28, 28, 28, 27,
- 26, 26, 26, 26, 24, 24, 33, 32, 32, 32, 32, 32, 32, 32, 32, 31, 31, 31,
- 31, 31, 30, 30, 30, 30, 29, 29, 29, 28, 28, 28, 28, 27, 26, 26, 26, 26,
- 24, 24, 32, 32, 32, 32, 32, 32, 32, 32, 32, 31, 31, 31, 31, 30, 30, 30,
- 30, 29, 28, 28, 28, 28, 28, 28, 28, 27, 26, 26, 26, 25, 24, 24, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 31, 30, 30, 30, 30, 29, 29, 29, 28, 28, 28,
- 28, 28, 27, 27, 27, 26, 26, 26, 26, 25, 24, 24, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 31, 30, 30, 30, 30, 29, 29, 29, 28, 28, 28, 28, 28, 27, 27,
- 27, 26, 26, 26, 26, 25, 24, 24, 32, 32, 32, 32, 32, 32, 32, 32, 32, 31,
- 30, 30, 30, 30, 29, 29, 29, 28, 28, 28, 28, 28, 27, 27, 27, 26, 26, 26,
- 26, 25, 24, 24, 31, 31, 31, 31, 31, 31, 31, 31, 31, 30, 30, 30, 30, 29,
- 28, 28, 28, 28, 27, 27, 27, 26, 26, 26, 26, 25, 24, 24, 24, 23, 23, 23,
- 30, 30, 30, 30, 30, 30, 31, 31, 31, 30, 29, 29, 29, 28, 28, 28, 28, 27,
- 26, 26, 26, 25, 24, 24, 24, 23, 23, 23, 23, 22, 22, 22, 30, 30, 30, 30,
- 30, 30, 31, 31, 31, 30, 29, 29, 29, 28, 28, 28, 28, 27, 26, 26, 26, 25,
- 24, 24, 24, 23, 23, 23, 23, 22, 22, 22, 30, 30, 30, 30, 30, 30, 31, 31,
- 31, 30, 29, 29, 29, 28, 28, 28, 28, 27, 26, 26, 26, 25, 24, 24, 24, 23,
- 23, 23, 23, 22, 22, 22, 29, 30, 30, 30, 30, 30, 30, 30, 30, 29, 28, 28,
- 28, 28, 28, 28, 28, 26, 25, 25, 25, 24, 23, 23, 23, 22, 22, 22, 22, 21,
- 20, 20, 28, 29, 29, 29, 29, 29, 30, 30, 30, 29, 28, 28, 28, 28, 27, 27,
- 27, 26, 24, 24, 24, 23, 21, 21, 21, 21, 20, 20, 20, 20, 19, 19, 28, 29,
- 29, 29, 29, 29, 30, 30, 30, 29, 28, 28, 28, 28, 27, 27, 27, 26, 24, 24,
- 24, 23, 21, 21, 21, 21, 20, 20, 20, 20, 19, 19, 28, 29, 29, 29, 29, 29,
- 30, 30, 30, 29, 28, 28, 28, 28, 27, 27, 27, 26, 24, 24, 24, 23, 21, 21,
- 21, 21, 20, 20, 20, 20, 19, 19, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28,
- 27, 27, 27, 27, 26, 26, 26, 25, 23, 23, 23, 22, 21, 21, 21, 20, 20, 20,
- 20, 19, 18, 18, 26, 26, 27, 27, 27, 27, 28, 28, 28, 27, 26, 26, 26, 26,
- 26, 26, 26, 24, 23, 23, 23, 22, 20, 20, 20, 20, 19, 19, 19, 18, 18, 18,
- 26, 26, 27, 27, 27, 27, 28, 28, 28, 27, 26, 26, 26, 26, 26, 26, 26, 24,
- 23, 23, 23, 22, 20, 20, 20, 20, 19, 19, 19, 18, 18, 18, 26, 26, 27, 27,
- 27, 27, 28, 28, 28, 27, 26, 26, 26, 26, 26, 26, 26, 24, 23, 23, 23, 22,
- 20, 20, 20, 20, 19, 19, 19, 18, 18, 18, 25, 25, 26, 26, 26, 26, 26, 26,
- 26, 26, 26, 26, 26, 25, 25, 25, 25, 23, 22, 22, 22, 21, 20, 20, 20, 19,
- 18, 18, 18, 18, 17, 17, 23, 24, 24, 24, 24, 25, 25, 25, 25, 25, 24, 24,
- 24, 24, 24, 24, 24, 23, 22, 22, 22, 20, 19, 19, 19, 18, 18, 18, 18, 17,
- 16, 16, 23, 24, 24, 24, 24, 25, 25, 25, 25, 25, 24, 24, 24, 24, 24, 24,
- 24, 23, 22, 22, 22, 20, 19, 19, 19, 18, 18, 18, 18, 17, 16, 16,
- /* Size 4x8 */
- 33, 32, 30, 26, 32, 32, 30, 27, 32, 31, 30, 27, 32, 31, 28, 26, 31, 30,
- 27, 24, 30, 28, 25, 22, 28, 27, 23, 20, 26, 26, 22, 18,
- /* Size 8x4 */
- 33, 32, 32, 32, 31, 30, 28, 26, 32, 32, 31, 31, 30, 28, 27, 26, 30, 30,
- 30, 28, 27, 25, 23, 22, 26, 27, 27, 26, 24, 22, 20, 18,
- /* Size 8x16 */
- 32, 33, 33, 32, 32, 28, 28, 23, 33, 32, 32, 32, 32, 29, 29, 24, 33, 32,
- 32, 32, 32, 29, 29, 24, 33, 32, 32, 31, 31, 30, 30, 25, 33, 32, 32, 31,
- 31, 30, 30, 25, 32, 32, 32, 30, 30, 28, 28, 24, 32, 32, 32, 30, 30, 28,
- 28, 24, 32, 31, 31, 29, 29, 27, 27, 24, 32, 31, 31, 29, 29, 27, 27, 24,
- 30, 30, 30, 28, 28, 24, 24, 21, 30, 30, 30, 28, 28, 24, 24, 21, 28, 30,
- 30, 27, 27, 21, 21, 19, 28, 30, 30, 27, 27, 21, 21, 19, 26, 28, 28, 26,
- 26, 20, 20, 18, 26, 28, 28, 26, 26, 20, 20, 18, 23, 25, 25, 24, 24, 19,
- 19, 16,
- /* Size 16x8 */
- 32, 33, 33, 33, 33, 32, 32, 32, 32, 30, 30, 28, 28, 26, 26, 23, 33, 32,
- 32, 32, 32, 32, 32, 31, 31, 30, 30, 30, 30, 28, 28, 25, 33, 32, 32, 32,
- 32, 32, 32, 31, 31, 30, 30, 30, 30, 28, 28, 25, 32, 32, 32, 31, 31, 30,
- 30, 29, 29, 28, 28, 27, 27, 26, 26, 24, 32, 32, 32, 31, 31, 30, 30, 29,
- 29, 28, 28, 27, 27, 26, 26, 24, 28, 29, 29, 30, 30, 28, 28, 27, 27, 24,
- 24, 21, 21, 20, 20, 19, 28, 29, 29, 30, 30, 28, 28, 27, 27, 24, 24, 21,
- 21, 20, 20, 19, 23, 24, 24, 25, 25, 24, 24, 24, 24, 21, 21, 19, 19, 18,
- 18, 16,
- /* Size 16x32 */
- 32, 33, 33, 33, 33, 32, 32, 32, 32, 30, 28, 28, 28, 26, 23, 23, 33, 33,
- 33, 33, 33, 32, 32, 32, 32, 30, 29, 29, 29, 26, 24, 24, 33, 32, 32, 32,
- 32, 32, 32, 32, 32, 30, 29, 29, 29, 27, 24, 24, 33, 32, 32, 32, 32, 32,
- 32, 32, 32, 30, 29, 29, 29, 27, 24, 24, 33, 32, 32, 32, 32, 32, 32, 32,
- 32, 30, 29, 29, 29, 27, 24, 24, 33, 32, 32, 32, 32, 32, 32, 32, 32, 30,
- 29, 29, 29, 27, 25, 25, 33, 32, 32, 32, 32, 32, 31, 31, 31, 31, 30, 30,
- 30, 28, 25, 25, 33, 32, 32, 32, 32, 32, 31, 31, 31, 31, 30, 30, 30, 28,
- 25, 25, 33, 32, 32, 32, 32, 32, 31, 31, 31, 31, 30, 30, 30, 28, 25, 25,
- 33, 32, 32, 32, 32, 31, 31, 31, 31, 30, 29, 29, 29, 27, 25, 25, 32, 32,
- 32, 32, 32, 31, 30, 30, 30, 29, 28, 28, 28, 26, 24, 24, 32, 32, 32, 32,
- 32, 31, 30, 30, 30, 29, 28, 28, 28, 26, 24, 24, 32, 32, 32, 32, 32, 31,
- 30, 30, 30, 29, 28, 28, 28, 26, 24, 24, 32, 32, 32, 32, 32, 31, 30, 30,
- 30, 28, 28, 28, 28, 26, 24, 24, 32, 32, 31, 31, 31, 30, 29, 29, 29, 28,
- 27, 27, 27, 26, 24, 24, 32, 32, 31, 31, 31, 30, 29, 29, 29, 28, 27, 27,
- 27, 26, 24, 24, 32, 32, 31, 31, 31, 30, 29, 29, 29, 28, 27, 27, 27, 26,
- 24, 24, 31, 31, 31, 31, 31, 30, 28, 28, 28, 27, 26, 26, 26, 24, 23, 23,
- 30, 30, 30, 30, 30, 29, 28, 28, 28, 26, 24, 24, 24, 23, 21, 21, 30, 30,
- 30, 30, 30, 29, 28, 28, 28, 26, 24, 24, 24, 23, 21, 21, 30, 30, 30, 30,
- 30, 29, 28, 28, 28, 26, 24, 24, 24, 23, 21, 21, 29, 30, 30, 30, 30, 28,
- 28, 28, 28, 25, 23, 23, 23, 22, 20, 20, 28, 29, 30, 30, 30, 28, 27, 27,
- 27, 24, 21, 21, 21, 20, 19, 19, 28, 29, 30, 30, 30, 28, 27, 27, 27, 24,
- 21, 21, 21, 20, 19, 19, 28, 29, 30, 30, 30, 28, 27, 27, 27, 24, 21, 21,
- 21, 20, 19, 19, 28, 28, 28, 28, 28, 27, 26, 26, 26, 23, 21, 21, 21, 20,
- 18, 18, 26, 27, 28, 28, 28, 26, 26, 26, 26, 23, 20, 20, 20, 19, 18, 18,
- 26, 27, 28, 28, 28, 26, 26, 26, 26, 23, 20, 20, 20, 19, 18, 18, 26, 27,
- 28, 28, 28, 26, 26, 26, 26, 23, 20, 20, 20, 19, 18, 18, 25, 26, 26, 26,
- 26, 26, 24, 24, 24, 22, 20, 20, 20, 18, 17, 17, 23, 24, 25, 25, 25, 24,
- 24, 24, 24, 21, 19, 19, 19, 18, 16, 16, 23, 24, 25, 25, 25, 24, 24, 24,
- 24, 21, 19, 19, 19, 18, 16, 16,
- /* Size 32x16 */
- 32, 33, 33, 33, 33, 33, 33, 33, 33, 33, 32, 32, 32, 32, 32, 32, 32, 31,
- 30, 30, 30, 29, 28, 28, 28, 28, 26, 26, 26, 25, 23, 23, 33, 33, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 31, 30, 30, 30, 30,
- 29, 29, 29, 28, 27, 27, 27, 26, 24, 24, 33, 33, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 31, 31, 31, 31, 30, 30, 30, 30, 30, 30, 30, 28,
- 28, 28, 28, 26, 25, 25, 33, 33, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 31, 31, 31, 31, 30, 30, 30, 30, 30, 30, 30, 28, 28, 28, 28, 26,
- 25, 25, 33, 33, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 31, 31,
- 31, 31, 30, 30, 30, 30, 30, 30, 30, 28, 28, 28, 28, 26, 25, 25, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 31, 31, 31, 31, 31, 30, 30, 30, 30, 29, 29,
- 29, 28, 28, 28, 28, 27, 26, 26, 26, 26, 24, 24, 32, 32, 32, 32, 32, 32,
- 31, 31, 31, 31, 30, 30, 30, 30, 29, 29, 29, 28, 28, 28, 28, 28, 27, 27,
- 27, 26, 26, 26, 26, 24, 24, 24, 32, 32, 32, 32, 32, 32, 31, 31, 31, 31,
- 30, 30, 30, 30, 29, 29, 29, 28, 28, 28, 28, 28, 27, 27, 27, 26, 26, 26,
- 26, 24, 24, 24, 32, 32, 32, 32, 32, 32, 31, 31, 31, 31, 30, 30, 30, 30,
- 29, 29, 29, 28, 28, 28, 28, 28, 27, 27, 27, 26, 26, 26, 26, 24, 24, 24,
- 30, 30, 30, 30, 30, 30, 31, 31, 31, 30, 29, 29, 29, 28, 28, 28, 28, 27,
- 26, 26, 26, 25, 24, 24, 24, 23, 23, 23, 23, 22, 21, 21, 28, 29, 29, 29,
- 29, 29, 30, 30, 30, 29, 28, 28, 28, 28, 27, 27, 27, 26, 24, 24, 24, 23,
- 21, 21, 21, 21, 20, 20, 20, 20, 19, 19, 28, 29, 29, 29, 29, 29, 30, 30,
- 30, 29, 28, 28, 28, 28, 27, 27, 27, 26, 24, 24, 24, 23, 21, 21, 21, 21,
- 20, 20, 20, 20, 19, 19, 28, 29, 29, 29, 29, 29, 30, 30, 30, 29, 28, 28,
- 28, 28, 27, 27, 27, 26, 24, 24, 24, 23, 21, 21, 21, 21, 20, 20, 20, 20,
- 19, 19, 26, 26, 27, 27, 27, 27, 28, 28, 28, 27, 26, 26, 26, 26, 26, 26,
- 26, 24, 23, 23, 23, 22, 20, 20, 20, 20, 19, 19, 19, 18, 18, 18, 23, 24,
- 24, 24, 24, 25, 25, 25, 25, 25, 24, 24, 24, 24, 24, 24, 24, 23, 21, 21,
- 21, 20, 19, 19, 19, 18, 18, 18, 18, 17, 16, 16, 23, 24, 24, 24, 24, 25,
- 25, 25, 25, 25, 24, 24, 24, 24, 24, 24, 24, 23, 21, 21, 21, 20, 19, 19,
- 19, 18, 18, 18, 18, 17, 16, 16,
- /* Size 4x16 */
- 33, 32, 30, 26, 32, 32, 30, 27, 32, 32, 30, 27, 32, 32, 31, 28, 32, 32,
- 31, 28, 32, 31, 29, 26, 32, 31, 29, 26, 32, 30, 28, 26, 32, 30, 28, 26,
- 30, 29, 26, 23, 30, 29, 26, 23, 29, 28, 24, 20, 29, 28, 24, 20, 27, 26,
- 23, 19, 27, 26, 23, 19, 24, 24, 21, 18,
- /* Size 16x4 */
- 33, 32, 32, 32, 32, 32, 32, 32, 32, 30, 30, 29, 29, 27, 27, 24, 32, 32,
- 32, 32, 32, 31, 31, 30, 30, 29, 29, 28, 28, 26, 26, 24, 30, 30, 30, 31,
- 31, 29, 29, 28, 28, 26, 26, 24, 24, 23, 23, 21, 26, 27, 27, 28, 28, 26,
- 26, 26, 26, 23, 23, 20, 20, 19, 19, 18,
- /* Size 8x32 */
- 32, 33, 33, 32, 32, 28, 28, 23, 33, 33, 33, 32, 32, 29, 29, 24, 33, 32,
- 32, 32, 32, 29, 29, 24, 33, 32, 32, 32, 32, 29, 29, 24, 33, 32, 32, 32,
- 32, 29, 29, 24, 33, 32, 32, 32, 32, 29, 29, 25, 33, 32, 32, 31, 31, 30,
- 30, 25, 33, 32, 32, 31, 31, 30, 30, 25, 33, 32, 32, 31, 31, 30, 30, 25,
- 33, 32, 32, 31, 31, 29, 29, 25, 32, 32, 32, 30, 30, 28, 28, 24, 32, 32,
- 32, 30, 30, 28, 28, 24, 32, 32, 32, 30, 30, 28, 28, 24, 32, 32, 32, 30,
- 30, 28, 28, 24, 32, 31, 31, 29, 29, 27, 27, 24, 32, 31, 31, 29, 29, 27,
- 27, 24, 32, 31, 31, 29, 29, 27, 27, 24, 31, 31, 31, 28, 28, 26, 26, 23,
- 30, 30, 30, 28, 28, 24, 24, 21, 30, 30, 30, 28, 28, 24, 24, 21, 30, 30,
- 30, 28, 28, 24, 24, 21, 29, 30, 30, 28, 28, 23, 23, 20, 28, 30, 30, 27,
- 27, 21, 21, 19, 28, 30, 30, 27, 27, 21, 21, 19, 28, 30, 30, 27, 27, 21,
- 21, 19, 28, 28, 28, 26, 26, 21, 21, 18, 26, 28, 28, 26, 26, 20, 20, 18,
- 26, 28, 28, 26, 26, 20, 20, 18, 26, 28, 28, 26, 26, 20, 20, 18, 25, 26,
- 26, 24, 24, 20, 20, 17, 23, 25, 25, 24, 24, 19, 19, 16, 23, 25, 25, 24,
- 24, 19, 19, 16,
- /* Size 32x8 */
- 32, 33, 33, 33, 33, 33, 33, 33, 33, 33, 32, 32, 32, 32, 32, 32, 32, 31,
- 30, 30, 30, 29, 28, 28, 28, 28, 26, 26, 26, 25, 23, 23, 33, 33, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 31, 31, 31, 31, 30, 30, 30, 30,
- 30, 30, 30, 28, 28, 28, 28, 26, 25, 25, 33, 33, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 31, 31, 31, 31, 30, 30, 30, 30, 30, 30, 30, 28,
- 28, 28, 28, 26, 25, 25, 32, 32, 32, 32, 32, 32, 31, 31, 31, 31, 30, 30,
- 30, 30, 29, 29, 29, 28, 28, 28, 28, 28, 27, 27, 27, 26, 26, 26, 26, 24,
- 24, 24, 32, 32, 32, 32, 32, 32, 31, 31, 31, 31, 30, 30, 30, 30, 29, 29,
- 29, 28, 28, 28, 28, 28, 27, 27, 27, 26, 26, 26, 26, 24, 24, 24, 28, 29,
- 29, 29, 29, 29, 30, 30, 30, 29, 28, 28, 28, 28, 27, 27, 27, 26, 24, 24,
- 24, 23, 21, 21, 21, 21, 20, 20, 20, 20, 19, 19, 28, 29, 29, 29, 29, 29,
- 30, 30, 30, 29, 28, 28, 28, 28, 27, 27, 27, 26, 24, 24, 24, 23, 21, 21,
- 21, 21, 20, 20, 20, 20, 19, 19, 23, 24, 24, 24, 24, 25, 25, 25, 25, 25,
- 24, 24, 24, 24, 24, 24, 24, 23, 21, 21, 21, 20, 19, 19, 19, 18, 18, 18,
- 18, 17, 16, 16 },
- { /* Chroma */
- /* Size 4x4 */
- 33, 30, 24, 22, 30, 26, 23, 22, 24, 23, 21, 21, 22, 22, 21, 19,
- /* Size 8x8 */
- 33, 33, 32, 29, 26, 23, 21, 21, 33, 33, 31, 28, 25, 23, 22, 22, 32, 31,
- 29, 26, 24, 23, 22, 23, 29, 28, 26, 24, 23, 22, 22, 22, 26, 25, 24, 23,
- 22, 21, 21, 22, 23, 23, 23, 22, 21, 20, 20, 20, 21, 22, 22, 22, 21, 20,
- 19, 19, 21, 22, 23, 22, 22, 20, 19, 18,
- /* Size 16x16 */
- 32, 33, 33, 34, 34, 31, 31, 28, 28, 25, 25, 21, 21, 21, 21, 21, 33, 33,
- 33, 33, 33, 30, 30, 27, 27, 24, 24, 22, 22, 22, 22, 22, 33, 33, 33, 33,
- 33, 30, 30, 27, 27, 24, 24, 22, 22, 22, 22, 22, 34, 33, 33, 32, 32, 29,
- 29, 26, 26, 24, 24, 22, 22, 23, 23, 23, 34, 33, 33, 32, 32, 29, 29, 26,
- 26, 24, 24, 22, 22, 23, 23, 23, 31, 30, 30, 29, 29, 26, 26, 24, 24, 23,
- 23, 22, 22, 22, 22, 23, 31, 30, 30, 29, 29, 26, 26, 24, 24, 23, 23, 22,
- 22, 22, 22, 23, 28, 27, 27, 26, 26, 24, 24, 22, 22, 22, 22, 21, 21, 22,
- 22, 23, 28, 27, 27, 26, 26, 24, 24, 22, 22, 22, 22, 21, 21, 22, 22, 23,
- 25, 24, 24, 24, 24, 23, 23, 22, 22, 21, 21, 20, 20, 21, 21, 21, 25, 24,
- 24, 24, 24, 23, 23, 22, 22, 21, 21, 20, 20, 21, 21, 21, 21, 22, 22, 22,
- 22, 22, 22, 21, 21, 20, 20, 19, 19, 19, 19, 19, 21, 22, 22, 22, 22, 22,
- 22, 21, 21, 20, 20, 19, 19, 19, 19, 19, 21, 22, 22, 23, 23, 22, 22, 22,
- 22, 21, 21, 19, 19, 19, 19, 19, 21, 22, 22, 23, 23, 22, 22, 22, 22, 21,
- 21, 19, 19, 19, 19, 19, 21, 22, 22, 23, 23, 23, 23, 23, 23, 21, 21, 19,
- 19, 19, 19, 18,
- /* Size 32x32 */
- 32, 33, 33, 33, 33, 33, 34, 34, 34, 32, 31, 31, 31, 29, 28, 28, 28, 26,
- 25, 25, 25, 23, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 33, 33, 33, 33,
- 33, 33, 33, 33, 33, 32, 30, 30, 30, 29, 28, 28, 28, 26, 24, 24, 24, 23,
- 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 33, 33, 33, 33, 33, 33, 33, 33,
- 33, 31, 30, 30, 30, 28, 27, 27, 27, 26, 24, 24, 24, 23, 22, 22, 22, 22,
- 22, 22, 22, 22, 22, 22, 33, 33, 33, 33, 33, 33, 33, 33, 33, 31, 30, 30,
- 30, 28, 27, 27, 27, 26, 24, 24, 24, 23, 22, 22, 22, 22, 22, 22, 22, 22,
- 22, 22, 33, 33, 33, 33, 33, 33, 33, 33, 33, 31, 30, 30, 30, 28, 27, 27,
- 27, 26, 24, 24, 24, 23, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 33, 33,
- 33, 33, 33, 33, 33, 33, 33, 31, 29, 29, 29, 28, 26, 26, 26, 25, 24, 24,
- 24, 23, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 34, 33, 33, 33, 33, 33,
- 32, 32, 32, 31, 29, 29, 29, 28, 26, 26, 26, 25, 24, 24, 24, 23, 22, 22,
- 22, 22, 23, 23, 23, 23, 23, 23, 34, 33, 33, 33, 33, 33, 32, 32, 32, 31,
- 29, 29, 29, 28, 26, 26, 26, 25, 24, 24, 24, 23, 22, 22, 22, 22, 23, 23,
- 23, 23, 23, 23, 34, 33, 33, 33, 33, 33, 32, 32, 32, 31, 29, 29, 29, 28,
- 26, 26, 26, 25, 24, 24, 24, 23, 22, 22, 22, 22, 23, 23, 23, 23, 23, 23,
- 32, 32, 31, 31, 31, 31, 31, 31, 31, 29, 28, 28, 28, 26, 25, 25, 25, 24,
- 24, 24, 24, 23, 22, 22, 22, 22, 22, 22, 22, 23, 23, 23, 31, 30, 30, 30,
- 30, 29, 29, 29, 29, 28, 26, 26, 26, 25, 24, 24, 24, 23, 23, 23, 23, 22,
- 22, 22, 22, 22, 22, 22, 22, 22, 23, 23, 31, 30, 30, 30, 30, 29, 29, 29,
- 29, 28, 26, 26, 26, 25, 24, 24, 24, 23, 23, 23, 23, 22, 22, 22, 22, 22,
- 22, 22, 22, 22, 23, 23, 31, 30, 30, 30, 30, 29, 29, 29, 29, 28, 26, 26,
- 26, 25, 24, 24, 24, 23, 23, 23, 23, 22, 22, 22, 22, 22, 22, 22, 22, 22,
- 23, 23, 29, 29, 28, 28, 28, 28, 28, 28, 28, 26, 25, 25, 25, 24, 23, 23,
- 23, 23, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 23, 23, 28, 28,
- 27, 27, 27, 26, 26, 26, 26, 25, 24, 24, 24, 23, 22, 22, 22, 22, 22, 22,
- 22, 22, 21, 21, 21, 22, 22, 22, 22, 22, 23, 23, 28, 28, 27, 27, 27, 26,
- 26, 26, 26, 25, 24, 24, 24, 23, 22, 22, 22, 22, 22, 22, 22, 22, 21, 21,
- 21, 22, 22, 22, 22, 22, 23, 23, 28, 28, 27, 27, 27, 26, 26, 26, 26, 25,
- 24, 24, 24, 23, 22, 22, 22, 22, 22, 22, 22, 22, 21, 21, 21, 22, 22, 22,
- 22, 22, 23, 23, 26, 26, 26, 26, 26, 25, 25, 25, 25, 24, 23, 23, 23, 23,
- 22, 22, 22, 22, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 22, 22, 22,
- 25, 24, 24, 24, 24, 24, 24, 24, 24, 24, 23, 23, 23, 22, 22, 22, 22, 21,
- 21, 21, 21, 21, 20, 20, 20, 20, 21, 21, 21, 21, 21, 21, 25, 24, 24, 24,
- 24, 24, 24, 24, 24, 24, 23, 23, 23, 22, 22, 22, 22, 21, 21, 21, 21, 21,
- 20, 20, 20, 20, 21, 21, 21, 21, 21, 21, 25, 24, 24, 24, 24, 24, 24, 24,
- 24, 24, 23, 23, 23, 22, 22, 22, 22, 21, 21, 21, 21, 21, 20, 20, 20, 20,
- 21, 21, 21, 21, 21, 21, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 22, 22,
- 22, 22, 22, 22, 22, 21, 21, 21, 21, 20, 20, 20, 20, 20, 20, 20, 20, 20,
- 20, 20, 21, 21, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 21, 21,
- 21, 21, 20, 20, 20, 20, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 21, 21,
- 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 21, 21, 21, 21, 20, 20,
- 20, 20, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 21, 21, 22, 22, 22, 22,
- 22, 22, 22, 22, 22, 22, 22, 22, 21, 21, 21, 21, 20, 20, 20, 20, 19, 19,
- 19, 19, 19, 19, 19, 19, 19, 19, 21, 21, 22, 22, 22, 22, 22, 22, 22, 22,
- 22, 22, 22, 22, 22, 22, 22, 21, 20, 20, 20, 20, 19, 19, 19, 19, 19, 19,
- 19, 19, 19, 19, 21, 21, 22, 22, 22, 22, 23, 23, 23, 22, 22, 22, 22, 22,
- 22, 22, 22, 21, 21, 21, 21, 20, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19,
- 21, 21, 22, 22, 22, 22, 23, 23, 23, 22, 22, 22, 22, 22, 22, 22, 22, 21,
- 21, 21, 21, 20, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 21, 21, 22, 22,
- 22, 22, 23, 23, 23, 22, 22, 22, 22, 22, 22, 22, 22, 21, 21, 21, 21, 20,
- 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 21, 21, 22, 22, 22, 22, 23, 23,
- 23, 23, 22, 22, 22, 22, 22, 22, 22, 22, 21, 21, 21, 20, 19, 19, 19, 19,
- 19, 19, 19, 18, 18, 18, 21, 21, 22, 22, 22, 22, 23, 23, 23, 23, 23, 23,
- 23, 23, 23, 23, 23, 22, 21, 21, 21, 20, 19, 19, 19, 19, 19, 19, 19, 18,
- 18, 18, 21, 21, 22, 22, 22, 22, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23,
- 23, 22, 21, 21, 21, 20, 19, 19, 19, 19, 19, 19, 19, 18, 18, 18,
- /* Size 4x8 */
- 33, 30, 24, 21, 33, 29, 24, 22, 31, 28, 23, 22, 28, 25, 22, 22, 26, 23,
- 21, 21, 23, 22, 21, 20, 22, 22, 20, 19, 22, 22, 21, 19,
- /* Size 8x4 */
- 33, 33, 31, 28, 26, 23, 22, 22, 30, 29, 28, 25, 23, 22, 22, 22, 24, 24,
- 23, 22, 21, 21, 20, 21, 21, 22, 22, 22, 21, 20, 19, 19,
- /* Size 8x16 */
- 32, 33, 33, 28, 28, 21, 21, 21, 33, 33, 33, 27, 27, 22, 22, 22, 33, 33,
- 33, 27, 27, 22, 22, 22, 34, 32, 32, 26, 26, 22, 22, 23, 34, 32, 32, 26,
- 26, 22, 22, 23, 31, 28, 28, 24, 24, 22, 22, 22, 31, 28, 28, 24, 24, 22,
- 22, 22, 28, 26, 26, 22, 22, 22, 22, 23, 28, 26, 26, 22, 22, 22, 22, 23,
- 24, 24, 24, 22, 22, 20, 20, 21, 24, 24, 24, 22, 22, 20, 20, 21, 21, 22,
- 22, 21, 21, 19, 19, 19, 21, 22, 22, 21, 21, 19, 19, 19, 21, 22, 22, 22,
- 22, 19, 19, 18, 21, 22, 22, 22, 22, 19, 19, 18, 21, 23, 23, 22, 22, 19,
- 19, 18,
- /* Size 16x8 */
- 32, 33, 33, 34, 34, 31, 31, 28, 28, 24, 24, 21, 21, 21, 21, 21, 33, 33,
- 33, 32, 32, 28, 28, 26, 26, 24, 24, 22, 22, 22, 22, 23, 33, 33, 33, 32,
- 32, 28, 28, 26, 26, 24, 24, 22, 22, 22, 22, 23, 28, 27, 27, 26, 26, 24,
- 24, 22, 22, 22, 22, 21, 21, 22, 22, 22, 28, 27, 27, 26, 26, 24, 24, 22,
- 22, 22, 22, 21, 21, 22, 22, 22, 21, 22, 22, 22, 22, 22, 22, 22, 22, 20,
- 20, 19, 19, 19, 19, 19, 21, 22, 22, 22, 22, 22, 22, 22, 22, 20, 20, 19,
- 19, 19, 19, 19, 21, 22, 22, 23, 23, 22, 22, 23, 23, 21, 21, 19, 19, 18,
- 18, 18,
- /* Size 16x32 */
- 32, 33, 33, 33, 33, 31, 28, 28, 28, 24, 21, 21, 21, 21, 21, 21, 33, 33,
- 33, 33, 33, 30, 28, 28, 28, 24, 22, 22, 22, 21, 21, 21, 33, 33, 33, 33,
- 33, 30, 27, 27, 27, 24, 22, 22, 22, 22, 22, 22, 33, 33, 33, 33, 33, 30,
- 27, 27, 27, 24, 22, 22, 22, 22, 22, 22, 33, 33, 33, 33, 33, 30, 27, 27,
- 27, 24, 22, 22, 22, 22, 22, 22, 33, 33, 32, 32, 32, 29, 26, 26, 26, 24,
- 22, 22, 22, 22, 22, 22, 34, 33, 32, 32, 32, 29, 26, 26, 26, 24, 22, 22,
- 22, 23, 23, 23, 34, 33, 32, 32, 32, 29, 26, 26, 26, 24, 22, 22, 22, 23,
- 23, 23, 34, 33, 32, 32, 32, 29, 26, 26, 26, 24, 22, 22, 22, 23, 23, 23,
- 32, 31, 30, 30, 30, 28, 25, 25, 25, 23, 22, 22, 22, 22, 23, 23, 31, 30,
- 28, 28, 28, 26, 24, 24, 24, 23, 22, 22, 22, 22, 22, 22, 31, 30, 28, 28,
- 28, 26, 24, 24, 24, 23, 22, 22, 22, 22, 22, 22, 31, 30, 28, 28, 28, 26,
- 24, 24, 24, 23, 22, 22, 22, 22, 22, 22, 29, 28, 27, 27, 27, 25, 23, 23,
- 23, 22, 22, 22, 22, 22, 23, 23, 28, 27, 26, 26, 26, 24, 22, 22, 22, 22,
- 22, 22, 22, 22, 23, 23, 28, 27, 26, 26, 26, 24, 22, 22, 22, 22, 22, 22,
- 22, 22, 23, 23, 28, 27, 26, 26, 26, 24, 22, 22, 22, 22, 22, 22, 22, 22,
- 23, 23, 26, 26, 25, 25, 25, 23, 22, 22, 22, 21, 21, 21, 21, 21, 22, 22,
- 24, 24, 24, 24, 24, 23, 22, 22, 22, 21, 20, 20, 20, 20, 21, 21, 24, 24,
- 24, 24, 24, 23, 22, 22, 22, 21, 20, 20, 20, 20, 21, 21, 24, 24, 24, 24,
- 24, 23, 22, 22, 22, 21, 20, 20, 20, 20, 21, 21, 23, 23, 23, 23, 23, 22,
- 22, 22, 22, 21, 20, 20, 20, 20, 20, 20, 21, 21, 22, 22, 22, 22, 21, 21,
- 21, 20, 19, 19, 19, 19, 19, 19, 21, 21, 22, 22, 22, 22, 21, 21, 21, 20,
- 19, 19, 19, 19, 19, 19, 21, 21, 22, 22, 22, 22, 21, 21, 21, 20, 19, 19,
- 19, 19, 19, 19, 21, 22, 22, 22, 22, 22, 22, 22, 22, 20, 19, 19, 19, 19,
- 19, 19, 21, 22, 22, 22, 22, 22, 22, 22, 22, 20, 19, 19, 19, 19, 18, 18,
- 21, 22, 22, 22, 22, 22, 22, 22, 22, 20, 19, 19, 19, 19, 18, 18, 21, 22,
- 22, 22, 22, 22, 22, 22, 22, 20, 19, 19, 19, 19, 18, 18, 21, 22, 23, 23,
- 23, 22, 22, 22, 22, 21, 19, 19, 19, 19, 18, 18, 21, 22, 23, 23, 23, 23,
- 22, 22, 22, 21, 19, 19, 19, 18, 18, 18, 21, 22, 23, 23, 23, 23, 22, 22,
- 22, 21, 19, 19, 19, 18, 18, 18,
- /* Size 32x16 */
- 32, 33, 33, 33, 33, 33, 34, 34, 34, 32, 31, 31, 31, 29, 28, 28, 28, 26,
- 24, 24, 24, 23, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 33, 33, 33, 33,
- 33, 33, 33, 33, 33, 31, 30, 30, 30, 28, 27, 27, 27, 26, 24, 24, 24, 23,
- 21, 21, 21, 22, 22, 22, 22, 22, 22, 22, 33, 33, 33, 33, 33, 32, 32, 32,
- 32, 30, 28, 28, 28, 27, 26, 26, 26, 25, 24, 24, 24, 23, 22, 22, 22, 22,
- 22, 22, 22, 23, 23, 23, 33, 33, 33, 33, 33, 32, 32, 32, 32, 30, 28, 28,
- 28, 27, 26, 26, 26, 25, 24, 24, 24, 23, 22, 22, 22, 22, 22, 22, 22, 23,
- 23, 23, 33, 33, 33, 33, 33, 32, 32, 32, 32, 30, 28, 28, 28, 27, 26, 26,
- 26, 25, 24, 24, 24, 23, 22, 22, 22, 22, 22, 22, 22, 23, 23, 23, 31, 30,
- 30, 30, 30, 29, 29, 29, 29, 28, 26, 26, 26, 25, 24, 24, 24, 23, 23, 23,
- 23, 22, 22, 22, 22, 22, 22, 22, 22, 22, 23, 23, 28, 28, 27, 27, 27, 26,
- 26, 26, 26, 25, 24, 24, 24, 23, 22, 22, 22, 22, 22, 22, 22, 22, 21, 21,
- 21, 22, 22, 22, 22, 22, 22, 22, 28, 28, 27, 27, 27, 26, 26, 26, 26, 25,
- 24, 24, 24, 23, 22, 22, 22, 22, 22, 22, 22, 22, 21, 21, 21, 22, 22, 22,
- 22, 22, 22, 22, 28, 28, 27, 27, 27, 26, 26, 26, 26, 25, 24, 24, 24, 23,
- 22, 22, 22, 22, 22, 22, 22, 22, 21, 21, 21, 22, 22, 22, 22, 22, 22, 22,
- 24, 24, 24, 24, 24, 24, 24, 24, 24, 23, 23, 23, 23, 22, 22, 22, 22, 21,
- 21, 21, 21, 21, 20, 20, 20, 20, 20, 20, 20, 21, 21, 21, 21, 22, 22, 22,
- 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 21, 20, 20, 20, 20,
- 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 21, 22, 22, 22, 22, 22, 22, 22,
- 22, 22, 22, 22, 22, 22, 22, 22, 22, 21, 20, 20, 20, 20, 19, 19, 19, 19,
- 19, 19, 19, 19, 19, 19, 21, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22,
- 22, 22, 22, 22, 22, 21, 20, 20, 20, 20, 19, 19, 19, 19, 19, 19, 19, 19,
- 19, 19, 21, 21, 22, 22, 22, 22, 23, 23, 23, 22, 22, 22, 22, 22, 22, 22,
- 22, 21, 20, 20, 20, 20, 19, 19, 19, 19, 19, 19, 19, 19, 18, 18, 21, 21,
- 22, 22, 22, 22, 23, 23, 23, 23, 22, 22, 22, 23, 23, 23, 23, 22, 21, 21,
- 21, 20, 19, 19, 19, 19, 18, 18, 18, 18, 18, 18, 21, 21, 22, 22, 22, 22,
- 23, 23, 23, 23, 22, 22, 22, 23, 23, 23, 23, 22, 21, 21, 21, 20, 19, 19,
- 19, 19, 18, 18, 18, 18, 18, 18,
- /* Size 4x16 */
- 33, 31, 24, 21, 33, 30, 24, 22, 33, 30, 24, 22, 33, 29, 24, 23, 33, 29,
- 24, 23, 30, 26, 23, 22, 30, 26, 23, 22, 27, 24, 22, 22, 27, 24, 22, 22,
- 24, 23, 21, 20, 24, 23, 21, 20, 21, 22, 20, 19, 21, 22, 20, 19, 22, 22,
- 20, 19, 22, 22, 20, 19, 22, 23, 21, 18,
- /* Size 16x4 */
- 33, 33, 33, 33, 33, 30, 30, 27, 27, 24, 24, 21, 21, 22, 22, 22, 31, 30,
- 30, 29, 29, 26, 26, 24, 24, 23, 23, 22, 22, 22, 22, 23, 24, 24, 24, 24,
- 24, 23, 23, 22, 22, 21, 21, 20, 20, 20, 20, 21, 21, 22, 22, 23, 23, 22,
- 22, 22, 22, 20, 20, 19, 19, 19, 19, 18,
- /* Size 8x32 */
- 32, 33, 33, 28, 28, 21, 21, 21, 33, 33, 33, 28, 28, 22, 22, 21, 33, 33,
- 33, 27, 27, 22, 22, 22, 33, 33, 33, 27, 27, 22, 22, 22, 33, 33, 33, 27,
- 27, 22, 22, 22, 33, 32, 32, 26, 26, 22, 22, 22, 34, 32, 32, 26, 26, 22,
- 22, 23, 34, 32, 32, 26, 26, 22, 22, 23, 34, 32, 32, 26, 26, 22, 22, 23,
- 32, 30, 30, 25, 25, 22, 22, 23, 31, 28, 28, 24, 24, 22, 22, 22, 31, 28,
- 28, 24, 24, 22, 22, 22, 31, 28, 28, 24, 24, 22, 22, 22, 29, 27, 27, 23,
- 23, 22, 22, 23, 28, 26, 26, 22, 22, 22, 22, 23, 28, 26, 26, 22, 22, 22,
- 22, 23, 28, 26, 26, 22, 22, 22, 22, 23, 26, 25, 25, 22, 22, 21, 21, 22,
- 24, 24, 24, 22, 22, 20, 20, 21, 24, 24, 24, 22, 22, 20, 20, 21, 24, 24,
- 24, 22, 22, 20, 20, 21, 23, 23, 23, 22, 22, 20, 20, 20, 21, 22, 22, 21,
- 21, 19, 19, 19, 21, 22, 22, 21, 21, 19, 19, 19, 21, 22, 22, 21, 21, 19,
- 19, 19, 21, 22, 22, 22, 22, 19, 19, 19, 21, 22, 22, 22, 22, 19, 19, 18,
- 21, 22, 22, 22, 22, 19, 19, 18, 21, 22, 22, 22, 22, 19, 19, 18, 21, 23,
- 23, 22, 22, 19, 19, 18, 21, 23, 23, 22, 22, 19, 19, 18, 21, 23, 23, 22,
- 22, 19, 19, 18,
- /* Size 32x8 */
- 32, 33, 33, 33, 33, 33, 34, 34, 34, 32, 31, 31, 31, 29, 28, 28, 28, 26,
- 24, 24, 24, 23, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 33, 33, 33, 33,
- 33, 32, 32, 32, 32, 30, 28, 28, 28, 27, 26, 26, 26, 25, 24, 24, 24, 23,
- 22, 22, 22, 22, 22, 22, 22, 23, 23, 23, 33, 33, 33, 33, 33, 32, 32, 32,
- 32, 30, 28, 28, 28, 27, 26, 26, 26, 25, 24, 24, 24, 23, 22, 22, 22, 22,
- 22, 22, 22, 23, 23, 23, 28, 28, 27, 27, 27, 26, 26, 26, 26, 25, 24, 24,
- 24, 23, 22, 22, 22, 22, 22, 22, 22, 22, 21, 21, 21, 22, 22, 22, 22, 22,
- 22, 22, 28, 28, 27, 27, 27, 26, 26, 26, 26, 25, 24, 24, 24, 23, 22, 22,
- 22, 22, 22, 22, 22, 22, 21, 21, 21, 22, 22, 22, 22, 22, 22, 22, 21, 22,
- 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 21, 20, 20,
- 20, 20, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 21, 22, 22, 22, 22, 22,
- 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 21, 20, 20, 20, 20, 19, 19,
- 19, 19, 19, 19, 19, 19, 19, 19, 21, 21, 22, 22, 22, 22, 23, 23, 23, 23,
- 22, 22, 22, 23, 23, 23, 23, 22, 21, 21, 21, 20, 19, 19, 19, 19, 18, 18,
- 18, 18, 18, 18 },
- },
- {
- { /* Luma */
- /* Size 4x4 */
- 32, 32, 32, 29, 32, 32, 31, 29, 32, 31, 29, 27, 29, 29, 27, 22,
- /* Size 8x8 */
- 33, 33, 33, 32, 32, 32, 30, 29, 33, 32, 32, 32, 32, 31, 30, 29, 33, 32,
- 32, 32, 32, 31, 31, 30, 32, 32, 32, 31, 30, 30, 29, 28, 32, 32, 32, 30,
- 29, 29, 28, 27, 32, 31, 31, 30, 29, 28, 27, 26, 30, 30, 31, 29, 28, 27,
- 26, 24, 29, 29, 30, 28, 27, 26, 24, 21,
- /* Size 16x16 */
- 32, 33, 33, 33, 33, 33, 33, 33, 32, 32, 32, 31, 30, 30, 28, 28, 33, 33,
- 33, 32, 32, 32, 32, 32, 32, 32, 32, 31, 30, 30, 29, 29, 33, 33, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 31, 30, 30, 29, 29, 33, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 31, 30, 30, 29, 29, 33, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 31, 31, 30, 30, 30, 33, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 31, 31, 30, 30, 30, 33, 32, 32, 32, 32, 32, 31, 31, 31, 31, 31, 30,
- 29, 29, 28, 28, 33, 32, 32, 32, 32, 32, 31, 31, 31, 30, 30, 29, 29, 28,
- 28, 28, 32, 32, 32, 32, 32, 32, 31, 31, 30, 30, 30, 29, 28, 28, 28, 28,
- 32, 32, 32, 32, 32, 32, 31, 30, 30, 29, 29, 28, 28, 28, 27, 27, 32, 32,
- 32, 32, 32, 32, 31, 30, 30, 29, 29, 28, 28, 28, 27, 27, 31, 31, 31, 31,
- 31, 31, 30, 29, 29, 28, 28, 27, 26, 26, 24, 24, 30, 30, 30, 30, 31, 31,
- 29, 29, 28, 28, 28, 26, 26, 25, 24, 24, 30, 30, 30, 30, 30, 30, 29, 28,
- 28, 28, 28, 26, 25, 24, 23, 23, 28, 29, 29, 29, 30, 30, 28, 28, 28, 27,
- 27, 24, 24, 23, 21, 21, 28, 29, 29, 29, 30, 30, 28, 28, 28, 27, 27, 24,
- 24, 23, 21, 21,
- /* Size 32x32 */
- 32, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 32, 32,
- 32, 32, 32, 32, 31, 30, 30, 30, 30, 29, 28, 28, 28, 28, 33, 33, 33, 33,
- 33, 33, 33, 33, 33, 33, 33, 33, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 31, 30, 30, 30, 30, 29, 29, 29, 29, 28, 33, 33, 33, 33, 33, 33, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 31, 31, 30, 30, 30,
- 30, 29, 29, 29, 29, 28, 33, 33, 33, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 31, 31, 30, 30, 30, 30, 29, 29, 29,
- 29, 28, 33, 33, 33, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 31, 31, 30, 30, 30, 30, 29, 29, 29, 29, 28, 33, 33,
- 33, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 31, 31, 30, 30, 30, 30, 29, 29, 29, 29, 28, 33, 33, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 31, 31, 30,
- 30, 30, 30, 30, 29, 29, 29, 28, 33, 33, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 31, 31, 31, 31, 31, 30, 30,
- 30, 30, 30, 29, 33, 33, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 31, 31, 31, 31, 31, 30, 30, 30, 30, 30, 29,
- 33, 33, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 31, 31, 31, 31, 31, 30, 30, 30, 30, 30, 29, 33, 33, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 31,
- 31, 31, 31, 31, 30, 30, 30, 30, 30, 29, 33, 33, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 30, 30, 30, 30,
- 30, 29, 29, 29, 29, 28, 33, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 31,
- 31, 31, 31, 31, 31, 31, 31, 31, 31, 30, 30, 29, 29, 29, 29, 28, 28, 28,
- 28, 28, 33, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 31, 31, 31, 31, 31,
- 31, 30, 30, 30, 30, 30, 29, 29, 29, 29, 28, 28, 28, 28, 28, 28, 33, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 31, 31, 31, 31, 31, 31, 30, 30, 30,
- 30, 30, 29, 29, 29, 29, 28, 28, 28, 28, 28, 28, 33, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 31, 31, 31, 31, 31, 31, 30, 30, 30, 30, 30, 29, 29,
- 29, 29, 28, 28, 28, 28, 28, 28, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 31, 31, 31, 31, 31, 30, 30, 30, 30, 30, 29, 29, 28, 28, 28, 28, 28,
- 28, 28, 28, 27, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 31, 31, 30,
- 30, 30, 30, 29, 29, 29, 29, 29, 28, 28, 28, 28, 28, 28, 27, 27, 27, 26,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 31, 31, 30, 30, 30, 30, 29,
- 29, 29, 29, 28, 28, 28, 28, 28, 28, 27, 27, 27, 27, 26, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 31, 31, 30, 30, 30, 30, 29, 29, 29, 29, 28,
- 28, 28, 28, 28, 28, 27, 27, 27, 27, 26, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 31, 31, 30, 30, 30, 30, 29, 29, 29, 29, 28, 28, 28, 28, 28,
- 28, 27, 27, 27, 27, 26, 32, 32, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31,
- 30, 30, 30, 30, 29, 29, 28, 28, 28, 28, 28, 27, 27, 27, 27, 26, 26, 26,
- 26, 25, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 30, 30, 29, 29, 29,
- 29, 28, 28, 28, 28, 28, 27, 26, 26, 26, 26, 25, 24, 24, 24, 24, 30, 30,
- 30, 30, 30, 30, 30, 31, 31, 31, 31, 30, 29, 29, 29, 29, 28, 28, 28, 28,
- 28, 27, 26, 26, 26, 26, 25, 24, 24, 24, 24, 24, 30, 30, 30, 30, 30, 30,
- 30, 31, 31, 31, 31, 30, 29, 29, 29, 29, 28, 28, 28, 28, 28, 27, 26, 26,
- 26, 26, 25, 24, 24, 24, 24, 24, 30, 30, 30, 30, 30, 30, 30, 31, 31, 31,
- 31, 30, 29, 29, 29, 29, 28, 28, 28, 28, 28, 27, 26, 26, 26, 26, 25, 24,
- 24, 24, 24, 24, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 29, 28,
- 28, 28, 28, 28, 28, 28, 28, 27, 26, 25, 25, 25, 24, 23, 23, 23, 23, 23,
- 29, 29, 29, 29, 29, 29, 30, 30, 30, 30, 30, 29, 28, 28, 28, 28, 28, 28,
- 27, 27, 27, 26, 25, 24, 24, 24, 23, 22, 22, 22, 22, 21, 28, 29, 29, 29,
- 29, 29, 29, 30, 30, 30, 30, 29, 28, 28, 28, 28, 28, 27, 27, 27, 27, 26,
- 24, 24, 24, 24, 23, 22, 21, 21, 21, 21, 28, 29, 29, 29, 29, 29, 29, 30,
- 30, 30, 30, 29, 28, 28, 28, 28, 28, 27, 27, 27, 27, 26, 24, 24, 24, 24,
- 23, 22, 21, 21, 21, 21, 28, 29, 29, 29, 29, 29, 29, 30, 30, 30, 30, 29,
- 28, 28, 28, 28, 28, 27, 27, 27, 27, 26, 24, 24, 24, 24, 23, 22, 21, 21,
- 21, 21, 28, 28, 28, 28, 28, 28, 28, 29, 29, 29, 29, 28, 28, 28, 28, 28,
- 27, 26, 26, 26, 26, 25, 24, 24, 24, 24, 23, 21, 21, 21, 21, 20,
- /* Size 4x8 */
- 33, 33, 32, 29, 32, 32, 32, 29, 32, 32, 31, 30, 32, 32, 30, 28, 32, 31,
- 29, 27, 31, 31, 28, 26, 30, 30, 28, 24, 29, 30, 27, 21,
- /* Size 8x4 */
- 33, 32, 32, 32, 32, 31, 30, 29, 33, 32, 32, 32, 31, 31, 30, 30, 32, 32,
- 31, 30, 29, 28, 28, 27, 29, 29, 30, 28, 27, 26, 24, 21,
- /* Size 8x16 */
- 32, 33, 33, 33, 32, 32, 29, 28, 33, 32, 32, 32, 32, 32, 29, 29, 33, 32,
- 32, 32, 32, 32, 29, 29, 33, 32, 32, 32, 32, 32, 30, 29, 33, 32, 32, 32,
- 31, 31, 30, 30, 33, 32, 32, 32, 31, 31, 30, 30, 33, 32, 32, 31, 30, 30,
- 29, 28, 32, 32, 32, 31, 30, 30, 28, 28, 32, 32, 32, 31, 30, 30, 28, 28,
- 32, 32, 31, 30, 29, 29, 28, 27, 32, 32, 31, 30, 29, 29, 28, 27, 31, 31,
- 31, 29, 28, 28, 26, 25, 30, 30, 30, 29, 28, 28, 25, 24, 30, 30, 30, 29,
- 28, 28, 24, 23, 28, 29, 30, 28, 27, 27, 22, 21, 28, 29, 30, 28, 27, 27,
- 22, 21,
- /* Size 16x8 */
- 32, 33, 33, 33, 33, 33, 33, 32, 32, 32, 32, 31, 30, 30, 28, 28, 33, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 31, 30, 30, 29, 29, 33, 32, 32, 32,
- 32, 32, 32, 32, 32, 31, 31, 31, 30, 30, 30, 30, 33, 32, 32, 32, 32, 32,
- 31, 31, 31, 30, 30, 29, 29, 29, 28, 28, 32, 32, 32, 32, 31, 31, 30, 30,
- 30, 29, 29, 28, 28, 28, 27, 27, 32, 32, 32, 32, 31, 31, 30, 30, 30, 29,
- 29, 28, 28, 28, 27, 27, 29, 29, 29, 30, 30, 30, 29, 28, 28, 28, 28, 26,
- 25, 24, 22, 22, 28, 29, 29, 29, 30, 30, 28, 28, 28, 27, 27, 25, 24, 23,
- 21, 21,
- /* Size 16x32 */
- 32, 33, 33, 33, 33, 33, 33, 32, 32, 32, 32, 31, 29, 28, 28, 28, 33, 33,
- 33, 33, 33, 33, 32, 32, 32, 32, 32, 31, 29, 29, 29, 29, 33, 33, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 31, 29, 29, 29, 29, 33, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 31, 29, 29, 29, 29, 33, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 31, 29, 29, 29, 29, 33, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 31, 29, 29, 29, 29, 33, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 31,
- 30, 29, 29, 29, 33, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 31, 30, 29,
- 29, 29, 33, 32, 32, 32, 32, 32, 32, 32, 31, 31, 31, 31, 30, 30, 30, 30,
- 33, 32, 32, 32, 32, 32, 32, 32, 31, 31, 31, 31, 30, 30, 30, 30, 33, 32,
- 32, 32, 32, 32, 32, 32, 31, 31, 31, 31, 30, 30, 30, 30, 33, 32, 32, 32,
- 32, 32, 31, 31, 31, 31, 31, 30, 29, 29, 29, 29, 33, 32, 32, 32, 32, 32,
- 31, 31, 30, 30, 30, 30, 29, 28, 28, 28, 32, 32, 32, 32, 32, 32, 31, 30,
- 30, 30, 30, 29, 28, 28, 28, 28, 32, 32, 32, 32, 32, 32, 31, 30, 30, 30,
- 30, 29, 28, 28, 28, 28, 32, 32, 32, 32, 32, 32, 31, 30, 30, 30, 30, 29,
- 28, 28, 28, 28, 32, 32, 32, 32, 32, 32, 31, 30, 30, 30, 30, 29, 28, 28,
- 28, 28, 32, 32, 32, 31, 31, 31, 31, 30, 29, 29, 29, 28, 28, 27, 27, 27,
- 32, 32, 32, 31, 31, 31, 30, 29, 29, 29, 29, 28, 28, 27, 27, 27, 32, 32,
- 32, 31, 31, 31, 30, 29, 29, 29, 29, 28, 28, 27, 27, 27, 32, 32, 32, 31,
- 31, 31, 30, 29, 29, 29, 29, 28, 28, 27, 27, 27, 32, 31, 31, 31, 31, 31,
- 30, 29, 28, 28, 28, 28, 26, 26, 26, 26, 31, 31, 31, 31, 31, 31, 29, 28,
- 28, 28, 28, 27, 26, 25, 25, 25, 30, 30, 30, 30, 30, 30, 29, 28, 28, 28,
- 28, 26, 25, 24, 24, 24, 30, 30, 30, 30, 30, 30, 29, 28, 28, 28, 28, 26,
- 25, 24, 24, 24, 30, 30, 30, 30, 30, 30, 29, 28, 28, 28, 28, 26, 25, 24,
- 24, 24, 30, 30, 30, 30, 30, 30, 29, 28, 28, 28, 28, 26, 24, 23, 23, 23,
- 29, 29, 30, 30, 30, 30, 28, 28, 27, 27, 27, 25, 23, 22, 22, 22, 28, 29,
- 29, 30, 30, 30, 28, 28, 27, 27, 27, 24, 22, 21, 21, 21, 28, 29, 29, 30,
- 30, 30, 28, 28, 27, 27, 27, 24, 22, 21, 21, 21, 28, 29, 29, 30, 30, 30,
- 28, 28, 27, 27, 27, 24, 22, 21, 21, 21, 28, 28, 28, 28, 28, 28, 28, 27,
- 26, 26, 26, 24, 22, 21, 21, 21,
- /* Size 32x16 */
- 32, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 31, 30, 30, 30, 30, 29, 28, 28, 28, 28, 33, 33, 33, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 31,
- 31, 30, 30, 30, 30, 29, 29, 29, 29, 28, 33, 33, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 31, 31, 30, 30, 30,
- 30, 30, 29, 29, 29, 28, 33, 33, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 31, 31, 31, 31, 31, 31, 30, 30, 30, 30, 30, 30, 30,
- 30, 28, 33, 33, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 31, 31, 31, 31, 31, 31, 30, 30, 30, 30, 30, 30, 30, 30, 28, 33, 33,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 31, 31, 31,
- 31, 31, 31, 30, 30, 30, 30, 30, 30, 30, 30, 28, 33, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 31, 31, 31, 31, 31, 31, 31, 30, 30, 30, 30, 29, 29,
- 29, 29, 29, 28, 28, 28, 28, 28, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 31, 31, 30, 30, 30, 30, 30, 29, 29, 29, 29, 28, 28, 28, 28, 28, 28,
- 28, 28, 28, 27, 32, 32, 32, 32, 32, 32, 32, 32, 31, 31, 31, 31, 30, 30,
- 30, 30, 30, 29, 29, 29, 29, 28, 28, 28, 28, 28, 28, 27, 27, 27, 27, 26,
- 32, 32, 32, 32, 32, 32, 32, 32, 31, 31, 31, 31, 30, 30, 30, 30, 30, 29,
- 29, 29, 29, 28, 28, 28, 28, 28, 28, 27, 27, 27, 27, 26, 32, 32, 32, 32,
- 32, 32, 32, 32, 31, 31, 31, 31, 30, 30, 30, 30, 30, 29, 29, 29, 29, 28,
- 28, 28, 28, 28, 28, 27, 27, 27, 27, 26, 31, 31, 31, 31, 31, 31, 31, 31,
- 31, 31, 31, 30, 30, 29, 29, 29, 29, 28, 28, 28, 28, 28, 27, 26, 26, 26,
- 26, 25, 24, 24, 24, 24, 29, 29, 29, 29, 29, 29, 30, 30, 30, 30, 30, 29,
- 29, 28, 28, 28, 28, 28, 28, 28, 28, 26, 26, 25, 25, 25, 24, 23, 22, 22,
- 22, 22, 28, 29, 29, 29, 29, 29, 29, 29, 30, 30, 30, 29, 28, 28, 28, 28,
- 28, 27, 27, 27, 27, 26, 25, 24, 24, 24, 23, 22, 21, 21, 21, 21, 28, 29,
- 29, 29, 29, 29, 29, 29, 30, 30, 30, 29, 28, 28, 28, 28, 28, 27, 27, 27,
- 27, 26, 25, 24, 24, 24, 23, 22, 21, 21, 21, 21, 28, 29, 29, 29, 29, 29,
- 29, 29, 30, 30, 30, 29, 28, 28, 28, 28, 28, 27, 27, 27, 27, 26, 25, 24,
- 24, 24, 23, 22, 21, 21, 21, 21,
- /* Size 4x16 */
- 33, 33, 32, 28, 33, 32, 32, 29, 32, 32, 32, 29, 32, 32, 32, 29, 32, 32,
- 31, 30, 32, 32, 31, 30, 32, 32, 30, 28, 32, 32, 30, 28, 32, 32, 30, 28,
- 32, 31, 29, 27, 32, 31, 29, 27, 31, 31, 28, 25, 30, 30, 28, 24, 30, 30,
- 28, 23, 29, 30, 27, 21, 29, 30, 27, 21,
- /* Size 16x4 */
- 33, 33, 32, 32, 32, 32, 32, 32, 32, 32, 32, 31, 30, 30, 29, 29, 33, 32,
- 32, 32, 32, 32, 32, 32, 32, 31, 31, 31, 30, 30, 30, 30, 32, 32, 32, 32,
- 31, 31, 30, 30, 30, 29, 29, 28, 28, 28, 27, 27, 28, 29, 29, 29, 30, 30,
- 28, 28, 28, 27, 27, 25, 24, 23, 21, 21,
- /* Size 8x32 */
- 32, 33, 33, 33, 32, 32, 29, 28, 33, 33, 33, 32, 32, 32, 29, 29, 33, 32,
- 32, 32, 32, 32, 29, 29, 33, 32, 32, 32, 32, 32, 29, 29, 33, 32, 32, 32,
- 32, 32, 29, 29, 33, 32, 32, 32, 32, 32, 29, 29, 33, 32, 32, 32, 32, 32,
- 30, 29, 33, 32, 32, 32, 32, 32, 30, 29, 33, 32, 32, 32, 31, 31, 30, 30,
- 33, 32, 32, 32, 31, 31, 30, 30, 33, 32, 32, 32, 31, 31, 30, 30, 33, 32,
- 32, 31, 31, 31, 29, 29, 33, 32, 32, 31, 30, 30, 29, 28, 32, 32, 32, 31,
- 30, 30, 28, 28, 32, 32, 32, 31, 30, 30, 28, 28, 32, 32, 32, 31, 30, 30,
- 28, 28, 32, 32, 32, 31, 30, 30, 28, 28, 32, 32, 31, 31, 29, 29, 28, 27,
- 32, 32, 31, 30, 29, 29, 28, 27, 32, 32, 31, 30, 29, 29, 28, 27, 32, 32,
- 31, 30, 29, 29, 28, 27, 32, 31, 31, 30, 28, 28, 26, 26, 31, 31, 31, 29,
- 28, 28, 26, 25, 30, 30, 30, 29, 28, 28, 25, 24, 30, 30, 30, 29, 28, 28,
- 25, 24, 30, 30, 30, 29, 28, 28, 25, 24, 30, 30, 30, 29, 28, 28, 24, 23,
- 29, 30, 30, 28, 27, 27, 23, 22, 28, 29, 30, 28, 27, 27, 22, 21, 28, 29,
- 30, 28, 27, 27, 22, 21, 28, 29, 30, 28, 27, 27, 22, 21, 28, 28, 28, 28,
- 26, 26, 22, 21,
- /* Size 32x8 */
- 32, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 31, 30, 30, 30, 30, 29, 28, 28, 28, 28, 33, 33, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 31,
- 31, 30, 30, 30, 30, 30, 29, 29, 29, 28, 33, 33, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 31, 31, 31, 31, 31, 31, 30, 30, 30,
- 30, 30, 30, 30, 30, 28, 33, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 31,
- 31, 31, 31, 31, 31, 31, 30, 30, 30, 30, 29, 29, 29, 29, 29, 28, 28, 28,
- 28, 28, 32, 32, 32, 32, 32, 32, 32, 32, 31, 31, 31, 31, 30, 30, 30, 30,
- 30, 29, 29, 29, 29, 28, 28, 28, 28, 28, 28, 27, 27, 27, 27, 26, 32, 32,
- 32, 32, 32, 32, 32, 32, 31, 31, 31, 31, 30, 30, 30, 30, 30, 29, 29, 29,
- 29, 28, 28, 28, 28, 28, 28, 27, 27, 27, 27, 26, 29, 29, 29, 29, 29, 29,
- 30, 30, 30, 30, 30, 29, 29, 28, 28, 28, 28, 28, 28, 28, 28, 26, 26, 25,
- 25, 25, 24, 23, 22, 22, 22, 22, 28, 29, 29, 29, 29, 29, 29, 29, 30, 30,
- 30, 29, 28, 28, 28, 28, 28, 27, 27, 27, 27, 26, 25, 24, 24, 24, 23, 22,
- 21, 21, 21, 21 },
- { /* Chroma */
- /* Size 4x4 */
- 33, 32, 27, 22, 32, 30, 25, 22, 27, 25, 22, 22, 22, 22, 22, 20,
- /* Size 8x8 */
- 33, 33, 34, 30, 28, 26, 24, 21, 33, 33, 33, 30, 28, 26, 24, 22, 34, 33,
- 32, 29, 26, 25, 24, 22, 30, 30, 29, 26, 24, 23, 23, 22, 28, 28, 26, 24,
- 22, 22, 22, 22, 26, 26, 25, 23, 22, 22, 21, 21, 24, 24, 24, 23, 22, 21,
- 21, 20, 21, 22, 22, 22, 22, 21, 20, 19,
- /* Size 16x16 */
- 32, 33, 33, 33, 34, 34, 31, 31, 30, 28, 28, 26, 25, 23, 21, 21, 33, 33,
- 33, 33, 33, 33, 31, 30, 28, 27, 27, 25, 24, 23, 21, 21, 33, 33, 33, 33,
- 33, 33, 30, 30, 28, 27, 27, 25, 24, 23, 22, 22, 33, 33, 33, 33, 33, 33,
- 30, 29, 28, 26, 26, 25, 24, 23, 22, 22, 34, 33, 33, 33, 32, 32, 30, 29,
- 28, 26, 26, 24, 24, 23, 22, 22, 34, 33, 33, 33, 32, 32, 30, 29, 28, 26,
- 26, 24, 24, 23, 22, 22, 31, 31, 30, 30, 30, 30, 28, 27, 26, 24, 24, 23,
- 23, 23, 22, 22, 31, 30, 30, 29, 29, 29, 27, 26, 26, 24, 24, 23, 23, 22,
- 22, 22, 30, 28, 28, 28, 28, 28, 26, 26, 24, 23, 23, 23, 22, 22, 22, 22,
- 28, 27, 27, 26, 26, 26, 24, 24, 23, 22, 22, 22, 22, 22, 21, 21, 28, 27,
- 27, 26, 26, 26, 24, 24, 23, 22, 22, 22, 22, 22, 21, 21, 26, 25, 25, 25,
- 24, 24, 23, 23, 23, 22, 22, 21, 21, 21, 20, 20, 25, 24, 24, 24, 24, 24,
- 23, 23, 22, 22, 22, 21, 21, 21, 20, 20, 23, 23, 23, 23, 23, 23, 23, 22,
- 22, 22, 22, 21, 21, 20, 20, 20, 21, 21, 22, 22, 22, 22, 22, 22, 22, 21,
- 21, 20, 20, 20, 19, 19, 21, 21, 22, 22, 22, 22, 22, 22, 22, 21, 21, 20,
- 20, 20, 19, 19,
- /* Size 32x32 */
- 32, 33, 33, 33, 33, 33, 33, 34, 34, 34, 34, 33, 31, 31, 31, 31, 30, 28,
- 28, 28, 28, 27, 26, 25, 25, 25, 23, 22, 21, 21, 21, 21, 33, 33, 33, 33,
- 33, 33, 33, 33, 34, 34, 34, 32, 31, 30, 30, 30, 29, 28, 28, 28, 28, 26,
- 25, 24, 24, 24, 23, 22, 21, 21, 21, 21, 33, 33, 33, 33, 33, 33, 33, 33,
- 33, 33, 33, 32, 31, 30, 30, 30, 28, 28, 27, 27, 27, 26, 25, 24, 24, 24,
- 23, 22, 21, 21, 21, 22, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 32,
- 30, 30, 30, 30, 28, 28, 27, 27, 27, 26, 25, 24, 24, 24, 23, 22, 22, 22,
- 22, 22, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 32, 30, 30, 30, 30,
- 28, 28, 27, 27, 27, 26, 25, 24, 24, 24, 23, 22, 22, 22, 22, 22, 33, 33,
- 33, 33, 33, 33, 33, 33, 33, 33, 33, 32, 30, 30, 30, 30, 28, 28, 27, 27,
- 27, 26, 25, 24, 24, 24, 23, 22, 22, 22, 22, 22, 33, 33, 33, 33, 33, 33,
- 33, 33, 33, 33, 33, 31, 30, 29, 29, 29, 28, 27, 26, 26, 26, 26, 25, 24,
- 24, 24, 23, 22, 22, 22, 22, 22, 34, 33, 33, 33, 33, 33, 33, 33, 33, 33,
- 33, 31, 30, 29, 29, 29, 28, 27, 26, 26, 26, 25, 24, 24, 24, 24, 23, 22,
- 22, 22, 22, 22, 34, 34, 33, 33, 33, 33, 33, 33, 32, 32, 32, 31, 30, 29,
- 29, 29, 28, 26, 26, 26, 26, 25, 24, 24, 24, 24, 23, 23, 22, 22, 22, 22,
- 34, 34, 33, 33, 33, 33, 33, 33, 32, 32, 32, 31, 30, 29, 29, 29, 28, 26,
- 26, 26, 26, 25, 24, 24, 24, 24, 23, 23, 22, 22, 22, 22, 34, 34, 33, 33,
- 33, 33, 33, 33, 32, 32, 32, 31, 30, 29, 29, 29, 28, 26, 26, 26, 26, 25,
- 24, 24, 24, 24, 23, 23, 22, 22, 22, 22, 33, 32, 32, 32, 32, 32, 31, 31,
- 31, 31, 31, 30, 28, 28, 28, 28, 27, 26, 25, 25, 25, 24, 24, 24, 24, 24,
- 23, 22, 22, 22, 22, 22, 31, 31, 31, 30, 30, 30, 30, 30, 30, 30, 30, 28,
- 28, 27, 27, 27, 26, 25, 24, 24, 24, 24, 23, 23, 23, 23, 23, 22, 22, 22,
- 22, 22, 31, 30, 30, 30, 30, 30, 29, 29, 29, 29, 29, 28, 27, 26, 26, 26,
- 26, 24, 24, 24, 24, 23, 23, 23, 23, 23, 22, 22, 22, 22, 22, 22, 31, 30,
- 30, 30, 30, 30, 29, 29, 29, 29, 29, 28, 27, 26, 26, 26, 26, 24, 24, 24,
- 24, 23, 23, 23, 23, 23, 22, 22, 22, 22, 22, 22, 31, 30, 30, 30, 30, 30,
- 29, 29, 29, 29, 29, 28, 27, 26, 26, 26, 26, 24, 24, 24, 24, 23, 23, 23,
- 23, 23, 22, 22, 22, 22, 22, 22, 30, 29, 28, 28, 28, 28, 28, 28, 28, 28,
- 28, 27, 26, 26, 26, 26, 24, 23, 23, 23, 23, 23, 23, 22, 22, 22, 22, 22,
- 22, 22, 22, 22, 28, 28, 28, 28, 28, 28, 27, 27, 26, 26, 26, 26, 25, 24,
- 24, 24, 23, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22,
- 28, 28, 27, 27, 27, 27, 26, 26, 26, 26, 26, 25, 24, 24, 24, 24, 23, 22,
- 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 21, 21, 21, 22, 28, 28, 27, 27,
- 27, 27, 26, 26, 26, 26, 26, 25, 24, 24, 24, 24, 23, 22, 22, 22, 22, 22,
- 22, 22, 22, 22, 22, 22, 21, 21, 21, 22, 28, 28, 27, 27, 27, 27, 26, 26,
- 26, 26, 26, 25, 24, 24, 24, 24, 23, 22, 22, 22, 22, 22, 22, 22, 22, 22,
- 22, 22, 21, 21, 21, 22, 27, 26, 26, 26, 26, 26, 26, 25, 25, 25, 25, 24,
- 24, 23, 23, 23, 23, 22, 22, 22, 22, 22, 21, 21, 21, 21, 21, 21, 21, 21,
- 21, 21, 26, 25, 25, 25, 25, 25, 25, 24, 24, 24, 24, 24, 23, 23, 23, 23,
- 23, 22, 22, 22, 22, 21, 21, 21, 21, 21, 21, 21, 20, 20, 20, 21, 25, 24,
- 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 23, 23, 23, 23, 22, 22, 22, 22,
- 22, 21, 21, 21, 21, 21, 21, 20, 20, 20, 20, 20, 25, 24, 24, 24, 24, 24,
- 24, 24, 24, 24, 24, 24, 23, 23, 23, 23, 22, 22, 22, 22, 22, 21, 21, 21,
- 21, 21, 21, 20, 20, 20, 20, 20, 25, 24, 24, 24, 24, 24, 24, 24, 24, 24,
- 24, 24, 23, 23, 23, 23, 22, 22, 22, 22, 22, 21, 21, 21, 21, 21, 21, 20,
- 20, 20, 20, 20, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 22,
- 22, 22, 22, 22, 22, 22, 22, 21, 21, 21, 21, 21, 20, 20, 20, 20, 20, 20,
- 22, 22, 22, 22, 22, 22, 22, 22, 23, 23, 23, 22, 22, 22, 22, 22, 22, 22,
- 22, 22, 22, 21, 21, 20, 20, 20, 20, 20, 20, 20, 20, 20, 21, 21, 21, 22,
- 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 21, 21, 21, 21,
- 20, 20, 20, 20, 20, 20, 19, 19, 19, 19, 21, 21, 21, 22, 22, 22, 22, 22,
- 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 21, 21, 21, 21, 20, 20, 20, 20,
- 20, 20, 19, 19, 19, 19, 21, 21, 21, 22, 22, 22, 22, 22, 22, 22, 22, 22,
- 22, 22, 22, 22, 22, 22, 21, 21, 21, 21, 20, 20, 20, 20, 20, 20, 19, 19,
- 19, 19, 21, 21, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22,
- 22, 22, 22, 22, 22, 21, 21, 20, 20, 20, 20, 20, 19, 19, 19, 19,
- /* Size 4x8 */
- 33, 33, 28, 21, 33, 33, 27, 22, 33, 32, 26, 22, 30, 28, 24, 22, 28, 26,
- 22, 22, 26, 25, 22, 21, 24, 24, 22, 20, 21, 22, 21, 19,
- /* Size 8x4 */
- 33, 33, 33, 30, 28, 26, 24, 21, 33, 33, 32, 28, 26, 25, 24, 22, 28, 27,
- 26, 24, 22, 22, 22, 21, 21, 22, 22, 22, 22, 21, 20, 19,
- /* Size 8x16 */
- 32, 33, 33, 31, 28, 28, 23, 21, 33, 33, 33, 30, 27, 27, 23, 22, 33, 33,
- 33, 30, 27, 27, 23, 22, 33, 33, 32, 30, 26, 26, 23, 22, 34, 32, 32, 29,
- 26, 26, 23, 22, 34, 32, 32, 29, 26, 26, 23, 22, 31, 30, 29, 28, 24, 24,
- 22, 22, 31, 29, 28, 27, 24, 24, 22, 22, 29, 28, 28, 26, 23, 23, 22, 22,
- 28, 26, 26, 24, 22, 22, 22, 22, 28, 26, 26, 24, 22, 22, 22, 22, 25, 24,
- 24, 23, 22, 22, 21, 21, 24, 24, 24, 23, 22, 22, 21, 20, 23, 23, 23, 23,
- 22, 22, 20, 20, 21, 22, 22, 22, 21, 21, 20, 19, 21, 22, 22, 22, 21, 21,
- 20, 19,
- /* Size 16x8 */
- 32, 33, 33, 33, 34, 34, 31, 31, 29, 28, 28, 25, 24, 23, 21, 21, 33, 33,
- 33, 33, 32, 32, 30, 29, 28, 26, 26, 24, 24, 23, 22, 22, 33, 33, 33, 32,
- 32, 32, 29, 28, 28, 26, 26, 24, 24, 23, 22, 22, 31, 30, 30, 30, 29, 29,
- 28, 27, 26, 24, 24, 23, 23, 23, 22, 22, 28, 27, 27, 26, 26, 26, 24, 24,
- 23, 22, 22, 22, 22, 22, 21, 21, 28, 27, 27, 26, 26, 26, 24, 24, 23, 22,
- 22, 22, 22, 22, 21, 21, 23, 23, 23, 23, 23, 23, 22, 22, 22, 22, 22, 21,
- 21, 20, 20, 20, 21, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 21, 20, 20,
- 19, 19,
- /* Size 16x32 */
- 32, 33, 33, 33, 33, 33, 31, 29, 28, 28, 28, 26, 23, 21, 21, 21, 33, 33,
- 33, 33, 33, 33, 31, 28, 28, 28, 28, 25, 23, 21, 21, 21, 33, 33, 33, 33,
- 33, 33, 30, 28, 27, 27, 27, 25, 23, 22, 22, 22, 33, 33, 33, 33, 33, 33,
- 30, 28, 27, 27, 27, 25, 23, 22, 22, 22, 33, 33, 33, 33, 33, 33, 30, 28,
- 27, 27, 27, 25, 23, 22, 22, 22, 33, 33, 33, 33, 33, 33, 30, 28, 27, 27,
- 27, 25, 23, 22, 22, 22, 33, 33, 33, 32, 32, 32, 30, 28, 26, 26, 26, 25,
- 23, 22, 22, 22, 34, 33, 33, 32, 32, 32, 30, 27, 26, 26, 26, 24, 23, 22,
- 22, 22, 34, 33, 32, 32, 32, 32, 29, 27, 26, 26, 26, 24, 23, 22, 22, 22,
- 34, 33, 32, 32, 32, 32, 29, 27, 26, 26, 26, 24, 23, 22, 22, 22, 34, 33,
- 32, 32, 32, 32, 29, 27, 26, 26, 26, 24, 23, 22, 22, 22, 33, 32, 31, 31,
- 31, 31, 28, 26, 25, 25, 25, 24, 23, 22, 22, 22, 31, 30, 30, 29, 29, 29,
- 28, 26, 24, 24, 24, 23, 22, 22, 22, 22, 31, 30, 29, 28, 28, 28, 27, 25,
- 24, 24, 24, 23, 22, 22, 22, 22, 31, 30, 29, 28, 28, 28, 27, 25, 24, 24,
- 24, 23, 22, 22, 22, 22, 31, 30, 29, 28, 28, 28, 27, 25, 24, 24, 24, 23,
- 22, 22, 22, 22, 29, 28, 28, 28, 28, 28, 26, 24, 23, 23, 23, 23, 22, 22,
- 22, 22, 28, 28, 27, 26, 26, 26, 24, 23, 22, 22, 22, 22, 22, 22, 22, 22,
- 28, 27, 26, 26, 26, 26, 24, 23, 22, 22, 22, 22, 22, 22, 22, 22, 28, 27,
- 26, 26, 26, 26, 24, 23, 22, 22, 22, 22, 22, 22, 22, 22, 28, 27, 26, 26,
- 26, 26, 24, 23, 22, 22, 22, 22, 22, 22, 22, 22, 26, 26, 26, 25, 25, 25,
- 24, 22, 22, 22, 22, 21, 21, 21, 21, 21, 25, 25, 24, 24, 24, 24, 23, 22,
- 22, 22, 22, 21, 21, 21, 21, 21, 24, 24, 24, 24, 24, 24, 23, 22, 22, 22,
- 22, 21, 21, 20, 20, 20, 24, 24, 24, 24, 24, 24, 23, 22, 22, 22, 22, 21,
- 21, 20, 20, 20, 24, 24, 24, 24, 24, 24, 23, 22, 22, 22, 22, 21, 21, 20,
- 20, 20, 23, 23, 23, 23, 23, 23, 23, 22, 22, 22, 22, 21, 20, 20, 20, 20,
- 22, 22, 22, 22, 22, 22, 22, 22, 21, 21, 21, 21, 20, 20, 20, 20, 21, 21,
- 22, 22, 22, 22, 22, 21, 21, 21, 21, 20, 20, 19, 19, 19, 21, 21, 22, 22,
- 22, 22, 22, 21, 21, 21, 21, 20, 20, 19, 19, 19, 21, 21, 22, 22, 22, 22,
- 22, 21, 21, 21, 21, 20, 20, 19, 19, 19, 21, 21, 22, 22, 22, 22, 22, 22,
- 22, 22, 22, 21, 20, 19, 19, 19,
- /* Size 32x16 */
- 32, 33, 33, 33, 33, 33, 33, 34, 34, 34, 34, 33, 31, 31, 31, 31, 29, 28,
- 28, 28, 28, 26, 25, 24, 24, 24, 23, 22, 21, 21, 21, 21, 33, 33, 33, 33,
- 33, 33, 33, 33, 33, 33, 33, 32, 30, 30, 30, 30, 28, 28, 27, 27, 27, 26,
- 25, 24, 24, 24, 23, 22, 21, 21, 21, 21, 33, 33, 33, 33, 33, 33, 33, 33,
- 32, 32, 32, 31, 30, 29, 29, 29, 28, 27, 26, 26, 26, 26, 24, 24, 24, 24,
- 23, 22, 22, 22, 22, 22, 33, 33, 33, 33, 33, 33, 32, 32, 32, 32, 32, 31,
- 29, 28, 28, 28, 28, 26, 26, 26, 26, 25, 24, 24, 24, 24, 23, 22, 22, 22,
- 22, 22, 33, 33, 33, 33, 33, 33, 32, 32, 32, 32, 32, 31, 29, 28, 28, 28,
- 28, 26, 26, 26, 26, 25, 24, 24, 24, 24, 23, 22, 22, 22, 22, 22, 33, 33,
- 33, 33, 33, 33, 32, 32, 32, 32, 32, 31, 29, 28, 28, 28, 28, 26, 26, 26,
- 26, 25, 24, 24, 24, 24, 23, 22, 22, 22, 22, 22, 31, 31, 30, 30, 30, 30,
- 30, 30, 29, 29, 29, 28, 28, 27, 27, 27, 26, 24, 24, 24, 24, 24, 23, 23,
- 23, 23, 23, 22, 22, 22, 22, 22, 29, 28, 28, 28, 28, 28, 28, 27, 27, 27,
- 27, 26, 26, 25, 25, 25, 24, 23, 23, 23, 23, 22, 22, 22, 22, 22, 22, 22,
- 21, 21, 21, 22, 28, 28, 27, 27, 27, 27, 26, 26, 26, 26, 26, 25, 24, 24,
- 24, 24, 23, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 21, 21, 21, 21, 22,
- 28, 28, 27, 27, 27, 27, 26, 26, 26, 26, 26, 25, 24, 24, 24, 24, 23, 22,
- 22, 22, 22, 22, 22, 22, 22, 22, 22, 21, 21, 21, 21, 22, 28, 28, 27, 27,
- 27, 27, 26, 26, 26, 26, 26, 25, 24, 24, 24, 24, 23, 22, 22, 22, 22, 22,
- 22, 22, 22, 22, 22, 21, 21, 21, 21, 22, 26, 25, 25, 25, 25, 25, 25, 24,
- 24, 24, 24, 24, 23, 23, 23, 23, 23, 22, 22, 22, 22, 21, 21, 21, 21, 21,
- 21, 21, 20, 20, 20, 21, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23,
- 22, 22, 22, 22, 22, 22, 22, 22, 22, 21, 21, 21, 21, 21, 20, 20, 20, 20,
- 20, 20, 21, 21, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22,
- 22, 22, 22, 22, 22, 21, 21, 20, 20, 20, 20, 20, 19, 19, 19, 19, 21, 21,
- 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22,
- 22, 21, 21, 20, 20, 20, 20, 20, 19, 19, 19, 19, 21, 21, 22, 22, 22, 22,
- 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 21, 21, 20,
- 20, 20, 20, 20, 19, 19, 19, 19,
- /* Size 4x16 */
- 33, 33, 28, 21, 33, 33, 27, 22, 33, 33, 27, 22, 33, 32, 26, 22, 33, 32,
- 26, 22, 33, 32, 26, 22, 30, 29, 24, 22, 30, 28, 24, 22, 28, 28, 23, 22,
- 27, 26, 22, 22, 27, 26, 22, 22, 25, 24, 22, 21, 24, 24, 22, 20, 23, 23,
- 22, 20, 21, 22, 21, 19, 21, 22, 21, 19,
- /* Size 16x4 */
- 33, 33, 33, 33, 33, 33, 30, 30, 28, 27, 27, 25, 24, 23, 21, 21, 33, 33,
- 33, 32, 32, 32, 29, 28, 28, 26, 26, 24, 24, 23, 22, 22, 28, 27, 27, 26,
- 26, 26, 24, 24, 23, 22, 22, 22, 22, 22, 21, 21, 21, 22, 22, 22, 22, 22,
- 22, 22, 22, 22, 22, 21, 20, 20, 19, 19,
- /* Size 8x32 */
- 32, 33, 33, 31, 28, 28, 23, 21, 33, 33, 33, 31, 28, 28, 23, 21, 33, 33,
- 33, 30, 27, 27, 23, 22, 33, 33, 33, 30, 27, 27, 23, 22, 33, 33, 33, 30,
- 27, 27, 23, 22, 33, 33, 33, 30, 27, 27, 23, 22, 33, 33, 32, 30, 26, 26,
- 23, 22, 34, 33, 32, 30, 26, 26, 23, 22, 34, 32, 32, 29, 26, 26, 23, 22,
- 34, 32, 32, 29, 26, 26, 23, 22, 34, 32, 32, 29, 26, 26, 23, 22, 33, 31,
- 31, 28, 25, 25, 23, 22, 31, 30, 29, 28, 24, 24, 22, 22, 31, 29, 28, 27,
- 24, 24, 22, 22, 31, 29, 28, 27, 24, 24, 22, 22, 31, 29, 28, 27, 24, 24,
- 22, 22, 29, 28, 28, 26, 23, 23, 22, 22, 28, 27, 26, 24, 22, 22, 22, 22,
- 28, 26, 26, 24, 22, 22, 22, 22, 28, 26, 26, 24, 22, 22, 22, 22, 28, 26,
- 26, 24, 22, 22, 22, 22, 26, 26, 25, 24, 22, 22, 21, 21, 25, 24, 24, 23,
- 22, 22, 21, 21, 24, 24, 24, 23, 22, 22, 21, 20, 24, 24, 24, 23, 22, 22,
- 21, 20, 24, 24, 24, 23, 22, 22, 21, 20, 23, 23, 23, 23, 22, 22, 20, 20,
- 22, 22, 22, 22, 21, 21, 20, 20, 21, 22, 22, 22, 21, 21, 20, 19, 21, 22,
- 22, 22, 21, 21, 20, 19, 21, 22, 22, 22, 21, 21, 20, 19, 21, 22, 22, 22,
- 22, 22, 20, 19,
- /* Size 32x8 */
- 32, 33, 33, 33, 33, 33, 33, 34, 34, 34, 34, 33, 31, 31, 31, 31, 29, 28,
- 28, 28, 28, 26, 25, 24, 24, 24, 23, 22, 21, 21, 21, 21, 33, 33, 33, 33,
- 33, 33, 33, 33, 32, 32, 32, 31, 30, 29, 29, 29, 28, 27, 26, 26, 26, 26,
- 24, 24, 24, 24, 23, 22, 22, 22, 22, 22, 33, 33, 33, 33, 33, 33, 32, 32,
- 32, 32, 32, 31, 29, 28, 28, 28, 28, 26, 26, 26, 26, 25, 24, 24, 24, 24,
- 23, 22, 22, 22, 22, 22, 31, 31, 30, 30, 30, 30, 30, 30, 29, 29, 29, 28,
- 28, 27, 27, 27, 26, 24, 24, 24, 24, 24, 23, 23, 23, 23, 23, 22, 22, 22,
- 22, 22, 28, 28, 27, 27, 27, 27, 26, 26, 26, 26, 26, 25, 24, 24, 24, 24,
- 23, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 21, 21, 21, 21, 22, 28, 28,
- 27, 27, 27, 27, 26, 26, 26, 26, 26, 25, 24, 24, 24, 24, 23, 22, 22, 22,
- 22, 22, 22, 22, 22, 22, 22, 21, 21, 21, 21, 22, 23, 23, 23, 23, 23, 23,
- 23, 23, 23, 23, 23, 23, 22, 22, 22, 22, 22, 22, 22, 22, 22, 21, 21, 21,
- 21, 21, 20, 20, 20, 20, 20, 20, 21, 21, 22, 22, 22, 22, 22, 22, 22, 22,
- 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 21, 21, 20, 20, 20, 20, 20,
- 19, 19, 19, 19 },
- },
- {
- { /* Luma */
- /* Size 4x4 */
- 33, 32, 32, 32, 32, 32, 32, 31, 32, 32, 31, 30, 32, 31, 30, 29,
- /* Size 8x8 */
- 33, 33, 33, 33, 32, 32, 32, 31, 33, 32, 32, 32, 32, 32, 32, 31, 33, 32,
- 32, 32, 32, 32, 32, 31, 33, 32, 32, 32, 32, 32, 32, 31, 32, 32, 32, 32,
- 31, 31, 30, 29, 32, 32, 32, 32, 31, 30, 30, 29, 32, 32, 32, 32, 30, 30,
- 29, 28, 31, 31, 31, 31, 29, 29, 28, 27,
- /* Size 16x16 */
- 32, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 32, 32, 32, 32, 30, 33, 33,
- 33, 33, 33, 33, 33, 32, 32, 32, 32, 32, 32, 32, 31, 30, 33, 33, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 31, 30, 33, 33, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 31, 30, 33, 33, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 31, 30, 33, 33, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 31, 31, 33, 33, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 31, 31, 33, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 31, 31, 31,
- 31, 30, 33, 32, 32, 32, 32, 32, 32, 32, 31, 31, 31, 31, 31, 31, 30, 29,
- 33, 32, 32, 32, 32, 32, 32, 32, 31, 31, 31, 31, 30, 30, 30, 29, 33, 32,
- 32, 32, 32, 32, 32, 32, 31, 31, 31, 31, 30, 30, 30, 29, 32, 32, 32, 32,
- 32, 32, 32, 31, 31, 31, 31, 30, 29, 29, 29, 28, 32, 32, 32, 32, 32, 32,
- 32, 31, 31, 30, 30, 29, 29, 29, 28, 28, 32, 32, 32, 32, 32, 32, 32, 31,
- 31, 30, 30, 29, 29, 29, 28, 28, 32, 31, 31, 31, 31, 31, 31, 31, 30, 30,
- 30, 29, 28, 28, 28, 27, 30, 30, 30, 30, 30, 31, 31, 30, 29, 29, 29, 28,
- 28, 28, 27, 26,
- /* Size 32x32 */
- 32, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33,
- 33, 33, 33, 32, 32, 32, 32, 32, 32, 32, 32, 31, 30, 30, 33, 33, 33, 33,
- 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 31, 30, 30, 33, 33, 33, 33, 33, 33, 33, 33,
- 33, 33, 33, 33, 33, 33, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 31, 31, 30, 30, 33, 33, 33, 33, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 31, 31,
- 30, 30, 33, 33, 33, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 31, 31, 30, 30, 33, 33,
- 33, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 31, 31, 30, 30, 33, 33, 33, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 31, 31, 30, 30, 33, 33, 33, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 31, 31, 30, 30, 33, 33, 33, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 31, 31, 30, 30,
- 33, 33, 33, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 31, 31, 31, 31, 33, 33, 33, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 31, 31, 31, 31, 31, 33, 33, 33, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 31, 31, 31, 31, 31, 33, 33, 33, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 31, 31, 31,
- 31, 31, 33, 33, 33, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 31, 31, 31, 31, 31, 33, 33,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 31, 31, 31, 31, 31, 31, 31, 31, 31, 30, 30, 33, 33, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 31, 31, 31, 31, 31, 31, 31, 31,
- 31, 31, 31, 31, 30, 30, 30, 30, 33, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 30,
- 30, 30, 29, 29, 33, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 31, 31, 31, 31, 31, 31, 31, 31, 30, 30, 30, 30, 30, 30, 29, 29, 29,
- 33, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 31, 31, 31,
- 31, 31, 31, 31, 31, 30, 30, 30, 30, 30, 30, 29, 29, 29, 33, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 31, 31, 31, 31, 31, 31, 31,
- 31, 30, 30, 30, 30, 30, 30, 29, 29, 29, 33, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 31, 31, 31, 31, 31, 31, 31, 31, 30, 30, 30,
- 30, 30, 30, 29, 29, 29, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 31, 31, 31, 31, 31, 31, 31, 30, 30, 30, 30, 30, 30, 30, 29, 29,
- 29, 29, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 31, 31,
- 31, 31, 31, 31, 31, 30, 30, 30, 29, 29, 29, 29, 29, 29, 28, 28, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 31, 31, 31, 30, 30, 30,
- 30, 30, 30, 29, 29, 29, 29, 29, 29, 28, 28, 28, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 31, 31, 31, 30, 30, 30, 30, 30, 29, 29,
- 29, 29, 29, 29, 28, 28, 28, 28, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 31, 31, 31, 30, 30, 30, 30, 30, 29, 29, 29, 29, 29, 29,
- 28, 28, 28, 28, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 31, 31, 31, 30, 30, 30, 30, 30, 29, 29, 29, 29, 29, 29, 28, 28, 28, 28,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 31, 31, 31, 31, 31, 31, 30, 30,
- 30, 30, 30, 30, 29, 29, 29, 29, 29, 29, 28, 28, 28, 28, 32, 32, 31, 31,
- 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 30, 30, 30, 30, 30, 30, 29,
- 29, 29, 28, 28, 28, 28, 28, 28, 27, 27, 31, 31, 31, 31, 31, 31, 31, 31,
- 31, 31, 31, 31, 31, 31, 31, 30, 30, 29, 29, 29, 29, 29, 29, 28, 28, 28,
- 28, 28, 28, 27, 27, 27, 30, 30, 30, 30, 30, 30, 30, 30, 30, 31, 31, 31,
- 31, 31, 30, 30, 29, 29, 29, 29, 29, 29, 28, 28, 28, 28, 28, 28, 27, 27,
- 26, 26, 30, 30, 30, 30, 30, 30, 30, 30, 30, 31, 31, 31, 31, 31, 30, 30,
- 29, 29, 29, 29, 29, 29, 28, 28, 28, 28, 28, 28, 27, 27, 26, 26,
- /* Size 4x8 */
- 33, 33, 32, 32, 33, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 31, 32, 32,
- 31, 30, 32, 32, 30, 30, 32, 31, 30, 29, 31, 31, 29, 28,
- /* Size 8x4 */
- 33, 33, 32, 32, 32, 32, 32, 31, 33, 32, 32, 32, 32, 32, 31, 31, 32, 32,
- 32, 32, 31, 30, 30, 29, 32, 32, 32, 31, 30, 30, 29, 28,
- /* Size 8x16 */
- 32, 33, 33, 33, 33, 32, 32, 32, 33, 33, 32, 32, 32, 32, 32, 31, 33, 32,
- 32, 32, 32, 32, 32, 31, 33, 32, 32, 32, 32, 32, 32, 31, 33, 32, 32, 32,
- 32, 32, 32, 31, 33, 32, 32, 32, 32, 31, 31, 31, 33, 32, 32, 32, 32, 31,
- 31, 31, 33, 32, 32, 32, 32, 31, 31, 31, 33, 32, 32, 32, 31, 30, 30, 30,
- 32, 32, 32, 32, 31, 30, 30, 30, 32, 32, 32, 32, 31, 30, 30, 30, 32, 32,
- 32, 32, 31, 29, 29, 29, 32, 32, 31, 31, 30, 29, 29, 28, 32, 32, 31, 31,
- 30, 29, 29, 28, 32, 31, 31, 31, 30, 28, 28, 28, 30, 30, 30, 30, 29, 28,
- 28, 27,
- /* Size 16x8 */
- 32, 33, 33, 33, 33, 33, 33, 33, 33, 32, 32, 32, 32, 32, 32, 30, 33, 33,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 31, 30, 33, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 31, 31, 31, 30, 33, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 31, 31, 31, 30, 33, 32, 32, 32, 32, 32, 32, 32,
- 31, 31, 31, 31, 30, 30, 30, 29, 32, 32, 32, 32, 32, 31, 31, 31, 30, 30,
- 30, 29, 29, 29, 28, 28, 32, 32, 32, 32, 32, 31, 31, 31, 30, 30, 30, 29,
- 29, 29, 28, 28, 32, 31, 31, 31, 31, 31, 31, 31, 30, 30, 30, 29, 28, 28,
- 28, 27,
- /* Size 16x32 */
- 32, 33, 33, 33, 33, 33, 33, 33, 33, 32, 32, 32, 32, 32, 32, 30, 33, 33,
- 33, 33, 33, 33, 33, 33, 32, 32, 32, 32, 32, 32, 31, 30, 33, 33, 33, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 31, 30, 33, 33, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 31, 30, 33, 33, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 31, 30, 33, 33, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 31, 30, 33, 33, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 31, 30, 33, 33, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 31, 30, 33, 33, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 31, 30,
- 33, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 31, 31, 33, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 31, 31, 31, 31, 31, 31, 33, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 31, 31, 31, 31, 31, 31, 33, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 31, 31, 31, 31, 31, 31, 33, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 31, 31, 31, 31, 31, 31, 33, 32, 32, 32, 32, 32, 32, 32, 32, 31,
- 31, 31, 31, 31, 31, 30, 33, 32, 32, 32, 32, 32, 32, 32, 31, 31, 31, 31,
- 31, 31, 30, 30, 33, 32, 32, 32, 32, 32, 32, 32, 31, 31, 30, 30, 30, 30,
- 30, 29, 32, 32, 32, 32, 32, 32, 32, 31, 31, 31, 30, 30, 30, 30, 30, 29,
- 32, 32, 32, 32, 32, 32, 32, 31, 31, 31, 30, 30, 30, 30, 30, 29, 32, 32,
- 32, 32, 32, 32, 32, 31, 31, 31, 30, 30, 30, 30, 30, 29, 32, 32, 32, 32,
- 32, 32, 32, 31, 31, 31, 30, 30, 30, 30, 30, 29, 32, 32, 32, 32, 32, 32,
- 32, 31, 31, 30, 30, 30, 30, 30, 29, 29, 32, 32, 32, 32, 32, 32, 32, 31,
- 31, 30, 29, 29, 29, 29, 29, 28, 32, 32, 32, 32, 31, 31, 31, 31, 31, 30,
- 29, 29, 29, 29, 28, 28, 32, 32, 32, 32, 31, 31, 31, 31, 30, 30, 29, 29,
- 29, 29, 28, 28, 32, 32, 32, 32, 31, 31, 31, 31, 30, 30, 29, 29, 29, 29,
- 28, 28, 32, 32, 32, 32, 31, 31, 31, 31, 30, 30, 29, 29, 29, 29, 28, 28,
- 32, 32, 32, 31, 31, 31, 31, 31, 30, 30, 29, 29, 29, 29, 28, 28, 32, 31,
- 31, 31, 31, 31, 31, 31, 30, 29, 28, 28, 28, 28, 28, 27, 31, 31, 31, 31,
- 31, 31, 31, 30, 30, 29, 28, 28, 28, 28, 28, 27, 30, 30, 30, 30, 30, 30,
- 30, 30, 29, 28, 28, 28, 28, 28, 27, 26, 30, 30, 30, 30, 30, 30, 30, 30,
- 29, 28, 28, 28, 28, 28, 27, 26,
- /* Size 32x16 */
- 32, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 31, 30, 30, 33, 33, 33, 33,
- 33, 33, 33, 33, 33, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 31, 31, 30, 30, 33, 33, 33, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 31, 31, 30, 30, 33, 33, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 31, 31, 31,
- 30, 30, 33, 33, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 31, 31, 31, 31, 31, 31, 31, 30, 30, 33, 33,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 31, 31, 31, 31, 31, 31, 31, 30, 30, 33, 33, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 31,
- 31, 31, 31, 31, 31, 31, 30, 30, 33, 33, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31,
- 31, 30, 30, 30, 33, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 31, 31, 31, 31, 31, 31, 31, 31, 31, 30, 30, 30, 30, 30, 30, 29, 29,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 31, 31, 31, 31,
- 31, 31, 31, 30, 30, 30, 30, 30, 30, 30, 29, 29, 28, 28, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 31, 31, 31, 31, 31, 31, 30, 30, 30, 30, 30, 30,
- 29, 29, 29, 29, 29, 29, 28, 28, 28, 28, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 31, 31, 31, 31, 31, 31, 30, 30, 30, 30, 30, 30, 29, 29, 29, 29,
- 29, 29, 28, 28, 28, 28, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 31, 31,
- 31, 31, 31, 31, 30, 30, 30, 30, 30, 30, 29, 29, 29, 29, 29, 29, 28, 28,
- 28, 28, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 31, 31, 31, 31, 31, 31,
- 30, 30, 30, 30, 30, 30, 29, 29, 29, 29, 29, 29, 28, 28, 28, 28, 32, 31,
- 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 30, 30, 30, 30, 30,
- 30, 29, 29, 28, 28, 28, 28, 28, 28, 28, 27, 27, 30, 30, 30, 30, 30, 30,
- 30, 30, 30, 31, 31, 31, 31, 31, 30, 30, 29, 29, 29, 29, 29, 29, 28, 28,
- 28, 28, 28, 28, 27, 27, 26, 26,
- /* Size 4x16 */
- 33, 33, 32, 32, 33, 32, 32, 32, 33, 32, 32, 32, 33, 32, 32, 32, 33, 32,
- 32, 32, 32, 32, 32, 31, 32, 32, 32, 31, 32, 32, 31, 31, 32, 32, 31, 30,
- 32, 32, 31, 30, 32, 32, 31, 30, 32, 32, 30, 29, 32, 31, 30, 29, 32, 31,
- 30, 29, 31, 31, 29, 28, 30, 30, 28, 28,
- /* Size 16x4 */
- 33, 33, 33, 33, 33, 32, 32, 32, 32, 32, 32, 32, 32, 32, 31, 30, 33, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 31, 31, 31, 30, 32, 32, 32, 32,
- 32, 32, 32, 31, 31, 31, 31, 30, 30, 30, 29, 28, 32, 32, 32, 32, 32, 31,
- 31, 31, 30, 30, 30, 29, 29, 29, 28, 28,
- /* Size 8x32 */
- 32, 33, 33, 33, 33, 32, 32, 32, 33, 33, 33, 33, 32, 32, 32, 31, 33, 33,
- 32, 32, 32, 32, 32, 31, 33, 32, 32, 32, 32, 32, 32, 31, 33, 32, 32, 32,
- 32, 32, 32, 31, 33, 32, 32, 32, 32, 32, 32, 31, 33, 32, 32, 32, 32, 32,
- 32, 31, 33, 32, 32, 32, 32, 32, 32, 31, 33, 32, 32, 32, 32, 32, 32, 31,
- 33, 32, 32, 32, 32, 32, 32, 31, 33, 32, 32, 32, 32, 31, 31, 31, 33, 32,
- 32, 32, 32, 31, 31, 31, 33, 32, 32, 32, 32, 31, 31, 31, 33, 32, 32, 32,
- 32, 31, 31, 31, 33, 32, 32, 32, 32, 31, 31, 31, 33, 32, 32, 32, 31, 31,
- 31, 30, 33, 32, 32, 32, 31, 30, 30, 30, 32, 32, 32, 32, 31, 30, 30, 30,
- 32, 32, 32, 32, 31, 30, 30, 30, 32, 32, 32, 32, 31, 30, 30, 30, 32, 32,
- 32, 32, 31, 30, 30, 30, 32, 32, 32, 32, 31, 30, 30, 29, 32, 32, 32, 32,
- 31, 29, 29, 29, 32, 32, 31, 31, 31, 29, 29, 28, 32, 32, 31, 31, 30, 29,
- 29, 28, 32, 32, 31, 31, 30, 29, 29, 28, 32, 32, 31, 31, 30, 29, 29, 28,
- 32, 32, 31, 31, 30, 29, 29, 28, 32, 31, 31, 31, 30, 28, 28, 28, 31, 31,
- 31, 31, 30, 28, 28, 28, 30, 30, 30, 30, 29, 28, 28, 27, 30, 30, 30, 30,
- 29, 28, 28, 27,
- /* Size 32x8 */
- 32, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 31, 30, 30, 33, 33, 33, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 31, 31, 30, 30, 33, 33, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 31, 31, 31,
- 31, 31, 31, 31, 30, 30, 33, 33, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 31, 31, 31, 31, 31, 31, 31,
- 30, 30, 33, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 31,
- 31, 31, 31, 31, 31, 31, 31, 31, 30, 30, 30, 30, 30, 30, 29, 29, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 31, 31, 31, 31, 31, 31, 30, 30, 30, 30,
- 30, 30, 29, 29, 29, 29, 29, 29, 28, 28, 28, 28, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 31, 31, 31, 31, 31, 31, 30, 30, 30, 30, 30, 30, 29, 29,
- 29, 29, 29, 29, 28, 28, 28, 28, 32, 31, 31, 31, 31, 31, 31, 31, 31, 31,
- 31, 31, 31, 31, 31, 30, 30, 30, 30, 30, 30, 29, 29, 28, 28, 28, 28, 28,
- 28, 28, 27, 27 },
- { /* Chroma */
- /* Size 4x4 */
- 33, 33, 30, 27, 33, 32, 29, 26, 30, 29, 26, 24, 27, 26, 24, 22,
- /* Size 8x8 */
- 33, 33, 33, 34, 30, 29, 28, 26, 33, 33, 33, 33, 30, 29, 27, 25, 33, 33,
- 33, 33, 29, 28, 26, 25, 34, 33, 33, 32, 29, 28, 26, 24, 30, 30, 29, 29,
- 26, 26, 24, 23, 29, 29, 28, 28, 26, 25, 23, 23, 28, 27, 26, 26, 24, 23,
- 22, 22, 26, 25, 25, 24, 23, 23, 22, 21,
- /* Size 16x16 */
- 32, 33, 33, 33, 33, 34, 34, 33, 31, 31, 31, 29, 28, 28, 27, 25, 33, 33,
- 33, 33, 33, 33, 33, 33, 31, 30, 30, 28, 28, 28, 26, 24, 33, 33, 33, 33,
- 33, 33, 33, 32, 30, 30, 30, 28, 27, 27, 26, 24, 33, 33, 33, 33, 33, 33,
- 33, 32, 30, 30, 30, 28, 27, 27, 26, 24, 33, 33, 33, 33, 33, 33, 33, 32,
- 30, 29, 29, 28, 26, 26, 26, 24, 34, 33, 33, 33, 33, 32, 32, 32, 30, 29,
- 29, 27, 26, 26, 25, 24, 34, 33, 33, 33, 33, 32, 32, 32, 30, 29, 29, 27,
- 26, 26, 25, 24, 33, 33, 32, 32, 32, 32, 32, 31, 29, 28, 28, 27, 26, 26,
- 25, 24, 31, 31, 30, 30, 30, 30, 30, 29, 28, 27, 27, 25, 24, 24, 24, 23,
- 31, 30, 30, 30, 29, 29, 29, 28, 27, 26, 26, 25, 24, 24, 23, 23, 31, 30,
- 30, 30, 29, 29, 29, 28, 27, 26, 26, 25, 24, 24, 23, 23, 29, 28, 28, 28,
- 28, 27, 27, 27, 25, 25, 25, 23, 22, 22, 22, 22, 28, 28, 27, 27, 26, 26,
- 26, 26, 24, 24, 24, 22, 22, 22, 22, 22, 28, 28, 27, 27, 26, 26, 26, 26,
- 24, 24, 24, 22, 22, 22, 22, 22, 27, 26, 26, 26, 26, 25, 25, 25, 24, 23,
- 23, 22, 22, 22, 22, 21, 25, 24, 24, 24, 24, 24, 24, 24, 23, 23, 23, 22,
- 22, 22, 21, 21,
- /* Size 32x32 */
- 32, 33, 33, 33, 33, 33, 33, 33, 33, 34, 34, 34, 34, 34, 33, 32, 31, 31,
- 31, 31, 31, 30, 29, 28, 28, 28, 28, 28, 27, 26, 25, 25, 33, 33, 33, 33,
- 33, 33, 33, 33, 33, 33, 34, 34, 34, 34, 33, 32, 31, 30, 30, 30, 30, 29,
- 28, 28, 28, 28, 28, 28, 26, 26, 24, 24, 33, 33, 33, 33, 33, 33, 33, 33,
- 33, 33, 33, 33, 33, 33, 33, 32, 31, 30, 30, 30, 30, 29, 28, 28, 28, 28,
- 28, 27, 26, 26, 24, 24, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33,
- 33, 33, 32, 32, 30, 30, 30, 30, 30, 29, 28, 27, 27, 27, 27, 27, 26, 25,
- 24, 24, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 32, 31,
- 30, 30, 30, 30, 30, 29, 28, 27, 27, 27, 27, 26, 26, 25, 24, 24, 33, 33,
- 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 32, 31, 30, 30, 30, 30,
- 30, 29, 28, 27, 27, 27, 27, 26, 26, 25, 24, 24, 33, 33, 33, 33, 33, 33,
- 33, 33, 33, 33, 33, 33, 33, 33, 32, 31, 30, 30, 30, 30, 30, 29, 28, 27,
- 27, 27, 27, 26, 26, 25, 24, 24, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33,
- 33, 33, 33, 33, 32, 31, 30, 30, 30, 30, 30, 28, 28, 27, 27, 27, 27, 26,
- 26, 25, 24, 24, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33,
- 32, 31, 30, 29, 29, 29, 29, 28, 28, 27, 26, 26, 26, 26, 26, 25, 24, 24,
- 34, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 32, 31, 30, 29,
- 29, 29, 29, 28, 28, 26, 26, 26, 26, 26, 26, 25, 24, 24, 34, 34, 33, 33,
- 33, 33, 33, 33, 33, 33, 32, 32, 32, 32, 32, 31, 30, 29, 29, 29, 29, 28,
- 27, 26, 26, 26, 26, 26, 25, 24, 24, 24, 34, 34, 33, 33, 33, 33, 33, 33,
- 33, 33, 32, 32, 32, 32, 32, 31, 30, 29, 29, 29, 29, 28, 27, 26, 26, 26,
- 26, 26, 25, 24, 24, 24, 34, 34, 33, 33, 33, 33, 33, 33, 33, 33, 32, 32,
- 32, 32, 32, 31, 30, 29, 29, 29, 29, 28, 27, 26, 26, 26, 26, 26, 25, 24,
- 24, 24, 34, 34, 33, 33, 33, 33, 33, 33, 33, 33, 32, 32, 32, 32, 32, 31,
- 30, 29, 29, 29, 29, 28, 27, 26, 26, 26, 26, 26, 25, 24, 24, 24, 33, 33,
- 33, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 31, 30, 29, 28, 28, 28,
- 28, 28, 27, 26, 26, 26, 26, 25, 25, 24, 24, 24, 32, 32, 32, 32, 31, 31,
- 31, 31, 31, 31, 31, 31, 31, 31, 30, 29, 28, 28, 28, 28, 28, 27, 26, 25,
- 25, 25, 25, 24, 24, 24, 24, 24, 31, 31, 31, 30, 30, 30, 30, 30, 30, 30,
- 30, 30, 30, 30, 29, 28, 28, 27, 27, 27, 27, 26, 25, 24, 24, 24, 24, 24,
- 24, 23, 23, 23, 31, 30, 30, 30, 30, 30, 30, 30, 29, 29, 29, 29, 29, 29,
- 28, 28, 27, 26, 26, 26, 26, 26, 25, 24, 24, 24, 24, 24, 23, 23, 23, 23,
- 31, 30, 30, 30, 30, 30, 30, 30, 29, 29, 29, 29, 29, 29, 28, 28, 27, 26,
- 26, 26, 26, 26, 25, 24, 24, 24, 24, 24, 23, 23, 23, 23, 31, 30, 30, 30,
- 30, 30, 30, 30, 29, 29, 29, 29, 29, 29, 28, 28, 27, 26, 26, 26, 26, 26,
- 25, 24, 24, 24, 24, 24, 23, 23, 23, 23, 31, 30, 30, 30, 30, 30, 30, 30,
- 29, 29, 29, 29, 29, 29, 28, 28, 27, 26, 26, 26, 26, 26, 25, 24, 24, 24,
- 24, 24, 23, 23, 23, 23, 30, 29, 29, 29, 29, 29, 29, 28, 28, 28, 28, 28,
- 28, 28, 28, 27, 26, 26, 26, 26, 26, 25, 24, 23, 23, 23, 23, 23, 23, 23,
- 23, 23, 29, 28, 28, 28, 28, 28, 28, 28, 28, 28, 27, 27, 27, 27, 27, 26,
- 25, 25, 25, 25, 25, 24, 23, 23, 22, 22, 22, 22, 22, 22, 22, 22, 28, 28,
- 28, 27, 27, 27, 27, 27, 27, 26, 26, 26, 26, 26, 26, 25, 24, 24, 24, 24,
- 24, 23, 23, 22, 22, 22, 22, 22, 22, 22, 22, 22, 28, 28, 28, 27, 27, 27,
- 27, 27, 26, 26, 26, 26, 26, 26, 26, 25, 24, 24, 24, 24, 24, 23, 22, 22,
- 22, 22, 22, 22, 22, 22, 22, 22, 28, 28, 28, 27, 27, 27, 27, 27, 26, 26,
- 26, 26, 26, 26, 26, 25, 24, 24, 24, 24, 24, 23, 22, 22, 22, 22, 22, 22,
- 22, 22, 22, 22, 28, 28, 28, 27, 27, 27, 27, 27, 26, 26, 26, 26, 26, 26,
- 26, 25, 24, 24, 24, 24, 24, 23, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22,
- 28, 28, 27, 27, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 25, 24, 24, 24,
- 24, 24, 24, 23, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 27, 26, 26, 26,
- 26, 26, 26, 26, 26, 26, 25, 25, 25, 25, 25, 24, 24, 23, 23, 23, 23, 23,
- 22, 22, 22, 22, 22, 22, 22, 21, 21, 21, 26, 26, 26, 25, 25, 25, 25, 25,
- 25, 25, 24, 24, 24, 24, 24, 24, 23, 23, 23, 23, 23, 23, 22, 22, 22, 22,
- 22, 22, 21, 21, 21, 21, 25, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24,
- 24, 24, 24, 24, 23, 23, 23, 23, 23, 23, 22, 22, 22, 22, 22, 22, 21, 21,
- 21, 21, 25, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24,
- 23, 23, 23, 23, 23, 23, 22, 22, 22, 22, 22, 22, 21, 21, 21, 21,
- /* Size 4x8 */
- 33, 33, 29, 28, 33, 33, 28, 27, 33, 32, 28, 26, 33, 32, 28, 26, 30, 28,
- 26, 24, 29, 28, 24, 23, 27, 26, 23, 22, 25, 24, 23, 22,
- /* Size 8x4 */
- 33, 33, 33, 33, 30, 29, 27, 25, 33, 33, 32, 32, 28, 28, 26, 24, 29, 28,
- 28, 28, 26, 24, 23, 23, 28, 27, 26, 26, 24, 23, 22, 22,
- /* Size 8x16 */
- 32, 33, 33, 33, 31, 28, 28, 27, 33, 33, 33, 33, 31, 27, 27, 26, 33, 33,
- 33, 33, 30, 27, 27, 26, 33, 33, 33, 33, 30, 27, 27, 26, 33, 33, 32, 32,
- 30, 26, 26, 26, 34, 33, 32, 32, 29, 26, 26, 25, 34, 33, 32, 32, 29, 26,
- 26, 25, 33, 32, 31, 31, 29, 26, 26, 25, 31, 30, 29, 29, 28, 24, 24, 24,
- 31, 29, 28, 28, 27, 24, 24, 23, 31, 29, 28, 28, 27, 24, 24, 23, 29, 28,
- 27, 27, 25, 23, 23, 22, 28, 26, 26, 26, 24, 22, 22, 22, 28, 26, 26, 26,
- 24, 22, 22, 22, 26, 26, 25, 25, 24, 22, 22, 22, 24, 24, 24, 24, 23, 22,
- 22, 21,
- /* Size 16x8 */
- 32, 33, 33, 33, 33, 34, 34, 33, 31, 31, 31, 29, 28, 28, 26, 24, 33, 33,
- 33, 33, 33, 33, 33, 32, 30, 29, 29, 28, 26, 26, 26, 24, 33, 33, 33, 33,
- 32, 32, 32, 31, 29, 28, 28, 27, 26, 26, 25, 24, 33, 33, 33, 33, 32, 32,
- 32, 31, 29, 28, 28, 27, 26, 26, 25, 24, 31, 31, 30, 30, 30, 29, 29, 29,
- 28, 27, 27, 25, 24, 24, 24, 23, 28, 27, 27, 27, 26, 26, 26, 26, 24, 24,
- 24, 23, 22, 22, 22, 22, 28, 27, 27, 27, 26, 26, 26, 26, 24, 24, 24, 23,
- 22, 22, 22, 22, 27, 26, 26, 26, 26, 25, 25, 25, 24, 23, 23, 22, 22, 22,
- 22, 21,
- /* Size 16x32 */
- 32, 33, 33, 33, 33, 33, 33, 33, 31, 29, 28, 28, 28, 28, 27, 24, 33, 33,
- 33, 33, 33, 33, 33, 33, 31, 29, 28, 28, 28, 28, 26, 24, 33, 33, 33, 33,
- 33, 33, 33, 32, 31, 29, 27, 27, 27, 27, 26, 24, 33, 33, 33, 33, 33, 33,
- 33, 32, 30, 28, 27, 27, 27, 27, 26, 24, 33, 33, 33, 33, 33, 33, 33, 32,
- 30, 28, 27, 27, 27, 27, 26, 24, 33, 33, 33, 33, 33, 33, 33, 32, 30, 28,
- 27, 27, 27, 27, 26, 24, 33, 33, 33, 33, 33, 33, 33, 32, 30, 28, 27, 27,
- 27, 27, 26, 24, 33, 33, 33, 33, 33, 33, 33, 32, 30, 28, 27, 27, 27, 27,
- 26, 24, 33, 33, 33, 33, 32, 32, 32, 32, 30, 28, 26, 26, 26, 26, 26, 24,
- 34, 33, 33, 32, 32, 32, 32, 32, 30, 28, 26, 26, 26, 26, 26, 24, 34, 33,
- 33, 32, 32, 32, 32, 31, 29, 28, 26, 26, 26, 26, 25, 24, 34, 33, 33, 32,
- 32, 32, 32, 31, 29, 28, 26, 26, 26, 26, 25, 24, 34, 33, 33, 32, 32, 32,
- 32, 31, 29, 28, 26, 26, 26, 26, 25, 24, 34, 33, 33, 32, 32, 32, 32, 31,
- 29, 28, 26, 26, 26, 26, 25, 24, 33, 33, 32, 32, 31, 31, 31, 31, 29, 27,
- 26, 26, 26, 26, 25, 24, 32, 32, 31, 31, 30, 30, 30, 30, 28, 26, 25, 25,
- 25, 25, 24, 23, 31, 31, 30, 29, 29, 29, 29, 29, 28, 26, 24, 24, 24, 24,
- 24, 23, 31, 30, 29, 29, 28, 28, 28, 28, 27, 26, 24, 24, 24, 24, 23, 23,
- 31, 30, 29, 29, 28, 28, 28, 28, 27, 26, 24, 24, 24, 24, 23, 23, 31, 30,
- 29, 29, 28, 28, 28, 28, 27, 26, 24, 24, 24, 24, 23, 23, 31, 30, 29, 29,
- 28, 28, 28, 28, 27, 26, 24, 24, 24, 24, 23, 23, 30, 29, 28, 28, 28, 28,
- 28, 28, 26, 24, 23, 23, 23, 23, 23, 23, 29, 28, 28, 27, 27, 27, 27, 26,
- 25, 24, 23, 23, 23, 23, 22, 22, 28, 28, 27, 26, 26, 26, 26, 26, 24, 23,
- 22, 22, 22, 22, 22, 22, 28, 27, 26, 26, 26, 26, 26, 25, 24, 23, 22, 22,
- 22, 22, 22, 22, 28, 27, 26, 26, 26, 26, 26, 25, 24, 23, 22, 22, 22, 22,
- 22, 22, 28, 27, 26, 26, 26, 26, 26, 25, 24, 23, 22, 22, 22, 22, 22, 22,
- 28, 27, 26, 26, 26, 26, 26, 25, 24, 23, 22, 22, 22, 22, 22, 22, 26, 26,
- 26, 25, 25, 25, 25, 24, 24, 23, 22, 22, 22, 22, 22, 21, 26, 25, 25, 24,
- 24, 24, 24, 24, 23, 23, 22, 22, 22, 22, 22, 21, 24, 24, 24, 24, 24, 24,
- 24, 24, 23, 22, 22, 22, 22, 22, 21, 21, 24, 24, 24, 24, 24, 24, 24, 24,
- 23, 22, 22, 22, 22, 22, 21, 21,
- /* Size 32x16 */
- 32, 33, 33, 33, 33, 33, 33, 33, 33, 34, 34, 34, 34, 34, 33, 32, 31, 31,
- 31, 31, 31, 30, 29, 28, 28, 28, 28, 28, 26, 26, 24, 24, 33, 33, 33, 33,
- 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 32, 31, 30, 30, 30, 30, 29,
- 28, 28, 27, 27, 27, 27, 26, 25, 24, 24, 33, 33, 33, 33, 33, 33, 33, 33,
- 33, 33, 33, 33, 33, 33, 32, 31, 30, 29, 29, 29, 29, 28, 28, 27, 26, 26,
- 26, 26, 26, 25, 24, 24, 33, 33, 33, 33, 33, 33, 33, 33, 33, 32, 32, 32,
- 32, 32, 32, 31, 29, 29, 29, 29, 29, 28, 27, 26, 26, 26, 26, 26, 25, 24,
- 24, 24, 33, 33, 33, 33, 33, 33, 33, 33, 32, 32, 32, 32, 32, 32, 31, 30,
- 29, 28, 28, 28, 28, 28, 27, 26, 26, 26, 26, 26, 25, 24, 24, 24, 33, 33,
- 33, 33, 33, 33, 33, 33, 32, 32, 32, 32, 32, 32, 31, 30, 29, 28, 28, 28,
- 28, 28, 27, 26, 26, 26, 26, 26, 25, 24, 24, 24, 33, 33, 33, 33, 33, 33,
- 33, 33, 32, 32, 32, 32, 32, 32, 31, 30, 29, 28, 28, 28, 28, 28, 27, 26,
- 26, 26, 26, 26, 25, 24, 24, 24, 33, 33, 32, 32, 32, 32, 32, 32, 32, 32,
- 31, 31, 31, 31, 31, 30, 29, 28, 28, 28, 28, 28, 26, 26, 25, 25, 25, 25,
- 24, 24, 24, 24, 31, 31, 31, 30, 30, 30, 30, 30, 30, 30, 29, 29, 29, 29,
- 29, 28, 28, 27, 27, 27, 27, 26, 25, 24, 24, 24, 24, 24, 24, 23, 23, 23,
- 29, 29, 29, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 27, 26, 26, 26,
- 26, 26, 26, 24, 24, 23, 23, 23, 23, 23, 23, 23, 22, 22, 28, 28, 27, 27,
- 27, 27, 27, 27, 26, 26, 26, 26, 26, 26, 26, 25, 24, 24, 24, 24, 24, 23,
- 23, 22, 22, 22, 22, 22, 22, 22, 22, 22, 28, 28, 27, 27, 27, 27, 27, 27,
- 26, 26, 26, 26, 26, 26, 26, 25, 24, 24, 24, 24, 24, 23, 23, 22, 22, 22,
- 22, 22, 22, 22, 22, 22, 28, 28, 27, 27, 27, 27, 27, 27, 26, 26, 26, 26,
- 26, 26, 26, 25, 24, 24, 24, 24, 24, 23, 23, 22, 22, 22, 22, 22, 22, 22,
- 22, 22, 28, 28, 27, 27, 27, 27, 27, 27, 26, 26, 26, 26, 26, 26, 26, 25,
- 24, 24, 24, 24, 24, 23, 23, 22, 22, 22, 22, 22, 22, 22, 22, 22, 27, 26,
- 26, 26, 26, 26, 26, 26, 26, 26, 25, 25, 25, 25, 25, 24, 24, 23, 23, 23,
- 23, 23, 22, 22, 22, 22, 22, 22, 22, 22, 21, 21, 24, 24, 24, 24, 24, 24,
- 24, 24, 24, 24, 24, 24, 24, 24, 24, 23, 23, 23, 23, 23, 23, 23, 22, 22,
- 22, 22, 22, 22, 21, 21, 21, 21,
- /* Size 4x16 */
- 33, 33, 29, 28, 33, 33, 29, 27, 33, 33, 28, 27, 33, 33, 28, 27, 33, 32,
- 28, 26, 33, 32, 28, 26, 33, 32, 28, 26, 33, 31, 27, 26, 31, 29, 26, 24,
- 30, 28, 26, 24, 30, 28, 26, 24, 28, 27, 24, 23, 27, 26, 23, 22, 27, 26,
- 23, 22, 26, 25, 23, 22, 24, 24, 22, 22,
- /* Size 16x4 */
- 33, 33, 33, 33, 33, 33, 33, 33, 31, 30, 30, 28, 27, 27, 26, 24, 33, 33,
- 33, 33, 32, 32, 32, 31, 29, 28, 28, 27, 26, 26, 25, 24, 29, 29, 28, 28,
- 28, 28, 28, 27, 26, 26, 26, 24, 23, 23, 23, 22, 28, 27, 27, 27, 26, 26,
- 26, 26, 24, 24, 24, 23, 22, 22, 22, 22,
- /* Size 8x32 */
- 32, 33, 33, 33, 31, 28, 28, 27, 33, 33, 33, 33, 31, 28, 28, 26, 33, 33,
- 33, 33, 31, 27, 27, 26, 33, 33, 33, 33, 30, 27, 27, 26, 33, 33, 33, 33,
- 30, 27, 27, 26, 33, 33, 33, 33, 30, 27, 27, 26, 33, 33, 33, 33, 30, 27,
- 27, 26, 33, 33, 33, 33, 30, 27, 27, 26, 33, 33, 32, 32, 30, 26, 26, 26,
- 34, 33, 32, 32, 30, 26, 26, 26, 34, 33, 32, 32, 29, 26, 26, 25, 34, 33,
- 32, 32, 29, 26, 26, 25, 34, 33, 32, 32, 29, 26, 26, 25, 34, 33, 32, 32,
- 29, 26, 26, 25, 33, 32, 31, 31, 29, 26, 26, 25, 32, 31, 30, 30, 28, 25,
- 25, 24, 31, 30, 29, 29, 28, 24, 24, 24, 31, 29, 28, 28, 27, 24, 24, 23,
- 31, 29, 28, 28, 27, 24, 24, 23, 31, 29, 28, 28, 27, 24, 24, 23, 31, 29,
- 28, 28, 27, 24, 24, 23, 30, 28, 28, 28, 26, 23, 23, 23, 29, 28, 27, 27,
- 25, 23, 23, 22, 28, 27, 26, 26, 24, 22, 22, 22, 28, 26, 26, 26, 24, 22,
- 22, 22, 28, 26, 26, 26, 24, 22, 22, 22, 28, 26, 26, 26, 24, 22, 22, 22,
- 28, 26, 26, 26, 24, 22, 22, 22, 26, 26, 25, 25, 24, 22, 22, 22, 26, 25,
- 24, 24, 23, 22, 22, 22, 24, 24, 24, 24, 23, 22, 22, 21, 24, 24, 24, 24,
- 23, 22, 22, 21,
- /* Size 32x8 */
- 32, 33, 33, 33, 33, 33, 33, 33, 33, 34, 34, 34, 34, 34, 33, 32, 31, 31,
- 31, 31, 31, 30, 29, 28, 28, 28, 28, 28, 26, 26, 24, 24, 33, 33, 33, 33,
- 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 32, 31, 30, 29, 29, 29, 29, 28,
- 28, 27, 26, 26, 26, 26, 26, 25, 24, 24, 33, 33, 33, 33, 33, 33, 33, 33,
- 32, 32, 32, 32, 32, 32, 31, 30, 29, 28, 28, 28, 28, 28, 27, 26, 26, 26,
- 26, 26, 25, 24, 24, 24, 33, 33, 33, 33, 33, 33, 33, 33, 32, 32, 32, 32,
- 32, 32, 31, 30, 29, 28, 28, 28, 28, 28, 27, 26, 26, 26, 26, 26, 25, 24,
- 24, 24, 31, 31, 31, 30, 30, 30, 30, 30, 30, 30, 29, 29, 29, 29, 29, 28,
- 28, 27, 27, 27, 27, 26, 25, 24, 24, 24, 24, 24, 24, 23, 23, 23, 28, 28,
- 27, 27, 27, 27, 27, 27, 26, 26, 26, 26, 26, 26, 26, 25, 24, 24, 24, 24,
- 24, 23, 23, 22, 22, 22, 22, 22, 22, 22, 22, 22, 28, 28, 27, 27, 27, 27,
- 27, 27, 26, 26, 26, 26, 26, 26, 26, 25, 24, 24, 24, 24, 24, 23, 23, 22,
- 22, 22, 22, 22, 22, 22, 22, 22, 27, 26, 26, 26, 26, 26, 26, 26, 26, 26,
- 25, 25, 25, 25, 25, 24, 24, 23, 23, 23, 23, 23, 22, 22, 22, 22, 22, 22,
- 22, 22, 21, 21 },
- },
- {
- { /* Luma */
- /* Size 4x4 */
- 33, 33, 33, 32, 33, 32, 32, 32, 33, 32, 32, 32, 32, 32, 32, 31,
- /* Size 8x8 */
- 33, 33, 33, 33, 33, 33, 32, 32, 33, 32, 32, 32, 32, 32, 32, 32, 33, 32,
- 32, 32, 32, 32, 32, 32, 33, 32, 32, 32, 32, 32, 32, 32, 33, 32, 32, 32,
- 32, 32, 32, 32, 33, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 31, 31, 32, 32, 32, 32, 32, 32, 31, 31,
- /* Size 16x16 */
- 32, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33,
- 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 32, 32, 32, 32, 33, 33, 33, 33,
- 33, 33, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 33, 33, 33, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 33, 33, 33, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 33, 33, 33, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 33, 33, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 33, 33, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 33, 33, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 33, 33, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 33, 33,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 33, 33, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 31, 31, 31, 31, 33, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 31, 31, 31, 31, 31, 33, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 31, 31, 31, 31, 31, 33, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 31, 31, 31, 31, 31, 33, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 31,
- 31, 31, 31, 31,
- /* Size 32x32 */
- 32, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33,
- 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 32, 33, 33, 33, 33,
- 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33,
- 33, 33, 32, 32, 32, 32, 32, 32, 32, 32, 33, 33, 33, 33, 33, 33, 33, 33,
- 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33,
- 33, 33, 33, 33, 33, 33, 33, 33, 33, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 33, 33,
- 33, 33, 33, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 33, 33, 33, 33, 33, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 33, 33, 33, 33, 33, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 33, 33, 33, 33, 33, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 33, 33, 33, 33, 33, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 33, 33, 33, 33,
- 33, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 33, 33, 33, 33, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 33, 33, 33, 33, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 33, 33, 33, 33, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 33, 33,
- 33, 33, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 33, 33, 33, 33, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 33, 33, 33, 33, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 33, 33, 33, 33, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 33, 33, 33, 33, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 33, 33, 33, 33,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 33, 33, 33, 33, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 33, 33, 33, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 31, 33, 33, 33, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 31, 31, 31, 31, 31, 31, 31, 31, 33, 33,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 31, 31, 31, 31, 31, 31, 31, 31, 31, 33, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 31, 31,
- 31, 31, 31, 31, 31, 31, 31, 31, 33, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 31, 31, 31, 31, 31, 31,
- 31, 31, 31, 31, 33, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31,
- 33, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 33, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 33, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 31, 31, 31, 31,
- 31, 31, 31, 31, 31, 31, 33, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 31, 31, 31, 31, 31, 31, 31, 31,
- 31, 31, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31,
- /* Size 4x8 */
- 33, 33, 33, 32, 33, 32, 32, 32, 33, 32, 32, 32, 33, 32, 32, 32, 33, 32,
- 32, 32, 33, 32, 32, 31, 32, 32, 32, 31, 32, 32, 32, 31,
- /* Size 8x4 */
- 33, 33, 33, 33, 33, 33, 32, 32, 33, 32, 32, 32, 32, 32, 32, 32, 33, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 31, 31, 31,
- /* Size 8x16 */
- 32, 33, 33, 33, 33, 33, 33, 32, 33, 33, 33, 33, 33, 33, 32, 32, 33, 33,
- 32, 32, 32, 32, 32, 32, 33, 32, 32, 32, 32, 32, 32, 32, 33, 32, 32, 32,
- 32, 32, 32, 32, 33, 32, 32, 32, 32, 32, 32, 32, 33, 32, 32, 32, 32, 32,
- 32, 32, 33, 32, 32, 32, 32, 32, 32, 32, 33, 32, 32, 32, 32, 32, 32, 32,
- 33, 32, 32, 32, 32, 32, 32, 32, 33, 32, 32, 32, 32, 32, 32, 32, 33, 32,
- 32, 32, 32, 32, 31, 31, 33, 32, 32, 32, 32, 32, 31, 31, 32, 32, 32, 32,
- 32, 32, 31, 30, 32, 32, 32, 32, 32, 32, 31, 30, 32, 32, 32, 32, 32, 32,
- 31, 30,
- /* Size 16x8 */
- 32, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 32, 32, 32, 33, 33,
- 33, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 33, 33, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 33, 33, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 33, 33, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 33, 33, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 33, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 31,
- 31, 31, 31, 31, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 31, 31, 30,
- 30, 30,
- /* Size 16x32 */
- 32, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 32, 32, 32, 33, 33,
- 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 32, 32, 32, 32, 33, 33, 33, 33,
- 33, 33, 33, 33, 33, 33, 33, 32, 32, 32, 32, 32, 33, 33, 33, 33, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 33, 33, 33, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 33, 33, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 33, 33, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 33, 33, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 33, 33, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 33, 33, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 33, 33,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 33, 33, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 33, 33, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 33, 33, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 33, 33, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 33, 33, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 31, 33, 33, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 31, 33, 33, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 31,
- 33, 33, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 31, 33, 33,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 31, 33, 33, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 31, 33, 33, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 31, 31, 31, 33, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 31, 31, 31, 31, 33, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 31, 31, 31, 31, 33, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 31,
- 31, 31, 31, 30, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 31, 31, 31,
- 30, 30, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 31, 31, 31, 30, 30,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 31, 31, 31, 30, 30, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 31, 31, 31, 30, 30, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 31, 31, 31, 30, 30, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 31, 31, 31, 30, 30, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 31, 31, 31, 30, 30,
- /* Size 32x16 */
- 32, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33,
- 33, 33, 33, 33, 33, 33, 33, 32, 32, 32, 32, 32, 32, 32, 33, 33, 33, 33,
- 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 33, 33, 33, 33, 33, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 33, 33, 33, 33, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 33, 33, 33, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 33, 33,
- 33, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 33, 33, 33, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 33, 33, 33, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 33, 33, 33, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 33, 33, 33, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 33, 33, 33, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 33, 33, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 31, 31,
- 31, 31, 31, 31, 31, 31, 33, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 31, 31, 31, 31, 31, 31, 31, 31,
- 31, 31, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 31, 31, 31, 31, 30, 30, 30, 30, 30, 30, 30, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 31, 31, 31, 31, 31, 31, 31, 31, 31,
- 30, 30, 30, 30, 30, 30, 30, 30,
- /* Size 4x16 */
- 33, 33, 33, 32, 33, 33, 33, 32, 33, 32, 32, 32, 33, 32, 32, 32, 33, 32,
- 32, 32, 33, 32, 32, 32, 33, 32, 32, 32, 33, 32, 32, 32, 33, 32, 32, 32,
- 33, 32, 32, 32, 33, 32, 32, 32, 32, 32, 32, 31, 32, 32, 32, 31, 32, 32,
- 32, 31, 32, 32, 32, 31, 32, 32, 32, 31,
- /* Size 16x4 */
- 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 32, 32, 32, 32, 32, 33, 33,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 33, 33, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 31, 31, 31, 31, 31,
- /* Size 8x32 */
- 32, 33, 33, 33, 33, 33, 33, 32, 33, 33, 33, 33, 33, 33, 32, 32, 33, 33,
- 33, 33, 33, 33, 32, 32, 33, 33, 32, 32, 32, 32, 32, 32, 33, 33, 32, 32,
- 32, 32, 32, 32, 33, 32, 32, 32, 32, 32, 32, 32, 33, 32, 32, 32, 32, 32,
- 32, 32, 33, 32, 32, 32, 32, 32, 32, 32, 33, 32, 32, 32, 32, 32, 32, 32,
- 33, 32, 32, 32, 32, 32, 32, 32, 33, 32, 32, 32, 32, 32, 32, 32, 33, 32,
- 32, 32, 32, 32, 32, 32, 33, 32, 32, 32, 32, 32, 32, 32, 33, 32, 32, 32,
- 32, 32, 32, 32, 33, 32, 32, 32, 32, 32, 32, 32, 33, 32, 32, 32, 32, 32,
- 32, 32, 33, 32, 32, 32, 32, 32, 32, 32, 33, 32, 32, 32, 32, 32, 32, 32,
- 33, 32, 32, 32, 32, 32, 32, 32, 33, 32, 32, 32, 32, 32, 32, 32, 33, 32,
- 32, 32, 32, 32, 32, 32, 33, 32, 32, 32, 32, 32, 32, 31, 33, 32, 32, 32,
- 32, 32, 31, 31, 33, 32, 32, 32, 32, 32, 31, 31, 33, 32, 32, 32, 32, 32,
- 31, 31, 32, 32, 32, 32, 32, 32, 31, 30, 32, 32, 32, 32, 32, 32, 31, 30,
- 32, 32, 32, 32, 32, 32, 31, 30, 32, 32, 32, 32, 32, 32, 31, 30, 32, 32,
- 32, 32, 32, 32, 31, 30, 32, 32, 32, 32, 32, 32, 31, 30, 32, 32, 32, 32,
- 32, 32, 31, 30,
- /* Size 32x8 */
- 32, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33,
- 33, 33, 33, 33, 33, 33, 33, 32, 32, 32, 32, 32, 32, 32, 33, 33, 33, 33,
- 33, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 33, 33, 33, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 33, 33, 33, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 33, 33, 33, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 33, 33,
- 33, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 33, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 31, 31,
- 31, 31, 31, 31, 31, 31, 31, 31, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 31, 31, 31, 31, 30, 30, 30,
- 30, 30, 30, 30 },
- { /* Chroma */
- /* Size 4x4 */
- 33, 33, 33, 30, 33, 33, 33, 29, 33, 33, 32, 29, 30, 29, 29, 26,
- /* Size 8x8 */
- 33, 33, 33, 33, 34, 33, 31, 31, 33, 33, 33, 33, 33, 32, 30, 30, 33, 33,
- 33, 33, 33, 32, 30, 30, 33, 33, 33, 33, 33, 32, 29, 29, 34, 33, 33, 33,
- 32, 32, 29, 29, 33, 32, 32, 32, 32, 31, 28, 28, 31, 30, 30, 29, 29, 28,
- 26, 26, 31, 30, 30, 29, 29, 28, 26, 26,
- /* Size 16x16 */
- 32, 33, 33, 33, 33, 33, 33, 34, 34, 34, 34, 33, 31, 31, 31, 31, 33, 33,
- 33, 33, 33, 33, 33, 33, 34, 34, 34, 32, 31, 30, 30, 30, 33, 33, 33, 33,
- 33, 33, 33, 33, 33, 33, 33, 32, 31, 30, 30, 30, 33, 33, 33, 33, 33, 33,
- 33, 33, 33, 33, 33, 32, 30, 30, 30, 30, 33, 33, 33, 33, 33, 33, 33, 33,
- 33, 33, 33, 32, 30, 30, 30, 30, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33,
- 33, 32, 30, 30, 30, 30, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 31,
- 30, 29, 29, 29, 34, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 31, 30, 29,
- 29, 29, 34, 34, 33, 33, 33, 33, 33, 33, 32, 32, 32, 31, 30, 29, 29, 29,
- 34, 34, 33, 33, 33, 33, 33, 33, 32, 32, 32, 31, 30, 29, 29, 29, 34, 34,
- 33, 33, 33, 33, 33, 33, 32, 32, 32, 31, 30, 29, 29, 29, 33, 32, 32, 32,
- 32, 32, 31, 31, 31, 31, 31, 30, 28, 28, 28, 28, 31, 31, 31, 30, 30, 30,
- 30, 30, 30, 30, 30, 28, 28, 27, 27, 27, 31, 30, 30, 30, 30, 30, 29, 29,
- 29, 29, 29, 28, 27, 26, 26, 26, 31, 30, 30, 30, 30, 30, 29, 29, 29, 29,
- 29, 28, 27, 26, 26, 26, 31, 30, 30, 30, 30, 30, 29, 29, 29, 29, 29, 28,
- 27, 26, 26, 26,
- /* Size 32x32 */
- 32, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 34, 34, 34, 34, 34,
- 34, 34, 34, 33, 33, 32, 31, 31, 31, 31, 31, 31, 31, 30, 33, 33, 33, 33,
- 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 34, 34, 34, 34, 34, 34, 34, 33,
- 33, 32, 31, 31, 31, 31, 31, 31, 31, 30, 33, 33, 33, 33, 33, 33, 33, 33,
- 33, 33, 33, 33, 33, 33, 33, 34, 34, 34, 34, 34, 34, 33, 32, 32, 31, 30,
- 30, 30, 30, 30, 30, 30, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33,
- 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 32, 31, 31, 30, 30, 30, 30, 30,
- 30, 29, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33,
- 33, 33, 33, 33, 33, 33, 32, 31, 31, 30, 30, 30, 30, 30, 30, 29, 33, 33,
- 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33,
- 33, 32, 32, 31, 30, 30, 30, 30, 30, 30, 30, 29, 33, 33, 33, 33, 33, 33,
- 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 32, 32, 31,
- 30, 30, 30, 30, 30, 30, 30, 29, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33,
- 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 32, 32, 31, 30, 30, 30, 30,
- 30, 30, 30, 29, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33,
- 33, 33, 33, 33, 33, 33, 33, 32, 32, 31, 30, 30, 30, 30, 30, 30, 30, 29,
- 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33,
- 33, 33, 33, 32, 32, 31, 30, 30, 30, 30, 30, 30, 30, 29, 33, 33, 33, 33,
- 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 32,
- 32, 31, 30, 30, 30, 30, 30, 30, 30, 29, 33, 33, 33, 33, 33, 33, 33, 33,
- 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 32, 32, 31, 30, 29,
- 29, 29, 29, 29, 29, 29, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33,
- 33, 33, 33, 33, 33, 33, 33, 33, 33, 32, 31, 31, 30, 29, 29, 29, 29, 29,
- 29, 29, 34, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33,
- 33, 33, 33, 33, 33, 32, 31, 31, 30, 29, 29, 29, 29, 29, 29, 28, 34, 34,
- 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33,
- 33, 32, 31, 30, 30, 29, 29, 29, 29, 29, 29, 28, 34, 34, 34, 33, 33, 33,
- 33, 33, 33, 33, 33, 33, 33, 33, 33, 32, 32, 32, 32, 32, 32, 32, 31, 30,
- 30, 29, 29, 29, 29, 29, 29, 28, 34, 34, 34, 33, 33, 33, 33, 33, 33, 33,
- 33, 33, 33, 33, 33, 32, 32, 32, 32, 32, 32, 32, 31, 30, 30, 29, 29, 29,
- 29, 29, 29, 28, 34, 34, 34, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33,
- 33, 32, 32, 32, 32, 32, 32, 32, 31, 30, 30, 29, 29, 29, 29, 29, 29, 28,
- 34, 34, 34, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 32, 32, 32,
- 32, 32, 32, 32, 31, 30, 30, 29, 29, 29, 29, 29, 29, 28, 34, 34, 34, 33,
- 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 32, 32, 32, 32, 32, 32, 32,
- 31, 30, 30, 29, 29, 29, 29, 29, 29, 28, 34, 34, 34, 33, 33, 33, 33, 33,
- 33, 33, 33, 33, 33, 33, 33, 32, 32, 32, 32, 32, 32, 32, 31, 30, 30, 29,
- 29, 29, 29, 29, 29, 28, 33, 33, 33, 33, 33, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 31, 30, 30, 29, 28, 28, 28, 28, 28,
- 28, 28, 33, 33, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 31, 31, 31, 31,
- 31, 31, 31, 31, 31, 30, 30, 29, 28, 28, 28, 28, 28, 28, 28, 28, 32, 32,
- 32, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 30, 30, 30, 30, 30, 30,
- 30, 30, 29, 28, 28, 28, 28, 28, 28, 28, 28, 27, 31, 31, 31, 31, 31, 30,
- 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 29, 28, 28,
- 28, 27, 27, 27, 27, 27, 27, 26, 31, 31, 30, 30, 30, 30, 30, 30, 30, 30,
- 30, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 28, 28, 28, 27, 26, 26, 26,
- 26, 26, 26, 26, 31, 31, 30, 30, 30, 30, 30, 30, 30, 30, 30, 29, 29, 29,
- 29, 29, 29, 29, 29, 29, 29, 28, 28, 28, 27, 26, 26, 26, 26, 26, 26, 26,
- 31, 31, 30, 30, 30, 30, 30, 30, 30, 30, 30, 29, 29, 29, 29, 29, 29, 29,
- 29, 29, 29, 28, 28, 28, 27, 26, 26, 26, 26, 26, 26, 26, 31, 31, 30, 30,
- 30, 30, 30, 30, 30, 30, 30, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 28,
- 28, 28, 27, 26, 26, 26, 26, 26, 26, 26, 31, 31, 30, 30, 30, 30, 30, 30,
- 30, 30, 30, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 28, 28, 28, 27, 26,
- 26, 26, 26, 26, 26, 26, 31, 31, 30, 30, 30, 30, 30, 30, 30, 30, 30, 29,
- 29, 29, 29, 29, 29, 29, 29, 29, 29, 28, 28, 28, 27, 26, 26, 26, 26, 26,
- 26, 26, 30, 30, 30, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 28, 28, 28,
- 28, 28, 28, 28, 28, 28, 28, 27, 26, 26, 26, 26, 26, 26, 26, 26,
- /* Size 4x8 */
- 33, 33, 33, 30, 33, 33, 33, 29, 33, 33, 33, 29, 33, 32, 32, 28, 33, 32,
- 32, 28, 33, 31, 31, 28, 30, 28, 28, 26, 30, 28, 28, 26,
- /* Size 8x4 */
- 33, 33, 33, 33, 33, 33, 30, 30, 33, 33, 33, 32, 32, 31, 28, 28, 33, 33,
- 33, 32, 32, 31, 28, 28, 30, 29, 29, 28, 28, 28, 26, 26,
- /* Size 8x16 */
- 32, 33, 33, 33, 33, 33, 31, 29, 33, 33, 33, 33, 33, 33, 31, 28, 33, 33,
- 33, 33, 33, 33, 30, 28, 33, 33, 33, 33, 33, 33, 30, 28, 33, 33, 33, 33,
- 33, 33, 30, 28, 33, 33, 33, 33, 33, 33, 30, 28, 33, 33, 33, 32, 32, 32,
- 30, 28, 34, 33, 33, 32, 32, 32, 30, 27, 34, 33, 32, 32, 32, 32, 29, 27,
- 34, 33, 32, 32, 32, 32, 29, 27, 34, 33, 32, 32, 32, 32, 29, 27, 33, 32,
- 31, 31, 31, 31, 28, 26, 31, 30, 30, 29, 29, 29, 28, 26, 31, 30, 29, 28,
- 28, 28, 27, 25, 31, 30, 29, 28, 28, 28, 27, 25, 31, 30, 29, 28, 28, 28,
- 27, 25,
- /* Size 16x8 */
- 32, 33, 33, 33, 33, 33, 33, 34, 34, 34, 34, 33, 31, 31, 31, 31, 33, 33,
- 33, 33, 33, 33, 33, 33, 33, 33, 33, 32, 30, 30, 30, 30, 33, 33, 33, 33,
- 33, 33, 33, 33, 32, 32, 32, 31, 30, 29, 29, 29, 33, 33, 33, 33, 33, 33,
- 32, 32, 32, 32, 32, 31, 29, 28, 28, 28, 33, 33, 33, 33, 33, 33, 32, 32,
- 32, 32, 32, 31, 29, 28, 28, 28, 33, 33, 33, 33, 33, 33, 32, 32, 32, 32,
- 32, 31, 29, 28, 28, 28, 31, 31, 30, 30, 30, 30, 30, 30, 29, 29, 29, 28,
- 28, 27, 27, 27, 29, 28, 28, 28, 28, 28, 28, 27, 27, 27, 27, 26, 26, 25,
- 25, 25,
- /* Size 16x32 */
- 32, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 32, 31, 30, 29, 28, 33, 33,
- 33, 33, 33, 33, 33, 33, 33, 33, 33, 32, 31, 30, 29, 28, 33, 33, 33, 33,
- 33, 33, 33, 33, 33, 33, 33, 32, 31, 30, 28, 28, 33, 33, 33, 33, 33, 33,
- 33, 33, 33, 33, 33, 32, 31, 29, 28, 27, 33, 33, 33, 33, 33, 33, 33, 33,
- 33, 33, 33, 32, 30, 29, 28, 27, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33,
- 33, 31, 30, 29, 28, 27, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 31,
- 30, 29, 28, 27, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 31, 30, 29,
- 28, 27, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 31, 30, 29, 28, 27,
- 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 31, 30, 29, 28, 27, 33, 33,
- 33, 33, 33, 33, 33, 33, 33, 33, 33, 31, 30, 29, 28, 27, 33, 33, 33, 33,
- 33, 33, 33, 33, 33, 33, 33, 31, 30, 29, 28, 27, 33, 33, 33, 33, 33, 32,
- 32, 32, 32, 32, 32, 31, 30, 28, 28, 26, 33, 33, 33, 33, 33, 32, 32, 32,
- 32, 32, 32, 31, 30, 28, 28, 26, 34, 33, 33, 33, 33, 32, 32, 32, 32, 32,
- 32, 31, 30, 28, 27, 26, 34, 33, 33, 33, 32, 32, 32, 32, 32, 32, 32, 31,
- 29, 28, 27, 26, 34, 33, 33, 33, 32, 32, 32, 32, 32, 32, 32, 31, 29, 28,
- 27, 26, 34, 33, 33, 33, 32, 32, 32, 32, 32, 32, 32, 31, 29, 28, 27, 26,
- 34, 33, 33, 33, 32, 32, 32, 32, 32, 32, 32, 31, 29, 28, 27, 26, 34, 33,
- 33, 33, 32, 32, 32, 32, 32, 32, 32, 31, 29, 28, 27, 26, 34, 33, 33, 33,
- 32, 32, 32, 32, 32, 32, 32, 31, 29, 28, 27, 26, 33, 33, 33, 32, 32, 31,
- 31, 31, 31, 31, 31, 30, 29, 28, 27, 26, 33, 32, 32, 31, 31, 31, 31, 31,
- 31, 31, 31, 29, 28, 28, 26, 25, 32, 32, 31, 31, 30, 30, 30, 30, 30, 30,
- 30, 29, 28, 27, 26, 25, 31, 31, 30, 30, 30, 29, 29, 29, 29, 29, 29, 28,
- 28, 26, 26, 24, 31, 30, 30, 29, 29, 28, 28, 28, 28, 28, 28, 28, 27, 26,
- 25, 24, 31, 30, 30, 29, 29, 28, 28, 28, 28, 28, 28, 28, 27, 26, 25, 24,
- 31, 30, 30, 29, 29, 28, 28, 28, 28, 28, 28, 28, 27, 26, 25, 24, 31, 30,
- 30, 29, 29, 28, 28, 28, 28, 28, 28, 28, 27, 26, 25, 24, 31, 30, 30, 29,
- 29, 28, 28, 28, 28, 28, 28, 28, 27, 26, 25, 24, 31, 30, 30, 29, 29, 28,
- 28, 28, 28, 28, 28, 28, 27, 26, 25, 24, 30, 30, 29, 29, 28, 28, 28, 28,
- 28, 28, 28, 27, 26, 26, 24, 23,
- /* Size 32x16 */
- 32, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 34, 34, 34, 34,
- 34, 34, 34, 33, 33, 32, 31, 31, 31, 31, 31, 31, 31, 30, 33, 33, 33, 33,
- 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33,
- 32, 32, 31, 30, 30, 30, 30, 30, 30, 30, 33, 33, 33, 33, 33, 33, 33, 33,
- 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 32, 31, 30, 30,
- 30, 30, 30, 30, 30, 29, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33,
- 33, 33, 33, 33, 33, 33, 33, 33, 33, 32, 31, 31, 30, 29, 29, 29, 29, 29,
- 29, 29, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 32,
- 32, 32, 32, 32, 32, 32, 31, 30, 30, 29, 29, 29, 29, 29, 29, 28, 33, 33,
- 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 31, 31, 30, 29, 28, 28, 28, 28, 28, 28, 28, 33, 33, 33, 33, 33, 33,
- 33, 33, 33, 33, 33, 33, 32, 32, 32, 32, 32, 32, 32, 32, 32, 31, 31, 30,
- 29, 28, 28, 28, 28, 28, 28, 28, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33,
- 33, 33, 32, 32, 32, 32, 32, 32, 32, 32, 32, 31, 31, 30, 29, 28, 28, 28,
- 28, 28, 28, 28, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 31, 31, 30, 29, 28, 28, 28, 28, 28, 28, 28,
- 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 31, 31, 30, 29, 28, 28, 28, 28, 28, 28, 28, 33, 33, 33, 33,
- 33, 33, 33, 33, 33, 33, 33, 33, 32, 32, 32, 32, 32, 32, 32, 32, 32, 31,
- 31, 30, 29, 28, 28, 28, 28, 28, 28, 28, 32, 32, 32, 32, 32, 31, 31, 31,
- 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 30, 29, 29, 28, 28,
- 28, 28, 28, 28, 28, 27, 31, 31, 31, 31, 30, 30, 30, 30, 30, 30, 30, 30,
- 30, 30, 30, 29, 29, 29, 29, 29, 29, 29, 28, 28, 28, 27, 27, 27, 27, 27,
- 27, 26, 30, 30, 30, 29, 29, 29, 29, 29, 29, 29, 29, 29, 28, 28, 28, 28,
- 28, 28, 28, 28, 28, 28, 28, 27, 26, 26, 26, 26, 26, 26, 26, 26, 29, 29,
- 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 27, 27, 27, 27, 27, 27,
- 27, 27, 26, 26, 26, 25, 25, 25, 25, 25, 25, 24, 28, 28, 28, 27, 27, 27,
- 27, 27, 27, 27, 27, 27, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 25, 25,
- 24, 24, 24, 24, 24, 24, 24, 23,
- /* Size 4x16 */
- 33, 33, 33, 30, 33, 33, 33, 30, 33, 33, 33, 29, 33, 33, 33, 29, 33, 33,
- 33, 29, 33, 33, 33, 29, 33, 32, 32, 28, 33, 32, 32, 28, 33, 32, 32, 28,
- 33, 32, 32, 28, 33, 32, 32, 28, 32, 31, 31, 28, 31, 29, 29, 26, 30, 28,
- 28, 26, 30, 28, 28, 26, 30, 28, 28, 26,
- /* Size 16x4 */
- 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 32, 31, 30, 30, 30, 33, 33,
- 33, 33, 33, 33, 32, 32, 32, 32, 32, 31, 29, 28, 28, 28, 33, 33, 33, 33,
- 33, 33, 32, 32, 32, 32, 32, 31, 29, 28, 28, 28, 30, 30, 29, 29, 29, 29,
- 28, 28, 28, 28, 28, 28, 26, 26, 26, 26,
- /* Size 8x32 */
- 32, 33, 33, 33, 33, 33, 31, 29, 33, 33, 33, 33, 33, 33, 31, 29, 33, 33,
- 33, 33, 33, 33, 31, 28, 33, 33, 33, 33, 33, 33, 31, 28, 33, 33, 33, 33,
- 33, 33, 30, 28, 33, 33, 33, 33, 33, 33, 30, 28, 33, 33, 33, 33, 33, 33,
- 30, 28, 33, 33, 33, 33, 33, 33, 30, 28, 33, 33, 33, 33, 33, 33, 30, 28,
- 33, 33, 33, 33, 33, 33, 30, 28, 33, 33, 33, 33, 33, 33, 30, 28, 33, 33,
- 33, 33, 33, 33, 30, 28, 33, 33, 33, 32, 32, 32, 30, 28, 33, 33, 33, 32,
- 32, 32, 30, 28, 34, 33, 33, 32, 32, 32, 30, 27, 34, 33, 32, 32, 32, 32,
- 29, 27, 34, 33, 32, 32, 32, 32, 29, 27, 34, 33, 32, 32, 32, 32, 29, 27,
- 34, 33, 32, 32, 32, 32, 29, 27, 34, 33, 32, 32, 32, 32, 29, 27, 34, 33,
- 32, 32, 32, 32, 29, 27, 33, 33, 32, 31, 31, 31, 29, 27, 33, 32, 31, 31,
- 31, 31, 28, 26, 32, 31, 30, 30, 30, 30, 28, 26, 31, 30, 30, 29, 29, 29,
- 28, 26, 31, 30, 29, 28, 28, 28, 27, 25, 31, 30, 29, 28, 28, 28, 27, 25,
- 31, 30, 29, 28, 28, 28, 27, 25, 31, 30, 29, 28, 28, 28, 27, 25, 31, 30,
- 29, 28, 28, 28, 27, 25, 31, 30, 29, 28, 28, 28, 27, 25, 30, 29, 28, 28,
- 28, 28, 26, 24,
- /* Size 32x8 */
- 32, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 34, 34, 34, 34,
- 34, 34, 34, 33, 33, 32, 31, 31, 31, 31, 31, 31, 31, 30, 33, 33, 33, 33,
- 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33,
- 32, 31, 30, 30, 30, 30, 30, 30, 30, 29, 33, 33, 33, 33, 33, 33, 33, 33,
- 33, 33, 33, 33, 33, 33, 33, 32, 32, 32, 32, 32, 32, 32, 31, 30, 30, 29,
- 29, 29, 29, 29, 29, 28, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 31, 31, 30, 29, 28, 28, 28, 28, 28,
- 28, 28, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 31, 31, 30, 29, 28, 28, 28, 28, 28, 28, 28, 33, 33,
- 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 31, 31, 30, 29, 28, 28, 28, 28, 28, 28, 28, 31, 31, 31, 31, 30, 30,
- 30, 30, 30, 30, 30, 30, 30, 30, 30, 29, 29, 29, 29, 29, 29, 29, 28, 28,
- 28, 27, 27, 27, 27, 27, 27, 26, 29, 29, 28, 28, 28, 28, 28, 28, 28, 28,
- 28, 28, 28, 28, 27, 27, 27, 27, 27, 27, 27, 27, 26, 26, 26, 25, 25, 25,
- 25, 25, 25, 24 },
- },
- {
- { /* Luma */
- /* Size 4x4 */
- 33, 33, 33, 33, 33, 32, 32, 32, 33, 32, 32, 32, 33, 32, 32, 32,
- /* Size 8x8 */
- 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33,
- 33, 32, 32, 32, 32, 32, 33, 33, 32, 32, 32, 32, 32, 32, 33, 33, 32, 32,
- 32, 32, 32, 32, 33, 33, 32, 32, 32, 32, 32, 32, 33, 33, 32, 32, 32, 32,
- 32, 32, 33, 33, 32, 32, 32, 32, 32, 32,
- /* Size 16x16 */
- 32, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33,
- 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33,
- 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33,
- 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33,
- 33, 33, 33, 32, 32, 32, 32, 32, 33, 33, 33, 33, 33, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 33, 33, 33, 33, 33, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 33, 33, 33, 33, 33, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 33, 33, 33, 33, 33, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 33, 33, 33, 33, 33, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 33, 33,
- 33, 33, 33, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 33, 33, 33, 33,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 33, 33, 33, 33, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 33, 33, 33, 33, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 33, 33, 33, 33, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 33, 33, 33, 33, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32,
- /* Size 32x32 */
- 32, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33,
- 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33,
- 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33,
- 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33,
- 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33,
- 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33,
- 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33,
- 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33,
- 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33,
- 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33,
- 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33,
- 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33,
- 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33,
- 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 32, 32, 32,
- 32, 32, 32, 32, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33,
- 33, 33, 33, 33, 33, 33, 33, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 33, 33, 33, 33,
- 33, 33, 33, 33, 33, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 33, 33, 33, 33, 33, 33, 33, 33,
- 33, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 33, 33, 33, 33, 33, 33, 33, 33, 33, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 33, 33, 33, 33, 33, 33, 33, 33, 33, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 33, 33,
- 33, 33, 33, 33, 33, 33, 33, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 33, 33, 33, 33, 33, 33,
- 33, 33, 33, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 33, 33, 33, 33, 33, 33, 33, 33, 33, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 33, 33, 33, 33, 33, 33, 33, 33, 33, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 33, 33, 33, 33, 33, 33, 33, 33, 33, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 33, 33, 33, 33,
- 33, 33, 33, 33, 33, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 33, 33, 33, 33, 33, 33, 33, 33,
- 33, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 33, 33, 33, 33, 33, 33, 33, 33, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 33, 33, 33, 33, 33, 33, 33, 33, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 33, 33,
- 33, 33, 33, 33, 33, 33, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 33, 33, 33, 33, 33, 33,
- 33, 33, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 33, 33, 33, 33, 33, 33, 33, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 33, 33, 33, 33, 33, 33, 33, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 33, 33, 33, 33, 33, 33, 33, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 33, 33, 33, 33,
- 33, 33, 33, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 33, 33, 33, 33, 33, 33, 33, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 33, 33, 33, 33, 33, 33, 33, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 33, 33, 33, 33, 33, 33, 33, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- /* Size 4x8 */
- 33, 33, 33, 33, 33, 33, 33, 33, 33, 32, 32, 32, 33, 32, 32, 32, 33, 32,
- 32, 32, 33, 32, 32, 32, 33, 32, 32, 32, 33, 32, 32, 32,
- /* Size 8x4 */
- 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 32, 32, 32, 32, 32, 32, 33, 33,
- 32, 32, 32, 32, 32, 32, 33, 33, 32, 32, 32, 32, 32, 32,
- /* Size 8x16 */
- 32, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33,
- 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 32, 32, 32, 32, 33, 33, 33, 32,
- 32, 32, 32, 32, 33, 33, 32, 32, 32, 32, 32, 32, 33, 33, 32, 32, 32, 32,
- 32, 32, 33, 33, 32, 32, 32, 32, 32, 32, 33, 33, 32, 32, 32, 32, 32, 32,
- 33, 33, 32, 32, 32, 32, 32, 32, 33, 33, 32, 32, 32, 32, 32, 32, 33, 33,
- 32, 32, 32, 32, 32, 32, 33, 33, 32, 32, 32, 32, 32, 32, 33, 33, 32, 32,
- 32, 32, 32, 32, 33, 33, 32, 32, 32, 32, 32, 32, 33, 33, 32, 32, 32, 32,
- 32, 32,
- /* Size 16x8 */
- 32, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33,
- 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33,
- 33, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 33, 33, 33, 33, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 33, 33, 33, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 33, 33, 33, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 33, 33, 33, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 33, 33, 33, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32,
- /* Size 16x32 */
- 32, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33,
- 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33,
- 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33,
- 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33,
- 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33,
- 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 32, 32, 32, 32,
- 32, 32, 32, 32, 33, 33, 33, 33, 33, 33, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 33, 33, 33, 33, 33, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 33, 33, 33, 33, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 33, 33,
- 33, 33, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 33, 33, 33, 33,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 33, 33, 33, 33, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 33, 33, 33, 33, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 33, 33, 33, 33, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 33, 33, 33, 33, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 33, 33, 33, 33, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 33, 33, 33, 33, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 33, 33, 33, 33, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 33, 33,
- 33, 33, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 33, 33, 33, 33,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 33, 33, 33, 33, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 33, 33, 33, 33, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 33, 33, 33, 33, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 33, 33, 33, 33, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 33, 33, 33, 33, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 33, 33, 33, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 33, 33, 33, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 33, 33,
- 33, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 33, 33, 33, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 33, 33, 33, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 33, 33, 33, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32,
- /* Size 32x16 */
- 32, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33,
- 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33,
- 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33,
- 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33,
- 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33,
- 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33,
- 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 32, 32, 32, 32,
- 32, 32, 33, 33, 33, 33, 33, 33, 33, 33, 33, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 33, 33,
- 33, 33, 33, 33, 33, 33, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 33, 33, 33, 33, 33, 33,
- 33, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 33, 33, 33, 33, 33, 33, 33, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 33, 33, 33, 33, 33, 33, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 33, 33, 33, 33, 33, 33, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 33, 33, 33, 33,
- 33, 33, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 33, 33, 33, 33, 33, 33, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 33, 33, 33, 33, 33, 33, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 33, 33, 33, 33, 33, 33, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 33, 33,
- 33, 33, 33, 33, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 33, 33, 33, 33, 33, 33,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32,
- /* Size 4x16 */
- 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 32, 32, 33, 32,
- 32, 32, 33, 32, 32, 32, 33, 32, 32, 32, 33, 32, 32, 32, 33, 32, 32, 32,
- 33, 32, 32, 32, 33, 32, 32, 32, 33, 32, 32, 32, 33, 32, 32, 32, 33, 32,
- 32, 32, 33, 32, 32, 32, 33, 32, 32, 32,
- /* Size 16x4 */
- 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33,
- 33, 33, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 33, 33, 33, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 33, 33, 33, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- /* Size 8x32 */
- 32, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33,
- 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33,
- 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 32, 32,
- 32, 32, 33, 33, 33, 32, 32, 32, 32, 32, 33, 33, 33, 32, 32, 32, 32, 32,
- 33, 33, 32, 32, 32, 32, 32, 32, 33, 33, 32, 32, 32, 32, 32, 32, 33, 33,
- 32, 32, 32, 32, 32, 32, 33, 33, 32, 32, 32, 32, 32, 32, 33, 33, 32, 32,
- 32, 32, 32, 32, 33, 33, 32, 32, 32, 32, 32, 32, 33, 33, 32, 32, 32, 32,
- 32, 32, 33, 33, 32, 32, 32, 32, 32, 32, 33, 33, 32, 32, 32, 32, 32, 32,
- 33, 33, 32, 32, 32, 32, 32, 32, 33, 33, 32, 32, 32, 32, 32, 32, 33, 33,
- 32, 32, 32, 32, 32, 32, 33, 33, 32, 32, 32, 32, 32, 32, 33, 33, 32, 32,
- 32, 32, 32, 32, 33, 33, 32, 32, 32, 32, 32, 32, 33, 33, 32, 32, 32, 32,
- 32, 32, 33, 33, 32, 32, 32, 32, 32, 32, 33, 33, 32, 32, 32, 32, 32, 32,
- 33, 33, 32, 32, 32, 32, 32, 32, 33, 33, 32, 32, 32, 32, 32, 32, 33, 33,
- 32, 32, 32, 32, 32, 32, 33, 33, 32, 32, 32, 32, 32, 32, 33, 33, 32, 32,
- 32, 32, 32, 32,
- /* Size 32x8 */
- 32, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33,
- 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33,
- 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33,
- 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33,
- 33, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 33, 33, 33, 33, 33, 33, 33, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 33, 33, 33, 33, 33, 33, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 33, 33,
- 33, 33, 33, 33, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 33, 33, 33, 33, 33, 33,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 33, 33, 33, 33, 33, 33, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32 },
- { /* Chroma */
- /* Size 4x4 */
- 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33,
- /* Size 8x8 */
- 33, 33, 33, 33, 33, 33, 33, 34, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33,
- 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33,
- 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33,
- 33, 33, 34, 33, 33, 33, 33, 33, 33, 33,
- /* Size 16x16 */
- 32, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 34, 34, 34, 33, 33,
- 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 34, 34, 33, 33, 33, 33,
- 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 34, 33, 33, 33, 33, 33, 33,
- 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33,
- 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33,
- 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33,
- 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33,
- 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33,
- 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33,
- 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33,
- 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33,
- 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 34, 33, 33, 33, 33, 33, 33, 33,
- 33, 33, 33, 33, 33, 33, 33, 33, 34, 34, 33, 33, 33, 33, 33, 33, 33, 33,
- 33, 33, 33, 33, 33, 33, 34, 34, 34, 33, 33, 33, 33, 33, 33, 33, 33, 33,
- 33, 33, 33, 32,
- /* Size 32x32 */
- 32, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33,
- 33, 33, 33, 33, 33, 33, 33, 33, 34, 34, 34, 34, 34, 34, 33, 33, 33, 33,
- 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33,
- 33, 33, 33, 33, 34, 34, 34, 34, 34, 34, 33, 33, 33, 33, 33, 33, 33, 33,
- 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33,
- 33, 34, 34, 34, 34, 34, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33,
- 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 34, 34,
- 34, 34, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33,
- 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 34, 34, 34, 33, 33,
- 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33,
- 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33,
- 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33,
- 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33,
- 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33,
- 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33,
- 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33,
- 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33,
- 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33,
- 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33,
- 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33,
- 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33,
- 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33,
- 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33,
- 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33,
- 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33,
- 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33,
- 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33,
- 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33,
- 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33,
- 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33,
- 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33,
- 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33,
- 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33,
- 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33,
- 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33,
- 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33,
- 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33,
- 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33,
- 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33,
- 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33,
- 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33,
- 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33,
- 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33,
- 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33,
- 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33,
- 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33,
- 33, 33, 33, 33, 34, 34, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33,
- 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33,
- 34, 34, 34, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33,
- 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 34, 34, 34, 34,
- 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33,
- 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 34, 34, 34, 34, 34, 33, 33, 33,
- 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33,
- 33, 33, 33, 33, 33, 33, 34, 34, 34, 34, 34, 33, 33, 33, 33, 33, 33, 33,
- 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33,
- 32, 32, 34, 34, 34, 34, 34, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33,
- 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 32, 32,
- /* Size 4x8 */
- 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33,
- 33, 33, 33, 33, 33, 33, 33, 33, 32, 32, 34, 33, 32, 32,
- /* Size 8x4 */
- 33, 33, 33, 33, 33, 33, 33, 34, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33,
- 33, 33, 33, 33, 32, 32, 33, 33, 33, 33, 33, 33, 32, 32,
- /* Size 8x16 */
- 32, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33,
- 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33,
- 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33,
- 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33,
- 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33,
- 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 32, 32, 32, 33, 33, 33, 33,
- 33, 32, 32, 32, 34, 33, 33, 33, 33, 32, 32, 32, 34, 33, 33, 33, 32, 32,
- 32, 32,
- /* Size 16x8 */
- 32, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 34, 34, 33, 33,
- 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33,
- 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33,
- 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33,
- 33, 33, 33, 33, 33, 33, 33, 32, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33,
- 33, 33, 32, 32, 32, 32, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33,
- 32, 32, 32, 32, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 32, 32,
- 32, 32,
- /* Size 16x32 */
- 32, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33,
- 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33,
- 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33,
- 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33,
- 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33,
- 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33,
- 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33,
- 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33,
- 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33,
- 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33,
- 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33,
- 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33,
- 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33,
- 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33,
- 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33,
- 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33,
- 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33,
- 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33,
- 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33,
- 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33,
- 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33,
- 32, 32, 32, 32, 32, 32, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 32, 32,
- 32, 32, 32, 32, 33, 33, 33, 33, 33, 33, 33, 33, 33, 32, 32, 32, 32, 32,
- 32, 32, 33, 33, 33, 33, 33, 33, 33, 33, 33, 32, 32, 32, 32, 32, 32, 32,
- 34, 33, 33, 33, 33, 33, 33, 33, 33, 32, 32, 32, 32, 32, 32, 32, 34, 33,
- 33, 33, 33, 33, 33, 33, 33, 32, 32, 32, 32, 32, 32, 32, 34, 34, 33, 33,
- 33, 33, 33, 33, 32, 32, 32, 32, 32, 32, 32, 32, 34, 34, 33, 33, 33, 33,
- 33, 33, 32, 32, 32, 32, 32, 32, 32, 32, 34, 34, 33, 33, 33, 33, 33, 33,
- 32, 32, 32, 32, 32, 32, 32, 32,
- /* Size 32x16 */
- 32, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33,
- 33, 33, 33, 33, 33, 33, 33, 33, 33, 34, 34, 34, 34, 34, 33, 33, 33, 33,
- 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33,
- 33, 33, 33, 33, 33, 33, 33, 34, 34, 34, 33, 33, 33, 33, 33, 33, 33, 33,
- 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33,
- 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33,
- 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33,
- 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33,
- 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33,
- 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33,
- 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33,
- 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33,
- 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33,
- 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33,
- 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33,
- 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 32, 32, 32,
- 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33,
- 33, 33, 33, 33, 33, 33, 33, 32, 32, 32, 32, 32, 32, 32, 33, 33, 33, 33,
- 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33,
- 33, 32, 32, 32, 32, 32, 32, 32, 32, 32, 33, 33, 33, 33, 33, 33, 33, 33,
- 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33,
- 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33,
- 33, 33, 33, 33, 33, 33, 33, 32, 32, 32, 32, 32, 32, 32, 32, 32, 33, 33,
- 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33,
- 33, 33, 33, 32, 32, 32, 32, 32, 32, 32, 32, 32, 33, 33, 33, 33, 33, 33,
- 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 32,
- 32, 32, 32, 32, 32, 32, 32, 32,
- /* Size 4x16 */
- 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33,
- 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33,
- 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 32, 33, 33,
- 32, 32, 33, 33, 32, 32, 34, 33, 32, 32,
- /* Size 16x4 */
- 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 34, 33, 33,
- 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33,
- 33, 33, 33, 33, 33, 33, 33, 33, 33, 32, 32, 32, 33, 33, 33, 33, 33, 33,
- 33, 33, 33, 33, 33, 33, 32, 32, 32, 32,
- /* Size 8x32 */
- 32, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33,
- 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33,
- 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33,
- 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33,
- 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33,
- 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33,
- 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33,
- 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33,
- 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33,
- 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33,
- 33, 33, 33, 33, 33, 33, 33, 33, 33, 32, 32, 32, 33, 33, 33, 33, 33, 32,
- 32, 32, 33, 33, 33, 33, 33, 32, 32, 32, 33, 33, 33, 33, 33, 32, 32, 32,
- 34, 33, 33, 33, 33, 32, 32, 32, 34, 33, 33, 33, 33, 32, 32, 32, 34, 33,
- 33, 33, 32, 32, 32, 32, 34, 33, 33, 33, 32, 32, 32, 32, 34, 33, 33, 33,
- 32, 32, 32, 32,
- /* Size 32x8 */
- 32, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33,
- 33, 33, 33, 33, 33, 33, 33, 33, 33, 34, 34, 34, 34, 34, 33, 33, 33, 33,
- 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33,
- 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33,
- 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33,
- 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33,
- 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33,
- 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33,
- 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 32, 32, 32, 33, 33,
- 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33,
- 33, 33, 33, 32, 32, 32, 32, 32, 32, 32, 32, 32, 33, 33, 33, 33, 33, 33,
- 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33,
- 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 32, 32, 32, 32, 32,
- 32, 32, 32, 32 },
- },
- {
- { /* Luma */
- /* Size 4x4 */
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- /* Size 8x8 */
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- /* Size 16x16 */
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32,
- /* Size 32x32 */
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- /* Size 4x8 */
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- /* Size 8x4 */
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- /* Size 8x16 */
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32,
- /* Size 16x8 */
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32,
- /* Size 16x32 */
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32,
- /* Size 32x16 */
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32,
- /* Size 4x16 */
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- /* Size 16x4 */
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- /* Size 8x32 */
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32,
- /* Size 32x8 */
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32 },
- { /* Chroma */
- /* Size 4x4 */
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- /* Size 8x8 */
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- /* Size 16x16 */
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32,
- /* Size 32x32 */
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- /* Size 4x8 */
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- /* Size 8x4 */
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- /* Size 8x16 */
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32,
- /* Size 16x8 */
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32,
- /* Size 16x32 */
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32,
- /* Size 32x16 */
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32,
- /* Size 4x16 */
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- /* Size 16x4 */
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- /* Size 8x32 */
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32,
- /* Size 32x8 */
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
- 32, 32, 32, 32 },
- },
-};
diff --git a/third_party/aom/av1/common/quant_common.h b/third_party/aom/av1/common/quant_common.h
deleted file mode 100644
index d1f52a660..000000000
--- a/third_party/aom/av1/common/quant_common.h
+++ /dev/null
@@ -1,63 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#ifndef AOM_AV1_COMMON_QUANT_COMMON_H_
-#define AOM_AV1_COMMON_QUANT_COMMON_H_
-
-#include "aom/aom_codec.h"
-#include "av1/common/seg_common.h"
-#include "av1/common/enums.h"
-#include "av1/common/entropy.h"
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-#define MINQ 0
-#define MAXQ 255
-#define QINDEX_RANGE (MAXQ - MINQ + 1)
-#define QINDEX_BITS 8
-// Total number of QM sets stored
-#define QM_LEVEL_BITS 4
-#define NUM_QM_LEVELS (1 << QM_LEVEL_BITS)
-/* Range of QMS is between first and last value, with offset applied to inter
- * blocks*/
-#define DEFAULT_QM_Y 10
-#define DEFAULT_QM_U 11
-#define DEFAULT_QM_V 12
-#define DEFAULT_QM_FIRST 5
-#define DEFAULT_QM_LAST 9
-
-struct AV1Common;
-
-int16_t av1_dc_quant_Q3(int qindex, int delta, aom_bit_depth_t bit_depth);
-int16_t av1_ac_quant_Q3(int qindex, int delta, aom_bit_depth_t bit_depth);
-int16_t av1_dc_quant_QTX(int qindex, int delta, aom_bit_depth_t bit_depth);
-int16_t av1_ac_quant_QTX(int qindex, int delta, aom_bit_depth_t bit_depth);
-
-int av1_get_qindex(const struct segmentation *seg, int segment_id,
- int base_qindex);
-// Reduce the large number of quantizers to a smaller number of levels for which
-// different matrices may be defined
-static INLINE int aom_get_qmlevel(int qindex, int first, int last) {
- return first + (qindex * (last + 1 - first)) / QINDEX_RANGE;
-}
-void av1_qm_init(struct AV1Common *cm);
-const qm_val_t *av1_iqmatrix(struct AV1Common *cm, int qindex, int comp,
- TX_SIZE tx_size);
-const qm_val_t *av1_qmatrix(struct AV1Common *cm, int qindex, int comp,
- TX_SIZE tx_size);
-
-#ifdef __cplusplus
-} // extern "C"
-#endif
-
-#endif // AOM_AV1_COMMON_QUANT_COMMON_H_
diff --git a/third_party/aom/av1/common/reconinter.c b/third_party/aom/av1/common/reconinter.c
deleted file mode 100644
index 3203efce4..000000000
--- a/third_party/aom/av1/common/reconinter.c
+++ /dev/null
@@ -1,1162 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#include <assert.h>
-#include <stdio.h>
-#include <limits.h>
-
-#include "config/aom_config.h"
-#include "config/aom_dsp_rtcd.h"
-#include "config/aom_scale_rtcd.h"
-
-#include "aom/aom_integer.h"
-#include "aom_dsp/blend.h"
-
-#include "av1/common/blockd.h"
-#include "av1/common/mvref_common.h"
-#include "av1/common/reconinter.h"
-#include "av1/common/reconintra.h"
-#include "av1/common/onyxc_int.h"
-#include "av1/common/obmc.h"
-
-#define USE_PRECOMPUTED_WEDGE_MASK 1
-#define USE_PRECOMPUTED_WEDGE_SIGN 1
-
-// This function will determine whether or not to create a warped
-// prediction.
-int av1_allow_warp(const MB_MODE_INFO *const mbmi,
- const WarpTypesAllowed *const warp_types,
- const WarpedMotionParams *const gm_params,
- int build_for_obmc, int x_scale, int y_scale,
- WarpedMotionParams *final_warp_params) {
- if (x_scale != SCALE_SUBPEL_SHIFTS || y_scale != SCALE_SUBPEL_SHIFTS)
- return 0;
-
- if (final_warp_params != NULL) *final_warp_params = default_warp_params;
-
- if (build_for_obmc) return 0;
-
- if (warp_types->local_warp_allowed && !mbmi->wm_params.invalid) {
- if (final_warp_params != NULL)
- memcpy(final_warp_params, &mbmi->wm_params, sizeof(*final_warp_params));
- return 1;
- } else if (warp_types->global_warp_allowed && !gm_params->invalid) {
- if (final_warp_params != NULL)
- memcpy(final_warp_params, gm_params, sizeof(*final_warp_params));
- return 1;
- }
-
- return 0;
-}
-
-void av1_make_inter_predictor(const uint8_t *src, int src_stride, uint8_t *dst,
- int dst_stride, const SubpelParams *subpel_params,
- const struct scale_factors *sf, int w, int h,
- ConvolveParams *conv_params,
- InterpFilters interp_filters,
- const WarpTypesAllowed *warp_types, int p_col,
- int p_row, int plane, int ref,
- const MB_MODE_INFO *mi, int build_for_obmc,
- const MACROBLOCKD *xd, int can_use_previous) {
- // Make sure the selected motion mode is valid for this configuration
- assert_motion_mode_valid(mi->motion_mode, xd->global_motion, xd, mi,
- can_use_previous);
- assert(IMPLIES(conv_params->is_compound, conv_params->dst != NULL));
-
- WarpedMotionParams final_warp_params;
- const int do_warp =
- (w >= 8 && h >= 8 &&
- av1_allow_warp(mi, warp_types, &xd->global_motion[mi->ref_frame[ref]],
- build_for_obmc, subpel_params->xs, subpel_params->ys,
- &final_warp_params));
- const int is_intrabc = mi->use_intrabc;
- assert(IMPLIES(is_intrabc, !do_warp));
-
- if (do_warp && xd->cur_frame_force_integer_mv == 0) {
- const struct macroblockd_plane *const pd = &xd->plane[plane];
- const struct buf_2d *const pre_buf = &pd->pre[ref];
- av1_warp_plane(&final_warp_params,
- xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH, xd->bd,
- pre_buf->buf0, pre_buf->width, pre_buf->height,
- pre_buf->stride, dst, p_col, p_row, w, h, dst_stride,
- pd->subsampling_x, pd->subsampling_y, conv_params);
- } else if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
- highbd_inter_predictor(src, src_stride, dst, dst_stride, subpel_params, sf,
- w, h, conv_params, interp_filters, is_intrabc,
- xd->bd);
- } else {
- inter_predictor(src, src_stride, dst, dst_stride, subpel_params, sf, w, h,
- conv_params, interp_filters, is_intrabc);
- }
-}
-
-#if USE_PRECOMPUTED_WEDGE_MASK
-static const uint8_t wedge_master_oblique_odd[MASK_MASTER_SIZE] = {
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 2, 6, 18,
- 37, 53, 60, 63, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
- 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
-};
-static const uint8_t wedge_master_oblique_even[MASK_MASTER_SIZE] = {
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 4, 11, 27,
- 46, 58, 62, 63, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
- 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
-};
-static const uint8_t wedge_master_vertical[MASK_MASTER_SIZE] = {
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 7, 21,
- 43, 57, 62, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
- 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
-};
-
-static void shift_copy(const uint8_t *src, uint8_t *dst, int shift, int width) {
- if (shift >= 0) {
- memcpy(dst + shift, src, width - shift);
- memset(dst, src[0], shift);
- } else {
- shift = -shift;
- memcpy(dst, src + shift, width - shift);
- memset(dst + width - shift, src[width - 1], shift);
- }
-}
-#endif // USE_PRECOMPUTED_WEDGE_MASK
-
-#if USE_PRECOMPUTED_WEDGE_SIGN
-/* clang-format off */
-DECLARE_ALIGNED(16, static uint8_t,
- wedge_signflip_lookup[BLOCK_SIZES_ALL][MAX_WEDGE_TYPES]) = {
- { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }, // not used
- { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }, // not used
- { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }, // not used
- { 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0, 1, },
- { 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0, 1, },
- { 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0, 1, },
- { 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0, 1, },
- { 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0, 1, },
- { 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0, 1, },
- { 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0, 1, },
- { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }, // not used
- { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }, // not used
- { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }, // not used
- { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }, // not used
- { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }, // not used
- { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }, // not used
- { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }, // not used
- { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }, // not used
- { 1, 1, 1, 1, 0, 1, 1, 1, 0, 1, 0, 1, 1, 1, 0, 1, },
- { 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 0, 1, 0, 1, },
- { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }, // not used
- { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }, // not used
-};
-/* clang-format on */
-#else
-DECLARE_ALIGNED(16, static uint8_t,
- wedge_signflip_lookup[BLOCK_SIZES_ALL][MAX_WEDGE_TYPES]);
-#endif // USE_PRECOMPUTED_WEDGE_SIGN
-
-// [negative][direction]
-DECLARE_ALIGNED(
- 16, static uint8_t,
- wedge_mask_obl[2][WEDGE_DIRECTIONS][MASK_MASTER_SIZE * MASK_MASTER_SIZE]);
-
-// 4 * MAX_WEDGE_SQUARE is an easy to compute and fairly tight upper bound
-// on the sum of all mask sizes up to an including MAX_WEDGE_SQUARE.
-DECLARE_ALIGNED(16, static uint8_t,
- wedge_mask_buf[2 * MAX_WEDGE_TYPES * 4 * MAX_WEDGE_SQUARE]);
-
-static wedge_masks_type wedge_masks[BLOCK_SIZES_ALL][2];
-
-static const wedge_code_type wedge_codebook_16_hgtw[16] = {
- { WEDGE_OBLIQUE27, 4, 4 }, { WEDGE_OBLIQUE63, 4, 4 },
- { WEDGE_OBLIQUE117, 4, 4 }, { WEDGE_OBLIQUE153, 4, 4 },
- { WEDGE_HORIZONTAL, 4, 2 }, { WEDGE_HORIZONTAL, 4, 4 },
- { WEDGE_HORIZONTAL, 4, 6 }, { WEDGE_VERTICAL, 4, 4 },
- { WEDGE_OBLIQUE27, 4, 2 }, { WEDGE_OBLIQUE27, 4, 6 },
- { WEDGE_OBLIQUE153, 4, 2 }, { WEDGE_OBLIQUE153, 4, 6 },
- { WEDGE_OBLIQUE63, 2, 4 }, { WEDGE_OBLIQUE63, 6, 4 },
- { WEDGE_OBLIQUE117, 2, 4 }, { WEDGE_OBLIQUE117, 6, 4 },
-};
-
-static const wedge_code_type wedge_codebook_16_hltw[16] = {
- { WEDGE_OBLIQUE27, 4, 4 }, { WEDGE_OBLIQUE63, 4, 4 },
- { WEDGE_OBLIQUE117, 4, 4 }, { WEDGE_OBLIQUE153, 4, 4 },
- { WEDGE_VERTICAL, 2, 4 }, { WEDGE_VERTICAL, 4, 4 },
- { WEDGE_VERTICAL, 6, 4 }, { WEDGE_HORIZONTAL, 4, 4 },
- { WEDGE_OBLIQUE27, 4, 2 }, { WEDGE_OBLIQUE27, 4, 6 },
- { WEDGE_OBLIQUE153, 4, 2 }, { WEDGE_OBLIQUE153, 4, 6 },
- { WEDGE_OBLIQUE63, 2, 4 }, { WEDGE_OBLIQUE63, 6, 4 },
- { WEDGE_OBLIQUE117, 2, 4 }, { WEDGE_OBLIQUE117, 6, 4 },
-};
-
-static const wedge_code_type wedge_codebook_16_heqw[16] = {
- { WEDGE_OBLIQUE27, 4, 4 }, { WEDGE_OBLIQUE63, 4, 4 },
- { WEDGE_OBLIQUE117, 4, 4 }, { WEDGE_OBLIQUE153, 4, 4 },
- { WEDGE_HORIZONTAL, 4, 2 }, { WEDGE_HORIZONTAL, 4, 6 },
- { WEDGE_VERTICAL, 2, 4 }, { WEDGE_VERTICAL, 6, 4 },
- { WEDGE_OBLIQUE27, 4, 2 }, { WEDGE_OBLIQUE27, 4, 6 },
- { WEDGE_OBLIQUE153, 4, 2 }, { WEDGE_OBLIQUE153, 4, 6 },
- { WEDGE_OBLIQUE63, 2, 4 }, { WEDGE_OBLIQUE63, 6, 4 },
- { WEDGE_OBLIQUE117, 2, 4 }, { WEDGE_OBLIQUE117, 6, 4 },
-};
-
-const wedge_params_type wedge_params_lookup[BLOCK_SIZES_ALL] = {
- { 0, NULL, NULL, NULL },
- { 0, NULL, NULL, NULL },
- { 0, NULL, NULL, NULL },
- { 4, wedge_codebook_16_heqw, wedge_signflip_lookup[BLOCK_8X8],
- wedge_masks[BLOCK_8X8] },
- { 4, wedge_codebook_16_hgtw, wedge_signflip_lookup[BLOCK_8X16],
- wedge_masks[BLOCK_8X16] },
- { 4, wedge_codebook_16_hltw, wedge_signflip_lookup[BLOCK_16X8],
- wedge_masks[BLOCK_16X8] },
- { 4, wedge_codebook_16_heqw, wedge_signflip_lookup[BLOCK_16X16],
- wedge_masks[BLOCK_16X16] },
- { 4, wedge_codebook_16_hgtw, wedge_signflip_lookup[BLOCK_16X32],
- wedge_masks[BLOCK_16X32] },
- { 4, wedge_codebook_16_hltw, wedge_signflip_lookup[BLOCK_32X16],
- wedge_masks[BLOCK_32X16] },
- { 4, wedge_codebook_16_heqw, wedge_signflip_lookup[BLOCK_32X32],
- wedge_masks[BLOCK_32X32] },
- { 0, NULL, NULL, NULL },
- { 0, NULL, NULL, NULL },
- { 0, NULL, NULL, NULL },
- { 0, NULL, NULL, NULL },
- { 0, NULL, NULL, NULL },
- { 0, NULL, NULL, NULL },
- { 0, NULL, NULL, NULL },
- { 0, NULL, NULL, NULL },
- { 4, wedge_codebook_16_hgtw, wedge_signflip_lookup[BLOCK_8X32],
- wedge_masks[BLOCK_8X32] },
- { 4, wedge_codebook_16_hltw, wedge_signflip_lookup[BLOCK_32X8],
- wedge_masks[BLOCK_32X8] },
- { 0, NULL, NULL, NULL },
- { 0, NULL, NULL, NULL },
-};
-
-static const uint8_t *get_wedge_mask_inplace(int wedge_index, int neg,
- BLOCK_SIZE sb_type) {
- const uint8_t *master;
- const int bh = block_size_high[sb_type];
- const int bw = block_size_wide[sb_type];
- const wedge_code_type *a =
- wedge_params_lookup[sb_type].codebook + wedge_index;
- int woff, hoff;
- const uint8_t wsignflip = wedge_params_lookup[sb_type].signflip[wedge_index];
-
- assert(wedge_index >= 0 &&
- wedge_index < (1 << get_wedge_bits_lookup(sb_type)));
- woff = (a->x_offset * bw) >> 3;
- hoff = (a->y_offset * bh) >> 3;
- master = wedge_mask_obl[neg ^ wsignflip][a->direction] +
- MASK_MASTER_STRIDE * (MASK_MASTER_SIZE / 2 - hoff) +
- MASK_MASTER_SIZE / 2 - woff;
- return master;
-}
-
-const uint8_t *av1_get_compound_type_mask(
- const INTERINTER_COMPOUND_DATA *const comp_data, BLOCK_SIZE sb_type) {
- assert(is_masked_compound_type(comp_data->type));
- (void)sb_type;
- switch (comp_data->type) {
- case COMPOUND_WEDGE:
- return av1_get_contiguous_soft_mask(comp_data->wedge_index,
- comp_data->wedge_sign, sb_type);
- case COMPOUND_DIFFWTD: return comp_data->seg_mask;
- default: assert(0); return NULL;
- }
-}
-
-static void diffwtd_mask_d16(uint8_t *mask, int which_inverse, int mask_base,
- const CONV_BUF_TYPE *src0, int src0_stride,
- const CONV_BUF_TYPE *src1, int src1_stride, int h,
- int w, ConvolveParams *conv_params, int bd) {
- int round =
- 2 * FILTER_BITS - conv_params->round_0 - conv_params->round_1 + (bd - 8);
- int i, j, m, diff;
- for (i = 0; i < h; ++i) {
- for (j = 0; j < w; ++j) {
- diff = abs(src0[i * src0_stride + j] - src1[i * src1_stride + j]);
- diff = ROUND_POWER_OF_TWO(diff, round);
- m = clamp(mask_base + (diff / DIFF_FACTOR), 0, AOM_BLEND_A64_MAX_ALPHA);
- mask[i * w + j] = which_inverse ? AOM_BLEND_A64_MAX_ALPHA - m : m;
- }
- }
-}
-
-void av1_build_compound_diffwtd_mask_d16_c(
- uint8_t *mask, DIFFWTD_MASK_TYPE mask_type, const CONV_BUF_TYPE *src0,
- int src0_stride, const CONV_BUF_TYPE *src1, int src1_stride, int h, int w,
- ConvolveParams *conv_params, int bd) {
- switch (mask_type) {
- case DIFFWTD_38:
- diffwtd_mask_d16(mask, 0, 38, src0, src0_stride, src1, src1_stride, h, w,
- conv_params, bd);
- break;
- case DIFFWTD_38_INV:
- diffwtd_mask_d16(mask, 1, 38, src0, src0_stride, src1, src1_stride, h, w,
- conv_params, bd);
- break;
- default: assert(0);
- }
-}
-
-static void diffwtd_mask(uint8_t *mask, int which_inverse, int mask_base,
- const uint8_t *src0, int src0_stride,
- const uint8_t *src1, int src1_stride, int h, int w) {
- int i, j, m, diff;
- for (i = 0; i < h; ++i) {
- for (j = 0; j < w; ++j) {
- diff =
- abs((int)src0[i * src0_stride + j] - (int)src1[i * src1_stride + j]);
- m = clamp(mask_base + (diff / DIFF_FACTOR), 0, AOM_BLEND_A64_MAX_ALPHA);
- mask[i * w + j] = which_inverse ? AOM_BLEND_A64_MAX_ALPHA - m : m;
- }
- }
-}
-
-void av1_build_compound_diffwtd_mask_c(uint8_t *mask,
- DIFFWTD_MASK_TYPE mask_type,
- const uint8_t *src0, int src0_stride,
- const uint8_t *src1, int src1_stride,
- int h, int w) {
- switch (mask_type) {
- case DIFFWTD_38:
- diffwtd_mask(mask, 0, 38, src0, src0_stride, src1, src1_stride, h, w);
- break;
- case DIFFWTD_38_INV:
- diffwtd_mask(mask, 1, 38, src0, src0_stride, src1, src1_stride, h, w);
- break;
- default: assert(0);
- }
-}
-
-static AOM_FORCE_INLINE void diffwtd_mask_highbd(
- uint8_t *mask, int which_inverse, int mask_base, const uint16_t *src0,
- int src0_stride, const uint16_t *src1, int src1_stride, int h, int w,
- const unsigned int bd) {
- assert(bd >= 8);
- if (bd == 8) {
- if (which_inverse) {
- for (int i = 0; i < h; ++i) {
- for (int j = 0; j < w; ++j) {
- int diff = abs((int)src0[j] - (int)src1[j]) / DIFF_FACTOR;
- unsigned int m = negative_to_zero(mask_base + diff);
- m = AOMMIN(m, AOM_BLEND_A64_MAX_ALPHA);
- mask[j] = AOM_BLEND_A64_MAX_ALPHA - m;
- }
- src0 += src0_stride;
- src1 += src1_stride;
- mask += w;
- }
- } else {
- for (int i = 0; i < h; ++i) {
- for (int j = 0; j < w; ++j) {
- int diff = abs((int)src0[j] - (int)src1[j]) / DIFF_FACTOR;
- unsigned int m = negative_to_zero(mask_base + diff);
- m = AOMMIN(m, AOM_BLEND_A64_MAX_ALPHA);
- mask[j] = m;
- }
- src0 += src0_stride;
- src1 += src1_stride;
- mask += w;
- }
- }
- } else {
- const unsigned int bd_shift = bd - 8;
- if (which_inverse) {
- for (int i = 0; i < h; ++i) {
- for (int j = 0; j < w; ++j) {
- int diff =
- (abs((int)src0[j] - (int)src1[j]) >> bd_shift) / DIFF_FACTOR;
- unsigned int m = negative_to_zero(mask_base + diff);
- m = AOMMIN(m, AOM_BLEND_A64_MAX_ALPHA);
- mask[j] = AOM_BLEND_A64_MAX_ALPHA - m;
- }
- src0 += src0_stride;
- src1 += src1_stride;
- mask += w;
- }
- } else {
- for (int i = 0; i < h; ++i) {
- for (int j = 0; j < w; ++j) {
- int diff =
- (abs((int)src0[j] - (int)src1[j]) >> bd_shift) / DIFF_FACTOR;
- unsigned int m = negative_to_zero(mask_base + diff);
- m = AOMMIN(m, AOM_BLEND_A64_MAX_ALPHA);
- mask[j] = m;
- }
- src0 += src0_stride;
- src1 += src1_stride;
- mask += w;
- }
- }
- }
-}
-
-void av1_build_compound_diffwtd_mask_highbd_c(
- uint8_t *mask, DIFFWTD_MASK_TYPE mask_type, const uint8_t *src0,
- int src0_stride, const uint8_t *src1, int src1_stride, int h, int w,
- int bd) {
- switch (mask_type) {
- case DIFFWTD_38:
- diffwtd_mask_highbd(mask, 0, 38, CONVERT_TO_SHORTPTR(src0), src0_stride,
- CONVERT_TO_SHORTPTR(src1), src1_stride, h, w, bd);
- break;
- case DIFFWTD_38_INV:
- diffwtd_mask_highbd(mask, 1, 38, CONVERT_TO_SHORTPTR(src0), src0_stride,
- CONVERT_TO_SHORTPTR(src1), src1_stride, h, w, bd);
- break;
- default: assert(0);
- }
-}
-
-static void init_wedge_master_masks() {
- int i, j;
- const int w = MASK_MASTER_SIZE;
- const int h = MASK_MASTER_SIZE;
- const int stride = MASK_MASTER_STRIDE;
-// Note: index [0] stores the masters, and [1] its complement.
-#if USE_PRECOMPUTED_WEDGE_MASK
- // Generate prototype by shifting the masters
- int shift = h / 4;
- for (i = 0; i < h; i += 2) {
- shift_copy(wedge_master_oblique_even,
- &wedge_mask_obl[0][WEDGE_OBLIQUE63][i * stride], shift,
- MASK_MASTER_SIZE);
- shift--;
- shift_copy(wedge_master_oblique_odd,
- &wedge_mask_obl[0][WEDGE_OBLIQUE63][(i + 1) * stride], shift,
- MASK_MASTER_SIZE);
- memcpy(&wedge_mask_obl[0][WEDGE_VERTICAL][i * stride],
- wedge_master_vertical,
- MASK_MASTER_SIZE * sizeof(wedge_master_vertical[0]));
- memcpy(&wedge_mask_obl[0][WEDGE_VERTICAL][(i + 1) * stride],
- wedge_master_vertical,
- MASK_MASTER_SIZE * sizeof(wedge_master_vertical[0]));
- }
-#else
- static const double smoother_param = 2.85;
- const int a[2] = { 2, 1 };
- const double asqrt = sqrt(a[0] * a[0] + a[1] * a[1]);
- for (i = 0; i < h; i++) {
- for (j = 0; j < w; ++j) {
- int x = (2 * j + 1 - w);
- int y = (2 * i + 1 - h);
- double d = (a[0] * x + a[1] * y) / asqrt;
- const int msk = (int)rint((1.0 + tanh(d / smoother_param)) * 32);
- wedge_mask_obl[0][WEDGE_OBLIQUE63][i * stride + j] = msk;
- const int mskx = (int)rint((1.0 + tanh(x / smoother_param)) * 32);
- wedge_mask_obl[0][WEDGE_VERTICAL][i * stride + j] = mskx;
- }
- }
-#endif // USE_PRECOMPUTED_WEDGE_MASK
- for (i = 0; i < h; ++i) {
- for (j = 0; j < w; ++j) {
- const int msk = wedge_mask_obl[0][WEDGE_OBLIQUE63][i * stride + j];
- wedge_mask_obl[0][WEDGE_OBLIQUE27][j * stride + i] = msk;
- wedge_mask_obl[0][WEDGE_OBLIQUE117][i * stride + w - 1 - j] =
- wedge_mask_obl[0][WEDGE_OBLIQUE153][(w - 1 - j) * stride + i] =
- (1 << WEDGE_WEIGHT_BITS) - msk;
- wedge_mask_obl[1][WEDGE_OBLIQUE63][i * stride + j] =
- wedge_mask_obl[1][WEDGE_OBLIQUE27][j * stride + i] =
- (1 << WEDGE_WEIGHT_BITS) - msk;
- wedge_mask_obl[1][WEDGE_OBLIQUE117][i * stride + w - 1 - j] =
- wedge_mask_obl[1][WEDGE_OBLIQUE153][(w - 1 - j) * stride + i] = msk;
- const int mskx = wedge_mask_obl[0][WEDGE_VERTICAL][i * stride + j];
- wedge_mask_obl[0][WEDGE_HORIZONTAL][j * stride + i] = mskx;
- wedge_mask_obl[1][WEDGE_VERTICAL][i * stride + j] =
- wedge_mask_obl[1][WEDGE_HORIZONTAL][j * stride + i] =
- (1 << WEDGE_WEIGHT_BITS) - mskx;
- }
- }
-}
-
-#if !USE_PRECOMPUTED_WEDGE_SIGN
-// If the signs for the wedges for various blocksizes are
-// inconsistent flip the sign flag. Do it only once for every
-// wedge codebook.
-static void init_wedge_signs() {
- BLOCK_SIZE sb_type;
- memset(wedge_signflip_lookup, 0, sizeof(wedge_signflip_lookup));
- for (sb_type = BLOCK_4X4; sb_type < BLOCK_SIZES_ALL; ++sb_type) {
- const int bw = block_size_wide[sb_type];
- const int bh = block_size_high[sb_type];
- const wedge_params_type wedge_params = wedge_params_lookup[sb_type];
- const int wbits = wedge_params.bits;
- const int wtypes = 1 << wbits;
- int i, w;
- if (wbits) {
- for (w = 0; w < wtypes; ++w) {
- // Get the mask master, i.e. index [0]
- const uint8_t *mask = get_wedge_mask_inplace(w, 0, sb_type);
- int avg = 0;
- for (i = 0; i < bw; ++i) avg += mask[i];
- for (i = 1; i < bh; ++i) avg += mask[i * MASK_MASTER_STRIDE];
- avg = (avg + (bw + bh - 1) / 2) / (bw + bh - 1);
- // Default sign of this wedge is 1 if the average < 32, 0 otherwise.
- // If default sign is 1:
- // If sign requested is 0, we need to flip the sign and return
- // the complement i.e. index [1] instead. If sign requested is 1
- // we need to flip the sign and return index [0] instead.
- // If default sign is 0:
- // If sign requested is 0, we need to return index [0] the master
- // if sign requested is 1, we need to return the complement index [1]
- // instead.
- wedge_params.signflip[w] = (avg < 32);
- }
- }
- }
-}
-#endif // !USE_PRECOMPUTED_WEDGE_SIGN
-
-static void init_wedge_masks() {
- uint8_t *dst = wedge_mask_buf;
- BLOCK_SIZE bsize;
- memset(wedge_masks, 0, sizeof(wedge_masks));
- for (bsize = BLOCK_4X4; bsize < BLOCK_SIZES_ALL; ++bsize) {
- const uint8_t *mask;
- const int bw = block_size_wide[bsize];
- const int bh = block_size_high[bsize];
- const wedge_params_type *wedge_params = &wedge_params_lookup[bsize];
- const int wbits = wedge_params->bits;
- const int wtypes = 1 << wbits;
- int w;
- if (wbits == 0) continue;
- for (w = 0; w < wtypes; ++w) {
- mask = get_wedge_mask_inplace(w, 0, bsize);
- aom_convolve_copy(mask, MASK_MASTER_STRIDE, dst, bw, NULL, 0, NULL, 0, bw,
- bh);
- wedge_params->masks[0][w] = dst;
- dst += bw * bh;
-
- mask = get_wedge_mask_inplace(w, 1, bsize);
- aom_convolve_copy(mask, MASK_MASTER_STRIDE, dst, bw, NULL, 0, NULL, 0, bw,
- bh);
- wedge_params->masks[1][w] = dst;
- dst += bw * bh;
- }
- assert(sizeof(wedge_mask_buf) >= (size_t)(dst - wedge_mask_buf));
- }
-}
-
-// Equation of line: f(x, y) = a[0]*(x - a[2]*w/8) + a[1]*(y - a[3]*h/8) = 0
-void av1_init_wedge_masks() {
- init_wedge_master_masks();
-#if !USE_PRECOMPUTED_WEDGE_SIGN
- init_wedge_signs();
-#endif // !USE_PRECOMPUTED_WEDGE_SIGN
- init_wedge_masks();
-}
-
-static void build_masked_compound_no_round(
- uint8_t *dst, int dst_stride, const CONV_BUF_TYPE *src0, int src0_stride,
- const CONV_BUF_TYPE *src1, int src1_stride,
- const INTERINTER_COMPOUND_DATA *const comp_data, BLOCK_SIZE sb_type, int h,
- int w, ConvolveParams *conv_params, MACROBLOCKD *xd) {
- // Derive subsampling from h and w passed in. May be refactored to
- // pass in subsampling factors directly.
- const int subh = (2 << mi_size_high_log2[sb_type]) == h;
- const int subw = (2 << mi_size_wide_log2[sb_type]) == w;
- const uint8_t *mask = av1_get_compound_type_mask(comp_data, sb_type);
- if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH)
- aom_highbd_blend_a64_d16_mask(dst, dst_stride, src0, src0_stride, src1,
- src1_stride, mask, block_size_wide[sb_type],
- w, h, subw, subh, conv_params, xd->bd);
- else
- aom_lowbd_blend_a64_d16_mask(dst, dst_stride, src0, src0_stride, src1,
- src1_stride, mask, block_size_wide[sb_type], w,
- h, subw, subh, conv_params);
-}
-
-void av1_make_masked_inter_predictor(
- const uint8_t *pre, int pre_stride, uint8_t *dst, int dst_stride,
- const SubpelParams *subpel_params, const struct scale_factors *sf, int w,
- int h, ConvolveParams *conv_params, InterpFilters interp_filters, int plane,
- const WarpTypesAllowed *warp_types, int p_col, int p_row, int ref,
- MACROBLOCKD *xd, int can_use_previous) {
- MB_MODE_INFO *mi = xd->mi[0];
- (void)dst;
- (void)dst_stride;
- mi->interinter_comp.seg_mask = xd->seg_mask;
- const INTERINTER_COMPOUND_DATA *comp_data = &mi->interinter_comp;
-
-// We're going to call av1_make_inter_predictor to generate a prediction into
-// a temporary buffer, then will blend that temporary buffer with that from
-// the other reference.
-//
-#define INTER_PRED_BYTES_PER_PIXEL 2
-
- DECLARE_ALIGNED(32, uint8_t,
- tmp_buf[INTER_PRED_BYTES_PER_PIXEL * MAX_SB_SQUARE]);
-#undef INTER_PRED_BYTES_PER_PIXEL
-
- uint8_t *tmp_dst = get_buf_by_bd(xd, tmp_buf);
-
- const int tmp_buf_stride = MAX_SB_SIZE;
- CONV_BUF_TYPE *org_dst = conv_params->dst;
- int org_dst_stride = conv_params->dst_stride;
- CONV_BUF_TYPE *tmp_buf16 = (CONV_BUF_TYPE *)tmp_buf;
- conv_params->dst = tmp_buf16;
- conv_params->dst_stride = tmp_buf_stride;
- assert(conv_params->do_average == 0);
-
- // This will generate a prediction in tmp_buf for the second reference
- av1_make_inter_predictor(pre, pre_stride, tmp_dst, MAX_SB_SIZE, subpel_params,
- sf, w, h, conv_params, interp_filters, warp_types,
- p_col, p_row, plane, ref, mi, 0, xd,
- can_use_previous);
-
- if (!plane && comp_data->type == COMPOUND_DIFFWTD) {
- av1_build_compound_diffwtd_mask_d16(
- comp_data->seg_mask, comp_data->mask_type, org_dst, org_dst_stride,
- tmp_buf16, tmp_buf_stride, h, w, conv_params, xd->bd);
- }
- build_masked_compound_no_round(dst, dst_stride, org_dst, org_dst_stride,
- tmp_buf16, tmp_buf_stride, comp_data,
- mi->sb_type, h, w, conv_params, xd);
-}
-
-void av1_jnt_comp_weight_assign(const AV1_COMMON *cm, const MB_MODE_INFO *mbmi,
- int order_idx, int *fwd_offset, int *bck_offset,
- int *use_jnt_comp_avg, int is_compound) {
- assert(fwd_offset != NULL && bck_offset != NULL);
- if (!is_compound || mbmi->compound_idx) {
- *use_jnt_comp_avg = 0;
- return;
- }
-
- *use_jnt_comp_avg = 1;
- const int bck_idx = cm->frame_refs[mbmi->ref_frame[0] - LAST_FRAME].idx;
- const int fwd_idx = cm->frame_refs[mbmi->ref_frame[1] - LAST_FRAME].idx;
- const int cur_frame_index = cm->cur_frame->cur_frame_offset;
- int bck_frame_index = 0, fwd_frame_index = 0;
-
- if (bck_idx >= 0) {
- bck_frame_index = cm->buffer_pool->frame_bufs[bck_idx].cur_frame_offset;
- }
-
- if (fwd_idx >= 0) {
- fwd_frame_index = cm->buffer_pool->frame_bufs[fwd_idx].cur_frame_offset;
- }
-
- int d0 = clamp(abs(get_relative_dist(cm, fwd_frame_index, cur_frame_index)),
- 0, MAX_FRAME_DISTANCE);
- int d1 = clamp(abs(get_relative_dist(cm, cur_frame_index, bck_frame_index)),
- 0, MAX_FRAME_DISTANCE);
-
- const int order = d0 <= d1;
-
- if (d0 == 0 || d1 == 0) {
- *fwd_offset = quant_dist_lookup_table[order_idx][3][order];
- *bck_offset = quant_dist_lookup_table[order_idx][3][1 - order];
- return;
- }
-
- int i;
- for (i = 0; i < 3; ++i) {
- int c0 = quant_dist_weight[i][order];
- int c1 = quant_dist_weight[i][!order];
- int d0_c0 = d0 * c0;
- int d1_c1 = d1 * c1;
- if ((d0 > d1 && d0_c0 < d1_c1) || (d0 <= d1 && d0_c0 > d1_c1)) break;
- }
-
- *fwd_offset = quant_dist_lookup_table[order_idx][i][order];
- *bck_offset = quant_dist_lookup_table[order_idx][i][1 - order];
-}
-
-void av1_setup_dst_planes(struct macroblockd_plane *planes, BLOCK_SIZE bsize,
- const YV12_BUFFER_CONFIG *src, int mi_row, int mi_col,
- const int plane_start, const int plane_end) {
- // We use AOMMIN(num_planes, MAX_MB_PLANE) instead of num_planes to quiet
- // the static analysis warnings.
- for (int i = plane_start; i < AOMMIN(plane_end, MAX_MB_PLANE); ++i) {
- struct macroblockd_plane *const pd = &planes[i];
- const int is_uv = i > 0;
- setup_pred_plane(&pd->dst, bsize, src->buffers[i], src->crop_widths[is_uv],
- src->crop_heights[is_uv], src->strides[is_uv], mi_row,
- mi_col, NULL, pd->subsampling_x, pd->subsampling_y);
- }
-}
-
-void av1_setup_pre_planes(MACROBLOCKD *xd, int idx,
- const YV12_BUFFER_CONFIG *src, int mi_row, int mi_col,
- const struct scale_factors *sf,
- const int num_planes) {
- if (src != NULL) {
- // We use AOMMIN(num_planes, MAX_MB_PLANE) instead of num_planes to quiet
- // the static analysis warnings.
- for (int i = 0; i < AOMMIN(num_planes, MAX_MB_PLANE); ++i) {
- struct macroblockd_plane *const pd = &xd->plane[i];
- const int is_uv = i > 0;
- setup_pred_plane(&pd->pre[idx], xd->mi[0]->sb_type, src->buffers[i],
- src->crop_widths[is_uv], src->crop_heights[is_uv],
- src->strides[is_uv], mi_row, mi_col, sf,
- pd->subsampling_x, pd->subsampling_y);
- }
- }
-}
-
-// obmc_mask_N[overlap_position]
-static const uint8_t obmc_mask_1[1] = { 64 };
-
-static const uint8_t obmc_mask_2[2] = { 45, 64 };
-
-static const uint8_t obmc_mask_4[4] = { 39, 50, 59, 64 };
-
-static const uint8_t obmc_mask_8[8] = { 36, 42, 48, 53, 57, 61, 64, 64 };
-
-static const uint8_t obmc_mask_16[16] = { 34, 37, 40, 43, 46, 49, 52, 54,
- 56, 58, 60, 61, 64, 64, 64, 64 };
-
-static const uint8_t obmc_mask_32[32] = { 33, 35, 36, 38, 40, 41, 43, 44,
- 45, 47, 48, 50, 51, 52, 53, 55,
- 56, 57, 58, 59, 60, 60, 61, 62,
- 64, 64, 64, 64, 64, 64, 64, 64 };
-
-static const uint8_t obmc_mask_64[64] = {
- 33, 34, 35, 35, 36, 37, 38, 39, 40, 40, 41, 42, 43, 44, 44, 44,
- 45, 46, 47, 47, 48, 49, 50, 51, 51, 51, 52, 52, 53, 54, 55, 56,
- 56, 56, 57, 57, 58, 58, 59, 60, 60, 60, 60, 60, 61, 62, 62, 62,
- 62, 62, 63, 63, 63, 63, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
-};
-
-const uint8_t *av1_get_obmc_mask(int length) {
- switch (length) {
- case 1: return obmc_mask_1;
- case 2: return obmc_mask_2;
- case 4: return obmc_mask_4;
- case 8: return obmc_mask_8;
- case 16: return obmc_mask_16;
- case 32: return obmc_mask_32;
- case 64: return obmc_mask_64;
- default: assert(0); return NULL;
- }
-}
-
-static INLINE void increment_int_ptr(MACROBLOCKD *xd, int rel_mi_rc,
- uint8_t mi_hw, MB_MODE_INFO *mi,
- void *fun_ctxt, const int num_planes) {
- (void)xd;
- (void)rel_mi_rc;
- (void)mi_hw;
- (void)mi;
- ++*(int *)fun_ctxt;
- (void)num_planes;
-}
-
-void av1_count_overlappable_neighbors(const AV1_COMMON *cm, MACROBLOCKD *xd,
- int mi_row, int mi_col) {
- MB_MODE_INFO *mbmi = xd->mi[0];
-
- mbmi->overlappable_neighbors[0] = 0;
- mbmi->overlappable_neighbors[1] = 0;
-
- if (!is_motion_variation_allowed_bsize(mbmi->sb_type)) return;
-
- foreach_overlappable_nb_above(cm, xd, mi_col, INT_MAX, increment_int_ptr,
- &mbmi->overlappable_neighbors[0]);
- foreach_overlappable_nb_left(cm, xd, mi_row, INT_MAX, increment_int_ptr,
- &mbmi->overlappable_neighbors[1]);
-}
-
-// HW does not support < 4x4 prediction. To limit the bandwidth requirement, if
-// block-size of current plane is smaller than 8x8, always only blend with the
-// left neighbor(s) (skip blending with the above side).
-#define DISABLE_CHROMA_U8X8_OBMC 0 // 0: one-sided obmc; 1: disable
-
-int av1_skip_u4x4_pred_in_obmc(BLOCK_SIZE bsize,
- const struct macroblockd_plane *pd, int dir) {
- assert(is_motion_variation_allowed_bsize(bsize));
-
- const BLOCK_SIZE bsize_plane =
- get_plane_block_size(bsize, pd->subsampling_x, pd->subsampling_y);
- switch (bsize_plane) {
-#if DISABLE_CHROMA_U8X8_OBMC
- case BLOCK_4X4:
- case BLOCK_8X4:
- case BLOCK_4X8: return 1; break;
-#else
- case BLOCK_4X4:
- case BLOCK_8X4:
- case BLOCK_4X8: return dir == 0; break;
-#endif
- default: return 0;
- }
-}
-
-void av1_modify_neighbor_predictor_for_obmc(MB_MODE_INFO *mbmi) {
- mbmi->ref_frame[1] = NONE_FRAME;
- mbmi->interinter_comp.type = COMPOUND_AVERAGE;
-
- return;
-}
-
-struct obmc_inter_pred_ctxt {
- uint8_t **adjacent;
- int *adjacent_stride;
-};
-
-static INLINE void build_obmc_inter_pred_above(MACROBLOCKD *xd, int rel_mi_col,
- uint8_t above_mi_width,
- MB_MODE_INFO *above_mi,
- void *fun_ctxt,
- const int num_planes) {
- (void)above_mi;
- struct obmc_inter_pred_ctxt *ctxt = (struct obmc_inter_pred_ctxt *)fun_ctxt;
- const BLOCK_SIZE bsize = xd->mi[0]->sb_type;
- const int is_hbd = (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) ? 1 : 0;
- const int overlap =
- AOMMIN(block_size_high[bsize], block_size_high[BLOCK_64X64]) >> 1;
-
- for (int plane = 0; plane < num_planes; ++plane) {
- const struct macroblockd_plane *pd = &xd->plane[plane];
- const int bw = (above_mi_width * MI_SIZE) >> pd->subsampling_x;
- const int bh = overlap >> pd->subsampling_y;
- const int plane_col = (rel_mi_col * MI_SIZE) >> pd->subsampling_x;
-
- if (av1_skip_u4x4_pred_in_obmc(bsize, pd, 0)) continue;
-
- const int dst_stride = pd->dst.stride;
- uint8_t *const dst = &pd->dst.buf[plane_col];
- const int tmp_stride = ctxt->adjacent_stride[plane];
- const uint8_t *const tmp = &ctxt->adjacent[plane][plane_col];
- const uint8_t *const mask = av1_get_obmc_mask(bh);
-
- if (is_hbd)
- aom_highbd_blend_a64_vmask(dst, dst_stride, dst, dst_stride, tmp,
- tmp_stride, mask, bw, bh, xd->bd);
- else
- aom_blend_a64_vmask(dst, dst_stride, dst, dst_stride, tmp, tmp_stride,
- mask, bw, bh);
- }
-}
-
-static INLINE void build_obmc_inter_pred_left(MACROBLOCKD *xd, int rel_mi_row,
- uint8_t left_mi_height,
- MB_MODE_INFO *left_mi,
- void *fun_ctxt,
- const int num_planes) {
- (void)left_mi;
- struct obmc_inter_pred_ctxt *ctxt = (struct obmc_inter_pred_ctxt *)fun_ctxt;
- const BLOCK_SIZE bsize = xd->mi[0]->sb_type;
- const int overlap =
- AOMMIN(block_size_wide[bsize], block_size_wide[BLOCK_64X64]) >> 1;
- const int is_hbd = (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) ? 1 : 0;
-
- for (int plane = 0; plane < num_planes; ++plane) {
- const struct macroblockd_plane *pd = &xd->plane[plane];
- const int bw = overlap >> pd->subsampling_x;
- const int bh = (left_mi_height * MI_SIZE) >> pd->subsampling_y;
- const int plane_row = (rel_mi_row * MI_SIZE) >> pd->subsampling_y;
-
- if (av1_skip_u4x4_pred_in_obmc(bsize, pd, 1)) continue;
-
- const int dst_stride = pd->dst.stride;
- uint8_t *const dst = &pd->dst.buf[plane_row * dst_stride];
- const int tmp_stride = ctxt->adjacent_stride[plane];
- const uint8_t *const tmp = &ctxt->adjacent[plane][plane_row * tmp_stride];
- const uint8_t *const mask = av1_get_obmc_mask(bw);
-
- if (is_hbd)
- aom_highbd_blend_a64_hmask(dst, dst_stride, dst, dst_stride, tmp,
- tmp_stride, mask, bw, bh, xd->bd);
- else
- aom_blend_a64_hmask(dst, dst_stride, dst, dst_stride, tmp, tmp_stride,
- mask, bw, bh);
- }
-}
-
-// This function combines motion compensated predictions that are generated by
-// top/left neighboring blocks' inter predictors with the regular inter
-// prediction. We assume the original prediction (bmc) is stored in
-// xd->plane[].dst.buf
-void av1_build_obmc_inter_prediction(const AV1_COMMON *cm, MACROBLOCKD *xd,
- int mi_row, int mi_col,
- uint8_t *above[MAX_MB_PLANE],
- int above_stride[MAX_MB_PLANE],
- uint8_t *left[MAX_MB_PLANE],
- int left_stride[MAX_MB_PLANE]) {
- const BLOCK_SIZE bsize = xd->mi[0]->sb_type;
-
- // handle above row
- struct obmc_inter_pred_ctxt ctxt_above = { above, above_stride };
- foreach_overlappable_nb_above(cm, xd, mi_col,
- max_neighbor_obmc[mi_size_wide_log2[bsize]],
- build_obmc_inter_pred_above, &ctxt_above);
-
- // handle left column
- struct obmc_inter_pred_ctxt ctxt_left = { left, left_stride };
- foreach_overlappable_nb_left(cm, xd, mi_row,
- max_neighbor_obmc[mi_size_high_log2[bsize]],
- build_obmc_inter_pred_left, &ctxt_left);
-}
-
-void av1_setup_build_prediction_by_above_pred(
- MACROBLOCKD *xd, int rel_mi_col, uint8_t above_mi_width,
- MB_MODE_INFO *above_mbmi, struct build_prediction_ctxt *ctxt,
- const int num_planes) {
- const BLOCK_SIZE a_bsize = AOMMAX(BLOCK_8X8, above_mbmi->sb_type);
- const int above_mi_col = ctxt->mi_col + rel_mi_col;
-
- av1_modify_neighbor_predictor_for_obmc(above_mbmi);
-
- for (int j = 0; j < num_planes; ++j) {
- struct macroblockd_plane *const pd = &xd->plane[j];
- setup_pred_plane(&pd->dst, a_bsize, ctxt->tmp_buf[j], ctxt->tmp_width[j],
- ctxt->tmp_height[j], ctxt->tmp_stride[j], 0, rel_mi_col,
- NULL, pd->subsampling_x, pd->subsampling_y);
- }
-
- const int num_refs = 1 + has_second_ref(above_mbmi);
-
- for (int ref = 0; ref < num_refs; ++ref) {
- const MV_REFERENCE_FRAME frame = above_mbmi->ref_frame[ref];
-
- const RefBuffer *const ref_buf = &ctxt->cm->frame_refs[frame - LAST_FRAME];
-
- xd->block_refs[ref] = ref_buf;
- if ((!av1_is_valid_scale(&ref_buf->sf)))
- aom_internal_error(xd->error_info, AOM_CODEC_UNSUP_BITSTREAM,
- "Reference frame has invalid dimensions");
- av1_setup_pre_planes(xd, ref, ref_buf->buf, ctxt->mi_row, above_mi_col,
- &ref_buf->sf, num_planes);
- }
-
- xd->mb_to_left_edge = 8 * MI_SIZE * (-above_mi_col);
- xd->mb_to_right_edge = ctxt->mb_to_far_edge +
- (xd->n4_w - rel_mi_col - above_mi_width) * MI_SIZE * 8;
-}
-
-void av1_setup_build_prediction_by_left_pred(MACROBLOCKD *xd, int rel_mi_row,
- uint8_t left_mi_height,
- MB_MODE_INFO *left_mbmi,
- struct build_prediction_ctxt *ctxt,
- const int num_planes) {
- const BLOCK_SIZE l_bsize = AOMMAX(BLOCK_8X8, left_mbmi->sb_type);
- const int left_mi_row = ctxt->mi_row + rel_mi_row;
-
- av1_modify_neighbor_predictor_for_obmc(left_mbmi);
-
- for (int j = 0; j < num_planes; ++j) {
- struct macroblockd_plane *const pd = &xd->plane[j];
- setup_pred_plane(&pd->dst, l_bsize, ctxt->tmp_buf[j], ctxt->tmp_width[j],
- ctxt->tmp_height[j], ctxt->tmp_stride[j], rel_mi_row, 0,
- NULL, pd->subsampling_x, pd->subsampling_y);
- }
-
- const int num_refs = 1 + has_second_ref(left_mbmi);
-
- for (int ref = 0; ref < num_refs; ++ref) {
- const MV_REFERENCE_FRAME frame = left_mbmi->ref_frame[ref];
-
- const RefBuffer *const ref_buf = &ctxt->cm->frame_refs[frame - LAST_FRAME];
-
- xd->block_refs[ref] = ref_buf;
- if ((!av1_is_valid_scale(&ref_buf->sf)))
- aom_internal_error(xd->error_info, AOM_CODEC_UNSUP_BITSTREAM,
- "Reference frame has invalid dimensions");
- av1_setup_pre_planes(xd, ref, ref_buf->buf, left_mi_row, ctxt->mi_col,
- &ref_buf->sf, num_planes);
- }
-
- xd->mb_to_top_edge = 8 * MI_SIZE * (-left_mi_row);
- xd->mb_to_bottom_edge =
- ctxt->mb_to_far_edge +
- (xd->n4_h - rel_mi_row - left_mi_height) * MI_SIZE * 8;
-}
-
-/* clang-format off */
-static const uint8_t ii_weights1d[MAX_SB_SIZE] = {
- 60, 58, 56, 54, 52, 50, 48, 47, 45, 44, 42, 41, 39, 38, 37, 35, 34, 33, 32,
- 31, 30, 29, 28, 27, 26, 25, 24, 23, 22, 22, 21, 20, 19, 19, 18, 18, 17, 16,
- 16, 15, 15, 14, 14, 13, 13, 12, 12, 12, 11, 11, 10, 10, 10, 9, 9, 9, 8,
- 8, 8, 8, 7, 7, 7, 7, 6, 6, 6, 6, 6, 5, 5, 5, 5, 5, 4, 4,
- 4, 4, 4, 4, 4, 4, 3, 3, 3, 3, 3, 3, 3, 3, 3, 2, 2, 2, 2,
- 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1
-};
-static uint8_t ii_size_scales[BLOCK_SIZES_ALL] = {
- 32, 16, 16, 16, 8, 8, 8, 4,
- 4, 4, 2, 2, 2, 1, 1, 1,
- 8, 8, 4, 4, 2, 2
-};
-/* clang-format on */
-
-static void build_smooth_interintra_mask(uint8_t *mask, int stride,
- BLOCK_SIZE plane_bsize,
- INTERINTRA_MODE mode) {
- int i, j;
- const int bw = block_size_wide[plane_bsize];
- const int bh = block_size_high[plane_bsize];
- const int size_scale = ii_size_scales[plane_bsize];
-
- switch (mode) {
- case II_V_PRED:
- for (i = 0; i < bh; ++i) {
- memset(mask, ii_weights1d[i * size_scale], bw * sizeof(mask[0]));
- mask += stride;
- }
- break;
-
- case II_H_PRED:
- for (i = 0; i < bh; ++i) {
- for (j = 0; j < bw; ++j) mask[j] = ii_weights1d[j * size_scale];
- mask += stride;
- }
- break;
-
- case II_SMOOTH_PRED:
- for (i = 0; i < bh; ++i) {
- for (j = 0; j < bw; ++j)
- mask[j] = ii_weights1d[(i < j ? i : j) * size_scale];
- mask += stride;
- }
- break;
-
- case II_DC_PRED:
- default:
- for (i = 0; i < bh; ++i) {
- memset(mask, 32, bw * sizeof(mask[0]));
- mask += stride;
- }
- break;
- }
-}
-
-static void combine_interintra(INTERINTRA_MODE mode, int use_wedge_interintra,
- int wedge_index, int wedge_sign,
- BLOCK_SIZE bsize, BLOCK_SIZE plane_bsize,
- uint8_t *comppred, int compstride,
- const uint8_t *interpred, int interstride,
- const uint8_t *intrapred, int intrastride) {
- const int bw = block_size_wide[plane_bsize];
- const int bh = block_size_high[plane_bsize];
-
- if (use_wedge_interintra) {
- if (is_interintra_wedge_used(bsize)) {
- const uint8_t *mask =
- av1_get_contiguous_soft_mask(wedge_index, wedge_sign, bsize);
- const int subw = 2 * mi_size_wide[bsize] == bw;
- const int subh = 2 * mi_size_high[bsize] == bh;
- aom_blend_a64_mask(comppred, compstride, intrapred, intrastride,
- interpred, interstride, mask, block_size_wide[bsize],
- bw, bh, subw, subh);
- }
- return;
- }
-
- uint8_t mask[MAX_SB_SQUARE];
- build_smooth_interintra_mask(mask, bw, plane_bsize, mode);
- aom_blend_a64_mask(comppred, compstride, intrapred, intrastride, interpred,
- interstride, mask, bw, bw, bh, 0, 0);
-}
-
-static void combine_interintra_highbd(
- INTERINTRA_MODE mode, int use_wedge_interintra, int wedge_index,
- int wedge_sign, BLOCK_SIZE bsize, BLOCK_SIZE plane_bsize,
- uint8_t *comppred8, int compstride, const uint8_t *interpred8,
- int interstride, const uint8_t *intrapred8, int intrastride, int bd) {
- const int bw = block_size_wide[plane_bsize];
- const int bh = block_size_high[plane_bsize];
-
- if (use_wedge_interintra) {
- if (is_interintra_wedge_used(bsize)) {
- const uint8_t *mask =
- av1_get_contiguous_soft_mask(wedge_index, wedge_sign, bsize);
- const int subh = 2 * mi_size_high[bsize] == bh;
- const int subw = 2 * mi_size_wide[bsize] == bw;
- aom_highbd_blend_a64_mask(comppred8, compstride, intrapred8, intrastride,
- interpred8, interstride, mask,
- block_size_wide[bsize], bw, bh, subw, subh, bd);
- }
- return;
- }
-
- uint8_t mask[MAX_SB_SQUARE];
- build_smooth_interintra_mask(mask, bw, plane_bsize, mode);
- aom_highbd_blend_a64_mask(comppred8, compstride, intrapred8, intrastride,
- interpred8, interstride, mask, bw, bw, bh, 0, 0,
- bd);
-}
-
-void av1_build_intra_predictors_for_interintra(const AV1_COMMON *cm,
- MACROBLOCKD *xd,
- BLOCK_SIZE bsize, int plane,
- BUFFER_SET *ctx, uint8_t *dst,
- int dst_stride) {
- struct macroblockd_plane *const pd = &xd->plane[plane];
- const int ssx = xd->plane[plane].subsampling_x;
- const int ssy = xd->plane[plane].subsampling_y;
- BLOCK_SIZE plane_bsize = get_plane_block_size(bsize, ssx, ssy);
- PREDICTION_MODE mode = interintra_to_intra_mode[xd->mi[0]->interintra_mode];
- assert(xd->mi[0]->angle_delta[PLANE_TYPE_Y] == 0);
- assert(xd->mi[0]->angle_delta[PLANE_TYPE_UV] == 0);
- assert(xd->mi[0]->filter_intra_mode_info.use_filter_intra == 0);
- assert(xd->mi[0]->use_intrabc == 0);
-
- av1_predict_intra_block(cm, xd, pd->width, pd->height,
- max_txsize_rect_lookup[plane_bsize], mode, 0, 0,
- FILTER_INTRA_MODES, ctx->plane[plane],
- ctx->stride[plane], dst, dst_stride, 0, 0, plane);
-}
-
-void av1_combine_interintra(MACROBLOCKD *xd, BLOCK_SIZE bsize, int plane,
- const uint8_t *inter_pred, int inter_stride,
- const uint8_t *intra_pred, int intra_stride) {
- const int ssx = xd->plane[plane].subsampling_x;
- const int ssy = xd->plane[plane].subsampling_y;
- const BLOCK_SIZE plane_bsize = get_plane_block_size(bsize, ssx, ssy);
- if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
- combine_interintra_highbd(
- xd->mi[0]->interintra_mode, xd->mi[0]->use_wedge_interintra,
- xd->mi[0]->interintra_wedge_index, xd->mi[0]->interintra_wedge_sign,
- bsize, plane_bsize, xd->plane[plane].dst.buf,
- xd->plane[plane].dst.stride, inter_pred, inter_stride, intra_pred,
- intra_stride, xd->bd);
- return;
- }
- combine_interintra(
- xd->mi[0]->interintra_mode, xd->mi[0]->use_wedge_interintra,
- xd->mi[0]->interintra_wedge_index, xd->mi[0]->interintra_wedge_sign,
- bsize, plane_bsize, xd->plane[plane].dst.buf, xd->plane[plane].dst.stride,
- inter_pred, inter_stride, intra_pred, intra_stride);
-}
-
-// build interintra_predictors for one plane
-void av1_build_interintra_predictors_sbp(const AV1_COMMON *cm, MACROBLOCKD *xd,
- uint8_t *pred, int stride,
- BUFFER_SET *ctx, int plane,
- BLOCK_SIZE bsize) {
- if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
- DECLARE_ALIGNED(16, uint16_t, intrapredictor[MAX_SB_SQUARE]);
- av1_build_intra_predictors_for_interintra(
- cm, xd, bsize, plane, ctx, CONVERT_TO_BYTEPTR(intrapredictor),
- MAX_SB_SIZE);
- av1_combine_interintra(xd, bsize, plane, pred, stride,
- CONVERT_TO_BYTEPTR(intrapredictor), MAX_SB_SIZE);
- } else {
- DECLARE_ALIGNED(16, uint8_t, intrapredictor[MAX_SB_SQUARE]);
- av1_build_intra_predictors_for_interintra(cm, xd, bsize, plane, ctx,
- intrapredictor, MAX_SB_SIZE);
- av1_combine_interintra(xd, bsize, plane, pred, stride, intrapredictor,
- MAX_SB_SIZE);
- }
-}
-
-void av1_build_interintra_predictors_sbuv(const AV1_COMMON *cm, MACROBLOCKD *xd,
- uint8_t *upred, uint8_t *vpred,
- int ustride, int vstride,
- BUFFER_SET *ctx, BLOCK_SIZE bsize) {
- av1_build_interintra_predictors_sbp(cm, xd, upred, ustride, ctx, 1, bsize);
- av1_build_interintra_predictors_sbp(cm, xd, vpred, vstride, ctx, 2, bsize);
-}
diff --git a/third_party/aom/av1/common/reconinter.h b/third_party/aom/av1/common/reconinter.h
deleted file mode 100644
index db86c777e..000000000
--- a/third_party/aom/av1/common/reconinter.h
+++ /dev/null
@@ -1,365 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#ifndef AOM_AV1_COMMON_RECONINTER_H_
-#define AOM_AV1_COMMON_RECONINTER_H_
-
-#include "av1/common/filter.h"
-#include "av1/common/onyxc_int.h"
-#include "av1/common/convolve.h"
-#include "av1/common/warped_motion.h"
-#include "aom/aom_integer.h"
-
-// Work out how many pixels off the edge of a reference frame we're allowed
-// to go when forming an inter prediction.
-// The outermost row/col of each referernce frame is extended by
-// (AOM_BORDER_IN_PIXELS >> subsampling) pixels, but we need to keep
-// at least AOM_INTERP_EXTEND pixels within that to account for filtering.
-//
-// We have to break this up into two macros to keep both clang-format and
-// tools/lint-hunks.py happy.
-#define AOM_LEFT_TOP_MARGIN_PX(subsampling) \
- ((AOM_BORDER_IN_PIXELS >> subsampling) - AOM_INTERP_EXTEND)
-#define AOM_LEFT_TOP_MARGIN_SCALED(subsampling) \
- (AOM_LEFT_TOP_MARGIN_PX(subsampling) << SCALE_SUBPEL_BITS)
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-// Set to (1 << 5) if the 32-ary codebooks are used for any bock size
-#define MAX_WEDGE_TYPES (1 << 4)
-
-#define MAX_WEDGE_SIZE_LOG2 5 // 32x32
-#define MAX_WEDGE_SIZE (1 << MAX_WEDGE_SIZE_LOG2)
-#define MAX_WEDGE_SQUARE (MAX_WEDGE_SIZE * MAX_WEDGE_SIZE)
-
-#define WEDGE_WEIGHT_BITS 6
-
-#define WEDGE_NONE -1
-
-// Angles are with respect to horizontal anti-clockwise
-typedef enum {
- WEDGE_HORIZONTAL = 0,
- WEDGE_VERTICAL = 1,
- WEDGE_OBLIQUE27 = 2,
- WEDGE_OBLIQUE63 = 3,
- WEDGE_OBLIQUE117 = 4,
- WEDGE_OBLIQUE153 = 5,
- WEDGE_DIRECTIONS
-} WedgeDirectionType;
-
-// 3-tuple: {direction, x_offset, y_offset}
-typedef struct {
- WedgeDirectionType direction;
- int x_offset;
- int y_offset;
-} wedge_code_type;
-
-typedef uint8_t *wedge_masks_type[MAX_WEDGE_TYPES];
-
-typedef struct {
- int bits;
- const wedge_code_type *codebook;
- uint8_t *signflip;
- wedge_masks_type *masks;
-} wedge_params_type;
-
-extern const wedge_params_type wedge_params_lookup[BLOCK_SIZES_ALL];
-
-typedef struct SubpelParams {
- int xs;
- int ys;
- int subpel_x;
- int subpel_y;
-} SubpelParams;
-
-struct build_prediction_ctxt {
- const AV1_COMMON *cm;
- int mi_row;
- int mi_col;
- uint8_t **tmp_buf;
- int *tmp_width;
- int *tmp_height;
- int *tmp_stride;
- int mb_to_far_edge;
-};
-
-static INLINE int has_scale(int xs, int ys) {
- return xs != SCALE_SUBPEL_SHIFTS || ys != SCALE_SUBPEL_SHIFTS;
-}
-
-static INLINE void revert_scale_extra_bits(SubpelParams *sp) {
- sp->subpel_x >>= SCALE_EXTRA_BITS;
- sp->subpel_y >>= SCALE_EXTRA_BITS;
- sp->xs >>= SCALE_EXTRA_BITS;
- sp->ys >>= SCALE_EXTRA_BITS;
- assert(sp->subpel_x < SUBPEL_SHIFTS);
- assert(sp->subpel_y < SUBPEL_SHIFTS);
- assert(sp->xs <= SUBPEL_SHIFTS);
- assert(sp->ys <= SUBPEL_SHIFTS);
-}
-
-static INLINE void inter_predictor(const uint8_t *src, int src_stride,
- uint8_t *dst, int dst_stride,
- const SubpelParams *subpel_params,
- const struct scale_factors *sf, int w, int h,
- ConvolveParams *conv_params,
- InterpFilters interp_filters,
- int is_intrabc) {
- assert(conv_params->do_average == 0 || conv_params->do_average == 1);
- assert(sf);
- const int is_scaled = has_scale(subpel_params->xs, subpel_params->ys);
- assert(IMPLIES(is_intrabc, !is_scaled));
- if (is_scaled) {
- av1_convolve_2d_facade(src, src_stride, dst, dst_stride, w, h,
- interp_filters, subpel_params->subpel_x,
- subpel_params->xs, subpel_params->subpel_y,
- subpel_params->ys, 1, conv_params, sf, is_intrabc);
- } else {
- SubpelParams sp = *subpel_params;
- revert_scale_extra_bits(&sp);
- av1_convolve_2d_facade(src, src_stride, dst, dst_stride, w, h,
- interp_filters, sp.subpel_x, sp.xs, sp.subpel_y,
- sp.ys, 0, conv_params, sf, is_intrabc);
- }
-}
-
-static INLINE void highbd_inter_predictor(const uint8_t *src, int src_stride,
- uint8_t *dst, int dst_stride,
- const SubpelParams *subpel_params,
- const struct scale_factors *sf, int w,
- int h, ConvolveParams *conv_params,
- InterpFilters interp_filters,
- int is_intrabc, int bd) {
- assert(conv_params->do_average == 0 || conv_params->do_average == 1);
- assert(sf);
- const int is_scaled = has_scale(subpel_params->xs, subpel_params->ys);
- assert(IMPLIES(is_intrabc, !is_scaled));
- if (is_scaled) {
- av1_highbd_convolve_2d_facade(
- src, src_stride, dst, dst_stride, w, h, interp_filters,
- subpel_params->subpel_x, subpel_params->xs, subpel_params->subpel_y,
- subpel_params->ys, 1, conv_params, sf, is_intrabc, bd);
- } else {
- SubpelParams sp = *subpel_params;
- revert_scale_extra_bits(&sp);
- av1_highbd_convolve_2d_facade(
- src, src_stride, dst, dst_stride, w, h, interp_filters, sp.subpel_x,
- sp.xs, sp.subpel_y, sp.ys, 0, conv_params, sf, is_intrabc, bd);
- }
-}
-
-void av1_modify_neighbor_predictor_for_obmc(MB_MODE_INFO *mbmi);
-int av1_skip_u4x4_pred_in_obmc(BLOCK_SIZE bsize,
- const struct macroblockd_plane *pd, int dir);
-
-static INLINE int is_interinter_compound_used(COMPOUND_TYPE type,
- BLOCK_SIZE sb_type) {
- const int comp_allowed = is_comp_ref_allowed(sb_type);
- switch (type) {
- case COMPOUND_AVERAGE:
- case COMPOUND_DIFFWTD: return comp_allowed;
- case COMPOUND_WEDGE:
- return comp_allowed && wedge_params_lookup[sb_type].bits > 0;
- default: assert(0); return 0;
- }
-}
-
-static INLINE int is_any_masked_compound_used(BLOCK_SIZE sb_type) {
- COMPOUND_TYPE comp_type;
- int i;
- if (!is_comp_ref_allowed(sb_type)) return 0;
- for (i = 0; i < COMPOUND_TYPES; i++) {
- comp_type = (COMPOUND_TYPE)i;
- if (is_masked_compound_type(comp_type) &&
- is_interinter_compound_used(comp_type, sb_type))
- return 1;
- }
- return 0;
-}
-
-static INLINE int get_wedge_bits_lookup(BLOCK_SIZE sb_type) {
- return wedge_params_lookup[sb_type].bits;
-}
-
-static INLINE int get_interinter_wedge_bits(BLOCK_SIZE sb_type) {
- const int wbits = wedge_params_lookup[sb_type].bits;
- return (wbits > 0) ? wbits + 1 : 0;
-}
-
-static INLINE int is_interintra_wedge_used(BLOCK_SIZE sb_type) {
- return wedge_params_lookup[sb_type].bits > 0;
-}
-
-static INLINE int get_interintra_wedge_bits(BLOCK_SIZE sb_type) {
- return wedge_params_lookup[sb_type].bits;
-}
-
-void av1_make_inter_predictor(const uint8_t *src, int src_stride, uint8_t *dst,
- int dst_stride, const SubpelParams *subpel_params,
- const struct scale_factors *sf, int w, int h,
- ConvolveParams *conv_params,
- InterpFilters interp_filters,
- const WarpTypesAllowed *warp_types, int p_col,
- int p_row, int plane, int ref,
- const MB_MODE_INFO *mi, int build_for_obmc,
- const MACROBLOCKD *xd, int can_use_previous);
-
-void av1_make_masked_inter_predictor(
- const uint8_t *pre, int pre_stride, uint8_t *dst, int dst_stride,
- const SubpelParams *subpel_params, const struct scale_factors *sf, int w,
- int h, ConvolveParams *conv_params, InterpFilters interp_filters, int plane,
- const WarpTypesAllowed *warp_types, int p_col, int p_row, int ref,
- MACROBLOCKD *xd, int can_use_previous);
-
-// TODO(jkoleszar): yet another mv clamping function :-(
-static INLINE MV clamp_mv_to_umv_border_sb(const MACROBLOCKD *xd,
- const MV *src_mv, int bw, int bh,
- int ss_x, int ss_y) {
- // If the MV points so far into the UMV border that no visible pixels
- // are used for reconstruction, the subpel part of the MV can be
- // discarded and the MV limited to 16 pixels with equivalent results.
- const int spel_left = (AOM_INTERP_EXTEND + bw) << SUBPEL_BITS;
- const int spel_right = spel_left - SUBPEL_SHIFTS;
- const int spel_top = (AOM_INTERP_EXTEND + bh) << SUBPEL_BITS;
- const int spel_bottom = spel_top - SUBPEL_SHIFTS;
- MV clamped_mv = { (int16_t)(src_mv->row * (1 << (1 - ss_y))),
- (int16_t)(src_mv->col * (1 << (1 - ss_x))) };
- assert(ss_x <= 1);
- assert(ss_y <= 1);
-
- clamp_mv(&clamped_mv, xd->mb_to_left_edge * (1 << (1 - ss_x)) - spel_left,
- xd->mb_to_right_edge * (1 << (1 - ss_x)) + spel_right,
- xd->mb_to_top_edge * (1 << (1 - ss_y)) - spel_top,
- xd->mb_to_bottom_edge * (1 << (1 - ss_y)) + spel_bottom);
-
- return clamped_mv;
-}
-
-static INLINE int scaled_buffer_offset(int x_offset, int y_offset, int stride,
- const struct scale_factors *sf) {
- const int x =
- sf ? sf->scale_value_x(x_offset, sf) >> SCALE_EXTRA_BITS : x_offset;
- const int y =
- sf ? sf->scale_value_y(y_offset, sf) >> SCALE_EXTRA_BITS : y_offset;
- return y * stride + x;
-}
-
-static INLINE void setup_pred_plane(struct buf_2d *dst, BLOCK_SIZE bsize,
- uint8_t *src, int width, int height,
- int stride, int mi_row, int mi_col,
- const struct scale_factors *scale,
- int subsampling_x, int subsampling_y) {
- // Offset the buffer pointer
- if (subsampling_y && (mi_row & 0x01) && (mi_size_high[bsize] == 1))
- mi_row -= 1;
- if (subsampling_x && (mi_col & 0x01) && (mi_size_wide[bsize] == 1))
- mi_col -= 1;
-
- const int x = (MI_SIZE * mi_col) >> subsampling_x;
- const int y = (MI_SIZE * mi_row) >> subsampling_y;
- dst->buf = src + scaled_buffer_offset(x, y, stride, scale);
- dst->buf0 = src;
- dst->width = width;
- dst->height = height;
- dst->stride = stride;
-}
-
-void av1_setup_dst_planes(struct macroblockd_plane *planes, BLOCK_SIZE bsize,
- const YV12_BUFFER_CONFIG *src, int mi_row, int mi_col,
- const int plane_start, const int plane_end);
-
-void av1_setup_pre_planes(MACROBLOCKD *xd, int idx,
- const YV12_BUFFER_CONFIG *src, int mi_row, int mi_col,
- const struct scale_factors *sf, const int num_planes);
-
-static INLINE void set_default_interp_filters(
- MB_MODE_INFO *const mbmi, InterpFilter frame_interp_filter) {
- mbmi->interp_filters =
- av1_broadcast_interp_filter(av1_unswitchable_filter(frame_interp_filter));
-}
-
-static INLINE int av1_is_interp_needed(const MACROBLOCKD *const xd) {
- const MB_MODE_INFO *const mbmi = xd->mi[0];
- if (mbmi->skip_mode) return 0;
- if (mbmi->motion_mode == WARPED_CAUSAL) return 0;
- if (is_nontrans_global_motion(xd, xd->mi[0])) return 0;
- return 1;
-}
-
-void av1_setup_build_prediction_by_above_pred(
- MACROBLOCKD *xd, int rel_mi_col, uint8_t above_mi_width,
- MB_MODE_INFO *above_mbmi, struct build_prediction_ctxt *ctxt,
- const int num_planes);
-void av1_setup_build_prediction_by_left_pred(MACROBLOCKD *xd, int rel_mi_row,
- uint8_t left_mi_height,
- MB_MODE_INFO *left_mbmi,
- struct build_prediction_ctxt *ctxt,
- const int num_planes);
-void av1_build_obmc_inter_prediction(const AV1_COMMON *cm, MACROBLOCKD *xd,
- int mi_row, int mi_col,
- uint8_t *above[MAX_MB_PLANE],
- int above_stride[MAX_MB_PLANE],
- uint8_t *left[MAX_MB_PLANE],
- int left_stride[MAX_MB_PLANE]);
-
-const uint8_t *av1_get_obmc_mask(int length);
-void av1_count_overlappable_neighbors(const AV1_COMMON *cm, MACROBLOCKD *xd,
- int mi_row, int mi_col);
-
-#define MASK_MASTER_SIZE ((MAX_WEDGE_SIZE) << 1)
-#define MASK_MASTER_STRIDE (MASK_MASTER_SIZE)
-
-void av1_init_wedge_masks();
-
-static INLINE const uint8_t *av1_get_contiguous_soft_mask(int wedge_index,
- int wedge_sign,
- BLOCK_SIZE sb_type) {
- return wedge_params_lookup[sb_type].masks[wedge_sign][wedge_index];
-}
-
-const uint8_t *av1_get_compound_type_mask(
- const INTERINTER_COMPOUND_DATA *const comp_data, BLOCK_SIZE sb_type);
-
-// build interintra_predictors for one plane
-void av1_build_interintra_predictors_sbp(const AV1_COMMON *cm, MACROBLOCKD *xd,
- uint8_t *pred, int stride,
- BUFFER_SET *ctx, int plane,
- BLOCK_SIZE bsize);
-
-void av1_build_interintra_predictors_sbuv(const AV1_COMMON *cm, MACROBLOCKD *xd,
- uint8_t *upred, uint8_t *vpred,
- int ustride, int vstride,
- BUFFER_SET *ctx, BLOCK_SIZE bsize);
-
-void av1_build_intra_predictors_for_interintra(
- const AV1_COMMON *cm, MACROBLOCKD *xd, BLOCK_SIZE bsize, int plane,
- BUFFER_SET *ctx, uint8_t *intra_pred, int intra_stride);
-
-void av1_combine_interintra(MACROBLOCKD *xd, BLOCK_SIZE bsize, int plane,
- const uint8_t *inter_pred, int inter_stride,
- const uint8_t *intra_pred, int intra_stride);
-
-void av1_jnt_comp_weight_assign(const AV1_COMMON *cm, const MB_MODE_INFO *mbmi,
- int order_idx, int *fwd_offset, int *bck_offset,
- int *use_jnt_comp_avg, int is_compound);
-int av1_allow_warp(const MB_MODE_INFO *const mbmi,
- const WarpTypesAllowed *const warp_types,
- const WarpedMotionParams *const gm_params,
- int build_for_obmc, int x_scale, int y_scale,
- WarpedMotionParams *final_warp_params);
-
-#ifdef __cplusplus
-} // extern "C"
-#endif
-
-#endif // AOM_AV1_COMMON_RECONINTER_H_
diff --git a/third_party/aom/av1/common/reconintra.c b/third_party/aom/av1/common/reconintra.c
deleted file mode 100644
index 71a52e73e..000000000
--- a/third_party/aom/av1/common/reconintra.c
+++ /dev/null
@@ -1,1640 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#include <math.h>
-
-#include "config/aom_config.h"
-#include "config/aom_dsp_rtcd.h"
-#include "config/av1_rtcd.h"
-
-#include "aom_dsp/aom_dsp_common.h"
-#include "aom_mem/aom_mem.h"
-#include "aom_ports/aom_once.h"
-#include "aom_ports/mem.h"
-#include "aom_ports/system_state.h"
-#include "av1/common/reconintra.h"
-#include "av1/common/onyxc_int.h"
-#include "av1/common/cfl.h"
-
-enum {
- NEED_LEFT = 1 << 1,
- NEED_ABOVE = 1 << 2,
- NEED_ABOVERIGHT = 1 << 3,
- NEED_ABOVELEFT = 1 << 4,
- NEED_BOTTOMLEFT = 1 << 5,
-};
-
-#define INTRA_EDGE_FILT 3
-#define INTRA_EDGE_TAPS 5
-#define MAX_UPSAMPLE_SZ 16
-
-static const uint8_t extend_modes[INTRA_MODES] = {
- NEED_ABOVE | NEED_LEFT, // DC
- NEED_ABOVE, // V
- NEED_LEFT, // H
- NEED_ABOVE | NEED_ABOVERIGHT, // D45
- NEED_LEFT | NEED_ABOVE | NEED_ABOVELEFT, // D135
- NEED_LEFT | NEED_ABOVE | NEED_ABOVELEFT, // D113
- NEED_LEFT | NEED_ABOVE | NEED_ABOVELEFT, // D157
- NEED_LEFT | NEED_BOTTOMLEFT, // D203
- NEED_ABOVE | NEED_ABOVERIGHT, // D67
- NEED_LEFT | NEED_ABOVE, // SMOOTH
- NEED_LEFT | NEED_ABOVE, // SMOOTH_V
- NEED_LEFT | NEED_ABOVE, // SMOOTH_H
- NEED_LEFT | NEED_ABOVE | NEED_ABOVELEFT, // PAETH
-};
-
-// Tables to store if the top-right reference pixels are available. The flags
-// are represented with bits, packed into 8-bit integers. E.g., for the 32x32
-// blocks in a 128x128 superblock, the index of the "o" block is 10 (in raster
-// order), so its flag is stored at the 3rd bit of the 2nd entry in the table,
-// i.e. (table[10 / 8] >> (10 % 8)) & 1.
-// . . . .
-// . . . .
-// . . o .
-// . . . .
-static uint8_t has_tr_4x4[128] = {
- 255, 255, 255, 255, 85, 85, 85, 85, 119, 119, 119, 119, 85, 85, 85, 85,
- 127, 127, 127, 127, 85, 85, 85, 85, 119, 119, 119, 119, 85, 85, 85, 85,
- 255, 127, 255, 127, 85, 85, 85, 85, 119, 119, 119, 119, 85, 85, 85, 85,
- 127, 127, 127, 127, 85, 85, 85, 85, 119, 119, 119, 119, 85, 85, 85, 85,
- 255, 255, 255, 127, 85, 85, 85, 85, 119, 119, 119, 119, 85, 85, 85, 85,
- 127, 127, 127, 127, 85, 85, 85, 85, 119, 119, 119, 119, 85, 85, 85, 85,
- 255, 127, 255, 127, 85, 85, 85, 85, 119, 119, 119, 119, 85, 85, 85, 85,
- 127, 127, 127, 127, 85, 85, 85, 85, 119, 119, 119, 119, 85, 85, 85, 85,
-};
-static uint8_t has_tr_4x8[64] = {
- 255, 255, 255, 255, 119, 119, 119, 119, 127, 127, 127, 127, 119,
- 119, 119, 119, 255, 127, 255, 127, 119, 119, 119, 119, 127, 127,
- 127, 127, 119, 119, 119, 119, 255, 255, 255, 127, 119, 119, 119,
- 119, 127, 127, 127, 127, 119, 119, 119, 119, 255, 127, 255, 127,
- 119, 119, 119, 119, 127, 127, 127, 127, 119, 119, 119, 119,
-};
-static uint8_t has_tr_8x4[64] = {
- 255, 255, 0, 0, 85, 85, 0, 0, 119, 119, 0, 0, 85, 85, 0, 0,
- 127, 127, 0, 0, 85, 85, 0, 0, 119, 119, 0, 0, 85, 85, 0, 0,
- 255, 127, 0, 0, 85, 85, 0, 0, 119, 119, 0, 0, 85, 85, 0, 0,
- 127, 127, 0, 0, 85, 85, 0, 0, 119, 119, 0, 0, 85, 85, 0, 0,
-};
-static uint8_t has_tr_8x8[32] = {
- 255, 255, 85, 85, 119, 119, 85, 85, 127, 127, 85, 85, 119, 119, 85, 85,
- 255, 127, 85, 85, 119, 119, 85, 85, 127, 127, 85, 85, 119, 119, 85, 85,
-};
-static uint8_t has_tr_8x16[16] = {
- 255, 255, 119, 119, 127, 127, 119, 119,
- 255, 127, 119, 119, 127, 127, 119, 119,
-};
-static uint8_t has_tr_16x8[16] = {
- 255, 0, 85, 0, 119, 0, 85, 0, 127, 0, 85, 0, 119, 0, 85, 0,
-};
-static uint8_t has_tr_16x16[8] = {
- 255, 85, 119, 85, 127, 85, 119, 85,
-};
-static uint8_t has_tr_16x32[4] = { 255, 119, 127, 119 };
-static uint8_t has_tr_32x16[4] = { 15, 5, 7, 5 };
-static uint8_t has_tr_32x32[2] = { 95, 87 };
-static uint8_t has_tr_32x64[1] = { 127 };
-static uint8_t has_tr_64x32[1] = { 19 };
-static uint8_t has_tr_64x64[1] = { 7 };
-static uint8_t has_tr_64x128[1] = { 3 };
-static uint8_t has_tr_128x64[1] = { 1 };
-static uint8_t has_tr_128x128[1] = { 1 };
-static uint8_t has_tr_4x16[32] = {
- 255, 255, 255, 255, 127, 127, 127, 127, 255, 127, 255,
- 127, 127, 127, 127, 127, 255, 255, 255, 127, 127, 127,
- 127, 127, 255, 127, 255, 127, 127, 127, 127, 127,
-};
-static uint8_t has_tr_16x4[32] = {
- 255, 0, 0, 0, 85, 0, 0, 0, 119, 0, 0, 0, 85, 0, 0, 0,
- 127, 0, 0, 0, 85, 0, 0, 0, 119, 0, 0, 0, 85, 0, 0, 0,
-};
-static uint8_t has_tr_8x32[8] = {
- 255, 255, 127, 127, 255, 127, 127, 127,
-};
-static uint8_t has_tr_32x8[8] = {
- 15, 0, 5, 0, 7, 0, 5, 0,
-};
-static uint8_t has_tr_16x64[2] = { 255, 127 };
-static uint8_t has_tr_64x16[2] = { 3, 1 };
-
-static const uint8_t *const has_tr_tables[BLOCK_SIZES_ALL] = {
- // 4X4
- has_tr_4x4,
- // 4X8, 8X4, 8X8
- has_tr_4x8, has_tr_8x4, has_tr_8x8,
- // 8X16, 16X8, 16X16
- has_tr_8x16, has_tr_16x8, has_tr_16x16,
- // 16X32, 32X16, 32X32
- has_tr_16x32, has_tr_32x16, has_tr_32x32,
- // 32X64, 64X32, 64X64
- has_tr_32x64, has_tr_64x32, has_tr_64x64,
- // 64x128, 128x64, 128x128
- has_tr_64x128, has_tr_128x64, has_tr_128x128,
- // 4x16, 16x4, 8x32
- has_tr_4x16, has_tr_16x4, has_tr_8x32,
- // 32x8, 16x64, 64x16
- has_tr_32x8, has_tr_16x64, has_tr_64x16
-};
-
-static uint8_t has_tr_vert_8x8[32] = {
- 255, 255, 0, 0, 119, 119, 0, 0, 127, 127, 0, 0, 119, 119, 0, 0,
- 255, 127, 0, 0, 119, 119, 0, 0, 127, 127, 0, 0, 119, 119, 0, 0,
-};
-static uint8_t has_tr_vert_16x16[8] = {
- 255, 0, 119, 0, 127, 0, 119, 0,
-};
-static uint8_t has_tr_vert_32x32[2] = { 15, 7 };
-static uint8_t has_tr_vert_64x64[1] = { 3 };
-
-// The _vert_* tables are like the ordinary tables above, but describe the
-// order we visit square blocks when doing a PARTITION_VERT_A or
-// PARTITION_VERT_B. This is the same order as normal except for on the last
-// split where we go vertically (TL, BL, TR, BR). We treat the rectangular block
-// as a pair of squares, which means that these tables work correctly for both
-// mixed vertical partition types.
-//
-// There are tables for each of the square sizes. Vertical rectangles (like
-// BLOCK_16X32) use their respective "non-vert" table
-static const uint8_t *const has_tr_vert_tables[BLOCK_SIZES] = {
- // 4X4
- NULL,
- // 4X8, 8X4, 8X8
- has_tr_4x8, NULL, has_tr_vert_8x8,
- // 8X16, 16X8, 16X16
- has_tr_8x16, NULL, has_tr_vert_16x16,
- // 16X32, 32X16, 32X32
- has_tr_16x32, NULL, has_tr_vert_32x32,
- // 32X64, 64X32, 64X64
- has_tr_32x64, NULL, has_tr_vert_64x64,
- // 64x128, 128x64, 128x128
- has_tr_64x128, NULL, has_tr_128x128
-};
-
-static const uint8_t *get_has_tr_table(PARTITION_TYPE partition,
- BLOCK_SIZE bsize) {
- const uint8_t *ret = NULL;
- // If this is a mixed vertical partition, look up bsize in orders_vert.
- if (partition == PARTITION_VERT_A || partition == PARTITION_VERT_B) {
- assert(bsize < BLOCK_SIZES);
- ret = has_tr_vert_tables[bsize];
- } else {
- ret = has_tr_tables[bsize];
- }
- assert(ret);
- return ret;
-}
-
-// Returns 1 when the top-right reference pixels of the transform block at
-// offset (row_off, col_off) inside the block at (mi_row, mi_col) may be used,
-// and 0 otherwise. ss_x / ss_y are the plane's subsampling shifts.
-static int has_top_right(const AV1_COMMON *cm, BLOCK_SIZE bsize, int mi_row,
-                         int mi_col, int top_available, int right_available,
-                         PARTITION_TYPE partition, TX_SIZE txsz, int row_off,
-                         int col_off, int ss_x, int ss_y) {
-  if (!(top_available && right_available)) return 0;
-
-  const int tx_w_unit = tx_size_wide_unit[txsz];
-  const int blk_w_unit =
-      AOMMAX((block_size_wide[bsize] >> tx_size_wide_log2[0]) >> ss_x, 1);
-  // Column (in units) just past the right edge of this transform block.
-  const int tr_end = col_off + tx_w_unit;
-
-  if (row_off > 0) {
-    // Not the first transform row: the top-right pixels lie inside the current
-    // block, so it is enough to check that there is room on the right.
-    if (block_size_wide[bsize] <= block_size_wide[BLOCK_64X64]) {
-      return tr_end < blk_w_unit;
-    }
-
-    const int w64_unit = mi_size_wide[BLOCK_64X64] >> ss_x;
-    const int h64_unit = mi_size_high[BLOCK_64X64] >> ss_y;
-    // Special case for blocks wider than 64: the transform unit whose
-    // top-right corner is at the center of the block does have pixels
-    // available at its top-right corner.
-    if (row_off == h64_unit && tr_end == w64_unit) return 1;
-
-    // Otherwise apply the room-on-the-right test within the 64-wide column.
-    return (col_off % w64_unit) + tx_w_unit < w64_unit;
-  }
-
-  // First transform row. If the top-right pixels are still above the current
-  // block they belong to the block above, which is already available.
-  if (tr_end < blk_w_unit) return 1;
-
-  const int sb_mi_size = mi_size_high[cm->seq_params.sb_size];
-  const int sb_mask = sb_mi_size - 1;
-  const int w_log2 = mi_size_wide_log2[bsize];
-  const int h_log2 = mi_size_high_log2[bsize];
-  const int sb_blk_row = (mi_row & sb_mask) >> h_log2;
-  const int sb_blk_col = (mi_col & sb_mask) >> w_log2;
-
-  // Top row of the superblock: the pixels are in the top and/or top-right
-  // superblocks, both of which are already available.
-  if (sb_blk_row == 0) return 1;
-
-  // Rightmost column of the superblock (and not its top row): the pixels fall
-  // in the right superblock, which is not available yet.
-  if (((sb_blk_col + 1) << w_log2) >= sb_mi_size) return 0;
-
-  // General case: look up whether the top-right block is coded before the
-  // current one. The table stores one bit per block position.
-  const int blk_index =
-      (sb_blk_row << (MAX_MIB_SIZE_LOG2 - w_log2)) + sb_blk_col;
-  const uint8_t *const table = get_has_tr_table(partition, bsize);
-  return (table[blk_index / 8] >> (blk_index % 8)) & 1;
-}
-
-// Similar to the has_tr_* tables, but store if the bottom-left reference
-// pixels are available. Each table is a bitmap: has_bottom_left() reads bit
-// (index % 8) of byte (index / 8). The tables are never written, so they are
-// declared const.
-static const uint8_t has_bl_4x4[128] = {
-  84, 85, 85, 85, 16, 17, 17, 17, 84, 85, 85, 85, 0, 1, 1, 1, 84, 85, 85,
-  85, 16, 17, 17, 17, 84, 85, 85, 85, 0, 0, 1, 0, 84, 85, 85, 85, 16, 17,
-  17, 17, 84, 85, 85, 85, 0, 1, 1, 1, 84, 85, 85, 85, 16, 17, 17, 17, 84,
-  85, 85, 85, 0, 0, 0, 0, 84, 85, 85, 85, 16, 17, 17, 17, 84, 85, 85, 85,
-  0, 1, 1, 1, 84, 85, 85, 85, 16, 17, 17, 17, 84, 85, 85, 85, 0, 0, 1,
-  0, 84, 85, 85, 85, 16, 17, 17, 17, 84, 85, 85, 85, 0, 1, 1, 1, 84, 85,
-  85, 85, 16, 17, 17, 17, 84, 85, 85, 85, 0, 0, 0, 0,
-};
-static const uint8_t has_bl_4x8[64] = {
-  16, 17, 17, 17, 0, 1, 1, 1, 16, 17, 17, 17, 0, 0, 1, 0,
-  16, 17, 17, 17, 0, 1, 1, 1, 16, 17, 17, 17, 0, 0, 0, 0,
-  16, 17, 17, 17, 0, 1, 1, 1, 16, 17, 17, 17, 0, 0, 1, 0,
-  16, 17, 17, 17, 0, 1, 1, 1, 16, 17, 17, 17, 0, 0, 0, 0,
-};
-static const uint8_t has_bl_8x4[64] = {
-  254, 255, 84, 85, 254, 255, 16, 17, 254, 255, 84, 85, 254, 255, 0, 1,
-  254, 255, 84, 85, 254, 255, 16, 17, 254, 255, 84, 85, 254, 255, 0, 0,
-  254, 255, 84, 85, 254, 255, 16, 17, 254, 255, 84, 85, 254, 255, 0, 1,
-  254, 255, 84, 85, 254, 255, 16, 17, 254, 255, 84, 85, 254, 255, 0, 0,
-};
-static const uint8_t has_bl_8x8[32] = {
-  84, 85, 16, 17, 84, 85, 0, 1, 84, 85, 16, 17, 84, 85, 0, 0,
-  84, 85, 16, 17, 84, 85, 0, 1, 84, 85, 16, 17, 84, 85, 0, 0,
-};
-static const uint8_t has_bl_8x16[16] = {
-  16, 17, 0, 1, 16, 17, 0, 0, 16, 17, 0, 1, 16, 17, 0, 0,
-};
-static const uint8_t has_bl_16x8[16] = {
-  254, 84, 254, 16, 254, 84, 254, 0, 254, 84, 254, 16, 254, 84, 254, 0,
-};
-static const uint8_t has_bl_16x16[8] = {
-  84, 16, 84, 0, 84, 16, 84, 0,
-};
-static const uint8_t has_bl_16x32[4] = { 16, 0, 16, 0 };
-static const uint8_t has_bl_32x16[4] = { 78, 14, 78, 14 };
-static const uint8_t has_bl_32x32[2] = { 4, 4 };
-static const uint8_t has_bl_32x64[1] = { 0 };
-static const uint8_t has_bl_64x32[1] = { 34 };
-static const uint8_t has_bl_64x64[1] = { 0 };
-static const uint8_t has_bl_64x128[1] = { 0 };
-static const uint8_t has_bl_128x64[1] = { 0 };
-static const uint8_t has_bl_128x128[1] = { 0 };
-static const uint8_t has_bl_4x16[32] = {
-  0, 1, 1, 1, 0, 0, 1, 0, 0, 1, 1, 1, 0, 0, 0, 0,
-  0, 1, 1, 1, 0, 0, 1, 0, 0, 1, 1, 1, 0, 0, 0, 0,
-};
-static const uint8_t has_bl_16x4[32] = {
-  254, 254, 254, 84, 254, 254, 254, 16, 254, 254, 254, 84, 254, 254, 254, 0,
-  254, 254, 254, 84, 254, 254, 254, 16, 254, 254, 254, 84, 254, 254, 254, 0,
-};
-static const uint8_t has_bl_8x32[8] = {
-  0, 1, 0, 0, 0, 1, 0, 0,
-};
-static const uint8_t has_bl_32x8[8] = {
-  238, 78, 238, 14, 238, 78, 238, 14,
-};
-static const uint8_t has_bl_16x64[2] = { 0, 0 };
-static const uint8_t has_bl_64x16[2] = { 42, 42 };
-
-// Bottom-left availability bitmaps, one entry per block size (BLOCK_SIZES_ALL
-// entries). get_has_bl_table() indexes this array with bsize for every
-// partition type other than PARTITION_VERT_A / PARTITION_VERT_B.
-static const uint8_t *const has_bl_tables[BLOCK_SIZES_ALL] = {
- // 4X4
- has_bl_4x4,
- // 4X8, 8X4, 8X8
- has_bl_4x8, has_bl_8x4, has_bl_8x8,
- // 8X16, 16X8, 16X16
- has_bl_8x16, has_bl_16x8, has_bl_16x16,
- // 16X32, 32X16, 32X32
- has_bl_16x32, has_bl_32x16, has_bl_32x32,
- // 32X64, 64X32, 64X64
- has_bl_32x64, has_bl_64x32, has_bl_64x64,
- // 64x128, 128x64, 128x128
- has_bl_64x128, has_bl_128x64, has_bl_128x128,
- // 4x16, 16x4, 8x32
- has_bl_4x16, has_bl_16x4, has_bl_8x32,
- // 32x8, 16x64, 64x16
- has_bl_32x8, has_bl_16x64, has_bl_64x16
-};
-
-// Bottom-left availability bitmaps for the square sizes under
-// PARTITION_VERT_A / PARTITION_VERT_B. Read-only, hence const.
-static const uint8_t has_bl_vert_8x8[32] = {
-  254, 255, 16, 17, 254, 255, 0, 1, 254, 255, 16, 17, 254, 255, 0, 0,
-  254, 255, 16, 17, 254, 255, 0, 1, 254, 255, 16, 17, 254, 255, 0, 0,
-};
-static const uint8_t has_bl_vert_16x16[8] = {
-  254, 16, 254, 0, 254, 16, 254, 0,
-};
-static const uint8_t has_bl_vert_32x32[2] = { 14, 14 };
-static const uint8_t has_bl_vert_64x64[1] = { 2 };
-
-// The _vert_* tables are like the ordinary tables above, but describe the
-// order we visit square blocks when doing a PARTITION_VERT_A or
-// PARTITION_VERT_B. This is the same order as normal except for on the last
-// split where we go vertically (TL, BL, TR, BR). We treat the rectangular block
-// as a pair of squares, which means that these tables work correctly for both
-// mixed vertical partition types.
-//
-// There are tables for each of the square sizes. Vertical rectangles (like
-// BLOCK_16X32) use their respective "non-vert" table.
-//
-// The array is indexed by BLOCK_SIZE. NULL entries (4X4 and the horizontal
-// rectangles) have no table here; get_has_bl_table() asserts that the entry it
-// selects is non-NULL.
-static const uint8_t *const has_bl_vert_tables[BLOCK_SIZES] = {
- // 4X4
- NULL,
- // 4X8, 8X4, 8X8
- has_bl_4x8, NULL, has_bl_vert_8x8,
- // 8X16, 16X8, 16X16
- has_bl_8x16, NULL, has_bl_vert_16x16,
- // 16X32, 32X16, 32X32
- has_bl_16x32, NULL, has_bl_vert_32x32,
- // 32X64, 64X32, 64X64
- has_bl_32x64, NULL, has_bl_vert_64x64,
- // 64x128, 128x64, 128x128
- // (128x128 reuses the ordinary has_bl_128x128 table.)
- has_bl_64x128, NULL, has_bl_128x128
-};
-
-// Selects the bottom-left availability bitmap for a block of size |bsize|.
-// PARTITION_VERT_A and PARTITION_VERT_B read from has_bl_vert_tables
-// (BLOCK_SIZES entries); every other partition type reads from has_bl_tables
-// (BLOCK_SIZES_ALL entries). Both lookups are bounds-checked in debug builds
-// and the selected entry must be non-NULL.
-static const uint8_t *get_has_bl_table(PARTITION_TYPE partition,
-                                       BLOCK_SIZE bsize) {
-  const uint8_t *ret = NULL;
-  // If this is a mixed vertical partition, look up bsize in
-  // has_bl_vert_tables.
-  if (partition == PARTITION_VERT_A || partition == PARTITION_VERT_B) {
-    assert(bsize < BLOCK_SIZES);
-    ret = has_bl_vert_tables[bsize];
-  } else {
-    assert(bsize < BLOCK_SIZES_ALL);
-    ret = has_bl_tables[bsize];
-  }
-  assert(ret);
-  return ret;
-}
-
-// Returns 1 when the bottom-left reference pixels of the transform block at
-// offset (row_off, col_off) inside the block at (mi_row, mi_col) may be used,
-// and 0 otherwise. ss_x / ss_y are the plane's subsampling shifts.
-static int has_bottom_left(const AV1_COMMON *cm, BLOCK_SIZE bsize, int mi_row,
-                           int mi_col, int bottom_available, int left_available,
-                           PARTITION_TYPE partition, TX_SIZE txsz, int row_off,
-                           int col_off, int ss_x, int ss_y) {
-  if (!bottom_available || !left_available) return 0;
-
-  // Special case for 128x* blocks, when col_off is half the block width.
-  // This is needed because 128x* superblocks are divided into 64x* blocks in
-  // raster order.
-  if (block_size_wide[bsize] > block_size_wide[BLOCK_64X64] && col_off > 0) {
-    const int plane_bw_unit_64 = mi_size_wide[BLOCK_64X64] >> ss_x;
-    const int col_off_64 = col_off % plane_bw_unit_64;
-    if (col_off_64 == 0) {
-      // We are at the left edge of top-right or bottom-right 64x* block.
-      const int plane_bh_unit_64 = mi_size_high[BLOCK_64X64] >> ss_y;
-      const int row_off_64 = row_off % plane_bh_unit_64;
-      const int plane_bh_unit =
-          AOMMIN(mi_size_high[bsize] >> ss_y, plane_bh_unit_64);
-      // Check if all bottom-left pixels are in the left 64x* block (which is
-      // already coded).
-      return row_off_64 + tx_size_high_unit[txsz] < plane_bh_unit;
-    }
-  }
-
-  if (col_off > 0) {
-    // Bottom-left pixels are in the bottom-left block, which is not available.
-    return 0;
-  } else {
-    const int bh_unit = block_size_high[bsize] >> tx_size_high_log2[0];
-    const int plane_bh_unit = AOMMAX(bh_unit >> ss_y, 1);
-    const int bottom_left_count_unit = tx_size_high_unit[txsz];
-
-    // All bottom-left pixels are in the left block, which is already available.
-    if (row_off + bottom_left_count_unit < plane_bh_unit) return 1;
-
-    const int bw_in_mi_log2 = mi_size_wide_log2[bsize];
-    const int bh_in_mi_log2 = mi_size_high_log2[bsize];
-    const int sb_mi_size = mi_size_high[cm->seq_params.sb_size];
-    const int blk_row_in_sb = (mi_row & (sb_mi_size - 1)) >> bh_in_mi_log2;
-    const int blk_col_in_sb = (mi_col & (sb_mi_size - 1)) >> bw_in_mi_log2;
-
-    // Leftmost column of superblock: so bottom-left pixels maybe in the left
-    // and/or bottom-left superblocks. But only the left superblock is
-    // available, so check if all required pixels fall in that superblock.
-    if (blk_col_in_sb == 0) {
-      // This is a vertical offset, so it is scaled with the height table, as
-      // bh_unit is above. NOTE(review): assumes entry 0 of the wide and high
-      // log2 tables is the same (square smallest transform), so the result is
-      // unchanged - confirm against the table definitions.
-      const int blk_start_row_off =
-          (blk_row_in_sb
-           << (bh_in_mi_log2 + MI_SIZE_LOG2 - tx_size_high_log2[0])) >>
-          ss_y;
-      const int row_off_in_sb = blk_start_row_off + row_off;
-      const int sb_height_unit = sb_mi_size >> ss_y;
-      return row_off_in_sb + bottom_left_count_unit < sb_height_unit;
-    }
-
-    // Bottom row of superblock (and not the leftmost column): so bottom-left
-    // pixels fall in the bottom superblock, which is not available yet.
-    if (((blk_row_in_sb + 1) << bh_in_mi_log2) >= sb_mi_size) return 0;
-
-    // General case (neither leftmost column nor bottom row): check if the
-    // bottom-left block is coded before the current block. The table stores
-    // one bit per block position.
-    const int this_blk_index =
-        ((blk_row_in_sb + 0) << (MAX_MIB_SIZE_LOG2 - bw_in_mi_log2)) +
-        blk_col_in_sb + 0;
-    const int idx1 = this_blk_index / 8;
-    const int idx2 = this_blk_index % 8;
-    const uint8_t *has_bl_table = get_has_bl_table(partition, bsize);
-    return (has_bl_table[idx1] >> idx2) & 1;
-  }
-}
-
-// Signature of an 8-bit intra predictor: writes a block to |dst| (row pitch
-// |stride|) from the |above| row and |left| column of reference pixels.
-typedef void (*intra_pred_fn)(uint8_t *dst, ptrdiff_t stride,
- const uint8_t *above, const uint8_t *left);
-
-// 8-bit dispatch tables, filled by init_intra_predictors_internal().
-// pred is indexed [prediction mode][transform size]. dc_pred is filled as
-// [0][0]=dc_128, [0][1]=dc_top, [1][0]=dc_left, [1][1]=dc, which suggests the
-// indices are [left available][top available] - confirm against the caller.
-static intra_pred_fn pred[INTRA_MODES][TX_SIZES_ALL];
-static intra_pred_fn dc_pred[2][2][TX_SIZES_ALL];
-
-// High bit depth counterpart of intra_pred_fn; |bd| is the bit depth.
-typedef void (*intra_high_pred_fn)(uint16_t *dst, ptrdiff_t stride,
- const uint16_t *above, const uint16_t *left,
- int bd);
-// High bit depth dispatch tables, laid out like pred and dc_pred.
-static intra_high_pred_fn pred_high[INTRA_MODES][TX_SIZES_ALL];
-static intra_high_pred_fn dc_pred_high[2][2][TX_SIZES_ALL];
-
-// Fills the pred / dc_pred (8-bit) and pred_high / dc_pred_high (high bit
-// depth) dispatch tables with the aom_<type>_predictor_<W>x<H> function for
-// every transform size. The INIT_* helper macros exist only inside this
-// function and are undefined again before it ends.
-static void init_intra_predictors_internal(void) {
-  assert(NELEMENTS(mode_to_angle_map) == INTRA_MODES);
-
-// Assigns the predictors for all rectangular transform sizes.
-#define INIT_RECTANGULAR(p, type) \
-  p[TX_4X8] = aom_##type##_predictor_4x8; \
-  p[TX_8X4] = aom_##type##_predictor_8x4; \
-  p[TX_8X16] = aom_##type##_predictor_8x16; \
-  p[TX_16X8] = aom_##type##_predictor_16x8; \
-  p[TX_16X32] = aom_##type##_predictor_16x32; \
-  p[TX_32X16] = aom_##type##_predictor_32x16; \
-  p[TX_32X64] = aom_##type##_predictor_32x64; \
-  p[TX_64X32] = aom_##type##_predictor_64x32; \
-  p[TX_4X16] = aom_##type##_predictor_4x16; \
-  p[TX_16X4] = aom_##type##_predictor_16x4; \
-  p[TX_8X32] = aom_##type##_predictor_8x32; \
-  p[TX_32X8] = aom_##type##_predictor_32x8; \
-  p[TX_16X64] = aom_##type##_predictor_16x64; \
-  p[TX_64X16] = aom_##type##_predictor_64x16;
-
-// Assigns the square sizes from 8x8 up, plus every rectangular size.
-#define INIT_NO_4X4(p, type) \
-  p[TX_8X8] = aom_##type##_predictor_8x8; \
-  p[TX_16X16] = aom_##type##_predictor_16x16; \
-  p[TX_32X32] = aom_##type##_predictor_32x32; \
-  p[TX_64X64] = aom_##type##_predictor_64x64; \
-  INIT_RECTANGULAR(p, type)
-
-// Assigns every transform size, including 4x4.
-#define INIT_ALL_SIZES(p, type) \
-  p[TX_4X4] = aom_##type##_predictor_4x4; \
-  INIT_NO_4X4(p, type)
-
-  INIT_ALL_SIZES(pred[V_PRED], v);
-  INIT_ALL_SIZES(pred[H_PRED], h);
-  INIT_ALL_SIZES(pred[PAETH_PRED], paeth);
-  INIT_ALL_SIZES(pred[SMOOTH_PRED], smooth);
-  INIT_ALL_SIZES(pred[SMOOTH_V_PRED], smooth_v);
-  INIT_ALL_SIZES(pred[SMOOTH_H_PRED], smooth_h);
-  INIT_ALL_SIZES(dc_pred[0][0], dc_128);
-  INIT_ALL_SIZES(dc_pred[0][1], dc_top);
-  INIT_ALL_SIZES(dc_pred[1][0], dc_left);
-  INIT_ALL_SIZES(dc_pred[1][1], dc);
-
-  INIT_ALL_SIZES(pred_high[V_PRED], highbd_v);
-  INIT_ALL_SIZES(pred_high[H_PRED], highbd_h);
-  INIT_ALL_SIZES(pred_high[PAETH_PRED], highbd_paeth);
-  INIT_ALL_SIZES(pred_high[SMOOTH_PRED], highbd_smooth);
-  INIT_ALL_SIZES(pred_high[SMOOTH_V_PRED], highbd_smooth_v);
-  INIT_ALL_SIZES(pred_high[SMOOTH_H_PRED], highbd_smooth_h);
-  INIT_ALL_SIZES(dc_pred_high[0][0], highbd_dc_128);
-  INIT_ALL_SIZES(dc_pred_high[0][1], highbd_dc_top);
-  INIT_ALL_SIZES(dc_pred_high[1][0], highbd_dc_left);
-  INIT_ALL_SIZES(dc_pred_high[1][1], highbd_dc);
-
-// Undefine the helpers that were actually defined above so they do not leak
-// into the rest of the translation unit.
-#undef INIT_ALL_SIZES
-#undef INIT_NO_4X4
-#undef INIT_RECTANGULAR
-}
-
-// Directional prediction, zone 1: 0 < angle < 90.
-// Each output row is interpolated from the |above| edge only; |left| and |dy|
-// are unused. Positions at or past the last usable edge sample take that
-// sample's value.
-void av1_dr_prediction_z1_c(uint8_t *dst, ptrdiff_t stride, int bw, int bh,
-                            const uint8_t *above, const uint8_t *left,
-                            int upsample_above, int dx, int dy) {
-  (void)left;
-  (void)dy;
-  assert(dy == 1);
-  assert(dx > 0);
-
-  const int last_idx = ((bw + bh) - 1) << upsample_above;
-  const int frac_bits = 6 - upsample_above;
-  const int step = 1 << upsample_above;
-
-  int pos = dx;  // projected position for the current row
-  for (int row = 0; row < bh; ++row) {
-    int idx = pos >> frac_bits;
-
-    if (idx >= last_idx) {
-      // This row and all the rows below it are a flat fill.
-      for (int rest = row; rest < bh; ++rest) {
-        memset(dst, above[last_idx], bw * sizeof(dst[0]));
-        dst += stride;
-      }
-      return;
-    }
-
-    // 5-bit interpolation weight of the right-hand neighbour.
-    const int weight = ((pos << upsample_above) & 0x3F) >> 1;
-    for (int col = 0; col < bw; ++col) {
-      if (idx < last_idx) {
-        const int sum = above[idx] * (32 - weight) + above[idx + 1] * weight;
-        dst[col] = ROUND_POWER_OF_TWO(sum, 5);
-      } else {
-        dst[col] = above[last_idx];
-      }
-      idx += step;
-    }
-
-    dst += stride;
-    pos += dx;
-  }
-}
-
-// Directional prediction, zone 2: 90 < angle < 180.
-// A pixel is interpolated from the |above| edge while its projected index
-// stays at or right of min_base_x, and from the |left| edge otherwise.
-void av1_dr_prediction_z2_c(uint8_t *dst, ptrdiff_t stride, int bw, int bh,
-                            const uint8_t *above, const uint8_t *left,
-                            int upsample_above, int upsample_left, int dx,
-                            int dy) {
-  assert(dx > 0);
-  assert(dy > 0);
-
-  const int min_base_x = -(1 << upsample_above);
-  const int frac_bits_x = 6 - upsample_above;
-  const int frac_bits_y = 6 - upsample_left;
-  const int step_x = 1 << upsample_above;
-
-  int x = -dx;  // projected x for the current row; constant across the row
-  for (int row = 0; row < bh; ++row) {
-    int idx_above = x >> frac_bits_x;
-    int y = (row << 6) - dy;
-
-    for (int col = 0; col < bw; ++col) {
-      int sum;
-      if (idx_above >= min_base_x) {
-        const int w = ((x * (1 << upsample_above)) & 0x3F) >> 1;
-        sum = above[idx_above] * (32 - w) + above[idx_above + 1] * w;
-      } else {
-        const int idx_left = y >> frac_bits_y;
-        assert(idx_left >= -(1 << upsample_left));
-        const int w = ((y * (1 << upsample_left)) & 0x3F) >> 1;
-        sum = left[idx_left] * (32 - w) + left[idx_left + 1] * w;
-      }
-      dst[col] = ROUND_POWER_OF_TWO(sum, 5);
-
-      idx_above += step_x;
-      y -= dy;
-    }
-
-    x -= dx;
-    dst += stride;
-  }
-}
-
-// Directional prediction, zone 3: 180 < angle < 270.
-// Each output column is interpolated from the |left| edge only; |above| and
-// |dx| are unused. Once a column runs past the last usable edge sample, the
-// rest of that column takes that sample's value.
-void av1_dr_prediction_z3_c(uint8_t *dst, ptrdiff_t stride, int bw, int bh,
-                            const uint8_t *above, const uint8_t *left,
-                            int upsample_left, int dx, int dy) {
-  (void)above;
-  (void)dx;
-
-  assert(dx == 1);
-  assert(dy > 0);
-
-  const int last_idx = (bw + bh - 1) << upsample_left;
-  const int frac_bits = 6 - upsample_left;
-  const int step = 1 << upsample_left;
-
-  int pos = dy;  // projected position for the current column
-  for (int col = 0; col < bw; ++col, pos += dy) {
-    const int weight = ((pos << upsample_left) & 0x3F) >> 1;
-    int idx = pos >> frac_bits;
-    int row = 0;
-
-    // Interpolated part of the column.
-    for (; row < bh && idx < last_idx; ++row, idx += step) {
-      const int sum = left[idx] * (32 - weight) + left[idx + 1] * weight;
-      dst[row * stride + col] = ROUND_POWER_OF_TWO(sum, 5);
-    }
-    // Flat remainder of the column, if any.
-    for (; row < bh; ++row) {
-      dst[row * stride + col] = left[last_idx];
-    }
-  }
-}
-
-// Dispatches an 8-bit directional prediction by |angle|: exactly 90 and 180
-// use the V_PRED / H_PRED predictors, the open ranges in between use the
-// zone 1/2/3 kernels. Angles outside (0, 270) write nothing.
-static void dr_predictor(uint8_t *dst, ptrdiff_t stride, TX_SIZE tx_size,
-                         const uint8_t *above, const uint8_t *left,
-                         int upsample_above, int upsample_left, int angle) {
-  const int dx = av1_get_dx(angle);
-  const int dy = av1_get_dy(angle);
-  const int width = tx_size_wide[tx_size];
-  const int height = tx_size_high[tx_size];
-  assert(angle > 0 && angle < 270);
-
-  if (angle == 90) {
-    pred[V_PRED][tx_size](dst, stride, above, left);
-    return;
-  }
-  if (angle == 180) {
-    pred[H_PRED][tx_size](dst, stride, above, left);
-    return;
-  }
-
-  if (angle > 0 && angle < 90) {
-    av1_dr_prediction_z1(dst, stride, width, height, above, left,
-                         upsample_above, dx, dy);
-  } else if (angle > 90 && angle < 180) {
-    av1_dr_prediction_z2(dst, stride, width, height, above, left,
-                         upsample_above, upsample_left, dx, dy);
-  } else if (angle > 180 && angle < 270) {
-    av1_dr_prediction_z3(dst, stride, width, height, above, left,
-                         upsample_left, dx, dy);
-  }
-}
-
-// Directional prediction, zone 1: 0 < angle < 90 (high bit depth).
-// Each output row is interpolated from the |above| edge only; |left|, |dy|
-// and |bd| are unused. Positions at or past the last usable edge sample take
-// that sample's value.
-void av1_highbd_dr_prediction_z1_c(uint16_t *dst, ptrdiff_t stride, int bw,
-                                   int bh, const uint16_t *above,
-                                   const uint16_t *left, int upsample_above,
-                                   int dx, int dy, int bd) {
-  (void)left;
-  (void)dy;
-  (void)bd;
-  assert(dy == 1);
-  assert(dx > 0);
-
-  const int last_idx = ((bw + bh) - 1) << upsample_above;
-  const int frac_bits = 6 - upsample_above;
-  const int step = 1 << upsample_above;
-
-  int pos = dx;  // projected position for the current row
-  for (int row = 0; row < bh; ++row) {
-    int idx = pos >> frac_bits;
-
-    if (idx >= last_idx) {
-      // This row and all the rows below it are a flat fill.
-      for (int rest = row; rest < bh; ++rest) {
-        aom_memset16(dst, above[last_idx], bw);
-        dst += stride;
-      }
-      return;
-    }
-
-    // 5-bit interpolation weight of the right-hand neighbour.
-    const int weight = ((pos << upsample_above) & 0x3F) >> 1;
-    for (int col = 0; col < bw; ++col) {
-      if (idx < last_idx) {
-        const int sum = above[idx] * (32 - weight) + above[idx + 1] * weight;
-        dst[col] = ROUND_POWER_OF_TWO(sum, 5);
-      } else {
-        dst[col] = above[last_idx];
-      }
-      idx += step;
-    }
-
-    dst += stride;
-    pos += dx;
-  }
-}
-
-// Directional prediction, zone 2: 90 < angle < 180 (high bit depth).
-// Every pixel recomputes its projection: it is interpolated from the |above|
-// edge while the projected index stays at or right of min_base_x, and from
-// the |left| edge otherwise. |bd| is unused.
-void av1_highbd_dr_prediction_z2_c(uint16_t *dst, ptrdiff_t stride, int bw,
-                                   int bh, const uint16_t *above,
-                                   const uint16_t *left, int upsample_above,
-                                   int upsample_left, int dx, int dy, int bd) {
-  (void)bd;
-  assert(dx > 0);
-  assert(dy > 0);
-
-  const int min_base_x = -(1 << upsample_above);
-  const int frac_bits_x = 6 - upsample_above;
-  const int frac_bits_y = 6 - upsample_left;
-
-  for (int row = 0; row < bh; ++row, dst += stride) {
-    for (int col = 0; col < bw; ++col) {
-      const int proj_x = (col << 6) - (row + 1) * dx;
-      const int idx_above = proj_x >> frac_bits_x;
-      int sum;
-
-      if (idx_above >= min_base_x) {
-        const int w = ((proj_x * (1 << upsample_above)) & 0x3F) >> 1;
-        sum = above[idx_above] * (32 - w) + above[idx_above + 1] * w;
-      } else {
-        const int proj_y = (row << 6) - (col + 1) * dy;
-        const int idx_left = proj_y >> frac_bits_y;
-        const int w = ((proj_y * (1 << upsample_left)) & 0x3F) >> 1;
-        sum = left[idx_left] * (32 - w) + left[idx_left + 1] * w;
-      }
-      dst[col] = ROUND_POWER_OF_TWO(sum, 5);
-    }
-  }
-}
-
-// Directional prediction, zone 3: 180 < angle < 270 (high bit depth).
-// Each output column is interpolated from the |left| edge only; |above|, |dx|
-// and |bd| are unused. Once a column runs past the last usable edge sample,
-// the rest of that column takes that sample's value.
-void av1_highbd_dr_prediction_z3_c(uint16_t *dst, ptrdiff_t stride, int bw,
-                                   int bh, const uint16_t *above,
-                                   const uint16_t *left, int upsample_left,
-                                   int dx, int dy, int bd) {
-  (void)above;
-  (void)dx;
-  (void)bd;
-  assert(dx == 1);
-  assert(dy > 0);
-
-  const int last_idx = (bw + bh - 1) << upsample_left;
-  const int frac_bits = 6 - upsample_left;
-  const int step = 1 << upsample_left;
-
-  int pos = dy;  // projected position for the current column
-  for (int col = 0; col < bw; ++col, pos += dy) {
-    const int weight = ((pos << upsample_left) & 0x3F) >> 1;
-    int idx = pos >> frac_bits;
-    int row = 0;
-
-    // Interpolated part of the column.
-    for (; row < bh && idx < last_idx; ++row, idx += step) {
-      const int sum = left[idx] * (32 - weight) + left[idx + 1] * weight;
-      dst[row * stride + col] = ROUND_POWER_OF_TWO(sum, 5);
-    }
-    // Flat remainder of the column, if any.
-    for (; row < bh; ++row) {
-      dst[row * stride + col] = left[last_idx];
-    }
-  }
-}
-
-// Dispatches a high bit depth directional prediction by |angle|: exactly 90
-// and 180 use the V_PRED / H_PRED predictors, the open ranges in between use
-// the zone 1/2/3 kernels. Angles outside (0, 270) write nothing.
-static void highbd_dr_predictor(uint16_t *dst, ptrdiff_t stride,
-                                TX_SIZE tx_size, const uint16_t *above,
-                                const uint16_t *left, int upsample_above,
-                                int upsample_left, int angle, int bd) {
-  const int dx = av1_get_dx(angle);
-  const int dy = av1_get_dy(angle);
-  const int width = tx_size_wide[tx_size];
-  const int height = tx_size_high[tx_size];
-  assert(angle > 0 && angle < 270);
-
-  if (angle == 90) {
-    pred_high[V_PRED][tx_size](dst, stride, above, left, bd);
-    return;
-  }
-  if (angle == 180) {
-    pred_high[H_PRED][tx_size](dst, stride, above, left, bd);
-    return;
-  }
-
-  if (angle > 0 && angle < 90) {
-    av1_highbd_dr_prediction_z1(dst, stride, width, height, above, left,
-                                upsample_above, dx, dy, bd);
-  } else if (angle > 90 && angle < 180) {
-    av1_highbd_dr_prediction_z2(dst, stride, width, height, above, left,
-                                upsample_above, upsample_left, dx, dy, bd);
-  } else if (angle > 180 && angle < 270) {
-    av1_highbd_dr_prediction_z3(dst, stride, width, height, above, left,
-                                upsample_left, dx, dy, bd);
-  }
-}
-
-// Filter-intra coefficients, indexed [mode][k][tap].
-// As used by the filter-intra predictors in this file: k selects one output
-// pixel of a 4x2 patch (row k >> 2, column k & 3) and taps 0..6 weight the
-// seven neighbouring pixels p0..p6. Tap 7 is not read by those predictors.
-DECLARE_ALIGNED(16, const int8_t,
- av1_filter_intra_taps[FILTER_INTRA_MODES][8][8]) = {
- // mode 0
- {
- { -6, 10, 0, 0, 0, 12, 0, 0 },
- { -5, 2, 10, 0, 0, 9, 0, 0 },
- { -3, 1, 1, 10, 0, 7, 0, 0 },
- { -3, 1, 1, 2, 10, 5, 0, 0 },
- { -4, 6, 0, 0, 0, 2, 12, 0 },
- { -3, 2, 6, 0, 0, 2, 9, 0 },
- { -3, 2, 2, 6, 0, 2, 7, 0 },
- { -3, 1, 2, 2, 6, 3, 5, 0 },
- },
- // mode 1
- {
- { -10, 16, 0, 0, 0, 10, 0, 0 },
- { -6, 0, 16, 0, 0, 6, 0, 0 },
- { -4, 0, 0, 16, 0, 4, 0, 0 },
- { -2, 0, 0, 0, 16, 2, 0, 0 },
- { -10, 16, 0, 0, 0, 0, 10, 0 },
- { -6, 0, 16, 0, 0, 0, 6, 0 },
- { -4, 0, 0, 16, 0, 0, 4, 0 },
- { -2, 0, 0, 0, 16, 0, 2, 0 },
- },
- // mode 2
- {
- { -8, 8, 0, 0, 0, 16, 0, 0 },
- { -8, 0, 8, 0, 0, 16, 0, 0 },
- { -8, 0, 0, 8, 0, 16, 0, 0 },
- { -8, 0, 0, 0, 8, 16, 0, 0 },
- { -4, 4, 0, 0, 0, 0, 16, 0 },
- { -4, 0, 4, 0, 0, 0, 16, 0 },
- { -4, 0, 0, 4, 0, 0, 16, 0 },
- { -4, 0, 0, 0, 4, 0, 16, 0 },
- },
- // mode 3
- {
- { -2, 8, 0, 0, 0, 10, 0, 0 },
- { -1, 3, 8, 0, 0, 6, 0, 0 },
- { -1, 2, 3, 8, 0, 4, 0, 0 },
- { 0, 1, 2, 3, 8, 2, 0, 0 },
- { -1, 4, 0, 0, 0, 3, 10, 0 },
- { -1, 3, 4, 0, 0, 4, 6, 0 },
- { -1, 2, 3, 4, 0, 4, 4, 0 },
- { -1, 2, 2, 3, 4, 3, 3, 0 },
- },
- // mode 4
- {
- { -12, 14, 0, 0, 0, 14, 0, 0 },
- { -10, 0, 14, 0, 0, 12, 0, 0 },
- { -9, 0, 0, 14, 0, 11, 0, 0 },
- { -8, 0, 0, 0, 14, 10, 0, 0 },
- { -10, 12, 0, 0, 0, 0, 14, 0 },
- { -9, 1, 12, 0, 0, 0, 12, 0 },
- { -8, 0, 0, 12, 0, 1, 11, 0 },
- { -7, 0, 0, 1, 12, 1, 9, 0 },
- },
-};
-
-// Filter-intra prediction (8-bit) for blocks of at most 32x32.
-// A working grid holds the above row in row 0 and the left column in column
-// 0. The block is then produced in 4x2 patches, each pixel being a weighted
-// sum of seven already-known neighbours of its patch, and finally copied to
-// |dst|.
-void av1_filter_intra_predictor_c(uint8_t *dst, ptrdiff_t stride,
-                                  TX_SIZE tx_size, const uint8_t *above,
-                                  const uint8_t *left, int mode) {
-  uint8_t grid[33][33];
-  const int width = tx_size_wide[tx_size];
-  const int height = tx_size_high[tx_size];
-
-  assert(width <= 32 && height <= 32);
-
-  // The initialization is just for silencing Jenkins static analysis warnings
-  for (int row = 0; row <= height; ++row) {
-    memset(grid[row], 0, (width + 1) * sizeof(grid[0][0]));
-  }
-
-  // Seed the border: left column first, then above row with the corner.
-  for (int row = 0; row < height; ++row) grid[row + 1][0] = left[row];
-  memcpy(grid[0], above - 1, (width + 1) * sizeof(uint8_t));
-
-  for (int row = 1; row <= height; row += 2) {
-    for (int col = 1; col <= width; col += 4) {
-      // Neighbours of the 4x2 patch whose top-left pixel is (row, col).
-      const uint8_t p0 = grid[row - 1][col - 1];
-      const uint8_t p1 = grid[row - 1][col];
-      const uint8_t p2 = grid[row - 1][col + 1];
-      const uint8_t p3 = grid[row - 1][col + 2];
-      const uint8_t p4 = grid[row - 1][col + 3];
-      const uint8_t p5 = grid[row][col - 1];
-      const uint8_t p6 = grid[row + 1][col - 1];
-
-      for (int k = 0; k < 8; ++k) {
-        const int8_t *const taps = av1_filter_intra_taps[mode][k];
-        const int sum = taps[0] * p0 + taps[1] * p1 + taps[2] * p2 +
-                        taps[3] * p3 + taps[4] * p4 + taps[5] * p5 +
-                        taps[6] * p6;
-        grid[row + (k >> 2)][col + (k & 0x03)] = clip_pixel(
-            ROUND_POWER_OF_TWO_SIGNED(sum, FILTER_INTRA_SCALE_BITS));
-      }
-    }
-  }
-
-  for (int row = 0; row < height; ++row, dst += stride) {
-    memcpy(dst, &grid[row + 1][1], width * sizeof(uint8_t));
-  }
-}
-
-// Filter-intra prediction (high bit depth) for blocks of at most 32x32.
-// A working grid holds the above row in row 0 and the left column in column
-// 0. The block is then produced in 4x2 patches, each pixel being a weighted
-// sum of seven already-known neighbours of its patch, clipped for bit depth
-// |bd|, and finally copied to |dst|.
-static void highbd_filter_intra_predictor(uint16_t *dst, ptrdiff_t stride,
-                                          TX_SIZE tx_size,
-                                          const uint16_t *above,
-                                          const uint16_t *left, int mode,
-                                          int bd) {
-  uint16_t grid[33][33];
-  const int width = tx_size_wide[tx_size];
-  const int height = tx_size_high[tx_size];
-
-  assert(width <= 32 && height <= 32);
-
-  // The initialization is just for silencing Jenkins static analysis warnings
-  for (int row = 0; row <= height; ++row) {
-    memset(grid[row], 0, (width + 1) * sizeof(grid[0][0]));
-  }
-
-  // Seed the border: left column first, then above row with the corner.
-  for (int row = 0; row < height; ++row) grid[row + 1][0] = left[row];
-  memcpy(grid[0], above - 1, (width + 1) * sizeof(grid[0][0]));
-
-  for (int row = 1; row <= height; row += 2) {
-    for (int col = 1; col <= width; col += 4) {
-      // Neighbours of the 4x2 patch whose top-left pixel is (row, col).
-      const uint16_t p0 = grid[row - 1][col - 1];
-      const uint16_t p1 = grid[row - 1][col];
-      const uint16_t p2 = grid[row - 1][col + 1];
-      const uint16_t p3 = grid[row - 1][col + 2];
-      const uint16_t p4 = grid[row - 1][col + 3];
-      const uint16_t p5 = grid[row][col - 1];
-      const uint16_t p6 = grid[row + 1][col - 1];
-
-      for (int k = 0; k < 8; ++k) {
-        const int8_t *const taps = av1_filter_intra_taps[mode][k];
-        const int sum = taps[0] * p0 + taps[1] * p1 + taps[2] * p2 +
-                        taps[3] * p3 + taps[4] * p4 + taps[5] * p5 +
-                        taps[6] * p6;
-        grid[row + (k >> 2)][col + (k & 0x03)] = clip_pixel_highbd(
-            ROUND_POWER_OF_TWO_SIGNED(sum, FILTER_INTRA_SCALE_BITS), bd);
-      }
-    }
-  }
-
-  for (int row = 0; row < height; ++row, dst += stride) {
-    memcpy(dst, &grid[row + 1][1], width * sizeof(dst[0]));
-  }
-}
-
-// Returns 1 if |mbmi| uses one of the three smooth intra modes for |plane|.
-// Plane 0 checks mbmi->mode; other planes check mbmi->uv_mode.
-static int is_smooth(const MB_MODE_INFO *mbmi, int plane) {
-  if (plane != 0) {
-    // uv_mode is not set for inter blocks, so need to explicitly
-    // detect that case.
-    if (is_inter_block(mbmi)) return 0;
-
-    switch (mbmi->uv_mode) {
-      case UV_SMOOTH_PRED:
-      case UV_SMOOTH_V_PRED:
-      case UV_SMOOTH_H_PRED: return 1;
-      default: return 0;
-    }
-  }
-
-  switch (mbmi->mode) {
-    case SMOOTH_PRED:
-    case SMOOTH_V_PRED:
-    case SMOOTH_H_PRED: return 1;
-    default: return 0;
-  }
-}
-
-// Returns 1 if the above or the left neighbour of the current block uses a
-// smooth intra mode for |plane|, else 0. Plane 0 looks at the luma
-// neighbours, other planes at the chroma neighbours; a missing (NULL)
-// neighbour counts as not smooth.
-static int get_filt_type(const MACROBLOCKD *xd, int plane) {
-  const int is_luma = (plane == 0);
-  const MB_MODE_INFO *const above_mi =
-      is_luma ? xd->above_mbmi : xd->chroma_above_mbmi;
-  const MB_MODE_INFO *const left_mi =
-      is_luma ? xd->left_mbmi : xd->chroma_left_mbmi;
-
-  if (above_mi && is_smooth(above_mi, plane)) return 1;
-  if (left_mi && is_smooth(left_mi, plane)) return 1;
-  return 0;
-}
-
-// Maps the combined block dimension (bs0 + bs1), the magnitude of |delta| and
-// the filter |type| (0 or non-zero) to an edge filter strength in 0..3, where
-// 0 means no filtering. Within each size class the largest threshold that
-// |delta| reaches decides the strength.
-static int intra_edge_filter_strength(int bs0, int bs1, int delta, int type) {
-  const int mag = abs(delta);
-  const int size = bs0 + bs1;
-
-  if (type != 0) {
-    if (size <= 8) return (mag >= 64) ? 2 : ((mag >= 40) ? 1 : 0);
-    if (size <= 16) return (mag >= 48) ? 2 : ((mag >= 20) ? 1 : 0);
-    if (size <= 24) return (mag >= 4) ? 3 : 0;
-    return (mag >= 1) ? 3 : 0;
-  }
-
-  if (size <= 8) return (mag >= 56) ? 1 : 0;
-  // The (8, 12] and (12, 16] size classes share the same threshold.
-  if (size <= 16) return (mag >= 40) ? 1 : 0;
-  if (size <= 24) {
-    if (mag >= 32) return 3;
-    if (mag >= 16) return 2;
-    return (mag >= 8) ? 1 : 0;
-  }
-  if (size <= 32) {
-    if (mag >= 32) return 3;
-    if (mag >= 4) return 2;
-    return (mag >= 1) ? 1 : 0;
-  }
-  return (mag >= 1) ? 3 : 0;
-}
-
-// Smooths the edge p[1..sz-1] in place with the 5-tap kernel selected by
-// |strength| (1..INTRA_EDGE_FILT); strength 0 leaves the edge untouched.
-// p[0] is never rewritten. Taps that would fall outside [0, sz-1] reuse the
-// nearest end sample. |sz| must fit the local copy of the edge (129 samples).
-void av1_filter_intra_edge_c(uint8_t *p, int sz, int strength) {
-  if (!strength) return;
-
-  static const int kernel[INTRA_EDGE_FILT][INTRA_EDGE_TAPS] = {
-    { 0, 4, 8, 4, 0 }, { 0, 5, 6, 5, 0 }, { 2, 4, 4, 4, 2 }
-  };
-  uint8_t edge[129];
-
-  // Guard the kernel index and the fixed-size stack copy (debug builds).
-  assert(strength >= 1 && strength <= INTRA_EDGE_FILT);
-  assert(sz >= 0 && sz <= (int)(sizeof(edge) / sizeof(edge[0])));
-
-  const int filt = strength - 1;
-
-  // Filter from an unmodified copy so earlier outputs do not feed later ones.
-  memcpy(edge, p, sz * sizeof(*p));
-  for (int i = 1; i < sz; i++) {
-    int s = 0;
-    for (int j = 0; j < INTRA_EDGE_TAPS; j++) {
-      int k = i - 2 + j;
-      k = (k < 0) ? 0 : k;
-      k = (k > sz - 1) ? sz - 1 : k;
-      s += edge[k] * kernel[filt][j];
-    }
-    // Each kernel row sums to 16: round and normalize.
-    s = (s + 8) >> 4;
-    p[i] = s;
-  }
-}
-
-// Smooths the shared top-left corner sample with a 5-6-5 kernel over
-// p_left[0], p_above[-1] and p_above[0], and stores the result in both
-// p_above[-1] and p_left[-1].
-static void filter_intra_edge_corner(uint8_t *p_above, uint8_t *p_left) {
-  const int w_side = 5;
-  const int w_corner = 6;
-
-  const int weighted =
-      p_left[0] * w_side + p_above[-1] * w_corner + p_above[0] * w_side;
-  const int corner = (weighted + 8) >> 4;  // weights sum to 16
-
-  p_above[-1] = corner;
-  p_left[-1] = corner;
-}
-
-// High bit depth edge filter: smooths p[1..sz-1] in place with the 5-tap
-// kernel selected by |strength| (1..INTRA_EDGE_FILT); strength 0 leaves the
-// edge untouched. p[0] is never rewritten. Taps that would fall outside
-// [0, sz-1] reuse the nearest end sample. |sz| must fit the local copy of the
-// edge (129 samples).
-void av1_filter_intra_edge_high_c(uint16_t *p, int sz, int strength) {
-  if (!strength) return;
-
-  static const int kernel[INTRA_EDGE_FILT][INTRA_EDGE_TAPS] = {
-    { 0, 4, 8, 4, 0 }, { 0, 5, 6, 5, 0 }, { 2, 4, 4, 4, 2 }
-  };
-  uint16_t edge[129];
-
-  // Guard the kernel index and the fixed-size stack copy (debug builds).
-  assert(strength >= 1 && strength <= INTRA_EDGE_FILT);
-  assert(sz >= 0 && sz <= (int)(sizeof(edge) / sizeof(edge[0])));
-
-  const int filt = strength - 1;
-
-  // Filter from an unmodified copy so earlier outputs do not feed later ones.
-  memcpy(edge, p, sz * sizeof(*p));
-  for (int i = 1; i < sz; i++) {
-    int s = 0;
-    for (int j = 0; j < INTRA_EDGE_TAPS; j++) {
-      int k = i - 2 + j;
-      k = (k < 0) ? 0 : k;
-      k = (k > sz - 1) ? sz - 1 : k;
-      s += edge[k] * kernel[filt][j];
-    }
-    // Each kernel row sums to 16: round and normalize.
-    s = (s + 8) >> 4;
-    p[i] = s;
-  }
-}
-
-// High bit depth corner filter: smooths the shared top-left corner sample
-// with a 5-6-5 kernel over p_left[0], p_above[-1] and p_above[0], and stores
-// the result in both p_above[-1] and p_left[-1].
-static void filter_intra_edge_corner_high(uint16_t *p_above, uint16_t *p_left) {
-  const int w_side = 5;
-  const int w_corner = 6;
-
-  const int weighted =
-      p_left[0] * w_side + p_above[-1] * w_corner + p_above[0] * w_side;
-  const int corner = (weighted + 8) >> 4;  // weights sum to 16
-
-  p_above[-1] = corner;
-  p_left[-1] = corner;
-}
-
-// Doubles the resolution of the edge p[-1..sz-1] in place: afterwards the
-// original samples sit at even offsets p[-2], p[0], ..., p[2*sz-2] and the
-// interpolated half-sample values at the odd offsets p[-1], ..., p[2*sz-3].
-void av1_upsample_intra_edge_c(uint8_t *p, int sz) {
-  assert(sz <= MAX_UPSAMPLE_SZ);
-
-  // Copy of p[-1..sz-1] with the first and the last sample repeated once, so
-  // that the 4-tap filter below never reads outside the edge.
-  uint8_t padded[MAX_UPSAMPLE_SZ + 3];
-  padded[0] = p[-1];
-  padded[1] = p[-1];
-  for (int n = 0; n < sz; n++) padded[n + 2] = p[n];
-  padded[sz + 2] = p[sz - 1];
-
-  p[-2] = padded[0];
-  for (int n = 0; n < sz; n++) {
-    const uint8_t *const tap = &padded[n];
-    // (-1, 9, 9, -1) / 16 half-sample interpolation filter.
-    const int sum = -tap[0] + (9 * tap[1]) + (9 * tap[2]) - tap[3];
-    p[2 * n - 1] = clip_pixel((sum + 8) >> 4);
-    p[2 * n] = tap[2];
-  }
-}
-
-// High bit depth edge upsampling: doubles the resolution of p[-1..sz-1] in
-// place. Afterwards the original samples sit at even offsets p[-2], p[0],
-// ..., p[2*sz-2] and the interpolated half-sample values, clipped for bit
-// depth |bd|, at the odd offsets p[-1], ..., p[2*sz-3].
-void av1_upsample_intra_edge_high_c(uint16_t *p, int sz, int bd) {
-  assert(sz <= MAX_UPSAMPLE_SZ);
-
-  // Copy of p[-1..sz-1] with the first and the last sample repeated once, so
-  // that the 4-tap filter below never reads outside the edge.
-  uint16_t padded[MAX_UPSAMPLE_SZ + 3];
-  padded[0] = p[-1];
-  padded[1] = p[-1];
-  for (int n = 0; n < sz; n++) padded[n + 2] = p[n];
-  padded[sz + 2] = p[sz - 1];
-
-  p[-2] = padded[0];
-  for (int n = 0; n < sz; n++) {
-    const uint16_t *const tap = &padded[n];
-    // (-1, 9, 9, -1) / 16 half-sample interpolation filter.
-    const int sum = -tap[0] + (9 * tap[1]) + (9 * tap[2]) - tap[3];
-    p[2 * n - 1] = clip_pixel_highbd((sum + 8) >> 4, bd);
-    p[2 * n] = tap[2];
-  }
-}
-
-static void build_intra_predictors_high(
- const MACROBLOCKD *xd, const uint8_t *ref8, int ref_stride, uint8_t *dst8,
- int dst_stride, PREDICTION_MODE mode, int angle_delta,
- FILTER_INTRA_MODE filter_intra_mode, TX_SIZE tx_size,
- int disable_edge_filter, int n_top_px, int n_topright_px, int n_left_px,
- int n_bottomleft_px, int plane) {
- int i;
- uint16_t *dst = CONVERT_TO_SHORTPTR(dst8);
- uint16_t *ref = CONVERT_TO_SHORTPTR(ref8);
- DECLARE_ALIGNED(16, uint16_t, left_data[MAX_TX_SIZE * 2 + 32]);
- DECLARE_ALIGNED(16, uint16_t, above_data[MAX_TX_SIZE * 2 + 32]);
- uint16_t *const above_row = above_data + 16;
- uint16_t *const left_col = left_data + 16;
- const int txwpx = tx_size_wide[tx_size];
- const int txhpx = tx_size_high[tx_size];
- int need_left = extend_modes[mode] & NEED_LEFT;
- int need_above = extend_modes[mode] & NEED_ABOVE;
- int need_above_left = extend_modes[mode] & NEED_ABOVELEFT;
- const uint16_t *above_ref = ref - ref_stride;
- const uint16_t *left_ref = ref - 1;
- int p_angle = 0;
- const int is_dr_mode = av1_is_directional_mode(mode);
- const int use_filter_intra = filter_intra_mode != FILTER_INTRA_MODES;
- int base = 128 << (xd->bd - 8);
-
- // The default values if ref pixels are not available:
- // base-1 base-1 base-1 .. base-1 base-1 base-1 base-1 base-1 base-1
- // base+1 A B .. Y Z
- // base+1 C D .. W X
- // base+1 E F .. U V
- // base+1 G H .. S T T T T T
-
- if (is_dr_mode) {
- p_angle = mode_to_angle_map[mode] + angle_delta;
- if (p_angle <= 90)
- need_above = 1, need_left = 0, need_above_left = 1;
- else if (p_angle < 180)
- need_above = 1, need_left = 1, need_above_left = 1;
- else
- need_above = 0, need_left = 1, need_above_left = 1;
- }
- if (use_filter_intra) need_left = need_above = need_above_left = 1;
-
- assert(n_top_px >= 0);
- assert(n_topright_px >= 0);
- assert(n_left_px >= 0);
- assert(n_bottomleft_px >= 0);
-
- if ((!need_above && n_left_px == 0) || (!need_left && n_top_px == 0)) {
- int val;
- if (need_left) {
- val = (n_top_px > 0) ? above_ref[0] : base + 1;
- } else {
- val = (n_left_px > 0) ? left_ref[0] : base - 1;
- }
- for (i = 0; i < txhpx; ++i) {
- aom_memset16(dst, val, txwpx);
- dst += dst_stride;
- }
- return;
- }
-
- // NEED_LEFT
- if (need_left) {
- int need_bottom = !!(extend_modes[mode] & NEED_BOTTOMLEFT);
- if (use_filter_intra) need_bottom = 0;
- if (is_dr_mode) need_bottom = p_angle > 180;
- const int num_left_pixels_needed = txhpx + (need_bottom ? txwpx : 0);
- i = 0;
- if (n_left_px > 0) {
- for (; i < n_left_px; i++) left_col[i] = left_ref[i * ref_stride];
- if (need_bottom && n_bottomleft_px > 0) {
- assert(i == txhpx);
- for (; i < txhpx + n_bottomleft_px; i++)
- left_col[i] = left_ref[i * ref_stride];
- }
- if (i < num_left_pixels_needed)
- aom_memset16(&left_col[i], left_col[i - 1], num_left_pixels_needed - i);
- } else {
- if (n_top_px > 0) {
- aom_memset16(left_col, above_ref[0], num_left_pixels_needed);
- } else {
- aom_memset16(left_col, base + 1, num_left_pixels_needed);
- }
- }
- }
-
- // NEED_ABOVE
- if (need_above) {
- int need_right = !!(extend_modes[mode] & NEED_ABOVERIGHT);
- if (use_filter_intra) need_right = 0;
- if (is_dr_mode) need_right = p_angle < 90;
- const int num_top_pixels_needed = txwpx + (need_right ? txhpx : 0);
- if (n_top_px > 0) {
- memcpy(above_row, above_ref, n_top_px * sizeof(above_ref[0]));
- i = n_top_px;
- if (need_right && n_topright_px > 0) {
- assert(n_top_px == txwpx);
- memcpy(above_row + txwpx, above_ref + txwpx,
- n_topright_px * sizeof(above_ref[0]));
- i += n_topright_px;
- }
- if (i < num_top_pixels_needed)
- aom_memset16(&above_row[i], above_row[i - 1],
- num_top_pixels_needed - i);
- } else {
- if (n_left_px > 0) {
- aom_memset16(above_row, left_ref[0], num_top_pixels_needed);
- } else {
- aom_memset16(above_row, base - 1, num_top_pixels_needed);
- }
- }
- }
-
- if (need_above_left) {
- if (n_top_px > 0 && n_left_px > 0) {
- above_row[-1] = above_ref[-1];
- } else if (n_top_px > 0) {
- above_row[-1] = above_ref[0];
- } else if (n_left_px > 0) {
- above_row[-1] = left_ref[0];
- } else {
- above_row[-1] = base;
- }
- left_col[-1] = above_row[-1];
- }
-
- if (use_filter_intra) {
- highbd_filter_intra_predictor(dst, dst_stride, tx_size, above_row, left_col,
- filter_intra_mode, xd->bd);
- return;
- }
-
- if (is_dr_mode) {
- int upsample_above = 0;
- int upsample_left = 0;
- if (!disable_edge_filter) {
- const int need_right = p_angle < 90;
- const int need_bottom = p_angle > 180;
- const int filt_type = get_filt_type(xd, plane);
- if (p_angle != 90 && p_angle != 180) {
- const int ab_le = need_above_left ? 1 : 0;
- if (need_above && need_left && (txwpx + txhpx >= 24)) {
- filter_intra_edge_corner_high(above_row, left_col);
- }
- if (need_above && n_top_px > 0) {
- const int strength =
- intra_edge_filter_strength(txwpx, txhpx, p_angle - 90, filt_type);
- const int n_px = n_top_px + ab_le + (need_right ? txhpx : 0);
- av1_filter_intra_edge_high(above_row - ab_le, n_px, strength);
- }
- if (need_left && n_left_px > 0) {
- const int strength = intra_edge_filter_strength(
- txhpx, txwpx, p_angle - 180, filt_type);
- const int n_px = n_left_px + ab_le + (need_bottom ? txwpx : 0);
- av1_filter_intra_edge_high(left_col - ab_le, n_px, strength);
- }
- }
- upsample_above =
- av1_use_intra_edge_upsample(txwpx, txhpx, p_angle - 90, filt_type);
- if (need_above && upsample_above) {
- const int n_px = txwpx + (need_right ? txhpx : 0);
- av1_upsample_intra_edge_high(above_row, n_px, xd->bd);
- }
- upsample_left =
- av1_use_intra_edge_upsample(txhpx, txwpx, p_angle - 180, filt_type);
- if (need_left && upsample_left) {
- const int n_px = txhpx + (need_bottom ? txwpx : 0);
- av1_upsample_intra_edge_high(left_col, n_px, xd->bd);
- }
- }
- highbd_dr_predictor(dst, dst_stride, tx_size, above_row, left_col,
- upsample_above, upsample_left, p_angle, xd->bd);
- return;
- }
-
- // predict
- if (mode == DC_PRED) {
- dc_pred_high[n_left_px > 0][n_top_px > 0][tx_size](
- dst, dst_stride, above_row, left_col, xd->bd);
- } else {
- pred_high[mode][tx_size](dst, dst_stride, above_row, left_col, xd->bd);
- }
-}
-
-static void build_intra_predictors(const MACROBLOCKD *xd, const uint8_t *ref,
- int ref_stride, uint8_t *dst, int dst_stride,
- PREDICTION_MODE mode, int angle_delta,
- FILTER_INTRA_MODE filter_intra_mode,
- TX_SIZE tx_size, int disable_edge_filter,
- int n_top_px, int n_topright_px,
- int n_left_px, int n_bottomleft_px,
- int plane) {
- int i;
- const uint8_t *above_ref = ref - ref_stride;
- const uint8_t *left_ref = ref - 1;
- DECLARE_ALIGNED(16, uint8_t, left_data[MAX_TX_SIZE * 2 + 32]);
- DECLARE_ALIGNED(16, uint8_t, above_data[MAX_TX_SIZE * 2 + 32]);
- uint8_t *const above_row = above_data + 16;
- uint8_t *const left_col = left_data + 16;
- const int txwpx = tx_size_wide[tx_size];
- const int txhpx = tx_size_high[tx_size];
- int need_left = extend_modes[mode] & NEED_LEFT;
- int need_above = extend_modes[mode] & NEED_ABOVE;
- int need_above_left = extend_modes[mode] & NEED_ABOVELEFT;
- int p_angle = 0;
- const int is_dr_mode = av1_is_directional_mode(mode);
- const int use_filter_intra = filter_intra_mode != FILTER_INTRA_MODES;
-
- // The default values if ref pixels are not available:
- // 127 127 127 .. 127 127 127 127 127 127
- // 129 A B .. Y Z
- // 129 C D .. W X
- // 129 E F .. U V
- // 129 G H .. S T T T T T
- // ..
-
- if (is_dr_mode) {
- p_angle = mode_to_angle_map[mode] + angle_delta;
- if (p_angle <= 90)
- need_above = 1, need_left = 0, need_above_left = 1;
- else if (p_angle < 180)
- need_above = 1, need_left = 1, need_above_left = 1;
- else
- need_above = 0, need_left = 1, need_above_left = 1;
- }
- if (use_filter_intra) need_left = need_above = need_above_left = 1;
-
- assert(n_top_px >= 0);
- assert(n_topright_px >= 0);
- assert(n_left_px >= 0);
- assert(n_bottomleft_px >= 0);
-
- if ((!need_above && n_left_px == 0) || (!need_left && n_top_px == 0)) {
- int val;
- if (need_left) {
- val = (n_top_px > 0) ? above_ref[0] : 129;
- } else {
- val = (n_left_px > 0) ? left_ref[0] : 127;
- }
- for (i = 0; i < txhpx; ++i) {
- memset(dst, val, txwpx);
- dst += dst_stride;
- }
- return;
- }
-
- // NEED_LEFT
- if (need_left) {
- int need_bottom = !!(extend_modes[mode] & NEED_BOTTOMLEFT);
- if (use_filter_intra) need_bottom = 0;
- if (is_dr_mode) need_bottom = p_angle > 180;
- const int num_left_pixels_needed = txhpx + (need_bottom ? txwpx : 0);
- i = 0;
- if (n_left_px > 0) {
- for (; i < n_left_px; i++) left_col[i] = left_ref[i * ref_stride];
- if (need_bottom && n_bottomleft_px > 0) {
- assert(i == txhpx);
- for (; i < txhpx + n_bottomleft_px; i++)
- left_col[i] = left_ref[i * ref_stride];
- }
- if (i < num_left_pixels_needed)
- memset(&left_col[i], left_col[i - 1], num_left_pixels_needed - i);
- } else {
- if (n_top_px > 0) {
- memset(left_col, above_ref[0], num_left_pixels_needed);
- } else {
- memset(left_col, 129, num_left_pixels_needed);
- }
- }
- }
-
- // NEED_ABOVE
- if (need_above) {
- int need_right = !!(extend_modes[mode] & NEED_ABOVERIGHT);
- if (use_filter_intra) need_right = 0;
- if (is_dr_mode) need_right = p_angle < 90;
- const int num_top_pixels_needed = txwpx + (need_right ? txhpx : 0);
- if (n_top_px > 0) {
- memcpy(above_row, above_ref, n_top_px);
- i = n_top_px;
- if (need_right && n_topright_px > 0) {
- assert(n_top_px == txwpx);
- memcpy(above_row + txwpx, above_ref + txwpx, n_topright_px);
- i += n_topright_px;
- }
- if (i < num_top_pixels_needed)
- memset(&above_row[i], above_row[i - 1], num_top_pixels_needed - i);
- } else {
- if (n_left_px > 0) {
- memset(above_row, left_ref[0], num_top_pixels_needed);
- } else {
- memset(above_row, 127, num_top_pixels_needed);
- }
- }
- }
-
- if (need_above_left) {
- if (n_top_px > 0 && n_left_px > 0) {
- above_row[-1] = above_ref[-1];
- } else if (n_top_px > 0) {
- above_row[-1] = above_ref[0];
- } else if (n_left_px > 0) {
- above_row[-1] = left_ref[0];
- } else {
- above_row[-1] = 128;
- }
- left_col[-1] = above_row[-1];
- }
-
- if (use_filter_intra) {
- av1_filter_intra_predictor(dst, dst_stride, tx_size, above_row, left_col,
- filter_intra_mode);
- return;
- }
-
- if (is_dr_mode) {
- int upsample_above = 0;
- int upsample_left = 0;
- if (!disable_edge_filter) {
- const int need_right = p_angle < 90;
- const int need_bottom = p_angle > 180;
- const int filt_type = get_filt_type(xd, plane);
- if (p_angle != 90 && p_angle != 180) {
- const int ab_le = need_above_left ? 1 : 0;
- if (need_above && need_left && (txwpx + txhpx >= 24)) {
- filter_intra_edge_corner(above_row, left_col);
- }
- if (need_above && n_top_px > 0) {
- const int strength =
- intra_edge_filter_strength(txwpx, txhpx, p_angle - 90, filt_type);
- const int n_px = n_top_px + ab_le + (need_right ? txhpx : 0);
- av1_filter_intra_edge(above_row - ab_le, n_px, strength);
- }
- if (need_left && n_left_px > 0) {
- const int strength = intra_edge_filter_strength(
- txhpx, txwpx, p_angle - 180, filt_type);
- const int n_px = n_left_px + ab_le + (need_bottom ? txwpx : 0);
- av1_filter_intra_edge(left_col - ab_le, n_px, strength);
- }
- }
- upsample_above =
- av1_use_intra_edge_upsample(txwpx, txhpx, p_angle - 90, filt_type);
- if (need_above && upsample_above) {
- const int n_px = txwpx + (need_right ? txhpx : 0);
- av1_upsample_intra_edge(above_row, n_px);
- }
- upsample_left =
- av1_use_intra_edge_upsample(txhpx, txwpx, p_angle - 180, filt_type);
- if (need_left && upsample_left) {
- const int n_px = txhpx + (need_bottom ? txwpx : 0);
- av1_upsample_intra_edge(left_col, n_px);
- }
- }
- dr_predictor(dst, dst_stride, tx_size, above_row, left_col, upsample_above,
- upsample_left, p_angle);
- return;
- }
-
- // predict
- if (mode == DC_PRED) {
- dc_pred[n_left_px > 0][n_top_px > 0][tx_size](dst, dst_stride, above_row,
- left_col);
- } else {
- pred[mode][tx_size](dst, dst_stride, above_row, left_col);
- }
-}
-
-void av1_predict_intra_block(
- const AV1_COMMON *cm, const MACROBLOCKD *xd, int wpx, int hpx,
- TX_SIZE tx_size, PREDICTION_MODE mode, int angle_delta, int use_palette,
- FILTER_INTRA_MODE filter_intra_mode, const uint8_t *ref, int ref_stride,
- uint8_t *dst, int dst_stride, int col_off, int row_off, int plane) {
- const MB_MODE_INFO *const mbmi = xd->mi[0];
- const int txwpx = tx_size_wide[tx_size];
- const int txhpx = tx_size_high[tx_size];
- const int x = col_off << tx_size_wide_log2[0];
- const int y = row_off << tx_size_high_log2[0];
-
- if (use_palette) {
- int r, c;
- const uint8_t *const map = xd->plane[plane != 0].color_index_map +
- xd->color_index_map_offset[plane != 0];
- const uint16_t *const palette =
- mbmi->palette_mode_info.palette_colors + plane * PALETTE_MAX_SIZE;
- if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
- uint16_t *dst16 = CONVERT_TO_SHORTPTR(dst);
- for (r = 0; r < txhpx; ++r) {
- for (c = 0; c < txwpx; ++c) {
- dst16[r * dst_stride + c] = palette[map[(r + y) * wpx + c + x]];
- }
- }
- } else {
- for (r = 0; r < txhpx; ++r) {
- for (c = 0; c < txwpx; ++c) {
- dst[r * dst_stride + c] =
- (uint8_t)palette[map[(r + y) * wpx + c + x]];
- }
- }
- }
- return;
- }
-
- BLOCK_SIZE bsize = mbmi->sb_type;
- const struct macroblockd_plane *const pd = &xd->plane[plane];
- const int txw = tx_size_wide_unit[tx_size];
- const int txh = tx_size_high_unit[tx_size];
- const int have_top = row_off || (pd->subsampling_y ? xd->chroma_up_available
- : xd->up_available);
- const int have_left =
- col_off ||
- (pd->subsampling_x ? xd->chroma_left_available : xd->left_available);
- const int mi_row = -xd->mb_to_top_edge >> (3 + MI_SIZE_LOG2);
- const int mi_col = -xd->mb_to_left_edge >> (3 + MI_SIZE_LOG2);
- const int xr_chr_offset = 0;
- const int yd_chr_offset = 0;
-
- // Distance between the right edge of this prediction block to
- // the frame right edge
- const int xr = (xd->mb_to_right_edge >> (3 + pd->subsampling_x)) +
- (wpx - x - txwpx) - xr_chr_offset;
- // Distance between the bottom edge of this prediction block to
- // the frame bottom edge
- const int yd = (xd->mb_to_bottom_edge >> (3 + pd->subsampling_y)) +
- (hpx - y - txhpx) - yd_chr_offset;
- const int right_available =
- mi_col + ((col_off + txw) << pd->subsampling_x) < xd->tile.mi_col_end;
- const int bottom_available =
- (yd > 0) &&
- (mi_row + ((row_off + txh) << pd->subsampling_y) < xd->tile.mi_row_end);
-
- const PARTITION_TYPE partition = mbmi->partition;
-
- // force 4x4 chroma component block size.
- bsize = scale_chroma_bsize(bsize, pd->subsampling_x, pd->subsampling_y);
-
- const int have_top_right = has_top_right(
- cm, bsize, mi_row, mi_col, have_top, right_available, partition, tx_size,
- row_off, col_off, pd->subsampling_x, pd->subsampling_y);
- const int have_bottom_left = has_bottom_left(
- cm, bsize, mi_row, mi_col, bottom_available, have_left, partition,
- tx_size, row_off, col_off, pd->subsampling_x, pd->subsampling_y);
-
- const int disable_edge_filter = !cm->seq_params.enable_intra_edge_filter;
- if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
- build_intra_predictors_high(
- xd, ref, ref_stride, dst, dst_stride, mode, angle_delta,
- filter_intra_mode, tx_size, disable_edge_filter,
- have_top ? AOMMIN(txwpx, xr + txwpx) : 0,
- have_top_right ? AOMMIN(txwpx, xr) : 0,
- have_left ? AOMMIN(txhpx, yd + txhpx) : 0,
- have_bottom_left ? AOMMIN(txhpx, yd) : 0, plane);
- return;
- }
-
- build_intra_predictors(xd, ref, ref_stride, dst, dst_stride, mode,
- angle_delta, filter_intra_mode, tx_size,
- disable_edge_filter,
- have_top ? AOMMIN(txwpx, xr + txwpx) : 0,
- have_top_right ? AOMMIN(txwpx, xr) : 0,
- have_left ? AOMMIN(txhpx, yd + txhpx) : 0,
- have_bottom_left ? AOMMIN(txhpx, yd) : 0, plane);
-}
-
-void av1_predict_intra_block_facade(const AV1_COMMON *cm, MACROBLOCKD *xd,
- int plane, int blk_col, int blk_row,
- TX_SIZE tx_size) {
- const MB_MODE_INFO *const mbmi = xd->mi[0];
- struct macroblockd_plane *const pd = &xd->plane[plane];
- const int dst_stride = pd->dst.stride;
- uint8_t *dst =
- &pd->dst.buf[(blk_row * dst_stride + blk_col) << tx_size_wide_log2[0]];
- const PREDICTION_MODE mode =
- (plane == AOM_PLANE_Y) ? mbmi->mode : get_uv_mode(mbmi->uv_mode);
- const int use_palette = mbmi->palette_mode_info.palette_size[plane != 0] > 0;
- const FILTER_INTRA_MODE filter_intra_mode =
- (plane == AOM_PLANE_Y && mbmi->filter_intra_mode_info.use_filter_intra)
- ? mbmi->filter_intra_mode_info.filter_intra_mode
- : FILTER_INTRA_MODES;
- const int angle_delta = mbmi->angle_delta[plane != AOM_PLANE_Y] * ANGLE_STEP;
-
- if (plane != AOM_PLANE_Y && mbmi->uv_mode == UV_CFL_PRED) {
-#if CONFIG_DEBUG
- assert(is_cfl_allowed(xd));
- const BLOCK_SIZE plane_bsize = get_plane_block_size(
- mbmi->sb_type, pd->subsampling_x, pd->subsampling_y);
- (void)plane_bsize;
- assert(plane_bsize < BLOCK_SIZES_ALL);
- if (!xd->lossless[mbmi->segment_id]) {
- assert(blk_col == 0);
- assert(blk_row == 0);
- assert(block_size_wide[plane_bsize] == tx_size_wide[tx_size]);
- assert(block_size_high[plane_bsize] == tx_size_high[tx_size]);
- }
-#endif
- CFL_CTX *const cfl = &xd->cfl;
- CFL_PRED_TYPE pred_plane = get_cfl_pred_type(plane);
- if (cfl->dc_pred_is_cached[pred_plane] == 0) {
- av1_predict_intra_block(cm, xd, pd->width, pd->height, tx_size, mode,
- angle_delta, use_palette, filter_intra_mode, dst,
- dst_stride, dst, dst_stride, blk_col, blk_row,
- plane);
- if (cfl->use_dc_pred_cache) {
- cfl_store_dc_pred(xd, dst, pred_plane, tx_size_wide[tx_size]);
- cfl->dc_pred_is_cached[pred_plane] = 1;
- }
- } else {
- cfl_load_dc_pred(xd, dst, dst_stride, tx_size, pred_plane);
- }
- cfl_predict_block(xd, dst, dst_stride, tx_size, plane);
- return;
- }
- av1_predict_intra_block(cm, xd, pd->width, pd->height, tx_size, mode,
- angle_delta, use_palette, filter_intra_mode, dst,
- dst_stride, dst, dst_stride, blk_col, blk_row, plane);
-}
-
-void av1_init_intra_predictors(void) {
- aom_once(init_intra_predictors_internal);
-}
diff --git a/third_party/aom/av1/common/reconintra.h b/third_party/aom/av1/common/reconintra.h
deleted file mode 100644
index 07853aba0..000000000
--- a/third_party/aom/av1/common/reconintra.h
+++ /dev/null
@@ -1,119 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#ifndef AOM_AV1_COMMON_RECONINTRA_H_
-#define AOM_AV1_COMMON_RECONINTRA_H_
-
-#include <stdlib.h>
-
-#include "aom/aom_integer.h"
-#include "av1/common/blockd.h"
-#include "av1/common/onyxc_int.h"
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-void av1_init_intra_predictors(void);
-void av1_predict_intra_block_facade(const AV1_COMMON *cm, MACROBLOCKD *xd,
- int plane, int blk_col, int blk_row,
- TX_SIZE tx_size);
-void av1_predict_intra_block(const AV1_COMMON *cm, const MACROBLOCKD *xd,
- int bw, int bh, TX_SIZE tx_size,
- PREDICTION_MODE mode, int angle_delta,
- int use_palette,
- FILTER_INTRA_MODE filter_intra_mode,
- const uint8_t *ref, int ref_stride, uint8_t *dst,
- int dst_stride, int aoff, int loff, int plane);
-
-// Mapping of interintra to intra mode for use in the intra component
-static const PREDICTION_MODE interintra_to_intra_mode[INTERINTRA_MODES] = {
- DC_PRED, V_PRED, H_PRED, SMOOTH_PRED
-};
-
-// Mapping of intra mode to the interintra mode
-static const INTERINTRA_MODE intra_to_interintra_mode[INTRA_MODES] = {
- II_DC_PRED, II_V_PRED, II_H_PRED, II_V_PRED, II_SMOOTH_PRED, II_V_PRED,
- II_H_PRED, II_H_PRED, II_V_PRED, II_SMOOTH_PRED, II_SMOOTH_PRED
-};
-
-#define FILTER_INTRA_SCALE_BITS 4
-
-static INLINE int av1_is_directional_mode(PREDICTION_MODE mode) {
- return mode >= V_PRED && mode <= D67_PRED;
-}
-
-static INLINE int av1_use_angle_delta(BLOCK_SIZE bsize) {
- return bsize >= BLOCK_8X8;
-}
-
-static INLINE int av1_allow_intrabc(const AV1_COMMON *const cm) {
- return frame_is_intra_only(cm) && cm->allow_screen_content_tools &&
- cm->allow_intrabc;
-}
-
-static INLINE int av1_filter_intra_allowed_bsize(const AV1_COMMON *const cm,
- BLOCK_SIZE bs) {
- if (!cm->seq_params.enable_filter_intra || bs == BLOCK_INVALID) return 0;
-
- return block_size_wide[bs] <= 32 && block_size_high[bs] <= 32;
-}
-
-static INLINE int av1_filter_intra_allowed(const AV1_COMMON *const cm,
- const MB_MODE_INFO *mbmi) {
- return mbmi->mode == DC_PRED &&
- mbmi->palette_mode_info.palette_size[0] == 0 &&
- av1_filter_intra_allowed_bsize(cm, mbmi->sb_type);
-}
-
-extern const int8_t av1_filter_intra_taps[FILTER_INTRA_MODES][8][8];
-
-// Get the shift (up-scaled by 256) in X w.r.t a unit change in Y.
-// If angle > 0 && angle < 90, dx = -((int)(256 / t));
-// If angle > 90 && angle < 180, dx = (int)(256 / t);
-// If angle > 180 && angle < 270, dx = 1;
-static INLINE int av1_get_dx(int angle) {
- if (angle > 0 && angle < 90) {
- return dr_intra_derivative[angle];
- } else if (angle > 90 && angle < 180) {
- return dr_intra_derivative[180 - angle];
- } else {
- // In this case, we are not really going to use dx. We may return any value.
- return 1;
- }
-}
-
-// Get the shift (up-scaled by 256) in Y w.r.t a unit change in X.
-// If angle > 0 && angle < 90, dy = 1;
-// If angle > 90 && angle < 180, dy = (int)(256 * t);
-// If angle > 180 && angle < 270, dy = -((int)(256 * t));
-static INLINE int av1_get_dy(int angle) {
- if (angle > 90 && angle < 180) {
- return dr_intra_derivative[angle - 90];
- } else if (angle > 180 && angle < 270) {
- return dr_intra_derivative[270 - angle];
- } else {
- // In this case, we are not really going to use dy. We may return any value.
- return 1;
- }
-}
-
-static INLINE int av1_use_intra_edge_upsample(int bs0, int bs1, int delta,
- int type) {
- const int d = abs(delta);
- const int blk_wh = bs0 + bs1;
- if (d <= 0 || d >= 40) return 0;
- return type ? (blk_wh <= 8) : (blk_wh <= 16);
-}
-#ifdef __cplusplus
-} // extern "C"
-#endif
-#endif // AOM_AV1_COMMON_RECONINTRA_H_
diff --git a/third_party/aom/av1/common/resize.c b/third_party/aom/av1/common/resize.c
deleted file mode 100644
index d61a20aa2..000000000
--- a/third_party/aom/av1/common/resize.c
+++ /dev/null
@@ -1,1280 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#include <assert.h>
-#include <limits.h>
-#include <math.h>
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-
-#include "config/aom_config.h"
-
-#include "aom_dsp/aom_dsp_common.h"
-#include "aom_ports/mem.h"
-#include "aom_scale/aom_scale.h"
-#include "av1/common/common.h"
-#include "av1/common/resize.h"
-
-#include "config/aom_scale_rtcd.h"
-
-// Filters for interpolation (0.5-band) - note this also filters integer pels.
-static const InterpKernel filteredinterp_filters500[(1 << RS_SUBPEL_BITS)] = {
- { -3, 0, 35, 64, 35, 0, -3, 0 }, { -3, 0, 34, 64, 36, 0, -3, 0 },
- { -3, -1, 34, 64, 36, 1, -3, 0 }, { -3, -1, 33, 64, 37, 1, -3, 0 },
- { -3, -1, 32, 64, 38, 1, -3, 0 }, { -3, -1, 31, 64, 39, 1, -3, 0 },
- { -3, -1, 31, 63, 39, 2, -3, 0 }, { -2, -2, 30, 63, 40, 2, -3, 0 },
- { -2, -2, 29, 63, 41, 2, -3, 0 }, { -2, -2, 29, 63, 41, 3, -4, 0 },
- { -2, -2, 28, 63, 42, 3, -4, 0 }, { -2, -2, 27, 63, 43, 3, -4, 0 },
- { -2, -3, 27, 63, 43, 4, -4, 0 }, { -2, -3, 26, 62, 44, 5, -4, 0 },
- { -2, -3, 25, 62, 45, 5, -4, 0 }, { -2, -3, 25, 62, 45, 5, -4, 0 },
- { -2, -3, 24, 62, 46, 5, -4, 0 }, { -2, -3, 23, 61, 47, 6, -4, 0 },
- { -2, -3, 23, 61, 47, 6, -4, 0 }, { -2, -3, 22, 61, 48, 7, -4, -1 },
- { -2, -3, 21, 60, 49, 7, -4, 0 }, { -1, -4, 20, 60, 49, 8, -4, 0 },
- { -1, -4, 20, 60, 50, 8, -4, -1 }, { -1, -4, 19, 59, 51, 9, -4, -1 },
- { -1, -4, 19, 59, 51, 9, -4, -1 }, { -1, -4, 18, 58, 52, 10, -4, -1 },
- { -1, -4, 17, 58, 52, 11, -4, -1 }, { -1, -4, 16, 58, 53, 11, -4, -1 },
- { -1, -4, 16, 57, 53, 12, -4, -1 }, { -1, -4, 15, 57, 54, 12, -4, -1 },
- { -1, -4, 15, 56, 54, 13, -4, -1 }, { -1, -4, 14, 56, 55, 13, -4, -1 },
- { -1, -4, 14, 55, 55, 14, -4, -1 }, { -1, -4, 13, 55, 56, 14, -4, -1 },
- { -1, -4, 13, 54, 56, 15, -4, -1 }, { -1, -4, 12, 54, 57, 15, -4, -1 },
- { -1, -4, 12, 53, 57, 16, -4, -1 }, { -1, -4, 11, 53, 58, 16, -4, -1 },
- { -1, -4, 11, 52, 58, 17, -4, -1 }, { -1, -4, 10, 52, 58, 18, -4, -1 },
- { -1, -4, 9, 51, 59, 19, -4, -1 }, { -1, -4, 9, 51, 59, 19, -4, -1 },
- { -1, -4, 8, 50, 60, 20, -4, -1 }, { 0, -4, 8, 49, 60, 20, -4, -1 },
- { 0, -4, 7, 49, 60, 21, -3, -2 }, { -1, -4, 7, 48, 61, 22, -3, -2 },
- { 0, -4, 6, 47, 61, 23, -3, -2 }, { 0, -4, 6, 47, 61, 23, -3, -2 },
- { 0, -4, 5, 46, 62, 24, -3, -2 }, { 0, -4, 5, 45, 62, 25, -3, -2 },
- { 0, -4, 5, 45, 62, 25, -3, -2 }, { 0, -4, 5, 44, 62, 26, -3, -2 },
- { 0, -4, 4, 43, 63, 27, -3, -2 }, { 0, -4, 3, 43, 63, 27, -2, -2 },
- { 0, -4, 3, 42, 63, 28, -2, -2 }, { 0, -4, 3, 41, 63, 29, -2, -2 },
- { 0, -3, 2, 41, 63, 29, -2, -2 }, { 0, -3, 2, 40, 63, 30, -2, -2 },
- { 0, -3, 2, 39, 63, 31, -1, -3 }, { 0, -3, 1, 39, 64, 31, -1, -3 },
- { 0, -3, 1, 38, 64, 32, -1, -3 }, { 0, -3, 1, 37, 64, 33, -1, -3 },
- { 0, -3, 1, 36, 64, 34, -1, -3 }, { 0, -3, 0, 36, 64, 34, 0, -3 },
-};
-
-// Filters for interpolation (0.625-band) - note this also filters integer pels.
-static const InterpKernel filteredinterp_filters625[(1 << RS_SUBPEL_BITS)] = {
- { -1, -8, 33, 80, 33, -8, -1, 0 }, { -1, -8, 31, 80, 34, -8, -1, 1 },
- { -1, -8, 30, 80, 35, -8, -1, 1 }, { -1, -8, 29, 80, 36, -7, -2, 1 },
- { -1, -8, 28, 80, 37, -7, -2, 1 }, { -1, -8, 27, 80, 38, -7, -2, 1 },
- { 0, -8, 26, 79, 39, -7, -2, 1 }, { 0, -8, 25, 79, 40, -7, -2, 1 },
- { 0, -8, 24, 79, 41, -7, -2, 1 }, { 0, -8, 23, 78, 42, -6, -2, 1 },
- { 0, -8, 22, 78, 43, -6, -2, 1 }, { 0, -8, 21, 78, 44, -6, -2, 1 },
- { 0, -8, 20, 78, 45, -5, -3, 1 }, { 0, -8, 19, 77, 47, -5, -3, 1 },
- { 0, -8, 18, 77, 48, -5, -3, 1 }, { 0, -8, 17, 77, 49, -5, -3, 1 },
- { 0, -8, 16, 76, 50, -4, -3, 1 }, { 0, -8, 15, 76, 51, -4, -3, 1 },
- { 0, -8, 15, 75, 52, -3, -4, 1 }, { 0, -7, 14, 74, 53, -3, -4, 1 },
- { 0, -7, 13, 74, 54, -3, -4, 1 }, { 0, -7, 12, 73, 55, -2, -4, 1 },
- { 0, -7, 11, 73, 56, -2, -4, 1 }, { 0, -7, 10, 72, 57, -1, -4, 1 },
- { 1, -7, 10, 71, 58, -1, -5, 1 }, { 0, -7, 9, 71, 59, 0, -5, 1 },
- { 1, -7, 8, 70, 60, 0, -5, 1 }, { 1, -7, 7, 69, 61, 1, -5, 1 },
- { 1, -6, 6, 68, 62, 1, -5, 1 }, { 0, -6, 6, 68, 62, 2, -5, 1 },
- { 1, -6, 5, 67, 63, 2, -5, 1 }, { 1, -6, 5, 66, 64, 3, -6, 1 },
- { 1, -6, 4, 65, 65, 4, -6, 1 }, { 1, -6, 3, 64, 66, 5, -6, 1 },
- { 1, -5, 2, 63, 67, 5, -6, 1 }, { 1, -5, 2, 62, 68, 6, -6, 0 },
- { 1, -5, 1, 62, 68, 6, -6, 1 }, { 1, -5, 1, 61, 69, 7, -7, 1 },
- { 1, -5, 0, 60, 70, 8, -7, 1 }, { 1, -5, 0, 59, 71, 9, -7, 0 },
- { 1, -5, -1, 58, 71, 10, -7, 1 }, { 1, -4, -1, 57, 72, 10, -7, 0 },
- { 1, -4, -2, 56, 73, 11, -7, 0 }, { 1, -4, -2, 55, 73, 12, -7, 0 },
- { 1, -4, -3, 54, 74, 13, -7, 0 }, { 1, -4, -3, 53, 74, 14, -7, 0 },
- { 1, -4, -3, 52, 75, 15, -8, 0 }, { 1, -3, -4, 51, 76, 15, -8, 0 },
- { 1, -3, -4, 50, 76, 16, -8, 0 }, { 1, -3, -5, 49, 77, 17, -8, 0 },
- { 1, -3, -5, 48, 77, 18, -8, 0 }, { 1, -3, -5, 47, 77, 19, -8, 0 },
- { 1, -3, -5, 45, 78, 20, -8, 0 }, { 1, -2, -6, 44, 78, 21, -8, 0 },
- { 1, -2, -6, 43, 78, 22, -8, 0 }, { 1, -2, -6, 42, 78, 23, -8, 0 },
- { 1, -2, -7, 41, 79, 24, -8, 0 }, { 1, -2, -7, 40, 79, 25, -8, 0 },
- { 1, -2, -7, 39, 79, 26, -8, 0 }, { 1, -2, -7, 38, 80, 27, -8, -1 },
- { 1, -2, -7, 37, 80, 28, -8, -1 }, { 1, -2, -7, 36, 80, 29, -8, -1 },
- { 1, -1, -8, 35, 80, 30, -8, -1 }, { 1, -1, -8, 34, 80, 31, -8, -1 },
-};
-
-// Filters for interpolation (0.75-band) - note this also filters integer pels.
-static const InterpKernel filteredinterp_filters750[(1 << RS_SUBPEL_BITS)] = {
- { 2, -11, 25, 96, 25, -11, 2, 0 }, { 2, -11, 24, 96, 26, -11, 2, 0 },
- { 2, -11, 22, 96, 28, -11, 2, 0 }, { 2, -10, 21, 96, 29, -12, 2, 0 },
- { 2, -10, 19, 96, 31, -12, 2, 0 }, { 2, -10, 18, 95, 32, -11, 2, 0 },
- { 2, -10, 17, 95, 34, -12, 2, 0 }, { 2, -9, 15, 95, 35, -12, 2, 0 },
- { 2, -9, 14, 94, 37, -12, 2, 0 }, { 2, -9, 13, 94, 38, -12, 2, 0 },
- { 2, -8, 12, 93, 40, -12, 1, 0 }, { 2, -8, 11, 93, 41, -12, 1, 0 },
- { 2, -8, 9, 92, 43, -12, 1, 1 }, { 2, -8, 8, 92, 44, -12, 1, 1 },
- { 2, -7, 7, 91, 46, -12, 1, 0 }, { 2, -7, 6, 90, 47, -12, 1, 1 },
- { 2, -7, 5, 90, 49, -12, 1, 0 }, { 2, -6, 4, 89, 50, -12, 1, 0 },
- { 2, -6, 3, 88, 52, -12, 0, 1 }, { 2, -6, 2, 87, 54, -12, 0, 1 },
- { 2, -5, 1, 86, 55, -12, 0, 1 }, { 2, -5, 0, 85, 57, -12, 0, 1 },
- { 2, -5, -1, 84, 58, -11, 0, 1 }, { 2, -5, -2, 83, 60, -11, 0, 1 },
- { 2, -4, -2, 82, 61, -11, -1, 1 }, { 1, -4, -3, 81, 63, -10, -1, 1 },
- { 2, -4, -4, 80, 64, -10, -1, 1 }, { 1, -4, -4, 79, 66, -10, -1, 1 },
- { 1, -3, -5, 77, 67, -9, -1, 1 }, { 1, -3, -6, 76, 69, -9, -1, 1 },
- { 1, -3, -6, 75, 70, -8, -2, 1 }, { 1, -2, -7, 74, 71, -8, -2, 1 },
- { 1, -2, -7, 72, 72, -7, -2, 1 }, { 1, -2, -8, 71, 74, -7, -2, 1 },
- { 1, -2, -8, 70, 75, -6, -3, 1 }, { 1, -1, -9, 69, 76, -6, -3, 1 },
- { 1, -1, -9, 67, 77, -5, -3, 1 }, { 1, -1, -10, 66, 79, -4, -4, 1 },
- { 1, -1, -10, 64, 80, -4, -4, 2 }, { 1, -1, -10, 63, 81, -3, -4, 1 },
- { 1, -1, -11, 61, 82, -2, -4, 2 }, { 1, 0, -11, 60, 83, -2, -5, 2 },
- { 1, 0, -11, 58, 84, -1, -5, 2 }, { 1, 0, -12, 57, 85, 0, -5, 2 },
- { 1, 0, -12, 55, 86, 1, -5, 2 }, { 1, 0, -12, 54, 87, 2, -6, 2 },
- { 1, 0, -12, 52, 88, 3, -6, 2 }, { 0, 1, -12, 50, 89, 4, -6, 2 },
- { 0, 1, -12, 49, 90, 5, -7, 2 }, { 1, 1, -12, 47, 90, 6, -7, 2 },
- { 0, 1, -12, 46, 91, 7, -7, 2 }, { 1, 1, -12, 44, 92, 8, -8, 2 },
- { 1, 1, -12, 43, 92, 9, -8, 2 }, { 0, 1, -12, 41, 93, 11, -8, 2 },
- { 0, 1, -12, 40, 93, 12, -8, 2 }, { 0, 2, -12, 38, 94, 13, -9, 2 },
- { 0, 2, -12, 37, 94, 14, -9, 2 }, { 0, 2, -12, 35, 95, 15, -9, 2 },
- { 0, 2, -12, 34, 95, 17, -10, 2 }, { 0, 2, -11, 32, 95, 18, -10, 2 },
- { 0, 2, -12, 31, 96, 19, -10, 2 }, { 0, 2, -12, 29, 96, 21, -10, 2 },
- { 0, 2, -11, 28, 96, 22, -11, 2 }, { 0, 2, -11, 26, 96, 24, -11, 2 },
-};
-
-// Filters for interpolation (0.875-band) - note this also filters integer pels.
-static const InterpKernel filteredinterp_filters875[(1 << RS_SUBPEL_BITS)] = {
- { 3, -8, 13, 112, 13, -8, 3, 0 }, { 2, -7, 12, 112, 15, -8, 3, -1 },
- { 3, -7, 10, 112, 17, -9, 3, -1 }, { 2, -6, 8, 112, 19, -9, 3, -1 },
- { 2, -6, 7, 112, 21, -10, 3, -1 }, { 2, -5, 6, 111, 22, -10, 3, -1 },
- { 2, -5, 4, 111, 24, -10, 3, -1 }, { 2, -4, 3, 110, 26, -11, 3, -1 },
- { 2, -4, 1, 110, 28, -11, 3, -1 }, { 2, -4, 0, 109, 30, -12, 4, -1 },
- { 1, -3, -1, 108, 32, -12, 4, -1 }, { 1, -3, -2, 108, 34, -13, 4, -1 },
- { 1, -2, -4, 107, 36, -13, 4, -1 }, { 1, -2, -5, 106, 38, -13, 4, -1 },
- { 1, -1, -6, 105, 40, -14, 4, -1 }, { 1, -1, -7, 104, 42, -14, 4, -1 },
- { 1, -1, -7, 103, 44, -15, 4, -1 }, { 1, 0, -8, 101, 46, -15, 4, -1 },
- { 1, 0, -9, 100, 48, -15, 4, -1 }, { 1, 0, -10, 99, 50, -15, 4, -1 },
- { 1, 1, -11, 97, 53, -16, 4, -1 }, { 0, 1, -11, 96, 55, -16, 4, -1 },
- { 0, 1, -12, 95, 57, -16, 4, -1 }, { 0, 2, -13, 93, 59, -16, 4, -1 },
- { 0, 2, -13, 91, 61, -16, 4, -1 }, { 0, 2, -14, 90, 63, -16, 4, -1 },
- { 0, 2, -14, 88, 65, -16, 4, -1 }, { 0, 2, -15, 86, 67, -16, 4, 0 },
- { 0, 3, -15, 84, 69, -17, 4, 0 }, { 0, 3, -16, 83, 71, -17, 4, 0 },
- { 0, 3, -16, 81, 73, -16, 3, 0 }, { 0, 3, -16, 79, 75, -16, 3, 0 },
- { 0, 3, -16, 77, 77, -16, 3, 0 }, { 0, 3, -16, 75, 79, -16, 3, 0 },
- { 0, 3, -16, 73, 81, -16, 3, 0 }, { 0, 4, -17, 71, 83, -16, 3, 0 },
- { 0, 4, -17, 69, 84, -15, 3, 0 }, { 0, 4, -16, 67, 86, -15, 2, 0 },
- { -1, 4, -16, 65, 88, -14, 2, 0 }, { -1, 4, -16, 63, 90, -14, 2, 0 },
- { -1, 4, -16, 61, 91, -13, 2, 0 }, { -1, 4, -16, 59, 93, -13, 2, 0 },
- { -1, 4, -16, 57, 95, -12, 1, 0 }, { -1, 4, -16, 55, 96, -11, 1, 0 },
- { -1, 4, -16, 53, 97, -11, 1, 1 }, { -1, 4, -15, 50, 99, -10, 0, 1 },
- { -1, 4, -15, 48, 100, -9, 0, 1 }, { -1, 4, -15, 46, 101, -8, 0, 1 },
- { -1, 4, -15, 44, 103, -7, -1, 1 }, { -1, 4, -14, 42, 104, -7, -1, 1 },
- { -1, 4, -14, 40, 105, -6, -1, 1 }, { -1, 4, -13, 38, 106, -5, -2, 1 },
- { -1, 4, -13, 36, 107, -4, -2, 1 }, { -1, 4, -13, 34, 108, -2, -3, 1 },
- { -1, 4, -12, 32, 108, -1, -3, 1 }, { -1, 4, -12, 30, 109, 0, -4, 2 },
- { -1, 3, -11, 28, 110, 1, -4, 2 }, { -1, 3, -11, 26, 110, 3, -4, 2 },
- { -1, 3, -10, 24, 111, 4, -5, 2 }, { -1, 3, -10, 22, 111, 6, -5, 2 },
- { -1, 3, -10, 21, 112, 7, -6, 2 }, { -1, 3, -9, 19, 112, 8, -6, 2 },
- { -1, 3, -9, 17, 112, 10, -7, 3 }, { -1, 3, -8, 15, 112, 12, -7, 2 },
-};
-
-const int16_t av1_resize_filter_normative[(
- 1 << RS_SUBPEL_BITS)][UPSCALE_NORMATIVE_TAPS] = {
-#if UPSCALE_NORMATIVE_TAPS == 8
- { 0, 0, 0, 128, 0, 0, 0, 0 }, { 0, 0, -1, 128, 2, -1, 0, 0 },
- { 0, 1, -3, 127, 4, -2, 1, 0 }, { 0, 1, -4, 127, 6, -3, 1, 0 },
- { 0, 2, -6, 126, 8, -3, 1, 0 }, { 0, 2, -7, 125, 11, -4, 1, 0 },
- { -1, 2, -8, 125, 13, -5, 2, 0 }, { -1, 3, -9, 124, 15, -6, 2, 0 },
- { -1, 3, -10, 123, 18, -6, 2, -1 }, { -1, 3, -11, 122, 20, -7, 3, -1 },
- { -1, 4, -12, 121, 22, -8, 3, -1 }, { -1, 4, -13, 120, 25, -9, 3, -1 },
- { -1, 4, -14, 118, 28, -9, 3, -1 }, { -1, 4, -15, 117, 30, -10, 4, -1 },
- { -1, 5, -16, 116, 32, -11, 4, -1 }, { -1, 5, -16, 114, 35, -12, 4, -1 },
- { -1, 5, -17, 112, 38, -12, 4, -1 }, { -1, 5, -18, 111, 40, -13, 5, -1 },
- { -1, 5, -18, 109, 43, -14, 5, -1 }, { -1, 6, -19, 107, 45, -14, 5, -1 },
- { -1, 6, -19, 105, 48, -15, 5, -1 }, { -1, 6, -19, 103, 51, -16, 5, -1 },
- { -1, 6, -20, 101, 53, -16, 6, -1 }, { -1, 6, -20, 99, 56, -17, 6, -1 },
- { -1, 6, -20, 97, 58, -17, 6, -1 }, { -1, 6, -20, 95, 61, -18, 6, -1 },
- { -2, 7, -20, 93, 64, -18, 6, -2 }, { -2, 7, -20, 91, 66, -19, 6, -1 },
- { -2, 7, -20, 88, 69, -19, 6, -1 }, { -2, 7, -20, 86, 71, -19, 6, -1 },
- { -2, 7, -20, 84, 74, -20, 7, -2 }, { -2, 7, -20, 81, 76, -20, 7, -1 },
- { -2, 7, -20, 79, 79, -20, 7, -2 }, { -1, 7, -20, 76, 81, -20, 7, -2 },
- { -2, 7, -20, 74, 84, -20, 7, -2 }, { -1, 6, -19, 71, 86, -20, 7, -2 },
- { -1, 6, -19, 69, 88, -20, 7, -2 }, { -1, 6, -19, 66, 91, -20, 7, -2 },
- { -2, 6, -18, 64, 93, -20, 7, -2 }, { -1, 6, -18, 61, 95, -20, 6, -1 },
- { -1, 6, -17, 58, 97, -20, 6, -1 }, { -1, 6, -17, 56, 99, -20, 6, -1 },
- { -1, 6, -16, 53, 101, -20, 6, -1 }, { -1, 5, -16, 51, 103, -19, 6, -1 },
- { -1, 5, -15, 48, 105, -19, 6, -1 }, { -1, 5, -14, 45, 107, -19, 6, -1 },
- { -1, 5, -14, 43, 109, -18, 5, -1 }, { -1, 5, -13, 40, 111, -18, 5, -1 },
- { -1, 4, -12, 38, 112, -17, 5, -1 }, { -1, 4, -12, 35, 114, -16, 5, -1 },
- { -1, 4, -11, 32, 116, -16, 5, -1 }, { -1, 4, -10, 30, 117, -15, 4, -1 },
- { -1, 3, -9, 28, 118, -14, 4, -1 }, { -1, 3, -9, 25, 120, -13, 4, -1 },
- { -1, 3, -8, 22, 121, -12, 4, -1 }, { -1, 3, -7, 20, 122, -11, 3, -1 },
- { -1, 2, -6, 18, 123, -10, 3, -1 }, { 0, 2, -6, 15, 124, -9, 3, -1 },
- { 0, 2, -5, 13, 125, -8, 2, -1 }, { 0, 1, -4, 11, 125, -7, 2, 0 },
- { 0, 1, -3, 8, 126, -6, 2, 0 }, { 0, 1, -3, 6, 127, -4, 1, 0 },
- { 0, 1, -2, 4, 127, -3, 1, 0 }, { 0, 0, -1, 2, 128, -1, 0, 0 },
-#else
-#error "Invalid value of UPSCALE_NORMATIVE_TAPS"
-#endif // UPSCALE_NORMATIVE_TAPS == 8
-};
-
-// Filters for interpolation (full-band) - no filtering for integer pixels
-#define filteredinterp_filters1000 av1_resize_filter_normative
-
-// Filters for factor of 2 downsampling.
-static const int16_t av1_down2_symeven_half_filter[] = { 56, 12, -3, -1 };
-static const int16_t av1_down2_symodd_half_filter[] = { 64, 35, 0, -3 };
-
-static const InterpKernel *choose_interp_filter(int in_length, int out_length) {
- int out_length16 = out_length * 16;
- if (out_length16 >= in_length * 16)
- return filteredinterp_filters1000;
- else if (out_length16 >= in_length * 13)
- return filteredinterp_filters875;
- else if (out_length16 >= in_length * 11)
- return filteredinterp_filters750;
- else if (out_length16 >= in_length * 9)
- return filteredinterp_filters625;
- else
- return filteredinterp_filters500;
-}
-
-static void interpolate_core(const uint8_t *const input, int in_length,
- uint8_t *output, int out_length,
- const int16_t *interp_filters, int interp_taps) {
- const int32_t delta =
- (((uint32_t)in_length << RS_SCALE_SUBPEL_BITS) + out_length / 2) /
- out_length;
- const int32_t offset =
- in_length > out_length
- ? (((int32_t)(in_length - out_length) << (RS_SCALE_SUBPEL_BITS - 1)) +
- out_length / 2) /
- out_length
- : -(((int32_t)(out_length - in_length)
- << (RS_SCALE_SUBPEL_BITS - 1)) +
- out_length / 2) /
- out_length;
- uint8_t *optr = output;
- int x, x1, x2, sum, k, int_pel, sub_pel;
- int32_t y;
-
- x = 0;
- y = offset + RS_SCALE_EXTRA_OFF;
- while ((y >> RS_SCALE_SUBPEL_BITS) < (interp_taps / 2 - 1)) {
- x++;
- y += delta;
- }
- x1 = x;
- x = out_length - 1;
- y = delta * x + offset + RS_SCALE_EXTRA_OFF;
- while ((y >> RS_SCALE_SUBPEL_BITS) + (int32_t)(interp_taps / 2) >=
- in_length) {
- x--;
- y -= delta;
- }
- x2 = x;
- if (x1 > x2) {
- for (x = 0, y = offset + RS_SCALE_EXTRA_OFF; x < out_length;
- ++x, y += delta) {
- int_pel = y >> RS_SCALE_SUBPEL_BITS;
- sub_pel = (y >> RS_SCALE_EXTRA_BITS) & RS_SUBPEL_MASK;
- const int16_t *filter = &interp_filters[sub_pel * interp_taps];
- sum = 0;
- for (k = 0; k < interp_taps; ++k) {
- const int pk = int_pel - interp_taps / 2 + 1 + k;
- sum += filter[k] * input[AOMMAX(AOMMIN(pk, in_length - 1), 0)];
- }
- *optr++ = clip_pixel(ROUND_POWER_OF_TWO(sum, FILTER_BITS));
- }
- } else {
- // Initial part.
- for (x = 0, y = offset + RS_SCALE_EXTRA_OFF; x < x1; ++x, y += delta) {
- int_pel = y >> RS_SCALE_SUBPEL_BITS;
- sub_pel = (y >> RS_SCALE_EXTRA_BITS) & RS_SUBPEL_MASK;
- const int16_t *filter = &interp_filters[sub_pel * interp_taps];
- sum = 0;
- for (k = 0; k < interp_taps; ++k)
- sum += filter[k] * input[AOMMAX(int_pel - interp_taps / 2 + 1 + k, 0)];
- *optr++ = clip_pixel(ROUND_POWER_OF_TWO(sum, FILTER_BITS));
- }
- // Middle part.
- for (; x <= x2; ++x, y += delta) {
- int_pel = y >> RS_SCALE_SUBPEL_BITS;
- sub_pel = (y >> RS_SCALE_EXTRA_BITS) & RS_SUBPEL_MASK;
- const int16_t *filter = &interp_filters[sub_pel * interp_taps];
- sum = 0;
- for (k = 0; k < interp_taps; ++k)
- sum += filter[k] * input[int_pel - interp_taps / 2 + 1 + k];
- *optr++ = clip_pixel(ROUND_POWER_OF_TWO(sum, FILTER_BITS));
- }
- // End part.
- for (; x < out_length; ++x, y += delta) {
- int_pel = y >> RS_SCALE_SUBPEL_BITS;
- sub_pel = (y >> RS_SCALE_EXTRA_BITS) & RS_SUBPEL_MASK;
- const int16_t *filter = &interp_filters[sub_pel * interp_taps];
- sum = 0;
- for (k = 0; k < interp_taps; ++k)
- sum += filter[k] *
- input[AOMMIN(int_pel - interp_taps / 2 + 1 + k, in_length - 1)];
- *optr++ = clip_pixel(ROUND_POWER_OF_TWO(sum, FILTER_BITS));
- }
- }
-}
-
-static void interpolate(const uint8_t *const input, int in_length,
- uint8_t *output, int out_length) {
- const InterpKernel *interp_filters =
- choose_interp_filter(in_length, out_length);
-
- interpolate_core(input, in_length, output, out_length, &interp_filters[0][0],
- SUBPEL_TAPS);
-}
-
-int32_t av1_get_upscale_convolve_step(int in_length, int out_length) {
- return ((in_length << RS_SCALE_SUBPEL_BITS) + out_length / 2) / out_length;
-}
-
-static int32_t get_upscale_convolve_x0(int in_length, int out_length,
- int32_t x_step_qn) {
- const int err = out_length * x_step_qn - (in_length << RS_SCALE_SUBPEL_BITS);
- const int32_t x0 =
- (-((out_length - in_length) << (RS_SCALE_SUBPEL_BITS - 1)) +
- out_length / 2) /
- out_length +
- RS_SCALE_EXTRA_OFF - err / 2;
- return (int32_t)((uint32_t)x0 & RS_SCALE_SUBPEL_MASK);
-}
-
-#ifndef __clang_analyzer__
-static void down2_symeven(const uint8_t *const input, int length,
- uint8_t *output) {
- // Actual filter len = 2 * filter_len_half.
- const int16_t *filter = av1_down2_symeven_half_filter;
- const int filter_len_half = sizeof(av1_down2_symeven_half_filter) / 2;
- int i, j;
- uint8_t *optr = output;
- int l1 = filter_len_half;
- int l2 = (length - filter_len_half);
- l1 += (l1 & 1);
- l2 += (l2 & 1);
- if (l1 > l2) {
- // Short input length.
- for (i = 0; i < length; i += 2) {
- int sum = (1 << (FILTER_BITS - 1));
- for (j = 0; j < filter_len_half; ++j) {
- sum +=
- (input[AOMMAX(i - j, 0)] + input[AOMMIN(i + 1 + j, length - 1)]) *
- filter[j];
- }
- sum >>= FILTER_BITS;
- *optr++ = clip_pixel(sum);
- }
- } else {
- // Initial part.
- for (i = 0; i < l1; i += 2) {
- int sum = (1 << (FILTER_BITS - 1));
- for (j = 0; j < filter_len_half; ++j) {
- sum += (input[AOMMAX(i - j, 0)] + input[i + 1 + j]) * filter[j];
- }
- sum >>= FILTER_BITS;
- *optr++ = clip_pixel(sum);
- }
- // Middle part.
- for (; i < l2; i += 2) {
- int sum = (1 << (FILTER_BITS - 1));
- for (j = 0; j < filter_len_half; ++j) {
- sum += (input[i - j] + input[i + 1 + j]) * filter[j];
- }
- sum >>= FILTER_BITS;
- *optr++ = clip_pixel(sum);
- }
- // End part.
- for (; i < length; i += 2) {
- int sum = (1 << (FILTER_BITS - 1));
- for (j = 0; j < filter_len_half; ++j) {
- sum +=
- (input[i - j] + input[AOMMIN(i + 1 + j, length - 1)]) * filter[j];
- }
- sum >>= FILTER_BITS;
- *optr++ = clip_pixel(sum);
- }
- }
-}
-#endif
-
-static void down2_symodd(const uint8_t *const input, int length,
- uint8_t *output) {
- // Actual filter len = 2 * filter_len_half - 1.
- const int16_t *filter = av1_down2_symodd_half_filter;
- const int filter_len_half = sizeof(av1_down2_symodd_half_filter) / 2;
- int i, j;
- uint8_t *optr = output;
- int l1 = filter_len_half - 1;
- int l2 = (length - filter_len_half + 1);
- l1 += (l1 & 1);
- l2 += (l2 & 1);
- if (l1 > l2) {
- // Short input length.
- for (i = 0; i < length; i += 2) {
- int sum = (1 << (FILTER_BITS - 1)) + input[i] * filter[0];
- for (j = 1; j < filter_len_half; ++j) {
- sum += (input[(i - j < 0 ? 0 : i - j)] +
- input[(i + j >= length ? length - 1 : i + j)]) *
- filter[j];
- }
- sum >>= FILTER_BITS;
- *optr++ = clip_pixel(sum);
- }
- } else {
- // Initial part.
- for (i = 0; i < l1; i += 2) {
- int sum = (1 << (FILTER_BITS - 1)) + input[i] * filter[0];
- for (j = 1; j < filter_len_half; ++j) {
- sum += (input[(i - j < 0 ? 0 : i - j)] + input[i + j]) * filter[j];
- }
- sum >>= FILTER_BITS;
- *optr++ = clip_pixel(sum);
- }
- // Middle part.
- for (; i < l2; i += 2) {
- int sum = (1 << (FILTER_BITS - 1)) + input[i] * filter[0];
- for (j = 1; j < filter_len_half; ++j) {
- sum += (input[i - j] + input[i + j]) * filter[j];
- }
- sum >>= FILTER_BITS;
- *optr++ = clip_pixel(sum);
- }
- // End part.
- for (; i < length; i += 2) {
- int sum = (1 << (FILTER_BITS - 1)) + input[i] * filter[0];
- for (j = 1; j < filter_len_half; ++j) {
- sum += (input[i - j] + input[(i + j >= length ? length - 1 : i + j)]) *
- filter[j];
- }
- sum >>= FILTER_BITS;
- *optr++ = clip_pixel(sum);
- }
- }
-}
-
-static int get_down2_length(int length, int steps) {
- for (int s = 0; s < steps; ++s) length = (length + 1) >> 1;
- return length;
-}
-
-static int get_down2_steps(int in_length, int out_length) {
- int steps = 0;
- int proj_in_length;
- while ((proj_in_length = get_down2_length(in_length, 1)) >= out_length) {
- ++steps;
- in_length = proj_in_length;
- if (in_length == 1) {
- // Special case: we break because any further calls to get_down2_length()
- // with be with length == 1, which return 1, resulting in an infinite
- // loop.
- break;
- }
- }
- return steps;
-}
-
-static void resize_multistep(const uint8_t *const input, int length,
- uint8_t *output, int olength, uint8_t *otmp) {
- if (length == olength) {
- memcpy(output, input, sizeof(output[0]) * length);
- return;
- }
- const int steps = get_down2_steps(length, olength);
-
- if (steps > 0) {
- uint8_t *out = NULL;
- int filteredlength = length;
-
- assert(otmp != NULL);
- uint8_t *otmp2 = otmp + get_down2_length(length, 1);
- for (int s = 0; s < steps; ++s) {
- const int proj_filteredlength = get_down2_length(filteredlength, 1);
- const uint8_t *const in = (s == 0 ? input : out);
- if (s == steps - 1 && proj_filteredlength == olength)
- out = output;
- else
- out = (s & 1 ? otmp2 : otmp);
- if (filteredlength & 1)
- down2_symodd(in, filteredlength, out);
- else
- down2_symeven(in, filteredlength, out);
- filteredlength = proj_filteredlength;
- }
- if (filteredlength != olength) {
- interpolate(out, filteredlength, output, olength);
- }
- } else {
- interpolate(input, length, output, olength);
- }
-}
-
-static void fill_col_to_arr(uint8_t *img, int stride, int len, uint8_t *arr) {
- int i;
- uint8_t *iptr = img;
- uint8_t *aptr = arr;
- for (i = 0; i < len; ++i, iptr += stride) {
- *aptr++ = *iptr;
- }
-}
-
-static void fill_arr_to_col(uint8_t *img, int stride, int len, uint8_t *arr) {
- int i;
- uint8_t *iptr = img;
- uint8_t *aptr = arr;
- for (i = 0; i < len; ++i, iptr += stride) {
- *iptr = *aptr++;
- }
-}
-
-static void resize_plane(const uint8_t *const input, int height, int width,
- int in_stride, uint8_t *output, int height2,
- int width2, int out_stride) {
- int i;
- uint8_t *intbuf = (uint8_t *)aom_malloc(sizeof(uint8_t) * width2 * height);
- uint8_t *tmpbuf =
- (uint8_t *)aom_malloc(sizeof(uint8_t) * AOMMAX(width, height));
- uint8_t *arrbuf = (uint8_t *)aom_malloc(sizeof(uint8_t) * height);
- uint8_t *arrbuf2 = (uint8_t *)aom_malloc(sizeof(uint8_t) * height2);
- if (intbuf == NULL || tmpbuf == NULL || arrbuf == NULL || arrbuf2 == NULL)
- goto Error;
- assert(width > 0);
- assert(height > 0);
- assert(width2 > 0);
- assert(height2 > 0);
- for (i = 0; i < height; ++i)
- resize_multistep(input + in_stride * i, width, intbuf + width2 * i, width2,
- tmpbuf);
- for (i = 0; i < width2; ++i) {
- fill_col_to_arr(intbuf + i, width2, height, arrbuf);
- resize_multistep(arrbuf, height, arrbuf2, height2, tmpbuf);
- fill_arr_to_col(output + i, out_stride, height2, arrbuf2);
- }
-
-Error:
- aom_free(intbuf);
- aom_free(tmpbuf);
- aom_free(arrbuf);
- aom_free(arrbuf2);
-}
-
-static void upscale_normative_rect(const uint8_t *const input, int height,
- int width, int in_stride, uint8_t *output,
- int height2, int width2, int out_stride,
- int x_step_qn, int x0_qn, int pad_left,
- int pad_right) {
- assert(width > 0);
- assert(height > 0);
- assert(width2 > 0);
- assert(height2 > 0);
- assert(height2 == height);
-
- // Extend the left/right pixels of the tile column if needed
- // (either because we can't sample from other tiles, or because we're at
- // a frame edge).
- // Save the overwritten pixels into tmp_left and tmp_right.
- // Note: Because we pass input-1 to av1_convolve_horiz_rs, we need one extra
- // column of border pixels compared to what we'd naively think.
- const int border_cols = UPSCALE_NORMATIVE_TAPS / 2 + 1;
- uint8_t *tmp_left =
- NULL; // Silence spurious "may be used uninitialized" warnings
- uint8_t *tmp_right = NULL;
- uint8_t *const in_tl = (uint8_t *)(input - border_cols); // Cast off 'const'
- uint8_t *const in_tr = (uint8_t *)(input + width);
- if (pad_left) {
- tmp_left = (uint8_t *)aom_malloc(sizeof(*tmp_left) * border_cols * height);
- for (int i = 0; i < height; i++) {
- memcpy(tmp_left + i * border_cols, in_tl + i * in_stride, border_cols);
- memset(in_tl + i * in_stride, input[i * in_stride], border_cols);
- }
- }
- if (pad_right) {
- tmp_right =
- (uint8_t *)aom_malloc(sizeof(*tmp_right) * border_cols * height);
- for (int i = 0; i < height; i++) {
- memcpy(tmp_right + i * border_cols, in_tr + i * in_stride, border_cols);
- memset(in_tr + i * in_stride, input[i * in_stride + width - 1],
- border_cols);
- }
- }
-
- av1_convolve_horiz_rs(input - 1, in_stride, output, out_stride, width2,
- height2, &av1_resize_filter_normative[0][0], x0_qn,
- x_step_qn);
-
- // Restore the left/right border pixels
- if (pad_left) {
- for (int i = 0; i < height; i++) {
- memcpy(in_tl + i * in_stride, tmp_left + i * border_cols, border_cols);
- }
- aom_free(tmp_left);
- }
- if (pad_right) {
- for (int i = 0; i < height; i++) {
- memcpy(in_tr + i * in_stride, tmp_right + i * border_cols, border_cols);
- }
- aom_free(tmp_right);
- }
-}
-
-static void highbd_interpolate_core(const uint16_t *const input, int in_length,
- uint16_t *output, int out_length, int bd,
- const int16_t *interp_filters,
- int interp_taps) {
- const int32_t delta =
- (((uint32_t)in_length << RS_SCALE_SUBPEL_BITS) + out_length / 2) /
- out_length;
- const int32_t offset =
- in_length > out_length
- ? (((int32_t)(in_length - out_length) << (RS_SCALE_SUBPEL_BITS - 1)) +
- out_length / 2) /
- out_length
- : -(((int32_t)(out_length - in_length)
- << (RS_SCALE_SUBPEL_BITS - 1)) +
- out_length / 2) /
- out_length;
- uint16_t *optr = output;
- int x, x1, x2, sum, k, int_pel, sub_pel;
- int32_t y;
-
- x = 0;
- y = offset + RS_SCALE_EXTRA_OFF;
- while ((y >> RS_SCALE_SUBPEL_BITS) < (interp_taps / 2 - 1)) {
- x++;
- y += delta;
- }
- x1 = x;
- x = out_length - 1;
- y = delta * x + offset + RS_SCALE_EXTRA_OFF;
- while ((y >> RS_SCALE_SUBPEL_BITS) + (int32_t)(interp_taps / 2) >=
- in_length) {
- x--;
- y -= delta;
- }
- x2 = x;
- if (x1 > x2) {
- for (x = 0, y = offset + RS_SCALE_EXTRA_OFF; x < out_length;
- ++x, y += delta) {
- int_pel = y >> RS_SCALE_SUBPEL_BITS;
- sub_pel = (y >> RS_SCALE_EXTRA_BITS) & RS_SUBPEL_MASK;
- const int16_t *filter = &interp_filters[sub_pel * interp_taps];
- sum = 0;
- for (k = 0; k < interp_taps; ++k) {
- const int pk = int_pel - interp_taps / 2 + 1 + k;
- sum += filter[k] * input[AOMMAX(AOMMIN(pk, in_length - 1), 0)];
- }
- *optr++ = clip_pixel_highbd(ROUND_POWER_OF_TWO(sum, FILTER_BITS), bd);
- }
- } else {
- // Initial part.
- for (x = 0, y = offset + RS_SCALE_EXTRA_OFF; x < x1; ++x, y += delta) {
- int_pel = y >> RS_SCALE_SUBPEL_BITS;
- sub_pel = (y >> RS_SCALE_EXTRA_BITS) & RS_SUBPEL_MASK;
- const int16_t *filter = &interp_filters[sub_pel * interp_taps];
- sum = 0;
- for (k = 0; k < interp_taps; ++k)
- sum += filter[k] * input[AOMMAX(int_pel - interp_taps / 2 + 1 + k, 0)];
- *optr++ = clip_pixel_highbd(ROUND_POWER_OF_TWO(sum, FILTER_BITS), bd);
- }
- // Middle part.
- for (; x <= x2; ++x, y += delta) {
- int_pel = y >> RS_SCALE_SUBPEL_BITS;
- sub_pel = (y >> RS_SCALE_EXTRA_BITS) & RS_SUBPEL_MASK;
- const int16_t *filter = &interp_filters[sub_pel * interp_taps];
- sum = 0;
- for (k = 0; k < interp_taps; ++k)
- sum += filter[k] * input[int_pel - interp_taps / 2 + 1 + k];
- *optr++ = clip_pixel_highbd(ROUND_POWER_OF_TWO(sum, FILTER_BITS), bd);
- }
- // End part.
- for (; x < out_length; ++x, y += delta) {
- int_pel = y >> RS_SCALE_SUBPEL_BITS;
- sub_pel = (y >> RS_SCALE_EXTRA_BITS) & RS_SUBPEL_MASK;
- const int16_t *filter = &interp_filters[sub_pel * interp_taps];
- sum = 0;
- for (k = 0; k < interp_taps; ++k)
- sum += filter[k] *
- input[AOMMIN(int_pel - interp_taps / 2 + 1 + k, in_length - 1)];
- *optr++ = clip_pixel_highbd(ROUND_POWER_OF_TWO(sum, FILTER_BITS), bd);
- }
- }
-}
-
-static void highbd_interpolate(const uint16_t *const input, int in_length,
- uint16_t *output, int out_length, int bd) {
- const InterpKernel *interp_filters =
- choose_interp_filter(in_length, out_length);
-
- highbd_interpolate_core(input, in_length, output, out_length, bd,
- &interp_filters[0][0], SUBPEL_TAPS);
-}
-
-#ifndef __clang_analyzer__
-static void highbd_down2_symeven(const uint16_t *const input, int length,
- uint16_t *output, int bd) {
- // Actual filter len = 2 * filter_len_half.
- static const int16_t *filter = av1_down2_symeven_half_filter;
- const int filter_len_half = sizeof(av1_down2_symeven_half_filter) / 2;
- int i, j;
- uint16_t *optr = output;
- int l1 = filter_len_half;
- int l2 = (length - filter_len_half);
- l1 += (l1 & 1);
- l2 += (l2 & 1);
- if (l1 > l2) {
- // Short input length.
- for (i = 0; i < length; i += 2) {
- int sum = (1 << (FILTER_BITS - 1));
- for (j = 0; j < filter_len_half; ++j) {
- sum +=
- (input[AOMMAX(0, i - j)] + input[AOMMIN(i + 1 + j, length - 1)]) *
- filter[j];
- }
- sum >>= FILTER_BITS;
- *optr++ = clip_pixel_highbd(sum, bd);
- }
- } else {
- // Initial part.
- for (i = 0; i < l1; i += 2) {
- int sum = (1 << (FILTER_BITS - 1));
- for (j = 0; j < filter_len_half; ++j) {
- sum += (input[AOMMAX(0, i - j)] + input[i + 1 + j]) * filter[j];
- }
- sum >>= FILTER_BITS;
- *optr++ = clip_pixel_highbd(sum, bd);
- }
- // Middle part.
- for (; i < l2; i += 2) {
- int sum = (1 << (FILTER_BITS - 1));
- for (j = 0; j < filter_len_half; ++j) {
- sum += (input[i - j] + input[i + 1 + j]) * filter[j];
- }
- sum >>= FILTER_BITS;
- *optr++ = clip_pixel_highbd(sum, bd);
- }
- // End part.
- for (; i < length; i += 2) {
- int sum = (1 << (FILTER_BITS - 1));
- for (j = 0; j < filter_len_half; ++j) {
- sum +=
- (input[i - j] + input[AOMMIN(i + 1 + j, length - 1)]) * filter[j];
- }
- sum >>= FILTER_BITS;
- *optr++ = clip_pixel_highbd(sum, bd);
- }
- }
-}
-
-static void highbd_down2_symodd(const uint16_t *const input, int length,
- uint16_t *output, int bd) {
- // Actual filter len = 2 * filter_len_half - 1.
- static const int16_t *filter = av1_down2_symodd_half_filter;
- const int filter_len_half = sizeof(av1_down2_symodd_half_filter) / 2;
- int i, j;
- uint16_t *optr = output;
- int l1 = filter_len_half - 1;
- int l2 = (length - filter_len_half + 1);
- l1 += (l1 & 1);
- l2 += (l2 & 1);
- if (l1 > l2) {
- // Short input length.
- for (i = 0; i < length; i += 2) {
- int sum = (1 << (FILTER_BITS - 1)) + input[i] * filter[0];
- for (j = 1; j < filter_len_half; ++j) {
- sum += (input[AOMMAX(i - j, 0)] + input[AOMMIN(i + j, length - 1)]) *
- filter[j];
- }
- sum >>= FILTER_BITS;
- *optr++ = clip_pixel_highbd(sum, bd);
- }
- } else {
- // Initial part.
- for (i = 0; i < l1; i += 2) {
- int sum = (1 << (FILTER_BITS - 1)) + input[i] * filter[0];
- for (j = 1; j < filter_len_half; ++j) {
- sum += (input[AOMMAX(i - j, 0)] + input[i + j]) * filter[j];
- }
- sum >>= FILTER_BITS;
- *optr++ = clip_pixel_highbd(sum, bd);
- }
- // Middle part.
- for (; i < l2; i += 2) {
- int sum = (1 << (FILTER_BITS - 1)) + input[i] * filter[0];
- for (j = 1; j < filter_len_half; ++j) {
- sum += (input[i - j] + input[i + j]) * filter[j];
- }
- sum >>= FILTER_BITS;
- *optr++ = clip_pixel_highbd(sum, bd);
- }
- // End part.
- for (; i < length; i += 2) {
- int sum = (1 << (FILTER_BITS - 1)) + input[i] * filter[0];
- for (j = 1; j < filter_len_half; ++j) {
- sum += (input[i - j] + input[AOMMIN(i + j, length - 1)]) * filter[j];
- }
- sum >>= FILTER_BITS;
- *optr++ = clip_pixel_highbd(sum, bd);
- }
- }
-}
-#endif
-
-static void highbd_resize_multistep(const uint16_t *const input, int length,
- uint16_t *output, int olength,
- uint16_t *otmp, int bd) {
- if (length == olength) {
- memcpy(output, input, sizeof(output[0]) * length);
- return;
- }
- const int steps = get_down2_steps(length, olength);
-
- if (steps > 0) {
- uint16_t *out = NULL;
- int filteredlength = length;
-
- assert(otmp != NULL);
- uint16_t *otmp2 = otmp + get_down2_length(length, 1);
- for (int s = 0; s < steps; ++s) {
- const int proj_filteredlength = get_down2_length(filteredlength, 1);
- const uint16_t *const in = (s == 0 ? input : out);
- if (s == steps - 1 && proj_filteredlength == olength)
- out = output;
- else
- out = (s & 1 ? otmp2 : otmp);
- if (filteredlength & 1)
- highbd_down2_symodd(in, filteredlength, out, bd);
- else
- highbd_down2_symeven(in, filteredlength, out, bd);
- filteredlength = proj_filteredlength;
- }
- if (filteredlength != olength) {
- highbd_interpolate(out, filteredlength, output, olength, bd);
- }
- } else {
- highbd_interpolate(input, length, output, olength, bd);
- }
-}
-
-static void highbd_fill_col_to_arr(uint16_t *img, int stride, int len,
- uint16_t *arr) {
- int i;
- uint16_t *iptr = img;
- uint16_t *aptr = arr;
- for (i = 0; i < len; ++i, iptr += stride) {
- *aptr++ = *iptr;
- }
-}
-
-static void highbd_fill_arr_to_col(uint16_t *img, int stride, int len,
- uint16_t *arr) {
- int i;
- uint16_t *iptr = img;
- uint16_t *aptr = arr;
- for (i = 0; i < len; ++i, iptr += stride) {
- *iptr = *aptr++;
- }
-}
-
-static void highbd_resize_plane(const uint8_t *const input, int height,
- int width, int in_stride, uint8_t *output,
- int height2, int width2, int out_stride,
- int bd) {
- int i;
- uint16_t *intbuf = (uint16_t *)aom_malloc(sizeof(uint16_t) * width2 * height);
- uint16_t *tmpbuf =
- (uint16_t *)aom_malloc(sizeof(uint16_t) * AOMMAX(width, height));
- uint16_t *arrbuf = (uint16_t *)aom_malloc(sizeof(uint16_t) * height);
- uint16_t *arrbuf2 = (uint16_t *)aom_malloc(sizeof(uint16_t) * height2);
- if (intbuf == NULL || tmpbuf == NULL || arrbuf == NULL || arrbuf2 == NULL)
- goto Error;
- for (i = 0; i < height; ++i) {
- highbd_resize_multistep(CONVERT_TO_SHORTPTR(input + in_stride * i), width,
- intbuf + width2 * i, width2, tmpbuf, bd);
- }
- for (i = 0; i < width2; ++i) {
- highbd_fill_col_to_arr(intbuf + i, width2, height, arrbuf);
- highbd_resize_multistep(arrbuf, height, arrbuf2, height2, tmpbuf, bd);
- highbd_fill_arr_to_col(CONVERT_TO_SHORTPTR(output + i), out_stride, height2,
- arrbuf2);
- }
-
-Error:
- aom_free(intbuf);
- aom_free(tmpbuf);
- aom_free(arrbuf);
- aom_free(arrbuf2);
-}
-
-static void highbd_upscale_normative_rect(const uint8_t *const input,
- int height, int width, int in_stride,
- uint8_t *output, int height2,
- int width2, int out_stride,
- int x_step_qn, int x0_qn,
- int pad_left, int pad_right, int bd) {
- assert(width > 0);
- assert(height > 0);
- assert(width2 > 0);
- assert(height2 > 0);
- assert(height2 == height);
-
- // Extend the left/right pixels of the tile column if needed
- // (either because we can't sample from other tiles, or because we're at
- // a frame edge).
- // Save the overwritten pixels into tmp_left and tmp_right.
- // Note: Because we pass input-1 to av1_convolve_horiz_rs, we need one extra
- // column of border pixels compared to what we'd naively think.
- const int border_cols = UPSCALE_NORMATIVE_TAPS / 2 + 1;
- const int border_size = border_cols * sizeof(uint16_t);
- uint16_t *tmp_left =
- NULL; // Silence spurious "may be used uninitialized" warnings
- uint16_t *tmp_right = NULL;
- uint16_t *const input16 = CONVERT_TO_SHORTPTR(input);
- uint16_t *const in_tl = input16 - border_cols;
- uint16_t *const in_tr = input16 + width;
- if (pad_left) {
- tmp_left = (uint16_t *)aom_malloc(sizeof(*tmp_left) * border_cols * height);
- for (int i = 0; i < height; i++) {
- memcpy(tmp_left + i * border_cols, in_tl + i * in_stride, border_size);
- aom_memset16(in_tl + i * in_stride, input16[i * in_stride], border_cols);
- }
- }
- if (pad_right) {
- tmp_right =
- (uint16_t *)aom_malloc(sizeof(*tmp_right) * border_cols * height);
- for (int i = 0; i < height; i++) {
- memcpy(tmp_right + i * border_cols, in_tr + i * in_stride, border_size);
- aom_memset16(in_tr + i * in_stride, input16[i * in_stride + width - 1],
- border_cols);
- }
- }
-
- av1_highbd_convolve_horiz_rs(CONVERT_TO_SHORTPTR(input - 1), in_stride,
- CONVERT_TO_SHORTPTR(output), out_stride, width2,
- height2, &av1_resize_filter_normative[0][0],
- x0_qn, x_step_qn, bd);
-
- // Restore the left/right border pixels
- if (pad_left) {
- for (int i = 0; i < height; i++) {
- memcpy(in_tl + i * in_stride, tmp_left + i * border_cols, border_size);
- }
- aom_free(tmp_left);
- }
- if (pad_right) {
- for (int i = 0; i < height; i++) {
- memcpy(in_tr + i * in_stride, tmp_right + i * border_cols, border_size);
- }
- aom_free(tmp_right);
- }
-}
-
-void av1_resize_frame420(const uint8_t *const y, int y_stride,
- const uint8_t *const u, const uint8_t *const v,
- int uv_stride, int height, int width, uint8_t *oy,
- int oy_stride, uint8_t *ou, uint8_t *ov,
- int ouv_stride, int oheight, int owidth) {
- resize_plane(y, height, width, y_stride, oy, oheight, owidth, oy_stride);
- resize_plane(u, height / 2, width / 2, uv_stride, ou, oheight / 2, owidth / 2,
- ouv_stride);
- resize_plane(v, height / 2, width / 2, uv_stride, ov, oheight / 2, owidth / 2,
- ouv_stride);
-}
-
-void av1_resize_frame422(const uint8_t *const y, int y_stride,
- const uint8_t *const u, const uint8_t *const v,
- int uv_stride, int height, int width, uint8_t *oy,
- int oy_stride, uint8_t *ou, uint8_t *ov,
- int ouv_stride, int oheight, int owidth) {
- resize_plane(y, height, width, y_stride, oy, oheight, owidth, oy_stride);
- resize_plane(u, height, width / 2, uv_stride, ou, oheight, owidth / 2,
- ouv_stride);
- resize_plane(v, height, width / 2, uv_stride, ov, oheight, owidth / 2,
- ouv_stride);
-}
-
-void av1_resize_frame444(const uint8_t *const y, int y_stride,
- const uint8_t *const u, const uint8_t *const v,
- int uv_stride, int height, int width, uint8_t *oy,
- int oy_stride, uint8_t *ou, uint8_t *ov,
- int ouv_stride, int oheight, int owidth) {
- resize_plane(y, height, width, y_stride, oy, oheight, owidth, oy_stride);
- resize_plane(u, height, width, uv_stride, ou, oheight, owidth, ouv_stride);
- resize_plane(v, height, width, uv_stride, ov, oheight, owidth, ouv_stride);
-}
-
-void av1_highbd_resize_frame420(const uint8_t *const y, int y_stride,
- const uint8_t *const u, const uint8_t *const v,
- int uv_stride, int height, int width,
- uint8_t *oy, int oy_stride, uint8_t *ou,
- uint8_t *ov, int ouv_stride, int oheight,
- int owidth, int bd) {
- highbd_resize_plane(y, height, width, y_stride, oy, oheight, owidth,
- oy_stride, bd);
- highbd_resize_plane(u, height / 2, width / 2, uv_stride, ou, oheight / 2,
- owidth / 2, ouv_stride, bd);
- highbd_resize_plane(v, height / 2, width / 2, uv_stride, ov, oheight / 2,
- owidth / 2, ouv_stride, bd);
-}
-
-void av1_highbd_resize_frame422(const uint8_t *const y, int y_stride,
- const uint8_t *const u, const uint8_t *const v,
- int uv_stride, int height, int width,
- uint8_t *oy, int oy_stride, uint8_t *ou,
- uint8_t *ov, int ouv_stride, int oheight,
- int owidth, int bd) {
- highbd_resize_plane(y, height, width, y_stride, oy, oheight, owidth,
- oy_stride, bd);
- highbd_resize_plane(u, height, width / 2, uv_stride, ou, oheight, owidth / 2,
- ouv_stride, bd);
- highbd_resize_plane(v, height, width / 2, uv_stride, ov, oheight, owidth / 2,
- ouv_stride, bd);
-}
-
-void av1_highbd_resize_frame444(const uint8_t *const y, int y_stride,
- const uint8_t *const u, const uint8_t *const v,
- int uv_stride, int height, int width,
- uint8_t *oy, int oy_stride, uint8_t *ou,
- uint8_t *ov, int ouv_stride, int oheight,
- int owidth, int bd) {
- highbd_resize_plane(y, height, width, y_stride, oy, oheight, owidth,
- oy_stride, bd);
- highbd_resize_plane(u, height, width, uv_stride, ou, oheight, owidth,
- ouv_stride, bd);
- highbd_resize_plane(v, height, width, uv_stride, ov, oheight, owidth,
- ouv_stride, bd);
-}
-
-void av1_resize_and_extend_frame(const YV12_BUFFER_CONFIG *src,
- YV12_BUFFER_CONFIG *dst, int bd,
- const int num_planes) {
- // TODO(dkovalev): replace YV12_BUFFER_CONFIG with aom_image_t
-
- // We use AOMMIN(num_planes, MAX_MB_PLANE) instead of num_planes to quiet
- // the static analysis warnings.
- for (int i = 0; i < AOMMIN(num_planes, MAX_MB_PLANE); ++i) {
- const int is_uv = i > 0;
- if (src->flags & YV12_FLAG_HIGHBITDEPTH)
- highbd_resize_plane(src->buffers[i], src->crop_heights[is_uv],
- src->crop_widths[is_uv], src->strides[is_uv],
- dst->buffers[i], dst->crop_heights[is_uv],
- dst->crop_widths[is_uv], dst->strides[is_uv], bd);
- else
- resize_plane(src->buffers[i], src->crop_heights[is_uv],
- src->crop_widths[is_uv], src->strides[is_uv],
- dst->buffers[i], dst->crop_heights[is_uv],
- dst->crop_widths[is_uv], dst->strides[is_uv]);
- }
- aom_extend_frame_borders(dst, num_planes);
-}
-
-void av1_upscale_normative_rows(const AV1_COMMON *cm, const uint8_t *src,
- int src_stride, uint8_t *dst, int dst_stride,
- int plane, int rows) {
- const int is_uv = (plane > 0);
- const int ss_x = is_uv && cm->seq_params.subsampling_x;
- const int downscaled_plane_width = ROUND_POWER_OF_TWO(cm->width, ss_x);
- const int upscaled_plane_width =
- ROUND_POWER_OF_TWO(cm->superres_upscaled_width, ss_x);
- const int superres_denom = cm->superres_scale_denominator;
-
- TileInfo tile_col;
- const int32_t x_step_qn = av1_get_upscale_convolve_step(
- downscaled_plane_width, upscaled_plane_width);
- int32_t x0_qn = get_upscale_convolve_x0(downscaled_plane_width,
- upscaled_plane_width, x_step_qn);
-
- for (int j = 0; j < cm->tile_cols; j++) {
- av1_tile_set_col(&tile_col, cm, j);
- // Determine the limits of this tile column in both the source
- // and destination images.
- // Note: The actual location which we start sampling from is
- // (downscaled_x0 - 1 + (x0_qn/2^14)), and this quantity increases
- // by exactly dst_width * (x_step_qn/2^14) pixels each iteration.
- const int downscaled_x0 = tile_col.mi_col_start << (MI_SIZE_LOG2 - ss_x);
- const int downscaled_x1 = tile_col.mi_col_end << (MI_SIZE_LOG2 - ss_x);
- const int src_width = downscaled_x1 - downscaled_x0;
-
- const int upscaled_x0 = (downscaled_x0 * superres_denom) / SCALE_NUMERATOR;
- int upscaled_x1;
- if (j == cm->tile_cols - 1) {
- // Note that we can't just use AOMMIN here - due to rounding,
- // (downscaled_x1 * superres_denom) / SCALE_NUMERATOR may be less than
- // upscaled_plane_width.
- upscaled_x1 = upscaled_plane_width;
- } else {
- upscaled_x1 = (downscaled_x1 * superres_denom) / SCALE_NUMERATOR;
- }
-
- const uint8_t *const src_ptr = src + downscaled_x0;
- uint8_t *const dst_ptr = dst + upscaled_x0;
- const int dst_width = upscaled_x1 - upscaled_x0;
-
- const int pad_left = (j == 0);
- const int pad_right = (j == cm->tile_cols - 1);
-
- if (cm->seq_params.use_highbitdepth)
- highbd_upscale_normative_rect(src_ptr, rows, src_width, src_stride,
- dst_ptr, rows, dst_width, dst_stride,
- x_step_qn, x0_qn, pad_left, pad_right,
- cm->seq_params.bit_depth);
- else
- upscale_normative_rect(src_ptr, rows, src_width, src_stride, dst_ptr,
- rows, dst_width, dst_stride, x_step_qn, x0_qn,
- pad_left, pad_right);
-
- // Update the fractional pixel offset to prepare for the next tile column.
- x0_qn += (dst_width * x_step_qn) - (src_width << RS_SCALE_SUBPEL_BITS);
- }
-}
-
-void av1_upscale_normative_and_extend_frame(const AV1_COMMON *cm,
- const YV12_BUFFER_CONFIG *src,
- YV12_BUFFER_CONFIG *dst) {
- const int num_planes = av1_num_planes(cm);
- for (int i = 0; i < num_planes; ++i) {
- const int is_uv = (i > 0);
- av1_upscale_normative_rows(cm, src->buffers[i], src->strides[is_uv],
- dst->buffers[i], dst->strides[is_uv], i,
- src->crop_heights[is_uv]);
- }
-
- aom_extend_frame_borders(dst, num_planes);
-}
-
-YV12_BUFFER_CONFIG *av1_scale_if_required(AV1_COMMON *cm,
- YV12_BUFFER_CONFIG *unscaled,
- YV12_BUFFER_CONFIG *scaled) {
- const int num_planes = av1_num_planes(cm);
- if (cm->width != unscaled->y_crop_width ||
- cm->height != unscaled->y_crop_height) {
- av1_resize_and_extend_frame(unscaled, scaled, (int)cm->seq_params.bit_depth,
- num_planes);
- return scaled;
- } else {
- return unscaled;
- }
-}
-
-// Calculates the scaled dimension given the original dimension and the scale
-// denominator.
-static void calculate_scaled_size_helper(int *dim, int denom) {
- if (denom != SCALE_NUMERATOR) {
- // Use this version if we need *dim to be even
- // *width = (*width * SCALE_NUMERATOR + denom) / (2 * denom);
- // *width <<= 1;
- *dim = (*dim * SCALE_NUMERATOR + denom / 2) / (denom);
- }
-}
-
-void av1_calculate_scaled_size(int *width, int *height, int resize_denom) {
- calculate_scaled_size_helper(width, resize_denom);
- calculate_scaled_size_helper(height, resize_denom);
-}
-
-void av1_calculate_scaled_superres_size(int *width, int *height,
- int superres_denom) {
- (void)height;
- calculate_scaled_size_helper(width, superres_denom);
-}
-
-void av1_calculate_unscaled_superres_size(int *width, int *height, int denom) {
- if (denom != SCALE_NUMERATOR) {
- // Note: av1_calculate_scaled_superres_size() rounds *up* after division
- // when the resulting dimensions are odd. So here, we round *down*.
- *width = *width * denom / SCALE_NUMERATOR;
- (void)height;
- }
-}
-
-// Copy only the config data from 'src' to 'dst'.
-static void copy_buffer_config(const YV12_BUFFER_CONFIG *const src,
- YV12_BUFFER_CONFIG *const dst) {
- dst->bit_depth = src->bit_depth;
- dst->color_primaries = src->color_primaries;
- dst->transfer_characteristics = src->transfer_characteristics;
- dst->matrix_coefficients = src->matrix_coefficients;
- dst->monochrome = src->monochrome;
- dst->chroma_sample_position = src->chroma_sample_position;
- dst->color_range = src->color_range;
-}
-
-// TODO(afergs): Look for in-place upscaling
-// TODO(afergs): aom_ vs av1_ functions? Which can I use?
-// Upscale decoded image.
-void av1_superres_upscale(AV1_COMMON *cm, BufferPool *const pool) {
- const int num_planes = av1_num_planes(cm);
- if (!av1_superres_scaled(cm)) return;
- const SequenceHeader *const seq_params = &cm->seq_params;
-
- YV12_BUFFER_CONFIG copy_buffer;
- memset(&copy_buffer, 0, sizeof(copy_buffer));
-
- YV12_BUFFER_CONFIG *const frame_to_show = get_frame_new_buffer(cm);
-
- const int aligned_width = ALIGN_POWER_OF_TWO(cm->width, 3);
- if (aom_alloc_frame_buffer(
- &copy_buffer, aligned_width, cm->height, seq_params->subsampling_x,
- seq_params->subsampling_y, seq_params->use_highbitdepth,
- AOM_BORDER_IN_PIXELS, cm->byte_alignment))
- aom_internal_error(&cm->error, AOM_CODEC_MEM_ERROR,
- "Failed to allocate copy buffer for superres upscaling");
-
- // Copy function assumes the frames are the same size.
- // Note that it does not copy YV12_BUFFER_CONFIG config data.
- aom_yv12_copy_frame(frame_to_show, &copy_buffer, num_planes);
-
- assert(copy_buffer.y_crop_width == aligned_width);
- assert(copy_buffer.y_crop_height == cm->height);
-
- // Realloc the current frame buffer at a higher resolution in place.
- if (pool != NULL) {
- // Use callbacks if on the decoder.
- aom_codec_frame_buffer_t *fb =
- &pool->frame_bufs[cm->new_fb_idx].raw_frame_buffer;
- aom_release_frame_buffer_cb_fn_t release_fb_cb = pool->release_fb_cb;
- aom_get_frame_buffer_cb_fn_t cb = pool->get_fb_cb;
- void *cb_priv = pool->cb_priv;
-
- // Realloc with callback does not release the frame buffer - release first.
- if (release_fb_cb(cb_priv, fb))
- aom_internal_error(
- &cm->error, AOM_CODEC_MEM_ERROR,
- "Failed to free current frame buffer before superres upscaling");
-
- // aom_realloc_frame_buffer() leaves config data for frame_to_show intact
- if (aom_realloc_frame_buffer(
- frame_to_show, cm->superres_upscaled_width,
- cm->superres_upscaled_height, seq_params->subsampling_x,
- seq_params->subsampling_y, seq_params->use_highbitdepth,
- AOM_BORDER_IN_PIXELS, cm->byte_alignment, fb, cb, cb_priv))
- aom_internal_error(
- &cm->error, AOM_CODEC_MEM_ERROR,
- "Failed to allocate current frame buffer for superres upscaling");
- } else {
- // Make a copy of the config data for frame_to_show in copy_buffer
- copy_buffer_config(frame_to_show, &copy_buffer);
-
- // Don't use callbacks on the encoder.
- // aom_alloc_frame_buffer() clears the config data for frame_to_show
- if (aom_alloc_frame_buffer(
- frame_to_show, cm->superres_upscaled_width,
- cm->superres_upscaled_height, seq_params->subsampling_x,
- seq_params->subsampling_y, seq_params->use_highbitdepth,
- AOM_BORDER_IN_PIXELS, cm->byte_alignment))
- aom_internal_error(
- &cm->error, AOM_CODEC_MEM_ERROR,
- "Failed to reallocate current frame buffer for superres upscaling");
-
- // Restore config data back to frame_to_show
- copy_buffer_config(&copy_buffer, frame_to_show);
- }
- // TODO(afergs): verify frame_to_show is correct after realloc
- // encoder:
- // decoder:
-
- assert(frame_to_show->y_crop_width == cm->superres_upscaled_width);
- assert(frame_to_show->y_crop_height == cm->superres_upscaled_height);
-
- // Scale up and back into frame_to_show.
- assert(frame_to_show->y_crop_width != cm->width);
- av1_upscale_normative_and_extend_frame(cm, &copy_buffer, frame_to_show);
-
- // Free the copy buffer
- aom_free_frame_buffer(&copy_buffer);
-}
diff --git a/third_party/aom/av1/common/resize.h b/third_party/aom/av1/common/resize.h
deleted file mode 100644
index 9a59a8d63..000000000
--- a/third_party/aom/av1/common/resize.h
+++ /dev/null
@@ -1,112 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#ifndef AOM_AV1_COMMON_RESIZE_H_
-#define AOM_AV1_COMMON_RESIZE_H_
-
-#include <stdio.h>
-#include "aom/aom_integer.h"
-#include "av1/common/onyxc_int.h"
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-void av1_resize_plane(const uint8_t *const input, int height, int width,
- int in_stride, uint8_t *output, int height2, int width2,
- int out_stride);
-void av1_resize_frame420(const uint8_t *const y, int y_stride,
- const uint8_t *const u, const uint8_t *const v,
- int uv_stride, int height, int width, uint8_t *oy,
- int oy_stride, uint8_t *ou, uint8_t *ov,
- int ouv_stride, int oheight, int owidth);
-void av1_resize_frame422(const uint8_t *const y, int y_stride,
- const uint8_t *const u, const uint8_t *const v,
- int uv_stride, int height, int width, uint8_t *oy,
- int oy_stride, uint8_t *ou, uint8_t *ov,
- int ouv_stride, int oheight, int owidth);
-void av1_resize_frame444(const uint8_t *const y, int y_stride,
- const uint8_t *const u, const uint8_t *const v,
- int uv_stride, int height, int width, uint8_t *oy,
- int oy_stride, uint8_t *ou, uint8_t *ov,
- int ouv_stride, int oheight, int owidth);
-
-void av1_highbd_resize_plane(const uint8_t *const input, int height, int width,
- int in_stride, uint8_t *output, int height2,
- int width2, int out_stride, int bd);
-void av1_highbd_resize_frame420(const uint8_t *const y, int y_stride,
- const uint8_t *const u, const uint8_t *const v,
- int uv_stride, int height, int width,
- uint8_t *oy, int oy_stride, uint8_t *ou,
- uint8_t *ov, int ouv_stride, int oheight,
- int owidth, int bd);
-void av1_highbd_resize_frame422(const uint8_t *const y, int y_stride,
- const uint8_t *const u, const uint8_t *const v,
- int uv_stride, int height, int width,
- uint8_t *oy, int oy_stride, uint8_t *ou,
- uint8_t *ov, int ouv_stride, int oheight,
- int owidth, int bd);
-void av1_highbd_resize_frame444(const uint8_t *const y, int y_stride,
- const uint8_t *const u, const uint8_t *const v,
- int uv_stride, int height, int width,
- uint8_t *oy, int oy_stride, uint8_t *ou,
- uint8_t *ov, int ouv_stride, int oheight,
- int owidth, int bd);
-void av1_resize_and_extend_frame(const YV12_BUFFER_CONFIG *src,
- YV12_BUFFER_CONFIG *dst, int bd,
- const int num_planes);
-
-void av1_upscale_normative_rows(const AV1_COMMON *cm, const uint8_t *src,
- int src_stride, uint8_t *dst, int dst_stride,
- int plane, int rows);
-void av1_upscale_normative_and_extend_frame(const AV1_COMMON *cm,
- const YV12_BUFFER_CONFIG *src,
- YV12_BUFFER_CONFIG *dst);
-
-YV12_BUFFER_CONFIG *av1_scale_if_required(AV1_COMMON *cm,
- YV12_BUFFER_CONFIG *unscaled,
- YV12_BUFFER_CONFIG *scaled);
-
-// Calculates the scaled dimensions from the given original dimensions and the
-// resize scale denominator.
-void av1_calculate_scaled_size(int *width, int *height, int resize_denom);
-
-// Similar to above, but calculates scaled dimensions after superres from the
-// given original dimensions and superres scale denominator.
-void av1_calculate_scaled_superres_size(int *width, int *height,
- int superres_denom);
-
-// Inverse of av1_calculate_scaled_superres_size() above: calculates the
-// original dimensions from the given scaled dimensions and the scale
-// denominator.
-void av1_calculate_unscaled_superres_size(int *width, int *height, int denom);
-
-void av1_superres_upscale(AV1_COMMON *cm, BufferPool *const pool);
-
-// Returns 1 if a superres upscaled frame is scaled and 0 otherwise.
-static INLINE int av1_superres_scaled(const AV1_COMMON *cm) {
- // Note: for some corner cases (e.g. cm->width of 1), there may be no scaling
- // required even though cm->superres_scale_denominator != SCALE_NUMERATOR.
- // So, the following check is more accurate.
- return !(cm->width == cm->superres_upscaled_width);
-}
-
-#define UPSCALE_NORMATIVE_TAPS 8
-extern const int16_t av1_resize_filter_normative[1 << RS_SUBPEL_BITS]
- [UPSCALE_NORMATIVE_TAPS];
-
-int32_t av1_get_upscale_convolve_step(int in_length, int out_length);
-
-#ifdef __cplusplus
-} // extern "C"
-#endif
-
-#endif // AOM_AV1_COMMON_RESIZE_H_
diff --git a/third_party/aom/av1/common/restoration.c b/third_party/aom/av1/common/restoration.c
deleted file mode 100644
index d276a915b..000000000
--- a/third_party/aom/av1/common/restoration.c
+++ /dev/null
@@ -1,1556 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- *
- */
-
-#include <math.h>
-
-#include "config/aom_config.h"
-#include "config/aom_dsp_rtcd.h"
-#include "config/aom_scale_rtcd.h"
-
-#include "aom_mem/aom_mem.h"
-#include "av1/common/onyxc_int.h"
-#include "av1/common/resize.h"
-#include "av1/common/restoration.h"
-#include "aom_dsp/aom_dsp_common.h"
-#include "aom_mem/aom_mem.h"
-
-#include "aom_ports/mem.h"
-
-// The 's' values are calculated based on original 'r' and 'e' values in the
-// spec using GenSgrprojVtable().
-// Note: Setting r = 0 skips the filter; with corresponding s = -1 (invalid).
-const sgr_params_type sgr_params[SGRPROJ_PARAMS] = {
- { { 2, 1 }, { 140, 3236 } }, { { 2, 1 }, { 112, 2158 } },
- { { 2, 1 }, { 93, 1618 } }, { { 2, 1 }, { 80, 1438 } },
- { { 2, 1 }, { 70, 1295 } }, { { 2, 1 }, { 58, 1177 } },
- { { 2, 1 }, { 47, 1079 } }, { { 2, 1 }, { 37, 996 } },
- { { 2, 1 }, { 30, 925 } }, { { 2, 1 }, { 25, 863 } },
- { { 0, 1 }, { -1, 2589 } }, { { 0, 1 }, { -1, 1618 } },
- { { 0, 1 }, { -1, 1177 } }, { { 0, 1 }, { -1, 925 } },
- { { 2, 0 }, { 56, -1 } }, { { 2, 0 }, { 22, -1 } },
-};
-
-AV1PixelRect av1_whole_frame_rect(const AV1_COMMON *cm, int is_uv) {
- AV1PixelRect rect;
-
- int ss_x = is_uv && cm->seq_params.subsampling_x;
- int ss_y = is_uv && cm->seq_params.subsampling_y;
-
- rect.top = 0;
- rect.bottom = ROUND_POWER_OF_TWO(cm->height, ss_y);
- rect.left = 0;
- rect.right = ROUND_POWER_OF_TWO(cm->superres_upscaled_width, ss_x);
- return rect;
-}
-
-// Count horizontal or vertical units per tile (use a width or height for
-// tile_size, respectively). We basically want to divide the tile size by the
-// size of a restoration unit. Rather than rounding up unconditionally as you
-// might expect, we round to nearest, which models the way a right or bottom
-// restoration unit can extend to up to 150% its normal width or height. The
-// max with 1 is to deal with tiles that are smaller than half of a restoration
-// unit.
-int av1_lr_count_units_in_tile(int unit_size, int tile_size) {
- return AOMMAX((tile_size + (unit_size >> 1)) / unit_size, 1);
-}
-
-void av1_alloc_restoration_struct(AV1_COMMON *cm, RestorationInfo *rsi,
- int is_uv) {
- // We need to allocate enough space for restoration units to cover the
- // largest tile. Without CONFIG_MAX_TILE, this is always the tile at the
- // top-left and we can use av1_get_tile_rect(). With CONFIG_MAX_TILE, we have
- // to do the computation ourselves, iterating over the tiles and keeping
- // track of the largest width and height, then upscaling.
- const AV1PixelRect tile_rect = av1_whole_frame_rect(cm, is_uv);
- const int max_tile_w = tile_rect.right - tile_rect.left;
- const int max_tile_h = tile_rect.bottom - tile_rect.top;
-
- // To calculate hpertile and vpertile (horizontal and vertical units per
- // tile), we basically want to divide the largest tile width or height by the
- // size of a restoration unit. Rather than rounding up unconditionally as you
- // might expect, we round to nearest, which models the way a right or bottom
- // restoration unit can extend to up to 150% its normal width or height. The
- // max with 1 is to deal with tiles that are smaller than half of a
- // restoration unit.
- const int unit_size = rsi->restoration_unit_size;
- const int hpertile = av1_lr_count_units_in_tile(unit_size, max_tile_w);
- const int vpertile = av1_lr_count_units_in_tile(unit_size, max_tile_h);
-
- rsi->units_per_tile = hpertile * vpertile;
- rsi->horz_units_per_tile = hpertile;
- rsi->vert_units_per_tile = vpertile;
-
- const int ntiles = 1;
- const int nunits = ntiles * rsi->units_per_tile;
-
- aom_free(rsi->unit_info);
- CHECK_MEM_ERROR(cm, rsi->unit_info,
- (RestorationUnitInfo *)aom_memalign(
- 16, sizeof(*rsi->unit_info) * nunits));
-}
-
-void av1_free_restoration_struct(RestorationInfo *rst_info) {
- aom_free(rst_info->unit_info);
- rst_info->unit_info = NULL;
-}
-
-#if 0
-// Pair of values for each sgrproj parameter:
-// Index 0 corresponds to r[0], e[0]
-// Index 1 corresponds to r[1], e[1]
-int sgrproj_mtable[SGRPROJ_PARAMS][2];
-
-static void GenSgrprojVtable() {
- for (int i = 0; i < SGRPROJ_PARAMS; ++i) {
- const sgr_params_type *const params = &sgr_params[i];
- for (int j = 0; j < 2; ++j) {
- const int e = params->e[j];
- const int r = params->r[j];
- if (r == 0) { // filter is disabled
- sgrproj_mtable[i][j] = -1; // mark invalid
- } else { // filter is enabled
- const int n = (2 * r + 1) * (2 * r + 1);
- const int n2e = n * n * e;
- assert(n2e != 0);
- sgrproj_mtable[i][j] = (((1 << SGRPROJ_MTABLE_BITS) + n2e / 2) / n2e);
- }
- }
- }
-}
-#endif
-
-void av1_loop_restoration_precal() {
-#if 0
- GenSgrprojVtable();
-#endif
-}
-
-static void extend_frame_lowbd(uint8_t *data, int width, int height, int stride,
- int border_horz, int border_vert) {
- uint8_t *data_p;
- int i;
- for (i = 0; i < height; ++i) {
- data_p = data + i * stride;
- memset(data_p - border_horz, data_p[0], border_horz);
- memset(data_p + width, data_p[width - 1], border_horz);
- }
- data_p = data - border_horz;
- for (i = -border_vert; i < 0; ++i) {
- memcpy(data_p + i * stride, data_p, width + 2 * border_horz);
- }
- for (i = height; i < height + border_vert; ++i) {
- memcpy(data_p + i * stride, data_p + (height - 1) * stride,
- width + 2 * border_horz);
- }
-}
-
-static void extend_frame_highbd(uint16_t *data, int width, int height,
- int stride, int border_horz, int border_vert) {
- uint16_t *data_p;
- int i, j;
- for (i = 0; i < height; ++i) {
- data_p = data + i * stride;
- for (j = -border_horz; j < 0; ++j) data_p[j] = data_p[0];
- for (j = width; j < width + border_horz; ++j) data_p[j] = data_p[width - 1];
- }
- data_p = data - border_horz;
- for (i = -border_vert; i < 0; ++i) {
- memcpy(data_p + i * stride, data_p,
- (width + 2 * border_horz) * sizeof(uint16_t));
- }
- for (i = height; i < height + border_vert; ++i) {
- memcpy(data_p + i * stride, data_p + (height - 1) * stride,
- (width + 2 * border_horz) * sizeof(uint16_t));
- }
-}
-
-void extend_frame(uint8_t *data, int width, int height, int stride,
- int border_horz, int border_vert, int highbd) {
- if (highbd)
- extend_frame_highbd(CONVERT_TO_SHORTPTR(data), width, height, stride,
- border_horz, border_vert);
- else
- extend_frame_lowbd(data, width, height, stride, border_horz, border_vert);
-}
-
-static void copy_tile_lowbd(int width, int height, const uint8_t *src,
- int src_stride, uint8_t *dst, int dst_stride) {
- for (int i = 0; i < height; ++i)
- memcpy(dst + i * dst_stride, src + i * src_stride, width);
-}
-
-static void copy_tile_highbd(int width, int height, const uint16_t *src,
- int src_stride, uint16_t *dst, int dst_stride) {
- for (int i = 0; i < height; ++i)
- memcpy(dst + i * dst_stride, src + i * src_stride, width * sizeof(*dst));
-}
-
-static void copy_tile(int width, int height, const uint8_t *src, int src_stride,
- uint8_t *dst, int dst_stride, int highbd) {
- if (highbd)
- copy_tile_highbd(width, height, CONVERT_TO_SHORTPTR(src), src_stride,
- CONVERT_TO_SHORTPTR(dst), dst_stride);
- else
- copy_tile_lowbd(width, height, src, src_stride, dst, dst_stride);
-}
-
-#define REAL_PTR(hbd, d) ((hbd) ? (uint8_t *)CONVERT_TO_SHORTPTR(d) : (d))
-
-// With striped loop restoration, the filtering for each 64-pixel stripe gets
-// most of its input from the output of CDEF (stored in data8), but we need to
-// fill out a border of 3 pixels above/below the stripe according to the
-// following
-// rules:
-//
-// * At a frame boundary, we copy the outermost row of CDEF pixels three times.
-// This extension is done by a call to extend_frame() at the start of the loop
-// restoration process, so the value of copy_above/copy_below doesn't strictly
-// matter.
-// However, by setting *copy_above = *copy_below = 1 whenever loop filtering
-// across tiles is disabled, we can allow
-// {setup,restore}_processing_stripe_boundary to assume that the top/bottom
-// data has always been copied, simplifying the behaviour at the left and
-// right edges of tiles.
-//
-// * If we're at a tile boundary and loop filtering across tiles is enabled,
-// then there is a logical stripe which is 64 pixels high, but which is split
-// into an 8px high and a 56px high stripe so that the processing (and
-// coefficient set usage) can be aligned to tiles.
-// In this case, we use the 3 rows of CDEF output across the boundary for
-// context; this corresponds to leaving the frame buffer as-is.
-//
-// * If we're at a tile boundary and loop filtering across tiles is disabled,
-// then we take the outermost row of CDEF pixels *within the current tile*
-// and copy it three times. Thus we behave exactly as if the tile were a full
-// frame.
-//
-// * Otherwise, we're at a stripe boundary within a tile. In that case, we
-// take 2 rows of deblocked pixels and extend them to 3 rows of context.
-//
-// The distinction between the latter two cases is handled by the
-// av1_loop_restoration_save_boundary_lines() function, so here we just need
-// to decide if we're overwriting the above/below boundary pixels or not.
-static void get_stripe_boundary_info(const RestorationTileLimits *limits,
- const AV1PixelRect *tile_rect, int ss_y,
- int *copy_above, int *copy_below) {
- *copy_above = 1;
- *copy_below = 1;
-
- const int full_stripe_height = RESTORATION_PROC_UNIT_SIZE >> ss_y;
- const int runit_offset = RESTORATION_UNIT_OFFSET >> ss_y;
-
- const int first_stripe_in_tile = (limits->v_start == tile_rect->top);
- const int this_stripe_height =
- full_stripe_height - (first_stripe_in_tile ? runit_offset : 0);
- const int last_stripe_in_tile =
- (limits->v_start + this_stripe_height >= tile_rect->bottom);
-
- if (first_stripe_in_tile) *copy_above = 0;
- if (last_stripe_in_tile) *copy_below = 0;
-}
-
-// Overwrite the border pixels around a processing stripe so that the conditions
-// listed above get_stripe_boundary_info() are preserved.
-// We save the pixels which get overwritten into a temporary buffer, so that
-// they can be restored by restore_processing_stripe_boundary() after we've
-// processed the stripe.
-//
-// limits gives the rectangular limits of the remaining stripes for the current
-// restoration unit. rsb is the stored stripe boundaries (taken from either
-// deblock or CDEF output as necessary).
-//
-// tile_rect is the limits of the current tile and tile_stripe0 is the index of
-// the first stripe in this tile (needed to convert the tile-relative stripe
-// index we get from limits into something we can look up in rsb).
-static void setup_processing_stripe_boundary(
- const RestorationTileLimits *limits, const RestorationStripeBoundaries *rsb,
- int rsb_row, int use_highbd, int h, uint8_t *data8, int data_stride,
- RestorationLineBuffers *rlbs, int copy_above, int copy_below, int opt) {
- // Offsets within the line buffers. The buffer logically starts at column
- // -RESTORATION_EXTRA_HORZ so the 1st column (at x0 - RESTORATION_EXTRA_HORZ)
- // has column x0 in the buffer.
- const int buf_stride = rsb->stripe_boundary_stride;
- const int buf_x0_off = limits->h_start;
- const int line_width =
- (limits->h_end - limits->h_start) + 2 * RESTORATION_EXTRA_HORZ;
- const int line_size = line_width << use_highbd;
-
- const int data_x0 = limits->h_start - RESTORATION_EXTRA_HORZ;
-
- // Replace RESTORATION_BORDER pixels above the top of the stripe
- // We expand RESTORATION_CTX_VERT=2 lines from rsb->stripe_boundary_above
- // to fill RESTORATION_BORDER=3 lines of above pixels. This is done by
- // duplicating the topmost of the 2 lines (see the AOMMAX call when
- // calculating src_row, which gets the values 0, 0, 1 for i = -3, -2, -1).
- //
- // Special case: If we're at the top of a tile, which isn't on the topmost
- // tile row, and we're allowed to loop filter across tiles, then we have a
- // logical 64-pixel-high stripe which has been split into an 8-pixel high
- // stripe and a 56-pixel high stripe (the current one). So, in this case,
- // we want to leave the boundary alone!
- if (!opt) {
- if (copy_above) {
- uint8_t *data8_tl = data8 + data_x0 + limits->v_start * data_stride;
-
- for (int i = -RESTORATION_BORDER; i < 0; ++i) {
- const int buf_row = rsb_row + AOMMAX(i + RESTORATION_CTX_VERT, 0);
- const int buf_off = buf_x0_off + buf_row * buf_stride;
- const uint8_t *buf =
- rsb->stripe_boundary_above + (buf_off << use_highbd);
- uint8_t *dst8 = data8_tl + i * data_stride;
- // Save old pixels, then replace with data from stripe_boundary_above
- memcpy(rlbs->tmp_save_above[i + RESTORATION_BORDER],
- REAL_PTR(use_highbd, dst8), line_size);
- memcpy(REAL_PTR(use_highbd, dst8), buf, line_size);
- }
- }
-
- // Replace RESTORATION_BORDER pixels below the bottom of the stripe.
- // The second buffer row is repeated, so src_row gets the values 0, 1, 1
- // for i = 0, 1, 2.
- if (copy_below) {
- const int stripe_end = limits->v_start + h;
- uint8_t *data8_bl = data8 + data_x0 + stripe_end * data_stride;
-
- for (int i = 0; i < RESTORATION_BORDER; ++i) {
- const int buf_row = rsb_row + AOMMIN(i, RESTORATION_CTX_VERT - 1);
- const int buf_off = buf_x0_off + buf_row * buf_stride;
- const uint8_t *src =
- rsb->stripe_boundary_below + (buf_off << use_highbd);
-
- uint8_t *dst8 = data8_bl + i * data_stride;
- // Save old pixels, then replace with data from stripe_boundary_below
- memcpy(rlbs->tmp_save_below[i], REAL_PTR(use_highbd, dst8), line_size);
- memcpy(REAL_PTR(use_highbd, dst8), src, line_size);
- }
- }
- } else {
- if (copy_above) {
- uint8_t *data8_tl = data8 + data_x0 + limits->v_start * data_stride;
-
- // Only save and overwrite i=-RESTORATION_BORDER line.
- uint8_t *dst8 = data8_tl + (-RESTORATION_BORDER) * data_stride;
- // Save old pixels, then replace with data from stripe_boundary_above
- memcpy(rlbs->tmp_save_above[0], REAL_PTR(use_highbd, dst8), line_size);
- memcpy(REAL_PTR(use_highbd, dst8),
- REAL_PTR(use_highbd,
- data8_tl + (-RESTORATION_BORDER + 1) * data_stride),
- line_size);
- }
-
- if (copy_below) {
- const int stripe_end = limits->v_start + h;
- uint8_t *data8_bl = data8 + data_x0 + stripe_end * data_stride;
-
- // Only save and overwrite i=2 line.
- uint8_t *dst8 = data8_bl + 2 * data_stride;
- // Save old pixels, then replace with data from stripe_boundary_below
- memcpy(rlbs->tmp_save_below[2], REAL_PTR(use_highbd, dst8), line_size);
- memcpy(REAL_PTR(use_highbd, dst8),
- REAL_PTR(use_highbd, data8_bl + (2 - 1) * data_stride), line_size);
- }
- }
-}
-
-// This function restores the boundary lines modified by
-// setup_processing_stripe_boundary.
-//
-// Note: We need to be careful when handling the corners of the processing
-// unit, because (eg.) the top-left corner is considered to be part of
-// both the left and top borders. This means that, depending on the
-// loop_filter_across_tiles_enabled flag, the corner pixels might get
-// overwritten twice, once as part of the "top" border and once as part
-// of the "left" border (or similar for other corners).
-//
-// Everything works out fine as long as we make sure to reverse the order
-// when restoring, ie. we need to restore the left/right borders followed
-// by the top/bottom borders.
-static void restore_processing_stripe_boundary(
- const RestorationTileLimits *limits, const RestorationLineBuffers *rlbs,
- int use_highbd, int h, uint8_t *data8, int data_stride, int copy_above,
- int copy_below, int opt) {
- const int line_width =
- (limits->h_end - limits->h_start) + 2 * RESTORATION_EXTRA_HORZ;
- const int line_size = line_width << use_highbd;
-
- const int data_x0 = limits->h_start - RESTORATION_EXTRA_HORZ;
-
- if (!opt) {
- if (copy_above) {
- uint8_t *data8_tl = data8 + data_x0 + limits->v_start * data_stride;
- for (int i = -RESTORATION_BORDER; i < 0; ++i) {
- uint8_t *dst8 = data8_tl + i * data_stride;
- memcpy(REAL_PTR(use_highbd, dst8),
- rlbs->tmp_save_above[i + RESTORATION_BORDER], line_size);
- }
- }
-
- if (copy_below) {
- const int stripe_bottom = limits->v_start + h;
- uint8_t *data8_bl = data8 + data_x0 + stripe_bottom * data_stride;
-
- for (int i = 0; i < RESTORATION_BORDER; ++i) {
- if (stripe_bottom + i >= limits->v_end + RESTORATION_BORDER) break;
-
- uint8_t *dst8 = data8_bl + i * data_stride;
- memcpy(REAL_PTR(use_highbd, dst8), rlbs->tmp_save_below[i], line_size);
- }
- }
- } else {
- if (copy_above) {
- uint8_t *data8_tl = data8 + data_x0 + limits->v_start * data_stride;
-
- // Only restore i=-RESTORATION_BORDER line.
- uint8_t *dst8 = data8_tl + (-RESTORATION_BORDER) * data_stride;
- memcpy(REAL_PTR(use_highbd, dst8), rlbs->tmp_save_above[0], line_size);
- }
-
- if (copy_below) {
- const int stripe_bottom = limits->v_start + h;
- uint8_t *data8_bl = data8 + data_x0 + stripe_bottom * data_stride;
-
- // Only restore i=2 line.
- if (stripe_bottom + 2 < limits->v_end + RESTORATION_BORDER) {
- uint8_t *dst8 = data8_bl + 2 * data_stride;
- memcpy(REAL_PTR(use_highbd, dst8), rlbs->tmp_save_below[2], line_size);
- }
- }
- }
-}
-
-static void wiener_filter_stripe(const RestorationUnitInfo *rui,
- int stripe_width, int stripe_height,
- int procunit_width, const uint8_t *src,
- int src_stride, uint8_t *dst, int dst_stride,
- int32_t *tmpbuf, int bit_depth) {
- (void)tmpbuf;
- (void)bit_depth;
- assert(bit_depth == 8);
- const ConvolveParams conv_params = get_conv_params_wiener(8);
-
- for (int j = 0; j < stripe_width; j += procunit_width) {
- int w = AOMMIN(procunit_width, (stripe_width - j + 15) & ~15);
- const uint8_t *src_p = src + j;
- uint8_t *dst_p = dst + j;
- av1_wiener_convolve_add_src(
- src_p, src_stride, dst_p, dst_stride, rui->wiener_info.hfilter, 16,
- rui->wiener_info.vfilter, 16, w, stripe_height, &conv_params);
- }
-}
-
-/* Calculate windowed sums (if sqr=0) or sums of squares (if sqr=1)
- over the input. The window is of size (2r + 1)x(2r + 1), and we
- specialize to r = 1, 2, 3. A default function is used for r > 3.
-
- Each loop follows the same format: We keep a window's worth of input
- in individual variables and select data out of that as appropriate.
-*/
-static void boxsum1(int32_t *src, int width, int height, int src_stride,
- int sqr, int32_t *dst, int dst_stride) {
- int i, j, a, b, c;
- assert(width > 2 * SGRPROJ_BORDER_HORZ);
- assert(height > 2 * SGRPROJ_BORDER_VERT);
-
- // Vertical sum over 3-pixel regions, from src into dst.
- if (!sqr) {
- for (j = 0; j < width; ++j) {
- a = src[j];
- b = src[src_stride + j];
- c = src[2 * src_stride + j];
-
- dst[j] = a + b;
- for (i = 1; i < height - 2; ++i) {
- // Loop invariant: At the start of each iteration,
- // a = src[(i - 1) * src_stride + j]
- // b = src[(i ) * src_stride + j]
- // c = src[(i + 1) * src_stride + j]
- dst[i * dst_stride + j] = a + b + c;
- a = b;
- b = c;
- c = src[(i + 2) * src_stride + j];
- }
- dst[i * dst_stride + j] = a + b + c;
- dst[(i + 1) * dst_stride + j] = b + c;
- }
- } else {
- for (j = 0; j < width; ++j) {
- a = src[j] * src[j];
- b = src[src_stride + j] * src[src_stride + j];
- c = src[2 * src_stride + j] * src[2 * src_stride + j];
-
- dst[j] = a + b;
- for (i = 1; i < height - 2; ++i) {
- dst[i * dst_stride + j] = a + b + c;
- a = b;
- b = c;
- c = src[(i + 2) * src_stride + j] * src[(i + 2) * src_stride + j];
- }
- dst[i * dst_stride + j] = a + b + c;
- dst[(i + 1) * dst_stride + j] = b + c;
- }
- }
-
- // Horizontal sum over 3-pixel regions of dst
- for (i = 0; i < height; ++i) {
- a = dst[i * dst_stride];
- b = dst[i * dst_stride + 1];
- c = dst[i * dst_stride + 2];
-
- dst[i * dst_stride] = a + b;
- for (j = 1; j < width - 2; ++j) {
- // Loop invariant: At the start of each iteration,
- // a = src[i * src_stride + (j - 1)]
- // b = src[i * src_stride + (j )]
- // c = src[i * src_stride + (j + 1)]
- dst[i * dst_stride + j] = a + b + c;
- a = b;
- b = c;
- c = dst[i * dst_stride + (j + 2)];
- }
- dst[i * dst_stride + j] = a + b + c;
- dst[i * dst_stride + (j + 1)] = b + c;
- }
-}
-/* Computes box sums over (up to) 5x5 windows of src, or of the squared
- * samples of src when sqr is non-zero, and stores them in dst.
- *
- * A vertical 5-tap pass reads src and writes dst; a horizontal 5-tap pass
- * then runs in place on dst. Windows are truncated at the edges instead of
- * padded: the outermost row/column sums 3 taps and the next one in sums 4.
- *
- * width and height describe the region in samples; src_stride and dst_stride
- * are in int32_t elements. The asserts require the region to be larger than
- * twice the SGRPROJ border in each direction.
- */
-static void boxsum2(int32_t *src, int width, int height, int src_stride,
- int sqr, int32_t *dst, int dst_stride) {
- int i, j, a, b, c, d, e;
- assert(width > 2 * SGRPROJ_BORDER_HORZ);
- assert(height > 2 * SGRPROJ_BORDER_VERT);
-
- // Vertical sum over 5-pixel regions, from src into dst.
- if (!sqr) {
- for (j = 0; j < width; ++j) {
- a = src[j];
- b = src[src_stride + j];
- c = src[2 * src_stride + j];
- d = src[3 * src_stride + j];
- e = src[4 * src_stride + j];
-
- dst[j] = a + b + c;  // row 0: window truncated to rows 0..2
- dst[dst_stride + j] = a + b + c + d;  // row 1: rows 0..3
- for (i = 2; i < height - 3; ++i) {
- // Loop invariant: At the start of each iteration,
- // a = src[(i - 2) * src_stride + j]
- // b = src[(i - 1) * src_stride + j]
- // c = src[(i ) * src_stride + j]
- // d = src[(i + 1) * src_stride + j]
- // e = src[(i + 2) * src_stride + j]
- dst[i * dst_stride + j] = a + b + c + d + e;
- a = b;
- b = c;
- c = d;
- d = e;
- e = src[(i + 3) * src_stride + j];
- }
- dst[i * dst_stride + j] = a + b + c + d + e;  // i == height - 3 here
- dst[(i + 1) * dst_stride + j] = b + c + d + e;  // truncated: 4 rows
- dst[(i + 2) * dst_stride + j] = c + d + e;  // last row: 3 rows
- }
- } else {  // same sliding window, applied to squared samples
- for (j = 0; j < width; ++j) {
- a = src[j] * src[j];
- b = src[src_stride + j] * src[src_stride + j];
- c = src[2 * src_stride + j] * src[2 * src_stride + j];
- d = src[3 * src_stride + j] * src[3 * src_stride + j];
- e = src[4 * src_stride + j] * src[4 * src_stride + j];
-
- dst[j] = a + b + c;
- dst[dst_stride + j] = a + b + c + d;
- for (i = 2; i < height - 3; ++i) {
- dst[i * dst_stride + j] = a + b + c + d + e;
- a = b;
- b = c;
- c = d;
- d = e;
- e = src[(i + 3) * src_stride + j] * src[(i + 3) * src_stride + j];
- }
- dst[i * dst_stride + j] = a + b + c + d + e;
- dst[(i + 1) * dst_stride + j] = b + c + d + e;
- dst[(i + 2) * dst_stride + j] = c + d + e;
- }
- }
-
- // Horizontal sum over 5-pixel regions of dst
- for (i = 0; i < height; ++i) {
- a = dst[i * dst_stride];
- b = dst[i * dst_stride + 1];
- c = dst[i * dst_stride + 2];
- d = dst[i * dst_stride + 3];
- e = dst[i * dst_stride + 4];
-
- dst[i * dst_stride] = a + b + c;
- dst[i * dst_stride + 1] = a + b + c + d;
- for (j = 2; j < width - 3; ++j) {
- // Loop invariant (vertical-pass values): at the start of each iteration,
- // a = dst[i * dst_stride + (j - 2)]  (since overwritten in place)
- // b = dst[i * dst_stride + (j - 1)]  (since overwritten in place)
- // c = dst[i * dst_stride + (j )]
- // d = dst[i * dst_stride + (j + 1)]
- // e = dst[i * dst_stride + (j + 2)]
- dst[i * dst_stride + j] = a + b + c + d + e;
- a = b;
- b = c;
- c = d;
- d = e;
- e = dst[i * dst_stride + (j + 3)];  // read before this column is overwritten
- }
- dst[i * dst_stride + j] = a + b + c + d + e;
- dst[i * dst_stride + (j + 1)] = b + c + d + e;
- dst[i * dst_stride + (j + 2)] = c + d + e;
- }
-}
-/* Dispatches to the box-sum routine for window radius r: boxsum1() for
- * r == 1 and boxsum2() for r == 2, forwarding every other argument unchanged.
- * Any other r trips the assert; with asserts compiled out nothing is written
- * to dst.
- */
-static void boxsum(int32_t *src, int width, int height, int src_stride, int r,
- int sqr, int32_t *dst, int dst_stride) {
- if (r == 1)
- boxsum1(src, width, height, src_stride, sqr, dst, dst_stride);
- else if (r == 2)
- boxsum2(src, width, height, src_stride, sqr, dst, dst_stride);
- else
- assert(0 && "Invalid value of r in self-guided filter");
-}
-/* Expands the two coded projection values xqd[] into the two weights xq[]
- * that apply_selfguided_restoration_c() multiplies with (flt0 - u) and
- * (flt1 - u).
- *
- *   params->r[0] == 0: xq[0] = 0,      xq[1] = (1 << SGRPROJ_PRJ_BITS) - xqd[1]
- *   params->r[1] == 0: xq[0] = xqd[0], xq[1] = 0
- *   otherwise:         xq[0] = xqd[0],
- *                      xq[1] = (1 << SGRPROJ_PRJ_BITS) - xq[0] - xqd[1]
- *
- * The r[0] == 0 test is made first, so it wins if both radii are zero.
- */
-void decode_xq(const int *xqd, int *xq, const sgr_params_type *params) {
- if (params->r[0] == 0) {
- xq[0] = 0;
- xq[1] = (1 << SGRPROJ_PRJ_BITS) - xqd[1];
- } else if (params->r[1] == 0) {
- xq[0] = xqd[0];
- xq[1] = 0;
- } else {
- xq[0] = xqd[0];
- xq[1] = (1 << SGRPROJ_PRJ_BITS) - xq[0] - xqd[1];
- }
-}
-/* 256-entry lookup table, read by calculate_intermediate_result() as
- * x_by_xplus1[AOMMIN(z, 255)] to produce a blend weight in [1, 256].
- * NOTE(review): the name and most entries match round(256 * x / (x + 1))
- * (e.g. entry 1 is 128, entry 3 is 192); entry 0 is forced to 1 and the last
- * entry is 256 -- confirm against whatever generated the table.
- */
-const int32_t x_by_xplus1[256] = {
- // Special case: Map 0 -> 1 (corresponding to a value of 1/256)
- // instead of 0. See comments in calculate_intermediate_result() for why
- 1, 128, 171, 192, 205, 213, 219, 224, 228, 230, 233, 235, 236, 238, 239,
- 240, 241, 242, 243, 243, 244, 244, 245, 245, 246, 246, 247, 247, 247, 247,
- 248, 248, 248, 248, 249, 249, 249, 249, 249, 250, 250, 250, 250, 250, 250,
- 250, 251, 251, 251, 251, 251, 251, 251, 251, 251, 251, 252, 252, 252, 252,
- 252, 252, 252, 252, 252, 252, 252, 252, 252, 252, 252, 252, 252, 253, 253,
- 253, 253, 253, 253, 253, 253, 253, 253, 253, 253, 253, 253, 253, 253, 253,
- 253, 253, 253, 253, 253, 253, 253, 253, 253, 253, 253, 253, 254, 254, 254,
- 254, 254, 254, 254, 254, 254, 254, 254, 254, 254, 254, 254, 254, 254, 254,
- 254, 254, 254, 254, 254, 254, 254, 254, 254, 254, 254, 254, 254, 254, 254,
- 254, 254, 254, 254, 254, 254, 254, 254, 254, 254, 254, 254, 254, 254, 254,
- 254, 254, 254, 254, 254, 254, 254, 254, 254, 254, 254, 254, 254, 254, 254,
- 254, 254, 254, 254, 254, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255,
- 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255,
- 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255,
- 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255,
- 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255,
- 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255,
- 256,
-};
-/* Reciprocal table: entry n - 1 holds round(2^12 / n) (4096 for n = 1 down to
- * 164 for n = 25). calculate_intermediate_result() indexes it with
- * n = (2 * r + 1) * (2 * r + 1), the number of samples in the box window.
- * Declared with MAX_NELEM entries; 25 initialisers are listed here.
- */
-const int32_t one_by_x[MAX_NELEM] = {
- 4096, 2048, 1365, 1024, 819, 683, 585, 512, 455, 410, 372, 341, 315,
- 293, 273, 256, 241, 228, 216, 205, 195, 186, 178, 171, 164,
-};
-/* Computes the self-guided filter coefficient arrays A[] and B[] for one
- * processing unit of width x height pixels.
- *
- * boxsum() first fills B with window sums of dgd and A with window sums of
- * dgd squared, over the unit extended by SGRPROJ_BORDER_VERT/HORZ on every
- * side. Each position of the unit plus a 1-pixel border is then overwritten
- * in place: A[k] becomes a blend weight in [1, 256] taken from x_by_xplus1[],
- * and B[k] becomes (SGRPROJ_SGR - A[k]) * box sum * one_by_x[n - 1], rounded
- * down by SGRPROJ_RECIP_BITS.
- *
- * dgd points at the unit's top-left sample; the border is read at negative
- * offsets. pass == 0 processes every row, any other value processes every
- * second row starting at row -1. A and B are addressed with a row stride of
- * ((width + 2 * SGRPROJ_BORDER_HORZ + 3) & ~3) + 16 elements, which the
- * callers in this file recompute for themselves.
- */
-static void calculate_intermediate_result(int32_t *dgd, int width, int height,
- int dgd_stride, int bit_depth,
- int sgr_params_idx, int radius_idx,
- int pass, int32_t *A, int32_t *B) {
- const sgr_params_type *const params = &sgr_params[sgr_params_idx];
- const int r = params->r[radius_idx];
- const int width_ext = width + 2 * SGRPROJ_BORDER_HORZ;
- const int height_ext = height + 2 * SGRPROJ_BORDER_VERT;
- // Adjusting the stride of A and B here appears to avoid bad cache effects,
- // leading to a significant speed improvement.
- // We also align the stride to a multiple of 16 bytes, for consistency
- // with the SIMD version of this function.
- int buf_stride = ((width_ext + 3) & ~3) + 16;
- const int step = pass == 0 ? 1 : 2;  // row increment for the loop below
- int i, j;
-
- assert(r <= MAX_RADIUS && "Need MAX_RADIUS >= r");
- assert(r <= SGRPROJ_BORDER_VERT - 1 && r <= SGRPROJ_BORDER_HORZ - 1 &&
- "Need SGRPROJ_BORDER_* >= r+1");
-
- boxsum(dgd - dgd_stride * SGRPROJ_BORDER_VERT - SGRPROJ_BORDER_HORZ,
- width_ext, height_ext, dgd_stride, r, 0, B, buf_stride);  // B: sums of x
- boxsum(dgd - dgd_stride * SGRPROJ_BORDER_VERT - SGRPROJ_BORDER_HORZ,
- width_ext, height_ext, dgd_stride, r, 1, A, buf_stride);  // A: sums of x^2
- A += SGRPROJ_BORDER_VERT * buf_stride + SGRPROJ_BORDER_HORZ;  // skip border
- B += SGRPROJ_BORDER_VERT * buf_stride + SGRPROJ_BORDER_HORZ;  // skip border
- // Calculate the eventual A[] and B[] arrays. Include a 1-pixel border - ie,
- // for a 64x64 processing unit, we calculate 66x66 pixels of A[] and B[].
- for (i = -1; i < height + 1; i += step) {
- for (j = -1; j < width + 1; ++j) {
- const int k = i * buf_stride + j;
- const int n = (2 * r + 1) * (2 * r + 1);  // samples per box window
-
- // a < 2^16 * n < 2^22 regardless of bit depth
- uint32_t a = ROUND_POWER_OF_TWO(A[k], 2 * (bit_depth - 8));
- // b < 2^8 * n < 2^14 regardless of bit depth
- uint32_t b = ROUND_POWER_OF_TWO(B[k], bit_depth - 8);
-
- // Each term in calculating p = a * n - b * b is < 2^16 * n^2 < 2^28,
- // and p itself satisfies p < 2^14 * n^2 < 2^26.
- // This bound on p is due to:
- // https://en.wikipedia.org/wiki/Popoviciu's_inequality_on_variances
- //
- // Note: Sometimes, in high bit depth, we can end up with a*n < b*b.
- // This is an artefact of rounding, and can only happen if all pixels
- // are (almost) identical, so in this case we saturate to p=0.
- uint32_t p = (a * n < b * b) ? 0 : a * n - b * b;
-
- const uint32_t s = params->s[radius_idx];
-
- // p * s < (2^14 * n^2) * round(2^20 / n^2 eps) < 2^34 / eps < 2^32
- // as long as eps >= 4. So p * s fits into a uint32_t, and z < 2^12
- // (this holds even after accounting for the rounding in s)
- const uint32_t z = ROUND_POWER_OF_TWO(p * s, SGRPROJ_MTABLE_BITS);
-
- // Note: We have to be quite careful about the value of A[k].
- // This is used as a blend factor between individual pixel values and the
- // local mean. So it logically has a range of [0, 256], including both
- // endpoints.
- //
- // This is a pain for hardware, as we'd like something which can be stored
- // in exactly 8 bits.
- // Further, in the calculation of B[k] below, if z == 0 and r == 2,
- // then A[k] "should be" 0. But then we can end up setting B[k] to a value
- // slightly above 2^(8 + bit depth), due to rounding in the value of
- // one_by_x[25-1].
- //
- // Thus we saturate so that, when z == 0, A[k] is set to 1 instead of 0.
- // This fixes the above issues (256 - A[k] fits in a uint8, and we can't
- // overflow), without significantly affecting the final result: z == 0
- // implies that the image is essentially "flat", so the local mean and
- // individual pixel values are very similar.
- //
- // Note that saturating on the other side, ie. requiring A[k] <= 255,
- // would be a bad idea, as that corresponds to the case where the image
- // is very variable, when we want to preserve the local pixel value as
- // much as possible.
- A[k] = x_by_xplus1[AOMMIN(z, 255)]; // in range [1, 256]
-
- // SGRPROJ_SGR - A[k] < 2^8 (from above), B[k] < 2^(bit_depth) * n,
- // one_by_x[n - 1] = round(2^12 / n)
- // => the product here is < 2^(20 + bit_depth) <= 2^32,
- // and B[k] is set to a value < 2^(8 + bit depth)
- // This holds even with the rounding in one_by_x and in the overall
- // result, as long as SGRPROJ_SGR - A[k] is strictly less than 2^8.
- B[k] = (int32_t)ROUND_POWER_OF_TWO((uint32_t)(SGRPROJ_SGR - A[k]) *
- (uint32_t)B[k] *
- (uint32_t)one_by_x[n - 1],
- SGRPROJ_RECIP_BITS);
- }
- }
-}
-/* Self-guided filter variant that asks calculate_intermediate_result() for
- * A[]/B[] on every second row only (pass argument 1, i.e. rows -1, 1, 3, ...).
- *
- * Even output rows have no coefficients of their own, so they combine the six
- * neighbours in the rows directly above and below with weights 6 (vertical)
- * and 5 (diagonal), 32 in total (nb = 5). Odd output rows combine the three
- * same-row neighbours with weights 5, 6, 5, 16 in total (nb = 4).
- * Each output is a * dgd + b rounded down by
- * SGRPROJ_SGR_BITS + nb - SGRPROJ_RST_BITS.
- *
- * Writes width x height values to dst (row stride dst_stride). A and B are
- * local arrays of RESTORATION_PROC_UNIT_PELS int32_t each. Asserts r == 2.
- */
-static void selfguided_restoration_fast_internal(
- int32_t *dgd, int width, int height, int dgd_stride, int32_t *dst,
- int dst_stride, int bit_depth, int sgr_params_idx, int radius_idx) {
- const sgr_params_type *const params = &sgr_params[sgr_params_idx];
- const int r = params->r[radius_idx];
- const int width_ext = width + 2 * SGRPROJ_BORDER_HORZ;
- // Adjusting the stride of A and B here appears to avoid bad cache effects,
- // leading to a significant speed improvement.
- // We also align the stride to a multiple of 16 bytes, for consistency
- // with the SIMD version of this function.
- int buf_stride = ((width_ext + 3) & ~3) + 16;
- int32_t A_[RESTORATION_PROC_UNIT_PELS];
- int32_t B_[RESTORATION_PROC_UNIT_PELS];
- int32_t *A = A_;
- int32_t *B = B_;
- int i, j;
- calculate_intermediate_result(dgd, width, height, dgd_stride, bit_depth,
- sgr_params_idx, radius_idx, 1, A, B);
- A += SGRPROJ_BORDER_VERT * buf_stride + SGRPROJ_BORDER_HORZ;  // skip border
- B += SGRPROJ_BORDER_VERT * buf_stride + SGRPROJ_BORDER_HORZ;  // skip border
-
- // Use the A[] and B[] arrays to calculate the filtered image
- (void)r;  // r is only read by the assert below
- assert(r == 2);
- for (i = 0; i < height; ++i) {
- if (!(i & 1)) { // even row
- for (j = 0; j < width; ++j) {
- const int k = i * buf_stride + j;
- const int l = i * dgd_stride + j;
- const int m = i * dst_stride + j;
- const int nb = 5;  // weights below sum to 2 * 6 + 4 * 5 = 32 = 2^5
- const int32_t a = (A[k - buf_stride] + A[k + buf_stride]) * 6 +
- (A[k - 1 - buf_stride] + A[k - 1 + buf_stride] +
- A[k + 1 - buf_stride] + A[k + 1 + buf_stride]) *
- 5;
- const int32_t b = (B[k - buf_stride] + B[k + buf_stride]) * 6 +
- (B[k - 1 - buf_stride] + B[k - 1 + buf_stride] +
- B[k + 1 - buf_stride] + B[k + 1 + buf_stride]) *
- 5;
- const int32_t v = a * dgd[l] + b;
- dst[m] =
- ROUND_POWER_OF_TWO(v, SGRPROJ_SGR_BITS + nb - SGRPROJ_RST_BITS);
- }
- } else { // odd row
- for (j = 0; j < width; ++j) {
- const int k = i * buf_stride + j;
- const int l = i * dgd_stride + j;
- const int m = i * dst_stride + j;
- const int nb = 4;  // weights below sum to 6 + 2 * 5 = 16 = 2^4
- const int32_t a = A[k] * 6 + (A[k - 1] + A[k + 1]) * 5;
- const int32_t b = B[k] * 6 + (B[k - 1] + B[k + 1]) * 5;
- const int32_t v = a * dgd[l] + b;
- dst[m] =
- ROUND_POWER_OF_TWO(v, SGRPROJ_SGR_BITS + nb - SGRPROJ_RST_BITS);
- }
- }
- }
-}
-/* Self-guided filter variant that computes A[]/B[] on every row (pass
- * argument 0 of calculate_intermediate_result()).
- *
- * Every output pixel combines the 3x3 neighbourhood of coefficients with
- * weight 4 for the centre and its four edge neighbours and weight 3 for the
- * four diagonal neighbours, 32 in total (nb = 5). The output is a * dgd + b
- * rounded down by SGRPROJ_SGR_BITS + nb - SGRPROJ_RST_BITS.
- *
- * Writes width x height values to dst (row stride dst_stride). A and B are
- * local arrays of RESTORATION_PROC_UNIT_PELS int32_t each.
- */
-static void selfguided_restoration_internal(int32_t *dgd, int width, int height,
- int dgd_stride, int32_t *dst,
- int dst_stride, int bit_depth,
- int sgr_params_idx,
- int radius_idx) {
- const int width_ext = width + 2 * SGRPROJ_BORDER_HORZ;
- // Adjusting the stride of A and B here appears to avoid bad cache effects,
- // leading to a significant speed improvement.
- // We also align the stride to a multiple of 16 bytes, for consistency
- // with the SIMD version of this function.
- int buf_stride = ((width_ext + 3) & ~3) + 16;
- int32_t A_[RESTORATION_PROC_UNIT_PELS];
- int32_t B_[RESTORATION_PROC_UNIT_PELS];
- int32_t *A = A_;
- int32_t *B = B_;
- int i, j;
- calculate_intermediate_result(dgd, width, height, dgd_stride, bit_depth,
- sgr_params_idx, radius_idx, 0, A, B);
- A += SGRPROJ_BORDER_VERT * buf_stride + SGRPROJ_BORDER_HORZ;  // skip border
- B += SGRPROJ_BORDER_VERT * buf_stride + SGRPROJ_BORDER_HORZ;  // skip border
-
- // Use the A[] and B[] arrays to calculate the filtered image
- for (i = 0; i < height; ++i) {
- for (j = 0; j < width; ++j) {
- const int k = i * buf_stride + j;
- const int l = i * dgd_stride + j;
- const int m = i * dst_stride + j;
- const int nb = 5;  // weights below sum to 5 * 4 + 4 * 3 = 32 = 2^5
- const int32_t a =
- (A[k] + A[k - 1] + A[k + 1] + A[k - buf_stride] + A[k + buf_stride]) *
- 4 +
- (A[k - 1 - buf_stride] + A[k - 1 + buf_stride] +
- A[k + 1 - buf_stride] + A[k + 1 + buf_stride]) *
- 3;
- const int32_t b =
- (B[k] + B[k - 1] + B[k + 1] + B[k - buf_stride] + B[k + buf_stride]) *
- 4 +
- (B[k - 1 - buf_stride] + B[k - 1 + buf_stride] +
- B[k + 1 - buf_stride] + B[k + 1 + buf_stride]) *
- 3;
- const int32_t v = a * dgd[l] + b;
- dst[m] = ROUND_POWER_OF_TWO(v, SGRPROJ_SGR_BITS + nb - SGRPROJ_RST_BITS);
- }
- }
-}
-/* Runs the self-guided filters selected by sgr_params[sgr_params_idx] on a
- * width x height block.
- *
- * The block and its SGRPROJ_BORDER_VERT/HORZ border are first widened into a
- * local int32_t buffer of RESTORATION_PROC_UNIT_PELS entries; when highbd is
- * non-zero dgd8 is reinterpreted with CONVERT_TO_SHORTPTR() and read as
- * 16-bit samples, otherwise it is read as bytes. The border is read at
- * negative offsets from dgd8, so the caller must supply valid samples there.
- *
- * flt0 receives the output of selfguided_restoration_fast_internal() when
- * r[0] > 0 and flt1 the output of selfguided_restoration_internal() when
- * r[1] > 0; a buffer whose radius is 0 is left untouched. Both use row stride
- * flt_stride. Always returns 0.
- */
-int av1_selfguided_restoration_c(const uint8_t *dgd8, int width, int height,
- int dgd_stride, int32_t *flt0, int32_t *flt1,
- int flt_stride, int sgr_params_idx,
- int bit_depth, int highbd) {
- int32_t dgd32_[RESTORATION_PROC_UNIT_PELS];
- const int dgd32_stride = width + 2 * SGRPROJ_BORDER_HORZ;
- int32_t *dgd32 =
- dgd32_ + dgd32_stride * SGRPROJ_BORDER_VERT + SGRPROJ_BORDER_HORZ;  // top-left of the block proper
-
- if (highbd) {
- const uint16_t *dgd16 = CONVERT_TO_SHORTPTR(dgd8);
- for (int i = -SGRPROJ_BORDER_VERT; i < height + SGRPROJ_BORDER_VERT; ++i) {
- for (int j = -SGRPROJ_BORDER_HORZ; j < width + SGRPROJ_BORDER_HORZ; ++j) {
- dgd32[i * dgd32_stride + j] = dgd16[i * dgd_stride + j];
- }
- }
- } else {
- for (int i = -SGRPROJ_BORDER_VERT; i < height + SGRPROJ_BORDER_VERT; ++i) {
- for (int j = -SGRPROJ_BORDER_HORZ; j < width + SGRPROJ_BORDER_HORZ; ++j) {
- dgd32[i * dgd32_stride + j] = dgd8[i * dgd_stride + j];
- }
- }
- }
-
- const sgr_params_type *const params = &sgr_params[sgr_params_idx];
- // If params->r == 0 we skip the corresponding filter. We only allow one of
- // the radii to be 0, as having both equal to 0 would be equivalent to
- // skipping SGR entirely.
- assert(!(params->r[0] == 0 && params->r[1] == 0));
-
- if (params->r[0] > 0)
- selfguided_restoration_fast_internal(dgd32, width, height, dgd32_stride,
- flt0, flt_stride, bit_depth,
- sgr_params_idx, 0);
- if (params->r[1] > 0)
- selfguided_restoration_internal(dgd32, width, height, dgd32_stride, flt1,
- flt_stride, bit_depth, sgr_params_idx, 1);
- return 0;
-}
-/* Applies self-guided restoration to a width x height block and writes the
- * clipped result to dst8.
- *
- * tmpbuf is split into two filter planes, flt0 at offset 0 and flt1 at offset
- * RESTORATION_UNITPELS_MAX, both filled by av1_selfguided_restoration_c()
- * with row stride width. eps is used as the index into sgr_params[]. For each
- * pixel, with u = input << SGRPROJ_RST_BITS:
- *
- *   v = (u << SGRPROJ_PRJ_BITS) + xq[0] * (flt0 - u) + xq[1] * (flt1 - u)
- *
- * where a term is dropped when its radius is 0 and xq[] comes from
- * decode_xq(). v is rounded down by SGRPROJ_PRJ_BITS + SGRPROJ_RST_BITS and
- * clipped with clip_pixel_highbd(). When highbd is non-zero, pixels are read
- * and written as 16-bit values through CONVERT_TO_SHORTPTR(); otherwise as
- * bytes.
- */
-void apply_selfguided_restoration_c(const uint8_t *dat8, int width, int height,
- int stride, int eps, const int *xqd,
- uint8_t *dst8, int dst_stride,
- int32_t *tmpbuf, int bit_depth,
- int highbd) {
- int32_t *flt0 = tmpbuf;
- int32_t *flt1 = flt0 + RESTORATION_UNITPELS_MAX;
- assert(width * height <= RESTORATION_UNITPELS_MAX);
-
- const int ret = av1_selfguided_restoration_c(
- dat8, width, height, stride, flt0, flt1, width, eps, bit_depth, highbd);
- (void)ret;  // ret is only read by the assert below
- assert(!ret);
- const sgr_params_type *const params = &sgr_params[eps];
- int xq[2];
- decode_xq(xqd, xq, params);
- for (int i = 0; i < height; ++i) {
- for (int j = 0; j < width; ++j) {
- const int k = i * width + j;  // flt0/flt1 were written with stride == width
- uint8_t *dst8ij = dst8 + i * dst_stride + j;
- const uint8_t *dat8ij = dat8 + i * stride + j;
-
- const uint16_t pre_u = highbd ? *CONVERT_TO_SHORTPTR(dat8ij) : *dat8ij;
- const int32_t u = (int32_t)pre_u << SGRPROJ_RST_BITS;
- int32_t v = u << SGRPROJ_PRJ_BITS;
- // If params->r == 0 then we skipped the filtering in
- // av1_selfguided_restoration_c, i.e. flt[k] == u
- if (params->r[0] > 0) v += xq[0] * (flt0[k] - u);
- if (params->r[1] > 0) v += xq[1] * (flt1[k] - u);
- const int16_t w =
- (int16_t)ROUND_POWER_OF_TWO(v, SGRPROJ_PRJ_BITS + SGRPROJ_RST_BITS);
-
- const uint16_t out = clip_pixel_highbd(w, bit_depth);
- if (highbd)
- *CONVERT_TO_SHORTPTR(dst8ij) = out;
- else
- *dst8ij = (uint8_t)out;
- }
- }
-}
-/* Stripe filter for self-guided restoration on 8-bit data (asserts
- * bit_depth == 8).
- *
- * Walks the stripe in column chunks of at most procunit_width and calls
- * apply_selfguided_restoration() on each with highbd = 0, taking the
- * parameter-set index and xqd from rui->sgrproj_info.
- * NOTE(review): apply_selfguided_restoration() is not defined in the visible
- * code; presumably it resolves to apply_selfguided_restoration_c or a SIMD
- * equivalent -- confirm.
- */
-static void sgrproj_filter_stripe(const RestorationUnitInfo *rui,
- int stripe_width, int stripe_height,
- int procunit_width, const uint8_t *src,
- int src_stride, uint8_t *dst, int dst_stride,
- int32_t *tmpbuf, int bit_depth) {
- (void)bit_depth;  // silences unused warnings if the assert compiles out
- assert(bit_depth == 8);
-
- for (int j = 0; j < stripe_width; j += procunit_width) {
- int w = AOMMIN(procunit_width, stripe_width - j);  // last chunk may be narrower
- apply_selfguided_restoration(src + j, w, stripe_height, src_stride,
- rui->sgrproj_info.ep, rui->sgrproj_info.xqd,
- dst + j, dst_stride, tmpbuf, bit_depth, 0);
- }
-}
-/* Stripe filter for Wiener restoration on high-bit-depth data.
- *
- * Walks the stripe in column chunks of at most procunit_width and calls
- * av1_highbd_wiener_convolve_add_src() on each with the horizontal and
- * vertical filters from rui->wiener_info and the conv params from
- * get_conv_params_wiener(bit_depth). tmpbuf is unused.
- *
- * The chunk width is the remaining width rounded up to a multiple of 16, so
- * the last call can cover up to 15 columns beyond stripe_width.
- * NOTE(review): presumably the buffers are padded to allow this -- confirm
- * against the callers.
- */
-static void wiener_filter_stripe_highbd(const RestorationUnitInfo *rui,
- int stripe_width, int stripe_height,
- int procunit_width, const uint8_t *src8,
- int src_stride, uint8_t *dst8,
- int dst_stride, int32_t *tmpbuf,
- int bit_depth) {
- (void)tmpbuf;
- const ConvolveParams conv_params = get_conv_params_wiener(bit_depth);
-
- for (int j = 0; j < stripe_width; j += procunit_width) {
- int w = AOMMIN(procunit_width, (stripe_width - j + 15) & ~15);  // round remaining width up to 16
- const uint8_t *src8_p = src8 + j;
- uint8_t *dst8_p = dst8 + j;
- av1_highbd_wiener_convolve_add_src(src8_p, src_stride, dst8_p, dst_stride,
- rui->wiener_info.hfilter, 16,
- rui->wiener_info.vfilter, 16, w,
- stripe_height, &conv_params, bit_depth);
- }
-}
-/* Stripe filter for self-guided restoration on high-bit-depth data.
- *
- * Same chunking as sgrproj_filter_stripe(), but passes the caller's
- * bit_depth through without asserting on it and calls
- * apply_selfguided_restoration() with highbd = 1.
- */
-static void sgrproj_filter_stripe_highbd(const RestorationUnitInfo *rui,
- int stripe_width, int stripe_height,
- int procunit_width,
- const uint8_t *src8, int src_stride,
- uint8_t *dst8, int dst_stride,
- int32_t *tmpbuf, int bit_depth) {
- for (int j = 0; j < stripe_width; j += procunit_width) {
- int w = AOMMIN(procunit_width, stripe_width - j);  // last chunk may be narrower
- apply_selfguided_restoration(src8 + j, w, stripe_height, src_stride,
- rui->sgrproj_info.ep, rui->sgrproj_info.xqd,
- dst8 + j, dst_stride, tmpbuf, bit_depth, 1);
- }
-}
-/* Signature shared by the per-stripe filter routines in this file: filter a
- * stripe of stripe_width x stripe_height pixels from src to dst, in column
- * chunks of at most procunit_width, using the parameters held in rui.
- */
-typedef void (*stripe_filter_fun)(const RestorationUnitInfo *rui,
- int stripe_width, int stripe_height,
- int procunit_width, const uint8_t *src,
- int src_stride, uint8_t *dst, int dst_stride,
- int32_t *tmpbuf, int bit_depth);
-
-#define NUM_STRIPE_FILTERS 4
-/* Indexed by 2 * highbd + (restoration type == RESTORE_SGRPROJ), as computed
- * in av1_loop_restoration_filter_unit(). */
-static const stripe_filter_fun stripe_filters[NUM_STRIPE_FILTERS] = {
- wiener_filter_stripe, sgrproj_filter_stripe, wiener_filter_stripe_highbd,
- sgrproj_filter_stripe_highbd
-};
-
-/* Filter one restoration unit.
- *
- * The unit covers rows [limits->v_start, limits->v_end) and columns
- * [limits->h_start, limits->h_end) of data8 and is written to the same
- * position in dst8. A RESTORE_NONE unit is copied with copy_tile() and the
- * function returns. Any other type selects a stripe_filters[] entry by
- * 2 * highbd + (type == RESTORE_SGRPROJ) and processes the unit one stripe
- * at a time, bracketing each filter call with
- * setup_processing_stripe_boundary() and restore_processing_stripe_boundary().
- *
- * tile_stripe0 is added to the stripe index within the tile to obtain the
- * frame-level stripe used to index rsb; ss_x/ss_y scale the processing-unit
- * width, stripe height and stripe offset for subsampled planes.
- */
-void av1_loop_restoration_filter_unit(
- const RestorationTileLimits *limits, const RestorationUnitInfo *rui,
- const RestorationStripeBoundaries *rsb, RestorationLineBuffers *rlbs,
- const AV1PixelRect *tile_rect, int tile_stripe0, int ss_x, int ss_y,
- int highbd, int bit_depth, uint8_t *data8, int stride, uint8_t *dst8,
- int dst_stride, int32_t *tmpbuf, int optimized_lr) {
- RestorationType unit_rtype = rui->restoration_type;
-
- int unit_h = limits->v_end - limits->v_start;
- int unit_w = limits->h_end - limits->h_start;
- uint8_t *data8_tl = data8 + limits->v_start * stride + limits->h_start;
- uint8_t *dst8_tl = dst8 + limits->v_start * dst_stride + limits->h_start;
-
- if (unit_rtype == RESTORE_NONE) {
- copy_tile(unit_w, unit_h, data8_tl, stride, dst8_tl, dst_stride, highbd);
- return;
- }
-
- const int filter_idx = 2 * highbd + (unit_rtype == RESTORE_SGRPROJ);  // any other type uses the Wiener entry
- assert(filter_idx < NUM_STRIPE_FILTERS);
- const stripe_filter_fun stripe_filter = stripe_filters[filter_idx];
-
- const int procunit_width = RESTORATION_PROC_UNIT_SIZE >> ss_x;
-
- // Convolve the whole tile one stripe at a time
- RestorationTileLimits remaining_stripes = *limits;
- int i = 0;  // rows of the unit already processed
- while (i < unit_h) {
- int copy_above, copy_below;
- remaining_stripes.v_start = limits->v_start + i;
-
- get_stripe_boundary_info(&remaining_stripes, tile_rect, ss_y, &copy_above,
- &copy_below);
-
- const int full_stripe_height = RESTORATION_PROC_UNIT_SIZE >> ss_y;
- const int runit_offset = RESTORATION_UNIT_OFFSET >> ss_y;
-
- // Work out where this stripe's boundaries are within
- // rsb->stripe_boundary_{above,below}
- const int tile_stripe =
- (remaining_stripes.v_start - tile_rect->top + runit_offset) /
- full_stripe_height;
- const int frame_stripe = tile_stripe0 + tile_stripe;
- const int rsb_row = RESTORATION_CTX_VERT * frame_stripe;
-
- // Calculate this stripe's height, based on two rules:
- // * The topmost stripe in each tile is 8 luma pixels shorter than usual.
- // * We can't extend past the end of the current restoration unit
- const int nominal_stripe_height =
- full_stripe_height - ((tile_stripe == 0) ? runit_offset : 0);
- const int h = AOMMIN(nominal_stripe_height,
- remaining_stripes.v_end - remaining_stripes.v_start);
-
- setup_processing_stripe_boundary(&remaining_stripes, rsb, rsb_row, highbd,
- h, data8, stride, rlbs, copy_above,
- copy_below, optimized_lr);
-
- stripe_filter(rui, unit_w, h, procunit_width, data8_tl + i * stride, stride,
- dst8_tl + i * dst_stride, dst_stride, tmpbuf, bit_depth);
-
- restore_processing_stripe_boundary(&remaining_stripes, rlbs, highbd, h,
- data8, stride, copy_above, copy_below,
- optimized_lr);
-
- i += h;
- }
-}
-/* Stores in the FilterFrameCtxt passed as priv the index of the first stripe
- * of tile row tile_row: 0 for the first row, otherwise
- * cm->rst_end_stripe[tile_row - 1]. tile_col is unused.
- */
-static void filter_frame_on_tile(int tile_row, int tile_col, void *priv,
- AV1_COMMON *cm) {
- (void)tile_col;
- FilterFrameCtxt *ctxt = (FilterFrameCtxt *)priv;
- ctxt->tile_stripe0 = (tile_row == 0) ? 0 : cm->rst_end_stripe[tile_row - 1];
-}
-/* Per-unit callback installed as lr_ctxt->on_rest_unit by
- * av1_loop_restoration_filter_frame_init(). Unpacks the FilterFrameCtxt
- * passed as priv and forwards to av1_loop_restoration_filter_unit() with the
- * unit info at rsi->unit_info[rest_unit_idx].
- */
-static void filter_frame_on_unit(const RestorationTileLimits *limits,
- const AV1PixelRect *tile_rect,
- int rest_unit_idx, void *priv, int32_t *tmpbuf,
- RestorationLineBuffers *rlbs) {
- FilterFrameCtxt *ctxt = (FilterFrameCtxt *)priv;
- const RestorationInfo *rsi = ctxt->rsi;
-
- av1_loop_restoration_filter_unit(
- limits, &rsi->unit_info[rest_unit_idx], &rsi->boundaries, rlbs, tile_rect,
- ctxt->tile_stripe0, ctxt->ss_x, ctxt->ss_y, ctxt->highbd, ctxt->bit_depth,
- ctxt->data8, ctxt->data_stride, ctxt->dst8, ctxt->dst_stride, tmpbuf,
- rsi->optimized_lr);
-}
-/* Prepares lr_ctxt for filtering frame.
- *
- * Points lr_ctxt->dst at cm->rst_frame and (re)allocates it at the luma crop
- * size of frame, reporting AOM_CODEC_MEM_ERROR through aom_internal_error()
- * on failure. Installs filter_frame_on_unit() as the per-unit callback and
- * records optimized_lr in every plane's RestorationInfo. For each plane whose
- * frame_restoration_type is not RESTORE_NONE it extends the plane by
- * RESTORATION_BORDER with extend_frame() and fills in that plane's
- * FilterFrameCtxt (buffers, strides, subsampling, bit depth, whole-frame tile
- * rect and first-stripe index).
- */
-void av1_loop_restoration_filter_frame_init(AV1LrStruct *lr_ctxt,
- YV12_BUFFER_CONFIG *frame,
- AV1_COMMON *cm, int optimized_lr,
- int num_planes) {
- const SequenceHeader *const seq_params = &cm->seq_params;
- const int bit_depth = seq_params->bit_depth;
- const int highbd = seq_params->use_highbitdepth;
- lr_ctxt->dst = &cm->rst_frame;
-
- const int frame_width = frame->crop_widths[0];
- const int frame_height = frame->crop_heights[0];
- if (aom_realloc_frame_buffer(
- lr_ctxt->dst, frame_width, frame_height, seq_params->subsampling_x,
- seq_params->subsampling_y, highbd, AOM_BORDER_IN_PIXELS,
- cm->byte_alignment, NULL, NULL, NULL) < 0)
- aom_internal_error(&cm->error, AOM_CODEC_MEM_ERROR,
- "Failed to allocate restoration dst buffer");
-
- lr_ctxt->on_rest_unit = filter_frame_on_unit;
- lr_ctxt->frame = frame;
- for (int plane = 0; plane < num_planes; ++plane) {
- RestorationInfo *rsi = &cm->rst_info[plane];
- RestorationType rtype = rsi->frame_restoration_type;
- rsi->optimized_lr = optimized_lr;  // set even for planes skipped below
-
- if (rtype == RESTORE_NONE) {
- continue;
- }
-
- const int is_uv = plane > 0;  // planes 1 and 2 share index 1 in the arrays below
- const int plane_width = frame->crop_widths[is_uv];
- const int plane_height = frame->crop_heights[is_uv];
- FilterFrameCtxt *lr_plane_ctxt = &lr_ctxt->ctxt[plane];
-
- extend_frame(frame->buffers[plane], plane_width, plane_height,
- frame->strides[is_uv], RESTORATION_BORDER, RESTORATION_BORDER,
- highbd);
-
- lr_plane_ctxt->rsi = rsi;
- lr_plane_ctxt->ss_x = is_uv && seq_params->subsampling_x;
- lr_plane_ctxt->ss_y = is_uv && seq_params->subsampling_y;
- lr_plane_ctxt->highbd = highbd;
- lr_plane_ctxt->bit_depth = bit_depth;
- lr_plane_ctxt->data8 = frame->buffers[plane];
- lr_plane_ctxt->dst8 = lr_ctxt->dst->buffers[plane];
- lr_plane_ctxt->data_stride = frame->strides[is_uv];
- lr_plane_ctxt->dst_stride = lr_ctxt->dst->strides[is_uv];
- lr_plane_ctxt->tile_rect = av1_whole_frame_rect(cm, is_uv);
- filter_frame_on_tile(LR_TILE_ROW, LR_TILE_COL, lr_plane_ctxt, cm);
- }
-}
-/* Copies every plane whose frame_restoration_type is not RESTORE_NONE from
- * loop_rest_ctxt->dst (passed as the copy source) to loop_rest_ctxt->frame
- * (passed as the copy destination), limited to that plane's tile_rect. The
- * per-plane copy routine is picked from a three-entry table, so plane must
- * stay below 3.
- */
-void av1_loop_restoration_copy_planes(AV1LrStruct *loop_rest_ctxt,
- AV1_COMMON *cm, int num_planes) {
- typedef void (*copy_fun)(const YV12_BUFFER_CONFIG *src_ybc,
- YV12_BUFFER_CONFIG *dst_ybc, int hstart, int hend,
- int vstart, int vend);
- static const copy_fun copy_funs[3] = {
- aom_yv12_partial_copy_y, aom_yv12_partial_copy_u, aom_yv12_partial_copy_v
- };
-
- for (int plane = 0; plane < num_planes; ++plane) {
- if (cm->rst_info[plane].frame_restoration_type == RESTORE_NONE) continue;
- AV1PixelRect tile_rect = loop_rest_ctxt->ctxt[plane].tile_rect;
- copy_funs[plane](loop_rest_ctxt->dst, loop_rest_ctxt->frame, tile_rect.left,
- tile_rect.right, tile_rect.top, tile_rect.bottom);
- }
-}
-/* Calls av1_foreach_rest_unit_in_plane() for every plane whose
- * frame_restoration_type is not RESTORE_NONE, passing the callback stored in
- * lr_ctxt->on_rest_unit, that plane's FilterFrameCtxt and tile rect, and the
- * shared cm->rst_tmpbuf and cm->rlbs buffers.
- */
-static void foreach_rest_unit_in_planes(AV1LrStruct *lr_ctxt, AV1_COMMON *cm,
- int num_planes) {
- FilterFrameCtxt *ctxt = lr_ctxt->ctxt;
-
- for (int plane = 0; plane < num_planes; ++plane) {
- if (cm->rst_info[plane].frame_restoration_type == RESTORE_NONE) {
- continue;
- }
-
- av1_foreach_rest_unit_in_plane(cm, plane, lr_ctxt->on_rest_unit,
- &ctxt[plane], &ctxt[plane].tile_rect,
- cm->rst_tmpbuf, cm->rlbs);
- }
-}
-/* Runs loop restoration on frame in three steps: initialise the context with
- * av1_loop_restoration_filter_frame_init(), filter every restoration unit of
- * every enabled plane, then copy the filtered planes back with
- * av1_loop_restoration_copy_planes(). lr_ctxt must point to an AV1LrStruct.
- * Asserts that the frame is not coded all-lossless.
- */
-void av1_loop_restoration_filter_frame(YV12_BUFFER_CONFIG *frame,
- AV1_COMMON *cm, int optimized_lr,
- void *lr_ctxt) {
- assert(!cm->all_lossless);
- const int num_planes = av1_num_planes(cm);
-
- AV1LrStruct *loop_rest_ctxt = (AV1LrStruct *)lr_ctxt;
-
- av1_loop_restoration_filter_frame_init(loop_rest_ctxt, frame, cm,
- optimized_lr, num_planes);
-
- foreach_rest_unit_in_planes(loop_rest_ctxt, cm, num_planes);
-
- av1_loop_restoration_copy_planes(loop_rest_ctxt, cm, num_planes);
-}
-/* Visits every restoration unit in one row of units of a tile, left to right.
- *
- * limits must already hold the row's v_start/v_end; h_start/h_end are filled
- * in here for each unit. Units are unit_size wide, except that once fewer
- * than 1.5 * unit_size columns remain the last unit takes all of them. The
- * unit index passed to on_rest_unit is
- * unit_idx0 + row_number * hunits_per_tile + column.
- *
- * Around each on_rest_unit() call, on_sync_read() is invoked for this row
- * and, unless this is the last row of units, for row_number + 2;
- * on_sync_write() is invoked afterwards. lr_sync is passed through unchanged.
- */
-void av1_foreach_rest_unit_in_row(
- RestorationTileLimits *limits, const AV1PixelRect *tile_rect,
- rest_unit_visitor_t on_rest_unit, int row_number, int unit_size,
- int unit_idx0, int hunits_per_tile, int vunits_per_tile, int plane,
- void *priv, int32_t *tmpbuf, RestorationLineBuffers *rlbs,
- sync_read_fn_t on_sync_read, sync_write_fn_t on_sync_write,
- struct AV1LrSyncData *const lr_sync) {
- const int tile_w = tile_rect->right - tile_rect->left;
- const int ext_size = unit_size * 3 / 2;  // widest allowed (final) unit
- int x0 = 0, j = 0;  // x0: column offset within the tile, j: unit column index
- while (x0 < tile_w) {
- int remaining_w = tile_w - x0;
- int w = (remaining_w < ext_size) ? remaining_w : unit_size;
-
- limits->h_start = tile_rect->left + x0;
- limits->h_end = tile_rect->left + x0 + w;
- assert(limits->h_end <= tile_rect->right);
-
- const int unit_idx = unit_idx0 + row_number * hunits_per_tile + j;
-
- // No sync for even numbered rows
- // For odd numbered rows, Loop Restoration of current block requires the LR
- // of top-right and bottom-right blocks to be completed
-
- // top-right sync
- on_sync_read(lr_sync, row_number, j, plane);
- if ((row_number + 1) < vunits_per_tile)
- // bottom-right sync
- on_sync_read(lr_sync, row_number + 2, j, plane);
-
- on_rest_unit(limits, tile_rect, unit_idx, priv, tmpbuf, rlbs);
-
- on_sync_write(lr_sync, row_number, j, hunits_per_tile, plane);
-
- x0 += w;
- ++j;
- }
-}
-/* No-op read-synchronisation hook: ignores every argument.
- * foreach_rest_unit_in_tile() passes it to av1_foreach_rest_unit_in_row()
- * together with a NULL lr_sync.
- */
-void av1_lr_sync_read_dummy(void *const lr_sync, int r, int c, int plane) {
- (void)lr_sync;
- (void)r;
- (void)c;
- (void)plane;
-}
-/* No-op write-synchronisation hook: ignores every argument.
- * foreach_rest_unit_in_tile() passes it to av1_foreach_rest_unit_in_row()
- * together with a NULL lr_sync.
- */
-void av1_lr_sync_write_dummy(void *const lr_sync, int r, int c,
- const int sb_cols, int plane) {
- (void)lr_sync;
- (void)r;
- (void)c;
- (void)sb_cols;
- (void)plane;
-}
-/* Visits every restoration unit of one tile, one row of units at a time, with
- * the no-op sync hooks and a NULL sync object.
- *
- * Rows are unit_size tall, except that once fewer than 1.5 * unit_size rows
- * remain the last row of units takes all of them. Each row's vertical limits
- * are then shifted up by RESTORATION_UNIT_OFFSET >> ss_y: v_start is clamped
- * to the tile top, and v_end is shifted only when it is above the tile
- * bottom. Unit indices start at (tile_col + tile_row * tile_cols) *
- * units_per_tile.
- */
-static void foreach_rest_unit_in_tile(
- const AV1PixelRect *tile_rect, int tile_row, int tile_col, int tile_cols,
- int hunits_per_tile, int vunits_per_tile, int units_per_tile, int unit_size,
- int ss_y, int plane, rest_unit_visitor_t on_rest_unit, void *priv,
- int32_t *tmpbuf, RestorationLineBuffers *rlbs) {
- const int tile_h = tile_rect->bottom - tile_rect->top;
- const int ext_size = unit_size * 3 / 2;  // tallest allowed (final) row of units
-
- const int tile_idx = tile_col + tile_row * tile_cols;
- const int unit_idx0 = tile_idx * units_per_tile;
-
- int y0 = 0, i = 0;  // y0: row offset within the tile, i: unit row index
- while (y0 < tile_h) {
- int remaining_h = tile_h - y0;
- int h = (remaining_h < ext_size) ? remaining_h : unit_size;
-
- RestorationTileLimits limits;
- limits.v_start = tile_rect->top + y0;
- limits.v_end = tile_rect->top + y0 + h;
- assert(limits.v_end <= tile_rect->bottom);
- // Offset the tile upwards to align with the restoration processing stripe
- const int voffset = RESTORATION_UNIT_OFFSET >> ss_y;
- limits.v_start = AOMMAX(tile_rect->top, limits.v_start - voffset);
- if (limits.v_end < tile_rect->bottom) limits.v_end -= voffset;
-
- av1_foreach_rest_unit_in_row(
- &limits, tile_rect, on_rest_unit, i, unit_size, unit_idx0,
- hunits_per_tile, vunits_per_tile, plane, priv, tmpbuf, rlbs,
- av1_lr_sync_read_dummy, av1_lr_sync_write_dummy, NULL);
-
- y0 += h;
- ++i;
- }
-}
-/* Visits every restoration unit of one plane by treating the area described
- * by tile_rect as the single tile (LR_TILE_ROW, LR_TILE_COL) and calling
- * foreach_rest_unit_in_tile() with the unit counts and unit size from
- * cm->rst_info[plane]. ss_y is non-zero only for chroma planes of a
- * vertically subsampled sequence.
- */
-void av1_foreach_rest_unit_in_plane(const struct AV1Common *cm, int plane,
- rest_unit_visitor_t on_rest_unit,
- void *priv, AV1PixelRect *tile_rect,
- int32_t *tmpbuf,
- RestorationLineBuffers *rlbs) {
- const int is_uv = plane > 0;
- const int ss_y = is_uv && cm->seq_params.subsampling_y;
-
- const RestorationInfo *rsi = &cm->rst_info[plane];
-
- foreach_rest_unit_in_tile(tile_rect, LR_TILE_ROW, LR_TILE_COL, LR_TILE_COLS,
- rsi->horz_units_per_tile, rsi->vert_units_per_tile,
- rsi->units_per_tile, rsi->restoration_unit_size,
- ss_y, plane, on_rest_unit, priv, tmpbuf, rlbs);
-}
-/* Computes the half-open range of restoration-unit columns [*rcol0, *rcol1)
- * and rows [*rrow0, *rrow1) associated with the superblock at
- * (mi_row, mi_col) on the given plane.
- *
- * Returns 0 without writing the outputs when bsize is not the sequence's
- * superblock size or the plane's frame_restoration_type is RESTORE_NONE.
- * Otherwise writes all four outputs and returns non-zero iff both ranges are
- * non-empty. Start indices are ceiling divisions of the superblock's top-left
- * position by the unit size; end indices are the same for its bottom-right
- * position, clamped to the unit counts from av1_lr_count_units_in_tile().
- * With superres, the horizontal positions are scaled by
- * cm->superres_scale_denominator / SCALE_NUMERATOR. All four output pointers
- * must be non-NULL (asserted).
- */
-int av1_loop_restoration_corners_in_sb(const struct AV1Common *cm, int plane,
- int mi_row, int mi_col, BLOCK_SIZE bsize,
- int *rcol0, int *rcol1, int *rrow0,
- int *rrow1) {
- assert(rcol0 && rcol1 && rrow0 && rrow1);
-
- if (bsize != cm->seq_params.sb_size) return 0;
- if (cm->rst_info[plane].frame_restoration_type == RESTORE_NONE) return 0;
-
- assert(!cm->all_lossless);
-
- const int is_uv = plane > 0;
-
- const AV1PixelRect tile_rect = av1_whole_frame_rect(cm, is_uv);
- const int tile_w = tile_rect.right - tile_rect.left;
- const int tile_h = tile_rect.bottom - tile_rect.top;
-
- const int mi_top = 0;  // the whole frame is the tile, so its origin is (0, 0)
- const int mi_left = 0;
-
- // Compute the mi-unit corners of the superblock relative to the top-left of
- // the tile
- const int mi_rel_row0 = mi_row - mi_top;
- const int mi_rel_col0 = mi_col - mi_left;
- const int mi_rel_row1 = mi_rel_row0 + mi_size_high[bsize];
- const int mi_rel_col1 = mi_rel_col0 + mi_size_wide[bsize];
-
- const RestorationInfo *rsi = &cm->rst_info[plane];
- const int size = rsi->restoration_unit_size;
-
- // Calculate the number of restoration units in this tile (which might be
- // strictly less than rsi->horz_units_per_tile and rsi->vert_units_per_tile)
- const int horz_units = av1_lr_count_units_in_tile(size, tile_w);
- const int vert_units = av1_lr_count_units_in_tile(size, tile_h);
-
- // The size of an MI-unit on this plane of the image
- const int ss_x = is_uv && cm->seq_params.subsampling_x;
- const int ss_y = is_uv && cm->seq_params.subsampling_y;
- const int mi_size_x = MI_SIZE >> ss_x;
- const int mi_size_y = MI_SIZE >> ss_y;
-
- // Write m for the relative mi column or row, D for the superres denominator
- // and N for the superres numerator. If u is the upscaled pixel offset then
- // we can write the downscaled pixel offset in two ways as:
- //
- // MI_SIZE * m = N / D u
- //
- // from which we get u = D * MI_SIZE * m / N
- const int mi_to_num_x = av1_superres_scaled(cm)
- ? mi_size_x * cm->superres_scale_denominator
- : mi_size_x;
- const int mi_to_num_y = mi_size_y;
- const int denom_x = av1_superres_scaled(cm) ? size * SCALE_NUMERATOR : size;
- const int denom_y = size;
-
- const int rnd_x = denom_x - 1;  // added before dividing to round up
- const int rnd_y = denom_y - 1;
-
- // rcol0/rrow0 should be the first column/row of restoration units (relative
- // to the top-left of the tile) that doesn't start left/below of
- // mi_col/mi_row. For this calculation, we need to round up the division (if
- // the sb starts at runit column 10.1, the first matching runit has column
- // index 11)
- *rcol0 = (mi_rel_col0 * mi_to_num_x + rnd_x) / denom_x;
- *rrow0 = (mi_rel_row0 * mi_to_num_y + rnd_y) / denom_y;
-
- // rel_col1/rel_row1 is the equivalent calculation, but for the superblock
- // below-right. If we're at the bottom or right of the tile, this restoration
- // unit might not exist, in which case we'll clamp accordingly.
- *rcol1 = AOMMIN((mi_rel_col1 * mi_to_num_x + rnd_x) / denom_x, horz_units);
- *rrow1 = AOMMIN((mi_rel_row1 * mi_to_num_y + rnd_y) / denom_y, vert_units);
-
- return *rcol0 < *rcol1 && *rrow0 < *rrow1;
-}
-
-/* Extend to left and right.
- *
- * For each of height rows, fills the extend samples before the row with the
- * row's first sample and the extend samples after it with the row's last
- * sample. width and extend are counted in samples. stride is added to the
- * byte pointer buf, so it is in bytes even when use_highbitdepth is set and
- * the row is accessed as uint16_t.
- * NOTE(review): aom_memset16() is assumed to take a count of 16-bit elements
- * -- confirm.
- */
-static void extend_lines(uint8_t *buf, int width, int height, int stride,
- int extend, int use_highbitdepth) {
- for (int i = 0; i < height; ++i) {
- if (use_highbitdepth) {
- uint16_t *buf16 = (uint16_t *)buf;
- aom_memset16(buf16 - extend, buf16[0], extend);
- aom_memset16(buf16 + width, buf16[width - 1], extend);
- } else {
- memset(buf - extend, buf[0], extend);
- memset(buf + width, buf[width - 1], extend);
- }
- buf += stride;
- }
-}
-
-static void save_deblock_boundary_lines(
- const YV12_BUFFER_CONFIG *frame, const AV1_COMMON *cm, int plane, int row,
- int stripe, int use_highbd, int is_above,
- RestorationStripeBoundaries *boundaries) {
- const int is_uv = plane > 0;
- const uint8_t *src_buf = REAL_PTR(use_highbd, frame->buffers[plane]);
- const int src_stride = frame->strides[is_uv] << use_highbd;
- const uint8_t *src_rows = src_buf + row * src_stride;
-
- uint8_t *bdry_buf = is_above ? boundaries->stripe_boundary_above
- : boundaries->stripe_boundary_below;
- uint8_t *bdry_start = bdry_buf + (RESTORATION_EXTRA_HORZ << use_highbd);
- const int bdry_stride = boundaries->stripe_boundary_stride << use_highbd;
- uint8_t *bdry_rows = bdry_start + RESTORATION_CTX_VERT * stripe * bdry_stride;
-
- // There is a rare case in which a processing stripe can end 1px above the
- // crop border. In this case, we do want to use deblocked pixels from below
- // the stripe (hence why we ended up in this function), but instead of
- // fetching 2 "below" rows we need to fetch one and duplicate it.
- // This is equivalent to clamping the sample locations against the crop border
- const int lines_to_save =
- AOMMIN(RESTORATION_CTX_VERT, frame->crop_heights[is_uv] - row);
- assert(lines_to_save == 1 || lines_to_save == 2);
-
- int upscaled_width;
- int line_bytes;
- if (av1_superres_scaled(cm)) {
- const int ss_x = is_uv && cm->seq_params.subsampling_x;
- upscaled_width = (cm->superres_upscaled_width + ss_x) >> ss_x;
- line_bytes = upscaled_width << use_highbd;
- if (use_highbd)
- av1_upscale_normative_rows(
- cm, CONVERT_TO_BYTEPTR(src_rows), frame->strides[is_uv],
- CONVERT_TO_BYTEPTR(bdry_rows), boundaries->stripe_boundary_stride,
- plane, lines_to_save);
- else
- av1_upscale_normative_rows(cm, src_rows, frame->strides[is_uv], bdry_rows,
- boundaries->stripe_boundary_stride, plane,
- lines_to_save);
- } else {
- upscaled_width = frame->crop_widths[is_uv];
- line_bytes = upscaled_width << use_highbd;
- for (int i = 0; i < lines_to_save; i++) {
- memcpy(bdry_rows + i * bdry_stride, src_rows + i * src_stride,
- line_bytes);
- }
- }
- // If we only saved one line, then copy it into the second line buffer
- if (lines_to_save == 1)
- memcpy(bdry_rows + bdry_stride, bdry_rows, line_bytes);
-
- extend_lines(bdry_rows, upscaled_width, RESTORATION_CTX_VERT, bdry_stride,
- RESTORATION_EXTRA_HORZ, use_highbd);
-}
-
-static void save_cdef_boundary_lines(const YV12_BUFFER_CONFIG *frame,
- const AV1_COMMON *cm, int plane, int row,
- int stripe, int use_highbd, int is_above,
- RestorationStripeBoundaries *boundaries) {
- const int is_uv = plane > 0;
- const uint8_t *src_buf = REAL_PTR(use_highbd, frame->buffers[plane]);
- const int src_stride = frame->strides[is_uv] << use_highbd;
- const uint8_t *src_rows = src_buf + row * src_stride;
-
- uint8_t *bdry_buf = is_above ? boundaries->stripe_boundary_above
- : boundaries->stripe_boundary_below;
- uint8_t *bdry_start = bdry_buf + (RESTORATION_EXTRA_HORZ << use_highbd);
- const int bdry_stride = boundaries->stripe_boundary_stride << use_highbd;
- uint8_t *bdry_rows = bdry_start + RESTORATION_CTX_VERT * stripe * bdry_stride;
- const int src_width = frame->crop_widths[is_uv];
-
- // At the point where this function is called, we've already applied
- // superres. So we don't need to extend the lines here, we can just
- // pull directly from the topmost row of the upscaled frame.
- const int ss_x = is_uv && cm->seq_params.subsampling_x;
- const int upscaled_width = av1_superres_scaled(cm)
- ? (cm->superres_upscaled_width + ss_x) >> ss_x
- : src_width;
- const int line_bytes = upscaled_width << use_highbd;
- for (int i = 0; i < RESTORATION_CTX_VERT; i++) {
- // Copy the line at 'row' into both context lines. This is because
- // we want to (effectively) extend the outermost row of CDEF data
- // from this tile to produce a border, rather than using deblocked
- // pixels from the tile above/below.
- memcpy(bdry_rows + i * bdry_stride, src_rows, line_bytes);
- }
- extend_lines(bdry_rows, upscaled_width, RESTORATION_CTX_VERT, bdry_stride,
- RESTORATION_EXTRA_HORZ, use_highbd);
-}
-
-static void save_tile_row_boundary_lines(const YV12_BUFFER_CONFIG *frame,
- int use_highbd, int plane,
- AV1_COMMON *cm, int after_cdef) {
- const int is_uv = plane > 0;
- const int ss_y = is_uv && cm->seq_params.subsampling_y;
- const int stripe_height = RESTORATION_PROC_UNIT_SIZE >> ss_y;
- const int stripe_off = RESTORATION_UNIT_OFFSET >> ss_y;
-
- // Get the tile rectangle, with height rounded up to the next multiple of 8
- // luma pixels (only relevant for the bottom tile of the frame)
- const AV1PixelRect tile_rect = av1_whole_frame_rect(cm, is_uv);
- const int stripe0 = 0;
-
- RestorationStripeBoundaries *boundaries = &cm->rst_info[plane].boundaries;
-
- const int plane_height = ROUND_POWER_OF_TWO(cm->height, ss_y);
-
- int tile_stripe;
- for (tile_stripe = 0;; ++tile_stripe) {
- const int rel_y0 = AOMMAX(0, tile_stripe * stripe_height - stripe_off);
- const int y0 = tile_rect.top + rel_y0;
- if (y0 >= tile_rect.bottom) break;
-
- const int rel_y1 = (tile_stripe + 1) * stripe_height - stripe_off;
- const int y1 = AOMMIN(tile_rect.top + rel_y1, tile_rect.bottom);
-
- const int frame_stripe = stripe0 + tile_stripe;
-
- // In this case, we should only use CDEF pixels at the top
- // and bottom of the frame as a whole; internal tile boundaries
- // can use deblocked pixels from adjacent tiles for context.
- const int use_deblock_above = (frame_stripe > 0);
- const int use_deblock_below = (y1 < plane_height);
-
- if (!after_cdef) {
- // Save deblocked context where needed.
- if (use_deblock_above) {
- save_deblock_boundary_lines(frame, cm, plane, y0 - RESTORATION_CTX_VERT,
- frame_stripe, use_highbd, 1, boundaries);
- }
- if (use_deblock_below) {
- save_deblock_boundary_lines(frame, cm, plane, y1, frame_stripe,
- use_highbd, 0, boundaries);
- }
- } else {
- // Save CDEF context where needed. Note that we need to save the CDEF
- // context for a particular boundary iff we *didn't* save deblocked
- // context for that boundary.
- //
- // In addition, we need to save copies of the outermost line within
- // the tile, rather than using data from outside the tile.
- if (!use_deblock_above) {
- save_cdef_boundary_lines(frame, cm, plane, y0, frame_stripe, use_highbd,
- 1, boundaries);
- }
- if (!use_deblock_below) {
- save_cdef_boundary_lines(frame, cm, plane, y1 - 1, frame_stripe,
- use_highbd, 0, boundaries);
- }
- }
- }
-}
-
-// For each RESTORATION_PROC_UNIT_SIZE pixel high stripe, save 4 scan
-// lines to be used as boundary in the loop restoration process. The
-// lines are saved in rst_internal.stripe_boundary_lines
-void av1_loop_restoration_save_boundary_lines(const YV12_BUFFER_CONFIG *frame,
- AV1_COMMON *cm, int after_cdef) {
- const int num_planes = av1_num_planes(cm);
- const int use_highbd = cm->seq_params.use_highbitdepth;
- for (int p = 0; p < num_planes; ++p) {
- save_tile_row_boundary_lines(frame, use_highbd, p, cm, after_cdef);
- }
-}
diff --git a/third_party/aom/av1/common/restoration.h b/third_party/aom/av1/common/restoration.h
deleted file mode 100644
index d834f9270..000000000
--- a/third_party/aom/av1/common/restoration.h
+++ /dev/null
@@ -1,377 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#ifndef AOM_AV1_COMMON_RESTORATION_H_
-#define AOM_AV1_COMMON_RESTORATION_H_
-
-#include "aom_ports/mem.h"
-#include "config/aom_config.h"
-
-#include "av1/common/blockd.h"
-#include "av1/common/enums.h"
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-#define CLIP(x, lo, hi) ((x) < (lo) ? (lo) : (x) > (hi) ? (hi) : (x))
-#define RINT(x) ((x) < 0 ? (int)((x)-0.5) : (int)((x) + 0.5))
-
-#define RESTORATION_PROC_UNIT_SIZE 64
-
-// Filter tile grid offset upwards compared to the superblock grid
-#define RESTORATION_UNIT_OFFSET 8
-
-#define SGRPROJ_BORDER_VERT 3 // Vertical border used for Sgr
-#define SGRPROJ_BORDER_HORZ 3 // Horizontal border used for Sgr
-
-#define WIENER_BORDER_VERT 2 // Vertical border used for Wiener
-#define WIENER_HALFWIN 3
-#define WIENER_BORDER_HORZ (WIENER_HALFWIN) // Horizontal border for Wiener
-
-// RESTORATION_BORDER_VERT determines line buffer requirement for LR.
-// Should be set at the max of SGRPROJ_BORDER_VERT and WIENER_BORDER_VERT.
-// Note the line buffer needed is twice the value of this macro.
-#if SGRPROJ_BORDER_VERT >= WIENER_BORDER_VERT
-#define RESTORATION_BORDER_VERT (SGRPROJ_BORDER_VERT)
-#else
-#define RESTORATION_BORDER_VERT (WIENER_BORDER_VERT)
-#endif // SGRPROJ_BORDER_VERT >= WIENER_BORDER_VERT
-
-#if SGRPROJ_BORDER_HORZ >= WIENER_BORDER_HORZ
-#define RESTORATION_BORDER_HORZ (SGRPROJ_BORDER_HORZ)
-#else
-#define RESTORATION_BORDER_HORZ (WIENER_BORDER_HORZ)
-#endif // SGRPROJ_BORDER_VERT >= WIENER_BORDER_VERT
-
-// How many border pixels do we need for each processing unit?
-#define RESTORATION_BORDER 3
-
-// How many rows of deblocked pixels do we save above/below each processing
-// stripe?
-#define RESTORATION_CTX_VERT 2
-
-// Additional pixels to the left and right in above/below buffers
-// It is RESTORATION_BORDER_HORZ rounded up to get nicer buffer alignment
-#define RESTORATION_EXTRA_HORZ 4
-
-// Pad up to 20 more (may be much less is needed)
-#define RESTORATION_PADDING 20
-#define RESTORATION_PROC_UNIT_PELS \
- ((RESTORATION_PROC_UNIT_SIZE + RESTORATION_BORDER_HORZ * 2 + \
- RESTORATION_PADDING) * \
- (RESTORATION_PROC_UNIT_SIZE + RESTORATION_BORDER_VERT * 2 + \
- RESTORATION_PADDING))
-
-#define RESTORATION_UNITSIZE_MAX 256
-#define RESTORATION_UNITPELS_HORZ_MAX \
- (RESTORATION_UNITSIZE_MAX * 3 / 2 + 2 * RESTORATION_BORDER_HORZ + 16)
-#define RESTORATION_UNITPELS_VERT_MAX \
- ((RESTORATION_UNITSIZE_MAX * 3 / 2 + 2 * RESTORATION_BORDER_VERT + \
- RESTORATION_UNIT_OFFSET))
-#define RESTORATION_UNITPELS_MAX \
- (RESTORATION_UNITPELS_HORZ_MAX * RESTORATION_UNITPELS_VERT_MAX)
-
-// Two 32-bit buffers needed for the restored versions from two filters
-// TODO(debargha, rupert): Refactor to not need the large tilesize to be stored
-// on the decoder side.
-#define SGRPROJ_TMPBUF_SIZE (RESTORATION_UNITPELS_MAX * 2 * sizeof(int32_t))
-
-#define SGRPROJ_EXTBUF_SIZE (0)
-#define SGRPROJ_PARAMS_BITS 4
-#define SGRPROJ_PARAMS (1 << SGRPROJ_PARAMS_BITS)
-
-// Precision bits for projection
-#define SGRPROJ_PRJ_BITS 7
-// Restoration precision bits generated higher than source before projection
-#define SGRPROJ_RST_BITS 4
-// Internal precision bits for core selfguided_restoration
-#define SGRPROJ_SGR_BITS 8
-#define SGRPROJ_SGR (1 << SGRPROJ_SGR_BITS)
-
-#define SGRPROJ_PRJ_MIN0 (-(1 << SGRPROJ_PRJ_BITS) * 3 / 4)
-#define SGRPROJ_PRJ_MAX0 (SGRPROJ_PRJ_MIN0 + (1 << SGRPROJ_PRJ_BITS) - 1)
-#define SGRPROJ_PRJ_MIN1 (-(1 << SGRPROJ_PRJ_BITS) / 4)
-#define SGRPROJ_PRJ_MAX1 (SGRPROJ_PRJ_MIN1 + (1 << SGRPROJ_PRJ_BITS) - 1)
-
-#define SGRPROJ_PRJ_SUBEXP_K 4
-
-#define SGRPROJ_BITS (SGRPROJ_PRJ_BITS * 2 + SGRPROJ_PARAMS_BITS)
-
-#define MAX_RADIUS 2 // Only 1, 2, 3 allowed
-#define MAX_NELEM ((2 * MAX_RADIUS + 1) * (2 * MAX_RADIUS + 1))
-#define SGRPROJ_MTABLE_BITS 20
-#define SGRPROJ_RECIP_BITS 12
-
-#define WIENER_HALFWIN1 (WIENER_HALFWIN + 1)
-#define WIENER_WIN (2 * WIENER_HALFWIN + 1)
-#define WIENER_WIN2 ((WIENER_WIN) * (WIENER_WIN))
-#define WIENER_TMPBUF_SIZE (0)
-#define WIENER_EXTBUF_SIZE (0)
-
-// If WIENER_WIN_CHROMA == WIENER_WIN - 2, that implies 5x5 filters are used for
-// chroma. To use 7x7 for chroma set WIENER_WIN_CHROMA to WIENER_WIN.
-#define WIENER_WIN_CHROMA (WIENER_WIN - 2)
-#define WIENER_WIN2_CHROMA ((WIENER_WIN_CHROMA) * (WIENER_WIN_CHROMA))
-
-#define WIENER_FILT_PREC_BITS 7
-#define WIENER_FILT_STEP (1 << WIENER_FILT_PREC_BITS)
-
-// Central values for the taps
-#define WIENER_FILT_TAP0_MIDV (3)
-#define WIENER_FILT_TAP1_MIDV (-7)
-#define WIENER_FILT_TAP2_MIDV (15)
-#define WIENER_FILT_TAP3_MIDV \
- (WIENER_FILT_STEP - 2 * (WIENER_FILT_TAP0_MIDV + WIENER_FILT_TAP1_MIDV + \
- WIENER_FILT_TAP2_MIDV))
-
-#define WIENER_FILT_TAP0_BITS 4
-#define WIENER_FILT_TAP1_BITS 5
-#define WIENER_FILT_TAP2_BITS 6
-
-#define WIENER_FILT_BITS \
- ((WIENER_FILT_TAP0_BITS + WIENER_FILT_TAP1_BITS + WIENER_FILT_TAP2_BITS) * 2)
-
-#define WIENER_FILT_TAP0_MINV \
- (WIENER_FILT_TAP0_MIDV - (1 << WIENER_FILT_TAP0_BITS) / 2)
-#define WIENER_FILT_TAP1_MINV \
- (WIENER_FILT_TAP1_MIDV - (1 << WIENER_FILT_TAP1_BITS) / 2)
-#define WIENER_FILT_TAP2_MINV \
- (WIENER_FILT_TAP2_MIDV - (1 << WIENER_FILT_TAP2_BITS) / 2)
-
-#define WIENER_FILT_TAP0_MAXV \
- (WIENER_FILT_TAP0_MIDV - 1 + (1 << WIENER_FILT_TAP0_BITS) / 2)
-#define WIENER_FILT_TAP1_MAXV \
- (WIENER_FILT_TAP1_MIDV - 1 + (1 << WIENER_FILT_TAP1_BITS) / 2)
-#define WIENER_FILT_TAP2_MAXV \
- (WIENER_FILT_TAP2_MIDV - 1 + (1 << WIENER_FILT_TAP2_BITS) / 2)
-
-#define WIENER_FILT_TAP0_SUBEXP_K 1
-#define WIENER_FILT_TAP1_SUBEXP_K 2
-#define WIENER_FILT_TAP2_SUBEXP_K 3
-
-// Max of SGRPROJ_TMPBUF_SIZE, DOMAINTXFMRF_TMPBUF_SIZE, WIENER_TMPBUF_SIZE
-#define RESTORATION_TMPBUF_SIZE (SGRPROJ_TMPBUF_SIZE)
-
-// Max of SGRPROJ_EXTBUF_SIZE, WIENER_EXTBUF_SIZE
-#define RESTORATION_EXTBUF_SIZE (WIENER_EXTBUF_SIZE)
-
-// Check the assumptions of the existing code
-#if SUBPEL_TAPS != WIENER_WIN + 1
-#error "Wiener filter currently only works if SUBPEL_TAPS == WIENER_WIN + 1"
-#endif
-#if WIENER_FILT_PREC_BITS != 7
-#error "Wiener filter currently only works if WIENER_FILT_PREC_BITS == 7"
-#endif
-
-#define LR_TILE_ROW 0
-#define LR_TILE_COL 0
-#define LR_TILE_COLS 1
-
-typedef struct {
- int r[2]; // radii
- int s[2]; // sgr parameters for r[0] and r[1], based on GenSgrprojVtable()
-} sgr_params_type;
-
-typedef struct {
- RestorationType restoration_type;
- WienerInfo wiener_info;
- SgrprojInfo sgrproj_info;
-} RestorationUnitInfo;
-
-// A restoration line buffer needs space for two lines plus a horizontal filter
-// margin of RESTORATION_EXTRA_HORZ on each side.
-#define RESTORATION_LINEBUFFER_WIDTH \
- (RESTORATION_UNITSIZE_MAX * 3 / 2 + 2 * RESTORATION_EXTRA_HORZ)
-
-// Similarly, the column buffers (used when we're at a vertical tile edge
-// that we can't filter across) need space for one processing unit's worth
-// of pixels, plus the top/bottom border width
-#define RESTORATION_COLBUFFER_HEIGHT \
- (RESTORATION_PROC_UNIT_SIZE + 2 * RESTORATION_BORDER)
-
-typedef struct {
- // Temporary buffers to save/restore 3 lines above/below the restoration
- // stripe.
- uint16_t tmp_save_above[RESTORATION_BORDER][RESTORATION_LINEBUFFER_WIDTH];
- uint16_t tmp_save_below[RESTORATION_BORDER][RESTORATION_LINEBUFFER_WIDTH];
-} RestorationLineBuffers;
-
-typedef struct {
- uint8_t *stripe_boundary_above;
- uint8_t *stripe_boundary_below;
- int stripe_boundary_stride;
- int stripe_boundary_size;
-} RestorationStripeBoundaries;
-
-typedef struct {
- RestorationType frame_restoration_type;
- int restoration_unit_size;
-
- // Fields below here are allocated and initialised by
- // av1_alloc_restoration_struct. (horz_)units_per_tile give the number of
- // restoration units in (one row of) the largest tile in the frame. The data
- // in unit_info is laid out with units_per_tile entries for each tile, which
- // have stride horz_units_per_tile.
- //
- // Even if there are tiles of different sizes, the data in unit_info is laid
- // out as if all tiles are of full size.
- int units_per_tile;
- int vert_units_per_tile, horz_units_per_tile;
- RestorationUnitInfo *unit_info;
- RestorationStripeBoundaries boundaries;
- int optimized_lr;
-} RestorationInfo;
-
-static INLINE void set_default_sgrproj(SgrprojInfo *sgrproj_info) {
- sgrproj_info->xqd[0] = (SGRPROJ_PRJ_MIN0 + SGRPROJ_PRJ_MAX0) / 2;
- sgrproj_info->xqd[1] = (SGRPROJ_PRJ_MIN1 + SGRPROJ_PRJ_MAX1) / 2;
-}
-
-static INLINE void set_default_wiener(WienerInfo *wiener_info) {
- wiener_info->vfilter[0] = wiener_info->hfilter[0] = WIENER_FILT_TAP0_MIDV;
- wiener_info->vfilter[1] = wiener_info->hfilter[1] = WIENER_FILT_TAP1_MIDV;
- wiener_info->vfilter[2] = wiener_info->hfilter[2] = WIENER_FILT_TAP2_MIDV;
- wiener_info->vfilter[WIENER_HALFWIN] = wiener_info->hfilter[WIENER_HALFWIN] =
- -2 *
- (WIENER_FILT_TAP2_MIDV + WIENER_FILT_TAP1_MIDV + WIENER_FILT_TAP0_MIDV);
- wiener_info->vfilter[4] = wiener_info->hfilter[4] = WIENER_FILT_TAP2_MIDV;
- wiener_info->vfilter[5] = wiener_info->hfilter[5] = WIENER_FILT_TAP1_MIDV;
- wiener_info->vfilter[6] = wiener_info->hfilter[6] = WIENER_FILT_TAP0_MIDV;
-}
-
-typedef struct {
- int h_start, h_end, v_start, v_end;
-} RestorationTileLimits;
-
-typedef void (*rest_unit_visitor_t)(const RestorationTileLimits *limits,
- const AV1PixelRect *tile_rect,
- int rest_unit_idx, void *priv,
- int32_t *tmpbuf,
- RestorationLineBuffers *rlbs);
-
-typedef struct FilterFrameCtxt {
- const RestorationInfo *rsi;
- int tile_stripe0;
- int ss_x, ss_y;
- int highbd, bit_depth;
- uint8_t *data8, *dst8;
- int data_stride, dst_stride;
- AV1PixelRect tile_rect;
-} FilterFrameCtxt;
-
-typedef struct AV1LrStruct {
- rest_unit_visitor_t on_rest_unit;
- FilterFrameCtxt ctxt[MAX_MB_PLANE];
- YV12_BUFFER_CONFIG *frame;
- YV12_BUFFER_CONFIG *dst;
-} AV1LrStruct;
-
-extern const sgr_params_type sgr_params[SGRPROJ_PARAMS];
-extern int sgrproj_mtable[SGRPROJ_PARAMS][2];
-extern const int32_t x_by_xplus1[256];
-extern const int32_t one_by_x[MAX_NELEM];
-
-void av1_alloc_restoration_struct(struct AV1Common *cm, RestorationInfo *rsi,
- int is_uv);
-void av1_free_restoration_struct(RestorationInfo *rst_info);
-
-void extend_frame(uint8_t *data, int width, int height, int stride,
- int border_horz, int border_vert, int highbd);
-void decode_xq(const int *xqd, int *xq, const sgr_params_type *params);
-
-// Filter a single loop restoration unit.
-//
-// limits is the limits of the unit. rui gives the mode to use for this unit
-// and its coefficients. If striped loop restoration is enabled, rsb contains
-// deblocked pixels to use for stripe boundaries; rlbs is just some space to
-// use as a scratch buffer. tile_rect gives the limits of the tile containing
-// this unit. tile_stripe0 is the index of the first stripe in this tile.
-//
-// ss_x and ss_y are flags which should be 1 if this is a plane with
-// horizontal/vertical subsampling, respectively. highbd is a flag which should
-// be 1 in high bit depth mode, in which case bit_depth is the bit depth.
-//
-// data8 is the frame data (pointing at the top-left corner of the frame, not
-// the restoration unit) and stride is its stride. dst8 is the buffer where the
-// results will be written and has stride dst_stride. Like data8, dst8 should
-// point at the top-left corner of the frame.
-//
-// Finally tmpbuf is a scratch buffer used by the sgrproj filter which should
-// be at least SGRPROJ_TMPBUF_SIZE big.
-void av1_loop_restoration_filter_unit(
- const RestorationTileLimits *limits, const RestorationUnitInfo *rui,
- const RestorationStripeBoundaries *rsb, RestorationLineBuffers *rlbs,
- const AV1PixelRect *tile_rect, int tile_stripe0, int ss_x, int ss_y,
- int highbd, int bit_depth, uint8_t *data8, int stride, uint8_t *dst8,
- int dst_stride, int32_t *tmpbuf, int optimized_lr);
-
-void av1_loop_restoration_filter_frame(YV12_BUFFER_CONFIG *frame,
- struct AV1Common *cm, int optimized_lr,
- void *lr_ctxt);
-void av1_loop_restoration_precal();
-
-typedef void (*rest_tile_start_visitor_t)(int tile_row, int tile_col,
- void *priv);
-struct AV1LrSyncData;
-
-typedef void (*sync_read_fn_t)(void *const lr_sync, int r, int c, int plane);
-
-typedef void (*sync_write_fn_t)(void *const lr_sync, int r, int c,
- const int sb_cols, int plane);
-
-// Call on_rest_unit for each loop restoration unit in the plane.
-void av1_foreach_rest_unit_in_plane(const struct AV1Common *cm, int plane,
- rest_unit_visitor_t on_rest_unit,
- void *priv, AV1PixelRect *tile_rect,
- int32_t *tmpbuf,
- RestorationLineBuffers *rlbs);
-
-// Return 1 iff the block at mi_row, mi_col with size bsize is a
-// top-level superblock containing the top-left corner of at least one
-// loop restoration unit.
-//
-// If the block is a top-level superblock, the function writes to
-// *rcol0, *rcol1, *rrow0, *rrow1. The rectangle of restoration unit
-// indices given by [*rcol0, *rcol1) x [*rrow0, *rrow1) are relative
-// to the current tile, whose starting index is returned as
-// *tile_tl_idx.
-int av1_loop_restoration_corners_in_sb(const struct AV1Common *cm, int plane,
- int mi_row, int mi_col, BLOCK_SIZE bsize,
- int *rcol0, int *rcol1, int *rrow0,
- int *rrow1);
-
-void av1_loop_restoration_save_boundary_lines(const YV12_BUFFER_CONFIG *frame,
- struct AV1Common *cm,
- int after_cdef);
-void av1_loop_restoration_filter_frame_init(AV1LrStruct *lr_ctxt,
- YV12_BUFFER_CONFIG *frame,
- struct AV1Common *cm,
- int optimized_lr, int num_planes);
-void av1_loop_restoration_copy_planes(AV1LrStruct *loop_rest_ctxt,
- struct AV1Common *cm, int num_planes);
-void av1_foreach_rest_unit_in_row(
- RestorationTileLimits *limits, const AV1PixelRect *tile_rect,
- rest_unit_visitor_t on_rest_unit, int row_number, int unit_size,
- int unit_idx0, int hunits_per_tile, int vunits_per_tile, int plane,
- void *priv, int32_t *tmpbuf, RestorationLineBuffers *rlbs,
- sync_read_fn_t on_sync_read, sync_write_fn_t on_sync_write,
- struct AV1LrSyncData *const lr_sync);
-AV1PixelRect av1_whole_frame_rect(const struct AV1Common *cm, int is_uv);
-int av1_lr_count_units_in_tile(int unit_size, int tile_size);
-void av1_lr_sync_read_dummy(void *const lr_sync, int r, int c, int plane);
-void av1_lr_sync_write_dummy(void *const lr_sync, int r, int c,
- const int sb_cols, int plane);
-#ifdef __cplusplus
-} // extern "C"
-#endif
-
-#endif // AOM_AV1_COMMON_RESTORATION_H_
diff --git a/third_party/aom/av1/common/scale.c b/third_party/aom/av1/common/scale.c
deleted file mode 100644
index c525fe229..000000000
--- a/third_party/aom/av1/common/scale.c
+++ /dev/null
@@ -1,126 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#include "config/aom_dsp_rtcd.h"
-#include "config/av1_rtcd.h"
-
-#include "av1/common/filter.h"
-#include "av1/common/scale.h"
-#include "aom_dsp/aom_filter.h"
-
-// Note: Expect val to be in q4 precision
-static INLINE int scaled_x(int val, const struct scale_factors *sf) {
- const int off =
- (sf->x_scale_fp - (1 << REF_SCALE_SHIFT)) * (1 << (SUBPEL_BITS - 1));
- const int64_t tval = (int64_t)val * sf->x_scale_fp + off;
- return (int)ROUND_POWER_OF_TWO_SIGNED_64(tval,
- REF_SCALE_SHIFT - SCALE_EXTRA_BITS);
-}
-
-// Note: Expect val to be in q4 precision
-static INLINE int scaled_y(int val, const struct scale_factors *sf) {
- const int off =
- (sf->y_scale_fp - (1 << REF_SCALE_SHIFT)) * (1 << (SUBPEL_BITS - 1));
- const int64_t tval = (int64_t)val * sf->y_scale_fp + off;
- return (int)ROUND_POWER_OF_TWO_SIGNED_64(tval,
- REF_SCALE_SHIFT - SCALE_EXTRA_BITS);
-}
-
-// Note: Expect val to be in q4 precision
-static int unscaled_value(int val, const struct scale_factors *sf) {
- (void)sf;
- return val << SCALE_EXTRA_BITS;
-}
-
-static int get_fixed_point_scale_factor(int other_size, int this_size) {
- // Calculate scaling factor once for each reference frame
- // and use fixed point scaling factors in decoding and encoding routines.
- // Hardware implementations can calculate scale factor in device driver
- // and use multiplication and shifting on hardware instead of division.
- return ((other_size << REF_SCALE_SHIFT) + this_size / 2) / this_size;
-}
-
-// Given the fixed point scale, calculate coarse point scale.
-static int fixed_point_scale_to_coarse_point_scale(int scale_fp) {
- return ROUND_POWER_OF_TWO(scale_fp, REF_SCALE_SHIFT - SCALE_SUBPEL_BITS);
-}
-
-// Note: x and y are integer precision, mvq4 is q4 precision.
-MV32 av1_scale_mv(const MV *mvq4, int x, int y,
- const struct scale_factors *sf) {
- const int x_off_q4 = scaled_x(x << SUBPEL_BITS, sf);
- const int y_off_q4 = scaled_y(y << SUBPEL_BITS, sf);
- const MV32 res = { scaled_y((y << SUBPEL_BITS) + mvq4->row, sf) - y_off_q4,
- scaled_x((x << SUBPEL_BITS) + mvq4->col, sf) - x_off_q4 };
- return res;
-}
-
-void av1_setup_scale_factors_for_frame(struct scale_factors *sf, int other_w,
- int other_h, int this_w, int this_h) {
- if (!valid_ref_frame_size(other_w, other_h, this_w, this_h)) {
- sf->x_scale_fp = REF_INVALID_SCALE;
- sf->y_scale_fp = REF_INVALID_SCALE;
- return;
- }
-
- sf->x_scale_fp = get_fixed_point_scale_factor(other_w, this_w);
- sf->y_scale_fp = get_fixed_point_scale_factor(other_h, this_h);
-
- sf->x_step_q4 = fixed_point_scale_to_coarse_point_scale(sf->x_scale_fp);
- sf->y_step_q4 = fixed_point_scale_to_coarse_point_scale(sf->y_scale_fp);
-
- if (av1_is_scaled(sf)) {
- sf->scale_value_x = scaled_x;
- sf->scale_value_y = scaled_y;
- } else {
- sf->scale_value_x = unscaled_value;
- sf->scale_value_y = unscaled_value;
- }
-
- // AV1 convolve functions
- // Special case convolve functions should produce the same result as
- // av1_convolve_2d.
- // subpel_x_q4 == 0 && subpel_y_q4 == 0
- sf->convolve[0][0][0] = av1_convolve_2d_copy_sr;
- // subpel_x_q4 == 0
- sf->convolve[0][1][0] = av1_convolve_y_sr;
- // subpel_y_q4 == 0
- sf->convolve[1][0][0] = av1_convolve_x_sr;
- // subpel_x_q4 != 0 && subpel_y_q4 != 0
- sf->convolve[1][1][0] = av1_convolve_2d_sr;
- // subpel_x_q4 == 0 && subpel_y_q4 == 0
- sf->convolve[0][0][1] = av1_jnt_convolve_2d_copy;
- // subpel_x_q4 == 0
- sf->convolve[0][1][1] = av1_jnt_convolve_y;
- // subpel_y_q4 == 0
- sf->convolve[1][0][1] = av1_jnt_convolve_x;
- // subpel_x_q4 != 0 && subpel_y_q4 != 0
- sf->convolve[1][1][1] = av1_jnt_convolve_2d;
- // AV1 High BD convolve functions
- // Special case convolve functions should produce the same result as
- // av1_highbd_convolve_2d.
- // subpel_x_q4 == 0 && subpel_y_q4 == 0
- sf->highbd_convolve[0][0][0] = av1_highbd_convolve_2d_copy_sr;
- // subpel_x_q4 == 0
- sf->highbd_convolve[0][1][0] = av1_highbd_convolve_y_sr;
- // subpel_y_q4 == 0
- sf->highbd_convolve[1][0][0] = av1_highbd_convolve_x_sr;
- // subpel_x_q4 != 0 && subpel_y_q4 != 0
- sf->highbd_convolve[1][1][0] = av1_highbd_convolve_2d_sr;
- // subpel_x_q4 == 0 && subpel_y_q4 == 0
- sf->highbd_convolve[0][0][1] = av1_highbd_jnt_convolve_2d_copy;
- // subpel_x_q4 == 0
- sf->highbd_convolve[0][1][1] = av1_highbd_jnt_convolve_y;
- // subpel_y_q4 == 0
- sf->highbd_convolve[1][0][1] = av1_highbd_jnt_convolve_x;
- // subpel_x_q4 != 0 && subpel_y_q4 != 0
- sf->highbd_convolve[1][1][1] = av1_highbd_jnt_convolve_2d;
-}
diff --git a/third_party/aom/av1/common/scale.h b/third_party/aom/av1/common/scale.h
deleted file mode 100644
index 748e958c3..000000000
--- a/third_party/aom/av1/common/scale.h
+++ /dev/null
@@ -1,67 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#ifndef AOM_AV1_COMMON_SCALE_H_
-#define AOM_AV1_COMMON_SCALE_H_
-
-#include "av1/common/convolve.h"
-#include "av1/common/mv.h"
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-#define SCALE_NUMERATOR 8
-
-#define REF_SCALE_SHIFT 14
-#define REF_NO_SCALE (1 << REF_SCALE_SHIFT)
-#define REF_INVALID_SCALE -1
-
-struct scale_factors {
- int x_scale_fp; // horizontal fixed point scale factor
- int y_scale_fp; // vertical fixed point scale factor
- int x_step_q4;
- int y_step_q4;
-
- int (*scale_value_x)(int val, const struct scale_factors *sf);
- int (*scale_value_y)(int val, const struct scale_factors *sf);
-
- // convolve_fn_ptr[subpel_x != 0][subpel_y != 0][is_compound]
- aom_convolve_fn_t convolve[2][2][2];
- aom_highbd_convolve_fn_t highbd_convolve[2][2][2];
-};
-
-MV32 av1_scale_mv(const MV *mv, int x, int y, const struct scale_factors *sf);
-
-void av1_setup_scale_factors_for_frame(struct scale_factors *sf, int other_w,
- int other_h, int this_w, int this_h);
-
-static INLINE int av1_is_valid_scale(const struct scale_factors *sf) {
- return sf->x_scale_fp != REF_INVALID_SCALE &&
- sf->y_scale_fp != REF_INVALID_SCALE;
-}
-
-static INLINE int av1_is_scaled(const struct scale_factors *sf) {
- return av1_is_valid_scale(sf) &&
- (sf->x_scale_fp != REF_NO_SCALE || sf->y_scale_fp != REF_NO_SCALE);
-}
-
-static INLINE int valid_ref_frame_size(int ref_width, int ref_height,
- int this_width, int this_height) {
- return 2 * this_width >= ref_width && 2 * this_height >= ref_height &&
- this_width <= 16 * ref_width && this_height <= 16 * ref_height;
-}
-
-#ifdef __cplusplus
-} // extern "C"
-#endif
-
-#endif // AOM_AV1_COMMON_SCALE_H_
diff --git a/third_party/aom/av1/common/scan.c b/third_party/aom/av1/common/scan.c
deleted file mode 100644
index 31a787b53..000000000
--- a/third_party/aom/av1/common/scan.c
+++ /dev/null
@@ -1,3735 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#include <assert.h>
-
-#include "av1/common/common_data.h"
-#include "av1/common/scan.h"
-
-DECLARE_ALIGNED(16, static const int16_t, default_scan_4x4[16]) = {
- 0, 1, 4, 8, 5, 2, 3, 6, 9, 12, 13, 10, 7, 11, 14, 15
-};
-
-DECLARE_ALIGNED(16, static const int16_t, mcol_scan_4x4[16]) = {
- 0, 4, 8, 12, 1, 5, 9, 13, 2, 6, 10, 14, 3, 7, 11, 15,
-};
-
-DECLARE_ALIGNED(16, static const int16_t, mrow_scan_4x4[16]) = {
- 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
-};
-
-DECLARE_ALIGNED(16, static const int16_t, default_scan_4x8[32]) = {
- 0, 1, 4, 2, 5, 8, 3, 6, 9, 12, 7, 10, 13, 16, 11, 14,
- 17, 20, 15, 18, 21, 24, 19, 22, 25, 28, 23, 26, 29, 27, 30, 31,
-};
-
-DECLARE_ALIGNED(16, static const int16_t, mcol_scan_4x8[32]) = {
- 0, 4, 8, 12, 16, 20, 24, 28, 1, 5, 9, 13, 17, 21, 25, 29,
- 2, 6, 10, 14, 18, 22, 26, 30, 3, 7, 11, 15, 19, 23, 27, 31,
-};
-
-DECLARE_ALIGNED(16, static const int16_t, mrow_scan_4x8[32]) = {
- 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
- 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
-};
-
-DECLARE_ALIGNED(16, static const int16_t, default_scan_8x4[32]) = {
- 0, 8, 1, 16, 9, 2, 24, 17, 10, 3, 25, 18, 11, 4, 26, 19,
- 12, 5, 27, 20, 13, 6, 28, 21, 14, 7, 29, 22, 15, 30, 23, 31,
-};
-
-DECLARE_ALIGNED(16, static const int16_t, mcol_scan_8x4[32]) = {
- 0, 8, 16, 24, 1, 9, 17, 25, 2, 10, 18, 26, 3, 11, 19, 27,
- 4, 12, 20, 28, 5, 13, 21, 29, 6, 14, 22, 30, 7, 15, 23, 31,
-};
-
-DECLARE_ALIGNED(16, static const int16_t, mrow_scan_8x4[32]) = {
- 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
- 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
-};
-
-DECLARE_ALIGNED(16, static const int16_t, default_scan_4x16[64]) = {
- 0, 1, 4, 2, 5, 8, 3, 6, 9, 12, 7, 10, 13, 16, 11, 14,
- 17, 20, 15, 18, 21, 24, 19, 22, 25, 28, 23, 26, 29, 32, 27, 30,
- 33, 36, 31, 34, 37, 40, 35, 38, 41, 44, 39, 42, 45, 48, 43, 46,
- 49, 52, 47, 50, 53, 56, 51, 54, 57, 60, 55, 58, 61, 59, 62, 63,
-};
-
-DECLARE_ALIGNED(16, static const int16_t, default_scan_16x4[64]) = {
- 0, 16, 1, 32, 17, 2, 48, 33, 18, 3, 49, 34, 19, 4, 50, 35,
- 20, 5, 51, 36, 21, 6, 52, 37, 22, 7, 53, 38, 23, 8, 54, 39,
- 24, 9, 55, 40, 25, 10, 56, 41, 26, 11, 57, 42, 27, 12, 58, 43,
- 28, 13, 59, 44, 29, 14, 60, 45, 30, 15, 61, 46, 31, 62, 47, 63,
-};
-
-DECLARE_ALIGNED(16, static const int16_t, mrow_scan_4x16[64]) = {
- 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
- 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
- 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47,
- 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63,
-};
-
-DECLARE_ALIGNED(16, static const int16_t, mrow_scan_16x4[64]) = {
- 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
- 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
- 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47,
- 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63,
-};
-
-DECLARE_ALIGNED(16, static const int16_t, mcol_scan_4x16[64]) = {
- 0, 4, 8, 12, 16, 20, 24, 28, 32, 36, 40, 44, 48, 52, 56, 60,
- 1, 5, 9, 13, 17, 21, 25, 29, 33, 37, 41, 45, 49, 53, 57, 61,
- 2, 6, 10, 14, 18, 22, 26, 30, 34, 38, 42, 46, 50, 54, 58, 62,
- 3, 7, 11, 15, 19, 23, 27, 31, 35, 39, 43, 47, 51, 55, 59, 63,
-};
-
-DECLARE_ALIGNED(16, static const int16_t, mcol_scan_16x4[64]) = {
- 0, 16, 32, 48, 1, 17, 33, 49, 2, 18, 34, 50, 3, 19, 35, 51,
- 4, 20, 36, 52, 5, 21, 37, 53, 6, 22, 38, 54, 7, 23, 39, 55,
- 8, 24, 40, 56, 9, 25, 41, 57, 10, 26, 42, 58, 11, 27, 43, 59,
- 12, 28, 44, 60, 13, 29, 45, 61, 14, 30, 46, 62, 15, 31, 47, 63,
-};
-
-DECLARE_ALIGNED(16, static const int16_t, default_scan_8x32[256]) = {
- 0, 1, 8, 2, 9, 16, 3, 10, 17, 24, 4, 11, 18, 25, 32,
- 5, 12, 19, 26, 33, 40, 6, 13, 20, 27, 34, 41, 48, 7, 14,
- 21, 28, 35, 42, 49, 56, 15, 22, 29, 36, 43, 50, 57, 64, 23,
- 30, 37, 44, 51, 58, 65, 72, 31, 38, 45, 52, 59, 66, 73, 80,
- 39, 46, 53, 60, 67, 74, 81, 88, 47, 54, 61, 68, 75, 82, 89,
- 96, 55, 62, 69, 76, 83, 90, 97, 104, 63, 70, 77, 84, 91, 98,
- 105, 112, 71, 78, 85, 92, 99, 106, 113, 120, 79, 86, 93, 100, 107,
- 114, 121, 128, 87, 94, 101, 108, 115, 122, 129, 136, 95, 102, 109, 116,
- 123, 130, 137, 144, 103, 110, 117, 124, 131, 138, 145, 152, 111, 118, 125,
- 132, 139, 146, 153, 160, 119, 126, 133, 140, 147, 154, 161, 168, 127, 134,
- 141, 148, 155, 162, 169, 176, 135, 142, 149, 156, 163, 170, 177, 184, 143,
- 150, 157, 164, 171, 178, 185, 192, 151, 158, 165, 172, 179, 186, 193, 200,
- 159, 166, 173, 180, 187, 194, 201, 208, 167, 174, 181, 188, 195, 202, 209,
- 216, 175, 182, 189, 196, 203, 210, 217, 224, 183, 190, 197, 204, 211, 218,
- 225, 232, 191, 198, 205, 212, 219, 226, 233, 240, 199, 206, 213, 220, 227,
- 234, 241, 248, 207, 214, 221, 228, 235, 242, 249, 215, 222, 229, 236, 243,
- 250, 223, 230, 237, 244, 251, 231, 238, 245, 252, 239, 246, 253, 247, 254,
- 255,
-};
-
-DECLARE_ALIGNED(16, static const int16_t, default_scan_32x8[256]) = {
- 0, 32, 1, 64, 33, 2, 96, 65, 34, 3, 128, 97, 66, 35, 4,
- 160, 129, 98, 67, 36, 5, 192, 161, 130, 99, 68, 37, 6, 224, 193,
- 162, 131, 100, 69, 38, 7, 225, 194, 163, 132, 101, 70, 39, 8, 226,
- 195, 164, 133, 102, 71, 40, 9, 227, 196, 165, 134, 103, 72, 41, 10,
- 228, 197, 166, 135, 104, 73, 42, 11, 229, 198, 167, 136, 105, 74, 43,
- 12, 230, 199, 168, 137, 106, 75, 44, 13, 231, 200, 169, 138, 107, 76,
- 45, 14, 232, 201, 170, 139, 108, 77, 46, 15, 233, 202, 171, 140, 109,
- 78, 47, 16, 234, 203, 172, 141, 110, 79, 48, 17, 235, 204, 173, 142,
- 111, 80, 49, 18, 236, 205, 174, 143, 112, 81, 50, 19, 237, 206, 175,
- 144, 113, 82, 51, 20, 238, 207, 176, 145, 114, 83, 52, 21, 239, 208,
- 177, 146, 115, 84, 53, 22, 240, 209, 178, 147, 116, 85, 54, 23, 241,
- 210, 179, 148, 117, 86, 55, 24, 242, 211, 180, 149, 118, 87, 56, 25,
- 243, 212, 181, 150, 119, 88, 57, 26, 244, 213, 182, 151, 120, 89, 58,
- 27, 245, 214, 183, 152, 121, 90, 59, 28, 246, 215, 184, 153, 122, 91,
- 60, 29, 247, 216, 185, 154, 123, 92, 61, 30, 248, 217, 186, 155, 124,
- 93, 62, 31, 249, 218, 187, 156, 125, 94, 63, 250, 219, 188, 157, 126,
- 95, 251, 220, 189, 158, 127, 252, 221, 190, 159, 253, 222, 191, 254, 223,
- 255,
-};
-
-DECLARE_ALIGNED(16, static const int16_t, mrow_scan_8x32[256]) = {
- 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14,
- 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29,
- 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44,
- 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59,
- 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74,
- 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89,
- 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100, 101, 102, 103, 104,
- 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119,
- 120, 121, 122, 123, 124, 125, 126, 127, 128, 129, 130, 131, 132, 133, 134,
- 135, 136, 137, 138, 139, 140, 141, 142, 143, 144, 145, 146, 147, 148, 149,
- 150, 151, 152, 153, 154, 155, 156, 157, 158, 159, 160, 161, 162, 163, 164,
- 165, 166, 167, 168, 169, 170, 171, 172, 173, 174, 175, 176, 177, 178, 179,
- 180, 181, 182, 183, 184, 185, 186, 187, 188, 189, 190, 191, 192, 193, 194,
- 195, 196, 197, 198, 199, 200, 201, 202, 203, 204, 205, 206, 207, 208, 209,
- 210, 211, 212, 213, 214, 215, 216, 217, 218, 219, 220, 221, 222, 223, 224,
- 225, 226, 227, 228, 229, 230, 231, 232, 233, 234, 235, 236, 237, 238, 239,
- 240, 241, 242, 243, 244, 245, 246, 247, 248, 249, 250, 251, 252, 253, 254,
- 255,
-};
-
-DECLARE_ALIGNED(16, static const int16_t, mrow_scan_32x8[256]) = {
- 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14,
- 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29,
- 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44,
- 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59,
- 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74,
- 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89,
- 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100, 101, 102, 103, 104,
- 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119,
- 120, 121, 122, 123, 124, 125, 126, 127, 128, 129, 130, 131, 132, 133, 134,
- 135, 136, 137, 138, 139, 140, 141, 142, 143, 144, 145, 146, 147, 148, 149,
- 150, 151, 152, 153, 154, 155, 156, 157, 158, 159, 160, 161, 162, 163, 164,
- 165, 166, 167, 168, 169, 170, 171, 172, 173, 174, 175, 176, 177, 178, 179,
- 180, 181, 182, 183, 184, 185, 186, 187, 188, 189, 190, 191, 192, 193, 194,
- 195, 196, 197, 198, 199, 200, 201, 202, 203, 204, 205, 206, 207, 208, 209,
- 210, 211, 212, 213, 214, 215, 216, 217, 218, 219, 220, 221, 222, 223, 224,
- 225, 226, 227, 228, 229, 230, 231, 232, 233, 234, 235, 236, 237, 238, 239,
- 240, 241, 242, 243, 244, 245, 246, 247, 248, 249, 250, 251, 252, 253, 254,
- 255,
-};
-
-DECLARE_ALIGNED(16, static const int16_t, mcol_scan_8x32[256]) = {
- 0, 8, 16, 24, 32, 40, 48, 56, 64, 72, 80, 88, 96, 104, 112,
- 120, 128, 136, 144, 152, 160, 168, 176, 184, 192, 200, 208, 216, 224, 232,
- 240, 248, 1, 9, 17, 25, 33, 41, 49, 57, 65, 73, 81, 89, 97,
- 105, 113, 121, 129, 137, 145, 153, 161, 169, 177, 185, 193, 201, 209, 217,
- 225, 233, 241, 249, 2, 10, 18, 26, 34, 42, 50, 58, 66, 74, 82,
- 90, 98, 106, 114, 122, 130, 138, 146, 154, 162, 170, 178, 186, 194, 202,
- 210, 218, 226, 234, 242, 250, 3, 11, 19, 27, 35, 43, 51, 59, 67,
- 75, 83, 91, 99, 107, 115, 123, 131, 139, 147, 155, 163, 171, 179, 187,
- 195, 203, 211, 219, 227, 235, 243, 251, 4, 12, 20, 28, 36, 44, 52,
- 60, 68, 76, 84, 92, 100, 108, 116, 124, 132, 140, 148, 156, 164, 172,
- 180, 188, 196, 204, 212, 220, 228, 236, 244, 252, 5, 13, 21, 29, 37,
- 45, 53, 61, 69, 77, 85, 93, 101, 109, 117, 125, 133, 141, 149, 157,
- 165, 173, 181, 189, 197, 205, 213, 221, 229, 237, 245, 253, 6, 14, 22,
- 30, 38, 46, 54, 62, 70, 78, 86, 94, 102, 110, 118, 126, 134, 142,
- 150, 158, 166, 174, 182, 190, 198, 206, 214, 222, 230, 238, 246, 254, 7,
- 15, 23, 31, 39, 47, 55, 63, 71, 79, 87, 95, 103, 111, 119, 127,
- 135, 143, 151, 159, 167, 175, 183, 191, 199, 207, 215, 223, 231, 239, 247,
- 255,
-};
-
-DECLARE_ALIGNED(16, static const int16_t, mcol_scan_32x8[256]) = {
- 0, 32, 64, 96, 128, 160, 192, 224, 1, 33, 65, 97, 129, 161, 193, 225,
- 2, 34, 66, 98, 130, 162, 194, 226, 3, 35, 67, 99, 131, 163, 195, 227,
- 4, 36, 68, 100, 132, 164, 196, 228, 5, 37, 69, 101, 133, 165, 197, 229,
- 6, 38, 70, 102, 134, 166, 198, 230, 7, 39, 71, 103, 135, 167, 199, 231,
- 8, 40, 72, 104, 136, 168, 200, 232, 9, 41, 73, 105, 137, 169, 201, 233,
- 10, 42, 74, 106, 138, 170, 202, 234, 11, 43, 75, 107, 139, 171, 203, 235,
- 12, 44, 76, 108, 140, 172, 204, 236, 13, 45, 77, 109, 141, 173, 205, 237,
- 14, 46, 78, 110, 142, 174, 206, 238, 15, 47, 79, 111, 143, 175, 207, 239,
- 16, 48, 80, 112, 144, 176, 208, 240, 17, 49, 81, 113, 145, 177, 209, 241,
- 18, 50, 82, 114, 146, 178, 210, 242, 19, 51, 83, 115, 147, 179, 211, 243,
- 20, 52, 84, 116, 148, 180, 212, 244, 21, 53, 85, 117, 149, 181, 213, 245,
- 22, 54, 86, 118, 150, 182, 214, 246, 23, 55, 87, 119, 151, 183, 215, 247,
- 24, 56, 88, 120, 152, 184, 216, 248, 25, 57, 89, 121, 153, 185, 217, 249,
- 26, 58, 90, 122, 154, 186, 218, 250, 27, 59, 91, 123, 155, 187, 219, 251,
- 28, 60, 92, 124, 156, 188, 220, 252, 29, 61, 93, 125, 157, 189, 221, 253,
- 30, 62, 94, 126, 158, 190, 222, 254, 31, 63, 95, 127, 159, 191, 223, 255,
-};
-
-DECLARE_ALIGNED(16, static const int16_t, default_scan_8x8[64]) = {
- 0, 1, 8, 16, 9, 2, 3, 10, 17, 24, 32, 25, 18, 11, 4, 5,
- 12, 19, 26, 33, 40, 48, 41, 34, 27, 20, 13, 6, 7, 14, 21, 28,
- 35, 42, 49, 56, 57, 50, 43, 36, 29, 22, 15, 23, 30, 37, 44, 51,
- 58, 59, 52, 45, 38, 31, 39, 46, 53, 60, 61, 54, 47, 55, 62, 63
-};
-
-DECLARE_ALIGNED(16, static const int16_t, mcol_scan_8x8[64]) = {
- 0, 8, 16, 24, 32, 40, 48, 56, 1, 9, 17, 25, 33, 41, 49, 57,
- 2, 10, 18, 26, 34, 42, 50, 58, 3, 11, 19, 27, 35, 43, 51, 59,
- 4, 12, 20, 28, 36, 44, 52, 60, 5, 13, 21, 29, 37, 45, 53, 61,
- 6, 14, 22, 30, 38, 46, 54, 62, 7, 15, 23, 31, 39, 47, 55, 63,
-};
-
-DECLARE_ALIGNED(16, static const int16_t, mrow_scan_8x8[64]) = {
- 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
- 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
- 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47,
- 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63,
-};
-
-DECLARE_ALIGNED(16, static const int16_t, default_scan_8x16[128]) = {
- 0, 1, 8, 2, 9, 16, 3, 10, 17, 24, 4, 11, 18, 25, 32,
- 5, 12, 19, 26, 33, 40, 6, 13, 20, 27, 34, 41, 48, 7, 14,
- 21, 28, 35, 42, 49, 56, 15, 22, 29, 36, 43, 50, 57, 64, 23,
- 30, 37, 44, 51, 58, 65, 72, 31, 38, 45, 52, 59, 66, 73, 80,
- 39, 46, 53, 60, 67, 74, 81, 88, 47, 54, 61, 68, 75, 82, 89,
- 96, 55, 62, 69, 76, 83, 90, 97, 104, 63, 70, 77, 84, 91, 98,
- 105, 112, 71, 78, 85, 92, 99, 106, 113, 120, 79, 86, 93, 100, 107,
- 114, 121, 87, 94, 101, 108, 115, 122, 95, 102, 109, 116, 123, 103, 110,
- 117, 124, 111, 118, 125, 119, 126, 127,
-};
-
-DECLARE_ALIGNED(16, static const int16_t, default_scan_16x8[128]) = {
- 0, 16, 1, 32, 17, 2, 48, 33, 18, 3, 64, 49, 34, 19, 4, 80,
- 65, 50, 35, 20, 5, 96, 81, 66, 51, 36, 21, 6, 112, 97, 82, 67,
- 52, 37, 22, 7, 113, 98, 83, 68, 53, 38, 23, 8, 114, 99, 84, 69,
- 54, 39, 24, 9, 115, 100, 85, 70, 55, 40, 25, 10, 116, 101, 86, 71,
- 56, 41, 26, 11, 117, 102, 87, 72, 57, 42, 27, 12, 118, 103, 88, 73,
- 58, 43, 28, 13, 119, 104, 89, 74, 59, 44, 29, 14, 120, 105, 90, 75,
- 60, 45, 30, 15, 121, 106, 91, 76, 61, 46, 31, 122, 107, 92, 77, 62,
- 47, 123, 108, 93, 78, 63, 124, 109, 94, 79, 125, 110, 95, 126, 111, 127,
-};
-
-DECLARE_ALIGNED(16, static const int16_t, mcol_scan_8x16[128]) = {
- 0, 8, 16, 24, 32, 40, 48, 56, 64, 72, 80, 88, 96, 104, 112, 120,
- 1, 9, 17, 25, 33, 41, 49, 57, 65, 73, 81, 89, 97, 105, 113, 121,
- 2, 10, 18, 26, 34, 42, 50, 58, 66, 74, 82, 90, 98, 106, 114, 122,
- 3, 11, 19, 27, 35, 43, 51, 59, 67, 75, 83, 91, 99, 107, 115, 123,
- 4, 12, 20, 28, 36, 44, 52, 60, 68, 76, 84, 92, 100, 108, 116, 124,
- 5, 13, 21, 29, 37, 45, 53, 61, 69, 77, 85, 93, 101, 109, 117, 125,
- 6, 14, 22, 30, 38, 46, 54, 62, 70, 78, 86, 94, 102, 110, 118, 126,
- 7, 15, 23, 31, 39, 47, 55, 63, 71, 79, 87, 95, 103, 111, 119, 127,
-};
-
-DECLARE_ALIGNED(16, static const int16_t, mcol_scan_16x8[128]) = {
- 0, 16, 32, 48, 64, 80, 96, 112, 1, 17, 33, 49, 65, 81, 97, 113,
- 2, 18, 34, 50, 66, 82, 98, 114, 3, 19, 35, 51, 67, 83, 99, 115,
- 4, 20, 36, 52, 68, 84, 100, 116, 5, 21, 37, 53, 69, 85, 101, 117,
- 6, 22, 38, 54, 70, 86, 102, 118, 7, 23, 39, 55, 71, 87, 103, 119,
- 8, 24, 40, 56, 72, 88, 104, 120, 9, 25, 41, 57, 73, 89, 105, 121,
- 10, 26, 42, 58, 74, 90, 106, 122, 11, 27, 43, 59, 75, 91, 107, 123,
- 12, 28, 44, 60, 76, 92, 108, 124, 13, 29, 45, 61, 77, 93, 109, 125,
- 14, 30, 46, 62, 78, 94, 110, 126, 15, 31, 47, 63, 79, 95, 111, 127,
-};
-
-DECLARE_ALIGNED(16, static const int16_t, mrow_scan_8x16[128]) = {
- 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14,
- 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29,
- 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44,
- 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59,
- 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74,
- 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89,
- 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100, 101, 102, 103, 104,
- 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119,
- 120, 121, 122, 123, 124, 125, 126, 127,
-};
-
-DECLARE_ALIGNED(16, static const int16_t, mrow_scan_16x8[128]) = {
- 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14,
- 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29,
- 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44,
- 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59,
- 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74,
- 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89,
- 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100, 101, 102, 103, 104,
- 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119,
- 120, 121, 122, 123, 124, 125, 126, 127,
-};
-
-DECLARE_ALIGNED(16, static const int16_t, default_scan_16x32[512]) = {
- 0, 1, 16, 2, 17, 32, 3, 18, 33, 48, 4, 19, 34, 49, 64,
- 5, 20, 35, 50, 65, 80, 6, 21, 36, 51, 66, 81, 96, 7, 22,
- 37, 52, 67, 82, 97, 112, 8, 23, 38, 53, 68, 83, 98, 113, 128,
- 9, 24, 39, 54, 69, 84, 99, 114, 129, 144, 10, 25, 40, 55, 70,
- 85, 100, 115, 130, 145, 160, 11, 26, 41, 56, 71, 86, 101, 116, 131,
- 146, 161, 176, 12, 27, 42, 57, 72, 87, 102, 117, 132, 147, 162, 177,
- 192, 13, 28, 43, 58, 73, 88, 103, 118, 133, 148, 163, 178, 193, 208,
- 14, 29, 44, 59, 74, 89, 104, 119, 134, 149, 164, 179, 194, 209, 224,
- 15, 30, 45, 60, 75, 90, 105, 120, 135, 150, 165, 180, 195, 210, 225,
- 240, 31, 46, 61, 76, 91, 106, 121, 136, 151, 166, 181, 196, 211, 226,
- 241, 256, 47, 62, 77, 92, 107, 122, 137, 152, 167, 182, 197, 212, 227,
- 242, 257, 272, 63, 78, 93, 108, 123, 138, 153, 168, 183, 198, 213, 228,
- 243, 258, 273, 288, 79, 94, 109, 124, 139, 154, 169, 184, 199, 214, 229,
- 244, 259, 274, 289, 304, 95, 110, 125, 140, 155, 170, 185, 200, 215, 230,
- 245, 260, 275, 290, 305, 320, 111, 126, 141, 156, 171, 186, 201, 216, 231,
- 246, 261, 276, 291, 306, 321, 336, 127, 142, 157, 172, 187, 202, 217, 232,
- 247, 262, 277, 292, 307, 322, 337, 352, 143, 158, 173, 188, 203, 218, 233,
- 248, 263, 278, 293, 308, 323, 338, 353, 368, 159, 174, 189, 204, 219, 234,
- 249, 264, 279, 294, 309, 324, 339, 354, 369, 384, 175, 190, 205, 220, 235,
- 250, 265, 280, 295, 310, 325, 340, 355, 370, 385, 400, 191, 206, 221, 236,
- 251, 266, 281, 296, 311, 326, 341, 356, 371, 386, 401, 416, 207, 222, 237,
- 252, 267, 282, 297, 312, 327, 342, 357, 372, 387, 402, 417, 432, 223, 238,
- 253, 268, 283, 298, 313, 328, 343, 358, 373, 388, 403, 418, 433, 448, 239,
- 254, 269, 284, 299, 314, 329, 344, 359, 374, 389, 404, 419, 434, 449, 464,
- 255, 270, 285, 300, 315, 330, 345, 360, 375, 390, 405, 420, 435, 450, 465,
- 480, 271, 286, 301, 316, 331, 346, 361, 376, 391, 406, 421, 436, 451, 466,
- 481, 496, 287, 302, 317, 332, 347, 362, 377, 392, 407, 422, 437, 452, 467,
- 482, 497, 303, 318, 333, 348, 363, 378, 393, 408, 423, 438, 453, 468, 483,
- 498, 319, 334, 349, 364, 379, 394, 409, 424, 439, 454, 469, 484, 499, 335,
- 350, 365, 380, 395, 410, 425, 440, 455, 470, 485, 500, 351, 366, 381, 396,
- 411, 426, 441, 456, 471, 486, 501, 367, 382, 397, 412, 427, 442, 457, 472,
- 487, 502, 383, 398, 413, 428, 443, 458, 473, 488, 503, 399, 414, 429, 444,
- 459, 474, 489, 504, 415, 430, 445, 460, 475, 490, 505, 431, 446, 461, 476,
- 491, 506, 447, 462, 477, 492, 507, 463, 478, 493, 508, 479, 494, 509, 495,
- 510, 511,
-};
-
-DECLARE_ALIGNED(16, static const int16_t, default_scan_32x16[512]) = {
- 0, 32, 1, 64, 33, 2, 96, 65, 34, 3, 128, 97, 66, 35, 4,
- 160, 129, 98, 67, 36, 5, 192, 161, 130, 99, 68, 37, 6, 224, 193,
- 162, 131, 100, 69, 38, 7, 256, 225, 194, 163, 132, 101, 70, 39, 8,
- 288, 257, 226, 195, 164, 133, 102, 71, 40, 9, 320, 289, 258, 227, 196,
- 165, 134, 103, 72, 41, 10, 352, 321, 290, 259, 228, 197, 166, 135, 104,
- 73, 42, 11, 384, 353, 322, 291, 260, 229, 198, 167, 136, 105, 74, 43,
- 12, 416, 385, 354, 323, 292, 261, 230, 199, 168, 137, 106, 75, 44, 13,
- 448, 417, 386, 355, 324, 293, 262, 231, 200, 169, 138, 107, 76, 45, 14,
- 480, 449, 418, 387, 356, 325, 294, 263, 232, 201, 170, 139, 108, 77, 46,
- 15, 481, 450, 419, 388, 357, 326, 295, 264, 233, 202, 171, 140, 109, 78,
- 47, 16, 482, 451, 420, 389, 358, 327, 296, 265, 234, 203, 172, 141, 110,
- 79, 48, 17, 483, 452, 421, 390, 359, 328, 297, 266, 235, 204, 173, 142,
- 111, 80, 49, 18, 484, 453, 422, 391, 360, 329, 298, 267, 236, 205, 174,
- 143, 112, 81, 50, 19, 485, 454, 423, 392, 361, 330, 299, 268, 237, 206,
- 175, 144, 113, 82, 51, 20, 486, 455, 424, 393, 362, 331, 300, 269, 238,
- 207, 176, 145, 114, 83, 52, 21, 487, 456, 425, 394, 363, 332, 301, 270,
- 239, 208, 177, 146, 115, 84, 53, 22, 488, 457, 426, 395, 364, 333, 302,
- 271, 240, 209, 178, 147, 116, 85, 54, 23, 489, 458, 427, 396, 365, 334,
- 303, 272, 241, 210, 179, 148, 117, 86, 55, 24, 490, 459, 428, 397, 366,
- 335, 304, 273, 242, 211, 180, 149, 118, 87, 56, 25, 491, 460, 429, 398,
- 367, 336, 305, 274, 243, 212, 181, 150, 119, 88, 57, 26, 492, 461, 430,
- 399, 368, 337, 306, 275, 244, 213, 182, 151, 120, 89, 58, 27, 493, 462,
- 431, 400, 369, 338, 307, 276, 245, 214, 183, 152, 121, 90, 59, 28, 494,
- 463, 432, 401, 370, 339, 308, 277, 246, 215, 184, 153, 122, 91, 60, 29,
- 495, 464, 433, 402, 371, 340, 309, 278, 247, 216, 185, 154, 123, 92, 61,
- 30, 496, 465, 434, 403, 372, 341, 310, 279, 248, 217, 186, 155, 124, 93,
- 62, 31, 497, 466, 435, 404, 373, 342, 311, 280, 249, 218, 187, 156, 125,
- 94, 63, 498, 467, 436, 405, 374, 343, 312, 281, 250, 219, 188, 157, 126,
- 95, 499, 468, 437, 406, 375, 344, 313, 282, 251, 220, 189, 158, 127, 500,
- 469, 438, 407, 376, 345, 314, 283, 252, 221, 190, 159, 501, 470, 439, 408,
- 377, 346, 315, 284, 253, 222, 191, 502, 471, 440, 409, 378, 347, 316, 285,
- 254, 223, 503, 472, 441, 410, 379, 348, 317, 286, 255, 504, 473, 442, 411,
- 380, 349, 318, 287, 505, 474, 443, 412, 381, 350, 319, 506, 475, 444, 413,
- 382, 351, 507, 476, 445, 414, 383, 508, 477, 446, 415, 509, 478, 447, 510,
- 479, 511,
-};
-
-DECLARE_ALIGNED(16, static const int16_t, mcol_scan_16x32[512]) = {
- 0, 16, 32, 48, 64, 80, 96, 112, 128, 144, 160, 176, 192, 208, 224,
- 240, 256, 272, 288, 304, 320, 336, 352, 368, 384, 400, 416, 432, 448, 464,
- 480, 496, 1, 17, 33, 49, 65, 81, 97, 113, 129, 145, 161, 177, 193,
- 209, 225, 241, 257, 273, 289, 305, 321, 337, 353, 369, 385, 401, 417, 433,
- 449, 465, 481, 497, 2, 18, 34, 50, 66, 82, 98, 114, 130, 146, 162,
- 178, 194, 210, 226, 242, 258, 274, 290, 306, 322, 338, 354, 370, 386, 402,
- 418, 434, 450, 466, 482, 498, 3, 19, 35, 51, 67, 83, 99, 115, 131,
- 147, 163, 179, 195, 211, 227, 243, 259, 275, 291, 307, 323, 339, 355, 371,
- 387, 403, 419, 435, 451, 467, 483, 499, 4, 20, 36, 52, 68, 84, 100,
- 116, 132, 148, 164, 180, 196, 212, 228, 244, 260, 276, 292, 308, 324, 340,
- 356, 372, 388, 404, 420, 436, 452, 468, 484, 500, 5, 21, 37, 53, 69,
- 85, 101, 117, 133, 149, 165, 181, 197, 213, 229, 245, 261, 277, 293, 309,
- 325, 341, 357, 373, 389, 405, 421, 437, 453, 469, 485, 501, 6, 22, 38,
- 54, 70, 86, 102, 118, 134, 150, 166, 182, 198, 214, 230, 246, 262, 278,
- 294, 310, 326, 342, 358, 374, 390, 406, 422, 438, 454, 470, 486, 502, 7,
- 23, 39, 55, 71, 87, 103, 119, 135, 151, 167, 183, 199, 215, 231, 247,
- 263, 279, 295, 311, 327, 343, 359, 375, 391, 407, 423, 439, 455, 471, 487,
- 503, 8, 24, 40, 56, 72, 88, 104, 120, 136, 152, 168, 184, 200, 216,
- 232, 248, 264, 280, 296, 312, 328, 344, 360, 376, 392, 408, 424, 440, 456,
- 472, 488, 504, 9, 25, 41, 57, 73, 89, 105, 121, 137, 153, 169, 185,
- 201, 217, 233, 249, 265, 281, 297, 313, 329, 345, 361, 377, 393, 409, 425,
- 441, 457, 473, 489, 505, 10, 26, 42, 58, 74, 90, 106, 122, 138, 154,
- 170, 186, 202, 218, 234, 250, 266, 282, 298, 314, 330, 346, 362, 378, 394,
- 410, 426, 442, 458, 474, 490, 506, 11, 27, 43, 59, 75, 91, 107, 123,
- 139, 155, 171, 187, 203, 219, 235, 251, 267, 283, 299, 315, 331, 347, 363,
- 379, 395, 411, 427, 443, 459, 475, 491, 507, 12, 28, 44, 60, 76, 92,
- 108, 124, 140, 156, 172, 188, 204, 220, 236, 252, 268, 284, 300, 316, 332,
- 348, 364, 380, 396, 412, 428, 444, 460, 476, 492, 508, 13, 29, 45, 61,
- 77, 93, 109, 125, 141, 157, 173, 189, 205, 221, 237, 253, 269, 285, 301,
- 317, 333, 349, 365, 381, 397, 413, 429, 445, 461, 477, 493, 509, 14, 30,
- 46, 62, 78, 94, 110, 126, 142, 158, 174, 190, 206, 222, 238, 254, 270,
- 286, 302, 318, 334, 350, 366, 382, 398, 414, 430, 446, 462, 478, 494, 510,
- 15, 31, 47, 63, 79, 95, 111, 127, 143, 159, 175, 191, 207, 223, 239,
- 255, 271, 287, 303, 319, 335, 351, 367, 383, 399, 415, 431, 447, 463, 479,
- 495, 511,
-};
-
-DECLARE_ALIGNED(16, static const int16_t, mcol_scan_32x16[512]) = {
- 0, 32, 64, 96, 128, 160, 192, 224, 256, 288, 320, 352, 384, 416, 448, 480,
- 1, 33, 65, 97, 129, 161, 193, 225, 257, 289, 321, 353, 385, 417, 449, 481,
- 2, 34, 66, 98, 130, 162, 194, 226, 258, 290, 322, 354, 386, 418, 450, 482,
- 3, 35, 67, 99, 131, 163, 195, 227, 259, 291, 323, 355, 387, 419, 451, 483,
- 4, 36, 68, 100, 132, 164, 196, 228, 260, 292, 324, 356, 388, 420, 452, 484,
- 5, 37, 69, 101, 133, 165, 197, 229, 261, 293, 325, 357, 389, 421, 453, 485,
- 6, 38, 70, 102, 134, 166, 198, 230, 262, 294, 326, 358, 390, 422, 454, 486,
- 7, 39, 71, 103, 135, 167, 199, 231, 263, 295, 327, 359, 391, 423, 455, 487,
- 8, 40, 72, 104, 136, 168, 200, 232, 264, 296, 328, 360, 392, 424, 456, 488,
- 9, 41, 73, 105, 137, 169, 201, 233, 265, 297, 329, 361, 393, 425, 457, 489,
- 10, 42, 74, 106, 138, 170, 202, 234, 266, 298, 330, 362, 394, 426, 458, 490,
- 11, 43, 75, 107, 139, 171, 203, 235, 267, 299, 331, 363, 395, 427, 459, 491,
- 12, 44, 76, 108, 140, 172, 204, 236, 268, 300, 332, 364, 396, 428, 460, 492,
- 13, 45, 77, 109, 141, 173, 205, 237, 269, 301, 333, 365, 397, 429, 461, 493,
- 14, 46, 78, 110, 142, 174, 206, 238, 270, 302, 334, 366, 398, 430, 462, 494,
- 15, 47, 79, 111, 143, 175, 207, 239, 271, 303, 335, 367, 399, 431, 463, 495,
- 16, 48, 80, 112, 144, 176, 208, 240, 272, 304, 336, 368, 400, 432, 464, 496,
- 17, 49, 81, 113, 145, 177, 209, 241, 273, 305, 337, 369, 401, 433, 465, 497,
- 18, 50, 82, 114, 146, 178, 210, 242, 274, 306, 338, 370, 402, 434, 466, 498,
- 19, 51, 83, 115, 147, 179, 211, 243, 275, 307, 339, 371, 403, 435, 467, 499,
- 20, 52, 84, 116, 148, 180, 212, 244, 276, 308, 340, 372, 404, 436, 468, 500,
- 21, 53, 85, 117, 149, 181, 213, 245, 277, 309, 341, 373, 405, 437, 469, 501,
- 22, 54, 86, 118, 150, 182, 214, 246, 278, 310, 342, 374, 406, 438, 470, 502,
- 23, 55, 87, 119, 151, 183, 215, 247, 279, 311, 343, 375, 407, 439, 471, 503,
- 24, 56, 88, 120, 152, 184, 216, 248, 280, 312, 344, 376, 408, 440, 472, 504,
- 25, 57, 89, 121, 153, 185, 217, 249, 281, 313, 345, 377, 409, 441, 473, 505,
- 26, 58, 90, 122, 154, 186, 218, 250, 282, 314, 346, 378, 410, 442, 474, 506,
- 27, 59, 91, 123, 155, 187, 219, 251, 283, 315, 347, 379, 411, 443, 475, 507,
- 28, 60, 92, 124, 156, 188, 220, 252, 284, 316, 348, 380, 412, 444, 476, 508,
- 29, 61, 93, 125, 157, 189, 221, 253, 285, 317, 349, 381, 413, 445, 477, 509,
- 30, 62, 94, 126, 158, 190, 222, 254, 286, 318, 350, 382, 414, 446, 478, 510,
- 31, 63, 95, 127, 159, 191, 223, 255, 287, 319, 351, 383, 415, 447, 479, 511,
-};
-
-DECLARE_ALIGNED(16, static const int16_t, mrow_scan_16x32[512]) = {
- 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14,
- 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29,
- 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44,
- 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59,
- 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74,
- 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89,
- 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100, 101, 102, 103, 104,
- 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119,
- 120, 121, 122, 123, 124, 125, 126, 127, 128, 129, 130, 131, 132, 133, 134,
- 135, 136, 137, 138, 139, 140, 141, 142, 143, 144, 145, 146, 147, 148, 149,
- 150, 151, 152, 153, 154, 155, 156, 157, 158, 159, 160, 161, 162, 163, 164,
- 165, 166, 167, 168, 169, 170, 171, 172, 173, 174, 175, 176, 177, 178, 179,
- 180, 181, 182, 183, 184, 185, 186, 187, 188, 189, 190, 191, 192, 193, 194,
- 195, 196, 197, 198, 199, 200, 201, 202, 203, 204, 205, 206, 207, 208, 209,
- 210, 211, 212, 213, 214, 215, 216, 217, 218, 219, 220, 221, 222, 223, 224,
- 225, 226, 227, 228, 229, 230, 231, 232, 233, 234, 235, 236, 237, 238, 239,
- 240, 241, 242, 243, 244, 245, 246, 247, 248, 249, 250, 251, 252, 253, 254,
- 255, 256, 257, 258, 259, 260, 261, 262, 263, 264, 265, 266, 267, 268, 269,
- 270, 271, 272, 273, 274, 275, 276, 277, 278, 279, 280, 281, 282, 283, 284,
- 285, 286, 287, 288, 289, 290, 291, 292, 293, 294, 295, 296, 297, 298, 299,
- 300, 301, 302, 303, 304, 305, 306, 307, 308, 309, 310, 311, 312, 313, 314,
- 315, 316, 317, 318, 319, 320, 321, 322, 323, 324, 325, 326, 327, 328, 329,
- 330, 331, 332, 333, 334, 335, 336, 337, 338, 339, 340, 341, 342, 343, 344,
- 345, 346, 347, 348, 349, 350, 351, 352, 353, 354, 355, 356, 357, 358, 359,
- 360, 361, 362, 363, 364, 365, 366, 367, 368, 369, 370, 371, 372, 373, 374,
- 375, 376, 377, 378, 379, 380, 381, 382, 383, 384, 385, 386, 387, 388, 389,
- 390, 391, 392, 393, 394, 395, 396, 397, 398, 399, 400, 401, 402, 403, 404,
- 405, 406, 407, 408, 409, 410, 411, 412, 413, 414, 415, 416, 417, 418, 419,
- 420, 421, 422, 423, 424, 425, 426, 427, 428, 429, 430, 431, 432, 433, 434,
- 435, 436, 437, 438, 439, 440, 441, 442, 443, 444, 445, 446, 447, 448, 449,
- 450, 451, 452, 453, 454, 455, 456, 457, 458, 459, 460, 461, 462, 463, 464,
- 465, 466, 467, 468, 469, 470, 471, 472, 473, 474, 475, 476, 477, 478, 479,
- 480, 481, 482, 483, 484, 485, 486, 487, 488, 489, 490, 491, 492, 493, 494,
- 495, 496, 497, 498, 499, 500, 501, 502, 503, 504, 505, 506, 507, 508, 509,
- 510, 511,
-};
-
-DECLARE_ALIGNED(16, static const int16_t, mrow_scan_32x16[512]) = {
- 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14,
- 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29,
- 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44,
- 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59,
- 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74,
- 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89,
- 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100, 101, 102, 103, 104,
- 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119,
- 120, 121, 122, 123, 124, 125, 126, 127, 128, 129, 130, 131, 132, 133, 134,
- 135, 136, 137, 138, 139, 140, 141, 142, 143, 144, 145, 146, 147, 148, 149,
- 150, 151, 152, 153, 154, 155, 156, 157, 158, 159, 160, 161, 162, 163, 164,
- 165, 166, 167, 168, 169, 170, 171, 172, 173, 174, 175, 176, 177, 178, 179,
- 180, 181, 182, 183, 184, 185, 186, 187, 188, 189, 190, 191, 192, 193, 194,
- 195, 196, 197, 198, 199, 200, 201, 202, 203, 204, 205, 206, 207, 208, 209,
- 210, 211, 212, 213, 214, 215, 216, 217, 218, 219, 220, 221, 222, 223, 224,
- 225, 226, 227, 228, 229, 230, 231, 232, 233, 234, 235, 236, 237, 238, 239,
- 240, 241, 242, 243, 244, 245, 246, 247, 248, 249, 250, 251, 252, 253, 254,
- 255, 256, 257, 258, 259, 260, 261, 262, 263, 264, 265, 266, 267, 268, 269,
- 270, 271, 272, 273, 274, 275, 276, 277, 278, 279, 280, 281, 282, 283, 284,
- 285, 286, 287, 288, 289, 290, 291, 292, 293, 294, 295, 296, 297, 298, 299,
- 300, 301, 302, 303, 304, 305, 306, 307, 308, 309, 310, 311, 312, 313, 314,
- 315, 316, 317, 318, 319, 320, 321, 322, 323, 324, 325, 326, 327, 328, 329,
- 330, 331, 332, 333, 334, 335, 336, 337, 338, 339, 340, 341, 342, 343, 344,
- 345, 346, 347, 348, 349, 350, 351, 352, 353, 354, 355, 356, 357, 358, 359,
- 360, 361, 362, 363, 364, 365, 366, 367, 368, 369, 370, 371, 372, 373, 374,
- 375, 376, 377, 378, 379, 380, 381, 382, 383, 384, 385, 386, 387, 388, 389,
- 390, 391, 392, 393, 394, 395, 396, 397, 398, 399, 400, 401, 402, 403, 404,
- 405, 406, 407, 408, 409, 410, 411, 412, 413, 414, 415, 416, 417, 418, 419,
- 420, 421, 422, 423, 424, 425, 426, 427, 428, 429, 430, 431, 432, 433, 434,
- 435, 436, 437, 438, 439, 440, 441, 442, 443, 444, 445, 446, 447, 448, 449,
- 450, 451, 452, 453, 454, 455, 456, 457, 458, 459, 460, 461, 462, 463, 464,
- 465, 466, 467, 468, 469, 470, 471, 472, 473, 474, 475, 476, 477, 478, 479,
- 480, 481, 482, 483, 484, 485, 486, 487, 488, 489, 490, 491, 492, 493, 494,
- 495, 496, 497, 498, 499, 500, 501, 502, 503, 504, 505, 506, 507, 508, 509,
- 510, 511,
-};
-
-DECLARE_ALIGNED(16, static const int16_t, default_scan_16x16[256]) = {
- 0, 1, 16, 32, 17, 2, 3, 18, 33, 48, 64, 49, 34, 19, 4,
- 5, 20, 35, 50, 65, 80, 96, 81, 66, 51, 36, 21, 6, 7, 22,
- 37, 52, 67, 82, 97, 112, 128, 113, 98, 83, 68, 53, 38, 23, 8,
- 9, 24, 39, 54, 69, 84, 99, 114, 129, 144, 160, 145, 130, 115, 100,
- 85, 70, 55, 40, 25, 10, 11, 26, 41, 56, 71, 86, 101, 116, 131,
- 146, 161, 176, 192, 177, 162, 147, 132, 117, 102, 87, 72, 57, 42, 27,
- 12, 13, 28, 43, 58, 73, 88, 103, 118, 133, 148, 163, 178, 193, 208,
- 224, 209, 194, 179, 164, 149, 134, 119, 104, 89, 74, 59, 44, 29, 14,
- 15, 30, 45, 60, 75, 90, 105, 120, 135, 150, 165, 180, 195, 210, 225,
- 240, 241, 226, 211, 196, 181, 166, 151, 136, 121, 106, 91, 76, 61, 46,
- 31, 47, 62, 77, 92, 107, 122, 137, 152, 167, 182, 197, 212, 227, 242,
- 243, 228, 213, 198, 183, 168, 153, 138, 123, 108, 93, 78, 63, 79, 94,
- 109, 124, 139, 154, 169, 184, 199, 214, 229, 244, 245, 230, 215, 200, 185,
- 170, 155, 140, 125, 110, 95, 111, 126, 141, 156, 171, 186, 201, 216, 231,
- 246, 247, 232, 217, 202, 187, 172, 157, 142, 127, 143, 158, 173, 188, 203,
- 218, 233, 248, 249, 234, 219, 204, 189, 174, 159, 175, 190, 205, 220, 235,
- 250, 251, 236, 221, 206, 191, 207, 222, 237, 252, 253, 238, 223, 239, 254,
- 255
-};
-
-DECLARE_ALIGNED(16, static const int16_t, mcol_scan_16x16[256]) = {
- 0, 16, 32, 48, 64, 80, 96, 112, 128, 144, 160, 176, 192, 208, 224, 240,
- 1, 17, 33, 49, 65, 81, 97, 113, 129, 145, 161, 177, 193, 209, 225, 241,
- 2, 18, 34, 50, 66, 82, 98, 114, 130, 146, 162, 178, 194, 210, 226, 242,
- 3, 19, 35, 51, 67, 83, 99, 115, 131, 147, 163, 179, 195, 211, 227, 243,
- 4, 20, 36, 52, 68, 84, 100, 116, 132, 148, 164, 180, 196, 212, 228, 244,
- 5, 21, 37, 53, 69, 85, 101, 117, 133, 149, 165, 181, 197, 213, 229, 245,
- 6, 22, 38, 54, 70, 86, 102, 118, 134, 150, 166, 182, 198, 214, 230, 246,
- 7, 23, 39, 55, 71, 87, 103, 119, 135, 151, 167, 183, 199, 215, 231, 247,
- 8, 24, 40, 56, 72, 88, 104, 120, 136, 152, 168, 184, 200, 216, 232, 248,
- 9, 25, 41, 57, 73, 89, 105, 121, 137, 153, 169, 185, 201, 217, 233, 249,
- 10, 26, 42, 58, 74, 90, 106, 122, 138, 154, 170, 186, 202, 218, 234, 250,
- 11, 27, 43, 59, 75, 91, 107, 123, 139, 155, 171, 187, 203, 219, 235, 251,
- 12, 28, 44, 60, 76, 92, 108, 124, 140, 156, 172, 188, 204, 220, 236, 252,
- 13, 29, 45, 61, 77, 93, 109, 125, 141, 157, 173, 189, 205, 221, 237, 253,
- 14, 30, 46, 62, 78, 94, 110, 126, 142, 158, 174, 190, 206, 222, 238, 254,
- 15, 31, 47, 63, 79, 95, 111, 127, 143, 159, 175, 191, 207, 223, 239, 255,
-};
-
-DECLARE_ALIGNED(16, static const int16_t, mrow_scan_16x16[256]) = {
- 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14,
- 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29,
- 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44,
- 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59,
- 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74,
- 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89,
- 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100, 101, 102, 103, 104,
- 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119,
- 120, 121, 122, 123, 124, 125, 126, 127, 128, 129, 130, 131, 132, 133, 134,
- 135, 136, 137, 138, 139, 140, 141, 142, 143, 144, 145, 146, 147, 148, 149,
- 150, 151, 152, 153, 154, 155, 156, 157, 158, 159, 160, 161, 162, 163, 164,
- 165, 166, 167, 168, 169, 170, 171, 172, 173, 174, 175, 176, 177, 178, 179,
- 180, 181, 182, 183, 184, 185, 186, 187, 188, 189, 190, 191, 192, 193, 194,
- 195, 196, 197, 198, 199, 200, 201, 202, 203, 204, 205, 206, 207, 208, 209,
- 210, 211, 212, 213, 214, 215, 216, 217, 218, 219, 220, 221, 222, 223, 224,
- 225, 226, 227, 228, 229, 230, 231, 232, 233, 234, 235, 236, 237, 238, 239,
- 240, 241, 242, 243, 244, 245, 246, 247, 248, 249, 250, 251, 252, 253, 254,
- 255,
-};
-
-DECLARE_ALIGNED(16, static const int16_t, mcol_scan_32x32[1024]) = {
- 0, 32, 64, 96, 128, 160, 192, 224, 256, 288, 320, 352, 384, 416,
- 448, 480, 512, 544, 576, 608, 640, 672, 704, 736, 768, 800, 832, 864,
- 896, 928, 960, 992, 1, 33, 65, 97, 129, 161, 193, 225, 257, 289,
- 321, 353, 385, 417, 449, 481, 513, 545, 577, 609, 641, 673, 705, 737,
- 769, 801, 833, 865, 897, 929, 961, 993, 2, 34, 66, 98, 130, 162,
- 194, 226, 258, 290, 322, 354, 386, 418, 450, 482, 514, 546, 578, 610,
- 642, 674, 706, 738, 770, 802, 834, 866, 898, 930, 962, 994, 3, 35,
- 67, 99, 131, 163, 195, 227, 259, 291, 323, 355, 387, 419, 451, 483,
- 515, 547, 579, 611, 643, 675, 707, 739, 771, 803, 835, 867, 899, 931,
- 963, 995, 4, 36, 68, 100, 132, 164, 196, 228, 260, 292, 324, 356,
- 388, 420, 452, 484, 516, 548, 580, 612, 644, 676, 708, 740, 772, 804,
- 836, 868, 900, 932, 964, 996, 5, 37, 69, 101, 133, 165, 197, 229,
- 261, 293, 325, 357, 389, 421, 453, 485, 517, 549, 581, 613, 645, 677,
- 709, 741, 773, 805, 837, 869, 901, 933, 965, 997, 6, 38, 70, 102,
- 134, 166, 198, 230, 262, 294, 326, 358, 390, 422, 454, 486, 518, 550,
- 582, 614, 646, 678, 710, 742, 774, 806, 838, 870, 902, 934, 966, 998,
- 7, 39, 71, 103, 135, 167, 199, 231, 263, 295, 327, 359, 391, 423,
- 455, 487, 519, 551, 583, 615, 647, 679, 711, 743, 775, 807, 839, 871,
- 903, 935, 967, 999, 8, 40, 72, 104, 136, 168, 200, 232, 264, 296,
- 328, 360, 392, 424, 456, 488, 520, 552, 584, 616, 648, 680, 712, 744,
- 776, 808, 840, 872, 904, 936, 968, 1000, 9, 41, 73, 105, 137, 169,
- 201, 233, 265, 297, 329, 361, 393, 425, 457, 489, 521, 553, 585, 617,
- 649, 681, 713, 745, 777, 809, 841, 873, 905, 937, 969, 1001, 10, 42,
- 74, 106, 138, 170, 202, 234, 266, 298, 330, 362, 394, 426, 458, 490,
- 522, 554, 586, 618, 650, 682, 714, 746, 778, 810, 842, 874, 906, 938,
- 970, 1002, 11, 43, 75, 107, 139, 171, 203, 235, 267, 299, 331, 363,
- 395, 427, 459, 491, 523, 555, 587, 619, 651, 683, 715, 747, 779, 811,
- 843, 875, 907, 939, 971, 1003, 12, 44, 76, 108, 140, 172, 204, 236,
- 268, 300, 332, 364, 396, 428, 460, 492, 524, 556, 588, 620, 652, 684,
- 716, 748, 780, 812, 844, 876, 908, 940, 972, 1004, 13, 45, 77, 109,
- 141, 173, 205, 237, 269, 301, 333, 365, 397, 429, 461, 493, 525, 557,
- 589, 621, 653, 685, 717, 749, 781, 813, 845, 877, 909, 941, 973, 1005,
- 14, 46, 78, 110, 142, 174, 206, 238, 270, 302, 334, 366, 398, 430,
- 462, 494, 526, 558, 590, 622, 654, 686, 718, 750, 782, 814, 846, 878,
- 910, 942, 974, 1006, 15, 47, 79, 111, 143, 175, 207, 239, 271, 303,
- 335, 367, 399, 431, 463, 495, 527, 559, 591, 623, 655, 687, 719, 751,
- 783, 815, 847, 879, 911, 943, 975, 1007, 16, 48, 80, 112, 144, 176,
- 208, 240, 272, 304, 336, 368, 400, 432, 464, 496, 528, 560, 592, 624,
- 656, 688, 720, 752, 784, 816, 848, 880, 912, 944, 976, 1008, 17, 49,
- 81, 113, 145, 177, 209, 241, 273, 305, 337, 369, 401, 433, 465, 497,
- 529, 561, 593, 625, 657, 689, 721, 753, 785, 817, 849, 881, 913, 945,
- 977, 1009, 18, 50, 82, 114, 146, 178, 210, 242, 274, 306, 338, 370,
- 402, 434, 466, 498, 530, 562, 594, 626, 658, 690, 722, 754, 786, 818,
- 850, 882, 914, 946, 978, 1010, 19, 51, 83, 115, 147, 179, 211, 243,
- 275, 307, 339, 371, 403, 435, 467, 499, 531, 563, 595, 627, 659, 691,
- 723, 755, 787, 819, 851, 883, 915, 947, 979, 1011, 20, 52, 84, 116,
- 148, 180, 212, 244, 276, 308, 340, 372, 404, 436, 468, 500, 532, 564,
- 596, 628, 660, 692, 724, 756, 788, 820, 852, 884, 916, 948, 980, 1012,
- 21, 53, 85, 117, 149, 181, 213, 245, 277, 309, 341, 373, 405, 437,
- 469, 501, 533, 565, 597, 629, 661, 693, 725, 757, 789, 821, 853, 885,
- 917, 949, 981, 1013, 22, 54, 86, 118, 150, 182, 214, 246, 278, 310,
- 342, 374, 406, 438, 470, 502, 534, 566, 598, 630, 662, 694, 726, 758,
- 790, 822, 854, 886, 918, 950, 982, 1014, 23, 55, 87, 119, 151, 183,
- 215, 247, 279, 311, 343, 375, 407, 439, 471, 503, 535, 567, 599, 631,
- 663, 695, 727, 759, 791, 823, 855, 887, 919, 951, 983, 1015, 24, 56,
- 88, 120, 152, 184, 216, 248, 280, 312, 344, 376, 408, 440, 472, 504,
- 536, 568, 600, 632, 664, 696, 728, 760, 792, 824, 856, 888, 920, 952,
- 984, 1016, 25, 57, 89, 121, 153, 185, 217, 249, 281, 313, 345, 377,
- 409, 441, 473, 505, 537, 569, 601, 633, 665, 697, 729, 761, 793, 825,
- 857, 889, 921, 953, 985, 1017, 26, 58, 90, 122, 154, 186, 218, 250,
- 282, 314, 346, 378, 410, 442, 474, 506, 538, 570, 602, 634, 666, 698,
- 730, 762, 794, 826, 858, 890, 922, 954, 986, 1018, 27, 59, 91, 123,
- 155, 187, 219, 251, 283, 315, 347, 379, 411, 443, 475, 507, 539, 571,
- 603, 635, 667, 699, 731, 763, 795, 827, 859, 891, 923, 955, 987, 1019,
- 28, 60, 92, 124, 156, 188, 220, 252, 284, 316, 348, 380, 412, 444,
- 476, 508, 540, 572, 604, 636, 668, 700, 732, 764, 796, 828, 860, 892,
- 924, 956, 988, 1020, 29, 61, 93, 125, 157, 189, 221, 253, 285, 317,
- 349, 381, 413, 445, 477, 509, 541, 573, 605, 637, 669, 701, 733, 765,
- 797, 829, 861, 893, 925, 957, 989, 1021, 30, 62, 94, 126, 158, 190,
- 222, 254, 286, 318, 350, 382, 414, 446, 478, 510, 542, 574, 606, 638,
- 670, 702, 734, 766, 798, 830, 862, 894, 926, 958, 990, 1022, 31, 63,
- 95, 127, 159, 191, 223, 255, 287, 319, 351, 383, 415, 447, 479, 511,
- 543, 575, 607, 639, 671, 703, 735, 767, 799, 831, 863, 895, 927, 959,
- 991, 1023,
-};
-
-DECLARE_ALIGNED(16, static const int16_t, mrow_scan_32x32[1024]) = {
- 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12,
- 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25,
- 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38,
- 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51,
- 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64,
- 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77,
- 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90,
- 91, 92, 93, 94, 95, 96, 97, 98, 99, 100, 101, 102, 103,
- 104, 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116,
- 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127, 128, 129,
- 130, 131, 132, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142,
- 143, 144, 145, 146, 147, 148, 149, 150, 151, 152, 153, 154, 155,
- 156, 157, 158, 159, 160, 161, 162, 163, 164, 165, 166, 167, 168,
- 169, 170, 171, 172, 173, 174, 175, 176, 177, 178, 179, 180, 181,
- 182, 183, 184, 185, 186, 187, 188, 189, 190, 191, 192, 193, 194,
- 195, 196, 197, 198, 199, 200, 201, 202, 203, 204, 205, 206, 207,
- 208, 209, 210, 211, 212, 213, 214, 215, 216, 217, 218, 219, 220,
- 221, 222, 223, 224, 225, 226, 227, 228, 229, 230, 231, 232, 233,
- 234, 235, 236, 237, 238, 239, 240, 241, 242, 243, 244, 245, 246,
- 247, 248, 249, 250, 251, 252, 253, 254, 255, 256, 257, 258, 259,
- 260, 261, 262, 263, 264, 265, 266, 267, 268, 269, 270, 271, 272,
- 273, 274, 275, 276, 277, 278, 279, 280, 281, 282, 283, 284, 285,
- 286, 287, 288, 289, 290, 291, 292, 293, 294, 295, 296, 297, 298,
- 299, 300, 301, 302, 303, 304, 305, 306, 307, 308, 309, 310, 311,
- 312, 313, 314, 315, 316, 317, 318, 319, 320, 321, 322, 323, 324,
- 325, 326, 327, 328, 329, 330, 331, 332, 333, 334, 335, 336, 337,
- 338, 339, 340, 341, 342, 343, 344, 345, 346, 347, 348, 349, 350,
- 351, 352, 353, 354, 355, 356, 357, 358, 359, 360, 361, 362, 363,
- 364, 365, 366, 367, 368, 369, 370, 371, 372, 373, 374, 375, 376,
- 377, 378, 379, 380, 381, 382, 383, 384, 385, 386, 387, 388, 389,
- 390, 391, 392, 393, 394, 395, 396, 397, 398, 399, 400, 401, 402,
- 403, 404, 405, 406, 407, 408, 409, 410, 411, 412, 413, 414, 415,
- 416, 417, 418, 419, 420, 421, 422, 423, 424, 425, 426, 427, 428,
- 429, 430, 431, 432, 433, 434, 435, 436, 437, 438, 439, 440, 441,
- 442, 443, 444, 445, 446, 447, 448, 449, 450, 451, 452, 453, 454,
- 455, 456, 457, 458, 459, 460, 461, 462, 463, 464, 465, 466, 467,
- 468, 469, 470, 471, 472, 473, 474, 475, 476, 477, 478, 479, 480,
- 481, 482, 483, 484, 485, 486, 487, 488, 489, 490, 491, 492, 493,
- 494, 495, 496, 497, 498, 499, 500, 501, 502, 503, 504, 505, 506,
- 507, 508, 509, 510, 511, 512, 513, 514, 515, 516, 517, 518, 519,
- 520, 521, 522, 523, 524, 525, 526, 527, 528, 529, 530, 531, 532,
- 533, 534, 535, 536, 537, 538, 539, 540, 541, 542, 543, 544, 545,
- 546, 547, 548, 549, 550, 551, 552, 553, 554, 555, 556, 557, 558,
- 559, 560, 561, 562, 563, 564, 565, 566, 567, 568, 569, 570, 571,
- 572, 573, 574, 575, 576, 577, 578, 579, 580, 581, 582, 583, 584,
- 585, 586, 587, 588, 589, 590, 591, 592, 593, 594, 595, 596, 597,
- 598, 599, 600, 601, 602, 603, 604, 605, 606, 607, 608, 609, 610,
- 611, 612, 613, 614, 615, 616, 617, 618, 619, 620, 621, 622, 623,
- 624, 625, 626, 627, 628, 629, 630, 631, 632, 633, 634, 635, 636,
- 637, 638, 639, 640, 641, 642, 643, 644, 645, 646, 647, 648, 649,
- 650, 651, 652, 653, 654, 655, 656, 657, 658, 659, 660, 661, 662,
- 663, 664, 665, 666, 667, 668, 669, 670, 671, 672, 673, 674, 675,
- 676, 677, 678, 679, 680, 681, 682, 683, 684, 685, 686, 687, 688,
- 689, 690, 691, 692, 693, 694, 695, 696, 697, 698, 699, 700, 701,
- 702, 703, 704, 705, 706, 707, 708, 709, 710, 711, 712, 713, 714,
- 715, 716, 717, 718, 719, 720, 721, 722, 723, 724, 725, 726, 727,
- 728, 729, 730, 731, 732, 733, 734, 735, 736, 737, 738, 739, 740,
- 741, 742, 743, 744, 745, 746, 747, 748, 749, 750, 751, 752, 753,
- 754, 755, 756, 757, 758, 759, 760, 761, 762, 763, 764, 765, 766,
- 767, 768, 769, 770, 771, 772, 773, 774, 775, 776, 777, 778, 779,
- 780, 781, 782, 783, 784, 785, 786, 787, 788, 789, 790, 791, 792,
- 793, 794, 795, 796, 797, 798, 799, 800, 801, 802, 803, 804, 805,
- 806, 807, 808, 809, 810, 811, 812, 813, 814, 815, 816, 817, 818,
- 819, 820, 821, 822, 823, 824, 825, 826, 827, 828, 829, 830, 831,
- 832, 833, 834, 835, 836, 837, 838, 839, 840, 841, 842, 843, 844,
- 845, 846, 847, 848, 849, 850, 851, 852, 853, 854, 855, 856, 857,
- 858, 859, 860, 861, 862, 863, 864, 865, 866, 867, 868, 869, 870,
- 871, 872, 873, 874, 875, 876, 877, 878, 879, 880, 881, 882, 883,
- 884, 885, 886, 887, 888, 889, 890, 891, 892, 893, 894, 895, 896,
- 897, 898, 899, 900, 901, 902, 903, 904, 905, 906, 907, 908, 909,
- 910, 911, 912, 913, 914, 915, 916, 917, 918, 919, 920, 921, 922,
- 923, 924, 925, 926, 927, 928, 929, 930, 931, 932, 933, 934, 935,
- 936, 937, 938, 939, 940, 941, 942, 943, 944, 945, 946, 947, 948,
- 949, 950, 951, 952, 953, 954, 955, 956, 957, 958, 959, 960, 961,
- 962, 963, 964, 965, 966, 967, 968, 969, 970, 971, 972, 973, 974,
- 975, 976, 977, 978, 979, 980, 981, 982, 983, 984, 985, 986, 987,
- 988, 989, 990, 991, 992, 993, 994, 995, 996, 997, 998, 999, 1000,
- 1001, 1002, 1003, 1004, 1005, 1006, 1007, 1008, 1009, 1010, 1011, 1012, 1013,
- 1014, 1015, 1016, 1017, 1018, 1019, 1020, 1021, 1022, 1023,
-};
-
-DECLARE_ALIGNED(16, static const int16_t, default_scan_32x32[1024]) = {
- 0, 1, 32, 64, 33, 2, 3, 34, 65, 96, 128, 97, 66,
- 35, 4, 5, 36, 67, 98, 129, 160, 192, 161, 130, 99, 68,
- 37, 6, 7, 38, 69, 100, 131, 162, 193, 224, 256, 225, 194,
- 163, 132, 101, 70, 39, 8, 9, 40, 71, 102, 133, 164, 195,
- 226, 257, 288, 320, 289, 258, 227, 196, 165, 134, 103, 72, 41,
- 10, 11, 42, 73, 104, 135, 166, 197, 228, 259, 290, 321, 352,
- 384, 353, 322, 291, 260, 229, 198, 167, 136, 105, 74, 43, 12,
- 13, 44, 75, 106, 137, 168, 199, 230, 261, 292, 323, 354, 385,
- 416, 448, 417, 386, 355, 324, 293, 262, 231, 200, 169, 138, 107,
- 76, 45, 14, 15, 46, 77, 108, 139, 170, 201, 232, 263, 294,
- 325, 356, 387, 418, 449, 480, 512, 481, 450, 419, 388, 357, 326,
- 295, 264, 233, 202, 171, 140, 109, 78, 47, 16, 17, 48, 79,
- 110, 141, 172, 203, 234, 265, 296, 327, 358, 389, 420, 451, 482,
- 513, 544, 576, 545, 514, 483, 452, 421, 390, 359, 328, 297, 266,
- 235, 204, 173, 142, 111, 80, 49, 18, 19, 50, 81, 112, 143,
- 174, 205, 236, 267, 298, 329, 360, 391, 422, 453, 484, 515, 546,
- 577, 608, 640, 609, 578, 547, 516, 485, 454, 423, 392, 361, 330,
- 299, 268, 237, 206, 175, 144, 113, 82, 51, 20, 21, 52, 83,
- 114, 145, 176, 207, 238, 269, 300, 331, 362, 393, 424, 455, 486,
- 517, 548, 579, 610, 641, 672, 704, 673, 642, 611, 580, 549, 518,
- 487, 456, 425, 394, 363, 332, 301, 270, 239, 208, 177, 146, 115,
- 84, 53, 22, 23, 54, 85, 116, 147, 178, 209, 240, 271, 302,
- 333, 364, 395, 426, 457, 488, 519, 550, 581, 612, 643, 674, 705,
- 736, 768, 737, 706, 675, 644, 613, 582, 551, 520, 489, 458, 427,
- 396, 365, 334, 303, 272, 241, 210, 179, 148, 117, 86, 55, 24,
- 25, 56, 87, 118, 149, 180, 211, 242, 273, 304, 335, 366, 397,
- 428, 459, 490, 521, 552, 583, 614, 645, 676, 707, 738, 769, 800,
- 832, 801, 770, 739, 708, 677, 646, 615, 584, 553, 522, 491, 460,
- 429, 398, 367, 336, 305, 274, 243, 212, 181, 150, 119, 88, 57,
- 26, 27, 58, 89, 120, 151, 182, 213, 244, 275, 306, 337, 368,
- 399, 430, 461, 492, 523, 554, 585, 616, 647, 678, 709, 740, 771,
- 802, 833, 864, 896, 865, 834, 803, 772, 741, 710, 679, 648, 617,
- 586, 555, 524, 493, 462, 431, 400, 369, 338, 307, 276, 245, 214,
- 183, 152, 121, 90, 59, 28, 29, 60, 91, 122, 153, 184, 215,
- 246, 277, 308, 339, 370, 401, 432, 463, 494, 525, 556, 587, 618,
- 649, 680, 711, 742, 773, 804, 835, 866, 897, 928, 960, 929, 898,
- 867, 836, 805, 774, 743, 712, 681, 650, 619, 588, 557, 526, 495,
- 464, 433, 402, 371, 340, 309, 278, 247, 216, 185, 154, 123, 92,
- 61, 30, 31, 62, 93, 124, 155, 186, 217, 248, 279, 310, 341,
- 372, 403, 434, 465, 496, 527, 558, 589, 620, 651, 682, 713, 744,
- 775, 806, 837, 868, 899, 930, 961, 992, 993, 962, 931, 900, 869,
- 838, 807, 776, 745, 714, 683, 652, 621, 590, 559, 528, 497, 466,
- 435, 404, 373, 342, 311, 280, 249, 218, 187, 156, 125, 94, 63,
- 95, 126, 157, 188, 219, 250, 281, 312, 343, 374, 405, 436, 467,
- 498, 529, 560, 591, 622, 653, 684, 715, 746, 777, 808, 839, 870,
- 901, 932, 963, 994, 995, 964, 933, 902, 871, 840, 809, 778, 747,
- 716, 685, 654, 623, 592, 561, 530, 499, 468, 437, 406, 375, 344,
- 313, 282, 251, 220, 189, 158, 127, 159, 190, 221, 252, 283, 314,
- 345, 376, 407, 438, 469, 500, 531, 562, 593, 624, 655, 686, 717,
- 748, 779, 810, 841, 872, 903, 934, 965, 996, 997, 966, 935, 904,
- 873, 842, 811, 780, 749, 718, 687, 656, 625, 594, 563, 532, 501,
- 470, 439, 408, 377, 346, 315, 284, 253, 222, 191, 223, 254, 285,
- 316, 347, 378, 409, 440, 471, 502, 533, 564, 595, 626, 657, 688,
- 719, 750, 781, 812, 843, 874, 905, 936, 967, 998, 999, 968, 937,
- 906, 875, 844, 813, 782, 751, 720, 689, 658, 627, 596, 565, 534,
- 503, 472, 441, 410, 379, 348, 317, 286, 255, 287, 318, 349, 380,
- 411, 442, 473, 504, 535, 566, 597, 628, 659, 690, 721, 752, 783,
- 814, 845, 876, 907, 938, 969, 1000, 1001, 970, 939, 908, 877, 846,
- 815, 784, 753, 722, 691, 660, 629, 598, 567, 536, 505, 474, 443,
- 412, 381, 350, 319, 351, 382, 413, 444, 475, 506, 537, 568, 599,
- 630, 661, 692, 723, 754, 785, 816, 847, 878, 909, 940, 971, 1002,
- 1003, 972, 941, 910, 879, 848, 817, 786, 755, 724, 693, 662, 631,
- 600, 569, 538, 507, 476, 445, 414, 383, 415, 446, 477, 508, 539,
- 570, 601, 632, 663, 694, 725, 756, 787, 818, 849, 880, 911, 942,
- 973, 1004, 1005, 974, 943, 912, 881, 850, 819, 788, 757, 726, 695,
- 664, 633, 602, 571, 540, 509, 478, 447, 479, 510, 541, 572, 603,
- 634, 665, 696, 727, 758, 789, 820, 851, 882, 913, 944, 975, 1006,
- 1007, 976, 945, 914, 883, 852, 821, 790, 759, 728, 697, 666, 635,
- 604, 573, 542, 511, 543, 574, 605, 636, 667, 698, 729, 760, 791,
- 822, 853, 884, 915, 946, 977, 1008, 1009, 978, 947, 916, 885, 854,
- 823, 792, 761, 730, 699, 668, 637, 606, 575, 607, 638, 669, 700,
- 731, 762, 793, 824, 855, 886, 917, 948, 979, 1010, 1011, 980, 949,
- 918, 887, 856, 825, 794, 763, 732, 701, 670, 639, 671, 702, 733,
- 764, 795, 826, 857, 888, 919, 950, 981, 1012, 1013, 982, 951, 920,
- 889, 858, 827, 796, 765, 734, 703, 735, 766, 797, 828, 859, 890,
- 921, 952, 983, 1014, 1015, 984, 953, 922, 891, 860, 829, 798, 767,
- 799, 830, 861, 892, 923, 954, 985, 1016, 1017, 986, 955, 924, 893,
- 862, 831, 863, 894, 925, 956, 987, 1018, 1019, 988, 957, 926, 895,
- 927, 958, 989, 1020, 1021, 990, 959, 991, 1022, 1023
-};
-
-// Neighborhood 2-tuples for various scans and blocksizes,
-// in {top, left} order for each position in corresponding scan order.
-DECLARE_ALIGNED(16, static const int16_t,
- default_scan_4x4_neighbors[17 * MAX_NEIGHBORS]) = {
- 0, 0, 0, 0, 0, 0, 4, 4, 1, 4, 1, 1, 2, 2, 2, 5, 5,
- 8, 8, 8, 9, 12, 6, 9, 3, 6, 7, 10, 10, 13, 11, 14, 0, 0
-};
-
-DECLARE_ALIGNED(16, static const int16_t,
- mcol_scan_4x4_neighbors[17 * MAX_NEIGHBORS]) = {
- 0, 0, 0, 0, 4, 4, 8, 8, 0, 0, 1, 4, 5, 8, 9, 12, 1,
- 1, 2, 5, 6, 9, 10, 13, 2, 2, 3, 6, 7, 10, 11, 14, 0, 0,
-};
-
-DECLARE_ALIGNED(16, static const int16_t,
- mrow_scan_4x4_neighbors[17 * MAX_NEIGHBORS]) = {
- 0, 0, 0, 0, 1, 1, 2, 2, 0, 0, 1, 4, 2, 5, 3, 6, 4,
- 4, 5, 8, 6, 9, 7, 10, 8, 8, 9, 12, 10, 13, 11, 14, 0, 0,
-};
-
-DECLARE_ALIGNED(16, static const int16_t,
- default_scan_4x8_neighbors[33 * MAX_NEIGHBORS]) = {
- 0, 0, 0, 0, 0, 0, 1, 4, 1, 1, 4, 4, 2, 5, 5, 8, 6,
- 9, 2, 2, 8, 8, 3, 6, 9, 12, 7, 10, 10, 13, 12, 12, 13, 16,
- 11, 14, 14, 17, 15, 18, 16, 16, 17, 20, 18, 21, 19, 22, 20, 20, 21,
- 24, 22, 25, 23, 26, 24, 24, 25, 28, 26, 29, 27, 30, 0, 0
-};
-
-DECLARE_ALIGNED(16, static const int16_t,
- mcol_scan_4x8_neighbors[33 * MAX_NEIGHBORS]) = {
- 0, 0, 0, 0, 4, 4, 8, 8, 12, 12, 16, 16, 20, 20, 24, 24, 0,
- 0, 1, 4, 5, 8, 9, 12, 13, 16, 17, 20, 21, 24, 25, 28, 1, 1,
- 2, 5, 6, 9, 10, 13, 14, 17, 18, 21, 22, 25, 26, 29, 2, 2, 3,
- 6, 7, 10, 11, 14, 15, 18, 19, 22, 23, 26, 27, 30, 0, 0
-};
-
-DECLARE_ALIGNED(16, static const int16_t,
- mrow_scan_4x8_neighbors[33 * MAX_NEIGHBORS]) = {
- 0, 0, 0, 0, 1, 1, 2, 2, 0, 0, 1, 4, 2, 5, 3, 6, 4,
- 4, 5, 8, 6, 9, 7, 10, 8, 8, 9, 12, 10, 13, 11, 14, 12, 12,
- 13, 16, 14, 17, 15, 18, 16, 16, 17, 20, 18, 21, 19, 22, 20, 20, 21,
- 24, 22, 25, 23, 26, 24, 24, 25, 28, 26, 29, 27, 30, 0, 0
-};
-
-DECLARE_ALIGNED(16, static const int16_t,
- default_scan_8x4_neighbors[33 * MAX_NEIGHBORS]) = {
- 0, 0, 0, 0, 0, 0, 1, 8, 1, 1, 8, 8, 2, 9, 9, 16, 10,
- 17, 2, 2, 16, 16, 3, 10, 17, 24, 11, 18, 18, 25, 3, 3, 4, 11,
- 19, 26, 12, 19, 4, 4, 20, 27, 5, 12, 13, 20, 21, 28, 5, 5, 6,
- 13, 14, 21, 22, 29, 6, 6, 7, 14, 15, 22, 23, 30, 0, 0
-};
-
-DECLARE_ALIGNED(16, static const int16_t,
- mcol_scan_8x4_neighbors[33 * MAX_NEIGHBORS]) = {
- 0, 0, 0, 0, 8, 8, 16, 16, 0, 0, 1, 8, 9, 16, 17, 24, 1,
- 1, 2, 9, 10, 17, 18, 25, 2, 2, 3, 10, 11, 18, 19, 26, 3, 3,
- 4, 11, 12, 19, 20, 27, 4, 4, 5, 12, 13, 20, 21, 28, 5, 5, 6,
- 13, 14, 21, 22, 29, 6, 6, 7, 14, 15, 22, 23, 30, 0, 0
-};
-
-DECLARE_ALIGNED(16, static const int16_t,
- mrow_scan_8x4_neighbors[33 * MAX_NEIGHBORS]) = {
- 0, 0, 0, 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 0,
- 0, 1, 8, 2, 9, 3, 10, 4, 11, 5, 12, 6, 13, 7, 14, 8, 8,
- 9, 16, 10, 17, 11, 18, 12, 19, 13, 20, 14, 21, 15, 22, 16, 16, 17,
- 24, 18, 25, 19, 26, 20, 27, 21, 28, 22, 29, 23, 30, 0, 0
-};
-
-DECLARE_ALIGNED(16, static const int16_t,
- default_scan_4x16_neighbors[65 * MAX_NEIGHBORS]) = {
- 0, 0, 0, 0, 0, 0, 1, 4, 1, 1, 4, 4, 2, 5, 5, 8, 6, 9, 2,
- 2, 8, 8, 3, 6, 9, 12, 7, 10, 10, 13, 12, 12, 13, 16, 11, 14, 14, 17,
- 15, 18, 16, 16, 17, 20, 18, 21, 19, 22, 20, 20, 21, 24, 22, 25, 23, 26, 24,
- 24, 25, 28, 26, 29, 27, 30, 28, 28, 29, 32, 30, 33, 31, 34, 32, 32, 33, 36,
- 34, 37, 35, 38, 36, 36, 37, 40, 38, 41, 39, 42, 40, 40, 41, 44, 42, 45, 43,
- 46, 44, 44, 45, 48, 46, 49, 47, 50, 48, 48, 49, 52, 50, 53, 51, 54, 52, 52,
- 53, 56, 54, 57, 55, 58, 56, 56, 57, 60, 58, 61, 59, 62, 0, 0
-};
-
-DECLARE_ALIGNED(16, static const int16_t,
- default_scan_16x4_neighbors[65 * MAX_NEIGHBORS]) = {
- 0, 0, 0, 0, 0, 0, 1, 16, 1, 1, 16, 16, 2, 17, 17, 32, 18, 33, 2,
- 2, 32, 32, 3, 18, 33, 48, 19, 34, 34, 49, 3, 3, 4, 19, 35, 50, 20, 35,
- 4, 4, 36, 51, 5, 20, 21, 36, 37, 52, 5, 5, 6, 21, 22, 37, 38, 53, 6,
- 6, 7, 22, 23, 38, 39, 54, 7, 7, 8, 23, 24, 39, 40, 55, 8, 8, 9, 24,
- 25, 40, 41, 56, 9, 9, 10, 25, 26, 41, 42, 57, 10, 10, 11, 26, 27, 42, 43,
- 58, 11, 11, 12, 27, 28, 43, 44, 59, 12, 12, 13, 28, 29, 44, 45, 60, 13, 13,
- 14, 29, 30, 45, 46, 61, 14, 14, 15, 30, 31, 46, 47, 62, 0, 0
-};
-
-DECLARE_ALIGNED(16, static const int16_t,
- mrow_scan_4x16_neighbors[65 * MAX_NEIGHBORS]) = {
- 0, 0, 0, 0, 1, 1, 2, 2, 0, 0, 1, 4, 2, 5, 3, 6, 4, 4, 5,
- 8, 6, 9, 7, 10, 8, 8, 9, 12, 10, 13, 11, 14, 12, 12, 13, 16, 14, 17,
- 15, 18, 16, 16, 17, 20, 18, 21, 19, 22, 20, 20, 21, 24, 22, 25, 23, 26, 24,
- 24, 25, 28, 26, 29, 27, 30, 28, 28, 29, 32, 30, 33, 31, 34, 32, 32, 33, 36,
- 34, 37, 35, 38, 36, 36, 37, 40, 38, 41, 39, 42, 40, 40, 41, 44, 42, 45, 43,
- 46, 44, 44, 45, 48, 46, 49, 47, 50, 48, 48, 49, 52, 50, 53, 51, 54, 52, 52,
- 53, 56, 54, 57, 55, 58, 56, 56, 57, 60, 58, 61, 59, 62, 0, 0
-};
-
-DECLARE_ALIGNED(16, static const int16_t,
- mrow_scan_16x4_neighbors[65 * MAX_NEIGHBORS]) = {
- 0, 0, 0, 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, 8,
- 8, 9, 9, 10, 10, 11, 11, 12, 12, 13, 13, 14, 14, 0, 0, 1, 16, 2, 17,
- 3, 18, 4, 19, 5, 20, 6, 21, 7, 22, 8, 23, 9, 24, 10, 25, 11, 26, 12,
- 27, 13, 28, 14, 29, 15, 30, 16, 16, 17, 32, 18, 33, 19, 34, 20, 35, 21, 36,
- 22, 37, 23, 38, 24, 39, 25, 40, 26, 41, 27, 42, 28, 43, 29, 44, 30, 45, 31,
- 46, 32, 32, 33, 48, 34, 49, 35, 50, 36, 51, 37, 52, 38, 53, 39, 54, 40, 55,
- 41, 56, 42, 57, 43, 58, 44, 59, 45, 60, 46, 61, 47, 62, 0, 0
-};
-
-DECLARE_ALIGNED(16, static const int16_t,
- mcol_scan_4x16_neighbors[65 * MAX_NEIGHBORS]) = {
- 0, 0, 0, 0, 4, 4, 8, 8, 12, 12, 16, 16, 20, 20, 24, 24, 28, 28, 32,
- 32, 36, 36, 40, 40, 44, 44, 48, 48, 52, 52, 56, 56, 0, 0, 1, 4, 5, 8,
- 9, 12, 13, 16, 17, 20, 21, 24, 25, 28, 29, 32, 33, 36, 37, 40, 41, 44, 45,
- 48, 49, 52, 53, 56, 57, 60, 1, 1, 2, 5, 6, 9, 10, 13, 14, 17, 18, 21,
- 22, 25, 26, 29, 30, 33, 34, 37, 38, 41, 42, 45, 46, 49, 50, 53, 54, 57, 58,
- 61, 2, 2, 3, 6, 7, 10, 11, 14, 15, 18, 19, 22, 23, 26, 27, 30, 31, 34,
- 35, 38, 39, 42, 43, 46, 47, 50, 51, 54, 55, 58, 59, 62, 0, 0
-};
-
-DECLARE_ALIGNED(16, static const int16_t,
- mcol_scan_16x4_neighbors[65 * MAX_NEIGHBORS]) = {
- 0, 0, 0, 0, 16, 16, 32, 32, 0, 0, 1, 16, 17, 32, 33, 48, 1, 1, 2,
- 17, 18, 33, 34, 49, 2, 2, 3, 18, 19, 34, 35, 50, 3, 3, 4, 19, 20, 35,
- 36, 51, 4, 4, 5, 20, 21, 36, 37, 52, 5, 5, 6, 21, 22, 37, 38, 53, 6,
- 6, 7, 22, 23, 38, 39, 54, 7, 7, 8, 23, 24, 39, 40, 55, 8, 8, 9, 24,
- 25, 40, 41, 56, 9, 9, 10, 25, 26, 41, 42, 57, 10, 10, 11, 26, 27, 42, 43,
- 58, 11, 11, 12, 27, 28, 43, 44, 59, 12, 12, 13, 28, 29, 44, 45, 60, 13, 13,
- 14, 29, 30, 45, 46, 61, 14, 14, 15, 30, 31, 46, 47, 62, 0, 0
-};
-
-DECLARE_ALIGNED(16, static const int16_t,
- default_scan_8x32_neighbors[257 * MAX_NEIGHBORS]) = {
- 0, 0, 0, 0, 0, 0, 1, 1, 1, 8, 8, 8, 2, 2, 2,
- 9, 9, 16, 16, 16, 3, 3, 3, 10, 10, 17, 17, 24, 24, 24,
- 4, 4, 4, 11, 11, 18, 18, 25, 25, 32, 32, 32, 5, 5, 5,
- 12, 12, 19, 19, 26, 26, 33, 33, 40, 40, 40, 6, 6, 6, 13,
- 13, 20, 20, 27, 27, 34, 34, 41, 41, 48, 48, 48, 7, 14, 14,
- 21, 21, 28, 28, 35, 35, 42, 42, 49, 49, 56, 56, 56, 15, 22,
- 22, 29, 29, 36, 36, 43, 43, 50, 50, 57, 57, 64, 64, 64, 23,
- 30, 30, 37, 37, 44, 44, 51, 51, 58, 58, 65, 65, 72, 72, 72,
- 31, 38, 38, 45, 45, 52, 52, 59, 59, 66, 66, 73, 73, 80, 80,
- 80, 39, 46, 46, 53, 53, 60, 60, 67, 67, 74, 74, 81, 81, 88,
- 88, 88, 47, 54, 54, 61, 61, 68, 68, 75, 75, 82, 82, 89, 89,
- 96, 96, 96, 55, 62, 62, 69, 69, 76, 76, 83, 83, 90, 90, 97,
- 97, 104, 104, 104, 63, 70, 70, 77, 77, 84, 84, 91, 91, 98, 98,
- 105, 105, 112, 112, 112, 71, 78, 78, 85, 85, 92, 92, 99, 99, 106,
- 106, 113, 113, 120, 120, 120, 79, 86, 86, 93, 93, 100, 100, 107, 107,
- 114, 114, 121, 121, 128, 128, 128, 87, 94, 94, 101, 101, 108, 108, 115,
- 115, 122, 122, 129, 129, 136, 136, 136, 95, 102, 102, 109, 109, 116, 116,
- 123, 123, 130, 130, 137, 137, 144, 144, 144, 103, 110, 110, 117, 117, 124,
- 124, 131, 131, 138, 138, 145, 145, 152, 152, 152, 111, 118, 118, 125, 125,
- 132, 132, 139, 139, 146, 146, 153, 153, 160, 160, 160, 119, 126, 126, 133,
- 133, 140, 140, 147, 147, 154, 154, 161, 161, 168, 168, 168, 127, 134, 134,
- 141, 141, 148, 148, 155, 155, 162, 162, 169, 169, 176, 176, 176, 135, 142,
- 142, 149, 149, 156, 156, 163, 163, 170, 170, 177, 177, 184, 184, 184, 143,
- 150, 150, 157, 157, 164, 164, 171, 171, 178, 178, 185, 185, 192, 192, 192,
- 151, 158, 158, 165, 165, 172, 172, 179, 179, 186, 186, 193, 193, 200, 200,
- 200, 159, 166, 166, 173, 173, 180, 180, 187, 187, 194, 194, 201, 201, 208,
- 208, 208, 167, 174, 174, 181, 181, 188, 188, 195, 195, 202, 202, 209, 209,
- 216, 216, 216, 175, 182, 182, 189, 189, 196, 196, 203, 203, 210, 210, 217,
- 217, 224, 224, 224, 183, 190, 190, 197, 197, 204, 204, 211, 211, 218, 218,
- 225, 225, 232, 232, 232, 191, 198, 198, 205, 205, 212, 212, 219, 219, 226,
- 226, 233, 233, 240, 240, 240, 199, 206, 206, 213, 213, 220, 220, 227, 227,
- 234, 234, 241, 241, 248, 207, 214, 214, 221, 221, 228, 228, 235, 235, 242,
- 242, 249, 215, 222, 222, 229, 229, 236, 236, 243, 243, 250, 223, 230, 230,
- 237, 237, 244, 244, 251, 231, 238, 238, 245, 245, 252, 239, 246, 246, 253,
- 247, 254, 0, 0
-};
-
-DECLARE_ALIGNED(16, static const int16_t,
- default_scan_32x8_neighbors[257 * MAX_NEIGHBORS]) = {
- 0, 0, 0, 0, 0, 0, 1, 1, 1, 32, 32, 32, 2, 2, 2,
- 33, 33, 64, 64, 64, 3, 3, 3, 34, 34, 65, 65, 96, 96, 96,
- 4, 4, 4, 35, 35, 66, 66, 97, 97, 128, 128, 128, 5, 5, 5,
- 36, 36, 67, 67, 98, 98, 129, 129, 160, 160, 160, 6, 6, 6, 37,
- 37, 68, 68, 99, 99, 130, 130, 161, 161, 192, 192, 192, 7, 7, 7,
- 38, 38, 69, 69, 100, 100, 131, 131, 162, 162, 193, 193, 224, 8, 8,
- 8, 39, 39, 70, 70, 101, 101, 132, 132, 163, 163, 194, 194, 225, 9,
- 9, 9, 40, 40, 71, 71, 102, 102, 133, 133, 164, 164, 195, 195, 226,
- 10, 10, 10, 41, 41, 72, 72, 103, 103, 134, 134, 165, 165, 196, 196,
- 227, 11, 11, 11, 42, 42, 73, 73, 104, 104, 135, 135, 166, 166, 197,
- 197, 228, 12, 12, 12, 43, 43, 74, 74, 105, 105, 136, 136, 167, 167,
- 198, 198, 229, 13, 13, 13, 44, 44, 75, 75, 106, 106, 137, 137, 168,
- 168, 199, 199, 230, 14, 14, 14, 45, 45, 76, 76, 107, 107, 138, 138,
- 169, 169, 200, 200, 231, 15, 15, 15, 46, 46, 77, 77, 108, 108, 139,
- 139, 170, 170, 201, 201, 232, 16, 16, 16, 47, 47, 78, 78, 109, 109,
- 140, 140, 171, 171, 202, 202, 233, 17, 17, 17, 48, 48, 79, 79, 110,
- 110, 141, 141, 172, 172, 203, 203, 234, 18, 18, 18, 49, 49, 80, 80,
- 111, 111, 142, 142, 173, 173, 204, 204, 235, 19, 19, 19, 50, 50, 81,
- 81, 112, 112, 143, 143, 174, 174, 205, 205, 236, 20, 20, 20, 51, 51,
- 82, 82, 113, 113, 144, 144, 175, 175, 206, 206, 237, 21, 21, 21, 52,
- 52, 83, 83, 114, 114, 145, 145, 176, 176, 207, 207, 238, 22, 22, 22,
- 53, 53, 84, 84, 115, 115, 146, 146, 177, 177, 208, 208, 239, 23, 23,
- 23, 54, 54, 85, 85, 116, 116, 147, 147, 178, 178, 209, 209, 240, 24,
- 24, 24, 55, 55, 86, 86, 117, 117, 148, 148, 179, 179, 210, 210, 241,
- 25, 25, 25, 56, 56, 87, 87, 118, 118, 149, 149, 180, 180, 211, 211,
- 242, 26, 26, 26, 57, 57, 88, 88, 119, 119, 150, 150, 181, 181, 212,
- 212, 243, 27, 27, 27, 58, 58, 89, 89, 120, 120, 151, 151, 182, 182,
- 213, 213, 244, 28, 28, 28, 59, 59, 90, 90, 121, 121, 152, 152, 183,
- 183, 214, 214, 245, 29, 29, 29, 60, 60, 91, 91, 122, 122, 153, 153,
- 184, 184, 215, 215, 246, 30, 30, 30, 61, 61, 92, 92, 123, 123, 154,
- 154, 185, 185, 216, 216, 247, 31, 62, 62, 93, 93, 124, 124, 155, 155,
- 186, 186, 217, 217, 248, 63, 94, 94, 125, 125, 156, 156, 187, 187, 218,
- 218, 249, 95, 126, 126, 157, 157, 188, 188, 219, 219, 250, 127, 158, 158,
- 189, 189, 220, 220, 251, 159, 190, 190, 221, 221, 252, 191, 222, 222, 253,
- 223, 254, 0, 0
-};
-
-DECLARE_ALIGNED(16, static const int16_t,
- mrow_scan_8x32_neighbors[257 * MAX_NEIGHBORS]) = {
- 0, 0, 0, 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6,
- 6, 0, 0, 1, 8, 2, 9, 3, 10, 4, 11, 5, 12, 6, 13,
- 7, 14, 8, 8, 9, 16, 10, 17, 11, 18, 12, 19, 13, 20, 14,
- 21, 15, 22, 16, 16, 17, 24, 18, 25, 19, 26, 20, 27, 21, 28,
- 22, 29, 23, 30, 24, 24, 25, 32, 26, 33, 27, 34, 28, 35, 29,
- 36, 30, 37, 31, 38, 32, 32, 33, 40, 34, 41, 35, 42, 36, 43,
- 37, 44, 38, 45, 39, 46, 40, 40, 41, 48, 42, 49, 43, 50, 44,
- 51, 45, 52, 46, 53, 47, 54, 48, 48, 49, 56, 50, 57, 51, 58,
- 52, 59, 53, 60, 54, 61, 55, 62, 56, 56, 57, 64, 58, 65, 59,
- 66, 60, 67, 61, 68, 62, 69, 63, 70, 64, 64, 65, 72, 66, 73,
- 67, 74, 68, 75, 69, 76, 70, 77, 71, 78, 72, 72, 73, 80, 74,
- 81, 75, 82, 76, 83, 77, 84, 78, 85, 79, 86, 80, 80, 81, 88,
- 82, 89, 83, 90, 84, 91, 85, 92, 86, 93, 87, 94, 88, 88, 89,
- 96, 90, 97, 91, 98, 92, 99, 93, 100, 94, 101, 95, 102, 96, 96,
- 97, 104, 98, 105, 99, 106, 100, 107, 101, 108, 102, 109, 103, 110, 104,
- 104, 105, 112, 106, 113, 107, 114, 108, 115, 109, 116, 110, 117, 111, 118,
- 112, 112, 113, 120, 114, 121, 115, 122, 116, 123, 117, 124, 118, 125, 119,
- 126, 120, 120, 121, 128, 122, 129, 123, 130, 124, 131, 125, 132, 126, 133,
- 127, 134, 128, 128, 129, 136, 130, 137, 131, 138, 132, 139, 133, 140, 134,
- 141, 135, 142, 136, 136, 137, 144, 138, 145, 139, 146, 140, 147, 141, 148,
- 142, 149, 143, 150, 144, 144, 145, 152, 146, 153, 147, 154, 148, 155, 149,
- 156, 150, 157, 151, 158, 152, 152, 153, 160, 154, 161, 155, 162, 156, 163,
- 157, 164, 158, 165, 159, 166, 160, 160, 161, 168, 162, 169, 163, 170, 164,
- 171, 165, 172, 166, 173, 167, 174, 168, 168, 169, 176, 170, 177, 171, 178,
- 172, 179, 173, 180, 174, 181, 175, 182, 176, 176, 177, 184, 178, 185, 179,
- 186, 180, 187, 181, 188, 182, 189, 183, 190, 184, 184, 185, 192, 186, 193,
- 187, 194, 188, 195, 189, 196, 190, 197, 191, 198, 192, 192, 193, 200, 194,
- 201, 195, 202, 196, 203, 197, 204, 198, 205, 199, 206, 200, 200, 201, 208,
- 202, 209, 203, 210, 204, 211, 205, 212, 206, 213, 207, 214, 208, 208, 209,
- 216, 210, 217, 211, 218, 212, 219, 213, 220, 214, 221, 215, 222, 216, 216,
- 217, 224, 218, 225, 219, 226, 220, 227, 221, 228, 222, 229, 223, 230, 224,
- 224, 225, 232, 226, 233, 227, 234, 228, 235, 229, 236, 230, 237, 231, 238,
- 232, 232, 233, 240, 234, 241, 235, 242, 236, 243, 237, 244, 238, 245, 239,
- 246, 240, 240, 241, 248, 242, 249, 243, 250, 244, 251, 245, 252, 246, 253,
- 247, 254, 0, 0
-};
-
-DECLARE_ALIGNED(16, static const int16_t,
- mrow_scan_32x8_neighbors[257 * MAX_NEIGHBORS]) = {
- 0, 0, 0, 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6,
- 6, 7, 7, 8, 8, 9, 9, 10, 10, 11, 11, 12, 12, 13, 13,
- 14, 14, 15, 15, 16, 16, 17, 17, 18, 18, 19, 19, 20, 20, 21,
- 21, 22, 22, 23, 23, 24, 24, 25, 25, 26, 26, 27, 27, 28, 28,
- 29, 29, 30, 30, 0, 0, 1, 32, 2, 33, 3, 34, 4, 35, 5,
- 36, 6, 37, 7, 38, 8, 39, 9, 40, 10, 41, 11, 42, 12, 43,
- 13, 44, 14, 45, 15, 46, 16, 47, 17, 48, 18, 49, 19, 50, 20,
- 51, 21, 52, 22, 53, 23, 54, 24, 55, 25, 56, 26, 57, 27, 58,
- 28, 59, 29, 60, 30, 61, 31, 62, 32, 32, 33, 64, 34, 65, 35,
- 66, 36, 67, 37, 68, 38, 69, 39, 70, 40, 71, 41, 72, 42, 73,
- 43, 74, 44, 75, 45, 76, 46, 77, 47, 78, 48, 79, 49, 80, 50,
- 81, 51, 82, 52, 83, 53, 84, 54, 85, 55, 86, 56, 87, 57, 88,
- 58, 89, 59, 90, 60, 91, 61, 92, 62, 93, 63, 94, 64, 64, 65,
- 96, 66, 97, 67, 98, 68, 99, 69, 100, 70, 101, 71, 102, 72, 103,
- 73, 104, 74, 105, 75, 106, 76, 107, 77, 108, 78, 109, 79, 110, 80,
- 111, 81, 112, 82, 113, 83, 114, 84, 115, 85, 116, 86, 117, 87, 118,
- 88, 119, 89, 120, 90, 121, 91, 122, 92, 123, 93, 124, 94, 125, 95,
- 126, 96, 96, 97, 128, 98, 129, 99, 130, 100, 131, 101, 132, 102, 133,
- 103, 134, 104, 135, 105, 136, 106, 137, 107, 138, 108, 139, 109, 140, 110,
- 141, 111, 142, 112, 143, 113, 144, 114, 145, 115, 146, 116, 147, 117, 148,
- 118, 149, 119, 150, 120, 151, 121, 152, 122, 153, 123, 154, 124, 155, 125,
- 156, 126, 157, 127, 158, 128, 128, 129, 160, 130, 161, 131, 162, 132, 163,
- 133, 164, 134, 165, 135, 166, 136, 167, 137, 168, 138, 169, 139, 170, 140,
- 171, 141, 172, 142, 173, 143, 174, 144, 175, 145, 176, 146, 177, 147, 178,
- 148, 179, 149, 180, 150, 181, 151, 182, 152, 183, 153, 184, 154, 185, 155,
- 186, 156, 187, 157, 188, 158, 189, 159, 190, 160, 160, 161, 192, 162, 193,
- 163, 194, 164, 195, 165, 196, 166, 197, 167, 198, 168, 199, 169, 200, 170,
- 201, 171, 202, 172, 203, 173, 204, 174, 205, 175, 206, 176, 207, 177, 208,
- 178, 209, 179, 210, 180, 211, 181, 212, 182, 213, 183, 214, 184, 215, 185,
- 216, 186, 217, 187, 218, 188, 219, 189, 220, 190, 221, 191, 222, 192, 192,
- 193, 224, 194, 225, 195, 226, 196, 227, 197, 228, 198, 229, 199, 230, 200,
- 231, 201, 232, 202, 233, 203, 234, 204, 235, 205, 236, 206, 237, 207, 238,
- 208, 239, 209, 240, 210, 241, 211, 242, 212, 243, 213, 244, 214, 245, 215,
- 246, 216, 247, 217, 248, 218, 249, 219, 250, 220, 251, 221, 252, 222, 253,
- 223, 254, 0, 0
-};
-
-DECLARE_ALIGNED(16, static const int16_t,
- mcol_scan_8x32_neighbors[257 * MAX_NEIGHBORS]) = {
- 0, 0, 0, 0, 8, 8, 16, 16, 24, 24, 32, 32, 40, 40, 48,
- 48, 56, 56, 64, 64, 72, 72, 80, 80, 88, 88, 96, 96, 104, 104,
- 112, 112, 120, 120, 128, 128, 136, 136, 144, 144, 152, 152, 160, 160, 168,
- 168, 176, 176, 184, 184, 192, 192, 200, 200, 208, 208, 216, 216, 224, 224,
- 232, 232, 240, 240, 0, 0, 1, 8, 9, 16, 17, 24, 25, 32, 33,
- 40, 41, 48, 49, 56, 57, 64, 65, 72, 73, 80, 81, 88, 89, 96,
- 97, 104, 105, 112, 113, 120, 121, 128, 129, 136, 137, 144, 145, 152, 153,
- 160, 161, 168, 169, 176, 177, 184, 185, 192, 193, 200, 201, 208, 209, 216,
- 217, 224, 225, 232, 233, 240, 241, 248, 1, 1, 2, 9, 10, 17, 18,
- 25, 26, 33, 34, 41, 42, 49, 50, 57, 58, 65, 66, 73, 74, 81,
- 82, 89, 90, 97, 98, 105, 106, 113, 114, 121, 122, 129, 130, 137, 138,
- 145, 146, 153, 154, 161, 162, 169, 170, 177, 178, 185, 186, 193, 194, 201,
- 202, 209, 210, 217, 218, 225, 226, 233, 234, 241, 242, 249, 2, 2, 3,
- 10, 11, 18, 19, 26, 27, 34, 35, 42, 43, 50, 51, 58, 59, 66,
- 67, 74, 75, 82, 83, 90, 91, 98, 99, 106, 107, 114, 115, 122, 123,
- 130, 131, 138, 139, 146, 147, 154, 155, 162, 163, 170, 171, 178, 179, 186,
- 187, 194, 195, 202, 203, 210, 211, 218, 219, 226, 227, 234, 235, 242, 243,
- 250, 3, 3, 4, 11, 12, 19, 20, 27, 28, 35, 36, 43, 44, 51,
- 52, 59, 60, 67, 68, 75, 76, 83, 84, 91, 92, 99, 100, 107, 108,
- 115, 116, 123, 124, 131, 132, 139, 140, 147, 148, 155, 156, 163, 164, 171,
- 172, 179, 180, 187, 188, 195, 196, 203, 204, 211, 212, 219, 220, 227, 228,
- 235, 236, 243, 244, 251, 4, 4, 5, 12, 13, 20, 21, 28, 29, 36,
- 37, 44, 45, 52, 53, 60, 61, 68, 69, 76, 77, 84, 85, 92, 93,
- 100, 101, 108, 109, 116, 117, 124, 125, 132, 133, 140, 141, 148, 149, 156,
- 157, 164, 165, 172, 173, 180, 181, 188, 189, 196, 197, 204, 205, 212, 213,
- 220, 221, 228, 229, 236, 237, 244, 245, 252, 5, 5, 6, 13, 14, 21,
- 22, 29, 30, 37, 38, 45, 46, 53, 54, 61, 62, 69, 70, 77, 78,
- 85, 86, 93, 94, 101, 102, 109, 110, 117, 118, 125, 126, 133, 134, 141,
- 142, 149, 150, 157, 158, 165, 166, 173, 174, 181, 182, 189, 190, 197, 198,
- 205, 206, 213, 214, 221, 222, 229, 230, 237, 238, 245, 246, 253, 6, 6,
- 7, 14, 15, 22, 23, 30, 31, 38, 39, 46, 47, 54, 55, 62, 63,
- 70, 71, 78, 79, 86, 87, 94, 95, 102, 103, 110, 111, 118, 119, 126,
- 127, 134, 135, 142, 143, 150, 151, 158, 159, 166, 167, 174, 175, 182, 183,
- 190, 191, 198, 199, 206, 207, 214, 215, 222, 223, 230, 231, 238, 239, 246,
- 247, 254, 0, 0
-};
-
-DECLARE_ALIGNED(16, static const int16_t,
- mcol_scan_32x8_neighbors[257 * MAX_NEIGHBORS]) = {
- 0, 0, 0, 0, 32, 32, 64, 64, 96, 96, 128, 128, 160, 160, 192, 192,
- 0, 0, 1, 32, 33, 64, 65, 96, 97, 128, 129, 160, 161, 192, 193, 224,
- 1, 1, 2, 33, 34, 65, 66, 97, 98, 129, 130, 161, 162, 193, 194, 225,
- 2, 2, 3, 34, 35, 66, 67, 98, 99, 130, 131, 162, 163, 194, 195, 226,
- 3, 3, 4, 35, 36, 67, 68, 99, 100, 131, 132, 163, 164, 195, 196, 227,
- 4, 4, 5, 36, 37, 68, 69, 100, 101, 132, 133, 164, 165, 196, 197, 228,
- 5, 5, 6, 37, 38, 69, 70, 101, 102, 133, 134, 165, 166, 197, 198, 229,
- 6, 6, 7, 38, 39, 70, 71, 102, 103, 134, 135, 166, 167, 198, 199, 230,
- 7, 7, 8, 39, 40, 71, 72, 103, 104, 135, 136, 167, 168, 199, 200, 231,
- 8, 8, 9, 40, 41, 72, 73, 104, 105, 136, 137, 168, 169, 200, 201, 232,
- 9, 9, 10, 41, 42, 73, 74, 105, 106, 137, 138, 169, 170, 201, 202, 233,
- 10, 10, 11, 42, 43, 74, 75, 106, 107, 138, 139, 170, 171, 202, 203, 234,
- 11, 11, 12, 43, 44, 75, 76, 107, 108, 139, 140, 171, 172, 203, 204, 235,
- 12, 12, 13, 44, 45, 76, 77, 108, 109, 140, 141, 172, 173, 204, 205, 236,
- 13, 13, 14, 45, 46, 77, 78, 109, 110, 141, 142, 173, 174, 205, 206, 237,
- 14, 14, 15, 46, 47, 78, 79, 110, 111, 142, 143, 174, 175, 206, 207, 238,
- 15, 15, 16, 47, 48, 79, 80, 111, 112, 143, 144, 175, 176, 207, 208, 239,
- 16, 16, 17, 48, 49, 80, 81, 112, 113, 144, 145, 176, 177, 208, 209, 240,
- 17, 17, 18, 49, 50, 81, 82, 113, 114, 145, 146, 177, 178, 209, 210, 241,
- 18, 18, 19, 50, 51, 82, 83, 114, 115, 146, 147, 178, 179, 210, 211, 242,
- 19, 19, 20, 51, 52, 83, 84, 115, 116, 147, 148, 179, 180, 211, 212, 243,
- 20, 20, 21, 52, 53, 84, 85, 116, 117, 148, 149, 180, 181, 212, 213, 244,
- 21, 21, 22, 53, 54, 85, 86, 117, 118, 149, 150, 181, 182, 213, 214, 245,
- 22, 22, 23, 54, 55, 86, 87, 118, 119, 150, 151, 182, 183, 214, 215, 246,
- 23, 23, 24, 55, 56, 87, 88, 119, 120, 151, 152, 183, 184, 215, 216, 247,
- 24, 24, 25, 56, 57, 88, 89, 120, 121, 152, 153, 184, 185, 216, 217, 248,
- 25, 25, 26, 57, 58, 89, 90, 121, 122, 153, 154, 185, 186, 217, 218, 249,
- 26, 26, 27, 58, 59, 90, 91, 122, 123, 154, 155, 186, 187, 218, 219, 250,
- 27, 27, 28, 59, 60, 91, 92, 123, 124, 155, 156, 187, 188, 219, 220, 251,
- 28, 28, 29, 60, 61, 92, 93, 124, 125, 156, 157, 188, 189, 220, 221, 252,
- 29, 29, 30, 61, 62, 93, 94, 125, 126, 157, 158, 189, 190, 221, 222, 253,
- 30, 30, 31, 62, 63, 94, 95, 126, 127, 158, 159, 190, 191, 222, 223, 254,
- 0, 0
-};
-
-DECLARE_ALIGNED(16, static const int16_t,
- mcol_scan_8x8_neighbors[65 * MAX_NEIGHBORS]) = {
- 0, 0, 0, 0, 8, 8, 16, 16, 24, 24, 32, 32, 40, 40, 48, 48, 0, 0, 1,
- 8, 9, 16, 17, 24, 25, 32, 33, 40, 41, 48, 49, 56, 1, 1, 2, 9, 10, 17,
- 18, 25, 26, 33, 34, 41, 42, 49, 50, 57, 2, 2, 3, 10, 11, 18, 19, 26, 27,
- 34, 35, 42, 43, 50, 51, 58, 3, 3, 4, 11, 12, 19, 20, 27, 28, 35, 36, 43,
- 44, 51, 52, 59, 4, 4, 5, 12, 13, 20, 21, 28, 29, 36, 37, 44, 45, 52, 53,
- 60, 5, 5, 6, 13, 14, 21, 22, 29, 30, 37, 38, 45, 46, 53, 54, 61, 6, 6,
- 7, 14, 15, 22, 23, 30, 31, 38, 39, 46, 47, 54, 55, 62, 0, 0,
-};
-
-DECLARE_ALIGNED(16, static const int16_t,
- mrow_scan_8x8_neighbors[65 * MAX_NEIGHBORS]) = {
- 0, 0, 0, 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 0, 0, 1,
- 8, 2, 9, 3, 10, 4, 11, 5, 12, 6, 13, 7, 14, 8, 8, 9, 16, 10, 17,
- 11, 18, 12, 19, 13, 20, 14, 21, 15, 22, 16, 16, 17, 24, 18, 25, 19, 26, 20,
- 27, 21, 28, 22, 29, 23, 30, 24, 24, 25, 32, 26, 33, 27, 34, 28, 35, 29, 36,
- 30, 37, 31, 38, 32, 32, 33, 40, 34, 41, 35, 42, 36, 43, 37, 44, 38, 45, 39,
- 46, 40, 40, 41, 48, 42, 49, 43, 50, 44, 51, 45, 52, 46, 53, 47, 54, 48, 48,
- 49, 56, 50, 57, 51, 58, 52, 59, 53, 60, 54, 61, 55, 62, 0, 0,
-};
-
-DECLARE_ALIGNED(16, static const int16_t,
- default_scan_8x8_neighbors[65 * MAX_NEIGHBORS]) = {
- 0, 0, 0, 0, 0, 0, 8, 8, 1, 8, 1, 1, 2, 2, 2, 9, 9, 16, 16,
- 16, 24, 24, 17, 24, 10, 17, 3, 10, 3, 3, 4, 4, 4, 11, 11, 18, 18, 25,
- 25, 32, 32, 32, 40, 40, 33, 40, 26, 33, 19, 26, 12, 19, 5, 12, 5, 5, 6,
- 6, 6, 13, 13, 20, 20, 27, 27, 34, 34, 41, 41, 48, 48, 48, 49, 56, 42, 49,
- 35, 42, 28, 35, 21, 28, 14, 21, 7, 14, 15, 22, 22, 29, 29, 36, 36, 43, 43,
- 50, 50, 57, 51, 58, 44, 51, 37, 44, 30, 37, 23, 30, 31, 38, 38, 45, 45, 52,
- 52, 59, 53, 60, 46, 53, 39, 46, 47, 54, 54, 61, 55, 62, 0, 0
-};
-
-DECLARE_ALIGNED(16, static const int16_t,
- default_scan_8x16_neighbors[129 * MAX_NEIGHBORS]) = {
- 0, 0, 0, 0, 0, 0, 1, 1, 1, 8, 8, 8, 2, 2, 2,
- 9, 9, 16, 16, 16, 3, 3, 3, 10, 10, 17, 17, 24, 24, 24,
- 4, 4, 4, 11, 11, 18, 18, 25, 25, 32, 32, 32, 5, 5, 5,
- 12, 12, 19, 19, 26, 26, 33, 33, 40, 40, 40, 6, 6, 6, 13,
- 13, 20, 20, 27, 27, 34, 34, 41, 41, 48, 48, 48, 7, 14, 14,
- 21, 21, 28, 28, 35, 35, 42, 42, 49, 49, 56, 56, 56, 15, 22,
- 22, 29, 29, 36, 36, 43, 43, 50, 50, 57, 57, 64, 64, 64, 23,
- 30, 30, 37, 37, 44, 44, 51, 51, 58, 58, 65, 65, 72, 72, 72,
- 31, 38, 38, 45, 45, 52, 52, 59, 59, 66, 66, 73, 73, 80, 80,
- 80, 39, 46, 46, 53, 53, 60, 60, 67, 67, 74, 74, 81, 81, 88,
- 88, 88, 47, 54, 54, 61, 61, 68, 68, 75, 75, 82, 82, 89, 89,
- 96, 96, 96, 55, 62, 62, 69, 69, 76, 76, 83, 83, 90, 90, 97,
- 97, 104, 104, 104, 63, 70, 70, 77, 77, 84, 84, 91, 91, 98, 98,
- 105, 105, 112, 112, 112, 71, 78, 78, 85, 85, 92, 92, 99, 99, 106,
- 106, 113, 113, 120, 79, 86, 86, 93, 93, 100, 100, 107, 107, 114, 114,
- 121, 87, 94, 94, 101, 101, 108, 108, 115, 115, 122, 95, 102, 102, 109,
- 109, 116, 116, 123, 103, 110, 110, 117, 117, 124, 111, 118, 118, 125, 119,
- 126, 0, 0
-};
-
-DECLARE_ALIGNED(16, static const int16_t,
- default_scan_16x8_neighbors[129 * MAX_NEIGHBORS]) = {
- 0, 0, 0, 0, 0, 0, 1, 1, 1, 16, 16, 16, 2, 2, 2,
- 17, 17, 32, 32, 32, 3, 3, 3, 18, 18, 33, 33, 48, 48, 48,
- 4, 4, 4, 19, 19, 34, 34, 49, 49, 64, 64, 64, 5, 5, 5,
- 20, 20, 35, 35, 50, 50, 65, 65, 80, 80, 80, 6, 6, 6, 21,
- 21, 36, 36, 51, 51, 66, 66, 81, 81, 96, 96, 96, 7, 7, 7,
- 22, 22, 37, 37, 52, 52, 67, 67, 82, 82, 97, 97, 112, 8, 8,
- 8, 23, 23, 38, 38, 53, 53, 68, 68, 83, 83, 98, 98, 113, 9,
- 9, 9, 24, 24, 39, 39, 54, 54, 69, 69, 84, 84, 99, 99, 114,
- 10, 10, 10, 25, 25, 40, 40, 55, 55, 70, 70, 85, 85, 100, 100,
- 115, 11, 11, 11, 26, 26, 41, 41, 56, 56, 71, 71, 86, 86, 101,
- 101, 116, 12, 12, 12, 27, 27, 42, 42, 57, 57, 72, 72, 87, 87,
- 102, 102, 117, 13, 13, 13, 28, 28, 43, 43, 58, 58, 73, 73, 88,
- 88, 103, 103, 118, 14, 14, 14, 29, 29, 44, 44, 59, 59, 74, 74,
- 89, 89, 104, 104, 119, 15, 30, 30, 45, 45, 60, 60, 75, 75, 90,
- 90, 105, 105, 120, 31, 46, 46, 61, 61, 76, 76, 91, 91, 106, 106,
- 121, 47, 62, 62, 77, 77, 92, 92, 107, 107, 122, 63, 78, 78, 93,
- 93, 108, 108, 123, 79, 94, 94, 109, 109, 124, 95, 110, 110, 125, 111,
- 126, 0, 0
-};
-
-DECLARE_ALIGNED(16, static const int16_t,
- mcol_scan_8x16_neighbors[129 * MAX_NEIGHBORS]) = {
- 0, 0, 0, 0, 8, 8, 16, 16, 24, 24, 32, 32, 40, 40, 48, 48,
- 56, 56, 64, 64, 72, 72, 80, 80, 88, 88, 96, 96, 104, 104, 112, 112,
- 0, 0, 1, 8, 9, 16, 17, 24, 25, 32, 33, 40, 41, 48, 49, 56,
- 57, 64, 65, 72, 73, 80, 81, 88, 89, 96, 97, 104, 105, 112, 113, 120,
- 1, 1, 2, 9, 10, 17, 18, 25, 26, 33, 34, 41, 42, 49, 50, 57,
- 58, 65, 66, 73, 74, 81, 82, 89, 90, 97, 98, 105, 106, 113, 114, 121,
- 2, 2, 3, 10, 11, 18, 19, 26, 27, 34, 35, 42, 43, 50, 51, 58,
- 59, 66, 67, 74, 75, 82, 83, 90, 91, 98, 99, 106, 107, 114, 115, 122,
- 3, 3, 4, 11, 12, 19, 20, 27, 28, 35, 36, 43, 44, 51, 52, 59,
- 60, 67, 68, 75, 76, 83, 84, 91, 92, 99, 100, 107, 108, 115, 116, 123,
- 4, 4, 5, 12, 13, 20, 21, 28, 29, 36, 37, 44, 45, 52, 53, 60,
- 61, 68, 69, 76, 77, 84, 85, 92, 93, 100, 101, 108, 109, 116, 117, 124,
- 5, 5, 6, 13, 14, 21, 22, 29, 30, 37, 38, 45, 46, 53, 54, 61,
- 62, 69, 70, 77, 78, 85, 86, 93, 94, 101, 102, 109, 110, 117, 118, 125,
- 6, 6, 7, 14, 15, 22, 23, 30, 31, 38, 39, 46, 47, 54, 55, 62,
- 63, 70, 71, 78, 79, 86, 87, 94, 95, 102, 103, 110, 111, 118, 119, 126,
- 0, 0
-};
-
-DECLARE_ALIGNED(16, static const int16_t,
- mcol_scan_16x8_neighbors[129 * MAX_NEIGHBORS]) = {
- 0, 0, 0, 0, 16, 16, 32, 32, 48, 48, 64, 64, 80, 80, 96, 96,
- 0, 0, 1, 16, 17, 32, 33, 48, 49, 64, 65, 80, 81, 96, 97, 112,
- 1, 1, 2, 17, 18, 33, 34, 49, 50, 65, 66, 81, 82, 97, 98, 113,
- 2, 2, 3, 18, 19, 34, 35, 50, 51, 66, 67, 82, 83, 98, 99, 114,
- 3, 3, 4, 19, 20, 35, 36, 51, 52, 67, 68, 83, 84, 99, 100, 115,
- 4, 4, 5, 20, 21, 36, 37, 52, 53, 68, 69, 84, 85, 100, 101, 116,
- 5, 5, 6, 21, 22, 37, 38, 53, 54, 69, 70, 85, 86, 101, 102, 117,
- 6, 6, 7, 22, 23, 38, 39, 54, 55, 70, 71, 86, 87, 102, 103, 118,
- 7, 7, 8, 23, 24, 39, 40, 55, 56, 71, 72, 87, 88, 103, 104, 119,
- 8, 8, 9, 24, 25, 40, 41, 56, 57, 72, 73, 88, 89, 104, 105, 120,
- 9, 9, 10, 25, 26, 41, 42, 57, 58, 73, 74, 89, 90, 105, 106, 121,
- 10, 10, 11, 26, 27, 42, 43, 58, 59, 74, 75, 90, 91, 106, 107, 122,
- 11, 11, 12, 27, 28, 43, 44, 59, 60, 75, 76, 91, 92, 107, 108, 123,
- 12, 12, 13, 28, 29, 44, 45, 60, 61, 76, 77, 92, 93, 108, 109, 124,
- 13, 13, 14, 29, 30, 45, 46, 61, 62, 77, 78, 93, 94, 109, 110, 125,
- 14, 14, 15, 30, 31, 46, 47, 62, 63, 78, 79, 94, 95, 110, 111, 126,
- 0, 0
-};
-
-DECLARE_ALIGNED(16, static const int16_t,
- mrow_scan_8x16_neighbors[129 * MAX_NEIGHBORS]) = {
- 0, 0, 0, 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6,
- 6, 0, 0, 1, 8, 2, 9, 3, 10, 4, 11, 5, 12, 6, 13,
- 7, 14, 8, 8, 9, 16, 10, 17, 11, 18, 12, 19, 13, 20, 14,
- 21, 15, 22, 16, 16, 17, 24, 18, 25, 19, 26, 20, 27, 21, 28,
- 22, 29, 23, 30, 24, 24, 25, 32, 26, 33, 27, 34, 28, 35, 29,
- 36, 30, 37, 31, 38, 32, 32, 33, 40, 34, 41, 35, 42, 36, 43,
- 37, 44, 38, 45, 39, 46, 40, 40, 41, 48, 42, 49, 43, 50, 44,
- 51, 45, 52, 46, 53, 47, 54, 48, 48, 49, 56, 50, 57, 51, 58,
- 52, 59, 53, 60, 54, 61, 55, 62, 56, 56, 57, 64, 58, 65, 59,
- 66, 60, 67, 61, 68, 62, 69, 63, 70, 64, 64, 65, 72, 66, 73,
- 67, 74, 68, 75, 69, 76, 70, 77, 71, 78, 72, 72, 73, 80, 74,
- 81, 75, 82, 76, 83, 77, 84, 78, 85, 79, 86, 80, 80, 81, 88,
- 82, 89, 83, 90, 84, 91, 85, 92, 86, 93, 87, 94, 88, 88, 89,
- 96, 90, 97, 91, 98, 92, 99, 93, 100, 94, 101, 95, 102, 96, 96,
- 97, 104, 98, 105, 99, 106, 100, 107, 101, 108, 102, 109, 103, 110, 104,
- 104, 105, 112, 106, 113, 107, 114, 108, 115, 109, 116, 110, 117, 111, 118,
- 112, 112, 113, 120, 114, 121, 115, 122, 116, 123, 117, 124, 118, 125, 119,
- 126, 0, 0
-};
-
-DECLARE_ALIGNED(16, static const int16_t,
- mrow_scan_16x8_neighbors[129 * MAX_NEIGHBORS]) = {
- 0, 0, 0, 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6,
- 6, 7, 7, 8, 8, 9, 9, 10, 10, 11, 11, 12, 12, 13, 13,
- 14, 14, 0, 0, 1, 16, 2, 17, 3, 18, 4, 19, 5, 20, 6,
- 21, 7, 22, 8, 23, 9, 24, 10, 25, 11, 26, 12, 27, 13, 28,
- 14, 29, 15, 30, 16, 16, 17, 32, 18, 33, 19, 34, 20, 35, 21,
- 36, 22, 37, 23, 38, 24, 39, 25, 40, 26, 41, 27, 42, 28, 43,
- 29, 44, 30, 45, 31, 46, 32, 32, 33, 48, 34, 49, 35, 50, 36,
- 51, 37, 52, 38, 53, 39, 54, 40, 55, 41, 56, 42, 57, 43, 58,
- 44, 59, 45, 60, 46, 61, 47, 62, 48, 48, 49, 64, 50, 65, 51,
- 66, 52, 67, 53, 68, 54, 69, 55, 70, 56, 71, 57, 72, 58, 73,
- 59, 74, 60, 75, 61, 76, 62, 77, 63, 78, 64, 64, 65, 80, 66,
- 81, 67, 82, 68, 83, 69, 84, 70, 85, 71, 86, 72, 87, 73, 88,
- 74, 89, 75, 90, 76, 91, 77, 92, 78, 93, 79, 94, 80, 80, 81,
- 96, 82, 97, 83, 98, 84, 99, 85, 100, 86, 101, 87, 102, 88, 103,
- 89, 104, 90, 105, 91, 106, 92, 107, 93, 108, 94, 109, 95, 110, 96,
- 96, 97, 112, 98, 113, 99, 114, 100, 115, 101, 116, 102, 117, 103, 118,
- 104, 119, 105, 120, 106, 121, 107, 122, 108, 123, 109, 124, 110, 125, 111,
- 126, 0, 0
-};
-
-DECLARE_ALIGNED(16, static const int16_t,
- default_scan_16x32_neighbors[513 * MAX_NEIGHBORS]) = {
- 0, 0, 0, 0, 0, 0, 1, 1, 1, 16, 16, 16, 2, 2, 2,
- 17, 17, 32, 32, 32, 3, 3, 3, 18, 18, 33, 33, 48, 48, 48,
- 4, 4, 4, 19, 19, 34, 34, 49, 49, 64, 64, 64, 5, 5, 5,
- 20, 20, 35, 35, 50, 50, 65, 65, 80, 80, 80, 6, 6, 6, 21,
- 21, 36, 36, 51, 51, 66, 66, 81, 81, 96, 96, 96, 7, 7, 7,
- 22, 22, 37, 37, 52, 52, 67, 67, 82, 82, 97, 97, 112, 112, 112,
- 8, 8, 8, 23, 23, 38, 38, 53, 53, 68, 68, 83, 83, 98, 98,
- 113, 113, 128, 128, 128, 9, 9, 9, 24, 24, 39, 39, 54, 54, 69,
- 69, 84, 84, 99, 99, 114, 114, 129, 129, 144, 144, 144, 10, 10, 10,
- 25, 25, 40, 40, 55, 55, 70, 70, 85, 85, 100, 100, 115, 115, 130,
- 130, 145, 145, 160, 160, 160, 11, 11, 11, 26, 26, 41, 41, 56, 56,
- 71, 71, 86, 86, 101, 101, 116, 116, 131, 131, 146, 146, 161, 161, 176,
- 176, 176, 12, 12, 12, 27, 27, 42, 42, 57, 57, 72, 72, 87, 87,
- 102, 102, 117, 117, 132, 132, 147, 147, 162, 162, 177, 177, 192, 192, 192,
- 13, 13, 13, 28, 28, 43, 43, 58, 58, 73, 73, 88, 88, 103, 103,
- 118, 118, 133, 133, 148, 148, 163, 163, 178, 178, 193, 193, 208, 208, 208,
- 14, 14, 14, 29, 29, 44, 44, 59, 59, 74, 74, 89, 89, 104, 104,
- 119, 119, 134, 134, 149, 149, 164, 164, 179, 179, 194, 194, 209, 209, 224,
- 224, 224, 15, 30, 30, 45, 45, 60, 60, 75, 75, 90, 90, 105, 105,
- 120, 120, 135, 135, 150, 150, 165, 165, 180, 180, 195, 195, 210, 210, 225,
- 225, 240, 240, 240, 31, 46, 46, 61, 61, 76, 76, 91, 91, 106, 106,
- 121, 121, 136, 136, 151, 151, 166, 166, 181, 181, 196, 196, 211, 211, 226,
- 226, 241, 241, 256, 256, 256, 47, 62, 62, 77, 77, 92, 92, 107, 107,
- 122, 122, 137, 137, 152, 152, 167, 167, 182, 182, 197, 197, 212, 212, 227,
- 227, 242, 242, 257, 257, 272, 272, 272, 63, 78, 78, 93, 93, 108, 108,
- 123, 123, 138, 138, 153, 153, 168, 168, 183, 183, 198, 198, 213, 213, 228,
- 228, 243, 243, 258, 258, 273, 273, 288, 288, 288, 79, 94, 94, 109, 109,
- 124, 124, 139, 139, 154, 154, 169, 169, 184, 184, 199, 199, 214, 214, 229,
- 229, 244, 244, 259, 259, 274, 274, 289, 289, 304, 304, 304, 95, 110, 110,
- 125, 125, 140, 140, 155, 155, 170, 170, 185, 185, 200, 200, 215, 215, 230,
- 230, 245, 245, 260, 260, 275, 275, 290, 290, 305, 305, 320, 320, 320, 111,
- 126, 126, 141, 141, 156, 156, 171, 171, 186, 186, 201, 201, 216, 216, 231,
- 231, 246, 246, 261, 261, 276, 276, 291, 291, 306, 306, 321, 321, 336, 336,
- 336, 127, 142, 142, 157, 157, 172, 172, 187, 187, 202, 202, 217, 217, 232,
- 232, 247, 247, 262, 262, 277, 277, 292, 292, 307, 307, 322, 322, 337, 337,
- 352, 352, 352, 143, 158, 158, 173, 173, 188, 188, 203, 203, 218, 218, 233,
- 233, 248, 248, 263, 263, 278, 278, 293, 293, 308, 308, 323, 323, 338, 338,
- 353, 353, 368, 368, 368, 159, 174, 174, 189, 189, 204, 204, 219, 219, 234,
- 234, 249, 249, 264, 264, 279, 279, 294, 294, 309, 309, 324, 324, 339, 339,
- 354, 354, 369, 369, 384, 384, 384, 175, 190, 190, 205, 205, 220, 220, 235,
- 235, 250, 250, 265, 265, 280, 280, 295, 295, 310, 310, 325, 325, 340, 340,
- 355, 355, 370, 370, 385, 385, 400, 400, 400, 191, 206, 206, 221, 221, 236,
- 236, 251, 251, 266, 266, 281, 281, 296, 296, 311, 311, 326, 326, 341, 341,
- 356, 356, 371, 371, 386, 386, 401, 401, 416, 416, 416, 207, 222, 222, 237,
- 237, 252, 252, 267, 267, 282, 282, 297, 297, 312, 312, 327, 327, 342, 342,
- 357, 357, 372, 372, 387, 387, 402, 402, 417, 417, 432, 432, 432, 223, 238,
- 238, 253, 253, 268, 268, 283, 283, 298, 298, 313, 313, 328, 328, 343, 343,
- 358, 358, 373, 373, 388, 388, 403, 403, 418, 418, 433, 433, 448, 448, 448,
- 239, 254, 254, 269, 269, 284, 284, 299, 299, 314, 314, 329, 329, 344, 344,
- 359, 359, 374, 374, 389, 389, 404, 404, 419, 419, 434, 434, 449, 449, 464,
- 464, 464, 255, 270, 270, 285, 285, 300, 300, 315, 315, 330, 330, 345, 345,
- 360, 360, 375, 375, 390, 390, 405, 405, 420, 420, 435, 435, 450, 450, 465,
- 465, 480, 480, 480, 271, 286, 286, 301, 301, 316, 316, 331, 331, 346, 346,
- 361, 361, 376, 376, 391, 391, 406, 406, 421, 421, 436, 436, 451, 451, 466,
- 466, 481, 481, 496, 287, 302, 302, 317, 317, 332, 332, 347, 347, 362, 362,
- 377, 377, 392, 392, 407, 407, 422, 422, 437, 437, 452, 452, 467, 467, 482,
- 482, 497, 303, 318, 318, 333, 333, 348, 348, 363, 363, 378, 378, 393, 393,
- 408, 408, 423, 423, 438, 438, 453, 453, 468, 468, 483, 483, 498, 319, 334,
- 334, 349, 349, 364, 364, 379, 379, 394, 394, 409, 409, 424, 424, 439, 439,
- 454, 454, 469, 469, 484, 484, 499, 335, 350, 350, 365, 365, 380, 380, 395,
- 395, 410, 410, 425, 425, 440, 440, 455, 455, 470, 470, 485, 485, 500, 351,
- 366, 366, 381, 381, 396, 396, 411, 411, 426, 426, 441, 441, 456, 456, 471,
- 471, 486, 486, 501, 367, 382, 382, 397, 397, 412, 412, 427, 427, 442, 442,
- 457, 457, 472, 472, 487, 487, 502, 383, 398, 398, 413, 413, 428, 428, 443,
- 443, 458, 458, 473, 473, 488, 488, 503, 399, 414, 414, 429, 429, 444, 444,
- 459, 459, 474, 474, 489, 489, 504, 415, 430, 430, 445, 445, 460, 460, 475,
- 475, 490, 490, 505, 431, 446, 446, 461, 461, 476, 476, 491, 491, 506, 447,
- 462, 462, 477, 477, 492, 492, 507, 463, 478, 478, 493, 493, 508, 479, 494,
- 494, 509, 495, 510, 0, 0
-};
-
-DECLARE_ALIGNED(16, static const int16_t,
- default_scan_32x16_neighbors[513 * MAX_NEIGHBORS]) = {
- 0, 0, 0, 0, 0, 0, 1, 1, 1, 32, 32, 32, 2, 2, 2,
- 33, 33, 64, 64, 64, 3, 3, 3, 34, 34, 65, 65, 96, 96, 96,
- 4, 4, 4, 35, 35, 66, 66, 97, 97, 128, 128, 128, 5, 5, 5,
- 36, 36, 67, 67, 98, 98, 129, 129, 160, 160, 160, 6, 6, 6, 37,
- 37, 68, 68, 99, 99, 130, 130, 161, 161, 192, 192, 192, 7, 7, 7,
- 38, 38, 69, 69, 100, 100, 131, 131, 162, 162, 193, 193, 224, 224, 224,
- 8, 8, 8, 39, 39, 70, 70, 101, 101, 132, 132, 163, 163, 194, 194,
- 225, 225, 256, 256, 256, 9, 9, 9, 40, 40, 71, 71, 102, 102, 133,
- 133, 164, 164, 195, 195, 226, 226, 257, 257, 288, 288, 288, 10, 10, 10,
- 41, 41, 72, 72, 103, 103, 134, 134, 165, 165, 196, 196, 227, 227, 258,
- 258, 289, 289, 320, 320, 320, 11, 11, 11, 42, 42, 73, 73, 104, 104,
- 135, 135, 166, 166, 197, 197, 228, 228, 259, 259, 290, 290, 321, 321, 352,
- 352, 352, 12, 12, 12, 43, 43, 74, 74, 105, 105, 136, 136, 167, 167,
- 198, 198, 229, 229, 260, 260, 291, 291, 322, 322, 353, 353, 384, 384, 384,
- 13, 13, 13, 44, 44, 75, 75, 106, 106, 137, 137, 168, 168, 199, 199,
- 230, 230, 261, 261, 292, 292, 323, 323, 354, 354, 385, 385, 416, 416, 416,
- 14, 14, 14, 45, 45, 76, 76, 107, 107, 138, 138, 169, 169, 200, 200,
- 231, 231, 262, 262, 293, 293, 324, 324, 355, 355, 386, 386, 417, 417, 448,
- 448, 448, 15, 15, 15, 46, 46, 77, 77, 108, 108, 139, 139, 170, 170,
- 201, 201, 232, 232, 263, 263, 294, 294, 325, 325, 356, 356, 387, 387, 418,
- 418, 449, 449, 480, 16, 16, 16, 47, 47, 78, 78, 109, 109, 140, 140,
- 171, 171, 202, 202, 233, 233, 264, 264, 295, 295, 326, 326, 357, 357, 388,
- 388, 419, 419, 450, 450, 481, 17, 17, 17, 48, 48, 79, 79, 110, 110,
- 141, 141, 172, 172, 203, 203, 234, 234, 265, 265, 296, 296, 327, 327, 358,
- 358, 389, 389, 420, 420, 451, 451, 482, 18, 18, 18, 49, 49, 80, 80,
- 111, 111, 142, 142, 173, 173, 204, 204, 235, 235, 266, 266, 297, 297, 328,
- 328, 359, 359, 390, 390, 421, 421, 452, 452, 483, 19, 19, 19, 50, 50,
- 81, 81, 112, 112, 143, 143, 174, 174, 205, 205, 236, 236, 267, 267, 298,
- 298, 329, 329, 360, 360, 391, 391, 422, 422, 453, 453, 484, 20, 20, 20,
- 51, 51, 82, 82, 113, 113, 144, 144, 175, 175, 206, 206, 237, 237, 268,
- 268, 299, 299, 330, 330, 361, 361, 392, 392, 423, 423, 454, 454, 485, 21,
- 21, 21, 52, 52, 83, 83, 114, 114, 145, 145, 176, 176, 207, 207, 238,
- 238, 269, 269, 300, 300, 331, 331, 362, 362, 393, 393, 424, 424, 455, 455,
- 486, 22, 22, 22, 53, 53, 84, 84, 115, 115, 146, 146, 177, 177, 208,
- 208, 239, 239, 270, 270, 301, 301, 332, 332, 363, 363, 394, 394, 425, 425,
- 456, 456, 487, 23, 23, 23, 54, 54, 85, 85, 116, 116, 147, 147, 178,
- 178, 209, 209, 240, 240, 271, 271, 302, 302, 333, 333, 364, 364, 395, 395,
- 426, 426, 457, 457, 488, 24, 24, 24, 55, 55, 86, 86, 117, 117, 148,
- 148, 179, 179, 210, 210, 241, 241, 272, 272, 303, 303, 334, 334, 365, 365,
- 396, 396, 427, 427, 458, 458, 489, 25, 25, 25, 56, 56, 87, 87, 118,
- 118, 149, 149, 180, 180, 211, 211, 242, 242, 273, 273, 304, 304, 335, 335,
- 366, 366, 397, 397, 428, 428, 459, 459, 490, 26, 26, 26, 57, 57, 88,
- 88, 119, 119, 150, 150, 181, 181, 212, 212, 243, 243, 274, 274, 305, 305,
- 336, 336, 367, 367, 398, 398, 429, 429, 460, 460, 491, 27, 27, 27, 58,
- 58, 89, 89, 120, 120, 151, 151, 182, 182, 213, 213, 244, 244, 275, 275,
- 306, 306, 337, 337, 368, 368, 399, 399, 430, 430, 461, 461, 492, 28, 28,
- 28, 59, 59, 90, 90, 121, 121, 152, 152, 183, 183, 214, 214, 245, 245,
- 276, 276, 307, 307, 338, 338, 369, 369, 400, 400, 431, 431, 462, 462, 493,
- 29, 29, 29, 60, 60, 91, 91, 122, 122, 153, 153, 184, 184, 215, 215,
- 246, 246, 277, 277, 308, 308, 339, 339, 370, 370, 401, 401, 432, 432, 463,
- 463, 494, 30, 30, 30, 61, 61, 92, 92, 123, 123, 154, 154, 185, 185,
- 216, 216, 247, 247, 278, 278, 309, 309, 340, 340, 371, 371, 402, 402, 433,
- 433, 464, 464, 495, 31, 62, 62, 93, 93, 124, 124, 155, 155, 186, 186,
- 217, 217, 248, 248, 279, 279, 310, 310, 341, 341, 372, 372, 403, 403, 434,
- 434, 465, 465, 496, 63, 94, 94, 125, 125, 156, 156, 187, 187, 218, 218,
- 249, 249, 280, 280, 311, 311, 342, 342, 373, 373, 404, 404, 435, 435, 466,
- 466, 497, 95, 126, 126, 157, 157, 188, 188, 219, 219, 250, 250, 281, 281,
- 312, 312, 343, 343, 374, 374, 405, 405, 436, 436, 467, 467, 498, 127, 158,
- 158, 189, 189, 220, 220, 251, 251, 282, 282, 313, 313, 344, 344, 375, 375,
- 406, 406, 437, 437, 468, 468, 499, 159, 190, 190, 221, 221, 252, 252, 283,
- 283, 314, 314, 345, 345, 376, 376, 407, 407, 438, 438, 469, 469, 500, 191,
- 222, 222, 253, 253, 284, 284, 315, 315, 346, 346, 377, 377, 408, 408, 439,
- 439, 470, 470, 501, 223, 254, 254, 285, 285, 316, 316, 347, 347, 378, 378,
- 409, 409, 440, 440, 471, 471, 502, 255, 286, 286, 317, 317, 348, 348, 379,
- 379, 410, 410, 441, 441, 472, 472, 503, 287, 318, 318, 349, 349, 380, 380,
- 411, 411, 442, 442, 473, 473, 504, 319, 350, 350, 381, 381, 412, 412, 443,
- 443, 474, 474, 505, 351, 382, 382, 413, 413, 444, 444, 475, 475, 506, 383,
- 414, 414, 445, 445, 476, 476, 507, 415, 446, 446, 477, 477, 508, 447, 478,
- 478, 509, 479, 510, 0, 0
-};
-
-DECLARE_ALIGNED(16, static const int16_t,
- mcol_scan_16x32_neighbors[513 * MAX_NEIGHBORS]) = {
- 0, 0, 0, 0, 16, 16, 32, 32, 48, 48, 64, 64, 80, 80, 96,
- 96, 112, 112, 128, 128, 144, 144, 160, 160, 176, 176, 192, 192, 208, 208,
- 224, 224, 240, 240, 256, 256, 272, 272, 288, 288, 304, 304, 320, 320, 336,
- 336, 352, 352, 368, 368, 384, 384, 400, 400, 416, 416, 432, 432, 448, 448,
- 464, 464, 480, 480, 0, 0, 1, 16, 17, 32, 33, 48, 49, 64, 65,
- 80, 81, 96, 97, 112, 113, 128, 129, 144, 145, 160, 161, 176, 177, 192,
- 193, 208, 209, 224, 225, 240, 241, 256, 257, 272, 273, 288, 289, 304, 305,
- 320, 321, 336, 337, 352, 353, 368, 369, 384, 385, 400, 401, 416, 417, 432,
- 433, 448, 449, 464, 465, 480, 481, 496, 1, 1, 2, 17, 18, 33, 34,
- 49, 50, 65, 66, 81, 82, 97, 98, 113, 114, 129, 130, 145, 146, 161,
- 162, 177, 178, 193, 194, 209, 210, 225, 226, 241, 242, 257, 258, 273, 274,
- 289, 290, 305, 306, 321, 322, 337, 338, 353, 354, 369, 370, 385, 386, 401,
- 402, 417, 418, 433, 434, 449, 450, 465, 466, 481, 482, 497, 2, 2, 3,
- 18, 19, 34, 35, 50, 51, 66, 67, 82, 83, 98, 99, 114, 115, 130,
- 131, 146, 147, 162, 163, 178, 179, 194, 195, 210, 211, 226, 227, 242, 243,
- 258, 259, 274, 275, 290, 291, 306, 307, 322, 323, 338, 339, 354, 355, 370,
- 371, 386, 387, 402, 403, 418, 419, 434, 435, 450, 451, 466, 467, 482, 483,
- 498, 3, 3, 4, 19, 20, 35, 36, 51, 52, 67, 68, 83, 84, 99,
- 100, 115, 116, 131, 132, 147, 148, 163, 164, 179, 180, 195, 196, 211, 212,
- 227, 228, 243, 244, 259, 260, 275, 276, 291, 292, 307, 308, 323, 324, 339,
- 340, 355, 356, 371, 372, 387, 388, 403, 404, 419, 420, 435, 436, 451, 452,
- 467, 468, 483, 484, 499, 4, 4, 5, 20, 21, 36, 37, 52, 53, 68,
- 69, 84, 85, 100, 101, 116, 117, 132, 133, 148, 149, 164, 165, 180, 181,
- 196, 197, 212, 213, 228, 229, 244, 245, 260, 261, 276, 277, 292, 293, 308,
- 309, 324, 325, 340, 341, 356, 357, 372, 373, 388, 389, 404, 405, 420, 421,
- 436, 437, 452, 453, 468, 469, 484, 485, 500, 5, 5, 6, 21, 22, 37,
- 38, 53, 54, 69, 70, 85, 86, 101, 102, 117, 118, 133, 134, 149, 150,
- 165, 166, 181, 182, 197, 198, 213, 214, 229, 230, 245, 246, 261, 262, 277,
- 278, 293, 294, 309, 310, 325, 326, 341, 342, 357, 358, 373, 374, 389, 390,
- 405, 406, 421, 422, 437, 438, 453, 454, 469, 470, 485, 486, 501, 6, 6,
- 7, 22, 23, 38, 39, 54, 55, 70, 71, 86, 87, 102, 103, 118, 119,
- 134, 135, 150, 151, 166, 167, 182, 183, 198, 199, 214, 215, 230, 231, 246,
- 247, 262, 263, 278, 279, 294, 295, 310, 311, 326, 327, 342, 343, 358, 359,
- 374, 375, 390, 391, 406, 407, 422, 423, 438, 439, 454, 455, 470, 471, 486,
- 487, 502, 7, 7, 8, 23, 24, 39, 40, 55, 56, 71, 72, 87, 88,
- 103, 104, 119, 120, 135, 136, 151, 152, 167, 168, 183, 184, 199, 200, 215,
- 216, 231, 232, 247, 248, 263, 264, 279, 280, 295, 296, 311, 312, 327, 328,
- 343, 344, 359, 360, 375, 376, 391, 392, 407, 408, 423, 424, 439, 440, 455,
- 456, 471, 472, 487, 488, 503, 8, 8, 9, 24, 25, 40, 41, 56, 57,
- 72, 73, 88, 89, 104, 105, 120, 121, 136, 137, 152, 153, 168, 169, 184,
- 185, 200, 201, 216, 217, 232, 233, 248, 249, 264, 265, 280, 281, 296, 297,
- 312, 313, 328, 329, 344, 345, 360, 361, 376, 377, 392, 393, 408, 409, 424,
- 425, 440, 441, 456, 457, 472, 473, 488, 489, 504, 9, 9, 10, 25, 26,
- 41, 42, 57, 58, 73, 74, 89, 90, 105, 106, 121, 122, 137, 138, 153,
- 154, 169, 170, 185, 186, 201, 202, 217, 218, 233, 234, 249, 250, 265, 266,
- 281, 282, 297, 298, 313, 314, 329, 330, 345, 346, 361, 362, 377, 378, 393,
- 394, 409, 410, 425, 426, 441, 442, 457, 458, 473, 474, 489, 490, 505, 10,
- 10, 11, 26, 27, 42, 43, 58, 59, 74, 75, 90, 91, 106, 107, 122,
- 123, 138, 139, 154, 155, 170, 171, 186, 187, 202, 203, 218, 219, 234, 235,
- 250, 251, 266, 267, 282, 283, 298, 299, 314, 315, 330, 331, 346, 347, 362,
- 363, 378, 379, 394, 395, 410, 411, 426, 427, 442, 443, 458, 459, 474, 475,
- 490, 491, 506, 11, 11, 12, 27, 28, 43, 44, 59, 60, 75, 76, 91,
- 92, 107, 108, 123, 124, 139, 140, 155, 156, 171, 172, 187, 188, 203, 204,
- 219, 220, 235, 236, 251, 252, 267, 268, 283, 284, 299, 300, 315, 316, 331,
- 332, 347, 348, 363, 364, 379, 380, 395, 396, 411, 412, 427, 428, 443, 444,
- 459, 460, 475, 476, 491, 492, 507, 12, 12, 13, 28, 29, 44, 45, 60,
- 61, 76, 77, 92, 93, 108, 109, 124, 125, 140, 141, 156, 157, 172, 173,
- 188, 189, 204, 205, 220, 221, 236, 237, 252, 253, 268, 269, 284, 285, 300,
- 301, 316, 317, 332, 333, 348, 349, 364, 365, 380, 381, 396, 397, 412, 413,
- 428, 429, 444, 445, 460, 461, 476, 477, 492, 493, 508, 13, 13, 14, 29,
- 30, 45, 46, 61, 62, 77, 78, 93, 94, 109, 110, 125, 126, 141, 142,
- 157, 158, 173, 174, 189, 190, 205, 206, 221, 222, 237, 238, 253, 254, 269,
- 270, 285, 286, 301, 302, 317, 318, 333, 334, 349, 350, 365, 366, 381, 382,
- 397, 398, 413, 414, 429, 430, 445, 446, 461, 462, 477, 478, 493, 494, 509,
- 14, 14, 15, 30, 31, 46, 47, 62, 63, 78, 79, 94, 95, 110, 111,
- 126, 127, 142, 143, 158, 159, 174, 175, 190, 191, 206, 207, 222, 223, 238,
- 239, 254, 255, 270, 271, 286, 287, 302, 303, 318, 319, 334, 335, 350, 351,
- 366, 367, 382, 383, 398, 399, 414, 415, 430, 431, 446, 447, 462, 463, 478,
- 479, 494, 495, 510, 0, 0
-};
-
-DECLARE_ALIGNED(16, static const int16_t,
- mcol_scan_32x16_neighbors[513 * MAX_NEIGHBORS]) = {
- 0, 0, 0, 0, 32, 32, 64, 64, 96, 96, 128, 128, 160, 160, 192,
- 192, 224, 224, 256, 256, 288, 288, 320, 320, 352, 352, 384, 384, 416, 416,
- 448, 448, 0, 0, 1, 32, 33, 64, 65, 96, 97, 128, 129, 160, 161,
- 192, 193, 224, 225, 256, 257, 288, 289, 320, 321, 352, 353, 384, 385, 416,
- 417, 448, 449, 480, 1, 1, 2, 33, 34, 65, 66, 97, 98, 129, 130,
- 161, 162, 193, 194, 225, 226, 257, 258, 289, 290, 321, 322, 353, 354, 385,
- 386, 417, 418, 449, 450, 481, 2, 2, 3, 34, 35, 66, 67, 98, 99,
- 130, 131, 162, 163, 194, 195, 226, 227, 258, 259, 290, 291, 322, 323, 354,
- 355, 386, 387, 418, 419, 450, 451, 482, 3, 3, 4, 35, 36, 67, 68,
- 99, 100, 131, 132, 163, 164, 195, 196, 227, 228, 259, 260, 291, 292, 323,
- 324, 355, 356, 387, 388, 419, 420, 451, 452, 483, 4, 4, 5, 36, 37,
- 68, 69, 100, 101, 132, 133, 164, 165, 196, 197, 228, 229, 260, 261, 292,
- 293, 324, 325, 356, 357, 388, 389, 420, 421, 452, 453, 484, 5, 5, 6,
- 37, 38, 69, 70, 101, 102, 133, 134, 165, 166, 197, 198, 229, 230, 261,
- 262, 293, 294, 325, 326, 357, 358, 389, 390, 421, 422, 453, 454, 485, 6,
- 6, 7, 38, 39, 70, 71, 102, 103, 134, 135, 166, 167, 198, 199, 230,
- 231, 262, 263, 294, 295, 326, 327, 358, 359, 390, 391, 422, 423, 454, 455,
- 486, 7, 7, 8, 39, 40, 71, 72, 103, 104, 135, 136, 167, 168, 199,
- 200, 231, 232, 263, 264, 295, 296, 327, 328, 359, 360, 391, 392, 423, 424,
- 455, 456, 487, 8, 8, 9, 40, 41, 72, 73, 104, 105, 136, 137, 168,
- 169, 200, 201, 232, 233, 264, 265, 296, 297, 328, 329, 360, 361, 392, 393,
- 424, 425, 456, 457, 488, 9, 9, 10, 41, 42, 73, 74, 105, 106, 137,
- 138, 169, 170, 201, 202, 233, 234, 265, 266, 297, 298, 329, 330, 361, 362,
- 393, 394, 425, 426, 457, 458, 489, 10, 10, 11, 42, 43, 74, 75, 106,
- 107, 138, 139, 170, 171, 202, 203, 234, 235, 266, 267, 298, 299, 330, 331,
- 362, 363, 394, 395, 426, 427, 458, 459, 490, 11, 11, 12, 43, 44, 75,
- 76, 107, 108, 139, 140, 171, 172, 203, 204, 235, 236, 267, 268, 299, 300,
- 331, 332, 363, 364, 395, 396, 427, 428, 459, 460, 491, 12, 12, 13, 44,
- 45, 76, 77, 108, 109, 140, 141, 172, 173, 204, 205, 236, 237, 268, 269,
- 300, 301, 332, 333, 364, 365, 396, 397, 428, 429, 460, 461, 492, 13, 13,
- 14, 45, 46, 77, 78, 109, 110, 141, 142, 173, 174, 205, 206, 237, 238,
- 269, 270, 301, 302, 333, 334, 365, 366, 397, 398, 429, 430, 461, 462, 493,
- 14, 14, 15, 46, 47, 78, 79, 110, 111, 142, 143, 174, 175, 206, 207,
- 238, 239, 270, 271, 302, 303, 334, 335, 366, 367, 398, 399, 430, 431, 462,
- 463, 494, 15, 15, 16, 47, 48, 79, 80, 111, 112, 143, 144, 175, 176,
- 207, 208, 239, 240, 271, 272, 303, 304, 335, 336, 367, 368, 399, 400, 431,
- 432, 463, 464, 495, 16, 16, 17, 48, 49, 80, 81, 112, 113, 144, 145,
- 176, 177, 208, 209, 240, 241, 272, 273, 304, 305, 336, 337, 368, 369, 400,
- 401, 432, 433, 464, 465, 496, 17, 17, 18, 49, 50, 81, 82, 113, 114,
- 145, 146, 177, 178, 209, 210, 241, 242, 273, 274, 305, 306, 337, 338, 369,
- 370, 401, 402, 433, 434, 465, 466, 497, 18, 18, 19, 50, 51, 82, 83,
- 114, 115, 146, 147, 178, 179, 210, 211, 242, 243, 274, 275, 306, 307, 338,
- 339, 370, 371, 402, 403, 434, 435, 466, 467, 498, 19, 19, 20, 51, 52,
- 83, 84, 115, 116, 147, 148, 179, 180, 211, 212, 243, 244, 275, 276, 307,
- 308, 339, 340, 371, 372, 403, 404, 435, 436, 467, 468, 499, 20, 20, 21,
- 52, 53, 84, 85, 116, 117, 148, 149, 180, 181, 212, 213, 244, 245, 276,
- 277, 308, 309, 340, 341, 372, 373, 404, 405, 436, 437, 468, 469, 500, 21,
- 21, 22, 53, 54, 85, 86, 117, 118, 149, 150, 181, 182, 213, 214, 245,
- 246, 277, 278, 309, 310, 341, 342, 373, 374, 405, 406, 437, 438, 469, 470,
- 501, 22, 22, 23, 54, 55, 86, 87, 118, 119, 150, 151, 182, 183, 214,
- 215, 246, 247, 278, 279, 310, 311, 342, 343, 374, 375, 406, 407, 438, 439,
- 470, 471, 502, 23, 23, 24, 55, 56, 87, 88, 119, 120, 151, 152, 183,
- 184, 215, 216, 247, 248, 279, 280, 311, 312, 343, 344, 375, 376, 407, 408,
- 439, 440, 471, 472, 503, 24, 24, 25, 56, 57, 88, 89, 120, 121, 152,
- 153, 184, 185, 216, 217, 248, 249, 280, 281, 312, 313, 344, 345, 376, 377,
- 408, 409, 440, 441, 472, 473, 504, 25, 25, 26, 57, 58, 89, 90, 121,
- 122, 153, 154, 185, 186, 217, 218, 249, 250, 281, 282, 313, 314, 345, 346,
- 377, 378, 409, 410, 441, 442, 473, 474, 505, 26, 26, 27, 58, 59, 90,
- 91, 122, 123, 154, 155, 186, 187, 218, 219, 250, 251, 282, 283, 314, 315,
- 346, 347, 378, 379, 410, 411, 442, 443, 474, 475, 506, 27, 27, 28, 59,
- 60, 91, 92, 123, 124, 155, 156, 187, 188, 219, 220, 251, 252, 283, 284,
- 315, 316, 347, 348, 379, 380, 411, 412, 443, 444, 475, 476, 507, 28, 28,
- 29, 60, 61, 92, 93, 124, 125, 156, 157, 188, 189, 220, 221, 252, 253,
- 284, 285, 316, 317, 348, 349, 380, 381, 412, 413, 444, 445, 476, 477, 508,
- 29, 29, 30, 61, 62, 93, 94, 125, 126, 157, 158, 189, 190, 221, 222,
- 253, 254, 285, 286, 317, 318, 349, 350, 381, 382, 413, 414, 445, 446, 477,
- 478, 509, 30, 30, 31, 62, 63, 94, 95, 126, 127, 158, 159, 190, 191,
- 222, 223, 254, 255, 286, 287, 318, 319, 350, 351, 382, 383, 414, 415, 446,
- 447, 478, 479, 510, 0, 0
-};
-
-DECLARE_ALIGNED(16, static const int16_t,
- mrow_scan_16x32_neighbors[513 * MAX_NEIGHBORS]) = {
- 0, 0, 0, 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6,
- 6, 7, 7, 8, 8, 9, 9, 10, 10, 11, 11, 12, 12, 13, 13,
- 14, 14, 0, 0, 1, 16, 2, 17, 3, 18, 4, 19, 5, 20, 6,
- 21, 7, 22, 8, 23, 9, 24, 10, 25, 11, 26, 12, 27, 13, 28,
- 14, 29, 15, 30, 16, 16, 17, 32, 18, 33, 19, 34, 20, 35, 21,
- 36, 22, 37, 23, 38, 24, 39, 25, 40, 26, 41, 27, 42, 28, 43,
- 29, 44, 30, 45, 31, 46, 32, 32, 33, 48, 34, 49, 35, 50, 36,
- 51, 37, 52, 38, 53, 39, 54, 40, 55, 41, 56, 42, 57, 43, 58,
- 44, 59, 45, 60, 46, 61, 47, 62, 48, 48, 49, 64, 50, 65, 51,
- 66, 52, 67, 53, 68, 54, 69, 55, 70, 56, 71, 57, 72, 58, 73,
- 59, 74, 60, 75, 61, 76, 62, 77, 63, 78, 64, 64, 65, 80, 66,
- 81, 67, 82, 68, 83, 69, 84, 70, 85, 71, 86, 72, 87, 73, 88,
- 74, 89, 75, 90, 76, 91, 77, 92, 78, 93, 79, 94, 80, 80, 81,
- 96, 82, 97, 83, 98, 84, 99, 85, 100, 86, 101, 87, 102, 88, 103,
- 89, 104, 90, 105, 91, 106, 92, 107, 93, 108, 94, 109, 95, 110, 96,
- 96, 97, 112, 98, 113, 99, 114, 100, 115, 101, 116, 102, 117, 103, 118,
- 104, 119, 105, 120, 106, 121, 107, 122, 108, 123, 109, 124, 110, 125, 111,
- 126, 112, 112, 113, 128, 114, 129, 115, 130, 116, 131, 117, 132, 118, 133,
- 119, 134, 120, 135, 121, 136, 122, 137, 123, 138, 124, 139, 125, 140, 126,
- 141, 127, 142, 128, 128, 129, 144, 130, 145, 131, 146, 132, 147, 133, 148,
- 134, 149, 135, 150, 136, 151, 137, 152, 138, 153, 139, 154, 140, 155, 141,
- 156, 142, 157, 143, 158, 144, 144, 145, 160, 146, 161, 147, 162, 148, 163,
- 149, 164, 150, 165, 151, 166, 152, 167, 153, 168, 154, 169, 155, 170, 156,
- 171, 157, 172, 158, 173, 159, 174, 160, 160, 161, 176, 162, 177, 163, 178,
- 164, 179, 165, 180, 166, 181, 167, 182, 168, 183, 169, 184, 170, 185, 171,
- 186, 172, 187, 173, 188, 174, 189, 175, 190, 176, 176, 177, 192, 178, 193,
- 179, 194, 180, 195, 181, 196, 182, 197, 183, 198, 184, 199, 185, 200, 186,
- 201, 187, 202, 188, 203, 189, 204, 190, 205, 191, 206, 192, 192, 193, 208,
- 194, 209, 195, 210, 196, 211, 197, 212, 198, 213, 199, 214, 200, 215, 201,
- 216, 202, 217, 203, 218, 204, 219, 205, 220, 206, 221, 207, 222, 208, 208,
- 209, 224, 210, 225, 211, 226, 212, 227, 213, 228, 214, 229, 215, 230, 216,
- 231, 217, 232, 218, 233, 219, 234, 220, 235, 221, 236, 222, 237, 223, 238,
- 224, 224, 225, 240, 226, 241, 227, 242, 228, 243, 229, 244, 230, 245, 231,
- 246, 232, 247, 233, 248, 234, 249, 235, 250, 236, 251, 237, 252, 238, 253,
- 239, 254, 240, 240, 241, 256, 242, 257, 243, 258, 244, 259, 245, 260, 246,
- 261, 247, 262, 248, 263, 249, 264, 250, 265, 251, 266, 252, 267, 253, 268,
- 254, 269, 255, 270, 256, 256, 257, 272, 258, 273, 259, 274, 260, 275, 261,
- 276, 262, 277, 263, 278, 264, 279, 265, 280, 266, 281, 267, 282, 268, 283,
- 269, 284, 270, 285, 271, 286, 272, 272, 273, 288, 274, 289, 275, 290, 276,
- 291, 277, 292, 278, 293, 279, 294, 280, 295, 281, 296, 282, 297, 283, 298,
- 284, 299, 285, 300, 286, 301, 287, 302, 288, 288, 289, 304, 290, 305, 291,
- 306, 292, 307, 293, 308, 294, 309, 295, 310, 296, 311, 297, 312, 298, 313,
- 299, 314, 300, 315, 301, 316, 302, 317, 303, 318, 304, 304, 305, 320, 306,
- 321, 307, 322, 308, 323, 309, 324, 310, 325, 311, 326, 312, 327, 313, 328,
- 314, 329, 315, 330, 316, 331, 317, 332, 318, 333, 319, 334, 320, 320, 321,
- 336, 322, 337, 323, 338, 324, 339, 325, 340, 326, 341, 327, 342, 328, 343,
- 329, 344, 330, 345, 331, 346, 332, 347, 333, 348, 334, 349, 335, 350, 336,
- 336, 337, 352, 338, 353, 339, 354, 340, 355, 341, 356, 342, 357, 343, 358,
- 344, 359, 345, 360, 346, 361, 347, 362, 348, 363, 349, 364, 350, 365, 351,
- 366, 352, 352, 353, 368, 354, 369, 355, 370, 356, 371, 357, 372, 358, 373,
- 359, 374, 360, 375, 361, 376, 362, 377, 363, 378, 364, 379, 365, 380, 366,
- 381, 367, 382, 368, 368, 369, 384, 370, 385, 371, 386, 372, 387, 373, 388,
- 374, 389, 375, 390, 376, 391, 377, 392, 378, 393, 379, 394, 380, 395, 381,
- 396, 382, 397, 383, 398, 384, 384, 385, 400, 386, 401, 387, 402, 388, 403,
- 389, 404, 390, 405, 391, 406, 392, 407, 393, 408, 394, 409, 395, 410, 396,
- 411, 397, 412, 398, 413, 399, 414, 400, 400, 401, 416, 402, 417, 403, 418,
- 404, 419, 405, 420, 406, 421, 407, 422, 408, 423, 409, 424, 410, 425, 411,
- 426, 412, 427, 413, 428, 414, 429, 415, 430, 416, 416, 417, 432, 418, 433,
- 419, 434, 420, 435, 421, 436, 422, 437, 423, 438, 424, 439, 425, 440, 426,
- 441, 427, 442, 428, 443, 429, 444, 430, 445, 431, 446, 432, 432, 433, 448,
- 434, 449, 435, 450, 436, 451, 437, 452, 438, 453, 439, 454, 440, 455, 441,
- 456, 442, 457, 443, 458, 444, 459, 445, 460, 446, 461, 447, 462, 448, 448,
- 449, 464, 450, 465, 451, 466, 452, 467, 453, 468, 454, 469, 455, 470, 456,
- 471, 457, 472, 458, 473, 459, 474, 460, 475, 461, 476, 462, 477, 463, 478,
- 464, 464, 465, 480, 466, 481, 467, 482, 468, 483, 469, 484, 470, 485, 471,
- 486, 472, 487, 473, 488, 474, 489, 475, 490, 476, 491, 477, 492, 478, 493,
- 479, 494, 480, 480, 481, 496, 482, 497, 483, 498, 484, 499, 485, 500, 486,
- 501, 487, 502, 488, 503, 489, 504, 490, 505, 491, 506, 492, 507, 493, 508,
- 494, 509, 495, 510, 0, 0
-};
-
-DECLARE_ALIGNED(16, static const int16_t,
- mrow_scan_32x16_neighbors[513 * MAX_NEIGHBORS]) = {
- 0, 0, 0, 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6,
- 6, 7, 7, 8, 8, 9, 9, 10, 10, 11, 11, 12, 12, 13, 13,
- 14, 14, 15, 15, 16, 16, 17, 17, 18, 18, 19, 19, 20, 20, 21,
- 21, 22, 22, 23, 23, 24, 24, 25, 25, 26, 26, 27, 27, 28, 28,
- 29, 29, 30, 30, 0, 0, 1, 32, 2, 33, 3, 34, 4, 35, 5,
- 36, 6, 37, 7, 38, 8, 39, 9, 40, 10, 41, 11, 42, 12, 43,
- 13, 44, 14, 45, 15, 46, 16, 47, 17, 48, 18, 49, 19, 50, 20,
- 51, 21, 52, 22, 53, 23, 54, 24, 55, 25, 56, 26, 57, 27, 58,
- 28, 59, 29, 60, 30, 61, 31, 62, 32, 32, 33, 64, 34, 65, 35,
- 66, 36, 67, 37, 68, 38, 69, 39, 70, 40, 71, 41, 72, 42, 73,
- 43, 74, 44, 75, 45, 76, 46, 77, 47, 78, 48, 79, 49, 80, 50,
- 81, 51, 82, 52, 83, 53, 84, 54, 85, 55, 86, 56, 87, 57, 88,
- 58, 89, 59, 90, 60, 91, 61, 92, 62, 93, 63, 94, 64, 64, 65,
- 96, 66, 97, 67, 98, 68, 99, 69, 100, 70, 101, 71, 102, 72, 103,
- 73, 104, 74, 105, 75, 106, 76, 107, 77, 108, 78, 109, 79, 110, 80,
- 111, 81, 112, 82, 113, 83, 114, 84, 115, 85, 116, 86, 117, 87, 118,
- 88, 119, 89, 120, 90, 121, 91, 122, 92, 123, 93, 124, 94, 125, 95,
- 126, 96, 96, 97, 128, 98, 129, 99, 130, 100, 131, 101, 132, 102, 133,
- 103, 134, 104, 135, 105, 136, 106, 137, 107, 138, 108, 139, 109, 140, 110,
- 141, 111, 142, 112, 143, 113, 144, 114, 145, 115, 146, 116, 147, 117, 148,
- 118, 149, 119, 150, 120, 151, 121, 152, 122, 153, 123, 154, 124, 155, 125,
- 156, 126, 157, 127, 158, 128, 128, 129, 160, 130, 161, 131, 162, 132, 163,
- 133, 164, 134, 165, 135, 166, 136, 167, 137, 168, 138, 169, 139, 170, 140,
- 171, 141, 172, 142, 173, 143, 174, 144, 175, 145, 176, 146, 177, 147, 178,
- 148, 179, 149, 180, 150, 181, 151, 182, 152, 183, 153, 184, 154, 185, 155,
- 186, 156, 187, 157, 188, 158, 189, 159, 190, 160, 160, 161, 192, 162, 193,
- 163, 194, 164, 195, 165, 196, 166, 197, 167, 198, 168, 199, 169, 200, 170,
- 201, 171, 202, 172, 203, 173, 204, 174, 205, 175, 206, 176, 207, 177, 208,
- 178, 209, 179, 210, 180, 211, 181, 212, 182, 213, 183, 214, 184, 215, 185,
- 216, 186, 217, 187, 218, 188, 219, 189, 220, 190, 221, 191, 222, 192, 192,
- 193, 224, 194, 225, 195, 226, 196, 227, 197, 228, 198, 229, 199, 230, 200,
- 231, 201, 232, 202, 233, 203, 234, 204, 235, 205, 236, 206, 237, 207, 238,
- 208, 239, 209, 240, 210, 241, 211, 242, 212, 243, 213, 244, 214, 245, 215,
- 246, 216, 247, 217, 248, 218, 249, 219, 250, 220, 251, 221, 252, 222, 253,
- 223, 254, 224, 224, 225, 256, 226, 257, 227, 258, 228, 259, 229, 260, 230,
- 261, 231, 262, 232, 263, 233, 264, 234, 265, 235, 266, 236, 267, 237, 268,
- 238, 269, 239, 270, 240, 271, 241, 272, 242, 273, 243, 274, 244, 275, 245,
- 276, 246, 277, 247, 278, 248, 279, 249, 280, 250, 281, 251, 282, 252, 283,
- 253, 284, 254, 285, 255, 286, 256, 256, 257, 288, 258, 289, 259, 290, 260,
- 291, 261, 292, 262, 293, 263, 294, 264, 295, 265, 296, 266, 297, 267, 298,
- 268, 299, 269, 300, 270, 301, 271, 302, 272, 303, 273, 304, 274, 305, 275,
- 306, 276, 307, 277, 308, 278, 309, 279, 310, 280, 311, 281, 312, 282, 313,
- 283, 314, 284, 315, 285, 316, 286, 317, 287, 318, 288, 288, 289, 320, 290,
- 321, 291, 322, 292, 323, 293, 324, 294, 325, 295, 326, 296, 327, 297, 328,
- 298, 329, 299, 330, 300, 331, 301, 332, 302, 333, 303, 334, 304, 335, 305,
- 336, 306, 337, 307, 338, 308, 339, 309, 340, 310, 341, 311, 342, 312, 343,
- 313, 344, 314, 345, 315, 346, 316, 347, 317, 348, 318, 349, 319, 350, 320,
- 320, 321, 352, 322, 353, 323, 354, 324, 355, 325, 356, 326, 357, 327, 358,
- 328, 359, 329, 360, 330, 361, 331, 362, 332, 363, 333, 364, 334, 365, 335,
- 366, 336, 367, 337, 368, 338, 369, 339, 370, 340, 371, 341, 372, 342, 373,
- 343, 374, 344, 375, 345, 376, 346, 377, 347, 378, 348, 379, 349, 380, 350,
- 381, 351, 382, 352, 352, 353, 384, 354, 385, 355, 386, 356, 387, 357, 388,
- 358, 389, 359, 390, 360, 391, 361, 392, 362, 393, 363, 394, 364, 395, 365,
- 396, 366, 397, 367, 398, 368, 399, 369, 400, 370, 401, 371, 402, 372, 403,
- 373, 404, 374, 405, 375, 406, 376, 407, 377, 408, 378, 409, 379, 410, 380,
- 411, 381, 412, 382, 413, 383, 414, 384, 384, 385, 416, 386, 417, 387, 418,
- 388, 419, 389, 420, 390, 421, 391, 422, 392, 423, 393, 424, 394, 425, 395,
- 426, 396, 427, 397, 428, 398, 429, 399, 430, 400, 431, 401, 432, 402, 433,
- 403, 434, 404, 435, 405, 436, 406, 437, 407, 438, 408, 439, 409, 440, 410,
- 441, 411, 442, 412, 443, 413, 444, 414, 445, 415, 446, 416, 416, 417, 448,
- 418, 449, 419, 450, 420, 451, 421, 452, 422, 453, 423, 454, 424, 455, 425,
- 456, 426, 457, 427, 458, 428, 459, 429, 460, 430, 461, 431, 462, 432, 463,
- 433, 464, 434, 465, 435, 466, 436, 467, 437, 468, 438, 469, 439, 470, 440,
- 471, 441, 472, 442, 473, 443, 474, 444, 475, 445, 476, 446, 477, 447, 478,
- 448, 448, 449, 480, 450, 481, 451, 482, 452, 483, 453, 484, 454, 485, 455,
- 486, 456, 487, 457, 488, 458, 489, 459, 490, 460, 491, 461, 492, 462, 493,
- 463, 494, 464, 495, 465, 496, 466, 497, 467, 498, 468, 499, 469, 500, 470,
- 501, 471, 502, 472, 503, 473, 504, 474, 505, 475, 506, 476, 507, 477, 508,
- 478, 509, 479, 510, 0, 0
-};
-
-DECLARE_ALIGNED(16, static const int16_t,
- mcol_scan_16x16_neighbors[257 * MAX_NEIGHBORS]) = {
- 0, 0, 0, 0, 16, 16, 32, 32, 48, 48, 64, 64, 80, 80, 96,
- 96, 112, 112, 128, 128, 144, 144, 160, 160, 176, 176, 192, 192, 208, 208,
- 224, 224, 0, 0, 1, 16, 17, 32, 33, 48, 49, 64, 65, 80, 81,
- 96, 97, 112, 113, 128, 129, 144, 145, 160, 161, 176, 177, 192, 193, 208,
- 209, 224, 225, 240, 1, 1, 2, 17, 18, 33, 34, 49, 50, 65, 66,
- 81, 82, 97, 98, 113, 114, 129, 130, 145, 146, 161, 162, 177, 178, 193,
- 194, 209, 210, 225, 226, 241, 2, 2, 3, 18, 19, 34, 35, 50, 51,
- 66, 67, 82, 83, 98, 99, 114, 115, 130, 131, 146, 147, 162, 163, 178,
- 179, 194, 195, 210, 211, 226, 227, 242, 3, 3, 4, 19, 20, 35, 36,
- 51, 52, 67, 68, 83, 84, 99, 100, 115, 116, 131, 132, 147, 148, 163,
- 164, 179, 180, 195, 196, 211, 212, 227, 228, 243, 4, 4, 5, 20, 21,
- 36, 37, 52, 53, 68, 69, 84, 85, 100, 101, 116, 117, 132, 133, 148,
- 149, 164, 165, 180, 181, 196, 197, 212, 213, 228, 229, 244, 5, 5, 6,
- 21, 22, 37, 38, 53, 54, 69, 70, 85, 86, 101, 102, 117, 118, 133,
- 134, 149, 150, 165, 166, 181, 182, 197, 198, 213, 214, 229, 230, 245, 6,
- 6, 7, 22, 23, 38, 39, 54, 55, 70, 71, 86, 87, 102, 103, 118,
- 119, 134, 135, 150, 151, 166, 167, 182, 183, 198, 199, 214, 215, 230, 231,
- 246, 7, 7, 8, 23, 24, 39, 40, 55, 56, 71, 72, 87, 88, 103,
- 104, 119, 120, 135, 136, 151, 152, 167, 168, 183, 184, 199, 200, 215, 216,
- 231, 232, 247, 8, 8, 9, 24, 25, 40, 41, 56, 57, 72, 73, 88,
- 89, 104, 105, 120, 121, 136, 137, 152, 153, 168, 169, 184, 185, 200, 201,
- 216, 217, 232, 233, 248, 9, 9, 10, 25, 26, 41, 42, 57, 58, 73,
- 74, 89, 90, 105, 106, 121, 122, 137, 138, 153, 154, 169, 170, 185, 186,
- 201, 202, 217, 218, 233, 234, 249, 10, 10, 11, 26, 27, 42, 43, 58,
- 59, 74, 75, 90, 91, 106, 107, 122, 123, 138, 139, 154, 155, 170, 171,
- 186, 187, 202, 203, 218, 219, 234, 235, 250, 11, 11, 12, 27, 28, 43,
- 44, 59, 60, 75, 76, 91, 92, 107, 108, 123, 124, 139, 140, 155, 156,
- 171, 172, 187, 188, 203, 204, 219, 220, 235, 236, 251, 12, 12, 13, 28,
- 29, 44, 45, 60, 61, 76, 77, 92, 93, 108, 109, 124, 125, 140, 141,
- 156, 157, 172, 173, 188, 189, 204, 205, 220, 221, 236, 237, 252, 13, 13,
- 14, 29, 30, 45, 46, 61, 62, 77, 78, 93, 94, 109, 110, 125, 126,
- 141, 142, 157, 158, 173, 174, 189, 190, 205, 206, 221, 222, 237, 238, 253,
- 14, 14, 15, 30, 31, 46, 47, 62, 63, 78, 79, 94, 95, 110, 111,
- 126, 127, 142, 143, 158, 159, 174, 175, 190, 191, 206, 207, 222, 223, 238,
- 239, 254, 0, 0,
-};
-
-DECLARE_ALIGNED(16, static const int16_t,
- mrow_scan_16x16_neighbors[257 * MAX_NEIGHBORS]) = {
- 0, 0, 0, 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6,
- 6, 7, 7, 8, 8, 9, 9, 10, 10, 11, 11, 12, 12, 13, 13,
- 14, 14, 0, 0, 1, 16, 2, 17, 3, 18, 4, 19, 5, 20, 6,
- 21, 7, 22, 8, 23, 9, 24, 10, 25, 11, 26, 12, 27, 13, 28,
- 14, 29, 15, 30, 16, 16, 17, 32, 18, 33, 19, 34, 20, 35, 21,
- 36, 22, 37, 23, 38, 24, 39, 25, 40, 26, 41, 27, 42, 28, 43,
- 29, 44, 30, 45, 31, 46, 32, 32, 33, 48, 34, 49, 35, 50, 36,
- 51, 37, 52, 38, 53, 39, 54, 40, 55, 41, 56, 42, 57, 43, 58,
- 44, 59, 45, 60, 46, 61, 47, 62, 48, 48, 49, 64, 50, 65, 51,
- 66, 52, 67, 53, 68, 54, 69, 55, 70, 56, 71, 57, 72, 58, 73,
- 59, 74, 60, 75, 61, 76, 62, 77, 63, 78, 64, 64, 65, 80, 66,
- 81, 67, 82, 68, 83, 69, 84, 70, 85, 71, 86, 72, 87, 73, 88,
- 74, 89, 75, 90, 76, 91, 77, 92, 78, 93, 79, 94, 80, 80, 81,
- 96, 82, 97, 83, 98, 84, 99, 85, 100, 86, 101, 87, 102, 88, 103,
- 89, 104, 90, 105, 91, 106, 92, 107, 93, 108, 94, 109, 95, 110, 96,
- 96, 97, 112, 98, 113, 99, 114, 100, 115, 101, 116, 102, 117, 103, 118,
- 104, 119, 105, 120, 106, 121, 107, 122, 108, 123, 109, 124, 110, 125, 111,
- 126, 112, 112, 113, 128, 114, 129, 115, 130, 116, 131, 117, 132, 118, 133,
- 119, 134, 120, 135, 121, 136, 122, 137, 123, 138, 124, 139, 125, 140, 126,
- 141, 127, 142, 128, 128, 129, 144, 130, 145, 131, 146, 132, 147, 133, 148,
- 134, 149, 135, 150, 136, 151, 137, 152, 138, 153, 139, 154, 140, 155, 141,
- 156, 142, 157, 143, 158, 144, 144, 145, 160, 146, 161, 147, 162, 148, 163,
- 149, 164, 150, 165, 151, 166, 152, 167, 153, 168, 154, 169, 155, 170, 156,
- 171, 157, 172, 158, 173, 159, 174, 160, 160, 161, 176, 162, 177, 163, 178,
- 164, 179, 165, 180, 166, 181, 167, 182, 168, 183, 169, 184, 170, 185, 171,
- 186, 172, 187, 173, 188, 174, 189, 175, 190, 176, 176, 177, 192, 178, 193,
- 179, 194, 180, 195, 181, 196, 182, 197, 183, 198, 184, 199, 185, 200, 186,
- 201, 187, 202, 188, 203, 189, 204, 190, 205, 191, 206, 192, 192, 193, 208,
- 194, 209, 195, 210, 196, 211, 197, 212, 198, 213, 199, 214, 200, 215, 201,
- 216, 202, 217, 203, 218, 204, 219, 205, 220, 206, 221, 207, 222, 208, 208,
- 209, 224, 210, 225, 211, 226, 212, 227, 213, 228, 214, 229, 215, 230, 216,
- 231, 217, 232, 218, 233, 219, 234, 220, 235, 221, 236, 222, 237, 223, 238,
- 224, 224, 225, 240, 226, 241, 227, 242, 228, 243, 229, 244, 230, 245, 231,
- 246, 232, 247, 233, 248, 234, 249, 235, 250, 236, 251, 237, 252, 238, 253,
- 239, 254, 0, 0,
-};
-
-DECLARE_ALIGNED(16, static const int16_t,
- default_scan_16x16_neighbors[257 * MAX_NEIGHBORS]) = {
- 0, 0, 0, 0, 0, 0, 16, 16, 1, 16, 1, 1, 2, 2, 2,
- 17, 17, 32, 32, 32, 48, 48, 33, 48, 18, 33, 3, 18, 3, 3,
- 4, 4, 4, 19, 19, 34, 34, 49, 49, 64, 64, 64, 80, 80, 65,
- 80, 50, 65, 35, 50, 20, 35, 5, 20, 5, 5, 6, 6, 6, 21,
- 21, 36, 36, 51, 51, 66, 66, 81, 81, 96, 96, 96, 112, 112, 97,
- 112, 82, 97, 67, 82, 52, 67, 37, 52, 22, 37, 7, 22, 7, 7,
- 8, 8, 8, 23, 23, 38, 38, 53, 53, 68, 68, 83, 83, 98, 98,
- 113, 113, 128, 128, 128, 144, 144, 129, 144, 114, 129, 99, 114, 84, 99,
- 69, 84, 54, 69, 39, 54, 24, 39, 9, 24, 9, 9, 10, 10, 10,
- 25, 25, 40, 40, 55, 55, 70, 70, 85, 85, 100, 100, 115, 115, 130,
- 130, 145, 145, 160, 160, 160, 176, 176, 161, 176, 146, 161, 131, 146, 116,
- 131, 101, 116, 86, 101, 71, 86, 56, 71, 41, 56, 26, 41, 11, 26,
- 11, 11, 12, 12, 12, 27, 27, 42, 42, 57, 57, 72, 72, 87, 87,
- 102, 102, 117, 117, 132, 132, 147, 147, 162, 162, 177, 177, 192, 192, 192,
- 208, 208, 193, 208, 178, 193, 163, 178, 148, 163, 133, 148, 118, 133, 103,
- 118, 88, 103, 73, 88, 58, 73, 43, 58, 28, 43, 13, 28, 13, 13,
- 14, 14, 14, 29, 29, 44, 44, 59, 59, 74, 74, 89, 89, 104, 104,
- 119, 119, 134, 134, 149, 149, 164, 164, 179, 179, 194, 194, 209, 209, 224,
- 224, 224, 225, 240, 210, 225, 195, 210, 180, 195, 165, 180, 150, 165, 135,
- 150, 120, 135, 105, 120, 90, 105, 75, 90, 60, 75, 45, 60, 30, 45,
- 15, 30, 31, 46, 46, 61, 61, 76, 76, 91, 91, 106, 106, 121, 121,
- 136, 136, 151, 151, 166, 166, 181, 181, 196, 196, 211, 211, 226, 226, 241,
- 227, 242, 212, 227, 197, 212, 182, 197, 167, 182, 152, 167, 137, 152, 122,
- 137, 107, 122, 92, 107, 77, 92, 62, 77, 47, 62, 63, 78, 78, 93,
- 93, 108, 108, 123, 123, 138, 138, 153, 153, 168, 168, 183, 183, 198, 198,
- 213, 213, 228, 228, 243, 229, 244, 214, 229, 199, 214, 184, 199, 169, 184,
- 154, 169, 139, 154, 124, 139, 109, 124, 94, 109, 79, 94, 95, 110, 110,
- 125, 125, 140, 140, 155, 155, 170, 170, 185, 185, 200, 200, 215, 215, 230,
- 230, 245, 231, 246, 216, 231, 201, 216, 186, 201, 171, 186, 156, 171, 141,
- 156, 126, 141, 111, 126, 127, 142, 142, 157, 157, 172, 172, 187, 187, 202,
- 202, 217, 217, 232, 232, 247, 233, 248, 218, 233, 203, 218, 188, 203, 173,
- 188, 158, 173, 143, 158, 159, 174, 174, 189, 189, 204, 204, 219, 219, 234,
- 234, 249, 235, 250, 220, 235, 205, 220, 190, 205, 175, 190, 191, 206, 206,
- 221, 221, 236, 236, 251, 237, 252, 222, 237, 207, 222, 223, 238, 238, 253,
- 239, 254, 0, 0
-};
-
-DECLARE_ALIGNED(16, static const int16_t,
- mcol_scan_32x32_neighbors[1025 * MAX_NEIGHBORS]) = {
- 0, 0, 0, 0, 32, 32, 64, 64, 96, 96, 128, 128, 160, 160,
- 192, 192, 224, 224, 256, 256, 288, 288, 320, 320, 352, 352, 384, 384,
- 416, 416, 448, 448, 480, 480, 512, 512, 544, 544, 576, 576, 608, 608,
- 640, 640, 672, 672, 704, 704, 736, 736, 768, 768, 800, 800, 832, 832,
- 864, 864, 896, 896, 928, 928, 960, 960, 0, 0, 1, 32, 33, 64,
- 65, 96, 97, 128, 129, 160, 161, 192, 193, 224, 225, 256, 257, 288,
- 289, 320, 321, 352, 353, 384, 385, 416, 417, 448, 449, 480, 481, 512,
- 513, 544, 545, 576, 577, 608, 609, 640, 641, 672, 673, 704, 705, 736,
- 737, 768, 769, 800, 801, 832, 833, 864, 865, 896, 897, 928, 929, 960,
- 961, 992, 1, 1, 2, 33, 34, 65, 66, 97, 98, 129, 130, 161,
- 162, 193, 194, 225, 226, 257, 258, 289, 290, 321, 322, 353, 354, 385,
- 386, 417, 418, 449, 450, 481, 482, 513, 514, 545, 546, 577, 578, 609,
- 610, 641, 642, 673, 674, 705, 706, 737, 738, 769, 770, 801, 802, 833,
- 834, 865, 866, 897, 898, 929, 930, 961, 962, 993, 2, 2, 3, 34,
- 35, 66, 67, 98, 99, 130, 131, 162, 163, 194, 195, 226, 227, 258,
- 259, 290, 291, 322, 323, 354, 355, 386, 387, 418, 419, 450, 451, 482,
- 483, 514, 515, 546, 547, 578, 579, 610, 611, 642, 643, 674, 675, 706,
- 707, 738, 739, 770, 771, 802, 803, 834, 835, 866, 867, 898, 899, 930,
- 931, 962, 963, 994, 3, 3, 4, 35, 36, 67, 68, 99, 100, 131,
- 132, 163, 164, 195, 196, 227, 228, 259, 260, 291, 292, 323, 324, 355,
- 356, 387, 388, 419, 420, 451, 452, 483, 484, 515, 516, 547, 548, 579,
- 580, 611, 612, 643, 644, 675, 676, 707, 708, 739, 740, 771, 772, 803,
- 804, 835, 836, 867, 868, 899, 900, 931, 932, 963, 964, 995, 4, 4,
- 5, 36, 37, 68, 69, 100, 101, 132, 133, 164, 165, 196, 197, 228,
- 229, 260, 261, 292, 293, 324, 325, 356, 357, 388, 389, 420, 421, 452,
- 453, 484, 485, 516, 517, 548, 549, 580, 581, 612, 613, 644, 645, 676,
- 677, 708, 709, 740, 741, 772, 773, 804, 805, 836, 837, 868, 869, 900,
- 901, 932, 933, 964, 965, 996, 5, 5, 6, 37, 38, 69, 70, 101,
- 102, 133, 134, 165, 166, 197, 198, 229, 230, 261, 262, 293, 294, 325,
- 326, 357, 358, 389, 390, 421, 422, 453, 454, 485, 486, 517, 518, 549,
- 550, 581, 582, 613, 614, 645, 646, 677, 678, 709, 710, 741, 742, 773,
- 774, 805, 806, 837, 838, 869, 870, 901, 902, 933, 934, 965, 966, 997,
- 6, 6, 7, 38, 39, 70, 71, 102, 103, 134, 135, 166, 167, 198,
- 199, 230, 231, 262, 263, 294, 295, 326, 327, 358, 359, 390, 391, 422,
- 423, 454, 455, 486, 487, 518, 519, 550, 551, 582, 583, 614, 615, 646,
- 647, 678, 679, 710, 711, 742, 743, 774, 775, 806, 807, 838, 839, 870,
- 871, 902, 903, 934, 935, 966, 967, 998, 7, 7, 8, 39, 40, 71,
- 72, 103, 104, 135, 136, 167, 168, 199, 200, 231, 232, 263, 264, 295,
- 296, 327, 328, 359, 360, 391, 392, 423, 424, 455, 456, 487, 488, 519,
- 520, 551, 552, 583, 584, 615, 616, 647, 648, 679, 680, 711, 712, 743,
- 744, 775, 776, 807, 808, 839, 840, 871, 872, 903, 904, 935, 936, 967,
- 968, 999, 8, 8, 9, 40, 41, 72, 73, 104, 105, 136, 137, 168,
- 169, 200, 201, 232, 233, 264, 265, 296, 297, 328, 329, 360, 361, 392,
- 393, 424, 425, 456, 457, 488, 489, 520, 521, 552, 553, 584, 585, 616,
- 617, 648, 649, 680, 681, 712, 713, 744, 745, 776, 777, 808, 809, 840,
- 841, 872, 873, 904, 905, 936, 937, 968, 969, 1000, 9, 9, 10, 41,
- 42, 73, 74, 105, 106, 137, 138, 169, 170, 201, 202, 233, 234, 265,
- 266, 297, 298, 329, 330, 361, 362, 393, 394, 425, 426, 457, 458, 489,
- 490, 521, 522, 553, 554, 585, 586, 617, 618, 649, 650, 681, 682, 713,
- 714, 745, 746, 777, 778, 809, 810, 841, 842, 873, 874, 905, 906, 937,
- 938, 969, 970, 1001, 10, 10, 11, 42, 43, 74, 75, 106, 107, 138,
- 139, 170, 171, 202, 203, 234, 235, 266, 267, 298, 299, 330, 331, 362,
- 363, 394, 395, 426, 427, 458, 459, 490, 491, 522, 523, 554, 555, 586,
- 587, 618, 619, 650, 651, 682, 683, 714, 715, 746, 747, 778, 779, 810,
- 811, 842, 843, 874, 875, 906, 907, 938, 939, 970, 971, 1002, 11, 11,
- 12, 43, 44, 75, 76, 107, 108, 139, 140, 171, 172, 203, 204, 235,
- 236, 267, 268, 299, 300, 331, 332, 363, 364, 395, 396, 427, 428, 459,
- 460, 491, 492, 523, 524, 555, 556, 587, 588, 619, 620, 651, 652, 683,
- 684, 715, 716, 747, 748, 779, 780, 811, 812, 843, 844, 875, 876, 907,
- 908, 939, 940, 971, 972, 1003, 12, 12, 13, 44, 45, 76, 77, 108,
- 109, 140, 141, 172, 173, 204, 205, 236, 237, 268, 269, 300, 301, 332,
- 333, 364, 365, 396, 397, 428, 429, 460, 461, 492, 493, 524, 525, 556,
- 557, 588, 589, 620, 621, 652, 653, 684, 685, 716, 717, 748, 749, 780,
- 781, 812, 813, 844, 845, 876, 877, 908, 909, 940, 941, 972, 973, 1004,
- 13, 13, 14, 45, 46, 77, 78, 109, 110, 141, 142, 173, 174, 205,
- 206, 237, 238, 269, 270, 301, 302, 333, 334, 365, 366, 397, 398, 429,
- 430, 461, 462, 493, 494, 525, 526, 557, 558, 589, 590, 621, 622, 653,
- 654, 685, 686, 717, 718, 749, 750, 781, 782, 813, 814, 845, 846, 877,
- 878, 909, 910, 941, 942, 973, 974, 1005, 14, 14, 15, 46, 47, 78,
- 79, 110, 111, 142, 143, 174, 175, 206, 207, 238, 239, 270, 271, 302,
- 303, 334, 335, 366, 367, 398, 399, 430, 431, 462, 463, 494, 495, 526,
- 527, 558, 559, 590, 591, 622, 623, 654, 655, 686, 687, 718, 719, 750,
- 751, 782, 783, 814, 815, 846, 847, 878, 879, 910, 911, 942, 943, 974,
- 975, 1006, 15, 15, 16, 47, 48, 79, 80, 111, 112, 143, 144, 175,
- 176, 207, 208, 239, 240, 271, 272, 303, 304, 335, 336, 367, 368, 399,
- 400, 431, 432, 463, 464, 495, 496, 527, 528, 559, 560, 591, 592, 623,
- 624, 655, 656, 687, 688, 719, 720, 751, 752, 783, 784, 815, 816, 847,
- 848, 879, 880, 911, 912, 943, 944, 975, 976, 1007, 16, 16, 17, 48,
- 49, 80, 81, 112, 113, 144, 145, 176, 177, 208, 209, 240, 241, 272,
- 273, 304, 305, 336, 337, 368, 369, 400, 401, 432, 433, 464, 465, 496,
- 497, 528, 529, 560, 561, 592, 593, 624, 625, 656, 657, 688, 689, 720,
- 721, 752, 753, 784, 785, 816, 817, 848, 849, 880, 881, 912, 913, 944,
- 945, 976, 977, 1008, 17, 17, 18, 49, 50, 81, 82, 113, 114, 145,
- 146, 177, 178, 209, 210, 241, 242, 273, 274, 305, 306, 337, 338, 369,
- 370, 401, 402, 433, 434, 465, 466, 497, 498, 529, 530, 561, 562, 593,
- 594, 625, 626, 657, 658, 689, 690, 721, 722, 753, 754, 785, 786, 817,
- 818, 849, 850, 881, 882, 913, 914, 945, 946, 977, 978, 1009, 18, 18,
- 19, 50, 51, 82, 83, 114, 115, 146, 147, 178, 179, 210, 211, 242,
- 243, 274, 275, 306, 307, 338, 339, 370, 371, 402, 403, 434, 435, 466,
- 467, 498, 499, 530, 531, 562, 563, 594, 595, 626, 627, 658, 659, 690,
- 691, 722, 723, 754, 755, 786, 787, 818, 819, 850, 851, 882, 883, 914,
- 915, 946, 947, 978, 979, 1010, 19, 19, 20, 51, 52, 83, 84, 115,
- 116, 147, 148, 179, 180, 211, 212, 243, 244, 275, 276, 307, 308, 339,
- 340, 371, 372, 403, 404, 435, 436, 467, 468, 499, 500, 531, 532, 563,
- 564, 595, 596, 627, 628, 659, 660, 691, 692, 723, 724, 755, 756, 787,
- 788, 819, 820, 851, 852, 883, 884, 915, 916, 947, 948, 979, 980, 1011,
- 20, 20, 21, 52, 53, 84, 85, 116, 117, 148, 149, 180, 181, 212,
- 213, 244, 245, 276, 277, 308, 309, 340, 341, 372, 373, 404, 405, 436,
- 437, 468, 469, 500, 501, 532, 533, 564, 565, 596, 597, 628, 629, 660,
- 661, 692, 693, 724, 725, 756, 757, 788, 789, 820, 821, 852, 853, 884,
- 885, 916, 917, 948, 949, 980, 981, 1012, 21, 21, 22, 53, 54, 85,
- 86, 117, 118, 149, 150, 181, 182, 213, 214, 245, 246, 277, 278, 309,
- 310, 341, 342, 373, 374, 405, 406, 437, 438, 469, 470, 501, 502, 533,
- 534, 565, 566, 597, 598, 629, 630, 661, 662, 693, 694, 725, 726, 757,
- 758, 789, 790, 821, 822, 853, 854, 885, 886, 917, 918, 949, 950, 981,
- 982, 1013, 22, 22, 23, 54, 55, 86, 87, 118, 119, 150, 151, 182,
- 183, 214, 215, 246, 247, 278, 279, 310, 311, 342, 343, 374, 375, 406,
- 407, 438, 439, 470, 471, 502, 503, 534, 535, 566, 567, 598, 599, 630,
- 631, 662, 663, 694, 695, 726, 727, 758, 759, 790, 791, 822, 823, 854,
- 855, 886, 887, 918, 919, 950, 951, 982, 983, 1014, 23, 23, 24, 55,
- 56, 87, 88, 119, 120, 151, 152, 183, 184, 215, 216, 247, 248, 279,
- 280, 311, 312, 343, 344, 375, 376, 407, 408, 439, 440, 471, 472, 503,
- 504, 535, 536, 567, 568, 599, 600, 631, 632, 663, 664, 695, 696, 727,
- 728, 759, 760, 791, 792, 823, 824, 855, 856, 887, 888, 919, 920, 951,
- 952, 983, 984, 1015, 24, 24, 25, 56, 57, 88, 89, 120, 121, 152,
- 153, 184, 185, 216, 217, 248, 249, 280, 281, 312, 313, 344, 345, 376,
- 377, 408, 409, 440, 441, 472, 473, 504, 505, 536, 537, 568, 569, 600,
- 601, 632, 633, 664, 665, 696, 697, 728, 729, 760, 761, 792, 793, 824,
- 825, 856, 857, 888, 889, 920, 921, 952, 953, 984, 985, 1016, 25, 25,
- 26, 57, 58, 89, 90, 121, 122, 153, 154, 185, 186, 217, 218, 249,
- 250, 281, 282, 313, 314, 345, 346, 377, 378, 409, 410, 441, 442, 473,
- 474, 505, 506, 537, 538, 569, 570, 601, 602, 633, 634, 665, 666, 697,
- 698, 729, 730, 761, 762, 793, 794, 825, 826, 857, 858, 889, 890, 921,
- 922, 953, 954, 985, 986, 1017, 26, 26, 27, 58, 59, 90, 91, 122,
- 123, 154, 155, 186, 187, 218, 219, 250, 251, 282, 283, 314, 315, 346,
- 347, 378, 379, 410, 411, 442, 443, 474, 475, 506, 507, 538, 539, 570,
- 571, 602, 603, 634, 635, 666, 667, 698, 699, 730, 731, 762, 763, 794,
- 795, 826, 827, 858, 859, 890, 891, 922, 923, 954, 955, 986, 987, 1018,
- 27, 27, 28, 59, 60, 91, 92, 123, 124, 155, 156, 187, 188, 219,
- 220, 251, 252, 283, 284, 315, 316, 347, 348, 379, 380, 411, 412, 443,
- 444, 475, 476, 507, 508, 539, 540, 571, 572, 603, 604, 635, 636, 667,
- 668, 699, 700, 731, 732, 763, 764, 795, 796, 827, 828, 859, 860, 891,
- 892, 923, 924, 955, 956, 987, 988, 1019, 28, 28, 29, 60, 61, 92,
- 93, 124, 125, 156, 157, 188, 189, 220, 221, 252, 253, 284, 285, 316,
- 317, 348, 349, 380, 381, 412, 413, 444, 445, 476, 477, 508, 509, 540,
- 541, 572, 573, 604, 605, 636, 637, 668, 669, 700, 701, 732, 733, 764,
- 765, 796, 797, 828, 829, 860, 861, 892, 893, 924, 925, 956, 957, 988,
- 989, 1020, 29, 29, 30, 61, 62, 93, 94, 125, 126, 157, 158, 189,
- 190, 221, 222, 253, 254, 285, 286, 317, 318, 349, 350, 381, 382, 413,
- 414, 445, 446, 477, 478, 509, 510, 541, 542, 573, 574, 605, 606, 637,
- 638, 669, 670, 701, 702, 733, 734, 765, 766, 797, 798, 829, 830, 861,
- 862, 893, 894, 925, 926, 957, 958, 989, 990, 1021, 30, 30, 31, 62,
- 63, 94, 95, 126, 127, 158, 159, 190, 191, 222, 223, 254, 255, 286,
- 287, 318, 319, 350, 351, 382, 383, 414, 415, 446, 447, 478, 479, 510,
- 511, 542, 543, 574, 575, 606, 607, 638, 639, 670, 671, 702, 703, 734,
- 735, 766, 767, 798, 799, 830, 831, 862, 863, 894, 895, 926, 927, 958,
- 959, 990, 991, 1022, 0, 0,
-};
-
-DECLARE_ALIGNED(16, static const int16_t,
- mrow_scan_32x32_neighbors[1025 * MAX_NEIGHBORS]) = {
- 0, 0, 0, 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5,
- 6, 6, 7, 7, 8, 8, 9, 9, 10, 10, 11, 11, 12, 12,
- 13, 13, 14, 14, 15, 15, 16, 16, 17, 17, 18, 18, 19, 19,
- 20, 20, 21, 21, 22, 22, 23, 23, 24, 24, 25, 25, 26, 26,
- 27, 27, 28, 28, 29, 29, 30, 30, 0, 0, 1, 32, 2, 33,
- 3, 34, 4, 35, 5, 36, 6, 37, 7, 38, 8, 39, 9, 40,
- 10, 41, 11, 42, 12, 43, 13, 44, 14, 45, 15, 46, 16, 47,
- 17, 48, 18, 49, 19, 50, 20, 51, 21, 52, 22, 53, 23, 54,
- 24, 55, 25, 56, 26, 57, 27, 58, 28, 59, 29, 60, 30, 61,
- 31, 62, 32, 32, 33, 64, 34, 65, 35, 66, 36, 67, 37, 68,
- 38, 69, 39, 70, 40, 71, 41, 72, 42, 73, 43, 74, 44, 75,
- 45, 76, 46, 77, 47, 78, 48, 79, 49, 80, 50, 81, 51, 82,
- 52, 83, 53, 84, 54, 85, 55, 86, 56, 87, 57, 88, 58, 89,
- 59, 90, 60, 91, 61, 92, 62, 93, 63, 94, 64, 64, 65, 96,
- 66, 97, 67, 98, 68, 99, 69, 100, 70, 101, 71, 102, 72, 103,
- 73, 104, 74, 105, 75, 106, 76, 107, 77, 108, 78, 109, 79, 110,
- 80, 111, 81, 112, 82, 113, 83, 114, 84, 115, 85, 116, 86, 117,
- 87, 118, 88, 119, 89, 120, 90, 121, 91, 122, 92, 123, 93, 124,
- 94, 125, 95, 126, 96, 96, 97, 128, 98, 129, 99, 130, 100, 131,
- 101, 132, 102, 133, 103, 134, 104, 135, 105, 136, 106, 137, 107, 138,
- 108, 139, 109, 140, 110, 141, 111, 142, 112, 143, 113, 144, 114, 145,
- 115, 146, 116, 147, 117, 148, 118, 149, 119, 150, 120, 151, 121, 152,
- 122, 153, 123, 154, 124, 155, 125, 156, 126, 157, 127, 158, 128, 128,
- 129, 160, 130, 161, 131, 162, 132, 163, 133, 164, 134, 165, 135, 166,
- 136, 167, 137, 168, 138, 169, 139, 170, 140, 171, 141, 172, 142, 173,
- 143, 174, 144, 175, 145, 176, 146, 177, 147, 178, 148, 179, 149, 180,
- 150, 181, 151, 182, 152, 183, 153, 184, 154, 185, 155, 186, 156, 187,
- 157, 188, 158, 189, 159, 190, 160, 160, 161, 192, 162, 193, 163, 194,
- 164, 195, 165, 196, 166, 197, 167, 198, 168, 199, 169, 200, 170, 201,
- 171, 202, 172, 203, 173, 204, 174, 205, 175, 206, 176, 207, 177, 208,
- 178, 209, 179, 210, 180, 211, 181, 212, 182, 213, 183, 214, 184, 215,
- 185, 216, 186, 217, 187, 218, 188, 219, 189, 220, 190, 221, 191, 222,
- 192, 192, 193, 224, 194, 225, 195, 226, 196, 227, 197, 228, 198, 229,
- 199, 230, 200, 231, 201, 232, 202, 233, 203, 234, 204, 235, 205, 236,
- 206, 237, 207, 238, 208, 239, 209, 240, 210, 241, 211, 242, 212, 243,
- 213, 244, 214, 245, 215, 246, 216, 247, 217, 248, 218, 249, 219, 250,
- 220, 251, 221, 252, 222, 253, 223, 254, 224, 224, 225, 256, 226, 257,
- 227, 258, 228, 259, 229, 260, 230, 261, 231, 262, 232, 263, 233, 264,
- 234, 265, 235, 266, 236, 267, 237, 268, 238, 269, 239, 270, 240, 271,
- 241, 272, 242, 273, 243, 274, 244, 275, 245, 276, 246, 277, 247, 278,
- 248, 279, 249, 280, 250, 281, 251, 282, 252, 283, 253, 284, 254, 285,
- 255, 286, 256, 256, 257, 288, 258, 289, 259, 290, 260, 291, 261, 292,
- 262, 293, 263, 294, 264, 295, 265, 296, 266, 297, 267, 298, 268, 299,
- 269, 300, 270, 301, 271, 302, 272, 303, 273, 304, 274, 305, 275, 306,
- 276, 307, 277, 308, 278, 309, 279, 310, 280, 311, 281, 312, 282, 313,
- 283, 314, 284, 315, 285, 316, 286, 317, 287, 318, 288, 288, 289, 320,
- 290, 321, 291, 322, 292, 323, 293, 324, 294, 325, 295, 326, 296, 327,
- 297, 328, 298, 329, 299, 330, 300, 331, 301, 332, 302, 333, 303, 334,
- 304, 335, 305, 336, 306, 337, 307, 338, 308, 339, 309, 340, 310, 341,
- 311, 342, 312, 343, 313, 344, 314, 345, 315, 346, 316, 347, 317, 348,
- 318, 349, 319, 350, 320, 320, 321, 352, 322, 353, 323, 354, 324, 355,
- 325, 356, 326, 357, 327, 358, 328, 359, 329, 360, 330, 361, 331, 362,
- 332, 363, 333, 364, 334, 365, 335, 366, 336, 367, 337, 368, 338, 369,
- 339, 370, 340, 371, 341, 372, 342, 373, 343, 374, 344, 375, 345, 376,
- 346, 377, 347, 378, 348, 379, 349, 380, 350, 381, 351, 382, 352, 352,
- 353, 384, 354, 385, 355, 386, 356, 387, 357, 388, 358, 389, 359, 390,
- 360, 391, 361, 392, 362, 393, 363, 394, 364, 395, 365, 396, 366, 397,
- 367, 398, 368, 399, 369, 400, 370, 401, 371, 402, 372, 403, 373, 404,
- 374, 405, 375, 406, 376, 407, 377, 408, 378, 409, 379, 410, 380, 411,
- 381, 412, 382, 413, 383, 414, 384, 384, 385, 416, 386, 417, 387, 418,
- 388, 419, 389, 420, 390, 421, 391, 422, 392, 423, 393, 424, 394, 425,
- 395, 426, 396, 427, 397, 428, 398, 429, 399, 430, 400, 431, 401, 432,
- 402, 433, 403, 434, 404, 435, 405, 436, 406, 437, 407, 438, 408, 439,
- 409, 440, 410, 441, 411, 442, 412, 443, 413, 444, 414, 445, 415, 446,
- 416, 416, 417, 448, 418, 449, 419, 450, 420, 451, 421, 452, 422, 453,
- 423, 454, 424, 455, 425, 456, 426, 457, 427, 458, 428, 459, 429, 460,
- 430, 461, 431, 462, 432, 463, 433, 464, 434, 465, 435, 466, 436, 467,
- 437, 468, 438, 469, 439, 470, 440, 471, 441, 472, 442, 473, 443, 474,
- 444, 475, 445, 476, 446, 477, 447, 478, 448, 448, 449, 480, 450, 481,
- 451, 482, 452, 483, 453, 484, 454, 485, 455, 486, 456, 487, 457, 488,
- 458, 489, 459, 490, 460, 491, 461, 492, 462, 493, 463, 494, 464, 495,
- 465, 496, 466, 497, 467, 498, 468, 499, 469, 500, 470, 501, 471, 502,
- 472, 503, 473, 504, 474, 505, 475, 506, 476, 507, 477, 508, 478, 509,
- 479, 510, 480, 480, 481, 512, 482, 513, 483, 514, 484, 515, 485, 516,
- 486, 517, 487, 518, 488, 519, 489, 520, 490, 521, 491, 522, 492, 523,
- 493, 524, 494, 525, 495, 526, 496, 527, 497, 528, 498, 529, 499, 530,
- 500, 531, 501, 532, 502, 533, 503, 534, 504, 535, 505, 536, 506, 537,
- 507, 538, 508, 539, 509, 540, 510, 541, 511, 542, 512, 512, 513, 544,
- 514, 545, 515, 546, 516, 547, 517, 548, 518, 549, 519, 550, 520, 551,
- 521, 552, 522, 553, 523, 554, 524, 555, 525, 556, 526, 557, 527, 558,
- 528, 559, 529, 560, 530, 561, 531, 562, 532, 563, 533, 564, 534, 565,
- 535, 566, 536, 567, 537, 568, 538, 569, 539, 570, 540, 571, 541, 572,
- 542, 573, 543, 574, 544, 544, 545, 576, 546, 577, 547, 578, 548, 579,
- 549, 580, 550, 581, 551, 582, 552, 583, 553, 584, 554, 585, 555, 586,
- 556, 587, 557, 588, 558, 589, 559, 590, 560, 591, 561, 592, 562, 593,
- 563, 594, 564, 595, 565, 596, 566, 597, 567, 598, 568, 599, 569, 600,
- 570, 601, 571, 602, 572, 603, 573, 604, 574, 605, 575, 606, 576, 576,
- 577, 608, 578, 609, 579, 610, 580, 611, 581, 612, 582, 613, 583, 614,
- 584, 615, 585, 616, 586, 617, 587, 618, 588, 619, 589, 620, 590, 621,
- 591, 622, 592, 623, 593, 624, 594, 625, 595, 626, 596, 627, 597, 628,
- 598, 629, 599, 630, 600, 631, 601, 632, 602, 633, 603, 634, 604, 635,
- 605, 636, 606, 637, 607, 638, 608, 608, 609, 640, 610, 641, 611, 642,
- 612, 643, 613, 644, 614, 645, 615, 646, 616, 647, 617, 648, 618, 649,
- 619, 650, 620, 651, 621, 652, 622, 653, 623, 654, 624, 655, 625, 656,
- 626, 657, 627, 658, 628, 659, 629, 660, 630, 661, 631, 662, 632, 663,
- 633, 664, 634, 665, 635, 666, 636, 667, 637, 668, 638, 669, 639, 670,
- 640, 640, 641, 672, 642, 673, 643, 674, 644, 675, 645, 676, 646, 677,
- 647, 678, 648, 679, 649, 680, 650, 681, 651, 682, 652, 683, 653, 684,
- 654, 685, 655, 686, 656, 687, 657, 688, 658, 689, 659, 690, 660, 691,
- 661, 692, 662, 693, 663, 694, 664, 695, 665, 696, 666, 697, 667, 698,
- 668, 699, 669, 700, 670, 701, 671, 702, 672, 672, 673, 704, 674, 705,
- 675, 706, 676, 707, 677, 708, 678, 709, 679, 710, 680, 711, 681, 712,
- 682, 713, 683, 714, 684, 715, 685, 716, 686, 717, 687, 718, 688, 719,
- 689, 720, 690, 721, 691, 722, 692, 723, 693, 724, 694, 725, 695, 726,
- 696, 727, 697, 728, 698, 729, 699, 730, 700, 731, 701, 732, 702, 733,
- 703, 734, 704, 704, 705, 736, 706, 737, 707, 738, 708, 739, 709, 740,
- 710, 741, 711, 742, 712, 743, 713, 744, 714, 745, 715, 746, 716, 747,
- 717, 748, 718, 749, 719, 750, 720, 751, 721, 752, 722, 753, 723, 754,
- 724, 755, 725, 756, 726, 757, 727, 758, 728, 759, 729, 760, 730, 761,
- 731, 762, 732, 763, 733, 764, 734, 765, 735, 766, 736, 736, 737, 768,
- 738, 769, 739, 770, 740, 771, 741, 772, 742, 773, 743, 774, 744, 775,
- 745, 776, 746, 777, 747, 778, 748, 779, 749, 780, 750, 781, 751, 782,
- 752, 783, 753, 784, 754, 785, 755, 786, 756, 787, 757, 788, 758, 789,
- 759, 790, 760, 791, 761, 792, 762, 793, 763, 794, 764, 795, 765, 796,
- 766, 797, 767, 798, 768, 768, 769, 800, 770, 801, 771, 802, 772, 803,
- 773, 804, 774, 805, 775, 806, 776, 807, 777, 808, 778, 809, 779, 810,
- 780, 811, 781, 812, 782, 813, 783, 814, 784, 815, 785, 816, 786, 817,
- 787, 818, 788, 819, 789, 820, 790, 821, 791, 822, 792, 823, 793, 824,
- 794, 825, 795, 826, 796, 827, 797, 828, 798, 829, 799, 830, 800, 800,
- 801, 832, 802, 833, 803, 834, 804, 835, 805, 836, 806, 837, 807, 838,
- 808, 839, 809, 840, 810, 841, 811, 842, 812, 843, 813, 844, 814, 845,
- 815, 846, 816, 847, 817, 848, 818, 849, 819, 850, 820, 851, 821, 852,
- 822, 853, 823, 854, 824, 855, 825, 856, 826, 857, 827, 858, 828, 859,
- 829, 860, 830, 861, 831, 862, 832, 832, 833, 864, 834, 865, 835, 866,
- 836, 867, 837, 868, 838, 869, 839, 870, 840, 871, 841, 872, 842, 873,
- 843, 874, 844, 875, 845, 876, 846, 877, 847, 878, 848, 879, 849, 880,
- 850, 881, 851, 882, 852, 883, 853, 884, 854, 885, 855, 886, 856, 887,
- 857, 888, 858, 889, 859, 890, 860, 891, 861, 892, 862, 893, 863, 894,
- 864, 864, 865, 896, 866, 897, 867, 898, 868, 899, 869, 900, 870, 901,
- 871, 902, 872, 903, 873, 904, 874, 905, 875, 906, 876, 907, 877, 908,
- 878, 909, 879, 910, 880, 911, 881, 912, 882, 913, 883, 914, 884, 915,
- 885, 916, 886, 917, 887, 918, 888, 919, 889, 920, 890, 921, 891, 922,
- 892, 923, 893, 924, 894, 925, 895, 926, 896, 896, 897, 928, 898, 929,
- 899, 930, 900, 931, 901, 932, 902, 933, 903, 934, 904, 935, 905, 936,
- 906, 937, 907, 938, 908, 939, 909, 940, 910, 941, 911, 942, 912, 943,
- 913, 944, 914, 945, 915, 946, 916, 947, 917, 948, 918, 949, 919, 950,
- 920, 951, 921, 952, 922, 953, 923, 954, 924, 955, 925, 956, 926, 957,
- 927, 958, 928, 928, 929, 960, 930, 961, 931, 962, 932, 963, 933, 964,
- 934, 965, 935, 966, 936, 967, 937, 968, 938, 969, 939, 970, 940, 971,
- 941, 972, 942, 973, 943, 974, 944, 975, 945, 976, 946, 977, 947, 978,
- 948, 979, 949, 980, 950, 981, 951, 982, 952, 983, 953, 984, 954, 985,
- 955, 986, 956, 987, 957, 988, 958, 989, 959, 990, 960, 960, 961, 992,
- 962, 993, 963, 994, 964, 995, 965, 996, 966, 997, 967, 998, 968, 999,
- 969, 1000, 970, 1001, 971, 1002, 972, 1003, 973, 1004, 974, 1005, 975, 1006,
- 976, 1007, 977, 1008, 978, 1009, 979, 1010, 980, 1011, 981, 1012, 982, 1013,
- 983, 1014, 984, 1015, 985, 1016, 986, 1017, 987, 1018, 988, 1019, 989, 1020,
- 990, 1021, 991, 1022, 0, 0,
-};
-
-DECLARE_ALIGNED(16, static const int16_t,
- default_scan_32x32_neighbors[1025 * MAX_NEIGHBORS]) = {
- 0, 0, 0, 0, 0, 0, 32, 32, 1, 32, 1, 1, 2, 2,
- 2, 33, 33, 64, 64, 64, 96, 96, 65, 96, 34, 65, 3, 34,
- 3, 3, 4, 4, 4, 35, 35, 66, 66, 97, 97, 128, 128, 128,
- 160, 160, 129, 160, 98, 129, 67, 98, 36, 67, 5, 36, 5, 5,
- 6, 6, 6, 37, 37, 68, 68, 99, 99, 130, 130, 161, 161, 192,
- 192, 192, 224, 224, 193, 224, 162, 193, 131, 162, 100, 131, 69, 100,
- 38, 69, 7, 38, 7, 7, 8, 8, 8, 39, 39, 70, 70, 101,
- 101, 132, 132, 163, 163, 194, 194, 225, 225, 256, 256, 256, 288, 288,
- 257, 288, 226, 257, 195, 226, 164, 195, 133, 164, 102, 133, 71, 102,
- 40, 71, 9, 40, 9, 9, 10, 10, 10, 41, 41, 72, 72, 103,
- 103, 134, 134, 165, 165, 196, 196, 227, 227, 258, 258, 289, 289, 320,
- 320, 320, 352, 352, 321, 352, 290, 321, 259, 290, 228, 259, 197, 228,
- 166, 197, 135, 166, 104, 135, 73, 104, 42, 73, 11, 42, 11, 11,
- 12, 12, 12, 43, 43, 74, 74, 105, 105, 136, 136, 167, 167, 198,
- 198, 229, 229, 260, 260, 291, 291, 322, 322, 353, 353, 384, 384, 384,
- 416, 416, 385, 416, 354, 385, 323, 354, 292, 323, 261, 292, 230, 261,
- 199, 230, 168, 199, 137, 168, 106, 137, 75, 106, 44, 75, 13, 44,
- 13, 13, 14, 14, 14, 45, 45, 76, 76, 107, 107, 138, 138, 169,
- 169, 200, 200, 231, 231, 262, 262, 293, 293, 324, 324, 355, 355, 386,
- 386, 417, 417, 448, 448, 448, 480, 480, 449, 480, 418, 449, 387, 418,
- 356, 387, 325, 356, 294, 325, 263, 294, 232, 263, 201, 232, 170, 201,
- 139, 170, 108, 139, 77, 108, 46, 77, 15, 46, 15, 15, 16, 16,
- 16, 47, 47, 78, 78, 109, 109, 140, 140, 171, 171, 202, 202, 233,
- 233, 264, 264, 295, 295, 326, 326, 357, 357, 388, 388, 419, 419, 450,
- 450, 481, 481, 512, 512, 512, 544, 544, 513, 544, 482, 513, 451, 482,
- 420, 451, 389, 420, 358, 389, 327, 358, 296, 327, 265, 296, 234, 265,
- 203, 234, 172, 203, 141, 172, 110, 141, 79, 110, 48, 79, 17, 48,
- 17, 17, 18, 18, 18, 49, 49, 80, 80, 111, 111, 142, 142, 173,
- 173, 204, 204, 235, 235, 266, 266, 297, 297, 328, 328, 359, 359, 390,
- 390, 421, 421, 452, 452, 483, 483, 514, 514, 545, 545, 576, 576, 576,
- 608, 608, 577, 608, 546, 577, 515, 546, 484, 515, 453, 484, 422, 453,
- 391, 422, 360, 391, 329, 360, 298, 329, 267, 298, 236, 267, 205, 236,
- 174, 205, 143, 174, 112, 143, 81, 112, 50, 81, 19, 50, 19, 19,
- 20, 20, 20, 51, 51, 82, 82, 113, 113, 144, 144, 175, 175, 206,
- 206, 237, 237, 268, 268, 299, 299, 330, 330, 361, 361, 392, 392, 423,
- 423, 454, 454, 485, 485, 516, 516, 547, 547, 578, 578, 609, 609, 640,
- 640, 640, 672, 672, 641, 672, 610, 641, 579, 610, 548, 579, 517, 548,
- 486, 517, 455, 486, 424, 455, 393, 424, 362, 393, 331, 362, 300, 331,
- 269, 300, 238, 269, 207, 238, 176, 207, 145, 176, 114, 145, 83, 114,
- 52, 83, 21, 52, 21, 21, 22, 22, 22, 53, 53, 84, 84, 115,
- 115, 146, 146, 177, 177, 208, 208, 239, 239, 270, 270, 301, 301, 332,
- 332, 363, 363, 394, 394, 425, 425, 456, 456, 487, 487, 518, 518, 549,
- 549, 580, 580, 611, 611, 642, 642, 673, 673, 704, 704, 704, 736, 736,
- 705, 736, 674, 705, 643, 674, 612, 643, 581, 612, 550, 581, 519, 550,
- 488, 519, 457, 488, 426, 457, 395, 426, 364, 395, 333, 364, 302, 333,
- 271, 302, 240, 271, 209, 240, 178, 209, 147, 178, 116, 147, 85, 116,
- 54, 85, 23, 54, 23, 23, 24, 24, 24, 55, 55, 86, 86, 117,
- 117, 148, 148, 179, 179, 210, 210, 241, 241, 272, 272, 303, 303, 334,
- 334, 365, 365, 396, 396, 427, 427, 458, 458, 489, 489, 520, 520, 551,
- 551, 582, 582, 613, 613, 644, 644, 675, 675, 706, 706, 737, 737, 768,
- 768, 768, 800, 800, 769, 800, 738, 769, 707, 738, 676, 707, 645, 676,
- 614, 645, 583, 614, 552, 583, 521, 552, 490, 521, 459, 490, 428, 459,
- 397, 428, 366, 397, 335, 366, 304, 335, 273, 304, 242, 273, 211, 242,
- 180, 211, 149, 180, 118, 149, 87, 118, 56, 87, 25, 56, 25, 25,
- 26, 26, 26, 57, 57, 88, 88, 119, 119, 150, 150, 181, 181, 212,
- 212, 243, 243, 274, 274, 305, 305, 336, 336, 367, 367, 398, 398, 429,
- 429, 460, 460, 491, 491, 522, 522, 553, 553, 584, 584, 615, 615, 646,
- 646, 677, 677, 708, 708, 739, 739, 770, 770, 801, 801, 832, 832, 832,
- 864, 864, 833, 864, 802, 833, 771, 802, 740, 771, 709, 740, 678, 709,
- 647, 678, 616, 647, 585, 616, 554, 585, 523, 554, 492, 523, 461, 492,
- 430, 461, 399, 430, 368, 399, 337, 368, 306, 337, 275, 306, 244, 275,
- 213, 244, 182, 213, 151, 182, 120, 151, 89, 120, 58, 89, 27, 58,
- 27, 27, 28, 28, 28, 59, 59, 90, 90, 121, 121, 152, 152, 183,
- 183, 214, 214, 245, 245, 276, 276, 307, 307, 338, 338, 369, 369, 400,
- 400, 431, 431, 462, 462, 493, 493, 524, 524, 555, 555, 586, 586, 617,
- 617, 648, 648, 679, 679, 710, 710, 741, 741, 772, 772, 803, 803, 834,
- 834, 865, 865, 896, 896, 896, 928, 928, 897, 928, 866, 897, 835, 866,
- 804, 835, 773, 804, 742, 773, 711, 742, 680, 711, 649, 680, 618, 649,
- 587, 618, 556, 587, 525, 556, 494, 525, 463, 494, 432, 463, 401, 432,
- 370, 401, 339, 370, 308, 339, 277, 308, 246, 277, 215, 246, 184, 215,
- 153, 184, 122, 153, 91, 122, 60, 91, 29, 60, 29, 29, 30, 30,
- 30, 61, 61, 92, 92, 123, 123, 154, 154, 185, 185, 216, 216, 247,
- 247, 278, 278, 309, 309, 340, 340, 371, 371, 402, 402, 433, 433, 464,
- 464, 495, 495, 526, 526, 557, 557, 588, 588, 619, 619, 650, 650, 681,
- 681, 712, 712, 743, 743, 774, 774, 805, 805, 836, 836, 867, 867, 898,
- 898, 929, 929, 960, 960, 960, 961, 992, 930, 961, 899, 930, 868, 899,
- 837, 868, 806, 837, 775, 806, 744, 775, 713, 744, 682, 713, 651, 682,
- 620, 651, 589, 620, 558, 589, 527, 558, 496, 527, 465, 496, 434, 465,
- 403, 434, 372, 403, 341, 372, 310, 341, 279, 310, 248, 279, 217, 248,
- 186, 217, 155, 186, 124, 155, 93, 124, 62, 93, 31, 62, 63, 94,
- 94, 125, 125, 156, 156, 187, 187, 218, 218, 249, 249, 280, 280, 311,
- 311, 342, 342, 373, 373, 404, 404, 435, 435, 466, 466, 497, 497, 528,
- 528, 559, 559, 590, 590, 621, 621, 652, 652, 683, 683, 714, 714, 745,
- 745, 776, 776, 807, 807, 838, 838, 869, 869, 900, 900, 931, 931, 962,
- 962, 993, 963, 994, 932, 963, 901, 932, 870, 901, 839, 870, 808, 839,
- 777, 808, 746, 777, 715, 746, 684, 715, 653, 684, 622, 653, 591, 622,
- 560, 591, 529, 560, 498, 529, 467, 498, 436, 467, 405, 436, 374, 405,
- 343, 374, 312, 343, 281, 312, 250, 281, 219, 250, 188, 219, 157, 188,
- 126, 157, 95, 126, 127, 158, 158, 189, 189, 220, 220, 251, 251, 282,
- 282, 313, 313, 344, 344, 375, 375, 406, 406, 437, 437, 468, 468, 499,
- 499, 530, 530, 561, 561, 592, 592, 623, 623, 654, 654, 685, 685, 716,
- 716, 747, 747, 778, 778, 809, 809, 840, 840, 871, 871, 902, 902, 933,
- 933, 964, 964, 995, 965, 996, 934, 965, 903, 934, 872, 903, 841, 872,
- 810, 841, 779, 810, 748, 779, 717, 748, 686, 717, 655, 686, 624, 655,
- 593, 624, 562, 593, 531, 562, 500, 531, 469, 500, 438, 469, 407, 438,
- 376, 407, 345, 376, 314, 345, 283, 314, 252, 283, 221, 252, 190, 221,
- 159, 190, 191, 222, 222, 253, 253, 284, 284, 315, 315, 346, 346, 377,
- 377, 408, 408, 439, 439, 470, 470, 501, 501, 532, 532, 563, 563, 594,
- 594, 625, 625, 656, 656, 687, 687, 718, 718, 749, 749, 780, 780, 811,
- 811, 842, 842, 873, 873, 904, 904, 935, 935, 966, 966, 997, 967, 998,
- 936, 967, 905, 936, 874, 905, 843, 874, 812, 843, 781, 812, 750, 781,
- 719, 750, 688, 719, 657, 688, 626, 657, 595, 626, 564, 595, 533, 564,
- 502, 533, 471, 502, 440, 471, 409, 440, 378, 409, 347, 378, 316, 347,
- 285, 316, 254, 285, 223, 254, 255, 286, 286, 317, 317, 348, 348, 379,
- 379, 410, 410, 441, 441, 472, 472, 503, 503, 534, 534, 565, 565, 596,
- 596, 627, 627, 658, 658, 689, 689, 720, 720, 751, 751, 782, 782, 813,
- 813, 844, 844, 875, 875, 906, 906, 937, 937, 968, 968, 999, 969, 1000,
- 938, 969, 907, 938, 876, 907, 845, 876, 814, 845, 783, 814, 752, 783,
- 721, 752, 690, 721, 659, 690, 628, 659, 597, 628, 566, 597, 535, 566,
- 504, 535, 473, 504, 442, 473, 411, 442, 380, 411, 349, 380, 318, 349,
- 287, 318, 319, 350, 350, 381, 381, 412, 412, 443, 443, 474, 474, 505,
- 505, 536, 536, 567, 567, 598, 598, 629, 629, 660, 660, 691, 691, 722,
- 722, 753, 753, 784, 784, 815, 815, 846, 846, 877, 877, 908, 908, 939,
- 939, 970, 970, 1001, 971, 1002, 940, 971, 909, 940, 878, 909, 847, 878,
- 816, 847, 785, 816, 754, 785, 723, 754, 692, 723, 661, 692, 630, 661,
- 599, 630, 568, 599, 537, 568, 506, 537, 475, 506, 444, 475, 413, 444,
- 382, 413, 351, 382, 383, 414, 414, 445, 445, 476, 476, 507, 507, 538,
- 538, 569, 569, 600, 600, 631, 631, 662, 662, 693, 693, 724, 724, 755,
- 755, 786, 786, 817, 817, 848, 848, 879, 879, 910, 910, 941, 941, 972,
- 972, 1003, 973, 1004, 942, 973, 911, 942, 880, 911, 849, 880, 818, 849,
- 787, 818, 756, 787, 725, 756, 694, 725, 663, 694, 632, 663, 601, 632,
- 570, 601, 539, 570, 508, 539, 477, 508, 446, 477, 415, 446, 447, 478,
- 478, 509, 509, 540, 540, 571, 571, 602, 602, 633, 633, 664, 664, 695,
- 695, 726, 726, 757, 757, 788, 788, 819, 819, 850, 850, 881, 881, 912,
- 912, 943, 943, 974, 974, 1005, 975, 1006, 944, 975, 913, 944, 882, 913,
- 851, 882, 820, 851, 789, 820, 758, 789, 727, 758, 696, 727, 665, 696,
- 634, 665, 603, 634, 572, 603, 541, 572, 510, 541, 479, 510, 511, 542,
- 542, 573, 573, 604, 604, 635, 635, 666, 666, 697, 697, 728, 728, 759,
- 759, 790, 790, 821, 821, 852, 852, 883, 883, 914, 914, 945, 945, 976,
- 976, 1007, 977, 1008, 946, 977, 915, 946, 884, 915, 853, 884, 822, 853,
- 791, 822, 760, 791, 729, 760, 698, 729, 667, 698, 636, 667, 605, 636,
- 574, 605, 543, 574, 575, 606, 606, 637, 637, 668, 668, 699, 699, 730,
- 730, 761, 761, 792, 792, 823, 823, 854, 854, 885, 885, 916, 916, 947,
- 947, 978, 978, 1009, 979, 1010, 948, 979, 917, 948, 886, 917, 855, 886,
- 824, 855, 793, 824, 762, 793, 731, 762, 700, 731, 669, 700, 638, 669,
- 607, 638, 639, 670, 670, 701, 701, 732, 732, 763, 763, 794, 794, 825,
- 825, 856, 856, 887, 887, 918, 918, 949, 949, 980, 980, 1011, 981, 1012,
- 950, 981, 919, 950, 888, 919, 857, 888, 826, 857, 795, 826, 764, 795,
- 733, 764, 702, 733, 671, 702, 703, 734, 734, 765, 765, 796, 796, 827,
- 827, 858, 858, 889, 889, 920, 920, 951, 951, 982, 982, 1013, 983, 1014,
- 952, 983, 921, 952, 890, 921, 859, 890, 828, 859, 797, 828, 766, 797,
- 735, 766, 767, 798, 798, 829, 829, 860, 860, 891, 891, 922, 922, 953,
- 953, 984, 984, 1015, 985, 1016, 954, 985, 923, 954, 892, 923, 861, 892,
- 830, 861, 799, 830, 831, 862, 862, 893, 893, 924, 924, 955, 955, 986,
- 986, 1017, 987, 1018, 956, 987, 925, 956, 894, 925, 863, 894, 895, 926,
- 926, 957, 957, 988, 988, 1019, 989, 1020, 958, 989, 927, 958, 959, 990,
- 990, 1021, 991, 1022, 0, 0
-};
-
-DECLARE_ALIGNED(16, static const int16_t, av1_default_iscan_4x4[16]) = {
- 0, 1, 5, 6, 2, 4, 7, 12, 3, 8, 11, 13, 9, 10, 14, 15
-};
-
-DECLARE_ALIGNED(16, static const int16_t, av1_mcol_iscan_4x4[16]) = {
- 0, 4, 8, 12, 1, 5, 9, 13, 2, 6, 10, 14, 3, 7, 11, 15,
-};
-
-DECLARE_ALIGNED(16, static const int16_t, av1_mrow_iscan_4x4[16]) = {
- 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
-};
-
-DECLARE_ALIGNED(16, static const int16_t, av1_default_iscan_4x8[32]) = {
- 0, 1, 3, 6, 2, 4, 7, 10, 5, 8, 11, 14, 9, 12, 15, 18,
- 13, 16, 19, 22, 17, 20, 23, 26, 21, 24, 27, 29, 25, 28, 30, 31,
-};
-
-DECLARE_ALIGNED(16, static const int16_t, av1_mcol_iscan_4x8[32]) = {
- 0, 8, 16, 24, 1, 9, 17, 25, 2, 10, 18, 26, 3, 11, 19, 27,
- 4, 12, 20, 28, 5, 13, 21, 29, 6, 14, 22, 30, 7, 15, 23, 31,
-};
-
-DECLARE_ALIGNED(16, static const int16_t, av1_mrow_iscan_4x8[32]) = {
- 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
- 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
-};
-
-DECLARE_ALIGNED(16, static const int16_t, av1_default_iscan_8x4[32]) = {
- 0, 2, 5, 9, 13, 17, 21, 25, 1, 4, 8, 12, 16, 20, 24, 28,
- 3, 7, 11, 15, 19, 23, 27, 30, 6, 10, 14, 18, 22, 26, 29, 31,
-};
-
-DECLARE_ALIGNED(16, static const int16_t, av1_mcol_iscan_8x4[32]) = {
- 0, 4, 8, 12, 16, 20, 24, 28, 1, 5, 9, 13, 17, 21, 25, 29,
- 2, 6, 10, 14, 18, 22, 26, 30, 3, 7, 11, 15, 19, 23, 27, 31,
-};
-
-DECLARE_ALIGNED(16, static const int16_t, av1_mrow_iscan_8x4[32]) = {
- 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
- 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
-};
-
-DECLARE_ALIGNED(16, static const int16_t, av1_default_iscan_4x16[64]) = {
- 0, 1, 3, 6, 2, 4, 7, 10, 5, 8, 11, 14, 9, 12, 15, 18,
- 13, 16, 19, 22, 17, 20, 23, 26, 21, 24, 27, 30, 25, 28, 31, 34,
- 29, 32, 35, 38, 33, 36, 39, 42, 37, 40, 43, 46, 41, 44, 47, 50,
- 45, 48, 51, 54, 49, 52, 55, 58, 53, 56, 59, 61, 57, 60, 62, 63,
-};
-
-DECLARE_ALIGNED(16, static const int16_t, av1_default_iscan_16x4[64]) = {
- 0, 2, 5, 9, 13, 17, 21, 25, 29, 33, 37, 41, 45, 49, 53, 57,
- 1, 4, 8, 12, 16, 20, 24, 28, 32, 36, 40, 44, 48, 52, 56, 60,
- 3, 7, 11, 15, 19, 23, 27, 31, 35, 39, 43, 47, 51, 55, 59, 62,
- 6, 10, 14, 18, 22, 26, 30, 34, 38, 42, 46, 50, 54, 58, 61, 63,
-};
-
-DECLARE_ALIGNED(16, static const int16_t, av1_mrow_iscan_4x16[64]) = {
- 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
- 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
- 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47,
- 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63,
-};
-
-DECLARE_ALIGNED(16, static const int16_t, av1_mrow_iscan_16x4[64]) = {
- 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
- 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
- 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47,
- 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63,
-};
-
-DECLARE_ALIGNED(16, static const int16_t, av1_mcol_iscan_4x16[64]) = {
- 0, 16, 32, 48, 1, 17, 33, 49, 2, 18, 34, 50, 3, 19, 35, 51,
- 4, 20, 36, 52, 5, 21, 37, 53, 6, 22, 38, 54, 7, 23, 39, 55,
- 8, 24, 40, 56, 9, 25, 41, 57, 10, 26, 42, 58, 11, 27, 43, 59,
- 12, 28, 44, 60, 13, 29, 45, 61, 14, 30, 46, 62, 15, 31, 47, 63,
-};
-
-DECLARE_ALIGNED(16, static const int16_t, av1_mcol_iscan_16x4[64]) = {
- 0, 4, 8, 12, 16, 20, 24, 28, 32, 36, 40, 44, 48, 52, 56, 60,
- 1, 5, 9, 13, 17, 21, 25, 29, 33, 37, 41, 45, 49, 53, 57, 61,
- 2, 6, 10, 14, 18, 22, 26, 30, 34, 38, 42, 46, 50, 54, 58, 62,
- 3, 7, 11, 15, 19, 23, 27, 31, 35, 39, 43, 47, 51, 55, 59, 63,
-};
-
-DECLARE_ALIGNED(16, static const int16_t, av1_default_iscan_8x32[256]) = {
- 0, 1, 3, 6, 10, 15, 21, 28, 2, 4, 7, 11, 16, 22, 29,
- 36, 5, 8, 12, 17, 23, 30, 37, 44, 9, 13, 18, 24, 31, 38,
- 45, 52, 14, 19, 25, 32, 39, 46, 53, 60, 20, 26, 33, 40, 47,
- 54, 61, 68, 27, 34, 41, 48, 55, 62, 69, 76, 35, 42, 49, 56,
- 63, 70, 77, 84, 43, 50, 57, 64, 71, 78, 85, 92, 51, 58, 65,
- 72, 79, 86, 93, 100, 59, 66, 73, 80, 87, 94, 101, 108, 67, 74,
- 81, 88, 95, 102, 109, 116, 75, 82, 89, 96, 103, 110, 117, 124, 83,
- 90, 97, 104, 111, 118, 125, 132, 91, 98, 105, 112, 119, 126, 133, 140,
- 99, 106, 113, 120, 127, 134, 141, 148, 107, 114, 121, 128, 135, 142, 149,
- 156, 115, 122, 129, 136, 143, 150, 157, 164, 123, 130, 137, 144, 151, 158,
- 165, 172, 131, 138, 145, 152, 159, 166, 173, 180, 139, 146, 153, 160, 167,
- 174, 181, 188, 147, 154, 161, 168, 175, 182, 189, 196, 155, 162, 169, 176,
- 183, 190, 197, 204, 163, 170, 177, 184, 191, 198, 205, 212, 171, 178, 185,
- 192, 199, 206, 213, 220, 179, 186, 193, 200, 207, 214, 221, 228, 187, 194,
- 201, 208, 215, 222, 229, 235, 195, 202, 209, 216, 223, 230, 236, 241, 203,
- 210, 217, 224, 231, 237, 242, 246, 211, 218, 225, 232, 238, 243, 247, 250,
- 219, 226, 233, 239, 244, 248, 251, 253, 227, 234, 240, 245, 249, 252, 254,
- 255,
-};
-
-DECLARE_ALIGNED(16, static const int16_t, av1_default_iscan_32x8[256]) = {
- 0, 2, 5, 9, 14, 20, 27, 35, 43, 51, 59, 67, 75, 83, 91,
- 99, 107, 115, 123, 131, 139, 147, 155, 163, 171, 179, 187, 195, 203, 211,
- 219, 227, 1, 4, 8, 13, 19, 26, 34, 42, 50, 58, 66, 74, 82,
- 90, 98, 106, 114, 122, 130, 138, 146, 154, 162, 170, 178, 186, 194, 202,
- 210, 218, 226, 234, 3, 7, 12, 18, 25, 33, 41, 49, 57, 65, 73,
- 81, 89, 97, 105, 113, 121, 129, 137, 145, 153, 161, 169, 177, 185, 193,
- 201, 209, 217, 225, 233, 240, 6, 11, 17, 24, 32, 40, 48, 56, 64,
- 72, 80, 88, 96, 104, 112, 120, 128, 136, 144, 152, 160, 168, 176, 184,
- 192, 200, 208, 216, 224, 232, 239, 245, 10, 16, 23, 31, 39, 47, 55,
- 63, 71, 79, 87, 95, 103, 111, 119, 127, 135, 143, 151, 159, 167, 175,
- 183, 191, 199, 207, 215, 223, 231, 238, 244, 249, 15, 22, 30, 38, 46,
- 54, 62, 70, 78, 86, 94, 102, 110, 118, 126, 134, 142, 150, 158, 166,
- 174, 182, 190, 198, 206, 214, 222, 230, 237, 243, 248, 252, 21, 29, 37,
- 45, 53, 61, 69, 77, 85, 93, 101, 109, 117, 125, 133, 141, 149, 157,
- 165, 173, 181, 189, 197, 205, 213, 221, 229, 236, 242, 247, 251, 254, 28,
- 36, 44, 52, 60, 68, 76, 84, 92, 100, 108, 116, 124, 132, 140, 148,
- 156, 164, 172, 180, 188, 196, 204, 212, 220, 228, 235, 241, 246, 250, 253,
- 255,
-};
-
-DECLARE_ALIGNED(16, static const int16_t, av1_mrow_iscan_8x32[256]) = {
- 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14,
- 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29,
- 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44,
- 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59,
- 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74,
- 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89,
- 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100, 101, 102, 103, 104,
- 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119,
- 120, 121, 122, 123, 124, 125, 126, 127, 128, 129, 130, 131, 132, 133, 134,
- 135, 136, 137, 138, 139, 140, 141, 142, 143, 144, 145, 146, 147, 148, 149,
- 150, 151, 152, 153, 154, 155, 156, 157, 158, 159, 160, 161, 162, 163, 164,
- 165, 166, 167, 168, 169, 170, 171, 172, 173, 174, 175, 176, 177, 178, 179,
- 180, 181, 182, 183, 184, 185, 186, 187, 188, 189, 190, 191, 192, 193, 194,
- 195, 196, 197, 198, 199, 200, 201, 202, 203, 204, 205, 206, 207, 208, 209,
- 210, 211, 212, 213, 214, 215, 216, 217, 218, 219, 220, 221, 222, 223, 224,
- 225, 226, 227, 228, 229, 230, 231, 232, 233, 234, 235, 236, 237, 238, 239,
- 240, 241, 242, 243, 244, 245, 246, 247, 248, 249, 250, 251, 252, 253, 254,
- 255,
-};
-
-DECLARE_ALIGNED(16, static const int16_t, av1_mrow_iscan_32x8[256]) = {
- 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14,
- 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29,
- 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44,
- 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59,
- 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74,
- 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89,
- 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100, 101, 102, 103, 104,
- 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119,
- 120, 121, 122, 123, 124, 125, 126, 127, 128, 129, 130, 131, 132, 133, 134,
- 135, 136, 137, 138, 139, 140, 141, 142, 143, 144, 145, 146, 147, 148, 149,
- 150, 151, 152, 153, 154, 155, 156, 157, 158, 159, 160, 161, 162, 163, 164,
- 165, 166, 167, 168, 169, 170, 171, 172, 173, 174, 175, 176, 177, 178, 179,
- 180, 181, 182, 183, 184, 185, 186, 187, 188, 189, 190, 191, 192, 193, 194,
- 195, 196, 197, 198, 199, 200, 201, 202, 203, 204, 205, 206, 207, 208, 209,
- 210, 211, 212, 213, 214, 215, 216, 217, 218, 219, 220, 221, 222, 223, 224,
- 225, 226, 227, 228, 229, 230, 231, 232, 233, 234, 235, 236, 237, 238, 239,
- 240, 241, 242, 243, 244, 245, 246, 247, 248, 249, 250, 251, 252, 253, 254,
- 255,
-};
-
-DECLARE_ALIGNED(16, static const int16_t, av1_mcol_iscan_8x32[256]) = {
- 0, 32, 64, 96, 128, 160, 192, 224, 1, 33, 65, 97, 129, 161, 193, 225,
- 2, 34, 66, 98, 130, 162, 194, 226, 3, 35, 67, 99, 131, 163, 195, 227,
- 4, 36, 68, 100, 132, 164, 196, 228, 5, 37, 69, 101, 133, 165, 197, 229,
- 6, 38, 70, 102, 134, 166, 198, 230, 7, 39, 71, 103, 135, 167, 199, 231,
- 8, 40, 72, 104, 136, 168, 200, 232, 9, 41, 73, 105, 137, 169, 201, 233,
- 10, 42, 74, 106, 138, 170, 202, 234, 11, 43, 75, 107, 139, 171, 203, 235,
- 12, 44, 76, 108, 140, 172, 204, 236, 13, 45, 77, 109, 141, 173, 205, 237,
- 14, 46, 78, 110, 142, 174, 206, 238, 15, 47, 79, 111, 143, 175, 207, 239,
- 16, 48, 80, 112, 144, 176, 208, 240, 17, 49, 81, 113, 145, 177, 209, 241,
- 18, 50, 82, 114, 146, 178, 210, 242, 19, 51, 83, 115, 147, 179, 211, 243,
- 20, 52, 84, 116, 148, 180, 212, 244, 21, 53, 85, 117, 149, 181, 213, 245,
- 22, 54, 86, 118, 150, 182, 214, 246, 23, 55, 87, 119, 151, 183, 215, 247,
- 24, 56, 88, 120, 152, 184, 216, 248, 25, 57, 89, 121, 153, 185, 217, 249,
- 26, 58, 90, 122, 154, 186, 218, 250, 27, 59, 91, 123, 155, 187, 219, 251,
- 28, 60, 92, 124, 156, 188, 220, 252, 29, 61, 93, 125, 157, 189, 221, 253,
- 30, 62, 94, 126, 158, 190, 222, 254, 31, 63, 95, 127, 159, 191, 223, 255,
-};
-
-DECLARE_ALIGNED(16, static const int16_t, av1_mcol_iscan_32x8[256]) = {
- 0, 8, 16, 24, 32, 40, 48, 56, 64, 72, 80, 88, 96, 104, 112,
- 120, 128, 136, 144, 152, 160, 168, 176, 184, 192, 200, 208, 216, 224, 232,
- 240, 248, 1, 9, 17, 25, 33, 41, 49, 57, 65, 73, 81, 89, 97,
- 105, 113, 121, 129, 137, 145, 153, 161, 169, 177, 185, 193, 201, 209, 217,
- 225, 233, 241, 249, 2, 10, 18, 26, 34, 42, 50, 58, 66, 74, 82,
- 90, 98, 106, 114, 122, 130, 138, 146, 154, 162, 170, 178, 186, 194, 202,
- 210, 218, 226, 234, 242, 250, 3, 11, 19, 27, 35, 43, 51, 59, 67,
- 75, 83, 91, 99, 107, 115, 123, 131, 139, 147, 155, 163, 171, 179, 187,
- 195, 203, 211, 219, 227, 235, 243, 251, 4, 12, 20, 28, 36, 44, 52,
- 60, 68, 76, 84, 92, 100, 108, 116, 124, 132, 140, 148, 156, 164, 172,
- 180, 188, 196, 204, 212, 220, 228, 236, 244, 252, 5, 13, 21, 29, 37,
- 45, 53, 61, 69, 77, 85, 93, 101, 109, 117, 125, 133, 141, 149, 157,
- 165, 173, 181, 189, 197, 205, 213, 221, 229, 237, 245, 253, 6, 14, 22,
- 30, 38, 46, 54, 62, 70, 78, 86, 94, 102, 110, 118, 126, 134, 142,
- 150, 158, 166, 174, 182, 190, 198, 206, 214, 222, 230, 238, 246, 254, 7,
- 15, 23, 31, 39, 47, 55, 63, 71, 79, 87, 95, 103, 111, 119, 127,
- 135, 143, 151, 159, 167, 175, 183, 191, 199, 207, 215, 223, 231, 239, 247,
- 255,
-};
-
-DECLARE_ALIGNED(16, static const int16_t, av1_mcol_iscan_8x8[64]) = {
- 0, 8, 16, 24, 32, 40, 48, 56, 1, 9, 17, 25, 33, 41, 49, 57,
- 2, 10, 18, 26, 34, 42, 50, 58, 3, 11, 19, 27, 35, 43, 51, 59,
- 4, 12, 20, 28, 36, 44, 52, 60, 5, 13, 21, 29, 37, 45, 53, 61,
- 6, 14, 22, 30, 38, 46, 54, 62, 7, 15, 23, 31, 39, 47, 55, 63,
-};
-
-DECLARE_ALIGNED(16, static const int16_t, av1_mrow_iscan_8x8[64]) = {
- 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
- 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
- 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47,
- 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63,
-};
-
-DECLARE_ALIGNED(16, static const int16_t, av1_default_iscan_8x8[64]) = {
- 0, 1, 5, 6, 14, 15, 27, 28, 2, 4, 7, 13, 16, 26, 29, 42,
- 3, 8, 12, 17, 25, 30, 41, 43, 9, 11, 18, 24, 31, 40, 44, 53,
- 10, 19, 23, 32, 39, 45, 52, 54, 20, 22, 33, 38, 46, 51, 55, 60,
- 21, 34, 37, 47, 50, 56, 59, 61, 35, 36, 48, 49, 57, 58, 62, 63
-};
-
-DECLARE_ALIGNED(16, static const int16_t, av1_default_iscan_8x16[128]) = {
- 0, 1, 3, 6, 10, 15, 21, 28, 2, 4, 7, 11, 16, 22, 29, 36,
- 5, 8, 12, 17, 23, 30, 37, 44, 9, 13, 18, 24, 31, 38, 45, 52,
- 14, 19, 25, 32, 39, 46, 53, 60, 20, 26, 33, 40, 47, 54, 61, 68,
- 27, 34, 41, 48, 55, 62, 69, 76, 35, 42, 49, 56, 63, 70, 77, 84,
- 43, 50, 57, 64, 71, 78, 85, 92, 51, 58, 65, 72, 79, 86, 93, 100,
- 59, 66, 73, 80, 87, 94, 101, 107, 67, 74, 81, 88, 95, 102, 108, 113,
- 75, 82, 89, 96, 103, 109, 114, 118, 83, 90, 97, 104, 110, 115, 119, 122,
- 91, 98, 105, 111, 116, 120, 123, 125, 99, 106, 112, 117, 121, 124, 126, 127,
-};
-
-DECLARE_ALIGNED(16, static const int16_t, av1_default_iscan_16x8[128]) = {
- 0, 2, 5, 9, 14, 20, 27, 35, 43, 51, 59, 67, 75, 83, 91, 99,
- 1, 4, 8, 13, 19, 26, 34, 42, 50, 58, 66, 74, 82, 90, 98, 106,
- 3, 7, 12, 18, 25, 33, 41, 49, 57, 65, 73, 81, 89, 97, 105, 112,
- 6, 11, 17, 24, 32, 40, 48, 56, 64, 72, 80, 88, 96, 104, 111, 117,
- 10, 16, 23, 31, 39, 47, 55, 63, 71, 79, 87, 95, 103, 110, 116, 121,
- 15, 22, 30, 38, 46, 54, 62, 70, 78, 86, 94, 102, 109, 115, 120, 124,
- 21, 29, 37, 45, 53, 61, 69, 77, 85, 93, 101, 108, 114, 119, 123, 126,
- 28, 36, 44, 52, 60, 68, 76, 84, 92, 100, 107, 113, 118, 122, 125, 127,
-};
-
-DECLARE_ALIGNED(16, static const int16_t, av1_mcol_iscan_8x16[128]) = {
- 0, 16, 32, 48, 64, 80, 96, 112, 1, 17, 33, 49, 65, 81, 97, 113,
- 2, 18, 34, 50, 66, 82, 98, 114, 3, 19, 35, 51, 67, 83, 99, 115,
- 4, 20, 36, 52, 68, 84, 100, 116, 5, 21, 37, 53, 69, 85, 101, 117,
- 6, 22, 38, 54, 70, 86, 102, 118, 7, 23, 39, 55, 71, 87, 103, 119,
- 8, 24, 40, 56, 72, 88, 104, 120, 9, 25, 41, 57, 73, 89, 105, 121,
- 10, 26, 42, 58, 74, 90, 106, 122, 11, 27, 43, 59, 75, 91, 107, 123,
- 12, 28, 44, 60, 76, 92, 108, 124, 13, 29, 45, 61, 77, 93, 109, 125,
- 14, 30, 46, 62, 78, 94, 110, 126, 15, 31, 47, 63, 79, 95, 111, 127,
-};
-
-DECLARE_ALIGNED(16, static const int16_t, av1_mcol_iscan_16x8[128]) = {
- 0, 8, 16, 24, 32, 40, 48, 56, 64, 72, 80, 88, 96, 104, 112, 120,
- 1, 9, 17, 25, 33, 41, 49, 57, 65, 73, 81, 89, 97, 105, 113, 121,
- 2, 10, 18, 26, 34, 42, 50, 58, 66, 74, 82, 90, 98, 106, 114, 122,
- 3, 11, 19, 27, 35, 43, 51, 59, 67, 75, 83, 91, 99, 107, 115, 123,
- 4, 12, 20, 28, 36, 44, 52, 60, 68, 76, 84, 92, 100, 108, 116, 124,
- 5, 13, 21, 29, 37, 45, 53, 61, 69, 77, 85, 93, 101, 109, 117, 125,
- 6, 14, 22, 30, 38, 46, 54, 62, 70, 78, 86, 94, 102, 110, 118, 126,
- 7, 15, 23, 31, 39, 47, 55, 63, 71, 79, 87, 95, 103, 111, 119, 127,
-};
-
-DECLARE_ALIGNED(16, static const int16_t, av1_mrow_iscan_8x16[128]) = {
- 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14,
- 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29,
- 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44,
- 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59,
- 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74,
- 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89,
- 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100, 101, 102, 103, 104,
- 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119,
- 120, 121, 122, 123, 124, 125, 126, 127,
-};
-
-DECLARE_ALIGNED(16, static const int16_t, av1_mrow_iscan_16x8[128]) = {
- 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14,
- 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29,
- 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44,
- 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59,
- 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74,
- 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89,
- 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100, 101, 102, 103, 104,
- 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119,
- 120, 121, 122, 123, 124, 125, 126, 127,
-};
-
-DECLARE_ALIGNED(16, static const int16_t, av1_default_iscan_16x32[512]) = {
- 0, 1, 3, 6, 10, 15, 21, 28, 36, 45, 55, 66, 78, 91, 105,
- 120, 2, 4, 7, 11, 16, 22, 29, 37, 46, 56, 67, 79, 92, 106,
- 121, 136, 5, 8, 12, 17, 23, 30, 38, 47, 57, 68, 80, 93, 107,
- 122, 137, 152, 9, 13, 18, 24, 31, 39, 48, 58, 69, 81, 94, 108,
- 123, 138, 153, 168, 14, 19, 25, 32, 40, 49, 59, 70, 82, 95, 109,
- 124, 139, 154, 169, 184, 20, 26, 33, 41, 50, 60, 71, 83, 96, 110,
- 125, 140, 155, 170, 185, 200, 27, 34, 42, 51, 61, 72, 84, 97, 111,
- 126, 141, 156, 171, 186, 201, 216, 35, 43, 52, 62, 73, 85, 98, 112,
- 127, 142, 157, 172, 187, 202, 217, 232, 44, 53, 63, 74, 86, 99, 113,
- 128, 143, 158, 173, 188, 203, 218, 233, 248, 54, 64, 75, 87, 100, 114,
- 129, 144, 159, 174, 189, 204, 219, 234, 249, 264, 65, 76, 88, 101, 115,
- 130, 145, 160, 175, 190, 205, 220, 235, 250, 265, 280, 77, 89, 102, 116,
- 131, 146, 161, 176, 191, 206, 221, 236, 251, 266, 281, 296, 90, 103, 117,
- 132, 147, 162, 177, 192, 207, 222, 237, 252, 267, 282, 297, 312, 104, 118,
- 133, 148, 163, 178, 193, 208, 223, 238, 253, 268, 283, 298, 313, 328, 119,
- 134, 149, 164, 179, 194, 209, 224, 239, 254, 269, 284, 299, 314, 329, 344,
- 135, 150, 165, 180, 195, 210, 225, 240, 255, 270, 285, 300, 315, 330, 345,
- 360, 151, 166, 181, 196, 211, 226, 241, 256, 271, 286, 301, 316, 331, 346,
- 361, 376, 167, 182, 197, 212, 227, 242, 257, 272, 287, 302, 317, 332, 347,
- 362, 377, 392, 183, 198, 213, 228, 243, 258, 273, 288, 303, 318, 333, 348,
- 363, 378, 393, 407, 199, 214, 229, 244, 259, 274, 289, 304, 319, 334, 349,
- 364, 379, 394, 408, 421, 215, 230, 245, 260, 275, 290, 305, 320, 335, 350,
- 365, 380, 395, 409, 422, 434, 231, 246, 261, 276, 291, 306, 321, 336, 351,
- 366, 381, 396, 410, 423, 435, 446, 247, 262, 277, 292, 307, 322, 337, 352,
- 367, 382, 397, 411, 424, 436, 447, 457, 263, 278, 293, 308, 323, 338, 353,
- 368, 383, 398, 412, 425, 437, 448, 458, 467, 279, 294, 309, 324, 339, 354,
- 369, 384, 399, 413, 426, 438, 449, 459, 468, 476, 295, 310, 325, 340, 355,
- 370, 385, 400, 414, 427, 439, 450, 460, 469, 477, 484, 311, 326, 341, 356,
- 371, 386, 401, 415, 428, 440, 451, 461, 470, 478, 485, 491, 327, 342, 357,
- 372, 387, 402, 416, 429, 441, 452, 462, 471, 479, 486, 492, 497, 343, 358,
- 373, 388, 403, 417, 430, 442, 453, 463, 472, 480, 487, 493, 498, 502, 359,
- 374, 389, 404, 418, 431, 443, 454, 464, 473, 481, 488, 494, 499, 503, 506,
- 375, 390, 405, 419, 432, 444, 455, 465, 474, 482, 489, 495, 500, 504, 507,
- 509, 391, 406, 420, 433, 445, 456, 466, 475, 483, 490, 496, 501, 505, 508,
- 510, 511,
-};
-
-DECLARE_ALIGNED(16, static const int16_t, av1_default_iscan_32x16[512]) = {
- 0, 2, 5, 9, 14, 20, 27, 35, 44, 54, 65, 77, 90, 104, 119,
- 135, 151, 167, 183, 199, 215, 231, 247, 263, 279, 295, 311, 327, 343, 359,
- 375, 391, 1, 4, 8, 13, 19, 26, 34, 43, 53, 64, 76, 89, 103,
- 118, 134, 150, 166, 182, 198, 214, 230, 246, 262, 278, 294, 310, 326, 342,
- 358, 374, 390, 406, 3, 7, 12, 18, 25, 33, 42, 52, 63, 75, 88,
- 102, 117, 133, 149, 165, 181, 197, 213, 229, 245, 261, 277, 293, 309, 325,
- 341, 357, 373, 389, 405, 420, 6, 11, 17, 24, 32, 41, 51, 62, 74,
- 87, 101, 116, 132, 148, 164, 180, 196, 212, 228, 244, 260, 276, 292, 308,
- 324, 340, 356, 372, 388, 404, 419, 433, 10, 16, 23, 31, 40, 50, 61,
- 73, 86, 100, 115, 131, 147, 163, 179, 195, 211, 227, 243, 259, 275, 291,
- 307, 323, 339, 355, 371, 387, 403, 418, 432, 445, 15, 22, 30, 39, 49,
- 60, 72, 85, 99, 114, 130, 146, 162, 178, 194, 210, 226, 242, 258, 274,
- 290, 306, 322, 338, 354, 370, 386, 402, 417, 431, 444, 456, 21, 29, 38,
- 48, 59, 71, 84, 98, 113, 129, 145, 161, 177, 193, 209, 225, 241, 257,
- 273, 289, 305, 321, 337, 353, 369, 385, 401, 416, 430, 443, 455, 466, 28,
- 37, 47, 58, 70, 83, 97, 112, 128, 144, 160, 176, 192, 208, 224, 240,
- 256, 272, 288, 304, 320, 336, 352, 368, 384, 400, 415, 429, 442, 454, 465,
- 475, 36, 46, 57, 69, 82, 96, 111, 127, 143, 159, 175, 191, 207, 223,
- 239, 255, 271, 287, 303, 319, 335, 351, 367, 383, 399, 414, 428, 441, 453,
- 464, 474, 483, 45, 56, 68, 81, 95, 110, 126, 142, 158, 174, 190, 206,
- 222, 238, 254, 270, 286, 302, 318, 334, 350, 366, 382, 398, 413, 427, 440,
- 452, 463, 473, 482, 490, 55, 67, 80, 94, 109, 125, 141, 157, 173, 189,
- 205, 221, 237, 253, 269, 285, 301, 317, 333, 349, 365, 381, 397, 412, 426,
- 439, 451, 462, 472, 481, 489, 496, 66, 79, 93, 108, 124, 140, 156, 172,
- 188, 204, 220, 236, 252, 268, 284, 300, 316, 332, 348, 364, 380, 396, 411,
- 425, 438, 450, 461, 471, 480, 488, 495, 501, 78, 92, 107, 123, 139, 155,
- 171, 187, 203, 219, 235, 251, 267, 283, 299, 315, 331, 347, 363, 379, 395,
- 410, 424, 437, 449, 460, 470, 479, 487, 494, 500, 505, 91, 106, 122, 138,
- 154, 170, 186, 202, 218, 234, 250, 266, 282, 298, 314, 330, 346, 362, 378,
- 394, 409, 423, 436, 448, 459, 469, 478, 486, 493, 499, 504, 508, 105, 121,
- 137, 153, 169, 185, 201, 217, 233, 249, 265, 281, 297, 313, 329, 345, 361,
- 377, 393, 408, 422, 435, 447, 458, 468, 477, 485, 492, 498, 503, 507, 510,
- 120, 136, 152, 168, 184, 200, 216, 232, 248, 264, 280, 296, 312, 328, 344,
- 360, 376, 392, 407, 421, 434, 446, 457, 467, 476, 484, 491, 497, 502, 506,
- 509, 511,
-};
-
-DECLARE_ALIGNED(16, static const int16_t, av1_mcol_iscan_16x32[512]) = {
- 0, 32, 64, 96, 128, 160, 192, 224, 256, 288, 320, 352, 384, 416, 448, 480,
- 1, 33, 65, 97, 129, 161, 193, 225, 257, 289, 321, 353, 385, 417, 449, 481,
- 2, 34, 66, 98, 130, 162, 194, 226, 258, 290, 322, 354, 386, 418, 450, 482,
- 3, 35, 67, 99, 131, 163, 195, 227, 259, 291, 323, 355, 387, 419, 451, 483,
- 4, 36, 68, 100, 132, 164, 196, 228, 260, 292, 324, 356, 388, 420, 452, 484,
- 5, 37, 69, 101, 133, 165, 197, 229, 261, 293, 325, 357, 389, 421, 453, 485,
- 6, 38, 70, 102, 134, 166, 198, 230, 262, 294, 326, 358, 390, 422, 454, 486,
- 7, 39, 71, 103, 135, 167, 199, 231, 263, 295, 327, 359, 391, 423, 455, 487,
- 8, 40, 72, 104, 136, 168, 200, 232, 264, 296, 328, 360, 392, 424, 456, 488,
- 9, 41, 73, 105, 137, 169, 201, 233, 265, 297, 329, 361, 393, 425, 457, 489,
- 10, 42, 74, 106, 138, 170, 202, 234, 266, 298, 330, 362, 394, 426, 458, 490,
- 11, 43, 75, 107, 139, 171, 203, 235, 267, 299, 331, 363, 395, 427, 459, 491,
- 12, 44, 76, 108, 140, 172, 204, 236, 268, 300, 332, 364, 396, 428, 460, 492,
- 13, 45, 77, 109, 141, 173, 205, 237, 269, 301, 333, 365, 397, 429, 461, 493,
- 14, 46, 78, 110, 142, 174, 206, 238, 270, 302, 334, 366, 398, 430, 462, 494,
- 15, 47, 79, 111, 143, 175, 207, 239, 271, 303, 335, 367, 399, 431, 463, 495,
- 16, 48, 80, 112, 144, 176, 208, 240, 272, 304, 336, 368, 400, 432, 464, 496,
- 17, 49, 81, 113, 145, 177, 209, 241, 273, 305, 337, 369, 401, 433, 465, 497,
- 18, 50, 82, 114, 146, 178, 210, 242, 274, 306, 338, 370, 402, 434, 466, 498,
- 19, 51, 83, 115, 147, 179, 211, 243, 275, 307, 339, 371, 403, 435, 467, 499,
- 20, 52, 84, 116, 148, 180, 212, 244, 276, 308, 340, 372, 404, 436, 468, 500,
- 21, 53, 85, 117, 149, 181, 213, 245, 277, 309, 341, 373, 405, 437, 469, 501,
- 22, 54, 86, 118, 150, 182, 214, 246, 278, 310, 342, 374, 406, 438, 470, 502,
- 23, 55, 87, 119, 151, 183, 215, 247, 279, 311, 343, 375, 407, 439, 471, 503,
- 24, 56, 88, 120, 152, 184, 216, 248, 280, 312, 344, 376, 408, 440, 472, 504,
- 25, 57, 89, 121, 153, 185, 217, 249, 281, 313, 345, 377, 409, 441, 473, 505,
- 26, 58, 90, 122, 154, 186, 218, 250, 282, 314, 346, 378, 410, 442, 474, 506,
- 27, 59, 91, 123, 155, 187, 219, 251, 283, 315, 347, 379, 411, 443, 475, 507,
- 28, 60, 92, 124, 156, 188, 220, 252, 284, 316, 348, 380, 412, 444, 476, 508,
- 29, 61, 93, 125, 157, 189, 221, 253, 285, 317, 349, 381, 413, 445, 477, 509,
- 30, 62, 94, 126, 158, 190, 222, 254, 286, 318, 350, 382, 414, 446, 478, 510,
- 31, 63, 95, 127, 159, 191, 223, 255, 287, 319, 351, 383, 415, 447, 479, 511,
-};
-
-DECLARE_ALIGNED(16, static const int16_t, av1_mcol_iscan_32x16[512]) = {
- 0, 16, 32, 48, 64, 80, 96, 112, 128, 144, 160, 176, 192, 208, 224,
- 240, 256, 272, 288, 304, 320, 336, 352, 368, 384, 400, 416, 432, 448, 464,
- 480, 496, 1, 17, 33, 49, 65, 81, 97, 113, 129, 145, 161, 177, 193,
- 209, 225, 241, 257, 273, 289, 305, 321, 337, 353, 369, 385, 401, 417, 433,
- 449, 465, 481, 497, 2, 18, 34, 50, 66, 82, 98, 114, 130, 146, 162,
- 178, 194, 210, 226, 242, 258, 274, 290, 306, 322, 338, 354, 370, 386, 402,
- 418, 434, 450, 466, 482, 498, 3, 19, 35, 51, 67, 83, 99, 115, 131,
- 147, 163, 179, 195, 211, 227, 243, 259, 275, 291, 307, 323, 339, 355, 371,
- 387, 403, 419, 435, 451, 467, 483, 499, 4, 20, 36, 52, 68, 84, 100,
- 116, 132, 148, 164, 180, 196, 212, 228, 244, 260, 276, 292, 308, 324, 340,
- 356, 372, 388, 404, 420, 436, 452, 468, 484, 500, 5, 21, 37, 53, 69,
- 85, 101, 117, 133, 149, 165, 181, 197, 213, 229, 245, 261, 277, 293, 309,
- 325, 341, 357, 373, 389, 405, 421, 437, 453, 469, 485, 501, 6, 22, 38,
- 54, 70, 86, 102, 118, 134, 150, 166, 182, 198, 214, 230, 246, 262, 278,
- 294, 310, 326, 342, 358, 374, 390, 406, 422, 438, 454, 470, 486, 502, 7,
- 23, 39, 55, 71, 87, 103, 119, 135, 151, 167, 183, 199, 215, 231, 247,
- 263, 279, 295, 311, 327, 343, 359, 375, 391, 407, 423, 439, 455, 471, 487,
- 503, 8, 24, 40, 56, 72, 88, 104, 120, 136, 152, 168, 184, 200, 216,
- 232, 248, 264, 280, 296, 312, 328, 344, 360, 376, 392, 408, 424, 440, 456,
- 472, 488, 504, 9, 25, 41, 57, 73, 89, 105, 121, 137, 153, 169, 185,
- 201, 217, 233, 249, 265, 281, 297, 313, 329, 345, 361, 377, 393, 409, 425,
- 441, 457, 473, 489, 505, 10, 26, 42, 58, 74, 90, 106, 122, 138, 154,
- 170, 186, 202, 218, 234, 250, 266, 282, 298, 314, 330, 346, 362, 378, 394,
- 410, 426, 442, 458, 474, 490, 506, 11, 27, 43, 59, 75, 91, 107, 123,
- 139, 155, 171, 187, 203, 219, 235, 251, 267, 283, 299, 315, 331, 347, 363,
- 379, 395, 411, 427, 443, 459, 475, 491, 507, 12, 28, 44, 60, 76, 92,
- 108, 124, 140, 156, 172, 188, 204, 220, 236, 252, 268, 284, 300, 316, 332,
- 348, 364, 380, 396, 412, 428, 444, 460, 476, 492, 508, 13, 29, 45, 61,
- 77, 93, 109, 125, 141, 157, 173, 189, 205, 221, 237, 253, 269, 285, 301,
- 317, 333, 349, 365, 381, 397, 413, 429, 445, 461, 477, 493, 509, 14, 30,
- 46, 62, 78, 94, 110, 126, 142, 158, 174, 190, 206, 222, 238, 254, 270,
- 286, 302, 318, 334, 350, 366, 382, 398, 414, 430, 446, 462, 478, 494, 510,
- 15, 31, 47, 63, 79, 95, 111, 127, 143, 159, 175, 191, 207, 223, 239,
- 255, 271, 287, 303, 319, 335, 351, 367, 383, 399, 415, 431, 447, 463, 479,
- 495, 511,
-};
-
-DECLARE_ALIGNED(16, static const int16_t, av1_mrow_iscan_16x32[512]) = {
- 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14,
- 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29,
- 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44,
- 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59,
- 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74,
- 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89,
- 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100, 101, 102, 103, 104,
- 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119,
- 120, 121, 122, 123, 124, 125, 126, 127, 128, 129, 130, 131, 132, 133, 134,
- 135, 136, 137, 138, 139, 140, 141, 142, 143, 144, 145, 146, 147, 148, 149,
- 150, 151, 152, 153, 154, 155, 156, 157, 158, 159, 160, 161, 162, 163, 164,
- 165, 166, 167, 168, 169, 170, 171, 172, 173, 174, 175, 176, 177, 178, 179,
- 180, 181, 182, 183, 184, 185, 186, 187, 188, 189, 190, 191, 192, 193, 194,
- 195, 196, 197, 198, 199, 200, 201, 202, 203, 204, 205, 206, 207, 208, 209,
- 210, 211, 212, 213, 214, 215, 216, 217, 218, 219, 220, 221, 222, 223, 224,
- 225, 226, 227, 228, 229, 230, 231, 232, 233, 234, 235, 236, 237, 238, 239,
- 240, 241, 242, 243, 244, 245, 246, 247, 248, 249, 250, 251, 252, 253, 254,
- 255, 256, 257, 258, 259, 260, 261, 262, 263, 264, 265, 266, 267, 268, 269,
- 270, 271, 272, 273, 274, 275, 276, 277, 278, 279, 280, 281, 282, 283, 284,
- 285, 286, 287, 288, 289, 290, 291, 292, 293, 294, 295, 296, 297, 298, 299,
- 300, 301, 302, 303, 304, 305, 306, 307, 308, 309, 310, 311, 312, 313, 314,
- 315, 316, 317, 318, 319, 320, 321, 322, 323, 324, 325, 326, 327, 328, 329,
- 330, 331, 332, 333, 334, 335, 336, 337, 338, 339, 340, 341, 342, 343, 344,
- 345, 346, 347, 348, 349, 350, 351, 352, 353, 354, 355, 356, 357, 358, 359,
- 360, 361, 362, 363, 364, 365, 366, 367, 368, 369, 370, 371, 372, 373, 374,
- 375, 376, 377, 378, 379, 380, 381, 382, 383, 384, 385, 386, 387, 388, 389,
- 390, 391, 392, 393, 394, 395, 396, 397, 398, 399, 400, 401, 402, 403, 404,
- 405, 406, 407, 408, 409, 410, 411, 412, 413, 414, 415, 416, 417, 418, 419,
- 420, 421, 422, 423, 424, 425, 426, 427, 428, 429, 430, 431, 432, 433, 434,
- 435, 436, 437, 438, 439, 440, 441, 442, 443, 444, 445, 446, 447, 448, 449,
- 450, 451, 452, 453, 454, 455, 456, 457, 458, 459, 460, 461, 462, 463, 464,
- 465, 466, 467, 468, 469, 470, 471, 472, 473, 474, 475, 476, 477, 478, 479,
- 480, 481, 482, 483, 484, 485, 486, 487, 488, 489, 490, 491, 492, 493, 494,
- 495, 496, 497, 498, 499, 500, 501, 502, 503, 504, 505, 506, 507, 508, 509,
- 510, 511,
-};
-
-DECLARE_ALIGNED(16, static const int16_t, av1_mrow_iscan_32x16[512]) = {
- 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14,
- 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29,
- 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44,
- 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59,
- 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74,
- 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89,
- 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100, 101, 102, 103, 104,
- 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119,
- 120, 121, 122, 123, 124, 125, 126, 127, 128, 129, 130, 131, 132, 133, 134,
- 135, 136, 137, 138, 139, 140, 141, 142, 143, 144, 145, 146, 147, 148, 149,
- 150, 151, 152, 153, 154, 155, 156, 157, 158, 159, 160, 161, 162, 163, 164,
- 165, 166, 167, 168, 169, 170, 171, 172, 173, 174, 175, 176, 177, 178, 179,
- 180, 181, 182, 183, 184, 185, 186, 187, 188, 189, 190, 191, 192, 193, 194,
- 195, 196, 197, 198, 199, 200, 201, 202, 203, 204, 205, 206, 207, 208, 209,
- 210, 211, 212, 213, 214, 215, 216, 217, 218, 219, 220, 221, 222, 223, 224,
- 225, 226, 227, 228, 229, 230, 231, 232, 233, 234, 235, 236, 237, 238, 239,
- 240, 241, 242, 243, 244, 245, 246, 247, 248, 249, 250, 251, 252, 253, 254,
- 255, 256, 257, 258, 259, 260, 261, 262, 263, 264, 265, 266, 267, 268, 269,
- 270, 271, 272, 273, 274, 275, 276, 277, 278, 279, 280, 281, 282, 283, 284,
- 285, 286, 287, 288, 289, 290, 291, 292, 293, 294, 295, 296, 297, 298, 299,
- 300, 301, 302, 303, 304, 305, 306, 307, 308, 309, 310, 311, 312, 313, 314,
- 315, 316, 317, 318, 319, 320, 321, 322, 323, 324, 325, 326, 327, 328, 329,
- 330, 331, 332, 333, 334, 335, 336, 337, 338, 339, 340, 341, 342, 343, 344,
- 345, 346, 347, 348, 349, 350, 351, 352, 353, 354, 355, 356, 357, 358, 359,
- 360, 361, 362, 363, 364, 365, 366, 367, 368, 369, 370, 371, 372, 373, 374,
- 375, 376, 377, 378, 379, 380, 381, 382, 383, 384, 385, 386, 387, 388, 389,
- 390, 391, 392, 393, 394, 395, 396, 397, 398, 399, 400, 401, 402, 403, 404,
- 405, 406, 407, 408, 409, 410, 411, 412, 413, 414, 415, 416, 417, 418, 419,
- 420, 421, 422, 423, 424, 425, 426, 427, 428, 429, 430, 431, 432, 433, 434,
- 435, 436, 437, 438, 439, 440, 441, 442, 443, 444, 445, 446, 447, 448, 449,
- 450, 451, 452, 453, 454, 455, 456, 457, 458, 459, 460, 461, 462, 463, 464,
- 465, 466, 467, 468, 469, 470, 471, 472, 473, 474, 475, 476, 477, 478, 479,
- 480, 481, 482, 483, 484, 485, 486, 487, 488, 489, 490, 491, 492, 493, 494,
- 495, 496, 497, 498, 499, 500, 501, 502, 503, 504, 505, 506, 507, 508, 509,
- 510, 511,
-};
-
-DECLARE_ALIGNED(16, static const int16_t, av1_mcol_iscan_16x16[256]) = {
- 0, 16, 32, 48, 64, 80, 96, 112, 128, 144, 160, 176, 192, 208, 224, 240,
- 1, 17, 33, 49, 65, 81, 97, 113, 129, 145, 161, 177, 193, 209, 225, 241,
- 2, 18, 34, 50, 66, 82, 98, 114, 130, 146, 162, 178, 194, 210, 226, 242,
- 3, 19, 35, 51, 67, 83, 99, 115, 131, 147, 163, 179, 195, 211, 227, 243,
- 4, 20, 36, 52, 68, 84, 100, 116, 132, 148, 164, 180, 196, 212, 228, 244,
- 5, 21, 37, 53, 69, 85, 101, 117, 133, 149, 165, 181, 197, 213, 229, 245,
- 6, 22, 38, 54, 70, 86, 102, 118, 134, 150, 166, 182, 198, 214, 230, 246,
- 7, 23, 39, 55, 71, 87, 103, 119, 135, 151, 167, 183, 199, 215, 231, 247,
- 8, 24, 40, 56, 72, 88, 104, 120, 136, 152, 168, 184, 200, 216, 232, 248,
- 9, 25, 41, 57, 73, 89, 105, 121, 137, 153, 169, 185, 201, 217, 233, 249,
- 10, 26, 42, 58, 74, 90, 106, 122, 138, 154, 170, 186, 202, 218, 234, 250,
- 11, 27, 43, 59, 75, 91, 107, 123, 139, 155, 171, 187, 203, 219, 235, 251,
- 12, 28, 44, 60, 76, 92, 108, 124, 140, 156, 172, 188, 204, 220, 236, 252,
- 13, 29, 45, 61, 77, 93, 109, 125, 141, 157, 173, 189, 205, 221, 237, 253,
- 14, 30, 46, 62, 78, 94, 110, 126, 142, 158, 174, 190, 206, 222, 238, 254,
- 15, 31, 47, 63, 79, 95, 111, 127, 143, 159, 175, 191, 207, 223, 239, 255,
-};
-
-DECLARE_ALIGNED(16, static const int16_t, av1_mrow_iscan_16x16[256]) = {
- 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14,
- 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29,
- 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44,
- 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59,
- 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74,
- 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89,
- 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100, 101, 102, 103, 104,
- 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119,
- 120, 121, 122, 123, 124, 125, 126, 127, 128, 129, 130, 131, 132, 133, 134,
- 135, 136, 137, 138, 139, 140, 141, 142, 143, 144, 145, 146, 147, 148, 149,
- 150, 151, 152, 153, 154, 155, 156, 157, 158, 159, 160, 161, 162, 163, 164,
- 165, 166, 167, 168, 169, 170, 171, 172, 173, 174, 175, 176, 177, 178, 179,
- 180, 181, 182, 183, 184, 185, 186, 187, 188, 189, 190, 191, 192, 193, 194,
- 195, 196, 197, 198, 199, 200, 201, 202, 203, 204, 205, 206, 207, 208, 209,
- 210, 211, 212, 213, 214, 215, 216, 217, 218, 219, 220, 221, 222, 223, 224,
- 225, 226, 227, 228, 229, 230, 231, 232, 233, 234, 235, 236, 237, 238, 239,
- 240, 241, 242, 243, 244, 245, 246, 247, 248, 249, 250, 251, 252, 253, 254,
- 255,
-};
-
-DECLARE_ALIGNED(16, static const int16_t, av1_default_iscan_16x16[256]) = {
- 0, 1, 5, 6, 14, 15, 27, 28, 44, 45, 65, 66, 90, 91, 119,
- 120, 2, 4, 7, 13, 16, 26, 29, 43, 46, 64, 67, 89, 92, 118,
- 121, 150, 3, 8, 12, 17, 25, 30, 42, 47, 63, 68, 88, 93, 117,
- 122, 149, 151, 9, 11, 18, 24, 31, 41, 48, 62, 69, 87, 94, 116,
- 123, 148, 152, 177, 10, 19, 23, 32, 40, 49, 61, 70, 86, 95, 115,
- 124, 147, 153, 176, 178, 20, 22, 33, 39, 50, 60, 71, 85, 96, 114,
- 125, 146, 154, 175, 179, 200, 21, 34, 38, 51, 59, 72, 84, 97, 113,
- 126, 145, 155, 174, 180, 199, 201, 35, 37, 52, 58, 73, 83, 98, 112,
- 127, 144, 156, 173, 181, 198, 202, 219, 36, 53, 57, 74, 82, 99, 111,
- 128, 143, 157, 172, 182, 197, 203, 218, 220, 54, 56, 75, 81, 100, 110,
- 129, 142, 158, 171, 183, 196, 204, 217, 221, 234, 55, 76, 80, 101, 109,
- 130, 141, 159, 170, 184, 195, 205, 216, 222, 233, 235, 77, 79, 102, 108,
- 131, 140, 160, 169, 185, 194, 206, 215, 223, 232, 236, 245, 78, 103, 107,
- 132, 139, 161, 168, 186, 193, 207, 214, 224, 231, 237, 244, 246, 104, 106,
- 133, 138, 162, 167, 187, 192, 208, 213, 225, 230, 238, 243, 247, 252, 105,
- 134, 137, 163, 166, 188, 191, 209, 212, 226, 229, 239, 242, 248, 251, 253,
- 135, 136, 164, 165, 189, 190, 210, 211, 227, 228, 240, 241, 249, 250, 254,
- 255
-};
-
-DECLARE_ALIGNED(16, static const int16_t, av1_mcol_iscan_32x32[1024]) = {
- 0, 32, 64, 96, 128, 160, 192, 224, 256, 288, 320, 352, 384, 416,
- 448, 480, 512, 544, 576, 608, 640, 672, 704, 736, 768, 800, 832, 864,
- 896, 928, 960, 992, 1, 33, 65, 97, 129, 161, 193, 225, 257, 289,
- 321, 353, 385, 417, 449, 481, 513, 545, 577, 609, 641, 673, 705, 737,
- 769, 801, 833, 865, 897, 929, 961, 993, 2, 34, 66, 98, 130, 162,
- 194, 226, 258, 290, 322, 354, 386, 418, 450, 482, 514, 546, 578, 610,
- 642, 674, 706, 738, 770, 802, 834, 866, 898, 930, 962, 994, 3, 35,
- 67, 99, 131, 163, 195, 227, 259, 291, 323, 355, 387, 419, 451, 483,
- 515, 547, 579, 611, 643, 675, 707, 739, 771, 803, 835, 867, 899, 931,
- 963, 995, 4, 36, 68, 100, 132, 164, 196, 228, 260, 292, 324, 356,
- 388, 420, 452, 484, 516, 548, 580, 612, 644, 676, 708, 740, 772, 804,
- 836, 868, 900, 932, 964, 996, 5, 37, 69, 101, 133, 165, 197, 229,
- 261, 293, 325, 357, 389, 421, 453, 485, 517, 549, 581, 613, 645, 677,
- 709, 741, 773, 805, 837, 869, 901, 933, 965, 997, 6, 38, 70, 102,
- 134, 166, 198, 230, 262, 294, 326, 358, 390, 422, 454, 486, 518, 550,
- 582, 614, 646, 678, 710, 742, 774, 806, 838, 870, 902, 934, 966, 998,
- 7, 39, 71, 103, 135, 167, 199, 231, 263, 295, 327, 359, 391, 423,
- 455, 487, 519, 551, 583, 615, 647, 679, 711, 743, 775, 807, 839, 871,
- 903, 935, 967, 999, 8, 40, 72, 104, 136, 168, 200, 232, 264, 296,
- 328, 360, 392, 424, 456, 488, 520, 552, 584, 616, 648, 680, 712, 744,
- 776, 808, 840, 872, 904, 936, 968, 1000, 9, 41, 73, 105, 137, 169,
- 201, 233, 265, 297, 329, 361, 393, 425, 457, 489, 521, 553, 585, 617,
- 649, 681, 713, 745, 777, 809, 841, 873, 905, 937, 969, 1001, 10, 42,
- 74, 106, 138, 170, 202, 234, 266, 298, 330, 362, 394, 426, 458, 490,
- 522, 554, 586, 618, 650, 682, 714, 746, 778, 810, 842, 874, 906, 938,
- 970, 1002, 11, 43, 75, 107, 139, 171, 203, 235, 267, 299, 331, 363,
- 395, 427, 459, 491, 523, 555, 587, 619, 651, 683, 715, 747, 779, 811,
- 843, 875, 907, 939, 971, 1003, 12, 44, 76, 108, 140, 172, 204, 236,
- 268, 300, 332, 364, 396, 428, 460, 492, 524, 556, 588, 620, 652, 684,
- 716, 748, 780, 812, 844, 876, 908, 940, 972, 1004, 13, 45, 77, 109,
- 141, 173, 205, 237, 269, 301, 333, 365, 397, 429, 461, 493, 525, 557,
- 589, 621, 653, 685, 717, 749, 781, 813, 845, 877, 909, 941, 973, 1005,
- 14, 46, 78, 110, 142, 174, 206, 238, 270, 302, 334, 366, 398, 430,
- 462, 494, 526, 558, 590, 622, 654, 686, 718, 750, 782, 814, 846, 878,
- 910, 942, 974, 1006, 15, 47, 79, 111, 143, 175, 207, 239, 271, 303,
- 335, 367, 399, 431, 463, 495, 527, 559, 591, 623, 655, 687, 719, 751,
- 783, 815, 847, 879, 911, 943, 975, 1007, 16, 48, 80, 112, 144, 176,
- 208, 240, 272, 304, 336, 368, 400, 432, 464, 496, 528, 560, 592, 624,
- 656, 688, 720, 752, 784, 816, 848, 880, 912, 944, 976, 1008, 17, 49,
- 81, 113, 145, 177, 209, 241, 273, 305, 337, 369, 401, 433, 465, 497,
- 529, 561, 593, 625, 657, 689, 721, 753, 785, 817, 849, 881, 913, 945,
- 977, 1009, 18, 50, 82, 114, 146, 178, 210, 242, 274, 306, 338, 370,
- 402, 434, 466, 498, 530, 562, 594, 626, 658, 690, 722, 754, 786, 818,
- 850, 882, 914, 946, 978, 1010, 19, 51, 83, 115, 147, 179, 211, 243,
- 275, 307, 339, 371, 403, 435, 467, 499, 531, 563, 595, 627, 659, 691,
- 723, 755, 787, 819, 851, 883, 915, 947, 979, 1011, 20, 52, 84, 116,
- 148, 180, 212, 244, 276, 308, 340, 372, 404, 436, 468, 500, 532, 564,
- 596, 628, 660, 692, 724, 756, 788, 820, 852, 884, 916, 948, 980, 1012,
- 21, 53, 85, 117, 149, 181, 213, 245, 277, 309, 341, 373, 405, 437,
- 469, 501, 533, 565, 597, 629, 661, 693, 725, 757, 789, 821, 853, 885,
- 917, 949, 981, 1013, 22, 54, 86, 118, 150, 182, 214, 246, 278, 310,
- 342, 374, 406, 438, 470, 502, 534, 566, 598, 630, 662, 694, 726, 758,
- 790, 822, 854, 886, 918, 950, 982, 1014, 23, 55, 87, 119, 151, 183,
- 215, 247, 279, 311, 343, 375, 407, 439, 471, 503, 535, 567, 599, 631,
- 663, 695, 727, 759, 791, 823, 855, 887, 919, 951, 983, 1015, 24, 56,
- 88, 120, 152, 184, 216, 248, 280, 312, 344, 376, 408, 440, 472, 504,
- 536, 568, 600, 632, 664, 696, 728, 760, 792, 824, 856, 888, 920, 952,
- 984, 1016, 25, 57, 89, 121, 153, 185, 217, 249, 281, 313, 345, 377,
- 409, 441, 473, 505, 537, 569, 601, 633, 665, 697, 729, 761, 793, 825,
- 857, 889, 921, 953, 985, 1017, 26, 58, 90, 122, 154, 186, 218, 250,
- 282, 314, 346, 378, 410, 442, 474, 506, 538, 570, 602, 634, 666, 698,
- 730, 762, 794, 826, 858, 890, 922, 954, 986, 1018, 27, 59, 91, 123,
- 155, 187, 219, 251, 283, 315, 347, 379, 411, 443, 475, 507, 539, 571,
- 603, 635, 667, 699, 731, 763, 795, 827, 859, 891, 923, 955, 987, 1019,
- 28, 60, 92, 124, 156, 188, 220, 252, 284, 316, 348, 380, 412, 444,
- 476, 508, 540, 572, 604, 636, 668, 700, 732, 764, 796, 828, 860, 892,
- 924, 956, 988, 1020, 29, 61, 93, 125, 157, 189, 221, 253, 285, 317,
- 349, 381, 413, 445, 477, 509, 541, 573, 605, 637, 669, 701, 733, 765,
- 797, 829, 861, 893, 925, 957, 989, 1021, 30, 62, 94, 126, 158, 190,
- 222, 254, 286, 318, 350, 382, 414, 446, 478, 510, 542, 574, 606, 638,
- 670, 702, 734, 766, 798, 830, 862, 894, 926, 958, 990, 1022, 31, 63,
- 95, 127, 159, 191, 223, 255, 287, 319, 351, 383, 415, 447, 479, 511,
- 543, 575, 607, 639, 671, 703, 735, 767, 799, 831, 863, 895, 927, 959,
- 991, 1023,
-};
-
-DECLARE_ALIGNED(16, static const int16_t, av1_mrow_iscan_32x32[1024]) = {
- 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12,
- 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25,
- 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38,
- 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51,
- 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64,
- 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77,
- 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90,
- 91, 92, 93, 94, 95, 96, 97, 98, 99, 100, 101, 102, 103,
- 104, 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116,
- 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127, 128, 129,
- 130, 131, 132, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142,
- 143, 144, 145, 146, 147, 148, 149, 150, 151, 152, 153, 154, 155,
- 156, 157, 158, 159, 160, 161, 162, 163, 164, 165, 166, 167, 168,
- 169, 170, 171, 172, 173, 174, 175, 176, 177, 178, 179, 180, 181,
- 182, 183, 184, 185, 186, 187, 188, 189, 190, 191, 192, 193, 194,
- 195, 196, 197, 198, 199, 200, 201, 202, 203, 204, 205, 206, 207,
- 208, 209, 210, 211, 212, 213, 214, 215, 216, 217, 218, 219, 220,
- 221, 222, 223, 224, 225, 226, 227, 228, 229, 230, 231, 232, 233,
- 234, 235, 236, 237, 238, 239, 240, 241, 242, 243, 244, 245, 246,
- 247, 248, 249, 250, 251, 252, 253, 254, 255, 256, 257, 258, 259,
- 260, 261, 262, 263, 264, 265, 266, 267, 268, 269, 270, 271, 272,
- 273, 274, 275, 276, 277, 278, 279, 280, 281, 282, 283, 284, 285,
- 286, 287, 288, 289, 290, 291, 292, 293, 294, 295, 296, 297, 298,
- 299, 300, 301, 302, 303, 304, 305, 306, 307, 308, 309, 310, 311,
- 312, 313, 314, 315, 316, 317, 318, 319, 320, 321, 322, 323, 324,
- 325, 326, 327, 328, 329, 330, 331, 332, 333, 334, 335, 336, 337,
- 338, 339, 340, 341, 342, 343, 344, 345, 346, 347, 348, 349, 350,
- 351, 352, 353, 354, 355, 356, 357, 358, 359, 360, 361, 362, 363,
- 364, 365, 366, 367, 368, 369, 370, 371, 372, 373, 374, 375, 376,
- 377, 378, 379, 380, 381, 382, 383, 384, 385, 386, 387, 388, 389,
- 390, 391, 392, 393, 394, 395, 396, 397, 398, 399, 400, 401, 402,
- 403, 404, 405, 406, 407, 408, 409, 410, 411, 412, 413, 414, 415,
- 416, 417, 418, 419, 420, 421, 422, 423, 424, 425, 426, 427, 428,
- 429, 430, 431, 432, 433, 434, 435, 436, 437, 438, 439, 440, 441,
- 442, 443, 444, 445, 446, 447, 448, 449, 450, 451, 452, 453, 454,
- 455, 456, 457, 458, 459, 460, 461, 462, 463, 464, 465, 466, 467,
- 468, 469, 470, 471, 472, 473, 474, 475, 476, 477, 478, 479, 480,
- 481, 482, 483, 484, 485, 486, 487, 488, 489, 490, 491, 492, 493,
- 494, 495, 496, 497, 498, 499, 500, 501, 502, 503, 504, 505, 506,
- 507, 508, 509, 510, 511, 512, 513, 514, 515, 516, 517, 518, 519,
- 520, 521, 522, 523, 524, 525, 526, 527, 528, 529, 530, 531, 532,
- 533, 534, 535, 536, 537, 538, 539, 540, 541, 542, 543, 544, 545,
- 546, 547, 548, 549, 550, 551, 552, 553, 554, 555, 556, 557, 558,
- 559, 560, 561, 562, 563, 564, 565, 566, 567, 568, 569, 570, 571,
- 572, 573, 574, 575, 576, 577, 578, 579, 580, 581, 582, 583, 584,
- 585, 586, 587, 588, 589, 590, 591, 592, 593, 594, 595, 596, 597,
- 598, 599, 600, 601, 602, 603, 604, 605, 606, 607, 608, 609, 610,
- 611, 612, 613, 614, 615, 616, 617, 618, 619, 620, 621, 622, 623,
- 624, 625, 626, 627, 628, 629, 630, 631, 632, 633, 634, 635, 636,
- 637, 638, 639, 640, 641, 642, 643, 644, 645, 646, 647, 648, 649,
- 650, 651, 652, 653, 654, 655, 656, 657, 658, 659, 660, 661, 662,
- 663, 664, 665, 666, 667, 668, 669, 670, 671, 672, 673, 674, 675,
- 676, 677, 678, 679, 680, 681, 682, 683, 684, 685, 686, 687, 688,
- 689, 690, 691, 692, 693, 694, 695, 696, 697, 698, 699, 700, 701,
- 702, 703, 704, 705, 706, 707, 708, 709, 710, 711, 712, 713, 714,
- 715, 716, 717, 718, 719, 720, 721, 722, 723, 724, 725, 726, 727,
- 728, 729, 730, 731, 732, 733, 734, 735, 736, 737, 738, 739, 740,
- 741, 742, 743, 744, 745, 746, 747, 748, 749, 750, 751, 752, 753,
- 754, 755, 756, 757, 758, 759, 760, 761, 762, 763, 764, 765, 766,
- 767, 768, 769, 770, 771, 772, 773, 774, 775, 776, 777, 778, 779,
- 780, 781, 782, 783, 784, 785, 786, 787, 788, 789, 790, 791, 792,
- 793, 794, 795, 796, 797, 798, 799, 800, 801, 802, 803, 804, 805,
- 806, 807, 808, 809, 810, 811, 812, 813, 814, 815, 816, 817, 818,
- 819, 820, 821, 822, 823, 824, 825, 826, 827, 828, 829, 830, 831,
- 832, 833, 834, 835, 836, 837, 838, 839, 840, 841, 842, 843, 844,
- 845, 846, 847, 848, 849, 850, 851, 852, 853, 854, 855, 856, 857,
- 858, 859, 860, 861, 862, 863, 864, 865, 866, 867, 868, 869, 870,
- 871, 872, 873, 874, 875, 876, 877, 878, 879, 880, 881, 882, 883,
- 884, 885, 886, 887, 888, 889, 890, 891, 892, 893, 894, 895, 896,
- 897, 898, 899, 900, 901, 902, 903, 904, 905, 906, 907, 908, 909,
- 910, 911, 912, 913, 914, 915, 916, 917, 918, 919, 920, 921, 922,
- 923, 924, 925, 926, 927, 928, 929, 930, 931, 932, 933, 934, 935,
- 936, 937, 938, 939, 940, 941, 942, 943, 944, 945, 946, 947, 948,
- 949, 950, 951, 952, 953, 954, 955, 956, 957, 958, 959, 960, 961,
- 962, 963, 964, 965, 966, 967, 968, 969, 970, 971, 972, 973, 974,
- 975, 976, 977, 978, 979, 980, 981, 982, 983, 984, 985, 986, 987,
- 988, 989, 990, 991, 992, 993, 994, 995, 996, 997, 998, 999, 1000,
- 1001, 1002, 1003, 1004, 1005, 1006, 1007, 1008, 1009, 1010, 1011, 1012, 1013,
- 1014, 1015, 1016, 1017, 1018, 1019, 1020, 1021, 1022, 1023,
-};
-
-DECLARE_ALIGNED(16, static const int16_t, av1_default_iscan_32x32[1024]) = {
- 0, 1, 5, 6, 14, 15, 27, 28, 44, 45, 65, 66, 90,
- 91, 119, 120, 152, 153, 189, 190, 230, 231, 275, 276, 324, 325,
- 377, 378, 434, 435, 495, 496, 2, 4, 7, 13, 16, 26, 29,
- 43, 46, 64, 67, 89, 92, 118, 121, 151, 154, 188, 191, 229,
- 232, 274, 277, 323, 326, 376, 379, 433, 436, 494, 497, 558, 3,
- 8, 12, 17, 25, 30, 42, 47, 63, 68, 88, 93, 117, 122,
- 150, 155, 187, 192, 228, 233, 273, 278, 322, 327, 375, 380, 432,
- 437, 493, 498, 557, 559, 9, 11, 18, 24, 31, 41, 48, 62,
- 69, 87, 94, 116, 123, 149, 156, 186, 193, 227, 234, 272, 279,
- 321, 328, 374, 381, 431, 438, 492, 499, 556, 560, 617, 10, 19,
- 23, 32, 40, 49, 61, 70, 86, 95, 115, 124, 148, 157, 185,
- 194, 226, 235, 271, 280, 320, 329, 373, 382, 430, 439, 491, 500,
- 555, 561, 616, 618, 20, 22, 33, 39, 50, 60, 71, 85, 96,
- 114, 125, 147, 158, 184, 195, 225, 236, 270, 281, 319, 330, 372,
- 383, 429, 440, 490, 501, 554, 562, 615, 619, 672, 21, 34, 38,
- 51, 59, 72, 84, 97, 113, 126, 146, 159, 183, 196, 224, 237,
- 269, 282, 318, 331, 371, 384, 428, 441, 489, 502, 553, 563, 614,
- 620, 671, 673, 35, 37, 52, 58, 73, 83, 98, 112, 127, 145,
- 160, 182, 197, 223, 238, 268, 283, 317, 332, 370, 385, 427, 442,
- 488, 503, 552, 564, 613, 621, 670, 674, 723, 36, 53, 57, 74,
- 82, 99, 111, 128, 144, 161, 181, 198, 222, 239, 267, 284, 316,
- 333, 369, 386, 426, 443, 487, 504, 551, 565, 612, 622, 669, 675,
- 722, 724, 54, 56, 75, 81, 100, 110, 129, 143, 162, 180, 199,
- 221, 240, 266, 285, 315, 334, 368, 387, 425, 444, 486, 505, 550,
- 566, 611, 623, 668, 676, 721, 725, 770, 55, 76, 80, 101, 109,
- 130, 142, 163, 179, 200, 220, 241, 265, 286, 314, 335, 367, 388,
- 424, 445, 485, 506, 549, 567, 610, 624, 667, 677, 720, 726, 769,
- 771, 77, 79, 102, 108, 131, 141, 164, 178, 201, 219, 242, 264,
- 287, 313, 336, 366, 389, 423, 446, 484, 507, 548, 568, 609, 625,
- 666, 678, 719, 727, 768, 772, 813, 78, 103, 107, 132, 140, 165,
- 177, 202, 218, 243, 263, 288, 312, 337, 365, 390, 422, 447, 483,
- 508, 547, 569, 608, 626, 665, 679, 718, 728, 767, 773, 812, 814,
- 104, 106, 133, 139, 166, 176, 203, 217, 244, 262, 289, 311, 338,
- 364, 391, 421, 448, 482, 509, 546, 570, 607, 627, 664, 680, 717,
- 729, 766, 774, 811, 815, 852, 105, 134, 138, 167, 175, 204, 216,
- 245, 261, 290, 310, 339, 363, 392, 420, 449, 481, 510, 545, 571,
- 606, 628, 663, 681, 716, 730, 765, 775, 810, 816, 851, 853, 135,
- 137, 168, 174, 205, 215, 246, 260, 291, 309, 340, 362, 393, 419,
- 450, 480, 511, 544, 572, 605, 629, 662, 682, 715, 731, 764, 776,
- 809, 817, 850, 854, 887, 136, 169, 173, 206, 214, 247, 259, 292,
- 308, 341, 361, 394, 418, 451, 479, 512, 543, 573, 604, 630, 661,
- 683, 714, 732, 763, 777, 808, 818, 849, 855, 886, 888, 170, 172,
- 207, 213, 248, 258, 293, 307, 342, 360, 395, 417, 452, 478, 513,
- 542, 574, 603, 631, 660, 684, 713, 733, 762, 778, 807, 819, 848,
- 856, 885, 889, 918, 171, 208, 212, 249, 257, 294, 306, 343, 359,
- 396, 416, 453, 477, 514, 541, 575, 602, 632, 659, 685, 712, 734,
- 761, 779, 806, 820, 847, 857, 884, 890, 917, 919, 209, 211, 250,
- 256, 295, 305, 344, 358, 397, 415, 454, 476, 515, 540, 576, 601,
- 633, 658, 686, 711, 735, 760, 780, 805, 821, 846, 858, 883, 891,
- 916, 920, 945, 210, 251, 255, 296, 304, 345, 357, 398, 414, 455,
- 475, 516, 539, 577, 600, 634, 657, 687, 710, 736, 759, 781, 804,
- 822, 845, 859, 882, 892, 915, 921, 944, 946, 252, 254, 297, 303,
- 346, 356, 399, 413, 456, 474, 517, 538, 578, 599, 635, 656, 688,
- 709, 737, 758, 782, 803, 823, 844, 860, 881, 893, 914, 922, 943,
- 947, 968, 253, 298, 302, 347, 355, 400, 412, 457, 473, 518, 537,
- 579, 598, 636, 655, 689, 708, 738, 757, 783, 802, 824, 843, 861,
- 880, 894, 913, 923, 942, 948, 967, 969, 299, 301, 348, 354, 401,
- 411, 458, 472, 519, 536, 580, 597, 637, 654, 690, 707, 739, 756,
- 784, 801, 825, 842, 862, 879, 895, 912, 924, 941, 949, 966, 970,
- 987, 300, 349, 353, 402, 410, 459, 471, 520, 535, 581, 596, 638,
- 653, 691, 706, 740, 755, 785, 800, 826, 841, 863, 878, 896, 911,
- 925, 940, 950, 965, 971, 986, 988, 350, 352, 403, 409, 460, 470,
- 521, 534, 582, 595, 639, 652, 692, 705, 741, 754, 786, 799, 827,
- 840, 864, 877, 897, 910, 926, 939, 951, 964, 972, 985, 989, 1002,
- 351, 404, 408, 461, 469, 522, 533, 583, 594, 640, 651, 693, 704,
- 742, 753, 787, 798, 828, 839, 865, 876, 898, 909, 927, 938, 952,
- 963, 973, 984, 990, 1001, 1003, 405, 407, 462, 468, 523, 532, 584,
- 593, 641, 650, 694, 703, 743, 752, 788, 797, 829, 838, 866, 875,
- 899, 908, 928, 937, 953, 962, 974, 983, 991, 1000, 1004, 1013, 406,
- 463, 467, 524, 531, 585, 592, 642, 649, 695, 702, 744, 751, 789,
- 796, 830, 837, 867, 874, 900, 907, 929, 936, 954, 961, 975, 982,
- 992, 999, 1005, 1012, 1014, 464, 466, 525, 530, 586, 591, 643, 648,
- 696, 701, 745, 750, 790, 795, 831, 836, 868, 873, 901, 906, 930,
- 935, 955, 960, 976, 981, 993, 998, 1006, 1011, 1015, 1020, 465, 526,
- 529, 587, 590, 644, 647, 697, 700, 746, 749, 791, 794, 832, 835,
- 869, 872, 902, 905, 931, 934, 956, 959, 977, 980, 994, 997, 1007,
- 1010, 1016, 1019, 1021, 527, 528, 588, 589, 645, 646, 698, 699, 747,
- 748, 792, 793, 833, 834, 870, 871, 903, 904, 932, 933, 957, 958,
- 978, 979, 995, 996, 1008, 1009, 1017, 1018, 1022, 1023
-};
-
-const SCAN_ORDER av1_default_scan_orders[TX_SIZES] = {
- { default_scan_4x4, av1_default_iscan_4x4, default_scan_4x4_neighbors },
- { default_scan_8x8, av1_default_iscan_8x8, default_scan_8x8_neighbors },
- { default_scan_16x16, av1_default_iscan_16x16, default_scan_16x16_neighbors },
- { default_scan_32x32, av1_default_iscan_32x32, default_scan_32x32_neighbors },
- // Half of the coefficients of tx64 at higher frequencies are set to
- // zeros. So tx32's scan order is used.
- { default_scan_32x32, av1_default_iscan_32x32, default_scan_32x32_neighbors },
-};
-
-const SCAN_ORDER av1_scan_orders[TX_SIZES_ALL][TX_TYPES] = {
- {
- // TX_4X4
- { default_scan_4x4, av1_default_iscan_4x4, default_scan_4x4_neighbors },
- { default_scan_4x4, av1_default_iscan_4x4, default_scan_4x4_neighbors },
- { default_scan_4x4, av1_default_iscan_4x4, default_scan_4x4_neighbors },
- { default_scan_4x4, av1_default_iscan_4x4, default_scan_4x4_neighbors },
- { default_scan_4x4, av1_default_iscan_4x4, default_scan_4x4_neighbors },
- { default_scan_4x4, av1_default_iscan_4x4, default_scan_4x4_neighbors },
- { default_scan_4x4, av1_default_iscan_4x4, default_scan_4x4_neighbors },
- { default_scan_4x4, av1_default_iscan_4x4, default_scan_4x4_neighbors },
- { default_scan_4x4, av1_default_iscan_4x4, default_scan_4x4_neighbors },
- { default_scan_4x4, av1_default_iscan_4x4, default_scan_4x4_neighbors },
- { mrow_scan_4x4, av1_mrow_iscan_4x4, mrow_scan_4x4_neighbors },
- { mcol_scan_4x4, av1_mcol_iscan_4x4, mcol_scan_4x4_neighbors },
- { mrow_scan_4x4, av1_mrow_iscan_4x4, mrow_scan_4x4_neighbors },
- { mcol_scan_4x4, av1_mcol_iscan_4x4, mcol_scan_4x4_neighbors },
- { mrow_scan_4x4, av1_mrow_iscan_4x4, mrow_scan_4x4_neighbors },
- { mcol_scan_4x4, av1_mcol_iscan_4x4, mcol_scan_4x4_neighbors },
- },
- {
- // TX_8X8
- { default_scan_8x8, av1_default_iscan_8x8, default_scan_8x8_neighbors },
- { default_scan_8x8, av1_default_iscan_8x8, default_scan_8x8_neighbors },
- { default_scan_8x8, av1_default_iscan_8x8, default_scan_8x8_neighbors },
- { default_scan_8x8, av1_default_iscan_8x8, default_scan_8x8_neighbors },
- { default_scan_8x8, av1_default_iscan_8x8, default_scan_8x8_neighbors },
- { default_scan_8x8, av1_default_iscan_8x8, default_scan_8x8_neighbors },
- { default_scan_8x8, av1_default_iscan_8x8, default_scan_8x8_neighbors },
- { default_scan_8x8, av1_default_iscan_8x8, default_scan_8x8_neighbors },
- { default_scan_8x8, av1_default_iscan_8x8, default_scan_8x8_neighbors },
- { default_scan_8x8, av1_default_iscan_8x8, default_scan_8x8_neighbors },
- { mrow_scan_8x8, av1_mrow_iscan_8x8, mrow_scan_8x8_neighbors },
- { mcol_scan_8x8, av1_mcol_iscan_8x8, mcol_scan_8x8_neighbors },
- { mrow_scan_8x8, av1_mrow_iscan_8x8, mrow_scan_8x8_neighbors },
- { mcol_scan_8x8, av1_mcol_iscan_8x8, mcol_scan_8x8_neighbors },
- { mrow_scan_8x8, av1_mrow_iscan_8x8, mrow_scan_8x8_neighbors },
- { mcol_scan_8x8, av1_mcol_iscan_8x8, mcol_scan_8x8_neighbors },
- },
- {
- // TX_16X16
- { default_scan_16x16, av1_default_iscan_16x16,
- default_scan_16x16_neighbors },
- { default_scan_16x16, av1_default_iscan_16x16,
- default_scan_16x16_neighbors },
- { default_scan_16x16, av1_default_iscan_16x16,
- default_scan_16x16_neighbors },
- { default_scan_16x16, av1_default_iscan_16x16,
- default_scan_16x16_neighbors },
- { default_scan_16x16, av1_default_iscan_16x16,
- default_scan_16x16_neighbors },
- { default_scan_16x16, av1_default_iscan_16x16,
- default_scan_16x16_neighbors },
- { default_scan_16x16, av1_default_iscan_16x16,
- default_scan_16x16_neighbors },
- { default_scan_16x16, av1_default_iscan_16x16,
- default_scan_16x16_neighbors },
- { default_scan_16x16, av1_default_iscan_16x16,
- default_scan_16x16_neighbors },
- { default_scan_16x16, av1_default_iscan_16x16,
- default_scan_16x16_neighbors },
- { mrow_scan_16x16, av1_mrow_iscan_16x16, mrow_scan_16x16_neighbors },
- { mcol_scan_16x16, av1_mcol_iscan_16x16, mcol_scan_16x16_neighbors },
- { mrow_scan_16x16, av1_mrow_iscan_16x16, mrow_scan_16x16_neighbors },
- { mcol_scan_16x16, av1_mcol_iscan_16x16, mcol_scan_16x16_neighbors },
- { mrow_scan_16x16, av1_mrow_iscan_16x16, mrow_scan_16x16_neighbors },
- { mcol_scan_16x16, av1_mcol_iscan_16x16, mcol_scan_16x16_neighbors },
- },
- {
- // TX_32X32
- { default_scan_32x32, av1_default_iscan_32x32,
- default_scan_32x32_neighbors },
- { default_scan_32x32, av1_default_iscan_32x32,
- default_scan_32x32_neighbors },
- { default_scan_32x32, av1_default_iscan_32x32,
- default_scan_32x32_neighbors },
- { default_scan_32x32, av1_default_iscan_32x32,
- default_scan_32x32_neighbors },
- { default_scan_32x32, av1_default_iscan_32x32,
- default_scan_32x32_neighbors },
- { default_scan_32x32, av1_default_iscan_32x32,
- default_scan_32x32_neighbors },
- { default_scan_32x32, av1_default_iscan_32x32,
- default_scan_32x32_neighbors },
- { default_scan_32x32, av1_default_iscan_32x32,
- default_scan_32x32_neighbors },
- { default_scan_32x32, av1_default_iscan_32x32,
- default_scan_32x32_neighbors },
- { default_scan_32x32, av1_default_iscan_32x32,
- default_scan_32x32_neighbors },
- { mrow_scan_32x32, av1_mrow_iscan_32x32, mrow_scan_32x32_neighbors },
- { mcol_scan_32x32, av1_mcol_iscan_32x32, mcol_scan_32x32_neighbors },
- { mrow_scan_32x32, av1_mrow_iscan_32x32, mrow_scan_32x32_neighbors },
- { mcol_scan_32x32, av1_mcol_iscan_32x32, mcol_scan_32x32_neighbors },
- { mrow_scan_32x32, av1_mrow_iscan_32x32, mrow_scan_32x32_neighbors },
- { mcol_scan_32x32, av1_mcol_iscan_32x32, mcol_scan_32x32_neighbors },
- },
- {
- // TX_64X64
- // Half of the coefficients of tx64 at higher frequencies are set to
- // zeros. So tx32's scan order is used.
- { default_scan_32x32, av1_default_iscan_32x32,
- default_scan_32x32_neighbors },
- { default_scan_32x32, av1_default_iscan_32x32,
- default_scan_32x32_neighbors },
- { default_scan_32x32, av1_default_iscan_32x32,
- default_scan_32x32_neighbors },
- { default_scan_32x32, av1_default_iscan_32x32,
- default_scan_32x32_neighbors },
- { default_scan_32x32, av1_default_iscan_32x32,
- default_scan_32x32_neighbors },
- { default_scan_32x32, av1_default_iscan_32x32,
- default_scan_32x32_neighbors },
- { default_scan_32x32, av1_default_iscan_32x32,
- default_scan_32x32_neighbors },
- { default_scan_32x32, av1_default_iscan_32x32,
- default_scan_32x32_neighbors },
- { default_scan_32x32, av1_default_iscan_32x32,
- default_scan_32x32_neighbors },
- { default_scan_32x32, av1_default_iscan_32x32,
- default_scan_32x32_neighbors },
- { mrow_scan_32x32, av1_mrow_iscan_32x32, mrow_scan_32x32_neighbors },
- { mcol_scan_32x32, av1_mcol_iscan_32x32, mcol_scan_32x32_neighbors },
- { mrow_scan_32x32, av1_mrow_iscan_32x32, mrow_scan_32x32_neighbors },
- { mcol_scan_32x32, av1_mcol_iscan_32x32, mcol_scan_32x32_neighbors },
- { mrow_scan_32x32, av1_mrow_iscan_32x32, mrow_scan_32x32_neighbors },
- { mcol_scan_32x32, av1_mcol_iscan_32x32, mcol_scan_32x32_neighbors },
- },
- {
- // TX_4X8
- { default_scan_4x8, av1_default_iscan_4x8, default_scan_4x8_neighbors },
- { default_scan_4x8, av1_default_iscan_4x8, default_scan_4x8_neighbors },
- { default_scan_4x8, av1_default_iscan_4x8, default_scan_4x8_neighbors },
- { default_scan_4x8, av1_default_iscan_4x8, default_scan_4x8_neighbors },
- { default_scan_4x8, av1_default_iscan_4x8, default_scan_4x8_neighbors },
- { default_scan_4x8, av1_default_iscan_4x8, default_scan_4x8_neighbors },
- { default_scan_4x8, av1_default_iscan_4x8, default_scan_4x8_neighbors },
- { default_scan_4x8, av1_default_iscan_4x8, default_scan_4x8_neighbors },
- { default_scan_4x8, av1_default_iscan_4x8, default_scan_4x8_neighbors },
- { default_scan_4x8, av1_default_iscan_4x8, default_scan_4x8_neighbors },
- { mrow_scan_4x8, av1_mrow_iscan_4x8, mrow_scan_4x8_neighbors },
- { mcol_scan_4x8, av1_mcol_iscan_4x8, mcol_scan_4x8_neighbors },
- { mrow_scan_4x8, av1_mrow_iscan_4x8, mrow_scan_4x8_neighbors },
- { mcol_scan_4x8, av1_mcol_iscan_4x8, mcol_scan_4x8_neighbors },
- { mrow_scan_4x8, av1_mrow_iscan_4x8, mrow_scan_4x8_neighbors },
- { mcol_scan_4x8, av1_mcol_iscan_4x8, mcol_scan_4x8_neighbors },
- },
- {
- // TX_8X4
- { default_scan_8x4, av1_default_iscan_8x4, default_scan_8x4_neighbors },
- { default_scan_8x4, av1_default_iscan_8x4, default_scan_8x4_neighbors },
- { default_scan_8x4, av1_default_iscan_8x4, default_scan_8x4_neighbors },
- { default_scan_8x4, av1_default_iscan_8x4, default_scan_8x4_neighbors },
- { default_scan_8x4, av1_default_iscan_8x4, default_scan_8x4_neighbors },
- { default_scan_8x4, av1_default_iscan_8x4, default_scan_8x4_neighbors },
- { default_scan_8x4, av1_default_iscan_8x4, default_scan_8x4_neighbors },
- { default_scan_8x4, av1_default_iscan_8x4, default_scan_8x4_neighbors },
- { default_scan_8x4, av1_default_iscan_8x4, default_scan_8x4_neighbors },
- { default_scan_8x4, av1_default_iscan_8x4, default_scan_8x4_neighbors },
- { mrow_scan_8x4, av1_mrow_iscan_8x4, mrow_scan_8x4_neighbors },
- { mcol_scan_8x4, av1_mcol_iscan_8x4, mcol_scan_8x4_neighbors },
- { mrow_scan_8x4, av1_mrow_iscan_8x4, mrow_scan_8x4_neighbors },
- { mcol_scan_8x4, av1_mcol_iscan_8x4, mcol_scan_8x4_neighbors },
- { mrow_scan_8x4, av1_mrow_iscan_8x4, mrow_scan_8x4_neighbors },
- { mcol_scan_8x4, av1_mcol_iscan_8x4, mcol_scan_8x4_neighbors },
- },
- {
- // TX_8X16
- { default_scan_8x16, av1_default_iscan_8x16,
- default_scan_8x16_neighbors },
- { default_scan_8x16, av1_default_iscan_8x16,
- default_scan_8x16_neighbors },
- { default_scan_8x16, av1_default_iscan_8x16,
- default_scan_8x16_neighbors },
- { default_scan_8x16, av1_default_iscan_8x16,
- default_scan_8x16_neighbors },
- { default_scan_8x16, av1_default_iscan_8x16,
- default_scan_8x16_neighbors },
- { default_scan_8x16, av1_default_iscan_8x16,
- default_scan_8x16_neighbors },
- { default_scan_8x16, av1_default_iscan_8x16,
- default_scan_8x16_neighbors },
- { default_scan_8x16, av1_default_iscan_8x16,
- default_scan_8x16_neighbors },
- { default_scan_8x16, av1_default_iscan_8x16,
- default_scan_8x16_neighbors },
- { default_scan_8x16, av1_default_iscan_8x16,
- default_scan_8x16_neighbors },
- { mrow_scan_8x16, av1_mrow_iscan_8x16, mrow_scan_8x16_neighbors },
- { mcol_scan_8x16, av1_mcol_iscan_8x16, mcol_scan_8x16_neighbors },
- { mrow_scan_8x16, av1_mrow_iscan_8x16, mrow_scan_8x16_neighbors },
- { mcol_scan_8x16, av1_mcol_iscan_8x16, mcol_scan_8x16_neighbors },
- { mrow_scan_8x16, av1_mrow_iscan_8x16, mrow_scan_8x16_neighbors },
- { mcol_scan_8x16, av1_mcol_iscan_8x16, mcol_scan_8x16_neighbors },
- },
- {
- // TX_16X8
- { default_scan_16x8, av1_default_iscan_16x8,
- default_scan_16x8_neighbors },
- { default_scan_16x8, av1_default_iscan_16x8,
- default_scan_16x8_neighbors },
- { default_scan_16x8, av1_default_iscan_16x8,
- default_scan_16x8_neighbors },
- { default_scan_16x8, av1_default_iscan_16x8,
- default_scan_16x8_neighbors },
- { default_scan_16x8, av1_default_iscan_16x8,
- default_scan_16x8_neighbors },
- { default_scan_16x8, av1_default_iscan_16x8,
- default_scan_16x8_neighbors },
- { default_scan_16x8, av1_default_iscan_16x8,
- default_scan_16x8_neighbors },
- { default_scan_16x8, av1_default_iscan_16x8,
- default_scan_16x8_neighbors },
- { default_scan_16x8, av1_default_iscan_16x8,
- default_scan_16x8_neighbors },
- { default_scan_16x8, av1_default_iscan_16x8,
- default_scan_16x8_neighbors },
- { mrow_scan_16x8, av1_mrow_iscan_16x8, mrow_scan_16x8_neighbors },
- { mcol_scan_16x8, av1_mcol_iscan_16x8, mcol_scan_16x8_neighbors },
- { mrow_scan_16x8, av1_mrow_iscan_16x8, mrow_scan_16x8_neighbors },
- { mcol_scan_16x8, av1_mcol_iscan_16x8, mcol_scan_16x8_neighbors },
- { mrow_scan_16x8, av1_mrow_iscan_16x8, mrow_scan_16x8_neighbors },
- { mcol_scan_16x8, av1_mcol_iscan_16x8, mcol_scan_16x8_neighbors },
- },
- {
- // TX_16X32
- { default_scan_16x32, av1_default_iscan_16x32,
- default_scan_16x32_neighbors },
- { default_scan_16x32, av1_default_iscan_16x32,
- default_scan_16x32_neighbors },
- { default_scan_16x32, av1_default_iscan_16x32,
- default_scan_16x32_neighbors },
- { default_scan_16x32, av1_default_iscan_16x32,
- default_scan_16x32_neighbors },
- { default_scan_16x32, av1_default_iscan_16x32,
- default_scan_16x32_neighbors },
- { default_scan_16x32, av1_default_iscan_16x32,
- default_scan_16x32_neighbors },
- { default_scan_16x32, av1_default_iscan_16x32,
- default_scan_16x32_neighbors },
- { default_scan_16x32, av1_default_iscan_16x32,
- default_scan_16x32_neighbors },
- { default_scan_16x32, av1_default_iscan_16x32,
- default_scan_16x32_neighbors },
- { default_scan_16x32, av1_default_iscan_16x32,
- default_scan_16x32_neighbors },
- { mrow_scan_16x32, av1_mrow_iscan_16x32, mrow_scan_16x32_neighbors },
- { mcol_scan_16x32, av1_mcol_iscan_16x32, mcol_scan_16x32_neighbors },
- { mrow_scan_16x32, av1_mrow_iscan_16x32, mrow_scan_16x32_neighbors },
- { mcol_scan_16x32, av1_mcol_iscan_16x32, mcol_scan_16x32_neighbors },
- { mrow_scan_16x32, av1_mrow_iscan_16x32, mrow_scan_16x32_neighbors },
- { mcol_scan_16x32, av1_mcol_iscan_16x32, mcol_scan_16x32_neighbors },
- },
- {
- // TX_32X16
- { default_scan_32x16, av1_default_iscan_32x16,
- default_scan_32x16_neighbors },
- { default_scan_32x16, av1_default_iscan_32x16,
- default_scan_32x16_neighbors },
- { default_scan_32x16, av1_default_iscan_32x16,
- default_scan_32x16_neighbors },
- { default_scan_32x16, av1_default_iscan_32x16,
- default_scan_32x16_neighbors },
- { default_scan_32x16, av1_default_iscan_32x16,
- default_scan_32x16_neighbors },
- { default_scan_32x16, av1_default_iscan_32x16,
- default_scan_32x16_neighbors },
- { default_scan_32x16, av1_default_iscan_32x16,
- default_scan_32x16_neighbors },
- { default_scan_32x16, av1_default_iscan_32x16,
- default_scan_32x16_neighbors },
- { default_scan_32x16, av1_default_iscan_32x16,
- default_scan_32x16_neighbors },
- { default_scan_32x16, av1_default_iscan_32x16,
- default_scan_32x16_neighbors },
- { mrow_scan_32x16, av1_mrow_iscan_32x16, mrow_scan_32x16_neighbors },
- { mcol_scan_32x16, av1_mcol_iscan_32x16, mcol_scan_32x16_neighbors },
- { mrow_scan_32x16, av1_mrow_iscan_32x16, mrow_scan_32x16_neighbors },
- { mcol_scan_32x16, av1_mcol_iscan_32x16, mcol_scan_32x16_neighbors },
- { mrow_scan_32x16, av1_mrow_iscan_32x16, mrow_scan_32x16_neighbors },
- { mcol_scan_32x16, av1_mcol_iscan_32x16, mcol_scan_32x16_neighbors },
- },
- {
- // TX_32X64
- // Half of the coefficients of tx64 at higher frequencies are set to
- // zeros. So tx32's scan order is used.
- { default_scan_32x32, av1_default_iscan_32x32,
- default_scan_32x32_neighbors },
- { default_scan_32x32, av1_default_iscan_32x32,
- default_scan_32x32_neighbors },
- { default_scan_32x32, av1_default_iscan_32x32,
- default_scan_32x32_neighbors },
- { default_scan_32x32, av1_default_iscan_32x32,
- default_scan_32x32_neighbors },
- { default_scan_32x32, av1_default_iscan_32x32,
- default_scan_32x32_neighbors },
- { default_scan_32x32, av1_default_iscan_32x32,
- default_scan_32x32_neighbors },
- { default_scan_32x32, av1_default_iscan_32x32,
- default_scan_32x32_neighbors },
- { default_scan_32x32, av1_default_iscan_32x32,
- default_scan_32x32_neighbors },
- { default_scan_32x32, av1_default_iscan_32x32,
- default_scan_32x32_neighbors },
- { default_scan_32x32, av1_default_iscan_32x32,
- default_scan_32x32_neighbors },
- { mrow_scan_32x32, av1_mrow_iscan_32x32, mrow_scan_32x32_neighbors },
- { mcol_scan_32x32, av1_mcol_iscan_32x32, mcol_scan_32x32_neighbors },
- { mrow_scan_32x32, av1_mrow_iscan_32x32, mrow_scan_32x32_neighbors },
- { mcol_scan_32x32, av1_mcol_iscan_32x32, mcol_scan_32x32_neighbors },
- { mrow_scan_32x32, av1_mrow_iscan_32x32, mrow_scan_32x32_neighbors },
- { mcol_scan_32x32, av1_mcol_iscan_32x32, mcol_scan_32x32_neighbors },
- },
- {
- // TX_64X32
- // Half of the coefficients of tx64 at higher frequencies are set to
- // zeros. So tx32's scan order is used.
- { default_scan_32x32, av1_default_iscan_32x32,
- default_scan_32x32_neighbors },
- { default_scan_32x32, av1_default_iscan_32x32,
- default_scan_32x32_neighbors },
- { default_scan_32x32, av1_default_iscan_32x32,
- default_scan_32x32_neighbors },
- { default_scan_32x32, av1_default_iscan_32x32,
- default_scan_32x32_neighbors },
- { default_scan_32x32, av1_default_iscan_32x32,
- default_scan_32x32_neighbors },
- { default_scan_32x32, av1_default_iscan_32x32,
- default_scan_32x32_neighbors },
- { default_scan_32x32, av1_default_iscan_32x32,
- default_scan_32x32_neighbors },
- { default_scan_32x32, av1_default_iscan_32x32,
- default_scan_32x32_neighbors },
- { default_scan_32x32, av1_default_iscan_32x32,
- default_scan_32x32_neighbors },
- { default_scan_32x32, av1_default_iscan_32x32,
- default_scan_32x32_neighbors },
- { mrow_scan_32x32, av1_mrow_iscan_32x32, mrow_scan_32x32_neighbors },
- { mcol_scan_32x32, av1_mcol_iscan_32x32, mcol_scan_32x32_neighbors },
- { mrow_scan_32x32, av1_mrow_iscan_32x32, mrow_scan_32x32_neighbors },
- { mcol_scan_32x32, av1_mcol_iscan_32x32, mcol_scan_32x32_neighbors },
- { mrow_scan_32x32, av1_mrow_iscan_32x32, mrow_scan_32x32_neighbors },
- { mcol_scan_32x32, av1_mcol_iscan_32x32, mcol_scan_32x32_neighbors },
- },
- {
- // TX_4X16
- { default_scan_4x16, av1_default_iscan_4x16,
- default_scan_4x16_neighbors },
- { default_scan_4x16, av1_default_iscan_4x16,
- default_scan_4x16_neighbors },
- { default_scan_4x16, av1_default_iscan_4x16,
- default_scan_4x16_neighbors },
- { default_scan_4x16, av1_default_iscan_4x16,
- default_scan_4x16_neighbors },
- { default_scan_4x16, av1_default_iscan_4x16,
- default_scan_4x16_neighbors },
- { default_scan_4x16, av1_default_iscan_4x16,
- default_scan_4x16_neighbors },
- { default_scan_4x16, av1_default_iscan_4x16,
- default_scan_4x16_neighbors },
- { default_scan_4x16, av1_default_iscan_4x16,
- default_scan_4x16_neighbors },
- { default_scan_4x16, av1_default_iscan_4x16,
- default_scan_4x16_neighbors },
- { default_scan_4x16, av1_default_iscan_4x16,
- default_scan_4x16_neighbors },
- { mrow_scan_4x16, av1_mrow_iscan_4x16, mrow_scan_4x16_neighbors },
- { mcol_scan_4x16, av1_mcol_iscan_4x16, mcol_scan_4x16_neighbors },
- { mrow_scan_4x16, av1_mrow_iscan_4x16, mrow_scan_4x16_neighbors },
- { mcol_scan_4x16, av1_mcol_iscan_4x16, mcol_scan_4x16_neighbors },
- { mrow_scan_4x16, av1_mrow_iscan_4x16, mrow_scan_4x16_neighbors },
- { mcol_scan_4x16, av1_mcol_iscan_4x16, mcol_scan_4x16_neighbors },
- },
- {
- // TX_16X4
- { default_scan_16x4, av1_default_iscan_16x4,
- default_scan_16x4_neighbors },
- { default_scan_16x4, av1_default_iscan_16x4,
- default_scan_16x4_neighbors },
- { default_scan_16x4, av1_default_iscan_16x4,
- default_scan_16x4_neighbors },
- { default_scan_16x4, av1_default_iscan_16x4,
- default_scan_16x4_neighbors },
- { default_scan_16x4, av1_default_iscan_16x4,
- default_scan_16x4_neighbors },
- { default_scan_16x4, av1_default_iscan_16x4,
- default_scan_16x4_neighbors },
- { default_scan_16x4, av1_default_iscan_16x4,
- default_scan_16x4_neighbors },
- { default_scan_16x4, av1_default_iscan_16x4,
- default_scan_16x4_neighbors },
- { default_scan_16x4, av1_default_iscan_16x4,
- default_scan_16x4_neighbors },
- { default_scan_16x4, av1_default_iscan_16x4,
- default_scan_16x4_neighbors },
- { mrow_scan_16x4, av1_mrow_iscan_16x4, mrow_scan_16x4_neighbors },
- { mcol_scan_16x4, av1_mcol_iscan_16x4, mcol_scan_16x4_neighbors },
- { mrow_scan_16x4, av1_mrow_iscan_16x4, mrow_scan_16x4_neighbors },
- { mcol_scan_16x4, av1_mcol_iscan_16x4, mcol_scan_16x4_neighbors },
- { mrow_scan_16x4, av1_mrow_iscan_16x4, mrow_scan_16x4_neighbors },
- { mcol_scan_16x4, av1_mcol_iscan_16x4, mcol_scan_16x4_neighbors },
- },
- {
- // TX_8X32
- { default_scan_8x32, av1_default_iscan_8x32,
- default_scan_8x32_neighbors },
- { default_scan_8x32, av1_default_iscan_8x32,
- default_scan_8x32_neighbors },
- { default_scan_8x32, av1_default_iscan_8x32,
- default_scan_8x32_neighbors },
- { default_scan_8x32, av1_default_iscan_8x32,
- default_scan_8x32_neighbors },
- { default_scan_8x32, av1_default_iscan_8x32,
- default_scan_8x32_neighbors },
- { default_scan_8x32, av1_default_iscan_8x32,
- default_scan_8x32_neighbors },
- { default_scan_8x32, av1_default_iscan_8x32,
- default_scan_8x32_neighbors },
- { default_scan_8x32, av1_default_iscan_8x32,
- default_scan_8x32_neighbors },
- { default_scan_8x32, av1_default_iscan_8x32,
- default_scan_8x32_neighbors },
- { default_scan_8x32, av1_default_iscan_8x32,
- default_scan_8x32_neighbors },
- { mrow_scan_8x32, av1_mrow_iscan_8x32, mrow_scan_8x32_neighbors },
- { mcol_scan_8x32, av1_mcol_iscan_8x32, mcol_scan_8x32_neighbors },
- { mrow_scan_8x32, av1_mrow_iscan_8x32, mrow_scan_8x32_neighbors },
- { mcol_scan_8x32, av1_mcol_iscan_8x32, mcol_scan_8x32_neighbors },
- { mrow_scan_8x32, av1_mrow_iscan_8x32, mrow_scan_8x32_neighbors },
- { mcol_scan_8x32, av1_mcol_iscan_8x32, mcol_scan_8x32_neighbors },
- },
- {
- // TX_32X8
- { default_scan_32x8, av1_default_iscan_32x8,
- default_scan_32x8_neighbors },
- { default_scan_32x8, av1_default_iscan_32x8,
- default_scan_32x8_neighbors },
- { default_scan_32x8, av1_default_iscan_32x8,
- default_scan_32x8_neighbors },
- { default_scan_32x8, av1_default_iscan_32x8,
- default_scan_32x8_neighbors },
- { default_scan_32x8, av1_default_iscan_32x8,
- default_scan_32x8_neighbors },
- { default_scan_32x8, av1_default_iscan_32x8,
- default_scan_32x8_neighbors },
- { default_scan_32x8, av1_default_iscan_32x8,
- default_scan_32x8_neighbors },
- { default_scan_32x8, av1_default_iscan_32x8,
- default_scan_32x8_neighbors },
- { default_scan_32x8, av1_default_iscan_32x8,
- default_scan_32x8_neighbors },
- { default_scan_32x8, av1_default_iscan_32x8,
- default_scan_32x8_neighbors },
- { mrow_scan_32x8, av1_mrow_iscan_32x8, mrow_scan_32x8_neighbors },
- { mcol_scan_32x8, av1_mcol_iscan_32x8, mcol_scan_32x8_neighbors },
- { mrow_scan_32x8, av1_mrow_iscan_32x8, mrow_scan_32x8_neighbors },
- { mcol_scan_32x8, av1_mcol_iscan_32x8, mcol_scan_32x8_neighbors },
- { mrow_scan_32x8, av1_mrow_iscan_32x8, mrow_scan_32x8_neighbors },
- { mcol_scan_32x8, av1_mcol_iscan_32x8, mcol_scan_32x8_neighbors },
- },
- {
- // TX_16X64
- // Half of the coefficients of tx64 at higher frequencies are set to
- // zeros. So tx32's scan order is used.
- { default_scan_16x32, av1_default_iscan_16x32,
- default_scan_16x32_neighbors },
- { default_scan_16x32, av1_default_iscan_16x32,
- default_scan_16x32_neighbors },
- { default_scan_16x32, av1_default_iscan_16x32,
- default_scan_16x32_neighbors },
- { default_scan_16x32, av1_default_iscan_16x32,
- default_scan_16x32_neighbors },
- { default_scan_16x32, av1_default_iscan_16x32,
- default_scan_16x32_neighbors },
- { default_scan_16x32, av1_default_iscan_16x32,
- default_scan_16x32_neighbors },
- { default_scan_16x32, av1_default_iscan_16x32,
- default_scan_16x32_neighbors },
- { default_scan_16x32, av1_default_iscan_16x32,
- default_scan_16x32_neighbors },
- { default_scan_16x32, av1_default_iscan_16x32,
- default_scan_16x32_neighbors },
- { default_scan_16x32, av1_default_iscan_16x32,
- default_scan_16x32_neighbors },
- { mrow_scan_16x32, av1_mrow_iscan_16x32, mrow_scan_16x32_neighbors },
- { mcol_scan_16x32, av1_mcol_iscan_16x32, mcol_scan_16x32_neighbors },
- { mrow_scan_16x32, av1_mrow_iscan_16x32, mrow_scan_16x32_neighbors },
- { mcol_scan_16x32, av1_mcol_iscan_16x32, mcol_scan_16x32_neighbors },
- { mrow_scan_16x32, av1_mrow_iscan_16x32, mrow_scan_16x32_neighbors },
- { mcol_scan_16x32, av1_mcol_iscan_16x32, mcol_scan_16x32_neighbors },
- },
- {
- // TX_64X16
- // Half of the coefficients of tx64 at higher frequencies are set to
- // zeros. So tx32's scan order is used.
- { default_scan_32x16, av1_default_iscan_32x16,
- default_scan_32x16_neighbors },
- { default_scan_32x16, av1_default_iscan_32x16,
- default_scan_32x16_neighbors },
- { default_scan_32x16, av1_default_iscan_32x16,
- default_scan_32x16_neighbors },
- { default_scan_32x16, av1_default_iscan_32x16,
- default_scan_32x16_neighbors },
- { default_scan_32x16, av1_default_iscan_32x16,
- default_scan_32x16_neighbors },
- { default_scan_32x16, av1_default_iscan_32x16,
- default_scan_32x16_neighbors },
- { default_scan_32x16, av1_default_iscan_32x16,
- default_scan_32x16_neighbors },
- { default_scan_32x16, av1_default_iscan_32x16,
- default_scan_32x16_neighbors },
- { default_scan_32x16, av1_default_iscan_32x16,
- default_scan_32x16_neighbors },
- { default_scan_32x16, av1_default_iscan_32x16,
- default_scan_32x16_neighbors },
- { mrow_scan_32x16, av1_mrow_iscan_32x16, mrow_scan_32x16_neighbors },
- { mcol_scan_32x16, av1_mcol_iscan_32x16, mcol_scan_32x16_neighbors },
- { mrow_scan_32x16, av1_mrow_iscan_32x16, mrow_scan_32x16_neighbors },
- { mcol_scan_32x16, av1_mcol_iscan_32x16, mcol_scan_32x16_neighbors },
- { mrow_scan_32x16, av1_mrow_iscan_32x16, mrow_scan_32x16_neighbors },
- { mcol_scan_32x16, av1_mcol_iscan_32x16, mcol_scan_32x16_neighbors },
- },
-};
diff --git a/third_party/aom/av1/common/scan.h b/third_party/aom/av1/common/scan.h
deleted file mode 100644
index 233dc0efa..000000000
--- a/third_party/aom/av1/common/scan.h
+++ /dev/null
@@ -1,55 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#ifndef AOM_AV1_COMMON_SCAN_H_
-#define AOM_AV1_COMMON_SCAN_H_
-
-#include "aom/aom_integer.h"
-#include "aom_ports/mem.h"
-
-#include "av1/common/enums.h"
-#include "av1/common/onyxc_int.h"
-#include "av1/common/blockd.h"
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-#define MAX_NEIGHBORS 2
-
-typedef enum SCAN_MODE {
- SCAN_MODE_ZIG_ZAG,
- SCAN_MODE_COL_DIAG,
- SCAN_MODE_ROW_DIAG,
- SCAN_MODE_COL_1D,
- SCAN_MODE_ROW_1D,
- SCAN_MODES
-} SCAN_MODE;
-
-extern const SCAN_ORDER av1_default_scan_orders[TX_SIZES];
-extern const SCAN_ORDER av1_scan_orders[TX_SIZES_ALL][TX_TYPES];
-
-void av1_deliver_eob_threshold(const AV1_COMMON *cm, MACROBLOCKD *xd);
-
-static INLINE const SCAN_ORDER *get_default_scan(TX_SIZE tx_size,
- TX_TYPE tx_type) {
- return &av1_scan_orders[tx_size][tx_type];
-}
-
-static INLINE const SCAN_ORDER *get_scan(TX_SIZE tx_size, TX_TYPE tx_type) {
- return get_default_scan(tx_size, tx_type);
-}
-
-#ifdef __cplusplus
-} // extern "C"
-#endif
-
-#endif // AOM_AV1_COMMON_SCAN_H_
diff --git a/third_party/aom/av1/common/seg_common.c b/third_party/aom/av1/common/seg_common.c
deleted file mode 100644
index cd189ad76..000000000
--- a/third_party/aom/av1/common/seg_common.c
+++ /dev/null
@@ -1,84 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#include <assert.h>
-
-#include "av1/common/av1_loopfilter.h"
-#include "av1/common/blockd.h"
-#include "av1/common/seg_common.h"
-#include "av1/common/quant_common.h"
-
-static const int seg_feature_data_signed[SEG_LVL_MAX] = { 1, 1, 1, 1, 1, 0, 0 };
-
-static const int seg_feature_data_max[SEG_LVL_MAX] = {
- MAXQ, MAX_LOOP_FILTER, MAX_LOOP_FILTER, MAX_LOOP_FILTER, MAX_LOOP_FILTER, 7, 0
-};
-
-// These functions provide access to new segment level features.
-// Eventually these function may be "optimized out" but for the moment,
-// the coding mechanism is still subject to change so these provide a
-// convenient single point of change.
-
-void av1_clearall_segfeatures(struct segmentation *seg) {
- av1_zero(seg->feature_data);
- av1_zero(seg->feature_mask);
-}
-
-void calculate_segdata(struct segmentation *seg) {
- seg->segid_preskip = 0;
- seg->last_active_segid = 0;
- for (int i = 0; i < MAX_SEGMENTS; i++) {
- for (int j = 0; j < SEG_LVL_MAX; j++) {
- if (seg->feature_mask[i] & (1 << j)) {
- seg->segid_preskip |= (j >= SEG_LVL_REF_FRAME);
- seg->last_active_segid = i;
- }
- }
- }
-}
-
-void av1_enable_segfeature(struct segmentation *seg, int segment_id,
- SEG_LVL_FEATURES feature_id) {
- seg->feature_mask[segment_id] |= 1 << feature_id;
-}
-
-int av1_seg_feature_data_max(SEG_LVL_FEATURES feature_id) {
- return seg_feature_data_max[feature_id];
-}
-
-int av1_is_segfeature_signed(SEG_LVL_FEATURES feature_id) {
- return seg_feature_data_signed[feature_id];
-}
-
-// The 'seg_data' given for each segment can be either deltas (from the default
-// value chosen for the frame) or absolute values.
-//
-// Valid range for abs values is (0-127 for MB_LVL_ALT_Q), (0-63 for
-// SEGMENT_ALT_LF)
-// Valid range for delta values are (+/-127 for MB_LVL_ALT_Q), (+/-63 for
-// SEGMENT_ALT_LF)
-//
-// abs_delta = SEGMENT_DELTADATA (deltas) abs_delta = SEGMENT_ABSDATA (use
-// the absolute values given).
-
-void av1_set_segdata(struct segmentation *seg, int segment_id,
- SEG_LVL_FEATURES feature_id, int seg_data) {
- if (seg_data < 0) {
- assert(seg_feature_data_signed[feature_id]);
- assert(-seg_data <= seg_feature_data_max[feature_id]);
- } else {
- assert(seg_data <= seg_feature_data_max[feature_id]);
- }
-
- seg->feature_data[segment_id][feature_id] = seg_data;
-}
-
-// TBD? Functions to read and write segment data with range / validity checking
diff --git a/third_party/aom/av1/common/seg_common.h b/third_party/aom/av1/common/seg_common.h
deleted file mode 100644
index 8c35bba86..000000000
--- a/third_party/aom/av1/common/seg_common.h
+++ /dev/null
@@ -1,104 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#ifndef AOM_AV1_COMMON_SEG_COMMON_H_
-#define AOM_AV1_COMMON_SEG_COMMON_H_
-
-#include "aom_dsp/prob.h"
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-#define MAX_SEGMENTS 8
-#define SEG_TREE_PROBS (MAX_SEGMENTS - 1)
-
-#define SEG_TEMPORAL_PRED_CTXS 3
-#define SPATIAL_PREDICTION_PROBS 3
-
-typedef enum {
- SEG_LVL_ALT_Q, // Use alternate Quantizer ....
- SEG_LVL_ALT_LF_Y_V, // Use alternate loop filter value on y plane vertical
- SEG_LVL_ALT_LF_Y_H, // Use alternate loop filter value on y plane horizontal
- SEG_LVL_ALT_LF_U, // Use alternate loop filter value on u plane
- SEG_LVL_ALT_LF_V, // Use alternate loop filter value on v plane
- SEG_LVL_REF_FRAME, // Optional Segment reference frame
- SEG_LVL_SKIP, // Optional Segment (0,0) + skip mode
- SEG_LVL_GLOBALMV,
- SEG_LVL_MAX
-} SEG_LVL_FEATURES;
-
-struct segmentation {
- uint8_t enabled;
- uint8_t update_map;
- uint8_t update_data;
- uint8_t temporal_update;
-
- int16_t feature_data[MAX_SEGMENTS][SEG_LVL_MAX];
- unsigned int feature_mask[MAX_SEGMENTS];
- int last_active_segid; // The highest numbered segment id that has some
- // enabled feature.
- uint8_t segid_preskip; // Whether the segment id will be read before the
- // skip syntax element.
- // 1: the segment id will be read first.
- // 0: the skip syntax element will be read first.
-};
-
-struct segmentation_probs {
- aom_cdf_prob tree_cdf[CDF_SIZE(MAX_SEGMENTS)];
- aom_cdf_prob pred_cdf[SEG_TEMPORAL_PRED_CTXS][CDF_SIZE(2)];
- aom_cdf_prob spatial_pred_seg_cdf[SPATIAL_PREDICTION_PROBS]
- [CDF_SIZE(MAX_SEGMENTS)];
-};
-
-static INLINE int segfeature_active(const struct segmentation *seg,
- int segment_id,
- SEG_LVL_FEATURES feature_id) {
- return seg->enabled && (seg->feature_mask[segment_id] & (1 << feature_id));
-}
-
-static INLINE void segfeatures_copy(struct segmentation *dst,
- const struct segmentation *src) {
- int i, j;
- for (i = 0; i < MAX_SEGMENTS; i++) {
- dst->feature_mask[i] = src->feature_mask[i];
- for (j = 0; j < SEG_LVL_MAX; j++) {
- dst->feature_data[i][j] = src->feature_data[i][j];
- }
- }
- dst->segid_preskip = src->segid_preskip;
- dst->last_active_segid = src->last_active_segid;
-}
-
-void av1_clearall_segfeatures(struct segmentation *seg);
-
-void av1_enable_segfeature(struct segmentation *seg, int segment_id,
- SEG_LVL_FEATURES feature_id);
-
-void calculate_segdata(struct segmentation *seg);
-
-int av1_seg_feature_data_max(SEG_LVL_FEATURES feature_id);
-
-int av1_is_segfeature_signed(SEG_LVL_FEATURES feature_id);
-
-void av1_set_segdata(struct segmentation *seg, int segment_id,
- SEG_LVL_FEATURES feature_id, int seg_data);
-
-static INLINE int get_segdata(const struct segmentation *seg, int segment_id,
- SEG_LVL_FEATURES feature_id) {
- return seg->feature_data[segment_id][feature_id];
-}
-
-#ifdef __cplusplus
-} // extern "C"
-#endif
-
-#endif // AOM_AV1_COMMON_SEG_COMMON_H_
diff --git a/third_party/aom/av1/common/thread_common.c b/third_party/aom/av1/common/thread_common.c
deleted file mode 100644
index 8df4c9a09..000000000
--- a/third_party/aom/av1/common/thread_common.c
+++ /dev/null
@@ -1,786 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#include "config/aom_config.h"
-#include "config/aom_scale_rtcd.h"
-
-#include "aom_dsp/aom_dsp_common.h"
-#include "aom_mem/aom_mem.h"
-#include "av1/common/av1_loopfilter.h"
-#include "av1/common/entropymode.h"
-#include "av1/common/thread_common.h"
-#include "av1/common/reconinter.h"
-
-// Set up nsync by width.
-static INLINE int get_sync_range(int width) {
- // nsync numbers are picked by testing. For example, for 4k
- // video, using 4 gives best performance.
- if (width < 640)
- return 1;
- else if (width <= 1280)
- return 2;
- else if (width <= 4096)
- return 4;
- else
- return 8;
-}
-
-static INLINE int get_lr_sync_range(int width) {
-#if 0
- // nsync numbers are picked by testing. For example, for 4k
- // video, using 4 gives best performance.
- if (width < 640)
- return 1;
- else if (width <= 1280)
- return 2;
- else if (width <= 4096)
- return 4;
- else
- return 8;
-#else
- (void)width;
- return 1;
-#endif
-}
-
-// Allocate memory for lf row synchronization
-static void loop_filter_alloc(AV1LfSync *lf_sync, AV1_COMMON *cm, int rows,
- int width, int num_workers) {
- lf_sync->rows = rows;
-#if CONFIG_MULTITHREAD
- {
- int i, j;
-
- for (j = 0; j < MAX_MB_PLANE; j++) {
- CHECK_MEM_ERROR(cm, lf_sync->mutex_[j],
- aom_malloc(sizeof(*(lf_sync->mutex_[j])) * rows));
- if (lf_sync->mutex_[j]) {
- for (i = 0; i < rows; ++i) {
- pthread_mutex_init(&lf_sync->mutex_[j][i], NULL);
- }
- }
-
- CHECK_MEM_ERROR(cm, lf_sync->cond_[j],
- aom_malloc(sizeof(*(lf_sync->cond_[j])) * rows));
- if (lf_sync->cond_[j]) {
- for (i = 0; i < rows; ++i) {
- pthread_cond_init(&lf_sync->cond_[j][i], NULL);
- }
- }
- }
-
- CHECK_MEM_ERROR(cm, lf_sync->job_mutex,
- aom_malloc(sizeof(*(lf_sync->job_mutex))));
- if (lf_sync->job_mutex) {
- pthread_mutex_init(lf_sync->job_mutex, NULL);
- }
- }
-#endif // CONFIG_MULTITHREAD
- CHECK_MEM_ERROR(cm, lf_sync->lfdata,
- aom_malloc(num_workers * sizeof(*(lf_sync->lfdata))));
- lf_sync->num_workers = num_workers;
-
- for (int j = 0; j < MAX_MB_PLANE; j++) {
- CHECK_MEM_ERROR(cm, lf_sync->cur_sb_col[j],
- aom_malloc(sizeof(*(lf_sync->cur_sb_col[j])) * rows));
- }
- CHECK_MEM_ERROR(
- cm, lf_sync->job_queue,
- aom_malloc(sizeof(*(lf_sync->job_queue)) * rows * MAX_MB_PLANE * 2));
- // Set up nsync.
- lf_sync->sync_range = get_sync_range(width);
-}
-
-// Deallocate lf synchronization related mutex and data
-void av1_loop_filter_dealloc(AV1LfSync *lf_sync) {
- if (lf_sync != NULL) {
- int j;
-#if CONFIG_MULTITHREAD
- int i;
- for (j = 0; j < MAX_MB_PLANE; j++) {
- if (lf_sync->mutex_[j] != NULL) {
- for (i = 0; i < lf_sync->rows; ++i) {
- pthread_mutex_destroy(&lf_sync->mutex_[j][i]);
- }
- aom_free(lf_sync->mutex_[j]);
- }
- if (lf_sync->cond_[j] != NULL) {
- for (i = 0; i < lf_sync->rows; ++i) {
- pthread_cond_destroy(&lf_sync->cond_[j][i]);
- }
- aom_free(lf_sync->cond_[j]);
- }
- }
- if (lf_sync->job_mutex != NULL) {
- pthread_mutex_destroy(lf_sync->job_mutex);
- aom_free(lf_sync->job_mutex);
- }
-#endif // CONFIG_MULTITHREAD
- aom_free(lf_sync->lfdata);
- for (j = 0; j < MAX_MB_PLANE; j++) {
- aom_free(lf_sync->cur_sb_col[j]);
- }
-
- aom_free(lf_sync->job_queue);
- // clear the structure as the source of this call may be a resize in which
- // case this call will be followed by an _alloc() which may fail.
- av1_zero(*lf_sync);
- }
-}
-
-static void loop_filter_data_reset(LFWorkerData *lf_data,
- YV12_BUFFER_CONFIG *frame_buffer,
- struct AV1Common *cm, MACROBLOCKD *xd) {
- struct macroblockd_plane *pd = xd->plane;
- lf_data->frame_buffer = frame_buffer;
- lf_data->cm = cm;
- lf_data->xd = xd;
- for (int i = 0; i < MAX_MB_PLANE; i++) {
- memcpy(&lf_data->planes[i].dst, &pd[i].dst, sizeof(lf_data->planes[i].dst));
- lf_data->planes[i].subsampling_x = pd[i].subsampling_x;
- lf_data->planes[i].subsampling_y = pd[i].subsampling_y;
- }
-}
-
-static INLINE void sync_read(AV1LfSync *const lf_sync, int r, int c,
- int plane) {
-#if CONFIG_MULTITHREAD
- const int nsync = lf_sync->sync_range;
-
- if (r && !(c & (nsync - 1))) {
- pthread_mutex_t *const mutex = &lf_sync->mutex_[plane][r - 1];
- pthread_mutex_lock(mutex);
-
- while (c > lf_sync->cur_sb_col[plane][r - 1] - nsync) {
- pthread_cond_wait(&lf_sync->cond_[plane][r - 1], mutex);
- }
- pthread_mutex_unlock(mutex);
- }
-#else
- (void)lf_sync;
- (void)r;
- (void)c;
- (void)plane;
-#endif // CONFIG_MULTITHREAD
-}
-
-static INLINE void sync_write(AV1LfSync *const lf_sync, int r, int c,
- const int sb_cols, int plane) {
-#if CONFIG_MULTITHREAD
- const int nsync = lf_sync->sync_range;
- int cur;
- // Only signal when there are enough filtered SB for next row to run.
- int sig = 1;
-
- if (c < sb_cols - 1) {
- cur = c;
- if (c % nsync) sig = 0;
- } else {
- cur = sb_cols + nsync;
- }
-
- if (sig) {
- pthread_mutex_lock(&lf_sync->mutex_[plane][r]);
-
- lf_sync->cur_sb_col[plane][r] = cur;
-
- pthread_cond_broadcast(&lf_sync->cond_[plane][r]);
- pthread_mutex_unlock(&lf_sync->mutex_[plane][r]);
- }
-#else
- (void)lf_sync;
- (void)r;
- (void)c;
- (void)sb_cols;
- (void)plane;
-#endif // CONFIG_MULTITHREAD
-}
-
-static void enqueue_lf_jobs(AV1LfSync *lf_sync, AV1_COMMON *cm, int start,
- int stop, int plane_start, int plane_end) {
- int mi_row, plane, dir;
- AV1LfMTInfo *lf_job_queue = lf_sync->job_queue;
- lf_sync->jobs_enqueued = 0;
- lf_sync->jobs_dequeued = 0;
-
- for (dir = 0; dir < 2; dir++) {
- for (plane = plane_start; plane < plane_end; plane++) {
- if (plane == 0 && !(cm->lf.filter_level[0]) && !(cm->lf.filter_level[1]))
- break;
- else if (plane == 1 && !(cm->lf.filter_level_u))
- continue;
- else if (plane == 2 && !(cm->lf.filter_level_v))
- continue;
- for (mi_row = start; mi_row < stop; mi_row += MAX_MIB_SIZE) {
- lf_job_queue->mi_row = mi_row;
- lf_job_queue->plane = plane;
- lf_job_queue->dir = dir;
- lf_job_queue++;
- lf_sync->jobs_enqueued++;
- }
- }
- }
-}
-
-AV1LfMTInfo *get_lf_job_info(AV1LfSync *lf_sync) {
- AV1LfMTInfo *cur_job_info = NULL;
-
-#if CONFIG_MULTITHREAD
- pthread_mutex_lock(lf_sync->job_mutex);
-
- if (lf_sync->jobs_dequeued < lf_sync->jobs_enqueued) {
- cur_job_info = lf_sync->job_queue + lf_sync->jobs_dequeued;
- lf_sync->jobs_dequeued++;
- }
-
- pthread_mutex_unlock(lf_sync->job_mutex);
-#else
- (void)lf_sync;
-#endif
-
- return cur_job_info;
-}
-
-// Implement row loopfiltering for each thread.
-static INLINE void thread_loop_filter_rows(
- const YV12_BUFFER_CONFIG *const frame_buffer, AV1_COMMON *const cm,
- struct macroblockd_plane *planes, MACROBLOCKD *xd,
- AV1LfSync *const lf_sync) {
- const int sb_cols =
- ALIGN_POWER_OF_TWO(cm->mi_cols, MAX_MIB_SIZE_LOG2) >> MAX_MIB_SIZE_LOG2;
- int mi_row, mi_col, plane, dir;
- int r, c;
-
- while (1) {
- AV1LfMTInfo *cur_job_info = get_lf_job_info(lf_sync);
-
- if (cur_job_info != NULL) {
- mi_row = cur_job_info->mi_row;
- plane = cur_job_info->plane;
- dir = cur_job_info->dir;
- r = mi_row >> MAX_MIB_SIZE_LOG2;
-
- if (dir == 0) {
- for (mi_col = 0; mi_col < cm->mi_cols; mi_col += MAX_MIB_SIZE) {
- c = mi_col >> MAX_MIB_SIZE_LOG2;
-
- av1_setup_dst_planes(planes, cm->seq_params.sb_size, frame_buffer,
- mi_row, mi_col, plane, plane + 1);
-
- av1_filter_block_plane_vert(cm, xd, plane, &planes[plane], mi_row,
- mi_col);
- sync_write(lf_sync, r, c, sb_cols, plane);
- }
- } else if (dir == 1) {
- for (mi_col = 0; mi_col < cm->mi_cols; mi_col += MAX_MIB_SIZE) {
- c = mi_col >> MAX_MIB_SIZE_LOG2;
-
- // Wait for vertical edge filtering of the top-right block to be
- // completed
- sync_read(lf_sync, r, c, plane);
-
- // Wait for vertical edge filtering of the right block to be
- // completed
- sync_read(lf_sync, r + 1, c, plane);
-
- av1_setup_dst_planes(planes, cm->seq_params.sb_size, frame_buffer,
- mi_row, mi_col, plane, plane + 1);
- av1_filter_block_plane_horz(cm, xd, plane, &planes[plane], mi_row,
- mi_col);
- }
- }
- } else {
- break;
- }
- }
-}
-
-// Row-based multi-threaded loopfilter hook
-static int loop_filter_row_worker(void *arg1, void *arg2) {
- AV1LfSync *const lf_sync = (AV1LfSync *)arg1;
- LFWorkerData *const lf_data = (LFWorkerData *)arg2;
- thread_loop_filter_rows(lf_data->frame_buffer, lf_data->cm, lf_data->planes,
- lf_data->xd, lf_sync);
- return 1;
-}
-
-static void loop_filter_rows_mt(YV12_BUFFER_CONFIG *frame, AV1_COMMON *cm,
- MACROBLOCKD *xd, int start, int stop,
- int plane_start, int plane_end,
- AVxWorker *workers, int nworkers,
- AV1LfSync *lf_sync) {
- const AVxWorkerInterface *const winterface = aom_get_worker_interface();
- // Number of superblock rows and cols
- const int sb_rows =
- ALIGN_POWER_OF_TWO(cm->mi_rows, MAX_MIB_SIZE_LOG2) >> MAX_MIB_SIZE_LOG2;
- const int num_workers = nworkers;
- int i;
-
- if (!lf_sync->sync_range || sb_rows != lf_sync->rows ||
- num_workers > lf_sync->num_workers) {
- av1_loop_filter_dealloc(lf_sync);
- loop_filter_alloc(lf_sync, cm, sb_rows, cm->width, num_workers);
- }
-
- // Initialize cur_sb_col to -1 for all SB rows.
- for (i = 0; i < MAX_MB_PLANE; i++) {
- memset(lf_sync->cur_sb_col[i], -1,
- sizeof(*(lf_sync->cur_sb_col[i])) * sb_rows);
- }
-
- enqueue_lf_jobs(lf_sync, cm, start, stop, plane_start, plane_end);
-
- // Set up loopfilter thread data.
- for (i = 0; i < num_workers; ++i) {
- AVxWorker *const worker = &workers[i];
- LFWorkerData *const lf_data = &lf_sync->lfdata[i];
-
- worker->hook = loop_filter_row_worker;
- worker->data1 = lf_sync;
- worker->data2 = lf_data;
-
- // Loopfilter data
- loop_filter_data_reset(lf_data, frame, cm, xd);
-
- // Start loopfiltering
- if (i == num_workers - 1) {
- winterface->execute(worker);
- } else {
- winterface->launch(worker);
- }
- }
-
- // Wait till all rows are finished
- for (i = 0; i < num_workers; ++i) {
- winterface->sync(&workers[i]);
- }
-}
-
-void av1_loop_filter_frame_mt(YV12_BUFFER_CONFIG *frame, AV1_COMMON *cm,
- MACROBLOCKD *xd, int plane_start, int plane_end,
- int partial_frame, AVxWorker *workers,
- int num_workers, AV1LfSync *lf_sync) {
- int start_mi_row, end_mi_row, mi_rows_to_filter;
-
- start_mi_row = 0;
- mi_rows_to_filter = cm->mi_rows;
- if (partial_frame && cm->mi_rows > 8) {
- start_mi_row = cm->mi_rows >> 1;
- start_mi_row &= 0xfffffff8;
- mi_rows_to_filter = AOMMAX(cm->mi_rows / 8, 8);
- }
- end_mi_row = start_mi_row + mi_rows_to_filter;
- av1_loop_filter_frame_init(cm, plane_start, plane_end);
-
- loop_filter_rows_mt(frame, cm, xd, start_mi_row, end_mi_row, plane_start,
- plane_end, workers, num_workers, lf_sync);
-}
-
-static INLINE void lr_sync_read(void *const lr_sync, int r, int c, int plane) {
-#if CONFIG_MULTITHREAD
- AV1LrSync *const loop_res_sync = (AV1LrSync *)lr_sync;
- const int nsync = loop_res_sync->sync_range;
-
- if (r && !(c & (nsync - 1))) {
- pthread_mutex_t *const mutex = &loop_res_sync->mutex_[plane][r - 1];
- pthread_mutex_lock(mutex);
-
- while (c > loop_res_sync->cur_sb_col[plane][r - 1] - nsync) {
- pthread_cond_wait(&loop_res_sync->cond_[plane][r - 1], mutex);
- }
- pthread_mutex_unlock(mutex);
- }
-#else
- (void)lr_sync;
- (void)r;
- (void)c;
- (void)plane;
-#endif // CONFIG_MULTITHREAD
-}
-
-static INLINE void lr_sync_write(void *const lr_sync, int r, int c,
- const int sb_cols, int plane) {
-#if CONFIG_MULTITHREAD
- AV1LrSync *const loop_res_sync = (AV1LrSync *)lr_sync;
- const int nsync = loop_res_sync->sync_range;
- int cur;
- // Only signal when there are enough filtered SB for next row to run.
- int sig = 1;
-
- if (c < sb_cols - 1) {
- cur = c;
- if (c % nsync) sig = 0;
- } else {
- cur = sb_cols + nsync;
- }
-
- if (sig) {
- pthread_mutex_lock(&loop_res_sync->mutex_[plane][r]);
-
- loop_res_sync->cur_sb_col[plane][r] = cur;
-
- pthread_cond_broadcast(&loop_res_sync->cond_[plane][r]);
- pthread_mutex_unlock(&loop_res_sync->mutex_[plane][r]);
- }
-#else
- (void)lr_sync;
- (void)r;
- (void)c;
- (void)sb_cols;
- (void)plane;
-#endif // CONFIG_MULTITHREAD
-}
-
-// Allocate memory for loop restoration row synchronization
-static void loop_restoration_alloc(AV1LrSync *lr_sync, AV1_COMMON *cm,
- int num_workers, int num_rows_lr,
- int num_planes, int width) {
- lr_sync->rows = num_rows_lr;
- lr_sync->num_planes = num_planes;
-#if CONFIG_MULTITHREAD
- {
- int i, j;
-
- for (j = 0; j < num_planes; j++) {
- CHECK_MEM_ERROR(cm, lr_sync->mutex_[j],
- aom_malloc(sizeof(*(lr_sync->mutex_[j])) * num_rows_lr));
- if (lr_sync->mutex_[j]) {
- for (i = 0; i < num_rows_lr; ++i) {
- pthread_mutex_init(&lr_sync->mutex_[j][i], NULL);
- }
- }
-
- CHECK_MEM_ERROR(cm, lr_sync->cond_[j],
- aom_malloc(sizeof(*(lr_sync->cond_[j])) * num_rows_lr));
- if (lr_sync->cond_[j]) {
- for (i = 0; i < num_rows_lr; ++i) {
- pthread_cond_init(&lr_sync->cond_[j][i], NULL);
- }
- }
- }
-
- CHECK_MEM_ERROR(cm, lr_sync->job_mutex,
- aom_malloc(sizeof(*(lr_sync->job_mutex))));
- if (lr_sync->job_mutex) {
- pthread_mutex_init(lr_sync->job_mutex, NULL);
- }
- }
-#endif // CONFIG_MULTITHREAD
- CHECK_MEM_ERROR(cm, lr_sync->lrworkerdata,
- aom_malloc(num_workers * sizeof(*(lr_sync->lrworkerdata))));
-
- for (int worker_idx = 0; worker_idx < num_workers; ++worker_idx) {
- if (worker_idx < num_workers - 1) {
- CHECK_MEM_ERROR(cm, lr_sync->lrworkerdata[worker_idx].rst_tmpbuf,
- (int32_t *)aom_memalign(16, RESTORATION_TMPBUF_SIZE));
- CHECK_MEM_ERROR(cm, lr_sync->lrworkerdata[worker_idx].rlbs,
- aom_malloc(sizeof(RestorationLineBuffers)));
-
- } else {
- lr_sync->lrworkerdata[worker_idx].rst_tmpbuf = cm->rst_tmpbuf;
- lr_sync->lrworkerdata[worker_idx].rlbs = cm->rlbs;
- }
- }
-
- lr_sync->num_workers = num_workers;
-
- for (int j = 0; j < num_planes; j++) {
- CHECK_MEM_ERROR(
- cm, lr_sync->cur_sb_col[j],
- aom_malloc(sizeof(*(lr_sync->cur_sb_col[j])) * num_rows_lr));
- }
- CHECK_MEM_ERROR(
- cm, lr_sync->job_queue,
- aom_malloc(sizeof(*(lr_sync->job_queue)) * num_rows_lr * num_planes));
- // Set up nsync.
- lr_sync->sync_range = get_lr_sync_range(width);
-}
-
-// Deallocate loop restoration synchronization related mutex and data
-void av1_loop_restoration_dealloc(AV1LrSync *lr_sync, int num_workers) {
- if (lr_sync != NULL) {
- int j;
-#if CONFIG_MULTITHREAD
- int i;
- for (j = 0; j < MAX_MB_PLANE; j++) {
- if (lr_sync->mutex_[j] != NULL) {
- for (i = 0; i < lr_sync->rows; ++i) {
- pthread_mutex_destroy(&lr_sync->mutex_[j][i]);
- }
- aom_free(lr_sync->mutex_[j]);
- }
- if (lr_sync->cond_[j] != NULL) {
- for (i = 0; i < lr_sync->rows; ++i) {
- pthread_cond_destroy(&lr_sync->cond_[j][i]);
- }
- aom_free(lr_sync->cond_[j]);
- }
- }
- if (lr_sync->job_mutex != NULL) {
- pthread_mutex_destroy(lr_sync->job_mutex);
- aom_free(lr_sync->job_mutex);
- }
-#endif // CONFIG_MULTITHREAD
- for (j = 0; j < MAX_MB_PLANE; j++) {
- aom_free(lr_sync->cur_sb_col[j]);
- }
-
- aom_free(lr_sync->job_queue);
-
- if (lr_sync->lrworkerdata) {
- for (int worker_idx = 0; worker_idx < num_workers - 1; worker_idx++) {
- LRWorkerData *const workerdata_data =
- lr_sync->lrworkerdata + worker_idx;
-
- aom_free(workerdata_data->rst_tmpbuf);
- aom_free(workerdata_data->rlbs);
- }
- aom_free(lr_sync->lrworkerdata);
- }
-
- // clear the structure as the source of this call may be a resize in which
- // case this call will be followed by an _alloc() which may fail.
- av1_zero(*lr_sync);
- }
-}
-
-static void enqueue_lr_jobs(AV1LrSync *lr_sync, AV1LrStruct *lr_ctxt,
- AV1_COMMON *cm) {
- FilterFrameCtxt *ctxt = lr_ctxt->ctxt;
-
- const int num_planes = av1_num_planes(cm);
- AV1LrMTInfo *lr_job_queue = lr_sync->job_queue;
- int32_t lr_job_counter[2], num_even_lr_jobs = 0;
- lr_sync->jobs_enqueued = 0;
- lr_sync->jobs_dequeued = 0;
-
- for (int plane = 0; plane < num_planes; plane++) {
- if (cm->rst_info[plane].frame_restoration_type == RESTORE_NONE) continue;
- num_even_lr_jobs =
- num_even_lr_jobs + ((ctxt[plane].rsi->vert_units_per_tile + 1) >> 1);
- }
- lr_job_counter[0] = 0;
- lr_job_counter[1] = num_even_lr_jobs;
-
- for (int plane = 0; plane < num_planes; plane++) {
- if (cm->rst_info[plane].frame_restoration_type == RESTORE_NONE) continue;
- const int is_uv = plane > 0;
- const int ss_y = is_uv && cm->seq_params.subsampling_y;
-
- AV1PixelRect tile_rect = ctxt[plane].tile_rect;
- const int unit_size = ctxt[plane].rsi->restoration_unit_size;
-
- const int tile_h = tile_rect.bottom - tile_rect.top;
- const int ext_size = unit_size * 3 / 2;
-
- int y0 = 0, i = 0;
- while (y0 < tile_h) {
- int remaining_h = tile_h - y0;
- int h = (remaining_h < ext_size) ? remaining_h : unit_size;
-
- RestorationTileLimits limits;
- limits.v_start = tile_rect.top + y0;
- limits.v_end = tile_rect.top + y0 + h;
- assert(limits.v_end <= tile_rect.bottom);
- // Offset the tile upwards to align with the restoration processing stripe
- const int voffset = RESTORATION_UNIT_OFFSET >> ss_y;
- limits.v_start = AOMMAX(tile_rect.top, limits.v_start - voffset);
- if (limits.v_end < tile_rect.bottom) limits.v_end -= voffset;
-
- assert(lr_job_counter[0] <= num_even_lr_jobs);
-
- lr_job_queue[lr_job_counter[i & 1]].lr_unit_row = i;
- lr_job_queue[lr_job_counter[i & 1]].plane = plane;
- lr_job_queue[lr_job_counter[i & 1]].v_start = limits.v_start;
- lr_job_queue[lr_job_counter[i & 1]].v_end = limits.v_end;
- lr_job_queue[lr_job_counter[i & 1]].sync_mode = i & 1;
- if ((i & 1) == 0) {
- lr_job_queue[lr_job_counter[i & 1]].v_copy_start =
- limits.v_start + RESTORATION_BORDER;
- lr_job_queue[lr_job_counter[i & 1]].v_copy_end =
- limits.v_end - RESTORATION_BORDER;
- if (i == 0) {
- assert(limits.v_start == tile_rect.top);
- lr_job_queue[lr_job_counter[i & 1]].v_copy_start = tile_rect.top;
- }
- if (i == (ctxt[plane].rsi->vert_units_per_tile - 1)) {
- assert(limits.v_end == tile_rect.bottom);
- lr_job_queue[lr_job_counter[i & 1]].v_copy_end = tile_rect.bottom;
- }
- } else {
- lr_job_queue[lr_job_counter[i & 1]].v_copy_start =
- AOMMAX(limits.v_start - RESTORATION_BORDER, tile_rect.top);
- lr_job_queue[lr_job_counter[i & 1]].v_copy_end =
- AOMMIN(limits.v_end + RESTORATION_BORDER, tile_rect.bottom);
- }
- lr_job_counter[i & 1]++;
- lr_sync->jobs_enqueued++;
-
- y0 += h;
- ++i;
- }
- }
-}
-
-AV1LrMTInfo *get_lr_job_info(AV1LrSync *lr_sync) {
- AV1LrMTInfo *cur_job_info = NULL;
-
-#if CONFIG_MULTITHREAD
- pthread_mutex_lock(lr_sync->job_mutex);
-
- if (lr_sync->jobs_dequeued < lr_sync->jobs_enqueued) {
- cur_job_info = lr_sync->job_queue + lr_sync->jobs_dequeued;
- lr_sync->jobs_dequeued++;
- }
-
- pthread_mutex_unlock(lr_sync->job_mutex);
-#else
- (void)lr_sync;
-#endif
-
- return cur_job_info;
-}
-
-// Implement row loop restoration for each thread.
-static int loop_restoration_row_worker(void *arg1, void *arg2) {
- AV1LrSync *const lr_sync = (AV1LrSync *)arg1;
- LRWorkerData *lrworkerdata = (LRWorkerData *)arg2;
- AV1LrStruct *lr_ctxt = (AV1LrStruct *)lrworkerdata->lr_ctxt;
- FilterFrameCtxt *ctxt = lr_ctxt->ctxt;
- int lr_unit_row;
- int plane;
- const int tile_row = LR_TILE_ROW;
- const int tile_col = LR_TILE_COL;
- const int tile_cols = LR_TILE_COLS;
- const int tile_idx = tile_col + tile_row * tile_cols;
- typedef void (*copy_fun)(const YV12_BUFFER_CONFIG *src_ybc,
- YV12_BUFFER_CONFIG *dst_ybc, int hstart, int hend,
- int vstart, int vend);
- static const copy_fun copy_funs[3] = {
- aom_yv12_partial_copy_y, aom_yv12_partial_copy_u, aom_yv12_partial_copy_v
- };
-
- while (1) {
- AV1LrMTInfo *cur_job_info = get_lr_job_info(lr_sync);
- if (cur_job_info != NULL) {
- RestorationTileLimits limits;
- sync_read_fn_t on_sync_read;
- sync_write_fn_t on_sync_write;
- limits.v_start = cur_job_info->v_start;
- limits.v_end = cur_job_info->v_end;
- lr_unit_row = cur_job_info->lr_unit_row;
- plane = cur_job_info->plane;
- const int unit_idx0 = tile_idx * ctxt[plane].rsi->units_per_tile;
-
- // sync_mode == 1 implies only sync read is required in LR Multi-threading
- // sync_mode == 0 implies only sync write is required.
- on_sync_read =
- cur_job_info->sync_mode == 1 ? lr_sync_read : av1_lr_sync_read_dummy;
- on_sync_write = cur_job_info->sync_mode == 0 ? lr_sync_write
- : av1_lr_sync_write_dummy;
-
- av1_foreach_rest_unit_in_row(
- &limits, &(ctxt[plane].tile_rect), lr_ctxt->on_rest_unit, lr_unit_row,
- ctxt[plane].rsi->restoration_unit_size, unit_idx0,
- ctxt[plane].rsi->horz_units_per_tile,
- ctxt[plane].rsi->vert_units_per_tile, plane, &ctxt[plane],
- lrworkerdata->rst_tmpbuf, lrworkerdata->rlbs, on_sync_read,
- on_sync_write, lr_sync);
-
- copy_funs[plane](lr_ctxt->dst, lr_ctxt->frame, ctxt[plane].tile_rect.left,
- ctxt[plane].tile_rect.right, cur_job_info->v_copy_start,
- cur_job_info->v_copy_end);
- } else {
- break;
- }
- }
- return 1;
-}
-
-static void foreach_rest_unit_in_planes_mt(AV1LrStruct *lr_ctxt,
- AVxWorker *workers, int nworkers,
- AV1LrSync *lr_sync, AV1_COMMON *cm) {
- FilterFrameCtxt *ctxt = lr_ctxt->ctxt;
-
- const int num_planes = av1_num_planes(cm);
-
- const AVxWorkerInterface *const winterface = aom_get_worker_interface();
- int num_rows_lr = 0;
-
- for (int plane = 0; plane < num_planes; plane++) {
- if (cm->rst_info[plane].frame_restoration_type == RESTORE_NONE) continue;
-
- const AV1PixelRect tile_rect = ctxt[plane].tile_rect;
- const int max_tile_h = tile_rect.bottom - tile_rect.top;
-
- const int unit_size = cm->rst_info[plane].restoration_unit_size;
-
- num_rows_lr =
- AOMMAX(num_rows_lr, av1_lr_count_units_in_tile(unit_size, max_tile_h));
- }
-
- const int num_workers = nworkers;
- int i;
- assert(MAX_MB_PLANE == 3);
-
- if (!lr_sync->sync_range || num_rows_lr != lr_sync->rows ||
- num_workers > lr_sync->num_workers || num_planes != lr_sync->num_planes) {
- av1_loop_restoration_dealloc(lr_sync, num_workers);
- loop_restoration_alloc(lr_sync, cm, num_workers, num_rows_lr, num_planes,
- cm->width);
- }
-
- // Initialize cur_sb_col to -1 for all SB rows.
- for (i = 0; i < num_planes; i++) {
- memset(lr_sync->cur_sb_col[i], -1,
- sizeof(*(lr_sync->cur_sb_col[i])) * num_rows_lr);
- }
-
- enqueue_lr_jobs(lr_sync, lr_ctxt, cm);
-
- // Set up looprestoration thread data.
- for (i = 0; i < num_workers; ++i) {
- AVxWorker *const worker = &workers[i];
- lr_sync->lrworkerdata[i].lr_ctxt = (void *)lr_ctxt;
- worker->hook = loop_restoration_row_worker;
- worker->data1 = lr_sync;
- worker->data2 = &lr_sync->lrworkerdata[i];
-
- // Start loopfiltering
- if (i == num_workers - 1) {
- winterface->execute(worker);
- } else {
- winterface->launch(worker);
- }
- }
-
- // Wait till all rows are finished
- for (i = 0; i < num_workers; ++i) {
- winterface->sync(&workers[i]);
- }
-}
-
-void av1_loop_restoration_filter_frame_mt(YV12_BUFFER_CONFIG *frame,
- AV1_COMMON *cm, int optimized_lr,
- AVxWorker *workers, int num_workers,
- AV1LrSync *lr_sync, void *lr_ctxt) {
- assert(!cm->all_lossless);
-
- const int num_planes = av1_num_planes(cm);
-
- AV1LrStruct *loop_rest_ctxt = (AV1LrStruct *)lr_ctxt;
-
- av1_loop_restoration_filter_frame_init(loop_rest_ctxt, frame, cm,
- optimized_lr, num_planes);
-
- foreach_rest_unit_in_planes_mt(loop_rest_ctxt, workers, num_workers, lr_sync,
- cm);
-}
diff --git a/third_party/aom/av1/common/thread_common.h b/third_party/aom/av1/common/thread_common.h
deleted file mode 100644
index 23d61d72a..000000000
--- a/third_party/aom/av1/common/thread_common.h
+++ /dev/null
@@ -1,119 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#ifndef AOM_AV1_COMMON_THREAD_COMMON_H_
-#define AOM_AV1_COMMON_THREAD_COMMON_H_
-
-#include "config/aom_config.h"
-
-#include "av1/common/av1_loopfilter.h"
-#include "aom_util/aom_thread.h"
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-struct AV1Common;
-
-typedef struct AV1LfMTInfo {
- int mi_row;
- int plane;
- int dir;
-} AV1LfMTInfo;
-
-// Loopfilter row synchronization
-typedef struct AV1LfSyncData {
-#if CONFIG_MULTITHREAD
- pthread_mutex_t *mutex_[MAX_MB_PLANE];
- pthread_cond_t *cond_[MAX_MB_PLANE];
-#endif
- // Allocate memory to store the loop-filtered superblock index in each row.
- int *cur_sb_col[MAX_MB_PLANE];
- // The optimal sync_range for different resolution and platform should be
- // determined by testing. Currently, it is chosen to be a power-of-2 number.
- int sync_range;
- int rows;
-
- // Row-based parallel loopfilter data
- LFWorkerData *lfdata;
- int num_workers;
-
-#if CONFIG_MULTITHREAD
- pthread_mutex_t *job_mutex;
-#endif
- AV1LfMTInfo *job_queue;
- int jobs_enqueued;
- int jobs_dequeued;
-} AV1LfSync;
-
-typedef struct AV1LrMTInfo {
- int v_start;
- int v_end;
- int lr_unit_row;
- int plane;
- int sync_mode;
- int v_copy_start;
- int v_copy_end;
-} AV1LrMTInfo;
-
-typedef struct LoopRestorationWorkerData {
- int32_t *rst_tmpbuf;
- void *rlbs;
- void *lr_ctxt;
-} LRWorkerData;
-
-// Looprestoration row synchronization
-typedef struct AV1LrSyncData {
-#if CONFIG_MULTITHREAD
- pthread_mutex_t *mutex_[MAX_MB_PLANE];
- pthread_cond_t *cond_[MAX_MB_PLANE];
-#endif
- // Allocate memory to store the loop-restoration block index in each row.
- int *cur_sb_col[MAX_MB_PLANE];
- // The optimal sync_range for different resolution and platform should be
- // determined by testing. Currently, it is chosen to be a power-of-2 number.
- int sync_range;
- int rows;
- int num_planes;
-
- int num_workers;
-
-#if CONFIG_MULTITHREAD
- pthread_mutex_t *job_mutex;
-#endif
- // Row-based parallel loopfilter data
- LRWorkerData *lrworkerdata;
-
- AV1LrMTInfo *job_queue;
- int jobs_enqueued;
- int jobs_dequeued;
-} AV1LrSync;
-
-// Deallocate loopfilter synchronization related mutex and data.
-void av1_loop_filter_dealloc(AV1LfSync *lf_sync);
-
-void av1_loop_filter_frame_mt(YV12_BUFFER_CONFIG *frame, struct AV1Common *cm,
- struct macroblockd *mbd, int plane_start,
- int plane_end, int partial_frame,
- AVxWorker *workers, int num_workers,
- AV1LfSync *lf_sync);
-void av1_loop_restoration_filter_frame_mt(YV12_BUFFER_CONFIG *frame,
- struct AV1Common *cm,
- int optimized_lr, AVxWorker *workers,
- int num_workers, AV1LrSync *lr_sync,
- void *lr_ctxt);
-void av1_loop_restoration_dealloc(AV1LrSync *lr_sync, int num_workers);
-
-#ifdef __cplusplus
-} // extern "C"
-#endif
-
-#endif // AOM_AV1_COMMON_THREAD_COMMON_H_
diff --git a/third_party/aom/av1/common/tile_common.c b/third_party/aom/av1/common/tile_common.c
deleted file mode 100644
index 1b413487f..000000000
--- a/third_party/aom/av1/common/tile_common.c
+++ /dev/null
@@ -1,207 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#include "av1/common/tile_common.h"
-#include "av1/common/onyxc_int.h"
-#include "av1/common/resize.h"
-#include "aom_dsp/aom_dsp_common.h"
-
-void av1_tile_init(TileInfo *tile, const AV1_COMMON *cm, int row, int col) {
- av1_tile_set_row(tile, cm, row);
- av1_tile_set_col(tile, cm, col);
-}
-
-// Find smallest k>=0 such that (blk_size << k) >= target
-static int tile_log2(int blk_size, int target) {
- int k;
- for (k = 0; (blk_size << k) < target; k++) {
- }
- return k;
-}
-
-void av1_get_tile_limits(AV1_COMMON *const cm) {
- int mi_cols = ALIGN_POWER_OF_TWO(cm->mi_cols, cm->seq_params.mib_size_log2);
- int mi_rows = ALIGN_POWER_OF_TWO(cm->mi_rows, cm->seq_params.mib_size_log2);
- int sb_cols = mi_cols >> cm->seq_params.mib_size_log2;
- int sb_rows = mi_rows >> cm->seq_params.mib_size_log2;
-
- int sb_size_log2 = cm->seq_params.mib_size_log2 + MI_SIZE_LOG2;
- cm->max_tile_width_sb = MAX_TILE_WIDTH >> sb_size_log2;
- int max_tile_area_sb = MAX_TILE_AREA >> (2 * sb_size_log2);
-
- cm->min_log2_tile_cols = tile_log2(cm->max_tile_width_sb, sb_cols);
- cm->max_log2_tile_cols = tile_log2(1, AOMMIN(sb_cols, MAX_TILE_COLS));
- cm->max_log2_tile_rows = tile_log2(1, AOMMIN(sb_rows, MAX_TILE_ROWS));
- cm->min_log2_tiles = tile_log2(max_tile_area_sb, sb_cols * sb_rows);
- cm->min_log2_tiles = AOMMAX(cm->min_log2_tiles, cm->min_log2_tile_cols);
-}
-
-void av1_calculate_tile_cols(AV1_COMMON *const cm) {
- int mi_cols = ALIGN_POWER_OF_TWO(cm->mi_cols, cm->seq_params.mib_size_log2);
- int mi_rows = ALIGN_POWER_OF_TWO(cm->mi_rows, cm->seq_params.mib_size_log2);
- int sb_cols = mi_cols >> cm->seq_params.mib_size_log2;
- int sb_rows = mi_rows >> cm->seq_params.mib_size_log2;
- int i;
-
- if (cm->uniform_tile_spacing_flag) {
- int start_sb;
- int size_sb = ALIGN_POWER_OF_TWO(sb_cols, cm->log2_tile_cols);
- size_sb >>= cm->log2_tile_cols;
- assert(size_sb > 0);
- for (i = 0, start_sb = 0; start_sb < sb_cols; i++) {
- cm->tile_col_start_sb[i] = start_sb;
- start_sb += size_sb;
- }
- cm->tile_cols = i;
- cm->tile_col_start_sb[i] = sb_cols;
- cm->min_log2_tile_rows = AOMMAX(cm->min_log2_tiles - cm->log2_tile_cols, 0);
- cm->max_tile_height_sb = sb_rows >> cm->min_log2_tile_rows;
-
- cm->tile_width = size_sb << cm->seq_params.mib_size_log2;
- cm->tile_width = AOMMIN(cm->tile_width, cm->mi_cols);
- } else {
- int max_tile_area_sb = (sb_rows * sb_cols);
- int widest_tile_sb = 1;
- cm->log2_tile_cols = tile_log2(1, cm->tile_cols);
- for (i = 0; i < cm->tile_cols; i++) {
- int size_sb = cm->tile_col_start_sb[i + 1] - cm->tile_col_start_sb[i];
- widest_tile_sb = AOMMAX(widest_tile_sb, size_sb);
- }
- if (cm->min_log2_tiles) {
- max_tile_area_sb >>= (cm->min_log2_tiles + 1);
- }
- cm->max_tile_height_sb = AOMMAX(max_tile_area_sb / widest_tile_sb, 1);
- }
-}
-
-void av1_calculate_tile_rows(AV1_COMMON *const cm) {
- int mi_rows = ALIGN_POWER_OF_TWO(cm->mi_rows, cm->seq_params.mib_size_log2);
- int sb_rows = mi_rows >> cm->seq_params.mib_size_log2;
- int start_sb, size_sb, i;
-
- if (cm->uniform_tile_spacing_flag) {
- size_sb = ALIGN_POWER_OF_TWO(sb_rows, cm->log2_tile_rows);
- size_sb >>= cm->log2_tile_rows;
- assert(size_sb > 0);
- for (i = 0, start_sb = 0; start_sb < sb_rows; i++) {
- cm->tile_row_start_sb[i] = start_sb;
- start_sb += size_sb;
- }
- cm->tile_rows = i;
- cm->tile_row_start_sb[i] = sb_rows;
-
- cm->tile_height = size_sb << cm->seq_params.mib_size_log2;
- cm->tile_height = AOMMIN(cm->tile_height, cm->mi_rows);
- } else {
- cm->log2_tile_rows = tile_log2(1, cm->tile_rows);
- }
-}
-
-void av1_tile_set_row(TileInfo *tile, const AV1_COMMON *cm, int row) {
- assert(row < cm->tile_rows);
- int mi_row_start = cm->tile_row_start_sb[row] << cm->seq_params.mib_size_log2;
- int mi_row_end = cm->tile_row_start_sb[row + 1]
- << cm->seq_params.mib_size_log2;
- tile->tile_row = row;
- tile->mi_row_start = mi_row_start;
- tile->mi_row_end = AOMMIN(mi_row_end, cm->mi_rows);
- assert(tile->mi_row_end > tile->mi_row_start);
-}
-
-void av1_tile_set_col(TileInfo *tile, const AV1_COMMON *cm, int col) {
- assert(col < cm->tile_cols);
- int mi_col_start = cm->tile_col_start_sb[col] << cm->seq_params.mib_size_log2;
- int mi_col_end = cm->tile_col_start_sb[col + 1]
- << cm->seq_params.mib_size_log2;
- tile->tile_col = col;
- tile->mi_col_start = mi_col_start;
- tile->mi_col_end = AOMMIN(mi_col_end, cm->mi_cols);
- assert(tile->mi_col_end > tile->mi_col_start);
-}
-
-int av1_get_sb_rows_in_tile(AV1_COMMON *cm, TileInfo tile) {
- int mi_rows_aligned_to_sb = ALIGN_POWER_OF_TWO(
- tile.mi_row_end - tile.mi_row_start, cm->seq_params.mib_size_log2);
- int sb_rows = mi_rows_aligned_to_sb >> cm->seq_params.mib_size_log2;
-
- return sb_rows;
-}
-
-int av1_get_sb_cols_in_tile(AV1_COMMON *cm, TileInfo tile) {
- int mi_cols_aligned_to_sb = ALIGN_POWER_OF_TWO(
- tile.mi_col_end - tile.mi_col_start, cm->seq_params.mib_size_log2);
- int sb_cols = mi_cols_aligned_to_sb >> cm->seq_params.mib_size_log2;
-
- return sb_cols;
-}
-
-int get_tile_size(int mi_frame_size, int log2_tile_num, int *ntiles) {
- // Round the frame up to a whole number of max superblocks
- mi_frame_size = ALIGN_POWER_OF_TWO(mi_frame_size, MAX_MIB_SIZE_LOG2);
-
- // Divide by the signalled number of tiles, rounding up to the multiple of
- // the max superblock size. To do this, shift right (and round up) to get the
- // tile size in max super-blocks and then shift left again to convert it to
- // mi units.
- const int shift = log2_tile_num + MAX_MIB_SIZE_LOG2;
- const int max_sb_tile_size =
- ALIGN_POWER_OF_TWO(mi_frame_size, shift) >> shift;
- const int mi_tile_size = max_sb_tile_size << MAX_MIB_SIZE_LOG2;
-
- // The actual number of tiles is the ceiling of the frame size in mi units
- // divided by mi_size. This is at most 1 << log2_tile_num but might be
- // strictly less if max_sb_tile_size got rounded up significantly.
- if (ntiles) {
- *ntiles = (mi_frame_size + mi_tile_size - 1) / mi_tile_size;
- assert(*ntiles <= (1 << log2_tile_num));
- }
-
- return mi_tile_size;
-}
-
-AV1PixelRect av1_get_tile_rect(const TileInfo *tile_info, const AV1_COMMON *cm,
- int is_uv) {
- AV1PixelRect r;
-
- // Calculate position in the Y plane
- r.left = tile_info->mi_col_start * MI_SIZE;
- r.right = tile_info->mi_col_end * MI_SIZE;
- r.top = tile_info->mi_row_start * MI_SIZE;
- r.bottom = tile_info->mi_row_end * MI_SIZE;
-
- // If upscaling is enabled, the tile limits need scaling to match the
- // upscaled frame where the restoration units live. To do this, scale up the
- // top-left and bottom-right of the tile.
- if (av1_superres_scaled(cm)) {
- av1_calculate_unscaled_superres_size(&r.left, &r.top,
- cm->superres_scale_denominator);
- av1_calculate_unscaled_superres_size(&r.right, &r.bottom,
- cm->superres_scale_denominator);
- }
-
- const int frame_w = cm->superres_upscaled_width;
- const int frame_h = cm->superres_upscaled_height;
-
- // Make sure we don't fall off the bottom-right of the frame.
- r.right = AOMMIN(r.right, frame_w);
- r.bottom = AOMMIN(r.bottom, frame_h);
-
- // Convert to coordinates in the appropriate plane
- const int ss_x = is_uv && cm->seq_params.subsampling_x;
- const int ss_y = is_uv && cm->seq_params.subsampling_y;
-
- r.left = ROUND_POWER_OF_TWO(r.left, ss_x);
- r.right = ROUND_POWER_OF_TWO(r.right, ss_x);
- r.top = ROUND_POWER_OF_TWO(r.top, ss_y);
- r.bottom = ROUND_POWER_OF_TWO(r.bottom, ss_y);
-
- return r;
-}
diff --git a/third_party/aom/av1/common/tile_common.h b/third_party/aom/av1/common/tile_common.h
deleted file mode 100644
index c03553dc6..000000000
--- a/third_party/aom/av1/common/tile_common.h
+++ /dev/null
@@ -1,72 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#ifndef AOM_AV1_COMMON_TILE_COMMON_H_
-#define AOM_AV1_COMMON_TILE_COMMON_H_
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-#include "config/aom_config.h"
-
-struct AV1Common;
-
-#define DEFAULT_MAX_NUM_TG 1
-
-typedef struct TileInfo {
- int mi_row_start, mi_row_end;
- int mi_col_start, mi_col_end;
- int tg_horz_boundary;
- int tile_row;
- int tile_col;
-} TileInfo;
-
-// initializes 'tile->mi_(row|col)_(start|end)' for (row, col) based on
-// 'cm->log2_tile_(rows|cols)' & 'cm->mi_(rows|cols)'
-void av1_tile_init(TileInfo *tile, const struct AV1Common *cm, int row,
- int col);
-
-void av1_tile_set_row(TileInfo *tile, const struct AV1Common *cm, int row);
-void av1_tile_set_col(TileInfo *tile, const struct AV1Common *cm, int col);
-void av1_get_tile_n_bits(int mi_cols, int *min_log2_tile_cols,
- int *max_log2_tile_cols);
-
-// Calculate the correct tile size (width or height) for (1 << log2_tile_num)
-// tiles horizontally or vertically in the frame.
-int get_tile_size(int mi_frame_size, int log2_tile_num, int *ntiles);
-
-int av1_get_sb_rows_in_tile(struct AV1Common *cm, TileInfo tile);
-int av1_get_sb_cols_in_tile(struct AV1Common *cm, TileInfo tile);
-
-typedef struct {
- int left, top, right, bottom;
-} AV1PixelRect;
-
-// Return the pixel extents of the given tile
-AV1PixelRect av1_get_tile_rect(const TileInfo *tile_info,
- const struct AV1Common *cm, int is_uv);
-
-// Define tile maximum width and area
-// There is no maximum height since height is limited by area and width limits
-// The minimum tile width or height is fixed at one superblock
-#define MAX_TILE_WIDTH (4096) // Max Tile width in pixels
-#define MAX_TILE_AREA (4096 * 2304) // Maximum tile area in pixels
-
-void av1_get_tile_limits(struct AV1Common *const cm);
-void av1_calculate_tile_cols(struct AV1Common *const cm);
-void av1_calculate_tile_rows(struct AV1Common *const cm);
-
-#ifdef __cplusplus
-} // extern "C"
-#endif
-
-#endif // AOM_AV1_COMMON_TILE_COMMON_H_
diff --git a/third_party/aom/av1/common/timing.c b/third_party/aom/av1/common/timing.c
deleted file mode 100644
index 49dbde78f..000000000
--- a/third_party/aom/av1/common/timing.c
+++ /dev/null
@@ -1,79 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#include "av1/common/timing.h"
-
-/* Tables for AV1 max bitrates for different levels of main and high tier.
- * The tables are in Kbps instead of Mbps in the specification.
- * Note that depending on the profile, a multiplier is needed.
- */
-
-/* Max Bitrates for levels of Main Tier in kbps. Bitrate in main_kbps [31] */
-/* is a dummy value. The decoder model is not applicable for level 31. */
-static int32_t main_kbps[1 << LEVEL_BITS] = {
- 1500, 3000, 0, 0, 6000, 10000, 0, 0, 12000, 20000, 0,
- 0, 30000, 40000, 60000, 60000, 60000, 100000, 160000, 160000, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0, 0, (1 << 26)
-};
-
-/* Max Bitrates for levels of High Tier in kbps. Bitrate in high_kbps [31] */
-/* is a dummy value. The decoder model is not applicable for level 31. */
-static int32_t high_kbps[1 << LEVEL_BITS] = {
- 0, 0, 0, 0, 0, 0, 0, 0,
- 30000, 50000, 0, 0, 100000, 160000, 240000, 240000,
- 240000, 480000, 800000, 800000, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, (1 << 26)
-};
-
-/* BitrateProfileFactor */
-static int bitrate_profile_factor[1 << PROFILE_BITS] = {
- 1, 2, 3, 0, 0, 0, 0, 0
-};
-
-int64_t max_level_bitrate(BITSTREAM_PROFILE seq_profile, int seq_level_idx,
- int seq_tier) {
- int64_t bitrate;
-
- if (seq_tier) {
- bitrate = high_kbps[seq_level_idx] * bitrate_profile_factor[seq_profile];
- } else {
- bitrate = main_kbps[seq_level_idx] * bitrate_profile_factor[seq_profile];
- }
-
- return bitrate * 1000;
-}
-
-void set_aom_dec_model_info(aom_dec_model_info_t *decoder_model) {
- decoder_model->encoder_decoder_buffer_delay_length = 16;
- decoder_model->buffer_removal_time_length = 10;
- decoder_model->frame_presentation_time_length = 10;
-}
-
-void set_dec_model_op_parameters(aom_dec_model_op_parameters_t *op_params) {
- op_params->decoder_model_param_present_flag = 1;
- op_params->decoder_buffer_delay = 90000 >> 1; // 0.5 s
- op_params->encoder_buffer_delay = 90000 >> 1; // 0.5 s
- op_params->low_delay_mode_flag = 0;
- op_params->display_model_param_present_flag = 1;
- op_params->initial_display_delay = 8; // 8 frames delay
-}
-
-void set_resource_availability_parameters(
- aom_dec_model_op_parameters_t *op_params) {
- op_params->decoder_model_param_present_flag = 0;
- op_params->decoder_buffer_delay =
- 70000; // Resource availability mode default
- op_params->encoder_buffer_delay =
- 20000; // Resource availability mode default
- op_params->low_delay_mode_flag = 0; // Resource availability mode default
- op_params->display_model_param_present_flag = 1;
- op_params->initial_display_delay = 8; // 8 frames delay
-}
diff --git a/third_party/aom/av1/common/timing.h b/third_party/aom/av1/common/timing.h
deleted file mode 100644
index 06939ae43..000000000
--- a/third_party/aom/av1/common/timing.h
+++ /dev/null
@@ -1,59 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#ifndef AOM_AV1_COMMON_TIMING_H_
-#define AOM_AV1_COMMON_TIMING_H_
-
-#include "aom/aom_integer.h"
-#include "av1/common/enums.h"
-
-#define MAX_NUM_OP_POINTS 32
-
-typedef struct aom_timing {
- uint32_t num_units_in_display_tick;
- uint32_t time_scale;
- int equal_picture_interval;
- uint32_t num_ticks_per_picture;
-} aom_timing_info_t;
-
-typedef struct aom_dec_model_info {
- uint32_t num_units_in_decoding_tick;
- int encoder_decoder_buffer_delay_length;
- int buffer_removal_time_length;
- int frame_presentation_time_length;
-} aom_dec_model_info_t;
-
-typedef struct aom_dec_model_op_parameters {
- int decoder_model_param_present_flag;
- int64_t bitrate;
- int64_t buffer_size;
- uint32_t decoder_buffer_delay;
- uint32_t encoder_buffer_delay;
- int low_delay_mode_flag;
- int display_model_param_present_flag;
- int initial_display_delay;
-} aom_dec_model_op_parameters_t;
-
-typedef struct aom_op_timing_info_t {
- uint32_t buffer_removal_time;
-} aom_op_timing_info_t;
-
-void set_aom_dec_model_info(aom_dec_model_info_t *decoder_model);
-
-void set_dec_model_op_parameters(aom_dec_model_op_parameters_t *op_params);
-
-void set_resource_availability_parameters(
- aom_dec_model_op_parameters_t *op_params);
-
-int64_t max_level_bitrate(BITSTREAM_PROFILE seq_profile, int seq_level_idx,
- int seq_tier);
-
-#endif // AOM_AV1_COMMON_TIMING_H_
diff --git a/third_party/aom/av1/common/token_cdfs.h b/third_party/aom/av1/common/token_cdfs.h
deleted file mode 100644
index 53e956450..000000000
--- a/third_party/aom/av1/common/token_cdfs.h
+++ /dev/null
@@ -1,3555 +0,0 @@
-/*
- * Copyright (c) 2017, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#ifndef AOM_AV1_COMMON_TOKEN_CDFS_H_
-#define AOM_AV1_COMMON_TOKEN_CDFS_H_
-
-#include "config/aom_config.h"
-
-#include "av1/common/entropy.h"
-
-static const aom_cdf_prob
- av1_default_dc_sign_cdfs[TOKEN_CDF_Q_CTXS][PLANE_TYPES][DC_SIGN_CONTEXTS]
- [CDF_SIZE(2)] = {
- { {
- { AOM_CDF2(128 * 125) },
- { AOM_CDF2(128 * 102) },
- { AOM_CDF2(128 * 147) },
- },
- {
- { AOM_CDF2(128 * 119) },
- { AOM_CDF2(128 * 101) },
- { AOM_CDF2(128 * 135) },
- } },
- { {
- { AOM_CDF2(128 * 125) },
- { AOM_CDF2(128 * 102) },
- { AOM_CDF2(128 * 147) },
- },
- {
- { AOM_CDF2(128 * 119) },
- { AOM_CDF2(128 * 101) },
- { AOM_CDF2(128 * 135) },
- } },
- { {
- { AOM_CDF2(128 * 125) },
- { AOM_CDF2(128 * 102) },
- { AOM_CDF2(128 * 147) },
- },
- {
- { AOM_CDF2(128 * 119) },
- { AOM_CDF2(128 * 101) },
- { AOM_CDF2(128 * 135) },
- } },
- { {
- { AOM_CDF2(128 * 125) },
- { AOM_CDF2(128 * 102) },
- { AOM_CDF2(128 * 147) },
- },
- {
- { AOM_CDF2(128 * 119) },
- { AOM_CDF2(128 * 101) },
- { AOM_CDF2(128 * 135) },
- } },
- };
-
-static const aom_cdf_prob
- av1_default_txb_skip_cdfs[TOKEN_CDF_Q_CTXS][TX_SIZES][TXB_SKIP_CONTEXTS]
- [CDF_SIZE(2)] = { { { { AOM_CDF2(31849) },
- { AOM_CDF2(5892) },
- { AOM_CDF2(12112) },
- { AOM_CDF2(21935) },
- { AOM_CDF2(20289) },
- { AOM_CDF2(27473) },
- { AOM_CDF2(32487) },
- { AOM_CDF2(7654) },
- { AOM_CDF2(19473) },
- { AOM_CDF2(29984) },
- { AOM_CDF2(9961) },
- { AOM_CDF2(30242) },
- { AOM_CDF2(32117) } },
- { { AOM_CDF2(31548) },
- { AOM_CDF2(1549) },
- { AOM_CDF2(10130) },
- { AOM_CDF2(16656) },
- { AOM_CDF2(18591) },
- { AOM_CDF2(26308) },
- { AOM_CDF2(32537) },
- { AOM_CDF2(5403) },
- { AOM_CDF2(18096) },
- { AOM_CDF2(30003) },
- { AOM_CDF2(16384) },
- { AOM_CDF2(16384) },
- { AOM_CDF2(16384) } },
- { { AOM_CDF2(29957) },
- { AOM_CDF2(5391) },
- { AOM_CDF2(18039) },
- { AOM_CDF2(23566) },
- { AOM_CDF2(22431) },
- { AOM_CDF2(25822) },
- { AOM_CDF2(32197) },
- { AOM_CDF2(3778) },
- { AOM_CDF2(15336) },
- { AOM_CDF2(28981) },
- { AOM_CDF2(16384) },
- { AOM_CDF2(16384) },
- { AOM_CDF2(16384) } },
- { { AOM_CDF2(17920) },
- { AOM_CDF2(1818) },
- { AOM_CDF2(7282) },
- { AOM_CDF2(25273) },
- { AOM_CDF2(10923) },
- { AOM_CDF2(31554) },
- { AOM_CDF2(32624) },
- { AOM_CDF2(1366) },
- { AOM_CDF2(15628) },
- { AOM_CDF2(30462) },
- { AOM_CDF2(146) },
- { AOM_CDF2(5132) },
- { AOM_CDF2(31657) } },
- { { AOM_CDF2(6308) },
- { AOM_CDF2(117) },
- { AOM_CDF2(1638) },
- { AOM_CDF2(2161) },
- { AOM_CDF2(16384) },
- { AOM_CDF2(10923) },
- { AOM_CDF2(30247) },
- { AOM_CDF2(16384) },
- { AOM_CDF2(16384) },
- { AOM_CDF2(16384) },
- { AOM_CDF2(16384) },
- { AOM_CDF2(16384) },
- { AOM_CDF2(16384) } } },
- { { { AOM_CDF2(30371) },
- { AOM_CDF2(7570) },
- { AOM_CDF2(13155) },
- { AOM_CDF2(20751) },
- { AOM_CDF2(20969) },
- { AOM_CDF2(27067) },
- { AOM_CDF2(32013) },
- { AOM_CDF2(5495) },
- { AOM_CDF2(17942) },
- { AOM_CDF2(28280) },
- { AOM_CDF2(16384) },
- { AOM_CDF2(16384) },
- { AOM_CDF2(16384) } },
- { { AOM_CDF2(31782) },
- { AOM_CDF2(1836) },
- { AOM_CDF2(10689) },
- { AOM_CDF2(17604) },
- { AOM_CDF2(21622) },
- { AOM_CDF2(27518) },
- { AOM_CDF2(32399) },
- { AOM_CDF2(4419) },
- { AOM_CDF2(16294) },
- { AOM_CDF2(28345) },
- { AOM_CDF2(16384) },
- { AOM_CDF2(16384) },
- { AOM_CDF2(16384) } },
- { { AOM_CDF2(31901) },
- { AOM_CDF2(10311) },
- { AOM_CDF2(18047) },
- { AOM_CDF2(24806) },
- { AOM_CDF2(23288) },
- { AOM_CDF2(27914) },
- { AOM_CDF2(32296) },
- { AOM_CDF2(4215) },
- { AOM_CDF2(15756) },
- { AOM_CDF2(28341) },
- { AOM_CDF2(16384) },
- { AOM_CDF2(16384) },
- { AOM_CDF2(16384) } },
- { { AOM_CDF2(26726) },
- { AOM_CDF2(1045) },
- { AOM_CDF2(11703) },
- { AOM_CDF2(20590) },
- { AOM_CDF2(18554) },
- { AOM_CDF2(25970) },
- { AOM_CDF2(31938) },
- { AOM_CDF2(5583) },
- { AOM_CDF2(21313) },
- { AOM_CDF2(29390) },
- { AOM_CDF2(641) },
- { AOM_CDF2(22265) },
- { AOM_CDF2(31452) } },
- { { AOM_CDF2(26584) },
- { AOM_CDF2(188) },
- { AOM_CDF2(8847) },
- { AOM_CDF2(24519) },
- { AOM_CDF2(22938) },
- { AOM_CDF2(30583) },
- { AOM_CDF2(32608) },
- { AOM_CDF2(16384) },
- { AOM_CDF2(16384) },
- { AOM_CDF2(16384) },
- { AOM_CDF2(16384) },
- { AOM_CDF2(16384) },
- { AOM_CDF2(16384) } } },
- { { { AOM_CDF2(29614) },
- { AOM_CDF2(9068) },
- { AOM_CDF2(12924) },
- { AOM_CDF2(19538) },
- { AOM_CDF2(17737) },
- { AOM_CDF2(24619) },
- { AOM_CDF2(30642) },
- { AOM_CDF2(4119) },
- { AOM_CDF2(16026) },
- { AOM_CDF2(25657) },
- { AOM_CDF2(16384) },
- { AOM_CDF2(16384) },
- { AOM_CDF2(16384) } },
- { { AOM_CDF2(31957) },
- { AOM_CDF2(3230) },
- { AOM_CDF2(11153) },
- { AOM_CDF2(18123) },
- { AOM_CDF2(20143) },
- { AOM_CDF2(26536) },
- { AOM_CDF2(31986) },
- { AOM_CDF2(3050) },
- { AOM_CDF2(14603) },
- { AOM_CDF2(25155) },
- { AOM_CDF2(16384) },
- { AOM_CDF2(16384) },
- { AOM_CDF2(16384) } },
- { { AOM_CDF2(32363) },
- { AOM_CDF2(10692) },
- { AOM_CDF2(19090) },
- { AOM_CDF2(24357) },
- { AOM_CDF2(24442) },
- { AOM_CDF2(28312) },
- { AOM_CDF2(32169) },
- { AOM_CDF2(3648) },
- { AOM_CDF2(15690) },
- { AOM_CDF2(26815) },
- { AOM_CDF2(16384) },
- { AOM_CDF2(16384) },
- { AOM_CDF2(16384) } },
- { { AOM_CDF2(30669) },
- { AOM_CDF2(3832) },
- { AOM_CDF2(11663) },
- { AOM_CDF2(18889) },
- { AOM_CDF2(19782) },
- { AOM_CDF2(23313) },
- { AOM_CDF2(31330) },
- { AOM_CDF2(5124) },
- { AOM_CDF2(18719) },
- { AOM_CDF2(28468) },
- { AOM_CDF2(3082) },
- { AOM_CDF2(20982) },
- { AOM_CDF2(29443) } },
- { { AOM_CDF2(28573) },
- { AOM_CDF2(3183) },
- { AOM_CDF2(17802) },
- { AOM_CDF2(25977) },
- { AOM_CDF2(26677) },
- { AOM_CDF2(27832) },
- { AOM_CDF2(32387) },
- { AOM_CDF2(16384) },
- { AOM_CDF2(16384) },
- { AOM_CDF2(16384) },
- { AOM_CDF2(16384) },
- { AOM_CDF2(16384) },
- { AOM_CDF2(16384) } } },
- { { { AOM_CDF2(26887) },
- { AOM_CDF2(6729) },
- { AOM_CDF2(10361) },
- { AOM_CDF2(17442) },
- { AOM_CDF2(15045) },
- { AOM_CDF2(22478) },
- { AOM_CDF2(29072) },
- { AOM_CDF2(2713) },
- { AOM_CDF2(11861) },
- { AOM_CDF2(20773) },
- { AOM_CDF2(16384) },
- { AOM_CDF2(16384) },
- { AOM_CDF2(16384) } },
- { { AOM_CDF2(31903) },
- { AOM_CDF2(2044) },
- { AOM_CDF2(7528) },
- { AOM_CDF2(14618) },
- { AOM_CDF2(16182) },
- { AOM_CDF2(24168) },
- { AOM_CDF2(31037) },
- { AOM_CDF2(2786) },
- { AOM_CDF2(11194) },
- { AOM_CDF2(20155) },
- { AOM_CDF2(16384) },
- { AOM_CDF2(16384) },
- { AOM_CDF2(16384) } },
- { { AOM_CDF2(32510) },
- { AOM_CDF2(8430) },
- { AOM_CDF2(17318) },
- { AOM_CDF2(24154) },
- { AOM_CDF2(23674) },
- { AOM_CDF2(28789) },
- { AOM_CDF2(32139) },
- { AOM_CDF2(3440) },
- { AOM_CDF2(13117) },
- { AOM_CDF2(22702) },
- { AOM_CDF2(16384) },
- { AOM_CDF2(16384) },
- { AOM_CDF2(16384) } },
- { { AOM_CDF2(31671) },
- { AOM_CDF2(2056) },
- { AOM_CDF2(11746) },
- { AOM_CDF2(16852) },
- { AOM_CDF2(18635) },
- { AOM_CDF2(24715) },
- { AOM_CDF2(31484) },
- { AOM_CDF2(4656) },
- { AOM_CDF2(16074) },
- { AOM_CDF2(24704) },
- { AOM_CDF2(1806) },
- { AOM_CDF2(14645) },
- { AOM_CDF2(25336) } },
- { { AOM_CDF2(31539) },
- { AOM_CDF2(8433) },
- { AOM_CDF2(20576) },
- { AOM_CDF2(27904) },
- { AOM_CDF2(27852) },
- { AOM_CDF2(30026) },
- { AOM_CDF2(32441) },
- { AOM_CDF2(16384) },
- { AOM_CDF2(16384) },
- { AOM_CDF2(16384) },
- { AOM_CDF2(16384) },
- { AOM_CDF2(16384) },
- { AOM_CDF2(16384) } } } };
-
-static const aom_cdf_prob
- av1_default_eob_extra_cdfs[TOKEN_CDF_Q_CTXS][TX_SIZES][PLANE_TYPES]
- [EOB_COEF_CONTEXTS][CDF_SIZE(2)] = {
- { { {
- { AOM_CDF2(16961) },
- { AOM_CDF2(17223) },
- { AOM_CDF2(7621) },
- { AOM_CDF2(16384) },
- { AOM_CDF2(16384) },
- { AOM_CDF2(16384) },
- { AOM_CDF2(16384) },
- { AOM_CDF2(16384) },
- { AOM_CDF2(16384) },
- },
- {
- { AOM_CDF2(19069) },
- { AOM_CDF2(22525) },
- { AOM_CDF2(13377) },
- { AOM_CDF2(16384) },
- { AOM_CDF2(16384) },
- { AOM_CDF2(16384) },
- { AOM_CDF2(16384) },
- { AOM_CDF2(16384) },
- { AOM_CDF2(16384) },
- } },
- { {
- { AOM_CDF2(20401) },
- { AOM_CDF2(17025) },
- { AOM_CDF2(12845) },
- { AOM_CDF2(12873) },
- { AOM_CDF2(14094) },
- { AOM_CDF2(16384) },
- { AOM_CDF2(16384) },
- { AOM_CDF2(16384) },
- { AOM_CDF2(16384) },
- },
- {
- { AOM_CDF2(20681) },
- { AOM_CDF2(20701) },
- { AOM_CDF2(15250) },
- { AOM_CDF2(15017) },
- { AOM_CDF2(14928) },
- { AOM_CDF2(16384) },
- { AOM_CDF2(16384) },
- { AOM_CDF2(16384) },
- { AOM_CDF2(16384) },
- } },
- { {
- { AOM_CDF2(23905) },
- { AOM_CDF2(17194) },
- { AOM_CDF2(16170) },
- { AOM_CDF2(17695) },
- { AOM_CDF2(13826) },
- { AOM_CDF2(15810) },
- { AOM_CDF2(12036) },
- { AOM_CDF2(16384) },
- { AOM_CDF2(16384) },
- },
- {
- { AOM_CDF2(23959) },
- { AOM_CDF2(20799) },
- { AOM_CDF2(19021) },
- { AOM_CDF2(16203) },
- { AOM_CDF2(17886) },
- { AOM_CDF2(14144) },
- { AOM_CDF2(12010) },
- { AOM_CDF2(16384) },
- { AOM_CDF2(16384) },
- } },
- { {
- { AOM_CDF2(27399) },
- { AOM_CDF2(16327) },
- { AOM_CDF2(18071) },
- { AOM_CDF2(19584) },
- { AOM_CDF2(20721) },
- { AOM_CDF2(18432) },
- { AOM_CDF2(19560) },
- { AOM_CDF2(10150) },
- { AOM_CDF2(8805) },
- },
- {
- { AOM_CDF2(24932) },
- { AOM_CDF2(20833) },
- { AOM_CDF2(12027) },
- { AOM_CDF2(16670) },
- { AOM_CDF2(19914) },
- { AOM_CDF2(15106) },
- { AOM_CDF2(17662) },
- { AOM_CDF2(13783) },
- { AOM_CDF2(28756) },
- } },
- { {
- { AOM_CDF2(23406) },
- { AOM_CDF2(21845) },
- { AOM_CDF2(18432) },
- { AOM_CDF2(16384) },
- { AOM_CDF2(17096) },
- { AOM_CDF2(12561) },
- { AOM_CDF2(17320) },
- { AOM_CDF2(22395) },
- { AOM_CDF2(21370) },
- },
- {
- { AOM_CDF2(16384) },
- { AOM_CDF2(16384) },
- { AOM_CDF2(16384) },
- { AOM_CDF2(16384) },
- { AOM_CDF2(16384) },
- { AOM_CDF2(16384) },
- { AOM_CDF2(16384) },
- { AOM_CDF2(16384) },
- { AOM_CDF2(16384) },
- } } },
- { { {
- { AOM_CDF2(17471) },
- { AOM_CDF2(20223) },
- { AOM_CDF2(11357) },
- { AOM_CDF2(16384) },
- { AOM_CDF2(16384) },
- { AOM_CDF2(16384) },
- { AOM_CDF2(16384) },
- { AOM_CDF2(16384) },
- { AOM_CDF2(16384) },
- },
- {
- { AOM_CDF2(20335) },
- { AOM_CDF2(21667) },
- { AOM_CDF2(14818) },
- { AOM_CDF2(16384) },
- { AOM_CDF2(16384) },
- { AOM_CDF2(16384) },
- { AOM_CDF2(16384) },
- { AOM_CDF2(16384) },
- { AOM_CDF2(16384) },
- } },
- { {
- { AOM_CDF2(20430) },
- { AOM_CDF2(20662) },
- { AOM_CDF2(15367) },
- { AOM_CDF2(16970) },
- { AOM_CDF2(14657) },
- { AOM_CDF2(16384) },
- { AOM_CDF2(16384) },
- { AOM_CDF2(16384) },
- { AOM_CDF2(16384) },
- },
- {
- { AOM_CDF2(22117) },
- { AOM_CDF2(22028) },
- { AOM_CDF2(18650) },
- { AOM_CDF2(16042) },
- { AOM_CDF2(15885) },
- { AOM_CDF2(16384) },
- { AOM_CDF2(16384) },
- { AOM_CDF2(16384) },
- { AOM_CDF2(16384) },
- } },
- { {
- { AOM_CDF2(22409) },
- { AOM_CDF2(21012) },
- { AOM_CDF2(15650) },
- { AOM_CDF2(17395) },
- { AOM_CDF2(15469) },
- { AOM_CDF2(20205) },
- { AOM_CDF2(19511) },
- { AOM_CDF2(16384) },
- { AOM_CDF2(16384) },
- },
- {
- { AOM_CDF2(24220) },
- { AOM_CDF2(22480) },
- { AOM_CDF2(17737) },
- { AOM_CDF2(18916) },
- { AOM_CDF2(19268) },
- { AOM_CDF2(18412) },
- { AOM_CDF2(18844) },
- { AOM_CDF2(16384) },
- { AOM_CDF2(16384) },
- } },
- { {
- { AOM_CDF2(25991) },
- { AOM_CDF2(20314) },
- { AOM_CDF2(17731) },
- { AOM_CDF2(19678) },
- { AOM_CDF2(18649) },
- { AOM_CDF2(17307) },
- { AOM_CDF2(21798) },
- { AOM_CDF2(17549) },
- { AOM_CDF2(15630) },
- },
- {
- { AOM_CDF2(26585) },
- { AOM_CDF2(21469) },
- { AOM_CDF2(20432) },
- { AOM_CDF2(17735) },
- { AOM_CDF2(19280) },
- { AOM_CDF2(15235) },
- { AOM_CDF2(20297) },
- { AOM_CDF2(22471) },
- { AOM_CDF2(28997) },
- } },
- { {
- { AOM_CDF2(26605) },
- { AOM_CDF2(11304) },
- { AOM_CDF2(16726) },
- { AOM_CDF2(16560) },
- { AOM_CDF2(20866) },
- { AOM_CDF2(23524) },
- { AOM_CDF2(19878) },
- { AOM_CDF2(13469) },
- { AOM_CDF2(23084) },
- },
- {
- { AOM_CDF2(16384) },
- { AOM_CDF2(16384) },
- { AOM_CDF2(16384) },
- { AOM_CDF2(16384) },
- { AOM_CDF2(16384) },
- { AOM_CDF2(16384) },
- { AOM_CDF2(16384) },
- { AOM_CDF2(16384) },
- { AOM_CDF2(16384) },
- } } },
- { { {
- { AOM_CDF2(18983) },
- { AOM_CDF2(20512) },
- { AOM_CDF2(14885) },
- { AOM_CDF2(16384) },
- { AOM_CDF2(16384) },
- { AOM_CDF2(16384) },
- { AOM_CDF2(16384) },
- { AOM_CDF2(16384) },
- { AOM_CDF2(16384) },
- },
- {
- { AOM_CDF2(20090) },
- { AOM_CDF2(19444) },
- { AOM_CDF2(17286) },
- { AOM_CDF2(16384) },
- { AOM_CDF2(16384) },
- { AOM_CDF2(16384) },
- { AOM_CDF2(16384) },
- { AOM_CDF2(16384) },
- { AOM_CDF2(16384) },
- } },
- { {
- { AOM_CDF2(19139) },
- { AOM_CDF2(21487) },
- { AOM_CDF2(18959) },
- { AOM_CDF2(20910) },
- { AOM_CDF2(19089) },
- { AOM_CDF2(16384) },
- { AOM_CDF2(16384) },
- { AOM_CDF2(16384) },
- { AOM_CDF2(16384) },
- },
- {
- { AOM_CDF2(20536) },
- { AOM_CDF2(20664) },
- { AOM_CDF2(20625) },
- { AOM_CDF2(19123) },
- { AOM_CDF2(14862) },
- { AOM_CDF2(16384) },
- { AOM_CDF2(16384) },
- { AOM_CDF2(16384) },
- { AOM_CDF2(16384) },
- } },
- { {
- { AOM_CDF2(19833) },
- { AOM_CDF2(21502) },
- { AOM_CDF2(17485) },
- { AOM_CDF2(20267) },
- { AOM_CDF2(18353) },
- { AOM_CDF2(23329) },
- { AOM_CDF2(21478) },
- { AOM_CDF2(16384) },
- { AOM_CDF2(16384) },
- },
- {
- { AOM_CDF2(22041) },
- { AOM_CDF2(23434) },
- { AOM_CDF2(20001) },
- { AOM_CDF2(20554) },
- { AOM_CDF2(20951) },
- { AOM_CDF2(20145) },
- { AOM_CDF2(15562) },
- { AOM_CDF2(16384) },
- { AOM_CDF2(16384) },
- } },
- { {
- { AOM_CDF2(23312) },
- { AOM_CDF2(21607) },
- { AOM_CDF2(16526) },
- { AOM_CDF2(18957) },
- { AOM_CDF2(18034) },
- { AOM_CDF2(18934) },
- { AOM_CDF2(24247) },
- { AOM_CDF2(16921) },
- { AOM_CDF2(17080) },
- },
- {
- { AOM_CDF2(26579) },
- { AOM_CDF2(24910) },
- { AOM_CDF2(18637) },
- { AOM_CDF2(19800) },
- { AOM_CDF2(20388) },
- { AOM_CDF2(9887) },
- { AOM_CDF2(15642) },
- { AOM_CDF2(30198) },
- { AOM_CDF2(24721) },
- } },
- { {
- { AOM_CDF2(26998) },
- { AOM_CDF2(16737) },
- { AOM_CDF2(17838) },
- { AOM_CDF2(18922) },
- { AOM_CDF2(19515) },
- { AOM_CDF2(18636) },
- { AOM_CDF2(17333) },
- { AOM_CDF2(15776) },
- { AOM_CDF2(22658) },
- },
- {
- { AOM_CDF2(16384) },
- { AOM_CDF2(16384) },
- { AOM_CDF2(16384) },
- { AOM_CDF2(16384) },
- { AOM_CDF2(16384) },
- { AOM_CDF2(16384) },
- { AOM_CDF2(16384) },
- { AOM_CDF2(16384) },
- { AOM_CDF2(16384) },
- } } },
- { { {
- { AOM_CDF2(20177) },
- { AOM_CDF2(20789) },
- { AOM_CDF2(20262) },
- { AOM_CDF2(16384) },
- { AOM_CDF2(16384) },
- { AOM_CDF2(16384) },
- { AOM_CDF2(16384) },
- { AOM_CDF2(16384) },
- { AOM_CDF2(16384) },
- },
- {
- { AOM_CDF2(21416) },
- { AOM_CDF2(20855) },
- { AOM_CDF2(23410) },
- { AOM_CDF2(16384) },
- { AOM_CDF2(16384) },
- { AOM_CDF2(16384) },
- { AOM_CDF2(16384) },
- { AOM_CDF2(16384) },
- { AOM_CDF2(16384) },
- } },
- { {
- { AOM_CDF2(20238) },
- { AOM_CDF2(21057) },
- { AOM_CDF2(19159) },
- { AOM_CDF2(22337) },
- { AOM_CDF2(20159) },
- { AOM_CDF2(16384) },
- { AOM_CDF2(16384) },
- { AOM_CDF2(16384) },
- { AOM_CDF2(16384) },
- },
- {
- { AOM_CDF2(20125) },
- { AOM_CDF2(20559) },
- { AOM_CDF2(21707) },
- { AOM_CDF2(22296) },
- { AOM_CDF2(17333) },
- { AOM_CDF2(16384) },
- { AOM_CDF2(16384) },
- { AOM_CDF2(16384) },
- { AOM_CDF2(16384) },
- } },
- { {
- { AOM_CDF2(19941) },
- { AOM_CDF2(20527) },
- { AOM_CDF2(21470) },
- { AOM_CDF2(22487) },
- { AOM_CDF2(19558) },
- { AOM_CDF2(22354) },
- { AOM_CDF2(20331) },
- { AOM_CDF2(16384) },
- { AOM_CDF2(16384) },
- },
- {
- { AOM_CDF2(22752) },
- { AOM_CDF2(25006) },
- { AOM_CDF2(22075) },
- { AOM_CDF2(21576) },
- { AOM_CDF2(17740) },
- { AOM_CDF2(21690) },
- { AOM_CDF2(19211) },
- { AOM_CDF2(16384) },
- { AOM_CDF2(16384) },
- } },
- { {
- { AOM_CDF2(21442) },
- { AOM_CDF2(22358) },
- { AOM_CDF2(18503) },
- { AOM_CDF2(20291) },
- { AOM_CDF2(19945) },
- { AOM_CDF2(21294) },
- { AOM_CDF2(21178) },
- { AOM_CDF2(19400) },
- { AOM_CDF2(10556) },
- },
- {
- { AOM_CDF2(24648) },
- { AOM_CDF2(24949) },
- { AOM_CDF2(20708) },
- { AOM_CDF2(23905) },
- { AOM_CDF2(20501) },
- { AOM_CDF2(9558) },
- { AOM_CDF2(9423) },
- { AOM_CDF2(30365) },
- { AOM_CDF2(19253) },
- } },
- { {
- { AOM_CDF2(26064) },
- { AOM_CDF2(22098) },
- { AOM_CDF2(19613) },
- { AOM_CDF2(20525) },
- { AOM_CDF2(17595) },
- { AOM_CDF2(16618) },
- { AOM_CDF2(20497) },
- { AOM_CDF2(18989) },
- { AOM_CDF2(15513) },
- },
- {
- { AOM_CDF2(16384) },
- { AOM_CDF2(16384) },
- { AOM_CDF2(16384) },
- { AOM_CDF2(16384) },
- { AOM_CDF2(16384) },
- { AOM_CDF2(16384) },
- { AOM_CDF2(16384) },
- { AOM_CDF2(16384) },
- { AOM_CDF2(16384) },
- } } }
- };
-
-static const aom_cdf_prob
- av1_default_eob_multi16_cdfs[TOKEN_CDF_Q_CTXS][PLANE_TYPES][2][CDF_SIZE(
- 5)] = { { { { AOM_CDF5(840, 1039, 1980, 4895) },
- { AOM_CDF5(370, 671, 1883, 4471) } },
- { { AOM_CDF5(3247, 4950, 9688, 14563) },
- { AOM_CDF5(1904, 3354, 7763, 14647) } } },
- { { { AOM_CDF5(2125, 2551, 5165, 8946) },
- { AOM_CDF5(513, 765, 1859, 6339) } },
- { { AOM_CDF5(7637, 9498, 14259, 19108) },
- { AOM_CDF5(2497, 4096, 8866, 16993) } } },
- { { { AOM_CDF5(4016, 4897, 8881, 14968) },
- { AOM_CDF5(716, 1105, 2646, 10056) } },
- { { AOM_CDF5(11139, 13270, 18241, 23566) },
- { AOM_CDF5(3192, 5032, 10297, 19755) } } },
- { { { AOM_CDF5(6708, 8958, 14746, 22133) },
- { AOM_CDF5(1222, 2074, 4783, 15410) } },
- { { AOM_CDF5(19575, 21766, 26044, 29709) },
- { AOM_CDF5(7297, 10767, 19273, 28194) } } } };
-
-static const aom_cdf_prob
- av1_default_eob_multi32_cdfs[TOKEN_CDF_Q_CTXS][PLANE_TYPES][2][CDF_SIZE(
- 6)] = { { { { AOM_CDF6(400, 520, 977, 2102, 6542) },
- { AOM_CDF6(210, 405, 1315, 3326, 7537) } },
- { { AOM_CDF6(2636, 4273, 7588, 11794, 20401) },
- { AOM_CDF6(1786, 3179, 6902, 11357, 19054) } } },
- { { { AOM_CDF6(989, 1249, 2019, 4151, 10785) },
- { AOM_CDF6(313, 441, 1099, 2917, 8562) } },
- { { AOM_CDF6(8394, 10352, 13932, 18855, 26014) },
- { AOM_CDF6(2578, 4124, 8181, 13670, 24234) } } },
- { { { AOM_CDF6(2515, 3003, 4452, 8162, 16041) },
- { AOM_CDF6(574, 821, 1836, 5089, 13128) } },
- { { AOM_CDF6(13468, 16303, 20361, 25105, 29281) },
- { AOM_CDF6(3542, 5502, 10415, 16760, 25644) } } },
- { { { AOM_CDF6(4617, 5709, 8446, 13584, 23135) },
- { AOM_CDF6(1156, 1702, 3675, 9274, 20539) } },
- { { AOM_CDF6(22086, 24282, 27010, 29770, 31743) },
- { AOM_CDF6(7699, 10897, 20891, 26926, 31628) } } } };
-
-static const aom_cdf_prob
- av1_default_eob_multi64_cdfs[TOKEN_CDF_Q_CTXS][PLANE_TYPES][2][CDF_SIZE(
- 7)] = { { { { AOM_CDF7(329, 498, 1101, 1784, 3265, 7758) },
- { AOM_CDF7(335, 730, 1459, 5494, 8755, 12997) } },
- { { AOM_CDF7(3505, 5304, 10086, 13814, 17684, 23370) },
- { AOM_CDF7(1563, 2700, 4876, 10911, 14706, 22480) } } },
- { { { AOM_CDF7(1260, 1446, 2253, 3712, 6652, 13369) },
- { AOM_CDF7(401, 605, 1029, 2563, 5845, 12626) } },
- { { AOM_CDF7(8609, 10612, 14624, 18714, 22614, 29024) },
- { AOM_CDF7(1923, 3127, 5867, 9703, 14277, 27100) } } },
- { { { AOM_CDF7(2374, 2772, 4583, 7276, 12288, 19706) },
- { AOM_CDF7(497, 810, 1315, 3000, 7004, 15641) } },
- { { AOM_CDF7(15050, 17126, 21410, 24886, 28156, 30726) },
- { AOM_CDF7(4034, 6290, 10235, 14982, 21214, 28491) } } },
- { { { AOM_CDF7(6307, 7541, 12060, 16358, 22553, 27865) },
- { AOM_CDF7(1289, 2320, 3971, 7926, 14153, 24291) } },
- { { AOM_CDF7(24212, 25708, 28268, 30035, 31307, 32049) },
- { AOM_CDF7(8726, 12378, 19409, 26450, 30038, 32462) } } } };
-
-static const aom_cdf_prob
- av1_default_eob_multi128_cdfs[TOKEN_CDF_Q_CTXS][PLANE_TYPES][2][CDF_SIZE(
- 8)] = {
- { { { AOM_CDF8(219, 482, 1140, 2091, 3680, 6028, 12586) },
- { AOM_CDF8(371, 699, 1254, 4830, 9479, 12562, 17497) } },
- { { AOM_CDF8(5245, 7456, 12880, 15852, 20033, 23932, 27608) },
- { AOM_CDF8(2054, 3472, 5869, 14232, 18242, 20590, 26752) } } },
- { { { AOM_CDF8(685, 933, 1488, 2714, 4766, 8562, 19254) },
- { AOM_CDF8(217, 352, 618, 2303, 5261, 9969, 17472) } },
- { { AOM_CDF8(8045, 11200, 15497, 19595, 23948, 27408, 30938) },
- { AOM_CDF8(2310, 4160, 7471, 14997, 17931, 20768, 30240) } } },
- { { { AOM_CDF8(1366, 1738, 2527, 5016, 9355, 15797, 24643) },
- { AOM_CDF8(354, 558, 944, 2760, 7287, 14037, 21779) } },
- { { AOM_CDF8(13627, 16246, 20173, 24429, 27948, 30415, 31863) },
- { AOM_CDF8(6275, 9889, 14769, 23164, 27988, 30493, 32272) } } },
- { { { AOM_CDF8(3472, 4885, 7489, 12481, 18517, 24536, 29635) },
- { AOM_CDF8(886, 1731, 3271, 8469, 15569, 22126, 28383) } },
- { { AOM_CDF8(24313, 26062, 28385, 30107, 31217, 31898, 32345) },
- { AOM_CDF8(9165, 13282, 21150, 30286, 31894, 32571, 32712) } } }
- };
-
-static const aom_cdf_prob
- av1_default_eob_multi256_cdfs[TOKEN_CDF_Q_CTXS][PLANE_TYPES][2][CDF_SIZE(
- 9)] = {
- { { { AOM_CDF9(310, 584, 1887, 3589, 6168, 8611, 11352, 15652) },
- { AOM_CDF9(998, 1850, 2998, 5604, 17341, 19888, 22899, 25583) } },
- { { AOM_CDF9(2520, 3240, 5952, 8870, 12577, 17558, 19954, 24168) },
- { AOM_CDF9(2203, 4130, 7435, 10739, 20652, 23681, 25609, 27261) } } },
- { { { AOM_CDF9(1448, 2109, 4151, 6263, 9329, 13260, 17944, 23300) },
- { AOM_CDF9(399, 1019, 1749, 3038, 10444, 15546, 22739, 27294) } },
- { { AOM_CDF9(6402, 8148, 12623, 15072, 18728, 22847, 26447, 29377) },
- { AOM_CDF9(1674, 3252, 5734, 10159, 22397, 23802, 24821, 30940) } } },
- { { { AOM_CDF9(3089, 3920, 6038, 9460, 14266, 19881, 25766, 29176) },
- { AOM_CDF9(1084, 2358, 3488, 5122, 11483, 18103, 26023, 29799) } },
- { { AOM_CDF9(11514, 13794, 17480, 20754, 24361, 27378, 29492, 31277) },
- { AOM_CDF9(6571, 9610, 15516, 21826, 29092, 30829, 31842,
- 32708) } } },
- { { { AOM_CDF9(5348, 7113, 11820, 15924, 22106, 26777, 30334, 31757) },
- { AOM_CDF9(2453, 4474, 6307, 8777, 16474, 22975, 29000, 31547) } },
- { { AOM_CDF9(23110, 24597, 27140, 28894, 30167, 30927, 31392, 32094) },
- { AOM_CDF9(9998, 17661, 25178, 28097, 31308, 32038, 32403,
- 32695) } } }
- };
-
-static const aom_cdf_prob
- av1_default_eob_multi512_cdfs[TOKEN_CDF_Q_CTXS][PLANE_TYPES][2][CDF_SIZE(
- 10)] = { { { { AOM_CDF10(641, 983, 3707, 5430, 10234, 14958, 18788,
- 23412, 26061) },
- { AOM_CDF10(3277, 6554, 9830, 13107, 16384, 19661, 22938,
- 26214, 29491) } },
- { { AOM_CDF10(5095, 6446, 9996, 13354, 16017, 17986, 20919,
- 26129, 29140) },
- { AOM_CDF10(3277, 6554, 9830, 13107, 16384, 19661, 22938,
- 26214, 29491) } } },
- { { { AOM_CDF10(1230, 2278, 5035, 7776, 11871, 15346, 19590,
- 24584, 28749) },
- { AOM_CDF10(3277, 6554, 9830, 13107, 16384, 19661, 22938,
- 26214, 29491) } },
- { { AOM_CDF10(7265, 9979, 15819, 19250, 21780, 23846, 26478,
- 28396, 31811) },
- { AOM_CDF10(3277, 6554, 9830, 13107, 16384, 19661, 22938,
- 26214, 29491) } } },
- { { { AOM_CDF10(2624, 3936, 6480, 9686, 13979, 17726, 23267,
- 28410, 31078) },
- { AOM_CDF10(3277, 6554, 9830, 13107, 16384, 19661, 22938,
- 26214, 29491) } },
- { { AOM_CDF10(12015, 14769, 19588, 22052, 24222, 25812,
- 27300, 29219, 32114) },
- { AOM_CDF10(3277, 6554, 9830, 13107, 16384, 19661, 22938,
- 26214, 29491) } } },
- { { { AOM_CDF10(5927, 7809, 10923, 14597, 19439, 24135, 28456,
- 31142, 32060) },
- { AOM_CDF10(3277, 6554, 9830, 13107, 16384, 19661, 22938,
- 26214, 29491) } },
- { { AOM_CDF10(21093, 23043, 25742, 27658, 29097, 29716,
- 30073, 30820, 31956) },
- { AOM_CDF10(3277, 6554, 9830, 13107, 16384, 19661, 22938,
- 26214, 29491) } } } };
-
-static const aom_cdf_prob
- av1_default_eob_multi1024_cdfs[TOKEN_CDF_Q_CTXS][PLANE_TYPES][2][CDF_SIZE(
- 11)] = { { { { AOM_CDF11(393, 421, 751, 1623, 3160, 6352, 13345, 18047,
- 22571, 25830) },
- { AOM_CDF11(2979, 5958, 8937, 11916, 14895, 17873, 20852,
- 23831, 26810, 29789) } },
- { { AOM_CDF11(1865, 1988, 2930, 4242, 10533, 16538, 21354,
- 27255, 28546, 31784) },
- { AOM_CDF11(2979, 5958, 8937, 11916, 14895, 17873, 20852,
- 23831, 26810, 29789) } } },
- { { { AOM_CDF11(696, 948, 3145, 5702, 9706, 13217, 17851,
- 21856, 25692, 28034) },
- { AOM_CDF11(2979, 5958, 8937, 11916, 14895, 17873, 20852,
- 23831, 26810, 29789) } },
- { { AOM_CDF11(2672, 3591, 9330, 17084, 22725, 24284, 26527,
- 28027, 28377, 30876) },
- { AOM_CDF11(2979, 5958, 8937, 11916, 14895, 17873, 20852,
- 23831, 26810, 29789) } } },
- { { { AOM_CDF11(2784, 3831, 7041, 10521, 14847, 18844, 23155,
- 26682, 29229, 31045) },
- { AOM_CDF11(2979, 5958, 8937, 11916, 14895, 17873, 20852,
- 23831, 26810, 29789) } },
- { { AOM_CDF11(9577, 12466, 17739, 20750, 22061, 23215, 24601,
- 25483, 25843, 32056) },
- { AOM_CDF11(2979, 5958, 8937, 11916, 14895, 17873, 20852,
- 23831, 26810, 29789) } } },
- { { { AOM_CDF11(6698, 8334, 11961, 15762, 20186, 23862, 27434,
- 29326, 31082, 32050) },
- { AOM_CDF11(2979, 5958, 8937, 11916, 14895, 17873, 20852,
- 23831, 26810, 29789) } },
- { { AOM_CDF11(20569, 22426, 25569, 26859, 28053, 28913,
- 29486, 29724, 29807, 32570) },
- { AOM_CDF11(2979, 5958, 8937, 11916, 14895, 17873, 20852,
- 23831, 26810, 29789) } } } };
-
-static const aom_cdf_prob av1_default_coeff_lps_multi_cdfs
- [TOKEN_CDF_Q_CTXS][TX_SIZES][PLANE_TYPES][LEVEL_CONTEXTS]
- [CDF_SIZE(BR_CDF_SIZE)] = {
- { { { { AOM_CDF4(14298, 20718, 24174) },
- { AOM_CDF4(12536, 19601, 23789) },
- { AOM_CDF4(8712, 15051, 19503) },
- { AOM_CDF4(6170, 11327, 15434) },
- { AOM_CDF4(4742, 8926, 12538) },
- { AOM_CDF4(3803, 7317, 10546) },
- { AOM_CDF4(1696, 3317, 4871) },
- { AOM_CDF4(14392, 19951, 22756) },
- { AOM_CDF4(15978, 23218, 26818) },
- { AOM_CDF4(12187, 19474, 23889) },
- { AOM_CDF4(9176, 15640, 20259) },
- { AOM_CDF4(7068, 12655, 17028) },
- { AOM_CDF4(5656, 10442, 14472) },
- { AOM_CDF4(2580, 4992, 7244) },
- { AOM_CDF4(12136, 18049, 21426) },
- { AOM_CDF4(13784, 20721, 24481) },
- { AOM_CDF4(10836, 17621, 21900) },
- { AOM_CDF4(8372, 14444, 18847) },
- { AOM_CDF4(6523, 11779, 16000) },
- { AOM_CDF4(5337, 9898, 13760) },
- { AOM_CDF4(3034, 5860, 8462) } },
- { { AOM_CDF4(15967, 22905, 26286) },
- { AOM_CDF4(13534, 20654, 24579) },
- { AOM_CDF4(9504, 16092, 20535) },
- { AOM_CDF4(6975, 12568, 16903) },
- { AOM_CDF4(5364, 10091, 14020) },
- { AOM_CDF4(4357, 8370, 11857) },
- { AOM_CDF4(2506, 4934, 7218) },
- { AOM_CDF4(23032, 28815, 30936) },
- { AOM_CDF4(19540, 26704, 29719) },
- { AOM_CDF4(15158, 22969, 27097) },
- { AOM_CDF4(11408, 18865, 23650) },
- { AOM_CDF4(8885, 15448, 20250) },
- { AOM_CDF4(7108, 12853, 17416) },
- { AOM_CDF4(4231, 8041, 11480) },
- { AOM_CDF4(19823, 26490, 29156) },
- { AOM_CDF4(18890, 25929, 28932) },
- { AOM_CDF4(15660, 23491, 27433) },
- { AOM_CDF4(12147, 19776, 24488) },
- { AOM_CDF4(9728, 16774, 21649) },
- { AOM_CDF4(7919, 14277, 19066) },
- { AOM_CDF4(5440, 10170, 14185) } } },
- { { { AOM_CDF4(14406, 20862, 24414) },
- { AOM_CDF4(11824, 18907, 23109) },
- { AOM_CDF4(8257, 14393, 18803) },
- { AOM_CDF4(5860, 10747, 14778) },
- { AOM_CDF4(4475, 8486, 11984) },
- { AOM_CDF4(3606, 6954, 10043) },
- { AOM_CDF4(1736, 3410, 5048) },
- { AOM_CDF4(14430, 20046, 22882) },
- { AOM_CDF4(15593, 22899, 26709) },
- { AOM_CDF4(12102, 19368, 23811) },
- { AOM_CDF4(9059, 15584, 20262) },
- { AOM_CDF4(6999, 12603, 17048) },
- { AOM_CDF4(5684, 10497, 14553) },
- { AOM_CDF4(2822, 5438, 7862) },
- { AOM_CDF4(15785, 21585, 24359) },
- { AOM_CDF4(18347, 25229, 28266) },
- { AOM_CDF4(14974, 22487, 26389) },
- { AOM_CDF4(11423, 18681, 23271) },
- { AOM_CDF4(8863, 15350, 20008) },
- { AOM_CDF4(7153, 12852, 17278) },
- { AOM_CDF4(3707, 7036, 9982) } },
- { { AOM_CDF4(15460, 21696, 25469) },
- { AOM_CDF4(12170, 19249, 23191) },
- { AOM_CDF4(8723, 15027, 19332) },
- { AOM_CDF4(6428, 11704, 15874) },
- { AOM_CDF4(4922, 9292, 13052) },
- { AOM_CDF4(4139, 7695, 11010) },
- { AOM_CDF4(2291, 4508, 6598) },
- { AOM_CDF4(19856, 26920, 29828) },
- { AOM_CDF4(17923, 25289, 28792) },
- { AOM_CDF4(14278, 21968, 26297) },
- { AOM_CDF4(10910, 18136, 22950) },
- { AOM_CDF4(8423, 14815, 19627) },
- { AOM_CDF4(6771, 12283, 16774) },
- { AOM_CDF4(4074, 7750, 11081) },
- { AOM_CDF4(19852, 26074, 28672) },
- { AOM_CDF4(19371, 26110, 28989) },
- { AOM_CDF4(16265, 23873, 27663) },
- { AOM_CDF4(12758, 20378, 24952) },
- { AOM_CDF4(10095, 17098, 21961) },
- { AOM_CDF4(8250, 14628, 19451) },
- { AOM_CDF4(5205, 9745, 13622) } } },
- { { { AOM_CDF4(10563, 16233, 19763) },
- { AOM_CDF4(9794, 16022, 19804) },
- { AOM_CDF4(6750, 11945, 15759) },
- { AOM_CDF4(4963, 9186, 12752) },
- { AOM_CDF4(3845, 7435, 10627) },
- { AOM_CDF4(3051, 6085, 8834) },
- { AOM_CDF4(1311, 2596, 3830) },
- { AOM_CDF4(11246, 16404, 19689) },
- { AOM_CDF4(12315, 18911, 22731) },
- { AOM_CDF4(10557, 17095, 21289) },
- { AOM_CDF4(8136, 14006, 18249) },
- { AOM_CDF4(6348, 11474, 15565) },
- { AOM_CDF4(5196, 9655, 13400) },
- { AOM_CDF4(2349, 4526, 6587) },
- { AOM_CDF4(13337, 18730, 21569) },
- { AOM_CDF4(19306, 26071, 28882) },
- { AOM_CDF4(15952, 23540, 27254) },
- { AOM_CDF4(12409, 19934, 24430) },
- { AOM_CDF4(9760, 16706, 21389) },
- { AOM_CDF4(8004, 14220, 18818) },
- { AOM_CDF4(4138, 7794, 10961) } },
- { { AOM_CDF4(10870, 16684, 20949) },
- { AOM_CDF4(9664, 15230, 18680) },
- { AOM_CDF4(6886, 12109, 15408) },
- { AOM_CDF4(4825, 8900, 12305) },
- { AOM_CDF4(3630, 7162, 10314) },
- { AOM_CDF4(3036, 6429, 9387) },
- { AOM_CDF4(1671, 3296, 4940) },
- { AOM_CDF4(13819, 19159, 23026) },
- { AOM_CDF4(11984, 19108, 23120) },
- { AOM_CDF4(10690, 17210, 21663) },
- { AOM_CDF4(7984, 14154, 18333) },
- { AOM_CDF4(6868, 12294, 16124) },
- { AOM_CDF4(5274, 8994, 12868) },
- { AOM_CDF4(2988, 5771, 8424) },
- { AOM_CDF4(19736, 26647, 29141) },
- { AOM_CDF4(18933, 26070, 28984) },
- { AOM_CDF4(15779, 23048, 27200) },
- { AOM_CDF4(12638, 20061, 24532) },
- { AOM_CDF4(10692, 17545, 22220) },
- { AOM_CDF4(9217, 15251, 20054) },
- { AOM_CDF4(5078, 9284, 12594) } } },
- { { { AOM_CDF4(2331, 3662, 5244) },
- { AOM_CDF4(2891, 4771, 6145) },
- { AOM_CDF4(4598, 7623, 9729) },
- { AOM_CDF4(3520, 6845, 9199) },
- { AOM_CDF4(3417, 6119, 9324) },
- { AOM_CDF4(2601, 5412, 7385) },
- { AOM_CDF4(600, 1173, 1744) },
- { AOM_CDF4(7672, 13286, 17469) },
- { AOM_CDF4(4232, 7792, 10793) },
- { AOM_CDF4(2915, 5317, 7397) },
- { AOM_CDF4(2318, 4356, 6152) },
- { AOM_CDF4(2127, 4000, 5554) },
- { AOM_CDF4(1850, 3478, 5275) },
- { AOM_CDF4(977, 1933, 2843) },
- { AOM_CDF4(18280, 24387, 27989) },
- { AOM_CDF4(15852, 22671, 26185) },
- { AOM_CDF4(13845, 20951, 24789) },
- { AOM_CDF4(11055, 17966, 22129) },
- { AOM_CDF4(9138, 15422, 19801) },
- { AOM_CDF4(7454, 13145, 17456) },
- { AOM_CDF4(3370, 6393, 9013) } },
- { { AOM_CDF4(5842, 9229, 10838) },
- { AOM_CDF4(2313, 3491, 4276) },
- { AOM_CDF4(2998, 6104, 7496) },
- { AOM_CDF4(2420, 7447, 9868) },
- { AOM_CDF4(3034, 8495, 10923) },
- { AOM_CDF4(4076, 8937, 10975) },
- { AOM_CDF4(1086, 2370, 3299) },
- { AOM_CDF4(9714, 17254, 20444) },
- { AOM_CDF4(8543, 13698, 17123) },
- { AOM_CDF4(4918, 9007, 11910) },
- { AOM_CDF4(4129, 7532, 10553) },
- { AOM_CDF4(2364, 5533, 8058) },
- { AOM_CDF4(1834, 3546, 5563) },
- { AOM_CDF4(1473, 2908, 4133) },
- { AOM_CDF4(15405, 21193, 25619) },
- { AOM_CDF4(15691, 21952, 26561) },
- { AOM_CDF4(12962, 19194, 24165) },
- { AOM_CDF4(10272, 17855, 22129) },
- { AOM_CDF4(8588, 15270, 20718) },
- { AOM_CDF4(8682, 14669, 19500) },
- { AOM_CDF4(4870, 9636, 13205) } } },
- { { { AOM_CDF4(8192, 16384, 24576) }, { AOM_CDF4(8192, 16384, 24576) },
- { AOM_CDF4(8192, 16384, 24576) }, { AOM_CDF4(8192, 16384, 24576) },
- { AOM_CDF4(8192, 16384, 24576) }, { AOM_CDF4(8192, 16384, 24576) },
- { AOM_CDF4(8192, 16384, 24576) }, { AOM_CDF4(8192, 16384, 24576) },
- { AOM_CDF4(8192, 16384, 24576) }, { AOM_CDF4(8192, 16384, 24576) },
- { AOM_CDF4(8192, 16384, 24576) }, { AOM_CDF4(8192, 16384, 24576) },
- { AOM_CDF4(8192, 16384, 24576) }, { AOM_CDF4(8192, 16384, 24576) },
- { AOM_CDF4(8192, 16384, 24576) }, { AOM_CDF4(8192, 16384, 24576) },
- { AOM_CDF4(8192, 16384, 24576) }, { AOM_CDF4(8192, 16384, 24576) },
- { AOM_CDF4(8192, 16384, 24576) }, { AOM_CDF4(8192, 16384, 24576) },
- { AOM_CDF4(8192, 16384, 24576) } },
- { { AOM_CDF4(8192, 16384, 24576) }, { AOM_CDF4(8192, 16384, 24576) },
- { AOM_CDF4(8192, 16384, 24576) }, { AOM_CDF4(8192, 16384, 24576) },
- { AOM_CDF4(8192, 16384, 24576) }, { AOM_CDF4(8192, 16384, 24576) },
- { AOM_CDF4(8192, 16384, 24576) }, { AOM_CDF4(8192, 16384, 24576) },
- { AOM_CDF4(8192, 16384, 24576) }, { AOM_CDF4(8192, 16384, 24576) },
- { AOM_CDF4(8192, 16384, 24576) }, { AOM_CDF4(8192, 16384, 24576) },
- { AOM_CDF4(8192, 16384, 24576) }, { AOM_CDF4(8192, 16384, 24576) },
- { AOM_CDF4(8192, 16384, 24576) }, { AOM_CDF4(8192, 16384, 24576) },
- { AOM_CDF4(8192, 16384, 24576) }, { AOM_CDF4(8192, 16384, 24576) },
- { AOM_CDF4(8192, 16384, 24576) }, { AOM_CDF4(8192, 16384, 24576) },
- { AOM_CDF4(8192, 16384, 24576) } } } },
- { { { { AOM_CDF4(14995, 21341, 24749) },
- { AOM_CDF4(13158, 20289, 24601) },
- { AOM_CDF4(8941, 15326, 19876) },
- { AOM_CDF4(6297, 11541, 15807) },
- { AOM_CDF4(4817, 9029, 12776) },
- { AOM_CDF4(3731, 7273, 10627) },
- { AOM_CDF4(1847, 3617, 5354) },
- { AOM_CDF4(14472, 19659, 22343) },
- { AOM_CDF4(16806, 24162, 27533) },
- { AOM_CDF4(12900, 20404, 24713) },
- { AOM_CDF4(9411, 16112, 20797) },
- { AOM_CDF4(7056, 12697, 17148) },
- { AOM_CDF4(5544, 10339, 14460) },
- { AOM_CDF4(2954, 5704, 8319) },
- { AOM_CDF4(12464, 18071, 21354) },
- { AOM_CDF4(15482, 22528, 26034) },
- { AOM_CDF4(12070, 19269, 23624) },
- { AOM_CDF4(8953, 15406, 20106) },
- { AOM_CDF4(7027, 12730, 17220) },
- { AOM_CDF4(5887, 10913, 15140) },
- { AOM_CDF4(3793, 7278, 10447) } },
- { { AOM_CDF4(15571, 22232, 25749) },
- { AOM_CDF4(14506, 21575, 25374) },
- { AOM_CDF4(10189, 17089, 21569) },
- { AOM_CDF4(7316, 13301, 17915) },
- { AOM_CDF4(5783, 10912, 15190) },
- { AOM_CDF4(4760, 9155, 13088) },
- { AOM_CDF4(2993, 5966, 8774) },
- { AOM_CDF4(23424, 28903, 30778) },
- { AOM_CDF4(20775, 27666, 30290) },
- { AOM_CDF4(16474, 24410, 28299) },
- { AOM_CDF4(12471, 20180, 24987) },
- { AOM_CDF4(9410, 16487, 21439) },
- { AOM_CDF4(7536, 13614, 18529) },
- { AOM_CDF4(5048, 9586, 13549) },
- { AOM_CDF4(21090, 27290, 29756) },
- { AOM_CDF4(20796, 27402, 30026) },
- { AOM_CDF4(17819, 25485, 28969) },
- { AOM_CDF4(13860, 21909, 26462) },
- { AOM_CDF4(11002, 18494, 23529) },
- { AOM_CDF4(8953, 15929, 20897) },
- { AOM_CDF4(6448, 11918, 16454) } } },
- { { { AOM_CDF4(15999, 22208, 25449) },
- { AOM_CDF4(13050, 19988, 24122) },
- { AOM_CDF4(8594, 14864, 19378) },
- { AOM_CDF4(6033, 11079, 15238) },
- { AOM_CDF4(4554, 8683, 12347) },
- { AOM_CDF4(3672, 7139, 10337) },
- { AOM_CDF4(1900, 3771, 5576) },
- { AOM_CDF4(15788, 21340, 23949) },
- { AOM_CDF4(16825, 24235, 27758) },
- { AOM_CDF4(12873, 20402, 24810) },
- { AOM_CDF4(9590, 16363, 21094) },
- { AOM_CDF4(7352, 13209, 17733) },
- { AOM_CDF4(5960, 10989, 15184) },
- { AOM_CDF4(3232, 6234, 9007) },
- { AOM_CDF4(15761, 20716, 23224) },
- { AOM_CDF4(19318, 25989, 28759) },
- { AOM_CDF4(15529, 23094, 26929) },
- { AOM_CDF4(11662, 18989, 23641) },
- { AOM_CDF4(8955, 15568, 20366) },
- { AOM_CDF4(7281, 13106, 17708) },
- { AOM_CDF4(4248, 8059, 11440) } },
- { { AOM_CDF4(14899, 21217, 24503) },
- { AOM_CDF4(13519, 20283, 24047) },
- { AOM_CDF4(9429, 15966, 20365) },
- { AOM_CDF4(6700, 12355, 16652) },
- { AOM_CDF4(5088, 9704, 13716) },
- { AOM_CDF4(4243, 8154, 11731) },
- { AOM_CDF4(2702, 5364, 7861) },
- { AOM_CDF4(22745, 28388, 30454) },
- { AOM_CDF4(20235, 27146, 29922) },
- { AOM_CDF4(15896, 23715, 27637) },
- { AOM_CDF4(11840, 19350, 24131) },
- { AOM_CDF4(9122, 15932, 20880) },
- { AOM_CDF4(7488, 13581, 18362) },
- { AOM_CDF4(5114, 9568, 13370) },
- { AOM_CDF4(20845, 26553, 28932) },
- { AOM_CDF4(20981, 27372, 29884) },
- { AOM_CDF4(17781, 25335, 28785) },
- { AOM_CDF4(13760, 21708, 26297) },
- { AOM_CDF4(10975, 18415, 23365) },
- { AOM_CDF4(9045, 15789, 20686) },
- { AOM_CDF4(6130, 11199, 15423) } } },
- { { { AOM_CDF4(13549, 19724, 23158) },
- { AOM_CDF4(11844, 18382, 22246) },
- { AOM_CDF4(7919, 13619, 17773) },
- { AOM_CDF4(5486, 10143, 13946) },
- { AOM_CDF4(4166, 7983, 11324) },
- { AOM_CDF4(3364, 6506, 9427) },
- { AOM_CDF4(1598, 3160, 4674) },
- { AOM_CDF4(15281, 20979, 23781) },
- { AOM_CDF4(14939, 22119, 25952) },
- { AOM_CDF4(11363, 18407, 22812) },
- { AOM_CDF4(8609, 14857, 19370) },
- { AOM_CDF4(6737, 12184, 16480) },
- { AOM_CDF4(5506, 10263, 14262) },
- { AOM_CDF4(2990, 5786, 8380) },
- { AOM_CDF4(20249, 25253, 27417) },
- { AOM_CDF4(21070, 27518, 30001) },
- { AOM_CDF4(16854, 24469, 28074) },
- { AOM_CDF4(12864, 20486, 25000) },
- { AOM_CDF4(9962, 16978, 21778) },
- { AOM_CDF4(8074, 14338, 19048) },
- { AOM_CDF4(4494, 8479, 11906) } },
- { { AOM_CDF4(13960, 19617, 22829) },
- { AOM_CDF4(11150, 17341, 21228) },
- { AOM_CDF4(7150, 12964, 17190) },
- { AOM_CDF4(5331, 10002, 13867) },
- { AOM_CDF4(4167, 7744, 11057) },
- { AOM_CDF4(3480, 6629, 9646) },
- { AOM_CDF4(1883, 3784, 5686) },
- { AOM_CDF4(18752, 25660, 28912) },
- { AOM_CDF4(16968, 24586, 28030) },
- { AOM_CDF4(13520, 21055, 25313) },
- { AOM_CDF4(10453, 17626, 22280) },
- { AOM_CDF4(8386, 14505, 19116) },
- { AOM_CDF4(6742, 12595, 17008) },
- { AOM_CDF4(4273, 8140, 11499) },
- { AOM_CDF4(22120, 27827, 30233) },
- { AOM_CDF4(20563, 27358, 29895) },
- { AOM_CDF4(17076, 24644, 28153) },
- { AOM_CDF4(13362, 20942, 25309) },
- { AOM_CDF4(10794, 17965, 22695) },
- { AOM_CDF4(9014, 15652, 20319) },
- { AOM_CDF4(5708, 10512, 14497) } } },
- { { { AOM_CDF4(5705, 10930, 15725) },
- { AOM_CDF4(7946, 12765, 16115) },
- { AOM_CDF4(6801, 12123, 16226) },
- { AOM_CDF4(5462, 10135, 14200) },
- { AOM_CDF4(4189, 8011, 11507) },
- { AOM_CDF4(3191, 6229, 9408) },
- { AOM_CDF4(1057, 2137, 3212) },
- { AOM_CDF4(10018, 17067, 21491) },
- { AOM_CDF4(7380, 12582, 16453) },
- { AOM_CDF4(6068, 10845, 14339) },
- { AOM_CDF4(5098, 9198, 12555) },
- { AOM_CDF4(4312, 8010, 11119) },
- { AOM_CDF4(3700, 6966, 9781) },
- { AOM_CDF4(1693, 3326, 4887) },
- { AOM_CDF4(18757, 24930, 27774) },
- { AOM_CDF4(17648, 24596, 27817) },
- { AOM_CDF4(14707, 22052, 26026) },
- { AOM_CDF4(11720, 18852, 23292) },
- { AOM_CDF4(9357, 15952, 20525) },
- { AOM_CDF4(7810, 13753, 18210) },
- { AOM_CDF4(3879, 7333, 10328) } },
- { { AOM_CDF4(8278, 13242, 15922) },
- { AOM_CDF4(10547, 15867, 18919) },
- { AOM_CDF4(9106, 15842, 20609) },
- { AOM_CDF4(6833, 13007, 17218) },
- { AOM_CDF4(4811, 9712, 13923) },
- { AOM_CDF4(3985, 7352, 11128) },
- { AOM_CDF4(1688, 3458, 5262) },
- { AOM_CDF4(12951, 21861, 26510) },
- { AOM_CDF4(9788, 16044, 20276) },
- { AOM_CDF4(6309, 11244, 14870) },
- { AOM_CDF4(5183, 9349, 12566) },
- { AOM_CDF4(4389, 8229, 11492) },
- { AOM_CDF4(3633, 6945, 10620) },
- { AOM_CDF4(3600, 6847, 9907) },
- { AOM_CDF4(21748, 28137, 30255) },
- { AOM_CDF4(19436, 26581, 29560) },
- { AOM_CDF4(16359, 24201, 27953) },
- { AOM_CDF4(13961, 21693, 25871) },
- { AOM_CDF4(11544, 18686, 23322) },
- { AOM_CDF4(9372, 16462, 20952) },
- { AOM_CDF4(6138, 11210, 15390) } } },
- { { { AOM_CDF4(8192, 16384, 24576) }, { AOM_CDF4(8192, 16384, 24576) },
- { AOM_CDF4(8192, 16384, 24576) }, { AOM_CDF4(8192, 16384, 24576) },
- { AOM_CDF4(8192, 16384, 24576) }, { AOM_CDF4(8192, 16384, 24576) },
- { AOM_CDF4(8192, 16384, 24576) }, { AOM_CDF4(8192, 16384, 24576) },
- { AOM_CDF4(8192, 16384, 24576) }, { AOM_CDF4(8192, 16384, 24576) },
- { AOM_CDF4(8192, 16384, 24576) }, { AOM_CDF4(8192, 16384, 24576) },
- { AOM_CDF4(8192, 16384, 24576) }, { AOM_CDF4(8192, 16384, 24576) },
- { AOM_CDF4(8192, 16384, 24576) }, { AOM_CDF4(8192, 16384, 24576) },
- { AOM_CDF4(8192, 16384, 24576) }, { AOM_CDF4(8192, 16384, 24576) },
- { AOM_CDF4(8192, 16384, 24576) }, { AOM_CDF4(8192, 16384, 24576) },
- { AOM_CDF4(8192, 16384, 24576) } },
- { { AOM_CDF4(8192, 16384, 24576) }, { AOM_CDF4(8192, 16384, 24576) },
- { AOM_CDF4(8192, 16384, 24576) }, { AOM_CDF4(8192, 16384, 24576) },
- { AOM_CDF4(8192, 16384, 24576) }, { AOM_CDF4(8192, 16384, 24576) },
- { AOM_CDF4(8192, 16384, 24576) }, { AOM_CDF4(8192, 16384, 24576) },
- { AOM_CDF4(8192, 16384, 24576) }, { AOM_CDF4(8192, 16384, 24576) },
- { AOM_CDF4(8192, 16384, 24576) }, { AOM_CDF4(8192, 16384, 24576) },
- { AOM_CDF4(8192, 16384, 24576) }, { AOM_CDF4(8192, 16384, 24576) },
- { AOM_CDF4(8192, 16384, 24576) }, { AOM_CDF4(8192, 16384, 24576) },
- { AOM_CDF4(8192, 16384, 24576) }, { AOM_CDF4(8192, 16384, 24576) },
- { AOM_CDF4(8192, 16384, 24576) }, { AOM_CDF4(8192, 16384, 24576) },
- { AOM_CDF4(8192, 16384, 24576) } } } },
- { { { { AOM_CDF4(16138, 22223, 25509) },
- { AOM_CDF4(15347, 22430, 26332) },
- { AOM_CDF4(9614, 16736, 21332) },
- { AOM_CDF4(6600, 12275, 16907) },
- { AOM_CDF4(4811, 9424, 13547) },
- { AOM_CDF4(3748, 7809, 11420) },
- { AOM_CDF4(2254, 4587, 6890) },
- { AOM_CDF4(15196, 20284, 23177) },
- { AOM_CDF4(18317, 25469, 28451) },
- { AOM_CDF4(13918, 21651, 25842) },
- { AOM_CDF4(10052, 17150, 21995) },
- { AOM_CDF4(7499, 13630, 18587) },
- { AOM_CDF4(6158, 11417, 16003) },
- { AOM_CDF4(4014, 7785, 11252) },
- { AOM_CDF4(15048, 21067, 24384) },
- { AOM_CDF4(18202, 25346, 28553) },
- { AOM_CDF4(14302, 22019, 26356) },
- { AOM_CDF4(10839, 18139, 23166) },
- { AOM_CDF4(8715, 15744, 20806) },
- { AOM_CDF4(7536, 13576, 18544) },
- { AOM_CDF4(5413, 10335, 14498) } },
- { { AOM_CDF4(17394, 24501, 27895) },
- { AOM_CDF4(15889, 23420, 27185) },
- { AOM_CDF4(11561, 19133, 23870) },
- { AOM_CDF4(8285, 14812, 19844) },
- { AOM_CDF4(6496, 12043, 16550) },
- { AOM_CDF4(4771, 9574, 13677) },
- { AOM_CDF4(3603, 6830, 10144) },
- { AOM_CDF4(21656, 27704, 30200) },
- { AOM_CDF4(21324, 27915, 30511) },
- { AOM_CDF4(17327, 25336, 28997) },
- { AOM_CDF4(13417, 21381, 26033) },
- { AOM_CDF4(10132, 17425, 22338) },
- { AOM_CDF4(8580, 15016, 19633) },
- { AOM_CDF4(5694, 11477, 16411) },
- { AOM_CDF4(24116, 29780, 31450) },
- { AOM_CDF4(23853, 29695, 31591) },
- { AOM_CDF4(20085, 27614, 30428) },
- { AOM_CDF4(15326, 24335, 28575) },
- { AOM_CDF4(11814, 19472, 24810) },
- { AOM_CDF4(10221, 18611, 24767) },
- { AOM_CDF4(7689, 14558, 20321) } } },
- { { { AOM_CDF4(16214, 22380, 25770) },
- { AOM_CDF4(14213, 21304, 25295) },
- { AOM_CDF4(9213, 15823, 20455) },
- { AOM_CDF4(6395, 11758, 16139) },
- { AOM_CDF4(4779, 9187, 13066) },
- { AOM_CDF4(3821, 7501, 10953) },
- { AOM_CDF4(2293, 4567, 6795) },
- { AOM_CDF4(15859, 21283, 23820) },
- { AOM_CDF4(18404, 25602, 28726) },
- { AOM_CDF4(14325, 21980, 26206) },
- { AOM_CDF4(10669, 17937, 22720) },
- { AOM_CDF4(8297, 14642, 19447) },
- { AOM_CDF4(6746, 12389, 16893) },
- { AOM_CDF4(4324, 8251, 11770) },
- { AOM_CDF4(16532, 21631, 24475) },
- { AOM_CDF4(20667, 27150, 29668) },
- { AOM_CDF4(16728, 24510, 28175) },
- { AOM_CDF4(12861, 20645, 25332) },
- { AOM_CDF4(10076, 17361, 22417) },
- { AOM_CDF4(8395, 14940, 19963) },
- { AOM_CDF4(5731, 10683, 14912) } },
- { { AOM_CDF4(14433, 21155, 24938) },
- { AOM_CDF4(14658, 21716, 25545) },
- { AOM_CDF4(9923, 16824, 21557) },
- { AOM_CDF4(6982, 13052, 17721) },
- { AOM_CDF4(5419, 10503, 15050) },
- { AOM_CDF4(4852, 9162, 13014) },
- { AOM_CDF4(3271, 6395, 9630) },
- { AOM_CDF4(22210, 27833, 30109) },
- { AOM_CDF4(20750, 27368, 29821) },
- { AOM_CDF4(16894, 24828, 28573) },
- { AOM_CDF4(13247, 21276, 25757) },
- { AOM_CDF4(10038, 17265, 22563) },
- { AOM_CDF4(8587, 14947, 20327) },
- { AOM_CDF4(5645, 11371, 15252) },
- { AOM_CDF4(22027, 27526, 29714) },
- { AOM_CDF4(23098, 29146, 31221) },
- { AOM_CDF4(19886, 27341, 30272) },
- { AOM_CDF4(15609, 23747, 28046) },
- { AOM_CDF4(11993, 20065, 24939) },
- { AOM_CDF4(9637, 18267, 23671) },
- { AOM_CDF4(7625, 13801, 19144) } } },
- { { { AOM_CDF4(14438, 20798, 24089) },
- { AOM_CDF4(12621, 19203, 23097) },
- { AOM_CDF4(8177, 14125, 18402) },
- { AOM_CDF4(5674, 10501, 14456) },
- { AOM_CDF4(4236, 8239, 11733) },
- { AOM_CDF4(3447, 6750, 9806) },
- { AOM_CDF4(1986, 3950, 5864) },
- { AOM_CDF4(16208, 22099, 24930) },
- { AOM_CDF4(16537, 24025, 27585) },
- { AOM_CDF4(12780, 20381, 24867) },
- { AOM_CDF4(9767, 16612, 21416) },
- { AOM_CDF4(7686, 13738, 18398) },
- { AOM_CDF4(6333, 11614, 15964) },
- { AOM_CDF4(3941, 7571, 10836) },
- { AOM_CDF4(22819, 27422, 29202) },
- { AOM_CDF4(22224, 28514, 30721) },
- { AOM_CDF4(17660, 25433, 28913) },
- { AOM_CDF4(13574, 21482, 26002) },
- { AOM_CDF4(10629, 17977, 22938) },
- { AOM_CDF4(8612, 15298, 20265) },
- { AOM_CDF4(5607, 10491, 14596) } },
- { { AOM_CDF4(13569, 19800, 23206) },
- { AOM_CDF4(13128, 19924, 23869) },
- { AOM_CDF4(8329, 14841, 19403) },
- { AOM_CDF4(6130, 10976, 15057) },
- { AOM_CDF4(4682, 8839, 12518) },
- { AOM_CDF4(3656, 7409, 10588) },
- { AOM_CDF4(2577, 5099, 7412) },
- { AOM_CDF4(22427, 28684, 30585) },
- { AOM_CDF4(20913, 27750, 30139) },
- { AOM_CDF4(15840, 24109, 27834) },
- { AOM_CDF4(12308, 20029, 24569) },
- { AOM_CDF4(10216, 16785, 21458) },
- { AOM_CDF4(8309, 14203, 19113) },
- { AOM_CDF4(6043, 11168, 15307) },
- { AOM_CDF4(23166, 28901, 30998) },
- { AOM_CDF4(21899, 28405, 30751) },
- { AOM_CDF4(18413, 26091, 29443) },
- { AOM_CDF4(15233, 23114, 27352) },
- { AOM_CDF4(12683, 20472, 25288) },
- { AOM_CDF4(10702, 18259, 23409) },
- { AOM_CDF4(8125, 14464, 19226) } } },
- { { { AOM_CDF4(9040, 14786, 18360) },
- { AOM_CDF4(9979, 15718, 19415) },
- { AOM_CDF4(7913, 13918, 18311) },
- { AOM_CDF4(5859, 10889, 15184) },
- { AOM_CDF4(4593, 8677, 12510) },
- { AOM_CDF4(3820, 7396, 10791) },
- { AOM_CDF4(1730, 3471, 5192) },
- { AOM_CDF4(11803, 18365, 22709) },
- { AOM_CDF4(11419, 18058, 22225) },
- { AOM_CDF4(9418, 15774, 20243) },
- { AOM_CDF4(7539, 13325, 17657) },
- { AOM_CDF4(6233, 11317, 15384) },
- { AOM_CDF4(5137, 9656, 13545) },
- { AOM_CDF4(2977, 5774, 8349) },
- { AOM_CDF4(21207, 27246, 29640) },
- { AOM_CDF4(19547, 26578, 29497) },
- { AOM_CDF4(16169, 23871, 27690) },
- { AOM_CDF4(12820, 20458, 25018) },
- { AOM_CDF4(10224, 17332, 22214) },
- { AOM_CDF4(8526, 15048, 19884) },
- { AOM_CDF4(5037, 9410, 13118) } },
- { { AOM_CDF4(12339, 17329, 20140) },
- { AOM_CDF4(13505, 19895, 23225) },
- { AOM_CDF4(9847, 16944, 21564) },
- { AOM_CDF4(7280, 13256, 18348) },
- { AOM_CDF4(4712, 10009, 14454) },
- { AOM_CDF4(4361, 7914, 12477) },
- { AOM_CDF4(2870, 5628, 7995) },
- { AOM_CDF4(20061, 25504, 28526) },
- { AOM_CDF4(15235, 22878, 26145) },
- { AOM_CDF4(12985, 19958, 24155) },
- { AOM_CDF4(9782, 16641, 21403) },
- { AOM_CDF4(9456, 16360, 20760) },
- { AOM_CDF4(6855, 12940, 18557) },
- { AOM_CDF4(5661, 10564, 15002) },
- { AOM_CDF4(25656, 30602, 31894) },
- { AOM_CDF4(22570, 29107, 31092) },
- { AOM_CDF4(18917, 26423, 29541) },
- { AOM_CDF4(15940, 23649, 27754) },
- { AOM_CDF4(12803, 20581, 25219) },
- { AOM_CDF4(11082, 18695, 23376) },
- { AOM_CDF4(7939, 14373, 19005) } } },
- { { { AOM_CDF4(8192, 16384, 24576) }, { AOM_CDF4(8192, 16384, 24576) },
- { AOM_CDF4(8192, 16384, 24576) }, { AOM_CDF4(8192, 16384, 24576) },
- { AOM_CDF4(8192, 16384, 24576) }, { AOM_CDF4(8192, 16384, 24576) },
- { AOM_CDF4(8192, 16384, 24576) }, { AOM_CDF4(8192, 16384, 24576) },
- { AOM_CDF4(8192, 16384, 24576) }, { AOM_CDF4(8192, 16384, 24576) },
- { AOM_CDF4(8192, 16384, 24576) }, { AOM_CDF4(8192, 16384, 24576) },
- { AOM_CDF4(8192, 16384, 24576) }, { AOM_CDF4(8192, 16384, 24576) },
- { AOM_CDF4(8192, 16384, 24576) }, { AOM_CDF4(8192, 16384, 24576) },
- { AOM_CDF4(8192, 16384, 24576) }, { AOM_CDF4(8192, 16384, 24576) },
- { AOM_CDF4(8192, 16384, 24576) }, { AOM_CDF4(8192, 16384, 24576) },
- { AOM_CDF4(8192, 16384, 24576) } },
- { { AOM_CDF4(8192, 16384, 24576) }, { AOM_CDF4(8192, 16384, 24576) },
- { AOM_CDF4(8192, 16384, 24576) }, { AOM_CDF4(8192, 16384, 24576) },
- { AOM_CDF4(8192, 16384, 24576) }, { AOM_CDF4(8192, 16384, 24576) },
- { AOM_CDF4(8192, 16384, 24576) }, { AOM_CDF4(8192, 16384, 24576) },
- { AOM_CDF4(8192, 16384, 24576) }, { AOM_CDF4(8192, 16384, 24576) },
- { AOM_CDF4(8192, 16384, 24576) }, { AOM_CDF4(8192, 16384, 24576) },
- { AOM_CDF4(8192, 16384, 24576) }, { AOM_CDF4(8192, 16384, 24576) },
- { AOM_CDF4(8192, 16384, 24576) }, { AOM_CDF4(8192, 16384, 24576) },
- { AOM_CDF4(8192, 16384, 24576) }, { AOM_CDF4(8192, 16384, 24576) },
- { AOM_CDF4(8192, 16384, 24576) }, { AOM_CDF4(8192, 16384, 24576) },
- { AOM_CDF4(8192, 16384, 24576) } } } },
- { { { { AOM_CDF4(18315, 24289, 27551) },
- { AOM_CDF4(16854, 24068, 27835) },
- { AOM_CDF4(10140, 17927, 23173) },
- { AOM_CDF4(6722, 12982, 18267) },
- { AOM_CDF4(4661, 9826, 14706) },
- { AOM_CDF4(3832, 8165, 12294) },
- { AOM_CDF4(2795, 6098, 9245) },
- { AOM_CDF4(17145, 23326, 26672) },
- { AOM_CDF4(20733, 27680, 30308) },
- { AOM_CDF4(16032, 24461, 28546) },
- { AOM_CDF4(11653, 20093, 25081) },
- { AOM_CDF4(9290, 16429, 22086) },
- { AOM_CDF4(7796, 14598, 19982) },
- { AOM_CDF4(6502, 12378, 17441) },
- { AOM_CDF4(21681, 27732, 30320) },
- { AOM_CDF4(22389, 29044, 31261) },
- { AOM_CDF4(19027, 26731, 30087) },
- { AOM_CDF4(14739, 23755, 28624) },
- { AOM_CDF4(11358, 20778, 25511) },
- { AOM_CDF4(10995, 18073, 24190) },
- { AOM_CDF4(9162, 14990, 20617) } },
- { { AOM_CDF4(21425, 27952, 30388) },
- { AOM_CDF4(18062, 25838, 29034) },
- { AOM_CDF4(11956, 19881, 24808) },
- { AOM_CDF4(7718, 15000, 20980) },
- { AOM_CDF4(5702, 11254, 16143) },
- { AOM_CDF4(4898, 9088, 16864) },
- { AOM_CDF4(3679, 6776, 11907) },
- { AOM_CDF4(23294, 30160, 31663) },
- { AOM_CDF4(24397, 29896, 31836) },
- { AOM_CDF4(19245, 27128, 30593) },
- { AOM_CDF4(13202, 19825, 26404) },
- { AOM_CDF4(11578, 19297, 23957) },
- { AOM_CDF4(8073, 13297, 21370) },
- { AOM_CDF4(5461, 10923, 19745) },
- { AOM_CDF4(27367, 30521, 31934) },
- { AOM_CDF4(24904, 30671, 31940) },
- { AOM_CDF4(23075, 28460, 31299) },
- { AOM_CDF4(14400, 23658, 30417) },
- { AOM_CDF4(13885, 23882, 28325) },
- { AOM_CDF4(14746, 22938, 27853) },
- { AOM_CDF4(5461, 16384, 27307) } } },
- { { { AOM_CDF4(18274, 24813, 27890) },
- { AOM_CDF4(15537, 23149, 27003) },
- { AOM_CDF4(9449, 16740, 21827) },
- { AOM_CDF4(6700, 12498, 17261) },
- { AOM_CDF4(4988, 9866, 14198) },
- { AOM_CDF4(4236, 8147, 11902) },
- { AOM_CDF4(2867, 5860, 8654) },
- { AOM_CDF4(17124, 23171, 26101) },
- { AOM_CDF4(20396, 27477, 30148) },
- { AOM_CDF4(16573, 24629, 28492) },
- { AOM_CDF4(12749, 20846, 25674) },
- { AOM_CDF4(10233, 17878, 22818) },
- { AOM_CDF4(8525, 15332, 20363) },
- { AOM_CDF4(6283, 11632, 16255) },
- { AOM_CDF4(20466, 26511, 29286) },
- { AOM_CDF4(23059, 29174, 31191) },
- { AOM_CDF4(19481, 27263, 30241) },
- { AOM_CDF4(15458, 23631, 28137) },
- { AOM_CDF4(12416, 20608, 25693) },
- { AOM_CDF4(10261, 18011, 23261) },
- { AOM_CDF4(8016, 14655, 19666) } },
- { { AOM_CDF4(17616, 24586, 28112) },
- { AOM_CDF4(15809, 23299, 27155) },
- { AOM_CDF4(10767, 18890, 23793) },
- { AOM_CDF4(7727, 14255, 18865) },
- { AOM_CDF4(6129, 11926, 16882) },
- { AOM_CDF4(4482, 9704, 14861) },
- { AOM_CDF4(3277, 7452, 11522) },
- { AOM_CDF4(22956, 28551, 30730) },
- { AOM_CDF4(22724, 28937, 30961) },
- { AOM_CDF4(18467, 26324, 29580) },
- { AOM_CDF4(13234, 20713, 25649) },
- { AOM_CDF4(11181, 17592, 22481) },
- { AOM_CDF4(8291, 18358, 24576) },
- { AOM_CDF4(7568, 11881, 14984) },
- { AOM_CDF4(24948, 29001, 31147) },
- { AOM_CDF4(25674, 30619, 32151) },
- { AOM_CDF4(20841, 26793, 29603) },
- { AOM_CDF4(14669, 24356, 28666) },
- { AOM_CDF4(11334, 23593, 28219) },
- { AOM_CDF4(8922, 14762, 22873) },
- { AOM_CDF4(8301, 13544, 20535) } } },
- { { { AOM_CDF4(17113, 23733, 27081) },
- { AOM_CDF4(14139, 21406, 25452) },
- { AOM_CDF4(8552, 15002, 19776) },
- { AOM_CDF4(5871, 11120, 15378) },
- { AOM_CDF4(4455, 8616, 12253) },
- { AOM_CDF4(3469, 6910, 10386) },
- { AOM_CDF4(2255, 4553, 6782) },
- { AOM_CDF4(18224, 24376, 27053) },
- { AOM_CDF4(19290, 26710, 29614) },
- { AOM_CDF4(14936, 22991, 27184) },
- { AOM_CDF4(11238, 18951, 23762) },
- { AOM_CDF4(8786, 15617, 20588) },
- { AOM_CDF4(7317, 13228, 18003) },
- { AOM_CDF4(5101, 9512, 13493) },
- { AOM_CDF4(22639, 28222, 30210) },
- { AOM_CDF4(23216, 29331, 31307) },
- { AOM_CDF4(19075, 26762, 29895) },
- { AOM_CDF4(15014, 23113, 27457) },
- { AOM_CDF4(11938, 19857, 24752) },
- { AOM_CDF4(9942, 17280, 22282) },
- { AOM_CDF4(7167, 13144, 17752) } },
- { { AOM_CDF4(15820, 22738, 26488) },
- { AOM_CDF4(13530, 20885, 25216) },
- { AOM_CDF4(8395, 15530, 20452) },
- { AOM_CDF4(6574, 12321, 16380) },
- { AOM_CDF4(5353, 10419, 14568) },
- { AOM_CDF4(4613, 8446, 12381) },
- { AOM_CDF4(3440, 7158, 9903) },
- { AOM_CDF4(24247, 29051, 31224) },
- { AOM_CDF4(22118, 28058, 30369) },
- { AOM_CDF4(16498, 24768, 28389) },
- { AOM_CDF4(12920, 21175, 26137) },
- { AOM_CDF4(10730, 18619, 25352) },
- { AOM_CDF4(10187, 16279, 22791) },
- { AOM_CDF4(9310, 14631, 22127) },
- { AOM_CDF4(24970, 30558, 32057) },
- { AOM_CDF4(24801, 29942, 31698) },
- { AOM_CDF4(22432, 28453, 30855) },
- { AOM_CDF4(19054, 25680, 29580) },
- { AOM_CDF4(14392, 23036, 28109) },
- { AOM_CDF4(12495, 20947, 26650) },
- { AOM_CDF4(12442, 20326, 26214) } } },
- { { { AOM_CDF4(12162, 18785, 22648) },
- { AOM_CDF4(12749, 19697, 23806) },
- { AOM_CDF4(8580, 15297, 20346) },
- { AOM_CDF4(6169, 11749, 16543) },
- { AOM_CDF4(4836, 9391, 13448) },
- { AOM_CDF4(3821, 7711, 11613) },
- { AOM_CDF4(2228, 4601, 7070) },
- { AOM_CDF4(16319, 24725, 28280) },
- { AOM_CDF4(15698, 23277, 27168) },
- { AOM_CDF4(12726, 20368, 25047) },
- { AOM_CDF4(9912, 17015, 21976) },
- { AOM_CDF4(7888, 14220, 19179) },
- { AOM_CDF4(6777, 12284, 17018) },
- { AOM_CDF4(4492, 8590, 12252) },
- { AOM_CDF4(23249, 28904, 30947) },
- { AOM_CDF4(21050, 27908, 30512) },
- { AOM_CDF4(17440, 25340, 28949) },
- { AOM_CDF4(14059, 22018, 26541) },
- { AOM_CDF4(11288, 18903, 23898) },
- { AOM_CDF4(9411, 16342, 21428) },
- { AOM_CDF4(6278, 11588, 15944) } },
- { { AOM_CDF4(13981, 20067, 23226) },
- { AOM_CDF4(16922, 23580, 26783) },
- { AOM_CDF4(11005, 19039, 24487) },
- { AOM_CDF4(7389, 14218, 19798) },
- { AOM_CDF4(5598, 11505, 17206) },
- { AOM_CDF4(6090, 11213, 15659) },
- { AOM_CDF4(3820, 7371, 10119) },
- { AOM_CDF4(21082, 26925, 29675) },
- { AOM_CDF4(21262, 28627, 31128) },
- { AOM_CDF4(18392, 26454, 30437) },
- { AOM_CDF4(14870, 22910, 27096) },
- { AOM_CDF4(12620, 19484, 24908) },
- { AOM_CDF4(9290, 16553, 22802) },
- { AOM_CDF4(6668, 14288, 20004) },
- { AOM_CDF4(27704, 31055, 31949) },
- { AOM_CDF4(24709, 29978, 31788) },
- { AOM_CDF4(21668, 29264, 31657) },
- { AOM_CDF4(18295, 26968, 30074) },
- { AOM_CDF4(16399, 24422, 29313) },
- { AOM_CDF4(14347, 23026, 28104) },
- { AOM_CDF4(12370, 19806, 24477) } } },
- { { { AOM_CDF4(8192, 16384, 24576) }, { AOM_CDF4(8192, 16384, 24576) },
- { AOM_CDF4(8192, 16384, 24576) }, { AOM_CDF4(8192, 16384, 24576) },
- { AOM_CDF4(8192, 16384, 24576) }, { AOM_CDF4(8192, 16384, 24576) },
- { AOM_CDF4(8192, 16384, 24576) }, { AOM_CDF4(8192, 16384, 24576) },
- { AOM_CDF4(8192, 16384, 24576) }, { AOM_CDF4(8192, 16384, 24576) },
- { AOM_CDF4(8192, 16384, 24576) }, { AOM_CDF4(8192, 16384, 24576) },
- { AOM_CDF4(8192, 16384, 24576) }, { AOM_CDF4(8192, 16384, 24576) },
- { AOM_CDF4(8192, 16384, 24576) }, { AOM_CDF4(8192, 16384, 24576) },
- { AOM_CDF4(8192, 16384, 24576) }, { AOM_CDF4(8192, 16384, 24576) },
- { AOM_CDF4(8192, 16384, 24576) }, { AOM_CDF4(8192, 16384, 24576) },
- { AOM_CDF4(8192, 16384, 24576) } },
- { { AOM_CDF4(8192, 16384, 24576) }, { AOM_CDF4(8192, 16384, 24576) },
- { AOM_CDF4(8192, 16384, 24576) }, { AOM_CDF4(8192, 16384, 24576) },
- { AOM_CDF4(8192, 16384, 24576) }, { AOM_CDF4(8192, 16384, 24576) },
- { AOM_CDF4(8192, 16384, 24576) }, { AOM_CDF4(8192, 16384, 24576) },
- { AOM_CDF4(8192, 16384, 24576) }, { AOM_CDF4(8192, 16384, 24576) },
- { AOM_CDF4(8192, 16384, 24576) }, { AOM_CDF4(8192, 16384, 24576) },
- { AOM_CDF4(8192, 16384, 24576) }, { AOM_CDF4(8192, 16384, 24576) },
- { AOM_CDF4(8192, 16384, 24576) }, { AOM_CDF4(8192, 16384, 24576) },
- { AOM_CDF4(8192, 16384, 24576) }, { AOM_CDF4(8192, 16384, 24576) },
- { AOM_CDF4(8192, 16384, 24576) }, { AOM_CDF4(8192, 16384, 24576) },
- { AOM_CDF4(8192, 16384, 24576) } } } }
- };
-
-static const aom_cdf_prob av1_default_coeff_base_multi_cdfs
- [TOKEN_CDF_Q_CTXS][TX_SIZES][PLANE_TYPES][SIG_COEF_CONTEXTS]
- [CDF_SIZE(NUM_BASE_LEVELS + 2)] =
- { { { { { AOM_CDF4(4034, 8930, 12727) },
- { AOM_CDF4(18082, 29741, 31877) },
- { AOM_CDF4(12596, 26124, 30493) },
- { AOM_CDF4(9446, 21118, 27005) },
- { AOM_CDF4(6308, 15141, 21279) },
- { AOM_CDF4(2463, 6357, 9783) },
- { AOM_CDF4(20667, 30546, 31929) },
- { AOM_CDF4(13043, 26123, 30134) },
- { AOM_CDF4(8151, 18757, 24778) },
- { AOM_CDF4(5255, 12839, 18632) },
- { AOM_CDF4(2820, 7206, 11161) },
- { AOM_CDF4(8192, 16384, 24576) },
- { AOM_CDF4(8192, 16384, 24576) },
- { AOM_CDF4(8192, 16384, 24576) },
- { AOM_CDF4(8192, 16384, 24576) },
- { AOM_CDF4(8192, 16384, 24576) },
- { AOM_CDF4(8192, 16384, 24576) },
- { AOM_CDF4(8192, 16384, 24576) },
- { AOM_CDF4(8192, 16384, 24576) },
- { AOM_CDF4(8192, 16384, 24576) },
- { AOM_CDF4(8192, 16384, 24576) },
- { AOM_CDF4(15736, 27553, 30604) },
- { AOM_CDF4(11210, 23794, 28787) },
- { AOM_CDF4(5947, 13874, 19701) },
- { AOM_CDF4(4215, 9323, 13891) },
- { AOM_CDF4(2833, 6462, 10059) },
- { AOM_CDF4(19605, 30393, 31582) },
- { AOM_CDF4(13523, 26252, 30248) },
- { AOM_CDF4(8446, 18622, 24512) },
- { AOM_CDF4(3818, 10343, 15974) },
- { AOM_CDF4(1481, 4117, 6796) },
- { AOM_CDF4(22649, 31302, 32190) },
- { AOM_CDF4(14829, 27127, 30449) },
- { AOM_CDF4(8313, 17702, 23304) },
- { AOM_CDF4(3022, 8301, 12786) },
- { AOM_CDF4(1536, 4412, 7184) },
- { AOM_CDF4(22354, 29774, 31372) },
- { AOM_CDF4(14723, 25472, 29214) },
- { AOM_CDF4(6673, 13745, 18662) },
- { AOM_CDF4(2068, 5766, 9322) },
- { AOM_CDF4(8192, 16384, 24576) },
- { AOM_CDF4(8192, 16384, 24576) } },
- { { AOM_CDF4(6302, 16444, 21761) },
- { AOM_CDF4(23040, 31538, 32475) },
- { AOM_CDF4(15196, 28452, 31496) },
- { AOM_CDF4(10020, 22946, 28514) },
- { AOM_CDF4(6533, 16862, 23501) },
- { AOM_CDF4(3538, 9816, 15076) },
- { AOM_CDF4(24444, 31875, 32525) },
- { AOM_CDF4(15881, 28924, 31635) },
- { AOM_CDF4(9922, 22873, 28466) },
- { AOM_CDF4(6527, 16966, 23691) },
- { AOM_CDF4(4114, 11303, 17220) },
- { AOM_CDF4(8192, 16384, 24576) },
- { AOM_CDF4(8192, 16384, 24576) },
- { AOM_CDF4(8192, 16384, 24576) },
- { AOM_CDF4(8192, 16384, 24576) },
- { AOM_CDF4(8192, 16384, 24576) },
- { AOM_CDF4(8192, 16384, 24576) },
- { AOM_CDF4(8192, 16384, 24576) },
- { AOM_CDF4(8192, 16384, 24576) },
- { AOM_CDF4(8192, 16384, 24576) },
- { AOM_CDF4(8192, 16384, 24576) },
- { AOM_CDF4(20201, 30770, 32209) },
- { AOM_CDF4(14754, 28071, 31258) },
- { AOM_CDF4(8378, 20186, 26517) },
- { AOM_CDF4(5916, 15299, 21978) },
- { AOM_CDF4(4268, 11583, 17901) },
- { AOM_CDF4(24361, 32025, 32581) },
- { AOM_CDF4(18673, 30105, 31943) },
- { AOM_CDF4(10196, 22244, 27576) },
- { AOM_CDF4(5495, 14349, 20417) },
- { AOM_CDF4(2676, 7415, 11498) },
- { AOM_CDF4(24678, 31958, 32585) },
- { AOM_CDF4(18629, 29906, 31831) },
- { AOM_CDF4(9364, 20724, 26315) },
- { AOM_CDF4(4641, 12318, 18094) },
- { AOM_CDF4(2758, 7387, 11579) },
- { AOM_CDF4(25433, 31842, 32469) },
- { AOM_CDF4(18795, 29289, 31411) },
- { AOM_CDF4(7644, 17584, 23592) },
- { AOM_CDF4(3408, 9014, 15047) },
- { AOM_CDF4(8192, 16384, 24576) },
- { AOM_CDF4(8192, 16384, 24576) } } },
- { { { AOM_CDF4(4536, 10072, 14001) },
- { AOM_CDF4(25459, 31416, 32206) },
- { AOM_CDF4(16605, 28048, 30818) },
- { AOM_CDF4(11008, 22857, 27719) },
- { AOM_CDF4(6915, 16268, 22315) },
- { AOM_CDF4(2625, 6812, 10537) },
- { AOM_CDF4(24257, 31788, 32499) },
- { AOM_CDF4(16880, 29454, 31879) },
- { AOM_CDF4(11958, 25054, 29778) },
- { AOM_CDF4(7916, 18718, 25084) },
- { AOM_CDF4(3383, 8777, 13446) },
- { AOM_CDF4(22720, 31603, 32393) },
- { AOM_CDF4(14960, 28125, 31335) },
- { AOM_CDF4(9731, 22210, 27928) },
- { AOM_CDF4(6304, 15832, 22277) },
- { AOM_CDF4(2910, 7818, 12166) },
- { AOM_CDF4(20375, 30627, 32131) },
- { AOM_CDF4(13904, 27284, 30887) },
- { AOM_CDF4(9368, 21558, 27144) },
- { AOM_CDF4(5937, 14966, 21119) },
- { AOM_CDF4(2667, 7225, 11319) },
- { AOM_CDF4(23970, 31470, 32378) },
- { AOM_CDF4(17173, 29734, 32018) },
- { AOM_CDF4(12795, 25441, 29965) },
- { AOM_CDF4(8981, 19680, 25893) },
- { AOM_CDF4(4728, 11372, 16902) },
- { AOM_CDF4(24287, 31797, 32439) },
- { AOM_CDF4(16703, 29145, 31696) },
- { AOM_CDF4(10833, 23554, 28725) },
- { AOM_CDF4(6468, 16566, 23057) },
- { AOM_CDF4(2415, 6562, 10278) },
- { AOM_CDF4(26610, 32395, 32659) },
- { AOM_CDF4(18590, 30498, 32117) },
- { AOM_CDF4(12420, 25756, 29950) },
- { AOM_CDF4(7639, 18746, 24710) },
- { AOM_CDF4(3001, 8086, 12347) },
- { AOM_CDF4(25076, 32064, 32580) },
- { AOM_CDF4(17946, 30128, 32028) },
- { AOM_CDF4(12024, 24985, 29378) },
- { AOM_CDF4(7517, 18390, 24304) },
- { AOM_CDF4(3243, 8781, 13331) },
- { AOM_CDF4(8192, 16384, 24576) } },
- { { AOM_CDF4(6037, 16771, 21957) },
- { AOM_CDF4(24774, 31704, 32426) },
- { AOM_CDF4(16830, 28589, 31056) },
- { AOM_CDF4(10602, 22828, 27760) },
- { AOM_CDF4(6733, 16829, 23071) },
- { AOM_CDF4(3250, 8914, 13556) },
- { AOM_CDF4(25582, 32220, 32668) },
- { AOM_CDF4(18659, 30342, 32223) },
- { AOM_CDF4(12546, 26149, 30515) },
- { AOM_CDF4(8420, 20451, 26801) },
- { AOM_CDF4(4636, 12420, 18344) },
- { AOM_CDF4(27581, 32362, 32639) },
- { AOM_CDF4(18987, 30083, 31978) },
- { AOM_CDF4(11327, 24248, 29084) },
- { AOM_CDF4(7264, 17719, 24120) },
- { AOM_CDF4(3995, 10768, 16169) },
- { AOM_CDF4(25893, 31831, 32487) },
- { AOM_CDF4(16577, 28587, 31379) },
- { AOM_CDF4(10189, 22748, 28182) },
- { AOM_CDF4(6832, 17094, 23556) },
- { AOM_CDF4(3708, 10110, 15334) },
- { AOM_CDF4(25904, 32282, 32656) },
- { AOM_CDF4(19721, 30792, 32276) },
- { AOM_CDF4(12819, 26243, 30411) },
- { AOM_CDF4(8572, 20614, 26891) },
- { AOM_CDF4(5364, 14059, 20467) },
- { AOM_CDF4(26580, 32438, 32677) },
- { AOM_CDF4(20852, 31225, 32340) },
- { AOM_CDF4(12435, 25700, 29967) },
- { AOM_CDF4(8691, 20825, 26976) },
- { AOM_CDF4(4446, 12209, 17269) },
- { AOM_CDF4(27350, 32429, 32696) },
- { AOM_CDF4(21372, 30977, 32272) },
- { AOM_CDF4(12673, 25270, 29853) },
- { AOM_CDF4(9208, 20925, 26640) },
- { AOM_CDF4(5018, 13351, 18732) },
- { AOM_CDF4(27351, 32479, 32713) },
- { AOM_CDF4(21398, 31209, 32387) },
- { AOM_CDF4(12162, 25047, 29842) },
- { AOM_CDF4(7896, 18691, 25319) },
- { AOM_CDF4(4670, 12882, 18881) },
- { AOM_CDF4(8192, 16384, 24576) } } },
- { { { AOM_CDF4(5487, 10460, 13708) },
- { AOM_CDF4(21597, 28303, 30674) },
- { AOM_CDF4(11037, 21953, 26476) },
- { AOM_CDF4(8147, 17962, 22952) },
- { AOM_CDF4(5242, 13061, 18532) },
- { AOM_CDF4(1889, 5208, 8182) },
- { AOM_CDF4(26774, 32133, 32590) },
- { AOM_CDF4(17844, 29564, 31767) },
- { AOM_CDF4(11690, 24438, 29171) },
- { AOM_CDF4(7542, 18215, 24459) },
- { AOM_CDF4(2993, 8050, 12319) },
- { AOM_CDF4(28023, 32328, 32591) },
- { AOM_CDF4(18651, 30126, 31954) },
- { AOM_CDF4(12164, 25146, 29589) },
- { AOM_CDF4(7762, 18530, 24771) },
- { AOM_CDF4(3492, 9183, 13920) },
- { AOM_CDF4(27591, 32008, 32491) },
- { AOM_CDF4(17149, 28853, 31510) },
- { AOM_CDF4(11485, 24003, 28860) },
- { AOM_CDF4(7697, 18086, 24210) },
- { AOM_CDF4(3075, 7999, 12218) },
- { AOM_CDF4(28268, 32482, 32654) },
- { AOM_CDF4(19631, 31051, 32404) },
- { AOM_CDF4(13860, 27260, 31020) },
- { AOM_CDF4(9605, 21613, 27594) },
- { AOM_CDF4(4876, 12162, 17908) },
- { AOM_CDF4(27248, 32316, 32576) },
- { AOM_CDF4(18955, 30457, 32075) },
- { AOM_CDF4(11824, 23997, 28795) },
- { AOM_CDF4(7346, 18196, 24647) },
- { AOM_CDF4(3403, 9247, 14111) },
- { AOM_CDF4(29711, 32655, 32735) },
- { AOM_CDF4(21169, 31394, 32417) },
- { AOM_CDF4(13487, 27198, 30957) },
- { AOM_CDF4(8828, 21683, 27614) },
- { AOM_CDF4(4270, 11451, 17038) },
- { AOM_CDF4(28708, 32578, 32731) },
- { AOM_CDF4(20120, 31241, 32482) },
- { AOM_CDF4(13692, 27550, 31321) },
- { AOM_CDF4(9418, 22514, 28439) },
- { AOM_CDF4(4999, 13283, 19462) },
- { AOM_CDF4(8192, 16384, 24576) } },
- { { AOM_CDF4(5673, 14302, 19711) },
- { AOM_CDF4(26251, 30701, 31834) },
- { AOM_CDF4(12782, 23783, 27803) },
- { AOM_CDF4(9127, 20657, 25808) },
- { AOM_CDF4(6368, 16208, 21462) },
- { AOM_CDF4(2465, 7177, 10822) },
- { AOM_CDF4(29961, 32563, 32719) },
- { AOM_CDF4(18318, 29891, 31949) },
- { AOM_CDF4(11361, 24514, 29357) },
- { AOM_CDF4(7900, 19603, 25607) },
- { AOM_CDF4(4002, 10590, 15546) },
- { AOM_CDF4(29637, 32310, 32595) },
- { AOM_CDF4(18296, 29913, 31809) },
- { AOM_CDF4(10144, 21515, 26871) },
- { AOM_CDF4(5358, 14322, 20394) },
- { AOM_CDF4(3067, 8362, 13346) },
- { AOM_CDF4(28652, 32470, 32676) },
- { AOM_CDF4(17538, 30771, 32209) },
- { AOM_CDF4(13924, 26882, 30494) },
- { AOM_CDF4(10496, 22837, 27869) },
- { AOM_CDF4(7236, 16396, 21621) },
- { AOM_CDF4(30743, 32687, 32746) },
- { AOM_CDF4(23006, 31676, 32489) },
- { AOM_CDF4(14494, 27828, 31120) },
- { AOM_CDF4(10174, 22801, 28352) },
- { AOM_CDF4(6242, 15281, 21043) },
- { AOM_CDF4(25817, 32243, 32720) },
- { AOM_CDF4(18618, 31367, 32325) },
- { AOM_CDF4(13997, 28318, 31878) },
- { AOM_CDF4(12255, 26534, 31383) },
- { AOM_CDF4(9561, 21588, 28450) },
- { AOM_CDF4(28188, 32635, 32724) },
- { AOM_CDF4(22060, 32365, 32728) },
- { AOM_CDF4(18102, 30690, 32528) },
- { AOM_CDF4(14196, 28864, 31999) },
- { AOM_CDF4(12262, 25792, 30865) },
- { AOM_CDF4(24176, 32109, 32628) },
- { AOM_CDF4(18280, 29681, 31963) },
- { AOM_CDF4(10205, 23703, 29664) },
- { AOM_CDF4(7889, 20025, 27676) },
- { AOM_CDF4(6060, 16743, 23970) },
- { AOM_CDF4(8192, 16384, 24576) } } },
- { { { AOM_CDF4(5141, 7096, 8260) },
- { AOM_CDF4(27186, 29022, 29789) },
- { AOM_CDF4(6668, 12568, 15682) },
- { AOM_CDF4(2172, 6181, 8638) },
- { AOM_CDF4(1126, 3379, 4531) },
- { AOM_CDF4(443, 1361, 2254) },
- { AOM_CDF4(26083, 31153, 32436) },
- { AOM_CDF4(13486, 24603, 28483) },
- { AOM_CDF4(6508, 14840, 19910) },
- { AOM_CDF4(3386, 8800, 13286) },
- { AOM_CDF4(1530, 4322, 7054) },
- { AOM_CDF4(29639, 32080, 32548) },
- { AOM_CDF4(15897, 27552, 30290) },
- { AOM_CDF4(8588, 20047, 25383) },
- { AOM_CDF4(4889, 13339, 19269) },
- { AOM_CDF4(2240, 6871, 10498) },
- { AOM_CDF4(28165, 32197, 32517) },
- { AOM_CDF4(20735, 30427, 31568) },
- { AOM_CDF4(14325, 24671, 27692) },
- { AOM_CDF4(5119, 12554, 17805) },
- { AOM_CDF4(1810, 5441, 8261) },
- { AOM_CDF4(31212, 32724, 32748) },
- { AOM_CDF4(23352, 31766, 32545) },
- { AOM_CDF4(14669, 27570, 31059) },
- { AOM_CDF4(8492, 20894, 27272) },
- { AOM_CDF4(3644, 10194, 15204) },
- { AOM_CDF4(8192, 16384, 24576) },
- { AOM_CDF4(8192, 16384, 24576) },
- { AOM_CDF4(8192, 16384, 24576) },
- { AOM_CDF4(8192, 16384, 24576) },
- { AOM_CDF4(8192, 16384, 24576) },
- { AOM_CDF4(8192, 16384, 24576) },
- { AOM_CDF4(8192, 16384, 24576) },
- { AOM_CDF4(8192, 16384, 24576) },
- { AOM_CDF4(8192, 16384, 24576) },
- { AOM_CDF4(8192, 16384, 24576) },
- { AOM_CDF4(8192, 16384, 24576) },
- { AOM_CDF4(8192, 16384, 24576) },
- { AOM_CDF4(8192, 16384, 24576) },
- { AOM_CDF4(8192, 16384, 24576) },
- { AOM_CDF4(8192, 16384, 24576) },
- { AOM_CDF4(8192, 16384, 24576) } },
- { { AOM_CDF4(2461, 7013, 9371) },
- { AOM_CDF4(24749, 29600, 30986) },
- { AOM_CDF4(9466, 19037, 22417) },
- { AOM_CDF4(3584, 9280, 14400) },
- { AOM_CDF4(1505, 3929, 5433) },
- { AOM_CDF4(677, 1500, 2736) },
- { AOM_CDF4(23987, 30702, 32117) },
- { AOM_CDF4(13554, 24571, 29263) },
- { AOM_CDF4(6211, 14556, 21155) },
- { AOM_CDF4(3135, 10972, 15625) },
- { AOM_CDF4(2435, 7127, 11427) },
- { AOM_CDF4(31300, 32532, 32550) },
- { AOM_CDF4(14757, 30365, 31954) },
- { AOM_CDF4(4405, 11612, 18553) },
- { AOM_CDF4(580, 4132, 7322) },
- { AOM_CDF4(1695, 10169, 14124) },
- { AOM_CDF4(30008, 32282, 32591) },
- { AOM_CDF4(19244, 30108, 31748) },
- { AOM_CDF4(11180, 24158, 29555) },
- { AOM_CDF4(5650, 14972, 19209) },
- { AOM_CDF4(2114, 5109, 8456) },
- { AOM_CDF4(31856, 32716, 32748) },
- { AOM_CDF4(23012, 31664, 32572) },
- { AOM_CDF4(13694, 26656, 30636) },
- { AOM_CDF4(8142, 19508, 26093) },
- { AOM_CDF4(4253, 10955, 16724) },
- { AOM_CDF4(8192, 16384, 24576) },
- { AOM_CDF4(8192, 16384, 24576) },
- { AOM_CDF4(8192, 16384, 24576) },
- { AOM_CDF4(8192, 16384, 24576) },
- { AOM_CDF4(8192, 16384, 24576) },
- { AOM_CDF4(8192, 16384, 24576) },
- { AOM_CDF4(8192, 16384, 24576) },
- { AOM_CDF4(8192, 16384, 24576) },
- { AOM_CDF4(8192, 16384, 24576) },
- { AOM_CDF4(8192, 16384, 24576) },
- { AOM_CDF4(8192, 16384, 24576) },
- { AOM_CDF4(8192, 16384, 24576) },
- { AOM_CDF4(8192, 16384, 24576) },
- { AOM_CDF4(8192, 16384, 24576) },
- { AOM_CDF4(8192, 16384, 24576) },
- { AOM_CDF4(8192, 16384, 24576) } } },
- { { { AOM_CDF4(601, 983, 1311) },
- { AOM_CDF4(18725, 23406, 28087) },
- { AOM_CDF4(5461, 8192, 10923) },
- { AOM_CDF4(3781, 15124, 21425) },
- { AOM_CDF4(2587, 7761, 12072) },
- { AOM_CDF4(106, 458, 810) },
- { AOM_CDF4(22282, 29710, 31894) },
- { AOM_CDF4(8508, 20926, 25984) },
- { AOM_CDF4(3726, 12713, 18083) },
- { AOM_CDF4(1620, 7112, 10893) },
- { AOM_CDF4(729, 2236, 3495) },
- { AOM_CDF4(30163, 32474, 32684) },
- { AOM_CDF4(18304, 30464, 32000) },
- { AOM_CDF4(11443, 26526, 29647) },
- { AOM_CDF4(6007, 15292, 21299) },
- { AOM_CDF4(2234, 6703, 8937) },
- { AOM_CDF4(30954, 32177, 32571) },
- { AOM_CDF4(17363, 29562, 31076) },
- { AOM_CDF4(9686, 22464, 27410) },
- { AOM_CDF4(8192, 16384, 21390) },
- { AOM_CDF4(1755, 8046, 11264) },
- { AOM_CDF4(31168, 32734, 32748) },
- { AOM_CDF4(22486, 31441, 32471) },
- { AOM_CDF4(12833, 25627, 29738) },
- { AOM_CDF4(6980, 17379, 23122) },
- { AOM_CDF4(3111, 8887, 13479) },
- { AOM_CDF4(8192, 16384, 24576) },
- { AOM_CDF4(8192, 16384, 24576) },
- { AOM_CDF4(8192, 16384, 24576) },
- { AOM_CDF4(8192, 16384, 24576) },
- { AOM_CDF4(8192, 16384, 24576) },
- { AOM_CDF4(8192, 16384, 24576) },
- { AOM_CDF4(8192, 16384, 24576) },
- { AOM_CDF4(8192, 16384, 24576) },
- { AOM_CDF4(8192, 16384, 24576) },
- { AOM_CDF4(8192, 16384, 24576) },
- { AOM_CDF4(8192, 16384, 24576) },
- { AOM_CDF4(8192, 16384, 24576) },
- { AOM_CDF4(8192, 16384, 24576) },
- { AOM_CDF4(8192, 16384, 24576) },
- { AOM_CDF4(8192, 16384, 24576) },
- { AOM_CDF4(8192, 16384, 24576) } },
- { { AOM_CDF4(8192, 16384, 24576) },
- { AOM_CDF4(8192, 16384, 24576) },
- { AOM_CDF4(8192, 16384, 24576) },
- { AOM_CDF4(8192, 16384, 24576) },
- { AOM_CDF4(8192, 16384, 24576) },
- { AOM_CDF4(8192, 16384, 24576) },
- { AOM_CDF4(8192, 16384, 24576) },
- { AOM_CDF4(8192, 16384, 24576) },
- { AOM_CDF4(8192, 16384, 24576) },
- { AOM_CDF4(8192, 16384, 24576) },
- { AOM_CDF4(8192, 16384, 24576) },
- { AOM_CDF4(8192, 16384, 24576) },
- { AOM_CDF4(8192, 16384, 24576) },
- { AOM_CDF4(8192, 16384, 24576) },
- { AOM_CDF4(8192, 16384, 24576) },
- { AOM_CDF4(8192, 16384, 24576) },
- { AOM_CDF4(8192, 16384, 24576) },
- { AOM_CDF4(8192, 16384, 24576) },
- { AOM_CDF4(8192, 16384, 24576) },
- { AOM_CDF4(8192, 16384, 24576) },
- { AOM_CDF4(8192, 16384, 24576) },
- { AOM_CDF4(8192, 16384, 24576) },
- { AOM_CDF4(8192, 16384, 24576) },
- { AOM_CDF4(8192, 16384, 24576) },
- { AOM_CDF4(8192, 16384, 24576) },
- { AOM_CDF4(8192, 16384, 24576) },
- { AOM_CDF4(8192, 16384, 24576) },
- { AOM_CDF4(8192, 16384, 24576) },
- { AOM_CDF4(8192, 16384, 24576) },
- { AOM_CDF4(8192, 16384, 24576) },
- { AOM_CDF4(8192, 16384, 24576) },
- { AOM_CDF4(8192, 16384, 24576) },
- { AOM_CDF4(8192, 16384, 24576) },
- { AOM_CDF4(8192, 16384, 24576) },
- { AOM_CDF4(8192, 16384, 24576) },
- { AOM_CDF4(8192, 16384, 24576) },
- { AOM_CDF4(8192, 16384, 24576) },
- { AOM_CDF4(8192, 16384, 24576) },
- { AOM_CDF4(8192, 16384, 24576) },
- { AOM_CDF4(8192, 16384, 24576) },
- { AOM_CDF4(8192, 16384, 24576) },
- { AOM_CDF4(8192, 16384, 24576) } } } },
- { { { { AOM_CDF4(6041, 11854, 15927) },
- { AOM_CDF4(20326, 30905, 32251) },
- { AOM_CDF4(14164, 26831, 30725) },
- { AOM_CDF4(9760, 20647, 26585) },
- { AOM_CDF4(6416, 14953, 21219) },
- { AOM_CDF4(2966, 7151, 10891) },
- { AOM_CDF4(23567, 31374, 32254) },
- { AOM_CDF4(14978, 27416, 30946) },
- { AOM_CDF4(9434, 20225, 26254) },
- { AOM_CDF4(6658, 14558, 20535) },
- { AOM_CDF4(3916, 8677, 12989) },
- { AOM_CDF4(8192, 16384, 24576) },
- { AOM_CDF4(8192, 16384, 24576) },
- { AOM_CDF4(8192, 16384, 24576) },
- { AOM_CDF4(8192, 16384, 24576) },
- { AOM_CDF4(8192, 16384, 24576) },
- { AOM_CDF4(8192, 16384, 24576) },
- { AOM_CDF4(8192, 16384, 24576) },
- { AOM_CDF4(8192, 16384, 24576) },
- { AOM_CDF4(8192, 16384, 24576) },
- { AOM_CDF4(8192, 16384, 24576) },
- { AOM_CDF4(18088, 29545, 31587) },
- { AOM_CDF4(13062, 25843, 30073) },
- { AOM_CDF4(8940, 16827, 22251) },
- { AOM_CDF4(7654, 13220, 17973) },
- { AOM_CDF4(5733, 10316, 14456) },
- { AOM_CDF4(22879, 31388, 32114) },
- { AOM_CDF4(15215, 27993, 30955) },
- { AOM_CDF4(9397, 19445, 24978) },
- { AOM_CDF4(3442, 9813, 15344) },
- { AOM_CDF4(1368, 3936, 6532) },
- { AOM_CDF4(25494, 32033, 32406) },
- { AOM_CDF4(16772, 27963, 30718) },
- { AOM_CDF4(9419, 18165, 23260) },
- { AOM_CDF4(2677, 7501, 11797) },
- { AOM_CDF4(1516, 4344, 7170) },
- { AOM_CDF4(26556, 31454, 32101) },
- { AOM_CDF4(17128, 27035, 30108) },
- { AOM_CDF4(8324, 15344, 20249) },
- { AOM_CDF4(1903, 5696, 9469) },
- { AOM_CDF4(8192, 16384, 24576) },
- { AOM_CDF4(8192, 16384, 24576) } },
- { { AOM_CDF4(8455, 19003, 24368) },
- { AOM_CDF4(23563, 32021, 32604) },
- { AOM_CDF4(16237, 29446, 31935) },
- { AOM_CDF4(10724, 23999, 29358) },
- { AOM_CDF4(6725, 17528, 24416) },
- { AOM_CDF4(3927, 10927, 16825) },
- { AOM_CDF4(26313, 32288, 32634) },
- { AOM_CDF4(17430, 30095, 32095) },
- { AOM_CDF4(11116, 24606, 29679) },
- { AOM_CDF4(7195, 18384, 25269) },
- { AOM_CDF4(4726, 12852, 19315) },
- { AOM_CDF4(8192, 16384, 24576) },
- { AOM_CDF4(8192, 16384, 24576) },
- { AOM_CDF4(8192, 16384, 24576) },
- { AOM_CDF4(8192, 16384, 24576) },
- { AOM_CDF4(8192, 16384, 24576) },
- { AOM_CDF4(8192, 16384, 24576) },
- { AOM_CDF4(8192, 16384, 24576) },
- { AOM_CDF4(8192, 16384, 24576) },
- { AOM_CDF4(8192, 16384, 24576) },
- { AOM_CDF4(8192, 16384, 24576) },
- { AOM_CDF4(22822, 31648, 32483) },
- { AOM_CDF4(16724, 29633, 31929) },
- { AOM_CDF4(10261, 23033, 28725) },
- { AOM_CDF4(7029, 17840, 24528) },
- { AOM_CDF4(4867, 13886, 21502) },
- { AOM_CDF4(25298, 31892, 32491) },
- { AOM_CDF4(17809, 29330, 31512) },
- { AOM_CDF4(9668, 21329, 26579) },
- { AOM_CDF4(4774, 12956, 18976) },
- { AOM_CDF4(2322, 7030, 11540) },
- { AOM_CDF4(25472, 31920, 32543) },
- { AOM_CDF4(17957, 29387, 31632) },
- { AOM_CDF4(9196, 20593, 26400) },
- { AOM_CDF4(4680, 12705, 19202) },
- { AOM_CDF4(2917, 8456, 13436) },
- { AOM_CDF4(26471, 32059, 32574) },
- { AOM_CDF4(18458, 29783, 31909) },
- { AOM_CDF4(8400, 19464, 25956) },
- { AOM_CDF4(3812, 10973, 17206) },
- { AOM_CDF4(8192, 16384, 24576) },
- { AOM_CDF4(8192, 16384, 24576) } } },
- { { { AOM_CDF4(6779, 13743, 17678) },
- { AOM_CDF4(24806, 31797, 32457) },
- { AOM_CDF4(17616, 29047, 31372) },
- { AOM_CDF4(11063, 23175, 28003) },
- { AOM_CDF4(6521, 16110, 22324) },
- { AOM_CDF4(2764, 7504, 11654) },
- { AOM_CDF4(25266, 32367, 32637) },
- { AOM_CDF4(19054, 30553, 32175) },
- { AOM_CDF4(12139, 25212, 29807) },
- { AOM_CDF4(7311, 18162, 24704) },
- { AOM_CDF4(3397, 9164, 14074) },
- { AOM_CDF4(25988, 32208, 32522) },
- { AOM_CDF4(16253, 28912, 31526) },
- { AOM_CDF4(9151, 21387, 27372) },
- { AOM_CDF4(5688, 14915, 21496) },
- { AOM_CDF4(2717, 7627, 12004) },
- { AOM_CDF4(23144, 31855, 32443) },
- { AOM_CDF4(16070, 28491, 31325) },
- { AOM_CDF4(8702, 20467, 26517) },
- { AOM_CDF4(5243, 13956, 20367) },
- { AOM_CDF4(2621, 7335, 11567) },
- { AOM_CDF4(26636, 32340, 32630) },
- { AOM_CDF4(19990, 31050, 32341) },
- { AOM_CDF4(13243, 26105, 30315) },
- { AOM_CDF4(8588, 19521, 25918) },
- { AOM_CDF4(4717, 11585, 17304) },
- { AOM_CDF4(25844, 32292, 32582) },
- { AOM_CDF4(19090, 30635, 32097) },
- { AOM_CDF4(11963, 24546, 28939) },
- { AOM_CDF4(6218, 16087, 22354) },
- { AOM_CDF4(2340, 6608, 10426) },
- { AOM_CDF4(28046, 32576, 32694) },
- { AOM_CDF4(21178, 31313, 32296) },
- { AOM_CDF4(13486, 26184, 29870) },
- { AOM_CDF4(7149, 17871, 23723) },
- { AOM_CDF4(2833, 7958, 12259) },
- { AOM_CDF4(27710, 32528, 32686) },
- { AOM_CDF4(20674, 31076, 32268) },
- { AOM_CDF4(12413, 24955, 29243) },
- { AOM_CDF4(6676, 16927, 23097) },
- { AOM_CDF4(2966, 8333, 12919) },
- { AOM_CDF4(8192, 16384, 24576) } },
- { { AOM_CDF4(8639, 19339, 24429) },
- { AOM_CDF4(24404, 31837, 32525) },
- { AOM_CDF4(16997, 29425, 31784) },
- { AOM_CDF4(11253, 24234, 29149) },
- { AOM_CDF4(6751, 17394, 24028) },
- { AOM_CDF4(3490, 9830, 15191) },
- { AOM_CDF4(26283, 32471, 32714) },
- { AOM_CDF4(19599, 31168, 32442) },
- { AOM_CDF4(13146, 26954, 30893) },
- { AOM_CDF4(8214, 20588, 26890) },
- { AOM_CDF4(4699, 13081, 19300) },
- { AOM_CDF4(28212, 32458, 32669) },
- { AOM_CDF4(18594, 30316, 32100) },
- { AOM_CDF4(11219, 24408, 29234) },
- { AOM_CDF4(6865, 17656, 24149) },
- { AOM_CDF4(3678, 10362, 16006) },
- { AOM_CDF4(25825, 32136, 32616) },
- { AOM_CDF4(17313, 29853, 32021) },
- { AOM_CDF4(11197, 24471, 29472) },
- { AOM_CDF4(6947, 17781, 24405) },
- { AOM_CDF4(3768, 10660, 16261) },
- { AOM_CDF4(27352, 32500, 32706) },
- { AOM_CDF4(20850, 31468, 32469) },
- { AOM_CDF4(14021, 27707, 31133) },
- { AOM_CDF4(8964, 21748, 27838) },
- { AOM_CDF4(5437, 14665, 21187) },
- { AOM_CDF4(26304, 32492, 32698) },
- { AOM_CDF4(20409, 31380, 32385) },
- { AOM_CDF4(13682, 27222, 30632) },
- { AOM_CDF4(8974, 21236, 26685) },
- { AOM_CDF4(4234, 11665, 16934) },
- { AOM_CDF4(26273, 32357, 32711) },
- { AOM_CDF4(20672, 31242, 32441) },
- { AOM_CDF4(14172, 27254, 30902) },
- { AOM_CDF4(9870, 21898, 27275) },
- { AOM_CDF4(5164, 13506, 19270) },
- { AOM_CDF4(26725, 32459, 32728) },
- { AOM_CDF4(20991, 31442, 32527) },
- { AOM_CDF4(13071, 26434, 30811) },
- { AOM_CDF4(8184, 20090, 26742) },
- { AOM_CDF4(4803, 13255, 19895) },
- { AOM_CDF4(8192, 16384, 24576) } } },
- { { { AOM_CDF4(7555, 14942, 18501) },
- { AOM_CDF4(24410, 31178, 32287) },
- { AOM_CDF4(14394, 26738, 30253) },
- { AOM_CDF4(8413, 19554, 25195) },
- { AOM_CDF4(4766, 12924, 18785) },
- { AOM_CDF4(2029, 5806, 9207) },
- { AOM_CDF4(26776, 32364, 32663) },
- { AOM_CDF4(18732, 29967, 31931) },
- { AOM_CDF4(11005, 23786, 28852) },
- { AOM_CDF4(6466, 16909, 23510) },
- { AOM_CDF4(3044, 8638, 13419) },
- { AOM_CDF4(29208, 32582, 32704) },
- { AOM_CDF4(20068, 30857, 32208) },
- { AOM_CDF4(12003, 25085, 29595) },
- { AOM_CDF4(6947, 17750, 24189) },
- { AOM_CDF4(3245, 9103, 14007) },
- { AOM_CDF4(27359, 32465, 32669) },
- { AOM_CDF4(19421, 30614, 32174) },
- { AOM_CDF4(11915, 25010, 29579) },
- { AOM_CDF4(6950, 17676, 24074) },
- { AOM_CDF4(3007, 8473, 13096) },
- { AOM_CDF4(29002, 32676, 32735) },
- { AOM_CDF4(22102, 31849, 32576) },
- { AOM_CDF4(14408, 28009, 31405) },
- { AOM_CDF4(9027, 21679, 27931) },
- { AOM_CDF4(4694, 12678, 18748) },
- { AOM_CDF4(28216, 32528, 32682) },
- { AOM_CDF4(20849, 31264, 32318) },
- { AOM_CDF4(12756, 25815, 29751) },
- { AOM_CDF4(7565, 18801, 24923) },
- { AOM_CDF4(3509, 9533, 14477) },
- { AOM_CDF4(30133, 32687, 32739) },
- { AOM_CDF4(23063, 31910, 32515) },
- { AOM_CDF4(14588, 28051, 31132) },
- { AOM_CDF4(9085, 21649, 27457) },
- { AOM_CDF4(4261, 11654, 17264) },
- { AOM_CDF4(29518, 32691, 32748) },
- { AOM_CDF4(22451, 31959, 32613) },
- { AOM_CDF4(14864, 28722, 31700) },
- { AOM_CDF4(9695, 22964, 28716) },
- { AOM_CDF4(4932, 13358, 19502) },
- { AOM_CDF4(8192, 16384, 24576) } },
- { { AOM_CDF4(6465, 16958, 21688) },
- { AOM_CDF4(25199, 31514, 32360) },
- { AOM_CDF4(14774, 27149, 30607) },
- { AOM_CDF4(9257, 21438, 26972) },
- { AOM_CDF4(5723, 15183, 21882) },
- { AOM_CDF4(3150, 8879, 13731) },
- { AOM_CDF4(26989, 32262, 32682) },
- { AOM_CDF4(17396, 29937, 32085) },
- { AOM_CDF4(11387, 24901, 29784) },
- { AOM_CDF4(7289, 18821, 25548) },
- { AOM_CDF4(3734, 10577, 16086) },
- { AOM_CDF4(29728, 32501, 32695) },
- { AOM_CDF4(17431, 29701, 31903) },
- { AOM_CDF4(9921, 22826, 28300) },
- { AOM_CDF4(5896, 15434, 22068) },
- { AOM_CDF4(3430, 9646, 14757) },
- { AOM_CDF4(28614, 32511, 32705) },
- { AOM_CDF4(19364, 30638, 32263) },
- { AOM_CDF4(13129, 26254, 30402) },
- { AOM_CDF4(8754, 20484, 26440) },
- { AOM_CDF4(4378, 11607, 17110) },
- { AOM_CDF4(30292, 32671, 32744) },
- { AOM_CDF4(21780, 31603, 32501) },
- { AOM_CDF4(14314, 27829, 31291) },
- { AOM_CDF4(9611, 22327, 28263) },
- { AOM_CDF4(4890, 13087, 19065) },
- { AOM_CDF4(25862, 32567, 32733) },
- { AOM_CDF4(20794, 32050, 32567) },
- { AOM_CDF4(17243, 30625, 32254) },
- { AOM_CDF4(13283, 27628, 31474) },
- { AOM_CDF4(9669, 22532, 28918) },
- { AOM_CDF4(27435, 32697, 32748) },
- { AOM_CDF4(24922, 32390, 32714) },
- { AOM_CDF4(21449, 31504, 32536) },
- { AOM_CDF4(16392, 29729, 31832) },
- { AOM_CDF4(11692, 24884, 29076) },
- { AOM_CDF4(24193, 32290, 32735) },
- { AOM_CDF4(18909, 31104, 32563) },
- { AOM_CDF4(12236, 26841, 31403) },
- { AOM_CDF4(8171, 21840, 29082) },
- { AOM_CDF4(7224, 17280, 25275) },
- { AOM_CDF4(8192, 16384, 24576) } } },
- { { { AOM_CDF4(3078, 6839, 9890) },
- { AOM_CDF4(13837, 20450, 24479) },
- { AOM_CDF4(5914, 14222, 19328) },
- { AOM_CDF4(3866, 10267, 14762) },
- { AOM_CDF4(2612, 7208, 11042) },
- { AOM_CDF4(1067, 2991, 4776) },
- { AOM_CDF4(25817, 31646, 32529) },
- { AOM_CDF4(13708, 26338, 30385) },
- { AOM_CDF4(7328, 18585, 24870) },
- { AOM_CDF4(4691, 13080, 19276) },
- { AOM_CDF4(1825, 5253, 8352) },
- { AOM_CDF4(29386, 32315, 32624) },
- { AOM_CDF4(17160, 29001, 31360) },
- { AOM_CDF4(9602, 21862, 27396) },
- { AOM_CDF4(5915, 15772, 22148) },
- { AOM_CDF4(2786, 7779, 12047) },
- { AOM_CDF4(29246, 32450, 32663) },
- { AOM_CDF4(18696, 29929, 31818) },
- { AOM_CDF4(10510, 23369, 28560) },
- { AOM_CDF4(6229, 16499, 23125) },
- { AOM_CDF4(2608, 7448, 11705) },
- { AOM_CDF4(30753, 32710, 32748) },
- { AOM_CDF4(21638, 31487, 32503) },
- { AOM_CDF4(12937, 26854, 30870) },
- { AOM_CDF4(8182, 20596, 26970) },
- { AOM_CDF4(3637, 10269, 15497) },
- { AOM_CDF4(8192, 16384, 24576) },
- { AOM_CDF4(8192, 16384, 24576) },
- { AOM_CDF4(8192, 16384, 24576) },
- { AOM_CDF4(8192, 16384, 24576) },
- { AOM_CDF4(8192, 16384, 24576) },
- { AOM_CDF4(8192, 16384, 24576) },
- { AOM_CDF4(8192, 16384, 24576) },
- { AOM_CDF4(8192, 16384, 24576) },
- { AOM_CDF4(8192, 16384, 24576) },
- { AOM_CDF4(8192, 16384, 24576) },
- { AOM_CDF4(8192, 16384, 24576) },
- { AOM_CDF4(8192, 16384, 24576) },
- { AOM_CDF4(8192, 16384, 24576) },
- { AOM_CDF4(8192, 16384, 24576) },
- { AOM_CDF4(8192, 16384, 24576) },
- { AOM_CDF4(8192, 16384, 24576) } },
- { { AOM_CDF4(5244, 12150, 16906) },
- { AOM_CDF4(20486, 26858, 29701) },
- { AOM_CDF4(7756, 18317, 23735) },
- { AOM_CDF4(3452, 9256, 13146) },
- { AOM_CDF4(2020, 5206, 8229) },
- { AOM_CDF4(1801, 4993, 7903) },
- { AOM_CDF4(27051, 31858, 32531) },
- { AOM_CDF4(15988, 27531, 30619) },
- { AOM_CDF4(9188, 21484, 26719) },
- { AOM_CDF4(6273, 17186, 23800) },
- { AOM_CDF4(3108, 9355, 14764) },
- { AOM_CDF4(31076, 32520, 32680) },
- { AOM_CDF4(18119, 30037, 31850) },
- { AOM_CDF4(10244, 22969, 27472) },
- { AOM_CDF4(4692, 14077, 19273) },
- { AOM_CDF4(3694, 11677, 17556) },
- { AOM_CDF4(30060, 32581, 32720) },
- { AOM_CDF4(21011, 30775, 32120) },
- { AOM_CDF4(11931, 24820, 29289) },
- { AOM_CDF4(7119, 17662, 24356) },
- { AOM_CDF4(3833, 10706, 16304) },
- { AOM_CDF4(31954, 32731, 32748) },
- { AOM_CDF4(23913, 31724, 32489) },
- { AOM_CDF4(15520, 28060, 31286) },
- { AOM_CDF4(11517, 23008, 28571) },
- { AOM_CDF4(6193, 14508, 20629) },
- { AOM_CDF4(8192, 16384, 24576) },
- { AOM_CDF4(8192, 16384, 24576) },
- { AOM_CDF4(8192, 16384, 24576) },
- { AOM_CDF4(8192, 16384, 24576) },
- { AOM_CDF4(8192, 16384, 24576) },
- { AOM_CDF4(8192, 16384, 24576) },
- { AOM_CDF4(8192, 16384, 24576) },
- { AOM_CDF4(8192, 16384, 24576) },
- { AOM_CDF4(8192, 16384, 24576) },
- { AOM_CDF4(8192, 16384, 24576) },
- { AOM_CDF4(8192, 16384, 24576) },
- { AOM_CDF4(8192, 16384, 24576) },
- { AOM_CDF4(8192, 16384, 24576) },
- { AOM_CDF4(8192, 16384, 24576) },
- { AOM_CDF4(8192, 16384, 24576) },
- { AOM_CDF4(8192, 16384, 24576) } } },
- { { { AOM_CDF4(1035, 2807, 4156) },
- { AOM_CDF4(13162, 18138, 20939) },
- { AOM_CDF4(2696, 6633, 8755) },
- { AOM_CDF4(1373, 4161, 6853) },
- { AOM_CDF4(1099, 2746, 4716) },
- { AOM_CDF4(340, 1021, 1599) },
- { AOM_CDF4(22826, 30419, 32135) },
- { AOM_CDF4(10395, 21762, 26942) },
- { AOM_CDF4(4726, 12407, 17361) },
- { AOM_CDF4(2447, 7080, 10593) },
- { AOM_CDF4(1227, 3717, 6011) },
- { AOM_CDF4(28156, 31424, 31934) },
- { AOM_CDF4(16915, 27754, 30373) },
- { AOM_CDF4(9148, 20990, 26431) },
- { AOM_CDF4(5950, 15515, 21148) },
- { AOM_CDF4(2492, 7327, 11526) },
- { AOM_CDF4(30602, 32477, 32670) },
- { AOM_CDF4(20026, 29955, 31568) },
- { AOM_CDF4(11220, 23628, 28105) },
- { AOM_CDF4(6652, 17019, 22973) },
- { AOM_CDF4(3064, 8536, 13043) },
- { AOM_CDF4(31769, 32724, 32748) },
- { AOM_CDF4(22230, 30887, 32373) },
- { AOM_CDF4(12234, 25079, 29731) },
- { AOM_CDF4(7326, 18816, 25353) },
- { AOM_CDF4(3933, 10907, 16616) },
- { AOM_CDF4(8192, 16384, 24576) },
- { AOM_CDF4(8192, 16384, 24576) },
- { AOM_CDF4(8192, 16384, 24576) },
- { AOM_CDF4(8192, 16384, 24576) },
- { AOM_CDF4(8192, 16384, 24576) },
- { AOM_CDF4(8192, 16384, 24576) },
- { AOM_CDF4(8192, 16384, 24576) },
- { AOM_CDF4(8192, 16384, 24576) },
- { AOM_CDF4(8192, 16384, 24576) },
- { AOM_CDF4(8192, 16384, 24576) },
- { AOM_CDF4(8192, 16384, 24576) },
- { AOM_CDF4(8192, 16384, 24576) },
- { AOM_CDF4(8192, 16384, 24576) },
- { AOM_CDF4(8192, 16384, 24576) },
- { AOM_CDF4(8192, 16384, 24576) },
- { AOM_CDF4(8192, 16384, 24576) } },
- { { AOM_CDF4(8192, 16384, 24576) },
- { AOM_CDF4(8192, 16384, 24576) },
- { AOM_CDF4(8192, 16384, 24576) },
- { AOM_CDF4(8192, 16384, 24576) },
- { AOM_CDF4(8192, 16384, 24576) },
- { AOM_CDF4(8192, 16384, 24576) },
- { AOM_CDF4(8192, 16384, 24576) },
- { AOM_CDF4(8192, 16384, 24576) },
- { AOM_CDF4(8192, 16384, 24576) },
- { AOM_CDF4(8192, 16384, 24576) },
- { AOM_CDF4(8192, 16384, 24576) },
- { AOM_CDF4(8192, 16384, 24576) },
- { AOM_CDF4(8192, 16384, 24576) },
- { AOM_CDF4(8192, 16384, 24576) },
- { AOM_CDF4(8192, 16384, 24576) },
- { AOM_CDF4(8192, 16384, 24576) },
- { AOM_CDF4(8192, 16384, 24576) },
- { AOM_CDF4(8192, 16384, 24576) },
- { AOM_CDF4(8192, 16384, 24576) },
- { AOM_CDF4(8192, 16384, 24576) },
- { AOM_CDF4(8192, 16384, 24576) },
- { AOM_CDF4(8192, 16384, 24576) },
- { AOM_CDF4(8192, 16384, 24576) },
- { AOM_CDF4(8192, 16384, 24576) },
- { AOM_CDF4(8192, 16384, 24576) },
- { AOM_CDF4(8192, 16384, 24576) },
- { AOM_CDF4(8192, 16384, 24576) },
- { AOM_CDF4(8192, 16384, 24576) },
- { AOM_CDF4(8192, 16384, 24576) },
- { AOM_CDF4(8192, 16384, 24576) },
- { AOM_CDF4(8192, 16384, 24576) },
- { AOM_CDF4(8192, 16384, 24576) },
- { AOM_CDF4(8192, 16384, 24576) },
- { AOM_CDF4(8192, 16384, 24576) },
- { AOM_CDF4(8192, 16384, 24576) },
- { AOM_CDF4(8192, 16384, 24576) },
- { AOM_CDF4(8192, 16384, 24576) },
- { AOM_CDF4(8192, 16384, 24576) },
- { AOM_CDF4(8192, 16384, 24576) },
- { AOM_CDF4(8192, 16384, 24576) },
- { AOM_CDF4(8192, 16384, 24576) },
- { AOM_CDF4(8192, 16384, 24576) } } } },
- { { { { AOM_CDF4(8896, 16227, 20630) },
- { AOM_CDF4(23629, 31782, 32527) },
- { AOM_CDF4(15173, 27755, 31321) },
- { AOM_CDF4(10158, 21233, 27382) },
- { AOM_CDF4(6420, 14857, 21558) },
- { AOM_CDF4(3269, 8155, 12646) },
- { AOM_CDF4(24835, 32009, 32496) },
- { AOM_CDF4(16509, 28421, 31579) },
- { AOM_CDF4(10957, 21514, 27418) },
- { AOM_CDF4(7881, 15930, 22096) },
- { AOM_CDF4(5388, 10960, 15918) },
- { AOM_CDF4(8192, 16384, 24576) },
- { AOM_CDF4(8192, 16384, 24576) },
- { AOM_CDF4(8192, 16384, 24576) },
- { AOM_CDF4(8192, 16384, 24576) },
- { AOM_CDF4(8192, 16384, 24576) },
- { AOM_CDF4(8192, 16384, 24576) },
- { AOM_CDF4(8192, 16384, 24576) },
- { AOM_CDF4(8192, 16384, 24576) },
- { AOM_CDF4(8192, 16384, 24576) },
- { AOM_CDF4(8192, 16384, 24576) },
- { AOM_CDF4(20745, 30773, 32093) },
- { AOM_CDF4(15200, 27221, 30861) },
- { AOM_CDF4(13032, 20873, 25667) },
- { AOM_CDF4(12285, 18663, 23494) },
- { AOM_CDF4(11563, 17481, 21489) },
- { AOM_CDF4(26260, 31982, 32320) },
- { AOM_CDF4(15397, 28083, 31100) },
- { AOM_CDF4(9742, 19217, 24824) },
- { AOM_CDF4(3261, 9629, 15362) },
- { AOM_CDF4(1480, 4322, 7499) },
- { AOM_CDF4(27599, 32256, 32460) },
- { AOM_CDF4(16857, 27659, 30774) },
- { AOM_CDF4(9551, 18290, 23748) },
- { AOM_CDF4(3052, 8933, 14103) },
- { AOM_CDF4(2021, 5910, 9787) },
- { AOM_CDF4(29005, 32015, 32392) },
- { AOM_CDF4(17677, 27694, 30863) },
- { AOM_CDF4(9204, 17356, 23219) },
- { AOM_CDF4(2403, 7516, 12814) },
- { AOM_CDF4(8192, 16384, 24576) },
- { AOM_CDF4(8192, 16384, 24576) } },
- { { AOM_CDF4(10808, 22056, 26896) },
- { AOM_CDF4(25739, 32313, 32676) },
- { AOM_CDF4(17288, 30203, 32221) },
- { AOM_CDF4(11359, 24878, 29896) },
- { AOM_CDF4(6949, 17767, 24893) },
- { AOM_CDF4(4287, 11796, 18071) },
- { AOM_CDF4(27880, 32521, 32705) },
- { AOM_CDF4(19038, 31004, 32414) },
- { AOM_CDF4(12564, 26345, 30768) },
- { AOM_CDF4(8269, 19947, 26779) },
- { AOM_CDF4(5674, 14657, 21674) },
- { AOM_CDF4(8192, 16384, 24576) },
- { AOM_CDF4(8192, 16384, 24576) },
- { AOM_CDF4(8192, 16384, 24576) },
- { AOM_CDF4(8192, 16384, 24576) },
- { AOM_CDF4(8192, 16384, 24576) },
- { AOM_CDF4(8192, 16384, 24576) },
- { AOM_CDF4(8192, 16384, 24576) },
- { AOM_CDF4(8192, 16384, 24576) },
- { AOM_CDF4(8192, 16384, 24576) },
- { AOM_CDF4(8192, 16384, 24576) },
- { AOM_CDF4(25742, 32319, 32671) },
- { AOM_CDF4(19557, 31164, 32454) },
- { AOM_CDF4(13381, 26381, 30755) },
- { AOM_CDF4(10101, 21466, 26722) },
- { AOM_CDF4(9209, 19650, 26825) },
- { AOM_CDF4(27107, 31917, 32432) },
- { AOM_CDF4(18056, 28893, 31203) },
- { AOM_CDF4(10200, 21434, 26764) },
- { AOM_CDF4(4660, 12913, 19502) },
- { AOM_CDF4(2368, 6930, 12504) },
- { AOM_CDF4(26960, 32158, 32613) },
- { AOM_CDF4(18628, 30005, 32031) },
- { AOM_CDF4(10233, 22442, 28232) },
- { AOM_CDF4(5471, 14630, 21516) },
- { AOM_CDF4(3235, 10767, 17109) },
- { AOM_CDF4(27696, 32440, 32692) },
- { AOM_CDF4(20032, 31167, 32438) },
- { AOM_CDF4(8700, 21341, 28442) },
- { AOM_CDF4(5662, 14831, 21795) },
- { AOM_CDF4(8192, 16384, 24576) },
- { AOM_CDF4(8192, 16384, 24576) } } },
- { { { AOM_CDF4(9704, 17294, 21132) },
- { AOM_CDF4(26762, 32278, 32633) },
- { AOM_CDF4(18382, 29620, 31819) },
- { AOM_CDF4(10891, 23475, 28723) },
- { AOM_CDF4(6358, 16583, 23309) },
- { AOM_CDF4(3248, 9118, 14141) },
- { AOM_CDF4(27204, 32573, 32699) },
- { AOM_CDF4(19818, 30824, 32329) },
- { AOM_CDF4(11772, 25120, 30041) },
- { AOM_CDF4(6995, 18033, 25039) },
- { AOM_CDF4(3752, 10442, 16098) },
- { AOM_CDF4(27222, 32256, 32559) },
- { AOM_CDF4(15356, 28399, 31475) },
- { AOM_CDF4(8821, 20635, 27057) },
- { AOM_CDF4(5511, 14404, 21239) },
- { AOM_CDF4(2935, 8222, 13051) },
- { AOM_CDF4(24875, 32120, 32529) },
- { AOM_CDF4(15233, 28265, 31445) },
- { AOM_CDF4(8605, 20570, 26932) },
- { AOM_CDF4(5431, 14413, 21196) },
- { AOM_CDF4(2994, 8341, 13223) },
- { AOM_CDF4(28201, 32604, 32700) },
- { AOM_CDF4(21041, 31446, 32456) },
- { AOM_CDF4(13221, 26213, 30475) },
- { AOM_CDF4(8255, 19385, 26037) },
- { AOM_CDF4(4930, 12585, 18830) },
- { AOM_CDF4(28768, 32448, 32627) },
- { AOM_CDF4(19705, 30561, 32021) },
- { AOM_CDF4(11572, 23589, 28220) },
- { AOM_CDF4(5532, 15034, 21446) },
- { AOM_CDF4(2460, 7150, 11456) },
- { AOM_CDF4(29874, 32619, 32699) },
- { AOM_CDF4(21621, 31071, 32201) },
- { AOM_CDF4(12511, 24747, 28992) },
- { AOM_CDF4(6281, 16395, 22748) },
- { AOM_CDF4(3246, 9278, 14497) },
- { AOM_CDF4(29715, 32625, 32712) },
- { AOM_CDF4(20958, 31011, 32283) },
- { AOM_CDF4(11233, 23671, 28806) },
- { AOM_CDF4(6012, 16128, 22868) },
- { AOM_CDF4(3427, 9851, 15414) },
- { AOM_CDF4(8192, 16384, 24576) } },
- { { AOM_CDF4(11016, 22111, 26794) },
- { AOM_CDF4(25946, 32357, 32677) },
- { AOM_CDF4(17890, 30452, 32252) },
- { AOM_CDF4(11678, 25142, 29816) },
- { AOM_CDF4(6720, 17534, 24584) },
- { AOM_CDF4(4230, 11665, 17820) },
- { AOM_CDF4(28400, 32623, 32747) },
- { AOM_CDF4(21164, 31668, 32575) },
- { AOM_CDF4(13572, 27388, 31182) },
- { AOM_CDF4(8234, 20750, 27358) },
- { AOM_CDF4(5065, 14055, 20897) },
- { AOM_CDF4(28981, 32547, 32705) },
- { AOM_CDF4(18681, 30543, 32239) },
- { AOM_CDF4(10919, 24075, 29286) },
- { AOM_CDF4(6431, 17199, 24077) },
- { AOM_CDF4(3819, 10464, 16618) },
- { AOM_CDF4(26870, 32467, 32693) },
- { AOM_CDF4(19041, 30831, 32347) },
- { AOM_CDF4(11794, 25211, 30016) },
- { AOM_CDF4(6888, 18019, 24970) },
- { AOM_CDF4(4370, 12363, 18992) },
- { AOM_CDF4(29578, 32670, 32744) },
- { AOM_CDF4(23159, 32007, 32613) },
- { AOM_CDF4(15315, 28669, 31676) },
- { AOM_CDF4(9298, 22607, 28782) },
- { AOM_CDF4(6144, 15913, 22968) },
- { AOM_CDF4(28110, 32499, 32669) },
- { AOM_CDF4(21574, 30937, 32015) },
- { AOM_CDF4(12759, 24818, 28727) },
- { AOM_CDF4(6545, 16761, 23042) },
- { AOM_CDF4(3649, 10597, 16833) },
- { AOM_CDF4(28163, 32552, 32728) },
- { AOM_CDF4(22101, 31469, 32464) },
- { AOM_CDF4(13160, 25472, 30143) },
- { AOM_CDF4(7303, 18684, 25468) },
- { AOM_CDF4(5241, 13975, 20955) },
- { AOM_CDF4(28400, 32631, 32744) },
- { AOM_CDF4(22104, 31793, 32603) },
- { AOM_CDF4(13557, 26571, 30846) },
- { AOM_CDF4(7749, 19861, 26675) },
- { AOM_CDF4(4873, 14030, 21234) },
- { AOM_CDF4(8192, 16384, 24576) } } },
- { { { AOM_CDF4(9800, 17635, 21073) },
- { AOM_CDF4(26153, 31885, 32527) },
- { AOM_CDF4(15038, 27852, 31006) },
- { AOM_CDF4(8718, 20564, 26486) },
- { AOM_CDF4(5128, 14076, 20514) },
- { AOM_CDF4(2636, 7566, 11925) },
- { AOM_CDF4(27551, 32504, 32701) },
- { AOM_CDF4(18310, 30054, 32100) },
- { AOM_CDF4(10211, 23420, 29082) },
- { AOM_CDF4(6222, 16876, 23916) },
- { AOM_CDF4(3462, 9954, 15498) },
- { AOM_CDF4(29991, 32633, 32721) },
- { AOM_CDF4(19883, 30751, 32201) },
- { AOM_CDF4(11141, 24184, 29285) },
- { AOM_CDF4(6420, 16940, 23774) },
- { AOM_CDF4(3392, 9753, 15118) },
- { AOM_CDF4(28465, 32616, 32712) },
- { AOM_CDF4(19850, 30702, 32244) },
- { AOM_CDF4(10983, 24024, 29223) },
- { AOM_CDF4(6294, 16770, 23582) },
- { AOM_CDF4(3244, 9283, 14509) },
- { AOM_CDF4(30023, 32717, 32748) },
- { AOM_CDF4(22940, 32032, 32626) },
- { AOM_CDF4(14282, 27928, 31473) },
- { AOM_CDF4(8562, 21327, 27914) },
- { AOM_CDF4(4846, 13393, 19919) },
- { AOM_CDF4(29981, 32590, 32695) },
- { AOM_CDF4(20465, 30963, 32166) },
- { AOM_CDF4(11479, 23579, 28195) },
- { AOM_CDF4(5916, 15648, 22073) },
- { AOM_CDF4(3031, 8605, 13398) },
- { AOM_CDF4(31146, 32691, 32739) },
- { AOM_CDF4(23106, 31724, 32444) },
- { AOM_CDF4(13783, 26738, 30439) },
- { AOM_CDF4(7852, 19468, 25807) },
- { AOM_CDF4(3860, 11124, 16853) },
- { AOM_CDF4(31014, 32724, 32748) },
- { AOM_CDF4(23629, 32109, 32628) },
- { AOM_CDF4(14747, 28115, 31403) },
- { AOM_CDF4(8545, 21242, 27478) },
- { AOM_CDF4(4574, 12781, 19067) },
- { AOM_CDF4(8192, 16384, 24576) } },
- { { AOM_CDF4(9185, 19694, 24688) },
- { AOM_CDF4(26081, 31985, 32621) },
- { AOM_CDF4(16015, 29000, 31787) },
- { AOM_CDF4(10542, 23690, 29206) },
- { AOM_CDF4(6732, 17945, 24677) },
- { AOM_CDF4(3916, 11039, 16722) },
- { AOM_CDF4(28224, 32566, 32744) },
- { AOM_CDF4(19100, 31138, 32485) },
- { AOM_CDF4(12528, 26620, 30879) },
- { AOM_CDF4(7741, 20277, 26885) },
- { AOM_CDF4(4566, 12845, 18990) },
- { AOM_CDF4(29933, 32593, 32718) },
- { AOM_CDF4(17670, 30333, 32155) },
- { AOM_CDF4(10385, 23600, 28909) },
- { AOM_CDF4(6243, 16236, 22407) },
- { AOM_CDF4(3976, 10389, 16017) },
- { AOM_CDF4(28377, 32561, 32738) },
- { AOM_CDF4(19366, 31175, 32482) },
- { AOM_CDF4(13327, 27175, 31094) },
- { AOM_CDF4(8258, 20769, 27143) },
- { AOM_CDF4(4703, 13198, 19527) },
- { AOM_CDF4(31086, 32706, 32748) },
- { AOM_CDF4(22853, 31902, 32583) },
- { AOM_CDF4(14759, 28186, 31419) },
- { AOM_CDF4(9284, 22382, 28348) },
- { AOM_CDF4(5585, 15192, 21868) },
- { AOM_CDF4(28291, 32652, 32746) },
- { AOM_CDF4(19849, 32107, 32571) },
- { AOM_CDF4(14834, 26818, 29214) },
- { AOM_CDF4(10306, 22594, 28672) },
- { AOM_CDF4(6615, 17384, 23384) },
- { AOM_CDF4(28947, 32604, 32745) },
- { AOM_CDF4(25625, 32289, 32646) },
- { AOM_CDF4(18758, 28672, 31403) },
- { AOM_CDF4(10017, 23430, 28523) },
- { AOM_CDF4(6862, 15269, 22131) },
- { AOM_CDF4(23933, 32509, 32739) },
- { AOM_CDF4(19927, 31495, 32631) },
- { AOM_CDF4(11903, 26023, 30621) },
- { AOM_CDF4(7026, 20094, 27252) },
- { AOM_CDF4(5998, 18106, 24437) },
- { AOM_CDF4(8192, 16384, 24576) } } },
- { { { AOM_CDF4(4456, 11274, 15533) },
- { AOM_CDF4(21219, 29079, 31616) },
- { AOM_CDF4(11173, 23774, 28567) },
- { AOM_CDF4(7282, 18293, 24263) },
- { AOM_CDF4(4890, 13286, 19115) },
- { AOM_CDF4(1890, 5508, 8659) },
- { AOM_CDF4(26651, 32136, 32647) },
- { AOM_CDF4(14630, 28254, 31455) },
- { AOM_CDF4(8716, 21287, 27395) },
- { AOM_CDF4(5615, 15331, 22008) },
- { AOM_CDF4(2675, 7700, 12150) },
- { AOM_CDF4(29954, 32526, 32690) },
- { AOM_CDF4(16126, 28982, 31633) },
- { AOM_CDF4(9030, 21361, 27352) },
- { AOM_CDF4(5411, 14793, 21271) },
- { AOM_CDF4(2943, 8422, 13163) },
- { AOM_CDF4(29539, 32601, 32730) },
- { AOM_CDF4(18125, 30385, 32201) },
- { AOM_CDF4(10422, 24090, 29468) },
- { AOM_CDF4(6468, 17487, 24438) },
- { AOM_CDF4(2970, 8653, 13531) },
- { AOM_CDF4(30912, 32715, 32748) },
- { AOM_CDF4(20666, 31373, 32497) },
- { AOM_CDF4(12509, 26640, 30917) },
- { AOM_CDF4(8058, 20629, 27290) },
- { AOM_CDF4(4231, 12006, 18052) },
- { AOM_CDF4(8192, 16384, 24576) },
- { AOM_CDF4(8192, 16384, 24576) },
- { AOM_CDF4(8192, 16384, 24576) },
- { AOM_CDF4(8192, 16384, 24576) },
- { AOM_CDF4(8192, 16384, 24576) },
- { AOM_CDF4(8192, 16384, 24576) },
- { AOM_CDF4(8192, 16384, 24576) },
- { AOM_CDF4(8192, 16384, 24576) },
- { AOM_CDF4(8192, 16384, 24576) },
- { AOM_CDF4(8192, 16384, 24576) },
- { AOM_CDF4(8192, 16384, 24576) },
- { AOM_CDF4(8192, 16384, 24576) },
- { AOM_CDF4(8192, 16384, 24576) },
- { AOM_CDF4(8192, 16384, 24576) },
- { AOM_CDF4(8192, 16384, 24576) },
- { AOM_CDF4(8192, 16384, 24576) } },
- { { AOM_CDF4(10202, 20633, 25484) },
- { AOM_CDF4(27336, 31445, 32352) },
- { AOM_CDF4(12420, 24384, 28552) },
- { AOM_CDF4(7648, 18115, 23856) },
- { AOM_CDF4(5662, 14341, 19902) },
- { AOM_CDF4(3611, 10328, 15390) },
- { AOM_CDF4(30945, 32616, 32736) },
- { AOM_CDF4(18682, 30505, 32253) },
- { AOM_CDF4(11513, 25336, 30203) },
- { AOM_CDF4(7449, 19452, 26148) },
- { AOM_CDF4(4482, 13051, 18886) },
- { AOM_CDF4(32022, 32690, 32747) },
- { AOM_CDF4(18578, 30501, 32146) },
- { AOM_CDF4(11249, 23368, 28631) },
- { AOM_CDF4(5645, 16958, 22158) },
- { AOM_CDF4(5009, 11444, 16637) },
- { AOM_CDF4(31357, 32710, 32748) },
- { AOM_CDF4(21552, 31494, 32504) },
- { AOM_CDF4(13891, 27677, 31340) },
- { AOM_CDF4(9051, 22098, 28172) },
- { AOM_CDF4(5190, 13377, 19486) },
- { AOM_CDF4(32364, 32740, 32748) },
- { AOM_CDF4(24839, 31907, 32551) },
- { AOM_CDF4(17160, 28779, 31696) },
- { AOM_CDF4(12452, 24137, 29602) },
- { AOM_CDF4(6165, 15389, 22477) },
- { AOM_CDF4(8192, 16384, 24576) },
- { AOM_CDF4(8192, 16384, 24576) },
- { AOM_CDF4(8192, 16384, 24576) },
- { AOM_CDF4(8192, 16384, 24576) },
- { AOM_CDF4(8192, 16384, 24576) },
- { AOM_CDF4(8192, 16384, 24576) },
- { AOM_CDF4(8192, 16384, 24576) },
- { AOM_CDF4(8192, 16384, 24576) },
- { AOM_CDF4(8192, 16384, 24576) },
- { AOM_CDF4(8192, 16384, 24576) },
- { AOM_CDF4(8192, 16384, 24576) },
- { AOM_CDF4(8192, 16384, 24576) },
- { AOM_CDF4(8192, 16384, 24576) },
- { AOM_CDF4(8192, 16384, 24576) },
- { AOM_CDF4(8192, 16384, 24576) },
- { AOM_CDF4(8192, 16384, 24576) } } },
- { { { AOM_CDF4(2575, 7281, 11077) },
- { AOM_CDF4(14002, 20866, 25402) },
- { AOM_CDF4(6343, 15056, 19658) },
- { AOM_CDF4(4474, 11858, 17041) },
- { AOM_CDF4(2865, 8299, 12534) },
- { AOM_CDF4(1344, 3949, 6391) },
- { AOM_CDF4(24720, 31239, 32459) },
- { AOM_CDF4(12585, 25356, 29968) },
- { AOM_CDF4(7181, 18246, 24444) },
- { AOM_CDF4(5025, 13667, 19885) },
- { AOM_CDF4(2521, 7304, 11605) },
- { AOM_CDF4(29908, 32252, 32584) },
- { AOM_CDF4(17421, 29156, 31575) },
- { AOM_CDF4(9889, 22188, 27782) },
- { AOM_CDF4(5878, 15647, 22123) },
- { AOM_CDF4(2814, 8665, 13323) },
- { AOM_CDF4(30183, 32568, 32713) },
- { AOM_CDF4(18528, 30195, 32049) },
- { AOM_CDF4(10982, 24606, 29657) },
- { AOM_CDF4(6957, 18165, 25231) },
- { AOM_CDF4(3508, 10118, 15468) },
- { AOM_CDF4(31761, 32736, 32748) },
- { AOM_CDF4(21041, 31328, 32546) },
- { AOM_CDF4(12568, 26732, 31166) },
- { AOM_CDF4(8052, 20720, 27733) },
- { AOM_CDF4(4336, 12192, 18396) },
- { AOM_CDF4(8192, 16384, 24576) },
- { AOM_CDF4(8192, 16384, 24576) },
- { AOM_CDF4(8192, 16384, 24576) },
- { AOM_CDF4(8192, 16384, 24576) },
- { AOM_CDF4(8192, 16384, 24576) },
- { AOM_CDF4(8192, 16384, 24576) },
- { AOM_CDF4(8192, 16384, 24576) },
- { AOM_CDF4(8192, 16384, 24576) },
- { AOM_CDF4(8192, 16384, 24576) },
- { AOM_CDF4(8192, 16384, 24576) },
- { AOM_CDF4(8192, 16384, 24576) },
- { AOM_CDF4(8192, 16384, 24576) },
- { AOM_CDF4(8192, 16384, 24576) },
- { AOM_CDF4(8192, 16384, 24576) },
- { AOM_CDF4(8192, 16384, 24576) },
- { AOM_CDF4(8192, 16384, 24576) } },
- { { AOM_CDF4(8192, 16384, 24576) },
- { AOM_CDF4(8192, 16384, 24576) },
- { AOM_CDF4(8192, 16384, 24576) },
- { AOM_CDF4(8192, 16384, 24576) },
- { AOM_CDF4(8192, 16384, 24576) },
- { AOM_CDF4(8192, 16384, 24576) },
- { AOM_CDF4(8192, 16384, 24576) },
- { AOM_CDF4(8192, 16384, 24576) },
- { AOM_CDF4(8192, 16384, 24576) },
- { AOM_CDF4(8192, 16384, 24576) },
- { AOM_CDF4(8192, 16384, 24576) },
- { AOM_CDF4(8192, 16384, 24576) },
- { AOM_CDF4(8192, 16384, 24576) },
- { AOM_CDF4(8192, 16384, 24576) },
- { AOM_CDF4(8192, 16384, 24576) },
- { AOM_CDF4(8192, 16384, 24576) },
- { AOM_CDF4(8192, 16384, 24576) },
- { AOM_CDF4(8192, 16384, 24576) },
- { AOM_CDF4(8192, 16384, 24576) },
- { AOM_CDF4(8192, 16384, 24576) },
- { AOM_CDF4(8192, 16384, 24576) },
- { AOM_CDF4(8192, 16384, 24576) },
- { AOM_CDF4(8192, 16384, 24576) },
- { AOM_CDF4(8192, 16384, 24576) },
- { AOM_CDF4(8192, 16384, 24576) },
- { AOM_CDF4(8192, 16384, 24576) },
- { AOM_CDF4(8192, 16384, 24576) },
- { AOM_CDF4(8192, 16384, 24576) },
- { AOM_CDF4(8192, 16384, 24576) },
- { AOM_CDF4(8192, 16384, 24576) },
- { AOM_CDF4(8192, 16384, 24576) },
- { AOM_CDF4(8192, 16384, 24576) },
- { AOM_CDF4(8192, 16384, 24576) },
- { AOM_CDF4(8192, 16384, 24576) },
- { AOM_CDF4(8192, 16384, 24576) },
- { AOM_CDF4(8192, 16384, 24576) },
- { AOM_CDF4(8192, 16384, 24576) },
- { AOM_CDF4(8192, 16384, 24576) },
- { AOM_CDF4(8192, 16384, 24576) },
- { AOM_CDF4(8192, 16384, 24576) },
- { AOM_CDF4(8192, 16384, 24576) },
- { AOM_CDF4(8192, 16384, 24576) } } } },
- { { { { AOM_CDF4(7062, 16472, 22319) },
- { AOM_CDF4(24538, 32261, 32674) },
- { AOM_CDF4(13675, 28041, 31779) },
- { AOM_CDF4(8590, 20674, 27631) },
- { AOM_CDF4(5685, 14675, 22013) },
- { AOM_CDF4(3655, 9898, 15731) },
- { AOM_CDF4(26493, 32418, 32658) },
- { AOM_CDF4(16376, 29342, 32090) },
- { AOM_CDF4(10594, 22649, 28970) },
- { AOM_CDF4(8176, 17170, 24303) },
- { AOM_CDF4(5605, 12694, 19139) },
- { AOM_CDF4(8192, 16384, 24576) },
- { AOM_CDF4(8192, 16384, 24576) },
- { AOM_CDF4(8192, 16384, 24576) },
- { AOM_CDF4(8192, 16384, 24576) },
- { AOM_CDF4(8192, 16384, 24576) },
- { AOM_CDF4(8192, 16384, 24576) },
- { AOM_CDF4(8192, 16384, 24576) },
- { AOM_CDF4(8192, 16384, 24576) },
- { AOM_CDF4(8192, 16384, 24576) },
- { AOM_CDF4(8192, 16384, 24576) },
- { AOM_CDF4(23888, 31902, 32542) },
- { AOM_CDF4(18612, 29687, 31987) },
- { AOM_CDF4(16245, 24852, 29249) },
- { AOM_CDF4(15765, 22608, 27559) },
- { AOM_CDF4(19895, 24699, 27510) },
- { AOM_CDF4(28401, 32212, 32457) },
- { AOM_CDF4(15274, 27825, 30980) },
- { AOM_CDF4(9364, 18128, 24332) },
- { AOM_CDF4(2283, 8193, 15082) },
- { AOM_CDF4(1228, 3972, 7881) },
- { AOM_CDF4(29455, 32469, 32620) },
- { AOM_CDF4(17981, 28245, 31388) },
- { AOM_CDF4(10921, 20098, 26240) },
- { AOM_CDF4(3743, 11829, 18657) },
- { AOM_CDF4(2374, 9593, 15715) },
- { AOM_CDF4(31068, 32466, 32635) },
- { AOM_CDF4(20321, 29572, 31971) },
- { AOM_CDF4(10771, 20255, 27119) },
- { AOM_CDF4(2795, 10410, 17361) },
- { AOM_CDF4(8192, 16384, 24576) },
- { AOM_CDF4(8192, 16384, 24576) } },
- { { AOM_CDF4(9320, 22102, 27840) },
- { AOM_CDF4(27057, 32464, 32724) },
- { AOM_CDF4(16331, 30268, 32309) },
- { AOM_CDF4(10319, 23935, 29720) },
- { AOM_CDF4(6189, 16448, 24106) },
- { AOM_CDF4(3589, 10884, 18808) },
- { AOM_CDF4(29026, 32624, 32748) },
- { AOM_CDF4(19226, 31507, 32587) },
- { AOM_CDF4(12692, 26921, 31203) },
- { AOM_CDF4(7049, 19532, 27635) },
- { AOM_CDF4(7727, 15669, 23252) },
- { AOM_CDF4(8192, 16384, 24576) },
- { AOM_CDF4(8192, 16384, 24576) },
- { AOM_CDF4(8192, 16384, 24576) },
- { AOM_CDF4(8192, 16384, 24576) },
- { AOM_CDF4(8192, 16384, 24576) },
- { AOM_CDF4(8192, 16384, 24576) },
- { AOM_CDF4(8192, 16384, 24576) },
- { AOM_CDF4(8192, 16384, 24576) },
- { AOM_CDF4(8192, 16384, 24576) },
- { AOM_CDF4(8192, 16384, 24576) },
- { AOM_CDF4(28056, 32625, 32748) },
- { AOM_CDF4(22383, 32075, 32669) },
- { AOM_CDF4(15417, 27098, 31749) },
- { AOM_CDF4(18127, 26493, 27190) },
- { AOM_CDF4(5461, 16384, 21845) },
- { AOM_CDF4(27982, 32091, 32584) },
- { AOM_CDF4(19045, 29868, 31972) },
- { AOM_CDF4(10397, 22266, 27932) },
- { AOM_CDF4(5990, 13697, 21500) },
- { AOM_CDF4(1792, 6912, 15104) },
- { AOM_CDF4(28198, 32501, 32718) },
- { AOM_CDF4(21534, 31521, 32569) },
- { AOM_CDF4(11109, 25217, 30017) },
- { AOM_CDF4(5671, 15124, 26151) },
- { AOM_CDF4(4681, 14043, 18725) },
- { AOM_CDF4(28688, 32580, 32741) },
- { AOM_CDF4(22576, 32079, 32661) },
- { AOM_CDF4(10627, 22141, 28340) },
- { AOM_CDF4(9362, 14043, 28087) },
- { AOM_CDF4(8192, 16384, 24576) },
- { AOM_CDF4(8192, 16384, 24576) } } },
- { { { AOM_CDF4(7754, 16948, 22142) },
- { AOM_CDF4(25670, 32330, 32691) },
- { AOM_CDF4(15663, 29225, 31994) },
- { AOM_CDF4(9878, 23288, 29158) },
- { AOM_CDF4(6419, 17088, 24336) },
- { AOM_CDF4(3859, 11003, 17039) },
- { AOM_CDF4(27562, 32595, 32725) },
- { AOM_CDF4(17575, 30588, 32399) },
- { AOM_CDF4(10819, 24838, 30309) },
- { AOM_CDF4(7124, 18686, 25916) },
- { AOM_CDF4(4479, 12688, 19340) },
- { AOM_CDF4(28385, 32476, 32673) },
- { AOM_CDF4(15306, 29005, 31938) },
- { AOM_CDF4(8937, 21615, 28322) },
- { AOM_CDF4(5982, 15603, 22786) },
- { AOM_CDF4(3620, 10267, 16136) },
- { AOM_CDF4(27280, 32464, 32667) },
- { AOM_CDF4(15607, 29160, 32004) },
- { AOM_CDF4(9091, 22135, 28740) },
- { AOM_CDF4(6232, 16632, 24020) },
- { AOM_CDF4(4047, 11377, 17672) },
- { AOM_CDF4(29220, 32630, 32718) },
- { AOM_CDF4(19650, 31220, 32462) },
- { AOM_CDF4(13050, 26312, 30827) },
- { AOM_CDF4(9228, 20870, 27468) },
- { AOM_CDF4(6146, 15149, 21971) },
- { AOM_CDF4(30169, 32481, 32623) },
- { AOM_CDF4(17212, 29311, 31554) },
- { AOM_CDF4(9911, 21311, 26882) },
- { AOM_CDF4(4487, 13314, 20372) },
- { AOM_CDF4(2570, 7772, 12889) },
- { AOM_CDF4(30924, 32613, 32708) },
- { AOM_CDF4(19490, 30206, 32107) },
- { AOM_CDF4(11232, 23998, 29276) },
- { AOM_CDF4(6769, 17955, 25035) },
- { AOM_CDF4(4398, 12623, 19214) },
- { AOM_CDF4(30609, 32627, 32722) },
- { AOM_CDF4(19370, 30582, 32287) },
- { AOM_CDF4(10457, 23619, 29409) },
- { AOM_CDF4(6443, 17637, 24834) },
- { AOM_CDF4(4645, 13236, 20106) },
- { AOM_CDF4(8192, 16384, 24576) } },
- { { AOM_CDF4(8626, 20271, 26216) },
- { AOM_CDF4(26707, 32406, 32711) },
- { AOM_CDF4(16999, 30329, 32286) },
- { AOM_CDF4(11445, 25123, 30286) },
- { AOM_CDF4(6411, 18828, 25601) },
- { AOM_CDF4(6801, 12458, 20248) },
- { AOM_CDF4(29918, 32682, 32748) },
- { AOM_CDF4(20649, 31739, 32618) },
- { AOM_CDF4(12879, 27773, 31581) },
- { AOM_CDF4(7896, 21751, 28244) },
- { AOM_CDF4(5260, 14870, 23698) },
- { AOM_CDF4(29252, 32593, 32731) },
- { AOM_CDF4(17072, 30460, 32294) },
- { AOM_CDF4(10653, 24143, 29365) },
- { AOM_CDF4(6536, 17490, 23983) },
- { AOM_CDF4(4929, 13170, 20085) },
- { AOM_CDF4(28137, 32518, 32715) },
- { AOM_CDF4(18171, 30784, 32407) },
- { AOM_CDF4(11437, 25436, 30459) },
- { AOM_CDF4(7252, 18534, 26176) },
- { AOM_CDF4(4126, 13353, 20978) },
- { AOM_CDF4(31162, 32726, 32748) },
- { AOM_CDF4(23017, 32222, 32701) },
- { AOM_CDF4(15629, 29233, 32046) },
- { AOM_CDF4(9387, 22621, 29480) },
- { AOM_CDF4(6922, 17616, 25010) },
- { AOM_CDF4(28838, 32265, 32614) },
- { AOM_CDF4(19701, 30206, 31920) },
- { AOM_CDF4(11214, 22410, 27933) },
- { AOM_CDF4(5320, 14177, 23034) },
- { AOM_CDF4(5049, 12881, 17827) },
- { AOM_CDF4(27484, 32471, 32734) },
- { AOM_CDF4(21076, 31526, 32561) },
- { AOM_CDF4(12707, 26303, 31211) },
- { AOM_CDF4(8169, 21722, 28219) },
- { AOM_CDF4(6045, 19406, 27042) },
- { AOM_CDF4(27753, 32572, 32745) },
- { AOM_CDF4(20832, 31878, 32653) },
- { AOM_CDF4(13250, 27356, 31674) },
- { AOM_CDF4(7718, 21508, 29858) },
- { AOM_CDF4(7209, 18350, 25559) },
- { AOM_CDF4(8192, 16384, 24576) } } },
- { { { AOM_CDF4(7876, 16901, 21741) },
- { AOM_CDF4(24001, 31898, 32625) },
- { AOM_CDF4(14529, 27959, 31451) },
- { AOM_CDF4(8273, 20818, 27258) },
- { AOM_CDF4(5278, 14673, 21510) },
- { AOM_CDF4(2983, 8843, 14039) },
- { AOM_CDF4(28016, 32574, 32732) },
- { AOM_CDF4(17471, 30306, 32301) },
- { AOM_CDF4(10224, 24063, 29728) },
- { AOM_CDF4(6602, 17954, 25052) },
- { AOM_CDF4(4002, 11585, 17759) },
- { AOM_CDF4(30190, 32634, 32739) },
- { AOM_CDF4(17497, 30282, 32270) },
- { AOM_CDF4(10229, 23729, 29538) },
- { AOM_CDF4(6344, 17211, 24440) },
- { AOM_CDF4(3849, 11189, 17108) },
- { AOM_CDF4(28570, 32583, 32726) },
- { AOM_CDF4(17521, 30161, 32238) },
- { AOM_CDF4(10153, 23565, 29378) },
- { AOM_CDF4(6455, 17341, 24443) },
- { AOM_CDF4(3907, 11042, 17024) },
- { AOM_CDF4(30689, 32715, 32748) },
- { AOM_CDF4(21546, 31840, 32610) },
- { AOM_CDF4(13547, 27581, 31459) },
- { AOM_CDF4(8912, 21757, 28309) },
- { AOM_CDF4(5548, 15080, 22046) },
- { AOM_CDF4(30783, 32540, 32685) },
- { AOM_CDF4(17540, 29528, 31668) },
- { AOM_CDF4(10160, 21468, 26783) },
- { AOM_CDF4(4724, 13393, 20054) },
- { AOM_CDF4(2702, 8174, 13102) },
- { AOM_CDF4(31648, 32686, 32742) },
- { AOM_CDF4(20954, 31094, 32337) },
- { AOM_CDF4(12420, 25698, 30179) },
- { AOM_CDF4(7304, 19320, 26248) },
- { AOM_CDF4(4366, 12261, 18864) },
- { AOM_CDF4(31581, 32723, 32748) },
- { AOM_CDF4(21373, 31586, 32525) },
- { AOM_CDF4(12744, 26625, 30885) },
- { AOM_CDF4(7431, 20322, 26950) },
- { AOM_CDF4(4692, 13323, 20111) },
- { AOM_CDF4(8192, 16384, 24576) } },
- { { AOM_CDF4(7833, 18369, 24095) },
- { AOM_CDF4(26650, 32273, 32702) },
- { AOM_CDF4(16371, 29961, 32191) },
- { AOM_CDF4(11055, 24082, 29629) },
- { AOM_CDF4(6892, 18644, 25400) },
- { AOM_CDF4(5006, 13057, 19240) },
- { AOM_CDF4(29834, 32666, 32748) },
- { AOM_CDF4(19577, 31335, 32570) },
- { AOM_CDF4(12253, 26509, 31122) },
- { AOM_CDF4(7991, 20772, 27711) },
- { AOM_CDF4(5677, 15910, 23059) },
- { AOM_CDF4(30109, 32532, 32720) },
- { AOM_CDF4(16747, 30166, 32252) },
- { AOM_CDF4(10134, 23542, 29184) },
- { AOM_CDF4(5791, 16176, 23556) },
- { AOM_CDF4(4362, 10414, 17284) },
- { AOM_CDF4(29492, 32626, 32748) },
- { AOM_CDF4(19894, 31402, 32525) },
- { AOM_CDF4(12942, 27071, 30869) },
- { AOM_CDF4(8346, 21216, 27405) },
- { AOM_CDF4(6572, 17087, 23859) },
- { AOM_CDF4(32035, 32735, 32748) },
- { AOM_CDF4(22957, 31838, 32618) },
- { AOM_CDF4(14724, 28572, 31772) },
- { AOM_CDF4(10364, 23999, 29553) },
- { AOM_CDF4(7004, 18433, 25655) },
- { AOM_CDF4(27528, 32277, 32681) },
- { AOM_CDF4(16959, 31171, 32096) },
- { AOM_CDF4(10486, 23593, 27962) },
- { AOM_CDF4(8192, 16384, 23211) },
- { AOM_CDF4(8937, 17873, 20852) },
- { AOM_CDF4(27715, 32002, 32615) },
- { AOM_CDF4(15073, 29491, 31676) },
- { AOM_CDF4(11264, 24576, 28672) },
- { AOM_CDF4(2341, 18725, 23406) },
- { AOM_CDF4(7282, 18204, 25486) },
- { AOM_CDF4(28547, 32213, 32657) },
- { AOM_CDF4(20788, 29773, 32239) },
- { AOM_CDF4(6780, 21469, 30508) },
- { AOM_CDF4(5958, 14895, 23831) },
- { AOM_CDF4(16384, 21845, 27307) },
- { AOM_CDF4(8192, 16384, 24576) } } },
- { { { AOM_CDF4(5992, 14304, 19765) },
- { AOM_CDF4(22612, 31238, 32456) },
- { AOM_CDF4(13456, 27162, 31087) },
- { AOM_CDF4(8001, 20062, 26504) },
- { AOM_CDF4(5168, 14105, 20764) },
- { AOM_CDF4(2632, 7771, 12385) },
- { AOM_CDF4(27034, 32344, 32709) },
- { AOM_CDF4(15850, 29415, 31997) },
- { AOM_CDF4(9494, 22776, 28841) },
- { AOM_CDF4(6151, 16830, 23969) },
- { AOM_CDF4(3461, 10039, 15722) },
- { AOM_CDF4(30134, 32569, 32731) },
- { AOM_CDF4(15638, 29422, 31945) },
- { AOM_CDF4(9150, 21865, 28218) },
- { AOM_CDF4(5647, 15719, 22676) },
- { AOM_CDF4(3402, 9772, 15477) },
- { AOM_CDF4(28530, 32586, 32735) },
- { AOM_CDF4(17139, 30298, 32292) },
- { AOM_CDF4(10200, 24039, 29685) },
- { AOM_CDF4(6419, 17674, 24786) },
- { AOM_CDF4(3544, 10225, 15824) },
- { AOM_CDF4(31333, 32726, 32748) },
- { AOM_CDF4(20618, 31487, 32544) },
- { AOM_CDF4(12901, 27217, 31232) },
- { AOM_CDF4(8624, 21734, 28171) },
- { AOM_CDF4(5104, 14191, 20748) },
- { AOM_CDF4(8192, 16384, 24576) },
- { AOM_CDF4(8192, 16384, 24576) },
- { AOM_CDF4(8192, 16384, 24576) },
- { AOM_CDF4(8192, 16384, 24576) },
- { AOM_CDF4(8192, 16384, 24576) },
- { AOM_CDF4(8192, 16384, 24576) },
- { AOM_CDF4(8192, 16384, 24576) },
- { AOM_CDF4(8192, 16384, 24576) },
- { AOM_CDF4(8192, 16384, 24576) },
- { AOM_CDF4(8192, 16384, 24576) },
- { AOM_CDF4(8192, 16384, 24576) },
- { AOM_CDF4(8192, 16384, 24576) },
- { AOM_CDF4(8192, 16384, 24576) },
- { AOM_CDF4(8192, 16384, 24576) },
- { AOM_CDF4(8192, 16384, 24576) },
- { AOM_CDF4(8192, 16384, 24576) } },
- { { AOM_CDF4(11206, 21090, 26561) },
- { AOM_CDF4(28759, 32279, 32671) },
- { AOM_CDF4(14171, 27952, 31569) },
- { AOM_CDF4(9743, 22907, 29141) },
- { AOM_CDF4(6871, 17886, 24868) },
- { AOM_CDF4(4960, 13152, 19315) },
- { AOM_CDF4(31077, 32661, 32748) },
- { AOM_CDF4(19400, 31195, 32515) },
- { AOM_CDF4(12752, 26858, 31040) },
- { AOM_CDF4(8370, 22098, 28591) },
- { AOM_CDF4(5457, 15373, 22298) },
- { AOM_CDF4(31697, 32706, 32748) },
- { AOM_CDF4(17860, 30657, 32333) },
- { AOM_CDF4(12510, 24812, 29261) },
- { AOM_CDF4(6180, 19124, 24722) },
- { AOM_CDF4(5041, 13548, 17959) },
- { AOM_CDF4(31552, 32716, 32748) },
- { AOM_CDF4(21908, 31769, 32623) },
- { AOM_CDF4(14470, 28201, 31565) },
- { AOM_CDF4(9493, 22982, 28608) },
- { AOM_CDF4(6858, 17240, 24137) },
- { AOM_CDF4(32543, 32752, 32756) },
- { AOM_CDF4(24286, 32097, 32666) },
- { AOM_CDF4(15958, 29217, 32024) },
- { AOM_CDF4(10207, 24234, 29958) },
- { AOM_CDF4(6929, 18305, 25652) },
- { AOM_CDF4(8192, 16384, 24576) },
- { AOM_CDF4(8192, 16384, 24576) },
- { AOM_CDF4(8192, 16384, 24576) },
- { AOM_CDF4(8192, 16384, 24576) },
- { AOM_CDF4(8192, 16384, 24576) },
- { AOM_CDF4(8192, 16384, 24576) },
- { AOM_CDF4(8192, 16384, 24576) },
- { AOM_CDF4(8192, 16384, 24576) },
- { AOM_CDF4(8192, 16384, 24576) },
- { AOM_CDF4(8192, 16384, 24576) },
- { AOM_CDF4(8192, 16384, 24576) },
- { AOM_CDF4(8192, 16384, 24576) },
- { AOM_CDF4(8192, 16384, 24576) },
- { AOM_CDF4(8192, 16384, 24576) },
- { AOM_CDF4(8192, 16384, 24576) },
- { AOM_CDF4(8192, 16384, 24576) } } },
- { { { AOM_CDF4(4137, 10847, 15682) },
- { AOM_CDF4(17824, 27001, 30058) },
- { AOM_CDF4(10204, 22796, 28291) },
- { AOM_CDF4(6076, 15935, 22125) },
- { AOM_CDF4(3852, 10937, 16816) },
- { AOM_CDF4(2252, 6324, 10131) },
- { AOM_CDF4(25840, 32016, 32662) },
- { AOM_CDF4(15109, 28268, 31531) },
- { AOM_CDF4(9385, 22231, 28340) },
- { AOM_CDF4(6082, 16672, 23479) },
- { AOM_CDF4(3318, 9427, 14681) },
- { AOM_CDF4(30594, 32574, 32718) },
- { AOM_CDF4(16836, 29552, 31859) },
- { AOM_CDF4(9556, 22542, 28356) },
- { AOM_CDF4(6305, 16725, 23540) },
- { AOM_CDF4(3376, 9895, 15184) },
- { AOM_CDF4(29383, 32617, 32745) },
- { AOM_CDF4(18891, 30809, 32401) },
- { AOM_CDF4(11688, 25942, 30687) },
- { AOM_CDF4(7468, 19469, 26651) },
- { AOM_CDF4(3909, 11358, 17012) },
- { AOM_CDF4(31564, 32736, 32748) },
- { AOM_CDF4(20906, 31611, 32600) },
- { AOM_CDF4(13191, 27621, 31537) },
- { AOM_CDF4(8768, 22029, 28676) },
- { AOM_CDF4(5079, 14109, 20906) },
- { AOM_CDF4(8192, 16384, 24576) },
- { AOM_CDF4(8192, 16384, 24576) },
- { AOM_CDF4(8192, 16384, 24576) },
- { AOM_CDF4(8192, 16384, 24576) },
- { AOM_CDF4(8192, 16384, 24576) },
- { AOM_CDF4(8192, 16384, 24576) },
- { AOM_CDF4(8192, 16384, 24576) },
- { AOM_CDF4(8192, 16384, 24576) },
- { AOM_CDF4(8192, 16384, 24576) },
- { AOM_CDF4(8192, 16384, 24576) },
- { AOM_CDF4(8192, 16384, 24576) },
- { AOM_CDF4(8192, 16384, 24576) },
- { AOM_CDF4(8192, 16384, 24576) },
- { AOM_CDF4(8192, 16384, 24576) },
- { AOM_CDF4(8192, 16384, 24576) },
- { AOM_CDF4(8192, 16384, 24576) } },
- { { AOM_CDF4(8192, 16384, 24576) },
- { AOM_CDF4(8192, 16384, 24576) },
- { AOM_CDF4(8192, 16384, 24576) },
- { AOM_CDF4(8192, 16384, 24576) },
- { AOM_CDF4(8192, 16384, 24576) },
- { AOM_CDF4(8192, 16384, 24576) },
- { AOM_CDF4(8192, 16384, 24576) },
- { AOM_CDF4(8192, 16384, 24576) },
- { AOM_CDF4(8192, 16384, 24576) },
- { AOM_CDF4(8192, 16384, 24576) },
- { AOM_CDF4(8192, 16384, 24576) },
- { AOM_CDF4(8192, 16384, 24576) },
- { AOM_CDF4(8192, 16384, 24576) },
- { AOM_CDF4(8192, 16384, 24576) },
- { AOM_CDF4(8192, 16384, 24576) },
- { AOM_CDF4(8192, 16384, 24576) },
- { AOM_CDF4(8192, 16384, 24576) },
- { AOM_CDF4(8192, 16384, 24576) },
- { AOM_CDF4(8192, 16384, 24576) },
- { AOM_CDF4(8192, 16384, 24576) },
- { AOM_CDF4(8192, 16384, 24576) },
- { AOM_CDF4(8192, 16384, 24576) },
- { AOM_CDF4(8192, 16384, 24576) },
- { AOM_CDF4(8192, 16384, 24576) },
- { AOM_CDF4(8192, 16384, 24576) },
- { AOM_CDF4(8192, 16384, 24576) },
- { AOM_CDF4(8192, 16384, 24576) },
- { AOM_CDF4(8192, 16384, 24576) },
- { AOM_CDF4(8192, 16384, 24576) },
- { AOM_CDF4(8192, 16384, 24576) },
- { AOM_CDF4(8192, 16384, 24576) },
- { AOM_CDF4(8192, 16384, 24576) },
- { AOM_CDF4(8192, 16384, 24576) },
- { AOM_CDF4(8192, 16384, 24576) },
- { AOM_CDF4(8192, 16384, 24576) },
- { AOM_CDF4(8192, 16384, 24576) },
- { AOM_CDF4(8192, 16384, 24576) },
- { AOM_CDF4(8192, 16384, 24576) },
- { AOM_CDF4(8192, 16384, 24576) },
- { AOM_CDF4(8192, 16384, 24576) },
- { AOM_CDF4(8192, 16384, 24576) },
- { AOM_CDF4(8192, 16384, 24576) } } } } };
-
-static const aom_cdf_prob av1_default_coeff_base_eob_multi_cdfs
- [TOKEN_CDF_Q_CTXS][TX_SIZES][PLANE_TYPES][SIG_COEF_CONTEXTS_EOB][CDF_SIZE(
- NUM_BASE_LEVELS + 1)] = { { { { { AOM_CDF3(17837, 29055) },
- { AOM_CDF3(29600, 31446) },
- { AOM_CDF3(30844, 31878) },
- { AOM_CDF3(24926, 28948) } },
- { { AOM_CDF3(21365, 30026) },
- { AOM_CDF3(30512, 32423) },
- { AOM_CDF3(31658, 32621) },
- { AOM_CDF3(29630, 31881) } } },
- { { { AOM_CDF3(5717, 26477) },
- { AOM_CDF3(30491, 31703) },
- { AOM_CDF3(31550, 32158) },
- { AOM_CDF3(29648, 31491) } },
- { { AOM_CDF3(12608, 27820) },
- { AOM_CDF3(30680, 32225) },
- { AOM_CDF3(30809, 32335) },
- { AOM_CDF3(31299, 32423) } } },
- { { { AOM_CDF3(1786, 12612) },
- { AOM_CDF3(30663, 31625) },
- { AOM_CDF3(32339, 32468) },
- { AOM_CDF3(31148, 31833) } },
- { { AOM_CDF3(18857, 23865) },
- { AOM_CDF3(31428, 32428) },
- { AOM_CDF3(31744, 32373) },
- { AOM_CDF3(31775, 32526) } } },
- { { { AOM_CDF3(1787, 2532) },
- { AOM_CDF3(30832, 31662) },
- { AOM_CDF3(31824, 32682) },
- { AOM_CDF3(32133, 32569) } },
- { { AOM_CDF3(13751, 22235) },
- { AOM_CDF3(32089, 32409) },
- { AOM_CDF3(27084, 27920) },
- { AOM_CDF3(29291, 32594) } } },
- { { { AOM_CDF3(1725, 3449) },
- { AOM_CDF3(31102, 31935) },
- { AOM_CDF3(32457, 32613) },
- { AOM_CDF3(32412, 32649) } },
- { { AOM_CDF3(10923, 21845) },
- { AOM_CDF3(10923, 21845) },
- { AOM_CDF3(10923, 21845) },
- { AOM_CDF3(10923, 21845) } } } },
- { { { { AOM_CDF3(17560, 29888) },
- { AOM_CDF3(29671, 31549) },
- { AOM_CDF3(31007, 32056) },
- { AOM_CDF3(27286, 30006) } },
- { { AOM_CDF3(26594, 31212) },
- { AOM_CDF3(31208, 32582) },
- { AOM_CDF3(31835, 32637) },
- { AOM_CDF3(30595, 32206) } } },
- { { { AOM_CDF3(15239, 29932) },
- { AOM_CDF3(31315, 32095) },
- { AOM_CDF3(32130, 32434) },
- { AOM_CDF3(30864, 31996) } },
- { { AOM_CDF3(26279, 30968) },
- { AOM_CDF3(31142, 32495) },
- { AOM_CDF3(31713, 32540) },
- { AOM_CDF3(31929, 32594) } } },
- { { { AOM_CDF3(2644, 25198) },
- { AOM_CDF3(32038, 32451) },
- { AOM_CDF3(32639, 32695) },
- { AOM_CDF3(32166, 32518) } },
- { { AOM_CDF3(17187, 27668) },
- { AOM_CDF3(31714, 32550) },
- { AOM_CDF3(32283, 32678) },
- { AOM_CDF3(31930, 32563) } } },
- { { { AOM_CDF3(1044, 2257) },
- { AOM_CDF3(30755, 31923) },
- { AOM_CDF3(32208, 32693) },
- { AOM_CDF3(32244, 32615) } },
- { { AOM_CDF3(21317, 26207) },
- { AOM_CDF3(29133, 30868) },
- { AOM_CDF3(29311, 31231) },
- { AOM_CDF3(29657, 31087) } } },
- { { { AOM_CDF3(478, 1834) },
- { AOM_CDF3(31005, 31987) },
- { AOM_CDF3(32317, 32724) },
- { AOM_CDF3(30865, 32648) } },
- { { AOM_CDF3(10923, 21845) },
- { AOM_CDF3(10923, 21845) },
- { AOM_CDF3(10923, 21845) },
- { AOM_CDF3(10923, 21845) } } } },
- { { { { AOM_CDF3(20092, 30774) },
- { AOM_CDF3(30695, 32020) },
- { AOM_CDF3(31131, 32103) },
- { AOM_CDF3(28666, 30870) } },
- { { AOM_CDF3(27258, 31095) },
- { AOM_CDF3(31804, 32623) },
- { AOM_CDF3(31763, 32528) },
- { AOM_CDF3(31438, 32506) } } },
- { { { AOM_CDF3(18049, 30489) },
- { AOM_CDF3(31706, 32286) },
- { AOM_CDF3(32163, 32473) },
- { AOM_CDF3(31550, 32184) } },
- { { AOM_CDF3(27116, 30842) },
- { AOM_CDF3(31971, 32598) },
- { AOM_CDF3(32088, 32576) },
- { AOM_CDF3(32067, 32664) } } },
- { { { AOM_CDF3(12854, 29093) },
- { AOM_CDF3(32272, 32558) },
- { AOM_CDF3(32667, 32729) },
- { AOM_CDF3(32306, 32585) } },
- { { AOM_CDF3(25476, 30366) },
- { AOM_CDF3(32169, 32687) },
- { AOM_CDF3(32479, 32689) },
- { AOM_CDF3(31673, 32634) } } },
- { { { AOM_CDF3(2809, 19301) },
- { AOM_CDF3(32205, 32622) },
- { AOM_CDF3(32338, 32730) },
- { AOM_CDF3(31786, 32616) } },
- { { AOM_CDF3(22737, 29105) },
- { AOM_CDF3(30810, 32362) },
- { AOM_CDF3(30014, 32627) },
- { AOM_CDF3(30528, 32574) } } },
- { { { AOM_CDF3(935, 3382) },
- { AOM_CDF3(30789, 31909) },
- { AOM_CDF3(32466, 32756) },
- { AOM_CDF3(30860, 32513) } },
- { { AOM_CDF3(10923, 21845) },
- { AOM_CDF3(10923, 21845) },
- { AOM_CDF3(10923, 21845) },
- { AOM_CDF3(10923, 21845) } } } },
- { { { { AOM_CDF3(22497, 31198) },
- { AOM_CDF3(31715, 32495) },
- { AOM_CDF3(31606, 32337) },
- { AOM_CDF3(30388, 31990) } },
- { { AOM_CDF3(27877, 31584) },
- { AOM_CDF3(32170, 32728) },
- { AOM_CDF3(32155, 32688) },
- { AOM_CDF3(32219, 32702) } } },
- { { { AOM_CDF3(21457, 31043) },
- { AOM_CDF3(31951, 32483) },
- { AOM_CDF3(32153, 32562) },
- { AOM_CDF3(31473, 32215) } },
- { { AOM_CDF3(27558, 31151) },
- { AOM_CDF3(32020, 32640) },
- { AOM_CDF3(32097, 32575) },
- { AOM_CDF3(32242, 32719) } } },
- { { { AOM_CDF3(19980, 30591) },
- { AOM_CDF3(32219, 32597) },
- { AOM_CDF3(32581, 32706) },
- { AOM_CDF3(31803, 32287) } },
- { { AOM_CDF3(26473, 30507) },
- { AOM_CDF3(32431, 32723) },
- { AOM_CDF3(32196, 32611) },
- { AOM_CDF3(31588, 32528) } } },
- { { { AOM_CDF3(24647, 30463) },
- { AOM_CDF3(32412, 32695) },
- { AOM_CDF3(32468, 32720) },
- { AOM_CDF3(31269, 32523) } },
- { { AOM_CDF3(28482, 31505) },
- { AOM_CDF3(32152, 32701) },
- { AOM_CDF3(31732, 32598) },
- { AOM_CDF3(31767, 32712) } } },
- { { { AOM_CDF3(12358, 24977) },
- { AOM_CDF3(31331, 32385) },
- { AOM_CDF3(32634, 32756) },
- { AOM_CDF3(30411, 32548) } },
- { { AOM_CDF3(10923, 21845) },
- { AOM_CDF3(10923, 21845) },
- { AOM_CDF3(10923, 21845) },
- { AOM_CDF3(10923, 21845) } } } } };
-
-#endif // AOM_AV1_COMMON_TOKEN_CDFS_H_
diff --git a/third_party/aom/av1/common/txb_common.c b/third_party/aom/av1/common/txb_common.c
deleted file mode 100644
index c96d37cca..000000000
--- a/third_party/aom/av1/common/txb_common.c
+++ /dev/null
@@ -1,475 +0,0 @@
-/*
- * Copyright (c) 2017, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-#include "aom/aom_integer.h"
-#include "av1/common/onyxc_int.h"
-#include "av1/common/txb_common.h"
-
-const int8_t av1_coeff_band_4x4[16] = { 0, 1, 2, 3, 4, 5, 6, 7,
- 8, 9, 10, 11, 12, 13, 14, 15 };
-
-const int8_t av1_coeff_band_8x8[64] = {
- 0, 1, 2, 2, 3, 3, 4, 4, 5, 6, 2, 2, 3, 3, 4, 4,
- 7, 7, 8, 8, 9, 9, 10, 10, 7, 7, 8, 8, 9, 9, 10, 10,
- 11, 11, 12, 12, 13, 13, 14, 14, 11, 11, 12, 12, 13, 13, 14, 14,
- 15, 15, 16, 16, 17, 17, 18, 18, 15, 15, 16, 16, 17, 17, 18, 18,
-};
-
-const int8_t av1_coeff_band_16x16[256] = {
- 0, 1, 4, 4, 7, 7, 7, 7, 8, 8, 8, 8, 9, 9, 9, 9, 2, 3, 4,
- 4, 7, 7, 7, 7, 8, 8, 8, 8, 9, 9, 9, 9, 5, 5, 6, 6, 7, 7,
- 7, 7, 8, 8, 8, 8, 9, 9, 9, 9, 5, 5, 6, 6, 7, 7, 7, 7, 8,
- 8, 8, 8, 9, 9, 9, 9, 10, 10, 10, 10, 11, 11, 11, 11, 12, 12, 12, 12,
- 13, 13, 13, 13, 10, 10, 10, 10, 11, 11, 11, 11, 12, 12, 12, 12, 13, 13, 13,
- 13, 10, 10, 10, 10, 11, 11, 11, 11, 12, 12, 12, 12, 13, 13, 13, 13, 10, 10,
- 10, 10, 11, 11, 11, 11, 12, 12, 12, 12, 13, 13, 13, 13, 14, 14, 14, 14, 15,
- 15, 15, 15, 16, 16, 16, 16, 17, 17, 17, 17, 14, 14, 14, 14, 15, 15, 15, 15,
- 16, 16, 16, 16, 17, 17, 17, 17, 14, 14, 14, 14, 15, 15, 15, 15, 16, 16, 16,
- 16, 17, 17, 17, 17, 14, 14, 14, 14, 15, 15, 15, 15, 16, 16, 16, 16, 17, 17,
- 17, 17, 18, 18, 18, 18, 19, 19, 19, 19, 20, 20, 20, 20, 21, 21, 21, 21, 18,
- 18, 18, 18, 19, 19, 19, 19, 20, 20, 20, 20, 21, 21, 21, 21, 18, 18, 18, 18,
- 19, 19, 19, 19, 20, 20, 20, 20, 21, 21, 21, 21, 18, 18, 18, 18, 19, 19, 19,
- 19, 20, 20, 20, 20, 21, 21, 21, 21,
-};
-
-const int8_t av1_coeff_band_32x32[1024] = {
- 0, 1, 4, 4, 7, 7, 7, 7, 10, 10, 10, 10, 10, 10, 10, 10, 11, 11, 11,
- 11, 11, 11, 11, 11, 12, 12, 12, 12, 12, 12, 12, 12, 2, 3, 4, 4, 7, 7,
- 7, 7, 10, 10, 10, 10, 10, 10, 10, 10, 11, 11, 11, 11, 11, 11, 11, 11, 12,
- 12, 12, 12, 12, 12, 12, 12, 5, 5, 6, 6, 7, 7, 7, 7, 10, 10, 10, 10,
- 10, 10, 10, 10, 11, 11, 11, 11, 11, 11, 11, 11, 12, 12, 12, 12, 12, 12, 12,
- 12, 5, 5, 6, 6, 7, 7, 7, 7, 10, 10, 10, 10, 10, 10, 10, 10, 11, 11,
- 11, 11, 11, 11, 11, 11, 12, 12, 12, 12, 12, 12, 12, 12, 8, 8, 8, 8, 9,
- 9, 9, 9, 10, 10, 10, 10, 10, 10, 10, 10, 11, 11, 11, 11, 11, 11, 11, 11,
- 12, 12, 12, 12, 12, 12, 12, 12, 8, 8, 8, 8, 9, 9, 9, 9, 10, 10, 10,
- 10, 10, 10, 10, 10, 11, 11, 11, 11, 11, 11, 11, 11, 12, 12, 12, 12, 12, 12,
- 12, 12, 8, 8, 8, 8, 9, 9, 9, 9, 10, 10, 10, 10, 10, 10, 10, 10, 11,
- 11, 11, 11, 11, 11, 11, 11, 12, 12, 12, 12, 12, 12, 12, 12, 8, 8, 8, 8,
- 9, 9, 9, 9, 10, 10, 10, 10, 10, 10, 10, 10, 11, 11, 11, 11, 11, 11, 11,
- 11, 12, 12, 12, 12, 12, 12, 12, 12, 13, 13, 13, 13, 13, 13, 13, 13, 14, 14,
- 14, 14, 14, 14, 14, 14, 15, 15, 15, 15, 15, 15, 15, 15, 16, 16, 16, 16, 16,
- 16, 16, 16, 13, 13, 13, 13, 13, 13, 13, 13, 14, 14, 14, 14, 14, 14, 14, 14,
- 15, 15, 15, 15, 15, 15, 15, 15, 16, 16, 16, 16, 16, 16, 16, 16, 13, 13, 13,
- 13, 13, 13, 13, 13, 14, 14, 14, 14, 14, 14, 14, 14, 15, 15, 15, 15, 15, 15,
- 15, 15, 16, 16, 16, 16, 16, 16, 16, 16, 13, 13, 13, 13, 13, 13, 13, 13, 14,
- 14, 14, 14, 14, 14, 14, 14, 15, 15, 15, 15, 15, 15, 15, 15, 16, 16, 16, 16,
- 16, 16, 16, 16, 13, 13, 13, 13, 13, 13, 13, 13, 14, 14, 14, 14, 14, 14, 14,
- 14, 15, 15, 15, 15, 15, 15, 15, 15, 16, 16, 16, 16, 16, 16, 16, 16, 13, 13,
- 13, 13, 13, 13, 13, 13, 14, 14, 14, 14, 14, 14, 14, 14, 15, 15, 15, 15, 15,
- 15, 15, 15, 16, 16, 16, 16, 16, 16, 16, 16, 13, 13, 13, 13, 13, 13, 13, 13,
- 14, 14, 14, 14, 14, 14, 14, 14, 15, 15, 15, 15, 15, 15, 15, 15, 16, 16, 16,
- 16, 16, 16, 16, 16, 13, 13, 13, 13, 13, 13, 13, 13, 14, 14, 14, 14, 14, 14,
- 14, 14, 15, 15, 15, 15, 15, 15, 15, 15, 16, 16, 16, 16, 16, 16, 16, 16, 17,
- 17, 17, 17, 17, 17, 17, 17, 18, 18, 18, 18, 18, 18, 18, 18, 19, 19, 19, 19,
- 19, 19, 19, 19, 20, 20, 20, 20, 20, 20, 20, 20, 17, 17, 17, 17, 17, 17, 17,
- 17, 18, 18, 18, 18, 18, 18, 18, 18, 19, 19, 19, 19, 19, 19, 19, 19, 20, 20,
- 20, 20, 20, 20, 20, 20, 17, 17, 17, 17, 17, 17, 17, 17, 18, 18, 18, 18, 18,
- 18, 18, 18, 19, 19, 19, 19, 19, 19, 19, 19, 20, 20, 20, 20, 20, 20, 20, 20,
- 17, 17, 17, 17, 17, 17, 17, 17, 18, 18, 18, 18, 18, 18, 18, 18, 19, 19, 19,
- 19, 19, 19, 19, 19, 20, 20, 20, 20, 20, 20, 20, 20, 17, 17, 17, 17, 17, 17,
- 17, 17, 18, 18, 18, 18, 18, 18, 18, 18, 19, 19, 19, 19, 19, 19, 19, 19, 20,
- 20, 20, 20, 20, 20, 20, 20, 17, 17, 17, 17, 17, 17, 17, 17, 18, 18, 18, 18,
- 18, 18, 18, 18, 19, 19, 19, 19, 19, 19, 19, 19, 20, 20, 20, 20, 20, 20, 20,
- 20, 17, 17, 17, 17, 17, 17, 17, 17, 18, 18, 18, 18, 18, 18, 18, 18, 19, 19,
- 19, 19, 19, 19, 19, 19, 20, 20, 20, 20, 20, 20, 20, 20, 17, 17, 17, 17, 17,
- 17, 17, 17, 18, 18, 18, 18, 18, 18, 18, 18, 19, 19, 19, 19, 19, 19, 19, 19,
- 20, 20, 20, 20, 20, 20, 20, 20, 21, 21, 21, 21, 21, 21, 21, 21, 22, 22, 22,
- 22, 22, 22, 22, 22, 23, 23, 23, 23, 23, 23, 23, 23, 24, 24, 24, 24, 24, 24,
- 24, 24, 21, 21, 21, 21, 21, 21, 21, 21, 22, 22, 22, 22, 22, 22, 22, 22, 23,
- 23, 23, 23, 23, 23, 23, 23, 24, 24, 24, 24, 24, 24, 24, 24, 21, 21, 21, 21,
- 21, 21, 21, 21, 22, 22, 22, 22, 22, 22, 22, 22, 23, 23, 23, 23, 23, 23, 23,
- 23, 24, 24, 24, 24, 24, 24, 24, 24, 21, 21, 21, 21, 21, 21, 21, 21, 22, 22,
- 22, 22, 22, 22, 22, 22, 23, 23, 23, 23, 23, 23, 23, 23, 24, 24, 24, 24, 24,
- 24, 24, 24, 21, 21, 21, 21, 21, 21, 21, 21, 22, 22, 22, 22, 22, 22, 22, 22,
- 23, 23, 23, 23, 23, 23, 23, 23, 24, 24, 24, 24, 24, 24, 24, 24, 21, 21, 21,
- 21, 21, 21, 21, 21, 22, 22, 22, 22, 22, 22, 22, 22, 23, 23, 23, 23, 23, 23,
- 23, 23, 24, 24, 24, 24, 24, 24, 24, 24, 21, 21, 21, 21, 21, 21, 21, 21, 22,
- 22, 22, 22, 22, 22, 22, 22, 23, 23, 23, 23, 23, 23, 23, 23, 24, 24, 24, 24,
- 24, 24, 24, 24, 21, 21, 21, 21, 21, 21, 21, 21, 22, 22, 22, 22, 22, 22, 22,
- 22, 23, 23, 23, 23, 23, 23, 23, 23, 24, 24, 24, 24, 24, 24, 24, 24,
-};
-
-// The ctx offset table when TX is TX_CLASS_2D.
-// TX col and row indices are clamped to 4
-
-const int8_t av1_nz_map_ctx_offset_4x4[16] = {
- 0, 1, 6, 6, 1, 6, 6, 21, 6, 6, 21, 21, 6, 21, 21, 21,
-};
-
-const int8_t av1_nz_map_ctx_offset_8x8[64] = {
- 0, 1, 6, 6, 21, 21, 21, 21, 1, 6, 6, 21, 21, 21, 21, 21,
- 6, 6, 21, 21, 21, 21, 21, 21, 6, 21, 21, 21, 21, 21, 21, 21,
- 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
- 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
-};
-
-const int8_t av1_nz_map_ctx_offset_16x16[256] = {
- 0, 1, 6, 6, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 1, 6, 6,
- 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 6, 6, 21, 21, 21, 21,
- 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 6, 21, 21, 21, 21, 21, 21, 21, 21,
- 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
- 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
- 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
- 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
- 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
- 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
- 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
- 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
- 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
- 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
- 21, 21, 21, 21, 21, 21, 21, 21, 21,
-};
-
-const int8_t av1_nz_map_ctx_offset_32x32[1024] = {
- 0, 1, 6, 6, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
- 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 1, 6, 6, 21, 21, 21,
- 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
- 21, 21, 21, 21, 21, 21, 21, 6, 6, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
- 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
- 21, 6, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
- 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
- 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
- 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
- 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
- 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
- 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
- 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
- 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
- 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
- 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
- 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
- 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
- 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
- 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
- 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
- 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
- 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
- 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
- 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
- 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
- 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
- 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
- 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
- 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
- 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
- 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
- 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
- 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
- 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
- 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
- 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
- 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
- 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
- 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
- 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
- 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
- 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
- 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
- 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
- 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
- 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
- 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
- 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
- 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
- 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
- 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
- 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
- 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
-};
-
-const int8_t av1_nz_map_ctx_offset_8x4[32] = {
- 0, 16, 6, 6, 21, 21, 21, 21, 16, 16, 6, 21, 21, 21, 21, 21,
- 16, 16, 21, 21, 21, 21, 21, 21, 16, 16, 21, 21, 21, 21, 21, 21,
-};
-
-const int8_t av1_nz_map_ctx_offset_8x16[128] = {
- 0, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 6, 6, 21,
- 21, 21, 21, 21, 21, 6, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
- 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
- 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
- 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
- 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
- 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
-};
-
-const int8_t av1_nz_map_ctx_offset_16x8[128] = {
- 0, 16, 6, 6, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 16, 16, 6,
- 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 16, 16, 21, 21, 21, 21,
- 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 16, 16, 21, 21, 21, 21, 21, 21, 21,
- 21, 21, 21, 21, 21, 21, 21, 16, 16, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
- 21, 21, 21, 21, 16, 16, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
- 21, 16, 16, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 16, 16,
- 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
-};
-
-const int8_t av1_nz_map_ctx_offset_16x32[512] = {
- 0, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11,
- 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 6, 6, 21, 21, 21, 21,
- 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 6, 21, 21, 21, 21, 21, 21, 21, 21,
- 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
- 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
- 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
- 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
- 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
- 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
- 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
- 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
- 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
- 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
- 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
- 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
- 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
- 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
- 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
- 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
- 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
- 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
- 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
- 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
- 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
- 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
- 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
- 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
-};
-
-const int8_t av1_nz_map_ctx_offset_32x16[512] = {
- 0, 16, 6, 6, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
- 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 16, 16, 6, 21, 21, 21,
- 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
- 21, 21, 21, 21, 21, 21, 21, 16, 16, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
- 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
- 21, 16, 16, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
- 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 16, 16, 21, 21, 21,
- 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
- 21, 21, 21, 21, 21, 21, 21, 21, 16, 16, 21, 21, 21, 21, 21, 21, 21, 21, 21,
- 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
- 21, 21, 16, 16, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
- 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 16, 16, 21, 21,
- 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
- 21, 21, 21, 21, 21, 21, 21, 21, 21, 16, 16, 21, 21, 21, 21, 21, 21, 21, 21,
- 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
- 21, 21, 21, 16, 16, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
- 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 16, 16, 21,
- 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
- 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 16, 16, 21, 21, 21, 21, 21, 21, 21,
- 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
- 21, 21, 21, 21, 16, 16, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
- 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 16, 16,
- 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
- 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 16, 16, 21, 21, 21, 21, 21, 21,
- 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
- 21, 21, 21, 21, 21, 16, 16, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
- 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
-};
-
-const int8_t av1_nz_map_ctx_offset_32x64[1024] = {
- 0, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11,
- 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11,
- 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11,
- 11, 11, 11, 11, 11, 11, 11, 6, 6, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
- 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
- 21, 6, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
- 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
- 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
- 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
- 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
- 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
- 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
- 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
- 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
- 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
- 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
- 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
- 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
- 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
- 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
- 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
- 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
- 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
- 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
- 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
- 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
- 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
- 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
- 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
- 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
- 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
- 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
- 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
- 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
- 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
- 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
- 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
- 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
- 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
- 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
- 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
- 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
- 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
- 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
- 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
- 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
- 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
- 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
- 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
- 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
- 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
- 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
- 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
- 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
-};
-
-const int8_t av1_nz_map_ctx_offset_64x32[1024] = {
- 0, 16, 6, 6, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
- 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 16, 16, 6, 21, 21, 21,
- 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
- 21, 21, 21, 21, 21, 21, 21, 16, 16, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
- 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
- 21, 16, 16, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
- 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 16, 16, 21, 21, 21,
- 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
- 21, 21, 21, 21, 21, 21, 21, 21, 16, 16, 21, 21, 21, 21, 21, 21, 21, 21, 21,
- 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
- 21, 21, 16, 16, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
- 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 16, 16, 21, 21,
- 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
- 21, 21, 21, 21, 21, 21, 21, 21, 21, 16, 16, 21, 21, 21, 21, 21, 21, 21, 21,
- 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
- 21, 21, 21, 16, 16, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
- 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 16, 16, 21,
- 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
- 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 16, 16, 21, 21, 21, 21, 21, 21, 21,
- 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
- 21, 21, 21, 21, 16, 16, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
- 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 16, 16,
- 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
- 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 16, 16, 21, 21, 21, 21, 21, 21,
- 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
- 21, 21, 21, 21, 21, 16, 16, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
- 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 16,
- 16, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
- 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 16, 16, 21, 21, 21, 21, 21,
- 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
- 21, 21, 21, 21, 21, 21, 16, 16, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
- 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
- 16, 16, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
- 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 16, 16, 21, 21, 21, 21,
- 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
- 21, 21, 21, 21, 21, 21, 21, 16, 16, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
- 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
- 21, 16, 16, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
- 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 16, 16, 21, 21, 21,
- 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
- 21, 21, 21, 21, 21, 21, 21, 21, 16, 16, 21, 21, 21, 21, 21, 21, 21, 21, 21,
- 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
- 21, 21, 16, 16, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
- 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 16, 16, 21, 21,
- 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
- 21, 21, 21, 21, 21, 21, 21, 21, 21, 16, 16, 21, 21, 21, 21, 21, 21, 21, 21,
- 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
- 21, 21, 21, 16, 16, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
- 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 16, 16, 21,
- 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
- 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 16, 16, 21, 21, 21, 21, 21, 21, 21,
- 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
- 21, 21, 21, 21, 16, 16, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
- 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
-};
-
-const int8_t av1_nz_map_ctx_offset_4x16[64] = {
- 0, 11, 11, 11, 11, 11, 11, 11, 6, 6, 21, 21, 6, 21, 21, 21,
- 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
- 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
- 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
-};
-
-const int8_t av1_nz_map_ctx_offset_16x4[64] = {
- 0, 16, 6, 6, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
- 16, 16, 6, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
- 16, 16, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
- 16, 16, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
-};
-
-const int8_t av1_nz_map_ctx_offset_8x32[256] = {
- 0, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 6, 6, 21,
- 21, 21, 21, 21, 21, 6, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
- 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
- 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
- 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
- 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
- 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
- 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
- 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
- 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
- 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
- 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
- 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
- 21, 21, 21, 21, 21, 21, 21, 21, 21,
-};
-
-const int8_t av1_nz_map_ctx_offset_32x8[256] = {
- 0, 16, 6, 6, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
- 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 16, 16, 6, 21, 21, 21,
- 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
- 21, 21, 21, 21, 21, 21, 21, 16, 16, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
- 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
- 21, 16, 16, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
- 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 16, 16, 21, 21, 21,
- 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
- 21, 21, 21, 21, 21, 21, 21, 21, 16, 16, 21, 21, 21, 21, 21, 21, 21, 21, 21,
- 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
- 21, 21, 16, 16, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
- 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 16, 16, 21, 21,
- 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
- 21, 21, 21, 21, 21, 21, 21, 21, 21,
-};
-
-const int8_t *av1_nz_map_ctx_offset[19] = {
- av1_nz_map_ctx_offset_4x4, // TX_4x4
- av1_nz_map_ctx_offset_8x8, // TX_8x8
- av1_nz_map_ctx_offset_16x16, // TX_16x16
- av1_nz_map_ctx_offset_32x32, // TX_32x32
- av1_nz_map_ctx_offset_32x32, // TX_32x32
- av1_nz_map_ctx_offset_4x16, // TX_4x8
- av1_nz_map_ctx_offset_8x4, // TX_8x4
- av1_nz_map_ctx_offset_8x32, // TX_8x16
- av1_nz_map_ctx_offset_16x8, // TX_16x8
- av1_nz_map_ctx_offset_16x32, // TX_16x32
- av1_nz_map_ctx_offset_32x16, // TX_32x16
- av1_nz_map_ctx_offset_32x64, // TX_32x64
- av1_nz_map_ctx_offset_64x32, // TX_64x32
- av1_nz_map_ctx_offset_4x16, // TX_4x16
- av1_nz_map_ctx_offset_16x4, // TX_16x4
- av1_nz_map_ctx_offset_8x32, // TX_8x32
- av1_nz_map_ctx_offset_32x8, // TX_32x8
- av1_nz_map_ctx_offset_16x32, // TX_16x64
- av1_nz_map_ctx_offset_64x32, // TX_64x16
-};
-
-void av1_init_lv_map(AV1_COMMON *cm) {
- LV_MAP_CTX_TABLE *coeff_ctx_table = &cm->coeff_ctx_table;
- for (int row = 0; row < 2; ++row) {
- for (int col = 0; col < 2; ++col) {
- for (int sig_mag = 0; sig_mag < 3; ++sig_mag) {
- for (int count = 0; count < BASE_CONTEXT_POSITION_NUM + 1; ++count) {
- if (row == 0 && col == 0 && count > 5) continue;
- if ((row == 0 || col == 0) && count > 8) continue;
-
- coeff_ctx_table->base_ctx_table[row][col][sig_mag][count] =
- get_base_ctx_from_count_mag(row, col, count, sig_mag);
- }
- }
- }
- }
-}
-
-const int16_t k_eob_group_start[12] = { 0, 1, 2, 3, 5, 9,
- 17, 33, 65, 129, 257, 513 };
-const int16_t k_eob_offset_bits[12] = { 0, 0, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9 };
diff --git a/third_party/aom/av1/common/txb_common.h b/third_party/aom/av1/common/txb_common.h
deleted file mode 100644
index 1dda51f8b..000000000
--- a/third_party/aom/av1/common/txb_common.h
+++ /dev/null
@@ -1,424 +0,0 @@
-/*
- * Copyright (c) 2017, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#ifndef AOM_AV1_COMMON_TXB_COMMON_H_
-#define AOM_AV1_COMMON_TXB_COMMON_H_
-
-extern const int16_t k_eob_group_start[12];
-extern const int16_t k_eob_offset_bits[12];
-
-extern const int8_t av1_coeff_band_4x4[16];
-
-extern const int8_t av1_coeff_band_8x8[64];
-
-extern const int8_t av1_coeff_band_16x16[256];
-
-extern const int8_t av1_coeff_band_32x32[1024];
-
-extern const int8_t *av1_nz_map_ctx_offset[TX_SIZES_ALL];
-
-typedef struct txb_ctx {
- int txb_skip_ctx;
- int dc_sign_ctx;
-} TXB_CTX;
-
-static const int base_level_count_to_index[13] = {
- 0, 0, 1, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3,
-};
-
-static const TX_CLASS tx_type_to_class[TX_TYPES] = {
- TX_CLASS_2D, // DCT_DCT
- TX_CLASS_2D, // ADST_DCT
- TX_CLASS_2D, // DCT_ADST
- TX_CLASS_2D, // ADST_ADST
- TX_CLASS_2D, // FLIPADST_DCT
- TX_CLASS_2D, // DCT_FLIPADST
- TX_CLASS_2D, // FLIPADST_FLIPADST
- TX_CLASS_2D, // ADST_FLIPADST
- TX_CLASS_2D, // FLIPADST_ADST
- TX_CLASS_2D, // IDTX
- TX_CLASS_VERT, // V_DCT
- TX_CLASS_HORIZ, // H_DCT
- TX_CLASS_VERT, // V_ADST
- TX_CLASS_HORIZ, // H_ADST
- TX_CLASS_VERT, // V_FLIPADST
- TX_CLASS_HORIZ, // H_FLIPADST
-};
-
-static INLINE int get_txb_bwl(TX_SIZE tx_size) {
- tx_size = av1_get_adjusted_tx_size(tx_size);
- return tx_size_wide_log2[tx_size];
-}
-
-static INLINE int get_txb_wide(TX_SIZE tx_size) {
- tx_size = av1_get_adjusted_tx_size(tx_size);
- return tx_size_wide[tx_size];
-}
-
-static INLINE int get_txb_high(TX_SIZE tx_size) {
- tx_size = av1_get_adjusted_tx_size(tx_size);
- return tx_size_high[tx_size];
-}
-
-static INLINE uint8_t *set_levels(uint8_t *const levels_buf, const int width) {
- return levels_buf + TX_PAD_TOP * (width + TX_PAD_HOR);
-}
-
-static INLINE int get_padded_idx(const int idx, const int bwl) {
- return idx + ((idx >> bwl) << TX_PAD_HOR_LOG2);
-}
-
-static INLINE int get_base_ctx_from_count_mag(int row, int col, int count,
- int sig_mag) {
- const int ctx = base_level_count_to_index[count];
- int ctx_idx = -1;
-
- if (row == 0 && col == 0) {
- if (sig_mag >= 2) return ctx_idx = 0;
- if (sig_mag == 1) {
- if (count >= 2)
- ctx_idx = 1;
- else
- ctx_idx = 2;
-
- return ctx_idx;
- }
-
- ctx_idx = 3 + ctx;
- assert(ctx_idx <= 6);
- return ctx_idx;
- } else if (row == 0) {
- if (sig_mag >= 2) return ctx_idx = 6;
- if (sig_mag == 1) {
- if (count >= 2)
- ctx_idx = 7;
- else
- ctx_idx = 8;
- return ctx_idx;
- }
-
- ctx_idx = 9 + ctx;
- assert(ctx_idx <= 11);
- return ctx_idx;
- } else if (col == 0) {
- if (sig_mag >= 2) return ctx_idx = 12;
- if (sig_mag == 1) {
- if (count >= 2)
- ctx_idx = 13;
- else
- ctx_idx = 14;
-
- return ctx_idx;
- }
-
- ctx_idx = 15 + ctx;
- assert(ctx_idx <= 17);
- // TODO(angiebird): turn this on once the optimization is finalized
- // assert(ctx_idx < 28);
- } else {
- if (sig_mag >= 2) return ctx_idx = 18;
- if (sig_mag == 1) {
- if (count >= 2)
- ctx_idx = 19;
- else
- ctx_idx = 20;
- return ctx_idx;
- }
-
- ctx_idx = 21 + ctx;
-
- assert(ctx_idx <= 24);
- }
- return ctx_idx;
-}
-
-static INLINE int get_br_ctx_2d(const uint8_t *const levels,
- const int c, // raster order
- const int bwl) {
- assert(c > 0);
- const int row = c >> bwl;
- const int col = c - (row << bwl);
- const int stride = (1 << bwl) + TX_PAD_HOR;
- const int pos = row * stride + col;
- int mag = AOMMIN(levels[pos + 1], MAX_BASE_BR_RANGE) +
- AOMMIN(levels[pos + stride], MAX_BASE_BR_RANGE) +
- AOMMIN(levels[pos + 1 + stride], MAX_BASE_BR_RANGE);
- mag = AOMMIN((mag + 1) >> 1, 6);
- //((row | col) < 2) is equivalent to ((row < 2) && (col < 2))
- if ((row | col) < 2) return mag + 7;
- return mag + 14;
-}
-
-static AOM_FORCE_INLINE int get_br_ctx(const uint8_t *const levels,
- const int c, // raster order
- const int bwl, const TX_CLASS tx_class) {
- const int row = c >> bwl;
- const int col = c - (row << bwl);
- const int stride = (1 << bwl) + TX_PAD_HOR;
- const int pos = row * stride + col;
- int mag = levels[pos + 1];
- mag += levels[pos + stride];
- switch (tx_class) {
- case TX_CLASS_2D:
- mag += levels[pos + stride + 1];
- mag = AOMMIN((mag + 1) >> 1, 6);
- if (c == 0) return mag;
- if ((row < 2) && (col < 2)) return mag + 7;
- break;
- case TX_CLASS_HORIZ:
- mag += levels[pos + 2];
- mag = AOMMIN((mag + 1) >> 1, 6);
- if (c == 0) return mag;
- if (col == 0) return mag + 7;
- break;
- case TX_CLASS_VERT:
- mag += levels[pos + (stride << 1)];
- mag = AOMMIN((mag + 1) >> 1, 6);
- if (c == 0) return mag;
- if (row == 0) return mag + 7;
- break;
- default: break;
- }
-
- return mag + 14;
-}
-
-static const uint8_t clip_max3[256] = {
- 0, 1, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
- 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
- 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
- 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
- 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
- 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
- 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
- 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
- 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
- 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3
-};
-
-static AOM_FORCE_INLINE int get_nz_mag(const uint8_t *const levels,
- const int bwl, const TX_CLASS tx_class) {
- int mag;
-
- // Note: AOMMIN(level, 3) is useless for decoder since level < 3.
- mag = clip_max3[levels[1]]; // { 0, 1 }
- mag += clip_max3[levels[(1 << bwl) + TX_PAD_HOR]]; // { 1, 0 }
-
- if (tx_class == TX_CLASS_2D) {
- mag += clip_max3[levels[(1 << bwl) + TX_PAD_HOR + 1]]; // { 1, 1 }
- mag += clip_max3[levels[2]]; // { 0, 2 }
- mag += clip_max3[levels[(2 << bwl) + (2 << TX_PAD_HOR_LOG2)]]; // { 2, 0 }
- } else if (tx_class == TX_CLASS_VERT) {
- mag += clip_max3[levels[(2 << bwl) + (2 << TX_PAD_HOR_LOG2)]]; // { 2, 0 }
- mag += clip_max3[levels[(3 << bwl) + (3 << TX_PAD_HOR_LOG2)]]; // { 3, 0 }
- mag += clip_max3[levels[(4 << bwl) + (4 << TX_PAD_HOR_LOG2)]]; // { 4, 0 }
- } else {
- mag += clip_max3[levels[2]]; // { 0, 2 }
- mag += clip_max3[levels[3]]; // { 0, 3 }
- mag += clip_max3[levels[4]]; // { 0, 4 }
- }
-
- return mag;
-}
-
-#define NZ_MAP_CTX_0 SIG_COEF_CONTEXTS_2D
-#define NZ_MAP_CTX_5 (NZ_MAP_CTX_0 + 5)
-#define NZ_MAP_CTX_10 (NZ_MAP_CTX_0 + 10)
-
-static const int nz_map_ctx_offset_1d[32] = {
- NZ_MAP_CTX_0, NZ_MAP_CTX_5, NZ_MAP_CTX_10, NZ_MAP_CTX_10, NZ_MAP_CTX_10,
- NZ_MAP_CTX_10, NZ_MAP_CTX_10, NZ_MAP_CTX_10, NZ_MAP_CTX_10, NZ_MAP_CTX_10,
- NZ_MAP_CTX_10, NZ_MAP_CTX_10, NZ_MAP_CTX_10, NZ_MAP_CTX_10, NZ_MAP_CTX_10,
- NZ_MAP_CTX_10, NZ_MAP_CTX_10, NZ_MAP_CTX_10, NZ_MAP_CTX_10, NZ_MAP_CTX_10,
- NZ_MAP_CTX_10, NZ_MAP_CTX_10, NZ_MAP_CTX_10, NZ_MAP_CTX_10, NZ_MAP_CTX_10,
- NZ_MAP_CTX_10, NZ_MAP_CTX_10, NZ_MAP_CTX_10, NZ_MAP_CTX_10, NZ_MAP_CTX_10,
- NZ_MAP_CTX_10, NZ_MAP_CTX_10,
-};
-
-static AOM_FORCE_INLINE int get_nz_map_ctx_from_stats(
- const int stats,
- const int coeff_idx, // raster order
- const int bwl, const TX_SIZE tx_size, const TX_CLASS tx_class) {
- // tx_class == 0(TX_CLASS_2D)
- if ((tx_class | coeff_idx) == 0) return 0;
- int ctx = (stats + 1) >> 1;
- ctx = AOMMIN(ctx, 4);
- switch (tx_class) {
- case TX_CLASS_2D: {
- // This is the algorithm to generate av1_nz_map_ctx_offset[][]
- // const int width = tx_size_wide[tx_size];
- // const int height = tx_size_high[tx_size];
- // if (width < height) {
- // if (row < 2) return 11 + ctx;
- // } else if (width > height) {
- // if (col < 2) return 16 + ctx;
- // }
- // if (row + col < 2) return ctx + 1;
- // if (row + col < 4) return 5 + ctx + 1;
- // return 21 + ctx;
- return ctx + av1_nz_map_ctx_offset[tx_size][coeff_idx];
- }
- case TX_CLASS_HORIZ: {
- const int row = coeff_idx >> bwl;
- const int col = coeff_idx - (row << bwl);
- return ctx + nz_map_ctx_offset_1d[col];
- break;
- }
- case TX_CLASS_VERT: {
- const int row = coeff_idx >> bwl;
- return ctx + nz_map_ctx_offset_1d[row];
- break;
- }
- default: break;
- }
- return 0;
-}
-
-typedef aom_cdf_prob (*base_cdf_arr)[CDF_SIZE(4)];
-typedef aom_cdf_prob (*br_cdf_arr)[CDF_SIZE(BR_CDF_SIZE)];
-
-static INLINE int get_lower_levels_ctx_eob(int bwl, int height, int scan_idx) {
- if (scan_idx == 0) return 0;
- if (scan_idx <= (height << bwl) / 8) return 1;
- if (scan_idx <= (height << bwl) / 4) return 2;
- return 3;
-}
-
-static INLINE int get_lower_levels_ctx_2d(const uint8_t *levels, int coeff_idx,
- int bwl, TX_SIZE tx_size) {
- assert(coeff_idx > 0);
- int mag;
- // Note: AOMMIN(level, 3) is useless for decoder since level < 3.
- levels = levels + get_padded_idx(coeff_idx, bwl);
- mag = AOMMIN(levels[1], 3); // { 0, 1 }
- mag += AOMMIN(levels[(1 << bwl) + TX_PAD_HOR], 3); // { 1, 0 }
- mag += AOMMIN(levels[(1 << bwl) + TX_PAD_HOR + 1], 3); // { 1, 1 }
- mag += AOMMIN(levels[2], 3); // { 0, 2 }
- mag += AOMMIN(levels[(2 << bwl) + (2 << TX_PAD_HOR_LOG2)], 3); // { 2, 0 }
-
- const int ctx = AOMMIN((mag + 1) >> 1, 4);
- return ctx + av1_nz_map_ctx_offset[tx_size][coeff_idx];
-}
-static AOM_FORCE_INLINE int get_lower_levels_ctx(const uint8_t *levels,
- int coeff_idx, int bwl,
- TX_SIZE tx_size,
- TX_CLASS tx_class) {
- const int stats =
- get_nz_mag(levels + get_padded_idx(coeff_idx, bwl), bwl, tx_class);
- return get_nz_map_ctx_from_stats(stats, coeff_idx, bwl, tx_size, tx_class);
-}
-
-static INLINE int get_lower_levels_ctx_general(int is_last, int scan_idx,
- int bwl, int height,
- const uint8_t *levels,
- int coeff_idx, TX_SIZE tx_size,
- TX_CLASS tx_class) {
- if (is_last) {
- if (scan_idx == 0) return 0;
- if (scan_idx <= (height << bwl) >> 3) return 1;
- if (scan_idx <= (height << bwl) >> 2) return 2;
- return 3;
- }
- return get_lower_levels_ctx(levels, coeff_idx, bwl, tx_size, tx_class);
-}
-
-static INLINE void set_dc_sign(int *cul_level, int dc_val) {
- if (dc_val < 0)
- *cul_level |= 1 << COEFF_CONTEXT_BITS;
- else if (dc_val > 0)
- *cul_level += 2 << COEFF_CONTEXT_BITS;
-}
-
-static INLINE void get_txb_ctx(const BLOCK_SIZE plane_bsize,
- const TX_SIZE tx_size, const int plane,
- const ENTROPY_CONTEXT *const a,
- const ENTROPY_CONTEXT *const l,
- TXB_CTX *const txb_ctx) {
-#define MAX_TX_SIZE_UNIT 16
- static const int8_t signs[3] = { 0, -1, 1 };
- static const int8_t dc_sign_contexts[4 * MAX_TX_SIZE_UNIT + 1] = {
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
- 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2
- };
- const int txb_w_unit = tx_size_wide_unit[tx_size];
- const int txb_h_unit = tx_size_high_unit[tx_size];
- int dc_sign = 0;
- int k = 0;
-
- do {
- const unsigned int sign = ((uint8_t)a[k]) >> COEFF_CONTEXT_BITS;
- assert(sign <= 2);
- dc_sign += signs[sign];
- } while (++k < txb_w_unit);
-
- k = 0;
- do {
- const unsigned int sign = ((uint8_t)l[k]) >> COEFF_CONTEXT_BITS;
- assert(sign <= 2);
- dc_sign += signs[sign];
- } while (++k < txb_h_unit);
-
- txb_ctx->dc_sign_ctx = dc_sign_contexts[dc_sign + 2 * MAX_TX_SIZE_UNIT];
-
- if (plane == 0) {
- if (plane_bsize == txsize_to_bsize[tx_size]) {
- txb_ctx->txb_skip_ctx = 0;
- } else {
- // This is the algorithm to generate table skip_contexts[min][max].
- // if (!max)
- // txb_skip_ctx = 1;
- // else if (!min)
- // txb_skip_ctx = 2 + (max > 3);
- // else if (max <= 3)
- // txb_skip_ctx = 4;
- // else if (min <= 3)
- // txb_skip_ctx = 5;
- // else
- // txb_skip_ctx = 6;
- static const uint8_t skip_contexts[5][5] = { { 1, 2, 2, 2, 3 },
- { 1, 4, 4, 4, 5 },
- { 1, 4, 4, 4, 5 },
- { 1, 4, 4, 4, 5 },
- { 1, 4, 4, 4, 6 } };
- int top = 0;
- int left = 0;
-
- k = 0;
- do {
- top |= a[k];
- } while (++k < txb_w_unit);
- top &= COEFF_CONTEXT_MASK;
-
- k = 0;
- do {
- left |= l[k];
- } while (++k < txb_h_unit);
- left &= COEFF_CONTEXT_MASK;
- const int max = AOMMIN(top | left, 4);
- const int min = AOMMIN(AOMMIN(top, left), 4);
-
- txb_ctx->txb_skip_ctx = skip_contexts[min][max];
- }
- } else {
- const int ctx_base = get_entropy_context(tx_size, a, l);
- const int ctx_offset = (num_pels_log2_lookup[plane_bsize] >
- num_pels_log2_lookup[txsize_to_bsize[tx_size]])
- ? 10
- : 7;
- txb_ctx->txb_skip_ctx = ctx_base + ctx_offset;
- }
-#undef MAX_TX_SIZE_UNIT
-}
-
-void av1_init_lv_map(AV1_COMMON *cm);
-
-#endif // AOM_AV1_COMMON_TXB_COMMON_H_
diff --git a/third_party/aom/av1/common/warped_motion.c b/third_party/aom/av1/common/warped_motion.c
deleted file mode 100644
index 4144c4389..000000000
--- a/third_party/aom/av1/common/warped_motion.c
+++ /dev/null
@@ -1,1148 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#include <stdio.h>
-#include <stdlib.h>
-#include <memory.h>
-#include <math.h>
-#include <assert.h>
-
-#include "config/av1_rtcd.h"
-
-#include "av1/common/warped_motion.h"
-#include "av1/common/scale.h"
-
-#define WARP_ERROR_BLOCK 32
-
-/* clang-format off */
-static const int error_measure_lut[512] = {
- // pow 0.7
- 16384, 16339, 16294, 16249, 16204, 16158, 16113, 16068,
- 16022, 15977, 15932, 15886, 15840, 15795, 15749, 15703,
- 15657, 15612, 15566, 15520, 15474, 15427, 15381, 15335,
- 15289, 15242, 15196, 15149, 15103, 15056, 15010, 14963,
- 14916, 14869, 14822, 14775, 14728, 14681, 14634, 14587,
- 14539, 14492, 14445, 14397, 14350, 14302, 14254, 14206,
- 14159, 14111, 14063, 14015, 13967, 13918, 13870, 13822,
- 13773, 13725, 13676, 13628, 13579, 13530, 13481, 13432,
- 13383, 13334, 13285, 13236, 13187, 13137, 13088, 13038,
- 12988, 12939, 12889, 12839, 12789, 12739, 12689, 12639,
- 12588, 12538, 12487, 12437, 12386, 12335, 12285, 12234,
- 12183, 12132, 12080, 12029, 11978, 11926, 11875, 11823,
- 11771, 11719, 11667, 11615, 11563, 11511, 11458, 11406,
- 11353, 11301, 11248, 11195, 11142, 11089, 11036, 10982,
- 10929, 10875, 10822, 10768, 10714, 10660, 10606, 10552,
- 10497, 10443, 10388, 10333, 10279, 10224, 10168, 10113,
- 10058, 10002, 9947, 9891, 9835, 9779, 9723, 9666,
- 9610, 9553, 9497, 9440, 9383, 9326, 9268, 9211,
- 9153, 9095, 9037, 8979, 8921, 8862, 8804, 8745,
- 8686, 8627, 8568, 8508, 8449, 8389, 8329, 8269,
- 8208, 8148, 8087, 8026, 7965, 7903, 7842, 7780,
- 7718, 7656, 7593, 7531, 7468, 7405, 7341, 7278,
- 7214, 7150, 7086, 7021, 6956, 6891, 6826, 6760,
- 6695, 6628, 6562, 6495, 6428, 6361, 6293, 6225,
- 6157, 6089, 6020, 5950, 5881, 5811, 5741, 5670,
- 5599, 5527, 5456, 5383, 5311, 5237, 5164, 5090,
- 5015, 4941, 4865, 4789, 4713, 4636, 4558, 4480,
- 4401, 4322, 4242, 4162, 4080, 3998, 3916, 3832,
- 3748, 3663, 3577, 3490, 3402, 3314, 3224, 3133,
- 3041, 2948, 2854, 2758, 2661, 2562, 2461, 2359,
- 2255, 2148, 2040, 1929, 1815, 1698, 1577, 1452,
- 1323, 1187, 1045, 894, 731, 550, 339, 0,
- 339, 550, 731, 894, 1045, 1187, 1323, 1452,
- 1577, 1698, 1815, 1929, 2040, 2148, 2255, 2359,
- 2461, 2562, 2661, 2758, 2854, 2948, 3041, 3133,
- 3224, 3314, 3402, 3490, 3577, 3663, 3748, 3832,
- 3916, 3998, 4080, 4162, 4242, 4322, 4401, 4480,
- 4558, 4636, 4713, 4789, 4865, 4941, 5015, 5090,
- 5164, 5237, 5311, 5383, 5456, 5527, 5599, 5670,
- 5741, 5811, 5881, 5950, 6020, 6089, 6157, 6225,
- 6293, 6361, 6428, 6495, 6562, 6628, 6695, 6760,
- 6826, 6891, 6956, 7021, 7086, 7150, 7214, 7278,
- 7341, 7405, 7468, 7531, 7593, 7656, 7718, 7780,
- 7842, 7903, 7965, 8026, 8087, 8148, 8208, 8269,
- 8329, 8389, 8449, 8508, 8568, 8627, 8686, 8745,
- 8804, 8862, 8921, 8979, 9037, 9095, 9153, 9211,
- 9268, 9326, 9383, 9440, 9497, 9553, 9610, 9666,
- 9723, 9779, 9835, 9891, 9947, 10002, 10058, 10113,
- 10168, 10224, 10279, 10333, 10388, 10443, 10497, 10552,
- 10606, 10660, 10714, 10768, 10822, 10875, 10929, 10982,
- 11036, 11089, 11142, 11195, 11248, 11301, 11353, 11406,
- 11458, 11511, 11563, 11615, 11667, 11719, 11771, 11823,
- 11875, 11926, 11978, 12029, 12080, 12132, 12183, 12234,
- 12285, 12335, 12386, 12437, 12487, 12538, 12588, 12639,
- 12689, 12739, 12789, 12839, 12889, 12939, 12988, 13038,
- 13088, 13137, 13187, 13236, 13285, 13334, 13383, 13432,
- 13481, 13530, 13579, 13628, 13676, 13725, 13773, 13822,
- 13870, 13918, 13967, 14015, 14063, 14111, 14159, 14206,
- 14254, 14302, 14350, 14397, 14445, 14492, 14539, 14587,
- 14634, 14681, 14728, 14775, 14822, 14869, 14916, 14963,
- 15010, 15056, 15103, 15149, 15196, 15242, 15289, 15335,
- 15381, 15427, 15474, 15520, 15566, 15612, 15657, 15703,
- 15749, 15795, 15840, 15886, 15932, 15977, 16022, 16068,
- 16113, 16158, 16204, 16249, 16294, 16339, 16384, 16384,
-};
-/* clang-format on */
-
-// For warping, we really use a 6-tap filter, but we do blocks of 8 pixels
-// at a time. The zoom/rotation/shear in the model are applied to the
-// "fractional" position of each pixel, which therefore varies within
-// [-1, 2) * WARPEDPIXEL_PREC_SHIFTS.
-// We need an extra 2 taps to fit this in, for a total of 8 taps.
-/* clang-format off */
-const int16_t warped_filter[WARPEDPIXEL_PREC_SHIFTS * 3 + 1][8] = {
-#if WARPEDPIXEL_PREC_BITS == 6
- // [-1, 0)
- { 0, 0, 127, 1, 0, 0, 0, 0 }, { 0, - 1, 127, 2, 0, 0, 0, 0 },
- { 1, - 3, 127, 4, - 1, 0, 0, 0 }, { 1, - 4, 126, 6, - 2, 1, 0, 0 },
- { 1, - 5, 126, 8, - 3, 1, 0, 0 }, { 1, - 6, 125, 11, - 4, 1, 0, 0 },
- { 1, - 7, 124, 13, - 4, 1, 0, 0 }, { 2, - 8, 123, 15, - 5, 1, 0, 0 },
- { 2, - 9, 122, 18, - 6, 1, 0, 0 }, { 2, -10, 121, 20, - 6, 1, 0, 0 },
- { 2, -11, 120, 22, - 7, 2, 0, 0 }, { 2, -12, 119, 25, - 8, 2, 0, 0 },
- { 3, -13, 117, 27, - 8, 2, 0, 0 }, { 3, -13, 116, 29, - 9, 2, 0, 0 },
- { 3, -14, 114, 32, -10, 3, 0, 0 }, { 3, -15, 113, 35, -10, 2, 0, 0 },
- { 3, -15, 111, 37, -11, 3, 0, 0 }, { 3, -16, 109, 40, -11, 3, 0, 0 },
- { 3, -16, 108, 42, -12, 3, 0, 0 }, { 4, -17, 106, 45, -13, 3, 0, 0 },
- { 4, -17, 104, 47, -13, 3, 0, 0 }, { 4, -17, 102, 50, -14, 3, 0, 0 },
- { 4, -17, 100, 52, -14, 3, 0, 0 }, { 4, -18, 98, 55, -15, 4, 0, 0 },
- { 4, -18, 96, 58, -15, 3, 0, 0 }, { 4, -18, 94, 60, -16, 4, 0, 0 },
- { 4, -18, 91, 63, -16, 4, 0, 0 }, { 4, -18, 89, 65, -16, 4, 0, 0 },
- { 4, -18, 87, 68, -17, 4, 0, 0 }, { 4, -18, 85, 70, -17, 4, 0, 0 },
- { 4, -18, 82, 73, -17, 4, 0, 0 }, { 4, -18, 80, 75, -17, 4, 0, 0 },
- { 4, -18, 78, 78, -18, 4, 0, 0 }, { 4, -17, 75, 80, -18, 4, 0, 0 },
- { 4, -17, 73, 82, -18, 4, 0, 0 }, { 4, -17, 70, 85, -18, 4, 0, 0 },
- { 4, -17, 68, 87, -18, 4, 0, 0 }, { 4, -16, 65, 89, -18, 4, 0, 0 },
- { 4, -16, 63, 91, -18, 4, 0, 0 }, { 4, -16, 60, 94, -18, 4, 0, 0 },
- { 3, -15, 58, 96, -18, 4, 0, 0 }, { 4, -15, 55, 98, -18, 4, 0, 0 },
- { 3, -14, 52, 100, -17, 4, 0, 0 }, { 3, -14, 50, 102, -17, 4, 0, 0 },
- { 3, -13, 47, 104, -17, 4, 0, 0 }, { 3, -13, 45, 106, -17, 4, 0, 0 },
- { 3, -12, 42, 108, -16, 3, 0, 0 }, { 3, -11, 40, 109, -16, 3, 0, 0 },
- { 3, -11, 37, 111, -15, 3, 0, 0 }, { 2, -10, 35, 113, -15, 3, 0, 0 },
- { 3, -10, 32, 114, -14, 3, 0, 0 }, { 2, - 9, 29, 116, -13, 3, 0, 0 },
- { 2, - 8, 27, 117, -13, 3, 0, 0 }, { 2, - 8, 25, 119, -12, 2, 0, 0 },
- { 2, - 7, 22, 120, -11, 2, 0, 0 }, { 1, - 6, 20, 121, -10, 2, 0, 0 },
- { 1, - 6, 18, 122, - 9, 2, 0, 0 }, { 1, - 5, 15, 123, - 8, 2, 0, 0 },
- { 1, - 4, 13, 124, - 7, 1, 0, 0 }, { 1, - 4, 11, 125, - 6, 1, 0, 0 },
- { 1, - 3, 8, 126, - 5, 1, 0, 0 }, { 1, - 2, 6, 126, - 4, 1, 0, 0 },
- { 0, - 1, 4, 127, - 3, 1, 0, 0 }, { 0, 0, 2, 127, - 1, 0, 0, 0 },
-
- // [0, 1)
- { 0, 0, 0, 127, 1, 0, 0, 0}, { 0, 0, -1, 127, 2, 0, 0, 0},
- { 0, 1, -3, 127, 4, -2, 1, 0}, { 0, 1, -5, 127, 6, -2, 1, 0},
- { 0, 2, -6, 126, 8, -3, 1, 0}, {-1, 2, -7, 126, 11, -4, 2, -1},
- {-1, 3, -8, 125, 13, -5, 2, -1}, {-1, 3, -10, 124, 16, -6, 3, -1},
- {-1, 4, -11, 123, 18, -7, 3, -1}, {-1, 4, -12, 122, 20, -7, 3, -1},
- {-1, 4, -13, 121, 23, -8, 3, -1}, {-2, 5, -14, 120, 25, -9, 4, -1},
- {-1, 5, -15, 119, 27, -10, 4, -1}, {-1, 5, -16, 118, 30, -11, 4, -1},
- {-2, 6, -17, 116, 33, -12, 5, -1}, {-2, 6, -17, 114, 35, -12, 5, -1},
- {-2, 6, -18, 113, 38, -13, 5, -1}, {-2, 7, -19, 111, 41, -14, 6, -2},
- {-2, 7, -19, 110, 43, -15, 6, -2}, {-2, 7, -20, 108, 46, -15, 6, -2},
- {-2, 7, -20, 106, 49, -16, 6, -2}, {-2, 7, -21, 104, 51, -16, 7, -2},
- {-2, 7, -21, 102, 54, -17, 7, -2}, {-2, 8, -21, 100, 56, -18, 7, -2},
- {-2, 8, -22, 98, 59, -18, 7, -2}, {-2, 8, -22, 96, 62, -19, 7, -2},
- {-2, 8, -22, 94, 64, -19, 7, -2}, {-2, 8, -22, 91, 67, -20, 8, -2},
- {-2, 8, -22, 89, 69, -20, 8, -2}, {-2, 8, -22, 87, 72, -21, 8, -2},
- {-2, 8, -21, 84, 74, -21, 8, -2}, {-2, 8, -22, 82, 77, -21, 8, -2},
- {-2, 8, -21, 79, 79, -21, 8, -2}, {-2, 8, -21, 77, 82, -22, 8, -2},
- {-2, 8, -21, 74, 84, -21, 8, -2}, {-2, 8, -21, 72, 87, -22, 8, -2},
- {-2, 8, -20, 69, 89, -22, 8, -2}, {-2, 8, -20, 67, 91, -22, 8, -2},
- {-2, 7, -19, 64, 94, -22, 8, -2}, {-2, 7, -19, 62, 96, -22, 8, -2},
- {-2, 7, -18, 59, 98, -22, 8, -2}, {-2, 7, -18, 56, 100, -21, 8, -2},
- {-2, 7, -17, 54, 102, -21, 7, -2}, {-2, 7, -16, 51, 104, -21, 7, -2},
- {-2, 6, -16, 49, 106, -20, 7, -2}, {-2, 6, -15, 46, 108, -20, 7, -2},
- {-2, 6, -15, 43, 110, -19, 7, -2}, {-2, 6, -14, 41, 111, -19, 7, -2},
- {-1, 5, -13, 38, 113, -18, 6, -2}, {-1, 5, -12, 35, 114, -17, 6, -2},
- {-1, 5, -12, 33, 116, -17, 6, -2}, {-1, 4, -11, 30, 118, -16, 5, -1},
- {-1, 4, -10, 27, 119, -15, 5, -1}, {-1, 4, -9, 25, 120, -14, 5, -2},
- {-1, 3, -8, 23, 121, -13, 4, -1}, {-1, 3, -7, 20, 122, -12, 4, -1},
- {-1, 3, -7, 18, 123, -11, 4, -1}, {-1, 3, -6, 16, 124, -10, 3, -1},
- {-1, 2, -5, 13, 125, -8, 3, -1}, {-1, 2, -4, 11, 126, -7, 2, -1},
- { 0, 1, -3, 8, 126, -6, 2, 0}, { 0, 1, -2, 6, 127, -5, 1, 0},
- { 0, 1, -2, 4, 127, -3, 1, 0}, { 0, 0, 0, 2, 127, -1, 0, 0},
-
- // [1, 2)
- { 0, 0, 0, 1, 127, 0, 0, 0 }, { 0, 0, 0, - 1, 127, 2, 0, 0 },
- { 0, 0, 1, - 3, 127, 4, - 1, 0 }, { 0, 0, 1, - 4, 126, 6, - 2, 1 },
- { 0, 0, 1, - 5, 126, 8, - 3, 1 }, { 0, 0, 1, - 6, 125, 11, - 4, 1 },
- { 0, 0, 1, - 7, 124, 13, - 4, 1 }, { 0, 0, 2, - 8, 123, 15, - 5, 1 },
- { 0, 0, 2, - 9, 122, 18, - 6, 1 }, { 0, 0, 2, -10, 121, 20, - 6, 1 },
- { 0, 0, 2, -11, 120, 22, - 7, 2 }, { 0, 0, 2, -12, 119, 25, - 8, 2 },
- { 0, 0, 3, -13, 117, 27, - 8, 2 }, { 0, 0, 3, -13, 116, 29, - 9, 2 },
- { 0, 0, 3, -14, 114, 32, -10, 3 }, { 0, 0, 3, -15, 113, 35, -10, 2 },
- { 0, 0, 3, -15, 111, 37, -11, 3 }, { 0, 0, 3, -16, 109, 40, -11, 3 },
- { 0, 0, 3, -16, 108, 42, -12, 3 }, { 0, 0, 4, -17, 106, 45, -13, 3 },
- { 0, 0, 4, -17, 104, 47, -13, 3 }, { 0, 0, 4, -17, 102, 50, -14, 3 },
- { 0, 0, 4, -17, 100, 52, -14, 3 }, { 0, 0, 4, -18, 98, 55, -15, 4 },
- { 0, 0, 4, -18, 96, 58, -15, 3 }, { 0, 0, 4, -18, 94, 60, -16, 4 },
- { 0, 0, 4, -18, 91, 63, -16, 4 }, { 0, 0, 4, -18, 89, 65, -16, 4 },
- { 0, 0, 4, -18, 87, 68, -17, 4 }, { 0, 0, 4, -18, 85, 70, -17, 4 },
- { 0, 0, 4, -18, 82, 73, -17, 4 }, { 0, 0, 4, -18, 80, 75, -17, 4 },
- { 0, 0, 4, -18, 78, 78, -18, 4 }, { 0, 0, 4, -17, 75, 80, -18, 4 },
- { 0, 0, 4, -17, 73, 82, -18, 4 }, { 0, 0, 4, -17, 70, 85, -18, 4 },
- { 0, 0, 4, -17, 68, 87, -18, 4 }, { 0, 0, 4, -16, 65, 89, -18, 4 },
- { 0, 0, 4, -16, 63, 91, -18, 4 }, { 0, 0, 4, -16, 60, 94, -18, 4 },
- { 0, 0, 3, -15, 58, 96, -18, 4 }, { 0, 0, 4, -15, 55, 98, -18, 4 },
- { 0, 0, 3, -14, 52, 100, -17, 4 }, { 0, 0, 3, -14, 50, 102, -17, 4 },
- { 0, 0, 3, -13, 47, 104, -17, 4 }, { 0, 0, 3, -13, 45, 106, -17, 4 },
- { 0, 0, 3, -12, 42, 108, -16, 3 }, { 0, 0, 3, -11, 40, 109, -16, 3 },
- { 0, 0, 3, -11, 37, 111, -15, 3 }, { 0, 0, 2, -10, 35, 113, -15, 3 },
- { 0, 0, 3, -10, 32, 114, -14, 3 }, { 0, 0, 2, - 9, 29, 116, -13, 3 },
- { 0, 0, 2, - 8, 27, 117, -13, 3 }, { 0, 0, 2, - 8, 25, 119, -12, 2 },
- { 0, 0, 2, - 7, 22, 120, -11, 2 }, { 0, 0, 1, - 6, 20, 121, -10, 2 },
- { 0, 0, 1, - 6, 18, 122, - 9, 2 }, { 0, 0, 1, - 5, 15, 123, - 8, 2 },
- { 0, 0, 1, - 4, 13, 124, - 7, 1 }, { 0, 0, 1, - 4, 11, 125, - 6, 1 },
- { 0, 0, 1, - 3, 8, 126, - 5, 1 }, { 0, 0, 1, - 2, 6, 126, - 4, 1 },
- { 0, 0, 0, - 1, 4, 127, - 3, 1 }, { 0, 0, 0, 0, 2, 127, - 1, 0 },
- // dummy (replicate row index 191)
- { 0, 0, 0, 0, 2, 127, - 1, 0 },
-
-#elif WARPEDPIXEL_PREC_BITS == 5
- // [-1, 0)
- {0, 0, 127, 1, 0, 0, 0, 0}, {1, -3, 127, 4, -1, 0, 0, 0},
- {1, -5, 126, 8, -3, 1, 0, 0}, {1, -7, 124, 13, -4, 1, 0, 0},
- {2, -9, 122, 18, -6, 1, 0, 0}, {2, -11, 120, 22, -7, 2, 0, 0},
- {3, -13, 117, 27, -8, 2, 0, 0}, {3, -14, 114, 32, -10, 3, 0, 0},
- {3, -15, 111, 37, -11, 3, 0, 0}, {3, -16, 108, 42, -12, 3, 0, 0},
- {4, -17, 104, 47, -13, 3, 0, 0}, {4, -17, 100, 52, -14, 3, 0, 0},
- {4, -18, 96, 58, -15, 3, 0, 0}, {4, -18, 91, 63, -16, 4, 0, 0},
- {4, -18, 87, 68, -17, 4, 0, 0}, {4, -18, 82, 73, -17, 4, 0, 0},
- {4, -18, 78, 78, -18, 4, 0, 0}, {4, -17, 73, 82, -18, 4, 0, 0},
- {4, -17, 68, 87, -18, 4, 0, 0}, {4, -16, 63, 91, -18, 4, 0, 0},
- {3, -15, 58, 96, -18, 4, 0, 0}, {3, -14, 52, 100, -17, 4, 0, 0},
- {3, -13, 47, 104, -17, 4, 0, 0}, {3, -12, 42, 108, -16, 3, 0, 0},
- {3, -11, 37, 111, -15, 3, 0, 0}, {3, -10, 32, 114, -14, 3, 0, 0},
- {2, -8, 27, 117, -13, 3, 0, 0}, {2, -7, 22, 120, -11, 2, 0, 0},
- {1, -6, 18, 122, -9, 2, 0, 0}, {1, -4, 13, 124, -7, 1, 0, 0},
- {1, -3, 8, 126, -5, 1, 0, 0}, {0, -1, 4, 127, -3, 1, 0, 0},
- // [0, 1)
- { 0, 0, 0, 127, 1, 0, 0, 0}, { 0, 1, -3, 127, 4, -2, 1, 0},
- { 0, 2, -6, 126, 8, -3, 1, 0}, {-1, 3, -8, 125, 13, -5, 2, -1},
- {-1, 4, -11, 123, 18, -7, 3, -1}, {-1, 4, -13, 121, 23, -8, 3, -1},
- {-1, 5, -15, 119, 27, -10, 4, -1}, {-2, 6, -17, 116, 33, -12, 5, -1},
- {-2, 6, -18, 113, 38, -13, 5, -1}, {-2, 7, -19, 110, 43, -15, 6, -2},
- {-2, 7, -20, 106, 49, -16, 6, -2}, {-2, 7, -21, 102, 54, -17, 7, -2},
- {-2, 8, -22, 98, 59, -18, 7, -2}, {-2, 8, -22, 94, 64, -19, 7, -2},
- {-2, 8, -22, 89, 69, -20, 8, -2}, {-2, 8, -21, 84, 74, -21, 8, -2},
- {-2, 8, -21, 79, 79, -21, 8, -2}, {-2, 8, -21, 74, 84, -21, 8, -2},
- {-2, 8, -20, 69, 89, -22, 8, -2}, {-2, 7, -19, 64, 94, -22, 8, -2},
- {-2, 7, -18, 59, 98, -22, 8, -2}, {-2, 7, -17, 54, 102, -21, 7, -2},
- {-2, 6, -16, 49, 106, -20, 7, -2}, {-2, 6, -15, 43, 110, -19, 7, -2},
- {-1, 5, -13, 38, 113, -18, 6, -2}, {-1, 5, -12, 33, 116, -17, 6, -2},
- {-1, 4, -10, 27, 119, -15, 5, -1}, {-1, 3, -8, 23, 121, -13, 4, -1},
- {-1, 3, -7, 18, 123, -11, 4, -1}, {-1, 2, -5, 13, 125, -8, 3, -1},
- { 0, 1, -3, 8, 126, -6, 2, 0}, { 0, 1, -2, 4, 127, -3, 1, 0},
- // [1, 2)
- {0, 0, 0, 1, 127, 0, 0, 0}, {0, 0, 1, -3, 127, 4, -1, 0},
- {0, 0, 1, -5, 126, 8, -3, 1}, {0, 0, 1, -7, 124, 13, -4, 1},
- {0, 0, 2, -9, 122, 18, -6, 1}, {0, 0, 2, -11, 120, 22, -7, 2},
- {0, 0, 3, -13, 117, 27, -8, 2}, {0, 0, 3, -14, 114, 32, -10, 3},
- {0, 0, 3, -15, 111, 37, -11, 3}, {0, 0, 3, -16, 108, 42, -12, 3},
- {0, 0, 4, -17, 104, 47, -13, 3}, {0, 0, 4, -17, 100, 52, -14, 3},
- {0, 0, 4, -18, 96, 58, -15, 3}, {0, 0, 4, -18, 91, 63, -16, 4},
- {0, 0, 4, -18, 87, 68, -17, 4}, {0, 0, 4, -18, 82, 73, -17, 4},
- {0, 0, 4, -18, 78, 78, -18, 4}, {0, 0, 4, -17, 73, 82, -18, 4},
- {0, 0, 4, -17, 68, 87, -18, 4}, {0, 0, 4, -16, 63, 91, -18, 4},
- {0, 0, 3, -15, 58, 96, -18, 4}, {0, 0, 3, -14, 52, 100, -17, 4},
- {0, 0, 3, -13, 47, 104, -17, 4}, {0, 0, 3, -12, 42, 108, -16, 3},
- {0, 0, 3, -11, 37, 111, -15, 3}, {0, 0, 3, -10, 32, 114, -14, 3},
- {0, 0, 2, -8, 27, 117, -13, 3}, {0, 0, 2, -7, 22, 120, -11, 2},
- {0, 0, 1, -6, 18, 122, -9, 2}, {0, 0, 1, -4, 13, 124, -7, 1},
- {0, 0, 1, -3, 8, 126, -5, 1}, {0, 0, 0, -1, 4, 127, -3, 1},
- // dummy (replicate row index 95)
- {0, 0, 0, -1, 4, 127, -3, 1},
-
-#endif // WARPEDPIXEL_PREC_BITS == 6
-};
-
-/* clang-format on */
-
-#define DIV_LUT_PREC_BITS 14
-#define DIV_LUT_BITS 8
-#define DIV_LUT_NUM (1 << DIV_LUT_BITS)
-
-static const uint16_t div_lut[DIV_LUT_NUM + 1] = {
- 16384, 16320, 16257, 16194, 16132, 16070, 16009, 15948, 15888, 15828, 15768,
- 15709, 15650, 15592, 15534, 15477, 15420, 15364, 15308, 15252, 15197, 15142,
- 15087, 15033, 14980, 14926, 14873, 14821, 14769, 14717, 14665, 14614, 14564,
- 14513, 14463, 14413, 14364, 14315, 14266, 14218, 14170, 14122, 14075, 14028,
- 13981, 13935, 13888, 13843, 13797, 13752, 13707, 13662, 13618, 13574, 13530,
- 13487, 13443, 13400, 13358, 13315, 13273, 13231, 13190, 13148, 13107, 13066,
- 13026, 12985, 12945, 12906, 12866, 12827, 12788, 12749, 12710, 12672, 12633,
- 12596, 12558, 12520, 12483, 12446, 12409, 12373, 12336, 12300, 12264, 12228,
- 12193, 12157, 12122, 12087, 12053, 12018, 11984, 11950, 11916, 11882, 11848,
- 11815, 11782, 11749, 11716, 11683, 11651, 11619, 11586, 11555, 11523, 11491,
- 11460, 11429, 11398, 11367, 11336, 11305, 11275, 11245, 11215, 11185, 11155,
- 11125, 11096, 11067, 11038, 11009, 10980, 10951, 10923, 10894, 10866, 10838,
- 10810, 10782, 10755, 10727, 10700, 10673, 10645, 10618, 10592, 10565, 10538,
- 10512, 10486, 10460, 10434, 10408, 10382, 10356, 10331, 10305, 10280, 10255,
- 10230, 10205, 10180, 10156, 10131, 10107, 10082, 10058, 10034, 10010, 9986,
- 9963, 9939, 9916, 9892, 9869, 9846, 9823, 9800, 9777, 9754, 9732,
- 9709, 9687, 9664, 9642, 9620, 9598, 9576, 9554, 9533, 9511, 9489,
- 9468, 9447, 9425, 9404, 9383, 9362, 9341, 9321, 9300, 9279, 9259,
- 9239, 9218, 9198, 9178, 9158, 9138, 9118, 9098, 9079, 9059, 9039,
- 9020, 9001, 8981, 8962, 8943, 8924, 8905, 8886, 8867, 8849, 8830,
- 8812, 8793, 8775, 8756, 8738, 8720, 8702, 8684, 8666, 8648, 8630,
- 8613, 8595, 8577, 8560, 8542, 8525, 8508, 8490, 8473, 8456, 8439,
- 8422, 8405, 8389, 8372, 8355, 8339, 8322, 8306, 8289, 8273, 8257,
- 8240, 8224, 8208, 8192,
-};
-
-// Decomposes a divisor D such that 1/D = y/2^shift, where y is returned
-// at precision of DIV_LUT_PREC_BITS along with the shift.
-static int16_t resolve_divisor_64(uint64_t D, int16_t *shift) {
- int64_t f;
- *shift = (int16_t)((D >> 32) ? get_msb((unsigned int)(D >> 32)) + 32
- : get_msb((unsigned int)D));
- // e is obtained from D after resetting the most significant 1 bit.
- const int64_t e = D - ((uint64_t)1 << *shift);
- // Get the most significant DIV_LUT_BITS (8) bits of e into f
- if (*shift > DIV_LUT_BITS)
- f = ROUND_POWER_OF_TWO_64(e, *shift - DIV_LUT_BITS);
- else
- f = e << (DIV_LUT_BITS - *shift);
- assert(f <= DIV_LUT_NUM);
- *shift += DIV_LUT_PREC_BITS;
- // Use f as lookup into the precomputed table of multipliers
- return div_lut[f];
-}
-
-static int16_t resolve_divisor_32(uint32_t D, int16_t *shift) {
- int32_t f;
- *shift = get_msb(D);
- // e is obtained from D after resetting the most significant 1 bit.
- const int32_t e = D - ((uint32_t)1 << *shift);
- // Get the most significant DIV_LUT_BITS (8) bits of e into f
- if (*shift > DIV_LUT_BITS)
- f = ROUND_POWER_OF_TWO(e, *shift - DIV_LUT_BITS);
- else
- f = e << (DIV_LUT_BITS - *shift);
- assert(f <= DIV_LUT_NUM);
- *shift += DIV_LUT_PREC_BITS;
- // Use f as lookup into the precomputed table of multipliers
- return div_lut[f];
-}
-
-static int is_affine_valid(const WarpedMotionParams *const wm) {
- const int32_t *mat = wm->wmmat;
- return (mat[2] > 0);
-}
-
-static int is_affine_shear_allowed(int16_t alpha, int16_t beta, int16_t gamma,
- int16_t delta) {
- if ((4 * abs(alpha) + 7 * abs(beta) >= (1 << WARPEDMODEL_PREC_BITS)) ||
- (4 * abs(gamma) + 4 * abs(delta) >= (1 << WARPEDMODEL_PREC_BITS)))
- return 0;
- else
- return 1;
-}
-
-// Returns 1 on success or 0 on an invalid affine set
-int get_shear_params(WarpedMotionParams *wm) {
- const int32_t *mat = wm->wmmat;
- if (!is_affine_valid(wm)) return 0;
- wm->alpha =
- clamp(mat[2] - (1 << WARPEDMODEL_PREC_BITS), INT16_MIN, INT16_MAX);
- wm->beta = clamp(mat[3], INT16_MIN, INT16_MAX);
- int16_t shift;
- int16_t y = resolve_divisor_32(abs(mat[2]), &shift) * (mat[2] < 0 ? -1 : 1);
- int64_t v = ((int64_t)mat[4] * (1 << WARPEDMODEL_PREC_BITS)) * y;
- wm->gamma =
- clamp((int)ROUND_POWER_OF_TWO_SIGNED_64(v, shift), INT16_MIN, INT16_MAX);
- v = ((int64_t)mat[3] * mat[4]) * y;
- wm->delta = clamp(mat[5] - (int)ROUND_POWER_OF_TWO_SIGNED_64(v, shift) -
- (1 << WARPEDMODEL_PREC_BITS),
- INT16_MIN, INT16_MAX);
-
- wm->alpha = ROUND_POWER_OF_TWO_SIGNED(wm->alpha, WARP_PARAM_REDUCE_BITS) *
- (1 << WARP_PARAM_REDUCE_BITS);
- wm->beta = ROUND_POWER_OF_TWO_SIGNED(wm->beta, WARP_PARAM_REDUCE_BITS) *
- (1 << WARP_PARAM_REDUCE_BITS);
- wm->gamma = ROUND_POWER_OF_TWO_SIGNED(wm->gamma, WARP_PARAM_REDUCE_BITS) *
- (1 << WARP_PARAM_REDUCE_BITS);
- wm->delta = ROUND_POWER_OF_TWO_SIGNED(wm->delta, WARP_PARAM_REDUCE_BITS) *
- (1 << WARP_PARAM_REDUCE_BITS);
-
- if (!is_affine_shear_allowed(wm->alpha, wm->beta, wm->gamma, wm->delta))
- return 0;
-
- return 1;
-}
-
-static INLINE int highbd_error_measure(int err, int bd) {
- const int b = bd - 8;
- const int bmask = (1 << b) - 1;
- const int v = (1 << b);
- err = abs(err);
- const int e1 = err >> b;
- const int e2 = err & bmask;
- return error_measure_lut[255 + e1] * (v - e2) +
- error_measure_lut[256 + e1] * e2;
-}
-
-/* Note: For an explanation of the warp algorithm, and some notes on bit widths
- for hardware implementations, see the comments above av1_warp_affine_c
-*/
-void av1_highbd_warp_affine_c(const int32_t *mat, const uint16_t *ref,
- int width, int height, int stride, uint16_t *pred,
- int p_col, int p_row, int p_width, int p_height,
- int p_stride, int subsampling_x,
- int subsampling_y, int bd,
- ConvolveParams *conv_params, int16_t alpha,
- int16_t beta, int16_t gamma, int16_t delta) {
- int32_t tmp[15 * 8];
- const int reduce_bits_horiz =
- conv_params->round_0 +
- AOMMAX(bd + FILTER_BITS - conv_params->round_0 - 14, 0);
- const int reduce_bits_vert = conv_params->is_compound
- ? conv_params->round_1
- : 2 * FILTER_BITS - reduce_bits_horiz;
- const int max_bits_horiz = bd + FILTER_BITS + 1 - reduce_bits_horiz;
- const int offset_bits_horiz = bd + FILTER_BITS - 1;
- const int offset_bits_vert = bd + 2 * FILTER_BITS - reduce_bits_horiz;
- const int round_bits =
- 2 * FILTER_BITS - conv_params->round_0 - conv_params->round_1;
- const int offset_bits = bd + 2 * FILTER_BITS - conv_params->round_0;
- (void)max_bits_horiz;
- assert(IMPLIES(conv_params->is_compound, conv_params->dst != NULL));
-
- for (int i = p_row; i < p_row + p_height; i += 8) {
- for (int j = p_col; j < p_col + p_width; j += 8) {
- // Calculate the center of this 8x8 block,
- // project to luma coordinates (if in a subsampled chroma plane),
- // apply the affine transformation,
- // then convert back to the original coordinates (if necessary)
- const int32_t src_x = (j + 4) << subsampling_x;
- const int32_t src_y = (i + 4) << subsampling_y;
- const int32_t dst_x = mat[2] * src_x + mat[3] * src_y + mat[0];
- const int32_t dst_y = mat[4] * src_x + mat[5] * src_y + mat[1];
- const int32_t x4 = dst_x >> subsampling_x;
- const int32_t y4 = dst_y >> subsampling_y;
-
- const int32_t ix4 = x4 >> WARPEDMODEL_PREC_BITS;
- int32_t sx4 = x4 & ((1 << WARPEDMODEL_PREC_BITS) - 1);
- const int32_t iy4 = y4 >> WARPEDMODEL_PREC_BITS;
- int32_t sy4 = y4 & ((1 << WARPEDMODEL_PREC_BITS) - 1);
-
- sx4 += alpha * (-4) + beta * (-4);
- sy4 += gamma * (-4) + delta * (-4);
-
- sx4 &= ~((1 << WARP_PARAM_REDUCE_BITS) - 1);
- sy4 &= ~((1 << WARP_PARAM_REDUCE_BITS) - 1);
-
- // Horizontal filter
- for (int k = -7; k < 8; ++k) {
- const int iy = clamp(iy4 + k, 0, height - 1);
-
- int sx = sx4 + beta * (k + 4);
- for (int l = -4; l < 4; ++l) {
- int ix = ix4 + l - 3;
- const int offs = ROUND_POWER_OF_TWO(sx, WARPEDDIFF_PREC_BITS) +
- WARPEDPIXEL_PREC_SHIFTS;
- assert(offs >= 0 && offs <= WARPEDPIXEL_PREC_SHIFTS * 3);
- const int16_t *coeffs = warped_filter[offs];
-
- int32_t sum = 1 << offset_bits_horiz;
- for (int m = 0; m < 8; ++m) {
- const int sample_x = clamp(ix + m, 0, width - 1);
- sum += ref[iy * stride + sample_x] * coeffs[m];
- }
- sum = ROUND_POWER_OF_TWO(sum, reduce_bits_horiz);
- assert(0 <= sum && sum < (1 << max_bits_horiz));
- tmp[(k + 7) * 8 + (l + 4)] = sum;
- sx += alpha;
- }
- }
-
- // Vertical filter
- for (int k = -4; k < AOMMIN(4, p_row + p_height - i - 4); ++k) {
- int sy = sy4 + delta * (k + 4);
- for (int l = -4; l < AOMMIN(4, p_col + p_width - j - 4); ++l) {
- const int offs = ROUND_POWER_OF_TWO(sy, WARPEDDIFF_PREC_BITS) +
- WARPEDPIXEL_PREC_SHIFTS;
- assert(offs >= 0 && offs <= WARPEDPIXEL_PREC_SHIFTS * 3);
- const int16_t *coeffs = warped_filter[offs];
-
- int32_t sum = 1 << offset_bits_vert;
- for (int m = 0; m < 8; ++m) {
- sum += tmp[(k + m + 4) * 8 + (l + 4)] * coeffs[m];
- }
-
- if (conv_params->is_compound) {
- CONV_BUF_TYPE *p =
- &conv_params
- ->dst[(i - p_row + k + 4) * conv_params->dst_stride +
- (j - p_col + l + 4)];
- sum = ROUND_POWER_OF_TWO(sum, reduce_bits_vert);
- if (conv_params->do_average) {
- uint16_t *dst16 =
- &pred[(i - p_row + k + 4) * p_stride + (j - p_col + l + 4)];
- int32_t tmp32 = *p;
- if (conv_params->use_jnt_comp_avg) {
- tmp32 = tmp32 * conv_params->fwd_offset +
- sum * conv_params->bck_offset;
- tmp32 = tmp32 >> DIST_PRECISION_BITS;
- } else {
- tmp32 += sum;
- tmp32 = tmp32 >> 1;
- }
- tmp32 = tmp32 - (1 << (offset_bits - conv_params->round_1)) -
- (1 << (offset_bits - conv_params->round_1 - 1));
- *dst16 =
- clip_pixel_highbd(ROUND_POWER_OF_TWO(tmp32, round_bits), bd);
- } else {
- *p = sum;
- }
- } else {
- uint16_t *p =
- &pred[(i - p_row + k + 4) * p_stride + (j - p_col + l + 4)];
- sum = ROUND_POWER_OF_TWO(sum, reduce_bits_vert);
- assert(0 <= sum && sum < (1 << (bd + 2)));
- *p = clip_pixel_highbd(sum - (1 << (bd - 1)) - (1 << bd), bd);
- }
- sy += gamma;
- }
- }
- }
- }
-}
-
-static void highbd_warp_plane(WarpedMotionParams *wm, const uint8_t *const ref8,
- int width, int height, int stride,
- const uint8_t *const pred8, int p_col, int p_row,
- int p_width, int p_height, int p_stride,
- int subsampling_x, int subsampling_y, int bd,
- ConvolveParams *conv_params) {
- assert(wm->wmtype <= AFFINE);
- if (wm->wmtype == ROTZOOM) {
- wm->wmmat[5] = wm->wmmat[2];
- wm->wmmat[4] = -wm->wmmat[3];
- }
- const int32_t *const mat = wm->wmmat;
- const int16_t alpha = wm->alpha;
- const int16_t beta = wm->beta;
- const int16_t gamma = wm->gamma;
- const int16_t delta = wm->delta;
-
- const uint16_t *const ref = CONVERT_TO_SHORTPTR(ref8);
- uint16_t *pred = CONVERT_TO_SHORTPTR(pred8);
- av1_highbd_warp_affine(mat, ref, width, height, stride, pred, p_col, p_row,
- p_width, p_height, p_stride, subsampling_x,
- subsampling_y, bd, conv_params, alpha, beta, gamma,
- delta);
-}
-
-static int64_t highbd_frame_error(const uint16_t *const ref, int stride,
- const uint16_t *const dst, int p_width,
- int p_height, int p_stride, int bd) {
- int64_t sum_error = 0;
- for (int i = 0; i < p_height; ++i) {
- for (int j = 0; j < p_width; ++j) {
- sum_error +=
- highbd_error_measure(dst[j + i * p_stride] - ref[j + i * stride], bd);
- }
- }
- return sum_error;
-}
-
-static int64_t highbd_warp_error(
- WarpedMotionParams *wm, const uint8_t *const ref8, int width, int height,
- int stride, const uint8_t *const dst8, int p_col, int p_row, int p_width,
- int p_height, int p_stride, int subsampling_x, int subsampling_y, int bd,
- int64_t best_error) {
- int64_t gm_sumerr = 0;
- const int error_bsize_w = AOMMIN(p_width, WARP_ERROR_BLOCK);
- const int error_bsize_h = AOMMIN(p_height, WARP_ERROR_BLOCK);
- uint16_t tmp[WARP_ERROR_BLOCK * WARP_ERROR_BLOCK];
-
- ConvolveParams conv_params = get_conv_params(0, 0, bd);
- conv_params.use_jnt_comp_avg = 0;
- for (int i = p_row; i < p_row + p_height; i += WARP_ERROR_BLOCK) {
- for (int j = p_col; j < p_col + p_width; j += WARP_ERROR_BLOCK) {
- // avoid warping extra 8x8 blocks in the padded region of the frame
- // when p_width and p_height are not multiples of WARP_ERROR_BLOCK
- const int warp_w = AOMMIN(error_bsize_w, p_col + p_width - j);
- const int warp_h = AOMMIN(error_bsize_h, p_row + p_height - i);
- highbd_warp_plane(wm, ref8, width, height, stride,
- CONVERT_TO_BYTEPTR(tmp), j, i, warp_w, warp_h,
- WARP_ERROR_BLOCK, subsampling_x, subsampling_y, bd,
- &conv_params);
-
- gm_sumerr += highbd_frame_error(
- tmp, WARP_ERROR_BLOCK, CONVERT_TO_SHORTPTR(dst8) + j + i * p_stride,
- warp_w, warp_h, p_stride, bd);
- if (gm_sumerr > best_error) return gm_sumerr;
- }
- }
- return gm_sumerr;
-}
-
-static INLINE int error_measure(int err) {
- return error_measure_lut[255 + err];
-}
-
-/* The warp filter for ROTZOOM and AFFINE models works as follows:
- * Split the input into 8x8 blocks
- * For each block, project the point (4, 4) within the block, to get the
- overall block position. Split into integer and fractional coordinates,
- maintaining full WARPEDMODEL precision
- * Filter horizontally: Generate 15 rows of 8 pixels each. Each pixel gets a
- variable horizontal offset. This means that, while the rows of the
- intermediate buffer align with the rows of the *reference* image, the
- columns align with the columns of the *destination* image.
- * Filter vertically: Generate the output block (up to 8x8 pixels, but if the
- destination is too small we crop the output at this stage). Each pixel has
- a variable vertical offset, so that the resulting rows are aligned with
- the rows of the destination image.
-
- To accomplish these alignments, we factor the warp matrix as a
- product of two shear / asymmetric zoom matrices:
- / a b \ = / 1 0 \ * / 1+alpha beta \
- \ c d / \ gamma 1+delta / \ 0 1 /
- where a, b, c, d are wmmat[2], wmmat[3], wmmat[4], wmmat[5] respectively.
- The horizontal shear (with alpha and beta) is applied first,
- then the vertical shear (with gamma and delta) is applied second.
-
- The only limitation is that, to fit this in a fixed 8-tap filter size,
- the fractional pixel offsets must be at most +-1. Since the horizontal filter
- generates 15 rows of 8 columns, and the initial point we project is at (4, 4)
- within the block, the parameters must satisfy
- 4 * |alpha| + 7 * |beta| <= 1 and 4 * |gamma| + 4 * |delta| <= 1
- for this filter to be applicable.
-
- Note: This function assumes that the caller has done all of the relevant
- checks, ie. that we have a ROTZOOM or AFFINE model, that wm[4] and wm[5]
- are set appropriately (if using a ROTZOOM model), and that alpha, beta,
- gamma, delta are all in range.
-
- TODO(david.barker): Maybe support scaled references?
-*/
-/* A note on hardware implementation:
- The warp filter is intended to be implementable using the same hardware as
- the high-precision convolve filters from the loop-restoration and
- convolve-round experiments.
-
- For a single filter stage, considering all of the coefficient sets for the
- warp filter and the regular convolution filter, an input in the range
- [0, 2^k - 1] is mapped into the range [-56 * (2^k - 1), 184 * (2^k - 1)]
- before rounding.
-
- Allowing for some changes to the filter coefficient sets, call the range
- [-64 * 2^k, 192 * 2^k]. Then, if we initialize the accumulator to 64 * 2^k,
- we can replace this by the range [0, 256 * 2^k], which can be stored in an
- unsigned value with 8 + k bits.
-
- This allows the derivation of the appropriate bit widths and offsets for
- the various intermediate values: If
-
- F := FILTER_BITS = 7 (or else the above ranges need adjusting)
- So a *single* filter stage maps a k-bit input to a (k + F + 1)-bit
- intermediate value.
- H := ROUND0_BITS
- V := VERSHEAR_REDUCE_PREC_BITS
- (and note that we must have H + V = 2*F for the output to have the same
- scale as the input)
-
- then we end up with the following offsets and ranges:
- Horizontal filter: Apply an offset of 1 << (bd + F - 1), sum fits into a
- uint{bd + F + 1}
- After rounding: The values stored in 'tmp' fit into a uint{bd + F + 1 - H}.
- Vertical filter: Apply an offset of 1 << (bd + 2*F - H), sum fits into a
- uint{bd + 2*F + 2 - H}
- After rounding: The final value, before undoing the offset, fits into a
- uint{bd + 2}.
-
- Then we need to undo the offsets before clamping to a pixel. Note that,
- if we do this at the end, the amount to subtract is actually independent
- of H and V:
-
- offset to subtract = (1 << ((bd + F - 1) - H + F - V)) +
- (1 << ((bd + 2*F - H) - V))
- == (1 << (bd - 1)) + (1 << bd)
-
- This allows us to entirely avoid clamping in both the warp filter and
- the convolve-round experiment. As of the time of writing, the Wiener filter
- from loop-restoration can encode a central coefficient up to 216, which
- leads to a maximum value of about 282 * 2^k after applying the offset.
- So in that case we still need to clamp.
-*/
-void av1_warp_affine_c(const int32_t *mat, const uint8_t *ref, int width,
- int height, int stride, uint8_t *pred, int p_col,
- int p_row, int p_width, int p_height, int p_stride,
- int subsampling_x, int subsampling_y,
- ConvolveParams *conv_params, int16_t alpha, int16_t beta,
- int16_t gamma, int16_t delta) {
- int32_t tmp[15 * 8];
- const int bd = 8;
- const int reduce_bits_horiz = conv_params->round_0;
- const int reduce_bits_vert = conv_params->is_compound
- ? conv_params->round_1
- : 2 * FILTER_BITS - reduce_bits_horiz;
- const int max_bits_horiz = bd + FILTER_BITS + 1 - reduce_bits_horiz;
- const int offset_bits_horiz = bd + FILTER_BITS - 1;
- const int offset_bits_vert = bd + 2 * FILTER_BITS - reduce_bits_horiz;
- const int round_bits =
- 2 * FILTER_BITS - conv_params->round_0 - conv_params->round_1;
- const int offset_bits = bd + 2 * FILTER_BITS - conv_params->round_0;
- (void)max_bits_horiz;
- assert(IMPLIES(conv_params->is_compound, conv_params->dst != NULL));
- assert(IMPLIES(conv_params->do_average, conv_params->is_compound));
-
- for (int i = p_row; i < p_row + p_height; i += 8) {
- for (int j = p_col; j < p_col + p_width; j += 8) {
- // Calculate the center of this 8x8 block,
- // project to luma coordinates (if in a subsampled chroma plane),
- // apply the affine transformation,
- // then convert back to the original coordinates (if necessary)
- const int32_t src_x = (j + 4) << subsampling_x;
- const int32_t src_y = (i + 4) << subsampling_y;
- const int32_t dst_x = mat[2] * src_x + mat[3] * src_y + mat[0];
- const int32_t dst_y = mat[4] * src_x + mat[5] * src_y + mat[1];
- const int32_t x4 = dst_x >> subsampling_x;
- const int32_t y4 = dst_y >> subsampling_y;
-
- int32_t ix4 = x4 >> WARPEDMODEL_PREC_BITS;
- int32_t sx4 = x4 & ((1 << WARPEDMODEL_PREC_BITS) - 1);
- int32_t iy4 = y4 >> WARPEDMODEL_PREC_BITS;
- int32_t sy4 = y4 & ((1 << WARPEDMODEL_PREC_BITS) - 1);
-
- sx4 += alpha * (-4) + beta * (-4);
- sy4 += gamma * (-4) + delta * (-4);
-
- sx4 &= ~((1 << WARP_PARAM_REDUCE_BITS) - 1);
- sy4 &= ~((1 << WARP_PARAM_REDUCE_BITS) - 1);
-
- // Horizontal filter
- for (int k = -7; k < 8; ++k) {
- // Clamp to top/bottom edge of the frame
- const int iy = clamp(iy4 + k, 0, height - 1);
-
- int sx = sx4 + beta * (k + 4);
-
- for (int l = -4; l < 4; ++l) {
- int ix = ix4 + l - 3;
- // At this point, sx = sx4 + alpha * l + beta * k
- const int offs = ROUND_POWER_OF_TWO(sx, WARPEDDIFF_PREC_BITS) +
- WARPEDPIXEL_PREC_SHIFTS;
- assert(offs >= 0 && offs <= WARPEDPIXEL_PREC_SHIFTS * 3);
- const int16_t *coeffs = warped_filter[offs];
-
- int32_t sum = 1 << offset_bits_horiz;
- for (int m = 0; m < 8; ++m) {
- // Clamp to left/right edge of the frame
- const int sample_x = clamp(ix + m, 0, width - 1);
-
- sum += ref[iy * stride + sample_x] * coeffs[m];
- }
- sum = ROUND_POWER_OF_TWO(sum, reduce_bits_horiz);
- assert(0 <= sum && sum < (1 << max_bits_horiz));
- tmp[(k + 7) * 8 + (l + 4)] = sum;
- sx += alpha;
- }
- }
-
- // Vertical filter
- for (int k = -4; k < AOMMIN(4, p_row + p_height - i - 4); ++k) {
- int sy = sy4 + delta * (k + 4);
- for (int l = -4; l < AOMMIN(4, p_col + p_width - j - 4); ++l) {
- // At this point, sy = sy4 + gamma * l + delta * k
- const int offs = ROUND_POWER_OF_TWO(sy, WARPEDDIFF_PREC_BITS) +
- WARPEDPIXEL_PREC_SHIFTS;
- assert(offs >= 0 && offs <= WARPEDPIXEL_PREC_SHIFTS * 3);
- const int16_t *coeffs = warped_filter[offs];
-
- int32_t sum = 1 << offset_bits_vert;
- for (int m = 0; m < 8; ++m) {
- sum += tmp[(k + m + 4) * 8 + (l + 4)] * coeffs[m];
- }
-
- if (conv_params->is_compound) {
- CONV_BUF_TYPE *p =
- &conv_params
- ->dst[(i - p_row + k + 4) * conv_params->dst_stride +
- (j - p_col + l + 4)];
- sum = ROUND_POWER_OF_TWO(sum, reduce_bits_vert);
- if (conv_params->do_average) {
- uint8_t *dst8 =
- &pred[(i - p_row + k + 4) * p_stride + (j - p_col + l + 4)];
- int32_t tmp32 = *p;
- if (conv_params->use_jnt_comp_avg) {
- tmp32 = tmp32 * conv_params->fwd_offset +
- sum * conv_params->bck_offset;
- tmp32 = tmp32 >> DIST_PRECISION_BITS;
- } else {
- tmp32 += sum;
- tmp32 = tmp32 >> 1;
- }
- tmp32 = tmp32 - (1 << (offset_bits - conv_params->round_1)) -
- (1 << (offset_bits - conv_params->round_1 - 1));
- *dst8 = clip_pixel(ROUND_POWER_OF_TWO(tmp32, round_bits));
- } else {
- *p = sum;
- }
- } else {
- uint8_t *p =
- &pred[(i - p_row + k + 4) * p_stride + (j - p_col + l + 4)];
- sum = ROUND_POWER_OF_TWO(sum, reduce_bits_vert);
- assert(0 <= sum && sum < (1 << (bd + 2)));
- *p = clip_pixel(sum - (1 << (bd - 1)) - (1 << bd));
- }
- sy += gamma;
- }
- }
- }
- }
-}
-
-static void warp_plane(WarpedMotionParams *wm, const uint8_t *const ref,
- int width, int height, int stride, uint8_t *pred,
- int p_col, int p_row, int p_width, int p_height,
- int p_stride, int subsampling_x, int subsampling_y,
- ConvolveParams *conv_params) {
- assert(wm->wmtype <= AFFINE);
- if (wm->wmtype == ROTZOOM) {
- wm->wmmat[5] = wm->wmmat[2];
- wm->wmmat[4] = -wm->wmmat[3];
- }
- const int32_t *const mat = wm->wmmat;
- const int16_t alpha = wm->alpha;
- const int16_t beta = wm->beta;
- const int16_t gamma = wm->gamma;
- const int16_t delta = wm->delta;
- av1_warp_affine(mat, ref, width, height, stride, pred, p_col, p_row, p_width,
- p_height, p_stride, subsampling_x, subsampling_y, conv_params,
- alpha, beta, gamma, delta);
-}
-
-static int64_t frame_error(const uint8_t *const ref, int stride,
- const uint8_t *const dst, int p_width, int p_height,
- int p_stride) {
- int64_t sum_error = 0;
- for (int i = 0; i < p_height; ++i) {
- for (int j = 0; j < p_width; ++j) {
- sum_error +=
- (int64_t)error_measure(dst[j + i * p_stride] - ref[j + i * stride]);
- }
- }
- return sum_error;
-}
-
-static int64_t warp_error(WarpedMotionParams *wm, const uint8_t *const ref,
- int width, int height, int stride,
- const uint8_t *const dst, int p_col, int p_row,
- int p_width, int p_height, int p_stride,
- int subsampling_x, int subsampling_y,
- int64_t best_error) {
- int64_t gm_sumerr = 0;
- int warp_w, warp_h;
- int error_bsize_w = AOMMIN(p_width, WARP_ERROR_BLOCK);
- int error_bsize_h = AOMMIN(p_height, WARP_ERROR_BLOCK);
- uint8_t tmp[WARP_ERROR_BLOCK * WARP_ERROR_BLOCK];
- ConvolveParams conv_params = get_conv_params(0, 0, 8);
- conv_params.use_jnt_comp_avg = 0;
-
- for (int i = p_row; i < p_row + p_height; i += WARP_ERROR_BLOCK) {
- for (int j = p_col; j < p_col + p_width; j += WARP_ERROR_BLOCK) {
- // avoid warping extra 8x8 blocks in the padded region of the frame
- // when p_width and p_height are not multiples of WARP_ERROR_BLOCK
- warp_w = AOMMIN(error_bsize_w, p_col + p_width - j);
- warp_h = AOMMIN(error_bsize_h, p_row + p_height - i);
- warp_plane(wm, ref, width, height, stride, tmp, j, i, warp_w, warp_h,
- WARP_ERROR_BLOCK, subsampling_x, subsampling_y, &conv_params);
-
- gm_sumerr += frame_error(tmp, WARP_ERROR_BLOCK, dst + j + i * p_stride,
- warp_w, warp_h, p_stride);
- if (gm_sumerr > best_error) return gm_sumerr;
- }
- }
- return gm_sumerr;
-}
-
-int64_t av1_frame_error(int use_hbd, int bd, const uint8_t *ref, int stride,
- uint8_t *dst, int p_width, int p_height, int p_stride) {
- if (use_hbd) {
- return highbd_frame_error(CONVERT_TO_SHORTPTR(ref), stride,
- CONVERT_TO_SHORTPTR(dst), p_width, p_height,
- p_stride, bd);
- }
- return frame_error(ref, stride, dst, p_width, p_height, p_stride);
-}
-
-int64_t av1_warp_error(WarpedMotionParams *wm, int use_hbd, int bd,
- const uint8_t *ref, int width, int height, int stride,
- uint8_t *dst, int p_col, int p_row, int p_width,
- int p_height, int p_stride, int subsampling_x,
- int subsampling_y, int64_t best_error) {
- if (wm->wmtype <= AFFINE)
- if (!get_shear_params(wm)) return 1;
- if (use_hbd)
- return highbd_warp_error(wm, ref, width, height, stride, dst, p_col, p_row,
- p_width, p_height, p_stride, subsampling_x,
- subsampling_y, bd, best_error);
- return warp_error(wm, ref, width, height, stride, dst, p_col, p_row, p_width,
- p_height, p_stride, subsampling_x, subsampling_y,
- best_error);
-}
-
-void av1_warp_plane(WarpedMotionParams *wm, int use_hbd, int bd,
- const uint8_t *ref, int width, int height, int stride,
- uint8_t *pred, int p_col, int p_row, int p_width,
- int p_height, int p_stride, int subsampling_x,
- int subsampling_y, ConvolveParams *conv_params) {
- if (use_hbd)
- highbd_warp_plane(wm, ref, width, height, stride, pred, p_col, p_row,
- p_width, p_height, p_stride, subsampling_x, subsampling_y,
- bd, conv_params);
- else
- warp_plane(wm, ref, width, height, stride, pred, p_col, p_row, p_width,
- p_height, p_stride, subsampling_x, subsampling_y, conv_params);
-}
-
-#define LS_MV_MAX 256 // max mv in 1/8-pel
-// Use LS_STEP = 8 so that 2 less bits needed for A, Bx, By.
-#define LS_STEP 8
-
-// Assuming LS_MV_MAX is < MAX_SB_SIZE * 8,
-// the precision needed is:
-// (MAX_SB_SIZE_LOG2 + 3) [for sx * sx magnitude] +
-// (MAX_SB_SIZE_LOG2 + 4) [for sx * dx magnitude] +
-// 1 [for sign] +
-// LEAST_SQUARES_SAMPLES_MAX_BITS
-// [for adding up to LEAST_SQUARES_SAMPLES_MAX samples]
-// The value is 23
-#define LS_MAT_RANGE_BITS \
- ((MAX_SB_SIZE_LOG2 + 4) * 2 + LEAST_SQUARES_SAMPLES_MAX_BITS)
-
-// Bit-depth reduction from the full-range
-#define LS_MAT_DOWN_BITS 2
-
-// bits range of A, Bx and By after downshifting
-#define LS_MAT_BITS (LS_MAT_RANGE_BITS - LS_MAT_DOWN_BITS)
-#define LS_MAT_MIN (-(1 << (LS_MAT_BITS - 1)))
-#define LS_MAT_MAX ((1 << (LS_MAT_BITS - 1)) - 1)
-
-// By setting LS_STEP = 8, the least 2 bits of every elements in A, Bx, By are
-// 0. So, we can reduce LS_MAT_RANGE_BITS(2) bits here.
-#define LS_SQUARE(a) \
- (((a) * (a)*4 + (a)*4 * LS_STEP + LS_STEP * LS_STEP * 2) >> \
- (2 + LS_MAT_DOWN_BITS))
-#define LS_PRODUCT1(a, b) \
- (((a) * (b)*4 + ((a) + (b)) * 2 * LS_STEP + LS_STEP * LS_STEP) >> \
- (2 + LS_MAT_DOWN_BITS))
-#define LS_PRODUCT2(a, b) \
- (((a) * (b)*4 + ((a) + (b)) * 2 * LS_STEP + LS_STEP * LS_STEP * 2) >> \
- (2 + LS_MAT_DOWN_BITS))
-
-#define USE_LIMITED_PREC_MULT 0
-
-#if USE_LIMITED_PREC_MULT
-
-#define MUL_PREC_BITS 16
-static uint16_t resolve_multiplier_64(uint64_t D, int16_t *shift) {
- int msb = 0;
- uint16_t mult = 0;
- *shift = 0;
- if (D != 0) {
- msb = (int16_t)((D >> 32) ? get_msb((unsigned int)(D >> 32)) + 32
- : get_msb((unsigned int)D));
- if (msb >= MUL_PREC_BITS) {
- mult = (uint16_t)ROUND_POWER_OF_TWO_64(D, msb + 1 - MUL_PREC_BITS);
- *shift = msb + 1 - MUL_PREC_BITS;
- } else {
- mult = (uint16_t)D;
- *shift = 0;
- }
- }
- return mult;
-}
-
-static int32_t get_mult_shift_ndiag(int64_t Px, int16_t iDet, int shift) {
- int32_t ret;
- int16_t mshift;
- uint16_t Mul = resolve_multiplier_64(llabs(Px), &mshift);
- int32_t v = (int32_t)Mul * (int32_t)iDet * (Px < 0 ? -1 : 1);
- shift -= mshift;
- if (shift > 0) {
- return (int32_t)clamp(ROUND_POWER_OF_TWO_SIGNED(v, shift),
- -WARPEDMODEL_NONDIAGAFFINE_CLAMP + 1,
- WARPEDMODEL_NONDIAGAFFINE_CLAMP - 1);
- } else {
- return (int32_t)clamp(v * (1 << (-shift)),
- -WARPEDMODEL_NONDIAGAFFINE_CLAMP + 1,
- WARPEDMODEL_NONDIAGAFFINE_CLAMP - 1);
- }
- return ret;
-}
-
-static int32_t get_mult_shift_diag(int64_t Px, int16_t iDet, int shift) {
- int16_t mshift;
- uint16_t Mul = resolve_multiplier_64(llabs(Px), &mshift);
- int32_t v = (int32_t)Mul * (int32_t)iDet * (Px < 0 ? -1 : 1);
- shift -= mshift;
- if (shift > 0) {
- return (int32_t)clamp(
- ROUND_POWER_OF_TWO_SIGNED(v, shift),
- (1 << WARPEDMODEL_PREC_BITS) - WARPEDMODEL_NONDIAGAFFINE_CLAMP + 1,
- (1 << WARPEDMODEL_PREC_BITS) + WARPEDMODEL_NONDIAGAFFINE_CLAMP - 1);
- } else {
- return (int32_t)clamp(
- v * (1 << (-shift)),
- (1 << WARPEDMODEL_PREC_BITS) - WARPEDMODEL_NONDIAGAFFINE_CLAMP + 1,
- (1 << WARPEDMODEL_PREC_BITS) + WARPEDMODEL_NONDIAGAFFINE_CLAMP - 1);
- }
-}
-
-#else
-
-static int32_t get_mult_shift_ndiag(int64_t Px, int16_t iDet, int shift) {
- int64_t v = Px * (int64_t)iDet;
- return (int32_t)clamp64(ROUND_POWER_OF_TWO_SIGNED_64(v, shift),
- -WARPEDMODEL_NONDIAGAFFINE_CLAMP + 1,
- WARPEDMODEL_NONDIAGAFFINE_CLAMP - 1);
-}
-
-static int32_t get_mult_shift_diag(int64_t Px, int16_t iDet, int shift) {
- int64_t v = Px * (int64_t)iDet;
- return (int32_t)clamp64(
- ROUND_POWER_OF_TWO_SIGNED_64(v, shift),
- (1 << WARPEDMODEL_PREC_BITS) - WARPEDMODEL_NONDIAGAFFINE_CLAMP + 1,
- (1 << WARPEDMODEL_PREC_BITS) + WARPEDMODEL_NONDIAGAFFINE_CLAMP - 1);
-}
-#endif // USE_LIMITED_PREC_MULT
-
-static int find_affine_int(int np, const int *pts1, const int *pts2,
- BLOCK_SIZE bsize, int mvy, int mvx,
- WarpedMotionParams *wm, int mi_row, int mi_col) {
- int32_t A[2][2] = { { 0, 0 }, { 0, 0 } };
- int32_t Bx[2] = { 0, 0 };
- int32_t By[2] = { 0, 0 };
- int i;
-
- const int bw = block_size_wide[bsize];
- const int bh = block_size_high[bsize];
- const int rsuy = (AOMMAX(bh, MI_SIZE) / 2 - 1);
- const int rsux = (AOMMAX(bw, MI_SIZE) / 2 - 1);
- const int suy = rsuy * 8;
- const int sux = rsux * 8;
- const int duy = suy + mvy;
- const int dux = sux + mvx;
- const int isuy = (mi_row * MI_SIZE + rsuy);
- const int isux = (mi_col * MI_SIZE + rsux);
-
- // Assume the center pixel of the block has exactly the same motion vector
- // as transmitted for the block. First shift the origin of the source
- // points to the block center, and the origin of the destination points to
- // the block center added to the motion vector transmitted.
- // Let (xi, yi) denote the source points and (xi', yi') denote destination
- // points after origin shfifting, for i = 0, 1, 2, .... n-1.
- // Then if P = [x0, y0,
- // x1, y1
- // x2, y1,
- // ....
- // ]
- // q = [x0', x1', x2', ... ]'
- // r = [y0', y1', y2', ... ]'
- // the least squares problems that need to be solved are:
- // [h1, h2]' = inv(P'P)P'q and
- // [h3, h4]' = inv(P'P)P'r
- // where the affine transformation is given by:
- // x' = h1.x + h2.y
- // y' = h3.x + h4.y
- //
- // The loop below computes: A = P'P, Bx = P'q, By = P'r
- // We need to just compute inv(A).Bx and inv(A).By for the solutions.
- // Contribution from neighbor block
- for (i = 0; i < np; i++) {
- const int dx = pts2[i * 2] - dux;
- const int dy = pts2[i * 2 + 1] - duy;
- const int sx = pts1[i * 2] - sux;
- const int sy = pts1[i * 2 + 1] - suy;
- // (TODO)yunqing: This comparison wouldn't be necessary if the sample
- // selection is done in find_samples(). Also, global offset can be removed
- // while collecting samples.
- if (abs(sx - dx) < LS_MV_MAX && abs(sy - dy) < LS_MV_MAX) {
- A[0][0] += LS_SQUARE(sx);
- A[0][1] += LS_PRODUCT1(sx, sy);
- A[1][1] += LS_SQUARE(sy);
- Bx[0] += LS_PRODUCT2(sx, dx);
- Bx[1] += LS_PRODUCT1(sy, dx);
- By[0] += LS_PRODUCT1(sx, dy);
- By[1] += LS_PRODUCT2(sy, dy);
- }
- }
-
- // Just for debugging, and can be removed later.
- assert(A[0][0] >= LS_MAT_MIN && A[0][0] <= LS_MAT_MAX);
- assert(A[0][1] >= LS_MAT_MIN && A[0][1] <= LS_MAT_MAX);
- assert(A[1][1] >= LS_MAT_MIN && A[1][1] <= LS_MAT_MAX);
- assert(Bx[0] >= LS_MAT_MIN && Bx[0] <= LS_MAT_MAX);
- assert(Bx[1] >= LS_MAT_MIN && Bx[1] <= LS_MAT_MAX);
- assert(By[0] >= LS_MAT_MIN && By[0] <= LS_MAT_MAX);
- assert(By[1] >= LS_MAT_MIN && By[1] <= LS_MAT_MAX);
-
- int64_t Det;
- int16_t iDet, shift;
-
- // Compute Determinant of A
- Det = (int64_t)A[0][0] * A[1][1] - (int64_t)A[0][1] * A[0][1];
- if (Det == 0) return 1;
- iDet = resolve_divisor_64(llabs(Det), &shift) * (Det < 0 ? -1 : 1);
- shift -= WARPEDMODEL_PREC_BITS;
- if (shift < 0) {
- iDet <<= (-shift);
- shift = 0;
- }
-
- int64_t Px[2], Py[2];
-
- // These divided by the Det, are the least squares solutions
- Px[0] = (int64_t)A[1][1] * Bx[0] - (int64_t)A[0][1] * Bx[1];
- Px[1] = -(int64_t)A[0][1] * Bx[0] + (int64_t)A[0][0] * Bx[1];
- Py[0] = (int64_t)A[1][1] * By[0] - (int64_t)A[0][1] * By[1];
- Py[1] = -(int64_t)A[0][1] * By[0] + (int64_t)A[0][0] * By[1];
-
- wm->wmmat[2] = get_mult_shift_diag(Px[0], iDet, shift);
- wm->wmmat[3] = get_mult_shift_ndiag(Px[1], iDet, shift);
- wm->wmmat[4] = get_mult_shift_ndiag(Py[0], iDet, shift);
- wm->wmmat[5] = get_mult_shift_diag(Py[1], iDet, shift);
-
- // Note: In the vx, vy expressions below, the max value of each of the
- // 2nd and 3rd terms are (2^16 - 1) * (2^13 - 1). That leaves enough room
- // for the first term so that the overall sum in the worst case fits
- // within 32 bits overall.
- int32_t vx = mvx * (1 << (WARPEDMODEL_PREC_BITS - 3)) -
- (isux * (wm->wmmat[2] - (1 << WARPEDMODEL_PREC_BITS)) +
- isuy * wm->wmmat[3]);
- int32_t vy = mvy * (1 << (WARPEDMODEL_PREC_BITS - 3)) -
- (isux * wm->wmmat[4] +
- isuy * (wm->wmmat[5] - (1 << WARPEDMODEL_PREC_BITS)));
- wm->wmmat[0] =
- clamp(vx, -WARPEDMODEL_TRANS_CLAMP, WARPEDMODEL_TRANS_CLAMP - 1);
- wm->wmmat[1] =
- clamp(vy, -WARPEDMODEL_TRANS_CLAMP, WARPEDMODEL_TRANS_CLAMP - 1);
-
- wm->wmmat[6] = wm->wmmat[7] = 0;
- return 0;
-}
-
-int find_projection(int np, int *pts1, int *pts2, BLOCK_SIZE bsize, int mvy,
- int mvx, WarpedMotionParams *wm_params, int mi_row,
- int mi_col) {
- assert(wm_params->wmtype == AFFINE);
-
- if (find_affine_int(np, pts1, pts2, bsize, mvy, mvx, wm_params, mi_row,
- mi_col))
- return 1;
-
- // check compatibility with the fast warp filter
- if (!get_shear_params(wm_params)) return 1;
-
- return 0;
-}
diff --git a/third_party/aom/av1/common/warped_motion.h b/third_party/aom/av1/common/warped_motion.h
deleted file mode 100644
index a1a4f067d..000000000
--- a/third_party/aom/av1/common/warped_motion.h
+++ /dev/null
@@ -1,95 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#ifndef AOM_AV1_COMMON_WARPED_MOTION_H_
-#define AOM_AV1_COMMON_WARPED_MOTION_H_
-
-#include <stdio.h>
-#include <stdlib.h>
-#include <memory.h>
-#include <math.h>
-#include <assert.h>
-
-#include "config/aom_config.h"
-
-#include "aom_ports/mem.h"
-#include "aom_dsp/aom_dsp_common.h"
-#include "av1/common/mv.h"
-#include "av1/common/convolve.h"
-
-#define MAX_PARAMDIM 9
-#define LEAST_SQUARES_SAMPLES_MAX_BITS 3
-#define LEAST_SQUARES_SAMPLES_MAX (1 << LEAST_SQUARES_SAMPLES_MAX_BITS)
-#define SAMPLES_ARRAY_SIZE (LEAST_SQUARES_SAMPLES_MAX * 2)
-#define WARPED_MOTION_DEBUG 0
-#define DEFAULT_WMTYPE AFFINE
-
-extern const int16_t warped_filter[WARPEDPIXEL_PREC_SHIFTS * 3 + 1][8];
-
-static const uint8_t warp_pad_left[14][16] = {
- { 1, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 },
- { 2, 2, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 },
- { 3, 3, 3, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 },
- { 4, 4, 4, 4, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 },
- { 5, 5, 5, 5, 5, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 },
- { 6, 6, 6, 6, 6, 6, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 },
- { 7, 7, 7, 7, 7, 7, 7, 7, 8, 9, 10, 11, 12, 13, 14, 15 },
- { 8, 8, 8, 8, 8, 8, 8, 8, 8, 9, 10, 11, 12, 13, 14, 15 },
- { 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 10, 11, 12, 13, 14, 15 },
- { 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 11, 12, 13, 14, 15 },
- { 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 12, 13, 14, 15 },
- { 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 13, 14, 15 },
- { 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 14, 15 },
- { 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 15 },
-};
-
-static const uint8_t warp_pad_right[14][16] = {
- { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 14 },
- { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 13, 13 },
- { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 12, 12, 12 },
- { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 11, 11, 11, 11 },
- { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 10, 10, 10, 10, 10 },
- { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 9, 9, 9, 9, 9, 9 },
- { 0, 1, 2, 3, 4, 5, 6, 7, 8, 8, 8, 8, 8, 8, 8, 8 },
- { 0, 1, 2, 3, 4, 5, 6, 7, 7, 7, 7, 7, 7, 7, 7, 7 },
- { 0, 1, 2, 3, 4, 5, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6 },
- { 0, 1, 2, 3, 4, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5 },
- { 0, 1, 2, 3, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4 },
- { 0, 1, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3 },
- { 0, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2 },
- { 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 }
-};
-
-// Returns the error between the result of applying motion 'wm' to the frame
-// described by 'ref' and the frame described by 'dst'.
-int64_t av1_warp_error(WarpedMotionParams *wm, int use_hbd, int bd,
- const uint8_t *ref, int width, int height, int stride,
- uint8_t *dst, int p_col, int p_row, int p_width,
- int p_height, int p_stride, int subsampling_x,
- int subsampling_y, int64_t best_error);
-
-// Returns the error between the frame described by 'ref' and the frame
-// described by 'dst'.
-int64_t av1_frame_error(int use_hbd, int bd, const uint8_t *ref, int stride,
- uint8_t *dst, int p_width, int p_height, int p_stride);
-
-void av1_warp_plane(WarpedMotionParams *wm, int use_hbd, int bd,
- const uint8_t *ref, int width, int height, int stride,
- uint8_t *pred, int p_col, int p_row, int p_width,
- int p_height, int p_stride, int subsampling_x,
- int subsampling_y, ConvolveParams *conv_params);
-
-int find_projection(int np, int *pts1, int *pts2, BLOCK_SIZE bsize, int mvy,
- int mvx, WarpedMotionParams *wm_params, int mi_row,
- int mi_col);
-
-int get_shear_params(WarpedMotionParams *wm);
-#endif // AOM_AV1_COMMON_WARPED_MOTION_H_
diff --git a/third_party/aom/av1/common/x86/av1_convolve_horiz_rs_sse4.c b/third_party/aom/av1/common/x86/av1_convolve_horiz_rs_sse4.c
deleted file mode 100644
index 8aa14696f..000000000
--- a/third_party/aom/av1/common/x86/av1_convolve_horiz_rs_sse4.c
+++ /dev/null
@@ -1,228 +0,0 @@
-/*
- * Copyright (c) 2018, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#include <assert.h>
-#include <smmintrin.h>
-
-#include "config/av1_rtcd.h"
-
-#include "av1/common/convolve.h"
-#include "av1/common/resize.h"
-#include "aom_dsp/x86/synonyms.h"
-
-// Note: If the crop width is not a multiple of 4, then, unlike the C version,
-// this function will overwrite some of the padding on the right hand side of
-// the frame. This padding appears to be trashed anyway, so this should not
-// affect the running of the decoder.
-void av1_convolve_horiz_rs_sse4_1(const uint8_t *src, int src_stride,
- uint8_t *dst, int dst_stride, int w, int h,
- const int16_t *x_filters, int x0_qn,
- int x_step_qn) {
- assert(UPSCALE_NORMATIVE_TAPS == 8);
-
- src -= UPSCALE_NORMATIVE_TAPS / 2 - 1;
-
- const __m128i round_add = _mm_set1_epi32((1 << FILTER_BITS) >> 1);
- const __m128i zero = _mm_setzero_si128();
-
- const uint8_t *src_y;
- uint8_t *dst_y;
- int x_qn = x0_qn;
- for (int x = 0; x < w; x += 4, x_qn += 4 * x_step_qn) {
- const int x_filter_idx0 =
- ((x_qn + 0 * x_step_qn) & RS_SCALE_SUBPEL_MASK) >> RS_SCALE_EXTRA_BITS;
- const int x_filter_idx1 =
- ((x_qn + 1 * x_step_qn) & RS_SCALE_SUBPEL_MASK) >> RS_SCALE_EXTRA_BITS;
- const int x_filter_idx2 =
- ((x_qn + 2 * x_step_qn) & RS_SCALE_SUBPEL_MASK) >> RS_SCALE_EXTRA_BITS;
- const int x_filter_idx3 =
- ((x_qn + 3 * x_step_qn) & RS_SCALE_SUBPEL_MASK) >> RS_SCALE_EXTRA_BITS;
-
- assert(x_filter_idx0 <= RS_SUBPEL_MASK);
- assert(x_filter_idx1 <= RS_SUBPEL_MASK);
- assert(x_filter_idx2 <= RS_SUBPEL_MASK);
- assert(x_filter_idx3 <= RS_SUBPEL_MASK);
-
- const int16_t *const x_filter0 =
- &x_filters[x_filter_idx0 * UPSCALE_NORMATIVE_TAPS];
- const int16_t *const x_filter1 =
- &x_filters[x_filter_idx1 * UPSCALE_NORMATIVE_TAPS];
- const int16_t *const x_filter2 =
- &x_filters[x_filter_idx2 * UPSCALE_NORMATIVE_TAPS];
- const int16_t *const x_filter3 =
- &x_filters[x_filter_idx3 * UPSCALE_NORMATIVE_TAPS];
-
- const __m128i fil0_16 = xx_loadu_128(x_filter0);
- const __m128i fil1_16 = xx_loadu_128(x_filter1);
- const __m128i fil2_16 = xx_loadu_128(x_filter2);
- const __m128i fil3_16 = xx_loadu_128(x_filter3);
-
- src_y = src;
- dst_y = dst;
- for (int y = 0; y < h; y++, src_y += src_stride, dst_y += dst_stride) {
- const uint8_t *const src_x0 =
- &src_y[(x_qn + 0 * x_step_qn) >> RS_SCALE_SUBPEL_BITS];
- const uint8_t *const src_x1 =
- &src_y[(x_qn + 1 * x_step_qn) >> RS_SCALE_SUBPEL_BITS];
- const uint8_t *const src_x2 =
- &src_y[(x_qn + 2 * x_step_qn) >> RS_SCALE_SUBPEL_BITS];
- const uint8_t *const src_x3 =
- &src_y[(x_qn + 3 * x_step_qn) >> RS_SCALE_SUBPEL_BITS];
-
- // Load up the source data. This is 8-bit input data, so each load
- // gets 8 pixels.
- const __m128i src0_8 = xx_loadl_64(src_x0);
- const __m128i src1_8 = xx_loadl_64(src_x1);
- const __m128i src2_8 = xx_loadl_64(src_x2);
- const __m128i src3_8 = xx_loadl_64(src_x3);
-
- // Now zero-extend up to 16-bit precision, i.e.
- // [ 00 00 00 00 hg fe dc ba ] -> [ 0h 0g 0f 0e 0d 0c 0b 0a ]
- const __m128i src0_16 = _mm_cvtepu8_epi16(src0_8);
- const __m128i src1_16 = _mm_cvtepu8_epi16(src1_8);
- const __m128i src2_16 = _mm_cvtepu8_epi16(src2_8);
- const __m128i src3_16 = _mm_cvtepu8_epi16(src3_8);
-
- // Multiply by filter coefficients (results in a 32-bit value),
- // and add adjacent pairs, i.e.
- // ([ s7 s6 s5 s4 s3 s2 s1 s0], [ f7 f6 f5 f4 f3 f2 f1 f0 ])
- // -> [ {s7*f7+s6*f6} {s5*f5+s4*f4} {s3*f3+s2*f2} {s1*f1+s0*f0} ]
- const __m128i conv0_32 = _mm_madd_epi16(src0_16, fil0_16);
- const __m128i conv1_32 = _mm_madd_epi16(src1_16, fil1_16);
- const __m128i conv2_32 = _mm_madd_epi16(src2_16, fil2_16);
- const __m128i conv3_32 = _mm_madd_epi16(src3_16, fil3_16);
-
- // Reduce horizontally and add, i.e.
- // ([ D C B A ], [ S R Q P ]) -> [ S+R Q+P D+C B+A ]
- const __m128i conv01_32 = _mm_hadd_epi32(conv0_32, conv1_32);
- const __m128i conv23_32 = _mm_hadd_epi32(conv2_32, conv3_32);
-
- const __m128i conv0123_32 = _mm_hadd_epi32(conv01_32, conv23_32);
-
- // Divide down by (1 << FILTER_BITS), rounding to nearest.
- const __m128i shifted_32 =
- _mm_srai_epi32(_mm_add_epi32(conv0123_32, round_add), FILTER_BITS);
-
- // Pack 32-bit values into 16-bit values, i.e.
- // ([ D C B A ], [ 0 0 0 0 ]) -> [ 0 0 0 0 D C B A ]
- const __m128i shifted_16 = _mm_packus_epi32(shifted_32, zero);
-
- // Pack 16-bit values into 8-bit values, i.e.
- // ([ 0 0 0 0 D C B A ], [ 0 0 0 0 0 0 0 0 ])
- // -> [ 0 0 0 0 0 0 DC BA ]
- const __m128i shifted_8 = _mm_packus_epi16(shifted_16, zero);
-
- // Write to the output
- xx_storel_32(&dst_y[x], shifted_8);
- }
- }
-}
-
-// Note: If the crop width is not a multiple of 4, then, unlike the C version,
-// this function will overwrite some of the padding on the right hand side of
-// the frame. This padding appears to be trashed anyway, so this should not
-// affect the running of the decoder.
-void av1_highbd_convolve_horiz_rs_sse4_1(const uint16_t *src, int src_stride,
- uint16_t *dst, int dst_stride, int w,
- int h, const int16_t *x_filters,
- int x0_qn, int x_step_qn, int bd) {
- assert(UPSCALE_NORMATIVE_TAPS == 8);
- assert(bd == 8 || bd == 10 || bd == 12);
-
- src -= UPSCALE_NORMATIVE_TAPS / 2 - 1;
-
- const __m128i round_add = _mm_set1_epi32((1 << FILTER_BITS) >> 1);
- const __m128i zero = _mm_setzero_si128();
- const __m128i clip_maximum = _mm_set1_epi16((1 << bd) - 1);
-
- const uint16_t *src_y;
- uint16_t *dst_y;
- int x_qn = x0_qn;
- for (int x = 0; x < w; x += 4, x_qn += 4 * x_step_qn) {
- const int x_filter_idx0 =
- ((x_qn + 0 * x_step_qn) & RS_SCALE_SUBPEL_MASK) >> RS_SCALE_EXTRA_BITS;
- const int x_filter_idx1 =
- ((x_qn + 1 * x_step_qn) & RS_SCALE_SUBPEL_MASK) >> RS_SCALE_EXTRA_BITS;
- const int x_filter_idx2 =
- ((x_qn + 2 * x_step_qn) & RS_SCALE_SUBPEL_MASK) >> RS_SCALE_EXTRA_BITS;
- const int x_filter_idx3 =
- ((x_qn + 3 * x_step_qn) & RS_SCALE_SUBPEL_MASK) >> RS_SCALE_EXTRA_BITS;
-
- assert(x_filter_idx0 <= RS_SUBPEL_MASK);
- assert(x_filter_idx1 <= RS_SUBPEL_MASK);
- assert(x_filter_idx2 <= RS_SUBPEL_MASK);
- assert(x_filter_idx3 <= RS_SUBPEL_MASK);
-
- const int16_t *const x_filter0 =
- &x_filters[x_filter_idx0 * UPSCALE_NORMATIVE_TAPS];
- const int16_t *const x_filter1 =
- &x_filters[x_filter_idx1 * UPSCALE_NORMATIVE_TAPS];
- const int16_t *const x_filter2 =
- &x_filters[x_filter_idx2 * UPSCALE_NORMATIVE_TAPS];
- const int16_t *const x_filter3 =
- &x_filters[x_filter_idx3 * UPSCALE_NORMATIVE_TAPS];
-
- const __m128i fil0_16 = xx_loadu_128(x_filter0);
- const __m128i fil1_16 = xx_loadu_128(x_filter1);
- const __m128i fil2_16 = xx_loadu_128(x_filter2);
- const __m128i fil3_16 = xx_loadu_128(x_filter3);
-
- src_y = src;
- dst_y = dst;
- for (int y = 0; y < h; y++, src_y += src_stride, dst_y += dst_stride) {
- const uint16_t *const src_x0 =
- &src_y[(x_qn + 0 * x_step_qn) >> RS_SCALE_SUBPEL_BITS];
- const uint16_t *const src_x1 =
- &src_y[(x_qn + 1 * x_step_qn) >> RS_SCALE_SUBPEL_BITS];
- const uint16_t *const src_x2 =
- &src_y[(x_qn + 2 * x_step_qn) >> RS_SCALE_SUBPEL_BITS];
- const uint16_t *const src_x3 =
- &src_y[(x_qn + 3 * x_step_qn) >> RS_SCALE_SUBPEL_BITS];
-
- // Load up the source data. This is 16-bit input data, so each load
- // gets 8 pixels.
- const __m128i src0_16 = xx_loadu_128(src_x0);
- const __m128i src1_16 = xx_loadu_128(src_x1);
- const __m128i src2_16 = xx_loadu_128(src_x2);
- const __m128i src3_16 = xx_loadu_128(src_x3);
-
- // Multiply by filter coefficients (results in a 32-bit value),
- // and add adjacent pairs, i.e.
- // ([ s7 s6 s5 s4 s3 s2 s1 s0], [ f7 f6 f5 f4 f3 f2 f1 f0 ])
- // -> [ {s7*f7+s6*f6} {s5*f5+s4*f4} {s3*f3+s2*f2} {s1*f1+s0*f0} ]
- const __m128i conv0_32 = _mm_madd_epi16(src0_16, fil0_16);
- const __m128i conv1_32 = _mm_madd_epi16(src1_16, fil1_16);
- const __m128i conv2_32 = _mm_madd_epi16(src2_16, fil2_16);
- const __m128i conv3_32 = _mm_madd_epi16(src3_16, fil3_16);
-
- // Reduce horizontally and add, i.e.
- // ([ D C B A ], [ S R Q P ]) -> [ S+R Q+P D+C B+A ]
- const __m128i conv01_32 = _mm_hadd_epi32(conv0_32, conv1_32);
- const __m128i conv23_32 = _mm_hadd_epi32(conv2_32, conv3_32);
-
- const __m128i conv0123_32 = _mm_hadd_epi32(conv01_32, conv23_32);
-
- // Divide down by (1 << FILTER_BITS), rounding to nearest.
- const __m128i shifted_32 =
- _mm_srai_epi32(_mm_add_epi32(conv0123_32, round_add), FILTER_BITS);
-
- // Pack 32-bit values into 16-bit values, i.e.
- // ([ D C B A ], [ 0 0 0 0 ]) -> [ 0 0 0 0 D C B A ]
- const __m128i shifted_16 = _mm_packus_epi32(shifted_32, zero);
-
- // Clip the values at (1 << bd) - 1
- const __m128i clipped_16 = _mm_min_epi16(shifted_16, clip_maximum);
-
- // Write to the output
- xx_storel_64(&dst_y[x], clipped_16);
- }
- }
-}
diff --git a/third_party/aom/av1/common/x86/av1_convolve_scale_sse4.c b/third_party/aom/av1/common/x86/av1_convolve_scale_sse4.c
deleted file mode 100644
index d9fb53785..000000000
--- a/third_party/aom/av1/common/x86/av1_convolve_scale_sse4.c
+++ /dev/null
@@ -1,499 +0,0 @@
-/*
- * Copyright (c) 2017, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#include <assert.h>
-#include <smmintrin.h>
-
-#include "config/aom_dsp_rtcd.h"
-
-#include "aom_dsp/aom_dsp_common.h"
-#include "aom_dsp/aom_filter.h"
-#include "av1/common/convolve.h"
-
-// A specialised version of hfilter, the horizontal filter for
-// av1_convolve_2d_scale_sse4_1. This version only supports 8 tap filters.
-//
-// Each output column x takes its source column and filter kernel from the
-// fixed-point position x_qn, which starts at subpel_x_qn and advances by
-// x_step_qn per column. The 8-tap filter is then applied to all h rows of
-// that column; results are shifted down by `round` bits and stored
-// transposed, i.e. the value for (row y, column x) goes to dst[y + x * h].
-static void hfilter8(const uint8_t *src, int src_stride, int16_t *dst, int w,
- int h, int subpel_x_qn, int x_step_qn,
- const InterpFilterParams *filter_params, unsigned round) {
- const int bd = 8;
- const int ntaps = 8;
-
- // Step back ntaps / 2 - 1 samples so the taps straddle the nominal position.
- src -= ntaps / 2 - 1;
-
- // Rounding term for the final shift, plus an offset of
- // 1 << (bd + FILTER_BITS - 1) that is added to every sum.
- int32_t round_add32 = (1 << round) / 2 + (1 << (bd + FILTER_BITS - 1));
- const __m128i round_add = _mm_set1_epi32(round_add32);
- const __m128i round_shift = _mm_cvtsi32_si128(round);
-
- int x_qn = subpel_x_qn;
- for (int x = 0; x < w; ++x, x_qn += x_step_qn) {
- // Integer part of x_qn selects the source column, the fractional part
- // selects the sub-pixel kernel.
- const uint8_t *const src_col = src + (x_qn >> SCALE_SUBPEL_BITS);
- const int filter_idx = (x_qn & SCALE_SUBPEL_MASK) >> SCALE_EXTRA_BITS;
- assert(filter_idx < SUBPEL_SHIFTS);
- const int16_t *filter =
- av1_get_interp_filter_subpel_kernel(filter_params, filter_idx);
-
- // Load the filter coefficients
- const __m128i coefflo = _mm_loadu_si128((__m128i *)filter);
- const __m128i zero = _mm_castps_si128(_mm_setzero_ps());
-
- // Vector path: four rows per iteration.
- int y;
- for (y = 0; y <= h - 4; y += 4) {
- const uint8_t *const src0 = src_col + y * src_stride;
- const uint8_t *const src1 = src0 + 1 * src_stride;
- const uint8_t *const src2 = src0 + 2 * src_stride;
- const uint8_t *const src3 = src0 + 3 * src_stride;
-
- // Load up source data. This is 8-bit input data; each load is just
- // loading the lower half of the register and gets 8 pixels
- const __m128i data08 = _mm_loadl_epi64((__m128i *)src0);
- const __m128i data18 = _mm_loadl_epi64((__m128i *)src1);
- const __m128i data28 = _mm_loadl_epi64((__m128i *)src2);
- const __m128i data38 = _mm_loadl_epi64((__m128i *)src3);
-
- // Now zero-extend up to 16-bit precision by interleaving with
- // zeros. Drop the upper half of each register (which just had zeros)
- const __m128i data0lo = _mm_unpacklo_epi8(data08, zero);
- const __m128i data1lo = _mm_unpacklo_epi8(data18, zero);
- const __m128i data2lo = _mm_unpacklo_epi8(data28, zero);
- const __m128i data3lo = _mm_unpacklo_epi8(data38, zero);
-
- // Multiply by coefficients
- const __m128i conv0lo = _mm_madd_epi16(data0lo, coefflo);
- const __m128i conv1lo = _mm_madd_epi16(data1lo, coefflo);
- const __m128i conv2lo = _mm_madd_epi16(data2lo, coefflo);
- const __m128i conv3lo = _mm_madd_epi16(data3lo, coefflo);
-
- // Reduce horizontally and add
- // After the two hadd levels, lane i of conv holds the full 8-tap sum
- // for row y + i.
- const __m128i conv01lo = _mm_hadd_epi32(conv0lo, conv1lo);
- const __m128i conv23lo = _mm_hadd_epi32(conv2lo, conv3lo);
- const __m128i conv = _mm_hadd_epi32(conv01lo, conv23lo);
-
- // Divide down by (1 << round), rounding to nearest.
- __m128i shifted =
- _mm_sra_epi32(_mm_add_epi32(conv, round_add), round_shift);
-
- // Narrow the four 32-bit results to 16 bits with unsigned saturation.
- shifted = _mm_packus_epi32(shifted, shifted);
- // Write transposed to the output
- _mm_storel_epi64((__m128i *)(dst + y + x * h), shifted);
- }
- // Scalar path for the rows left over when h is not a multiple of 4.
- for (; y < h; ++y) {
- const uint8_t *const src_row = src_col + y * src_stride;
-
- int32_t sum = (1 << (bd + FILTER_BITS - 1));
- for (int k = 0; k < ntaps; ++k) {
- sum += filter[k] * src_row[k];
- }
-
- // NOTE(review): ROUND_POWER_OF_TWO is defined elsewhere; presumably it
- // adds the same rounding term as round_add above - confirm.
- dst[y + x * h] = ROUND_POWER_OF_TWO(sum, round);
- }
- }
-}
-
-// Loads eight 16-bit samples from src (no alignment required), multiplies
-// them lane by lane with coeff and adds adjacent products, returning the
-// four 32-bit pair sums.
-static __m128i convolve_16_8(const int16_t *src, __m128i coeff) {
-  return _mm_madd_epi16(_mm_loadu_si128((__m128i *)src), coeff);
-}
-
-// A specialised version of vfilter, the vertical filter for
-// av1_convolve_2d_scale_sse4_1. This version only supports 8 tap filters.
-//
-// src is read as transposed data: the samples feeding output column x start
-// at src + x * src_stride. For each output row y the start offset and the
-// kernel are taken from the fixed-point position y_qn, which starts at
-// subpel_y_qn and advances by y_step_qn per row. Depending on conv_params the
-// result is stored to the compound buffer conv_params->dst, combined with the
-// value already in that buffer and written to dst, or written to dst directly.
-static void vfilter8(const int16_t *src, int src_stride, uint8_t *dst,
- int dst_stride, int w, int h, int subpel_y_qn,
- int y_step_qn, const InterpFilterParams *filter_params,
- const ConvolveParams *conv_params, int bd) {
- const int offset_bits = bd + 2 * FILTER_BITS - conv_params->round_0;
- const int ntaps = 8;
-
- const __m128i round_shift = _mm_cvtsi32_si128(conv_params->round_1);
-
- // Offset removed from the value just before it is converted to a pixel.
- const int32_t sub32 = ((1 << (offset_bits - conv_params->round_1)) +
- (1 << (offset_bits - conv_params->round_1 - 1)));
- const __m128i sub = _mm_set1_epi16(sub32);
-
- CONV_BUF_TYPE *dst16 = conv_params->dst;
- const int dst16_stride = conv_params->dst_stride;
- // Shift still outstanding after round_0 and round_1 have been applied.
- const int bits =
- FILTER_BITS * 2 - conv_params->round_0 - conv_params->round_1;
- const __m128i bits_shift = _mm_cvtsi32_si128(bits);
- const __m128i bits_const = _mm_set1_epi16(((1 << bits) >> 1));
- const __m128i round_shift_add =
- _mm_set1_epi32(((1 << conv_params->round_1) >> 1));
- const __m128i res_add_const = _mm_set1_epi32(1 << offset_bits);
-
- // Weights for the use_jnt_comp_avg path, interleaved as (w0, w1) pairs so
- // that one madd computes buffer * w0 + new * w1.
- const int w0 = conv_params->fwd_offset;
- const int w1 = conv_params->bck_offset;
- const __m128i wt0 = _mm_set1_epi16(w0);
- const __m128i wt1 = _mm_set1_epi16(w1);
- const __m128i wt = _mm_unpacklo_epi16(wt0, wt1);
-
- int y_qn = subpel_y_qn;
- for (int y = 0; y < h; ++y, y_qn += y_step_qn) {
- const int16_t *src_y = src + (y_qn >> SCALE_SUBPEL_BITS);
- const int filter_idx = (y_qn & SCALE_SUBPEL_MASK) >> SCALE_EXTRA_BITS;
- assert(filter_idx < SUBPEL_SHIFTS);
- const int16_t *filter =
- av1_get_interp_filter_subpel_kernel(filter_params, filter_idx);
-
- const __m128i coeff0716 = _mm_loadu_si128((__m128i *)filter);
- // Vector path: four output pixels per iteration.
- int x;
- for (x = 0; x <= w - 4; x += 4) {
- const int16_t *const src0 = src_y + x * src_stride;
- const int16_t *const src1 = src0 + 1 * src_stride;
- const int16_t *const src2 = src0 + 2 * src_stride;
- const int16_t *const src3 = src0 + 3 * src_stride;
-
- // Load the eight samples for each of the four output pixels and multiply
- // them by the filter; every call returns four 32-bit pair sums
- // (conv0..conv3).
- const __m128i conv0 = convolve_16_8(src0, coeff0716);
- const __m128i conv1 = convolve_16_8(src1, coeff0716);
- const __m128i conv2 = convolve_16_8(src2, coeff0716);
- const __m128i conv3 = convolve_16_8(src3, coeff0716);
-
- // Now reduce horizontally to get one lane for each result
- const __m128i conv01 = _mm_hadd_epi32(conv0, conv1);
- const __m128i conv23 = _mm_hadd_epi32(conv2, conv3);
- __m128i conv = _mm_hadd_epi32(conv01, conv23);
-
- conv = _mm_add_epi32(conv, res_add_const);
- // Divide down by (1 << round_1), rounding to nearest. sub32 is only
- // subtracted further down, in the branches that produce pixels.
- __m128i shifted =
- _mm_sra_epi32(_mm_add_epi32(conv, round_shift_add), round_shift);
-
- uint8_t *dst_x = dst + y * dst_stride + x;
- CONV_BUF_TYPE *dst_16_x = dst16 + y * dst16_stride + x;
- __m128i result;
- __m128i shifted_16 = _mm_packus_epi32(shifted, shifted);
-
- if (conv_params->is_compound) {
- if (conv_params->do_average) {
- // Combine with the four values already in the compound buffer.
- const __m128i p_16 = _mm_loadl_epi64((__m128i *)dst_16_x);
- if (conv_params->use_jnt_comp_avg) {
- // Weighted sum: buffer * w0 + new * w1, then >> DIST_PRECISION_BITS.
- const __m128i p_16_lo = _mm_unpacklo_epi16(p_16, shifted_16);
- const __m128i wt_res_lo = _mm_madd_epi16(p_16_lo, wt);
- const __m128i shifted_32 =
- _mm_srai_epi32(wt_res_lo, DIST_PRECISION_BITS);
- shifted_16 = _mm_packus_epi32(shifted_32, shifted_32);
- } else {
- // Plain average of the buffered and the new value.
- shifted_16 = _mm_srai_epi16(_mm_add_epi16(p_16, shifted_16), 1);
- }
- // Remove the offset, apply the remaining rounding shift, saturate to
- // 8 bits and store four pixels.
- const __m128i subbed = _mm_sub_epi16(shifted_16, sub);
- result = _mm_sra_epi16(_mm_add_epi16(subbed, bits_const), bits_shift);
- const __m128i result_8 = _mm_packus_epi16(result, result);
- *(uint32_t *)dst_x = _mm_cvtsi128_si32(result_8);
- } else {
- // No averaging: keep the 16-bit values in the compound buffer.
- _mm_storel_epi64((__m128i *)dst_16_x, shifted_16);
- }
- } else {
- // Non-compound: remove the offset, round, saturate and store pixels.
- const __m128i subbed = _mm_sub_epi16(shifted_16, sub);
- result = _mm_sra_epi16(_mm_add_epi16(subbed, bits_const), bits_shift);
- const __m128i result_8 = _mm_packus_epi16(result, result);
- *(uint32_t *)dst_x = _mm_cvtsi128_si32(result_8);
- }
- }
- // Scalar path for the columns left over when w is not a multiple of 4;
- // follows the same three branches as the vector path.
- for (; x < w; ++x) {
- const int16_t *src_x = src_y + x * src_stride;
- int32_t sum = 1 << offset_bits;
- for (int k = 0; k < ntaps; ++k) sum += filter[k] * src_x[k];
- CONV_BUF_TYPE res = ROUND_POWER_OF_TWO(sum, conv_params->round_1);
-
- if (conv_params->is_compound) {
- if (conv_params->do_average) {
- int32_t tmp = dst16[y * dst16_stride + x];
- if (conv_params->use_jnt_comp_avg) {
- tmp = tmp * conv_params->fwd_offset + res * conv_params->bck_offset;
- tmp = tmp >> DIST_PRECISION_BITS;
- } else {
- tmp += res;
- tmp = tmp >> 1;
- }
- /* Subtract round offset and convolve round */
- tmp = tmp - sub32;
- dst[y * dst_stride + x] = clip_pixel(ROUND_POWER_OF_TWO(tmp, bits));
- } else {
- dst16[y * dst16_stride + x] = res;
- }
- } else {
- /* Subtract round offset and convolve round */
- int32_t tmp = res - ((1 << (offset_bits - conv_params->round_1)) +
- (1 << (offset_bits - conv_params->round_1 - 1)));
- dst[y * dst_stride + x] = clip_pixel(ROUND_POWER_OF_TWO(tmp, bits));
- }
- }
- }
-}
-// 8-bit scaled 2-D convolution. Runs the 8-tap horizontal pass into a
-// transposed intermediate buffer on the stack, then the 8-tap vertical pass
-// from that buffer into dst8 (or into conv_params->dst, depending on the
-// compound settings in conv_params). Both filters must have 8 taps.
-void av1_convolve_2d_scale_sse4_1(const uint8_t *src, int src_stride,
-                                  uint8_t *dst8, int dst8_stride, int w, int h,
-                                  const InterpFilterParams *filter_params_x,
-                                  const InterpFilterParams *filter_params_y,
-                                  const int subpel_x_qn, const int x_step_qn,
-                                  const int subpel_y_qn, const int y_step_qn,
-                                  ConvolveParams *conv_params) {
-  // TODO(yaowu): remove unnecessary initializations
-  int16_t tmp[(2 * MAX_SB_SIZE + MAX_FILTER_TAP) * MAX_SB_SIZE] = { 0 };
-
-  const int xtaps = filter_params_x->taps;
-  const int ytaps = filter_params_y->taps;
-
-  // Number of intermediate rows: integer position of the last output row
-  // plus one full set of vertical taps.
-  const int last_row_qn = (h - 1) * y_step_qn + subpel_y_qn;
-  const int im_h = (last_row_qn >> SCALE_SUBPEL_BITS) + ytaps;
-
-  // Rows above the block that the vertical filter reaches back to.
-  const int fo_vert = ytaps / 2 - 1;
-  const uint8_t *const src_top = src - fo_vert * src_stride;
-
-  assert((xtaps == 8) && (ytaps == 8));
-  (void)xtaps;
-
-  // horizontal filter
-  hfilter8(src_top, src_stride, tmp, w, im_h, subpel_x_qn, x_step_qn,
-           filter_params_x, conv_params->round_0);
-
-  // vertical filter (input is transposed)
-  vfilter8(tmp, im_h, dst8, dst8_stride, w, h, subpel_y_qn, y_step_qn,
-           filter_params_y, conv_params, 8);
-}
-
-// A specialised version of hfilter, the horizontal filter for
-// av1_highbd_convolve_2d_scale_sse4_1. This version only supports 8 tap
-// filters.
-//
-// Same structure as the 8-bit hfilter8, but the source samples are already
-// 16 bits wide and the bit depth bd is a parameter. Each output column x
-// takes its source column and kernel from the fixed-point position x_qn
-// (starting at subpel_x_qn, advancing by x_step_qn); results are shifted down
-// by `round` bits and stored transposed at dst[y + x * h].
-static void highbd_hfilter8(const uint16_t *src, int src_stride, int16_t *dst,
- int w, int h, int subpel_x_qn, int x_step_qn,
- const InterpFilterParams *filter_params,
- unsigned round, int bd) {
- const int ntaps = 8;
-
- // Step back ntaps / 2 - 1 samples so the taps straddle the nominal position.
- src -= ntaps / 2 - 1;
-
- // Rounding term for the final shift, plus an offset of
- // 1 << (bd + FILTER_BITS - 1) that is added to every sum.
- int32_t round_add32 = (1 << round) / 2 + (1 << (bd + FILTER_BITS - 1));
- const __m128i round_add = _mm_set1_epi32(round_add32);
- const __m128i round_shift = _mm_cvtsi32_si128(round);
-
- int x_qn = subpel_x_qn;
- for (int x = 0; x < w; ++x, x_qn += x_step_qn) {
- // Integer part of x_qn selects the source column, the fractional part
- // selects the sub-pixel kernel.
- const uint16_t *const src_col = src + (x_qn >> SCALE_SUBPEL_BITS);
- const int filter_idx = (x_qn & SCALE_SUBPEL_MASK) >> SCALE_EXTRA_BITS;
- assert(filter_idx < SUBPEL_SHIFTS);
- const int16_t *filter =
- av1_get_interp_filter_subpel_kernel(filter_params, filter_idx);
-
- // Load the filter coefficients
- const __m128i coefflo = _mm_loadu_si128((__m128i *)filter);
-
- // Vector path: four rows per iteration.
- int y;
- for (y = 0; y <= h - 4; y += 4) {
- const uint16_t *const src0 = src_col + y * src_stride;
- const uint16_t *const src1 = src0 + 1 * src_stride;
- const uint16_t *const src2 = src0 + 2 * src_stride;
- const uint16_t *const src3 = src0 + 3 * src_stride;
-
- // Load up source data. This is 16-bit input data, so each load gets the 8
- // pixels we need.
- const __m128i data0lo = _mm_loadu_si128((__m128i *)src0);
- const __m128i data1lo = _mm_loadu_si128((__m128i *)src1);
- const __m128i data2lo = _mm_loadu_si128((__m128i *)src2);
- const __m128i data3lo = _mm_loadu_si128((__m128i *)src3);
-
- // Multiply by coefficients
- const __m128i conv0lo = _mm_madd_epi16(data0lo, coefflo);
- const __m128i conv1lo = _mm_madd_epi16(data1lo, coefflo);
- const __m128i conv2lo = _mm_madd_epi16(data2lo, coefflo);
- const __m128i conv3lo = _mm_madd_epi16(data3lo, coefflo);
-
- // Reduce horizontally and add
- // After the two hadd levels, lane i of conv holds the full 8-tap sum
- // for row y + i.
- const __m128i conv01lo = _mm_hadd_epi32(conv0lo, conv1lo);
- const __m128i conv23lo = _mm_hadd_epi32(conv2lo, conv3lo);
- const __m128i conv = _mm_hadd_epi32(conv01lo, conv23lo);
-
- // Divide down by (1 << round), rounding to nearest.
- __m128i shifted =
- _mm_sra_epi32(_mm_add_epi32(conv, round_add), round_shift);
-
- // Narrow the four 32-bit results to 16 bits with unsigned saturation.
- shifted = _mm_packus_epi32(shifted, shifted);
- // Write transposed to the output
- _mm_storel_epi64((__m128i *)(dst + y + x * h), shifted);
- }
- // Scalar path for the rows left over when h is not a multiple of 4.
- for (; y < h; ++y) {
- const uint16_t *const src_row = src_col + y * src_stride;
-
- int32_t sum = (1 << (bd + FILTER_BITS - 1));
- for (int k = 0; k < ntaps; ++k) {
- sum += filter[k] * src_row[k];
- }
-
- // NOTE(review): ROUND_POWER_OF_TWO is defined elsewhere; presumably it
- // adds the same rounding term as round_add above - confirm.
- dst[y + x * h] = ROUND_POWER_OF_TWO(sum, round);
- }
- }
-}
-// A specialised version of vfilter, the vertical filter for
-// av1_highbd_convolve_2d_scale_sse4_1. This version only supports 8 tap
-// filters.
-//
-// src is read as transposed data: the samples feeding output column x start
-// at src + x * src_stride. For each output row y the start offset and the
-// kernel are taken from the fixed-point position y_qn, which starts at
-// subpel_y_qn and advances by y_step_qn per row. Depending on conv_params the
-// result is stored to the compound buffer conv_params->dst, combined with the
-// value already in that buffer and written to dst, or written to dst
-// directly. Pixels written to dst are clipped to the range implied by bd.
-static void highbd_vfilter8(const int16_t *src, int src_stride, uint16_t *dst,
-                            int dst_stride, int w, int h, int subpel_y_qn,
-                            int y_step_qn,
-                            const InterpFilterParams *filter_params,
-                            const ConvolveParams *conv_params, int bd) {
-  const int offset_bits = bd + 2 * FILTER_BITS - conv_params->round_0;
-  const int ntaps = 8;
-
-  const __m128i round_shift = _mm_cvtsi32_si128(conv_params->round_1);
-
-  // Offset removed from the value just before it is converted to a pixel.
-  const int32_t sub32 = ((1 << (offset_bits - conv_params->round_1)) +
-                         (1 << (offset_bits - conv_params->round_1 - 1)));
-  const __m128i sub = _mm_set1_epi32(sub32);
-
-  CONV_BUF_TYPE *dst16 = conv_params->dst;
-  const int dst16_stride = conv_params->dst_stride;
-  const __m128i clip_pixel_ =
-      _mm_set1_epi16(bd == 10 ? 1023 : (bd == 12 ? 4095 : 255));
-  // Shift still outstanding after round_0 and round_1 have been applied.
-  const int bits =
-      FILTER_BITS * 2 - conv_params->round_0 - conv_params->round_1;
-  const __m128i bits_shift = _mm_cvtsi32_si128(bits);
-  const __m128i bits_const = _mm_set1_epi32(((1 << bits) >> 1));
-  const __m128i round_shift_add =
-      _mm_set1_epi32(((1 << conv_params->round_1) >> 1));
-  const __m128i res_add_const = _mm_set1_epi32(1 << offset_bits);
-  const int round_bits =
-      2 * FILTER_BITS - conv_params->round_0 - conv_params->round_1;
-  __m128i round_bits_shift = _mm_cvtsi32_si128(round_bits);
-  __m128i round_bits_const = _mm_set1_epi32(((1 << round_bits) >> 1));
-
-  // Weights for the use_jnt_comp_avg path.
-  const int w0 = conv_params->fwd_offset;
-  const int w1 = conv_params->bck_offset;
-  const __m128i wt0 = _mm_set1_epi32(w0);
-  const __m128i wt1 = _mm_set1_epi32(w1);
-
-  int y_qn = subpel_y_qn;
-  for (int y = 0; y < h; ++y, y_qn += y_step_qn) {
-    const int16_t *src_y = src + (y_qn >> SCALE_SUBPEL_BITS);
-    const int filter_idx = (y_qn & SCALE_SUBPEL_MASK) >> SCALE_EXTRA_BITS;
-    assert(filter_idx < SUBPEL_SHIFTS);
-    const int16_t *filter =
-        av1_get_interp_filter_subpel_kernel(filter_params, filter_idx);
-
-    const __m128i coeff0716 = _mm_loadu_si128((__m128i *)filter);
-    // Vector path: four output pixels per iteration.
-    int x;
-    for (x = 0; x <= w - 4; x += 4) {
-      const int16_t *const src0 = src_y + x * src_stride;
-      const int16_t *const src1 = src0 + 1 * src_stride;
-      const int16_t *const src2 = src0 + 2 * src_stride;
-      const int16_t *const src3 = src0 + 3 * src_stride;
-
-      // Load the eight samples for each of the four output pixels and
-      // multiply them by the filter; every call returns four 32-bit pair
-      // sums (conv0..conv3).
-      const __m128i conv0 = convolve_16_8(src0, coeff0716);
-      const __m128i conv1 = convolve_16_8(src1, coeff0716);
-      const __m128i conv2 = convolve_16_8(src2, coeff0716);
-      const __m128i conv3 = convolve_16_8(src3, coeff0716);
-
-      // Now reduce horizontally to get one lane for each result
-      const __m128i conv01 = _mm_hadd_epi32(conv0, conv1);
-      const __m128i conv23 = _mm_hadd_epi32(conv2, conv3);
-      __m128i conv = _mm_hadd_epi32(conv01, conv23);
-      conv = _mm_add_epi32(conv, res_add_const);
-
-      // Divide down by (1 << round_1), rounding to nearest. sub32 is only
-      // subtracted further down, in the branches that produce pixels.
-      __m128i shifted =
-          _mm_sra_epi32(_mm_add_epi32(conv, round_shift_add), round_shift);
-
-      uint16_t *dst_x = dst + y * dst_stride + x;
-      CONV_BUF_TYPE *dst_16_x = dst16 + y * dst16_stride + x;
-
-      __m128i result;
-      if (conv_params->is_compound) {
-        if (conv_params->do_average) {
-          // Combine with the four values already in the compound buffer.
-          __m128i p_32 =
-              _mm_cvtepu16_epi32(_mm_loadl_epi64((__m128i *)dst_16_x));
-
-          if (conv_params->use_jnt_comp_avg) {
-            // Weighted sum: buffer * w0 + new * w1, >> DIST_PRECISION_BITS.
-            shifted = _mm_add_epi32(_mm_mullo_epi32(p_32, wt0),
-                                    _mm_mullo_epi32(shifted, wt1));
-            shifted = _mm_srai_epi32(shifted, DIST_PRECISION_BITS);
-          } else {
-            // Plain average of the buffered and the new value.
-            shifted = _mm_srai_epi32(_mm_add_epi32(p_32, shifted), 1);
-          }
-          // Remove the offset, apply the remaining rounding shift, then
-          // narrow, clip and store four pixels.
-          __m128i res32 = _mm_sub_epi32(shifted, sub);
-          res32 = _mm_sra_epi32(_mm_add_epi32(res32, round_bits_const),
-                                round_bits_shift);
-
-          __m128i res16 = _mm_packus_epi32(res32, res32);
-          res16 = _mm_min_epi16(res16, clip_pixel_);
-          _mm_storel_epi64((__m128i *)dst_x, res16);
-        } else {
-          // No averaging: keep the 16-bit values in the compound buffer.
-          __m128i shifted_16 = _mm_packus_epi32(shifted, shifted);
-          _mm_storel_epi64((__m128i *)dst_16_x, shifted_16);
-        }
-      } else {
-        const __m128i subbed = _mm_sub_epi32(shifted, sub);
-        // The lanes are 32 bits wide here, so the shift has to be the 32-bit
-        // one. A 16-bit shift would shift the two halves of every lane
-        // independently and give wrong pixels whenever bits > 0.
-        result = _mm_sra_epi32(_mm_add_epi32(subbed, bits_const), bits_shift);
-        result = _mm_packus_epi32(result, result);
-        result = _mm_min_epi16(result, clip_pixel_);
-        _mm_storel_epi64((__m128i *)dst_x, result);
-      }
-    }
-
-    // Scalar path for the columns left over when w is not a multiple of 4;
-    // follows the same three branches as the vector path.
-    for (; x < w; ++x) {
-      const int16_t *src_x = src_y + x * src_stride;
-      int32_t sum = 1 << offset_bits;
-      for (int k = 0; k < ntaps; ++k) sum += filter[k] * src_x[k];
-      CONV_BUF_TYPE res = ROUND_POWER_OF_TWO(sum, conv_params->round_1);
-      if (conv_params->is_compound) {
-        if (conv_params->do_average) {
-          int32_t tmp = dst16[y * dst16_stride + x];
-          if (conv_params->use_jnt_comp_avg) {
-            tmp = tmp * conv_params->fwd_offset + res * conv_params->bck_offset;
-            tmp = tmp >> DIST_PRECISION_BITS;
-          } else {
-            tmp += res;
-            tmp = tmp >> 1;
-          }
-          /* Subtract round offset and convolve round */
-          tmp = tmp - sub32;
-          dst[y * dst_stride + x] =
-              clip_pixel_highbd(ROUND_POWER_OF_TWO(tmp, bits), bd);
-        } else {
-          dst16[y * dst16_stride + x] = res;
-        }
-      } else {
-        /* Subtract round offset and convolve round */
-        int32_t tmp = res - sub32;
-        dst[y * dst_stride + x] =
-            clip_pixel_highbd(ROUND_POWER_OF_TWO(tmp, bits), bd);
-      }
-    }
-  }
-}
-
-// High bit-depth scaled 2-D convolution. Runs the 8-tap horizontal pass into
-// a zeroed, transposed intermediate buffer on the stack, then the 8-tap
-// vertical pass from that buffer into dst (or into conv_params->dst,
-// depending on the compound settings). Both filters must have 8 taps.
-void av1_highbd_convolve_2d_scale_sse4_1(
-    const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w,
-    int h, const InterpFilterParams *filter_params_x,
-    const InterpFilterParams *filter_params_y, const int subpel_x_qn,
-    const int x_step_qn, const int subpel_y_qn, const int y_step_qn,
-    ConvolveParams *conv_params, int bd) {
-  // TODO(yaowu): Move this out of stack
-  DECLARE_ALIGNED(16, int16_t,
-                  tmp[(2 * MAX_SB_SIZE + MAX_FILTER_TAP) * MAX_SB_SIZE]);
-
-  const int xtaps = filter_params_x->taps;
-  const int ytaps = filter_params_y->taps;
-
-  // Number of intermediate rows: integer position of the last output row
-  // plus one full set of vertical taps.
-  const int last_row_qn = (h - 1) * y_step_qn + subpel_y_qn;
-  const int im_h = (last_row_qn >> SCALE_SUBPEL_BITS) + ytaps;
-
-  // Rows above the block that the vertical filter reaches back to.
-  const int fo_vert = ytaps / 2 - 1;
-  const uint16_t *const src_top = src - fo_vert * src_stride;
-
-  memset(tmp, 0, sizeof(tmp));
-  assert((xtaps == 8) && (ytaps == 8));
-  (void)xtaps;
-
-  // horizontal filter
-  highbd_hfilter8(src_top, src_stride, tmp, w, im_h, subpel_x_qn, x_step_qn,
-                  filter_params_x, conv_params->round_0, bd);
-
-  // vertical filter (input is transposed)
-  highbd_vfilter8(tmp, im_h, dst, dst_stride, w, h, subpel_y_qn, y_step_qn,
-                  filter_params_y, conv_params, bd);
-}
diff --git a/third_party/aom/av1/common/x86/av1_highbd_convolve_sse4.c b/third_party/aom/av1/common/x86/av1_highbd_convolve_sse4.c
deleted file mode 100644
index 212d3bd72..000000000
--- a/third_party/aom/av1/common/x86/av1_highbd_convolve_sse4.c
+++ /dev/null
@@ -1,205 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#include <assert.h>
-#include <smmintrin.h>
-
-#include "config/av1_rtcd.h"
-
-#include "av1/common/filter.h"
-
-// Signature shared by trans_save_4x4 and trans_accum_save_4x4: takes a block
-// of 32-bit values at src and writes 16-bit pixels to dst.
-typedef void (*TransposeSave)(int width, int pixelsNum, uint32_t *src,
- int src_stride, uint16_t *dst, int dst_stride,
- int bd);
-
-// Stores rows of pixels from u to dst, one vector per row, rows being
-// dst_stride apart. width == 2 stores the low 32 bits (two pixels) of each
-// vector; any other width stores the low 64 bits (four pixels).
-// pixelsNum 0: write all 4 rows
-//           1/2/3: write only the first 1/2/3 rows
-// Any other pixelsNum writes nothing.
-static void writePixel(__m128i *u, int width, int pixelsNum, uint16_t *dst,
-                       int dst_stride) {
-  int rows;
-  int r;
-
-  if (pixelsNum == 0) {
-    rows = 4;
-  } else if (pixelsNum >= 1 && pixelsNum <= 3) {
-    rows = pixelsNum;
-  } else {
-    return;
-  }
-
-  for (r = 0; r < rows; ++r) {
-    uint16_t *const row = dst + r * dst_stride;
-    if (width == 2) {
-      *(int *)row = _mm_cvtsi128_si32(u[r]);
-    } else {
-      _mm_storel_epi64((__m128i *)row, u[r]);
-    }
-  }
-}
-
-// Clamps every signed 16-bit lane of p[0..numVecs-1] to [0, (1 << bd) - 1],
-// in place (bd is 10 or 12 for the callers' pixel data).
-static void highbd_clip(__m128i *p, int numVecs, int bd) {
-  const __m128i floor_val = _mm_setzero_si128();
-  const __m128i unit = _mm_set1_epi16(1);
-  const __m128i ceil_val = _mm_sub_epi16(_mm_slli_epi16(unit, bd), unit);
-  int idx;
-
-  for (idx = 0; idx < numVecs; idx++) {
-    // Signed min against the maximum, then signed max against zero.
-    const __m128i capped = _mm_min_epi16(p[idx], ceil_val);
-    p[idx] = _mm_max_epi16(capped, floor_val);
-  }
-}
-
-// Reads a 4x4 block of 32-bit values (rows src_stride apart), divides each by
-// (1 << FILTER_BITS) with rounding, narrows to 16 bits with unsigned
-// saturation, clips to the bd-bit range and transposes the block. On return
-// the low 64 bits of u[c] hold input column c.
-static void transClipPixel(uint32_t *src, int src_stride, __m128i *u, int bd) {
-  const __m128i half = _mm_set1_epi32(1 << (FILTER_BITS - 1));
-  __m128i even_rows, odd_rows;
-  int r;
-
-  for (r = 0; r < 4; ++r) {
-    const __m128i row =
-        _mm_loadu_si128((__m128i const *)(src + r * src_stride));
-    u[r] = _mm_srai_epi32(_mm_add_epi32(row, half), FILTER_BITS);
-  }
-
-  // u[0] = rows 0 and 1, u[1] = rows 2 and 3, as 16-bit lanes.
-  u[0] = _mm_packus_epi32(u[0], u[1]);
-  u[1] = _mm_packus_epi32(u[2], u[3]);
-
-  highbd_clip(u, 2, bd);
-
-  // Two rounds of 16-bit interleaving transpose the 4x4 block.
-  even_rows = _mm_unpacklo_epi16(u[0], u[1]);
-  odd_rows = _mm_unpackhi_epi16(u[0], u[1]);
-
-  u[0] = _mm_unpacklo_epi16(even_rows, odd_rows);
-  u[2] = _mm_unpackhi_epi16(even_rows, odd_rows);
-
-  u[1] = _mm_srli_si128(u[0], 8);
-  u[3] = _mm_srli_si128(u[2], 8);
-}
-
-// Rounds, clips and transposes a 4x4 block from src, then writes it to dst.
-// pixelsNum = 0 : all 4 rows of pixels will be saved.
-// pixelsNum = 1/2/3 : only the first 1/2/3 rows of pixels will be saved.
-void trans_save_4x4(int width, int pixelsNum, uint32_t *src, int src_stride,
-                    uint16_t *dst, int dst_stride, int bd) {
-  __m128i rows[4];
-
-  transClipPixel(src, src_stride, rows, bd);
-  writePixel(rows, width, pixelsNum, dst, dst_stride);
-}
-
-// Like trans_save_4x4, but averages the new block with the pixels already in
-// dst before writing: each stored value is (new + old + 1) >> 1.
-void trans_accum_save_4x4(int width, int pixelsNum, uint32_t *src,
-                          int src_stride, uint16_t *dst, int dst_stride,
-                          int bd) {
-  __m128i u[4];
-  const __m128i ones = _mm_set1_epi16(1);
-  int r;
-
-  transClipPixel(src, src_stride, u, bd);
-
-  for (r = 0; r < 4; ++r) {
-    // Four existing pixels of row r.
-    const __m128i prev =
-        _mm_loadl_epi64((__m128i const *)(dst + r * dst_stride));
-    const __m128i sum = _mm_add_epi16(_mm_add_epi16(u[r], prev), ones);
-    u[r] = _mm_srai_epi16(sum, 1);
-  }
-
-  writePixel(u, width, pixelsNum, dst, dst_stride);
-}
-
-// Vertical convolutional filter
-
-// Signature of the write2pixels*/write4pixels* helpers below: round, clip and
-// store the filtered values in u[0] to dst.
-typedef void (*WritePixels)(__m128i *u, int bd, uint16_t *dst);
-
-// Divides the four 32-bit lanes of u[0] by (1 << FILTER_BITS) with rounding
-// and narrows them to 16 bits with unsigned saturation; the four results are
-// duplicated in both halves of u[0].
-static void highbdRndingPacks(__m128i *u) {
-  const __m128i half = _mm_set1_epi32(1 << (FILTER_BITS - 1));
-  const __m128i rounded =
-      _mm_srai_epi32(_mm_add_epi32(u[0], half), FILTER_BITS);
-
-  u[0] = _mm_packus_epi32(rounded, rounded);
-}
-
-// Rounds and clips u[0], then overwrites two pixels at dst with its two
-// lowest 16-bit lanes.
-static void write2pixelsOnly(__m128i *u, int bd, uint16_t *dst) {
-  uint32_t *const out = (uint32_t *)dst;
-
-  highbdRndingPacks(u);
-  highbd_clip(u, 1, bd);
-  *out = _mm_cvtsi128_si32(u[0]);
-}
-
-// Rounds and clips u[0], then averages its two lowest 16-bit lanes with the
-// two pixels already at dst: each stored value is (new + old + 1) >> 1.
-static void write2pixelsAccum(__m128i *u, int bd, uint16_t *dst) {
-  // Only two pixels (32 bits) belong to this call, so load exactly that much.
-  // A 64-bit load would read two pixels past the ones being updated.
-  __m128i v = _mm_cvtsi32_si128(*(const int *)dst);
-  const __m128i ones = _mm_set1_epi16(1);
-
-  highbdRndingPacks(u);
-  highbd_clip(u, 1, bd);
-
-  v = _mm_add_epi16(v, u[0]);
-  v = _mm_add_epi16(v, ones);
-  v = _mm_srai_epi16(v, 1);
-  *(uint32_t *)dst = _mm_cvtsi128_si32(v);
-}
-
-// Two-pixel writers: index 0 overwrites dst, index 1 averages with dst.
-WritePixels write2pixelsTab[2] = { write2pixelsOnly, write2pixelsAccum };
-
-// Rounds and clips u[0], then overwrites four pixels at dst with its four
-// lowest 16-bit lanes.
-static void write4pixelsOnly(__m128i *u, int bd, uint16_t *dst) {
-  __m128i *const out = (__m128i *)dst;
-
-  highbdRndingPacks(u);
-  highbd_clip(u, 1, bd);
-  _mm_storel_epi64(out, u[0]);
-}
-
-// Rounds and clips u[0], then averages its four lowest 16-bit lanes with the
-// four pixels already at dst: each stored value is (new + old + 1) >> 1.
-static void write4pixelsAccum(__m128i *u, int bd, uint16_t *dst) {
-  const __m128i prev = _mm_loadl_epi64((__m128i const *)dst);
-  __m128i sum;
-
-  highbdRndingPacks(u);
-  highbd_clip(u, 1, bd);
-
-  sum = _mm_add_epi16(_mm_add_epi16(prev, u[0]), _mm_set1_epi16(1));
-  _mm_storel_epi64((__m128i *)dst, _mm_srai_epi16(sum, 1));
-}
-
-// Four-pixel writers: index 0 overwrites dst, index 1 averages with dst.
-WritePixels write4pixelsTab[2] = { write4pixelsOnly, write4pixelsAccum };
diff --git a/third_party/aom/av1/common/x86/av1_inv_txfm_avx2.c b/third_party/aom/av1/common/x86/av1_inv_txfm_avx2.c
deleted file mode 100644
index 5db2ccf6c..000000000
--- a/third_party/aom/av1/common/x86/av1_inv_txfm_avx2.c
+++ /dev/null
@@ -1,1945 +0,0 @@
-/*
- * Copyright (c) 2018, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#include "config/aom_config.h"
-
-#include "config/av1_rtcd.h"
-
-#include "av1/common/av1_inv_txfm1d_cfg.h"
-#include "av1/common/x86/av1_txfm_sse2.h"
-#include "av1/common/x86/av1_inv_txfm_avx2.h"
-#include "av1/common/x86/av1_inv_txfm_ssse3.h"
-
-// TODO(venkatsanampudi@ittiam.com): move this to header file
-
-// Sqrt2, Sqrt2^2, Sqrt2^3, Sqrt2^4, Sqrt2^5
-static int32_t NewSqrt2list[TX_SIZES] = { 5793, 2 * 4096, 2 * 5793, 4 * 4096,
- 4 * 5793 };
-
-static INLINE void idct16_stage5_avx2(__m256i *x1, const int32_t *cospi,
- const __m256i _r, int8_t cos_bit) {
- const __m256i cospi_m32_p32 = pair_set_w16_epi16(-cospi[32], cospi[32]);
- const __m256i cospi_p32_p32 = pair_set_w16_epi16(cospi[32], cospi[32]);
- btf_16_adds_subs_avx2(&x1[0], &x1[3]);
- btf_16_adds_subs_avx2(&x1[1], &x1[2]);
- btf_16_w16_avx2(cospi_m32_p32, cospi_p32_p32, &x1[5], &x1[6], _r, cos_bit);
-
- btf_16_adds_subs_avx2(&x1[8], &x1[11]);
- btf_16_adds_subs_avx2(&x1[9], &x1[10]);
- btf_16_adds_subs_avx2(&x1[15], &x1[12]);
- btf_16_adds_subs_avx2(&x1[14], &x1[13]);
-}
-
-static INLINE void idct16_stage6_avx2(__m256i *x, const int32_t *cospi,
- const __m256i _r, int8_t cos_bit) {
- const __m256i cospi_m32_p32 = pair_set_w16_epi16(-cospi[32], cospi[32]);
- const __m256i cospi_p32_p32 = pair_set_w16_epi16(cospi[32], cospi[32]);
- btf_16_adds_subs_avx2(&x[0], &x[7]);
- btf_16_adds_subs_avx2(&x[1], &x[6]);
- btf_16_adds_subs_avx2(&x[2], &x[5]);
- btf_16_adds_subs_avx2(&x[3], &x[4]);
- btf_16_w16_avx2(cospi_m32_p32, cospi_p32_p32, &x[10], &x[13], _r, cos_bit);
- btf_16_w16_avx2(cospi_m32_p32, cospi_p32_p32, &x[11], &x[12], _r, cos_bit);
-}
-
-static INLINE void idct16_stage7_avx2(__m256i *output, __m256i *x1) {
- btf_16_adds_subs_out_avx2(&output[0], &output[15], x1[0], x1[15]);
- btf_16_adds_subs_out_avx2(&output[1], &output[14], x1[1], x1[14]);
- btf_16_adds_subs_out_avx2(&output[2], &output[13], x1[2], x1[13]);
- btf_16_adds_subs_out_avx2(&output[3], &output[12], x1[3], x1[12]);
- btf_16_adds_subs_out_avx2(&output[4], &output[11], x1[4], x1[11]);
- btf_16_adds_subs_out_avx2(&output[5], &output[10], x1[5], x1[10]);
- btf_16_adds_subs_out_avx2(&output[6], &output[9], x1[6], x1[9]);
- btf_16_adds_subs_out_avx2(&output[7], &output[8], x1[7], x1[8]);
-}
-
-static void idct16_new_avx2(const __m256i *input, __m256i *output,
- int8_t cos_bit) {
- (void)(cos_bit);
- const int32_t *cospi = cospi_arr(INV_COS_BIT);
- const __m256i _r = _mm256_set1_epi32(1 << (INV_COS_BIT - 1));
-
- __m256i cospi_p60_m04 = pair_set_w16_epi16(cospi[60], -cospi[4]);
- __m256i cospi_p04_p60 = pair_set_w16_epi16(cospi[4], cospi[60]);
- __m256i cospi_p28_m36 = pair_set_w16_epi16(cospi[28], -cospi[36]);
- __m256i cospi_p36_p28 = pair_set_w16_epi16(cospi[36], cospi[28]);
- __m256i cospi_p44_m20 = pair_set_w16_epi16(cospi[44], -cospi[20]);
- __m256i cospi_p20_p44 = pair_set_w16_epi16(cospi[20], cospi[44]);
- __m256i cospi_p12_m52 = pair_set_w16_epi16(cospi[12], -cospi[52]);
- __m256i cospi_p52_p12 = pair_set_w16_epi16(cospi[52], cospi[12]);
- __m256i cospi_p56_m08 = pair_set_w16_epi16(cospi[56], -cospi[8]);
- __m256i cospi_p08_p56 = pair_set_w16_epi16(cospi[8], cospi[56]);
- __m256i cospi_p24_m40 = pair_set_w16_epi16(cospi[24], -cospi[40]);
- __m256i cospi_p40_p24 = pair_set_w16_epi16(cospi[40], cospi[24]);
- __m256i cospi_p32_p32 = pair_set_w16_epi16(cospi[32], cospi[32]);
- __m256i cospi_p32_m32 = pair_set_w16_epi16(cospi[32], -cospi[32]);
- __m256i cospi_p48_m16 = pair_set_w16_epi16(cospi[48], -cospi[16]);
- __m256i cospi_p16_p48 = pair_set_w16_epi16(cospi[16], cospi[48]);
- __m256i cospi_m16_p48 = pair_set_w16_epi16(-cospi[16], cospi[48]);
- __m256i cospi_p48_p16 = pair_set_w16_epi16(cospi[48], cospi[16]);
- __m256i cospi_m48_m16 = pair_set_w16_epi16(-cospi[48], -cospi[16]);
-
- // stage 1
- __m256i x1[16];
- x1[0] = input[0];
- x1[1] = input[8];
- x1[2] = input[4];
- x1[3] = input[12];
- x1[4] = input[2];
- x1[5] = input[10];
- x1[6] = input[6];
- x1[7] = input[14];
- x1[8] = input[1];
- x1[9] = input[9];
- x1[10] = input[5];
- x1[11] = input[13];
- x1[12] = input[3];
- x1[13] = input[11];
- x1[14] = input[7];
- x1[15] = input[15];
-
- // stage 2
- btf_16_w16_avx2(cospi_p60_m04, cospi_p04_p60, &x1[8], &x1[15], _r, cos_bit);
- btf_16_w16_avx2(cospi_p28_m36, cospi_p36_p28, &x1[9], &x1[14], _r, cos_bit);
- btf_16_w16_avx2(cospi_p44_m20, cospi_p20_p44, &x1[10], &x1[13], _r, cos_bit);
- btf_16_w16_avx2(cospi_p12_m52, cospi_p52_p12, &x1[11], &x1[12], _r, cos_bit);
-
- // stage 3
- btf_16_w16_avx2(cospi_p56_m08, cospi_p08_p56, &x1[4], &x1[7], _r, cos_bit);
- btf_16_w16_avx2(cospi_p24_m40, cospi_p40_p24, &x1[5], &x1[6], _r, cos_bit);
- btf_16_adds_subs_avx2(&x1[8], &x1[9]);
- btf_16_adds_subs_avx2(&x1[11], &x1[10]);
- btf_16_adds_subs_avx2(&x1[12], &x1[13]);
- btf_16_adds_subs_avx2(&x1[15], &x1[14]);
-
- // stage 4
- btf_16_w16_avx2(cospi_p32_p32, cospi_p32_m32, &x1[0], &x1[1], _r, cos_bit);
- btf_16_w16_avx2(cospi_p48_m16, cospi_p16_p48, &x1[2], &x1[3], _r, cos_bit);
- btf_16_adds_subs_avx2(&x1[4], &x1[5]);
- btf_16_adds_subs_avx2(&x1[7], &x1[6]);
- btf_16_w16_avx2(cospi_m16_p48, cospi_p48_p16, &x1[9], &x1[14], _r, cos_bit);
- btf_16_w16_avx2(cospi_m48_m16, cospi_m16_p48, &x1[10], &x1[13], _r, cos_bit);
-
- idct16_stage5_avx2(x1, cospi, _r, cos_bit);
- idct16_stage6_avx2(x1, cospi, _r, cos_bit);
- idct16_stage7_avx2(output, x1);
-}
-
-static void idct16_low8_new_avx2(const __m256i *input, __m256i *output,
- int8_t cos_bit) {
- (void)(cos_bit);
- const int32_t *cospi = cospi_arr(INV_COS_BIT);
- const __m256i _r = _mm256_set1_epi32(1 << (INV_COS_BIT - 1));
-
- const __m256i cospi_m16_p48 = pair_set_w16_epi16(-cospi[16], cospi[48]);
- const __m256i cospi_p48_p16 = pair_set_w16_epi16(cospi[48], cospi[16]);
- const __m256i cospi_m48_m16 = pair_set_w16_epi16(-cospi[48], -cospi[16]);
-
- // stage 1
- __m256i x1[16];
- x1[0] = input[0];
- x1[2] = input[4];
- x1[4] = input[2];
- x1[6] = input[6];
- x1[8] = input[1];
- x1[10] = input[5];
- x1[12] = input[3];
- x1[14] = input[7];
-
- // stage 2
- btf_16_w16_0_avx2(cospi[60], cospi[4], x1[8], x1[8], x1[15]);
- btf_16_w16_0_avx2(-cospi[36], cospi[28], x1[14], x1[9], x1[14]);
- btf_16_w16_0_avx2(cospi[44], cospi[20], x1[10], x1[10], x1[13]);
- btf_16_w16_0_avx2(-cospi[52], cospi[12], x1[12], x1[11], x1[12]);
-
- // stage 3
- btf_16_w16_0_avx2(cospi[56], cospi[8], x1[4], x1[4], x1[7]);
- btf_16_w16_0_avx2(-cospi[40], cospi[24], x1[6], x1[5], x1[6]);
- btf_16_adds_subs_avx2(&x1[8], &x1[9]);
- btf_16_adds_subs_avx2(&x1[11], &x1[10]);
- btf_16_adds_subs_avx2(&x1[12], &x1[13]);
- btf_16_adds_subs_avx2(&x1[15], &x1[14]);
-
- // stage 4
- btf_16_w16_0_avx2(cospi[32], cospi[32], x1[0], x1[0], x1[1]);
- btf_16_w16_0_avx2(cospi[48], cospi[16], x1[2], x1[2], x1[3]);
- btf_16_adds_subs_avx2(&x1[4], &x1[5]);
- btf_16_adds_subs_avx2(&x1[7], &x1[6]);
- btf_16_w16_avx2(cospi_m16_p48, cospi_p48_p16, &x1[9], &x1[14], _r, cos_bit);
- btf_16_w16_avx2(cospi_m48_m16, cospi_m16_p48, &x1[10], &x1[13], _r, cos_bit);
-
- idct16_stage5_avx2(x1, cospi, _r, cos_bit);
- idct16_stage6_avx2(x1, cospi, _r, cos_bit);
- idct16_stage7_avx2(output, x1);
-}
-
-static void idct16_low1_new_avx2(const __m256i *input, __m256i *output,
- int8_t cos_bit) {
- (void)(cos_bit);
- const int32_t *cospi = cospi_arr(INV_COS_BIT);
-
- // stage 1
- __m256i x1[2];
- x1[0] = input[0];
-
- // stage 2
- // stage 3
- // stage 4
- btf_16_w16_0_avx2(cospi[32], cospi[32], x1[0], x1[0], x1[1]);
-
- // stage 5
- // stage 6
- output[0] = x1[0];
- output[1] = x1[1];
- output[2] = x1[1];
- output[3] = x1[0];
- output[4] = x1[0];
- output[5] = x1[1];
- output[6] = x1[1];
- output[7] = x1[0];
- output[8] = x1[0];
- output[9] = x1[1];
- output[10] = x1[1];
- output[11] = x1[0];
- output[12] = x1[0];
- output[13] = x1[1];
- output[14] = x1[1];
- output[15] = x1[0];
-}
-
-static INLINE void iadst16_stage3_avx2(__m256i *x) {
- btf_16_adds_subs_avx2(&x[0], &x[8]);
- btf_16_adds_subs_avx2(&x[1], &x[9]);
- btf_16_adds_subs_avx2(&x[2], &x[10]);
- btf_16_adds_subs_avx2(&x[3], &x[11]);
- btf_16_adds_subs_avx2(&x[4], &x[12]);
- btf_16_adds_subs_avx2(&x[5], &x[13]);
- btf_16_adds_subs_avx2(&x[6], &x[14]);
- btf_16_adds_subs_avx2(&x[7], &x[15]);
-}
-
-static INLINE void iadst16_stage4_avx2(__m256i *x, const int32_t *cospi,
- const __m256i _r, int8_t cos_bit) {
- const __m256i cospi_p08_p56 = pair_set_w16_epi16(cospi[8], cospi[56]);
- const __m256i cospi_p56_m08 = pair_set_w16_epi16(cospi[56], -cospi[8]);
- const __m256i cospi_p40_p24 = pair_set_w16_epi16(cospi[40], cospi[24]);
- const __m256i cospi_p24_m40 = pair_set_w16_epi16(cospi[24], -cospi[40]);
- const __m256i cospi_m56_p08 = pair_set_w16_epi16(-cospi[56], cospi[8]);
- const __m256i cospi_m24_p40 = pair_set_w16_epi16(-cospi[24], cospi[40]);
- btf_16_w16_avx2(cospi_p08_p56, cospi_p56_m08, &x[8], &x[9], _r, cos_bit);
- btf_16_w16_avx2(cospi_p40_p24, cospi_p24_m40, &x[10], &x[11], _r, cos_bit);
- btf_16_w16_avx2(cospi_m56_p08, cospi_p08_p56, &x[12], &x[13], _r, cos_bit);
- btf_16_w16_avx2(cospi_m24_p40, cospi_p40_p24, &x[14], &x[15], _r, cos_bit);
-}
-
-static INLINE void iadst16_stage5_avx2(__m256i *x) {
- btf_16_adds_subs_avx2(&x[0], &x[4]);
- btf_16_adds_subs_avx2(&x[1], &x[5]);
- btf_16_adds_subs_avx2(&x[2], &x[6]);
- btf_16_adds_subs_avx2(&x[3], &x[7]);
- btf_16_adds_subs_avx2(&x[8], &x[12]);
- btf_16_adds_subs_avx2(&x[9], &x[13]);
- btf_16_adds_subs_avx2(&x[10], &x[14]);
- btf_16_adds_subs_avx2(&x[11], &x[15]);
-}
-
-static INLINE void iadst16_stage6_avx2(__m256i *x, const int32_t *cospi,
- const __m256i _r, int8_t cos_bit) {
- const __m256i cospi_p16_p48 = pair_set_w16_epi16(cospi[16], cospi[48]);
- const __m256i cospi_p48_m16 = pair_set_w16_epi16(cospi[48], -cospi[16]);
- const __m256i cospi_m48_p16 = pair_set_w16_epi16(-cospi[48], cospi[16]);
- btf_16_w16_avx2(cospi_p16_p48, cospi_p48_m16, &x[4], &x[5], _r, cos_bit);
- btf_16_w16_avx2(cospi_m48_p16, cospi_p16_p48, &x[6], &x[7], _r, cos_bit);
- btf_16_w16_avx2(cospi_p16_p48, cospi_p48_m16, &x[12], &x[13], _r, cos_bit);
- btf_16_w16_avx2(cospi_m48_p16, cospi_p16_p48, &x[14], &x[15], _r, cos_bit);
-}
-
-static INLINE void iadst16_stage7_avx2(__m256i *x) {
- btf_16_adds_subs_avx2(&x[0], &x[2]);
- btf_16_adds_subs_avx2(&x[1], &x[3]);
- btf_16_adds_subs_avx2(&x[4], &x[6]);
- btf_16_adds_subs_avx2(&x[5], &x[7]);
- btf_16_adds_subs_avx2(&x[8], &x[10]);
- btf_16_adds_subs_avx2(&x[9], &x[11]);
- btf_16_adds_subs_avx2(&x[12], &x[14]);
- btf_16_adds_subs_avx2(&x[13], &x[15]);
-}
-
-static INLINE void iadst16_stage8_avx2(__m256i *x1, const int32_t *cospi,
- const __m256i _r, int8_t cos_bit) {
- const __m256i cospi_p32_p32 = pair_set_w16_epi16(cospi[32], cospi[32]);
- const __m256i cospi_p32_m32 = pair_set_w16_epi16(cospi[32], -cospi[32]);
- btf_16_w16_avx2(cospi_p32_p32, cospi_p32_m32, &x1[2], &x1[3], _r, cos_bit);
- btf_16_w16_avx2(cospi_p32_p32, cospi_p32_m32, &x1[6], &x1[7], _r, cos_bit);
- btf_16_w16_avx2(cospi_p32_p32, cospi_p32_m32, &x1[10], &x1[11], _r, cos_bit);
- btf_16_w16_avx2(cospi_p32_p32, cospi_p32_m32, &x1[14], &x1[15], _r, cos_bit);
-}
-
-static INLINE void iadst16_stage9_avx2(__m256i *output, __m256i *x1) {
- const __m256i __zero = _mm256_setzero_si256();
- output[0] = x1[0];
- output[1] = _mm256_subs_epi16(__zero, x1[8]);
- output[2] = x1[12];
- output[3] = _mm256_subs_epi16(__zero, x1[4]);
- output[4] = x1[6];
- output[5] = _mm256_subs_epi16(__zero, x1[14]);
- output[6] = x1[10];
- output[7] = _mm256_subs_epi16(__zero, x1[2]);
- output[8] = x1[3];
- output[9] = _mm256_subs_epi16(__zero, x1[11]);
- output[10] = x1[15];
- output[11] = _mm256_subs_epi16(__zero, x1[7]);
- output[12] = x1[5];
- output[13] = _mm256_subs_epi16(__zero, x1[13]);
- output[14] = x1[9];
- output[15] = _mm256_subs_epi16(__zero, x1[1]);
-}
-
-static void iadst16_new_avx2(const __m256i *input, __m256i *output,
- int8_t cos_bit) {
- (void)(cos_bit);
- const int32_t *cospi = cospi_arr(INV_COS_BIT);
-
- const __m256i _r = _mm256_set1_epi32(1 << (INV_COS_BIT - 1));
-
- __m256i cospi_p02_p62 = pair_set_w16_epi16(cospi[2], cospi[62]);
- __m256i cospi_p62_m02 = pair_set_w16_epi16(cospi[62], -cospi[2]);
- __m256i cospi_p10_p54 = pair_set_w16_epi16(cospi[10], cospi[54]);
- __m256i cospi_p54_m10 = pair_set_w16_epi16(cospi[54], -cospi[10]);
- __m256i cospi_p18_p46 = pair_set_w16_epi16(cospi[18], cospi[46]);
- __m256i cospi_p46_m18 = pair_set_w16_epi16(cospi[46], -cospi[18]);
- __m256i cospi_p26_p38 = pair_set_w16_epi16(cospi[26], cospi[38]);
- __m256i cospi_p38_m26 = pair_set_w16_epi16(cospi[38], -cospi[26]);
- __m256i cospi_p34_p30 = pair_set_w16_epi16(cospi[34], cospi[30]);
- __m256i cospi_p30_m34 = pair_set_w16_epi16(cospi[30], -cospi[34]);
- __m256i cospi_p42_p22 = pair_set_w16_epi16(cospi[42], cospi[22]);
- __m256i cospi_p22_m42 = pair_set_w16_epi16(cospi[22], -cospi[42]);
- __m256i cospi_p50_p14 = pair_set_w16_epi16(cospi[50], cospi[14]);
- __m256i cospi_p14_m50 = pair_set_w16_epi16(cospi[14], -cospi[50]);
- __m256i cospi_p58_p06 = pair_set_w16_epi16(cospi[58], cospi[6]);
- __m256i cospi_p06_m58 = pair_set_w16_epi16(cospi[6], -cospi[58]);
-
- // stage 1
- __m256i x1[16];
- x1[0] = input[15];
- x1[1] = input[0];
- x1[2] = input[13];
- x1[3] = input[2];
- x1[4] = input[11];
- x1[5] = input[4];
- x1[6] = input[9];
- x1[7] = input[6];
- x1[8] = input[7];
- x1[9] = input[8];
- x1[10] = input[5];
- x1[11] = input[10];
- x1[12] = input[3];
- x1[13] = input[12];
- x1[14] = input[1];
- x1[15] = input[14];
-
- // stage 2
- btf_16_w16_avx2(cospi_p02_p62, cospi_p62_m02, &x1[0], &x1[1], _r, cos_bit);
- btf_16_w16_avx2(cospi_p10_p54, cospi_p54_m10, &x1[2], &x1[3], _r, cos_bit);
- btf_16_w16_avx2(cospi_p18_p46, cospi_p46_m18, &x1[4], &x1[5], _r, cos_bit);
- btf_16_w16_avx2(cospi_p26_p38, cospi_p38_m26, &x1[6], &x1[7], _r, cos_bit);
- btf_16_w16_avx2(cospi_p34_p30, cospi_p30_m34, &x1[8], &x1[9], _r, cos_bit);
- btf_16_w16_avx2(cospi_p42_p22, cospi_p22_m42, &x1[10], &x1[11], _r, cos_bit);
- btf_16_w16_avx2(cospi_p50_p14, cospi_p14_m50, &x1[12], &x1[13], _r, cos_bit);
- btf_16_w16_avx2(cospi_p58_p06, cospi_p06_m58, &x1[14], &x1[15], _r, cos_bit);
-
- iadst16_stage3_avx2(x1);
- iadst16_stage4_avx2(x1, cospi, _r, cos_bit);
- iadst16_stage5_avx2(x1);
- iadst16_stage6_avx2(x1, cospi, _r, cos_bit);
- iadst16_stage7_avx2(x1);
- iadst16_stage8_avx2(x1, cospi, _r, cos_bit);
- iadst16_stage9_avx2(output, x1);
-}
-
-static void iadst16_low8_new_avx2(const __m256i *input, __m256i *output,
- int8_t cos_bit) {
- (void)(cos_bit);
- const int32_t *cospi = cospi_arr(INV_COS_BIT);
- const __m256i _r = _mm256_set1_epi32(1 << (INV_COS_BIT - 1));
-
- // stage 1
- __m256i x1[16];
- x1[1] = input[0];
- x1[3] = input[2];
- x1[5] = input[4];
- x1[7] = input[6];
- x1[8] = input[7];
- x1[10] = input[5];
- x1[12] = input[3];
- x1[14] = input[1];
-
- // stage 2
- btf_16_w16_0_avx2(cospi[62], -cospi[2], x1[1], x1[0], x1[1]);
- btf_16_w16_0_avx2(cospi[54], -cospi[10], x1[3], x1[2], x1[3]);
- btf_16_w16_0_avx2(cospi[46], -cospi[18], x1[5], x1[4], x1[5]);
- btf_16_w16_0_avx2(cospi[38], -cospi[26], x1[7], x1[6], x1[7]);
- btf_16_w16_0_avx2(cospi[34], cospi[30], x1[8], x1[8], x1[9]);
- btf_16_w16_0_avx2(cospi[42], cospi[22], x1[10], x1[10], x1[11]);
- btf_16_w16_0_avx2(cospi[50], cospi[14], x1[12], x1[12], x1[13]);
- btf_16_w16_0_avx2(cospi[58], cospi[06], x1[14], x1[14], x1[15]);
-
- iadst16_stage3_avx2(x1);
- iadst16_stage4_avx2(x1, cospi, _r, cos_bit);
- iadst16_stage5_avx2(x1);
- iadst16_stage6_avx2(x1, cospi, _r, cos_bit);
- iadst16_stage7_avx2(x1);
- iadst16_stage8_avx2(x1, cospi, _r, cos_bit);
- iadst16_stage9_avx2(output, x1);
-}
-
-static void iadst16_low1_new_avx2(const __m256i *input, __m256i *output,
- int8_t cos_bit) {
- (void)(cos_bit);
- const int32_t *cospi = cospi_arr(INV_COS_BIT);
- const __m256i _r = _mm256_set1_epi32(1 << (INV_COS_BIT - 1));
-
- const __m256i cospi_p08_p56 = pair_set_w16_epi16(cospi[8], cospi[56]);
- const __m256i cospi_p56_m08 = pair_set_w16_epi16(cospi[56], -cospi[8]);
- const __m256i cospi_p16_p48 = pair_set_w16_epi16(cospi[16], cospi[48]);
- const __m256i cospi_p48_m16 = pair_set_w16_epi16(cospi[48], -cospi[16]);
-
- // stage 1
- __m256i x1[16];
- x1[1] = input[0];
-
- // stage 2
- btf_16_w16_0_avx2(cospi[62], -cospi[2], x1[1], x1[0], x1[1]);
-
- // stage 3
- x1[8] = x1[0];
- x1[9] = x1[1];
-
- // stage 4
- btf_16_w16_avx2(cospi_p08_p56, cospi_p56_m08, &x1[8], &x1[9], _r, cos_bit);
-
- // stage 5
- x1[4] = x1[0];
- x1[5] = x1[1];
-
- x1[12] = x1[8];
- x1[13] = x1[9];
-
- // stage 6
- btf_16_w16_avx2(cospi_p16_p48, cospi_p48_m16, &x1[4], &x1[5], _r, cos_bit);
- btf_16_w16_avx2(cospi_p16_p48, cospi_p48_m16, &x1[12], &x1[13], _r, cos_bit);
-
- // stage 7
- x1[2] = x1[0];
- x1[3] = x1[1];
- x1[6] = x1[4];
- x1[7] = x1[5];
- x1[10] = x1[8];
- x1[11] = x1[9];
- x1[14] = x1[12];
- x1[15] = x1[13];
-
- iadst16_stage8_avx2(x1, cospi, _r, cos_bit);
- iadst16_stage9_avx2(output, x1);
-}
-
-static INLINE void idct32_high16_stage3_avx2(__m256i *x) {
- btf_16_adds_subs_avx2(&x[16], &x[17]);
- btf_16_adds_subs_avx2(&x[19], &x[18]);
- btf_16_adds_subs_avx2(&x[20], &x[21]);
- btf_16_adds_subs_avx2(&x[23], &x[22]);
- btf_16_adds_subs_avx2(&x[24], &x[25]);
- btf_16_adds_subs_avx2(&x[27], &x[26]);
- btf_16_adds_subs_avx2(&x[28], &x[29]);
- btf_16_adds_subs_avx2(&x[31], &x[30]);
-}
-
-static INLINE void idct32_high16_stage4_avx2(__m256i *x, const int32_t *cospi,
- const __m256i _r, int8_t cos_bit) {
- const __m256i cospi_m08_p56 = pair_set_w16_epi16(-cospi[8], cospi[56]);
- const __m256i cospi_p56_p08 = pair_set_w16_epi16(cospi[56], cospi[8]);
- const __m256i cospi_m56_m08 = pair_set_w16_epi16(-cospi[56], -cospi[8]);
- const __m256i cospi_m40_p24 = pair_set_w16_epi16(-cospi[40], cospi[24]);
- const __m256i cospi_p24_p40 = pair_set_w16_epi16(cospi[24], cospi[40]);
- const __m256i cospi_m24_m40 = pair_set_w16_epi16(-cospi[24], -cospi[40]);
- btf_16_w16_avx2(cospi_m08_p56, cospi_p56_p08, &x[17], &x[30], _r, cos_bit);
- btf_16_w16_avx2(cospi_m56_m08, cospi_m08_p56, &x[18], &x[29], _r, cos_bit);
- btf_16_w16_avx2(cospi_m40_p24, cospi_p24_p40, &x[21], &x[26], _r, cos_bit);
- btf_16_w16_avx2(cospi_m24_m40, cospi_m40_p24, &x[22], &x[25], _r, cos_bit);
-}
-
-static INLINE void idct32_high24_stage5_avx2(__m256i *x, const int32_t *cospi,
- const __m256i _r, int8_t cos_bit) {
- const __m256i cospi_m16_p48 = pair_set_w16_epi16(-cospi[16], cospi[48]);
- const __m256i cospi_p48_p16 = pair_set_w16_epi16(cospi[48], cospi[16]);
- const __m256i cospi_m48_m16 = pair_set_w16_epi16(-cospi[48], -cospi[16]);
- btf_16_w16_avx2(cospi_m16_p48, cospi_p48_p16, &x[9], &x[14], _r, cos_bit);
- btf_16_w16_avx2(cospi_m48_m16, cospi_m16_p48, &x[10], &x[13], _r, cos_bit);
- btf_16_adds_subs_avx2(&x[16], &x[19]);
- btf_16_adds_subs_avx2(&x[17], &x[18]);
- btf_16_adds_subs_avx2(&x[23], &x[20]);
- btf_16_adds_subs_avx2(&x[22], &x[21]);
- btf_16_adds_subs_avx2(&x[24], &x[27]);
- btf_16_adds_subs_avx2(&x[25], &x[26]);
- btf_16_adds_subs_avx2(&x[31], &x[28]);
- btf_16_adds_subs_avx2(&x[30], &x[29]);
-}
-
-static INLINE void idct32_high28_stage6_avx2(__m256i *x, const int32_t *cospi,
- const __m256i _r, int8_t cos_bit) {
- const __m256i cospi_m32_p32 = pair_set_w16_epi16(-cospi[32], cospi[32]);
- const __m256i cospi_p32_p32 = pair_set_w16_epi16(cospi[32], cospi[32]);
- const __m256i cospi_m16_p48 = pair_set_w16_epi16(-cospi[16], cospi[48]);
- const __m256i cospi_p48_p16 = pair_set_w16_epi16(cospi[48], cospi[16]);
- const __m256i cospi_m48_m16 = pair_set_w16_epi16(-cospi[48], -cospi[16]);
- btf_16_w16_avx2(cospi_m32_p32, cospi_p32_p32, &x[5], &x[6], _r, cos_bit);
- btf_16_adds_subs_avx2(&x[8], &x[11]);
- btf_16_adds_subs_avx2(&x[9], &x[10]);
- btf_16_adds_subs_avx2(&x[15], &x[12]);
- btf_16_adds_subs_avx2(&x[14], &x[13]);
- btf_16_w16_avx2(cospi_m16_p48, cospi_p48_p16, &x[18], &x[29], _r, cos_bit);
- btf_16_w16_avx2(cospi_m16_p48, cospi_p48_p16, &x[19], &x[28], _r, cos_bit);
- btf_16_w16_avx2(cospi_m48_m16, cospi_m16_p48, &x[20], &x[27], _r, cos_bit);
- btf_16_w16_avx2(cospi_m48_m16, cospi_m16_p48, &x[21], &x[26], _r, cos_bit);
-}
-
-static INLINE void idct32_stage7_avx2(__m256i *x, const int32_t *cospi,
- const __m256i _r, int8_t cos_bit) {
- const __m256i cospi_m32_p32 = pair_set_w16_epi16(-cospi[32], cospi[32]);
- const __m256i cospi_p32_p32 = pair_set_w16_epi16(cospi[32], cospi[32]);
- btf_16_adds_subs_avx2(&x[0], &x[7]);
- btf_16_adds_subs_avx2(&x[1], &x[6]);
- btf_16_adds_subs_avx2(&x[2], &x[5]);
- btf_16_adds_subs_avx2(&x[3], &x[4]);
- btf_16_w16_avx2(cospi_m32_p32, cospi_p32_p32, &x[10], &x[13], _r, cos_bit);
- btf_16_w16_avx2(cospi_m32_p32, cospi_p32_p32, &x[11], &x[12], _r, cos_bit);
- btf_16_adds_subs_avx2(&x[16], &x[23]);
- btf_16_adds_subs_avx2(&x[17], &x[22]);
- btf_16_adds_subs_avx2(&x[18], &x[21]);
- btf_16_adds_subs_avx2(&x[19], &x[20]);
- btf_16_adds_subs_avx2(&x[31], &x[24]);
- btf_16_adds_subs_avx2(&x[30], &x[25]);
- btf_16_adds_subs_avx2(&x[29], &x[26]);
- btf_16_adds_subs_avx2(&x[28], &x[27]);
-}
-
-static INLINE void idct32_stage8_avx2(__m256i *x, const int32_t *cospi,
- const __m256i _r, int8_t cos_bit) {
- const __m256i cospi_m32_p32 = pair_set_w16_epi16(-cospi[32], cospi[32]);
- const __m256i cospi_p32_p32 = pair_set_w16_epi16(cospi[32], cospi[32]);
- btf_16_adds_subs_avx2(&x[0], &x[15]);
- btf_16_adds_subs_avx2(&x[1], &x[14]);
- btf_16_adds_subs_avx2(&x[2], &x[13]);
- btf_16_adds_subs_avx2(&x[3], &x[12]);
- btf_16_adds_subs_avx2(&x[4], &x[11]);
- btf_16_adds_subs_avx2(&x[5], &x[10]);
- btf_16_adds_subs_avx2(&x[6], &x[9]);
- btf_16_adds_subs_avx2(&x[7], &x[8]);
- btf_16_w16_avx2(cospi_m32_p32, cospi_p32_p32, &x[20], &x[27], _r, cos_bit);
- btf_16_w16_avx2(cospi_m32_p32, cospi_p32_p32, &x[21], &x[26], _r, cos_bit);
- btf_16_w16_avx2(cospi_m32_p32, cospi_p32_p32, &x[22], &x[25], _r, cos_bit);
- btf_16_w16_avx2(cospi_m32_p32, cospi_p32_p32, &x[23], &x[24], _r, cos_bit);
-}
-
-static INLINE void idct32_stage9_avx2(__m256i *output, __m256i *x) {
- btf_16_adds_subs_out_avx2(&output[0], &output[31], x[0], x[31]);
- btf_16_adds_subs_out_avx2(&output[1], &output[30], x[1], x[30]);
- btf_16_adds_subs_out_avx2(&output[2], &output[29], x[2], x[29]);
- btf_16_adds_subs_out_avx2(&output[3], &output[28], x[3], x[28]);
- btf_16_adds_subs_out_avx2(&output[4], &output[27], x[4], x[27]);
- btf_16_adds_subs_out_avx2(&output[5], &output[26], x[5], x[26]);
- btf_16_adds_subs_out_avx2(&output[6], &output[25], x[6], x[25]);
- btf_16_adds_subs_out_avx2(&output[7], &output[24], x[7], x[24]);
- btf_16_adds_subs_out_avx2(&output[8], &output[23], x[8], x[23]);
- btf_16_adds_subs_out_avx2(&output[9], &output[22], x[9], x[22]);
- btf_16_adds_subs_out_avx2(&output[10], &output[21], x[10], x[21]);
- btf_16_adds_subs_out_avx2(&output[11], &output[20], x[11], x[20]);
- btf_16_adds_subs_out_avx2(&output[12], &output[19], x[12], x[19]);
- btf_16_adds_subs_out_avx2(&output[13], &output[18], x[13], x[18]);
- btf_16_adds_subs_out_avx2(&output[14], &output[17], x[14], x[17]);
- btf_16_adds_subs_out_avx2(&output[15], &output[16], x[15], x[16]);
-}
-
-static void idct32_low1_new_avx2(const __m256i *input, __m256i *output,
- int8_t cos_bit) {
- (void)cos_bit;
- const int32_t *cospi = cospi_arr(INV_COS_BIT);
-
- // stage 1
- __m256i x[2];
- x[0] = input[0];
-
- // stage 2
- // stage 3
- // stage 4
- // stage 5
- btf_16_w16_0_avx2(cospi[32], cospi[32], x[0], x[0], x[1]);
-
- // stage 6
- // stage 7
- // stage 8
- // stage 9
- output[0] = x[0];
- output[31] = x[0];
- output[1] = x[1];
- output[30] = x[1];
- output[2] = x[1];
- output[29] = x[1];
- output[3] = x[0];
- output[28] = x[0];
- output[4] = x[0];
- output[27] = x[0];
- output[5] = x[1];
- output[26] = x[1];
- output[6] = x[1];
- output[25] = x[1];
- output[7] = x[0];
- output[24] = x[0];
- output[8] = x[0];
- output[23] = x[0];
- output[9] = x[1];
- output[22] = x[1];
- output[10] = x[1];
- output[21] = x[1];
- output[11] = x[0];
- output[20] = x[0];
- output[12] = x[0];
- output[19] = x[0];
- output[13] = x[1];
- output[18] = x[1];
- output[14] = x[1];
- output[17] = x[1];
- output[15] = x[0];
- output[16] = x[0];
-}
-
-static void idct32_low8_new_avx2(const __m256i *input, __m256i *output,
- int8_t cos_bit) {
- (void)cos_bit;
- const int32_t *cospi = cospi_arr(INV_COS_BIT);
- const __m256i _r = _mm256_set1_epi32(1 << (INV_COS_BIT - 1));
-
- // stage 1
- __m256i x[32];
- x[0] = input[0];
- x[4] = input[4];
- x[8] = input[2];
- x[12] = input[6];
- x[16] = input[1];
- x[20] = input[5];
- x[24] = input[3];
- x[28] = input[7];
-
- // stage 2
- btf_16_w16_0_avx2(cospi[62], cospi[2], x[16], x[16], x[31]);
- btf_16_w16_0_avx2(-cospi[50], cospi[14], x[28], x[19], x[28]);
- btf_16_w16_0_avx2(cospi[54], cospi[10], x[20], x[20], x[27]);
- btf_16_w16_0_avx2(-cospi[58], cospi[6], x[24], x[23], x[24]);
-
- // stage 3
- btf_16_w16_0_avx2(cospi[60], cospi[4], x[8], x[8], x[15]);
- btf_16_w16_0_avx2(-cospi[52], cospi[12], x[12], x[11], x[12]);
- x[17] = x[16];
- x[18] = x[19];
- x[21] = x[20];
- x[22] = x[23];
- x[25] = x[24];
- x[26] = x[27];
- x[29] = x[28];
- x[30] = x[31];
-
- // stage 4
- btf_16_w16_0_avx2(cospi[56], cospi[8], x[4], x[4], x[7]);
- x[9] = x[8];
- x[10] = x[11];
- x[13] = x[12];
- x[14] = x[15];
- idct32_high16_stage4_avx2(x, cospi, _r, cos_bit);
-
- // stage 5
- btf_16_w16_0_avx2(cospi[32], cospi[32], x[0], x[0], x[1]);
- x[5] = x[4];
- x[6] = x[7];
- idct32_high24_stage5_avx2(x, cospi, _r, cos_bit);
- // stage 6
- x[3] = x[0];
- x[2] = x[1];
- idct32_high28_stage6_avx2(x, cospi, _r, cos_bit);
-
- idct32_stage7_avx2(x, cospi, _r, cos_bit);
- idct32_stage8_avx2(x, cospi, _r, cos_bit);
- idct32_stage9_avx2(output, x);
-}
-
-static void idct32_low16_new_avx2(const __m256i *input, __m256i *output,
- int8_t cos_bit) {
- (void)cos_bit;
- const int32_t *cospi = cospi_arr(INV_COS_BIT);
- const __m256i _r = _mm256_set1_epi32(1 << (INV_COS_BIT - 1));
-
- // stage 1
- __m256i x[32];
- x[0] = input[0];
- x[2] = input[8];
- x[4] = input[4];
- x[6] = input[12];
- x[8] = input[2];
- x[10] = input[10];
- x[12] = input[6];
- x[14] = input[14];
- x[16] = input[1];
- x[18] = input[9];
- x[20] = input[5];
- x[22] = input[13];
- x[24] = input[3];
- x[26] = input[11];
- x[28] = input[7];
- x[30] = input[15];
-
- // stage 2
- btf_16_w16_0_avx2(cospi[62], cospi[2], x[16], x[16], x[31]);
- btf_16_w16_0_avx2(-cospi[34], cospi[30], x[30], x[17], x[30]);
- btf_16_w16_0_avx2(cospi[46], cospi[18], x[18], x[18], x[29]);
- btf_16_w16_0_avx2(-cospi[50], cospi[14], x[28], x[19], x[28]);
- btf_16_w16_0_avx2(cospi[54], cospi[10], x[20], x[20], x[27]);
- btf_16_w16_0_avx2(-cospi[42], cospi[22], x[26], x[21], x[26]);
- btf_16_w16_0_avx2(cospi[38], cospi[26], x[22], x[22], x[25]);
- btf_16_w16_0_avx2(-cospi[58], cospi[6], x[24], x[23], x[24]);
-
- // stage 3
- btf_16_w16_0_avx2(cospi[60], cospi[4], x[8], x[8], x[15]);
- btf_16_w16_0_avx2(-cospi[36], cospi[28], x[14], x[9], x[14]);
- btf_16_w16_0_avx2(cospi[44], cospi[20], x[10], x[10], x[13]);
- btf_16_w16_0_avx2(-cospi[52], cospi[12], x[12], x[11], x[12]);
- idct32_high16_stage3_avx2(x);
-
- // stage 4
- btf_16_w16_0_avx2(cospi[56], cospi[8], x[4], x[4], x[7]);
- btf_16_w16_0_avx2(-cospi[40], cospi[24], x[6], x[5], x[6]);
- btf_16_adds_subs_avx2(&x[8], &x[9]);
- btf_16_adds_subs_avx2(&x[11], &x[10]);
- btf_16_adds_subs_avx2(&x[12], &x[13]);
- btf_16_adds_subs_avx2(&x[15], &x[14]);
- idct32_high16_stage4_avx2(x, cospi, _r, cos_bit);
-
- // stage 5
- btf_16_w16_0_avx2(cospi[32], cospi[32], x[0], x[0], x[1]);
- btf_16_w16_0_avx2(cospi[48], cospi[16], x[2], x[2], x[3]);
- btf_16_adds_subs_avx2(&x[4], &x[5]);
- btf_16_adds_subs_avx2(&x[7], &x[6]);
- idct32_high24_stage5_avx2(x, cospi, _r, cos_bit);
-
- btf_16_adds_subs_avx2(&x[0], &x[3]);
- btf_16_adds_subs_avx2(&x[1], &x[2]);
- idct32_high28_stage6_avx2(x, cospi, _r, cos_bit);
-
- idct32_stage7_avx2(x, cospi, _r, cos_bit);
- idct32_stage8_avx2(x, cospi, _r, cos_bit);
- idct32_stage9_avx2(output, x);
-}
-
-static void idct32_new_avx2(const __m256i *input, __m256i *output,
- int8_t cos_bit) {
- (void)(cos_bit);
- const int32_t *cospi = cospi_arr(INV_COS_BIT);
- const __m256i _r = _mm256_set1_epi32(1 << (INV_COS_BIT - 1));
-
- __m256i cospi_p62_m02 = pair_set_w16_epi16(cospi[62], -cospi[2]);
- __m256i cospi_p02_p62 = pair_set_w16_epi16(cospi[2], cospi[62]);
- __m256i cospi_p30_m34 = pair_set_w16_epi16(cospi[30], -cospi[34]);
- __m256i cospi_p34_p30 = pair_set_w16_epi16(cospi[34], cospi[30]);
- __m256i cospi_p46_m18 = pair_set_w16_epi16(cospi[46], -cospi[18]);
- __m256i cospi_p18_p46 = pair_set_w16_epi16(cospi[18], cospi[46]);
- __m256i cospi_p14_m50 = pair_set_w16_epi16(cospi[14], -cospi[50]);
- __m256i cospi_p50_p14 = pair_set_w16_epi16(cospi[50], cospi[14]);
- __m256i cospi_p54_m10 = pair_set_w16_epi16(cospi[54], -cospi[10]);
- __m256i cospi_p10_p54 = pair_set_w16_epi16(cospi[10], cospi[54]);
- __m256i cospi_p22_m42 = pair_set_w16_epi16(cospi[22], -cospi[42]);
- __m256i cospi_p42_p22 = pair_set_w16_epi16(cospi[42], cospi[22]);
- __m256i cospi_p38_m26 = pair_set_w16_epi16(cospi[38], -cospi[26]);
- __m256i cospi_p26_p38 = pair_set_w16_epi16(cospi[26], cospi[38]);
- __m256i cospi_p06_m58 = pair_set_w16_epi16(cospi[6], -cospi[58]);
- __m256i cospi_p58_p06 = pair_set_w16_epi16(cospi[58], cospi[6]);
- __m256i cospi_p60_m04 = pair_set_w16_epi16(cospi[60], -cospi[4]);
- __m256i cospi_p04_p60 = pair_set_w16_epi16(cospi[4], cospi[60]);
- __m256i cospi_p28_m36 = pair_set_w16_epi16(cospi[28], -cospi[36]);
- __m256i cospi_p36_p28 = pair_set_w16_epi16(cospi[36], cospi[28]);
- __m256i cospi_p44_m20 = pair_set_w16_epi16(cospi[44], -cospi[20]);
- __m256i cospi_p20_p44 = pair_set_w16_epi16(cospi[20], cospi[44]);
- __m256i cospi_p12_m52 = pair_set_w16_epi16(cospi[12], -cospi[52]);
- __m256i cospi_p52_p12 = pair_set_w16_epi16(cospi[52], cospi[12]);
- __m256i cospi_p56_m08 = pair_set_w16_epi16(cospi[56], -cospi[8]);
- __m256i cospi_p08_p56 = pair_set_w16_epi16(cospi[8], cospi[56]);
- __m256i cospi_p24_m40 = pair_set_w16_epi16(cospi[24], -cospi[40]);
- __m256i cospi_p40_p24 = pair_set_w16_epi16(cospi[40], cospi[24]);
- __m256i cospi_p32_p32 = pair_set_w16_epi16(cospi[32], cospi[32]);
- __m256i cospi_p32_m32 = pair_set_w16_epi16(cospi[32], -cospi[32]);
- __m256i cospi_p48_m16 = pair_set_w16_epi16(cospi[48], -cospi[16]);
- __m256i cospi_p16_p48 = pair_set_w16_epi16(cospi[16], cospi[48]);
-
- // stage 1
- __m256i x1[32];
- x1[0] = input[0];
- x1[1] = input[16];
- x1[2] = input[8];
- x1[3] = input[24];
- x1[4] = input[4];
- x1[5] = input[20];
- x1[6] = input[12];
- x1[7] = input[28];
- x1[8] = input[2];
- x1[9] = input[18];
- x1[10] = input[10];
- x1[11] = input[26];
- x1[12] = input[6];
- x1[13] = input[22];
- x1[14] = input[14];
- x1[15] = input[30];
- x1[16] = input[1];
- x1[17] = input[17];
- x1[18] = input[9];
- x1[19] = input[25];
- x1[20] = input[5];
- x1[21] = input[21];
- x1[22] = input[13];
- x1[23] = input[29];
- x1[24] = input[3];
- x1[25] = input[19];
- x1[26] = input[11];
- x1[27] = input[27];
- x1[28] = input[7];
- x1[29] = input[23];
- x1[30] = input[15];
- x1[31] = input[31];
-
- // stage 2
- btf_16_w16_avx2(cospi_p62_m02, cospi_p02_p62, &x1[16], &x1[31], _r, cos_bit);
- btf_16_w16_avx2(cospi_p30_m34, cospi_p34_p30, &x1[17], &x1[30], _r, cos_bit);
- btf_16_w16_avx2(cospi_p46_m18, cospi_p18_p46, &x1[18], &x1[29], _r, cos_bit);
- btf_16_w16_avx2(cospi_p14_m50, cospi_p50_p14, &x1[19], &x1[28], _r, cos_bit);
- btf_16_w16_avx2(cospi_p54_m10, cospi_p10_p54, &x1[20], &x1[27], _r, cos_bit);
- btf_16_w16_avx2(cospi_p22_m42, cospi_p42_p22, &x1[21], &x1[26], _r, cos_bit);
- btf_16_w16_avx2(cospi_p38_m26, cospi_p26_p38, &x1[22], &x1[25], _r, cos_bit);
- btf_16_w16_avx2(cospi_p06_m58, cospi_p58_p06, &x1[23], &x1[24], _r, cos_bit);
-
- // stage 3
- btf_16_w16_avx2(cospi_p60_m04, cospi_p04_p60, &x1[8], &x1[15], _r, cos_bit);
- btf_16_w16_avx2(cospi_p28_m36, cospi_p36_p28, &x1[9], &x1[14], _r, cos_bit);
- btf_16_w16_avx2(cospi_p44_m20, cospi_p20_p44, &x1[10], &x1[13], _r, cos_bit);
- btf_16_w16_avx2(cospi_p12_m52, cospi_p52_p12, &x1[11], &x1[12], _r, cos_bit);
- idct32_high16_stage3_avx2(x1);
-
- // stage 4
- btf_16_w16_avx2(cospi_p56_m08, cospi_p08_p56, &x1[4], &x1[7], _r, cos_bit);
- btf_16_w16_avx2(cospi_p24_m40, cospi_p40_p24, &x1[5], &x1[6], _r, cos_bit);
- btf_16_adds_subs_avx2(&x1[8], &x1[9]);
- btf_16_adds_subs_avx2(&x1[11], &x1[10]);
- btf_16_adds_subs_avx2(&x1[12], &x1[13]);
- btf_16_adds_subs_avx2(&x1[15], &x1[14]);
- idct32_high16_stage4_avx2(x1, cospi, _r, cos_bit);
-
- // stage 5
- btf_16_w16_avx2(cospi_p32_p32, cospi_p32_m32, &x1[0], &x1[1], _r, cos_bit);
- btf_16_w16_avx2(cospi_p48_m16, cospi_p16_p48, &x1[2], &x1[3], _r, cos_bit);
- btf_16_adds_subs_avx2(&x1[4], &x1[5]);
- btf_16_adds_subs_avx2(&x1[7], &x1[6]);
- idct32_high24_stage5_avx2(x1, cospi, _r, cos_bit);
-
- // stage 6
- btf_16_adds_subs_avx2(&x1[0], &x1[3]);
- btf_16_adds_subs_avx2(&x1[1], &x1[2]);
- idct32_high28_stage6_avx2(x1, cospi, _r, cos_bit);
-
- idct32_stage7_avx2(x1, cospi, _r, cos_bit);
- idct32_stage8_avx2(x1, cospi, _r, cos_bit);
- idct32_stage9_avx2(output, x1);
-}
-
-// Stage 4 of the 64-point inverse DCT, restricted to x[32..63].
-// For each of the four cospi index pairs {a, b} below, two butterflies are
-// run on element pairs (lo, hi) and (lo + 1, hi - 1); the remaining entries
-// of x are not touched here.
-//   x       working buffer of 64 vectors, updated in place
-//   cospi   cosine table the butterfly weights are read from
-//   _r      rounding vector, forwarded to btf_16_w16_avx2
-//   cos_bit forwarded to btf_16_w16_avx2
-static INLINE void idct64_stage4_high32_avx2(__m256i *x, const int32_t *cospi,
-                                             const __m256i _r, int8_t cos_bit) {
-  // {a, b}: indices into cospi[] used by each group of two butterflies.
-  static const int angles[4][2] = {
-    { 4, 60 }, { 36, 28 }, { 20, 44 }, { 52, 12 }
-  };
-  (void)cos_bit;
-  for (int g = 0; g < 4; ++g) {
-    const int a = angles[g][0];
-    const int b = angles[g][1];
-    const __m256i w_ma_pb = pair_set_w16_epi16(-cospi[a], cospi[b]);
-    const __m256i w_pb_pa = pair_set_w16_epi16(cospi[b], cospi[a]);
-    const __m256i w_mb_ma = pair_set_w16_epi16(-cospi[b], -cospi[a]);
-    const int lo = 33 + 4 * g;  // 33, 37, 41, 45
-    const int hi = 62 - 4 * g;  // 62, 58, 54, 50
-    btf_16_w16_avx2(w_ma_pb, w_pb_pa, &x[lo], &x[hi], _r, cos_bit);
-    btf_16_w16_avx2(w_mb_ma, w_ma_pb, &x[lo + 1], &x[hi - 1], _r, cos_bit);
-  }
-}
-
-// Stage 5 of the 64-point inverse DCT for x[16..63].
-// First rotates four element pairs inside x[16..31], then runs add/sub
-// butterflies over x[32..63] in four groups of eight elements.
-//   x       working buffer of 64 vectors, updated in place
-//   cospi   cosine table the rotation weights are read from
-//   _r      rounding vector, forwarded to btf_16_w16_avx2
-//   cos_bit forwarded to btf_16_w16_avx2
-static INLINE void idct64_stage5_high48_avx2(__m256i *x, const int32_t *cospi,
-                                             const __m256i _r, int8_t cos_bit) {
-  // {a, b}: indices into cospi[] used by each group of two rotations.
-  static const int angles[2][2] = { { 8, 56 }, { 40, 24 } };
-  (void)cos_bit;
-  for (int g = 0; g < 2; ++g) {
-    const int a = angles[g][0];
-    const int b = angles[g][1];
-    const __m256i w_ma_pb = pair_set_w16_epi16(-cospi[a], cospi[b]);
-    const __m256i w_pb_pa = pair_set_w16_epi16(cospi[b], cospi[a]);
-    const __m256i w_mb_ma = pair_set_w16_epi16(-cospi[b], -cospi[a]);
-    const int lo = 17 + 4 * g;  // 17, 21
-    const int hi = 30 - 4 * g;  // 30, 26
-    btf_16_w16_avx2(w_ma_pb, w_pb_pa, &x[lo], &x[hi], _r, cos_bit);
-    btf_16_w16_avx2(w_mb_ma, w_ma_pb, &x[lo + 1], &x[hi - 1], _r, cos_bit);
-  }
-  // Add/sub butterflies: within each group of eight, the first operand of
-  // every call is one of the group's outer elements (0, 1, 7, 6).
-  for (int base = 32; base < 64; base += 8) {
-    btf_16_adds_subs_avx2(&x[base], &x[base + 3]);
-    btf_16_adds_subs_avx2(&x[base + 1], &x[base + 2]);
-    btf_16_adds_subs_avx2(&x[base + 7], &x[base + 4]);
-    btf_16_adds_subs_avx2(&x[base + 6], &x[base + 5]);
-  }
-}
-
-// Stage 6 rotations of the 64-point inverse DCT for x[32..63].
-// Two groups of four butterflies are applied: x[34..37] paired with
-// x[61..58], and x[42..45] paired with x[53..50].
-//   x       working buffer of 64 vectors, updated in place
-//   cospi   cosine table the rotation weights are read from
-//   _r      rounding vector, forwarded to btf_16_w16_avx2
-//   cos_bit forwarded to btf_16_w16_avx2
-static INLINE void idct64_stage6_high32_avx2(__m256i *x, const int32_t *cospi,
-                                             const __m256i _r, int8_t cos_bit) {
-  // {a, b}: indices into cospi[] used by each group of four rotations.
-  static const int angles[2][2] = { { 8, 56 }, { 40, 24 } };
-  (void)cos_bit;
-  for (int g = 0; g < 2; ++g) {
-    const int a = angles[g][0];
-    const int b = angles[g][1];
-    const __m256i w_ma_pb = pair_set_w16_epi16(-cospi[a], cospi[b]);
-    const __m256i w_pb_pa = pair_set_w16_epi16(cospi[b], cospi[a]);
-    const __m256i w_mb_ma = pair_set_w16_epi16(-cospi[b], -cospi[a]);
-    const int lo = 34 + 8 * g;  // 34, 42
-    const int hi = 61 - 8 * g;  // 61, 53
-    btf_16_w16_avx2(w_ma_pb, w_pb_pa, &x[lo], &x[hi], _r, cos_bit);
-    btf_16_w16_avx2(w_ma_pb, w_pb_pa, &x[lo + 1], &x[hi - 1], _r, cos_bit);
-    btf_16_w16_avx2(w_mb_ma, w_ma_pb, &x[lo + 2], &x[hi - 2], _r, cos_bit);
-    btf_16_w16_avx2(w_mb_ma, w_ma_pb, &x[lo + 3], &x[hi - 3], _r, cos_bit);
-  }
-}
-
-// Stage 6 of the 64-point inverse DCT for x[16..63]: add/sub butterflies on
-// x[16..31] in two groups of eight, followed by the x[32..63] rotations done
-// by idct64_stage6_high32_avx2. cospi, _r and cos_bit are only forwarded.
-static INLINE void idct64_stage6_high48_avx2(__m256i *x, const int32_t *cospi,
-                                             const __m256i _r, int8_t cos_bit) {
-  for (int base = 16; base < 32; base += 8) {
-    btf_16_adds_subs_avx2(&x[base], &x[base + 3]);
-    btf_16_adds_subs_avx2(&x[base + 1], &x[base + 2]);
-    btf_16_adds_subs_avx2(&x[base + 7], &x[base + 4]);
-    btf_16_adds_subs_avx2(&x[base + 6], &x[base + 5]);
-  }
-  idct64_stage6_high32_avx2(x, cospi, _r, cos_bit);
-}
-
-// Stage 7 of the 64-point inverse DCT for x[16..63].
-// Rotates x[18..21] against x[29..26], then runs add/sub butterflies over
-// x[32..63] in two halves of sixteen elements each.
-//   x       working buffer of 64 vectors, updated in place
-//   cospi   cosine table the rotation weights are read from
-//   _r      rounding vector, forwarded to btf_16_w16_avx2
-//   cos_bit forwarded to btf_16_w16_avx2
-static INLINE void idct64_stage7_high48_avx2(__m256i *x, const int32_t *cospi,
-                                             const __m256i _r, int8_t cos_bit) {
-  (void)cos_bit;
-  const __m256i w_m16_p48 = pair_set_w16_epi16(-cospi[16], cospi[48]);
-  const __m256i w_p48_p16 = pair_set_w16_epi16(cospi[48], cospi[16]);
-  const __m256i w_m48_m16 = pair_set_w16_epi16(-cospi[48], -cospi[16]);
-  for (int i = 0; i < 2; ++i) {
-    btf_16_w16_avx2(w_m16_p48, w_p48_p16, &x[18 + i], &x[29 - i], _r, cos_bit);
-  }
-  for (int i = 0; i < 2; ++i) {
-    btf_16_w16_avx2(w_m48_m16, w_m16_p48, &x[20 + i], &x[27 - i], _r, cos_bit);
-  }
-  for (int base = 32; base < 64; base += 16) {
-    // Lower eight of the half: first operand walks upwards.
-    for (int i = 0; i < 4; ++i) {
-      btf_16_adds_subs_avx2(&x[base + i], &x[base + 7 - i]);
-    }
-    // Upper eight of the half: first operand walks downwards.
-    for (int i = 0; i < 4; ++i) {
-      btf_16_adds_subs_avx2(&x[base + 15 - i], &x[base + 8 + i]);
-    }
-  }
-}
-
-// Stage 8 of the 64-point inverse DCT for x[16..63].
-// Add/sub butterflies across x[16..31], then rotations of x[36..43] against
-// x[59..52].
-//   x       working buffer of 64 vectors, updated in place
-//   cospi   cosine table the rotation weights are read from
-//   _r      rounding vector, forwarded to btf_16_w16_avx2
-//   cos_bit forwarded to btf_16_w16_avx2
-static INLINE void idct64_stage8_high48_avx2(__m256i *x, const int32_t *cospi,
-                                             const __m256i _r, int8_t cos_bit) {
-  (void)cos_bit;
-  const __m256i w_m16_p48 = pair_set_w16_epi16(-cospi[16], cospi[48]);
-  const __m256i w_p48_p16 = pair_set_w16_epi16(cospi[48], cospi[16]);
-  const __m256i w_m48_m16 = pair_set_w16_epi16(-cospi[48], -cospi[16]);
-  for (int i = 0; i < 4; ++i) {
-    btf_16_adds_subs_avx2(&x[16 + i], &x[23 - i]);
-  }
-  for (int i = 0; i < 4; ++i) {
-    btf_16_adds_subs_avx2(&x[31 - i], &x[24 + i]);
-  }
-  for (int i = 0; i < 4; ++i) {
-    btf_16_w16_avx2(w_m16_p48, w_p48_p16, &x[36 + i], &x[59 - i], _r, cos_bit);
-  }
-  for (int i = 0; i < 4; ++i) {
-    btf_16_w16_avx2(w_m48_m16, w_m16_p48, &x[40 + i], &x[55 - i], _r, cos_bit);
-  }
-}
-
-// Stage 9 of the 64-point inverse DCT, covering the whole working buffer:
-//   - add/sub butterflies pairing x[i] with x[15 - i] for i = 0..7,
-//   - rotations of x[20..23] against x[27..24] with the cospi[32] weights,
-//   - add/sub butterflies over x[32..47] and over x[48..63].
-// _r and cos_bit are forwarded to btf_16_w16_avx2.
-static INLINE void idct64_stage9_avx2(__m256i *x, const int32_t *cospi,
-                                      const __m256i _r, int8_t cos_bit) {
-  (void)cos_bit;
-  const __m256i w_m32_p32 = pair_set_w16_epi16(-cospi[32], cospi[32]);
-  const __m256i w_p32_p32 = pair_set_w16_epi16(cospi[32], cospi[32]);
-  for (int i = 0; i < 8; ++i) {
-    btf_16_adds_subs_avx2(&x[i], &x[15 - i]);
-  }
-  for (int i = 0; i < 4; ++i) {
-    btf_16_w16_avx2(w_m32_p32, w_p32_p32, &x[20 + i], &x[27 - i], _r, cos_bit);
-  }
-  for (int i = 0; i < 8; ++i) {
-    btf_16_adds_subs_avx2(&x[32 + i], &x[47 - i]);
-  }
-  // In the top sixteen the first operand is the higher-indexed element.
-  for (int i = 0; i < 8; ++i) {
-    btf_16_adds_subs_avx2(&x[63 - i], &x[48 + i]);
-  }
-}
-
-// Stage 10 of the 64-point inverse DCT:
-//   - add/sub butterflies pairing x[i] with x[31 - i] for i = 0..15,
-//   - rotations of x[40..47] against x[55..48] with the cospi[32] weights.
-// _r and cos_bit are forwarded to btf_16_w16_avx2.
-static INLINE void idct64_stage10_avx2(__m256i *x, const int32_t *cospi,
-                                       const __m256i _r, int8_t cos_bit) {
-  (void)cos_bit;
-  const __m256i w_m32_p32 = pair_set_w16_epi16(-cospi[32], cospi[32]);
-  const __m256i w_p32_p32 = pair_set_w16_epi16(cospi[32], cospi[32]);
-  for (int i = 0; i < 16; ++i) {
-    btf_16_adds_subs_avx2(&x[i], &x[31 - i]);
-  }
-  for (int i = 0; i < 8; ++i) {
-    btf_16_w16_avx2(w_m32_p32, w_p32_p32, &x[40 + i], &x[55 - i], _r, cos_bit);
-  }
-}
-
-// Final stage of the 64-point inverse DCT: for i = 0..31, combines x[i] with
-// x[63 - i] through btf_16_adds_subs_out_avx2 and stores the two results in
-// output[i] and output[63 - i]. All 64 entries of output are written.
-static INLINE void idct64_stage11_avx2(__m256i *output, __m256i *x) {
-  for (int i = 0; i < 32; ++i) {
-    btf_16_adds_subs_out_avx2(&output[i], &output[63 - i], x[i], x[63 - i]);
-  }
-}
-
-// 64-point inverse DCT kernel that reads only input[0].
-// input[0] is passed once through btf_16_w16_0_avx2 with both weights set to
-// cospi[32], giving x[0] and x[1]; every one of the 64 output vectors is then
-// a copy of one of those two. cos_bit is unused.
-static void idct64_low1_new_avx2(const __m256i *input, __m256i *output,
-                                 int8_t cos_bit) {
-  (void)cos_bit;
-  const int32_t *cospi = cospi_arr(INV_COS_BIT);
-
-  // Stages 1-6 reduce to a single butterfly on input[0].
-  __m256i x[2];
-  x[0] = input[0];
-  btf_16_w16_0_avx2(cospi[32], cospi[32], x[0], x[0], x[1]);
-
-  // Stages 7-11 only replicate. The source slot follows the repeating
-  // pattern 0, 1, 1, 0 over i, and output[63 - i] mirrors output[i].
-  for (int i = 0; i < 32; ++i) {
-    const __m256i v = x[((i + 1) >> 1) & 1];
-    output[i] = v;
-    output[63 - i] = v;
-  }
-}
-
-// 64-point inverse DCT kernel that reads only input[0..7] (see stage 1) and
-// writes all 64 output vectors through idct64_stage11_avx2.
-// NOTE(review): the name suggests callers guarantee that coefficients 8..63
-// are zero; that contract is not visible in this function - confirm at the
-// call site.
-//   input   source vectors; only entries 0..7 are read
-//   output  destination; entries 0..63 are written
-//   cos_bit forwarded to the butterfly helpers; the rounding vector _r is
-//           derived from INV_COS_BIT, not from cos_bit
-// Where idct64_low16_new_avx2 / idct64_low32_new_avx2 run a butterfly at the
-// same position, this kernel uses a plain copy for slots it never loaded.
-static void idct64_low8_new_avx2(const __m256i *input, __m256i *output,
-                                 int8_t cos_bit) {
-  (void)cos_bit;
-  const int32_t *cospi = cospi_arr(INV_COS_BIT);
-  const __m256i _r = _mm256_set1_epi32(1 << (INV_COS_BIT - 1));
-  const __m256i cospi_m04_p60 = pair_set_w16_epi16(-cospi[4], cospi[60]);
-  const __m256i cospi_p60_p04 = pair_set_w16_epi16(cospi[60], cospi[4]);
-  const __m256i cospi_m36_p28 = pair_set_w16_epi16(-cospi[36], cospi[28]);
-  const __m256i cospi_m28_m36 = pair_set_w16_epi16(-cospi[28], -cospi[36]);
-  const __m256i cospi_m20_p44 = pair_set_w16_epi16(-cospi[20], cospi[44]);
-  const __m256i cospi_p44_p20 = pair_set_w16_epi16(cospi[44], cospi[20]);
-  const __m256i cospi_m52_p12 = pair_set_w16_epi16(-cospi[52], cospi[12]);
-  const __m256i cospi_m12_m52 = pair_set_w16_epi16(-cospi[12], -cospi[52]);
-  const __m256i cospi_m08_p56 = pair_set_w16_epi16(-cospi[8], cospi[56]);
-  const __m256i cospi_p56_p08 = pair_set_w16_epi16(cospi[56], cospi[8]);
-  const __m256i cospi_m40_p24 = pair_set_w16_epi16(-cospi[40], cospi[24]);
-  const __m256i cospi_m24_m40 = pair_set_w16_epi16(-cospi[24], -cospi[40]);
-  const __m256i cospi_p32_p32 = pair_set_w16_epi16(cospi[32], cospi[32]);
-  const __m256i cospi_m16_p48 = pair_set_w16_epi16(-cospi[16], cospi[48]);
-  const __m256i cospi_p48_p16 = pair_set_w16_epi16(cospi[48], cospi[16]);
-  const __m256i cospi_m32_p32 = pair_set_w16_epi16(-cospi[32], cospi[32]);
-
-  // stage 1: scatter the eight inputs into every eighth slot of x[]
-  __m256i x[64];
-  x[0] = input[0];
-  x[8] = input[4];
-  x[16] = input[2];
-  x[24] = input[6];
-  x[32] = input[1];
-  x[40] = input[5];
-  x[48] = input[3];
-  x[56] = input[7];
-
-  // stage 2: single-input butterflies, each filling two slots of x[32..63]
-  btf_16_w16_0_avx2(cospi[63], cospi[1], x[32], x[32], x[63]);
-  btf_16_w16_0_avx2(-cospi[57], cospi[7], x[56], x[39], x[56]);
-  btf_16_w16_0_avx2(cospi[59], cospi[5], x[40], x[40], x[55]);
-  btf_16_w16_0_avx2(-cospi[61], cospi[3], x[48], x[47], x[48]);
-
-  // stage 3: single-input butterflies for x[16..31], copies for x[32..63]
-  btf_16_w16_0_avx2(cospi[62], cospi[2], x[16], x[16], x[31]);
-  btf_16_w16_0_avx2(-cospi[58], cospi[6], x[24], x[23], x[24]);
-  x[33] = x[32];
-  x[38] = x[39];
-  x[41] = x[40];
-  x[46] = x[47];
-  x[49] = x[48];
-  x[54] = x[55];
-  x[57] = x[56];
-  x[62] = x[63];
-
-  // stage 4
-  btf_16_w16_0_avx2(cospi[60], cospi[4], x[8], x[8], x[15]);
-  x[17] = x[16];
-  x[22] = x[23];
-  x[25] = x[24];
-  x[30] = x[31];
-  btf_16_w16_avx2(cospi_m04_p60, cospi_p60_p04, &x[33], &x[62], _r, cos_bit);
-  btf_16_w16_avx2(cospi_m28_m36, cospi_m36_p28, &x[38], &x[57], _r, cos_bit);
-  btf_16_w16_avx2(cospi_m20_p44, cospi_p44_p20, &x[41], &x[54], _r, cos_bit);
-  btf_16_w16_avx2(cospi_m12_m52, cospi_m52_p12, &x[46], &x[49], _r, cos_bit);
-
-  // stage 5
-  x[9] = x[8];
-  x[14] = x[15];
-  btf_16_w16_avx2(cospi_m08_p56, cospi_p56_p08, &x[17], &x[30], _r, cos_bit);
-  btf_16_w16_avx2(cospi_m24_m40, cospi_m40_p24, &x[22], &x[25], _r, cos_bit);
-  x[35] = x[32];
-  x[34] = x[33];
-  x[36] = x[39];
-  x[37] = x[38];
-  x[43] = x[40];
-  x[42] = x[41];
-  x[44] = x[47];
-  x[45] = x[46];
-  x[51] = x[48];
-  x[50] = x[49];
-  x[52] = x[55];
-  x[53] = x[54];
-  x[59] = x[56];
-  x[58] = x[57];
-  x[60] = x[63];
-  x[61] = x[62];
-
-  // stage 6
-  btf_16_w16_0_avx2(cospi[32], cospi[32], x[0], x[0], x[1]);
-  btf_16_w16_avx2(cospi_m16_p48, cospi_p48_p16, &x[9], &x[14], _r, cos_bit);
-  x[19] = x[16];
-  x[18] = x[17];
-  x[20] = x[23];
-  x[21] = x[22];
-  x[27] = x[24];
-  x[26] = x[25];
-  x[28] = x[31];
-  x[29] = x[30];
-  idct64_stage6_high32_avx2(x, cospi, _r, cos_bit);
-
-  // stage 7
-  x[3] = x[0];
-  x[2] = x[1];
-  x[11] = x[8];
-  x[10] = x[9];
-  x[12] = x[15];
-  x[13] = x[14];
-  idct64_stage7_high48_avx2(x, cospi, _r, cos_bit);
-
-  // stage 8 (x[8], x[9], x[14] and x[15] pass through this stage unchanged)
-  x[7] = x[0];
-  x[6] = x[1];
-  x[5] = x[2];
-  x[4] = x[3];
-  btf_16_w16_avx2(cospi_m32_p32, cospi_p32_p32, &x[10], &x[13], _r, cos_bit);
-  btf_16_w16_avx2(cospi_m32_p32, cospi_p32_p32, &x[11], &x[12], _r, cos_bit);
-  idct64_stage8_high48_avx2(x, cospi, _r, cos_bit);
-
-  // stages 9-11 are shared by all idct64 variants
-  idct64_stage9_avx2(x, cospi, _r, cos_bit);
-  idct64_stage10_avx2(x, cospi, _r, cos_bit);
-  idct64_stage11_avx2(output, x);
-}
-/*
- * 64-point inverse DCT kernel that reads only input[0..15] (see stage 1) and
- * writes all 64 output vectors through idct64_stage11_avx2.
- * NOTE(review): the name suggests callers guarantee that coefficients 16..63
- * are zero; that contract is not visible in this function - confirm at the
- * call site.
- *
- *   input   source vectors; only entries 0..15 are read
- *   output  destination; entries 0..63 are written
- *   cos_bit forwarded to the butterfly helpers; the rounding vector _r is
- *           derived from INV_COS_BIT, not from cos_bit
- *
- * In stages 3-6 plain copies (x[33] = x[32], ...) appear at positions where
- * idct64_low32_new_avx2 runs btf_16_adds_subs_avx2 on the same pair.
- */
-static void idct64_low16_new_avx2(const __m256i *input, __m256i *output,
- int8_t cos_bit) {
- (void)cos_bit;
- const int32_t *cospi = cospi_arr(INV_COS_BIT);
- const __m256i _r = _mm256_set1_epi32(1 << (INV_COS_BIT - 1));  // rounding term passed to btf_16_w16_avx2
-
- const __m256i cospi_p32_p32 = pair_set_w16_epi16(cospi[32], cospi[32]);
- const __m256i cospi_m16_p48 = pair_set_w16_epi16(-cospi[16], cospi[48]);
- const __m256i cospi_p48_p16 = pair_set_w16_epi16(cospi[48], cospi[16]);
- const __m256i cospi_m48_m16 = pair_set_w16_epi16(-cospi[48], -cospi[16]);
- const __m256i cospi_m32_p32 = pair_set_w16_epi16(-cospi[32], cospi[32]);
-
- // stage 1: scatter the 16 inputs into every fourth slot of x[]
- __m256i x[64];
- x[0] = input[0];
- x[4] = input[8];
- x[8] = input[4];
- x[12] = input[12];
- x[16] = input[2];
- x[20] = input[10];
- x[24] = input[6];
- x[28] = input[14];
- x[32] = input[1];
- x[36] = input[9];
- x[40] = input[5];
- x[44] = input[13];
- x[48] = input[3];
- x[52] = input[11];
- x[56] = input[7];
- x[60] = input[15];
-
- // stage 2: single-input butterflies, each filling two slots of x[32..63]
- btf_16_w16_0_avx2(cospi[63], cospi[1], x[32], x[32], x[63]);
- btf_16_w16_0_avx2(-cospi[49], cospi[15], x[60], x[35], x[60]);
- btf_16_w16_0_avx2(cospi[55], cospi[9], x[36], x[36], x[59]);
- btf_16_w16_0_avx2(-cospi[57], cospi[7], x[56], x[39], x[56]);
- btf_16_w16_0_avx2(cospi[59], cospi[5], x[40], x[40], x[55]);
- btf_16_w16_0_avx2(-cospi[53], cospi[11], x[52], x[43], x[52]);
- btf_16_w16_0_avx2(cospi[51], cospi[13], x[44], x[44], x[51]);
- btf_16_w16_0_avx2(-cospi[61], cospi[3], x[48], x[47], x[48]);
-
- // stage 3: single-input butterflies for x[16..31], copies for x[32..63]
- btf_16_w16_0_avx2(cospi[62], cospi[2], x[16], x[16], x[31]);
- btf_16_w16_0_avx2(-cospi[50], cospi[14], x[28], x[19], x[28]);
- btf_16_w16_0_avx2(cospi[54], cospi[10], x[20], x[20], x[27]);
- btf_16_w16_0_avx2(-cospi[58], cospi[6], x[24], x[23], x[24]);
- x[33] = x[32];
- x[34] = x[35];
- x[37] = x[36];
- x[38] = x[39];
- x[41] = x[40];
- x[42] = x[43];
- x[45] = x[44];
- x[46] = x[47];
- x[49] = x[48];
- x[50] = x[51];
- x[53] = x[52];
- x[54] = x[55];
- x[57] = x[56];
- x[58] = x[59];
- x[61] = x[60];
- x[62] = x[63];
-
- // stage 4: x[8..15] butterflies, x[16..31] copies, x[32..63] via shared helper
- btf_16_w16_0_avx2(cospi[60], cospi[4], x[8], x[8], x[15]);
- btf_16_w16_0_avx2(-cospi[52], cospi[12], x[12], x[11], x[12]);
- x[17] = x[16];
- x[18] = x[19];
- x[21] = x[20];
- x[22] = x[23];
- x[25] = x[24];
- x[26] = x[27];
- x[29] = x[28];
- x[30] = x[31];
- idct64_stage4_high32_avx2(x, cospi, _r, cos_bit);
-
- // stage 5: x[4..7] butterfly, x[8..15] copies, x[16..63] via shared helper
- btf_16_w16_0_avx2(cospi[56], cospi[8], x[4], x[4], x[7]);
- x[9] = x[8];
- x[10] = x[11];
- x[13] = x[12];
- x[14] = x[15];
- idct64_stage5_high48_avx2(x, cospi, _r, cos_bit);
-
- // stage 6
- btf_16_w16_0_avx2(cospi[32], cospi[32], x[0], x[0], x[1]);
- x[5] = x[4];
- x[6] = x[7];
- btf_16_w16_avx2(cospi_m16_p48, cospi_p48_p16, &x[9], &x[14], _r, cos_bit);
- btf_16_w16_avx2(cospi_m48_m16, cospi_m16_p48, &x[10], &x[13], _r, cos_bit);
- idct64_stage6_high48_avx2(x, cospi, _r, cos_bit);
-
- // stage 7
- x[3] = x[0];
- x[2] = x[1];
- btf_16_w16_avx2(cospi_m32_p32, cospi_p32_p32, &x[5], &x[6], _r, cos_bit);
- btf_16_adds_subs_avx2(&x[8], &x[11]);
- btf_16_adds_subs_avx2(&x[9], &x[10]);
- btf_16_adds_subs_avx2(&x[15], &x[12]);
- btf_16_adds_subs_avx2(&x[14], &x[13]);
- idct64_stage7_high48_avx2(x, cospi, _r, cos_bit);
-
- // stage 8
- btf_16_adds_subs_avx2(&x[0], &x[7]);
- btf_16_adds_subs_avx2(&x[1], &x[6]);
- btf_16_adds_subs_avx2(&x[2], &x[5]);
- btf_16_adds_subs_avx2(&x[3], &x[4]);
- btf_16_w16_avx2(cospi_m32_p32, cospi_p32_p32, &x[10], &x[13], _r, cos_bit);
- btf_16_w16_avx2(cospi_m32_p32, cospi_p32_p32, &x[11], &x[12], _r, cos_bit);
- idct64_stage8_high48_avx2(x, cospi, _r, cos_bit);
-
- idct64_stage9_avx2(x, cospi, _r, cos_bit);  // stages 9-11 are shared by all idct64 variants
- idct64_stage10_avx2(x, cospi, _r, cos_bit);
- idct64_stage11_avx2(output, x);
-}
-/*
- * 64-point inverse DCT kernel that reads only input[0..31] (see stage 1) and
- * writes all 64 output vectors through idct64_stage11_avx2.
- * NOTE(review): the name suggests callers guarantee that coefficients 32..63
- * are zero; that contract is not visible in this function - confirm at the
- * call site.
- *
- *   input   source vectors; only entries 0..31 are read
- *   output  destination; entries 0..63 are written
- *   cos_bit forwarded to the butterfly helpers; the rounding vector _r is
- *           derived from INV_COS_BIT, not from cos_bit
- *
- * Each stage finishes the part of x[] that became fully populated in the
- * previous stage and expands the next lower part with btf_16_w16_0_avx2.
- */
-static void idct64_low32_new_avx2(const __m256i *input, __m256i *output,
- int8_t cos_bit) {
- (void)cos_bit;
- const int32_t *cospi = cospi_arr(INV_COS_BIT);
- const __m256i _r = _mm256_set1_epi32(1 << (INV_COS_BIT - 1));  // rounding term passed to btf_16_w16_avx2
-
- const __m256i cospi_p32_p32 = pair_set_w16_epi16(cospi[32], cospi[32]);
- const __m256i cospi_m16_p48 = pair_set_w16_epi16(-cospi[16], cospi[48]);
- const __m256i cospi_p48_p16 = pair_set_w16_epi16(cospi[48], cospi[16]);
- const __m256i cospi_m48_m16 = pair_set_w16_epi16(-cospi[48], -cospi[16]);
- const __m256i cospi_m32_p32 = pair_set_w16_epi16(-cospi[32], cospi[32]);
-
- // stage 1: scatter the 32 inputs into the even slots of x[]
- __m256i x[64];
- x[0] = input[0];
- x[2] = input[16];
- x[4] = input[8];
- x[6] = input[24];
- x[8] = input[4];
- x[10] = input[20];
- x[12] = input[12];
- x[14] = input[28];
- x[16] = input[2];
- x[18] = input[18];
- x[20] = input[10];
- x[22] = input[26];
- x[24] = input[6];
- x[26] = input[22];
- x[28] = input[14];
- x[30] = input[30];
- x[32] = input[1];
- x[34] = input[17];
- x[36] = input[9];
- x[38] = input[25];
- x[40] = input[5];
- x[42] = input[21];
- x[44] = input[13];
- x[46] = input[29];
- x[48] = input[3];
- x[50] = input[19];
- x[52] = input[11];
- x[54] = input[27];
- x[56] = input[7];
- x[58] = input[23];
- x[60] = input[15];
- x[62] = input[31];
-
- // stage 2: single-input butterflies that populate all of x[32..63]
- btf_16_w16_0_avx2(cospi[63], cospi[1], x[32], x[32], x[63]);
- btf_16_w16_0_avx2(-cospi[33], cospi[31], x[62], x[33], x[62]);
- btf_16_w16_0_avx2(cospi[47], cospi[17], x[34], x[34], x[61]);
- btf_16_w16_0_avx2(-cospi[49], cospi[15], x[60], x[35], x[60]);
- btf_16_w16_0_avx2(cospi[55], cospi[9], x[36], x[36], x[59]);
- btf_16_w16_0_avx2(-cospi[41], cospi[23], x[58], x[37], x[58]);
- btf_16_w16_0_avx2(cospi[39], cospi[25], x[38], x[38], x[57]);
- btf_16_w16_0_avx2(-cospi[57], cospi[7], x[56], x[39], x[56]);
- btf_16_w16_0_avx2(cospi[59], cospi[5], x[40], x[40], x[55]);
- btf_16_w16_0_avx2(-cospi[37], cospi[27], x[54], x[41], x[54]);
- btf_16_w16_0_avx2(cospi[43], cospi[21], x[42], x[42], x[53]);
- btf_16_w16_0_avx2(-cospi[53], cospi[11], x[52], x[43], x[52]);
- btf_16_w16_0_avx2(cospi[51], cospi[13], x[44], x[44], x[51]);
- btf_16_w16_0_avx2(-cospi[45], cospi[19], x[50], x[45], x[50]);
- btf_16_w16_0_avx2(cospi[35], cospi[29], x[46], x[46], x[49]);
- btf_16_w16_0_avx2(-cospi[61], cospi[3], x[48], x[47], x[48]);
-
- // stage 3: populate x[16..31], add/sub butterflies on x[32..63]
- btf_16_w16_0_avx2(cospi[62], cospi[2], x[16], x[16], x[31]);
- btf_16_w16_0_avx2(-cospi[34], cospi[30], x[30], x[17], x[30]);
- btf_16_w16_0_avx2(cospi[46], cospi[18], x[18], x[18], x[29]);
- btf_16_w16_0_avx2(-cospi[50], cospi[14], x[28], x[19], x[28]);
- btf_16_w16_0_avx2(cospi[54], cospi[10], x[20], x[20], x[27]);
- btf_16_w16_0_avx2(-cospi[42], cospi[22], x[26], x[21], x[26]);
- btf_16_w16_0_avx2(cospi[38], cospi[26], x[22], x[22], x[25]);
- btf_16_w16_0_avx2(-cospi[58], cospi[6], x[24], x[23], x[24]);
- btf_16_adds_subs_avx2(&x[32], &x[33]);
- btf_16_adds_subs_avx2(&x[35], &x[34]);
- btf_16_adds_subs_avx2(&x[36], &x[37]);
- btf_16_adds_subs_avx2(&x[39], &x[38]);
- btf_16_adds_subs_avx2(&x[40], &x[41]);
- btf_16_adds_subs_avx2(&x[43], &x[42]);
- btf_16_adds_subs_avx2(&x[44], &x[45]);
- btf_16_adds_subs_avx2(&x[47], &x[46]);
- btf_16_adds_subs_avx2(&x[48], &x[49]);
- btf_16_adds_subs_avx2(&x[51], &x[50]);
- btf_16_adds_subs_avx2(&x[52], &x[53]);
- btf_16_adds_subs_avx2(&x[55], &x[54]);
- btf_16_adds_subs_avx2(&x[56], &x[57]);
- btf_16_adds_subs_avx2(&x[59], &x[58]);
- btf_16_adds_subs_avx2(&x[60], &x[61]);
- btf_16_adds_subs_avx2(&x[63], &x[62]);
-
- // stage 4: populate x[8..15], add/sub on x[16..31], x[32..63] via shared helper
- btf_16_w16_0_avx2(cospi[60], cospi[4], x[8], x[8], x[15]);
- btf_16_w16_0_avx2(-cospi[36], cospi[28], x[14], x[9], x[14]);
- btf_16_w16_0_avx2(cospi[44], cospi[20], x[10], x[10], x[13]);
- btf_16_w16_0_avx2(-cospi[52], cospi[12], x[12], x[11], x[12]);
- btf_16_adds_subs_avx2(&x[16], &x[17]);
- btf_16_adds_subs_avx2(&x[19], &x[18]);
- btf_16_adds_subs_avx2(&x[20], &x[21]);
- btf_16_adds_subs_avx2(&x[23], &x[22]);
- btf_16_adds_subs_avx2(&x[24], &x[25]);
- btf_16_adds_subs_avx2(&x[27], &x[26]);
- btf_16_adds_subs_avx2(&x[28], &x[29]);
- btf_16_adds_subs_avx2(&x[31], &x[30]);
- idct64_stage4_high32_avx2(x, cospi, _r, cos_bit);
-
- // stage 5: populate x[4..7], add/sub on x[8..15], x[16..63] via shared helper
- btf_16_w16_0_avx2(cospi[56], cospi[8], x[4], x[4], x[7]);
- btf_16_w16_0_avx2(-cospi[40], cospi[24], x[6], x[5], x[6]);
- btf_16_adds_subs_avx2(&x[8], &x[9]);
- btf_16_adds_subs_avx2(&x[11], &x[10]);
- btf_16_adds_subs_avx2(&x[12], &x[13]);
- btf_16_adds_subs_avx2(&x[15], &x[14]);
- idct64_stage5_high48_avx2(x, cospi, _r, cos_bit);
-
- // stage 6: populate x[0..3]; from here on every slot of x[] holds data
- btf_16_w16_0_avx2(cospi[32], cospi[32], x[0], x[0], x[1]);
- btf_16_w16_0_avx2(cospi[48], cospi[16], x[2], x[2], x[3]);
- btf_16_adds_subs_avx2(&x[4], &x[5]);
- btf_16_adds_subs_avx2(&x[7], &x[6]);
- btf_16_w16_avx2(cospi_m16_p48, cospi_p48_p16, &x[9], &x[14], _r, cos_bit);
- btf_16_w16_avx2(cospi_m48_m16, cospi_m16_p48, &x[10], &x[13], _r, cos_bit);
- idct64_stage6_high48_avx2(x, cospi, _r, cos_bit);
-
- // stage 7
- btf_16_adds_subs_avx2(&x[0], &x[3]);
- btf_16_adds_subs_avx2(&x[1], &x[2]);
- btf_16_w16_avx2(cospi_m32_p32, cospi_p32_p32, &x[5], &x[6], _r, cos_bit);
- btf_16_adds_subs_avx2(&x[8], &x[11]);
- btf_16_adds_subs_avx2(&x[9], &x[10]);
- btf_16_adds_subs_avx2(&x[15], &x[12]);
- btf_16_adds_subs_avx2(&x[14], &x[13]);
- idct64_stage7_high48_avx2(x, cospi, _r, cos_bit);
-
- // stage 8
- btf_16_adds_subs_avx2(&x[0], &x[7]);
- btf_16_adds_subs_avx2(&x[1], &x[6]);
- btf_16_adds_subs_avx2(&x[2], &x[5]);
- btf_16_adds_subs_avx2(&x[3], &x[4]);
- btf_16_w16_avx2(cospi_m32_p32, cospi_p32_p32, &x[10], &x[13], _r, cos_bit);
- btf_16_w16_avx2(cospi_m32_p32, cospi_p32_p32, &x[11], &x[12], _r, cos_bit);
- idct64_stage8_high48_avx2(x, cospi, _r, cos_bit);
-
- // stages 9-11: shared by all idct64 variants
- idct64_stage9_avx2(x, cospi, _r, cos_bit);
- idct64_stage10_avx2(x, cospi, _r, cos_bit);
- idct64_stage11_avx2(output, x);
-}
-
-/*
- * 1D functions process 16 pixels at one time.
- *
- * Dispatch table of 1-D inverse-transform kernels, indexed as
- *   [transform-size index][1-D transform type][zero-variant index].
- * lowbd_inv_txfm2d_add_no_identity_avx2 reads it with
- *   - the value of get_txw_idx() / get_txh_idx() as first index,
- *   - hitx_1d_tab[tx_type] / vitx_1d_tab[tx_type] as second index,
- *   - lowbd_txfm_all_1d_zeros_idx[eobx or eoby] as third index,
- * and asserts that the selected entry is not NULL.
- * Within a row the variants are ordered from the fewest inputs read (low1)
- * to the most; NULL marks a combination with no kernel in this table.
- */
-static const transform_1d_avx2
- lowbd_txfm_all_1d_zeros_w16_arr[TX_SIZES][ITX_TYPES_1D][4] = {
- {  // size index 0: no kernels
- { NULL, NULL, NULL, NULL },
- { NULL, NULL, NULL, NULL },
- { NULL, NULL, NULL, NULL },
- },
- { { NULL, NULL, NULL, NULL },  // size index 1: no kernels
- { NULL, NULL, NULL, NULL },
- { NULL, NULL, NULL, NULL } },
- {  // size index 2: 16-point kernels (idct16 row, then iadst16 row)
- { idct16_low1_new_avx2, idct16_low8_new_avx2, idct16_new_avx2, NULL },
- { iadst16_low1_new_avx2, iadst16_low8_new_avx2, iadst16_new_avx2,
- NULL },
- { NULL, NULL, NULL, NULL },
- },
- { { idct32_low1_new_avx2, idct32_low8_new_avx2, idct32_low16_new_avx2,  // size index 3: 32-point idct only
- idct32_new_avx2 },
- { NULL, NULL, NULL, NULL },
- { NULL, NULL, NULL, NULL } },
- { { idct64_low1_new_avx2, idct64_low8_new_avx2, idct64_low16_new_avx2,  // size index 4: 64-point idct only
- idct64_low32_new_avx2 },
- { NULL, NULL, NULL, NULL },
- { NULL, NULL, NULL, NULL } }
- };
-
-/*
- * 2-D inverse transform for transform types whose row and column kernels
- * both come from lowbd_txfm_all_1d_zeros_w16_arr.
- * Only process w >= 16 h >= 16.
- *
- *   input    32-bit coefficients, read with a row pitch of
- *            AOMMIN(32, txfm_size_col)
- *   output   8-bit destination, handed to lowbd_write_buffer_16xn_avx2
- *   stride   row pitch of output
- *   tx_type  selects the 1-D kernels and the flip configuration
- *   tx_size  selects the block dimensions, shifts and cos bits
- *   eob      passed to get_eobx_eoby_scan_default to obtain eobx / eoby
- *
- * Flow, as written below:
- *   1. Row pass, one band of 16 input rows at a time, for bands up to eoby:
- *      load the coefficients as 16-bit values in 16x16 tiles, transpose each
- *      tile, apply round_shift_avx2 when rect_type is +1 or -1, run row_txfm
- *      in place, round-shift by shift[0], then transpose every tile into
- *      buf1 (tiles mirrored and flipped when lr_flip is set).
- *   2. Column pass: run col_txfm in place on each 16-column strip of buf1
- *      and round-shift by shift[1].
- *   3. Hand each strip to lowbd_write_buffer_16xn_avx2 with ud_flip.
- * NOTE(review): the "add" in the name implies the result is accumulated onto
- * output; that happens inside lowbd_write_buffer_16xn_avx2, which is not
- * visible here - confirm.
- */
-static INLINE void lowbd_inv_txfm2d_add_no_identity_avx2(
- const int32_t *input, uint8_t *output, int stride, TX_TYPE tx_type,
- TX_SIZE tx_size, int eob) {
- __m256i buf1[64 * 16];  // 1024 vectors of 32 bytes each, all on the stack
- int eobx, eoby;
- get_eobx_eoby_scan_default(&eobx, &eoby, tx_size, eob);
- const int8_t *shift = inv_txfm_shift_ls[tx_size];
- const int txw_idx = get_txw_idx(tx_size);
- const int txh_idx = get_txh_idx(tx_size);
- const int cos_bit_col = inv_cos_bit_col[txw_idx][txh_idx];
- const int cos_bit_row = inv_cos_bit_row[txw_idx][txh_idx];
- const int txfm_size_col = tx_size_wide[tx_size];
- const int txfm_size_row = tx_size_high[tx_size];
- const int buf_size_w_div16 = txfm_size_col >> 4;  // block width in 16-column strips
- const int buf_size_nonzero_w_div16 = (eobx + 16) >> 4;  // 16-wide tiles loaded per band
- const int buf_size_nonzero_h_div16 = (eoby + 16) >> 4;  // 16-row bands processed by the row pass
- const int input_stride = AOMMIN(32, txfm_size_col);
- const int rect_type = get_rect_tx_log_ratio(txfm_size_col, txfm_size_row);
-
- const int fun_idx_x = lowbd_txfm_all_1d_zeros_idx[eobx];  // picks the zero-variant of the row kernel
- const int fun_idx_y = lowbd_txfm_all_1d_zeros_idx[eoby];  // picks the zero-variant of the column kernel
- const transform_1d_avx2 row_txfm =
- lowbd_txfm_all_1d_zeros_w16_arr[txw_idx][hitx_1d_tab[tx_type]][fun_idx_x];
- const transform_1d_avx2 col_txfm =
- lowbd_txfm_all_1d_zeros_w16_arr[txh_idx][vitx_1d_tab[tx_type]][fun_idx_y];
-
- assert(col_txfm != NULL);  // NULL table entry: no kernel for this size/type
- assert(row_txfm != NULL);
- int ud_flip, lr_flip;
- get_flip_cfg(tx_type, &ud_flip, &lr_flip);
- for (int i = 0; i < buf_size_nonzero_h_div16; i++) {  // row pass, one 16-row band per iteration
- __m256i buf0[64];
- const int32_t *input_row = input + (i << 4) * input_stride;
- for (int j = 0; j < buf_size_nonzero_w_div16; ++j) {
- __m256i *buf0_cur = buf0 + j * 16;
- const int32_t *input_cur = input_row + j * 16;
- load_buffer_32bit_to_16bit_w16_avx2(input_cur, input_stride, buf0_cur,
- 16);
- transpose_16bit_16x16_avx2(buf0_cur, buf0_cur);
- }
- if (rect_type == 1 || rect_type == -1) {
- round_shift_avx2(buf0, buf0, input_stride); // extra rounding step applied only when rect_type is +1 or -1
- }
- row_txfm(buf0, buf0, cos_bit_row);
- round_shift_16bit_w16_avx2(buf0, txfm_size_col, shift[0]);
-
- __m256i *buf1_cur = buf1 + (i << 4);  // this band's 16 rows inside every strip of buf1
- if (lr_flip) {
- for (int j = 0; j < buf_size_w_div16; ++j) {
- __m256i temp[16];
- flip_buf_avx2(buf0 + 16 * j, temp, 16);
- int offset = txfm_size_row * (buf_size_w_div16 - 1 - j);  // mirrored strip position
- transpose_16bit_16x16_avx2(temp, buf1_cur + offset);
- }
- } else {
- for (int j = 0; j < buf_size_w_div16; ++j) {
- transpose_16bit_16x16_avx2(buf0 + 16 * j, buf1_cur + txfm_size_row * j);
- }
- }
- }
- for (int i = 0; i < buf_size_w_div16; i++) {  // column pass, one 16-column strip per iteration
- __m256i *buf1_cur = buf1 + i * txfm_size_row;
- col_txfm(buf1_cur, buf1_cur, cos_bit_col);
- round_shift_16bit_w16_avx2(buf1_cur, txfm_size_row, shift[1]);
- }
- for (int i = 0; i < buf_size_w_div16; i++) {  // write-out, 16 output columns per strip
- lowbd_write_buffer_16xn_avx2(buf1 + i * txfm_size_row, output + 16 * i,
- stride, ud_flip, txfm_size_row);
- }
-}
-
-static INLINE void iidentity_row_16xn_avx2(__m256i *out, const int32_t *input,
- int stride, int shift, int height,
- int txw_idx, int rect_type) {
- const int32_t *input_row = input;
- const __m256i scale = _mm256_set1_epi16(NewSqrt2list[txw_idx]);
- const __m256i _r = _mm256_set1_epi16((1 << (NewSqrt2Bits - 1)) +
- (1 << (NewSqrt2Bits - shift - 1)));
- const __m256i one = _mm256_set1_epi16(1);
- const __m256i scale__r = _mm256_unpacklo_epi16(scale, _r);
- if (rect_type != 1 && rect_type != -1) {
- for (int i = 0; i < height; ++i) {
- const __m256i src = load_32bit_to_16bit_w16_avx2(input_row);
- input_row += stride;
- __m256i lo = _mm256_unpacklo_epi16(src, one);
- __m256i hi = _mm256_unpackhi_epi16(src, one);
- lo = _mm256_madd_epi16(lo, scale__r);
- hi = _mm256_madd_epi16(hi, scale__r);
- lo = _mm256_srai_epi32(lo, NewSqrt2Bits - shift);
- hi = _mm256_srai_epi32(hi, NewSqrt2Bits - shift);
- out[i] = _mm256_packs_epi32(lo, hi);
- }
- } else {
- const __m256i rect_scale =
- _mm256_set1_epi16(NewInvSqrt2 << (15 - NewSqrt2Bits));
- for (int i = 0; i < height; ++i) {
- __m256i src = load_32bit_to_16bit_w16_avx2(input_row);
- src = _mm256_mulhrs_epi16(src, rect_scale);
- input_row += stride;
- __m256i lo = _mm256_unpacklo_epi16(src, one);
- __m256i hi = _mm256_unpackhi_epi16(src, one);
- lo = _mm256_madd_epi16(lo, scale__r);
- hi = _mm256_madd_epi16(hi, scale__r);
- lo = _mm256_srai_epi32(lo, NewSqrt2Bits - shift);
- hi = _mm256_srai_epi32(hi, NewSqrt2Bits - shift);
- out[i] = _mm256_packs_epi32(lo, hi);
- }
- }
-}
-
-static INLINE void iidentity_col_16xn_avx2(uint8_t *output, int stride,
- __m256i *buf, int shift, int height,
- int txh_idx) {
- const __m256i scale = _mm256_set1_epi16(NewSqrt2list[txh_idx]);
- const __m256i scale__r = _mm256_set1_epi16(1 << (NewSqrt2Bits - 1));
- const __m256i shift__r = _mm256_set1_epi32(1 << (-shift - 1));
- const __m256i one = _mm256_set1_epi16(1);
- const __m256i scale_coeff = _mm256_unpacklo_epi16(scale, scale__r);
- for (int h = 0; h < height; ++h) {
- __m256i lo = _mm256_unpacklo_epi16(buf[h], one);
- __m256i hi = _mm256_unpackhi_epi16(buf[h], one);
- lo = _mm256_madd_epi16(lo, scale_coeff);
- hi = _mm256_madd_epi16(hi, scale_coeff);
- lo = _mm256_srai_epi32(lo, NewSqrt2Bits);
- hi = _mm256_srai_epi32(hi, NewSqrt2Bits);
- lo = _mm256_add_epi32(lo, shift__r);
- hi = _mm256_add_epi32(hi, shift__r);
- lo = _mm256_srai_epi32(lo, -shift);
- hi = _mm256_srai_epi32(hi, -shift);
- const __m256i x = _mm256_packs_epi32(lo, hi);
- write_recon_w16_avx2(x, output);
- output += stride;
- }
-}
-
-static INLINE void lowbd_inv_txfm2d_add_idtx_avx2(const int32_t *input,
- uint8_t *output, int stride,
- TX_SIZE tx_size,
- int32_t eob) {
- (void)eob;
- const int8_t *shift = inv_txfm_shift_ls[tx_size];
- const int txw_idx = get_txw_idx(tx_size);
- const int txh_idx = get_txh_idx(tx_size);
- const int txfm_size_col = tx_size_wide[tx_size];
- const int txfm_size_row = tx_size_high[tx_size];
- const int input_stride = AOMMIN(32, txfm_size_col);
- const int row_max = AOMMIN(32, txfm_size_row);
- const int rect_type = get_rect_tx_log_ratio(txfm_size_col, txfm_size_row);
- __m256i buf[32];
- for (int i = 0; i < input_stride; i += 16) {
- iidentity_row_16xn_avx2(buf, input + i, input_stride, shift[0], row_max,
- txw_idx, rect_type);
- iidentity_col_16xn_avx2(output + i, stride, buf, shift[1], row_max,
- txh_idx);
- }
-}
-
-static INLINE void lowbd_inv_txfm2d_add_h_identity_avx2(
- const int32_t *input, uint8_t *output, int stride, TX_TYPE tx_type,
- TX_SIZE tx_size, int eob) {
- int eobx, eoby;
- get_eobx_eoby_scan_h_identity(&eobx, &eoby, tx_size, eob);
- const int8_t *shift = inv_txfm_shift_ls[tx_size];
- const int txw_idx = get_txw_idx(tx_size);
- const int txh_idx = get_txh_idx(tx_size);
- const int cos_bit_col = inv_cos_bit_col[txw_idx][txh_idx];
- const int txfm_size_col = tx_size_wide[tx_size];
- const int txfm_size_row = tx_size_high[tx_size];
- const int txfm_size_col_notzero = AOMMIN(32, txfm_size_col);
- const int input_stride = txfm_size_col_notzero;
- const int buf_size_w_div16 = (eobx + 16) >> 4;
- const int rect_type = get_rect_tx_log_ratio(txfm_size_col, txfm_size_row);
-
- const int fun_idx_y = lowbd_txfm_all_1d_zeros_idx[eoby];
- const transform_1d_avx2 col_txfm =
- lowbd_txfm_all_1d_zeros_w16_arr[txh_idx][vitx_1d_tab[tx_type]][fun_idx_y];
-
- assert(col_txfm != NULL);
-
- int ud_flip, lr_flip;
- get_flip_cfg(tx_type, &ud_flip, &lr_flip);
- for (int i = 0; i < buf_size_w_div16; i++) {
- __m256i buf0[64];
- iidentity_row_16xn_avx2(buf0, input + (i << 4), input_stride, shift[0],
- eoby + 1, txw_idx, rect_type);
- col_txfm(buf0, buf0, cos_bit_col);
- __m256i mshift = _mm256_set1_epi16(1 << (15 + shift[1]));
- int k = ud_flip ? (txfm_size_row - 1) : 0;
- const int step = ud_flip ? -1 : 1;
- for (int j = 0; j < txfm_size_row; ++j, k += step) {
- __m256i res = _mm256_mulhrs_epi16(buf0[k], mshift);
- write_recon_w16_avx2(res, output + (i << 4) + j * stride);
- }
- }
-}
-
-static INLINE void lowbd_inv_txfm2d_add_v_identity_avx2(
- const int32_t *input, uint8_t *output, int stride, TX_TYPE tx_type,
- TX_SIZE tx_size, int eob) {
- __m256i buf1[64];
- int eobx, eoby;
- get_eobx_eoby_scan_v_identity(&eobx, &eoby, tx_size, eob);
- const int8_t *shift = inv_txfm_shift_ls[tx_size];
- const int txw_idx = get_txw_idx(tx_size);
- const int txh_idx = get_txh_idx(tx_size);
- const int cos_bit_row = inv_cos_bit_row[txw_idx][txh_idx];
- const int txfm_size_col = tx_size_wide[tx_size];
- const int txfm_size_row = tx_size_high[tx_size];
- const int buf_size_w_div16 = txfm_size_col >> 4;
- const int buf_size_h_div16 = (eoby + 16) >> 4;
- const int input_stride = AOMMIN(32, txfm_size_col);
- const int rect_type = get_rect_tx_log_ratio(txfm_size_col, txfm_size_row);
-
- const int fun_idx_x = lowbd_txfm_all_1d_zeros_idx[eobx];
- const transform_1d_avx2 row_txfm =
- lowbd_txfm_all_1d_zeros_w16_arr[txw_idx][hitx_1d_tab[tx_type]][fun_idx_x];
-
- assert(row_txfm != NULL);
-
- int ud_flip, lr_flip;
- get_flip_cfg(tx_type, &ud_flip, &lr_flip);
- for (int i = 0; i < buf_size_h_div16; i++) {
- __m256i buf0[64];
- const int32_t *input_row = input + i * input_stride * 16;
- for (int j = 0; j < AOMMIN(4, buf_size_w_div16); ++j) {
- __m256i *buf0_cur = buf0 + j * 16;
- load_buffer_32bit_to_16bit_w16_avx2(input_row + j * 16, input_stride,
- buf0_cur, 16);
- transpose_16bit_16x16_avx2(buf0_cur, buf0_cur);
- }
- if (rect_type == 1 || rect_type == -1) {
- round_shift_avx2(buf0, buf0, input_stride); // rect special code
- }
- row_txfm(buf0, buf0, cos_bit_row);
- round_shift_16bit_w16_avx2(buf0, txfm_size_col, shift[0]);
- __m256i *_buf1 = buf1;
- if (lr_flip) {
- for (int j = 0; j < buf_size_w_div16; ++j) {
- __m256i temp[16];
- flip_buf_avx2(buf0 + 16 * j, temp, 16);
- transpose_16bit_16x16_avx2(temp,
- _buf1 + 16 * (buf_size_w_div16 - 1 - j));
- }
- } else {
- for (int j = 0; j < buf_size_w_div16; ++j) {
- transpose_16bit_16x16_avx2(buf0 + 16 * j, _buf1 + 16 * j);
- }
- }
- for (int j = 0; j < buf_size_w_div16; ++j) {
- iidentity_col_16xn_avx2(output + i * 16 * stride + j * 16, stride,
- buf1 + j * 16, shift[1], 16, txh_idx);
- }
- }
-}
-
-// for 32x32,32x64,64x32,64x64,16x32,32x16,64x16,16x64
-static INLINE void lowbd_inv_txfm2d_add_universe_avx2(
- const int32_t *input, uint8_t *output, int stride, TX_TYPE tx_type,
- TX_SIZE tx_size, int eob) {
- (void)eob;
- switch (tx_type) {
- case DCT_DCT:
- case ADST_DCT: // ADST in vertical, DCT in horizontal
- case DCT_ADST: // DCT in vertical, ADST in horizontal
- case ADST_ADST: // ADST in both directions
- case FLIPADST_DCT:
- case DCT_FLIPADST:
- case FLIPADST_FLIPADST:
- case ADST_FLIPADST:
- case FLIPADST_ADST:
- lowbd_inv_txfm2d_add_no_identity_avx2(input, output, stride, tx_type,
- tx_size, eob);
- break;
- case IDTX:
- lowbd_inv_txfm2d_add_idtx_avx2(input, output, stride, tx_size, eob);
- break;
- case V_DCT:
- case V_ADST:
- case V_FLIPADST:
- lowbd_inv_txfm2d_add_h_identity_avx2(input, output, stride, tx_type,
- tx_size, eob);
- break;
- case H_DCT:
- case H_ADST:
- case H_FLIPADST:
- lowbd_inv_txfm2d_add_v_identity_avx2(input, output, stride, tx_type,
- tx_size, eob);
- break;
- default:
- av1_lowbd_inv_txfm2d_add_ssse3(input, output, stride, tx_type, tx_size,
- eob);
- break;
- }
-}
-
-void av1_lowbd_inv_txfm2d_add_avx2(const int32_t *input, uint8_t *output,
- int stride, TX_TYPE tx_type, TX_SIZE tx_size,
- int eob) {
- switch (tx_size) {
- case TX_4X4:
- case TX_8X8:
- case TX_4X8:
- case TX_8X4:
- case TX_8X16:
- case TX_16X8:
- case TX_4X16:
- case TX_16X4:
- case TX_8X32:
- case TX_32X8:
- av1_lowbd_inv_txfm2d_add_ssse3(input, output, stride, tx_type, tx_size,
- eob);
- break;
- case TX_16X16:
- case TX_32X32:
- case TX_64X64:
- case TX_16X32:
- case TX_32X16:
- case TX_32X64:
- case TX_64X32:
- case TX_16X64:
- case TX_64X16:
- default:
- lowbd_inv_txfm2d_add_universe_avx2(input, output, stride, tx_type,
- tx_size, eob);
- break;
- }
-}
-
-void av1_inv_txfm_add_avx2(const tran_low_t *dqcoeff, uint8_t *dst, int stride,
- const TxfmParam *txfm_param) {
- const TX_TYPE tx_type = txfm_param->tx_type;
- if (!txfm_param->lossless) {
- av1_lowbd_inv_txfm2d_add_avx2(dqcoeff, dst, stride, tx_type,
- txfm_param->tx_size, txfm_param->eob);
- } else {
- av1_inv_txfm_add_c(dqcoeff, dst, stride, txfm_param);
- }
-}
diff --git a/third_party/aom/av1/common/x86/av1_inv_txfm_avx2.h b/third_party/aom/av1/common/x86/av1_inv_txfm_avx2.h
deleted file mode 100644
index f74cbaeaa..000000000
--- a/third_party/aom/av1/common/x86/av1_inv_txfm_avx2.h
+++ /dev/null
@@ -1,71 +0,0 @@
-/*
- * Copyright (c) 2018, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-#ifndef AOM_AV1_COMMON_X86_AV1_INV_TXFM_AVX2_H_
-#define AOM_AV1_COMMON_X86_AV1_INV_TXFM_AVX2_H_
-
-#include <immintrin.h>
-
-#include "config/aom_config.h"
-#include "config/av1_rtcd.h"
-
-#include "aom/aom_integer.h"
-#include "aom_dsp/x86/transpose_sse2.h"
-#include "aom_dsp/x86/txfm_common_sse2.h"
-#include "aom_dsp/x86/txfm_common_avx2.h"
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-// half input is zero
-#define btf_16_w16_0_avx2(w0, w1, in, out0, out1) \
- { \
- const __m256i _w0 = _mm256_set1_epi16(w0 * 8); \
- const __m256i _w1 = _mm256_set1_epi16(w1 * 8); \
- const __m256i _in = in; \
- out0 = _mm256_mulhrs_epi16(_in, _w0); \
- out1 = _mm256_mulhrs_epi16(_in, _w1); \
- }
-
-static INLINE void round_shift_avx2(const __m256i *input, __m256i *output,
- int size) {
- const __m256i scale = _mm256_set1_epi16(NewInvSqrt2 * 8);
- for (int i = 0; i < size; ++i) {
- output[i] = _mm256_mulhrs_epi16(input[i], scale);
- }
-}
-
-static INLINE void write_recon_w16_avx2(__m256i res, uint8_t *output) {
- __m128i pred = _mm_loadu_si128((__m128i const *)(output));
- __m256i u = _mm256_adds_epi16(_mm256_cvtepu8_epi16(pred), res);
- __m128i y = _mm256_castsi256_si128(
- _mm256_permute4x64_epi64(_mm256_packus_epi16(u, u), 168));
- _mm_storeu_si128((__m128i *)(output), y);
-}
-
-static INLINE void lowbd_write_buffer_16xn_avx2(__m256i *in, uint8_t *output,
- int stride, int flipud,
- int height) {
- int j = flipud ? (height - 1) : 0;
- const int step = flipud ? -1 : 1;
- for (int i = 0; i < height; ++i, j += step) {
- write_recon_w16_avx2(in[j], output + i * stride);
- }
-}
-
-void av1_lowbd_inv_txfm2d_add_avx2(const int32_t *input, uint8_t *output,
- int stride, TX_TYPE tx_type, TX_SIZE tx_size,
- int eob);
-#ifdef __cplusplus
-}
-#endif
-
-#endif // AOM_AV1_COMMON_X86_AV1_INV_TXFM_AVX2_H_
diff --git a/third_party/aom/av1/common/x86/av1_inv_txfm_ssse3.c b/third_party/aom/av1/common/x86/av1_inv_txfm_ssse3.c
deleted file mode 100644
index 995bc3da4..000000000
--- a/third_party/aom/av1/common/x86/av1_inv_txfm_ssse3.c
+++ /dev/null
@@ -1,2923 +0,0 @@
-/*
- * Copyright (c) 2018, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#include "config/aom_config.h"
-#include "config/av1_rtcd.h"
-
-#include "av1/common/av1_inv_txfm1d_cfg.h"
-#include "av1/common/x86/av1_inv_txfm_ssse3.h"
-#include "av1/common/x86/av1_txfm_sse2.h"
-
-// TODO(venkatsanampudi@ittiam.com): move this to header file
-
-// Sqrt2, Sqrt2^2, Sqrt2^3, Sqrt2^4, Sqrt2^5
-static int32_t NewSqrt2list[TX_SIZES] = { 5793, 2 * 4096, 2 * 5793, 4 * 4096,
- 4 * 5793 };
-
-// TODO(binpengsmail@gmail.com): replace some for loop with do {} while
-
-static void idct4_new_sse2(const __m128i *input, __m128i *output,
- int8_t cos_bit) {
- (void)cos_bit;
- const int32_t *cospi = cospi_arr(INV_COS_BIT);
- const __m128i __rounding = _mm_set1_epi32(1 << (INV_COS_BIT - 1));
-
- const __m128i cospi_p32_p32 = pair_set_epi16(cospi[32], cospi[32]);
- const __m128i cospi_p32_m32 = pair_set_epi16(cospi[32], -cospi[32]);
- const __m128i cospi_p48_m16 = pair_set_epi16(cospi[48], -cospi[16]);
- const __m128i cospi_p16_p48 = pair_set_epi16(cospi[16], cospi[48]);
-
- // stage 1
- __m128i x[4];
- x[0] = input[0];
- x[1] = input[2];
- x[2] = input[1];
- x[3] = input[3];
-
- // stage 2
- btf_16_sse2(cospi_p32_p32, cospi_p32_m32, x[0], x[1], x[0], x[1]);
- btf_16_sse2(cospi_p48_m16, cospi_p16_p48, x[2], x[3], x[2], x[3]);
-
- // stage 3
- btf_16_adds_subs_out_sse2(output[0], output[3], x[0], x[3]);
- btf_16_adds_subs_out_sse2(output[1], output[2], x[1], x[2]);
-}
-
-void idct4_w4_new_sse2(const __m128i *input, __m128i *output, int8_t cos_bit) {
- (void)cos_bit;
- const int32_t *cospi = cospi_arr(INV_COS_BIT);
- const __m128i __rounding = _mm_set1_epi32(1 << (INV_COS_BIT - 1));
-
- const __m128i cospi_p32_p32 = pair_set_epi16(cospi[32], cospi[32]);
- const __m128i cospi_p32_m32 = pair_set_epi16(cospi[32], -cospi[32]);
- const __m128i cospi_p48_m16 = pair_set_epi16(cospi[48], -cospi[16]);
- const __m128i cospi_p16_p48 = pair_set_epi16(cospi[16], cospi[48]);
-
- // stage 1
- __m128i x[4];
- x[0] = input[0];
- x[1] = input[2];
- x[2] = input[1];
- x[3] = input[3];
-
- // stage 2
- btf_16_4p_sse2(cospi_p32_p32, cospi_p32_m32, x[0], x[1], x[0], x[1]);
- btf_16_4p_sse2(cospi_p48_m16, cospi_p16_p48, x[2], x[3], x[2], x[3]);
-
- // stage 3
- btf_16_adds_subs_out_sse2(output[0], output[3], x[0], x[3]);
- btf_16_adds_subs_out_sse2(output[1], output[2], x[1], x[2]);
-}
-
-void idct8_low1_new_ssse3(const __m128i *input, __m128i *output,
- int8_t cos_bit) {
- (void)cos_bit;
- const int32_t *cospi = cospi_arr(INV_COS_BIT);
-
- // stage 1
- __m128i x[2];
- x[0] = input[0];
-
- // stage 2
- // stage 3
- btf_16_ssse3(cospi[32], cospi[32], x[0], x[0], x[1]);
-
- // stage 4
- // stage 5
- output[0] = x[0];
- output[7] = x[0];
- output[1] = x[1];
- output[6] = x[1];
- output[2] = x[1];
- output[5] = x[1];
- output[3] = x[0];
- output[4] = x[0];
-}
-
-void idct8_new_sse2(const __m128i *input, __m128i *output, int8_t cos_bit) {
- (void)cos_bit;
- const int32_t *cospi = cospi_arr(INV_COS_BIT);
- const __m128i __rounding = _mm_set1_epi32(1 << (INV_COS_BIT - 1));
-
- const __m128i cospi_p56_m08 = pair_set_epi16(cospi[56], -cospi[8]);
- const __m128i cospi_p08_p56 = pair_set_epi16(cospi[8], cospi[56]);
- const __m128i cospi_p24_m40 = pair_set_epi16(cospi[24], -cospi[40]);
- const __m128i cospi_p40_p24 = pair_set_epi16(cospi[40], cospi[24]);
- const __m128i cospi_p32_p32 = pair_set_epi16(cospi[32], cospi[32]);
- const __m128i cospi_p32_m32 = pair_set_epi16(cospi[32], -cospi[32]);
- const __m128i cospi_p48_m16 = pair_set_epi16(cospi[48], -cospi[16]);
- const __m128i cospi_p16_p48 = pair_set_epi16(cospi[16], cospi[48]);
- const __m128i cospi_m32_p32 = pair_set_epi16(-cospi[32], cospi[32]);
-
- // stage 1
- __m128i x[8];
- x[0] = input[0];
- x[1] = input[4];
- x[2] = input[2];
- x[3] = input[6];
- x[4] = input[1];
- x[5] = input[5];
- x[6] = input[3];
- x[7] = input[7];
-
- // stage 2
- btf_16_sse2(cospi_p56_m08, cospi_p08_p56, x[4], x[7], x[4], x[7]);
- btf_16_sse2(cospi_p24_m40, cospi_p40_p24, x[5], x[6], x[5], x[6]);
-
- // stage 3
- btf_16_sse2(cospi_p32_p32, cospi_p32_m32, x[0], x[1], x[0], x[1]);
- btf_16_sse2(cospi_p48_m16, cospi_p16_p48, x[2], x[3], x[2], x[3]);
- btf_16_adds_subs_sse2(x[4], x[5]);
- btf_16_subs_adds_sse2(x[7], x[6]);
-
- // stage 4
- btf_16_adds_subs_sse2(x[0], x[3]);
- btf_16_adds_subs_sse2(x[1], x[2]);
- btf_16_sse2(cospi_m32_p32, cospi_p32_p32, x[5], x[6], x[5], x[6]);
-
- // stage 5
- btf_16_adds_subs_out_sse2(output[0], output[7], x[0], x[7]);
- btf_16_adds_subs_out_sse2(output[1], output[6], x[1], x[6]);
- btf_16_adds_subs_out_sse2(output[2], output[5], x[2], x[5]);
- btf_16_adds_subs_out_sse2(output[3], output[4], x[3], x[4]);
-}
-
-void idct8_w4_new_sse2(const __m128i *input, __m128i *output, int8_t cos_bit) {
- (void)cos_bit;
- const int32_t *cospi = cospi_arr(INV_COS_BIT);
- const __m128i __rounding = _mm_set1_epi32(1 << (INV_COS_BIT - 1));
-
- const __m128i cospi_p56_m08 = pair_set_epi16(cospi[56], -cospi[8]);
- const __m128i cospi_p08_p56 = pair_set_epi16(cospi[8], cospi[56]);
- const __m128i cospi_p24_m40 = pair_set_epi16(cospi[24], -cospi[40]);
- const __m128i cospi_p40_p24 = pair_set_epi16(cospi[40], cospi[24]);
- const __m128i cospi_p32_p32 = pair_set_epi16(cospi[32], cospi[32]);
- const __m128i cospi_p32_m32 = pair_set_epi16(cospi[32], -cospi[32]);
- const __m128i cospi_p48_m16 = pair_set_epi16(cospi[48], -cospi[16]);
- const __m128i cospi_p16_p48 = pair_set_epi16(cospi[16], cospi[48]);
- const __m128i cospi_m32_p32 = pair_set_epi16(-cospi[32], cospi[32]);
-
- // stage 1
- __m128i x[8];
- x[0] = input[0];
- x[1] = input[4];
- x[2] = input[2];
- x[3] = input[6];
- x[4] = input[1];
- x[5] = input[5];
- x[6] = input[3];
- x[7] = input[7];
-
- // stage 2
- btf_16_4p_sse2(cospi_p56_m08, cospi_p08_p56, x[4], x[7], x[4], x[7]);
- btf_16_4p_sse2(cospi_p24_m40, cospi_p40_p24, x[5], x[6], x[5], x[6]);
-
- // stage 3
- btf_16_4p_sse2(cospi_p32_p32, cospi_p32_m32, x[0], x[1], x[0], x[1]);
- btf_16_4p_sse2(cospi_p48_m16, cospi_p16_p48, x[2], x[3], x[2], x[3]);
- btf_16_adds_subs_sse2(x[4], x[5]);
- btf_16_subs_adds_sse2(x[7], x[6]);
-
- // stage 4
- btf_16_adds_subs_sse2(x[0], x[3]);
- btf_16_adds_subs_sse2(x[1], x[2]);
- btf_16_4p_sse2(cospi_m32_p32, cospi_p32_p32, x[5], x[6], x[5], x[6]);
-
- // stage 5
- btf_16_adds_subs_out_sse2(output[0], output[7], x[0], x[7]);
- btf_16_adds_subs_out_sse2(output[1], output[6], x[1], x[6]);
- btf_16_adds_subs_out_sse2(output[2], output[5], x[2], x[5]);
- btf_16_adds_subs_out_sse2(output[3], output[4], x[3], x[4]);
-}
-
-static INLINE void idct16_stage5_sse2(__m128i *x, const int32_t *cospi,
- const __m128i __rounding,
- int8_t cos_bit) {
- const __m128i cospi_m32_p32 = pair_set_epi16(-cospi[32], cospi[32]);
- const __m128i cospi_p32_p32 = pair_set_epi16(cospi[32], cospi[32]);
- btf_16_adds_subs_sse2(x[0], x[3]);
- btf_16_adds_subs_sse2(x[1], x[2]);
- btf_16_sse2(cospi_m32_p32, cospi_p32_p32, x[5], x[6], x[5], x[6]);
- btf_16_adds_subs_sse2(x[8], x[11]);
- btf_16_adds_subs_sse2(x[9], x[10]);
- btf_16_subs_adds_sse2(x[15], x[12]);
- btf_16_subs_adds_sse2(x[14], x[13]);
-}
-
-static INLINE void idct16_stage6_sse2(__m128i *x, const int32_t *cospi,
- const __m128i __rounding,
- int8_t cos_bit) {
- const __m128i cospi_m32_p32 = pair_set_epi16(-cospi[32], cospi[32]);
- const __m128i cospi_p32_p32 = pair_set_epi16(cospi[32], cospi[32]);
- btf_16_adds_subs_sse2(x[0], x[7]);
- btf_16_adds_subs_sse2(x[1], x[6]);
- btf_16_adds_subs_sse2(x[2], x[5]);
- btf_16_adds_subs_sse2(x[3], x[4]);
- btf_16_sse2(cospi_m32_p32, cospi_p32_p32, x[10], x[13], x[10], x[13]);
- btf_16_sse2(cospi_m32_p32, cospi_p32_p32, x[11], x[12], x[11], x[12]);
-}
-
-static INLINE void idct16_stage7_sse2(__m128i *output, __m128i *x) {
- btf_16_adds_subs_out_sse2(output[0], output[15], x[0], x[15]);
- btf_16_adds_subs_out_sse2(output[1], output[14], x[1], x[14]);
- btf_16_adds_subs_out_sse2(output[2], output[13], x[2], x[13]);
- btf_16_adds_subs_out_sse2(output[3], output[12], x[3], x[12]);
- btf_16_adds_subs_out_sse2(output[4], output[11], x[4], x[11]);
- btf_16_adds_subs_out_sse2(output[5], output[10], x[5], x[10]);
- btf_16_adds_subs_out_sse2(output[6], output[9], x[6], x[9]);
- btf_16_adds_subs_out_sse2(output[7], output[8], x[7], x[8]);
-}
-
-static void idct16_low1_new_ssse3(const __m128i *input, __m128i *output,
- int8_t cos_bit) {
- (void)cos_bit;
- const int32_t *cospi = cospi_arr(INV_COS_BIT);
-
- // stage 1
- __m128i x[2];
- x[0] = input[0];
-
- // stage 2
- // stage 3
- // stage 4
- btf_16_ssse3(cospi[32], cospi[32], x[0], x[0], x[1]);
-
- // stage 5
- // stage 6
- // stage 7
- output[0] = x[0];
- output[15] = x[0];
- output[1] = x[1];
- output[14] = x[1];
- output[2] = x[1];
- output[13] = x[1];
- output[3] = x[0];
- output[12] = x[0];
- output[4] = x[0];
- output[11] = x[0];
- output[5] = x[1];
- output[10] = x[1];
- output[6] = x[1];
- output[9] = x[1];
- output[7] = x[0];
- output[8] = x[0];
-}
-
-static void idct16_low8_new_ssse3(const __m128i *input, __m128i *output,
- int8_t cos_bit) {
- (void)cos_bit;
- const int32_t *cospi = cospi_arr(INV_COS_BIT);
- const __m128i __rounding = _mm_set1_epi32(1 << (INV_COS_BIT - 1));
- const __m128i cospi_m16_p48 = pair_set_epi16(-cospi[16], cospi[48]);
- const __m128i cospi_p48_p16 = pair_set_epi16(cospi[48], cospi[16]);
- const __m128i cospi_m48_m16 = pair_set_epi16(-cospi[48], -cospi[16]);
-
- // stage 1
- __m128i x[16];
- x[0] = input[0];
- x[2] = input[4];
- x[4] = input[2];
- x[6] = input[6];
- x[8] = input[1];
- x[10] = input[5];
- x[12] = input[3];
- x[14] = input[7];
-
- // stage 2
- btf_16_ssse3(cospi[60], cospi[4], x[8], x[8], x[15]);
- btf_16_ssse3(-cospi[36], cospi[28], x[14], x[9], x[14]);
- btf_16_ssse3(cospi[44], cospi[20], x[10], x[10], x[13]);
- btf_16_ssse3(-cospi[52], cospi[12], x[12], x[11], x[12]);
-
- // stage 3
- btf_16_ssse3(cospi[56], cospi[8], x[4], x[4], x[7]);
- btf_16_ssse3(-cospi[40], cospi[24], x[6], x[5], x[6]);
- btf_16_adds_subs_sse2(x[8], x[9]);
- btf_16_subs_adds_sse2(x[11], x[10]);
- btf_16_adds_subs_sse2(x[12], x[13]);
- btf_16_subs_adds_sse2(x[15], x[14]);
-
- // stage 4
- btf_16_ssse3(cospi[32], cospi[32], x[0], x[0], x[1]);
- btf_16_ssse3(cospi[48], cospi[16], x[2], x[2], x[3]);
- btf_16_adds_subs_sse2(x[4], x[5]);
- btf_16_subs_adds_sse2(x[7], x[6]);
- btf_16_sse2(cospi_m16_p48, cospi_p48_p16, x[9], x[14], x[9], x[14]);
- btf_16_sse2(cospi_m48_m16, cospi_m16_p48, x[10], x[13], x[10], x[13]);
-
- idct16_stage5_sse2(x, cospi, __rounding, cos_bit);
- idct16_stage6_sse2(x, cospi, __rounding, cos_bit);
- idct16_stage7_sse2(output, x);
-}
-
-void idct16_new_sse2(const __m128i *input, __m128i *output, int8_t cos_bit) {
- (void)cos_bit;
- const int32_t *cospi = cospi_arr(INV_COS_BIT);
- const __m128i __rounding = _mm_set1_epi32(1 << (INV_COS_BIT - 1));
-
- const __m128i cospi_p60_m04 = pair_set_epi16(cospi[60], -cospi[4]);
- const __m128i cospi_p04_p60 = pair_set_epi16(cospi[4], cospi[60]);
- const __m128i cospi_p28_m36 = pair_set_epi16(cospi[28], -cospi[36]);
- const __m128i cospi_p36_p28 = pair_set_epi16(cospi[36], cospi[28]);
- const __m128i cospi_p44_m20 = pair_set_epi16(cospi[44], -cospi[20]);
- const __m128i cospi_p20_p44 = pair_set_epi16(cospi[20], cospi[44]);
- const __m128i cospi_p12_m52 = pair_set_epi16(cospi[12], -cospi[52]);
- const __m128i cospi_p52_p12 = pair_set_epi16(cospi[52], cospi[12]);
- const __m128i cospi_p56_m08 = pair_set_epi16(cospi[56], -cospi[8]);
- const __m128i cospi_p08_p56 = pair_set_epi16(cospi[8], cospi[56]);
- const __m128i cospi_p24_m40 = pair_set_epi16(cospi[24], -cospi[40]);
- const __m128i cospi_p40_p24 = pair_set_epi16(cospi[40], cospi[24]);
- const __m128i cospi_p32_p32 = pair_set_epi16(cospi[32], cospi[32]);
- const __m128i cospi_p32_m32 = pair_set_epi16(cospi[32], -cospi[32]);
- const __m128i cospi_p48_m16 = pair_set_epi16(cospi[48], -cospi[16]);
- const __m128i cospi_p16_p48 = pair_set_epi16(cospi[16], cospi[48]);
- const __m128i cospi_m16_p48 = pair_set_epi16(-cospi[16], cospi[48]);
- const __m128i cospi_p48_p16 = pair_set_epi16(cospi[48], cospi[16]);
- const __m128i cospi_m48_m16 = pair_set_epi16(-cospi[48], -cospi[16]);
-
- // stage 1
- __m128i x[16];
- x[0] = input[0];
- x[1] = input[8];
- x[2] = input[4];
- x[3] = input[12];
- x[4] = input[2];
- x[5] = input[10];
- x[6] = input[6];
- x[7] = input[14];
- x[8] = input[1];
- x[9] = input[9];
- x[10] = input[5];
- x[11] = input[13];
- x[12] = input[3];
- x[13] = input[11];
- x[14] = input[7];
- x[15] = input[15];
-
- // stage 2
- btf_16_sse2(cospi_p60_m04, cospi_p04_p60, x[8], x[15], x[8], x[15]);
- btf_16_sse2(cospi_p28_m36, cospi_p36_p28, x[9], x[14], x[9], x[14]);
- btf_16_sse2(cospi_p44_m20, cospi_p20_p44, x[10], x[13], x[10], x[13]);
- btf_16_sse2(cospi_p12_m52, cospi_p52_p12, x[11], x[12], x[11], x[12]);
-
- // stage 3
- btf_16_sse2(cospi_p56_m08, cospi_p08_p56, x[4], x[7], x[4], x[7]);
- btf_16_sse2(cospi_p24_m40, cospi_p40_p24, x[5], x[6], x[5], x[6]);
- btf_16_adds_subs_sse2(x[8], x[9]);
- btf_16_subs_adds_sse2(x[11], x[10]);
- btf_16_adds_subs_sse2(x[12], x[13]);
- btf_16_subs_adds_sse2(x[15], x[14]);
-
- // stage 4
- btf_16_sse2(cospi_p32_p32, cospi_p32_m32, x[0], x[1], x[0], x[1]);
- btf_16_sse2(cospi_p48_m16, cospi_p16_p48, x[2], x[3], x[2], x[3]);
- btf_16_adds_subs_sse2(x[4], x[5]);
- btf_16_subs_adds_sse2(x[7], x[6]);
- btf_16_sse2(cospi_m16_p48, cospi_p48_p16, x[9], x[14], x[9], x[14]);
- btf_16_sse2(cospi_m48_m16, cospi_m16_p48, x[10], x[13], x[10], x[13]);
-
- // stage 5~7
- idct16_stage5_sse2(x, cospi, __rounding, cos_bit);
- idct16_stage6_sse2(x, cospi, __rounding, cos_bit);
- idct16_stage7_sse2(output, x);
-}
-
-void idct16_w4_new_sse2(const __m128i *input, __m128i *output, int8_t cos_bit) {
- (void)cos_bit;
- const int32_t *cospi = cospi_arr(INV_COS_BIT);
- const __m128i __rounding = _mm_set1_epi32(1 << (INV_COS_BIT - 1));
-
- const __m128i cospi_p60_m04 = pair_set_epi16(cospi[60], -cospi[4]);
- const __m128i cospi_p04_p60 = pair_set_epi16(cospi[4], cospi[60]);
- const __m128i cospi_p28_m36 = pair_set_epi16(cospi[28], -cospi[36]);
- const __m128i cospi_p36_p28 = pair_set_epi16(cospi[36], cospi[28]);
- const __m128i cospi_p44_m20 = pair_set_epi16(cospi[44], -cospi[20]);
- const __m128i cospi_p20_p44 = pair_set_epi16(cospi[20], cospi[44]);
- const __m128i cospi_p12_m52 = pair_set_epi16(cospi[12], -cospi[52]);
- const __m128i cospi_p52_p12 = pair_set_epi16(cospi[52], cospi[12]);
- const __m128i cospi_p56_m08 = pair_set_epi16(cospi[56], -cospi[8]);
- const __m128i cospi_p08_p56 = pair_set_epi16(cospi[8], cospi[56]);
- const __m128i cospi_p24_m40 = pair_set_epi16(cospi[24], -cospi[40]);
- const __m128i cospi_p40_p24 = pair_set_epi16(cospi[40], cospi[24]);
- const __m128i cospi_p32_p32 = pair_set_epi16(cospi[32], cospi[32]);
- const __m128i cospi_p32_m32 = pair_set_epi16(cospi[32], -cospi[32]);
- const __m128i cospi_p48_m16 = pair_set_epi16(cospi[48], -cospi[16]);
- const __m128i cospi_p16_p48 = pair_set_epi16(cospi[16], cospi[48]);
- const __m128i cospi_m16_p48 = pair_set_epi16(-cospi[16], cospi[48]);
- const __m128i cospi_p48_p16 = pair_set_epi16(cospi[48], cospi[16]);
- const __m128i cospi_m48_m16 = pair_set_epi16(-cospi[48], -cospi[16]);
- const __m128i cospi_m32_p32 = pair_set_epi16(-cospi[32], cospi[32]);
-
- // stage 1
- __m128i x[16];
- x[0] = input[0];
- x[1] = input[8];
- x[2] = input[4];
- x[3] = input[12];
- x[4] = input[2];
- x[5] = input[10];
- x[6] = input[6];
- x[7] = input[14];
- x[8] = input[1];
- x[9] = input[9];
- x[10] = input[5];
- x[11] = input[13];
- x[12] = input[3];
- x[13] = input[11];
- x[14] = input[7];
- x[15] = input[15];
-
- // stage 2
- btf_16_4p_sse2(cospi_p60_m04, cospi_p04_p60, x[8], x[15], x[8], x[15]);
- btf_16_4p_sse2(cospi_p28_m36, cospi_p36_p28, x[9], x[14], x[9], x[14]);
- btf_16_4p_sse2(cospi_p44_m20, cospi_p20_p44, x[10], x[13], x[10], x[13]);
- btf_16_4p_sse2(cospi_p12_m52, cospi_p52_p12, x[11], x[12], x[11], x[12]);
-
- // stage 3
- btf_16_4p_sse2(cospi_p56_m08, cospi_p08_p56, x[4], x[7], x[4], x[7]);
- btf_16_4p_sse2(cospi_p24_m40, cospi_p40_p24, x[5], x[6], x[5], x[6]);
- btf_16_adds_subs_sse2(x[8], x[9]);
- btf_16_subs_adds_sse2(x[11], x[10]);
- btf_16_adds_subs_sse2(x[12], x[13]);
- btf_16_subs_adds_sse2(x[15], x[14]);
-
- // stage 4
- btf_16_4p_sse2(cospi_p32_p32, cospi_p32_m32, x[0], x[1], x[0], x[1]);
- btf_16_4p_sse2(cospi_p48_m16, cospi_p16_p48, x[2], x[3], x[2], x[3]);
- btf_16_adds_subs_sse2(x[4], x[5]);
- btf_16_subs_adds_sse2(x[7], x[6]);
- btf_16_4p_sse2(cospi_m16_p48, cospi_p48_p16, x[9], x[14], x[9], x[14]);
- btf_16_4p_sse2(cospi_m48_m16, cospi_m16_p48, x[10], x[13], x[10], x[13]);
-
- // stage 5
- btf_16_adds_subs_sse2(x[0], x[3]);
- btf_16_adds_subs_sse2(x[1], x[2]);
- btf_16_4p_sse2(cospi_m32_p32, cospi_p32_p32, x[5], x[6], x[5], x[6]);
- btf_16_adds_subs_sse2(x[8], x[11]);
- btf_16_adds_subs_sse2(x[9], x[10]);
- btf_16_subs_adds_sse2(x[15], x[12]);
- btf_16_subs_adds_sse2(x[14], x[13]);
-
- // stage 6
- btf_16_adds_subs_sse2(x[0], x[7]);
- btf_16_adds_subs_sse2(x[1], x[6]);
- btf_16_adds_subs_sse2(x[2], x[5]);
- btf_16_adds_subs_sse2(x[3], x[4]);
- btf_16_4p_sse2(cospi_m32_p32, cospi_p32_p32, x[10], x[13], x[10], x[13]);
- btf_16_4p_sse2(cospi_m32_p32, cospi_p32_p32, x[11], x[12], x[11], x[12]);
-
- // stage 7
- idct16_stage7_sse2(output, x);
-}
-
-static INLINE void idct32_high16_stage3_sse2(__m128i *x) {
- btf_16_adds_subs_sse2(x[16], x[17]);
- btf_16_subs_adds_sse2(x[19], x[18]);
- btf_16_adds_subs_sse2(x[20], x[21]);
- btf_16_subs_adds_sse2(x[23], x[22]);
- btf_16_adds_subs_sse2(x[24], x[25]);
- btf_16_subs_adds_sse2(x[27], x[26]);
- btf_16_adds_subs_sse2(x[28], x[29]);
- btf_16_subs_adds_sse2(x[31], x[30]);
-}
-
-// Stage 4 of the 32-point inverse DCT for the upper half of the working
-// buffer: applies btf_16_sse2 in place to the pairs (17,30), (18,29), (21,26)
-// and (22,25), using weight pairs built from cospi[8]/cospi[56] and
-// cospi[24]/cospi[40]. The other entries of x are left untouched.
-//
-// NOTE(review): `__rounding` and `cos_bit` are never named in this body;
-// presumably btf_16_sse2 is a macro that picks them up from the enclosing
-// scope, so the parameter names must not be changed -- confirm against the
-// macro definition.
-static INLINE void idct32_high16_stage4_sse2(__m128i *x, const int32_t *cospi,
- const __m128i __rounding,
- int8_t cos_bit) {
- const __m128i cospi_m08_p56 = pair_set_epi16(-cospi[8], cospi[56]);
- const __m128i cospi_p56_p08 = pair_set_epi16(cospi[56], cospi[8]);
- const __m128i cospi_m56_m08 = pair_set_epi16(-cospi[56], -cospi[8]);
- const __m128i cospi_m40_p24 = pair_set_epi16(-cospi[40], cospi[24]);
- const __m128i cospi_p24_p40 = pair_set_epi16(cospi[24], cospi[40]);
- const __m128i cospi_m24_m40 = pair_set_epi16(-cospi[24], -cospi[40]);
- btf_16_sse2(cospi_m08_p56, cospi_p56_p08, x[17], x[30], x[17], x[30]);
- btf_16_sse2(cospi_m56_m08, cospi_m08_p56, x[18], x[29], x[18], x[29]);
- btf_16_sse2(cospi_m40_p24, cospi_p24_p40, x[21], x[26], x[21], x[26]);
- btf_16_sse2(cospi_m24_m40, cospi_m40_p24, x[22], x[25], x[22], x[25]);
-}
-
-// Stage 5 of the 32-point inverse DCT for x[8..31]: applies btf_16_sse2 to the
-// pairs (9,14) and (10,13) with cospi[16]/cospi[48] weights, then runs the
-// add/sub butterflies over x[16..31] in groups of four. x[0..7] is not touched
-// here; callers handle that part of the stage themselves.
-//
-// NOTE(review): `__rounding` and `cos_bit` are presumably consumed implicitly
-// by the btf_16_sse2 macro -- confirm against its definition.
-static INLINE void idct32_high24_stage5_sse2(__m128i *x, const int32_t *cospi,
- const __m128i __rounding,
- int8_t cos_bit) {
- const __m128i cospi_m16_p48 = pair_set_epi16(-cospi[16], cospi[48]);
- const __m128i cospi_p48_p16 = pair_set_epi16(cospi[48], cospi[16]);
- const __m128i cospi_m48_m16 = pair_set_epi16(-cospi[48], -cospi[16]);
- btf_16_sse2(cospi_m16_p48, cospi_p48_p16, x[9], x[14], x[9], x[14]);
- btf_16_sse2(cospi_m48_m16, cospi_m16_p48, x[10], x[13], x[10], x[13]);
- btf_16_adds_subs_sse2(x[16], x[19]);
- btf_16_adds_subs_sse2(x[17], x[18]);
- btf_16_subs_adds_sse2(x[23], x[20]);
- btf_16_subs_adds_sse2(x[22], x[21]);
- btf_16_adds_subs_sse2(x[24], x[27]);
- btf_16_adds_subs_sse2(x[25], x[26]);
- btf_16_subs_adds_sse2(x[31], x[28]);
- btf_16_subs_adds_sse2(x[30], x[29]);
-}
-
-// Stage 6 of the 32-point inverse DCT for x[4..31]: applies btf_16_sse2 to the
-// pair (5,6) with cospi[32] weights, runs the add/sub butterflies over
-// x[8..15], and applies btf_16_sse2 to (18,29), (19,28), (20,27) and (21,26)
-// with cospi[16]/cospi[48] weights. x[0..3] is left to the caller.
-//
-// NOTE(review): `__rounding` and `cos_bit` are presumably consumed implicitly
-// by the btf_16_sse2 macro -- confirm against its definition.
-static INLINE void idct32_high28_stage6_sse2(__m128i *x, const int32_t *cospi,
- const __m128i __rounding,
- int8_t cos_bit) {
- const __m128i cospi_m32_p32 = pair_set_epi16(-cospi[32], cospi[32]);
- const __m128i cospi_p32_p32 = pair_set_epi16(cospi[32], cospi[32]);
- const __m128i cospi_m16_p48 = pair_set_epi16(-cospi[16], cospi[48]);
- const __m128i cospi_p48_p16 = pair_set_epi16(cospi[48], cospi[16]);
- const __m128i cospi_m48_m16 = pair_set_epi16(-cospi[48], -cospi[16]);
- btf_16_sse2(cospi_m32_p32, cospi_p32_p32, x[5], x[6], x[5], x[6]);
- btf_16_adds_subs_sse2(x[8], x[11]);
- btf_16_adds_subs_sse2(x[9], x[10]);
- btf_16_subs_adds_sse2(x[15], x[12]);
- btf_16_subs_adds_sse2(x[14], x[13]);
- btf_16_sse2(cospi_m16_p48, cospi_p48_p16, x[18], x[29], x[18], x[29]);
- btf_16_sse2(cospi_m16_p48, cospi_p48_p16, x[19], x[28], x[19], x[28]);
- btf_16_sse2(cospi_m48_m16, cospi_m16_p48, x[20], x[27], x[20], x[27]);
- btf_16_sse2(cospi_m48_m16, cospi_m16_p48, x[21], x[26], x[21], x[26]);
-}
-
-// Stage 7 of the 32-point inverse DCT over the whole working buffer: add/sub
-// butterflies pairing x[i] with x[7 - i] for i = 0..3, btf_16_sse2 on (10,13)
-// and (11,12) with cospi[32] weights, then add/sub butterflies across
-// x[16..23] and x[24..31]. All updates are in place.
-//
-// NOTE(review): `__rounding` and `cos_bit` are presumably consumed implicitly
-// by the btf_16_sse2 macro -- confirm against its definition.
-static INLINE void idct32_stage7_sse2(__m128i *x, const int32_t *cospi,
- const __m128i __rounding,
- int8_t cos_bit) {
- const __m128i cospi_m32_p32 = pair_set_epi16(-cospi[32], cospi[32]);
- const __m128i cospi_p32_p32 = pair_set_epi16(cospi[32], cospi[32]);
- btf_16_adds_subs_sse2(x[0], x[7]);
- btf_16_adds_subs_sse2(x[1], x[6]);
- btf_16_adds_subs_sse2(x[2], x[5]);
- btf_16_adds_subs_sse2(x[3], x[4]);
- btf_16_sse2(cospi_m32_p32, cospi_p32_p32, x[10], x[13], x[10], x[13]);
- btf_16_sse2(cospi_m32_p32, cospi_p32_p32, x[11], x[12], x[11], x[12]);
- btf_16_adds_subs_sse2(x[16], x[23]);
- btf_16_adds_subs_sse2(x[17], x[22]);
- btf_16_adds_subs_sse2(x[18], x[21]);
- btf_16_adds_subs_sse2(x[19], x[20]);
- btf_16_subs_adds_sse2(x[31], x[24]);
- btf_16_subs_adds_sse2(x[30], x[25]);
- btf_16_subs_adds_sse2(x[29], x[26]);
- btf_16_subs_adds_sse2(x[28], x[27]);
-}
-
-// Stage 8 of the 32-point inverse DCT: add/sub butterflies pairing x[i] with
-// x[15 - i] for i = 0..7, followed by btf_16_sse2 on (20,27), (21,26), (22,25)
-// and (23,24) with cospi[32] weights. All updates are in place.
-//
-// NOTE(review): `__rounding` and `cos_bit` are presumably consumed implicitly
-// by the btf_16_sse2 macro -- confirm against its definition.
-static INLINE void idct32_stage8_sse2(__m128i *x, const int32_t *cospi,
- const __m128i __rounding,
- int8_t cos_bit) {
- const __m128i cospi_m32_p32 = pair_set_epi16(-cospi[32], cospi[32]);
- const __m128i cospi_p32_p32 = pair_set_epi16(cospi[32], cospi[32]);
- btf_16_adds_subs_sse2(x[0], x[15]);
- btf_16_adds_subs_sse2(x[1], x[14]);
- btf_16_adds_subs_sse2(x[2], x[13]);
- btf_16_adds_subs_sse2(x[3], x[12]);
- btf_16_adds_subs_sse2(x[4], x[11]);
- btf_16_adds_subs_sse2(x[5], x[10]);
- btf_16_adds_subs_sse2(x[6], x[9]);
- btf_16_adds_subs_sse2(x[7], x[8]);
- btf_16_sse2(cospi_m32_p32, cospi_p32_p32, x[20], x[27], x[20], x[27]);
- btf_16_sse2(cospi_m32_p32, cospi_p32_p32, x[21], x[26], x[21], x[26]);
- btf_16_sse2(cospi_m32_p32, cospi_p32_p32, x[22], x[25], x[22], x[25]);
- btf_16_sse2(cospi_m32_p32, cospi_p32_p32, x[23], x[24], x[23], x[24]);
-}
-
-// Stage 9 (final stage) of the 32-point inverse DCT. Each x[i] in the lower
-// half is paired with its mirror x[31 - i]; btf_16_adds_subs_out_sse2 writes
-// the two results to output[i] and output[31 - i]. x itself is only read.
-static INLINE void idct32_stage9_sse2(__m128i *output, __m128i *x) {
- for (int i = 0; i < 16; ++i) {
-   btf_16_adds_subs_out_sse2(output[i], output[31 - i], x[i], x[31 - i]);
- }
-}
-
-// 32-point inverse DCT for the case where only input[0] is consumed. A single
-// btf_16_ssse3 call with cospi[32] for both weights produces x[0] and x[1],
-// and every one of the 32 outputs is a copy of one of those two vectors.
-// cos_bit is unused; the cosine table is always taken at INV_COS_BIT.
-static void idct32_low1_new_ssse3(const __m128i *input, __m128i *output,
- int8_t cos_bit) {
- (void)cos_bit;
- const int32_t *cospi = cospi_arr(INV_COS_BIT);
-
- // stage 1
- __m128i x[2];
- x[0] = input[0];
-
- // stage 2 ~ 5: the only arithmetic performed by this variant.
- btf_16_ssse3(cospi[32], cospi[32], x[0], x[0], x[1]);
-
- // stage 6 ~ 9: outputs follow the repeating pattern x[0], x[1], x[1], x[0],
- // i.e. x[1] is selected exactly when (i % 4) is 1 or 2.
- for (int i = 0; i < 32; ++i) {
-   output[i] = x[((i + 1) >> 1) & 1];
- }
-}
-
-// 32-point inverse DCT variant that reads only input[0..7] and writes all 32
-// entries of output. Because most stage-1 slots are never loaded, the early
-// stages use the single-input btf_16_ssse3 form and plain copies where the
-// full transform (idct32_new_sse2) runs two-input butterflies; presumably this
-// is equivalent to the full transform with the unread inputs taken as zero.
-// The cosine table is always taken at INV_COS_BIT; cos_bit is marked unused
-// but is still forwarded to the stage helpers.
-static void idct32_low8_new_ssse3(const __m128i *input, __m128i *output,
- int8_t cos_bit) {
- (void)cos_bit;
- const int32_t *cospi = cospi_arr(INV_COS_BIT);
- const __m128i __rounding = _mm_set1_epi32(1 << (INV_COS_BIT - 1));
-
- // stage 1
- // Only the eight slots fed by input[0..7] are initialised; every other
- // entry of x is written by a later stage before it is read.
- __m128i x[32];
- x[0] = input[0];
- x[4] = input[4];
- x[8] = input[2];
- x[12] = input[6];
- x[16] = input[1];
- x[20] = input[5];
- x[24] = input[3];
- x[28] = input[7];
-
- // stage 2
- btf_16_ssse3(cospi[62], cospi[2], x[16], x[16], x[31]);
- btf_16_ssse3(-cospi[50], cospi[14], x[28], x[19], x[28]);
- btf_16_ssse3(cospi[54], cospi[10], x[20], x[20], x[27]);
- btf_16_ssse3(-cospi[58], cospi[6], x[24], x[23], x[24]);
-
- // stage 3
- btf_16_ssse3(cospi[60], cospi[4], x[8], x[8], x[15]);
- btf_16_ssse3(-cospi[52], cospi[12], x[12], x[11], x[12]);
- // Copies stand in for idct32_high16_stage3_sse2: the partner of each pair
- // has not been written yet, so its neighbour's value is duplicated.
- x[17] = x[16];
- x[18] = x[19];
- x[21] = x[20];
- x[22] = x[23];
- x[25] = x[24];
- x[26] = x[27];
- x[29] = x[28];
- x[30] = x[31];
-
- // stage 4
- btf_16_ssse3(cospi[56], cospi[8], x[4], x[4], x[7]);
- x[9] = x[8];
- x[10] = x[11];
- x[13] = x[12];
- x[14] = x[15];
- idct32_high16_stage4_sse2(x, cospi, __rounding, cos_bit);
-
- // stage 5
- btf_16_ssse3(cospi[32], cospi[32], x[0], x[0], x[1]);
- x[5] = x[4];
- x[6] = x[7];
- idct32_high24_stage5_sse2(x, cospi, __rounding, cos_bit);
- // stage 6
- x[3] = x[0];
- x[2] = x[1];
- idct32_high28_stage6_sse2(x, cospi, __rounding, cos_bit);
-
- // stage 7 ~ 9: from here on all 32 entries of x are populated, so the
- // shared full-width helpers are used.
- idct32_stage7_sse2(x, cospi, __rounding, cos_bit);
- idct32_stage8_sse2(x, cospi, __rounding, cos_bit);
- idct32_stage9_sse2(output, x);
-}
-
-// 32-point inverse DCT variant that reads only input[0..15] and writes all 32
-// entries of output. Stage 1 fills the even slots of x; stages 2-5 use the
-// single-input btf_16_ssse3 form to produce both members of each pair, after
-// which the shared stage helpers take over. The cosine table is always taken
-// at INV_COS_BIT; cos_bit is marked unused but is still forwarded to the stage
-// helpers.
-static void idct32_low16_new_ssse3(const __m128i *input, __m128i *output,
- int8_t cos_bit) {
- (void)cos_bit;
- const int32_t *cospi = cospi_arr(INV_COS_BIT);
- const __m128i __rounding = _mm_set1_epi32(1 << (INV_COS_BIT - 1));
-
- // stage 1
- // Only even-indexed slots are loaded; each odd slot is written by a
- // btf_16_ssse3 call below before it is read.
- __m128i x[32];
- x[0] = input[0];
- x[2] = input[8];
- x[4] = input[4];
- x[6] = input[12];
- x[8] = input[2];
- x[10] = input[10];
- x[12] = input[6];
- x[14] = input[14];
- x[16] = input[1];
- x[18] = input[9];
- x[20] = input[5];
- x[22] = input[13];
- x[24] = input[3];
- x[26] = input[11];
- x[28] = input[7];
- x[30] = input[15];
-
- // stage 2
- btf_16_ssse3(cospi[62], cospi[2], x[16], x[16], x[31]);
- btf_16_ssse3(-cospi[34], cospi[30], x[30], x[17], x[30]);
- btf_16_ssse3(cospi[46], cospi[18], x[18], x[18], x[29]);
- btf_16_ssse3(-cospi[50], cospi[14], x[28], x[19], x[28]);
- btf_16_ssse3(cospi[54], cospi[10], x[20], x[20], x[27]);
- btf_16_ssse3(-cospi[42], cospi[22], x[26], x[21], x[26]);
- btf_16_ssse3(cospi[38], cospi[26], x[22], x[22], x[25]);
- btf_16_ssse3(-cospi[58], cospi[6], x[24], x[23], x[24]);
-
- // stage 3
- btf_16_ssse3(cospi[60], cospi[4], x[8], x[8], x[15]);
- btf_16_ssse3(-cospi[36], cospi[28], x[14], x[9], x[14]);
- btf_16_ssse3(cospi[44], cospi[20], x[10], x[10], x[13]);
- btf_16_ssse3(-cospi[52], cospi[12], x[12], x[11], x[12]);
- idct32_high16_stage3_sse2(x);
-
- // stage 4
- btf_16_ssse3(cospi[56], cospi[8], x[4], x[4], x[7]);
- btf_16_ssse3(-cospi[40], cospi[24], x[6], x[5], x[6]);
- btf_16_adds_subs_sse2(x[8], x[9]);
- btf_16_subs_adds_sse2(x[11], x[10]);
- btf_16_adds_subs_sse2(x[12], x[13]);
- btf_16_subs_adds_sse2(x[15], x[14]);
- idct32_high16_stage4_sse2(x, cospi, __rounding, cos_bit);
-
- // stage 5
- btf_16_ssse3(cospi[32], cospi[32], x[0], x[0], x[1]);
- btf_16_ssse3(cospi[48], cospi[16], x[2], x[2], x[3]);
- btf_16_adds_subs_sse2(x[4], x[5]);
- btf_16_subs_adds_sse2(x[7], x[6]);
- idct32_high24_stage5_sse2(x, cospi, __rounding, cos_bit);
-
- // stage 6
- btf_16_adds_subs_sse2(x[0], x[3]);
- btf_16_adds_subs_sse2(x[1], x[2]);
- idct32_high28_stage6_sse2(x, cospi, __rounding, cos_bit);
-
- // stage 7 ~ 9
- idct32_stage7_sse2(x, cospi, __rounding, cos_bit);
- idct32_stage8_sse2(x, cospi, __rounding, cos_bit);
- idct32_stage9_sse2(output, x);
-}
-
-// Full 32-point inverse DCT: reads all of input[0..31] and writes all of
-// output[0..31]. Stage 1 permutes the inputs into the working buffer x,
-// stages 2-6 handle the low-index part of each stage inline and delegate the
-// high-index part to the idct32_high*_stageN helpers, and stages 7-9 are done
-// entirely by shared helpers. The cosine table is always taken at
-// INV_COS_BIT; cos_bit is marked unused but is still forwarded to the stage
-// helpers (and presumably consumed by the btf_16_sse2 macro -- confirm against
-// its definition).
-static void idct32_new_sse2(const __m128i *input, __m128i *output,
- int8_t cos_bit) {
- (void)cos_bit;
- const int32_t *cospi = cospi_arr(INV_COS_BIT);
- const __m128i __rounding = _mm_set1_epi32(1 << (INV_COS_BIT - 1));
-
- const __m128i cospi_p62_m02 = pair_set_epi16(cospi[62], -cospi[2]);
- const __m128i cospi_p02_p62 = pair_set_epi16(cospi[2], cospi[62]);
- const __m128i cospi_p30_m34 = pair_set_epi16(cospi[30], -cospi[34]);
- const __m128i cospi_p34_p30 = pair_set_epi16(cospi[34], cospi[30]);
- const __m128i cospi_p46_m18 = pair_set_epi16(cospi[46], -cospi[18]);
- const __m128i cospi_p18_p46 = pair_set_epi16(cospi[18], cospi[46]);
- const __m128i cospi_p14_m50 = pair_set_epi16(cospi[14], -cospi[50]);
- const __m128i cospi_p50_p14 = pair_set_epi16(cospi[50], cospi[14]);
- const __m128i cospi_p54_m10 = pair_set_epi16(cospi[54], -cospi[10]);
- const __m128i cospi_p10_p54 = pair_set_epi16(cospi[10], cospi[54]);
- const __m128i cospi_p22_m42 = pair_set_epi16(cospi[22], -cospi[42]);
- const __m128i cospi_p42_p22 = pair_set_epi16(cospi[42], cospi[22]);
- const __m128i cospi_p38_m26 = pair_set_epi16(cospi[38], -cospi[26]);
- const __m128i cospi_p26_p38 = pair_set_epi16(cospi[26], cospi[38]);
- const __m128i cospi_p06_m58 = pair_set_epi16(cospi[6], -cospi[58]);
- const __m128i cospi_p58_p06 = pair_set_epi16(cospi[58], cospi[6]);
- const __m128i cospi_p60_m04 = pair_set_epi16(cospi[60], -cospi[4]);
- const __m128i cospi_p04_p60 = pair_set_epi16(cospi[4], cospi[60]);
- const __m128i cospi_p28_m36 = pair_set_epi16(cospi[28], -cospi[36]);
- const __m128i cospi_p36_p28 = pair_set_epi16(cospi[36], cospi[28]);
- const __m128i cospi_p44_m20 = pair_set_epi16(cospi[44], -cospi[20]);
- const __m128i cospi_p20_p44 = pair_set_epi16(cospi[20], cospi[44]);
- const __m128i cospi_p12_m52 = pair_set_epi16(cospi[12], -cospi[52]);
- const __m128i cospi_p52_p12 = pair_set_epi16(cospi[52], cospi[12]);
- const __m128i cospi_p56_m08 = pair_set_epi16(cospi[56], -cospi[8]);
- const __m128i cospi_p08_p56 = pair_set_epi16(cospi[8], cospi[56]);
- const __m128i cospi_p24_m40 = pair_set_epi16(cospi[24], -cospi[40]);
- const __m128i cospi_p40_p24 = pair_set_epi16(cospi[40], cospi[24]);
- const __m128i cospi_p32_p32 = pair_set_epi16(cospi[32], cospi[32]);
- const __m128i cospi_p32_m32 = pair_set_epi16(cospi[32], -cospi[32]);
- const __m128i cospi_p48_m16 = pair_set_epi16(cospi[48], -cospi[16]);
- const __m128i cospi_p16_p48 = pair_set_epi16(cospi[16], cospi[48]);
-
- // stage 1
- __m128i x[32];
- x[0] = input[0];
- x[1] = input[16];
- x[2] = input[8];
- x[3] = input[24];
- x[4] = input[4];
- x[5] = input[20];
- x[6] = input[12];
- x[7] = input[28];
- x[8] = input[2];
- x[9] = input[18];
- x[10] = input[10];
- x[11] = input[26];
- x[12] = input[6];
- x[13] = input[22];
- x[14] = input[14];
- x[15] = input[30];
- x[16] = input[1];
- x[17] = input[17];
- x[18] = input[9];
- x[19] = input[25];
- x[20] = input[5];
- x[21] = input[21];
- x[22] = input[13];
- x[23] = input[29];
- x[24] = input[3];
- x[25] = input[19];
- x[26] = input[11];
- x[27] = input[27];
- x[28] = input[7];
- x[29] = input[23];
- x[30] = input[15];
- x[31] = input[31];
-
- // stage 2
- btf_16_sse2(cospi_p62_m02, cospi_p02_p62, x[16], x[31], x[16], x[31]);
- btf_16_sse2(cospi_p30_m34, cospi_p34_p30, x[17], x[30], x[17], x[30]);
- btf_16_sse2(cospi_p46_m18, cospi_p18_p46, x[18], x[29], x[18], x[29]);
- btf_16_sse2(cospi_p14_m50, cospi_p50_p14, x[19], x[28], x[19], x[28]);
- btf_16_sse2(cospi_p54_m10, cospi_p10_p54, x[20], x[27], x[20], x[27]);
- btf_16_sse2(cospi_p22_m42, cospi_p42_p22, x[21], x[26], x[21], x[26]);
- btf_16_sse2(cospi_p38_m26, cospi_p26_p38, x[22], x[25], x[22], x[25]);
- btf_16_sse2(cospi_p06_m58, cospi_p58_p06, x[23], x[24], x[23], x[24]);
-
- // stage 3
- btf_16_sse2(cospi_p60_m04, cospi_p04_p60, x[8], x[15], x[8], x[15]);
- btf_16_sse2(cospi_p28_m36, cospi_p36_p28, x[9], x[14], x[9], x[14]);
- btf_16_sse2(cospi_p44_m20, cospi_p20_p44, x[10], x[13], x[10], x[13]);
- btf_16_sse2(cospi_p12_m52, cospi_p52_p12, x[11], x[12], x[11], x[12]);
- idct32_high16_stage3_sse2(x);
-
- // stage 4
- btf_16_sse2(cospi_p56_m08, cospi_p08_p56, x[4], x[7], x[4], x[7]);
- btf_16_sse2(cospi_p24_m40, cospi_p40_p24, x[5], x[6], x[5], x[6]);
- btf_16_adds_subs_sse2(x[8], x[9]);
- btf_16_subs_adds_sse2(x[11], x[10]);
- btf_16_adds_subs_sse2(x[12], x[13]);
- btf_16_subs_adds_sse2(x[15], x[14]);
- idct32_high16_stage4_sse2(x, cospi, __rounding, cos_bit);
-
- // stage 5
- btf_16_sse2(cospi_p32_p32, cospi_p32_m32, x[0], x[1], x[0], x[1]);
- btf_16_sse2(cospi_p48_m16, cospi_p16_p48, x[2], x[3], x[2], x[3]);
- btf_16_adds_subs_sse2(x[4], x[5]);
- // Reversed (higher index first) pair: use the subs_adds form, as the same
- // step does in idct32_low16_new_ssse3 and as every other reversed pair in
- // this transform does.
- btf_16_subs_adds_sse2(x[7], x[6]);
- idct32_high24_stage5_sse2(x, cospi, __rounding, cos_bit);
-
- // stage 6
- btf_16_adds_subs_sse2(x[0], x[3]);
- btf_16_adds_subs_sse2(x[1], x[2]);
- idct32_high28_stage6_sse2(x, cospi, __rounding, cos_bit);
-
- // stage 7 ~ 9
- idct32_stage7_sse2(x, cospi, __rounding, cos_bit);
- idct32_stage8_sse2(x, cospi, __rounding, cos_bit);
- idct32_stage9_sse2(output, x);
-}
-
-// Stage 4 of the 64-point inverse DCT for the upper half of the working
-// buffer: applies btf_16_sse2 in place to eight pairs drawn from x[33..62]
-- (33,62), (34,61), (37,58), (38,57), (41,54), (42,53), (45,50), (46,49) --
-// with weight pairs built from cospi[4]/[60], [28]/[36], [20]/[44] and
-// [12]/[52].
-//
-// NOTE(review): `__rounding` and `cos_bit` are never named in this body;
-// presumably btf_16_sse2 is a macro that picks them up from the enclosing
-// scope -- confirm against the macro definition.
-static INLINE void idct64_stage4_high32_sse2(__m128i *x, const int32_t *cospi,
- const __m128i __rounding,
- int8_t cos_bit) {
- const __m128i cospi_m04_p60 = pair_set_epi16(-cospi[4], cospi[60]);
- const __m128i cospi_p60_p04 = pair_set_epi16(cospi[60], cospi[4]);
- const __m128i cospi_m60_m04 = pair_set_epi16(-cospi[60], -cospi[4]);
- const __m128i cospi_m36_p28 = pair_set_epi16(-cospi[36], cospi[28]);
- const __m128i cospi_p28_p36 = pair_set_epi16(cospi[28], cospi[36]);
- const __m128i cospi_m28_m36 = pair_set_epi16(-cospi[28], -cospi[36]);
- const __m128i cospi_m20_p44 = pair_set_epi16(-cospi[20], cospi[44]);
- const __m128i cospi_p44_p20 = pair_set_epi16(cospi[44], cospi[20]);
- const __m128i cospi_m44_m20 = pair_set_epi16(-cospi[44], -cospi[20]);
- const __m128i cospi_m52_p12 = pair_set_epi16(-cospi[52], cospi[12]);
- const __m128i cospi_p12_p52 = pair_set_epi16(cospi[12], cospi[52]);
- const __m128i cospi_m12_m52 = pair_set_epi16(-cospi[12], -cospi[52]);
- btf_16_sse2(cospi_m04_p60, cospi_p60_p04, x[33], x[62], x[33], x[62]);
- btf_16_sse2(cospi_m60_m04, cospi_m04_p60, x[34], x[61], x[34], x[61]);
- btf_16_sse2(cospi_m36_p28, cospi_p28_p36, x[37], x[58], x[37], x[58]);
- btf_16_sse2(cospi_m28_m36, cospi_m36_p28, x[38], x[57], x[38], x[57]);
- btf_16_sse2(cospi_m20_p44, cospi_p44_p20, x[41], x[54], x[41], x[54]);
- btf_16_sse2(cospi_m44_m20, cospi_m20_p44, x[42], x[53], x[42], x[53]);
- btf_16_sse2(cospi_m52_p12, cospi_p12_p52, x[45], x[50], x[45], x[50]);
- btf_16_sse2(cospi_m12_m52, cospi_m52_p12, x[46], x[49], x[46], x[49]);
-}
-
-// Stage 5 of the 64-point inverse DCT for x[16..63]: applies btf_16_sse2 to
-// (17,30), (18,29), (21,26) and (22,25) with cospi[8]/[56] and cospi[24]/[40]
-// weights, then runs the add/sub butterflies over x[32..63] in groups of
-// four. x[0..15] is left to the caller.
-//
-// NOTE(review): `__rounding` and `cos_bit` are presumably consumed implicitly
-// by the btf_16_sse2 macro -- confirm against its definition.
-static INLINE void idct64_stage5_high48_sse2(__m128i *x, const int32_t *cospi,
- const __m128i __rounding,
- int8_t cos_bit) {
- const __m128i cospi_m08_p56 = pair_set_epi16(-cospi[8], cospi[56]);
- const __m128i cospi_p56_p08 = pair_set_epi16(cospi[56], cospi[8]);
- const __m128i cospi_m56_m08 = pair_set_epi16(-cospi[56], -cospi[8]);
- const __m128i cospi_m40_p24 = pair_set_epi16(-cospi[40], cospi[24]);
- const __m128i cospi_p24_p40 = pair_set_epi16(cospi[24], cospi[40]);
- const __m128i cospi_m24_m40 = pair_set_epi16(-cospi[24], -cospi[40]);
- btf_16_sse2(cospi_m08_p56, cospi_p56_p08, x[17], x[30], x[17], x[30]);
- btf_16_sse2(cospi_m56_m08, cospi_m08_p56, x[18], x[29], x[18], x[29]);
- btf_16_sse2(cospi_m40_p24, cospi_p24_p40, x[21], x[26], x[21], x[26]);
- btf_16_sse2(cospi_m24_m40, cospi_m40_p24, x[22], x[25], x[22], x[25]);
- btf_16_adds_subs_sse2(x[32], x[35]);
- btf_16_adds_subs_sse2(x[33], x[34]);
- btf_16_subs_adds_sse2(x[39], x[36]);
- btf_16_subs_adds_sse2(x[38], x[37]);
- btf_16_adds_subs_sse2(x[40], x[43]);
- btf_16_adds_subs_sse2(x[41], x[42]);
- btf_16_subs_adds_sse2(x[47], x[44]);
- btf_16_subs_adds_sse2(x[46], x[45]);
- btf_16_adds_subs_sse2(x[48], x[51]);
- btf_16_adds_subs_sse2(x[49], x[50]);
- btf_16_subs_adds_sse2(x[55], x[52]);
- btf_16_subs_adds_sse2(x[54], x[53]);
- btf_16_adds_subs_sse2(x[56], x[59]);
- btf_16_adds_subs_sse2(x[57], x[58]);
- btf_16_subs_adds_sse2(x[63], x[60]);
- btf_16_subs_adds_sse2(x[62], x[61]);
-}
-
-// Stage 6 of the 64-point inverse DCT for the upper half of the working
-// buffer: applies btf_16_sse2 in place to the pairs (34,61), (35,60), (36,59),
-// (37,58) with cospi[8]/[56] weights and to (42,53), (43,52), (44,51), (45,50)
-// with cospi[24]/[40] weights.
-//
-// NOTE(review): `__rounding` and `cos_bit` are presumably consumed implicitly
-// by the btf_16_sse2 macro -- confirm against its definition.
-static INLINE void idct64_stage6_high32_sse2(__m128i *x, const int32_t *cospi,
- const __m128i __rounding,
- int8_t cos_bit) {
- const __m128i cospi_m08_p56 = pair_set_epi16(-cospi[8], cospi[56]);
- const __m128i cospi_p56_p08 = pair_set_epi16(cospi[56], cospi[8]);
- const __m128i cospi_m56_m08 = pair_set_epi16(-cospi[56], -cospi[8]);
- const __m128i cospi_m40_p24 = pair_set_epi16(-cospi[40], cospi[24]);
- const __m128i cospi_p24_p40 = pair_set_epi16(cospi[24], cospi[40]);
- const __m128i cospi_m24_m40 = pair_set_epi16(-cospi[24], -cospi[40]);
- btf_16_sse2(cospi_m08_p56, cospi_p56_p08, x[34], x[61], x[34], x[61]);
- btf_16_sse2(cospi_m08_p56, cospi_p56_p08, x[35], x[60], x[35], x[60]);
- btf_16_sse2(cospi_m56_m08, cospi_m08_p56, x[36], x[59], x[36], x[59]);
- btf_16_sse2(cospi_m56_m08, cospi_m08_p56, x[37], x[58], x[37], x[58]);
- btf_16_sse2(cospi_m40_p24, cospi_p24_p40, x[42], x[53], x[42], x[53]);
- btf_16_sse2(cospi_m40_p24, cospi_p24_p40, x[43], x[52], x[43], x[52]);
- btf_16_sse2(cospi_m24_m40, cospi_m40_p24, x[44], x[51], x[44], x[51]);
- btf_16_sse2(cospi_m24_m40, cospi_m40_p24, x[45], x[50], x[45], x[50]);
-}
-
-// Stage 6 of the 64-point inverse DCT for x[16..63]: runs the add/sub
-// butterflies over x[16..31] in groups of four, then delegates the x[32..63]
-// part of the stage to idct64_stage6_high32_sse2. cospi, __rounding and
-// cos_bit are only forwarded to that helper.
-static INLINE void idct64_stage6_high48_sse2(__m128i *x, const int32_t *cospi,
- const __m128i __rounding,
- int8_t cos_bit) {
- btf_16_adds_subs_sse2(x[16], x[19]);
- btf_16_adds_subs_sse2(x[17], x[18]);
- btf_16_subs_adds_sse2(x[23], x[20]);
- btf_16_subs_adds_sse2(x[22], x[21]);
- btf_16_adds_subs_sse2(x[24], x[27]);
- btf_16_adds_subs_sse2(x[25], x[26]);
- btf_16_subs_adds_sse2(x[31], x[28]);
- btf_16_subs_adds_sse2(x[30], x[29]);
- idct64_stage6_high32_sse2(x, cospi, __rounding, cos_bit);
-}
-
-// Stage 7 of the 64-point inverse DCT for x[16..63]: applies btf_16_sse2 to
-// (18,29), (19,28), (20,27) and (21,26) with cospi[16]/[48] weights, then runs
-// the add/sub butterflies over x[32..63] in groups of eight. x[0..15] is left
-// to the caller.
-//
-// NOTE(review): `__rounding` and `cos_bit` are presumably consumed implicitly
-// by the btf_16_sse2 macro -- confirm against its definition.
-static INLINE void idct64_stage7_high48_sse2(__m128i *x, const int32_t *cospi,
- const __m128i __rounding,
- int8_t cos_bit) {
- const __m128i cospi_m16_p48 = pair_set_epi16(-cospi[16], cospi[48]);
- const __m128i cospi_p48_p16 = pair_set_epi16(cospi[48], cospi[16]);
- const __m128i cospi_m48_m16 = pair_set_epi16(-cospi[48], -cospi[16]);
- btf_16_sse2(cospi_m16_p48, cospi_p48_p16, x[18], x[29], x[18], x[29]);
- btf_16_sse2(cospi_m16_p48, cospi_p48_p16, x[19], x[28], x[19], x[28]);
- btf_16_sse2(cospi_m48_m16, cospi_m16_p48, x[20], x[27], x[20], x[27]);
- btf_16_sse2(cospi_m48_m16, cospi_m16_p48, x[21], x[26], x[21], x[26]);
- btf_16_adds_subs_sse2(x[32], x[39]);
- btf_16_adds_subs_sse2(x[33], x[38]);
- btf_16_adds_subs_sse2(x[34], x[37]);
- btf_16_adds_subs_sse2(x[35], x[36]);
- btf_16_subs_adds_sse2(x[47], x[40]);
- btf_16_subs_adds_sse2(x[46], x[41]);
- btf_16_subs_adds_sse2(x[45], x[42]);
- btf_16_subs_adds_sse2(x[44], x[43]);
- btf_16_adds_subs_sse2(x[48], x[55]);
- btf_16_adds_subs_sse2(x[49], x[54]);
- btf_16_adds_subs_sse2(x[50], x[53]);
- btf_16_adds_subs_sse2(x[51], x[52]);
- btf_16_subs_adds_sse2(x[63], x[56]);
- btf_16_subs_adds_sse2(x[62], x[57]);
- btf_16_subs_adds_sse2(x[61], x[58]);
- btf_16_subs_adds_sse2(x[60], x[59]);
-}
-
-// Stage 8 of the 64-point inverse DCT for x[16..63]: runs the add/sub
-// butterflies over x[16..31] in groups of eight, then applies btf_16_sse2 to
-// the pairs (36,59) .. (39,56) and (40,55) .. (43,52) with cospi[16]/[48]
-// weights. x[0..15] is left to the caller.
-//
-// NOTE(review): `__rounding` and `cos_bit` are presumably consumed implicitly
-// by the btf_16_sse2 macro -- confirm against its definition.
-static INLINE void idct64_stage8_high48_sse2(__m128i *x, const int32_t *cospi,
- const __m128i __rounding,
- int8_t cos_bit) {
- const __m128i cospi_m16_p48 = pair_set_epi16(-cospi[16], cospi[48]);
- const __m128i cospi_p48_p16 = pair_set_epi16(cospi[48], cospi[16]);
- const __m128i cospi_m48_m16 = pair_set_epi16(-cospi[48], -cospi[16]);
- btf_16_adds_subs_sse2(x[16], x[23]);
- btf_16_adds_subs_sse2(x[17], x[22]);
- btf_16_adds_subs_sse2(x[18], x[21]);
- btf_16_adds_subs_sse2(x[19], x[20]);
- btf_16_subs_adds_sse2(x[31], x[24]);
- btf_16_subs_adds_sse2(x[30], x[25]);
- btf_16_subs_adds_sse2(x[29], x[26]);
- btf_16_subs_adds_sse2(x[28], x[27]);
- btf_16_sse2(cospi_m16_p48, cospi_p48_p16, x[36], x[59], x[36], x[59]);
- btf_16_sse2(cospi_m16_p48, cospi_p48_p16, x[37], x[58], x[37], x[58]);
- btf_16_sse2(cospi_m16_p48, cospi_p48_p16, x[38], x[57], x[38], x[57]);
- btf_16_sse2(cospi_m16_p48, cospi_p48_p16, x[39], x[56], x[39], x[56]);
- btf_16_sse2(cospi_m48_m16, cospi_m16_p48, x[40], x[55], x[40], x[55]);
- btf_16_sse2(cospi_m48_m16, cospi_m16_p48, x[41], x[54], x[41], x[54]);
- btf_16_sse2(cospi_m48_m16, cospi_m16_p48, x[42], x[53], x[42], x[53]);
- btf_16_sse2(cospi_m48_m16, cospi_m16_p48, x[43], x[52], x[43], x[52]);
-}
-
-// Stage 9 of the 64-point inverse DCT over the whole working buffer: add/sub
-// butterflies pairing x[i] with x[15 - i] for i = 0..7, btf_16_sse2 on
-// (20,27), (21,26), (22,25), (23,24) with cospi[32] weights, then add/sub
-// butterflies pairing x[32..39] with x[47..40] and x[63..56] with x[48..55].
-//
-// NOTE(review): `__rounding` and `cos_bit` are presumably consumed implicitly
-// by the btf_16_sse2 macro -- confirm against its definition.
-static INLINE void idct64_stage9_sse2(__m128i *x, const int32_t *cospi,
- const __m128i __rounding,
- int8_t cos_bit) {
- const __m128i cospi_m32_p32 = pair_set_epi16(-cospi[32], cospi[32]);
- const __m128i cospi_p32_p32 = pair_set_epi16(cospi[32], cospi[32]);
- btf_16_adds_subs_sse2(x[0], x[15]);
- btf_16_adds_subs_sse2(x[1], x[14]);
- btf_16_adds_subs_sse2(x[2], x[13]);
- btf_16_adds_subs_sse2(x[3], x[12]);
- btf_16_adds_subs_sse2(x[4], x[11]);
- btf_16_adds_subs_sse2(x[5], x[10]);
- btf_16_adds_subs_sse2(x[6], x[9]);
- btf_16_adds_subs_sse2(x[7], x[8]);
- btf_16_sse2(cospi_m32_p32, cospi_p32_p32, x[20], x[27], x[20], x[27]);
- btf_16_sse2(cospi_m32_p32, cospi_p32_p32, x[21], x[26], x[21], x[26]);
- btf_16_sse2(cospi_m32_p32, cospi_p32_p32, x[22], x[25], x[22], x[25]);
- btf_16_sse2(cospi_m32_p32, cospi_p32_p32, x[23], x[24], x[23], x[24]);
- btf_16_adds_subs_sse2(x[32], x[47]);
- btf_16_adds_subs_sse2(x[33], x[46]);
- btf_16_adds_subs_sse2(x[34], x[45]);
- btf_16_adds_subs_sse2(x[35], x[44]);
- btf_16_adds_subs_sse2(x[36], x[43]);
- btf_16_adds_subs_sse2(x[37], x[42]);
- btf_16_adds_subs_sse2(x[38], x[41]);
- btf_16_adds_subs_sse2(x[39], x[40]);
- btf_16_subs_adds_sse2(x[63], x[48]);
- btf_16_subs_adds_sse2(x[62], x[49]);
- btf_16_subs_adds_sse2(x[61], x[50]);
- btf_16_subs_adds_sse2(x[60], x[51]);
- btf_16_subs_adds_sse2(x[59], x[52]);
- btf_16_subs_adds_sse2(x[58], x[53]);
- btf_16_subs_adds_sse2(x[57], x[54]);
- btf_16_subs_adds_sse2(x[56], x[55]);
-}
-
-// Stage 10 of the 64-point inverse DCT: add/sub butterflies pairing x[i] with
-// x[31 - i] for i = 0..15, followed by btf_16_sse2 on the eight pairs
-// (40,55) .. (47,48) with cospi[32] weights. All updates are in place.
-//
-// NOTE(review): `__rounding` and `cos_bit` are presumably consumed implicitly
-// by the btf_16_sse2 macro -- confirm against its definition.
-static INLINE void idct64_stage10_sse2(__m128i *x, const int32_t *cospi,
- const __m128i __rounding,
- int8_t cos_bit) {
- const __m128i cospi_m32_p32 = pair_set_epi16(-cospi[32], cospi[32]);
- const __m128i cospi_p32_p32 = pair_set_epi16(cospi[32], cospi[32]);
- btf_16_adds_subs_sse2(x[0], x[31]);
- btf_16_adds_subs_sse2(x[1], x[30]);
- btf_16_adds_subs_sse2(x[2], x[29]);
- btf_16_adds_subs_sse2(x[3], x[28]);
- btf_16_adds_subs_sse2(x[4], x[27]);
- btf_16_adds_subs_sse2(x[5], x[26]);
- btf_16_adds_subs_sse2(x[6], x[25]);
- btf_16_adds_subs_sse2(x[7], x[24]);
- btf_16_adds_subs_sse2(x[8], x[23]);
- btf_16_adds_subs_sse2(x[9], x[22]);
- btf_16_adds_subs_sse2(x[10], x[21]);
- btf_16_adds_subs_sse2(x[11], x[20]);
- btf_16_adds_subs_sse2(x[12], x[19]);
- btf_16_adds_subs_sse2(x[13], x[18]);
- btf_16_adds_subs_sse2(x[14], x[17]);
- btf_16_adds_subs_sse2(x[15], x[16]);
- btf_16_sse2(cospi_m32_p32, cospi_p32_p32, x[40], x[55], x[40], x[55]);
- btf_16_sse2(cospi_m32_p32, cospi_p32_p32, x[41], x[54], x[41], x[54]);
- btf_16_sse2(cospi_m32_p32, cospi_p32_p32, x[42], x[53], x[42], x[53]);
- btf_16_sse2(cospi_m32_p32, cospi_p32_p32, x[43], x[52], x[43], x[52]);
- btf_16_sse2(cospi_m32_p32, cospi_p32_p32, x[44], x[51], x[44], x[51]);
- btf_16_sse2(cospi_m32_p32, cospi_p32_p32, x[45], x[50], x[45], x[50]);
- btf_16_sse2(cospi_m32_p32, cospi_p32_p32, x[46], x[49], x[46], x[49]);
- btf_16_sse2(cospi_m32_p32, cospi_p32_p32, x[47], x[48], x[47], x[48]);
-}
-
-// Stage 11 (final stage) of the 64-point inverse DCT. Each x[i] in the lower
-// half is paired with its mirror x[63 - i]; btf_16_adds_subs_out_sse2 writes
-// the two results to output[i] and output[63 - i]. x itself is only read.
-static INLINE void idct64_stage11_sse2(__m128i *output, __m128i *x) {
- for (int i = 0; i < 32; ++i) {
-   btf_16_adds_subs_out_sse2(output[i], output[63 - i], x[i], x[63 - i]);
- }
-}
-
-// 64-point inverse DCT for the case where only input[0] is consumed. A single
-// btf_16_ssse3 call with cospi[32] for both weights produces x[0] and x[1],
-// and every one of the 64 outputs is a copy of one of those two vectors.
-// cos_bit is unused; the cosine table is always taken at INV_COS_BIT.
-static void idct64_low1_new_ssse3(const __m128i *input, __m128i *output,
- int8_t cos_bit) {
- (void)cos_bit;
- const int32_t *cospi = cospi_arr(INV_COS_BIT);
-
- // stage 1
- // Only two scratch vectors are ever used, so the buffer is sized to match
- // (same as idct32_low1_new_ssse3).
- __m128i x[2];
- x[0] = input[0];
-
- // stage 2 ~ 6: the only arithmetic performed by this variant.
- btf_16_ssse3(cospi[32], cospi[32], x[0], x[0], x[1]);
-
- // stage 7 ~ 11: outputs follow the repeating pattern x[0], x[1], x[1],
- // x[0], i.e. x[1] is selected exactly when (i % 4) is 1 or 2.
- for (int i = 0; i < 64; ++i) {
-   output[i] = x[((i + 1) >> 1) & 1];
- }
-}
-
-// 64-point inverse DCT variant that reads only input[0..7] and writes all 64
-// entries of output. Because most stage-1 slots are never loaded, the early
-// stages use the single-input btf_16_ssse3 form and plain copies in place of
-// two-input butterflies (presumably equivalent to the full transform with the
-// unread inputs taken as zero); once a range of x is fully populated the
-// shared idct64_stage* helpers take over. The cosine table is always taken at
-// INV_COS_BIT; cos_bit is marked unused but is still forwarded to the stage
-// helpers (and presumably consumed by the btf_16_sse2 macro -- confirm against
-// its definition).
-static void idct64_low8_new_ssse3(const __m128i *input, __m128i *output,
- int8_t cos_bit) {
- (void)cos_bit;
- const int32_t *cospi = cospi_arr(INV_COS_BIT);
- const __m128i __rounding = _mm_set1_epi32(1 << (INV_COS_BIT - 1));
- const __m128i cospi_m04_p60 = pair_set_epi16(-cospi[4], cospi[60]);
- const __m128i cospi_p60_p04 = pair_set_epi16(cospi[60], cospi[4]);
- const __m128i cospi_m36_p28 = pair_set_epi16(-cospi[36], cospi[28]);
- const __m128i cospi_m28_m36 = pair_set_epi16(-cospi[28], -cospi[36]);
- const __m128i cospi_m20_p44 = pair_set_epi16(-cospi[20], cospi[44]);
- const __m128i cospi_p44_p20 = pair_set_epi16(cospi[44], cospi[20]);
- const __m128i cospi_m52_p12 = pair_set_epi16(-cospi[52], cospi[12]);
- const __m128i cospi_m12_m52 = pair_set_epi16(-cospi[12], -cospi[52]);
- const __m128i cospi_m08_p56 = pair_set_epi16(-cospi[8], cospi[56]);
- const __m128i cospi_p56_p08 = pair_set_epi16(cospi[56], cospi[8]);
- const __m128i cospi_m40_p24 = pair_set_epi16(-cospi[40], cospi[24]);
- const __m128i cospi_m24_m40 = pair_set_epi16(-cospi[24], -cospi[40]);
- const __m128i cospi_p32_p32 = pair_set_epi16(cospi[32], cospi[32]);
- const __m128i cospi_m16_p48 = pair_set_epi16(-cospi[16], cospi[48]);
- const __m128i cospi_p48_p16 = pair_set_epi16(cospi[48], cospi[16]);
- const __m128i cospi_m32_p32 = pair_set_epi16(-cospi[32], cospi[32]);
-
- // stage 1
- // Only the eight slots fed by input[0..7] are initialised; every other
- // entry of x is written by a later stage before it is read.
- __m128i x[64];
- x[0] = input[0];
- x[8] = input[4];
- x[16] = input[2];
- x[24] = input[6];
- x[32] = input[1];
- x[40] = input[5];
- x[48] = input[3];
- x[56] = input[7];
-
- // stage 2
- btf_16_ssse3(cospi[63], cospi[1], x[32], x[32], x[63]);
- btf_16_ssse3(-cospi[57], cospi[7], x[56], x[39], x[56]);
- btf_16_ssse3(cospi[59], cospi[5], x[40], x[40], x[55]);
- btf_16_ssse3(-cospi[61], cospi[3], x[48], x[47], x[48]);
-
- // stage 3
- btf_16_ssse3(cospi[62], cospi[2], x[16], x[16], x[31]);
- btf_16_ssse3(-cospi[58], cospi[6], x[24], x[23], x[24]);
- x[33] = x[32];
- x[38] = x[39];
- x[41] = x[40];
- x[46] = x[47];
- x[49] = x[48];
- x[54] = x[55];
- x[57] = x[56];
- x[62] = x[63];
-
- // stage 4
- btf_16_ssse3(cospi[60], cospi[4], x[8], x[8], x[15]);
- x[17] = x[16];
- x[22] = x[23];
- x[25] = x[24];
- x[30] = x[31];
- btf_16_sse2(cospi_m04_p60, cospi_p60_p04, x[33], x[62], x[33], x[62]);
- btf_16_sse2(cospi_m28_m36, cospi_m36_p28, x[38], x[57], x[38], x[57]);
- btf_16_sse2(cospi_m20_p44, cospi_p44_p20, x[41], x[54], x[41], x[54]);
- btf_16_sse2(cospi_m12_m52, cospi_m52_p12, x[46], x[49], x[46], x[49]);
-
- // stage 5
- x[9] = x[8];
- x[14] = x[15];
- btf_16_sse2(cospi_m08_p56, cospi_p56_p08, x[17], x[30], x[17], x[30]);
- btf_16_sse2(cospi_m24_m40, cospi_m40_p24, x[22], x[25], x[22], x[25]);
- x[35] = x[32];
- x[34] = x[33];
- x[36] = x[39];
- x[37] = x[38];
- x[43] = x[40];
- x[42] = x[41];
- x[44] = x[47];
- x[45] = x[46];
- x[51] = x[48];
- x[50] = x[49];
- x[52] = x[55];
- x[53] = x[54];
- x[59] = x[56];
- x[58] = x[57];
- x[60] = x[63];
- x[61] = x[62];
-
- // stage 6
- btf_16_ssse3(cospi[32], cospi[32], x[0], x[0], x[1]);
- btf_16_sse2(cospi_m16_p48, cospi_p48_p16, x[9], x[14], x[9], x[14]);
- x[19] = x[16];
- x[18] = x[17];
- x[20] = x[23];
- x[21] = x[22];
- x[27] = x[24];
- x[26] = x[25];
- x[28] = x[31];
- x[29] = x[30];
- idct64_stage6_high32_sse2(x, cospi, __rounding, cos_bit);
-
- // stage 7
- x[3] = x[0];
- x[2] = x[1];
- x[11] = x[8];
- x[10] = x[9];
- x[12] = x[15];
- x[13] = x[14];
- idct64_stage7_high48_sse2(x, cospi, __rounding, cos_bit);
-
- // stage 8
- // x[8], x[9], x[14] and x[15] pass through this stage unchanged, so no
- // statement is needed for them.
- x[7] = x[0];
- x[6] = x[1];
- x[5] = x[2];
- x[4] = x[3];
- btf_16_sse2(cospi_m32_p32, cospi_p32_p32, x[10], x[13], x[10], x[13]);
- btf_16_sse2(cospi_m32_p32, cospi_p32_p32, x[11], x[12], x[11], x[12]);
- idct64_stage8_high48_sse2(x, cospi, __rounding, cos_bit);
-
- // stage 9 ~ 11: all 64 entries of x are populated, so the shared full-width
- // helpers are used.
- idct64_stage9_sse2(x, cospi, __rounding, cos_bit);
- idct64_stage10_sse2(x, cospi, __rounding, cos_bit);
- idct64_stage11_sse2(output, x);
-}
-
-static void idct64_low16_new_ssse3(const __m128i *input, __m128i *output,
- int8_t cos_bit) {  // runs idct64 stages 1~11 over x[64]; only input[0..15] is read
- (void)cos_bit;
- const int32_t *cospi = cospi_arr(INV_COS_BIT);
- const __m128i __rounding = _mm_set1_epi32(1 << (INV_COS_BIT - 1));
-
- const __m128i cospi_p32_p32 = pair_set_epi16(cospi[32], cospi[32]);
- const __m128i cospi_m16_p48 = pair_set_epi16(-cospi[16], cospi[48]);
- const __m128i cospi_p48_p16 = pair_set_epi16(cospi[48], cospi[16]);
- const __m128i cospi_m48_m16 = pair_set_epi16(-cospi[48], -cospi[16]);
- const __m128i cospi_m32_p32 = pair_set_epi16(-cospi[32], cospi[32]);
-
- // stage 1: scatter input[0..15] into every 4th slot of x[]; the other slots are first written in stages 2~7
- __m128i x[64];
- x[0] = input[0];
- x[4] = input[8];
- x[8] = input[4];
- x[12] = input[12];
- x[16] = input[2];
- x[20] = input[10];
- x[24] = input[6];
- x[28] = input[14];
- x[32] = input[1];
- x[36] = input[9];
- x[40] = input[5];
- x[44] = input[13];
- x[48] = input[3];
- x[52] = input[11];
- x[56] = input[7];
- x[60] = input[15];
-
- // stage 2: each call reads one loaded slot and writes two slots, all within x[32..63]
- btf_16_ssse3(cospi[63], cospi[1], x[32], x[32], x[63]);
- btf_16_ssse3(-cospi[49], cospi[15], x[60], x[35], x[60]);
- btf_16_ssse3(cospi[55], cospi[9], x[36], x[36], x[59]);
- btf_16_ssse3(-cospi[57], cospi[7], x[56], x[39], x[56]);
- btf_16_ssse3(cospi[59], cospi[5], x[40], x[40], x[55]);
- btf_16_ssse3(-cospi[53], cospi[11], x[52], x[43], x[52]);
- btf_16_ssse3(cospi[51], cospi[13], x[44], x[44], x[51]);
- btf_16_ssse3(-cospi[61], cospi[3], x[48], x[47], x[48]);
-
- // stage 3: single-input butterflies for x[16..31], then plain copies inside x[32..63]
- btf_16_ssse3(cospi[62], cospi[2], x[16], x[16], x[31]);
- btf_16_ssse3(-cospi[50], cospi[14], x[28], x[19], x[28]);
- btf_16_ssse3(cospi[54], cospi[10], x[20], x[20], x[27]);
- btf_16_ssse3(-cospi[58], cospi[6], x[24], x[23], x[24]);
- x[33] = x[32];  // NOTE(review): idct64_low32_new_ssse3 uses add/sub butterflies on these pairs; presumably the partner term is zero here -- confirm
- x[34] = x[35];
- x[37] = x[36];
- x[38] = x[39];
- x[41] = x[40];
- x[42] = x[43];
- x[45] = x[44];
- x[46] = x[47];
- x[49] = x[48];
- x[50] = x[51];
- x[53] = x[52];
- x[54] = x[55];
- x[57] = x[56];
- x[58] = x[59];
- x[61] = x[60];
- x[62] = x[63];
-
- // stage 4: single-input butterflies for x[8..15], copies inside x[16..31], rest delegated to the high32 helper
- btf_16_ssse3(cospi[60], cospi[4], x[8], x[8], x[15]);
- btf_16_ssse3(-cospi[52], cospi[12], x[12], x[11], x[12]);
- x[17] = x[16];
- x[18] = x[19];
- x[21] = x[20];
- x[22] = x[23];
- x[25] = x[24];
- x[26] = x[27];
- x[29] = x[28];
- x[30] = x[31];
- idct64_stage4_high32_sse2(x, cospi, __rounding, cos_bit);
-
- // stage 5: x[4] -> x[4], x[7]; copies inside x[8..15]; rest delegated to the high48 helper
- btf_16_ssse3(cospi[56], cospi[8], x[4], x[4], x[7]);
- x[9] = x[8];
- x[10] = x[11];
- x[13] = x[12];
- x[14] = x[15];
- idct64_stage5_high48_sse2(x, cospi, __rounding, cos_bit);
-
- // stage 6: x[0] -> x[0], x[1]; copies into x[5], x[6]; two-input butterflies on (9,14) and (10,13)
- btf_16_ssse3(cospi[32], cospi[32], x[0], x[0], x[1]);
- x[5] = x[4];
- x[6] = x[7];
- btf_16_sse2(cospi_m16_p48, cospi_p48_p16, x[9], x[14], x[9], x[14]);
- btf_16_sse2(cospi_m48_m16, cospi_m16_p48, x[10], x[13], x[10], x[13]);
- idct64_stage6_high48_sse2(x, cospi, __rounding, cos_bit);
-
- // stage 7: last two unwritten slots (x[3], x[2]) are filled here by copy
- x[3] = x[0];
- x[2] = x[1];
- btf_16_sse2(cospi_m32_p32, cospi_p32_p32, x[5], x[6], x[5], x[6]);
- btf_16_adds_subs_sse2(x[8], x[11]);
- btf_16_adds_subs_sse2(x[9], x[10]);
- btf_16_subs_adds_sse2(x[15], x[12]);
- btf_16_subs_adds_sse2(x[14], x[13]);
- idct64_stage7_high48_sse2(x, cospi, __rounding, cos_bit);
-
- // stage 8: from here on the statements match idct64_low32_new_ssse3 line for line
- btf_16_adds_subs_sse2(x[0], x[7]);
- btf_16_adds_subs_sse2(x[1], x[6]);
- btf_16_adds_subs_sse2(x[2], x[5]);
- btf_16_adds_subs_sse2(x[3], x[4]);
- btf_16_sse2(cospi_m32_p32, cospi_p32_p32, x[10], x[13], x[10], x[13]);
- btf_16_sse2(cospi_m32_p32, cospi_p32_p32, x[11], x[12], x[11], x[12]);
- idct64_stage8_high48_sse2(x, cospi, __rounding, cos_bit);
-
- idct64_stage9_sse2(x, cospi, __rounding, cos_bit);  // stage 9~11: shared helpers; stage 11 writes output[]
- idct64_stage10_sse2(x, cospi, __rounding, cos_bit);
- idct64_stage11_sse2(output, x);
-}
-
-static void idct64_low32_new_ssse3(const __m128i *input, __m128i *output,
- int8_t cos_bit) {  // runs idct64 stages 1~11 over x[64]; only input[0..31] is read
- (void)cos_bit;
- const int32_t *cospi = cospi_arr(INV_COS_BIT);
- const __m128i __rounding = _mm_set1_epi32(1 << (INV_COS_BIT - 1));
-
- const __m128i cospi_p32_p32 = pair_set_epi16(cospi[32], cospi[32]);
- const __m128i cospi_m16_p48 = pair_set_epi16(-cospi[16], cospi[48]);
- const __m128i cospi_p48_p16 = pair_set_epi16(cospi[48], cospi[16]);
- const __m128i cospi_m48_m16 = pair_set_epi16(-cospi[48], -cospi[16]);
- const __m128i cospi_m32_p32 = pair_set_epi16(-cospi[32], cospi[32]);
-
- // stage 1: scatter input[0..31] into the even slots of x[]; the odd slots are first written in stages 2~6
- __m128i x[64];
- x[0] = input[0];
- x[2] = input[16];
- x[4] = input[8];
- x[6] = input[24];
- x[8] = input[4];
- x[10] = input[20];
- x[12] = input[12];
- x[14] = input[28];
- x[16] = input[2];
- x[18] = input[18];
- x[20] = input[10];
- x[22] = input[26];
- x[24] = input[6];
- x[26] = input[22];
- x[28] = input[14];
- x[30] = input[30];
- x[32] = input[1];
- x[34] = input[17];
- x[36] = input[9];
- x[38] = input[25];
- x[40] = input[5];
- x[42] = input[21];
- x[44] = input[13];
- x[46] = input[29];
- x[48] = input[3];
- x[50] = input[19];
- x[52] = input[11];
- x[54] = input[27];
- x[56] = input[7];
- x[58] = input[23];
- x[60] = input[15];
- x[62] = input[31];
-
- // stage 2: each call reads one even slot of x[32..63] and writes it plus one odd slot
- btf_16_ssse3(cospi[63], cospi[1], x[32], x[32], x[63]);
- btf_16_ssse3(-cospi[33], cospi[31], x[62], x[33], x[62]);
- btf_16_ssse3(cospi[47], cospi[17], x[34], x[34], x[61]);
- btf_16_ssse3(-cospi[49], cospi[15], x[60], x[35], x[60]);
- btf_16_ssse3(cospi[55], cospi[9], x[36], x[36], x[59]);
- btf_16_ssse3(-cospi[41], cospi[23], x[58], x[37], x[58]);
- btf_16_ssse3(cospi[39], cospi[25], x[38], x[38], x[57]);
- btf_16_ssse3(-cospi[57], cospi[7], x[56], x[39], x[56]);
- btf_16_ssse3(cospi[59], cospi[5], x[40], x[40], x[55]);
- btf_16_ssse3(-cospi[37], cospi[27], x[54], x[41], x[54]);
- btf_16_ssse3(cospi[43], cospi[21], x[42], x[42], x[53]);
- btf_16_ssse3(-cospi[53], cospi[11], x[52], x[43], x[52]);
- btf_16_ssse3(cospi[51], cospi[13], x[44], x[44], x[51]);
- btf_16_ssse3(-cospi[45], cospi[19], x[50], x[45], x[50]);
- btf_16_ssse3(cospi[35], cospi[29], x[46], x[46], x[49]);
- btf_16_ssse3(-cospi[61], cospi[3], x[48], x[47], x[48]);
-
- // stage 3: single-input butterflies for x[16..31], then in-place add/sub on adjacent pairs of x[32..63]
- btf_16_ssse3(cospi[62], cospi[2], x[16], x[16], x[31]);
- btf_16_ssse3(-cospi[34], cospi[30], x[30], x[17], x[30]);
- btf_16_ssse3(cospi[46], cospi[18], x[18], x[18], x[29]);
- btf_16_ssse3(-cospi[50], cospi[14], x[28], x[19], x[28]);
- btf_16_ssse3(cospi[54], cospi[10], x[20], x[20], x[27]);
- btf_16_ssse3(-cospi[42], cospi[22], x[26], x[21], x[26]);
- btf_16_ssse3(cospi[38], cospi[26], x[22], x[22], x[25]);
- btf_16_ssse3(-cospi[58], cospi[6], x[24], x[23], x[24]);
- btf_16_adds_subs_sse2(x[32], x[33]);  // adds_subs / subs_adds alternate pair by pair
- btf_16_subs_adds_sse2(x[35], x[34]);
- btf_16_adds_subs_sse2(x[36], x[37]);
- btf_16_subs_adds_sse2(x[39], x[38]);
- btf_16_adds_subs_sse2(x[40], x[41]);
- btf_16_subs_adds_sse2(x[43], x[42]);
- btf_16_adds_subs_sse2(x[44], x[45]);
- btf_16_subs_adds_sse2(x[47], x[46]);
- btf_16_adds_subs_sse2(x[48], x[49]);
- btf_16_subs_adds_sse2(x[51], x[50]);
- btf_16_adds_subs_sse2(x[52], x[53]);
- btf_16_subs_adds_sse2(x[55], x[54]);
- btf_16_adds_subs_sse2(x[56], x[57]);
- btf_16_subs_adds_sse2(x[59], x[58]);
- btf_16_adds_subs_sse2(x[60], x[61]);
- btf_16_subs_adds_sse2(x[63], x[62]);
-
- // stage 4: single-input butterflies for x[8..15], add/sub pairs in x[16..31], rest delegated to the high32 helper
- btf_16_ssse3(cospi[60], cospi[4], x[8], x[8], x[15]);
- btf_16_ssse3(-cospi[36], cospi[28], x[14], x[9], x[14]);
- btf_16_ssse3(cospi[44], cospi[20], x[10], x[10], x[13]);
- btf_16_ssse3(-cospi[52], cospi[12], x[12], x[11], x[12]);
- btf_16_adds_subs_sse2(x[16], x[17]);
- btf_16_subs_adds_sse2(x[19], x[18]);
- btf_16_adds_subs_sse2(x[20], x[21]);
- btf_16_subs_adds_sse2(x[23], x[22]);
- btf_16_adds_subs_sse2(x[24], x[25]);
- btf_16_subs_adds_sse2(x[27], x[26]);
- btf_16_adds_subs_sse2(x[28], x[29]);
- btf_16_subs_adds_sse2(x[31], x[30]);
- idct64_stage4_high32_sse2(x, cospi, __rounding, cos_bit);
-
- // stage 5: single-input butterflies for x[4..7], add/sub pairs in x[8..15], rest delegated to the high48 helper
- btf_16_ssse3(cospi[56], cospi[8], x[4], x[4], x[7]);
- btf_16_ssse3(-cospi[40], cospi[24], x[6], x[5], x[6]);
- btf_16_adds_subs_sse2(x[8], x[9]);
- btf_16_subs_adds_sse2(x[11], x[10]);
- btf_16_adds_subs_sse2(x[12], x[13]);
- btf_16_subs_adds_sse2(x[15], x[14]);
- idct64_stage5_high48_sse2(x, cospi, __rounding, cos_bit);
-
- // stage 6: x[0] -> x[0], x[1] and x[2] -> x[2], x[3]; add/sub in x[4..7]; two-input butterflies on (9,14), (10,13)
- btf_16_ssse3(cospi[32], cospi[32], x[0], x[0], x[1]);
- btf_16_ssse3(cospi[48], cospi[16], x[2], x[2], x[3]);
- btf_16_adds_subs_sse2(x[4], x[5]);
- btf_16_subs_adds_sse2(x[7], x[6]);
- btf_16_sse2(cospi_m16_p48, cospi_p48_p16, x[9], x[14], x[9], x[14]);
- btf_16_sse2(cospi_m48_m16, cospi_m16_p48, x[10], x[13], x[10], x[13]);
- idct64_stage6_high48_sse2(x, cospi, __rounding, cos_bit);
-
- // stage 7: add/sub on x[0..3] and x[8..15], one two-input butterfly on (5,6)
- btf_16_adds_subs_sse2(x[0], x[3]);
- btf_16_adds_subs_sse2(x[1], x[2]);
- btf_16_sse2(cospi_m32_p32, cospi_p32_p32, x[5], x[6], x[5], x[6]);
- btf_16_adds_subs_sse2(x[8], x[11]);
- btf_16_adds_subs_sse2(x[9], x[10]);
- btf_16_subs_adds_sse2(x[15], x[12]);
- btf_16_subs_adds_sse2(x[14], x[13]);
- idct64_stage7_high48_sse2(x, cospi, __rounding, cos_bit);
-
- // stage 8: add/sub across x[0..7], two-input butterflies on (10,13) and (11,12)
- btf_16_adds_subs_sse2(x[0], x[7]);
- btf_16_adds_subs_sse2(x[1], x[6]);
- btf_16_adds_subs_sse2(x[2], x[5]);
- btf_16_adds_subs_sse2(x[3], x[4]);
- btf_16_sse2(cospi_m32_p32, cospi_p32_p32, x[10], x[13], x[10], x[13]);
- btf_16_sse2(cospi_m32_p32, cospi_p32_p32, x[11], x[12], x[11], x[12]);
- idct64_stage8_high48_sse2(x, cospi, __rounding, cos_bit);
-
- // stage 9~11: shared helpers; stage 11 is the one that writes output[]
- idct64_stage9_sse2(x, cospi, __rounding, cos_bit);
- idct64_stage10_sse2(x, cospi, __rounding, cos_bit);
- idct64_stage11_sse2(output, x);
-}
-
-void iadst4_new_sse2(const __m128i *input, __m128i *output, int8_t cos_bit) {  // 4-input transform over all eight 16-bit lanes, using sinpi weights
- (void)cos_bit;
- const int32_t *sinpi = sinpi_arr(INV_COS_BIT);
- const __m128i sinpi_p01_p04 = pair_set_epi16(sinpi[1], sinpi[4]);
- const __m128i sinpi_p02_m01 = pair_set_epi16(sinpi[2], -sinpi[1]);
- const __m128i sinpi_p03_p02 = pair_set_epi16(sinpi[3], sinpi[2]);
- const __m128i sinpi_p03_m04 = pair_set_epi16(sinpi[3], -sinpi[4]);
- const __m128i sinpi_p03_m03 = pair_set_epi16(sinpi[3], -sinpi[3]);
- const __m128i sinpi_0_p03 = pair_set_epi16(0, sinpi[3]);
- const __m128i sinpi_p04_p02 = pair_set_epi16(sinpi[4], sinpi[2]);
- const __m128i sinpi_m03_m01 = pair_set_epi16(-sinpi[3], -sinpi[1]);
- __m128i x0[4];
- x0[0] = input[0];
- x0[1] = input[1];
- x0[2] = input[2];
- x0[3] = input[3];
-
- __m128i u[4];
- u[0] = _mm_unpacklo_epi16(x0[0], x0[2]);  // (x0, x2) pairs, low four lanes
- u[1] = _mm_unpackhi_epi16(x0[0], x0[2]);  // (x0, x2) pairs, high four lanes
- u[2] = _mm_unpacklo_epi16(x0[1], x0[3]);  // (x1, x3) pairs, low four lanes
- u[3] = _mm_unpackhi_epi16(x0[1], x0[3]);  // (x1, x3) pairs, high four lanes
-
- __m128i x1[16];  // 32-bit products: even index = low lanes, odd index = high lanes
- x1[0] = _mm_madd_epi16(u[0], sinpi_p01_p04); // x0*sin1 + x2*sin4
- x1[1] = _mm_madd_epi16(u[1], sinpi_p01_p04);
- x1[2] = _mm_madd_epi16(u[0], sinpi_p02_m01); // x0*sin2 - x2*sin1
- x1[3] = _mm_madd_epi16(u[1], sinpi_p02_m01);
- x1[4] = _mm_madd_epi16(u[2], sinpi_p03_p02); // x1*sin3 + x3*sin2
- x1[5] = _mm_madd_epi16(u[3], sinpi_p03_p02);
- x1[6] = _mm_madd_epi16(u[2], sinpi_p03_m04); // x1*sin3 - x3*sin4
- x1[7] = _mm_madd_epi16(u[3], sinpi_p03_m04);
- x1[8] = _mm_madd_epi16(u[0], sinpi_p03_m03); // x0*sin3 - x2*sin3
- x1[9] = _mm_madd_epi16(u[1], sinpi_p03_m03);
- x1[10] = _mm_madd_epi16(u[2], sinpi_0_p03); // x1*0 + x3*sin3 = x3*sin3
- x1[11] = _mm_madd_epi16(u[3], sinpi_0_p03);
- x1[12] = _mm_madd_epi16(u[0], sinpi_p04_p02); // x0*sin4 + x2*sin2
- x1[13] = _mm_madd_epi16(u[1], sinpi_p04_p02);
- x1[14] = _mm_madd_epi16(u[2], sinpi_m03_m01); // -x1*sin3 - x3*sin1
- x1[15] = _mm_madd_epi16(u[3], sinpi_m03_m01);
-
- __m128i x2[8];  // x2[2*i] / x2[2*i + 1] = low / high lanes of output row i
- x2[0] = _mm_add_epi32(x1[0], x1[4]); // x0*sin1 +x2*sin4 +x1*sin3 +x3*sin2
- x2[1] = _mm_add_epi32(x1[1], x1[5]);
- x2[2] = _mm_add_epi32(x1[2], x1[6]); // x0*sin2 -x2*sin1 +x1*sin3 -x3*sin4
- x2[3] = _mm_add_epi32(x1[3], x1[7]);
- x2[4] = _mm_add_epi32(x1[8], x1[10]); // x0*sin3 -x2*sin3 +x3*sin3
- x2[5] = _mm_add_epi32(x1[9], x1[11]);
- x2[6] = _mm_add_epi32(x1[12], x1[14]); // x0*sin4 +x2*sin2 -x1*sin3 -x3*sin1
- x2[7] = _mm_add_epi32(x1[13], x1[15]);
-
- const __m128i rounding = _mm_set1_epi32(1 << (INV_COS_BIT - 1));
- for (int i = 0; i < 4; ++i) {  // round, shift right by INV_COS_BIT, repack to int16
- __m128i out0 = _mm_add_epi32(x2[2 * i], rounding);
- __m128i out1 = _mm_add_epi32(x2[2 * i + 1], rounding);
- out0 = _mm_srai_epi32(out0, INV_COS_BIT);
- out1 = _mm_srai_epi32(out1, INV_COS_BIT);
- output[i] = _mm_packs_epi32(out0, out1);  // signed saturation to 16 bits
- }
-}
-
-// TODO(binpengsmail@gmail.com):
-// To explore the reuse of VP9 versions of corresponding SSE2 functions and
-// evaluate whether there is a possibility for further speedup.
-void iadst4_w4_new_sse2(const __m128i *input, __m128i *output, int8_t cos_bit) {  // same arithmetic as iadst4_new_sse2, but only the low four 16-bit lanes are used
- (void)cos_bit;
- const int32_t *sinpi = sinpi_arr(INV_COS_BIT);
- const __m128i sinpi_p01_p04 = pair_set_epi16(sinpi[1], sinpi[4]);
- const __m128i sinpi_p02_m01 = pair_set_epi16(sinpi[2], -sinpi[1]);
- const __m128i sinpi_p03_p02 = pair_set_epi16(sinpi[3], sinpi[2]);
- const __m128i sinpi_p03_m04 = pair_set_epi16(sinpi[3], -sinpi[4]);
- const __m128i sinpi_p03_m03 = pair_set_epi16(sinpi[3], -sinpi[3]);
- const __m128i sinpi_0_p03 = pair_set_epi16(0, sinpi[3]);
- const __m128i sinpi_p04_p02 = pair_set_epi16(sinpi[4], sinpi[2]);
- const __m128i sinpi_m03_m01 = pair_set_epi16(-sinpi[3], -sinpi[1]);
- __m128i x0[4];
- x0[0] = input[0];
- x0[1] = input[1];
- x0[2] = input[2];
- x0[3] = input[3];
-
- __m128i u[2];
- u[0] = _mm_unpacklo_epi16(x0[0], x0[2]);  // (x0, x2) pairs from the low four lanes
- u[1] = _mm_unpacklo_epi16(x0[1], x0[3]);  // (x1, x3) pairs from the low four lanes
-
- __m128i x1[8];
- x1[0] = _mm_madd_epi16(u[0], sinpi_p01_p04); // x0*sin1 + x2*sin4
- x1[1] = _mm_madd_epi16(u[0], sinpi_p02_m01); // x0*sin2 - x2*sin1
- x1[2] = _mm_madd_epi16(u[1], sinpi_p03_p02); // x1*sin3 + x3*sin2
- x1[3] = _mm_madd_epi16(u[1], sinpi_p03_m04); // x1*sin3 - x3*sin4
- x1[4] = _mm_madd_epi16(u[0], sinpi_p03_m03); // x0*sin3 - x2*sin3
- x1[5] = _mm_madd_epi16(u[1], sinpi_0_p03); // x1*0 + x3*sin3 = x3*sin3
- x1[6] = _mm_madd_epi16(u[0], sinpi_p04_p02); // x0*sin4 + x2*sin2
- x1[7] = _mm_madd_epi16(u[1], sinpi_m03_m01); // -x1*sin3 - x3*sin1
-
- __m128i x2[4];
- x2[0] = _mm_add_epi32(x1[0], x1[2]); // x0*sin1 + x2*sin4 + x1*sin3 + x3*sin2
- x2[1] = _mm_add_epi32(x1[1], x1[3]); // x0*sin2 - x2*sin1 + x1*sin3 - x3*sin4
- x2[2] = _mm_add_epi32(x1[4], x1[5]); // x0*sin3 - x2*sin3 + x3*sin3
- x2[3] = _mm_add_epi32(x1[6], x1[7]); // x0*sin4 + x2*sin2 - x1*sin3 - x3*sin1
-
- const __m128i rounding = _mm_set1_epi32(1 << (INV_COS_BIT - 1));
- for (int i = 0; i < 4; ++i) {  // round, shift right by INV_COS_BIT, repack to int16
- __m128i out0 = _mm_add_epi32(x2[i], rounding);
- out0 = _mm_srai_epi32(out0, INV_COS_BIT);
- output[i] = _mm_packs_epi32(out0, out0);  // same four results land in both halves of output[i]
- }
-}
-
-static void iadst8_low1_new_ssse3(const __m128i *input, __m128i *output,
- int8_t cos_bit) {  // 7-stage iadst8 flow in which only input[0] is read
- (void)cos_bit;
- const int32_t *cospi = cospi_arr(INV_COS_BIT);
- const __m128i __zero = _mm_setzero_si128();
- const __m128i __rounding = _mm_set1_epi32(1 << (INV_COS_BIT - 1));  // NOTE(review): never named below; presumably captured by the btf_16_sse2 macro -- confirm
-
- const __m128i cospi_p16_p48 = pair_set_epi16(cospi[16], cospi[48]);
- const __m128i cospi_p48_m16 = pair_set_epi16(cospi[48], -cospi[16]);
- const __m128i cospi_p32_p32 = pair_set_epi16(cospi[32], cospi[32]);
- const __m128i cospi_p32_m32 = pair_set_epi16(cospi[32], -cospi[32]);
-
- // stage 1: iadst8_new_sse2 also loads input[1..7]; here only x[1] is seeded
- __m128i x[8];
- x[1] = input[0];
-
- // stage 2: x[1] alone produces both x[0] and x[1]
- btf_16_ssse3(cospi[60], -cospi[4], x[1], x[0], x[1]);
-
- // stage 3: plain copies where iadst8_new_sse2 has add/sub butterflies
- x[4] = x[0];
- x[5] = x[1];
-
- // stage 4
- btf_16_sse2(cospi_p16_p48, cospi_p48_m16, x[4], x[5], x[4], x[5]);
-
- // stage 5: plain copies again; all eight slots of x[] are populated after this
- x[2] = x[0];
- x[3] = x[1];
- x[6] = x[4];
- x[7] = x[5];
-
- // stage 6
- btf_16_sse2(cospi_p32_p32, cospi_p32_m32, x[2], x[3], x[2], x[3]);
- btf_16_sse2(cospi_p32_p32, cospi_p32_m32, x[6], x[7], x[6], x[7]);
-
- // stage 7: permute into output[]; odd outputs are negated (saturating 0 - x)
- output[0] = x[0];
- output[1] = _mm_subs_epi16(__zero, x[4]);
- output[2] = x[6];
- output[3] = _mm_subs_epi16(__zero, x[2]);
- output[4] = x[3];
- output[5] = _mm_subs_epi16(__zero, x[7]);
- output[6] = x[5];
- output[7] = _mm_subs_epi16(__zero, x[1]);
-}
-
-void iadst8_new_sse2(const __m128i *input, __m128i *output, int8_t cos_bit) {  // full 7-stage iadst8 flow reading input[0..7]
- (void)cos_bit;
- const int32_t *cospi = cospi_arr(INV_COS_BIT);
- const __m128i __zero = _mm_setzero_si128();
- const __m128i __rounding = _mm_set1_epi32(1 << (INV_COS_BIT - 1));  // NOTE(review): never named below; presumably captured by the btf_16_sse2 macro -- confirm
-
- const __m128i cospi_p04_p60 = pair_set_epi16(cospi[4], cospi[60]);
- const __m128i cospi_p60_m04 = pair_set_epi16(cospi[60], -cospi[4]);
- const __m128i cospi_p20_p44 = pair_set_epi16(cospi[20], cospi[44]);
- const __m128i cospi_p44_m20 = pair_set_epi16(cospi[44], -cospi[20]);
- const __m128i cospi_p36_p28 = pair_set_epi16(cospi[36], cospi[28]);
- const __m128i cospi_p28_m36 = pair_set_epi16(cospi[28], -cospi[36]);
- const __m128i cospi_p52_p12 = pair_set_epi16(cospi[52], cospi[12]);
- const __m128i cospi_p12_m52 = pair_set_epi16(cospi[12], -cospi[52]);
- const __m128i cospi_p16_p48 = pair_set_epi16(cospi[16], cospi[48]);
- const __m128i cospi_p48_m16 = pair_set_epi16(cospi[48], -cospi[16]);
- const __m128i cospi_m48_p16 = pair_set_epi16(-cospi[48], cospi[16]);
- const __m128i cospi_p32_p32 = pair_set_epi16(cospi[32], cospi[32]);
- const __m128i cospi_p32_m32 = pair_set_epi16(cospi[32], -cospi[32]);
-
- // stage 1: input permutation (even slots take input[7,5,3,1], odd slots take input[0,2,4,6])
- __m128i x[8];
- x[0] = input[7];
- x[1] = input[0];
- x[2] = input[5];
- x[3] = input[2];
- x[4] = input[3];
- x[5] = input[4];
- x[6] = input[1];
- x[7] = input[6];
-
- // stage 2: two-input butterflies on adjacent pairs, each with its own weight pair
- btf_16_sse2(cospi_p04_p60, cospi_p60_m04, x[0], x[1], x[0], x[1]);
- btf_16_sse2(cospi_p20_p44, cospi_p44_m20, x[2], x[3], x[2], x[3]);
- btf_16_sse2(cospi_p36_p28, cospi_p28_m36, x[4], x[5], x[4], x[5]);
- btf_16_sse2(cospi_p52_p12, cospi_p12_m52, x[6], x[7], x[6], x[7]);
-
- // stage 3: add/sub between slots four apart
- btf_16_adds_subs_sse2(x[0], x[4]);
- btf_16_adds_subs_sse2(x[1], x[5]);
- btf_16_adds_subs_sse2(x[2], x[6]);
- btf_16_adds_subs_sse2(x[3], x[7]);
-
- // stage 4: only the upper half x[4..7] is rotated
- btf_16_sse2(cospi_p16_p48, cospi_p48_m16, x[4], x[5], x[4], x[5]);
- btf_16_sse2(cospi_m48_p16, cospi_p16_p48, x[6], x[7], x[6], x[7]);
-
- // stage 5: add/sub between slots two apart
- btf_16_adds_subs_sse2(x[0], x[2]);
- btf_16_adds_subs_sse2(x[1], x[3]);
- btf_16_adds_subs_sse2(x[4], x[6]);
- btf_16_adds_subs_sse2(x[5], x[7]);
-
- // stage 6: same cospi[32] weight pair applied to (2,3) and (6,7)
- btf_16_sse2(cospi_p32_p32, cospi_p32_m32, x[2], x[3], x[2], x[3]);
- btf_16_sse2(cospi_p32_p32, cospi_p32_m32, x[6], x[7], x[6], x[7]);
-
- // stage 7: permute into output[]; odd outputs are negated (saturating 0 - x)
- output[0] = x[0];
- output[1] = _mm_subs_epi16(__zero, x[4]);
- output[2] = x[6];
- output[3] = _mm_subs_epi16(__zero, x[2]);
- output[4] = x[3];
- output[5] = _mm_subs_epi16(__zero, x[7]);
- output[6] = x[5];
- output[7] = _mm_subs_epi16(__zero, x[1]);
-}
-
-void iadst8_w4_new_sse2(const __m128i *input, __m128i *output, int8_t cos_bit) {  // same stage sequence as iadst8_new_sse2 with btf_16_4p_sse2 in place of btf_16_sse2
- (void)cos_bit;
- const int32_t *cospi = cospi_arr(INV_COS_BIT);
- const __m128i __zero = _mm_setzero_si128();
- const __m128i __rounding = _mm_set1_epi32(1 << (INV_COS_BIT - 1));  // NOTE(review): never named below; presumably captured by the btf_16_4p_sse2 macro -- confirm
-
- const __m128i cospi_p04_p60 = pair_set_epi16(cospi[4], cospi[60]);
- const __m128i cospi_p60_m04 = pair_set_epi16(cospi[60], -cospi[4]);
- const __m128i cospi_p20_p44 = pair_set_epi16(cospi[20], cospi[44]);
- const __m128i cospi_p44_m20 = pair_set_epi16(cospi[44], -cospi[20]);
- const __m128i cospi_p36_p28 = pair_set_epi16(cospi[36], cospi[28]);
- const __m128i cospi_p28_m36 = pair_set_epi16(cospi[28], -cospi[36]);
- const __m128i cospi_p52_p12 = pair_set_epi16(cospi[52], cospi[12]);
- const __m128i cospi_p12_m52 = pair_set_epi16(cospi[12], -cospi[52]);
- const __m128i cospi_p16_p48 = pair_set_epi16(cospi[16], cospi[48]);
- const __m128i cospi_p48_m16 = pair_set_epi16(cospi[48], -cospi[16]);
- const __m128i cospi_m48_p16 = pair_set_epi16(-cospi[48], cospi[16]);
- const __m128i cospi_p32_p32 = pair_set_epi16(cospi[32], cospi[32]);
- const __m128i cospi_p32_m32 = pair_set_epi16(cospi[32], -cospi[32]);
-
- // stage 1: input permutation (even slots take input[7,5,3,1], odd slots take input[0,2,4,6])
- __m128i x[8];
- x[0] = input[7];
- x[1] = input[0];
- x[2] = input[5];
- x[3] = input[2];
- x[4] = input[3];
- x[5] = input[4];
- x[6] = input[1];
- x[7] = input[6];
-
- // stage 2: NOTE(review): btf_16_4p_sse2 is presumably the four-lane form of btf_16_sse2 -- confirm
- btf_16_4p_sse2(cospi_p04_p60, cospi_p60_m04, x[0], x[1], x[0], x[1]);
- btf_16_4p_sse2(cospi_p20_p44, cospi_p44_m20, x[2], x[3], x[2], x[3]);
- btf_16_4p_sse2(cospi_p36_p28, cospi_p28_m36, x[4], x[5], x[4], x[5]);
- btf_16_4p_sse2(cospi_p52_p12, cospi_p12_m52, x[6], x[7], x[6], x[7]);
-
- // stage 3: add/sub between slots four apart
- btf_16_adds_subs_sse2(x[0], x[4]);
- btf_16_adds_subs_sse2(x[1], x[5]);
- btf_16_adds_subs_sse2(x[2], x[6]);
- btf_16_adds_subs_sse2(x[3], x[7]);
-
- // stage 4: only the upper half x[4..7] is rotated
- btf_16_4p_sse2(cospi_p16_p48, cospi_p48_m16, x[4], x[5], x[4], x[5]);
- btf_16_4p_sse2(cospi_m48_p16, cospi_p16_p48, x[6], x[7], x[6], x[7]);
-
- // stage 5: add/sub between slots two apart
- btf_16_adds_subs_sse2(x[0], x[2]);
- btf_16_adds_subs_sse2(x[1], x[3]);
- btf_16_adds_subs_sse2(x[4], x[6]);
- btf_16_adds_subs_sse2(x[5], x[7]);
-
- // stage 6: same cospi[32] weight pair applied to (2,3) and (6,7)
- btf_16_4p_sse2(cospi_p32_p32, cospi_p32_m32, x[2], x[3], x[2], x[3]);
- btf_16_4p_sse2(cospi_p32_p32, cospi_p32_m32, x[6], x[7], x[6], x[7]);
-
- // stage 7: permute into output[]; odd outputs are negated (saturating 0 - x)
- output[0] = x[0];
- output[1] = _mm_subs_epi16(__zero, x[4]);
- output[2] = x[6];
- output[3] = _mm_subs_epi16(__zero, x[2]);
- output[4] = x[3];
- output[5] = _mm_subs_epi16(__zero, x[7]);
- output[6] = x[5];
- output[7] = _mm_subs_epi16(__zero, x[1]);
-}
-
-static INLINE void iadst16_stage3_ssse3(__m128i *x) {  // iadst16 stage 3: in-place add/sub on pairs (i, i + 8), i = 0..7
- btf_16_adds_subs_sse2(x[0], x[8]);
- btf_16_adds_subs_sse2(x[1], x[9]);
- btf_16_adds_subs_sse2(x[2], x[10]);
- btf_16_adds_subs_sse2(x[3], x[11]);
- btf_16_adds_subs_sse2(x[4], x[12]);
- btf_16_adds_subs_sse2(x[5], x[13]);
- btf_16_adds_subs_sse2(x[6], x[14]);
- btf_16_adds_subs_sse2(x[7], x[15]);
-}
-
-static INLINE void iadst16_stage4_ssse3(__m128i *x, const int32_t *cospi,
- const __m128i __rounding,
- int8_t cos_bit) {  // NOTE(review): __rounding and cos_bit are never named below; presumably captured by btf_16_sse2 -- confirm
- const __m128i cospi_p08_p56 = pair_set_epi16(cospi[8], cospi[56]);
- const __m128i cospi_p56_m08 = pair_set_epi16(cospi[56], -cospi[8]);
- const __m128i cospi_p40_p24 = pair_set_epi16(cospi[40], cospi[24]);
- const __m128i cospi_p24_m40 = pair_set_epi16(cospi[24], -cospi[40]);
- const __m128i cospi_m56_p08 = pair_set_epi16(-cospi[56], cospi[8]);
- const __m128i cospi_m24_p40 = pair_set_epi16(-cospi[24], cospi[40]);
- btf_16_sse2(cospi_p08_p56, cospi_p56_m08, x[8], x[9], x[8], x[9]);  // iadst16 stage 4: only x[8..15] is touched, in adjacent pairs
- btf_16_sse2(cospi_p40_p24, cospi_p24_m40, x[10], x[11], x[10], x[11]);
- btf_16_sse2(cospi_m56_p08, cospi_p08_p56, x[12], x[13], x[12], x[13]);
- btf_16_sse2(cospi_m24_p40, cospi_p40_p24, x[14], x[15], x[14], x[15]);
-}
-
-static INLINE void iadst16_stage5_ssse3(__m128i *x) {  // iadst16 stage 5: in-place add/sub on pairs (i, i + 4) within x[0..7] and within x[8..15]
- btf_16_adds_subs_sse2(x[0], x[4]);
- btf_16_adds_subs_sse2(x[1], x[5]);
- btf_16_adds_subs_sse2(x[2], x[6]);
- btf_16_adds_subs_sse2(x[3], x[7]);
- btf_16_adds_subs_sse2(x[8], x[12]);
- btf_16_adds_subs_sse2(x[9], x[13]);
- btf_16_adds_subs_sse2(x[10], x[14]);
- btf_16_adds_subs_sse2(x[11], x[15]);
-}
-
-static INLINE void iadst16_stage6_ssse3(__m128i *x, const int32_t *cospi,
- const __m128i __rounding,
- int8_t cos_bit) {  // NOTE(review): __rounding and cos_bit are never named below; presumably captured by btf_16_sse2 -- confirm
- const __m128i cospi_p16_p48 = pair_set_epi16(cospi[16], cospi[48]);
- const __m128i cospi_p48_m16 = pair_set_epi16(cospi[48], -cospi[16]);
- const __m128i cospi_m48_p16 = pair_set_epi16(-cospi[48], cospi[16]);
- btf_16_sse2(cospi_p16_p48, cospi_p48_m16, x[4], x[5], x[4], x[5]);  // iadst16 stage 6: x[4..7] and x[12..15] get the same two weight pairs
- btf_16_sse2(cospi_m48_p16, cospi_p16_p48, x[6], x[7], x[6], x[7]);
- btf_16_sse2(cospi_p16_p48, cospi_p48_m16, x[12], x[13], x[12], x[13]);
- btf_16_sse2(cospi_m48_p16, cospi_p16_p48, x[14], x[15], x[14], x[15]);
-}
-
-static INLINE void iadst16_stage7_ssse3(__m128i *x) {  // iadst16 stage 7: in-place add/sub on pairs (i, i + 2) inside each group of four slots
- btf_16_adds_subs_sse2(x[0], x[2]);
- btf_16_adds_subs_sse2(x[1], x[3]);
- btf_16_adds_subs_sse2(x[4], x[6]);
- btf_16_adds_subs_sse2(x[5], x[7]);
- btf_16_adds_subs_sse2(x[8], x[10]);
- btf_16_adds_subs_sse2(x[9], x[11]);
- btf_16_adds_subs_sse2(x[12], x[14]);
- btf_16_adds_subs_sse2(x[13], x[15]);
-}
-
-static INLINE void iadst16_stage8_ssse3(__m128i *x, const int32_t *cospi,
- const __m128i __rounding,
- int8_t cos_bit) {  // NOTE(review): __rounding and cos_bit are never named below; presumably captured by btf_16_sse2 -- confirm
- const __m128i cospi_p32_p32 = pair_set_epi16(cospi[32], cospi[32]);
- const __m128i cospi_p32_m32 = pair_set_epi16(cospi[32], -cospi[32]);
- btf_16_sse2(cospi_p32_p32, cospi_p32_m32, x[2], x[3], x[2], x[3]);  // iadst16 stage 8: one cospi[32] weight pair on the upper two slots of each group of four
- btf_16_sse2(cospi_p32_p32, cospi_p32_m32, x[6], x[7], x[6], x[7]);
- btf_16_sse2(cospi_p32_p32, cospi_p32_m32, x[10], x[11], x[10], x[11]);
- btf_16_sse2(cospi_p32_p32, cospi_p32_m32, x[14], x[15], x[14], x[15]);
-}
-
-static INLINE void iadst16_stage9_ssse3(__m128i *output, __m128i *x) {  // iadst16 stage 9: permute x[] into output[0..15]
- const __m128i __zero = _mm_setzero_si128();
- output[0] = x[0];
- output[1] = _mm_subs_epi16(__zero, x[8]);  // odd outputs are negated with a saturating 0 - x
- output[2] = x[12];
- output[3] = _mm_subs_epi16(__zero, x[4]);
- output[4] = x[6];
- output[5] = _mm_subs_epi16(__zero, x[14]);
- output[6] = x[10];
- output[7] = _mm_subs_epi16(__zero, x[2]);
- output[8] = x[3];
- output[9] = _mm_subs_epi16(__zero, x[11]);
- output[10] = x[15];
- output[11] = _mm_subs_epi16(__zero, x[7]);
- output[12] = x[5];
- output[13] = _mm_subs_epi16(__zero, x[13]);
- output[14] = x[9];
- output[15] = _mm_subs_epi16(__zero, x[1]);
-}
-
-static void iadst16_low1_new_ssse3(const __m128i *input, __m128i *output,
- int8_t cos_bit) {  // 9-stage iadst16 flow in which only input[0] is read
- (void)cos_bit;
- const int32_t *cospi = cospi_arr(INV_COS_BIT);
- const __m128i __rounding = _mm_set1_epi32(1 << (INV_COS_BIT - 1));
-
- const __m128i cospi_p08_p56 = pair_set_epi16(cospi[8], cospi[56]);
- const __m128i cospi_p56_m08 = pair_set_epi16(cospi[56], -cospi[8]);
- const __m128i cospi_p16_p48 = pair_set_epi16(cospi[16], cospi[48]);
- const __m128i cospi_p48_m16 = pair_set_epi16(cospi[48], -cospi[16]);
-
- // stage 1: iadst16_new_sse2 also loads input[1..15]; here only x[1] is seeded
- __m128i x[16];
- x[1] = input[0];
-
- // stage 2: x[1] alone produces both x[0] and x[1]
- btf_16_ssse3(cospi[62], -cospi[2], x[1], x[0], x[1]);
-
- // stage 3: plain copies where iadst16_stage3_ssse3 has add/sub butterflies
- x[8] = x[0];
- x[9] = x[1];
-
- // stage 4: only the (8,9) butterfly of iadst16_stage4_ssse3 is performed
- btf_16_sse2(cospi_p08_p56, cospi_p56_m08, x[8], x[9], x[8], x[9]);
-
- // stage 5: plain copies where iadst16_stage5_ssse3 has add/sub butterflies
- x[4] = x[0];
- x[5] = x[1];
- x[12] = x[8];
- x[13] = x[9];
-
- // stage 6: only the (4,5) and (12,13) butterflies of iadst16_stage6_ssse3 are performed
- btf_16_sse2(cospi_p16_p48, cospi_p48_m16, x[4], x[5], x[4], x[5]);
- btf_16_sse2(cospi_p16_p48, cospi_p48_m16, x[12], x[13], x[12], x[13]);
-
- // stage 7: plain copies; all sixteen slots of x[] are populated after this
- x[2] = x[0];
- x[3] = x[1];
- x[6] = x[4];
- x[7] = x[5];
- x[10] = x[8];
- x[11] = x[9];
- x[14] = x[12];
- x[15] = x[13];
-
- iadst16_stage8_ssse3(x, cospi, __rounding, cos_bit);  // stage 8~9: shared with the other iadst16 variants
- iadst16_stage9_ssse3(output, x);
-}
-
-static void iadst16_low8_new_ssse3(const __m128i *input, __m128i *output,
- int8_t cos_bit) {  // 9-stage iadst16 flow in which only input[0..7] is read
- (void)cos_bit;
- const int32_t *cospi = cospi_arr(INV_COS_BIT);
- const __m128i __rounding = _mm_set1_epi32(1 << (INV_COS_BIT - 1));
-
- // stage 1: same slots as iadst16_new_sse2 for input[0..7]; slots that would take input[8..15] are left for stage 2
- __m128i x[16];
- x[1] = input[0];
- x[3] = input[2];
- x[5] = input[4];
- x[7] = input[6];
- x[8] = input[7];
- x[10] = input[5];
- x[12] = input[3];
- x[14] = input[1];
-
- // stage 2: each loaded slot produces itself and its unloaded neighbour
- btf_16_ssse3(cospi[62], -cospi[2], x[1], x[0], x[1]);
- btf_16_ssse3(cospi[54], -cospi[10], x[3], x[2], x[3]);
- btf_16_ssse3(cospi[46], -cospi[18], x[5], x[4], x[5]);
- btf_16_ssse3(cospi[38], -cospi[26], x[7], x[6], x[7]);
- btf_16_ssse3(cospi[34], cospi[30], x[8], x[8], x[9]);
- btf_16_ssse3(cospi[42], cospi[22], x[10], x[10], x[11]);
- btf_16_ssse3(cospi[50], cospi[14], x[12], x[12], x[13]);
- btf_16_ssse3(cospi[58], cospi[6], x[14], x[14], x[15]);
-
- // stage 3~9
- iadst16_stage3_ssse3(x);
- iadst16_stage4_ssse3(x, cospi, __rounding, cos_bit);
- iadst16_stage5_ssse3(x);
- iadst16_stage6_ssse3(x, cospi, __rounding, cos_bit);
- iadst16_stage7_ssse3(x);
- iadst16_stage8_ssse3(x, cospi, __rounding, cos_bit);
- iadst16_stage9_ssse3(output, x);
-}
-void iadst16_new_sse2(const __m128i *input, __m128i *output, int8_t cos_bit) {  // full 9-stage iadst16 flow reading input[0..15]
- (void)cos_bit;
- const int32_t *cospi = cospi_arr(INV_COS_BIT);
- const __m128i __rounding = _mm_set1_epi32(1 << (INV_COS_BIT - 1));
- const __m128i cospi_p02_p62 = pair_set_epi16(cospi[2], cospi[62]);
- const __m128i cospi_p62_m02 = pair_set_epi16(cospi[62], -cospi[2]);
- const __m128i cospi_p10_p54 = pair_set_epi16(cospi[10], cospi[54]);
- const __m128i cospi_p54_m10 = pair_set_epi16(cospi[54], -cospi[10]);
- const __m128i cospi_p18_p46 = pair_set_epi16(cospi[18], cospi[46]);
- const __m128i cospi_p46_m18 = pair_set_epi16(cospi[46], -cospi[18]);
- const __m128i cospi_p26_p38 = pair_set_epi16(cospi[26], cospi[38]);
- const __m128i cospi_p38_m26 = pair_set_epi16(cospi[38], -cospi[26]);
- const __m128i cospi_p34_p30 = pair_set_epi16(cospi[34], cospi[30]);
- const __m128i cospi_p30_m34 = pair_set_epi16(cospi[30], -cospi[34]);
- const __m128i cospi_p42_p22 = pair_set_epi16(cospi[42], cospi[22]);
- const __m128i cospi_p22_m42 = pair_set_epi16(cospi[22], -cospi[42]);
- const __m128i cospi_p50_p14 = pair_set_epi16(cospi[50], cospi[14]);
- const __m128i cospi_p14_m50 = pair_set_epi16(cospi[14], -cospi[50]);
- const __m128i cospi_p58_p06 = pair_set_epi16(cospi[58], cospi[6]);
- const __m128i cospi_p06_m58 = pair_set_epi16(cospi[6], -cospi[58]);
-
- // stage 1: input permutation (even slots take input[15,13,...,1], odd slots take input[0,2,...,14])
- __m128i x[16];
- x[0] = input[15];
- x[1] = input[0];
- x[2] = input[13];
- x[3] = input[2];
- x[4] = input[11];
- x[5] = input[4];
- x[6] = input[9];
- x[7] = input[6];
- x[8] = input[7];
- x[9] = input[8];
- x[10] = input[5];
- x[11] = input[10];
- x[12] = input[3];
- x[13] = input[12];
- x[14] = input[1];
- x[15] = input[14];
-
- // stage 2: two-input butterflies on adjacent pairs, each with its own weight pair
- btf_16_sse2(cospi_p02_p62, cospi_p62_m02, x[0], x[1], x[0], x[1]);
- btf_16_sse2(cospi_p10_p54, cospi_p54_m10, x[2], x[3], x[2], x[3]);
- btf_16_sse2(cospi_p18_p46, cospi_p46_m18, x[4], x[5], x[4], x[5]);
- btf_16_sse2(cospi_p26_p38, cospi_p38_m26, x[6], x[7], x[6], x[7]);
- btf_16_sse2(cospi_p34_p30, cospi_p30_m34, x[8], x[9], x[8], x[9]);
- btf_16_sse2(cospi_p42_p22, cospi_p22_m42, x[10], x[11], x[10], x[11]);
- btf_16_sse2(cospi_p50_p14, cospi_p14_m50, x[12], x[13], x[12], x[13]);
- btf_16_sse2(cospi_p58_p06, cospi_p06_m58, x[14], x[15], x[14], x[15]);
-
- // stage 3~9: shared helpers; stage 9 is the one that writes output[]
- iadst16_stage3_ssse3(x);
- iadst16_stage4_ssse3(x, cospi, __rounding, cos_bit);
- iadst16_stage5_ssse3(x);
- iadst16_stage6_ssse3(x, cospi, __rounding, cos_bit);
- iadst16_stage7_ssse3(x);
- iadst16_stage8_ssse3(x, cospi, __rounding, cos_bit);
- iadst16_stage9_ssse3(output, x);
-}
-
-void iadst16_w4_new_sse2(const __m128i *input, __m128i *output,
- int8_t cos_bit) {
- (void)cos_bit;
- const int32_t *cospi = cospi_arr(INV_COS_BIT);
- const __m128i __rounding = _mm_set1_epi32(1 << (INV_COS_BIT - 1));
-
- const __m128i cospi_p02_p62 = pair_set_epi16(cospi[2], cospi[62]);
- const __m128i cospi_p62_m02 = pair_set_epi16(cospi[62], -cospi[2]);
- const __m128i cospi_p10_p54 = pair_set_epi16(cospi[10], cospi[54]);
- const __m128i cospi_p54_m10 = pair_set_epi16(cospi[54], -cospi[10]);
- const __m128i cospi_p18_p46 = pair_set_epi16(cospi[18], cospi[46]);
- const __m128i cospi_p46_m18 = pair_set_epi16(cospi[46], -cospi[18]);
- const __m128i cospi_p26_p38 = pair_set_epi16(cospi[26], cospi[38]);
- const __m128i cospi_p38_m26 = pair_set_epi16(cospi[38], -cospi[26]);
- const __m128i cospi_p34_p30 = pair_set_epi16(cospi[34], cospi[30]);
- const __m128i cospi_p30_m34 = pair_set_epi16(cospi[30], -cospi[34]);
- const __m128i cospi_p42_p22 = pair_set_epi16(cospi[42], cospi[22]);
- const __m128i cospi_p22_m42 = pair_set_epi16(cospi[22], -cospi[42]);
- const __m128i cospi_p50_p14 = pair_set_epi16(cospi[50], cospi[14]);
- const __m128i cospi_p14_m50 = pair_set_epi16(cospi[14], -cospi[50]);
- const __m128i cospi_p58_p06 = pair_set_epi16(cospi[58], cospi[6]);
- const __m128i cospi_p06_m58 = pair_set_epi16(cospi[6], -cospi[58]);
- const __m128i cospi_p08_p56 = pair_set_epi16(cospi[8], cospi[56]);
- const __m128i cospi_p56_m08 = pair_set_epi16(cospi[56], -cospi[8]);
- const __m128i cospi_p40_p24 = pair_set_epi16(cospi[40], cospi[24]);
- const __m128i cospi_p24_m40 = pair_set_epi16(cospi[24], -cospi[40]);
- const __m128i cospi_m56_p08 = pair_set_epi16(-cospi[56], cospi[8]);
- const __m128i cospi_m24_p40 = pair_set_epi16(-cospi[24], cospi[40]);
- const __m128i cospi_p16_p48 = pair_set_epi16(cospi[16], cospi[48]);
- const __m128i cospi_p48_m16 = pair_set_epi16(cospi[48], -cospi[16]);
- const __m128i cospi_m48_p16 = pair_set_epi16(-cospi[48], cospi[16]);
- const __m128i cospi_p32_p32 = pair_set_epi16(cospi[32], cospi[32]);
- const __m128i cospi_p32_m32 = pair_set_epi16(cospi[32], -cospi[32]);
-
- // stage 1
- __m128i x[16];
- x[0] = input[15];
- x[1] = input[0];
- x[2] = input[13];
- x[3] = input[2];
- x[4] = input[11];
- x[5] = input[4];
- x[6] = input[9];
- x[7] = input[6];
- x[8] = input[7];
- x[9] = input[8];
- x[10] = input[5];
- x[11] = input[10];
- x[12] = input[3];
- x[13] = input[12];
- x[14] = input[1];
- x[15] = input[14];
-
- // stage 2
- btf_16_4p_sse2(cospi_p02_p62, cospi_p62_m02, x[0], x[1], x[0], x[1]);
- btf_16_4p_sse2(cospi_p10_p54, cospi_p54_m10, x[2], x[3], x[2], x[3]);
- btf_16_4p_sse2(cospi_p18_p46, cospi_p46_m18, x[4], x[5], x[4], x[5]);
- btf_16_4p_sse2(cospi_p26_p38, cospi_p38_m26, x[6], x[7], x[6], x[7]);
- btf_16_4p_sse2(cospi_p34_p30, cospi_p30_m34, x[8], x[9], x[8], x[9]);
- btf_16_4p_sse2(cospi_p42_p22, cospi_p22_m42, x[10], x[11], x[10], x[11]);
- btf_16_4p_sse2(cospi_p50_p14, cospi_p14_m50, x[12], x[13], x[12], x[13]);
- btf_16_4p_sse2(cospi_p58_p06, cospi_p06_m58, x[14], x[15], x[14], x[15]);
-
- // stage 3
- iadst16_stage3_ssse3(x);
-
- // stage 4
- btf_16_4p_sse2(cospi_p08_p56, cospi_p56_m08, x[8], x[9], x[8], x[9]);
- btf_16_4p_sse2(cospi_p40_p24, cospi_p24_m40, x[10], x[11], x[10], x[11]);
- btf_16_4p_sse2(cospi_m56_p08, cospi_p08_p56, x[12], x[13], x[12], x[13]);
- btf_16_4p_sse2(cospi_m24_p40, cospi_p40_p24, x[14], x[15], x[14], x[15]);
-
- // stage 5
- iadst16_stage5_ssse3(x);
-
- // stage 6
- btf_16_4p_sse2(cospi_p16_p48, cospi_p48_m16, x[4], x[5], x[4], x[5]);
- btf_16_4p_sse2(cospi_m48_p16, cospi_p16_p48, x[6], x[7], x[6], x[7]);
- btf_16_4p_sse2(cospi_p16_p48, cospi_p48_m16, x[12], x[13], x[12], x[13]);
- btf_16_4p_sse2(cospi_m48_p16, cospi_p16_p48, x[14], x[15], x[14], x[15]);
-
- // stage 7
- iadst16_stage7_ssse3(x);
-
- // stage 8
- btf_16_4p_sse2(cospi_p32_p32, cospi_p32_m32, x[2], x[3], x[2], x[3]);
- btf_16_4p_sse2(cospi_p32_p32, cospi_p32_m32, x[6], x[7], x[6], x[7]);
- btf_16_4p_sse2(cospi_p32_p32, cospi_p32_m32, x[10], x[11], x[10], x[11]);
- btf_16_4p_sse2(cospi_p32_p32, cospi_p32_m32, x[14], x[15], x[14], x[15]);
-
- // stage 9
- iadst16_stage9_ssse3(output, x);
-}
-
-// 4-point inverse identity kernel over four vectors of 16-bit lanes.
-// Each output is input + mulhrs(input, frac) using saturating addition, where
-// frac is the part of NewSqrt2 above (1 << NewSqrt2Bits), shifted up to the
-// 15-bit fixed-point weight that _mm_mulhrs_epi16 expects.
-// cos_bit is accepted for signature compatibility and ignored.
-static void iidentity4_new_ssse3(const __m128i *input, __m128i *output,
-                                 int8_t cos_bit) {
-  (void)cos_bit;
-  const int16_t frac = (NewSqrt2 - (1 << NewSqrt2Bits));
-  const __m128i frac_q15 = _mm_set1_epi16(frac << (15 - NewSqrt2Bits));
-  int idx = 0;
-  while (idx < 4) {
-    const __m128i src = input[idx];
-    const __m128i scaled = _mm_mulhrs_epi16(src, frac_q15);
-    output[idx] = _mm_adds_epi16(scaled, src);
-    ++idx;
-  }
-}
-
-// 8-point inverse identity kernel: doubles each of the eight input vectors
-// with a saturating 16-bit add. cos_bit is ignored.
-static void iidentity8_new_sse2(const __m128i *input, __m128i *output,
-                                int8_t cos_bit) {
-  (void)cos_bit;
-  for (int idx = 0; idx != 8; ++idx) {
-    const __m128i src = input[idx];
-    output[idx] = _mm_adds_epi16(src, src);
-  }
-}
-
-// 16-point inverse identity kernel over sixteen vectors of 16-bit lanes.
-// Each output is 2 * input + mulhrs(input, frac), all additions saturating,
-// with frac = 2 * (NewSqrt2 - (1 << NewSqrt2Bits)) shifted up to the 15-bit
-// weight used by _mm_mulhrs_epi16. cos_bit is ignored.
-static void iidentity16_new_ssse3(const __m128i *input, __m128i *output,
-                                  int8_t cos_bit) {
-  (void)cos_bit;
-  const int16_t frac = 2 * (NewSqrt2 - (1 << NewSqrt2Bits));
-  const __m128i frac_q15 = _mm_set1_epi16(frac << (15 - NewSqrt2Bits));
-  for (int idx = 0; idx != 16; ++idx) {
-    const __m128i src = input[idx];
-    const __m128i scaled = _mm_mulhrs_epi16(src, frac_q15);
-    const __m128i doubled = _mm_adds_epi16(src, src);
-    output[idx] = _mm_adds_epi16(scaled, doubled);
-  }
-}
-
-// Reconstructs eight pixels: widens the low eight bytes of `pred` to 16 bits,
-// adds the residual `res` with signed saturation, and packs back to bytes
-// with unsigned saturation. Both 64-bit halves of the result hold the same
-// eight pixels.
-static INLINE __m128i lowbd_get_recon_8x8_sse2(const __m128i pred,
-                                               __m128i res) {
-  const __m128i pred16 = _mm_unpacklo_epi8(pred, _mm_setzero_si128());
-  const __m128i sum = _mm_adds_epi16(res, pred16);
-  return _mm_packus_epi16(sum, sum);
-}
-
-// Adds a 4-pixel-wide residual block to the destination in place.
-// For each destination row i (0 .. height - 1) the 4 bytes at
-// output + i * stride are widened to 16 bits, summed with in[j] using signed
-// saturation, packed to unsigned bytes and the low 4 bytes written back.
-// j runs 0 .. height - 1, or height - 1 .. 0 when flipud is non-zero.
-static INLINE void lowbd_write_buffer_4xn_sse2(__m128i *in, uint8_t *output,
- int stride, int flipud,
- const int height) {
- int j = flipud ? (height - 1) : 0;
- const int step = flipud ? -1 : 1;
- const __m128i zero = _mm_setzero_si128();
- for (int i = 0; i < height; ++i, j += step) {
- // NOTE(review): the uint32_t casts read/write 4 bytes through a uint8_t
- // pointer; this presumes the target tolerates the alignment and aliasing
- // involved -- confirm, or switch to a memcpy-based 32-bit load/store.
- const __m128i v = _mm_cvtsi32_si128(*((uint32_t *)(output + i * stride)));
- __m128i u = _mm_adds_epi16(in[j], _mm_unpacklo_epi8(v, zero));
- u = _mm_packus_epi16(u, zero);
- *((uint32_t *)(output + i * stride)) = _mm_cvtsi128_si32(u);
- }
-}
-
-// Adds an 8-pixel-wide residual block to the destination in place.
-// Destination row r takes residual vector in[r], or in[height - 1 - r] when
-// flipud is non-zero; each row is reconstructed with
-// lowbd_get_recon_8x8_sse2 and stored as 8 bytes.
-static INLINE void lowbd_write_buffer_8xn_sse2(__m128i *in, uint8_t *output,
-                                               int stride, int flipud,
-                                               const int height) {
-  const int dir = flipud ? -1 : 1;
-  int src_row = flipud ? (height - 1) : 0;
-  for (int row = 0; row < height; ++row, src_row += dir) {
-    uint8_t *const dst = output + row * stride;
-    const __m128i pred = _mm_loadl_epi64((__m128i const *)dst);
-    const __m128i recon = lowbd_get_recon_8x8_sse2(pred, in[src_row]);
-    _mm_storel_epi64((__m128i *)dst, recon);
-  }
-}
-
-// 1D functions that process 8 pixels at one time.
-// Indexed as [size index][1D transform type]; callers index the first
-// dimension with get_txw_idx()/get_txh_idx() results and the second with
-// hitx_1d_tab[]/vitx_1d_tab[] entries. Columns are DCT, ADST, identity.
-// NULL marks a size/type combination that has no kernel in this table.
-static const transform_1d_ssse3
- lowbd_txfm_all_1d_w8_arr[TX_SIZES][ITX_TYPES_1D] = {
- { idct4_new_sse2, iadst4_new_sse2, iidentity4_new_ssse3 },
- { idct8_new_sse2, iadst8_new_sse2, iidentity8_new_sse2 },
- { idct16_new_sse2, iadst16_new_sse2, iidentity16_new_ssse3 },
- { idct32_new_sse2, NULL, NULL },
- { idct64_low32_new_ssse3, NULL, NULL },
- };
-
-// functions for blocks with eob at DC and within
-// topleft 8x8, 16x16, 32x32 corner
-// Indexed as [size index][1D transform type][zeros index]. The last index is
-// taken from lowbd_txfm_all_1d_zeros_idx[eobx or eoby] by the callers, so it
-// selects a kernel variant according to how far the non-zero coefficients
-// extend. NULL entries are combinations with no kernel here; callers assert
-// that the selected pointer is non-NULL before using it.
-static const transform_1d_ssse3
- lowbd_txfm_all_1d_zeros_w8_arr[TX_SIZES][ITX_TYPES_1D][4] = {
- {
- { idct4_new_sse2, idct4_new_sse2, NULL, NULL },
- { iadst4_new_sse2, iadst4_new_sse2, NULL, NULL },
- { iidentity4_new_ssse3, iidentity4_new_ssse3, NULL, NULL },
- },
- { { idct8_low1_new_ssse3, idct8_new_sse2, NULL, NULL },
- { iadst8_low1_new_ssse3, iadst8_new_sse2, NULL, NULL },
- { iidentity8_new_sse2, iidentity8_new_sse2, NULL, NULL } },
- {
- { idct16_low1_new_ssse3, idct16_low8_new_ssse3, idct16_new_sse2,
- NULL },
- { iadst16_low1_new_ssse3, iadst16_low8_new_ssse3, iadst16_new_sse2,
- NULL },
- // No 16-point identity kernel is registered in this table.
- { NULL, NULL, NULL, NULL },
- },
- { { idct32_low1_new_ssse3, idct32_low8_new_ssse3, idct32_low16_new_ssse3,
- idct32_new_sse2 },
- { NULL, NULL, NULL, NULL },
- { NULL, NULL, NULL, NULL } },
- { { idct64_low1_new_ssse3, idct64_low8_new_ssse3, idct64_low16_new_ssse3,
- idct64_low32_new_ssse3 },
- { NULL, NULL, NULL, NULL },
- { NULL, NULL, NULL, NULL } }
- };
-
-// 1D functions that process 4 pixels at one time.
-// used in 4x4, 4x8, 4x16, 8x4, 16x4
-// Indexed as [size index][1D transform type], same layout as the 8-pixel
-// table. Only the 4-, 8- and 16-point rows are populated; the remaining rows
-// are all NULL.
-static const transform_1d_ssse3
- lowbd_txfm_all_1d_w4_arr[TX_SIZES][ITX_TYPES_1D] = {
- { idct4_w4_new_sse2, iadst4_w4_new_sse2, iidentity4_new_ssse3 },
- { idct8_w4_new_sse2, iadst8_w4_new_sse2, iidentity8_new_sse2 },
- { idct16_w4_new_sse2, iadst16_w4_new_sse2, iidentity16_new_ssse3 },
- { NULL, NULL, NULL },
- { NULL, NULL, NULL },
- };
-
-// Row pass of the identity transform for one 8-column strip.
-// For each of `height` rows, reads 8 coefficients starting at `input`
-// (advancing by `stride` int32 elements per row), computes
-// (coeff * NewSqrt2list[txw_idx] + rounding) >> (NewSqrt2Bits - shift)
-// per lane and stores the saturated 16-bit results in out[row].
-// When rect_type is 1 or -1 the coefficients are first multiplied by
-// NewInvSqrt2 (via _mm_mulhrs_epi16) before the scaling above.
-static INLINE void iidentity_row_8xn_ssse3(__m128i *out, const int32_t *input,
- int stride, int shift, int height,
- int txw_idx, int rect_type) {
- const int32_t *input_row = input;
- const __m128i scale = _mm_set1_epi16(NewSqrt2list[txw_idx]);
- const __m128i rounding = _mm_set1_epi16((1 << (NewSqrt2Bits - 1)) +
- (1 << (NewSqrt2Bits - shift - 1)));
- const __m128i one = _mm_set1_epi16(1);
- // Interleaved (scale, rounding) pairs: multiplying against (src, 1) pairs
- // with _mm_madd_epi16 yields src * scale + rounding in 32 bits per lane.
- const __m128i scale_rounding = _mm_unpacklo_epi16(scale, rounding);
- if (rect_type != 1 && rect_type != -1) {
- for (int i = 0; i < height; ++i) {
- // presumably narrows 8 int32 coefficients to 16-bit lanes -- helper is
- // defined outside this view.
- const __m128i src = load_32bit_to_16bit(input_row);
- input_row += stride;
- __m128i lo = _mm_unpacklo_epi16(src, one);
- __m128i hi = _mm_unpackhi_epi16(src, one);
- lo = _mm_madd_epi16(lo, scale_rounding);
- hi = _mm_madd_epi16(hi, scale_rounding);
- lo = _mm_srai_epi32(lo, NewSqrt2Bits - shift);
- hi = _mm_srai_epi32(hi, NewSqrt2Bits - shift);
- out[i] = _mm_packs_epi32(lo, hi);
- }
- } else {
- // Rectangular case: extra NewInvSqrt2 factor applied before the scaling.
- const __m128i rect_scale =
- _mm_set1_epi16(NewInvSqrt2 << (15 - NewSqrt2Bits));
- for (int i = 0; i < height; ++i) {
- __m128i src = load_32bit_to_16bit(input_row);
- src = _mm_mulhrs_epi16(src, rect_scale);
- input_row += stride;
- __m128i lo = _mm_unpacklo_epi16(src, one);
- __m128i hi = _mm_unpackhi_epi16(src, one);
- lo = _mm_madd_epi16(lo, scale_rounding);
- hi = _mm_madd_epi16(hi, scale_rounding);
- lo = _mm_srai_epi32(lo, NewSqrt2Bits - shift);
- hi = _mm_srai_epi32(hi, NewSqrt2Bits - shift);
- out[i] = _mm_packs_epi32(lo, hi);
- }
- }
-}
-
-// Column pass of the identity transform for one 8-column strip, fused with
-// reconstruction. For each of `height` rows: scales buf[h] by
-// NewSqrt2list[txh_idx] with rounding and a right shift of NewSqrt2Bits,
-// applies a second rounded right shift by -shift, adds the result to the 8
-// destination pixels at `output`, clamps to bytes and stores them, then
-// advances `output` by `stride`.
-// NOTE(review): `shift` is negated to form shift counts, so it is presumably
-// always negative here -- confirm against inv_txfm_shift_ls.
-static INLINE void iidentity_col_8xn_ssse3(uint8_t *output, int stride,
- __m128i *buf, int shift, int height,
- int txh_idx) {
- const __m128i scale = _mm_set1_epi16(NewSqrt2list[txh_idx]);
- const __m128i scale_rounding = _mm_set1_epi16(1 << (NewSqrt2Bits - 1));
- const __m128i shift_rounding = _mm_set1_epi32(1 << (-shift - 1));
- const __m128i one = _mm_set1_epi16(1);
- // (scale, rounding) pairs for _mm_madd_epi16 against (value, 1) pairs.
- const __m128i scale_coeff = _mm_unpacklo_epi16(scale, scale_rounding);
- const __m128i zero = _mm_setzero_si128();
- for (int h = 0; h < height; ++h) {
- __m128i lo = _mm_unpacklo_epi16(buf[h], one);
- __m128i hi = _mm_unpackhi_epi16(buf[h], one);
- lo = _mm_madd_epi16(lo, scale_coeff);
- hi = _mm_madd_epi16(hi, scale_coeff);
- lo = _mm_srai_epi32(lo, NewSqrt2Bits);
- hi = _mm_srai_epi32(hi, NewSqrt2Bits);
- lo = _mm_add_epi32(lo, shift_rounding);
- hi = _mm_add_epi32(hi, shift_rounding);
- lo = _mm_srai_epi32(lo, -shift);
- hi = _mm_srai_epi32(hi, -shift);
- __m128i x = _mm_packs_epi32(lo, hi);
-
- // Add the residual to the prediction and clamp to [0, 255].
- const __m128i pred = _mm_loadl_epi64((__m128i const *)(output));
- x = _mm_adds_epi16(x, _mm_unpacklo_epi8(pred, zero));
- const __m128i u = _mm_packus_epi16(x, x);
- _mm_storel_epi64((__m128i *)(output), u);
- output += stride;
- }
-}
-
-// Identity-in-both-directions inverse transform plus reconstruction.
-// The block is processed in 8-column strips: each strip gets the identity
-// row pass into a scratch buffer, then the identity column pass which adds
-// the result into `output`. Width and height are both clamped to 32 before
-// use, so at most a 32x32 region of coefficients is consumed.
-static INLINE void lowbd_inv_txfm2d_add_idtx_ssse3(const int32_t *input,
-                                                   uint8_t *output, int stride,
-                                                   TX_SIZE tx_size) {
-  const int width = tx_size_wide[tx_size];
-  const int height = tx_size_high[tx_size];
-  const int coeff_stride = AOMMIN(32, width);
-  const int rows = AOMMIN(32, height);
-  const int rect_type = get_rect_tx_log_ratio(width, height);
-  const int8_t *const shift = inv_txfm_shift_ls[tx_size];
-  const int txw_idx = get_txw_idx(tx_size);
-  const int txh_idx = get_txh_idx(tx_size);
-  const int num_strips = coeff_stride >> 3;
-  __m128i strip[32];
-
-  for (int s = 0; s < num_strips; ++s) {
-    const int x = 8 * s;
-    iidentity_row_8xn_ssse3(strip, input + x, coeff_stride, shift[0], rows,
-                            txw_idx, rect_type);
-    iidentity_col_8xn_ssse3(output + x, stride, strip, shift[1], rows,
-                            txh_idx);
-  }
-}
-
-void lowbd_inv_txfm2d_add_4x4_ssse3(const int32_t *input, uint8_t *output,
- int stride, TX_TYPE tx_type,
- TX_SIZE tx_size_, int eob) {
- (void)tx_size_;
- (void)eob;
- __m128i buf[4];
- const TX_SIZE tx_size = TX_4X4;
- const int8_t *shift = inv_txfm_shift_ls[tx_size];
- const int txw_idx = get_txw_idx(tx_size);
- const int txh_idx = get_txh_idx(tx_size);
- const int cos_bit_row = inv_cos_bit_row[txw_idx][txh_idx];
- const int cos_bit_col = inv_cos_bit_col[txw_idx][txh_idx];
- const int txfm_size_col = tx_size_wide[tx_size];
- const int txfm_size_row = tx_size_high[tx_size];
-
- const transform_1d_ssse3 row_txfm =
- lowbd_txfm_all_1d_w4_arr[txw_idx][hitx_1d_tab[tx_type]];
- const transform_1d_ssse3 col_txfm =
- lowbd_txfm_all_1d_w4_arr[txh_idx][vitx_1d_tab[tx_type]];
-
- int ud_flip, lr_flip;
- get_flip_cfg(tx_type, &ud_flip, &lr_flip);
- load_buffer_32bit_to_16bit_w4(input, txfm_size_col, buf, txfm_size_row);
- transpose_16bit_4x4(buf, buf);
- row_txfm(buf, buf, cos_bit_row);
- if (lr_flip) {
- __m128i temp[4];
- flip_buf_sse2(buf, temp, txfm_size_col);
- transpose_16bit_4x4(temp, buf);
- } else {
- transpose_16bit_4x4(buf, buf);
- }
- col_txfm(buf, buf, cos_bit_col);
- round_shift_16bit_ssse3(buf, txfm_size_row, shift[1]);
- lowbd_write_buffer_4xn_sse2(buf, output, stride, ud_flip, txfm_size_row);
-}
-
-// Reconstructs sixteen pixels: the low and high eight bytes of `pred` are
-// widened to 16 bits and added (signed saturation) to res0 and res1
-// respectively; the two sums are packed back to sixteen unsigned bytes.
-static INLINE __m128i lowbd_get_recon_16x16_sse2(const __m128i pred,
-                                                 __m128i res0, __m128i res1) {
-  const __m128i zero = _mm_setzero_si128();
-  const __m128i lo = _mm_adds_epi16(res0, _mm_unpacklo_epi8(pred, zero));
-  const __m128i hi = _mm_adds_epi16(res1, _mm_unpackhi_epi8(pred, zero));
-  return _mm_packus_epi16(lo, hi);
-}
-
-// Adds a 16-pixel-wide residual block to the destination in place.
-// The left eight columns of source row r live in in[r] and the right eight
-// in in[r + height]. Destination row i uses source row i, or
-// height - 1 - i when flipud is non-zero. Loads and stores are unaligned.
-static INLINE void lowbd_write_buffer_16xn_sse2(__m128i *in, uint8_t *output,
-                                                int stride, int flipud,
-                                                int height) {
-  const int dir = flipud ? -1 : 1;
-  int src_row = flipud ? (height - 1) : 0;
-  for (int row = 0; row < height; ++row, src_row += dir) {
-    uint8_t *const dst = output + row * stride;
-    const __m128i pred = _mm_loadu_si128((__m128i const *)dst);
-    const __m128i recon =
-        lowbd_get_recon_16x16_sse2(pred, in[src_row], in[src_row + height]);
-    _mm_storeu_si128((__m128i *)dst, recon);
-  }
-}
-
-// Multiplies `size` vectors of 16-bit lanes by the weight NewInvSqrt2 * 8
-// using _mm_mulhrs_epi16 (rounded high multiply). Used by callers as the
-// extra scaling step for rectangular transforms. input and output may alias.
-static INLINE void round_shift_ssse3(const __m128i *input, __m128i *output,
-                                     int size) {
-  const __m128i weight = _mm_set1_epi16(NewInvSqrt2 * 8);
-  for (int idx = 0; idx < size; ++idx) {
-    const __m128i src = input[idx];
-    output[idx] = _mm_mulhrs_epi16(src, weight);
-  }
-}
-
-// Inverse 2D transform plus reconstruction for transform types where neither
-// 1D pass is the identity.
-//
-//   input   : dequantized coefficients, row stride AOMMIN(32, width).
-//   output  : destination pixels, updated in place; `stride` is its pitch.
-//   tx_type : selects the row/column kernels and the flip configuration.
-//   tx_size : must be a size with width >= 8 (only the 8-wide and
-//             16-multiple write paths exist below).
-//   eob     : end-of-block position, used to limit work to the region that
-//             can hold non-zero coefficients.
-//
-// Row pass: 8 coefficient rows at a time are loaded, transposed, transformed
-// and transposed back into buf1. Column pass: each 8-column strip of buf1 is
-// transformed and rounded, then added to `output`.
-static INLINE void lowbd_inv_txfm2d_add_no_identity_ssse3(
-    const int32_t *input, uint8_t *output, int stride, TX_TYPE tx_type,
-    TX_SIZE tx_size, int eob) {
-  __m128i buf1[64 * 8];
-  int eobx, eoby;
-  get_eobx_eoby_scan_default(&eobx, &eoby, tx_size, eob);
-  const int8_t *shift = inv_txfm_shift_ls[tx_size];
-  const int txw_idx = get_txw_idx(tx_size);
-  const int txh_idx = get_txh_idx(tx_size);
-  const int cos_bit_col = inv_cos_bit_col[txw_idx][txh_idx];
-  const int cos_bit_row = inv_cos_bit_row[txw_idx][txh_idx];
-  const int txfm_size_col = tx_size_wide[tx_size];
-  const int txfm_size_row = tx_size_high[tx_size];
-  const int buf_size_w_div8 = txfm_size_col >> 3;
-  // Number of 8x8 tiles (horizontally / vertically) that can be non-zero.
-  const int buf_size_nonzero_w_div8 = (eobx + 8) >> 3;
-  const int buf_size_nonzero_h_div8 = (eoby + 8) >> 3;
-  const int input_stride = AOMMIN(32, txfm_size_col);
-  const int rect_type = get_rect_tx_log_ratio(txfm_size_col, txfm_size_row);
-
-  const int fun_idx_x = lowbd_txfm_all_1d_zeros_idx[eobx];
-  const int fun_idx_y = lowbd_txfm_all_1d_zeros_idx[eoby];
-  const transform_1d_ssse3 row_txfm =
-      lowbd_txfm_all_1d_zeros_w8_arr[txw_idx][hitx_1d_tab[tx_type]][fun_idx_x];
-  const transform_1d_ssse3 col_txfm =
-      lowbd_txfm_all_1d_zeros_w8_arr[txh_idx][vitx_1d_tab[tx_type]][fun_idx_y];
-
-  assert(col_txfm != NULL);
-  assert(row_txfm != NULL);
-  int ud_flip, lr_flip;
-  get_flip_cfg(tx_type, &ud_flip, &lr_flip);
-  for (int i = 0; i < buf_size_nonzero_h_div8; i++) {
-    __m128i buf0[64];
-    const int32_t *input_row = input + i * input_stride * 8;
-    for (int j = 0; j < buf_size_nonzero_w_div8; ++j) {
-      __m128i *buf0_cur = buf0 + j * 8;
-      load_buffer_32bit_to_16bit(input_row + j * 8, input_stride, buf0_cur, 8);
-      transpose_16bit_8x8(buf0_cur, buf0_cur);
-    }
-    if (rect_type == 1 || rect_type == -1) {
-      // rect special code. Only the vectors loaded above are scaled: the
-      // rest of buf0 is still uninitialized at this point, so touching it
-      // would read indeterminate stack data without affecting any defined
-      // result.
-      round_shift_ssse3(buf0, buf0, buf_size_nonzero_w_div8 << 3);
-    }
-    row_txfm(buf0, buf0, cos_bit_row);
-    round_shift_16bit_ssse3(buf0, txfm_size_col, shift[0]);
-    // buf1 is laid out as consecutive 8-column strips of txfm_size_row
-    // vectors each; this tile row lands at offset i * 8 within every strip.
-    __m128i *_buf1 = buf1 + i * 8;
-    if (lr_flip) {
-      for (int j = 0; j < buf_size_w_div8; ++j) {
-        __m128i temp[8];
-        flip_buf_sse2(buf0 + 8 * j, temp, 8);
-        transpose_16bit_8x8(temp,
-                            _buf1 + txfm_size_row * (buf_size_w_div8 - 1 - j));
-      }
-    } else {
-      for (int j = 0; j < buf_size_w_div8; ++j) {
-        transpose_16bit_8x8(buf0 + 8 * j, _buf1 + txfm_size_row * j);
-      }
-    }
-  }
-  for (int i = 0; i < buf_size_w_div8; i++) {
-    col_txfm(buf1 + i * txfm_size_row, buf1 + i * txfm_size_row, cos_bit_col);
-    round_shift_16bit_ssse3(buf1 + i * txfm_size_row, txfm_size_row, shift[1]);
-  }
-
-  if (txfm_size_col >= 16) {
-    // Two adjacent 8-column strips form one 16-pixel-wide write.
-    for (int i = 0; i < (txfm_size_col >> 4); i++) {
-      lowbd_write_buffer_16xn_sse2(buf1 + i * txfm_size_row * 2,
-                                   output + 16 * i, stride, ud_flip,
-                                   txfm_size_row);
-    }
-  } else if (txfm_size_col == 8) {
-    lowbd_write_buffer_8xn_sse2(buf1, output, stride, ud_flip, txfm_size_row);
-  }
-}
-
-// Inverse 2D transform plus reconstruction for transform types whose
-// horizontal (row) pass is the identity (V_DCT, V_ADST, V_FLIPADST in the
-// dispatcher). Works one 8-column strip at a time: identity row scaling into
-// buf0, the vertical 1D kernel in place, then a rounded shift by shift[1]
-// fused with the add-to-destination loop.
-//
-//   input / output / stride : coefficients in, pixels updated in place.
-//   tx_type : selects the column kernel and whether rows are flipped.
-//   eob     : end-of-block position, used to bound the strips and rows that
-//             are processed.
-static INLINE void lowbd_inv_txfm2d_add_h_identity_ssse3(
-    const int32_t *input, uint8_t *output, int stride, TX_TYPE tx_type,
-    TX_SIZE tx_size, int eob) {
-  const int8_t *shift = inv_txfm_shift_ls[tx_size];
-  int eobx, eoby;
-  get_eobx_eoby_scan_h_identity(&eobx, &eoby, tx_size, eob);
-  const int txw_idx = get_txw_idx(tx_size);
-  const int txh_idx = get_txh_idx(tx_size);
-  const int cos_bit_col = inv_cos_bit_col[txw_idx][txh_idx];
-  const int txfm_size_col = tx_size_wide[tx_size];
-  const int txfm_size_row = tx_size_high[tx_size];
-  const int buf_size_w_div8 = (eobx + 8) >> 3;
-  const int input_stride = AOMMIN(32, txfm_size_col);
-  const int rect_type = get_rect_tx_log_ratio(txfm_size_col, txfm_size_row);
-
-  const int fun_idx = lowbd_txfm_all_1d_zeros_idx[eoby];
-  // The innermost dimension of lowbd_txfm_all_1d_zeros_w8_arr is 4.
-  assert(fun_idx < 4);
-  const transform_1d_ssse3 col_txfm =
-      lowbd_txfm_all_1d_zeros_w8_arr[txh_idx][vitx_1d_tab[tx_type]][fun_idx];
-
-  assert(col_txfm != NULL);
-
-  int ud_flip, lr_flip;
-  get_flip_cfg(tx_type, &ud_flip, &lr_flip);
-  // Loop-invariant: rounded right shift by -shift[1] expressed as a mulhrs
-  // weight, and the direction in which buf0 rows are consumed.
-  const __m128i mshift = _mm_set1_epi16(1 << (15 + shift[1]));
-  const int step = ud_flip ? -1 : 1;
-  for (int i = 0; i < buf_size_w_div8; i++) {
-    __m128i buf0[64];
-    iidentity_row_8xn_ssse3(buf0, input + 8 * i, input_stride, shift[0],
-                            eoby + 1, txw_idx, rect_type);
-    col_txfm(buf0, buf0, cos_bit_col);
-    int k = ud_flip ? (txfm_size_row - 1) : 0;
-    uint8_t *out = output + 8 * i;
-    for (int j = 0; j < txfm_size_row; ++j, k += step) {
-      const __m128i v = _mm_loadl_epi64((__m128i const *)(out));
-      __m128i res = _mm_mulhrs_epi16(buf0[k], mshift);
-      const __m128i u = lowbd_get_recon_8x8_sse2(v, res);
-      _mm_storel_epi64((__m128i *)(out), u);
-      out += stride;
-    }
-  }
-}
-
-static INLINE void lowbd_inv_txfm2d_add_v_identity_ssse3(
- const int32_t *input, uint8_t *output, int stride, TX_TYPE tx_type,
- TX_SIZE tx_size, int eob) {
- __m128i buf1[64];
- int eobx, eoby;
- get_eobx_eoby_scan_v_identity(&eobx, &eoby, tx_size, eob);
- const int8_t *shift = inv_txfm_shift_ls[tx_size];
- const int txw_idx = get_txw_idx(tx_size);
- const int txh_idx = get_txh_idx(tx_size);
- const int cos_bit_row = inv_cos_bit_row[txw_idx][txh_idx];
- const int txfm_size_col = tx_size_wide[tx_size];
- const int txfm_size_row = tx_size_high[tx_size];
- const int buf_size_w_div8 = txfm_size_col >> 3;
- const int buf_size_h_div8 = (eoby + 8) >> 3;
- const int input_stride = AOMMIN(32, txfm_size_col);
- const int rect_type = get_rect_tx_log_ratio(txfm_size_col, txfm_size_row);
-
- const int fun_idx = lowbd_txfm_all_1d_zeros_idx[eobx];
- const transform_1d_ssse3 row_txfm =
- lowbd_txfm_all_1d_zeros_w8_arr[txw_idx][hitx_1d_tab[tx_type]][fun_idx];
-
- assert(row_txfm != NULL);
- int ud_flip, lr_flip;
- get_flip_cfg(tx_type, &ud_flip, &lr_flip);
- for (int i = 0; i < buf_size_h_div8; i++) {
- __m128i buf0[64];
- const int32_t *input_row = input + i * input_stride * 8;
- for (int j = 0; j < AOMMIN(4, buf_size_w_div8); ++j) {
- __m128i *buf0_cur = buf0 + j * 8;
- load_buffer_32bit_to_16bit(input_row + j * 8, input_stride, buf0_cur, 8);
- transpose_16bit_8x8(buf0_cur, buf0_cur);
- }
- if (rect_type == 1 || rect_type == -1) {
- round_shift_ssse3(buf0, buf0, input_stride); // rect special code
- }
- row_txfm(buf0, buf0, cos_bit_row);
- round_shift_16bit_ssse3(buf0, txfm_size_col, shift[0]);
- __m128i *_buf1 = buf1;
- if (lr_flip) {
- for (int j = 0; j < buf_size_w_div8; ++j) {
- __m128i temp[8];
- flip_buf_sse2(buf0 + 8 * j, temp, 8);
- transpose_16bit_8x8(temp, _buf1 + 8 * (buf_size_w_div8 - 1 - j));
- }
- } else {
- for (int j = 0; j < buf_size_w_div8; ++j) {
- transpose_16bit_8x8(buf0 + 8 * j, _buf1 + 8 * j);
- }
- }
-
- for (int j = 0; j < buf_size_w_div8; ++j) {
- iidentity_col_8xn_ssse3(output + i * 8 * stride + j * 8, stride,
- buf1 + j * 8, shift[1], 8, txh_idx);
- }
- }
-}
-
-// for 32x32,32x64,64x32,64x64,32x8,8x32,16x32,32x16,64x16,16x64
-// Routes a block to the implementation matching the identity structure of
-// tx_type: both passes identity, row pass identity, column pass identity,
-// or (everything else, including DCT_DCT) no identity pass.
-static INLINE void lowbd_inv_txfm2d_add_universe_ssse3(
-    const int32_t *input, uint8_t *output, int stride, TX_TYPE tx_type,
-    TX_SIZE tx_size, int eob) {
-  switch (tx_type) {
-    case IDTX:
-      lowbd_inv_txfm2d_add_idtx_ssse3(input, output, stride, tx_size);
-      break;
-    case V_DCT:
-    case V_ADST:
-    case V_FLIPADST:
-      lowbd_inv_txfm2d_add_h_identity_ssse3(input, output, stride, tx_type,
-                                            tx_size, eob);
-      break;
-    case H_DCT:
-    case H_ADST:
-    case H_FLIPADST:
-      lowbd_inv_txfm2d_add_v_identity_ssse3(input, output, stride, tx_type,
-                                            tx_size, eob);
-      break;
-    case DCT_DCT:
-    default:
-      lowbd_inv_txfm2d_add_no_identity_ssse3(input, output, stride, tx_type,
-                                             tx_size, eob);
-      break;
-  }
-}
-
-void lowbd_inv_txfm2d_add_4x8_ssse3(const int32_t *input, uint8_t *output,
- int stride, TX_TYPE tx_type,
- TX_SIZE tx_size_, int eob) {
- (void)tx_size_;
- (void)eob;
- __m128i buf[8];
- const TX_SIZE tx_size = TX_4X8;
- const int8_t *shift = inv_txfm_shift_ls[tx_size];
- const int txw_idx = get_txw_idx(tx_size);
- const int txh_idx = get_txh_idx(tx_size);
- const int cos_bit_row = inv_cos_bit_row[txw_idx][txh_idx];
- const int cos_bit_col = inv_cos_bit_col[txw_idx][txh_idx];
- const int txfm_size_col = tx_size_wide[tx_size];
- const int txfm_size_row = tx_size_high[tx_size];
-
- const transform_1d_ssse3 row_txfm =
- lowbd_txfm_all_1d_w8_arr[txw_idx][hitx_1d_tab[tx_type]];
- const transform_1d_ssse3 col_txfm =
- lowbd_txfm_all_1d_w4_arr[txh_idx][vitx_1d_tab[tx_type]];
-
- int ud_flip, lr_flip;
- get_flip_cfg(tx_type, &ud_flip, &lr_flip);
- load_buffer_32bit_to_16bit_w4(input, txfm_size_col, buf, txfm_size_row);
- transpose_16bit_4x8(buf, buf);
- round_shift_ssse3(buf, buf, txfm_size_col); // rect special code
- row_txfm(buf, buf, cos_bit_row);
- // round_shift_16bit_ssse3(buf, txfm_size_col, shift[0]);// shift[0] is 0
- if (lr_flip) {
- __m128i temp[4];
- flip_buf_sse2(buf, temp, txfm_size_col);
- transpose_16bit_8x4(temp, buf);
- } else {
- transpose_16bit_8x4(buf, buf);
- }
- col_txfm(buf, buf, cos_bit_col);
- round_shift_16bit_ssse3(buf, txfm_size_row, shift[1]);
- lowbd_write_buffer_4xn_sse2(buf, output, stride, ud_flip, txfm_size_row);
-}
-
-void lowbd_inv_txfm2d_add_8x4_ssse3(const int32_t *input, uint8_t *output,
- int stride, TX_TYPE tx_type,
- TX_SIZE tx_size_, int eob) {
- (void)tx_size_;
- (void)eob;
- __m128i buf[8];
- const TX_SIZE tx_size = TX_8X4;
- const int8_t *shift = inv_txfm_shift_ls[tx_size];
- const int txw_idx = get_txw_idx(tx_size);
- const int txh_idx = get_txh_idx(tx_size);
- const int cos_bit_row = inv_cos_bit_row[txw_idx][txh_idx];
- const int cos_bit_col = inv_cos_bit_col[txw_idx][txh_idx];
- const int txfm_size_col = tx_size_wide[tx_size];
- const int txfm_size_row = tx_size_high[tx_size];
-
- const transform_1d_ssse3 row_txfm =
- lowbd_txfm_all_1d_w4_arr[txw_idx][hitx_1d_tab[tx_type]];
- const transform_1d_ssse3 col_txfm =
- lowbd_txfm_all_1d_w8_arr[txh_idx][vitx_1d_tab[tx_type]];
-
- int ud_flip, lr_flip;
- get_flip_cfg(tx_type, &ud_flip, &lr_flip);
- load_buffer_32bit_to_16bit(input, txfm_size_col, buf, txfm_size_row);
- transpose_16bit_8x4(buf, buf);
- round_shift_ssse3(buf, buf, txfm_size_col); // rect special code
- row_txfm(buf, buf, cos_bit_row);
- // round_shift_16bit_ssse3(buf, txfm_size_col, shift[0]); // shift[0] is 0
- if (lr_flip) {
- __m128i temp[8];
- flip_buf_sse2(buf, temp, txfm_size_col);
- transpose_16bit_4x8(temp, buf);
- } else {
- transpose_16bit_4x8(buf, buf);
- }
- col_txfm(buf, buf, cos_bit_col);
- round_shift_16bit_ssse3(buf, txfm_size_row, shift[1]);
- lowbd_write_buffer_8xn_sse2(buf, output, stride, ud_flip, txfm_size_row);
-}
-
-void lowbd_inv_txfm2d_add_4x16_ssse3(const int32_t *input, uint8_t *output,
- int stride, TX_TYPE tx_type,
- TX_SIZE tx_size_, int eob) {
- (void)tx_size_;
- (void)eob;
- __m128i buf[16];
- const TX_SIZE tx_size = TX_4X16;
- const int8_t *shift = inv_txfm_shift_ls[tx_size];
- const int txw_idx = get_txw_idx(tx_size);
- const int txh_idx = get_txh_idx(tx_size);
- const int cos_bit_row = inv_cos_bit_row[txw_idx][txh_idx];
- const int cos_bit_col = inv_cos_bit_col[txw_idx][txh_idx];
- const int txfm_size_col = tx_size_wide[tx_size];
- const int txfm_size_row = tx_size_high[tx_size];
-
- const transform_1d_ssse3 row_txfm =
- lowbd_txfm_all_1d_w8_arr[txw_idx][hitx_1d_tab[tx_type]];
- const transform_1d_ssse3 col_txfm =
- lowbd_txfm_all_1d_w4_arr[txh_idx][vitx_1d_tab[tx_type]];
-
- int ud_flip, lr_flip;
- get_flip_cfg(tx_type, &ud_flip, &lr_flip);
-
- const int row_one_loop = 8;
- for (int i = 0; i < 2; ++i) {
- const int32_t *input_cur = input + i * txfm_size_col * row_one_loop;
- __m128i *buf_cur = buf + i * row_one_loop;
- load_buffer_32bit_to_16bit_w4(input_cur, txfm_size_col, buf_cur,
- row_one_loop);
- transpose_16bit_4x8(buf_cur, buf_cur);
- row_txfm(buf_cur, buf_cur, cos_bit_row);
- round_shift_16bit_ssse3(buf_cur, row_one_loop, shift[0]);
- if (lr_flip) {
- __m128i temp[8];
- flip_buf_sse2(buf_cur, temp, txfm_size_col);
- transpose_16bit_8x4(temp, buf_cur);
- } else {
- transpose_16bit_8x4(buf_cur, buf_cur);
- }
- }
- col_txfm(buf, buf, cos_bit_col);
- round_shift_16bit_ssse3(buf, txfm_size_row, shift[1]);
- lowbd_write_buffer_4xn_sse2(buf, output, stride, ud_flip, txfm_size_row);
-}
-
-void lowbd_inv_txfm2d_add_16x4_ssse3(const int32_t *input, uint8_t *output,
- int stride, TX_TYPE tx_type,
- TX_SIZE tx_size_, int eob) {
- (void)tx_size_;
- (void)eob;
- __m128i buf[16];
- const TX_SIZE tx_size = TX_16X4;
- const int8_t *shift = inv_txfm_shift_ls[tx_size];
- const int txw_idx = get_txw_idx(tx_size);
- const int txh_idx = get_txh_idx(tx_size);
- const int cos_bit_row = inv_cos_bit_row[txw_idx][txh_idx];
- const int cos_bit_col = inv_cos_bit_col[txw_idx][txh_idx];
- const int txfm_size_col = tx_size_wide[tx_size];
- const int txfm_size_row = tx_size_high[tx_size];
- const int buf_size_w_div8 = txfm_size_col >> 3;
-
- const transform_1d_ssse3 row_txfm =
- lowbd_txfm_all_1d_w4_arr[txw_idx][hitx_1d_tab[tx_type]];
- const transform_1d_ssse3 col_txfm =
- lowbd_txfm_all_1d_w8_arr[txh_idx][vitx_1d_tab[tx_type]];
-
- int ud_flip, lr_flip;
- get_flip_cfg(tx_type, &ud_flip, &lr_flip);
- const int row_one_loop = 8;
- for (int i = 0; i < buf_size_w_div8; ++i) {
- const int32_t *input_cur = input + i * row_one_loop;
- __m128i *buf_cur = buf + i * row_one_loop;
- load_buffer_32bit_to_16bit(input_cur, txfm_size_col, buf_cur,
- txfm_size_row);
- transpose_16bit_8x4(buf_cur, buf_cur);
- }
- row_txfm(buf, buf, cos_bit_row);
- round_shift_16bit_ssse3(buf, txfm_size_col, shift[0]);
- if (lr_flip) {
- __m128i temp[16];
- flip_buf_sse2(buf, temp, 16);
- transpose_16bit_4x8(temp, buf);
- transpose_16bit_4x8(temp + 8, buf + 8);
- } else {
- transpose_16bit_4x8(buf, buf);
- transpose_16bit_4x8(buf + row_one_loop, buf + row_one_loop);
- }
- for (int i = 0; i < buf_size_w_div8; i++) {
- col_txfm(buf + i * row_one_loop, buf + i * row_one_loop, cos_bit_col);
- round_shift_16bit_ssse3(buf + i * row_one_loop, txfm_size_row, shift[1]);
- }
- lowbd_write_buffer_8xn_sse2(buf, output, stride, ud_flip, 4);
- lowbd_write_buffer_8xn_sse2(buf + 8, output + 8, stride, ud_flip, 4);
-}
-
-// Top-level low-bitdepth inverse 2D transform + add. The five block sizes
-// with a 4-pixel dimension have dedicated routines; every other size goes
-// through the generic lowbd_inv_txfm2d_add_universe_ssse3 path.
-void av1_lowbd_inv_txfm2d_add_ssse3(const int32_t *input, uint8_t *output,
-                                    int stride, TX_TYPE tx_type,
-                                    TX_SIZE tx_size, int eob) {
-  switch (tx_size) {
-    case TX_4X4:
-      lowbd_inv_txfm2d_add_4x4_ssse3(input, output, stride, tx_type, tx_size,
-                                     eob);
-      return;
-    case TX_4X8:
-      lowbd_inv_txfm2d_add_4x8_ssse3(input, output, stride, tx_type, tx_size,
-                                     eob);
-      return;
-    case TX_8X4:
-      lowbd_inv_txfm2d_add_8x4_ssse3(input, output, stride, tx_type, tx_size,
-                                     eob);
-      return;
-    case TX_4X16:
-      lowbd_inv_txfm2d_add_4x16_ssse3(input, output, stride, tx_type, tx_size,
-                                      eob);
-      return;
-    case TX_16X4:
-      lowbd_inv_txfm2d_add_16x4_ssse3(input, output, stride, tx_type, tx_size,
-                                      eob);
-      return;
-    default: break;
-  }
-  lowbd_inv_txfm2d_add_universe_ssse3(input, output, stride, tx_type, tx_size,
-                                      eob);
-}
-// Entry point for the inverse transform + add. Lossless blocks are handed to
-// the C implementation; all others use the SSSE3 low-bitdepth path with the
-// type, size and eob taken from txfm_param.
-void av1_inv_txfm_add_ssse3(const tran_low_t *dqcoeff, uint8_t *dst, int stride,
-                            const TxfmParam *txfm_param) {
-  if (txfm_param->lossless) {
-    av1_inv_txfm_add_c(dqcoeff, dst, stride, txfm_param);
-    return;
-  }
-  av1_lowbd_inv_txfm2d_add_ssse3(dqcoeff, dst, stride, txfm_param->tx_type,
-                                 txfm_param->tx_size, txfm_param->eob);
-}
diff --git a/third_party/aom/av1/common/x86/av1_inv_txfm_ssse3.h b/third_party/aom/av1/common/x86/av1_inv_txfm_ssse3.h
deleted file mode 100644
index 66bd339d1..000000000
--- a/third_party/aom/av1/common/x86/av1_inv_txfm_ssse3.h
+++ /dev/null
@@ -1,232 +0,0 @@
-/*
- * Copyright (c) 2018, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-#ifndef AOM_AV1_COMMON_X86_AV1_INV_TXFM_SSSE3_H_
-#define AOM_AV1_COMMON_X86_AV1_INV_TXFM_SSSE3_H_
-
-#include <emmintrin.h> // SSE2
-#include <tmmintrin.h> // SSSE3
-
-#include "config/aom_config.h"
-#include "config/av1_rtcd.h"
-
-#include "aom/aom_integer.h"
-#include "aom_dsp/x86/transpose_sse2.h"
-#include "aom_dsp/x86/txfm_common_sse2.h"
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-// Single-input butterfly: out0 = mulhrs(in, w0 * 8), out1 = mulhrs(in, w1 * 8)
-// on 16-bit lanes. `in` is copied to a local first, so out0 or out1 may name
-// the same variable as `in`.
-#define btf_16_ssse3(w0, w1, in, out0, out1) \
- do { \
- const __m128i _w0 = _mm_set1_epi16(w0 * 8); \
- const __m128i _w1 = _mm_set1_epi16(w1 * 8); \
- const __m128i _in = in; \
- out0 = _mm_mulhrs_epi16(_in, _w0); \
- out1 = _mm_mulhrs_epi16(_in, _w1); \
- } while (0)
-
-#define btf_16_adds_subs_sse2(in0, in1) \
- do { \
- const __m128i _in0 = in0; \
- const __m128i _in1 = in1; \
- in0 = _mm_adds_epi16(_in0, _in1); \
- in1 = _mm_subs_epi16(_in0, _in1); \
- } while (0)
-
-#define btf_16_subs_adds_sse2(in0, in1) \
- do { \
- const __m128i _in0 = in0; \
- const __m128i _in1 = in1; \
- in1 = _mm_subs_epi16(_in0, _in1); \
- in0 = _mm_adds_epi16(_in0, _in1); \
- } while (0)
-
-#define btf_16_adds_subs_out_sse2(out0, out1, in0, in1) \
- do { \
- const __m128i _in0 = in0; \
- const __m128i _in1 = in1; \
- out0 = _mm_adds_epi16(_in0, _in1); \
- out1 = _mm_subs_epi16(_in0, _in1); \
- } while (0)
-
-// In-place shift of `size` vectors of 16-bit lanes.
-//   bit > 0 : logical left shift by `bit`.
-//   bit < 0 : rounded right shift by -bit, implemented as a
-//             _mm_mulhrs_epi16 multiply by 1 << (15 + bit).
-//   bit == 0: no-op.
-static INLINE void round_shift_16bit_ssse3(__m128i *in, int size, int bit) {
-  if (bit == 0) return;
-
-  if (bit > 0) {
-    for (int idx = 0; idx < size; ++idx) {
-      in[idx] = _mm_slli_epi16(in[idx], bit);
-    }
-    return;
-  }
-
-  const __m128i weight = _mm_set1_epi16(1 << (15 + bit));
-  for (int idx = 0; idx < size; ++idx) {
-    in[idx] = _mm_mulhrs_epi16(in[idx], weight);
-  }
-}
-
-// 1D itx types
-typedef enum ATTRIBUTE_PACKED {
- IDCT_1D,
- IADST_1D,
- IFLIPADST_1D = IADST_1D,
- IIDENTITY_1D,
- ITX_TYPES_1D,
-} ITX_TYPE_1D;
-
-static const ITX_TYPE_1D vitx_1d_tab[TX_TYPES] = {
- IDCT_1D, IADST_1D, IDCT_1D, IADST_1D,
- IFLIPADST_1D, IDCT_1D, IFLIPADST_1D, IADST_1D,
- IFLIPADST_1D, IIDENTITY_1D, IDCT_1D, IIDENTITY_1D,
- IADST_1D, IIDENTITY_1D, IFLIPADST_1D, IIDENTITY_1D,
-};
-
-static const ITX_TYPE_1D hitx_1d_tab[TX_TYPES] = {
- IDCT_1D, IDCT_1D, IADST_1D, IADST_1D,
- IDCT_1D, IFLIPADST_1D, IFLIPADST_1D, IFLIPADST_1D,
- IADST_1D, IIDENTITY_1D, IIDENTITY_1D, IDCT_1D,
- IIDENTITY_1D, IADST_1D, IIDENTITY_1D, IFLIPADST_1D,
-};
-
-DECLARE_ALIGNED(16, static const int16_t, av1_eob_to_eobxy_8x8_default[8]) = {
- 0x0707, 0x0707, 0x0707, 0x0707, 0x0707, 0x0707, 0x0707, 0x0707,
-};
-
-DECLARE_ALIGNED(16, static const int16_t,
- av1_eob_to_eobxy_16x16_default[16]) = {
- 0x0707, 0x0707, 0x0f0f, 0x0f0f, 0x0f0f, 0x0f0f, 0x0f0f, 0x0f0f,
- 0x0f0f, 0x0f0f, 0x0f0f, 0x0f0f, 0x0f0f, 0x0f0f, 0x0f0f, 0x0f0f,
-};
-
-DECLARE_ALIGNED(16, static const int16_t,
- av1_eob_to_eobxy_32x32_default[32]) = {
- 0x0707, 0x0f0f, 0x0f0f, 0x0f0f, 0x1f1f, 0x1f1f, 0x1f1f, 0x1f1f,
- 0x1f1f, 0x1f1f, 0x1f1f, 0x1f1f, 0x1f1f, 0x1f1f, 0x1f1f, 0x1f1f,
- 0x1f1f, 0x1f1f, 0x1f1f, 0x1f1f, 0x1f1f, 0x1f1f, 0x1f1f, 0x1f1f,
- 0x1f1f, 0x1f1f, 0x1f1f, 0x1f1f, 0x1f1f, 0x1f1f, 0x1f1f, 0x1f1f,
-};
-
-DECLARE_ALIGNED(16, static const int16_t, av1_eob_to_eobxy_8x16_default[16]) = {
- 0x0707, 0x0707, 0x0707, 0x0707, 0x0707, 0x0f07, 0x0f07, 0x0f07,
- 0x0f07, 0x0f07, 0x0f07, 0x0f07, 0x0f07, 0x0f07, 0x0f07, 0x0f07,
-};
-
-DECLARE_ALIGNED(16, static const int16_t, av1_eob_to_eobxy_16x8_default[8]) = {
- 0x0707, 0x0707, 0x070f, 0x070f, 0x070f, 0x070f, 0x070f, 0x070f,
-};
-
-DECLARE_ALIGNED(16, static const int16_t,
- av1_eob_to_eobxy_16x32_default[32]) = {
- 0x0707, 0x0707, 0x0f0f, 0x0f0f, 0x0f0f, 0x0f0f, 0x0f0f, 0x0f0f,
- 0x0f0f, 0x1f0f, 0x1f0f, 0x1f0f, 0x1f0f, 0x1f0f, 0x1f0f, 0x1f0f,
- 0x1f0f, 0x1f0f, 0x1f0f, 0x1f0f, 0x1f0f, 0x1f0f, 0x1f0f, 0x1f0f,
- 0x1f0f, 0x1f0f, 0x1f0f, 0x1f0f, 0x1f0f, 0x1f0f, 0x1f0f, 0x1f0f,
-};
-
-DECLARE_ALIGNED(16, static const int16_t,
- av1_eob_to_eobxy_32x16_default[16]) = {
- 0x0707, 0x0f0f, 0x0f0f, 0x0f0f, 0x0f1f, 0x0f1f, 0x0f1f, 0x0f1f,
- 0x0f1f, 0x0f1f, 0x0f1f, 0x0f1f, 0x0f1f, 0x0f1f, 0x0f1f, 0x0f1f,
-};
-
-DECLARE_ALIGNED(16, static const int16_t, av1_eob_to_eobxy_8x32_default[32]) = {
- 0x0707, 0x0707, 0x0707, 0x0707, 0x0707, 0x0f07, 0x0f07, 0x0f07,
- 0x0f07, 0x0f07, 0x0f07, 0x0f07, 0x0f07, 0x1f07, 0x1f07, 0x1f07,
- 0x1f07, 0x1f07, 0x1f07, 0x1f07, 0x1f07, 0x1f07, 0x1f07, 0x1f07,
- 0x1f07, 0x1f07, 0x1f07, 0x1f07, 0x1f07, 0x1f07, 0x1f07, 0x1f07,
-};
-
-DECLARE_ALIGNED(16, static const int16_t, av1_eob_to_eobxy_32x8_default[8]) = {
- 0x0707, 0x070f, 0x070f, 0x071f, 0x071f, 0x071f, 0x071f, 0x071f,
-};
-
-DECLARE_ALIGNED(16, static const int16_t *,
- av1_eob_to_eobxy_default[TX_SIZES_ALL]) = {
- NULL,
- av1_eob_to_eobxy_8x8_default,
- av1_eob_to_eobxy_16x16_default,
- av1_eob_to_eobxy_32x32_default,
- av1_eob_to_eobxy_32x32_default,
- NULL,
- NULL,
- av1_eob_to_eobxy_8x16_default,
- av1_eob_to_eobxy_16x8_default,
- av1_eob_to_eobxy_16x32_default,
- av1_eob_to_eobxy_32x16_default,
- av1_eob_to_eobxy_32x32_default,
- av1_eob_to_eobxy_32x32_default,
- NULL,
- NULL,
- av1_eob_to_eobxy_8x32_default,
- av1_eob_to_eobxy_32x8_default,
- av1_eob_to_eobxy_16x32_default,
- av1_eob_to_eobxy_32x16_default,
-};
-
-static const int lowbd_txfm_all_1d_zeros_idx[32] = {
- 0, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2,
- 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
-};
-
-// Transform block width in log2 for eob (size of 64 map to 32)
-static const int tx_size_wide_log2_eob[TX_SIZES_ALL] = {
- 2, 3, 4, 5, 5, 2, 3, 3, 4, 4, 5, 5, 5, 2, 4, 3, 5, 4, 5,
-};
-
-// Derives the last possibly-non-zero column (*eobx) and row (*eoby) for the
-// default scan. eob == 1 yields (0, 0); otherwise the row containing
-// coefficient eob - 1 indexes the per-size table
-// av1_eob_to_eobxy_default[tx_size], whose entries pack eobx in the low byte
-// and eoby in the high byte.
-static INLINE void get_eobx_eoby_scan_default(int *eobx, int *eoby,
-                                              TX_SIZE tx_size, int eob) {
-  int packed = 0;
-  if (eob != 1) {
-    const int row = (eob - 1) >> tx_size_wide_log2_eob[tx_size];
-    packed = av1_eob_to_eobxy_default[tx_size][row];
-  }
-  *eobx = packed & 0xFF;
-  *eoby = packed >> 8;
-}
-
-// Rounds a 0-based position up to the last index of the 1/8/16/32-wide group
-// containing it: 0 -> 0, 1..7 -> 7, 8..15 -> 15, 16..31 -> 31.
-// Read-only lookup table, hence const.
-static const int eob_fill[32] = {
-  0,  7,  7,  7,  7,  7,  7,  7,  15, 15, 15, 15, 15, 15, 15, 15,
-  31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31,
-};
-
-// eobx/eoby derivation for the horizontal-identity case. With `last` = eob - 1
-// and `cols` = min(32, block width): *eobx is cols - 1 once `last` reaches
-// it, else eob_fill[last]; *eoby is eob_fill[last / cols].
-static INLINE void get_eobx_eoby_scan_h_identity(int *eobx, int *eoby,
-                                                 TX_SIZE tx_size, int eob) {
-  const int last = eob - 1;
-  const int cols = AOMMIN(32, tx_size_wide[tx_size]);
-  const int last_col = cols - 1;
-  if (last >= last_col) {
-    *eobx = last_col;
-  } else {
-    *eobx = eob_fill[last];
-  }
-  const int row = last / cols;
-  assert(row < 32);
-  *eoby = eob_fill[row];
-}
-
-// eobx/eoby derivation for the vertical-identity case. With `last` = eob - 1
-// and `rows` = min(32, block height): *eobx is last / rows; *eoby is
-// rows - 1 once `last` reaches it, else eob_fill[last].
-static INLINE void get_eobx_eoby_scan_v_identity(int *eobx, int *eoby,
-                                                 TX_SIZE tx_size, int eob) {
-  const int last = eob - 1;
-  const int rows = AOMMIN(32, tx_size_high[tx_size]);
-  const int last_row = rows - 1;
-  *eobx = last / rows;
-  if (last >= last_row) {
-    *eoby = last_row;
-  } else {
-    *eoby = eob_fill[last];
-  }
-}
-
-typedef void (*transform_1d_ssse3)(const __m128i *input, __m128i *output,
- int8_t cos_bit);
-
-void av1_lowbd_inv_txfm2d_add_ssse3(const int32_t *input, uint8_t *output,
- int stride, TX_TYPE tx_type,
- TX_SIZE tx_size, int eob);
-#ifdef __cplusplus
-} // extern "C"
-#endif
-
-#endif // AOM_AV1_COMMON_X86_AV1_INV_TXFM_SSSE3_H_
diff --git a/third_party/aom/av1/common/x86/av1_txfm_sse2.h b/third_party/aom/av1/common/x86/av1_txfm_sse2.h
deleted file mode 100644
index 77aeb6eb1..000000000
--- a/third_party/aom/av1/common/x86/av1_txfm_sse2.h
+++ /dev/null
@@ -1,317 +0,0 @@
-/*
- * Copyright (c) 2018, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-#ifndef AOM_AV1_COMMON_X86_AV1_TXFM_SSE2_H_
-#define AOM_AV1_COMMON_X86_AV1_TXFM_SSE2_H_
-
-#include <emmintrin.h> // SSE2
-
-#include "config/aom_config.h"
-#include "config/av1_rtcd.h"
-
-#include "aom/aom_integer.h"
-#include "aom_dsp/x86/transpose_sse2.h"
-#include "aom_dsp/x86/txfm_common_sse2.h"
-#include "av1/common/av1_txfm.h"
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-static INLINE void btf_16_w4_sse2(
- const __m128i *const w0, const __m128i *const w1, const __m128i __rounding,
- const int8_t cos_bit, const __m128i *const in0, const __m128i *const in1,
- __m128i *const out0, __m128i *const out1) {
- const __m128i t0 = _mm_unpacklo_epi16(*in0, *in1);
- const __m128i u0 = _mm_madd_epi16(t0, *w0);
- const __m128i v0 = _mm_madd_epi16(t0, *w1);
- const __m128i a0 = _mm_add_epi32(u0, __rounding);
- const __m128i b0 = _mm_add_epi32(v0, __rounding);
- const __m128i c0 = _mm_srai_epi32(a0, cos_bit);
- const __m128i d0 = _mm_srai_epi32(b0, cos_bit);
-
- *out0 = _mm_packs_epi32(c0, c0);
- *out1 = _mm_packs_epi32(d0, c0);
-}
-
-#define btf_16_4p_sse2(w0, w1, in0, in1, out0, out1) \
- { \
- __m128i t0 = _mm_unpacklo_epi16(in0, in1); \
- __m128i u0 = _mm_madd_epi16(t0, w0); \
- __m128i v0 = _mm_madd_epi16(t0, w1); \
- \
- __m128i a0 = _mm_add_epi32(u0, __rounding); \
- __m128i b0 = _mm_add_epi32(v0, __rounding); \
- \
- __m128i c0 = _mm_srai_epi32(a0, cos_bit); \
- __m128i d0 = _mm_srai_epi32(b0, cos_bit); \
- \
- out0 = _mm_packs_epi32(c0, c0); \
- out1 = _mm_packs_epi32(d0, d0); \
- }
-
-#define btf_16_sse2(w0, w1, in0, in1, out0, out1) \
- { \
- __m128i t0 = _mm_unpacklo_epi16(in0, in1); \
- __m128i t1 = _mm_unpackhi_epi16(in0, in1); \
- __m128i u0 = _mm_madd_epi16(t0, w0); \
- __m128i u1 = _mm_madd_epi16(t1, w0); \
- __m128i v0 = _mm_madd_epi16(t0, w1); \
- __m128i v1 = _mm_madd_epi16(t1, w1); \
- \
- __m128i a0 = _mm_add_epi32(u0, __rounding); \
- __m128i a1 = _mm_add_epi32(u1, __rounding); \
- __m128i b0 = _mm_add_epi32(v0, __rounding); \
- __m128i b1 = _mm_add_epi32(v1, __rounding); \
- \
- __m128i c0 = _mm_srai_epi32(a0, cos_bit); \
- __m128i c1 = _mm_srai_epi32(a1, cos_bit); \
- __m128i d0 = _mm_srai_epi32(b0, cos_bit); \
- __m128i d1 = _mm_srai_epi32(b1, cos_bit); \
- \
- out0 = _mm_packs_epi32(c0, c1); \
- out1 = _mm_packs_epi32(d0, d1); \
- }
-
-static INLINE __m128i load_16bit_to_16bit(const int16_t *a) {
- return _mm_load_si128((const __m128i *)a);
-}
-
-static INLINE __m128i load_32bit_to_16bit(const int32_t *a) {
- const __m128i a_low = _mm_load_si128((const __m128i *)a);
- return _mm_packs_epi32(a_low, *(const __m128i *)(a + 4));
-}
-
-static INLINE __m128i load_32bit_to_16bit_w4(const int32_t *a) {
- const __m128i a_low = _mm_load_si128((const __m128i *)a);
- return _mm_packs_epi32(a_low, a_low);
-}
-
-// Store 4 16 bit values. Sign extend the values.
-static INLINE void store_16bit_to_32bit_w4(const __m128i a, int32_t *const b) {
- const __m128i a_lo = _mm_unpacklo_epi16(a, a);
- const __m128i a_1 = _mm_srai_epi32(a_lo, 16);
- _mm_store_si128((__m128i *)b, a_1);
-}
-
-// Store 8 16 bit values. Sign extend the values.
-static INLINE void store_16bit_to_32bit(__m128i a, int32_t *b) {
- const __m128i a_lo = _mm_unpacklo_epi16(a, a);
- const __m128i a_hi = _mm_unpackhi_epi16(a, a);
- const __m128i a_1 = _mm_srai_epi32(a_lo, 16);
- const __m128i a_2 = _mm_srai_epi32(a_hi, 16);
- _mm_store_si128((__m128i *)b, a_1);
- _mm_store_si128((__m128i *)(b + 4), a_2);
-}
-
-static INLINE __m128i scale_round_sse2(const __m128i a, const int scale) {
- const __m128i scale_rounding = pair_set_epi16(scale, 1 << (NewSqrt2Bits - 1));
- const __m128i b = _mm_madd_epi16(a, scale_rounding);
- return _mm_srai_epi32(b, NewSqrt2Bits);
-}
-
-static INLINE void store_rect_16bit_to_32bit_w4(const __m128i a,
- int32_t *const b) {
- const __m128i one = _mm_set1_epi16(1);
- const __m128i a_lo = _mm_unpacklo_epi16(a, one);
- const __m128i b_lo = scale_round_sse2(a_lo, NewSqrt2);
- _mm_store_si128((__m128i *)b, b_lo);
-}
-
-static INLINE void store_rect_16bit_to_32bit(const __m128i a,
- int32_t *const b) {
- const __m128i one = _mm_set1_epi16(1);
- const __m128i a_lo = _mm_unpacklo_epi16(a, one);
- const __m128i a_hi = _mm_unpackhi_epi16(a, one);
- const __m128i b_lo = scale_round_sse2(a_lo, NewSqrt2);
- const __m128i b_hi = scale_round_sse2(a_hi, NewSqrt2);
- _mm_store_si128((__m128i *)b, b_lo);
- _mm_store_si128((__m128i *)(b + 4), b_hi);
-}
-
-static INLINE void load_buffer_16bit_to_16bit_w4(const int16_t *const in,
- const int stride,
- __m128i *const out,
- const int out_size) {
- for (int i = 0; i < out_size; ++i) {
- out[i] = _mm_loadl_epi64((const __m128i *)(in + i * stride));
- }
-}
-
-static INLINE void load_buffer_16bit_to_16bit_w4_flip(const int16_t *const in,
- const int stride,
- __m128i *const out,
- const int out_size) {
- for (int i = 0; i < out_size; ++i) {
- out[out_size - i - 1] = _mm_loadl_epi64((const __m128i *)(in + i * stride));
- }
-}
-
-static INLINE void load_buffer_16bit_to_16bit(const int16_t *in, int stride,
- __m128i *out, int out_size) {
- for (int i = 0; i < out_size; ++i) {
- out[i] = load_16bit_to_16bit(in + i * stride);
- }
-}
-
-static INLINE void load_buffer_16bit_to_16bit_flip(const int16_t *in,
- int stride, __m128i *out,
- int out_size) {
- for (int i = 0; i < out_size; ++i) {
- out[out_size - i - 1] = load_16bit_to_16bit(in + i * stride);
- }
-}
-
-static INLINE void load_buffer_32bit_to_16bit(const int32_t *in, int stride,
- __m128i *out, int out_size) {
- for (int i = 0; i < out_size; ++i) {
- out[i] = load_32bit_to_16bit(in + i * stride);
- }
-}
-
-static INLINE void load_buffer_32bit_to_16bit_w4(const int32_t *in, int stride,
- __m128i *out, int out_size) {
- for (int i = 0; i < out_size; ++i) {
- out[i] = load_32bit_to_16bit_w4(in + i * stride);
- }
-}
-
-static INLINE void load_buffer_32bit_to_16bit_flip(const int32_t *in,
- int stride, __m128i *out,
- int out_size) {
- for (int i = 0; i < out_size; ++i) {
- out[out_size - i - 1] = load_32bit_to_16bit(in + i * stride);
- }
-}
-
-static INLINE void store_buffer_16bit_to_32bit_w4(const __m128i *const in,
- int32_t *const out,
- const int stride,
- const int out_size) {
- for (int i = 0; i < out_size; ++i) {
- store_16bit_to_32bit_w4(in[i], out + i * stride);
- }
-}
-
-static INLINE void store_buffer_16bit_to_32bit_w8(const __m128i *const in,
- int32_t *const out,
- const int stride,
- const int out_size) {
- for (int i = 0; i < out_size; ++i) {
- store_16bit_to_32bit(in[i], out + i * stride);
- }
-}
-
-static INLINE void store_rect_buffer_16bit_to_32bit_w4(const __m128i *const in,
- int32_t *const out,
- const int stride,
- const int out_size) {
- for (int i = 0; i < out_size; ++i) {
- store_rect_16bit_to_32bit_w4(in[i], out + i * stride);
- }
-}
-
-static INLINE void store_rect_buffer_16bit_to_32bit_w8(const __m128i *const in,
- int32_t *const out,
- const int stride,
- const int out_size) {
- for (int i = 0; i < out_size; ++i) {
- store_rect_16bit_to_32bit(in[i], out + i * stride);
- }
-}
-
-static INLINE void store_buffer_16bit_to_16bit_8x8(const __m128i *in,
- uint16_t *out,
- const int stride) {
- for (int i = 0; i < 8; ++i) {
- _mm_store_si128((__m128i *)(out + i * stride), in[i]);
- }
-}
-
-static INLINE void round_shift_16bit(__m128i *in, int size, int bit) {
- if (bit < 0) {
- bit = -bit;
- __m128i rounding = _mm_set1_epi16(1 << (bit - 1));
- for (int i = 0; i < size; ++i) {
- in[i] = _mm_adds_epi16(in[i], rounding);
- in[i] = _mm_srai_epi16(in[i], bit);
- }
- } else if (bit > 0) {
- for (int i = 0; i < size; ++i) {
- in[i] = _mm_slli_epi16(in[i], bit);
- }
- }
-}
-
-static INLINE void flip_buf_sse2(__m128i *in, __m128i *out, int size) {
- for (int i = 0; i < size; ++i) {
- out[size - i - 1] = in[i];
- }
-}
-
-void av1_lowbd_fwd_txfm2d_4x4_sse2(const int16_t *input, int32_t *output,
- int stride, TX_TYPE tx_type, int bd);
-
-void av1_lowbd_fwd_txfm2d_4x8_sse2(const int16_t *input, int32_t *output,
- int stride, TX_TYPE tx_type, int bd);
-
-void av1_lowbd_fwd_txfm2d_4x16_sse2(const int16_t *input, int32_t *output,
- int stride, TX_TYPE tx_type, int bd);
-
-void av1_lowbd_fwd_txfm2d_8x4_sse2(const int16_t *input, int32_t *output,
- int stride, TX_TYPE tx_type, int bd);
-
-void av1_lowbd_fwd_txfm2d_8x8_sse2(const int16_t *input, int32_t *output,
- int stride, TX_TYPE tx_type, int bd);
-
-void av1_lowbd_fwd_txfm2d_8x16_sse2(const int16_t *input, int32_t *output,
- int stride, TX_TYPE tx_type, int bd);
-
-void av1_lowbd_fwd_txfm2d_8x32_sse2(const int16_t *input, int32_t *output,
- int stride, TX_TYPE tx_type, int bd);
-
-void av1_lowbd_fwd_txfm2d_16x4_sse2(const int16_t *input, int32_t *output,
- int stride, TX_TYPE tx_type, int bd);
-
-void av1_lowbd_fwd_txfm2d_16x8_sse2(const int16_t *input, int32_t *output,
- int stride, TX_TYPE tx_type, int bd);
-
-void av1_lowbd_fwd_txfm2d_16x16_sse2(const int16_t *input, int32_t *output,
- int stride, TX_TYPE tx_type, int bd);
-
-void av1_lowbd_fwd_txfm2d_16x32_sse2(const int16_t *input, int32_t *output,
- int stride, TX_TYPE tx_type, int bd);
-
-void av1_lowbd_fwd_txfm2d_32x8_sse2(const int16_t *input, int32_t *output,
- int stride, TX_TYPE tx_type, int bd);
-
-void av1_lowbd_fwd_txfm2d_32x16_sse2(const int16_t *input, int32_t *output,
- int stride, TX_TYPE tx_type, int bd);
-
-void av1_lowbd_fwd_txfm2d_32x32_sse2(const int16_t *input, int32_t *output,
- int stride, TX_TYPE tx_type, int bd);
-
-void av1_lowbd_fwd_txfm2d_16x64_sse2(const int16_t *input, int32_t *output,
- int stride, TX_TYPE tx_type, int bd);
-
-void av1_lowbd_fwd_txfm2d_64x16_sse2(const int16_t *input, int32_t *output,
- int stride, TX_TYPE tx_type, int bd);
-
-typedef void (*transform_1d_sse2)(const __m128i *input, __m128i *output,
- int8_t cos_bit);
-
-typedef struct {
- transform_1d_sse2 col, row; // vertical and horizontal
-} transform_2d_sse2;
-
-#ifdef __cplusplus
-}
-#endif // __cplusplus
-#endif // AOM_AV1_COMMON_X86_AV1_TXFM_SSE2_H_
diff --git a/third_party/aom/av1/common/x86/av1_txfm_sse4.c b/third_party/aom/av1/common/x86/av1_txfm_sse4.c
deleted file mode 100644
index 90b9879cc..000000000
--- a/third_party/aom/av1/common/x86/av1_txfm_sse4.c
+++ /dev/null
@@ -1,21 +0,0 @@
-/*
- * Copyright (c) 2018, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#include "config/aom_dsp_rtcd.h"
-
-#include "av1/common/av1_txfm.h"
-#include "av1/common/x86/av1_txfm_sse4.h"
-
-void av1_round_shift_array_sse4_1(int32_t *arr, int size, int bit) {
- __m128i *const vec = (__m128i *)arr;
- const int vec_size = size >> 2;
- av1_round_shift_array_32_sse4_1(vec, vec, vec_size, bit);
-}
diff --git a/third_party/aom/av1/common/x86/av1_txfm_sse4.h b/third_party/aom/av1/common/x86/av1_txfm_sse4.h
deleted file mode 100644
index 6cad821b1..000000000
--- a/third_party/aom/av1/common/x86/av1_txfm_sse4.h
+++ /dev/null
@@ -1,72 +0,0 @@
-/*
- * Copyright (c) 2018, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#ifndef AOM_AV1_COMMON_X86_AV1_TXFM_SSE4_H_
-#define AOM_AV1_COMMON_X86_AV1_TXFM_SSE4_H_
-
-#include <smmintrin.h>
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-static INLINE __m128i av1_round_shift_32_sse4_1(__m128i vec, int bit) {
- __m128i tmp, round;
- round = _mm_set1_epi32(1 << (bit - 1));
- tmp = _mm_add_epi32(vec, round);
- return _mm_srai_epi32(tmp, bit);
-}
-
-static INLINE void av1_round_shift_array_32_sse4_1(__m128i *input,
- __m128i *output,
- const int size,
- const int bit) {
- if (bit > 0) {
- int i;
- for (i = 0; i < size; i++) {
- output[i] = av1_round_shift_32_sse4_1(input[i], bit);
- }
- } else {
- int i;
- for (i = 0; i < size; i++) {
- output[i] = _mm_slli_epi32(input[i], -bit);
- }
- }
-}
-
-static INLINE void av1_round_shift_rect_array_32_sse4_1(__m128i *input,
- __m128i *output,
- const int size,
- const int bit,
- const int val) {
- const __m128i sqrt2 = _mm_set1_epi32(val);
- if (bit > 0) {
- int i;
- for (i = 0; i < size; i++) {
- const __m128i r0 = av1_round_shift_32_sse4_1(input[i], bit);
- const __m128i r1 = _mm_mullo_epi32(sqrt2, r0);
- output[i] = av1_round_shift_32_sse4_1(r1, NewSqrt2Bits);
- }
- } else {
- int i;
- for (i = 0; i < size; i++) {
- const __m128i r0 = _mm_slli_epi32(input[i], -bit);
- const __m128i r1 = _mm_mullo_epi32(sqrt2, r0);
- output[i] = av1_round_shift_32_sse4_1(r1, NewSqrt2Bits);
- }
- }
-}
-
-#ifdef __cplusplus
-}
-#endif
-
-#endif // AOM_AV1_COMMON_X86_AV1_TXFM_SSE4_H_
diff --git a/third_party/aom/av1/common/x86/cfl_avx2.c b/third_party/aom/av1/common/x86/cfl_avx2.c
deleted file mode 100644
index a8bfdcce6..000000000
--- a/third_party/aom/av1/common/x86/cfl_avx2.c
+++ /dev/null
@@ -1,491 +0,0 @@
-/*
- * Copyright (c) 2017, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-#include <immintrin.h>
-
-#include "config/av1_rtcd.h"
-
-#include "av1/common/cfl.h"
-
-#include "av1/common/x86/cfl_simd.h"
-
-#define CFL_GET_SUBSAMPLE_FUNCTION_AVX2(sub, bd) \
- CFL_SUBSAMPLE(avx2, sub, bd, 32, 32) \
- CFL_SUBSAMPLE(avx2, sub, bd, 32, 16) \
- CFL_SUBSAMPLE(avx2, sub, bd, 32, 8) \
- cfl_subsample_##bd##_fn cfl_get_luma_subsampling_##sub##_##bd##_avx2( \
- TX_SIZE tx_size) { \
- static const cfl_subsample_##bd##_fn subfn_##sub[TX_SIZES_ALL] = { \
- subsample_##bd##_##sub##_4x4_ssse3, /* 4x4 */ \
- subsample_##bd##_##sub##_8x8_ssse3, /* 8x8 */ \
- subsample_##bd##_##sub##_16x16_ssse3, /* 16x16 */ \
- subsample_##bd##_##sub##_32x32_avx2, /* 32x32 */ \
- cfl_subsample_##bd##_null, /* 64x64 (invalid CFL size) */ \
- subsample_##bd##_##sub##_4x8_ssse3, /* 4x8 */ \
- subsample_##bd##_##sub##_8x4_ssse3, /* 8x4 */ \
- subsample_##bd##_##sub##_8x16_ssse3, /* 8x16 */ \
- subsample_##bd##_##sub##_16x8_ssse3, /* 16x8 */ \
- subsample_##bd##_##sub##_16x32_ssse3, /* 16x32 */ \
- subsample_##bd##_##sub##_32x16_avx2, /* 32x16 */ \
- cfl_subsample_##bd##_null, /* 32x64 (invalid CFL size) */ \
- cfl_subsample_##bd##_null, /* 64x32 (invalid CFL size) */ \
- subsample_##bd##_##sub##_4x16_ssse3, /* 4x16 */ \
- subsample_##bd##_##sub##_16x4_ssse3, /* 16x4 */ \
- subsample_##bd##_##sub##_8x32_ssse3, /* 8x32 */ \
- subsample_##bd##_##sub##_32x8_avx2, /* 32x8 */ \
- cfl_subsample_##bd##_null, /* 16x64 (invalid CFL size) */ \
- cfl_subsample_##bd##_null, /* 64x16 (invalid CFL size) */ \
- }; \
- return subfn_##sub[tx_size]; \
- }
-
-/**
- * Adds 4 pixels (in a 2x2 grid) and multiplies them by 2. Resulting in a more
- * precise version of a box filter 4:2:0 pixel subsampling in Q3.
- *
- * The CfL prediction buffer is always of size CFL_BUF_SQUARE. However, the
- * active area is specified using width and height.
- *
- * Note: We don't need to worry about going over the active area, as long as we
- * stay inside the CfL prediction buffer.
- *
- * Note: For 4:2:0 luma subsampling, the width will never be greater than 16.
- */
-static void cfl_luma_subsampling_420_lbd_avx2(const uint8_t *input,
- int input_stride,
- uint16_t *pred_buf_q3, int width,
- int height) {
- (void)width; // Forever 32
- const __m256i twos = _mm256_set1_epi8(2); // Thirty two twos
- const int luma_stride = input_stride << 1;
- __m256i *row = (__m256i *)pred_buf_q3;
- const __m256i *row_end = row + (height >> 1) * CFL_BUF_LINE_I256;
- do {
- __m256i top = _mm256_loadu_si256((__m256i *)input);
- __m256i bot = _mm256_loadu_si256((__m256i *)(input + input_stride));
-
- __m256i top_16x16 = _mm256_maddubs_epi16(top, twos);
- __m256i bot_16x16 = _mm256_maddubs_epi16(bot, twos);
- __m256i sum_16x16 = _mm256_add_epi16(top_16x16, bot_16x16);
-
- _mm256_storeu_si256(row, sum_16x16);
-
- input += luma_stride;
- } while ((row += CFL_BUF_LINE_I256) < row_end);
-}
-
-CFL_GET_SUBSAMPLE_FUNCTION_AVX2(420, lbd)
-
-/**
- * Adds 2 pixels (in a 2x1 grid) and multiplies them by 4. Resulting in a more
- * precise version of a box filter 4:2:2 pixel subsampling in Q3.
- *
- * The CfL prediction buffer is always of size CFL_BUF_SQUARE. However, the
- * active area is specified using width and height.
- *
- * Note: We don't need to worry about going over the active area, as long as we
- * stay inside the CfL prediction buffer.
- */
-static void cfl_luma_subsampling_422_lbd_avx2(const uint8_t *input,
- int input_stride,
- uint16_t *pred_buf_q3, int width,
- int height) {
- (void)width; // Forever 32
- const __m256i fours = _mm256_set1_epi8(4); // Thirty two fours
- __m256i *row = (__m256i *)pred_buf_q3;
- const __m256i *row_end = row + height * CFL_BUF_LINE_I256;
- do {
- __m256i top = _mm256_loadu_si256((__m256i *)input);
- __m256i top_16x16 = _mm256_maddubs_epi16(top, fours);
- _mm256_storeu_si256(row, top_16x16);
- input += input_stride;
- } while ((row += CFL_BUF_LINE_I256) < row_end);
-}
-
-CFL_GET_SUBSAMPLE_FUNCTION_AVX2(422, lbd)
-
-/**
- * Multiplies the pixels by 8 (scaling in Q3). The AVX2 subsampling is only
- * performed on block of width 32.
- *
- * The CfL prediction buffer is always of size CFL_BUF_SQUARE. However, the
- * active area is specified using width and height.
- *
- * Note: We don't need to worry about going over the active area, as long as we
- * stay inside the CfL prediction buffer.
- */
-static void cfl_luma_subsampling_444_lbd_avx2(const uint8_t *input,
- int input_stride,
- uint16_t *pred_buf_q3, int width,
- int height) {
- (void)width; // Forever 32
- __m256i *row = (__m256i *)pred_buf_q3;
- const __m256i *row_end = row + height * CFL_BUF_LINE_I256;
- const __m256i zeros = _mm256_setzero_si256();
- do {
- __m256i top = _mm256_loadu_si256((__m256i *)input);
- top = _mm256_permute4x64_epi64(top, _MM_SHUFFLE(3, 1, 2, 0));
-
- __m256i row_lo = _mm256_unpacklo_epi8(top, zeros);
- row_lo = _mm256_slli_epi16(row_lo, 3);
- __m256i row_hi = _mm256_unpackhi_epi8(top, zeros);
- row_hi = _mm256_slli_epi16(row_hi, 3);
-
- _mm256_storeu_si256(row, row_lo);
- _mm256_storeu_si256(row + 1, row_hi);
-
- input += input_stride;
- } while ((row += CFL_BUF_LINE_I256) < row_end);
-}
-
-CFL_GET_SUBSAMPLE_FUNCTION_AVX2(444, lbd)
-
-/**
- * Adds 4 pixels (in a 2x2 grid) and multiplies them by 2. Resulting in a more
- * precise version of a box filter 4:2:0 pixel subsampling in Q3.
- *
- * The CfL prediction buffer is always of size CFL_BUF_SQUARE. However, the
- * active area is specified using width and height.
- *
- * Note: We don't need to worry about going over the active area, as long as we
- * stay inside the CfL prediction buffer.
- *
- * Note: For 4:2:0 luma subsampling, the width will never be greater than 16.
- */
-static void cfl_luma_subsampling_420_hbd_avx2(const uint16_t *input,
- int input_stride,
- uint16_t *pred_buf_q3, int width,
- int height) {
- (void)width; // Forever 32
- const int luma_stride = input_stride << 1;
- __m256i *row = (__m256i *)pred_buf_q3;
- const __m256i *row_end = row + (height >> 1) * CFL_BUF_LINE_I256;
- do {
- __m256i top = _mm256_loadu_si256((__m256i *)input);
- __m256i bot = _mm256_loadu_si256((__m256i *)(input + input_stride));
- __m256i sum = _mm256_add_epi16(top, bot);
-
- __m256i top_1 = _mm256_loadu_si256((__m256i *)(input + 16));
- __m256i bot_1 = _mm256_loadu_si256((__m256i *)(input + 16 + input_stride));
- __m256i sum_1 = _mm256_add_epi16(top_1, bot_1);
-
- __m256i hsum = _mm256_hadd_epi16(sum, sum_1);
- hsum = _mm256_permute4x64_epi64(hsum, _MM_SHUFFLE(3, 1, 2, 0));
- hsum = _mm256_add_epi16(hsum, hsum);
-
- _mm256_storeu_si256(row, hsum);
-
- input += luma_stride;
- } while ((row += CFL_BUF_LINE_I256) < row_end);
-}
-
-CFL_GET_SUBSAMPLE_FUNCTION_AVX2(420, hbd)
-
-/**
- * Adds 2 pixels (in a 2x1 grid) and multiplies them by 4. Resulting in a more
- * precise version of a box filter 4:2:2 pixel subsampling in Q3.
- *
- * The CfL prediction buffer is always of size CFL_BUF_SQUARE. However, the
- * active area is specified using width and height.
- *
- * Note: We don't need to worry about going over the active area, as long as we
- * stay inside the CfL prediction buffer.
- *
- */
-static void cfl_luma_subsampling_422_hbd_avx2(const uint16_t *input,
- int input_stride,
- uint16_t *pred_buf_q3, int width,
- int height) {
- (void)width; // Forever 32
- __m256i *row = (__m256i *)pred_buf_q3;
- const __m256i *row_end = row + height * CFL_BUF_LINE_I256;
- do {
- __m256i top = _mm256_loadu_si256((__m256i *)input);
- __m256i top_1 = _mm256_loadu_si256((__m256i *)(input + 16));
- __m256i hsum = _mm256_hadd_epi16(top, top_1);
- hsum = _mm256_permute4x64_epi64(hsum, _MM_SHUFFLE(3, 1, 2, 0));
- hsum = _mm256_slli_epi16(hsum, 2);
-
- _mm256_storeu_si256(row, hsum);
-
- input += input_stride;
- } while ((row += CFL_BUF_LINE_I256) < row_end);
-}
-
-CFL_GET_SUBSAMPLE_FUNCTION_AVX2(422, hbd)
-
-static void cfl_luma_subsampling_444_hbd_avx2(const uint16_t *input,
- int input_stride,
- uint16_t *pred_buf_q3, int width,
- int height) {
- (void)width; // Forever 32
- __m256i *row = (__m256i *)pred_buf_q3;
- const __m256i *row_end = row + height * CFL_BUF_LINE_I256;
- do {
- __m256i top = _mm256_loadu_si256((__m256i *)input);
- __m256i top_1 = _mm256_loadu_si256((__m256i *)(input + 16));
- _mm256_storeu_si256(row, _mm256_slli_epi16(top, 3));
- _mm256_storeu_si256(row + 1, _mm256_slli_epi16(top_1, 3));
- input += input_stride;
- } while ((row += CFL_BUF_LINE_I256) < row_end);
-}
-
-CFL_GET_SUBSAMPLE_FUNCTION_AVX2(444, hbd)
-
-static INLINE __m256i predict_unclipped(const __m256i *input, __m256i alpha_q12,
- __m256i alpha_sign, __m256i dc_q0) {
- __m256i ac_q3 = _mm256_loadu_si256(input);
- __m256i ac_sign = _mm256_sign_epi16(alpha_sign, ac_q3);
- __m256i scaled_luma_q0 =
- _mm256_mulhrs_epi16(_mm256_abs_epi16(ac_q3), alpha_q12);
- scaled_luma_q0 = _mm256_sign_epi16(scaled_luma_q0, ac_sign);
- return _mm256_add_epi16(scaled_luma_q0, dc_q0);
-}
-
-static INLINE void cfl_predict_lbd_avx2(const int16_t *pred_buf_q3,
- uint8_t *dst, int dst_stride,
- int alpha_q3, int width, int height) {
- (void)width;
- const __m256i alpha_sign = _mm256_set1_epi16(alpha_q3);
- const __m256i alpha_q12 = _mm256_slli_epi16(_mm256_abs_epi16(alpha_sign), 9);
- const __m256i dc_q0 = _mm256_set1_epi16(*dst);
- __m256i *row = (__m256i *)pred_buf_q3;
- const __m256i *row_end = row + height * CFL_BUF_LINE_I256;
-
- do {
- __m256i res = predict_unclipped(row, alpha_q12, alpha_sign, dc_q0);
- __m256i next = predict_unclipped(row + 1, alpha_q12, alpha_sign, dc_q0);
- res = _mm256_packus_epi16(res, next);
- res = _mm256_permute4x64_epi64(res, _MM_SHUFFLE(3, 1, 2, 0));
- _mm256_storeu_si256((__m256i *)dst, res);
- dst += dst_stride;
- } while ((row += CFL_BUF_LINE_I256) < row_end);
-}
-
-CFL_PREDICT_X(avx2, 32, 8, lbd);
-CFL_PREDICT_X(avx2, 32, 16, lbd);
-CFL_PREDICT_X(avx2, 32, 32, lbd);
-
-cfl_predict_lbd_fn get_predict_lbd_fn_avx2(TX_SIZE tx_size) {
- static const cfl_predict_lbd_fn pred[TX_SIZES_ALL] = {
- predict_lbd_4x4_ssse3, /* 4x4 */
- predict_lbd_8x8_ssse3, /* 8x8 */
- predict_lbd_16x16_ssse3, /* 16x16 */
- predict_lbd_32x32_avx2, /* 32x32 */
- cfl_predict_lbd_null, /* 64x64 (invalid CFL size) */
- predict_lbd_4x8_ssse3, /* 4x8 */
- predict_lbd_8x4_ssse3, /* 8x4 */
- predict_lbd_8x16_ssse3, /* 8x16 */
- predict_lbd_16x8_ssse3, /* 16x8 */
- predict_lbd_16x32_ssse3, /* 16x32 */
- predict_lbd_32x16_avx2, /* 32x16 */
- cfl_predict_lbd_null, /* 32x64 (invalid CFL size) */
- cfl_predict_lbd_null, /* 64x32 (invalid CFL size) */
- predict_lbd_4x16_ssse3, /* 4x16 */
- predict_lbd_16x4_ssse3, /* 16x4 */
- predict_lbd_8x32_ssse3, /* 8x32 */
- predict_lbd_32x8_avx2, /* 32x8 */
- cfl_predict_lbd_null, /* 16x64 (invalid CFL size) */
- cfl_predict_lbd_null, /* 64x16 (invalid CFL size) */
- };
- // Modulo TX_SIZES_ALL to ensure that an attacker won't be able to index the
- // function pointer array out of bounds.
- return pred[tx_size % TX_SIZES_ALL];
-}
-
-static __m256i highbd_max_epi16(int bd) {
- const __m256i neg_one = _mm256_set1_epi16(-1);
- // (1 << bd) - 1 => -(-1 << bd) -1 => -1 - (-1 << bd) => -1 ^ (-1 << bd)
- return _mm256_xor_si256(_mm256_slli_epi16(neg_one, bd), neg_one);
-}
-
-static __m256i highbd_clamp_epi16(__m256i u, __m256i zero, __m256i max) {
- return _mm256_max_epi16(_mm256_min_epi16(u, max), zero);
-}
-
-static INLINE void cfl_predict_hbd_avx2(const int16_t *pred_buf_q3,
- uint16_t *dst, int dst_stride,
- int alpha_q3, int bd, int width,
- int height) {
- // Use SSSE3 version for smaller widths
- assert(width == 16 || width == 32);
- const __m256i alpha_sign = _mm256_set1_epi16(alpha_q3);
- const __m256i alpha_q12 = _mm256_slli_epi16(_mm256_abs_epi16(alpha_sign), 9);
- const __m256i dc_q0 = _mm256_loadu_si256((__m256i *)dst);
- const __m256i max = highbd_max_epi16(bd);
-
- __m256i *row = (__m256i *)pred_buf_q3;
- const __m256i *row_end = row + height * CFL_BUF_LINE_I256;
- do {
- const __m256i res = predict_unclipped(row, alpha_q12, alpha_sign, dc_q0);
- _mm256_storeu_si256((__m256i *)dst,
- highbd_clamp_epi16(res, _mm256_setzero_si256(), max));
- if (width == 32) {
- const __m256i res_1 =
- predict_unclipped(row + 1, alpha_q12, alpha_sign, dc_q0);
- _mm256_storeu_si256(
- (__m256i *)(dst + 16),
- highbd_clamp_epi16(res_1, _mm256_setzero_si256(), max));
- }
- dst += dst_stride;
- } while ((row += CFL_BUF_LINE_I256) < row_end);
-}
-
-CFL_PREDICT_X(avx2, 16, 4, hbd)
-CFL_PREDICT_X(avx2, 16, 8, hbd)
-CFL_PREDICT_X(avx2, 16, 16, hbd)
-CFL_PREDICT_X(avx2, 16, 32, hbd)
-CFL_PREDICT_X(avx2, 32, 8, hbd)
-CFL_PREDICT_X(avx2, 32, 16, hbd)
-CFL_PREDICT_X(avx2, 32, 32, hbd)
-
-cfl_predict_hbd_fn get_predict_hbd_fn_avx2(TX_SIZE tx_size) {
- static const cfl_predict_hbd_fn pred[TX_SIZES_ALL] = {
- predict_hbd_4x4_ssse3, /* 4x4 */
- predict_hbd_8x8_ssse3, /* 8x8 */
- predict_hbd_16x16_avx2, /* 16x16 */
- predict_hbd_32x32_avx2, /* 32x32 */
- cfl_predict_hbd_null, /* 64x64 (invalid CFL size) */
- predict_hbd_4x8_ssse3, /* 4x8 */
- predict_hbd_8x4_ssse3, /* 8x4 */
- predict_hbd_8x16_ssse3, /* 8x16 */
- predict_hbd_16x8_avx2, /* 16x8 */
- predict_hbd_16x32_avx2, /* 16x32 */
- predict_hbd_32x16_avx2, /* 32x16 */
- cfl_predict_hbd_null, /* 32x64 (invalid CFL size) */
- cfl_predict_hbd_null, /* 64x32 (invalid CFL size) */
- predict_hbd_4x16_ssse3, /* 4x16 */
- predict_hbd_16x4_avx2, /* 16x4 */
- predict_hbd_8x32_ssse3, /* 8x32 */
- predict_hbd_32x8_avx2, /* 32x8 */
- cfl_predict_hbd_null, /* 16x64 (invalid CFL size) */
- cfl_predict_hbd_null, /* 64x16 (invalid CFL size) */
- };
- // Modulo TX_SIZES_ALL to ensure that an attacker won't be able to index the
- // function pointer array out of bounds.
- return pred[tx_size % TX_SIZES_ALL];
-}
-
-// Returns a vector where all the (32-bits) elements are the sum of all the
-// lanes in a.
-static INLINE __m256i fill_sum_epi32(__m256i a) {
- // Given that a == [A, B, C, D, E, F, G, H]
- a = _mm256_hadd_epi32(a, a);
- // Given that A' == A + B, C' == C + D, E' == E + F, G' == G + H
- // a == [A', C', A', C', E', G', E', G']
- a = _mm256_permute4x64_epi64(a, _MM_SHUFFLE(3, 1, 2, 0));
- // a == [A', C', E', G', A', C', E', G']
- a = _mm256_hadd_epi32(a, a);
- // Given that A'' == A' + C' and E'' == E' + G'
- // a == [A'', E'', A'', E'', A'', E'', A'', E'']
- return _mm256_hadd_epi32(a, a);
- // Given that A''' == A'' + E''
- // a == [A''', A''', A''', A''', A''', A''', A''', A''']
-}
-
-static INLINE __m256i _mm256_addl_epi16(__m256i a) {
- return _mm256_add_epi32(_mm256_unpacklo_epi16(a, _mm256_setzero_si256()),
- _mm256_unpackhi_epi16(a, _mm256_setzero_si256()));
-}
-
-static INLINE void subtract_average_avx2(const uint16_t *src_ptr,
- int16_t *dst_ptr, int width,
- int height, int round_offset,
- int num_pel_log2) {
- // Use SSE2 version for smaller widths
- assert(width == 16 || width == 32);
-
- const __m256i *src = (__m256i *)src_ptr;
- const __m256i *const end = src + height * CFL_BUF_LINE_I256;
- // To maximize usage of the AVX2 registers, we sum two rows per loop
- // iteration
- const int step = 2 * CFL_BUF_LINE_I256;
-
- __m256i sum = _mm256_setzero_si256();
- // For width 32, we use a second sum accumulator to reduce accumulator
- // dependencies in the loop.
- __m256i sum2;
- if (width == 32) sum2 = _mm256_setzero_si256();
-
- do {
- // Add top row to the bottom row
- __m256i l0 = _mm256_add_epi16(_mm256_loadu_si256(src),
- _mm256_loadu_si256(src + CFL_BUF_LINE_I256));
- sum = _mm256_add_epi32(sum, _mm256_addl_epi16(l0));
- if (width == 32) { /* Don't worry, this if it gets optimized out. */
- // Add the second part of the top row to the second part of the bottom row
- __m256i l1 =
- _mm256_add_epi16(_mm256_loadu_si256(src + 1),
- _mm256_loadu_si256(src + 1 + CFL_BUF_LINE_I256));
- sum2 = _mm256_add_epi32(sum2, _mm256_addl_epi16(l1));
- }
- src += step;
- } while (src < end);
- // Combine both sum accumulators
- if (width == 32) sum = _mm256_add_epi32(sum, sum2);
-
- __m256i fill = fill_sum_epi32(sum);
-
- __m256i avg_epi16 = _mm256_srli_epi32(
- _mm256_add_epi32(fill, _mm256_set1_epi32(round_offset)), num_pel_log2);
- avg_epi16 = _mm256_packs_epi32(avg_epi16, avg_epi16);
-
- // Store and subtract loop
- src = (__m256i *)src_ptr;
- __m256i *dst = (__m256i *)dst_ptr;
- do {
- _mm256_storeu_si256(dst,
- _mm256_sub_epi16(_mm256_loadu_si256(src), avg_epi16));
- if (width == 32) {
- _mm256_storeu_si256(
- dst + 1, _mm256_sub_epi16(_mm256_loadu_si256(src + 1), avg_epi16));
- }
- src += CFL_BUF_LINE_I256;
- dst += CFL_BUF_LINE_I256;
- } while (src < end);
-}
-
-// Declare wrappers for AVX2 sizes
-CFL_SUB_AVG_X(avx2, 16, 4, 32, 6)
-CFL_SUB_AVG_X(avx2, 16, 8, 64, 7)
-CFL_SUB_AVG_X(avx2, 16, 16, 128, 8)
-CFL_SUB_AVG_X(avx2, 16, 32, 256, 9)
-CFL_SUB_AVG_X(avx2, 32, 8, 128, 8)
-CFL_SUB_AVG_X(avx2, 32, 16, 256, 9)
-CFL_SUB_AVG_X(avx2, 32, 32, 512, 10)
-
-// Based on the observation that for small blocks AVX2 does not outperform
-// SSE2, we call the SSE2 code for block widths 4 and 8.
-cfl_subtract_average_fn get_subtract_average_fn_avx2(TX_SIZE tx_size) {
- static const cfl_subtract_average_fn sub_avg[TX_SIZES_ALL] = {
- subtract_average_4x4_sse2, /* 4x4 */
- subtract_average_8x8_sse2, /* 8x8 */
- subtract_average_16x16_avx2, /* 16x16 */
- subtract_average_32x32_avx2, /* 32x32 */
- cfl_subtract_average_null, /* 64x64 (invalid CFL size) */
- subtract_average_4x8_sse2, /* 4x8 */
- subtract_average_8x4_sse2, /* 8x4 */
- subtract_average_8x16_sse2, /* 8x16 */
- subtract_average_16x8_avx2, /* 16x8 */
- subtract_average_16x32_avx2, /* 16x32 */
- subtract_average_32x16_avx2, /* 32x16 */
- cfl_subtract_average_null, /* 32x64 (invalid CFL size) */
- cfl_subtract_average_null, /* 64x32 (invalid CFL size) */
- subtract_average_4x16_sse2, /* 4x16 */
- subtract_average_16x4_avx2, /* 16x4 */
- subtract_average_8x32_sse2, /* 8x32 */
- subtract_average_32x8_avx2, /* 32x8 */
- cfl_subtract_average_null, /* 16x64 (invalid CFL size) */
- cfl_subtract_average_null, /* 64x16 (invalid CFL size) */
- };
- // Modulo TX_SIZES_ALL to ensure that an attacker won't be able to
- // index the function pointer array out of bounds.
- return sub_avg[tx_size % TX_SIZES_ALL];
-}
diff --git a/third_party/aom/av1/common/x86/cfl_simd.h b/third_party/aom/av1/common/x86/cfl_simd.h
deleted file mode 100644
index 3b342cd4e..000000000
--- a/third_party/aom/av1/common/x86/cfl_simd.h
+++ /dev/null
@@ -1,243 +0,0 @@
-/*
- * Copyright (c) 2017, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#ifndef AOM_AV1_COMMON_X86_CFL_SIMD_H_
-#define AOM_AV1_COMMON_X86_CFL_SIMD_H_
-
-#include "av1/common/blockd.h"
-
-// SSSE3 version is optimal for with == 4, we reuse them in AVX2
-void subsample_lbd_420_4x4_ssse3(const uint8_t *input, int input_stride,
- uint16_t *output_q3);
-void subsample_lbd_420_4x8_ssse3(const uint8_t *input, int input_stride,
- uint16_t *output_q3);
-void subsample_lbd_420_4x16_ssse3(const uint8_t *input, int input_stride,
- uint16_t *output_q3);
-
-// SSSE3 version is optimal for with == 8, we reuse it in AVX2
-void subsample_lbd_420_8x4_ssse3(const uint8_t *input, int input_stride,
- uint16_t *output_q3);
-void subsample_lbd_420_8x8_ssse3(const uint8_t *input, int input_stride,
- uint16_t *output_q3);
-void subsample_lbd_420_8x16_ssse3(const uint8_t *input, int input_stride,
- uint16_t *output_q3);
-void subsample_lbd_420_8x32_ssse3(const uint8_t *input, int input_stride,
- uint16_t *output_q3);
-
-// SSSE3 version is optimal for with == 16, we reuse it in AVX2
-void subsample_lbd_420_16x4_ssse3(const uint8_t *input, int input_stride,
- uint16_t *output_q3);
-void subsample_lbd_420_16x8_ssse3(const uint8_t *input, int input_stride,
- uint16_t *output_q3);
-void subsample_lbd_420_16x16_ssse3(const uint8_t *input, int input_stride,
- uint16_t *output_q3);
-void subsample_lbd_420_16x32_ssse3(const uint8_t *input, int input_stride,
- uint16_t *output_q3);
-
-// SSSE3 version is optimal for with == 4, we reuse them in AVX2
-void subsample_lbd_422_4x4_ssse3(const uint8_t *input, int input_stride,
- uint16_t *output_q3);
-void subsample_lbd_422_4x8_ssse3(const uint8_t *input, int input_stride,
- uint16_t *output_q3);
-void subsample_lbd_422_4x16_ssse3(const uint8_t *input, int input_stride,
- uint16_t *output_q3);
-
-// SSSE3 version is optimal for with == 8, we reuse it in AVX2
-void subsample_lbd_422_8x4_ssse3(const uint8_t *input, int input_stride,
- uint16_t *output_q3);
-void subsample_lbd_422_8x8_ssse3(const uint8_t *input, int input_stride,
- uint16_t *output_q3);
-void subsample_lbd_422_8x16_ssse3(const uint8_t *input, int input_stride,
- uint16_t *output_q3);
-void subsample_lbd_422_8x32_ssse3(const uint8_t *input, int input_stride,
- uint16_t *output_q3);
-
-// SSSE3 version is optimal for with == 16, we reuse it in AVX2
-void subsample_lbd_422_16x4_ssse3(const uint8_t *input, int input_stride,
- uint16_t *output_q3);
-void subsample_lbd_422_16x8_ssse3(const uint8_t *input, int input_stride,
- uint16_t *output_q3);
-void subsample_lbd_422_16x16_ssse3(const uint8_t *input, int input_stride,
- uint16_t *output_q3);
-void subsample_lbd_422_16x32_ssse3(const uint8_t *input, int input_stride,
- uint16_t *output_q3);
-
-// SSSE3 version is optimal for with == 4, we reuse them in AVX2
-void subsample_lbd_444_4x4_ssse3(const uint8_t *input, int input_stride,
- uint16_t *output_q3);
-void subsample_lbd_444_4x8_ssse3(const uint8_t *input, int input_stride,
- uint16_t *output_q3);
-void subsample_lbd_444_4x16_ssse3(const uint8_t *input, int input_stride,
- uint16_t *output_q3);
-
-// SSSE3 version is optimal for with == 8, we reuse it in AVX2
-void subsample_lbd_444_8x4_ssse3(const uint8_t *input, int input_stride,
- uint16_t *output_q3);
-void subsample_lbd_444_8x8_ssse3(const uint8_t *input, int input_stride,
- uint16_t *output_q3);
-void subsample_lbd_444_8x16_ssse3(const uint8_t *input, int input_stride,
- uint16_t *output_q3);
-void subsample_lbd_444_8x32_ssse3(const uint8_t *input, int input_stride,
- uint16_t *output_q3);
-
-// SSSE3 version is optimal for with == 16, we reuse it in AVX2
-void subsample_lbd_444_16x4_ssse3(const uint8_t *input, int input_stride,
- uint16_t *output_q3);
-void subsample_lbd_444_16x8_ssse3(const uint8_t *input, int input_stride,
- uint16_t *output_q3);
-void subsample_lbd_444_16x16_ssse3(const uint8_t *input, int input_stride,
- uint16_t *output_q3);
-void subsample_lbd_444_16x32_ssse3(const uint8_t *input, int input_stride,
- uint16_t *output_q3);
-
-void subsample_hbd_420_4x4_ssse3(const uint16_t *input, int input_stride,
- uint16_t *output_q3);
-void subsample_hbd_420_4x8_ssse3(const uint16_t *input, int input_stride,
- uint16_t *output_q3);
-void subsample_hbd_420_4x16_ssse3(const uint16_t *input, int input_stride,
- uint16_t *output_q3);
-
-// SSSE3 version is optimal for with == 8, we reuse it in AVX2
-void subsample_hbd_420_8x4_ssse3(const uint16_t *input, int input_stride,
- uint16_t *output_q3);
-void subsample_hbd_420_8x8_ssse3(const uint16_t *input, int input_stride,
- uint16_t *output_q3);
-void subsample_hbd_420_8x16_ssse3(const uint16_t *input, int input_stride,
- uint16_t *output_q3);
-void subsample_hbd_420_8x32_ssse3(const uint16_t *input, int input_stride,
- uint16_t *output_q3);
-
-// SSSE3 version is faster for with == 16, we reuse it in AVX2
-void subsample_hbd_420_16x4_ssse3(const uint16_t *input, int input_stride,
- uint16_t *output_q3);
-void subsample_hbd_420_16x8_ssse3(const uint16_t *input, int input_stride,
- uint16_t *output_q3);
-void subsample_hbd_420_16x16_ssse3(const uint16_t *input, int input_stride,
- uint16_t *output_q3);
-void subsample_hbd_420_16x32_ssse3(const uint16_t *input, int input_stride,
- uint16_t *output_q3);
-
-void subsample_hbd_422_4x4_ssse3(const uint16_t *input, int input_stride,
- uint16_t *output_q3);
-void subsample_hbd_422_4x8_ssse3(const uint16_t *input, int input_stride,
- uint16_t *output_q3);
-void subsample_hbd_422_4x16_ssse3(const uint16_t *input, int input_stride,
- uint16_t *output_q3);
-
-// SSSE3 version is optimal for with == 8, we reuse it in AVX2
-void subsample_hbd_422_8x4_ssse3(const uint16_t *input, int input_stride,
- uint16_t *output_q3);
-void subsample_hbd_422_8x8_ssse3(const uint16_t *input, int input_stride,
- uint16_t *output_q3);
-void subsample_hbd_422_8x16_ssse3(const uint16_t *input, int input_stride,
- uint16_t *output_q3);
-void subsample_hbd_422_8x32_ssse3(const uint16_t *input, int input_stride,
- uint16_t *output_q3);
-
-// SSSE3 version is faster for with == 16, we reuse it in AVX2
-void subsample_hbd_422_16x4_ssse3(const uint16_t *input, int input_stride,
- uint16_t *output_q3);
-void subsample_hbd_422_16x8_ssse3(const uint16_t *input, int input_stride,
- uint16_t *output_q3);
-void subsample_hbd_422_16x16_ssse3(const uint16_t *input, int input_stride,
- uint16_t *output_q3);
-void subsample_hbd_422_16x32_ssse3(const uint16_t *input, int input_stride,
- uint16_t *output_q3);
-
-void subsample_hbd_444_4x4_ssse3(const uint16_t *input, int input_stride,
- uint16_t *output_q3);
-void subsample_hbd_444_4x8_ssse3(const uint16_t *input, int input_stride,
- uint16_t *output_q3);
-void subsample_hbd_444_4x16_ssse3(const uint16_t *input, int input_stride,
- uint16_t *output_q3);
-
-// SSSE3 version is optimal for with == 8, we reuse it in AVX2
-void subsample_hbd_444_8x4_ssse3(const uint16_t *input, int input_stride,
- uint16_t *output_q3);
-void subsample_hbd_444_8x8_ssse3(const uint16_t *input, int input_stride,
- uint16_t *output_q3);
-void subsample_hbd_444_8x16_ssse3(const uint16_t *input, int input_stride,
- uint16_t *output_q3);
-void subsample_hbd_444_8x32_ssse3(const uint16_t *input, int input_stride,
- uint16_t *output_q3);
-
-// SSSE3 version is faster for with == 16, we reuse it in AVX2
-void subsample_hbd_444_16x4_ssse3(const uint16_t *input, int input_stride,
- uint16_t *output_q3);
-void subsample_hbd_444_16x8_ssse3(const uint16_t *input, int input_stride,
- uint16_t *output_q3);
-void subsample_hbd_444_16x16_ssse3(const uint16_t *input, int input_stride,
- uint16_t *output_q3);
-void subsample_hbd_444_16x32_ssse3(const uint16_t *input, int input_stride,
- uint16_t *output_q3);
-
-// SSE2 version is optimal for with == 4, we reuse them in AVX2
-void subtract_average_4x4_sse2(const uint16_t *src, int16_t *dst);
-void subtract_average_4x8_sse2(const uint16_t *src, int16_t *dst);
-void subtract_average_4x16_sse2(const uint16_t *src, int16_t *dst);
-
-// SSE2 version is optimal for with == 8, we reuse them in AVX2
-void subtract_average_8x4_sse2(const uint16_t *src, int16_t *dst);
-void subtract_average_8x8_sse2(const uint16_t *src, int16_t *dst);
-void subtract_average_8x16_sse2(const uint16_t *src, int16_t *dst);
-void subtract_average_8x32_sse2(const uint16_t *src, int16_t *dst);
-
-void predict_lbd_4x4_ssse3(const int16_t *pred_buf_q3, uint8_t *dst,
- int dst_stride, int alpha_q3);
-void predict_lbd_4x8_ssse3(const int16_t *pred_buf_q3, uint8_t *dst,
- int dst_stride, int alpha_q3);
-void predict_lbd_4x16_ssse3(const int16_t *pred_buf_q3, uint8_t *dst,
- int dst_stride, int alpha_q3);
-
-void predict_lbd_8x4_ssse3(const int16_t *pred_buf_q3, uint8_t *dst,
- int dst_stride, int alpha_q3);
-void predict_lbd_8x8_ssse3(const int16_t *pred_buf_q3, uint8_t *dst,
- int dst_stride, int alpha_q3);
-void predict_lbd_8x16_ssse3(const int16_t *pred_buf_q3, uint8_t *dst,
- int dst_stride, int alpha_q3);
-void predict_lbd_8x32_ssse3(const int16_t *pred_buf_q3, uint8_t *dst,
- int dst_stride, int alpha_q3);
-
-void predict_lbd_16x4_ssse3(const int16_t *pred_buf_q3, uint8_t *dst,
- int dst_stride, int alpha_q3);
-void predict_lbd_16x8_ssse3(const int16_t *pred_buf_q3, uint8_t *dst,
- int dst_stride, int alpha_q3);
-void predict_lbd_16x16_ssse3(const int16_t *pred_buf_q3, uint8_t *dst,
- int dst_stride, int alpha_q3);
-void predict_lbd_16x32_ssse3(const int16_t *pred_buf_q3, uint8_t *dst,
- int dst_stride, int alpha_q3);
-
-void predict_hbd_4x4_ssse3(const int16_t *pred_buf_q3, uint16_t *dst,
- int dst_stride, int alpha_q3, int bd);
-void predict_hbd_4x8_ssse3(const int16_t *pred_buf_q3, uint16_t *dst,
- int dst_stride, int alpha_q3, int bd);
-void predict_hbd_4x16_ssse3(const int16_t *pred_buf_q3, uint16_t *dst,
- int dst_stride, int alpha_q3, int bd);
-
-void predict_hbd_8x4_ssse3(const int16_t *pred_buf_q3, uint16_t *dst,
- int dst_stride, int alpha_q3, int bd);
-void predict_hbd_8x8_ssse3(const int16_t *pred_buf_q3, uint16_t *dst,
- int dst_stride, int alpha_q3, int bd);
-void predict_hbd_8x16_ssse3(const int16_t *pred_buf_q3, uint16_t *dst,
- int dst_stride, int alpha_q3, int bd);
-void predict_hbd_8x32_ssse3(const int16_t *pred_buf_q3, uint16_t *dst,
- int dst_stride, int alpha_q3, int bd);
-
-void predict_hbd_16x4_ssse3(const int16_t *pred_buf_q3, uint16_t *dst,
- int dst_stride, int alpha_q3, int bd);
-void predict_hbd_16x8_ssse3(const int16_t *pred_buf_q3, uint16_t *dst,
- int dst_stride, int alpha_q3, int bd);
-void predict_hbd_16x16_ssse3(const int16_t *pred_buf_q3, uint16_t *dst,
- int dst_stride, int alpha_q3, int bd);
-void predict_hbd_16x32_ssse3(const int16_t *pred_buf_q3, uint16_t *dst,
- int dst_stride, int alpha_q3, int bd);
-
-#endif // AOM_AV1_COMMON_X86_CFL_SIMD_H_
diff --git a/third_party/aom/av1/common/x86/cfl_sse2.c b/third_party/aom/av1/common/x86/cfl_sse2.c
deleted file mode 100644
index 4783fe098..000000000
--- a/third_party/aom/av1/common/x86/cfl_sse2.c
+++ /dev/null
@@ -1,89 +0,0 @@
-/*
- * Copyright (c) 2017, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#include <emmintrin.h>
-
-#include "av1/common/cfl.h"
-#include "config/av1_rtcd.h"
-
-static INLINE __m128i fill_sum_epi32(__m128i l0) {
- l0 = _mm_add_epi32(l0, _mm_shuffle_epi32(l0, _MM_SHUFFLE(1, 0, 3, 2)));
- return _mm_add_epi32(l0, _mm_shuffle_epi32(l0, _MM_SHUFFLE(2, 3, 0, 1)));
-}
-
-static INLINE void subtract_average_sse2(const uint16_t *src_ptr,
- int16_t *dst_ptr, int width,
- int height, int round_offset,
- int num_pel_log2) {
- const __m128i zeros = _mm_setzero_si128();
- const __m128i round_offset_epi32 = _mm_set1_epi32(round_offset);
- const __m128i *src = (__m128i *)src_ptr;
- const __m128i *const end = src + height * CFL_BUF_LINE_I128;
- const int step = CFL_BUF_LINE_I128 * (1 + (width == 8) + 3 * (width == 4));
-
- __m128i sum = zeros;
- do {
- __m128i l0;
- if (width == 4) {
- l0 = _mm_add_epi16(_mm_loadl_epi64(src),
- _mm_loadl_epi64(src + CFL_BUF_LINE_I128));
- __m128i l1 = _mm_add_epi16(_mm_loadl_epi64(src + 2 * CFL_BUF_LINE_I128),
- _mm_loadl_epi64(src + 3 * CFL_BUF_LINE_I128));
- sum = _mm_add_epi32(sum, _mm_add_epi32(_mm_unpacklo_epi16(l0, zeros),
- _mm_unpacklo_epi16(l1, zeros)));
- } else {
- if (width == 8) {
- l0 = _mm_add_epi16(_mm_loadu_si128(src),
- _mm_loadu_si128(src + CFL_BUF_LINE_I128));
- } else {
- l0 = _mm_add_epi16(_mm_loadu_si128(src), _mm_loadu_si128(src + 1));
- }
- sum = _mm_add_epi32(sum, _mm_add_epi32(_mm_unpacklo_epi16(l0, zeros),
- _mm_unpackhi_epi16(l0, zeros)));
- if (width == 32) {
- l0 = _mm_add_epi16(_mm_loadu_si128(src + 2), _mm_loadu_si128(src + 3));
- sum = _mm_add_epi32(sum, _mm_add_epi32(_mm_unpacklo_epi16(l0, zeros),
- _mm_unpackhi_epi16(l0, zeros)));
- }
- }
- src += step;
- } while (src < end);
-
- sum = fill_sum_epi32(sum);
-
- __m128i avg_epi16 =
- _mm_srli_epi32(_mm_add_epi32(sum, round_offset_epi32), num_pel_log2);
- avg_epi16 = _mm_packs_epi32(avg_epi16, avg_epi16);
-
- src = (__m128i *)src_ptr;
- __m128i *dst = (__m128i *)dst_ptr;
- do {
- if (width == 4) {
- _mm_storel_epi64(dst, _mm_sub_epi16(_mm_loadl_epi64(src), avg_epi16));
- } else {
- _mm_storeu_si128(dst, _mm_sub_epi16(_mm_loadu_si128(src), avg_epi16));
- if (width > 8) {
- _mm_storeu_si128(dst + 1,
- _mm_sub_epi16(_mm_loadu_si128(src + 1), avg_epi16));
- if (width == 32) {
- _mm_storeu_si128(dst + 2,
- _mm_sub_epi16(_mm_loadu_si128(src + 2), avg_epi16));
- _mm_storeu_si128(dst + 3,
- _mm_sub_epi16(_mm_loadu_si128(src + 3), avg_epi16));
- }
- }
- }
- src += CFL_BUF_LINE_I128;
- dst += CFL_BUF_LINE_I128;
- } while (src < end);
-}
-
-CFL_SUB_AVG_FN(sse2)
diff --git a/third_party/aom/av1/common/x86/cfl_ssse3.c b/third_party/aom/av1/common/x86/cfl_ssse3.c
deleted file mode 100644
index bbf007295..000000000
--- a/third_party/aom/av1/common/x86/cfl_ssse3.c
+++ /dev/null
@@ -1,393 +0,0 @@
-/*
- * Copyright (c) 2017, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#include <tmmintrin.h>
-
-#include "config/av1_rtcd.h"
-
-#include "av1/common/cfl.h"
-
-#include "av1/common/x86/cfl_simd.h"
-
-// Load 32-bit integer from memory into the first element of dst.
-static INLINE __m128i _mm_loadh_epi32(__m128i const *mem_addr) {
- return _mm_cvtsi32_si128(*((int *)mem_addr));
-}
-
-// Store 32-bit integer from the first element of a into memory.
-static INLINE void _mm_storeh_epi32(__m128i const *mem_addr, __m128i a) {
- *((int *)mem_addr) = _mm_cvtsi128_si32(a);
-}
-
-/**
- * Adds 4 pixels (in a 2x2 grid) and multiplies them by 2. Resulting in a more
- * precise version of a box filter 4:2:0 pixel subsampling in Q3.
- *
- * The CfL prediction buffer is always of size CFL_BUF_SQUARE. However, the
- * active area is specified using width and height.
- *
- * Note: We don't need to worry about going over the active area, as long as we
- * stay inside the CfL prediction buffer.
- */
-static INLINE void cfl_luma_subsampling_420_lbd_ssse3(const uint8_t *input,
- int input_stride,
- uint16_t *pred_buf_q3,
- int width, int height) {
- const __m128i twos = _mm_set1_epi8(2);
- __m128i *pred_buf_m128i = (__m128i *)pred_buf_q3;
- const __m128i *end = pred_buf_m128i + (height >> 1) * CFL_BUF_LINE_I128;
- const int luma_stride = input_stride << 1;
- do {
- if (width == 4) {
- __m128i top = _mm_loadh_epi32((__m128i *)input);
- top = _mm_maddubs_epi16(top, twos);
- __m128i bot = _mm_loadh_epi32((__m128i *)(input + input_stride));
- bot = _mm_maddubs_epi16(bot, twos);
- const __m128i sum = _mm_add_epi16(top, bot);
- _mm_storeh_epi32(pred_buf_m128i, sum);
- } else if (width == 8) {
- __m128i top = _mm_loadl_epi64((__m128i *)input);
- top = _mm_maddubs_epi16(top, twos);
- __m128i bot = _mm_loadl_epi64((__m128i *)(input + input_stride));
- bot = _mm_maddubs_epi16(bot, twos);
- const __m128i sum = _mm_add_epi16(top, bot);
- _mm_storel_epi64(pred_buf_m128i, sum);
- } else {
- __m128i top = _mm_loadu_si128((__m128i *)input);
- top = _mm_maddubs_epi16(top, twos);
- __m128i bot = _mm_loadu_si128((__m128i *)(input + input_stride));
- bot = _mm_maddubs_epi16(bot, twos);
- const __m128i sum = _mm_add_epi16(top, bot);
- _mm_storeu_si128(pred_buf_m128i, sum);
- if (width == 32) {
- __m128i top_1 = _mm_loadu_si128(((__m128i *)input) + 1);
- __m128i bot_1 =
- _mm_loadu_si128(((__m128i *)(input + input_stride)) + 1);
- top_1 = _mm_maddubs_epi16(top_1, twos);
- bot_1 = _mm_maddubs_epi16(bot_1, twos);
- __m128i sum_1 = _mm_add_epi16(top_1, bot_1);
- _mm_storeu_si128(pred_buf_m128i + 1, sum_1);
- }
- }
- input += luma_stride;
- pred_buf_m128i += CFL_BUF_LINE_I128;
- } while (pred_buf_m128i < end);
-}
-
-/**
- * Adds 2 pixels (in a 2x1 grid) and multiplies them by 4. Resulting in a more
- * precise version of a box filter 4:2:2 pixel subsampling in Q3.
- *
- * The CfL prediction buffer is always of size CFL_BUF_SQUARE. However, the
- * active area is specified using width and height.
- *
- * Note: We don't need to worry about going over the active area, as long as we
- * stay inside the CfL prediction buffer.
- */
-static INLINE void cfl_luma_subsampling_422_lbd_ssse3(const uint8_t *input,
- int input_stride,
- uint16_t *pred_buf_q3,
- int width, int height) {
- const __m128i fours = _mm_set1_epi8(4);
- __m128i *pred_buf_m128i = (__m128i *)pred_buf_q3;
- const __m128i *end = pred_buf_m128i + height * CFL_BUF_LINE_I128;
- do {
- if (width == 4) {
- __m128i top = _mm_loadh_epi32((__m128i *)input);
- top = _mm_maddubs_epi16(top, fours);
- _mm_storeh_epi32(pred_buf_m128i, top);
- } else if (width == 8) {
- __m128i top = _mm_loadl_epi64((__m128i *)input);
- top = _mm_maddubs_epi16(top, fours);
- _mm_storel_epi64(pred_buf_m128i, top);
- } else {
- __m128i top = _mm_loadu_si128((__m128i *)input);
- top = _mm_maddubs_epi16(top, fours);
- _mm_storeu_si128(pred_buf_m128i, top);
- if (width == 32) {
- __m128i top_1 = _mm_loadu_si128(((__m128i *)input) + 1);
- top_1 = _mm_maddubs_epi16(top_1, fours);
- _mm_storeu_si128(pred_buf_m128i + 1, top_1);
- }
- }
- input += input_stride;
- pred_buf_m128i += CFL_BUF_LINE_I128;
- } while (pred_buf_m128i < end);
-}
-
-/**
- * Multiplies the pixels by 8 (scaling in Q3).
- *
- * The CfL prediction buffer is always of size CFL_BUF_SQUARE. However, the
- * active area is specified using width and height.
- *
- * Note: We don't need to worry about going over the active area, as long as we
- * stay inside the CfL prediction buffer.
- */
-static INLINE void cfl_luma_subsampling_444_lbd_ssse3(const uint8_t *input,
- int input_stride,
- uint16_t *pred_buf_q3,
- int width, int height) {
- const __m128i zeros = _mm_setzero_si128();
- const int luma_stride = input_stride;
- __m128i *pred_buf_m128i = (__m128i *)pred_buf_q3;
- const __m128i *end = pred_buf_m128i + height * CFL_BUF_LINE_I128;
- do {
- if (width == 4) {
- __m128i row = _mm_loadh_epi32((__m128i *)input);
- row = _mm_unpacklo_epi8(row, zeros);
- _mm_storel_epi64(pred_buf_m128i, _mm_slli_epi16(row, 3));
- } else if (width == 8) {
- __m128i row = _mm_loadl_epi64((__m128i *)input);
- row = _mm_unpacklo_epi8(row, zeros);
- _mm_storeu_si128(pred_buf_m128i, _mm_slli_epi16(row, 3));
- } else {
- __m128i row = _mm_loadu_si128((__m128i *)input);
- const __m128i row_lo = _mm_unpacklo_epi8(row, zeros);
- const __m128i row_hi = _mm_unpackhi_epi8(row, zeros);
- _mm_storeu_si128(pred_buf_m128i, _mm_slli_epi16(row_lo, 3));
- _mm_storeu_si128(pred_buf_m128i + 1, _mm_slli_epi16(row_hi, 3));
- if (width == 32) {
- __m128i row_1 = _mm_loadu_si128(((__m128i *)input) + 1);
- const __m128i row_1_lo = _mm_unpacklo_epi8(row_1, zeros);
- const __m128i row_1_hi = _mm_unpackhi_epi8(row_1, zeros);
- _mm_storeu_si128(pred_buf_m128i + 2, _mm_slli_epi16(row_1_lo, 3));
- _mm_storeu_si128(pred_buf_m128i + 3, _mm_slli_epi16(row_1_hi, 3));
- }
- }
- input += luma_stride;
- pred_buf_m128i += CFL_BUF_LINE_I128;
- } while (pred_buf_m128i < end);
-}
-
-/**
- * Adds 4 pixels (in a 2x2 grid) and multiplies them by 2. Resulting in a more
- * precise version of a box filter 4:2:0 pixel subsampling in Q3.
- *
- * The CfL prediction buffer is always of size CFL_BUF_SQUARE. However, the
- * active area is specified using width and height.
- *
- * Note: We don't need to worry about going over the active area, as long as we
- * stay inside the CfL prediction buffer.
- */
-static INLINE void cfl_luma_subsampling_420_hbd_ssse3(const uint16_t *input,
- int input_stride,
- uint16_t *pred_buf_q3,
- int width, int height) {
- const uint16_t *end = pred_buf_q3 + (height >> 1) * CFL_BUF_LINE;
- const int luma_stride = input_stride << 1;
- do {
- if (width == 4) {
- const __m128i top = _mm_loadl_epi64((__m128i *)input);
- const __m128i bot = _mm_loadl_epi64((__m128i *)(input + input_stride));
- __m128i sum = _mm_add_epi16(top, bot);
- sum = _mm_hadd_epi16(sum, sum);
- *((int *)pred_buf_q3) = _mm_cvtsi128_si32(_mm_add_epi16(sum, sum));
- } else {
- const __m128i top = _mm_loadu_si128((__m128i *)input);
- const __m128i bot = _mm_loadu_si128((__m128i *)(input + input_stride));
- __m128i sum = _mm_add_epi16(top, bot);
- if (width == 8) {
- sum = _mm_hadd_epi16(sum, sum);
- _mm_storel_epi64((__m128i *)pred_buf_q3, _mm_add_epi16(sum, sum));
- } else {
- const __m128i top_1 = _mm_loadu_si128(((__m128i *)input) + 1);
- const __m128i bot_1 =
- _mm_loadu_si128(((__m128i *)(input + input_stride)) + 1);
- sum = _mm_hadd_epi16(sum, _mm_add_epi16(top_1, bot_1));
- _mm_storeu_si128((__m128i *)pred_buf_q3, _mm_add_epi16(sum, sum));
- if (width == 32) {
- const __m128i top_2 = _mm_loadu_si128(((__m128i *)input) + 2);
- const __m128i bot_2 =
- _mm_loadu_si128(((__m128i *)(input + input_stride)) + 2);
- const __m128i top_3 = _mm_loadu_si128(((__m128i *)input) + 3);
- const __m128i bot_3 =
- _mm_loadu_si128(((__m128i *)(input + input_stride)) + 3);
- const __m128i sum_2 = _mm_add_epi16(top_2, bot_2);
- const __m128i sum_3 = _mm_add_epi16(top_3, bot_3);
- __m128i next_sum = _mm_hadd_epi16(sum_2, sum_3);
- _mm_storeu_si128(((__m128i *)pred_buf_q3) + 1,
- _mm_add_epi16(next_sum, next_sum));
- }
- }
- }
- input += luma_stride;
- } while ((pred_buf_q3 += CFL_BUF_LINE) < end);
-}
-
-/**
- * Adds 2 pixels (in a 2x1 grid) and multiplies them by 4. Resulting in a more
- * precise version of a box filter 4:2:2 pixel subsampling in Q3.
- *
- * The CfL prediction buffer is always of size CFL_BUF_SQUARE. However, the
- * active area is specified using width and height.
- *
- * Note: We don't need to worry about going over the active area, as long as we
- * stay inside the CfL prediction buffer.
- */
-static INLINE void cfl_luma_subsampling_422_hbd_ssse3(const uint16_t *input,
- int input_stride,
- uint16_t *pred_buf_q3,
- int width, int height) {
- __m128i *pred_buf_m128i = (__m128i *)pred_buf_q3;
- const __m128i *end = pred_buf_m128i + height * CFL_BUF_LINE_I128;
- do {
- if (width == 4) {
- const __m128i top = _mm_loadl_epi64((__m128i *)input);
- const __m128i sum = _mm_slli_epi16(_mm_hadd_epi16(top, top), 2);
- _mm_storeh_epi32(pred_buf_m128i, sum);
- } else {
- const __m128i top = _mm_loadu_si128((__m128i *)input);
- if (width == 8) {
- const __m128i sum = _mm_slli_epi16(_mm_hadd_epi16(top, top), 2);
- _mm_storel_epi64(pred_buf_m128i, sum);
- } else {
- const __m128i top_1 = _mm_loadu_si128(((__m128i *)input) + 1);
- const __m128i sum = _mm_slli_epi16(_mm_hadd_epi16(top, top_1), 2);
- _mm_storeu_si128(pred_buf_m128i, sum);
- if (width == 32) {
- const __m128i top_2 = _mm_loadu_si128(((__m128i *)input) + 2);
- const __m128i top_3 = _mm_loadu_si128(((__m128i *)input) + 3);
- const __m128i sum_1 = _mm_slli_epi16(_mm_hadd_epi16(top_2, top_3), 2);
- _mm_storeu_si128(pred_buf_m128i + 1, sum_1);
- }
- }
- }
- pred_buf_m128i += CFL_BUF_LINE_I128;
- input += input_stride;
- } while (pred_buf_m128i < end);
-}
-
-static INLINE void cfl_luma_subsampling_444_hbd_ssse3(const uint16_t *input,
- int input_stride,
- uint16_t *pred_buf_q3,
- int width, int height) {
- const uint16_t *end = pred_buf_q3 + height * CFL_BUF_LINE;
- do {
- if (width == 4) {
- const __m128i row = _mm_slli_epi16(_mm_loadl_epi64((__m128i *)input), 3);
- _mm_storel_epi64((__m128i *)pred_buf_q3, row);
- } else {
- const __m128i row = _mm_slli_epi16(_mm_loadu_si128((__m128i *)input), 3);
- _mm_storeu_si128((__m128i *)pred_buf_q3, row);
- if (width >= 16) {
- __m128i row_1 = _mm_loadu_si128(((__m128i *)input) + 1);
- row_1 = _mm_slli_epi16(row_1, 3);
- _mm_storeu_si128(((__m128i *)pred_buf_q3) + 1, row_1);
- if (width == 32) {
- __m128i row_2 = _mm_loadu_si128(((__m128i *)input) + 2);
- row_2 = _mm_slli_epi16(row_2, 3);
- _mm_storeu_si128(((__m128i *)pred_buf_q3) + 2, row_2);
- __m128i row_3 = _mm_loadu_si128(((__m128i *)input) + 3);
- row_3 = _mm_slli_epi16(row_3, 3);
- _mm_storeu_si128(((__m128i *)pred_buf_q3) + 3, row_3);
- }
- }
- }
- input += input_stride;
- pred_buf_q3 += CFL_BUF_LINE;
- } while (pred_buf_q3 < end);
-}
-
-CFL_GET_SUBSAMPLE_FUNCTION(ssse3)
-
-static INLINE __m128i predict_unclipped(const __m128i *input, __m128i alpha_q12,
- __m128i alpha_sign, __m128i dc_q0) {
- __m128i ac_q3 = _mm_loadu_si128(input);
- __m128i ac_sign = _mm_sign_epi16(alpha_sign, ac_q3);
- __m128i scaled_luma_q0 = _mm_mulhrs_epi16(_mm_abs_epi16(ac_q3), alpha_q12);
- scaled_luma_q0 = _mm_sign_epi16(scaled_luma_q0, ac_sign);
- return _mm_add_epi16(scaled_luma_q0, dc_q0);
-}
-
-static INLINE void cfl_predict_lbd_ssse3(const int16_t *pred_buf_q3,
- uint8_t *dst, int dst_stride,
- int alpha_q3, int width, int height) {
- const __m128i alpha_sign = _mm_set1_epi16(alpha_q3);
- const __m128i alpha_q12 = _mm_slli_epi16(_mm_abs_epi16(alpha_sign), 9);
- const __m128i dc_q0 = _mm_set1_epi16(*dst);
- __m128i *row = (__m128i *)pred_buf_q3;
- const __m128i *row_end = row + height * CFL_BUF_LINE_I128;
- do {
- __m128i res = predict_unclipped(row, alpha_q12, alpha_sign, dc_q0);
- if (width < 16) {
- res = _mm_packus_epi16(res, res);
- if (width == 4)
- _mm_storeh_epi32((__m128i *)dst, res);
- else
- _mm_storel_epi64((__m128i *)dst, res);
- } else {
- __m128i next = predict_unclipped(row + 1, alpha_q12, alpha_sign, dc_q0);
- res = _mm_packus_epi16(res, next);
- _mm_storeu_si128((__m128i *)dst, res);
- if (width == 32) {
- res = predict_unclipped(row + 2, alpha_q12, alpha_sign, dc_q0);
- next = predict_unclipped(row + 3, alpha_q12, alpha_sign, dc_q0);
- res = _mm_packus_epi16(res, next);
- _mm_storeu_si128((__m128i *)(dst + 16), res);
- }
- }
- dst += dst_stride;
- } while ((row += CFL_BUF_LINE_I128) < row_end);
-}
-
-CFL_PREDICT_FN(ssse3, lbd)
-
-static INLINE __m128i highbd_max_epi16(int bd) {
- const __m128i neg_one = _mm_set1_epi16(-1);
- // (1 << bd) - 1 => -(-1 << bd) -1 => -1 - (-1 << bd) => -1 ^ (-1 << bd)
- return _mm_xor_si128(_mm_slli_epi16(neg_one, bd), neg_one);
-}
-
-static INLINE __m128i highbd_clamp_epi16(__m128i u, __m128i zero, __m128i max) {
- return _mm_max_epi16(_mm_min_epi16(u, max), zero);
-}
-
-static INLINE void cfl_predict_hbd_ssse3(const int16_t *pred_buf_q3,
- uint16_t *dst, int dst_stride,
- int alpha_q3, int bd, int width,
- int height) {
- const __m128i alpha_sign = _mm_set1_epi16(alpha_q3);
- const __m128i alpha_q12 = _mm_slli_epi16(_mm_abs_epi16(alpha_sign), 9);
- const __m128i dc_q0 = _mm_set1_epi16(*dst);
- const __m128i max = highbd_max_epi16(bd);
- const __m128i zeros = _mm_setzero_si128();
- __m128i *row = (__m128i *)pred_buf_q3;
- const __m128i *row_end = row + height * CFL_BUF_LINE_I128;
- do {
- __m128i res = predict_unclipped(row, alpha_q12, alpha_sign, dc_q0);
- res = highbd_clamp_epi16(res, zeros, max);
- if (width == 4) {
- _mm_storel_epi64((__m128i *)dst, res);
- } else {
- _mm_storeu_si128((__m128i *)dst, res);
- }
- if (width >= 16) {
- const __m128i res_1 =
- predict_unclipped(row + 1, alpha_q12, alpha_sign, dc_q0);
- _mm_storeu_si128(((__m128i *)dst) + 1,
- highbd_clamp_epi16(res_1, zeros, max));
- }
- if (width == 32) {
- const __m128i res_2 =
- predict_unclipped(row + 2, alpha_q12, alpha_sign, dc_q0);
- _mm_storeu_si128((__m128i *)(dst + 16),
- highbd_clamp_epi16(res_2, zeros, max));
- const __m128i res_3 =
- predict_unclipped(row + 3, alpha_q12, alpha_sign, dc_q0);
- _mm_storeu_si128((__m128i *)(dst + 24),
- highbd_clamp_epi16(res_3, zeros, max));
- }
- dst += dst_stride;
- } while ((row += CFL_BUF_LINE_I128) < row_end);
-}
-
-CFL_PREDICT_FN(ssse3, hbd)
diff --git a/third_party/aom/av1/common/x86/convolve_2d_avx2.c b/third_party/aom/av1/common/x86/convolve_2d_avx2.c
deleted file mode 100644
index 0acafd044..000000000
--- a/third_party/aom/av1/common/x86/convolve_2d_avx2.c
+++ /dev/null
@@ -1,283 +0,0 @@
-/*
- * Copyright (c) 2017, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#include <immintrin.h>
-
-#include "config/av1_rtcd.h"
-
-#include "aom_dsp/x86/convolve_avx2.h"
-#include "aom_dsp/x86/convolve_common_intrin.h"
-#include "aom_dsp/aom_dsp_common.h"
-#include "aom_dsp/aom_filter.h"
-#include "aom_dsp/x86/synonyms.h"
-#include "av1/common/convolve.h"
-
-void av1_convolve_2d_sr_avx2(const uint8_t *src, int src_stride, uint8_t *dst,
- int dst_stride, int w, int h,
- const InterpFilterParams *filter_params_x,
- const InterpFilterParams *filter_params_y,
- const int subpel_x_q4, const int subpel_y_q4,
- ConvolveParams *conv_params) {
- const int bd = 8;
-
- DECLARE_ALIGNED(32, int16_t, im_block[(MAX_SB_SIZE + MAX_FILTER_TAP) * 8]);
- int im_h = h + filter_params_y->taps - 1;
- int im_stride = 8;
- int i, j;
- const int fo_vert = filter_params_y->taps / 2 - 1;
- const int fo_horiz = filter_params_x->taps / 2 - 1;
- const uint8_t *const src_ptr = src - fo_vert * src_stride - fo_horiz;
-
- const int bits =
- FILTER_BITS * 2 - conv_params->round_0 - conv_params->round_1;
- const int offset_bits = bd + 2 * FILTER_BITS - conv_params->round_0;
-
- __m256i filt[4], coeffs_h[4], coeffs_v[4];
-
- assert(conv_params->round_0 > 0);
-
- filt[0] = _mm256_load_si256((__m256i const *)filt_global_avx2);
- filt[1] = _mm256_load_si256((__m256i const *)(filt_global_avx2 + 32));
- filt[2] = _mm256_load_si256((__m256i const *)(filt_global_avx2 + 32 * 2));
- filt[3] = _mm256_load_si256((__m256i const *)(filt_global_avx2 + 32 * 3));
-
- prepare_coeffs_lowbd(filter_params_x, subpel_x_q4, coeffs_h);
- prepare_coeffs(filter_params_y, subpel_y_q4, coeffs_v);
-
- const __m256i round_const_h = _mm256_set1_epi16(
- ((1 << (conv_params->round_0 - 1)) >> 1) + (1 << (bd + FILTER_BITS - 2)));
- const __m128i round_shift_h = _mm_cvtsi32_si128(conv_params->round_0 - 1);
-
- const __m256i sum_round_v = _mm256_set1_epi32(
- (1 << offset_bits) + ((1 << conv_params->round_1) >> 1));
- const __m128i sum_shift_v = _mm_cvtsi32_si128(conv_params->round_1);
-
- const __m256i round_const_v = _mm256_set1_epi32(
- ((1 << bits) >> 1) - (1 << (offset_bits - conv_params->round_1)) -
- ((1 << (offset_bits - conv_params->round_1)) >> 1));
- const __m128i round_shift_v = _mm_cvtsi32_si128(bits);
-
- for (j = 0; j < w; j += 8) {
- for (i = 0; i < im_h; i += 2) {
- __m256i data = _mm256_castsi128_si256(
- _mm_loadu_si128((__m128i *)&src_ptr[(i * src_stride) + j]));
-
- // Load the next line
- if (i + 1 < im_h)
- data = _mm256_inserti128_si256(
- data,
- _mm_loadu_si128(
- (__m128i *)&src_ptr[(i * src_stride) + j + src_stride]),
- 1);
-
- __m256i res = convolve_lowbd_x(data, coeffs_h, filt);
-
- res =
- _mm256_sra_epi16(_mm256_add_epi16(res, round_const_h), round_shift_h);
-
- _mm256_store_si256((__m256i *)&im_block[i * im_stride], res);
- }
-
- /* Vertical filter */
- {
- __m256i src_0 = _mm256_loadu_si256((__m256i *)(im_block + 0 * im_stride));
- __m256i src_1 = _mm256_loadu_si256((__m256i *)(im_block + 1 * im_stride));
- __m256i src_2 = _mm256_loadu_si256((__m256i *)(im_block + 2 * im_stride));
- __m256i src_3 = _mm256_loadu_si256((__m256i *)(im_block + 3 * im_stride));
- __m256i src_4 = _mm256_loadu_si256((__m256i *)(im_block + 4 * im_stride));
- __m256i src_5 = _mm256_loadu_si256((__m256i *)(im_block + 5 * im_stride));
-
- __m256i s[8];
- s[0] = _mm256_unpacklo_epi16(src_0, src_1);
- s[1] = _mm256_unpacklo_epi16(src_2, src_3);
- s[2] = _mm256_unpacklo_epi16(src_4, src_5);
-
- s[4] = _mm256_unpackhi_epi16(src_0, src_1);
- s[5] = _mm256_unpackhi_epi16(src_2, src_3);
- s[6] = _mm256_unpackhi_epi16(src_4, src_5);
-
- for (i = 0; i < h; i += 2) {
- const int16_t *data = &im_block[i * im_stride];
-
- const __m256i s6 =
- _mm256_loadu_si256((__m256i *)(data + 6 * im_stride));
- const __m256i s7 =
- _mm256_loadu_si256((__m256i *)(data + 7 * im_stride));
-
- s[3] = _mm256_unpacklo_epi16(s6, s7);
- s[7] = _mm256_unpackhi_epi16(s6, s7);
-
- __m256i res_a = convolve(s, coeffs_v);
- __m256i res_b = convolve(s + 4, coeffs_v);
-
- // Combine V round and 2F-H-V round into a single rounding
- res_a =
- _mm256_sra_epi32(_mm256_add_epi32(res_a, sum_round_v), sum_shift_v);
- res_b =
- _mm256_sra_epi32(_mm256_add_epi32(res_b, sum_round_v), sum_shift_v);
-
- const __m256i res_a_round = _mm256_sra_epi32(
- _mm256_add_epi32(res_a, round_const_v), round_shift_v);
- const __m256i res_b_round = _mm256_sra_epi32(
- _mm256_add_epi32(res_b, round_const_v), round_shift_v);
-
- /* rounding code */
- // 16 bit conversion
- const __m256i res_16bit = _mm256_packs_epi32(res_a_round, res_b_round);
- // 8 bit conversion and saturation to uint8
- const __m256i res_8b = _mm256_packus_epi16(res_16bit, res_16bit);
-
- const __m128i res_0 = _mm256_castsi256_si128(res_8b);
- const __m128i res_1 = _mm256_extracti128_si256(res_8b, 1);
-
- // Store values into the destination buffer
- __m128i *const p_0 = (__m128i *)&dst[i * dst_stride + j];
- __m128i *const p_1 = (__m128i *)&dst[i * dst_stride + j + dst_stride];
- if (w - j > 4) {
- _mm_storel_epi64(p_0, res_0);
- _mm_storel_epi64(p_1, res_1);
- } else if (w == 4) {
- xx_storel_32(p_0, res_0);
- xx_storel_32(p_1, res_1);
- } else {
- *(uint16_t *)p_0 = _mm_cvtsi128_si32(res_0);
- *(uint16_t *)p_1 = _mm_cvtsi128_si32(res_1);
- }
-
- s[0] = s[1];
- s[1] = s[2];
- s[2] = s[3];
-
- s[4] = s[5];
- s[5] = s[6];
- s[6] = s[7];
- }
- }
- }
-}
-
-static INLINE void copy_128(const uint8_t *src, uint8_t *dst) {
- __m256i s[4];
- s[0] = _mm256_loadu_si256((__m256i *)(src + 0 * 32));
- s[1] = _mm256_loadu_si256((__m256i *)(src + 1 * 32));
- s[2] = _mm256_loadu_si256((__m256i *)(src + 2 * 32));
- s[3] = _mm256_loadu_si256((__m256i *)(src + 3 * 32));
- _mm256_storeu_si256((__m256i *)(dst + 0 * 32), s[0]);
- _mm256_storeu_si256((__m256i *)(dst + 1 * 32), s[1]);
- _mm256_storeu_si256((__m256i *)(dst + 2 * 32), s[2]);
- _mm256_storeu_si256((__m256i *)(dst + 3 * 32), s[3]);
-}
-
-void av1_convolve_2d_copy_sr_avx2(const uint8_t *src, int src_stride,
- uint8_t *dst, int dst_stride, int w, int h,
- const InterpFilterParams *filter_params_x,
- const InterpFilterParams *filter_params_y,
- const int subpel_x_q4, const int subpel_y_q4,
- ConvolveParams *conv_params) {
- (void)filter_params_x;
- (void)filter_params_y;
- (void)subpel_x_q4;
- (void)subpel_y_q4;
- (void)conv_params;
-
- if (w >= 16) {
- assert(!((intptr_t)dst % 16));
- assert(!(dst_stride % 16));
- }
-
- if (w == 2) {
- do {
- memcpy(dst, src, 2 * sizeof(*src));
- src += src_stride;
- dst += dst_stride;
- memcpy(dst, src, 2 * sizeof(*src));
- src += src_stride;
- dst += dst_stride;
- h -= 2;
- } while (h);
- } else if (w == 4) {
- do {
- memcpy(dst, src, 4 * sizeof(*src));
- src += src_stride;
- dst += dst_stride;
- memcpy(dst, src, 4 * sizeof(*src));
- src += src_stride;
- dst += dst_stride;
- h -= 2;
- } while (h);
- } else if (w == 8) {
- do {
- __m128i s[2];
- s[0] = _mm_loadl_epi64((__m128i *)src);
- src += src_stride;
- s[1] = _mm_loadl_epi64((__m128i *)src);
- src += src_stride;
- _mm_storel_epi64((__m128i *)dst, s[0]);
- dst += dst_stride;
- _mm_storel_epi64((__m128i *)dst, s[1]);
- dst += dst_stride;
- h -= 2;
- } while (h);
- } else if (w == 16) {
- do {
- __m128i s[2];
- s[0] = _mm_loadu_si128((__m128i *)src);
- src += src_stride;
- s[1] = _mm_loadu_si128((__m128i *)src);
- src += src_stride;
- _mm_store_si128((__m128i *)dst, s[0]);
- dst += dst_stride;
- _mm_store_si128((__m128i *)dst, s[1]);
- dst += dst_stride;
- h -= 2;
- } while (h);
- } else if (w == 32) {
- do {
- __m256i s[2];
- s[0] = _mm256_loadu_si256((__m256i *)src);
- src += src_stride;
- s[1] = _mm256_loadu_si256((__m256i *)src);
- src += src_stride;
- _mm256_storeu_si256((__m256i *)dst, s[0]);
- dst += dst_stride;
- _mm256_storeu_si256((__m256i *)dst, s[1]);
- dst += dst_stride;
- h -= 2;
- } while (h);
- } else if (w == 64) {
- do {
- __m256i s[4];
- s[0] = _mm256_loadu_si256((__m256i *)(src + 0 * 32));
- s[1] = _mm256_loadu_si256((__m256i *)(src + 1 * 32));
- src += src_stride;
- s[2] = _mm256_loadu_si256((__m256i *)(src + 0 * 32));
- s[3] = _mm256_loadu_si256((__m256i *)(src + 1 * 32));
- src += src_stride;
- _mm256_storeu_si256((__m256i *)(dst + 0 * 32), s[0]);
- _mm256_storeu_si256((__m256i *)(dst + 1 * 32), s[1]);
- dst += dst_stride;
- _mm256_storeu_si256((__m256i *)(dst + 0 * 32), s[2]);
- _mm256_storeu_si256((__m256i *)(dst + 1 * 32), s[3]);
- dst += dst_stride;
- h -= 2;
- } while (h);
- } else {
- do {
- copy_128(src, dst);
- src += src_stride;
- dst += dst_stride;
- copy_128(src, dst);
- src += src_stride;
- dst += dst_stride;
- h -= 2;
- } while (h);
- }
-}
diff --git a/third_party/aom/av1/common/x86/convolve_2d_sse2.c b/third_party/aom/av1/common/x86/convolve_2d_sse2.c
deleted file mode 100644
index b1a62a4f6..000000000
--- a/third_party/aom/av1/common/x86/convolve_2d_sse2.c
+++ /dev/null
@@ -1,472 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#include <emmintrin.h>
-
-#include "config/av1_rtcd.h"
-
-#include "aom_dsp/aom_dsp_common.h"
-#include "aom_dsp/aom_filter.h"
-#include "aom_dsp/x86/convolve_sse2.h"
-#include "av1/common/convolve.h"
-
-void av1_convolve_2d_sr_sse2(const uint8_t *src, int src_stride, uint8_t *dst,
- int dst_stride, int w, int h,
- const InterpFilterParams *filter_params_x,
- const InterpFilterParams *filter_params_y,
- const int subpel_x_q4, const int subpel_y_q4,
- ConvolveParams *conv_params) {
- const int bd = 8;
-
- DECLARE_ALIGNED(16, int16_t,
- im_block[(MAX_SB_SIZE + MAX_FILTER_TAP - 1) * MAX_SB_SIZE]);
- int im_h = h + filter_params_y->taps - 1;
- int im_stride = MAX_SB_SIZE;
- int i, j;
- const int fo_vert = filter_params_y->taps / 2 - 1;
- const int fo_horiz = filter_params_x->taps / 2 - 1;
- const uint8_t *const src_ptr = src - fo_vert * src_stride - fo_horiz;
-
- const __m128i zero = _mm_setzero_si128();
- const int bits =
- FILTER_BITS * 2 - conv_params->round_0 - conv_params->round_1;
- const int offset_bits = bd + 2 * FILTER_BITS - conv_params->round_0;
-
- assert(conv_params->round_0 > 0);
-
- /* Horizontal filter */
- {
- const int16_t *x_filter = av1_get_interp_filter_subpel_kernel(
- filter_params_x, subpel_x_q4 & SUBPEL_MASK);
- const __m128i coeffs_x = _mm_loadu_si128((__m128i *)x_filter);
-
- // coeffs 0 1 0 1 2 3 2 3
- const __m128i tmp_0 = _mm_unpacklo_epi32(coeffs_x, coeffs_x);
- // coeffs 4 5 4 5 6 7 6 7
- const __m128i tmp_1 = _mm_unpackhi_epi32(coeffs_x, coeffs_x);
-
- // coeffs 0 1 0 1 0 1 0 1
- const __m128i coeff_01 = _mm_unpacklo_epi64(tmp_0, tmp_0);
- // coeffs 2 3 2 3 2 3 2 3
- const __m128i coeff_23 = _mm_unpackhi_epi64(tmp_0, tmp_0);
- // coeffs 4 5 4 5 4 5 4 5
- const __m128i coeff_45 = _mm_unpacklo_epi64(tmp_1, tmp_1);
- // coeffs 6 7 6 7 6 7 6 7
- const __m128i coeff_67 = _mm_unpackhi_epi64(tmp_1, tmp_1);
-
- const __m128i round_const = _mm_set1_epi32(
- (1 << (bd + FILTER_BITS - 1)) + ((1 << conv_params->round_0) >> 1));
- const __m128i round_shift = _mm_cvtsi32_si128(conv_params->round_0);
-
- for (i = 0; i < im_h; ++i) {
- for (j = 0; j < w; j += 8) {
- const __m128i data =
- _mm_loadu_si128((__m128i *)&src_ptr[i * src_stride + j]);
-
- // Filter even-index pixels
- const __m128i src_0 = _mm_unpacklo_epi8(data, zero);
- const __m128i res_0 = _mm_madd_epi16(src_0, coeff_01);
- const __m128i src_2 = _mm_unpacklo_epi8(_mm_srli_si128(data, 2), zero);
- const __m128i res_2 = _mm_madd_epi16(src_2, coeff_23);
- const __m128i src_4 = _mm_unpacklo_epi8(_mm_srli_si128(data, 4), zero);
- const __m128i res_4 = _mm_madd_epi16(src_4, coeff_45);
- const __m128i src_6 = _mm_unpacklo_epi8(_mm_srli_si128(data, 6), zero);
- const __m128i res_6 = _mm_madd_epi16(src_6, coeff_67);
-
- __m128i res_even = _mm_add_epi32(_mm_add_epi32(res_0, res_4),
- _mm_add_epi32(res_2, res_6));
- res_even =
- _mm_sra_epi32(_mm_add_epi32(res_even, round_const), round_shift);
-
- // Filter odd-index pixels
- const __m128i src_1 = _mm_unpacklo_epi8(_mm_srli_si128(data, 1), zero);
- const __m128i res_1 = _mm_madd_epi16(src_1, coeff_01);
- const __m128i src_3 = _mm_unpacklo_epi8(_mm_srli_si128(data, 3), zero);
- const __m128i res_3 = _mm_madd_epi16(src_3, coeff_23);
- const __m128i src_5 = _mm_unpacklo_epi8(_mm_srli_si128(data, 5), zero);
- const __m128i res_5 = _mm_madd_epi16(src_5, coeff_45);
- const __m128i src_7 = _mm_unpacklo_epi8(_mm_srli_si128(data, 7), zero);
- const __m128i res_7 = _mm_madd_epi16(src_7, coeff_67);
-
- __m128i res_odd = _mm_add_epi32(_mm_add_epi32(res_1, res_5),
- _mm_add_epi32(res_3, res_7));
- res_odd =
- _mm_sra_epi32(_mm_add_epi32(res_odd, round_const), round_shift);
-
- // Pack in the column order 0, 2, 4, 6, 1, 3, 5, 7
- __m128i res = _mm_packs_epi32(res_even, res_odd);
- _mm_storeu_si128((__m128i *)&im_block[i * im_stride + j], res);
- }
- }
- }
-
- /* Vertical filter */
- {
- const int16_t *y_filter = av1_get_interp_filter_subpel_kernel(
- filter_params_y, subpel_y_q4 & SUBPEL_MASK);
- const __m128i coeffs_y = _mm_loadu_si128((__m128i *)y_filter);
-
- // coeffs 0 1 0 1 2 3 2 3
- const __m128i tmp_0 = _mm_unpacklo_epi32(coeffs_y, coeffs_y);
- // coeffs 4 5 4 5 6 7 6 7
- const __m128i tmp_1 = _mm_unpackhi_epi32(coeffs_y, coeffs_y);
-
- // coeffs 0 1 0 1 0 1 0 1
- const __m128i coeff_01 = _mm_unpacklo_epi64(tmp_0, tmp_0);
- // coeffs 2 3 2 3 2 3 2 3
- const __m128i coeff_23 = _mm_unpackhi_epi64(tmp_0, tmp_0);
- // coeffs 4 5 4 5 4 5 4 5
- const __m128i coeff_45 = _mm_unpacklo_epi64(tmp_1, tmp_1);
- // coeffs 6 7 6 7 6 7 6 7
- const __m128i coeff_67 = _mm_unpackhi_epi64(tmp_1, tmp_1);
-
- const __m128i sum_round =
- _mm_set1_epi32((1 << offset_bits) + ((1 << conv_params->round_1) >> 1));
- const __m128i sum_shift = _mm_cvtsi32_si128(conv_params->round_1);
-
- const __m128i round_const = _mm_set1_epi32(
- ((1 << bits) >> 1) - (1 << (offset_bits - conv_params->round_1)) -
- ((1 << (offset_bits - conv_params->round_1)) >> 1));
- const __m128i round_shift = _mm_cvtsi32_si128(bits);
-
- for (i = 0; i < h; ++i) {
- for (j = 0; j < w; j += 8) {
- // Filter even-index pixels
- const int16_t *data = &im_block[i * im_stride + j];
- const __m128i src_0 =
- _mm_unpacklo_epi16(*(__m128i *)(data + 0 * im_stride),
- *(__m128i *)(data + 1 * im_stride));
- const __m128i src_2 =
- _mm_unpacklo_epi16(*(__m128i *)(data + 2 * im_stride),
- *(__m128i *)(data + 3 * im_stride));
- const __m128i src_4 =
- _mm_unpacklo_epi16(*(__m128i *)(data + 4 * im_stride),
- *(__m128i *)(data + 5 * im_stride));
- const __m128i src_6 =
- _mm_unpacklo_epi16(*(__m128i *)(data + 6 * im_stride),
- *(__m128i *)(data + 7 * im_stride));
-
- const __m128i res_0 = _mm_madd_epi16(src_0, coeff_01);
- const __m128i res_2 = _mm_madd_epi16(src_2, coeff_23);
- const __m128i res_4 = _mm_madd_epi16(src_4, coeff_45);
- const __m128i res_6 = _mm_madd_epi16(src_6, coeff_67);
-
- const __m128i res_even = _mm_add_epi32(_mm_add_epi32(res_0, res_2),
- _mm_add_epi32(res_4, res_6));
-
- // Filter odd-index pixels
- const __m128i src_1 =
- _mm_unpackhi_epi16(*(__m128i *)(data + 0 * im_stride),
- *(__m128i *)(data + 1 * im_stride));
- const __m128i src_3 =
- _mm_unpackhi_epi16(*(__m128i *)(data + 2 * im_stride),
- *(__m128i *)(data + 3 * im_stride));
- const __m128i src_5 =
- _mm_unpackhi_epi16(*(__m128i *)(data + 4 * im_stride),
- *(__m128i *)(data + 5 * im_stride));
- const __m128i src_7 =
- _mm_unpackhi_epi16(*(__m128i *)(data + 6 * im_stride),
- *(__m128i *)(data + 7 * im_stride));
-
- const __m128i res_1 = _mm_madd_epi16(src_1, coeff_01);
- const __m128i res_3 = _mm_madd_epi16(src_3, coeff_23);
- const __m128i res_5 = _mm_madd_epi16(src_5, coeff_45);
- const __m128i res_7 = _mm_madd_epi16(src_7, coeff_67);
-
- const __m128i res_odd = _mm_add_epi32(_mm_add_epi32(res_1, res_3),
- _mm_add_epi32(res_5, res_7));
-
- // Rearrange pixels back into the order 0 ... 7
- const __m128i res_lo = _mm_unpacklo_epi32(res_even, res_odd);
- const __m128i res_hi = _mm_unpackhi_epi32(res_even, res_odd);
-
- __m128i res_lo_round =
- _mm_sra_epi32(_mm_add_epi32(res_lo, sum_round), sum_shift);
- __m128i res_hi_round =
- _mm_sra_epi32(_mm_add_epi32(res_hi, sum_round), sum_shift);
-
- res_lo_round = _mm_sra_epi32(_mm_add_epi32(res_lo_round, round_const),
- round_shift);
- res_hi_round = _mm_sra_epi32(_mm_add_epi32(res_hi_round, round_const),
- round_shift);
-
- const __m128i res16 = _mm_packs_epi32(res_lo_round, res_hi_round);
- const __m128i res = _mm_packus_epi16(res16, res16);
-
- // Accumulate values into the destination buffer
- __m128i *const p = (__m128i *)&dst[i * dst_stride + j];
-
- if (w == 2) {
- *(uint16_t *)p = _mm_cvtsi128_si32(res);
- } else if (w == 4) {
- *(uint32_t *)p = _mm_cvtsi128_si32(res);
- } else {
- _mm_storel_epi64(p, res);
- }
- }
- }
- }
-}
-
-static INLINE void copy_128(const uint8_t *src, uint8_t *dst) {
- __m128i s[8];
- s[0] = _mm_loadu_si128((__m128i *)(src + 0 * 16));
- s[1] = _mm_loadu_si128((__m128i *)(src + 1 * 16));
- s[2] = _mm_loadu_si128((__m128i *)(src + 2 * 16));
- s[3] = _mm_loadu_si128((__m128i *)(src + 3 * 16));
- s[4] = _mm_loadu_si128((__m128i *)(src + 4 * 16));
- s[5] = _mm_loadu_si128((__m128i *)(src + 5 * 16));
- s[6] = _mm_loadu_si128((__m128i *)(src + 6 * 16));
- s[7] = _mm_loadu_si128((__m128i *)(src + 7 * 16));
- _mm_store_si128((__m128i *)(dst + 0 * 16), s[0]);
- _mm_store_si128((__m128i *)(dst + 1 * 16), s[1]);
- _mm_store_si128((__m128i *)(dst + 2 * 16), s[2]);
- _mm_store_si128((__m128i *)(dst + 3 * 16), s[3]);
- _mm_store_si128((__m128i *)(dst + 4 * 16), s[4]);
- _mm_store_si128((__m128i *)(dst + 5 * 16), s[5]);
- _mm_store_si128((__m128i *)(dst + 6 * 16), s[6]);
- _mm_store_si128((__m128i *)(dst + 7 * 16), s[7]);
-}
-
-void av1_convolve_2d_copy_sr_sse2(const uint8_t *src, int src_stride,
- uint8_t *dst, int dst_stride, int w, int h,
- const InterpFilterParams *filter_params_x,
- const InterpFilterParams *filter_params_y,
- const int subpel_x_q4, const int subpel_y_q4,
- ConvolveParams *conv_params) {
- (void)filter_params_x;
- (void)filter_params_y;
- (void)subpel_x_q4;
- (void)subpel_y_q4;
- (void)conv_params;
-
- if (w >= 16) {
- assert(!((intptr_t)dst % 16));
- assert(!(dst_stride % 16));
- }
-
- if (w == 2) {
- do {
- memcpy(dst, src, 2 * sizeof(*src));
- src += src_stride;
- dst += dst_stride;
- memcpy(dst, src, 2 * sizeof(*src));
- src += src_stride;
- dst += dst_stride;
- h -= 2;
- } while (h);
- } else if (w == 4) {
- do {
- memcpy(dst, src, 4 * sizeof(*src));
- src += src_stride;
- dst += dst_stride;
- memcpy(dst, src, 4 * sizeof(*src));
- src += src_stride;
- dst += dst_stride;
- h -= 2;
- } while (h);
- } else if (w == 8) {
- do {
- __m128i s[2];
- s[0] = _mm_loadl_epi64((__m128i *)src);
- src += src_stride;
- s[1] = _mm_loadl_epi64((__m128i *)src);
- src += src_stride;
- _mm_storel_epi64((__m128i *)dst, s[0]);
- dst += dst_stride;
- _mm_storel_epi64((__m128i *)dst, s[1]);
- dst += dst_stride;
- h -= 2;
- } while (h);
- } else if (w == 16) {
- do {
- __m128i s[2];
- s[0] = _mm_loadu_si128((__m128i *)src);
- src += src_stride;
- s[1] = _mm_loadu_si128((__m128i *)src);
- src += src_stride;
- _mm_store_si128((__m128i *)dst, s[0]);
- dst += dst_stride;
- _mm_store_si128((__m128i *)dst, s[1]);
- dst += dst_stride;
- h -= 2;
- } while (h);
- } else if (w == 32) {
- do {
- __m128i s[4];
- s[0] = _mm_loadu_si128((__m128i *)(src + 0 * 16));
- s[1] = _mm_loadu_si128((__m128i *)(src + 1 * 16));
- src += src_stride;
- s[2] = _mm_loadu_si128((__m128i *)(src + 0 * 16));
- s[3] = _mm_loadu_si128((__m128i *)(src + 1 * 16));
- src += src_stride;
- _mm_store_si128((__m128i *)(dst + 0 * 16), s[0]);
- _mm_store_si128((__m128i *)(dst + 1 * 16), s[1]);
- dst += dst_stride;
- _mm_store_si128((__m128i *)(dst + 0 * 16), s[2]);
- _mm_store_si128((__m128i *)(dst + 1 * 16), s[3]);
- dst += dst_stride;
- h -= 2;
- } while (h);
- } else if (w == 64) {
- do {
- __m128i s[8];
- s[0] = _mm_loadu_si128((__m128i *)(src + 0 * 16));
- s[1] = _mm_loadu_si128((__m128i *)(src + 1 * 16));
- s[2] = _mm_loadu_si128((__m128i *)(src + 2 * 16));
- s[3] = _mm_loadu_si128((__m128i *)(src + 3 * 16));
- src += src_stride;
- s[4] = _mm_loadu_si128((__m128i *)(src + 0 * 16));
- s[5] = _mm_loadu_si128((__m128i *)(src + 1 * 16));
- s[6] = _mm_loadu_si128((__m128i *)(src + 2 * 16));
- s[7] = _mm_loadu_si128((__m128i *)(src + 3 * 16));
- src += src_stride;
- _mm_store_si128((__m128i *)(dst + 0 * 16), s[0]);
- _mm_store_si128((__m128i *)(dst + 1 * 16), s[1]);
- _mm_store_si128((__m128i *)(dst + 2 * 16), s[2]);
- _mm_store_si128((__m128i *)(dst + 3 * 16), s[3]);
- dst += dst_stride;
- _mm_store_si128((__m128i *)(dst + 0 * 16), s[4]);
- _mm_store_si128((__m128i *)(dst + 1 * 16), s[5]);
- _mm_store_si128((__m128i *)(dst + 2 * 16), s[6]);
- _mm_store_si128((__m128i *)(dst + 3 * 16), s[7]);
- dst += dst_stride;
- h -= 2;
- } while (h);
- } else {
- do {
- copy_128(src, dst);
- src += src_stride;
- dst += dst_stride;
- copy_128(src, dst);
- src += src_stride;
- dst += dst_stride;
- h -= 2;
- } while (h);
- }
-}
-
-void av1_jnt_convolve_2d_copy_sse2(const uint8_t *src, int src_stride,
- uint8_t *dst0, int dst_stride0, int w, int h,
- const InterpFilterParams *filter_params_x,
- const InterpFilterParams *filter_params_y,
- const int subpel_x_q4, const int subpel_y_q4,
- ConvolveParams *conv_params) {
- const int bd = 8;
- CONV_BUF_TYPE *dst = conv_params->dst;
- int dst_stride = conv_params->dst_stride;
- (void)filter_params_x;
- (void)filter_params_y;
- (void)subpel_x_q4;
- (void)subpel_y_q4;
-
- const int bits =
- FILTER_BITS * 2 - conv_params->round_1 - conv_params->round_0;
- const int do_average = conv_params->do_average;
- const int use_jnt_comp_avg = conv_params->use_jnt_comp_avg;
- const __m128i zero = _mm_setzero_si128();
- const __m128i left_shift = _mm_cvtsi32_si128(bits);
- int i, j;
-
- const int w0 = conv_params->fwd_offset;
- const int w1 = conv_params->bck_offset;
- const __m128i wt0 = _mm_set1_epi16(w0);
- const __m128i wt1 = _mm_set1_epi16(w1);
- const __m128i wt = _mm_unpacklo_epi16(wt0, wt1);
-
- const int offset_0 =
- bd + 2 * FILTER_BITS - conv_params->round_0 - conv_params->round_1;
- const int offset = (1 << offset_0) + (1 << (offset_0 - 1));
- const __m128i offset_const = _mm_set1_epi16(offset);
- const int rounding_shift =
- 2 * FILTER_BITS - conv_params->round_0 - conv_params->round_1;
- const __m128i rounding_const = _mm_set1_epi16((1 << rounding_shift) >> 1);
-
- assert((w % 4) == 0);
-
- if (!(w % 16)) {
- for (i = 0; i < h; ++i) {
- for (j = 0; j < w; j += 16) {
- const __m128i d8 = _mm_loadu_si128((__m128i *)&src[j]);
-
- const __m128i d16_lo = _mm_unpacklo_epi8(d8, zero);
- const __m128i d16_hi = _mm_unpackhi_epi8(d8, zero);
-
- const __m128i res_lo = _mm_sll_epi16(d16_lo, left_shift);
- const __m128i res_unsigned_lo = _mm_add_epi16(res_lo, offset_const);
-
- const __m128i res_hi = _mm_sll_epi16(d16_hi, left_shift);
- const __m128i res_unsigned_hi = _mm_add_epi16(res_hi, offset_const);
-
- if (do_average) {
- const __m128i data_ref_0_lo = _mm_loadu_si128((__m128i *)(&dst[j]));
- const __m128i data_ref_0_hi =
- _mm_loadu_si128((__m128i *)(&dst[j + 8]));
-
- const __m128i comp_avg_res_lo =
- comp_avg(&data_ref_0_lo, &res_unsigned_lo, &wt, use_jnt_comp_avg);
-
- const __m128i round_result_lo = convolve_rounding(
- &comp_avg_res_lo, &offset_const, &rounding_const, rounding_shift);
-
- const __m128i comp_avg_res_hi =
- comp_avg(&data_ref_0_hi, &res_unsigned_hi, &wt, use_jnt_comp_avg);
-
- const __m128i round_result_hi = convolve_rounding(
- &comp_avg_res_hi, &offset_const, &rounding_const, rounding_shift);
-
- const __m128i res_8 =
- _mm_packus_epi16(round_result_lo, round_result_hi);
-
- _mm_store_si128((__m128i *)(&dst0[j]), res_8);
- } else {
- _mm_store_si128((__m128i *)(&dst[j]), res_unsigned_lo);
- _mm_store_si128((__m128i *)(&dst[j + 8]), res_unsigned_hi);
- }
- }
- src += src_stride;
- dst += dst_stride;
- dst0 += dst_stride0;
- }
- } else {
- for (i = 0; i < h; ++i) {
- for (j = 0; j < w; j += 8) {
- const __m128i d8 = _mm_loadl_epi64((__m128i *)&src[j]);
- const __m128i d16_0 = _mm_unpacklo_epi8(d8, zero);
-
- const __m128i res = _mm_sll_epi16(d16_0, left_shift);
- const __m128i res_unsigned = _mm_add_epi16(res, offset_const);
-
- if (do_average) {
- const __m128i data_ref_0 = _mm_loadu_si128((__m128i *)(&dst[j]));
-
- const __m128i comp_avg_res =
- comp_avg(&data_ref_0, &res_unsigned, &wt, use_jnt_comp_avg);
-
- const __m128i round_result = convolve_rounding(
- &comp_avg_res, &offset_const, &rounding_const, rounding_shift);
-
- const __m128i res_8 = _mm_packus_epi16(round_result, round_result);
-
- if (w > 4)
- _mm_storel_epi64((__m128i *)(&dst0[j]), res_8);
- else
- *(uint32_t *)(&dst0[j]) = _mm_cvtsi128_si32(res_8);
- } else {
- _mm_store_si128((__m128i *)(&dst[j]), res_unsigned);
- }
- }
- src += src_stride;
- dst += dst_stride;
- dst0 += dst_stride0;
- }
- }
-}
diff --git a/third_party/aom/av1/common/x86/convolve_avx2.c b/third_party/aom/av1/common/x86/convolve_avx2.c
deleted file mode 100644
index 0e91ea947..000000000
--- a/third_party/aom/av1/common/x86/convolve_avx2.c
+++ /dev/null
@@ -1,277 +0,0 @@
-/*
- * Copyright (c) 2017, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#include <immintrin.h>
-
-#include "config/av1_rtcd.h"
-
-#include "aom_dsp/aom_dsp_common.h"
-#include "aom_dsp/x86/convolve_avx2.h"
-#include "aom_dsp/x86/synonyms.h"
-
-void av1_convolve_y_sr_avx2(const uint8_t *src, int src_stride, uint8_t *dst,
- int dst_stride, int w, int h,
- const InterpFilterParams *filter_params_x,
- const InterpFilterParams *filter_params_y,
- const int subpel_x_q4, const int subpel_y_q4,
- ConvolveParams *conv_params) {
- int i, j;
- const int fo_vert = filter_params_y->taps / 2 - 1;
- const uint8_t *const src_ptr = src - fo_vert * src_stride;
-
- // right shift is F-1 because we are already dividing
- // filter co-efficients by 2
- const int right_shift_bits = (FILTER_BITS - 1);
- const __m128i right_shift = _mm_cvtsi32_si128(right_shift_bits);
- const __m256i right_shift_const =
- _mm256_set1_epi16((1 << right_shift_bits) >> 1);
- __m256i coeffs[4], s[8];
-
- assert(conv_params->round_0 <= FILTER_BITS);
- assert(((conv_params->round_0 + conv_params->round_1) <= (FILTER_BITS + 1)) ||
- ((conv_params->round_0 + conv_params->round_1) == (2 * FILTER_BITS)));
-
- prepare_coeffs_lowbd(filter_params_y, subpel_y_q4, coeffs);
-
- (void)filter_params_x;
- (void)subpel_x_q4;
- (void)conv_params;
-
- for (j = 0; j < w; j += 16) {
- const uint8_t *data = &src_ptr[j];
- __m256i src6;
-
- // Load lines a and b. Line a to lower 128, line b to upper 128
- const __m256i src_01a = _mm256_permute2x128_si256(
- _mm256_castsi128_si256(
- _mm_loadu_si128((__m128i *)(data + 0 * src_stride))),
- _mm256_castsi128_si256(
- _mm_loadu_si128((__m128i *)(data + 1 * src_stride))),
- 0x20);
-
- const __m256i src_12a = _mm256_permute2x128_si256(
- _mm256_castsi128_si256(
- _mm_loadu_si128((__m128i *)(data + 1 * src_stride))),
- _mm256_castsi128_si256(
- _mm_loadu_si128((__m128i *)(data + 2 * src_stride))),
- 0x20);
-
- const __m256i src_23a = _mm256_permute2x128_si256(
- _mm256_castsi128_si256(
- _mm_loadu_si128((__m128i *)(data + 2 * src_stride))),
- _mm256_castsi128_si256(
- _mm_loadu_si128((__m128i *)(data + 3 * src_stride))),
- 0x20);
-
- const __m256i src_34a = _mm256_permute2x128_si256(
- _mm256_castsi128_si256(
- _mm_loadu_si128((__m128i *)(data + 3 * src_stride))),
- _mm256_castsi128_si256(
- _mm_loadu_si128((__m128i *)(data + 4 * src_stride))),
- 0x20);
-
- const __m256i src_45a = _mm256_permute2x128_si256(
- _mm256_castsi128_si256(
- _mm_loadu_si128((__m128i *)(data + 4 * src_stride))),
- _mm256_castsi128_si256(
- _mm_loadu_si128((__m128i *)(data + 5 * src_stride))),
- 0x20);
-
- src6 = _mm256_castsi128_si256(
- _mm_loadu_si128((__m128i *)(data + 6 * src_stride)));
- const __m256i src_56a = _mm256_permute2x128_si256(
- _mm256_castsi128_si256(
- _mm_loadu_si128((__m128i *)(data + 5 * src_stride))),
- src6, 0x20);
-
- s[0] = _mm256_unpacklo_epi8(src_01a, src_12a);
- s[1] = _mm256_unpacklo_epi8(src_23a, src_34a);
- s[2] = _mm256_unpacklo_epi8(src_45a, src_56a);
-
- s[4] = _mm256_unpackhi_epi8(src_01a, src_12a);
- s[5] = _mm256_unpackhi_epi8(src_23a, src_34a);
- s[6] = _mm256_unpackhi_epi8(src_45a, src_56a);
-
- for (i = 0; i < h; i += 2) {
- data = &src_ptr[i * src_stride + j];
- const __m256i src_67a = _mm256_permute2x128_si256(
- src6,
- _mm256_castsi128_si256(
- _mm_loadu_si128((__m128i *)(data + 7 * src_stride))),
- 0x20);
-
- src6 = _mm256_castsi128_si256(
- _mm_loadu_si128((__m128i *)(data + 8 * src_stride)));
- const __m256i src_78a = _mm256_permute2x128_si256(
- _mm256_castsi128_si256(
- _mm_loadu_si128((__m128i *)(data + 7 * src_stride))),
- src6, 0x20);
-
- s[3] = _mm256_unpacklo_epi8(src_67a, src_78a);
- s[7] = _mm256_unpackhi_epi8(src_67a, src_78a);
-
- const __m256i res_lo = convolve_lowbd(s, coeffs);
-
- /* rounding code */
- // shift by F - 1
- const __m256i res_16b_lo = _mm256_sra_epi16(
- _mm256_add_epi16(res_lo, right_shift_const), right_shift);
- // 8 bit conversion and saturation to uint8
- __m256i res_8b_lo = _mm256_packus_epi16(res_16b_lo, res_16b_lo);
-
- if (w - j > 8) {
- const __m256i res_hi = convolve_lowbd(s + 4, coeffs);
-
- /* rounding code */
- // shift by F - 1
- const __m256i res_16b_hi = _mm256_sra_epi16(
- _mm256_add_epi16(res_hi, right_shift_const), right_shift);
- // 8 bit conversion and saturation to uint8
- __m256i res_8b_hi = _mm256_packus_epi16(res_16b_hi, res_16b_hi);
-
- __m256i res_a = _mm256_unpacklo_epi64(res_8b_lo, res_8b_hi);
-
- const __m128i res_0 = _mm256_castsi256_si128(res_a);
- const __m128i res_1 = _mm256_extracti128_si256(res_a, 1);
-
- _mm_storeu_si128((__m128i *)&dst[i * dst_stride + j], res_0);
- _mm_storeu_si128((__m128i *)&dst[i * dst_stride + j + dst_stride],
- res_1);
- } else {
- const __m128i res_0 = _mm256_castsi256_si128(res_8b_lo);
- const __m128i res_1 = _mm256_extracti128_si256(res_8b_lo, 1);
- if (w - j > 4) {
- _mm_storel_epi64((__m128i *)&dst[i * dst_stride + j], res_0);
- _mm_storel_epi64((__m128i *)&dst[i * dst_stride + j + dst_stride],
- res_1);
- } else if (w - j > 2) {
- xx_storel_32(&dst[i * dst_stride + j], res_0);
- xx_storel_32(&dst[i * dst_stride + j + dst_stride], res_1);
- } else {
- __m128i *const p_0 = (__m128i *)&dst[i * dst_stride + j];
- __m128i *const p_1 = (__m128i *)&dst[i * dst_stride + j + dst_stride];
- *(uint16_t *)p_0 = _mm_cvtsi128_si32(res_0);
- *(uint16_t *)p_1 = _mm_cvtsi128_si32(res_1);
- }
- }
-
- s[0] = s[1];
- s[1] = s[2];
- s[2] = s[3];
-
- s[4] = s[5];
- s[5] = s[6];
- s[6] = s[7];
- }
- }
-}
-
-void av1_convolve_x_sr_avx2(const uint8_t *src, int src_stride, uint8_t *dst,
- int dst_stride, int w, int h,
- const InterpFilterParams *filter_params_x,
- const InterpFilterParams *filter_params_y,
- const int subpel_x_q4, const int subpel_y_q4,
- ConvolveParams *conv_params) {
- int i, j;
- const int fo_horiz = filter_params_x->taps / 2 - 1;
- const uint8_t *const src_ptr = src - fo_horiz;
- const int bits = FILTER_BITS - conv_params->round_0;
-
- __m256i filt[4], coeffs[4];
-
- filt[0] = _mm256_load_si256((__m256i const *)filt_global_avx2);
- filt[1] = _mm256_load_si256((__m256i const *)(filt_global_avx2 + 32));
- filt[2] = _mm256_load_si256((__m256i const *)(filt_global_avx2 + 32 * 2));
- filt[3] = _mm256_load_si256((__m256i const *)(filt_global_avx2 + 32 * 3));
-
- prepare_coeffs_lowbd(filter_params_x, subpel_x_q4, coeffs);
-
- const __m256i round_0_const =
- _mm256_set1_epi16((1 << (conv_params->round_0 - 1)) >> 1);
- const __m128i round_0_shift = _mm_cvtsi32_si128(conv_params->round_0 - 1);
- const __m256i round_const = _mm256_set1_epi16((1 << bits) >> 1);
- const __m128i round_shift = _mm_cvtsi32_si128(bits);
-
- (void)filter_params_y;
- (void)subpel_y_q4;
-
- assert(bits >= 0);
- assert((FILTER_BITS - conv_params->round_1) >= 0 ||
- ((conv_params->round_0 + conv_params->round_1) == 2 * FILTER_BITS));
- assert(conv_params->round_0 > 0);
-
- if (w <= 8) {
- for (i = 0; i < h; i += 2) {
- const __m256i data = _mm256_permute2x128_si256(
- _mm256_castsi128_si256(
- _mm_loadu_si128((__m128i *)(&src_ptr[i * src_stride]))),
- _mm256_castsi128_si256(_mm_loadu_si128(
- (__m128i *)(&src_ptr[i * src_stride + src_stride]))),
- 0x20);
-
- __m256i res_16b = convolve_lowbd_x(data, coeffs, filt);
-
- res_16b = _mm256_sra_epi16(_mm256_add_epi16(res_16b, round_0_const),
- round_0_shift);
-
- res_16b =
- _mm256_sra_epi16(_mm256_add_epi16(res_16b, round_const), round_shift);
-
- /* rounding code */
- // 8 bit conversion and saturation to uint8
- __m256i res_8b = _mm256_packus_epi16(res_16b, res_16b);
-
- const __m128i res_0 = _mm256_castsi256_si128(res_8b);
- const __m128i res_1 = _mm256_extracti128_si256(res_8b, 1);
- if (w > 4) {
- _mm_storel_epi64((__m128i *)&dst[i * dst_stride], res_0);
- _mm_storel_epi64((__m128i *)&dst[i * dst_stride + dst_stride], res_1);
- } else if (w > 2) {
- xx_storel_32(&dst[i * dst_stride], res_0);
- xx_storel_32(&dst[i * dst_stride + dst_stride], res_1);
- } else {
- __m128i *const p_0 = (__m128i *)&dst[i * dst_stride];
- __m128i *const p_1 = (__m128i *)&dst[i * dst_stride + dst_stride];
- *(uint16_t *)p_0 = _mm_cvtsi128_si32(res_0);
- *(uint16_t *)p_1 = _mm_cvtsi128_si32(res_1);
- }
- }
- } else {
- for (i = 0; i < h; ++i) {
- for (j = 0; j < w; j += 16) {
- // 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 8 9 10 11 12 13 14 15 16 17 18
- // 19 20 21 22 23
- const __m256i data = _mm256_inserti128_si256(
- _mm256_loadu_si256((__m256i *)&src_ptr[(i * src_stride) + j]),
- _mm_loadu_si128((__m128i *)&src_ptr[(i * src_stride) + (j + 8)]),
- 1);
-
- __m256i res_16b = convolve_lowbd_x(data, coeffs, filt);
-
- res_16b = _mm256_sra_epi16(_mm256_add_epi16(res_16b, round_0_const),
- round_0_shift);
-
- res_16b = _mm256_sra_epi16(_mm256_add_epi16(res_16b, round_const),
- round_shift);
-
- /* rounding code */
- // 8 bit conversion and saturation to uint8
- __m256i res_8b = _mm256_packus_epi16(res_16b, res_16b);
-
- // Store values into the destination buffer
- // 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15
- res_8b = _mm256_permute4x64_epi64(res_8b, 216);
- __m128i res = _mm256_castsi256_si128(res_8b);
- _mm_storeu_si128((__m128i *)&dst[i * dst_stride + j], res);
- }
- }
- }
-}
diff --git a/third_party/aom/av1/common/x86/convolve_sse2.c b/third_party/aom/av1/common/x86/convolve_sse2.c
deleted file mode 100644
index 5016642de..000000000
--- a/third_party/aom/av1/common/x86/convolve_sse2.c
+++ /dev/null
@@ -1,338 +0,0 @@
-/*
- * Copyright (c) 2017, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#include <emmintrin.h>
-
-#include "config/av1_rtcd.h"
-
-#include "aom_dsp/aom_dsp_common.h"
-#include "aom_dsp/aom_filter.h"
-#include "aom_dsp/x86/convolve_common_intrin.h"
-#include "av1/common/convolve.h"
-
-static INLINE void prepare_coeffs(const InterpFilterParams *const filter_params,
- const int subpel_q4,
- __m128i *const coeffs /* [4] */) {
- const int16_t *const y_filter = av1_get_interp_filter_subpel_kernel(
- filter_params, subpel_q4 & SUBPEL_MASK);
- const __m128i coeffs_y = _mm_loadu_si128((__m128i *)y_filter);
- // coeffs 0 1 0 1 2 3 2 3
- const __m128i tmp_0 = _mm_unpacklo_epi32(coeffs_y, coeffs_y);
- // coeffs 4 5 4 5 6 7 6 7
- const __m128i tmp_1 = _mm_unpackhi_epi32(coeffs_y, coeffs_y);
-
- coeffs[0] = _mm_unpacklo_epi64(tmp_0, tmp_0); // coeffs 0 1 0 1 0 1 0 1
- coeffs[1] = _mm_unpackhi_epi64(tmp_0, tmp_0); // coeffs 2 3 2 3 2 3 2 3
- coeffs[2] = _mm_unpacklo_epi64(tmp_1, tmp_1); // coeffs 4 5 4 5 4 5 4 5
- coeffs[3] = _mm_unpackhi_epi64(tmp_1, tmp_1); // coeffs 6 7 6 7 6 7 6 7
-}
-
-static INLINE __m128i convolve(const __m128i *const s,
- const __m128i *const coeffs) {
- const __m128i d0 = _mm_madd_epi16(s[0], coeffs[0]);
- const __m128i d1 = _mm_madd_epi16(s[1], coeffs[1]);
- const __m128i d2 = _mm_madd_epi16(s[2], coeffs[2]);
- const __m128i d3 = _mm_madd_epi16(s[3], coeffs[3]);
- const __m128i d = _mm_add_epi32(_mm_add_epi32(d0, d1), _mm_add_epi32(d2, d3));
- return d;
-}
-
-static INLINE __m128i convolve_lo_x(const __m128i *const s,
- const __m128i *const coeffs) {
- __m128i ss[4];
- ss[0] = _mm_unpacklo_epi8(s[0], _mm_setzero_si128());
- ss[1] = _mm_unpacklo_epi8(s[1], _mm_setzero_si128());
- ss[2] = _mm_unpacklo_epi8(s[2], _mm_setzero_si128());
- ss[3] = _mm_unpacklo_epi8(s[3], _mm_setzero_si128());
- return convolve(ss, coeffs);
-}
-
-static INLINE __m128i convolve_lo_y(const __m128i *const s,
- const __m128i *const coeffs) {
- __m128i ss[4];
- ss[0] = _mm_unpacklo_epi8(s[0], _mm_setzero_si128());
- ss[1] = _mm_unpacklo_epi8(s[2], _mm_setzero_si128());
- ss[2] = _mm_unpacklo_epi8(s[4], _mm_setzero_si128());
- ss[3] = _mm_unpacklo_epi8(s[6], _mm_setzero_si128());
- return convolve(ss, coeffs);
-}
-
-static INLINE __m128i convolve_hi_y(const __m128i *const s,
- const __m128i *const coeffs) {
- __m128i ss[4];
- ss[0] = _mm_unpackhi_epi8(s[0], _mm_setzero_si128());
- ss[1] = _mm_unpackhi_epi8(s[2], _mm_setzero_si128());
- ss[2] = _mm_unpackhi_epi8(s[4], _mm_setzero_si128());
- ss[3] = _mm_unpackhi_epi8(s[6], _mm_setzero_si128());
- return convolve(ss, coeffs);
-}
-
-void av1_convolve_y_sr_sse2(const uint8_t *src, int src_stride, uint8_t *dst,
- int dst_stride, int w, int h,
- const InterpFilterParams *filter_params_x,
- const InterpFilterParams *filter_params_y,
- const int subpel_x_q4, const int subpel_y_q4,
- ConvolveParams *conv_params) {
- const int fo_vert = filter_params_y->taps / 2 - 1;
- const uint8_t *src_ptr = src - fo_vert * src_stride;
- const __m128i round_const = _mm_set1_epi32((1 << FILTER_BITS) >> 1);
- const __m128i round_shift = _mm_cvtsi32_si128(FILTER_BITS);
- __m128i coeffs[4];
-
- (void)filter_params_x;
- (void)subpel_x_q4;
- (void)conv_params;
-
- assert(conv_params->round_0 <= FILTER_BITS);
- assert(((conv_params->round_0 + conv_params->round_1) <= (FILTER_BITS + 1)) ||
- ((conv_params->round_0 + conv_params->round_1) == (2 * FILTER_BITS)));
-
- prepare_coeffs(filter_params_y, subpel_y_q4, coeffs);
-
- if (w <= 4) {
- __m128i s[8], src6, res, res_round, res16;
- uint32_t res_int;
- src6 = _mm_cvtsi32_si128(*(uint32_t *)(src_ptr + 6 * src_stride));
- s[0] = _mm_unpacklo_epi8(
- _mm_cvtsi32_si128(*(uint32_t *)(src_ptr + 0 * src_stride)),
- _mm_cvtsi32_si128(*(uint32_t *)(src_ptr + 1 * src_stride)));
- s[1] = _mm_unpacklo_epi8(
- _mm_cvtsi32_si128(*(uint32_t *)(src_ptr + 1 * src_stride)),
- _mm_cvtsi32_si128(*(uint32_t *)(src_ptr + 2 * src_stride)));
- s[2] = _mm_unpacklo_epi8(
- _mm_cvtsi32_si128(*(uint32_t *)(src_ptr + 2 * src_stride)),
- _mm_cvtsi32_si128(*(uint32_t *)(src_ptr + 3 * src_stride)));
- s[3] = _mm_unpacklo_epi8(
- _mm_cvtsi32_si128(*(uint32_t *)(src_ptr + 3 * src_stride)),
- _mm_cvtsi32_si128(*(uint32_t *)(src_ptr + 4 * src_stride)));
- s[4] = _mm_unpacklo_epi8(
- _mm_cvtsi32_si128(*(uint32_t *)(src_ptr + 4 * src_stride)),
- _mm_cvtsi32_si128(*(uint32_t *)(src_ptr + 5 * src_stride)));
- s[5] = _mm_unpacklo_epi8(
- _mm_cvtsi32_si128(*(uint32_t *)(src_ptr + 5 * src_stride)), src6);
-
- do {
- s[6] = _mm_unpacklo_epi8(
- src6, _mm_cvtsi32_si128(*(uint32_t *)(src_ptr + 7 * src_stride)));
- src6 = _mm_cvtsi32_si128(*(uint32_t *)(src_ptr + 8 * src_stride));
- s[7] = _mm_unpacklo_epi8(
- _mm_cvtsi32_si128(*(uint32_t *)(src_ptr + 7 * src_stride)), src6);
-
- res = convolve_lo_y(s + 0, coeffs);
- res_round = _mm_sra_epi32(_mm_add_epi32(res, round_const), round_shift);
- res16 = _mm_packs_epi32(res_round, res_round);
- res_int = _mm_cvtsi128_si32(_mm_packus_epi16(res16, res16));
-
- if (w == 2)
- *(uint16_t *)dst = res_int;
- else
- *(uint32_t *)dst = res_int;
-
- src_ptr += src_stride;
- dst += dst_stride;
-
- res = convolve_lo_y(s + 1, coeffs);
- res_round = _mm_sra_epi32(_mm_add_epi32(res, round_const), round_shift);
- res16 = _mm_packs_epi32(res_round, res_round);
- res_int = _mm_cvtsi128_si32(_mm_packus_epi16(res16, res16));
-
- if (w == 2)
- *(uint16_t *)dst = res_int;
- else
- *(uint32_t *)dst = res_int;
-
- src_ptr += src_stride;
- dst += dst_stride;
-
- s[0] = s[2];
- s[1] = s[3];
- s[2] = s[4];
- s[3] = s[5];
- s[4] = s[6];
- s[5] = s[7];
- h -= 2;
- } while (h);
- } else {
- assert(!(w % 8));
- int j = 0;
- do {
- __m128i s[8], src6, res_lo, res_hi;
- __m128i res_lo_round, res_hi_round, res16, res;
- const uint8_t *data = &src_ptr[j];
-
- src6 = _mm_loadl_epi64((__m128i *)(data + 6 * src_stride));
- s[0] = _mm_unpacklo_epi8(
- _mm_loadl_epi64((__m128i *)(data + 0 * src_stride)),
- _mm_loadl_epi64((__m128i *)(data + 1 * src_stride)));
- s[1] = _mm_unpacklo_epi8(
- _mm_loadl_epi64((__m128i *)(data + 1 * src_stride)),
- _mm_loadl_epi64((__m128i *)(data + 2 * src_stride)));
- s[2] = _mm_unpacklo_epi8(
- _mm_loadl_epi64((__m128i *)(data + 2 * src_stride)),
- _mm_loadl_epi64((__m128i *)(data + 3 * src_stride)));
- s[3] = _mm_unpacklo_epi8(
- _mm_loadl_epi64((__m128i *)(data + 3 * src_stride)),
- _mm_loadl_epi64((__m128i *)(data + 4 * src_stride)));
- s[4] = _mm_unpacklo_epi8(
- _mm_loadl_epi64((__m128i *)(data + 4 * src_stride)),
- _mm_loadl_epi64((__m128i *)(data + 5 * src_stride)));
- s[5] = _mm_unpacklo_epi8(
- _mm_loadl_epi64((__m128i *)(data + 5 * src_stride)), src6);
-
- int i = 0;
- do {
- data = &src_ptr[i * src_stride + j];
- s[6] = _mm_unpacklo_epi8(
- src6, _mm_loadl_epi64((__m128i *)(data + 7 * src_stride)));
- src6 = _mm_loadl_epi64((__m128i *)(data + 8 * src_stride));
- s[7] = _mm_unpacklo_epi8(
- _mm_loadl_epi64((__m128i *)(data + 7 * src_stride)), src6);
-
- res_lo = convolve_lo_y(s, coeffs); // Filter low index pixels
- res_hi = convolve_hi_y(s, coeffs); // Filter high index pixels
-
- res_lo_round =
- _mm_sra_epi32(_mm_add_epi32(res_lo, round_const), round_shift);
- res_hi_round =
- _mm_sra_epi32(_mm_add_epi32(res_hi, round_const), round_shift);
-
- res16 = _mm_packs_epi32(res_lo_round, res_hi_round);
- res = _mm_packus_epi16(res16, res16);
-
- _mm_storel_epi64((__m128i *)(dst + i * dst_stride + j), res);
- i++;
-
- res_lo = convolve_lo_y(s + 1, coeffs); // Filter low index pixels
- res_hi = convolve_hi_y(s + 1, coeffs); // Filter high index pixels
-
- res_lo_round =
- _mm_sra_epi32(_mm_add_epi32(res_lo, round_const), round_shift);
- res_hi_round =
- _mm_sra_epi32(_mm_add_epi32(res_hi, round_const), round_shift);
-
- res16 = _mm_packs_epi32(res_lo_round, res_hi_round);
- res = _mm_packus_epi16(res16, res16);
-
- _mm_storel_epi64((__m128i *)(dst + i * dst_stride + j), res);
- i++;
-
- s[0] = s[2];
- s[1] = s[3];
- s[2] = s[4];
- s[3] = s[5];
- s[4] = s[6];
- s[5] = s[7];
- } while (i < h);
- j += 8;
- } while (j < w);
- }
-}
-
-void av1_convolve_x_sr_sse2(const uint8_t *src, int src_stride, uint8_t *dst,
- int dst_stride, int w, int h,
- const InterpFilterParams *filter_params_x,
- const InterpFilterParams *filter_params_y,
- const int subpel_x_q4, const int subpel_y_q4,
- ConvolveParams *conv_params) {
- const int fo_horiz = filter_params_x->taps / 2 - 1;
- const uint8_t *src_ptr = src - fo_horiz;
- const int bits = FILTER_BITS - conv_params->round_0;
- const __m128i round_0_const =
- _mm_set1_epi32((1 << conv_params->round_0) >> 1);
- const __m128i round_const = _mm_set1_epi32((1 << bits) >> 1);
- const __m128i round_0_shift = _mm_cvtsi32_si128(conv_params->round_0);
- const __m128i round_shift = _mm_cvtsi32_si128(bits);
- __m128i coeffs[4];
-
- (void)filter_params_y;
- (void)subpel_y_q4;
-
- assert(bits >= 0);
- assert((FILTER_BITS - conv_params->round_1) >= 0 ||
- ((conv_params->round_0 + conv_params->round_1) == 2 * FILTER_BITS));
-
- prepare_coeffs(filter_params_x, subpel_x_q4, coeffs);
-
- if (w <= 4) {
- do {
- const __m128i data = _mm_loadu_si128((__m128i *)src_ptr);
- __m128i s[4];
-
- s[0] = _mm_unpacklo_epi8(data, _mm_srli_si128(data, 1));
- s[1] =
- _mm_unpacklo_epi8(_mm_srli_si128(data, 2), _mm_srli_si128(data, 3));
- s[2] =
- _mm_unpacklo_epi8(_mm_srli_si128(data, 4), _mm_srli_si128(data, 5));
- s[3] =
- _mm_unpacklo_epi8(_mm_srli_si128(data, 6), _mm_srli_si128(data, 7));
- const __m128i res_lo = convolve_lo_x(s, coeffs);
- __m128i res_lo_round =
- _mm_sra_epi32(_mm_add_epi32(res_lo, round_0_const), round_0_shift);
- res_lo_round =
- _mm_sra_epi32(_mm_add_epi32(res_lo_round, round_const), round_shift);
-
- const __m128i res16 = _mm_packs_epi32(res_lo_round, res_lo_round);
- const __m128i res = _mm_packus_epi16(res16, res16);
-
- uint32_t r = _mm_cvtsi128_si32(res);
- if (w == 2)
- *(uint16_t *)dst = r;
- else
- *(uint32_t *)dst = r;
-
- src_ptr += src_stride;
- dst += dst_stride;
- } while (--h);
- } else {
- assert(!(w % 8));
- int i = 0;
- do {
- int j = 0;
- do {
- const __m128i data =
- _mm_loadu_si128((__m128i *)&src_ptr[i * src_stride + j]);
- __m128i s[4];
-
- // Filter even-index pixels
- s[0] = data;
- s[1] = _mm_srli_si128(data, 2);
- s[2] = _mm_srli_si128(data, 4);
- s[3] = _mm_srli_si128(data, 6);
- const __m128i res_even = convolve_lo_x(s, coeffs);
-
- // Filter odd-index pixels
- s[0] = _mm_srli_si128(data, 1);
- s[1] = _mm_srli_si128(data, 3);
- s[2] = _mm_srli_si128(data, 5);
- s[3] = _mm_srli_si128(data, 7);
- const __m128i res_odd = convolve_lo_x(s, coeffs);
-
- // Rearrange pixels back into the order 0 ... 7
- const __m128i res_lo = _mm_unpacklo_epi32(res_even, res_odd);
- const __m128i res_hi = _mm_unpackhi_epi32(res_even, res_odd);
- __m128i res_lo_round =
- _mm_sra_epi32(_mm_add_epi32(res_lo, round_0_const), round_0_shift);
- res_lo_round = _mm_sra_epi32(_mm_add_epi32(res_lo_round, round_const),
- round_shift);
- __m128i res_hi_round =
- _mm_sra_epi32(_mm_add_epi32(res_hi, round_0_const), round_0_shift);
- res_hi_round = _mm_sra_epi32(_mm_add_epi32(res_hi_round, round_const),
- round_shift);
-
- const __m128i res16 = _mm_packs_epi32(res_lo_round, res_hi_round);
- const __m128i res = _mm_packus_epi16(res16, res16);
-
- _mm_storel_epi64((__m128i *)(dst + i * dst_stride + j), res);
- j += 8;
- } while (j < w);
- } while (++i < h);
- }
-}
diff --git a/third_party/aom/av1/common/x86/filterintra_sse4.c b/third_party/aom/av1/common/x86/filterintra_sse4.c
deleted file mode 100644
index c11edc1d4..000000000
--- a/third_party/aom/av1/common/x86/filterintra_sse4.c
+++ /dev/null
@@ -1,75 +0,0 @@
-/*
- * Copyright (c) 2018, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#include <smmintrin.h>
-
-#include "config/av1_rtcd.h"
-
-#include "aom_dsp/x86/synonyms.h"
-#include "av1/common/enums.h"
-#include "av1/common/reconintra.h"
-
-void av1_filter_intra_predictor_sse4_1(uint8_t *dst, ptrdiff_t stride,
- TX_SIZE tx_size, const uint8_t *above,
- const uint8_t *left, int mode) {
- int r, c;
- uint8_t buffer[33][33];
- const int bw = tx_size_wide[tx_size];
- const int bh = tx_size_high[tx_size];
-
- assert(bw <= 32 && bh <= 32);
-
- // The initialization is just for silencing Jenkins static analysis warnings
- for (r = 0; r < bh + 1; ++r)
- memset(buffer[r], 0, (bw + 1) * sizeof(buffer[0][0]));
-
- for (r = 0; r < bh; ++r) buffer[r + 1][0] = left[r];
- memcpy(buffer[0], &above[-1], (bw + 1) * sizeof(uint8_t));
-
- const __m128i f1f0 = xx_load_128(av1_filter_intra_taps[mode][0]);
- const __m128i f3f2 = xx_load_128(av1_filter_intra_taps[mode][2]);
- const __m128i f5f4 = xx_load_128(av1_filter_intra_taps[mode][4]);
- const __m128i f7f6 = xx_load_128(av1_filter_intra_taps[mode][6]);
- const __m128i filter_intra_scale_bits =
- _mm_set1_epi16(1 << (15 - FILTER_INTRA_SCALE_BITS));
-
- for (r = 1; r < bh + 1; r += 2) {
- for (c = 1; c < bw + 1; c += 4) {
- DECLARE_ALIGNED(16, uint8_t, p[8]);
- memcpy(p, &buffer[r - 1][c - 1], 5 * sizeof(uint8_t));
- p[5] = buffer[r][c - 1];
- p[6] = buffer[r + 1][c - 1];
- p[7] = 0;
- const __m128i p_b = xx_loadl_64(p);
- const __m128i in = _mm_unpacklo_epi64(p_b, p_b);
- const __m128i out_01 = _mm_maddubs_epi16(in, f1f0);
- const __m128i out_23 = _mm_maddubs_epi16(in, f3f2);
- const __m128i out_45 = _mm_maddubs_epi16(in, f5f4);
- const __m128i out_67 = _mm_maddubs_epi16(in, f7f6);
- const __m128i out_0123 = _mm_hadd_epi16(out_01, out_23);
- const __m128i out_4567 = _mm_hadd_epi16(out_45, out_67);
- const __m128i out_01234567 = _mm_hadd_epi16(out_0123, out_4567);
- // Rounding
- const __m128i round_w =
- _mm_mulhrs_epi16(out_01234567, filter_intra_scale_bits);
- const __m128i out_r = _mm_packus_epi16(round_w, round_w);
- const __m128i out_r1 = _mm_srli_si128(out_r, 4);
- // Storing
- xx_storel_32(&buffer[r][c], out_r);
- xx_storel_32(&buffer[r + 1][c], out_r1);
- }
- }
-
- for (r = 0; r < bh; ++r) {
- memcpy(dst, &buffer[r + 1][1], bw * sizeof(uint8_t));
- dst += stride;
- }
-}
diff --git a/third_party/aom/av1/common/x86/highbd_convolve_2d_avx2.c b/third_party/aom/av1/common/x86/highbd_convolve_2d_avx2.c
deleted file mode 100644
index ae68f0bbb..000000000
--- a/third_party/aom/av1/common/x86/highbd_convolve_2d_avx2.c
+++ /dev/null
@@ -1,326 +0,0 @@
-/*
- * Copyright (c) 2017, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#include <immintrin.h>
-#include <assert.h>
-
-#include "config/aom_dsp_rtcd.h"
-
-#include "aom_dsp/x86/convolve_avx2.h"
-#include "aom_dsp/x86/synonyms.h"
-#include "aom_dsp/aom_dsp_common.h"
-#include "aom_dsp/aom_filter.h"
-#include "av1/common/convolve.h"
-
-void av1_highbd_convolve_2d_sr_avx2(const uint16_t *src, int src_stride,
- uint16_t *dst, int dst_stride, int w, int h,
- const InterpFilterParams *filter_params_x,
- const InterpFilterParams *filter_params_y,
- const int subpel_x_q4,
- const int subpel_y_q4,
- ConvolveParams *conv_params, int bd) {
- DECLARE_ALIGNED(32, int16_t, im_block[(MAX_SB_SIZE + MAX_FILTER_TAP) * 8]);
- int im_h = h + filter_params_y->taps - 1;
- int im_stride = 8;
- int i, j;
- const int fo_vert = filter_params_y->taps / 2 - 1;
- const int fo_horiz = filter_params_x->taps / 2 - 1;
- const uint16_t *const src_ptr = src - fo_vert * src_stride - fo_horiz;
-
- // Check that, even with 12-bit input, the intermediate values will fit
- // into an unsigned 16-bit intermediate array.
- assert(bd + FILTER_BITS + 2 - conv_params->round_0 <= 16);
-
- __m256i s[8], coeffs_y[4], coeffs_x[4];
-
- const __m256i round_const_x = _mm256_set1_epi32(
- ((1 << conv_params->round_0) >> 1) + (1 << (bd + FILTER_BITS - 1)));
- const __m128i round_shift_x = _mm_cvtsi32_si128(conv_params->round_0);
-
- const __m256i round_const_y = _mm256_set1_epi32(
- ((1 << conv_params->round_1) >> 1) -
- (1 << (bd + 2 * FILTER_BITS - conv_params->round_0 - 1)));
- const __m128i round_shift_y = _mm_cvtsi32_si128(conv_params->round_1);
-
- const int bits =
- FILTER_BITS * 2 - conv_params->round_0 - conv_params->round_1;
- const __m128i round_shift_bits = _mm_cvtsi32_si128(bits);
- const __m256i round_const_bits = _mm256_set1_epi32((1 << bits) >> 1);
- const __m256i clip_pixel =
- _mm256_set1_epi16(bd == 10 ? 1023 : (bd == 12 ? 4095 : 255));
- const __m256i zero = _mm256_setzero_si256();
-
- prepare_coeffs(filter_params_x, subpel_x_q4, coeffs_x);
- prepare_coeffs(filter_params_y, subpel_y_q4, coeffs_y);
-
- for (j = 0; j < w; j += 8) {
- /* Horizontal filter */
- {
- for (i = 0; i < im_h; i += 2) {
- const __m256i row0 =
- _mm256_loadu_si256((__m256i *)&src_ptr[i * src_stride + j]);
- __m256i row1 = _mm256_set1_epi16(0);
- if (i + 1 < im_h)
- row1 =
- _mm256_loadu_si256((__m256i *)&src_ptr[(i + 1) * src_stride + j]);
-
- const __m256i r0 = _mm256_permute2x128_si256(row0, row1, 0x20);
- const __m256i r1 = _mm256_permute2x128_si256(row0, row1, 0x31);
-
- // even pixels
- s[0] = _mm256_alignr_epi8(r1, r0, 0);
- s[1] = _mm256_alignr_epi8(r1, r0, 4);
- s[2] = _mm256_alignr_epi8(r1, r0, 8);
- s[3] = _mm256_alignr_epi8(r1, r0, 12);
-
- __m256i res_even = convolve(s, coeffs_x);
- res_even = _mm256_sra_epi32(_mm256_add_epi32(res_even, round_const_x),
- round_shift_x);
-
- // odd pixels
- s[0] = _mm256_alignr_epi8(r1, r0, 2);
- s[1] = _mm256_alignr_epi8(r1, r0, 6);
- s[2] = _mm256_alignr_epi8(r1, r0, 10);
- s[3] = _mm256_alignr_epi8(r1, r0, 14);
-
- __m256i res_odd = convolve(s, coeffs_x);
- res_odd = _mm256_sra_epi32(_mm256_add_epi32(res_odd, round_const_x),
- round_shift_x);
-
- __m256i res_even1 = _mm256_packs_epi32(res_even, res_even);
- __m256i res_odd1 = _mm256_packs_epi32(res_odd, res_odd);
- __m256i res = _mm256_unpacklo_epi16(res_even1, res_odd1);
-
- _mm256_store_si256((__m256i *)&im_block[i * im_stride], res);
- }
- }
-
- /* Vertical filter */
- {
- __m256i s0 = _mm256_loadu_si256((__m256i *)(im_block + 0 * im_stride));
- __m256i s1 = _mm256_loadu_si256((__m256i *)(im_block + 1 * im_stride));
- __m256i s2 = _mm256_loadu_si256((__m256i *)(im_block + 2 * im_stride));
- __m256i s3 = _mm256_loadu_si256((__m256i *)(im_block + 3 * im_stride));
- __m256i s4 = _mm256_loadu_si256((__m256i *)(im_block + 4 * im_stride));
- __m256i s5 = _mm256_loadu_si256((__m256i *)(im_block + 5 * im_stride));
-
- s[0] = _mm256_unpacklo_epi16(s0, s1);
- s[1] = _mm256_unpacklo_epi16(s2, s3);
- s[2] = _mm256_unpacklo_epi16(s4, s5);
-
- s[4] = _mm256_unpackhi_epi16(s0, s1);
- s[5] = _mm256_unpackhi_epi16(s2, s3);
- s[6] = _mm256_unpackhi_epi16(s4, s5);
-
- for (i = 0; i < h; i += 2) {
- const int16_t *data = &im_block[i * im_stride];
-
- const __m256i s6 =
- _mm256_loadu_si256((__m256i *)(data + 6 * im_stride));
- const __m256i s7 =
- _mm256_loadu_si256((__m256i *)(data + 7 * im_stride));
-
- s[3] = _mm256_unpacklo_epi16(s6, s7);
- s[7] = _mm256_unpackhi_epi16(s6, s7);
-
- const __m256i res_a = convolve(s, coeffs_y);
- __m256i res_a_round = _mm256_sra_epi32(
- _mm256_add_epi32(res_a, round_const_y), round_shift_y);
-
- res_a_round = _mm256_sra_epi32(
- _mm256_add_epi32(res_a_round, round_const_bits), round_shift_bits);
-
- if (w - j > 4) {
- const __m256i res_b = convolve(s + 4, coeffs_y);
- __m256i res_b_round = _mm256_sra_epi32(
- _mm256_add_epi32(res_b, round_const_y), round_shift_y);
- res_b_round =
- _mm256_sra_epi32(_mm256_add_epi32(res_b_round, round_const_bits),
- round_shift_bits);
-
- __m256i res_16bit = _mm256_packs_epi32(res_a_round, res_b_round);
- res_16bit = _mm256_min_epi16(res_16bit, clip_pixel);
- res_16bit = _mm256_max_epi16(res_16bit, zero);
-
- _mm_storeu_si128((__m128i *)&dst[i * dst_stride + j],
- _mm256_castsi256_si128(res_16bit));
- _mm_storeu_si128((__m128i *)&dst[i * dst_stride + j + dst_stride],
- _mm256_extracti128_si256(res_16bit, 1));
- } else if (w == 4) {
- res_a_round = _mm256_packs_epi32(res_a_round, res_a_round);
- res_a_round = _mm256_min_epi16(res_a_round, clip_pixel);
- res_a_round = _mm256_max_epi16(res_a_round, zero);
-
- _mm_storel_epi64((__m128i *)&dst[i * dst_stride + j],
- _mm256_castsi256_si128(res_a_round));
- _mm_storel_epi64((__m128i *)&dst[i * dst_stride + j + dst_stride],
- _mm256_extracti128_si256(res_a_round, 1));
- } else {
- res_a_round = _mm256_packs_epi32(res_a_round, res_a_round);
- res_a_round = _mm256_min_epi16(res_a_round, clip_pixel);
- res_a_round = _mm256_max_epi16(res_a_round, zero);
-
- xx_storel_32((__m128i *)&dst[i * dst_stride + j],
- _mm256_castsi256_si128(res_a_round));
- xx_storel_32((__m128i *)&dst[i * dst_stride + j + dst_stride],
- _mm256_extracti128_si256(res_a_round, 1));
- }
-
- s[0] = s[1];
- s[1] = s[2];
- s[2] = s[3];
-
- s[4] = s[5];
- s[5] = s[6];
- s[6] = s[7];
- }
- }
- }
-}
-
-static INLINE void copy_64(const uint16_t *src, uint16_t *dst) {
- __m256i s[4];
- s[0] = _mm256_loadu_si256((__m256i *)(src + 0 * 16));
- s[1] = _mm256_loadu_si256((__m256i *)(src + 1 * 16));
- s[2] = _mm256_loadu_si256((__m256i *)(src + 2 * 16));
- s[3] = _mm256_loadu_si256((__m256i *)(src + 3 * 16));
- _mm256_storeu_si256((__m256i *)(dst + 0 * 16), s[0]);
- _mm256_storeu_si256((__m256i *)(dst + 1 * 16), s[1]);
- _mm256_storeu_si256((__m256i *)(dst + 2 * 16), s[2]);
- _mm256_storeu_si256((__m256i *)(dst + 3 * 16), s[3]);
-}
-
-static INLINE void copy_128(const uint16_t *src, uint16_t *dst) {
- __m256i s[8];
- s[0] = _mm256_loadu_si256((__m256i *)(src + 0 * 16));
- s[1] = _mm256_loadu_si256((__m256i *)(src + 1 * 16));
- s[2] = _mm256_loadu_si256((__m256i *)(src + 2 * 16));
- s[3] = _mm256_loadu_si256((__m256i *)(src + 3 * 16));
- s[4] = _mm256_loadu_si256((__m256i *)(src + 4 * 16));
- s[5] = _mm256_loadu_si256((__m256i *)(src + 5 * 16));
- s[6] = _mm256_loadu_si256((__m256i *)(src + 6 * 16));
- s[7] = _mm256_loadu_si256((__m256i *)(src + 7 * 16));
-
- _mm256_storeu_si256((__m256i *)(dst + 0 * 16), s[0]);
- _mm256_storeu_si256((__m256i *)(dst + 1 * 16), s[1]);
- _mm256_storeu_si256((__m256i *)(dst + 2 * 16), s[2]);
- _mm256_storeu_si256((__m256i *)(dst + 3 * 16), s[3]);
- _mm256_storeu_si256((__m256i *)(dst + 4 * 16), s[4]);
- _mm256_storeu_si256((__m256i *)(dst + 5 * 16), s[5]);
- _mm256_storeu_si256((__m256i *)(dst + 6 * 16), s[6]);
- _mm256_storeu_si256((__m256i *)(dst + 7 * 16), s[7]);
-}
-
-void av1_highbd_convolve_2d_copy_sr_avx2(
- const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w,
- int h, const InterpFilterParams *filter_params_x,
- const InterpFilterParams *filter_params_y, const int subpel_x_q4,
- const int subpel_y_q4, ConvolveParams *conv_params, int bd) {
- (void)filter_params_x;
- (void)filter_params_y;
- (void)subpel_x_q4;
- (void)subpel_y_q4;
- (void)conv_params;
- (void)bd;
-
- if (w >= 16) {
- assert(!((intptr_t)dst % 16));
- assert(!(dst_stride % 16));
- }
-
- if (w == 2) {
- do {
- memcpy(dst, src, 2 * sizeof(*src));
- src += src_stride;
- dst += dst_stride;
- memcpy(dst, src, 2 * sizeof(*src));
- src += src_stride;
- dst += dst_stride;
- h -= 2;
- } while (h);
- } else if (w == 4) {
- do {
- __m128i s[2];
- s[0] = _mm_loadl_epi64((__m128i *)src);
- src += src_stride;
- s[1] = _mm_loadl_epi64((__m128i *)src);
- src += src_stride;
- _mm_storel_epi64((__m128i *)dst, s[0]);
- dst += dst_stride;
- _mm_storel_epi64((__m128i *)dst, s[1]);
- dst += dst_stride;
- h -= 2;
- } while (h);
- } else if (w == 8) {
- do {
- __m128i s[2];
- s[0] = _mm_loadu_si128((__m128i *)src);
- src += src_stride;
- s[1] = _mm_loadu_si128((__m128i *)src);
- src += src_stride;
- _mm_store_si128((__m128i *)dst, s[0]);
- dst += dst_stride;
- _mm_store_si128((__m128i *)dst, s[1]);
- dst += dst_stride;
- h -= 2;
- } while (h);
- } else if (w == 16) {
- do {
- __m256i s[2];
- s[0] = _mm256_loadu_si256((__m256i *)src);
- src += src_stride;
- s[1] = _mm256_loadu_si256((__m256i *)src);
- src += src_stride;
- _mm256_storeu_si256((__m256i *)dst, s[0]);
- dst += dst_stride;
- _mm256_storeu_si256((__m256i *)dst, s[1]);
- dst += dst_stride;
- h -= 2;
- } while (h);
- } else if (w == 32) {
- do {
- __m256i s[4];
- s[0] = _mm256_loadu_si256((__m256i *)(src + 0 * 16));
- s[1] = _mm256_loadu_si256((__m256i *)(src + 1 * 16));
- src += src_stride;
- s[2] = _mm256_loadu_si256((__m256i *)(src + 0 * 16));
- s[3] = _mm256_loadu_si256((__m256i *)(src + 1 * 16));
- src += src_stride;
- _mm256_storeu_si256((__m256i *)(dst + 0 * 16), s[0]);
- _mm256_storeu_si256((__m256i *)(dst + 1 * 16), s[1]);
- dst += dst_stride;
- _mm256_storeu_si256((__m256i *)(dst + 0 * 16), s[2]);
- _mm256_storeu_si256((__m256i *)(dst + 1 * 16), s[3]);
- dst += dst_stride;
- h -= 2;
- } while (h);
- } else if (w == 64) {
- do {
- copy_64(src, dst);
- src += src_stride;
- dst += dst_stride;
- copy_64(src, dst);
- src += src_stride;
- dst += dst_stride;
- h -= 2;
- } while (h);
- } else {
- do {
- copy_128(src, dst);
- src += src_stride;
- dst += dst_stride;
- copy_128(src, dst);
- src += src_stride;
- dst += dst_stride;
- h -= 2;
- } while (h);
- }
-}
diff --git a/third_party/aom/av1/common/x86/highbd_convolve_2d_sse2.c b/third_party/aom/av1/common/x86/highbd_convolve_2d_sse2.c
deleted file mode 100644
index 15f8872c1..000000000
--- a/third_party/aom/av1/common/x86/highbd_convolve_2d_sse2.c
+++ /dev/null
@@ -1,191 +0,0 @@
-/*
- * Copyright (c) 2018, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-#include <emmintrin.h>
-#include <assert.h>
-
-#include "config/aom_dsp_rtcd.h"
-
-#include "aom_dsp/aom_filter.h"
-
-static INLINE void copy_64(const uint16_t *src, uint16_t *dst) {
- __m128i s[8];
- s[0] = _mm_loadu_si128((__m128i *)(src + 0 * 8));
- s[1] = _mm_loadu_si128((__m128i *)(src + 1 * 8));
- s[2] = _mm_loadu_si128((__m128i *)(src + 2 * 8));
- s[3] = _mm_loadu_si128((__m128i *)(src + 3 * 8));
- s[4] = _mm_loadu_si128((__m128i *)(src + 4 * 8));
- s[5] = _mm_loadu_si128((__m128i *)(src + 5 * 8));
- s[6] = _mm_loadu_si128((__m128i *)(src + 6 * 8));
- s[7] = _mm_loadu_si128((__m128i *)(src + 7 * 8));
- _mm_store_si128((__m128i *)(dst + 0 * 8), s[0]);
- _mm_store_si128((__m128i *)(dst + 1 * 8), s[1]);
- _mm_store_si128((__m128i *)(dst + 2 * 8), s[2]);
- _mm_store_si128((__m128i *)(dst + 3 * 8), s[3]);
- _mm_store_si128((__m128i *)(dst + 4 * 8), s[4]);
- _mm_store_si128((__m128i *)(dst + 5 * 8), s[5]);
- _mm_store_si128((__m128i *)(dst + 6 * 8), s[6]);
- _mm_store_si128((__m128i *)(dst + 7 * 8), s[7]);
-}
-
-static INLINE void copy_128(const uint16_t *src, uint16_t *dst) {
- __m128i s[16];
- s[0] = _mm_loadu_si128((__m128i *)(src + 0 * 8));
- s[1] = _mm_loadu_si128((__m128i *)(src + 1 * 8));
- s[2] = _mm_loadu_si128((__m128i *)(src + 2 * 8));
- s[3] = _mm_loadu_si128((__m128i *)(src + 3 * 8));
- s[4] = _mm_loadu_si128((__m128i *)(src + 4 * 8));
- s[5] = _mm_loadu_si128((__m128i *)(src + 5 * 8));
- s[6] = _mm_loadu_si128((__m128i *)(src + 6 * 8));
- s[7] = _mm_loadu_si128((__m128i *)(src + 7 * 8));
- s[8] = _mm_loadu_si128((__m128i *)(src + 8 * 8));
- s[9] = _mm_loadu_si128((__m128i *)(src + 9 * 8));
- s[10] = _mm_loadu_si128((__m128i *)(src + 10 * 8));
- s[11] = _mm_loadu_si128((__m128i *)(src + 11 * 8));
- s[12] = _mm_loadu_si128((__m128i *)(src + 12 * 8));
- s[13] = _mm_loadu_si128((__m128i *)(src + 13 * 8));
- s[14] = _mm_loadu_si128((__m128i *)(src + 14 * 8));
- s[15] = _mm_loadu_si128((__m128i *)(src + 15 * 8));
- _mm_store_si128((__m128i *)(dst + 0 * 8), s[0]);
- _mm_store_si128((__m128i *)(dst + 1 * 8), s[1]);
- _mm_store_si128((__m128i *)(dst + 2 * 8), s[2]);
- _mm_store_si128((__m128i *)(dst + 3 * 8), s[3]);
- _mm_store_si128((__m128i *)(dst + 4 * 8), s[4]);
- _mm_store_si128((__m128i *)(dst + 5 * 8), s[5]);
- _mm_store_si128((__m128i *)(dst + 6 * 8), s[6]);
- _mm_store_si128((__m128i *)(dst + 7 * 8), s[7]);
- _mm_store_si128((__m128i *)(dst + 8 * 8), s[8]);
- _mm_store_si128((__m128i *)(dst + 9 * 8), s[9]);
- _mm_store_si128((__m128i *)(dst + 10 * 8), s[10]);
- _mm_store_si128((__m128i *)(dst + 11 * 8), s[11]);
- _mm_store_si128((__m128i *)(dst + 12 * 8), s[12]);
- _mm_store_si128((__m128i *)(dst + 13 * 8), s[13]);
- _mm_store_si128((__m128i *)(dst + 14 * 8), s[14]);
- _mm_store_si128((__m128i *)(dst + 15 * 8), s[15]);
-}
-
-void av1_highbd_convolve_2d_copy_sr_sse2(
- const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w,
- int h, const InterpFilterParams *filter_params_x,
- const InterpFilterParams *filter_params_y, const int subpel_x_q4,
- const int subpel_y_q4, ConvolveParams *conv_params, int bd) {
- (void)filter_params_x;
- (void)filter_params_y;
- (void)subpel_x_q4;
- (void)subpel_y_q4;
- (void)conv_params;
- (void)bd;
- if (w >= 16) {
- assert(!((intptr_t)dst % 16));
- assert(!(dst_stride % 16));
- }
-
- if (w == 2) {
- do {
- __m128i s = _mm_loadl_epi64((__m128i *)src);
- *(uint32_t *)dst = _mm_cvtsi128_si32(s);
- src += src_stride;
- dst += dst_stride;
- s = _mm_loadl_epi64((__m128i *)src);
- *(uint32_t *)dst = _mm_cvtsi128_si32(s);
- src += src_stride;
- dst += dst_stride;
- h -= 2;
- } while (h);
- } else if (w == 4) {
- do {
- __m128i s[2];
- s[0] = _mm_loadl_epi64((__m128i *)src);
- src += src_stride;
- s[1] = _mm_loadl_epi64((__m128i *)src);
- src += src_stride;
- _mm_storel_epi64((__m128i *)dst, s[0]);
- dst += dst_stride;
- _mm_storel_epi64((__m128i *)dst, s[1]);
- dst += dst_stride;
- h -= 2;
- } while (h);
- } else if (w == 8) {
- do {
- __m128i s[2];
- s[0] = _mm_loadu_si128((__m128i *)src);
- src += src_stride;
- s[1] = _mm_loadu_si128((__m128i *)src);
- src += src_stride;
- _mm_store_si128((__m128i *)dst, s[0]);
- dst += dst_stride;
- _mm_store_si128((__m128i *)dst, s[1]);
- dst += dst_stride;
- h -= 2;
- } while (h);
- } else if (w == 16) {
- do {
- __m128i s[4];
- s[0] = _mm_loadu_si128((__m128i *)(src + 0 * 8));
- s[1] = _mm_loadu_si128((__m128i *)(src + 1 * 8));
- src += src_stride;
- s[2] = _mm_loadu_si128((__m128i *)(src + 0 * 8));
- s[3] = _mm_loadu_si128((__m128i *)(src + 1 * 8));
- src += src_stride;
- _mm_store_si128((__m128i *)(dst + 0 * 8), s[0]);
- _mm_store_si128((__m128i *)(dst + 1 * 8), s[1]);
- dst += dst_stride;
- _mm_store_si128((__m128i *)(dst + 0 * 8), s[2]);
- _mm_store_si128((__m128i *)(dst + 1 * 8), s[3]);
- dst += dst_stride;
- h -= 2;
- } while (h);
- } else if (w == 32) {
- do {
- __m128i s[8];
- s[0] = _mm_loadu_si128((__m128i *)(src + 0 * 8));
- s[1] = _mm_loadu_si128((__m128i *)(src + 1 * 8));
- s[2] = _mm_loadu_si128((__m128i *)(src + 2 * 8));
- s[3] = _mm_loadu_si128((__m128i *)(src + 3 * 8));
- src += src_stride;
- s[4] = _mm_loadu_si128((__m128i *)(src + 0 * 8));
- s[5] = _mm_loadu_si128((__m128i *)(src + 1 * 8));
- s[6] = _mm_loadu_si128((__m128i *)(src + 2 * 8));
- s[7] = _mm_loadu_si128((__m128i *)(src + 3 * 8));
- src += src_stride;
- _mm_store_si128((__m128i *)(dst + 0 * 8), s[0]);
- _mm_store_si128((__m128i *)(dst + 1 * 8), s[1]);
- _mm_store_si128((__m128i *)(dst + 2 * 8), s[2]);
- _mm_store_si128((__m128i *)(dst + 3 * 8), s[3]);
- dst += dst_stride;
- _mm_store_si128((__m128i *)(dst + 0 * 8), s[4]);
- _mm_store_si128((__m128i *)(dst + 1 * 8), s[5]);
- _mm_store_si128((__m128i *)(dst + 2 * 8), s[6]);
- _mm_store_si128((__m128i *)(dst + 3 * 8), s[7]);
- dst += dst_stride;
- h -= 2;
- } while (h);
- } else if (w == 64) {
- do {
- copy_64(src, dst);
- src += src_stride;
- dst += dst_stride;
- copy_64(src, dst);
- src += src_stride;
- dst += dst_stride;
- h -= 2;
- } while (h);
- } else {
- do {
- copy_128(src, dst);
- src += src_stride;
- dst += dst_stride;
- copy_128(src, dst);
- src += src_stride;
- dst += dst_stride;
- h -= 2;
- } while (h);
- }
-}
diff --git a/third_party/aom/av1/common/x86/highbd_convolve_2d_sse4.c b/third_party/aom/av1/common/x86/highbd_convolve_2d_sse4.c
deleted file mode 100644
index 3f8dafb4b..000000000
--- a/third_party/aom/av1/common/x86/highbd_convolve_2d_sse4.c
+++ /dev/null
@@ -1,420 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#include <tmmintrin.h>
-#include <smmintrin.h>
-#include <assert.h>
-
-#include "config/aom_dsp_rtcd.h"
-
-#include "aom_dsp/aom_dsp_common.h"
-#include "aom_dsp/aom_filter.h"
-#include "aom_dsp/x86/convolve_sse2.h"
-#include "aom_dsp/x86/convolve_sse4_1.h"
-#include "av1/common/convolve.h"
-
-void av1_highbd_jnt_convolve_2d_copy_sse4_1(
- const uint16_t *src, int src_stride, uint16_t *dst0, int dst_stride0, int w,
- int h, const InterpFilterParams *filter_params_x,
- const InterpFilterParams *filter_params_y, const int subpel_x_q4,
- const int subpel_y_q4, ConvolveParams *conv_params, int bd) {
- CONV_BUF_TYPE *dst = conv_params->dst;
- int dst_stride = conv_params->dst_stride;
- (void)filter_params_x;
- (void)filter_params_y;
- (void)subpel_x_q4;
- (void)subpel_y_q4;
-
- const int bits =
- FILTER_BITS * 2 - conv_params->round_1 - conv_params->round_0;
- const __m128i left_shift = _mm_cvtsi32_si128(bits);
- const int do_average = conv_params->do_average;
- const int use_jnt_comp_avg = conv_params->use_jnt_comp_avg;
- const int w0 = conv_params->fwd_offset;
- const int w1 = conv_params->bck_offset;
- const __m128i wt0 = _mm_set1_epi32(w0);
- const __m128i wt1 = _mm_set1_epi32(w1);
- const __m128i zero = _mm_setzero_si128();
- int i, j;
-
- const int offset_0 =
- bd + 2 * FILTER_BITS - conv_params->round_0 - conv_params->round_1;
- const int offset = (1 << offset_0) + (1 << (offset_0 - 1));
- const __m128i offset_const = _mm_set1_epi32(offset);
- const __m128i offset_const_16b = _mm_set1_epi16(offset);
- const int rounding_shift =
- 2 * FILTER_BITS - conv_params->round_0 - conv_params->round_1;
- const __m128i rounding_const = _mm_set1_epi32((1 << rounding_shift) >> 1);
- const __m128i clip_pixel_to_bd =
- _mm_set1_epi16(bd == 10 ? 1023 : (bd == 12 ? 4095 : 255));
-
- assert(bits <= 4);
-
- if (!(w % 8)) {
- for (i = 0; i < h; i += 1) {
- for (j = 0; j < w; j += 8) {
- const __m128i src_16bit =
- _mm_loadu_si128((__m128i *)(&src[i * src_stride + j]));
- const __m128i res = _mm_sll_epi16(src_16bit, left_shift);
- if (do_average) {
- const __m128i data_0 =
- _mm_loadu_si128((__m128i *)(&dst[i * dst_stride + j]));
-
- const __m128i data_ref_0_lo = _mm_unpacklo_epi16(data_0, zero);
- const __m128i data_ref_0_hi = _mm_unpackhi_epi16(data_0, zero);
-
- const __m128i res_32b_lo = _mm_unpacklo_epi16(res, zero);
- const __m128i res_unsigned_lo =
- _mm_add_epi32(res_32b_lo, offset_const);
-
- const __m128i comp_avg_res_lo = highbd_comp_avg_sse4_1(
- &data_ref_0_lo, &res_unsigned_lo, &wt0, &wt1, use_jnt_comp_avg);
-
- const __m128i res_32b_hi = _mm_unpackhi_epi16(res, zero);
- const __m128i res_unsigned_hi =
- _mm_add_epi32(res_32b_hi, offset_const);
-
- const __m128i comp_avg_res_hi = highbd_comp_avg_sse4_1(
- &data_ref_0_hi, &res_unsigned_hi, &wt0, &wt1, use_jnt_comp_avg);
-
- const __m128i round_result_lo = highbd_convolve_rounding_sse2(
- &comp_avg_res_lo, &offset_const, &rounding_const, rounding_shift);
- const __m128i round_result_hi = highbd_convolve_rounding_sse2(
- &comp_avg_res_hi, &offset_const, &rounding_const, rounding_shift);
-
- const __m128i res_16b =
- _mm_packus_epi32(round_result_lo, round_result_hi);
- const __m128i res_clip = _mm_min_epi16(res_16b, clip_pixel_to_bd);
-
- _mm_store_si128((__m128i *)(&dst0[i * dst_stride0 + j]), res_clip);
- } else {
- const __m128i res_unsigned_16b =
- _mm_adds_epu16(res, offset_const_16b);
-
- _mm_store_si128((__m128i *)(&dst[i * dst_stride + j]),
- res_unsigned_16b);
- }
- }
- }
- } else if (!(w % 4)) {
- for (i = 0; i < h; i += 2) {
- for (j = 0; j < w; j += 4) {
- const __m128i src_row_0 =
- _mm_loadl_epi64((__m128i *)(&src[i * src_stride + j]));
- const __m128i src_row_1 =
- _mm_loadl_epi64((__m128i *)(&src[i * src_stride + j + src_stride]));
- const __m128i src_10 = _mm_unpacklo_epi64(src_row_0, src_row_1);
-
- const __m128i res = _mm_sll_epi16(src_10, left_shift);
-
- if (do_average) {
- const __m128i data_0 =
- _mm_loadl_epi64((__m128i *)(&dst[i * dst_stride + j]));
- const __m128i data_1 = _mm_loadl_epi64(
- (__m128i *)(&dst[i * dst_stride + j + dst_stride]));
-
- const __m128i data_ref_0 = _mm_unpacklo_epi16(data_0, zero);
- const __m128i data_ref_1 = _mm_unpacklo_epi16(data_1, zero);
-
- const __m128i res_32b = _mm_unpacklo_epi16(res, zero);
- const __m128i res_unsigned_lo = _mm_add_epi32(res_32b, offset_const);
-
- const __m128i res_32b_hi = _mm_unpackhi_epi16(res, zero);
- const __m128i res_unsigned_hi =
- _mm_add_epi32(res_32b_hi, offset_const);
-
- const __m128i comp_avg_res_lo = highbd_comp_avg_sse4_1(
- &data_ref_0, &res_unsigned_lo, &wt0, &wt1, use_jnt_comp_avg);
- const __m128i comp_avg_res_hi = highbd_comp_avg_sse4_1(
- &data_ref_1, &res_unsigned_hi, &wt0, &wt1, use_jnt_comp_avg);
-
- const __m128i round_result_lo = highbd_convolve_rounding_sse2(
- &comp_avg_res_lo, &offset_const, &rounding_const, rounding_shift);
- const __m128i round_result_hi = highbd_convolve_rounding_sse2(
- &comp_avg_res_hi, &offset_const, &rounding_const, rounding_shift);
-
- const __m128i res_16b =
- _mm_packus_epi32(round_result_lo, round_result_hi);
- const __m128i res_clip = _mm_min_epi16(res_16b, clip_pixel_to_bd);
-
- const __m128i res_1 = _mm_srli_si128(res_clip, 8);
-
- _mm_storel_epi64((__m128i *)(&dst0[i * dst_stride0 + j]), res_clip);
- _mm_storel_epi64(
- (__m128i *)(&dst0[i * dst_stride0 + j + dst_stride0]), res_1);
- } else {
- const __m128i res_unsigned_16b =
- _mm_adds_epu16(res, offset_const_16b);
-
- const __m128i res_1 = _mm_srli_si128(res_unsigned_16b, 8);
-
- _mm_storel_epi64((__m128i *)(&dst[i * dst_stride + j]),
- res_unsigned_16b);
- _mm_storel_epi64((__m128i *)(&dst[i * dst_stride + j + dst_stride]),
- res_1);
- }
- }
- }
- }
-}
-
-void av1_highbd_jnt_convolve_2d_sse4_1(
- const uint16_t *src, int src_stride, uint16_t *dst0, int dst_stride0, int w,
- int h, const InterpFilterParams *filter_params_x,
- const InterpFilterParams *filter_params_y, const int subpel_x_q4,
- const int subpel_y_q4, ConvolveParams *conv_params, int bd) {
- DECLARE_ALIGNED(16, int16_t,
- im_block[(MAX_SB_SIZE + MAX_FILTER_TAP - 1) * MAX_SB_SIZE]);
- CONV_BUF_TYPE *dst = conv_params->dst;
- int dst_stride = conv_params->dst_stride;
- int im_h = h + filter_params_y->taps - 1;
- int im_stride = MAX_SB_SIZE;
- int i, j;
- const int do_average = conv_params->do_average;
- const int use_jnt_comp_avg = conv_params->use_jnt_comp_avg;
- const int fo_vert = filter_params_y->taps / 2 - 1;
- const int fo_horiz = filter_params_x->taps / 2 - 1;
- const uint16_t *const src_ptr = src - fo_vert * src_stride - fo_horiz;
-
- const int w0 = conv_params->fwd_offset;
- const int w1 = conv_params->bck_offset;
- const __m128i wt0 = _mm_set1_epi32(w0);
- const __m128i wt1 = _mm_set1_epi32(w1);
-
- const int offset_0 =
- bd + 2 * FILTER_BITS - conv_params->round_0 - conv_params->round_1;
- const int offset = (1 << offset_0) + (1 << (offset_0 - 1));
- const __m128i offset_const = _mm_set1_epi32(offset);
- const int rounding_shift =
- 2 * FILTER_BITS - conv_params->round_0 - conv_params->round_1;
- const __m128i rounding_const = _mm_set1_epi32((1 << rounding_shift) >> 1);
- const __m128i clip_pixel_to_bd =
- _mm_set1_epi16(bd == 10 ? 1023 : (bd == 12 ? 4095 : 255));
-
- // Check that, even with 12-bit input, the intermediate values will fit
- // into an unsigned 16-bit intermediate array.
- assert(bd + FILTER_BITS + 2 - conv_params->round_0 <= 16);
-
- /* Horizontal filter */
- {
- const int16_t *x_filter = av1_get_interp_filter_subpel_kernel(
- filter_params_x, subpel_x_q4 & SUBPEL_MASK);
- const __m128i coeffs_x = _mm_loadu_si128((__m128i *)x_filter);
-
- // coeffs 0 1 0 1 2 3 2 3
- const __m128i tmp_0 = _mm_unpacklo_epi32(coeffs_x, coeffs_x);
- // coeffs 4 5 4 5 6 7 6 7
- const __m128i tmp_1 = _mm_unpackhi_epi32(coeffs_x, coeffs_x);
-
- // coeffs 0 1 0 1 0 1 0 1
- const __m128i coeff_01 = _mm_unpacklo_epi64(tmp_0, tmp_0);
- // coeffs 2 3 2 3 2 3 2 3
- const __m128i coeff_23 = _mm_unpackhi_epi64(tmp_0, tmp_0);
- // coeffs 4 5 4 5 4 5 4 5
- const __m128i coeff_45 = _mm_unpacklo_epi64(tmp_1, tmp_1);
- // coeffs 6 7 6 7 6 7 6 7
- const __m128i coeff_67 = _mm_unpackhi_epi64(tmp_1, tmp_1);
-
- const __m128i round_const = _mm_set1_epi32(
- ((1 << conv_params->round_0) >> 1) + (1 << (bd + FILTER_BITS - 1)));
- const __m128i round_shift = _mm_cvtsi32_si128(conv_params->round_0);
-
- for (i = 0; i < im_h; ++i) {
- for (j = 0; j < w; j += 8) {
- const __m128i data =
- _mm_loadu_si128((__m128i *)&src_ptr[i * src_stride + j]);
- const __m128i data2 =
- _mm_loadu_si128((__m128i *)&src_ptr[i * src_stride + j + 8]);
-
- // Filter even-index pixels
- const __m128i res_0 = _mm_madd_epi16(data, coeff_01);
- const __m128i res_2 =
- _mm_madd_epi16(_mm_alignr_epi8(data2, data, 4), coeff_23);
- const __m128i res_4 =
- _mm_madd_epi16(_mm_alignr_epi8(data2, data, 8), coeff_45);
- const __m128i res_6 =
- _mm_madd_epi16(_mm_alignr_epi8(data2, data, 12), coeff_67);
-
- __m128i res_even = _mm_add_epi32(_mm_add_epi32(res_0, res_4),
- _mm_add_epi32(res_2, res_6));
- res_even =
- _mm_sra_epi32(_mm_add_epi32(res_even, round_const), round_shift);
-
- // Filter odd-index pixels
- const __m128i res_1 =
- _mm_madd_epi16(_mm_alignr_epi8(data2, data, 2), coeff_01);
- const __m128i res_3 =
- _mm_madd_epi16(_mm_alignr_epi8(data2, data, 6), coeff_23);
- const __m128i res_5 =
- _mm_madd_epi16(_mm_alignr_epi8(data2, data, 10), coeff_45);
- const __m128i res_7 =
- _mm_madd_epi16(_mm_alignr_epi8(data2, data, 14), coeff_67);
-
- __m128i res_odd = _mm_add_epi32(_mm_add_epi32(res_1, res_5),
- _mm_add_epi32(res_3, res_7));
- res_odd =
- _mm_sra_epi32(_mm_add_epi32(res_odd, round_const), round_shift);
-
- // Pack in the column order 0, 2, 4, 6, 1, 3, 5, 7
- __m128i res = _mm_packs_epi32(res_even, res_odd);
- _mm_storeu_si128((__m128i *)&im_block[i * im_stride + j], res);
- }
- }
- }
-
- /* Vertical filter */
- {
- const int16_t *y_filter = av1_get_interp_filter_subpel_kernel(
- filter_params_y, subpel_y_q4 & SUBPEL_MASK);
- const __m128i coeffs_y = _mm_loadu_si128((__m128i *)y_filter);
-
- // coeffs 0 1 0 1 2 3 2 3
- const __m128i tmp_0 = _mm_unpacklo_epi32(coeffs_y, coeffs_y);
- // coeffs 4 5 4 5 6 7 6 7
- const __m128i tmp_1 = _mm_unpackhi_epi32(coeffs_y, coeffs_y);
-
- // coeffs 0 1 0 1 0 1 0 1
- const __m128i coeff_01 = _mm_unpacklo_epi64(tmp_0, tmp_0);
- // coeffs 2 3 2 3 2 3 2 3
- const __m128i coeff_23 = _mm_unpackhi_epi64(tmp_0, tmp_0);
- // coeffs 4 5 4 5 4 5 4 5
- const __m128i coeff_45 = _mm_unpacklo_epi64(tmp_1, tmp_1);
- // coeffs 6 7 6 7 6 7 6 7
- const __m128i coeff_67 = _mm_unpackhi_epi64(tmp_1, tmp_1);
-
- const __m128i round_const = _mm_set1_epi32(
- ((1 << conv_params->round_1) >> 1) -
- (1 << (bd + 2 * FILTER_BITS - conv_params->round_0 - 1)));
- const __m128i round_shift = _mm_cvtsi32_si128(conv_params->round_1);
-
- for (i = 0; i < h; ++i) {
- for (j = 0; j < w; j += 8) {
- // Filter even-index pixels
- const int16_t *data = &im_block[i * im_stride + j];
- const __m128i src_0 =
- _mm_unpacklo_epi16(*(__m128i *)(data + 0 * im_stride),
- *(__m128i *)(data + 1 * im_stride));
- const __m128i src_2 =
- _mm_unpacklo_epi16(*(__m128i *)(data + 2 * im_stride),
- *(__m128i *)(data + 3 * im_stride));
- const __m128i src_4 =
- _mm_unpacklo_epi16(*(__m128i *)(data + 4 * im_stride),
- *(__m128i *)(data + 5 * im_stride));
- const __m128i src_6 =
- _mm_unpacklo_epi16(*(__m128i *)(data + 6 * im_stride),
- *(__m128i *)(data + 7 * im_stride));
-
- const __m128i res_0 = _mm_madd_epi16(src_0, coeff_01);
- const __m128i res_2 = _mm_madd_epi16(src_2, coeff_23);
- const __m128i res_4 = _mm_madd_epi16(src_4, coeff_45);
- const __m128i res_6 = _mm_madd_epi16(src_6, coeff_67);
-
- const __m128i res_even = _mm_add_epi32(_mm_add_epi32(res_0, res_2),
- _mm_add_epi32(res_4, res_6));
-
- // Filter odd-index pixels
- const __m128i src_1 =
- _mm_unpackhi_epi16(*(__m128i *)(data + 0 * im_stride),
- *(__m128i *)(data + 1 * im_stride));
- const __m128i src_3 =
- _mm_unpackhi_epi16(*(__m128i *)(data + 2 * im_stride),
- *(__m128i *)(data + 3 * im_stride));
- const __m128i src_5 =
- _mm_unpackhi_epi16(*(__m128i *)(data + 4 * im_stride),
- *(__m128i *)(data + 5 * im_stride));
- const __m128i src_7 =
- _mm_unpackhi_epi16(*(__m128i *)(data + 6 * im_stride),
- *(__m128i *)(data + 7 * im_stride));
-
- const __m128i res_1 = _mm_madd_epi16(src_1, coeff_01);
- const __m128i res_3 = _mm_madd_epi16(src_3, coeff_23);
- const __m128i res_5 = _mm_madd_epi16(src_5, coeff_45);
- const __m128i res_7 = _mm_madd_epi16(src_7, coeff_67);
-
- const __m128i res_odd = _mm_add_epi32(_mm_add_epi32(res_1, res_3),
- _mm_add_epi32(res_5, res_7));
-
- // Rearrange pixels back into the order 0 ... 7
- const __m128i res_lo = _mm_unpacklo_epi32(res_even, res_odd);
- const __m128i res_hi = _mm_unpackhi_epi32(res_even, res_odd);
-
- const __m128i res_lo_round =
- _mm_sra_epi32(_mm_add_epi32(res_lo, round_const), round_shift);
-
- const __m128i res_unsigned_lo =
- _mm_add_epi32(res_lo_round, offset_const);
-
- if (w < 8) {
- if (do_average) {
- const __m128i data_0 =
- _mm_loadl_epi64((__m128i *)(&dst[i * dst_stride + j]));
-
- const __m128i data_ref_0 = _mm_cvtepu16_epi32(data_0);
-
- const __m128i comp_avg_res = highbd_comp_avg_sse4_1(
- &data_ref_0, &res_unsigned_lo, &wt0, &wt1, use_jnt_comp_avg);
-
- const __m128i round_result = highbd_convolve_rounding_sse2(
- &comp_avg_res, &offset_const, &rounding_const, rounding_shift);
-
- const __m128i res_16b =
- _mm_packus_epi32(round_result, round_result);
- const __m128i res_clip = _mm_min_epi16(res_16b, clip_pixel_to_bd);
-
- _mm_storel_epi64((__m128i *)(&dst0[i * dst_stride0 + j]), res_clip);
- } else {
- const __m128i res_16b =
- _mm_packus_epi32(res_unsigned_lo, res_unsigned_lo);
- _mm_storel_epi64((__m128i *)(&dst[i * dst_stride + j]), res_16b);
- }
- } else {
- const __m128i res_hi_round =
- _mm_sra_epi32(_mm_add_epi32(res_hi, round_const), round_shift);
-
- const __m128i res_unsigned_hi =
- _mm_add_epi32(res_hi_round, offset_const);
-
- if (do_average) {
- const __m128i data_lo =
- _mm_loadl_epi64((__m128i *)(&dst[i * dst_stride + j]));
- const __m128i data_hi =
- _mm_loadl_epi64((__m128i *)(&dst[i * dst_stride + j + 4]));
-
- const __m128i data_ref_0_lo = _mm_cvtepu16_epi32(data_lo);
- const __m128i data_ref_0_hi = _mm_cvtepu16_epi32(data_hi);
-
- const __m128i comp_avg_res_lo = highbd_comp_avg_sse4_1(
- &data_ref_0_lo, &res_unsigned_lo, &wt0, &wt1, use_jnt_comp_avg);
- const __m128i comp_avg_res_hi = highbd_comp_avg_sse4_1(
- &data_ref_0_hi, &res_unsigned_hi, &wt0, &wt1, use_jnt_comp_avg);
-
- const __m128i round_result_lo =
- highbd_convolve_rounding_sse2(&comp_avg_res_lo, &offset_const,
- &rounding_const, rounding_shift);
- const __m128i round_result_hi =
- highbd_convolve_rounding_sse2(&comp_avg_res_hi, &offset_const,
- &rounding_const, rounding_shift);
-
- const __m128i res_16b =
- _mm_packus_epi32(round_result_lo, round_result_hi);
- const __m128i res_clip = _mm_min_epi16(res_16b, clip_pixel_to_bd);
-
- _mm_store_si128((__m128i *)(&dst0[i * dst_stride0 + j]), res_clip);
- } else {
- const __m128i res_16b =
- _mm_packus_epi32(res_unsigned_lo, res_unsigned_hi);
- _mm_store_si128((__m128i *)(&dst[i * dst_stride + j]), res_16b);
- }
- }
- }
- }
- }
-}
diff --git a/third_party/aom/av1/common/x86/highbd_convolve_2d_ssse3.c b/third_party/aom/av1/common/x86/highbd_convolve_2d_ssse3.c
deleted file mode 100644
index 1d029db39..000000000
--- a/third_party/aom/av1/common/x86/highbd_convolve_2d_ssse3.c
+++ /dev/null
@@ -1,217 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#include <tmmintrin.h>
-#include <assert.h>
-
-#include "config/aom_dsp_rtcd.h"
-
-#include "aom_dsp/aom_dsp_common.h"
-#include "aom_dsp/aom_filter.h"
-#include "aom_dsp/x86/convolve_sse2.h"
-#include "av1/common/convolve.h"
-
-void av1_highbd_convolve_2d_sr_ssse3(
- const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w,
- int h, const InterpFilterParams *filter_params_x,
- const InterpFilterParams *filter_params_y, const int subpel_x_q4,
- const int subpel_y_q4, ConvolveParams *conv_params, int bd) {
- DECLARE_ALIGNED(32, int16_t, im_block[(MAX_SB_SIZE + MAX_FILTER_TAP) * 8]);
- int im_h = h + filter_params_y->taps - 1;
- int im_stride = 8;
- int i, j;
- const int fo_vert = filter_params_y->taps / 2 - 1;
- const int fo_horiz = filter_params_x->taps / 2 - 1;
- const uint16_t *const src_ptr = src - fo_vert * src_stride - fo_horiz;
-
- // Check that, even with 12-bit input, the intermediate values will fit
- // into an unsigned 16-bit intermediate array.
- assert(bd + FILTER_BITS + 2 - conv_params->round_0 <= 16);
- __m128i coeffs_x[4], coeffs_y[4], s[16];
-
- const __m128i round_const_x = _mm_set1_epi32(
- ((1 << conv_params->round_0) >> 1) + (1 << (bd + FILTER_BITS - 1)));
- const __m128i round_shift_x = _mm_cvtsi32_si128(conv_params->round_0);
-
- const __m128i round_const_y =
- _mm_set1_epi32(((1 << conv_params->round_1) >> 1) -
- (1 << (bd + 2 * FILTER_BITS - conv_params->round_0 - 1)));
- const __m128i round_shift_y = _mm_cvtsi32_si128(conv_params->round_1);
-
- const int bits =
- FILTER_BITS * 2 - conv_params->round_0 - conv_params->round_1;
- const __m128i round_shift_bits = _mm_cvtsi32_si128(bits);
- const __m128i round_const_bits = _mm_set1_epi32((1 << bits) >> 1);
- const __m128i clip_pixel =
- _mm_set1_epi16(bd == 10 ? 1023 : (bd == 12 ? 4095 : 255));
- const __m128i zero = _mm_setzero_si128();
-
- prepare_coeffs(filter_params_x, subpel_x_q4, coeffs_x);
- prepare_coeffs(filter_params_y, subpel_y_q4, coeffs_y);
-
- for (j = 0; j < w; j += 8) {
- /* Horizontal filter */
- {
- for (i = 0; i < im_h; i += 1) {
- const __m128i row00 =
- _mm_loadu_si128((__m128i *)&src_ptr[i * src_stride + j]);
- const __m128i row01 =
- _mm_loadu_si128((__m128i *)&src_ptr[i * src_stride + (j + 8)]);
-
- // even pixels
- s[0] = _mm_alignr_epi8(row01, row00, 0);
- s[1] = _mm_alignr_epi8(row01, row00, 4);
- s[2] = _mm_alignr_epi8(row01, row00, 8);
- s[3] = _mm_alignr_epi8(row01, row00, 12);
-
- __m128i res_even = convolve(s, coeffs_x);
- res_even = _mm_sra_epi32(_mm_add_epi32(res_even, round_const_x),
- round_shift_x);
-
- // odd pixels
- s[0] = _mm_alignr_epi8(row01, row00, 2);
- s[1] = _mm_alignr_epi8(row01, row00, 6);
- s[2] = _mm_alignr_epi8(row01, row00, 10);
- s[3] = _mm_alignr_epi8(row01, row00, 14);
-
- __m128i res_odd = convolve(s, coeffs_x);
- res_odd =
- _mm_sra_epi32(_mm_add_epi32(res_odd, round_const_x), round_shift_x);
-
- __m128i res_even1 = _mm_packs_epi32(res_even, res_even);
- __m128i res_odd1 = _mm_packs_epi32(res_odd, res_odd);
- __m128i res = _mm_unpacklo_epi16(res_even1, res_odd1);
-
- _mm_store_si128((__m128i *)&im_block[i * im_stride], res);
- }
- }
- /* Vertical filter */
- {
- __m128i s0 = _mm_loadu_si128((__m128i *)(im_block + 0 * im_stride));
- __m128i s1 = _mm_loadu_si128((__m128i *)(im_block + 1 * im_stride));
- __m128i s2 = _mm_loadu_si128((__m128i *)(im_block + 2 * im_stride));
- __m128i s3 = _mm_loadu_si128((__m128i *)(im_block + 3 * im_stride));
- __m128i s4 = _mm_loadu_si128((__m128i *)(im_block + 4 * im_stride));
- __m128i s5 = _mm_loadu_si128((__m128i *)(im_block + 5 * im_stride));
- __m128i s6 = _mm_loadu_si128((__m128i *)(im_block + 6 * im_stride));
-
- s[0] = _mm_unpacklo_epi16(s0, s1);
- s[1] = _mm_unpacklo_epi16(s2, s3);
- s[2] = _mm_unpacklo_epi16(s4, s5);
-
- s[4] = _mm_unpackhi_epi16(s0, s1);
- s[5] = _mm_unpackhi_epi16(s2, s3);
- s[6] = _mm_unpackhi_epi16(s4, s5);
-
- s[0 + 8] = _mm_unpacklo_epi16(s1, s2);
- s[1 + 8] = _mm_unpacklo_epi16(s3, s4);
- s[2 + 8] = _mm_unpacklo_epi16(s5, s6);
-
- s[4 + 8] = _mm_unpackhi_epi16(s1, s2);
- s[5 + 8] = _mm_unpackhi_epi16(s3, s4);
- s[6 + 8] = _mm_unpackhi_epi16(s5, s6);
-
- for (i = 0; i < h; i += 2) {
- const int16_t *data = &im_block[i * im_stride];
-
- __m128i s7 = _mm_loadu_si128((__m128i *)(data + 7 * im_stride));
- __m128i s8 = _mm_loadu_si128((__m128i *)(data + 8 * im_stride));
-
- s[3] = _mm_unpacklo_epi16(s6, s7);
- s[7] = _mm_unpackhi_epi16(s6, s7);
-
- s[3 + 8] = _mm_unpacklo_epi16(s7, s8);
- s[7 + 8] = _mm_unpackhi_epi16(s7, s8);
-
- const __m128i res_a0 = convolve(s, coeffs_y);
- __m128i res_a_round0 =
- _mm_sra_epi32(_mm_add_epi32(res_a0, round_const_y), round_shift_y);
- res_a_round0 = _mm_sra_epi32(
- _mm_add_epi32(res_a_round0, round_const_bits), round_shift_bits);
-
- const __m128i res_a1 = convolve(s + 8, coeffs_y);
- __m128i res_a_round1 =
- _mm_sra_epi32(_mm_add_epi32(res_a1, round_const_y), round_shift_y);
- res_a_round1 = _mm_sra_epi32(
- _mm_add_epi32(res_a_round1, round_const_bits), round_shift_bits);
-
- if (w - j > 4) {
- const __m128i res_b0 = convolve(s + 4, coeffs_y);
- __m128i res_b_round0 = _mm_sra_epi32(
- _mm_add_epi32(res_b0, round_const_y), round_shift_y);
- res_b_round0 = _mm_sra_epi32(
- _mm_add_epi32(res_b_round0, round_const_bits), round_shift_bits);
-
- const __m128i res_b1 = convolve(s + 4 + 8, coeffs_y);
- __m128i res_b_round1 = _mm_sra_epi32(
- _mm_add_epi32(res_b1, round_const_y), round_shift_y);
- res_b_round1 = _mm_sra_epi32(
- _mm_add_epi32(res_b_round1, round_const_bits), round_shift_bits);
-
- __m128i res_16bit0 = _mm_packs_epi32(res_a_round0, res_b_round0);
- res_16bit0 = _mm_min_epi16(res_16bit0, clip_pixel);
- res_16bit0 = _mm_max_epi16(res_16bit0, zero);
-
- __m128i res_16bit1 = _mm_packs_epi32(res_a_round1, res_b_round1);
- res_16bit1 = _mm_min_epi16(res_16bit1, clip_pixel);
- res_16bit1 = _mm_max_epi16(res_16bit1, zero);
-
- _mm_storeu_si128((__m128i *)&dst[i * dst_stride + j], res_16bit0);
- _mm_storeu_si128((__m128i *)&dst[i * dst_stride + j + dst_stride],
- res_16bit1);
- } else if (w == 4) {
- res_a_round0 = _mm_packs_epi32(res_a_round0, res_a_round0);
- res_a_round0 = _mm_min_epi16(res_a_round0, clip_pixel);
- res_a_round0 = _mm_max_epi16(res_a_round0, zero);
-
- res_a_round1 = _mm_packs_epi32(res_a_round1, res_a_round1);
- res_a_round1 = _mm_min_epi16(res_a_round1, clip_pixel);
- res_a_round1 = _mm_max_epi16(res_a_round1, zero);
-
- _mm_storel_epi64((__m128i *)&dst[i * dst_stride + j], res_a_round0);
- _mm_storel_epi64((__m128i *)&dst[i * dst_stride + j + dst_stride],
- res_a_round1);
- } else {
- res_a_round0 = _mm_packs_epi32(res_a_round0, res_a_round0);
- res_a_round0 = _mm_min_epi16(res_a_round0, clip_pixel);
- res_a_round0 = _mm_max_epi16(res_a_round0, zero);
-
- res_a_round1 = _mm_packs_epi32(res_a_round1, res_a_round1);
- res_a_round1 = _mm_min_epi16(res_a_round1, clip_pixel);
- res_a_round1 = _mm_max_epi16(res_a_round1, zero);
-
- *((uint32_t *)(&dst[i * dst_stride + j])) =
- _mm_cvtsi128_si32(res_a_round0);
-
- *((uint32_t *)(&dst[i * dst_stride + j + dst_stride])) =
- _mm_cvtsi128_si32(res_a_round1);
- }
- s[0] = s[1];
- s[1] = s[2];
- s[2] = s[3];
-
- s[4] = s[5];
- s[5] = s[6];
- s[6] = s[7];
-
- s[0 + 8] = s[1 + 8];
- s[1 + 8] = s[2 + 8];
- s[2 + 8] = s[3 + 8];
-
- s[4 + 8] = s[5 + 8];
- s[5 + 8] = s[6 + 8];
- s[6 + 8] = s[7 + 8];
-
- s6 = s8;
- }
- }
- }
-}
diff --git a/third_party/aom/av1/common/x86/highbd_inv_txfm_avx2.c b/third_party/aom/av1/common/x86/highbd_inv_txfm_avx2.c
deleted file mode 100644
index ade2af03e..000000000
--- a/third_party/aom/av1/common/x86/highbd_inv_txfm_avx2.c
+++ /dev/null
@@ -1,1349 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-#include <assert.h>
-#include <immintrin.h>
-
-#include "config/aom_config.h"
-#include "config/av1_rtcd.h"
-
-#include "av1/common/av1_inv_txfm1d_cfg.h"
-#include "av1/common/idct.h"
-#include "av1/common/x86/av1_inv_txfm_ssse3.h"
-#include "av1/common/x86/highbd_txfm_utility_sse4.h"
-
-// Note:
-// Total 32x4 registers to represent 32x32 block coefficients.
-// For high bit depth, each coefficient is 4-byte.
-// Each __m256i register holds 8 coefficients.
-// So each "row" we needs 4 register. Totally 32 rows
-// Register layout:
-// v0, v1, v2, v3,
-// v4, v5, v6, v7,
-// ... ...
-// v124, v125, v126, v127
-
-static INLINE __m256i highbd_clamp_epi16_avx2(__m256i u, int bd) {
- const __m256i zero = _mm256_setzero_si256();
- const __m256i one = _mm256_set1_epi16(1);
- const __m256i max = _mm256_sub_epi16(_mm256_slli_epi16(one, bd), one);
- __m256i clamped, mask;
-
- mask = _mm256_cmpgt_epi16(u, max);
- clamped = _mm256_andnot_si256(mask, u);
- mask = _mm256_and_si256(mask, max);
- clamped = _mm256_or_si256(mask, clamped);
- mask = _mm256_cmpgt_epi16(clamped, zero);
- clamped = _mm256_and_si256(clamped, mask);
-
- return clamped;
-}
-
-static INLINE __m256i highbd_get_recon_16x8_avx2(const __m256i pred,
- __m256i res0, __m256i res1,
- const int bd) {
- __m256i x0 = _mm256_cvtepi16_epi32(_mm256_castsi256_si128(pred));
- __m256i x1 = _mm256_cvtepi16_epi32(_mm256_extractf128_si256(pred, 1));
-
- x0 = _mm256_add_epi32(res0, x0);
- x1 = _mm256_add_epi32(res1, x1);
- x0 = _mm256_packus_epi32(x0, x1);
- x0 = _mm256_permute4x64_epi64(x0, 0xd8);
- x0 = highbd_clamp_epi16_avx2(x0, bd);
- return x0;
-}
-
-static INLINE void highbd_write_buffer_16xn_avx2(__m256i *in, uint16_t *output,
- int stride, int flipud,
- int height, const int bd) {
- int j = flipud ? (height - 1) : 0;
- const int step = flipud ? -1 : 1;
- for (int i = 0; i < height; ++i, j += step) {
- __m256i v = _mm256_loadu_si256((__m256i const *)(output + i * stride));
- __m256i u = highbd_get_recon_16x8_avx2(v, in[j], in[j + height], bd);
-
- _mm256_storeu_si256((__m256i *)(output + i * stride), u);
- }
-}
-
-static INLINE __m256i av1_round_shift_32_avx2(__m256i vec, int bit) {
- __m256i tmp, round;
- round = _mm256_set1_epi32(1 << (bit - 1));
- tmp = _mm256_add_epi32(vec, round);
- return _mm256_srai_epi32(tmp, bit);
-}
-
-static INLINE void av1_round_shift_array_32_avx2(__m256i *input,
- __m256i *output,
- const int size,
- const int bit) {
- if (bit > 0) {
- int i;
- for (i = 0; i < size; i++) {
- output[i] = av1_round_shift_32_avx2(input[i], bit);
- }
- } else {
- int i;
- for (i = 0; i < size; i++) {
- output[i] = _mm256_slli_epi32(input[i], -bit);
- }
- }
-}
-
-static void transpose_8x8_avx2(const __m256i *in, __m256i *out) {
- __m256i u0, u1, u2, u3, u4, u5, u6, u7;
- __m256i x0, x1;
-
- u0 = _mm256_unpacklo_epi32(in[0], in[1]);
- u1 = _mm256_unpackhi_epi32(in[0], in[1]);
-
- u2 = _mm256_unpacklo_epi32(in[2], in[3]);
- u3 = _mm256_unpackhi_epi32(in[2], in[3]);
-
- u4 = _mm256_unpacklo_epi32(in[4], in[5]);
- u5 = _mm256_unpackhi_epi32(in[4], in[5]);
-
- u6 = _mm256_unpacklo_epi32(in[6], in[7]);
- u7 = _mm256_unpackhi_epi32(in[6], in[7]);
-
- x0 = _mm256_unpacklo_epi64(u0, u2);
- x1 = _mm256_unpacklo_epi64(u4, u6);
- out[0] = _mm256_permute2f128_si256(x0, x1, 0x20);
- out[4] = _mm256_permute2f128_si256(x0, x1, 0x31);
-
- x0 = _mm256_unpackhi_epi64(u0, u2);
- x1 = _mm256_unpackhi_epi64(u4, u6);
- out[1] = _mm256_permute2f128_si256(x0, x1, 0x20);
- out[5] = _mm256_permute2f128_si256(x0, x1, 0x31);
-
- x0 = _mm256_unpacklo_epi64(u1, u3);
- x1 = _mm256_unpacklo_epi64(u5, u7);
- out[2] = _mm256_permute2f128_si256(x0, x1, 0x20);
- out[6] = _mm256_permute2f128_si256(x0, x1, 0x31);
-
- x0 = _mm256_unpackhi_epi64(u1, u3);
- x1 = _mm256_unpackhi_epi64(u5, u7);
- out[3] = _mm256_permute2f128_si256(x0, x1, 0x20);
- out[7] = _mm256_permute2f128_si256(x0, x1, 0x31);
-}
-
-static void load_buffer_32x32(const int32_t *coeff, __m256i *in,
- int input_stiride, int size) {
- int i;
- for (i = 0; i < size; ++i) {
- in[i] = _mm256_loadu_si256((const __m256i *)(coeff + i * input_stiride));
- }
-}
-
-static INLINE __m256i half_btf_0_avx2(const __m256i *w0, const __m256i *n0,
- const __m256i *rounding, int bit) {
- __m256i x;
- x = _mm256_mullo_epi32(*w0, *n0);
- x = _mm256_add_epi32(x, *rounding);
- x = _mm256_srai_epi32(x, bit);
- return x;
-}
-
-static INLINE __m256i half_btf_avx2(const __m256i *w0, const __m256i *n0,
- const __m256i *w1, const __m256i *n1,
- const __m256i *rounding, int bit) {
- __m256i x, y;
-
- x = _mm256_mullo_epi32(*w0, *n0);
- y = _mm256_mullo_epi32(*w1, *n1);
- x = _mm256_add_epi32(x, y);
- x = _mm256_add_epi32(x, *rounding);
- x = _mm256_srai_epi32(x, bit);
- return x;
-}
-
-static void addsub_avx2(const __m256i in0, const __m256i in1, __m256i *out0,
- __m256i *out1, const __m256i *clamp_lo,
- const __m256i *clamp_hi) {
- __m256i a0 = _mm256_add_epi32(in0, in1);
- __m256i a1 = _mm256_sub_epi32(in0, in1);
-
- a0 = _mm256_max_epi32(a0, *clamp_lo);
- a0 = _mm256_min_epi32(a0, *clamp_hi);
- a1 = _mm256_max_epi32(a1, *clamp_lo);
- a1 = _mm256_min_epi32(a1, *clamp_hi);
-
- *out0 = a0;
- *out1 = a1;
-}
-
-static void addsub_no_clamp_avx2(const __m256i in0, const __m256i in1,
- __m256i *out0, __m256i *out1) {
- __m256i a0 = _mm256_add_epi32(in0, in1);
- __m256i a1 = _mm256_sub_epi32(in0, in1);
-
- *out0 = a0;
- *out1 = a1;
-}
-
-static void addsub_shift_avx2(const __m256i in0, const __m256i in1,
- __m256i *out0, __m256i *out1,
- const __m256i *clamp_lo, const __m256i *clamp_hi,
- int shift) {
- __m256i offset = _mm256_set1_epi32((1 << shift) >> 1);
- __m256i in0_w_offset = _mm256_add_epi32(in0, offset);
- __m256i a0 = _mm256_add_epi32(in0_w_offset, in1);
- __m256i a1 = _mm256_sub_epi32(in0_w_offset, in1);
-
- a0 = _mm256_sra_epi32(a0, _mm_cvtsi32_si128(shift));
- a1 = _mm256_sra_epi32(a1, _mm_cvtsi32_si128(shift));
-
- a0 = _mm256_max_epi32(a0, *clamp_lo);
- a0 = _mm256_min_epi32(a0, *clamp_hi);
- a1 = _mm256_max_epi32(a1, *clamp_lo);
- a1 = _mm256_min_epi32(a1, *clamp_hi);
-
- *out0 = a0;
- *out1 = a1;
-}
-
-static INLINE void idct32_stage4_avx2(
- __m256i *bf1, const __m256i *cospim8, const __m256i *cospi56,
- const __m256i *cospi8, const __m256i *cospim56, const __m256i *cospim40,
- const __m256i *cospi24, const __m256i *cospi40, const __m256i *cospim24,
- const __m256i *rounding, int bit) {
- __m256i temp1, temp2;
- temp1 = half_btf_avx2(cospim8, &bf1[17], cospi56, &bf1[30], rounding, bit);
- bf1[30] = half_btf_avx2(cospi56, &bf1[17], cospi8, &bf1[30], rounding, bit);
- bf1[17] = temp1;
-
- temp2 = half_btf_avx2(cospim56, &bf1[18], cospim8, &bf1[29], rounding, bit);
- bf1[29] = half_btf_avx2(cospim8, &bf1[18], cospi56, &bf1[29], rounding, bit);
- bf1[18] = temp2;
-
- temp1 = half_btf_avx2(cospim40, &bf1[21], cospi24, &bf1[26], rounding, bit);
- bf1[26] = half_btf_avx2(cospi24, &bf1[21], cospi40, &bf1[26], rounding, bit);
- bf1[21] = temp1;
-
- temp2 = half_btf_avx2(cospim24, &bf1[22], cospim40, &bf1[25], rounding, bit);
- bf1[25] = half_btf_avx2(cospim40, &bf1[22], cospi24, &bf1[25], rounding, bit);
- bf1[22] = temp2;
-}
-
-// Stage 5 of the 32-point inverse DCT for bf1[8..31], done in place.
-//  * Pairs (9,14) and (10,13) are replaced by half_btf_avx2() combinations
-//    of their previous values.
-//  * Within bf1[16..31], eight index pairs go through addsub_avx2(), which
-//    receives the range [*clamp_lo, *clamp_hi]; the first index of each pair
-//    is passed as out0 and the second as out1.
-// bf1[0..8], bf1[11], bf1[12] and bf1[15] are not touched here.
-static INLINE void idct32_stage5_avx2(
-    __m256i *bf1, const __m256i *cospim16, const __m256i *cospi48,
-    const __m256i *cospi16, const __m256i *cospim48, const __m256i *clamp_lo,
-    const __m256i *clamp_hi, const __m256i *rounding, int bit) {
-  __m256i lo, hi;
-
-  // Pair (9, 14).
-  lo = half_btf_avx2(cospim16, &bf1[9], cospi48, &bf1[14], rounding, bit);
-  hi = half_btf_avx2(cospi48, &bf1[9], cospi16, &bf1[14], rounding, bit);
-  bf1[9] = lo;
-  bf1[14] = hi;
-
-  // Pair (10, 13).
-  lo = half_btf_avx2(cospim48, &bf1[10], cospim16, &bf1[13], rounding, bit);
-  hi = half_btf_avx2(cospim16, &bf1[10], cospi48, &bf1[13], rounding, bit);
-  bf1[10] = lo;
-  bf1[13] = hi;
-
-  // Add/sub butterflies; inputs are passed by value, so in-place is safe.
-  addsub_avx2(bf1[16], bf1[19], &bf1[16], &bf1[19], clamp_lo, clamp_hi);
-  addsub_avx2(bf1[17], bf1[18], &bf1[17], &bf1[18], clamp_lo, clamp_hi);
-  addsub_avx2(bf1[23], bf1[20], &bf1[23], &bf1[20], clamp_lo, clamp_hi);
-  addsub_avx2(bf1[22], bf1[21], &bf1[22], &bf1[21], clamp_lo, clamp_hi);
-  addsub_avx2(bf1[24], bf1[27], &bf1[24], &bf1[27], clamp_lo, clamp_hi);
-  addsub_avx2(bf1[25], bf1[26], &bf1[25], &bf1[26], clamp_lo, clamp_hi);
-  addsub_avx2(bf1[31], bf1[28], &bf1[31], &bf1[28], clamp_lo, clamp_hi);
-  addsub_avx2(bf1[30], bf1[29], &bf1[30], &bf1[29], clamp_lo, clamp_hi);
-}
-
-// Stage 6 of the 32-point inverse DCT for bf1[5..29], done in place.
-//  * Pair (5,6) is combined with -cospi32 / +cospi32 weights.
-//  * Within bf1[8..15], four index pairs go through addsub_avx2() with the
-//    range [*clamp_lo, *clamp_hi].
-//  * Pairs (18,29), (19,28), (20,27) and (21,26) are replaced by
-//    half_btf_avx2() combinations using the cospi16 / cospi48 weights.
-// For every half_btf pair both results are computed from the old values
-// before either element is stored.
-static INLINE void idct32_stage6_avx2(
-    __m256i *bf1, const __m256i *cospim32, const __m256i *cospi32,
-    const __m256i *cospim16, const __m256i *cospi48, const __m256i *cospi16,
-    const __m256i *cospim48, const __m256i *clamp_lo, const __m256i *clamp_hi,
-    const __m256i *rounding, int bit) {
-  __m256i lo, hi;
-
-  // Pair (5, 6).
-  lo = half_btf_avx2(cospim32, &bf1[5], cospi32, &bf1[6], rounding, bit);
-  hi = half_btf_avx2(cospi32, &bf1[5], cospi32, &bf1[6], rounding, bit);
-  bf1[5] = lo;
-  bf1[6] = hi;
-
-  // Add/sub butterflies on bf1[8..15].
-  addsub_avx2(bf1[8], bf1[11], &bf1[8], &bf1[11], clamp_lo, clamp_hi);
-  addsub_avx2(bf1[9], bf1[10], &bf1[9], &bf1[10], clamp_lo, clamp_hi);
-  addsub_avx2(bf1[15], bf1[12], &bf1[15], &bf1[12], clamp_lo, clamp_hi);
-  addsub_avx2(bf1[14], bf1[13], &bf1[14], &bf1[13], clamp_lo, clamp_hi);
-
-  // Pair (18, 29).
-  lo = half_btf_avx2(cospim16, &bf1[18], cospi48, &bf1[29], rounding, bit);
-  hi = half_btf_avx2(cospi48, &bf1[18], cospi16, &bf1[29], rounding, bit);
-  bf1[18] = lo;
-  bf1[29] = hi;
-
-  // Pair (19, 28).
-  lo = half_btf_avx2(cospim16, &bf1[19], cospi48, &bf1[28], rounding, bit);
-  hi = half_btf_avx2(cospi48, &bf1[19], cospi16, &bf1[28], rounding, bit);
-  bf1[19] = lo;
-  bf1[28] = hi;
-
-  // Pair (20, 27).
-  lo = half_btf_avx2(cospim48, &bf1[20], cospim16, &bf1[27], rounding, bit);
-  hi = half_btf_avx2(cospim16, &bf1[20], cospi48, &bf1[27], rounding, bit);
-  bf1[20] = lo;
-  bf1[27] = hi;
-
-  // Pair (21, 26).
-  lo = half_btf_avx2(cospim48, &bf1[21], cospim16, &bf1[26], rounding, bit);
-  hi = half_btf_avx2(cospim16, &bf1[21], cospi48, &bf1[26], rounding, bit);
-  bf1[21] = lo;
-  bf1[26] = hi;
-}
-
-// Stage 7 of the 32-point inverse DCT, done in place on bf1[].
-//  * bf1[0..7]: mirrored add/sub butterflies (i, 7 - i) via addsub_avx2().
-//  * Pairs (10,13) and (11,12): half_btf_avx2() combinations with
-//    -cospi32 / +cospi32 weights; bf1[8], bf1[9], bf1[14], bf1[15] pass
-//    through unchanged.
-//  * bf1[16..31]: add/sub butterflies (16..19 with 23..20, and 31..28 with
-//    24..27) via addsub_avx2() using [*clamp_lo, *clamp_hi].
-static INLINE void idct32_stage7_avx2(__m256i *bf1, const __m256i *cospim32,
-                                      const __m256i *cospi32,
-                                      const __m256i *clamp_lo,
-                                      const __m256i *clamp_hi,
-                                      const __m256i *rounding, int bit) {
-  __m256i lo, hi;
-
-  addsub_avx2(bf1[0], bf1[7], &bf1[0], &bf1[7], clamp_lo, clamp_hi);
-  addsub_avx2(bf1[1], bf1[6], &bf1[1], &bf1[6], clamp_lo, clamp_hi);
-  addsub_avx2(bf1[2], bf1[5], &bf1[2], &bf1[5], clamp_lo, clamp_hi);
-  addsub_avx2(bf1[3], bf1[4], &bf1[3], &bf1[4], clamp_lo, clamp_hi);
-
-  // Pair (10, 13).
-  lo = half_btf_avx2(cospim32, &bf1[10], cospi32, &bf1[13], rounding, bit);
-  hi = half_btf_avx2(cospi32, &bf1[10], cospi32, &bf1[13], rounding, bit);
-  bf1[10] = lo;
-  bf1[13] = hi;
-
-  // Pair (11, 12).
-  lo = half_btf_avx2(cospim32, &bf1[11], cospi32, &bf1[12], rounding, bit);
-  hi = half_btf_avx2(cospi32, &bf1[11], cospi32, &bf1[12], rounding, bit);
-  bf1[11] = lo;
-  bf1[12] = hi;
-
-  addsub_avx2(bf1[16], bf1[23], &bf1[16], &bf1[23], clamp_lo, clamp_hi);
-  addsub_avx2(bf1[17], bf1[22], &bf1[17], &bf1[22], clamp_lo, clamp_hi);
-  addsub_avx2(bf1[18], bf1[21], &bf1[18], &bf1[21], clamp_lo, clamp_hi);
-  addsub_avx2(bf1[19], bf1[20], &bf1[19], &bf1[20], clamp_lo, clamp_hi);
-  addsub_avx2(bf1[31], bf1[24], &bf1[31], &bf1[24], clamp_lo, clamp_hi);
-  addsub_avx2(bf1[30], bf1[25], &bf1[30], &bf1[25], clamp_lo, clamp_hi);
-  addsub_avx2(bf1[29], bf1[26], &bf1[29], &bf1[26], clamp_lo, clamp_hi);
-  addsub_avx2(bf1[28], bf1[27], &bf1[28], &bf1[27], clamp_lo, clamp_hi);
-}
-
-// Stage 8 of the 32-point inverse DCT, done in place on bf1[].
-//  * bf1[0..15]: mirrored add/sub butterflies (i, 15 - i) via addsub_avx2()
-//    using [*clamp_lo, *clamp_hi].
-//  * Pairs (20,27), (21,26), (22,25) and (23,24): half_btf_avx2()
-//    combinations with -cospi32 / +cospi32 weights. Both results of a pair
-//    are computed from the old values before either element is stored.
-// bf1[16..19] and bf1[28..31] are left untouched.
-static INLINE void idct32_stage8_avx2(__m256i *bf1, const __m256i *cospim32,
-                                      const __m256i *cospi32,
-                                      const __m256i *clamp_lo,
-                                      const __m256i *clamp_hi,
-                                      const __m256i *rounding, int bit) {
-  __m256i lo, hi;
-
-  addsub_avx2(bf1[0], bf1[15], &bf1[0], &bf1[15], clamp_lo, clamp_hi);
-  addsub_avx2(bf1[1], bf1[14], &bf1[1], &bf1[14], clamp_lo, clamp_hi);
-  addsub_avx2(bf1[2], bf1[13], &bf1[2], &bf1[13], clamp_lo, clamp_hi);
-  addsub_avx2(bf1[3], bf1[12], &bf1[3], &bf1[12], clamp_lo, clamp_hi);
-  addsub_avx2(bf1[4], bf1[11], &bf1[4], &bf1[11], clamp_lo, clamp_hi);
-  addsub_avx2(bf1[5], bf1[10], &bf1[5], &bf1[10], clamp_lo, clamp_hi);
-  addsub_avx2(bf1[6], bf1[9], &bf1[6], &bf1[9], clamp_lo, clamp_hi);
-  addsub_avx2(bf1[7], bf1[8], &bf1[7], &bf1[8], clamp_lo, clamp_hi);
-
-  // Pair (20, 27).
-  lo = half_btf_avx2(cospim32, &bf1[20], cospi32, &bf1[27], rounding, bit);
-  hi = half_btf_avx2(cospi32, &bf1[20], cospi32, &bf1[27], rounding, bit);
-  bf1[20] = lo;
-  bf1[27] = hi;
-
-  // Pair (21, 26).
-  lo = half_btf_avx2(cospim32, &bf1[21], cospi32, &bf1[26], rounding, bit);
-  hi = half_btf_avx2(cospi32, &bf1[21], cospi32, &bf1[26], rounding, bit);
-  bf1[21] = lo;
-  bf1[26] = hi;
-
-  // Pair (22, 25).
-  lo = half_btf_avx2(cospim32, &bf1[22], cospi32, &bf1[25], rounding, bit);
-  hi = half_btf_avx2(cospi32, &bf1[22], cospi32, &bf1[25], rounding, bit);
-  bf1[22] = lo;
-  bf1[25] = hi;
-
-  // Pair (23, 24).
-  lo = half_btf_avx2(cospim32, &bf1[23], cospi32, &bf1[24], rounding, bit);
-  hi = half_btf_avx2(cospi32, &bf1[23], cospi32, &bf1[24], rounding, bit);
-  bf1[23] = lo;
-  bf1[24] = hi;
-}
-
-// Final (stage 9) butterfly of the 32-point inverse DCT. For i = 0..15 it
-// combines bf1[i] with its mirror bf1[31 - i] and writes the sum to out[i]
-// and the difference to out[31 - i].
-//  * do_cols != 0: plain add/sub via addsub_no_clamp_avx2().
-//  * do_cols == 0: addsub_shift_avx2() rounds, shifts right by out_shift and
-//    clamps. The clamp range is the tighter of the max(16, bd + 6)-bit
-//    signed range and +/-(1 << (log_range - 1 - out_shift)).
-//    Note the second upper bound is (1 << k), not (1 << k) - 1; that
-//    asymmetry is kept exactly as in the original expression.
-static INLINE void idct32_stage9_avx2(__m256i *bf1, __m256i *out,
-                                      const int do_cols, const int bd,
-                                      const int out_shift,
-                                      const int log_range) {
-  int i;
-
-  if (do_cols) {
-    for (i = 0; i < 16; ++i) {
-      addsub_no_clamp_avx2(bf1[i], bf1[31 - i], &out[i], &out[31 - i]);
-    }
-  } else {
-    const int log_range_out = AOMMAX(16, bd + 6);
-    const int lo_bound = AOMMAX(-(1 << (log_range_out - 1)),
-                                -(1 << (log_range - 1 - out_shift)));
-    const int hi_bound = AOMMIN((1 << (log_range_out - 1)) - 1,
-                                (1 << (log_range - 1 - out_shift)));
-    const __m256i clamp_lo_out = _mm256_set1_epi32(lo_bound);
-    const __m256i clamp_hi_out = _mm256_set1_epi32(hi_bound);
-
-    for (i = 0; i < 16; ++i) {
-      addsub_shift_avx2(bf1[i], bf1[31 - i], &out[i], &out[31 - i],
-                        &clamp_lo_out, &clamp_hi_out, out_shift);
-    }
-  }
-}
-
-// 32-point inverse DCT variant that reads only in[0]. The single value is
-// scaled by cospi[32] with rounding, post-processed according to do_cols,
-// and then broadcast to all 32 outputs.
-//   bit       - selects the cosine table via cospi_arr(bit) and is the
-//               right-shift applied after the multiply.
-//   do_cols   - nonzero: clamp to the max(16, bd + 6)-bit signed range.
-//               zero: add the rounding offset, shift right by out_shift,
-//               then clamp to the tighter of the max(16, bd + 6)-bit range
-//               and +/-(1 << (log_range - 1 - out_shift)), where log_range
-//               is max(16, bd + 8).
-//   out_shift - only used when do_cols is zero.
-static void idct32_low1_avx2(__m256i *in, __m256i *out, int bit, int do_cols,
-                             int bd, int out_shift) {
-  const int32_t *cospi = cospi_arr(bit);
-  const __m256i cospi32 = _mm256_set1_epi32(cospi[32]);
-  const __m256i rounding = _mm256_set1_epi32(1 << (bit - 1));
-  const int log_range = AOMMAX(16, bd + (do_cols ? 6 : 8));
-  __m256i dc;
-  int i;
-
-  // Stages 0-5 collapse to one rounded multiply: (in[0] * cospi32 + r) >> bit.
-  dc = _mm256_mullo_epi32(in[0], cospi32);
-  dc = _mm256_srai_epi32(_mm256_add_epi32(dc, rounding), bit);
-
-  // Stages 6-9 reduce to the final range handling.
-  if (do_cols) {
-    const __m256i clamp_lo = _mm256_set1_epi32(-(1 << (log_range - 1)));
-    const __m256i clamp_hi = _mm256_set1_epi32((1 << (log_range - 1)) - 1);
-    dc = _mm256_min_epi32(_mm256_max_epi32(dc, clamp_lo), clamp_hi);
-  } else {
-    const int log_range_out = AOMMAX(16, bd + 6);
-    const int lo_bound = AOMMAX(-(1 << (log_range_out - 1)),
-                                -(1 << (log_range - 1 - out_shift)));
-    const int hi_bound = AOMMIN((1 << (log_range_out - 1)) - 1,
-                                (1 << (log_range - 1 - out_shift)));
-    const __m256i clamp_lo_out = _mm256_set1_epi32(lo_bound);
-    const __m256i clamp_hi_out = _mm256_set1_epi32(hi_bound);
-    const __m256i offset = _mm256_set1_epi32((1 << out_shift) >> 1);
-
-    dc = _mm256_add_epi32(offset, dc);
-    dc = _mm256_sra_epi32(dc, _mm_cvtsi32_si128(out_shift));
-    dc = _mm256_min_epi32(_mm256_max_epi32(dc, clamp_lo_out), clamp_hi_out);
-  }
-
-  // Every output row receives the same vector.
-  for (i = 0; i < 32; ++i) {
-    out[i] = dc;
-  }
-}
-
-// 32-point inverse DCT variant that reads only in[0]..in[7]; the other 24
-// inputs are never loaded (presumably the caller guarantees they are zero --
-// TODO confirm against the call site). Results go to out[0..31] through
-// idct32_stage9_avx2().
-//   bit       - selects the cosine table via cospi_arr(bit); also passed to
-//               the butterfly helpers together with 1 << (bit - 1) as the
-//               rounding term.
-//   do_cols   - picks the intermediate clamp width: max(16, bd + 6) bits when
-//               nonzero, max(16, bd + 8) bits when zero; forwarded to stage 9.
-//   bd        - presumably the bit depth; only used to derive clamp ranges.
-//   out_shift - forwarded to idct32_stage9_avx2().
-static void idct32_low8_avx2(__m256i *in, __m256i *out, int bit, int do_cols,
- int bd, int out_shift) {
- const int32_t *cospi = cospi_arr(bit);
- const __m256i cospi62 = _mm256_set1_epi32(cospi[62]);
- const __m256i cospi14 = _mm256_set1_epi32(cospi[14]);
- const __m256i cospi54 = _mm256_set1_epi32(cospi[54]);
- const __m256i cospi6 = _mm256_set1_epi32(cospi[6]);
- const __m256i cospi10 = _mm256_set1_epi32(cospi[10]);
- const __m256i cospi2 = _mm256_set1_epi32(cospi[2]);
- const __m256i cospim58 = _mm256_set1_epi32(-cospi[58]);
- const __m256i cospim50 = _mm256_set1_epi32(-cospi[50]);
- const __m256i cospi60 = _mm256_set1_epi32(cospi[60]);
- const __m256i cospi12 = _mm256_set1_epi32(cospi[12]);
- const __m256i cospi4 = _mm256_set1_epi32(cospi[4]);
- const __m256i cospim52 = _mm256_set1_epi32(-cospi[52]);
- const __m256i cospi56 = _mm256_set1_epi32(cospi[56]);
- const __m256i cospi24 = _mm256_set1_epi32(cospi[24]);
- const __m256i cospi40 = _mm256_set1_epi32(cospi[40]);
- const __m256i cospi8 = _mm256_set1_epi32(cospi[8]);
- const __m256i cospim40 = _mm256_set1_epi32(-cospi[40]);
- const __m256i cospim8 = _mm256_set1_epi32(-cospi[8]);
- const __m256i cospim56 = _mm256_set1_epi32(-cospi[56]);
- const __m256i cospim24 = _mm256_set1_epi32(-cospi[24]);
- const __m256i cospi32 = _mm256_set1_epi32(cospi[32]);
- const __m256i cospim32 = _mm256_set1_epi32(-cospi[32]);
- const __m256i cospi48 = _mm256_set1_epi32(cospi[48]);
- const __m256i cospim48 = _mm256_set1_epi32(-cospi[48]);
- const __m256i cospi16 = _mm256_set1_epi32(cospi[16]);
- const __m256i cospim16 = _mm256_set1_epi32(-cospi[16]);
- const __m256i rounding = _mm256_set1_epi32(1 << (bit - 1));
- const int log_range = AOMMAX(16, bd + (do_cols ? 6 : 8));
- const __m256i clamp_lo = _mm256_set1_epi32(-(1 << (log_range - 1)));
- const __m256i clamp_hi = _mm256_set1_epi32((1 << (log_range - 1)) - 1);
- __m256i bf1[32];
-
- {
- // stage 0
- // stage 1: scatter the eight inputs to every fourth slot of bf1[]; the
- // remaining slots are filled in by later stages before they are read.
- bf1[0] = in[0];
- bf1[4] = in[4];
- bf1[8] = in[2];
- bf1[12] = in[6];
- bf1[16] = in[1];
- bf1[20] = in[5];
- bf1[24] = in[3];
- bf1[28] = in[7];
-
- // stage 2: each loaded slot in 16..31 yields two values via the
- // single-input helper half_btf_0_avx2(). The partner slot is written
- // first and the source slot is overwritten second, so the order of each
- // statement pair must not be swapped.
- bf1[31] = half_btf_0_avx2(&cospi2, &bf1[16], &rounding, bit);
- bf1[16] = half_btf_0_avx2(&cospi62, &bf1[16], &rounding, bit);
- bf1[19] = half_btf_0_avx2(&cospim50, &bf1[28], &rounding, bit);
- bf1[28] = half_btf_0_avx2(&cospi14, &bf1[28], &rounding, bit);
- bf1[27] = half_btf_0_avx2(&cospi10, &bf1[20], &rounding, bit);
- bf1[20] = half_btf_0_avx2(&cospi54, &bf1[20], &rounding, bit);
- bf1[23] = half_btf_0_avx2(&cospim58, &bf1[24], &rounding, bit);
- bf1[24] = half_btf_0_avx2(&cospi6, &bf1[24], &rounding, bit);
-
- // stage 3: same single-input pattern for slots 8 and 12 (source slot is
- // overwritten last).
- bf1[15] = half_btf_0_avx2(&cospi4, &bf1[8], &rounding, bit);
- bf1[8] = half_btf_0_avx2(&cospi60, &bf1[8], &rounding, bit);
-
- bf1[11] = half_btf_0_avx2(&cospim52, &bf1[12], &rounding, bit);
- bf1[12] = half_btf_0_avx2(&cospi12, &bf1[12], &rounding, bit);
- // Plain copies take the place of the add/sub butterflies that
- // idct32_avx2() applies to these index pairs in its stage 3
- // (presumably because the partner term is zero here -- TODO confirm).
- bf1[17] = bf1[16];
- bf1[18] = bf1[19];
- bf1[21] = bf1[20];
- bf1[22] = bf1[23];
- bf1[25] = bf1[24];
- bf1[26] = bf1[27];
- bf1[29] = bf1[28];
- bf1[30] = bf1[31];
-
- // stage 4: single-input pattern for slot 4, copies for slots 8..15, then
- // the shared in-place helper for the pairs inside 17..30.
- bf1[7] = half_btf_0_avx2(&cospi8, &bf1[4], &rounding, bit);
- bf1[4] = half_btf_0_avx2(&cospi56, &bf1[4], &rounding, bit);
-
- bf1[9] = bf1[8];
- bf1[10] = bf1[11];
- bf1[13] = bf1[12];
- bf1[14] = bf1[15];
-
- idct32_stage4_avx2(bf1, &cospim8, &cospi56, &cospi8, &cospim56, &cospim40,
- &cospi24, &cospi40, &cospim24, &rounding, bit);
-
- // stage 5: bf1[0] is scaled by cospi32 and duplicated into bf1[1];
- // slots 5 and 6 are copies of 4 and 7.
- bf1[0] = half_btf_0_avx2(&cospi32, &bf1[0], &rounding, bit);
- bf1[1] = bf1[0];
- bf1[5] = bf1[4];
- bf1[6] = bf1[7];
-
- idct32_stage5_avx2(bf1, &cospim16, &cospi48, &cospi16, &cospim48, &clamp_lo,
- &clamp_hi, &rounding, bit);
-
- // stage 6: slots 2 and 3 are copies of 1 and 0.
- bf1[3] = bf1[0];
- bf1[2] = bf1[1];
-
- idct32_stage6_avx2(bf1, &cospim32, &cospi32, &cospim16, &cospi48, &cospi16,
- &cospim48, &clamp_lo, &clamp_hi, &rounding, bit);
-
- // stage 7
- idct32_stage7_avx2(bf1, &cospim32, &cospi32, &clamp_lo, &clamp_hi,
- &rounding, bit);
-
- // stage 8
- idct32_stage8_avx2(bf1, &cospim32, &cospi32, &clamp_lo, &clamp_hi,
- &rounding, bit);
-
- // stage 9: final mirrored add/sub into out[], with shift and clamp when
- // do_cols is zero.
- idct32_stage9_avx2(bf1, out, do_cols, bd, out_shift, log_range);
- }
-}
-
-// 32-point inverse DCT variant that reads only in[0]..in[15]; the other 16
-// inputs are never loaded (presumably the caller guarantees they are zero --
-// TODO confirm against the call site). Results go to out[0..31] through
-// idct32_stage9_avx2().
-//   bit       - selects the cosine table via cospi_arr(bit); also passed to
-//               the butterfly helpers together with 1 << (bit - 1) as the
-//               rounding term.
-//   do_cols   - picks the intermediate clamp width: max(16, bd + 6) bits when
-//               nonzero, max(16, bd + 8) bits when zero; forwarded to stage 9.
-//   bd        - presumably the bit depth; only used to derive clamp ranges.
-//   out_shift - forwarded to idct32_stage9_avx2().
-static void idct32_low16_avx2(__m256i *in, __m256i *out, int bit, int do_cols,
- int bd, int out_shift) {
- const int32_t *cospi = cospi_arr(bit);
- const __m256i cospi62 = _mm256_set1_epi32(cospi[62]);
- const __m256i cospi30 = _mm256_set1_epi32(cospi[30]);
- const __m256i cospi46 = _mm256_set1_epi32(cospi[46]);
- const __m256i cospi14 = _mm256_set1_epi32(cospi[14]);
- const __m256i cospi54 = _mm256_set1_epi32(cospi[54]);
- const __m256i cospi22 = _mm256_set1_epi32(cospi[22]);
- const __m256i cospi38 = _mm256_set1_epi32(cospi[38]);
- const __m256i cospi6 = _mm256_set1_epi32(cospi[6]);
- const __m256i cospi26 = _mm256_set1_epi32(cospi[26]);
- const __m256i cospi10 = _mm256_set1_epi32(cospi[10]);
- const __m256i cospi18 = _mm256_set1_epi32(cospi[18]);
- const __m256i cospi2 = _mm256_set1_epi32(cospi[2]);
- const __m256i cospim58 = _mm256_set1_epi32(-cospi[58]);
- const __m256i cospim42 = _mm256_set1_epi32(-cospi[42]);
- const __m256i cospim50 = _mm256_set1_epi32(-cospi[50]);
- const __m256i cospim34 = _mm256_set1_epi32(-cospi[34]);
- const __m256i cospi60 = _mm256_set1_epi32(cospi[60]);
- const __m256i cospi28 = _mm256_set1_epi32(cospi[28]);
- const __m256i cospi44 = _mm256_set1_epi32(cospi[44]);
- const __m256i cospi12 = _mm256_set1_epi32(cospi[12]);
- const __m256i cospi20 = _mm256_set1_epi32(cospi[20]);
- const __m256i cospi4 = _mm256_set1_epi32(cospi[4]);
- const __m256i cospim52 = _mm256_set1_epi32(-cospi[52]);
- const __m256i cospim36 = _mm256_set1_epi32(-cospi[36]);
- const __m256i cospi56 = _mm256_set1_epi32(cospi[56]);
- const __m256i cospi24 = _mm256_set1_epi32(cospi[24]);
- const __m256i cospi40 = _mm256_set1_epi32(cospi[40]);
- const __m256i cospi8 = _mm256_set1_epi32(cospi[8]);
- const __m256i cospim40 = _mm256_set1_epi32(-cospi[40]);
- const __m256i cospim8 = _mm256_set1_epi32(-cospi[8]);
- const __m256i cospim56 = _mm256_set1_epi32(-cospi[56]);
- const __m256i cospim24 = _mm256_set1_epi32(-cospi[24]);
- const __m256i cospi32 = _mm256_set1_epi32(cospi[32]);
- const __m256i cospim32 = _mm256_set1_epi32(-cospi[32]);
- const __m256i cospi48 = _mm256_set1_epi32(cospi[48]);
- const __m256i cospim48 = _mm256_set1_epi32(-cospi[48]);
- const __m256i cospi16 = _mm256_set1_epi32(cospi[16]);
- const __m256i cospim16 = _mm256_set1_epi32(-cospi[16]);
- const __m256i rounding = _mm256_set1_epi32(1 << (bit - 1));
- const int log_range = AOMMAX(16, bd + (do_cols ? 6 : 8));
- const __m256i clamp_lo = _mm256_set1_epi32(-(1 << (log_range - 1)));
- const __m256i clamp_hi = _mm256_set1_epi32((1 << (log_range - 1)) - 1);
- __m256i bf1[32];
-
- {
- // stage 0
- // stage 1: scatter the sixteen inputs to the even slots of bf1[]; the
- // odd slots are produced by later stages before they are read.
- bf1[0] = in[0];
- bf1[2] = in[8];
- bf1[4] = in[4];
- bf1[6] = in[12];
- bf1[8] = in[2];
- bf1[10] = in[10];
- bf1[12] = in[6];
- bf1[14] = in[14];
- bf1[16] = in[1];
- bf1[18] = in[9];
- bf1[20] = in[5];
- bf1[22] = in[13];
- bf1[24] = in[3];
- bf1[26] = in[11];
- bf1[28] = in[7];
- bf1[30] = in[15];
-
- // stage 2: each even slot in 16..30 yields two values via the
- // single-input helper half_btf_0_avx2(). The odd partner slot is written
- // first and the source slot is overwritten second, so the order of each
- // statement pair must not be swapped.
- bf1[31] = half_btf_0_avx2(&cospi2, &bf1[16], &rounding, bit);
- bf1[16] = half_btf_0_avx2(&cospi62, &bf1[16], &rounding, bit);
- bf1[17] = half_btf_0_avx2(&cospim34, &bf1[30], &rounding, bit);
- bf1[30] = half_btf_0_avx2(&cospi30, &bf1[30], &rounding, bit);
- bf1[29] = half_btf_0_avx2(&cospi18, &bf1[18], &rounding, bit);
- bf1[18] = half_btf_0_avx2(&cospi46, &bf1[18], &rounding, bit);
- bf1[19] = half_btf_0_avx2(&cospim50, &bf1[28], &rounding, bit);
- bf1[28] = half_btf_0_avx2(&cospi14, &bf1[28], &rounding, bit);
- bf1[27] = half_btf_0_avx2(&cospi10, &bf1[20], &rounding, bit);
- bf1[20] = half_btf_0_avx2(&cospi54, &bf1[20], &rounding, bit);
- bf1[21] = half_btf_0_avx2(&cospim42, &bf1[26], &rounding, bit);
- bf1[26] = half_btf_0_avx2(&cospi22, &bf1[26], &rounding, bit);
- bf1[25] = half_btf_0_avx2(&cospi26, &bf1[22], &rounding, bit);
- bf1[22] = half_btf_0_avx2(&cospi38, &bf1[22], &rounding, bit);
- bf1[23] = half_btf_0_avx2(&cospim58, &bf1[24], &rounding, bit);
- bf1[24] = half_btf_0_avx2(&cospi6, &bf1[24], &rounding, bit);
-
- // stage 3: same single-input pattern for the even slots in 8..14, then
- // in-place add/sub butterflies on adjacent pairs in 16..31.
- bf1[15] = half_btf_0_avx2(&cospi4, &bf1[8], &rounding, bit);
- bf1[8] = half_btf_0_avx2(&cospi60, &bf1[8], &rounding, bit);
- bf1[9] = half_btf_0_avx2(&cospim36, &bf1[14], &rounding, bit);
- bf1[14] = half_btf_0_avx2(&cospi28, &bf1[14], &rounding, bit);
- bf1[13] = half_btf_0_avx2(&cospi20, &bf1[10], &rounding, bit);
- bf1[10] = half_btf_0_avx2(&cospi44, &bf1[10], &rounding, bit);
- bf1[11] = half_btf_0_avx2(&cospim52, &bf1[12], &rounding, bit);
- bf1[12] = half_btf_0_avx2(&cospi12, &bf1[12], &rounding, bit);
-
- addsub_avx2(bf1[16], bf1[17], bf1 + 16, bf1 + 17, &clamp_lo, &clamp_hi);
- addsub_avx2(bf1[19], bf1[18], bf1 + 19, bf1 + 18, &clamp_lo, &clamp_hi);
- addsub_avx2(bf1[20], bf1[21], bf1 + 20, bf1 + 21, &clamp_lo, &clamp_hi);
- addsub_avx2(bf1[23], bf1[22], bf1 + 23, bf1 + 22, &clamp_lo, &clamp_hi);
- addsub_avx2(bf1[24], bf1[25], bf1 + 24, bf1 + 25, &clamp_lo, &clamp_hi);
- addsub_avx2(bf1[27], bf1[26], bf1 + 27, bf1 + 26, &clamp_lo, &clamp_hi);
- addsub_avx2(bf1[28], bf1[29], bf1 + 28, bf1 + 29, &clamp_lo, &clamp_hi);
- addsub_avx2(bf1[31], bf1[30], bf1 + 31, bf1 + 30, &clamp_lo, &clamp_hi);
-
- // stage 4: single-input pattern for slots 4 and 6, add/sub on adjacent
- // pairs in 8..15, then the shared in-place helper for pairs inside 17..30.
- bf1[7] = half_btf_0_avx2(&cospi8, &bf1[4], &rounding, bit);
- bf1[4] = half_btf_0_avx2(&cospi56, &bf1[4], &rounding, bit);
- bf1[5] = half_btf_0_avx2(&cospim40, &bf1[6], &rounding, bit);
- bf1[6] = half_btf_0_avx2(&cospi24, &bf1[6], &rounding, bit);
-
- addsub_avx2(bf1[8], bf1[9], bf1 + 8, bf1 + 9, &clamp_lo, &clamp_hi);
- addsub_avx2(bf1[11], bf1[10], bf1 + 11, bf1 + 10, &clamp_lo, &clamp_hi);
- addsub_avx2(bf1[12], bf1[13], bf1 + 12, bf1 + 13, &clamp_lo, &clamp_hi);
- addsub_avx2(bf1[15], bf1[14], bf1 + 15, bf1 + 14, &clamp_lo, &clamp_hi);
-
- idct32_stage4_avx2(bf1, &cospim8, &cospi56, &cospi8, &cospim56, &cospim40,
- &cospi24, &cospi40, &cospim24, &rounding, bit);
-
- // stage 5: bf1[0] is scaled by cospi32 and duplicated into bf1[1];
- // bf1[2] yields bf1[3] (written first) and its own replacement.
- bf1[0] = half_btf_0_avx2(&cospi32, &bf1[0], &rounding, bit);
- bf1[1] = bf1[0];
- bf1[3] = half_btf_0_avx2(&cospi16, &bf1[2], &rounding, bit);
- bf1[2] = half_btf_0_avx2(&cospi48, &bf1[2], &rounding, bit);
-
- addsub_avx2(bf1[4], bf1[5], bf1 + 4, bf1 + 5, &clamp_lo, &clamp_hi);
- addsub_avx2(bf1[7], bf1[6], bf1 + 7, bf1 + 6, &clamp_lo, &clamp_hi);
-
- idct32_stage5_avx2(bf1, &cospim16, &cospi48, &cospi16, &cospim48, &clamp_lo,
- &clamp_hi, &rounding, bit);
-
- // stage 6: add/sub on (0,3) and (1,2), then the shared helper.
- addsub_avx2(bf1[0], bf1[3], bf1 + 0, bf1 + 3, &clamp_lo, &clamp_hi);
- addsub_avx2(bf1[1], bf1[2], bf1 + 1, bf1 + 2, &clamp_lo, &clamp_hi);
-
- idct32_stage6_avx2(bf1, &cospim32, &cospi32, &cospim16, &cospi48, &cospi16,
- &cospim48, &clamp_lo, &clamp_hi, &rounding, bit);
-
- // stage 7
- idct32_stage7_avx2(bf1, &cospim32, &cospi32, &clamp_lo, &clamp_hi,
- &rounding, bit);
-
- // stage 8
- idct32_stage8_avx2(bf1, &cospim32, &cospi32, &clamp_lo, &clamp_hi,
- &rounding, bit);
-
- // stage 9: final mirrored add/sub into out[], with shift and clamp when
- // do_cols is zero.
- idct32_stage9_avx2(bf1, out, do_cols, bd, out_shift, log_range);
- }
-}
-
-static void idct32_avx2(__m256i *in, __m256i *out, int bit, int do_cols, int bd,
- int out_shift) {
- const int32_t *cospi = cospi_arr(bit);
- const __m256i cospi62 = _mm256_set1_epi32(cospi[62]);
- const __m256i cospi30 = _mm256_set1_epi32(cospi[30]);
- const __m256i cospi46 = _mm256_set1_epi32(cospi[46]);
- const __m256i cospi14 = _mm256_set1_epi32(cospi[14]);
- const __m256i cospi54 = _mm256_set1_epi32(cospi[54]);
- const __m256i cospi22 = _mm256_set1_epi32(cospi[22]);
- const __m256i cospi38 = _mm256_set1_epi32(cospi[38]);
- const __m256i cospi6 = _mm256_set1_epi32(cospi[6]);
- const __m256i cospi58 = _mm256_set1_epi32(cospi[58]);
- const __m256i cospi26 = _mm256_set1_epi32(cospi[26]);
- const __m256i cospi42 = _mm256_set1_epi32(cospi[42]);
- const __m256i cospi10 = _mm256_set1_epi32(cospi[10]);
- const __m256i cospi50 = _mm256_set1_epi32(cospi[50]);
- const __m256i cospi18 = _mm256_set1_epi32(cospi[18]);
- const __m256i cospi34 = _mm256_set1_epi32(cospi[34]);
- const __m256i cospi2 = _mm256_set1_epi32(cospi[2]);
- const __m256i cospim58 = _mm256_set1_epi32(-cospi[58]);
- const __m256i cospim26 = _mm256_set1_epi32(-cospi[26]);
- const __m256i cospim42 = _mm256_set1_epi32(-cospi[42]);
- const __m256i cospim10 = _mm256_set1_epi32(-cospi[10]);
- const __m256i cospim50 = _mm256_set1_epi32(-cospi[50]);
- const __m256i cospim18 = _mm256_set1_epi32(-cospi[18]);
- const __m256i cospim34 = _mm256_set1_epi32(-cospi[34]);
- const __m256i cospim2 = _mm256_set1_epi32(-cospi[2]);
- const __m256i cospi60 = _mm256_set1_epi32(cospi[60]);
- const __m256i cospi28 = _mm256_set1_epi32(cospi[28]);
- const __m256i cospi44 = _mm256_set1_epi32(cospi[44]);
- const __m256i cospi12 = _mm256_set1_epi32(cospi[12]);
- const __m256i cospi52 = _mm256_set1_epi32(cospi[52]);
- const __m256i cospi20 = _mm256_set1_epi32(cospi[20]);
- const __m256i cospi36 = _mm256_set1_epi32(cospi[36]);
- const __m256i cospi4 = _mm256_set1_epi32(cospi[4]);
- const __m256i cospim52 = _mm256_set1_epi32(-cospi[52]);
- const __m256i cospim20 = _mm256_set1_epi32(-cospi[20]);
- const __m256i cospim36 = _mm256_set1_epi32(-cospi[36]);
- const __m256i cospim4 = _mm256_set1_epi32(-cospi[4]);
- const __m256i cospi56 = _mm256_set1_epi32(cospi[56]);
- const __m256i cospi24 = _mm256_set1_epi32(cospi[24]);
- const __m256i cospi40 = _mm256_set1_epi32(cospi[40]);
- const __m256i cospi8 = _mm256_set1_epi32(cospi[8]);
- const __m256i cospim40 = _mm256_set1_epi32(-cospi[40]);
- const __m256i cospim8 = _mm256_set1_epi32(-cospi[8]);
- const __m256i cospim56 = _mm256_set1_epi32(-cospi[56]);
- const __m256i cospim24 = _mm256_set1_epi32(-cospi[24]);
- const __m256i cospi32 = _mm256_set1_epi32(cospi[32]);
- const __m256i cospim32 = _mm256_set1_epi32(-cospi[32]);
- const __m256i cospi48 = _mm256_set1_epi32(cospi[48]);
- const __m256i cospim48 = _mm256_set1_epi32(-cospi[48]);
- const __m256i cospi16 = _mm256_set1_epi32(cospi[16]);
- const __m256i cospim16 = _mm256_set1_epi32(-cospi[16]);
- const __m256i rounding = _mm256_set1_epi32(1 << (bit - 1));
- const int log_range = AOMMAX(16, bd + (do_cols ? 6 : 8));
- const __m256i clamp_lo = _mm256_set1_epi32(-(1 << (log_range - 1)));
- const __m256i clamp_hi = _mm256_set1_epi32((1 << (log_range - 1)) - 1);
- __m256i bf1[32], bf0[32];
-
- {
- // stage 0
- // stage 1
- bf1[0] = in[0];
- bf1[1] = in[16];
- bf1[2] = in[8];
- bf1[3] = in[24];
- bf1[4] = in[4];
- bf1[5] = in[20];
- bf1[6] = in[12];
- bf1[7] = in[28];
- bf1[8] = in[2];
- bf1[9] = in[18];
- bf1[10] = in[10];
- bf1[11] = in[26];
- bf1[12] = in[6];
- bf1[13] = in[22];
- bf1[14] = in[14];
- bf1[15] = in[30];
- bf1[16] = in[1];
- bf1[17] = in[17];
- bf1[18] = in[9];
- bf1[19] = in[25];
- bf1[20] = in[5];
- bf1[21] = in[21];
- bf1[22] = in[13];
- bf1[23] = in[29];
- bf1[24] = in[3];
- bf1[25] = in[19];
- bf1[26] = in[11];
- bf1[27] = in[27];
- bf1[28] = in[7];
- bf1[29] = in[23];
- bf1[30] = in[15];
- bf1[31] = in[31];
-
- // stage 2
- bf0[0] = bf1[0];
- bf0[1] = bf1[1];
- bf0[2] = bf1[2];
- bf0[3] = bf1[3];
- bf0[4] = bf1[4];
- bf0[5] = bf1[5];
- bf0[6] = bf1[6];
- bf0[7] = bf1[7];
- bf0[8] = bf1[8];
- bf0[9] = bf1[9];
- bf0[10] = bf1[10];
- bf0[11] = bf1[11];
- bf0[12] = bf1[12];
- bf0[13] = bf1[13];
- bf0[14] = bf1[14];
- bf0[15] = bf1[15];
- bf0[16] =
- half_btf_avx2(&cospi62, &bf1[16], &cospim2, &bf1[31], &rounding, bit);
- bf0[17] =
- half_btf_avx2(&cospi30, &bf1[17], &cospim34, &bf1[30], &rounding, bit);
- bf0[18] =
- half_btf_avx2(&cospi46, &bf1[18], &cospim18, &bf1[29], &rounding, bit);
- bf0[19] =
- half_btf_avx2(&cospi14, &bf1[19], &cospim50, &bf1[28], &rounding, bit);
- bf0[20] =
- half_btf_avx2(&cospi54, &bf1[20], &cospim10, &bf1[27], &rounding, bit);
- bf0[21] =
- half_btf_avx2(&cospi22, &bf1[21], &cospim42, &bf1[26], &rounding, bit);
- bf0[22] =
- half_btf_avx2(&cospi38, &bf1[22], &cospim26, &bf1[25], &rounding, bit);
- bf0[23] =
- half_btf_avx2(&cospi6, &bf1[23], &cospim58, &bf1[24], &rounding, bit);
- bf0[24] =
- half_btf_avx2(&cospi58, &bf1[23], &cospi6, &bf1[24], &rounding, bit);
- bf0[25] =
- half_btf_avx2(&cospi26, &bf1[22], &cospi38, &bf1[25], &rounding, bit);
- bf0[26] =
- half_btf_avx2(&cospi42, &bf1[21], &cospi22, &bf1[26], &rounding, bit);
- bf0[27] =
- half_btf_avx2(&cospi10, &bf1[20], &cospi54, &bf1[27], &rounding, bit);
- bf0[28] =
- half_btf_avx2(&cospi50, &bf1[19], &cospi14, &bf1[28], &rounding, bit);
- bf0[29] =
- half_btf_avx2(&cospi18, &bf1[18], &cospi46, &bf1[29], &rounding, bit);
- bf0[30] =
- half_btf_avx2(&cospi34, &bf1[17], &cospi30, &bf1[30], &rounding, bit);
- bf0[31] =
- half_btf_avx2(&cospi2, &bf1[16], &cospi62, &bf1[31], &rounding, bit);
-
- // stage 3
- bf1[0] = bf0[0];
- bf1[1] = bf0[1];
- bf1[2] = bf0[2];
- bf1[3] = bf0[3];
- bf1[4] = bf0[4];
- bf1[5] = bf0[5];
- bf1[6] = bf0[6];
- bf1[7] = bf0[7];
- bf1[8] =
- half_btf_avx2(&cospi60, &bf0[8], &cospim4, &bf0[15], &rounding, bit);
- bf1[9] =
- half_btf_avx2(&cospi28, &bf0[9], &cospim36, &bf0[14], &rounding, bit);
- bf1[10] =
- half_btf_avx2(&cospi44, &bf0[10], &cospim20, &bf0[13], &rounding, bit);
- bf1[11] =
- half_btf_avx2(&cospi12, &bf0[11], &cospim52, &bf0[12], &rounding, bit);
- bf1[12] =
- half_btf_avx2(&cospi52, &bf0[11], &cospi12, &bf0[12], &rounding, bit);
- bf1[13] =
- half_btf_avx2(&cospi20, &bf0[10], &cospi44, &bf0[13], &rounding, bit);
- bf1[14] =
- half_btf_avx2(&cospi36, &bf0[9], &cospi28, &bf0[14], &rounding, bit);
- bf1[15] =
- half_btf_avx2(&cospi4, &bf0[8], &cospi60, &bf0[15], &rounding, bit);
-
- addsub_avx2(bf0[16], bf0[17], bf1 + 16, bf1 + 17, &clamp_lo, &clamp_hi);
- addsub_avx2(bf0[19], bf0[18], bf1 + 19, bf1 + 18, &clamp_lo, &clamp_hi);
- addsub_avx2(bf0[20], bf0[21], bf1 + 20, bf1 + 21, &clamp_lo, &clamp_hi);
- addsub_avx2(bf0[23], bf0[22], bf1 + 23, bf1 + 22, &clamp_lo, &clamp_hi);
- addsub_avx2(bf0[24], bf0[25], bf1 + 24, bf1 + 25, &clamp_lo, &clamp_hi);
- addsub_avx2(bf0[27], bf0[26], bf1 + 27, bf1 + 26, &clamp_lo, &clamp_hi);
- addsub_avx2(bf0[28], bf0[29], bf1 + 28, bf1 + 29, &clamp_lo, &clamp_hi);
- addsub_avx2(bf0[31], bf0[30], bf1 + 31, bf1 + 30, &clamp_lo, &clamp_hi);
-
- // stage 4
- bf0[0] = bf1[0];
- bf0[1] = bf1[1];
- bf0[2] = bf1[2];
- bf0[3] = bf1[3];
- bf0[4] =
- half_btf_avx2(&cospi56, &bf1[4], &cospim8, &bf1[7], &rounding, bit);
- bf0[5] =
- half_btf_avx2(&cospi24, &bf1[5], &cospim40, &bf1[6], &rounding, bit);
- bf0[6] =
- half_btf_avx2(&cospi40, &bf1[5], &cospi24, &bf1[6], &rounding, bit);
- bf0[7] = half_btf_avx2(&cospi8, &bf1[4], &cospi56, &bf1[7], &rounding, bit);
-
- addsub_avx2(bf1[8], bf1[9], bf0 + 8, bf0 + 9, &clamp_lo, &clamp_hi);
- addsub_avx2(bf1[11], bf1[10], bf0 + 11, bf0 + 10, &clamp_lo, &clamp_hi);
- addsub_avx2(bf1[12], bf1[13], bf0 + 12, bf0 + 13, &clamp_lo, &clamp_hi);
- addsub_avx2(bf1[15], bf1[14], bf0 + 15, bf0 + 14, &clamp_lo, &clamp_hi);
-
- bf0[16] = bf1[16];
- bf0[17] =
- half_btf_avx2(&cospim8, &bf1[17], &cospi56, &bf1[30], &rounding, bit);
- bf0[18] =
- half_btf_avx2(&cospim56, &bf1[18], &cospim8, &bf1[29], &rounding, bit);
- bf0[19] = bf1[19];
- bf0[20] = bf1[20];
- bf0[21] =
- half_btf_avx2(&cospim40, &bf1[21], &cospi24, &bf1[26], &rounding, bit);
- bf0[22] =
- half_btf_avx2(&cospim24, &bf1[22], &cospim40, &bf1[25], &rounding, bit);
- bf0[23] = bf1[23];
- bf0[24] = bf1[24];
- bf0[25] =
- half_btf_avx2(&cospim40, &bf1[22], &cospi24, &bf1[25], &rounding, bit);
- bf0[26] =
- half_btf_avx2(&cospi24, &bf1[21], &cospi40, &bf1[26], &rounding, bit);
- bf0[27] = bf1[27];
- bf0[28] = bf1[28];
- bf0[29] =
- half_btf_avx2(&cospim8, &bf1[18], &cospi56, &bf1[29], &rounding, bit);
- bf0[30] =
- half_btf_avx2(&cospi56, &bf1[17], &cospi8, &bf1[30], &rounding, bit);
- bf0[31] = bf1[31];
-
- // stage 5
- bf1[0] =
- half_btf_avx2(&cospi32, &bf0[0], &cospi32, &bf0[1], &rounding, bit);
- bf1[1] =
- half_btf_avx2(&cospi32, &bf0[0], &cospim32, &bf0[1], &rounding, bit);
- bf1[2] =
- half_btf_avx2(&cospi48, &bf0[2], &cospim16, &bf0[3], &rounding, bit);
- bf1[3] =
- half_btf_avx2(&cospi16, &bf0[2], &cospi48, &bf0[3], &rounding, bit);
- addsub_avx2(bf0[4], bf0[5], bf1 + 4, bf1 + 5, &clamp_lo, &clamp_hi);
- addsub_avx2(bf0[7], bf0[6], bf1 + 7, bf1 + 6, &clamp_lo, &clamp_hi);
- bf1[8] = bf0[8];
- bf1[9] =
- half_btf_avx2(&cospim16, &bf0[9], &cospi48, &bf0[14], &rounding, bit);
- bf1[10] =
- half_btf_avx2(&cospim48, &bf0[10], &cospim16, &bf0[13], &rounding, bit);
- bf1[11] = bf0[11];
- bf1[12] = bf0[12];
- bf1[13] =
- half_btf_avx2(&cospim16, &bf0[10], &cospi48, &bf0[13], &rounding, bit);
- bf1[14] =
- half_btf_avx2(&cospi48, &bf0[9], &cospi16, &bf0[14], &rounding, bit);
- bf1[15] = bf0[15];
- addsub_avx2(bf0[16], bf0[19], bf1 + 16, bf1 + 19, &clamp_lo, &clamp_hi);
- addsub_avx2(bf0[17], bf0[18], bf1 + 17, bf1 + 18, &clamp_lo, &clamp_hi);
- addsub_avx2(bf0[23], bf0[20], bf1 + 23, bf1 + 20, &clamp_lo, &clamp_hi);
- addsub_avx2(bf0[22], bf0[21], bf1 + 22, bf1 + 21, &clamp_lo, &clamp_hi);
- addsub_avx2(bf0[24], bf0[27], bf1 + 24, bf1 + 27, &clamp_lo, &clamp_hi);
- addsub_avx2(bf0[25], bf0[26], bf1 + 25, bf1 + 26, &clamp_lo, &clamp_hi);
- addsub_avx2(bf0[31], bf0[28], bf1 + 31, bf1 + 28, &clamp_lo, &clamp_hi);
- addsub_avx2(bf0[30], bf0[29], bf1 + 30, bf1 + 29, &clamp_lo, &clamp_hi);
-
- // stage 6
- addsub_avx2(bf1[0], bf1[3], bf0 + 0, bf0 + 3, &clamp_lo, &clamp_hi);
- addsub_avx2(bf1[1], bf1[2], bf0 + 1, bf0 + 2, &clamp_lo, &clamp_hi);
- bf0[4] = bf1[4];
- bf0[5] =
- half_btf_avx2(&cospim32, &bf1[5], &cospi32, &bf1[6], &rounding, bit);
- bf0[6] =
- half_btf_avx2(&cospi32, &bf1[5], &cospi32, &bf1[6], &rounding, bit);
- bf0[7] = bf1[7];
- addsub_avx2(bf1[8], bf1[11], bf0 + 8, bf0 + 11, &clamp_lo, &clamp_hi);
- addsub_avx2(bf1[9], bf1[10], bf0 + 9, bf0 + 10, &clamp_lo, &clamp_hi);
- addsub_avx2(bf1[15], bf1[12], bf0 + 15, bf0 + 12, &clamp_lo, &clamp_hi);
- addsub_avx2(bf1[14], bf1[13], bf0 + 14, bf0 + 13, &clamp_lo, &clamp_hi);
- bf0[16] = bf1[16];
- bf0[17] = bf1[17];
- bf0[18] =
- half_btf_avx2(&cospim16, &bf1[18], &cospi48, &bf1[29], &rounding, bit);
- bf0[19] =
- half_btf_avx2(&cospim16, &bf1[19], &cospi48, &bf1[28], &rounding, bit);
- bf0[20] =
- half_btf_avx2(&cospim48, &bf1[20], &cospim16, &bf1[27], &rounding, bit);
- bf0[21] =
- half_btf_avx2(&cospim48, &bf1[21], &cospim16, &bf1[26], &rounding, bit);
- bf0[22] = bf1[22];
- bf0[23] = bf1[23];
- bf0[24] = bf1[24];
- bf0[25] = bf1[25];
- bf0[26] =
- half_btf_avx2(&cospim16, &bf1[21], &cospi48, &bf1[26], &rounding, bit);
- bf0[27] =
- half_btf_avx2(&cospim16, &bf1[20], &cospi48, &bf1[27], &rounding, bit);
- bf0[28] =
- half_btf_avx2(&cospi48, &bf1[19], &cospi16, &bf1[28], &rounding, bit);
- bf0[29] =
- half_btf_avx2(&cospi48, &bf1[18], &cospi16, &bf1[29], &rounding, bit);
- bf0[30] = bf1[30];
- bf0[31] = bf1[31];
-
- // stage 7
- addsub_avx2(bf0[0], bf0[7], bf1 + 0, bf1 + 7, &clamp_lo, &clamp_hi);
- addsub_avx2(bf0[1], bf0[6], bf1 + 1, bf1 + 6, &clamp_lo, &clamp_hi);
- addsub_avx2(bf0[2], bf0[5], bf1 + 2, bf1 + 5, &clamp_lo, &clamp_hi);
- addsub_avx2(bf0[3], bf0[4], bf1 + 3, bf1 + 4, &clamp_lo, &clamp_hi);
- bf1[8] = bf0[8];
- bf1[9] = bf0[9];
- bf1[10] =
- half_btf_avx2(&cospim32, &bf0[10], &cospi32, &bf0[13], &rounding, bit);
- bf1[11] =
- half_btf_avx2(&cospim32, &bf0[11], &cospi32, &bf0[12], &rounding, bit);
- bf1[12] =
- half_btf_avx2(&cospi32, &bf0[11], &cospi32, &bf0[12], &rounding, bit);
- bf1[13] =
- half_btf_avx2(&cospi32, &bf0[10], &cospi32, &bf0[13], &rounding, bit);
- bf1[14] = bf0[14];
- bf1[15] = bf0[15];
- addsub_avx2(bf0[16], bf0[23], bf1 + 16, bf1 + 23, &clamp_lo, &clamp_hi);
- addsub_avx2(bf0[17], bf0[22], bf1 + 17, bf1 + 22, &clamp_lo, &clamp_hi);
- addsub_avx2(bf0[18], bf0[21], bf1 + 18, bf1 + 21, &clamp_lo, &clamp_hi);
- addsub_avx2(bf0[19], bf0[20], bf1 + 19, bf1 + 20, &clamp_lo, &clamp_hi);
- addsub_avx2(bf0[31], bf0[24], bf1 + 31, bf1 + 24, &clamp_lo, &clamp_hi);
- addsub_avx2(bf0[30], bf0[25], bf1 + 30, bf1 + 25, &clamp_lo, &clamp_hi);
- addsub_avx2(bf0[29], bf0[26], bf1 + 29, bf1 + 26, &clamp_lo, &clamp_hi);
- addsub_avx2(bf0[28], bf0[27], bf1 + 28, bf1 + 27, &clamp_lo, &clamp_hi);
-
- // stage 8
- addsub_avx2(bf1[0], bf1[15], bf0 + 0, bf0 + 15, &clamp_lo, &clamp_hi);
- addsub_avx2(bf1[1], bf1[14], bf0 + 1, bf0 + 14, &clamp_lo, &clamp_hi);
- addsub_avx2(bf1[2], bf1[13], bf0 + 2, bf0 + 13, &clamp_lo, &clamp_hi);
- addsub_avx2(bf1[3], bf1[12], bf0 + 3, bf0 + 12, &clamp_lo, &clamp_hi);
- addsub_avx2(bf1[4], bf1[11], bf0 + 4, bf0 + 11, &clamp_lo, &clamp_hi);
- addsub_avx2(bf1[5], bf1[10], bf0 + 5, bf0 + 10, &clamp_lo, &clamp_hi);
- addsub_avx2(bf1[6], bf1[9], bf0 + 6, bf0 + 9, &clamp_lo, &clamp_hi);
- addsub_avx2(bf1[7], bf1[8], bf0 + 7, bf0 + 8, &clamp_lo, &clamp_hi);
- bf0[16] = bf1[16];
- bf0[17] = bf1[17];
- bf0[18] = bf1[18];
- bf0[19] = bf1[19];
- bf0[20] =
- half_btf_avx2(&cospim32, &bf1[20], &cospi32, &bf1[27], &rounding, bit);
- bf0[21] =
- half_btf_avx2(&cospim32, &bf1[21], &cospi32, &bf1[26], &rounding, bit);
- bf0[22] =
- half_btf_avx2(&cospim32, &bf1[22], &cospi32, &bf1[25], &rounding, bit);
- bf0[23] =
- half_btf_avx2(&cospim32, &bf1[23], &cospi32, &bf1[24], &rounding, bit);
- bf0[24] =
- half_btf_avx2(&cospi32, &bf1[23], &cospi32, &bf1[24], &rounding, bit);
- bf0[25] =
- half_btf_avx2(&cospi32, &bf1[22], &cospi32, &bf1[25], &rounding, bit);
- bf0[26] =
- half_btf_avx2(&cospi32, &bf1[21], &cospi32, &bf1[26], &rounding, bit);
- bf0[27] =
- half_btf_avx2(&cospi32, &bf1[20], &cospi32, &bf1[27], &rounding, bit);
- bf0[28] = bf1[28];
- bf0[29] = bf1[29];
- bf0[30] = bf1[30];
- bf0[31] = bf1[31];
-
- // stage 9
- if (do_cols) {
- addsub_no_clamp_avx2(bf0[0], bf0[31], out + 0, out + 31);
- addsub_no_clamp_avx2(bf0[1], bf0[30], out + 1, out + 30);
- addsub_no_clamp_avx2(bf0[2], bf0[29], out + 2, out + 29);
- addsub_no_clamp_avx2(bf0[3], bf0[28], out + 3, out + 28);
- addsub_no_clamp_avx2(bf0[4], bf0[27], out + 4, out + 27);
- addsub_no_clamp_avx2(bf0[5], bf0[26], out + 5, out + 26);
- addsub_no_clamp_avx2(bf0[6], bf0[25], out + 6, out + 25);
- addsub_no_clamp_avx2(bf0[7], bf0[24], out + 7, out + 24);
- addsub_no_clamp_avx2(bf0[8], bf0[23], out + 8, out + 23);
- addsub_no_clamp_avx2(bf0[9], bf0[22], out + 9, out + 22);
- addsub_no_clamp_avx2(bf0[10], bf0[21], out + 10, out + 21);
- addsub_no_clamp_avx2(bf0[11], bf0[20], out + 11, out + 20);
- addsub_no_clamp_avx2(bf0[12], bf0[19], out + 12, out + 19);
- addsub_no_clamp_avx2(bf0[13], bf0[18], out + 13, out + 18);
- addsub_no_clamp_avx2(bf0[14], bf0[17], out + 14, out + 17);
- addsub_no_clamp_avx2(bf0[15], bf0[16], out + 15, out + 16);
- } else {
- const int log_range_out = AOMMAX(16, bd + 6);
- const __m256i clamp_lo_out = _mm256_set1_epi32(AOMMAX(
- -(1 << (log_range_out - 1)), -(1 << (log_range - 1 - out_shift))));
- const __m256i clamp_hi_out = _mm256_set1_epi32(AOMMIN(
- (1 << (log_range_out - 1)) - 1, (1 << (log_range - 1 - out_shift))));
-
- addsub_shift_avx2(bf0[0], bf0[31], out + 0, out + 31, &clamp_lo_out,
- &clamp_hi_out, out_shift);
- addsub_shift_avx2(bf0[1], bf0[30], out + 1, out + 30, &clamp_lo_out,
- &clamp_hi_out, out_shift);
- addsub_shift_avx2(bf0[2], bf0[29], out + 2, out + 29, &clamp_lo_out,
- &clamp_hi_out, out_shift);
- addsub_shift_avx2(bf0[3], bf0[28], out + 3, out + 28, &clamp_lo_out,
- &clamp_hi_out, out_shift);
- addsub_shift_avx2(bf0[4], bf0[27], out + 4, out + 27, &clamp_lo_out,
- &clamp_hi_out, out_shift);
- addsub_shift_avx2(bf0[5], bf0[26], out + 5, out + 26, &clamp_lo_out,
- &clamp_hi_out, out_shift);
- addsub_shift_avx2(bf0[6], bf0[25], out + 6, out + 25, &clamp_lo_out,
- &clamp_hi_out, out_shift);
- addsub_shift_avx2(bf0[7], bf0[24], out + 7, out + 24, &clamp_lo_out,
- &clamp_hi_out, out_shift);
- addsub_shift_avx2(bf0[8], bf0[23], out + 8, out + 23, &clamp_lo_out,
- &clamp_hi_out, out_shift);
- addsub_shift_avx2(bf0[9], bf0[22], out + 9, out + 22, &clamp_lo_out,
- &clamp_hi_out, out_shift);
- addsub_shift_avx2(bf0[10], bf0[21], out + 10, out + 21, &clamp_lo_out,
- &clamp_hi_out, out_shift);
- addsub_shift_avx2(bf0[11], bf0[20], out + 11, out + 20, &clamp_lo_out,
- &clamp_hi_out, out_shift);
- addsub_shift_avx2(bf0[12], bf0[19], out + 12, out + 19, &clamp_lo_out,
- &clamp_hi_out, out_shift);
- addsub_shift_avx2(bf0[13], bf0[18], out + 13, out + 18, &clamp_lo_out,
- &clamp_hi_out, out_shift);
- addsub_shift_avx2(bf0[14], bf0[17], out + 14, out + 17, &clamp_lo_out,
- &clamp_hi_out, out_shift);
- addsub_shift_avx2(bf0[15], bf0[16], out + 15, out + 16, &clamp_lo_out,
- &clamp_hi_out, out_shift);
- }
- }
-}
-
-typedef void (*transform_1d_avx2)(__m256i *in, __m256i *out, int bit,
- int do_cols, int bd, int out_shift);
-
-static const transform_1d_avx2
- highbd_txfm_all_1d_zeros_w8_arr[TX_SIZES][ITX_TYPES_1D][4] = {
- {
- { NULL, NULL, NULL, NULL },
- { NULL, NULL, NULL, NULL },
- { NULL, NULL, NULL, NULL },
- },
- { { NULL, NULL, NULL, NULL },
- { NULL, NULL, NULL, NULL },
- { NULL, NULL, NULL, NULL } },
- {
- { NULL, NULL, NULL, NULL },
- { NULL, NULL, NULL, NULL },
- { NULL, NULL, NULL, NULL },
- },
- { { idct32_low1_avx2, idct32_low8_avx2, idct32_low16_avx2, idct32_avx2 },
- { NULL, NULL, NULL, NULL },
- { NULL, NULL, NULL, NULL } },
-
- { { NULL, NULL, NULL, NULL },
- { NULL, NULL, NULL, NULL },
- { NULL, NULL, NULL, NULL } }
- };
-
-static void highbd_inv_txfm2d_add_no_identity_avx2(const int32_t *input,
- uint16_t *output, int stride,
- TX_TYPE tx_type,
- TX_SIZE tx_size, int eob,
- const int bd) {
- __m256i buf1[64 * 2];
- int eobx, eoby;
- get_eobx_eoby_scan_default(&eobx, &eoby, tx_size, eob);
- const int8_t *shift = inv_txfm_shift_ls[tx_size];
- const int txw_idx = get_txw_idx(tx_size);
- const int txh_idx = get_txh_idx(tx_size);
- const int txfm_size_col = tx_size_wide[tx_size];
- const int txfm_size_row = tx_size_high[tx_size];
- const int buf_size_w_div8 = txfm_size_col >> 3;
- const int buf_size_nonzero_w_div8 = (eobx + 8) >> 3;
- const int buf_size_nonzero_h_div8 = (eoby + 8) >> 3;
- const int input_stride = AOMMIN(32, txfm_size_col);
-
- const int fun_idx_x = lowbd_txfm_all_1d_zeros_idx[eobx];
- const int fun_idx_y = lowbd_txfm_all_1d_zeros_idx[eoby];
- const transform_1d_avx2 row_txfm =
- highbd_txfm_all_1d_zeros_w8_arr[txw_idx][hitx_1d_tab[tx_type]][fun_idx_x];
- const transform_1d_avx2 col_txfm =
- highbd_txfm_all_1d_zeros_w8_arr[txh_idx][vitx_1d_tab[tx_type]][fun_idx_y];
-
- assert(col_txfm != NULL);
- assert(row_txfm != NULL);
- int ud_flip, lr_flip;
- get_flip_cfg(tx_type, &ud_flip, &lr_flip);
-
- // 1st stage: column transform
- for (int i = 0; i < buf_size_nonzero_h_div8; i++) {
- __m256i buf0[32];
- const int32_t *input_row = input + i * input_stride * 8;
- for (int j = 0; j < buf_size_nonzero_w_div8; ++j) {
- __m256i *buf0_cur = buf0 + j * 8;
- load_buffer_32x32(input_row + j * 8, buf0_cur, input_stride, 8);
-
- transpose_8x8_avx2(&buf0_cur[0], &buf0_cur[0]);
- }
-
- row_txfm(buf0, buf0, inv_cos_bit_row[txw_idx][txh_idx], 0, bd, -shift[0]);
-
- __m256i *_buf1 = buf1 + i * 8;
- for (int j = 0; j < buf_size_w_div8; ++j) {
- transpose_8x8_avx2(&buf0[j * 8], &_buf1[j * txfm_size_row]);
- }
- }
- // 2nd stage: column transform
- for (int i = 0; i < buf_size_w_div8; i++) {
- col_txfm(buf1 + i * txfm_size_row, buf1 + i * txfm_size_row,
- inv_cos_bit_col[txw_idx][txh_idx], 1, bd, 0);
-
- av1_round_shift_array_32_avx2(buf1 + i * txfm_size_row,
- buf1 + i * txfm_size_row, txfm_size_row,
- -shift[1]);
- }
-
- // write to buffer
- {
- for (int i = 0; i < (txfm_size_col >> 4); i++) {
- highbd_write_buffer_16xn_avx2(buf1 + i * txfm_size_row * 2,
- output + 16 * i, stride, ud_flip,
- txfm_size_row, bd);
- }
- }
-}
-
-void av1_highbd_inv_txfm2d_add_universe_avx2(const int32_t *input,
- uint8_t *output, int stride,
- TX_TYPE tx_type, TX_SIZE tx_size,
- int eob, const int bd) {
- switch (tx_type) {
- case DCT_DCT:
- highbd_inv_txfm2d_add_no_identity_avx2(input, CONVERT_TO_SHORTPTR(output),
- stride, tx_type, tx_size, eob, bd);
- break;
- default: assert(0); break;
- }
-}
-
-void av1_highbd_inv_txfm_add_32x32_avx2(const tran_low_t *input, uint8_t *dest,
- int stride,
- const TxfmParam *txfm_param) {
- const int bd = txfm_param->bd;
- const TX_TYPE tx_type = txfm_param->tx_type;
- const int32_t *src = cast_to_int32(input);
- switch (tx_type) {
- case DCT_DCT:
- av1_highbd_inv_txfm2d_add_universe_avx2(input, dest, stride, tx_type,
- txfm_param->tx_size,
- txfm_param->eob, bd);
- break;
- // Assembly version doesn't support IDTX, so use C version for it.
- case IDTX:
- av1_inv_txfm2d_add_32x32_c(src, CONVERT_TO_SHORTPTR(dest), stride,
- tx_type, bd);
- break;
-
- default: assert(0);
- }
-}
-
-void av1_highbd_inv_txfm_add_avx2(const tran_low_t *input, uint8_t *dest,
- int stride, const TxfmParam *txfm_param) {
- assert(av1_ext_tx_used[txfm_param->tx_set_type][txfm_param->tx_type]);
- const TX_SIZE tx_size = txfm_param->tx_size;
- switch (tx_size) {
- case TX_32X32:
- av1_highbd_inv_txfm_add_32x32_avx2(input, dest, stride, txfm_param);
- break;
- case TX_16X16:
- av1_highbd_inv_txfm_add_16x16_sse4_1(input, dest, stride, txfm_param);
- break;
- case TX_8X8:
- av1_highbd_inv_txfm_add_8x8_sse4_1(input, dest, stride, txfm_param);
- break;
- case TX_4X8:
- av1_highbd_inv_txfm_add_4x8(input, dest, stride, txfm_param);
- break;
- case TX_8X4:
- av1_highbd_inv_txfm_add_8x4(input, dest, stride, txfm_param);
- break;
- case TX_8X16:
- av1_highbd_inv_txfm_add_8x16_sse4_1(input, dest, stride, txfm_param);
- break;
- case TX_16X8:
- av1_highbd_inv_txfm_add_16x8_sse4_1(input, dest, stride, txfm_param);
- break;
- case TX_16X32:
- av1_highbd_inv_txfm_add_16x32(input, dest, stride, txfm_param);
- break;
- case TX_32X16:
- av1_highbd_inv_txfm_add_32x16(input, dest, stride, txfm_param);
- break;
- case TX_32X64:
- av1_highbd_inv_txfm_add_32x64(input, dest, stride, txfm_param);
- break;
- case TX_64X32:
- av1_highbd_inv_txfm_add_64x32(input, dest, stride, txfm_param);
- break;
- case TX_4X4:
- av1_highbd_inv_txfm_add_4x4_sse4_1(input, dest, stride, txfm_param);
- break;
- case TX_16X4:
- av1_highbd_inv_txfm_add_16x4(input, dest, stride, txfm_param);
- break;
- case TX_4X16:
- av1_highbd_inv_txfm_add_4x16(input, dest, stride, txfm_param);
- break;
- case TX_8X32:
- av1_highbd_inv_txfm_add_8x32(input, dest, stride, txfm_param);
- break;
- case TX_32X8:
- av1_highbd_inv_txfm_add_32x8(input, dest, stride, txfm_param);
- break;
- case TX_64X64:
- case TX_16X64:
- case TX_64X16:
- av1_highbd_inv_txfm2d_add_universe_sse4_1(
- input, dest, stride, txfm_param->tx_type, txfm_param->tx_size,
- txfm_param->eob, txfm_param->bd);
- break;
- default: assert(0 && "Invalid transform size"); break;
- }
-}
diff --git a/third_party/aom/av1/common/x86/highbd_inv_txfm_sse4.c b/third_party/aom/av1/common/x86/highbd_inv_txfm_sse4.c
deleted file mode 100644
index e29e0baf5..000000000
--- a/third_party/aom/av1/common/x86/highbd_inv_txfm_sse4.c
+++ /dev/null
@@ -1,5348 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-#include <assert.h>
-#include <smmintrin.h> /* SSE4.1 */
-
-#include "config/aom_config.h"
-#include "config/av1_rtcd.h"
-
-#include "av1/common/av1_inv_txfm1d_cfg.h"
-#include "av1/common/idct.h"
-#include "av1/common/x86/av1_inv_txfm_ssse3.h"
-#include "av1/common/x86/av1_txfm_sse4.h"
-#include "av1/common/x86/highbd_txfm_utility_sse4.h"
-
-static INLINE __m128i highbd_clamp_epi16(__m128i u, int bd) {
- const __m128i zero = _mm_setzero_si128();
- const __m128i one = _mm_set1_epi16(1);
- const __m128i max = _mm_sub_epi16(_mm_slli_epi16(one, bd), one);
- __m128i clamped, mask;
-
- mask = _mm_cmpgt_epi16(u, max);
- clamped = _mm_andnot_si128(mask, u);
- mask = _mm_and_si128(mask, max);
- clamped = _mm_or_si128(mask, clamped);
- mask = _mm_cmpgt_epi16(clamped, zero);
- clamped = _mm_and_si128(clamped, mask);
-
- return clamped;
-}
-
-static INLINE __m128i highbd_get_recon_8x8_sse4_1(const __m128i pred,
- __m128i res0, __m128i res1,
- const int bd) {
- __m128i x0 = _mm_cvtepi16_epi32(pred);
- __m128i x1 = _mm_cvtepi16_epi32(_mm_srli_si128(pred, 8));
-
- x0 = _mm_add_epi32(res0, x0);
- x1 = _mm_add_epi32(res1, x1);
- x0 = _mm_packus_epi32(x0, x1);
- x0 = highbd_clamp_epi16(x0, bd);
- return x0;
-}
-
-static INLINE void highbd_write_buffer_8xn_sse4_1(__m128i *in, uint16_t *output,
- int stride, int flipud,
- int height, const int bd) {
- int j = flipud ? (height - 1) : 0;
- const int step = flipud ? -1 : 1;
- for (int i = 0; i < height; ++i, j += step) {
- __m128i v = _mm_loadu_si128((__m128i const *)(output + i * stride));
- __m128i u = highbd_get_recon_8x8_sse4_1(v, in[j], in[j + height], bd);
-
- _mm_storeu_si128((__m128i *)(output + i * stride), u);
- }
-}
-
-static INLINE void load_buffer_32bit_input(const int32_t *in, int stride,
- __m128i *out, int out_size) {
- for (int i = 0; i < out_size; ++i) {
- out[i] = _mm_loadu_si128((const __m128i *)(in + i * stride));
- }
-}
-
-static INLINE void load_buffer_4x4(const int32_t *coeff, __m128i *in) {
- in[0] = _mm_load_si128((const __m128i *)(coeff + 0));
- in[1] = _mm_load_si128((const __m128i *)(coeff + 4));
- in[2] = _mm_load_si128((const __m128i *)(coeff + 8));
- in[3] = _mm_load_si128((const __m128i *)(coeff + 12));
-}
-
-static void addsub_sse4_1(const __m128i in0, const __m128i in1, __m128i *out0,
- __m128i *out1, const __m128i *clamp_lo,
- const __m128i *clamp_hi) {
- __m128i a0 = _mm_add_epi32(in0, in1);
- __m128i a1 = _mm_sub_epi32(in0, in1);
-
- a0 = _mm_max_epi32(a0, *clamp_lo);
- a0 = _mm_min_epi32(a0, *clamp_hi);
- a1 = _mm_max_epi32(a1, *clamp_lo);
- a1 = _mm_min_epi32(a1, *clamp_hi);
-
- *out0 = a0;
- *out1 = a1;
-}
-
-static void addsub_no_clamp_sse4_1(const __m128i in0, const __m128i in1,
- __m128i *out0, __m128i *out1) {
- __m128i a0 = _mm_add_epi32(in0, in1);
- __m128i a1 = _mm_sub_epi32(in0, in1);
-
- *out0 = a0;
- *out1 = a1;
-}
-
-static void addsub_shift_sse4_1(const __m128i in0, const __m128i in1,
- __m128i *out0, __m128i *out1,
- const __m128i *clamp_lo,
- const __m128i *clamp_hi, int shift) {
- __m128i offset = _mm_set1_epi32((1 << shift) >> 1);
- __m128i in0_w_offset = _mm_add_epi32(in0, offset);
- __m128i a0 = _mm_add_epi32(in0_w_offset, in1);
- __m128i a1 = _mm_sub_epi32(in0_w_offset, in1);
-
- a0 = _mm_sra_epi32(a0, _mm_cvtsi32_si128(shift));
- a1 = _mm_sra_epi32(a1, _mm_cvtsi32_si128(shift));
-
- a0 = _mm_max_epi32(a0, *clamp_lo);
- a0 = _mm_min_epi32(a0, *clamp_hi);
- a1 = _mm_max_epi32(a1, *clamp_lo);
- a1 = _mm_min_epi32(a1, *clamp_hi);
-
- *out0 = a0;
- *out1 = a1;
-}
-
-static INLINE void idct32_stage4_sse4_1(
- __m128i *bf1, const __m128i *cospim8, const __m128i *cospi56,
- const __m128i *cospi8, const __m128i *cospim56, const __m128i *cospim40,
- const __m128i *cospi24, const __m128i *cospi40, const __m128i *cospim24,
- const __m128i *rounding, int bit) {
- __m128i temp1, temp2;
- temp1 = half_btf_sse4_1(cospim8, &bf1[17], cospi56, &bf1[30], rounding, bit);
- bf1[30] = half_btf_sse4_1(cospi56, &bf1[17], cospi8, &bf1[30], rounding, bit);
- bf1[17] = temp1;
-
- temp2 = half_btf_sse4_1(cospim56, &bf1[18], cospim8, &bf1[29], rounding, bit);
- bf1[29] =
- half_btf_sse4_1(cospim8, &bf1[18], cospi56, &bf1[29], rounding, bit);
- bf1[18] = temp2;
-
- temp1 = half_btf_sse4_1(cospim40, &bf1[21], cospi24, &bf1[26], rounding, bit);
- bf1[26] =
- half_btf_sse4_1(cospi24, &bf1[21], cospi40, &bf1[26], rounding, bit);
- bf1[21] = temp1;
-
- temp2 =
- half_btf_sse4_1(cospim24, &bf1[22], cospim40, &bf1[25], rounding, bit);
- bf1[25] =
- half_btf_sse4_1(cospim40, &bf1[22], cospi24, &bf1[25], rounding, bit);
- bf1[22] = temp2;
-}
-
-static INLINE void idct32_stage5_sse4_1(
- __m128i *bf1, const __m128i *cospim16, const __m128i *cospi48,
- const __m128i *cospi16, const __m128i *cospim48, const __m128i *clamp_lo,
- const __m128i *clamp_hi, const __m128i *rounding, int bit) {
- __m128i temp1, temp2;
- temp1 = half_btf_sse4_1(cospim16, &bf1[9], cospi48, &bf1[14], rounding, bit);
- bf1[14] = half_btf_sse4_1(cospi48, &bf1[9], cospi16, &bf1[14], rounding, bit);
- bf1[9] = temp1;
-
- temp2 =
- half_btf_sse4_1(cospim48, &bf1[10], cospim16, &bf1[13], rounding, bit);
- bf1[13] =
- half_btf_sse4_1(cospim16, &bf1[10], cospi48, &bf1[13], rounding, bit);
- bf1[10] = temp2;
-
- addsub_sse4_1(bf1[16], bf1[19], bf1 + 16, bf1 + 19, clamp_lo, clamp_hi);
- addsub_sse4_1(bf1[17], bf1[18], bf1 + 17, bf1 + 18, clamp_lo, clamp_hi);
- addsub_sse4_1(bf1[23], bf1[20], bf1 + 23, bf1 + 20, clamp_lo, clamp_hi);
- addsub_sse4_1(bf1[22], bf1[21], bf1 + 22, bf1 + 21, clamp_lo, clamp_hi);
- addsub_sse4_1(bf1[24], bf1[27], bf1 + 24, bf1 + 27, clamp_lo, clamp_hi);
- addsub_sse4_1(bf1[25], bf1[26], bf1 + 25, bf1 + 26, clamp_lo, clamp_hi);
- addsub_sse4_1(bf1[31], bf1[28], bf1 + 31, bf1 + 28, clamp_lo, clamp_hi);
- addsub_sse4_1(bf1[30], bf1[29], bf1 + 30, bf1 + 29, clamp_lo, clamp_hi);
-}
-
-static INLINE void idct32_stage6_sse4_1(
- __m128i *bf1, const __m128i *cospim32, const __m128i *cospi32,
- const __m128i *cospim16, const __m128i *cospi48, const __m128i *cospi16,
- const __m128i *cospim48, const __m128i *clamp_lo, const __m128i *clamp_hi,
- const __m128i *rounding, int bit) {
- __m128i temp1, temp2;
- temp1 = half_btf_sse4_1(cospim32, &bf1[5], cospi32, &bf1[6], rounding, bit);
- bf1[6] = half_btf_sse4_1(cospi32, &bf1[5], cospi32, &bf1[6], rounding, bit);
- bf1[5] = temp1;
-
- addsub_sse4_1(bf1[8], bf1[11], bf1 + 8, bf1 + 11, clamp_lo, clamp_hi);
- addsub_sse4_1(bf1[9], bf1[10], bf1 + 9, bf1 + 10, clamp_lo, clamp_hi);
- addsub_sse4_1(bf1[15], bf1[12], bf1 + 15, bf1 + 12, clamp_lo, clamp_hi);
- addsub_sse4_1(bf1[14], bf1[13], bf1 + 14, bf1 + 13, clamp_lo, clamp_hi);
-
- temp1 = half_btf_sse4_1(cospim16, &bf1[18], cospi48, &bf1[29], rounding, bit);
- bf1[29] =
- half_btf_sse4_1(cospi48, &bf1[18], cospi16, &bf1[29], rounding, bit);
- bf1[18] = temp1;
- temp2 = half_btf_sse4_1(cospim16, &bf1[19], cospi48, &bf1[28], rounding, bit);
- bf1[28] =
- half_btf_sse4_1(cospi48, &bf1[19], cospi16, &bf1[28], rounding, bit);
- bf1[19] = temp2;
- temp1 =
- half_btf_sse4_1(cospim48, &bf1[20], cospim16, &bf1[27], rounding, bit);
- bf1[27] =
- half_btf_sse4_1(cospim16, &bf1[20], cospi48, &bf1[27], rounding, bit);
- bf1[20] = temp1;
- temp2 =
- half_btf_sse4_1(cospim48, &bf1[21], cospim16, &bf1[26], rounding, bit);
- bf1[26] =
- half_btf_sse4_1(cospim16, &bf1[21], cospi48, &bf1[26], rounding, bit);
- bf1[21] = temp2;
-}
-
-static INLINE void idct32_stage7_sse4_1(__m128i *bf1, const __m128i *cospim32,
- const __m128i *cospi32,
- const __m128i *clamp_lo,
- const __m128i *clamp_hi,
- const __m128i *rounding, int bit) {
- __m128i temp1, temp2;
- addsub_sse4_1(bf1[0], bf1[7], bf1 + 0, bf1 + 7, clamp_lo, clamp_hi);
- addsub_sse4_1(bf1[1], bf1[6], bf1 + 1, bf1 + 6, clamp_lo, clamp_hi);
- addsub_sse4_1(bf1[2], bf1[5], bf1 + 2, bf1 + 5, clamp_lo, clamp_hi);
- addsub_sse4_1(bf1[3], bf1[4], bf1 + 3, bf1 + 4, clamp_lo, clamp_hi);
-
- temp1 = half_btf_sse4_1(cospim32, &bf1[10], cospi32, &bf1[13], rounding, bit);
- bf1[13] =
- half_btf_sse4_1(cospi32, &bf1[10], cospi32, &bf1[13], rounding, bit);
- bf1[10] = temp1;
- temp2 = half_btf_sse4_1(cospim32, &bf1[11], cospi32, &bf1[12], rounding, bit);
- bf1[12] =
- half_btf_sse4_1(cospi32, &bf1[11], cospi32, &bf1[12], rounding, bit);
- bf1[11] = temp2;
-
- addsub_sse4_1(bf1[16], bf1[23], bf1 + 16, bf1 + 23, clamp_lo, clamp_hi);
- addsub_sse4_1(bf1[17], bf1[22], bf1 + 17, bf1 + 22, clamp_lo, clamp_hi);
- addsub_sse4_1(bf1[18], bf1[21], bf1 + 18, bf1 + 21, clamp_lo, clamp_hi);
- addsub_sse4_1(bf1[19], bf1[20], bf1 + 19, bf1 + 20, clamp_lo, clamp_hi);
- addsub_sse4_1(bf1[31], bf1[24], bf1 + 31, bf1 + 24, clamp_lo, clamp_hi);
- addsub_sse4_1(bf1[30], bf1[25], bf1 + 30, bf1 + 25, clamp_lo, clamp_hi);
- addsub_sse4_1(bf1[29], bf1[26], bf1 + 29, bf1 + 26, clamp_lo, clamp_hi);
- addsub_sse4_1(bf1[28], bf1[27], bf1 + 28, bf1 + 27, clamp_lo, clamp_hi);
-}
-
-static INLINE void idct32_stage8_sse4_1(__m128i *bf1, const __m128i *cospim32,
- const __m128i *cospi32,
- const __m128i *clamp_lo,
- const __m128i *clamp_hi,
- const __m128i *rounding, int bit) {
- __m128i temp1, temp2;
- addsub_sse4_1(bf1[0], bf1[15], bf1 + 0, bf1 + 15, clamp_lo, clamp_hi);
- addsub_sse4_1(bf1[1], bf1[14], bf1 + 1, bf1 + 14, clamp_lo, clamp_hi);
- addsub_sse4_1(bf1[2], bf1[13], bf1 + 2, bf1 + 13, clamp_lo, clamp_hi);
- addsub_sse4_1(bf1[3], bf1[12], bf1 + 3, bf1 + 12, clamp_lo, clamp_hi);
- addsub_sse4_1(bf1[4], bf1[11], bf1 + 4, bf1 + 11, clamp_lo, clamp_hi);
- addsub_sse4_1(bf1[5], bf1[10], bf1 + 5, bf1 + 10, clamp_lo, clamp_hi);
- addsub_sse4_1(bf1[6], bf1[9], bf1 + 6, bf1 + 9, clamp_lo, clamp_hi);
- addsub_sse4_1(bf1[7], bf1[8], bf1 + 7, bf1 + 8, clamp_lo, clamp_hi);
-
- temp1 = half_btf_sse4_1(cospim32, &bf1[20], cospi32, &bf1[27], rounding, bit);
- bf1[27] =
- half_btf_sse4_1(cospi32, &bf1[20], cospi32, &bf1[27], rounding, bit);
- bf1[20] = temp1;
- temp2 = half_btf_sse4_1(cospim32, &bf1[21], cospi32, &bf1[26], rounding, bit);
- bf1[26] =
- half_btf_sse4_1(cospi32, &bf1[21], cospi32, &bf1[26], rounding, bit);
- bf1[21] = temp2;
- temp1 = half_btf_sse4_1(cospim32, &bf1[22], cospi32, &bf1[25], rounding, bit);
- bf1[25] =
- half_btf_sse4_1(cospi32, &bf1[22], cospi32, &bf1[25], rounding, bit);
- bf1[22] = temp1;
- temp2 = half_btf_sse4_1(cospim32, &bf1[23], cospi32, &bf1[24], rounding, bit);
- bf1[24] =
- half_btf_sse4_1(cospi32, &bf1[23], cospi32, &bf1[24], rounding, bit);
- bf1[23] = temp2;
-}
-
-static INLINE void idct32_stage9_sse4_1(__m128i *bf1, __m128i *out,
- const int do_cols, const int bd,
- const int out_shift,
- const int log_range) {
- if (do_cols) {
- addsub_no_clamp_sse4_1(bf1[0], bf1[31], out + 0, out + 31);
- addsub_no_clamp_sse4_1(bf1[1], bf1[30], out + 1, out + 30);
- addsub_no_clamp_sse4_1(bf1[2], bf1[29], out + 2, out + 29);
- addsub_no_clamp_sse4_1(bf1[3], bf1[28], out + 3, out + 28);
- addsub_no_clamp_sse4_1(bf1[4], bf1[27], out + 4, out + 27);
- addsub_no_clamp_sse4_1(bf1[5], bf1[26], out + 5, out + 26);
- addsub_no_clamp_sse4_1(bf1[6], bf1[25], out + 6, out + 25);
- addsub_no_clamp_sse4_1(bf1[7], bf1[24], out + 7, out + 24);
- addsub_no_clamp_sse4_1(bf1[8], bf1[23], out + 8, out + 23);
- addsub_no_clamp_sse4_1(bf1[9], bf1[22], out + 9, out + 22);
- addsub_no_clamp_sse4_1(bf1[10], bf1[21], out + 10, out + 21);
- addsub_no_clamp_sse4_1(bf1[11], bf1[20], out + 11, out + 20);
- addsub_no_clamp_sse4_1(bf1[12], bf1[19], out + 12, out + 19);
- addsub_no_clamp_sse4_1(bf1[13], bf1[18], out + 13, out + 18);
- addsub_no_clamp_sse4_1(bf1[14], bf1[17], out + 14, out + 17);
- addsub_no_clamp_sse4_1(bf1[15], bf1[16], out + 15, out + 16);
- } else {
- const int log_range_out = AOMMAX(16, bd + 6);
- const __m128i clamp_lo_out = _mm_set1_epi32(AOMMAX(
- -(1 << (log_range_out - 1)), -(1 << (log_range - 1 - out_shift))));
- const __m128i clamp_hi_out = _mm_set1_epi32(AOMMIN(
- (1 << (log_range_out - 1)) - 1, (1 << (log_range - 1 - out_shift))));
-
- addsub_shift_sse4_1(bf1[0], bf1[31], out + 0, out + 31, &clamp_lo_out,
- &clamp_hi_out, out_shift);
- addsub_shift_sse4_1(bf1[1], bf1[30], out + 1, out + 30, &clamp_lo_out,
- &clamp_hi_out, out_shift);
- addsub_shift_sse4_1(bf1[2], bf1[29], out + 2, out + 29, &clamp_lo_out,
- &clamp_hi_out, out_shift);
- addsub_shift_sse4_1(bf1[3], bf1[28], out + 3, out + 28, &clamp_lo_out,
- &clamp_hi_out, out_shift);
- addsub_shift_sse4_1(bf1[4], bf1[27], out + 4, out + 27, &clamp_lo_out,
- &clamp_hi_out, out_shift);
- addsub_shift_sse4_1(bf1[5], bf1[26], out + 5, out + 26, &clamp_lo_out,
- &clamp_hi_out, out_shift);
- addsub_shift_sse4_1(bf1[6], bf1[25], out + 6, out + 25, &clamp_lo_out,
- &clamp_hi_out, out_shift);
- addsub_shift_sse4_1(bf1[7], bf1[24], out + 7, out + 24, &clamp_lo_out,
- &clamp_hi_out, out_shift);
- addsub_shift_sse4_1(bf1[8], bf1[23], out + 8, out + 23, &clamp_lo_out,
- &clamp_hi_out, out_shift);
- addsub_shift_sse4_1(bf1[9], bf1[22], out + 9, out + 22, &clamp_lo_out,
- &clamp_hi_out, out_shift);
- addsub_shift_sse4_1(bf1[10], bf1[21], out + 10, out + 21, &clamp_lo_out,
- &clamp_hi_out, out_shift);
- addsub_shift_sse4_1(bf1[11], bf1[20], out + 11, out + 20, &clamp_lo_out,
- &clamp_hi_out, out_shift);
- addsub_shift_sse4_1(bf1[12], bf1[19], out + 12, out + 19, &clamp_lo_out,
- &clamp_hi_out, out_shift);
- addsub_shift_sse4_1(bf1[13], bf1[18], out + 13, out + 18, &clamp_lo_out,
- &clamp_hi_out, out_shift);
- addsub_shift_sse4_1(bf1[14], bf1[17], out + 14, out + 17, &clamp_lo_out,
- &clamp_hi_out, out_shift);
- addsub_shift_sse4_1(bf1[15], bf1[16], out + 15, out + 16, &clamp_lo_out,
- &clamp_hi_out, out_shift);
- }
-}
-
-static void neg_shift_sse4_1(const __m128i in0, const __m128i in1,
- __m128i *out0, __m128i *out1,
- const __m128i *clamp_lo, const __m128i *clamp_hi,
- int shift) {
- __m128i offset = _mm_set1_epi32((1 << shift) >> 1);
- __m128i a0 = _mm_add_epi32(offset, in0);
- __m128i a1 = _mm_sub_epi32(offset, in1);
-
- a0 = _mm_sra_epi32(a0, _mm_cvtsi32_si128(shift));
- a1 = _mm_sra_epi32(a1, _mm_cvtsi32_si128(shift));
-
- a0 = _mm_max_epi32(a0, *clamp_lo);
- a0 = _mm_min_epi32(a0, *clamp_hi);
- a1 = _mm_max_epi32(a1, *clamp_lo);
- a1 = _mm_min_epi32(a1, *clamp_hi);
-
- *out0 = a0;
- *out1 = a1;
-}
-
-static void idct4x4_sse4_1(__m128i *in, int bit, int do_cols, int bd) {
- const int32_t *cospi = cospi_arr(bit);
- const __m128i cospi32 = _mm_set1_epi32(cospi[32]);
- const __m128i cospi48 = _mm_set1_epi32(cospi[48]);
- const __m128i cospi16 = _mm_set1_epi32(cospi[16]);
- const __m128i cospim16 = _mm_set1_epi32(-cospi[16]);
- const __m128i rnding = _mm_set1_epi32(1 << (bit - 1));
-
- __m128i u0, u1, u2, u3;
- __m128i v0, v1, v2, v3, x, y;
-
- v0 = _mm_unpacklo_epi32(in[0], in[1]);
- v1 = _mm_unpackhi_epi32(in[0], in[1]);
- v2 = _mm_unpacklo_epi32(in[2], in[3]);
- v3 = _mm_unpackhi_epi32(in[2], in[3]);
-
- u0 = _mm_unpacklo_epi64(v0, v2);
- u1 = _mm_unpackhi_epi64(v0, v2);
- u2 = _mm_unpacklo_epi64(v1, v3);
- u3 = _mm_unpackhi_epi64(v1, v3);
-
- x = _mm_mullo_epi32(u0, cospi32);
- y = _mm_mullo_epi32(u2, cospi32);
- v0 = _mm_add_epi32(x, y);
- v0 = _mm_add_epi32(v0, rnding);
- v0 = _mm_srai_epi32(v0, bit);
-
- v1 = _mm_sub_epi32(x, y);
- v1 = _mm_add_epi32(v1, rnding);
- v1 = _mm_srai_epi32(v1, bit);
-
- x = _mm_mullo_epi32(u1, cospi48);
- y = _mm_mullo_epi32(u3, cospim16);
- v2 = _mm_add_epi32(x, y);
- v2 = _mm_add_epi32(v2, rnding);
- v2 = _mm_srai_epi32(v2, bit);
-
- x = _mm_mullo_epi32(u1, cospi16);
- y = _mm_mullo_epi32(u3, cospi48);
- v3 = _mm_add_epi32(x, y);
- v3 = _mm_add_epi32(v3, rnding);
- v3 = _mm_srai_epi32(v3, bit);
-
- if (do_cols) {
- addsub_no_clamp_sse4_1(v0, v3, in + 0, in + 3);
- addsub_no_clamp_sse4_1(v1, v2, in + 1, in + 2);
- } else {
- const int log_range = AOMMAX(16, bd + 6);
- const __m128i clamp_lo = _mm_set1_epi32(-(1 << (log_range - 1)));
- const __m128i clamp_hi = _mm_set1_epi32((1 << (log_range - 1)) - 1);
- addsub_sse4_1(v0, v3, in + 0, in + 3, &clamp_lo, &clamp_hi);
- addsub_sse4_1(v1, v2, in + 1, in + 2, &clamp_lo, &clamp_hi);
- }
-}
-
-static void iadst4x4_sse4_1(__m128i *in, int bit, int do_cols, int bd) {
- const int32_t *sinpi = sinpi_arr(bit);
- const __m128i rnding = _mm_set1_epi32(1 << (bit - 1));
- const __m128i sinpi1 = _mm_set1_epi32((int)sinpi[1]);
- const __m128i sinpi2 = _mm_set1_epi32((int)sinpi[2]);
- const __m128i sinpi3 = _mm_set1_epi32((int)sinpi[3]);
- const __m128i sinpi4 = _mm_set1_epi32((int)sinpi[4]);
- __m128i t;
- __m128i s0, s1, s2, s3, s4, s5, s6, s7;
- __m128i x0, x1, x2, x3;
- __m128i u0, u1, u2, u3;
- __m128i v0, v1, v2, v3;
-
- v0 = _mm_unpacklo_epi32(in[0], in[1]);
- v1 = _mm_unpackhi_epi32(in[0], in[1]);
- v2 = _mm_unpacklo_epi32(in[2], in[3]);
- v3 = _mm_unpackhi_epi32(in[2], in[3]);
-
- x0 = _mm_unpacklo_epi64(v0, v2);
- x1 = _mm_unpackhi_epi64(v0, v2);
- x2 = _mm_unpacklo_epi64(v1, v3);
- x3 = _mm_unpackhi_epi64(v1, v3);
-
- s0 = _mm_mullo_epi32(x0, sinpi1);
- s1 = _mm_mullo_epi32(x0, sinpi2);
- s2 = _mm_mullo_epi32(x1, sinpi3);
- s3 = _mm_mullo_epi32(x2, sinpi4);
- s4 = _mm_mullo_epi32(x2, sinpi1);
- s5 = _mm_mullo_epi32(x3, sinpi2);
- s6 = _mm_mullo_epi32(x3, sinpi4);
- t = _mm_sub_epi32(x0, x2);
- s7 = _mm_add_epi32(t, x3);
-
- t = _mm_add_epi32(s0, s3);
- s0 = _mm_add_epi32(t, s5);
- t = _mm_sub_epi32(s1, s4);
- s1 = _mm_sub_epi32(t, s6);
- s3 = s2;
- s2 = _mm_mullo_epi32(s7, sinpi3);
-
- u0 = _mm_add_epi32(s0, s3);
- u1 = _mm_add_epi32(s1, s3);
- u2 = s2;
- t = _mm_add_epi32(s0, s1);
- u3 = _mm_sub_epi32(t, s3);
-
- u0 = _mm_add_epi32(u0, rnding);
- u0 = _mm_srai_epi32(u0, bit);
-
- u1 = _mm_add_epi32(u1, rnding);
- u1 = _mm_srai_epi32(u1, bit);
-
- u2 = _mm_add_epi32(u2, rnding);
- u2 = _mm_srai_epi32(u2, bit);
-
- u3 = _mm_add_epi32(u3, rnding);
- u3 = _mm_srai_epi32(u3, bit);
-
- if (!do_cols) {
- const int log_range = AOMMAX(16, bd + 6);
- const __m128i clamp_lo = _mm_set1_epi32(-(1 << (log_range - 1)));
- const __m128i clamp_hi = _mm_set1_epi32((1 << (log_range - 1)) - 1);
-
- u0 = _mm_max_epi32(u0, clamp_lo);
- u0 = _mm_min_epi32(u0, clamp_hi);
- u1 = _mm_max_epi32(u1, clamp_lo);
- u1 = _mm_min_epi32(u1, clamp_hi);
- u2 = _mm_max_epi32(u2, clamp_lo);
- u2 = _mm_min_epi32(u2, clamp_hi);
- u3 = _mm_max_epi32(u3, clamp_lo);
- u3 = _mm_min_epi32(u3, clamp_hi);
- }
-
- in[0] = u0;
- in[1] = u1;
- in[2] = u2;
- in[3] = u3;
-}
-
-// Rounds to nearest and arithmetic-shifts right by `shift` bits every 32-bit
-// lane of the four vectors in[0..3], in place.
-//
-//   in    - four vectors, overwritten with the shifted values.
-//   shift - non-negative shift count. For shift >= 1 the rounding bias is
-//           1 << (shift - 1). For shift == 0 the bias is 0 and the data is
-//           left unchanged; the bias used to be computed as 1 << (shift - 1)
-//           unconditionally, which is an undefined shift by -1 in that case.
-static INLINE void round_shift_4x4(__m128i *in, int shift) {
-  const __m128i rnding = _mm_set1_epi32(shift > 0 ? 1 << (shift - 1) : 0);
-  int i;
-
-  for (i = 0; i < 4; ++i) {
-    in[i] = _mm_srai_epi32(_mm_add_epi32(in[i], rnding), shift);
-  }
-}
-
-// Adds a 4x4 block of 32-bit residuals to the 16-bit pixels at `output` and
-// stores the result back to `output`.
-//
-//   in     - four vectors, one per residual row. Modified in place: rounded
-//            by round_shift_4x4(in, shift) and, when fliplr is set, each
-//            vector has its four lanes reversed.
-//   output - destination pixels; four rows of four uint16_t, `stride`
-//            elements apart. Read as the prediction, then overwritten.
-//   fliplr - nonzero: reverse the lane order inside every residual row.
-//   flipud - nonzero: residual row 3 - r is added to output row r.
-//   shift  - rounding shift forwarded to round_shift_4x4().
-//   bd     - forwarded to highbd_clamp_epi16() for the final clamp.
-static void write_buffer_4x4(__m128i *in, uint16_t *output, int stride,
-                             int fliplr, int flipud, int shift, int bd) {
-  const __m128i zero = _mm_setzero_si128();
-  __m128i pred[4], sum[4];
-  __m128i rows01, rows23;
-  int r;
-
-  round_shift_4x4(in, shift);
-
-  // Fetch all four prediction rows (4 x 16 bit each) and widen to 32 bit.
-  for (r = 0; r < 4; ++r) {
-    const __m128i px = _mm_loadl_epi64((__m128i const *)(output + r * stride));
-    pred[r] = _mm_unpacklo_epi16(px, zero);
-  }
-
-  if (fliplr) {
-    // 0x1B selects lanes 3,2,1,0: a full reversal of the row.
-    for (r = 0; r < 4; ++r) in[r] = _mm_shuffle_epi32(in[r], 0x1B);
-  }
-
-  for (r = 0; r < 4; ++r) {
-    sum[r] = _mm_add_epi32(in[flipud ? 3 - r : r], pred[r]);
-  }
-
-  // Pack two rows per vector (unsigned saturation to 16 bit), then clamp.
-  rows01 = highbd_clamp_epi16(_mm_packus_epi32(sum[0], sum[1]), bd);
-  rows23 = highbd_clamp_epi16(_mm_packus_epi32(sum[2], sum[3]), bd);
-
-  // The low 64 bits hold the even row, the high 64 bits the odd row.
-  _mm_storel_epi64((__m128i *)(output + 0 * stride), rows01);
-  _mm_storel_epi64((__m128i *)(output + 1 * stride),
-                   _mm_unpackhi_epi64(rows01, rows01));
-  _mm_storel_epi64((__m128i *)(output + 2 * stride), rows23);
-  _mm_storel_epi64((__m128i *)(output + 3 * stride),
-                   _mm_unpackhi_epi64(rows23, rows23));
-}
-
-// 4x4 inverse 2-D transform plus reconstruction: loads the coefficients,
-// runs a row pass and a column pass (each either idct4x4_sse4_1 or
-// iadst4x4_sse4_1, chosen by tx_type) and adds the result to `output` via
-// write_buffer_4x4(), optionally flipped left/right and/or up/down.
-//
-// Only the nine DCT/ADST/FLIPADST combinations listed in the switch are
-// handled. Any other tx_type hits assert(0) and, when assertions are
-// compiled out, returns without touching `output`.
-void av1_inv_txfm2d_add_4x4_sse4_1(const int32_t *coeff, uint16_t *output,
-                                   int stride, TX_TYPE tx_type, int bd) {
-  __m128i buf[4];
-  const int8_t *shift = inv_txfm_shift_ls[TX_4X4];
-  const int txw_idx = get_txw_idx(TX_4X4);
-  const int txh_idx = get_txh_idx(TX_4X4);
-  // Decoded from tx_type: which 1-D kernel each pass uses and how the
-  // residual is mirrored when it is written out.
-  int row_adst = 0, col_adst = 0, fliplr = 0, flipud = 0;
-
-  switch (tx_type) {
-    case DCT_DCT: break;
-    case ADST_DCT: col_adst = 1; break;
-    case DCT_ADST: row_adst = 1; break;
-    case ADST_ADST: row_adst = col_adst = 1; break;
-    case FLIPADST_DCT:
-      col_adst = 1;
-      flipud = 1;
-      break;
-    case DCT_FLIPADST:
-      row_adst = 1;
-      fliplr = 1;
-      break;
-    case FLIPADST_FLIPADST:
-      row_adst = col_adst = 1;
-      fliplr = flipud = 1;
-      break;
-    case ADST_FLIPADST:
-      row_adst = col_adst = 1;
-      fliplr = 1;
-      break;
-    case FLIPADST_ADST:
-      row_adst = col_adst = 1;
-      flipud = 1;
-      break;
-    default: assert(0); return;
-  }
-
-  load_buffer_4x4(coeff, buf);
-
-  // Row pass (do_cols = 0).
-  if (row_adst) {
-    iadst4x4_sse4_1(buf, inv_cos_bit_row[txw_idx][txh_idx], 0, bd);
-  } else {
-    idct4x4_sse4_1(buf, inv_cos_bit_row[txw_idx][txh_idx], 0, bd);
-  }
-
-  // Column pass (do_cols = 1).
-  if (col_adst) {
-    iadst4x4_sse4_1(buf, inv_cos_bit_col[txw_idx][txh_idx], 1, bd);
-  } else {
-    idct4x4_sse4_1(buf, inv_cos_bit_col[txw_idx][txh_idx], 1, bd);
-  }
-
-  write_buffer_4x4(buf, output, stride, fliplr, flipud, -shift[1], bd);
-}
-
-// 8x8
-// Loads 64 consecutive 32-bit coefficients from `coeff` into in[0..15],
-// four per vector, so that in[i] holds coeff[4 * i .. 4 * i + 3].
-// Uses aligned loads: `coeff` must be 16-byte aligned.
-static void load_buffer_8x8(const int32_t *coeff, __m128i *in) {
-  int i;
-
-  for (i = 0; i < 16; ++i) {
-    in[i] = _mm_load_si128((const __m128i *)(coeff + 4 * i));
-  }
-}
-
-static void idct8x8_sse4_1(__m128i *in, __m128i *out, int bit, int do_cols,
- int bd, int out_shift) {
- const int32_t *cospi = cospi_arr(bit);
- const __m128i cospi56 = _mm_set1_epi32(cospi[56]);
- const __m128i cospim8 = _mm_set1_epi32(-cospi[8]);
- const __m128i cospi24 = _mm_set1_epi32(cospi[24]);
- const __m128i cospim40 = _mm_set1_epi32(-cospi[40]);
- const __m128i cospi40 = _mm_set1_epi32(cospi[40]);
- const __m128i cospi8 = _mm_set1_epi32(cospi[8]);
- const __m128i cospi32 = _mm_set1_epi32(cospi[32]);
- const __m128i cospi48 = _mm_set1_epi32(cospi[48]);
- const __m128i cospim16 = _mm_set1_epi32(-cospi[16]);
- const __m128i cospi16 = _mm_set1_epi32(cospi[16]);
- const __m128i rnding = _mm_set1_epi32(1 << (bit - 1));
- const int log_range = AOMMAX(16, bd + (do_cols ? 6 : 8));
- const __m128i clamp_lo = _mm_set1_epi32(-(1 << (log_range - 1)));
- const __m128i clamp_hi = _mm_set1_epi32((1 << (log_range - 1)) - 1);
- __m128i u0, u1, u2, u3, u4, u5, u6, u7;
- __m128i v0, v1, v2, v3, v4, v5, v6, v7;
- __m128i x, y;
- int col;
-
- // Note:
- // Even column: 0, 2, ..., 14
- // Odd column: 1, 3, ..., 15
- // one even column plus one odd column constructs one row (8 coeffs)
- // total we have 8 rows (8x8).
- for (col = 0; col < 2; ++col) {
- // stage 0
- // stage 1
- // stage 2
- u0 = in[0 * 2 + col];
- u1 = in[4 * 2 + col];
- u2 = in[2 * 2 + col];
- u3 = in[6 * 2 + col];
-
- x = _mm_mullo_epi32(in[1 * 2 + col], cospi56);
- y = _mm_mullo_epi32(in[7 * 2 + col], cospim8);
- u4 = _mm_add_epi32(x, y);
- u4 = _mm_add_epi32(u4, rnding);
- u4 = _mm_srai_epi32(u4, bit);
-
- x = _mm_mullo_epi32(in[1 * 2 + col], cospi8);
- y = _mm_mullo_epi32(in[7 * 2 + col], cospi56);
- u7 = _mm_add_epi32(x, y);
- u7 = _mm_add_epi32(u7, rnding);
- u7 = _mm_srai_epi32(u7, bit);
-
- x = _mm_mullo_epi32(in[5 * 2 + col], cospi24);
- y = _mm_mullo_epi32(in[3 * 2 + col], cospim40);
- u5 = _mm_add_epi32(x, y);
- u5 = _mm_add_epi32(u5, rnding);
- u5 = _mm_srai_epi32(u5, bit);
-
- x = _mm_mullo_epi32(in[5 * 2 + col], cospi40);
- y = _mm_mullo_epi32(in[3 * 2 + col], cospi24);
- u6 = _mm_add_epi32(x, y);
- u6 = _mm_add_epi32(u6, rnding);
- u6 = _mm_srai_epi32(u6, bit);
-
- // stage 3
- x = _mm_mullo_epi32(u0, cospi32);
- y = _mm_mullo_epi32(u1, cospi32);
- v0 = _mm_add_epi32(x, y);
- v0 = _mm_add_epi32(v0, rnding);
- v0 = _mm_srai_epi32(v0, bit);
-
- v1 = _mm_sub_epi32(x, y);
- v1 = _mm_add_epi32(v1, rnding);
- v1 = _mm_srai_epi32(v1, bit);
-
- x = _mm_mullo_epi32(u2, cospi48);
- y = _mm_mullo_epi32(u3, cospim16);
- v2 = _mm_add_epi32(x, y);
- v2 = _mm_add_epi32(v2, rnding);
- v2 = _mm_srai_epi32(v2, bit);
-
- x = _mm_mullo_epi32(u2, cospi16);
- y = _mm_mullo_epi32(u3, cospi48);
- v3 = _mm_add_epi32(x, y);
- v3 = _mm_add_epi32(v3, rnding);
- v3 = _mm_srai_epi32(v3, bit);
-
- addsub_sse4_1(u4, u5, &v4, &v5, &clamp_lo, &clamp_hi);
- addsub_sse4_1(u7, u6, &v7, &v6, &clamp_lo, &clamp_hi);
-
- // stage 4
- addsub_sse4_1(v0, v3, &u0, &u3, &clamp_lo, &clamp_hi);
- addsub_sse4_1(v1, v2, &u1, &u2, &clamp_lo, &clamp_hi);
- u4 = v4;
- u7 = v7;
-
- x = _mm_mullo_epi32(v5, cospi32);
- y = _mm_mullo_epi32(v6, cospi32);
- u6 = _mm_add_epi32(y, x);
- u6 = _mm_add_epi32(u6, rnding);
- u6 = _mm_srai_epi32(u6, bit);
-
- u5 = _mm_sub_epi32(y, x);
- u5 = _mm_add_epi32(u5, rnding);
- u5 = _mm_srai_epi32(u5, bit);
-
- // stage 5
- if (do_cols) {
- addsub_no_clamp_sse4_1(u0, u7, out + 0 * 2 + col, out + 7 * 2 + col);
- addsub_no_clamp_sse4_1(u1, u6, out + 1 * 2 + col, out + 6 * 2 + col);
- addsub_no_clamp_sse4_1(u2, u5, out + 2 * 2 + col, out + 5 * 2 + col);
- addsub_no_clamp_sse4_1(u3, u4, out + 3 * 2 + col, out + 4 * 2 + col);
- } else {
- const int log_range_out = AOMMAX(16, bd + 6);
- const __m128i clamp_lo_out = _mm_set1_epi32(AOMMAX(
- -(1 << (log_range_out - 1)), -(1 << (log_range - 1 - out_shift))));
- const __m128i clamp_hi_out = _mm_set1_epi32(AOMMIN(
- (1 << (log_range_out - 1)) - 1, (1 << (log_range - 1 - out_shift))));
- addsub_shift_sse4_1(u0, u7, out + 0 * 2 + col, out + 7 * 2 + col,
- &clamp_lo_out, &clamp_hi_out, out_shift);
- addsub_shift_sse4_1(u1, u6, out + 1 * 2 + col, out + 6 * 2 + col,
- &clamp_lo_out, &clamp_hi_out, out_shift);
- addsub_shift_sse4_1(u2, u5, out + 2 * 2 + col, out + 5 * 2 + col,
- &clamp_lo_out, &clamp_hi_out, out_shift);
- addsub_shift_sse4_1(u3, u4, out + 3 * 2 + col, out + 4 * 2 + col,
- &clamp_lo_out, &clamp_hi_out, out_shift);
- }
- }
-}
-
-static void iadst8x8_sse4_1(__m128i *in, __m128i *out, int bit, int do_cols,
- int bd, int out_shift) {
- const int32_t *cospi = cospi_arr(bit);
- const __m128i cospi4 = _mm_set1_epi32(cospi[4]);
- const __m128i cospi60 = _mm_set1_epi32(cospi[60]);
- const __m128i cospi20 = _mm_set1_epi32(cospi[20]);
- const __m128i cospi44 = _mm_set1_epi32(cospi[44]);
- const __m128i cospi36 = _mm_set1_epi32(cospi[36]);
- const __m128i cospi28 = _mm_set1_epi32(cospi[28]);
- const __m128i cospi52 = _mm_set1_epi32(cospi[52]);
- const __m128i cospi12 = _mm_set1_epi32(cospi[12]);
- const __m128i cospi16 = _mm_set1_epi32(cospi[16]);
- const __m128i cospi48 = _mm_set1_epi32(cospi[48]);
- const __m128i cospim48 = _mm_set1_epi32(-cospi[48]);
- const __m128i cospi32 = _mm_set1_epi32(cospi[32]);
- const __m128i rnding = _mm_set1_epi32(1 << (bit - 1));
- const __m128i kZero = _mm_setzero_si128();
- const int log_range = AOMMAX(16, bd + (do_cols ? 6 : 8));
- const __m128i clamp_lo = _mm_set1_epi32(-(1 << (log_range - 1)));
- const __m128i clamp_hi = _mm_set1_epi32((1 << (log_range - 1)) - 1);
- __m128i u[8], v[8], x;
-
- // Even 8 points: 0, 2, ..., 14
- // stage 0
- // stage 1
- // stage 2
- // (1)
- u[0] = _mm_mullo_epi32(in[14], cospi4);
- x = _mm_mullo_epi32(in[0], cospi60);
- u[0] = _mm_add_epi32(u[0], x);
- u[0] = _mm_add_epi32(u[0], rnding);
- u[0] = _mm_srai_epi32(u[0], bit);
-
- u[1] = _mm_mullo_epi32(in[14], cospi60);
- x = _mm_mullo_epi32(in[0], cospi4);
- u[1] = _mm_sub_epi32(u[1], x);
- u[1] = _mm_add_epi32(u[1], rnding);
- u[1] = _mm_srai_epi32(u[1], bit);
-
- // (2)
- u[2] = _mm_mullo_epi32(in[10], cospi20);
- x = _mm_mullo_epi32(in[4], cospi44);
- u[2] = _mm_add_epi32(u[2], x);
- u[2] = _mm_add_epi32(u[2], rnding);
- u[2] = _mm_srai_epi32(u[2], bit);
-
- u[3] = _mm_mullo_epi32(in[10], cospi44);
- x = _mm_mullo_epi32(in[4], cospi20);
- u[3] = _mm_sub_epi32(u[3], x);
- u[3] = _mm_add_epi32(u[3], rnding);
- u[3] = _mm_srai_epi32(u[3], bit);
-
- // (3)
- u[4] = _mm_mullo_epi32(in[6], cospi36);
- x = _mm_mullo_epi32(in[8], cospi28);
- u[4] = _mm_add_epi32(u[4], x);
- u[4] = _mm_add_epi32(u[4], rnding);
- u[4] = _mm_srai_epi32(u[4], bit);
-
- u[5] = _mm_mullo_epi32(in[6], cospi28);
- x = _mm_mullo_epi32(in[8], cospi36);
- u[5] = _mm_sub_epi32(u[5], x);
- u[5] = _mm_add_epi32(u[5], rnding);
- u[5] = _mm_srai_epi32(u[5], bit);
-
- // (4)
- u[6] = _mm_mullo_epi32(in[2], cospi52);
- x = _mm_mullo_epi32(in[12], cospi12);
- u[6] = _mm_add_epi32(u[6], x);
- u[6] = _mm_add_epi32(u[6], rnding);
- u[6] = _mm_srai_epi32(u[6], bit);
-
- u[7] = _mm_mullo_epi32(in[2], cospi12);
- x = _mm_mullo_epi32(in[12], cospi52);
- u[7] = _mm_sub_epi32(u[7], x);
- u[7] = _mm_add_epi32(u[7], rnding);
- u[7] = _mm_srai_epi32(u[7], bit);
-
- // stage 3
- addsub_sse4_1(u[0], u[4], &v[0], &v[4], &clamp_lo, &clamp_hi);
- addsub_sse4_1(u[1], u[5], &v[1], &v[5], &clamp_lo, &clamp_hi);
- addsub_sse4_1(u[2], u[6], &v[2], &v[6], &clamp_lo, &clamp_hi);
- addsub_sse4_1(u[3], u[7], &v[3], &v[7], &clamp_lo, &clamp_hi);
-
- // stage 4
- u[0] = v[0];
- u[1] = v[1];
- u[2] = v[2];
- u[3] = v[3];
-
- u[4] = _mm_mullo_epi32(v[4], cospi16);
- x = _mm_mullo_epi32(v[5], cospi48);
- u[4] = _mm_add_epi32(u[4], x);
- u[4] = _mm_add_epi32(u[4], rnding);
- u[4] = _mm_srai_epi32(u[4], bit);
-
- u[5] = _mm_mullo_epi32(v[4], cospi48);
- x = _mm_mullo_epi32(v[5], cospi16);
- u[5] = _mm_sub_epi32(u[5], x);
- u[5] = _mm_add_epi32(u[5], rnding);
- u[5] = _mm_srai_epi32(u[5], bit);
-
- u[6] = _mm_mullo_epi32(v[6], cospim48);
- x = _mm_mullo_epi32(v[7], cospi16);
- u[6] = _mm_add_epi32(u[6], x);
- u[6] = _mm_add_epi32(u[6], rnding);
- u[6] = _mm_srai_epi32(u[6], bit);
-
- u[7] = _mm_mullo_epi32(v[6], cospi16);
- x = _mm_mullo_epi32(v[7], cospim48);
- u[7] = _mm_sub_epi32(u[7], x);
- u[7] = _mm_add_epi32(u[7], rnding);
- u[7] = _mm_srai_epi32(u[7], bit);
-
- // stage 5
- addsub_sse4_1(u[0], u[2], &v[0], &v[2], &clamp_lo, &clamp_hi);
- addsub_sse4_1(u[1], u[3], &v[1], &v[3], &clamp_lo, &clamp_hi);
- addsub_sse4_1(u[4], u[6], &v[4], &v[6], &clamp_lo, &clamp_hi);
- addsub_sse4_1(u[5], u[7], &v[5], &v[7], &clamp_lo, &clamp_hi);
-
- // stage 6
- u[0] = v[0];
- u[1] = v[1];
- u[4] = v[4];
- u[5] = v[5];
-
- v[0] = _mm_mullo_epi32(v[2], cospi32);
- x = _mm_mullo_epi32(v[3], cospi32);
- u[2] = _mm_add_epi32(v[0], x);
- u[2] = _mm_add_epi32(u[2], rnding);
- u[2] = _mm_srai_epi32(u[2], bit);
-
- u[3] = _mm_sub_epi32(v[0], x);
- u[3] = _mm_add_epi32(u[3], rnding);
- u[3] = _mm_srai_epi32(u[3], bit);
-
- v[0] = _mm_mullo_epi32(v[6], cospi32);
- x = _mm_mullo_epi32(v[7], cospi32);
- u[6] = _mm_add_epi32(v[0], x);
- u[6] = _mm_add_epi32(u[6], rnding);
- u[6] = _mm_srai_epi32(u[6], bit);
-
- u[7] = _mm_sub_epi32(v[0], x);
- u[7] = _mm_add_epi32(u[7], rnding);
- u[7] = _mm_srai_epi32(u[7], bit);
-
- // stage 7
- if (do_cols) {
- out[0] = u[0];
- out[2] = _mm_sub_epi32(kZero, u[4]);
- out[4] = u[6];
- out[6] = _mm_sub_epi32(kZero, u[2]);
- out[8] = u[3];
- out[10] = _mm_sub_epi32(kZero, u[7]);
- out[12] = u[5];
- out[14] = _mm_sub_epi32(kZero, u[1]);
- } else {
- const int log_range_out = AOMMAX(16, bd + 6);
- const __m128i clamp_lo_out = _mm_set1_epi32(-(1 << (log_range_out - 1)));
- const __m128i clamp_hi_out = _mm_set1_epi32((1 << (log_range_out - 1)) - 1);
-
- neg_shift_sse4_1(u[0], u[4], out + 0, out + 2, &clamp_lo_out, &clamp_hi_out,
- out_shift);
- neg_shift_sse4_1(u[6], u[2], out + 4, out + 6, &clamp_lo_out, &clamp_hi_out,
- out_shift);
- neg_shift_sse4_1(u[3], u[7], out + 8, out + 10, &clamp_lo_out,
- &clamp_hi_out, out_shift);
- neg_shift_sse4_1(u[5], u[1], out + 12, out + 14, &clamp_lo_out,
- &clamp_hi_out, out_shift);
- }
-
- // Odd 8 points: 1, 3, ..., 15
- // stage 0
- // stage 1
- // stage 2
- // (1)
- u[0] = _mm_mullo_epi32(in[15], cospi4);
- x = _mm_mullo_epi32(in[1], cospi60);
- u[0] = _mm_add_epi32(u[0], x);
- u[0] = _mm_add_epi32(u[0], rnding);
- u[0] = _mm_srai_epi32(u[0], bit);
-
- u[1] = _mm_mullo_epi32(in[15], cospi60);
- x = _mm_mullo_epi32(in[1], cospi4);
- u[1] = _mm_sub_epi32(u[1], x);
- u[1] = _mm_add_epi32(u[1], rnding);
- u[1] = _mm_srai_epi32(u[1], bit);
-
- // (2)
- u[2] = _mm_mullo_epi32(in[11], cospi20);
- x = _mm_mullo_epi32(in[5], cospi44);
- u[2] = _mm_add_epi32(u[2], x);
- u[2] = _mm_add_epi32(u[2], rnding);
- u[2] = _mm_srai_epi32(u[2], bit);
-
- u[3] = _mm_mullo_epi32(in[11], cospi44);
- x = _mm_mullo_epi32(in[5], cospi20);
- u[3] = _mm_sub_epi32(u[3], x);
- u[3] = _mm_add_epi32(u[3], rnding);
- u[3] = _mm_srai_epi32(u[3], bit);
-
- // (3)
- u[4] = _mm_mullo_epi32(in[7], cospi36);
- x = _mm_mullo_epi32(in[9], cospi28);
- u[4] = _mm_add_epi32(u[4], x);
- u[4] = _mm_add_epi32(u[4], rnding);
- u[4] = _mm_srai_epi32(u[4], bit);
-
- u[5] = _mm_mullo_epi32(in[7], cospi28);
- x = _mm_mullo_epi32(in[9], cospi36);
- u[5] = _mm_sub_epi32(u[5], x);
- u[5] = _mm_add_epi32(u[5], rnding);
- u[5] = _mm_srai_epi32(u[5], bit);
-
- // (4)
- u[6] = _mm_mullo_epi32(in[3], cospi52);
- x = _mm_mullo_epi32(in[13], cospi12);
- u[6] = _mm_add_epi32(u[6], x);
- u[6] = _mm_add_epi32(u[6], rnding);
- u[6] = _mm_srai_epi32(u[6], bit);
-
- u[7] = _mm_mullo_epi32(in[3], cospi12);
- x = _mm_mullo_epi32(in[13], cospi52);
- u[7] = _mm_sub_epi32(u[7], x);
- u[7] = _mm_add_epi32(u[7], rnding);
- u[7] = _mm_srai_epi32(u[7], bit);
-
- // stage 3
- addsub_sse4_1(u[0], u[4], &v[0], &v[4], &clamp_lo, &clamp_hi);
- addsub_sse4_1(u[1], u[5], &v[1], &v[5], &clamp_lo, &clamp_hi);
- addsub_sse4_1(u[2], u[6], &v[2], &v[6], &clamp_lo, &clamp_hi);
- addsub_sse4_1(u[3], u[7], &v[3], &v[7], &clamp_lo, &clamp_hi);
-
- // stage 4
- u[0] = v[0];
- u[1] = v[1];
- u[2] = v[2];
- u[3] = v[3];
-
- u[4] = _mm_mullo_epi32(v[4], cospi16);
- x = _mm_mullo_epi32(v[5], cospi48);
- u[4] = _mm_add_epi32(u[4], x);
- u[4] = _mm_add_epi32(u[4], rnding);
- u[4] = _mm_srai_epi32(u[4], bit);
-
- u[5] = _mm_mullo_epi32(v[4], cospi48);
- x = _mm_mullo_epi32(v[5], cospi16);
- u[5] = _mm_sub_epi32(u[5], x);
- u[5] = _mm_add_epi32(u[5], rnding);
- u[5] = _mm_srai_epi32(u[5], bit);
-
- u[6] = _mm_mullo_epi32(v[6], cospim48);
- x = _mm_mullo_epi32(v[7], cospi16);
- u[6] = _mm_add_epi32(u[6], x);
- u[6] = _mm_add_epi32(u[6], rnding);
- u[6] = _mm_srai_epi32(u[6], bit);
-
- u[7] = _mm_mullo_epi32(v[6], cospi16);
- x = _mm_mullo_epi32(v[7], cospim48);
- u[7] = _mm_sub_epi32(u[7], x);
- u[7] = _mm_add_epi32(u[7], rnding);
- u[7] = _mm_srai_epi32(u[7], bit);
-
- // stage 5
- addsub_sse4_1(u[0], u[2], &v[0], &v[2], &clamp_lo, &clamp_hi);
- addsub_sse4_1(u[1], u[3], &v[1], &v[3], &clamp_lo, &clamp_hi);
- addsub_sse4_1(u[4], u[6], &v[4], &v[6], &clamp_lo, &clamp_hi);
- addsub_sse4_1(u[5], u[7], &v[5], &v[7], &clamp_lo, &clamp_hi);
-
- // stage 6
- u[0] = v[0];
- u[1] = v[1];
- u[4] = v[4];
- u[5] = v[5];
-
- v[0] = _mm_mullo_epi32(v[2], cospi32);
- x = _mm_mullo_epi32(v[3], cospi32);
- u[2] = _mm_add_epi32(v[0], x);
- u[2] = _mm_add_epi32(u[2], rnding);
- u[2] = _mm_srai_epi32(u[2], bit);
-
- u[3] = _mm_sub_epi32(v[0], x);
- u[3] = _mm_add_epi32(u[3], rnding);
- u[3] = _mm_srai_epi32(u[3], bit);
-
- v[0] = _mm_mullo_epi32(v[6], cospi32);
- x = _mm_mullo_epi32(v[7], cospi32);
- u[6] = _mm_add_epi32(v[0], x);
- u[6] = _mm_add_epi32(u[6], rnding);
- u[6] = _mm_srai_epi32(u[6], bit);
-
- u[7] = _mm_sub_epi32(v[0], x);
- u[7] = _mm_add_epi32(u[7], rnding);
- u[7] = _mm_srai_epi32(u[7], bit);
-
- // stage 7
- if (do_cols) {
- out[1] = u[0];
- out[3] = _mm_sub_epi32(kZero, u[4]);
- out[5] = u[6];
- out[7] = _mm_sub_epi32(kZero, u[2]);
- out[9] = u[3];
- out[11] = _mm_sub_epi32(kZero, u[7]);
- out[13] = u[5];
- out[15] = _mm_sub_epi32(kZero, u[1]);
- } else {
- const int log_range_out = AOMMAX(16, bd + 6);
- const __m128i clamp_lo_out = _mm_set1_epi32(-(1 << (log_range_out - 1)));
- const __m128i clamp_hi_out = _mm_set1_epi32((1 << (log_range_out - 1)) - 1);
-
- neg_shift_sse4_1(u[0], u[4], out + 1, out + 3, &clamp_lo_out, &clamp_hi_out,
- out_shift);
- neg_shift_sse4_1(u[6], u[2], out + 5, out + 7, &clamp_lo_out, &clamp_hi_out,
- out_shift);
- neg_shift_sse4_1(u[3], u[7], out + 9, out + 11, &clamp_lo_out,
- &clamp_hi_out, out_shift);
- neg_shift_sse4_1(u[5], u[1], out + 13, out + 15, &clamp_lo_out,
- &clamp_hi_out, out_shift);
- }
-}
-
-// Rounding right-shift of all 16 vectors of an 8x8 block, in place, done
-// as four round_shift_4x4() calls over consecutive groups of four vectors.
-static void round_shift_8x8(__m128i *in, int shift) {
-  int i;
-
-  for (i = 0; i < 16; i += 4) {
-    round_shift_4x4(in + i, shift);
-  }
-}
-
-// Reconstructs one row of eight pixels: prediction plus residual.
-//
-//   pred   - eight 16-bit prediction pixels.
-//   res_lo - 32-bit residuals added to pixels 0-3 (pixels 4-7 when fliplr).
-//   res_hi - 32-bit residuals added to pixels 4-7 (pixels 0-3 when fliplr).
-//   fliplr - nonzero: mirror the eight residuals left/right before adding
-//            (swap the two halves and reverse the lanes in each).
-//   bd     - forwarded to highbd_clamp_epi16().
-//
-// Returns the eight sums packed to 16 bit with unsigned saturation and then
-// passed through highbd_clamp_epi16().
-static __m128i get_recon_8x8(const __m128i pred, __m128i res_lo, __m128i res_hi,
-                             int fliplr, int bd) {
-  const __m128i zero = _mm_setzero_si128();
-  const __m128i pred_lo = _mm_unpacklo_epi16(pred, zero);
-  const __m128i pred_hi = _mm_unpackhi_epi16(pred, zero);
-  __m128i left = res_lo;
-  __m128i right = res_hi;
-
-  if (fliplr) {
-    // 0x1B selects lanes 3,2,1,0, i.e. reverses the four lanes.
-    left = _mm_shuffle_epi32(res_hi, 0x1B);
-    right = _mm_shuffle_epi32(res_lo, 0x1B);
-  }
-
-  return highbd_clamp_epi16(
-      _mm_packus_epi32(_mm_add_epi32(left, pred_lo),
-                       _mm_add_epi32(right, pred_hi)),
-      bd);
-}
-
-// Adds an 8x8 block of 32-bit residuals to the 16-bit pixels at `output`
-// and stores the result back to `output`.
-//
-//   in     - 16 vectors; residual row k is the pair in[2 * k], in[2 * k + 1].
-//            Rounded in place by round_shift_8x8(in, shift).
-//   output - eight rows of eight uint16_t, `stride` elements apart. Accessed
-//            with aligned 128-bit loads/stores, so every row start must be
-//            16-byte aligned.
-//   fliplr - forwarded to get_recon_8x8() (mirror each row).
-//   flipud - nonzero: residual row 7 - r is added to output row r.
-//   bd     - forwarded to get_recon_8x8().
-static void write_buffer_8x8(__m128i *in, uint16_t *output, int stride,
-                             int fliplr, int flipud, int shift, int bd) {
-  __m128i pred[8], recon[8];
-  int row;
-
-  round_shift_8x8(in, shift);
-
-  // All prediction rows are read before any row is written back.
-  for (row = 0; row < 8; ++row) {
-    pred[row] = _mm_load_si128((__m128i const *)(output + row * stride));
-  }
-
-  for (row = 0; row < 8; ++row) {
-    const int src = flipud ? 7 - row : row;
-    recon[row] =
-        get_recon_8x8(pred[row], in[2 * src], in[2 * src + 1], fliplr, bd);
-  }
-
-  for (row = 0; row < 8; ++row) {
-    _mm_store_si128((__m128i *)(output + row * stride), recon[row]);
-  }
-}
-
-// 8x8 inverse 2-D transform plus reconstruction. Pipeline:
-//   load -> transpose -> row pass (do_cols = 0, out_shift = -shift[0])
-//        -> transpose -> column pass (do_cols = 1, out_shift = 0)
-//        -> write_buffer_8x8() with rounding shift -shift[1].
-// tx_type selects idct8x8_sse4_1 or iadst8x8_sse4_1 for each pass and the
-// left/right and up/down flips applied when writing.
-//
-// Only the nine DCT/ADST/FLIPADST combinations listed in the switch are
-// handled. Any other tx_type hits assert(0) and, when assertions are
-// compiled out, returns without touching `output`.
-void av1_inv_txfm2d_add_8x8_sse4_1(const int32_t *coeff, uint16_t *output,
-                                   int stride, TX_TYPE tx_type, int bd) {
-  typedef void (*txfm8x8_fn)(__m128i *in, __m128i *out, int bit, int do_cols,
-                             int bd, int out_shift);
-  __m128i in[16], out[16];
-  const int8_t *shift = inv_txfm_shift_ls[TX_8X8];
-  const int txw_idx = get_txw_idx(TX_8X8);
-  const int txh_idx = get_txh_idx(TX_8X8);
-  int row_adst = 0, col_adst = 0, fliplr = 0, flipud = 0;
-  txfm8x8_fn row_txfm, col_txfm;
-
-  switch (tx_type) {
-    case DCT_DCT: break;
-    case DCT_ADST: row_adst = 1; break;
-    case ADST_DCT: col_adst = 1; break;
-    case ADST_ADST: row_adst = col_adst = 1; break;
-    case FLIPADST_DCT:
-      col_adst = 1;
-      flipud = 1;
-      break;
-    case DCT_FLIPADST:
-      row_adst = 1;
-      fliplr = 1;
-      break;
-    case ADST_FLIPADST:
-      row_adst = col_adst = 1;
-      fliplr = 1;
-      break;
-    case FLIPADST_FLIPADST:
-      row_adst = col_adst = 1;
-      fliplr = flipud = 1;
-      break;
-    case FLIPADST_ADST:
-      row_adst = col_adst = 1;
-      flipud = 1;
-      break;
-    default: assert(0); return;
-  }
-
-  row_txfm = row_adst ? iadst8x8_sse4_1 : idct8x8_sse4_1;
-  col_txfm = col_adst ? iadst8x8_sse4_1 : idct8x8_sse4_1;
-
-  load_buffer_8x8(coeff, in);
-  transpose_8x8(in, out);
-  row_txfm(out, in, inv_cos_bit_row[txw_idx][txh_idx], 0, bd, -shift[0]);
-  transpose_8x8(in, out);
-  col_txfm(out, in, inv_cos_bit_col[txw_idx][txh_idx], 1, bd, 0);
-  write_buffer_8x8(in, output, stride, fliplr, flipud, -shift[1], bd);
-}
-
-// 8-point inverse DCT shortcut that reads only in[0]: the value
-// (in[0] * cospi[32] + rounding) >> bit is computed once and written to all
-// eight outputs out[0..7].
-//
-//   do_cols   - zero: the value is additionally rounded and shifted right by
-//               out_shift and clamped before being stored. nonzero: stored
-//               as is.
-//   out_shift - only used when do_cols is zero.
-static void idct8x8_low1_sse4_1(__m128i *in, __m128i *out, int bit, int do_cols,
-                                int bd, int out_shift) {
-  const int32_t *cospi = cospi_arr(bit);
-  const __m128i w32 = _mm_set1_epi32(cospi[32]);
-  const __m128i rnding = _mm_set1_epi32(1 << (bit - 1));
-  const int log_range = AOMMAX(16, bd + (do_cols ? 6 : 8));
-  __m128i dc;
-  int i;
-
-  // stages 0-3 collapse to a single scaled value.
-  dc = _mm_srai_epi32(_mm_add_epi32(_mm_mullo_epi32(in[0], w32), rnding), bit);
-
-  // stages 4-5: output shift and clamp for the row pass only.
-  if (!do_cols) {
-    const int log_range_out = AOMMAX(16, bd + 6);
-    const int lo = AOMMAX(-(1 << (log_range_out - 1)),
-                          -(1 << (log_range - 1 - out_shift)));
-    const int hi = AOMMIN((1 << (log_range_out - 1)) - 1,
-                          (1 << (log_range - 1 - out_shift)));
-    const __m128i offset = _mm_set1_epi32((1 << out_shift) >> 1);
-
-    dc = _mm_sra_epi32(_mm_add_epi32(dc, offset), _mm_cvtsi32_si128(out_shift));
-    dc = _mm_min_epi32(_mm_max_epi32(dc, _mm_set1_epi32(lo)),
-                       _mm_set1_epi32(hi));
-  }
-
-  for (i = 0; i < 8; ++i) {
-    out[i] = dc;
-  }
-}
-
-static void idct8x8_new_sse4_1(__m128i *in, __m128i *out, int bit, int do_cols,
- int bd, int out_shift) {
- const int32_t *cospi = cospi_arr(bit);
- const __m128i cospi56 = _mm_set1_epi32(cospi[56]);
- const __m128i cospim8 = _mm_set1_epi32(-cospi[8]);
- const __m128i cospi24 = _mm_set1_epi32(cospi[24]);
- const __m128i cospim40 = _mm_set1_epi32(-cospi[40]);
- const __m128i cospi40 = _mm_set1_epi32(cospi[40]);
- const __m128i cospi8 = _mm_set1_epi32(cospi[8]);
- const __m128i cospi32 = _mm_set1_epi32(cospi[32]);
- const __m128i cospi48 = _mm_set1_epi32(cospi[48]);
- const __m128i cospim16 = _mm_set1_epi32(-cospi[16]);
- const __m128i cospi16 = _mm_set1_epi32(cospi[16]);
- const __m128i rnding = _mm_set1_epi32(1 << (bit - 1));
- const int log_range = AOMMAX(16, bd + (do_cols ? 6 : 8));
- const __m128i clamp_lo = _mm_set1_epi32(-(1 << (log_range - 1)));
- const __m128i clamp_hi = _mm_set1_epi32((1 << (log_range - 1)) - 1);
- __m128i u0, u1, u2, u3, u4, u5, u6, u7;
- __m128i v0, v1, v2, v3, v4, v5, v6, v7;
- __m128i x, y;
-
- // stage 0
- // stage 1
- // stage 2
- u0 = in[0];
- u1 = in[4];
- u2 = in[2];
- u3 = in[6];
-
- x = _mm_mullo_epi32(in[1], cospi56);
- y = _mm_mullo_epi32(in[7], cospim8);
- u4 = _mm_add_epi32(x, y);
- u4 = _mm_add_epi32(u4, rnding);
- u4 = _mm_srai_epi32(u4, bit);
-
- x = _mm_mullo_epi32(in[1], cospi8);
- y = _mm_mullo_epi32(in[7], cospi56);
- u7 = _mm_add_epi32(x, y);
- u7 = _mm_add_epi32(u7, rnding);
- u7 = _mm_srai_epi32(u7, bit);
-
- x = _mm_mullo_epi32(in[5], cospi24);
- y = _mm_mullo_epi32(in[3], cospim40);
- u5 = _mm_add_epi32(x, y);
- u5 = _mm_add_epi32(u5, rnding);
- u5 = _mm_srai_epi32(u5, bit);
-
- x = _mm_mullo_epi32(in[5], cospi40);
- y = _mm_mullo_epi32(in[3], cospi24);
- u6 = _mm_add_epi32(x, y);
- u6 = _mm_add_epi32(u6, rnding);
- u6 = _mm_srai_epi32(u6, bit);
-
- // stage 3
- x = _mm_mullo_epi32(u0, cospi32);
- y = _mm_mullo_epi32(u1, cospi32);
- v0 = _mm_add_epi32(x, y);
- v0 = _mm_add_epi32(v0, rnding);
- v0 = _mm_srai_epi32(v0, bit);
-
- v1 = _mm_sub_epi32(x, y);
- v1 = _mm_add_epi32(v1, rnding);
- v1 = _mm_srai_epi32(v1, bit);
-
- x = _mm_mullo_epi32(u2, cospi48);
- y = _mm_mullo_epi32(u3, cospim16);
- v2 = _mm_add_epi32(x, y);
- v2 = _mm_add_epi32(v2, rnding);
- v2 = _mm_srai_epi32(v2, bit);
-
- x = _mm_mullo_epi32(u2, cospi16);
- y = _mm_mullo_epi32(u3, cospi48);
- v3 = _mm_add_epi32(x, y);
- v3 = _mm_add_epi32(v3, rnding);
- v3 = _mm_srai_epi32(v3, bit);
-
- addsub_sse4_1(u4, u5, &v4, &v5, &clamp_lo, &clamp_hi);
- addsub_sse4_1(u7, u6, &v7, &v6, &clamp_lo, &clamp_hi);
-
- // stage 4
- addsub_sse4_1(v0, v3, &u0, &u3, &clamp_lo, &clamp_hi);
- addsub_sse4_1(v1, v2, &u1, &u2, &clamp_lo, &clamp_hi);
- u4 = v4;
- u7 = v7;
-
- x = _mm_mullo_epi32(v5, cospi32);
- y = _mm_mullo_epi32(v6, cospi32);
- u6 = _mm_add_epi32(y, x);
- u6 = _mm_add_epi32(u6, rnding);
- u6 = _mm_srai_epi32(u6, bit);
-
- u5 = _mm_sub_epi32(y, x);
- u5 = _mm_add_epi32(u5, rnding);
- u5 = _mm_srai_epi32(u5, bit);
-
- // stage 5
- if (do_cols) {
- addsub_no_clamp_sse4_1(u0, u7, out + 0, out + 7);
- addsub_no_clamp_sse4_1(u1, u6, out + 1, out + 6);
- addsub_no_clamp_sse4_1(u2, u5, out + 2, out + 5);
- addsub_no_clamp_sse4_1(u3, u4, out + 3, out + 4);
- } else {
- const int log_range_out = AOMMAX(16, bd + 6);
- const __m128i clamp_lo_out = _mm_set1_epi32(AOMMAX(
- -(1 << (log_range_out - 1)), -(1 << (log_range - 1 - out_shift))));
- const __m128i clamp_hi_out = _mm_set1_epi32(AOMMIN(
- (1 << (log_range_out - 1)) - 1, (1 << (log_range - 1 - out_shift))));
- addsub_shift_sse4_1(u0, u7, out + 0, out + 7, &clamp_lo_out, &clamp_hi_out,
- out_shift);
- addsub_shift_sse4_1(u1, u6, out + 1, out + 6, &clamp_lo_out, &clamp_hi_out,
- out_shift);
- addsub_shift_sse4_1(u2, u5, out + 2, out + 5, &clamp_lo_out, &clamp_hi_out,
- out_shift);
- addsub_shift_sse4_1(u3, u4, out + 3, out + 4, &clamp_lo_out, &clamp_hi_out,
- out_shift);
- }
-}
-
-static void iadst8x8_low1_sse4_1(__m128i *in, __m128i *out, int bit,
- int do_cols, int bd, int out_shift) { /*
- * 8-point inverse ADST variant (going by the name) for the case where only
- * the first input vector matters: every intermediate below is derived from
- * in[0] alone. All arithmetic is on packed 32-bit lanes (epi32 intrinsics).
- *
- * in        - input vectors; only in[0] is read.
- * out       - receives the eight result vectors out[0..7].
- * bit       - selects the coefficient table via cospi_arr(bit) and is the
- *             right-shift applied after every multiply.
- * do_cols   - nonzero: results are stored directly, odd outputs negated;
- *             zero: results go through neg_shift_sse4_1 with out_shift and
- *             an output clamp range.
- * bd        - only used to size the output clamp range, AOMMAX(16, bd + 6).
- *             Presumably the sample bit depth; confirm against callers.
- * out_shift - forwarded to neg_shift_sse4_1 on the !do_cols path.
- */
- const int32_t *cospi = cospi_arr(bit);
- const __m128i cospi4 = _mm_set1_epi32(cospi[4]);
- const __m128i cospi60 = _mm_set1_epi32(cospi[60]);
- const __m128i cospi16 = _mm_set1_epi32(cospi[16]);
- const __m128i cospi48 = _mm_set1_epi32(cospi[48]);
- const __m128i cospi32 = _mm_set1_epi32(cospi[32]);
- const __m128i rnding = _mm_set1_epi32(1 << (bit - 1)); // added before each >> bit so the shift rounds
- const __m128i kZero = _mm_setzero_si128();
- __m128i u[8], x;
-
- // stage 0
- // stage 1
- // stage 2: u[0] = (in[0] * cospi60 + rnding) >> bit, u[1] = (-(in[0] * cospi4) + rnding) >> bit
-
- x = _mm_mullo_epi32(in[0], cospi60);
- u[0] = _mm_add_epi32(x, rnding);
- u[0] = _mm_srai_epi32(u[0], bit);
-
- x = _mm_mullo_epi32(in[0], cospi4);
- u[1] = _mm_sub_epi32(kZero, x); // negate the product before rounding
- u[1] = _mm_add_epi32(u[1], rnding);
- u[1] = _mm_srai_epi32(u[1], bit);
-
- // stage 3
- // stage 4: u[4] = u[0] * cospi16 + u[1] * cospi48, u[5] = u[0] * cospi48 - u[1] * cospi16 (each rounded, >> bit)
- __m128i temp1, temp2;
- temp1 = _mm_mullo_epi32(u[0], cospi16);
- x = _mm_mullo_epi32(u[1], cospi48);
- temp1 = _mm_add_epi32(temp1, x);
- temp1 = _mm_add_epi32(temp1, rnding);
- temp1 = _mm_srai_epi32(temp1, bit);
- u[4] = temp1;
-
- temp2 = _mm_mullo_epi32(u[0], cospi48);
- x = _mm_mullo_epi32(u[1], cospi16);
- u[5] = _mm_sub_epi32(temp2, x);
- u[5] = _mm_add_epi32(u[5], rnding);
- u[5] = _mm_srai_epi32(u[5], bit);
-
- // stage 5
- // stage 6: cospi32-scaled sum and difference of (u[0], u[1]) -> u[2], u[3] and of (u[4], u[5]) -> u[6], u[7]
- temp1 = _mm_mullo_epi32(u[0], cospi32); // temp1 and x are shared by the sum and the difference below
- x = _mm_mullo_epi32(u[1], cospi32);
- u[2] = _mm_add_epi32(temp1, x);
- u[2] = _mm_add_epi32(u[2], rnding);
- u[2] = _mm_srai_epi32(u[2], bit);
-
- u[3] = _mm_sub_epi32(temp1, x);
- u[3] = _mm_add_epi32(u[3], rnding);
- u[3] = _mm_srai_epi32(u[3], bit);
-
- temp1 = _mm_mullo_epi32(u[4], cospi32);
- x = _mm_mullo_epi32(u[5], cospi32);
- u[6] = _mm_add_epi32(temp1, x);
- u[6] = _mm_add_epi32(u[6], rnding);
- u[6] = _mm_srai_epi32(u[6], bit);
-
- u[7] = _mm_sub_epi32(temp1, x);
- u[7] = _mm_add_epi32(u[7], rnding);
- u[7] = _mm_srai_epi32(u[7], bit);
-
- // stage 7: output order is u[0], u[4], u[6], u[2], u[3], u[7], u[5], u[1]; odd outputs are negated
- if (do_cols) {
- out[0] = u[0];
- out[1] = _mm_sub_epi32(kZero, u[4]);
- out[2] = u[6];
- out[3] = _mm_sub_epi32(kZero, u[2]);
- out[4] = u[3];
- out[5] = _mm_sub_epi32(kZero, u[7]);
- out[6] = u[5];
- out[7] = _mm_sub_epi32(kZero, u[1]);
- } else {
- const int log_range_out = AOMMAX(16, bd + 6);
- const __m128i clamp_lo_out = _mm_set1_epi32(-(1 << (log_range_out - 1)));
- const __m128i clamp_hi_out = _mm_set1_epi32((1 << (log_range_out - 1)) - 1);
-
- neg_shift_sse4_1(u[0], u[4], out + 0, out + 1, &clamp_lo_out, &clamp_hi_out, /* same pairing as the do_cols branch; presumably the helper shifts both, negates the second and clamps -- TODO confirm */
- out_shift);
- neg_shift_sse4_1(u[6], u[2], out + 2, out + 3, &clamp_lo_out, &clamp_hi_out,
- out_shift);
- neg_shift_sse4_1(u[3], u[7], out + 4, out + 5, &clamp_lo_out, &clamp_hi_out,
- out_shift);
- neg_shift_sse4_1(u[5], u[1], out + 6, out + 7, &clamp_lo_out, &clamp_hi_out,
- out_shift);
- }
-}
-
-static void iadst8x8_new_sse4_1(__m128i *in, __m128i *out, int bit, int do_cols,
- int bd, int out_shift) { /*
- * Full 8-point inverse ADST (going by the name) on packed 32-bit lanes:
- * reads in[0..7] and writes out[0..7].
- *
- * in        - eight input vectors.
- * out       - eight output vectors.
- * bit       - selects the coefficient table via cospi_arr(bit) and is the
- *             right-shift applied after every multiply-accumulate.
- * do_cols   - widens the intermediate clamp range (bd + 6 vs bd + 8) and
- *             selects the final store: direct with sign flips when nonzero,
- *             neg_shift_sse4_1 with out_shift and an output clamp otherwise.
- * bd        - sizes the clamp ranges; presumably the sample bit depth,
- *             confirm against callers.
- * out_shift - forwarded to neg_shift_sse4_1 on the !do_cols path.
- */
- const int32_t *cospi = cospi_arr(bit);
- const __m128i cospi4 = _mm_set1_epi32(cospi[4]);
- const __m128i cospi60 = _mm_set1_epi32(cospi[60]);
- const __m128i cospi20 = _mm_set1_epi32(cospi[20]);
- const __m128i cospi44 = _mm_set1_epi32(cospi[44]);
- const __m128i cospi36 = _mm_set1_epi32(cospi[36]);
- const __m128i cospi28 = _mm_set1_epi32(cospi[28]);
- const __m128i cospi52 = _mm_set1_epi32(cospi[52]);
- const __m128i cospi12 = _mm_set1_epi32(cospi[12]);
- const __m128i cospi16 = _mm_set1_epi32(cospi[16]);
- const __m128i cospi48 = _mm_set1_epi32(cospi[48]);
- const __m128i cospim48 = _mm_set1_epi32(-cospi[48]);
- const __m128i cospi32 = _mm_set1_epi32(cospi[32]);
- const __m128i rnding = _mm_set1_epi32(1 << (bit - 1)); // added before each >> bit so the shift rounds
- const __m128i kZero = _mm_setzero_si128();
- const int log_range = AOMMAX(16, bd + (do_cols ? 6 : 8));
- const __m128i clamp_lo = _mm_set1_epi32(-(1 << (log_range - 1))); // bounds handed to addsub_sse4_1 in stages 3 and 5
- const __m128i clamp_hi = _mm_set1_epi32((1 << (log_range - 1)) - 1);
- __m128i u[8], v[8], x;
-
- // stage 0
- // stage 1
- // stage 2: four rotations, each pairing two inputs with two table entries; (1) uses in[7], in[0] with cospi4/cospi60
-
- u[0] = _mm_mullo_epi32(in[7], cospi4);
- x = _mm_mullo_epi32(in[0], cospi60);
- u[0] = _mm_add_epi32(u[0], x);
- u[0] = _mm_add_epi32(u[0], rnding);
- u[0] = _mm_srai_epi32(u[0], bit);
-
- u[1] = _mm_mullo_epi32(in[7], cospi60);
- x = _mm_mullo_epi32(in[0], cospi4);
- u[1] = _mm_sub_epi32(u[1], x);
- u[1] = _mm_add_epi32(u[1], rnding);
- u[1] = _mm_srai_epi32(u[1], bit);
-
- // (2) in[5], in[2] with cospi20/cospi44
- u[2] = _mm_mullo_epi32(in[5], cospi20);
- x = _mm_mullo_epi32(in[2], cospi44);
- u[2] = _mm_add_epi32(u[2], x);
- u[2] = _mm_add_epi32(u[2], rnding);
- u[2] = _mm_srai_epi32(u[2], bit);
-
- u[3] = _mm_mullo_epi32(in[5], cospi44);
- x = _mm_mullo_epi32(in[2], cospi20);
- u[3] = _mm_sub_epi32(u[3], x);
- u[3] = _mm_add_epi32(u[3], rnding);
- u[3] = _mm_srai_epi32(u[3], bit);
-
- // (3) in[3], in[4] with cospi36/cospi28
- u[4] = _mm_mullo_epi32(in[3], cospi36);
- x = _mm_mullo_epi32(in[4], cospi28);
- u[4] = _mm_add_epi32(u[4], x);
- u[4] = _mm_add_epi32(u[4], rnding);
- u[4] = _mm_srai_epi32(u[4], bit);
-
- u[5] = _mm_mullo_epi32(in[3], cospi28);
- x = _mm_mullo_epi32(in[4], cospi36);
- u[5] = _mm_sub_epi32(u[5], x);
- u[5] = _mm_add_epi32(u[5], rnding);
- u[5] = _mm_srai_epi32(u[5], bit);
-
- // (4) in[1], in[6] with cospi52/cospi12
- u[6] = _mm_mullo_epi32(in[1], cospi52);
- x = _mm_mullo_epi32(in[6], cospi12);
- u[6] = _mm_add_epi32(u[6], x);
- u[6] = _mm_add_epi32(u[6], rnding);
- u[6] = _mm_srai_epi32(u[6], bit);
-
- u[7] = _mm_mullo_epi32(in[1], cospi12);
- x = _mm_mullo_epi32(in[6], cospi52);
- u[7] = _mm_sub_epi32(u[7], x);
- u[7] = _mm_add_epi32(u[7], rnding);
- u[7] = _mm_srai_epi32(u[7], bit);
-
- // stage 3: pair u[i] with u[i + 4] into v[i], v[i + 4]; presumably a clamped sum/difference -- TODO confirm addsub_sse4_1
- addsub_sse4_1(u[0], u[4], &v[0], &v[4], &clamp_lo, &clamp_hi);
- addsub_sse4_1(u[1], u[5], &v[1], &v[5], &clamp_lo, &clamp_hi);
- addsub_sse4_1(u[2], u[6], &v[2], &v[6], &clamp_lo, &clamp_hi);
- addsub_sse4_1(u[3], u[7], &v[3], &v[7], &clamp_lo, &clamp_hi);
-
- // stage 4: v[0..3] pass through unchanged; (v[4], v[5]) and (v[6], v[7]) are rotated with cospi16/cospi48
- u[0] = v[0];
- u[1] = v[1];
- u[2] = v[2];
- u[3] = v[3];
-
- u[4] = _mm_mullo_epi32(v[4], cospi16);
- x = _mm_mullo_epi32(v[5], cospi48);
- u[4] = _mm_add_epi32(u[4], x);
- u[4] = _mm_add_epi32(u[4], rnding);
- u[4] = _mm_srai_epi32(u[4], bit);
-
- u[5] = _mm_mullo_epi32(v[4], cospi48);
- x = _mm_mullo_epi32(v[5], cospi16);
- u[5] = _mm_sub_epi32(u[5], x);
- u[5] = _mm_add_epi32(u[5], rnding);
- u[5] = _mm_srai_epi32(u[5], bit);
-
- u[6] = _mm_mullo_epi32(v[6], cospim48);
- x = _mm_mullo_epi32(v[7], cospi16);
- u[6] = _mm_add_epi32(u[6], x);
- u[6] = _mm_add_epi32(u[6], rnding);
- u[6] = _mm_srai_epi32(u[6], bit);
-
- u[7] = _mm_mullo_epi32(v[6], cospi16);
- x = _mm_mullo_epi32(v[7], cospim48);
- u[7] = _mm_sub_epi32(u[7], x); // subtracting the cospim48 product adds v[7] * cospi[48]
- u[7] = _mm_add_epi32(u[7], rnding);
- u[7] = _mm_srai_epi32(u[7], bit);
-
- // stage 5: pair u[i] with u[i + 2] inside each half
- addsub_sse4_1(u[0], u[2], &v[0], &v[2], &clamp_lo, &clamp_hi);
- addsub_sse4_1(u[1], u[3], &v[1], &v[3], &clamp_lo, &clamp_hi);
- addsub_sse4_1(u[4], u[6], &v[4], &v[6], &clamp_lo, &clamp_hi);
- addsub_sse4_1(u[5], u[7], &v[5], &v[7], &clamp_lo, &clamp_hi);
-
- // stage 6: v[0], v[1], v[4], v[5] pass through; (v[2], v[3]) and (v[6], v[7]) get a cospi32 sum/difference
- u[0] = v[0];
- u[1] = v[1];
- u[4] = v[4];
- u[5] = v[5];
-
- v[0] = _mm_mullo_epi32(v[2], cospi32); // v[0] was already copied to u[0], so it is reused as scratch
- x = _mm_mullo_epi32(v[3], cospi32);
- u[2] = _mm_add_epi32(v[0], x);
- u[2] = _mm_add_epi32(u[2], rnding);
- u[2] = _mm_srai_epi32(u[2], bit);
-
- u[3] = _mm_sub_epi32(v[0], x);
- u[3] = _mm_add_epi32(u[3], rnding);
- u[3] = _mm_srai_epi32(u[3], bit);
-
- v[0] = _mm_mullo_epi32(v[6], cospi32);
- x = _mm_mullo_epi32(v[7], cospi32);
- u[6] = _mm_add_epi32(v[0], x);
- u[6] = _mm_add_epi32(u[6], rnding);
- u[6] = _mm_srai_epi32(u[6], bit);
-
- u[7] = _mm_sub_epi32(v[0], x);
- u[7] = _mm_add_epi32(u[7], rnding);
- u[7] = _mm_srai_epi32(u[7], bit);
-
- // stage 7: output order is u[0], u[4], u[6], u[2], u[3], u[7], u[5], u[1]; odd outputs are negated
- if (do_cols) {
- out[0] = u[0];
- out[1] = _mm_sub_epi32(kZero, u[4]);
- out[2] = u[6];
- out[3] = _mm_sub_epi32(kZero, u[2]);
- out[4] = u[3];
- out[5] = _mm_sub_epi32(kZero, u[7]);
- out[6] = u[5];
- out[7] = _mm_sub_epi32(kZero, u[1]);
- } else {
- const int log_range_out = AOMMAX(16, bd + 6);
- const __m128i clamp_lo_out = _mm_set1_epi32(-(1 << (log_range_out - 1)));
- const __m128i clamp_hi_out = _mm_set1_epi32((1 << (log_range_out - 1)) - 1);
-
- neg_shift_sse4_1(u[0], u[4], out + 0, out + 1, &clamp_lo_out, &clamp_hi_out, /* same pairing as the do_cols branch; presumably the helper shifts both, negates the second and clamps -- TODO confirm */
- out_shift);
- neg_shift_sse4_1(u[6], u[2], out + 2, out + 3, &clamp_lo_out, &clamp_hi_out,
- out_shift);
- neg_shift_sse4_1(u[3], u[7], out + 4, out + 5, &clamp_lo_out, &clamp_hi_out,
- out_shift);
- neg_shift_sse4_1(u[5], u[1], out + 6, out + 7, &clamp_lo_out, &clamp_hi_out,
- out_shift);
- }
-}
-
-static void idct16x16_low1_sse4_1(__m128i *in, __m128i *out, int bit,
- int do_cols, int bd, int out_shift) { /*
- * 16-point inverse DCT variant (going by the name) that uses only in[0]:
- * in[0] is scaled by cospi[32], rounded, clamped, and the same vector is
- * stored to all sixteen outputs.
- *
- * in        - input vectors; only in[0] is read, and it is OVERWRITTEN with
- *             the result, so the caller's buffer changes.
- * out       - out[0..15] all receive the final value of in[0].
- * bit       - selects the coefficient table via cospi_arr(bit) and is the
- *             right-shift applied after the multiply.
- * do_cols   - nonzero: clamp to the log_range bounds only;
- *             zero: round-shift right by out_shift, then clamp to the
- *             output bounds computed below.
- * bd        - sizes the clamp ranges; presumably the sample bit depth,
- *             confirm against callers.
- * out_shift - right-shift applied on the !do_cols path.
- */
- const int32_t *cospi = cospi_arr(bit);
- const __m128i cospi32 = _mm_set1_epi32(cospi[32]);
- const __m128i rnding = _mm_set1_epi32(1 << (bit - 1)); // added before >> bit so the shift rounds
- const int log_range = AOMMAX(16, bd + (do_cols ? 6 : 8));
- const __m128i clamp_lo = _mm_set1_epi32(-(1 << (log_range - 1))); // clamp_lo and clamp_hi are only applied when do_cols is set
- const __m128i clamp_hi = _mm_set1_epi32((1 << (log_range - 1)) - 1);
-
- {
- // stage 0
- // stage 1
- // stage 2
- // stage 3
- // stage 4: in[0] = (in[0] * cospi32 + rnding) >> bit, computed in place
- in[0] = _mm_mullo_epi32(in[0], cospi32);
- in[0] = _mm_add_epi32(in[0], rnding);
- in[0] = _mm_srai_epi32(in[0], bit);
-
- // stage 5
- // stage 6
- // stage 7
- if (do_cols) {
- in[0] = _mm_max_epi32(in[0], clamp_lo);
- in[0] = _mm_min_epi32(in[0], clamp_hi);
- } else {
- const int log_range_out = AOMMAX(16, bd + 6);
- const __m128i clamp_lo_out = _mm_set1_epi32(AOMMAX(
- -(1 << (log_range_out - 1)), -(1 << (log_range - 1 - out_shift))));
- const __m128i clamp_hi_out = _mm_set1_epi32(AOMMIN( /* NOTE(review): the second bound below has no "- 1", unlike the first; confirm whether the asymmetry is intended */
- (1 << (log_range_out - 1)) - 1, (1 << (log_range - 1 - out_shift))));
- __m128i offset = _mm_set1_epi32((1 << out_shift) >> 1); // half of 1 << out_shift, for rounding
- in[0] = _mm_add_epi32(in[0], offset);
- in[0] = _mm_sra_epi32(in[0], _mm_cvtsi32_si128(out_shift)); // arithmetic shift with a run-time count
- in[0] = _mm_max_epi32(in[0], clamp_lo_out);
- in[0] = _mm_min_epi32(in[0], clamp_hi_out);
- }
-
- out[0] = in[0]; // every output gets the same vector
- out[1] = in[0];
- out[2] = in[0];
- out[3] = in[0];
- out[4] = in[0];
- out[5] = in[0];
- out[6] = in[0];
- out[7] = in[0];
- out[8] = in[0];
- out[9] = in[0];
- out[10] = in[0];
- out[11] = in[0];
- out[12] = in[0];
- out[13] = in[0];
- out[14] = in[0];
- out[15] = in[0];
- }
-}
-
-static void idct16x16_low8_sse4_1(__m128i *in, __m128i *out, int bit,
- int do_cols, int bd, int out_shift) { /*
- * 16-point inverse DCT variant (going by the name) that reads only
- * in[0..7] and writes out[0..15], on packed 32-bit lanes. The working
- * array u[] is updated in place from stage to stage, so the order of the
- * statements inside each stage matters.
- *
- * in        - input vectors; in[0..7] are read.
- * out       - sixteen output vectors.
- * bit       - selects the coefficient table via cospi_arr(bit) and is the
- *             right-shift applied after every multiply.
- * do_cols   - widens the intermediate clamp range (bd + 6 vs bd + 8) and
- *             selects the final stage: addsub_no_clamp_sse4_1 when nonzero,
- *             addsub_shift_sse4_1 with out_shift and output bounds otherwise.
- * bd        - sizes the clamp ranges; presumably the sample bit depth,
- *             confirm against callers.
- * out_shift - used on the !do_cols path.
- *
- * half_btf_0_sse4_1(w, n, r, bit) presumably yields (w * n + r) >> bit and
- * half_btf_sse4_1 the two-term form; both are defined outside this view,
- * TODO confirm.
- */
- const int32_t *cospi = cospi_arr(bit);
- const __m128i cospi60 = _mm_set1_epi32(cospi[60]);
- const __m128i cospi28 = _mm_set1_epi32(cospi[28]);
- const __m128i cospi44 = _mm_set1_epi32(cospi[44]);
- const __m128i cospi20 = _mm_set1_epi32(cospi[20]);
- const __m128i cospi12 = _mm_set1_epi32(cospi[12]);
- const __m128i cospi4 = _mm_set1_epi32(cospi[4]);
- const __m128i cospi56 = _mm_set1_epi32(cospi[56]);
- const __m128i cospi24 = _mm_set1_epi32(cospi[24]);
- const __m128i cospim40 = _mm_set1_epi32(-cospi[40]);
- const __m128i cospi8 = _mm_set1_epi32(cospi[8]);
- const __m128i cospi32 = _mm_set1_epi32(cospi[32]);
- const __m128i cospi48 = _mm_set1_epi32(cospi[48]);
- const __m128i cospi16 = _mm_set1_epi32(cospi[16]);
- const __m128i cospim16 = _mm_set1_epi32(-cospi[16]);
- const __m128i cospim48 = _mm_set1_epi32(-cospi[48]);
- const __m128i cospim36 = _mm_set1_epi32(-cospi[36]);
- const __m128i cospim52 = _mm_set1_epi32(-cospi[52]);
- const __m128i rnding = _mm_set1_epi32(1 << (bit - 1)); // added before each >> bit so the shift rounds
- const int log_range = AOMMAX(16, bd + (do_cols ? 6 : 8));
- const __m128i clamp_lo = _mm_set1_epi32(-(1 << (log_range - 1)));
- const __m128i clamp_hi = _mm_set1_epi32((1 << (log_range - 1)) - 1);
- __m128i u[16], x, y;
-
- {
- // stage 0
- // stage 1: the eight inputs are placed in the even slots of u[]; the odd slots are produced in later stages
- u[0] = in[0];
- u[2] = in[4];
- u[4] = in[2];
- u[6] = in[6];
- u[8] = in[1];
- u[10] = in[5];
- u[12] = in[3];
- u[14] = in[7];
-
- // stage 2: each pair writes the new slot first, then overwrites the slot it was read from
- u[15] = half_btf_0_sse4_1(&cospi4, &u[8], &rnding, bit);
- u[8] = half_btf_0_sse4_1(&cospi60, &u[8], &rnding, bit);
-
- u[9] = half_btf_0_sse4_1(&cospim36, &u[14], &rnding, bit);
- u[14] = half_btf_0_sse4_1(&cospi28, &u[14], &rnding, bit);
-
- u[13] = half_btf_0_sse4_1(&cospi20, &u[10], &rnding, bit);
- u[10] = half_btf_0_sse4_1(&cospi44, &u[10], &rnding, bit);
-
- u[11] = half_btf_0_sse4_1(&cospim52, &u[12], &rnding, bit);
- u[12] = half_btf_0_sse4_1(&cospi12, &u[12], &rnding, bit);
-
- // stage 3: same write-new-slot-first pattern for u[4..7], then pairwise addsub on u[8..15]
- u[7] = half_btf_0_sse4_1(&cospi8, &u[4], &rnding, bit);
- u[4] = half_btf_0_sse4_1(&cospi56, &u[4], &rnding, bit);
- u[5] = half_btf_0_sse4_1(&cospim40, &u[6], &rnding, bit);
- u[6] = half_btf_0_sse4_1(&cospi24, &u[6], &rnding, bit);
-
- addsub_sse4_1(u[8], u[9], &u[8], &u[9], &clamp_lo, &clamp_hi); // inputs are passed by value, so writing back to the same slots is safe
- addsub_sse4_1(u[11], u[10], &u[11], &u[10], &clamp_lo, &clamp_hi);
- addsub_sse4_1(u[12], u[13], &u[12], &u[13], &clamp_lo, &clamp_hi);
- addsub_sse4_1(u[15], u[14], &u[15], &u[14], &clamp_lo, &clamp_hi);
-
- // stage 4: u[0] and u[1] both become (u[0] * cospi32 + rnding) >> bit
- x = _mm_mullo_epi32(u[0], cospi32);
- u[0] = _mm_add_epi32(x, rnding);
- u[0] = _mm_srai_epi32(u[0], bit);
- u[1] = u[0];
-
- u[3] = half_btf_0_sse4_1(&cospi16, &u[2], &rnding, bit);
- u[2] = half_btf_0_sse4_1(&cospi48, &u[2], &rnding, bit);
-
- addsub_sse4_1(u[4], u[5], &u[4], &u[5], &clamp_lo, &clamp_hi);
- addsub_sse4_1(u[7], u[6], &u[7], &u[6], &clamp_lo, &clamp_hi);
-
- x = half_btf_sse4_1(&cospim16, &u[9], &cospi48, &u[14], &rnding, bit); // held in x because u[9] is still needed on the next line
- u[14] = half_btf_sse4_1(&cospi48, &u[9], &cospi16, &u[14], &rnding, bit);
- u[9] = x;
- y = half_btf_sse4_1(&cospim48, &u[10], &cospim16, &u[13], &rnding, bit); // same reason: u[10] is read again below
- u[13] = half_btf_sse4_1(&cospim16, &u[10], &cospi48, &u[13], &rnding, bit);
- u[10] = y;
-
- // stage 5
- addsub_sse4_1(u[0], u[3], &u[0], &u[3], &clamp_lo, &clamp_hi);
- addsub_sse4_1(u[1], u[2], &u[1], &u[2], &clamp_lo, &clamp_hi);
-
- x = _mm_mullo_epi32(u[5], cospi32); // both products are formed before either slot is overwritten
- y = _mm_mullo_epi32(u[6], cospi32);
- u[5] = _mm_sub_epi32(y, x);
- u[5] = _mm_add_epi32(u[5], rnding);
- u[5] = _mm_srai_epi32(u[5], bit);
-
- u[6] = _mm_add_epi32(y, x);
- u[6] = _mm_add_epi32(u[6], rnding);
- u[6] = _mm_srai_epi32(u[6], bit);
-
- addsub_sse4_1(u[8], u[11], &u[8], &u[11], &clamp_lo, &clamp_hi);
- addsub_sse4_1(u[9], u[10], &u[9], &u[10], &clamp_lo, &clamp_hi);
- addsub_sse4_1(u[15], u[12], &u[15], &u[12], &clamp_lo, &clamp_hi);
- addsub_sse4_1(u[14], u[13], &u[14], &u[13], &clamp_lo, &clamp_hi);
-
- // stage 6: mirror pairing (i, 7 - i) on u[0..7], then cospi32 sum/difference on (u[10], u[13]) and (u[11], u[12])
- addsub_sse4_1(u[0], u[7], &u[0], &u[7], &clamp_lo, &clamp_hi);
- addsub_sse4_1(u[1], u[6], &u[1], &u[6], &clamp_lo, &clamp_hi);
- addsub_sse4_1(u[2], u[5], &u[2], &u[5], &clamp_lo, &clamp_hi);
- addsub_sse4_1(u[3], u[4], &u[3], &u[4], &clamp_lo, &clamp_hi);
-
- x = _mm_mullo_epi32(u[10], cospi32);
- y = _mm_mullo_epi32(u[13], cospi32);
- u[10] = _mm_sub_epi32(y, x);
- u[10] = _mm_add_epi32(u[10], rnding);
- u[10] = _mm_srai_epi32(u[10], bit);
-
- u[13] = _mm_add_epi32(x, y);
- u[13] = _mm_add_epi32(u[13], rnding);
- u[13] = _mm_srai_epi32(u[13], bit);
-
- x = _mm_mullo_epi32(u[11], cospi32);
- y = _mm_mullo_epi32(u[12], cospi32);
- u[11] = _mm_sub_epi32(y, x);
- u[11] = _mm_add_epi32(u[11], rnding);
- u[11] = _mm_srai_epi32(u[11], bit);
-
- u[12] = _mm_add_epi32(x, y);
- u[12] = _mm_add_epi32(u[12], rnding);
- u[12] = _mm_srai_epi32(u[12], bit);
- // stage 7: mirror pairing (i, 15 - i) written to out[]
- if (do_cols) {
- addsub_no_clamp_sse4_1(u[0], u[15], out + 0, out + 15);
- addsub_no_clamp_sse4_1(u[1], u[14], out + 1, out + 14);
- addsub_no_clamp_sse4_1(u[2], u[13], out + 2, out + 13);
- addsub_no_clamp_sse4_1(u[3], u[12], out + 3, out + 12);
- addsub_no_clamp_sse4_1(u[4], u[11], out + 4, out + 11);
- addsub_no_clamp_sse4_1(u[5], u[10], out + 5, out + 10);
- addsub_no_clamp_sse4_1(u[6], u[9], out + 6, out + 9);
- addsub_no_clamp_sse4_1(u[7], u[8], out + 7, out + 8);
- } else {
- const int log_range_out = AOMMAX(16, bd + 6);
- const __m128i clamp_lo_out = _mm_set1_epi32(AOMMAX(
- -(1 << (log_range_out - 1)), -(1 << (log_range - 1 - out_shift))));
- const __m128i clamp_hi_out = _mm_set1_epi32(AOMMIN( /* NOTE(review): the second bound below has no "- 1", unlike the first; confirm whether the asymmetry is intended */
- (1 << (log_range_out - 1)) - 1, (1 << (log_range - 1 - out_shift))));
-
- addsub_shift_sse4_1(u[0], u[15], out + 0, out + 15, &clamp_lo_out,
- &clamp_hi_out, out_shift);
- addsub_shift_sse4_1(u[1], u[14], out + 1, out + 14, &clamp_lo_out,
- &clamp_hi_out, out_shift);
- addsub_shift_sse4_1(u[2], u[13], out + 2, out + 13, &clamp_lo_out,
- &clamp_hi_out, out_shift);
- addsub_shift_sse4_1(u[3], u[12], out + 3, out + 12, &clamp_lo_out,
- &clamp_hi_out, out_shift);
- addsub_shift_sse4_1(u[4], u[11], out + 4, out + 11, &clamp_lo_out,
- &clamp_hi_out, out_shift);
- addsub_shift_sse4_1(u[5], u[10], out + 5, out + 10, &clamp_lo_out,
- &clamp_hi_out, out_shift);
- addsub_shift_sse4_1(u[6], u[9], out + 6, out + 9, &clamp_lo_out,
- &clamp_hi_out, out_shift);
- addsub_shift_sse4_1(u[7], u[8], out + 7, out + 8, &clamp_lo_out,
- &clamp_hi_out, out_shift);
- }
- }
-}
-
-static void iadst16x16_low1_sse4_1(__m128i *in, __m128i *out, int bit,
- int do_cols, int bd, int out_shift) { /*
- * 16-point inverse ADST variant (going by the name) that uses only in[0]:
- * all sixteen results are derived from that single vector. Arithmetic is
- * on packed 32-bit lanes.
- *
- * in        - input vectors; only in[0] is read.
- * out       - sixteen output vectors.
- * bit       - selects the coefficient table via cospi_arr(bit) and is the
- *             right-shift applied after every multiply.
- * do_cols   - nonzero: results are stored directly, odd outputs negated;
- *             zero: results go through neg_shift_sse4_1 with out_shift and
- *             an output clamp range.
- * bd        - only used to size the output clamp range, AOMMAX(16, bd + 6).
- *             Presumably the sample bit depth; confirm against callers.
- * out_shift - forwarded to neg_shift_sse4_1 on the !do_cols path.
- */
- const int32_t *cospi = cospi_arr(bit);
- const __m128i cospi2 = _mm_set1_epi32(cospi[2]);
- const __m128i cospi62 = _mm_set1_epi32(cospi[62]);
- const __m128i cospi8 = _mm_set1_epi32(cospi[8]);
- const __m128i cospi56 = _mm_set1_epi32(cospi[56]);
- const __m128i cospi48 = _mm_set1_epi32(cospi[48]);
- const __m128i cospi16 = _mm_set1_epi32(cospi[16]);
- const __m128i cospi32 = _mm_set1_epi32(cospi[32]);
- const __m128i rnding = _mm_set1_epi32(1 << (bit - 1)); // added before each >> bit so the shift rounds
- const __m128i zero = _mm_setzero_si128(); // used in stage 2 only; stage 9 builds its own zero vectors
- __m128i v[16], x, y, temp1, temp2;
-
- // Calculate the column 0, 1, 2, 3
- {
- // stage 0
- // stage 1
- // stage 2: v[0] = (in[0] * cospi62 + rnding) >> bit, v[1] = (-(in[0] * cospi2) + rnding) >> bit
- x = _mm_mullo_epi32(in[0], cospi62);
- v[0] = _mm_add_epi32(x, rnding);
- v[0] = _mm_srai_epi32(v[0], bit);
-
- x = _mm_mullo_epi32(in[0], cospi2);
- v[1] = _mm_sub_epi32(zero, x);
- v[1] = _mm_add_epi32(v[1], rnding);
- v[1] = _mm_srai_epi32(v[1], bit);
-
- // stage 3: plain copies of v[0], v[1]
- v[8] = v[0];
- v[9] = v[1];
-
- // stage 4: rotate (v[8], v[9]) with cospi8/cospi56; temporaries keep both inputs intact until both results exist
- temp1 = _mm_mullo_epi32(v[8], cospi8);
- x = _mm_mullo_epi32(v[9], cospi56);
- temp1 = _mm_add_epi32(temp1, x);
- temp1 = _mm_add_epi32(temp1, rnding);
- temp1 = _mm_srai_epi32(temp1, bit);
-
- temp2 = _mm_mullo_epi32(v[8], cospi56);
- x = _mm_mullo_epi32(v[9], cospi8);
- temp2 = _mm_sub_epi32(temp2, x);
- temp2 = _mm_add_epi32(temp2, rnding);
- temp2 = _mm_srai_epi32(temp2, bit);
- v[8] = temp1;
- v[9] = temp2;
-
- // stage 5: plain copies
- v[4] = v[0];
- v[5] = v[1];
- v[12] = v[8];
- v[13] = v[9];
-
- // stage 6: rotate (v[4], v[5]) and (v[12], v[13]) with cospi16/cospi48
- temp1 = _mm_mullo_epi32(v[4], cospi16);
- x = _mm_mullo_epi32(v[5], cospi48);
- temp1 = _mm_add_epi32(temp1, x);
- temp1 = _mm_add_epi32(temp1, rnding);
- temp1 = _mm_srai_epi32(temp1, bit);
-
- temp2 = _mm_mullo_epi32(v[4], cospi48);
- x = _mm_mullo_epi32(v[5], cospi16);
- temp2 = _mm_sub_epi32(temp2, x);
- temp2 = _mm_add_epi32(temp2, rnding);
- temp2 = _mm_srai_epi32(temp2, bit);
- v[4] = temp1;
- v[5] = temp2;
-
- temp1 = _mm_mullo_epi32(v[12], cospi16);
- x = _mm_mullo_epi32(v[13], cospi48);
- temp1 = _mm_add_epi32(temp1, x);
- temp1 = _mm_add_epi32(temp1, rnding);
- temp1 = _mm_srai_epi32(temp1, bit);
-
- temp2 = _mm_mullo_epi32(v[12], cospi48);
- x = _mm_mullo_epi32(v[13], cospi16);
- temp2 = _mm_sub_epi32(temp2, x);
- temp2 = _mm_add_epi32(temp2, rnding);
- temp2 = _mm_srai_epi32(temp2, bit);
- v[12] = temp1;
- v[13] = temp2;
-
- // stage 7: plain copies into the remaining slots
- v[2] = v[0];
- v[3] = v[1];
- v[6] = v[4];
- v[7] = v[5];
- v[10] = v[8];
- v[11] = v[9];
- v[14] = v[12];
- v[15] = v[13];
-
- // stage 8: cospi32-scaled sum and difference on (v[2], v[3]), (v[6], v[7]), (v[10], v[11]), (v[14], v[15])
- y = _mm_mullo_epi32(v[2], cospi32); // both products are formed before either slot is overwritten
- x = _mm_mullo_epi32(v[3], cospi32);
- v[2] = _mm_add_epi32(y, x);
- v[2] = _mm_add_epi32(v[2], rnding);
- v[2] = _mm_srai_epi32(v[2], bit);
-
- v[3] = _mm_sub_epi32(y, x);
- v[3] = _mm_add_epi32(v[3], rnding);
- v[3] = _mm_srai_epi32(v[3], bit);
-
- y = _mm_mullo_epi32(v[6], cospi32);
- x = _mm_mullo_epi32(v[7], cospi32);
- v[6] = _mm_add_epi32(y, x);
- v[6] = _mm_add_epi32(v[6], rnding);
- v[6] = _mm_srai_epi32(v[6], bit);
-
- v[7] = _mm_sub_epi32(y, x);
- v[7] = _mm_add_epi32(v[7], rnding);
- v[7] = _mm_srai_epi32(v[7], bit);
-
- y = _mm_mullo_epi32(v[10], cospi32);
- x = _mm_mullo_epi32(v[11], cospi32);
- v[10] = _mm_add_epi32(y, x);
- v[10] = _mm_add_epi32(v[10], rnding);
- v[10] = _mm_srai_epi32(v[10], bit);
-
- v[11] = _mm_sub_epi32(y, x);
- v[11] = _mm_add_epi32(v[11], rnding);
- v[11] = _mm_srai_epi32(v[11], bit);
-
- y = _mm_mullo_epi32(v[14], cospi32);
- x = _mm_mullo_epi32(v[15], cospi32);
- v[14] = _mm_add_epi32(y, x);
- v[14] = _mm_add_epi32(v[14], rnding);
- v[14] = _mm_srai_epi32(v[14], bit);
-
- v[15] = _mm_sub_epi32(y, x);
- v[15] = _mm_add_epi32(v[15], rnding);
- v[15] = _mm_srai_epi32(v[15], bit);
-
- // stage 9: permuted store; odd outputs are negated
- if (do_cols) {
- out[0] = v[0];
- out[1] = _mm_sub_epi32(_mm_setzero_si128(), v[8]);
- out[2] = v[12];
- out[3] = _mm_sub_epi32(_mm_setzero_si128(), v[4]);
- out[4] = v[6];
- out[5] = _mm_sub_epi32(_mm_setzero_si128(), v[14]);
- out[6] = v[10];
- out[7] = _mm_sub_epi32(_mm_setzero_si128(), v[2]);
- out[8] = v[3];
- out[9] = _mm_sub_epi32(_mm_setzero_si128(), v[11]);
- out[10] = v[15];
- out[11] = _mm_sub_epi32(_mm_setzero_si128(), v[7]);
- out[12] = v[5];
- out[13] = _mm_sub_epi32(_mm_setzero_si128(), v[13]);
- out[14] = v[9];
- out[15] = _mm_sub_epi32(_mm_setzero_si128(), v[1]);
- } else {
- const int log_range_out = AOMMAX(16, bd + 6);
- const __m128i clamp_lo_out = _mm_set1_epi32(-(1 << (log_range_out - 1)));
- const __m128i clamp_hi_out =
- _mm_set1_epi32((1 << (log_range_out - 1)) - 1);
-
- neg_shift_sse4_1(v[0], v[8], out + 0, out + 1, &clamp_lo_out, /* same pairing as the do_cols branch; presumably the helper shifts both, negates the second and clamps -- TODO confirm */
- &clamp_hi_out, out_shift);
- neg_shift_sse4_1(v[12], v[4], out + 2, out + 3, &clamp_lo_out,
- &clamp_hi_out, out_shift);
- neg_shift_sse4_1(v[6], v[14], out + 4, out + 5, &clamp_lo_out,
- &clamp_hi_out, out_shift);
- neg_shift_sse4_1(v[10], v[2], out + 6, out + 7, &clamp_lo_out,
- &clamp_hi_out, out_shift);
- neg_shift_sse4_1(v[3], v[11], out + 8, out + 9, &clamp_lo_out,
- &clamp_hi_out, out_shift);
- neg_shift_sse4_1(v[15], v[7], out + 10, out + 11, &clamp_lo_out,
- &clamp_hi_out, out_shift);
- neg_shift_sse4_1(v[5], v[13], out + 12, out + 13, &clamp_lo_out,
- &clamp_hi_out, out_shift);
- neg_shift_sse4_1(v[9], v[1], out + 14, out + 15, &clamp_lo_out,
- &clamp_hi_out, out_shift);
- }
- }
-}
-
-static void iadst16x16_low8_sse4_1(__m128i *in, __m128i *out, int bit,
- int do_cols, int bd, int out_shift) { /*
- * 16-point inverse ADST variant (going by the name) that reads only
- * in[0..7] and writes out[0..15], on packed 32-bit lanes. The working
- * array u[] is rotated in place, so within each rotation the products of
- * the old values are formed before the slots are overwritten.
- *
- * in        - input vectors; in[0..7] are read.
- * out       - sixteen output vectors.
- * bit       - selects the coefficient table via cospi_arr(bit) and is the
- *             right-shift applied after every multiply.
- * do_cols   - widens the intermediate clamp range (bd + 6 vs bd + 8) and
- *             selects the final store: direct with sign flips when nonzero,
- *             neg_shift_sse4_1 with out_shift and an output clamp otherwise.
- * bd        - sizes the clamp ranges; presumably the sample bit depth,
- *             confirm against callers.
- * out_shift - forwarded to neg_shift_sse4_1 on the !do_cols path.
- */
- const int32_t *cospi = cospi_arr(bit);
- const __m128i cospi2 = _mm_set1_epi32(cospi[2]);
- const __m128i cospi62 = _mm_set1_epi32(cospi[62]);
- const __m128i cospi10 = _mm_set1_epi32(cospi[10]);
- const __m128i cospi54 = _mm_set1_epi32(cospi[54]);
- const __m128i cospi18 = _mm_set1_epi32(cospi[18]);
- const __m128i cospi46 = _mm_set1_epi32(cospi[46]);
- const __m128i cospi26 = _mm_set1_epi32(cospi[26]);
- const __m128i cospi38 = _mm_set1_epi32(cospi[38]);
- const __m128i cospi34 = _mm_set1_epi32(cospi[34]);
- const __m128i cospi30 = _mm_set1_epi32(cospi[30]);
- const __m128i cospi42 = _mm_set1_epi32(cospi[42]);
- const __m128i cospi22 = _mm_set1_epi32(cospi[22]);
- const __m128i cospi50 = _mm_set1_epi32(cospi[50]);
- const __m128i cospi14 = _mm_set1_epi32(cospi[14]);
- const __m128i cospi58 = _mm_set1_epi32(cospi[58]);
- const __m128i cospi6 = _mm_set1_epi32(cospi[6]);
- const __m128i cospi8 = _mm_set1_epi32(cospi[8]);
- const __m128i cospi56 = _mm_set1_epi32(cospi[56]);
- const __m128i cospi40 = _mm_set1_epi32(cospi[40]);
- const __m128i cospi24 = _mm_set1_epi32(cospi[24]);
- const __m128i cospim56 = _mm_set1_epi32(-cospi[56]);
- const __m128i cospim24 = _mm_set1_epi32(-cospi[24]);
- const __m128i cospi48 = _mm_set1_epi32(cospi[48]);
- const __m128i cospi16 = _mm_set1_epi32(cospi[16]);
- const __m128i cospim48 = _mm_set1_epi32(-cospi[48]);
- const __m128i cospi32 = _mm_set1_epi32(cospi[32]);
- const __m128i rnding = _mm_set1_epi32(1 << (bit - 1)); // added before each >> bit so the shift rounds
- const int log_range = AOMMAX(16, bd + (do_cols ? 6 : 8));
- const __m128i clamp_lo = _mm_set1_epi32(-(1 << (log_range - 1))); // bounds handed to addsub_sse4_1 in stages 3, 5 and 7
- const __m128i clamp_hi = _mm_set1_epi32((1 << (log_range - 1)) - 1);
- __m128i u[16], x, y;
-
- // Calculate the column 0, 1, 2, 3
- {
- // stage 0
- // stage 1
- // stage 2: every u[k] is a single rounded product of one input; u[1], u[3], u[5], u[7] use the negated product
- __m128i zero = _mm_setzero_si128();
- x = _mm_mullo_epi32(in[0], cospi62);
- u[0] = _mm_add_epi32(x, rnding);
- u[0] = _mm_srai_epi32(u[0], bit);
-
- x = _mm_mullo_epi32(in[0], cospi2);
- u[1] = _mm_sub_epi32(zero, x);
- u[1] = _mm_add_epi32(u[1], rnding);
- u[1] = _mm_srai_epi32(u[1], bit);
-
- x = _mm_mullo_epi32(in[2], cospi54);
- u[2] = _mm_add_epi32(x, rnding);
- u[2] = _mm_srai_epi32(u[2], bit);
-
- x = _mm_mullo_epi32(in[2], cospi10);
- u[3] = _mm_sub_epi32(zero, x);
- u[3] = _mm_add_epi32(u[3], rnding);
- u[3] = _mm_srai_epi32(u[3], bit);
-
- x = _mm_mullo_epi32(in[4], cospi46);
- u[4] = _mm_add_epi32(x, rnding);
- u[4] = _mm_srai_epi32(u[4], bit);
-
- x = _mm_mullo_epi32(in[4], cospi18);
- u[5] = _mm_sub_epi32(zero, x);
- u[5] = _mm_add_epi32(u[5], rnding);
- u[5] = _mm_srai_epi32(u[5], bit);
-
- x = _mm_mullo_epi32(in[6], cospi38);
- u[6] = _mm_add_epi32(x, rnding);
- u[6] = _mm_srai_epi32(u[6], bit);
-
- x = _mm_mullo_epi32(in[6], cospi26);
- u[7] = _mm_sub_epi32(zero, x);
- u[7] = _mm_add_epi32(u[7], rnding);
- u[7] = _mm_srai_epi32(u[7], bit);
-
- u[8] = _mm_mullo_epi32(in[7], cospi34); // u[8..15] come from the odd inputs in[7], in[5], in[3], in[1], with no negation
- u[8] = _mm_add_epi32(u[8], rnding);
- u[8] = _mm_srai_epi32(u[8], bit);
-
- u[9] = _mm_mullo_epi32(in[7], cospi30);
- u[9] = _mm_add_epi32(u[9], rnding);
- u[9] = _mm_srai_epi32(u[9], bit);
-
- u[10] = _mm_mullo_epi32(in[5], cospi42);
- u[10] = _mm_add_epi32(u[10], rnding);
- u[10] = _mm_srai_epi32(u[10], bit);
-
- u[11] = _mm_mullo_epi32(in[5], cospi22);
- u[11] = _mm_add_epi32(u[11], rnding);
- u[11] = _mm_srai_epi32(u[11], bit);
-
- u[12] = _mm_mullo_epi32(in[3], cospi50);
- u[12] = _mm_add_epi32(u[12], rnding);
- u[12] = _mm_srai_epi32(u[12], bit);
-
- u[13] = _mm_mullo_epi32(in[3], cospi14);
- u[13] = _mm_add_epi32(u[13], rnding);
- u[13] = _mm_srai_epi32(u[13], bit);
-
- u[14] = _mm_mullo_epi32(in[1], cospi58);
- u[14] = _mm_add_epi32(u[14], rnding);
- u[14] = _mm_srai_epi32(u[14], bit);
-
- u[15] = _mm_mullo_epi32(in[1], cospi6);
- u[15] = _mm_add_epi32(u[15], rnding);
- u[15] = _mm_srai_epi32(u[15], bit);
-
- // stage 3: pair u[i] with u[i + 8]; inputs are passed by value, so writing back to the same slots is safe
- addsub_sse4_1(u[0], u[8], &u[0], &u[8], &clamp_lo, &clamp_hi);
- addsub_sse4_1(u[1], u[9], &u[1], &u[9], &clamp_lo, &clamp_hi);
- addsub_sse4_1(u[2], u[10], &u[2], &u[10], &clamp_lo, &clamp_hi);
- addsub_sse4_1(u[3], u[11], &u[3], &u[11], &clamp_lo, &clamp_hi);
- addsub_sse4_1(u[4], u[12], &u[4], &u[12], &clamp_lo, &clamp_hi);
- addsub_sse4_1(u[5], u[13], &u[5], &u[13], &clamp_lo, &clamp_hi);
- addsub_sse4_1(u[6], u[14], &u[6], &u[14], &clamp_lo, &clamp_hi);
- addsub_sse4_1(u[7], u[15], &u[7], &u[15], &clamp_lo, &clamp_hi);
-
- // stage 4: in-place rotations of (u[8], u[9]), (u[10], u[11]), (u[12], u[13]), (u[14], u[15]); u[0..7] are untouched
- y = _mm_mullo_epi32(u[8], cospi56); // saved now because u[8] is overwritten before u[9] is computed
- x = _mm_mullo_epi32(u[9], cospi56);
- u[8] = _mm_mullo_epi32(u[8], cospi8);
- u[8] = _mm_add_epi32(u[8], x);
- u[8] = _mm_add_epi32(u[8], rnding);
- u[8] = _mm_srai_epi32(u[8], bit);
-
- x = _mm_mullo_epi32(u[9], cospi8);
- u[9] = _mm_sub_epi32(y, x);
- u[9] = _mm_add_epi32(u[9], rnding);
- u[9] = _mm_srai_epi32(u[9], bit);
-
- x = _mm_mullo_epi32(u[11], cospi24);
- y = _mm_mullo_epi32(u[10], cospi24);
- u[10] = _mm_mullo_epi32(u[10], cospi40);
- u[10] = _mm_add_epi32(u[10], x);
- u[10] = _mm_add_epi32(u[10], rnding);
- u[10] = _mm_srai_epi32(u[10], bit);
-
- x = _mm_mullo_epi32(u[11], cospi40);
- u[11] = _mm_sub_epi32(y, x);
- u[11] = _mm_add_epi32(u[11], rnding);
- u[11] = _mm_srai_epi32(u[11], bit);
-
- x = _mm_mullo_epi32(u[13], cospi8);
- y = _mm_mullo_epi32(u[12], cospi8);
- u[12] = _mm_mullo_epi32(u[12], cospim56);
- u[12] = _mm_add_epi32(u[12], x);
- u[12] = _mm_add_epi32(u[12], rnding);
- u[12] = _mm_srai_epi32(u[12], bit);
-
- x = _mm_mullo_epi32(u[13], cospim56);
- u[13] = _mm_sub_epi32(y, x); // subtracting the cospim56 product adds u[13] * cospi[56]
- u[13] = _mm_add_epi32(u[13], rnding);
- u[13] = _mm_srai_epi32(u[13], bit);
-
- x = _mm_mullo_epi32(u[15], cospi40);
- y = _mm_mullo_epi32(u[14], cospi40);
- u[14] = _mm_mullo_epi32(u[14], cospim24);
- u[14] = _mm_add_epi32(u[14], x);
- u[14] = _mm_add_epi32(u[14], rnding);
- u[14] = _mm_srai_epi32(u[14], bit);
-
- x = _mm_mullo_epi32(u[15], cospim24);
- u[15] = _mm_sub_epi32(y, x);
- u[15] = _mm_add_epi32(u[15], rnding);
- u[15] = _mm_srai_epi32(u[15], bit);
-
- // stage 5: pair u[i] with u[i + 4] inside each half
- addsub_sse4_1(u[0], u[4], &u[0], &u[4], &clamp_lo, &clamp_hi);
- addsub_sse4_1(u[1], u[5], &u[1], &u[5], &clamp_lo, &clamp_hi);
- addsub_sse4_1(u[2], u[6], &u[2], &u[6], &clamp_lo, &clamp_hi);
- addsub_sse4_1(u[3], u[7], &u[3], &u[7], &clamp_lo, &clamp_hi);
- addsub_sse4_1(u[8], u[12], &u[8], &u[12], &clamp_lo, &clamp_hi);
- addsub_sse4_1(u[9], u[13], &u[9], &u[13], &clamp_lo, &clamp_hi);
- addsub_sse4_1(u[10], u[14], &u[10], &u[14], &clamp_lo, &clamp_hi);
- addsub_sse4_1(u[11], u[15], &u[11], &u[15], &clamp_lo, &clamp_hi);
-
- // stage 6: in-place rotations with cospi16/cospi48 on (u[4], u[5]), (u[6], u[7]), (u[12], u[13]), (u[14], u[15])
- x = _mm_mullo_epi32(u[5], cospi48);
- y = _mm_mullo_epi32(u[4], cospi48); // saved before u[4] is overwritten
- u[4] = _mm_mullo_epi32(u[4], cospi16);
- u[4] = _mm_add_epi32(u[4], x);
- u[4] = _mm_add_epi32(u[4], rnding);
- u[4] = _mm_srai_epi32(u[4], bit);
-
- x = _mm_mullo_epi32(u[5], cospi16);
- u[5] = _mm_sub_epi32(y, x);
- u[5] = _mm_add_epi32(u[5], rnding);
- u[5] = _mm_srai_epi32(u[5], bit);
-
- x = _mm_mullo_epi32(u[7], cospi16);
- y = _mm_mullo_epi32(u[6], cospi16);
- u[6] = _mm_mullo_epi32(u[6], cospim48);
- u[6] = _mm_add_epi32(u[6], x);
- u[6] = _mm_add_epi32(u[6], rnding);
- u[6] = _mm_srai_epi32(u[6], bit);
-
- x = _mm_mullo_epi32(u[7], cospim48);
- u[7] = _mm_sub_epi32(y, x);
- u[7] = _mm_add_epi32(u[7], rnding);
- u[7] = _mm_srai_epi32(u[7], bit);
-
- x = _mm_mullo_epi32(u[13], cospi48);
- y = _mm_mullo_epi32(u[12], cospi48);
- u[12] = _mm_mullo_epi32(u[12], cospi16);
- u[12] = _mm_add_epi32(u[12], x);
- u[12] = _mm_add_epi32(u[12], rnding);
- u[12] = _mm_srai_epi32(u[12], bit);
-
- x = _mm_mullo_epi32(u[13], cospi16);
- u[13] = _mm_sub_epi32(y, x);
- u[13] = _mm_add_epi32(u[13], rnding);
- u[13] = _mm_srai_epi32(u[13], bit);
-
- x = _mm_mullo_epi32(u[15], cospi16);
- y = _mm_mullo_epi32(u[14], cospi16);
- u[14] = _mm_mullo_epi32(u[14], cospim48);
- u[14] = _mm_add_epi32(u[14], x);
- u[14] = _mm_add_epi32(u[14], rnding);
- u[14] = _mm_srai_epi32(u[14], bit);
-
- x = _mm_mullo_epi32(u[15], cospim48);
- u[15] = _mm_sub_epi32(y, x);
- u[15] = _mm_add_epi32(u[15], rnding);
- u[15] = _mm_srai_epi32(u[15], bit);
-
- // stage 7: pair u[i] with u[i + 2] inside each group of four
- addsub_sse4_1(u[0], u[2], &u[0], &u[2], &clamp_lo, &clamp_hi);
- addsub_sse4_1(u[1], u[3], &u[1], &u[3], &clamp_lo, &clamp_hi);
- addsub_sse4_1(u[4], u[6], &u[4], &u[6], &clamp_lo, &clamp_hi);
- addsub_sse4_1(u[5], u[7], &u[5], &u[7], &clamp_lo, &clamp_hi);
- addsub_sse4_1(u[8], u[10], &u[8], &u[10], &clamp_lo, &clamp_hi);
- addsub_sse4_1(u[9], u[11], &u[9], &u[11], &clamp_lo, &clamp_hi);
- addsub_sse4_1(u[12], u[14], &u[12], &u[14], &clamp_lo, &clamp_hi);
- addsub_sse4_1(u[13], u[15], &u[13], &u[15], &clamp_lo, &clamp_hi);
-
- // stage 8: cospi32-scaled sum and difference on (u[2], u[3]), (u[6], u[7]), (u[10], u[11]), (u[14], u[15])
- y = _mm_mullo_epi32(u[2], cospi32); // both products are formed before either slot is overwritten
- x = _mm_mullo_epi32(u[3], cospi32);
- u[2] = _mm_add_epi32(y, x);
- u[2] = _mm_add_epi32(u[2], rnding);
- u[2] = _mm_srai_epi32(u[2], bit);
-
- u[3] = _mm_sub_epi32(y, x);
- u[3] = _mm_add_epi32(u[3], rnding);
- u[3] = _mm_srai_epi32(u[3], bit);
- y = _mm_mullo_epi32(u[6], cospi32);
- x = _mm_mullo_epi32(u[7], cospi32);
- u[6] = _mm_add_epi32(y, x);
- u[6] = _mm_add_epi32(u[6], rnding);
- u[6] = _mm_srai_epi32(u[6], bit);
-
- u[7] = _mm_sub_epi32(y, x);
- u[7] = _mm_add_epi32(u[7], rnding);
- u[7] = _mm_srai_epi32(u[7], bit);
-
- y = _mm_mullo_epi32(u[10], cospi32);
- x = _mm_mullo_epi32(u[11], cospi32);
- u[10] = _mm_add_epi32(y, x);
- u[10] = _mm_add_epi32(u[10], rnding);
- u[10] = _mm_srai_epi32(u[10], bit);
-
- u[11] = _mm_sub_epi32(y, x);
- u[11] = _mm_add_epi32(u[11], rnding);
- u[11] = _mm_srai_epi32(u[11], bit);
-
- y = _mm_mullo_epi32(u[14], cospi32);
- x = _mm_mullo_epi32(u[15], cospi32);
- u[14] = _mm_add_epi32(y, x);
- u[14] = _mm_add_epi32(u[14], rnding);
- u[14] = _mm_srai_epi32(u[14], bit);
-
- u[15] = _mm_sub_epi32(y, x);
- u[15] = _mm_add_epi32(u[15], rnding);
- u[15] = _mm_srai_epi32(u[15], bit);
-
- // stage 9: permuted store; odd outputs are negated
- if (do_cols) {
- out[0] = u[0];
- out[1] = _mm_sub_epi32(_mm_setzero_si128(), u[8]);
- out[2] = u[12];
- out[3] = _mm_sub_epi32(_mm_setzero_si128(), u[4]);
- out[4] = u[6];
- out[5] = _mm_sub_epi32(_mm_setzero_si128(), u[14]);
- out[6] = u[10];
- out[7] = _mm_sub_epi32(_mm_setzero_si128(), u[2]);
- out[8] = u[3];
- out[9] = _mm_sub_epi32(_mm_setzero_si128(), u[11]);
- out[10] = u[15];
- out[11] = _mm_sub_epi32(_mm_setzero_si128(), u[7]);
- out[12] = u[5];
- out[13] = _mm_sub_epi32(_mm_setzero_si128(), u[13]);
- out[14] = u[9];
- out[15] = _mm_sub_epi32(_mm_setzero_si128(), u[1]);
- } else {
- const int log_range_out = AOMMAX(16, bd + 6);
- const __m128i clamp_lo_out = _mm_set1_epi32(-(1 << (log_range_out - 1)));
- const __m128i clamp_hi_out =
- _mm_set1_epi32((1 << (log_range_out - 1)) - 1);
-
- neg_shift_sse4_1(u[0], u[8], out + 0, out + 1, &clamp_lo_out, /* same pairing as the do_cols branch; presumably the helper shifts both, negates the second and clamps -- TODO confirm */
- &clamp_hi_out, out_shift);
- neg_shift_sse4_1(u[12], u[4], out + 2, out + 3, &clamp_lo_out,
- &clamp_hi_out, out_shift);
- neg_shift_sse4_1(u[6], u[14], out + 4, out + 5, &clamp_lo_out,
- &clamp_hi_out, out_shift);
- neg_shift_sse4_1(u[10], u[2], out + 6, out + 7, &clamp_lo_out,
- &clamp_hi_out, out_shift);
- neg_shift_sse4_1(u[3], u[11], out + 8, out + 9, &clamp_lo_out,
- &clamp_hi_out, out_shift);
- neg_shift_sse4_1(u[15], u[7], out + 10, out + 11, &clamp_lo_out,
- &clamp_hi_out, out_shift);
- neg_shift_sse4_1(u[5], u[13], out + 12, out + 13, &clamp_lo_out,
- &clamp_hi_out, out_shift);
- neg_shift_sse4_1(u[9], u[1], out + 14, out + 15, &clamp_lo_out,
- &clamp_hi_out, out_shift);
- }
- }
-}
-
-/* 16-point inverse DCT over sixteen __m128i vectors.
- *
- * Reads in[0..15] and writes out[0..15]. All arithmetic uses the _epi32
- * intrinsics, so every vector is treated as four independent 32-bit lanes.
- *
- *   bit       selects the cosine table through cospi_arr(bit) and is also the
- *             arithmetic right shift applied after each weighted sum, with
- *             1 << (bit - 1) added first as a rounding offset.
- *   do_cols   non-zero: intermediate values are clamped to
- *             max(16, bd + 6) bits and the last stage goes through
- *             addsub_no_clamp_sse4_1. Zero: intermediates are clamped to
- *             max(16, bd + 8) bits and the last stage goes through
- *             addsub_shift_sse4_1 with out_shift and an output clamp.
- *   bd        only used to size the clamp ranges (presumably the bit depth).
- *   out_shift only used when do_cols is zero.
- *
- * The helpers half_btf_sse4_1, addsub_sse4_1, addsub_no_clamp_sse4_1 and
- * addsub_shift_sse4_1 are defined elsewhere. NOTE(review): half_btf_sse4_1(
- * w0, a, w1, b, rnd, bit) presumably returns (w0 * a + w1 * b + rnd) >> bit,
- * matching the sequences written out inline below -- confirm.
- */ static void idct16x16_sse4_1(__m128i *in, __m128i *out, int bit, int do_cols,
- int bd, int out_shift) {
- const int32_t *cospi = cospi_arr(bit);
- const __m128i cospi60 = _mm_set1_epi32(cospi[60]);
- const __m128i cospim4 = _mm_set1_epi32(-cospi[4]);  // "cospimN" holds -cospi[N]
- const __m128i cospi28 = _mm_set1_epi32(cospi[28]);
- const __m128i cospim36 = _mm_set1_epi32(-cospi[36]);
- const __m128i cospi44 = _mm_set1_epi32(cospi[44]);
- const __m128i cospi20 = _mm_set1_epi32(cospi[20]);
- const __m128i cospim20 = _mm_set1_epi32(-cospi[20]);
- const __m128i cospi12 = _mm_set1_epi32(cospi[12]);
- const __m128i cospim52 = _mm_set1_epi32(-cospi[52]);
- const __m128i cospi52 = _mm_set1_epi32(cospi[52]);
- const __m128i cospi36 = _mm_set1_epi32(cospi[36]);
- const __m128i cospi4 = _mm_set1_epi32(cospi[4]);
- const __m128i cospi56 = _mm_set1_epi32(cospi[56]);
- const __m128i cospim8 = _mm_set1_epi32(-cospi[8]);
- const __m128i cospi24 = _mm_set1_epi32(cospi[24]);
- const __m128i cospim40 = _mm_set1_epi32(-cospi[40]);
- const __m128i cospi40 = _mm_set1_epi32(cospi[40]);
- const __m128i cospi8 = _mm_set1_epi32(cospi[8]);
- const __m128i cospi32 = _mm_set1_epi32(cospi[32]);
- const __m128i cospi48 = _mm_set1_epi32(cospi[48]);
- const __m128i cospi16 = _mm_set1_epi32(cospi[16]);
- const __m128i cospim16 = _mm_set1_epi32(-cospi[16]);
- const __m128i cospim48 = _mm_set1_epi32(-cospi[48]);
- const __m128i rnding = _mm_set1_epi32(1 << (bit - 1));  // added before every >> bit
- const int log_range = AOMMAX(16, bd + (do_cols ? 6 : 8));  // intermediate width in bits
- const __m128i clamp_lo = _mm_set1_epi32(-(1 << (log_range - 1)));  // -2^(log_range-1)
- const __m128i clamp_hi = _mm_set1_epi32((1 << (log_range - 1)) - 1);  // 2^(log_range-1) - 1
- __m128i u[16], v[16], x, y;  // u/v alternate as source and destination per stage
-
- {
- // stage 0
- // stage 1: load the inputs in 4-bit bit-reversed order, u[k] = in[bitrev4(k)]
- u[0] = in[0];
- u[1] = in[8];
- u[2] = in[4];
- u[3] = in[12];
- u[4] = in[2];
- u[5] = in[10];
- u[6] = in[6];
- u[7] = in[14];
- u[8] = in[1];
- u[9] = in[9];
- u[10] = in[5];
- u[11] = in[13];
- u[12] = in[3];
- u[13] = in[11];
- u[14] = in[7];
- u[15] = in[15];
-
- // stage 2: 0..7 pass through; pairs (8,15), (9,14), (10,13), (11,12) are weighted
- v[0] = u[0];
- v[1] = u[1];
- v[2] = u[2];
- v[3] = u[3];
- v[4] = u[4];
- v[5] = u[5];
- v[6] = u[6];
- v[7] = u[7];
-
- v[8] = half_btf_sse4_1(&cospi60, &u[8], &cospim4, &u[15], &rnding, bit);
- v[9] = half_btf_sse4_1(&cospi28, &u[9], &cospim36, &u[14], &rnding, bit);
- v[10] = half_btf_sse4_1(&cospi44, &u[10], &cospim20, &u[13], &rnding, bit);
- v[11] = half_btf_sse4_1(&cospi12, &u[11], &cospim52, &u[12], &rnding, bit);
- v[12] = half_btf_sse4_1(&cospi52, &u[11], &cospi12, &u[12], &rnding, bit);
- v[13] = half_btf_sse4_1(&cospi20, &u[10], &cospi44, &u[13], &rnding, bit);
- v[14] = half_btf_sse4_1(&cospi36, &u[9], &cospi28, &u[14], &rnding, bit);
- v[15] = half_btf_sse4_1(&cospi4, &u[8], &cospi60, &u[15], &rnding, bit);
-
- // stage 3: 0..3 pass through; pairs (4,7), (5,6) are weighted; clamped add/sub on 8..15
- u[0] = v[0];
- u[1] = v[1];
- u[2] = v[2];
- u[3] = v[3];
- u[4] = half_btf_sse4_1(&cospi56, &v[4], &cospim8, &v[7], &rnding, bit);
- u[5] = half_btf_sse4_1(&cospi24, &v[5], &cospim40, &v[6], &rnding, bit);
- u[6] = half_btf_sse4_1(&cospi40, &v[5], &cospi24, &v[6], &rnding, bit);
- u[7] = half_btf_sse4_1(&cospi8, &v[4], &cospi56, &v[7], &rnding, bit);
- addsub_sse4_1(v[8], v[9], &u[8], &u[9], &clamp_lo, &clamp_hi);
- addsub_sse4_1(v[11], v[10], &u[11], &u[10], &clamp_lo, &clamp_hi);
- addsub_sse4_1(v[12], v[13], &u[12], &u[13], &clamp_lo, &clamp_hi);
- addsub_sse4_1(v[15], v[14], &u[15], &u[14], &clamp_lo, &clamp_hi);
-
- // stage 4: v[0], v[1] = (u[0]*cospi32 +/- u[1]*cospi32 + rnding) >> bit, written inline
- x = _mm_mullo_epi32(u[0], cospi32);
- y = _mm_mullo_epi32(u[1], cospi32);
- v[0] = _mm_add_epi32(x, y);
- v[0] = _mm_add_epi32(v[0], rnding);
- v[0] = _mm_srai_epi32(v[0], bit);
-
- v[1] = _mm_sub_epi32(x, y);
- v[1] = _mm_add_epi32(v[1], rnding);
- v[1] = _mm_srai_epi32(v[1], bit);
-
- v[2] = half_btf_sse4_1(&cospi48, &u[2], &cospim16, &u[3], &rnding, bit);
- v[3] = half_btf_sse4_1(&cospi16, &u[2], &cospi48, &u[3], &rnding, bit);
- addsub_sse4_1(u[4], u[5], &v[4], &v[5], &clamp_lo, &clamp_hi);
- addsub_sse4_1(u[7], u[6], &v[7], &v[6], &clamp_lo, &clamp_hi);
- v[8] = u[8];
- v[9] = half_btf_sse4_1(&cospim16, &u[9], &cospi48, &u[14], &rnding, bit);
- v[10] = half_btf_sse4_1(&cospim48, &u[10], &cospim16, &u[13], &rnding, bit);
- v[11] = u[11];
- v[12] = u[12];
- v[13] = half_btf_sse4_1(&cospim16, &u[10], &cospi48, &u[13], &rnding, bit);
- v[14] = half_btf_sse4_1(&cospi48, &u[9], &cospi16, &u[14], &rnding, bit);
- v[15] = u[15];
-
- // stage 5: clamped add/sub on 0..3 and 8..15; (5,6) combined with cospi32 weights
- addsub_sse4_1(v[0], v[3], &u[0], &u[3], &clamp_lo, &clamp_hi);
- addsub_sse4_1(v[1], v[2], &u[1], &u[2], &clamp_lo, &clamp_hi);
- u[4] = v[4];
-
- x = _mm_mullo_epi32(v[5], cospi32);
- y = _mm_mullo_epi32(v[6], cospi32);
- u[5] = _mm_sub_epi32(y, x);  // v[6]*cospi32 - v[5]*cospi32
- u[5] = _mm_add_epi32(u[5], rnding);
- u[5] = _mm_srai_epi32(u[5], bit);
-
- u[6] = _mm_add_epi32(y, x);
- u[6] = _mm_add_epi32(u[6], rnding);
- u[6] = _mm_srai_epi32(u[6], bit);
-
- u[7] = v[7];
- addsub_sse4_1(v[8], v[11], &u[8], &u[11], &clamp_lo, &clamp_hi);
- addsub_sse4_1(v[9], v[10], &u[9], &u[10], &clamp_lo, &clamp_hi);
- addsub_sse4_1(v[15], v[12], &u[15], &u[12], &clamp_lo, &clamp_hi);
- addsub_sse4_1(v[14], v[13], &u[14], &u[13], &clamp_lo, &clamp_hi);
-
- // stage 6: clamped add/sub across (i, 7 - i); (10,13) and (11,12) combined with cospi32
- addsub_sse4_1(u[0], u[7], &v[0], &v[7], &clamp_lo, &clamp_hi);
- addsub_sse4_1(u[1], u[6], &v[1], &v[6], &clamp_lo, &clamp_hi);
- addsub_sse4_1(u[2], u[5], &v[2], &v[5], &clamp_lo, &clamp_hi);
- addsub_sse4_1(u[3], u[4], &v[3], &v[4], &clamp_lo, &clamp_hi);
- v[8] = u[8];
- v[9] = u[9];
-
- x = _mm_mullo_epi32(u[10], cospi32);
- y = _mm_mullo_epi32(u[13], cospi32);
- v[10] = _mm_sub_epi32(y, x);  // u[13]*cospi32 - u[10]*cospi32
- v[10] = _mm_add_epi32(v[10], rnding);
- v[10] = _mm_srai_epi32(v[10], bit);
-
- v[13] = _mm_add_epi32(x, y);
- v[13] = _mm_add_epi32(v[13], rnding);
- v[13] = _mm_srai_epi32(v[13], bit);
-
- x = _mm_mullo_epi32(u[11], cospi32);
- y = _mm_mullo_epi32(u[12], cospi32);
- v[11] = _mm_sub_epi32(y, x);  // u[12]*cospi32 - u[11]*cospi32
- v[11] = _mm_add_epi32(v[11], rnding);
- v[11] = _mm_srai_epi32(v[11], bit);
-
- v[12] = _mm_add_epi32(x, y);
- v[12] = _mm_add_epi32(v[12], rnding);
- v[12] = _mm_srai_epi32(v[12], bit);
-
- v[14] = u[14];
- v[15] = u[15];
-
- // stage 7: combine v[i] with v[15 - i] into out[i] and out[15 - i]
- if (do_cols) {
- addsub_no_clamp_sse4_1(v[0], v[15], out + 0, out + 15);
- addsub_no_clamp_sse4_1(v[1], v[14], out + 1, out + 14);
- addsub_no_clamp_sse4_1(v[2], v[13], out + 2, out + 13);
- addsub_no_clamp_sse4_1(v[3], v[12], out + 3, out + 12);
- addsub_no_clamp_sse4_1(v[4], v[11], out + 4, out + 11);
- addsub_no_clamp_sse4_1(v[5], v[10], out + 5, out + 10);
- addsub_no_clamp_sse4_1(v[6], v[9], out + 6, out + 9);
- addsub_no_clamp_sse4_1(v[7], v[8], out + 7, out + 8);
- } else {
- const int log_range_out = AOMMAX(16, bd + 6);  // output width in bits
- /* Output bounds: the tighter of the log_range_out range and the
-  * intermediate range reduced by out_shift.
-  * NOTE(review): the second term of the upper bound is
-  * 1 << (log_range - 1 - out_shift) with no "- 1", unlike clamp_hi above;
-  * confirm this asymmetry is intended.
-  */ const __m128i clamp_lo_out = _mm_set1_epi32(AOMMAX(
- -(1 << (log_range_out - 1)), -(1 << (log_range - 1 - out_shift))));
- const __m128i clamp_hi_out = _mm_set1_epi32(AOMMIN(
- (1 << (log_range_out - 1)) - 1, (1 << (log_range - 1 - out_shift))));
-
- addsub_shift_sse4_1(v[0], v[15], out + 0, out + 15, &clamp_lo_out,
- &clamp_hi_out, out_shift);
- addsub_shift_sse4_1(v[1], v[14], out + 1, out + 14, &clamp_lo_out,
- &clamp_hi_out, out_shift);
- addsub_shift_sse4_1(v[2], v[13], out + 2, out + 13, &clamp_lo_out,
- &clamp_hi_out, out_shift);
- addsub_shift_sse4_1(v[3], v[12], out + 3, out + 12, &clamp_lo_out,
- &clamp_hi_out, out_shift);
- addsub_shift_sse4_1(v[4], v[11], out + 4, out + 11, &clamp_lo_out,
- &clamp_hi_out, out_shift);
- addsub_shift_sse4_1(v[5], v[10], out + 5, out + 10, &clamp_lo_out,
- &clamp_hi_out, out_shift);
- addsub_shift_sse4_1(v[6], v[9], out + 6, out + 9, &clamp_lo_out,
- &clamp_hi_out, out_shift);
- addsub_shift_sse4_1(v[7], v[8], out + 7, out + 8, &clamp_lo_out,
- &clamp_hi_out, out_shift);
- }
- }
-}
-
-/* 16-point inverse ADST (per the function name) over sixteen __m128i vectors.
- *
- * Reads in[0..15] and writes out[0..15]. All arithmetic uses the _epi32
- * intrinsics, so every vector is treated as four independent 32-bit lanes.
- * Even-numbered stages are weighted sums written out inline as
- * (a * w0 +/- b * w1 + rnding) >> bit; odd-numbered stages 3, 5 and 7 are
- * clamped add/sub butterflies through addsub_sse4_1 (defined elsewhere).
- *
- *   bit       selects the cosine table through cospi_arr(bit) and is the
- *             arithmetic right shift applied after each weighted sum, with
- *             1 << (bit - 1) added first as a rounding offset.
- *   do_cols   non-zero: intermediates are clamped to max(16, bd + 6) bits and
- *             the final permutation/negation is stored directly. Zero:
- *             intermediates are clamped to max(16, bd + 8) bits and the final
- *             stage goes through neg_shift_sse4_1 with out_shift and an
- *             output clamp of max(16, bd + 6) bits.
- *   bd        only used to size the clamp ranges (presumably the bit depth).
- *   out_shift only used when do_cols is zero.
- */ static void iadst16x16_sse4_1(__m128i *in, __m128i *out, int bit, int do_cols,
- int bd, int out_shift) {
- const int32_t *cospi = cospi_arr(bit);
- const __m128i cospi2 = _mm_set1_epi32(cospi[2]);
- const __m128i cospi62 = _mm_set1_epi32(cospi[62]);
- const __m128i cospi10 = _mm_set1_epi32(cospi[10]);
- const __m128i cospi54 = _mm_set1_epi32(cospi[54]);
- const __m128i cospi18 = _mm_set1_epi32(cospi[18]);
- const __m128i cospi46 = _mm_set1_epi32(cospi[46]);
- const __m128i cospi26 = _mm_set1_epi32(cospi[26]);
- const __m128i cospi38 = _mm_set1_epi32(cospi[38]);
- const __m128i cospi34 = _mm_set1_epi32(cospi[34]);
- const __m128i cospi30 = _mm_set1_epi32(cospi[30]);
- const __m128i cospi42 = _mm_set1_epi32(cospi[42]);
- const __m128i cospi22 = _mm_set1_epi32(cospi[22]);
- const __m128i cospi50 = _mm_set1_epi32(cospi[50]);
- const __m128i cospi14 = _mm_set1_epi32(cospi[14]);
- const __m128i cospi58 = _mm_set1_epi32(cospi[58]);
- const __m128i cospi6 = _mm_set1_epi32(cospi[6]);
- const __m128i cospi8 = _mm_set1_epi32(cospi[8]);
- const __m128i cospi56 = _mm_set1_epi32(cospi[56]);
- const __m128i cospi40 = _mm_set1_epi32(cospi[40]);
- const __m128i cospi24 = _mm_set1_epi32(cospi[24]);
- const __m128i cospim56 = _mm_set1_epi32(-cospi[56]);  // "cospimN" holds -cospi[N]
- const __m128i cospim24 = _mm_set1_epi32(-cospi[24]);
- const __m128i cospi48 = _mm_set1_epi32(cospi[48]);
- const __m128i cospi16 = _mm_set1_epi32(cospi[16]);
- const __m128i cospim48 = _mm_set1_epi32(-cospi[48]);
- const __m128i cospi32 = _mm_set1_epi32(cospi[32]);
- const __m128i rnding = _mm_set1_epi32(1 << (bit - 1));  // added before every >> bit
- const int log_range = AOMMAX(16, bd + (do_cols ? 6 : 8));  // intermediate width in bits
- const __m128i clamp_lo = _mm_set1_epi32(-(1 << (log_range - 1)));  // -2^(log_range-1)
- const __m128i clamp_hi = _mm_set1_epi32((1 << (log_range - 1)) - 1);  // 2^(log_range-1) - 1
- __m128i u[16], v[16], x, y;  // u/v alternate as source and destination per stage
-
- // Calculate the column 0, 1, 2, 3 (presumably the four 32-bit lanes of each vector -- TODO confirm)
- {
- // stage 0
- // stage 1
- /* stage 2: for k = 0..7
-  *   v[2k]   = (in[15-2k] * cospi[8k+2]  + in[2k] * cospi[62-8k] + rnding) >> bit
-  *   v[2k+1] = (in[15-2k] * cospi[62-8k] - in[2k] * cospi[8k+2]  + rnding) >> bit
-  * x is a scratch register for the second product.
-  */
- v[0] = _mm_mullo_epi32(in[15], cospi2);
- x = _mm_mullo_epi32(in[0], cospi62);
- v[0] = _mm_add_epi32(v[0], x);
- v[0] = _mm_add_epi32(v[0], rnding);
- v[0] = _mm_srai_epi32(v[0], bit);
-
- v[1] = _mm_mullo_epi32(in[15], cospi62);
- x = _mm_mullo_epi32(in[0], cospi2);
- v[1] = _mm_sub_epi32(v[1], x);
- v[1] = _mm_add_epi32(v[1], rnding);
- v[1] = _mm_srai_epi32(v[1], bit);
-
- v[2] = _mm_mullo_epi32(in[13], cospi10);
- x = _mm_mullo_epi32(in[2], cospi54);
- v[2] = _mm_add_epi32(v[2], x);
- v[2] = _mm_add_epi32(v[2], rnding);
- v[2] = _mm_srai_epi32(v[2], bit);
-
- v[3] = _mm_mullo_epi32(in[13], cospi54);
- x = _mm_mullo_epi32(in[2], cospi10);
- v[3] = _mm_sub_epi32(v[3], x);
- v[3] = _mm_add_epi32(v[3], rnding);
- v[3] = _mm_srai_epi32(v[3], bit);
-
- v[4] = _mm_mullo_epi32(in[11], cospi18);
- x = _mm_mullo_epi32(in[4], cospi46);
- v[4] = _mm_add_epi32(v[4], x);
- v[4] = _mm_add_epi32(v[4], rnding);
- v[4] = _mm_srai_epi32(v[4], bit);
-
- v[5] = _mm_mullo_epi32(in[11], cospi46);
- x = _mm_mullo_epi32(in[4], cospi18);
- v[5] = _mm_sub_epi32(v[5], x);
- v[5] = _mm_add_epi32(v[5], rnding);
- v[5] = _mm_srai_epi32(v[5], bit);
-
- v[6] = _mm_mullo_epi32(in[9], cospi26);
- x = _mm_mullo_epi32(in[6], cospi38);
- v[6] = _mm_add_epi32(v[6], x);
- v[6] = _mm_add_epi32(v[6], rnding);
- v[6] = _mm_srai_epi32(v[6], bit);
-
- v[7] = _mm_mullo_epi32(in[9], cospi38);
- x = _mm_mullo_epi32(in[6], cospi26);
- v[7] = _mm_sub_epi32(v[7], x);
- v[7] = _mm_add_epi32(v[7], rnding);
- v[7] = _mm_srai_epi32(v[7], bit);
-
- v[8] = _mm_mullo_epi32(in[7], cospi34);
- x = _mm_mullo_epi32(in[8], cospi30);
- v[8] = _mm_add_epi32(v[8], x);
- v[8] = _mm_add_epi32(v[8], rnding);
- v[8] = _mm_srai_epi32(v[8], bit);
-
- v[9] = _mm_mullo_epi32(in[7], cospi30);
- x = _mm_mullo_epi32(in[8], cospi34);
- v[9] = _mm_sub_epi32(v[9], x);
- v[9] = _mm_add_epi32(v[9], rnding);
- v[9] = _mm_srai_epi32(v[9], bit);
-
- v[10] = _mm_mullo_epi32(in[5], cospi42);
- x = _mm_mullo_epi32(in[10], cospi22);
- v[10] = _mm_add_epi32(v[10], x);
- v[10] = _mm_add_epi32(v[10], rnding);
- v[10] = _mm_srai_epi32(v[10], bit);
-
- v[11] = _mm_mullo_epi32(in[5], cospi22);
- x = _mm_mullo_epi32(in[10], cospi42);
- v[11] = _mm_sub_epi32(v[11], x);
- v[11] = _mm_add_epi32(v[11], rnding);
- v[11] = _mm_srai_epi32(v[11], bit);
-
- v[12] = _mm_mullo_epi32(in[3], cospi50);
- x = _mm_mullo_epi32(in[12], cospi14);
- v[12] = _mm_add_epi32(v[12], x);
- v[12] = _mm_add_epi32(v[12], rnding);
- v[12] = _mm_srai_epi32(v[12], bit);
-
- v[13] = _mm_mullo_epi32(in[3], cospi14);
- x = _mm_mullo_epi32(in[12], cospi50);
- v[13] = _mm_sub_epi32(v[13], x);
- v[13] = _mm_add_epi32(v[13], rnding);
- v[13] = _mm_srai_epi32(v[13], bit);
-
- v[14] = _mm_mullo_epi32(in[1], cospi58);
- x = _mm_mullo_epi32(in[14], cospi6);
- v[14] = _mm_add_epi32(v[14], x);
- v[14] = _mm_add_epi32(v[14], rnding);
- v[14] = _mm_srai_epi32(v[14], bit);
-
- v[15] = _mm_mullo_epi32(in[1], cospi6);
- x = _mm_mullo_epi32(in[14], cospi58);
- v[15] = _mm_sub_epi32(v[15], x);
- v[15] = _mm_add_epi32(v[15], rnding);
- v[15] = _mm_srai_epi32(v[15], bit);
-
- // stage 3: clamped add/sub on the pairs (i, i + 8), i = 0..7
- addsub_sse4_1(v[0], v[8], &u[0], &u[8], &clamp_lo, &clamp_hi);
- addsub_sse4_1(v[1], v[9], &u[1], &u[9], &clamp_lo, &clamp_hi);
- addsub_sse4_1(v[2], v[10], &u[2], &u[10], &clamp_lo, &clamp_hi);
- addsub_sse4_1(v[3], v[11], &u[3], &u[11], &clamp_lo, &clamp_hi);
- addsub_sse4_1(v[4], v[12], &u[4], &u[12], &clamp_lo, &clamp_hi);
- addsub_sse4_1(v[5], v[13], &u[5], &u[13], &clamp_lo, &clamp_hi);
- addsub_sse4_1(v[6], v[14], &u[6], &u[14], &clamp_lo, &clamp_hi);
- addsub_sse4_1(v[7], v[15], &u[7], &u[15], &clamp_lo, &clamp_hi);
-
- // stage 4: 0..7 pass through; pairs (8,9), (10,11), (12,13), (14,15) are weighted
- v[0] = u[0];
- v[1] = u[1];
- v[2] = u[2];
- v[3] = u[3];
- v[4] = u[4];
- v[5] = u[5];
- v[6] = u[6];
- v[7] = u[7];
-
- v[8] = _mm_mullo_epi32(u[8], cospi8);
- x = _mm_mullo_epi32(u[9], cospi56);
- v[8] = _mm_add_epi32(v[8], x);
- v[8] = _mm_add_epi32(v[8], rnding);
- v[8] = _mm_srai_epi32(v[8], bit);
-
- v[9] = _mm_mullo_epi32(u[8], cospi56);
- x = _mm_mullo_epi32(u[9], cospi8);
- v[9] = _mm_sub_epi32(v[9], x);
- v[9] = _mm_add_epi32(v[9], rnding);
- v[9] = _mm_srai_epi32(v[9], bit);
-
- v[10] = _mm_mullo_epi32(u[10], cospi40);
- x = _mm_mullo_epi32(u[11], cospi24);
- v[10] = _mm_add_epi32(v[10], x);
- v[10] = _mm_add_epi32(v[10], rnding);
- v[10] = _mm_srai_epi32(v[10], bit);
-
- v[11] = _mm_mullo_epi32(u[10], cospi24);
- x = _mm_mullo_epi32(u[11], cospi40);
- v[11] = _mm_sub_epi32(v[11], x);
- v[11] = _mm_add_epi32(v[11], rnding);
- v[11] = _mm_srai_epi32(v[11], bit);
-
- v[12] = _mm_mullo_epi32(u[12], cospim56);
- x = _mm_mullo_epi32(u[13], cospi8);
- v[12] = _mm_add_epi32(v[12], x);
- v[12] = _mm_add_epi32(v[12], rnding);
- v[12] = _mm_srai_epi32(v[12], bit);
-
- v[13] = _mm_mullo_epi32(u[12], cospi8);
- x = _mm_mullo_epi32(u[13], cospim56);
- v[13] = _mm_sub_epi32(v[13], x);  // subtracting a -cospi[56] product
- v[13] = _mm_add_epi32(v[13], rnding);
- v[13] = _mm_srai_epi32(v[13], bit);
-
- v[14] = _mm_mullo_epi32(u[14], cospim24);
- x = _mm_mullo_epi32(u[15], cospi40);
- v[14] = _mm_add_epi32(v[14], x);
- v[14] = _mm_add_epi32(v[14], rnding);
- v[14] = _mm_srai_epi32(v[14], bit);
-
- v[15] = _mm_mullo_epi32(u[14], cospi40);
- x = _mm_mullo_epi32(u[15], cospim24);
- v[15] = _mm_sub_epi32(v[15], x);  // subtracting a -cospi[24] product
- v[15] = _mm_add_epi32(v[15], rnding);
- v[15] = _mm_srai_epi32(v[15], bit);
-
- // stage 5: clamped add/sub on the pairs (i, i + 4) inside each half
- addsub_sse4_1(v[0], v[4], &u[0], &u[4], &clamp_lo, &clamp_hi);
- addsub_sse4_1(v[1], v[5], &u[1], &u[5], &clamp_lo, &clamp_hi);
- addsub_sse4_1(v[2], v[6], &u[2], &u[6], &clamp_lo, &clamp_hi);
- addsub_sse4_1(v[3], v[7], &u[3], &u[7], &clamp_lo, &clamp_hi);
- addsub_sse4_1(v[8], v[12], &u[8], &u[12], &clamp_lo, &clamp_hi);
- addsub_sse4_1(v[9], v[13], &u[9], &u[13], &clamp_lo, &clamp_hi);
- addsub_sse4_1(v[10], v[14], &u[10], &u[14], &clamp_lo, &clamp_hi);
- addsub_sse4_1(v[11], v[15], &u[11], &u[15], &clamp_lo, &clamp_hi);
-
- // stage 6: pairs (4,5), (6,7), (12,13), (14,15) weighted by cospi16/cospi48; the rest pass through
- v[0] = u[0];
- v[1] = u[1];
- v[2] = u[2];
- v[3] = u[3];
-
- v[4] = _mm_mullo_epi32(u[4], cospi16);
- x = _mm_mullo_epi32(u[5], cospi48);
- v[4] = _mm_add_epi32(v[4], x);
- v[4] = _mm_add_epi32(v[4], rnding);
- v[4] = _mm_srai_epi32(v[4], bit);
-
- v[5] = _mm_mullo_epi32(u[4], cospi48);
- x = _mm_mullo_epi32(u[5], cospi16);
- v[5] = _mm_sub_epi32(v[5], x);
- v[5] = _mm_add_epi32(v[5], rnding);
- v[5] = _mm_srai_epi32(v[5], bit);
-
- v[6] = _mm_mullo_epi32(u[6], cospim48);
- x = _mm_mullo_epi32(u[7], cospi16);
- v[6] = _mm_add_epi32(v[6], x);
- v[6] = _mm_add_epi32(v[6], rnding);
- v[6] = _mm_srai_epi32(v[6], bit);
-
- v[7] = _mm_mullo_epi32(u[6], cospi16);
- x = _mm_mullo_epi32(u[7], cospim48);
- v[7] = _mm_sub_epi32(v[7], x);  // subtracting a -cospi[48] product
- v[7] = _mm_add_epi32(v[7], rnding);
- v[7] = _mm_srai_epi32(v[7], bit);
-
- v[8] = u[8];
- v[9] = u[9];
- v[10] = u[10];
- v[11] = u[11];
-
- v[12] = _mm_mullo_epi32(u[12], cospi16);
- x = _mm_mullo_epi32(u[13], cospi48);
- v[12] = _mm_add_epi32(v[12], x);
- v[12] = _mm_add_epi32(v[12], rnding);
- v[12] = _mm_srai_epi32(v[12], bit);
-
- v[13] = _mm_mullo_epi32(u[12], cospi48);
- x = _mm_mullo_epi32(u[13], cospi16);
- v[13] = _mm_sub_epi32(v[13], x);
- v[13] = _mm_add_epi32(v[13], rnding);
- v[13] = _mm_srai_epi32(v[13], bit);
-
- v[14] = _mm_mullo_epi32(u[14], cospim48);
- x = _mm_mullo_epi32(u[15], cospi16);
- v[14] = _mm_add_epi32(v[14], x);
- v[14] = _mm_add_epi32(v[14], rnding);
- v[14] = _mm_srai_epi32(v[14], bit);
-
- v[15] = _mm_mullo_epi32(u[14], cospi16);
- x = _mm_mullo_epi32(u[15], cospim48);
- v[15] = _mm_sub_epi32(v[15], x);  // subtracting a -cospi[48] product
- v[15] = _mm_add_epi32(v[15], rnding);
- v[15] = _mm_srai_epi32(v[15], bit);
-
- // stage 7: clamped add/sub on the pairs (i, i + 2) inside each group of four
- addsub_sse4_1(v[0], v[2], &u[0], &u[2], &clamp_lo, &clamp_hi);
- addsub_sse4_1(v[1], v[3], &u[1], &u[3], &clamp_lo, &clamp_hi);
- addsub_sse4_1(v[4], v[6], &u[4], &u[6], &clamp_lo, &clamp_hi);
- addsub_sse4_1(v[5], v[7], &u[5], &u[7], &clamp_lo, &clamp_hi);
- addsub_sse4_1(v[8], v[10], &u[8], &u[10], &clamp_lo, &clamp_hi);
- addsub_sse4_1(v[9], v[11], &u[9], &u[11], &clamp_lo, &clamp_hi);
- addsub_sse4_1(v[12], v[14], &u[12], &u[14], &clamp_lo, &clamp_hi);
- addsub_sse4_1(v[13], v[15], &u[13], &u[15], &clamp_lo, &clamp_hi);
-
- // stage 8: pairs (2,3), (6,7), (10,11), (14,15) become (a*cospi32 +/- b*cospi32 + rnding) >> bit
- v[0] = u[0];
- v[1] = u[1];
-
- y = _mm_mullo_epi32(u[2], cospi32);
- x = _mm_mullo_epi32(u[3], cospi32);
- v[2] = _mm_add_epi32(y, x);
- v[2] = _mm_add_epi32(v[2], rnding);
- v[2] = _mm_srai_epi32(v[2], bit);
-
- v[3] = _mm_sub_epi32(y, x);
- v[3] = _mm_add_epi32(v[3], rnding);
- v[3] = _mm_srai_epi32(v[3], bit);
-
- v[4] = u[4];
- v[5] = u[5];
-
- y = _mm_mullo_epi32(u[6], cospi32);
- x = _mm_mullo_epi32(u[7], cospi32);
- v[6] = _mm_add_epi32(y, x);
- v[6] = _mm_add_epi32(v[6], rnding);
- v[6] = _mm_srai_epi32(v[6], bit);
-
- v[7] = _mm_sub_epi32(y, x);
- v[7] = _mm_add_epi32(v[7], rnding);
- v[7] = _mm_srai_epi32(v[7], bit);
-
- v[8] = u[8];
- v[9] = u[9];
-
- y = _mm_mullo_epi32(u[10], cospi32);
- x = _mm_mullo_epi32(u[11], cospi32);
- v[10] = _mm_add_epi32(y, x);
- v[10] = _mm_add_epi32(v[10], rnding);
- v[10] = _mm_srai_epi32(v[10], bit);
-
- v[11] = _mm_sub_epi32(y, x);
- v[11] = _mm_add_epi32(v[11], rnding);
- v[11] = _mm_srai_epi32(v[11], bit);
-
- v[12] = u[12];
- v[13] = u[13];
-
- y = _mm_mullo_epi32(u[14], cospi32);
- x = _mm_mullo_epi32(u[15], cospi32);
- v[14] = _mm_add_epi32(y, x);
- v[14] = _mm_add_epi32(v[14], rnding);
- v[14] = _mm_srai_epi32(v[14], bit);
-
- v[15] = _mm_sub_epi32(y, x);
- v[15] = _mm_add_epi32(v[15], rnding);
- v[15] = _mm_srai_epi32(v[15], bit);
-
- // stage 9: output permutation; every odd-indexed output is negated (0 - v[...])
- if (do_cols) {
- out[0] = v[0];
- out[1] = _mm_sub_epi32(_mm_setzero_si128(), v[8]);
- out[2] = v[12];
- out[3] = _mm_sub_epi32(_mm_setzero_si128(), v[4]);
- out[4] = v[6];
- out[5] = _mm_sub_epi32(_mm_setzero_si128(), v[14]);
- out[6] = v[10];
- out[7] = _mm_sub_epi32(_mm_setzero_si128(), v[2]);
- out[8] = v[3];
- out[9] = _mm_sub_epi32(_mm_setzero_si128(), v[11]);
- out[10] = v[15];
- out[11] = _mm_sub_epi32(_mm_setzero_si128(), v[7]);
- out[12] = v[5];
- out[13] = _mm_sub_epi32(_mm_setzero_si128(), v[13]);
- out[14] = v[9];
- out[15] = _mm_sub_epi32(_mm_setzero_si128(), v[1]);
- } else {
- const int log_range_out = AOMMAX(16, bd + 6);  // output width in bits
- const __m128i clamp_lo_out = _mm_set1_epi32(-(1 << (log_range_out - 1)));
- const __m128i clamp_hi_out =
- _mm_set1_epi32((1 << (log_range_out - 1)) - 1);
-
- /* Same source/destination pairing as the do_cols branch, two outputs per
-  * call. NOTE(review): neg_shift_sse4_1 is defined elsewhere; it presumably
-  * negates the second input, applies out_shift and clamps -- confirm.
-  */
- neg_shift_sse4_1(v[0], v[8], out + 0, out + 1, &clamp_lo_out,
- &clamp_hi_out, out_shift);
- neg_shift_sse4_1(v[12], v[4], out + 2, out + 3, &clamp_lo_out,
- &clamp_hi_out, out_shift);
- neg_shift_sse4_1(v[6], v[14], out + 4, out + 5, &clamp_lo_out,
- &clamp_hi_out, out_shift);
- neg_shift_sse4_1(v[10], v[2], out + 6, out + 7, &clamp_lo_out,
- &clamp_hi_out, out_shift);
- neg_shift_sse4_1(v[3], v[11], out + 8, out + 9, &clamp_lo_out,
- &clamp_hi_out, out_shift);
- neg_shift_sse4_1(v[15], v[7], out + 10, out + 11, &clamp_lo_out,
- &clamp_hi_out, out_shift);
- neg_shift_sse4_1(v[5], v[13], out + 12, out + 13, &clamp_lo_out,
- &clamp_hi_out, out_shift);
- neg_shift_sse4_1(v[9], v[1], out + 14, out + 15, &clamp_lo_out,
- &clamp_hi_out, out_shift);
- }
- }
-}
-
-// Stage 8 of the 64-point inverse DCT (per the function name), applied in
-// place to the working array u[]:
-//   * pairs (10,13) and (11,12) are combined with -/+cospi32 weights;
-//   * clamped add/sub butterflies run on (i, 39 - i) and (47 - i, i + 8) for
-//     i = 16..19;
-//   * pairs (36 + k, 59 - k) and (40 + k, 55 - k), k = 0..3, are combined
-//     with cospi16 / cospi48 weights.
-// Every weight, both clamp bounds and the rounding constant are supplied by
-// the caller through pointers; `bit` is forwarded to half_btf_sse4_1.
-static INLINE void idct64_stage8_sse4_1(
-    __m128i *u, const __m128i *cospim32, const __m128i *cospi32,
-    const __m128i *cospim16, const __m128i *cospi48, const __m128i *cospi16,
-    const __m128i *cospim48, const __m128i *clamp_lo, const __m128i *clamp_hi,
-    const __m128i *rnding, int bit) {
-  __m128i low[4];
-  int k;
-
-  // (10,13) then (11,12): both results are formed from the old pair before
-  // either slot is overwritten.
-  for (k = 10; k <= 11; ++k) {
-    const int mate = 23 - k;
-    const __m128i rotated =
-        half_btf_sse4_1(cospim32, &u[k], cospi32, &u[mate], rnding, bit);
-    u[mate] = half_btf_sse4_1(cospi32, &u[k], cospi32, &u[mate], rnding, bit);
-    u[k] = rotated;
-  }
-
-  // For k in 16..19: 39 - k == k ^ 7, 47 - k == k ^ 15, k + 8 == k ^ 8.
-  for (k = 16; k <= 19; ++k) {
-    const int inner = 39 - k;
-    const int outer = 47 - k;
-    const int shifted = k + 8;
-    addsub_sse4_1(u[k], u[inner], &u[k], &u[inner], clamp_lo, clamp_hi);
-    addsub_sse4_1(u[outer], u[shifted], &u[outer], &u[shifted], clamp_lo,
-                  clamp_hi);
-  }
-
-  // u[36..39] against u[59..56]: compute the four low results first, then the
-  // high ones from the still-unmodified low inputs, then store the low ones.
-  for (k = 0; k < 4; ++k) {
-    low[k] =
-        half_btf_sse4_1(cospim16, &u[36 + k], cospi48, &u[59 - k], rnding, bit);
-  }
-  for (k = 0; k < 4; ++k) {
-    u[56 + k] =
-        half_btf_sse4_1(cospi48, &u[39 - k], cospi16, &u[56 + k], rnding, bit);
-  }
-  for (k = 0; k < 4; ++k) {
-    u[36 + k] = low[k];
-  }
-
-  // u[40..43] against u[55..52], same three-step ordering.
-  for (k = 0; k < 4; ++k) {
-    low[k] = half_btf_sse4_1(cospim48, &u[40 + k], cospim16, &u[55 - k], rnding,
-                             bit);
-  }
-  for (k = 0; k < 4; ++k) {
-    u[52 + k] =
-        half_btf_sse4_1(cospim16, &u[43 - k], cospi48, &u[52 + k], rnding, bit);
-  }
-  for (k = 0; k < 4; ++k) {
-    u[40 + k] = low[k];
-  }
-}
-
-// Stage 9 of the 64-point inverse DCT (per the function name), applied in
-// place to u[]:
-//   * clamped add/sub on (k, 15 - k) for k = 0..7;
-//   * pairs (20 + k, 27 - k), k = 0..3, combined with -/+cospi32 weights;
-//   * clamped add/sub on (k, 79 - k) for k = 32..39 and on (111 - k, k) for
-//     k = 48..55.
-static INLINE void idct64_stage9_sse4_1(__m128i *u, const __m128i *cospim32,
-                                        const __m128i *cospi32,
-                                        const __m128i *clamp_lo,
-                                        const __m128i *clamp_hi,
-                                        const __m128i *rnding, int bit) {
-  __m128i low[4];
-  int k;
-
-  for (k = 0; k != 8; ++k) {
-    __m128i *const a = &u[k];
-    __m128i *const b = &u[15 - k];
-    addsub_sse4_1(*a, *b, a, b, clamp_lo, clamp_hi);
-  }
-
-  // Low results are parked in low[] so the high results still see the
-  // original u[20..23].
-  for (k = 0; k < 4; ++k) {
-    low[k] =
-        half_btf_sse4_1(cospim32, &u[20 + k], cospi32, &u[27 - k], rnding, bit);
-  }
-  for (k = 0; k < 4; ++k) {
-    u[24 + k] =
-        half_btf_sse4_1(cospi32, &u[23 - k], cospi32, &u[24 + k], rnding, bit);
-  }
-  for (k = 0; k < 4; ++k) {
-    u[20 + k] = low[k];
-  }
-
-  // For k in 32..39, 79 - k == k ^ 15.
-  for (k = 32; k < 40; ++k) {
-    __m128i *const a = &u[k];
-    __m128i *const b = &u[79 - k];
-    addsub_sse4_1(*a, *b, a, b, clamp_lo, clamp_hi);
-  }
-
-  // For k in 48..55, 111 - k == k ^ 15; here the mirrored element is the
-  // first operand.
-  for (k = 48; k < 56; ++k) {
-    __m128i *const a = &u[111 - k];
-    __m128i *const b = &u[k];
-    addsub_sse4_1(*a, *b, a, b, clamp_lo, clamp_hi);
-  }
-}
-
-// Stage 10 of the 64-point inverse DCT (per the function name), applied in
-// place to u[]:
-//   * clamped add/sub on (k, 31 - k) for k = 0..15;
-//   * pairs (40 + k, 55 - k) and then (44 + k, 51 - k), k = 0..3, combined
-//     with -/+cospi32 weights.
-static INLINE void idct64_stage10_sse4_1(__m128i *u, const __m128i *cospim32,
-                                         const __m128i *cospi32,
-                                         const __m128i *clamp_lo,
-                                         const __m128i *clamp_hi,
-                                         const __m128i *rnding, int bit) {
-  __m128i low[4];
-  int base, k;
-
-  for (k = 0; k < 16; ++k) {
-    __m128i *const a = &u[k];
-    __m128i *const b = &u[31 - k];
-    addsub_sse4_1(*a, *b, a, b, clamp_lo, clamp_hi);
-  }
-
-  // base = 40 pairs u[40..43] with u[55..52]; base = 44 pairs u[44..47] with
-  // u[51..48]. In both cases the high half starts at 92 - base.
-  for (base = 40; base <= 44; base += 4) {
-    const int top = 92 - base;
-
-    for (k = 0; k < 4; ++k) {
-      low[k] = half_btf_sse4_1(cospim32, &u[base + k], cospi32,
-                               &u[top + 3 - k], rnding, bit);
-    }
-    for (k = 0; k < 4; ++k) {
-      u[top + k] = half_btf_sse4_1(cospi32, &u[base + 3 - k], cospi32,
-                                   &u[top + k], rnding, bit);
-    }
-    for (k = 0; k < 4; ++k) {
-      u[base + k] = low[k];
-    }
-  }
-}
-
-// Stage 11 of the 64-point inverse DCT (per the function name): combines
-// u[k] with u[63 - k] into out[k] and out[63 - k] for k = 0..31.
-//
-// With do_cols set the pairs go through addsub_no_clamp_sse4_1. Otherwise
-// they go through addsub_shift_sse4_1, which receives out_shift and output
-// bounds derived from bd, log_range and out_shift.
-static INLINE void idct64_stage11_sse4_1(__m128i *u, __m128i *out, int do_cols,
-                                         int bd, int out_shift,
-                                         const int log_range) {
-  int k;
-
-  if (do_cols) {
-    for (k = 0; k < 32; ++k) {
-      addsub_no_clamp_sse4_1(u[k], u[63 - k], out + k, out + (63 - k));
-    }
-    return;
-  }
-
-  {
-    const int log_range_out = AOMMAX(16, bd + 6);
-    // The tighter of the log_range_out range and log_range reduced by
-    // out_shift.
-    const int lower = AOMMAX(-(1 << (log_range_out - 1)),
-                             -(1 << (log_range - 1 - out_shift)));
-    const int upper = AOMMIN((1 << (log_range_out - 1)) - 1,
-                             (1 << (log_range - 1 - out_shift)));
-    const __m128i clamp_lo_out = _mm_set1_epi32(lower);
-    const __m128i clamp_hi_out = _mm_set1_epi32(upper);
-
-    for (k = 0; k < 32; ++k) {
-      addsub_shift_sse4_1(u[k], u[63 - k], out + k, out + (63 - k),
-                          &clamp_lo_out, &clamp_hi_out, out_shift);
-    }
-  }
-}
-
-// 64-point inverse DCT specialised for the case where only in[0] is used:
-// in[0] is scaled by cospi[32] through half_btf_0_sse4_1, clamped (after a
-// rounded shift by out_shift when do_cols is zero), and the one resulting
-// vector is written to all 64 output slots.
-//
-//   bit        selects the cosine table and sizes the rounding constant.
-//   do_cols    non-zero: clamp to max(16, bd + 6) bits, no shift.
-//              zero: add (1 << out_shift) >> 1, shift right by out_shift,
-//              then clamp to the output bounds computed below.
-static void idct64x64_low1_sse4_1(__m128i *in, __m128i *out, int bit,
-                                  int do_cols, int bd, int out_shift) {
-  const int32_t *cospi = cospi_arr(bit);
-  const __m128i cospi32 = _mm_set1_epi32(cospi[32]);
-  const __m128i rnding = _mm_set1_epi32(1 << (bit - 1));
-  const int log_range = AOMMAX(16, bd + (do_cols ? 6 : 8));
-  int k;
-
-  // Stages 1-6 reduce to this single product; stages 8-10 need no work here.
-  __m128i dc = half_btf_0_sse4_1(&cospi32, &in[0], &rnding, bit);
-
-  // stage 11
-  if (do_cols) {
-    const __m128i clamp_lo = _mm_set1_epi32(-(1 << (log_range - 1)));
-    const __m128i clamp_hi = _mm_set1_epi32((1 << (log_range - 1)) - 1);
-
-    dc = _mm_min_epi32(_mm_max_epi32(dc, clamp_lo), clamp_hi);
-  } else {
-    const int log_range_out = AOMMAX(16, bd + 6);
-    const __m128i clamp_lo_out = _mm_set1_epi32(AOMMAX(
-        -(1 << (log_range_out - 1)), -(1 << (log_range - 1 - out_shift))));
-    const __m128i clamp_hi_out = _mm_set1_epi32(AOMMIN(
-        (1 << (log_range_out - 1)) - 1, (1 << (log_range - 1 - out_shift))));
-    const __m128i half = _mm_set1_epi32((1 << out_shift) >> 1);
-
-    dc = _mm_sra_epi32(_mm_add_epi32(dc, half), _mm_cvtsi32_si128(out_shift));
-    dc = _mm_min_epi32(_mm_max_epi32(dc, clamp_lo_out), clamp_hi_out);
-  }
-
-  // Fill the output from both ends towards the middle.
-  for (k = 0; k < 32; ++k) {
-    out[k] = dc;
-    out[63 - k] = dc;
-  }
-}
-
-static void idct64x64_low8_sse4_1(__m128i *in, __m128i *out, int bit,
- int do_cols, int bd, int out_shift) {
- int i, j;
- const int32_t *cospi = cospi_arr(bit);
- const __m128i rnding = _mm_set1_epi32(1 << (bit - 1));
- const int log_range = AOMMAX(16, bd + (do_cols ? 6 : 8));
- const __m128i clamp_lo = _mm_set1_epi32(-(1 << (log_range - 1)));
- const __m128i clamp_hi = _mm_set1_epi32((1 << (log_range - 1)) - 1);
-
- const __m128i cospi1 = _mm_set1_epi32(cospi[1]);
- const __m128i cospi2 = _mm_set1_epi32(cospi[2]);
- const __m128i cospi3 = _mm_set1_epi32(cospi[3]);
- const __m128i cospi4 = _mm_set1_epi32(cospi[4]);
- const __m128i cospi6 = _mm_set1_epi32(cospi[6]);
- const __m128i cospi8 = _mm_set1_epi32(cospi[8]);
- const __m128i cospi12 = _mm_set1_epi32(cospi[12]);
- const __m128i cospi16 = _mm_set1_epi32(cospi[16]);
- const __m128i cospi20 = _mm_set1_epi32(cospi[20]);
- const __m128i cospi24 = _mm_set1_epi32(cospi[24]);
- const __m128i cospi28 = _mm_set1_epi32(cospi[28]);
- const __m128i cospi32 = _mm_set1_epi32(cospi[32]);
- const __m128i cospi40 = _mm_set1_epi32(cospi[40]);
- const __m128i cospi44 = _mm_set1_epi32(cospi[44]);
- const __m128i cospi48 = _mm_set1_epi32(cospi[48]);
- const __m128i cospi56 = _mm_set1_epi32(cospi[56]);
- const __m128i cospi60 = _mm_set1_epi32(cospi[60]);
- const __m128i cospim4 = _mm_set1_epi32(-cospi[4]);
- const __m128i cospim8 = _mm_set1_epi32(-cospi[8]);
- const __m128i cospim12 = _mm_set1_epi32(-cospi[12]);
- const __m128i cospim16 = _mm_set1_epi32(-cospi[16]);
- const __m128i cospim20 = _mm_set1_epi32(-cospi[20]);
- const __m128i cospim24 = _mm_set1_epi32(-cospi[24]);
- const __m128i cospim28 = _mm_set1_epi32(-cospi[28]);
- const __m128i cospim32 = _mm_set1_epi32(-cospi[32]);
- const __m128i cospim36 = _mm_set1_epi32(-cospi[36]);
- const __m128i cospim40 = _mm_set1_epi32(-cospi[40]);
- const __m128i cospim48 = _mm_set1_epi32(-cospi[48]);
- const __m128i cospim52 = _mm_set1_epi32(-cospi[52]);
- const __m128i cospim56 = _mm_set1_epi32(-cospi[56]);
- const __m128i cospi63 = _mm_set1_epi32(cospi[63]);
- const __m128i cospim57 = _mm_set1_epi32(-cospi[57]);
- const __m128i cospi7 = _mm_set1_epi32(cospi[7]);
- const __m128i cospi5 = _mm_set1_epi32(cospi[5]);
- const __m128i cospi59 = _mm_set1_epi32(cospi[59]);
- const __m128i cospim61 = _mm_set1_epi32(-cospi[61]);
- const __m128i cospim58 = _mm_set1_epi32(-cospi[58]);
- const __m128i cospi62 = _mm_set1_epi32(cospi[62]);
-
- {
- __m128i u[64];
-
- // stage 1
- u[0] = in[0];
- u[8] = in[4];
- u[16] = in[2];
- u[24] = in[6];
- u[32] = in[1];
- u[40] = in[5];
- u[48] = in[3];
- u[56] = in[7];
-
- // stage 2
- u[63] = half_btf_0_sse4_1(&cospi1, &u[32], &rnding, bit);
- u[32] = half_btf_0_sse4_1(&cospi63, &u[32], &rnding, bit);
- u[39] = half_btf_0_sse4_1(&cospim57, &u[56], &rnding, bit);
- u[56] = half_btf_0_sse4_1(&cospi7, &u[56], &rnding, bit);
- u[55] = half_btf_0_sse4_1(&cospi5, &u[40], &rnding, bit);
- u[40] = half_btf_0_sse4_1(&cospi59, &u[40], &rnding, bit);
- u[47] = half_btf_0_sse4_1(&cospim61, &u[48], &rnding, bit);
- u[48] = half_btf_0_sse4_1(&cospi3, &u[48], &rnding, bit);
-
- // stage 3
- u[31] = half_btf_0_sse4_1(&cospi2, &u[16], &rnding, bit);
- u[16] = half_btf_0_sse4_1(&cospi62, &u[16], &rnding, bit);
- u[23] = half_btf_0_sse4_1(&cospim58, &u[24], &rnding, bit);
- u[24] = half_btf_0_sse4_1(&cospi6, &u[24], &rnding, bit);
- u[33] = u[32];
- u[38] = u[39];
- u[41] = u[40];
- u[46] = u[47];
- u[49] = u[48];
- u[54] = u[55];
- u[57] = u[56];
- u[62] = u[63];
-
- // stage 4
- __m128i temp1, temp2;
- u[15] = half_btf_0_sse4_1(&cospi4, &u[8], &rnding, bit);
- u[8] = half_btf_0_sse4_1(&cospi60, &u[8], &rnding, bit);
- u[17] = u[16];
- u[22] = u[23];
- u[25] = u[24];
- u[30] = u[31];
-
- temp1 = half_btf_sse4_1(&cospim4, &u[33], &cospi60, &u[62], &rnding, bit);
- u[62] = half_btf_sse4_1(&cospi60, &u[33], &cospi4, &u[62], &rnding, bit);
- u[33] = temp1;
-
- temp2 = half_btf_sse4_1(&cospim36, &u[38], &cospi28, &u[57], &rnding, bit);
- u[38] = half_btf_sse4_1(&cospim28, &u[38], &cospim36, &u[57], &rnding, bit);
- u[57] = temp2;
-
- temp1 = half_btf_sse4_1(&cospim20, &u[41], &cospi44, &u[54], &rnding, bit);
- u[54] = half_btf_sse4_1(&cospi44, &u[41], &cospi20, &u[54], &rnding, bit);
- u[41] = temp1;
-
- temp2 = half_btf_sse4_1(&cospim12, &u[46], &cospim52, &u[49], &rnding, bit);
- u[49] = half_btf_sse4_1(&cospim52, &u[46], &cospi12, &u[49], &rnding, bit);
- u[46] = temp2;
-
- // stage 5
- u[9] = u[8];
- u[14] = u[15];
-
- temp1 = half_btf_sse4_1(&cospim8, &u[17], &cospi56, &u[30], &rnding, bit);
- u[30] = half_btf_sse4_1(&cospi56, &u[17], &cospi8, &u[30], &rnding, bit);
- u[17] = temp1;
-
- temp2 = half_btf_sse4_1(&cospim24, &u[22], &cospim40, &u[25], &rnding, bit);
- u[25] = half_btf_sse4_1(&cospim40, &u[22], &cospi24, &u[25], &rnding, bit);
- u[22] = temp2;
-
- u[35] = u[32];
- u[34] = u[33];
- u[36] = u[39];
- u[37] = u[38];
- u[43] = u[40];
- u[42] = u[41];
- u[44] = u[47];
- u[45] = u[46];
- u[51] = u[48];
- u[50] = u[49];
- u[52] = u[55];
- u[53] = u[54];
- u[59] = u[56];
- u[58] = u[57];
- u[60] = u[63];
- u[61] = u[62];
-
- // stage 6
- temp1 = half_btf_0_sse4_1(&cospi32, &u[0], &rnding, bit);
- u[1] = half_btf_0_sse4_1(&cospi32, &u[0], &rnding, bit);
- u[0] = temp1;
-
- temp2 = half_btf_sse4_1(&cospim16, &u[9], &cospi48, &u[14], &rnding, bit);
- u[14] = half_btf_sse4_1(&cospi48, &u[9], &cospi16, &u[14], &rnding, bit);
- u[9] = temp2;
- u[19] = u[16];
- u[18] = u[17];
- u[20] = u[23];
- u[21] = u[22];
- u[27] = u[24];
- u[26] = u[25];
- u[28] = u[31];
- u[29] = u[30];
-
- temp1 = half_btf_sse4_1(&cospim8, &u[34], &cospi56, &u[61], &rnding, bit);
- u[61] = half_btf_sse4_1(&cospi56, &u[34], &cospi8, &u[61], &rnding, bit);
- u[34] = temp1;
- temp2 = half_btf_sse4_1(&cospim8, &u[35], &cospi56, &u[60], &rnding, bit);
- u[60] = half_btf_sse4_1(&cospi56, &u[35], &cospi8, &u[60], &rnding, bit);
- u[35] = temp2;
- temp1 = half_btf_sse4_1(&cospim56, &u[36], &cospim8, &u[59], &rnding, bit);
- u[59] = half_btf_sse4_1(&cospim8, &u[36], &cospi56, &u[59], &rnding, bit);
- u[36] = temp1;
- temp2 = half_btf_sse4_1(&cospim56, &u[37], &cospim8, &u[58], &rnding, bit);
- u[58] = half_btf_sse4_1(&cospim8, &u[37], &cospi56, &u[58], &rnding, bit);
- u[37] = temp2;
- temp1 = half_btf_sse4_1(&cospim40, &u[42], &cospi24, &u[53], &rnding, bit);
- u[53] = half_btf_sse4_1(&cospi24, &u[42], &cospi40, &u[53], &rnding, bit);
- u[42] = temp1;
- temp2 = half_btf_sse4_1(&cospim40, &u[43], &cospi24, &u[52], &rnding, bit);
- u[52] = half_btf_sse4_1(&cospi24, &u[43], &cospi40, &u[52], &rnding, bit);
- u[43] = temp2;
- temp1 = half_btf_sse4_1(&cospim24, &u[44], &cospim40, &u[51], &rnding, bit);
- u[51] = half_btf_sse4_1(&cospim40, &u[44], &cospi24, &u[51], &rnding, bit);
- u[44] = temp1;
- temp2 = half_btf_sse4_1(&cospim24, &u[45], &cospim40, &u[50], &rnding, bit);
- u[50] = half_btf_sse4_1(&cospim40, &u[45], &cospi24, &u[50], &rnding, bit);
- u[45] = temp2;
-
- // stage 7
- u[3] = u[0];
- u[2] = u[1];
- u[11] = u[8];
- u[10] = u[9];
- u[12] = u[15];
- u[13] = u[14];
-
- temp1 = half_btf_sse4_1(&cospim16, &u[18], &cospi48, &u[29], &rnding, bit);
- u[29] = half_btf_sse4_1(&cospi48, &u[18], &cospi16, &u[29], &rnding, bit);
- u[18] = temp1;
- temp2 = half_btf_sse4_1(&cospim16, &u[19], &cospi48, &u[28], &rnding, bit);
- u[28] = half_btf_sse4_1(&cospi48, &u[19], &cospi16, &u[28], &rnding, bit);
- u[19] = temp2;
- temp1 = half_btf_sse4_1(&cospim48, &u[20], &cospim16, &u[27], &rnding, bit);
- u[27] = half_btf_sse4_1(&cospim16, &u[20], &cospi48, &u[27], &rnding, bit);
- u[20] = temp1;
- temp2 = half_btf_sse4_1(&cospim48, &u[21], &cospim16, &u[26], &rnding, bit);
- u[26] = half_btf_sse4_1(&cospim16, &u[21], &cospi48, &u[26], &rnding, bit);
- u[21] = temp2;
- for (i = 32; i < 64; i += 16) {
- for (j = i; j < i + 4; j++) {
- addsub_sse4_1(u[j], u[j ^ 7], &u[j], &u[j ^ 7], &clamp_lo, &clamp_hi);
- addsub_sse4_1(u[j ^ 15], u[j ^ 8], &u[j ^ 15], &u[j ^ 8], &clamp_lo,
- &clamp_hi);
- }
- }
-
- // stage 8
- u[7] = u[0];
- u[6] = u[1];
- u[5] = u[2];
- u[4] = u[3];
- u[9] = u[9];
-
- idct64_stage8_sse4_1(u, &cospim32, &cospi32, &cospim16, &cospi48, &cospi16,
- &cospim48, &clamp_lo, &clamp_hi, &rnding, bit);
-
- // stage 9
- idct64_stage9_sse4_1(u, &cospim32, &cospi32, &clamp_lo, &clamp_hi, &rnding,
- bit);
-
- // stage 10
- idct64_stage10_sse4_1(u, &cospim32, &cospi32, &clamp_lo, &clamp_hi, &rnding,
- bit);
-
- // stage 11
- idct64_stage11_sse4_1(u, out, do_cols, bd, out_shift, log_range);
- }
-}
-
-static void idct64x64_low16_sse4_1(__m128i *in, __m128i *out, int bit,  // Runs the idct64 stage sequence on a 64-entry work array u[], reading only in[0]..in[15].
- int do_cols, int bd, int out_shift) {  // bit picks the cospi table and rounding term; do_cols/bd set the clamp range; out_shift is forwarded to stage 11.
- int i, j;
- const int32_t *cospi = cospi_arr(bit);  // coefficient table selected by bit
- const __m128i rnding = _mm_set1_epi32(1 << (bit - 1));  // rounding term handed to every half_btf helper call
- const int log_range = AOMMAX(16, bd + (do_cols ? 6 : 8));  // at least 16; bd + 6 for the column pass, bd + 8 otherwise
- const __m128i clamp_lo = _mm_set1_epi32(-(1 << (log_range - 1)));  // lower bound: -(2^(log_range - 1))
- const __m128i clamp_hi = _mm_set1_epi32((1 << (log_range - 1)) - 1);  // upper bound: 2^(log_range - 1) - 1
-
- const __m128i cospi1 = _mm_set1_epi32(cospi[1]);  // cospiN: cospi[N] replicated into every 32-bit lane
- const __m128i cospi2 = _mm_set1_epi32(cospi[2]);
- const __m128i cospi3 = _mm_set1_epi32(cospi[3]);
- const __m128i cospi4 = _mm_set1_epi32(cospi[4]);
- const __m128i cospi5 = _mm_set1_epi32(cospi[5]);
- const __m128i cospi6 = _mm_set1_epi32(cospi[6]);
- const __m128i cospi7 = _mm_set1_epi32(cospi[7]);
- const __m128i cospi8 = _mm_set1_epi32(cospi[8]);
- const __m128i cospi9 = _mm_set1_epi32(cospi[9]);
- const __m128i cospi10 = _mm_set1_epi32(cospi[10]);
- const __m128i cospi11 = _mm_set1_epi32(cospi[11]);
- const __m128i cospi12 = _mm_set1_epi32(cospi[12]);
- const __m128i cospi13 = _mm_set1_epi32(cospi[13]);
- const __m128i cospi14 = _mm_set1_epi32(cospi[14]);
- const __m128i cospi15 = _mm_set1_epi32(cospi[15]);
- const __m128i cospi16 = _mm_set1_epi32(cospi[16]);
- const __m128i cospi20 = _mm_set1_epi32(cospi[20]);
- const __m128i cospi24 = _mm_set1_epi32(cospi[24]);
- const __m128i cospi28 = _mm_set1_epi32(cospi[28]);
- const __m128i cospi32 = _mm_set1_epi32(cospi[32]);
- const __m128i cospi36 = _mm_set1_epi32(cospi[36]);
- const __m128i cospi40 = _mm_set1_epi32(cospi[40]);
- const __m128i cospi44 = _mm_set1_epi32(cospi[44]);
- const __m128i cospi48 = _mm_set1_epi32(cospi[48]);
- const __m128i cospi51 = _mm_set1_epi32(cospi[51]);
- const __m128i cospi52 = _mm_set1_epi32(cospi[52]);
- const __m128i cospi54 = _mm_set1_epi32(cospi[54]);
- const __m128i cospi55 = _mm_set1_epi32(cospi[55]);
- const __m128i cospi56 = _mm_set1_epi32(cospi[56]);
- const __m128i cospi59 = _mm_set1_epi32(cospi[59]);
- const __m128i cospi60 = _mm_set1_epi32(cospi[60]);
- const __m128i cospi62 = _mm_set1_epi32(cospi[62]);
- const __m128i cospi63 = _mm_set1_epi32(cospi[63]);
-
- const __m128i cospim4 = _mm_set1_epi32(-cospi[4]);  // cospimN: -cospi[N] replicated into every 32-bit lane
- const __m128i cospim8 = _mm_set1_epi32(-cospi[8]);
- const __m128i cospim12 = _mm_set1_epi32(-cospi[12]);
- const __m128i cospim16 = _mm_set1_epi32(-cospi[16]);
- const __m128i cospim20 = _mm_set1_epi32(-cospi[20]);
- const __m128i cospim24 = _mm_set1_epi32(-cospi[24]);
- const __m128i cospim28 = _mm_set1_epi32(-cospi[28]);
- const __m128i cospim32 = _mm_set1_epi32(-cospi[32]);
- const __m128i cospim36 = _mm_set1_epi32(-cospi[36]);
- const __m128i cospim40 = _mm_set1_epi32(-cospi[40]);
- const __m128i cospim44 = _mm_set1_epi32(-cospi[44]);
- const __m128i cospim48 = _mm_set1_epi32(-cospi[48]);
- const __m128i cospim49 = _mm_set1_epi32(-cospi[49]);
- const __m128i cospim50 = _mm_set1_epi32(-cospi[50]);
- const __m128i cospim52 = _mm_set1_epi32(-cospi[52]);
- const __m128i cospim53 = _mm_set1_epi32(-cospi[53]);
- const __m128i cospim56 = _mm_set1_epi32(-cospi[56]);
- const __m128i cospim57 = _mm_set1_epi32(-cospi[57]);
- const __m128i cospim58 = _mm_set1_epi32(-cospi[58]);
- const __m128i cospim60 = _mm_set1_epi32(-cospi[60]);
- const __m128i cospim61 = _mm_set1_epi32(-cospi[61]);
-
- {
- __m128i u[64];  // work array, updated in place by every stage
- __m128i tmp1, tmp2, tmp3, tmp4;  // staging for in-place two-input rotations
- // stage 1: place the 16 inputs into their starting slots of u[]; no other in[] entry is read
- u[0] = in[0];
- u[32] = in[1];
- u[36] = in[9];
- u[40] = in[5];
- u[44] = in[13];
- u[48] = in[3];
- u[52] = in[11];
- u[56] = in[7];
- u[60] = in[15];
- u[16] = in[2];
- u[20] = in[10];
- u[24] = in[6];
- u[28] = in[14];
- u[4] = in[8];
- u[8] = in[4];
- u[12] = in[12];
-
- // stage 2: single-input half_btf_0 calls fill u[32..63] slots; the mirrored slot is written first because the source slot is overwritten next
- u[63] = half_btf_0_sse4_1(&cospi1, &u[32], &rnding, bit);
- u[32] = half_btf_0_sse4_1(&cospi63, &u[32], &rnding, bit);
- u[35] = half_btf_0_sse4_1(&cospim49, &u[60], &rnding, bit);
- u[60] = half_btf_0_sse4_1(&cospi15, &u[60], &rnding, bit);
- u[59] = half_btf_0_sse4_1(&cospi9, &u[36], &rnding, bit);
- u[36] = half_btf_0_sse4_1(&cospi55, &u[36], &rnding, bit);
- u[39] = half_btf_0_sse4_1(&cospim57, &u[56], &rnding, bit);
- u[56] = half_btf_0_sse4_1(&cospi7, &u[56], &rnding, bit);
- u[55] = half_btf_0_sse4_1(&cospi5, &u[40], &rnding, bit);
- u[40] = half_btf_0_sse4_1(&cospi59, &u[40], &rnding, bit);
- u[43] = half_btf_0_sse4_1(&cospim53, &u[52], &rnding, bit);
- u[52] = half_btf_0_sse4_1(&cospi11, &u[52], &rnding, bit);
- u[47] = half_btf_0_sse4_1(&cospim61, &u[48], &rnding, bit);
- u[48] = half_btf_0_sse4_1(&cospi3, &u[48], &rnding, bit);
- u[51] = half_btf_0_sse4_1(&cospi13, &u[44], &rnding, bit);
- u[44] = half_btf_0_sse4_1(&cospi51, &u[44], &rnding, bit);
-
- // stage 3: single-input half_btf_0 calls fill u[16..31] slots, then each odd/even neighbour in u[32..63] is filled by a plain copy
- u[31] = half_btf_0_sse4_1(&cospi2, &u[16], &rnding, bit);
- u[16] = half_btf_0_sse4_1(&cospi62, &u[16], &rnding, bit);
- u[19] = half_btf_0_sse4_1(&cospim50, &u[28], &rnding, bit);
- u[28] = half_btf_0_sse4_1(&cospi14, &u[28], &rnding, bit);
- u[27] = half_btf_0_sse4_1(&cospi10, &u[20], &rnding, bit);
- u[20] = half_btf_0_sse4_1(&cospi54, &u[20], &rnding, bit);
- u[23] = half_btf_0_sse4_1(&cospim58, &u[24], &rnding, bit);
- u[24] = half_btf_0_sse4_1(&cospi6, &u[24], &rnding, bit);
- u[33] = u[32];
- u[34] = u[35];
- u[37] = u[36];
- u[38] = u[39];
- u[41] = u[40];
- u[42] = u[43];
- u[45] = u[44];
- u[46] = u[47];
- u[49] = u[48];
- u[50] = u[51];
- u[53] = u[52];
- u[54] = u[55];
- u[57] = u[56];
- u[58] = u[59];
- u[61] = u[60];
- u[62] = u[63];
-
- // stage 4: single-input calls fill u[8], u[11], u[12], u[15]; copies fill the u[16..31] neighbours; two-input half_btf calls update u[32..63] pairs
- u[15] = half_btf_0_sse4_1(&cospi4, &u[8], &rnding, bit);
- u[8] = half_btf_0_sse4_1(&cospi60, &u[8], &rnding, bit);
- u[11] = half_btf_0_sse4_1(&cospim52, &u[12], &rnding, bit);
- u[12] = half_btf_0_sse4_1(&cospi12, &u[12], &rnding, bit);
-
- u[17] = u[16];
- u[18] = u[19];
- u[21] = u[20];
- u[22] = u[23];
- u[25] = u[24];
- u[26] = u[27];
- u[29] = u[28];
- u[30] = u[31];
-
- tmp1 = half_btf_sse4_1(&cospim4, &u[33], &cospi60, &u[62], &rnding, bit);  // low-index results are parked in tmp1..tmp4 so the high-index results below still read unmodified inputs
- tmp2 = half_btf_sse4_1(&cospim60, &u[34], &cospim4, &u[61], &rnding, bit);
- tmp3 = half_btf_sse4_1(&cospim36, &u[37], &cospi28, &u[58], &rnding, bit);
- tmp4 = half_btf_sse4_1(&cospim28, &u[38], &cospim36, &u[57], &rnding, bit);
- u[57] = half_btf_sse4_1(&cospim36, &u[38], &cospi28, &u[57], &rnding, bit);
- u[58] = half_btf_sse4_1(&cospi28, &u[37], &cospi36, &u[58], &rnding, bit);
- u[61] = half_btf_sse4_1(&cospim4, &u[34], &cospi60, &u[61], &rnding, bit);
- u[62] = half_btf_sse4_1(&cospi60, &u[33], &cospi4, &u[62], &rnding, bit);
- u[33] = tmp1;  // commit the parked low-index results
- u[34] = tmp2;
- u[37] = tmp3;
- u[38] = tmp4;
-
- tmp1 = half_btf_sse4_1(&cospim20, &u[41], &cospi44, &u[54], &rnding, bit);  // same park-then-commit pattern for u[41..54]
- tmp2 = half_btf_sse4_1(&cospim44, &u[42], &cospim20, &u[53], &rnding, bit);
- tmp3 = half_btf_sse4_1(&cospim52, &u[45], &cospi12, &u[50], &rnding, bit);
- tmp4 = half_btf_sse4_1(&cospim12, &u[46], &cospim52, &u[49], &rnding, bit);
- u[49] = half_btf_sse4_1(&cospim52, &u[46], &cospi12, &u[49], &rnding, bit);
- u[50] = half_btf_sse4_1(&cospi12, &u[45], &cospi52, &u[50], &rnding, bit);
- u[53] = half_btf_sse4_1(&cospim20, &u[42], &cospi44, &u[53], &rnding, bit);
- u[54] = half_btf_sse4_1(&cospi44, &u[41], &cospi20, &u[54], &rnding, bit);
- u[41] = tmp1;
- u[42] = tmp2;
- u[45] = tmp3;
- u[46] = tmp4;
-
- // stage 5: single-input calls fill u[4] and u[7]; copies fill the u[8..15] neighbours; two-input calls update u[17..30]; addsub over u[32..63]
- u[7] = half_btf_0_sse4_1(&cospi8, &u[4], &rnding, bit);
- u[4] = half_btf_0_sse4_1(&cospi56, &u[4], &rnding, bit);
-
- u[9] = u[8];
- u[10] = u[11];
- u[13] = u[12];
- u[14] = u[15];
-
- tmp1 = half_btf_sse4_1(&cospim8, &u[17], &cospi56, &u[30], &rnding, bit);  // park low-index results, as in stage 4
- tmp2 = half_btf_sse4_1(&cospim56, &u[18], &cospim8, &u[29], &rnding, bit);
- tmp3 = half_btf_sse4_1(&cospim40, &u[21], &cospi24, &u[26], &rnding, bit);
- tmp4 = half_btf_sse4_1(&cospim24, &u[22], &cospim40, &u[25], &rnding, bit);
- u[25] = half_btf_sse4_1(&cospim40, &u[22], &cospi24, &u[25], &rnding, bit);
- u[26] = half_btf_sse4_1(&cospi24, &u[21], &cospi40, &u[26], &rnding, bit);
- u[29] = half_btf_sse4_1(&cospim8, &u[18], &cospi56, &u[29], &rnding, bit);
- u[30] = half_btf_sse4_1(&cospi56, &u[17], &cospi8, &u[30], &rnding, bit);
- u[17] = tmp1;
- u[18] = tmp2;
- u[21] = tmp3;
- u[22] = tmp4;
-
- for (i = 32; i < 64; i += 8) {  // per group of 8: pairs (0,3), (1,2), (7,4), (6,5), written back in place; addsub_sse4_1 is defined elsewhere (presumably a clamped sum/difference - confirm)
- addsub_sse4_1(u[i + 0], u[i + 3], &u[i + 0], &u[i + 3], &clamp_lo,
- &clamp_hi);
- addsub_sse4_1(u[i + 1], u[i + 2], &u[i + 1], &u[i + 2], &clamp_lo,
- &clamp_hi);
-
- addsub_sse4_1(u[i + 7], u[i + 4], &u[i + 7], &u[i + 4], &clamp_lo,
- &clamp_hi);
- addsub_sse4_1(u[i + 6], u[i + 5], &u[i + 6], &u[i + 5], &clamp_lo,
- &clamp_hi);
- }
-
- // stage 6: u[0]/u[1] from the cospi32 call; copies fill u[5], u[6]; two-input calls update u[9..14]; addsub over u[16..31]; two-input calls update u[34..61]
- tmp1 = half_btf_0_sse4_1(&cospi32, &u[0], &rnding, bit);
- u[1] = half_btf_0_sse4_1(&cospi32, &u[0], &rnding, bit);  // identical call to the tmp1 line above
- u[0] = tmp1;
- u[5] = u[4];
- u[6] = u[7];
-
- tmp1 = half_btf_sse4_1(&cospim16, &u[9], &cospi48, &u[14], &rnding, bit);  // u[14] below must read the old u[9], hence tmp1
- u[14] = half_btf_sse4_1(&cospi48, &u[9], &cospi16, &u[14], &rnding, bit);
- u[9] = tmp1;
- tmp2 = half_btf_sse4_1(&cospim48, &u[10], &cospim16, &u[13], &rnding, bit);
- u[13] = half_btf_sse4_1(&cospim16, &u[10], &cospi48, &u[13], &rnding, bit);
- u[10] = tmp2;
-
- for (i = 16; i < 32; i += 8) {  // same pairing as the stage 5 loop, applied to u[16..31]
- addsub_sse4_1(u[i + 0], u[i + 3], &u[i + 0], &u[i + 3], &clamp_lo,
- &clamp_hi);
- addsub_sse4_1(u[i + 1], u[i + 2], &u[i + 1], &u[i + 2], &clamp_lo,
- &clamp_hi);
-
- addsub_sse4_1(u[i + 7], u[i + 4], &u[i + 7], &u[i + 4], &clamp_lo,
- &clamp_hi);
- addsub_sse4_1(u[i + 6], u[i + 5], &u[i + 6], &u[i + 5], &clamp_lo,
- &clamp_hi);
- }
-
- tmp1 = half_btf_sse4_1(&cospim8, &u[34], &cospi56, &u[61], &rnding, bit);  // park-then-commit for u[34..37] against u[58..61]
- tmp2 = half_btf_sse4_1(&cospim8, &u[35], &cospi56, &u[60], &rnding, bit);
- tmp3 = half_btf_sse4_1(&cospim56, &u[36], &cospim8, &u[59], &rnding, bit);
- tmp4 = half_btf_sse4_1(&cospim56, &u[37], &cospim8, &u[58], &rnding, bit);
- u[58] = half_btf_sse4_1(&cospim8, &u[37], &cospi56, &u[58], &rnding, bit);
- u[59] = half_btf_sse4_1(&cospim8, &u[36], &cospi56, &u[59], &rnding, bit);
- u[60] = half_btf_sse4_1(&cospi56, &u[35], &cospi8, &u[60], &rnding, bit);
- u[61] = half_btf_sse4_1(&cospi56, &u[34], &cospi8, &u[61], &rnding, bit);
- u[34] = tmp1;
- u[35] = tmp2;
- u[36] = tmp3;
- u[37] = tmp4;
-
- tmp1 = half_btf_sse4_1(&cospim40, &u[42], &cospi24, &u[53], &rnding, bit);  // park-then-commit for u[42..45] against u[50..53]
- tmp2 = half_btf_sse4_1(&cospim40, &u[43], &cospi24, &u[52], &rnding, bit);
- tmp3 = half_btf_sse4_1(&cospim24, &u[44], &cospim40, &u[51], &rnding, bit);
- tmp4 = half_btf_sse4_1(&cospim24, &u[45], &cospim40, &u[50], &rnding, bit);
- u[50] = half_btf_sse4_1(&cospim40, &u[45], &cospi24, &u[50], &rnding, bit);
- u[51] = half_btf_sse4_1(&cospim40, &u[44], &cospi24, &u[51], &rnding, bit);
- u[52] = half_btf_sse4_1(&cospi24, &u[43], &cospi40, &u[52], &rnding, bit);
- u[53] = half_btf_sse4_1(&cospi24, &u[42], &cospi40, &u[53], &rnding, bit);
- u[42] = tmp1;
- u[43] = tmp2;
- u[44] = tmp3;
- u[45] = tmp4;
-
- // stage 7: copies fill u[2], u[3]; two-input call on u[5]/u[6]; addsub over u[8..15]; two-input calls update u[18..29]; addsub over u[32..63]
- u[3] = u[0];
- u[2] = u[1];
- tmp1 = half_btf_sse4_1(&cospim32, &u[5], &cospi32, &u[6], &rnding, bit);  // u[6] below must read the old u[5], hence tmp1
- u[6] = half_btf_sse4_1(&cospi32, &u[5], &cospi32, &u[6], &rnding, bit);
- u[5] = tmp1;
- addsub_sse4_1(u[8], u[11], &u[8], &u[11], &clamp_lo, &clamp_hi);
- addsub_sse4_1(u[9], u[10], &u[9], &u[10], &clamp_lo, &clamp_hi);
- addsub_sse4_1(u[15], u[12], &u[15], &u[12], &clamp_lo, &clamp_hi);
- addsub_sse4_1(u[14], u[13], &u[14], &u[13], &clamp_lo, &clamp_hi);
-
- tmp1 = half_btf_sse4_1(&cospim16, &u[18], &cospi48, &u[29], &rnding, bit);  // park-then-commit for u[18..21] against u[26..29]
- tmp2 = half_btf_sse4_1(&cospim16, &u[19], &cospi48, &u[28], &rnding, bit);
- tmp3 = half_btf_sse4_1(&cospim48, &u[20], &cospim16, &u[27], &rnding, bit);
- tmp4 = half_btf_sse4_1(&cospim48, &u[21], &cospim16, &u[26], &rnding, bit);
- u[26] = half_btf_sse4_1(&cospim16, &u[21], &cospi48, &u[26], &rnding, bit);
- u[27] = half_btf_sse4_1(&cospim16, &u[20], &cospi48, &u[27], &rnding, bit);
- u[28] = half_btf_sse4_1(&cospi48, &u[19], &cospi16, &u[28], &rnding, bit);
- u[29] = half_btf_sse4_1(&cospi48, &u[18], &cospi16, &u[29], &rnding, bit);
- u[18] = tmp1;
- u[19] = tmp2;
- u[20] = tmp3;
- u[21] = tmp4;
-
- for (i = 32; i < 64; i += 16) {  // two groups of 16: u[32..47] and u[48..63]
- for (j = i; j < i + 4; j++) {  // j covers the first four slots of the group
- addsub_sse4_1(u[j], u[j ^ 7], &u[j], &u[j ^ 7], &clamp_lo, &clamp_hi);  // j ^ 7 is the mirror of j within the first 8 slots (i + 7 - (j - i))
- addsub_sse4_1(u[j ^ 15], u[j ^ 8], &u[j ^ 15], &u[j ^ 8], &clamp_lo,  // j ^ 15 and j ^ 8 address the mirrored pair in the second 8 slots
- &clamp_hi);
- }
- }
-
- // stage 8: addsub across u[0..7] pairing i with 7 - i, then the shared stage-8 helper handles the rest of u[]
- for (i = 0; i < 4; ++i) {
- addsub_sse4_1(u[i], u[7 - i], &u[i], &u[7 - i], &clamp_lo, &clamp_hi);
- }
-
- idct64_stage8_sse4_1(u, &cospim32, &cospi32, &cospim16, &cospi48, &cospi16,
- &cospim48, &clamp_lo, &clamp_hi, &rnding, bit);
-
- // stage 9: delegated to a helper defined elsewhere in the file
- idct64_stage9_sse4_1(u, &cospim32, &cospi32, &clamp_lo, &clamp_hi, &rnding,
- bit);
-
- // stage 10: delegated to a helper defined elsewhere in the file
- idct64_stage10_sse4_1(u, &cospim32, &cospi32, &clamp_lo, &clamp_hi, &rnding,
- bit);
-
- // stage 11: the only call that receives out; do_cols, bd, out_shift and log_range are forwarded to it
- idct64_stage11_sse4_1(u, out, do_cols, bd, out_shift, log_range);
- }
-}
-
-static void idct64x64_sse4_1(__m128i *in, __m128i *out, int bit, int do_cols,
- int bd, int out_shift) {
- int i, j;
- const int32_t *cospi = cospi_arr(bit);
- const __m128i rnding = _mm_set1_epi32(1 << (bit - 1));
- const int log_range = AOMMAX(16, bd + (do_cols ? 6 : 8));
- const __m128i clamp_lo = _mm_set1_epi32(-(1 << (log_range - 1)));
- const __m128i clamp_hi = _mm_set1_epi32((1 << (log_range - 1)) - 1);
-
- const __m128i cospi1 = _mm_set1_epi32(cospi[1]);
- const __m128i cospi2 = _mm_set1_epi32(cospi[2]);
- const __m128i cospi3 = _mm_set1_epi32(cospi[3]);
- const __m128i cospi4 = _mm_set1_epi32(cospi[4]);
- const __m128i cospi5 = _mm_set1_epi32(cospi[5]);
- const __m128i cospi6 = _mm_set1_epi32(cospi[6]);
- const __m128i cospi7 = _mm_set1_epi32(cospi[7]);
- const __m128i cospi8 = _mm_set1_epi32(cospi[8]);
- const __m128i cospi9 = _mm_set1_epi32(cospi[9]);
- const __m128i cospi10 = _mm_set1_epi32(cospi[10]);
- const __m128i cospi11 = _mm_set1_epi32(cospi[11]);
- const __m128i cospi12 = _mm_set1_epi32(cospi[12]);
- const __m128i cospi13 = _mm_set1_epi32(cospi[13]);
- const __m128i cospi14 = _mm_set1_epi32(cospi[14]);
- const __m128i cospi15 = _mm_set1_epi32(cospi[15]);
- const __m128i cospi16 = _mm_set1_epi32(cospi[16]);
- const __m128i cospi17 = _mm_set1_epi32(cospi[17]);
- const __m128i cospi18 = _mm_set1_epi32(cospi[18]);
- const __m128i cospi19 = _mm_set1_epi32(cospi[19]);
- const __m128i cospi20 = _mm_set1_epi32(cospi[20]);
- const __m128i cospi21 = _mm_set1_epi32(cospi[21]);
- const __m128i cospi22 = _mm_set1_epi32(cospi[22]);
- const __m128i cospi23 = _mm_set1_epi32(cospi[23]);
- const __m128i cospi24 = _mm_set1_epi32(cospi[24]);
- const __m128i cospi25 = _mm_set1_epi32(cospi[25]);
- const __m128i cospi26 = _mm_set1_epi32(cospi[26]);
- const __m128i cospi27 = _mm_set1_epi32(cospi[27]);
- const __m128i cospi28 = _mm_set1_epi32(cospi[28]);
- const __m128i cospi29 = _mm_set1_epi32(cospi[29]);
- const __m128i cospi30 = _mm_set1_epi32(cospi[30]);
- const __m128i cospi31 = _mm_set1_epi32(cospi[31]);
- const __m128i cospi32 = _mm_set1_epi32(cospi[32]);
- const __m128i cospi35 = _mm_set1_epi32(cospi[35]);
- const __m128i cospi36 = _mm_set1_epi32(cospi[36]);
- const __m128i cospi38 = _mm_set1_epi32(cospi[38]);
- const __m128i cospi39 = _mm_set1_epi32(cospi[39]);
- const __m128i cospi40 = _mm_set1_epi32(cospi[40]);
- const __m128i cospi43 = _mm_set1_epi32(cospi[43]);
- const __m128i cospi44 = _mm_set1_epi32(cospi[44]);
- const __m128i cospi46 = _mm_set1_epi32(cospi[46]);
- const __m128i cospi47 = _mm_set1_epi32(cospi[47]);
- const __m128i cospi48 = _mm_set1_epi32(cospi[48]);
- const __m128i cospi51 = _mm_set1_epi32(cospi[51]);
- const __m128i cospi52 = _mm_set1_epi32(cospi[52]);
- const __m128i cospi54 = _mm_set1_epi32(cospi[54]);
- const __m128i cospi55 = _mm_set1_epi32(cospi[55]);
- const __m128i cospi56 = _mm_set1_epi32(cospi[56]);
- const __m128i cospi59 = _mm_set1_epi32(cospi[59]);
- const __m128i cospi60 = _mm_set1_epi32(cospi[60]);
- const __m128i cospi62 = _mm_set1_epi32(cospi[62]);
- const __m128i cospi63 = _mm_set1_epi32(cospi[63]);
-
- const __m128i cospim4 = _mm_set1_epi32(-cospi[4]);
- const __m128i cospim8 = _mm_set1_epi32(-cospi[8]);
- const __m128i cospim12 = _mm_set1_epi32(-cospi[12]);
- const __m128i cospim16 = _mm_set1_epi32(-cospi[16]);
- const __m128i cospim20 = _mm_set1_epi32(-cospi[20]);
- const __m128i cospim24 = _mm_set1_epi32(-cospi[24]);
- const __m128i cospim28 = _mm_set1_epi32(-cospi[28]);
- const __m128i cospim32 = _mm_set1_epi32(-cospi[32]);
- const __m128i cospim33 = _mm_set1_epi32(-cospi[33]);
- const __m128i cospim34 = _mm_set1_epi32(-cospi[34]);
- const __m128i cospim36 = _mm_set1_epi32(-cospi[36]);
- const __m128i cospim37 = _mm_set1_epi32(-cospi[37]);
- const __m128i cospim40 = _mm_set1_epi32(-cospi[40]);
- const __m128i cospim41 = _mm_set1_epi32(-cospi[41]);
- const __m128i cospim42 = _mm_set1_epi32(-cospi[42]);
- const __m128i cospim44 = _mm_set1_epi32(-cospi[44]);
- const __m128i cospim45 = _mm_set1_epi32(-cospi[45]);
- const __m128i cospim48 = _mm_set1_epi32(-cospi[48]);
- const __m128i cospim49 = _mm_set1_epi32(-cospi[49]);
- const __m128i cospim50 = _mm_set1_epi32(-cospi[50]);
- const __m128i cospim52 = _mm_set1_epi32(-cospi[52]);
- const __m128i cospim53 = _mm_set1_epi32(-cospi[53]);
- const __m128i cospim56 = _mm_set1_epi32(-cospi[56]);
- const __m128i cospim57 = _mm_set1_epi32(-cospi[57]);
- const __m128i cospim58 = _mm_set1_epi32(-cospi[58]);
- const __m128i cospim60 = _mm_set1_epi32(-cospi[60]);
- const __m128i cospim61 = _mm_set1_epi32(-cospi[61]);
-
- {
- __m128i u[64], v[64];
-
- // stage 1
- u[32] = in[1];
- u[34] = in[17];
- u[36] = in[9];
- u[38] = in[25];
- u[40] = in[5];
- u[42] = in[21];
- u[44] = in[13];
- u[46] = in[29];
- u[48] = in[3];
- u[50] = in[19];
- u[52] = in[11];
- u[54] = in[27];
- u[56] = in[7];
- u[58] = in[23];
- u[60] = in[15];
- u[62] = in[31];
-
- v[16] = in[2];
- v[18] = in[18];
- v[20] = in[10];
- v[22] = in[26];
- v[24] = in[6];
- v[26] = in[22];
- v[28] = in[14];
- v[30] = in[30];
-
- u[8] = in[4];
- u[10] = in[20];
- u[12] = in[12];
- u[14] = in[28];
-
- v[4] = in[8];
- v[6] = in[24];
-
- u[0] = in[0];
- u[2] = in[16];
-
- // stage 2
- v[32] = half_btf_0_sse4_1(&cospi63, &u[32], &rnding, bit);
- v[33] = half_btf_0_sse4_1(&cospim33, &u[62], &rnding, bit);
- v[34] = half_btf_0_sse4_1(&cospi47, &u[34], &rnding, bit);
- v[35] = half_btf_0_sse4_1(&cospim49, &u[60], &rnding, bit);
- v[36] = half_btf_0_sse4_1(&cospi55, &u[36], &rnding, bit);
- v[37] = half_btf_0_sse4_1(&cospim41, &u[58], &rnding, bit);
- v[38] = half_btf_0_sse4_1(&cospi39, &u[38], &rnding, bit);
- v[39] = half_btf_0_sse4_1(&cospim57, &u[56], &rnding, bit);
- v[40] = half_btf_0_sse4_1(&cospi59, &u[40], &rnding, bit);
- v[41] = half_btf_0_sse4_1(&cospim37, &u[54], &rnding, bit);
- v[42] = half_btf_0_sse4_1(&cospi43, &u[42], &rnding, bit);
- v[43] = half_btf_0_sse4_1(&cospim53, &u[52], &rnding, bit);
- v[44] = half_btf_0_sse4_1(&cospi51, &u[44], &rnding, bit);
- v[45] = half_btf_0_sse4_1(&cospim45, &u[50], &rnding, bit);
- v[46] = half_btf_0_sse4_1(&cospi35, &u[46], &rnding, bit);
- v[47] = half_btf_0_sse4_1(&cospim61, &u[48], &rnding, bit);
- v[48] = half_btf_0_sse4_1(&cospi3, &u[48], &rnding, bit);
- v[49] = half_btf_0_sse4_1(&cospi29, &u[46], &rnding, bit);
- v[50] = half_btf_0_sse4_1(&cospi19, &u[50], &rnding, bit);
- v[51] = half_btf_0_sse4_1(&cospi13, &u[44], &rnding, bit);
- v[52] = half_btf_0_sse4_1(&cospi11, &u[52], &rnding, bit);
- v[53] = half_btf_0_sse4_1(&cospi21, &u[42], &rnding, bit);
- v[54] = half_btf_0_sse4_1(&cospi27, &u[54], &rnding, bit);
- v[55] = half_btf_0_sse4_1(&cospi5, &u[40], &rnding, bit);
- v[56] = half_btf_0_sse4_1(&cospi7, &u[56], &rnding, bit);
- v[57] = half_btf_0_sse4_1(&cospi25, &u[38], &rnding, bit);
- v[58] = half_btf_0_sse4_1(&cospi23, &u[58], &rnding, bit);
- v[59] = half_btf_0_sse4_1(&cospi9, &u[36], &rnding, bit);
- v[60] = half_btf_0_sse4_1(&cospi15, &u[60], &rnding, bit);
- v[61] = half_btf_0_sse4_1(&cospi17, &u[34], &rnding, bit);
- v[62] = half_btf_0_sse4_1(&cospi31, &u[62], &rnding, bit);
- v[63] = half_btf_0_sse4_1(&cospi1, &u[32], &rnding, bit);
-
- // stage 3
- u[16] = half_btf_0_sse4_1(&cospi62, &v[16], &rnding, bit);
- u[17] = half_btf_0_sse4_1(&cospim34, &v[30], &rnding, bit);
- u[18] = half_btf_0_sse4_1(&cospi46, &v[18], &rnding, bit);
- u[19] = half_btf_0_sse4_1(&cospim50, &v[28], &rnding, bit);
- u[20] = half_btf_0_sse4_1(&cospi54, &v[20], &rnding, bit);
- u[21] = half_btf_0_sse4_1(&cospim42, &v[26], &rnding, bit);
- u[22] = half_btf_0_sse4_1(&cospi38, &v[22], &rnding, bit);
- u[23] = half_btf_0_sse4_1(&cospim58, &v[24], &rnding, bit);
- u[24] = half_btf_0_sse4_1(&cospi6, &v[24], &rnding, bit);
- u[25] = half_btf_0_sse4_1(&cospi26, &v[22], &rnding, bit);
- u[26] = half_btf_0_sse4_1(&cospi22, &v[26], &rnding, bit);
- u[27] = half_btf_0_sse4_1(&cospi10, &v[20], &rnding, bit);
- u[28] = half_btf_0_sse4_1(&cospi14, &v[28], &rnding, bit);
- u[29] = half_btf_0_sse4_1(&cospi18, &v[18], &rnding, bit);
- u[30] = half_btf_0_sse4_1(&cospi30, &v[30], &rnding, bit);
- u[31] = half_btf_0_sse4_1(&cospi2, &v[16], &rnding, bit);
-
- for (i = 32; i < 64; i += 4) {
- addsub_sse4_1(v[i + 0], v[i + 1], &u[i + 0], &u[i + 1], &clamp_lo,
- &clamp_hi);
- addsub_sse4_1(v[i + 3], v[i + 2], &u[i + 3], &u[i + 2], &clamp_lo,
- &clamp_hi);
- }
-
- // stage 4
- v[8] = half_btf_0_sse4_1(&cospi60, &u[8], &rnding, bit);
- v[9] = half_btf_0_sse4_1(&cospim36, &u[14], &rnding, bit);
- v[10] = half_btf_0_sse4_1(&cospi44, &u[10], &rnding, bit);
- v[11] = half_btf_0_sse4_1(&cospim52, &u[12], &rnding, bit);
- v[12] = half_btf_0_sse4_1(&cospi12, &u[12], &rnding, bit);
- v[13] = half_btf_0_sse4_1(&cospi20, &u[10], &rnding, bit);
- v[14] = half_btf_0_sse4_1(&cospi28, &u[14], &rnding, bit);
- v[15] = half_btf_0_sse4_1(&cospi4, &u[8], &rnding, bit);
-
- for (i = 16; i < 32; i += 4) {
- addsub_sse4_1(u[i + 0], u[i + 1], &v[i + 0], &v[i + 1], &clamp_lo,
- &clamp_hi);
- addsub_sse4_1(u[i + 3], u[i + 2], &v[i + 3], &v[i + 2], &clamp_lo,
- &clamp_hi);
- }
-
- for (i = 32; i < 64; i += 4) {
- v[i + 0] = u[i + 0];
- v[i + 3] = u[i + 3];
- }
-
- v[33] = half_btf_sse4_1(&cospim4, &u[33], &cospi60, &u[62], &rnding, bit);
- v[34] = half_btf_sse4_1(&cospim60, &u[34], &cospim4, &u[61], &rnding, bit);
- v[37] = half_btf_sse4_1(&cospim36, &u[37], &cospi28, &u[58], &rnding, bit);
- v[38] = half_btf_sse4_1(&cospim28, &u[38], &cospim36, &u[57], &rnding, bit);
- v[41] = half_btf_sse4_1(&cospim20, &u[41], &cospi44, &u[54], &rnding, bit);
- v[42] = half_btf_sse4_1(&cospim44, &u[42], &cospim20, &u[53], &rnding, bit);
- v[45] = half_btf_sse4_1(&cospim52, &u[45], &cospi12, &u[50], &rnding, bit);
- v[46] = half_btf_sse4_1(&cospim12, &u[46], &cospim52, &u[49], &rnding, bit);
- v[49] = half_btf_sse4_1(&cospim52, &u[46], &cospi12, &u[49], &rnding, bit);
- v[50] = half_btf_sse4_1(&cospi12, &u[45], &cospi52, &u[50], &rnding, bit);
- v[53] = half_btf_sse4_1(&cospim20, &u[42], &cospi44, &u[53], &rnding, bit);
- v[54] = half_btf_sse4_1(&cospi44, &u[41], &cospi20, &u[54], &rnding, bit);
- v[57] = half_btf_sse4_1(&cospim36, &u[38], &cospi28, &u[57], &rnding, bit);
- v[58] = half_btf_sse4_1(&cospi28, &u[37], &cospi36, &u[58], &rnding, bit);
- v[61] = half_btf_sse4_1(&cospim4, &u[34], &cospi60, &u[61], &rnding, bit);
- v[62] = half_btf_sse4_1(&cospi60, &u[33], &cospi4, &u[62], &rnding, bit);
-
- // stage 5
- u[4] = half_btf_0_sse4_1(&cospi56, &v[4], &rnding, bit);
- u[5] = half_btf_0_sse4_1(&cospim40, &v[6], &rnding, bit);
- u[6] = half_btf_0_sse4_1(&cospi24, &v[6], &rnding, bit);
- u[7] = half_btf_0_sse4_1(&cospi8, &v[4], &rnding, bit);
-
- for (i = 8; i < 16; i += 4) {
- addsub_sse4_1(v[i + 0], v[i + 1], &u[i + 0], &u[i + 1], &clamp_lo,
- &clamp_hi);
- addsub_sse4_1(v[i + 3], v[i + 2], &u[i + 3], &u[i + 2], &clamp_lo,
- &clamp_hi);
- }
-
- for (i = 16; i < 32; i += 4) {
- u[i + 0] = v[i + 0];
- u[i + 3] = v[i + 3];
- }
-
- u[17] = half_btf_sse4_1(&cospim8, &v[17], &cospi56, &v[30], &rnding, bit);
- u[18] = half_btf_sse4_1(&cospim56, &v[18], &cospim8, &v[29], &rnding, bit);
- u[21] = half_btf_sse4_1(&cospim40, &v[21], &cospi24, &v[26], &rnding, bit);
- u[22] = half_btf_sse4_1(&cospim24, &v[22], &cospim40, &v[25], &rnding, bit);
- u[25] = half_btf_sse4_1(&cospim40, &v[22], &cospi24, &v[25], &rnding, bit);
- u[26] = half_btf_sse4_1(&cospi24, &v[21], &cospi40, &v[26], &rnding, bit);
- u[29] = half_btf_sse4_1(&cospim8, &v[18], &cospi56, &v[29], &rnding, bit);
- u[30] = half_btf_sse4_1(&cospi56, &v[17], &cospi8, &v[30], &rnding, bit);
-
- for (i = 32; i < 64; i += 8) {
- addsub_sse4_1(v[i + 0], v[i + 3], &u[i + 0], &u[i + 3], &clamp_lo,
- &clamp_hi);
- addsub_sse4_1(v[i + 1], v[i + 2], &u[i + 1], &u[i + 2], &clamp_lo,
- &clamp_hi);
-
- addsub_sse4_1(v[i + 7], v[i + 4], &u[i + 7], &u[i + 4], &clamp_lo,
- &clamp_hi);
- addsub_sse4_1(v[i + 6], v[i + 5], &u[i + 6], &u[i + 5], &clamp_lo,
- &clamp_hi);
- }
-
- // stage 6
- v[0] = half_btf_0_sse4_1(&cospi32, &u[0], &rnding, bit);
- v[1] = half_btf_0_sse4_1(&cospi32, &u[0], &rnding, bit);
- v[2] = half_btf_0_sse4_1(&cospi48, &u[2], &rnding, bit);
- v[3] = half_btf_0_sse4_1(&cospi16, &u[2], &rnding, bit);
-
- addsub_sse4_1(u[4], u[5], &v[4], &v[5], &clamp_lo, &clamp_hi);
- addsub_sse4_1(u[7], u[6], &v[7], &v[6], &clamp_lo, &clamp_hi);
-
- for (i = 8; i < 16; i += 4) {
- v[i + 0] = u[i + 0];
- v[i + 3] = u[i + 3];
- }
-
- v[9] = half_btf_sse4_1(&cospim16, &u[9], &cospi48, &u[14], &rnding, bit);
- v[10] = half_btf_sse4_1(&cospim48, &u[10], &cospim16, &u[13], &rnding, bit);
- v[13] = half_btf_sse4_1(&cospim16, &u[10], &cospi48, &u[13], &rnding, bit);
- v[14] = half_btf_sse4_1(&cospi48, &u[9], &cospi16, &u[14], &rnding, bit);
-
- for (i = 16; i < 32; i += 8) {
- addsub_sse4_1(u[i + 0], u[i + 3], &v[i + 0], &v[i + 3], &clamp_lo,
- &clamp_hi);
- addsub_sse4_1(u[i + 1], u[i + 2], &v[i + 1], &v[i + 2], &clamp_lo,
- &clamp_hi);
-
- addsub_sse4_1(u[i + 7], u[i + 4], &v[i + 7], &v[i + 4], &clamp_lo,
- &clamp_hi);
- addsub_sse4_1(u[i + 6], u[i + 5], &v[i + 6], &v[i + 5], &clamp_lo,
- &clamp_hi);
- }
-
- for (i = 32; i < 64; i += 8) {
- v[i + 0] = u[i + 0];
- v[i + 1] = u[i + 1];
- v[i + 6] = u[i + 6];
- v[i + 7] = u[i + 7];
- }
-
- v[34] = half_btf_sse4_1(&cospim8, &u[34], &cospi56, &u[61], &rnding, bit);
- v[35] = half_btf_sse4_1(&cospim8, &u[35], &cospi56, &u[60], &rnding, bit);
- v[36] = half_btf_sse4_1(&cospim56, &u[36], &cospim8, &u[59], &rnding, bit);
- v[37] = half_btf_sse4_1(&cospim56, &u[37], &cospim8, &u[58], &rnding, bit);
- v[42] = half_btf_sse4_1(&cospim40, &u[42], &cospi24, &u[53], &rnding, bit);
- v[43] = half_btf_sse4_1(&cospim40, &u[43], &cospi24, &u[52], &rnding, bit);
- v[44] = half_btf_sse4_1(&cospim24, &u[44], &cospim40, &u[51], &rnding, bit);
- v[45] = half_btf_sse4_1(&cospim24, &u[45], &cospim40, &u[50], &rnding, bit);
- v[50] = half_btf_sse4_1(&cospim40, &u[45], &cospi24, &u[50], &rnding, bit);
- v[51] = half_btf_sse4_1(&cospim40, &u[44], &cospi24, &u[51], &rnding, bit);
- v[52] = half_btf_sse4_1(&cospi24, &u[43], &cospi40, &u[52], &rnding, bit);
- v[53] = half_btf_sse4_1(&cospi24, &u[42], &cospi40, &u[53], &rnding, bit);
- v[58] = half_btf_sse4_1(&cospim8, &u[37], &cospi56, &u[58], &rnding, bit);
- v[59] = half_btf_sse4_1(&cospim8, &u[36], &cospi56, &u[59], &rnding, bit);
- v[60] = half_btf_sse4_1(&cospi56, &u[35], &cospi8, &u[60], &rnding, bit);
- v[61] = half_btf_sse4_1(&cospi56, &u[34], &cospi8, &u[61], &rnding, bit);
-
- // stage 7
- addsub_sse4_1(v[0], v[3], &u[0], &u[3], &clamp_lo, &clamp_hi);
- addsub_sse4_1(v[1], v[2], &u[1], &u[2], &clamp_lo, &clamp_hi);
-
- u[4] = v[4];
- u[7] = v[7];
- u[5] = half_btf_sse4_1(&cospim32, &v[5], &cospi32, &v[6], &rnding, bit);
- u[6] = half_btf_sse4_1(&cospi32, &v[5], &cospi32, &v[6], &rnding, bit);
-
- addsub_sse4_1(v[8], v[11], &u[8], &u[11], &clamp_lo, &clamp_hi);
- addsub_sse4_1(v[9], v[10], &u[9], &u[10], &clamp_lo, &clamp_hi);
- addsub_sse4_1(v[15], v[12], &u[15], &u[12], &clamp_lo, &clamp_hi);
- addsub_sse4_1(v[14], v[13], &u[14], &u[13], &clamp_lo, &clamp_hi);
-
- for (i = 16; i < 32; i += 8) {
- u[i + 0] = v[i + 0];
- u[i + 1] = v[i + 1];
- u[i + 6] = v[i + 6];
- u[i + 7] = v[i + 7];
- }
-
- u[18] = half_btf_sse4_1(&cospim16, &v[18], &cospi48, &v[29], &rnding, bit);
- u[19] = half_btf_sse4_1(&cospim16, &v[19], &cospi48, &v[28], &rnding, bit);
- u[20] = half_btf_sse4_1(&cospim48, &v[20], &cospim16, &v[27], &rnding, bit);
- u[21] = half_btf_sse4_1(&cospim48, &v[21], &cospim16, &v[26], &rnding, bit);
- u[26] = half_btf_sse4_1(&cospim16, &v[21], &cospi48, &v[26], &rnding, bit);
- u[27] = half_btf_sse4_1(&cospim16, &v[20], &cospi48, &v[27], &rnding, bit);
- u[28] = half_btf_sse4_1(&cospi48, &v[19], &cospi16, &v[28], &rnding, bit);
- u[29] = half_btf_sse4_1(&cospi48, &v[18], &cospi16, &v[29], &rnding, bit);
-
- for (i = 32; i < 64; i += 16) {
- for (j = i; j < i + 4; j++) {
- addsub_sse4_1(v[j], v[j ^ 7], &u[j], &u[j ^ 7], &clamp_lo, &clamp_hi);
- addsub_sse4_1(v[j ^ 15], v[j ^ 8], &u[j ^ 15], &u[j ^ 8], &clamp_lo,
- &clamp_hi);
- }
- }
-
- // stage 8
- for (i = 0; i < 4; ++i) {
- addsub_sse4_1(u[i], u[7 - i], &v[i], &v[7 - i], &clamp_lo, &clamp_hi);
- }
-
- v[8] = u[8];
- v[9] = u[9];
- v[14] = u[14];
- v[15] = u[15];
-
- v[10] = half_btf_sse4_1(&cospim32, &u[10], &cospi32, &u[13], &rnding, bit);
- v[11] = half_btf_sse4_1(&cospim32, &u[11], &cospi32, &u[12], &rnding, bit);
- v[12] = half_btf_sse4_1(&cospi32, &u[11], &cospi32, &u[12], &rnding, bit);
- v[13] = half_btf_sse4_1(&cospi32, &u[10], &cospi32, &u[13], &rnding, bit);
-
- for (i = 16; i < 20; ++i) {
- addsub_sse4_1(u[i], u[i ^ 7], &v[i], &v[i ^ 7], &clamp_lo, &clamp_hi);
- addsub_sse4_1(u[i ^ 15], u[i ^ 8], &v[i ^ 15], &v[i ^ 8], &clamp_lo,
- &clamp_hi);
- }
-
- for (i = 32; i < 36; ++i) {
- v[i] = u[i];
- v[i + 12] = u[i + 12];
- v[i + 16] = u[i + 16];
- v[i + 28] = u[i + 28];
- }
-
- v[36] = half_btf_sse4_1(&cospim16, &u[36], &cospi48, &u[59], &rnding, bit);
- v[37] = half_btf_sse4_1(&cospim16, &u[37], &cospi48, &u[58], &rnding, bit);
- v[38] = half_btf_sse4_1(&cospim16, &u[38], &cospi48, &u[57], &rnding, bit);
- v[39] = half_btf_sse4_1(&cospim16, &u[39], &cospi48, &u[56], &rnding, bit);
- v[40] = half_btf_sse4_1(&cospim48, &u[40], &cospim16, &u[55], &rnding, bit);
- v[41] = half_btf_sse4_1(&cospim48, &u[41], &cospim16, &u[54], &rnding, bit);
- v[42] = half_btf_sse4_1(&cospim48, &u[42], &cospim16, &u[53], &rnding, bit);
- v[43] = half_btf_sse4_1(&cospim48, &u[43], &cospim16, &u[52], &rnding, bit);
- v[52] = half_btf_sse4_1(&cospim16, &u[43], &cospi48, &u[52], &rnding, bit);
- v[53] = half_btf_sse4_1(&cospim16, &u[42], &cospi48, &u[53], &rnding, bit);
- v[54] = half_btf_sse4_1(&cospim16, &u[41], &cospi48, &u[54], &rnding, bit);
- v[55] = half_btf_sse4_1(&cospim16, &u[40], &cospi48, &u[55], &rnding, bit);
- v[56] = half_btf_sse4_1(&cospi48, &u[39], &cospi16, &u[56], &rnding, bit);
- v[57] = half_btf_sse4_1(&cospi48, &u[38], &cospi16, &u[57], &rnding, bit);
- v[58] = half_btf_sse4_1(&cospi48, &u[37], &cospi16, &u[58], &rnding, bit);
- v[59] = half_btf_sse4_1(&cospi48, &u[36], &cospi16, &u[59], &rnding, bit);
-
- // stage 9
- for (i = 0; i < 8; ++i) {
- addsub_sse4_1(v[i], v[15 - i], &u[i], &u[15 - i], &clamp_lo, &clamp_hi);
- }
-
- for (i = 16; i < 20; ++i) {
- u[i] = v[i];
- u[i + 12] = v[i + 12];
- }
-
- u[20] = half_btf_sse4_1(&cospim32, &v[20], &cospi32, &v[27], &rnding, bit);
- u[21] = half_btf_sse4_1(&cospim32, &v[21], &cospi32, &v[26], &rnding, bit);
- u[22] = half_btf_sse4_1(&cospim32, &v[22], &cospi32, &v[25], &rnding, bit);
- u[23] = half_btf_sse4_1(&cospim32, &v[23], &cospi32, &v[24], &rnding, bit);
- u[24] = half_btf_sse4_1(&cospi32, &v[23], &cospi32, &v[24], &rnding, bit);
- u[25] = half_btf_sse4_1(&cospi32, &v[22], &cospi32, &v[25], &rnding, bit);
- u[26] = half_btf_sse4_1(&cospi32, &v[21], &cospi32, &v[26], &rnding, bit);
- u[27] = half_btf_sse4_1(&cospi32, &v[20], &cospi32, &v[27], &rnding, bit);
-
- for (i = 32; i < 40; i++) {
- addsub_sse4_1(v[i], v[i ^ 15], &u[i], &u[i ^ 15], &clamp_lo, &clamp_hi);
- }
-
- for (i = 48; i < 56; i++) {
- addsub_sse4_1(v[i ^ 15], v[i], &u[i ^ 15], &u[i], &clamp_lo, &clamp_hi);
- }
-
- // stage 10
- for (i = 0; i < 16; i++) {
- addsub_sse4_1(u[i], u[31 - i], &v[i], &v[31 - i], &clamp_lo, &clamp_hi);
- }
-
- for (i = 32; i < 40; i++) v[i] = u[i];
-
- v[40] = half_btf_sse4_1(&cospim32, &u[40], &cospi32, &u[55], &rnding, bit);
- v[41] = half_btf_sse4_1(&cospim32, &u[41], &cospi32, &u[54], &rnding, bit);
- v[42] = half_btf_sse4_1(&cospim32, &u[42], &cospi32, &u[53], &rnding, bit);
- v[43] = half_btf_sse4_1(&cospim32, &u[43], &cospi32, &u[52], &rnding, bit);
- v[44] = half_btf_sse4_1(&cospim32, &u[44], &cospi32, &u[51], &rnding, bit);
- v[45] = half_btf_sse4_1(&cospim32, &u[45], &cospi32, &u[50], &rnding, bit);
- v[46] = half_btf_sse4_1(&cospim32, &u[46], &cospi32, &u[49], &rnding, bit);
- v[47] = half_btf_sse4_1(&cospim32, &u[47], &cospi32, &u[48], &rnding, bit);
- v[48] = half_btf_sse4_1(&cospi32, &u[47], &cospi32, &u[48], &rnding, bit);
- v[49] = half_btf_sse4_1(&cospi32, &u[46], &cospi32, &u[49], &rnding, bit);
- v[50] = half_btf_sse4_1(&cospi32, &u[45], &cospi32, &u[50], &rnding, bit);
- v[51] = half_btf_sse4_1(&cospi32, &u[44], &cospi32, &u[51], &rnding, bit);
- v[52] = half_btf_sse4_1(&cospi32, &u[43], &cospi32, &u[52], &rnding, bit);
- v[53] = half_btf_sse4_1(&cospi32, &u[42], &cospi32, &u[53], &rnding, bit);
- v[54] = half_btf_sse4_1(&cospi32, &u[41], &cospi32, &u[54], &rnding, bit);
- v[55] = half_btf_sse4_1(&cospi32, &u[40], &cospi32, &u[55], &rnding, bit);
-
- for (i = 56; i < 64; i++) v[i] = u[i];
-
- // stage 11
- if (do_cols) {
- for (i = 0; i < 32; i++) {
- addsub_no_clamp_sse4_1(v[i], v[63 - i], &out[(i)], &out[(63 - i)]);
- }
- } else {
- const int log_range_out = AOMMAX(16, bd + 6);
- const __m128i clamp_lo_out = _mm_set1_epi32(AOMMAX(
- -(1 << (log_range_out - 1)), -(1 << (log_range - 1 - out_shift))));
- const __m128i clamp_hi_out = _mm_set1_epi32(AOMMIN(
- (1 << (log_range_out - 1)) - 1, (1 << (log_range - 1 - out_shift))));
-
- for (i = 0; i < 32; i++) {
- addsub_shift_sse4_1(v[i], v[63 - i], &out[(i)], &out[(63 - i)],
- &clamp_lo_out, &clamp_hi_out, out_shift);
- }
- }
- }
-}
-/* 32-point inverse DCT variant (per the idct32 naming) that reads only in[0].
- *
- * in        input vectors; element 0 is the only one referenced here.
- * out       output vectors; elements 0..31 all receive the same value.
- * bit       selects the cosine table through cospi_arr() and sets the
- *           rounding constant to 1 << (bit - 1).
- * do_cols   nonzero: the result is only clamped to the log_range-bit range.
- *           zero: the result is rounded, shifted right by out_shift and
- *           clamped to the output range computed in the else branch.
- * bd        used to derive log_range and log_range_out (presumably the
- *           sample bit depth -- confirm against the callers).
- * out_shift right shift applied only when do_cols is zero.
- *
- * NOTE(review): cospi_arr() and half_btf_0_sse4_1() are defined outside this
- * chunk. half_btf_0 presumably returns (w * x + rounding) >> bit for every
- * 32-bit lane -- confirm.
- */
-static void idct32x32_low1_sse4_1(__m128i *in, __m128i *out, int bit,
- int do_cols, int bd, int out_shift) {
- const int32_t *cospi = cospi_arr(bit);
- const __m128i cospi32 = _mm_set1_epi32(cospi[32]);  // cospi[32] in every lane
- const __m128i rounding = _mm_set1_epi32(1 << (bit - 1));
- const int log_range = AOMMAX(16, bd + (do_cols ? 6 : 8));  // bd + 6 if do_cols, else bd + 8, never below 16
- const __m128i clamp_lo = _mm_set1_epi32(-(1 << (log_range - 1)));
- const __m128i clamp_hi = _mm_set1_epi32((1 << (log_range - 1)) - 1);
- __m128i bf1;
- // A single vector is enough as working state because only in[0] is read.
- // stage 0
- // stage 1: load the only input that is consumed
- bf1 = in[0];
-
- // stage 2
- // stage 3
- // stage 4
- // stage 5: the one arithmetic step of this variant, weighting by cospi[32]
- bf1 = half_btf_0_sse4_1(&cospi32, &bf1, &rounding, bit);
-
- // stage 6
- // stage 7
- // stage 8
- // stage 9: final range handling, then replicate the value to all outputs
- if (do_cols) {
- bf1 = _mm_max_epi32(bf1, clamp_lo);  // lower bound -(1 << (log_range - 1))
- bf1 = _mm_min_epi32(bf1, clamp_hi);  // upper bound (1 << (log_range - 1)) - 1
- } else {
- const int log_range_out = AOMMAX(16, bd + 6);
- const __m128i clamp_lo_out = _mm_set1_epi32(AOMMAX(
- -(1 << (log_range_out - 1)), -(1 << (log_range - 1 - out_shift))));
- const __m128i clamp_hi_out = _mm_set1_epi32(AOMMIN(
- (1 << (log_range_out - 1)) - 1, (1 << (log_range - 1 - out_shift))));  // NOTE(review): second term has no "- 1", unlike the first -- confirm this is intended
-
- __m128i offset = _mm_set1_epi32((1 << out_shift) >> 1);  // half of the shift step, for rounding
- bf1 = _mm_add_epi32(bf1, offset);
- bf1 = _mm_sra_epi32(bf1, _mm_cvtsi32_si128(out_shift));  // arithmetic shift keeps the sign
- bf1 = _mm_max_epi32(bf1, clamp_lo_out);
- bf1 = _mm_min_epi32(bf1, clamp_hi_out);
- }
- out[0] = bf1;  // the same vector is stored to each of out[0] .. out[31]
- out[1] = bf1;
- out[2] = bf1;
- out[3] = bf1;
- out[4] = bf1;
- out[5] = bf1;
- out[6] = bf1;
- out[7] = bf1;
- out[8] = bf1;
- out[9] = bf1;
- out[10] = bf1;
- out[11] = bf1;
- out[12] = bf1;
- out[13] = bf1;
- out[14] = bf1;
- out[15] = bf1;
- out[16] = bf1;
- out[17] = bf1;
- out[18] = bf1;
- out[19] = bf1;
- out[20] = bf1;
- out[21] = bf1;
- out[22] = bf1;
- out[23] = bf1;
- out[24] = bf1;
- out[25] = bf1;
- out[26] = bf1;
- out[27] = bf1;
- out[28] = bf1;
- out[29] = bf1;
- out[30] = bf1;
- out[31] = bf1;
-}
-/* 32-point inverse DCT variant (per the idct32 naming) that reads only
- * in[0] .. in[7]; no other input element is referenced.
- *
- * in        input vectors; elements 0..7 are read.
- * out       output vectors; only passed on to idct32_stage9_sse4_1(), which
- *           presumably stores the final result there -- confirm.
- * bit       selects the cosine table through cospi_arr() and sets the
- *           rounding constant to 1 << (bit - 1).
- * do_cols   selects log_range: max(16, bd + 6) when nonzero, max(16, bd + 8)
- *           when zero; also forwarded to stage 9.
- * bd        used to derive log_range; also forwarded to stage 9.
- * out_shift forwarded to idct32_stage9_sse4_1().
- *
- * The work buffer bf1 is updated in place, so statement order matters:
- * where one source slot feeds two results, the other slot is written first
- * and the source slot itself is overwritten last.
- *
- * NOTE(review): half_btf_0_sse4_1() and the idct32_stageN_sse4_1() helpers
- * are defined outside this chunk. half_btf_0 presumably returns
- * (w * x + rounding) >> bit for every 32-bit lane -- confirm.
- */
-static void idct32x32_low8_sse4_1(__m128i *in, __m128i *out, int bit,
- int do_cols, int bd, int out_shift) {
- const int32_t *cospi = cospi_arr(bit);
- const __m128i cospi62 = _mm_set1_epi32(cospi[62]);  // cospiN: +cospi[N] in every lane
- const __m128i cospi14 = _mm_set1_epi32(cospi[14]);
- const __m128i cospi54 = _mm_set1_epi32(cospi[54]);
- const __m128i cospi6 = _mm_set1_epi32(cospi[6]);
- const __m128i cospi10 = _mm_set1_epi32(cospi[10]);
- const __m128i cospi2 = _mm_set1_epi32(cospi[2]);
- const __m128i cospim58 = _mm_set1_epi32(-cospi[58]);  // cospimN: -cospi[N] in every lane
- const __m128i cospim50 = _mm_set1_epi32(-cospi[50]);
- const __m128i cospi60 = _mm_set1_epi32(cospi[60]);
- const __m128i cospi12 = _mm_set1_epi32(cospi[12]);
- const __m128i cospi4 = _mm_set1_epi32(cospi[4]);
- const __m128i cospim52 = _mm_set1_epi32(-cospi[52]);
- const __m128i cospi56 = _mm_set1_epi32(cospi[56]);
- const __m128i cospi24 = _mm_set1_epi32(cospi[24]);
- const __m128i cospi40 = _mm_set1_epi32(cospi[40]);
- const __m128i cospi8 = _mm_set1_epi32(cospi[8]);
- const __m128i cospim40 = _mm_set1_epi32(-cospi[40]);
- const __m128i cospim8 = _mm_set1_epi32(-cospi[8]);
- const __m128i cospim56 = _mm_set1_epi32(-cospi[56]);
- const __m128i cospim24 = _mm_set1_epi32(-cospi[24]);
- const __m128i cospi32 = _mm_set1_epi32(cospi[32]);
- const __m128i cospim32 = _mm_set1_epi32(-cospi[32]);
- const __m128i cospi48 = _mm_set1_epi32(cospi[48]);
- const __m128i cospim48 = _mm_set1_epi32(-cospi[48]);
- const __m128i cospi16 = _mm_set1_epi32(cospi[16]);
- const __m128i cospim16 = _mm_set1_epi32(-cospi[16]);
- const __m128i rounding = _mm_set1_epi32(1 << (bit - 1));
- const int log_range = AOMMAX(16, bd + (do_cols ? 6 : 8));  // bd + 6 if do_cols, else bd + 8, never below 16
- const __m128i clamp_lo = _mm_set1_epi32(-(1 << (log_range - 1)));
- const __m128i clamp_hi = _mm_set1_epi32((1 << (log_range - 1)) - 1);
- __m128i bf1[32];
- // bf1 is the single in-place work buffer for every stage below.
- // stage 0
- // stage 1: scatter in[0..7] into every fourth slot; the other slots are written by later stages
- bf1[0] = in[0];
- bf1[4] = in[4];
- bf1[8] = in[2];
- bf1[12] = in[6];
- bf1[16] = in[1];
- bf1[20] = in[5];
- bf1[24] = in[3];
- bf1[28] = in[7];
-
- // stage 2: slots 16, 20, 24 and 28 each yield two weighted values; the source slot is overwritten second
- bf1[31] = half_btf_0_sse4_1(&cospi2, &bf1[16], &rounding, bit);
- bf1[16] = half_btf_0_sse4_1(&cospi62, &bf1[16], &rounding, bit);
- bf1[19] = half_btf_0_sse4_1(&cospim50, &bf1[28], &rounding, bit);
- bf1[28] = half_btf_0_sse4_1(&cospi14, &bf1[28], &rounding, bit);
- bf1[27] = half_btf_0_sse4_1(&cospi10, &bf1[20], &rounding, bit);
- bf1[20] = half_btf_0_sse4_1(&cospi54, &bf1[20], &rounding, bit);
- bf1[23] = half_btf_0_sse4_1(&cospim58, &bf1[24], &rounding, bit);
- bf1[24] = half_btf_0_sse4_1(&cospi6, &bf1[24], &rounding, bit);
-
- // stage 3: same two-output pattern for slots 8 and 12
- bf1[15] = half_btf_0_sse4_1(&cospi4, &bf1[8], &rounding, bit);
- bf1[8] = half_btf_0_sse4_1(&cospi60, &bf1[8], &rounding, bit);
-
- bf1[11] = half_btf_0_sse4_1(&cospim52, &bf1[12], &rounding, bit);
- bf1[12] = half_btf_0_sse4_1(&cospi12, &bf1[12], &rounding, bit);
- bf1[17] = bf1[16];  // plain copies; presumably the add/sub pairs of idct32x32_low16_sse4_1 with a zero partner -- confirm
- bf1[18] = bf1[19];
- bf1[21] = bf1[20];
- bf1[22] = bf1[23];
- bf1[25] = bf1[24];
- bf1[26] = bf1[27];
- bf1[29] = bf1[28];
- bf1[30] = bf1[31];
-
- // stage 4 : slot 4 yields slots 7 and 4, then copies fill slots 9, 10, 13 and 14
- bf1[7] = half_btf_0_sse4_1(&cospi8, &bf1[4], &rounding, bit);
- bf1[4] = half_btf_0_sse4_1(&cospi56, &bf1[4], &rounding, bit);
-
- bf1[9] = bf1[8];
- bf1[10] = bf1[11];
- bf1[13] = bf1[12];
- bf1[14] = bf1[15];
- // Rest of stage 4 is delegated to a helper defined outside this chunk.
- idct32_stage4_sse4_1(bf1, &cospim8, &cospi56, &cospi8, &cospim56, &cospim40,
- &cospi24, &cospi40, &cospim24, &rounding, bit);
-
- // stage 5: weight slot 0 by cospi[32] and fill slots 1, 5 and 6 by copy
- bf1[0] = half_btf_0_sse4_1(&cospi32, &bf1[0], &rounding, bit);
- bf1[1] = bf1[0];
- bf1[5] = bf1[4];
- bf1[6] = bf1[7];
-
- idct32_stage5_sse4_1(bf1, &cospim16, &cospi48, &cospi16, &cospim48, &clamp_lo,
- &clamp_hi, &rounding, bit);
-
- // stage 6: slots 3 and 2 are copies of slots 0 and 1
- bf1[3] = bf1[0];
- bf1[2] = bf1[1];
-
- idct32_stage6_sse4_1(bf1, &cospim32, &cospi32, &cospim16, &cospi48, &cospi16,
- &cospim48, &clamp_lo, &clamp_hi, &rounding, bit);
-
- // stage 7
- idct32_stage7_sse4_1(bf1, &cospim32, &cospi32, &clamp_lo, &clamp_hi,
- &rounding, bit);
-
- // stage 8
- idct32_stage8_sse4_1(bf1, &cospim32, &cospi32, &clamp_lo, &clamp_hi,
- &rounding, bit);
-
- // stage 9: the only call that receives out, do_cols, bd and out_shift
- idct32_stage9_sse4_1(bf1, out, do_cols, bd, out_shift, log_range);
-}
-/* 32-point inverse DCT variant (per the idct32 naming) that reads only
- * in[0] .. in[15]; no other input element is referenced.
- *
- * in        input vectors; elements 0..15 are read.
- * out       output vectors; only passed on to idct32_stage9_sse4_1(), which
- *           presumably stores the final result there -- confirm.
- * bit       selects the cosine table through cospi_arr() and sets the
- *           rounding constant to 1 << (bit - 1).
- * do_cols   selects log_range: max(16, bd + 6) when nonzero, max(16, bd + 8)
- *           when zero; also forwarded to stage 9.
- * bd        used to derive log_range; also forwarded to stage 9.
- * out_shift forwarded to idct32_stage9_sse4_1().
- *
- * The work buffer bf1 is updated in place, so statement order matters:
- * where one source slot feeds two results, the other slot is written first
- * and the source slot itself is overwritten last.
- *
- * NOTE(review): half_btf_0_sse4_1(), addsub_sse4_1() and the
- * idct32_stageN_sse4_1() helpers are defined outside this chunk. half_btf_0
- * presumably returns (w * x + rounding) >> bit per 32-bit lane, and addsub
- * presumably stores the clamped sum through its first pointer and the
- * clamped difference through its second -- confirm both.
- */
-static void idct32x32_low16_sse4_1(__m128i *in, __m128i *out, int bit,
- int do_cols, int bd, int out_shift) {
- const int32_t *cospi = cospi_arr(bit);
- const __m128i cospi62 = _mm_set1_epi32(cospi[62]);  // cospiN: +cospi[N] in every lane
- const __m128i cospi30 = _mm_set1_epi32(cospi[30]);
- const __m128i cospi46 = _mm_set1_epi32(cospi[46]);
- const __m128i cospi14 = _mm_set1_epi32(cospi[14]);
- const __m128i cospi54 = _mm_set1_epi32(cospi[54]);
- const __m128i cospi22 = _mm_set1_epi32(cospi[22]);
- const __m128i cospi38 = _mm_set1_epi32(cospi[38]);
- const __m128i cospi6 = _mm_set1_epi32(cospi[6]);
- const __m128i cospi26 = _mm_set1_epi32(cospi[26]);
- const __m128i cospi10 = _mm_set1_epi32(cospi[10]);
- const __m128i cospi18 = _mm_set1_epi32(cospi[18]);
- const __m128i cospi2 = _mm_set1_epi32(cospi[2]);
- const __m128i cospim58 = _mm_set1_epi32(-cospi[58]);  // cospimN: -cospi[N] in every lane
- const __m128i cospim42 = _mm_set1_epi32(-cospi[42]);
- const __m128i cospim50 = _mm_set1_epi32(-cospi[50]);
- const __m128i cospim34 = _mm_set1_epi32(-cospi[34]);
- const __m128i cospi60 = _mm_set1_epi32(cospi[60]);
- const __m128i cospi28 = _mm_set1_epi32(cospi[28]);
- const __m128i cospi44 = _mm_set1_epi32(cospi[44]);
- const __m128i cospi12 = _mm_set1_epi32(cospi[12]);
- const __m128i cospi20 = _mm_set1_epi32(cospi[20]);
- const __m128i cospi4 = _mm_set1_epi32(cospi[4]);
- const __m128i cospim52 = _mm_set1_epi32(-cospi[52]);
- const __m128i cospim36 = _mm_set1_epi32(-cospi[36]);
- const __m128i cospi56 = _mm_set1_epi32(cospi[56]);
- const __m128i cospi24 = _mm_set1_epi32(cospi[24]);
- const __m128i cospi40 = _mm_set1_epi32(cospi[40]);
- const __m128i cospi8 = _mm_set1_epi32(cospi[8]);
- const __m128i cospim40 = _mm_set1_epi32(-cospi[40]);
- const __m128i cospim8 = _mm_set1_epi32(-cospi[8]);
- const __m128i cospim56 = _mm_set1_epi32(-cospi[56]);
- const __m128i cospim24 = _mm_set1_epi32(-cospi[24]);
- const __m128i cospi32 = _mm_set1_epi32(cospi[32]);
- const __m128i cospim32 = _mm_set1_epi32(-cospi[32]);
- const __m128i cospi48 = _mm_set1_epi32(cospi[48]);
- const __m128i cospim48 = _mm_set1_epi32(-cospi[48]);
- const __m128i cospi16 = _mm_set1_epi32(cospi[16]);
- const __m128i cospim16 = _mm_set1_epi32(-cospi[16]);
- const __m128i rounding = _mm_set1_epi32(1 << (bit - 1));
- const int log_range = AOMMAX(16, bd + (do_cols ? 6 : 8));  // bd + 6 if do_cols, else bd + 8, never below 16
- const __m128i clamp_lo = _mm_set1_epi32(-(1 << (log_range - 1)));
- const __m128i clamp_hi = _mm_set1_epi32((1 << (log_range - 1)) - 1);
- __m128i bf1[32];
- // bf1 is the single in-place work buffer for every stage below.
- // stage 0
- // stage 1: scatter in[0..15] into the even slots; odd slots are written by later stages
-
- bf1[0] = in[0];
- bf1[2] = in[8];
- bf1[4] = in[4];
- bf1[6] = in[12];
- bf1[8] = in[2];
- bf1[10] = in[10];
- bf1[12] = in[6];
- bf1[14] = in[14];
- bf1[16] = in[1];
- bf1[18] = in[9];
- bf1[20] = in[5];
- bf1[22] = in[13];
- bf1[24] = in[3];
- bf1[26] = in[11];
- bf1[28] = in[7];
- bf1[30] = in[15];
-
- // stage 2: each even slot 16..30 yields two weighted values; the source slot is overwritten second
- bf1[31] = half_btf_0_sse4_1(&cospi2, &bf1[16], &rounding, bit);
- bf1[16] = half_btf_0_sse4_1(&cospi62, &bf1[16], &rounding, bit);
- bf1[17] = half_btf_0_sse4_1(&cospim34, &bf1[30], &rounding, bit);
- bf1[30] = half_btf_0_sse4_1(&cospi30, &bf1[30], &rounding, bit);
- bf1[29] = half_btf_0_sse4_1(&cospi18, &bf1[18], &rounding, bit);
- bf1[18] = half_btf_0_sse4_1(&cospi46, &bf1[18], &rounding, bit);
- bf1[19] = half_btf_0_sse4_1(&cospim50, &bf1[28], &rounding, bit);
- bf1[28] = half_btf_0_sse4_1(&cospi14, &bf1[28], &rounding, bit);
- bf1[27] = half_btf_0_sse4_1(&cospi10, &bf1[20], &rounding, bit);
- bf1[20] = half_btf_0_sse4_1(&cospi54, &bf1[20], &rounding, bit);
- bf1[21] = half_btf_0_sse4_1(&cospim42, &bf1[26], &rounding, bit);
- bf1[26] = half_btf_0_sse4_1(&cospi22, &bf1[26], &rounding, bit);
- bf1[25] = half_btf_0_sse4_1(&cospi26, &bf1[22], &rounding, bit);
- bf1[22] = half_btf_0_sse4_1(&cospi38, &bf1[22], &rounding, bit);
- bf1[23] = half_btf_0_sse4_1(&cospim58, &bf1[24], &rounding, bit);
- bf1[24] = half_btf_0_sse4_1(&cospi6, &bf1[24], &rounding, bit);
-
- // stage 3: same two-output pattern for the even slots 8..14
- bf1[15] = half_btf_0_sse4_1(&cospi4, &bf1[8], &rounding, bit);
- bf1[8] = half_btf_0_sse4_1(&cospi60, &bf1[8], &rounding, bit);
- bf1[9] = half_btf_0_sse4_1(&cospim36, &bf1[14], &rounding, bit);
- bf1[14] = half_btf_0_sse4_1(&cospi28, &bf1[14], &rounding, bit);
- bf1[13] = half_btf_0_sse4_1(&cospi20, &bf1[10], &rounding, bit);
- bf1[10] = half_btf_0_sse4_1(&cospi44, &bf1[10], &rounding, bit);
- bf1[11] = half_btf_0_sse4_1(&cospim52, &bf1[12], &rounding, bit);
- bf1[12] = half_btf_0_sse4_1(&cospi12, &bf1[12], &rounding, bit);
- // In-place pairs on slots 16..31: operands go in by value, results come back through pointers to the same slots.
- addsub_sse4_1(bf1[16], bf1[17], bf1 + 16, bf1 + 17, &clamp_lo, &clamp_hi);
- addsub_sse4_1(bf1[19], bf1[18], bf1 + 19, bf1 + 18, &clamp_lo, &clamp_hi);
- addsub_sse4_1(bf1[20], bf1[21], bf1 + 20, bf1 + 21, &clamp_lo, &clamp_hi);
- addsub_sse4_1(bf1[23], bf1[22], bf1 + 23, bf1 + 22, &clamp_lo, &clamp_hi);
- addsub_sse4_1(bf1[24], bf1[25], bf1 + 24, bf1 + 25, &clamp_lo, &clamp_hi);
- addsub_sse4_1(bf1[27], bf1[26], bf1 + 27, bf1 + 26, &clamp_lo, &clamp_hi);
- addsub_sse4_1(bf1[28], bf1[29], bf1 + 28, bf1 + 29, &clamp_lo, &clamp_hi);
- addsub_sse4_1(bf1[31], bf1[30], bf1 + 31, bf1 + 30, &clamp_lo, &clamp_hi);
- // stage 4: slots 4 and 6 each yield two weighted values, then in-place pairs on slots 8..15
- bf1[7] = half_btf_0_sse4_1(&cospi8, &bf1[4], &rounding, bit);
- bf1[4] = half_btf_0_sse4_1(&cospi56, &bf1[4], &rounding, bit);
- bf1[5] = half_btf_0_sse4_1(&cospim40, &bf1[6], &rounding, bit);
- bf1[6] = half_btf_0_sse4_1(&cospi24, &bf1[6], &rounding, bit);
-
- addsub_sse4_1(bf1[8], bf1[9], bf1 + 8, bf1 + 9, &clamp_lo, &clamp_hi);
- addsub_sse4_1(bf1[11], bf1[10], bf1 + 11, bf1 + 10, &clamp_lo, &clamp_hi);
- addsub_sse4_1(bf1[12], bf1[13], bf1 + 12, bf1 + 13, &clamp_lo, &clamp_hi);
- addsub_sse4_1(bf1[15], bf1[14], bf1 + 15, bf1 + 14, &clamp_lo, &clamp_hi);
- // Rest of stage 4 is delegated to a helper defined outside this chunk.
- idct32_stage4_sse4_1(bf1, &cospim8, &cospi56, &cospi8, &cospim56, &cospim40,
- &cospi24, &cospi40, &cospim24, &rounding, bit);
-
- // stage 5: slot 0 feeds slots 0 and 1, slot 2 feeds slots 3 and 2, then in-place pairs on slots 4..7
- bf1[0] = half_btf_0_sse4_1(&cospi32, &bf1[0], &rounding, bit);
- bf1[1] = bf1[0];
- bf1[3] = half_btf_0_sse4_1(&cospi16, &bf1[2], &rounding, bit);
- bf1[2] = half_btf_0_sse4_1(&cospi48, &bf1[2], &rounding, bit);
-
- addsub_sse4_1(bf1[4], bf1[5], bf1 + 4, bf1 + 5, &clamp_lo, &clamp_hi);
- addsub_sse4_1(bf1[7], bf1[6], bf1 + 7, bf1 + 6, &clamp_lo, &clamp_hi);
-
- idct32_stage5_sse4_1(bf1, &cospim16, &cospi48, &cospi16, &cospim48, &clamp_lo,
- &clamp_hi, &rounding, bit);
-
- // stage 6: in-place pairs on slots 0..3, remainder in the helper
- addsub_sse4_1(bf1[0], bf1[3], bf1 + 0, bf1 + 3, &clamp_lo, &clamp_hi);
- addsub_sse4_1(bf1[1], bf1[2], bf1 + 1, bf1 + 2, &clamp_lo, &clamp_hi);
-
- idct32_stage6_sse4_1(bf1, &cospim32, &cospi32, &cospim16, &cospi48, &cospi16,
- &cospim48, &clamp_lo, &clamp_hi, &rounding, bit);
-
- // stage 7
- idct32_stage7_sse4_1(bf1, &cospim32, &cospi32, &clamp_lo, &clamp_hi,
- &rounding, bit);
-
- // stage 8
- idct32_stage8_sse4_1(bf1, &cospim32, &cospi32, &clamp_lo, &clamp_hi,
- &rounding, bit);
-
- // stage 9: the only call that receives out, do_cols, bd and out_shift
- idct32_stage9_sse4_1(bf1, out, do_cols, bd, out_shift, log_range);
-}
-
-static void idct32x32_sse4_1(__m128i *in, __m128i *out, int bit, int do_cols,
- int bd, int out_shift) {
- const int32_t *cospi = cospi_arr(bit);
- const __m128i cospi62 = _mm_set1_epi32(cospi[62]);
- const __m128i cospi30 = _mm_set1_epi32(cospi[30]);
- const __m128i cospi46 = _mm_set1_epi32(cospi[46]);
- const __m128i cospi14 = _mm_set1_epi32(cospi[14]);
- const __m128i cospi54 = _mm_set1_epi32(cospi[54]);
- const __m128i cospi22 = _mm_set1_epi32(cospi[22]);
- const __m128i cospi38 = _mm_set1_epi32(cospi[38]);
- const __m128i cospi6 = _mm_set1_epi32(cospi[6]);
- const __m128i cospi58 = _mm_set1_epi32(cospi[58]);
- const __m128i cospi26 = _mm_set1_epi32(cospi[26]);
- const __m128i cospi42 = _mm_set1_epi32(cospi[42]);
- const __m128i cospi10 = _mm_set1_epi32(cospi[10]);
- const __m128i cospi50 = _mm_set1_epi32(cospi[50]);
- const __m128i cospi18 = _mm_set1_epi32(cospi[18]);
- const __m128i cospi34 = _mm_set1_epi32(cospi[34]);
- const __m128i cospi2 = _mm_set1_epi32(cospi[2]);
- const __m128i cospim58 = _mm_set1_epi32(-cospi[58]);
- const __m128i cospim26 = _mm_set1_epi32(-cospi[26]);
- const __m128i cospim42 = _mm_set1_epi32(-cospi[42]);
- const __m128i cospim10 = _mm_set1_epi32(-cospi[10]);
- const __m128i cospim50 = _mm_set1_epi32(-cospi[50]);
- const __m128i cospim18 = _mm_set1_epi32(-cospi[18]);
- const __m128i cospim34 = _mm_set1_epi32(-cospi[34]);
- const __m128i cospim2 = _mm_set1_epi32(-cospi[2]);
- const __m128i cospi60 = _mm_set1_epi32(cospi[60]);
- const __m128i cospi28 = _mm_set1_epi32(cospi[28]);
- const __m128i cospi44 = _mm_set1_epi32(cospi[44]);
- const __m128i cospi12 = _mm_set1_epi32(cospi[12]);
- const __m128i cospi52 = _mm_set1_epi32(cospi[52]);
- const __m128i cospi20 = _mm_set1_epi32(cospi[20]);
- const __m128i cospi36 = _mm_set1_epi32(cospi[36]);
- const __m128i cospi4 = _mm_set1_epi32(cospi[4]);
- const __m128i cospim52 = _mm_set1_epi32(-cospi[52]);
- const __m128i cospim20 = _mm_set1_epi32(-cospi[20]);
- const __m128i cospim36 = _mm_set1_epi32(-cospi[36]);
- const __m128i cospim4 = _mm_set1_epi32(-cospi[4]);
- const __m128i cospi56 = _mm_set1_epi32(cospi[56]);
- const __m128i cospi24 = _mm_set1_epi32(cospi[24]);
- const __m128i cospi40 = _mm_set1_epi32(cospi[40]);
- const __m128i cospi8 = _mm_set1_epi32(cospi[8]);
- const __m128i cospim40 = _mm_set1_epi32(-cospi[40]);
- const __m128i cospim8 = _mm_set1_epi32(-cospi[8]);
- const __m128i cospim56 = _mm_set1_epi32(-cospi[56]);
- const __m128i cospim24 = _mm_set1_epi32(-cospi[24]);
- const __m128i cospi32 = _mm_set1_epi32(cospi[32]);
- const __m128i cospim32 = _mm_set1_epi32(-cospi[32]);
- const __m128i cospi48 = _mm_set1_epi32(cospi[48]);
- const __m128i cospim48 = _mm_set1_epi32(-cospi[48]);
- const __m128i cospi16 = _mm_set1_epi32(cospi[16]);
- const __m128i cospim16 = _mm_set1_epi32(-cospi[16]);
- const __m128i rounding = _mm_set1_epi32(1 << (bit - 1));
- const int log_range = AOMMAX(16, bd + (do_cols ? 6 : 8));
- const __m128i clamp_lo = _mm_set1_epi32(-(1 << (log_range - 1)));
- const __m128i clamp_hi = _mm_set1_epi32((1 << (log_range - 1)) - 1);
- __m128i bf1[32], bf0[32];
-
- // stage 0
- // stage 1
- bf1[0] = in[0];
- bf1[1] = in[16];
- bf1[2] = in[8];
- bf1[3] = in[24];
- bf1[4] = in[4];
- bf1[5] = in[20];
- bf1[6] = in[12];
- bf1[7] = in[28];
- bf1[8] = in[2];
- bf1[9] = in[18];
- bf1[10] = in[10];
- bf1[11] = in[26];
- bf1[12] = in[6];
- bf1[13] = in[22];
- bf1[14] = in[14];
- bf1[15] = in[30];
- bf1[16] = in[1];
- bf1[17] = in[17];
- bf1[18] = in[9];
- bf1[19] = in[25];
- bf1[20] = in[5];
- bf1[21] = in[21];
- bf1[22] = in[13];
- bf1[23] = in[29];
- bf1[24] = in[3];
- bf1[25] = in[19];
- bf1[26] = in[11];
- bf1[27] = in[27];
- bf1[28] = in[7];
- bf1[29] = in[23];
- bf1[30] = in[15];
- bf1[31] = in[31];
-
- // stage 2
- bf0[0] = bf1[0];
- bf0[1] = bf1[1];
- bf0[2] = bf1[2];
- bf0[3] = bf1[3];
- bf0[4] = bf1[4];
- bf0[5] = bf1[5];
- bf0[6] = bf1[6];
- bf0[7] = bf1[7];
- bf0[8] = bf1[8];
- bf0[9] = bf1[9];
- bf0[10] = bf1[10];
- bf0[11] = bf1[11];
- bf0[12] = bf1[12];
- bf0[13] = bf1[13];
- bf0[14] = bf1[14];
- bf0[15] = bf1[15];
- bf0[16] =
- half_btf_sse4_1(&cospi62, &bf1[16], &cospim2, &bf1[31], &rounding, bit);
- bf0[17] =
- half_btf_sse4_1(&cospi30, &bf1[17], &cospim34, &bf1[30], &rounding, bit);
- bf0[18] =
- half_btf_sse4_1(&cospi46, &bf1[18], &cospim18, &bf1[29], &rounding, bit);
- bf0[19] =
- half_btf_sse4_1(&cospi14, &bf1[19], &cospim50, &bf1[28], &rounding, bit);
- bf0[20] =
- half_btf_sse4_1(&cospi54, &bf1[20], &cospim10, &bf1[27], &rounding, bit);
- bf0[21] =
- half_btf_sse4_1(&cospi22, &bf1[21], &cospim42, &bf1[26], &rounding, bit);
- bf0[22] =
- half_btf_sse4_1(&cospi38, &bf1[22], &cospim26, &bf1[25], &rounding, bit);
- bf0[23] =
- half_btf_sse4_1(&cospi6, &bf1[23], &cospim58, &bf1[24], &rounding, bit);
- bf0[24] =
- half_btf_sse4_1(&cospi58, &bf1[23], &cospi6, &bf1[24], &rounding, bit);
- bf0[25] =
- half_btf_sse4_1(&cospi26, &bf1[22], &cospi38, &bf1[25], &rounding, bit);
- bf0[26] =
- half_btf_sse4_1(&cospi42, &bf1[21], &cospi22, &bf1[26], &rounding, bit);
- bf0[27] =
- half_btf_sse4_1(&cospi10, &bf1[20], &cospi54, &bf1[27], &rounding, bit);
- bf0[28] =
- half_btf_sse4_1(&cospi50, &bf1[19], &cospi14, &bf1[28], &rounding, bit);
- bf0[29] =
- half_btf_sse4_1(&cospi18, &bf1[18], &cospi46, &bf1[29], &rounding, bit);
- bf0[30] =
- half_btf_sse4_1(&cospi34, &bf1[17], &cospi30, &bf1[30], &rounding, bit);
- bf0[31] =
- half_btf_sse4_1(&cospi2, &bf1[16], &cospi62, &bf1[31], &rounding, bit);
-
- // stage 3
- bf1[0] = bf0[0];
- bf1[1] = bf0[1];
- bf1[2] = bf0[2];
- bf1[3] = bf0[3];
- bf1[4] = bf0[4];
- bf1[5] = bf0[5];
- bf1[6] = bf0[6];
- bf1[7] = bf0[7];
- bf1[8] =
- half_btf_sse4_1(&cospi60, &bf0[8], &cospim4, &bf0[15], &rounding, bit);
- bf1[9] =
- half_btf_sse4_1(&cospi28, &bf0[9], &cospim36, &bf0[14], &rounding, bit);
- bf1[10] =
- half_btf_sse4_1(&cospi44, &bf0[10], &cospim20, &bf0[13], &rounding, bit);
- bf1[11] =
- half_btf_sse4_1(&cospi12, &bf0[11], &cospim52, &bf0[12], &rounding, bit);
- bf1[12] =
- half_btf_sse4_1(&cospi52, &bf0[11], &cospi12, &bf0[12], &rounding, bit);
- bf1[13] =
- half_btf_sse4_1(&cospi20, &bf0[10], &cospi44, &bf0[13], &rounding, bit);
- bf1[14] =
- half_btf_sse4_1(&cospi36, &bf0[9], &cospi28, &bf0[14], &rounding, bit);
- bf1[15] =
- half_btf_sse4_1(&cospi4, &bf0[8], &cospi60, &bf0[15], &rounding, bit);
-
- addsub_sse4_1(bf0[16], bf0[17], bf1 + 16, bf1 + 17, &clamp_lo, &clamp_hi);
- addsub_sse4_1(bf0[19], bf0[18], bf1 + 19, bf1 + 18, &clamp_lo, &clamp_hi);
- addsub_sse4_1(bf0[20], bf0[21], bf1 + 20, bf1 + 21, &clamp_lo, &clamp_hi);
- addsub_sse4_1(bf0[23], bf0[22], bf1 + 23, bf1 + 22, &clamp_lo, &clamp_hi);
- addsub_sse4_1(bf0[24], bf0[25], bf1 + 24, bf1 + 25, &clamp_lo, &clamp_hi);
- addsub_sse4_1(bf0[27], bf0[26], bf1 + 27, bf1 + 26, &clamp_lo, &clamp_hi);
- addsub_sse4_1(bf0[28], bf0[29], bf1 + 28, bf1 + 29, &clamp_lo, &clamp_hi);
- addsub_sse4_1(bf0[31], bf0[30], bf1 + 31, bf1 + 30, &clamp_lo, &clamp_hi);
-
- // stage 4
- bf0[0] = bf1[0];
- bf0[1] = bf1[1];
- bf0[2] = bf1[2];
- bf0[3] = bf1[3];
- bf0[4] =
- half_btf_sse4_1(&cospi56, &bf1[4], &cospim8, &bf1[7], &rounding, bit);
- bf0[5] =
- half_btf_sse4_1(&cospi24, &bf1[5], &cospim40, &bf1[6], &rounding, bit);
- bf0[6] =
- half_btf_sse4_1(&cospi40, &bf1[5], &cospi24, &bf1[6], &rounding, bit);
- bf0[7] = half_btf_sse4_1(&cospi8, &bf1[4], &cospi56, &bf1[7], &rounding, bit);
-
- addsub_sse4_1(bf1[8], bf1[9], bf0 + 8, bf0 + 9, &clamp_lo, &clamp_hi);
- addsub_sse4_1(bf1[11], bf1[10], bf0 + 11, bf0 + 10, &clamp_lo, &clamp_hi);
- addsub_sse4_1(bf1[12], bf1[13], bf0 + 12, bf0 + 13, &clamp_lo, &clamp_hi);
- addsub_sse4_1(bf1[15], bf1[14], bf0 + 15, bf0 + 14, &clamp_lo, &clamp_hi);
-
- bf0[16] = bf1[16];
- bf0[17] =
- half_btf_sse4_1(&cospim8, &bf1[17], &cospi56, &bf1[30], &rounding, bit);
- bf0[18] =
- half_btf_sse4_1(&cospim56, &bf1[18], &cospim8, &bf1[29], &rounding, bit);
- bf0[19] = bf1[19];
- bf0[20] = bf1[20];
- bf0[21] =
- half_btf_sse4_1(&cospim40, &bf1[21], &cospi24, &bf1[26], &rounding, bit);
- bf0[22] =
- half_btf_sse4_1(&cospim24, &bf1[22], &cospim40, &bf1[25], &rounding, bit);
- bf0[23] = bf1[23];
- bf0[24] = bf1[24];
- bf0[25] =
- half_btf_sse4_1(&cospim40, &bf1[22], &cospi24, &bf1[25], &rounding, bit);
- bf0[26] =
- half_btf_sse4_1(&cospi24, &bf1[21], &cospi40, &bf1[26], &rounding, bit);
- bf0[27] = bf1[27];
- bf0[28] = bf1[28];
- bf0[29] =
- half_btf_sse4_1(&cospim8, &bf1[18], &cospi56, &bf1[29], &rounding, bit);
- bf0[30] =
- half_btf_sse4_1(&cospi56, &bf1[17], &cospi8, &bf1[30], &rounding, bit);
- bf0[31] = bf1[31];
-
- // stage 5
- bf1[0] =
- half_btf_sse4_1(&cospi32, &bf0[0], &cospi32, &bf0[1], &rounding, bit);
- bf1[1] =
- half_btf_sse4_1(&cospi32, &bf0[0], &cospim32, &bf0[1], &rounding, bit);
- bf1[2] =
- half_btf_sse4_1(&cospi48, &bf0[2], &cospim16, &bf0[3], &rounding, bit);
- bf1[3] =
- half_btf_sse4_1(&cospi16, &bf0[2], &cospi48, &bf0[3], &rounding, bit);
- addsub_sse4_1(bf0[4], bf0[5], bf1 + 4, bf1 + 5, &clamp_lo, &clamp_hi);
- addsub_sse4_1(bf0[7], bf0[6], bf1 + 7, bf1 + 6, &clamp_lo, &clamp_hi);
- bf1[8] = bf0[8];
- bf1[9] =
- half_btf_sse4_1(&cospim16, &bf0[9], &cospi48, &bf0[14], &rounding, bit);
- bf1[10] =
- half_btf_sse4_1(&cospim48, &bf0[10], &cospim16, &bf0[13], &rounding, bit);
- bf1[11] = bf0[11];
- bf1[12] = bf0[12];
- bf1[13] =
- half_btf_sse4_1(&cospim16, &bf0[10], &cospi48, &bf0[13], &rounding, bit);
- bf1[14] =
- half_btf_sse4_1(&cospi48, &bf0[9], &cospi16, &bf0[14], &rounding, bit);
- bf1[15] = bf0[15];
- addsub_sse4_1(bf0[16], bf0[19], bf1 + 16, bf1 + 19, &clamp_lo, &clamp_hi);
- addsub_sse4_1(bf0[17], bf0[18], bf1 + 17, bf1 + 18, &clamp_lo, &clamp_hi);
- addsub_sse4_1(bf0[23], bf0[20], bf1 + 23, bf1 + 20, &clamp_lo, &clamp_hi);
- addsub_sse4_1(bf0[22], bf0[21], bf1 + 22, bf1 + 21, &clamp_lo, &clamp_hi);
- addsub_sse4_1(bf0[24], bf0[27], bf1 + 24, bf1 + 27, &clamp_lo, &clamp_hi);
- addsub_sse4_1(bf0[25], bf0[26], bf1 + 25, bf1 + 26, &clamp_lo, &clamp_hi);
- addsub_sse4_1(bf0[31], bf0[28], bf1 + 31, bf1 + 28, &clamp_lo, &clamp_hi);
- addsub_sse4_1(bf0[30], bf0[29], bf1 + 30, bf1 + 29, &clamp_lo, &clamp_hi);
-
- // stage 6
- addsub_sse4_1(bf1[0], bf1[3], bf0 + 0, bf0 + 3, &clamp_lo, &clamp_hi);
- addsub_sse4_1(bf1[1], bf1[2], bf0 + 1, bf0 + 2, &clamp_lo, &clamp_hi);
- bf0[4] = bf1[4];
- bf0[5] =
- half_btf_sse4_1(&cospim32, &bf1[5], &cospi32, &bf1[6], &rounding, bit);
- bf0[6] =
- half_btf_sse4_1(&cospi32, &bf1[5], &cospi32, &bf1[6], &rounding, bit);
- bf0[7] = bf1[7];
- addsub_sse4_1(bf1[8], bf1[11], bf0 + 8, bf0 + 11, &clamp_lo, &clamp_hi);
- addsub_sse4_1(bf1[9], bf1[10], bf0 + 9, bf0 + 10, &clamp_lo, &clamp_hi);
- addsub_sse4_1(bf1[15], bf1[12], bf0 + 15, bf0 + 12, &clamp_lo, &clamp_hi);
- addsub_sse4_1(bf1[14], bf1[13], bf0 + 14, bf0 + 13, &clamp_lo, &clamp_hi);
- bf0[16] = bf1[16];
- bf0[17] = bf1[17];
- bf0[18] =
- half_btf_sse4_1(&cospim16, &bf1[18], &cospi48, &bf1[29], &rounding, bit);
- bf0[19] =
- half_btf_sse4_1(&cospim16, &bf1[19], &cospi48, &bf1[28], &rounding, bit);
- bf0[20] =
- half_btf_sse4_1(&cospim48, &bf1[20], &cospim16, &bf1[27], &rounding, bit);
- bf0[21] =
- half_btf_sse4_1(&cospim48, &bf1[21], &cospim16, &bf1[26], &rounding, bit);
- bf0[22] = bf1[22];
- bf0[23] = bf1[23];
- bf0[24] = bf1[24];
- bf0[25] = bf1[25];
- bf0[26] =
- half_btf_sse4_1(&cospim16, &bf1[21], &cospi48, &bf1[26], &rounding, bit);
- bf0[27] =
- half_btf_sse4_1(&cospim16, &bf1[20], &cospi48, &bf1[27], &rounding, bit);
- bf0[28] =
- half_btf_sse4_1(&cospi48, &bf1[19], &cospi16, &bf1[28], &rounding, bit);
- bf0[29] =
- half_btf_sse4_1(&cospi48, &bf1[18], &cospi16, &bf1[29], &rounding, bit);
- bf0[30] = bf1[30];
- bf0[31] = bf1[31];
-
- // stage 7
- addsub_sse4_1(bf0[0], bf0[7], bf1 + 0, bf1 + 7, &clamp_lo, &clamp_hi);
- addsub_sse4_1(bf0[1], bf0[6], bf1 + 1, bf1 + 6, &clamp_lo, &clamp_hi);
- addsub_sse4_1(bf0[2], bf0[5], bf1 + 2, bf1 + 5, &clamp_lo, &clamp_hi);
- addsub_sse4_1(bf0[3], bf0[4], bf1 + 3, bf1 + 4, &clamp_lo, &clamp_hi);
- bf1[8] = bf0[8];
- bf1[9] = bf0[9];
- bf1[10] =
- half_btf_sse4_1(&cospim32, &bf0[10], &cospi32, &bf0[13], &rounding, bit);
- bf1[11] =
- half_btf_sse4_1(&cospim32, &bf0[11], &cospi32, &bf0[12], &rounding, bit);
- bf1[12] =
- half_btf_sse4_1(&cospi32, &bf0[11], &cospi32, &bf0[12], &rounding, bit);
- bf1[13] =
- half_btf_sse4_1(&cospi32, &bf0[10], &cospi32, &bf0[13], &rounding, bit);
- bf1[14] = bf0[14];
- bf1[15] = bf0[15];
- addsub_sse4_1(bf0[16], bf0[23], bf1 + 16, bf1 + 23, &clamp_lo, &clamp_hi);
- addsub_sse4_1(bf0[17], bf0[22], bf1 + 17, bf1 + 22, &clamp_lo, &clamp_hi);
- addsub_sse4_1(bf0[18], bf0[21], bf1 + 18, bf1 + 21, &clamp_lo, &clamp_hi);
- addsub_sse4_1(bf0[19], bf0[20], bf1 + 19, bf1 + 20, &clamp_lo, &clamp_hi);
- addsub_sse4_1(bf0[31], bf0[24], bf1 + 31, bf1 + 24, &clamp_lo, &clamp_hi);
- addsub_sse4_1(bf0[30], bf0[25], bf1 + 30, bf1 + 25, &clamp_lo, &clamp_hi);
- addsub_sse4_1(bf0[29], bf0[26], bf1 + 29, bf1 + 26, &clamp_lo, &clamp_hi);
- addsub_sse4_1(bf0[28], bf0[27], bf1 + 28, bf1 + 27, &clamp_lo, &clamp_hi);
-
- // stage 8
- addsub_sse4_1(bf1[0], bf1[15], bf0 + 0, bf0 + 15, &clamp_lo, &clamp_hi);
- addsub_sse4_1(bf1[1], bf1[14], bf0 + 1, bf0 + 14, &clamp_lo, &clamp_hi);
- addsub_sse4_1(bf1[2], bf1[13], bf0 + 2, bf0 + 13, &clamp_lo, &clamp_hi);
- addsub_sse4_1(bf1[3], bf1[12], bf0 + 3, bf0 + 12, &clamp_lo, &clamp_hi);
- addsub_sse4_1(bf1[4], bf1[11], bf0 + 4, bf0 + 11, &clamp_lo, &clamp_hi);
- addsub_sse4_1(bf1[5], bf1[10], bf0 + 5, bf0 + 10, &clamp_lo, &clamp_hi);
- addsub_sse4_1(bf1[6], bf1[9], bf0 + 6, bf0 + 9, &clamp_lo, &clamp_hi);
- addsub_sse4_1(bf1[7], bf1[8], bf0 + 7, bf0 + 8, &clamp_lo, &clamp_hi);
- bf0[16] = bf1[16];
- bf0[17] = bf1[17];
- bf0[18] = bf1[18];
- bf0[19] = bf1[19];
- bf0[20] =
- half_btf_sse4_1(&cospim32, &bf1[20], &cospi32, &bf1[27], &rounding, bit);
- bf0[21] =
- half_btf_sse4_1(&cospim32, &bf1[21], &cospi32, &bf1[26], &rounding, bit);
- bf0[22] =
- half_btf_sse4_1(&cospim32, &bf1[22], &cospi32, &bf1[25], &rounding, bit);
- bf0[23] =
- half_btf_sse4_1(&cospim32, &bf1[23], &cospi32, &bf1[24], &rounding, bit);
- bf0[24] =
- half_btf_sse4_1(&cospi32, &bf1[23], &cospi32, &bf1[24], &rounding, bit);
- bf0[25] =
- half_btf_sse4_1(&cospi32, &bf1[22], &cospi32, &bf1[25], &rounding, bit);
- bf0[26] =
- half_btf_sse4_1(&cospi32, &bf1[21], &cospi32, &bf1[26], &rounding, bit);
- bf0[27] =
- half_btf_sse4_1(&cospi32, &bf1[20], &cospi32, &bf1[27], &rounding, bit);
- bf0[28] = bf1[28];
- bf0[29] = bf1[29];
- bf0[30] = bf1[30];
- bf0[31] = bf1[31];
-
- // stage 9
- if (do_cols) {
- addsub_no_clamp_sse4_1(bf0[0], bf0[31], out + 0, out + 31);
- addsub_no_clamp_sse4_1(bf0[1], bf0[30], out + 1, out + 30);
- addsub_no_clamp_sse4_1(bf0[2], bf0[29], out + 2, out + 29);
- addsub_no_clamp_sse4_1(bf0[3], bf0[28], out + 3, out + 28);
- addsub_no_clamp_sse4_1(bf0[4], bf0[27], out + 4, out + 27);
- addsub_no_clamp_sse4_1(bf0[5], bf0[26], out + 5, out + 26);
- addsub_no_clamp_sse4_1(bf0[6], bf0[25], out + 6, out + 25);
- addsub_no_clamp_sse4_1(bf0[7], bf0[24], out + 7, out + 24);
- addsub_no_clamp_sse4_1(bf0[8], bf0[23], out + 8, out + 23);
- addsub_no_clamp_sse4_1(bf0[9], bf0[22], out + 9, out + 22);
- addsub_no_clamp_sse4_1(bf0[10], bf0[21], out + 10, out + 21);
- addsub_no_clamp_sse4_1(bf0[11], bf0[20], out + 11, out + 20);
- addsub_no_clamp_sse4_1(bf0[12], bf0[19], out + 12, out + 19);
- addsub_no_clamp_sse4_1(bf0[13], bf0[18], out + 13, out + 18);
- addsub_no_clamp_sse4_1(bf0[14], bf0[17], out + 14, out + 17);
- addsub_no_clamp_sse4_1(bf0[15], bf0[16], out + 15, out + 16);
- } else {
- const int log_range_out = AOMMAX(16, bd + 6);
- const __m128i clamp_lo_out = _mm_set1_epi32(AOMMAX(
- -(1 << (log_range_out - 1)), -(1 << (log_range - 1 - out_shift))));
- const __m128i clamp_hi_out = _mm_set1_epi32(AOMMIN(
- (1 << (log_range_out - 1)) - 1, (1 << (log_range - 1 - out_shift))));
-
- addsub_shift_sse4_1(bf0[0], bf0[31], out + 0, out + 31, &clamp_lo_out,
- &clamp_hi_out, out_shift);
- addsub_shift_sse4_1(bf0[1], bf0[30], out + 1, out + 30, &clamp_lo_out,
- &clamp_hi_out, out_shift);
- addsub_shift_sse4_1(bf0[2], bf0[29], out + 2, out + 29, &clamp_lo_out,
- &clamp_hi_out, out_shift);
- addsub_shift_sse4_1(bf0[3], bf0[28], out + 3, out + 28, &clamp_lo_out,
- &clamp_hi_out, out_shift);
- addsub_shift_sse4_1(bf0[4], bf0[27], out + 4, out + 27, &clamp_lo_out,
- &clamp_hi_out, out_shift);
- addsub_shift_sse4_1(bf0[5], bf0[26], out + 5, out + 26, &clamp_lo_out,
- &clamp_hi_out, out_shift);
- addsub_shift_sse4_1(bf0[6], bf0[25], out + 6, out + 25, &clamp_lo_out,
- &clamp_hi_out, out_shift);
- addsub_shift_sse4_1(bf0[7], bf0[24], out + 7, out + 24, &clamp_lo_out,
- &clamp_hi_out, out_shift);
- addsub_shift_sse4_1(bf0[8], bf0[23], out + 8, out + 23, &clamp_lo_out,
- &clamp_hi_out, out_shift);
- addsub_shift_sse4_1(bf0[9], bf0[22], out + 9, out + 22, &clamp_lo_out,
- &clamp_hi_out, out_shift);
- addsub_shift_sse4_1(bf0[10], bf0[21], out + 10, out + 21, &clamp_lo_out,
- &clamp_hi_out, out_shift);
- addsub_shift_sse4_1(bf0[11], bf0[20], out + 11, out + 20, &clamp_lo_out,
- &clamp_hi_out, out_shift);
- addsub_shift_sse4_1(bf0[12], bf0[19], out + 12, out + 19, &clamp_lo_out,
- &clamp_hi_out, out_shift);
- addsub_shift_sse4_1(bf0[13], bf0[18], out + 13, out + 18, &clamp_lo_out,
- &clamp_hi_out, out_shift);
- addsub_shift_sse4_1(bf0[14], bf0[17], out + 14, out + 17, &clamp_lo_out,
- &clamp_hi_out, out_shift);
- addsub_shift_sse4_1(bf0[15], bf0[16], out + 15, out + 16, &clamp_lo_out,
- &clamp_hi_out, out_shift);
- }
-}
-
-void av1_highbd_inv_txfm_add_8x8_sse4_1(const tran_low_t *input, uint8_t *dest,
- int stride,
- const TxfmParam *txfm_param) {
- int bd = txfm_param->bd;
- const TX_TYPE tx_type = txfm_param->tx_type;
- const int32_t *src = cast_to_int32(input);
- switch (tx_type) {
- // Assembly version doesn't support some transform types, so use C version
- // for those.
- case V_DCT:
- case H_DCT:
- case V_ADST:
- case H_ADST:
- case V_FLIPADST:
- case H_FLIPADST:
- case IDTX:
- av1_inv_txfm2d_add_8x8_c(src, CONVERT_TO_SHORTPTR(dest), stride, tx_type,
- bd);
- break;
- default:
- av1_inv_txfm2d_add_8x8_sse4_1(src, CONVERT_TO_SHORTPTR(dest), stride,
- tx_type, bd);
- break;
- }
-}
-
-void av1_highbd_inv_txfm_add_16x8_sse4_1(const tran_low_t *input, uint8_t *dest,
- int stride,
- const TxfmParam *txfm_param) {
- int bd = txfm_param->bd;
- const TX_TYPE tx_type = txfm_param->tx_type;
- const int32_t *src = cast_to_int32(input);
- switch (tx_type) {
- // Assembly version doesn't support some transform types, so use C version
- // for those.
- case V_DCT:
- case H_DCT:
- case V_ADST:
- case H_ADST:
- case V_FLIPADST:
- case H_FLIPADST:
- case IDTX:
- av1_inv_txfm2d_add_16x8_c(src, CONVERT_TO_SHORTPTR(dest), stride,
- txfm_param->tx_type, txfm_param->bd);
- break;
- default:
- av1_highbd_inv_txfm2d_add_universe_sse4_1(input, dest, stride, tx_type,
- txfm_param->tx_size,
- txfm_param->eob, bd);
- break;
- }
-}
-
-void av1_highbd_inv_txfm_add_8x16_sse4_1(const tran_low_t *input, uint8_t *dest,
- int stride,
- const TxfmParam *txfm_param) {
- int bd = txfm_param->bd;
- const TX_TYPE tx_type = txfm_param->tx_type;
- const int32_t *src = cast_to_int32(input);
- switch (tx_type) {
- // Assembly version doesn't support some transform types, so use C version
- // for those.
- case V_DCT:
- case H_DCT:
- case V_ADST:
- case H_ADST:
- case V_FLIPADST:
- case H_FLIPADST:
- case IDTX:
- av1_inv_txfm2d_add_8x16_c(src, CONVERT_TO_SHORTPTR(dest), stride,
- txfm_param->tx_type, txfm_param->bd);
- break;
- default:
- av1_highbd_inv_txfm2d_add_universe_sse4_1(input, dest, stride, tx_type,
- txfm_param->tx_size,
- txfm_param->eob, bd);
- break;
- }
-}
-
-void av1_highbd_inv_txfm_add_16x16_sse4_1(const tran_low_t *input,
- uint8_t *dest, int stride,
- const TxfmParam *txfm_param) {
- int bd = txfm_param->bd;
- const TX_TYPE tx_type = txfm_param->tx_type;
- const int32_t *src = cast_to_int32(input);
- switch (tx_type) {
- // Assembly version doesn't support some transform types, so use C version
- // for those.
- case V_DCT:
- case H_DCT:
- case V_ADST:
- case H_ADST:
- case V_FLIPADST:
- case H_FLIPADST:
- case IDTX:
- av1_inv_txfm2d_add_16x16_c(src, CONVERT_TO_SHORTPTR(dest), stride,
- tx_type, bd);
- break;
- default:
- av1_highbd_inv_txfm2d_add_universe_sse4_1(input, dest, stride, tx_type,
- txfm_param->tx_size,
- txfm_param->eob, bd);
- break;
- }
-}
-
-void av1_highbd_inv_txfm_add_32x32_sse4_1(const tran_low_t *input,
- uint8_t *dest, int stride,
- const TxfmParam *txfm_param) {
- int bd = txfm_param->bd;
- const TX_TYPE tx_type = txfm_param->tx_type;
- const int32_t *src = cast_to_int32(input);
- switch (tx_type) {
- case DCT_DCT:
- av1_highbd_inv_txfm2d_add_universe_sse4_1(input, dest, stride, tx_type,
- txfm_param->tx_size,
- txfm_param->eob, bd);
- break;
- // Assembly version doesn't support IDTX, so use C version for it.
- case IDTX:
- av1_inv_txfm2d_add_32x32_c(src, CONVERT_TO_SHORTPTR(dest), stride,
- tx_type, bd);
- break;
- default: assert(0);
- }
-}
-
-void av1_highbd_inv_txfm_add_4x4_sse4_1(const tran_low_t *input, uint8_t *dest,
- int stride,
- const TxfmParam *txfm_param) {
- assert(av1_ext_tx_used[txfm_param->tx_set_type][txfm_param->tx_type]);
- int eob = txfm_param->eob;
- int bd = txfm_param->bd;
- int lossless = txfm_param->lossless;
- const int32_t *src = cast_to_int32(input);
- const TX_TYPE tx_type = txfm_param->tx_type;
- if (lossless) {
- assert(tx_type == DCT_DCT);
- av1_highbd_iwht4x4_add(input, dest, stride, eob, bd);
- return;
- }
- switch (tx_type) {
- // Assembly version doesn't support some transform types, so use C version
- // for those.
- case V_DCT:
- case H_DCT:
- case V_ADST:
- case H_ADST:
- case V_FLIPADST:
- case H_FLIPADST:
- case IDTX:
- av1_inv_txfm2d_add_4x4_c(src, CONVERT_TO_SHORTPTR(dest), stride, tx_type,
- bd);
- break;
- default:
- av1_inv_txfm2d_add_4x4_sse4_1(src, CONVERT_TO_SHORTPTR(dest), stride,
- tx_type, bd);
- break;
- }
-}
-
-static const transform_1d_sse4_1
- highbd_txfm_all_1d_zeros_w8_arr[TX_SIZES][ITX_TYPES_1D][4] = {
- {
- { NULL, NULL, NULL, NULL },
- { NULL, NULL, NULL, NULL },
- { NULL, NULL, NULL, NULL },
- },
- { { idct8x8_low1_sse4_1, idct8x8_new_sse4_1, NULL, NULL },
- { iadst8x8_low1_sse4_1, iadst8x8_new_sse4_1, NULL, NULL },
- { NULL, NULL, NULL, NULL } },
- {
- { idct16x16_low1_sse4_1, idct16x16_low8_sse4_1, idct16x16_sse4_1,
- NULL },
- { iadst16x16_low1_sse4_1, iadst16x16_low8_sse4_1, iadst16x16_sse4_1,
- NULL },
- { NULL, NULL, NULL, NULL },
- },
- { { idct32x32_low1_sse4_1, idct32x32_low8_sse4_1, idct32x32_low16_sse4_1,
- idct32x32_sse4_1 },
- { NULL, NULL, NULL, NULL },
- { NULL, NULL, NULL, NULL } },
- { { idct64x64_low1_sse4_1, idct64x64_low8_sse4_1, idct64x64_low16_sse4_1,
- idct64x64_sse4_1 },
- { NULL, NULL, NULL, NULL },
- { NULL, NULL, NULL, NULL } }
- };
-
-static void highbd_inv_txfm2d_add_no_identity_sse41(const int32_t *input,
- uint16_t *output,
- int stride, TX_TYPE tx_type,
- TX_SIZE tx_size, int eob,
- const int bd) {
- __m128i buf1[64 * 16];
- int eobx, eoby;
- get_eobx_eoby_scan_default(&eobx, &eoby, tx_size, eob);
- const int8_t *shift = inv_txfm_shift_ls[tx_size];
- const int txw_idx = get_txw_idx(tx_size);
- const int txh_idx = get_txh_idx(tx_size);
- const int txfm_size_col = tx_size_wide[tx_size];
- const int txfm_size_row = tx_size_high[tx_size];
- const int buf_size_w_div8 = txfm_size_col >> 2;
- const int buf_size_nonzero_w_div8 = (eobx + 8) >> 3;
- const int buf_size_nonzero_h_div8 = (eoby + 8) >> 3;
- const int input_stride = AOMMIN(32, txfm_size_col);
- const int rect_type = get_rect_tx_log_ratio(txfm_size_col, txfm_size_row);
-
- const int fun_idx_x = lowbd_txfm_all_1d_zeros_idx[eobx];
- const int fun_idx_y = lowbd_txfm_all_1d_zeros_idx[eoby];
- const transform_1d_sse4_1 row_txfm =
- highbd_txfm_all_1d_zeros_w8_arr[txw_idx][hitx_1d_tab[tx_type]][fun_idx_x];
- const transform_1d_sse4_1 col_txfm =
- highbd_txfm_all_1d_zeros_w8_arr[txh_idx][vitx_1d_tab[tx_type]][fun_idx_y];
-
- assert(col_txfm != NULL);
- assert(row_txfm != NULL);
- int ud_flip, lr_flip;
- get_flip_cfg(tx_type, &ud_flip, &lr_flip);
-
- // 1st stage: column transform
- for (int i = 0; i < buf_size_nonzero_h_div8 << 1; i++) {
- __m128i buf0[64];
- const int32_t *input_row = input + i * input_stride * 4;
- for (int j = 0; j < buf_size_nonzero_w_div8 << 1; ++j) {
- __m128i *buf0_cur = buf0 + j * 4;
- load_buffer_32bit_input(input_row + j * 4, input_stride, buf0_cur, 4);
-
- TRANSPOSE_4X4(buf0_cur[0], buf0_cur[1], buf0_cur[2], buf0_cur[3],
- buf0_cur[0], buf0_cur[1], buf0_cur[2], buf0_cur[3]);
- }
- if (rect_type == 1 || rect_type == -1) {
- av1_round_shift_rect_array_32_sse4_1(
- buf0, buf0, buf_size_nonzero_w_div8 << 3, 0, NewInvSqrt2);
- }
- row_txfm(buf0, buf0, inv_cos_bit_row[txw_idx][txh_idx], 0, bd, -shift[0]);
-
- __m128i *_buf1 = buf1 + i * 4;
- if (lr_flip) {
- for (int j = 0; j < buf_size_w_div8; ++j) {
- TRANSPOSE_4X4(buf0[4 * j + 3], buf0[4 * j + 2], buf0[4 * j + 1],
- buf0[4 * j],
- _buf1[txfm_size_row * (buf_size_w_div8 - 1 - j) + 0],
- _buf1[txfm_size_row * (buf_size_w_div8 - 1 - j) + 1],
- _buf1[txfm_size_row * (buf_size_w_div8 - 1 - j) + 2],
- _buf1[txfm_size_row * (buf_size_w_div8 - 1 - j) + 3]);
- }
- } else {
- for (int j = 0; j < buf_size_w_div8; ++j) {
- TRANSPOSE_4X4(
- buf0[j * 4 + 0], buf0[j * 4 + 1], buf0[j * 4 + 2], buf0[j * 4 + 3],
- _buf1[j * txfm_size_row + 0], _buf1[j * txfm_size_row + 1],
- _buf1[j * txfm_size_row + 2], _buf1[j * txfm_size_row + 3]);
- }
- }
- }
- // 2nd stage: column transform
- for (int i = 0; i < buf_size_w_div8; i++) {
- col_txfm(buf1 + i * txfm_size_row, buf1 + i * txfm_size_row,
- inv_cos_bit_col[txw_idx][txh_idx], 1, bd, 0);
-
- av1_round_shift_array_32_sse4_1(buf1 + i * txfm_size_row,
- buf1 + i * txfm_size_row, txfm_size_row,
- -shift[1]);
- }
-
- // write to buffer
- {
- for (int i = 0; i < (txfm_size_col >> 3); i++) {
- highbd_write_buffer_8xn_sse4_1(buf1 + i * txfm_size_row * 2,
- output + 8 * i, stride, ud_flip,
- txfm_size_row, bd);
- }
- }
-}
-
-void av1_highbd_inv_txfm2d_add_universe_sse4_1(const int32_t *input,
- uint8_t *output, int stride,
- TX_TYPE tx_type, TX_SIZE tx_size,
- int eob, const int bd) {
- switch (tx_type) {
- case DCT_DCT:
- case ADST_DCT:
- case DCT_ADST:
- case ADST_ADST:
- case FLIPADST_DCT:
- case DCT_FLIPADST:
- case FLIPADST_FLIPADST:
- case ADST_FLIPADST:
- case FLIPADST_ADST:
- highbd_inv_txfm2d_add_no_identity_sse41(
- input, CONVERT_TO_SHORTPTR(output), stride, tx_type, tx_size, eob,
- bd);
- break;
- default: assert(0); break;
- }
-}
-
-void av1_highbd_inv_txfm_add_sse4_1(const tran_low_t *input, uint8_t *dest,
- int stride, const TxfmParam *txfm_param) {
- assert(av1_ext_tx_used[txfm_param->tx_set_type][txfm_param->tx_type]);
- const TX_SIZE tx_size = txfm_param->tx_size;
- switch (tx_size) {
- case TX_32X32:
- av1_highbd_inv_txfm_add_32x32_sse4_1(input, dest, stride, txfm_param);
- break;
- case TX_16X16:
- av1_highbd_inv_txfm_add_16x16_sse4_1(input, dest, stride, txfm_param);
- break;
- case TX_8X8:
- av1_highbd_inv_txfm_add_8x8_sse4_1(input, dest, stride, txfm_param);
- break;
- case TX_4X8:
- av1_highbd_inv_txfm_add_4x8(input, dest, stride, txfm_param);
- break;
- case TX_8X4:
- av1_highbd_inv_txfm_add_8x4(input, dest, stride, txfm_param);
- break;
- case TX_8X16:
- av1_highbd_inv_txfm_add_8x16_sse4_1(input, dest, stride, txfm_param);
- break;
- case TX_16X8:
- av1_highbd_inv_txfm_add_16x8_sse4_1(input, dest, stride, txfm_param);
- break;
- case TX_16X32:
- av1_highbd_inv_txfm_add_16x32(input, dest, stride, txfm_param);
- break;
- case TX_32X16:
- av1_highbd_inv_txfm_add_32x16(input, dest, stride, txfm_param);
- break;
- case TX_32X64:
- av1_highbd_inv_txfm_add_32x64(input, dest, stride, txfm_param);
- break;
- case TX_64X32:
- av1_highbd_inv_txfm_add_64x32(input, dest, stride, txfm_param);
- break;
- case TX_4X4:
- av1_highbd_inv_txfm_add_4x4_sse4_1(input, dest, stride, txfm_param);
- break;
- case TX_16X4:
- av1_highbd_inv_txfm_add_16x4(input, dest, stride, txfm_param);
- break;
- case TX_4X16:
- av1_highbd_inv_txfm_add_4x16(input, dest, stride, txfm_param);
- break;
- case TX_8X32:
- av1_highbd_inv_txfm_add_8x32(input, dest, stride, txfm_param);
- break;
- case TX_32X8:
- av1_highbd_inv_txfm_add_32x8(input, dest, stride, txfm_param);
- break;
- case TX_64X64:
- case TX_16X64:
- case TX_64X16:
- av1_highbd_inv_txfm2d_add_universe_sse4_1(
- input, dest, stride, txfm_param->tx_type, txfm_param->tx_size,
- txfm_param->eob, txfm_param->bd);
- break;
- default: assert(0 && "Invalid transform size"); break;
- }
-}
diff --git a/third_party/aom/av1/common/x86/highbd_jnt_convolve_avx2.c b/third_party/aom/av1/common/x86/highbd_jnt_convolve_avx2.c
deleted file mode 100644
index e298cf653..000000000
--- a/third_party/aom/av1/common/x86/highbd_jnt_convolve_avx2.c
+++ /dev/null
@@ -1,846 +0,0 @@
-/*
- * Copyright (c) 2018, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#include <immintrin.h>
-#include <assert.h>
-
-#include "config/aom_dsp_rtcd.h"
-
-#include "aom_dsp/x86/convolve_avx2.h"
-#include "aom_dsp/x86/convolve_common_intrin.h"
-#include "aom_dsp/x86/convolve_sse4_1.h"
-#include "aom_dsp/x86/synonyms.h"
-#include "aom_dsp/aom_dsp_common.h"
-#include "aom_dsp/aom_filter.h"
-#include "av1/common/convolve.h"
-
-void av1_highbd_jnt_convolve_2d_copy_avx2(
- const uint16_t *src, int src_stride, uint16_t *dst0, int dst_stride0, int w,
- int h, const InterpFilterParams *filter_params_x,
- const InterpFilterParams *filter_params_y, const int subpel_x_q4,
- const int subpel_y_q4, ConvolveParams *conv_params, int bd) {
- CONV_BUF_TYPE *dst = conv_params->dst;
- int dst_stride = conv_params->dst_stride;
- (void)filter_params_x;
- (void)filter_params_y;
- (void)subpel_x_q4;
- (void)subpel_y_q4;
-
- const int bits =
- FILTER_BITS * 2 - conv_params->round_1 - conv_params->round_0;
- const __m128i left_shift = _mm_cvtsi32_si128(bits);
- const int do_average = conv_params->do_average;
- const int use_jnt_comp_avg = conv_params->use_jnt_comp_avg;
- const int w0 = conv_params->fwd_offset;
- const int w1 = conv_params->bck_offset;
- const __m256i wt0 = _mm256_set1_epi32(w0);
- const __m256i wt1 = _mm256_set1_epi32(w1);
- const __m256i zero = _mm256_setzero_si256();
- int i, j;
-
- const int offset_0 =
- bd + 2 * FILTER_BITS - conv_params->round_0 - conv_params->round_1;
- const int offset = (1 << offset_0) + (1 << (offset_0 - 1));
- const __m256i offset_const = _mm256_set1_epi32(offset);
- const __m256i offset_const_16b = _mm256_set1_epi16(offset);
- const int rounding_shift =
- 2 * FILTER_BITS - conv_params->round_0 - conv_params->round_1;
- const __m256i rounding_const = _mm256_set1_epi32((1 << rounding_shift) >> 1);
- const __m256i clip_pixel_to_bd =
- _mm256_set1_epi16(bd == 10 ? 1023 : (bd == 12 ? 4095 : 255));
-
- assert(bits <= 4);
-
- if (!(w % 16)) {
- for (i = 0; i < h; i += 1) {
- for (j = 0; j < w; j += 16) {
- const __m256i src_16bit =
- _mm256_loadu_si256((__m256i *)(&src[i * src_stride + j]));
-
- const __m256i res = _mm256_sll_epi16(src_16bit, left_shift);
-
- if (do_average) {
- const __m256i data_0 =
- _mm256_loadu_si256((__m256i *)(&dst[i * dst_stride + j]));
-
- const __m256i data_ref_0_lo = _mm256_unpacklo_epi16(data_0, zero);
- const __m256i data_ref_0_hi = _mm256_unpackhi_epi16(data_0, zero);
-
- const __m256i res_32b_lo = _mm256_unpacklo_epi16(res, zero);
- const __m256i res_unsigned_lo =
- _mm256_add_epi32(res_32b_lo, offset_const);
-
- const __m256i comp_avg_res_lo = highbd_comp_avg(
- &data_ref_0_lo, &res_unsigned_lo, &wt0, &wt1, use_jnt_comp_avg);
-
- const __m256i res_32b_hi = _mm256_unpackhi_epi16(res, zero);
- const __m256i res_unsigned_hi =
- _mm256_add_epi32(res_32b_hi, offset_const);
-
- const __m256i comp_avg_res_hi = highbd_comp_avg(
- &data_ref_0_hi, &res_unsigned_hi, &wt0, &wt1, use_jnt_comp_avg);
-
- const __m256i round_result_lo = highbd_convolve_rounding(
- &comp_avg_res_lo, &offset_const, &rounding_const, rounding_shift);
- const __m256i round_result_hi = highbd_convolve_rounding(
- &comp_avg_res_hi, &offset_const, &rounding_const, rounding_shift);
-
- const __m256i res_16b =
- _mm256_packus_epi32(round_result_lo, round_result_hi);
- const __m256i res_clip = _mm256_min_epi16(res_16b, clip_pixel_to_bd);
-
- _mm256_store_si256((__m256i *)(&dst0[i * dst_stride0 + j]), res_clip);
- } else {
- const __m256i res_unsigned_16b =
- _mm256_adds_epu16(res, offset_const_16b);
-
- _mm256_store_si256((__m256i *)(&dst[i * dst_stride + j]),
- res_unsigned_16b);
- }
- }
- }
- } else if (!(w % 4)) {
- for (i = 0; i < h; i += 2) {
- for (j = 0; j < w; j += 8) {
- const __m128i src_row_0 =
- _mm_loadu_si128((__m128i *)(&src[i * src_stride + j]));
- const __m128i src_row_1 =
- _mm_loadu_si128((__m128i *)(&src[i * src_stride + j + src_stride]));
- // since not all compilers yet support _mm256_set_m128i()
- const __m256i src_10 = _mm256_insertf128_si256(
- _mm256_castsi128_si256(src_row_0), src_row_1, 1);
-
- const __m256i res = _mm256_sll_epi16(src_10, left_shift);
-
- if (w - j < 8) {
- if (do_average) {
- const __m256i data_0 = _mm256_castsi128_si256(
- _mm_loadl_epi64((__m128i *)(&dst[i * dst_stride + j])));
- const __m256i data_1 = _mm256_castsi128_si256(_mm_loadl_epi64(
- (__m128i *)(&dst[i * dst_stride + j + dst_stride])));
- const __m256i data_01 =
- _mm256_permute2x128_si256(data_0, data_1, 0x20);
-
- const __m256i data_ref_0 = _mm256_unpacklo_epi16(data_01, zero);
-
- const __m256i res_32b = _mm256_unpacklo_epi16(res, zero);
- const __m256i res_unsigned_lo =
- _mm256_add_epi32(res_32b, offset_const);
-
- const __m256i comp_avg_res = highbd_comp_avg(
- &data_ref_0, &res_unsigned_lo, &wt0, &wt1, use_jnt_comp_avg);
-
- const __m256i round_result = highbd_convolve_rounding(
- &comp_avg_res, &offset_const, &rounding_const, rounding_shift);
-
- const __m256i res_16b =
- _mm256_packus_epi32(round_result, round_result);
- const __m256i res_clip =
- _mm256_min_epi16(res_16b, clip_pixel_to_bd);
-
- const __m128i res_0 = _mm256_castsi256_si128(res_clip);
- const __m128i res_1 = _mm256_extracti128_si256(res_clip, 1);
-
- _mm_storel_epi64((__m128i *)(&dst0[i * dst_stride0 + j]), res_0);
- _mm_storel_epi64(
- (__m128i *)(&dst0[i * dst_stride0 + j + dst_stride0]), res_1);
- } else {
- const __m256i res_unsigned_16b =
- _mm256_adds_epu16(res, offset_const_16b);
-
- const __m128i res_0 = _mm256_castsi256_si128(res_unsigned_16b);
- const __m128i res_1 = _mm256_extracti128_si256(res_unsigned_16b, 1);
-
- _mm_storel_epi64((__m128i *)(&dst[i * dst_stride + j]), res_0);
- _mm_storel_epi64((__m128i *)(&dst[i * dst_stride + j + dst_stride]),
- res_1);
- }
- } else {
- if (do_average) {
- const __m256i data_0 = _mm256_castsi128_si256(
- _mm_loadu_si128((__m128i *)(&dst[i * dst_stride + j])));
- const __m256i data_1 = _mm256_castsi128_si256(_mm_loadu_si128(
- (__m128i *)(&dst[i * dst_stride + j + dst_stride])));
- const __m256i data_01 =
- _mm256_permute2x128_si256(data_0, data_1, 0x20);
-
- const __m256i data_ref_0_lo = _mm256_unpacklo_epi16(data_01, zero);
- const __m256i data_ref_0_hi = _mm256_unpackhi_epi16(data_01, zero);
-
- const __m256i res_32b_lo = _mm256_unpacklo_epi16(res, zero);
- const __m256i res_unsigned_lo =
- _mm256_add_epi32(res_32b_lo, offset_const);
-
- const __m256i comp_avg_res_lo = highbd_comp_avg(
- &data_ref_0_lo, &res_unsigned_lo, &wt0, &wt1, use_jnt_comp_avg);
-
- const __m256i res_32b_hi = _mm256_unpackhi_epi16(res, zero);
- const __m256i res_unsigned_hi =
- _mm256_add_epi32(res_32b_hi, offset_const);
-
- const __m256i comp_avg_res_hi = highbd_comp_avg(
- &data_ref_0_hi, &res_unsigned_hi, &wt0, &wt1, use_jnt_comp_avg);
-
- const __m256i round_result_lo =
- highbd_convolve_rounding(&comp_avg_res_lo, &offset_const,
- &rounding_const, rounding_shift);
- const __m256i round_result_hi =
- highbd_convolve_rounding(&comp_avg_res_hi, &offset_const,
- &rounding_const, rounding_shift);
-
- const __m256i res_16b =
- _mm256_packus_epi32(round_result_lo, round_result_hi);
- const __m256i res_clip =
- _mm256_min_epi16(res_16b, clip_pixel_to_bd);
-
- const __m128i res_0 = _mm256_castsi256_si128(res_clip);
- const __m128i res_1 = _mm256_extracti128_si256(res_clip, 1);
-
- _mm_store_si128((__m128i *)(&dst0[i * dst_stride0 + j]), res_0);
- _mm_store_si128(
- (__m128i *)(&dst0[i * dst_stride0 + j + dst_stride0]), res_1);
- } else {
- const __m256i res_unsigned_16b =
- _mm256_adds_epu16(res, offset_const_16b);
- const __m128i res_0 = _mm256_castsi256_si128(res_unsigned_16b);
- const __m128i res_1 = _mm256_extracti128_si256(res_unsigned_16b, 1);
-
- _mm_store_si128((__m128i *)(&dst[i * dst_stride + j]), res_0);
- _mm_store_si128((__m128i *)(&dst[i * dst_stride + j + dst_stride]),
- res_1);
- }
- }
- }
- }
- }
-}
-
-void av1_highbd_jnt_convolve_2d_avx2(
- const uint16_t *src, int src_stride, uint16_t *dst0, int dst_stride0, int w,
- int h, const InterpFilterParams *filter_params_x,
- const InterpFilterParams *filter_params_y, const int subpel_x_q4,
- const int subpel_y_q4, ConvolveParams *conv_params, int bd) {
- DECLARE_ALIGNED(32, int16_t, im_block[(MAX_SB_SIZE + MAX_FILTER_TAP) * 8]);
- CONV_BUF_TYPE *dst = conv_params->dst;
- int dst_stride = conv_params->dst_stride;
- int im_h = h + filter_params_y->taps - 1;
- int im_stride = 8;
- int i, j;
- const int fo_vert = filter_params_y->taps / 2 - 1;
- const int fo_horiz = filter_params_x->taps / 2 - 1;
- const uint16_t *const src_ptr = src - fo_vert * src_stride - fo_horiz;
-
- // Check that, even with 12-bit input, the intermediate values will fit
- // into an unsigned 16-bit intermediate array.
- assert(bd + FILTER_BITS + 2 - conv_params->round_0 <= 16);
-
- __m256i s[8], coeffs_y[4], coeffs_x[4];
- const int do_average = conv_params->do_average;
- const int use_jnt_comp_avg = conv_params->use_jnt_comp_avg;
-
- const int w0 = conv_params->fwd_offset;
- const int w1 = conv_params->bck_offset;
- const __m256i wt0 = _mm256_set1_epi32(w0);
- const __m256i wt1 = _mm256_set1_epi32(w1);
- const __m256i zero = _mm256_setzero_si256();
-
- const __m256i round_const_x = _mm256_set1_epi32(
- ((1 << conv_params->round_0) >> 1) + (1 << (bd + FILTER_BITS - 1)));
- const __m128i round_shift_x = _mm_cvtsi32_si128(conv_params->round_0);
-
- const __m256i round_const_y = _mm256_set1_epi32(
- ((1 << conv_params->round_1) >> 1) -
- (1 << (bd + 2 * FILTER_BITS - conv_params->round_0 - 1)));
- const __m128i round_shift_y = _mm_cvtsi32_si128(conv_params->round_1);
-
- const int offset_0 =
- bd + 2 * FILTER_BITS - conv_params->round_0 - conv_params->round_1;
- const int offset = (1 << offset_0) + (1 << (offset_0 - 1));
- const __m256i offset_const = _mm256_set1_epi32(offset);
- const int rounding_shift =
- 2 * FILTER_BITS - conv_params->round_0 - conv_params->round_1;
- const __m256i rounding_const = _mm256_set1_epi32((1 << rounding_shift) >> 1);
-
- const __m256i clip_pixel_to_bd =
- _mm256_set1_epi16(bd == 10 ? 1023 : (bd == 12 ? 4095 : 255));
-
- prepare_coeffs(filter_params_x, subpel_x_q4, coeffs_x);
- prepare_coeffs(filter_params_y, subpel_y_q4, coeffs_y);
-
- for (j = 0; j < w; j += 8) {
- /* Horizontal filter */
- {
- for (i = 0; i < im_h; i += 2) {
- const __m256i row0 =
- _mm256_loadu_si256((__m256i *)&src_ptr[i * src_stride + j]);
- __m256i row1 = _mm256_set1_epi16(0);
- if (i + 1 < im_h)
- row1 =
- _mm256_loadu_si256((__m256i *)&src_ptr[(i + 1) * src_stride + j]);
-
- const __m256i r0 = _mm256_permute2x128_si256(row0, row1, 0x20);
- const __m256i r1 = _mm256_permute2x128_si256(row0, row1, 0x31);
-
- // even pixels
- s[0] = _mm256_alignr_epi8(r1, r0, 0);
- s[1] = _mm256_alignr_epi8(r1, r0, 4);
- s[2] = _mm256_alignr_epi8(r1, r0, 8);
- s[3] = _mm256_alignr_epi8(r1, r0, 12);
-
- __m256i res_even = convolve(s, coeffs_x);
- res_even = _mm256_sra_epi32(_mm256_add_epi32(res_even, round_const_x),
- round_shift_x);
-
- // odd pixels
- s[0] = _mm256_alignr_epi8(r1, r0, 2);
- s[1] = _mm256_alignr_epi8(r1, r0, 6);
- s[2] = _mm256_alignr_epi8(r1, r0, 10);
- s[3] = _mm256_alignr_epi8(r1, r0, 14);
-
- __m256i res_odd = convolve(s, coeffs_x);
- res_odd = _mm256_sra_epi32(_mm256_add_epi32(res_odd, round_const_x),
- round_shift_x);
-
- __m256i res_even1 = _mm256_packs_epi32(res_even, res_even);
- __m256i res_odd1 = _mm256_packs_epi32(res_odd, res_odd);
- __m256i res = _mm256_unpacklo_epi16(res_even1, res_odd1);
-
- _mm256_store_si256((__m256i *)&im_block[i * im_stride], res);
- }
- }
-
- /* Vertical filter */
- {
- __m256i s0 = _mm256_loadu_si256((__m256i *)(im_block + 0 * im_stride));
- __m256i s1 = _mm256_loadu_si256((__m256i *)(im_block + 1 * im_stride));
- __m256i s2 = _mm256_loadu_si256((__m256i *)(im_block + 2 * im_stride));
- __m256i s3 = _mm256_loadu_si256((__m256i *)(im_block + 3 * im_stride));
- __m256i s4 = _mm256_loadu_si256((__m256i *)(im_block + 4 * im_stride));
- __m256i s5 = _mm256_loadu_si256((__m256i *)(im_block + 5 * im_stride));
-
- s[0] = _mm256_unpacklo_epi16(s0, s1);
- s[1] = _mm256_unpacklo_epi16(s2, s3);
- s[2] = _mm256_unpacklo_epi16(s4, s5);
-
- s[4] = _mm256_unpackhi_epi16(s0, s1);
- s[5] = _mm256_unpackhi_epi16(s2, s3);
- s[6] = _mm256_unpackhi_epi16(s4, s5);
-
- for (i = 0; i < h; i += 2) {
- const int16_t *data = &im_block[i * im_stride];
-
- const __m256i s6 =
- _mm256_loadu_si256((__m256i *)(data + 6 * im_stride));
- const __m256i s7 =
- _mm256_loadu_si256((__m256i *)(data + 7 * im_stride));
-
- s[3] = _mm256_unpacklo_epi16(s6, s7);
- s[7] = _mm256_unpackhi_epi16(s6, s7);
-
- const __m256i res_a = convolve(s, coeffs_y);
-
- const __m256i res_a_round = _mm256_sra_epi32(
- _mm256_add_epi32(res_a, round_const_y), round_shift_y);
-
- const __m256i res_unsigned_lo =
- _mm256_add_epi32(res_a_round, offset_const);
-
- if (w - j < 8) {
- if (do_average) {
- const __m256i data_0 = _mm256_castsi128_si256(
- _mm_loadl_epi64((__m128i *)(&dst[i * dst_stride + j])));
- const __m256i data_1 = _mm256_castsi128_si256(_mm_loadl_epi64(
- (__m128i *)(&dst[i * dst_stride + j + dst_stride])));
- const __m256i data_01 =
- _mm256_permute2x128_si256(data_0, data_1, 0x20);
-
- const __m256i data_ref_0 = _mm256_unpacklo_epi16(data_01, zero);
-
- const __m256i comp_avg_res = highbd_comp_avg(
- &data_ref_0, &res_unsigned_lo, &wt0, &wt1, use_jnt_comp_avg);
-
- const __m256i round_result = highbd_convolve_rounding(
- &comp_avg_res, &offset_const, &rounding_const, rounding_shift);
-
- const __m256i res_16b =
- _mm256_packus_epi32(round_result, round_result);
- const __m256i res_clip =
- _mm256_min_epi16(res_16b, clip_pixel_to_bd);
-
- const __m128i res_0 = _mm256_castsi256_si128(res_clip);
- const __m128i res_1 = _mm256_extracti128_si256(res_clip, 1);
-
- _mm_storel_epi64((__m128i *)(&dst0[i * dst_stride0 + j]), res_0);
- _mm_storel_epi64(
- (__m128i *)(&dst0[i * dst_stride0 + j + dst_stride0]), res_1);
- } else {
- __m256i res_16b =
- _mm256_packus_epi32(res_unsigned_lo, res_unsigned_lo);
- const __m128i res_0 = _mm256_castsi256_si128(res_16b);
- const __m128i res_1 = _mm256_extracti128_si256(res_16b, 1);
-
- _mm_storel_epi64((__m128i *)(&dst[i * dst_stride + j]), res_0);
- _mm_storel_epi64((__m128i *)(&dst[i * dst_stride + j + dst_stride]),
- res_1);
- }
- } else {
- const __m256i res_b = convolve(s + 4, coeffs_y);
- const __m256i res_b_round = _mm256_sra_epi32(
- _mm256_add_epi32(res_b, round_const_y), round_shift_y);
-
- __m256i res_unsigned_hi = _mm256_add_epi32(res_b_round, offset_const);
-
- if (do_average) {
- const __m256i data_0 = _mm256_castsi128_si256(
- _mm_loadu_si128((__m128i *)(&dst[i * dst_stride + j])));
- const __m256i data_1 = _mm256_castsi128_si256(_mm_loadu_si128(
- (__m128i *)(&dst[i * dst_stride + j + dst_stride])));
- const __m256i data_01 =
- _mm256_permute2x128_si256(data_0, data_1, 0x20);
-
- const __m256i data_ref_0_lo = _mm256_unpacklo_epi16(data_01, zero);
- const __m256i data_ref_0_hi = _mm256_unpackhi_epi16(data_01, zero);
-
- const __m256i comp_avg_res_lo = highbd_comp_avg(
- &data_ref_0_lo, &res_unsigned_lo, &wt0, &wt1, use_jnt_comp_avg);
- const __m256i comp_avg_res_hi = highbd_comp_avg(
- &data_ref_0_hi, &res_unsigned_hi, &wt0, &wt1, use_jnt_comp_avg);
-
- const __m256i round_result_lo =
- highbd_convolve_rounding(&comp_avg_res_lo, &offset_const,
- &rounding_const, rounding_shift);
- const __m256i round_result_hi =
- highbd_convolve_rounding(&comp_avg_res_hi, &offset_const,
- &rounding_const, rounding_shift);
-
- const __m256i res_16b =
- _mm256_packus_epi32(round_result_lo, round_result_hi);
- const __m256i res_clip =
- _mm256_min_epi16(res_16b, clip_pixel_to_bd);
-
- const __m128i res_0 = _mm256_castsi256_si128(res_clip);
- const __m128i res_1 = _mm256_extracti128_si256(res_clip, 1);
-
- _mm_store_si128((__m128i *)(&dst0[i * dst_stride0 + j]), res_0);
- _mm_store_si128(
- (__m128i *)(&dst0[i * dst_stride0 + j + dst_stride0]), res_1);
- } else {
- __m256i res_16b =
- _mm256_packus_epi32(res_unsigned_lo, res_unsigned_hi);
- const __m128i res_0 = _mm256_castsi256_si128(res_16b);
- const __m128i res_1 = _mm256_extracti128_si256(res_16b, 1);
-
- _mm_store_si128((__m128i *)(&dst[i * dst_stride + j]), res_0);
- _mm_store_si128((__m128i *)(&dst[i * dst_stride + j + dst_stride]),
- res_1);
- }
- }
-
- s[0] = s[1];
- s[1] = s[2];
- s[2] = s[3];
-
- s[4] = s[5];
- s[5] = s[6];
- s[6] = s[7];
- }
- }
- }
-}
-
-void av1_highbd_jnt_convolve_x_avx2(
- const uint16_t *src, int src_stride, uint16_t *dst0, int dst_stride0, int w,
- int h, const InterpFilterParams *filter_params_x,
- const InterpFilterParams *filter_params_y, const int subpel_x_q4,
- const int subpel_y_q4, ConvolveParams *conv_params, int bd) {
- CONV_BUF_TYPE *dst = conv_params->dst;
- int dst_stride = conv_params->dst_stride;
- const int fo_horiz = filter_params_x->taps / 2 - 1;
- const uint16_t *const src_ptr = src - fo_horiz;
- const int bits = FILTER_BITS - conv_params->round_1;
- (void)filter_params_y;
- (void)subpel_y_q4;
-
- int i, j;
- __m256i s[4], coeffs_x[4];
-
- const int do_average = conv_params->do_average;
- const int use_jnt_comp_avg = conv_params->use_jnt_comp_avg;
- const int w0 = conv_params->fwd_offset;
- const int w1 = conv_params->bck_offset;
- const __m256i wt0 = _mm256_set1_epi32(w0);
- const __m256i wt1 = _mm256_set1_epi32(w1);
- const __m256i zero = _mm256_setzero_si256();
-
- const __m256i round_const_x =
- _mm256_set1_epi32(((1 << conv_params->round_0) >> 1));
- const __m128i round_shift_x = _mm_cvtsi32_si128(conv_params->round_0);
- const __m128i round_shift_bits = _mm_cvtsi32_si128(bits);
-
- const int offset_0 =
- bd + 2 * FILTER_BITS - conv_params->round_0 - conv_params->round_1;
- const int offset = (1 << offset_0) + (1 << (offset_0 - 1));
- const __m256i offset_const = _mm256_set1_epi32(offset);
- const int rounding_shift =
- 2 * FILTER_BITS - conv_params->round_0 - conv_params->round_1;
- const __m256i rounding_const = _mm256_set1_epi32((1 << rounding_shift) >> 1);
- const __m256i clip_pixel_to_bd =
- _mm256_set1_epi16(bd == 10 ? 1023 : (bd == 12 ? 4095 : 255));
-
- assert(bits >= 0);
- prepare_coeffs(filter_params_x, subpel_x_q4, coeffs_x);
-
- for (j = 0; j < w; j += 8) {
- /* Horizontal filter */
- for (i = 0; i < h; i += 2) {
- const __m256i row0 =
- _mm256_loadu_si256((__m256i *)&src_ptr[i * src_stride + j]);
- __m256i row1 =
- _mm256_loadu_si256((__m256i *)&src_ptr[(i + 1) * src_stride + j]);
-
- const __m256i r0 = _mm256_permute2x128_si256(row0, row1, 0x20);
- const __m256i r1 = _mm256_permute2x128_si256(row0, row1, 0x31);
-
- // even pixels
- s[0] = _mm256_alignr_epi8(r1, r0, 0);
- s[1] = _mm256_alignr_epi8(r1, r0, 4);
- s[2] = _mm256_alignr_epi8(r1, r0, 8);
- s[3] = _mm256_alignr_epi8(r1, r0, 12);
-
- __m256i res_even = convolve(s, coeffs_x);
- res_even = _mm256_sra_epi32(_mm256_add_epi32(res_even, round_const_x),
- round_shift_x);
-
- // odd pixels
- s[0] = _mm256_alignr_epi8(r1, r0, 2);
- s[1] = _mm256_alignr_epi8(r1, r0, 6);
- s[2] = _mm256_alignr_epi8(r1, r0, 10);
- s[3] = _mm256_alignr_epi8(r1, r0, 14);
-
- __m256i res_odd = convolve(s, coeffs_x);
- res_odd = _mm256_sra_epi32(_mm256_add_epi32(res_odd, round_const_x),
- round_shift_x);
-
- res_even = _mm256_sll_epi32(res_even, round_shift_bits);
- res_odd = _mm256_sll_epi32(res_odd, round_shift_bits);
-
- __m256i res1 = _mm256_unpacklo_epi32(res_even, res_odd);
-
- __m256i res_unsigned_lo = _mm256_add_epi32(res1, offset_const);
-
- if (w - j < 8) {
- if (do_average) {
- const __m256i data_0 = _mm256_castsi128_si256(
- _mm_loadl_epi64((__m128i *)(&dst[i * dst_stride + j])));
- const __m256i data_1 = _mm256_castsi128_si256(_mm_loadl_epi64(
- (__m128i *)(&dst[i * dst_stride + j + dst_stride])));
- const __m256i data_01 =
- _mm256_permute2x128_si256(data_0, data_1, 0x20);
-
- const __m256i data_ref_0 = _mm256_unpacklo_epi16(data_01, zero);
-
- const __m256i comp_avg_res = highbd_comp_avg(
- &data_ref_0, &res_unsigned_lo, &wt0, &wt1, use_jnt_comp_avg);
-
- const __m256i round_result = highbd_convolve_rounding(
- &comp_avg_res, &offset_const, &rounding_const, rounding_shift);
-
- const __m256i res_16b =
- _mm256_packus_epi32(round_result, round_result);
- const __m256i res_clip = _mm256_min_epi16(res_16b, clip_pixel_to_bd);
-
- const __m128i res_0 = _mm256_castsi256_si128(res_clip);
- const __m128i res_1 = _mm256_extracti128_si256(res_clip, 1);
-
- _mm_storel_epi64((__m128i *)(&dst0[i * dst_stride0 + j]), res_0);
- _mm_storel_epi64(
- (__m128i *)(&dst0[i * dst_stride0 + j + dst_stride0]), res_1);
- } else {
- __m256i res_16b =
- _mm256_packus_epi32(res_unsigned_lo, res_unsigned_lo);
- const __m128i res_0 = _mm256_castsi256_si128(res_16b);
- const __m128i res_1 = _mm256_extracti128_si256(res_16b, 1);
-
- _mm_storel_epi64((__m128i *)(&dst[i * dst_stride + j]), res_0);
- _mm_storel_epi64((__m128i *)(&dst[i * dst_stride + j + dst_stride]),
- res_1);
- }
- } else {
- __m256i res2 = _mm256_unpackhi_epi32(res_even, res_odd);
- __m256i res_unsigned_hi = _mm256_add_epi32(res2, offset_const);
-
- if (do_average) {
- const __m256i data_0 = _mm256_castsi128_si256(
- _mm_loadu_si128((__m128i *)(&dst[i * dst_stride + j])));
- const __m256i data_1 = _mm256_castsi128_si256(_mm_loadu_si128(
- (__m128i *)(&dst[i * dst_stride + j + dst_stride])));
- const __m256i data_01 =
- _mm256_permute2x128_si256(data_0, data_1, 0x20);
-
- const __m256i data_ref_0_lo = _mm256_unpacklo_epi16(data_01, zero);
- const __m256i data_ref_0_hi = _mm256_unpackhi_epi16(data_01, zero);
-
- const __m256i comp_avg_res_lo = highbd_comp_avg(
- &data_ref_0_lo, &res_unsigned_lo, &wt0, &wt1, use_jnt_comp_avg);
- const __m256i comp_avg_res_hi = highbd_comp_avg(
- &data_ref_0_hi, &res_unsigned_hi, &wt0, &wt1, use_jnt_comp_avg);
-
- const __m256i round_result_lo = highbd_convolve_rounding(
- &comp_avg_res_lo, &offset_const, &rounding_const, rounding_shift);
- const __m256i round_result_hi = highbd_convolve_rounding(
- &comp_avg_res_hi, &offset_const, &rounding_const, rounding_shift);
-
- const __m256i res_16b =
- _mm256_packus_epi32(round_result_lo, round_result_hi);
- const __m256i res_clip = _mm256_min_epi16(res_16b, clip_pixel_to_bd);
-
- const __m128i res_0 = _mm256_castsi256_si128(res_clip);
- const __m128i res_1 = _mm256_extracti128_si256(res_clip, 1);
-
- _mm_store_si128((__m128i *)(&dst0[i * dst_stride0 + j]), res_0);
- _mm_store_si128((__m128i *)(&dst0[i * dst_stride0 + j + dst_stride0]),
- res_1);
- } else {
- __m256i res_16b =
- _mm256_packus_epi32(res_unsigned_lo, res_unsigned_hi);
- const __m128i res_0 = _mm256_castsi256_si128(res_16b);
- const __m128i res_1 = _mm256_extracti128_si256(res_16b, 1);
-
- _mm_store_si128((__m128i *)(&dst[i * dst_stride + j]), res_0);
- _mm_store_si128((__m128i *)(&dst[i * dst_stride + j + dst_stride]),
- res_1);
- }
- }
- }
- }
-}
-
-void av1_highbd_jnt_convolve_y_avx2(
- const uint16_t *src, int src_stride, uint16_t *dst0, int dst_stride0, int w,
- int h, const InterpFilterParams *filter_params_x,
- const InterpFilterParams *filter_params_y, const int subpel_x_q4,
- const int subpel_y_q4, ConvolveParams *conv_params, int bd) {
- CONV_BUF_TYPE *dst = conv_params->dst;
- int dst_stride = conv_params->dst_stride;
- const int fo_vert = filter_params_y->taps / 2 - 1;
- const uint16_t *const src_ptr = src - fo_vert * src_stride;
- const int bits = FILTER_BITS - conv_params->round_0;
- (void)filter_params_x;
- (void)subpel_x_q4;
-
- assert(bits >= 0);
- int i, j;
- __m256i s[8], coeffs_y[4];
- const int do_average = conv_params->do_average;
- const int use_jnt_comp_avg = conv_params->use_jnt_comp_avg;
-
- const int w0 = conv_params->fwd_offset;
- const int w1 = conv_params->bck_offset;
- const __m256i wt0 = _mm256_set1_epi32(w0);
- const __m256i wt1 = _mm256_set1_epi32(w1);
- const __m256i round_const_y =
- _mm256_set1_epi32(((1 << conv_params->round_1) >> 1));
- const __m128i round_shift_y = _mm_cvtsi32_si128(conv_params->round_1);
- const __m128i round_shift_bits = _mm_cvtsi32_si128(bits);
-
- const int offset_0 =
- bd + 2 * FILTER_BITS - conv_params->round_0 - conv_params->round_1;
- const int offset = (1 << offset_0) + (1 << (offset_0 - 1));
- const __m256i offset_const = _mm256_set1_epi32(offset);
- const int rounding_shift =
- 2 * FILTER_BITS - conv_params->round_0 - conv_params->round_1;
- const __m256i rounding_const = _mm256_set1_epi32((1 << rounding_shift) >> 1);
- const __m256i clip_pixel_to_bd =
- _mm256_set1_epi16(bd == 10 ? 1023 : (bd == 12 ? 4095 : 255));
- const __m256i zero = _mm256_setzero_si256();
-
- prepare_coeffs(filter_params_y, subpel_y_q4, coeffs_y);
-
- for (j = 0; j < w; j += 8) {
- const uint16_t *data = &src_ptr[j];
- /* Vertical filter */
- {
- __m256i src6;
- __m256i s01 = _mm256_permute2x128_si256(
- _mm256_castsi128_si256(
- _mm_loadu_si128((__m128i *)(data + 0 * src_stride))),
- _mm256_castsi128_si256(
- _mm_loadu_si128((__m128i *)(data + 1 * src_stride))),
- 0x20);
- __m256i s12 = _mm256_permute2x128_si256(
- _mm256_castsi128_si256(
- _mm_loadu_si128((__m128i *)(data + 1 * src_stride))),
- _mm256_castsi128_si256(
- _mm_loadu_si128((__m128i *)(data + 2 * src_stride))),
- 0x20);
- __m256i s23 = _mm256_permute2x128_si256(
- _mm256_castsi128_si256(
- _mm_loadu_si128((__m128i *)(data + 2 * src_stride))),
- _mm256_castsi128_si256(
- _mm_loadu_si128((__m128i *)(data + 3 * src_stride))),
- 0x20);
- __m256i s34 = _mm256_permute2x128_si256(
- _mm256_castsi128_si256(
- _mm_loadu_si128((__m128i *)(data + 3 * src_stride))),
- _mm256_castsi128_si256(
- _mm_loadu_si128((__m128i *)(data + 4 * src_stride))),
- 0x20);
- __m256i s45 = _mm256_permute2x128_si256(
- _mm256_castsi128_si256(
- _mm_loadu_si128((__m128i *)(data + 4 * src_stride))),
- _mm256_castsi128_si256(
- _mm_loadu_si128((__m128i *)(data + 5 * src_stride))),
- 0x20);
- src6 = _mm256_castsi128_si256(
- _mm_loadu_si128((__m128i *)(data + 6 * src_stride)));
- __m256i s56 = _mm256_permute2x128_si256(
- _mm256_castsi128_si256(
- _mm_loadu_si128((__m128i *)(data + 5 * src_stride))),
- src6, 0x20);
-
- s[0] = _mm256_unpacklo_epi16(s01, s12);
- s[1] = _mm256_unpacklo_epi16(s23, s34);
- s[2] = _mm256_unpacklo_epi16(s45, s56);
-
- s[4] = _mm256_unpackhi_epi16(s01, s12);
- s[5] = _mm256_unpackhi_epi16(s23, s34);
- s[6] = _mm256_unpackhi_epi16(s45, s56);
-
- for (i = 0; i < h; i += 2) {
- data = &src_ptr[i * src_stride + j];
-
- const __m256i s67 = _mm256_permute2x128_si256(
- src6,
- _mm256_castsi128_si256(
- _mm_loadu_si128((__m128i *)(data + 7 * src_stride))),
- 0x20);
-
- src6 = _mm256_castsi128_si256(
- _mm_loadu_si128((__m128i *)(data + 8 * src_stride)));
-
- const __m256i s78 = _mm256_permute2x128_si256(
- _mm256_castsi128_si256(
- _mm_loadu_si128((__m128i *)(data + 7 * src_stride))),
- src6, 0x20);
-
- s[3] = _mm256_unpacklo_epi16(s67, s78);
- s[7] = _mm256_unpackhi_epi16(s67, s78);
-
- const __m256i res_a = convolve(s, coeffs_y);
-
- __m256i res_a_round = _mm256_sll_epi32(res_a, round_shift_bits);
- res_a_round = _mm256_sra_epi32(
- _mm256_add_epi32(res_a_round, round_const_y), round_shift_y);
-
- __m256i res_unsigned_lo = _mm256_add_epi32(res_a_round, offset_const);
-
- if (w - j < 8) {
- if (do_average) {
- const __m256i data_0 = _mm256_castsi128_si256(
- _mm_loadl_epi64((__m128i *)(&dst[i * dst_stride + j])));
- const __m256i data_1 = _mm256_castsi128_si256(_mm_loadl_epi64(
- (__m128i *)(&dst[i * dst_stride + j + dst_stride])));
- const __m256i data_01 =
- _mm256_permute2x128_si256(data_0, data_1, 0x20);
-
- const __m256i data_ref_0 = _mm256_unpacklo_epi16(data_01, zero);
-
- const __m256i comp_avg_res = highbd_comp_avg(
- &data_ref_0, &res_unsigned_lo, &wt0, &wt1, use_jnt_comp_avg);
-
- const __m256i round_result = highbd_convolve_rounding(
- &comp_avg_res, &offset_const, &rounding_const, rounding_shift);
-
- const __m256i res_16b =
- _mm256_packus_epi32(round_result, round_result);
- const __m256i res_clip =
- _mm256_min_epi16(res_16b, clip_pixel_to_bd);
-
- const __m128i res_0 = _mm256_castsi256_si128(res_clip);
- const __m128i res_1 = _mm256_extracti128_si256(res_clip, 1);
-
- _mm_storel_epi64((__m128i *)(&dst0[i * dst_stride0 + j]), res_0);
- _mm_storel_epi64(
- (__m128i *)(&dst0[i * dst_stride0 + j + dst_stride0]), res_1);
- } else {
- __m256i res_16b =
- _mm256_packus_epi32(res_unsigned_lo, res_unsigned_lo);
- const __m128i res_0 = _mm256_castsi256_si128(res_16b);
- const __m128i res_1 = _mm256_extracti128_si256(res_16b, 1);
-
- _mm_storel_epi64((__m128i *)(&dst[i * dst_stride + j]), res_0);
- _mm_storel_epi64((__m128i *)(&dst[i * dst_stride + j + dst_stride]),
- res_1);
- }
- } else {
- const __m256i res_b = convolve(s + 4, coeffs_y);
- __m256i res_b_round = _mm256_sll_epi32(res_b, round_shift_bits);
- res_b_round = _mm256_sra_epi32(
- _mm256_add_epi32(res_b_round, round_const_y), round_shift_y);
-
- __m256i res_unsigned_hi = _mm256_add_epi32(res_b_round, offset_const);
-
- if (do_average) {
- const __m256i data_0 = _mm256_castsi128_si256(
- _mm_loadu_si128((__m128i *)(&dst[i * dst_stride + j])));
- const __m256i data_1 = _mm256_castsi128_si256(_mm_loadu_si128(
- (__m128i *)(&dst[i * dst_stride + j + dst_stride])));
- const __m256i data_01 =
- _mm256_permute2x128_si256(data_0, data_1, 0x20);
-
- const __m256i data_ref_0_lo = _mm256_unpacklo_epi16(data_01, zero);
- const __m256i data_ref_0_hi = _mm256_unpackhi_epi16(data_01, zero);
-
- const __m256i comp_avg_res_lo = highbd_comp_avg(
- &data_ref_0_lo, &res_unsigned_lo, &wt0, &wt1, use_jnt_comp_avg);
- const __m256i comp_avg_res_hi = highbd_comp_avg(
- &data_ref_0_hi, &res_unsigned_hi, &wt0, &wt1, use_jnt_comp_avg);
-
- const __m256i round_result_lo =
- highbd_convolve_rounding(&comp_avg_res_lo, &offset_const,
- &rounding_const, rounding_shift);
- const __m256i round_result_hi =
- highbd_convolve_rounding(&comp_avg_res_hi, &offset_const,
- &rounding_const, rounding_shift);
-
- const __m256i res_16b =
- _mm256_packus_epi32(round_result_lo, round_result_hi);
- const __m256i res_clip =
- _mm256_min_epi16(res_16b, clip_pixel_to_bd);
-
- const __m128i res_0 = _mm256_castsi256_si128(res_clip);
- const __m128i res_1 = _mm256_extracti128_si256(res_clip, 1);
-
- _mm_store_si128((__m128i *)(&dst0[i * dst_stride0 + j]), res_0);
- _mm_store_si128(
- (__m128i *)(&dst0[i * dst_stride0 + j + dst_stride0]), res_1);
- } else {
- __m256i res_16b =
- _mm256_packus_epi32(res_unsigned_lo, res_unsigned_hi);
- const __m128i res_0 = _mm256_castsi256_si128(res_16b);
- const __m128i res_1 = _mm256_extracti128_si256(res_16b, 1);
-
- _mm_store_si128((__m128i *)(&dst[i * dst_stride + j]), res_0);
- _mm_store_si128((__m128i *)(&dst[i * dst_stride + j + dst_stride]),
- res_1);
- }
- }
- s[0] = s[1];
- s[1] = s[2];
- s[2] = s[3];
-
- s[4] = s[5];
- s[5] = s[6];
- s[6] = s[7];
- }
- }
- }
-}
diff --git a/third_party/aom/av1/common/x86/highbd_jnt_convolve_sse4.c b/third_party/aom/av1/common/x86/highbd_jnt_convolve_sse4.c
deleted file mode 100644
index 1a29985b5..000000000
--- a/third_party/aom/av1/common/x86/highbd_jnt_convolve_sse4.c
+++ /dev/null
@@ -1,383 +0,0 @@
-/*
- * Copyright (c) 2018, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#include <smmintrin.h>
-#include <assert.h>
-
-#include "config/aom_dsp_rtcd.h"
-
-#include "aom_dsp/x86/convolve_sse2.h"
-#include "aom_dsp/x86/convolve_sse4_1.h"
-
-void av1_highbd_jnt_convolve_y_sse4_1(
- const uint16_t *src, int src_stride, uint16_t *dst0, int dst_stride0, int w,
- int h, const InterpFilterParams *filter_params_x,
- const InterpFilterParams *filter_params_y, const int subpel_x_q4,
- const int subpel_y_q4, ConvolveParams *conv_params, int bd) {
- CONV_BUF_TYPE *dst = conv_params->dst;
- int dst_stride = conv_params->dst_stride;
- const int fo_vert = filter_params_y->taps / 2 - 1;
- const uint16_t *const src_ptr = src - fo_vert * src_stride;
- const int bits = FILTER_BITS - conv_params->round_0;
- (void)filter_params_x;
- (void)subpel_x_q4;
-
- assert(bits >= 0);
- int i, j;
- const int do_average = conv_params->do_average;
- const int use_jnt_comp_avg = conv_params->use_jnt_comp_avg;
-
- const int w0 = conv_params->fwd_offset;
- const int w1 = conv_params->bck_offset;
- const __m128i wt0 = _mm_set1_epi32(w0);
- const __m128i wt1 = _mm_set1_epi32(w1);
- const __m128i round_const_y =
- _mm_set1_epi32(((1 << conv_params->round_1) >> 1));
- const __m128i round_shift_y = _mm_cvtsi32_si128(conv_params->round_1);
- const __m128i round_shift_bits = _mm_cvtsi32_si128(bits);
-
- const int offset_0 =
- bd + 2 * FILTER_BITS - conv_params->round_0 - conv_params->round_1;
- const int offset = (1 << offset_0) + (1 << (offset_0 - 1));
- const __m128i offset_const = _mm_set1_epi32(offset);
- const int rounding_shift =
- 2 * FILTER_BITS - conv_params->round_0 - conv_params->round_1;
- const __m128i rounding_const = _mm_set1_epi32((1 << rounding_shift) >> 1);
- const __m128i clip_pixel_to_bd =
- _mm_set1_epi16(bd == 10 ? 1023 : (bd == 12 ? 4095 : 255));
- const __m128i zero = _mm_setzero_si128();
- __m128i s[16], coeffs_y[4];
-
- prepare_coeffs(filter_params_y, subpel_y_q4, coeffs_y);
-
- for (j = 0; j < w; j += 8) {
- const uint16_t *data = &src_ptr[j];
- /* Vertical filter */
- {
- __m128i s0 = _mm_loadu_si128((__m128i *)(data + 0 * src_stride));
- __m128i s1 = _mm_loadu_si128((__m128i *)(data + 1 * src_stride));
- __m128i s2 = _mm_loadu_si128((__m128i *)(data + 2 * src_stride));
- __m128i s3 = _mm_loadu_si128((__m128i *)(data + 3 * src_stride));
- __m128i s4 = _mm_loadu_si128((__m128i *)(data + 4 * src_stride));
- __m128i s5 = _mm_loadu_si128((__m128i *)(data + 5 * src_stride));
- __m128i s6 = _mm_loadu_si128((__m128i *)(data + 6 * src_stride));
-
- s[0] = _mm_unpacklo_epi16(s0, s1);
- s[1] = _mm_unpacklo_epi16(s2, s3);
- s[2] = _mm_unpacklo_epi16(s4, s5);
-
- s[4] = _mm_unpackhi_epi16(s0, s1);
- s[5] = _mm_unpackhi_epi16(s2, s3);
- s[6] = _mm_unpackhi_epi16(s4, s5);
-
- s[0 + 8] = _mm_unpacklo_epi16(s1, s2);
- s[1 + 8] = _mm_unpacklo_epi16(s3, s4);
- s[2 + 8] = _mm_unpacklo_epi16(s5, s6);
-
- s[4 + 8] = _mm_unpackhi_epi16(s1, s2);
- s[5 + 8] = _mm_unpackhi_epi16(s3, s4);
- s[6 + 8] = _mm_unpackhi_epi16(s5, s6);
-
- for (i = 0; i < h; i += 2) {
- data = &src_ptr[i * src_stride + j];
-
- __m128i s7 = _mm_loadu_si128((__m128i *)(data + 7 * src_stride));
- __m128i s8 = _mm_loadu_si128((__m128i *)(data + 8 * src_stride));
-
- s[3] = _mm_unpacklo_epi16(s6, s7);
- s[7] = _mm_unpackhi_epi16(s6, s7);
-
- s[3 + 8] = _mm_unpacklo_epi16(s7, s8);
- s[7 + 8] = _mm_unpackhi_epi16(s7, s8);
-
- const __m128i res_a0 = convolve(s, coeffs_y);
- __m128i res_a_round0 = _mm_sll_epi32(res_a0, round_shift_bits);
- res_a_round0 = _mm_sra_epi32(_mm_add_epi32(res_a_round0, round_const_y),
- round_shift_y);
-
- const __m128i res_a1 = convolve(s + 8, coeffs_y);
- __m128i res_a_round1 = _mm_sll_epi32(res_a1, round_shift_bits);
- res_a_round1 = _mm_sra_epi32(_mm_add_epi32(res_a_round1, round_const_y),
- round_shift_y);
-
- __m128i res_unsigned_lo_0 = _mm_add_epi32(res_a_round0, offset_const);
- __m128i res_unsigned_lo_1 = _mm_add_epi32(res_a_round1, offset_const);
-
- if (w - j < 8) {
- if (do_average) {
- const __m128i data_0 =
- _mm_loadl_epi64((__m128i *)(&dst[i * dst_stride + j]));
- const __m128i data_1 = _mm_loadl_epi64(
- (__m128i *)(&dst[i * dst_stride + j + dst_stride]));
-
- const __m128i data_ref_0 = _mm_unpacklo_epi16(data_0, zero);
- const __m128i data_ref_1 = _mm_unpacklo_epi16(data_1, zero);
-
- const __m128i comp_avg_res_0 = highbd_comp_avg_sse4_1(
- &data_ref_0, &res_unsigned_lo_0, &wt0, &wt1, use_jnt_comp_avg);
- const __m128i comp_avg_res_1 = highbd_comp_avg_sse4_1(
- &data_ref_1, &res_unsigned_lo_1, &wt0, &wt1, use_jnt_comp_avg);
-
- const __m128i round_result_0 =
- highbd_convolve_rounding_sse2(&comp_avg_res_0, &offset_const,
- &rounding_const, rounding_shift);
- const __m128i round_result_1 =
- highbd_convolve_rounding_sse2(&comp_avg_res_1, &offset_const,
- &rounding_const, rounding_shift);
-
- const __m128i res_16b_0 =
- _mm_packus_epi32(round_result_0, round_result_0);
- const __m128i res_clip_0 =
- _mm_min_epi16(res_16b_0, clip_pixel_to_bd);
- const __m128i res_16b_1 =
- _mm_packus_epi32(round_result_1, round_result_1);
- const __m128i res_clip_1 =
- _mm_min_epi16(res_16b_1, clip_pixel_to_bd);
-
- _mm_storel_epi64((__m128i *)(&dst0[i * dst_stride0 + j]),
- res_clip_0);
- _mm_storel_epi64(
- (__m128i *)(&dst0[i * dst_stride0 + j + dst_stride0]),
- res_clip_1);
-
- } else {
- __m128i res_16b_0 =
- _mm_packus_epi32(res_unsigned_lo_0, res_unsigned_lo_0);
-
- __m128i res_16b_1 =
- _mm_packus_epi32(res_unsigned_lo_1, res_unsigned_lo_1);
-
- _mm_storel_epi64((__m128i *)&dst[i * dst_stride + j], res_16b_0);
- _mm_storel_epi64((__m128i *)&dst[i * dst_stride + j + dst_stride],
- res_16b_1);
- }
- } else {
- const __m128i res_b0 = convolve(s + 4, coeffs_y);
- __m128i res_b_round0 = _mm_sll_epi32(res_b0, round_shift_bits);
- res_b_round0 = _mm_sra_epi32(
- _mm_add_epi32(res_b_round0, round_const_y), round_shift_y);
-
- const __m128i res_b1 = convolve(s + 4 + 8, coeffs_y);
- __m128i res_b_round1 = _mm_sll_epi32(res_b1, round_shift_bits);
- res_b_round1 = _mm_sra_epi32(
- _mm_add_epi32(res_b_round1, round_const_y), round_shift_y);
-
- __m128i res_unsigned_hi_0 = _mm_add_epi32(res_b_round0, offset_const);
- __m128i res_unsigned_hi_1 = _mm_add_epi32(res_b_round1, offset_const);
-
- if (do_average) {
- const __m128i data_0 =
- _mm_loadu_si128((__m128i *)(&dst[i * dst_stride + j]));
- const __m128i data_1 = _mm_loadu_si128(
- (__m128i *)(&dst[i * dst_stride + j + dst_stride]));
- const __m128i data_ref_0_lo_0 = _mm_unpacklo_epi16(data_0, zero);
- const __m128i data_ref_0_lo_1 = _mm_unpacklo_epi16(data_1, zero);
-
- const __m128i data_ref_0_hi_0 = _mm_unpackhi_epi16(data_0, zero);
- const __m128i data_ref_0_hi_1 = _mm_unpackhi_epi16(data_1, zero);
-
- const __m128i comp_avg_res_lo_0 =
- highbd_comp_avg_sse4_1(&data_ref_0_lo_0, &res_unsigned_lo_0,
- &wt0, &wt1, use_jnt_comp_avg);
- const __m128i comp_avg_res_lo_1 =
- highbd_comp_avg_sse4_1(&data_ref_0_lo_1, &res_unsigned_lo_1,
- &wt0, &wt1, use_jnt_comp_avg);
- const __m128i comp_avg_res_hi_0 =
- highbd_comp_avg_sse4_1(&data_ref_0_hi_0, &res_unsigned_hi_0,
- &wt0, &wt1, use_jnt_comp_avg);
- const __m128i comp_avg_res_hi_1 =
- highbd_comp_avg_sse4_1(&data_ref_0_hi_1, &res_unsigned_hi_1,
- &wt0, &wt1, use_jnt_comp_avg);
-
- const __m128i round_result_lo_0 =
- highbd_convolve_rounding_sse2(&comp_avg_res_lo_0, &offset_const,
- &rounding_const, rounding_shift);
- const __m128i round_result_lo_1 =
- highbd_convolve_rounding_sse2(&comp_avg_res_lo_1, &offset_const,
- &rounding_const, rounding_shift);
- const __m128i round_result_hi_0 =
- highbd_convolve_rounding_sse2(&comp_avg_res_hi_0, &offset_const,
- &rounding_const, rounding_shift);
- const __m128i round_result_hi_1 =
- highbd_convolve_rounding_sse2(&comp_avg_res_hi_1, &offset_const,
- &rounding_const, rounding_shift);
-
- const __m128i res_16b_0 =
- _mm_packus_epi32(round_result_lo_0, round_result_hi_0);
- const __m128i res_clip_0 =
- _mm_min_epi16(res_16b_0, clip_pixel_to_bd);
-
- const __m128i res_16b_1 =
- _mm_packus_epi32(round_result_lo_1, round_result_hi_1);
- const __m128i res_clip_1 =
- _mm_min_epi16(res_16b_1, clip_pixel_to_bd);
-
- _mm_store_si128((__m128i *)(&dst0[i * dst_stride0 + j]),
- res_clip_0);
- _mm_store_si128(
- (__m128i *)(&dst0[i * dst_stride0 + j + dst_stride0]),
- res_clip_1);
- } else {
- __m128i res_16bit0 =
- _mm_packus_epi32(res_unsigned_lo_0, res_unsigned_hi_0);
- __m128i res_16bit1 =
- _mm_packus_epi32(res_unsigned_lo_1, res_unsigned_hi_1);
- _mm_store_si128((__m128i *)(&dst[i * dst_stride + j]), res_16bit0);
- _mm_store_si128((__m128i *)(&dst[i * dst_stride + j + dst_stride]),
- res_16bit1);
- }
- }
- s[0] = s[1];
- s[1] = s[2];
- s[2] = s[3];
-
- s[4] = s[5];
- s[5] = s[6];
- s[6] = s[7];
-
- s[0 + 8] = s[1 + 8];
- s[1 + 8] = s[2 + 8];
- s[2 + 8] = s[3 + 8];
-
- s[4 + 8] = s[5 + 8];
- s[5 + 8] = s[6 + 8];
- s[6 + 8] = s[7 + 8];
-
- s6 = s8;
- }
- }
- }
-}
-
-void av1_highbd_jnt_convolve_x_sse4_1(
- const uint16_t *src, int src_stride, uint16_t *dst0, int dst_stride0, int w,
- int h, const InterpFilterParams *filter_params_x,
- const InterpFilterParams *filter_params_y, const int subpel_x_q4,
- const int subpel_y_q4, ConvolveParams *conv_params, int bd) {
- CONV_BUF_TYPE *dst = conv_params->dst;
- int dst_stride = conv_params->dst_stride;
- const int fo_horiz = filter_params_x->taps / 2 - 1;
- const uint16_t *const src_ptr = src - fo_horiz;
- const int bits = FILTER_BITS - conv_params->round_1;
- (void)filter_params_y;
- (void)subpel_y_q4;
-
- int i, j;
- __m128i s[4], coeffs_x[4];
-
- const int do_average = conv_params->do_average;
- const int use_jnt_comp_avg = conv_params->use_jnt_comp_avg;
- const int w0 = conv_params->fwd_offset;
- const int w1 = conv_params->bck_offset;
- const __m128i wt0 = _mm_set1_epi32(w0);
- const __m128i wt1 = _mm_set1_epi32(w1);
- const __m128i zero = _mm_setzero_si128();
-
- const __m128i round_const_x =
- _mm_set1_epi32(((1 << conv_params->round_0) >> 1));
- const __m128i round_shift_x = _mm_cvtsi32_si128(conv_params->round_0);
- const __m128i round_shift_bits = _mm_cvtsi32_si128(bits);
-
- const int offset_0 =
- bd + 2 * FILTER_BITS - conv_params->round_0 - conv_params->round_1;
- const int offset = (1 << offset_0) + (1 << (offset_0 - 1));
- const __m128i offset_const = _mm_set1_epi32(offset);
- const int rounding_shift =
- 2 * FILTER_BITS - conv_params->round_0 - conv_params->round_1;
- const __m128i rounding_const = _mm_set1_epi32((1 << rounding_shift) >> 1);
- const __m128i clip_pixel_to_bd =
- _mm_set1_epi16(bd == 10 ? 1023 : (bd == 12 ? 4095 : 255));
-
- assert(bits >= 0);
- prepare_coeffs(filter_params_x, subpel_x_q4, coeffs_x);
-
- for (j = 0; j < w; j += 8) {
- /* Horizontal filter */
- for (i = 0; i < h; i += 1) {
- const __m128i row00 =
- _mm_loadu_si128((__m128i *)&src_ptr[i * src_stride + j]);
- const __m128i row01 =
- _mm_loadu_si128((__m128i *)&src_ptr[i * src_stride + (j + 8)]);
-
- // even pixels
- s[0] = _mm_alignr_epi8(row01, row00, 0);
- s[1] = _mm_alignr_epi8(row01, row00, 4);
- s[2] = _mm_alignr_epi8(row01, row00, 8);
- s[3] = _mm_alignr_epi8(row01, row00, 12);
-
- __m128i res_even = convolve(s, coeffs_x);
- res_even =
- _mm_sra_epi32(_mm_add_epi32(res_even, round_const_x), round_shift_x);
-
- // odd pixels
- s[0] = _mm_alignr_epi8(row01, row00, 2);
- s[1] = _mm_alignr_epi8(row01, row00, 6);
- s[2] = _mm_alignr_epi8(row01, row00, 10);
- s[3] = _mm_alignr_epi8(row01, row00, 14);
-
- __m128i res_odd = convolve(s, coeffs_x);
- res_odd =
- _mm_sra_epi32(_mm_add_epi32(res_odd, round_const_x), round_shift_x);
-
- res_even = _mm_sll_epi32(res_even, round_shift_bits);
- res_odd = _mm_sll_epi32(res_odd, round_shift_bits);
-
- __m128i res1 = _mm_unpacklo_epi32(res_even, res_odd);
- __m128i res_unsigned_lo = _mm_add_epi32(res1, offset_const);
- if (w - j < 8) {
- if (do_average) {
- const __m128i data_0 =
- _mm_loadl_epi64((__m128i *)(&dst[i * dst_stride + j]));
- const __m128i data_ref_0 = _mm_unpacklo_epi16(data_0, zero);
-
- const __m128i comp_avg_res = highbd_comp_avg_sse4_1(
- &data_ref_0, &res_unsigned_lo, &wt0, &wt1, use_jnt_comp_avg);
- const __m128i round_result = highbd_convolve_rounding_sse2(
- &comp_avg_res, &offset_const, &rounding_const, rounding_shift);
-
- const __m128i res_16b = _mm_packus_epi32(round_result, round_result);
- const __m128i res_clip = _mm_min_epi16(res_16b, clip_pixel_to_bd);
- _mm_storel_epi64((__m128i *)(&dst0[i * dst_stride0 + j]), res_clip);
- } else {
- __m128i res_16b = _mm_packus_epi32(res_unsigned_lo, res_unsigned_lo);
- _mm_storel_epi64((__m128i *)&dst[i * dst_stride + j], res_16b);
- }
- } else {
- __m128i res2 = _mm_unpackhi_epi32(res_even, res_odd);
- __m128i res_unsigned_hi = _mm_add_epi32(res2, offset_const);
- if (do_average) {
- const __m128i data_0 =
- _mm_loadu_si128((__m128i *)(&dst[i * dst_stride + j]));
- const __m128i data_ref_0_lo = _mm_unpacklo_epi16(data_0, zero);
- const __m128i data_ref_0_hi = _mm_unpackhi_epi16(data_0, zero);
-
- const __m128i comp_avg_res_lo = highbd_comp_avg_sse4_1(
- &data_ref_0_lo, &res_unsigned_lo, &wt0, &wt1, use_jnt_comp_avg);
- const __m128i comp_avg_res_hi = highbd_comp_avg_sse4_1(
- &data_ref_0_hi, &res_unsigned_hi, &wt0, &wt1, use_jnt_comp_avg);
-
- const __m128i round_result_lo = highbd_convolve_rounding_sse2(
- &comp_avg_res_lo, &offset_const, &rounding_const, rounding_shift);
- const __m128i round_result_hi = highbd_convolve_rounding_sse2(
- &comp_avg_res_hi, &offset_const, &rounding_const, rounding_shift);
-
- const __m128i res_16b =
- _mm_packus_epi32(round_result_lo, round_result_hi);
- const __m128i res_clip = _mm_min_epi16(res_16b, clip_pixel_to_bd);
- _mm_store_si128((__m128i *)(&dst0[i * dst_stride0 + j]), res_clip);
- } else {
- __m128i res_16b = _mm_packus_epi32(res_unsigned_lo, res_unsigned_hi);
- _mm_store_si128((__m128i *)(&dst[i * dst_stride + j]), res_16b);
- }
- }
- }
- }
-}
diff --git a/third_party/aom/av1/common/x86/highbd_txfm_utility_sse4.h b/third_party/aom/av1/common/x86/highbd_txfm_utility_sse4.h
deleted file mode 100644
index 6f24e5948..000000000
--- a/third_party/aom/av1/common/x86/highbd_txfm_utility_sse4.h
+++ /dev/null
@@ -1,125 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#ifndef AOM_AV1_COMMON_X86_HIGHBD_TXFM_UTILITY_SSE4_H_
-#define AOM_AV1_COMMON_X86_HIGHBD_TXFM_UTILITY_SSE4_H_
-
-#include <smmintrin.h> /* SSE4.1 */
-
-#define TRANSPOSE_4X4(x0, x1, x2, x3, y0, y1, y2, y3) \
- do { \
- __m128i u0, u1, u2, u3; \
- u0 = _mm_unpacklo_epi32(x0, x1); \
- u1 = _mm_unpackhi_epi32(x0, x1); \
- u2 = _mm_unpacklo_epi32(x2, x3); \
- u3 = _mm_unpackhi_epi32(x2, x3); \
- y0 = _mm_unpacklo_epi64(u0, u2); \
- y1 = _mm_unpackhi_epi64(u0, u2); \
- y2 = _mm_unpacklo_epi64(u1, u3); \
- y3 = _mm_unpackhi_epi64(u1, u3); \
- } while (0)
-
-static INLINE void transpose_8x8(const __m128i *in, __m128i *out) {
- TRANSPOSE_4X4(in[0], in[2], in[4], in[6], out[0], out[2], out[4], out[6]);
- TRANSPOSE_4X4(in[1], in[3], in[5], in[7], out[8], out[10], out[12], out[14]);
- TRANSPOSE_4X4(in[8], in[10], in[12], in[14], out[1], out[3], out[5], out[7]);
- TRANSPOSE_4X4(in[9], in[11], in[13], in[15], out[9], out[11], out[13],
- out[15]);
-}
-
-static INLINE void transpose_16x16(const __m128i *in, __m128i *out) {
- // Upper left 8x8
- TRANSPOSE_4X4(in[0], in[4], in[8], in[12], out[0], out[4], out[8], out[12]);
- TRANSPOSE_4X4(in[1], in[5], in[9], in[13], out[16], out[20], out[24],
- out[28]);
- TRANSPOSE_4X4(in[16], in[20], in[24], in[28], out[1], out[5], out[9],
- out[13]);
- TRANSPOSE_4X4(in[17], in[21], in[25], in[29], out[17], out[21], out[25],
- out[29]);
-
- // Upper right 8x8
- TRANSPOSE_4X4(in[2], in[6], in[10], in[14], out[32], out[36], out[40],
- out[44]);
- TRANSPOSE_4X4(in[3], in[7], in[11], in[15], out[48], out[52], out[56],
- out[60]);
- TRANSPOSE_4X4(in[18], in[22], in[26], in[30], out[33], out[37], out[41],
- out[45]);
- TRANSPOSE_4X4(in[19], in[23], in[27], in[31], out[49], out[53], out[57],
- out[61]);
-
- // Lower left 8x8
- TRANSPOSE_4X4(in[32], in[36], in[40], in[44], out[2], out[6], out[10],
- out[14]);
- TRANSPOSE_4X4(in[33], in[37], in[41], in[45], out[18], out[22], out[26],
- out[30]);
- TRANSPOSE_4X4(in[48], in[52], in[56], in[60], out[3], out[7], out[11],
- out[15]);
- TRANSPOSE_4X4(in[49], in[53], in[57], in[61], out[19], out[23], out[27],
- out[31]);
- // Lower right 8x8
- TRANSPOSE_4X4(in[34], in[38], in[42], in[46], out[34], out[38], out[42],
- out[46]);
- TRANSPOSE_4X4(in[35], in[39], in[43], in[47], out[50], out[54], out[58],
- out[62]);
- TRANSPOSE_4X4(in[50], in[54], in[58], in[62], out[35], out[39], out[43],
- out[47]);
- TRANSPOSE_4X4(in[51], in[55], in[59], in[63], out[51], out[55], out[59],
- out[63]);
-}
-
-static INLINE void transpose_32x32(const __m128i *input, __m128i *output) {
- for (int j = 0; j < 8; j++) {
- for (int i = 0; i < 8; i++) {
- TRANSPOSE_4X4(input[i * 32 + j + 0], input[i * 32 + j + 8],
- input[i * 32 + j + 16], input[i * 32 + j + 24],
- output[j * 32 + i + 0], output[j * 32 + i + 8],
- output[j * 32 + i + 16], output[j * 32 + i + 24]);
- }
- }
-}
-
-// Note:
-// rounding = 1 << (bit - 1)
-static INLINE __m128i half_btf_sse4_1(const __m128i *w0, const __m128i *n0,
- const __m128i *w1, const __m128i *n1,
- const __m128i *rounding, int bit) {
- __m128i x, y;
-
- x = _mm_mullo_epi32(*w0, *n0);
- y = _mm_mullo_epi32(*w1, *n1);
- x = _mm_add_epi32(x, y);
- x = _mm_add_epi32(x, *rounding);
- x = _mm_srai_epi32(x, bit);
- return x;
-}
-
-static INLINE __m128i half_btf_0_sse4_1(const __m128i *w0, const __m128i *n0,
- const __m128i *rounding, int bit) {
- __m128i x;
-
- x = _mm_mullo_epi32(*w0, *n0);
- x = _mm_add_epi32(x, *rounding);
- x = _mm_srai_epi32(x, bit);
- return x;
-}
-
-typedef void (*transform_1d_sse4_1)(__m128i *in, __m128i *out, int bit,
- int do_cols, int bd, int out_shift);
-
-typedef void (*fwd_transform_1d_sse4_1)(__m128i *in, __m128i *out, int bit,
- const int num_cols);
-
-void av1_highbd_inv_txfm2d_add_universe_sse4_1(const int32_t *input,
- uint8_t *output, int stride,
- TX_TYPE tx_type, TX_SIZE tx_size,
- int eob, const int bd);
-
-#endif // AOM_AV1_COMMON_X86_HIGHBD_TXFM_UTILITY_SSE4_H_
diff --git a/third_party/aom/av1/common/x86/highbd_warp_plane_sse4.c b/third_party/aom/av1/common/x86/highbd_warp_plane_sse4.c
deleted file mode 100644
index 4bcab0564..000000000
--- a/third_party/aom/av1/common/x86/highbd_warp_plane_sse4.c
+++ /dev/null
@@ -1,624 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#include <smmintrin.h>
-
-#include "config/av1_rtcd.h"
-
-#include "av1/common/warped_motion.h"
-
-static const uint8_t warp_highbd_arrange_bytes[16] = {
- 0, 2, 4, 6, 8, 10, 12, 14, 1, 3, 5, 7, 9, 11, 13, 15
-};
-
-static const uint8_t highbd_shuffle_alpha0_mask0[16] = {
- 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3
-};
-static const uint8_t highbd_shuffle_alpha0_mask1[16] = {
- 4, 5, 6, 7, 4, 5, 6, 7, 4, 5, 6, 7, 4, 5, 6, 7
-};
-static const uint8_t highbd_shuffle_alpha0_mask2[16] = {
- 8, 9, 10, 11, 8, 9, 10, 11, 8, 9, 10, 11, 8, 9, 10, 11
-};
-static const uint8_t highbd_shuffle_alpha0_mask3[16] = {
- 12, 13, 14, 15, 12, 13, 14, 15, 12, 13, 14, 15, 12, 13, 14, 15
-};
-
-static INLINE void highbd_prepare_horizontal_filter_coeff(int alpha, int sx,
- __m128i *coeff) {
- // Filter even-index pixels
- const __m128i tmp_0 = _mm_loadu_si128(
- (__m128i *)(warped_filter + ((sx + 0 * alpha) >> WARPEDDIFF_PREC_BITS)));
- const __m128i tmp_2 = _mm_loadu_si128(
- (__m128i *)(warped_filter + ((sx + 2 * alpha) >> WARPEDDIFF_PREC_BITS)));
- const __m128i tmp_4 = _mm_loadu_si128(
- (__m128i *)(warped_filter + ((sx + 4 * alpha) >> WARPEDDIFF_PREC_BITS)));
- const __m128i tmp_6 = _mm_loadu_si128(
- (__m128i *)(warped_filter + ((sx + 6 * alpha) >> WARPEDDIFF_PREC_BITS)));
-
- // coeffs 0 1 0 1 2 3 2 3 for pixels 0, 2
- const __m128i tmp_8 = _mm_unpacklo_epi32(tmp_0, tmp_2);
- // coeffs 0 1 0 1 2 3 2 3 for pixels 4, 6
- const __m128i tmp_10 = _mm_unpacklo_epi32(tmp_4, tmp_6);
- // coeffs 4 5 4 5 6 7 6 7 for pixels 0, 2
- const __m128i tmp_12 = _mm_unpackhi_epi32(tmp_0, tmp_2);
- // coeffs 4 5 4 5 6 7 6 7 for pixels 4, 6
- const __m128i tmp_14 = _mm_unpackhi_epi32(tmp_4, tmp_6);
-
- // coeffs 0 1 0 1 0 1 0 1 for pixels 0, 2, 4, 6
- coeff[0] = _mm_unpacklo_epi64(tmp_8, tmp_10);
- // coeffs 2 3 2 3 2 3 2 3 for pixels 0, 2, 4, 6
- coeff[2] = _mm_unpackhi_epi64(tmp_8, tmp_10);
- // coeffs 4 5 4 5 4 5 4 5 for pixels 0, 2, 4, 6
- coeff[4] = _mm_unpacklo_epi64(tmp_12, tmp_14);
- // coeffs 6 7 6 7 6 7 6 7 for pixels 0, 2, 4, 6
- coeff[6] = _mm_unpackhi_epi64(tmp_12, tmp_14);
-
- // Filter odd-index pixels
- const __m128i tmp_1 = _mm_loadu_si128(
- (__m128i *)(warped_filter + ((sx + 1 * alpha) >> WARPEDDIFF_PREC_BITS)));
- const __m128i tmp_3 = _mm_loadu_si128(
- (__m128i *)(warped_filter + ((sx + 3 * alpha) >> WARPEDDIFF_PREC_BITS)));
- const __m128i tmp_5 = _mm_loadu_si128(
- (__m128i *)(warped_filter + ((sx + 5 * alpha) >> WARPEDDIFF_PREC_BITS)));
- const __m128i tmp_7 = _mm_loadu_si128(
- (__m128i *)(warped_filter + ((sx + 7 * alpha) >> WARPEDDIFF_PREC_BITS)));
-
- const __m128i tmp_9 = _mm_unpacklo_epi32(tmp_1, tmp_3);
- const __m128i tmp_11 = _mm_unpacklo_epi32(tmp_5, tmp_7);
- const __m128i tmp_13 = _mm_unpackhi_epi32(tmp_1, tmp_3);
- const __m128i tmp_15 = _mm_unpackhi_epi32(tmp_5, tmp_7);
-
- coeff[1] = _mm_unpacklo_epi64(tmp_9, tmp_11);
- coeff[3] = _mm_unpackhi_epi64(tmp_9, tmp_11);
- coeff[5] = _mm_unpacklo_epi64(tmp_13, tmp_15);
- coeff[7] = _mm_unpackhi_epi64(tmp_13, tmp_15);
-}
-
-static INLINE void highbd_prepare_horizontal_filter_coeff_alpha0(
- int sx, __m128i *coeff) {
- // Filter coeff
- const __m128i tmp_0 = _mm_loadu_si128(
- (__m128i *)(warped_filter + (sx >> WARPEDDIFF_PREC_BITS)));
-
- coeff[0] = _mm_shuffle_epi8(
- tmp_0, _mm_loadu_si128((__m128i *)highbd_shuffle_alpha0_mask0));
- coeff[2] = _mm_shuffle_epi8(
- tmp_0, _mm_loadu_si128((__m128i *)highbd_shuffle_alpha0_mask1));
- coeff[4] = _mm_shuffle_epi8(
- tmp_0, _mm_loadu_si128((__m128i *)highbd_shuffle_alpha0_mask2));
- coeff[6] = _mm_shuffle_epi8(
- tmp_0, _mm_loadu_si128((__m128i *)highbd_shuffle_alpha0_mask3));
-
- coeff[1] = coeff[0];
- coeff[3] = coeff[2];
- coeff[5] = coeff[4];
- coeff[7] = coeff[6];
-}
-
-static INLINE void highbd_filter_src_pixels(
- const __m128i *src, const __m128i *src2, __m128i *tmp, __m128i *coeff,
- const int offset_bits_horiz, const int reduce_bits_horiz, int k) {
- const __m128i src_1 = *src;
- const __m128i src2_1 = *src2;
-
- const __m128i round_const = _mm_set1_epi32((1 << offset_bits_horiz) +
- ((1 << reduce_bits_horiz) >> 1));
-
- const __m128i res_0 = _mm_madd_epi16(src_1, coeff[0]);
- const __m128i res_2 =
- _mm_madd_epi16(_mm_alignr_epi8(src2_1, src_1, 4), coeff[2]);
- const __m128i res_4 =
- _mm_madd_epi16(_mm_alignr_epi8(src2_1, src_1, 8), coeff[4]);
- const __m128i res_6 =
- _mm_madd_epi16(_mm_alignr_epi8(src2_1, src_1, 12), coeff[6]);
-
- __m128i res_even =
- _mm_add_epi32(_mm_add_epi32(res_0, res_4), _mm_add_epi32(res_2, res_6));
- res_even = _mm_sra_epi32(_mm_add_epi32(res_even, round_const),
- _mm_cvtsi32_si128(reduce_bits_horiz));
-
- const __m128i res_1 =
- _mm_madd_epi16(_mm_alignr_epi8(src2_1, src_1, 2), coeff[1]);
- const __m128i res_3 =
- _mm_madd_epi16(_mm_alignr_epi8(src2_1, src_1, 6), coeff[3]);
- const __m128i res_5 =
- _mm_madd_epi16(_mm_alignr_epi8(src2_1, src_1, 10), coeff[5]);
- const __m128i res_7 =
- _mm_madd_epi16(_mm_alignr_epi8(src2_1, src_1, 14), coeff[7]);
-
- __m128i res_odd =
- _mm_add_epi32(_mm_add_epi32(res_1, res_5), _mm_add_epi32(res_3, res_7));
- res_odd = _mm_sra_epi32(_mm_add_epi32(res_odd, round_const),
- _mm_cvtsi32_si128(reduce_bits_horiz));
-
- // Combine results into one register.
- // We store the columns in the order 0, 2, 4, 6, 1, 3, 5, 7
- // as this order helps with the vertical filter.
- tmp[k + 7] = _mm_packs_epi32(res_even, res_odd);
-}
-
-static INLINE void highbd_horiz_filter(const __m128i *src, const __m128i *src2,
- __m128i *tmp, int sx, int alpha, int k,
- const int offset_bits_horiz,
- const int reduce_bits_horiz) {
- __m128i coeff[8];
- highbd_prepare_horizontal_filter_coeff(alpha, sx, coeff);
- highbd_filter_src_pixels(src, src2, tmp, coeff, offset_bits_horiz,
- reduce_bits_horiz, k);
-}
-
-static INLINE void highbd_warp_horizontal_filter_alpha0_beta0(
- const uint16_t *ref, __m128i *tmp, int stride, int32_t ix4, int32_t iy4,
- int32_t sx4, int alpha, int beta, int p_height, int height, int i,
- const int offset_bits_horiz, const int reduce_bits_horiz) {
- (void)beta;
- (void)alpha;
- int k;
-
- __m128i coeff[8];
- highbd_prepare_horizontal_filter_coeff_alpha0(sx4, coeff);
-
- for (k = -7; k < AOMMIN(8, p_height - i); ++k) {
- int iy = iy4 + k;
- if (iy < 0)
- iy = 0;
- else if (iy > height - 1)
- iy = height - 1;
-
- // Load source pixels
- const __m128i src =
- _mm_loadu_si128((__m128i *)(ref + iy * stride + ix4 - 7));
- const __m128i src2 =
- _mm_loadu_si128((__m128i *)(ref + iy * stride + ix4 + 1));
- highbd_filter_src_pixels(&src, &src2, tmp, coeff, offset_bits_horiz,
- reduce_bits_horiz, k);
- }
-}
-
-static INLINE void highbd_warp_horizontal_filter_alpha0(
- const uint16_t *ref, __m128i *tmp, int stride, int32_t ix4, int32_t iy4,
- int32_t sx4, int alpha, int beta, int p_height, int height, int i,
- const int offset_bits_horiz, const int reduce_bits_horiz) {
- (void)alpha;
- int k;
- for (k = -7; k < AOMMIN(8, p_height - i); ++k) {
- int iy = iy4 + k;
- if (iy < 0)
- iy = 0;
- else if (iy > height - 1)
- iy = height - 1;
- int sx = sx4 + beta * (k + 4);
-
- // Load source pixels
- const __m128i src =
- _mm_loadu_si128((__m128i *)(ref + iy * stride + ix4 - 7));
- const __m128i src2 =
- _mm_loadu_si128((__m128i *)(ref + iy * stride + ix4 + 1));
-
- __m128i coeff[8];
- highbd_prepare_horizontal_filter_coeff_alpha0(sx, coeff);
- highbd_filter_src_pixels(&src, &src2, tmp, coeff, offset_bits_horiz,
- reduce_bits_horiz, k);
- }
-}
-
-static INLINE void highbd_warp_horizontal_filter_beta0(
- const uint16_t *ref, __m128i *tmp, int stride, int32_t ix4, int32_t iy4,
- int32_t sx4, int alpha, int beta, int p_height, int height, int i,
- const int offset_bits_horiz, const int reduce_bits_horiz) {
- (void)beta;
- int k;
- __m128i coeff[8];
- highbd_prepare_horizontal_filter_coeff(alpha, sx4, coeff);
-
- for (k = -7; k < AOMMIN(8, p_height - i); ++k) {
- int iy = iy4 + k;
- if (iy < 0)
- iy = 0;
- else if (iy > height - 1)
- iy = height - 1;
-
- // Load source pixels
- const __m128i src =
- _mm_loadu_si128((__m128i *)(ref + iy * stride + ix4 - 7));
- const __m128i src2 =
- _mm_loadu_si128((__m128i *)(ref + iy * stride + ix4 + 1));
- highbd_filter_src_pixels(&src, &src2, tmp, coeff, offset_bits_horiz,
- reduce_bits_horiz, k);
- }
-}
-
-static INLINE void highbd_warp_horizontal_filter(
- const uint16_t *ref, __m128i *tmp, int stride, int32_t ix4, int32_t iy4,
- int32_t sx4, int alpha, int beta, int p_height, int height, int i,
- const int offset_bits_horiz, const int reduce_bits_horiz) {
- int k;
- for (k = -7; k < AOMMIN(8, p_height - i); ++k) {
- int iy = iy4 + k;
- if (iy < 0)
- iy = 0;
- else if (iy > height - 1)
- iy = height - 1;
- int sx = sx4 + beta * (k + 4);
-
- // Load source pixels
- const __m128i src =
- _mm_loadu_si128((__m128i *)(ref + iy * stride + ix4 - 7));
- const __m128i src2 =
- _mm_loadu_si128((__m128i *)(ref + iy * stride + ix4 + 1));
-
- highbd_horiz_filter(&src, &src2, tmp, sx, alpha, k, offset_bits_horiz,
- reduce_bits_horiz);
- }
-}
-
-static INLINE void highbd_prepare_warp_horizontal_filter(
- const uint16_t *ref, __m128i *tmp, int stride, int32_t ix4, int32_t iy4,
- int32_t sx4, int alpha, int beta, int p_height, int height, int i,
- const int offset_bits_horiz, const int reduce_bits_horiz) {
- if (alpha == 0 && beta == 0)
- highbd_warp_horizontal_filter_alpha0_beta0(
- ref, tmp, stride, ix4, iy4, sx4, alpha, beta, p_height, height, i,
- offset_bits_horiz, reduce_bits_horiz);
-
- else if (alpha == 0 && beta != 0)
- highbd_warp_horizontal_filter_alpha0(ref, tmp, stride, ix4, iy4, sx4, alpha,
- beta, p_height, height, i,
- offset_bits_horiz, reduce_bits_horiz);
-
- else if (alpha != 0 && beta == 0)
- highbd_warp_horizontal_filter_beta0(ref, tmp, stride, ix4, iy4, sx4, alpha,
- beta, p_height, height, i,
- offset_bits_horiz, reduce_bits_horiz);
- else
- highbd_warp_horizontal_filter(ref, tmp, stride, ix4, iy4, sx4, alpha, beta,
- p_height, height, i, offset_bits_horiz,
- reduce_bits_horiz);
-}
-
-void av1_highbd_warp_affine_sse4_1(const int32_t *mat, const uint16_t *ref,
- int width, int height, int stride,
- uint16_t *pred, int p_col, int p_row,
- int p_width, int p_height, int p_stride,
- int subsampling_x, int subsampling_y, int bd,
- ConvolveParams *conv_params, int16_t alpha,
- int16_t beta, int16_t gamma, int16_t delta) {
- __m128i tmp[15];
- int i, j, k;
- const int reduce_bits_horiz =
- conv_params->round_0 +
- AOMMAX(bd + FILTER_BITS - conv_params->round_0 - 14, 0);
- const int reduce_bits_vert = conv_params->is_compound
- ? conv_params->round_1
- : 2 * FILTER_BITS - reduce_bits_horiz;
- const int offset_bits_horiz = bd + FILTER_BITS - 1;
- assert(IMPLIES(conv_params->is_compound, conv_params->dst != NULL));
- assert(!(bd == 12 && reduce_bits_horiz < 5));
- assert(IMPLIES(conv_params->do_average, conv_params->is_compound));
-
- const int offset_bits_vert = bd + 2 * FILTER_BITS - reduce_bits_horiz;
- const __m128i clip_pixel =
- _mm_set1_epi16(bd == 10 ? 1023 : (bd == 12 ? 4095 : 255));
- const __m128i reduce_bits_vert_shift = _mm_cvtsi32_si128(reduce_bits_vert);
- const __m128i reduce_bits_vert_const =
- _mm_set1_epi32(((1 << reduce_bits_vert) >> 1));
- const __m128i res_add_const = _mm_set1_epi32(1 << offset_bits_vert);
- const int round_bits =
- 2 * FILTER_BITS - conv_params->round_0 - conv_params->round_1;
- const int offset_bits = bd + 2 * FILTER_BITS - conv_params->round_0;
- const __m128i res_sub_const =
- _mm_set1_epi32(-(1 << (offset_bits - conv_params->round_1)) -
- (1 << (offset_bits - conv_params->round_1 - 1)));
- __m128i round_bits_shift = _mm_cvtsi32_si128(round_bits);
- __m128i round_bits_const = _mm_set1_epi32(((1 << round_bits) >> 1));
-
- const int w0 = conv_params->fwd_offset;
- const int w1 = conv_params->bck_offset;
- const __m128i wt0 = _mm_set1_epi32(w0);
- const __m128i wt1 = _mm_set1_epi32(w1);
-
- /* Note: For this code to work, the left/right frame borders need to be
- extended by at least 13 pixels each. By the time we get here, other
- code will have set up this border, but we allow an explicit check
- for debugging purposes.
- */
- /*for (i = 0; i < height; ++i) {
- for (j = 0; j < 13; ++j) {
- assert(ref[i * stride - 13 + j] == ref[i * stride]);
- assert(ref[i * stride + width + j] == ref[i * stride + (width - 1)]);
- }
- }*/
-
- for (i = 0; i < p_height; i += 8) {
- for (j = 0; j < p_width; j += 8) {
- const int32_t src_x = (p_col + j + 4) << subsampling_x;
- const int32_t src_y = (p_row + i + 4) << subsampling_y;
- const int32_t dst_x = mat[2] * src_x + mat[3] * src_y + mat[0];
- const int32_t dst_y = mat[4] * src_x + mat[5] * src_y + mat[1];
- const int32_t x4 = dst_x >> subsampling_x;
- const int32_t y4 = dst_y >> subsampling_y;
-
- int32_t ix4 = x4 >> WARPEDMODEL_PREC_BITS;
- int32_t sx4 = x4 & ((1 << WARPEDMODEL_PREC_BITS) - 1);
- int32_t iy4 = y4 >> WARPEDMODEL_PREC_BITS;
- int32_t sy4 = y4 & ((1 << WARPEDMODEL_PREC_BITS) - 1);
-
- // Add in all the constant terms, including rounding and offset
- sx4 += alpha * (-4) + beta * (-4) + (1 << (WARPEDDIFF_PREC_BITS - 1)) +
- (WARPEDPIXEL_PREC_SHIFTS << WARPEDDIFF_PREC_BITS);
- sy4 += gamma * (-4) + delta * (-4) + (1 << (WARPEDDIFF_PREC_BITS - 1)) +
- (WARPEDPIXEL_PREC_SHIFTS << WARPEDDIFF_PREC_BITS);
-
- sx4 &= ~((1 << WARP_PARAM_REDUCE_BITS) - 1);
- sy4 &= ~((1 << WARP_PARAM_REDUCE_BITS) - 1);
-
- // Horizontal filter
- // If the block is aligned such that, after clamping, every sample
- // would be taken from the leftmost/rightmost column, then we can
- // skip the expensive horizontal filter.
- if (ix4 <= -7) {
- for (k = -7; k < AOMMIN(8, p_height - i); ++k) {
- int iy = iy4 + k;
- if (iy < 0)
- iy = 0;
- else if (iy > height - 1)
- iy = height - 1;
- tmp[k + 7] = _mm_set1_epi16(
- (1 << (bd + FILTER_BITS - reduce_bits_horiz - 1)) +
- ref[iy * stride] * (1 << (FILTER_BITS - reduce_bits_horiz)));
- }
- } else if (ix4 >= width + 6) {
- for (k = -7; k < AOMMIN(8, p_height - i); ++k) {
- int iy = iy4 + k;
- if (iy < 0)
- iy = 0;
- else if (iy > height - 1)
- iy = height - 1;
- tmp[k + 7] =
- _mm_set1_epi16((1 << (bd + FILTER_BITS - reduce_bits_horiz - 1)) +
- ref[iy * stride + (width - 1)] *
- (1 << (FILTER_BITS - reduce_bits_horiz)));
- }
- } else if (((ix4 - 7) < 0) || ((ix4 + 9) > width)) {
- const int out_of_boundary_left = -(ix4 - 6);
- const int out_of_boundary_right = (ix4 + 8) - width;
-
- for (k = -7; k < AOMMIN(8, p_height - i); ++k) {
- int iy = iy4 + k;
- if (iy < 0)
- iy = 0;
- else if (iy > height - 1)
- iy = height - 1;
- int sx = sx4 + beta * (k + 4);
-
- // Load source pixels
- const __m128i src =
- _mm_loadu_si128((__m128i *)(ref + iy * stride + ix4 - 7));
- const __m128i src2 =
- _mm_loadu_si128((__m128i *)(ref + iy * stride + ix4 + 1));
-
- const __m128i src_01 = _mm_shuffle_epi8(
- src, _mm_loadu_si128((__m128i *)warp_highbd_arrange_bytes));
- const __m128i src2_01 = _mm_shuffle_epi8(
- src2, _mm_loadu_si128((__m128i *)warp_highbd_arrange_bytes));
-
- __m128i src_lo = _mm_unpacklo_epi64(src_01, src2_01);
- __m128i src_hi = _mm_unpackhi_epi64(src_01, src2_01);
-
- if (out_of_boundary_left >= 0) {
- const __m128i shuffle_reg_left =
- _mm_loadu_si128((__m128i *)warp_pad_left[out_of_boundary_left]);
- src_lo = _mm_shuffle_epi8(src_lo, shuffle_reg_left);
- src_hi = _mm_shuffle_epi8(src_hi, shuffle_reg_left);
- }
-
- if (out_of_boundary_right >= 0) {
- const __m128i shuffle_reg_right = _mm_loadu_si128(
- (__m128i *)warp_pad_right[out_of_boundary_right]);
- src_lo = _mm_shuffle_epi8(src_lo, shuffle_reg_right);
- src_hi = _mm_shuffle_epi8(src_hi, shuffle_reg_right);
- }
-
- const __m128i src_padded = _mm_unpacklo_epi8(src_lo, src_hi);
- const __m128i src2_padded = _mm_unpackhi_epi8(src_lo, src_hi);
-
- highbd_horiz_filter(&src_padded, &src2_padded, tmp, sx, alpha, k,
- offset_bits_horiz, reduce_bits_horiz);
- }
- } else {
- highbd_prepare_warp_horizontal_filter(
- ref, tmp, stride, ix4, iy4, sx4, alpha, beta, p_height, height, i,
- offset_bits_horiz, reduce_bits_horiz);
- }
-
- // Vertical filter
- for (k = -4; k < AOMMIN(4, p_height - i - 4); ++k) {
- int sy = sy4 + delta * (k + 4);
-
- // Load from tmp and rearrange pairs of consecutive rows into the
- // column order 0 0 2 2 4 4 6 6; 1 1 3 3 5 5 7 7
- const __m128i *src = tmp + (k + 4);
- const __m128i src_0 = _mm_unpacklo_epi16(src[0], src[1]);
- const __m128i src_2 = _mm_unpacklo_epi16(src[2], src[3]);
- const __m128i src_4 = _mm_unpacklo_epi16(src[4], src[5]);
- const __m128i src_6 = _mm_unpacklo_epi16(src[6], src[7]);
-
- // Filter even-index pixels
- const __m128i tmp_0 = _mm_loadu_si128(
- (__m128i *)(warped_filter +
- ((sy + 0 * gamma) >> WARPEDDIFF_PREC_BITS)));
- const __m128i tmp_2 = _mm_loadu_si128(
- (__m128i *)(warped_filter +
- ((sy + 2 * gamma) >> WARPEDDIFF_PREC_BITS)));
- const __m128i tmp_4 = _mm_loadu_si128(
- (__m128i *)(warped_filter +
- ((sy + 4 * gamma) >> WARPEDDIFF_PREC_BITS)));
- const __m128i tmp_6 = _mm_loadu_si128(
- (__m128i *)(warped_filter +
- ((sy + 6 * gamma) >> WARPEDDIFF_PREC_BITS)));
-
- const __m128i tmp_8 = _mm_unpacklo_epi32(tmp_0, tmp_2);
- const __m128i tmp_10 = _mm_unpacklo_epi32(tmp_4, tmp_6);
- const __m128i tmp_12 = _mm_unpackhi_epi32(tmp_0, tmp_2);
- const __m128i tmp_14 = _mm_unpackhi_epi32(tmp_4, tmp_6);
-
- const __m128i coeff_0 = _mm_unpacklo_epi64(tmp_8, tmp_10);
- const __m128i coeff_2 = _mm_unpackhi_epi64(tmp_8, tmp_10);
- const __m128i coeff_4 = _mm_unpacklo_epi64(tmp_12, tmp_14);
- const __m128i coeff_6 = _mm_unpackhi_epi64(tmp_12, tmp_14);
-
- const __m128i res_0 = _mm_madd_epi16(src_0, coeff_0);
- const __m128i res_2 = _mm_madd_epi16(src_2, coeff_2);
- const __m128i res_4 = _mm_madd_epi16(src_4, coeff_4);
- const __m128i res_6 = _mm_madd_epi16(src_6, coeff_6);
-
- const __m128i res_even = _mm_add_epi32(_mm_add_epi32(res_0, res_2),
- _mm_add_epi32(res_4, res_6));
-
- // Filter odd-index pixels
- const __m128i src_1 = _mm_unpackhi_epi16(src[0], src[1]);
- const __m128i src_3 = _mm_unpackhi_epi16(src[2], src[3]);
- const __m128i src_5 = _mm_unpackhi_epi16(src[4], src[5]);
- const __m128i src_7 = _mm_unpackhi_epi16(src[6], src[7]);
-
- const __m128i tmp_1 = _mm_loadu_si128(
- (__m128i *)(warped_filter +
- ((sy + 1 * gamma) >> WARPEDDIFF_PREC_BITS)));
- const __m128i tmp_3 = _mm_loadu_si128(
- (__m128i *)(warped_filter +
- ((sy + 3 * gamma) >> WARPEDDIFF_PREC_BITS)));
- const __m128i tmp_5 = _mm_loadu_si128(
- (__m128i *)(warped_filter +
- ((sy + 5 * gamma) >> WARPEDDIFF_PREC_BITS)));
- const __m128i tmp_7 = _mm_loadu_si128(
- (__m128i *)(warped_filter +
- ((sy + 7 * gamma) >> WARPEDDIFF_PREC_BITS)));
-
- const __m128i tmp_9 = _mm_unpacklo_epi32(tmp_1, tmp_3);
- const __m128i tmp_11 = _mm_unpacklo_epi32(tmp_5, tmp_7);
- const __m128i tmp_13 = _mm_unpackhi_epi32(tmp_1, tmp_3);
- const __m128i tmp_15 = _mm_unpackhi_epi32(tmp_5, tmp_7);
-
- const __m128i coeff_1 = _mm_unpacklo_epi64(tmp_9, tmp_11);
- const __m128i coeff_3 = _mm_unpackhi_epi64(tmp_9, tmp_11);
- const __m128i coeff_5 = _mm_unpacklo_epi64(tmp_13, tmp_15);
- const __m128i coeff_7 = _mm_unpackhi_epi64(tmp_13, tmp_15);
-
- const __m128i res_1 = _mm_madd_epi16(src_1, coeff_1);
- const __m128i res_3 = _mm_madd_epi16(src_3, coeff_3);
- const __m128i res_5 = _mm_madd_epi16(src_5, coeff_5);
- const __m128i res_7 = _mm_madd_epi16(src_7, coeff_7);
-
- const __m128i res_odd = _mm_add_epi32(_mm_add_epi32(res_1, res_3),
- _mm_add_epi32(res_5, res_7));
-
- // Rearrange pixels back into the order 0 ... 7
- __m128i res_lo = _mm_unpacklo_epi32(res_even, res_odd);
- __m128i res_hi = _mm_unpackhi_epi32(res_even, res_odd);
-
- if (conv_params->is_compound) {
- __m128i *const p =
- (__m128i *)&conv_params
- ->dst[(i + k + 4) * conv_params->dst_stride + j];
- res_lo = _mm_add_epi32(res_lo, res_add_const);
- res_lo = _mm_sra_epi32(_mm_add_epi32(res_lo, reduce_bits_vert_const),
- reduce_bits_vert_shift);
-
- if (conv_params->do_average) {
- __m128i *const dst16 = (__m128i *)&pred[(i + k + 4) * p_stride + j];
- __m128i p_32 = _mm_cvtepu16_epi32(_mm_loadl_epi64(p));
-
- if (conv_params->use_jnt_comp_avg) {
- res_lo = _mm_add_epi32(_mm_mullo_epi32(p_32, wt0),
- _mm_mullo_epi32(res_lo, wt1));
- res_lo = _mm_srai_epi32(res_lo, DIST_PRECISION_BITS);
- } else {
- res_lo = _mm_srai_epi32(_mm_add_epi32(p_32, res_lo), 1);
- }
-
- __m128i res32_lo = _mm_add_epi32(res_lo, res_sub_const);
- res32_lo = _mm_sra_epi32(_mm_add_epi32(res32_lo, round_bits_const),
- round_bits_shift);
-
- __m128i res16_lo = _mm_packus_epi32(res32_lo, res32_lo);
- res16_lo = _mm_min_epi16(res16_lo, clip_pixel);
- _mm_storel_epi64(dst16, res16_lo);
- } else {
- res_lo = _mm_packus_epi32(res_lo, res_lo);
- _mm_storel_epi64(p, res_lo);
- }
- if (p_width > 4) {
- __m128i *const p4 =
- (__m128i *)&conv_params
- ->dst[(i + k + 4) * conv_params->dst_stride + j + 4];
-
- res_hi = _mm_add_epi32(res_hi, res_add_const);
- res_hi =
- _mm_sra_epi32(_mm_add_epi32(res_hi, reduce_bits_vert_const),
- reduce_bits_vert_shift);
- if (conv_params->do_average) {
- __m128i *const dst16_4 =
- (__m128i *)&pred[(i + k + 4) * p_stride + j + 4];
- __m128i p4_32 = _mm_cvtepu16_epi32(_mm_loadl_epi64(p4));
-
- if (conv_params->use_jnt_comp_avg) {
- res_hi = _mm_add_epi32(_mm_mullo_epi32(p4_32, wt0),
- _mm_mullo_epi32(res_hi, wt1));
- res_hi = _mm_srai_epi32(res_hi, DIST_PRECISION_BITS);
- } else {
- res_hi = _mm_srai_epi32(_mm_add_epi32(p4_32, res_hi), 1);
- }
-
- __m128i res32_hi = _mm_add_epi32(res_hi, res_sub_const);
- res32_hi = _mm_sra_epi32(
- _mm_add_epi32(res32_hi, round_bits_const), round_bits_shift);
- __m128i res16_hi = _mm_packus_epi32(res32_hi, res32_hi);
- res16_hi = _mm_min_epi16(res16_hi, clip_pixel);
- _mm_storel_epi64(dst16_4, res16_hi);
- } else {
- res_hi = _mm_packus_epi32(res_hi, res_hi);
- _mm_storel_epi64(p4, res_hi);
- }
- }
- } else {
- // Round and pack into 8 bits
- const __m128i round_const =
- _mm_set1_epi32(-(1 << (bd + reduce_bits_vert - 1)) +
- ((1 << reduce_bits_vert) >> 1));
-
- const __m128i res_lo_round = _mm_srai_epi32(
- _mm_add_epi32(res_lo, round_const), reduce_bits_vert);
- const __m128i res_hi_round = _mm_srai_epi32(
- _mm_add_epi32(res_hi, round_const), reduce_bits_vert);
-
- __m128i res_16bit = _mm_packs_epi32(res_lo_round, res_hi_round);
- // Clamp res_16bit to the range [0, 2^bd - 1]
- const __m128i max_val = _mm_set1_epi16((1 << bd) - 1);
- const __m128i zero = _mm_setzero_si128();
- res_16bit = _mm_max_epi16(_mm_min_epi16(res_16bit, max_val), zero);
-
- // Store, blending with 'pred' if needed
- __m128i *const p = (__m128i *)&pred[(i + k + 4) * p_stride + j];
-
- // Note: If we're outputting a 4x4 block, we need to be very careful
- // to only output 4 pixels at this point, to avoid encode/decode
- // mismatches when encoding with multiple threads.
- if (p_width == 4) {
- _mm_storel_epi64(p, res_16bit);
- } else {
- _mm_storeu_si128(p, res_16bit);
- }
- }
- }
- }
- }
-}
diff --git a/third_party/aom/av1/common/x86/highbd_wiener_convolve_avx2.c b/third_party/aom/av1/common/x86/highbd_wiener_convolve_avx2.c
deleted file mode 100644
index 0c8a8505b..000000000
--- a/third_party/aom/av1/common/x86/highbd_wiener_convolve_avx2.c
+++ /dev/null
@@ -1,245 +0,0 @@
-/*
- * Copyright (c) 2018, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#include <immintrin.h>
-#include <assert.h>
-
-#include "config/aom_dsp_rtcd.h"
-
-#include "av1/common/convolve.h"
-#include "aom_dsp/aom_dsp_common.h"
-#include "aom_dsp/aom_filter.h"
-#include "aom_dsp/x86/synonyms.h"
-#include "aom_dsp/x86/synonyms_avx2.h"
-
-// 128-bit xmmwords are written as [ ... ] with the MSB on the left.
-// 256-bit ymmwords are written as two xmmwords, [ ... ][ ... ] with the MSB
-// on the left.
-// A row of, say, 16-bit pixels with values p0, p1, p2, ..., p14, p15 will be
-// loaded and stored as [ p15 ... p9 p8 ][ p7 ... p1 p0 ].
-void av1_highbd_wiener_convolve_add_src_avx2(
- const uint8_t *src8, ptrdiff_t src_stride, uint8_t *dst8,
- ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4,
- const int16_t *filter_y, int y_step_q4, int w, int h,
- const ConvolveParams *conv_params, int bd) {
- assert(x_step_q4 == 16 && y_step_q4 == 16);
- assert(!(w & 7));
- assert(bd + FILTER_BITS - conv_params->round_0 + 2 <= 16);
- (void)x_step_q4;
- (void)y_step_q4;
-
- const uint16_t *const src = CONVERT_TO_SHORTPTR(src8);
- uint16_t *const dst = CONVERT_TO_SHORTPTR(dst8);
-
- DECLARE_ALIGNED(32, uint16_t,
- temp[(MAX_SB_SIZE + SUBPEL_TAPS - 1) * MAX_SB_SIZE]);
- int intermediate_height = h + SUBPEL_TAPS - 1;
- const int center_tap = ((SUBPEL_TAPS - 1) / 2);
- const uint16_t *const src_ptr = src - center_tap * src_stride - center_tap;
-
- const __m128i zero_128 = _mm_setzero_si128();
- const __m256i zero_256 = _mm256_setzero_si256();
-
- // Add an offset to account for the "add_src" part of the convolve function.
- const __m128i offset = _mm_insert_epi16(zero_128, 1 << FILTER_BITS, 3);
-
- const __m256i clamp_low = zero_256;
-
- /* Horizontal filter */
- {
- const __m256i clamp_high_ep =
- _mm256_set1_epi16(WIENER_CLAMP_LIMIT(conv_params->round_0, bd) - 1);
-
- // coeffs [ f7 f6 f5 f4 f3 f2 f1 f0 ]
- const __m128i coeffs_x = _mm_add_epi16(xx_loadu_128(filter_x), offset);
-
- // coeffs [ f3 f2 f3 f2 f1 f0 f1 f0 ]
- const __m128i coeffs_0123 = _mm_unpacklo_epi32(coeffs_x, coeffs_x);
- // coeffs [ f7 f6 f7 f6 f5 f4 f5 f4 ]
- const __m128i coeffs_4567 = _mm_unpackhi_epi32(coeffs_x, coeffs_x);
-
- // coeffs [ f1 f0 f1 f0 f1 f0 f1 f0 ]
- const __m128i coeffs_01_128 = _mm_unpacklo_epi64(coeffs_0123, coeffs_0123);
- // coeffs [ f3 f2 f3 f2 f3 f2 f3 f2 ]
- const __m128i coeffs_23_128 = _mm_unpackhi_epi64(coeffs_0123, coeffs_0123);
- // coeffs [ f5 f4 f5 f4 f5 f4 f5 f4 ]
- const __m128i coeffs_45_128 = _mm_unpacklo_epi64(coeffs_4567, coeffs_4567);
- // coeffs [ f7 f6 f7 f6 f7 f6 f7 f6 ]
- const __m128i coeffs_67_128 = _mm_unpackhi_epi64(coeffs_4567, coeffs_4567);
-
- // coeffs [ f1 f0 f1 f0 f1 f0 f1 f0 ][ f1 f0 f1 f0 f1 f0 f1 f0 ]
- const __m256i coeffs_01 = yy_set_m128i(coeffs_01_128, coeffs_01_128);
- // coeffs [ f3 f2 f3 f2 f3 f2 f3 f2 ][ f3 f2 f3 f2 f3 f2 f3 f2 ]
- const __m256i coeffs_23 = yy_set_m128i(coeffs_23_128, coeffs_23_128);
- // coeffs [ f5 f4 f5 f4 f5 f4 f5 f4 ][ f5 f4 f5 f4 f5 f4 f5 f4 ]
- const __m256i coeffs_45 = yy_set_m128i(coeffs_45_128, coeffs_45_128);
- // coeffs [ f7 f6 f7 f6 f7 f6 f7 f6 ][ f7 f6 f7 f6 f7 f6 f7 f6 ]
- const __m256i coeffs_67 = yy_set_m128i(coeffs_67_128, coeffs_67_128);
-
- const __m256i round_const = _mm256_set1_epi32(
- (1 << (conv_params->round_0 - 1)) + (1 << (bd + FILTER_BITS - 1)));
-
- for (int i = 0; i < intermediate_height; ++i) {
- for (int j = 0; j < w; j += 16) {
- const uint16_t *src_ij = src_ptr + i * src_stride + j;
-
- // Load 16-bit src data
- const __m256i src_0 = yy_loadu_256(src_ij + 0);
- const __m256i src_1 = yy_loadu_256(src_ij + 1);
- const __m256i src_2 = yy_loadu_256(src_ij + 2);
- const __m256i src_3 = yy_loadu_256(src_ij + 3);
- const __m256i src_4 = yy_loadu_256(src_ij + 4);
- const __m256i src_5 = yy_loadu_256(src_ij + 5);
- const __m256i src_6 = yy_loadu_256(src_ij + 6);
- const __m256i src_7 = yy_loadu_256(src_ij + 7);
-
- // Multiply src data by filter coeffs and sum pairs
- const __m256i res_0 = _mm256_madd_epi16(src_0, coeffs_01);
- const __m256i res_1 = _mm256_madd_epi16(src_1, coeffs_01);
- const __m256i res_2 = _mm256_madd_epi16(src_2, coeffs_23);
- const __m256i res_3 = _mm256_madd_epi16(src_3, coeffs_23);
- const __m256i res_4 = _mm256_madd_epi16(src_4, coeffs_45);
- const __m256i res_5 = _mm256_madd_epi16(src_5, coeffs_45);
- const __m256i res_6 = _mm256_madd_epi16(src_6, coeffs_67);
- const __m256i res_7 = _mm256_madd_epi16(src_7, coeffs_67);
-
- // Calculate scalar product for even- and odd-indices separately,
- // increasing to 32-bit precision
- const __m256i res_even_sum = _mm256_add_epi32(
- _mm256_add_epi32(res_0, res_4), _mm256_add_epi32(res_2, res_6));
- const __m256i res_even = _mm256_srai_epi32(
- _mm256_add_epi32(res_even_sum, round_const), conv_params->round_0);
-
- const __m256i res_odd_sum = _mm256_add_epi32(
- _mm256_add_epi32(res_1, res_5), _mm256_add_epi32(res_3, res_7));
- const __m256i res_odd = _mm256_srai_epi32(
- _mm256_add_epi32(res_odd_sum, round_const), conv_params->round_0);
-
- // Reduce to 16-bit precision and pack even- and odd-index results
- // back into one register. The _mm256_packs_epi32 intrinsic returns
- // a register with the pixels ordered as follows:
- // [ 15 13 11 9 14 12 10 8 ] [ 7 5 3 1 6 4 2 0 ]
- const __m256i res = _mm256_packs_epi32(res_even, res_odd);
- const __m256i res_clamped =
- _mm256_min_epi16(_mm256_max_epi16(res, clamp_low), clamp_high_ep);
-
- // Store in a temporary array
- yy_storeu_256(temp + i * MAX_SB_SIZE + j, res_clamped);
- }
- }
- }
-
- /* Vertical filter */
- {
- const __m256i clamp_high = _mm256_set1_epi16((1 << bd) - 1);
-
- // coeffs [ f7 f6 f5 f4 f3 f2 f1 f0 ]
- const __m128i coeffs_y = _mm_add_epi16(xx_loadu_128(filter_y), offset);
-
- // coeffs [ f3 f2 f3 f2 f1 f0 f1 f0 ]
- const __m128i coeffs_0123 = _mm_unpacklo_epi32(coeffs_y, coeffs_y);
- // coeffs [ f7 f6 f7 f6 f5 f4 f5 f4 ]
- const __m128i coeffs_4567 = _mm_unpackhi_epi32(coeffs_y, coeffs_y);
-
- // coeffs [ f1 f0 f1 f0 f1 f0 f1 f0 ]
- const __m128i coeffs_01_128 = _mm_unpacklo_epi64(coeffs_0123, coeffs_0123);
- // coeffs [ f3 f2 f3 f2 f3 f2 f3 f2 ]
- const __m128i coeffs_23_128 = _mm_unpackhi_epi64(coeffs_0123, coeffs_0123);
- // coeffs [ f5 f4 f5 f4 f5 f4 f5 f4 ]
- const __m128i coeffs_45_128 = _mm_unpacklo_epi64(coeffs_4567, coeffs_4567);
- // coeffs [ f7 f6 f7 f6 f7 f6 f7 f6 ]
- const __m128i coeffs_67_128 = _mm_unpackhi_epi64(coeffs_4567, coeffs_4567);
-
- // coeffs [ f1 f0 f1 f0 f1 f0 f1 f0 ][ f1 f0 f1 f0 f1 f0 f1 f0 ]
- const __m256i coeffs_01 = yy_set_m128i(coeffs_01_128, coeffs_01_128);
- // coeffs [ f3 f2 f3 f2 f3 f2 f3 f2 ][ f3 f2 f3 f2 f3 f2 f3 f2 ]
- const __m256i coeffs_23 = yy_set_m128i(coeffs_23_128, coeffs_23_128);
- // coeffs [ f5 f4 f5 f4 f5 f4 f5 f4 ][ f5 f4 f5 f4 f5 f4 f5 f4 ]
- const __m256i coeffs_45 = yy_set_m128i(coeffs_45_128, coeffs_45_128);
- // coeffs [ f7 f6 f7 f6 f7 f6 f7 f6 ][ f7 f6 f7 f6 f7 f6 f7 f6 ]
- const __m256i coeffs_67 = yy_set_m128i(coeffs_67_128, coeffs_67_128);
-
- const __m256i round_const =
- _mm256_set1_epi32((1 << (conv_params->round_1 - 1)) -
- (1 << (bd + conv_params->round_1 - 1)));
-
- for (int i = 0; i < h; ++i) {
- for (int j = 0; j < w; j += 16) {
- const uint16_t *temp_ij = temp + i * MAX_SB_SIZE + j;
-
- // Load 16-bit data from the output of the horizontal filter in
- // which the pixels are ordered as follows:
- // [ 15 13 11 9 14 12 10 8 ] [ 7 5 3 1 6 4 2 0 ]
- const __m256i data_0 = yy_loadu_256(temp_ij + 0 * MAX_SB_SIZE);
- const __m256i data_1 = yy_loadu_256(temp_ij + 1 * MAX_SB_SIZE);
- const __m256i data_2 = yy_loadu_256(temp_ij + 2 * MAX_SB_SIZE);
- const __m256i data_3 = yy_loadu_256(temp_ij + 3 * MAX_SB_SIZE);
- const __m256i data_4 = yy_loadu_256(temp_ij + 4 * MAX_SB_SIZE);
- const __m256i data_5 = yy_loadu_256(temp_ij + 5 * MAX_SB_SIZE);
- const __m256i data_6 = yy_loadu_256(temp_ij + 6 * MAX_SB_SIZE);
- const __m256i data_7 = yy_loadu_256(temp_ij + 7 * MAX_SB_SIZE);
-
- // Filter the even-indices, increasing to 32-bit precision
- const __m256i src_0 = _mm256_unpacklo_epi16(data_0, data_1);
- const __m256i src_2 = _mm256_unpacklo_epi16(data_2, data_3);
- const __m256i src_4 = _mm256_unpacklo_epi16(data_4, data_5);
- const __m256i src_6 = _mm256_unpacklo_epi16(data_6, data_7);
-
- const __m256i res_0 = _mm256_madd_epi16(src_0, coeffs_01);
- const __m256i res_2 = _mm256_madd_epi16(src_2, coeffs_23);
- const __m256i res_4 = _mm256_madd_epi16(src_4, coeffs_45);
- const __m256i res_6 = _mm256_madd_epi16(src_6, coeffs_67);
-
- const __m256i res_even = _mm256_add_epi32(
- _mm256_add_epi32(res_0, res_2), _mm256_add_epi32(res_4, res_6));
-
- // Filter the odd-indices, increasing to 32-bit precision
- const __m256i src_1 = _mm256_unpackhi_epi16(data_0, data_1);
- const __m256i src_3 = _mm256_unpackhi_epi16(data_2, data_3);
- const __m256i src_5 = _mm256_unpackhi_epi16(data_4, data_5);
- const __m256i src_7 = _mm256_unpackhi_epi16(data_6, data_7);
-
- const __m256i res_1 = _mm256_madd_epi16(src_1, coeffs_01);
- const __m256i res_3 = _mm256_madd_epi16(src_3, coeffs_23);
- const __m256i res_5 = _mm256_madd_epi16(src_5, coeffs_45);
- const __m256i res_7 = _mm256_madd_epi16(src_7, coeffs_67);
-
- const __m256i res_odd = _mm256_add_epi32(
- _mm256_add_epi32(res_1, res_3), _mm256_add_epi32(res_5, res_7));
-
- // Pixels are currently in the following order:
- // res_even order: [ 14 12 10 8 ] [ 6 4 2 0 ]
- // res_odd order: [ 15 13 11 9 ] [ 7 5 3 1 ]
- //
- // Rearrange the pixels into the following order:
- // res_lo order: [ 11 10 9 8 ] [ 3 2 1 0 ]
- // res_hi order: [ 15 14 13 12 ] [ 7 6 5 4 ]
- const __m256i res_lo = _mm256_unpacklo_epi32(res_even, res_odd);
- const __m256i res_hi = _mm256_unpackhi_epi32(res_even, res_odd);
-
- const __m256i res_lo_round = _mm256_srai_epi32(
- _mm256_add_epi32(res_lo, round_const), conv_params->round_1);
- const __m256i res_hi_round = _mm256_srai_epi32(
- _mm256_add_epi32(res_hi, round_const), conv_params->round_1);
-
- // Reduce to 16-bit precision and pack into the correct order:
- // [ 15 14 13 12 11 10 9 8 ][ 7 6 5 4 3 2 1 0 ]
- const __m256i res_16bit =
- _mm256_packs_epi32(res_lo_round, res_hi_round);
- const __m256i res_16bit_clamped = _mm256_min_epi16(
- _mm256_max_epi16(res_16bit, clamp_low), clamp_high);
-
- // Store in the dst array
- yy_storeu_256(dst + i * dst_stride + j, res_16bit_clamped);
- }
- }
- }
-}
diff --git a/third_party/aom/av1/common/x86/highbd_wiener_convolve_ssse3.c b/third_party/aom/av1/common/x86/highbd_wiener_convolve_ssse3.c
deleted file mode 100644
index 818b1099c..000000000
--- a/third_party/aom/av1/common/x86/highbd_wiener_convolve_ssse3.c
+++ /dev/null
@@ -1,202 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#include <tmmintrin.h>
-#include <assert.h>
-
-#include "config/aom_dsp_rtcd.h"
-
-#include "av1/common/convolve.h"
-#include "aom_dsp/aom_dsp_common.h"
-#include "aom_dsp/aom_filter.h"
-
-void av1_highbd_wiener_convolve_add_src_ssse3(
- const uint8_t *src8, ptrdiff_t src_stride, uint8_t *dst8,
- ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4,
- const int16_t *filter_y, int y_step_q4, int w, int h,
- const ConvolveParams *conv_params, int bd) {
- assert(x_step_q4 == 16 && y_step_q4 == 16);
- assert(!(w & 7));
- assert(bd + FILTER_BITS - conv_params->round_0 + 2 <= 16);
- (void)x_step_q4;
- (void)y_step_q4;
-
- const uint16_t *const src = CONVERT_TO_SHORTPTR(src8);
- uint16_t *const dst = CONVERT_TO_SHORTPTR(dst8);
-
- DECLARE_ALIGNED(16, uint16_t,
- temp[(MAX_SB_SIZE + SUBPEL_TAPS - 1) * MAX_SB_SIZE]);
- int intermediate_height = h + SUBPEL_TAPS - 1;
- int i, j;
- const int center_tap = ((SUBPEL_TAPS - 1) / 2);
- const uint16_t *const src_ptr = src - center_tap * src_stride - center_tap;
-
- const __m128i zero = _mm_setzero_si128();
- // Add an offset to account for the "add_src" part of the convolve function.
- const __m128i offset = _mm_insert_epi16(zero, 1 << FILTER_BITS, 3);
-
- /* Horizontal filter */
- {
- const __m128i coeffs_x =
- _mm_add_epi16(_mm_loadu_si128((__m128i *)filter_x), offset);
-
- // coeffs 0 1 0 1 2 3 2 3
- const __m128i tmp_0 = _mm_unpacklo_epi32(coeffs_x, coeffs_x);
- // coeffs 4 5 4 5 6 7 6 7
- const __m128i tmp_1 = _mm_unpackhi_epi32(coeffs_x, coeffs_x);
-
- // coeffs 0 1 0 1 0 1 0 1
- const __m128i coeff_01 = _mm_unpacklo_epi64(tmp_0, tmp_0);
- // coeffs 2 3 2 3 2 3 2 3
- const __m128i coeff_23 = _mm_unpackhi_epi64(tmp_0, tmp_0);
- // coeffs 4 5 4 5 4 5 4 5
- const __m128i coeff_45 = _mm_unpacklo_epi64(tmp_1, tmp_1);
- // coeffs 6 7 6 7 6 7 6 7
- const __m128i coeff_67 = _mm_unpackhi_epi64(tmp_1, tmp_1);
-
- const __m128i round_const = _mm_set1_epi32(
- (1 << (conv_params->round_0 - 1)) + (1 << (bd + FILTER_BITS - 1)));
-
- for (i = 0; i < intermediate_height; ++i) {
- for (j = 0; j < w; j += 8) {
- const __m128i data =
- _mm_loadu_si128((__m128i *)&src_ptr[i * src_stride + j]);
- const __m128i data2 =
- _mm_loadu_si128((__m128i *)&src_ptr[i * src_stride + j + 8]);
-
- // Filter even-index pixels
- const __m128i res_0 = _mm_madd_epi16(data, coeff_01);
- const __m128i res_2 =
- _mm_madd_epi16(_mm_alignr_epi8(data2, data, 4), coeff_23);
- const __m128i res_4 =
- _mm_madd_epi16(_mm_alignr_epi8(data2, data, 8), coeff_45);
- const __m128i res_6 =
- _mm_madd_epi16(_mm_alignr_epi8(data2, data, 12), coeff_67);
-
- __m128i res_even = _mm_add_epi32(_mm_add_epi32(res_0, res_4),
- _mm_add_epi32(res_2, res_6));
- res_even = _mm_srai_epi32(_mm_add_epi32(res_even, round_const),
- conv_params->round_0);
-
- // Filter odd-index pixels
- const __m128i res_1 =
- _mm_madd_epi16(_mm_alignr_epi8(data2, data, 2), coeff_01);
- const __m128i res_3 =
- _mm_madd_epi16(_mm_alignr_epi8(data2, data, 6), coeff_23);
- const __m128i res_5 =
- _mm_madd_epi16(_mm_alignr_epi8(data2, data, 10), coeff_45);
- const __m128i res_7 =
- _mm_madd_epi16(_mm_alignr_epi8(data2, data, 14), coeff_67);
-
- __m128i res_odd = _mm_add_epi32(_mm_add_epi32(res_1, res_5),
- _mm_add_epi32(res_3, res_7));
- res_odd = _mm_srai_epi32(_mm_add_epi32(res_odd, round_const),
- conv_params->round_0);
-
- // Pack in the column order 0, 2, 4, 6, 1, 3, 5, 7
- const __m128i maxval =
- _mm_set1_epi16((WIENER_CLAMP_LIMIT(conv_params->round_0, bd)) - 1);
- __m128i res = _mm_packs_epi32(res_even, res_odd);
- res = _mm_min_epi16(_mm_max_epi16(res, zero), maxval);
- _mm_storeu_si128((__m128i *)&temp[i * MAX_SB_SIZE + j], res);
- }
- }
- }
-
- /* Vertical filter */
- {
- const __m128i coeffs_y =
- _mm_add_epi16(_mm_loadu_si128((__m128i *)filter_y), offset);
-
- // coeffs 0 1 0 1 2 3 2 3
- const __m128i tmp_0 = _mm_unpacklo_epi32(coeffs_y, coeffs_y);
- // coeffs 4 5 4 5 6 7 6 7
- const __m128i tmp_1 = _mm_unpackhi_epi32(coeffs_y, coeffs_y);
-
- // coeffs 0 1 0 1 0 1 0 1
- const __m128i coeff_01 = _mm_unpacklo_epi64(tmp_0, tmp_0);
- // coeffs 2 3 2 3 2 3 2 3
- const __m128i coeff_23 = _mm_unpackhi_epi64(tmp_0, tmp_0);
- // coeffs 4 5 4 5 4 5 4 5
- const __m128i coeff_45 = _mm_unpacklo_epi64(tmp_1, tmp_1);
- // coeffs 6 7 6 7 6 7 6 7
- const __m128i coeff_67 = _mm_unpackhi_epi64(tmp_1, tmp_1);
-
- const __m128i round_const =
- _mm_set1_epi32((1 << (conv_params->round_1 - 1)) -
- (1 << (bd + conv_params->round_1 - 1)));
-
- for (i = 0; i < h; ++i) {
- for (j = 0; j < w; j += 8) {
- // Filter even-index pixels
- const uint16_t *data = &temp[i * MAX_SB_SIZE + j];
- const __m128i src_0 =
- _mm_unpacklo_epi16(*(__m128i *)(data + 0 * MAX_SB_SIZE),
- *(__m128i *)(data + 1 * MAX_SB_SIZE));
- const __m128i src_2 =
- _mm_unpacklo_epi16(*(__m128i *)(data + 2 * MAX_SB_SIZE),
- *(__m128i *)(data + 3 * MAX_SB_SIZE));
- const __m128i src_4 =
- _mm_unpacklo_epi16(*(__m128i *)(data + 4 * MAX_SB_SIZE),
- *(__m128i *)(data + 5 * MAX_SB_SIZE));
- const __m128i src_6 =
- _mm_unpacklo_epi16(*(__m128i *)(data + 6 * MAX_SB_SIZE),
- *(__m128i *)(data + 7 * MAX_SB_SIZE));
-
- const __m128i res_0 = _mm_madd_epi16(src_0, coeff_01);
- const __m128i res_2 = _mm_madd_epi16(src_2, coeff_23);
- const __m128i res_4 = _mm_madd_epi16(src_4, coeff_45);
- const __m128i res_6 = _mm_madd_epi16(src_6, coeff_67);
-
- const __m128i res_even = _mm_add_epi32(_mm_add_epi32(res_0, res_2),
- _mm_add_epi32(res_4, res_6));
-
- // Filter odd-index pixels
- const __m128i src_1 =
- _mm_unpackhi_epi16(*(__m128i *)(data + 0 * MAX_SB_SIZE),
- *(__m128i *)(data + 1 * MAX_SB_SIZE));
- const __m128i src_3 =
- _mm_unpackhi_epi16(*(__m128i *)(data + 2 * MAX_SB_SIZE),
- *(__m128i *)(data + 3 * MAX_SB_SIZE));
- const __m128i src_5 =
- _mm_unpackhi_epi16(*(__m128i *)(data + 4 * MAX_SB_SIZE),
- *(__m128i *)(data + 5 * MAX_SB_SIZE));
- const __m128i src_7 =
- _mm_unpackhi_epi16(*(__m128i *)(data + 6 * MAX_SB_SIZE),
- *(__m128i *)(data + 7 * MAX_SB_SIZE));
-
- const __m128i res_1 = _mm_madd_epi16(src_1, coeff_01);
- const __m128i res_3 = _mm_madd_epi16(src_3, coeff_23);
- const __m128i res_5 = _mm_madd_epi16(src_5, coeff_45);
- const __m128i res_7 = _mm_madd_epi16(src_7, coeff_67);
-
- const __m128i res_odd = _mm_add_epi32(_mm_add_epi32(res_1, res_3),
- _mm_add_epi32(res_5, res_7));
-
- // Rearrange pixels back into the order 0 ... 7
- const __m128i res_lo = _mm_unpacklo_epi32(res_even, res_odd);
- const __m128i res_hi = _mm_unpackhi_epi32(res_even, res_odd);
-
- const __m128i res_lo_round = _mm_srai_epi32(
- _mm_add_epi32(res_lo, round_const), conv_params->round_1);
- const __m128i res_hi_round = _mm_srai_epi32(
- _mm_add_epi32(res_hi, round_const), conv_params->round_1);
-
- const __m128i maxval = _mm_set1_epi16((1 << bd) - 1);
- __m128i res_16bit = _mm_packs_epi32(res_lo_round, res_hi_round);
- res_16bit = _mm_min_epi16(_mm_max_epi16(res_16bit, zero), maxval);
-
- __m128i *const p = (__m128i *)&dst[i * dst_stride + j];
- _mm_storeu_si128(p, res_16bit);
- }
- }
- }
-}
diff --git a/third_party/aom/av1/common/x86/intra_edge_sse4.c b/third_party/aom/av1/common/x86/intra_edge_sse4.c
deleted file mode 100644
index 0c857b583..000000000
--- a/third_party/aom/av1/common/x86/intra_edge_sse4.c
+++ /dev/null
@@ -1,318 +0,0 @@
-/*
- * Copyright (c) 2017, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#include <assert.h>
-#include <smmintrin.h>
-
-#include "config/aom_config.h"
-#include "config/av1_rtcd.h"
-
-void av1_filter_intra_edge_sse4_1(uint8_t *p, int sz, int strength) {
- if (!strength) return;
-
- DECLARE_ALIGNED(16, static const int8_t, kern[3][16]) = {
- { 4, 8, 4, 0, 4, 8, 4, 0, 4, 8, 4, 0, 4, 8, 4, 0 }, // strength 1: 4,8,4
- { 5, 6, 5, 0, 5, 6, 5, 0, 5, 6, 5, 0, 5, 6, 5, 0 }, // strength 2: 5,6,5
- { 2, 4, 4, 4, 2, 0, 0, 0, 2, 4, 4, 4, 2, 0, 0, 0 } // strength 3: 2,4,4,4,2
- };
-
- DECLARE_ALIGNED(16, static const int8_t, v_const[5][16]) = {
- { 0, 1, 2, 3, 1, 2, 3, 4, 2, 3, 4, 5, 3, 4, 5, 6 },
- { 4, 5, 6, 7, 5, 6, 7, 8, 6, 7, 8, 9, 7, 8, 9, 10 },
- { 0, 1, 2, 3, 4, 5, 6, 7, 1, 2, 3, 4, 5, 6, 7, 8 },
- { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 },
- };
-
- // Extend the first and last samples to simplify the loop for the 5-tap case
- p[-1] = p[0];
- __m128i last = _mm_set1_epi8(p[sz - 1]);
- _mm_storeu_si128((__m128i *)&p[sz], last);
-
- // Adjust input pointer for filter support area
- uint8_t *in = (strength == 3) ? p - 1 : p;
-
- // Avoid modifying first sample
- uint8_t *out = p + 1;
- int len = sz - 1;
-
- const int use_3tap_filter = (strength < 3);
-
- if (use_3tap_filter) {
- __m128i coef0 = _mm_lddqu_si128((__m128i const *)kern[strength - 1]);
- __m128i shuf0 = _mm_lddqu_si128((__m128i const *)v_const[0]);
- __m128i shuf1 = _mm_lddqu_si128((__m128i const *)v_const[1]);
- __m128i iden = _mm_lddqu_si128((__m128i *)v_const[3]);
- __m128i in0 = _mm_lddqu_si128((__m128i *)in);
- while (len > 0) {
- int n_out = (len < 8) ? len : 8;
- __m128i d0 = _mm_shuffle_epi8(in0, shuf0);
- __m128i d1 = _mm_shuffle_epi8(in0, shuf1);
- d0 = _mm_maddubs_epi16(d0, coef0);
- d1 = _mm_maddubs_epi16(d1, coef0);
- d0 = _mm_hadd_epi16(d0, d1);
- __m128i eight = _mm_set1_epi16(8);
- d0 = _mm_add_epi16(d0, eight);
- d0 = _mm_srai_epi16(d0, 4);
- d0 = _mm_packus_epi16(d0, d0);
- __m128i out0 = _mm_lddqu_si128((__m128i *)out);
- __m128i n0 = _mm_set1_epi8(n_out);
- __m128i mask = _mm_cmpgt_epi8(n0, iden);
- out0 = _mm_blendv_epi8(out0, d0, mask);
- _mm_storel_epi64((__m128i *)out, out0);
- __m128i in1 = _mm_lddqu_si128((__m128i *)(in + 16));
- in0 = _mm_alignr_epi8(in1, in0, 8);
- in += 8;
- out += 8;
- len -= n_out;
- }
- } else { // 5-tap filter
- __m128i coef0 = _mm_lddqu_si128((__m128i const *)kern[strength - 1]);
- __m128i two = _mm_set1_epi8(2);
- __m128i shuf_a = _mm_lddqu_si128((__m128i const *)v_const[2]);
- __m128i shuf_b = _mm_add_epi8(shuf_a, two);
- __m128i shuf_c = _mm_add_epi8(shuf_b, two);
- __m128i shuf_d = _mm_add_epi8(shuf_c, two);
- __m128i iden = _mm_lddqu_si128((__m128i *)v_const[3]);
- __m128i in0 = _mm_lddqu_si128((__m128i *)in);
- while (len > 0) {
- int n_out = (len < 8) ? len : 8;
- __m128i d0 = _mm_shuffle_epi8(in0, shuf_a);
- __m128i d1 = _mm_shuffle_epi8(in0, shuf_b);
- __m128i d2 = _mm_shuffle_epi8(in0, shuf_c);
- __m128i d3 = _mm_shuffle_epi8(in0, shuf_d);
- d0 = _mm_maddubs_epi16(d0, coef0);
- d1 = _mm_maddubs_epi16(d1, coef0);
- d2 = _mm_maddubs_epi16(d2, coef0);
- d3 = _mm_maddubs_epi16(d3, coef0);
- d0 = _mm_hadd_epi16(d0, d1);
- d2 = _mm_hadd_epi16(d2, d3);
- d0 = _mm_hadd_epi16(d0, d2);
- __m128i eight = _mm_set1_epi16(8);
- d0 = _mm_add_epi16(d0, eight);
- d0 = _mm_srai_epi16(d0, 4);
- d0 = _mm_packus_epi16(d0, d0);
- __m128i out0 = _mm_lddqu_si128((__m128i *)out);
- __m128i n0 = _mm_set1_epi8(n_out);
- __m128i mask = _mm_cmpgt_epi8(n0, iden);
- out0 = _mm_blendv_epi8(out0, d0, mask);
- _mm_storel_epi64((__m128i *)out, out0);
- __m128i in1 = _mm_lddqu_si128((__m128i *)(in + 16));
- in0 = _mm_alignr_epi8(in1, in0, 8);
- in += 8;
- out += 8;
- len -= n_out;
- }
- }
-}
-
-void av1_filter_intra_edge_high_sse4_1(uint16_t *p, int sz, int strength) {
- if (!strength) return;
-
- DECLARE_ALIGNED(16, static const int16_t, kern[3][8]) = {
- { 4, 8, 4, 8, 4, 8, 4, 8 }, // strength 1: 4,8,4
- { 5, 6, 5, 6, 5, 6, 5, 6 }, // strength 2: 5,6,5
- { 2, 4, 2, 4, 2, 4, 2, 4 } // strength 3: 2,4,4,4,2
- };
-
- DECLARE_ALIGNED(16, static const int16_t,
- v_const[1][8]) = { { 0, 1, 2, 3, 4, 5, 6, 7 } };
-
- // Extend the first and last samples to simplify the loop for the 5-tap case
- p[-1] = p[0];
- __m128i last = _mm_set1_epi16(p[sz - 1]);
- _mm_storeu_si128((__m128i *)&p[sz], last);
-
- // Adjust input pointer for filter support area
- uint16_t *in = (strength == 3) ? p - 1 : p;
-
- // Avoid modifying first sample
- uint16_t *out = p + 1;
- int len = sz - 1;
-
- const int use_3tap_filter = (strength < 3);
-
- if (use_3tap_filter) {
- __m128i coef0 = _mm_lddqu_si128((__m128i const *)kern[strength - 1]);
- __m128i iden = _mm_lddqu_si128((__m128i *)v_const[0]);
- __m128i in0 = _mm_lddqu_si128((__m128i *)&in[0]);
- __m128i in8 = _mm_lddqu_si128((__m128i *)&in[8]);
- while (len > 0) {
- int n_out = (len < 8) ? len : 8;
- __m128i in1 = _mm_alignr_epi8(in8, in0, 2);
- __m128i in2 = _mm_alignr_epi8(in8, in0, 4);
- __m128i in02 = _mm_add_epi16(in0, in2);
- __m128i d0 = _mm_unpacklo_epi16(in02, in1);
- __m128i d1 = _mm_unpackhi_epi16(in02, in1);
- d0 = _mm_mullo_epi16(d0, coef0);
- d1 = _mm_mullo_epi16(d1, coef0);
- d0 = _mm_hadd_epi16(d0, d1);
- __m128i eight = _mm_set1_epi16(8);
- d0 = _mm_add_epi16(d0, eight);
- d0 = _mm_srli_epi16(d0, 4);
- __m128i out0 = _mm_lddqu_si128((__m128i *)out);
- __m128i n0 = _mm_set1_epi16(n_out);
- __m128i mask = _mm_cmpgt_epi16(n0, iden);
- out0 = _mm_blendv_epi8(out0, d0, mask);
- _mm_storeu_si128((__m128i *)out, out0);
- in += 8;
- in0 = in8;
- in8 = _mm_lddqu_si128((__m128i *)&in[8]);
- out += 8;
- len -= n_out;
- }
- } else { // 5-tap filter
- __m128i coef0 = _mm_lddqu_si128((__m128i const *)kern[strength - 1]);
- __m128i iden = _mm_lddqu_si128((__m128i *)v_const[0]);
- __m128i in0 = _mm_lddqu_si128((__m128i *)&in[0]);
- __m128i in8 = _mm_lddqu_si128((__m128i *)&in[8]);
- while (len > 0) {
- int n_out = (len < 8) ? len : 8;
- __m128i in1 = _mm_alignr_epi8(in8, in0, 2);
- __m128i in2 = _mm_alignr_epi8(in8, in0, 4);
- __m128i in3 = _mm_alignr_epi8(in8, in0, 6);
- __m128i in4 = _mm_alignr_epi8(in8, in0, 8);
- __m128i in04 = _mm_add_epi16(in0, in4);
- __m128i in123 = _mm_add_epi16(in1, in2);
- in123 = _mm_add_epi16(in123, in3);
- __m128i d0 = _mm_unpacklo_epi16(in04, in123);
- __m128i d1 = _mm_unpackhi_epi16(in04, in123);
- d0 = _mm_mullo_epi16(d0, coef0);
- d1 = _mm_mullo_epi16(d1, coef0);
- d0 = _mm_hadd_epi16(d0, d1);
- __m128i eight = _mm_set1_epi16(8);
- d0 = _mm_add_epi16(d0, eight);
- d0 = _mm_srli_epi16(d0, 4);
- __m128i out0 = _mm_lddqu_si128((__m128i *)out);
- __m128i n0 = _mm_set1_epi16(n_out);
- __m128i mask = _mm_cmpgt_epi16(n0, iden);
- out0 = _mm_blendv_epi8(out0, d0, mask);
- _mm_storeu_si128((__m128i *)out, out0);
- in += 8;
- in0 = in8;
- in8 = _mm_lddqu_si128((__m128i *)&in[8]);
- out += 8;
- len -= n_out;
- }
- }
-}
-
-void av1_upsample_intra_edge_sse4_1(uint8_t *p, int sz) {
- // interpolate half-sample positions
- assert(sz <= 24);
-
- DECLARE_ALIGNED(16, static const int8_t, kernel[1][16]) = {
- { -1, 9, 9, -1, -1, 9, 9, -1, -1, 9, 9, -1, -1, 9, 9, -1 }
- };
-
- DECLARE_ALIGNED(16, static const int8_t, v_const[2][16]) = {
- { 0, 1, 2, 3, 1, 2, 3, 4, 2, 3, 4, 5, 3, 4, 5, 6 },
- { 4, 5, 6, 7, 5, 6, 7, 8, 6, 7, 8, 9, 7, 8, 9, 10 }
- };
-
- // Extend first/last samples (upper-left p[-1], last p[sz-1])
- // to support 4-tap filter
- p[-2] = p[-1];
- p[sz] = p[sz - 1];
-
- uint8_t *in = &p[-2];
- uint8_t *out = &p[-2];
-
- int n = sz + 1; // Input length including upper-left sample
-
- __m128i in0 = _mm_lddqu_si128((__m128i *)&in[0]);
- __m128i in16 = _mm_lddqu_si128((__m128i *)&in[16]);
-
- __m128i coef0 = _mm_lddqu_si128((__m128i *)kernel[0]);
- __m128i shuf0 = _mm_lddqu_si128((__m128i *)v_const[0]);
- __m128i shuf1 = _mm_lddqu_si128((__m128i *)v_const[1]);
-
- while (n > 0) {
- __m128i in8 = _mm_alignr_epi8(in16, in0, 8);
- __m128i d0 = _mm_shuffle_epi8(in0, shuf0);
- __m128i d1 = _mm_shuffle_epi8(in0, shuf1);
- __m128i d2 = _mm_shuffle_epi8(in8, shuf0);
- __m128i d3 = _mm_shuffle_epi8(in8, shuf1);
- d0 = _mm_maddubs_epi16(d0, coef0);
- d1 = _mm_maddubs_epi16(d1, coef0);
- d2 = _mm_maddubs_epi16(d2, coef0);
- d3 = _mm_maddubs_epi16(d3, coef0);
- d0 = _mm_hadd_epi16(d0, d1);
- d2 = _mm_hadd_epi16(d2, d3);
- __m128i eight = _mm_set1_epi16(8);
- d0 = _mm_add_epi16(d0, eight);
- d2 = _mm_add_epi16(d2, eight);
- d0 = _mm_srai_epi16(d0, 4);
- d2 = _mm_srai_epi16(d2, 4);
- d0 = _mm_packus_epi16(d0, d2);
- __m128i in1 = _mm_alignr_epi8(in16, in0, 1);
- __m128i out0 = _mm_unpacklo_epi8(in1, d0);
- __m128i out1 = _mm_unpackhi_epi8(in1, d0);
- _mm_storeu_si128((__m128i *)&out[0], out0);
- _mm_storeu_si128((__m128i *)&out[16], out1);
- in0 = in16;
- in16 = _mm_setzero_si128();
- out += 32;
- n -= 16;
- }
-}
-
-void av1_upsample_intra_edge_high_sse4_1(uint16_t *p, int sz, int bd) {
- // interpolate half-sample positions
- assert(sz <= 24);
-
- DECLARE_ALIGNED(16, static const int16_t,
- kernel[1][8]) = { { -1, 9, -1, 9, -1, 9, -1, 9 } };
-
- // Extend first/last samples (upper-left p[-1], last p[sz-1])
- // to support 4-tap filter
- p[-2] = p[-1];
- p[sz] = p[sz - 1];
-
- uint16_t *in = &p[-2];
- uint16_t *out = in;
- int n = sz + 1;
-
- __m128i in0 = _mm_lddqu_si128((__m128i *)&in[0]);
- __m128i in8 = _mm_lddqu_si128((__m128i *)&in[8]);
- __m128i in16 = _mm_lddqu_si128((__m128i *)&in[16]);
- __m128i in24 = _mm_lddqu_si128((__m128i *)&in[24]);
-
- while (n > 0) {
- __m128i in1 = _mm_alignr_epi8(in8, in0, 2);
- __m128i in2 = _mm_alignr_epi8(in8, in0, 4);
- __m128i in3 = _mm_alignr_epi8(in8, in0, 6);
- __m128i sum0 = _mm_add_epi16(in0, in3);
- __m128i sum1 = _mm_add_epi16(in1, in2);
- __m128i d0 = _mm_unpacklo_epi16(sum0, sum1);
- __m128i d1 = _mm_unpackhi_epi16(sum0, sum1);
- __m128i coef0 = _mm_lddqu_si128((__m128i *)kernel[0]);
- d0 = _mm_madd_epi16(d0, coef0);
- d1 = _mm_madd_epi16(d1, coef0);
- __m128i eight = _mm_set1_epi32(8);
- d0 = _mm_add_epi32(d0, eight);
- d1 = _mm_add_epi32(d1, eight);
- d0 = _mm_srai_epi32(d0, 4);
- d1 = _mm_srai_epi32(d1, 4);
- d0 = _mm_packus_epi32(d0, d1);
- __m128i max0 = _mm_set1_epi16((1 << bd) - 1);
- d0 = _mm_min_epi16(d0, max0);
- __m128i out0 = _mm_unpacklo_epi16(in1, d0);
- __m128i out1 = _mm_unpackhi_epi16(in1, d0);
- _mm_storeu_si128((__m128i *)&out[0], out0);
- _mm_storeu_si128((__m128i *)&out[8], out1);
- in0 = in8;
- in8 = in16;
- in16 = in24;
- in24 = _mm_setzero_si128();
- out += 16;
- n -= 8;
- }
-}
diff --git a/third_party/aom/av1/common/x86/jnt_convolve_avx2.c b/third_party/aom/av1/common/x86/jnt_convolve_avx2.c
deleted file mode 100644
index 9f2e2b457..000000000
--- a/third_party/aom/av1/common/x86/jnt_convolve_avx2.c
+++ /dev/null
@@ -1,633 +0,0 @@
-/*
- * Copyright (c) 2018, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#include <immintrin.h>
-
-#include "config/aom_dsp_rtcd.h"
-
-#include "aom_dsp/x86/convolve_avx2.h"
-#include "aom_dsp/x86/convolve_common_intrin.h"
-#include "aom_dsp/x86/convolve_sse4_1.h"
-#include "aom_dsp/aom_dsp_common.h"
-#include "aom_dsp/aom_filter.h"
-#include "av1/common/convolve.h"
-
-static INLINE __m256i unpack_weights_avx2(ConvolveParams *conv_params) {
- const int w0 = conv_params->fwd_offset;
- const int w1 = conv_params->bck_offset;
- const __m256i wt0 = _mm256_set1_epi16(w0);
- const __m256i wt1 = _mm256_set1_epi16(w1);
- const __m256i wt = _mm256_unpacklo_epi16(wt0, wt1);
- return wt;
-}
-
-static INLINE __m256i load_line2_avx2(const void *a, const void *b) {
- return _mm256_permute2x128_si256(
- _mm256_castsi128_si256(_mm_loadu_si128((__m128i *)a)),
- _mm256_castsi128_si256(_mm_loadu_si128((__m128i *)b)), 0x20);
-}
-
-void av1_jnt_convolve_x_avx2(const uint8_t *src, int src_stride, uint8_t *dst0,
- int dst_stride0, int w, int h,
- const InterpFilterParams *filter_params_x,
- const InterpFilterParams *filter_params_y,
- const int subpel_x_q4, const int subpel_y_q4,
- ConvolveParams *conv_params) {
- CONV_BUF_TYPE *dst = conv_params->dst;
- int dst_stride = conv_params->dst_stride;
- const int bd = 8;
- int i, j;
- const int fo_horiz = filter_params_x->taps / 2 - 1;
- const uint8_t *const src_ptr = src - fo_horiz;
- const int bits = FILTER_BITS - conv_params->round_1;
- const __m256i wt = unpack_weights_avx2(conv_params);
- const int do_average = conv_params->do_average;
- const int use_jnt_comp_avg = conv_params->use_jnt_comp_avg;
- const int offset_0 =
- bd + 2 * FILTER_BITS - conv_params->round_0 - conv_params->round_1;
- const int offset = (1 << offset_0) + (1 << (offset_0 - 1));
- const __m256i offset_const = _mm256_set1_epi16(offset);
- const int rounding_shift =
- 2 * FILTER_BITS - conv_params->round_0 - conv_params->round_1;
- const __m256i rounding_const = _mm256_set1_epi16((1 << rounding_shift) >> 1);
- __m256i filt[4], coeffs[4];
-
- assert(bits >= 0);
- assert(conv_params->round_0 > 0);
-
- filt[0] = _mm256_load_si256((__m256i const *)filt_global_avx2);
- filt[1] = _mm256_load_si256((__m256i const *)(filt_global_avx2 + 32));
- filt[2] = _mm256_load_si256((__m256i const *)(filt_global_avx2 + 32 * 2));
- filt[3] = _mm256_load_si256((__m256i const *)(filt_global_avx2 + 32 * 3));
-
- prepare_coeffs_lowbd(filter_params_x, subpel_x_q4, coeffs);
-
- const __m256i round_const =
- _mm256_set1_epi16((1 << (conv_params->round_0 - 1)) >> 1);
- const __m128i round_shift = _mm_cvtsi32_si128(conv_params->round_0 - 1);
-
- (void)filter_params_y;
- (void)subpel_y_q4;
-
- for (i = 0; i < h; i += 2) {
- const uint8_t *src_data = src_ptr + i * src_stride;
- CONV_BUF_TYPE *dst_data = dst + i * dst_stride;
- for (j = 0; j < w; j += 8) {
- const __m256i data =
- load_line2_avx2(&src_data[j], &src_data[j + src_stride]);
-
- __m256i res = convolve_lowbd_x(data, coeffs, filt);
-
- res = _mm256_sra_epi16(_mm256_add_epi16(res, round_const), round_shift);
-
- res = _mm256_slli_epi16(res, bits);
-
- const __m256i res_unsigned = _mm256_add_epi16(res, offset_const);
-
- // Accumulate values into the destination buffer
- if (do_average) {
- const __m256i data_ref_0 =
- load_line2_avx2(&dst_data[j], &dst_data[j + dst_stride]);
- const __m256i comp_avg_res =
- comp_avg(&data_ref_0, &res_unsigned, &wt, use_jnt_comp_avg);
-
- const __m256i round_result = convolve_rounding(
- &comp_avg_res, &offset_const, &rounding_const, rounding_shift);
-
- const __m256i res_8 = _mm256_packus_epi16(round_result, round_result);
- const __m128i res_0 = _mm256_castsi256_si128(res_8);
- const __m128i res_1 = _mm256_extracti128_si256(res_8, 1);
-
- if (w > 4) {
- _mm_storel_epi64((__m128i *)(&dst0[i * dst_stride0 + j]), res_0);
- _mm_storel_epi64(
- (__m128i *)((&dst0[i * dst_stride0 + j + dst_stride0])), res_1);
- } else {
- *(uint32_t *)(&dst0[i * dst_stride0 + j]) = _mm_cvtsi128_si32(res_0);
- *(uint32_t *)(&dst0[i * dst_stride0 + j + dst_stride0]) =
- _mm_cvtsi128_si32(res_1);
- }
- } else {
- const __m128i res_0 = _mm256_castsi256_si128(res_unsigned);
- _mm_store_si128((__m128i *)(&dst[i * dst_stride + j]), res_0);
-
- const __m128i res_1 = _mm256_extracti128_si256(res_unsigned, 1);
- _mm_store_si128((__m128i *)(&dst[i * dst_stride + j + dst_stride]),
- res_1);
- }
- }
- }
-}
-
-void av1_jnt_convolve_y_avx2(const uint8_t *src, int src_stride, uint8_t *dst0,
- int dst_stride0, int w, int h,
- const InterpFilterParams *filter_params_x,
- const InterpFilterParams *filter_params_y,
- const int subpel_x_q4, const int subpel_y_q4,
- ConvolveParams *conv_params) {
- CONV_BUF_TYPE *dst = conv_params->dst;
- int dst_stride = conv_params->dst_stride;
- const int bd = 8;
- int i, j;
- const int fo_vert = filter_params_y->taps / 2 - 1;
- const uint8_t *const src_ptr = src - fo_vert * src_stride;
- // +1 to compensate for dividing the filter coeffs by 2
- const int left_shift = FILTER_BITS - conv_params->round_0 + 1;
- const __m256i round_const =
- _mm256_set1_epi32((1 << conv_params->round_1) >> 1);
- const __m128i round_shift = _mm_cvtsi32_si128(conv_params->round_1);
- const __m256i wt = unpack_weights_avx2(conv_params);
- const int do_average = conv_params->do_average;
- const int use_jnt_comp_avg = conv_params->use_jnt_comp_avg;
- const int offset_0 =
- bd + 2 * FILTER_BITS - conv_params->round_0 - conv_params->round_1;
- const int offset = (1 << offset_0) + (1 << (offset_0 - 1));
- const __m256i offset_const = _mm256_set1_epi16(offset);
- const int offset_1 = (1 << (bd + FILTER_BITS - 2));
- const __m256i offset_const_1 = _mm256_set1_epi16(offset_1);
- const __m256i offset_const_2 = _mm256_set1_epi16((1 << offset_0));
- const int rounding_shift =
- 2 * FILTER_BITS - conv_params->round_0 - conv_params->round_1;
- const __m256i rounding_const = _mm256_set1_epi16((1 << rounding_shift) >> 1);
- const __m256i zero = _mm256_setzero_si256();
- __m256i coeffs[4], s[8];
-
- assert((FILTER_BITS - conv_params->round_0) >= 0);
-
- prepare_coeffs_lowbd(filter_params_y, subpel_y_q4, coeffs);
-
- (void)conv_params;
- (void)filter_params_x;
- (void)subpel_x_q4;
-
- for (j = 0; j < w; j += 16) {
- const uint8_t *data = &src_ptr[j];
- __m256i src6;
- // Load lines a and b. Line a to lower 128, line b to upper 128
- {
- __m256i src_ab[7];
- __m256i src_a[7];
- src_a[0] = _mm256_castsi128_si256(_mm_loadu_si128((__m128i *)data));
- for (int kk = 0; kk < 6; ++kk) {
- data += src_stride;
- src_a[kk + 1] =
- _mm256_castsi128_si256(_mm_loadu_si128((__m128i *)data));
- src_ab[kk] = _mm256_permute2x128_si256(src_a[kk], src_a[kk + 1], 0x20);
- }
- src6 = src_a[6];
- s[0] = _mm256_unpacklo_epi8(src_ab[0], src_ab[1]);
- s[1] = _mm256_unpacklo_epi8(src_ab[2], src_ab[3]);
- s[2] = _mm256_unpacklo_epi8(src_ab[4], src_ab[5]);
- s[4] = _mm256_unpackhi_epi8(src_ab[0], src_ab[1]);
- s[5] = _mm256_unpackhi_epi8(src_ab[2], src_ab[3]);
- s[6] = _mm256_unpackhi_epi8(src_ab[4], src_ab[5]);
- }
-
- for (i = 0; i < h; i += 2) {
- data = &src_ptr[(i + 7) * src_stride + j];
- const __m256i src7 =
- _mm256_castsi128_si256(_mm_loadu_si128((__m128i *)data));
- const __m256i src_67a = _mm256_permute2x128_si256(src6, src7, 0x20);
-
- src6 = _mm256_castsi128_si256(
- _mm_loadu_si128((__m128i *)(data + src_stride)));
- const __m256i src_78a = _mm256_permute2x128_si256(src7, src6, 0x20);
-
- s[3] = _mm256_unpacklo_epi8(src_67a, src_78a);
- s[7] = _mm256_unpackhi_epi8(src_67a, src_78a);
-
- __m256i res_lo = convolve_lowbd(s, coeffs);
-
- res_lo = _mm256_add_epi16(res_lo, offset_const_1);
-
- const __m256i res_lo_0_32b = _mm256_unpacklo_epi16(res_lo, zero);
- const __m256i res_lo_0_shift =
- _mm256_slli_epi32(res_lo_0_32b, left_shift);
- const __m256i res_lo_0_round = _mm256_sra_epi32(
- _mm256_add_epi32(res_lo_0_shift, round_const), round_shift);
-
- const __m256i res_lo_1_32b = _mm256_unpackhi_epi16(res_lo, zero);
- const __m256i res_lo_1_shift =
- _mm256_slli_epi32(res_lo_1_32b, left_shift);
- const __m256i res_lo_1_round = _mm256_sra_epi32(
- _mm256_add_epi32(res_lo_1_shift, round_const), round_shift);
-
- const __m256i res_lo_round =
- _mm256_packs_epi32(res_lo_0_round, res_lo_1_round);
-
- const __m256i res_lo_unsigned =
- _mm256_add_epi16(res_lo_round, offset_const_2);
-
- if (w - j < 16) {
- if (do_average) {
- const __m256i data_ref_0 = load_line2_avx2(
- &dst[i * dst_stride + j], &dst[i * dst_stride + j + dst_stride]);
- const __m256i comp_avg_res =
- comp_avg(&data_ref_0, &res_lo_unsigned, &wt, use_jnt_comp_avg);
-
- const __m256i round_result = convolve_rounding(
- &comp_avg_res, &offset_const, &rounding_const, rounding_shift);
-
- const __m256i res_8 = _mm256_packus_epi16(round_result, round_result);
- const __m128i res_0 = _mm256_castsi256_si128(res_8);
- const __m128i res_1 = _mm256_extracti128_si256(res_8, 1);
-
- if (w - j > 4) {
- _mm_storel_epi64((__m128i *)(&dst0[i * dst_stride0 + j]), res_0);
- _mm_storel_epi64(
- (__m128i *)((&dst0[i * dst_stride0 + j + dst_stride0])), res_1);
- } else {
- *(uint32_t *)(&dst0[i * dst_stride0 + j]) =
- _mm_cvtsi128_si32(res_0);
- *(uint32_t *)(&dst0[i * dst_stride0 + j + dst_stride0]) =
- _mm_cvtsi128_si32(res_1);
- }
- } else {
- const __m128i res_0 = _mm256_castsi256_si128(res_lo_unsigned);
- _mm_store_si128((__m128i *)(&dst[i * dst_stride + j]), res_0);
-
- const __m128i res_1 = _mm256_extracti128_si256(res_lo_unsigned, 1);
- _mm_store_si128((__m128i *)(&dst[i * dst_stride + j + dst_stride]),
- res_1);
- }
- } else {
- __m256i res_hi = convolve_lowbd(s + 4, coeffs);
-
- res_hi = _mm256_add_epi16(res_hi, offset_const_1);
-
- const __m256i res_hi_0_32b = _mm256_unpacklo_epi16(res_hi, zero);
- const __m256i res_hi_0_shift =
- _mm256_slli_epi32(res_hi_0_32b, left_shift);
- const __m256i res_hi_0_round = _mm256_sra_epi32(
- _mm256_add_epi32(res_hi_0_shift, round_const), round_shift);
-
- const __m256i res_hi_1_32b = _mm256_unpackhi_epi16(res_hi, zero);
- const __m256i res_hi_1_shift =
- _mm256_slli_epi32(res_hi_1_32b, left_shift);
- const __m256i res_hi_1_round = _mm256_sra_epi32(
- _mm256_add_epi32(res_hi_1_shift, round_const), round_shift);
-
- const __m256i res_hi_round =
- _mm256_packs_epi32(res_hi_0_round, res_hi_1_round);
-
- const __m256i res_hi_unsigned =
- _mm256_add_epi16(res_hi_round, offset_const_2);
-
- if (do_average) {
- const __m256i data_ref_0_lo = load_line2_avx2(
- &dst[i * dst_stride + j], &dst[i * dst_stride + j + dst_stride]);
-
- const __m256i data_ref_0_hi =
- load_line2_avx2(&dst[i * dst_stride + j + 8],
- &dst[i * dst_stride + j + 8 + dst_stride]);
-
- const __m256i comp_avg_res_lo =
- comp_avg(&data_ref_0_lo, &res_lo_unsigned, &wt, use_jnt_comp_avg);
-
- const __m256i comp_avg_res_hi =
- comp_avg(&data_ref_0_hi, &res_hi_unsigned, &wt, use_jnt_comp_avg);
-
- const __m256i round_result_lo = convolve_rounding(
- &comp_avg_res_lo, &offset_const, &rounding_const, rounding_shift);
-
- const __m256i round_result_hi = convolve_rounding(
- &comp_avg_res_hi, &offset_const, &rounding_const, rounding_shift);
-
- const __m256i res_8 =
- _mm256_packus_epi16(round_result_lo, round_result_hi);
- const __m128i res_0 = _mm256_castsi256_si128(res_8);
- const __m128i res_1 = _mm256_extracti128_si256(res_8, 1);
-
- _mm_store_si128((__m128i *)(&dst0[i * dst_stride0 + j]), res_0);
- _mm_store_si128(
- (__m128i *)((&dst0[i * dst_stride0 + j + dst_stride0])), res_1);
-
- } else {
- const __m128i res_lo_0 = _mm256_castsi256_si128(res_lo_unsigned);
- _mm_store_si128((__m128i *)(&dst[i * dst_stride + j]), res_lo_0);
-
- const __m128i res_lo_1 = _mm256_extracti128_si256(res_lo_unsigned, 1);
- _mm_store_si128((__m128i *)(&dst[i * dst_stride + j + dst_stride]),
- res_lo_1);
-
- const __m128i res_hi_0 = _mm256_castsi256_si128(res_hi_unsigned);
- _mm_store_si128((__m128i *)(&dst[i * dst_stride + j + 8]), res_hi_0);
-
- const __m128i res_hi_1 = _mm256_extracti128_si256(res_hi_unsigned, 1);
- _mm_store_si128(
- (__m128i *)(&dst[i * dst_stride + j + 8 + dst_stride]), res_hi_1);
- }
- }
- s[0] = s[1];
- s[1] = s[2];
- s[2] = s[3];
-
- s[4] = s[5];
- s[5] = s[6];
- s[6] = s[7];
- }
- }
-}
-
-void av1_jnt_convolve_2d_avx2(const uint8_t *src, int src_stride, uint8_t *dst0,
- int dst_stride0, int w, int h,
- const InterpFilterParams *filter_params_x,
- const InterpFilterParams *filter_params_y,
- const int subpel_x_q4, const int subpel_y_q4,
- ConvolveParams *conv_params) {
- CONV_BUF_TYPE *dst = conv_params->dst;
- int dst_stride = conv_params->dst_stride;
- const int bd = 8;
-
- DECLARE_ALIGNED(32, int16_t, im_block[(MAX_SB_SIZE + MAX_FILTER_TAP) * 8]);
- int im_h = h + filter_params_y->taps - 1;
- int im_stride = 8;
- int i, j;
- const int fo_vert = filter_params_y->taps / 2 - 1;
- const int fo_horiz = filter_params_x->taps / 2 - 1;
- const uint8_t *const src_ptr = src - fo_vert * src_stride - fo_horiz;
- const __m256i wt = unpack_weights_avx2(conv_params);
- const int do_average = conv_params->do_average;
- const int use_jnt_comp_avg = conv_params->use_jnt_comp_avg;
- const int offset_0 =
- bd + 2 * FILTER_BITS - conv_params->round_0 - conv_params->round_1;
- const int offset = (1 << offset_0) + (1 << (offset_0 - 1));
- const __m256i offset_const = _mm256_set1_epi16(offset);
- const int rounding_shift =
- 2 * FILTER_BITS - conv_params->round_0 - conv_params->round_1;
- const __m256i rounding_const = _mm256_set1_epi16((1 << rounding_shift) >> 1);
- __m256i filt[4], s[8], coeffs_x[4], coeffs_y[4];
-
- assert(conv_params->round_0 > 0);
-
- filt[0] = _mm256_load_si256((__m256i const *)filt_global_avx2);
- filt[1] = _mm256_load_si256((__m256i const *)(filt_global_avx2 + 32));
- filt[2] = _mm256_load_si256((__m256i const *)(filt_global_avx2 + 32 * 2));
- filt[3] = _mm256_load_si256((__m256i const *)(filt_global_avx2 + 32 * 3));
-
- prepare_coeffs_lowbd(filter_params_x, subpel_x_q4, coeffs_x);
- prepare_coeffs(filter_params_y, subpel_y_q4, coeffs_y);
-
- const __m256i round_const_h = _mm256_set1_epi16(
- ((1 << (conv_params->round_0 - 1)) >> 1) + (1 << (bd + FILTER_BITS - 2)));
- const __m128i round_shift_h = _mm_cvtsi32_si128(conv_params->round_0 - 1);
-
- const __m256i round_const_v = _mm256_set1_epi32(
- ((1 << conv_params->round_1) >> 1) -
- (1 << (bd + 2 * FILTER_BITS - conv_params->round_0 - 1)));
- const __m128i round_shift_v = _mm_cvtsi32_si128(conv_params->round_1);
-
- for (j = 0; j < w; j += 8) {
- /* Horizontal filter */
- {
- const uint8_t *src_h = src_ptr + j;
- for (i = 0; i < im_h; i += 2) {
- __m256i data =
- _mm256_castsi128_si256(_mm_loadu_si128((__m128i *)src_h));
- if (i + 1 < im_h)
- data = _mm256_inserti128_si256(
- data, _mm_loadu_si128((__m128i *)(src_h + src_stride)), 1);
- src_h += (src_stride << 1);
- __m256i res = convolve_lowbd_x(data, coeffs_x, filt);
-
- res = _mm256_sra_epi16(_mm256_add_epi16(res, round_const_h),
- round_shift_h);
-
- _mm256_store_si256((__m256i *)&im_block[i * im_stride], res);
- }
- }
-
- /* Vertical filter */
- {
- __m256i s0 = _mm256_loadu_si256((__m256i *)(im_block + 0 * im_stride));
- __m256i s1 = _mm256_loadu_si256((__m256i *)(im_block + 1 * im_stride));
- __m256i s2 = _mm256_loadu_si256((__m256i *)(im_block + 2 * im_stride));
- __m256i s3 = _mm256_loadu_si256((__m256i *)(im_block + 3 * im_stride));
- __m256i s4 = _mm256_loadu_si256((__m256i *)(im_block + 4 * im_stride));
- __m256i s5 = _mm256_loadu_si256((__m256i *)(im_block + 5 * im_stride));
-
- s[0] = _mm256_unpacklo_epi16(s0, s1);
- s[1] = _mm256_unpacklo_epi16(s2, s3);
- s[2] = _mm256_unpacklo_epi16(s4, s5);
-
- s[4] = _mm256_unpackhi_epi16(s0, s1);
- s[5] = _mm256_unpackhi_epi16(s2, s3);
- s[6] = _mm256_unpackhi_epi16(s4, s5);
-
- for (i = 0; i < h; i += 2) {
- const int16_t *data = &im_block[i * im_stride];
-
- const __m256i s6 =
- _mm256_loadu_si256((__m256i *)(data + 6 * im_stride));
- const __m256i s7 =
- _mm256_loadu_si256((__m256i *)(data + 7 * im_stride));
-
- s[3] = _mm256_unpacklo_epi16(s6, s7);
- s[7] = _mm256_unpackhi_epi16(s6, s7);
-
- const __m256i res_a = convolve(s, coeffs_y);
- const __m256i res_a_round = _mm256_sra_epi32(
- _mm256_add_epi32(res_a, round_const_v), round_shift_v);
-
- if (w - j > 4) {
- const __m256i res_b = convolve(s + 4, coeffs_y);
- const __m256i res_b_round = _mm256_sra_epi32(
- _mm256_add_epi32(res_b, round_const_v), round_shift_v);
- const __m256i res_16b = _mm256_packs_epi32(res_a_round, res_b_round);
- const __m256i res_unsigned = _mm256_add_epi16(res_16b, offset_const);
-
- if (do_average) {
- const __m256i data_ref_0 =
- load_line2_avx2(&dst[i * dst_stride + j],
- &dst[i * dst_stride + j + dst_stride]);
- const __m256i comp_avg_res =
- comp_avg(&data_ref_0, &res_unsigned, &wt, use_jnt_comp_avg);
-
- const __m256i round_result = convolve_rounding(
- &comp_avg_res, &offset_const, &rounding_const, rounding_shift);
-
- const __m256i res_8 =
- _mm256_packus_epi16(round_result, round_result);
- const __m128i res_0 = _mm256_castsi256_si128(res_8);
- const __m128i res_1 = _mm256_extracti128_si256(res_8, 1);
-
- _mm_storel_epi64((__m128i *)(&dst0[i * dst_stride0 + j]), res_0);
- _mm_storel_epi64(
- (__m128i *)((&dst0[i * dst_stride0 + j + dst_stride0])), res_1);
- } else {
- const __m128i res_0 = _mm256_castsi256_si128(res_unsigned);
- _mm_store_si128((__m128i *)(&dst[i * dst_stride + j]), res_0);
-
- const __m128i res_1 = _mm256_extracti128_si256(res_unsigned, 1);
- _mm_store_si128((__m128i *)(&dst[i * dst_stride + j + dst_stride]),
- res_1);
- }
- } else {
- const __m256i res_16b = _mm256_packs_epi32(res_a_round, res_a_round);
- const __m256i res_unsigned = _mm256_add_epi16(res_16b, offset_const);
-
- if (do_average) {
- const __m256i data_ref_0 =
- load_line2_avx2(&dst[i * dst_stride + j],
- &dst[i * dst_stride + j + dst_stride]);
-
- const __m256i comp_avg_res =
- comp_avg(&data_ref_0, &res_unsigned, &wt, use_jnt_comp_avg);
-
- const __m256i round_result = convolve_rounding(
- &comp_avg_res, &offset_const, &rounding_const, rounding_shift);
-
- const __m256i res_8 =
- _mm256_packus_epi16(round_result, round_result);
- const __m128i res_0 = _mm256_castsi256_si128(res_8);
- const __m128i res_1 = _mm256_extracti128_si256(res_8, 1);
-
- *(uint32_t *)(&dst0[i * dst_stride0 + j]) =
- _mm_cvtsi128_si32(res_0);
- *(uint32_t *)(&dst0[i * dst_stride0 + j + dst_stride0]) =
- _mm_cvtsi128_si32(res_1);
-
- } else {
- const __m128i res_0 = _mm256_castsi256_si128(res_unsigned);
- _mm_store_si128((__m128i *)(&dst[i * dst_stride + j]), res_0);
-
- const __m128i res_1 = _mm256_extracti128_si256(res_unsigned, 1);
- _mm_store_si128((__m128i *)(&dst[i * dst_stride + j + dst_stride]),
- res_1);
- }
- }
-
- s[0] = s[1];
- s[1] = s[2];
- s[2] = s[3];
-
- s[4] = s[5];
- s[5] = s[6];
- s[6] = s[7];
- }
- }
- }
-}
-
-void av1_jnt_convolve_2d_copy_avx2(const uint8_t *src, int src_stride,
- uint8_t *dst0, int dst_stride0, int w, int h,
- const InterpFilterParams *filter_params_x,
- const InterpFilterParams *filter_params_y,
- const int subpel_x_q4, const int subpel_y_q4,
- ConvolveParams *conv_params) {
- const int bd = 8;
- CONV_BUF_TYPE *dst = conv_params->dst;
- int dst_stride = conv_params->dst_stride;
- (void)filter_params_x;
- (void)filter_params_y;
- (void)subpel_x_q4;
- (void)subpel_y_q4;
-
- const int bits =
- FILTER_BITS * 2 - conv_params->round_1 - conv_params->round_0;
- const __m128i left_shift = _mm_cvtsi32_si128(bits);
- const int do_average = conv_params->do_average;
- const int use_jnt_comp_avg = conv_params->use_jnt_comp_avg;
- const __m256i wt = unpack_weights_avx2(conv_params);
- const __m256i zero = _mm256_setzero_si256();
-
- const int offset_0 =
- bd + 2 * FILTER_BITS - conv_params->round_0 - conv_params->round_1;
- const int offset = (1 << offset_0) + (1 << (offset_0 - 1));
- const __m256i offset_const = _mm256_set1_epi16(offset);
- const int rounding_shift =
- 2 * FILTER_BITS - conv_params->round_0 - conv_params->round_1;
- const __m256i rounding_const = _mm256_set1_epi16((1 << rounding_shift) >> 1);
- int i, j;
-
- if (!(w % 16)) {
- for (i = 0; i < h; i += 1) {
- for (j = 0; j < w; j += 16) {
- const __m256i src_16bit = _mm256_cvtepu8_epi16(
- _mm_loadu_si128((__m128i *)(&src[i * src_stride + j])));
-
- const __m256i res = _mm256_sll_epi16(src_16bit, left_shift);
- const __m256i res_unsigned = _mm256_add_epi16(res, offset_const);
-
- if (do_average) {
- const __m256i data_ref_0 =
- _mm256_loadu_si256((__m256i *)(&dst[i * dst_stride + j]));
-
- const __m256i comp_avg_res =
- comp_avg(&data_ref_0, &res_unsigned, &wt, use_jnt_comp_avg);
-
- const __m256i round_result = convolve_rounding(
- &comp_avg_res, &offset_const, &rounding_const, rounding_shift);
-
- const __m256i res_8 = _mm256_packus_epi16(round_result, round_result);
- const __m256i res_0 = _mm256_permute4x64_epi64(res_8, 0xD8);
-
- _mm_store_si128((__m128i *)(&dst0[i * dst_stride0 + j]),
- _mm256_castsi256_si128(res_0));
- } else {
- _mm256_store_si256((__m256i *)(&dst[i * dst_stride + j]),
- res_unsigned);
- }
- }
- }
- } else if (!(w % 4)) {
- for (i = 0; i < h; i += 2) {
- for (j = 0; j < w; j += 8) {
- const __m128i src_row_0 =
- _mm_loadl_epi64((__m128i *)(&src[i * src_stride + j]));
- const __m128i src_row_1 =
- _mm_loadl_epi64((__m128i *)(&src[i * src_stride + j + src_stride]));
- // since not all compilers yet support _mm256_set_m128i()
- const __m256i src_10 = _mm256_insertf128_si256(
- _mm256_castsi128_si256(src_row_0), src_row_1, 1);
-
- const __m256i src_16bit = _mm256_unpacklo_epi8(src_10, zero);
-
- const __m256i res = _mm256_sll_epi16(src_16bit, left_shift);
-
- const __m256i res_unsigned = _mm256_add_epi16(res, offset_const);
-
- // Accumulate values into the destination buffer
- if (do_average) {
- const __m256i data_ref_0 = load_line2_avx2(
- &dst[i * dst_stride + j], &dst[i * dst_stride + j + dst_stride]);
- const __m256i comp_avg_res =
- comp_avg(&data_ref_0, &res_unsigned, &wt, use_jnt_comp_avg);
-
- const __m256i round_result = convolve_rounding(
- &comp_avg_res, &offset_const, &rounding_const, rounding_shift);
-
- const __m256i res_8 = _mm256_packus_epi16(round_result, round_result);
- const __m128i res_0 = _mm256_castsi256_si128(res_8);
- const __m128i res_1 = _mm256_extracti128_si256(res_8, 1);
-
- if (w > 4) {
- _mm_storel_epi64((__m128i *)(&dst0[i * dst_stride0 + j]), res_0);
- _mm_storel_epi64(
- (__m128i *)((&dst0[i * dst_stride0 + j + dst_stride0])), res_1);
- } else {
- *(uint32_t *)(&dst0[i * dst_stride0 + j]) =
- _mm_cvtsi128_si32(res_0);
- *(uint32_t *)(&dst0[i * dst_stride0 + j + dst_stride0]) =
- _mm_cvtsi128_si32(res_1);
- }
- } else {
- const __m128i res_0 = _mm256_castsi256_si128(res_unsigned);
- _mm_store_si128((__m128i *)(&dst[i * dst_stride + j]), res_0);
-
- const __m128i res_1 = _mm256_extracti128_si256(res_unsigned, 1);
- _mm_store_si128((__m128i *)(&dst[i * dst_stride + j + dst_stride]),
- res_1);
- }
- }
- }
- }
-}
diff --git a/third_party/aom/av1/common/x86/jnt_convolve_sse2.c b/third_party/aom/av1/common/x86/jnt_convolve_sse2.c
deleted file mode 100644
index 87dc3242e..000000000
--- a/third_party/aom/av1/common/x86/jnt_convolve_sse2.c
+++ /dev/null
@@ -1,385 +0,0 @@
-/*
- * Copyright (c) 2018, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#include <emmintrin.h>
-
-#include "config/aom_dsp_rtcd.h"
-
-#include "aom_dsp/aom_filter.h"
-#include "aom_dsp/x86/convolve_sse2.h"
-
-void av1_jnt_convolve_x_sse2(const uint8_t *src, int src_stride, uint8_t *dst0,
- int dst_stride0, int w, int h,
- const InterpFilterParams *filter_params_x,
- const InterpFilterParams *filter_params_y,
- const int subpel_x_q4, const int subpel_y_q4,
- ConvolveParams *conv_params) {
- const int bd = 8;
- CONV_BUF_TYPE *dst = conv_params->dst;
- const int dst_stride = conv_params->dst_stride;
- const int fo_horiz = filter_params_x->taps / 2 - 1;
- const uint8_t *src_ptr = src - fo_horiz;
- const int bits = FILTER_BITS - conv_params->round_1;
- const __m128i left_shift = _mm_cvtsi32_si128(bits);
- const __m128i round_const = _mm_set1_epi32((1 << conv_params->round_0) >> 1);
- const __m128i round_shift = _mm_cvtsi32_si128(conv_params->round_0);
- const int w0 = conv_params->fwd_offset;
- const int w1 = conv_params->bck_offset;
- const __m128i wt0 = _mm_set1_epi16(w0);
- const __m128i wt1 = _mm_set1_epi16(w1);
- const __m128i wt = _mm_unpacklo_epi16(wt0, wt1);
- const int do_average = conv_params->do_average;
- const int use_jnt_comp_avg = conv_params->use_jnt_comp_avg;
- const int offset_0 =
- bd + 2 * FILTER_BITS - conv_params->round_0 - conv_params->round_1;
- const int offset = (1 << offset_0) + (1 << (offset_0 - 1));
- const __m128i offset_const = _mm_set1_epi16(offset);
- const int rounding_shift =
- 2 * FILTER_BITS - conv_params->round_0 - conv_params->round_1;
- const __m128i rounding_const = _mm_set1_epi16((1 << rounding_shift) >> 1);
- __m128i coeffs[4];
-
- (void)filter_params_y;
- (void)subpel_y_q4;
-
- prepare_coeffs(filter_params_x, subpel_x_q4, coeffs);
-
- if (w == 4) {
- do {
- const __m128i data = _mm_loadu_si128((__m128i *)src_ptr);
- __m128i s[4];
-
- s[0] = _mm_unpacklo_epi8(data, _mm_srli_si128(data, 1));
- s[1] =
- _mm_unpacklo_epi8(_mm_srli_si128(data, 2), _mm_srli_si128(data, 3));
- s[2] =
- _mm_unpacklo_epi8(_mm_srli_si128(data, 4), _mm_srli_si128(data, 5));
- s[3] =
- _mm_unpacklo_epi8(_mm_srli_si128(data, 6), _mm_srli_si128(data, 7));
- const __m128i res_lo = convolve_lo_x(s, coeffs);
- const __m128i res_lo_round =
- _mm_sra_epi32(_mm_add_epi32(res_lo, round_const), round_shift);
- const __m128i res_lo_shift = _mm_sll_epi32(res_lo_round, left_shift);
-
- const __m128i res_16b = _mm_packs_epi32(res_lo_shift, res_lo_shift);
- const __m128i res_unsigned = _mm_add_epi16(res_16b, offset_const);
-
- // Accumulate values into the destination buffer
- if (do_average) {
- const __m128i data_ref_0 = _mm_loadu_si128((__m128i *)dst);
-
- const __m128i comp_avg_res =
- comp_avg(&data_ref_0, &res_unsigned, &wt, use_jnt_comp_avg);
-
- const __m128i round_result = convolve_rounding(
- &comp_avg_res, &offset_const, &rounding_const, rounding_shift);
-
- const __m128i res_8 = _mm_packus_epi16(round_result, round_result);
- *(uint32_t *)(&dst0[0]) = _mm_cvtsi128_si32(res_8);
- } else {
- _mm_store_si128((__m128i *)(&dst[0]), res_unsigned);
- }
- src_ptr += src_stride;
- dst += dst_stride;
- dst0 += dst_stride0;
- } while (--h);
- } else {
- assert(!(w % 8));
- int i = 0;
- do {
- int j = 0;
- do {
- const __m128i data =
- _mm_loadu_si128((__m128i *)&src_ptr[i * src_stride + j]);
- __m128i s[4];
-
- // Filter even-index pixels
- s[0] = data;
- s[1] = _mm_srli_si128(data, 2);
- s[2] = _mm_srli_si128(data, 4);
- s[3] = _mm_srli_si128(data, 6);
- const __m128i res_even = convolve_lo_x(s, coeffs);
-
- // Filter odd-index pixels
- s[0] = _mm_srli_si128(data, 1);
- s[1] = _mm_srli_si128(data, 3);
- s[2] = _mm_srli_si128(data, 5);
- s[3] = _mm_srli_si128(data, 7);
- const __m128i res_odd = convolve_lo_x(s, coeffs);
-
- // Rearrange pixels back into the order 0 ... 7
- const __m128i res_lo = _mm_unpacklo_epi32(res_even, res_odd);
- const __m128i res_hi = _mm_unpackhi_epi32(res_even, res_odd);
- const __m128i res_lo_round =
- _mm_sra_epi32(_mm_add_epi32(res_lo, round_const), round_shift);
- const __m128i res_hi_round =
- _mm_sra_epi32(_mm_add_epi32(res_hi, round_const), round_shift);
- const __m128i res_lo_shift = _mm_sll_epi32(res_lo_round, left_shift);
- const __m128i res_hi_shift = _mm_sll_epi32(res_hi_round, left_shift);
-
- const __m128i res_16b = _mm_packs_epi32(res_lo_shift, res_hi_shift);
- const __m128i res_unsigned = _mm_add_epi16(res_16b, offset_const);
-
- // Accumulate values into the destination buffer
- if (do_average) {
- const __m128i data_ref_0 =
- _mm_loadu_si128((__m128i *)(&dst[i * dst_stride + j]));
-
- const __m128i comp_avg_res =
- comp_avg(&data_ref_0, &res_unsigned, &wt, use_jnt_comp_avg);
-
- const __m128i round_result = convolve_rounding(
- &comp_avg_res, &offset_const, &rounding_const, rounding_shift);
-
- const __m128i res_8 = _mm_packus_epi16(round_result, round_result);
- _mm_storel_epi64((__m128i *)(&dst0[i * dst_stride0 + j]), res_8);
- } else {
- _mm_store_si128((__m128i *)(&dst[i * dst_stride + j]), res_unsigned);
- }
- j += 8;
- } while (j < w);
- } while (++i < h);
- }
-}
-
-void av1_jnt_convolve_y_sse2(const uint8_t *src, int src_stride, uint8_t *dst0,
- int dst_stride0, int w, int h,
- const InterpFilterParams *filter_params_x,
- const InterpFilterParams *filter_params_y,
- const int subpel_x_q4, const int subpel_y_q4,
- ConvolveParams *conv_params) {
- const int bd = 8;
- CONV_BUF_TYPE *dst = conv_params->dst;
- const int dst_stride = conv_params->dst_stride;
- const int fo_vert = filter_params_y->taps / 2 - 1;
- const uint8_t *src_ptr = src - fo_vert * src_stride;
- const int bits = FILTER_BITS - conv_params->round_0;
- const __m128i left_shift = _mm_cvtsi32_si128(bits);
- const __m128i wt0 = _mm_set1_epi16(conv_params->fwd_offset);
- const __m128i wt1 = _mm_set1_epi16(conv_params->bck_offset);
- const __m128i wt = _mm_unpacklo_epi16(wt0, wt1);
- const int do_average = conv_params->do_average;
- const int use_jnt_comp_avg = conv_params->use_jnt_comp_avg;
- const int offset_0 =
- bd + 2 * FILTER_BITS - conv_params->round_0 - conv_params->round_1;
- const int offset = (1 << offset_0) + (1 << (offset_0 - 1));
- const __m128i offset_const = _mm_set1_epi16(offset);
- const int rounding_shift =
- 2 * FILTER_BITS - conv_params->round_0 - conv_params->round_1;
- const __m128i rounding_const = _mm_set1_epi16((1 << rounding_shift) >> 1);
- const __m128i round_const = _mm_set1_epi32((1 << conv_params->round_1) >> 1);
- const __m128i round_shift = _mm_cvtsi32_si128(conv_params->round_1);
- __m128i coeffs[4];
-
- (void)filter_params_x;
- (void)subpel_x_q4;
-
- prepare_coeffs(filter_params_y, subpel_y_q4, coeffs);
-
- if (w == 4) {
- __m128i s[8], src6, res, res_shift;
- src6 = _mm_cvtsi32_si128(*(uint32_t *)(src_ptr + 6 * src_stride));
- s[0] = _mm_unpacklo_epi8(
- _mm_cvtsi32_si128(*(uint32_t *)(src_ptr + 0 * src_stride)),
- _mm_cvtsi32_si128(*(uint32_t *)(src_ptr + 1 * src_stride)));
- s[1] = _mm_unpacklo_epi8(
- _mm_cvtsi32_si128(*(uint32_t *)(src_ptr + 1 * src_stride)),
- _mm_cvtsi32_si128(*(uint32_t *)(src_ptr + 2 * src_stride)));
- s[2] = _mm_unpacklo_epi8(
- _mm_cvtsi32_si128(*(uint32_t *)(src_ptr + 2 * src_stride)),
- _mm_cvtsi32_si128(*(uint32_t *)(src_ptr + 3 * src_stride)));
- s[3] = _mm_unpacklo_epi8(
- _mm_cvtsi32_si128(*(uint32_t *)(src_ptr + 3 * src_stride)),
- _mm_cvtsi32_si128(*(uint32_t *)(src_ptr + 4 * src_stride)));
- s[4] = _mm_unpacklo_epi8(
- _mm_cvtsi32_si128(*(uint32_t *)(src_ptr + 4 * src_stride)),
- _mm_cvtsi32_si128(*(uint32_t *)(src_ptr + 5 * src_stride)));
- s[5] = _mm_unpacklo_epi8(
- _mm_cvtsi32_si128(*(uint32_t *)(src_ptr + 5 * src_stride)), src6);
-
- do {
- s[6] = _mm_unpacklo_epi8(
- src6, _mm_cvtsi32_si128(*(uint32_t *)(src_ptr + 7 * src_stride)));
- src6 = _mm_cvtsi32_si128(*(uint32_t *)(src_ptr + 8 * src_stride));
- s[7] = _mm_unpacklo_epi8(
- _mm_cvtsi32_si128(*(uint32_t *)(src_ptr + 7 * src_stride)), src6);
-
- res = convolve_lo_y(s + 0, coeffs);
- res_shift = _mm_sll_epi32(res, left_shift);
- res_shift =
- _mm_sra_epi32(_mm_add_epi32(res_shift, round_const), round_shift);
-
- __m128i res_16b = _mm_packs_epi32(res_shift, res_shift);
- __m128i res_unsigned = _mm_add_epi16(res_16b, offset_const);
-
- // Accumulate values into the destination buffer
- if (do_average) {
- const __m128i data_ref_0 = _mm_loadu_si128((__m128i *)dst);
-
- const __m128i comp_avg_res =
- comp_avg(&data_ref_0, &res_unsigned, &wt, use_jnt_comp_avg);
-
- const __m128i round_result = convolve_rounding(
- &comp_avg_res, &offset_const, &rounding_const, rounding_shift);
-
- const __m128i res_8 = _mm_packus_epi16(round_result, round_result);
- *(uint32_t *)(&dst0[0]) = _mm_cvtsi128_si32(res_8);
-
- } else {
- _mm_store_si128((__m128i *)dst, res_unsigned);
- }
-
- src_ptr += src_stride;
- dst += dst_stride;
- dst0 += dst_stride0;
-
- res = convolve_lo_y(s + 1, coeffs);
- res_shift = _mm_sll_epi32(res, left_shift);
- res_shift =
- _mm_sra_epi32(_mm_add_epi32(res_shift, round_const), round_shift);
-
- res_16b = _mm_packs_epi32(res_shift, res_shift);
- res_unsigned = _mm_add_epi16(res_16b, offset_const);
-
- // Accumulate values into the destination buffer
- if (do_average) {
- const __m128i data_ref_0 = _mm_loadu_si128((__m128i *)dst);
-
- const __m128i comp_avg_res =
- comp_avg(&data_ref_0, &res_unsigned, &wt, use_jnt_comp_avg);
-
- const __m128i round_result = convolve_rounding(
- &comp_avg_res, &offset_const, &rounding_const, rounding_shift);
-
- const __m128i res_8 = _mm_packus_epi16(round_result, round_result);
- *(uint32_t *)(&dst0[0]) = _mm_cvtsi128_si32(res_8);
-
- } else {
- _mm_store_si128((__m128i *)dst, res_unsigned);
- }
-
- src_ptr += src_stride;
- dst += dst_stride;
- dst0 += dst_stride0;
-
- s[0] = s[2];
- s[1] = s[3];
- s[2] = s[4];
- s[3] = s[5];
- s[4] = s[6];
- s[5] = s[7];
- h -= 2;
- } while (h);
- } else {
- assert(!(w % 8));
- int j = 0;
- do {
- __m128i s[8], src6, res_lo, res_hi, res_lo_shift, res_hi_shift;
- const uint8_t *data = &src_ptr[j];
-
- src6 = _mm_loadl_epi64((__m128i *)(data + 6 * src_stride));
- s[0] = _mm_unpacklo_epi8(
- _mm_loadl_epi64((__m128i *)(data + 0 * src_stride)),
- _mm_loadl_epi64((__m128i *)(data + 1 * src_stride)));
- s[1] = _mm_unpacklo_epi8(
- _mm_loadl_epi64((__m128i *)(data + 1 * src_stride)),
- _mm_loadl_epi64((__m128i *)(data + 2 * src_stride)));
- s[2] = _mm_unpacklo_epi8(
- _mm_loadl_epi64((__m128i *)(data + 2 * src_stride)),
- _mm_loadl_epi64((__m128i *)(data + 3 * src_stride)));
- s[3] = _mm_unpacklo_epi8(
- _mm_loadl_epi64((__m128i *)(data + 3 * src_stride)),
- _mm_loadl_epi64((__m128i *)(data + 4 * src_stride)));
- s[4] = _mm_unpacklo_epi8(
- _mm_loadl_epi64((__m128i *)(data + 4 * src_stride)),
- _mm_loadl_epi64((__m128i *)(data + 5 * src_stride)));
- s[5] = _mm_unpacklo_epi8(
- _mm_loadl_epi64((__m128i *)(data + 5 * src_stride)), src6);
-
- int i = 0;
- do {
- data = &src_ptr[i * src_stride + j];
- s[6] = _mm_unpacklo_epi8(
- src6, _mm_loadl_epi64((__m128i *)(data + 7 * src_stride)));
- src6 = _mm_loadl_epi64((__m128i *)(data + 8 * src_stride));
- s[7] = _mm_unpacklo_epi8(
- _mm_loadl_epi64((__m128i *)(data + 7 * src_stride)), src6);
-
- res_lo = convolve_lo_y(s, coeffs); // Filter low index pixels
- res_hi = convolve_hi_y(s, coeffs); // Filter high index pixels
- res_lo_shift = _mm_sll_epi32(res_lo, left_shift);
- res_hi_shift = _mm_sll_epi32(res_hi, left_shift);
- res_lo_shift = _mm_sra_epi32(_mm_add_epi32(res_lo_shift, round_const),
- round_shift);
- res_hi_shift = _mm_sra_epi32(_mm_add_epi32(res_hi_shift, round_const),
- round_shift);
-
- __m128i res_16b = _mm_packs_epi32(res_lo_shift, res_hi_shift);
- __m128i res_unsigned = _mm_add_epi16(res_16b, offset_const);
-
- // Accumulate values into the destination buffer
- if (do_average) {
- const __m128i data_ref_0 =
- _mm_loadu_si128((__m128i *)(&dst[i * dst_stride + j]));
-
- const __m128i comp_avg_res =
- comp_avg(&data_ref_0, &res_unsigned, &wt, use_jnt_comp_avg);
-
- const __m128i round_result = convolve_rounding(
- &comp_avg_res, &offset_const, &rounding_const, rounding_shift);
-
- const __m128i res_8 = _mm_packus_epi16(round_result, round_result);
- _mm_storel_epi64((__m128i *)(&dst0[i * dst_stride0 + j]), res_8);
- } else {
- _mm_store_si128((__m128i *)(&dst[i * dst_stride + j]), res_unsigned);
- }
- i++;
-
- res_lo = convolve_lo_y(s + 1, coeffs); // Filter low index pixels
- res_hi = convolve_hi_y(s + 1, coeffs); // Filter high index pixels
- res_lo_shift = _mm_sll_epi32(res_lo, left_shift);
- res_hi_shift = _mm_sll_epi32(res_hi, left_shift);
- res_lo_shift = _mm_sra_epi32(_mm_add_epi32(res_lo_shift, round_const),
- round_shift);
- res_hi_shift = _mm_sra_epi32(_mm_add_epi32(res_hi_shift, round_const),
- round_shift);
- res_16b = _mm_packs_epi32(res_lo_shift, res_hi_shift);
- res_unsigned = _mm_add_epi16(res_16b, offset_const);
-
- // Accumulate values into the destination buffer
- if (do_average) {
- __m128i data_ref_0 =
- _mm_loadu_si128((__m128i *)(&dst[i * dst_stride + j]));
-
- const __m128i comp_avg_res =
- comp_avg(&data_ref_0, &res_unsigned, &wt, use_jnt_comp_avg);
-
- const __m128i round_result = convolve_rounding(
- &comp_avg_res, &offset_const, &rounding_const, rounding_shift);
-
- const __m128i res_8 = _mm_packus_epi16(round_result, round_result);
- _mm_storel_epi64((__m128i *)(&dst0[i * dst_stride0 + j]), res_8);
- } else {
- _mm_store_si128((__m128i *)(&dst[i * dst_stride + j]), res_unsigned);
- }
- i++;
-
- s[0] = s[2];
- s[1] = s[3];
- s[2] = s[4];
- s[3] = s[5];
- s[4] = s[6];
- s[5] = s[7];
- } while (i < h);
- j += 8;
- } while (j < w);
- }
-}
diff --git a/third_party/aom/av1/common/x86/jnt_convolve_ssse3.c b/third_party/aom/av1/common/x86/jnt_convolve_ssse3.c
deleted file mode 100644
index 822772782..000000000
--- a/third_party/aom/av1/common/x86/jnt_convolve_ssse3.c
+++ /dev/null
@@ -1,232 +0,0 @@
-/*
- * Copyright (c) 2018, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#include <tmmintrin.h>
-
-#include "config/aom_dsp_rtcd.h"
-
-#include "aom_dsp/aom_filter.h"
-#include "aom_dsp/x86/convolve_sse2.h"
-
-void av1_jnt_convolve_2d_ssse3(const uint8_t *src, int src_stride,
- uint8_t *dst0, int dst_stride0, int w, int h,
- const InterpFilterParams *filter_params_x,
- const InterpFilterParams *filter_params_y,
- const int subpel_x_q4, const int subpel_y_q4,
- ConvolveParams *conv_params) {  // Separable 2D convolution: a horizontal pass into im_block, then a vertical pass that either stores 16-bit results to conv_params->dst or, when do_average is set, blends with conv_params->dst and writes 8-bit pixels to dst0.
- CONV_BUF_TYPE *dst = conv_params->dst;
- int dst_stride = conv_params->dst_stride;
- const int bd = 8;
- // im_block holds the horizontally filtered rows (int16, row stride im_stride) that the vertical pass consumes.
- DECLARE_ALIGNED(16, int16_t,
- im_block[(MAX_SB_SIZE + MAX_FILTER_TAP - 1) * MAX_SB_SIZE]);
- int im_h = h + filter_params_y->taps - 1;
- int im_stride = MAX_SB_SIZE;
- int i, j;
- const int fo_vert = filter_params_y->taps / 2 - 1;
- const int fo_horiz = filter_params_x->taps / 2 - 1;
- const int do_average = conv_params->do_average;
- const int use_jnt_comp_avg = conv_params->use_jnt_comp_avg;
- const uint8_t *const src_ptr = src - fo_vert * src_stride - fo_horiz;  // back up by the filter offsets so the taps are centred on src
-
- const __m128i zero = _mm_setzero_si128();
- // wt interleaves fwd_offset and bck_offset as 16-bit pairs (w0, w1, w0, w1, ...); it is handed to comp_avg() below.
- const int w0 = conv_params->fwd_offset;
- const int w1 = conv_params->bck_offset;
- const __m128i wt0 = _mm_set1_epi16(w0);
- const __m128i wt1 = _mm_set1_epi16(w1);
- const __m128i wt = _mm_unpacklo_epi16(wt0, wt1);
- // offset_const is added to every packed 16-bit result; rounding_const and rounding_shift are passed to convolve_rounding().
- const int offset_0 =
- bd + 2 * FILTER_BITS - conv_params->round_0 - conv_params->round_1;
- const int offset = (1 << offset_0) + (1 << (offset_0 - 1));
- const __m128i offset_const = _mm_set1_epi16(offset);
- const int rounding_shift =
- 2 * FILTER_BITS - conv_params->round_0 - conv_params->round_1;
- const __m128i rounding_const = _mm_set1_epi16((1 << rounding_shift) >> 1);
-
- /* Horizontal filter: fills im_h rows of im_block, 8 columns per inner iteration */
- {
- const int16_t *x_filter = av1_get_interp_filter_subpel_kernel(
- filter_params_x, subpel_x_q4 & SUBPEL_MASK);
- const __m128i coeffs_x = _mm_loadu_si128((__m128i *)x_filter);
-
- // coeffs 0 1 0 1 2 3 2 3
- const __m128i tmp_0 = _mm_unpacklo_epi32(coeffs_x, coeffs_x);
- // coeffs 4 5 4 5 6 7 6 7
- const __m128i tmp_1 = _mm_unpackhi_epi32(coeffs_x, coeffs_x);
-
- // coeffs 0 1 0 1 0 1 0 1
- const __m128i coeff_01 = _mm_unpacklo_epi64(tmp_0, tmp_0);
- // coeffs 2 3 2 3 2 3 2 3
- const __m128i coeff_23 = _mm_unpackhi_epi64(tmp_0, tmp_0);
- // coeffs 4 5 4 5 4 5 4 5
- const __m128i coeff_45 = _mm_unpacklo_epi64(tmp_1, tmp_1);
- // coeffs 6 7 6 7 6 7 6 7
- const __m128i coeff_67 = _mm_unpackhi_epi64(tmp_1, tmp_1);
- // round_const combines the round_0 rounding term with a (1 << (bd + FILTER_BITS - 1)) bias.
- const __m128i round_const = _mm_set1_epi32(
- ((1 << conv_params->round_0) >> 1) + (1 << (bd + FILTER_BITS - 1)));
- const __m128i round_shift = _mm_cvtsi32_si128(conv_params->round_0);
-
- for (i = 0; i < im_h; ++i) {
- for (j = 0; j < w; j += 8) {
- const __m128i data =
- _mm_loadu_si128((__m128i *)&src_ptr[i * src_stride + j]);
- // Widen the 16 loaded bytes to 16-bit lanes (low 8 and high 8).
- const __m128i src_lo = _mm_unpacklo_epi8(data, zero);
- const __m128i src_hi = _mm_unpackhi_epi8(data, zero);
-
- // Filter even-index pixels
- const __m128i res_0 = _mm_madd_epi16(src_lo, coeff_01);
- const __m128i src_2 = _mm_alignr_epi8(src_hi, src_lo, 4);
- const __m128i res_2 = _mm_madd_epi16(src_2, coeff_23);
- const __m128i src_4 = _mm_alignr_epi8(src_hi, src_lo, 8);
- const __m128i res_4 = _mm_madd_epi16(src_4, coeff_45);
- const __m128i src_6 = _mm_alignr_epi8(src_hi, src_lo, 12);
- const __m128i res_6 = _mm_madd_epi16(src_6, coeff_67);
-
- __m128i res_even = _mm_add_epi32(_mm_add_epi32(res_0, res_4),
- _mm_add_epi32(res_2, res_6));
- res_even =
- _mm_sra_epi32(_mm_add_epi32(res_even, round_const), round_shift);
-
- // Filter odd-index pixels
- const __m128i src_1 = _mm_alignr_epi8(src_hi, src_lo, 2);
- const __m128i res_1 = _mm_madd_epi16(src_1, coeff_01);
- const __m128i src_3 = _mm_alignr_epi8(src_hi, src_lo, 6);
- const __m128i res_3 = _mm_madd_epi16(src_3, coeff_23);
- const __m128i src_5 = _mm_alignr_epi8(src_hi, src_lo, 10);
- const __m128i res_5 = _mm_madd_epi16(src_5, coeff_45);
- const __m128i src_7 = _mm_alignr_epi8(src_hi, src_lo, 14);
- const __m128i res_7 = _mm_madd_epi16(src_7, coeff_67);
-
- __m128i res_odd = _mm_add_epi32(_mm_add_epi32(res_1, res_5),
- _mm_add_epi32(res_3, res_7));
- res_odd =
- _mm_sra_epi32(_mm_add_epi32(res_odd, round_const), round_shift);
-
- // Pack in the column order 0, 2, 4, 6, 1, 3, 5, 7
- __m128i res = _mm_packs_epi32(res_even, res_odd);
- _mm_store_si128((__m128i *)&im_block[i * im_stride + j], res);
- }
- }
- }
-
- /* Vertical filter: reads 8 consecutive im_block rows per output row */
- {
- const int16_t *y_filter = av1_get_interp_filter_subpel_kernel(
- filter_params_y, subpel_y_q4 & SUBPEL_MASK);
- const __m128i coeffs_y = _mm_loadu_si128((__m128i *)y_filter);
-
- // coeffs 0 1 0 1 2 3 2 3
- const __m128i tmp_0 = _mm_unpacklo_epi32(coeffs_y, coeffs_y);
- // coeffs 4 5 4 5 6 7 6 7
- const __m128i tmp_1 = _mm_unpackhi_epi32(coeffs_y, coeffs_y);
-
- // coeffs 0 1 0 1 0 1 0 1
- const __m128i coeff_01 = _mm_unpacklo_epi64(tmp_0, tmp_0);
- // coeffs 2 3 2 3 2 3 2 3
- const __m128i coeff_23 = _mm_unpackhi_epi64(tmp_0, tmp_0);
- // coeffs 4 5 4 5 4 5 4 5
- const __m128i coeff_45 = _mm_unpacklo_epi64(tmp_1, tmp_1);
- // coeffs 6 7 6 7 6 7 6 7
- const __m128i coeff_67 = _mm_unpackhi_epi64(tmp_1, tmp_1);
- // NOTE(review): the subtracted term presumably cancels the (1 << (bd + FILTER_BITS - 1)) bias added in the horizontal pass, scaled by the filter gain and round_0 - confirm.
- const __m128i round_const = _mm_set1_epi32(
- ((1 << conv_params->round_1) >> 1) -
- (1 << (bd + 2 * FILTER_BITS - conv_params->round_0 - 1)));
- const __m128i round_shift = _mm_cvtsi32_si128(conv_params->round_1);
-
- for (i = 0; i < h; ++i) {
- for (j = 0; j < w; j += 8) {
- // Filter even-index pixels (the low four lanes of each im_block row, see the packing order above)
- const int16_t *data = &im_block[i * im_stride + j];
- const __m128i src_0 =
- _mm_unpacklo_epi16(*(__m128i *)(data + 0 * im_stride),
- *(__m128i *)(data + 1 * im_stride));
- const __m128i src_2 =
- _mm_unpacklo_epi16(*(__m128i *)(data + 2 * im_stride),
- *(__m128i *)(data + 3 * im_stride));
- const __m128i src_4 =
- _mm_unpacklo_epi16(*(__m128i *)(data + 4 * im_stride),
- *(__m128i *)(data + 5 * im_stride));
- const __m128i src_6 =
- _mm_unpacklo_epi16(*(__m128i *)(data + 6 * im_stride),
- *(__m128i *)(data + 7 * im_stride));
-
- const __m128i res_0 = _mm_madd_epi16(src_0, coeff_01);
- const __m128i res_2 = _mm_madd_epi16(src_2, coeff_23);
- const __m128i res_4 = _mm_madd_epi16(src_4, coeff_45);
- const __m128i res_6 = _mm_madd_epi16(src_6, coeff_67);
-
- const __m128i res_even = _mm_add_epi32(_mm_add_epi32(res_0, res_2),
- _mm_add_epi32(res_4, res_6));
-
- // Filter odd-index pixels (the high four lanes of each im_block row)
- const __m128i src_1 =
- _mm_unpackhi_epi16(*(__m128i *)(data + 0 * im_stride),
- *(__m128i *)(data + 1 * im_stride));
- const __m128i src_3 =
- _mm_unpackhi_epi16(*(__m128i *)(data + 2 * im_stride),
- *(__m128i *)(data + 3 * im_stride));
- const __m128i src_5 =
- _mm_unpackhi_epi16(*(__m128i *)(data + 4 * im_stride),
- *(__m128i *)(data + 5 * im_stride));
- const __m128i src_7 =
- _mm_unpackhi_epi16(*(__m128i *)(data + 6 * im_stride),
- *(__m128i *)(data + 7 * im_stride));
-
- const __m128i res_1 = _mm_madd_epi16(src_1, coeff_01);
- const __m128i res_3 = _mm_madd_epi16(src_3, coeff_23);
- const __m128i res_5 = _mm_madd_epi16(src_5, coeff_45);
- const __m128i res_7 = _mm_madd_epi16(src_7, coeff_67);
-
- const __m128i res_odd = _mm_add_epi32(_mm_add_epi32(res_1, res_3),
- _mm_add_epi32(res_5, res_7));
-
- // Rearrange pixels back into the order 0 ... 7
- const __m128i res_lo = _mm_unpacklo_epi32(res_even, res_odd);
- const __m128i res_hi = _mm_unpackhi_epi32(res_even, res_odd);
-
- const __m128i res_lo_round =
- _mm_sra_epi32(_mm_add_epi32(res_lo, round_const), round_shift);
- const __m128i res_hi_round =
- _mm_sra_epi32(_mm_add_epi32(res_hi, round_const), round_shift);
-
- const __m128i res_16b = _mm_packs_epi32(res_lo_round, res_hi_round);
- const __m128i res_unsigned = _mm_add_epi16(res_16b, offset_const);
-
- // When averaging, combine with the values already in conv_params->dst and write 8-bit pixels to dst0; otherwise store the 16-bit result to conv_params->dst.
- if (do_average) {
- const __m128i data_ref_0 =
- _mm_loadu_si128((__m128i *)(&dst[i * dst_stride + j]));
-
- const __m128i comp_avg_res =
- comp_avg(&data_ref_0, &res_unsigned, &wt, use_jnt_comp_avg);
-
- const __m128i round_result = convolve_rounding(
- &comp_avg_res, &offset_const, &rounding_const, rounding_shift);
-
- const __m128i res_8 = _mm_packus_epi16(round_result, round_result);
- // Store 8 output bytes when w > 4, otherwise only the low 4 bytes.
- if (w > 4)
- _mm_storel_epi64((__m128i *)(&dst0[i * dst_stride0 + j]), res_8);
- else
- *(uint32_t *)(&dst0[i * dst_stride0 + j]) =
- _mm_cvtsi128_si32(res_8);
- } else {
- _mm_store_si128((__m128i *)(&dst[i * dst_stride + j]), res_unsigned);
- }
- }
- }
- }
-}
diff --git a/third_party/aom/av1/common/x86/reconinter_avx2.c b/third_party/aom/av1/common/x86/reconinter_avx2.c
deleted file mode 100644
index f645e0454..000000000
--- a/third_party/aom/av1/common/x86/reconinter_avx2.c
+++ /dev/null
@@ -1,620 +0,0 @@
-/*
- * Copyright (c) 2018, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#include <immintrin.h>
-
-#include "config/av1_rtcd.h"
-
-#include "aom/aom_integer.h"
-#include "aom_dsp/blend.h"
-#include "aom_dsp/x86/synonyms.h"
-#include "aom_dsp/x86/synonyms_avx2.h"
-#include "av1/common/blockd.h"
-
-static INLINE __m256i calc_mask_avx2(const __m256i mask_base, const __m256i s0,
- const __m256i s1) {  // Per 16-bit lane: returns |mask_base + (|s0 - s1| >> 4)|.
- const __m256i diff = _mm256_abs_epi16(_mm256_sub_epi16(s0, s1));
- return _mm256_abs_epi16(
- _mm256_add_epi16(mask_base, _mm256_srli_epi16(diff, 4)));
- // No clamp to [0, 64] is needed: callers pass zero-extended 8-bit pixels, so diff >> 4 is in [0, 15] and the result is in [38, 53] for mask_base 38 (or [11, 26] for mask_base 38 - 64, assuming AOM_BLEND_A64_MAX_ALPHA is 64).
-}
-void av1_build_compound_diffwtd_mask_avx2(uint8_t *mask,
- DIFFWTD_MASK_TYPE mask_type,
- const uint8_t *src0, int stride0,
- const uint8_t *src1, int stride1,
- int h, int w) {  // Fills mask (h rows of w bytes, written contiguously) with calc_mask_avx2() of the per-pixel src0/src1 difference.
- const int mb = (mask_type == DIFFWTD_38_INV) ? AOM_BLEND_A64_MAX_ALPHA : 0;  // non-zero only for the inverse mask type
- const __m256i y_mask_base = _mm256_set1_epi16(38 - mb);
- int i = 0;
- if (4 == w) {  // 4 rows of 4 pixels per iteration (16 mask bytes)
- do {
- const __m128i s0A = xx_loadl_32(src0);
- const __m128i s0B = xx_loadl_32(src0 + stride0);
- const __m128i s0C = xx_loadl_32(src0 + stride0 * 2);
- const __m128i s0D = xx_loadl_32(src0 + stride0 * 3);
- const __m128i s0AB = _mm_unpacklo_epi32(s0A, s0B);
- const __m128i s0CD = _mm_unpacklo_epi32(s0C, s0D);
- const __m128i s0ABCD = _mm_unpacklo_epi64(s0AB, s0CD);
- const __m256i s0ABCD_w = _mm256_cvtepu8_epi16(s0ABCD);
- // Same gather-and-widen sequence for the second source.
- const __m128i s1A = xx_loadl_32(src1);
- const __m128i s1B = xx_loadl_32(src1 + stride1);
- const __m128i s1C = xx_loadl_32(src1 + stride1 * 2);
- const __m128i s1D = xx_loadl_32(src1 + stride1 * 3);
- const __m128i s1AB = _mm_unpacklo_epi32(s1A, s1B);
- const __m128i s1CD = _mm_unpacklo_epi32(s1C, s1D);
- const __m128i s1ABCD = _mm_unpacklo_epi64(s1AB, s1CD);
- const __m256i s1ABCD_w = _mm256_cvtepu8_epi16(s1ABCD);
- const __m256i m16 = calc_mask_avx2(y_mask_base, s0ABCD_w, s1ABCD_w);
- const __m256i m8 = _mm256_packus_epi16(m16, _mm256_setzero_si256());
- const __m128i x_m8 =
- _mm256_castsi256_si128(_mm256_permute4x64_epi64(m8, 0xd8));  // 0xd8 moves the two populated 64-bit quarters into the low 128 bits
- xx_storeu_128(mask, x_m8);
- src0 += (stride0 << 2);
- src1 += (stride1 << 2);
- mask += 16;
- i += 4;
- } while (i < h);
- } else if (8 == w) {  // 4 rows of 8 pixels per iteration (32 mask bytes)
- do {
- const __m128i s0A = xx_loadl_64(src0);
- const __m128i s0B = xx_loadl_64(src0 + stride0);
- const __m128i s0C = xx_loadl_64(src0 + stride0 * 2);
- const __m128i s0D = xx_loadl_64(src0 + stride0 * 3);
- const __m256i s0AC_w = _mm256_cvtepu8_epi16(_mm_unpacklo_epi64(s0A, s0C));
- const __m256i s0BD_w = _mm256_cvtepu8_epi16(_mm_unpacklo_epi64(s0B, s0D));
- const __m128i s1A = xx_loadl_64(src1);
- const __m128i s1B = xx_loadl_64(src1 + stride1);
- const __m128i s1C = xx_loadl_64(src1 + stride1 * 2);
- const __m128i s1D = xx_loadl_64(src1 + stride1 * 3);
- const __m256i s1AB_w = _mm256_cvtepu8_epi16(_mm_unpacklo_epi64(s1A, s1C));  // NOTE: despite its name this holds rows A and C, matching s0AC_w
- const __m256i s1CD_w = _mm256_cvtepu8_epi16(_mm_unpacklo_epi64(s1B, s1D));  // NOTE: despite its name this holds rows B and D, matching s0BD_w
- const __m256i m16AC = calc_mask_avx2(y_mask_base, s0AC_w, s1AB_w);
- const __m256i m16BD = calc_mask_avx2(y_mask_base, s0BD_w, s1CD_w);
- const __m256i m8 = _mm256_packus_epi16(m16AC, m16BD);  // the per-lane pack already yields rows A,B | C,D, so no permute is needed
- yy_storeu_256(mask, m8);
- src0 += stride0 << 2;
- src1 += stride1 << 2;
- mask += 32;
- i += 4;
- } while (i < h);
- } else if (16 == w) {  // 2 rows of 16 pixels per iteration (32 mask bytes)
- do {
- const __m128i s0A = xx_load_128(src0);
- const __m128i s0B = xx_load_128(src0 + stride0);
- const __m128i s1A = xx_load_128(src1);
- const __m128i s1B = xx_load_128(src1 + stride1);
- const __m256i s0AL = _mm256_cvtepu8_epi16(s0A);
- const __m256i s0BL = _mm256_cvtepu8_epi16(s0B);
- const __m256i s1AL = _mm256_cvtepu8_epi16(s1A);
- const __m256i s1BL = _mm256_cvtepu8_epi16(s1B);
- // NOTE(review): this branch uses xx_load_128 while the other branches use loadl/loadu helpers - confirm whether xx_load_128 requires 16-byte-aligned src0/src1 rows.
- const __m256i m16AL = calc_mask_avx2(y_mask_base, s0AL, s1AL);
- const __m256i m16BL = calc_mask_avx2(y_mask_base, s0BL, s1BL);
-
- const __m256i m8 =
- _mm256_permute4x64_epi64(_mm256_packus_epi16(m16AL, m16BL), 0xd8);
- yy_storeu_256(mask, m8);
- src0 += stride0 << 1;
- src1 += stride1 << 1;
- mask += 32;
- i += 2;
- } while (i < h);
- } else {  // wider blocks: one row per outer iteration, 32 pixels per inner iteration
- do {
- int j = 0;
- do {
- const __m256i s0 = yy_loadu_256(src0 + j);
- const __m256i s1 = yy_loadu_256(src1 + j);
- const __m256i s0L = _mm256_cvtepu8_epi16(_mm256_castsi256_si128(s0));
- const __m256i s1L = _mm256_cvtepu8_epi16(_mm256_castsi256_si128(s1));
- const __m256i s0H =
- _mm256_cvtepu8_epi16(_mm256_extracti128_si256(s0, 1));
- const __m256i s1H =
- _mm256_cvtepu8_epi16(_mm256_extracti128_si256(s1, 1));
- const __m256i m16L = calc_mask_avx2(y_mask_base, s0L, s1L);
- const __m256i m16H = calc_mask_avx2(y_mask_base, s0H, s1H);
- const __m256i m8 =
- _mm256_permute4x64_epi64(_mm256_packus_epi16(m16L, m16H), 0xd8);
- yy_storeu_256(mask + j, m8);
- j += 32;
- } while (j < w);
- src0 += stride0;
- src1 += stride1;
- mask += w;
- i += 1;
- } while (i < h);
- }
-}
-
-static INLINE __m256i calc_mask_d16_avx2(const __m256i *data_src0,
- const __m256i *data_src1,
- const __m256i *round_const,
- const __m256i *mask_base_16,
- const __m256i *clip_diff, int round) {  // Per 16-bit lane: min(mask_base_16 + (((|src0 - src1| + round_const) >> round) >> DIFF_FACTOR_LOG2), clip_diff), using saturating adds.
- const __m256i diffa = _mm256_subs_epu16(*data_src0, *data_src1);
- const __m256i diffb = _mm256_subs_epu16(*data_src1, *data_src0);
- const __m256i diff = _mm256_max_epu16(diffa, diffb);  // one saturating difference is 0, so the max is the unsigned |src0 - src1|
- const __m256i diff_round =
- _mm256_srli_epi16(_mm256_adds_epu16(diff, *round_const), round);
- const __m256i diff_factor = _mm256_srli_epi16(diff_round, DIFF_FACTOR_LOG2);
- const __m256i diff_mask = _mm256_adds_epi16(diff_factor, *mask_base_16);
- const __m256i diff_clamp = _mm256_min_epi16(diff_mask, *clip_diff);  // upper clamp only
- return diff_clamp;
-}
-
-static INLINE __m256i calc_mask_d16_inv_avx2(const __m256i *data_src0,
- const __m256i *data_src1,
- const __m256i *round_const,
- const __m256i *mask_base_16,
- const __m256i *clip_diff,
- int round) {  // Inverse variant: same clamped value as calc_mask_d16_avx2(), then returned as clip_diff minus that value.
- const __m256i diffa = _mm256_subs_epu16(*data_src0, *data_src1);
- const __m256i diffb = _mm256_subs_epu16(*data_src1, *data_src0);
- const __m256i diff = _mm256_max_epu16(diffa, diffb);  // one saturating difference is 0, so the max is the unsigned |src0 - src1|
- const __m256i diff_round =
- _mm256_srli_epi16(_mm256_adds_epu16(diff, *round_const), round);
- const __m256i diff_factor = _mm256_srli_epi16(diff_round, DIFF_FACTOR_LOG2);
- const __m256i diff_mask = _mm256_adds_epi16(diff_factor, *mask_base_16);
- const __m256i diff_clamp = _mm256_min_epi16(diff_mask, *clip_diff);
- const __m256i diff_const_16 = _mm256_sub_epi16(*clip_diff, diff_clamp);  // complement with respect to clip_diff
- return diff_const_16;
-}
-
-static INLINE void build_compound_diffwtd_mask_d16_avx2(
- uint8_t *mask, const CONV_BUF_TYPE *src0, int src0_stride,
- const CONV_BUF_TYPE *src1, int src1_stride, int h, int w, int shift) {  // Fills mask (h rows of w bytes, written contiguously) with calc_mask_d16_avx2() of src0/src1; the load widths below assume CONV_BUF_TYPE is 16 bits wide - TODO confirm.
- const int mask_base = 38;
- const __m256i _r = _mm256_set1_epi16((1 << shift) >> 1);  // rounding term added before the >> shift
- const __m256i y38 = _mm256_set1_epi16(mask_base);
- const __m256i y64 = _mm256_set1_epi16(AOM_BLEND_A64_MAX_ALPHA);  // passed as the upper clamp (clip_diff)
- int i = 0;
- if (w == 4) {  // 4 rows per iteration, 4 values per row
- do {
- const __m128i s0A = xx_loadl_64(src0);
- const __m128i s0B = xx_loadl_64(src0 + src0_stride);
- const __m128i s0C = xx_loadl_64(src0 + src0_stride * 2);
- const __m128i s0D = xx_loadl_64(src0 + src0_stride * 3);
- const __m128i s1A = xx_loadl_64(src1);
- const __m128i s1B = xx_loadl_64(src1 + src1_stride);
- const __m128i s1C = xx_loadl_64(src1 + src1_stride * 2);
- const __m128i s1D = xx_loadl_64(src1 + src1_stride * 3);
- const __m256i s0 = yy_set_m128i(_mm_unpacklo_epi64(s0C, s0D),
- _mm_unpacklo_epi64(s0A, s0B));
- const __m256i s1 = yy_set_m128i(_mm_unpacklo_epi64(s1C, s1D),
- _mm_unpacklo_epi64(s1A, s1B));
- const __m256i m16 = calc_mask_d16_avx2(&s0, &s1, &_r, &y38, &y64, shift);
- const __m256i m8 = _mm256_packus_epi16(m16, _mm256_setzero_si256());
- xx_storeu_128(mask,
- _mm256_castsi256_si128(_mm256_permute4x64_epi64(m8, 0xd8)));  // 0xd8 moves the two populated 64-bit quarters into the low 128 bits
- src0 += src0_stride << 2;
- src1 += src1_stride << 2;
- mask += 16;
- i += 4;
- } while (i < h);
- } else if (w == 8) {  // 4 rows per iteration, 8 values per row
- do {
- const __m256i s0AB = yy_loadu2_128(src0 + src0_stride, src0);
- const __m256i s0CD =
- yy_loadu2_128(src0 + src0_stride * 3, src0 + src0_stride * 2);
- const __m256i s1AB = yy_loadu2_128(src1 + src1_stride, src1);
- const __m256i s1CD =
- yy_loadu2_128(src1 + src1_stride * 3, src1 + src1_stride * 2);
- const __m256i m16AB =
- calc_mask_d16_avx2(&s0AB, &s1AB, &_r, &y38, &y64, shift);
- const __m256i m16CD =
- calc_mask_d16_avx2(&s0CD, &s1CD, &_r, &y38, &y64, shift);
- const __m256i m8 = _mm256_packus_epi16(m16AB, m16CD);
- yy_storeu_256(mask, _mm256_permute4x64_epi64(m8, 0xd8));  // 0xd8 swaps the middle 64-bit quarters, undoing the per-lane interleave of packus
- src0 += src0_stride << 2;
- src1 += src1_stride << 2;
- mask += 32;
- i += 4;
- } while (i < h);
- } else if (w == 16) {  // 2 rows per iteration, 16 values per row
- do {
- const __m256i s0A = yy_loadu_256(src0);
- const __m256i s0B = yy_loadu_256(src0 + src0_stride);
- const __m256i s1A = yy_loadu_256(src1);
- const __m256i s1B = yy_loadu_256(src1 + src1_stride);
- const __m256i m16A =
- calc_mask_d16_avx2(&s0A, &s1A, &_r, &y38, &y64, shift);
- const __m256i m16B =
- calc_mask_d16_avx2(&s0B, &s1B, &_r, &y38, &y64, shift);
- const __m256i m8 = _mm256_packus_epi16(m16A, m16B);
- yy_storeu_256(mask, _mm256_permute4x64_epi64(m8, 0xd8));
- src0 += src0_stride << 1;
- src1 += src1_stride << 1;
- mask += 32;
- i += 2;
- } while (i < h);
- } else if (w == 32) {  // 1 row per iteration, two 16-value loads
- do {
- const __m256i s0A = yy_loadu_256(src0);
- const __m256i s0B = yy_loadu_256(src0 + 16);
- const __m256i s1A = yy_loadu_256(src1);
- const __m256i s1B = yy_loadu_256(src1 + 16);
- const __m256i m16A =
- calc_mask_d16_avx2(&s0A, &s1A, &_r, &y38, &y64, shift);
- const __m256i m16B =
- calc_mask_d16_avx2(&s0B, &s1B, &_r, &y38, &y64, shift);
- const __m256i m8 = _mm256_packus_epi16(m16A, m16B);
- yy_storeu_256(mask, _mm256_permute4x64_epi64(m8, 0xd8));
- src0 += src0_stride;
- src1 += src1_stride;
- mask += 32;
- i += 1;
- } while (i < h);
- } else if (w == 64) {  // 1 row per iteration, four 16-value loads
- do {
- const __m256i s0A = yy_loadu_256(src0);
- const __m256i s0B = yy_loadu_256(src0 + 16);
- const __m256i s0C = yy_loadu_256(src0 + 32);
- const __m256i s0D = yy_loadu_256(src0 + 48);
- const __m256i s1A = yy_loadu_256(src1);
- const __m256i s1B = yy_loadu_256(src1 + 16);
- const __m256i s1C = yy_loadu_256(src1 + 32);
- const __m256i s1D = yy_loadu_256(src1 + 48);
- const __m256i m16A =
- calc_mask_d16_avx2(&s0A, &s1A, &_r, &y38, &y64, shift);
- const __m256i m16B =
- calc_mask_d16_avx2(&s0B, &s1B, &_r, &y38, &y64, shift);
- const __m256i m16C =
- calc_mask_d16_avx2(&s0C, &s1C, &_r, &y38, &y64, shift);
- const __m256i m16D =
- calc_mask_d16_avx2(&s0D, &s1D, &_r, &y38, &y64, shift);
- const __m256i m8AB = _mm256_packus_epi16(m16A, m16B);
- const __m256i m8CD = _mm256_packus_epi16(m16C, m16D);
- yy_storeu_256(mask, _mm256_permute4x64_epi64(m8AB, 0xd8));
- yy_storeu_256(mask + 32, _mm256_permute4x64_epi64(m8CD, 0xd8));
- src0 += src0_stride;
- src1 += src1_stride;
- mask += 64;
- i += 1;
- } while (i < h);
- } else {  // remaining case: 1 row per iteration, eight 16-value loads (128 values per row, presumably w == 128)
- do {
- const __m256i s0A = yy_loadu_256(src0);
- const __m256i s0B = yy_loadu_256(src0 + 16);
- const __m256i s0C = yy_loadu_256(src0 + 32);
- const __m256i s0D = yy_loadu_256(src0 + 48);
- const __m256i s0E = yy_loadu_256(src0 + 64);
- const __m256i s0F = yy_loadu_256(src0 + 80);
- const __m256i s0G = yy_loadu_256(src0 + 96);
- const __m256i s0H = yy_loadu_256(src0 + 112);
- const __m256i s1A = yy_loadu_256(src1);
- const __m256i s1B = yy_loadu_256(src1 + 16);
- const __m256i s1C = yy_loadu_256(src1 + 32);
- const __m256i s1D = yy_loadu_256(src1 + 48);
- const __m256i s1E = yy_loadu_256(src1 + 64);
- const __m256i s1F = yy_loadu_256(src1 + 80);
- const __m256i s1G = yy_loadu_256(src1 + 96);
- const __m256i s1H = yy_loadu_256(src1 + 112);
- const __m256i m16A =
- calc_mask_d16_avx2(&s0A, &s1A, &_r, &y38, &y64, shift);
- const __m256i m16B =
- calc_mask_d16_avx2(&s0B, &s1B, &_r, &y38, &y64, shift);
- const __m256i m16C =
- calc_mask_d16_avx2(&s0C, &s1C, &_r, &y38, &y64, shift);
- const __m256i m16D =
- calc_mask_d16_avx2(&s0D, &s1D, &_r, &y38, &y64, shift);
- const __m256i m16E =
- calc_mask_d16_avx2(&s0E, &s1E, &_r, &y38, &y64, shift);
- const __m256i m16F =
- calc_mask_d16_avx2(&s0F, &s1F, &_r, &y38, &y64, shift);
- const __m256i m16G =
- calc_mask_d16_avx2(&s0G, &s1G, &_r, &y38, &y64, shift);
- const __m256i m16H =
- calc_mask_d16_avx2(&s0H, &s1H, &_r, &y38, &y64, shift);
- const __m256i m8AB = _mm256_packus_epi16(m16A, m16B);
- const __m256i m8CD = _mm256_packus_epi16(m16C, m16D);
- const __m256i m8EF = _mm256_packus_epi16(m16E, m16F);
- const __m256i m8GH = _mm256_packus_epi16(m16G, m16H);
- yy_storeu_256(mask, _mm256_permute4x64_epi64(m8AB, 0xd8));
- yy_storeu_256(mask + 32, _mm256_permute4x64_epi64(m8CD, 0xd8));
- yy_storeu_256(mask + 64, _mm256_permute4x64_epi64(m8EF, 0xd8));
- yy_storeu_256(mask + 96, _mm256_permute4x64_epi64(m8GH, 0xd8));
- src0 += src0_stride;
- src1 += src1_stride;
- mask += 128;
- i += 1;
- } while (i < h);
- }
-}
-
-static INLINE void build_compound_diffwtd_mask_d16_inv_avx2(
- uint8_t *mask, const CONV_BUF_TYPE *src0, int src0_stride,
- const CONV_BUF_TYPE *src1, int src1_stride, int h, int w, int shift) {
- const int mask_base = 38;
- const __m256i _r = _mm256_set1_epi16((1 << shift) >> 1);
- const __m256i y38 = _mm256_set1_epi16(mask_base);
- const __m256i y64 = _mm256_set1_epi16(AOM_BLEND_A64_MAX_ALPHA);
- int i = 0;
- if (w == 4) {
- do {
- const __m128i s0A = xx_loadl_64(src0);
- const __m128i s0B = xx_loadl_64(src0 + src0_stride);
- const __m128i s0C = xx_loadl_64(src0 + src0_stride * 2);
- const __m128i s0D = xx_loadl_64(src0 + src0_stride * 3);
- const __m128i s1A = xx_loadl_64(src1);
- const __m128i s1B = xx_loadl_64(src1 + src1_stride);
- const __m128i s1C = xx_loadl_64(src1 + src1_stride * 2);
- const __m128i s1D = xx_loadl_64(src1 + src1_stride * 3);
- const __m256i s0 = yy_set_m128i(_mm_unpacklo_epi64(s0C, s0D),
- _mm_unpacklo_epi64(s0A, s0B));
- const __m256i s1 = yy_set_m128i(_mm_unpacklo_epi64(s1C, s1D),
- _mm_unpacklo_epi64(s1A, s1B));
- const __m256i m16 =
- calc_mask_d16_inv_avx2(&s0, &s1, &_r, &y38, &y64, shift);
- const __m256i m8 = _mm256_packus_epi16(m16, _mm256_setzero_si256());
- xx_storeu_128(mask,
- _mm256_castsi256_si128(_mm256_permute4x64_epi64(m8, 0xd8)));
- src0 += src0_stride << 2;
- src1 += src1_stride << 2;
- mask += 16;
- i += 4;
- } while (i < h);
- } else if (w == 8) {
- do {
- const __m256i s0AB = yy_loadu2_128(src0 + src0_stride, src0);
- const __m256i s0CD =
- yy_loadu2_128(src0 + src0_stride * 3, src0 + src0_stride * 2);
- const __m256i s1AB = yy_loadu2_128(src1 + src1_stride, src1);
- const __m256i s1CD =
- yy_loadu2_128(src1 + src1_stride * 3, src1 + src1_stride * 2);
- const __m256i m16AB =
- calc_mask_d16_inv_avx2(&s0AB, &s1AB, &_r, &y38, &y64, shift);
- const __m256i m16CD =
- calc_mask_d16_inv_avx2(&s0CD, &s1CD, &_r, &y38, &y64, shift);
- const __m256i m8 = _mm256_packus_epi16(m16AB, m16CD);
- yy_storeu_256(mask, _mm256_permute4x64_epi64(m8, 0xd8));
- src0 += src0_stride << 2;
- src1 += src1_stride << 2;
- mask += 32;
- i += 4;
- } while (i < h);
- } else if (w == 16) {
- do {
- const __m256i s0A = yy_loadu_256(src0);
- const __m256i s0B = yy_loadu_256(src0 + src0_stride);
- const __m256i s1A = yy_loadu_256(src1);
- const __m256i s1B = yy_loadu_256(src1 + src1_stride);
- const __m256i m16A =
- calc_mask_d16_inv_avx2(&s0A, &s1A, &_r, &y38, &y64, shift);
- const __m256i m16B =
- calc_mask_d16_inv_avx2(&s0B, &s1B, &_r, &y38, &y64, shift);
- const __m256i m8 = _mm256_packus_epi16(m16A, m16B);
- yy_storeu_256(mask, _mm256_permute4x64_epi64(m8, 0xd8));
- src0 += src0_stride << 1;
- src1 += src1_stride << 1;
- mask += 32;
- i += 2;
- } while (i < h);
- } else if (w == 32) {
- do {
- const __m256i s0A = yy_loadu_256(src0);
- const __m256i s0B = yy_loadu_256(src0 + 16);
- const __m256i s1A = yy_loadu_256(src1);
- const __m256i s1B = yy_loadu_256(src1 + 16);
- const __m256i m16A =
- calc_mask_d16_inv_avx2(&s0A, &s1A, &_r, &y38, &y64, shift);
- const __m256i m16B =
- calc_mask_d16_inv_avx2(&s0B, &s1B, &_r, &y38, &y64, shift);
- const __m256i m8 = _mm256_packus_epi16(m16A, m16B);
- yy_storeu_256(mask, _mm256_permute4x64_epi64(m8, 0xd8));
- src0 += src0_stride;
- src1 += src1_stride;
- mask += 32;
- i += 1;
- } while (i < h);
- } else if (w == 64) {
- do {
- const __m256i s0A = yy_loadu_256(src0);
- const __m256i s0B = yy_loadu_256(src0 + 16);
- const __m256i s0C = yy_loadu_256(src0 + 32);
- const __m256i s0D = yy_loadu_256(src0 + 48);
- const __m256i s1A = yy_loadu_256(src1);
- const __m256i s1B = yy_loadu_256(src1 + 16);
- const __m256i s1C = yy_loadu_256(src1 + 32);
- const __m256i s1D = yy_loadu_256(src1 + 48);
- const __m256i m16A =
- calc_mask_d16_inv_avx2(&s0A, &s1A, &_r, &y38, &y64, shift);
- const __m256i m16B =
- calc_mask_d16_inv_avx2(&s0B, &s1B, &_r, &y38, &y64, shift);
- const __m256i m16C =
- calc_mask_d16_inv_avx2(&s0C, &s1C, &_r, &y38, &y64, shift);
- const __m256i m16D =
- calc_mask_d16_inv_avx2(&s0D, &s1D, &_r, &y38, &y64, shift);
- const __m256i m8AB = _mm256_packus_epi16(m16A, m16B);
- const __m256i m8CD = _mm256_packus_epi16(m16C, m16D);
- yy_storeu_256(mask, _mm256_permute4x64_epi64(m8AB, 0xd8));
- yy_storeu_256(mask + 32, _mm256_permute4x64_epi64(m8CD, 0xd8));
- src0 += src0_stride;
- src1 += src1_stride;
- mask += 64;
- i += 1;
- } while (i < h);
- } else {
- do {
- const __m256i s0A = yy_loadu_256(src0);
- const __m256i s0B = yy_loadu_256(src0 + 16);
- const __m256i s0C = yy_loadu_256(src0 + 32);
- const __m256i s0D = yy_loadu_256(src0 + 48);
- const __m256i s0E = yy_loadu_256(src0 + 64);
- const __m256i s0F = yy_loadu_256(src0 + 80);
- const __m256i s0G = yy_loadu_256(src0 + 96);
- const __m256i s0H = yy_loadu_256(src0 + 112);
- const __m256i s1A = yy_loadu_256(src1);
- const __m256i s1B = yy_loadu_256(src1 + 16);
- const __m256i s1C = yy_loadu_256(src1 + 32);
- const __m256i s1D = yy_loadu_256(src1 + 48);
- const __m256i s1E = yy_loadu_256(src1 + 64);
- const __m256i s1F = yy_loadu_256(src1 + 80);
- const __m256i s1G = yy_loadu_256(src1 + 96);
- const __m256i s1H = yy_loadu_256(src1 + 112);
- const __m256i m16A =
- calc_mask_d16_inv_avx2(&s0A, &s1A, &_r, &y38, &y64, shift);
- const __m256i m16B =
- calc_mask_d16_inv_avx2(&s0B, &s1B, &_r, &y38, &y64, shift);
- const __m256i m16C =
- calc_mask_d16_inv_avx2(&s0C, &s1C, &_r, &y38, &y64, shift);
- const __m256i m16D =
- calc_mask_d16_inv_avx2(&s0D, &s1D, &_r, &y38, &y64, shift);
- const __m256i m16E =
- calc_mask_d16_inv_avx2(&s0E, &s1E, &_r, &y38, &y64, shift);
- const __m256i m16F =
- calc_mask_d16_inv_avx2(&s0F, &s1F, &_r, &y38, &y64, shift);
- const __m256i m16G =
- calc_mask_d16_inv_avx2(&s0G, &s1G, &_r, &y38, &y64, shift);
- const __m256i m16H =
- calc_mask_d16_inv_avx2(&s0H, &s1H, &_r, &y38, &y64, shift);
- const __m256i m8AB = _mm256_packus_epi16(m16A, m16B);
- const __m256i m8CD = _mm256_packus_epi16(m16C, m16D);
- const __m256i m8EF = _mm256_packus_epi16(m16E, m16F);
- const __m256i m8GH = _mm256_packus_epi16(m16G, m16H);
- yy_storeu_256(mask, _mm256_permute4x64_epi64(m8AB, 0xd8));
- yy_storeu_256(mask + 32, _mm256_permute4x64_epi64(m8CD, 0xd8));
- yy_storeu_256(mask + 64, _mm256_permute4x64_epi64(m8EF, 0xd8));
- yy_storeu_256(mask + 96, _mm256_permute4x64_epi64(m8GH, 0xd8));
- src0 += src0_stride;
- src1 += src1_stride;
- mask += 128;
- i += 1;
- } while (i < h);
- }
-}
-
-void av1_build_compound_diffwtd_mask_d16_avx2(
- uint8_t *mask, DIFFWTD_MASK_TYPE mask_type, const CONV_BUF_TYPE *src0,
- int src0_stride, const CONV_BUF_TYPE *src1, int src1_stride, int h, int w,
- ConvolveParams *conv_params, int bd) {
- const int shift =
- 2 * FILTER_BITS - conv_params->round_0 - conv_params->round_1 + (bd - 8);
- // When rounding constant is added, there is a possibility of overflow.
- // However that much precision is not required. Code should very well work for
- // other values of DIFF_FACTOR_LOG2 and AOM_BLEND_A64_MAX_ALPHA as well. But
- // there is a possibility of corner case bugs.
- assert(DIFF_FACTOR_LOG2 == 4);
- assert(AOM_BLEND_A64_MAX_ALPHA == 64);
-
- if (mask_type == DIFFWTD_38) {
- build_compound_diffwtd_mask_d16_avx2(mask, src0, src0_stride, src1,
- src1_stride, h, w, shift);
- } else {
- build_compound_diffwtd_mask_d16_inv_avx2(mask, src0, src0_stride, src1,
- src1_stride, h, w, shift);
- }
-}
-
-void av1_build_compound_diffwtd_mask_highbd_avx2(
- uint8_t *mask, DIFFWTD_MASK_TYPE mask_type, const uint8_t *src0,
- int src0_stride, const uint8_t *src1, int src1_stride, int h, int w,
- int bd) {
- if (w < 16) {
- av1_build_compound_diffwtd_mask_highbd_ssse3(
- mask, mask_type, src0, src0_stride, src1, src1_stride, h, w, bd);
- } else {
- assert(mask_type == DIFFWTD_38 || mask_type == DIFFWTD_38_INV);
- assert(bd >= 8);
- assert((w % 16) == 0);
- const __m256i y0 = _mm256_setzero_si256();
- const __m256i yAOM_BLEND_A64_MAX_ALPHA =
- _mm256_set1_epi16(AOM_BLEND_A64_MAX_ALPHA);
- const int mask_base = 38;
- const __m256i ymask_base = _mm256_set1_epi16(mask_base);
- const uint16_t *ssrc0 = CONVERT_TO_SHORTPTR(src0);
- const uint16_t *ssrc1 = CONVERT_TO_SHORTPTR(src1);
- if (bd == 8) {
- if (mask_type == DIFFWTD_38_INV) {
- for (int i = 0; i < h; ++i) {
- for (int j = 0; j < w; j += 16) {
- __m256i s0 = _mm256_loadu_si256((const __m256i *)&ssrc0[j]);
- __m256i s1 = _mm256_loadu_si256((const __m256i *)&ssrc1[j]);
- __m256i diff = _mm256_srai_epi16(
- _mm256_abs_epi16(_mm256_sub_epi16(s0, s1)), DIFF_FACTOR_LOG2);
- __m256i m = _mm256_min_epi16(
- _mm256_max_epi16(y0, _mm256_add_epi16(diff, ymask_base)),
- yAOM_BLEND_A64_MAX_ALPHA);
- m = _mm256_sub_epi16(yAOM_BLEND_A64_MAX_ALPHA, m);
- m = _mm256_packus_epi16(m, m);
- m = _mm256_permute4x64_epi64(m, _MM_SHUFFLE(0, 0, 2, 0));
- __m128i m0 = _mm256_castsi256_si128(m);
- _mm_storeu_si128((__m128i *)&mask[j], m0);
- }
- ssrc0 += src0_stride;
- ssrc1 += src1_stride;
- mask += w;
- }
- } else {
- for (int i = 0; i < h; ++i) {
- for (int j = 0; j < w; j += 16) {
- __m256i s0 = _mm256_loadu_si256((const __m256i *)&ssrc0[j]);
- __m256i s1 = _mm256_loadu_si256((const __m256i *)&ssrc1[j]);
- __m256i diff = _mm256_srai_epi16(
- _mm256_abs_epi16(_mm256_sub_epi16(s0, s1)), DIFF_FACTOR_LOG2);
- __m256i m = _mm256_min_epi16(
- _mm256_max_epi16(y0, _mm256_add_epi16(diff, ymask_base)),
- yAOM_BLEND_A64_MAX_ALPHA);
- m = _mm256_packus_epi16(m, m);
- m = _mm256_permute4x64_epi64(m, _MM_SHUFFLE(0, 0, 2, 0));
- __m128i m0 = _mm256_castsi256_si128(m);
- _mm_storeu_si128((__m128i *)&mask[j], m0);
- }
- ssrc0 += src0_stride;
- ssrc1 += src1_stride;
- mask += w;
- }
- }
- } else {
- const __m128i xshift = xx_set1_64_from_32i(bd - 8 + DIFF_FACTOR_LOG2);
- if (mask_type == DIFFWTD_38_INV) {
- for (int i = 0; i < h; ++i) {
- for (int j = 0; j < w; j += 16) {
- __m256i s0 = _mm256_loadu_si256((const __m256i *)&ssrc0[j]);
- __m256i s1 = _mm256_loadu_si256((const __m256i *)&ssrc1[j]);
- __m256i diff = _mm256_sra_epi16(
- _mm256_abs_epi16(_mm256_sub_epi16(s0, s1)), xshift);
- __m256i m = _mm256_min_epi16(
- _mm256_max_epi16(y0, _mm256_add_epi16(diff, ymask_base)),
- yAOM_BLEND_A64_MAX_ALPHA);
- m = _mm256_sub_epi16(yAOM_BLEND_A64_MAX_ALPHA, m);
- m = _mm256_packus_epi16(m, m);
- m = _mm256_permute4x64_epi64(m, _MM_SHUFFLE(0, 0, 2, 0));
- __m128i m0 = _mm256_castsi256_si128(m);
- _mm_storeu_si128((__m128i *)&mask[j], m0);
- }
- ssrc0 += src0_stride;
- ssrc1 += src1_stride;
- mask += w;
- }
- } else {
- for (int i = 0; i < h; ++i) {
- for (int j = 0; j < w; j += 16) {
- __m256i s0 = _mm256_loadu_si256((const __m256i *)&ssrc0[j]);
- __m256i s1 = _mm256_loadu_si256((const __m256i *)&ssrc1[j]);
- __m256i diff = _mm256_sra_epi16(
- _mm256_abs_epi16(_mm256_sub_epi16(s0, s1)), xshift);
- __m256i m = _mm256_min_epi16(
- _mm256_max_epi16(y0, _mm256_add_epi16(diff, ymask_base)),
- yAOM_BLEND_A64_MAX_ALPHA);
- m = _mm256_packus_epi16(m, m);
- m = _mm256_permute4x64_epi64(m, _MM_SHUFFLE(0, 0, 2, 0));
- __m128i m0 = _mm256_castsi256_si128(m);
- _mm_storeu_si128((__m128i *)&mask[j], m0);
- }
- ssrc0 += src0_stride;
- ssrc1 += src1_stride;
- mask += w;
- }
- }
- }
- }
-}
diff --git a/third_party/aom/av1/common/x86/reconinter_sse4.c b/third_party/aom/av1/common/x86/reconinter_sse4.c
deleted file mode 100644
index 5171ca493..000000000
--- a/third_party/aom/av1/common/x86/reconinter_sse4.c
+++ /dev/null
@@ -1,153 +0,0 @@
-/*
- * Copyright (c) 2018, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#include <emmintrin.h> // SSE2
-#include <smmintrin.h> /* SSE4.1 */
-
-#include "aom/aom_integer.h"
-#include "aom_dsp/blend.h"
-#include "av1/common/blockd.h"
-
-static INLINE __m128i calc_mask(const __m128i mask_base, const __m128i s0,
- const __m128i s1) {
- const __m128i diff = _mm_abs_epi16(_mm_sub_epi16(s0, s1));
- return _mm_abs_epi16(_mm_add_epi16(mask_base, _mm_srli_epi16(diff, 4)));
- // clamp(diff, 0, 64) can be skiped for diff is always in the range ( 38, 54)
-}
-
-void av1_build_compound_diffwtd_mask_sse4_1(uint8_t *mask,
- DIFFWTD_MASK_TYPE mask_type,
- const uint8_t *src0, int stride0,
- const uint8_t *src1, int stride1,
- int h, int w) {
- const int mb = (mask_type == DIFFWTD_38_INV) ? AOM_BLEND_A64_MAX_ALPHA : 0;
- const __m128i mask_base = _mm_set1_epi16(38 - mb);
- int i = 0;
- if (4 == w) {
- do {
- const __m128i s0A = _mm_cvtsi32_si128(*(uint32_t *)src0);
- const __m128i s0B = _mm_cvtsi32_si128(*(uint32_t *)(src0 + stride0));
- const __m128i s0AB = _mm_unpacklo_epi32(s0A, s0B);
- const __m128i s0 = _mm_cvtepu8_epi16(s0AB);
-
- const __m128i s1A = _mm_cvtsi32_si128(*(uint32_t *)src1);
- const __m128i s1B = _mm_cvtsi32_si128(*(uint32_t *)(src1 + stride1));
- const __m128i s1AB = _mm_unpacklo_epi32(s1A, s1B);
- const __m128i s1 = _mm_cvtepu8_epi16(s1AB);
-
- const __m128i m16 = calc_mask(mask_base, s0, s1);
- const __m128i m8 = _mm_packus_epi16(m16, m16);
-
- *(uint32_t *)mask = _mm_cvtsi128_si32(m8);
- *(uint32_t *)(mask + w) = _mm_extract_epi32(m8, 1);
- src0 += (stride0 << 1);
- src1 += (stride1 << 1);
- mask += 8;
- i += 2;
- } while (i < h);
- } else if (8 == w) {
- do {
- __m128i s0 = _mm_loadl_epi64((__m128i const *)src0);
- __m128i s1 = _mm_loadl_epi64((__m128i const *)src1);
- s0 = _mm_cvtepu8_epi16(s0);
- s1 = _mm_cvtepu8_epi16(s1);
- const __m128i m16 = calc_mask(mask_base, s0, s1);
- const __m128i m8 = _mm_packus_epi16(m16, m16);
- _mm_storel_epi64((__m128i *)mask, m8);
- src0 += stride0;
- src1 += stride1;
- mask += 8;
- i += 1;
- } while (i < h);
- } else {
- const __m128i zero = _mm_setzero_si128();
- do {
- int j = 0;
- do {
- const __m128i s0 = _mm_load_si128((__m128i const *)(src0 + j));
- const __m128i s1 = _mm_load_si128((__m128i const *)(src1 + j));
- const __m128i s0L = _mm_cvtepu8_epi16(s0);
- const __m128i s1L = _mm_cvtepu8_epi16(s1);
- const __m128i s0H = _mm_unpackhi_epi8(s0, zero);
- const __m128i s1H = _mm_unpackhi_epi8(s1, zero);
-
- const __m128i m16L = calc_mask(mask_base, s0L, s1L);
- const __m128i m16H = calc_mask(mask_base, s0H, s1H);
-
- const __m128i m8 = _mm_packus_epi16(m16L, m16H);
- _mm_store_si128((__m128i *)(mask + j), m8);
- j += 16;
- } while (j < w);
- src0 += stride0;
- src1 += stride1;
- mask += w;
- i += 1;
- } while (i < h);
- }
-}
-
-void av1_build_compound_diffwtd_mask_d16_sse4_1(
- uint8_t *mask, DIFFWTD_MASK_TYPE mask_type, const CONV_BUF_TYPE *src0,
- int src0_stride, const CONV_BUF_TYPE *src1, int src1_stride, int h, int w,
- ConvolveParams *conv_params, int bd) {
- const int which_inverse = (mask_type == DIFFWTD_38) ? 0 : 1;
- const int mask_base = 38;
- int round =
- 2 * FILTER_BITS - conv_params->round_0 - conv_params->round_1 + (bd - 8);
- const __m128i round_const = _mm_set1_epi16((1 << round) >> 1);
- const __m128i mask_base_16 = _mm_set1_epi16(mask_base);
- const __m128i clip_diff = _mm_set1_epi16(AOM_BLEND_A64_MAX_ALPHA);
- const __m128i add_const =
- _mm_set1_epi16((which_inverse ? AOM_BLEND_A64_MAX_ALPHA : 0));
- const __m128i add_sign = _mm_set1_epi16((which_inverse ? -1 : 1));
-
- int i, j;
- // When rounding constant is added, there is a possibility of overflow.
- // However that much precision is not required. Code should very well work for
- // other values of DIFF_FACTOR_LOG2 and AOM_BLEND_A64_MAX_ALPHA as well. But
- // there is a possibility of corner case bugs.
- assert(DIFF_FACTOR_LOG2 == 4);
- assert(AOM_BLEND_A64_MAX_ALPHA == 64);
- for (i = 0; i < h; ++i) {
- for (j = 0; j < w; j += 8) {
- const __m128i data_src0 =
- _mm_loadu_si128((__m128i *)&src0[(i * src0_stride) + j]);
- const __m128i data_src1 =
- _mm_loadu_si128((__m128i *)&src1[(i * src1_stride) + j]);
-
- const __m128i diffa = _mm_subs_epu16(data_src0, data_src1);
- const __m128i diffb = _mm_subs_epu16(data_src1, data_src0);
- const __m128i diff = _mm_max_epu16(diffa, diffb);
- const __m128i diff_round =
- _mm_srli_epi16(_mm_adds_epu16(diff, round_const), round);
- const __m128i diff_factor = _mm_srli_epi16(diff_round, DIFF_FACTOR_LOG2);
- const __m128i diff_mask = _mm_adds_epi16(diff_factor, mask_base_16);
- __m128i diff_clamp = _mm_min_epi16(diff_mask, clip_diff);
- // clamp to 0 can be skipped since we are using add and saturate
- // instruction
-
- const __m128i diff_sign = _mm_sign_epi16(diff_clamp, add_sign);
- const __m128i diff_const_16 = _mm_add_epi16(diff_sign, add_const);
-
- // 8 bit conversion and saturation to uint8
- const __m128i res_8 = _mm_packus_epi16(diff_const_16, diff_const_16);
-
- // Store values into the destination buffer
- __m128i *const dst = (__m128i *)&mask[i * w + j];
-
- if ((w - j) > 4) {
- _mm_storel_epi64(dst, res_8);
- } else { // w==4
- *(uint32_t *)dst = _mm_cvtsi128_si32(res_8);
- }
- }
- }
-}
diff --git a/third_party/aom/av1/common/x86/reconinter_ssse3.c b/third_party/aom/av1/common/x86/reconinter_ssse3.c
deleted file mode 100644
index cf684447c..000000000
--- a/third_party/aom/av1/common/x86/reconinter_ssse3.c
+++ /dev/null
@@ -1,116 +0,0 @@
-/*
- * Copyright (c) 2018, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#include <tmmintrin.h>
-
-#include "config/av1_rtcd.h"
-
-#include "aom/aom_integer.h"
-#include "aom_dsp/blend.h"
-#include "aom_dsp/x86/synonyms.h"
-#include "av1/common/blockd.h"
-
-void av1_build_compound_diffwtd_mask_highbd_ssse3(
- uint8_t *mask, DIFFWTD_MASK_TYPE mask_type, const uint8_t *src0,
- int src0_stride, const uint8_t *src1, int src1_stride, int h, int w,
- int bd) {
- if (w < 8) {
- av1_build_compound_diffwtd_mask_highbd_c(mask, mask_type, src0, src0_stride,
- src1, src1_stride, h, w, bd);
- } else {
- assert(bd >= 8);
- assert((w % 8) == 0);
- assert(mask_type == DIFFWTD_38 || mask_type == DIFFWTD_38_INV);
- const __m128i x0 = _mm_setzero_si128();
- const __m128i xAOM_BLEND_A64_MAX_ALPHA =
- _mm_set1_epi16(AOM_BLEND_A64_MAX_ALPHA);
- const int mask_base = 38;
- const __m128i xmask_base = _mm_set1_epi16(mask_base);
- const uint16_t *ssrc0 = CONVERT_TO_SHORTPTR(src0);
- const uint16_t *ssrc1 = CONVERT_TO_SHORTPTR(src1);
- if (bd == 8) {
- if (mask_type == DIFFWTD_38_INV) {
- for (int i = 0; i < h; ++i) {
- for (int j = 0; j < w; j += 8) {
- __m128i s0 = _mm_loadu_si128((const __m128i *)&ssrc0[j]);
- __m128i s1 = _mm_loadu_si128((const __m128i *)&ssrc1[j]);
- __m128i diff = _mm_srai_epi16(_mm_abs_epi16(_mm_sub_epi16(s0, s1)),
- DIFF_FACTOR_LOG2);
- __m128i m = _mm_min_epi16(
- _mm_max_epi16(x0, _mm_add_epi16(diff, xmask_base)),
- xAOM_BLEND_A64_MAX_ALPHA);
- m = _mm_sub_epi16(xAOM_BLEND_A64_MAX_ALPHA, m);
- m = _mm_packus_epi16(m, m);
- _mm_storel_epi64((__m128i *)&mask[j], m);
- }
- ssrc0 += src0_stride;
- ssrc1 += src1_stride;
- mask += w;
- }
- } else {
- for (int i = 0; i < h; ++i) {
- for (int j = 0; j < w; j += 8) {
- __m128i s0 = _mm_loadu_si128((const __m128i *)&ssrc0[j]);
- __m128i s1 = _mm_loadu_si128((const __m128i *)&ssrc1[j]);
- __m128i diff = _mm_srai_epi16(_mm_abs_epi16(_mm_sub_epi16(s0, s1)),
- DIFF_FACTOR_LOG2);
- __m128i m = _mm_min_epi16(
- _mm_max_epi16(x0, _mm_add_epi16(diff, xmask_base)),
- xAOM_BLEND_A64_MAX_ALPHA);
- m = _mm_packus_epi16(m, m);
- _mm_storel_epi64((__m128i *)&mask[j], m);
- }
- ssrc0 += src0_stride;
- ssrc1 += src1_stride;
- mask += w;
- }
- }
- } else {
- const __m128i xshift = xx_set1_64_from_32i(bd - 8 + DIFF_FACTOR_LOG2);
- if (mask_type == DIFFWTD_38_INV) {
- for (int i = 0; i < h; ++i) {
- for (int j = 0; j < w; j += 8) {
- __m128i s0 = _mm_loadu_si128((const __m128i *)&ssrc0[j]);
- __m128i s1 = _mm_loadu_si128((const __m128i *)&ssrc1[j]);
- __m128i diff =
- _mm_sra_epi16(_mm_abs_epi16(_mm_sub_epi16(s0, s1)), xshift);
- __m128i m = _mm_min_epi16(
- _mm_max_epi16(x0, _mm_add_epi16(diff, xmask_base)),
- xAOM_BLEND_A64_MAX_ALPHA);
- m = _mm_sub_epi16(xAOM_BLEND_A64_MAX_ALPHA, m);
- m = _mm_packus_epi16(m, m);
- _mm_storel_epi64((__m128i *)&mask[j], m);
- }
- ssrc0 += src0_stride;
- ssrc1 += src1_stride;
- mask += w;
- }
- } else {
- for (int i = 0; i < h; ++i) {
- for (int j = 0; j < w; j += 8) {
- __m128i s0 = _mm_loadu_si128((const __m128i *)&ssrc0[j]);
- __m128i s1 = _mm_loadu_si128((const __m128i *)&ssrc1[j]);
- __m128i diff =
- _mm_sra_epi16(_mm_abs_epi16(_mm_sub_epi16(s0, s1)), xshift);
- __m128i m = _mm_min_epi16(
- _mm_max_epi16(x0, _mm_add_epi16(diff, xmask_base)),
- xAOM_BLEND_A64_MAX_ALPHA);
- m = _mm_packus_epi16(m, m);
- _mm_storel_epi64((__m128i *)&mask[j], m);
- }
- ssrc0 += src0_stride;
- ssrc1 += src1_stride;
- mask += w;
- }
- }
- }
- }
-}
diff --git a/third_party/aom/av1/common/x86/selfguided_avx2.c b/third_party/aom/av1/common/x86/selfguided_avx2.c
deleted file mode 100644
index 0aaf1f454..000000000
--- a/third_party/aom/av1/common/x86/selfguided_avx2.c
+++ /dev/null
@@ -1,724 +0,0 @@
-/*
- * Copyright (c) 2018, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#include <immintrin.h>
-
-#include "config/aom_config.h"
-#include "config/av1_rtcd.h"
-
-#include "av1/common/restoration.h"
-#include "aom_dsp/x86/synonyms.h"
-#include "aom_dsp/x86/synonyms_avx2.h"
-
-// Load 8 bytes from the possibly-misaligned pointer p, extend each byte to
-// 32-bit precision and return them in an AVX2 register.
-static __m256i yy256_load_extend_8_32(const void *p) {
- return _mm256_cvtepu8_epi32(xx_loadl_64(p));
-}
-
-// Load 8 halfwords from the possibly-misaligned pointer p, extend each
-// halfword to 32-bit precision and return them in an AVX2 register.
-static __m256i yy256_load_extend_16_32(const void *p) {
- return _mm256_cvtepu16_epi32(xx_loadu_128(p));
-}
-
-// Compute the scan of an AVX2 register holding 8 32-bit integers. If the
-// register holds x0..x7 then the scan will hold x0, x0+x1, x0+x1+x2, ...,
-// x0+x1+...+x7
-//
-// Let [...] represent a 128-bit block, and let a, ..., h be 32-bit integers
-// (assumed small enough to be able to add them without overflow).
-//
-// Use -> as shorthand for summing, i.e. h->a = h + g + f + e + d + c + b + a.
-//
-// x = [h g f e][d c b a]
-// x01 = [g f e 0][c b a 0]
-// x02 = [g+h f+g e+f e][c+d b+c a+b a]
-// x03 = [e+f e 0 0][a+b a 0 0]
-// x04 = [e->h e->g e->f e][a->d a->c a->b a]
-// s = a->d
-// s01 = [a->d a->d a->d a->d]
-// s02 = [a->d a->d a->d a->d][0 0 0 0]
-// ret = [a->h a->g a->f a->e][a->d a->c a->b a]
-static __m256i scan_32(__m256i x) {
- const __m256i x01 = _mm256_slli_si256(x, 4);
- const __m256i x02 = _mm256_add_epi32(x, x01);
- const __m256i x03 = _mm256_slli_si256(x02, 8);
- const __m256i x04 = _mm256_add_epi32(x02, x03);
- const int32_t s = _mm256_extract_epi32(x04, 3);
- const __m128i s01 = _mm_set1_epi32(s);
- const __m256i s02 = _mm256_insertf128_si256(_mm256_setzero_si256(), s01, 1);
- return _mm256_add_epi32(x04, s02);
-}
-
-// Compute two integral images from src. B sums elements; A sums their
-// squares. The images are offset by one pixel, so will have width and height
-// equal to width + 1, height + 1 and the first row and column will be zero.
-//
-// A+1 and B+1 should be aligned to 32 bytes. buf_stride should be a multiple
-// of 8.
-
-static void *memset_zero_avx(int32_t *dest, const __m256i *zero, size_t count) {
- unsigned int i = 0;
- for (i = 0; i < (count & 0xffffffe0); i += 32) {
- _mm256_storeu_si256((__m256i *)(dest + i), *zero);
- _mm256_storeu_si256((__m256i *)(dest + i + 8), *zero);
- _mm256_storeu_si256((__m256i *)(dest + i + 16), *zero);
- _mm256_storeu_si256((__m256i *)(dest + i + 24), *zero);
- }
- for (; i < (count & 0xfffffff8); i += 8) {
- _mm256_storeu_si256((__m256i *)(dest + i), *zero);
- }
- for (; i < count; i++) {
- dest[i] = 0;
- }
- return dest;
-}
-
-static void integral_images(const uint8_t *src, int src_stride, int width,
- int height, int32_t *A, int32_t *B,
- int buf_stride) {
- const __m256i zero = _mm256_setzero_si256();
- // Write out the zero top row
- memset_zero_avx(A, &zero, (width + 8));
- memset_zero_avx(B, &zero, (width + 8));
- for (int i = 0; i < height; ++i) {
- // Zero the left column.
- A[(i + 1) * buf_stride] = B[(i + 1) * buf_stride] = 0;
-
- // ldiff is the difference H - D where H is the output sample immediately
- // to the left and D is the output sample above it. These are scalars,
- // replicated across the eight lanes.
- __m256i ldiff1 = zero, ldiff2 = zero;
- for (int j = 0; j < width; j += 8) {
- const int ABj = 1 + j;
-
- const __m256i above1 = yy_load_256(B + ABj + i * buf_stride);
- const __m256i above2 = yy_load_256(A + ABj + i * buf_stride);
-
- const __m256i x1 = yy256_load_extend_8_32(src + j + i * src_stride);
- const __m256i x2 = _mm256_madd_epi16(x1, x1);
-
- const __m256i sc1 = scan_32(x1);
- const __m256i sc2 = scan_32(x2);
-
- const __m256i row1 =
- _mm256_add_epi32(_mm256_add_epi32(sc1, above1), ldiff1);
- const __m256i row2 =
- _mm256_add_epi32(_mm256_add_epi32(sc2, above2), ldiff2);
-
- yy_store_256(B + ABj + (i + 1) * buf_stride, row1);
- yy_store_256(A + ABj + (i + 1) * buf_stride, row2);
-
- // Calculate the new H - D.
- ldiff1 = _mm256_set1_epi32(
- _mm256_extract_epi32(_mm256_sub_epi32(row1, above1), 7));
- ldiff2 = _mm256_set1_epi32(
- _mm256_extract_epi32(_mm256_sub_epi32(row2, above2), 7));
- }
- }
-}
-
-// Compute two integral images from src. B sums elements; A sums their squares
-//
-// A and B should be aligned to 32 bytes. buf_stride should be a multiple of 8.
-static void integral_images_highbd(const uint16_t *src, int src_stride,
- int width, int height, int32_t *A,
- int32_t *B, int buf_stride) {
- const __m256i zero = _mm256_setzero_si256();
- // Write out the zero top row
- memset_zero_avx(A, &zero, (width + 8));
- memset_zero_avx(B, &zero, (width + 8));
-
- for (int i = 0; i < height; ++i) {
- // Zero the left column.
- A[(i + 1) * buf_stride] = B[(i + 1) * buf_stride] = 0;
-
- // ldiff is the difference H - D where H is the output sample immediately
- // to the left and D is the output sample above it. These are scalars,
- // replicated across the eight lanes.
- __m256i ldiff1 = zero, ldiff2 = zero;
- for (int j = 0; j < width; j += 8) {
- const int ABj = 1 + j;
-
- const __m256i above1 = yy_load_256(B + ABj + i * buf_stride);
- const __m256i above2 = yy_load_256(A + ABj + i * buf_stride);
-
- const __m256i x1 = yy256_load_extend_16_32(src + j + i * src_stride);
- const __m256i x2 = _mm256_madd_epi16(x1, x1);
-
- const __m256i sc1 = scan_32(x1);
- const __m256i sc2 = scan_32(x2);
-
- const __m256i row1 =
- _mm256_add_epi32(_mm256_add_epi32(sc1, above1), ldiff1);
- const __m256i row2 =
- _mm256_add_epi32(_mm256_add_epi32(sc2, above2), ldiff2);
-
- yy_store_256(B + ABj + (i + 1) * buf_stride, row1);
- yy_store_256(A + ABj + (i + 1) * buf_stride, row2);
-
- // Calculate the new H - D.
- ldiff1 = _mm256_set1_epi32(
- _mm256_extract_epi32(_mm256_sub_epi32(row1, above1), 7));
- ldiff2 = _mm256_set1_epi32(
- _mm256_extract_epi32(_mm256_sub_epi32(row2, above2), 7));
- }
- }
-}
-
-// Compute 8 values of boxsum from the given integral image. ii should point
-// at the middle of the box (for the first value). r is the box radius.
-static INLINE __m256i boxsum_from_ii(const int32_t *ii, int stride, int r) {
- const __m256i tl = yy_loadu_256(ii - (r + 1) - (r + 1) * stride);
- const __m256i tr = yy_loadu_256(ii + (r + 0) - (r + 1) * stride);
- const __m256i bl = yy_loadu_256(ii - (r + 1) + r * stride);
- const __m256i br = yy_loadu_256(ii + (r + 0) + r * stride);
- const __m256i u = _mm256_sub_epi32(tr, tl);
- const __m256i v = _mm256_sub_epi32(br, bl);
- return _mm256_sub_epi32(v, u);
-}
-
-static __m256i round_for_shift(unsigned shift) {
- return _mm256_set1_epi32((1 << shift) >> 1);
-}
-
-static __m256i compute_p(__m256i sum1, __m256i sum2, int bit_depth, int n) {
- __m256i an, bb;
- if (bit_depth > 8) {
- const __m256i rounding_a = round_for_shift(2 * (bit_depth - 8));
- const __m256i rounding_b = round_for_shift(bit_depth - 8);
- const __m128i shift_a = _mm_cvtsi32_si128(2 * (bit_depth - 8));
- const __m128i shift_b = _mm_cvtsi32_si128(bit_depth - 8);
- const __m256i a =
- _mm256_srl_epi32(_mm256_add_epi32(sum2, rounding_a), shift_a);
- const __m256i b =
- _mm256_srl_epi32(_mm256_add_epi32(sum1, rounding_b), shift_b);
- // b < 2^14, so we can use a 16-bit madd rather than a 32-bit
- // mullo to square it
- bb = _mm256_madd_epi16(b, b);
- an = _mm256_max_epi32(_mm256_mullo_epi32(a, _mm256_set1_epi32(n)), bb);
- } else {
- bb = _mm256_madd_epi16(sum1, sum1);
- an = _mm256_mullo_epi32(sum2, _mm256_set1_epi32(n));
- }
- return _mm256_sub_epi32(an, bb);
-}
-
-// Assumes that C, D are integral images for the original buffer which has been
-// extended to have a padding of SGRPROJ_BORDER_VERT/SGRPROJ_BORDER_HORZ pixels
-// on the sides. A, B, C, D point at logical position (0, 0).
-static void calc_ab(int32_t *A, int32_t *B, const int32_t *C, const int32_t *D,
- int width, int height, int buf_stride, int bit_depth,
- int sgr_params_idx, int radius_idx) {
- const sgr_params_type *const params = &sgr_params[sgr_params_idx];
- const int r = params->r[radius_idx];
- const int n = (2 * r + 1) * (2 * r + 1);
- const __m256i s = _mm256_set1_epi32(params->s[radius_idx]);
- // one_over_n[n-1] is 2^12/n, so easily fits in an int16
- const __m256i one_over_n = _mm256_set1_epi32(one_by_x[n - 1]);
-
- const __m256i rnd_z = round_for_shift(SGRPROJ_MTABLE_BITS);
- const __m256i rnd_res = round_for_shift(SGRPROJ_RECIP_BITS);
-
- // Set up masks
- const __m128i ones32 = _mm_set_epi32(0, 0, 0xffffffff, 0xffffffff);
- __m256i mask[8];
- for (int idx = 0; idx < 8; idx++) {
- const __m128i shift = _mm_cvtsi32_si128(8 * (8 - idx));
- mask[idx] = _mm256_cvtepi8_epi32(_mm_srl_epi64(ones32, shift));
- }
-
- for (int i = -1; i < height + 1; ++i) {
- for (int j = -1; j < width + 1; j += 8) {
- const int32_t *Cij = C + i * buf_stride + j;
- const int32_t *Dij = D + i * buf_stride + j;
-
- __m256i sum1 = boxsum_from_ii(Dij, buf_stride, r);
- __m256i sum2 = boxsum_from_ii(Cij, buf_stride, r);
-
- // When width + 2 isn't a multiple of 8, sum1 and sum2 will contain
- // some uninitialised data in their upper words. We use a mask to
- // ensure that these bits are set to 0.
- int idx = AOMMIN(8, width + 1 - j);
- assert(idx >= 1);
-
- if (idx < 8) {
- sum1 = _mm256_and_si256(mask[idx], sum1);
- sum2 = _mm256_and_si256(mask[idx], sum2);
- }
-
- const __m256i p = compute_p(sum1, sum2, bit_depth, n);
-
- const __m256i z = _mm256_min_epi32(
- _mm256_srli_epi32(_mm256_add_epi32(_mm256_mullo_epi32(p, s), rnd_z),
- SGRPROJ_MTABLE_BITS),
- _mm256_set1_epi32(255));
-
- const __m256i a_res = _mm256_i32gather_epi32(x_by_xplus1, z, 4);
-
- yy_storeu_256(A + i * buf_stride + j, a_res);
-
- const __m256i a_complement =
- _mm256_sub_epi32(_mm256_set1_epi32(SGRPROJ_SGR), a_res);
-
- // sum1 might have lanes greater than 2^15, so we can't use madd to do
- // multiplication involving sum1. However, a_complement and one_over_n
- // are both less than 256, so we can multiply them first.
- const __m256i a_comp_over_n = _mm256_madd_epi16(a_complement, one_over_n);
- const __m256i b_int = _mm256_mullo_epi32(a_comp_over_n, sum1);
- const __m256i b_res = _mm256_srli_epi32(_mm256_add_epi32(b_int, rnd_res),
- SGRPROJ_RECIP_BITS);
-
- yy_storeu_256(B + i * buf_stride + j, b_res);
- }
- }
-}
-
-// Calculate 8 values of the "cross sum" starting at buf. This is a 3x3 filter
-// where the outer four corners have weight 3 and all other pixels have weight
-// 4.
-//
-// Pixels are indexed as follows:
-// xtl xt xtr
-// xl x xr
-// xbl xb xbr
-//
-// buf points to x
-//
-// fours = xl + xt + xr + xb + x
-// threes = xtl + xtr + xbr + xbl
-// cross_sum = 4 * fours + 3 * threes
-// = 4 * (fours + threes) - threes
-// = (fours + threes) << 2 - threes
-static INLINE __m256i cross_sum(const int32_t *buf, int stride) {
- const __m256i xtl = yy_loadu_256(buf - 1 - stride);
- const __m256i xt = yy_loadu_256(buf - stride);
- const __m256i xtr = yy_loadu_256(buf + 1 - stride);
- const __m256i xl = yy_loadu_256(buf - 1);
- const __m256i x = yy_loadu_256(buf);
- const __m256i xr = yy_loadu_256(buf + 1);
- const __m256i xbl = yy_loadu_256(buf - 1 + stride);
- const __m256i xb = yy_loadu_256(buf + stride);
- const __m256i xbr = yy_loadu_256(buf + 1 + stride);
-
- const __m256i fours = _mm256_add_epi32(
- xl, _mm256_add_epi32(xt, _mm256_add_epi32(xr, _mm256_add_epi32(xb, x))));
- const __m256i threes =
- _mm256_add_epi32(xtl, _mm256_add_epi32(xtr, _mm256_add_epi32(xbr, xbl)));
-
- return _mm256_sub_epi32(_mm256_slli_epi32(_mm256_add_epi32(fours, threes), 2),
- threes);
-}
-
-// The final filter for self-guided restoration. Computes a weighted average
-// across A, B with "cross sums" (see cross_sum implementation above).
-static void final_filter(int32_t *dst, int dst_stride, const int32_t *A,
- const int32_t *B, int buf_stride, const void *dgd8,
- int dgd_stride, int width, int height, int highbd) {
- const int nb = 5;
- const __m256i rounding =
- round_for_shift(SGRPROJ_SGR_BITS + nb - SGRPROJ_RST_BITS);
- const uint8_t *dgd_real =
- highbd ? (const uint8_t *)CONVERT_TO_SHORTPTR(dgd8) : dgd8;
-
- for (int i = 0; i < height; ++i) {
- for (int j = 0; j < width; j += 8) {
- const __m256i a = cross_sum(A + i * buf_stride + j, buf_stride);
- const __m256i b = cross_sum(B + i * buf_stride + j, buf_stride);
-
- const __m128i raw =
- xx_loadu_128(dgd_real + ((i * dgd_stride + j) << highbd));
- const __m256i src =
- highbd ? _mm256_cvtepu16_epi32(raw) : _mm256_cvtepu8_epi32(raw);
-
- __m256i v = _mm256_add_epi32(_mm256_madd_epi16(a, src), b);
- __m256i w = _mm256_srai_epi32(_mm256_add_epi32(v, rounding),
- SGRPROJ_SGR_BITS + nb - SGRPROJ_RST_BITS);
-
- yy_storeu_256(dst + i * dst_stride + j, w);
- }
- }
-}
-
-// Assumes that C, D are integral images for the original buffer which has been
-// extended to have a padding of SGRPROJ_BORDER_VERT/SGRPROJ_BORDER_HORZ pixels
-// on the sides. A, B, C, D point at logical position (0, 0).
-static void calc_ab_fast(int32_t *A, int32_t *B, const int32_t *C,
- const int32_t *D, int width, int height,
- int buf_stride, int bit_depth, int sgr_params_idx,
- int radius_idx) {
- const sgr_params_type *const params = &sgr_params[sgr_params_idx];
- const int r = params->r[radius_idx];
- const int n = (2 * r + 1) * (2 * r + 1);
- const __m256i s = _mm256_set1_epi32(params->s[radius_idx]);
- // one_over_n[n-1] is 2^12/n, so easily fits in an int16
- const __m256i one_over_n = _mm256_set1_epi32(one_by_x[n - 1]);
-
- const __m256i rnd_z = round_for_shift(SGRPROJ_MTABLE_BITS);
- const __m256i rnd_res = round_for_shift(SGRPROJ_RECIP_BITS);
-
- // Set up masks
- const __m128i ones32 = _mm_set_epi32(0, 0, 0xffffffff, 0xffffffff);
- __m256i mask[8];
- for (int idx = 0; idx < 8; idx++) {
- const __m128i shift = _mm_cvtsi32_si128(8 * (8 - idx));
- mask[idx] = _mm256_cvtepi8_epi32(_mm_srl_epi64(ones32, shift));
- }
-
- for (int i = -1; i < height + 1; i += 2) {
- for (int j = -1; j < width + 1; j += 8) {
- const int32_t *Cij = C + i * buf_stride + j;
- const int32_t *Dij = D + i * buf_stride + j;
-
- __m256i sum1 = boxsum_from_ii(Dij, buf_stride, r);
- __m256i sum2 = boxsum_from_ii(Cij, buf_stride, r);
-
- // When width + 2 isn't a multiple of 8, sum1 and sum2 will contain
- // some uninitialised data in their upper words. We use a mask to
- // ensure that these bits are set to 0.
- int idx = AOMMIN(8, width + 1 - j);
- assert(idx >= 1);
-
- if (idx < 8) {
- sum1 = _mm256_and_si256(mask[idx], sum1);
- sum2 = _mm256_and_si256(mask[idx], sum2);
- }
-
- const __m256i p = compute_p(sum1, sum2, bit_depth, n);
-
- const __m256i z = _mm256_min_epi32(
- _mm256_srli_epi32(_mm256_add_epi32(_mm256_mullo_epi32(p, s), rnd_z),
- SGRPROJ_MTABLE_BITS),
- _mm256_set1_epi32(255));
-
- const __m256i a_res = _mm256_i32gather_epi32(x_by_xplus1, z, 4);
-
- yy_storeu_256(A + i * buf_stride + j, a_res);
-
- const __m256i a_complement =
- _mm256_sub_epi32(_mm256_set1_epi32(SGRPROJ_SGR), a_res);
-
- // sum1 might have lanes greater than 2^15, so we can't use madd to do
- // multiplication involving sum1. However, a_complement and one_over_n
- // are both less than 256, so we can multiply them first.
- const __m256i a_comp_over_n = _mm256_madd_epi16(a_complement, one_over_n);
- const __m256i b_int = _mm256_mullo_epi32(a_comp_over_n, sum1);
- const __m256i b_res = _mm256_srli_epi32(_mm256_add_epi32(b_int, rnd_res),
- SGRPROJ_RECIP_BITS);
-
- yy_storeu_256(B + i * buf_stride + j, b_res);
- }
- }
-}
-
-// Calculate 8 values of the "cross sum" starting at buf.
-//
-// Pixels are indexed like this:
-// xtl xt xtr
-// - buf -
-// xbl xb xbr
-//
-// Pixels are weighted like this:
-// 5 6 5
-// 0 0 0
-// 5 6 5
-//
-// fives = xtl + xtr + xbl + xbr
-// sixes = xt + xb
-// cross_sum = 6 * sixes + 5 * fives
-// = 5 * (fives + sixes) - sixes
-// = (fives + sixes) << 2 + (fives + sixes) + sixes
-static INLINE __m256i cross_sum_fast_even_row(const int32_t *buf, int stride) {
- const __m256i xtl = yy_loadu_256(buf - 1 - stride);
- const __m256i xt = yy_loadu_256(buf - stride);
- const __m256i xtr = yy_loadu_256(buf + 1 - stride);
- const __m256i xbl = yy_loadu_256(buf - 1 + stride);
- const __m256i xb = yy_loadu_256(buf + stride);
- const __m256i xbr = yy_loadu_256(buf + 1 + stride);
-
- const __m256i fives =
- _mm256_add_epi32(xtl, _mm256_add_epi32(xtr, _mm256_add_epi32(xbr, xbl)));
- const __m256i sixes = _mm256_add_epi32(xt, xb);
- const __m256i fives_plus_sixes = _mm256_add_epi32(fives, sixes);
-
- return _mm256_add_epi32(
- _mm256_add_epi32(_mm256_slli_epi32(fives_plus_sixes, 2),
- fives_plus_sixes),
- sixes);
-}
-
-// Calculate 8 values of the "cross sum" starting at buf.
-//
-// Pixels are indexed like this:
-// xl x xr
-//
-// Pixels are weighted like this:
-// 5 6 5
-//
-// buf points to x
-//
-// fives = xl + xr
-// sixes = x
-// cross_sum = 5 * fives + 6 * sixes
-// = 4 * (fives + sixes) + (fives + sixes) + sixes
-// = (fives + sixes) << 2 + (fives + sixes) + sixes
-static INLINE __m256i cross_sum_fast_odd_row(const int32_t *buf) {
- const __m256i xl = yy_loadu_256(buf - 1);
- const __m256i x = yy_loadu_256(buf);
- const __m256i xr = yy_loadu_256(buf + 1);
-
- const __m256i fives = _mm256_add_epi32(xl, xr);
- const __m256i sixes = x;
-
- const __m256i fives_plus_sixes = _mm256_add_epi32(fives, sixes);
-
- return _mm256_add_epi32(
- _mm256_add_epi32(_mm256_slli_epi32(fives_plus_sixes, 2),
- fives_plus_sixes),
- sixes);
-}
-
-// The final filter for the self-guided restoration. Computes a
-// weighted average across A, B with "cross sums" (see cross_sum_...
-// implementations above).
-static void final_filter_fast(int32_t *dst, int dst_stride, const int32_t *A,
- const int32_t *B, int buf_stride,
- const void *dgd8, int dgd_stride, int width,
- int height, int highbd) {
- const int nb0 = 5;
- const int nb1 = 4;
-
- const __m256i rounding0 =
- round_for_shift(SGRPROJ_SGR_BITS + nb0 - SGRPROJ_RST_BITS);
- const __m256i rounding1 =
- round_for_shift(SGRPROJ_SGR_BITS + nb1 - SGRPROJ_RST_BITS);
-
- const uint8_t *dgd_real =
- highbd ? (const uint8_t *)CONVERT_TO_SHORTPTR(dgd8) : dgd8;
-
- for (int i = 0; i < height; ++i) {
- if (!(i & 1)) { // even row
- for (int j = 0; j < width; j += 8) {
- const __m256i a =
- cross_sum_fast_even_row(A + i * buf_stride + j, buf_stride);
- const __m256i b =
- cross_sum_fast_even_row(B + i * buf_stride + j, buf_stride);
-
- const __m128i raw =
- xx_loadu_128(dgd_real + ((i * dgd_stride + j) << highbd));
- const __m256i src =
- highbd ? _mm256_cvtepu16_epi32(raw) : _mm256_cvtepu8_epi32(raw);
-
- __m256i v = _mm256_add_epi32(_mm256_madd_epi16(a, src), b);
- __m256i w =
- _mm256_srai_epi32(_mm256_add_epi32(v, rounding0),
- SGRPROJ_SGR_BITS + nb0 - SGRPROJ_RST_BITS);
-
- yy_storeu_256(dst + i * dst_stride + j, w);
- }
- } else { // odd row
- for (int j = 0; j < width; j += 8) {
- const __m256i a = cross_sum_fast_odd_row(A + i * buf_stride + j);
- const __m256i b = cross_sum_fast_odd_row(B + i * buf_stride + j);
-
- const __m128i raw =
- xx_loadu_128(dgd_real + ((i * dgd_stride + j) << highbd));
- const __m256i src =
- highbd ? _mm256_cvtepu16_epi32(raw) : _mm256_cvtepu8_epi32(raw);
-
- __m256i v = _mm256_add_epi32(_mm256_madd_epi16(a, src), b);
- __m256i w =
- _mm256_srai_epi32(_mm256_add_epi32(v, rounding1),
- SGRPROJ_SGR_BITS + nb1 - SGRPROJ_RST_BITS);
-
- yy_storeu_256(dst + i * dst_stride + j, w);
- }
- }
- }
-}
-
-int av1_selfguided_restoration_avx2(const uint8_t *dgd8, int width, int height,
- int dgd_stride, int32_t *flt0,
- int32_t *flt1, int flt_stride,
- int sgr_params_idx, int bit_depth,
- int highbd) {
- // The ALIGN_POWER_OF_TWO macro here ensures that column 1 of Atl, Btl,
- // Ctl and Dtl is 32-byte aligned.
- const int buf_elts = ALIGN_POWER_OF_TWO(RESTORATION_PROC_UNIT_PELS, 3);
-
- int32_t *buf = aom_memalign(
- 32, 4 * sizeof(*buf) * ALIGN_POWER_OF_TWO(RESTORATION_PROC_UNIT_PELS, 3));
- if (!buf) return -1;
-
- const int width_ext = width + 2 * SGRPROJ_BORDER_HORZ;
- const int height_ext = height + 2 * SGRPROJ_BORDER_VERT;
-
- // Adjusting the stride of A and B here appears to avoid bad cache effects,
- // leading to a significant speed improvement.
- // We also align the stride to a multiple of 32 bytes for efficiency.
- int buf_stride = ALIGN_POWER_OF_TWO(width_ext + 16, 3);
-
- // The "tl" pointers point at the top-left of the initialised data for the
- // array.
- int32_t *Atl = buf + 0 * buf_elts + 7;
- int32_t *Btl = buf + 1 * buf_elts + 7;
- int32_t *Ctl = buf + 2 * buf_elts + 7;
- int32_t *Dtl = buf + 3 * buf_elts + 7;
-
- // The "0" pointers are (- SGRPROJ_BORDER_VERT, -SGRPROJ_BORDER_HORZ). Note
- // there's a zero row and column in A, B (integral images), so we move down
- // and right one for them.
- const int buf_diag_border =
- SGRPROJ_BORDER_HORZ + buf_stride * SGRPROJ_BORDER_VERT;
-
- int32_t *A0 = Atl + 1 + buf_stride;
- int32_t *B0 = Btl + 1 + buf_stride;
- int32_t *C0 = Ctl + 1 + buf_stride;
- int32_t *D0 = Dtl + 1 + buf_stride;
-
- // Finally, A, B, C, D point at position (0, 0).
- int32_t *A = A0 + buf_diag_border;
- int32_t *B = B0 + buf_diag_border;
- int32_t *C = C0 + buf_diag_border;
- int32_t *D = D0 + buf_diag_border;
-
- const int dgd_diag_border =
- SGRPROJ_BORDER_HORZ + dgd_stride * SGRPROJ_BORDER_VERT;
- const uint8_t *dgd0 = dgd8 - dgd_diag_border;
-
- // Generate integral images from the input. C will contain sums of squares; D
- // will contain just sums
- if (highbd)
- integral_images_highbd(CONVERT_TO_SHORTPTR(dgd0), dgd_stride, width_ext,
- height_ext, Ctl, Dtl, buf_stride);
- else
- integral_images(dgd0, dgd_stride, width_ext, height_ext, Ctl, Dtl,
- buf_stride);
-
- const sgr_params_type *const params = &sgr_params[sgr_params_idx];
- // Write to flt0 and flt1
- // If params->r == 0 we skip the corresponding filter. We only allow one of
- // the radii to be 0, as having both equal to 0 would be equivalent to
- // skipping SGR entirely.
- assert(!(params->r[0] == 0 && params->r[1] == 0));
- assert(params->r[0] < AOMMIN(SGRPROJ_BORDER_VERT, SGRPROJ_BORDER_HORZ));
- assert(params->r[1] < AOMMIN(SGRPROJ_BORDER_VERT, SGRPROJ_BORDER_HORZ));
-
- if (params->r[0] > 0) {
- calc_ab_fast(A, B, C, D, width, height, buf_stride, bit_depth,
- sgr_params_idx, 0);
- final_filter_fast(flt0, flt_stride, A, B, buf_stride, dgd8, dgd_stride,
- width, height, highbd);
- }
-
- if (params->r[1] > 0) {
- calc_ab(A, B, C, D, width, height, buf_stride, bit_depth, sgr_params_idx,
- 1);
- final_filter(flt1, flt_stride, A, B, buf_stride, dgd8, dgd_stride, width,
- height, highbd);
- }
- aom_free(buf);
- return 0;
-}
-
-void apply_selfguided_restoration_avx2(const uint8_t *dat8, int width,
- int height, int stride, int eps,
- const int *xqd, uint8_t *dst8,
- int dst_stride, int32_t *tmpbuf,
- int bit_depth, int highbd) {
- int32_t *flt0 = tmpbuf;
- int32_t *flt1 = flt0 + RESTORATION_UNITPELS_MAX;
- assert(width * height <= RESTORATION_UNITPELS_MAX);
- const int ret = av1_selfguided_restoration_avx2(
- dat8, width, height, stride, flt0, flt1, width, eps, bit_depth, highbd);
- (void)ret;
- assert(!ret);
- const sgr_params_type *const params = &sgr_params[eps];
- int xq[2];
- decode_xq(xqd, xq, params);
-
- __m256i xq0 = _mm256_set1_epi32(xq[0]);
- __m256i xq1 = _mm256_set1_epi32(xq[1]);
-
- for (int i = 0; i < height; ++i) {
- // Calculate output in batches of 16 pixels
- for (int j = 0; j < width; j += 16) {
- const int k = i * width + j;
- const int m = i * dst_stride + j;
-
- const uint8_t *dat8ij = dat8 + i * stride + j;
- __m256i ep_0, ep_1;
- __m128i src_0, src_1;
- if (highbd) {
- src_0 = xx_loadu_128(CONVERT_TO_SHORTPTR(dat8ij));
- src_1 = xx_loadu_128(CONVERT_TO_SHORTPTR(dat8ij + 8));
- ep_0 = _mm256_cvtepu16_epi32(src_0);
- ep_1 = _mm256_cvtepu16_epi32(src_1);
- } else {
- src_0 = xx_loadu_128(dat8ij);
- ep_0 = _mm256_cvtepu8_epi32(src_0);
- ep_1 = _mm256_cvtepu8_epi32(_mm_srli_si128(src_0, 8));
- }
-
- const __m256i u_0 = _mm256_slli_epi32(ep_0, SGRPROJ_RST_BITS);
- const __m256i u_1 = _mm256_slli_epi32(ep_1, SGRPROJ_RST_BITS);
-
- __m256i v_0 = _mm256_slli_epi32(u_0, SGRPROJ_PRJ_BITS);
- __m256i v_1 = _mm256_slli_epi32(u_1, SGRPROJ_PRJ_BITS);
-
- if (params->r[0] > 0) {
- const __m256i f1_0 = _mm256_sub_epi32(yy_loadu_256(&flt0[k]), u_0);
- v_0 = _mm256_add_epi32(v_0, _mm256_mullo_epi32(xq0, f1_0));
-
- const __m256i f1_1 = _mm256_sub_epi32(yy_loadu_256(&flt0[k + 8]), u_1);
- v_1 = _mm256_add_epi32(v_1, _mm256_mullo_epi32(xq0, f1_1));
- }
-
- if (params->r[1] > 0) {
- const __m256i f2_0 = _mm256_sub_epi32(yy_loadu_256(&flt1[k]), u_0);
- v_0 = _mm256_add_epi32(v_0, _mm256_mullo_epi32(xq1, f2_0));
-
- const __m256i f2_1 = _mm256_sub_epi32(yy_loadu_256(&flt1[k + 8]), u_1);
- v_1 = _mm256_add_epi32(v_1, _mm256_mullo_epi32(xq1, f2_1));
- }
-
- const __m256i rounding =
- round_for_shift(SGRPROJ_PRJ_BITS + SGRPROJ_RST_BITS);
- const __m256i w_0 = _mm256_srai_epi32(
- _mm256_add_epi32(v_0, rounding), SGRPROJ_PRJ_BITS + SGRPROJ_RST_BITS);
- const __m256i w_1 = _mm256_srai_epi32(
- _mm256_add_epi32(v_1, rounding), SGRPROJ_PRJ_BITS + SGRPROJ_RST_BITS);
-
- if (highbd) {
- // Pack into 16 bits and clamp to [0, 2^bit_depth)
- // Note that packing into 16 bits messes up the order of the bits,
- // so we use a permute function to correct this
- const __m256i tmp = _mm256_packus_epi32(w_0, w_1);
- const __m256i tmp2 = _mm256_permute4x64_epi64(tmp, 0xd8);
- const __m256i max = _mm256_set1_epi16((1 << bit_depth) - 1);
- const __m256i res = _mm256_min_epi16(tmp2, max);
- yy_storeu_256(CONVERT_TO_SHORTPTR(dst8 + m), res);
- } else {
- // Pack into 8 bits and clamp to [0, 256)
- // Note that each pack messes up the order of the bits,
- // so we use a permute function to correct this
- const __m256i tmp = _mm256_packs_epi32(w_0, w_1);
- const __m256i tmp2 = _mm256_permute4x64_epi64(tmp, 0xd8);
- const __m256i res =
- _mm256_packus_epi16(tmp2, tmp2 /* "don't care" value */);
- const __m128i res2 =
- _mm256_castsi256_si128(_mm256_permute4x64_epi64(res, 0xd8));
- xx_storeu_128(dst8 + m, res2);
- }
- }
- }
-}
diff --git a/third_party/aom/av1/common/x86/selfguided_sse4.c b/third_party/aom/av1/common/x86/selfguided_sse4.c
deleted file mode 100644
index ea3f6d942..000000000
--- a/third_party/aom/av1/common/x86/selfguided_sse4.c
+++ /dev/null
@@ -1,660 +0,0 @@
-/*
- * Copyright (c) 2018, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#include <smmintrin.h>
-
-#include "config/aom_config.h"
-#include "config/av1_rtcd.h"
-
-#include "av1/common/restoration.h"
-#include "aom_dsp/x86/synonyms.h"
-
-// Load 4 bytes from the possibly-misaligned pointer p, extend each byte to
-// 32-bit precision and return them in an SSE register.
-static __m128i xx_load_extend_8_32(const void *p) {
- return _mm_cvtepu8_epi32(xx_loadl_32(p));
-}
-
-// Load 4 halfwords from the possibly-misaligned pointer p, extend each
-// halfword to 32-bit precision and return them in an SSE register.
-static __m128i xx_load_extend_16_32(const void *p) {
- return _mm_cvtepu16_epi32(xx_loadl_64(p));
-}
-
-// Compute the scan of an SSE register holding 4 32-bit integers. If the
-// register holds x0..x3 then the scan will hold x0, x0+x1, x0+x1+x2,
-// x0+x1+x2+x3
-static __m128i scan_32(__m128i x) {
- const __m128i x01 = _mm_add_epi32(x, _mm_slli_si128(x, 4));
- return _mm_add_epi32(x01, _mm_slli_si128(x01, 8));
-}
-
-// Compute two integral images from src. B sums elements; A sums their
-// squares. The images are offset by one pixel, so will have width and height
-// equal to width + 1, height + 1 and the first row and column will be zero.
-//
-// A+1 and B+1 should be aligned to 16 bytes. buf_stride should be a multiple
-// of 4.
-static void integral_images(const uint8_t *src, int src_stride, int width,
- int height, int32_t *A, int32_t *B,
- int buf_stride) {
- // Write out the zero top row
- memset(A, 0, sizeof(*A) * (width + 1));
- memset(B, 0, sizeof(*B) * (width + 1));
-
- const __m128i zero = _mm_setzero_si128();
- for (int i = 0; i < height; ++i) {
- // Zero the left column.
- A[(i + 1) * buf_stride] = B[(i + 1) * buf_stride] = 0;
-
- // ldiff is the difference H - D where H is the output sample immediately
- // to the left and D is the output sample above it. These are scalars,
- // replicated across the four lanes.
- __m128i ldiff1 = zero, ldiff2 = zero;
- for (int j = 0; j < width; j += 4) {
- const int ABj = 1 + j;
-
- const __m128i above1 = xx_load_128(B + ABj + i * buf_stride);
- const __m128i above2 = xx_load_128(A + ABj + i * buf_stride);
-
- const __m128i x1 = xx_load_extend_8_32(src + j + i * src_stride);
- const __m128i x2 = _mm_madd_epi16(x1, x1);
-
- const __m128i sc1 = scan_32(x1);
- const __m128i sc2 = scan_32(x2);
-
- const __m128i row1 = _mm_add_epi32(_mm_add_epi32(sc1, above1), ldiff1);
- const __m128i row2 = _mm_add_epi32(_mm_add_epi32(sc2, above2), ldiff2);
-
- xx_store_128(B + ABj + (i + 1) * buf_stride, row1);
- xx_store_128(A + ABj + (i + 1) * buf_stride, row2);
-
- // Calculate the new H - D.
- ldiff1 = _mm_shuffle_epi32(_mm_sub_epi32(row1, above1), 0xff);
- ldiff2 = _mm_shuffle_epi32(_mm_sub_epi32(row2, above2), 0xff);
- }
- }
-}
-
-// Compute two integral images from src. B sums elements; A sums their squares
-//
-// A and B should be aligned to 16 bytes. buf_stride should be a multiple of 4.
-static void integral_images_highbd(const uint16_t *src, int src_stride,
- int width, int height, int32_t *A,
- int32_t *B, int buf_stride) {
- // Write out the zero top row
- memset(A, 0, sizeof(*A) * (width + 1));
- memset(B, 0, sizeof(*B) * (width + 1));
-
- const __m128i zero = _mm_setzero_si128();
- for (int i = 0; i < height; ++i) {
- // Zero the left column.
- A[(i + 1) * buf_stride] = B[(i + 1) * buf_stride] = 0;
-
- // ldiff is the difference H - D where H is the output sample immediately
- // to the left and D is the output sample above it. These are scalars,
- // replicated across the four lanes.
- __m128i ldiff1 = zero, ldiff2 = zero;
- for (int j = 0; j < width; j += 4) {
- const int ABj = 1 + j;
-
- const __m128i above1 = xx_load_128(B + ABj + i * buf_stride);
- const __m128i above2 = xx_load_128(A + ABj + i * buf_stride);
-
- const __m128i x1 = xx_load_extend_16_32(src + j + i * src_stride);
- const __m128i x2 = _mm_madd_epi16(x1, x1);
-
- const __m128i sc1 = scan_32(x1);
- const __m128i sc2 = scan_32(x2);
-
- const __m128i row1 = _mm_add_epi32(_mm_add_epi32(sc1, above1), ldiff1);
- const __m128i row2 = _mm_add_epi32(_mm_add_epi32(sc2, above2), ldiff2);
-
- xx_store_128(B + ABj + (i + 1) * buf_stride, row1);
- xx_store_128(A + ABj + (i + 1) * buf_stride, row2);
-
- // Calculate the new H - D.
- ldiff1 = _mm_shuffle_epi32(_mm_sub_epi32(row1, above1), 0xff);
- ldiff2 = _mm_shuffle_epi32(_mm_sub_epi32(row2, above2), 0xff);
- }
- }
-}
-
-// Compute 4 values of boxsum from the given integral image. ii should point
-// at the middle of the box (for the first value). r is the box radius.
-static INLINE __m128i boxsum_from_ii(const int32_t *ii, int stride, int r) {
- const __m128i tl = xx_loadu_128(ii - (r + 1) - (r + 1) * stride);
- const __m128i tr = xx_loadu_128(ii + (r + 0) - (r + 1) * stride);
- const __m128i bl = xx_loadu_128(ii - (r + 1) + r * stride);
- const __m128i br = xx_loadu_128(ii + (r + 0) + r * stride);
- const __m128i u = _mm_sub_epi32(tr, tl);
- const __m128i v = _mm_sub_epi32(br, bl);
- return _mm_sub_epi32(v, u);
-}
-
-static __m128i round_for_shift(unsigned shift) {
- return _mm_set1_epi32((1 << shift) >> 1);
-}
-
-static __m128i compute_p(__m128i sum1, __m128i sum2, int bit_depth, int n) {
- __m128i an, bb;
- if (bit_depth > 8) {
- const __m128i rounding_a = round_for_shift(2 * (bit_depth - 8));
- const __m128i rounding_b = round_for_shift(bit_depth - 8);
- const __m128i shift_a = _mm_cvtsi32_si128(2 * (bit_depth - 8));
- const __m128i shift_b = _mm_cvtsi32_si128(bit_depth - 8);
- const __m128i a = _mm_srl_epi32(_mm_add_epi32(sum2, rounding_a), shift_a);
- const __m128i b = _mm_srl_epi32(_mm_add_epi32(sum1, rounding_b), shift_b);
- // b < 2^14, so we can use a 16-bit madd rather than a 32-bit
- // mullo to square it
- bb = _mm_madd_epi16(b, b);
- an = _mm_max_epi32(_mm_mullo_epi32(a, _mm_set1_epi32(n)), bb);
- } else {
- bb = _mm_madd_epi16(sum1, sum1);
- an = _mm_mullo_epi32(sum2, _mm_set1_epi32(n));
- }
- return _mm_sub_epi32(an, bb);
-}
-
-// Assumes that C, D are integral images for the original buffer which has been
-// extended to have a padding of SGRPROJ_BORDER_VERT/SGRPROJ_BORDER_HORZ pixels
-// on the sides. A, B, C, D point at logical position (0, 0).
-static void calc_ab(int32_t *A, int32_t *B, const int32_t *C, const int32_t *D,
- int width, int height, int buf_stride, int bit_depth,
- int sgr_params_idx, int radius_idx) {
- const sgr_params_type *const params = &sgr_params[sgr_params_idx];
- const int r = params->r[radius_idx];
- const int n = (2 * r + 1) * (2 * r + 1);
- const __m128i s = _mm_set1_epi32(params->s[radius_idx]);
- // one_over_n[n-1] is 2^12/n, so easily fits in an int16
- const __m128i one_over_n = _mm_set1_epi32(one_by_x[n - 1]);
-
- const __m128i rnd_z = round_for_shift(SGRPROJ_MTABLE_BITS);
- const __m128i rnd_res = round_for_shift(SGRPROJ_RECIP_BITS);
-
- // Set up masks
- const __m128i ones32 = _mm_set_epi32(0, 0, 0xffffffff, 0xffffffff);
- __m128i mask[4];
- for (int idx = 0; idx < 4; idx++) {
- const __m128i shift = _mm_cvtsi32_si128(8 * (4 - idx));
- mask[idx] = _mm_cvtepi8_epi32(_mm_srl_epi64(ones32, shift));
- }
-
- for (int i = -1; i < height + 1; ++i) {
- for (int j = -1; j < width + 1; j += 4) {
- const int32_t *Cij = C + i * buf_stride + j;
- const int32_t *Dij = D + i * buf_stride + j;
-
- __m128i sum1 = boxsum_from_ii(Dij, buf_stride, r);
- __m128i sum2 = boxsum_from_ii(Cij, buf_stride, r);
-
- // When width + 2 isn't a multiple of 4, sum1 and sum2 will contain
- // some uninitialised data in their upper words. We use a mask to
- // ensure that these bits are set to 0.
- int idx = AOMMIN(4, width + 1 - j);
- assert(idx >= 1);
-
- if (idx < 4) {
- sum1 = _mm_and_si128(mask[idx], sum1);
- sum2 = _mm_and_si128(mask[idx], sum2);
- }
-
- const __m128i p = compute_p(sum1, sum2, bit_depth, n);
-
- const __m128i z = _mm_min_epi32(
- _mm_srli_epi32(_mm_add_epi32(_mm_mullo_epi32(p, s), rnd_z),
- SGRPROJ_MTABLE_BITS),
- _mm_set1_epi32(255));
-
- // 'Gather' type instructions are not available pre-AVX2, so synthesize a
- // gather using scalar loads.
- const __m128i a_res = _mm_set_epi32(x_by_xplus1[_mm_extract_epi32(z, 3)],
- x_by_xplus1[_mm_extract_epi32(z, 2)],
- x_by_xplus1[_mm_extract_epi32(z, 1)],
- x_by_xplus1[_mm_extract_epi32(z, 0)]);
-
- xx_storeu_128(A + i * buf_stride + j, a_res);
-
- const __m128i a_complement =
- _mm_sub_epi32(_mm_set1_epi32(SGRPROJ_SGR), a_res);
-
- // sum1 might have lanes greater than 2^15, so we can't use madd to do
- // multiplication involving sum1. However, a_complement and one_over_n
- // are both less than 256, so we can multiply them first.
- const __m128i a_comp_over_n = _mm_madd_epi16(a_complement, one_over_n);
- const __m128i b_int = _mm_mullo_epi32(a_comp_over_n, sum1);
- const __m128i b_res =
- _mm_srli_epi32(_mm_add_epi32(b_int, rnd_res), SGRPROJ_RECIP_BITS);
-
- xx_storeu_128(B + i * buf_stride + j, b_res);
- }
- }
-}
-
-// Calculate 4 values of the "cross sum" starting at buf. This is a 3x3 filter
-// where the outer four corners have weight 3 and all other pixels have weight
-// 4.
-//
-// Pixels are indexed like this:
-// xtl xt xtr
-// xl x xr
-// xbl xb xbr
-//
-// buf points to x
-//
-// fours = xl + xt + xr + xb + x
-// threes = xtl + xtr + xbr + xbl
-// cross_sum = 4 * fours + 3 * threes
-// = 4 * (fours + threes) - threes
-// = (fours + threes) << 2 - threes
-static INLINE __m128i cross_sum(const int32_t *buf, int stride) {
- const __m128i xtl = xx_loadu_128(buf - 1 - stride);
- const __m128i xt = xx_loadu_128(buf - stride);
- const __m128i xtr = xx_loadu_128(buf + 1 - stride);
- const __m128i xl = xx_loadu_128(buf - 1);
- const __m128i x = xx_loadu_128(buf);
- const __m128i xr = xx_loadu_128(buf + 1);
- const __m128i xbl = xx_loadu_128(buf - 1 + stride);
- const __m128i xb = xx_loadu_128(buf + stride);
- const __m128i xbr = xx_loadu_128(buf + 1 + stride);
-
- const __m128i fours = _mm_add_epi32(
- xl, _mm_add_epi32(xt, _mm_add_epi32(xr, _mm_add_epi32(xb, x))));
- const __m128i threes =
- _mm_add_epi32(xtl, _mm_add_epi32(xtr, _mm_add_epi32(xbr, xbl)));
-
- return _mm_sub_epi32(_mm_slli_epi32(_mm_add_epi32(fours, threes), 2), threes);
-}
-
-// The final filter for self-guided restoration. Computes a weighted average
-// across A, B with "cross sums" (see cross_sum implementation above).
-static void final_filter(int32_t *dst, int dst_stride, const int32_t *A,
- const int32_t *B, int buf_stride, const void *dgd8,
- int dgd_stride, int width, int height, int highbd) {
- const int nb = 5;
- const __m128i rounding =
- round_for_shift(SGRPROJ_SGR_BITS + nb - SGRPROJ_RST_BITS);
- const uint8_t *dgd_real =
- highbd ? (const uint8_t *)CONVERT_TO_SHORTPTR(dgd8) : dgd8;
-
- for (int i = 0; i < height; ++i) {
- for (int j = 0; j < width; j += 4) {
- const __m128i a = cross_sum(A + i * buf_stride + j, buf_stride);
- const __m128i b = cross_sum(B + i * buf_stride + j, buf_stride);
- const __m128i raw =
- xx_loadl_64(dgd_real + ((i * dgd_stride + j) << highbd));
- const __m128i src =
- highbd ? _mm_cvtepu16_epi32(raw) : _mm_cvtepu8_epi32(raw);
-
- __m128i v = _mm_add_epi32(_mm_madd_epi16(a, src), b);
- __m128i w = _mm_srai_epi32(_mm_add_epi32(v, rounding),
- SGRPROJ_SGR_BITS + nb - SGRPROJ_RST_BITS);
-
- xx_storeu_128(dst + i * dst_stride + j, w);
- }
- }
-}
-
-// Assumes that C, D are integral images for the original buffer which has been
-// extended to have a padding of SGRPROJ_BORDER_VERT/SGRPROJ_BORDER_HORZ pixels
-// on the sides. A, B, C, D point at logical position (0, 0).
-static void calc_ab_fast(int32_t *A, int32_t *B, const int32_t *C,
- const int32_t *D, int width, int height,
- int buf_stride, int bit_depth, int sgr_params_idx,
- int radius_idx) {
- const sgr_params_type *const params = &sgr_params[sgr_params_idx];
- const int r = params->r[radius_idx];
- const int n = (2 * r + 1) * (2 * r + 1);
- const __m128i s = _mm_set1_epi32(params->s[radius_idx]);
- // one_over_n[n-1] is 2^12/n, so easily fits in an int16
- const __m128i one_over_n = _mm_set1_epi32(one_by_x[n - 1]);
-
- const __m128i rnd_z = round_for_shift(SGRPROJ_MTABLE_BITS);
- const __m128i rnd_res = round_for_shift(SGRPROJ_RECIP_BITS);
-
- // Set up masks
- const __m128i ones32 = _mm_set_epi32(0, 0, 0xffffffff, 0xffffffff);
- __m128i mask[4];
- for (int idx = 0; idx < 4; idx++) {
- const __m128i shift = _mm_cvtsi32_si128(8 * (4 - idx));
- mask[idx] = _mm_cvtepi8_epi32(_mm_srl_epi64(ones32, shift));
- }
-
- for (int i = -1; i < height + 1; i += 2) {
- for (int j = -1; j < width + 1; j += 4) {
- const int32_t *Cij = C + i * buf_stride + j;
- const int32_t *Dij = D + i * buf_stride + j;
-
- __m128i sum1 = boxsum_from_ii(Dij, buf_stride, r);
- __m128i sum2 = boxsum_from_ii(Cij, buf_stride, r);
-
- // When width + 2 isn't a multiple of 4, sum1 and sum2 will contain
- // some uninitialised data in their upper words. We use a mask to
- // ensure that these bits are set to 0.
- int idx = AOMMIN(4, width + 1 - j);
- assert(idx >= 1);
-
- if (idx < 4) {
- sum1 = _mm_and_si128(mask[idx], sum1);
- sum2 = _mm_and_si128(mask[idx], sum2);
- }
-
- const __m128i p = compute_p(sum1, sum2, bit_depth, n);
-
- const __m128i z = _mm_min_epi32(
- _mm_srli_epi32(_mm_add_epi32(_mm_mullo_epi32(p, s), rnd_z),
- SGRPROJ_MTABLE_BITS),
- _mm_set1_epi32(255));
-
- // 'Gather' type instructions are not available pre-AVX2, so synthesize a
- // gather using scalar loads.
- const __m128i a_res = _mm_set_epi32(x_by_xplus1[_mm_extract_epi32(z, 3)],
- x_by_xplus1[_mm_extract_epi32(z, 2)],
- x_by_xplus1[_mm_extract_epi32(z, 1)],
- x_by_xplus1[_mm_extract_epi32(z, 0)]);
-
- xx_storeu_128(A + i * buf_stride + j, a_res);
-
- const __m128i a_complement =
- _mm_sub_epi32(_mm_set1_epi32(SGRPROJ_SGR), a_res);
-
- // sum1 might have lanes greater than 2^15, so we can't use madd to do
- // multiplication involving sum1. However, a_complement and one_over_n
- // are both less than 256, so we can multiply them first.
- const __m128i a_comp_over_n = _mm_madd_epi16(a_complement, one_over_n);
- const __m128i b_int = _mm_mullo_epi32(a_comp_over_n, sum1);
- const __m128i b_res =
- _mm_srli_epi32(_mm_add_epi32(b_int, rnd_res), SGRPROJ_RECIP_BITS);
-
- xx_storeu_128(B + i * buf_stride + j, b_res);
- }
- }
-}
-
-// Calculate 4 values of the "cross sum" starting at buf.
-//
-// Pixels are indexed like this:
-// xtl xt xtr
-// - buf -
-// xbl xb xbr
-//
-// Pixels are weighted like this:
-// 5 6 5
-// 0 0 0
-// 5 6 5
-//
-// fives = xtl + xtr + xbl + xbr
-// sixes = xt + xb
-// cross_sum = 6 * sixes + 5 * fives
-// = 5 * (fives + sixes) - sixes
-// = (fives + sixes) << 2 + (fives + sixes) + sixes
-static INLINE __m128i cross_sum_fast_even_row(const int32_t *buf, int stride) {
- const __m128i xtl = xx_loadu_128(buf - 1 - stride);
- const __m128i xt = xx_loadu_128(buf - stride);
- const __m128i xtr = xx_loadu_128(buf + 1 - stride);
- const __m128i xbl = xx_loadu_128(buf - 1 + stride);
- const __m128i xb = xx_loadu_128(buf + stride);
- const __m128i xbr = xx_loadu_128(buf + 1 + stride);
-
- const __m128i fives =
- _mm_add_epi32(xtl, _mm_add_epi32(xtr, _mm_add_epi32(xbr, xbl)));
- const __m128i sixes = _mm_add_epi32(xt, xb);
- const __m128i fives_plus_sixes = _mm_add_epi32(fives, sixes);
-
- return _mm_add_epi32(
- _mm_add_epi32(_mm_slli_epi32(fives_plus_sixes, 2), fives_plus_sixes),
- sixes);
-}
-
-// Calculate 4 values of the "cross sum" starting at buf.
-//
-// Pixels are indexed like this:
-// xl x xr
-//
-// Pixels are weighted like this:
-// 5 6 5
-//
-// buf points to x
-//
-// fives = xl + xr
-// sixes = x
-// cross_sum = 5 * fives + 6 * sixes
-// = 4 * (fives + sixes) + (fives + sixes) + sixes
-// = (fives + sixes) << 2 + (fives + sixes) + sixes
-static INLINE __m128i cross_sum_fast_odd_row(const int32_t *buf) {
- const __m128i xl = xx_loadu_128(buf - 1);
- const __m128i x = xx_loadu_128(buf);
- const __m128i xr = xx_loadu_128(buf + 1);
-
- const __m128i fives = _mm_add_epi32(xl, xr);
- const __m128i sixes = x;
-
- const __m128i fives_plus_sixes = _mm_add_epi32(fives, sixes);
-
- return _mm_add_epi32(
- _mm_add_epi32(_mm_slli_epi32(fives_plus_sixes, 2), fives_plus_sixes),
- sixes);
-}
-
-// The final filter for the self-guided restoration. Computes a
-// weighted average across A, B with "cross sums" (see cross_sum_...
-// implementations above).
-static void final_filter_fast(int32_t *dst, int dst_stride, const int32_t *A,
- const int32_t *B, int buf_stride,
- const void *dgd8, int dgd_stride, int width,
- int height, int highbd) {
- const int nb0 = 5;
- const int nb1 = 4;
-
- const __m128i rounding0 =
- round_for_shift(SGRPROJ_SGR_BITS + nb0 - SGRPROJ_RST_BITS);
- const __m128i rounding1 =
- round_for_shift(SGRPROJ_SGR_BITS + nb1 - SGRPROJ_RST_BITS);
-
- const uint8_t *dgd_real =
- highbd ? (const uint8_t *)CONVERT_TO_SHORTPTR(dgd8) : dgd8;
-
- for (int i = 0; i < height; ++i) {
- if (!(i & 1)) { // even row
- for (int j = 0; j < width; j += 4) {
- const __m128i a =
- cross_sum_fast_even_row(A + i * buf_stride + j, buf_stride);
- const __m128i b =
- cross_sum_fast_even_row(B + i * buf_stride + j, buf_stride);
- const __m128i raw =
- xx_loadl_64(dgd_real + ((i * dgd_stride + j) << highbd));
- const __m128i src =
- highbd ? _mm_cvtepu16_epi32(raw) : _mm_cvtepu8_epi32(raw);
-
- __m128i v = _mm_add_epi32(_mm_madd_epi16(a, src), b);
- __m128i w = _mm_srai_epi32(_mm_add_epi32(v, rounding0),
- SGRPROJ_SGR_BITS + nb0 - SGRPROJ_RST_BITS);
-
- xx_storeu_128(dst + i * dst_stride + j, w);
- }
- } else { // odd row
- for (int j = 0; j < width; j += 4) {
- const __m128i a = cross_sum_fast_odd_row(A + i * buf_stride + j);
- const __m128i b = cross_sum_fast_odd_row(B + i * buf_stride + j);
- const __m128i raw =
- xx_loadl_64(dgd_real + ((i * dgd_stride + j) << highbd));
- const __m128i src =
- highbd ? _mm_cvtepu16_epi32(raw) : _mm_cvtepu8_epi32(raw);
-
- __m128i v = _mm_add_epi32(_mm_madd_epi16(a, src), b);
- __m128i w = _mm_srai_epi32(_mm_add_epi32(v, rounding1),
- SGRPROJ_SGR_BITS + nb1 - SGRPROJ_RST_BITS);
-
- xx_storeu_128(dst + i * dst_stride + j, w);
- }
- }
- }
-}
-
-int av1_selfguided_restoration_sse4_1(const uint8_t *dgd8, int width,
- int height, int dgd_stride, int32_t *flt0,
- int32_t *flt1, int flt_stride,
- int sgr_params_idx, int bit_depth,
- int highbd) {
- int32_t *buf = (int32_t *)aom_memalign(
- 16, 4 * sizeof(*buf) * RESTORATION_PROC_UNIT_PELS);
- if (!buf) return -1;
- memset(buf, 0, 4 * sizeof(*buf) * RESTORATION_PROC_UNIT_PELS);
-
- const int width_ext = width + 2 * SGRPROJ_BORDER_HORZ;
- const int height_ext = height + 2 * SGRPROJ_BORDER_VERT;
-
- // Adjusting the stride of A and B here appears to avoid bad cache effects,
- // leading to a significant speed improvement.
- // We also align the stride to a multiple of 16 bytes for efficiency.
- int buf_stride = ((width_ext + 3) & ~3) + 16;
-
- // The "tl" pointers point at the top-left of the initialised data for the
- // array. Adding 3 here ensures that column 1 is 16-byte aligned.
- int32_t *Atl = buf + 0 * RESTORATION_PROC_UNIT_PELS + 3;
- int32_t *Btl = buf + 1 * RESTORATION_PROC_UNIT_PELS + 3;
- int32_t *Ctl = buf + 2 * RESTORATION_PROC_UNIT_PELS + 3;
- int32_t *Dtl = buf + 3 * RESTORATION_PROC_UNIT_PELS + 3;
-
- // The "0" pointers are (- SGRPROJ_BORDER_VERT, -SGRPROJ_BORDER_HORZ). Note
- // there's a zero row and column in A, B (integral images), so we move down
- // and right one for them.
- const int buf_diag_border =
- SGRPROJ_BORDER_HORZ + buf_stride * SGRPROJ_BORDER_VERT;
-
- int32_t *A0 = Atl + 1 + buf_stride;
- int32_t *B0 = Btl + 1 + buf_stride;
- int32_t *C0 = Ctl + 1 + buf_stride;
- int32_t *D0 = Dtl + 1 + buf_stride;
-
- // Finally, A, B, C, D point at position (0, 0).
- int32_t *A = A0 + buf_diag_border;
- int32_t *B = B0 + buf_diag_border;
- int32_t *C = C0 + buf_diag_border;
- int32_t *D = D0 + buf_diag_border;
-
- const int dgd_diag_border =
- SGRPROJ_BORDER_HORZ + dgd_stride * SGRPROJ_BORDER_VERT;
- const uint8_t *dgd0 = dgd8 - dgd_diag_border;
-
- // Generate integral images from the input. C will contain sums of squares; D
- // will contain just sums
- if (highbd)
- integral_images_highbd(CONVERT_TO_SHORTPTR(dgd0), dgd_stride, width_ext,
- height_ext, Ctl, Dtl, buf_stride);
- else
- integral_images(dgd0, dgd_stride, width_ext, height_ext, Ctl, Dtl,
- buf_stride);
-
- const sgr_params_type *const params = &sgr_params[sgr_params_idx];
- // Write to flt0 and flt1
- // If params->r == 0 we skip the corresponding filter. We only allow one of
- // the radii to be 0, as having both equal to 0 would be equivalent to
- // skipping SGR entirely.
- assert(!(params->r[0] == 0 && params->r[1] == 0));
- assert(params->r[0] < AOMMIN(SGRPROJ_BORDER_VERT, SGRPROJ_BORDER_HORZ));
- assert(params->r[1] < AOMMIN(SGRPROJ_BORDER_VERT, SGRPROJ_BORDER_HORZ));
-
- if (params->r[0] > 0) {
- calc_ab_fast(A, B, C, D, width, height, buf_stride, bit_depth,
- sgr_params_idx, 0);
- final_filter_fast(flt0, flt_stride, A, B, buf_stride, dgd8, dgd_stride,
- width, height, highbd);
- }
-
- if (params->r[1] > 0) {
- calc_ab(A, B, C, D, width, height, buf_stride, bit_depth, sgr_params_idx,
- 1);
- final_filter(flt1, flt_stride, A, B, buf_stride, dgd8, dgd_stride, width,
- height, highbd);
- }
- aom_free(buf);
- return 0;
-}
-
-void apply_selfguided_restoration_sse4_1(const uint8_t *dat8, int width,
- int height, int stride, int eps,
- const int *xqd, uint8_t *dst8,
- int dst_stride, int32_t *tmpbuf,
- int bit_depth, int highbd) {
- int32_t *flt0 = tmpbuf;
- int32_t *flt1 = flt0 + RESTORATION_UNITPELS_MAX;
- assert(width * height <= RESTORATION_UNITPELS_MAX);
- const int ret = av1_selfguided_restoration_sse4_1(
- dat8, width, height, stride, flt0, flt1, width, eps, bit_depth, highbd);
- (void)ret;
- assert(!ret);
- const sgr_params_type *const params = &sgr_params[eps];
- int xq[2];
- decode_xq(xqd, xq, params);
-
- __m128i xq0 = _mm_set1_epi32(xq[0]);
- __m128i xq1 = _mm_set1_epi32(xq[1]);
-
- for (int i = 0; i < height; ++i) {
- // Calculate output in batches of 8 pixels
- for (int j = 0; j < width; j += 8) {
- const int k = i * width + j;
- const int m = i * dst_stride + j;
-
- const uint8_t *dat8ij = dat8 + i * stride + j;
- __m128i src;
- if (highbd) {
- src = xx_loadu_128(CONVERT_TO_SHORTPTR(dat8ij));
- } else {
- src = _mm_cvtepu8_epi16(xx_loadl_64(dat8ij));
- }
-
- const __m128i u = _mm_slli_epi16(src, SGRPROJ_RST_BITS);
- const __m128i u_0 = _mm_cvtepu16_epi32(u);
- const __m128i u_1 = _mm_cvtepu16_epi32(_mm_srli_si128(u, 8));
-
- __m128i v_0 = _mm_slli_epi32(u_0, SGRPROJ_PRJ_BITS);
- __m128i v_1 = _mm_slli_epi32(u_1, SGRPROJ_PRJ_BITS);
-
- if (params->r[0] > 0) {
- const __m128i f1_0 = _mm_sub_epi32(xx_loadu_128(&flt0[k]), u_0);
- v_0 = _mm_add_epi32(v_0, _mm_mullo_epi32(xq0, f1_0));
-
- const __m128i f1_1 = _mm_sub_epi32(xx_loadu_128(&flt0[k + 4]), u_1);
- v_1 = _mm_add_epi32(v_1, _mm_mullo_epi32(xq0, f1_1));
- }
-
- if (params->r[1] > 0) {
- const __m128i f2_0 = _mm_sub_epi32(xx_loadu_128(&flt1[k]), u_0);
- v_0 = _mm_add_epi32(v_0, _mm_mullo_epi32(xq1, f2_0));
-
- const __m128i f2_1 = _mm_sub_epi32(xx_loadu_128(&flt1[k + 4]), u_1);
- v_1 = _mm_add_epi32(v_1, _mm_mullo_epi32(xq1, f2_1));
- }
-
- const __m128i rounding =
- round_for_shift(SGRPROJ_PRJ_BITS + SGRPROJ_RST_BITS);
- const __m128i w_0 = _mm_srai_epi32(_mm_add_epi32(v_0, rounding),
- SGRPROJ_PRJ_BITS + SGRPROJ_RST_BITS);
- const __m128i w_1 = _mm_srai_epi32(_mm_add_epi32(v_1, rounding),
- SGRPROJ_PRJ_BITS + SGRPROJ_RST_BITS);
-
- if (highbd) {
- // Pack into 16 bits and clamp to [0, 2^bit_depth)
- const __m128i tmp = _mm_packus_epi32(w_0, w_1);
- const __m128i max = _mm_set1_epi16((1 << bit_depth) - 1);
- const __m128i res = _mm_min_epi16(tmp, max);
- xx_storeu_128(CONVERT_TO_SHORTPTR(dst8 + m), res);
- } else {
- // Pack into 8 bits and clamp to [0, 256)
- const __m128i tmp = _mm_packs_epi32(w_0, w_1);
- const __m128i res = _mm_packus_epi16(tmp, tmp /* "don't care" value */);
- xx_storel_64(dst8 + m, res);
- }
- }
- }
-}
diff --git a/third_party/aom/av1/common/x86/warp_plane_sse4.c b/third_party/aom/av1/common/x86/warp_plane_sse4.c
deleted file mode 100644
index b810cea2e..000000000
--- a/third_party/aom/av1/common/x86/warp_plane_sse4.c
+++ /dev/null
@@ -1,942 +0,0 @@
-/*
- * Copyright (c) 2017, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#include <emmintrin.h>
-#include <smmintrin.h>
-
-#include "config/av1_rtcd.h"
-
-#include "av1/common/warped_motion.h"
-
-/* This is a modified version of 'warped_filter' from warped_motion.c:
- * Each coefficient is stored in 8 bits instead of 16 bits
- * The coefficients are rearranged in the column order 0, 2, 4, 6, 1, 3, 5, 7
-
- This is done in order to avoid overflow: Since the tap with the largest
- coefficient could be any of taps 2, 3, 4 or 5, we can't use the summation
- order ((0 + 1) + (4 + 5)) + ((2 + 3) + (6 + 7)) used in the regular
- convolve functions.
-
- Instead, we use the summation order
- ((0 + 2) + (4 + 6)) + ((1 + 3) + (5 + 7)).
- The rearrangement of coefficients in this table is so that we can get the
- coefficients into the correct order more quickly.
-*/
-/* clang-format off */
-DECLARE_ALIGNED(8, static const int8_t,
- filter_8bit[WARPEDPIXEL_PREC_SHIFTS * 3 + 1][8]) = {
-#if WARPEDPIXEL_PREC_BITS == 6
- // [-1, 0)
- { 0, 127, 0, 0, 0, 1, 0, 0}, { 0, 127, 0, 0, -1, 2, 0, 0},
- { 1, 127, -1, 0, -3, 4, 0, 0}, { 1, 126, -2, 0, -4, 6, 1, 0},
- { 1, 126, -3, 0, -5, 8, 1, 0}, { 1, 125, -4, 0, -6, 11, 1, 0},
- { 1, 124, -4, 0, -7, 13, 1, 0}, { 2, 123, -5, 0, -8, 15, 1, 0},
- { 2, 122, -6, 0, -9, 18, 1, 0}, { 2, 121, -6, 0, -10, 20, 1, 0},
- { 2, 120, -7, 0, -11, 22, 2, 0}, { 2, 119, -8, 0, -12, 25, 2, 0},
- { 3, 117, -8, 0, -13, 27, 2, 0}, { 3, 116, -9, 0, -13, 29, 2, 0},
- { 3, 114, -10, 0, -14, 32, 3, 0}, { 3, 113, -10, 0, -15, 35, 2, 0},
- { 3, 111, -11, 0, -15, 37, 3, 0}, { 3, 109, -11, 0, -16, 40, 3, 0},
- { 3, 108, -12, 0, -16, 42, 3, 0}, { 4, 106, -13, 0, -17, 45, 3, 0},
- { 4, 104, -13, 0, -17, 47, 3, 0}, { 4, 102, -14, 0, -17, 50, 3, 0},
- { 4, 100, -14, 0, -17, 52, 3, 0}, { 4, 98, -15, 0, -18, 55, 4, 0},
- { 4, 96, -15, 0, -18, 58, 3, 0}, { 4, 94, -16, 0, -18, 60, 4, 0},
- { 4, 91, -16, 0, -18, 63, 4, 0}, { 4, 89, -16, 0, -18, 65, 4, 0},
- { 4, 87, -17, 0, -18, 68, 4, 0}, { 4, 85, -17, 0, -18, 70, 4, 0},
- { 4, 82, -17, 0, -18, 73, 4, 0}, { 4, 80, -17, 0, -18, 75, 4, 0},
- { 4, 78, -18, 0, -18, 78, 4, 0}, { 4, 75, -18, 0, -17, 80, 4, 0},
- { 4, 73, -18, 0, -17, 82, 4, 0}, { 4, 70, -18, 0, -17, 85, 4, 0},
- { 4, 68, -18, 0, -17, 87, 4, 0}, { 4, 65, -18, 0, -16, 89, 4, 0},
- { 4, 63, -18, 0, -16, 91, 4, 0}, { 4, 60, -18, 0, -16, 94, 4, 0},
- { 3, 58, -18, 0, -15, 96, 4, 0}, { 4, 55, -18, 0, -15, 98, 4, 0},
- { 3, 52, -17, 0, -14, 100, 4, 0}, { 3, 50, -17, 0, -14, 102, 4, 0},
- { 3, 47, -17, 0, -13, 104, 4, 0}, { 3, 45, -17, 0, -13, 106, 4, 0},
- { 3, 42, -16, 0, -12, 108, 3, 0}, { 3, 40, -16, 0, -11, 109, 3, 0},
- { 3, 37, -15, 0, -11, 111, 3, 0}, { 2, 35, -15, 0, -10, 113, 3, 0},
- { 3, 32, -14, 0, -10, 114, 3, 0}, { 2, 29, -13, 0, -9, 116, 3, 0},
- { 2, 27, -13, 0, -8, 117, 3, 0}, { 2, 25, -12, 0, -8, 119, 2, 0},
- { 2, 22, -11, 0, -7, 120, 2, 0}, { 1, 20, -10, 0, -6, 121, 2, 0},
- { 1, 18, -9, 0, -6, 122, 2, 0}, { 1, 15, -8, 0, -5, 123, 2, 0},
- { 1, 13, -7, 0, -4, 124, 1, 0}, { 1, 11, -6, 0, -4, 125, 1, 0},
- { 1, 8, -5, 0, -3, 126, 1, 0}, { 1, 6, -4, 0, -2, 126, 1, 0},
- { 0, 4, -3, 0, -1, 127, 1, 0}, { 0, 2, -1, 0, 0, 127, 0, 0},
- // [0, 1)
- { 0, 0, 1, 0, 0, 127, 0, 0}, { 0, -1, 2, 0, 0, 127, 0, 0},
- { 0, -3, 4, 1, 1, 127, -2, 0}, { 0, -5, 6, 1, 1, 127, -2, 0},
- { 0, -6, 8, 1, 2, 126, -3, 0}, {-1, -7, 11, 2, 2, 126, -4, -1},
- {-1, -8, 13, 2, 3, 125, -5, -1}, {-1, -10, 16, 3, 3, 124, -6, -1},
- {-1, -11, 18, 3, 4, 123, -7, -1}, {-1, -12, 20, 3, 4, 122, -7, -1},
- {-1, -13, 23, 3, 4, 121, -8, -1}, {-2, -14, 25, 4, 5, 120, -9, -1},
- {-1, -15, 27, 4, 5, 119, -10, -1}, {-1, -16, 30, 4, 5, 118, -11, -1},
- {-2, -17, 33, 5, 6, 116, -12, -1}, {-2, -17, 35, 5, 6, 114, -12, -1},
- {-2, -18, 38, 5, 6, 113, -13, -1}, {-2, -19, 41, 6, 7, 111, -14, -2},
- {-2, -19, 43, 6, 7, 110, -15, -2}, {-2, -20, 46, 6, 7, 108, -15, -2},
- {-2, -20, 49, 6, 7, 106, -16, -2}, {-2, -21, 51, 7, 7, 104, -16, -2},
- {-2, -21, 54, 7, 7, 102, -17, -2}, {-2, -21, 56, 7, 8, 100, -18, -2},
- {-2, -22, 59, 7, 8, 98, -18, -2}, {-2, -22, 62, 7, 8, 96, -19, -2},
- {-2, -22, 64, 7, 8, 94, -19, -2}, {-2, -22, 67, 8, 8, 91, -20, -2},
- {-2, -22, 69, 8, 8, 89, -20, -2}, {-2, -22, 72, 8, 8, 87, -21, -2},
- {-2, -21, 74, 8, 8, 84, -21, -2}, {-2, -22, 77, 8, 8, 82, -21, -2},
- {-2, -21, 79, 8, 8, 79, -21, -2}, {-2, -21, 82, 8, 8, 77, -22, -2},
- {-2, -21, 84, 8, 8, 74, -21, -2}, {-2, -21, 87, 8, 8, 72, -22, -2},
- {-2, -20, 89, 8, 8, 69, -22, -2}, {-2, -20, 91, 8, 8, 67, -22, -2},
- {-2, -19, 94, 8, 7, 64, -22, -2}, {-2, -19, 96, 8, 7, 62, -22, -2},
- {-2, -18, 98, 8, 7, 59, -22, -2}, {-2, -18, 100, 8, 7, 56, -21, -2},
- {-2, -17, 102, 7, 7, 54, -21, -2}, {-2, -16, 104, 7, 7, 51, -21, -2},
- {-2, -16, 106, 7, 6, 49, -20, -2}, {-2, -15, 108, 7, 6, 46, -20, -2},
- {-2, -15, 110, 7, 6, 43, -19, -2}, {-2, -14, 111, 7, 6, 41, -19, -2},
- {-1, -13, 113, 6, 5, 38, -18, -2}, {-1, -12, 114, 6, 5, 35, -17, -2},
- {-1, -12, 116, 6, 5, 33, -17, -2}, {-1, -11, 118, 5, 4, 30, -16, -1},
- {-1, -10, 119, 5, 4, 27, -15, -1}, {-1, -9, 120, 5, 4, 25, -14, -2},
- {-1, -8, 121, 4, 3, 23, -13, -1}, {-1, -7, 122, 4, 3, 20, -12, -1},
- {-1, -7, 123, 4, 3, 18, -11, -1}, {-1, -6, 124, 3, 3, 16, -10, -1},
- {-1, -5, 125, 3, 2, 13, -8, -1}, {-1, -4, 126, 2, 2, 11, -7, -1},
- { 0, -3, 126, 2, 1, 8, -6, 0}, { 0, -2, 127, 1, 1, 6, -5, 0},
- { 0, -2, 127, 1, 1, 4, -3, 0}, { 0, 0, 127, 0, 0, 2, -1, 0},
- // [1, 2)
- { 0, 0, 127, 0, 0, 1, 0, 0}, { 0, 0, 127, 0, 0, -1, 2, 0},
- { 0, 1, 127, -1, 0, -3, 4, 0}, { 0, 1, 126, -2, 0, -4, 6, 1},
- { 0, 1, 126, -3, 0, -5, 8, 1}, { 0, 1, 125, -4, 0, -6, 11, 1},
- { 0, 1, 124, -4, 0, -7, 13, 1}, { 0, 2, 123, -5, 0, -8, 15, 1},
- { 0, 2, 122, -6, 0, -9, 18, 1}, { 0, 2, 121, -6, 0, -10, 20, 1},
- { 0, 2, 120, -7, 0, -11, 22, 2}, { 0, 2, 119, -8, 0, -12, 25, 2},
- { 0, 3, 117, -8, 0, -13, 27, 2}, { 0, 3, 116, -9, 0, -13, 29, 2},
- { 0, 3, 114, -10, 0, -14, 32, 3}, { 0, 3, 113, -10, 0, -15, 35, 2},
- { 0, 3, 111, -11, 0, -15, 37, 3}, { 0, 3, 109, -11, 0, -16, 40, 3},
- { 0, 3, 108, -12, 0, -16, 42, 3}, { 0, 4, 106, -13, 0, -17, 45, 3},
- { 0, 4, 104, -13, 0, -17, 47, 3}, { 0, 4, 102, -14, 0, -17, 50, 3},
- { 0, 4, 100, -14, 0, -17, 52, 3}, { 0, 4, 98, -15, 0, -18, 55, 4},
- { 0, 4, 96, -15, 0, -18, 58, 3}, { 0, 4, 94, -16, 0, -18, 60, 4},
- { 0, 4, 91, -16, 0, -18, 63, 4}, { 0, 4, 89, -16, 0, -18, 65, 4},
- { 0, 4, 87, -17, 0, -18, 68, 4}, { 0, 4, 85, -17, 0, -18, 70, 4},
- { 0, 4, 82, -17, 0, -18, 73, 4}, { 0, 4, 80, -17, 0, -18, 75, 4},
- { 0, 4, 78, -18, 0, -18, 78, 4}, { 0, 4, 75, -18, 0, -17, 80, 4},
- { 0, 4, 73, -18, 0, -17, 82, 4}, { 0, 4, 70, -18, 0, -17, 85, 4},
- { 0, 4, 68, -18, 0, -17, 87, 4}, { 0, 4, 65, -18, 0, -16, 89, 4},
- { 0, 4, 63, -18, 0, -16, 91, 4}, { 0, 4, 60, -18, 0, -16, 94, 4},
- { 0, 3, 58, -18, 0, -15, 96, 4}, { 0, 4, 55, -18, 0, -15, 98, 4},
- { 0, 3, 52, -17, 0, -14, 100, 4}, { 0, 3, 50, -17, 0, -14, 102, 4},
- { 0, 3, 47, -17, 0, -13, 104, 4}, { 0, 3, 45, -17, 0, -13, 106, 4},
- { 0, 3, 42, -16, 0, -12, 108, 3}, { 0, 3, 40, -16, 0, -11, 109, 3},
- { 0, 3, 37, -15, 0, -11, 111, 3}, { 0, 2, 35, -15, 0, -10, 113, 3},
- { 0, 3, 32, -14, 0, -10, 114, 3}, { 0, 2, 29, -13, 0, -9, 116, 3},
- { 0, 2, 27, -13, 0, -8, 117, 3}, { 0, 2, 25, -12, 0, -8, 119, 2},
- { 0, 2, 22, -11, 0, -7, 120, 2}, { 0, 1, 20, -10, 0, -6, 121, 2},
- { 0, 1, 18, -9, 0, -6, 122, 2}, { 0, 1, 15, -8, 0, -5, 123, 2},
- { 0, 1, 13, -7, 0, -4, 124, 1}, { 0, 1, 11, -6, 0, -4, 125, 1},
- { 0, 1, 8, -5, 0, -3, 126, 1}, { 0, 1, 6, -4, 0, -2, 126, 1},
- { 0, 0, 4, -3, 0, -1, 127, 1}, { 0, 0, 2, -1, 0, 0, 127, 0},
- // dummy (replicate row index 191)
- { 0, 0, 2, -1, 0, 0, 127, 0},
-
-#else
- // [-1, 0)
- { 0, 127, 0, 0, 0, 1, 0, 0}, { 1, 127, -1, 0, -3, 4, 0, 0},
- { 1, 126, -3, 0, -5, 8, 1, 0}, { 1, 124, -4, 0, -7, 13, 1, 0},
- { 2, 122, -6, 0, -9, 18, 1, 0}, { 2, 120, -7, 0, -11, 22, 2, 0},
- { 3, 117, -8, 0, -13, 27, 2, 0}, { 3, 114, -10, 0, -14, 32, 3, 0},
- { 3, 111, -11, 0, -15, 37, 3, 0}, { 3, 108, -12, 0, -16, 42, 3, 0},
- { 4, 104, -13, 0, -17, 47, 3, 0}, { 4, 100, -14, 0, -17, 52, 3, 0},
- { 4, 96, -15, 0, -18, 58, 3, 0}, { 4, 91, -16, 0, -18, 63, 4, 0},
- { 4, 87, -17, 0, -18, 68, 4, 0}, { 4, 82, -17, 0, -18, 73, 4, 0},
- { 4, 78, -18, 0, -18, 78, 4, 0}, { 4, 73, -18, 0, -17, 82, 4, 0},
- { 4, 68, -18, 0, -17, 87, 4, 0}, { 4, 63, -18, 0, -16, 91, 4, 0},
- { 3, 58, -18, 0, -15, 96, 4, 0}, { 3, 52, -17, 0, -14, 100, 4, 0},
- { 3, 47, -17, 0, -13, 104, 4, 0}, { 3, 42, -16, 0, -12, 108, 3, 0},
- { 3, 37, -15, 0, -11, 111, 3, 0}, { 3, 32, -14, 0, -10, 114, 3, 0},
- { 2, 27, -13, 0, -8, 117, 3, 0}, { 2, 22, -11, 0, -7, 120, 2, 0},
- { 1, 18, -9, 0, -6, 122, 2, 0}, { 1, 13, -7, 0, -4, 124, 1, 0},
- { 1, 8, -5, 0, -3, 126, 1, 0}, { 0, 4, -3, 0, -1, 127, 1, 0},
- // [0, 1)
- { 0, 0, 1, 0, 0, 127, 0, 0}, { 0, -3, 4, 1, 1, 127, -2, 0},
- { 0, -6, 8, 1, 2, 126, -3, 0}, {-1, -8, 13, 2, 3, 125, -5, -1},
- {-1, -11, 18, 3, 4, 123, -7, -1}, {-1, -13, 23, 3, 4, 121, -8, -1},
- {-1, -15, 27, 4, 5, 119, -10, -1}, {-2, -17, 33, 5, 6, 116, -12, -1},
- {-2, -18, 38, 5, 6, 113, -13, -1}, {-2, -19, 43, 6, 7, 110, -15, -2},
- {-2, -20, 49, 6, 7, 106, -16, -2}, {-2, -21, 54, 7, 7, 102, -17, -2},
- {-2, -22, 59, 7, 8, 98, -18, -2}, {-2, -22, 64, 7, 8, 94, -19, -2},
- {-2, -22, 69, 8, 8, 89, -20, -2}, {-2, -21, 74, 8, 8, 84, -21, -2},
- {-2, -21, 79, 8, 8, 79, -21, -2}, {-2, -21, 84, 8, 8, 74, -21, -2},
- {-2, -20, 89, 8, 8, 69, -22, -2}, {-2, -19, 94, 8, 7, 64, -22, -2},
- {-2, -18, 98, 8, 7, 59, -22, -2}, {-2, -17, 102, 7, 7, 54, -21, -2},
- {-2, -16, 106, 7, 6, 49, -20, -2}, {-2, -15, 110, 7, 6, 43, -19, -2},
- {-1, -13, 113, 6, 5, 38, -18, -2}, {-1, -12, 116, 6, 5, 33, -17, -2},
- {-1, -10, 119, 5, 4, 27, -15, -1}, {-1, -8, 121, 4, 3, 23, -13, -1},
- {-1, -7, 123, 4, 3, 18, -11, -1}, {-1, -5, 125, 3, 2, 13, -8, -1},
- { 0, -3, 126, 2, 1, 8, -6, 0}, { 0, -2, 127, 1, 1, 4, -3, 0},
- // [1, 2)
- { 0, 0, 127, 0, 0, 1, 0, 0}, { 0, 1, 127, -1, 0, -3, 4, 0},
- { 0, 1, 126, -3, 0, -5, 8, 1}, { 0, 1, 124, -4, 0, -7, 13, 1},
- { 0, 2, 122, -6, 0, -9, 18, 1}, { 0, 2, 120, -7, 0, -11, 22, 2},
- { 0, 3, 117, -8, 0, -13, 27, 2}, { 0, 3, 114, -10, 0, -14, 32, 3},
- { 0, 3, 111, -11, 0, -15, 37, 3}, { 0, 3, 108, -12, 0, -16, 42, 3},
- { 0, 4, 104, -13, 0, -17, 47, 3}, { 0, 4, 100, -14, 0, -17, 52, 3},
- { 0, 4, 96, -15, 0, -18, 58, 3}, { 0, 4, 91, -16, 0, -18, 63, 4},
- { 0, 4, 87, -17, 0, -18, 68, 4}, { 0, 4, 82, -17, 0, -18, 73, 4},
- { 0, 4, 78, -18, 0, -18, 78, 4}, { 0, 4, 73, -18, 0, -17, 82, 4},
- { 0, 4, 68, -18, 0, -17, 87, 4}, { 0, 4, 63, -18, 0, -16, 91, 4},
- { 0, 3, 58, -18, 0, -15, 96, 4}, { 0, 3, 52, -17, 0, -14, 100, 4},
- { 0, 3, 47, -17, 0, -13, 104, 4}, { 0, 3, 42, -16, 0, -12, 108, 3},
- { 0, 3, 37, -15, 0, -11, 111, 3}, { 0, 3, 32, -14, 0, -10, 114, 3},
- { 0, 2, 27, -13, 0, -8, 117, 3}, { 0, 2, 22, -11, 0, -7, 120, 2},
- { 0, 1, 18, -9, 0, -6, 122, 2}, { 0, 1, 13, -7, 0, -4, 124, 1},
- { 0, 1, 8, -5, 0, -3, 126, 1}, { 0, 0, 4, -3, 0, -1, 127, 1},
- // dummy (replicate row index 95)
- { 0, 0, 4, -3, 0, -1, 127, 1},
-#endif // WARPEDPIXEL_PREC_BITS == 6
-};
-/* clang-format on */
-
-// Shuffle masks: we want to convert a sequence of bytes 0, 1, 2, ..., 15
-// in an SSE register into two sequences:
-// 0, 2, 2, 4, ..., 12, 12, 14, <don't care>
-// 1, 3, 3, 5, ..., 13, 13, 15, <don't care>
-static const uint8_t even_mask[16] = { 0, 2, 2, 4, 4, 6, 6, 8,
- 8, 10, 10, 12, 12, 14, 14, 0 };
-static const uint8_t odd_mask[16] = { 1, 3, 3, 5, 5, 7, 7, 9,
- 9, 11, 11, 13, 13, 15, 15, 0 };
-
-static const uint8_t shuffle_alpha0_mask01[16] = { 0, 1, 0, 1, 0, 1, 0, 1,
- 0, 1, 0, 1, 0, 1, 0, 1 };
-
-static const uint8_t shuffle_alpha0_mask23[16] = { 2, 3, 2, 3, 2, 3, 2, 3,
- 2, 3, 2, 3, 2, 3, 2, 3 };
-
-static const uint8_t shuffle_alpha0_mask45[16] = { 4, 5, 4, 5, 4, 5, 4, 5,
- 4, 5, 4, 5, 4, 5, 4, 5 };
-
-static const uint8_t shuffle_alpha0_mask67[16] = { 6, 7, 6, 7, 6, 7, 6, 7,
- 6, 7, 6, 7, 6, 7, 6, 7 };
-
-static const uint8_t shuffle_gamma0_mask0[16] = { 0, 1, 2, 3, 0, 1, 2, 3,
- 0, 1, 2, 3, 0, 1, 2, 3 };
-static const uint8_t shuffle_gamma0_mask1[16] = { 4, 5, 6, 7, 4, 5, 6, 7,
- 4, 5, 6, 7, 4, 5, 6, 7 };
-static const uint8_t shuffle_gamma0_mask2[16] = { 8, 9, 10, 11, 8, 9, 10, 11,
- 8, 9, 10, 11, 8, 9, 10, 11 };
-static const uint8_t shuffle_gamma0_mask3[16] = {
- 12, 13, 14, 15, 12, 13, 14, 15, 12, 13, 14, 15, 12, 13, 14, 15
-};
-
-static INLINE void filter_src_pixels(__m128i src, __m128i *tmp, __m128i *coeff,
- const int offset_bits_horiz,
- const int reduce_bits_horiz, int k) {
- const __m128i src_even =
- _mm_shuffle_epi8(src, _mm_loadu_si128((__m128i *)even_mask));
- const __m128i src_odd =
- _mm_shuffle_epi8(src, _mm_loadu_si128((__m128i *)odd_mask));
- // The pixel order we need for 'src' is:
- // 0 2 2 4 4 6 6 8 1 3 3 5 5 7 7 9
- const __m128i src_02 = _mm_unpacklo_epi64(src_even, src_odd);
- const __m128i res_02 = _mm_maddubs_epi16(src_02, coeff[0]);
- // 4 6 6 8 8 10 10 12 5 7 7 9 9 11 11 13
- const __m128i src_46 = _mm_unpacklo_epi64(_mm_srli_si128(src_even, 4),
- _mm_srli_si128(src_odd, 4));
- const __m128i res_46 = _mm_maddubs_epi16(src_46, coeff[1]);
- // 1 3 3 5 5 7 7 9 2 4 4 6 6 8 8 10
- const __m128i src_13 =
- _mm_unpacklo_epi64(src_odd, _mm_srli_si128(src_even, 2));
- const __m128i res_13 = _mm_maddubs_epi16(src_13, coeff[2]);
- // 5 7 7 9 9 11 11 13 6 8 8 10 10 12 12 14
- const __m128i src_57 = _mm_unpacklo_epi64(_mm_srli_si128(src_odd, 4),
- _mm_srli_si128(src_even, 6));
- const __m128i res_57 = _mm_maddubs_epi16(src_57, coeff[3]);
-
- const __m128i round_const = _mm_set1_epi16((1 << offset_bits_horiz) +
- ((1 << reduce_bits_horiz) >> 1));
-
- // Note: The values res_02 + res_46 and res_13 + res_57 both
- // fit into int16s at this point, but their sum may be too wide to fit
- // into an int16. However, once we also add round_const, the sum of
- // all of these fits into a uint16.
- //
- // The wrapping behaviour of _mm_add_* is used here to make sure we
- // get the correct result despite converting between different
- // (implicit) types.
- const __m128i res_even = _mm_add_epi16(res_02, res_46);
- const __m128i res_odd = _mm_add_epi16(res_13, res_57);
- const __m128i res =
- _mm_add_epi16(_mm_add_epi16(res_even, res_odd), round_const);
- tmp[k + 7] = _mm_srl_epi16(res, _mm_cvtsi32_si128(reduce_bits_horiz));
-}
-
-static INLINE void prepare_horizontal_filter_coeff(int alpha, int sx,
- __m128i *coeff) {
- // Filter even-index pixels
- const __m128i tmp_0 = _mm_loadl_epi64(
- (__m128i *)&filter_8bit[(sx + 0 * alpha) >> WARPEDDIFF_PREC_BITS]);
- const __m128i tmp_1 = _mm_loadl_epi64(
- (__m128i *)&filter_8bit[(sx + 1 * alpha) >> WARPEDDIFF_PREC_BITS]);
- const __m128i tmp_2 = _mm_loadl_epi64(
- (__m128i *)&filter_8bit[(sx + 2 * alpha) >> WARPEDDIFF_PREC_BITS]);
- const __m128i tmp_3 = _mm_loadl_epi64(
- (__m128i *)&filter_8bit[(sx + 3 * alpha) >> WARPEDDIFF_PREC_BITS]);
- const __m128i tmp_4 = _mm_loadl_epi64(
- (__m128i *)&filter_8bit[(sx + 4 * alpha) >> WARPEDDIFF_PREC_BITS]);
- const __m128i tmp_5 = _mm_loadl_epi64(
- (__m128i *)&filter_8bit[(sx + 5 * alpha) >> WARPEDDIFF_PREC_BITS]);
- const __m128i tmp_6 = _mm_loadl_epi64(
- (__m128i *)&filter_8bit[(sx + 6 * alpha) >> WARPEDDIFF_PREC_BITS]);
- const __m128i tmp_7 = _mm_loadl_epi64(
- (__m128i *)&filter_8bit[(sx + 7 * alpha) >> WARPEDDIFF_PREC_BITS]);
-
- // Coeffs 0 2 0 2 4 6 4 6 1 3 1 3 5 7 5 7 for pixels 0 2
- const __m128i tmp_8 = _mm_unpacklo_epi16(tmp_0, tmp_2);
- // Coeffs 0 2 0 2 4 6 4 6 1 3 1 3 5 7 5 7 for pixels 1 3
- const __m128i tmp_9 = _mm_unpacklo_epi16(tmp_1, tmp_3);
- // Coeffs 0 2 0 2 4 6 4 6 1 3 1 3 5 7 5 7 for pixels 4 6
- const __m128i tmp_10 = _mm_unpacklo_epi16(tmp_4, tmp_6);
- // Coeffs 0 2 0 2 4 6 4 6 1 3 1 3 5 7 5 7 for pixels 5 7
- const __m128i tmp_11 = _mm_unpacklo_epi16(tmp_5, tmp_7);
-
- // Coeffs 0 2 0 2 0 2 0 2 4 6 4 6 4 6 4 6 for pixels 0 2 4 6
- const __m128i tmp_12 = _mm_unpacklo_epi32(tmp_8, tmp_10);
- // Coeffs 1 3 1 3 1 3 1 3 5 7 5 7 5 7 5 7 for pixels 0 2 4 6
- const __m128i tmp_13 = _mm_unpackhi_epi32(tmp_8, tmp_10);
- // Coeffs 0 2 0 2 0 2 0 2 4 6 4 6 4 6 4 6 for pixels 1 3 5 7
- const __m128i tmp_14 = _mm_unpacklo_epi32(tmp_9, tmp_11);
- // Coeffs 1 3 1 3 1 3 1 3 5 7 5 7 5 7 5 7 for pixels 1 3 5 7
- const __m128i tmp_15 = _mm_unpackhi_epi32(tmp_9, tmp_11);
-
- // Coeffs 0 2 for pixels 0 2 4 6 1 3 5 7
- coeff[0] = _mm_unpacklo_epi64(tmp_12, tmp_14);
- // Coeffs 4 6 for pixels 0 2 4 6 1 3 5 7
- coeff[1] = _mm_unpackhi_epi64(tmp_12, tmp_14);
- // Coeffs 1 3 for pixels 0 2 4 6 1 3 5 7
- coeff[2] = _mm_unpacklo_epi64(tmp_13, tmp_15);
- // Coeffs 5 7 for pixels 0 2 4 6 1 3 5 7
- coeff[3] = _mm_unpackhi_epi64(tmp_13, tmp_15);
-}
-
-static INLINE void prepare_horizontal_filter_coeff_alpha0(int sx,
- __m128i *coeff) {
- // Filter even-index pixels
- const __m128i tmp_0 =
- _mm_loadl_epi64((__m128i *)&filter_8bit[sx >> WARPEDDIFF_PREC_BITS]);
-
- // Coeffs 0 2 for pixels 0 2 4 6 1 3 5 7
- coeff[0] = _mm_shuffle_epi8(
- tmp_0, _mm_loadu_si128((__m128i *)shuffle_alpha0_mask01));
- // Coeffs 4 6 for pixels 0 2 4 6 1 3 5 7
- coeff[1] = _mm_shuffle_epi8(
- tmp_0, _mm_loadu_si128((__m128i *)shuffle_alpha0_mask23));
- // Coeffs 1 3 for pixels 0 2 4 6 1 3 5 7
- coeff[2] = _mm_shuffle_epi8(
- tmp_0, _mm_loadu_si128((__m128i *)shuffle_alpha0_mask45));
- // Coeffs 5 7 for pixels 0 2 4 6 1 3 5 7
- coeff[3] = _mm_shuffle_epi8(
- tmp_0, _mm_loadu_si128((__m128i *)shuffle_alpha0_mask67));
-}
-
-static INLINE void horizontal_filter(__m128i src, __m128i *tmp, int sx,
- int alpha, int k,
- const int offset_bits_horiz,
- const int reduce_bits_horiz) {
- __m128i coeff[4];
- prepare_horizontal_filter_coeff(alpha, sx, coeff);
- filter_src_pixels(src, tmp, coeff, offset_bits_horiz, reduce_bits_horiz, k);
-}
-
-static INLINE void warp_horizontal_filter(const uint8_t *ref, __m128i *tmp,
- int stride, int32_t ix4, int32_t iy4,
- int32_t sx4, int alpha, int beta,
- int p_height, int height, int i,
- const int offset_bits_horiz,
- const int reduce_bits_horiz) {
- int k;
- for (k = -7; k < AOMMIN(8, p_height - i); ++k) {
- int iy = iy4 + k;
- if (iy < 0)
- iy = 0;
- else if (iy > height - 1)
- iy = height - 1;
- int sx = sx4 + beta * (k + 4);
-
- // Load source pixels
- const __m128i src =
- _mm_loadu_si128((__m128i *)(ref + iy * stride + ix4 - 7));
- horizontal_filter(src, tmp, sx, alpha, k, offset_bits_horiz,
- reduce_bits_horiz);
- }
-}
-
-static INLINE void warp_horizontal_filter_alpha0(
- const uint8_t *ref, __m128i *tmp, int stride, int32_t ix4, int32_t iy4,
- int32_t sx4, int alpha, int beta, int p_height, int height, int i,
- const int offset_bits_horiz, const int reduce_bits_horiz) {
- (void)alpha;
- int k;
- for (k = -7; k < AOMMIN(8, p_height - i); ++k) {
- int iy = iy4 + k;
- if (iy < 0)
- iy = 0;
- else if (iy > height - 1)
- iy = height - 1;
- int sx = sx4 + beta * (k + 4);
-
- // Load source pixels
- const __m128i src =
- _mm_loadu_si128((__m128i *)(ref + iy * stride + ix4 - 7));
-
- __m128i coeff[4];
- prepare_horizontal_filter_coeff_alpha0(sx, coeff);
- filter_src_pixels(src, tmp, coeff, offset_bits_horiz, reduce_bits_horiz, k);
- }
-}
-
-static INLINE void warp_horizontal_filter_beta0(
- const uint8_t *ref, __m128i *tmp, int stride, int32_t ix4, int32_t iy4,
- int32_t sx4, int alpha, int beta, int p_height, int height, int i,
- const int offset_bits_horiz, const int reduce_bits_horiz) {
- (void)beta;
- int k;
- __m128i coeff[4];
- prepare_horizontal_filter_coeff(alpha, sx4, coeff);
-
- for (k = -7; k < AOMMIN(8, p_height - i); ++k) {
- int iy = iy4 + k;
- if (iy < 0)
- iy = 0;
- else if (iy > height - 1)
- iy = height - 1;
-
- // Load source pixels
- const __m128i src =
- _mm_loadu_si128((__m128i *)(ref + iy * stride + ix4 - 7));
- filter_src_pixels(src, tmp, coeff, offset_bits_horiz, reduce_bits_horiz, k);
- }
-}
-
-static INLINE void warp_horizontal_filter_alpha0_beta0(
- const uint8_t *ref, __m128i *tmp, int stride, int32_t ix4, int32_t iy4,
- int32_t sx4, int alpha, int beta, int p_height, int height, int i,
- const int offset_bits_horiz, const int reduce_bits_horiz) {
- (void)beta;
- (void)alpha;
- int k;
-
- __m128i coeff[4];
- prepare_horizontal_filter_coeff_alpha0(sx4, coeff);
-
- for (k = -7; k < AOMMIN(8, p_height - i); ++k) {
- int iy = iy4 + k;
- if (iy < 0)
- iy = 0;
- else if (iy > height - 1)
- iy = height - 1;
-
- // Load source pixels
- const __m128i src =
- _mm_loadu_si128((__m128i *)(ref + iy * stride + ix4 - 7));
- filter_src_pixels(src, tmp, coeff, offset_bits_horiz, reduce_bits_horiz, k);
- }
-}
-
-static INLINE void unpack_weights_and_set_round_const(
- ConvolveParams *conv_params, const int round_bits, const int offset_bits,
- __m128i *res_sub_const, __m128i *round_bits_const, __m128i *wt) {
- *res_sub_const =
- _mm_set1_epi16(-(1 << (offset_bits - conv_params->round_1)) -
- (1 << (offset_bits - conv_params->round_1 - 1)));
- *round_bits_const = _mm_set1_epi16(((1 << round_bits) >> 1));
-
- const int w0 = conv_params->fwd_offset;
- const int w1 = conv_params->bck_offset;
- const __m128i wt0 = _mm_set1_epi16(w0);
- const __m128i wt1 = _mm_set1_epi16(w1);
- *wt = _mm_unpacklo_epi16(wt0, wt1);
-}
-
-static INLINE void prepare_vertical_filter_coeffs(int gamma, int sy,
- __m128i *coeffs) {
- const __m128i tmp_0 = _mm_loadu_si128(
- (__m128i *)(warped_filter + ((sy + 0 * gamma) >> WARPEDDIFF_PREC_BITS)));
- const __m128i tmp_2 = _mm_loadu_si128(
- (__m128i *)(warped_filter + ((sy + 2 * gamma) >> WARPEDDIFF_PREC_BITS)));
- const __m128i tmp_4 = _mm_loadu_si128(
- (__m128i *)(warped_filter + ((sy + 4 * gamma) >> WARPEDDIFF_PREC_BITS)));
- const __m128i tmp_6 = _mm_loadu_si128(
- (__m128i *)(warped_filter + ((sy + 6 * gamma) >> WARPEDDIFF_PREC_BITS)));
-
- const __m128i tmp_8 = _mm_unpacklo_epi32(tmp_0, tmp_2);
- const __m128i tmp_10 = _mm_unpacklo_epi32(tmp_4, tmp_6);
- const __m128i tmp_12 = _mm_unpackhi_epi32(tmp_0, tmp_2);
- const __m128i tmp_14 = _mm_unpackhi_epi32(tmp_4, tmp_6);
-
- // even coeffs
- coeffs[0] = _mm_unpacklo_epi64(tmp_8, tmp_10);
- coeffs[1] = _mm_unpackhi_epi64(tmp_8, tmp_10);
- coeffs[2] = _mm_unpacklo_epi64(tmp_12, tmp_14);
- coeffs[3] = _mm_unpackhi_epi64(tmp_12, tmp_14);
-
- const __m128i tmp_1 = _mm_loadu_si128(
- (__m128i *)(warped_filter + ((sy + 1 * gamma) >> WARPEDDIFF_PREC_BITS)));
- const __m128i tmp_3 = _mm_loadu_si128(
- (__m128i *)(warped_filter + ((sy + 3 * gamma) >> WARPEDDIFF_PREC_BITS)));
- const __m128i tmp_5 = _mm_loadu_si128(
- (__m128i *)(warped_filter + ((sy + 5 * gamma) >> WARPEDDIFF_PREC_BITS)));
- const __m128i tmp_7 = _mm_loadu_si128(
- (__m128i *)(warped_filter + ((sy + 7 * gamma) >> WARPEDDIFF_PREC_BITS)));
-
- const __m128i tmp_9 = _mm_unpacklo_epi32(tmp_1, tmp_3);
- const __m128i tmp_11 = _mm_unpacklo_epi32(tmp_5, tmp_7);
- const __m128i tmp_13 = _mm_unpackhi_epi32(tmp_1, tmp_3);
- const __m128i tmp_15 = _mm_unpackhi_epi32(tmp_5, tmp_7);
-
- // odd coeffs
- coeffs[4] = _mm_unpacklo_epi64(tmp_9, tmp_11);
- coeffs[5] = _mm_unpackhi_epi64(tmp_9, tmp_11);
- coeffs[6] = _mm_unpacklo_epi64(tmp_13, tmp_15);
- coeffs[7] = _mm_unpackhi_epi64(tmp_13, tmp_15);
-}
-
-static INLINE void prepare_vertical_filter_coeffs_gamma0(int sy,
- __m128i *coeffs) {
- const __m128i tmp_0 = _mm_loadu_si128(
- (__m128i *)(warped_filter + (sy >> WARPEDDIFF_PREC_BITS)));
-
- // even coeffs
- coeffs[0] =
- _mm_shuffle_epi8(tmp_0, _mm_loadu_si128((__m128i *)shuffle_gamma0_mask0));
- coeffs[1] =
- _mm_shuffle_epi8(tmp_0, _mm_loadu_si128((__m128i *)shuffle_gamma0_mask1));
- coeffs[2] =
- _mm_shuffle_epi8(tmp_0, _mm_loadu_si128((__m128i *)shuffle_gamma0_mask2));
- coeffs[3] =
- _mm_shuffle_epi8(tmp_0, _mm_loadu_si128((__m128i *)shuffle_gamma0_mask3));
-
- // odd coeffs
- coeffs[4] = coeffs[0];
- coeffs[5] = coeffs[1];
- coeffs[6] = coeffs[2];
- coeffs[7] = coeffs[3];
-}
-
-static INLINE void filter_src_pixels_vertical(__m128i *tmp, __m128i *coeffs,
- __m128i *res_lo, __m128i *res_hi,
- int k) {
- // Load from tmp and rearrange pairs of consecutive rows into the
- // column order 0 0 2 2 4 4 6 6; 1 1 3 3 5 5 7 7
- const __m128i *src = tmp + (k + 4);
- const __m128i src_0 = _mm_unpacklo_epi16(src[0], src[1]);
- const __m128i src_2 = _mm_unpacklo_epi16(src[2], src[3]);
- const __m128i src_4 = _mm_unpacklo_epi16(src[4], src[5]);
- const __m128i src_6 = _mm_unpacklo_epi16(src[6], src[7]);
-
- const __m128i res_0 = _mm_madd_epi16(src_0, coeffs[0]);
- const __m128i res_2 = _mm_madd_epi16(src_2, coeffs[1]);
- const __m128i res_4 = _mm_madd_epi16(src_4, coeffs[2]);
- const __m128i res_6 = _mm_madd_epi16(src_6, coeffs[3]);
-
- const __m128i res_even =
- _mm_add_epi32(_mm_add_epi32(res_0, res_2), _mm_add_epi32(res_4, res_6));
-
- // Filter odd-index pixels
- const __m128i src_1 = _mm_unpackhi_epi16(src[0], src[1]);
- const __m128i src_3 = _mm_unpackhi_epi16(src[2], src[3]);
- const __m128i src_5 = _mm_unpackhi_epi16(src[4], src[5]);
- const __m128i src_7 = _mm_unpackhi_epi16(src[6], src[7]);
-
- const __m128i res_1 = _mm_madd_epi16(src_1, coeffs[4]);
- const __m128i res_3 = _mm_madd_epi16(src_3, coeffs[5]);
- const __m128i res_5 = _mm_madd_epi16(src_5, coeffs[6]);
- const __m128i res_7 = _mm_madd_epi16(src_7, coeffs[7]);
-
- const __m128i res_odd =
- _mm_add_epi32(_mm_add_epi32(res_1, res_3), _mm_add_epi32(res_5, res_7));
-
- // Rearrange pixels back into the order 0 ... 7
- *res_lo = _mm_unpacklo_epi32(res_even, res_odd);
- *res_hi = _mm_unpackhi_epi32(res_even, res_odd);
-}
-
-static INLINE void store_vertical_filter_output(
- __m128i *res_lo, __m128i *res_hi, const __m128i *res_add_const,
- const __m128i *wt, const __m128i *res_sub_const, __m128i *round_bits_const,
- uint8_t *pred, ConvolveParams *conv_params, int i, int j, int k,
- const int reduce_bits_vert, int p_stride, int p_width,
- const int round_bits) {
- __m128i res_lo_1 = *res_lo;
- __m128i res_hi_1 = *res_hi;
-
- if (conv_params->is_compound) {
- __m128i *const p =
- (__m128i *)&conv_params->dst[(i + k + 4) * conv_params->dst_stride + j];
- res_lo_1 = _mm_srai_epi32(_mm_add_epi32(res_lo_1, *res_add_const),
- reduce_bits_vert);
- const __m128i temp_lo_16 = _mm_packus_epi32(res_lo_1, res_lo_1);
- __m128i res_lo_16;
- if (conv_params->do_average) {
- __m128i *const dst8 = (__m128i *)&pred[(i + k + 4) * p_stride + j];
- const __m128i p_16 = _mm_loadl_epi64(p);
-
- if (conv_params->use_jnt_comp_avg) {
- const __m128i p_16_lo = _mm_unpacklo_epi16(p_16, temp_lo_16);
- const __m128i wt_res_lo = _mm_madd_epi16(p_16_lo, *wt);
- const __m128i shifted_32 =
- _mm_srai_epi32(wt_res_lo, DIST_PRECISION_BITS);
- res_lo_16 = _mm_packus_epi32(shifted_32, shifted_32);
- } else {
- res_lo_16 = _mm_srai_epi16(_mm_add_epi16(p_16, temp_lo_16), 1);
- }
-
- res_lo_16 = _mm_add_epi16(res_lo_16, *res_sub_const);
-
- res_lo_16 = _mm_srai_epi16(_mm_add_epi16(res_lo_16, *round_bits_const),
- round_bits);
- __m128i res_8_lo = _mm_packus_epi16(res_lo_16, res_lo_16);
- *(uint32_t *)dst8 = _mm_cvtsi128_si32(res_8_lo);
- } else {
- _mm_storel_epi64(p, temp_lo_16);
- }
- if (p_width > 4) {
- __m128i *const p4 =
- (__m128i *)&conv_params
- ->dst[(i + k + 4) * conv_params->dst_stride + j + 4];
- res_hi_1 = _mm_srai_epi32(_mm_add_epi32(res_hi_1, *res_add_const),
- reduce_bits_vert);
- const __m128i temp_hi_16 = _mm_packus_epi32(res_hi_1, res_hi_1);
- __m128i res_hi_16;
-
- if (conv_params->do_average) {
- __m128i *const dst8_4 =
- (__m128i *)&pred[(i + k + 4) * p_stride + j + 4];
- const __m128i p4_16 = _mm_loadl_epi64(p4);
-
- if (conv_params->use_jnt_comp_avg) {
- const __m128i p_16_hi = _mm_unpacklo_epi16(p4_16, temp_hi_16);
- const __m128i wt_res_hi = _mm_madd_epi16(p_16_hi, *wt);
- const __m128i shifted_32 =
- _mm_srai_epi32(wt_res_hi, DIST_PRECISION_BITS);
- res_hi_16 = _mm_packus_epi32(shifted_32, shifted_32);
- } else {
- res_hi_16 = _mm_srai_epi16(_mm_add_epi16(p4_16, temp_hi_16), 1);
- }
- res_hi_16 = _mm_add_epi16(res_hi_16, *res_sub_const);
-
- res_hi_16 = _mm_srai_epi16(_mm_add_epi16(res_hi_16, *round_bits_const),
- round_bits);
- __m128i res_8_hi = _mm_packus_epi16(res_hi_16, res_hi_16);
- *(uint32_t *)dst8_4 = _mm_cvtsi128_si32(res_8_hi);
-
- } else {
- _mm_storel_epi64(p4, temp_hi_16);
- }
- }
- } else {
- const __m128i res_lo_round = _mm_srai_epi32(
- _mm_add_epi32(res_lo_1, *res_add_const), reduce_bits_vert);
- const __m128i res_hi_round = _mm_srai_epi32(
- _mm_add_epi32(res_hi_1, *res_add_const), reduce_bits_vert);
-
- const __m128i res_16bit = _mm_packs_epi32(res_lo_round, res_hi_round);
- __m128i res_8bit = _mm_packus_epi16(res_16bit, res_16bit);
-
- // Store, blending with 'pred' if needed
- __m128i *const p = (__m128i *)&pred[(i + k + 4) * p_stride + j];
-
- // Note: If we're outputting a 4x4 block, we need to be very careful
- // to only output 4 pixels at this point, to avoid encode/decode
- // mismatches when encoding with multiple threads.
- if (p_width == 4) {
- *(uint32_t *)p = _mm_cvtsi128_si32(res_8bit);
- } else {
- _mm_storel_epi64(p, res_8bit);
- }
- }
-}
-
-static INLINE void warp_vertical_filter(
- uint8_t *pred, __m128i *tmp, ConvolveParams *conv_params, int16_t gamma,
- int16_t delta, int p_height, int p_stride, int p_width, int i, int j,
- int sy4, const int reduce_bits_vert, const __m128i *res_add_const,
- const int round_bits, const int offset_bits) {
- int k;
- __m128i res_sub_const, round_bits_const, wt;
- unpack_weights_and_set_round_const(conv_params, round_bits, offset_bits,
- &res_sub_const, &round_bits_const, &wt);
- // Vertical filter
- for (k = -4; k < AOMMIN(4, p_height - i - 4); ++k) {
- int sy = sy4 + delta * (k + 4);
-
- __m128i coeffs[8];
- prepare_vertical_filter_coeffs(gamma, sy, coeffs);
-
- __m128i res_lo;
- __m128i res_hi;
- filter_src_pixels_vertical(tmp, coeffs, &res_lo, &res_hi, k);
-
- store_vertical_filter_output(&res_lo, &res_hi, res_add_const, &wt,
- &res_sub_const, &round_bits_const, pred,
- conv_params, i, j, k, reduce_bits_vert,
- p_stride, p_width, round_bits);
- }
-}
-
-static INLINE void warp_vertical_filter_gamma0(
- uint8_t *pred, __m128i *tmp, ConvolveParams *conv_params, int16_t gamma,
- int16_t delta, int p_height, int p_stride, int p_width, int i, int j,
- int sy4, const int reduce_bits_vert, const __m128i *res_add_const,
- const int round_bits, const int offset_bits) {
- int k;
- (void)gamma;
- __m128i res_sub_const, round_bits_const, wt;
- unpack_weights_and_set_round_const(conv_params, round_bits, offset_bits,
- &res_sub_const, &round_bits_const, &wt);
- // Vertical filter
- for (k = -4; k < AOMMIN(4, p_height - i - 4); ++k) {
- int sy = sy4 + delta * (k + 4);
-
- __m128i coeffs[8];
- prepare_vertical_filter_coeffs_gamma0(sy, coeffs);
-
- __m128i res_lo;
- __m128i res_hi;
- filter_src_pixels_vertical(tmp, coeffs, &res_lo, &res_hi, k);
-
- store_vertical_filter_output(&res_lo, &res_hi, res_add_const, &wt,
- &res_sub_const, &round_bits_const, pred,
- conv_params, i, j, k, reduce_bits_vert,
- p_stride, p_width, round_bits);
- }
-}
-
-static INLINE void warp_vertical_filter_delta0(
- uint8_t *pred, __m128i *tmp, ConvolveParams *conv_params, int16_t gamma,
- int16_t delta, int p_height, int p_stride, int p_width, int i, int j,
- int sy4, const int reduce_bits_vert, const __m128i *res_add_const,
- const int round_bits, const int offset_bits) {
- (void)delta;
- int k;
- __m128i res_sub_const, round_bits_const, wt;
- unpack_weights_and_set_round_const(conv_params, round_bits, offset_bits,
- &res_sub_const, &round_bits_const, &wt);
-
- __m128i coeffs[8];
- prepare_vertical_filter_coeffs(gamma, sy4, coeffs);
- // Vertical filter
- for (k = -4; k < AOMMIN(4, p_height - i - 4); ++k) {
- __m128i res_lo;
- __m128i res_hi;
- filter_src_pixels_vertical(tmp, coeffs, &res_lo, &res_hi, k);
-
- store_vertical_filter_output(&res_lo, &res_hi, res_add_const, &wt,
- &res_sub_const, &round_bits_const, pred,
- conv_params, i, j, k, reduce_bits_vert,
- p_stride, p_width, round_bits);
- }
-}
-
-static INLINE void warp_vertical_filter_gamma0_delta0(
- uint8_t *pred, __m128i *tmp, ConvolveParams *conv_params, int16_t gamma,
- int16_t delta, int p_height, int p_stride, int p_width, int i, int j,
- int sy4, const int reduce_bits_vert, const __m128i *res_add_const,
- const int round_bits, const int offset_bits) {
- (void)delta;
- (void)gamma;
- int k;
- __m128i res_sub_const, round_bits_const, wt;
- unpack_weights_and_set_round_const(conv_params, round_bits, offset_bits,
- &res_sub_const, &round_bits_const, &wt);
-
- __m128i coeffs[8];
- prepare_vertical_filter_coeffs_gamma0(sy4, coeffs);
- // Vertical filter
- for (k = -4; k < AOMMIN(4, p_height - i - 4); ++k) {
- __m128i res_lo;
- __m128i res_hi;
- filter_src_pixels_vertical(tmp, coeffs, &res_lo, &res_hi, k);
-
- store_vertical_filter_output(&res_lo, &res_hi, res_add_const, &wt,
- &res_sub_const, &round_bits_const, pred,
- conv_params, i, j, k, reduce_bits_vert,
- p_stride, p_width, round_bits);
- }
-}
-
-static INLINE void prepare_warp_vertical_filter(
- uint8_t *pred, __m128i *tmp, ConvolveParams *conv_params, int16_t gamma,
- int16_t delta, int p_height, int p_stride, int p_width, int i, int j,
- int sy4, const int reduce_bits_vert, const __m128i *res_add_const,
- const int round_bits, const int offset_bits) {
- if (gamma == 0 && delta == 0)
- warp_vertical_filter_gamma0_delta0(
- pred, tmp, conv_params, gamma, delta, p_height, p_stride, p_width, i, j,
- sy4, reduce_bits_vert, res_add_const, round_bits, offset_bits);
- else if (gamma == 0 && delta != 0)
- warp_vertical_filter_gamma0(pred, tmp, conv_params, gamma, delta, p_height,
- p_stride, p_width, i, j, sy4, reduce_bits_vert,
- res_add_const, round_bits, offset_bits);
- else if (gamma != 0 && delta == 0)
- warp_vertical_filter_delta0(pred, tmp, conv_params, gamma, delta, p_height,
- p_stride, p_width, i, j, sy4, reduce_bits_vert,
- res_add_const, round_bits, offset_bits);
- else
- warp_vertical_filter(pred, tmp, conv_params, gamma, delta, p_height,
- p_stride, p_width, i, j, sy4, reduce_bits_vert,
- res_add_const, round_bits, offset_bits);
-}
-
-static INLINE void prepare_warp_horizontal_filter(
- const uint8_t *ref, __m128i *tmp, int stride, int32_t ix4, int32_t iy4,
- int32_t sx4, int alpha, int beta, int p_height, int height, int i,
- const int offset_bits_horiz, const int reduce_bits_horiz) {
- if (alpha == 0 && beta == 0)
- warp_horizontal_filter_alpha0_beta0(ref, tmp, stride, ix4, iy4, sx4, alpha,
- beta, p_height, height, i,
- offset_bits_horiz, reduce_bits_horiz);
- else if (alpha == 0 && beta != 0)
- warp_horizontal_filter_alpha0(ref, tmp, stride, ix4, iy4, sx4, alpha, beta,
- p_height, height, i, offset_bits_horiz,
- reduce_bits_horiz);
- else if (alpha != 0 && beta == 0)
- warp_horizontal_filter_beta0(ref, tmp, stride, ix4, iy4, sx4, alpha, beta,
- p_height, height, i, offset_bits_horiz,
- reduce_bits_horiz);
- else
- warp_horizontal_filter(ref, tmp, stride, ix4, iy4, sx4, alpha, beta,
- p_height, height, i, offset_bits_horiz,
- reduce_bits_horiz);
-}
-
-void av1_warp_affine_sse4_1(const int32_t *mat, const uint8_t *ref, int width,
- int height, int stride, uint8_t *pred, int p_col,
- int p_row, int p_width, int p_height, int p_stride,
- int subsampling_x, int subsampling_y,
- ConvolveParams *conv_params, int16_t alpha,
- int16_t beta, int16_t gamma, int16_t delta) {
- __m128i tmp[15];
- int i, j, k;
- const int bd = 8;
- const int reduce_bits_horiz = conv_params->round_0;
- const int reduce_bits_vert = conv_params->is_compound
- ? conv_params->round_1
- : 2 * FILTER_BITS - reduce_bits_horiz;
- const int offset_bits_horiz = bd + FILTER_BITS - 1;
- assert(IMPLIES(conv_params->is_compound, conv_params->dst != NULL));
-
- const int offset_bits_vert = bd + 2 * FILTER_BITS - reduce_bits_horiz;
- const __m128i reduce_bits_vert_const =
- _mm_set1_epi32(((1 << reduce_bits_vert) >> 1));
- const __m128i res_add_const = _mm_set1_epi32(1 << offset_bits_vert);
- const int round_bits =
- 2 * FILTER_BITS - conv_params->round_0 - conv_params->round_1;
- const int offset_bits = bd + 2 * FILTER_BITS - conv_params->round_0;
- assert(IMPLIES(conv_params->do_average, conv_params->is_compound));
-
- /* Note: For this code to work, the left/right frame borders need to be
- extended by at least 13 pixels each. By the time we get here, other
- code will have set up this border, but we allow an explicit check
- for debugging purposes.
- */
- /*for (i = 0; i < height; ++i) {
- for (j = 0; j < 13; ++j) {
- assert(ref[i * stride - 13 + j] == ref[i * stride]);
- assert(ref[i * stride + width + j] == ref[i * stride + (width - 1)]);
- }
- }*/
- __m128i res_add_const_1;
- if (conv_params->is_compound == 1) {
- res_add_const_1 = _mm_add_epi32(reduce_bits_vert_const, res_add_const);
- } else {
- res_add_const_1 = _mm_set1_epi32(-(1 << (bd + reduce_bits_vert - 1)) +
- ((1 << reduce_bits_vert) >> 1));
- }
-
- for (i = 0; i < p_height; i += 8) {
- for (j = 0; j < p_width; j += 8) {
- const int32_t src_x = (p_col + j + 4) << subsampling_x;
- const int32_t src_y = (p_row + i + 4) << subsampling_y;
- const int32_t dst_x = mat[2] * src_x + mat[3] * src_y + mat[0];
- const int32_t dst_y = mat[4] * src_x + mat[5] * src_y + mat[1];
- const int32_t x4 = dst_x >> subsampling_x;
- const int32_t y4 = dst_y >> subsampling_y;
-
- int32_t ix4 = x4 >> WARPEDMODEL_PREC_BITS;
- int32_t sx4 = x4 & ((1 << WARPEDMODEL_PREC_BITS) - 1);
- int32_t iy4 = y4 >> WARPEDMODEL_PREC_BITS;
- int32_t sy4 = y4 & ((1 << WARPEDMODEL_PREC_BITS) - 1);
-
- // Add in all the constant terms, including rounding and offset
- sx4 += alpha * (-4) + beta * (-4) + (1 << (WARPEDDIFF_PREC_BITS - 1)) +
- (WARPEDPIXEL_PREC_SHIFTS << WARPEDDIFF_PREC_BITS);
- sy4 += gamma * (-4) + delta * (-4) + (1 << (WARPEDDIFF_PREC_BITS - 1)) +
- (WARPEDPIXEL_PREC_SHIFTS << WARPEDDIFF_PREC_BITS);
-
- sx4 &= ~((1 << WARP_PARAM_REDUCE_BITS) - 1);
- sy4 &= ~((1 << WARP_PARAM_REDUCE_BITS) - 1);
-
- // Horizontal filter
- // If the block is aligned such that, after clamping, every sample
- // would be taken from the leftmost/rightmost column, then we can
- // skip the expensive horizontal filter.
- if (ix4 <= -7) {
- for (k = -7; k < AOMMIN(8, p_height - i); ++k) {
- int iy = iy4 + k;
- if (iy < 0)
- iy = 0;
- else if (iy > height - 1)
- iy = height - 1;
- tmp[k + 7] = _mm_set1_epi16(
- (1 << (bd + FILTER_BITS - reduce_bits_horiz - 1)) +
- ref[iy * stride] * (1 << (FILTER_BITS - reduce_bits_horiz)));
- }
- } else if (ix4 >= width + 6) {
- for (k = -7; k < AOMMIN(8, p_height - i); ++k) {
- int iy = iy4 + k;
- if (iy < 0)
- iy = 0;
- else if (iy > height - 1)
- iy = height - 1;
- tmp[k + 7] =
- _mm_set1_epi16((1 << (bd + FILTER_BITS - reduce_bits_horiz - 1)) +
- ref[iy * stride + (width - 1)] *
- (1 << (FILTER_BITS - reduce_bits_horiz)));
- }
- } else if (((ix4 - 7) < 0) || ((ix4 + 9) > width)) {
- const int out_of_boundary_left = -(ix4 - 6);
- const int out_of_boundary_right = (ix4 + 8) - width;
- for (k = -7; k < AOMMIN(8, p_height - i); ++k) {
- int iy = iy4 + k;
- if (iy < 0)
- iy = 0;
- else if (iy > height - 1)
- iy = height - 1;
- int sx = sx4 + beta * (k + 4);
-
- // Load source pixels
- __m128i src =
- _mm_loadu_si128((__m128i *)(ref + iy * stride + ix4 - 7));
- if (out_of_boundary_left >= 0) {
- const __m128i shuffle_reg_left =
- _mm_loadu_si128((__m128i *)warp_pad_left[out_of_boundary_left]);
- src = _mm_shuffle_epi8(src, shuffle_reg_left);
- }
- if (out_of_boundary_right >= 0) {
- const __m128i shuffle_reg_right = _mm_loadu_si128(
- (__m128i *)warp_pad_right[out_of_boundary_right]);
- src = _mm_shuffle_epi8(src, shuffle_reg_right);
- }
- horizontal_filter(src, tmp, sx, alpha, k, offset_bits_horiz,
- reduce_bits_horiz);
- }
- } else {
- prepare_warp_horizontal_filter(ref, tmp, stride, ix4, iy4, sx4, alpha,
- beta, p_height, height, i,
- offset_bits_horiz, reduce_bits_horiz);
- }
-
- // Vertical filter
- prepare_warp_vertical_filter(
- pred, tmp, conv_params, gamma, delta, p_height, p_stride, p_width, i,
- j, sy4, reduce_bits_vert, &res_add_const_1, round_bits, offset_bits);
- }
- }
-}
diff --git a/third_party/aom/av1/common/x86/wiener_convolve_avx2.c b/third_party/aom/av1/common/x86/wiener_convolve_avx2.c
deleted file mode 100644
index 87a6e1239..000000000
--- a/third_party/aom/av1/common/x86/wiener_convolve_avx2.c
+++ /dev/null
@@ -1,261 +0,0 @@
-/*
- * Copyright (c) 2018, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#include <immintrin.h>
-#include <assert.h>
-
-#include "config/av1_rtcd.h"
-
-#include "av1/common/convolve.h"
-#include "aom_dsp/aom_dsp_common.h"
-#include "aom_dsp/aom_filter.h"
-#include "aom_dsp/x86/synonyms.h"
-#include "aom_dsp/x86/synonyms_avx2.h"
-
-// 128-bit xmmwords are written as [ ... ] with the MSB on the left.
-// 256-bit ymmwords are written as two xmmwords, [ ... ][ ... ] with the MSB
-// on the left.
-// A row of, say, 8-bit pixels with values p0, p1, p2, ..., p30, p31 will be
-// loaded and stored as [ p31 ... p17 p16 ][ p15 ... p1 p0 ].
-void av1_wiener_convolve_add_src_avx2(const uint8_t *src, ptrdiff_t src_stride,
- uint8_t *dst, ptrdiff_t dst_stride,
- const int16_t *filter_x, int x_step_q4,
- const int16_t *filter_y, int y_step_q4,
- int w, int h,
- const ConvolveParams *conv_params) {
- const int bd = 8;
- assert(x_step_q4 == 16 && y_step_q4 == 16);
- assert(!(w & 7));
- (void)x_step_q4;
- (void)y_step_q4;
-
- DECLARE_ALIGNED(32, uint16_t,
- temp[(MAX_SB_SIZE + SUBPEL_TAPS - 1) * MAX_SB_SIZE]);
- int intermediate_height = h + SUBPEL_TAPS - 2;
- memset(temp + (intermediate_height * MAX_SB_SIZE), 0, MAX_SB_SIZE);
- const int center_tap = ((SUBPEL_TAPS - 1) / 2);
- const uint8_t *const src_ptr = src - center_tap * src_stride - center_tap;
-
- const __m128i zero_128 = _mm_setzero_si128();
- const __m256i zero_256 = _mm256_setzero_si256();
-
- // Add an offset to account for the "add_src" part of the convolve function.
- const __m128i offset = _mm_insert_epi16(zero_128, 1 << FILTER_BITS, 3);
-
- const __m256i clamp_low = zero_256;
- const __m256i clamp_high =
- _mm256_set1_epi16(WIENER_CLAMP_LIMIT(conv_params->round_0, bd) - 1);
-
- /* Horizontal filter */
- {
- // coeffs [ f7 f6 f5 f4 f3 f2 f1 f0 ]
- const __m128i coeffs_x = _mm_add_epi16(xx_loadu_128(filter_x), offset);
-
- // coeffs [ f3 f2 f3 f2 f1 f0 f1 f0 ]
- const __m128i coeffs_0123 = _mm_unpacklo_epi32(coeffs_x, coeffs_x);
- // coeffs [ f7 f6 f7 f6 f5 f4 f5 f4 ]
- const __m128i coeffs_4567 = _mm_unpackhi_epi32(coeffs_x, coeffs_x);
-
- // coeffs [ f1 f0 f1 f0 f1 f0 f1 f0 ]
- const __m128i coeffs_01_128 = _mm_unpacklo_epi64(coeffs_0123, coeffs_0123);
- // coeffs [ f3 f2 f3 f2 f3 f2 f3 f2 ]
- const __m128i coeffs_23_128 = _mm_unpackhi_epi64(coeffs_0123, coeffs_0123);
- // coeffs [ f5 f4 f5 f4 f5 f4 f5 f4 ]
- const __m128i coeffs_45_128 = _mm_unpacklo_epi64(coeffs_4567, coeffs_4567);
- // coeffs [ f7 f6 f7 f6 f7 f6 f7 f6 ]
- const __m128i coeffs_67_128 = _mm_unpackhi_epi64(coeffs_4567, coeffs_4567);
-
- // coeffs [ f1 f0 f1 f0 f1 f0 f1 f0 ][ f1 f0 f1 f0 f1 f0 f1 f0 ]
- const __m256i coeffs_01 = yy_set_m128i(coeffs_01_128, coeffs_01_128);
- // coeffs [ f3 f2 f3 f2 f3 f2 f3 f2 ][ f3 f2 f3 f2 f3 f2 f3 f2 ]
- const __m256i coeffs_23 = yy_set_m128i(coeffs_23_128, coeffs_23_128);
- // coeffs [ f5 f4 f5 f4 f5 f4 f5 f4 ][ f5 f4 f5 f4 f5 f4 f5 f4 ]
- const __m256i coeffs_45 = yy_set_m128i(coeffs_45_128, coeffs_45_128);
- // coeffs [ f7 f6 f7 f6 f7 f6 f7 f6 ][ f7 f6 f7 f6 f7 f6 f7 f6 ]
- const __m256i coeffs_67 = yy_set_m128i(coeffs_67_128, coeffs_67_128);
-
- const __m256i round_const = _mm256_set1_epi32(
- (1 << (conv_params->round_0 - 1)) + (1 << (bd + FILTER_BITS - 1)));
-
- for (int i = 0; i < intermediate_height; ++i) {
- for (int j = 0; j < w; j += 16) {
- const uint8_t *data_ij = src_ptr + i * src_stride + j;
-
- // Load 8-bit src data
- const __m128i data_0 = xx_loadu_128(data_ij + 0);
- const __m128i data_1 = xx_loadu_128(data_ij + 1);
- const __m128i data_2 = xx_loadu_128(data_ij + 2);
- const __m128i data_3 = xx_loadu_128(data_ij + 3);
- const __m128i data_4 = xx_loadu_128(data_ij + 4);
- const __m128i data_5 = xx_loadu_128(data_ij + 5);
- const __m128i data_6 = xx_loadu_128(data_ij + 6);
- const __m128i data_7 = xx_loadu_128(data_ij + 7);
-
- // (Zero-)Extend 8-bit data to 16-bit data
- const __m256i src_0 = _mm256_cvtepu8_epi16(data_0);
- const __m256i src_1 = _mm256_cvtepu8_epi16(data_1);
- const __m256i src_2 = _mm256_cvtepu8_epi16(data_2);
- const __m256i src_3 = _mm256_cvtepu8_epi16(data_3);
- const __m256i src_4 = _mm256_cvtepu8_epi16(data_4);
- const __m256i src_5 = _mm256_cvtepu8_epi16(data_5);
- const __m256i src_6 = _mm256_cvtepu8_epi16(data_6);
- const __m256i src_7 = _mm256_cvtepu8_epi16(data_7);
-
- // Multiply src data by filter coeffs and sum pairs
- const __m256i res_0 = _mm256_madd_epi16(src_0, coeffs_01);
- const __m256i res_1 = _mm256_madd_epi16(src_1, coeffs_01);
- const __m256i res_2 = _mm256_madd_epi16(src_2, coeffs_23);
- const __m256i res_3 = _mm256_madd_epi16(src_3, coeffs_23);
- const __m256i res_4 = _mm256_madd_epi16(src_4, coeffs_45);
- const __m256i res_5 = _mm256_madd_epi16(src_5, coeffs_45);
- const __m256i res_6 = _mm256_madd_epi16(src_6, coeffs_67);
- const __m256i res_7 = _mm256_madd_epi16(src_7, coeffs_67);
-
- // Calculate scalar product for even- and odd-indices separately,
- // increasing to 32-bit precision
- const __m256i res_even_sum = _mm256_add_epi32(
- _mm256_add_epi32(res_0, res_4), _mm256_add_epi32(res_2, res_6));
- const __m256i res_odd_sum = _mm256_add_epi32(
- _mm256_add_epi32(res_1, res_5), _mm256_add_epi32(res_3, res_7));
-
- const __m256i res_even = _mm256_srai_epi32(
- _mm256_add_epi32(res_even_sum, round_const), conv_params->round_0);
- const __m256i res_odd = _mm256_srai_epi32(
- _mm256_add_epi32(res_odd_sum, round_const), conv_params->round_0);
-
- // Reduce to 16-bit precision and pack even- and odd-index results
- // back into one register. The _mm256_packs_epi32 intrinsic returns
- // a register with the pixels ordered as follows:
- // [ 15 13 11 9 14 12 10 8 ] [ 7 5 3 1 6 4 2 0 ]
- const __m256i res = _mm256_packs_epi32(res_even, res_odd);
- const __m256i res_clamped =
- _mm256_min_epi16(_mm256_max_epi16(res, clamp_low), clamp_high);
-
- // Store in a temporary array
- yy_storeu_256(temp + i * MAX_SB_SIZE + j, res_clamped);
- }
- }
- }
-
- /* Vertical filter */
- {
- // coeffs [ g7 g6 g5 g4 g3 g2 g1 g0 ]
- const __m128i coeffs_y = _mm_add_epi16(xx_loadu_128(filter_y), offset);
-
- // coeffs [ g3 g2 g3 g2 g1 g0 g1 g0 ]
- const __m128i coeffs_0123 = _mm_unpacklo_epi32(coeffs_y, coeffs_y);
- // coeffs [ g7 g6 g7 g6 g5 g4 g5 g4 ]
- const __m128i coeffs_4567 = _mm_unpackhi_epi32(coeffs_y, coeffs_y);
-
- // coeffs [ g1 g0 g1 g0 g1 g0 g1 g0 ]
- const __m128i coeffs_01_128 = _mm_unpacklo_epi64(coeffs_0123, coeffs_0123);
- // coeffs [ g3 g2 g3 g2 g3 g2 g3 g2 ]
- const __m128i coeffs_23_128 = _mm_unpackhi_epi64(coeffs_0123, coeffs_0123);
- // coeffs [ g5 g4 g5 g4 g5 g4 g5 g4 ]
- const __m128i coeffs_45_128 = _mm_unpacklo_epi64(coeffs_4567, coeffs_4567);
- // coeffs [ g7 g6 g7 g6 g7 g6 g7 g6 ]
- const __m128i coeffs_67_128 = _mm_unpackhi_epi64(coeffs_4567, coeffs_4567);
-
- // coeffs [ g1 g0 g1 g0 g1 g0 g1 g0 ][ g1 g0 g1 g0 g1 g0 g1 g0 ]
- const __m256i coeffs_01 = yy_set_m128i(coeffs_01_128, coeffs_01_128);
- // coeffs [ g3 g2 g3 g2 g3 g2 g3 g2 ][ g3 g2 g3 g2 g3 g2 g3 g2 ]
- const __m256i coeffs_23 = yy_set_m128i(coeffs_23_128, coeffs_23_128);
- // coeffs [ g5 g4 g5 g4 g5 g4 g5 g4 ][ g5 g4 g5 g4 g5 g4 g5 g4 ]
- const __m256i coeffs_45 = yy_set_m128i(coeffs_45_128, coeffs_45_128);
- // coeffs [ g7 g6 g7 g6 g7 g6 g7 g6 ][ g7 g6 g7 g6 g7 g6 g7 g6 ]
- const __m256i coeffs_67 = yy_set_m128i(coeffs_67_128, coeffs_67_128);
-
- const __m256i round_const =
- _mm256_set1_epi32((1 << (conv_params->round_1 - 1)) -
- (1 << (bd + conv_params->round_1 - 1)));
-
- for (int i = 0; i < h; ++i) {
- for (int j = 0; j < w; j += 16) {
- const uint16_t *data_ij = temp + i * MAX_SB_SIZE + j;
-
- // Load 16-bit data from the output of the horizontal filter in
- // which the pixels are ordered as follows:
- // [ 15 13 11 9 14 12 10 8 ] [ 7 5 3 1 6 4 2 0 ]
- const __m256i data_0 = yy_loadu_256(data_ij + 0 * MAX_SB_SIZE);
- const __m256i data_1 = yy_loadu_256(data_ij + 1 * MAX_SB_SIZE);
- const __m256i data_2 = yy_loadu_256(data_ij + 2 * MAX_SB_SIZE);
- const __m256i data_3 = yy_loadu_256(data_ij + 3 * MAX_SB_SIZE);
- const __m256i data_4 = yy_loadu_256(data_ij + 4 * MAX_SB_SIZE);
- const __m256i data_5 = yy_loadu_256(data_ij + 5 * MAX_SB_SIZE);
- const __m256i data_6 = yy_loadu_256(data_ij + 6 * MAX_SB_SIZE);
- const __m256i data_7 = yy_loadu_256(data_ij + 7 * MAX_SB_SIZE);
-
- // Filter the even-indices, increasing to 32-bit precision
- const __m256i src_0 = _mm256_unpacklo_epi16(data_0, data_1);
- const __m256i src_2 = _mm256_unpacklo_epi16(data_2, data_3);
- const __m256i src_4 = _mm256_unpacklo_epi16(data_4, data_5);
- const __m256i src_6 = _mm256_unpacklo_epi16(data_6, data_7);
-
- const __m256i res_0 = _mm256_madd_epi16(src_0, coeffs_01);
- const __m256i res_2 = _mm256_madd_epi16(src_2, coeffs_23);
- const __m256i res_4 = _mm256_madd_epi16(src_4, coeffs_45);
- const __m256i res_6 = _mm256_madd_epi16(src_6, coeffs_67);
-
- const __m256i res_even = _mm256_add_epi32(
- _mm256_add_epi32(res_0, res_2), _mm256_add_epi32(res_4, res_6));
-
- // Filter the odd-indices, increasing to 32-bit precision
- const __m256i src_1 = _mm256_unpackhi_epi16(data_0, data_1);
- const __m256i src_3 = _mm256_unpackhi_epi16(data_2, data_3);
- const __m256i src_5 = _mm256_unpackhi_epi16(data_4, data_5);
- const __m256i src_7 = _mm256_unpackhi_epi16(data_6, data_7);
-
- const __m256i res_1 = _mm256_madd_epi16(src_1, coeffs_01);
- const __m256i res_3 = _mm256_madd_epi16(src_3, coeffs_23);
- const __m256i res_5 = _mm256_madd_epi16(src_5, coeffs_45);
- const __m256i res_7 = _mm256_madd_epi16(src_7, coeffs_67);
-
- const __m256i res_odd = _mm256_add_epi32(
- _mm256_add_epi32(res_1, res_3), _mm256_add_epi32(res_5, res_7));
-
- // Pixels are currently in the following order:
- // res_even order: [ 14 12 10 8 ] [ 6 4 2 0 ]
- // res_odd order: [ 15 13 11 9 ] [ 7 5 3 1 ]
- //
- // Rearrange the pixels into the following order:
- // res_lo order: [ 11 10 9 8 ] [ 3 2 1 0 ]
- // res_hi order: [ 15 14 13 12 ] [ 7 6 5 4 ]
- const __m256i res_lo = _mm256_unpacklo_epi32(res_even, res_odd);
- const __m256i res_hi = _mm256_unpackhi_epi32(res_even, res_odd);
-
- const __m256i res_lo_round = _mm256_srai_epi32(
- _mm256_add_epi32(res_lo, round_const), conv_params->round_1);
- const __m256i res_hi_round = _mm256_srai_epi32(
- _mm256_add_epi32(res_hi, round_const), conv_params->round_1);
-
- // Reduce to 16-bit precision and pack into the correct order:
- // [ 15 14 13 12 11 10 9 8 ][ 7 6 5 4 3 2 1 0 ]
- const __m256i res_16bit =
- _mm256_packs_epi32(res_lo_round, res_hi_round);
-
- // Reduce to 8-bit precision. This messes up the order:
- // [ - - - - - - - - 15 14 13 12 11 10 9 8 ]
- // [ - - - - - - - - 7 6 5 4 3 2 1 0 ]
- const __m256i res_8bit =
- _mm256_packus_epi16(res_16bit, zero_256 /* don't care value */);
-
- // Swap the two central 32-bit values to get the order:
- // [ - - - - - - - - - - - - - - - - ]
- // [ 15 14 13 12 11 10 9 8 7 6 5 4 3 2 1 0 ]
- const __m256i res_8bit2 = _mm256_permute4x64_epi64(res_8bit, 0xd8);
-
- // Store the lower 128-bit lane in the dst array
- xx_storeu_128(dst + i * dst_stride + j,
- _mm256_castsi256_si128(res_8bit2));
- }
- }
- }
-}
diff --git a/third_party/aom/av1/common/x86/wiener_convolve_sse2.c b/third_party/aom/av1/common/x86/wiener_convolve_sse2.c
deleted file mode 100644
index f9d00b733..000000000
--- a/third_party/aom/av1/common/x86/wiener_convolve_sse2.c
+++ /dev/null
@@ -1,199 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#include <emmintrin.h>
-#include <assert.h>
-
-#include "config/av1_rtcd.h"
-
-#include "av1/common/convolve.h"
-#include "aom_dsp/aom_dsp_common.h"
-#include "aom_dsp/aom_filter.h"
-
-void av1_wiener_convolve_add_src_sse2(const uint8_t *src, ptrdiff_t src_stride,
- uint8_t *dst, ptrdiff_t dst_stride,
- const int16_t *filter_x, int x_step_q4,
- const int16_t *filter_y, int y_step_q4,
- int w, int h,
- const ConvolveParams *conv_params) {
- const int bd = 8;
- assert(x_step_q4 == 16 && y_step_q4 == 16);
- assert(!(w & 7));
- (void)x_step_q4;
- (void)y_step_q4;
-
- DECLARE_ALIGNED(16, uint16_t,
- temp[(MAX_SB_SIZE + SUBPEL_TAPS - 1) * MAX_SB_SIZE]);
- int intermediate_height = h + SUBPEL_TAPS - 2;
- memset(temp + (intermediate_height * MAX_SB_SIZE), 0, MAX_SB_SIZE);
- int i, j;
- const int center_tap = ((SUBPEL_TAPS - 1) / 2);
- const uint8_t *const src_ptr = src - center_tap * src_stride - center_tap;
-
- const __m128i zero = _mm_setzero_si128();
- // Add an offset to account for the "add_src" part of the convolve function.
- const __m128i offset = _mm_insert_epi16(zero, 1 << FILTER_BITS, 3);
-
- /* Horizontal filter */
- {
- const __m128i coeffs_x =
- _mm_add_epi16(_mm_loadu_si128((__m128i *)filter_x), offset);
-
- // coeffs 0 1 0 1 2 3 2 3
- const __m128i tmp_0 = _mm_unpacklo_epi32(coeffs_x, coeffs_x);
- // coeffs 4 5 4 5 6 7 6 7
- const __m128i tmp_1 = _mm_unpackhi_epi32(coeffs_x, coeffs_x);
-
- // coeffs 0 1 0 1 0 1 0 1
- const __m128i coeff_01 = _mm_unpacklo_epi64(tmp_0, tmp_0);
- // coeffs 2 3 2 3 2 3 2 3
- const __m128i coeff_23 = _mm_unpackhi_epi64(tmp_0, tmp_0);
- // coeffs 4 5 4 5 4 5 4 5
- const __m128i coeff_45 = _mm_unpacklo_epi64(tmp_1, tmp_1);
- // coeffs 6 7 6 7 6 7 6 7
- const __m128i coeff_67 = _mm_unpackhi_epi64(tmp_1, tmp_1);
-
- const __m128i round_const = _mm_set1_epi32(
- (1 << (conv_params->round_0 - 1)) + (1 << (bd + FILTER_BITS - 1)));
-
- for (i = 0; i < intermediate_height; ++i) {
- for (j = 0; j < w; j += 8) {
- const __m128i data =
- _mm_loadu_si128((__m128i *)&src_ptr[i * src_stride + j]);
-
- // Filter even-index pixels
- const __m128i src_0 = _mm_unpacklo_epi8(data, zero);
- const __m128i res_0 = _mm_madd_epi16(src_0, coeff_01);
- const __m128i src_2 = _mm_unpacklo_epi8(_mm_srli_si128(data, 2), zero);
- const __m128i res_2 = _mm_madd_epi16(src_2, coeff_23);
- const __m128i src_4 = _mm_unpacklo_epi8(_mm_srli_si128(data, 4), zero);
- const __m128i res_4 = _mm_madd_epi16(src_4, coeff_45);
- const __m128i src_6 = _mm_unpacklo_epi8(_mm_srli_si128(data, 6), zero);
- const __m128i res_6 = _mm_madd_epi16(src_6, coeff_67);
-
- __m128i res_even = _mm_add_epi32(_mm_add_epi32(res_0, res_4),
- _mm_add_epi32(res_2, res_6));
- res_even = _mm_srai_epi32(_mm_add_epi32(res_even, round_const),
- conv_params->round_0);
-
- // Filter odd-index pixels
- const __m128i src_1 = _mm_unpacklo_epi8(_mm_srli_si128(data, 1), zero);
- const __m128i res_1 = _mm_madd_epi16(src_1, coeff_01);
- const __m128i src_3 = _mm_unpacklo_epi8(_mm_srli_si128(data, 3), zero);
- const __m128i res_3 = _mm_madd_epi16(src_3, coeff_23);
- const __m128i src_5 = _mm_unpacklo_epi8(_mm_srli_si128(data, 5), zero);
- const __m128i res_5 = _mm_madd_epi16(src_5, coeff_45);
- const __m128i src_7 = _mm_unpacklo_epi8(_mm_srli_si128(data, 7), zero);
- const __m128i res_7 = _mm_madd_epi16(src_7, coeff_67);
-
- __m128i res_odd = _mm_add_epi32(_mm_add_epi32(res_1, res_5),
- _mm_add_epi32(res_3, res_7));
- res_odd = _mm_srai_epi32(_mm_add_epi32(res_odd, round_const),
- conv_params->round_0);
-
- // Pack in the column order 0, 2, 4, 6, 1, 3, 5, 7
- __m128i res = _mm_packs_epi32(res_even, res_odd);
- res = _mm_min_epi16(
- _mm_max_epi16(res, zero),
- _mm_set1_epi16(WIENER_CLAMP_LIMIT(conv_params->round_0, bd) - 1));
- _mm_storeu_si128((__m128i *)&temp[i * MAX_SB_SIZE + j], res);
- }
- }
- }
-
- /* Vertical filter */
- {
- const __m128i coeffs_y =
- _mm_add_epi16(_mm_loadu_si128((__m128i *)filter_y), offset);
-
- // coeffs 0 1 0 1 2 3 2 3
- const __m128i tmp_0 = _mm_unpacklo_epi32(coeffs_y, coeffs_y);
- // coeffs 4 5 4 5 6 7 6 7
- const __m128i tmp_1 = _mm_unpackhi_epi32(coeffs_y, coeffs_y);
-
- // coeffs 0 1 0 1 0 1 0 1
- const __m128i coeff_01 = _mm_unpacklo_epi64(tmp_0, tmp_0);
- // coeffs 2 3 2 3 2 3 2 3
- const __m128i coeff_23 = _mm_unpackhi_epi64(tmp_0, tmp_0);
- // coeffs 4 5 4 5 4 5 4 5
- const __m128i coeff_45 = _mm_unpacklo_epi64(tmp_1, tmp_1);
- // coeffs 6 7 6 7 6 7 6 7
- const __m128i coeff_67 = _mm_unpackhi_epi64(tmp_1, tmp_1);
-
- const __m128i round_const =
- _mm_set1_epi32((1 << (conv_params->round_1 - 1)) -
- (1 << (bd + conv_params->round_1 - 1)));
-
- for (i = 0; i < h; ++i) {
- for (j = 0; j < w; j += 8) {
- // Filter even-index pixels
- const uint16_t *data = &temp[i * MAX_SB_SIZE + j];
- const __m128i src_0 =
- _mm_unpacklo_epi16(*(__m128i *)(data + 0 * MAX_SB_SIZE),
- *(__m128i *)(data + 1 * MAX_SB_SIZE));
- const __m128i src_2 =
- _mm_unpacklo_epi16(*(__m128i *)(data + 2 * MAX_SB_SIZE),
- *(__m128i *)(data + 3 * MAX_SB_SIZE));
- const __m128i src_4 =
- _mm_unpacklo_epi16(*(__m128i *)(data + 4 * MAX_SB_SIZE),
- *(__m128i *)(data + 5 * MAX_SB_SIZE));
- const __m128i src_6 =
- _mm_unpacklo_epi16(*(__m128i *)(data + 6 * MAX_SB_SIZE),
- *(__m128i *)(data + 7 * MAX_SB_SIZE));
-
- const __m128i res_0 = _mm_madd_epi16(src_0, coeff_01);
- const __m128i res_2 = _mm_madd_epi16(src_2, coeff_23);
- const __m128i res_4 = _mm_madd_epi16(src_4, coeff_45);
- const __m128i res_6 = _mm_madd_epi16(src_6, coeff_67);
-
- const __m128i res_even = _mm_add_epi32(_mm_add_epi32(res_0, res_2),
- _mm_add_epi32(res_4, res_6));
-
- // Filter odd-index pixels
- const __m128i src_1 =
- _mm_unpackhi_epi16(*(__m128i *)(data + 0 * MAX_SB_SIZE),
- *(__m128i *)(data + 1 * MAX_SB_SIZE));
- const __m128i src_3 =
- _mm_unpackhi_epi16(*(__m128i *)(data + 2 * MAX_SB_SIZE),
- *(__m128i *)(data + 3 * MAX_SB_SIZE));
- const __m128i src_5 =
- _mm_unpackhi_epi16(*(__m128i *)(data + 4 * MAX_SB_SIZE),
- *(__m128i *)(data + 5 * MAX_SB_SIZE));
- const __m128i src_7 =
- _mm_unpackhi_epi16(*(__m128i *)(data + 6 * MAX_SB_SIZE),
- *(__m128i *)(data + 7 * MAX_SB_SIZE));
-
- const __m128i res_1 = _mm_madd_epi16(src_1, coeff_01);
- const __m128i res_3 = _mm_madd_epi16(src_3, coeff_23);
- const __m128i res_5 = _mm_madd_epi16(src_5, coeff_45);
- const __m128i res_7 = _mm_madd_epi16(src_7, coeff_67);
-
- const __m128i res_odd = _mm_add_epi32(_mm_add_epi32(res_1, res_3),
- _mm_add_epi32(res_5, res_7));
-
- // Rearrange pixels back into the order 0 ... 7
- const __m128i res_lo = _mm_unpacklo_epi32(res_even, res_odd);
- const __m128i res_hi = _mm_unpackhi_epi32(res_even, res_odd);
-
- const __m128i res_lo_round = _mm_srai_epi32(
- _mm_add_epi32(res_lo, round_const), conv_params->round_1);
- const __m128i res_hi_round = _mm_srai_epi32(
- _mm_add_epi32(res_hi, round_const), conv_params->round_1);
-
- const __m128i res_16bit = _mm_packs_epi32(res_lo_round, res_hi_round);
- __m128i res_8bit = _mm_packus_epi16(res_16bit, res_16bit);
-
- __m128i *const p = (__m128i *)&dst[i * dst_stride + j];
- _mm_storel_epi64(p, res_8bit);
- }
- }
- }
-}
diff --git a/third_party/aom/av1/decoder/accounting.c b/third_party/aom/av1/decoder/accounting.c
deleted file mode 100644
index 8d8f3dfdb..000000000
--- a/third_party/aom/av1/decoder/accounting.c
+++ /dev/null
@@ -1,138 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#include <assert.h>
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-
-#include "aom/aom_integer.h"
-#include "av1/decoder/accounting.h"
-
-static int aom_accounting_hash(const char *str) {
- uint32_t val;
- const unsigned char *ustr;
- val = 0;
- ustr = (const unsigned char *)str;
- /* This is about the worst hash one can design, but it should be good enough
- here. */
- while (*ustr) val += *ustr++;
- return val % AOM_ACCOUNTING_HASH_SIZE;
-}
-
-/* Dictionary lookup based on an open-addressing hash table. */
-int aom_accounting_dictionary_lookup(Accounting *accounting, const char *str) {
- int hash;
- size_t len;
- AccountingDictionary *dictionary;
- dictionary = &accounting->syms.dictionary;
- hash = aom_accounting_hash(str);
- while (accounting->hash_dictionary[hash] != -1) {
- if (strcmp(dictionary->strs[accounting->hash_dictionary[hash]], str) == 0) {
- return accounting->hash_dictionary[hash];
- }
- hash++;
- if (hash == AOM_ACCOUNTING_HASH_SIZE) hash = 0;
- }
- /* No match found. */
- assert(dictionary->num_strs + 1 < MAX_SYMBOL_TYPES);
- accounting->hash_dictionary[hash] = dictionary->num_strs;
- len = strlen(str);
- dictionary->strs[dictionary->num_strs] = malloc(len + 1);
- snprintf(dictionary->strs[dictionary->num_strs], len + 1, "%s", str);
- dictionary->num_strs++;
- return dictionary->num_strs - 1;
-}
-
-void aom_accounting_init(Accounting *accounting) {
- int i;
- accounting->num_syms_allocated = 1000;
- accounting->syms.syms =
- malloc(sizeof(AccountingSymbol) * accounting->num_syms_allocated);
- accounting->syms.dictionary.num_strs = 0;
- assert(AOM_ACCOUNTING_HASH_SIZE > 2 * MAX_SYMBOL_TYPES);
- for (i = 0; i < AOM_ACCOUNTING_HASH_SIZE; i++)
- accounting->hash_dictionary[i] = -1;
- aom_accounting_reset(accounting);
-}
-
-void aom_accounting_reset(Accounting *accounting) {
- accounting->syms.num_syms = 0;
- accounting->syms.num_binary_syms = 0;
- accounting->syms.num_multi_syms = 0;
- accounting->context.x = -1;
- accounting->context.y = -1;
- accounting->last_tell_frac = 0;
-}
-
-void aom_accounting_clear(Accounting *accounting) {
- int i;
- AccountingDictionary *dictionary;
- free(accounting->syms.syms);
- dictionary = &accounting->syms.dictionary;
- for (i = 0; i < dictionary->num_strs; i++) {
- free(dictionary->strs[i]);
- }
-}
-
-void aom_accounting_set_context(Accounting *accounting, int16_t x, int16_t y) {
- accounting->context.x = x;
- accounting->context.y = y;
-}
-
-void aom_accounting_record(Accounting *accounting, const char *str,
- uint32_t bits) {
- AccountingSymbol sym;
- // Reuse previous symbol if it has the same context and symbol id.
- if (accounting->syms.num_syms) {
- AccountingSymbol *last_sym;
- last_sym = &accounting->syms.syms[accounting->syms.num_syms - 1];
- if (memcmp(&last_sym->context, &accounting->context,
- sizeof(AccountingSymbolContext)) == 0) {
- uint32_t id;
- id = aom_accounting_dictionary_lookup(accounting, str);
- if (id == last_sym->id) {
- last_sym->bits += bits;
- last_sym->samples++;
- return;
- }
- }
- }
- sym.context = accounting->context;
- sym.samples = 1;
- sym.bits = bits;
- sym.id = aom_accounting_dictionary_lookup(accounting, str);
- assert(sym.id <= 255);
- if (accounting->syms.num_syms == accounting->num_syms_allocated) {
- accounting->num_syms_allocated *= 2;
- accounting->syms.syms =
- realloc(accounting->syms.syms,
- sizeof(AccountingSymbol) * accounting->num_syms_allocated);
- assert(accounting->syms.syms != NULL);
- }
- accounting->syms.syms[accounting->syms.num_syms++] = sym;
-}
-
-void aom_accounting_dump(Accounting *accounting) {
- int i;
- AccountingSymbol *sym;
- printf("\n----- Number of recorded syntax elements = %d -----\n",
- accounting->syms.num_syms);
- printf("----- Total number of symbol calls = %d (%d binary) -----\n",
- accounting->syms.num_multi_syms + accounting->syms.num_binary_syms,
- accounting->syms.num_binary_syms);
- for (i = 0; i < accounting->syms.num_syms; i++) {
- sym = &accounting->syms.syms[i];
- printf("%s x: %d, y: %d bits: %f samples: %d\n",
- accounting->syms.dictionary.strs[sym->id], sym->context.x,
- sym->context.y, (float)sym->bits / 8.0, sym->samples);
- }
-}
diff --git a/third_party/aom/av1/decoder/accounting.h b/third_party/aom/av1/decoder/accounting.h
deleted file mode 100644
index 288e5e63e..000000000
--- a/third_party/aom/av1/decoder/accounting.h
+++ /dev/null
@@ -1,82 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-#ifndef AOM_AV1_DECODER_ACCOUNTING_H_
-#define AOM_AV1_DECODER_ACCOUNTING_H_
-#include <stdlib.h>
-#include "aom/aomdx.h"
-
-#ifdef __cplusplus
-extern "C" {
-#endif // __cplusplus
-
-#define AOM_ACCOUNTING_HASH_SIZE (1021)
-
-/* Max number of entries for symbol types in the dictionary (increase as
- necessary). */
-#define MAX_SYMBOL_TYPES (256)
-
-/*The resolution of fractional-precision bit usage measurements, i.e.,
- 3 => 1/8th bits.*/
-#define AOM_ACCT_BITRES (3)
-
-typedef struct {
- int16_t x;
- int16_t y;
-} AccountingSymbolContext;
-
-typedef struct {
- AccountingSymbolContext context;
- uint32_t id;
- /** Number of bits in units of 1/8 bit. */
- uint32_t bits;
- uint32_t samples;
-} AccountingSymbol;
-
-/** Dictionary for translating strings into id. */
-typedef struct {
- char *(strs[MAX_SYMBOL_TYPES]);
- int num_strs;
-} AccountingDictionary;
-
-typedef struct {
- /** All recorded symbols decoded. */
- AccountingSymbol *syms;
- /** Number of syntax actually recorded. */
- int num_syms;
- /** Raw symbol decoding calls for non-binary values. */
- int num_multi_syms;
- /** Raw binary symbol decoding calls. */
- int num_binary_syms;
- /** Dictionary for translating strings into id. */
- AccountingDictionary dictionary;
-} AccountingSymbols;
-
-struct Accounting {
- AccountingSymbols syms;
- /** Size allocated for symbols (not all may be used). */
- int num_syms_allocated;
- int16_t hash_dictionary[AOM_ACCOUNTING_HASH_SIZE];
- AccountingSymbolContext context;
- uint32_t last_tell_frac;
-};
-
-void aom_accounting_init(Accounting *accounting);
-void aom_accounting_reset(Accounting *accounting);
-void aom_accounting_clear(Accounting *accounting);
-void aom_accounting_set_context(Accounting *accounting, int16_t x, int16_t y);
-int aom_accounting_dictionary_lookup(Accounting *accounting, const char *str);
-void aom_accounting_record(Accounting *accounting, const char *str,
- uint32_t bits);
-void aom_accounting_dump(Accounting *accounting);
-#ifdef __cplusplus
-} // extern "C"
-#endif // __cplusplus
-#endif // AOM_AV1_DECODER_ACCOUNTING_H_
diff --git a/third_party/aom/av1/decoder/decodeframe.c b/third_party/aom/av1/decoder/decodeframe.c
deleted file mode 100644
index 31f14b531..000000000
--- a/third_party/aom/av1/decoder/decodeframe.c
+++ /dev/null
@@ -1,5567 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#include <assert.h>
-#include <stddef.h>
-
-#include "config/aom_config.h"
-#include "config/aom_dsp_rtcd.h"
-#include "config/aom_scale_rtcd.h"
-#include "config/av1_rtcd.h"
-
-#include "aom/aom_codec.h"
-#include "aom_dsp/aom_dsp_common.h"
-#include "aom_dsp/binary_codes_reader.h"
-#include "aom_dsp/bitreader.h"
-#include "aom_dsp/bitreader_buffer.h"
-#include "aom_mem/aom_mem.h"
-#include "aom_ports/aom_timer.h"
-#include "aom_ports/mem.h"
-#include "aom_ports/mem_ops.h"
-#include "aom_scale/aom_scale.h"
-#include "aom_util/aom_thread.h"
-
-#if CONFIG_BITSTREAM_DEBUG || CONFIG_MISMATCH_DEBUG
-#include "aom_util/debug_util.h"
-#endif // CONFIG_BITSTREAM_DEBUG || CONFIG_MISMATCH_DEBUG
-
-#include "av1/common/alloccommon.h"
-#include "av1/common/cdef.h"
-#include "av1/common/cfl.h"
-#if CONFIG_INSPECTION
-#include "av1/decoder/inspection.h"
-#endif
-#include "av1/common/common.h"
-#include "av1/common/entropy.h"
-#include "av1/common/entropymode.h"
-#include "av1/common/entropymv.h"
-#include "av1/common/frame_buffers.h"
-#include "av1/common/idct.h"
-#include "av1/common/mvref_common.h"
-#include "av1/common/pred_common.h"
-#include "av1/common/quant_common.h"
-#include "av1/common/reconinter.h"
-#include "av1/common/reconintra.h"
-#include "av1/common/resize.h"
-#include "av1/common/seg_common.h"
-#include "av1/common/thread_common.h"
-#include "av1/common/tile_common.h"
-#include "av1/common/warped_motion.h"
-#include "av1/common/obmc.h"
-#include "av1/decoder/decodeframe.h"
-#include "av1/decoder/decodemv.h"
-#include "av1/decoder/decoder.h"
-#include "av1/decoder/decodetxb.h"
-#include "av1/decoder/detokenize.h"
-
-#define ACCT_STR __func__
-
-// This is needed by ext_tile related unit tests.
-#define EXT_TILE_DEBUG 1
-#define MC_TEMP_BUF_PELS \
- (((MAX_SB_SIZE)*2 + (AOM_INTERP_EXTEND)*2) * \
- ((MAX_SB_SIZE)*2 + (AOM_INTERP_EXTEND)*2))
-
-// Checks that the remaining bits start with a 1 and ends with 0s.
-// It consumes an additional byte, if already byte aligned before the check.
-int av1_check_trailing_bits(AV1Decoder *pbi, struct aom_read_bit_buffer *rb) {
- AV1_COMMON *const cm = &pbi->common;
- // bit_offset is set to 0 (mod 8) when the reader is already byte aligned
- int bits_before_alignment = 8 - rb->bit_offset % 8;
- int trailing = aom_rb_read_literal(rb, bits_before_alignment);
- if (trailing != (1 << (bits_before_alignment - 1))) {
- cm->error.error_code = AOM_CODEC_CORRUPT_FRAME;
- return -1;
- }
- return 0;
-}
-
-// Use only_chroma = 1 to only set the chroma planes
-static void set_planes_to_neutral_grey(const SequenceHeader *const seq_params,
- const YV12_BUFFER_CONFIG *const buf,
- int only_chroma) {
- if (seq_params->use_highbitdepth) {
- const int val = 1 << (seq_params->bit_depth - 1);
- for (int plane = only_chroma; plane < MAX_MB_PLANE; plane++) {
- const int is_uv = plane > 0;
- uint16_t *const base = CONVERT_TO_SHORTPTR(buf->buffers[plane]);
- // Set the first row to neutral grey. Then copy the first row to all
- // subsequent rows.
- if (buf->crop_heights[is_uv] > 0) {
- aom_memset16(base, val, buf->crop_widths[is_uv]);
- for (int row_idx = 1; row_idx < buf->crop_heights[is_uv]; row_idx++) {
- memcpy(&base[row_idx * buf->strides[is_uv]], base,
- sizeof(*base) * buf->crop_widths[is_uv]);
- }
- }
- }
- } else {
- for (int plane = only_chroma; plane < MAX_MB_PLANE; plane++) {
- const int is_uv = plane > 0;
- for (int row_idx = 0; row_idx < buf->crop_heights[is_uv]; row_idx++) {
- memset(&buf->buffers[plane][row_idx * buf->uv_stride], 1 << 7,
- buf->crop_widths[is_uv]);
- }
- }
- }
-}
-
-static void loop_restoration_read_sb_coeffs(const AV1_COMMON *const cm,
- MACROBLOCKD *xd,
- aom_reader *const r, int plane,
- int runit_idx);
-
-static void setup_compound_reference_mode(AV1_COMMON *cm) {
- cm->comp_fwd_ref[0] = LAST_FRAME;
- cm->comp_fwd_ref[1] = LAST2_FRAME;
- cm->comp_fwd_ref[2] = LAST3_FRAME;
- cm->comp_fwd_ref[3] = GOLDEN_FRAME;
-
- cm->comp_bwd_ref[0] = BWDREF_FRAME;
- cm->comp_bwd_ref[1] = ALTREF2_FRAME;
- cm->comp_bwd_ref[2] = ALTREF_FRAME;
-}
-
-static int read_is_valid(const uint8_t *start, size_t len, const uint8_t *end) {
- return len != 0 && len <= (size_t)(end - start);
-}
-
-static TX_MODE read_tx_mode(AV1_COMMON *cm, struct aom_read_bit_buffer *rb) {
- if (cm->coded_lossless) return ONLY_4X4;
- return aom_rb_read_bit(rb) ? TX_MODE_SELECT : TX_MODE_LARGEST;
-}
-
-static REFERENCE_MODE read_frame_reference_mode(
- const AV1_COMMON *cm, struct aom_read_bit_buffer *rb) {
- if (frame_is_intra_only(cm)) {
- return SINGLE_REFERENCE;
- } else {
- return aom_rb_read_bit(rb) ? REFERENCE_MODE_SELECT : SINGLE_REFERENCE;
- }
-}
-
-static void inverse_transform_block(MACROBLOCKD *xd, int plane,
- const TX_TYPE tx_type,
- const TX_SIZE tx_size, uint8_t *dst,
- int stride, int reduced_tx_set) {
- struct macroblockd_plane *const pd = &xd->plane[plane];
- tran_low_t *const dqcoeff = pd->dqcoeff;
- eob_info *eob_data = pd->eob_data + xd->txb_offset[plane];
- uint16_t scan_line = eob_data->max_scan_line;
- uint16_t eob = eob_data->eob;
-
- memcpy(dqcoeff, pd->dqcoeff_block + xd->cb_offset[plane],
- (scan_line + 1) * sizeof(dqcoeff[0]));
- av1_inverse_transform_block(xd, dqcoeff, plane, tx_type, tx_size, dst, stride,
- eob, reduced_tx_set);
- memset(dqcoeff, 0, (scan_line + 1) * sizeof(dqcoeff[0]));
-}
-
-static void read_coeffs_tx_intra_block(const AV1_COMMON *const cm,
- MACROBLOCKD *const xd,
- aom_reader *const r, const int plane,
- const int row, const int col,
- const TX_SIZE tx_size) {
- MB_MODE_INFO *mbmi = xd->mi[0];
- if (!mbmi->skip) {
-#if TXCOEFF_TIMER
- struct aom_usec_timer timer;
- aom_usec_timer_start(&timer);
-#endif
- av1_read_coeffs_txb_facade(cm, xd, r, plane, row, col, tx_size);
-#if TXCOEFF_TIMER
- aom_usec_timer_mark(&timer);
- const int64_t elapsed_time = aom_usec_timer_elapsed(&timer);
- cm->txcoeff_timer += elapsed_time;
- ++cm->txb_count;
-#endif
- }
-}
-
-static void decode_block_void(const AV1_COMMON *const cm, MACROBLOCKD *const xd,
- aom_reader *const r, const int plane,
- const int row, const int col,
- const TX_SIZE tx_size) {
- (void)cm;
- (void)xd;
- (void)r;
- (void)plane;
- (void)row;
- (void)col;
- (void)tx_size;
-}
-
-static void predict_inter_block_void(AV1_COMMON *const cm,
- MACROBLOCKD *const xd, int mi_row,
- int mi_col, BLOCK_SIZE bsize) {
- (void)cm;
- (void)xd;
- (void)mi_row;
- (void)mi_col;
- (void)bsize;
-}
-
-static void cfl_store_inter_block_void(AV1_COMMON *const cm,
- MACROBLOCKD *const xd) {
- (void)cm;
- (void)xd;
-}
-
-static void predict_and_reconstruct_intra_block(
- const AV1_COMMON *const cm, MACROBLOCKD *const xd, aom_reader *const r,
- const int plane, const int row, const int col, const TX_SIZE tx_size) {
- (void)r;
- MB_MODE_INFO *mbmi = xd->mi[0];
- PLANE_TYPE plane_type = get_plane_type(plane);
-
- av1_predict_intra_block_facade(cm, xd, plane, col, row, tx_size);
-
- if (!mbmi->skip) {
- struct macroblockd_plane *const pd = &xd->plane[plane];
-
- // tx_type will be read out in av1_read_coeffs_txb_facade
- const TX_TYPE tx_type = av1_get_tx_type(plane_type, xd, row, col, tx_size,
- cm->reduced_tx_set_used);
- eob_info *eob_data = pd->eob_data + xd->txb_offset[plane];
- if (eob_data->eob) {
- uint8_t *dst =
- &pd->dst.buf[(row * pd->dst.stride + col) << tx_size_wide_log2[0]];
- inverse_transform_block(xd, plane, tx_type, tx_size, dst, pd->dst.stride,
- cm->reduced_tx_set_used);
- }
- }
- if (plane == AOM_PLANE_Y && store_cfl_required(cm, xd)) {
- cfl_store_tx(xd, row, col, tx_size, mbmi->sb_type);
- }
-}
-
-static void inverse_transform_inter_block(const AV1_COMMON *const cm,
- MACROBLOCKD *const xd,
- aom_reader *const r, const int plane,
- const int blk_row, const int blk_col,
- const TX_SIZE tx_size) {
- (void)r;
- PLANE_TYPE plane_type = get_plane_type(plane);
- const struct macroblockd_plane *const pd = &xd->plane[plane];
-
- // tx_type will be read out in av1_read_coeffs_txb_facade
- const TX_TYPE tx_type = av1_get_tx_type(plane_type, xd, blk_row, blk_col,
- tx_size, cm->reduced_tx_set_used);
-
- uint8_t *dst =
- &pd->dst
- .buf[(blk_row * pd->dst.stride + blk_col) << tx_size_wide_log2[0]];
- inverse_transform_block(xd, plane, tx_type, tx_size, dst, pd->dst.stride,
- cm->reduced_tx_set_used);
-#if CONFIG_MISMATCH_DEBUG
- int pixel_c, pixel_r;
- BLOCK_SIZE bsize = txsize_to_bsize[tx_size];
- int blk_w = block_size_wide[bsize];
- int blk_h = block_size_high[bsize];
- mi_to_pixel_loc(&pixel_c, &pixel_r, mi_col, mi_row, blk_col, blk_row,
- pd->subsampling_x, pd->subsampling_y);
- mismatch_check_block_tx(dst, pd->dst.stride, cm->frame_offset, plane, pixel_c,
- pixel_r, blk_w, blk_h,
- xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH);
-#endif
-}
-
-static void set_cb_buffer_offsets(MACROBLOCKD *const xd, TX_SIZE tx_size,
- int plane) {
- xd->cb_offset[plane] += tx_size_wide[tx_size] * tx_size_high[tx_size];
- xd->txb_offset[plane] =
- xd->cb_offset[plane] / (TX_SIZE_W_MIN * TX_SIZE_H_MIN);
-}
-
-static void decode_reconstruct_tx(AV1_COMMON *cm, ThreadData *const td,
- aom_reader *r, MB_MODE_INFO *const mbmi,
- int plane, BLOCK_SIZE plane_bsize,
- int blk_row, int blk_col, int block,
- TX_SIZE tx_size, int *eob_total) {
- MACROBLOCKD *const xd = &td->xd;
- const struct macroblockd_plane *const pd = &xd->plane[plane];
- const TX_SIZE plane_tx_size =
- plane ? av1_get_max_uv_txsize(mbmi->sb_type, pd->subsampling_x,
- pd->subsampling_y)
- : mbmi->inter_tx_size[av1_get_txb_size_index(plane_bsize, blk_row,
- blk_col)];
- // Scale to match transform block unit.
- const int max_blocks_high = max_block_high(xd, plane_bsize, plane);
- const int max_blocks_wide = max_block_wide(xd, plane_bsize, plane);
-
- if (blk_row >= max_blocks_high || blk_col >= max_blocks_wide) return;
-
- if (tx_size == plane_tx_size || plane) {
- td->read_coeffs_tx_inter_block_visit(cm, xd, r, plane, blk_row, blk_col,
- tx_size);
-
- td->inverse_tx_inter_block_visit(cm, xd, r, plane, blk_row, blk_col,
- tx_size);
- eob_info *eob_data = pd->eob_data + xd->txb_offset[plane];
- *eob_total += eob_data->eob;
- set_cb_buffer_offsets(xd, tx_size, plane);
- } else {
- const TX_SIZE sub_txs = sub_tx_size_map[tx_size];
- assert(IMPLIES(tx_size <= TX_4X4, sub_txs == tx_size));
- assert(IMPLIES(tx_size > TX_4X4, sub_txs < tx_size));
- const int bsw = tx_size_wide_unit[sub_txs];
- const int bsh = tx_size_high_unit[sub_txs];
- const int sub_step = bsw * bsh;
-
- assert(bsw > 0 && bsh > 0);
-
- for (int row = 0; row < tx_size_high_unit[tx_size]; row += bsh) {
- for (int col = 0; col < tx_size_wide_unit[tx_size]; col += bsw) {
- const int offsetr = blk_row + row;
- const int offsetc = blk_col + col;
-
- if (offsetr >= max_blocks_high || offsetc >= max_blocks_wide) continue;
-
- decode_reconstruct_tx(cm, td, r, mbmi, plane, plane_bsize, offsetr,
- offsetc, block, sub_txs, eob_total);
- block += sub_step;
- }
- }
- }
-}
-
-static void set_offsets(AV1_COMMON *const cm, MACROBLOCKD *const xd,
- BLOCK_SIZE bsize, int mi_row, int mi_col, int bw,
- int bh, int x_mis, int y_mis) {
- const int num_planes = av1_num_planes(cm);
-
- const int offset = mi_row * cm->mi_stride + mi_col;
- const TileInfo *const tile = &xd->tile;
-
- xd->mi = cm->mi_grid_visible + offset;
- xd->mi[0] = &cm->mi[offset];
- // TODO(slavarnway): Generate sb_type based on bwl and bhl, instead of
- // passing bsize from decode_partition().
- xd->mi[0]->sb_type = bsize;
-#if CONFIG_RD_DEBUG
- xd->mi[0]->mi_row = mi_row;
- xd->mi[0]->mi_col = mi_col;
-#endif
- xd->cfl.mi_row = mi_row;
- xd->cfl.mi_col = mi_col;
-
- assert(x_mis && y_mis);
- for (int x = 1; x < x_mis; ++x) xd->mi[x] = xd->mi[0];
- int idx = cm->mi_stride;
- for (int y = 1; y < y_mis; ++y) {
- memcpy(&xd->mi[idx], &xd->mi[0], x_mis * sizeof(xd->mi[0]));
- idx += cm->mi_stride;
- }
-
- set_plane_n4(xd, bw, bh, num_planes);
- set_skip_context(xd, mi_row, mi_col, num_planes);
-
- // Distance of Mb to the various image edges. These are specified to 8th pel
- // as they are always compared to values that are in 1/8th pel units
- set_mi_row_col(xd, tile, mi_row, bh, mi_col, bw, cm->mi_rows, cm->mi_cols);
-
- av1_setup_dst_planes(xd->plane, bsize, get_frame_new_buffer(cm), mi_row,
- mi_col, 0, num_planes);
-}
-
-static void decode_mbmi_block(AV1Decoder *const pbi, MACROBLOCKD *const xd,
- int mi_row, int mi_col, aom_reader *r,
- PARTITION_TYPE partition, BLOCK_SIZE bsize) {
- AV1_COMMON *const cm = &pbi->common;
- const SequenceHeader *const seq_params = &cm->seq_params;
- const int bw = mi_size_wide[bsize];
- const int bh = mi_size_high[bsize];
- const int x_mis = AOMMIN(bw, cm->mi_cols - mi_col);
- const int y_mis = AOMMIN(bh, cm->mi_rows - mi_row);
-
-#if CONFIG_ACCOUNTING
- aom_accounting_set_context(&pbi->accounting, mi_col, mi_row);
-#endif
- set_offsets(cm, xd, bsize, mi_row, mi_col, bw, bh, x_mis, y_mis);
- xd->mi[0]->partition = partition;
- av1_read_mode_info(pbi, xd, mi_row, mi_col, r, x_mis, y_mis);
- if (bsize >= BLOCK_8X8 &&
- (seq_params->subsampling_x || seq_params->subsampling_y)) {
- const BLOCK_SIZE uv_subsize =
- ss_size_lookup[bsize][seq_params->subsampling_x]
- [seq_params->subsampling_y];
- if (uv_subsize == BLOCK_INVALID)
- aom_internal_error(xd->error_info, AOM_CODEC_CORRUPT_FRAME,
- "Invalid block size.");
- }
-
- int reader_corrupted_flag = aom_reader_has_error(r);
- aom_merge_corrupted_flag(&xd->corrupted, reader_corrupted_flag);
-}
-
-typedef struct PadBlock {
- int x0;
- int x1;
- int y0;
- int y1;
-} PadBlock;
-
-static void highbd_build_mc_border(const uint8_t *src8, int src_stride,
- uint8_t *dst8, int dst_stride, int x, int y,
- int b_w, int b_h, int w, int h) {
- // Get a pointer to the start of the real data for this row.
- const uint16_t *src = CONVERT_TO_SHORTPTR(src8);
- uint16_t *dst = CONVERT_TO_SHORTPTR(dst8);
- const uint16_t *ref_row = src - x - y * src_stride;
-
- if (y >= h)
- ref_row += (h - 1) * src_stride;
- else if (y > 0)
- ref_row += y * src_stride;
-
- do {
- int right = 0, copy;
- int left = x < 0 ? -x : 0;
-
- if (left > b_w) left = b_w;
-
- if (x + b_w > w) right = x + b_w - w;
-
- if (right > b_w) right = b_w;
-
- copy = b_w - left - right;
-
- if (left) aom_memset16(dst, ref_row[0], left);
-
- if (copy) memcpy(dst + left, ref_row + x + left, copy * sizeof(uint16_t));
-
- if (right) aom_memset16(dst + left + copy, ref_row[w - 1], right);
-
- dst += dst_stride;
- ++y;
-
- if (y > 0 && y < h) ref_row += src_stride;
- } while (--b_h);
-}
-
-static void build_mc_border(const uint8_t *src, int src_stride, uint8_t *dst,
- int dst_stride, int x, int y, int b_w, int b_h,
- int w, int h) {
- // Get a pointer to the start of the real data for this row.
- const uint8_t *ref_row = src - x - y * src_stride;
-
- if (y >= h)
- ref_row += (h - 1) * src_stride;
- else if (y > 0)
- ref_row += y * src_stride;
-
- do {
- int right = 0, copy;
- int left = x < 0 ? -x : 0;
-
- if (left > b_w) left = b_w;
-
- if (x + b_w > w) right = x + b_w - w;
-
- if (right > b_w) right = b_w;
-
- copy = b_w - left - right;
-
- if (left) memset(dst, ref_row[0], left);
-
- if (copy) memcpy(dst + left, ref_row + x + left, copy);
-
- if (right) memset(dst + left + copy, ref_row[w - 1], right);
-
- dst += dst_stride;
- ++y;
-
- if (y > 0 && y < h) ref_row += src_stride;
- } while (--b_h);
-}
-
-static INLINE int update_extend_mc_border_params(
- const struct scale_factors *const sf, struct buf_2d *const pre_buf,
- MV32 scaled_mv, PadBlock *block, int subpel_x_mv, int subpel_y_mv,
- int do_warp, int is_intrabc, int *x_pad, int *y_pad) {
- const int is_scaled = av1_is_scaled(sf);
- // Get reference width and height.
- int frame_width = pre_buf->width;
- int frame_height = pre_buf->height;
-
- // Do border extension if there is motion or
- // width/height is not a multiple of 8 pixels.
- if ((!is_intrabc) && (!do_warp) &&
- (is_scaled || scaled_mv.col || scaled_mv.row || (frame_width & 0x7) ||
- (frame_height & 0x7))) {
- if (subpel_x_mv || (sf->x_step_q4 != SUBPEL_SHIFTS)) {
- block->x0 -= AOM_INTERP_EXTEND - 1;
- block->x1 += AOM_INTERP_EXTEND;
- *x_pad = 1;
- }
-
- if (subpel_y_mv || (sf->y_step_q4 != SUBPEL_SHIFTS)) {
- block->y0 -= AOM_INTERP_EXTEND - 1;
- block->y1 += AOM_INTERP_EXTEND;
- *y_pad = 1;
- }
-
- // Skip border extension if block is inside the frame.
- if (block->x0 < 0 || block->x1 > frame_width - 1 || block->y0 < 0 ||
- block->y1 > frame_height - 1) {
- return 1;
- }
- }
- return 0;
-}
-
-static INLINE void extend_mc_border(const struct scale_factors *const sf,
- struct buf_2d *const pre_buf,
- MV32 scaled_mv, PadBlock block,
- int subpel_x_mv, int subpel_y_mv,
- int do_warp, int is_intrabc, int highbd,
- uint8_t *mc_buf, uint8_t **pre,
- int *src_stride) {
- int x_pad = 0, y_pad = 0;
- if (update_extend_mc_border_params(sf, pre_buf, scaled_mv, &block,
- subpel_x_mv, subpel_y_mv, do_warp,
- is_intrabc, &x_pad, &y_pad)) {
- // Get reference block pointer.
- const uint8_t *const buf_ptr =
- pre_buf->buf0 + block.y0 * pre_buf->stride + block.x0;
- int buf_stride = pre_buf->stride;
- const int b_w = block.x1 - block.x0;
- const int b_h = block.y1 - block.y0;
-
- // Extend the border.
- if (highbd) {
- highbd_build_mc_border(buf_ptr, buf_stride, mc_buf, b_w, block.x0,
- block.y0, b_w, b_h, pre_buf->width,
- pre_buf->height);
- } else {
- build_mc_border(buf_ptr, buf_stride, mc_buf, b_w, block.x0, block.y0, b_w,
- b_h, pre_buf->width, pre_buf->height);
- }
- *src_stride = b_w;
- *pre = mc_buf + y_pad * (AOM_INTERP_EXTEND - 1) * b_w +
- x_pad * (AOM_INTERP_EXTEND - 1);
- }
-}
-
-static INLINE void dec_calc_subpel_params(
- MACROBLOCKD *xd, const struct scale_factors *const sf, const MV mv,
- int plane, const int pre_x, const int pre_y, int x, int y,
- struct buf_2d *const pre_buf, SubpelParams *subpel_params, int bw, int bh,
- PadBlock *block, int mi_x, int mi_y, MV32 *scaled_mv, int *subpel_x_mv,
- int *subpel_y_mv) {
- struct macroblockd_plane *const pd = &xd->plane[plane];
- const int is_scaled = av1_is_scaled(sf);
- if (is_scaled) {
- int ssx = pd->subsampling_x;
- int ssy = pd->subsampling_y;
- int orig_pos_y = (pre_y + y) << SUBPEL_BITS;
- orig_pos_y += mv.row * (1 << (1 - ssy));
- int orig_pos_x = (pre_x + x) << SUBPEL_BITS;
- orig_pos_x += mv.col * (1 << (1 - ssx));
- int pos_y = sf->scale_value_y(orig_pos_y, sf);
- int pos_x = sf->scale_value_x(orig_pos_x, sf);
- pos_x += SCALE_EXTRA_OFF;
- pos_y += SCALE_EXTRA_OFF;
-
- const int top = -AOM_LEFT_TOP_MARGIN_SCALED(ssy);
- const int left = -AOM_LEFT_TOP_MARGIN_SCALED(ssx);
- const int bottom = (pre_buf->height + AOM_INTERP_EXTEND)
- << SCALE_SUBPEL_BITS;
- const int right = (pre_buf->width + AOM_INTERP_EXTEND) << SCALE_SUBPEL_BITS;
- pos_y = clamp(pos_y, top, bottom);
- pos_x = clamp(pos_x, left, right);
-
- subpel_params->subpel_x = pos_x & SCALE_SUBPEL_MASK;
- subpel_params->subpel_y = pos_y & SCALE_SUBPEL_MASK;
- subpel_params->xs = sf->x_step_q4;
- subpel_params->ys = sf->y_step_q4;
-
- // Get reference block top left coordinate.
- block->x0 = pos_x >> SCALE_SUBPEL_BITS;
- block->y0 = pos_y >> SCALE_SUBPEL_BITS;
-
- // Get reference block bottom right coordinate.
- block->x1 =
- ((pos_x + (bw - 1) * subpel_params->xs) >> SCALE_SUBPEL_BITS) + 1;
- block->y1 =
- ((pos_y + (bh - 1) * subpel_params->ys) >> SCALE_SUBPEL_BITS) + 1;
-
- MV temp_mv;
- temp_mv = clamp_mv_to_umv_border_sb(xd, &mv, bw, bh, pd->subsampling_x,
- pd->subsampling_y);
- *scaled_mv = av1_scale_mv(&temp_mv, (mi_x + x), (mi_y + y), sf);
- scaled_mv->row += SCALE_EXTRA_OFF;
- scaled_mv->col += SCALE_EXTRA_OFF;
-
- *subpel_x_mv = scaled_mv->col & SCALE_SUBPEL_MASK;
- *subpel_y_mv = scaled_mv->row & SCALE_SUBPEL_MASK;
- } else {
- // Get block position in current frame.
- int pos_x = (pre_x + x) << SUBPEL_BITS;
- int pos_y = (pre_y + y) << SUBPEL_BITS;
-
- const MV mv_q4 = clamp_mv_to_umv_border_sb(
- xd, &mv, bw, bh, pd->subsampling_x, pd->subsampling_y);
- subpel_params->xs = subpel_params->ys = SCALE_SUBPEL_SHIFTS;
- subpel_params->subpel_x = (mv_q4.col & SUBPEL_MASK) << SCALE_EXTRA_BITS;
- subpel_params->subpel_y = (mv_q4.row & SUBPEL_MASK) << SCALE_EXTRA_BITS;
-
- // Get reference block top left coordinate.
- pos_x += mv_q4.col;
- pos_y += mv_q4.row;
- block->x0 = pos_x >> SUBPEL_BITS;
- block->y0 = pos_y >> SUBPEL_BITS;
-
- // Get reference block bottom right coordinate.
- block->x1 = (pos_x >> SUBPEL_BITS) + (bw - 1) + 1;
- block->y1 = (pos_y >> SUBPEL_BITS) + (bh - 1) + 1;
-
- scaled_mv->row = mv_q4.row;
- scaled_mv->col = mv_q4.col;
- *subpel_x_mv = scaled_mv->col & SUBPEL_MASK;
- *subpel_y_mv = scaled_mv->row & SUBPEL_MASK;
- }
-}
-
-static INLINE void dec_build_inter_predictors(const AV1_COMMON *cm,
- MACROBLOCKD *xd, int plane,
- const MB_MODE_INFO *mi,
- int build_for_obmc, int bw,
- int bh, int mi_x, int mi_y) {
- struct macroblockd_plane *const pd = &xd->plane[plane];
- int is_compound = has_second_ref(mi);
- int ref;
- const int is_intrabc = is_intrabc_block(mi);
- assert(IMPLIES(is_intrabc, !is_compound));
- int is_global[2] = { 0, 0 };
- for (ref = 0; ref < 1 + is_compound; ++ref) {
- const WarpedMotionParams *const wm = &xd->global_motion[mi->ref_frame[ref]];
- is_global[ref] = is_global_mv_block(mi, wm->wmtype);
- }
-
- const BLOCK_SIZE bsize = mi->sb_type;
- const int ss_x = pd->subsampling_x;
- const int ss_y = pd->subsampling_y;
- int sub8x8_inter = (block_size_wide[bsize] < 8 && ss_x) ||
- (block_size_high[bsize] < 8 && ss_y);
-
- if (is_intrabc) sub8x8_inter = 0;
-
- // For sub8x8 chroma blocks, we may be covering more than one luma block's
- // worth of pixels. Thus (mi_x, mi_y) may not be the correct coordinates for
- // the top-left corner of the prediction source - the correct top-left corner
- // is at (pre_x, pre_y).
- const int row_start =
- (block_size_high[bsize] == 4) && ss_y && !build_for_obmc ? -1 : 0;
- const int col_start =
- (block_size_wide[bsize] == 4) && ss_x && !build_for_obmc ? -1 : 0;
- const int pre_x = (mi_x + MI_SIZE * col_start) >> ss_x;
- const int pre_y = (mi_y + MI_SIZE * row_start) >> ss_y;
-
- sub8x8_inter = sub8x8_inter && !build_for_obmc;
- if (sub8x8_inter) {
- for (int row = row_start; row <= 0 && sub8x8_inter; ++row) {
- for (int col = col_start; col <= 0; ++col) {
- const MB_MODE_INFO *this_mbmi = xd->mi[row * xd->mi_stride + col];
- if (!is_inter_block(this_mbmi)) sub8x8_inter = 0;
- if (is_intrabc_block(this_mbmi)) sub8x8_inter = 0;
- }
- }
- }
-
- if (sub8x8_inter) {
- // block size
- const int b4_w = block_size_wide[bsize] >> ss_x;
- const int b4_h = block_size_high[bsize] >> ss_y;
- const BLOCK_SIZE plane_bsize = scale_chroma_bsize(bsize, ss_x, ss_y);
- const int b8_w = block_size_wide[plane_bsize] >> ss_x;
- const int b8_h = block_size_high[plane_bsize] >> ss_y;
- assert(!is_compound);
-
- const struct buf_2d orig_pred_buf[2] = { pd->pre[0], pd->pre[1] };
-
- int row = row_start;
- int src_stride;
- for (int y = 0; y < b8_h; y += b4_h) {
- int col = col_start;
- for (int x = 0; x < b8_w; x += b4_w) {
- MB_MODE_INFO *this_mbmi = xd->mi[row * xd->mi_stride + col];
- is_compound = has_second_ref(this_mbmi);
- int tmp_dst_stride = 8;
- assert(bw < 8 || bh < 8);
- ConvolveParams conv_params = get_conv_params_no_round(
- 0, plane, xd->tmp_conv_dst, tmp_dst_stride, is_compound, xd->bd);
- conv_params.use_jnt_comp_avg = 0;
- struct buf_2d *const dst_buf = &pd->dst;
- uint8_t *dst = dst_buf->buf + dst_buf->stride * y + x;
-
- ref = 0;
- const RefBuffer *ref_buf =
- &cm->frame_refs[this_mbmi->ref_frame[ref] - LAST_FRAME];
-
- pd->pre[ref].buf0 =
- (plane == 1) ? ref_buf->buf->u_buffer : ref_buf->buf->v_buffer;
- pd->pre[ref].buf =
- pd->pre[ref].buf0 + scaled_buffer_offset(pre_x, pre_y,
- ref_buf->buf->uv_stride,
- &ref_buf->sf);
- pd->pre[ref].width = ref_buf->buf->uv_crop_width;
- pd->pre[ref].height = ref_buf->buf->uv_crop_height;
- pd->pre[ref].stride = ref_buf->buf->uv_stride;
-
- const struct scale_factors *const sf =
- is_intrabc ? &cm->sf_identity : &ref_buf->sf;
- struct buf_2d *const pre_buf = is_intrabc ? dst_buf : &pd->pre[ref];
-
- const MV mv = this_mbmi->mv[ref].as_mv;
-
- uint8_t *pre;
- SubpelParams subpel_params;
- PadBlock block;
- MV32 scaled_mv;
- int subpel_x_mv, subpel_y_mv;
- int highbd;
- WarpTypesAllowed warp_types;
- warp_types.global_warp_allowed = is_global[ref];
- warp_types.local_warp_allowed = this_mbmi->motion_mode == WARPED_CAUSAL;
-
- dec_calc_subpel_params(xd, sf, mv, plane, pre_x, pre_y, x, y, pre_buf,
- &subpel_params, bw, bh, &block, mi_x, mi_y,
- &scaled_mv, &subpel_x_mv, &subpel_y_mv);
- pre = pre_buf->buf0 + block.y0 * pre_buf->stride + block.x0;
- src_stride = pre_buf->stride;
- highbd = xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH;
- extend_mc_border(sf, pre_buf, scaled_mv, block, subpel_x_mv,
- subpel_y_mv, 0, is_intrabc, highbd, xd->mc_buf[ref],
- &pre, &src_stride);
- conv_params.do_average = ref;
- if (is_masked_compound_type(mi->interinter_comp.type)) {
- // masked compound type has its own average mechanism
- conv_params.do_average = 0;
- }
-
- av1_make_inter_predictor(
- pre, src_stride, dst, dst_buf->stride, &subpel_params, sf, b4_w,
- b4_h, &conv_params, this_mbmi->interp_filters, &warp_types,
- (mi_x >> pd->subsampling_x) + x, (mi_y >> pd->subsampling_y) + y,
- plane, ref, mi, build_for_obmc, xd, cm->allow_warped_motion);
-
- ++col;
- }
- ++row;
- }
-
- for (ref = 0; ref < 2; ++ref) pd->pre[ref] = orig_pred_buf[ref];
- return;
- }
-
- {
- struct buf_2d *const dst_buf = &pd->dst;
- uint8_t *const dst = dst_buf->buf;
- uint8_t *pre[2];
- SubpelParams subpel_params[2];
- int src_stride[2];
- for (ref = 0; ref < 1 + is_compound; ++ref) {
- const struct scale_factors *const sf =
- is_intrabc ? &cm->sf_identity : &xd->block_refs[ref]->sf;
- struct buf_2d *const pre_buf = is_intrabc ? dst_buf : &pd->pre[ref];
- const MV mv = mi->mv[ref].as_mv;
- PadBlock block;
- MV32 scaled_mv;
- int subpel_x_mv, subpel_y_mv;
- int highbd;
-
- dec_calc_subpel_params(xd, sf, mv, plane, pre_x, pre_y, 0, 0, pre_buf,
- &subpel_params[ref], bw, bh, &block, mi_x, mi_y,
- &scaled_mv, &subpel_x_mv, &subpel_y_mv);
- pre[ref] = pre_buf->buf0 + block.y0 * pre_buf->stride + block.x0;
- src_stride[ref] = pre_buf->stride;
- highbd = xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH;
-
- WarpTypesAllowed warp_types;
- warp_types.global_warp_allowed = is_global[ref];
- warp_types.local_warp_allowed = mi->motion_mode == WARPED_CAUSAL;
- int do_warp = (bw >= 8 && bh >= 8 &&
- av1_allow_warp(mi, &warp_types,
- &xd->global_motion[mi->ref_frame[ref]],
- build_for_obmc, subpel_params[ref].xs,
- subpel_params[ref].ys, NULL));
- do_warp = (do_warp && xd->cur_frame_force_integer_mv == 0);
-
- extend_mc_border(sf, pre_buf, scaled_mv, block, subpel_x_mv, subpel_y_mv,
- do_warp, is_intrabc, highbd, xd->mc_buf[ref], &pre[ref],
- &src_stride[ref]);
- }
-
- ConvolveParams conv_params = get_conv_params_no_round(
- 0, plane, xd->tmp_conv_dst, MAX_SB_SIZE, is_compound, xd->bd);
- av1_jnt_comp_weight_assign(cm, mi, 0, &conv_params.fwd_offset,
- &conv_params.bck_offset,
- &conv_params.use_jnt_comp_avg, is_compound);
-
- for (ref = 0; ref < 1 + is_compound; ++ref) {
- const struct scale_factors *const sf =
- is_intrabc ? &cm->sf_identity : &xd->block_refs[ref]->sf;
- WarpTypesAllowed warp_types;
- warp_types.global_warp_allowed = is_global[ref];
- warp_types.local_warp_allowed = mi->motion_mode == WARPED_CAUSAL;
- conv_params.do_average = ref;
- if (is_masked_compound_type(mi->interinter_comp.type)) {
- // masked compound type has its own average mechanism
- conv_params.do_average = 0;
- }
-
- if (ref && is_masked_compound_type(mi->interinter_comp.type))
- av1_make_masked_inter_predictor(
- pre[ref], src_stride[ref], dst, dst_buf->stride,
- &subpel_params[ref], sf, bw, bh, &conv_params, mi->interp_filters,
- plane, &warp_types, mi_x >> pd->subsampling_x,
- mi_y >> pd->subsampling_y, ref, xd, cm->allow_warped_motion);
- else
- av1_make_inter_predictor(
- pre[ref], src_stride[ref], dst, dst_buf->stride,
- &subpel_params[ref], sf, bw, bh, &conv_params, mi->interp_filters,
- &warp_types, mi_x >> pd->subsampling_x, mi_y >> pd->subsampling_y,
- plane, ref, mi, build_for_obmc, xd, cm->allow_warped_motion);
- }
- }
-}
-
-// Builds the inter prediction of every plane in [plane_from, plane_to]
-// (both inclusive). A plane is skipped when is_chroma_reference() returns 0
-// for this block position, size and the plane's subsampling.
-static void dec_build_inter_predictors_for_planes(const AV1_COMMON *cm,
-                                                  MACROBLOCKD *xd,
-                                                  BLOCK_SIZE bsize, int mi_row,
-                                                  int mi_col, int plane_from,
-                                                  int plane_to) {
-  // Block origin: mode-info coordinates scaled by MI_SIZE.
-  const int origin_x = mi_col * MI_SIZE;
-  const int origin_y = mi_row * MI_SIZE;
-
-  for (int p = plane_from; p <= plane_to; ++p) {
-    const struct macroblockd_plane *const plane_desc = &xd->plane[p];
-    if (is_chroma_reference(mi_row, mi_col, bsize, plane_desc->subsampling_x,
-                            plane_desc->subsampling_y)) {
-      // Fifth argument is 0 here; the OBMC neighbour callbacks pass 1.
-      dec_build_inter_predictors(cm, xd, p, xd->mi[0], 0, plane_desc->width,
-                                 plane_desc->height, origin_x, origin_y);
-    }
-  }
-}
-
-// Builds the plane-0 inter prediction and, when the block is an inter-intra
-// block, runs av1_build_interintra_predictors_sbp() on the result. If ctx is
-// NULL a BUFFER_SET describing the plane-0 destination buffer is used.
-static void dec_build_inter_predictors_sby(const AV1_COMMON *cm,
-                                           MACROBLOCKD *xd, int mi_row,
-                                           int mi_col, BUFFER_SET *ctx,
-                                           BLOCK_SIZE bsize) {
-  dec_build_inter_predictors_for_planes(cm, xd, bsize, mi_row, mi_col, 0, 0);
-  if (!is_interintra_pred(xd->mi[0])) return;
-
-  const struct buf_2d *const y_dst = &xd->plane[0].dst;
-  BUFFER_SET fallback_ctx = { { y_dst->buf, NULL, NULL },
-                              { y_dst->stride, 0, 0 } };
-  av1_build_interintra_predictors_sbp(cm, xd, y_dst->buf, y_dst->stride,
-                                      ctx ? ctx : &fallback_ctx, 0, bsize);
-}
-
-// Builds the inter prediction of planes 1..MAX_MB_PLANE-1 and, when the
-// block is an inter-intra block, runs av1_build_interintra_predictors_sbuv()
-// on the result. If ctx is NULL a BUFFER_SET describing the plane-1 and
-// plane-2 destination buffers is used.
-static void dec_build_inter_predictors_sbuv(const AV1_COMMON *cm,
-                                            MACROBLOCKD *xd, int mi_row,
-                                            int mi_col, BUFFER_SET *ctx,
-                                            BLOCK_SIZE bsize) {
-  dec_build_inter_predictors_for_planes(cm, xd, bsize, mi_row, mi_col, 1,
-                                        MAX_MB_PLANE - 1);
-  if (!is_interintra_pred(xd->mi[0])) return;
-
-  const struct buf_2d *const u_dst = &xd->plane[1].dst;
-  const struct buf_2d *const v_dst = &xd->plane[2].dst;
-  BUFFER_SET fallback_ctx = { { NULL, u_dst->buf, v_dst->buf },
-                              { 0, u_dst->stride, v_dst->stride } };
-  av1_build_interintra_predictors_sbuv(cm, xd, u_dst->buf, v_dst->buf,
-                                       u_dst->stride, v_dst->stride,
-                                       ctx ? ctx : &fallback_ctx, bsize);
-}
-
-// Builds the inter prediction of the whole block: plane 0 always, the
-// remaining planes only when av1_num_planes() reports more than one plane.
-static void dec_build_inter_predictors_sb(const AV1_COMMON *cm, MACROBLOCKD *xd,
-                                          int mi_row, int mi_col,
-                                          BUFFER_SET *ctx, BLOCK_SIZE bsize) {
-  const int has_more_planes = av1_num_planes(cm) > 1;
-
-  dec_build_inter_predictors_sby(cm, xd, mi_row, mi_col, ctx, bsize);
-  if (!has_more_planes) return;
-  dec_build_inter_predictors_sbuv(cm, xd, mi_row, mi_col, ctx, bsize);
-}
-
-// Per-neighbour callback handed to foreach_overlappable_nb_above(). Sets up
-// xd for one above neighbour and builds that neighbour's prediction for every
-// plane not rejected by av1_skip_u4x4_pred_in_obmc(). fun_ctxt must point to
-// a struct build_prediction_ctxt.
-static INLINE void dec_build_prediction_by_above_pred(
-    MACROBLOCKD *xd, int rel_mi_col, uint8_t above_mi_width,
-    MB_MODE_INFO *above_mbmi, void *fun_ctxt, const int num_planes) {
-  struct build_prediction_ctxt *const pred_ctxt =
-      (struct build_prediction_ctxt *)fun_ctxt;
-  const int nb_mi_col = pred_ctxt->mi_col + rel_mi_col;
-  // A local copy of the neighbour's mode info is what gets passed on.
-  MB_MODE_INFO nb_mbmi = *above_mbmi;
-
-  av1_setup_build_prediction_by_above_pred(xd, rel_mi_col, above_mi_width,
-                                           &nb_mbmi, pred_ctxt, num_planes);
-
-  const int mi_x = nb_mi_col << MI_SIZE_LOG2;
-  const int mi_y = pred_ctxt->mi_row << MI_SIZE_LOG2;
-  const BLOCK_SIZE cur_bsize = xd->mi[0]->sb_type;
-
-  for (int p = 0; p < num_planes; ++p) {
-    const struct macroblockd_plane *const plane_desc = &xd->plane[p];
-    if (av1_skip_u4x4_pred_in_obmc(cur_bsize, plane_desc, 0)) continue;
-
-    // Width follows the neighbour; height is half the current block's plane
-    // height, clamped to [4, half the plane height of a 64x64 block].
-    const int half_shift = plane_desc->subsampling_y + 1;
-    const int pred_w = (above_mi_width * MI_SIZE) >> plane_desc->subsampling_x;
-    const int pred_h = clamp(block_size_high[cur_bsize] >> half_shift, 4,
-                             block_size_high[BLOCK_64X64] >> half_shift);
-    dec_build_inter_predictors(pred_ctxt->cm, xd, p, &nb_mbmi, 1, pred_w,
-                               pred_h, mi_x, mi_y);
-  }
-}
-
-// Builds the predictions from the overlappable above neighbours into
-// tmp_buf by running dec_build_prediction_by_above_pred() over them. Does
-// nothing when xd->up_available is 0.
-static void dec_build_prediction_by_above_preds(
-    const AV1_COMMON *cm, MACROBLOCKD *xd, int mi_row, int mi_col,
-    uint8_t *tmp_buf[MAX_MB_PLANE], int tmp_width[MAX_MB_PLANE],
-    int tmp_height[MAX_MB_PLANE], int tmp_stride[MAX_MB_PLANE]) {
-  if (!xd->up_available) return;
-
-  // The OBMC prediction block is half the height of the current block,
-  // capped at 32. Shift mb_to_bottom_edge by the difference for the duration
-  // of the neighbour walk; it is shifted back at the end.
-  const int block_h = xd->n4_h * MI_SIZE;
-  const int obmc_h = AOMMIN(block_h / 2, 32);
-  const int bottom_adjust = (block_h - obmc_h) * 8;
-  xd->mb_to_bottom_edge += bottom_adjust;
-
-  struct build_prediction_ctxt pred_ctxt = { cm,
-                                             mi_row,
-                                             mi_col,
-                                             tmp_buf,
-                                             tmp_width,
-                                             tmp_height,
-                                             tmp_stride,
-                                             xd->mb_to_right_edge };
-  const BLOCK_SIZE cur_bsize = xd->mi[0]->sb_type;
-  const int max_neighbors = max_neighbor_obmc[mi_size_wide_log2[cur_bsize]];
-  foreach_overlappable_nb_above(cm, xd, mi_col, max_neighbors,
-                                dec_build_prediction_by_above_pred,
-                                &pred_ctxt);
-
-  // Put the edge distances back: left is recomputed from mi_col, right comes
-  // from the value captured in the context before the walk.
-  xd->mb_to_left_edge = -((mi_col * MI_SIZE) * 8);
-  xd->mb_to_right_edge = pred_ctxt.mb_to_far_edge;
-  xd->mb_to_bottom_edge -= bottom_adjust;
-}
-
-// Per-neighbour callback handed to foreach_overlappable_nb_left(). Sets up
-// xd for one left neighbour and builds that neighbour's prediction for every
-// plane not rejected by av1_skip_u4x4_pred_in_obmc(). fun_ctxt must point to
-// a struct build_prediction_ctxt.
-static INLINE void dec_build_prediction_by_left_pred(
-    MACROBLOCKD *xd, int rel_mi_row, uint8_t left_mi_height,
-    MB_MODE_INFO *left_mbmi, void *fun_ctxt, const int num_planes) {
-  struct build_prediction_ctxt *const pred_ctxt =
-      (struct build_prediction_ctxt *)fun_ctxt;
-  const int nb_mi_row = pred_ctxt->mi_row + rel_mi_row;
-  // A local copy of the neighbour's mode info is what gets passed on.
-  MB_MODE_INFO nb_mbmi = *left_mbmi;
-
-  av1_setup_build_prediction_by_left_pred(xd, rel_mi_row, left_mi_height,
-                                          &nb_mbmi, pred_ctxt, num_planes);
-
-  const int mi_x = pred_ctxt->mi_col << MI_SIZE_LOG2;
-  const int mi_y = nb_mi_row << MI_SIZE_LOG2;
-  const BLOCK_SIZE cur_bsize = xd->mi[0]->sb_type;
-
-  for (int p = 0; p < num_planes; ++p) {
-    const struct macroblockd_plane *const plane_desc = &xd->plane[p];
-    if (av1_skip_u4x4_pred_in_obmc(cur_bsize, plane_desc, 1)) continue;
-
-    // Height follows the neighbour; width is half the current block's plane
-    // width, clamped to [4, half the plane width of a 64x64 block].
-    const int half_shift = plane_desc->subsampling_x + 1;
-    const int pred_w = clamp(block_size_wide[cur_bsize] >> half_shift, 4,
-                             block_size_wide[BLOCK_64X64] >> half_shift);
-    const int pred_h =
-        (left_mi_height << MI_SIZE_LOG2) >> plane_desc->subsampling_y;
-    dec_build_inter_predictors(pred_ctxt->cm, xd, p, &nb_mbmi, 1, pred_w,
-                               pred_h, mi_x, mi_y);
-  }
-}
-
-// Builds the predictions from the overlappable left neighbours into tmp_buf
-// by running dec_build_prediction_by_left_pred() over them. Does nothing
-// when xd->left_available is 0.
-static void dec_build_prediction_by_left_preds(
-    const AV1_COMMON *cm, MACROBLOCKD *xd, int mi_row, int mi_col,
-    uint8_t *tmp_buf[MAX_MB_PLANE], int tmp_width[MAX_MB_PLANE],
-    int tmp_height[MAX_MB_PLANE], int tmp_stride[MAX_MB_PLANE]) {
-  if (!xd->left_available) return;
-
-  // The OBMC prediction block is half the width of the current block, capped
-  // at 32. Shift mb_to_right_edge by the difference for the duration of the
-  // neighbour walk; it is shifted back at the end.
-  const int block_w = xd->n4_w * MI_SIZE;
-  const int obmc_w = AOMMIN(block_w / 2, 32);
-  const int right_adjust = (block_w - obmc_w) * 8;
-  xd->mb_to_right_edge += right_adjust;
-
-  struct build_prediction_ctxt pred_ctxt = { cm,
-                                             mi_row,
-                                             mi_col,
-                                             tmp_buf,
-                                             tmp_width,
-                                             tmp_height,
-                                             tmp_stride,
-                                             xd->mb_to_bottom_edge };
-  const BLOCK_SIZE cur_bsize = xd->mi[0]->sb_type;
-  const int max_neighbors = max_neighbor_obmc[mi_size_high_log2[cur_bsize]];
-  foreach_overlappable_nb_left(cm, xd, mi_row, max_neighbors,
-                               dec_build_prediction_by_left_pred, &pred_ctxt);
-
-  // Put the edge distances back: top is recomputed from mi_row, bottom comes
-  // from the value captured in the context before the walk.
-  xd->mb_to_top_edge = -((mi_row * MI_SIZE) * 8);
-  xd->mb_to_right_edge -= right_adjust;
-  xd->mb_to_bottom_edge = pred_ctxt.mb_to_far_edge;
-}
-
-// Builds the OBMC prediction of the current block: predictions from the
-// above neighbours go to xd->tmp_obmc_bufs[0], those from the left
-// neighbours to xd->tmp_obmc_bufs[1], then the destination planes are set up
-// again and av1_build_obmc_inter_prediction() combines both.
-static void dec_build_obmc_inter_predictors_sb(const AV1_COMMON *cm,
-                                               MACROBLOCKD *xd, int mi_row,
-                                               int mi_col) {
-  const int num_planes = av1_num_planes(cm);
-  uint8_t *above_pred[MAX_MB_PLANE], *left_pred[MAX_MB_PLANE];
-  int above_stride[MAX_MB_PLANE] = { MAX_SB_SIZE, MAX_SB_SIZE, MAX_SB_SIZE };
-  int left_stride[MAX_MB_PLANE] = { MAX_SB_SIZE, MAX_SB_SIZE, MAX_SB_SIZE };
-  int above_width[MAX_MB_PLANE] = { MAX_SB_SIZE, MAX_SB_SIZE, MAX_SB_SIZE };
-  int left_width[MAX_MB_PLANE] = { MAX_SB_SIZE, MAX_SB_SIZE, MAX_SB_SIZE };
-  int above_height[MAX_MB_PLANE] = { MAX_SB_SIZE, MAX_SB_SIZE, MAX_SB_SIZE };
-  int left_height[MAX_MB_PLANE] = { MAX_SB_SIZE, MAX_SB_SIZE, MAX_SB_SIZE };
-
-  // Each scratch buffer holds the planes back to back, MAX_SB_SQUARE samples
-  // apart. High-bit-depth samples take sizeof(uint16_t) bytes each and the
-  // resulting address goes through CONVERT_TO_BYTEPTR().
-  const int is_hbd = xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH;
-  const int bytes_per_sample = is_hbd ? (int)sizeof(uint16_t) : 1;
-  for (int p = 0; p < MAX_MB_PLANE; ++p) {
-    const int plane_offset = MAX_SB_SQUARE * p * bytes_per_sample;
-    uint8_t *const above_base = xd->tmp_obmc_bufs[0] + plane_offset;
-    uint8_t *const left_base = xd->tmp_obmc_bufs[1] + plane_offset;
-    above_pred[p] = is_hbd ? CONVERT_TO_BYTEPTR(above_base) : above_base;
-    left_pred[p] = is_hbd ? CONVERT_TO_BYTEPTR(left_base) : left_base;
-  }
-
-  dec_build_prediction_by_above_preds(cm, xd, mi_row, mi_col, above_pred,
-                                      above_width, above_height, above_stride);
-  dec_build_prediction_by_left_preds(cm, xd, mi_row, mi_col, left_pred,
-                                     left_width, left_height, left_stride);
-  av1_setup_dst_planes(xd->plane, xd->mi[0]->sb_type, get_frame_new_buffer(cm),
-                       mi_row, mi_col, 0, num_planes);
-  av1_build_obmc_inter_prediction(cm, xd, mi_row, mi_col, above_pred,
-                                  above_stride, left_pred, left_stride);
-}
-
-// Calls cfl_store_block() with the current block's size and transform size
-// when store_cfl_required() says so; otherwise does nothing.
-static void cfl_store_inter_block(AV1_COMMON *const cm, MACROBLOCKD *const xd) {
-  MB_MODE_INFO *const cur_mbmi = xd->mi[0];
-  if (!store_cfl_required(cm, xd)) return;
-  cfl_store_block(xd, cur_mbmi->sb_type, cur_mbmi->tx_size);
-}
-
-// Produces the inter prediction of the current block: binds the reference
-// buffer(s), builds the regular inter prediction and, for OBMC_CAUSAL blocks,
-// the OBMC prediction on top of it.
-static void predict_inter_block(AV1_COMMON *const cm, MACROBLOCKD *const xd,
-                                int mi_row, int mi_col, BLOCK_SIZE bsize) {
-  MB_MODE_INFO *const cur_mbmi = xd->mi[0];
-  const int num_planes = av1_num_planes(cm);
-
-  for (int ref = 0; ref < 1 + has_second_ref(cur_mbmi); ++ref) {
-    const MV_REFERENCE_FRAME frame = cur_mbmi->ref_frame[ref];
-    if (frame < LAST_FRAME) {
-      // No reference buffer to bind: only an intra-block-copy block may get
-      // here, and only for its first (sole) reference.
-      assert(is_intrabc_block(cur_mbmi));
-      assert(frame == INTRA_FRAME);
-      assert(ref == 0);
-      continue;
-    }
-
-    RefBuffer *const ref_buf = &cm->frame_refs[frame - LAST_FRAME];
-    xd->block_refs[ref] = ref_buf;
-    av1_setup_pre_planes(xd, ref, ref_buf->buf, mi_row, mi_col, &ref_buf->sf,
-                         num_planes);
-  }
-
-  dec_build_inter_predictors_sb(cm, xd, mi_row, mi_col, NULL, bsize);
-  if (cur_mbmi->motion_mode == OBMC_CAUSAL) {
-    dec_build_obmc_inter_predictors_sb(cm, xd, mi_row, mi_col);
-  }
-#if CONFIG_MISMATCH_DEBUG
-  // Hand every predicted plane to the mismatch checker.
-  for (int p = 0; p < num_planes; ++p) {
-    const struct macroblockd_plane *const plane_desc = &xd->plane[p];
-    if (!is_chroma_reference(mi_row, mi_col, bsize, plane_desc->subsampling_x,
-                             plane_desc->subsampling_y))
-      continue;
-    int pixel_c, pixel_r;
-    mi_to_pixel_loc(&pixel_c, &pixel_r, mi_col, mi_row, 0, 0,
-                    plane_desc->subsampling_x, plane_desc->subsampling_y);
-    mismatch_check_block_pre(plane_desc->dst.buf, plane_desc->dst.stride,
-                             cm->frame_offset, p, pixel_c, pixel_r,
-                             plane_desc->width, plane_desc->height,
-                             xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH);
-  }
-#endif
-}
-
-// Advances xd->color_index_map_offset[plane] by the plane width times the
-// plane height that av1_get_block_dimensions() reports for the current
-// block. The reader argument is unused.
-static void set_color_index_map_offset(MACROBLOCKD *const xd, int plane,
-                                       aom_reader *r) {
-  (void)r;
-  Av1ColorMapParam dims;
-  const MB_MODE_INFO *const cur_mbmi = xd->mi[0];
-
-  av1_get_block_dimensions(cur_mbmi->sb_type, plane, xd, &dims.plane_width,
-                           &dims.plane_height, NULL, NULL);
-  xd->color_index_map_offset[plane] += dims.plane_width * dims.plane_height;
-}
-
-// Processes one block at (mi_row, mi_col) of size bsize through the visitor
-// callbacks stored in td.
-//  - Non-inter blocks: for every transform block of every plane, calls
-//    td->read_coeffs_tx_intra_block_visit, then
-//    td->predict_and_recon_intra_block_visit, then set_cb_buffer_offsets().
-//  - Inter blocks: calls td->predict_inter_block_visit, then (unless
-//    mbmi->skip is set) decode_reconstruct_tx() for every maximum-size
-//    transform block of every plane, then td->cfl_store_inter_block_visit.
-// In both cases the block is walked in units of at most 64x64, and
-// av1_visit_palette() is called last with set_color_index_map_offset.
-static void decode_token_recon_block(AV1Decoder *const pbi,
- ThreadData *const td, int mi_row,
- int mi_col, aom_reader *r,
- BLOCK_SIZE bsize) {
- AV1_COMMON *const cm = &pbi->common;
- MACROBLOCKD *const xd = &td->xd;
- const int num_planes = av1_num_planes(cm);
-
- MB_MODE_INFO *mbmi = xd->mi[0];
- CFL_CTX *const cfl = &xd->cfl;
- cfl->is_chroma_reference = is_chroma_reference(
- mi_row, mi_col, bsize, cfl->subsampling_x, cfl->subsampling_y);
-
- if (!is_inter_block(mbmi)) {
- int row, col;
- assert(bsize == get_plane_block_size(bsize, xd->plane[0].subsampling_x,
- xd->plane[0].subsampling_y));
- const int max_blocks_wide = max_block_wide(xd, bsize, 0);
- const int max_blocks_high = max_block_high(xd, bsize, 0);
- const BLOCK_SIZE max_unit_bsize = BLOCK_64X64;
- // Size of one 64x64 unit, then limited to the block's own extent.
- int mu_blocks_wide =
- block_size_wide[max_unit_bsize] >> tx_size_wide_log2[0];
- int mu_blocks_high =
- block_size_high[max_unit_bsize] >> tx_size_high_log2[0];
- mu_blocks_wide = AOMMIN(max_blocks_wide, mu_blocks_wide);
- mu_blocks_high = AOMMIN(max_blocks_high, mu_blocks_high);
-
- // Outer two loops step over the units; all planes of a unit are handled
- // before moving to the next unit.
- for (row = 0; row < max_blocks_high; row += mu_blocks_high) {
- for (col = 0; col < max_blocks_wide; col += mu_blocks_wide) {
- for (int plane = 0; plane < num_planes; ++plane) {
- const struct macroblockd_plane *const pd = &xd->plane[plane];
- if (!is_chroma_reference(mi_row, mi_col, bsize, pd->subsampling_x,
- pd->subsampling_y))
- continue;
-
- const TX_SIZE tx_size = av1_get_tx_size(plane, xd);
- const int stepr = tx_size_high_unit[tx_size];
- const int stepc = tx_size_wide_unit[tx_size];
-
- // End of this unit (clipped to the block), scaled by the plane's
- // subsampling.
- const int unit_height = ROUND_POWER_OF_TWO(
- AOMMIN(mu_blocks_high + row, max_blocks_high), pd->subsampling_y);
- const int unit_width = ROUND_POWER_OF_TWO(
- AOMMIN(mu_blocks_wide + col, max_blocks_wide), pd->subsampling_x);
-
- for (int blk_row = row >> pd->subsampling_y; blk_row < unit_height;
- blk_row += stepr) {
- for (int blk_col = col >> pd->subsampling_x; blk_col < unit_width;
- blk_col += stepc) {
- td->read_coeffs_tx_intra_block_visit(cm, xd, r, plane, blk_row,
- blk_col, tx_size);
- td->predict_and_recon_intra_block_visit(cm, xd, r, plane, blk_row,
- blk_col, tx_size);
- set_cb_buffer_offsets(xd, tx_size, plane);
- }
- }
- }
- }
- }
- } else {
- td->predict_inter_block_visit(cm, xd, mi_row, mi_col, bsize);
- // Reconstruction
- if (!mbmi->skip) {
- int eobtotal = 0;
-
- const int max_blocks_wide = max_block_wide(xd, bsize, 0);
- const int max_blocks_high = max_block_high(xd, bsize, 0);
- int row, col;
-
- const BLOCK_SIZE max_unit_bsize = BLOCK_64X64;
- assert(max_unit_bsize ==
- get_plane_block_size(BLOCK_64X64, xd->plane[0].subsampling_x,
- xd->plane[0].subsampling_y));
- // Size of one 64x64 unit, then limited to the block's own extent.
- int mu_blocks_wide =
- block_size_wide[max_unit_bsize] >> tx_size_wide_log2[0];
- int mu_blocks_high =
- block_size_high[max_unit_bsize] >> tx_size_high_log2[0];
-
- mu_blocks_wide = AOMMIN(max_blocks_wide, mu_blocks_wide);
- mu_blocks_high = AOMMIN(max_blocks_high, mu_blocks_high);
-
- for (row = 0; row < max_blocks_high; row += mu_blocks_high) {
- for (col = 0; col < max_blocks_wide; col += mu_blocks_wide) {
- for (int plane = 0; plane < num_planes; ++plane) {
- const struct macroblockd_plane *const pd = &xd->plane[plane];
- if (!is_chroma_reference(mi_row, mi_col, bsize, pd->subsampling_x,
- pd->subsampling_y))
- continue;
- const BLOCK_SIZE bsizec =
- scale_chroma_bsize(bsize, pd->subsampling_x, pd->subsampling_y);
- const BLOCK_SIZE plane_bsize = get_plane_block_size(
- bsizec, pd->subsampling_x, pd->subsampling_y);
-
- const TX_SIZE max_tx_size =
- get_vartx_max_txsize(xd, plane_bsize, plane);
- const int bh_var_tx = tx_size_high_unit[max_tx_size];
- const int bw_var_tx = tx_size_wide_unit[max_tx_size];
- // `block` is passed to decode_reconstruct_tx() and advances by the
- // area of one maximum-size transform (`step`) per call; it restarts
- // at 0 for every plane of every unit.
- int block = 0;
- int step =
- tx_size_wide_unit[max_tx_size] * tx_size_high_unit[max_tx_size];
- int blk_row, blk_col;
- const int unit_height = ROUND_POWER_OF_TWO(
- AOMMIN(mu_blocks_high + row, max_blocks_high),
- pd->subsampling_y);
- const int unit_width = ROUND_POWER_OF_TWO(
- AOMMIN(mu_blocks_wide + col, max_blocks_wide),
- pd->subsampling_x);
-
- for (blk_row = row >> pd->subsampling_y; blk_row < unit_height;
- blk_row += bh_var_tx) {
- for (blk_col = col >> pd->subsampling_x; blk_col < unit_width;
- blk_col += bw_var_tx) {
- decode_reconstruct_tx(cm, td, r, mbmi, plane, plane_bsize,
- blk_row, blk_col, block, max_tx_size,
- &eobtotal);
- block += step;
- }
- }
- }
- }
- }
- }
- // Called for every inter block, whether or not it was skipped.
- td->cfl_store_inter_block_visit(cm, xd);
- }
-
- av1_visit_palette(pbi, xd, mi_row, mi_col, r, bsize,
- set_color_index_map_offset);
-}
-
-#if LOOP_FILTER_BITMASK
-static void store_bitmask_vartx(AV1_COMMON *cm, int mi_row, int mi_col,
- BLOCK_SIZE bsize, TX_SIZE tx_size,
- MB_MODE_INFO *mbmi);
-#endif
-
-// Recursively reads the transform partitioning of the region whose top-left
-// transform unit is (blk_row, blk_col) and whose current candidate size is
-// tx_size, at recursion level `depth`. The chosen size is written to
-// mbmi->inter_tx_size[] for every covered unit and to mbmi->tx_size, and the
-// above/left transform contexts are updated. Positions at or beyond the
-// block's limits (max_block_high / max_block_wide) are ignored. When
-// LOOP_FILTER_BITMASK is enabled the function takes cm, mi_row and mi_col as
-// well and records loop-filter bitmasks through store_bitmask_vartx().
-static void read_tx_size_vartx(MACROBLOCKD *xd, MB_MODE_INFO *mbmi,
- TX_SIZE tx_size, int depth,
-#if LOOP_FILTER_BITMASK
- AV1_COMMON *cm, int mi_row, int mi_col,
-#endif
- int blk_row, int blk_col, aom_reader *r) {
- FRAME_CONTEXT *ec_ctx = xd->tile_ctx;
- int is_split = 0;
- const BLOCK_SIZE bsize = mbmi->sb_type;
- const int max_blocks_high = max_block_high(xd, bsize, 0);
- const int max_blocks_wide = max_block_wide(xd, bsize, 0);
- if (blk_row >= max_blocks_high || blk_col >= max_blocks_wide) return;
- assert(tx_size > TX_4X4);
-
- // Maximum depth reached: no split symbol is read, tx_size is final.
- if (depth == MAX_VARTX_DEPTH) {
- for (int idy = 0; idy < tx_size_high_unit[tx_size]; ++idy) {
- for (int idx = 0; idx < tx_size_wide_unit[tx_size]; ++idx) {
- const int index =
- av1_get_txb_size_index(bsize, blk_row + idy, blk_col + idx);
- mbmi->inter_tx_size[index] = tx_size;
- }
- }
- mbmi->tx_size = tx_size;
- txfm_partition_update(xd->above_txfm_context + blk_col,
- xd->left_txfm_context + blk_row, tx_size, tx_size);
- return;
- }
-
- // Read the split flag with a context taken from the neighbouring transform
- // contexts.
- const int ctx = txfm_partition_context(xd->above_txfm_context + blk_col,
- xd->left_txfm_context + blk_row,
- mbmi->sb_type, tx_size);
- is_split = aom_read_symbol(r, ec_ctx->txfm_partition_cdf[ctx], 2, ACCT_STR);
-
- if (is_split) {
- const TX_SIZE sub_txs = sub_tx_size_map[tx_size];
- const int bsw = tx_size_wide_unit[sub_txs];
- const int bsh = tx_size_high_unit[sub_txs];
-
- // A split that lands on TX_4X4 ends the recursion: record TX_4X4 for the
- // whole region and return.
- if (sub_txs == TX_4X4) {
- for (int idy = 0; idy < tx_size_high_unit[tx_size]; ++idy) {
- for (int idx = 0; idx < tx_size_wide_unit[tx_size]; ++idx) {
- const int index =
- av1_get_txb_size_index(bsize, blk_row + idy, blk_col + idx);
- mbmi->inter_tx_size[index] = sub_txs;
- }
- }
- mbmi->tx_size = sub_txs;
- txfm_partition_update(xd->above_txfm_context + blk_col,
- xd->left_txfm_context + blk_row, sub_txs, tx_size);
-#if LOOP_FILTER_BITMASK
- store_bitmask_vartx(cm, mi_row + blk_row, mi_col + blk_col, BLOCK_8X8,
- TX_4X4, mbmi);
-#endif
- return;
- }
-#if LOOP_FILTER_BITMASK
- // The children will hit the maximum-depth case above, which stores no
- // bitmask, so it is stored here for the whole region with the sub size.
- if (depth + 1 == MAX_VARTX_DEPTH) {
- store_bitmask_vartx(cm, mi_row + blk_row, mi_col + blk_col,
- txsize_to_bsize[tx_size], sub_txs, mbmi);
- }
-#endif
-
- // Recurse into each sub-transform of the region.
- assert(bsw > 0 && bsh > 0);
- for (int row = 0; row < tx_size_high_unit[tx_size]; row += bsh) {
- for (int col = 0; col < tx_size_wide_unit[tx_size]; col += bsw) {
- int offsetr = blk_row + row;
- int offsetc = blk_col + col;
- read_tx_size_vartx(xd, mbmi, sub_txs, depth + 1,
-#if LOOP_FILTER_BITMASK
- cm, mi_row, mi_col,
-#endif
- offsetr, offsetc, r);
- }
- }
- } else {
- // Not split: tx_size is final for the whole region.
- for (int idy = 0; idy < tx_size_high_unit[tx_size]; ++idy) {
- for (int idx = 0; idx < tx_size_wide_unit[tx_size]; ++idx) {
- const int index =
- av1_get_txb_size_index(bsize, blk_row + idy, blk_col + idx);
- mbmi->inter_tx_size[index] = tx_size;
- }
- }
- mbmi->tx_size = tx_size;
- txfm_partition_update(xd->above_txfm_context + blk_col,
- xd->left_txfm_context + blk_row, tx_size, tx_size);
-#if LOOP_FILTER_BITMASK
- store_bitmask_vartx(cm, mi_row + blk_row, mi_col + blk_col,
- txsize_to_bsize[tx_size], tx_size, mbmi);
-#endif
- }
-}
-
-// Reads the transform-size depth symbol of the current block from r and
-// converts it to a TX_SIZE with depth_to_tx_size().
-static TX_SIZE read_selected_tx_size(MACROBLOCKD *xd, aom_reader *r) {
-  // TODO(debargha): Clean up the logic here. This function should only
-  // be called for intra.
-  const BLOCK_SIZE blk_size = xd->mi[0]->sb_type;
-  const int32_t size_cat = bsize_to_tx_size_cat(blk_size);
-  const int deepest = bsize_to_max_depth(blk_size);
-  const int cdf_ctx = get_tx_size_context(xd);
-  FRAME_CONTEXT *const fc = xd->tile_ctx;
-
-  // The symbol alphabet has one entry per allowed depth, 0..deepest.
-  const int coded_depth = aom_read_symbol(
-      r, fc->tx_size_cdf[size_cat][cdf_ctx], deepest + 1, ACCT_STR);
-  assert(coded_depth >= 0 && coded_depth <= deepest);
-  return depth_to_tx_size(coded_depth, blk_size);
-}
-
-// Returns the transform size of the current block.
-//  - Lossless segment: always TX_4X4.
-//  - Block size that does not signal a transform size: the largest
-//    rectangular transform for the block size.
-//  - Otherwise: read from the bitstream when the frame uses TX_MODE_SELECT
-//    and the block is either not inter or allow_select_inter is set; else
-//    derived from the frame's tx_mode.
-static TX_SIZE read_tx_size(AV1_COMMON *cm, MACROBLOCKD *xd, int is_inter,
-                            int allow_select_inter, aom_reader *r) {
-  const TX_MODE frame_tx_mode = cm->tx_mode;
-  const BLOCK_SIZE blk_size = xd->mi[0]->sb_type;
-
-  if (xd->lossless[xd->mi[0]->segment_id]) return TX_4X4;
-
-  if (!block_signals_txsize(blk_size)) {
-    assert(IMPLIES(frame_tx_mode == ONLY_4X4, blk_size == BLOCK_4X4));
-    return max_txsize_rect_lookup[blk_size];
-  }
-
-  const int size_is_coded =
-      frame_tx_mode == TX_MODE_SELECT && (!is_inter || allow_select_inter);
-  if (size_is_coded) return read_selected_tx_size(xd, r);
-  return tx_size_from_tx_mode(blk_size, frame_tx_mode);
-}
-
-#if LOOP_FILTER_BITMASK
-// ORs the loop-filter transform-size bitmask for a region of size bsize at
-// (mi_row, mi_col) using transform size tx_size into the LoopFilterMask
-// returned by get_loop_filter_mask(). (tx_size, bsize) is first mapped to a
-// row `mask_id` of the left_/above_mask_univariant_reordered tables; the
-// table bits are then shifted to the region's position and merged into the
-// luma (index 0) and chroma (index 1) tx_size_ver / tx_size_hor masks.
-// mbmi is only read for sb_type, which selects the chroma transform size.
-static void store_bitmask_vartx(AV1_COMMON *cm, int mi_row, int mi_col,
- BLOCK_SIZE bsize, TX_SIZE tx_size,
- MB_MODE_INFO *mbmi) {
- LoopFilterMask *lfm = get_loop_filter_mask(cm, mi_row, mi_col);
- const TX_SIZE tx_size_y_vert = txsize_vert_map[tx_size];
- const TX_SIZE tx_size_y_horz = txsize_horz_map[tx_size];
- const TX_SIZE tx_size_uv_vert = txsize_vert_map[av1_get_max_uv_txsize(
- mbmi->sb_type, cm->seq_params.subsampling_x,
- cm->seq_params.subsampling_y)];
- const TX_SIZE tx_size_uv_horz = txsize_horz_map[av1_get_max_uv_txsize(
- mbmi->sb_type, cm->seq_params.subsampling_x,
- cm->seq_params.subsampling_y)];
- // NOTE(review): the range tests below rely on the TX_SIZE enumerator order
- // (square sizes up to TX_64X64 first, then TX_4X8..TX_32X16) - confirm
- // against the enum definition.
- const int is_square_transform_size = tx_size <= TX_64X64;
- int mask_id = 0;
- int offset = 0;
- const int half_ratio_tx_size_max32 =
- (tx_size > TX_64X64) & (tx_size <= TX_32X16);
- if (is_square_transform_size) {
- // Per-size lookup by block size, plus a fixed base offset for that size.
- switch (tx_size) {
- case TX_4X4: mask_id = mask_id_table_tx_4x4[bsize]; break;
- case TX_8X8:
- mask_id = mask_id_table_tx_8x8[bsize];
- offset = 19;
- break;
- case TX_16X16:
- mask_id = mask_id_table_tx_16x16[bsize];
- offset = 33;
- break;
- case TX_32X32:
- mask_id = mask_id_table_tx_32x32[bsize];
- offset = 42;
- break;
- case TX_64X64: mask_id = 46; break;
- default: assert(!is_square_transform_size); return;
- }
- mask_id += offset;
- } else if (half_ratio_tx_size_max32) {
- // Two rows per transform size: the first when the transform covers the
- // whole block, the second otherwise.
- int tx_size_equal_block_size = bsize == txsize_to_bsize[tx_size];
- mask_id = 47 + 2 * (tx_size - TX_4X8) + (tx_size_equal_block_size ? 0 : 1);
- } else if (tx_size == TX_32X64) {
- mask_id = 59;
- } else if (tx_size == TX_64X32) {
- mask_id = 60;
- } else { // quarter ratio tx size
- mask_id = 61 + (tx_size - TX_4X16);
- }
- int index = 0;
- // Position inside the 64x64 area covered by one LoopFilterMask.
- const int row = mi_row % MI_SIZE_64X64;
- const int col = mi_col % MI_SIZE_64X64;
- const int shift = get_index_shift(col, row, &index);
- const int vert_shift = tx_size_y_vert <= TX_8X8 ? shift : col;
- // Merge table words 0.. into mask words index..3.
- for (int i = 0; i + index < 4; ++i) {
- // y vertical.
- lfm->tx_size_ver[0][tx_size_y_horz].bits[i + index] |=
- (left_mask_univariant_reordered[mask_id].bits[i] << vert_shift);
- // y horizontal.
- lfm->tx_size_hor[0][tx_size_y_vert].bits[i + index] |=
- (above_mask_univariant_reordered[mask_id].bits[i] << shift);
- // u/v vertical.
- lfm->tx_size_ver[1][tx_size_uv_horz].bits[i + index] |=
- (left_mask_univariant_reordered[mask_id].bits[i] << vert_shift);
- // u/v horizontal.
- lfm->tx_size_hor[1][tx_size_uv_vert].bits[i + index] |=
- (above_mask_univariant_reordered[mask_id].bits[i] << shift);
- }
-}
-
-// Stores the loop-filter transform-size bitmask for a region of size bsize
-// at (mi_row, mi_col) whose transform size is the single value held in
-// mbmi->tx_size.
-static void store_bitmask_univariant_tx(AV1_COMMON *cm, int mi_row, int mi_col,
-                                        BLOCK_SIZE bsize, MB_MODE_INFO *mbmi) {
-  // Use a lookup table that provides one bitmask for a given block size and
-  // a univariant transform size. The lookup and the merge into the mask are
-  // exactly what store_bitmask_vartx() does for an explicit transform size,
-  // so delegate to it with mbmi->tx_size instead of duplicating the logic.
-  store_bitmask_vartx(cm, mi_row, mi_col, bsize, mbmi->tx_size, mbmi);
-}
-
-// Records the non-transform loop-filter information of a region of size
-// bsize at (mi_row, mi_col) in the LoopFilterMask returned by
-// get_loop_filter_mask(): the horizontal and vertical border bits, the skip
-// bits (only when the block is a skipped inter block) and the per-position
-// filter levels for Y (vertical and horizontal), U and V.
-static void store_bitmask_other_info(AV1_COMMON *cm, int mi_row, int mi_col,
-                                     BLOCK_SIZE bsize, MB_MODE_INFO *mbmi) {
-  int index;
-  LoopFilterMask *lfm = get_loop_filter_mask(cm, mi_row, mi_col);
-  // Position inside the 64x64 area covered by one LoopFilterMask.
-  const int row_start = mi_row % MI_SIZE_64X64;
-  const int col_start = mi_col % MI_SIZE_64X64;
-  const int shift = get_index_shift(col_start, row_start, &index);
-
-  // Top edge: mi_size_wide[bsize] consecutive bits starting at bit `shift`.
-  // The run is built at bit 0 and then moved up. The earlier form,
-  // (1 << (shift + width)) - (1 << shift), gives the same value whenever
-  // shift + width < 64 but performs an undefined 64-bit shift by 64 when the
-  // run ends exactly at the top bit of the word.
-  // NOTE(review): assumes mi_size_wide[bsize] < 64 and 0 <= shift < 64 -
-  // confirm against get_index_shift().
-  const uint64_t top_edge_mask =
-      (((uint64_t)1 << mi_size_wide[bsize]) - 1) << shift;
-  lfm->is_horz_border.bits[index] |= top_edge_mask;
-
-  const int is_vert_border = mask_id_table_vert_border[bsize];
-  const int vert_shift = block_size_high[bsize] <= 8 ? shift : col_start;
-  for (int i = 0; i + index < 4; ++i) {
-    lfm->is_vert_border.bits[i + index] |=
-        (left_mask_univariant_reordered[is_vert_border].bits[i] << vert_shift);
-  }
-
-  const int is_skip = mbmi->skip && is_inter_block(mbmi);
-  if (is_skip) {
-    const int is_skip_mask = mask_id_table_tx_4x4[bsize];
-    for (int i = 0; i + index < 4; ++i) {
-      lfm->skip.bits[i + index] |=
-          (above_mask_univariant_reordered[is_skip_mask].bits[i] << shift);
-    }
-  }
-
-  const uint8_t level_vert_y = get_filter_level(cm, &cm->lf_info, 0, 0, mbmi);
-  const uint8_t level_horz_y = get_filter_level(cm, &cm->lf_info, 1, 0, mbmi);
-  const uint8_t level_u = get_filter_level(cm, &cm->lf_info, 0, 1, mbmi);
-  const uint8_t level_v = get_filter_level(cm, &cm->lf_info, 0, 2, mbmi);
-
-  // Fill one row segment of each level map per mode-info row of the block.
-  const size_t row_bytes = sizeof(uint8_t) * mi_size_wide[bsize];
-  const int row_end = mi_row + mi_size_high[bsize];
-  for (int r = mi_row; r < row_end; r++) {
-    const int row = r % MI_SIZE_64X64;
-    memset(&lfm->lfl_y_ver[row][col_start], level_vert_y, row_bytes);
-    memset(&lfm->lfl_y_hor[row][col_start], level_horz_y, row_bytes);
-    memset(&lfm->lfl_u[row][col_start], level_u, row_bytes);
-    memset(&lfm->lfl_v[row][col_start], level_v, row_bytes);
-  }
-}
-#endif
-
-// Parse-side block visitor (installed in decode_partition()'s block_visit
-// table at indices 1 and 3). For one block it calls decode_mbmi_block() and
-// av1_visit_palette(), reads the transform size(s), refreshes the per-segment
-// dequantizers when delta-q is present, then calls decode_token_recon_block().
-// Any error reported by the reader is merged into xd->corrupted on exit.
-static void parse_decode_block(AV1Decoder *const pbi, ThreadData *const td,
- int mi_row, int mi_col, aom_reader *r,
- PARTITION_TYPE partition, BLOCK_SIZE bsize) {
- MACROBLOCKD *const xd = &td->xd;
- decode_mbmi_block(pbi, xd, mi_row, mi_col, r, partition, bsize);
-
- av1_visit_palette(pbi, xd, mi_row, mi_col, r, bsize,
- av1_decode_palette_tokens);
-
- AV1_COMMON *cm = &pbi->common;
- const int num_planes = av1_num_planes(cm);
- MB_MODE_INFO *mbmi = xd->mi[0];
- // Intra-block-copy blocks are grouped with inter blocks for the purpose of
- // transform-size signalling.
- int inter_block_tx = is_inter_block(mbmi) || is_intrabc_block(mbmi);
- // Per-unit transform sizes are read only when all of the conditions below
- // hold; otherwise a single transform size is read for the whole block.
- if (cm->tx_mode == TX_MODE_SELECT && block_signals_txsize(bsize) &&
- !mbmi->skip && inter_block_tx && !xd->lossless[mbmi->segment_id]) {
- const TX_SIZE max_tx_size = max_txsize_rect_lookup[bsize];
- const int bh = tx_size_high_unit[max_tx_size];
- const int bw = tx_size_wide_unit[max_tx_size];
- const int width = block_size_wide[bsize] >> tx_size_wide_log2[0];
- const int height = block_size_high[bsize] >> tx_size_high_log2[0];
-
- // Step over the block in units of the largest transform size for bsize.
- for (int idy = 0; idy < height; idy += bh)
- for (int idx = 0; idx < width; idx += bw)
- read_tx_size_vartx(xd, mbmi, max_tx_size, 0,
-#if LOOP_FILTER_BITMASK
- cm, mi_row, mi_col,
-#endif
- idy, idx, r);
- } else {
- mbmi->tx_size = read_tx_size(cm, xd, inter_block_tx, !mbmi->skip, r);
- // Fill every inter_tx_size entry with the single block-level size.
- if (inter_block_tx)
- memset(mbmi->inter_tx_size, mbmi->tx_size, sizeof(mbmi->inter_tx_size));
- set_txfm_ctxs(mbmi->tx_size, xd->n4_w, xd->n4_h,
- mbmi->skip && is_inter_block(mbmi), xd);
-#if LOOP_FILTER_BITMASK
- const int w = mi_size_wide[bsize];
- const int h = mi_size_high[bsize];
- // Blocks no larger than 64x64 are recorded in one call; larger blocks are
- // recorded one 64x64 unit at a time.
- if (w <= mi_size_wide[BLOCK_64X64] && h <= mi_size_high[BLOCK_64X64]) {
- store_bitmask_univariant_tx(cm, mi_row, mi_col, bsize, mbmi);
- } else {
- for (int row = 0; row < h; row += mi_size_high[BLOCK_64X64]) {
- for (int col = 0; col < w; col += mi_size_wide[BLOCK_64X64]) {
- store_bitmask_univariant_tx(cm, mi_row + row, mi_col + col,
- BLOCK_64X64, mbmi);
- }
- }
- }
-#endif
- }
-#if LOOP_FILTER_BITMASK
- // Same 64x64 tiling as above, this time for store_bitmask_other_info(),
- // which runs for both transform-size paths.
- const int w = mi_size_wide[bsize];
- const int h = mi_size_high[bsize];
- if (w <= mi_size_wide[BLOCK_64X64] && h <= mi_size_high[BLOCK_64X64]) {
- store_bitmask_other_info(cm, mi_row, mi_col, bsize, mbmi);
- } else {
- for (int row = 0; row < h; row += mi_size_high[BLOCK_64X64]) {
- for (int col = 0; col < w; col += mi_size_wide[BLOCK_64X64]) {
- store_bitmask_other_info(cm, mi_row + row, mi_col + col, BLOCK_64X64,
- mbmi);
- }
- }
- }
-#endif
-
- // With delta-q present the effective qindex comes from xd->current_qindex,
- // so the DC ([0]) and AC ([1]) dequantizers of every segment and plane are
- // recomputed here.
- if (cm->delta_q_present_flag) {
- for (int i = 0; i < MAX_SEGMENTS; i++) {
- const int current_qindex =
- av1_get_qindex(&cm->seg, i, xd->current_qindex);
- for (int j = 0; j < num_planes; ++j) {
- const int dc_delta_q =
- j == 0 ? cm->y_dc_delta_q
- : (j == 1 ? cm->u_dc_delta_q : cm->v_dc_delta_q);
- // Plane 0 uses an AC delta of 0; the other planes use their own delta.
- const int ac_delta_q =
- j == 0 ? 0 : (j == 1 ? cm->u_ac_delta_q : cm->v_ac_delta_q);
- xd->plane[j].seg_dequant_QTX[i][0] = av1_dc_quant_QTX(
- current_qindex, dc_delta_q, cm->seq_params.bit_depth);
- xd->plane[j].seg_dequant_QTX[i][1] = av1_ac_quant_QTX(
- current_qindex, ac_delta_q, cm->seq_params.bit_depth);
- }
- }
- }
- if (mbmi->skip) av1_reset_skip_context(xd, mi_row, mi_col, bsize, num_planes);
-
- decode_token_recon_block(pbi, td, mi_row, mi_col, r, bsize);
-
- // Fold any reader error into the per-thread corruption flag.
- int reader_corrupted_flag = aom_reader_has_error(r);
- aom_merge_corrupted_flag(&xd->corrupted, reader_corrupted_flag);
-}
-
-// Points xd at the block located at (mi_row, mi_col) and sets up the plane
-// dimensions, edge distances and destination planes for that block.
-static void set_offsets_for_pred_and_recon(AV1Decoder *const pbi,
-                                           ThreadData *const td, int mi_row,
-                                           int mi_col, BLOCK_SIZE bsize) {
-  AV1_COMMON *const cm = &pbi->common;
-  MACROBLOCKD *const xd = &td->xd;
-  const int blk_w_mi = mi_size_wide[bsize];
-  const int blk_h_mi = mi_size_high[bsize];
-  const int planes = av1_num_planes(cm);
-
-  // Mode-info entry of this block inside the visible grid.
-  xd->mi = cm->mi_grid_visible + (mi_row * cm->mi_stride + mi_col);
-  xd->cfl.mi_row = mi_row;
-  xd->cfl.mi_col = mi_col;
-
-  set_plane_n4(xd, blk_w_mi, blk_h_mi, planes);
-
-  // Distances from the block to the image edges are kept in 1/8th pel units
-  // because they are always compared against values of that precision.
-  const TileInfo *const cur_tile = &xd->tile;
-  set_mi_row_col(xd, cur_tile, mi_row, blk_h_mi, mi_col, blk_w_mi, cm->mi_rows,
-                 cm->mi_cols);
-
-  av1_setup_dst_planes(xd->plane, bsize, get_frame_new_buffer(cm), mi_row,
-                       mi_col, 0, planes);
-}
-
-// Decode-only block visitor (block_visit index 2 in decode_partition()):
-// re-establishes the per-block offsets and then calls
-// decode_token_recon_block(). `partition` is accepted only so the signature
-// matches the other visitor and is otherwise unused.
-static void decode_block(AV1Decoder *const pbi, ThreadData *const td,
-                         int mi_row, int mi_col, aom_reader *r,
-                         PARTITION_TYPE partition, BLOCK_SIZE bsize) {
-  (void)partition;  // unused
-
-  set_offsets_for_pred_and_recon(pbi, td, mi_row, mi_col, bsize);
-
-  decode_token_recon_block(pbi, td, mi_row, mi_col, r, bsize);
-}
-
-// Reads the partition type of the bsize block at (mi_row, mi_col).
-//   has_rows / has_cols: non-zero when the second half of the block in that
-//   direction is available (callers derive them from the frame dimensions).
-// With both halves missing nothing is read and PARTITION_SPLIT is returned.
-// With exactly one half missing a binary symbol chooses between
-// PARTITION_SPLIT and the one non-split partition allowed in that case.
-static PARTITION_TYPE read_partition(MACROBLOCKD *xd, int mi_row, int mi_col,
-                                     aom_reader *r, int has_rows, int has_cols,
-                                     BLOCK_SIZE bsize) {
-  const int ctx = partition_plane_context(xd, mi_row, mi_col, bsize);
-  FRAME_CONTEXT *ec_ctx = xd->tile_ctx;
-
-  // Neither half is available: the split is implied, no symbol is coded.
-  if (!(has_rows || has_cols)) return PARTITION_SPLIT;
-
-  assert(ctx >= 0);
-  aom_cdf_prob *partition_cdf = ec_ctx->partition_cdf[ctx];
-
-  // Both halves available: the full partition alphabet for bsize is coded.
-  if (has_rows && has_cols) {
-    return (PARTITION_TYPE)aom_read_symbol(
-        r, partition_cdf, partition_cdf_length(bsize), ACCT_STR);
-  }
-
-  if (!has_rows && has_cols) {
-    // Bottom half missing: choose between SPLIT and HORZ.
-    assert(bsize > BLOCK_8X8);
-    aom_cdf_prob binary_cdf[2];
-    partition_gather_vert_alike(binary_cdf, partition_cdf, bsize);
-    assert(binary_cdf[1] == AOM_ICDF(CDF_PROB_TOP));
-    const int is_split = aom_read_cdf(r, binary_cdf, 2, ACCT_STR);
-    return is_split ? PARTITION_SPLIT : PARTITION_HORZ;
-  }
-
-  // Right half missing: choose between SPLIT and VERT.
-  assert(has_rows && !has_cols);
-  assert(bsize > BLOCK_8X8);
-  aom_cdf_prob binary_cdf[2];
-  partition_gather_horz_alike(binary_cdf, partition_cdf, bsize);
-  assert(binary_cdf[1] == AOM_ICDF(CDF_PROB_TOP));
-  const int is_split = aom_read_cdf(r, binary_cdf, 2, ACCT_STR);
-  return is_split ? PARTITION_SPLIT : PARTITION_VERT;
-}
-
-// Recursively walks the partition tree rooted at the bsize block located at
-// (mi_row, mi_col) and calls the visitor selected by parse_decode_flag on
-// every leaf block. When bit 0 of the flag is set, the loop-restoration unit
-// coefficients and the partition type are read from r; otherwise the
-// partition type is obtained from get_partition().
-// TODO(slavarnway): eliminate bsize and subsize in future commits
-static void decode_partition(AV1Decoder *const pbi, ThreadData *const td,
- int mi_row, int mi_col, aom_reader *r,
- BLOCK_SIZE bsize, int parse_decode_flag) {
- AV1_COMMON *const cm = &pbi->common;
- MACROBLOCKD *const xd = &td->xd;
- const int bw = mi_size_wide[bsize];
- // Half of the block width in mi units; used as the step in both directions.
- const int hbs = bw >> 1;
- PARTITION_TYPE partition;
- BLOCK_SIZE subsize;
- const int quarter_step = bw / 4;
- BLOCK_SIZE bsize2 = get_partition_subsize(bsize, PARTITION_SPLIT);
- // Non-zero when the second half of the block starts inside the frame.
- const int has_rows = (mi_row + hbs) < cm->mi_rows;
- const int has_cols = (mi_col + hbs) < cm->mi_cols;
-
- // Blocks that start outside the frame are skipped entirely.
- if (mi_row >= cm->mi_rows || mi_col >= cm->mi_cols) return;
-
- // parse_decode_flag takes the following values :
- // 01 - do parse only
- // 10 - do decode only
- // 11 - do parse and decode
- // Entry 0 is NULL, so the flag must be one of the three values above.
- static const block_visitor_fn_t block_visit[4] = {
- NULL, parse_decode_block, decode_block, parse_decode_block
- };
-
- if (parse_decode_flag & 1) {
- const int num_planes = av1_num_planes(cm);
- // For each plane, read the coefficients of every restoration unit in the
- // [rrow0, rrow1) x [rcol0, rcol1) range that
- // av1_loop_restoration_corners_in_sb() reports for this block.
- for (int plane = 0; plane < num_planes; ++plane) {
- int rcol0, rcol1, rrow0, rrow1;
- if (av1_loop_restoration_corners_in_sb(cm, plane, mi_row, mi_col, bsize,
- &rcol0, &rcol1, &rrow0, &rrow1)) {
- const int rstride = cm->rst_info[plane].horz_units_per_tile;
- for (int rrow = rrow0; rrow < rrow1; ++rrow) {
- for (int rcol = rcol0; rcol < rcol1; ++rcol) {
- const int runit_idx = rcol + rrow * rstride;
- loop_restoration_read_sb_coeffs(cm, xd, r, plane, runit_idx);
- }
- }
- }
- }
-
- // Blocks below 8x8 carry no partition symbol and are never split.
- partition = (bsize < BLOCK_8X8) ? PARTITION_NONE
- : read_partition(xd, mi_row, mi_col, r,
- has_rows, has_cols, bsize);
- } else {
- partition = get_partition(cm, mi_row, mi_col, bsize);
- }
- subsize = get_partition_subsize(bsize, partition);
-
- // Check the bitstream is conformant: if there is subsampling on the
- // chroma planes, subsize must subsample to a valid block size.
- const struct macroblockd_plane *const pd_u = &xd->plane[1];
- if (get_plane_block_size(subsize, pd_u->subsampling_x, pd_u->subsampling_y) ==
- BLOCK_INVALID) {
- aom_internal_error(xd->error_info, AOM_CODEC_CORRUPT_FRAME,
- "Block size %dx%d invalid with this subsampling mode",
- block_size_wide[subsize], block_size_high[subsize]);
- }
-
- // Helper macros local to this function (undefined again after the switch).
- // DEC_BLOCK_STX_ARG expands to nothing, so DEC_BLOCK_STX_ARG(db_r) is just
- // (db_r); DEC_BLOCK_EPT_ARG expands to `partition,`, which inserts the
- // partition argument ahead of the block size in the visitor call.
-#define DEC_BLOCK_STX_ARG
-#define DEC_BLOCK_EPT_ARG partition,
-#define DEC_BLOCK(db_r, db_c, db_subsize) \
- block_visit[parse_decode_flag](pbi, td, DEC_BLOCK_STX_ARG(db_r), (db_c), r, \
- DEC_BLOCK_EPT_ARG(db_subsize))
-#define DEC_PARTITION(db_r, db_c, db_subsize) \
- decode_partition(pbi, td, DEC_BLOCK_STX_ARG(db_r), (db_c), r, (db_subsize), \
- parse_decode_flag)
-
- switch (partition) {
- case PARTITION_NONE: DEC_BLOCK(mi_row, mi_col, subsize); break;
- case PARTITION_HORZ:
- DEC_BLOCK(mi_row, mi_col, subsize);
- // The second half is visited only if it starts inside the frame.
- if (has_rows) DEC_BLOCK(mi_row + hbs, mi_col, subsize);
- break;
- case PARTITION_VERT:
- DEC_BLOCK(mi_row, mi_col, subsize);
- if (has_cols) DEC_BLOCK(mi_row, mi_col + hbs, subsize);
- break;
- case PARTITION_SPLIT:
- // Recurse into the four quadrants; each call returns immediately if its
- // quadrant starts outside the frame.
- DEC_PARTITION(mi_row, mi_col, subsize);
- DEC_PARTITION(mi_row, mi_col + hbs, subsize);
- DEC_PARTITION(mi_row + hbs, mi_col, subsize);
- DEC_PARTITION(mi_row + hbs, mi_col + hbs, subsize);
- break;
- // The _A/_B variants mix two bsize2 blocks (bsize2 is the subsize that
- // PARTITION_SPLIT would give) with one block of size subsize.
- case PARTITION_HORZ_A:
- DEC_BLOCK(mi_row, mi_col, bsize2);
- DEC_BLOCK(mi_row, mi_col + hbs, bsize2);
- DEC_BLOCK(mi_row + hbs, mi_col, subsize);
- break;
- case PARTITION_HORZ_B:
- DEC_BLOCK(mi_row, mi_col, subsize);
- DEC_BLOCK(mi_row + hbs, mi_col, bsize2);
- DEC_BLOCK(mi_row + hbs, mi_col + hbs, bsize2);
- break;
- case PARTITION_VERT_A:
- DEC_BLOCK(mi_row, mi_col, bsize2);
- DEC_BLOCK(mi_row + hbs, mi_col, bsize2);
- DEC_BLOCK(mi_row, mi_col + hbs, subsize);
- break;
- case PARTITION_VERT_B:
- DEC_BLOCK(mi_row, mi_col, subsize);
- DEC_BLOCK(mi_row, mi_col + hbs, bsize2);
- DEC_BLOCK(mi_row + hbs, mi_col + hbs, bsize2);
- break;
- case PARTITION_HORZ_4:
- // Up to four strips; the first is always visited, later ones only
- // while they start inside the frame.
- for (int i = 0; i < 4; ++i) {
- int this_mi_row = mi_row + i * quarter_step;
- if (i > 0 && this_mi_row >= cm->mi_rows) break;
- DEC_BLOCK(this_mi_row, mi_col, subsize);
- }
- break;
- case PARTITION_VERT_4:
- for (int i = 0; i < 4; ++i) {
- int this_mi_col = mi_col + i * quarter_step;
- if (i > 0 && this_mi_col >= cm->mi_cols) break;
- DEC_BLOCK(mi_row, this_mi_col, subsize);
- }
- break;
- default: assert(0 && "Invalid partition type");
- }
-
-#undef DEC_PARTITION
-#undef DEC_BLOCK
-#undef DEC_BLOCK_EPT_ARG
-#undef DEC_BLOCK_STX_ARG
-
- // The partition context is only updated on passes that parse.
- if (parse_decode_flag & 1)
- update_ext_partition_context(xd, mi_row, mi_col, subsize, bsize, partition);
-}
-
-// Initialises the symbol reader r over the read_size bytes starting at data.
-//   data_end:         end of the buffer that data points into.
-//   error_info:       receives the error if validation or initialisation
-//                     fails.
-//   allow_update_cdf: stored in r->allow_update_cdf.
-static void setup_bool_decoder(const uint8_t *data, const uint8_t *data_end,
-                               const size_t read_size,
-                               struct aom_internal_error_info *error_info,
-                               aom_reader *r, uint8_t allow_update_cdf) {
-  // The declared length must fit inside the buffer; otherwise the packet is
-  // truncated or the tile length is corrupt.
-  const int length_ok = read_is_valid(data, read_size, data_end);
-  if (!length_ok) {
-    aom_internal_error(error_info, AOM_CODEC_CORRUPT_FRAME,
-                       "Truncated packet or corrupt tile length");
-  }
-
-  const int init_failed = aom_reader_init(r, data, read_size);
-  if (init_failed) {
-    aom_internal_error(error_info, AOM_CODEC_MEM_ERROR,
-                       "Failed to allocate bool decoder %d", 1);
-  }
-
-  r->allow_update_cdf = allow_update_cdf;
-}
-
-// Reads the segmentation syntax from rb into cm->seg and copies the
-// resulting feature set into the current frame buffer.
-static void setup_segmentation(AV1_COMMON *const cm,
-                               struct aom_read_bit_buffer *rb) {
-  struct segmentation *const seg = &cm->seg;
-
-  seg->update_map = 0;
-  seg->update_data = 0;
-  seg->temporal_update = 0;
-
-  seg->enabled = aom_rb_read_bit(rb);
-  if (!seg->enabled) {
-    // Segmentation is off: clear the map (when present) and every feature.
-    if (cm->cur_frame->seg_map) {
-      memset(cm->cur_frame->seg_map, 0, (cm->mi_rows * cm->mi_cols));
-    }
-    memset(seg, 0, sizeof(*seg));
-    segfeatures_copy(&cm->cur_frame->seg, seg);
-    return;
-  }
-
-  // The previous frame's map can only be referenced when that frame exists
-  // and has the same mode-info dimensions as the current frame.
-  const int prev_map_usable = cm->seg.enabled && cm->prev_frame &&
-                              (cm->mi_rows == cm->prev_frame->mi_rows) &&
-                              (cm->mi_cols == cm->prev_frame->mi_cols);
-  cm->last_frame_seg_map = prev_map_usable ? cm->prev_frame->seg_map : NULL;
-
-  // Update flags.
-  if (cm->primary_ref_frame == PRIMARY_REF_NONE) {
-    // No previous frame may be used, so both map and features are signalled.
-    seg->update_map = 1;
-    seg->temporal_update = 0;
-    seg->update_data = 1;
-  } else {
-    seg->update_map = aom_rb_read_bit(rb);
-    seg->temporal_update = seg->update_map ? aom_rb_read_bit(rb) : 0;
-    seg->update_data = aom_rb_read_bit(rb);
-  }
-
-  if (!seg->update_data) {
-    // Features are inherited from the previous frame when there is one.
-    if (cm->prev_frame) segfeatures_copy(seg, &cm->prev_frame->seg);
-  } else {
-    av1_clearall_segfeatures(seg);
-
-    for (int segment = 0; segment < MAX_SEGMENTS; ++segment) {
-      for (int feature = 0; feature < SEG_LVL_MAX; ++feature) {
-        int value = 0;
-        if (aom_rb_read_bit(rb)) {
-          av1_enable_segfeature(seg, segment, feature);
-
-          const int limit = av1_seg_feature_data_max(feature);
-          const int nbits = get_unsigned_bits(limit);
-
-          value = av1_is_segfeature_signed(feature)
-                      ? aom_rb_read_inv_signed_literal(rb, nbits)
-                      : aom_rb_read_literal(rb, nbits);
-
-          // Keep the decoded value within [-limit, limit].
-          value = clamp(value, -limit, limit);
-        }
-        av1_set_segdata(seg, segment, feature, value);
-      }
-    }
-    calculate_segdata(seg);
-  }
-
-  segfeatures_copy(&cm->cur_frame->seg, seg);
-}
-
-// Reads the frame-level loop-restoration type of every plane and derives the
-// restoration unit size of each plane. Nothing is read when intra block copy
-// is allowed.
-static void decode_restoration_mode(AV1_COMMON *cm,
-                                    struct aom_read_bit_buffer *rb) {
-  assert(!cm->all_lossless);
-  const int num_planes = av1_num_planes(cm);
-  if (cm->allow_intrabc) return;
-
-  int any_plane_restored = 0;
-  int chroma_restored = 0;
-  for (int plane = 0; plane < num_planes; ++plane) {
-    RestorationInfo *const info = &cm->rst_info[plane];
-    // Two bits per plane: 00 none, 01 switchable, 10 wiener, 11 sgrproj.
-    const int first_bit = aom_rb_read_bit(rb);
-    const int second_bit = aom_rb_read_bit(rb);
-    if (first_bit) {
-      info->frame_restoration_type =
-          second_bit ? RESTORE_SGRPROJ : RESTORE_WIENER;
-    } else {
-      info->frame_restoration_type =
-          second_bit ? RESTORE_SWITCHABLE : RESTORE_NONE;
-    }
-    if (info->frame_restoration_type != RESTORE_NONE) {
-      any_plane_restored = 1;
-      if (plane != 0) chroma_restored = 1;
-    }
-  }
-
-  if (any_plane_restored) {
-    assert(cm->seq_params.sb_size == BLOCK_64X64 ||
-           cm->seq_params.sb_size == BLOCK_128X128);
-    const int sb_size = cm->seq_params.sb_size == BLOCK_128X128 ? 128 : 64;
-
-    for (int plane = 0; plane < num_planes; ++plane) {
-      cm->rst_info[plane].restoration_unit_size = sb_size;
-    }
-
-    // The luma unit size starts at the superblock size; each bit read below
-    // doubles it when set.
-    RestorationInfo *const luma = &cm->rst_info[0];
-    if (sb_size == 64) {
-      luma->restoration_unit_size <<= aom_rb_read_bit(rb);
-    }
-    if (luma->restoration_unit_size > 64) {
-      luma->restoration_unit_size <<= aom_rb_read_bit(rb);
-    }
-  } else {
-    for (int plane = 0; plane < num_planes; ++plane) {
-      cm->rst_info[plane].restoration_unit_size = RESTORATION_UNITSIZE_MAX;
-    }
-  }
-
-  if (num_planes > 1) {
-    const int s =
-        AOMMIN(cm->seq_params.subsampling_x, cm->seq_params.subsampling_y);
-    if (s && chroma_restored) {
-      // One bit selects whether the chroma unit size is shifted down by s.
-      cm->rst_info[1].restoration_unit_size =
-          cm->rst_info[0].restoration_unit_size >> (aom_rb_read_bit(rb) * s);
-    } else {
-      cm->rst_info[1].restoration_unit_size =
-          cm->rst_info[0].restoration_unit_size;
-    }
-    cm->rst_info[2].restoration_unit_size =
-        cm->rst_info[1].restoration_unit_size;
-  }
-}
-
-// Reads the vertical and then the horizontal Wiener filter taps into
-// wiener_info. Each tap is coded relative to the matching tap of
-// ref_wiener_info, and the reference is overwritten with the new filter
-// before returning.
-//   wiener_win: when it differs from WIENER_WIN the outermost taps are not
-//               coded and stay 0.
-static void read_wiener_filter(int wiener_win, WienerInfo *wiener_info,
-                               WienerInfo *ref_wiener_info, aom_reader *rb) {
-  const int full_window = (wiener_win == WIENER_WIN);
-  int tap;
-
-  memset(wiener_info->vfilter, 0, sizeof(wiener_info->vfilter));
-  memset(wiener_info->hfilter, 0, sizeof(wiener_info->hfilter));
-
-  // ---- Vertical filter; taps are mirrored around the centre. ----
-  tap = 0;
-  if (full_window) {
-    tap = aom_read_primitive_refsubexpfin(
-              rb, WIENER_FILT_TAP0_MAXV - WIENER_FILT_TAP0_MINV + 1,
-              WIENER_FILT_TAP0_SUBEXP_K,
-              ref_wiener_info->vfilter[0] - WIENER_FILT_TAP0_MINV, ACCT_STR) +
-          WIENER_FILT_TAP0_MINV;
-  }
-  wiener_info->vfilter[0] = wiener_info->vfilter[WIENER_WIN - 1] = tap;
-
-  tap = aom_read_primitive_refsubexpfin(
-            rb, WIENER_FILT_TAP1_MAXV - WIENER_FILT_TAP1_MINV + 1,
-            WIENER_FILT_TAP1_SUBEXP_K,
-            ref_wiener_info->vfilter[1] - WIENER_FILT_TAP1_MINV, ACCT_STR) +
-        WIENER_FILT_TAP1_MINV;
-  wiener_info->vfilter[1] = wiener_info->vfilter[WIENER_WIN - 2] = tap;
-
-  tap = aom_read_primitive_refsubexpfin(
-            rb, WIENER_FILT_TAP2_MAXV - WIENER_FILT_TAP2_MINV + 1,
-            WIENER_FILT_TAP2_SUBEXP_K,
-            ref_wiener_info->vfilter[2] - WIENER_FILT_TAP2_MINV, ACCT_STR) +
-        WIENER_FILT_TAP2_MINV;
-  wiener_info->vfilter[2] = wiener_info->vfilter[WIENER_WIN - 3] = tap;
-
-  // The central element has an implicit +WIENER_FILT_STEP
-  wiener_info->vfilter[WIENER_HALFWIN] =
-      -2 * (wiener_info->vfilter[0] + wiener_info->vfilter[1] +
-            wiener_info->vfilter[2]);
-
-  // ---- Horizontal filter; same layout as the vertical one. ----
-  tap = 0;
-  if (full_window) {
-    tap = aom_read_primitive_refsubexpfin(
-              rb, WIENER_FILT_TAP0_MAXV - WIENER_FILT_TAP0_MINV + 1,
-              WIENER_FILT_TAP0_SUBEXP_K,
-              ref_wiener_info->hfilter[0] - WIENER_FILT_TAP0_MINV, ACCT_STR) +
-          WIENER_FILT_TAP0_MINV;
-  }
-  wiener_info->hfilter[0] = wiener_info->hfilter[WIENER_WIN - 1] = tap;
-
-  tap = aom_read_primitive_refsubexpfin(
-            rb, WIENER_FILT_TAP1_MAXV - WIENER_FILT_TAP1_MINV + 1,
-            WIENER_FILT_TAP1_SUBEXP_K,
-            ref_wiener_info->hfilter[1] - WIENER_FILT_TAP1_MINV, ACCT_STR) +
-        WIENER_FILT_TAP1_MINV;
-  wiener_info->hfilter[1] = wiener_info->hfilter[WIENER_WIN - 2] = tap;
-
-  tap = aom_read_primitive_refsubexpfin(
-            rb, WIENER_FILT_TAP2_MAXV - WIENER_FILT_TAP2_MINV + 1,
-            WIENER_FILT_TAP2_SUBEXP_K,
-            ref_wiener_info->hfilter[2] - WIENER_FILT_TAP2_MINV, ACCT_STR) +
-        WIENER_FILT_TAP2_MINV;
-  wiener_info->hfilter[2] = wiener_info->hfilter[WIENER_WIN - 3] = tap;
-
-  // The central element has an implicit +WIENER_FILT_STEP
-  wiener_info->hfilter[WIENER_HALFWIN] =
-      -2 * (wiener_info->hfilter[0] + wiener_info->hfilter[1] +
-            wiener_info->hfilter[2]);
-
-  // The filter just read becomes the reference for the next one.
-  memcpy(ref_wiener_info, wiener_info, sizeof(*wiener_info));
-}
-
-// Reads the self-guided projection parameters into sgrproj_info: the
-// parameter-set index ep, then the xqd weights that set actually needs. Coded
-// weights are relative to ref_sgrproj_info, which is overwritten with the
-// new values before returning.
-static void read_sgrproj_filter(SgrprojInfo *sgrproj_info,
-                                SgrprojInfo *ref_sgrproj_info, aom_reader *rb) {
-  sgrproj_info->ep = aom_read_literal(rb, SGRPROJ_PARAMS_BITS, ACCT_STR);
-  const sgr_params_type *params = &sgr_params[sgrproj_info->ep];
-
-  if (params->r[0] == 0) {
-    // First radius is zero: xqd[0] is not coded and is set to 0.
-    sgrproj_info->xqd[0] = 0;
-    sgrproj_info->xqd[1] =
-        aom_read_primitive_refsubexpfin(
-            rb, SGRPROJ_PRJ_MAX1 - SGRPROJ_PRJ_MIN1 + 1, SGRPROJ_PRJ_SUBEXP_K,
-            ref_sgrproj_info->xqd[1] - SGRPROJ_PRJ_MIN1, ACCT_STR) +
-        SGRPROJ_PRJ_MIN1;
-  } else {
-    // xqd[0] is always coded when the first radius is non-zero.
-    sgrproj_info->xqd[0] =
-        aom_read_primitive_refsubexpfin(
-            rb, SGRPROJ_PRJ_MAX0 - SGRPROJ_PRJ_MIN0 + 1, SGRPROJ_PRJ_SUBEXP_K,
-            ref_sgrproj_info->xqd[0] - SGRPROJ_PRJ_MIN0, ACCT_STR) +
-        SGRPROJ_PRJ_MIN0;
-
-    if (params->r[1] == 0) {
-      // Second radius is zero: xqd[1] is derived from xqd[0], not coded.
-      sgrproj_info->xqd[1] =
-          clamp((1 << SGRPROJ_PRJ_BITS) - sgrproj_info->xqd[0],
-                SGRPROJ_PRJ_MIN1, SGRPROJ_PRJ_MAX1);
-    } else {
-      sgrproj_info->xqd[1] =
-          aom_read_primitive_refsubexpfin(
-              rb, SGRPROJ_PRJ_MAX1 - SGRPROJ_PRJ_MIN1 + 1,
-              SGRPROJ_PRJ_SUBEXP_K,
-              ref_sgrproj_info->xqd[1] - SGRPROJ_PRJ_MIN1, ACCT_STR) +
-          SGRPROJ_PRJ_MIN1;
-    }
-  }
-
-  memcpy(ref_sgrproj_info, sgrproj_info, sizeof(*sgrproj_info));
-}
-
-// Reads the restoration type and filter coefficients of restoration unit
-// runit_idx in the given plane. Nothing is read when the plane's frame-level
-// restoration type is RESTORE_NONE.
-static void loop_restoration_read_sb_coeffs(const AV1_COMMON *const cm,
-                                            MACROBLOCKD *xd,
-                                            aom_reader *const r, int plane,
-                                            int runit_idx) {
-  const RestorationInfo *rsi = &cm->rst_info[plane];
-  RestorationUnitInfo *rui = &rsi->unit_info[runit_idx];
-  if (rsi->frame_restoration_type == RESTORE_NONE) return;
-
-  assert(!cm->all_lossless);
-
-  const int wiener_win = (plane > 0) ? WIENER_WIN_CHROMA : WIENER_WIN;
-  // Per-plane reference filters that the coded values are relative to.
-  WienerInfo *ref_wiener = xd->wiener_info + plane;
-  SgrprojInfo *ref_sgrproj = xd->sgrproj_info + plane;
-
-  switch (rsi->frame_restoration_type) {
-    case RESTORE_SWITCHABLE:
-      // The unit chooses its own type from the switchable alphabet.
-      rui->restoration_type =
-          aom_read_symbol(r, xd->tile_ctx->switchable_restore_cdf,
-                          RESTORE_SWITCHABLE_TYPES, ACCT_STR);
-      if (rui->restoration_type == RESTORE_WIENER) {
-        read_wiener_filter(wiener_win, &rui->wiener_info, ref_wiener, r);
-      } else if (rui->restoration_type == RESTORE_SGRPROJ) {
-        read_sgrproj_filter(&rui->sgrproj_info, ref_sgrproj, r);
-      } else {
-        assert(rui->restoration_type == RESTORE_NONE);
-      }
-      break;
-
-    case RESTORE_WIENER:
-      // A single flag turns the Wiener filter on or off for this unit.
-      if (aom_read_symbol(r, xd->tile_ctx->wiener_restore_cdf, 2, ACCT_STR)) {
-        rui->restoration_type = RESTORE_WIENER;
-        read_wiener_filter(wiener_win, &rui->wiener_info, ref_wiener, r);
-      } else {
-        rui->restoration_type = RESTORE_NONE;
-      }
-      break;
-
-    case RESTORE_SGRPROJ:
-      // A single flag turns the self-guided filter on or off for this unit.
-      if (aom_read_symbol(r, xd->tile_ctx->sgrproj_restore_cdf, 2, ACCT_STR)) {
-        rui->restoration_type = RESTORE_SGRPROJ;
-        read_sgrproj_filter(&rui->sgrproj_info, ref_sgrproj, r);
-      } else {
-        rui->restoration_type = RESTORE_NONE;
-      }
-      break;
-
-    default: break;
-  }
-}
-
-// Reads the loop-filter parameters from rb into cm->lf and stores the
-// resulting ref/mode deltas in the current frame buffer. When intra block
-// copy is allowed or the frame is coded lossless nothing is read and the
-// frame buffer receives the default deltas.
-static void setup_loopfilter(AV1_COMMON *cm, struct aom_read_bit_buffer *rb) {
-  const int num_planes = av1_num_planes(cm);
-  struct loopfilter *lf = &cm->lf;
-
-  if (cm->allow_intrabc || cm->coded_lossless) {
-    // write default deltas to frame buffer
-    av1_set_default_ref_deltas(cm->cur_frame->ref_deltas);
-    av1_set_default_mode_deltas(cm->cur_frame->mode_deltas);
-    return;
-  }
-  assert(!cm->coded_lossless);
-
-  // Start from the previous frame's deltas, or the defaults without one.
-  if (!cm->prev_frame) {
-    av1_set_default_ref_deltas(lf->ref_deltas);
-    av1_set_default_mode_deltas(lf->mode_deltas);
-  } else {
-    memcpy(lf->ref_deltas, cm->prev_frame->ref_deltas, REF_FRAMES);
-    memcpy(lf->mode_deltas, cm->prev_frame->mode_deltas, MAX_MODE_LF_DELTAS);
-  }
-
-  lf->filter_level[0] = aom_rb_read_literal(rb, 6);
-  lf->filter_level[1] = aom_rb_read_literal(rb, 6);
-  // Chroma levels are coded only when at least one luma level is non-zero.
-  if (num_planes > 1 && (lf->filter_level[0] || lf->filter_level[1])) {
-    lf->filter_level_u = aom_rb_read_literal(rb, 6);
-    lf->filter_level_v = aom_rb_read_literal(rb, 6);
-  }
-  lf->sharpness_level = aom_rb_read_literal(rb, 3);
-
-  // Loop filter deltas applied at the MB level based on mode or ref frame.
-  lf->mode_ref_delta_enabled = aom_rb_read_bit(rb);
-  lf->mode_ref_delta_update =
-      lf->mode_ref_delta_enabled ? aom_rb_read_bit(rb) : 0;
-  if (lf->mode_ref_delta_update) {
-    // Each delta is preceded by a flag saying whether it is updated.
-    for (int ref = 0; ref < REF_FRAMES; ++ref) {
-      if (aom_rb_read_bit(rb)) {
-        lf->ref_deltas[ref] = aom_rb_read_inv_signed_literal(rb, 6);
-      }
-    }
-    for (int mode = 0; mode < MAX_MODE_LF_DELTAS; ++mode) {
-      if (aom_rb_read_bit(rb)) {
-        lf->mode_deltas[mode] = aom_rb_read_inv_signed_literal(rb, 6);
-      }
-    }
-  }
-
-  // write deltas to frame buffer
-  memcpy(cm->cur_frame->ref_deltas, lf->ref_deltas, REF_FRAMES);
-  memcpy(cm->cur_frame->mode_deltas, lf->mode_deltas, MAX_MODE_LF_DELTAS);
-}
-
-// Reads the CDEF parameters from rb: the shared damping value, the number of
-// strength bits and one luma (plus, with chroma planes, one chroma) strength
-// per entry. Nothing is read when intra block copy is allowed.
-static void setup_cdef(AV1_COMMON *cm, struct aom_read_bit_buffer *rb) {
-  const int num_planes = av1_num_planes(cm);
-  if (cm->allow_intrabc) return;
-
-  // A single coded value serves as both primary and secondary damping.
-  const int damping = aom_rb_read_literal(rb, 2) + 3;
-  cm->cdef_sec_damping = damping;
-  cm->cdef_pri_damping = damping;
-
-  cm->cdef_bits = aom_rb_read_literal(rb, 2);
-  cm->nb_cdef_strengths = 1 << cm->cdef_bits;
-
-  const int has_chroma = num_planes > 1;
-  for (int idx = 0; idx < cm->nb_cdef_strengths; ++idx) {
-    cm->cdef_strengths[idx] = aom_rb_read_literal(rb, CDEF_STRENGTH_BITS);
-    if (has_chroma) {
-      cm->cdef_uv_strengths[idx] = aom_rb_read_literal(rb, CDEF_STRENGTH_BITS);
-    } else {
-      cm->cdef_uv_strengths[idx] = 0;
-    }
-  }
-}
-
-// Reads an optional quantizer delta: a presence bit followed, when set, by a
-// signed literal read with a width argument of 6. Returns 0 when absent.
-static INLINE int read_delta_q(struct aom_read_bit_buffer *rb) {
-  if (!aom_rb_read_bit(rb)) return 0;
-  return aom_rb_read_inv_signed_literal(rb, 6);
-}
-
-// Reads the quantization parameters from rb: the base qindex, the per-plane
-// DC/AC deltas and the optional quantizer-matrix levels.
-static void setup_quantization(AV1_COMMON *const cm,
-                               struct aom_read_bit_buffer *rb) {
-  const SequenceHeader *const seq_params = &cm->seq_params;
-  const int num_planes = av1_num_planes(cm);
-
-  cm->base_qindex = aom_rb_read_literal(rb, QINDEX_BITS);
-  cm->y_dc_delta_q = read_delta_q(rb);
-
-  if (num_planes <= 1) {
-    // Without chroma planes every chroma delta is zero.
-    cm->u_dc_delta_q = 0;
-    cm->u_ac_delta_q = 0;
-    cm->v_dc_delta_q = 0;
-    cm->v_ac_delta_q = 0;
-  } else {
-    // V gets its own deltas only when the sequence allows it and the frame
-    // asks for it; otherwise V mirrors U.
-    const int diff_uv_delta =
-        seq_params->separate_uv_delta_q ? aom_rb_read_bit(rb) : 0;
-    cm->u_dc_delta_q = read_delta_q(rb);
-    cm->u_ac_delta_q = read_delta_q(rb);
-    cm->v_dc_delta_q = diff_uv_delta ? read_delta_q(rb) : cm->u_dc_delta_q;
-    cm->v_ac_delta_q = diff_uv_delta ? read_delta_q(rb) : cm->u_ac_delta_q;
-  }
-
-  cm->dequant_bit_depth = seq_params->bit_depth;
-
-  cm->using_qmatrix = aom_rb_read_bit(rb);
-  if (!cm->using_qmatrix) {
-    cm->qm_y = 0;
-    cm->qm_u = 0;
-    cm->qm_v = 0;
-    return;
-  }
-
-  cm->qm_y = aom_rb_read_literal(rb, QM_LEVEL_BITS);
-  cm->qm_u = aom_rb_read_literal(rb, QM_LEVEL_BITS);
-  // The V level is coded separately only with separate U/V deltas.
-  cm->qm_v = seq_params->separate_uv_delta_q
-                 ? aom_rb_read_literal(rb, QM_LEVEL_BITS)
-                 : cm->qm_u;
-}
-
-// Fills the per-segment Y/U/V dequantizer pairs ([0] = DC, [1] = AC) and the
-// per-segment inverse quantizer-matrix tables from the frame's quantization
-// parameters.
-static void setup_segmentation_dequant(AV1_COMMON *const cm) {
-  const int bit_depth = cm->seq_params.bit_depth;
-  const int using_qm = cm->using_qmatrix;
-  // With segmentation disabled only entry 0 is used; the others are
-  // don't-cares and are left untouched.
-  const int segment_count = cm->seg.enabled ? MAX_SEGMENTS : 1;
-
-  for (int seg_id = 0; seg_id < segment_count; ++seg_id) {
-    const int qindex = av1_get_qindex(&cm->seg, seg_id, cm->base_qindex);
-
-    cm->y_dequant_QTX[seg_id][0] =
-        av1_dc_quant_QTX(qindex, cm->y_dc_delta_q, bit_depth);
-    cm->y_dequant_QTX[seg_id][1] = av1_ac_quant_QTX(qindex, 0, bit_depth);
-    cm->u_dequant_QTX[seg_id][0] =
-        av1_dc_quant_QTX(qindex, cm->u_dc_delta_q, bit_depth);
-    cm->u_dequant_QTX[seg_id][1] =
-        av1_ac_quant_QTX(qindex, cm->u_ac_delta_q, bit_depth);
-    cm->v_dequant_QTX[seg_id][0] =
-        av1_dc_quant_QTX(qindex, cm->v_dc_delta_q, bit_depth);
-    cm->v_dequant_QTX[seg_id][1] =
-        av1_ac_quant_QTX(qindex, cm->v_ac_delta_q, bit_depth);
-
-    const int lossless = qindex == 0 && cm->y_dc_delta_q == 0 &&
-                         cm->u_dc_delta_q == 0 && cm->u_ac_delta_q == 0 &&
-                         cm->v_dc_delta_q == 0 && cm->v_ac_delta_q == 0;
-    // NB: depends on base index so there is only 1 set per frame
-    // No quant weighting when lossless or signalled not using QM
-    const int no_weighting = lossless || using_qm == 0;
-    const int y_level = no_weighting ? NUM_QM_LEVELS - 1 : cm->qm_y;
-    const int u_level = no_weighting ? NUM_QM_LEVELS - 1 : cm->qm_u;
-    const int v_level = no_weighting ? NUM_QM_LEVELS - 1 : cm->qm_v;
-
-    for (int tx = 0; tx < TX_SIZES_ALL; ++tx) {
-      cm->y_iqmatrix[seg_id][tx] = av1_iqmatrix(cm, y_level, AOM_PLANE_Y, tx);
-    }
-    for (int tx = 0; tx < TX_SIZES_ALL; ++tx) {
-      cm->u_iqmatrix[seg_id][tx] = av1_iqmatrix(cm, u_level, AOM_PLANE_U, tx);
-    }
-    for (int tx = 0; tx < TX_SIZES_ALL; ++tx) {
-      cm->v_iqmatrix[seg_id][tx] = av1_iqmatrix(cm, v_level, AOM_PLANE_V, tx);
-    }
-  }
-}
-
-// Reads the frame-level interpolation filter: a set first bit selects
-// SWITCHABLE, otherwise the filter is read as a literal of
-// LOG_SWITCHABLE_FILTERS bits.
-static InterpFilter read_frame_interp_filter(struct aom_read_bit_buffer *rb) {
-  if (aom_rb_read_bit(rb)) return SWITCHABLE;
-  return aom_rb_read_literal(rb, LOG_SWITCHABLE_FILTERS);
-}
-
-// Sets the render size: it defaults to the superres-upscaled frame size and
-// is replaced by an explicitly coded size when the flag bit is set.
-static void setup_render_size(AV1_COMMON *cm, struct aom_read_bit_buffer *rb) {
-  cm->render_width = cm->superres_upscaled_width;
-  cm->render_height = cm->superres_upscaled_height;
-
-  const int has_explicit_size = aom_rb_read_bit(rb);
-  if (has_explicit_size) {
-    av1_read_frame_size(rb, 16, 16, &cm->render_width, &cm->render_height);
-  }
-}
-
-// Records *width x *height as the superres-upscaled size and, when the
-// sequence enables superres and the frame uses it, reads the scale
-// denominator and replaces *width / *height with the scaled-down coded size.
-// TODO(afergs): make "struct aom_read_bit_buffer *const rb"?
-static void setup_superres(AV1_COMMON *const cm, struct aom_read_bit_buffer *rb,
-                           int *width, int *height) {
-  cm->superres_upscaled_width = *width;
-  cm->superres_upscaled_height = *height;
-
-  const SequenceHeader *const seq_params = &cm->seq_params;
-  if (!seq_params->enable_superres) return;
-
-  const int use_superres = aom_rb_read_bit(rb);
-  if (!use_superres) {
-    // 1:1 scaling - ie. no scaling, scale not provided
-    cm->superres_scale_denominator = SCALE_NUMERATOR;
-    return;
-  }
-
-  cm->superres_scale_denominator =
-      (uint8_t)aom_rb_read_literal(rb, SUPERRES_SCALE_BITS);
-  cm->superres_scale_denominator += SUPERRES_SCALE_DENOMINATOR_MIN;
-  // Don't edit cm->width or cm->height directly, or the buffers won't get
-  // resized correctly
-  av1_calculate_scaled_superres_size(width, height,
-                                     cm->superres_scale_denominator);
-}
-
-// Brings the context buffers and cm->width / cm->height in line with a new
-// frame size, reallocating only when the mode-info grid has to grow, then
-// makes sure the current frame has an MV buffer and records the size on it.
-static void resize_context_buffers(AV1_COMMON *cm, int width, int height) {
-#if CONFIG_SIZE_LIMIT
-  if (width > DECODE_WIDTH_LIMIT || height > DECODE_HEIGHT_LIMIT) {
-    aom_internal_error(&cm->error, AOM_CODEC_CORRUPT_FRAME,
-                       "Dimensions of %dx%d beyond allowed size of %dx%d.",
-                       width, height, DECODE_WIDTH_LIMIT, DECODE_HEIGHT_LIMIT);
-  }
-#endif
-  const int size_changed = (cm->width != width) || (cm->height != height);
-  if (size_changed) {
-    const int rows_needed =
-        ALIGN_POWER_OF_TWO(height, MI_SIZE_LOG2) >> MI_SIZE_LOG2;
-    const int cols_needed =
-        ALIGN_POWER_OF_TWO(width, MI_SIZE_LOG2) >> MI_SIZE_LOG2;
-
-    // Allocations in av1_alloc_context_buffers() depend on individual
-    // dimensions as well as the overall size.
-    const int grid_grows = cols_needed > cm->mi_cols || rows_needed > cm->mi_rows;
-    if (!grid_grows) {
-      av1_set_mb_mi(cm, width, height);
-    } else if (av1_alloc_context_buffers(cm, width, height)) {
-      // The cm->mi_* values have been cleared and any existing context
-      // buffers have been freed. Clear cm->width and cm->height to be
-      // consistent and to force a realloc next time.
-      cm->width = 0;
-      cm->height = 0;
-      aom_internal_error(&cm->error, AOM_CODEC_MEM_ERROR,
-                         "Failed to allocate context buffers");
-    }
-    av1_init_context_buffers(cm);
-    cm->width = width;
-    cm->height = height;
-  }
-
-  ensure_mv_buffer(cm->cur_frame, cm);
-  cm->cur_frame->width = cm->width;
-  cm->cur_frame->height = cm->height;
-}
-
-// (Re)allocates the new frame buffer for the current frame size under the
-// buffer-pool lock, then copies the sequence-level format and colour
-// description and the render size onto that buffer.
-static void setup_buffer_pool(AV1_COMMON *cm) {
-  BufferPool *const pool = cm->buffer_pool;
-  const SequenceHeader *const seq_params = &cm->seq_params;
-
-  lock_buffer_pool(pool);
-  const int alloc_failed = aom_realloc_frame_buffer(
-      get_frame_new_buffer(cm), cm->width, cm->height,
-      seq_params->subsampling_x, seq_params->subsampling_y,
-      seq_params->use_highbitdepth, AOM_BORDER_IN_PIXELS, cm->byte_alignment,
-      &pool->frame_bufs[cm->new_fb_idx].raw_frame_buffer, pool->get_fb_cb,
-      pool->cb_priv);
-  if (alloc_failed) {
-    // Release the lock before reporting the error.
-    unlock_buffer_pool(pool);
-    aom_internal_error(&cm->error, AOM_CODEC_MEM_ERROR,
-                       "Failed to allocate frame buffer");
-  }
-  unlock_buffer_pool(pool);
-
-  // Stamp the buffer with the stream's format and colour description.
-  YV12_BUFFER_CONFIG *const new_buf = &pool->frame_bufs[cm->new_fb_idx].buf;
-  new_buf->subsampling_x = seq_params->subsampling_x;
-  new_buf->subsampling_y = seq_params->subsampling_y;
-  new_buf->bit_depth = (unsigned int)seq_params->bit_depth;
-  new_buf->color_primaries = seq_params->color_primaries;
-  new_buf->transfer_characteristics = seq_params->transfer_characteristics;
-  new_buf->matrix_coefficients = seq_params->matrix_coefficients;
-  new_buf->monochrome = seq_params->monochrome;
-  new_buf->chroma_sample_position = seq_params->chroma_sample_position;
-  new_buf->color_range = seq_params->color_range;
-  new_buf->render_width = cm->render_width;
-  new_buf->render_height = cm->render_height;
-}
-
-// Determines the frame size, either read from rb (when
-// frame_size_override_flag is set) or taken from the sequence maximum, then
-// applies superres, resizes the context buffers, reads the render size and
-// sets up the output frame buffer.
-static void setup_frame_size(AV1_COMMON *cm, int frame_size_override_flag,
-                             struct aom_read_bit_buffer *rb) {
-  const SequenceHeader *const seq_params = &cm->seq_params;
-  // Default to the sequence maximum; an explicit size replaces it below.
-  int width = seq_params->max_frame_width;
-  int height = seq_params->max_frame_height;
-
-  if (frame_size_override_flag) {
-    const int width_bits = seq_params->num_bits_width;
-    const int height_bits = seq_params->num_bits_height;
-    av1_read_frame_size(rb, width_bits, height_bits, &width, &height);
-
-    const int too_large = width > seq_params->max_frame_width ||
-                          height > seq_params->max_frame_height;
-    if (too_large) {
-      aom_internal_error(&cm->error, AOM_CODEC_CORRUPT_FRAME,
-                         "Frame dimensions are larger than the maximum values");
-    }
-  }
-
-  setup_superres(cm, rb, &width, &height);
-  resize_context_buffers(cm, width, height);
-  setup_render_size(cm, rb);
-  setup_buffer_pool(cm);
-}
-
-// Reads one bit selecting the superblock size (set: 128x128, clear: 64x64)
-// and applies it to seq_params through set_sb_size().
-static void setup_sb_size(SequenceHeader *seq_params,
-                          struct aom_read_bit_buffer *rb) {
-  const int use_128x128 = aom_rb_read_bit(rb);
-  set_sb_size(seq_params, use_128x128 ? BLOCK_128X128 : BLOCK_64X64);
-}
-
-// Returns non-zero when a reference frame matches the current frame in bit
-// depth and in both chroma subsampling factors, and 0 otherwise.
-static INLINE int valid_ref_frame_img_fmt(aom_bit_depth_t ref_bit_depth,
-                                          int ref_xss, int ref_yss,
-                                          aom_bit_depth_t this_bit_depth,
-                                          int this_xss, int this_yss) {
-  if (ref_bit_depth != this_bit_depth) return 0;
-  if (ref_xss != this_xss) return 0;
-  return ref_yss == this_yss;
-}
-
-// Determines the frame size of an inter frame. One flag is read per
-// reference: the first one that is set makes the frame inherit that
-// reference's size and render size. If none is set, the size is coded
-// explicitly. The references are then validated against the chosen size and
-// the stream's colour format before the output frame buffer is set up.
-static void setup_frame_size_with_refs(AV1_COMMON *cm,
-                                       struct aom_read_bit_buffer *rb) {
-  int width, height;
-  int size_from_ref = 0;
-
-  for (int ref = 0; ref < INTER_REFS_PER_FRAME && !size_from_ref; ++ref) {
-    if (!aom_rb_read_bit(rb)) continue;
-
-    YV12_BUFFER_CONFIG *const ref_buf = cm->frame_refs[ref].buf;
-    width = ref_buf->y_crop_width;
-    height = ref_buf->y_crop_height;
-    cm->render_width = ref_buf->render_width;
-    cm->render_height = ref_buf->render_height;
-    setup_superres(cm, rb, &width, &height);
-    resize_context_buffers(cm, width, height);
-    size_from_ref = 1;
-  }
-
-  const SequenceHeader *const seq_params = &cm->seq_params;
-  if (!size_from_ref) {
-    // No reference was selected: the size is coded explicitly.
-    const int width_bits = seq_params->num_bits_width;
-    const int height_bits = seq_params->num_bits_height;
-
-    av1_read_frame_size(rb, width_bits, height_bits, &width, &height);
-    setup_superres(cm, rb, &width, &height);
-    resize_context_buffers(cm, width, height);
-    setup_render_size(cm, rb);
-  }
-
-  if (width <= 0 || height <= 0) {
-    aom_internal_error(&cm->error, AOM_CODEC_CORRUPT_FRAME,
-                       "Invalid frame size");
-  }
-
-  // Check to make sure at least one of frames that this frame references
-  // has valid dimensions.
-  int any_ref_size_ok = 0;
-  for (int ref = 0; ref < INTER_REFS_PER_FRAME; ++ref) {
-    const RefBuffer *const ref_frame = &cm->frame_refs[ref];
-    any_ref_size_ok |=
-        valid_ref_frame_size(ref_frame->buf->y_crop_width,
-                             ref_frame->buf->y_crop_height, width, height);
-  }
-  if (!any_ref_size_ok) {
-    aom_internal_error(&cm->error, AOM_CODEC_CORRUPT_FRAME,
-                       "Referenced frame has invalid size");
-  }
-
-  // Every reference must match the stream's bit depth and subsampling.
-  for (int ref = 0; ref < INTER_REFS_PER_FRAME; ++ref) {
-    const RefBuffer *const ref_frame = &cm->frame_refs[ref];
-    const int fmt_ok = valid_ref_frame_img_fmt(
-        ref_frame->buf->bit_depth, ref_frame->buf->subsampling_x,
-        ref_frame->buf->subsampling_y, seq_params->bit_depth,
-        seq_params->subsampling_x, seq_params->subsampling_y);
-    if (!fmt_ok) {
-      aom_internal_error(&cm->error, AOM_CODEC_CORRUPT_FRAME,
-                         "Referenced frame has incompatible color format");
-    }
-  }
-
-  setup_buffer_pool(cm);
-}
-
-// Same function as av1_read_uniform, but reading from the uncompressed header
-// bit buffer rb.
-//
-// With l = get_unsigned_bits(n) and m = (1 << l) - n, the first l - 1 bits
-// are read as v. Values v < m are returned as-is; otherwise one extra bit is
-// read and (v << 1) - m + bit is returned.
-static int rb_read_uniform(struct aom_read_bit_buffer *const rb, int n) {
-  const int l = get_unsigned_bits(n);
-  // Validate l before it is used as a read width; the original asserted only
-  // after aom_rb_read_literal() had already been called with l - 1.
-  assert(l != 0);
-  const int m = (1 << l) - n;
-  const int v = aom_rb_read_literal(rb, l - 1);
-  if (v < m) return v;
-  return (v << 1) - m + aom_rb_read_bit(rb);
-}
-
-// Reads the tile column/row layout from rb into cm.
-//
-// The frame dimensions in mi units are first rounded up to a whole number of
-// superblocks. uniform_tile_spacing_flag is then read:
-//  - when set, log2_tile_cols / log2_tile_rows start at their minimum and are
-//    incremented once per 1-bit read, up to their maximum;
-//  - otherwise each tile's size in superblocks is read with rb_read_uniform()
-//    and the tile start positions are accumulated into tile_col_start_sb[] /
-//    tile_row_start_sb[].
-// av1_calculate_tile_cols() / av1_calculate_tile_rows() are called after the
-// respective dimension has been read.
-static void read_tile_info_max_tile(AV1_COMMON *const cm,
- struct aom_read_bit_buffer *const rb) {
- int width_mi = ALIGN_POWER_OF_TWO(cm->mi_cols, cm->seq_params.mib_size_log2);
- int height_mi = ALIGN_POWER_OF_TWO(cm->mi_rows, cm->seq_params.mib_size_log2);
- int width_sb = width_mi >> cm->seq_params.mib_size_log2;
- int height_sb = height_mi >> cm->seq_params.mib_size_log2;
-
- av1_get_tile_limits(cm);
- cm->uniform_tile_spacing_flag = aom_rb_read_bit(rb);
-
- // Read tile columns
- if (cm->uniform_tile_spacing_flag) {
- cm->log2_tile_cols = cm->min_log2_tile_cols;
- while (cm->log2_tile_cols < cm->max_log2_tile_cols) {
- if (!aom_rb_read_bit(rb)) {
- break;
- }
- cm->log2_tile_cols++;
- }
- } else {
- int i;
- int start_sb;
- // width_sb counts the superblock columns not yet assigned to a tile.
- for (i = 0, start_sb = 0; width_sb > 0 && i < MAX_TILE_COLS; i++) {
- const int size_sb =
- 1 + rb_read_uniform(rb, AOMMIN(width_sb, cm->max_tile_width_sb));
- cm->tile_col_start_sb[i] = start_sb;
- start_sb += size_sb;
- width_sb -= size_sb;
- }
- cm->tile_cols = i;
- // One extra entry past the last tile records where the tiles end.
- cm->tile_col_start_sb[i] = start_sb + width_sb;
- }
- av1_calculate_tile_cols(cm);
-
- // Read tile rows
- if (cm->uniform_tile_spacing_flag) {
- cm->log2_tile_rows = cm->min_log2_tile_rows;
- while (cm->log2_tile_rows < cm->max_log2_tile_rows) {
- if (!aom_rb_read_bit(rb)) {
- break;
- }
- cm->log2_tile_rows++;
- }
- } else {
- int i;
- int start_sb;
- // height_sb counts the superblock rows not yet assigned to a tile.
- for (i = 0, start_sb = 0; height_sb > 0 && i < MAX_TILE_ROWS; i++) {
- const int size_sb =
- 1 + rb_read_uniform(rb, AOMMIN(height_sb, cm->max_tile_height_sb));
- cm->tile_row_start_sb[i] = start_sb;
- start_sb += size_sb;
- height_sb -= size_sb;
- }
- cm->tile_rows = i;
- // One extra entry past the last tile records where the tiles end.
- cm->tile_row_start_sb[i] = start_sb + height_sb;
- }
- av1_calculate_tile_rows(cm);
-}
-
-// Sets cm->single_tile_decoding.
-//
-// The flag is always cleared first. When cm->large_scale_tile is set, it
-// becomes 1 only if both loop filter levels are zero, cdef_bits and the first
-// CDEF luma/chroma strengths are zero, and all three planes use RESTORE_NONE.
-void av1_set_single_tile_decoding_mode(AV1_COMMON *const cm) {
-  cm->single_tile_decoding = 0;
-  if (!cm->large_scale_tile) return;
-
-  const struct loopfilter *const lf = &cm->lf;
-  const int no_loopfilter =
-      lf->filter_level[0] == 0 && lf->filter_level[1] == 0;
-  const int no_cdef = !(cm->cdef_bits != 0 || cm->cdef_strengths[0] != 0 ||
-                        cm->cdef_uv_strengths[0] != 0);
-
-  int no_restoration = 1;
-  for (int plane = 0; plane < 3; ++plane) {
-    if (cm->rst_info[plane].frame_restoration_type != RESTORE_NONE)
-      no_restoration = 0;
-  }
-
-  assert(IMPLIES(cm->coded_lossless, no_loopfilter && no_cdef));
-  assert(IMPLIES(cm->all_lossless, no_restoration));
-  cm->single_tile_decoding = no_loopfilter && no_cdef && no_restoration;
-}
-
-// Reads the tile layout and, when the frame has more than one tile, the id of
-// the tile used for the CDF update and the number of bytes used to code each
-// tile size.
-static void read_tile_info(AV1Decoder *const pbi,
-                           struct aom_read_bit_buffer *const rb) {
-  AV1_COMMON *const cm = &pbi->common;
-
-  read_tile_info_max_tile(cm, rb);
-
-  cm->context_update_tile_id = 0;
-  const int num_tiles = cm->tile_rows * cm->tile_cols;
-  if (num_tiles <= 1) return;
-
-  // tile to use for cdf update
-  const int id_bits = cm->log2_tile_rows + cm->log2_tile_cols;
-  cm->context_update_tile_id = aom_rb_read_literal(rb, id_bits);
-  if (cm->context_update_tile_id >= num_tiles) {
-    aom_internal_error(&cm->error, AOM_CODEC_CORRUPT_FRAME,
-                       "Invalid context_update_tile_id");
-  }
-
-  // tile size magnitude
-  pbi->tile_size_bytes = aom_rb_read_literal(rb, 2) + 1;
-}
-
-#if EXT_TILE_DEBUG
-// Reads the large-scale-tile size-field widths from rb.
-//
-// The reader is first advanced to the next byte boundary. When the frame has
-// more than one tile, two 2-bit fields follow, each stored as (value - 1):
-// the byte width of a tile-column size field and of a tile size field.
-static void read_ext_tile_info(AV1Decoder *const pbi,
- struct aom_read_bit_buffer *const rb) {
- AV1_COMMON *const cm = &pbi->common;
-
- // This information is stored as a separate byte.
- int mod = rb->bit_offset % CHAR_BIT;
- // Skip the padding bits up to the byte boundary; their value is discarded.
- if (mod > 0) aom_rb_read_literal(rb, CHAR_BIT - mod);
- assert(rb->bit_offset % CHAR_BIT == 0);
-
- if (cm->tile_cols * cm->tile_rows > 1) {
- // Read the number of bytes used to store tile size
- pbi->tile_col_size_bytes = aom_rb_read_literal(rb, 2) + 1;
- pbi->tile_size_bytes = aom_rb_read_literal(rb, 2) + 1;
- }
-}
-#endif // EXT_TILE_DEBUG
-
-// Reads an sz-byte value (sz in 1..4) from src; the 2-, 3- and 4-byte cases
-// use the mem_get_le16/24/32 helpers. Any other sz trips an assertion and,
-// when assertions are compiled out, yields (size_t)-1.
-static size_t mem_get_varsize(const uint8_t *src, int sz) {
-  if (sz == 1) return src[0];
-  if (sz == 2) return mem_get_le16(src);
-  if (sz == 3) return mem_get_le24(src);
-  if (sz == 4) return mem_get_le32(src);
-
-  assert(0 && "Invalid size");
-  return -1;
-}
-
-#if EXT_TILE_DEBUG
-// Reads the size header of the next large-scale tile at '*data' and records
-// the tile's location in tile_buffers[row][col].
-//
-// The header is tile_size_bytes wide. When tile_copy_mode is nonzero and the
-// top bit of the header is set, the tile carries no payload: the low 7 bits
-// of the top byte give a row offset, and the entry is copied from
-// tile_buffers[row - offset][col]. Otherwise the payload size is the header
-// value plus AV1_MIN_TILE_SIZE_BYTES.
-//
-// On return, '*data' is updated to point to the end of the raw tile buffer in
-// the bit stream. AOM_CODEC_CORRUPT_FRAME is reported through error_info when
-// the header or payload would run past data_end, or when the copy-mode row
-// offset points above row 0.
-static void get_ls_tile_buffer(
-    const uint8_t *const data_end, struct aom_internal_error_info *error_info,
-    const uint8_t **data, TileBufferDec (*const tile_buffers)[MAX_TILE_COLS],
-    int tile_size_bytes, int col, int row, int tile_copy_mode) {
-  size_t size;
-
-  size_t copy_size = 0;
-  const uint8_t *copy_data = NULL;
-
-  if (!read_is_valid(*data, tile_size_bytes, data_end))
-    aom_internal_error(error_info, AOM_CODEC_CORRUPT_FRAME,
-                       "Truncated packet or corrupt tile length");
-  size = mem_get_varsize(*data, tile_size_bytes);
-
-  // If tile_copy_mode = 1, then the top bit of the tile header indicates copy
-  // mode.
-  if (tile_copy_mode && (size >> (tile_size_bytes * 8 - 1)) == 1) {
-    // The remaining bits in the top byte signal the row offset
-    int offset = (size >> (tile_size_bytes - 1) * 8) & 0x7f;
-
-    // The offset comes straight from the bitstream. Reject values that would
-    // index before the first tile row instead of reading out of bounds.
-    if (offset > row) {
-      aom_internal_error(error_info, AOM_CODEC_CORRUPT_FRAME,
-                         "Invalid row offset in tile copy mode");
-    } else {
-      // Currently, only use tiles in same column as reference tiles.
-      copy_data = tile_buffers[row - offset][col].data;
-      copy_size = tile_buffers[row - offset][col].size;
-    }
-    size = 0;
-  } else {
-    size += AV1_MIN_TILE_SIZE_BYTES;
-  }
-
-  *data += tile_size_bytes;
-
-  if (size > (size_t)(data_end - *data))
-    aom_internal_error(error_info, AOM_CODEC_CORRUPT_FRAME,
-                       "Truncated packet or corrupt tile size");
-
-  if (size > 0) {
-    tile_buffers[row][col].data = *data;
-    tile_buffers[row][col].size = size;
-  } else {
-    tile_buffers[row][col].data = copy_data;
-    tile_buffers[row][col].size = copy_size;
-  }
-
-  *data += size;
-}
-
-// Locates the large-scale tile buffers needed for decoding and fills in
-// tile_buffers[][].
-//
-// Returns the end of the last tile buffer
-// (tile_buffers[cm->tile_rows - 1][cm->tile_cols - 1]), or NULL when the
-// frame consists of a single tile.
-//
-// Every tile column except the last is preceded by a tile_col_size_bytes-wide
-// size field. Those fields come from the bitstream, so each one is checked
-// against data_end before it is read and before the column end pointer
-// derived from it is stored.
-static const uint8_t *get_ls_tile_buffers(
-    AV1Decoder *pbi, const uint8_t *data, const uint8_t *data_end,
-    TileBufferDec (*const tile_buffers)[MAX_TILE_COLS]) {
-  AV1_COMMON *const cm = &pbi->common;
-  const int tile_cols = cm->tile_cols;
-  const int tile_rows = cm->tile_rows;
-  const int have_tiles = tile_cols * tile_rows > 1;
-  const uint8_t *raw_data_end;  // The end of the last tile buffer
-
-  if (!have_tiles) {
-    const size_t tile_size = data_end - data;
-    tile_buffers[0][0].data = data;
-    tile_buffers[0][0].size = tile_size;
-    raw_data_end = NULL;
-  } else {
-    // We locate only the tile buffers that are required, which are the ones
-    // specified by pbi->dec_tile_col and pbi->dec_tile_row. Also, we always
-    // need the last (bottom right) tile buffer, as we need to know where the
-    // end of the compressed frame buffer is for proper superframe decoding.
-
-    const uint8_t *tile_col_data_end[MAX_TILE_COLS] = { NULL };
-    const uint8_t *const data_start = data;
-
-    const int dec_tile_row = AOMMIN(pbi->dec_tile_row, tile_rows);
-    const int single_row = pbi->dec_tile_row >= 0;
-    const int tile_rows_start = single_row ? dec_tile_row : 0;
-    const int tile_rows_end = single_row ? tile_rows_start + 1 : tile_rows;
-    const int dec_tile_col = AOMMIN(pbi->dec_tile_col, tile_cols);
-    const int single_col = pbi->dec_tile_col >= 0;
-    const int tile_cols_start = single_col ? dec_tile_col : 0;
-    const int tile_cols_end = single_col ? tile_cols_start + 1 : tile_cols;
-
-    const int tile_col_size_bytes = pbi->tile_col_size_bytes;
-    const int tile_size_bytes = pbi->tile_size_bytes;
-    const int tile_copy_mode =
-        ((AOMMAX(cm->tile_width, cm->tile_height) << MI_SIZE_LOG2) <= 256) ? 1
-                                                                           : 0;
-    // Read tile column sizes for all columns (we need the last tile buffer)
-    for (int c = 0; c < tile_cols; ++c) {
-      const int is_last = c == tile_cols - 1;
-      size_t tile_col_size;
-
-      if (!is_last) {
-        // Make sure the size field itself lies inside the buffer.
-        if (!read_is_valid(data, tile_col_size_bytes, data_end))
-          aom_internal_error(&pbi->common.error, AOM_CODEC_CORRUPT_FRAME,
-                             "Not enough data to read tile column size");
-        tile_col_size = mem_get_varsize(data, tile_col_size_bytes);
-        data += tile_col_size_bytes;
-        // Make sure the column it describes does not run past the buffer.
-        if (tile_col_size > (size_t)(data_end - data))
-          aom_internal_error(&pbi->common.error, AOM_CODEC_CORRUPT_FRAME,
-                             "Tile column size is out of bounds");
-        tile_col_data_end[c] = data + tile_col_size;
-      } else {
-        tile_col_size = data_end - data;
-        tile_col_data_end[c] = data_end;
-      }
-      data += tile_col_size;
-    }
-
-    data = data_start;
-
-    // Read the required tile sizes.
-    for (int c = tile_cols_start; c < tile_cols_end; ++c) {
-      const int is_last = c == tile_cols - 1;
-
-      if (c > 0) data = tile_col_data_end[c - 1];
-
-      // Skip this column's size field (the last column has none).
-      if (!is_last) data += tile_col_size_bytes;
-
-      // Get the whole of the last column, otherwise stop at the required tile.
-      for (int r = 0; r < (is_last ? tile_rows : tile_rows_end); ++r) {
-        get_ls_tile_buffer(tile_col_data_end[c], &pbi->common.error, &data,
-                           tile_buffers, tile_size_bytes, c, r, tile_copy_mode);
-      }
-    }
-
-    // If we have not read the last column, then read it to get the last tile.
-    if (tile_cols_end != tile_cols) {
-      const int c = tile_cols - 1;
-
-      data = tile_col_data_end[c - 1];
-
-      for (int r = 0; r < tile_rows; ++r) {
-        get_ls_tile_buffer(tile_col_data_end[c], &pbi->common.error, &data,
-                           tile_buffers, tile_size_bytes, c, r, tile_copy_mode);
-      }
-    }
-    raw_data_end = data;
-  }
-  return raw_data_end;
-}
-#endif // EXT_TILE_DEBUG
-
-// Records 'data' as the buffer of the single tile selected by
-// pbi->dec_tile_row / pbi->dec_tile_col, with pbi->coded_tile_data_size as its
-// size, and returns the pointer just past that tile's data.
-static const uint8_t *get_ls_single_tile_buffer(
-    AV1Decoder *pbi, const uint8_t *data,
-    TileBufferDec (*const tile_buffers)[MAX_TILE_COLS]) {
-  assert(pbi->dec_tile_row >= 0 && pbi->dec_tile_col >= 0);
-
-  TileBufferDec *const selected =
-      &tile_buffers[pbi->dec_tile_row][pbi->dec_tile_col];
-  selected->data = data;
-  selected->size = (size_t)pbi->coded_tile_data_size;
-
-  return data + pbi->coded_tile_data_size;
-}
-
-// Locates the next tile in the bit stream, stores its start and size in
-// 'buf', and advances '*data' past it.
-//
-// The last tile has no size field and extends to data_end. Every other tile
-// is preceded by a tile_size_bytes-wide field holding its size minus
-// AV1_MIN_TILE_SIZE_BYTES.
-static void get_tile_buffer(const uint8_t *const data_end,
-                            const int tile_size_bytes, int is_last,
-                            struct aom_internal_error_info *error_info,
-                            const uint8_t **data, TileBufferDec *const buf) {
-  size_t tile_bytes;
-
-  if (is_last) {
-    tile_bytes = data_end - *data;
-  } else {
-    if (!read_is_valid(*data, tile_size_bytes, data_end))
-      aom_internal_error(error_info, AOM_CODEC_CORRUPT_FRAME,
-                         "Truncated packet or corrupt tile length");
-
-    tile_bytes =
-        mem_get_varsize(*data, tile_size_bytes) + AV1_MIN_TILE_SIZE_BYTES;
-    *data += tile_size_bytes;
-
-    if (tile_bytes > (size_t)(data_end - *data))
-      aom_internal_error(error_info, AOM_CODEC_CORRUPT_FRAME,
-                         "Truncated packet or corrupt tile size");
-  }
-
-  buf->data = *data;
-  buf->size = tile_bytes;
-
-  *data += tile_bytes;
-}
-
-// Walks the tiles in raster order and, for each tile index in
-// [start_tile, end_tile], locates its data with get_tile_buffer() and stores
-// the result in tile_buffers[r][c]. The tile whose index equals end_tile is
-// treated as the last one, so it takes all bytes up to data_end.
-// AOM_CODEC_CORRUPT_FRAME is reported if the data runs out before a tile in
-// the range has been located.
-static void get_tile_buffers(AV1Decoder *pbi, const uint8_t *data,
- const uint8_t *data_end,
- TileBufferDec (*const tile_buffers)[MAX_TILE_COLS],
- int start_tile, int end_tile) {
- AV1_COMMON *const cm = &pbi->common;
- const int tile_cols = cm->tile_cols;
- const int tile_rows = cm->tile_rows;
- int tc = 0;
- // NOTE(review): first_tile_in_tg is updated below but never read elsewhere
- // in this function; it looks like leftover bookkeeping.
- int first_tile_in_tg = 0;
-
- for (int r = 0; r < tile_rows; ++r) {
- for (int c = 0; c < tile_cols; ++c, ++tc) {
- TileBufferDec *const buf = &tile_buffers[r][c];
-
- const int is_last = (tc == end_tile);
- // hdr_offset is always 0 here, so the additions below are no-ops.
- const size_t hdr_offset = 0;
-
- // Tiles outside the requested range are skipped without consuming data.
- if (tc < start_tile || tc > end_tile) continue;
-
- if (data + hdr_offset >= data_end)
- aom_internal_error(&cm->error, AOM_CODEC_CORRUPT_FRAME,
- "Data ended before all tiles were read.");
- first_tile_in_tg += tc == first_tile_in_tg ? pbi->tg_size : 0;
- data += hdr_offset;
- get_tile_buffer(data_end, pbi->tile_size_bytes, is_last,
- &pbi->common.error, &data, buf);
- }
- }
-}
-
-// Points xd's per-plane coefficient, eob and colour-index-map pointers at the
-// CB_BUFFER entry for the superblock containing (mi_row, mi_col), and resets
-// the associated running offsets to zero.
-//
-// Entries are indexed in superblock raster order using a row stride of
-// (cm->mi_cols >> mib_size_log2) + 1.
-static void set_cb_buffer(AV1Decoder *pbi, MACROBLOCKD *const xd,
-                          CB_BUFFER *cb_buffer_base, const int num_planes,
-                          int mi_row, int mi_col) {
-  const AV1_COMMON *const cm = &pbi->common;
-  const int sb_shift = cm->seq_params.mib_size_log2;
-  const int sb_row = mi_row >> sb_shift;
-  const int sb_col = mi_col >> sb_shift;
-  const int sbs_per_row = (cm->mi_cols >> sb_shift) + 1;
-  CB_BUFFER *const sb_buf = &cb_buffer_base[sb_row * sbs_per_row + sb_col];
-
-  for (int p = 0; p < num_planes; ++p) {
-    xd->plane[p].dqcoeff_block = sb_buf->dqcoeff[p];
-    xd->plane[p].eob_data = sb_buf->eob_data[p];
-    xd->cb_offset[p] = 0;
-    xd->txb_offset[p] = 0;
-  }
-
-  // Colour index maps are set up for the first two planes regardless of
-  // num_planes.
-  for (int p = 0; p < 2; ++p) {
-    xd->plane[p].color_index_map = sb_buf->color_index_map[p];
-    xd->color_index_map_offset[p] = 0;
-  }
-}
-
-// Frees any existing per-tile data and allocates a fresh 32-byte-aligned
-// array of n_tiles entries. Each entry's dec_row_mt_sync is zeroed, and the
-// count of allocated row-MT sync rows is reset to 0.
-static void decoder_alloc_tile_data(AV1Decoder *pbi, const int n_tiles) {
-  AV1_COMMON *const cm = &pbi->common;
-
-  aom_free(pbi->tile_data);
-  CHECK_MEM_ERROR(cm, pbi->tile_data,
-                  aom_memalign(32, n_tiles * sizeof(*pbi->tile_data)));
-  pbi->allocated_tiles = n_tiles;
-
-  for (int tile_idx = 0; tile_idx < n_tiles; ++tile_idx) {
-    av1_zero(pbi->tile_data[tile_idx].dec_row_mt_sync);
-  }
-  pbi->allocated_row_mt_sync_rows = 0;
-}
-
-// Set up nsync by width.
-// The width-based table below is compiled out (#if 0), so this function
-// currently ignores 'width' and always returns 1.
-static INLINE int get_sync_range(int width) {
-// nsync numbers are picked by testing.
-#if 0
- if (width < 640)
- return 1;
- else if (width <= 1280)
- return 2;
- else if (width <= 4096)
- return 4;
- else
- return 8;
-#else
- (void)width;
-#endif
- return 1;
-}
-
-// Allocate memory for decoder row synchronization
-//
-// Allocates 'rows' entries of cur_sb_col and, when CONFIG_MULTITHREAD is
-// enabled, 'rows' mutexes and condition variables, each initialised with
-// default attributes. Allocation goes through CHECK_MEM_ERROR with cm.
-// sync_range is set from get_sync_range(cm->width).
-static void dec_row_mt_alloc(AV1DecRowMTSync *dec_row_mt_sync, AV1_COMMON *cm,
- int rows) {
- // Recorded up front; av1_dec_row_mt_dealloc() uses it as the number of
- // mutexes and condition variables to destroy.
- dec_row_mt_sync->allocated_sb_rows = rows;
-#if CONFIG_MULTITHREAD
- {
- int i;
-
- CHECK_MEM_ERROR(cm, dec_row_mt_sync->mutex_,
- aom_malloc(sizeof(*(dec_row_mt_sync->mutex_)) * rows));
- if (dec_row_mt_sync->mutex_) {
- for (i = 0; i < rows; ++i) {
- pthread_mutex_init(&dec_row_mt_sync->mutex_[i], NULL);
- }
- }
-
- CHECK_MEM_ERROR(cm, dec_row_mt_sync->cond_,
- aom_malloc(sizeof(*(dec_row_mt_sync->cond_)) * rows));
- if (dec_row_mt_sync->cond_) {
- for (i = 0; i < rows; ++i) {
- pthread_cond_init(&dec_row_mt_sync->cond_[i], NULL);
- }
- }
- }
-#endif // CONFIG_MULTITHREAD
-
- CHECK_MEM_ERROR(cm, dec_row_mt_sync->cur_sb_col,
- aom_malloc(sizeof(*(dec_row_mt_sync->cur_sb_col)) * rows));
-
- // Set up nsync.
- dec_row_mt_sync->sync_range = get_sync_range(cm->width);
-}
-
-// Deallocate decoder row synchronization related mutex and data
-//
-// A NULL dec_row_mt_sync is accepted and ignored. With CONFIG_MULTITHREAD,
-// allocated_sb_rows mutexes and condition variables are destroyed before
-// their arrays are freed. The whole structure is zeroed at the end.
-void av1_dec_row_mt_dealloc(AV1DecRowMTSync *dec_row_mt_sync) {
- if (dec_row_mt_sync != NULL) {
-#if CONFIG_MULTITHREAD
- int i;
- if (dec_row_mt_sync->mutex_ != NULL) {
- for (i = 0; i < dec_row_mt_sync->allocated_sb_rows; ++i) {
- pthread_mutex_destroy(&dec_row_mt_sync->mutex_[i]);
- }
- aom_free(dec_row_mt_sync->mutex_);
- }
- if (dec_row_mt_sync->cond_ != NULL) {
- for (i = 0; i < dec_row_mt_sync->allocated_sb_rows; ++i) {
- pthread_cond_destroy(&dec_row_mt_sync->cond_[i]);
- }
- aom_free(dec_row_mt_sync->cond_);
- }
-#endif // CONFIG_MULTITHREAD
- aom_free(dec_row_mt_sync->cur_sb_col);
-
- // clear the structure as the source of this call may be a resize in which
- // case this call will be followed by an _alloc() which may fail.
- av1_zero(*dec_row_mt_sync);
- }
-}
-
-// Blocks until the superblock row above (r - 1) has published enough
-// progress for column c of row r to proceed.
-//
-// The wait is only attempted when r is nonzero and (c & (nsync - 1)) == 0.
-// It loops on the row's condition variable while
-// c > cur_sb_col[r - 1] - nsync. Without CONFIG_MULTITHREAD this is a no-op.
-static INLINE void sync_read(AV1DecRowMTSync *const dec_row_mt_sync, int r,
- int c) {
-#if CONFIG_MULTITHREAD
- const int nsync = dec_row_mt_sync->sync_range;
-
- if (r && !(c & (nsync - 1))) {
- pthread_mutex_t *const mutex = &dec_row_mt_sync->mutex_[r - 1];
- pthread_mutex_lock(mutex);
-
- // Re-check the predicate after every wake-up, with the mutex held.
- while (c > dec_row_mt_sync->cur_sb_col[r - 1] - nsync) {
- pthread_cond_wait(&dec_row_mt_sync->cond_[r - 1], mutex);
- }
- pthread_mutex_unlock(mutex);
- }
-#else
- (void)dec_row_mt_sync;
- (void)r;
- (void)c;
-#endif // CONFIG_MULTITHREAD
-}
-
-// Publishes the progress of superblock row r after column c has been
-// processed, and signals the row's condition variable.
-//
-// For every column except the last, the update is only made when
-// c % nsync == 0 and the stored value is c. For the last column
-// (c >= sb_cols - 1) the stored value is sb_cols + nsync. Without
-// CONFIG_MULTITHREAD this is a no-op.
-static INLINE void sync_write(AV1DecRowMTSync *const dec_row_mt_sync, int r,
- int c, const int sb_cols) {
-#if CONFIG_MULTITHREAD
- const int nsync = dec_row_mt_sync->sync_range;
- int cur;
- int sig = 1;
-
- if (c < sb_cols - 1) {
- cur = c;
- if (c % nsync) sig = 0;
- } else {
- // End of row: the stored value exceeds every column index by more than
- // nsync, so the wait predicate in sync_read() is false for any column.
- cur = sb_cols + nsync;
- }
-
- if (sig) {
- pthread_mutex_lock(&dec_row_mt_sync->mutex_[r]);
-
- dec_row_mt_sync->cur_sb_col[r] = cur;
-
- pthread_cond_signal(&dec_row_mt_sync->cond_[r]);
- pthread_mutex_unlock(&dec_row_mt_sync->mutex_[r]);
- }
-#else
- (void)dec_row_mt_sync;
- (void)r;
- (void)c;
- (void)sb_cols;
-#endif // CONFIG_MULTITHREAD
-}
-
-// Runs decode_partition() with flag 0x2 over every superblock of one
-// superblock row (starting at mi_row) of the tile described by tile_info.
-//
-// Each superblock is bracketed by sync_read() / sync_write() on the tile's
-// dec_row_mt_sync. The coefficient buffers come from pbi->cb_buffer_base via
-// set_cb_buffer().
-static void decode_tile_sb_row(AV1Decoder *pbi, ThreadData *const td,
- TileInfo tile_info, const int mi_row) {
- AV1_COMMON *const cm = &pbi->common;
- const int num_planes = av1_num_planes(cm);
- TileDataDec *const tile_data =
- pbi->tile_data + tile_info.tile_row * cm->tile_cols + tile_info.tile_col;
- const int sb_cols_in_tile = av1_get_sb_cols_in_tile(cm, tile_info);
- // Row index of this superblock row relative to the top of the tile.
- const int sb_row_in_tile =
- (mi_row - tile_info.mi_row_start) >> cm->seq_params.mib_size_log2;
- int sb_col_in_tile = 0;
-
- for (int mi_col = tile_info.mi_col_start; mi_col < tile_info.mi_col_end;
- mi_col += cm->seq_params.mib_size, sb_col_in_tile++) {
- set_cb_buffer(pbi, &td->xd, pbi->cb_buffer_base, num_planes, mi_row,
- mi_col);
-
- sync_read(&tile_data->dec_row_mt_sync, sb_row_in_tile, sb_col_in_tile);
-
- // Decoding of the super-block
- decode_partition(pbi, td, mi_row, mi_col, td->bit_reader,
- cm->seq_params.sb_size, 0x2);
-
- sync_write(&tile_data->dec_row_mt_sync, sb_row_in_tile, sb_col_in_tile,
- sb_cols_in_tile);
- }
-}
-
-// Validates the bits that follow the symbol-coded data of a tile.
-//
-// Returns -1 if the reader has overflowed, if the byte holding the last
-// consumed bit does not have a set bit at the position computed from
-// aom_reader_tell() followed only by zero bits, or if any later byte up to
-// the reader's end is nonzero. Returns 0 otherwise.
-static int check_trailing_bits_after_symbol_coder(aom_reader *r) {
- if (aom_reader_has_overflowed(r)) return -1;
-
- uint32_t nb_bits = aom_reader_tell(r);
- uint32_t nb_bytes = (nb_bits + 7) >> 3;
- const uint8_t *p = aom_reader_find_begin(r) + nb_bytes;
-
- // aom_reader_tell() returns 1 for a newly initialized decoder, and the
- // return value only increases as values are decoded. So nb_bits > 0, and
- // thus p > p_begin. Therefore accessing p[-1] is safe.
- uint8_t last_byte = p[-1];
- // 'pattern' has a single bit set, at the position of bit number nb_bits
- // within its byte (counting from the most significant bit).
- uint8_t pattern = 128 >> ((nb_bits - 1) & 7);
- // (2 * pattern - 1) masks that bit and everything below it; the masked
- // value must be exactly the single set bit.
- if ((last_byte & (2 * pattern - 1)) != pattern) return -1;
-
- // Make sure that all padding bytes are zero as required by the spec.
- const uint8_t *p_end = aom_reader_find_end(r);
- while (p < p_end) {
- if (*p != 0) return -1;
- p++;
- }
- return 0;
-}
-
-// Installs the per-block visitor callbacks on td according to
-// parse_decode_flag:
-//   bit 0x1 set - the coefficient-reading callbacks are the real ones;
-//   bit 0x2 set - the prediction / inverse-transform / CfL-store callbacks
-//                 are the real ones.
-// Callbacks belonging to a cleared bit are set to their *_void counterparts.
-static void set_decode_func_pointers(ThreadData *td, int parse_decode_flag) {
-  const int want_parse = (parse_decode_flag & 0x1) != 0;
-  const int want_recon = (parse_decode_flag & 0x2) != 0;
-
-  if (want_parse) {
-    td->read_coeffs_tx_intra_block_visit = read_coeffs_tx_intra_block;
-    td->read_coeffs_tx_inter_block_visit = av1_read_coeffs_txb_facade;
-  } else {
-    td->read_coeffs_tx_intra_block_visit = decode_block_void;
-    td->read_coeffs_tx_inter_block_visit = decode_block_void;
-  }
-
-  if (want_recon) {
-    td->predict_and_recon_intra_block_visit =
-        predict_and_reconstruct_intra_block;
-    td->inverse_tx_inter_block_visit = inverse_transform_inter_block;
-    td->predict_inter_block_visit = predict_inter_block;
-    td->cfl_store_inter_block_visit = cfl_store_inter_block;
-  } else {
-    td->predict_and_recon_intra_block_visit = decode_block_void;
-    td->inverse_tx_inter_block_visit = decode_block_void;
-    td->predict_inter_block_visit = predict_inter_block_void;
-    td->cfl_store_inter_block_visit = cfl_store_inter_block_void;
-  }
-}
-
-// Parses and decodes every superblock of tile (tile_row, tile_col) using
-// td's bit reader.
-//
-// The above context, loop filter deltas and loop restoration state are reset
-// once per tile; the left context is reset once per superblock row. If the
-// reader overflows, td->xd.corrupted is set and the function returns at
-// once. Otherwise the trailing bits are checked after the last superblock
-// and the result is merged into td->xd.corrupted.
-static void decode_tile(AV1Decoder *pbi, ThreadData *const td, int tile_row,
- int tile_col) {
- TileInfo tile_info;
-
- AV1_COMMON *const cm = &pbi->common;
- const int num_planes = av1_num_planes(cm);
-
- av1_tile_set_row(&tile_info, cm, tile_row);
- av1_tile_set_col(&tile_info, cm, tile_col);
- av1_zero_above_context(cm, &td->xd, tile_info.mi_col_start,
- tile_info.mi_col_end, tile_row);
- av1_reset_loop_filter_delta(&td->xd, num_planes);
- av1_reset_loop_restoration(&td->xd, num_planes);
-
- for (int mi_row = tile_info.mi_row_start; mi_row < tile_info.mi_row_end;
- mi_row += cm->seq_params.mib_size) {
- av1_zero_left_context(&td->xd);
-
- for (int mi_col = tile_info.mi_col_start; mi_col < tile_info.mi_col_end;
- mi_col += cm->seq_params.mib_size) {
- // Every superblock uses the thread's own cb_buffer_base entry at
- // offset (0, 0).
- set_cb_buffer(pbi, &td->xd, &td->cb_buffer_base, num_planes, 0, 0);
-
- // Bit-stream parsing and decoding of the superblock
- decode_partition(pbi, td, mi_row, mi_col, td->bit_reader,
- cm->seq_params.sb_size, 0x3);
-
- if (aom_reader_has_overflowed(td->bit_reader)) {
- aom_merge_corrupted_flag(&td->xd.corrupted, 1);
- return;
- }
- }
- }
-
- int corrupted =
- (check_trailing_bits_after_symbol_coder(td->bit_reader)) ? 1 : 0;
- aom_merge_corrupted_flag(&td->xd.corrupted, corrupted);
-}
-
-// Decodes, on the calling thread, the tiles with indices in
-// [start_tile, end_tile] and returns a pointer to the end of the consumed
-// tile data.
-//
-// In large-scale-tile mode the row/column range may be narrowed to a single
-// row and/or column by pbi->dec_tile_row / pbi->dec_tile_col, and CDF updates
-// are disabled. If the resulting range is empty or does not overlap
-// [start_tile, end_tile], 'data' is returned unchanged. A corrupted tile is
-// reported through aom_internal_error() with AOM_CODEC_CORRUPT_FRAME.
-static const uint8_t *decode_tiles(AV1Decoder *pbi, const uint8_t *data,
- const uint8_t *data_end, int start_tile,
- int end_tile) {
- AV1_COMMON *const cm = &pbi->common;
- ThreadData *const td = &pbi->td;
- const int tile_cols = cm->tile_cols;
- const int tile_rows = cm->tile_rows;
- const int n_tiles = tile_cols * tile_rows;
- TileBufferDec(*const tile_buffers)[MAX_TILE_COLS] = pbi->tile_buffers;
- const int dec_tile_row = AOMMIN(pbi->dec_tile_row, tile_rows);
- const int single_row = pbi->dec_tile_row >= 0;
- const int dec_tile_col = AOMMIN(pbi->dec_tile_col, tile_cols);
- const int single_col = pbi->dec_tile_col >= 0;
- int tile_rows_start;
- int tile_rows_end;
- int tile_cols_start;
- int tile_cols_end;
- int inv_col_order;
- int inv_row_order;
- int tile_row, tile_col;
- uint8_t allow_update_cdf;
- const uint8_t *raw_data_end = NULL;
-
- if (cm->large_scale_tile) {
- tile_rows_start = single_row ? dec_tile_row : 0;
- tile_rows_end = single_row ? dec_tile_row + 1 : tile_rows;
- tile_cols_start = single_col ? dec_tile_col : 0;
- tile_cols_end = single_col ? tile_cols_start + 1 : tile_cols;
- inv_col_order = pbi->inv_tile_order && !single_col;
- inv_row_order = pbi->inv_tile_order && !single_row;
- allow_update_cdf = 0;
- } else {
- tile_rows_start = 0;
- tile_rows_end = tile_rows;
- tile_cols_start = 0;
- tile_cols_end = tile_cols;
- inv_col_order = pbi->inv_tile_order;
- inv_row_order = pbi->inv_tile_order;
- allow_update_cdf = 1;
- }
-
- // No tiles to decode.
- if (tile_rows_end <= tile_rows_start || tile_cols_end <= tile_cols_start ||
- // First tile is larger than end_tile.
- tile_rows_start * cm->tile_cols + tile_cols_start > end_tile ||
- // Last tile is smaller than start_tile.
- (tile_rows_end - 1) * cm->tile_cols + tile_cols_end - 1 < start_tile)
- return data;
-
- allow_update_cdf = allow_update_cdf && !cm->disable_cdf_update;
-
- assert(tile_rows <= MAX_TILE_ROWS);
- assert(tile_cols <= MAX_TILE_COLS);
-
- // Locate the tile buffers. With EXT_TILE_DEBUG the large-scale-tile paths
- // are selected first; the trailing 'else' of that chain binds to the
- // get_tile_buffers() call that follows the #endif.
-#if EXT_TILE_DEBUG
- if (cm->large_scale_tile && !pbi->ext_tile_debug)
- raw_data_end = get_ls_single_tile_buffer(pbi, data, tile_buffers);
- else if (cm->large_scale_tile && pbi->ext_tile_debug)
- raw_data_end = get_ls_tile_buffers(pbi, data, data_end, tile_buffers);
- else
-#endif // EXT_TILE_DEBUG
- get_tile_buffers(pbi, data, data_end, tile_buffers, start_tile, end_tile);
-
- // (Re)allocate the per-tile data when the tile count has changed.
- if (pbi->tile_data == NULL || n_tiles != pbi->allocated_tiles) {
- decoder_alloc_tile_data(pbi, n_tiles);
- }
-#if CONFIG_ACCOUNTING
- if (pbi->acct_enabled) {
- aom_accounting_reset(&pbi->accounting);
- }
-#endif
-
- // 0x3: enable both the parse and the reconstruction callbacks.
- set_decode_func_pointers(&pbi->td, 0x3);
-
- // Load all tile information into thread_data.
- td->xd = pbi->mb;
- td->xd.corrupted = 0;
- td->xd.mc_buf[0] = td->mc_buf[0];
- td->xd.mc_buf[1] = td->mc_buf[1];
- td->xd.tmp_conv_dst = td->tmp_conv_dst;
- for (int j = 0; j < 2; ++j) {
- td->xd.tmp_obmc_bufs[j] = td->tmp_obmc_bufs[j];
- }
-
- for (tile_row = tile_rows_start; tile_row < tile_rows_end; ++tile_row) {
- const int row = inv_row_order ? tile_rows - 1 - tile_row : tile_row;
-
- for (tile_col = tile_cols_start; tile_col < tile_cols_end; ++tile_col) {
- const int col = inv_col_order ? tile_cols - 1 - tile_col : tile_col;
- TileDataDec *const tile_data = pbi->tile_data + row * cm->tile_cols + col;
- const TileBufferDec *const tile_bs_buf = &tile_buffers[row][col];
-
- // Skip tiles outside the requested [start_tile, end_tile] range.
- if (row * cm->tile_cols + col < start_tile ||
- row * cm->tile_cols + col > end_tile)
- continue;
-
- td->bit_reader = &tile_data->bit_reader;
- av1_zero(td->dqcoeff);
- av1_tile_init(&td->xd.tile, cm, row, col);
- td->xd.current_qindex = cm->base_qindex;
- setup_bool_decoder(tile_bs_buf->data, data_end, tile_bs_buf->size,
- &cm->error, td->bit_reader, allow_update_cdf);
-#if CONFIG_ACCOUNTING
- if (pbi->acct_enabled) {
- td->bit_reader->accounting = &pbi->accounting;
- td->bit_reader->accounting->last_tell_frac =
- aom_reader_tell_frac(td->bit_reader);
- } else {
- td->bit_reader->accounting = NULL;
- }
-#endif
- av1_init_macroblockd(cm, &td->xd, td->dqcoeff);
- av1_init_above_context(cm, &td->xd, row);
-
- // Initialise the tile context from the frame context
- tile_data->tctx = *cm->fc;
- td->xd.tile_ctx = &tile_data->tctx;
-
- // decode tile
- decode_tile(pbi, td, row, col);
- aom_merge_corrupted_flag(&pbi->mb.corrupted, td->xd.corrupted);
- if (pbi->mb.corrupted)
- aom_internal_error(&cm->error, AOM_CODEC_CORRUPT_FRAME,
- "Failed to decode tile data");
- }
- }
-
- if (cm->large_scale_tile) {
- if (n_tiles == 1) {
- // Find the end of the single tile buffer
- return aom_reader_find_end(&pbi->tile_data->bit_reader);
- }
- // Return the end of the last tile buffer
- return raw_data_end;
- }
- // Otherwise the end of the data is the end of tile end_tile's reader.
- TileDataDec *const tile_data = pbi->tile_data + end_tile;
-
- return aom_reader_find_end(&tile_data->bit_reader);
-}
-
-// Dequeues the next tile job from tile_mt_info's job queue.
-//
-// With CONFIG_MULTITHREAD the dequeue happens under job_mutex, and NULL is
-// returned once jobs_dequeued has caught up with jobs_enqueued. Without
-// CONFIG_MULTITHREAD the function always returns NULL.
-static TileJobsDec *get_dec_job_info(AV1DecTileMT *tile_mt_info) {
- TileJobsDec *cur_job_info = NULL;
-#if CONFIG_MULTITHREAD
- pthread_mutex_lock(tile_mt_info->job_mutex);
-
- if (tile_mt_info->jobs_dequeued < tile_mt_info->jobs_enqueued) {
- cur_job_info = tile_mt_info->job_queue + tile_mt_info->jobs_dequeued;
- tile_mt_info->jobs_dequeued++;
- }
-
- pthread_mutex_unlock(tile_mt_info->job_mutex);
-#else
- (void)tile_mt_info;
-#endif
- return cur_job_info;
-}
-
-// Prepares a worker's ThreadData for decoding one tile.
-//
-// Binds td's bit reader to tile_data's reader and initialises it over
-// tile_buffer, zeroes td->dqcoeff, and sets up the tile bounds, quantiser
-// index, macroblockd state and above context. Errors are routed to the
-// worker's own error_info, and the tile's CDF context is initialised as a
-// copy of the frame context cm->fc.
-static void tile_worker_hook_init(AV1Decoder *const pbi,
- DecWorkerData *const thread_data,
- const TileBufferDec *const tile_buffer,
- TileDataDec *const tile_data,
- uint8_t allow_update_cdf) {
- AV1_COMMON *cm = &pbi->common;
- ThreadData *const td = thread_data->td;
- int tile_row = tile_data->tile_info.tile_row;
- int tile_col = tile_data->tile_info.tile_col;
-
- td->bit_reader = &tile_data->bit_reader;
- av1_zero(td->dqcoeff);
- av1_tile_init(&td->xd.tile, cm, tile_row, tile_col);
- td->xd.current_qindex = cm->base_qindex;
- setup_bool_decoder(tile_buffer->data, thread_data->data_end,
- tile_buffer->size, &thread_data->error_info,
- td->bit_reader, allow_update_cdf);
-#if CONFIG_ACCOUNTING
- if (pbi->acct_enabled) {
- td->bit_reader->accounting = &pbi->accounting;
- td->bit_reader->accounting->last_tell_frac =
- aom_reader_tell_frac(td->bit_reader);
- } else {
- td->bit_reader->accounting = NULL;
- }
-#endif
- av1_init_macroblockd(cm, &td->xd, td->dqcoeff);
- // Set after av1_init_macroblockd() so the worker-local error_info is the
- // one left in xd.
- td->xd.error_info = &thread_data->error_info;
- av1_init_above_context(cm, &td->xd, tile_row);
-
- // Initialise the tile context from the frame context
- tile_data->tctx = *cm->fc;
- td->xd.tile_ctx = &tile_data->tctx;
-#if CONFIG_ACCOUNTING
- // td->bit_reader points at tile_data->bit_reader, so this refreshes the
- // same last_tell_frac value that was set above.
- if (pbi->acct_enabled) {
- tile_data->bit_reader.accounting->last_tell_frac =
- aom_reader_tell_frac(&tile_data->bit_reader);
- }
-#endif
-}
-
-// Worker entry point for tile-level multithreaded decoding.
-//
-// arg1 is the worker's DecWorkerData and arg2 is the AV1Decoder. The worker
-// repeatedly takes a job from pbi->tile_mt_info and decodes that tile. It
-// stops when the queue is empty or its xd has been marked corrupted.
-//
-// Returns 1 when every tile it decoded was clean. Returns 0 when a tile was
-// corrupted or when an error jumped back to the setjmp() point below.
-static int tile_worker_hook(void *arg1, void *arg2) {
- DecWorkerData *const thread_data = (DecWorkerData *)arg1;
- AV1Decoder *const pbi = (AV1Decoder *)arg2;
- AV1_COMMON *cm = &pbi->common;
- ThreadData *const td = thread_data->td;
- uint8_t allow_update_cdf;
-
- // The jmp_buf is valid only for the duration of the function that calls
- // setjmp(). Therefore, this function must reset the 'setjmp' field to 0
- // before it returns.
- if (setjmp(thread_data->error_info.jmp)) {
- thread_data->error_info.setjmp = 0;
- thread_data->td->xd.corrupted = 1;
- return 0;
- }
- thread_data->error_info.setjmp = 1;
-
- // CDF updates are off in large-scale-tile mode or when the frame disables
- // them.
- allow_update_cdf = cm->large_scale_tile ? 0 : 1;
- allow_update_cdf = allow_update_cdf && !cm->disable_cdf_update;
-
- set_decode_func_pointers(td, 0x3);
-
- assert(cm->tile_cols > 0);
- while (1) {
- TileJobsDec *cur_job_info = get_dec_job_info(&pbi->tile_mt_info);
-
- if (cur_job_info != NULL && !td->xd.corrupted) {
- const TileBufferDec *const tile_buffer = cur_job_info->tile_buffer;
- TileDataDec *const tile_data = cur_job_info->tile_data;
- tile_worker_hook_init(pbi, thread_data, tile_buffer, tile_data,
- allow_update_cdf);
- // decode tile
- int tile_row = tile_data->tile_info.tile_row;
- int tile_col = tile_data->tile_info.tile_col;
- decode_tile(pbi, td, tile_row, tile_col);
- } else {
- break;
- }
- }
- thread_data->error_info.setjmp = 0;
- return !td->xd.corrupted;
-}
-
-// Selects the next superblock row for a row-MT worker to decode.
-//
-// Returns 1 with *end_of_frame set when all rows have been started or
-// row_mt_exit is set. Returns 0 when no parsed row is waiting to be decoded.
-// Otherwise it fills next_job_info with the chosen tile and mi_row, advances
-// the tile's and the frame's decode-started counters, increments the tile's
-// num_threads_working, and returns 1.
-//
-// Among tiles with parsed rows waiting, the one with the fewest working
-// threads is chosen; ties go to the tile with the most mi units left to
-// decode.
-//
-// NOTE(review): this function reads and updates shared counters without
-// taking a lock itself; presumably the caller holds pbi->row_mt_mutex_ -
-// confirm against the call site.
-static int get_next_job_info(AV1Decoder *const pbi,
- AV1DecRowMTJobInfo *next_job_info,
- int *end_of_frame) {
- AV1_COMMON *cm = &pbi->common;
- TileDataDec *tile_data;
- AV1DecRowMTSync *dec_row_mt_sync;
- AV1DecRowMTInfo *frame_row_mt_info = &pbi->frame_row_mt_info;
- TileInfo tile_info;
- const int tile_rows_start = frame_row_mt_info->tile_rows_start;
- const int tile_rows_end = frame_row_mt_info->tile_rows_end;
- const int tile_cols_start = frame_row_mt_info->tile_cols_start;
- const int tile_cols_end = frame_row_mt_info->tile_cols_end;
- const int start_tile = frame_row_mt_info->start_tile;
- const int end_tile = frame_row_mt_info->end_tile;
- const int sb_mi_size = mi_size_wide[cm->seq_params.sb_size];
- int num_mis_to_decode, num_threads_working;
- int num_mis_waiting_for_decode;
- int min_threads_working = INT_MAX;
- int max_mis_to_decode = 0;
- int tile_row_idx, tile_col_idx;
- int tile_row = 0;
- int tile_col = 0;
-
- memset(next_job_info, 0, sizeof(*next_job_info));
-
- // Frame decode is completed or error is encountered.
- *end_of_frame = (frame_row_mt_info->mi_rows_decode_started ==
- frame_row_mt_info->mi_rows_to_decode) ||
- (frame_row_mt_info->row_mt_exit == 1);
- if (*end_of_frame) {
- return 1;
- }
-
- // Decoding cannot start as bit-stream parsing is not complete.
- if (frame_row_mt_info->mi_rows_parse_done -
- frame_row_mt_info->mi_rows_decode_started ==
- 0)
- return 0;
-
- // Choose the tile to decode.
- for (tile_row_idx = tile_rows_start; tile_row_idx < tile_rows_end;
- ++tile_row_idx) {
- for (tile_col_idx = tile_cols_start; tile_col_idx < tile_cols_end;
- ++tile_col_idx) {
- if (tile_row_idx * cm->tile_cols + tile_col_idx < start_tile ||
- tile_row_idx * cm->tile_cols + tile_col_idx > end_tile)
- continue;
-
- tile_data = pbi->tile_data + tile_row_idx * cm->tile_cols + tile_col_idx;
- dec_row_mt_sync = &tile_data->dec_row_mt_sync;
-
- num_threads_working = dec_row_mt_sync->num_threads_working;
- // Work that has been parsed but whose decode has not started yet.
- num_mis_waiting_for_decode = (dec_row_mt_sync->mi_rows_parse_done -
- dec_row_mt_sync->mi_rows_decode_started) *
- dec_row_mt_sync->mi_cols;
- // All work in the tile whose decode has not started yet.
- num_mis_to_decode =
- (dec_row_mt_sync->mi_rows - dec_row_mt_sync->mi_rows_decode_started) *
- dec_row_mt_sync->mi_cols;
-
- assert(num_mis_to_decode >= num_mis_waiting_for_decode);
-
- // Pick the tile which has minimum number of threads working on it.
- if (num_mis_waiting_for_decode > 0) {
- if (num_threads_working < min_threads_working) {
- min_threads_working = num_threads_working;
- // A new minimum restarts the tie-break on remaining work.
- max_mis_to_decode = 0;
- }
- if (num_threads_working == min_threads_working &&
- num_mis_to_decode > max_mis_to_decode) {
- max_mis_to_decode = num_mis_to_decode;
- tile_row = tile_row_idx;
- tile_col = tile_col_idx;
- }
- }
- }
- }
-
- tile_data = pbi->tile_data + tile_row * cm->tile_cols + tile_col;
- tile_info = tile_data->tile_info;
- dec_row_mt_sync = &tile_data->dec_row_mt_sync;
-
- next_job_info->tile_row = tile_row;
- next_job_info->tile_col = tile_col;
- next_job_info->mi_row =
- dec_row_mt_sync->mi_rows_decode_started + tile_info.mi_row_start;
-
- // Claim one superblock row of the chosen tile for the calling worker.
- dec_row_mt_sync->num_threads_working++;
- dec_row_mt_sync->mi_rows_decode_started += sb_mi_size;
- frame_row_mt_info->mi_rows_decode_started += sb_mi_size;
-
- return 1;
-}
-
-// Records that one more superblock row (sb_mi_size mi rows) of tile_data has
-// been parsed, both on the tile and on the frame-level counters. With
-// CONFIG_MULTITHREAD the update is made under pbi->row_mt_mutex_ and all
-// waiters on pbi->row_mt_cond_ are woken.
-static INLINE void signal_parse_sb_row_done(AV1Decoder *const pbi,
- TileDataDec *const tile_data,
- const int sb_mi_size) {
- AV1DecRowMTInfo *frame_row_mt_info = &pbi->frame_row_mt_info;
-#if CONFIG_MULTITHREAD
- pthread_mutex_lock(pbi->row_mt_mutex_);
-#endif
- tile_data->dec_row_mt_sync.mi_rows_parse_done += sb_mi_size;
- frame_row_mt_info->mi_rows_parse_done += sb_mi_size;
-#if CONFIG_MULTITHREAD
- pthread_cond_broadcast(pbi->row_mt_cond_);
- pthread_mutex_unlock(pbi->row_mt_mutex_);
-#endif
-}
-/* Worker hook for row-based multi-threaded decoding.
- * arg1 is cast to DecWorkerData* and arg2 to AV1Decoder*. The hook runs two
- * loops: first it pulls tile jobs via get_dec_job_info() and, for each tile,
- * calls decode_partition() on every superblock with 0x1 as the last argument,
- * signalling after each superblock row; then it repeatedly obtains a
- * superblock row from get_next_job_info() and runs decode_tile_sb_row() on it
- * until end_of_frame is reported.
- * Returns 0 when the setjmp error path is taken, otherwise !td->xd.corrupted.
- */
-static int row_mt_worker_hook(void *arg1, void *arg2) {
- DecWorkerData *const thread_data = (DecWorkerData *)arg1;
- AV1Decoder *const pbi = (AV1Decoder *)arg2;
- AV1_COMMON *cm = &pbi->common;
- ThreadData *const td = thread_data->td;
- uint8_t allow_update_cdf;
- const int sb_mi_size = mi_size_wide[cm->seq_params.sb_size];
- AV1DecRowMTInfo *frame_row_mt_info = &pbi->frame_row_mt_info;
- td->xd.corrupted = 0;
-
- // The jmp_buf is valid only for the duration of the function that calls
- // setjmp(). Therefore, this function must reset the 'setjmp' field to 0
- // before it returns.
- if (setjmp(thread_data->error_info.jmp)) {
- thread_data->error_info.setjmp = 0;
- thread_data->td->xd.corrupted = 1;
-#if CONFIG_MULTITHREAD
- pthread_mutex_lock(pbi->row_mt_mutex_);
-#endif
- frame_row_mt_info->row_mt_exit = 1;  // set under the mutex, then broadcast to waiters
-#if CONFIG_MULTITHREAD
- pthread_cond_broadcast(pbi->row_mt_cond_);
- pthread_mutex_unlock(pbi->row_mt_mutex_);
-#endif
- return 0;
- }
- thread_data->error_info.setjmp = 1;
-
- const int num_planes = av1_num_planes(cm);
- allow_update_cdf = cm->large_scale_tile ? 0 : 1;
- allow_update_cdf = allow_update_cdf && !cm->disable_cdf_update;
-
- assert(cm->tile_cols > 0);
- while (1) {  // first loop: one iteration per tile job taken from the queue
- TileJobsDec *cur_job_info = get_dec_job_info(&pbi->tile_mt_info);
-
- if (cur_job_info != NULL && !td->xd.corrupted) {
- const TileBufferDec *const tile_buffer = cur_job_info->tile_buffer;
- TileDataDec *const tile_data = cur_job_info->tile_data;
- tile_worker_hook_init(pbi, thread_data, tile_buffer, tile_data,
- allow_update_cdf);
-
- set_decode_func_pointers(td, 0x1);  // NOTE(review): 0x1 presumably selects the parsing stage - confirm
-
- // decode tile
- TileInfo tile_info = tile_data->tile_info;
- int tile_row = tile_info.tile_row;
-
- av1_zero_above_context(cm, &td->xd, tile_info.mi_col_start,
- tile_info.mi_col_end, tile_row);
- av1_reset_loop_filter_delta(&td->xd, num_planes);
- av1_reset_loop_restoration(&td->xd, num_planes);
-
- for (int mi_row = tile_info.mi_row_start; mi_row < tile_info.mi_row_end;
- mi_row += cm->seq_params.mib_size) {
- av1_zero_left_context(&td->xd);
-
- for (int mi_col = tile_info.mi_col_start; mi_col < tile_info.mi_col_end;
- mi_col += cm->seq_params.mib_size) {
- set_cb_buffer(pbi, &td->xd, pbi->cb_buffer_base, num_planes, mi_row,
- mi_col);
-
- // Bit-stream parsing of the superblock
- decode_partition(pbi, td, mi_row, mi_col, td->bit_reader,
- cm->seq_params.sb_size, 0x1);
- }
- signal_parse_sb_row_done(pbi, tile_data, sb_mi_size);  // one superblock row processed
- }
-
- int corrupted =
- (check_trailing_bits_after_symbol_coder(td->bit_reader)) ? 1 : 0;
- aom_merge_corrupted_flag(&td->xd.corrupted, corrupted);
- } else {
- break;  // no job left, or this thread already saw corruption
- }
- }
-
- set_decode_func_pointers(td, 0x2);  // NOTE(review): 0x2 presumably selects the decode stage - confirm
-
- while (1) {  // second loop: one iteration per superblock-row job
- AV1DecRowMTJobInfo next_job_info;
- int end_of_frame = 0;
-
-#if CONFIG_MULTITHREAD
- pthread_mutex_lock(pbi->row_mt_mutex_);
-#endif
- while (!get_next_job_info(pbi, &next_job_info, &end_of_frame)) {  // returned 0: wait and retry
-#if CONFIG_MULTITHREAD
- pthread_cond_wait(pbi->row_mt_cond_, pbi->row_mt_mutex_);
-#endif
- }
-#if CONFIG_MULTITHREAD
- pthread_mutex_unlock(pbi->row_mt_mutex_);
-#endif
-
- if (end_of_frame) break;
-
- int tile_row = next_job_info.tile_row;
- int tile_col = next_job_info.tile_col;
- int mi_row = next_job_info.mi_row;
-
- TileDataDec *tile_data =
- pbi->tile_data + tile_row * cm->tile_cols + tile_col;
- AV1DecRowMTSync *dec_row_mt_sync = &tile_data->dec_row_mt_sync;
- TileInfo tile_info = tile_data->tile_info;
-
- av1_tile_init(&td->xd.tile, cm, tile_row, tile_col);
- av1_init_macroblockd(cm, &td->xd, td->dqcoeff);
- td->xd.error_info = &thread_data->error_info;
-
- decode_tile_sb_row(pbi, td, tile_info, mi_row);
-
-#if CONFIG_MULTITHREAD
- pthread_mutex_lock(pbi->row_mt_mutex_);
-#endif
- dec_row_mt_sync->num_threads_working--;  // this thread is done with the tile's row
-#if CONFIG_MULTITHREAD
- pthread_mutex_unlock(pbi->row_mt_mutex_);
-#endif
- }
- thread_data->error_info.setjmp = 0;
- return !td->xd.corrupted;
-}
-
-// qsort() comparator for TileJobsDec entries; sorts in descending order of
-// tile buffer size. The two sizes are compared directly instead of being
-// cast to int and subtracted, so large sizes cannot be truncated and the
-// subtraction cannot overflow and flip the ordering.
-// Returns a positive value when b's buffer is larger than a's, a negative
-// value when it is smaller, and 0 when both sizes are equal.
-static int compare_tile_buffers(const void *a, const void *b) {
-  const TileJobsDec *const buf1 = (const TileJobsDec *)a;
-  const TileJobsDec *const buf2 = (const TileJobsDec *)b;
-  return (buf2->tile_buffer->size > buf1->tile_buffer->size) -
-         (buf2->tile_buffer->size < buf1->tile_buffer->size);
-}
-
-// Fills pbi->tile_mt_info.job_queue with one entry per tile of the given
-// row/column window whose raster index (row * cm->tile_cols + col) lies in
-// [startTile, endTile], after resetting the enqueued/dequeued counters.
-static void enqueue_tile_jobs(AV1Decoder *pbi, AV1_COMMON *cm,
-                              int tile_rows_start, int tile_rows_end,
-                              int tile_cols_start, int tile_cols_end,
-                              int startTile, int endTile) {
-  AV1DecTileMT *const mt_info = &pbi->tile_mt_info;
-  TileJobsDec *next_job = mt_info->job_queue;
-  mt_info->jobs_enqueued = 0;
-  mt_info->jobs_dequeued = 0;
-
-  for (int r = tile_rows_start; r < tile_rows_end; r++) {
-    for (int c = tile_cols_start; c < tile_cols_end; c++) {
-      const int tile_idx = r * cm->tile_cols + c;
-      if (tile_idx >= startTile && tile_idx <= endTile) {
-        next_job->tile_buffer = &pbi->tile_buffers[r][c];
-        next_job->tile_data = pbi->tile_data + tile_idx;
-        ++next_job;
-        mt_info->jobs_enqueued++;
-      }
-    }
-  }
-}
-
-// Records the tile grid dimensions in tile_mt_info and allocates one
-// job-queue slot per tile. With CONFIG_MULTITHREAD it also allocates and
-// initializes one mutex per tile. Allocations go through CHECK_MEM_ERROR.
-static void alloc_dec_jobs(AV1DecTileMT *tile_mt_info, AV1_COMMON *cm,
-                           int tile_rows, int tile_cols) {
-  const int tile_count = tile_rows * tile_cols;
-  tile_mt_info->alloc_tile_rows = tile_rows;
-  tile_mt_info->alloc_tile_cols = tile_cols;
-#if CONFIG_MULTITHREAD
-  CHECK_MEM_ERROR(cm, tile_mt_info->job_mutex,
-                  aom_malloc(sizeof(*tile_mt_info->job_mutex) * tile_count));
-  for (int idx = 0; idx < tile_count; ++idx) {
-    pthread_mutex_init(tile_mt_info->job_mutex + idx, NULL);
-  }
-#endif
-  CHECK_MEM_ERROR(cm, tile_mt_info->job_queue,
-                  aom_malloc(sizeof(*tile_mt_info->job_queue) * tile_count));
-}
-
-// Releases the scratch buffers held by thread_data (both mc_buf entries,
-// tmp_conv_dst and both tmp_obmc_bufs), sets the freed pointers to NULL and
-// clears mc_buf_size / mc_buf_use_highbd.
-void av1_free_mc_tmp_buf(ThreadData *thread_data) {
-  const int was_highbd = thread_data->mc_buf_use_highbd;
-  for (int i = 0; i < 2; ++i) {
-    // In high bit-depth mode the stored pointer is passed through
-    // CONVERT_TO_SHORTPTR before being handed to aom_free().
-    if (was_highbd) {
-      aom_free(CONVERT_TO_SHORTPTR(thread_data->mc_buf[i]));
-    } else {
-      aom_free(thread_data->mc_buf[i]);
-    }
-    thread_data->mc_buf[i] = NULL;
-  }
-  thread_data->mc_buf_size = 0;
-  thread_data->mc_buf_use_highbd = 0;
-
-  aom_free(thread_data->tmp_conv_dst);
-  thread_data->tmp_conv_dst = NULL;
-
-  aom_free(thread_data->tmp_obmc_bufs[0]);
-  thread_data->tmp_obmc_bufs[0] = NULL;
-  aom_free(thread_data->tmp_obmc_bufs[1]);
-  thread_data->tmp_obmc_bufs[1] = NULL;
-}
-
-// Allocates the scratch buffers for thread_data: two 16-byte aligned mc_buf
-// entries of buf_size bytes (stored via CONVERT_TO_BYTEPTR when use_highbd is
-// set), a 32-byte aligned tmp_conv_dst and two 16-byte aligned tmp_obmc_bufs.
-// buf_size and use_highbd are recorded in thread_data. Allocations go through
-// CHECK_MEM_ERROR.
-static void allocate_mc_tmp_buf(AV1_COMMON *const cm, ThreadData *thread_data,
-                                int buf_size, int use_highbd) {
-  int ref;
-  for (ref = 0; ref < 2; ++ref) {
-    if (!use_highbd) {
-      CHECK_MEM_ERROR(cm, thread_data->mc_buf[ref],
-                      (uint8_t *)aom_memalign(16, buf_size));
-      continue;
-    }
-    uint16_t *hbd_mc_buf;
-    CHECK_MEM_ERROR(cm, hbd_mc_buf, (uint16_t *)aom_memalign(16, buf_size));
-    thread_data->mc_buf[ref] = CONVERT_TO_BYTEPTR(hbd_mc_buf);
-  }
-  thread_data->mc_buf_size = buf_size;
-  thread_data->mc_buf_use_highbd = use_highbd;
-
-  CHECK_MEM_ERROR(cm, thread_data->tmp_conv_dst,
-                  aom_memalign(32, MAX_SB_SIZE * MAX_SB_SIZE *
-                                       sizeof(*thread_data->tmp_conv_dst)));
-  int i;
-  for (i = 0; i < 2; ++i) {
-    CHECK_MEM_ERROR(
-        cm, thread_data->tmp_obmc_bufs[i],
-        aom_memalign(16, 2 * MAX_MB_PLANE * MAX_SB_SQUARE *
-                             sizeof(*thread_data->tmp_obmc_bufs[i])));
-  }
-}
-
-// Prepares the first num_workers tile workers for a new pass. Each worker's
-// ThreadData receives a copy of pbi->mb with the corrupted flag cleared and
-// its scratch-buffer pointers pointed at that thread's own buffers; the
-// worker is then synced and bound to worker_hook with (thread_data, pbi) as
-// data1/data2. When CONFIG_ACCOUNTING is set and accounting is enabled, the
-// accounting state is reset as well.
-static void reset_dec_workers(AV1Decoder *pbi, AVxWorkerHook worker_hook,
-                              int num_workers) {
-  const AVxWorkerInterface *const winterface = aom_get_worker_interface();
-
-  for (int idx = 0; idx < num_workers; ++idx) {
-    DecWorkerData *const thread_data = &pbi->thread_data[idx];
-    AVxWorker *const worker = pbi->tile_workers + idx;
-    ThreadData *const td = thread_data->td;
-
-    td->xd = pbi->mb;
-    td->xd.corrupted = 0;
-    td->xd.mc_buf[0] = td->mc_buf[0];
-    td->xd.mc_buf[1] = td->mc_buf[1];
-    td->xd.tmp_conv_dst = td->tmp_conv_dst;
-    td->xd.tmp_obmc_bufs[0] = td->tmp_obmc_bufs[0];
-    td->xd.tmp_obmc_bufs[1] = td->tmp_obmc_bufs[1];
-    winterface->sync(worker);
-
-    // Install the hook and its two arguments.
-    worker->hook = worker_hook;
-    worker->data1 = thread_data;
-    worker->data2 = pbi;
-  }
-#if CONFIG_ACCOUNTING
-  if (pbi->acct_enabled) aom_accounting_reset(&pbi->accounting);
-#endif
-}
-
-// Starts the first num_workers tile workers. Every worker is handed data_end
-// and has had_error cleared; all workers except the last are started with
-// launch(), and the last one is run with execute().
-static void launch_dec_workers(AV1Decoder *pbi, const uint8_t *data_end,
-                               int num_workers) {
-  const AVxWorkerInterface *const winterface = aom_get_worker_interface();
-  const int last_idx = num_workers - 1;
-
-  for (int idx = 0; idx < num_workers; ++idx) {
-    AVxWorker *const worker = pbi->tile_workers + idx;
-    DecWorkerData *const worker_data = (DecWorkerData *)worker->data1;
-
-    worker_data->data_end = data_end;
-    worker->had_error = 0;
-
-    if (idx != last_idx) {
-      winterface->launch(worker);
-    } else {
-      winterface->execute(worker);
-    }
-  }
-}
-
-// Syncs the first num_workers tile workers, visiting them from the highest
-// index down, and stores the merged result in pbi->mb.corrupted. A worker
-// whose sync() returns 0 contributes a corrupted flag of 1.
-static void sync_dec_workers(AV1Decoder *pbi, int num_workers) {
-  const AVxWorkerInterface *const winterface = aom_get_worker_interface();
-  int any_corrupted = 0;
-
-  for (int idx = num_workers - 1; idx >= 0; --idx) {
-    const int worker_failed = !winterface->sync(&pbi->tile_workers[idx]);
-    aom_merge_corrupted_flag(&any_corrupted, worker_failed);
-  }
-
-  pbi->mb.corrupted = any_corrupted;
-}
-/* Sets up the worker pool and per-worker data for multi-threaded decoding.
- * When pbi->num_workers is 0, arrays of pbi->max_threads workers and
- * DecWorkerData entries are allocated and initialized; every entry except
- * the last is reset through winterface->reset() (a failure is reported as
- * "Tile decoder thread creation failed") and gets its own zeroed ThreadData,
- * while the last entry uses pbi->td. On every call, the scratch buffers of
- * the first max_threads - 1 entries are freed and reallocated when their
- * recorded mc_buf_size differs from the size required by the current
- * use_highbitdepth setting.
- */
-static void decode_mt_init(AV1Decoder *pbi) {
- AV1_COMMON *const cm = &pbi->common;
- const AVxWorkerInterface *const winterface = aom_get_worker_interface();
- int worker_idx;
-
- // Create workers and thread_data
- if (pbi->num_workers == 0) {
- const int num_threads = pbi->max_threads;
- CHECK_MEM_ERROR(cm, pbi->tile_workers,
- aom_malloc(num_threads * sizeof(*pbi->tile_workers)));
- CHECK_MEM_ERROR(cm, pbi->thread_data,
- aom_malloc(num_threads * sizeof(*pbi->thread_data)));
-
- for (worker_idx = 0; worker_idx < num_threads; ++worker_idx) {
- AVxWorker *const worker = &pbi->tile_workers[worker_idx];
- DecWorkerData *const thread_data = pbi->thread_data + worker_idx;
- ++pbi->num_workers;  // counted before the fallible steps below
-
- winterface->init(worker);
- if (worker_idx < num_threads - 1 && !winterface->reset(worker)) {
- aom_internal_error(&cm->error, AOM_CODEC_ERROR,
- "Tile decoder thread creation failed");
- }
-
- if (worker_idx < num_threads - 1) {
- // Allocate thread data.
- CHECK_MEM_ERROR(cm, thread_data->td,
- aom_memalign(32, sizeof(*thread_data->td)));
- av1_zero(*thread_data->td);
- } else {
- // Main thread acts as a worker and uses the thread data in pbi
- thread_data->td = &pbi->td;
- }
- thread_data->error_info.error_code = AOM_CODEC_OK;
- thread_data->error_info.setjmp = 0;
- }
- }
- const int use_highbd = cm->seq_params.use_highbitdepth ? 1 : 0;
- const int buf_size = MC_TEMP_BUF_PELS << use_highbd;  // doubled for high bit depth
- for (worker_idx = 0; worker_idx < pbi->max_threads - 1; ++worker_idx) {  // skips the last entry (pbi->td)
- DecWorkerData *const thread_data = pbi->thread_data + worker_idx;
- if (thread_data->td->mc_buf_size != buf_size) {
- av1_free_mc_tmp_buf(thread_data->td);
- allocate_mc_tmp_buf(cm, thread_data->td, buf_size, use_highbd);
- }
- }
-}
-
-// Builds the tile job queue for the current tile range and sorts it with
-// compare_tile_buffers. The job storage is released and reallocated first
-// when it was sized for a different tile_cols x tile_rows grid.
-static void tile_mt_queue(AV1Decoder *pbi, int tile_cols, int tile_rows,
-                          int tile_rows_start, int tile_rows_end,
-                          int tile_cols_start, int tile_cols_end,
-                          int start_tile, int end_tile) {
-  AV1_COMMON *const cm = &pbi->common;
-  AV1DecTileMT *const mt_info = &pbi->tile_mt_info;
-  const int grid_matches = mt_info->alloc_tile_cols == tile_cols &&
-                           mt_info->alloc_tile_rows == tile_rows;
-
-  if (!grid_matches) {
-    av1_dealloc_dec_jobs(mt_info);
-    alloc_dec_jobs(mt_info, cm, tile_rows, tile_cols);
-  }
-  enqueue_tile_jobs(pbi, cm, tile_rows_start, tile_rows_end, tile_cols_start,
-                    tile_cols_end, start_tile, end_tile);
-  qsort(mt_info->job_queue, mt_info->jobs_enqueued,
-        sizeof(mt_info->job_queue[0]), compare_tile_buffers);
-}
-/* Decodes tiles start_tile..end_tile with tile-level multi-threading.
- * Chooses the tile row/column window (a single row and/or column when
- * cm->large_scale_tile is set and pbi->dec_tile_row / pbi->dec_tile_col are
- * non-negative, the whole grid otherwise), gathers the tile buffers,
- * reallocates tile data if the tile count changed, initializes every tile's
- * TileInfo, queues the jobs and runs tile_worker_hook on
- * AOMMIN(pbi->max_threads, end_tile - start_tile + 1) workers.
- * Returns data unchanged when the window holds no tile in
- * [start_tile, end_tile]; otherwise returns the end position reported by
- * aom_reader_find_end() (or raw_data_end for multi-tile large-scale streams).
- * Reports AOM_CODEC_CORRUPT_FRAME through aom_internal_error() when any
- * worker flagged corruption.
- */
-static const uint8_t *decode_tiles_mt(AV1Decoder *pbi, const uint8_t *data,
- const uint8_t *data_end, int start_tile,
- int end_tile) {
- AV1_COMMON *const cm = &pbi->common;
- const int tile_cols = cm->tile_cols;
- const int tile_rows = cm->tile_rows;
- const int n_tiles = tile_cols * tile_rows;
- TileBufferDec(*const tile_buffers)[MAX_TILE_COLS] = pbi->tile_buffers;
- const int dec_tile_row = AOMMIN(pbi->dec_tile_row, tile_rows);
- const int single_row = pbi->dec_tile_row >= 0;
- const int dec_tile_col = AOMMIN(pbi->dec_tile_col, tile_cols);
- const int single_col = pbi->dec_tile_col >= 0;
- int tile_rows_start;
- int tile_rows_end;
- int tile_cols_start;
- int tile_cols_end;
- int tile_count_tg;
- int num_workers;
- const uint8_t *raw_data_end = NULL;
-
- if (cm->large_scale_tile) {
- tile_rows_start = single_row ? dec_tile_row : 0;
- tile_rows_end = single_row ? dec_tile_row + 1 : tile_rows;
- tile_cols_start = single_col ? dec_tile_col : 0;
- tile_cols_end = single_col ? tile_cols_start + 1 : tile_cols;
- } else {
- tile_rows_start = 0;
- tile_rows_end = tile_rows;
- tile_cols_start = 0;
- tile_cols_end = tile_cols;
- }
- tile_count_tg = end_tile - start_tile + 1;
- num_workers = AOMMIN(pbi->max_threads, tile_count_tg);  // never more workers than tiles
-
- // No tiles to decode.
- if (tile_rows_end <= tile_rows_start || tile_cols_end <= tile_cols_start ||
- // First tile is larger than end_tile.
- tile_rows_start * tile_cols + tile_cols_start > end_tile ||
- // Last tile is smaller than start_tile.
- (tile_rows_end - 1) * tile_cols + tile_cols_end - 1 < start_tile)
- return data;
-
- assert(tile_rows <= MAX_TILE_ROWS);
- assert(tile_cols <= MAX_TILE_COLS);
- assert(tile_count_tg > 0);
- assert(num_workers > 0);
- assert(start_tile <= end_tile);
- assert(start_tile >= 0 && end_tile < n_tiles);
-
- decode_mt_init(pbi);
-
- // get tile size in tile group
-#if EXT_TILE_DEBUG
- if (cm->large_scale_tile) assert(pbi->ext_tile_debug == 1);
- if (cm->large_scale_tile)
- raw_data_end = get_ls_tile_buffers(pbi, data, data_end, tile_buffers);
- else
-#endif // EXT_TILE_DEBUG
- get_tile_buffers(pbi, data, data_end, tile_buffers, start_tile, end_tile);
-
- if (pbi->tile_data == NULL || n_tiles != pbi->allocated_tiles) {
- decoder_alloc_tile_data(pbi, n_tiles);
- }
-
- for (int row = 0; row < tile_rows; row++) {  // all tiles, not only the selected window
- for (int col = 0; col < tile_cols; col++) {
- TileDataDec *tile_data = pbi->tile_data + row * cm->tile_cols + col;
- av1_tile_init(&tile_data->tile_info, cm, row, col);
- }
- }
-
- tile_mt_queue(pbi, tile_cols, tile_rows, tile_rows_start, tile_rows_end,
- tile_cols_start, tile_cols_end, start_tile, end_tile);
-
- reset_dec_workers(pbi, tile_worker_hook, num_workers);
- launch_dec_workers(pbi, data_end, num_workers);
- sync_dec_workers(pbi, num_workers);
-
- if (pbi->mb.corrupted)
- aom_internal_error(&cm->error, AOM_CODEC_CORRUPT_FRAME,
- "Failed to decode tile data");
-
- if (cm->large_scale_tile) {
- if (n_tiles == 1) {
- // Find the end of the single tile buffer
- return aom_reader_find_end(&pbi->tile_data->bit_reader);
- }
- // Return the end of the last tile buffer
- return raw_data_end;
- }
- TileDataDec *const tile_data = pbi->tile_data + end_tile;
-
- return aom_reader_find_end(&tile_data->bit_reader);
-}
-
-// Makes sure pbi->cb_buffer_base has room for
-// ((mi_rows >> mib_size_log2) + 1) * ((mi_cols >> mib_size_log2) + 1)
-// entries. The buffer is freed and reallocated (32-byte aligned) only when
-// the current allocation is smaller than that.
-static void dec_alloc_cb_buf(AV1Decoder *pbi) {
-  AV1_COMMON *const cm = &pbi->common;
-  const int sb_rows = (cm->mi_rows >> cm->seq_params.mib_size_log2) + 1;
-  const int sb_cols = (cm->mi_cols >> cm->seq_params.mib_size_log2) + 1;
-  int size = sb_rows * sb_cols;
-
-  if (pbi->cb_buffer_alloc_size >= size) return;
-
-  av1_dec_free_cb_buf(pbi);
-  CHECK_MEM_ERROR(cm, pbi->cb_buffer_base,
-                  aom_memalign(32, sizeof(*pbi->cb_buffer_base) * size));
-  pbi->cb_buffer_alloc_size = size;
-}
-/* Resets the row-MT bookkeeping before a frame is decoded. Stores the tile
- * window and tile range in pbi->frame_row_mt_info and zeroes its counters
- * and exit flag. For every tile of the window whose raster index lies in
- * [start_tile, end_tile] it resets the tile's dec_row_mt_sync counters,
- * computes mi_rows / mi_cols aligned via ALIGN_POWER_OF_TWO with
- * mib_size_log2, adds mi_rows to the frame total and fills max_sb_rows
- * entries of cur_sb_col with -1. With CONFIG_MULTITHREAD, the shared mutex
- * and condition variable are allocated and initialized if still NULL.
- */
-static void row_mt_frame_init(AV1Decoder *pbi, int tile_rows_start,
- int tile_rows_end, int tile_cols_start,
- int tile_cols_end, int start_tile, int end_tile,
- int max_sb_rows) {
- AV1_COMMON *const cm = &pbi->common;
- AV1DecRowMTInfo *frame_row_mt_info = &pbi->frame_row_mt_info;
-
- frame_row_mt_info->tile_rows_start = tile_rows_start;
- frame_row_mt_info->tile_rows_end = tile_rows_end;
- frame_row_mt_info->tile_cols_start = tile_cols_start;
- frame_row_mt_info->tile_cols_end = tile_cols_end;
- frame_row_mt_info->start_tile = start_tile;
- frame_row_mt_info->end_tile = end_tile;
- frame_row_mt_info->mi_rows_to_decode = 0;  // accumulated in the tile loop below
- frame_row_mt_info->mi_rows_parse_done = 0;
- frame_row_mt_info->mi_rows_decode_started = 0;
- frame_row_mt_info->row_mt_exit = 0;
-
- for (int tile_row = tile_rows_start; tile_row < tile_rows_end; ++tile_row) {
- for (int tile_col = tile_cols_start; tile_col < tile_cols_end; ++tile_col) {
- if (tile_row * cm->tile_cols + tile_col < start_tile ||
- tile_row * cm->tile_cols + tile_col > end_tile)
- continue;
-
- TileDataDec *const tile_data =
- pbi->tile_data + tile_row * cm->tile_cols + tile_col;
- TileInfo tile_info = tile_data->tile_info;
-
- tile_data->dec_row_mt_sync.mi_rows_parse_done = 0;
- tile_data->dec_row_mt_sync.mi_rows_decode_started = 0;
- tile_data->dec_row_mt_sync.num_threads_working = 0;
- tile_data->dec_row_mt_sync.mi_rows =
- ALIGN_POWER_OF_TWO(tile_info.mi_row_end - tile_info.mi_row_start,
- cm->seq_params.mib_size_log2);
- tile_data->dec_row_mt_sync.mi_cols =
- ALIGN_POWER_OF_TWO(tile_info.mi_col_end - tile_info.mi_col_start,
- cm->seq_params.mib_size_log2);
-
- frame_row_mt_info->mi_rows_to_decode +=
- tile_data->dec_row_mt_sync.mi_rows;
-
- // Initialize cur_sb_col to -1 for all SB rows.
- memset(tile_data->dec_row_mt_sync.cur_sb_col, -1,
- sizeof(*tile_data->dec_row_mt_sync.cur_sb_col) * max_sb_rows);
- }
- }
-
-#if CONFIG_MULTITHREAD
- if (pbi->row_mt_mutex_ == NULL) {  // created once, reused on later calls
- CHECK_MEM_ERROR(cm, pbi->row_mt_mutex_,
- aom_malloc(sizeof(*(pbi->row_mt_mutex_))));
- if (pbi->row_mt_mutex_) {
- pthread_mutex_init(pbi->row_mt_mutex_, NULL);
- }
- }
-
- if (pbi->row_mt_cond_ == NULL) {  // created once, reused on later calls
- CHECK_MEM_ERROR(cm, pbi->row_mt_cond_,
- aom_malloc(sizeof(*(pbi->row_mt_cond_))));
- if (pbi->row_mt_cond_) {
- pthread_cond_init(pbi->row_mt_cond_, NULL);
- }
- }
-#endif
-}
-/* Decodes tiles start_tile..end_tile with row-based multi-threading.
- * Follows the same window selection and early-out rules as decode_tiles_mt,
- * but always uses pbi->max_threads workers running row_mt_worker_hook. In
- * addition to queuing tile jobs it (re)allocates each tile's row-MT sync
- * state when the tile count or the maximum number of superblock rows per
- * tile changed, allocates the cb buffer and calls row_mt_frame_init().
- * Returns data unchanged when the window holds no tile in
- * [start_tile, end_tile]; otherwise returns the end position reported by
- * aom_reader_find_end() (or raw_data_end for multi-tile large-scale streams).
- * Reports AOM_CODEC_CORRUPT_FRAME through aom_internal_error() when any
- * worker flagged corruption.
- */
-static const uint8_t *decode_tiles_row_mt(AV1Decoder *pbi, const uint8_t *data,
- const uint8_t *data_end,
- int start_tile, int end_tile) {
- AV1_COMMON *const cm = &pbi->common;
- const int tile_cols = cm->tile_cols;
- const int tile_rows = cm->tile_rows;
- const int n_tiles = tile_cols * tile_rows;
- TileBufferDec(*const tile_buffers)[MAX_TILE_COLS] = pbi->tile_buffers;
- const int dec_tile_row = AOMMIN(pbi->dec_tile_row, tile_rows);
- const int single_row = pbi->dec_tile_row >= 0;
- const int dec_tile_col = AOMMIN(pbi->dec_tile_col, tile_cols);
- const int single_col = pbi->dec_tile_col >= 0;
- int tile_rows_start;
- int tile_rows_end;
- int tile_cols_start;
- int tile_cols_end;
- int tile_count_tg;
- int num_workers;
- const uint8_t *raw_data_end = NULL;
- int max_sb_rows = 0;
-
- if (cm->large_scale_tile) {
- tile_rows_start = single_row ? dec_tile_row : 0;
- tile_rows_end = single_row ? dec_tile_row + 1 : tile_rows;
- tile_cols_start = single_col ? dec_tile_col : 0;
- tile_cols_end = single_col ? tile_cols_start + 1 : tile_cols;
- } else {
- tile_rows_start = 0;
- tile_rows_end = tile_rows;
- tile_cols_start = 0;
- tile_cols_end = tile_cols;
- }
- tile_count_tg = end_tile - start_tile + 1;
- num_workers = pbi->max_threads;  // not capped by the tile count here
-
- // No tiles to decode.
- if (tile_rows_end <= tile_rows_start || tile_cols_end <= tile_cols_start ||
- // First tile is larger than end_tile.
- tile_rows_start * tile_cols + tile_cols_start > end_tile ||
- // Last tile is smaller than start_tile.
- (tile_rows_end - 1) * tile_cols + tile_cols_end - 1 < start_tile)
- return data;
-
- assert(tile_rows <= MAX_TILE_ROWS);
- assert(tile_cols <= MAX_TILE_COLS);
- assert(tile_count_tg > 0);
- assert(num_workers > 0);
- assert(start_tile <= end_tile);
- assert(start_tile >= 0 && end_tile < n_tiles);
-
- (void)tile_count_tg;  // only referenced by the assert above
-
- decode_mt_init(pbi);
-
- // get tile size in tile group
-#if EXT_TILE_DEBUG
- if (cm->large_scale_tile) assert(pbi->ext_tile_debug == 1);
- if (cm->large_scale_tile)
- raw_data_end = get_ls_tile_buffers(pbi, data, data_end, tile_buffers);
- else
-#endif // EXT_TILE_DEBUG
- get_tile_buffers(pbi, data, data_end, tile_buffers, start_tile, end_tile);
-
- if (pbi->tile_data == NULL || n_tiles != pbi->allocated_tiles) {
- for (int i = 0; i < pbi->allocated_tiles; i++) {  // release row-MT sync state of the old tiles first
- TileDataDec *const tile_data = pbi->tile_data + i;
- av1_dec_row_mt_dealloc(&tile_data->dec_row_mt_sync);
- }
- decoder_alloc_tile_data(pbi, n_tiles);
- }
-
- for (int row = 0; row < tile_rows; row++) {
- for (int col = 0; col < tile_cols; col++) {
- TileDataDec *tile_data = pbi->tile_data + row * cm->tile_cols + col;
- av1_tile_init(&tile_data->tile_info, cm, row, col);
-
- max_sb_rows = AOMMAX(max_sb_rows,
- av1_get_sb_rows_in_tile(cm, tile_data->tile_info));
- }
- }
-
- if (pbi->allocated_row_mt_sync_rows != max_sb_rows) {  // sync arrays sized for a different row count
- for (int i = 0; i < n_tiles; ++i) {
- TileDataDec *const tile_data = pbi->tile_data + i;
- av1_dec_row_mt_dealloc(&tile_data->dec_row_mt_sync);
- dec_row_mt_alloc(&tile_data->dec_row_mt_sync, cm, max_sb_rows);
- }
- pbi->allocated_row_mt_sync_rows = max_sb_rows;
- }
-
- tile_mt_queue(pbi, tile_cols, tile_rows, tile_rows_start, tile_rows_end,
- tile_cols_start, tile_cols_end, start_tile, end_tile);
-
- dec_alloc_cb_buf(pbi);
-
- row_mt_frame_init(pbi, tile_rows_start, tile_rows_end, tile_cols_start,
- tile_cols_end, start_tile, end_tile, max_sb_rows);
-
- reset_dec_workers(pbi, row_mt_worker_hook, num_workers);
- launch_dec_workers(pbi, data_end, num_workers);
- sync_dec_workers(pbi, num_workers);
-
- if (pbi->mb.corrupted)
- aom_internal_error(&cm->error, AOM_CODEC_CORRUPT_FRAME,
- "Failed to decode tile data");
-
- if (cm->large_scale_tile) {
- if (n_tiles == 1) {
- // Find the end of the single tile buffer
- return aom_reader_find_end(&pbi->tile_data->bit_reader);
- }
- // Return the end of the last tile buffer
- return raw_data_end;
- }
- TileDataDec *const tile_data = pbi->tile_data + end_tile;
-
- return aom_reader_find_end(&tile_data->bit_reader);
-}
-
-static void error_handler(void *data) {
- AV1_COMMON *const cm = (AV1_COMMON *)data;
- aom_internal_error(&cm->error, AOM_CODEC_CORRUPT_FRAME, "Truncated packet");
-}
-
-// Parses the high_bitdepth flag of color_config() and, for PROFILE_2 with
-// high bit depth, the twelve_bit flag, then derives seq_params->bit_depth
-// from them and seq_params->profile:
-//   PROFILE_2 + high_bitdepth -> 12 or 10 bits (twelve_bit)
-//   profile <= PROFILE_2      -> 10 or 8 bits (high_bitdepth)
-// Any other profile is reported through aom_internal_error(); bit-reader
-// errors are reported through rb->error_handler().
-static void read_bitdepth(struct aom_read_bit_buffer *rb,
-                          SequenceHeader *seq_params,
-                          struct aom_internal_error_info *error_info) {
-  const int high_bitdepth = aom_rb_read_bit(rb);
-
-  if (seq_params->profile > PROFILE_2) {
-    aom_internal_error(error_info, AOM_CODEC_UNSUP_BITSTREAM,
-                       "Unsupported profile/bit-depth combination");
-    return;
-  }
-
-  if (seq_params->profile == PROFILE_2 && high_bitdepth) {
-    const int twelve_bit = aom_rb_read_bit(rb);
-    seq_params->bit_depth = twelve_bit ? AOM_BITS_12 : AOM_BITS_10;
-    return;
-  }
-
-  seq_params->bit_depth = high_bitdepth ? AOM_BITS_10 : AOM_BITS_8;
-}
-/* Parses the film grain parameters of a frame from rb into
- * cm->film_grain_params.
- * If apply_grain is 0 the whole structure is zeroed and nothing else is
- * read. Otherwise a 16-bit random seed is read, followed by
- * update_parameters (read only for INTER_FRAME, forced to 1 otherwise).
- * When update_parameters is 0, the parameters are copied from the reference
- * buffer selected by a 3-bit index, keeping the newly read seed. When it is
- * 1, the scaling points (at most 14 luma, 10 per chroma plane), scaling
- * shift, AR coefficients, shifts, chroma multipliers/offsets and the
- * overlap / clip flags are read in order.
- * Invalid values are reported through aom_internal_error() with
- * AOM_CODEC_UNSUP_BITSTREAM.
- */
-void av1_read_film_grain_params(AV1_COMMON *cm,
- struct aom_read_bit_buffer *rb) {
- aom_film_grain_t *pars = &cm->film_grain_params;
- const SequenceHeader *const seq_params = &cm->seq_params;
-
- pars->apply_grain = aom_rb_read_bit(rb);
- if (!pars->apply_grain) {
- memset(pars, 0, sizeof(*pars));
- return;
- }
-
- pars->random_seed = aom_rb_read_literal(rb, 16);
- if (cm->frame_type == INTER_FRAME)
- pars->update_parameters = aom_rb_read_bit(rb);
- else
- pars->update_parameters = 1;
-
- pars->bit_depth = seq_params->bit_depth;
-
- if (!pars->update_parameters) {
- // inherit parameters from a previous reference frame
- RefCntBuffer *const frame_bufs = cm->buffer_pool->frame_bufs;
- int film_grain_params_ref_idx = aom_rb_read_literal(rb, 3);
- int buf_idx = cm->ref_frame_map[film_grain_params_ref_idx];
- if (buf_idx == INVALID_IDX) {
- aom_internal_error(&cm->error, AOM_CODEC_UNSUP_BITSTREAM,
- "Invalid Film grain reference idx");
- }
- if (!frame_bufs[buf_idx].film_grain_params_present) {
- aom_internal_error(&cm->error, AOM_CODEC_UNSUP_BITSTREAM,
- "Film grain reference parameters not available");
- }
- uint16_t random_seed = pars->random_seed;
- *pars = frame_bufs[buf_idx].film_grain_params; // inherit parameters
- pars->random_seed = random_seed; // with new random seed
- return;
- }
-
- // Scaling functions parameters
- pars->num_y_points = aom_rb_read_literal(rb, 4); // max 14
- if (pars->num_y_points > 14)
- aom_internal_error(&cm->error, AOM_CODEC_UNSUP_BITSTREAM,
- "Number of points for film grain luma scaling function "
- "exceeds the maximum value.");
- for (int i = 0; i < pars->num_y_points; i++) {
- pars->scaling_points_y[i][0] = aom_rb_read_literal(rb, 8);
- if (i && pars->scaling_points_y[i - 1][0] >= pars->scaling_points_y[i][0])
- aom_internal_error(&cm->error, AOM_CODEC_UNSUP_BITSTREAM,
- "First coordinate of the scaling function points "
- "shall be increasing.");
- pars->scaling_points_y[i][1] = aom_rb_read_literal(rb, 8);
- }
-
- if (!seq_params->monochrome)
- pars->chroma_scaling_from_luma = aom_rb_read_bit(rb);
- else
- pars->chroma_scaling_from_luma = 0;
-
- if (seq_params->monochrome || pars->chroma_scaling_from_luma ||
- ((seq_params->subsampling_x == 1) && (seq_params->subsampling_y == 1) &&
- (pars->num_y_points == 0))) {
- pars->num_cb_points = 0;  // chroma points are not coded in these cases
- pars->num_cr_points = 0;
- } else {
- pars->num_cb_points = aom_rb_read_literal(rb, 4); // max 10
- if (pars->num_cb_points > 10)
- aom_internal_error(&cm->error, AOM_CODEC_UNSUP_BITSTREAM,
- "Number of points for film grain cb scaling function "
- "exceeds the maximum value.");
- for (int i = 0; i < pars->num_cb_points; i++) {
- pars->scaling_points_cb[i][0] = aom_rb_read_literal(rb, 8);
- if (i &&
- pars->scaling_points_cb[i - 1][0] >= pars->scaling_points_cb[i][0])
- aom_internal_error(&cm->error, AOM_CODEC_UNSUP_BITSTREAM,
- "First coordinate of the scaling function points "
- "shall be increasing.");
- pars->scaling_points_cb[i][1] = aom_rb_read_literal(rb, 8);
- }
-
- pars->num_cr_points = aom_rb_read_literal(rb, 4); // max 10
- if (pars->num_cr_points > 10)
- aom_internal_error(&cm->error, AOM_CODEC_UNSUP_BITSTREAM,
- "Number of points for film grain cr scaling function "
- "exceeds the maximum value.");
- for (int i = 0; i < pars->num_cr_points; i++) {
- pars->scaling_points_cr[i][0] = aom_rb_read_literal(rb, 8);
- if (i &&
- pars->scaling_points_cr[i - 1][0] >= pars->scaling_points_cr[i][0])
- aom_internal_error(&cm->error, AOM_CODEC_UNSUP_BITSTREAM,
- "First coordinate of the scaling function points "
- "shall be increasing.");
- pars->scaling_points_cr[i][1] = aom_rb_read_literal(rb, 8);
- }
-
- if ((seq_params->subsampling_x == 1) && (seq_params->subsampling_y == 1) &&
- (((pars->num_cb_points == 0) && (pars->num_cr_points != 0)) ||
- ((pars->num_cb_points != 0) && (pars->num_cr_points == 0))))
- aom_internal_error(&cm->error, AOM_CODEC_UNSUP_BITSTREAM,
- "In YCbCr 4:2:0, film grain shall be applied "
- "to both chroma components or neither.");
- }
-
- pars->scaling_shift = aom_rb_read_literal(rb, 2) + 8; // 8 + value
-
- // AR coefficients
- // Only sent if the corresponding scaling function has
- // more than 0 points
-
- pars->ar_coeff_lag = aom_rb_read_literal(rb, 2);
-
- int num_pos_luma = 2 * pars->ar_coeff_lag * (pars->ar_coeff_lag + 1);
- int num_pos_chroma = num_pos_luma;
- if (pars->num_y_points > 0) ++num_pos_chroma;  // one extra chroma coefficient when luma points exist
-
- if (pars->num_y_points)
- for (int i = 0; i < num_pos_luma; i++)
- pars->ar_coeffs_y[i] = aom_rb_read_literal(rb, 8) - 128;  // coded with a +128 offset
-
- if (pars->num_cb_points || pars->chroma_scaling_from_luma)
- for (int i = 0; i < num_pos_chroma; i++)
- pars->ar_coeffs_cb[i] = aom_rb_read_literal(rb, 8) - 128;
-
- if (pars->num_cr_points || pars->chroma_scaling_from_luma)
- for (int i = 0; i < num_pos_chroma; i++)
- pars->ar_coeffs_cr[i] = aom_rb_read_literal(rb, 8) - 128;
-
- pars->ar_coeff_shift = aom_rb_read_literal(rb, 2) + 6; // 6 + value
-
- pars->grain_scale_shift = aom_rb_read_literal(rb, 2);
-
- if (pars->num_cb_points) {
- pars->cb_mult = aom_rb_read_literal(rb, 8);
- pars->cb_luma_mult = aom_rb_read_literal(rb, 8);
- pars->cb_offset = aom_rb_read_literal(rb, 9);
- }
-
- if (pars->num_cr_points) {
- pars->cr_mult = aom_rb_read_literal(rb, 8);
- pars->cr_luma_mult = aom_rb_read_literal(rb, 8);
- pars->cr_offset = aom_rb_read_literal(rb, 9);
- }
-
- pars->overlap_flag = aom_rb_read_bit(rb);
-
- pars->clip_to_restricted_range = aom_rb_read_bit(rb);
-}
-
-// Loads the film grain parameters of the current frame. They are parsed from
-// rb when the sequence header signals film grain and the frame is shown or
-// showable; otherwise they are zeroed. The sequence bit depth is then written
-// into the parameters and the result is copied into cm->cur_frame.
-static void read_film_grain(AV1_COMMON *cm, struct aom_read_bit_buffer *rb) {
-  const int frame_visible = cm->show_frame || cm->showable_frame;
-
-  if (!cm->seq_params.film_grain_params_present || !frame_visible) {
-    memset(&cm->film_grain_params, 0, sizeof(cm->film_grain_params));
-  } else {
-    av1_read_film_grain_params(cm, rb);
-  }
-
-  cm->film_grain_params.bit_depth = cm->seq_params.bit_depth;
-  memcpy(&cm->cur_frame->film_grain_params, &cm->film_grain_params,
-         sizeof(aom_film_grain_t));
-}
-/* Parses color_config() from rb into seq_params.
- * Reads the bit depth (read_bitdepth), derives use_highbitdepth from it and
- * allow_lowbitdepth, then reads the monochrome flag (skipped for PROFILE_1),
- * the optional colour description, the colour range, the chroma subsampling
- * implied or signalled for the profile, the chroma sample position (only
- * when both subsampling flags are set) and separate_uv_delta_q.
- * Monochrome streams return early with 4:2:0 subsampling values,
- * AOM_CSP_UNKNOWN and separate_uv_delta_q = 0.
- * Unsupported combinations are reported through aom_internal_error() with
- * AOM_CODEC_UNSUP_BITSTREAM on error_info.
- */
-void av1_read_color_config(struct aom_read_bit_buffer *rb,
- int allow_lowbitdepth, SequenceHeader *seq_params,
- struct aom_internal_error_info *error_info) {
- read_bitdepth(rb, seq_params, error_info);
-
- seq_params->use_highbitdepth =
- seq_params->bit_depth > AOM_BITS_8 || !allow_lowbitdepth;
- // monochrome bit (not needed for PROFILE_1)
- const int is_monochrome =
- seq_params->profile != PROFILE_1 ? aom_rb_read_bit(rb) : 0;
- seq_params->monochrome = is_monochrome;
- int color_description_present_flag = aom_rb_read_bit(rb);
- if (color_description_present_flag) {
- seq_params->color_primaries = aom_rb_read_literal(rb, 8);
- seq_params->transfer_characteristics = aom_rb_read_literal(rb, 8);
- seq_params->matrix_coefficients = aom_rb_read_literal(rb, 8);
- } else {
- seq_params->color_primaries = AOM_CICP_CP_UNSPECIFIED;
- seq_params->transfer_characteristics = AOM_CICP_TC_UNSPECIFIED;
- seq_params->matrix_coefficients = AOM_CICP_MC_UNSPECIFIED;
- }
- if (is_monochrome) {
- // [16,235] (including xvycc) vs [0,255] range
- seq_params->color_range = aom_rb_read_bit(rb);
- seq_params->subsampling_y = seq_params->subsampling_x = 1;
- seq_params->chroma_sample_position = AOM_CSP_UNKNOWN;
- seq_params->separate_uv_delta_q = 0;
- return;
- }
- if (seq_params->color_primaries == AOM_CICP_CP_BT_709 &&
- seq_params->transfer_characteristics == AOM_CICP_TC_SRGB &&
- seq_params->matrix_coefficients == AOM_CICP_MC_IDENTITY) {
- // It would be good to remove this dependency.
- seq_params->subsampling_y = seq_params->subsampling_x = 0;
- seq_params->color_range = 1; // assume full color-range
- if (!(seq_params->profile == PROFILE_1 ||
- (seq_params->profile == PROFILE_2 &&
- seq_params->bit_depth == AOM_BITS_12))) {
- aom_internal_error(
- error_info, AOM_CODEC_UNSUP_BITSTREAM,
- "sRGB colorspace not compatible with specified profile");
- }
- } else {
- // [16,235] (including xvycc) vs [0,255] range
- seq_params->color_range = aom_rb_read_bit(rb);
- if (seq_params->profile == PROFILE_0) {
- // 420 only
- seq_params->subsampling_x = seq_params->subsampling_y = 1;
- } else if (seq_params->profile == PROFILE_1) {
- // 444 only
- seq_params->subsampling_x = seq_params->subsampling_y = 0;
- } else {
- assert(seq_params->profile == PROFILE_2);
- if (seq_params->bit_depth == AOM_BITS_12) {
- seq_params->subsampling_x = aom_rb_read_bit(rb);
- if (seq_params->subsampling_x)
- seq_params->subsampling_y = aom_rb_read_bit(rb); // 422 or 420
- else
- seq_params->subsampling_y = 0; // 444
- } else {
- // 422
- seq_params->subsampling_x = 1;
- seq_params->subsampling_y = 0;
- }
- }
- if (seq_params->matrix_coefficients == AOM_CICP_MC_IDENTITY &&
- (seq_params->subsampling_x || seq_params->subsampling_y)) {
- aom_internal_error(
- error_info, AOM_CODEC_UNSUP_BITSTREAM,
- "Identity CICP Matrix incompatible with non 4:4:4 color sampling");
- }
- if (seq_params->subsampling_x && seq_params->subsampling_y) {  // 4:2:0 only
- seq_params->chroma_sample_position = aom_rb_read_literal(rb, 2);
- }
- }
- seq_params->separate_uv_delta_q = aom_rb_read_bit(rb);
-}
-
-// Parses timing_info() from rb into cm->timing_info:
-//   - num_units_in_display_tick and time_scale, 32 bits each; both must be
-//     non-zero,
-//   - equal_picture_interval, 1 bit,
-//   - num_ticks_per_picture (uvlc value plus 1), only when the flag is set.
-// Violations are reported through aom_internal_error() with
-// AOM_CODEC_UNSUP_BITSTREAM. The error text uses only ASCII characters so it
-// does not depend on the source or terminal character set.
-void av1_read_timing_info_header(AV1_COMMON *cm,
-                                 struct aom_read_bit_buffer *rb) {
-  cm->timing_info.num_units_in_display_tick = aom_rb_read_unsigned_literal(
-      rb, 32);  // Number of units in a display tick
-  cm->timing_info.time_scale =
-      aom_rb_read_unsigned_literal(rb, 32);  // Time scale
-  if (cm->timing_info.num_units_in_display_tick == 0 ||
-      cm->timing_info.time_scale == 0) {
-    aom_internal_error(
-        &cm->error, AOM_CODEC_UNSUP_BITSTREAM,
-        "num_units_in_display_tick and time_scale must be greater than 0.");
-  }
-  cm->timing_info.equal_picture_interval =
-      aom_rb_read_bit(rb);  // Equal picture interval bit
-  if (cm->timing_info.equal_picture_interval) {
-    cm->timing_info.num_ticks_per_picture =
-        aom_rb_read_uvlc(rb) + 1;  // ticks per picture
-    // A result of 0 presumably means the "+ 1" wrapped around, i.e. the coded
-    // minus-1 value was the maximum representable one.
-    if (cm->timing_info.num_ticks_per_picture == 0) {
-      aom_internal_error(
-          &cm->error, AOM_CODEC_UNSUP_BITSTREAM,
-          "num_ticks_per_picture_minus_1 cannot be (1 << 32) - 1.");
-    }
-  }
-}
-
-// Fills cm->buffer_model from the bitstream. The three "*_length" fields are
-// each coded as a 5-bit value to which one is added; the number of units in
-// a decoding tick is a 32-bit literal read between the first and second
-// length field.
-void av1_read_decoder_model_info(AV1_COMMON *cm,
-                                 struct aom_read_bit_buffer *rb) {
-  const int buffer_delay_bits = aom_rb_read_literal(rb, 5) + 1;
-  cm->buffer_model.encoder_decoder_buffer_delay_length = buffer_delay_bits;
-
-  // Number of units in a decoding tick.
-  cm->buffer_model.num_units_in_decoding_tick =
-      aom_rb_read_unsigned_literal(rb, 32);
-
-  const int removal_time_bits = aom_rb_read_literal(rb, 5) + 1;
-  cm->buffer_model.buffer_removal_time_length = removal_time_bits;
-
-  const int presentation_time_bits = aom_rb_read_literal(rb, 5) + 1;
-  cm->buffer_model.frame_presentation_time_length = presentation_time_bits;
-}
-
-// Reads the decoder model parameters of operating point op_num into
-// cm->op_params[op_num]: decoder buffer delay, encoder buffer delay (both
-// cm->buffer_model.encoder_decoder_buffer_delay_length bits wide) and the
-// low delay mode flag, in that order.
-//
-// An op_num above MAX_NUM_OPERATING_POINTS is reported through
-// aom_internal_error() as AOM_CODEC_UNSUP_BITSTREAM.
-void av1_read_op_parameters_info(AV1_COMMON *const cm,
-                                 struct aom_read_bit_buffer *rb, int op_num) {
-  // cm->op_params has MAX_NUM_OPERATING_POINTS + 1 elements, so
-  // MAX_NUM_OPERATING_POINTS itself is still a valid index.
-  if (op_num > MAX_NUM_OPERATING_POINTS)
-    aom_internal_error(&cm->error, AOM_CODEC_UNSUP_BITSTREAM,
-                       "AV1 does not support %d decoder model operating points",
-                       op_num + 1);
-
-  const int delay_bits = cm->buffer_model.encoder_decoder_buffer_delay_length;
-
-  cm->op_params[op_num].decoder_buffer_delay =
-      aom_rb_read_unsigned_literal(rb, delay_bits);
-  cm->op_params[op_num].encoder_buffer_delay =
-      aom_rb_read_unsigned_literal(rb, delay_bits);
-  cm->op_params[op_num].low_delay_mode_flag = aom_rb_read_bit(rb);
-}
-
-// Reads the frame presentation time into cm->frame_presentation_time. Its
-// width in bits is cm->buffer_model.frame_presentation_time_length.
-static void av1_read_temporal_point_info(AV1_COMMON *const cm,
-                                         struct aom_read_bit_buffer *rb) {
-  const int time_bits = cm->buffer_model.frame_presentation_time_length;
-  cm->frame_presentation_time = aom_rb_read_unsigned_literal(rb, time_bits);
-}
-
-// Parses the frame-size limits, frame-id signalling, superblock size and the
-// coding-tool enable flags of a sequence header from rb into *seq_params.
-//
-// seq_params->reduced_still_picture_hdr is read (not written) here, so it
-// must already be set by the caller. An out-of-range frame_id_length is
-// reported on cm->error as AOM_CODEC_CORRUPT_FRAME.
-void av1_read_sequence_header(AV1_COMMON *cm, struct aom_read_bit_buffer *rb,
-                              SequenceHeader *seq_params) {
-  // All four size fields are pulled from the bitstream before any of them is
-  // stored in seq_params.
-  const int width_bits = aom_rb_read_literal(rb, 4) + 1;
-  const int height_bits = aom_rb_read_literal(rb, 4) + 1;
-  const int max_width = aom_rb_read_literal(rb, width_bits) + 1;
-  const int max_height = aom_rb_read_literal(rb, height_bits) + 1;
-
-  seq_params->max_frame_height = max_height;
-  seq_params->max_frame_width = max_width;
-  seq_params->num_bits_height = height_bits;
-  seq_params->num_bits_width = width_bits;
-
-  // Frame ids are never signalled with a reduced still-picture header.
-  seq_params->frame_id_numbers_present_flag =
-      seq_params->reduced_still_picture_hdr ? 0 : aom_rb_read_bit(rb);
-
-  if (seq_params->frame_id_numbers_present_flag) {
-    // delta_frame_id_length must stay below frame_id_length so that a frame
-    // can be referenced with a unique delta. frame_id_length is therefore
-    // coded as an offset from delta_frame_id_length, which enforces the
-    // restriction without spending extra bits.
-    seq_params->delta_frame_id_length = aom_rb_read_literal(rb, 4) + 2;
-    seq_params->frame_id_length =
-        aom_rb_read_literal(rb, 3) + seq_params->delta_frame_id_length + 1;
-    if (seq_params->frame_id_length > 16) {
-      aom_internal_error(&cm->error, AOM_CODEC_CORRUPT_FRAME,
-                         "Invalid frame_id_length");
-    }
-  }
-
-  setup_sb_size(seq_params, rb);
-
-  seq_params->enable_filter_intra = aom_rb_read_bit(rb);
-  seq_params->enable_intra_edge_filter = aom_rb_read_bit(rb);
-
-  if (!seq_params->reduced_still_picture_hdr) {
-    seq_params->enable_interintra_compound = aom_rb_read_bit(rb);
-    seq_params->enable_masked_compound = aom_rb_read_bit(rb);
-    seq_params->enable_warped_motion = aom_rb_read_bit(rb);
-    seq_params->enable_dual_filter = aom_rb_read_bit(rb);
-
-    // The two tools below are only signalled when order hints are enabled.
-    seq_params->enable_order_hint = aom_rb_read_bit(rb);
-    if (seq_params->enable_order_hint) {
-      seq_params->enable_jnt_comp = aom_rb_read_bit(rb);
-      seq_params->enable_ref_frame_mvs = aom_rb_read_bit(rb);
-    } else {
-      seq_params->enable_jnt_comp = 0;
-      seq_params->enable_ref_frame_mvs = 0;
-    }
-
-    // A leading 1 bit selects per-frame signalling (value 2,
-    // SELECT_SCREEN_CONTENT_TOOLS); otherwise the forced value follows.
-    seq_params->force_screen_content_tools =
-        aom_rb_read_bit(rb) ? 2 : aom_rb_read_bit(rb);
-
-    // The integer-MV choice is only coded when screen content tools are not
-    // forced off. Value 2 is SELECT_INTEGER_MV.
-    if (seq_params->force_screen_content_tools > 0 && !aom_rb_read_bit(rb)) {
-      seq_params->force_integer_mv = aom_rb_read_bit(rb);
-    } else {
-      seq_params->force_integer_mv = 2;
-    }
-
-    if (seq_params->enable_order_hint) {
-      seq_params->order_hint_bits_minus_1 = aom_rb_read_literal(rb, 3);
-    } else {
-      seq_params->order_hint_bits_minus_1 = -1;
-    }
-  } else {
-    // Reduced still-picture header: nothing is read, all inter tools are off.
-    seq_params->order_hint_bits_minus_1 = -1;
-    seq_params->force_integer_mv = 2;            // SELECT_INTEGER_MV
-    seq_params->force_screen_content_tools = 2;  // SELECT_SCREEN_CONTENT_TOOLS
-    seq_params->enable_ref_frame_mvs = 0;
-    seq_params->enable_jnt_comp = 0;
-    seq_params->enable_order_hint = 0;
-    seq_params->enable_dual_filter = 0;
-    seq_params->enable_warped_motion = 0;
-    seq_params->enable_masked_compound = 0;
-    seq_params->enable_interintra_compound = 0;
-  }
-
-  seq_params->enable_superres = aom_rb_read_bit(rb);
-  seq_params->enable_cdef = aom_rb_read_bit(rb);
-  seq_params->enable_restoration = aom_rb_read_bit(rb);
-}
-
-// Decodes one global motion model from rb into *params. Every coefficient is
-// coded relative to the matching coefficient of *ref_params.
-//
-// allow_hp selects the precision of the translation when the model is a pure
-// TRANSLATION. Returns 1 on success, or 0 when the decoded model is of type
-// AFFINE or lower and get_shear_params() rejects it.
-static int read_global_motion_params(WarpedMotionParams *params,
-                                     const WarpedMotionParams *ref_params,
-                                     struct aom_read_bit_buffer *rb,
-                                     int allow_hp) {
-  // Model type: up to three bits, read only as far as needed.
-  TransformationType type = aom_rb_read_bit(rb);
-  if (type != IDENTITY) {
-    if (aom_rb_read_bit(rb)) {
-      type = ROTZOOM;
-    } else if (aom_rb_read_bit(rb)) {
-      type = TRANSLATION;
-    } else {
-      type = AFFINE;
-    }
-  }
-
-  *params = default_warp_params;
-  params->wmtype = type;
-
-  if (type >= ROTZOOM) {
-    // wmmat[2] is a diagonal term: it is coded with the fixed-point "1"
-    // removed, which is added back after decoding.
-    const int ref2 = (ref_params->wmmat[2] >> GM_ALPHA_PREC_DIFF) -
-                     (1 << GM_ALPHA_PREC_BITS);
-    params->wmmat[2] = aom_rb_read_signed_primitive_refsubexpfin(
-                           rb, GM_ALPHA_MAX + 1, SUBEXPFIN_K, ref2) *
-                           GM_ALPHA_DECODE_FACTOR +
-                       (1 << WARPEDMODEL_PREC_BITS);
-
-    const int ref3 = (ref_params->wmmat[3] >> GM_ALPHA_PREC_DIFF);
-    params->wmmat[3] = aom_rb_read_signed_primitive_refsubexpfin(
-                           rb, GM_ALPHA_MAX + 1, SUBEXPFIN_K, ref3) *
-                       GM_ALPHA_DECODE_FACTOR;
-  }
-
-  if (type < AFFINE) {
-    // Not a full affine model: the second row is derived from the first.
-    params->wmmat[4] = -params->wmmat[3];
-    params->wmmat[5] = params->wmmat[2];
-  } else {
-    const int ref4 = (ref_params->wmmat[4] >> GM_ALPHA_PREC_DIFF);
-    params->wmmat[4] = aom_rb_read_signed_primitive_refsubexpfin(
-                           rb, GM_ALPHA_MAX + 1, SUBEXPFIN_K, ref4) *
-                       GM_ALPHA_DECODE_FACTOR;
-
-    // wmmat[5] is the other diagonal term, handled like wmmat[2].
-    const int ref5 = (ref_params->wmmat[5] >> GM_ALPHA_PREC_DIFF) -
-                     (1 << GM_ALPHA_PREC_BITS);
-    params->wmmat[5] = aom_rb_read_signed_primitive_refsubexpfin(
-                           rb, GM_ALPHA_MAX + 1, SUBEXPFIN_K, ref5) *
-                           GM_ALPHA_DECODE_FACTOR +
-                       (1 << WARPEDMODEL_PREC_BITS);
-  }
-
-  if (type >= TRANSLATION) {
-    // Translation-only models use their own precision, reduced by one bit
-    // when high precision motion vectors are not allowed.
-    int trans_bits;
-    int trans_dec_factor;
-    int trans_prec_diff;
-    if (type == TRANSLATION) {
-      trans_bits = GM_ABS_TRANS_ONLY_BITS - !allow_hp;
-      trans_dec_factor = GM_TRANS_ONLY_DECODE_FACTOR * (1 << !allow_hp);
-      trans_prec_diff = GM_TRANS_ONLY_PREC_DIFF + !allow_hp;
-    } else {
-      trans_bits = GM_ABS_TRANS_BITS;
-      trans_dec_factor = GM_TRANS_DECODE_FACTOR;
-      trans_prec_diff = GM_TRANS_PREC_DIFF;
-    }
-
-    const int ref0 = (ref_params->wmmat[0] >> trans_prec_diff);
-    params->wmmat[0] = aom_rb_read_signed_primitive_refsubexpfin(
-                           rb, (1 << trans_bits) + 1, SUBEXPFIN_K, ref0) *
-                       trans_dec_factor;
-
-    const int ref1 = (ref_params->wmmat[1] >> trans_prec_diff);
-    params->wmmat[1] = aom_rb_read_signed_primitive_refsubexpfin(
-                           rb, (1 << trans_bits) + 1, SUBEXPFIN_K, ref1) *
-                       trans_dec_factor;
-  }
-
-  // Models up to AFFINE must also yield usable shear parameters.
-  if (params->wmtype <= AFFINE && !get_shear_params(params)) return 0;
-
-  return 1;
-}
-
-// Reads the global motion model of every reference from LAST_FRAME through
-// ALTREF_FRAME into cm->global_motion, then copies the table into
-// cm->cur_frame->global_motion.
-//
-// Each model is predicted from the same reference's model in cm->prev_frame,
-// or from default_warp_params when there is no previous frame. A model that
-// read_global_motion_params() rejects is kept but marked invalid.
-static void read_global_motion(AV1_COMMON *cm, struct aom_read_bit_buffer *rb) {
-  int ref;
-  for (ref = LAST_FRAME; ref <= ALTREF_FRAME; ++ref) {
-    WarpedMotionParams *const model = &cm->global_motion[ref];
-    const WarpedMotionParams *prediction = &default_warp_params;
-    if (cm->prev_frame) prediction = &cm->prev_frame->global_motion[ref];
-
-    if (!read_global_motion_params(model, prediction, rb,
-                                   cm->allow_high_precision_mv)) {
-#if WARPED_MOTION_DEBUG
-      printf("Warning: unexpected global motion shear params from aomenc\n");
-#endif
-      model->invalid = 1;
-    }
-
-    // TODO(sarahparker, debargha): The logic in the commented out code below
-    // does not work currently and causes mismatches when resize is on. Fix it
-    // before turning the optimization back on.
-    /*
-    YV12_BUFFER_CONFIG *ref_buf = get_ref_frame(cm, frame);
-    if (cm->width == ref_buf->y_crop_width &&
-        cm->height == ref_buf->y_crop_height) {
-      read_global_motion_params(&cm->global_motion[frame],
-                                &cm->prev_frame->global_motion[frame], rb,
-                                cm->allow_high_precision_mv);
-    } else {
-      cm->global_motion[frame] = default_warp_params;
-    }
-    */
-    /*
-    printf("Dec Ref %d [%d/%d]: %d %d %d %d\n",
-           frame, cm->current_video_frame, cm->show_frame,
-           cm->global_motion[frame].wmmat[0],
-           cm->global_motion[frame].wmmat[1],
-           cm->global_motion[frame].wmmat[2],
-           cm->global_motion[frame].wmmat[3]);
-    */
-  }
-
-  // Keep a copy with the frame buffer so later frames can predict from it.
-  memcpy(cm->cur_frame->global_motion, cm->global_motion,
-         REF_FRAMES * sizeof(WarpedMotionParams));
-}
-
-// Puts the decoder into key-frame state for a show_existing_frame header
-// (cm->show_existing_frame must be set). existing_frame_idx is the reference
-// slot of the frame being shown.
-//
-// The frame is treated as a KEY_FRAME that refreshes every reference slot:
-// inter references are cleared, frame ids (when present) are propagated to
-// all refreshed slots, next_ref_frame_map is built, and the frame context
-// stored for existing_frame_idx is reloaded into *cm->fc.
-static void show_existing_frame_reset(AV1Decoder *const pbi,
-                                      int existing_frame_idx) {
-  AV1_COMMON *const cm = &pbi->common;
-  BufferPool *const pool = cm->buffer_pool;
-  RefCntBuffer *const frame_bufs = pool->frame_bufs;
-
-  assert(cm->show_existing_frame);
-
-  cm->frame_type = KEY_FRAME;
-  cm->cur_frame->intra_only = 1;
-  cm->refresh_frame_context = REFRESH_FRAME_CONTEXT_DISABLED;
-
-  // Every reference slot gets refreshed.
-  pbi->refresh_frame_flags = (1 << REF_FRAMES) - 1;
-
-  // A key frame has no inter references.
-  for (int ref = 0; ref < INTER_REFS_PER_FRAME; ++ref) {
-    cm->frame_refs[ref].buf = NULL;
-    cm->frame_refs[ref].idx = INVALID_IDX;
-  }
-
-  if (pbi->need_resync) {
-    memset(&cm->ref_frame_map, -1, sizeof(cm->ref_frame_map));
-    pbi->need_resync = 0;
-  }
-
-  if (cm->seq_params.frame_id_numbers_present_flag) {
-    // For every slot whose refresh bit is set, store the id of the displayed
-    // frame and mark the slot as valid for referencing. The displayed frame
-    // itself must have been valid for referencing in order to be selected.
-    const int refresh_mask = pbi->refresh_frame_flags;
-    const int shown_frame_id = cm->ref_frame_id[existing_frame_idx];
-    for (int slot = 0; slot < REF_FRAMES; slot++) {
-      if (!((refresh_mask >> slot) & 1)) continue;
-      cm->ref_frame_id[slot] = shown_frame_id;
-      cm->valid_for_referencing[slot] = 1;
-    }
-  }
-
-  // Generate next_ref_frame_map.
-  lock_buffer_pool(pool);
-  int slot_idx = 0;
-  int pending = pbi->refresh_frame_flags;
-  // First pass: slots up to and including the highest set refresh bit.
-  while (pending) {
-    if (pending & 1) {
-      cm->next_ref_frame_map[slot_idx] = cm->new_fb_idx;
-      ++frame_bufs[cm->new_fb_idx].ref_count;
-    } else {
-      cm->next_ref_frame_map[slot_idx] = cm->ref_frame_map[slot_idx];
-    }
-    // Current thread holds the reference frame.
-    if (cm->ref_frame_map[slot_idx] >= 0)
-      ++frame_bufs[cm->ref_frame_map[slot_idx]].ref_count;
-    pending >>= 1;
-    ++slot_idx;
-  }
-
-  // Second pass: any remaining slots keep their current mapping.
-  while (slot_idx < REF_FRAMES) {
-    cm->next_ref_frame_map[slot_idx] = cm->ref_frame_map[slot_idx];
-
-    // Current thread holds the reference frame.
-    if (cm->ref_frame_map[slot_idx] >= 0)
-      ++frame_bufs[cm->ref_frame_map[slot_idx]].ref_count;
-    ++slot_idx;
-  }
-  unlock_buffer_pool(pool);
-  pbi->hold_ref_buf = 1;
-
-  // Reload the adapted CDFs from when we originally coded this keyframe
-  *cm->fc = cm->frame_contexts[existing_frame_idx];
-}
-
-// Clears both reference maps and drops every frame buffer except the one at
-// cm->new_fb_idx, which is expected to hold exactly one reference. The order
-// hint bookkeeping of all buffers, including the kept one, is zeroed.
-static INLINE void reset_frame_buffers(AV1_COMMON *cm) {
-  BufferPool *const pool = cm->buffer_pool;
-  RefCntBuffer *const frame_bufs = pool->frame_bufs;
-
-  // -1 in every byte marks each map entry as "no buffer".
-  memset(&cm->ref_frame_map, -1, sizeof(cm->ref_frame_map));
-  memset(&cm->next_ref_frame_map, -1, sizeof(cm->next_ref_frame_map));
-
-  lock_buffer_pool(pool);
-  for (int idx = 0; idx < FRAME_BUFFERS; ++idx) {
-    RefCntBuffer *const fb = &frame_bufs[idx];
-    if (idx == cm->new_fb_idx) {
-      assert(fb->ref_count == 1);
-    } else {
-      fb->ref_count = 0;
-      pool->release_fb_cb(pool->cb_priv, &fb->raw_frame_buffer);
-    }
-    fb->cur_frame_offset = 0;
-    av1_zero(fb->ref_frame_offset);
-  }
-  av1_zero_unused_internal_frame_buffers(&pool->int_frame_buffers);
-  unlock_buffer_pool(pool);
-}
-
-// Parses the uncompressed frame header from rb into pbi->common (cm) and the
-// related decoder state in pbi.
-//
-// In order, this covers: show_existing_frame handling (which returns early),
-// frame type and show flags, screen content / integer MV flags, frame ids,
-// order hint, buffer removal times, refresh flags, reference selection and
-// frame size, next_ref_frame_map generation, and then the tile,
-// quantization, segmentation, delta-q/lf, loop filter, CDEF, restoration,
-// tx mode, reference mode, skip mode, warped motion, global motion and film
-// grain parameters.
-//
-// On success, returns 0. On failure, calls aom_internal_error and does not
-// return.
-static int read_uncompressed_header(AV1Decoder *pbi,
- struct aom_read_bit_buffer *rb) {
- AV1_COMMON *const cm = &pbi->common;
- const SequenceHeader *const seq_params = &cm->seq_params;
- MACROBLOCKD *const xd = &pbi->mb;
- BufferPool *const pool = cm->buffer_pool;
- RefCntBuffer *const frame_bufs = pool->frame_bufs;
-
- // A frame header cannot be parsed before a sequence header was received.
- if (!pbi->sequence_header_ready) {
- aom_internal_error(&cm->error, AOM_CODEC_CORRUPT_FRAME,
- "No sequence header");
- }
-
- // Remember the previous frame's type before it is overwritten below.
- cm->last_frame_type = cm->frame_type;
- cm->last_intra_only = cm->intra_only;
-
- // NOTE: By default all coded frames to be used as a reference
- cm->is_reference_frame = 1;
-
- if (seq_params->reduced_still_picture_hdr) {
- // With a reduced still-picture header none of these fields are coded;
- // they take fixed values instead.
- cm->show_existing_frame = 0;
- cm->show_frame = 1;
- cm->frame_type = KEY_FRAME;
- cm->error_resilient_mode = 1;
- } else {
- cm->show_existing_frame = aom_rb_read_bit(rb);
- cm->reset_decoder_state = 0;
-
- if (cm->show_existing_frame) {
- if (pbi->sequence_header_changed) {
- aom_internal_error(
- &cm->error, AOM_CODEC_CORRUPT_FRAME,
- "New sequence header starts with a show_existing_frame.");
- }
- // Show an existing frame directly.
- const int existing_frame_idx = aom_rb_read_literal(rb, 3);
- const int frame_to_show = cm->ref_frame_map[existing_frame_idx];
- if (seq_params->decoder_model_info_present_flag &&
- cm->timing_info.equal_picture_interval == 0) {
- av1_read_temporal_point_info(cm, rb);
- }
- if (seq_params->frame_id_numbers_present_flag) {
- int frame_id_length = seq_params->frame_id_length;
- int display_frame_id = aom_rb_read_literal(rb, frame_id_length);
- /* Compare display_frame_id with ref_frame_id and check valid for
- * referencing */
- if (display_frame_id != cm->ref_frame_id[existing_frame_idx] ||
- cm->valid_for_referencing[existing_frame_idx] == 0)
- aom_internal_error(&cm->error, AOM_CODEC_CORRUPT_FRAME,
- "Reference buffer frame ID mismatch");
- }
- // The selected slot must map to a buffer that is still referenced;
- // the pool lock is released before reporting the error.
- lock_buffer_pool(pool);
- if (frame_to_show < 0 || frame_bufs[frame_to_show].ref_count < 1) {
- unlock_buffer_pool(pool);
- aom_internal_error(&cm->error, AOM_CODEC_UNSUP_BITSTREAM,
- "Buffer %d does not contain a decoded frame",
- frame_to_show);
- }
- ref_cnt_fb(frame_bufs, &cm->new_fb_idx, frame_to_show);
- // Showing a stored key frame requests the decoder state reset that is
- // carried out by show_existing_frame_reset() below.
- cm->reset_decoder_state =
- frame_bufs[frame_to_show].frame_type == KEY_FRAME;
- unlock_buffer_pool(pool);
-
- cm->lf.filter_level[0] = 0;
- cm->lf.filter_level[1] = 0;
- cm->show_frame = 1;
-
- // A frame that was not marked showable is flagged as corrupted when an
- // attempt is made to show it.
- if (!frame_bufs[frame_to_show].showable_frame) {
- aom_merge_corrupted_flag(&xd->corrupted, 1);
- }
- if (cm->reset_decoder_state) frame_bufs[frame_to_show].showable_frame = 0;
-
- cm->film_grain_params = frame_bufs[frame_to_show].film_grain_params;
-
- if (cm->reset_decoder_state) {
- show_existing_frame_reset(pbi, existing_frame_idx);
- } else {
- pbi->refresh_frame_flags = 0;
- }
-
- // Nothing else is coded in a show_existing_frame header.
- return 0;
- }
-
- cm->frame_type = (FRAME_TYPE)aom_rb_read_literal(rb, 2); // 2 bits
- if (pbi->sequence_header_changed) {
- if (pbi->common.frame_type == KEY_FRAME) {
- // This is the start of a new coded video sequence.
- pbi->sequence_header_changed = 0;
- pbi->decoding_first_frame = 1;
- reset_frame_buffers(&pbi->common);
- } else {
- aom_internal_error(&cm->error, AOM_CODEC_CORRUPT_FRAME,
- "Sequence header has changed without a keyframe.");
- }
- }
-
- cm->show_frame = aom_rb_read_bit(rb);
- if (seq_params->still_picture &&
- (cm->frame_type != KEY_FRAME || !cm->show_frame)) {
- aom_internal_error(&cm->error, AOM_CODEC_CORRUPT_FRAME,
- "Still pictures must be coded as shown keyframes");
- }
- // Default showability; replaced by an explicitly coded bit when the frame
- // is not shown now.
- cm->showable_frame = cm->frame_type != KEY_FRAME;
- if (cm->show_frame) {
- if (seq_params->decoder_model_info_present_flag &&
- cm->timing_info.equal_picture_interval == 0)
- av1_read_temporal_point_info(cm, rb);
- } else {
- // See if this frame can be used as show_existing_frame in future
- cm->showable_frame = aom_rb_read_bit(rb);
- }
- cm->cur_frame->showable_frame = cm->showable_frame;
- cm->intra_only = cm->frame_type == INTRA_ONLY_FRAME;
- // S-frames and shown key frames are always error resilient; for all
- // other frames the flag is coded.
- cm->error_resilient_mode =
- frame_is_sframe(cm) || (cm->frame_type == KEY_FRAME && cm->show_frame)
- ? 1
- : aom_rb_read_bit(rb);
- }
-
- cm->disable_cdf_update = aom_rb_read_bit(rb);
- // A sequence-level value of 2 means the choice is signalled per frame.
- if (seq_params->force_screen_content_tools == 2) {
- cm->allow_screen_content_tools = aom_rb_read_bit(rb);
- } else {
- cm->allow_screen_content_tools = seq_params->force_screen_content_tools;
- }
-
- // Integer MVs can only be forced when screen content tools are allowed.
- if (cm->allow_screen_content_tools) {
- if (seq_params->force_integer_mv == 2) {
- cm->cur_frame_force_integer_mv = aom_rb_read_bit(rb);
- } else {
- cm->cur_frame_force_integer_mv = seq_params->force_integer_mv;
- }
- } else {
- cm->cur_frame_force_integer_mv = 0;
- }
-
- // Defaults for fields that may not be coded for this frame.
- cm->frame_refs_short_signaling = 0;
- int frame_size_override_flag = 0;
- cm->allow_intrabc = 0;
- cm->primary_ref_frame = PRIMARY_REF_NONE;
-
- if (!seq_params->reduced_still_picture_hdr) {
- if (seq_params->frame_id_numbers_present_flag) {
- int frame_id_length = seq_params->frame_id_length;
- int diff_len = seq_params->delta_frame_id_length;
- int prev_frame_id = 0;
- // There is no previous id to compare against on the first frame or on
- // a shown key frame.
- int have_prev_frame_id = !pbi->decoding_first_frame &&
- !(cm->frame_type == KEY_FRAME && cm->show_frame);
- if (have_prev_frame_id) {
- prev_frame_id = cm->current_frame_id;
- }
- cm->current_frame_id = aom_rb_read_literal(rb, frame_id_length);
-
- if (have_prev_frame_id) {
- // diff_frame_id is the forward distance from prev_frame_id to
- // current_frame_id, modulo (1 << frame_id_length).
- int diff_frame_id;
- if (cm->current_frame_id > prev_frame_id) {
- diff_frame_id = cm->current_frame_id - prev_frame_id;
- } else {
- diff_frame_id =
- (1 << frame_id_length) + cm->current_frame_id - prev_frame_id;
- }
- /* Check current_frame_id for conformance */
- if (prev_frame_id == cm->current_frame_id ||
- diff_frame_id >= (1 << (frame_id_length - 1))) {
- aom_internal_error(&cm->error, AOM_CODEC_CORRUPT_FRAME,
- "Invalid value of current_frame_id");
- }
- }
- /* Check if some frames need to be marked as not valid for referencing */
- // A slot stays valid only while its id lies within the (1 << diff_len)
- // ids that end at current_frame_id; the final else branch handles that
- // window wrapping around zero.
- for (int i = 0; i < REF_FRAMES; i++) {
- if (cm->frame_type == KEY_FRAME && cm->show_frame) {
- cm->valid_for_referencing[i] = 0;
- } else if (cm->current_frame_id - (1 << diff_len) > 0) {
- if (cm->ref_frame_id[i] > cm->current_frame_id ||
- cm->ref_frame_id[i] < cm->current_frame_id - (1 << diff_len))
- cm->valid_for_referencing[i] = 0;
- } else {
- if (cm->ref_frame_id[i] > cm->current_frame_id &&
- cm->ref_frame_id[i] < (1 << frame_id_length) +
- cm->current_frame_id - (1 << diff_len))
- cm->valid_for_referencing[i] = 0;
- }
- }
- }
-
- // S-frames always override the frame size; otherwise the flag is coded.
- frame_size_override_flag = frame_is_sframe(cm) ? 1 : aom_rb_read_bit(rb);
-
- // Order hint of this frame. When order hints are disabled,
- // order_hint_bits_minus_1 is -1, so zero bits are requested here.
- cm->frame_offset =
- aom_rb_read_literal(rb, seq_params->order_hint_bits_minus_1 + 1);
- cm->current_video_frame = cm->frame_offset;
-
- if (!cm->error_resilient_mode && !frame_is_intra_only(cm)) {
- cm->primary_ref_frame = aom_rb_read_literal(rb, PRIMARY_REF_BITS);
- }
- }
-
- if (seq_params->decoder_model_info_present_flag) {
- cm->buffer_removal_time_present = aom_rb_read_bit(rb);
- if (cm->buffer_removal_time_present) {
- // A buffer removal time is coded only for operating points that have
- // decoder model parameters and whose idc is either 0 or has the bits
- // of this frame's temporal and spatial layer set. It is zeroed
- // otherwise.
- for (int op_num = 0;
- op_num < seq_params->operating_points_cnt_minus_1 + 1; op_num++) {
- if (cm->op_params[op_num].decoder_model_param_present_flag) {
- if ((((seq_params->operating_point_idc[op_num] >>
- cm->temporal_layer_id) &
- 0x1) &&
- ((seq_params->operating_point_idc[op_num] >>
- (cm->spatial_layer_id + 8)) &
- 0x1)) ||
- seq_params->operating_point_idc[op_num] == 0) {
- cm->op_frame_timing[op_num].buffer_removal_time =
- aom_rb_read_unsigned_literal(
- rb, cm->buffer_model.buffer_removal_time_length);
- } else {
- cm->op_frame_timing[op_num].buffer_removal_time = 0;
- }
- } else {
- cm->op_frame_timing[op_num].buffer_removal_time = 0;
- }
- }
- }
- }
- // Determine which reference slots this frame will refresh.
- if (cm->frame_type == KEY_FRAME) {
- if (!cm->show_frame) // unshown keyframe (forward keyframe)
- pbi->refresh_frame_flags = aom_rb_read_literal(rb, REF_FRAMES);
- else // shown keyframe
- pbi->refresh_frame_flags = (1 << REF_FRAMES) - 1;
-
- // Key frames have no inter references.
- for (int i = 0; i < INTER_REFS_PER_FRAME; ++i) {
- cm->frame_refs[i].idx = INVALID_IDX;
- cm->frame_refs[i].buf = NULL;
- }
- if (pbi->need_resync) {
- memset(&cm->ref_frame_map, -1, sizeof(cm->ref_frame_map));
- pbi->need_resync = 0;
- }
- } else {
- if (cm->intra_only) {
- pbi->refresh_frame_flags = aom_rb_read_literal(rb, REF_FRAMES);
- if (pbi->refresh_frame_flags == 0xFF) {
- aom_internal_error(&cm->error, AOM_CODEC_UNSUP_BITSTREAM,
- "Intra only frames cannot have refresh flags 0xFF");
- }
- if (pbi->need_resync) {
- memset(&cm->ref_frame_map, -1, sizeof(cm->ref_frame_map));
- pbi->need_resync = 0;
- }
- } else if (pbi->need_resync != 1) { /* Skip if need resync */
- pbi->refresh_frame_flags =
- frame_is_sframe(cm) ? 0xFF : aom_rb_read_literal(rb, REF_FRAMES);
- if (!pbi->refresh_frame_flags) {
- // NOTE: "pbi->refresh_frame_flags == 0" indicates that the coded frame
- // will not be used as a reference
- cm->is_reference_frame = 0;
- }
- }
- }
-
- if (!frame_is_intra_only(cm) || pbi->refresh_frame_flags != 0xFF) {
- // Read all ref frame order hints if error_resilient_mode == 1
- if (cm->error_resilient_mode && seq_params->enable_order_hint) {
- // For each slot, the coded order hint is compared with that of the
- // buffer currently mapped there. An empty slot or a mismatch causes
- // the slot to be remapped to a newly obtained buffer.
- for (int ref_idx = 0; ref_idx < REF_FRAMES; ref_idx++) {
- // Read order hint from bit stream
- unsigned int frame_offset =
- aom_rb_read_literal(rb, seq_params->order_hint_bits_minus_1 + 1);
- // Get buffer index
- int buf_idx = cm->ref_frame_map[ref_idx];
- assert(buf_idx < FRAME_BUFFERS);
- if (buf_idx == -1 ||
- frame_offset != frame_bufs[buf_idx].cur_frame_offset) {
- // Drop the reference to the mismatching buffer, if there is one.
- if (buf_idx >= 0) {
- lock_buffer_pool(pool);
- decrease_ref_count(buf_idx, frame_bufs, pool);
- unlock_buffer_pool(pool);
- }
- // If no corresponding buffer exists, allocate a new buffer with all
- // pixels set to neutral grey.
- buf_idx = get_free_fb(cm);
- if (buf_idx == INVALID_IDX) {
- aom_internal_error(&cm->error, AOM_CODEC_MEM_ERROR,
- "Unable to find free frame buffer");
- }
- lock_buffer_pool(pool);
- if (aom_realloc_frame_buffer(
- &frame_bufs[buf_idx].buf, seq_params->max_frame_width,
- seq_params->max_frame_height, seq_params->subsampling_x,
- seq_params->subsampling_y, seq_params->use_highbitdepth,
- AOM_BORDER_IN_PIXELS, cm->byte_alignment,
- &pool->frame_bufs[buf_idx].raw_frame_buffer, pool->get_fb_cb,
- pool->cb_priv)) {
- unlock_buffer_pool(pool);
- aom_internal_error(&cm->error, AOM_CODEC_MEM_ERROR,
- "Failed to allocate frame buffer");
- }
- unlock_buffer_pool(pool);
- set_planes_to_neutral_grey(seq_params, &frame_bufs[buf_idx].buf, 0);
-
- cm->ref_frame_map[ref_idx] = buf_idx;
- frame_bufs[buf_idx].cur_frame_offset = frame_offset;
- }
- }
- }
- }
-
- // Frame size, intra block copy and, for inter frames, reference selection.
- if (cm->frame_type == KEY_FRAME) {
- setup_frame_size(cm, frame_size_override_flag, rb);
-
- if (cm->allow_screen_content_tools && !av1_superres_scaled(cm))
- cm->allow_intrabc = aom_rb_read_bit(rb);
- cm->allow_ref_frame_mvs = 0;
- cm->prev_frame = NULL;
- } else {
- cm->allow_ref_frame_mvs = 0;
-
- if (cm->intra_only) {
- cm->cur_frame->film_grain_params_present =
- seq_params->film_grain_params_present;
- setup_frame_size(cm, frame_size_override_flag, rb);
- if (cm->allow_screen_content_tools && !av1_superres_scaled(cm))
- cm->allow_intrabc = aom_rb_read_bit(rb);
-
- } else if (pbi->need_resync != 1) { /* Skip if need resync */
-
- // Frame refs short signaling is off when error resilient mode is on.
- if (seq_params->enable_order_hint)
- cm->frame_refs_short_signaling = aom_rb_read_bit(rb);
-
- if (cm->frame_refs_short_signaling) {
- // == LAST_FRAME ==
- const int lst_ref = aom_rb_read_literal(rb, REF_FRAMES_LOG2);
- const int lst_idx = cm->ref_frame_map[lst_ref];
-
- // == GOLDEN_FRAME ==
- const int gld_ref = aom_rb_read_literal(rb, REF_FRAMES_LOG2);
- const int gld_idx = cm->ref_frame_map[gld_ref];
-
- // Most of the time, streams start with a keyframe. In that case,
- // ref_frame_map will have been filled in at that point and will not
- // contain any -1's. However, streams are explicitly allowed to start
- // with an intra-only frame, so long as they don't then signal a
- // reference to a slot that hasn't been set yet. That's what we are
- // checking here.
- if (lst_idx == -1)
- aom_internal_error(&cm->error, AOM_CODEC_CORRUPT_FRAME,
- "Inter frame requests nonexistent reference");
- if (gld_idx == -1)
- aom_internal_error(&cm->error, AOM_CODEC_CORRUPT_FRAME,
- "Inter frame requests nonexistent reference");
-
- av1_set_frame_refs(cm, lst_ref, gld_ref);
- }
-
- for (int i = 0; i < INTER_REFS_PER_FRAME; ++i) {
- int ref = 0;
- if (!cm->frame_refs_short_signaling) {
- ref = aom_rb_read_literal(rb, REF_FRAMES_LOG2);
- const int idx = cm->ref_frame_map[ref];
-
- // Most of the time, streams start with a keyframe. In that case,
- // ref_frame_map will have been filled in at that point and will not
- // contain any -1's. However, streams are explicitly allowed to start
- // with an intra-only frame, so long as they don't then signal a
- // reference to a slot that hasn't been set yet. That's what we are
- // checking here.
- if (idx == -1)
- aom_internal_error(&cm->error, AOM_CODEC_CORRUPT_FRAME,
- "Inter frame requests nonexistent reference");
-
- RefBuffer *const ref_frame = &cm->frame_refs[i];
- ref_frame->idx = idx;
- ref_frame->buf = &frame_bufs[idx].buf;
- ref_frame->map_idx = ref;
- } else {
- // With short signaling the slot was already chosen by
- // av1_set_frame_refs() above.
- ref = cm->frame_refs[i].map_idx;
- }
-
- cm->ref_frame_sign_bias[LAST_FRAME + i] = 0;
-
- if (seq_params->frame_id_numbers_present_flag) {
- int frame_id_length = seq_params->frame_id_length;
- int diff_len = seq_params->delta_frame_id_length;
- int delta_frame_id_minus_1 = aom_rb_read_literal(rb, diff_len);
- // Expected id of the referenced frame: current_frame_id minus
- // (delta_frame_id_minus_1 + 1), modulo (1 << frame_id_length).
- int ref_frame_id =
- ((cm->current_frame_id - (delta_frame_id_minus_1 + 1) +
- (1 << frame_id_length)) %
- (1 << frame_id_length));
- // Compare values derived from delta_frame_id_minus_1 and
- // refresh_frame_flags. Also, check valid for referencing
- if (ref_frame_id != cm->ref_frame_id[ref] ||
- cm->valid_for_referencing[ref] == 0)
- aom_internal_error(&cm->error, AOM_CODEC_CORRUPT_FRAME,
- "Reference buffer frame ID mismatch");
- }
- }
-
- // NOTE(review): setup_frame_size_with_refs() presumably allows the
- // size to be taken from one of the references -- confirm in its
- // definition.
- if (!cm->error_resilient_mode && frame_size_override_flag) {
- setup_frame_size_with_refs(cm, rb);
- } else {
- setup_frame_size(cm, frame_size_override_flag, rb);
- }
-
- // High precision MVs are never allowed together with forced integer
- // MVs.
- if (cm->cur_frame_force_integer_mv) {
- cm->allow_high_precision_mv = 0;
- } else {
- cm->allow_high_precision_mv = aom_rb_read_bit(rb);
- }
- cm->interp_filter = read_frame_interp_filter(rb);
- cm->switchable_motion_mode = aom_rb_read_bit(rb);
- }
-
- cm->prev_frame = get_prev_frame(cm);
- if (cm->primary_ref_frame != PRIMARY_REF_NONE &&
- cm->frame_refs[cm->primary_ref_frame].idx < 0) {
- aom_internal_error(&cm->error, AOM_CODEC_CORRUPT_FRAME,
- "Reference frame containing this frame's initial "
- "frame context is unavailable.");
- }
-
- if (!cm->intra_only && pbi->need_resync != 1) {
- if (frame_might_allow_ref_frame_mvs(cm))
- cm->allow_ref_frame_mvs = aom_rb_read_bit(rb);
- else
- cm->allow_ref_frame_mvs = 0;
-
- // Set up the scale factors from each reference's cropped luma size to
- // the current frame size and reject references that cannot be scaled.
- for (int i = 0; i < INTER_REFS_PER_FRAME; ++i) {
- RefBuffer *const ref_buf = &cm->frame_refs[i];
- av1_setup_scale_factors_for_frame(
- &ref_buf->sf, ref_buf->buf->y_crop_width,
- ref_buf->buf->y_crop_height, cm->width, cm->height);
- if ((!av1_is_valid_scale(&ref_buf->sf)))
- aom_internal_error(&cm->error, AOM_CODEC_UNSUP_BITSTREAM,
- "Reference frame has invalid dimensions");
- }
- }
- }
-
- av1_setup_frame_buf_refs(cm);
-
- av1_setup_frame_sign_bias(cm);
-
- cm->cur_frame->intra_only = cm->frame_type == KEY_FRAME || cm->intra_only;
- cm->cur_frame->frame_type = cm->frame_type;
-
- if (seq_params->frame_id_numbers_present_flag) {
- /* If bitmask is set, update reference frame id values and
- mark frames as valid for reference */
- int refresh_frame_flags = pbi->refresh_frame_flags;
- for (int i = 0; i < REF_FRAMES; i++) {
- if ((refresh_frame_flags >> i) & 1) {
- cm->ref_frame_id[i] = cm->current_frame_id;
- cm->valid_for_referencing[i] = 1;
- }
- }
- }
-
- // The refresh mode is only coded when CDF updates are enabled and the
- // header is not a reduced still-picture header. A coded 1 bit disables
- // the refresh, a 0 bit selects backward refresh.
- const int might_bwd_adapt =
- !(seq_params->reduced_still_picture_hdr) && !(cm->disable_cdf_update);
- if (might_bwd_adapt) {
- cm->refresh_frame_context = aom_rb_read_bit(rb)
- ? REFRESH_FRAME_CONTEXT_DISABLED
- : REFRESH_FRAME_CONTEXT_BACKWARD;
- } else {
- cm->refresh_frame_context = REFRESH_FRAME_CONTEXT_DISABLED;
- }
-
- // Copy the sequence-level colour description and the render size into the
- // new frame buffer.
- get_frame_new_buffer(cm)->bit_depth = seq_params->bit_depth;
- get_frame_new_buffer(cm)->color_primaries = seq_params->color_primaries;
- get_frame_new_buffer(cm)->transfer_characteristics =
- seq_params->transfer_characteristics;
- get_frame_new_buffer(cm)->matrix_coefficients =
- seq_params->matrix_coefficients;
- get_frame_new_buffer(cm)->monochrome = seq_params->monochrome;
- get_frame_new_buffer(cm)->chroma_sample_position =
- seq_params->chroma_sample_position;
- get_frame_new_buffer(cm)->color_range = seq_params->color_range;
- get_frame_new_buffer(cm)->render_width = cm->render_width;
- get_frame_new_buffer(cm)->render_height = cm->render_height;
-
- // need_resync is cleared above only for key frames and intra-only frames;
- // if it is still set here this frame cannot be decoded.
- if (pbi->need_resync) {
- aom_internal_error(&cm->error, AOM_CODEC_CORRUPT_FRAME,
- "Keyframe / intra-only frame required to reset decoder"
- " state");
- }
-
- // Generate next_ref_frame_map.
- // Slots selected by refresh_frame_flags will point at the new frame
- // buffer; every other slot keeps its current mapping.
- lock_buffer_pool(pool);
- int ref_index = 0;
- for (int mask = pbi->refresh_frame_flags; mask; mask >>= 1) {
- if (mask & 1) {
- cm->next_ref_frame_map[ref_index] = cm->new_fb_idx;
- ++frame_bufs[cm->new_fb_idx].ref_count;
- } else {
- cm->next_ref_frame_map[ref_index] = cm->ref_frame_map[ref_index];
- }
- // Current thread holds the reference frame.
- if (cm->ref_frame_map[ref_index] >= 0)
- ++frame_bufs[cm->ref_frame_map[ref_index]].ref_count;
- ++ref_index;
- }
-
- // Slots above the highest set refresh bit.
- for (; ref_index < REF_FRAMES; ++ref_index) {
- cm->next_ref_frame_map[ref_index] = cm->ref_frame_map[ref_index];
-
- // Current thread holds the reference frame.
- if (cm->ref_frame_map[ref_index] >= 0)
- ++frame_bufs[cm->ref_frame_map[ref_index]].ref_count;
- }
- unlock_buffer_pool(pool);
- pbi->hold_ref_buf = 1;
-
- if (cm->allow_intrabc) {
- // Set parameters corresponding to no filtering.
- struct loopfilter *lf = &cm->lf;
- lf->filter_level[0] = 0;
- lf->filter_level[1] = 0;
- cm->cdef_bits = 0;
- cm->cdef_strengths[0] = 0;
- cm->nb_cdef_strengths = 1;
- cm->cdef_uv_strengths[0] = 0;
- cm->rst_info[0].frame_restoration_type = RESTORE_NONE;
- cm->rst_info[1].frame_restoration_type = RESTORE_NONE;
- cm->rst_info[2].frame_restoration_type = RESTORE_NONE;
- }
-
- read_tile_info(pbi, rb);
- setup_quantization(cm, rb);
- xd->bd = (int)seq_params->bit_depth;
-
- // Reallocate the above-context buffers when the current allocation is too
- // small for the plane count, the mi column count or the tile row count.
- if (cm->num_allocated_above_context_planes < av1_num_planes(cm) ||
- cm->num_allocated_above_context_mi_col < cm->mi_cols ||
- cm->num_allocated_above_contexts < cm->tile_rows) {
- av1_free_above_context_buffers(cm, cm->num_allocated_above_contexts);
- if (av1_alloc_above_context_buffers(cm, cm->tile_rows))
- aom_internal_error(&cm->error, AOM_CODEC_MEM_ERROR,
- "Failed to allocate context buffers");
- }
-
- // NOTE(review): with no primary reference frame the state is presumably
- // reset to defaults by av1_setup_past_independence() -- confirm there.
- if (cm->primary_ref_frame == PRIMARY_REF_NONE) {
- av1_setup_past_independence(cm);
- }
-
- setup_segmentation(cm, rb);
-
- // Defaults used when the delta-q / delta-lf syntax is absent. The
- // delta-q flag is only coded when base_qindex is non-zero.
- cm->delta_q_res = 1;
- cm->delta_lf_res = 1;
- cm->delta_lf_present_flag = 0;
- cm->delta_lf_multi = 0;
- cm->delta_q_present_flag = cm->base_qindex > 0 ? aom_rb_read_bit(rb) : 0;
- if (cm->delta_q_present_flag) {
- xd->current_qindex = cm->base_qindex;
- cm->delta_q_res = 1 << aom_rb_read_literal(rb, 2);
- if (!cm->allow_intrabc) cm->delta_lf_present_flag = aom_rb_read_bit(rb);
- if (cm->delta_lf_present_flag) {
- cm->delta_lf_res = 1 << aom_rb_read_literal(rb, 2);
- cm->delta_lf_multi = aom_rb_read_bit(rb);
- av1_reset_loop_filter_delta(xd, av1_num_planes(cm));
- }
- }
-
- xd->cur_frame_force_integer_mv = cm->cur_frame_force_integer_mv;
-
- // A segment is lossless when its qindex is 0 and every DC/AC delta-q
- // value is 0.
- for (int i = 0; i < MAX_SEGMENTS; ++i) {
- const int qindex = cm->seg.enabled
- ? av1_get_qindex(&cm->seg, i, cm->base_qindex)
- : cm->base_qindex;
- xd->lossless[i] = qindex == 0 && cm->y_dc_delta_q == 0 &&
- cm->u_dc_delta_q == 0 && cm->u_ac_delta_q == 0 &&
- cm->v_dc_delta_q == 0 && cm->v_ac_delta_q == 0;
- xd->qindex[i] = qindex;
- }
- cm->coded_lossless = is_coded_lossless(cm, xd);
- // all_lossless additionally requires that superres scaling is not in use.
- cm->all_lossless = cm->coded_lossless && !av1_superres_scaled(cm);
- setup_segmentation_dequant(cm);
- // Switch off the in-loop filters that are unavailable for this frame
- // before their syntax is (conditionally) read below.
- if (cm->coded_lossless) {
- cm->lf.filter_level[0] = 0;
- cm->lf.filter_level[1] = 0;
- }
- if (cm->coded_lossless || !seq_params->enable_cdef) {
- cm->cdef_bits = 0;
- cm->cdef_strengths[0] = 0;
- cm->cdef_uv_strengths[0] = 0;
- }
- if (cm->all_lossless || !seq_params->enable_restoration) {
- cm->rst_info[0].frame_restoration_type = RESTORE_NONE;
- cm->rst_info[1].frame_restoration_type = RESTORE_NONE;
- cm->rst_info[2].frame_restoration_type = RESTORE_NONE;
- }
- setup_loopfilter(cm, rb);
-
- if (!cm->coded_lossless && seq_params->enable_cdef) {
- setup_cdef(cm, rb);
- }
- if (!cm->all_lossless && seq_params->enable_restoration) {
- decode_restoration_mode(cm, rb);
- }
-
- cm->tx_mode = read_tx_mode(cm, rb);
- cm->reference_mode = read_frame_reference_mode(cm, rb);
- if (cm->reference_mode != SINGLE_REFERENCE) setup_compound_reference_mode(cm);
-
- av1_setup_skip_mode_allowed(cm);
- cm->skip_mode_flag = cm->is_skip_mode_allowed ? aom_rb_read_bit(rb) : 0;
-
- if (frame_might_allow_warped_motion(cm))
- cm->allow_warped_motion = aom_rb_read_bit(rb);
- else
- cm->allow_warped_motion = 0;
-
- cm->reduced_tx_set_used = aom_rb_read_bit(rb);
-
- if (cm->allow_ref_frame_mvs && !frame_might_allow_ref_frame_mvs(cm)) {
- aom_internal_error(&cm->error, AOM_CODEC_CORRUPT_FRAME,
- "Frame wrongly requests reference frame MVs");
- }
-
- if (!frame_is_intra_only(cm)) read_global_motion(cm, rb);
-
- cm->cur_frame->film_grain_params_present =
- seq_params->film_grain_params_present;
- read_film_grain(cm, rb);
-
-#if EXT_TILE_DEBUG
- if (pbi->ext_tile_debug && cm->large_scale_tile) {
- read_ext_tile_info(pbi, rb);
- av1_set_single_tile_decoding_mode(cm);
- }
-#endif // EXT_TILE_DEBUG
- return 0;
-}
-
-struct aom_read_bit_buffer *av1_init_read_bit_buffer(
- AV1Decoder *pbi, struct aom_read_bit_buffer *rb, const uint8_t *data,
- const uint8_t *data_end) {
- rb->bit_offset = 0;
- rb->error_handler = error_handler;
- rb->error_handler_data = &pbi->common;
- rb->bit_buffer = data;
- rb->bit_buffer_end = data_end;
- return rb;
-}
-
-void av1_read_frame_size(struct aom_read_bit_buffer *rb, int num_bits_width,
- int num_bits_height, int *width, int *height) {
- *width = aom_rb_read_literal(rb, num_bits_width) + 1;
- *height = aom_rb_read_literal(rb, num_bits_height) + 1;
-}
-
-BITSTREAM_PROFILE av1_read_profile(struct aom_read_bit_buffer *rb) {
- int profile = aom_rb_read_literal(rb, PROFILE_BITS);
- return (BITSTREAM_PROFILE)profile;
-}
-
-void superres_post_decode(AV1Decoder *pbi) {
- AV1_COMMON *const cm = &pbi->common;
- BufferPool *const pool = cm->buffer_pool;
-
- if (!av1_superres_scaled(cm)) return;
- assert(!cm->all_lossless);
-
- lock_buffer_pool(pool);
- av1_superres_upscale(cm, pool);
- unlock_buffer_pool(pool);
-}
-
-uint32_t av1_decode_frame_headers_and_setup(AV1Decoder *pbi,
- struct aom_read_bit_buffer *rb,
- const uint8_t *data,
- const uint8_t **p_data_end,
- int trailing_bits_present) {
- AV1_COMMON *const cm = &pbi->common;
- const int num_planes = av1_num_planes(cm);
- MACROBLOCKD *const xd = &pbi->mb;
-
-#if CONFIG_BITSTREAM_DEBUG
- bitstream_queue_set_frame_read(cm->current_video_frame * 2 + cm->show_frame);
-#endif
-#if CONFIG_MISMATCH_DEBUG
- mismatch_move_frame_idx_r();
-#endif
-
- for (int i = LAST_FRAME; i <= ALTREF_FRAME; ++i) {
- cm->global_motion[i] = default_warp_params;
- cm->cur_frame->global_motion[i] = default_warp_params;
- }
- xd->global_motion = cm->global_motion;
-
- read_uncompressed_header(pbi, rb);
-
- if (trailing_bits_present) av1_check_trailing_bits(pbi, rb);
-
- // If cm->single_tile_decoding = 0, the independent decoding of a single tile
- // or a section of a frame is not allowed.
- if (!cm->single_tile_decoding &&
- (pbi->dec_tile_row >= 0 || pbi->dec_tile_col >= 0)) {
- pbi->dec_tile_row = -1;
- pbi->dec_tile_col = -1;
- }
-
- const uint32_t uncomp_hdr_size =
- (uint32_t)aom_rb_bytes_read(rb); // Size of the uncompressed header
- YV12_BUFFER_CONFIG *new_fb = get_frame_new_buffer(cm);
- xd->cur_buf = new_fb;
- if (av1_allow_intrabc(cm)) {
- av1_setup_scale_factors_for_frame(
- &cm->sf_identity, xd->cur_buf->y_crop_width, xd->cur_buf->y_crop_height,
- xd->cur_buf->y_crop_width, xd->cur_buf->y_crop_height);
- }
-
- if (cm->show_existing_frame) {
- // showing a frame directly
- *p_data_end = data + uncomp_hdr_size;
- if (cm->reset_decoder_state) {
- // Use the default frame context values.
- *cm->fc = cm->frame_contexts[FRAME_CONTEXT_DEFAULTS];
- if (!cm->fc->initialized)
- aom_internal_error(&cm->error, AOM_CODEC_CORRUPT_FRAME,
- "Uninitialized entropy context.");
- }
- return uncomp_hdr_size;
- }
-
- cm->setup_mi(cm);
-
- cm->current_frame_seg_map = cm->cur_frame->seg_map;
-
- av1_setup_motion_field(cm);
-
- av1_setup_block_planes(xd, cm->seq_params.subsampling_x,
- cm->seq_params.subsampling_y, num_planes);
- if (cm->primary_ref_frame == PRIMARY_REF_NONE) {
- // use the default frame context values
- *cm->fc = cm->frame_contexts[FRAME_CONTEXT_DEFAULTS];
- } else {
- *cm->fc = cm->frame_contexts[cm->frame_refs[cm->primary_ref_frame].idx];
- }
- if (!cm->fc->initialized)
- aom_internal_error(&cm->error, AOM_CODEC_CORRUPT_FRAME,
- "Uninitialized entropy context.");
-
- xd->corrupted = 0;
- return uncomp_hdr_size;
-}
-
-// Once-per-frame initialization
-static void setup_frame_info(AV1Decoder *pbi) {
- AV1_COMMON *const cm = &pbi->common;
-
- if (cm->rst_info[0].frame_restoration_type != RESTORE_NONE ||
- cm->rst_info[1].frame_restoration_type != RESTORE_NONE ||
- cm->rst_info[2].frame_restoration_type != RESTORE_NONE) {
- av1_alloc_restoration_buffers(cm);
- }
- const int use_highbd = cm->seq_params.use_highbitdepth ? 1 : 0;
- const int buf_size = MC_TEMP_BUF_PELS << use_highbd;
- if (pbi->td.mc_buf_size != buf_size) {
- av1_free_mc_tmp_buf(&pbi->td);
- allocate_mc_tmp_buf(cm, &pbi->td, buf_size, use_highbd);
- }
-}
-
-void av1_decode_tg_tiles_and_wrapup(AV1Decoder *pbi, const uint8_t *data,
- const uint8_t *data_end,
- const uint8_t **p_data_end, int start_tile,
- int end_tile, int initialize_flag) {
- AV1_COMMON *const cm = &pbi->common;
- MACROBLOCKD *const xd = &pbi->mb;
- const int tile_count_tg = end_tile - start_tile + 1;
-
- if (initialize_flag) setup_frame_info(pbi);
- const int num_planes = av1_num_planes(cm);
-#if LOOP_FILTER_BITMASK
- av1_loop_filter_frame_init(cm, 0, num_planes);
- av1_zero_array(cm->lf.lfm, cm->lf.lfm_num);
-#endif
-
- if (pbi->max_threads > 1 && !(cm->large_scale_tile && !pbi->ext_tile_debug) &&
- pbi->row_mt)
- *p_data_end =
- decode_tiles_row_mt(pbi, data, data_end, start_tile, end_tile);
- else if (pbi->max_threads > 1 && tile_count_tg > 1 &&
- !(cm->large_scale_tile && !pbi->ext_tile_debug))
- *p_data_end = decode_tiles_mt(pbi, data, data_end, start_tile, end_tile);
- else
- *p_data_end = decode_tiles(pbi, data, data_end, start_tile, end_tile);
-
- // If the bit stream is monochrome, set the U and V buffers to a constant.
- if (num_planes < 3) {
- set_planes_to_neutral_grey(&cm->seq_params, xd->cur_buf, 1);
- }
-
- if (end_tile != cm->tile_rows * cm->tile_cols - 1) {
- return;
- }
-
- if (!cm->allow_intrabc && !cm->single_tile_decoding) {
- if (cm->lf.filter_level[0] || cm->lf.filter_level[1]) {
-#if LOOP_FILTER_BITMASK
- av1_loop_filter_frame(get_frame_new_buffer(cm), cm, &pbi->mb, 1, 0,
- num_planes, 0);
-#else
- if (pbi->num_workers > 1) {
- av1_loop_filter_frame_mt(get_frame_new_buffer(cm), cm, &pbi->mb, 0,
- num_planes, 0, pbi->tile_workers,
- pbi->num_workers, &pbi->lf_row_sync);
- } else {
- av1_loop_filter_frame(get_frame_new_buffer(cm), cm, &pbi->mb, 0,
- num_planes, 0);
- }
-#endif
- }
-
- const int do_loop_restoration =
- cm->rst_info[0].frame_restoration_type != RESTORE_NONE ||
- cm->rst_info[1].frame_restoration_type != RESTORE_NONE ||
- cm->rst_info[2].frame_restoration_type != RESTORE_NONE;
- const int do_cdef =
- !cm->skip_loop_filter && !cm->coded_lossless &&
- (cm->cdef_bits || cm->cdef_strengths[0] || cm->cdef_uv_strengths[0]);
- const int do_superres = av1_superres_scaled(cm);
- const int optimized_loop_restoration = !do_cdef && !do_superres;
-
- if (!optimized_loop_restoration) {
- if (do_loop_restoration)
- av1_loop_restoration_save_boundary_lines(&pbi->cur_buf->buf, cm, 0);
-
- if (do_cdef) av1_cdef_frame(&pbi->cur_buf->buf, cm, &pbi->mb);
-
- superres_post_decode(pbi);
-
- if (do_loop_restoration) {
- av1_loop_restoration_save_boundary_lines(&pbi->cur_buf->buf, cm, 1);
- if (pbi->num_workers > 1) {
- av1_loop_restoration_filter_frame_mt(
- (YV12_BUFFER_CONFIG *)xd->cur_buf, cm, optimized_loop_restoration,
- pbi->tile_workers, pbi->num_workers, &pbi->lr_row_sync,
- &pbi->lr_ctxt);
- } else {
- av1_loop_restoration_filter_frame((YV12_BUFFER_CONFIG *)xd->cur_buf,
- cm, optimized_loop_restoration,
- &pbi->lr_ctxt);
- }
- }
- } else {
- // In no cdef and no superres case. Provide an optimized version of
- // loop_restoration_filter.
- if (do_loop_restoration) {
- if (pbi->num_workers > 1) {
- av1_loop_restoration_filter_frame_mt(
- (YV12_BUFFER_CONFIG *)xd->cur_buf, cm, optimized_loop_restoration,
- pbi->tile_workers, pbi->num_workers, &pbi->lr_row_sync,
- &pbi->lr_ctxt);
- } else {
- av1_loop_restoration_filter_frame((YV12_BUFFER_CONFIG *)xd->cur_buf,
- cm, optimized_loop_restoration,
- &pbi->lr_ctxt);
- }
- }
- }
- }
-
- if (!xd->corrupted) {
- if (cm->refresh_frame_context == REFRESH_FRAME_CONTEXT_BACKWARD) {
- assert(cm->context_update_tile_id < pbi->allocated_tiles);
- *cm->fc = pbi->tile_data[cm->context_update_tile_id].tctx;
- av1_reset_cdf_symbol_counters(cm->fc);
- }
- } else {
- aom_internal_error(&cm->error, AOM_CODEC_CORRUPT_FRAME,
- "Decode failed. Frame data is corrupted.");
- }
-
-#if CONFIG_INSPECTION
- if (pbi->inspect_cb != NULL) {
- (*pbi->inspect_cb)(pbi, pbi->inspect_ctx);
- }
-#endif
-
- // Non frame parallel update frame context here.
- if (!cm->large_scale_tile) {
- cm->frame_contexts[cm->new_fb_idx] = *cm->fc;
- }
-}
diff --git a/third_party/aom/av1/decoder/decodeframe.h b/third_party/aom/av1/decoder/decodeframe.h
deleted file mode 100644
index ddad273f1..000000000
--- a/third_party/aom/av1/decoder/decodeframe.h
+++ /dev/null
@@ -1,85 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#ifndef AOM_AV1_DECODER_DECODEFRAME_H_
-#define AOM_AV1_DECODER_DECODEFRAME_H_
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-struct AV1Decoder;
-struct aom_read_bit_buffer;
-struct ThreadData;
-
-// Reads the middle part of the sequence header OBU (from
-// frame_width_bits_minus_1 to enable_restoration) into seq_params.
-// Reports errors by calling rb->error_handler() or aom_internal_error().
-void av1_read_sequence_header(AV1_COMMON *cm, struct aom_read_bit_buffer *rb,
- SequenceHeader *seq_params);
-
-void av1_read_frame_size(struct aom_read_bit_buffer *rb, int num_bits_width,
- int num_bits_height, int *width, int *height);
-BITSTREAM_PROFILE av1_read_profile(struct aom_read_bit_buffer *rb);
-
-// Returns 0 on success. Sets pbi->common.error.error_code and returns -1 on
-// failure.
-int av1_check_trailing_bits(struct AV1Decoder *pbi,
- struct aom_read_bit_buffer *rb);
-
-// On success, returns the frame header size. On failure, calls
-// aom_internal_error and does not return.
-// TODO(wtc): Figure out and document the p_data_end parameter.
-uint32_t av1_decode_frame_headers_and_setup(struct AV1Decoder *pbi,
- struct aom_read_bit_buffer *rb,
- const uint8_t *data,
- const uint8_t **p_data_end,
- int trailing_bits_present);
-
-void av1_decode_tg_tiles_and_wrapup(struct AV1Decoder *pbi, const uint8_t *data,
- const uint8_t *data_end,
- const uint8_t **p_data_end, int startTile,
- int endTile, int initialize_flag);
-
-// Implements the color_config() function in the spec. Reports errors by
-// calling rb->error_handler() or aom_internal_error().
-void av1_read_color_config(struct aom_read_bit_buffer *rb,
- int allow_lowbitdepth, SequenceHeader *seq_params,
- struct aom_internal_error_info *error_info);
-
-// Implements the timing_info() function in the spec. Reports errors by calling
-// rb->error_handler().
-void av1_read_timing_info_header(AV1_COMMON *cm,
- struct aom_read_bit_buffer *rb);
-
-// Implements the decoder_model_info() function in the spec. Reports errors by
-// calling rb->error_handler().
-void av1_read_decoder_model_info(AV1_COMMON *cm,
- struct aom_read_bit_buffer *rb);
-
-// Implements the operating_parameters_info() function in the spec. Reports
-// errors by calling rb->error_handler() or aom_internal_error().
-void av1_read_op_parameters_info(AV1_COMMON *const cm,
- struct aom_read_bit_buffer *rb, int op_num);
-
-struct aom_read_bit_buffer *av1_init_read_bit_buffer(
- struct AV1Decoder *pbi, struct aom_read_bit_buffer *rb, const uint8_t *data,
- const uint8_t *data_end);
-
-void av1_free_mc_tmp_buf(struct ThreadData *thread_data);
-
-void av1_set_single_tile_decoding_mode(AV1_COMMON *const cm);
-
-#ifdef __cplusplus
-} // extern "C"
-#endif
-
-#endif // AOM_AV1_DECODER_DECODEFRAME_H_
diff --git a/third_party/aom/av1/decoder/decodemv.c b/third_party/aom/av1/decoder/decodemv.c
deleted file mode 100644
index 551e4d543..000000000
--- a/third_party/aom/av1/decoder/decodemv.c
+++ /dev/null
@@ -1,1560 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#include <assert.h>
-
-#include "av1/common/cfl.h"
-#include "av1/common/common.h"
-#include "av1/common/entropy.h"
-#include "av1/common/entropymode.h"
-#include "av1/common/entropymv.h"
-#include "av1/common/mvref_common.h"
-#include "av1/common/pred_common.h"
-#include "av1/common/reconinter.h"
-#include "av1/common/reconintra.h"
-#include "av1/common/seg_common.h"
-#include "av1/common/warped_motion.h"
-
-#include "av1/decoder/decodeframe.h"
-#include "av1/decoder/decodemv.h"
-
-#include "aom_dsp/aom_dsp_common.h"
-
-#define ACCT_STR __func__
-
-#define DEC_MISMATCH_DEBUG 0
-
-static PREDICTION_MODE read_intra_mode(aom_reader *r, aom_cdf_prob *cdf) {
- return (PREDICTION_MODE)aom_read_symbol(r, cdf, INTRA_MODES, ACCT_STR);
-}
-
-static void read_cdef(AV1_COMMON *cm, aom_reader *r, MACROBLOCKD *const xd,
- int mi_col, int mi_row) {
- MB_MODE_INFO *const mbmi = xd->mi[0];
- if (cm->coded_lossless) return;
- if (cm->allow_intrabc) {
- assert(cm->cdef_bits == 0);
- return;
- }
-
- if (!(mi_col & (cm->seq_params.mib_size - 1)) &&
- !(mi_row & (cm->seq_params.mib_size - 1))) { // Top left?
- xd->cdef_preset[0] = xd->cdef_preset[1] = xd->cdef_preset[2] =
- xd->cdef_preset[3] = -1;
- }
- // Read CDEF param at the first non-skip coding block
- const int mask = (1 << (6 - MI_SIZE_LOG2));
- const int m = ~(mask - 1);
- const int index = cm->seq_params.sb_size == BLOCK_128X128
- ? !!(mi_col & mask) + 2 * !!(mi_row & mask)
- : 0;
- cm->mi_grid_visible[(mi_row & m) * cm->mi_stride + (mi_col & m)]
- ->cdef_strength = xd->cdef_preset[index] =
- xd->cdef_preset[index] == -1 && !mbmi->skip
- ? aom_read_literal(r, cm->cdef_bits, ACCT_STR)
- : xd->cdef_preset[index];
-}
-
-static int read_delta_qindex(AV1_COMMON *cm, const MACROBLOCKD *xd,
- aom_reader *r, MB_MODE_INFO *const mbmi,
- int mi_col, int mi_row) {
- int sign, abs, reduced_delta_qindex = 0;
- BLOCK_SIZE bsize = mbmi->sb_type;
- const int b_col = mi_col & (cm->seq_params.mib_size - 1);
- const int b_row = mi_row & (cm->seq_params.mib_size - 1);
- const int read_delta_q_flag = (b_col == 0 && b_row == 0);
- FRAME_CONTEXT *ec_ctx = xd->tile_ctx;
-
- if ((bsize != cm->seq_params.sb_size || mbmi->skip == 0) &&
- read_delta_q_flag) {
- abs = aom_read_symbol(r, ec_ctx->delta_q_cdf, DELTA_Q_PROBS + 1, ACCT_STR);
- const int smallval = (abs < DELTA_Q_SMALL);
-
- if (!smallval) {
- const int rem_bits = aom_read_literal(r, 3, ACCT_STR) + 1;
- const int thr = (1 << rem_bits) + 1;
- abs = aom_read_literal(r, rem_bits, ACCT_STR) + thr;
- }
-
- if (abs) {
- sign = aom_read_bit(r, ACCT_STR);
- } else {
- sign = 1;
- }
-
- reduced_delta_qindex = sign ? -abs : abs;
- }
- return reduced_delta_qindex;
-}
-static int read_delta_lflevel(const AV1_COMMON *const cm, aom_reader *r,
- aom_cdf_prob *const cdf,
- const MB_MODE_INFO *const mbmi, int mi_col,
- int mi_row) {
- int reduced_delta_lflevel = 0;
- const BLOCK_SIZE bsize = mbmi->sb_type;
- const int b_col = mi_col & (cm->seq_params.mib_size - 1);
- const int b_row = mi_row & (cm->seq_params.mib_size - 1);
- const int read_delta_lf_flag = (b_col == 0 && b_row == 0);
-
- if ((bsize != cm->seq_params.sb_size || mbmi->skip == 0) &&
- read_delta_lf_flag) {
- int abs = aom_read_symbol(r, cdf, DELTA_LF_PROBS + 1, ACCT_STR);
- const int smallval = (abs < DELTA_LF_SMALL);
- if (!smallval) {
- const int rem_bits = aom_read_literal(r, 3, ACCT_STR) + 1;
- const int thr = (1 << rem_bits) + 1;
- abs = aom_read_literal(r, rem_bits, ACCT_STR) + thr;
- }
- const int sign = abs ? aom_read_bit(r, ACCT_STR) : 1;
- reduced_delta_lflevel = sign ? -abs : abs;
- }
- return reduced_delta_lflevel;
-}
-
-static UV_PREDICTION_MODE read_intra_mode_uv(FRAME_CONTEXT *ec_ctx,
- aom_reader *r,
- CFL_ALLOWED_TYPE cfl_allowed,
- PREDICTION_MODE y_mode) {
- const UV_PREDICTION_MODE uv_mode =
- aom_read_symbol(r, ec_ctx->uv_mode_cdf[cfl_allowed][y_mode],
- UV_INTRA_MODES - !cfl_allowed, ACCT_STR);
- return uv_mode;
-}
-
-static int read_cfl_alphas(FRAME_CONTEXT *const ec_ctx, aom_reader *r,
- int *signs_out) {
- const int joint_sign =
- aom_read_symbol(r, ec_ctx->cfl_sign_cdf, CFL_JOINT_SIGNS, "cfl:signs");
- int idx = 0;
- // Magnitudes are only coded for nonzero values
- if (CFL_SIGN_U(joint_sign) != CFL_SIGN_ZERO) {
- aom_cdf_prob *cdf_u = ec_ctx->cfl_alpha_cdf[CFL_CONTEXT_U(joint_sign)];
- idx = aom_read_symbol(r, cdf_u, CFL_ALPHABET_SIZE, "cfl:alpha_u")
- << CFL_ALPHABET_SIZE_LOG2;
- }
- if (CFL_SIGN_V(joint_sign) != CFL_SIGN_ZERO) {
- aom_cdf_prob *cdf_v = ec_ctx->cfl_alpha_cdf[CFL_CONTEXT_V(joint_sign)];
- idx += aom_read_symbol(r, cdf_v, CFL_ALPHABET_SIZE, "cfl:alpha_v");
- }
- *signs_out = joint_sign;
- return idx;
-}
-
-static INTERINTRA_MODE read_interintra_mode(MACROBLOCKD *xd, aom_reader *r,
- int size_group) {
- const INTERINTRA_MODE ii_mode = (INTERINTRA_MODE)aom_read_symbol(
- r, xd->tile_ctx->interintra_mode_cdf[size_group], INTERINTRA_MODES,
- ACCT_STR);
- return ii_mode;
-}
-
-static PREDICTION_MODE read_inter_mode(FRAME_CONTEXT *ec_ctx, aom_reader *r,
- int16_t ctx) {
- int16_t mode_ctx = ctx & NEWMV_CTX_MASK;
- int is_newmv, is_zeromv, is_refmv;
- is_newmv = aom_read_symbol(r, ec_ctx->newmv_cdf[mode_ctx], 2, ACCT_STR) == 0;
- if (is_newmv) return NEWMV;
-
- mode_ctx = (ctx >> GLOBALMV_OFFSET) & GLOBALMV_CTX_MASK;
- is_zeromv =
- aom_read_symbol(r, ec_ctx->zeromv_cdf[mode_ctx], 2, ACCT_STR) == 0;
- if (is_zeromv) return GLOBALMV;
-
- mode_ctx = (ctx >> REFMV_OFFSET) & REFMV_CTX_MASK;
- is_refmv = aom_read_symbol(r, ec_ctx->refmv_cdf[mode_ctx], 2, ACCT_STR) == 0;
- if (is_refmv)
- return NEARESTMV;
- else
- return NEARMV;
-}
-
-static void read_drl_idx(FRAME_CONTEXT *ec_ctx, MACROBLOCKD *xd,
- MB_MODE_INFO *mbmi, aom_reader *r) {
- uint8_t ref_frame_type = av1_ref_frame_type(mbmi->ref_frame);
- mbmi->ref_mv_idx = 0;
- if (mbmi->mode == NEWMV || mbmi->mode == NEW_NEWMV) {
- for (int idx = 0; idx < 2; ++idx) {
- if (xd->ref_mv_count[ref_frame_type] > idx + 1) {
- uint8_t drl_ctx = av1_drl_ctx(xd->ref_mv_stack[ref_frame_type], idx);
- int drl_idx = aom_read_symbol(r, ec_ctx->drl_cdf[drl_ctx], 2, ACCT_STR);
- mbmi->ref_mv_idx = idx + drl_idx;
- if (!drl_idx) return;
- }
- }
- }
- if (have_nearmv_in_inter_mode(mbmi->mode)) {
- // Offset the NEARESTMV mode.
- // TODO(jingning): Unify the two syntax decoding loops after the NEARESTMV
- // mode is factored in.
- for (int idx = 1; idx < 3; ++idx) {
- if (xd->ref_mv_count[ref_frame_type] > idx + 1) {
- uint8_t drl_ctx = av1_drl_ctx(xd->ref_mv_stack[ref_frame_type], idx);
- int drl_idx = aom_read_symbol(r, ec_ctx->drl_cdf[drl_ctx], 2, ACCT_STR);
- mbmi->ref_mv_idx = idx + drl_idx - 1;
- if (!drl_idx) return;
- }
- }
- }
-}
-
-static MOTION_MODE read_motion_mode(AV1_COMMON *cm, MACROBLOCKD *xd,
- MB_MODE_INFO *mbmi, aom_reader *r) {
- if (cm->switchable_motion_mode == 0) return SIMPLE_TRANSLATION;
- if (mbmi->skip_mode) return SIMPLE_TRANSLATION;
-
- const MOTION_MODE last_motion_mode_allowed =
- motion_mode_allowed(xd->global_motion, xd, mbmi, cm->allow_warped_motion);
- int motion_mode;
-
- if (last_motion_mode_allowed == SIMPLE_TRANSLATION) return SIMPLE_TRANSLATION;
-
- if (last_motion_mode_allowed == OBMC_CAUSAL) {
- motion_mode =
- aom_read_symbol(r, xd->tile_ctx->obmc_cdf[mbmi->sb_type], 2, ACCT_STR);
- return (MOTION_MODE)(SIMPLE_TRANSLATION + motion_mode);
- } else {
- motion_mode =
- aom_read_symbol(r, xd->tile_ctx->motion_mode_cdf[mbmi->sb_type],
- MOTION_MODES, ACCT_STR);
- return (MOTION_MODE)(SIMPLE_TRANSLATION + motion_mode);
- }
-}
-
-static PREDICTION_MODE read_inter_compound_mode(MACROBLOCKD *xd, aom_reader *r,
- int16_t ctx) {
- const int mode =
- aom_read_symbol(r, xd->tile_ctx->inter_compound_mode_cdf[ctx],
- INTER_COMPOUND_MODES, ACCT_STR);
- assert(is_inter_compound_mode(NEAREST_NEARESTMV + mode));
- return NEAREST_NEARESTMV + mode;
-}
-
-int av1_neg_deinterleave(int diff, int ref, int max) {
- if (!ref) return diff;
- if (ref >= (max - 1)) return max - diff - 1;
- if (2 * ref < max) {
- if (diff <= 2 * ref) {
- if (diff & 1)
- return ref + ((diff + 1) >> 1);
- else
- return ref - (diff >> 1);
- }
- return diff;
- } else {
- if (diff <= 2 * (max - ref - 1)) {
- if (diff & 1)
- return ref + ((diff + 1) >> 1);
- else
- return ref - (diff >> 1);
- }
- return max - (diff + 1);
- }
-}
-
-static int read_segment_id(AV1_COMMON *const cm, const MACROBLOCKD *const xd,
- int mi_row, int mi_col, aom_reader *r, int skip) {
- int cdf_num;
- const int pred = av1_get_spatial_seg_pred(cm, xd, mi_row, mi_col, &cdf_num);
- if (skip) return pred;
-
- FRAME_CONTEXT *ec_ctx = xd->tile_ctx;
- struct segmentation *const seg = &cm->seg;
- struct segmentation_probs *const segp = &ec_ctx->seg;
- aom_cdf_prob *pred_cdf = segp->spatial_pred_seg_cdf[cdf_num];
- const int coded_id = aom_read_symbol(r, pred_cdf, MAX_SEGMENTS, ACCT_STR);
- const int segment_id =
- av1_neg_deinterleave(coded_id, pred, seg->last_active_segid + 1);
-
- if (segment_id < 0 || segment_id > seg->last_active_segid) {
- aom_internal_error(xd->error_info, AOM_CODEC_CORRUPT_FRAME,
- "Corrupted segment_ids");
- }
- return segment_id;
-}
-
-static int dec_get_segment_id(const AV1_COMMON *cm, const uint8_t *segment_ids,
- int mi_offset, int x_mis, int y_mis) {
- int segment_id = INT_MAX;
-
- for (int y = 0; y < y_mis; y++)
- for (int x = 0; x < x_mis; x++)
- segment_id =
- AOMMIN(segment_id, segment_ids[mi_offset + y * cm->mi_cols + x]);
-
- assert(segment_id >= 0 && segment_id < MAX_SEGMENTS);
- return segment_id;
-}
-
-static void set_segment_id(AV1_COMMON *cm, int mi_offset, int x_mis, int y_mis,
- int segment_id) {
- assert(segment_id >= 0 && segment_id < MAX_SEGMENTS);
-
- for (int y = 0; y < y_mis; y++)
- for (int x = 0; x < x_mis; x++)
- cm->current_frame_seg_map[mi_offset + y * cm->mi_cols + x] = segment_id;
-}
-
-static int read_intra_segment_id(AV1_COMMON *const cm,
- const MACROBLOCKD *const xd, int mi_row,
- int mi_col, int bsize, aom_reader *r,
- int skip) {
- struct segmentation *const seg = &cm->seg;
- if (!seg->enabled) return 0; // Default for disabled segmentation
-
- assert(seg->update_map && !seg->temporal_update);
-
- const int mi_offset = mi_row * cm->mi_cols + mi_col;
- const int bw = mi_size_wide[bsize];
- const int bh = mi_size_high[bsize];
- const int x_mis = AOMMIN(cm->mi_cols - mi_col, bw);
- const int y_mis = AOMMIN(cm->mi_rows - mi_row, bh);
- const int segment_id = read_segment_id(cm, xd, mi_row, mi_col, r, skip);
- set_segment_id(cm, mi_offset, x_mis, y_mis, segment_id);
- return segment_id;
-}
-
-static void copy_segment_id(const AV1_COMMON *cm,
- const uint8_t *last_segment_ids,
- uint8_t *current_segment_ids, int mi_offset,
- int x_mis, int y_mis) {
- for (int y = 0; y < y_mis; y++)
- for (int x = 0; x < x_mis; x++)
- current_segment_ids[mi_offset + y * cm->mi_cols + x] =
- last_segment_ids ? last_segment_ids[mi_offset + y * cm->mi_cols + x]
- : 0;
-}
-
-static int get_predicted_segment_id(AV1_COMMON *const cm, int mi_offset,
- int x_mis, int y_mis) {
- return cm->last_frame_seg_map ? dec_get_segment_id(cm, cm->last_frame_seg_map,
- mi_offset, x_mis, y_mis)
- : 0;
-}
-
-static int read_inter_segment_id(AV1_COMMON *const cm, MACROBLOCKD *const xd,
- int mi_row, int mi_col, int preskip,
- aom_reader *r) {
- struct segmentation *const seg = &cm->seg;
- MB_MODE_INFO *const mbmi = xd->mi[0];
- const int mi_offset = mi_row * cm->mi_cols + mi_col;
- const int bw = mi_size_wide[mbmi->sb_type];
- const int bh = mi_size_high[mbmi->sb_type];
-
- // TODO(slavarnway): move x_mis, y_mis into xd ?????
- const int x_mis = AOMMIN(cm->mi_cols - mi_col, bw);
- const int y_mis = AOMMIN(cm->mi_rows - mi_row, bh);
-
- if (!seg->enabled) return 0; // Default for disabled segmentation
-
- if (!seg->update_map) {
- copy_segment_id(cm, cm->last_frame_seg_map, cm->current_frame_seg_map,
- mi_offset, x_mis, y_mis);
- return get_predicted_segment_id(cm, mi_offset, x_mis, y_mis);
- }
-
- int segment_id;
- if (preskip) {
- if (!seg->segid_preskip) return 0;
- } else {
- if (seg->segid_preskip) return mbmi->segment_id;
- if (mbmi->skip) {
- if (seg->temporal_update) {
- mbmi->seg_id_predicted = 0;
- }
- segment_id = read_segment_id(cm, xd, mi_row, mi_col, r, 1);
- set_segment_id(cm, mi_offset, x_mis, y_mis, segment_id);
- return segment_id;
- }
- }
-
- if (seg->temporal_update) {
- const int ctx = av1_get_pred_context_seg_id(xd);
- FRAME_CONTEXT *ec_ctx = xd->tile_ctx;
- struct segmentation_probs *const segp = &ec_ctx->seg;
- aom_cdf_prob *pred_cdf = segp->pred_cdf[ctx];
- mbmi->seg_id_predicted = aom_read_symbol(r, pred_cdf, 2, ACCT_STR);
- if (mbmi->seg_id_predicted) {
- segment_id = get_predicted_segment_id(cm, mi_offset, x_mis, y_mis);
- } else {
- segment_id = read_segment_id(cm, xd, mi_row, mi_col, r, 0);
- }
- } else {
- segment_id = read_segment_id(cm, xd, mi_row, mi_col, r, 0);
- }
- set_segment_id(cm, mi_offset, x_mis, y_mis, segment_id);
- return segment_id;
-}
-
-static int read_skip_mode(AV1_COMMON *cm, const MACROBLOCKD *xd, int segment_id,
- aom_reader *r) {
- if (!cm->skip_mode_flag) return 0;
-
- if (segfeature_active(&cm->seg, segment_id, SEG_LVL_SKIP)) {
- return 0;
- }
-
- if (!is_comp_ref_allowed(xd->mi[0]->sb_type)) return 0;
-
- if (segfeature_active(&cm->seg, segment_id, SEG_LVL_REF_FRAME) ||
- segfeature_active(&cm->seg, segment_id, SEG_LVL_GLOBALMV)) {
- // These features imply single-reference mode, while skip mode implies
- // compound reference. Hence, the two are mutually exclusive.
- // In other words, skip_mode is implicitly 0 here.
- return 0;
- }
-
- const int ctx = av1_get_skip_mode_context(xd);
- FRAME_CONTEXT *ec_ctx = xd->tile_ctx;
- const int skip_mode =
- aom_read_symbol(r, ec_ctx->skip_mode_cdfs[ctx], 2, ACCT_STR);
- return skip_mode;
-}
-
-static int read_skip(AV1_COMMON *cm, const MACROBLOCKD *xd, int segment_id,
- aom_reader *r) {
- if (segfeature_active(&cm->seg, segment_id, SEG_LVL_SKIP)) {
- return 1;
- } else {
- const int ctx = av1_get_skip_context(xd);
- FRAME_CONTEXT *ec_ctx = xd->tile_ctx;
- const int skip = aom_read_symbol(r, ec_ctx->skip_cdfs[ctx], 2, ACCT_STR);
- return skip;
- }
-}
-
-// Merge the sorted list of cached colors(cached_colors[0...n_cached_colors-1])
-// and the sorted list of transmitted colors(colors[n_cached_colors...n-1]) into
-// one single sorted list(colors[...]).
-static void merge_colors(uint16_t *colors, uint16_t *cached_colors,
- int n_colors, int n_cached_colors) {
- if (n_cached_colors == 0) return;
- int cache_idx = 0, trans_idx = n_cached_colors;
- for (int i = 0; i < n_colors; ++i) {
- if (cache_idx < n_cached_colors &&
- (trans_idx >= n_colors ||
- cached_colors[cache_idx] <= colors[trans_idx])) {
- colors[i] = cached_colors[cache_idx++];
- } else {
- assert(trans_idx < n_colors);
- colors[i] = colors[trans_idx++];
- }
- }
-}
-
-static void read_palette_colors_y(MACROBLOCKD *const xd, int bit_depth,
- PALETTE_MODE_INFO *const pmi, aom_reader *r) {
- uint16_t color_cache[2 * PALETTE_MAX_SIZE];
- uint16_t cached_colors[PALETTE_MAX_SIZE];
- const int n_cache = av1_get_palette_cache(xd, 0, color_cache);
- const int n = pmi->palette_size[0];
- int idx = 0;
- for (int i = 0; i < n_cache && idx < n; ++i)
- if (aom_read_bit(r, ACCT_STR)) cached_colors[idx++] = color_cache[i];
- if (idx < n) {
- const int n_cached_colors = idx;
- pmi->palette_colors[idx++] = aom_read_literal(r, bit_depth, ACCT_STR);
- if (idx < n) {
- const int min_bits = bit_depth - 3;
- int bits = min_bits + aom_read_literal(r, 2, ACCT_STR);
- int range = (1 << bit_depth) - pmi->palette_colors[idx - 1] - 1;
- for (; idx < n; ++idx) {
- assert(range >= 0);
- const int delta = aom_read_literal(r, bits, ACCT_STR) + 1;
- pmi->palette_colors[idx] = clamp(pmi->palette_colors[idx - 1] + delta,
- 0, (1 << bit_depth) - 1);
- range -= (pmi->palette_colors[idx] - pmi->palette_colors[idx - 1]);
- bits = AOMMIN(bits, av1_ceil_log2(range));
- }
- }
- merge_colors(pmi->palette_colors, cached_colors, n, n_cached_colors);
- } else {
- memcpy(pmi->palette_colors, cached_colors, n * sizeof(cached_colors[0]));
- }
-}
-
-static void read_palette_colors_uv(MACROBLOCKD *const xd, int bit_depth,
- PALETTE_MODE_INFO *const pmi,
- aom_reader *r) {
- const int n = pmi->palette_size[1];
- // U channel colors.
- uint16_t color_cache[2 * PALETTE_MAX_SIZE];
- uint16_t cached_colors[PALETTE_MAX_SIZE];
- const int n_cache = av1_get_palette_cache(xd, 1, color_cache);
- int idx = 0;
- for (int i = 0; i < n_cache && idx < n; ++i)
- if (aom_read_bit(r, ACCT_STR)) cached_colors[idx++] = color_cache[i];
- if (idx < n) {
- const int n_cached_colors = idx;
- idx += PALETTE_MAX_SIZE;
- pmi->palette_colors[idx++] = aom_read_literal(r, bit_depth, ACCT_STR);
- if (idx < PALETTE_MAX_SIZE + n) {
- const int min_bits = bit_depth - 3;
- int bits = min_bits + aom_read_literal(r, 2, ACCT_STR);
- int range = (1 << bit_depth) - pmi->palette_colors[idx - 1];
- for (; idx < PALETTE_MAX_SIZE + n; ++idx) {
- assert(range >= 0);
- const int delta = aom_read_literal(r, bits, ACCT_STR);
- pmi->palette_colors[idx] = clamp(pmi->palette_colors[idx - 1] + delta,
- 0, (1 << bit_depth) - 1);
- range -= (pmi->palette_colors[idx] - pmi->palette_colors[idx - 1]);
- bits = AOMMIN(bits, av1_ceil_log2(range));
- }
- }
- merge_colors(pmi->palette_colors + PALETTE_MAX_SIZE, cached_colors, n,
- n_cached_colors);
- } else {
- memcpy(pmi->palette_colors + PALETTE_MAX_SIZE, cached_colors,
- n * sizeof(cached_colors[0]));
- }
-
- // V channel colors.
- if (aom_read_bit(r, ACCT_STR)) { // Delta encoding.
- const int min_bits_v = bit_depth - 4;
- const int max_val = 1 << bit_depth;
- int bits = min_bits_v + aom_read_literal(r, 2, ACCT_STR);
- pmi->palette_colors[2 * PALETTE_MAX_SIZE] =
- aom_read_literal(r, bit_depth, ACCT_STR);
- for (int i = 1; i < n; ++i) {
- int delta = aom_read_literal(r, bits, ACCT_STR);
- if (delta && aom_read_bit(r, ACCT_STR)) delta = -delta;
- int val = (int)pmi->palette_colors[2 * PALETTE_MAX_SIZE + i - 1] + delta;
- if (val < 0) val += max_val;
- if (val >= max_val) val -= max_val;
- pmi->palette_colors[2 * PALETTE_MAX_SIZE + i] = val;
- }
- } else {
- for (int i = 0; i < n; ++i) {
- pmi->palette_colors[2 * PALETTE_MAX_SIZE + i] =
- aom_read_literal(r, bit_depth, ACCT_STR);
- }
- }
-}
-
-static void read_palette_mode_info(AV1_COMMON *const cm, MACROBLOCKD *const xd,
- int mi_row, int mi_col, aom_reader *r) {
- const int num_planes = av1_num_planes(cm);
- MB_MODE_INFO *const mbmi = xd->mi[0];
- const BLOCK_SIZE bsize = mbmi->sb_type;
- assert(av1_allow_palette(cm->allow_screen_content_tools, bsize));
- PALETTE_MODE_INFO *const pmi = &mbmi->palette_mode_info;
- const int bsize_ctx = av1_get_palette_bsize_ctx(bsize);
-
- if (mbmi->mode == DC_PRED) {
- const int palette_mode_ctx = av1_get_palette_mode_ctx(xd);
- const int modev = aom_read_symbol(
- r, xd->tile_ctx->palette_y_mode_cdf[bsize_ctx][palette_mode_ctx], 2,
- ACCT_STR);
- if (modev) {
- pmi->palette_size[0] =
- aom_read_symbol(r, xd->tile_ctx->palette_y_size_cdf[bsize_ctx],
- PALETTE_SIZES, ACCT_STR) +
- 2;
- read_palette_colors_y(xd, cm->seq_params.bit_depth, pmi, r);
- }
- }
- if (num_planes > 1 && mbmi->uv_mode == UV_DC_PRED &&
- is_chroma_reference(mi_row, mi_col, bsize, xd->plane[1].subsampling_x,
- xd->plane[1].subsampling_y)) {
- const int palette_uv_mode_ctx = (pmi->palette_size[0] > 0);
- const int modev = aom_read_symbol(
- r, xd->tile_ctx->palette_uv_mode_cdf[palette_uv_mode_ctx], 2, ACCT_STR);
- if (modev) {
- pmi->palette_size[1] =
- aom_read_symbol(r, xd->tile_ctx->palette_uv_size_cdf[bsize_ctx],
- PALETTE_SIZES, ACCT_STR) +
- 2;
- read_palette_colors_uv(xd, cm->seq_params.bit_depth, pmi, r);
- }
- }
-}
-
-static int read_angle_delta(aom_reader *r, aom_cdf_prob *cdf) {
- const int sym = aom_read_symbol(r, cdf, 2 * MAX_ANGLE_DELTA + 1, ACCT_STR);
- return sym - MAX_ANGLE_DELTA;
-}
-
-static void read_filter_intra_mode_info(const AV1_COMMON *const cm,
- MACROBLOCKD *const xd, aom_reader *r) {
- MB_MODE_INFO *const mbmi = xd->mi[0];
- FILTER_INTRA_MODE_INFO *filter_intra_mode_info =
- &mbmi->filter_intra_mode_info;
-
- if (av1_filter_intra_allowed(cm, mbmi)) {
- filter_intra_mode_info->use_filter_intra = aom_read_symbol(
- r, xd->tile_ctx->filter_intra_cdfs[mbmi->sb_type], 2, ACCT_STR);
- if (filter_intra_mode_info->use_filter_intra) {
- filter_intra_mode_info->filter_intra_mode = aom_read_symbol(
- r, xd->tile_ctx->filter_intra_mode_cdf, FILTER_INTRA_MODES, ACCT_STR);
- }
- } else {
- filter_intra_mode_info->use_filter_intra = 0;
- }
-}
-
-void av1_read_tx_type(const AV1_COMMON *const cm, MACROBLOCKD *xd, int blk_row,
- int blk_col, TX_SIZE tx_size, aom_reader *r) {
- MB_MODE_INFO *mbmi = xd->mi[0];
- const int txk_type_idx =
- av1_get_txk_type_index(mbmi->sb_type, blk_row, blk_col);
- TX_TYPE *tx_type = &mbmi->txk_type[txk_type_idx];
- *tx_type = DCT_DCT;
-
- // No need to read transform type if block is skipped.
- if (mbmi->skip || segfeature_active(&cm->seg, mbmi->segment_id, SEG_LVL_SKIP))
- return;
-
- // No need to read transform type for lossless mode(qindex==0).
- const int qindex =
- cm->seg.enabled ? xd->qindex[mbmi->segment_id] : cm->base_qindex;
- if (qindex <= 0) return;
-
- const int inter_block = is_inter_block(mbmi);
- if (get_ext_tx_types(tx_size, inter_block, cm->reduced_tx_set_used) > 1) {
- const TxSetType tx_set_type =
- av1_get_ext_tx_set_type(tx_size, inter_block, cm->reduced_tx_set_used);
- const int eset =
- get_ext_tx_set(tx_size, inter_block, cm->reduced_tx_set_used);
- // eset == 0 should correspond to a set with only DCT_DCT and
- // there is no need to read the tx_type
- assert(eset != 0);
-
- const TX_SIZE square_tx_size = txsize_sqr_map[tx_size];
- FRAME_CONTEXT *ec_ctx = xd->tile_ctx;
- if (inter_block) {
- *tx_type = av1_ext_tx_inv[tx_set_type][aom_read_symbol(
- r, ec_ctx->inter_ext_tx_cdf[eset][square_tx_size],
- av1_num_ext_tx_set[tx_set_type], ACCT_STR)];
- } else {
- const PREDICTION_MODE intra_mode =
- mbmi->filter_intra_mode_info.use_filter_intra
- ? fimode_to_intradir[mbmi->filter_intra_mode_info
- .filter_intra_mode]
- : mbmi->mode;
- *tx_type = av1_ext_tx_inv[tx_set_type][aom_read_symbol(
- r, ec_ctx->intra_ext_tx_cdf[eset][square_tx_size][intra_mode],
- av1_num_ext_tx_set[tx_set_type], ACCT_STR)];
- }
- }
-}
-
-static INLINE void read_mv(aom_reader *r, MV *mv, const MV *ref,
- nmv_context *ctx, MvSubpelPrecision precision);
-
-static INLINE int is_mv_valid(const MV *mv);
-
-static INLINE int assign_dv(AV1_COMMON *cm, MACROBLOCKD *xd, int_mv *mv,
- const int_mv *ref_mv, int mi_row, int mi_col,
- BLOCK_SIZE bsize, aom_reader *r) {
- FRAME_CONTEXT *ec_ctx = xd->tile_ctx;
- read_mv(r, &mv->as_mv, &ref_mv->as_mv, &ec_ctx->ndvc, MV_SUBPEL_NONE);
- // DV should not have sub-pel.
- assert((mv->as_mv.col & 7) == 0);
- assert((mv->as_mv.row & 7) == 0);
- mv->as_mv.col = (mv->as_mv.col >> 3) * 8;
- mv->as_mv.row = (mv->as_mv.row >> 3) * 8;
- int valid = is_mv_valid(&mv->as_mv) &&
- av1_is_dv_valid(mv->as_mv, cm, xd, mi_row, mi_col, bsize,
- cm->seq_params.mib_size_log2);
- return valid;
-}
-
-static void read_intrabc_info(AV1_COMMON *const cm, MACROBLOCKD *const xd,
- int mi_row, int mi_col, aom_reader *r) {
- MB_MODE_INFO *const mbmi = xd->mi[0];
- FRAME_CONTEXT *ec_ctx = xd->tile_ctx;
- mbmi->use_intrabc = aom_read_symbol(r, ec_ctx->intrabc_cdf, 2, ACCT_STR);
- if (mbmi->use_intrabc) {
- BLOCK_SIZE bsize = mbmi->sb_type;
- mbmi->mode = DC_PRED;
- mbmi->uv_mode = UV_DC_PRED;
- mbmi->interp_filters = av1_broadcast_interp_filter(BILINEAR);
- mbmi->motion_mode = SIMPLE_TRANSLATION;
-
- int16_t inter_mode_ctx[MODE_CTX_REF_FRAMES];
- int_mv ref_mvs[INTRA_FRAME + 1][MAX_MV_REF_CANDIDATES];
- int_mv global_mvs[REF_FRAMES];
-
- av1_find_mv_refs(cm, xd, mbmi, INTRA_FRAME, xd->ref_mv_count,
- xd->ref_mv_stack, ref_mvs, global_mvs, mi_row, mi_col,
- inter_mode_ctx);
-
- int_mv nearestmv, nearmv;
-
- av1_find_best_ref_mvs(0, ref_mvs[INTRA_FRAME], &nearestmv, &nearmv, 0);
- int_mv dv_ref = nearestmv.as_int == 0 ? nearmv : nearestmv;
- if (dv_ref.as_int == 0)
- av1_find_ref_dv(&dv_ref, &xd->tile, cm->seq_params.mib_size, mi_row,
- mi_col);
- // Ref DV should not have sub-pel.
- int valid_dv = (dv_ref.as_mv.col & 7) == 0 && (dv_ref.as_mv.row & 7) == 0;
- dv_ref.as_mv.col = (dv_ref.as_mv.col >> 3) * 8;
- dv_ref.as_mv.row = (dv_ref.as_mv.row >> 3) * 8;
- valid_dv = valid_dv && assign_dv(cm, xd, &mbmi->mv[0], &dv_ref, mi_row,
- mi_col, bsize, r);
- if (!valid_dv) {
- // Intra bc motion vectors are not valid - signal corrupt frame
- aom_merge_corrupted_flag(&xd->corrupted, 1);
- }
- }
-}
-
-// If delta q is present, reads delta_q index.
-// Also reads delta_q loop filter levels, if present.
-static void read_delta_q_params(AV1_COMMON *const cm, MACROBLOCKD *const xd,
- const int mi_row, const int mi_col,
- aom_reader *r) {
- if (cm->delta_q_present_flag) {
- MB_MODE_INFO *const mbmi = xd->mi[0];
- xd->current_qindex +=
- read_delta_qindex(cm, xd, r, mbmi, mi_col, mi_row) * cm->delta_q_res;
- /* Normative: Clamp to [1,MAXQ] to not interfere with lossless mode */
- xd->current_qindex = clamp(xd->current_qindex, 1, MAXQ);
- FRAME_CONTEXT *const ec_ctx = xd->tile_ctx;
- if (cm->delta_lf_present_flag) {
- if (cm->delta_lf_multi) {
- const int frame_lf_count =
- av1_num_planes(cm) > 1 ? FRAME_LF_COUNT : FRAME_LF_COUNT - 2;
- for (int lf_id = 0; lf_id < frame_lf_count; ++lf_id) {
- const int tmp_lvl =
- xd->delta_lf[lf_id] +
- read_delta_lflevel(cm, r, ec_ctx->delta_lf_multi_cdf[lf_id], mbmi,
- mi_col, mi_row) *
- cm->delta_lf_res;
- mbmi->delta_lf[lf_id] = xd->delta_lf[lf_id] =
- clamp(tmp_lvl, -MAX_LOOP_FILTER, MAX_LOOP_FILTER);
- }
- } else {
- const int tmp_lvl = xd->delta_lf_from_base +
- read_delta_lflevel(cm, r, ec_ctx->delta_lf_cdf,
- mbmi, mi_col, mi_row) *
- cm->delta_lf_res;
- mbmi->delta_lf_from_base = xd->delta_lf_from_base =
- clamp(tmp_lvl, -MAX_LOOP_FILTER, MAX_LOOP_FILTER);
- }
- }
- }
-}
-
-static void read_intra_frame_mode_info(AV1_COMMON *const cm,
- MACROBLOCKD *const xd, int mi_row,
- int mi_col, aom_reader *r) {
- MB_MODE_INFO *const mbmi = xd->mi[0];
- const MB_MODE_INFO *above_mi = xd->above_mbmi;
- const MB_MODE_INFO *left_mi = xd->left_mbmi;
- const BLOCK_SIZE bsize = mbmi->sb_type;
- struct segmentation *const seg = &cm->seg;
-
- FRAME_CONTEXT *ec_ctx = xd->tile_ctx;
-
- if (seg->segid_preskip)
- mbmi->segment_id =
- read_intra_segment_id(cm, xd, mi_row, mi_col, bsize, r, 0);
-
- mbmi->skip = read_skip(cm, xd, mbmi->segment_id, r);
-
- if (!seg->segid_preskip)
- mbmi->segment_id =
- read_intra_segment_id(cm, xd, mi_row, mi_col, bsize, r, mbmi->skip);
-
- read_cdef(cm, r, xd, mi_col, mi_row);
-
- read_delta_q_params(cm, xd, mi_row, mi_col, r);
-
- mbmi->current_qindex = xd->current_qindex;
-
- mbmi->ref_frame[0] = INTRA_FRAME;
- mbmi->ref_frame[1] = NONE_FRAME;
- mbmi->palette_mode_info.palette_size[0] = 0;
- mbmi->palette_mode_info.palette_size[1] = 0;
- mbmi->filter_intra_mode_info.use_filter_intra = 0;
-
- xd->above_txfm_context = cm->above_txfm_context[xd->tile.tile_row] + mi_col;
- xd->left_txfm_context =
- xd->left_txfm_context_buffer + (mi_row & MAX_MIB_MASK);
-
- if (av1_allow_intrabc(cm)) {
- read_intrabc_info(cm, xd, mi_row, mi_col, r);
- if (is_intrabc_block(mbmi)) return;
- }
-
- mbmi->mode = read_intra_mode(r, get_y_mode_cdf(ec_ctx, above_mi, left_mi));
-
- const int use_angle_delta = av1_use_angle_delta(bsize);
- mbmi->angle_delta[PLANE_TYPE_Y] =
- (use_angle_delta && av1_is_directional_mode(mbmi->mode))
- ? read_angle_delta(r, ec_ctx->angle_delta_cdf[mbmi->mode - V_PRED])
- : 0;
-
- if (!cm->seq_params.monochrome &&
- is_chroma_reference(mi_row, mi_col, bsize, xd->plane[1].subsampling_x,
- xd->plane[1].subsampling_y)) {
- xd->cfl.is_chroma_reference = 1;
- mbmi->uv_mode =
- read_intra_mode_uv(ec_ctx, r, is_cfl_allowed(xd), mbmi->mode);
- if (mbmi->uv_mode == UV_CFL_PRED) {
- mbmi->cfl_alpha_idx = read_cfl_alphas(ec_ctx, r, &mbmi->cfl_alpha_signs);
- }
- mbmi->angle_delta[PLANE_TYPE_UV] =
- (use_angle_delta && av1_is_directional_mode(get_uv_mode(mbmi->uv_mode)))
- ? read_angle_delta(r,
- ec_ctx->angle_delta_cdf[mbmi->uv_mode - V_PRED])
- : 0;
- } else {
- // Avoid decoding angle_info if there is is no chroma prediction
- mbmi->uv_mode = UV_DC_PRED;
- xd->cfl.is_chroma_reference = 0;
- }
- xd->cfl.store_y = store_cfl_required(cm, xd);
-
- if (av1_allow_palette(cm->allow_screen_content_tools, bsize))
- read_palette_mode_info(cm, xd, mi_row, mi_col, r);
-
- read_filter_intra_mode_info(cm, xd, r);
-}
-
-static int read_mv_component(aom_reader *r, nmv_component *mvcomp,
- int use_subpel, int usehp) {
- int mag, d, fr, hp;
- const int sign = aom_read_symbol(r, mvcomp->sign_cdf, 2, ACCT_STR);
- const int mv_class =
- aom_read_symbol(r, mvcomp->classes_cdf, MV_CLASSES, ACCT_STR);
- const int class0 = mv_class == MV_CLASS_0;
-
- // Integer part
- if (class0) {
- d = aom_read_symbol(r, mvcomp->class0_cdf, CLASS0_SIZE, ACCT_STR);
- mag = 0;
- } else {
- const int n = mv_class + CLASS0_BITS - 1; // number of bits
- d = 0;
- for (int i = 0; i < n; ++i)
- d |= aom_read_symbol(r, mvcomp->bits_cdf[i], 2, ACCT_STR) << i;
- mag = CLASS0_SIZE << (mv_class + 2);
- }
-
- if (use_subpel) {
- // Fractional part
- fr = aom_read_symbol(r, class0 ? mvcomp->class0_fp_cdf[d] : mvcomp->fp_cdf,
- MV_FP_SIZE, ACCT_STR);
-
- // High precision part (if hp is not used, the default value of the hp is 1)
- hp = usehp ? aom_read_symbol(
- r, class0 ? mvcomp->class0_hp_cdf : mvcomp->hp_cdf, 2,
- ACCT_STR)
- : 1;
- } else {
- fr = 3;
- hp = 1;
- }
-
- // Result
- mag += ((d << 3) | (fr << 1) | hp) + 1;
- return sign ? -mag : mag;
-}
-
-static INLINE void read_mv(aom_reader *r, MV *mv, const MV *ref,
- nmv_context *ctx, MvSubpelPrecision precision) {
- MV diff = kZeroMv;
- const MV_JOINT_TYPE joint_type =
- (MV_JOINT_TYPE)aom_read_symbol(r, ctx->joints_cdf, MV_JOINTS, ACCT_STR);
-
- if (mv_joint_vertical(joint_type))
- diff.row = read_mv_component(r, &ctx->comps[0], precision > MV_SUBPEL_NONE,
- precision > MV_SUBPEL_LOW_PRECISION);
-
- if (mv_joint_horizontal(joint_type))
- diff.col = read_mv_component(r, &ctx->comps[1], precision > MV_SUBPEL_NONE,
- precision > MV_SUBPEL_LOW_PRECISION);
-
- mv->row = ref->row + diff.row;
- mv->col = ref->col + diff.col;
-}
-
-static REFERENCE_MODE read_block_reference_mode(AV1_COMMON *cm,
- const MACROBLOCKD *xd,
- aom_reader *r) {
- if (!is_comp_ref_allowed(xd->mi[0]->sb_type)) return SINGLE_REFERENCE;
- if (cm->reference_mode == REFERENCE_MODE_SELECT) {
- const int ctx = av1_get_reference_mode_context(xd);
- const REFERENCE_MODE mode = (REFERENCE_MODE)aom_read_symbol(
- r, xd->tile_ctx->comp_inter_cdf[ctx], 2, ACCT_STR);
- return mode; // SINGLE_REFERENCE or COMPOUND_REFERENCE
- } else {
- assert(cm->reference_mode == SINGLE_REFERENCE);
- return cm->reference_mode;
- }
-}
-
-#define READ_REF_BIT(pname) \
- aom_read_symbol(r, av1_get_pred_cdf_##pname(xd), 2, ACCT_STR)
-
-static COMP_REFERENCE_TYPE read_comp_reference_type(const MACROBLOCKD *xd,
- aom_reader *r) {
- const int ctx = av1_get_comp_reference_type_context(xd);
- const COMP_REFERENCE_TYPE comp_ref_type =
- (COMP_REFERENCE_TYPE)aom_read_symbol(
- r, xd->tile_ctx->comp_ref_type_cdf[ctx], 2, ACCT_STR);
- return comp_ref_type; // UNIDIR_COMP_REFERENCE or BIDIR_COMP_REFERENCE
-}
-
-static void set_ref_frames_for_skip_mode(AV1_COMMON *const cm,
- MV_REFERENCE_FRAME ref_frame[2]) {
- ref_frame[0] = LAST_FRAME + cm->ref_frame_idx_0;
- ref_frame[1] = LAST_FRAME + cm->ref_frame_idx_1;
-}
-
-// Read the referncence frame
-static void read_ref_frames(AV1_COMMON *const cm, MACROBLOCKD *const xd,
- aom_reader *r, int segment_id,
- MV_REFERENCE_FRAME ref_frame[2]) {
- if (xd->mi[0]->skip_mode) {
- set_ref_frames_for_skip_mode(cm, ref_frame);
- return;
- }
-
- if (segfeature_active(&cm->seg, segment_id, SEG_LVL_REF_FRAME)) {
- ref_frame[0] = (MV_REFERENCE_FRAME)get_segdata(&cm->seg, segment_id,
- SEG_LVL_REF_FRAME);
- ref_frame[1] = NONE_FRAME;
- } else if (segfeature_active(&cm->seg, segment_id, SEG_LVL_SKIP) ||
- segfeature_active(&cm->seg, segment_id, SEG_LVL_GLOBALMV)) {
- ref_frame[0] = LAST_FRAME;
- ref_frame[1] = NONE_FRAME;
- } else {
- const REFERENCE_MODE mode = read_block_reference_mode(cm, xd, r);
-
- if (mode == COMPOUND_REFERENCE) {
- const COMP_REFERENCE_TYPE comp_ref_type = read_comp_reference_type(xd, r);
-
- if (comp_ref_type == UNIDIR_COMP_REFERENCE) {
- const int bit = READ_REF_BIT(uni_comp_ref_p);
- if (bit) {
- ref_frame[0] = BWDREF_FRAME;
- ref_frame[1] = ALTREF_FRAME;
- } else {
- const int bit1 = READ_REF_BIT(uni_comp_ref_p1);
- if (bit1) {
- const int bit2 = READ_REF_BIT(uni_comp_ref_p2);
- if (bit2) {
- ref_frame[0] = LAST_FRAME;
- ref_frame[1] = GOLDEN_FRAME;
- } else {
- ref_frame[0] = LAST_FRAME;
- ref_frame[1] = LAST3_FRAME;
- }
- } else {
- ref_frame[0] = LAST_FRAME;
- ref_frame[1] = LAST2_FRAME;
- }
- }
-
- return;
- }
-
- assert(comp_ref_type == BIDIR_COMP_REFERENCE);
-
- const int idx = 1;
- const int bit = READ_REF_BIT(comp_ref_p);
- // Decode forward references.
- if (!bit) {
- const int bit1 = READ_REF_BIT(comp_ref_p1);
- ref_frame[!idx] = cm->comp_fwd_ref[bit1 ? 1 : 0];
- } else {
- const int bit2 = READ_REF_BIT(comp_ref_p2);
- ref_frame[!idx] = cm->comp_fwd_ref[bit2 ? 3 : 2];
- }
-
- // Decode backward references.
- const int bit_bwd = READ_REF_BIT(comp_bwdref_p);
- if (!bit_bwd) {
- const int bit1_bwd = READ_REF_BIT(comp_bwdref_p1);
- ref_frame[idx] = cm->comp_bwd_ref[bit1_bwd];
- } else {
- ref_frame[idx] = cm->comp_bwd_ref[2];
- }
- } else if (mode == SINGLE_REFERENCE) {
- const int bit0 = READ_REF_BIT(single_ref_p1);
- if (bit0) {
- const int bit1 = READ_REF_BIT(single_ref_p2);
- if (!bit1) {
- const int bit5 = READ_REF_BIT(single_ref_p6);
- ref_frame[0] = bit5 ? ALTREF2_FRAME : BWDREF_FRAME;
- } else {
- ref_frame[0] = ALTREF_FRAME;
- }
- } else {
- const int bit2 = READ_REF_BIT(single_ref_p3);
- if (bit2) {
- const int bit4 = READ_REF_BIT(single_ref_p5);
- ref_frame[0] = bit4 ? GOLDEN_FRAME : LAST3_FRAME;
- } else {
- const int bit3 = READ_REF_BIT(single_ref_p4);
- ref_frame[0] = bit3 ? LAST2_FRAME : LAST_FRAME;
- }
- }
-
- ref_frame[1] = NONE_FRAME;
- } else {
- assert(0 && "Invalid prediction mode.");
- }
- }
-}
-
-static INLINE void read_mb_interp_filter(AV1_COMMON *const cm,
- MACROBLOCKD *const xd,
- MB_MODE_INFO *const mbmi,
- aom_reader *r) {
- FRAME_CONTEXT *ec_ctx = xd->tile_ctx;
-
- if (!av1_is_interp_needed(xd)) {
- set_default_interp_filters(mbmi, cm->interp_filter);
- return;
- }
-
- if (cm->interp_filter != SWITCHABLE) {
- mbmi->interp_filters = av1_broadcast_interp_filter(cm->interp_filter);
- } else {
- InterpFilter ref0_filter[2] = { EIGHTTAP_REGULAR, EIGHTTAP_REGULAR };
- for (int dir = 0; dir < 2; ++dir) {
- const int ctx = av1_get_pred_context_switchable_interp(xd, dir);
- ref0_filter[dir] = (InterpFilter)aom_read_symbol(
- r, ec_ctx->switchable_interp_cdf[ctx], SWITCHABLE_FILTERS, ACCT_STR);
- if (cm->seq_params.enable_dual_filter == 0) {
- ref0_filter[1] = ref0_filter[0];
- break;
- }
- }
- // The index system works as: (0, 1) -> (vertical, horizontal) filter types
- mbmi->interp_filters =
- av1_make_interp_filters(ref0_filter[0], ref0_filter[1]);
- }
-}
-
-static void read_intra_block_mode_info(AV1_COMMON *const cm, const int mi_row,
- const int mi_col, MACROBLOCKD *const xd,
- MB_MODE_INFO *const mbmi,
- aom_reader *r) {
- const BLOCK_SIZE bsize = mbmi->sb_type;
- const int use_angle_delta = av1_use_angle_delta(bsize);
-
- mbmi->ref_frame[0] = INTRA_FRAME;
- mbmi->ref_frame[1] = NONE_FRAME;
-
- FRAME_CONTEXT *ec_ctx = xd->tile_ctx;
-
- mbmi->mode = read_intra_mode(r, ec_ctx->y_mode_cdf[size_group_lookup[bsize]]);
-
- mbmi->angle_delta[PLANE_TYPE_Y] =
- use_angle_delta && av1_is_directional_mode(mbmi->mode)
- ? read_angle_delta(r, ec_ctx->angle_delta_cdf[mbmi->mode - V_PRED])
- : 0;
- const int has_chroma =
- is_chroma_reference(mi_row, mi_col, bsize, xd->plane[1].subsampling_x,
- xd->plane[1].subsampling_y);
- xd->cfl.is_chroma_reference = has_chroma;
- if (!cm->seq_params.monochrome && has_chroma) {
- mbmi->uv_mode =
- read_intra_mode_uv(ec_ctx, r, is_cfl_allowed(xd), mbmi->mode);
- if (mbmi->uv_mode == UV_CFL_PRED) {
- mbmi->cfl_alpha_idx =
- read_cfl_alphas(xd->tile_ctx, r, &mbmi->cfl_alpha_signs);
- }
- mbmi->angle_delta[PLANE_TYPE_UV] =
- use_angle_delta && av1_is_directional_mode(get_uv_mode(mbmi->uv_mode))
- ? read_angle_delta(r,
- ec_ctx->angle_delta_cdf[mbmi->uv_mode - V_PRED])
- : 0;
- } else {
- // Avoid decoding angle_info if there is is no chroma prediction
- mbmi->uv_mode = UV_DC_PRED;
- }
- xd->cfl.store_y = store_cfl_required(cm, xd);
-
- mbmi->palette_mode_info.palette_size[0] = 0;
- mbmi->palette_mode_info.palette_size[1] = 0;
- if (av1_allow_palette(cm->allow_screen_content_tools, bsize))
- read_palette_mode_info(cm, xd, mi_row, mi_col, r);
-
- read_filter_intra_mode_info(cm, xd, r);
-}
-
-static INLINE int is_mv_valid(const MV *mv) {
- return mv->row > MV_LOW && mv->row < MV_UPP && mv->col > MV_LOW &&
- mv->col < MV_UPP;
-}
-
-static INLINE int assign_mv(AV1_COMMON *cm, MACROBLOCKD *xd,
- PREDICTION_MODE mode,
- MV_REFERENCE_FRAME ref_frame[2], int_mv mv[2],
- int_mv ref_mv[2], int_mv nearest_mv[2],
- int_mv near_mv[2], int mi_row, int mi_col,
- int is_compound, int allow_hp, aom_reader *r) {
- FRAME_CONTEXT *ec_ctx = xd->tile_ctx;
- MB_MODE_INFO *mbmi = xd->mi[0];
- BLOCK_SIZE bsize = mbmi->sb_type;
- if (cm->cur_frame_force_integer_mv) {
- allow_hp = MV_SUBPEL_NONE;
- }
- switch (mode) {
- case NEWMV: {
- nmv_context *const nmvc = &ec_ctx->nmvc;
- read_mv(r, &mv[0].as_mv, &ref_mv[0].as_mv, nmvc, allow_hp);
- break;
- }
- case NEARESTMV: {
- mv[0].as_int = nearest_mv[0].as_int;
- break;
- }
- case NEARMV: {
- mv[0].as_int = near_mv[0].as_int;
- break;
- }
- case GLOBALMV: {
- mv[0].as_int =
- gm_get_motion_vector(&cm->global_motion[ref_frame[0]],
- cm->allow_high_precision_mv, bsize, mi_col,
- mi_row, cm->cur_frame_force_integer_mv)
- .as_int;
- break;
- }
- case NEW_NEWMV: {
- assert(is_compound);
- for (int i = 0; i < 2; ++i) {
- nmv_context *const nmvc = &ec_ctx->nmvc;
- read_mv(r, &mv[i].as_mv, &ref_mv[i].as_mv, nmvc, allow_hp);
- }
- break;
- }
- case NEAREST_NEARESTMV: {
- assert(is_compound);
- mv[0].as_int = nearest_mv[0].as_int;
- mv[1].as_int = nearest_mv[1].as_int;
- break;
- }
- case NEAR_NEARMV: {
- assert(is_compound);
- mv[0].as_int = near_mv[0].as_int;
- mv[1].as_int = near_mv[1].as_int;
- break;
- }
- case NEW_NEARESTMV: {
- nmv_context *const nmvc = &ec_ctx->nmvc;
- read_mv(r, &mv[0].as_mv, &ref_mv[0].as_mv, nmvc, allow_hp);
- assert(is_compound);
- mv[1].as_int = nearest_mv[1].as_int;
- break;
- }
- case NEAREST_NEWMV: {
- nmv_context *const nmvc = &ec_ctx->nmvc;
- mv[0].as_int = nearest_mv[0].as_int;
- read_mv(r, &mv[1].as_mv, &ref_mv[1].as_mv, nmvc, allow_hp);
- assert(is_compound);
- break;
- }
- case NEAR_NEWMV: {
- nmv_context *const nmvc = &ec_ctx->nmvc;
- mv[0].as_int = near_mv[0].as_int;
- read_mv(r, &mv[1].as_mv, &ref_mv[1].as_mv, nmvc, allow_hp);
- assert(is_compound);
- break;
- }
- case NEW_NEARMV: {
- nmv_context *const nmvc = &ec_ctx->nmvc;
- read_mv(r, &mv[0].as_mv, &ref_mv[0].as_mv, nmvc, allow_hp);
- assert(is_compound);
- mv[1].as_int = near_mv[1].as_int;
- break;
- }
- case GLOBAL_GLOBALMV: {
- assert(is_compound);
- mv[0].as_int =
- gm_get_motion_vector(&cm->global_motion[ref_frame[0]],
- cm->allow_high_precision_mv, bsize, mi_col,
- mi_row, cm->cur_frame_force_integer_mv)
- .as_int;
- mv[1].as_int =
- gm_get_motion_vector(&cm->global_motion[ref_frame[1]],
- cm->allow_high_precision_mv, bsize, mi_col,
- mi_row, cm->cur_frame_force_integer_mv)
- .as_int;
- break;
- }
- default: { return 0; }
- }
-
- int ret = is_mv_valid(&mv[0].as_mv);
- if (is_compound) {
- ret = ret && is_mv_valid(&mv[1].as_mv);
- }
- return ret;
-}
-
-static int read_is_inter_block(AV1_COMMON *const cm, MACROBLOCKD *const xd,
- int segment_id, aom_reader *r) {
- if (segfeature_active(&cm->seg, segment_id, SEG_LVL_REF_FRAME)) {
- const int frame = get_segdata(&cm->seg, segment_id, SEG_LVL_REF_FRAME);
- if (frame < LAST_FRAME) return 0;
- return frame != INTRA_FRAME;
- }
- if (segfeature_active(&cm->seg, segment_id, SEG_LVL_GLOBALMV)) {
- return 1;
- }
- const int ctx = av1_get_intra_inter_context(xd);
- FRAME_CONTEXT *ec_ctx = xd->tile_ctx;
- const int is_inter =
- aom_read_symbol(r, ec_ctx->intra_inter_cdf[ctx], 2, ACCT_STR);
- return is_inter;
-}
-
-#if DEC_MISMATCH_DEBUG
-static void dec_dump_logs(AV1_COMMON *cm, MB_MODE_INFO *const mbmi, int mi_row,
- int mi_col, int16_t mode_ctx) {
- int_mv mv[2] = { { 0 } };
- for (int ref = 0; ref < 1 + has_second_ref(mbmi); ++ref)
- mv[ref].as_mv = mbmi->mv[ref].as_mv;
-
- const int16_t newmv_ctx = mode_ctx & NEWMV_CTX_MASK;
- int16_t zeromv_ctx = -1;
- int16_t refmv_ctx = -1;
- if (mbmi->mode != NEWMV) {
- zeromv_ctx = (mode_ctx >> GLOBALMV_OFFSET) & GLOBALMV_CTX_MASK;
- if (mbmi->mode != GLOBALMV)
- refmv_ctx = (mode_ctx >> REFMV_OFFSET) & REFMV_CTX_MASK;
- }
-
-#define FRAME_TO_CHECK 11
- if (cm->current_video_frame == FRAME_TO_CHECK && cm->show_frame == 1) {
- printf(
- "=== DECODER ===: "
- "Frame=%d, (mi_row,mi_col)=(%d,%d), skip_mode=%d, mode=%d, bsize=%d, "
- "show_frame=%d, mv[0]=(%d,%d), mv[1]=(%d,%d), ref[0]=%d, "
- "ref[1]=%d, motion_mode=%d, mode_ctx=%d, "
- "newmv_ctx=%d, zeromv_ctx=%d, refmv_ctx=%d, tx_size=%d\n",
- cm->current_video_frame, mi_row, mi_col, mbmi->skip_mode, mbmi->mode,
- mbmi->sb_type, cm->show_frame, mv[0].as_mv.row, mv[0].as_mv.col,
- mv[1].as_mv.row, mv[1].as_mv.col, mbmi->ref_frame[0],
- mbmi->ref_frame[1], mbmi->motion_mode, mode_ctx, newmv_ctx, zeromv_ctx,
- refmv_ctx, mbmi->tx_size);
- }
-}
-#endif // DEC_MISMATCH_DEBUG
-
-static void read_inter_block_mode_info(AV1Decoder *const pbi,
- MACROBLOCKD *const xd,
- MB_MODE_INFO *const mbmi, int mi_row,
- int mi_col, aom_reader *r) {
- AV1_COMMON *const cm = &pbi->common;
- const BLOCK_SIZE bsize = mbmi->sb_type;
- const int allow_hp = cm->allow_high_precision_mv;
- int_mv nearestmv[2], nearmv[2];
- int_mv ref_mvs[MODE_CTX_REF_FRAMES][MAX_MV_REF_CANDIDATES] = { { { 0 } } };
- int16_t inter_mode_ctx[MODE_CTX_REF_FRAMES];
- int pts[SAMPLES_ARRAY_SIZE], pts_inref[SAMPLES_ARRAY_SIZE];
- FRAME_CONTEXT *ec_ctx = xd->tile_ctx;
-
- mbmi->uv_mode = UV_DC_PRED;
- mbmi->palette_mode_info.palette_size[0] = 0;
- mbmi->palette_mode_info.palette_size[1] = 0;
-
- av1_collect_neighbors_ref_counts(xd);
-
- read_ref_frames(cm, xd, r, mbmi->segment_id, mbmi->ref_frame);
- const int is_compound = has_second_ref(mbmi);
-
- MV_REFERENCE_FRAME ref_frame = av1_ref_frame_type(mbmi->ref_frame);
- int_mv global_mvs[REF_FRAMES];
- av1_find_mv_refs(cm, xd, mbmi, ref_frame, xd->ref_mv_count, xd->ref_mv_stack,
- ref_mvs, global_mvs, mi_row, mi_col, inter_mode_ctx);
-
- int mode_ctx = av1_mode_context_analyzer(inter_mode_ctx, mbmi->ref_frame);
- mbmi->ref_mv_idx = 0;
-
- if (mbmi->skip_mode) {
- assert(is_compound);
- mbmi->mode = NEAREST_NEARESTMV;
- } else {
- if (segfeature_active(&cm->seg, mbmi->segment_id, SEG_LVL_SKIP) ||
- segfeature_active(&cm->seg, mbmi->segment_id, SEG_LVL_GLOBALMV)) {
- mbmi->mode = GLOBALMV;
- } else {
- if (is_compound)
- mbmi->mode = read_inter_compound_mode(xd, r, mode_ctx);
- else
- mbmi->mode = read_inter_mode(ec_ctx, r, mode_ctx);
- if (mbmi->mode == NEWMV || mbmi->mode == NEW_NEWMV ||
- have_nearmv_in_inter_mode(mbmi->mode))
- read_drl_idx(ec_ctx, xd, mbmi, r);
- }
- }
-
- if (is_compound != is_inter_compound_mode(mbmi->mode)) {
- aom_internal_error(xd->error_info, AOM_CODEC_CORRUPT_FRAME,
- "Prediction mode %d invalid with ref frame %d %d",
- mbmi->mode, mbmi->ref_frame[0], mbmi->ref_frame[1]);
- }
-
- if (!is_compound && mbmi->mode != GLOBALMV) {
- av1_find_best_ref_mvs(allow_hp, ref_mvs[mbmi->ref_frame[0]], &nearestmv[0],
- &nearmv[0], cm->cur_frame_force_integer_mv);
- }
-
- if (is_compound && mbmi->mode != GLOBAL_GLOBALMV) {
- int ref_mv_idx = mbmi->ref_mv_idx + 1;
- nearestmv[0] = xd->ref_mv_stack[ref_frame][0].this_mv;
- nearestmv[1] = xd->ref_mv_stack[ref_frame][0].comp_mv;
- nearmv[0] = xd->ref_mv_stack[ref_frame][ref_mv_idx].this_mv;
- nearmv[1] = xd->ref_mv_stack[ref_frame][ref_mv_idx].comp_mv;
- lower_mv_precision(&nearestmv[0].as_mv, allow_hp,
- cm->cur_frame_force_integer_mv);
- lower_mv_precision(&nearestmv[1].as_mv, allow_hp,
- cm->cur_frame_force_integer_mv);
- lower_mv_precision(&nearmv[0].as_mv, allow_hp,
- cm->cur_frame_force_integer_mv);
- lower_mv_precision(&nearmv[1].as_mv, allow_hp,
- cm->cur_frame_force_integer_mv);
- } else if (mbmi->ref_mv_idx > 0 && mbmi->mode == NEARMV) {
- int_mv cur_mv =
- xd->ref_mv_stack[mbmi->ref_frame[0]][1 + mbmi->ref_mv_idx].this_mv;
- nearmv[0] = cur_mv;
- }
-
- int_mv ref_mv[2];
- ref_mv[0] = nearestmv[0];
- ref_mv[1] = nearestmv[1];
-
- if (is_compound) {
- int ref_mv_idx = mbmi->ref_mv_idx;
- // Special case: NEAR_NEWMV and NEW_NEARMV modes use
- // 1 + mbmi->ref_mv_idx (like NEARMV) instead of
- // mbmi->ref_mv_idx (like NEWMV)
- if (mbmi->mode == NEAR_NEWMV || mbmi->mode == NEW_NEARMV)
- ref_mv_idx = 1 + mbmi->ref_mv_idx;
-
- // TODO(jingning, yunqing): Do we need a lower_mv_precision() call here?
- if (compound_ref0_mode(mbmi->mode) == NEWMV)
- ref_mv[0] = xd->ref_mv_stack[ref_frame][ref_mv_idx].this_mv;
-
- if (compound_ref1_mode(mbmi->mode) == NEWMV)
- ref_mv[1] = xd->ref_mv_stack[ref_frame][ref_mv_idx].comp_mv;
- } else {
- if (mbmi->mode == NEWMV) {
- if (xd->ref_mv_count[ref_frame] > 1)
- ref_mv[0] = xd->ref_mv_stack[ref_frame][mbmi->ref_mv_idx].this_mv;
- }
- }
-
- if (mbmi->skip_mode) {
- assert(mbmi->mode == NEAREST_NEARESTMV);
- mbmi->mv[0].as_int = nearestmv[0].as_int;
- mbmi->mv[1].as_int = nearestmv[1].as_int;
- } else {
- int mv_corrupted_flag =
- !assign_mv(cm, xd, mbmi->mode, mbmi->ref_frame, mbmi->mv, ref_mv,
- nearestmv, nearmv, mi_row, mi_col, is_compound, allow_hp, r);
- aom_merge_corrupted_flag(&xd->corrupted, mv_corrupted_flag);
- }
-
- mbmi->use_wedge_interintra = 0;
- if (cm->seq_params.enable_interintra_compound && !mbmi->skip_mode &&
- is_interintra_allowed(mbmi)) {
- const int bsize_group = size_group_lookup[bsize];
- const int interintra =
- aom_read_symbol(r, ec_ctx->interintra_cdf[bsize_group], 2, ACCT_STR);
- assert(mbmi->ref_frame[1] == NONE_FRAME);
- if (interintra) {
- const INTERINTRA_MODE interintra_mode =
- read_interintra_mode(xd, r, bsize_group);
- mbmi->ref_frame[1] = INTRA_FRAME;
- mbmi->interintra_mode = interintra_mode;
- mbmi->angle_delta[PLANE_TYPE_Y] = 0;
- mbmi->angle_delta[PLANE_TYPE_UV] = 0;
- mbmi->filter_intra_mode_info.use_filter_intra = 0;
- if (is_interintra_wedge_used(bsize)) {
- mbmi->use_wedge_interintra = aom_read_symbol(
- r, ec_ctx->wedge_interintra_cdf[bsize], 2, ACCT_STR);
- if (mbmi->use_wedge_interintra) {
- mbmi->interintra_wedge_index =
- aom_read_symbol(r, ec_ctx->wedge_idx_cdf[bsize], 16, ACCT_STR);
- mbmi->interintra_wedge_sign = 0;
- }
- }
- }
- }
-
- for (int ref = 0; ref < 1 + has_second_ref(mbmi); ++ref) {
- const MV_REFERENCE_FRAME frame = mbmi->ref_frame[ref];
- RefBuffer *ref_buf = &cm->frame_refs[frame - LAST_FRAME];
-
- xd->block_refs[ref] = ref_buf;
- }
-
- mbmi->motion_mode = SIMPLE_TRANSLATION;
- if (is_motion_variation_allowed_bsize(mbmi->sb_type) && !mbmi->skip_mode &&
- !has_second_ref(mbmi))
- mbmi->num_proj_ref = findSamples(cm, xd, mi_row, mi_col, pts, pts_inref);
- av1_count_overlappable_neighbors(cm, xd, mi_row, mi_col);
-
- if (mbmi->ref_frame[1] != INTRA_FRAME)
- mbmi->motion_mode = read_motion_mode(cm, xd, mbmi, r);
-
- // init
- mbmi->comp_group_idx = 0;
- mbmi->compound_idx = 1;
- mbmi->interinter_comp.type = COMPOUND_AVERAGE;
-
- if (has_second_ref(mbmi) && !mbmi->skip_mode) {
- // Read idx to indicate current compound inter prediction mode group
- const int masked_compound_used = is_any_masked_compound_used(bsize) &&
- cm->seq_params.enable_masked_compound;
-
- if (masked_compound_used) {
- const int ctx_comp_group_idx = get_comp_group_idx_context(xd);
- mbmi->comp_group_idx = aom_read_symbol(
- r, ec_ctx->comp_group_idx_cdf[ctx_comp_group_idx], 2, ACCT_STR);
- }
-
- if (mbmi->comp_group_idx == 0) {
- if (cm->seq_params.enable_jnt_comp) {
- const int comp_index_ctx = get_comp_index_context(cm, xd);
- mbmi->compound_idx = aom_read_symbol(
- r, ec_ctx->compound_index_cdf[comp_index_ctx], 2, ACCT_STR);
- } else {
- // Distance-weighted compound is disabled, so always use average
- mbmi->compound_idx = 1;
- }
- } else {
- assert(cm->reference_mode != SINGLE_REFERENCE &&
- is_inter_compound_mode(mbmi->mode) &&
- mbmi->motion_mode == SIMPLE_TRANSLATION);
- assert(masked_compound_used);
-
- // compound_diffwtd, wedge
- if (is_interinter_compound_used(COMPOUND_WEDGE, bsize))
- mbmi->interinter_comp.type =
- 1 + aom_read_symbol(r, ec_ctx->compound_type_cdf[bsize],
- COMPOUND_TYPES - 1, ACCT_STR);
- else
- mbmi->interinter_comp.type = COMPOUND_DIFFWTD;
-
- if (mbmi->interinter_comp.type == COMPOUND_WEDGE) {
- assert(is_interinter_compound_used(COMPOUND_WEDGE, bsize));
- mbmi->interinter_comp.wedge_index =
- aom_read_symbol(r, ec_ctx->wedge_idx_cdf[bsize], 16, ACCT_STR);
- mbmi->interinter_comp.wedge_sign = aom_read_bit(r, ACCT_STR);
- } else {
- assert(mbmi->interinter_comp.type == COMPOUND_DIFFWTD);
- mbmi->interinter_comp.mask_type =
- aom_read_literal(r, MAX_DIFFWTD_MASK_BITS, ACCT_STR);
- }
- }
- }
-
- read_mb_interp_filter(cm, xd, mbmi, r);
-
- if (mbmi->motion_mode == WARPED_CAUSAL) {
- mbmi->wm_params.wmtype = DEFAULT_WMTYPE;
- mbmi->wm_params.invalid = 0;
-
- if (mbmi->num_proj_ref > 1)
- mbmi->num_proj_ref = selectSamples(&mbmi->mv[0].as_mv, pts, pts_inref,
- mbmi->num_proj_ref, bsize);
-
- if (find_projection(mbmi->num_proj_ref, pts, pts_inref, bsize,
- mbmi->mv[0].as_mv.row, mbmi->mv[0].as_mv.col,
- &mbmi->wm_params, mi_row, mi_col)) {
-#if WARPED_MOTION_DEBUG
- printf("Warning: unexpected warped model from aomenc\n");
-#endif
- mbmi->wm_params.invalid = 1;
- }
- }
-
- xd->cfl.is_chroma_reference =
- is_chroma_reference(mi_row, mi_col, bsize, cm->seq_params.subsampling_x,
- cm->seq_params.subsampling_y);
- xd->cfl.store_y = store_cfl_required(cm, xd);
-
-#if DEC_MISMATCH_DEBUG
- dec_dump_logs(cm, mi, mi_row, mi_col, mode_ctx);
-#endif // DEC_MISMATCH_DEBUG
-}
-
-static void read_inter_frame_mode_info(AV1Decoder *const pbi,
- MACROBLOCKD *const xd, int mi_row,
- int mi_col, aom_reader *r) {
- AV1_COMMON *const cm = &pbi->common;
- MB_MODE_INFO *const mbmi = xd->mi[0];
- int inter_block = 1;
-
- mbmi->mv[0].as_int = 0;
- mbmi->mv[1].as_int = 0;
- mbmi->segment_id = read_inter_segment_id(cm, xd, mi_row, mi_col, 1, r);
-
- mbmi->skip_mode = read_skip_mode(cm, xd, mbmi->segment_id, r);
-
- if (mbmi->skip_mode)
- mbmi->skip = 1;
- else
- mbmi->skip = read_skip(cm, xd, mbmi->segment_id, r);
-
- mbmi->segment_id = read_inter_segment_id(cm, xd, mi_row, mi_col, 0, r);
-
- read_cdef(cm, r, xd, mi_col, mi_row);
-
- read_delta_q_params(cm, xd, mi_row, mi_col, r);
-
- if (!mbmi->skip_mode)
- inter_block = read_is_inter_block(cm, xd, mbmi->segment_id, r);
-
- mbmi->current_qindex = xd->current_qindex;
-
- xd->above_txfm_context = cm->above_txfm_context[xd->tile.tile_row] + mi_col;
- xd->left_txfm_context =
- xd->left_txfm_context_buffer + (mi_row & MAX_MIB_MASK);
-
- if (inter_block)
- read_inter_block_mode_info(pbi, xd, mbmi, mi_row, mi_col, r);
- else
- read_intra_block_mode_info(cm, mi_row, mi_col, xd, mbmi, r);
-}
-
-static void intra_copy_frame_mvs(AV1_COMMON *const cm, int mi_row, int mi_col,
- int x_mis, int y_mis) {
- const int frame_mvs_stride = ROUND_POWER_OF_TWO(cm->mi_cols, 1);
- MV_REF *frame_mvs =
- cm->cur_frame->mvs + (mi_row >> 1) * frame_mvs_stride + (mi_col >> 1);
- x_mis = ROUND_POWER_OF_TWO(x_mis, 1);
- y_mis = ROUND_POWER_OF_TWO(y_mis, 1);
-
- for (int h = 0; h < y_mis; h++) {
- MV_REF *mv = frame_mvs;
- for (int w = 0; w < x_mis; w++) {
- mv->ref_frame = NONE_FRAME;
- mv++;
- }
- frame_mvs += frame_mvs_stride;
- }
-}
-
-void av1_read_mode_info(AV1Decoder *const pbi, MACROBLOCKD *xd, int mi_row,
- int mi_col, aom_reader *r, int x_mis, int y_mis) {
- AV1_COMMON *const cm = &pbi->common;
- MB_MODE_INFO *const mi = xd->mi[0];
- mi->use_intrabc = 0;
-
- if (frame_is_intra_only(cm)) {
- read_intra_frame_mode_info(cm, xd, mi_row, mi_col, r);
- intra_copy_frame_mvs(cm, mi_row, mi_col, x_mis, y_mis);
- } else {
- read_inter_frame_mode_info(pbi, xd, mi_row, mi_col, r);
- av1_copy_frame_mvs(cm, mi, mi_row, mi_col, x_mis, y_mis);
- }
-}
diff --git a/third_party/aom/av1/decoder/decodemv.h b/third_party/aom/av1/decoder/decodemv.h
deleted file mode 100644
index 1625e5bd2..000000000
--- a/third_party/aom/av1/decoder/decodemv.h
+++ /dev/null
@@ -1,35 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#ifndef AOM_AV1_DECODER_DECODEMV_H_
-#define AOM_AV1_DECODER_DECODEMV_H_
-
-#include "aom_dsp/bitreader.h"
-
-#include "av1/decoder/decoder.h"
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-void av1_read_mode_info(AV1Decoder *const pbi, MACROBLOCKD *xd,
-
- int mi_row, int mi_col, aom_reader *r, int x_mis,
- int y_mis);
-
-#ifdef __cplusplus
-} // extern "C"
-#endif
-
-void av1_read_tx_type(const AV1_COMMON *const cm, MACROBLOCKD *xd, int blk_row,
- int blk_col, TX_SIZE tx_size, aom_reader *r);
-
-#endif // AOM_AV1_DECODER_DECODEMV_H_
diff --git a/third_party/aom/av1/decoder/decoder.c b/third_party/aom/av1/decoder/decoder.c
deleted file mode 100644
index a5f4fd67f..000000000
--- a/third_party/aom/av1/decoder/decoder.c
+++ /dev/null
@@ -1,575 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#include <assert.h>
-#include <limits.h>
-#include <stdio.h>
-
-#include "config/av1_rtcd.h"
-#include "config/aom_dsp_rtcd.h"
-#include "config/aom_scale_rtcd.h"
-
-#include "aom_mem/aom_mem.h"
-#include "aom_ports/system_state.h"
-#include "aom_ports/aom_once.h"
-#include "aom_ports/aom_timer.h"
-#include "aom_scale/aom_scale.h"
-#include "aom_util/aom_thread.h"
-
-#include "av1/common/alloccommon.h"
-#include "av1/common/av1_loopfilter.h"
-#include "av1/common/onyxc_int.h"
-#include "av1/common/quant_common.h"
-#include "av1/common/reconinter.h"
-#include "av1/common/reconintra.h"
-
-#include "av1/decoder/decodeframe.h"
-#include "av1/decoder/decoder.h"
-#include "av1/decoder/detokenize.h"
-#include "av1/decoder/obu.h"
-
-static void initialize_dec(void) {
- av1_rtcd();
- aom_dsp_rtcd();
- aom_scale_rtcd();
- av1_init_intra_predictors();
- av1_init_wedge_masks();
-}
-
-static void dec_setup_mi(AV1_COMMON *cm) {
- cm->mi = cm->mip;
- cm->mi_grid_visible = cm->mi_grid_base;
- memset(cm->mi_grid_base, 0,
- cm->mi_stride * cm->mi_rows * sizeof(*cm->mi_grid_base));
-}
-
-static int av1_dec_alloc_mi(AV1_COMMON *cm, int mi_size) {
- cm->mip = aom_calloc(mi_size, sizeof(*cm->mip));
- if (!cm->mip) return 1;
- cm->mi_alloc_size = mi_size;
- cm->mi_grid_base =
- (MB_MODE_INFO **)aom_calloc(mi_size, sizeof(MB_MODE_INFO *));
- if (!cm->mi_grid_base) return 1;
- return 0;
-}
-
-static void dec_free_mi(AV1_COMMON *cm) {
- aom_free(cm->mip);
- cm->mip = NULL;
- aom_free(cm->mi_grid_base);
- cm->mi_grid_base = NULL;
- cm->mi_alloc_size = 0;
-}
-
-AV1Decoder *av1_decoder_create(BufferPool *const pool) {
- AV1Decoder *volatile const pbi = aom_memalign(32, sizeof(*pbi));
- AV1_COMMON *volatile const cm = pbi ? &pbi->common : NULL;
-
- if (!cm) return NULL;
-
- av1_zero(*pbi);
-
- // The jmp_buf is valid only for the duration of the function that calls
- // setjmp(). Therefore, this function must reset the 'setjmp' field to 0
- // before it returns.
- if (setjmp(cm->error.jmp)) {
- cm->error.setjmp = 0;
- av1_decoder_remove(pbi);
- return NULL;
- }
-
- cm->error.setjmp = 1;
-
- CHECK_MEM_ERROR(cm, cm->fc,
- (FRAME_CONTEXT *)aom_memalign(32, sizeof(*cm->fc)));
- CHECK_MEM_ERROR(cm, cm->frame_contexts,
- (FRAME_CONTEXT *)aom_memalign(
- 32, FRAME_CONTEXTS * sizeof(*cm->frame_contexts)));
- memset(cm->fc, 0, sizeof(*cm->fc));
- memset(cm->frame_contexts, 0, FRAME_CONTEXTS * sizeof(*cm->frame_contexts));
-
- pbi->need_resync = 1;
- aom_once(initialize_dec);
-
- // Initialize the references to not point to any frame buffers.
- memset(&cm->ref_frame_map, -1, sizeof(cm->ref_frame_map));
- memset(&cm->next_ref_frame_map, -1, sizeof(cm->next_ref_frame_map));
-
- cm->current_video_frame = 0;
- pbi->decoding_first_frame = 1;
- pbi->common.buffer_pool = pool;
-
- cm->seq_params.bit_depth = AOM_BITS_8;
- cm->dequant_bit_depth = AOM_BITS_8;
-
- cm->alloc_mi = av1_dec_alloc_mi;
- cm->free_mi = dec_free_mi;
- cm->setup_mi = dec_setup_mi;
-
- av1_loop_filter_init(cm);
-
- av1_qm_init(cm);
- av1_loop_restoration_precal();
-#if CONFIG_ACCOUNTING
- pbi->acct_enabled = 1;
- aom_accounting_init(&pbi->accounting);
-#endif
-
- cm->error.setjmp = 0;
-
- aom_get_worker_interface()->init(&pbi->lf_worker);
-
- return pbi;
-}
-
-void av1_dealloc_dec_jobs(struct AV1DecTileMTData *tile_mt_info) {
- if (tile_mt_info != NULL) {
-#if CONFIG_MULTITHREAD
- if (tile_mt_info->job_mutex != NULL) {
- pthread_mutex_destroy(tile_mt_info->job_mutex);
- aom_free(tile_mt_info->job_mutex);
- }
-#endif
- aom_free(tile_mt_info->job_queue);
- // clear the structure as the source of this call may be a resize in which
- // case this call will be followed by an _alloc() which may fail.
- av1_zero(*tile_mt_info);
- }
-}
-
-void av1_dec_free_cb_buf(AV1Decoder *pbi) {
- aom_free(pbi->cb_buffer_base);
- pbi->cb_buffer_base = NULL;
- pbi->cb_buffer_alloc_size = 0;
-}
-
-void av1_decoder_remove(AV1Decoder *pbi) {
- int i;
-
- if (!pbi) return;
-
- // Free the tile list output buffer.
- if (pbi->tile_list_output != NULL) aom_free(pbi->tile_list_output);
- pbi->tile_list_output = NULL;
-
- aom_get_worker_interface()->end(&pbi->lf_worker);
- aom_free(pbi->lf_worker.data1);
-
- if (pbi->thread_data) {
- for (int worker_idx = 0; worker_idx < pbi->max_threads - 1; worker_idx++) {
- DecWorkerData *const thread_data = pbi->thread_data + worker_idx;
- av1_free_mc_tmp_buf(thread_data->td);
- aom_free(thread_data->td);
- }
- aom_free(pbi->thread_data);
- }
-
- for (i = 0; i < pbi->num_workers; ++i) {
- AVxWorker *const worker = &pbi->tile_workers[i];
- aom_get_worker_interface()->end(worker);
- }
-#if CONFIG_MULTITHREAD
- if (pbi->row_mt_mutex_ != NULL) {
- pthread_mutex_destroy(pbi->row_mt_mutex_);
- aom_free(pbi->row_mt_mutex_);
- }
- if (pbi->row_mt_cond_ != NULL) {
- pthread_cond_destroy(pbi->row_mt_cond_);
- aom_free(pbi->row_mt_cond_);
- }
-#endif
- for (i = 0; i < pbi->allocated_tiles; i++) {
- TileDataDec *const tile_data = pbi->tile_data + i;
- av1_dec_row_mt_dealloc(&tile_data->dec_row_mt_sync);
- }
- aom_free(pbi->tile_data);
- aom_free(pbi->tile_workers);
-
- if (pbi->num_workers > 0) {
- av1_loop_filter_dealloc(&pbi->lf_row_sync);
- av1_loop_restoration_dealloc(&pbi->lr_row_sync, pbi->num_workers);
- av1_dealloc_dec_jobs(&pbi->tile_mt_info);
- }
-
- av1_dec_free_cb_buf(pbi);
-#if CONFIG_ACCOUNTING
- aom_accounting_clear(&pbi->accounting);
-#endif
- av1_free_mc_tmp_buf(&pbi->td);
-
- aom_free(pbi);
-}
-
-void av1_visit_palette(AV1Decoder *const pbi, MACROBLOCKD *const xd, int mi_row,
- int mi_col, aom_reader *r, BLOCK_SIZE bsize,
- palette_visitor_fn_t visit) {
- if (!is_inter_block(xd->mi[0])) {
- for (int plane = 0; plane < AOMMIN(2, av1_num_planes(&pbi->common));
- ++plane) {
- const struct macroblockd_plane *const pd = &xd->plane[plane];
- if (is_chroma_reference(mi_row, mi_col, bsize, pd->subsampling_x,
- pd->subsampling_y)) {
- if (xd->mi[0]->palette_mode_info.palette_size[plane])
- visit(xd, plane, r);
- } else {
- assert(xd->mi[0]->palette_mode_info.palette_size[plane] == 0);
- }
- }
- }
-}
-
-static int equal_dimensions(const YV12_BUFFER_CONFIG *a,
- const YV12_BUFFER_CONFIG *b) {
- return a->y_height == b->y_height && a->y_width == b->y_width &&
- a->uv_height == b->uv_height && a->uv_width == b->uv_width;
-}
-
-aom_codec_err_t av1_copy_reference_dec(AV1Decoder *pbi, int idx,
- YV12_BUFFER_CONFIG *sd) {
- AV1_COMMON *cm = &pbi->common;
- const int num_planes = av1_num_planes(cm);
-
- const YV12_BUFFER_CONFIG *const cfg = get_ref_frame(cm, idx);
- if (cfg == NULL) {
- aom_internal_error(&cm->error, AOM_CODEC_ERROR, "No reference frame");
- return AOM_CODEC_ERROR;
- }
- if (!equal_dimensions(cfg, sd))
- aom_internal_error(&cm->error, AOM_CODEC_ERROR,
- "Incorrect buffer dimensions");
- else
- aom_yv12_copy_frame(cfg, sd, num_planes);
-
- return cm->error.error_code;
-}
-
-static int equal_dimensions_and_border(const YV12_BUFFER_CONFIG *a,
- const YV12_BUFFER_CONFIG *b) {
- return a->y_height == b->y_height && a->y_width == b->y_width &&
- a->uv_height == b->uv_height && a->uv_width == b->uv_width &&
- a->y_stride == b->y_stride && a->uv_stride == b->uv_stride &&
- a->border == b->border &&
- (a->flags & YV12_FLAG_HIGHBITDEPTH) ==
- (b->flags & YV12_FLAG_HIGHBITDEPTH);
-}
-
-aom_codec_err_t av1_set_reference_dec(AV1_COMMON *cm, int idx,
- int use_external_ref,
- YV12_BUFFER_CONFIG *sd) {
- const int num_planes = av1_num_planes(cm);
- YV12_BUFFER_CONFIG *ref_buf = NULL;
-
- // Get the destination reference buffer.
- ref_buf = get_ref_frame(cm, idx);
-
- if (ref_buf == NULL) {
- aom_internal_error(&cm->error, AOM_CODEC_ERROR, "No reference frame");
- return AOM_CODEC_ERROR;
- }
-
- if (!use_external_ref) {
- if (!equal_dimensions(ref_buf, sd)) {
- aom_internal_error(&cm->error, AOM_CODEC_ERROR,
- "Incorrect buffer dimensions");
- } else {
- // Overwrite the reference frame buffer.
- aom_yv12_copy_frame(sd, ref_buf, num_planes);
- }
- } else {
- if (!equal_dimensions_and_border(ref_buf, sd)) {
- aom_internal_error(&cm->error, AOM_CODEC_ERROR,
- "Incorrect buffer dimensions");
- } else {
- // Overwrite the reference frame buffer pointers.
- // Once we no longer need the external reference buffer, these pointers
- // are restored.
- ref_buf->store_buf_adr[0] = ref_buf->y_buffer;
- ref_buf->store_buf_adr[1] = ref_buf->u_buffer;
- ref_buf->store_buf_adr[2] = ref_buf->v_buffer;
- ref_buf->y_buffer = sd->y_buffer;
- ref_buf->u_buffer = sd->u_buffer;
- ref_buf->v_buffer = sd->v_buffer;
- ref_buf->use_external_reference_buffers = 1;
- }
- }
-
- return cm->error.error_code;
-}
-
-aom_codec_err_t av1_copy_new_frame_dec(AV1_COMMON *cm,
- YV12_BUFFER_CONFIG *new_frame,
- YV12_BUFFER_CONFIG *sd) {
- const int num_planes = av1_num_planes(cm);
-
- if (!equal_dimensions_and_border(new_frame, sd))
- aom_internal_error(&cm->error, AOM_CODEC_ERROR,
- "Incorrect buffer dimensions");
- else
- aom_yv12_copy_frame(new_frame, sd, num_planes);
-
- return cm->error.error_code;
-}
-
-/* If any buffer updating is signaled it should be done here.
- Consumes a reference to cm->new_fb_idx.
-*/
-static void swap_frame_buffers(AV1Decoder *pbi, int frame_decoded) {
- int ref_index = 0, mask;
- AV1_COMMON *const cm = &pbi->common;
- BufferPool *const pool = cm->buffer_pool;
- RefCntBuffer *const frame_bufs = cm->buffer_pool->frame_bufs;
-
- if (frame_decoded) {
- lock_buffer_pool(pool);
-
- // In ext-tile decoding, the camera frame header is only decoded once. So,
- // we don't release the references here.
- if (!pbi->camera_frame_header_ready) {
- for (mask = pbi->refresh_frame_flags; mask; mask >>= 1) {
- const int old_idx = cm->ref_frame_map[ref_index];
- // Current thread releases the holding of reference frame.
- decrease_ref_count(old_idx, frame_bufs, pool);
-
- // Release the reference frame holding in the reference map for the
- // decoding of the next frame.
- if (mask & 1) decrease_ref_count(old_idx, frame_bufs, pool);
- cm->ref_frame_map[ref_index] = cm->next_ref_frame_map[ref_index];
- ++ref_index;
- }
-
- // Current thread releases the holding of reference frame.
- const int check_on_show_existing_frame =
- !cm->show_existing_frame || cm->reset_decoder_state;
- for (; ref_index < REF_FRAMES && check_on_show_existing_frame;
- ++ref_index) {
- const int old_idx = cm->ref_frame_map[ref_index];
- decrease_ref_count(old_idx, frame_bufs, pool);
- cm->ref_frame_map[ref_index] = cm->next_ref_frame_map[ref_index];
- }
- }
-
- YV12_BUFFER_CONFIG *cur_frame = get_frame_new_buffer(cm);
-
- if (cm->show_existing_frame || cm->show_frame) {
- if (pbi->output_all_layers) {
- // Append this frame to the output queue
- if (pbi->num_output_frames >= MAX_NUM_SPATIAL_LAYERS) {
- // We can't store the new frame anywhere, so drop it and return an
- // error
- decrease_ref_count(cm->new_fb_idx, frame_bufs, pool);
- cm->error.error_code = AOM_CODEC_UNSUP_BITSTREAM;
- } else {
- pbi->output_frames[pbi->num_output_frames] = cur_frame;
- pbi->output_frame_index[pbi->num_output_frames] = cm->new_fb_idx;
- pbi->num_output_frames++;
- }
- } else {
- // Replace any existing output frame
- assert(pbi->num_output_frames == 0 || pbi->num_output_frames == 1);
- if (pbi->num_output_frames > 0) {
- decrease_ref_count((int)pbi->output_frame_index[0], frame_bufs, pool);
- }
- pbi->output_frames[0] = cur_frame;
- pbi->output_frame_index[0] = cm->new_fb_idx;
- pbi->num_output_frames = 1;
- }
- } else {
- decrease_ref_count(cm->new_fb_idx, frame_bufs, pool);
- }
-
- unlock_buffer_pool(pool);
- } else {
- // Nothing was decoded, so just drop this frame buffer
- lock_buffer_pool(pool);
- decrease_ref_count(cm->new_fb_idx, frame_bufs, pool);
- unlock_buffer_pool(pool);
- }
-
- if (!pbi->camera_frame_header_ready) {
- pbi->hold_ref_buf = 0;
-
- // Invalidate these references until the next frame starts.
- for (ref_index = 0; ref_index < INTER_REFS_PER_FRAME; ref_index++) {
- cm->frame_refs[ref_index].idx = INVALID_IDX;
- cm->frame_refs[ref_index].buf = NULL;
- }
- }
-}
-
-int av1_receive_compressed_data(AV1Decoder *pbi, size_t size,
- const uint8_t **psource) {
- AV1_COMMON *volatile const cm = &pbi->common;
- BufferPool *volatile const pool = cm->buffer_pool;
- RefCntBuffer *volatile const frame_bufs = cm->buffer_pool->frame_bufs;
- const uint8_t *source = *psource;
- cm->error.error_code = AOM_CODEC_OK;
-
- if (size == 0) {
- // This is used to signal that we are missing frames.
- // We do not know if the missing frame(s) was supposed to update
- // any of the reference buffers, but we act conservative and
- // mark only the last buffer as corrupted.
- //
- // TODO(jkoleszar): Error concealment is undefined and non-normative
- // at this point, but if it becomes so, [0] may not always be the correct
- // thing to do here.
- if (cm->frame_refs[0].idx > 0) {
- assert(cm->frame_refs[0].buf != NULL);
- cm->frame_refs[0].buf->corrupted = 1;
- }
- }
-
- // Find a free buffer for the new frame, releasing the reference previously
- // held.
-
- // Find a free frame buffer. Return error if can not find any.
- cm->new_fb_idx = get_free_fb(cm);
- if (cm->new_fb_idx == INVALID_IDX) {
- cm->error.error_code = AOM_CODEC_MEM_ERROR;
- return 1;
- }
-
- // Assign a MV array to the frame buffer.
- cm->cur_frame = &pool->frame_bufs[cm->new_fb_idx];
-
- if (!pbi->camera_frame_header_ready) pbi->hold_ref_buf = 0;
-
- pbi->cur_buf = &frame_bufs[cm->new_fb_idx];
-
- // The jmp_buf is valid only for the duration of the function that calls
- // setjmp(). Therefore, this function must reset the 'setjmp' field to 0
- // before it returns.
- if (setjmp(cm->error.jmp)) {
- const AVxWorkerInterface *const winterface = aom_get_worker_interface();
- int i;
-
- cm->error.setjmp = 0;
-
- // Synchronize all threads immediately as a subsequent decode call may
- // cause a resize invalidating some allocations.
- winterface->sync(&pbi->lf_worker);
- for (i = 0; i < pbi->num_workers; ++i) {
- winterface->sync(&pbi->tile_workers[i]);
- }
-
- lock_buffer_pool(pool);
- // Release all the reference buffers if worker thread is holding them.
- if (pbi->hold_ref_buf == 1) {
- int ref_index = 0, mask;
- for (mask = pbi->refresh_frame_flags; mask; mask >>= 1) {
- const int old_idx = cm->ref_frame_map[ref_index];
- // Current thread releases the holding of reference frame.
- decrease_ref_count(old_idx, frame_bufs, pool);
-
- // Release the reference frame holding in the reference map for the
- // decoding of the next frame.
- if (mask & 1) decrease_ref_count(old_idx, frame_bufs, pool);
- ++ref_index;
- }
-
- // Current thread releases the holding of reference frame.
- const int check_on_show_existing_frame =
- !cm->show_existing_frame || cm->reset_decoder_state;
- for (; ref_index < REF_FRAMES && check_on_show_existing_frame;
- ++ref_index) {
- const int old_idx = cm->ref_frame_map[ref_index];
- decrease_ref_count(old_idx, frame_bufs, pool);
- }
- pbi->hold_ref_buf = 0;
- }
- // Release current frame.
- decrease_ref_count(cm->new_fb_idx, frame_bufs, pool);
- unlock_buffer_pool(pool);
-
- aom_clear_system_state();
- return -1;
- }
-
- cm->error.setjmp = 1;
-
- int frame_decoded =
- aom_decode_frame_from_obus(pbi, source, source + size, psource);
-
- if (cm->error.error_code != AOM_CODEC_OK) {
- lock_buffer_pool(pool);
- decrease_ref_count(cm->new_fb_idx, frame_bufs, pool);
- unlock_buffer_pool(pool);
- cm->error.setjmp = 0;
- return 1;
- }
-
-#if TXCOEFF_TIMER
- cm->cum_txcoeff_timer += cm->txcoeff_timer;
- fprintf(stderr,
- "txb coeff block number: %d, frame time: %ld, cum time %ld in us\n",
- cm->txb_count, cm->txcoeff_timer, cm->cum_txcoeff_timer);
- cm->txcoeff_timer = 0;
- cm->txb_count = 0;
-#endif
-
- // Note: At this point, this function holds a reference to cm->new_fb_idx
- // in the buffer pool. This reference is consumed by swap_frame_buffers().
- swap_frame_buffers(pbi, frame_decoded);
-
- if (frame_decoded) {
- pbi->decoding_first_frame = 0;
- }
-
- if (cm->error.error_code != AOM_CODEC_OK) {
- cm->error.setjmp = 0;
- return 1;
- }
-
- aom_clear_system_state();
-
- if (!cm->show_existing_frame) {
- cm->last_show_frame = cm->show_frame;
-
- if (cm->seg.enabled) {
- if (cm->prev_frame && (cm->mi_rows == cm->prev_frame->mi_rows) &&
- (cm->mi_cols == cm->prev_frame->mi_cols)) {
- cm->last_frame_seg_map = cm->prev_frame->seg_map;
- } else {
- cm->last_frame_seg_map = NULL;
- }
- }
- }
-
- // Update progress in frame parallel decode.
- cm->last_width = cm->width;
- cm->last_height = cm->height;
- cm->last_tile_cols = cm->tile_cols;
- cm->last_tile_rows = cm->tile_rows;
- cm->error.setjmp = 0;
-
- return 0;
-}
-
-// Get the frame at a particular index in the output queue
-int av1_get_raw_frame(AV1Decoder *pbi, size_t index, YV12_BUFFER_CONFIG **sd,
- aom_film_grain_t **grain_params) {
- RefCntBuffer *const frame_bufs = pbi->common.buffer_pool->frame_bufs;
-
- if (index >= pbi->num_output_frames) return -1;
- *sd = pbi->output_frames[index];
- *grain_params = &frame_bufs[pbi->output_frame_index[index]].film_grain_params;
- aom_clear_system_state();
- return 0;
-}
-
-// Get the highest-spatial-layer output
-// TODO(david.barker): What should this do?
-int av1_get_frame_to_show(AV1Decoder *pbi, YV12_BUFFER_CONFIG *frame) {
- if (pbi->num_output_frames == 0) return -1;
-
- *frame = *pbi->output_frames[pbi->num_output_frames - 1];
- return 0;
-}
diff --git a/third_party/aom/av1/decoder/decoder.h b/third_party/aom/av1/decoder/decoder.h
deleted file mode 100644
index 5ca939c24..000000000
--- a/third_party/aom/av1/decoder/decoder.h
+++ /dev/null
@@ -1,317 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#ifndef AOM_AV1_DECODER_DECODER_H_
-#define AOM_AV1_DECODER_DECODER_H_
-
-#include "config/aom_config.h"
-
-#include "aom/aom_codec.h"
-#include "aom_dsp/bitreader.h"
-#include "aom_scale/yv12config.h"
-#include "aom_util/aom_thread.h"
-
-#include "av1/common/thread_common.h"
-#include "av1/common/onyxc_int.h"
-#include "av1/decoder/dthread.h"
-#if CONFIG_ACCOUNTING
-#include "av1/decoder/accounting.h"
-#endif
-#if CONFIG_INSPECTION
-#include "av1/decoder/inspection.h"
-#endif
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-typedef void (*decode_block_visitor_fn_t)(const AV1_COMMON *const cm,
- MACROBLOCKD *const xd,
- aom_reader *const r, const int plane,
- const int row, const int col,
- const TX_SIZE tx_size);
-
-typedef void (*predict_inter_block_visitor_fn_t)(AV1_COMMON *const cm,
- MACROBLOCKD *const xd,
- int mi_row, int mi_col,
- BLOCK_SIZE bsize);
-
-typedef void (*cfl_store_inter_block_visitor_fn_t)(AV1_COMMON *const cm,
- MACROBLOCKD *const xd);
-
-typedef struct ThreadData {
- aom_reader *bit_reader;
- DECLARE_ALIGNED(32, MACROBLOCKD, xd);
- /* dqcoeff are shared by all the planes. So planes must be decoded serially */
- DECLARE_ALIGNED(32, tran_low_t, dqcoeff[MAX_TX_SQUARE]);
- CB_BUFFER cb_buffer_base;
- uint8_t *mc_buf[2];
- int32_t mc_buf_size;
- int mc_buf_use_highbd; // Boolean: whether the byte pointers stored in
- // mc_buf were converted from highbd pointers.
-
- CONV_BUF_TYPE *tmp_conv_dst;
- uint8_t *tmp_obmc_bufs[2];
-
- decode_block_visitor_fn_t read_coeffs_tx_intra_block_visit;
- decode_block_visitor_fn_t predict_and_recon_intra_block_visit;
- decode_block_visitor_fn_t read_coeffs_tx_inter_block_visit;
- decode_block_visitor_fn_t inverse_tx_inter_block_visit;
- predict_inter_block_visitor_fn_t predict_inter_block_visit;
- cfl_store_inter_block_visitor_fn_t cfl_store_inter_block_visit;
-} ThreadData;
-
-typedef struct AV1DecRowMTJobInfo {
- int tile_row;
- int tile_col;
- int mi_row;
-} AV1DecRowMTJobInfo;
-
-typedef struct AV1DecRowMTSyncData {
-#if CONFIG_MULTITHREAD
- pthread_mutex_t *mutex_;
- pthread_cond_t *cond_;
-#endif
- int allocated_sb_rows;
- int *cur_sb_col;
- int sync_range;
- int mi_rows;
- int mi_cols;
- int mi_rows_parse_done;
- int mi_rows_decode_started;
- int num_threads_working;
-} AV1DecRowMTSync;
-
-typedef struct AV1DecRowMTInfo {
- int tile_rows_start;
- int tile_rows_end;
- int tile_cols_start;
- int tile_cols_end;
- int start_tile;
- int end_tile;
- int mi_rows_parse_done;
- int mi_rows_decode_started;
- int mi_rows_to_decode;
- int row_mt_exit;
-} AV1DecRowMTInfo;
-
-typedef struct TileDataDec {
- TileInfo tile_info;
- aom_reader bit_reader;
- DECLARE_ALIGNED(16, FRAME_CONTEXT, tctx);
- AV1DecRowMTSync dec_row_mt_sync;
-} TileDataDec;
-
-typedef struct TileBufferDec {
- const uint8_t *data;
- size_t size;
-} TileBufferDec;
-
-typedef struct DataBuffer {
- const uint8_t *data;
- size_t size;
-} DataBuffer;
-
-typedef struct EXTERNAL_REFERENCES {
- YV12_BUFFER_CONFIG refs[MAX_EXTERNAL_REFERENCES];
- int num;
-} EXTERNAL_REFERENCES;
-
-typedef struct TileJobsDec {
- TileBufferDec *tile_buffer;
- TileDataDec *tile_data;
-} TileJobsDec;
-
-typedef struct AV1DecTileMTData {
-#if CONFIG_MULTITHREAD
- pthread_mutex_t *job_mutex;
-#endif
- TileJobsDec *job_queue;
- int jobs_enqueued;
- int jobs_dequeued;
- int alloc_tile_rows;
- int alloc_tile_cols;
-} AV1DecTileMT;
-
-typedef struct AV1Decoder {
- DECLARE_ALIGNED(32, MACROBLOCKD, mb);
-
- DECLARE_ALIGNED(32, AV1_COMMON, common);
-
- int refresh_frame_flags;
-
- // TODO(hkuang): Combine this with cur_buf in macroblockd as they are
- // the same.
- RefCntBuffer *cur_buf; // Current decoding frame buffer.
-
- AVxWorker *frame_worker_owner; // frame_worker that owns this pbi.
- AVxWorker lf_worker;
- AV1LfSync lf_row_sync;
- AV1LrSync lr_row_sync;
- AV1LrStruct lr_ctxt;
- AVxWorker *tile_workers;
- int num_workers;
- DecWorkerData *thread_data;
- ThreadData td;
- TileDataDec *tile_data;
- int allocated_tiles;
-
- TileBufferDec tile_buffers[MAX_TILE_ROWS][MAX_TILE_COLS];
- AV1DecTileMT tile_mt_info;
-
- // Each time the decoder is called, we expect to receive a full temporal unit.
- // This can contain up to one shown frame per spatial layer in the current
- // operating point (note that some layers may be entirely omitted).
- // If the 'output_all_layers' option is true, we save all of these shown
- // frames so that they can be returned to the application. If the
- // 'output_all_layers' option is false, then we only output one image per
- // temporal unit.
- //
- // Note: The saved buffers are released at the start of the next time the
- // application calls aom_codec_decode().
- int output_all_layers;
- YV12_BUFFER_CONFIG *output_frames[MAX_NUM_SPATIAL_LAYERS];
- size_t output_frame_index[MAX_NUM_SPATIAL_LAYERS]; // Buffer pool indices
- size_t num_output_frames; // How many frames are queued up so far?
-
- // In order to properly support random-access decoding, we need
- // to behave slightly differently for the very first frame we decode.
- // So we track whether this is the first frame or not.
- int decoding_first_frame;
-
- int allow_lowbitdepth;
- int max_threads;
- int inv_tile_order;
- int need_resync; // wait for key/intra-only frame.
- int hold_ref_buf; // hold the reference buffer.
-
- int tile_size_bytes;
- int tile_col_size_bytes;
- int dec_tile_row, dec_tile_col; // always -1 for non-VR tile encoding
-#if CONFIG_ACCOUNTING
- int acct_enabled;
- Accounting accounting;
-#endif
- int tg_size; // Number of tiles in the current tilegroup
- int tg_start; // First tile in the current tilegroup
- int tg_size_bit_offset;
- int sequence_header_ready;
- int sequence_header_changed;
-#if CONFIG_INSPECTION
- aom_inspect_cb inspect_cb;
- void *inspect_ctx;
-#endif
- int operating_point;
- int current_operating_point;
- int seen_frame_header;
-
- // State if the camera frame header is already decoded while
- // large_scale_tile = 1.
- int camera_frame_header_ready;
- size_t frame_header_size;
- DataBuffer obu_size_hdr;
- int output_frame_width_in_tiles_minus_1;
- int output_frame_height_in_tiles_minus_1;
- int tile_count_minus_1;
- uint32_t coded_tile_data_size;
- unsigned int ext_tile_debug; // for ext-tile software debug & testing
- unsigned int row_mt;
- EXTERNAL_REFERENCES ext_refs;
- size_t tile_list_size;
- uint8_t *tile_list_output;
- size_t buffer_sz;
-
- CB_BUFFER *cb_buffer_base;
- int cb_buffer_alloc_size;
-
- int allocated_row_mt_sync_rows;
-
-#if CONFIG_MULTITHREAD
- pthread_mutex_t *row_mt_mutex_;
- pthread_cond_t *row_mt_cond_;
-#endif
-
- AV1DecRowMTInfo frame_row_mt_info;
-} AV1Decoder;
-
-// Returns 0 on success. Sets pbi->common.error.error_code to a nonzero error
-// code and returns a nonzero value on failure.
-int av1_receive_compressed_data(struct AV1Decoder *pbi, size_t size,
- const uint8_t **dest);
-
-// Get the frame at a particular index in the output queue
-int av1_get_raw_frame(AV1Decoder *pbi, size_t index, YV12_BUFFER_CONFIG **sd,
- aom_film_grain_t **grain_params);
-
-int av1_get_frame_to_show(struct AV1Decoder *pbi, YV12_BUFFER_CONFIG *frame);
-
-aom_codec_err_t av1_copy_reference_dec(struct AV1Decoder *pbi, int idx,
- YV12_BUFFER_CONFIG *sd);
-
-aom_codec_err_t av1_set_reference_dec(AV1_COMMON *cm, int idx,
- int use_external_ref,
- YV12_BUFFER_CONFIG *sd);
-aom_codec_err_t av1_copy_new_frame_dec(AV1_COMMON *cm,
- YV12_BUFFER_CONFIG *new_frame,
- YV12_BUFFER_CONFIG *sd);
-
-struct AV1Decoder *av1_decoder_create(BufferPool *const pool);
-
-void av1_decoder_remove(struct AV1Decoder *pbi);
-void av1_dealloc_dec_jobs(struct AV1DecTileMTData *tile_jobs_sync);
-
-void av1_dec_row_mt_dealloc(AV1DecRowMTSync *dec_row_mt_sync);
-
-void av1_dec_free_cb_buf(AV1Decoder *pbi);
-
-static INLINE void decrease_ref_count(int idx, RefCntBuffer *const frame_bufs,
- BufferPool *const pool) {
- if (idx >= 0) {
- --frame_bufs[idx].ref_count;
- // A worker may only get a free framebuffer index when calling get_free_fb.
- // But the private buffer is not set up until finish decoding header.
- // So any error happens during decoding header, the frame_bufs will not
- // have valid priv buffer.
- if (frame_bufs[idx].ref_count == 0 &&
- frame_bufs[idx].raw_frame_buffer.priv) {
- pool->release_fb_cb(pool->cb_priv, &frame_bufs[idx].raw_frame_buffer);
- }
- }
-}
-
-#define ACCT_STR __func__
-static INLINE int av1_read_uniform(aom_reader *r, int n) {
- const int l = get_unsigned_bits(n);
- const int m = (1 << l) - n;
- const int v = aom_read_literal(r, l - 1, ACCT_STR);
- assert(l != 0);
- if (v < m)
- return v;
- else
- return (v << 1) - m + aom_read_literal(r, 1, ACCT_STR);
-}
-
-typedef void (*palette_visitor_fn_t)(MACROBLOCKD *const xd, int plane,
- aom_reader *r);
-
-void av1_visit_palette(AV1Decoder *const pbi, MACROBLOCKD *const xd, int mi_row,
- int mi_col, aom_reader *r, BLOCK_SIZE bsize,
- palette_visitor_fn_t visit);
-
-typedef void (*block_visitor_fn_t)(AV1Decoder *const pbi, ThreadData *const td,
- int mi_row, int mi_col, aom_reader *r,
- PARTITION_TYPE partition, BLOCK_SIZE bsize);
-
-#ifdef __cplusplus
-} // extern "C"
-#endif
-
-#endif // AOM_AV1_DECODER_DECODER_H_
diff --git a/third_party/aom/av1/decoder/decodetxb.c b/third_party/aom/av1/decoder/decodetxb.c
deleted file mode 100644
index f3ef2d55e..000000000
--- a/third_party/aom/av1/decoder/decodetxb.c
+++ /dev/null
@@ -1,362 +0,0 @@
-/*
- * Copyright (c) 2017, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#include "av1/decoder/decodetxb.h"
-
-#include "aom_ports/mem.h"
-#include "av1/common/idct.h"
-#include "av1/common/scan.h"
-#include "av1/common/txb_common.h"
-#include "av1/decoder/decodemv.h"
-
-#define ACCT_STR __func__
-
-static int read_golomb(MACROBLOCKD *xd, aom_reader *r) {
- int x = 1;
- int length = 0;
- int i = 0;
-
- while (!i) {
- i = aom_read_bit(r, ACCT_STR);
- ++length;
- if (length > 20) {
- aom_internal_error(xd->error_info, AOM_CODEC_CORRUPT_FRAME,
- "Invalid length in read_golomb");
- break;
- }
- }
-
- for (i = 0; i < length - 1; ++i) {
- x <<= 1;
- x += aom_read_bit(r, ACCT_STR);
- }
-
- return x - 1;
-}
-
-static INLINE int rec_eob_pos(const int eob_token, const int extra) {
- int eob = k_eob_group_start[eob_token];
- if (eob > 2) {
- eob += extra;
- }
- return eob;
-}
-
-static INLINE int get_dqv(const int16_t *dequant, int coeff_idx,
- const qm_val_t *iqmatrix) {
- int dqv = dequant[!!coeff_idx];
- if (iqmatrix != NULL)
- dqv =
- ((iqmatrix[coeff_idx] * dqv) + (1 << (AOM_QM_BITS - 1))) >> AOM_QM_BITS;
- return dqv;
-}
-
-static INLINE void read_coeffs_reverse_2d(aom_reader *r, TX_SIZE tx_size,
- int start_si, int end_si,
- const int16_t *scan, int bwl,
- uint8_t *levels,
- base_cdf_arr base_cdf,
- br_cdf_arr br_cdf) {
- for (int c = end_si; c >= start_si; --c) {
- const int pos = scan[c];
- const int coeff_ctx = get_lower_levels_ctx_2d(levels, pos, bwl, tx_size);
- const int nsymbs = 4;
- int level = aom_read_symbol(r, base_cdf[coeff_ctx], nsymbs, ACCT_STR);
- if (level > NUM_BASE_LEVELS) {
- const int br_ctx = get_br_ctx_2d(levels, pos, bwl);
- aom_cdf_prob *cdf = br_cdf[br_ctx];
- for (int idx = 0; idx < COEFF_BASE_RANGE; idx += BR_CDF_SIZE - 1) {
- const int k = aom_read_symbol(r, cdf, BR_CDF_SIZE, ACCT_STR);
- level += k;
- if (k < BR_CDF_SIZE - 1) break;
- }
- }
- levels[get_padded_idx(pos, bwl)] = level;
- }
-}
-
-static INLINE void read_coeffs_reverse(aom_reader *r, TX_SIZE tx_size,
- TX_CLASS tx_class, int start_si,
- int end_si, const int16_t *scan, int bwl,
- uint8_t *levels, base_cdf_arr base_cdf,
- br_cdf_arr br_cdf) {
- for (int c = end_si; c >= start_si; --c) {
- const int pos = scan[c];
- const int coeff_ctx =
- get_lower_levels_ctx(levels, pos, bwl, tx_size, tx_class);
- const int nsymbs = 4;
- int level = aom_read_symbol(r, base_cdf[coeff_ctx], nsymbs, ACCT_STR);
- if (level > NUM_BASE_LEVELS) {
- const int br_ctx = get_br_ctx(levels, pos, bwl, tx_class);
- aom_cdf_prob *cdf = br_cdf[br_ctx];
- for (int idx = 0; idx < COEFF_BASE_RANGE; idx += BR_CDF_SIZE - 1) {
- const int k = aom_read_symbol(r, cdf, BR_CDF_SIZE, ACCT_STR);
- level += k;
- if (k < BR_CDF_SIZE - 1) break;
- }
- }
- levels[get_padded_idx(pos, bwl)] = level;
- }
-}
-
-uint8_t av1_read_coeffs_txb(const AV1_COMMON *const cm, MACROBLOCKD *const xd,
- aom_reader *const r, const int blk_row,
- const int blk_col, const int plane,
- const TXB_CTX *const txb_ctx,
- const TX_SIZE tx_size) {
- FRAME_CONTEXT *const ec_ctx = xd->tile_ctx;
- const int32_t max_value = (1 << (7 + xd->bd)) - 1;
- const int32_t min_value = -(1 << (7 + xd->bd));
- const TX_SIZE txs_ctx = get_txsize_entropy_ctx(tx_size);
- const PLANE_TYPE plane_type = get_plane_type(plane);
- MB_MODE_INFO *const mbmi = xd->mi[0];
- struct macroblockd_plane *const pd = &xd->plane[plane];
- const int16_t *const dequant = pd->seg_dequant_QTX[mbmi->segment_id];
- tran_low_t *const tcoeffs = pd->dqcoeff_block + xd->cb_offset[plane];
- const int shift = av1_get_tx_scale(tx_size);
- const int bwl = get_txb_bwl(tx_size);
- const int width = get_txb_wide(tx_size);
- const int height = get_txb_high(tx_size);
- int cul_level = 0;
- int dc_val = 0;
- uint8_t levels_buf[TX_PAD_2D];
- uint8_t *const levels = set_levels(levels_buf, width);
- const int all_zero = aom_read_symbol(
- r, ec_ctx->txb_skip_cdf[txs_ctx][txb_ctx->txb_skip_ctx], 2, ACCT_STR);
- eob_info *eob_data = pd->eob_data + xd->txb_offset[plane];
- uint16_t *const eob = &(eob_data->eob);
- uint16_t *const max_scan_line = &(eob_data->max_scan_line);
- *max_scan_line = 0;
- *eob = 0;
- if (all_zero) {
- *max_scan_line = 0;
- if (plane == 0) {
- const int txk_type_idx =
- av1_get_txk_type_index(mbmi->sb_type, blk_row, blk_col);
- mbmi->txk_type[txk_type_idx] = DCT_DCT;
- }
- return 0;
- }
-
- memset(levels_buf, 0,
- sizeof(*levels_buf) *
- ((width + TX_PAD_HOR) * (height + TX_PAD_VER) + TX_PAD_END));
- if (plane == AOM_PLANE_Y) {
- // only y plane's tx_type is transmitted
- av1_read_tx_type(cm, xd, blk_row, blk_col, tx_size, r);
- }
- const TX_TYPE tx_type = av1_get_tx_type(plane_type, xd, blk_row, blk_col,
- tx_size, cm->reduced_tx_set_used);
- const TX_CLASS tx_class = tx_type_to_class[tx_type];
- const TX_SIZE qm_tx_size = av1_get_adjusted_tx_size(tx_size);
- const qm_val_t *iqmatrix =
- IS_2D_TRANSFORM(tx_type)
- ? pd->seg_iqmatrix[mbmi->segment_id][qm_tx_size]
- : cm->giqmatrix[NUM_QM_LEVELS - 1][0][qm_tx_size];
- const SCAN_ORDER *const scan_order = get_scan(tx_size, tx_type);
- const int16_t *const scan = scan_order->scan;
- int eob_extra = 0;
- int eob_pt = 1;
-
- const int eob_multi_size = txsize_log2_minus4[tx_size];
- const int eob_multi_ctx = (tx_class == TX_CLASS_2D) ? 0 : 1;
- switch (eob_multi_size) {
- case 0:
- eob_pt =
- aom_read_symbol(r, ec_ctx->eob_flag_cdf16[plane_type][eob_multi_ctx],
- 5, ACCT_STR) +
- 1;
- break;
- case 1:
- eob_pt =
- aom_read_symbol(r, ec_ctx->eob_flag_cdf32[plane_type][eob_multi_ctx],
- 6, ACCT_STR) +
- 1;
- break;
- case 2:
- eob_pt =
- aom_read_symbol(r, ec_ctx->eob_flag_cdf64[plane_type][eob_multi_ctx],
- 7, ACCT_STR) +
- 1;
- break;
- case 3:
- eob_pt =
- aom_read_symbol(r, ec_ctx->eob_flag_cdf128[plane_type][eob_multi_ctx],
- 8, ACCT_STR) +
- 1;
- break;
- case 4:
- eob_pt =
- aom_read_symbol(r, ec_ctx->eob_flag_cdf256[plane_type][eob_multi_ctx],
- 9, ACCT_STR) +
- 1;
- break;
- case 5:
- eob_pt =
- aom_read_symbol(r, ec_ctx->eob_flag_cdf512[plane_type][eob_multi_ctx],
- 10, ACCT_STR) +
- 1;
- break;
- case 6:
- default:
- eob_pt = aom_read_symbol(
- r, ec_ctx->eob_flag_cdf1024[plane_type][eob_multi_ctx], 11,
- ACCT_STR) +
- 1;
- break;
- }
-
- if (k_eob_offset_bits[eob_pt] > 0) {
- const int eob_ctx = eob_pt - 3;
- int bit = aom_read_symbol(
- r, ec_ctx->eob_extra_cdf[txs_ctx][plane_type][eob_ctx], 2, ACCT_STR);
- if (bit) {
- eob_extra += (1 << (k_eob_offset_bits[eob_pt] - 1));
- }
-
- for (int i = 1; i < k_eob_offset_bits[eob_pt]; i++) {
- bit = aom_read_bit(r, ACCT_STR);
- if (bit) {
- eob_extra += (1 << (k_eob_offset_bits[eob_pt] - 1 - i));
- }
- }
- }
- *eob = rec_eob_pos(eob_pt, eob_extra);
-
- {
- // Read the non-zero coefficient with scan index eob-1
- // TODO(angiebird): Put this into a function
- const int c = *eob - 1;
- const int pos = scan[c];
- const int coeff_ctx = get_lower_levels_ctx_eob(bwl, height, c);
- const int nsymbs = 3;
- aom_cdf_prob *cdf =
- ec_ctx->coeff_base_eob_cdf[txs_ctx][plane_type][coeff_ctx];
- int level = aom_read_symbol(r, cdf, nsymbs, ACCT_STR) + 1;
- if (level > NUM_BASE_LEVELS) {
- const int br_ctx = get_br_ctx(levels, pos, bwl, tx_class);
- for (int idx = 0; idx < COEFF_BASE_RANGE; idx += BR_CDF_SIZE - 1) {
- const int k = aom_read_symbol(
- r,
- ec_ctx->coeff_br_cdf[AOMMIN(txs_ctx, TX_32X32)][plane_type][br_ctx],
- BR_CDF_SIZE, ACCT_STR);
- level += k;
- if (k < BR_CDF_SIZE - 1) break;
- }
- }
- levels[get_padded_idx(pos, bwl)] = level;
- }
- if (*eob > 1) {
- base_cdf_arr base_cdf = ec_ctx->coeff_base_cdf[txs_ctx][plane_type];
- br_cdf_arr br_cdf =
- ec_ctx->coeff_br_cdf[AOMMIN(txs_ctx, TX_32X32)][plane_type];
- if (tx_class == TX_CLASS_2D) {
- read_coeffs_reverse_2d(r, tx_size, 1, *eob - 1 - 1, scan, bwl, levels,
- base_cdf, br_cdf);
- read_coeffs_reverse(r, tx_size, tx_class, 0, 0, scan, bwl, levels,
- base_cdf, br_cdf);
- } else {
- read_coeffs_reverse(r, tx_size, tx_class, 0, *eob - 1 - 1, scan, bwl,
- levels, base_cdf, br_cdf);
- }
- }
-
- int16_t num_zero_coeffs = 0;
- for (int c = 0; c < *eob; ++c) {
- const int pos = scan[c];
- num_zero_coeffs = AOMMAX(num_zero_coeffs, pos);
- }
- memset(tcoeffs, 0, (num_zero_coeffs + 1) * sizeof(tcoeffs[0]));
-
- for (int c = 0; c < *eob; ++c) {
- const int pos = scan[c];
- uint8_t sign;
- tran_low_t level = levels[get_padded_idx(pos, bwl)];
- if (level) {
- *max_scan_line = AOMMAX(*max_scan_line, pos);
- if (c == 0) {
- const int dc_sign_ctx = txb_ctx->dc_sign_ctx;
- sign = aom_read_symbol(r, ec_ctx->dc_sign_cdf[plane_type][dc_sign_ctx],
- 2, ACCT_STR);
- } else {
- sign = aom_read_bit(r, ACCT_STR);
- }
- if (level >= MAX_BASE_BR_RANGE) {
- level += read_golomb(xd, r);
- }
-
- if (c == 0) dc_val = sign ? -level : level;
-
- // Bitmasking to clamp level to valid range:
- // The valid range for 8/10/12 bit vdieo is at most 14/16/18 bit
- level &= 0xfffff;
- cul_level += level;
- tran_low_t dq_coeff;
- // Bitmasking to clamp dq_coeff to valid range:
- // The valid range for 8/10/12 bit video is at most 17/19/21 bit
- dq_coeff = (tran_low_t)(
- (int64_t)level * get_dqv(dequant, scan[c], iqmatrix) & 0xffffff);
- dq_coeff = dq_coeff >> shift;
- if (sign) {
- dq_coeff = -dq_coeff;
- }
- tcoeffs[pos] = clamp(dq_coeff, min_value, max_value);
- }
- }
-
- cul_level = AOMMIN(COEFF_CONTEXT_MASK, cul_level);
-
- // DC value
- set_dc_sign(&cul_level, dc_val);
-
- return cul_level;
-}
-
-void av1_read_coeffs_txb_facade(const AV1_COMMON *const cm,
- MACROBLOCKD *const xd, aom_reader *const r,
- const int plane, const int row, const int col,
- const TX_SIZE tx_size) {
-#if TXCOEFF_TIMER
- struct aom_usec_timer timer;
- aom_usec_timer_start(&timer);
-#endif
- MB_MODE_INFO *const mbmi = xd->mi[0];
- struct macroblockd_plane *const pd = &xd->plane[plane];
-
- const BLOCK_SIZE bsize = mbmi->sb_type;
- const BLOCK_SIZE plane_bsize =
- get_plane_block_size(bsize, pd->subsampling_x, pd->subsampling_y);
-
- TXB_CTX txb_ctx;
- get_txb_ctx(plane_bsize, tx_size, plane, pd->above_context + col,
- pd->left_context + row, &txb_ctx);
- const uint8_t cul_level =
- av1_read_coeffs_txb(cm, xd, r, row, col, plane, &txb_ctx, tx_size);
- av1_set_contexts(xd, pd, plane, plane_bsize, tx_size, cul_level, col, row);
-
- if (is_inter_block(mbmi)) {
- PLANE_TYPE plane_type = get_plane_type(plane);
- // tx_type will be read out in av1_read_coeffs_txb_facade
- const TX_TYPE tx_type = av1_get_tx_type(plane_type, xd, row, col, tx_size,
- cm->reduced_tx_set_used);
-
- if (plane == 0)
- update_txk_array(mbmi->txk_type, mbmi->sb_type, row, col, tx_size,
- tx_type);
- }
-
-#if TXCOEFF_TIMER
- aom_usec_timer_mark(&timer);
- const int64_t elapsed_time = aom_usec_timer_elapsed(&timer);
- cm->txcoeff_timer += elapsed_time;
- ++cm->txb_count;
-#endif
-}
diff --git a/third_party/aom/av1/decoder/decodetxb.h b/third_party/aom/av1/decoder/decodetxb.h
deleted file mode 100644
index fe04f6abd..000000000
--- a/third_party/aom/av1/decoder/decodetxb.h
+++ /dev/null
@@ -1,32 +0,0 @@
-/*
- * Copyright (c) 2017, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#ifndef AOM_AV1_DECODER_DECODETXB_H_
-#define AOM_AV1_DECODER_DECODETXB_H_
-
-#include "config/aom_config.h"
-
-#include "av1/common/blockd.h"
-#include "av1/common/onyxc_int.h"
-#include "av1/common/txb_common.h"
-#include "aom_dsp/bitreader.h"
-
-uint8_t av1_read_coeffs_txb(const AV1_COMMON *const cm, MACROBLOCKD *const xd,
- aom_reader *const r, const int blk_row,
- const int blk_col, const int plane,
- const TXB_CTX *const txb_ctx,
- const TX_SIZE tx_size);
-
-void av1_read_coeffs_txb_facade(const AV1_COMMON *const cm,
- MACROBLOCKD *const xd, aom_reader *const r,
- const int plane, const int row, const int col,
- const TX_SIZE tx_size);
-#endif // AOM_AV1_DECODER_DECODETXB_H_
diff --git a/third_party/aom/av1/decoder/detokenize.c b/third_party/aom/av1/decoder/detokenize.c
deleted file mode 100644
index 9d54bd13d..000000000
--- a/third_party/aom/av1/decoder/detokenize.c
+++ /dev/null
@@ -1,78 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#include "config/aom_config.h"
-
-#include "aom_mem/aom_mem.h"
-#include "aom_ports/mem.h"
-#include "av1/common/blockd.h"
-#include "av1/decoder/detokenize.h"
-
-#define ACCT_STR __func__
-
-#include "av1/common/common.h"
-#include "av1/common/entropy.h"
-#include "av1/common/idct.h"
-
-static void decode_color_map_tokens(Av1ColorMapParam *param, aom_reader *r) {
- uint8_t color_order[PALETTE_MAX_SIZE];
- const int n = param->n_colors;
- uint8_t *const color_map = param->color_map;
- MapCdf color_map_cdf = param->map_cdf;
- int plane_block_width = param->plane_width;
- int plane_block_height = param->plane_height;
- int rows = param->rows;
- int cols = param->cols;
-
- // The first color index.
- color_map[0] = av1_read_uniform(r, n);
- assert(color_map[0] < n);
-
- // Run wavefront on the palette map index decoding.
- for (int i = 1; i < rows + cols - 1; ++i) {
- for (int j = AOMMIN(i, cols - 1); j >= AOMMAX(0, i - rows + 1); --j) {
- const int color_ctx = av1_get_palette_color_index_context(
- color_map, plane_block_width, (i - j), j, n, color_order, NULL);
- const int color_idx = aom_read_symbol(
- r, color_map_cdf[n - PALETTE_MIN_SIZE][color_ctx], n, ACCT_STR);
- assert(color_idx >= 0 && color_idx < n);
- color_map[(i - j) * plane_block_width + j] = color_order[color_idx];
- }
- }
- // Copy last column to extra columns.
- if (cols < plane_block_width) {
- for (int i = 0; i < rows; ++i) {
- memset(color_map + i * plane_block_width + cols,
- color_map[i * plane_block_width + cols - 1],
- (plane_block_width - cols));
- }
- }
- // Copy last row to extra rows.
- for (int i = rows; i < plane_block_height; ++i) {
- memcpy(color_map + i * plane_block_width,
- color_map + (rows - 1) * plane_block_width, plane_block_width);
- }
-}
-
-void av1_decode_palette_tokens(MACROBLOCKD *const xd, int plane,
- aom_reader *r) {
- assert(plane == 0 || plane == 1);
- Av1ColorMapParam params;
- params.color_map =
- xd->plane[plane].color_index_map + xd->color_index_map_offset[plane];
- params.map_cdf = plane ? xd->tile_ctx->palette_uv_color_index_cdf
- : xd->tile_ctx->palette_y_color_index_cdf;
- const MB_MODE_INFO *const mbmi = xd->mi[0];
- params.n_colors = mbmi->palette_mode_info.palette_size[plane];
- av1_get_block_dimensions(mbmi->sb_type, plane, xd, &params.plane_width,
- &params.plane_height, &params.rows, &params.cols);
- decode_color_map_tokens(&params, r);
-}
diff --git a/third_party/aom/av1/decoder/detokenize.h b/third_party/aom/av1/decoder/detokenize.h
deleted file mode 100644
index 173b437a9..000000000
--- a/third_party/aom/av1/decoder/detokenize.h
+++ /dev/null
@@ -1,29 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#ifndef AOM_AV1_DECODER_DETOKENIZE_H_
-#define AOM_AV1_DECODER_DETOKENIZE_H_
-
-#include "config/aom_config.h"
-
-#include "av1/common/scan.h"
-#include "av1/decoder/decoder.h"
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-void av1_decode_palette_tokens(MACROBLOCKD *const xd, int plane, aom_reader *r);
-
-#ifdef __cplusplus
-} // extern "C"
-#endif
-#endif // AOM_AV1_DECODER_DETOKENIZE_H_
diff --git a/third_party/aom/av1/decoder/dthread.c b/third_party/aom/av1/decoder/dthread.c
deleted file mode 100644
index 3946c787a..000000000
--- a/third_party/aom/av1/decoder/dthread.c
+++ /dev/null
@@ -1,192 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#include "config/aom_config.h"
-
-#include "aom_mem/aom_mem.h"
-#include "av1/common/reconinter.h"
-#include "av1/decoder/dthread.h"
-#include "av1/decoder/decoder.h"
-
-// #define DEBUG_THREAD
-
-// TODO(hkuang): Clean up all the #ifdef in this file.
-void av1_frameworker_lock_stats(AVxWorker *const worker) {
-#if CONFIG_MULTITHREAD
- FrameWorkerData *const worker_data = worker->data1;
- pthread_mutex_lock(&worker_data->stats_mutex);
-#else
- (void)worker;
-#endif
-}
-
-void av1_frameworker_unlock_stats(AVxWorker *const worker) {
-#if CONFIG_MULTITHREAD
- FrameWorkerData *const worker_data = worker->data1;
- pthread_mutex_unlock(&worker_data->stats_mutex);
-#else
- (void)worker;
-#endif
-}
-
-void av1_frameworker_signal_stats(AVxWorker *const worker) {
-#if CONFIG_MULTITHREAD
- FrameWorkerData *const worker_data = worker->data1;
-
-// TODO(hkuang): Fix the pthread_cond_broadcast in windows wrapper.
-#if defined(_WIN32) && !HAVE_PTHREAD_H
- pthread_cond_signal(&worker_data->stats_cond);
-#else
- pthread_cond_broadcast(&worker_data->stats_cond);
-#endif
-
-#else
- (void)worker;
-#endif
-}
-
-// This macro prevents thread_sanitizer from reporting known concurrent writes.
-#if defined(__has_feature)
-#if __has_feature(thread_sanitizer)
-#define BUILDING_WITH_TSAN
-#endif
-#endif
-
-// TODO(hkuang): Remove worker parameter as it is only used in debug code.
-void av1_frameworker_wait(AVxWorker *const worker, RefCntBuffer *const ref_buf,
- int row) {
-#if CONFIG_MULTITHREAD
- if (!ref_buf) return;
-
-#ifndef BUILDING_WITH_TSAN
- // The following line of code will get harmless tsan error but it is the key
- // to get best performance.
- if (ref_buf->row >= row && ref_buf->buf.corrupted != 1) return;
-#endif
-
- {
- // Find the worker thread that owns the reference frame. If the reference
- // frame has been fully decoded, it may not have owner.
- AVxWorker *const ref_worker = ref_buf->frame_worker_owner;
- FrameWorkerData *const ref_worker_data =
- (FrameWorkerData *)ref_worker->data1;
- const AV1Decoder *const pbi = ref_worker_data->pbi;
-
-#ifdef DEBUG_THREAD
- {
- FrameWorkerData *const worker_data = (FrameWorkerData *)worker->data1;
- printf("%d %p worker is waiting for %d %p worker (%d) ref %d \r\n",
- worker_data->worker_id, worker, ref_worker_data->worker_id,
- ref_buf->frame_worker_owner, row, ref_buf->row);
- }
-#endif
-
- av1_frameworker_lock_stats(ref_worker);
- while (ref_buf->row < row && pbi->cur_buf == ref_buf &&
- ref_buf->buf.corrupted != 1) {
- pthread_cond_wait(&ref_worker_data->stats_cond,
- &ref_worker_data->stats_mutex);
- }
-
- if (ref_buf->buf.corrupted == 1) {
- FrameWorkerData *const worker_data = (FrameWorkerData *)worker->data1;
- av1_frameworker_unlock_stats(ref_worker);
- aom_internal_error(&worker_data->pbi->common.error,
- AOM_CODEC_CORRUPT_FRAME,
- "Worker %p failed to decode frame", worker);
- }
- av1_frameworker_unlock_stats(ref_worker);
- }
-#else
- (void)worker;
- (void)ref_buf;
- (void)row;
- (void)ref_buf;
-#endif // CONFIG_MULTITHREAD
-}
-
-void av1_frameworker_broadcast(RefCntBuffer *const buf, int row) {
-#if CONFIG_MULTITHREAD
- AVxWorker *worker = buf->frame_worker_owner;
-
-#ifdef DEBUG_THREAD
- {
- FrameWorkerData *const worker_data = (FrameWorkerData *)worker->data1;
- printf("%d %p worker decode to (%d) \r\n", worker_data->worker_id,
- buf->frame_worker_owner, row);
- }
-#endif
-
- av1_frameworker_lock_stats(worker);
- buf->row = row;
- av1_frameworker_signal_stats(worker);
- av1_frameworker_unlock_stats(worker);
-#else
- (void)buf;
- (void)row;
-#endif // CONFIG_MULTITHREAD
-}
-
-void av1_frameworker_copy_context(AVxWorker *const dst_worker,
- AVxWorker *const src_worker) {
-#if CONFIG_MULTITHREAD
- FrameWorkerData *const src_worker_data = (FrameWorkerData *)src_worker->data1;
- FrameWorkerData *const dst_worker_data = (FrameWorkerData *)dst_worker->data1;
- AV1_COMMON *const src_cm = &src_worker_data->pbi->common;
- AV1_COMMON *const dst_cm = &dst_worker_data->pbi->common;
- int i;
-
- // Wait until source frame's context is ready.
- av1_frameworker_lock_stats(src_worker);
- while (!src_worker_data->frame_context_ready) {
- pthread_cond_wait(&src_worker_data->stats_cond,
- &src_worker_data->stats_mutex);
- }
-
- dst_cm->last_frame_seg_map = src_cm->seg.enabled
- ? src_cm->current_frame_seg_map
- : src_cm->last_frame_seg_map;
- dst_worker_data->pbi->need_resync = src_worker_data->pbi->need_resync;
- av1_frameworker_unlock_stats(src_worker);
-
- dst_cm->seq_params.bit_depth = src_cm->seq_params.bit_depth;
- dst_cm->seq_params.use_highbitdepth = src_cm->seq_params.use_highbitdepth;
- // TODO(zoeliu): To handle parallel decoding
- dst_cm->prev_frame =
- src_cm->show_existing_frame ? src_cm->prev_frame : src_cm->cur_frame;
- dst_cm->last_width =
- !src_cm->show_existing_frame ? src_cm->width : src_cm->last_width;
- dst_cm->last_height =
- !src_cm->show_existing_frame ? src_cm->height : src_cm->last_height;
- dst_cm->seq_params.subsampling_x = src_cm->seq_params.subsampling_x;
- dst_cm->seq_params.subsampling_y = src_cm->seq_params.subsampling_y;
- dst_cm->frame_type = src_cm->frame_type;
- dst_cm->last_show_frame = !src_cm->show_existing_frame
- ? src_cm->show_frame
- : src_cm->last_show_frame;
- for (i = 0; i < REF_FRAMES; ++i)
- dst_cm->ref_frame_map[i] = src_cm->next_ref_frame_map[i];
-
- memcpy(dst_cm->lf_info.lfthr, src_cm->lf_info.lfthr,
- (MAX_LOOP_FILTER + 1) * sizeof(loop_filter_thresh));
- dst_cm->lf.sharpness_level = src_cm->lf.sharpness_level;
- dst_cm->lf.filter_level[0] = src_cm->lf.filter_level[0];
- dst_cm->lf.filter_level[1] = src_cm->lf.filter_level[1];
- memcpy(dst_cm->lf.ref_deltas, src_cm->lf.ref_deltas, REF_FRAMES);
- memcpy(dst_cm->lf.mode_deltas, src_cm->lf.mode_deltas, MAX_MODE_LF_DELTAS);
- dst_cm->seg = src_cm->seg;
- memcpy(dst_cm->frame_contexts, src_cm->frame_contexts,
- FRAME_CONTEXTS * sizeof(dst_cm->frame_contexts[0]));
-#else
- (void)dst_worker;
- (void)src_worker;
-#endif // CONFIG_MULTITHREAD
-}
diff --git a/third_party/aom/av1/decoder/dthread.h b/third_party/aom/av1/decoder/dthread.h
deleted file mode 100644
index 1d264b07e..000000000
--- a/third_party/aom/av1/decoder/dthread.h
+++ /dev/null
@@ -1,82 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#ifndef AOM_AV1_DECODER_DTHREAD_H_
-#define AOM_AV1_DECODER_DTHREAD_H_
-
-#include "config/aom_config.h"
-
-#include "aom_util/aom_thread.h"
-#include "aom/internal/aom_codec_internal.h"
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-struct AV1Common;
-struct AV1Decoder;
-struct ThreadData;
-
-typedef struct DecWorkerData {
- struct ThreadData *td;
- const uint8_t *data_end;
- struct aom_internal_error_info error_info;
-} DecWorkerData;
-
-// WorkerData for the FrameWorker thread. It contains all the information of
-// the worker and decode structures for decoding a frame.
-typedef struct FrameWorkerData {
- struct AV1Decoder *pbi;
- const uint8_t *data;
- const uint8_t *data_end;
- size_t data_size;
- void *user_priv;
- int worker_id;
- int received_frame;
-
- // scratch_buffer is used in frame parallel mode only.
- // It is used to make a copy of the compressed data.
- uint8_t *scratch_buffer;
- size_t scratch_buffer_size;
-
-#if CONFIG_MULTITHREAD
- pthread_mutex_t stats_mutex;
- pthread_cond_t stats_cond;
-#endif
-
- int frame_context_ready; // Current frame's context is ready to read.
- int frame_decoded; // Finished decoding current frame.
-} FrameWorkerData;
-
-void av1_frameworker_lock_stats(AVxWorker *const worker);
-void av1_frameworker_unlock_stats(AVxWorker *const worker);
-void av1_frameworker_signal_stats(AVxWorker *const worker);
-
-// Wait until ref_buf has been decoded to row in real pixel unit.
-// Note: worker may already finish decoding ref_buf and release it in order to
-// start decoding next frame. So need to check whether worker is still decoding
-// ref_buf.
-void av1_frameworker_wait(AVxWorker *const worker, RefCntBuffer *const ref_buf,
- int row);
-
-// FrameWorker broadcasts its decoding progress so other workers that are
-// waiting on it can resume decoding.
-void av1_frameworker_broadcast(RefCntBuffer *const buf, int row);
-
-// Copy necessary decoding context from src worker to dst worker.
-void av1_frameworker_copy_context(AVxWorker *const dst_worker,
- AVxWorker *const src_worker);
-
-#ifdef __cplusplus
-} // extern "C"
-#endif
-
-#endif // AOM_AV1_DECODER_DTHREAD_H_
diff --git a/third_party/aom/av1/decoder/inspection.c b/third_party/aom/av1/decoder/inspection.c
deleted file mode 100644
index e6c89298a..000000000
--- a/third_party/aom/av1/decoder/inspection.c
+++ /dev/null
@@ -1,117 +0,0 @@
-/*
- * Copyright (c) 2017, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-#include "av1/decoder/decoder.h"
-#include "av1/decoder/inspection.h"
-#include "av1/common/enums.h"
-#include "av1/common/cdef.h"
-
-static void ifd_init_mi_rc(insp_frame_data *fd, int mi_cols, int mi_rows) {
- fd->mi_cols = mi_cols;
- fd->mi_rows = mi_rows;
- fd->mi_grid = (insp_mi_data *)aom_malloc(sizeof(insp_mi_data) * fd->mi_rows *
- fd->mi_cols);
-}
-
-void ifd_init(insp_frame_data *fd, int frame_width, int frame_height) {
- int mi_cols = ALIGN_POWER_OF_TWO(frame_width, 3) >> MI_SIZE_LOG2;
- int mi_rows = ALIGN_POWER_OF_TWO(frame_height, 3) >> MI_SIZE_LOG2;
- ifd_init_mi_rc(fd, mi_cols, mi_rows);
-}
-
-void ifd_clear(insp_frame_data *fd) {
- aom_free(fd->mi_grid);
- fd->mi_grid = NULL;
-}
-
-/* TODO(negge) This function may be called by more than one thread when using
- a multi-threaded decoder and this may cause a data race. */
-int ifd_inspect(insp_frame_data *fd, void *decoder) {
- struct AV1Decoder *pbi = (struct AV1Decoder *)decoder;
- AV1_COMMON *const cm = &pbi->common;
- if (fd->mi_rows != cm->mi_rows || fd->mi_cols != cm->mi_cols) {
- ifd_clear(fd);
- ifd_init_mi_rc(fd, cm->mi_rows, cm->mi_cols);
- }
- fd->show_frame = cm->show_frame;
- fd->frame_type = cm->frame_type;
- fd->base_qindex = cm->base_qindex;
- // Set width and height of the first tile until generic support can be added
- TileInfo tile_info;
- av1_tile_set_row(&tile_info, cm, 0);
- av1_tile_set_col(&tile_info, cm, 0);
- fd->tile_mi_cols = tile_info.mi_col_end - tile_info.mi_col_start;
- fd->tile_mi_rows = tile_info.mi_row_end - tile_info.mi_row_start;
- fd->delta_q_present_flag = cm->delta_q_present_flag;
- fd->delta_q_res = cm->delta_q_res;
-#if CONFIG_ACCOUNTING
- fd->accounting = &pbi->accounting;
-#endif
- // TODO(negge): copy per frame CDEF data
- int i, j;
- for (i = 0; i < MAX_SEGMENTS; i++) {
- for (j = 0; j < 2; j++) {
- fd->y_dequant[i][j] = cm->y_dequant_QTX[i][j];
- fd->u_dequant[i][j] = cm->u_dequant_QTX[i][j];
- fd->v_dequant[i][j] = cm->v_dequant_QTX[i][j];
- }
- }
- for (j = 0; j < cm->mi_rows; j++) {
- for (i = 0; i < cm->mi_cols; i++) {
- const MB_MODE_INFO *mbmi = cm->mi_grid_visible[j * cm->mi_stride + i];
- insp_mi_data *mi = &fd->mi_grid[j * cm->mi_cols + i];
- // Segment
- mi->segment_id = mbmi->segment_id;
- // Motion Vectors
- mi->mv[0].row = mbmi->mv[0].as_mv.row;
- mi->mv[0].col = mbmi->mv[0].as_mv.col;
- mi->mv[1].row = mbmi->mv[1].as_mv.row;
- mi->mv[1].col = mbmi->mv[1].as_mv.col;
- // Reference Frames
- mi->ref_frame[0] = mbmi->ref_frame[0];
- mi->ref_frame[1] = mbmi->ref_frame[1];
- // Prediction Mode
- mi->mode = mbmi->mode;
- // Prediction Mode for Chromatic planes
- if (mi->mode < INTRA_MODES) {
- mi->uv_mode = mbmi->uv_mode;
- } else {
- mi->uv_mode = UV_MODE_INVALID;
- }
- // Block Size
- mi->sb_type = mbmi->sb_type;
- // Skip Flag
- mi->skip = mbmi->skip;
- mi->filter[0] = av1_extract_interp_filter(mbmi->interp_filters, 0);
- mi->filter[1] = av1_extract_interp_filter(mbmi->interp_filters, 1);
- mi->dual_filter_type = mi->filter[0] * 3 + mi->filter[1];
- // Transform
- // TODO(anyone): extract tx type info from mbmi->txk_type[].
- mi->tx_type = DCT_DCT;
- mi->tx_size = mbmi->tx_size;
-
- mi->cdef_level =
- cm->cdef_strengths[mbmi->cdef_strength] / CDEF_SEC_STRENGTHS;
- mi->cdef_strength =
- cm->cdef_strengths[mbmi->cdef_strength] % CDEF_SEC_STRENGTHS;
- mi->cdef_strength += mi->cdef_strength == 3;
- if (mbmi->uv_mode == UV_CFL_PRED) {
- mi->cfl_alpha_idx = mbmi->cfl_alpha_idx;
- mi->cfl_alpha_sign = mbmi->cfl_alpha_signs;
- } else {
- mi->cfl_alpha_idx = 0;
- mi->cfl_alpha_sign = 0;
- }
- // delta_q
- mi->current_qindex = mbmi->current_qindex;
- }
- }
- return 1;
-}
diff --git a/third_party/aom/av1/decoder/inspection.h b/third_party/aom/av1/decoder/inspection.h
deleted file mode 100644
index 7214a9bed..000000000
--- a/third_party/aom/av1/decoder/inspection.h
+++ /dev/null
@@ -1,84 +0,0 @@
-/*
- * Copyright (c) 2017, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-#ifndef AOM_AV1_DECODER_INSPECTION_H_
-#define AOM_AV1_DECODER_INSPECTION_H_
-
-#ifdef __cplusplus
-extern "C" {
-#endif // __cplusplus
-
-#include "av1/common/seg_common.h"
-#if CONFIG_ACCOUNTING
-#include "av1/decoder/accounting.h"
-#endif
-
-#ifndef AOM_AOMDX_H_
-typedef void (*aom_inspect_cb)(void *decoder, void *data);
-#endif
-
-typedef struct insp_mv insp_mv;
-
-struct insp_mv {
- int16_t row;
- int16_t col;
-};
-
-typedef struct insp_mi_data insp_mi_data;
-
-struct insp_mi_data {
- insp_mv mv[2];
- int16_t ref_frame[2];
- int16_t mode;
- int16_t uv_mode;
- int16_t sb_type;
- int16_t skip;
- int16_t segment_id;
- int16_t dual_filter_type;
- int16_t filter[2];
- int16_t tx_type;
- int16_t tx_size;
- int16_t cdef_level;
- int16_t cdef_strength;
- int16_t cfl_alpha_idx;
- int16_t cfl_alpha_sign;
- int16_t current_qindex;
-};
-
-typedef struct insp_frame_data insp_frame_data;
-
-struct insp_frame_data {
-#if CONFIG_ACCOUNTING
- Accounting *accounting;
-#endif
- insp_mi_data *mi_grid;
- int show_frame;
- int frame_type;
- int base_qindex;
- int mi_rows;
- int mi_cols;
- int tile_mi_rows;
- int tile_mi_cols;
- int16_t y_dequant[MAX_SEGMENTS][2];
- int16_t u_dequant[MAX_SEGMENTS][2];
- int16_t v_dequant[MAX_SEGMENTS][2];
- // TODO(negge): add per frame CDEF data
- int delta_q_present_flag;
- int delta_q_res;
-};
-
-void ifd_init(insp_frame_data *fd, int frame_width, int frame_height);
-void ifd_clear(insp_frame_data *fd);
-int ifd_inspect(insp_frame_data *fd, void *decoder);
-
-#ifdef __cplusplus
-} // extern "C"
-#endif // __cplusplus
-#endif // AOM_AV1_DECODER_INSPECTION_H_
diff --git a/third_party/aom/av1/decoder/obu.c b/third_party/aom/av1/decoder/obu.c
deleted file mode 100644
index 44ecf818e..000000000
--- a/third_party/aom/av1/decoder/obu.c
+++ /dev/null
@@ -1,839 +0,0 @@
-/*
- * Copyright (c) 2017, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#include <assert.h>
-
-#include "config/aom_config.h"
-
-#include "aom/aom_codec.h"
-#include "aom_dsp/bitreader_buffer.h"
-#include "aom_ports/mem_ops.h"
-
-#include "av1/common/common.h"
-#include "av1/common/obu_util.h"
-#include "av1/common/timing.h"
-#include "av1/decoder/decoder.h"
-#include "av1/decoder/decodeframe.h"
-#include "av1/decoder/obu.h"
-
-// Picture prediction structures (0-12 are predefined) in scalability metadata.
-typedef enum {
- SCALABILITY_L1T2 = 0,
- SCALABILITY_L1T3 = 1,
- SCALABILITY_L2T1 = 2,
- SCALABILITY_L2T2 = 3,
- SCALABILITY_L2T3 = 4,
- SCALABILITY_S2T1 = 5,
- SCALABILITY_S2T2 = 6,
- SCALABILITY_S2T3 = 7,
- SCALABILITY_L2T1h = 8,
- SCALABILITY_L2T2h = 9,
- SCALABILITY_L2T3h = 10,
- SCALABILITY_S2T1h = 11,
- SCALABILITY_S2T2h = 12,
- SCALABILITY_S2T3h = 13,
- SCALABILITY_SS = 14
-} SCALABILITY_STRUCTURES;
-
-aom_codec_err_t aom_get_num_layers_from_operating_point_idc(
- int operating_point_idc, unsigned int *number_spatial_layers,
- unsigned int *number_temporal_layers) {
- // derive number of spatial/temporal layers from operating_point_idc
-
- if (!number_spatial_layers || !number_temporal_layers)
- return AOM_CODEC_INVALID_PARAM;
-
- if (operating_point_idc == 0) {
- *number_temporal_layers = 1;
- *number_spatial_layers = 1;
- } else {
- *number_spatial_layers = 0;
- *number_temporal_layers = 0;
- for (int j = 0; j < MAX_NUM_SPATIAL_LAYERS; j++) {
- *number_spatial_layers +=
- (operating_point_idc >> (j + MAX_NUM_TEMPORAL_LAYERS)) & 0x1;
- }
- for (int j = 0; j < MAX_NUM_TEMPORAL_LAYERS; j++) {
- *number_temporal_layers += (operating_point_idc >> j) & 0x1;
- }
- }
-
- return AOM_CODEC_OK;
-}
-
-static int is_obu_in_current_operating_point(AV1Decoder *pbi,
- ObuHeader obu_header) {
- if (!pbi->current_operating_point) {
- return 1;
- }
-
- if ((pbi->current_operating_point >> obu_header.temporal_layer_id) & 0x1 &&
- (pbi->current_operating_point >> (obu_header.spatial_layer_id + 8)) &
- 0x1) {
- return 1;
- }
- return 0;
-}
-
-static int byte_alignment(AV1_COMMON *const cm,
- struct aom_read_bit_buffer *const rb) {
- while (rb->bit_offset & 7) {
- if (aom_rb_read_bit(rb)) {
- cm->error.error_code = AOM_CODEC_CORRUPT_FRAME;
- return -1;
- }
- }
- return 0;
-}
-
-static uint32_t read_temporal_delimiter_obu() { return 0; }
-
-// Returns a boolean that indicates success.
-static int read_bitstream_level(BitstreamLevel *bl,
- struct aom_read_bit_buffer *rb) {
- const uint8_t seq_level_idx = aom_rb_read_literal(rb, LEVEL_BITS);
- if (!is_valid_seq_level_idx(seq_level_idx)) return 0;
- bl->major = (seq_level_idx >> LEVEL_MINOR_BITS) + LEVEL_MAJOR_MIN;
- bl->minor = seq_level_idx & ((1 << LEVEL_MINOR_BITS) - 1);
- return 1;
-}
-
-// Returns whether two sequence headers are consistent with each other.
-// TODO(huisu,wtc@google.com): make sure the code matches the spec exactly.
-static int are_seq_headers_consistent(const SequenceHeader *seq_params_old,
- const SequenceHeader *seq_params_new) {
- return !memcmp(seq_params_old, seq_params_new, sizeof(SequenceHeader));
-}
-
-// On success, sets pbi->sequence_header_ready to 1 and returns the number of
-// bytes read from 'rb'.
-// On failure, sets pbi->common.error.error_code and returns 0.
-static uint32_t read_sequence_header_obu(AV1Decoder *pbi,
- struct aom_read_bit_buffer *rb) {
- AV1_COMMON *const cm = &pbi->common;
- const uint32_t saved_bit_offset = rb->bit_offset;
-
- // Verify rb has been configured to report errors.
- assert(rb->error_handler);
-
- // Use a local variable to store the information as we decode. At the end,
- // if no errors have occurred, cm->seq_params is updated.
- SequenceHeader sh = cm->seq_params;
- SequenceHeader *const seq_params = &sh;
-
- seq_params->profile = av1_read_profile(rb);
- if (seq_params->profile > CONFIG_MAX_DECODE_PROFILE) {
- cm->error.error_code = AOM_CODEC_UNSUP_BITSTREAM;
- return 0;
- }
-
- // Still picture or not
- seq_params->still_picture = aom_rb_read_bit(rb);
- seq_params->reduced_still_picture_hdr = aom_rb_read_bit(rb);
- // Video must have reduced_still_picture_hdr = 0
- if (!seq_params->still_picture && seq_params->reduced_still_picture_hdr) {
- cm->error.error_code = AOM_CODEC_UNSUP_BITSTREAM;
- return 0;
- }
-
- if (seq_params->reduced_still_picture_hdr) {
- cm->timing_info_present = 0;
- seq_params->decoder_model_info_present_flag = 0;
- seq_params->display_model_info_present_flag = 0;
- seq_params->operating_points_cnt_minus_1 = 0;
- seq_params->operating_point_idc[0] = 0;
- if (!read_bitstream_level(&seq_params->level[0], rb)) {
- cm->error.error_code = AOM_CODEC_UNSUP_BITSTREAM;
- return 0;
- }
- seq_params->tier[0] = 0;
- cm->op_params[0].decoder_model_param_present_flag = 0;
- cm->op_params[0].display_model_param_present_flag = 0;
- } else {
- cm->timing_info_present = aom_rb_read_bit(rb); // timing_info_present_flag
- if (cm->timing_info_present) {
- av1_read_timing_info_header(cm, rb);
-
- seq_params->decoder_model_info_present_flag = aom_rb_read_bit(rb);
- if (seq_params->decoder_model_info_present_flag)
- av1_read_decoder_model_info(cm, rb);
- } else {
- seq_params->decoder_model_info_present_flag = 0;
- }
- seq_params->display_model_info_present_flag = aom_rb_read_bit(rb);
- seq_params->operating_points_cnt_minus_1 =
- aom_rb_read_literal(rb, OP_POINTS_CNT_MINUS_1_BITS);
- for (int i = 0; i < seq_params->operating_points_cnt_minus_1 + 1; i++) {
- seq_params->operating_point_idc[i] =
- aom_rb_read_literal(rb, OP_POINTS_IDC_BITS);
- if (!read_bitstream_level(&seq_params->level[i], rb)) {
- cm->error.error_code = AOM_CODEC_UNSUP_BITSTREAM;
- return 0;
- }
- // This is the seq_level_idx[i] > 7 check in the spec. seq_level_idx 7
- // is equivalent to level 3.3.
- if (seq_params->level[i].major > 3)
- seq_params->tier[i] = aom_rb_read_bit(rb);
- else
- seq_params->tier[i] = 0;
- if (seq_params->decoder_model_info_present_flag) {
- cm->op_params[i].decoder_model_param_present_flag = aom_rb_read_bit(rb);
- if (cm->op_params[i].decoder_model_param_present_flag)
- av1_read_op_parameters_info(cm, rb, i);
- } else {
- cm->op_params[i].decoder_model_param_present_flag = 0;
- }
- if (cm->timing_info_present &&
- (cm->timing_info.equal_picture_interval ||
- cm->op_params[i].decoder_model_param_present_flag)) {
- cm->op_params[i].bitrate = max_level_bitrate(
- seq_params->profile,
- major_minor_to_seq_level_idx(seq_params->level[i]),
- seq_params->tier[i]);
- // Level with seq_level_idx = 31 returns a high "dummy" bitrate to pass
- // the check
- if (cm->op_params[i].bitrate == 0)
- aom_internal_error(&cm->error, AOM_CODEC_UNSUP_BITSTREAM,
- "AV1 does not support this combination of "
- "profile, level, and tier.");
- // Buffer size in bits/s is bitrate in bits/s * 1 s
- cm->op_params[i].buffer_size = cm->op_params[i].bitrate;
- }
- if (cm->timing_info_present && cm->timing_info.equal_picture_interval &&
- !cm->op_params[i].decoder_model_param_present_flag) {
- // When the decoder_model_parameters are not sent for this op, set
- // the default ones that can be used with the resource availability mode
- cm->op_params[i].decoder_buffer_delay = 70000;
- cm->op_params[i].encoder_buffer_delay = 20000;
- cm->op_params[i].low_delay_mode_flag = 0;
- }
-
- if (seq_params->display_model_info_present_flag) {
- cm->op_params[i].display_model_param_present_flag = aom_rb_read_bit(rb);
- if (cm->op_params[i].display_model_param_present_flag) {
- cm->op_params[i].initial_display_delay =
- aom_rb_read_literal(rb, 4) + 1;
- if (cm->op_params[i].initial_display_delay > 10)
- aom_internal_error(
- &cm->error, AOM_CODEC_UNSUP_BITSTREAM,
- "AV1 does not support more than 10 decoded frames delay");
- } else {
- cm->op_params[i].initial_display_delay = 10;
- }
- } else {
- cm->op_params[i].display_model_param_present_flag = 0;
- cm->op_params[i].initial_display_delay = 10;
- }
- }
- }
- // This decoder supports all levels. Choose operating point provided by
- // external means
- int operating_point = pbi->operating_point;
- if (operating_point < 0 ||
- operating_point > seq_params->operating_points_cnt_minus_1)
- operating_point = 0;
- pbi->current_operating_point =
- seq_params->operating_point_idc[operating_point];
- if (aom_get_num_layers_from_operating_point_idc(
- pbi->current_operating_point, &cm->number_spatial_layers,
- &cm->number_temporal_layers) != AOM_CODEC_OK) {
- cm->error.error_code = AOM_CODEC_ERROR;
- return 0;
- }
-
- av1_read_sequence_header(cm, rb, seq_params);
-
- av1_read_color_config(rb, pbi->allow_lowbitdepth, seq_params, &cm->error);
- if (!(seq_params->subsampling_x == 0 && seq_params->subsampling_y == 0) &&
- !(seq_params->subsampling_x == 1 && seq_params->subsampling_y == 1) &&
- !(seq_params->subsampling_x == 1 && seq_params->subsampling_y == 0)) {
- aom_internal_error(&cm->error, AOM_CODEC_UNSUP_BITSTREAM,
- "Only 4:4:4, 4:2:2 and 4:2:0 are currently supported, "
- "%d %d subsampling is not supported.\n",
- seq_params->subsampling_x, seq_params->subsampling_y);
- }
-
- seq_params->film_grain_params_present = aom_rb_read_bit(rb);
-
- if (av1_check_trailing_bits(pbi, rb) != 0) {
- // cm->error.error_code is already set.
- return 0;
- }
-
- // If a sequence header has been decoded before, we check if the new
- // one is consistent with the old one.
- if (pbi->sequence_header_ready) {
- if (!are_seq_headers_consistent(&cm->seq_params, seq_params))
- pbi->sequence_header_changed = 1;
- }
-
- cm->seq_params = *seq_params;
- pbi->sequence_header_ready = 1;
-
- return ((rb->bit_offset - saved_bit_offset + 7) >> 3);
-}
-
-// On success, returns the frame header size. On failure, calls
-// aom_internal_error and does not return.
-static uint32_t read_frame_header_obu(AV1Decoder *pbi,
- struct aom_read_bit_buffer *rb,
- const uint8_t *data,
- const uint8_t **p_data_end,
- int trailing_bits_present) {
- return av1_decode_frame_headers_and_setup(pbi, rb, data, p_data_end,
- trailing_bits_present);
-}
-
-static int32_t read_tile_group_header(AV1Decoder *pbi,
- struct aom_read_bit_buffer *rb,
- int *start_tile, int *end_tile,
- int tile_start_implicit) {
- AV1_COMMON *const cm = &pbi->common;
- uint32_t saved_bit_offset = rb->bit_offset;
- int tile_start_and_end_present_flag = 0;
- const int num_tiles = pbi->common.tile_rows * pbi->common.tile_cols;
-
- if (!pbi->common.large_scale_tile && num_tiles > 1) {
- tile_start_and_end_present_flag = aom_rb_read_bit(rb);
- }
- if (pbi->common.large_scale_tile || num_tiles == 1 ||
- !tile_start_and_end_present_flag) {
- *start_tile = 0;
- *end_tile = num_tiles - 1;
- return ((rb->bit_offset - saved_bit_offset + 7) >> 3);
- }
- if (tile_start_implicit && tile_start_and_end_present_flag) {
- aom_internal_error(
- &cm->error, AOM_CODEC_UNSUP_BITSTREAM,
- "For OBU_FRAME type obu tile_start_and_end_present_flag must be 0");
- return -1;
- }
- *start_tile =
- aom_rb_read_literal(rb, cm->log2_tile_rows + cm->log2_tile_cols);
- *end_tile = aom_rb_read_literal(rb, cm->log2_tile_rows + cm->log2_tile_cols);
-
- return ((rb->bit_offset - saved_bit_offset + 7) >> 3);
-}
-
-static uint32_t read_one_tile_group_obu(
- AV1Decoder *pbi, struct aom_read_bit_buffer *rb, int is_first_tg,
- const uint8_t *data, const uint8_t *data_end, const uint8_t **p_data_end,
- int *is_last_tg, int tile_start_implicit) {
- AV1_COMMON *const cm = &pbi->common;
- int start_tile, end_tile;
- int32_t header_size, tg_payload_size;
-
- assert((rb->bit_offset & 7) == 0);
- assert(rb->bit_buffer + aom_rb_bytes_read(rb) == data);
-
- header_size = read_tile_group_header(pbi, rb, &start_tile, &end_tile,
- tile_start_implicit);
- if (header_size == -1 || byte_alignment(cm, rb)) return 0;
- if (start_tile > end_tile) return header_size;
- data += header_size;
- av1_decode_tg_tiles_and_wrapup(pbi, data, data_end, p_data_end, start_tile,
- end_tile, is_first_tg);
-
- tg_payload_size = (uint32_t)(*p_data_end - data);
-
- // TODO(shan): For now, assume all tile groups received in order
- *is_last_tg = end_tile == cm->tile_rows * cm->tile_cols - 1;
- return header_size + tg_payload_size;
-}
-
-static void alloc_tile_list_buffer(AV1Decoder *pbi) {
- // TODO(yunqing): for now, copy each tile's decoded YUV data directly to the
- // output buffer. This needs to be modified according to the application
- // requirement.
- AV1_COMMON *const cm = &pbi->common;
- const int tile_width_in_pixels = cm->tile_width * MI_SIZE;
- const int tile_height_in_pixels = cm->tile_height * MI_SIZE;
- const int ssy = cm->seq_params.subsampling_y;
- const int ssx = cm->seq_params.subsampling_x;
- const int num_planes = av1_num_planes(cm);
- const size_t yplane_tile_size = tile_height_in_pixels * tile_width_in_pixels;
- const size_t uvplane_tile_size =
- (num_planes > 1)
- ? (tile_height_in_pixels >> ssy) * (tile_width_in_pixels >> ssx)
- : 0;
- const size_t tile_size = (cm->seq_params.use_highbitdepth ? 2 : 1) *
- (yplane_tile_size + 2 * uvplane_tile_size);
- pbi->tile_list_size = tile_size * (pbi->tile_count_minus_1 + 1);
-
- if (pbi->tile_list_size > pbi->buffer_sz) {
- if (pbi->tile_list_output != NULL) aom_free(pbi->tile_list_output);
- pbi->tile_list_output = NULL;
-
- pbi->tile_list_output = (uint8_t *)aom_memalign(32, pbi->tile_list_size);
- if (pbi->tile_list_output == NULL)
- aom_internal_error(&cm->error, AOM_CODEC_MEM_ERROR,
- "Failed to allocate the tile list output buffer");
- pbi->buffer_sz = pbi->tile_list_size;
- }
-}
-
-static void copy_decoded_tile_to_tile_list_buffer(AV1Decoder *pbi,
- uint8_t **output) {
- AV1_COMMON *const cm = &pbi->common;
- const int tile_width_in_pixels = cm->tile_width * MI_SIZE;
- const int tile_height_in_pixels = cm->tile_height * MI_SIZE;
- const int ssy = cm->seq_params.subsampling_y;
- const int ssx = cm->seq_params.subsampling_x;
- const int num_planes = av1_num_planes(cm);
-
- // Copy decoded tile to the tile list output buffer.
- YV12_BUFFER_CONFIG *cur_frame = get_frame_new_buffer(cm);
- const int mi_row = pbi->dec_tile_row * cm->tile_height;
- const int mi_col = pbi->dec_tile_col * cm->tile_width;
- const int is_hbd = (cur_frame->flags & YV12_FLAG_HIGHBITDEPTH) ? 1 : 0;
- uint8_t *bufs[MAX_MB_PLANE] = { NULL, NULL, NULL };
- int strides[MAX_MB_PLANE] = { 0, 0, 0 };
- int plane;
-
- for (plane = 0; plane < num_planes; ++plane) {
- int shift_x = plane > 0 ? ssx : 0;
- int shift_y = plane > 0 ? ssy : 0;
-
- bufs[plane] = cur_frame->buffers[plane];
- strides[plane] =
- (plane > 0) ? cur_frame->strides[1] : cur_frame->strides[0];
-
- bufs[plane] += mi_row * (MI_SIZE >> shift_y) * strides[plane] +
- mi_col * (MI_SIZE >> shift_x);
-
- if (is_hbd) {
- bufs[plane] = (uint8_t *)CONVERT_TO_SHORTPTR(bufs[plane]);
- strides[plane] *= 2;
- }
-
- int w, h;
- w = (plane > 0 && shift_x > 0) ? ((tile_width_in_pixels + 1) >> shift_x)
- : tile_width_in_pixels;
- w *= (1 + is_hbd);
- h = (plane > 0 && shift_y > 0) ? ((tile_height_in_pixels + 1) >> shift_y)
- : tile_height_in_pixels;
- int j;
-
- for (j = 0; j < h; ++j) {
- memcpy(*output, bufs[plane], w);
- bufs[plane] += strides[plane];
- *output += w;
- }
- }
-}
-
-// Only called while large_scale_tile = 1.
-static uint32_t read_and_decode_one_tile_list(AV1Decoder *pbi,
- struct aom_read_bit_buffer *rb,
- const uint8_t *data,
- const uint8_t *data_end,
- const uint8_t **p_data_end,
- int *frame_decoding_finished) {
- AV1_COMMON *const cm = &pbi->common;
- uint32_t tile_list_payload_size = 0;
- const int num_tiles = cm->tile_cols * cm->tile_rows;
- const int start_tile = 0;
- const int end_tile = num_tiles - 1;
- int i = 0;
-
- // Process the tile list info.
- pbi->output_frame_width_in_tiles_minus_1 = aom_rb_read_literal(rb, 8);
- pbi->output_frame_height_in_tiles_minus_1 = aom_rb_read_literal(rb, 8);
- pbi->tile_count_minus_1 = aom_rb_read_literal(rb, 16);
- if (pbi->tile_count_minus_1 > MAX_TILES - 1) {
- cm->error.error_code = AOM_CODEC_CORRUPT_FRAME;
- return 0;
- }
-
- // Allocate output frame buffer for the tile list.
- alloc_tile_list_buffer(pbi);
-
- uint32_t tile_list_info_bytes = 4;
- tile_list_payload_size += tile_list_info_bytes;
- data += tile_list_info_bytes;
- uint8_t *output = pbi->tile_list_output;
-
- for (i = 0; i <= pbi->tile_count_minus_1; i++) {
- // Process 1 tile.
- // Reset the bit reader.
- rb->bit_offset = 0;
- rb->bit_buffer = data;
-
- // Read out the tile info.
- uint32_t tile_info_bytes = 5;
- // Set reference for each tile.
- int ref_idx = aom_rb_read_literal(rb, 8);
- if (ref_idx >= MAX_EXTERNAL_REFERENCES) {
- cm->error.error_code = AOM_CODEC_CORRUPT_FRAME;
- return 0;
- }
- av1_set_reference_dec(cm, 0, 1, &pbi->ext_refs.refs[ref_idx]);
-
- pbi->dec_tile_row = aom_rb_read_literal(rb, 8);
- pbi->dec_tile_col = aom_rb_read_literal(rb, 8);
- if (pbi->dec_tile_row < 0 || pbi->dec_tile_col < 0 ||
- pbi->dec_tile_row >= cm->tile_rows ||
- pbi->dec_tile_col >= cm->tile_cols) {
- cm->error.error_code = AOM_CODEC_CORRUPT_FRAME;
- return 0;
- }
-
- pbi->coded_tile_data_size = aom_rb_read_literal(rb, 16) + 1;
- data += tile_info_bytes;
- if ((size_t)(data_end - data) < pbi->coded_tile_data_size) {
- cm->error.error_code = AOM_CODEC_CORRUPT_FRAME;
- return 0;
- }
-
- av1_decode_tg_tiles_and_wrapup(pbi, data, data + pbi->coded_tile_data_size,
- p_data_end, start_tile, end_tile, 0);
- uint32_t tile_payload_size = (uint32_t)(*p_data_end - data);
-
- tile_list_payload_size += tile_info_bytes + tile_payload_size;
-
- // Update data ptr for next tile decoding.
- data = *p_data_end;
- assert(data <= data_end);
-
- // Copy the decoded tile to the tile list output buffer.
- copy_decoded_tile_to_tile_list_buffer(pbi, &output);
- }
-
- *frame_decoding_finished = 1;
- return tile_list_payload_size;
-}
-
-static void read_metadata_itut_t35(const uint8_t *data, size_t sz) {
- struct aom_read_bit_buffer rb = { data, data + sz, 0, NULL, NULL };
- for (size_t i = 0; i < sz; i++) {
- aom_rb_read_literal(&rb, 8);
- }
-}
-
-static void read_metadata_hdr_cll(const uint8_t *data, size_t sz) {
- struct aom_read_bit_buffer rb = { data, data + sz, 0, NULL, NULL };
- aom_rb_read_literal(&rb, 16); // max_cll
- aom_rb_read_literal(&rb, 16); // max_fall
-}
-
-static void read_metadata_hdr_mdcv(const uint8_t *data, size_t sz) {
- struct aom_read_bit_buffer rb = { data, data + sz, 0, NULL, NULL };
- for (int i = 0; i < 3; i++) {
- aom_rb_read_literal(&rb, 16); // primary_i_chromaticity_x
- aom_rb_read_literal(&rb, 16); // primary_i_chromaticity_y
- }
-
- aom_rb_read_literal(&rb, 16); // white_point_chromaticity_x
- aom_rb_read_literal(&rb, 16); // white_point_chromaticity_y
-
- aom_rb_read_unsigned_literal(&rb, 32); // luminance_max
- aom_rb_read_unsigned_literal(&rb, 32); // luminance_min
-}
-
-static void scalability_structure(struct aom_read_bit_buffer *rb) {
- int spatial_layers_cnt = aom_rb_read_literal(rb, 2);
- int spatial_layer_dimensions_present_flag = aom_rb_read_bit(rb);
- int spatial_layer_description_present_flag = aom_rb_read_bit(rb);
- int temporal_group_description_present_flag = aom_rb_read_bit(rb);
- aom_rb_read_literal(rb, 3); // reserved
-
- if (spatial_layer_dimensions_present_flag) {
- int i;
- for (i = 0; i < spatial_layers_cnt + 1; i++) {
- aom_rb_read_literal(rb, 16);
- aom_rb_read_literal(rb, 16);
- }
- }
- if (spatial_layer_description_present_flag) {
- int i;
- for (i = 0; i < spatial_layers_cnt + 1; i++) {
- aom_rb_read_literal(rb, 8);
- }
- }
- if (temporal_group_description_present_flag) {
- int i, j, temporal_group_size;
- temporal_group_size = aom_rb_read_literal(rb, 8);
- for (i = 0; i < temporal_group_size; i++) {
- aom_rb_read_literal(rb, 3);
- aom_rb_read_bit(rb);
- aom_rb_read_bit(rb);
- int temporal_group_ref_cnt = aom_rb_read_literal(rb, 3);
- for (j = 0; j < temporal_group_ref_cnt; j++) {
- aom_rb_read_literal(rb, 8);
- }
- }
- }
-}
-
-static void read_metadata_scalability(const uint8_t *data, size_t sz) {
- struct aom_read_bit_buffer rb = { data, data + sz, 0, NULL, NULL };
- int scalability_mode_idc = aom_rb_read_literal(&rb, 8);
- if (scalability_mode_idc == SCALABILITY_SS) {
- scalability_structure(&rb);
- }
-}
-
-static void read_metadata_timecode(const uint8_t *data, size_t sz) {
- struct aom_read_bit_buffer rb = { data, data + sz, 0, NULL, NULL };
- aom_rb_read_literal(&rb, 5); // counting_type f(5)
- int full_timestamp_flag = aom_rb_read_bit(&rb); // full_timestamp_flag f(1)
- aom_rb_read_bit(&rb); // discontinuity_flag (f1)
- aom_rb_read_bit(&rb); // cnt_dropped_flag f(1)
- aom_rb_read_literal(&rb, 9); // n_frames f(9)
- if (full_timestamp_flag) {
- aom_rb_read_literal(&rb, 6); // seconds_value f(6)
- aom_rb_read_literal(&rb, 6); // minutes_value f(6)
- aom_rb_read_literal(&rb, 5); // hours_value f(5)
- } else {
- int seconds_flag = aom_rb_read_bit(&rb); // seconds_flag f(1)
- if (seconds_flag) {
- aom_rb_read_literal(&rb, 6); // seconds_value f(6)
- int minutes_flag = aom_rb_read_bit(&rb); // minutes_flag f(1)
- if (minutes_flag) {
- aom_rb_read_literal(&rb, 6); // minutes_value f(6)
- int hours_flag = aom_rb_read_bit(&rb); // hours_flag f(1)
- if (hours_flag) {
- aom_rb_read_literal(&rb, 5); // hours_value f(5)
- }
- }
- }
- }
- // time_offset_length f(5)
- int time_offset_length = aom_rb_read_literal(&rb, 5);
- if (time_offset_length) {
- aom_rb_read_literal(&rb, time_offset_length); // f(time_offset_length)
- }
-}
-
-static size_t read_metadata(const uint8_t *data, size_t sz) {
- size_t type_length;
- uint64_t type_value;
- OBU_METADATA_TYPE metadata_type;
- if (aom_uleb_decode(data, sz, &type_value, &type_length) < 0) {
- return sz;
- }
- metadata_type = (OBU_METADATA_TYPE)type_value;
- if (metadata_type == OBU_METADATA_TYPE_ITUT_T35) {
- read_metadata_itut_t35(data + type_length, sz - type_length);
- } else if (metadata_type == OBU_METADATA_TYPE_HDR_CLL) {
- read_metadata_hdr_cll(data + type_length, sz - type_length);
- } else if (metadata_type == OBU_METADATA_TYPE_HDR_MDCV) {
- read_metadata_hdr_mdcv(data + type_length, sz - type_length);
- } else if (metadata_type == OBU_METADATA_TYPE_SCALABILITY) {
- read_metadata_scalability(data + type_length, sz - type_length);
- } else if (metadata_type == OBU_METADATA_TYPE_TIMECODE) {
- read_metadata_timecode(data + type_length, sz - type_length);
- }
-
- return sz;
-}
-
-// On success, returns a boolean that indicates whether the decoding of the
-// current frame is finished. On failure, sets cm->error.error_code and
-// returns -1.
-int aom_decode_frame_from_obus(struct AV1Decoder *pbi, const uint8_t *data,
- const uint8_t *data_end,
- const uint8_t **p_data_end) {
- AV1_COMMON *const cm = &pbi->common;
- int frame_decoding_finished = 0;
- int is_first_tg_obu_received = 1;
- uint32_t frame_header_size = 0;
- ObuHeader obu_header;
- memset(&obu_header, 0, sizeof(obu_header));
- pbi->seen_frame_header = 0;
-
- if (data_end < data) {
- cm->error.error_code = AOM_CODEC_CORRUPT_FRAME;
- return -1;
- }
-
- // Reset pbi->camera_frame_header_ready to 0 if cm->large_scale_tile = 0.
- if (!cm->large_scale_tile) pbi->camera_frame_header_ready = 0;
-
- // decode frame as a series of OBUs
- while (!frame_decoding_finished && !cm->error.error_code) {
- struct aom_read_bit_buffer rb;
- size_t payload_size = 0;
- size_t decoded_payload_size = 0;
- size_t obu_payload_offset = 0;
- size_t bytes_read = 0;
- const size_t bytes_available = data_end - data;
-
- if (bytes_available == 0 && !pbi->seen_frame_header) {
- *p_data_end = data;
- cm->error.error_code = AOM_CODEC_OK;
- break;
- }
-
- aom_codec_err_t status =
- aom_read_obu_header_and_size(data, bytes_available, cm->is_annexb,
- &obu_header, &payload_size, &bytes_read);
-
- if (status != AOM_CODEC_OK) {
- cm->error.error_code = status;
- return -1;
- }
-
- // Record obu size header information.
- pbi->obu_size_hdr.data = data + obu_header.size;
- pbi->obu_size_hdr.size = bytes_read - obu_header.size;
-
- // Note: aom_read_obu_header_and_size() takes care of checking that this
- // doesn't cause 'data' to advance past 'data_end'.
- data += bytes_read;
-
- if ((size_t)(data_end - data) < payload_size) {
- cm->error.error_code = AOM_CODEC_CORRUPT_FRAME;
- return -1;
- }
-
- cm->temporal_layer_id = obu_header.temporal_layer_id;
- cm->spatial_layer_id = obu_header.spatial_layer_id;
-
- if (obu_header.type != OBU_TEMPORAL_DELIMITER &&
- obu_header.type != OBU_SEQUENCE_HEADER &&
- obu_header.type != OBU_PADDING) {
- // don't decode obu if it's not in current operating mode
- if (!is_obu_in_current_operating_point(pbi, obu_header)) {
- data += payload_size;
- continue;
- }
- }
-
- av1_init_read_bit_buffer(pbi, &rb, data, data + payload_size);
-
- switch (obu_header.type) {
- case OBU_TEMPORAL_DELIMITER:
- decoded_payload_size = read_temporal_delimiter_obu();
- pbi->seen_frame_header = 0;
- break;
- case OBU_SEQUENCE_HEADER:
- decoded_payload_size = read_sequence_header_obu(pbi, &rb);
- if (cm->error.error_code != AOM_CODEC_OK) return -1;
- break;
- case OBU_FRAME_HEADER:
- case OBU_REDUNDANT_FRAME_HEADER:
- case OBU_FRAME:
- // Only decode first frame header received
- if (!pbi->seen_frame_header ||
- (cm->large_scale_tile && !pbi->camera_frame_header_ready)) {
- frame_header_size = read_frame_header_obu(
- pbi, &rb, data, p_data_end, obu_header.type != OBU_FRAME);
- pbi->seen_frame_header = 1;
- if (!pbi->ext_tile_debug && cm->large_scale_tile)
- pbi->camera_frame_header_ready = 1;
- } else {
- // TODO(wtc): Verify that the frame_header_obu is identical to the
- // original frame_header_obu. For now just skip frame_header_size
- // bytes in the bit buffer.
- if (frame_header_size > payload_size) {
- cm->error.error_code = AOM_CODEC_CORRUPT_FRAME;
- return -1;
- }
- assert(rb.bit_offset == 0);
- rb.bit_offset = 8 * frame_header_size;
- }
-
- decoded_payload_size = frame_header_size;
- pbi->frame_header_size = frame_header_size;
-
- if (cm->show_existing_frame) {
- if (obu_header.type == OBU_FRAME) {
- cm->error.error_code = AOM_CODEC_UNSUP_BITSTREAM;
- return -1;
- }
- frame_decoding_finished = 1;
- pbi->seen_frame_header = 0;
- break;
- }
-
- // In large scale tile coding, decode the common camera frame header
- // before any tile list OBU.
- if (!pbi->ext_tile_debug && pbi->camera_frame_header_ready) {
- frame_decoding_finished = 1;
- // Skip the rest of the frame data.
- decoded_payload_size = payload_size;
- // Update data_end.
- *p_data_end = data_end;
- break;
- }
-
- if (obu_header.type != OBU_FRAME) break;
- obu_payload_offset = frame_header_size;
- // Byte align the reader before reading the tile group.
- if (byte_alignment(cm, &rb)) return -1;
- AOM_FALLTHROUGH_INTENDED; // fall through to read tile group.
- case OBU_TILE_GROUP:
- if (!pbi->seen_frame_header) {
- cm->error.error_code = AOM_CODEC_CORRUPT_FRAME;
- return -1;
- }
- if (obu_payload_offset > payload_size) {
- cm->error.error_code = AOM_CODEC_CORRUPT_FRAME;
- return -1;
- }
- decoded_payload_size += read_one_tile_group_obu(
- pbi, &rb, is_first_tg_obu_received, data + obu_payload_offset,
- data + payload_size, p_data_end, &frame_decoding_finished,
- obu_header.type == OBU_FRAME);
- is_first_tg_obu_received = 0;
- if (frame_decoding_finished) pbi->seen_frame_header = 0;
- break;
- case OBU_METADATA:
- decoded_payload_size = read_metadata(data, payload_size);
- break;
- case OBU_TILE_LIST:
- if (CONFIG_NORMAL_TILE_MODE) {
- cm->error.error_code = AOM_CODEC_UNSUP_BITSTREAM;
- return -1;
- }
-
- // This OBU type is purely for the large scale tile coding mode.
- // The common camera frame header has to be already decoded.
- if (!pbi->camera_frame_header_ready) {
- cm->error.error_code = AOM_CODEC_CORRUPT_FRAME;
- return -1;
- }
-
- cm->large_scale_tile = 1;
- av1_set_single_tile_decoding_mode(cm);
- decoded_payload_size =
- read_and_decode_one_tile_list(pbi, &rb, data, data + payload_size,
- p_data_end, &frame_decoding_finished);
- if (cm->error.error_code != AOM_CODEC_OK) return -1;
- break;
- case OBU_PADDING:
- default:
- // Skip unrecognized OBUs
- decoded_payload_size = payload_size;
- break;
- }
-
- // Check that the signalled OBU size matches the actual amount of data read
- if (decoded_payload_size > payload_size) {
- cm->error.error_code = AOM_CODEC_CORRUPT_FRAME;
- return -1;
- }
-
- // If there are extra padding bytes, they should all be zero
- while (decoded_payload_size < payload_size) {
- uint8_t padding_byte = data[decoded_payload_size++];
- if (padding_byte != 0) {
- cm->error.error_code = AOM_CODEC_CORRUPT_FRAME;
- return -1;
- }
- }
-
- data += payload_size;
- }
-
- return frame_decoding_finished;
-}
diff --git a/third_party/aom/av1/decoder/obu.h b/third_party/aom/av1/decoder/obu.h
deleted file mode 100644
index 5ab243fc9..000000000
--- a/third_party/aom/av1/decoder/obu.h
+++ /dev/null
@@ -1,31 +0,0 @@
-/*
- * Copyright (c) 2017, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#ifndef AOM_AV1_DECODER_OBU_H_
-#define AOM_AV1_DECODER_OBU_H_
-
-#include "aom/aom_codec.h"
-#include "av1/decoder/decoder.h"
-
-// Try to decode one frame from a buffer.
-// Returns 1 if we decoded a frame,
-// 0 if we didn't decode a frame but that's okay
-// (eg, if there was a frame but we skipped it),
-// or -1 on error
-int aom_decode_frame_from_obus(struct AV1Decoder *pbi, const uint8_t *data,
- const uint8_t *data_end,
- const uint8_t **p_data_end);
-
-aom_codec_err_t aom_get_num_layers_from_operating_point_idc(
- int operating_point_idc, unsigned int *num_spatial_layers,
- unsigned int *num_temporal_layers);
-
-#endif // AOM_AV1_DECODER_OBU_H_
diff --git a/third_party/aom/av1/encoder/aq_complexity.c b/third_party/aom/av1/encoder/aq_complexity.c
deleted file mode 100644
index 80f8e2e66..000000000
--- a/third_party/aom/av1/encoder/aq_complexity.c
+++ /dev/null
@@ -1,172 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#include <limits.h>
-#include <math.h>
-
-#include "av1/encoder/aq_complexity.h"
-#include "av1/encoder/aq_variance.h"
-#include "av1/encoder/encodeframe.h"
-#include "av1/common/seg_common.h"
-#include "av1/encoder/segmentation.h"
-#include "aom_dsp/aom_dsp_common.h"
-#include "aom_ports/system_state.h"
-
-#define AQ_C_SEGMENTS 5 // Number of segments iterated by the complexity-AQ loops
-#define DEFAULT_AQ2_SEG 3 // Neutral segment: its ALT_Q feature is disabled, so it uses the base Q
-#define AQ_C_STRENGTHS 3 // Rows per table; get_aq_c_strength() returns 0, 1 or 2
-static const double aq_c_q_adj_factor[AQ_C_STRENGTHS][AQ_C_SEGMENTS] = { // [strength][segment] factor handed to av1_compute_qdelta_by_rate()
- { 1.75, 1.25, 1.05, 1.00, 0.90 },
- { 2.00, 1.50, 1.15, 1.00, 0.85 },
- { 2.50, 1.75, 1.25, 1.00, 0.80 }
-};
-static const double aq_c_transitions[AQ_C_STRENGTHS][AQ_C_SEGMENTS] = { // [strength][segment] multiplier on the target rate, compared with the projected rate
- { 0.15, 0.30, 0.55, 2.00, 100.0 },
- { 0.20, 0.40, 0.65, 2.00, 100.0 },
- { 0.25, 0.50, 0.75, 2.00, 100.0 }
-};
-static const double aq_c_var_thresholds[AQ_C_STRENGTHS][AQ_C_SEGMENTS] = { // [strength][segment] offset added to the low-variance threshold, compared with the block log-variance
- { -4.0, -3.0, -2.0, 100.00, 100.0 },
- { -3.5, -2.5, -1.5, 100.00, 100.0 },
- { -3.0, -2.0, -1.0, 100.00, 100.0 }
-};
-
-// Maps a quantizer index to the AQ strength (0, 1 or 2) used as the row index
-// of the aq_c_* tables: one step is added for each threshold the approximate
-// base quantizer exceeds.
-static int get_aq_c_strength(int q_index, aom_bit_depth_t bit_depth) {
-  // Approximate base quantizer (integer division truncates).
-  const int approx_q = av1_ac_quant_Q3(q_index, 0, bit_depth) / 4;
-  int strength = 0;
-
-  if (approx_q > 10) ++strength;
-  if (approx_q > 25) ++strength;
-  return strength;
-}
-
-// Sets up the per-segment Q deltas for complexity AQ on the current frame.
-// On a resolution change the segmentation map is zeroed and segmentation is
-// switched off. Otherwise the segments are (re)configured only for intra-only
-// frames, error resilient mode, alt-ref refreshes, and golden refreshes that
-// are not a source alt-ref; every other frame is left untouched.
-void av1_setup_in_frame_q_adj(AV1_COMP *cpi) {
-  AV1_COMMON *const cm = &cpi->common;
-  struct segmentation *const seg = &cm->seg;
-  const int size_changed =
-      cm->prev_frame && (cm->width != cm->prev_frame->width ||
-                         cm->height != cm->prev_frame->height);
-  int aq_strength;
-  int seg_id;
-
-  // Floating point is used further down; make sure that is safe.
-  aom_clear_system_state();
-
-  if (size_changed) {
-    memset(cpi->segmentation_map, 0, cm->mi_rows * cm->mi_cols);
-    av1_clearall_segfeatures(seg);
-    av1_disable_segmentation(seg);
-    return;
-  }
-
-  // Nothing to do unless this frame is one of the (re)configuration points.
-  if (!(frame_is_intra_only(cm) || cm->error_resilient_mode ||
-        cpi->refresh_alt_ref_frame ||
-        (cpi->refresh_golden_frame && !cpi->rc.is_src_frame_alt_ref))) {
-    return;
-  }
-
-  aq_strength = get_aq_c_strength(cm->base_qindex, cm->seq_params.bit_depth);
-
-  // Start from a map in which every block sits in the neutral segment.
-  memset(cpi->segmentation_map, DEFAULT_AQ2_SEG, cm->mi_rows * cm->mi_cols);
-
-  av1_clearall_segfeatures(seg);
-
-  // Below this target rate per SB the segmentation overhead usually outweighs
-  // any benefit, so segmentation is turned off.
-  if (cpi->rc.sb64_target_rate < 256) {
-    av1_disable_segmentation(seg);
-    return;
-  }
-
-  av1_enable_segmentation(seg);
-
-  // The neutral segment carries no "Q" feature, so it uses the baseline Q.
-  av1_disable_segfeature(seg, DEFAULT_AQ2_SEG, SEG_LVL_ALT_Q);
-
-  // Give every other segment its own Q delta.
-  for (seg_id = 0; seg_id < AQ_C_SEGMENTS; ++seg_id) {
-    int delta;
-
-    if (seg_id == DEFAULT_AQ2_SEG) continue;
-
-    delta = av1_compute_qdelta_by_rate(
-        &cpi->rc, cm->frame_type, cm->base_qindex,
-        aq_c_q_adj_factor[aq_strength][seg_id], cm->seq_params.bit_depth);
-
-    // In AQ complexity mode a segment must not land on Q0 unless the base Q
-    // is 0. Q0 (lossless) implies 4x4 only, and in AQ mode 2 a segment Q delta
-    // is sometimes applied without going back around the rd loop, which could
-    // produce an illegal combination of partition size and q.
-    if (cm->base_qindex != 0 && cm->base_qindex + delta == 0) {
-      delta = 1 - cm->base_qindex;
-    }
-
-    if (cm->base_qindex + delta <= 0) continue;
-
-    av1_enable_segfeature(seg, seg_id, SEG_LVL_ALT_Q);
-    av1_set_segdata(seg, seg_id, SEG_LVL_ALT_Q, delta);
-  }
-}
-
-#define DEFAULT_LV_THRESH 10.0
-#define MIN_DEFAULT_LV_THRESH 8.0
-// Select a segment for the current block and write it into the segmentation
-// map for every mode-info unit the block covers inside the frame.
-//
-// The choice depends on the ratio of the projected bits for the block
-// (projected_rate) to a target average, and on the block's spatial complexity
-// (log variance). The first segment whose rate and variance thresholds are
-// both met is used; if none matches, the last segment is used.
-//
-//   cpi             encoder instance (rate control, source frame, map)
-//   mb              macroblock context handed to the variance helpers
-//   bs              block size being coded
-//   mi_row, mi_col  block position in mode-info units
-//   projected_rate  projected bits for the block, in the same units as the
-//                   target (bits << AV1_PROB_COST_SHIFT)
-void av1_caq_select_segment(const AV1_COMP *cpi, MACROBLOCK *mb, BLOCK_SIZE bs,
-                            int mi_row, int mi_col, int projected_rate) {
-  const AV1_COMMON *const cm = &cpi->common;
-  const int num_planes = av1_num_planes(cm);
-
-  const int mi_offset = mi_row * cm->mi_cols + mi_col;
-  const int xmis = AOMMIN(cm->mi_cols - mi_col, mi_size_wide[bs]);
-  const int ymis = AOMMIN(cm->mi_rows - mi_row, mi_size_high[bs]);
-  // Rate depends on the fraction of a superblock inside the frame
-  // (xmis * ymis / (mib_size * mib_size)), converted to
-  // bits << AV1_PROB_COST_SHIFT units. The target rate is widened to 64 bits
-  // *before* multiplying so the product cannot overflow a 32-bit int.
-  const int64_t num = ((int64_t)cpi->rc.sb64_target_rate * xmis * ymis)
-                      << AV1_PROB_COST_SHIFT;
-  const int denom = cm->seq_params.mib_size * cm->seq_params.mib_size;
-  // Kept in 64 bits so a large quotient is not truncated.
-  const int64_t target_rate = num / denom;
-  const int aq_strength =
-      get_aq_c_strength(cm->base_qindex, cm->seq_params.bit_depth);
-  double logvar;
-  double low_var_thresh;
-  unsigned char segment = AQ_C_SEGMENTS - 1;  // Used when no test matches.
-  int x, y;
-  int i;
-
-  aom_clear_system_state();
-  low_var_thresh =
-      (cpi->oxcf.pass == 2)
-          ? AOMMAX(exp(cpi->twopass.mb_av_energy), MIN_DEFAULT_LV_THRESH)
-          : DEFAULT_LV_THRESH;
-
-  av1_setup_src_planes(mb, cpi->source, mi_row, mi_col, num_planes);
-  logvar = av1_log_block_var(cpi, mb, bs);
-
-  for (i = 0; i < AQ_C_SEGMENTS; ++i) {
-    // Test rate against a threshold value and variance against a threshold.
-    // Increasing segment number (higher variance and complexity) = higher Q.
-    if ((projected_rate < target_rate * aq_c_transitions[aq_strength][i]) &&
-        (logvar < (low_var_thresh + aq_c_var_thresholds[aq_strength][i]))) {
-      segment = i;
-      break;
-    }
-  }
-
-  // Fill in the entries in the segment map corresponding to this block.
-  for (y = 0; y < ymis; y++) {
-    for (x = 0; x < xmis; x++) {
-      cpi->segmentation_map[mi_offset + y * cm->mi_cols + x] = segment;
-    }
-  }
-}
diff --git a/third_party/aom/av1/encoder/aq_complexity.h b/third_party/aom/av1/encoder/aq_complexity.h
deleted file mode 100644
index 3421d74c9..000000000
--- a/third_party/aom/av1/encoder/aq_complexity.h
+++ /dev/null
@@ -1,37 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#ifndef AOM_AV1_ENCODER_AQ_COMPLEXITY_H_
-#define AOM_AV1_ENCODER_AQ_COMPLEXITY_H_
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-#include "av1/common/enums.h"
-
-struct AV1_COMP;
-struct macroblock;
-
-// Select a segment for the current Block.
-void av1_caq_select_segment(const struct AV1_COMP *cpi, struct macroblock *,
- BLOCK_SIZE bs, int mi_row, int mi_col,
- int projected_rate);
-
-// This function sets up a set of segments with delta Q values around
-// the baseline frame quantizer.
-void av1_setup_in_frame_q_adj(struct AV1_COMP *cpi);
-
-#ifdef __cplusplus
-} // extern "C"
-#endif
-
-#endif // AOM_AV1_ENCODER_AQ_COMPLEXITY_H_
diff --git a/third_party/aom/av1/encoder/aq_cyclicrefresh.c b/third_party/aom/av1/encoder/aq_cyclicrefresh.c
deleted file mode 100644
index f532d48da..000000000
--- a/third_party/aom/av1/encoder/aq_cyclicrefresh.c
+++ /dev/null
@@ -1,580 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#include <limits.h>
-#include <math.h>
-
-#include "av1/common/seg_common.h"
-#include "av1/encoder/aq_cyclicrefresh.h"
-#include "av1/encoder/ratectrl.h"
-#include "av1/encoder/segmentation.h"
-#include "aom_dsp/aom_dsp_common.h"
-#include "aom_ports/system_state.h"
-
-struct CYCLIC_REFRESH {
- // Percentage of blocks per frame that are targeted as candidates
- // for cyclic refresh.
- int percent_refresh;
- // Maximum magnitude of the q-delta, as a percentage of the base q.
- int max_qdelta_perc;
- // Superblock index at which the next frame's refresh scan starts.
- int sb_index;
- // Controls how long block will need to wait to be refreshed again, in
- // excess of the cycle time, i.e., in the case of all zero motion, block
- // will be refreshed every (100/percent_refresh + time_for_refresh) frames.
- int time_for_refresh;
- // Number of map entries marked for delta-q (BOOST1) by the last map update.
- int target_num_seg_blocks;
- // Map entries found in segment BOOST1 / BOOST2 by the post-encode count.
- int actual_num_seg1_blocks;
- int actual_num_seg2_blocks;
- // RD multiplier computed for the q index of segment BOOST1.
- int rdmult;
- // Refresh map: 1 = not a candidate, 0 = candidate, < 0 = refreshed (counts up towards 0 on each map update).
- int8_t *map;
- // Map of the last q a block was coded at.
- uint8_t *last_coded_q_map;
- // Thresholds applied to the projected rate/distortion of the coding block,
- // when deciding whether block should be refreshed.
- int64_t thresh_rate_sb;
- int64_t thresh_dist_sb;
- // Threshold applied to the motion vector (in units of 1/8 pel) of the
- // coding block, when deciding whether block should be refreshed.
- int16_t motion_thresh;
- // Rate target ratio to set q delta.
- double rate_ratio_qdelta;
- // Boost factor for rate target ratio, for segment CR_SEGMENT_ID_BOOST2.
- int rate_boost_fac;
- double low_content_avg; // Running average of the per-frame fraction of map entries < 1.
- int qindex_delta[3]; // Q-index delta per segment id; [1] and [2] are set for BOOST1 and BOOST2.
-};
-
-// Allocates a cyclic refresh context for a frame of mi_rows x mi_cols
-// mode-info units. The refresh map starts zeroed and every entry of the
-// last-coded-q map starts at MAXQ. Returns NULL if any allocation fails, in
-// which case everything allocated so far is released.
-CYCLIC_REFRESH *av1_cyclic_refresh_alloc(int mi_rows, int mi_cols) {
-  CYCLIC_REFRESH *const cr = aom_calloc(1, sizeof(*cr));
-  if (!cr) return NULL;
-
-  cr->map = aom_calloc(mi_rows * mi_cols, sizeof(*cr->map));
-  if (cr->map) {
-    const size_t q_map_bytes =
-        mi_rows * mi_cols * sizeof(*cr->last_coded_q_map);
-    cr->last_coded_q_map = aom_malloc(q_map_bytes);
-    if (cr->last_coded_q_map) {
-      // The q map stores one byte per entry, so MAXQ has to fit in a byte.
-      assert(MAXQ <= 255);
-      memset(cr->last_coded_q_map, MAXQ, q_map_bytes);
-      return cr;
-    }
-  }
-
-  // One of the map allocations failed: undo the partial construction.
-  av1_cyclic_refresh_free(cr);
-  return NULL;
-}
-
-// Releases a cyclic refresh context together with both of its maps.
-// Passing NULL is allowed and does nothing.
-void av1_cyclic_refresh_free(CYCLIC_REFRESH *cr) {
-  if (cr == NULL) return;
-
-  aom_free(cr->map);
-  aom_free(cr->last_coded_q_map);
-  aom_free(cr);
-}
-
-// Decides from the bitrate whether cyclic refresh should stay enabled.
-// Returns 1 to apply cyclic refresh and 0 to turn it off.
-static int apply_cyclic_refresh_bitrate(const AV1_COMMON *cm,
-                                        const RATE_CONTROL *rc) {
-  // Cyclic refresh is turned off when the bits available per frame are not
-  // sufficiently larger than the bit cost of segmentation. The segment map
-  // cost should scale with the number of blocks, so the available bits
-  // (avg_frame_bandwidth) are compared with the block count
-  // (mi_rows * mi_cols).
-  const float factor = 0.25;
-  const int number_blocks = cm->mi_rows * cm->mi_cols;
-  // Corresponds to turning off at target bitrates of (at 30fps) ~12kbps for
-  // CIF, 36kbps for VGA, 100kbps for HD/720p.
-  const int too_few_bits = rc->avg_frame_bandwidth < factor * number_blocks;
-  // Also turn off at very small frame sizes, to avoid refreshing too large a
-  // fraction of superblocks per frame. This threshold is less than QCIF.
-  const int frame_too_small = number_blocks / 64 < 5;
-
-  return !(too_few_bits || frame_too_small);
-}
-
-// Classifies a coding block of size bsize for refresh (lower-qp coding) and
-// returns the segment id it qualifies for:
-//   CR_SEGMENT_ID_BASE    rejected: projected distortion is above the
-//                         threshold and the block either uses a large motion
-//                         vector or is intra coded;
-//   CR_SEGMENT_ID_BOOST2  block of at least 16x16 with zero motion and a rate
-//                         below the threshold, when rate_boost_fac > 10;
-//   CR_SEGMENT_ID_BOOST1  everything else.
-static int candidate_refresh_aq(const CYCLIC_REFRESH *cr,
-                                const MB_MODE_INFO *mbmi, int64_t rate,
-                                int64_t dist, int bsize) {
-  const MV mv = mbmi->mv[0].as_mv;
-  const int limit = cr->motion_thresh;
-  const int large_mv =
-      mv.row > limit || mv.row < -limit || mv.col > limit || mv.col < -limit;
-
-  if (dist > cr->thresh_dist_sb && (large_mv || !is_inter_block(mbmi))) {
-    return CR_SEGMENT_ID_BASE;
-  }
-
-  // More aggressive delta-q for bigger blocks with zero motion.
-  if (bsize >= BLOCK_16X16 && rate < cr->thresh_rate_sb &&
-      is_inter_block(mbmi) && mbmi->mv[0].as_int == 0 &&
-      cr->rate_boost_fac > 10) {
-    return CR_SEGMENT_ID_BOOST2;
-  }
-
-  return CR_SEGMENT_ID_BOOST1;
-}
-
-// Computes the q-index delta of a segment for base q index q and the given
-// rate factor. A negative delta is limited in magnitude to
-// max_qdelta_perc percent of q.
-static int compute_deltaq(const AV1_COMP *cpi, int q, double rate_factor) {
-  const CYCLIC_REFRESH *const cr = cpi->cyclic_refresh;
-  const int max_drop = cr->max_qdelta_perc * q / 100;
-  const int delta = av1_compute_qdelta_by_rate(
-      &cpi->rc, cpi->common.frame_type, q, rate_factor,
-      cpi->common.seq_params.bit_depth);
-
-  return (-delta > max_drop) ? -max_drop : delta;
-}
-
-// Estimates the bits of the just encoded frame, taking the delta-q of the
-// boosted segments into account. Each segment's estimate is weighted by the
-// share of blocks that actually ended up in it. Per the original note, this
-// is called in the postencode (from rc_update_rate_correction_factors()).
-int av1_cyclic_refresh_estimate_bits_at_q(const AV1_COMP *cpi,
-                                          double correction_factor) {
-  const AV1_COMMON *const cm = &cpi->common;
-  const CYCLIC_REFRESH *const cr = cpi->cyclic_refresh;
-  const int mbs = cm->MBs;
-  // The block counts are in 8x8 units, four per macroblock.
-  const int num8x8bl = mbs << 2;
-  // Weights of the boosted segments: blocks refreshed in the previous/just
-  // encoded frame as a fraction of all 8x8 blocks.
-  const double w_boost1 = (double)cr->actual_num_seg1_blocks / num8x8bl;
-  const double w_boost2 = (double)cr->actual_num_seg2_blocks / num8x8bl;
-  const double base_term =
-      (1.0 - w_boost1 - w_boost2) *
-      av1_estimate_bits_at_q(cm->frame_type, cm->base_qindex, mbs,
-                             correction_factor, cm->seq_params.bit_depth);
-  const double boost1_term =
-      w_boost1 * av1_estimate_bits_at_q(
-                     cm->frame_type, cm->base_qindex + cr->qindex_delta[1],
-                     mbs, correction_factor, cm->seq_params.bit_depth);
-  const double boost2_term =
-      w_boost2 * av1_estimate_bits_at_q(
-                     cm->frame_type, cm->base_qindex + cr->qindex_delta[2],
-                     mbs, correction_factor, cm->seq_params.bit_depth);
-
-  // Segment-weighted average, truncated to an integer bit count.
-  return (int)(base_term + boost1_term + boost2_term);
-}
-
-// Estimates, before the frame is encoded, the bits per mb for q index i and
-// the delta-q that corresponds to it. Per the original note, this is called
-// from rc_regulate_q() to set the base qp index, and at that point the
-// segment map holds only 0/CR_SEGMENT_ID_BASE (no refresh) or
-// 1/CR_SEGMENT_ID_BOOST1 (refresh) for each superblock.
-int av1_cyclic_refresh_rc_bits_per_mb(const AV1_COMP *cpi, int i,
-                                      double correction_factor) {
-  const AV1_COMMON *const cm = &cpi->common;
-  CYCLIC_REFRESH *const cr = cpi->cyclic_refresh;
-  const int num8x8bl = cm->MBs << 2;
-  // Boosted-block estimate: the average of the target count for the frame to
-  // be encoded and the actual count from the previous frame.
-  const int boosted_blocks =
-      (cr->target_num_seg_blocks + cr->actual_num_seg1_blocks +
-       cr->actual_num_seg2_blocks) >>
-      1;
-  const double w_boost = (double)boosted_blocks / num8x8bl;
-  // Delta-q corresponding to q index i.
-  const int deltaq = compute_deltaq(cpi, i, cr->rate_ratio_qdelta);
-  const double base_term =
-      (1.0 - w_boost) * av1_rc_bits_per_mb(cm->frame_type, i,
-                                           correction_factor,
-                                           cm->seq_params.bit_depth);
-  const double boost_term =
-      w_boost * av1_rc_bits_per_mb(cm->frame_type, i + deltaq,
-                                   correction_factor,
-                                   cm->seq_params.bit_depth);
-
-  // Segment-weighted average, truncated to an integer.
-  return (int)(base_term + boost_term);
-}
-
-// Called before coding a prediction block of size bsize at (mi_row, mi_col).
-// Decides whether the block's segment_id has to be reset, then writes the
-// result into the cyclic refresh map, the global segmentation map and the
-// last-coded-q map for every mode-info unit the block covers in the frame.
-void av1_cyclic_refresh_update_segment(const AV1_COMP *cpi,
-                                       MB_MODE_INFO *const mbmi, int mi_row,
-                                       int mi_col, BLOCK_SIZE bsize,
-                                       int64_t rate, int64_t dist, int skip) {
-  const AV1_COMMON *const cm = &cpi->common;
-  CYCLIC_REFRESH *const cr = cpi->cyclic_refresh;
-  const int cols_covered = AOMMIN(cm->mi_cols - mi_col, mi_size_wide[bsize]);
-  const int rows_covered = AOMMIN(cm->mi_rows - mi_row, mi_size_high[bsize]);
-  const int first_idx = mi_row * cm->mi_cols + mi_col;
-  const int candidate = candidate_refresh_aq(cr, mbmi, rate, dist, bsize);
-  // By default the refresh map keeps its current value.
-  int map_value = cr->map[first_idx];
-  int track_q;
-  int inter_skip;
-  int coded_q = 0;
-  int row, col;
-
-  // A block labeled for refresh takes the segment it qualifies for, unless it
-  // will be skipped, in which case it falls back to the base segment.
-  if (cyclic_refresh_segment_id_boosted(mbmi->segment_id)) {
-    mbmi->segment_id = skip ? CR_SEGMENT_ID_BASE : candidate;
-  }
-
-  // Choose the value for the refresh map, which drives the segmentation map
-  // of the next frame.
-  if (cyclic_refresh_segment_id_boosted(mbmi->segment_id)) {
-    // Refreshed this frame: mark as clean. The magnitude of the negative
-    // value controls how long it takes before the block is considered again.
-    map_value = -cr->time_for_refresh;
-  } else if (!candidate) {
-    // Not a candidate for refresh.
-    map_value = 1;
-  } else if (cr->map[first_idx] == 1) {
-    // Accepted as a candidate while currently marked 1: mark it 0 so it is
-    // considered for cleanup later. Any other current value is kept.
-    map_value = 0;
-  }
-
-  // The q bookkeeping below depends only on the block, not on the position
-  // inside it, so it is worked out once.
-  track_q = mbmi->segment_id <= CR_SEGMENT_ID_BOOST2;
-  inter_skip = is_inter_block(mbmi) && skip;
-  if (track_q) {
-    coded_q = clamp(cm->base_qindex + cr->qindex_delta[mbmi->segment_id], 0,
-                    MAXQ);
-  }
-
-  for (row = 0; row < rows_covered; row++) {
-    for (col = 0; col < cols_covered; col++) {
-      const int idx = first_idx + row * cm->mi_cols + col;
-
-      cr->map[idx] = map_value;
-      cpi->segmentation_map[idx] = mbmi->segment_id;
-      if (!track_q) continue;
-
-      // Inter skip blocks were clearly not coded at the current qindex, so
-      // for them the stored q is only ever lowered. For cases where motion is
-      // non-zero or the reference frame isn't the previous frame, the
-      // previous value at this location is not entirely correct.
-      cr->last_coded_q_map[idx] =
-          inter_skip ? AOMMIN(coded_q, cr->last_coded_q_map[idx]) : coded_q;
-    }
-  }
-}
-
-// Recounts, from the segmentation map, how many map entries ended up in
-// segments BOOST1 and BOOST2 (i.e. had a segment delta-q applied).
-void av1_cyclic_refresh_postencode(AV1_COMP *const cpi) {
-  AV1_COMMON *const cm = &cpi->common;
-  CYCLIC_REFRESH *const cr = cpi->cyclic_refresh;
-  int mi_row, mi_col;
-
-  cr->actual_num_seg1_blocks = 0;
-  cr->actual_num_seg2_blocks = 0;
-
-  for (mi_row = 0; mi_row < cm->mi_rows; mi_row++) {
-    const unsigned char *const map_row =
-        cpi->segmentation_map + mi_row * cm->mi_cols;
-
-    for (mi_col = 0; mi_col < cm->mi_cols; mi_col++) {
-      // NOTE(review): assumes cyclic_refresh_segment_id() has no side
-      // effects, so evaluating it once per entry is equivalent.
-      switch (cyclic_refresh_segment_id(map_row[mi_col])) {
-        case CR_SEGMENT_ID_BOOST1: cr->actual_num_seg1_blocks++; break;
-        case CR_SEGMENT_ID_BOOST2: cr->actual_num_seg2_blocks++; break;
-        default: break;
-      }
-    }
-  }
-}
-
-// Sets the golden frame update interval (per the original note, for 1 pass
-// CBR mode). With a positive percent_refresh the interval is
-// 4 * (100 / percent_refresh) frames, otherwise it is 40 frames.
-// Depending on past encoding stats the GF flag may be reset, so the update
-// may not occur until the next baseline_gf_interval.
-void av1_cyclic_refresh_set_golden_update(AV1_COMP *const cpi) {
-  RATE_CONTROL *const rc = &cpi->rc;
-  const CYCLIC_REFRESH *const cr = cpi->cyclic_refresh;
-
-  rc->baseline_gf_interval =
-      (cr->percent_refresh > 0) ? 4 * (100 / cr->percent_refresh) : 40;
-}
-
-// Update some encoding stats (from the just encoded frame). If this frame's
-// background has high motion, refresh the golden frame. Otherwise, if the
-// golden reference is to be updated, check whether we should NOT update the
-// golden ref.
-//
-// Side effects: may set cpi->refresh_golden_frame, rc->baseline_gf_interval
-// and rc->frames_till_gf_update_due, and always updates cr->low_content_avg.
-void av1_cyclic_refresh_check_golden_update(AV1_COMP *const cpi) {
-  AV1_COMMON *const cm = &cpi->common;
-  CYCLIC_REFRESH *const cr = cpi->cyclic_refresh;
-  RATE_CONTROL *const rc = &cpi->rc;
-  const int rows = cm->mi_rows, cols = cm->mi_cols;
-  // 64-bit block count so the percentage tests below cannot overflow.
-  const int64_t num_blocks = (int64_t)rows * cols;
-  int mi_row, mi_col;
-  double fraction_low = 0.0;
-  int low_content_frame = 0;
-  int cnt1 = 0, cnt2 = 0;
-  int force_gf_refresh = 0;
-
-  for (mi_row = 0; mi_row < rows; mi_row++) {
-    MB_MODE_INFO **mi = cm->mi_grid_visible + mi_row * cm->mi_stride;
-
-    for (mi_col = 0; mi_col < cols; mi_col++) {
-      // Take the magnitudes in int so that negating the most negative 16-bit
-      // component cannot wrap.
-      const int mvr = mi[0]->mv[0].as_mv.row;
-      const int mvc = mi[0]->mv[0].as_mv.col;
-      const int abs_mvr = mvr >= 0 ? mvr : -mvr;
-      const int abs_mvc = mvc >= 0 ? mvc : -mvc;
-
-      // cnt1 counts blocks with small motion, cnt2 those with zero motion.
-      if (abs_mvr <= 16 && abs_mvc <= 16) {
-        cnt1++;
-        if (abs_mvr == 0 && abs_mvc == 0) cnt2++;
-      }
-      mi++;
-
-      // Accumulate low_content_frame.
-      if (cr->map[mi_row * cols + mi_col] < 1) low_content_frame++;
-    }
-  }
-
-  // For video conference clips, if the background has high motion in current
-  // frame because of the camera movement, set this frame as the golden frame.
-  // Use 70% and 5% as the thresholds for golden frame refreshing: more than
-  // 70% of the blocks have small motion, and fewer than 5% of those have
-  // zero motion. The first test used to read "cnt1 * 10 > 70 * rows * cols",
-  // which can never hold because cnt1 <= rows * cols.
-  if ((int64_t)cnt1 * 100 > 70 * num_blocks && cnt2 * 20 < cnt1) {
-    av1_cyclic_refresh_set_golden_update(cpi);
-    rc->frames_till_gf_update_due = rc->baseline_gf_interval;
-
-    if (rc->frames_till_gf_update_due > rc->frames_to_key)
-      rc->frames_till_gf_update_due = rc->frames_to_key;
-    cpi->refresh_golden_frame = 1;
-    force_gf_refresh = 1;
-  }
-
-  fraction_low = (double)low_content_frame / (double)num_blocks;
-  // Update average.
-  cr->low_content_avg = (fraction_low + 3 * cr->low_content_avg) / 4;
-  if (!force_gf_refresh && cpi->refresh_golden_frame == 1) {
-    // Don't update golden reference if the amount of low_content for the
-    // current encoded frame is small, or if the recursive average of the
-    // low_content over the update interval window falls below threshold.
-    if (fraction_low < 0.8 || cr->low_content_avg < 0.7)
-      cpi->refresh_golden_frame = 0;
-    // Reset for next interval.
-    cr->low_content_avg = fraction_low;
-  }
-}
-
-// Rebuilds the segmentation map for the coming frame, together with the
-// related state: the cyclic refresh map, the superblock index at which the
-// next scan starts, and the number of blocks targeted for refresh.
-// Every superblock ends up entirely in 0/CR_SEGMENT_ID_BASE (no refresh) or
-// 1/CR_SEGMENT_ID_BOOST1 (refresh). Blocks labeled BOOST1 may later be set to
-// BOOST2 while the superblock is encoded.
-static void cyclic_refresh_update_map(AV1_COMP *const cpi) {
-  AV1_COMMON *const cm = &cpi->common;
-  CYCLIC_REFRESH *const cr = cpi->cyclic_refresh;
-  unsigned char *const seg_map = cpi->segmentation_map;
-  const int mib_size = cm->seq_params.mib_size;
-  const int sb_cols = (cm->mi_cols + mib_size - 1) / mib_size;
-  const int sb_rows = (cm->mi_rows + mib_size - 1) / mib_size;
-  const int sbs_in_frame = sb_cols * sb_rows;
-  // Number of blocks that should receive the q delta (segment BOOST1).
-  const int block_count = cr->percent_refresh * cm->mi_rows * cm->mi_cols / 100;
-  int sb;
-
-  memset(seg_map, CR_SEGMENT_ID_BASE, cm->mi_rows * cm->mi_cols);
-
-  // Walk the superblocks cyclically, starting at cr->sb_index, until either
-  // block_count blocks have been marked or the whole frame has been visited.
-  if (cr->sb_index >= sbs_in_frame) cr->sb_index = 0;
-  assert(cr->sb_index < sbs_in_frame);
-  sb = cr->sb_index;
-  cr->target_num_seg_blocks = 0;
-
-  for (;;) {
-    // Position of superblock sb in mode-info units, clipped to the frame.
-    const int mi_row = (sb / sb_cols) * mib_size;
-    const int mi_col = (sb % sb_cols) * mib_size;
-    const int first = mi_row * cm->mi_cols + mi_col;
-    const int width = AOMMIN(cm->mi_cols - mi_col, mib_size);
-    const int height = AOMMIN(cm->mi_rows - mi_row, mib_size);
-    int qindex_thresh = 0;
-    int candidates = 0;
-    int r, c;
-
-    if (cpi->oxcf.content == AOM_CONTENT_SCREEN) {
-      qindex_thresh =
-          av1_get_qindex(&cm->seg, CR_SEGMENT_ID_BOOST2, cm->base_qindex);
-    }
-    assert(mi_row >= 0 && mi_row < cm->mi_rows);
-    assert(mi_col >= 0 && mi_col < cm->mi_cols);
-
-    for (r = 0; r < height; r++) {
-      const int row_start = first + r * cm->mi_cols;
-      for (c = 0; c < width; c++) {
-        const int idx = row_start + c;
-        if (cr->map[idx] < 0) {
-          // Recently refreshed: step one frame closer to being a candidate.
-          cr->map[idx]++;
-        } else if (cr->map[idx] == 0 &&
-                   cr->last_coded_q_map[idx] > qindex_thresh) {
-          // Candidate for clean up: count it towards boost/refresh (segment
-          // 1). The segment id may get reset to 0 later if the block gets
-          // coded as anything other than GLOBALMV.
-          candidates++;
-        }
-      }
-    }
-
-    // Keep the segment constant over the superblock: boost all of it when at
-    // least half of its blocks are candidates.
-    if (candidates >= width * height / 2) {
-      for (r = 0; r < height; r++) {
-        unsigned char *const seg_row = seg_map + first + r * cm->mi_cols;
-        for (c = 0; c < width; c++) seg_row[c] = CR_SEGMENT_ID_BOOST1;
-      }
-      cr->target_num_seg_blocks += width * height;
-    }
-
-    if (++sb == sbs_in_frame) sb = 0;
-    if (cr->target_num_seg_blocks >= block_count || sb == cr->sb_index) break;
-  }
-
-  cr->sb_index = sb;
-}
-
-// Sets the cyclic refresh parameters for the current frame.
-void av1_cyclic_refresh_update_parameters(AV1_COMP *const cpi) {
-  const RATE_CONTROL *const rc = &cpi->rc;
-  const AV1_COMMON *const cm = &cpi->common;
-  CYCLIC_REFRESH *const cr = cpi->cyclic_refresh;
-  // Low resolutions at low bitrates get adjusted parameters.
-  const int low_res_low_rate = cm->width <= 352 && cm->height <= 288 &&
-                               rc->avg_frame_bandwidth < 3400;
-
-  cr->percent_refresh = 10;
-  cr->max_qdelta_perc = 50;
-  cr->time_for_refresh = 0;
-
-  // Use a larger delta-qp (higher rate_ratio_qdelta) for the first few (~4)
-  // periods of the refresh cycle after a key frame.
-  cr->rate_ratio_qdelta =
-      (rc->frames_since_key < 4 * cr->percent_refresh) ? 3.0 : 2.0;
-
-  cr->motion_thresh = low_res_low_rate ? 4 : 32;
-  cr->rate_boost_fac = low_res_low_rate ? 10 : 17;
-}
-
-// Sets up cyclic background refresh for the current frame: the segment delta
-// q values and the segmentation map. Segmentation is disabled (and the map
-// zeroed) on a resolution change, on key frames, and when the bitrate check
-// says cyclic refresh should not be applied.
-void av1_cyclic_refresh_setup(AV1_COMP *const cpi) {
-  AV1_COMMON *const cm = &cpi->common;
-  const RATE_CONTROL *const rc = &cpi->rc;
-  CYCLIC_REFRESH *const cr = cpi->cyclic_refresh;
-  struct segmentation *const seg = &cm->seg;
-  const int apply_cyclic_refresh = apply_cyclic_refresh_bitrate(cm, rc);
-  const int size_changed =
-      cm->prev_frame && (cm->width != cm->prev_frame->width ||
-                         cm->height != cm->prev_frame->height);
-
-  if (size_changed) {
-    memset(cpi->segmentation_map, 0, cm->mi_rows * cm->mi_cols);
-    av1_clearall_segfeatures(seg);
-    aom_clear_system_state();
-    av1_disable_segmentation(seg);
-    return;
-  }
-
-  if (cm->current_video_frame == 0) cr->low_content_avg = 0.0;
-
-  // No refresh on key frames or when the bitrate check turned it off:
-  // zero the segmentation map and disable segmentation.
-  if (!apply_cyclic_refresh || cm->frame_type == KEY_FRAME) {
-    memset(cpi->segmentation_map, 0, cm->mi_rows * cm->mi_cols);
-    av1_disable_segmentation(&cm->seg);
-    if (cm->frame_type == KEY_FRAME) {
-      // A key frame also restarts the q history and the refresh scan.
-      memset(cr->last_coded_q_map, MAXQ,
-             cm->mi_rows * cm->mi_cols * sizeof(*cr->last_coded_q_map));
-      cr->sb_index = 0;
-    }
-    return;
-  }
-
-  {
-    const double q =
-        av1_convert_qindex_to_q(cm->base_qindex, cm->seq_params.bit_depth);
-    int boost_delta;
-    int boost1_qindex;
-
-    aom_clear_system_state();
-
-    // Rate threshold: sb64_target_rate shifted left by 8 and then by 2.
-    cr->thresh_rate_sb = ((int64_t)(rc->sb64_target_rate) << 8) << 2;
-    // Distortion threshold, quadratic in Q, scale factor to be adjusted.
-    // q will not exceed 457, so (q * q) is within 32bit; see:
-    // av1_convert_qindex_to_q(), av1_ac_quant(), ac_qlookup*[].
-    cr->thresh_dist_sb = ((int64_t)(q * q)) << 2;
-
-    // Enable segmentation and start from a clean set of segment features.
-    av1_enable_segmentation(&cm->seg);
-    av1_clearall_segfeatures(seg);
-
-    // Note: setting temporal_update has no effect, as the seg-map coding
-    // method (temporal or spatial) is determined in
-    // av1_choose_segmap_coding_method(), based on the coding cost of each
-    // method. For error_resilient mode on the last_frame_seg_map is set to 0,
-    // so if temporal coding is used, it is relative to 0 previous map.
-    // seg->temporal_update = 0;
-
-    // Segment BASE keeps the baseline Q; BOOST1 and BOOST2 carry in-frame Q
-    // adjustments, BOOST2 being the more aggressive one.
-    av1_disable_segfeature(seg, CR_SEGMENT_ID_BASE, SEG_LVL_ALT_Q);
-    av1_enable_segfeature(seg, CR_SEGMENT_ID_BOOST1, SEG_LVL_ALT_Q);
-    av1_enable_segfeature(seg, CR_SEGMENT_ID_BOOST2, SEG_LVL_ALT_Q);
-
-    // Q delta for segment BOOST1.
-    boost_delta = compute_deltaq(cpi, cm->base_qindex, cr->rate_ratio_qdelta);
-    cr->qindex_delta[1] = boost_delta;
-
-    // RD multiplier for segment BOOST1.
-    boost1_qindex =
-        clamp(cm->base_qindex + cm->y_dc_delta_q + boost_delta, 0, MAXQ);
-    cr->rdmult = av1_compute_rd_mult(cpi, boost1_qindex);
-
-    av1_set_segdata(seg, CR_SEGMENT_ID_BOOST1, SEG_LVL_ALT_Q, boost_delta);
-
-    // Q delta for segment BOOST2, from a larger rate ratio capped at
-    // CR_MAX_RATE_TARGET_RATIO.
-    boost_delta = compute_deltaq(
-        cpi, cm->base_qindex,
-        AOMMIN(CR_MAX_RATE_TARGET_RATIO,
-               0.1 * cr->rate_boost_fac * cr->rate_ratio_qdelta));
-    cr->qindex_delta[2] = boost_delta;
-    av1_set_segdata(seg, CR_SEGMENT_ID_BOOST2, SEG_LVL_ALT_Q, boost_delta);
-
-    // Finally rebuild the segmentation and refresh maps.
-    cyclic_refresh_update_map(cpi);
-  }
-}
-
-// Accessor for the RD multiplier stored in the cyclic refresh context.
-int av1_cyclic_refresh_get_rdmult(const CYCLIC_REFRESH *cr) {
-  const int rdmult = cr->rdmult;
-  return rdmult;
-}
-
-// Resets the cyclic refresh state for a resize: zeroes the refresh map,
-// restarts the scan at superblock 0, and requests a golden frame refresh.
-void av1_cyclic_refresh_reset_resize(AV1_COMP *const cpi) {
-  CYCLIC_REFRESH *const cr = cpi->cyclic_refresh;
-  const AV1_COMMON *const cm = &cpi->common;
-
-  cpi->refresh_golden_frame = 1;
-  cr->sb_index = 0;
-  memset(cr->map, 0, cm->mi_rows * cm->mi_cols);
-}
diff --git a/third_party/aom/av1/encoder/aq_cyclicrefresh.h b/third_party/aom/av1/encoder/aq_cyclicrefresh.h
deleted file mode 100644
index b45781983..000000000
--- a/third_party/aom/av1/encoder/aq_cyclicrefresh.h
+++ /dev/null
@@ -1,98 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#ifndef AOM_AV1_ENCODER_AQ_CYCLICREFRESH_H_
-#define AOM_AV1_ENCODER_AQ_CYCLICREFRESH_H_
-
-#include "av1/common/blockd.h"
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-// The segment ids used in cyclic refresh: from base (no boost) to increasing
-// boost (higher delta-qp).
-#define CR_SEGMENT_ID_BASE 0
-#define CR_SEGMENT_ID_BOOST1 1
-#define CR_SEGMENT_ID_BOOST2 2
-
-// Maximum rate target ratio for setting segment delta-qp.
-#define CR_MAX_RATE_TARGET_RATIO 4.0
-
-struct AV1_COMP;
-
-struct CYCLIC_REFRESH;
-typedef struct CYCLIC_REFRESH CYCLIC_REFRESH;
-
-CYCLIC_REFRESH *av1_cyclic_refresh_alloc(int mi_rows, int mi_cols);
-
-void av1_cyclic_refresh_free(CYCLIC_REFRESH *cr);
-
-// Estimate the bits, incorporating the delta-q from segment 1, after encoding
-// the frame.
-int av1_cyclic_refresh_estimate_bits_at_q(const struct AV1_COMP *cpi,
- double correction_factor);
-
-// Estimate the bits per mb, for a given q = i and a corresponding delta-q
-// (for segment 1), prior to encoding the frame.
-int av1_cyclic_refresh_rc_bits_per_mb(const struct AV1_COMP *cpi, int i,
- double correction_factor);
-
-// Prior to coding a given prediction block, of size bsize at (mi_row, mi_col),
-// check if we should reset the segment_id, and update the cyclic_refresh map
-// and segmentation map.
-void av1_cyclic_refresh_update_segment(const struct AV1_COMP *cpi,
- MB_MODE_INFO *const mbmi, int mi_row,
- int mi_col, BLOCK_SIZE bsize,
- int64_t rate, int64_t dist, int skip);
-
-// Update the segmentation map, and related quantities: cyclic refresh map,
-// refresh sb_index, and target number of blocks to be refreshed.
-void av1_cyclic_refresh_update__map(struct AV1_COMP *const cpi);
-
-// Update the actual number of blocks that were applied the segment delta q.
-void av1_cyclic_refresh_postencode(struct AV1_COMP *const cpi);
-
-// Set golden frame update interval, for 1 pass CBR mode.
-void av1_cyclic_refresh_set_golden_update(struct AV1_COMP *const cpi);
-
-// Check if we should not update golden reference, based on past refresh stats.
-void av1_cyclic_refresh_check_golden_update(struct AV1_COMP *const cpi);
-
-// Set/update global/frame level refresh parameters.
-void av1_cyclic_refresh_update_parameters(struct AV1_COMP *const cpi);
-
-// Setup cyclic background refresh: set delta q and segmentation map.
-void av1_cyclic_refresh_setup(struct AV1_COMP *const cpi);
-
-int av1_cyclic_refresh_get_rdmult(const CYCLIC_REFRESH *cr);
-
-void av1_cyclic_refresh_reset_resize(struct AV1_COMP *const cpi);
-
-static INLINE int cyclic_refresh_segment_id_boosted(int segment_id) {
- return segment_id == CR_SEGMENT_ID_BOOST1 ||
- segment_id == CR_SEGMENT_ID_BOOST2;
-}
-
-static INLINE int cyclic_refresh_segment_id(int segment_id) {
- if (segment_id == CR_SEGMENT_ID_BOOST1)
- return CR_SEGMENT_ID_BOOST1;
- else if (segment_id == CR_SEGMENT_ID_BOOST2)
- return CR_SEGMENT_ID_BOOST2;
- else
- return CR_SEGMENT_ID_BASE;
-}
-
-#ifdef __cplusplus
-} // extern "C"
-#endif
-
-#endif // AOM_AV1_ENCODER_AQ_CYCLICREFRESH_H_
diff --git a/third_party/aom/av1/encoder/aq_variance.c b/third_party/aom/av1/encoder/aq_variance.c
deleted file mode 100644
index 58f906bdc..000000000
--- a/third_party/aom/av1/encoder/aq_variance.c
+++ /dev/null
@@ -1,202 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#include <math.h>
-
-#include "aom_ports/mem.h"
-
-#include "av1/encoder/aq_variance.h"
-#include "av1/common/seg_common.h"
-#include "av1/encoder/encodeframe.h"
-#include "av1/encoder/ratectrl.h"
-#include "av1/encoder/rd.h"
-#include "av1/encoder/segmentation.h"
-#include "av1/encoder/dwt.h"
-#include "aom_ports/system_state.h"
-
-static const double rate_ratio[MAX_SEGMENTS] = { 2.2, 1.7, 1.3, 1.0,
- 0.9, .8, .7, .6 };
-
-static const double deltaq_rate_ratio[MAX_SEGMENTS] = { 2.5, 2.0, 1.5, 1.0,
- 0.75, 1.0, 1.0, 1.0 };
-#define ENERGY_MIN (-4)
-#define ENERGY_MAX (1)
-#define ENERGY_SPAN (ENERGY_MAX - ENERGY_MIN + 1)
-#define ENERGY_IN_BOUNDS(energy) \
- assert((energy) >= ENERGY_MIN && (energy) <= ENERGY_MAX)
-
-DECLARE_ALIGNED(16, static const uint8_t, av1_all_zeros[MAX_SB_SIZE]) = { 0 };
-
-DECLARE_ALIGNED(16, static const uint16_t,
- av1_highbd_all_zeros[MAX_SB_SIZE]) = { 0 };
-
-static const int segment_id[ENERGY_SPAN] = { 0, 1, 1, 2, 3, 4 };
-
-#define SEGMENT_ID(i) segment_id[(i)-ENERGY_MIN]
-
-void av1_vaq_frame_setup(AV1_COMP *cpi) {
- AV1_COMMON *cm = &cpi->common;
- struct segmentation *seg = &cm->seg;
- int i;
-
- int resolution_change =
- cm->prev_frame && (cm->width != cm->prev_frame->width ||
- cm->height != cm->prev_frame->height);
- int avg_energy = (int)(cpi->twopass.mb_av_energy - 2);
- double avg_ratio;
- if (avg_energy > 7) avg_energy = 7;
- if (avg_energy < 0) avg_energy = 0;
- avg_ratio = rate_ratio[avg_energy];
-
- if (resolution_change) {
- memset(cpi->segmentation_map, 0, cm->mi_rows * cm->mi_cols);
- av1_clearall_segfeatures(seg);
- aom_clear_system_state();
- av1_disable_segmentation(seg);
- return;
- }
- if (frame_is_intra_only(cm) || cm->error_resilient_mode ||
- cpi->refresh_alt_ref_frame ||
- (cpi->refresh_golden_frame && !cpi->rc.is_src_frame_alt_ref)) {
- cpi->vaq_refresh = 1;
-
- av1_enable_segmentation(seg);
- av1_clearall_segfeatures(seg);
-
- aom_clear_system_state();
-
- for (i = 0; i < MAX_SEGMENTS; ++i) {
- // Set up avg segment id to be 1.0 and adjust the other segments around
- // it.
- int qindex_delta = av1_compute_qdelta_by_rate(
- &cpi->rc, cm->frame_type, cm->base_qindex, rate_ratio[i] / avg_ratio,
- cm->seq_params.bit_depth);
-
- // We don't allow qindex 0 in a segment if the base value is not 0.
- // Q index 0 (lossless) implies 4x4 encoding only and in AQ mode a segment
- // Q delta is sometimes applied without going back around the rd loop.
- // This could lead to an illegal combination of partition size and q.
- if ((cm->base_qindex != 0) && ((cm->base_qindex + qindex_delta) == 0)) {
- qindex_delta = -cm->base_qindex + 1;
- }
-
- av1_set_segdata(seg, i, SEG_LVL_ALT_Q, qindex_delta);
- av1_enable_segfeature(seg, i, SEG_LVL_ALT_Q);
- }
- }
-}
-
-int av1_log_block_var(const AV1_COMP *cpi, MACROBLOCK *x, BLOCK_SIZE bs) {
- // This functions returns a score for the blocks local variance as calculated
- // by: sum of the log of the (4x4 variances) of each subblock to the current
- // block (x,bs)
- // * 32 / number of pixels in the block_size.
- // This is used for segmentation because to avoid situations in which a large
- // block with a gentle gradient gets marked high variance even though each
- // subblock has a low variance. This allows us to assign the same segment
- // number for the same sorts of area regardless of how the partitioning goes.
-
- MACROBLOCKD *xd = &x->e_mbd;
- double var = 0;
- unsigned int sse;
- int i, j;
-
- int right_overflow =
- (xd->mb_to_right_edge < 0) ? ((-xd->mb_to_right_edge) >> 3) : 0;
- int bottom_overflow =
- (xd->mb_to_bottom_edge < 0) ? ((-xd->mb_to_bottom_edge) >> 3) : 0;
-
- const int bw = MI_SIZE * mi_size_wide[bs] - right_overflow;
- const int bh = MI_SIZE * mi_size_high[bs] - bottom_overflow;
-
- aom_clear_system_state();
-
- for (i = 0; i < bh; i += 4) {
- for (j = 0; j < bw; j += 4) {
- if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
- var +=
- log(1.0 + cpi->fn_ptr[BLOCK_4X4].vf(
- x->plane[0].src.buf + i * x->plane[0].src.stride + j,
- x->plane[0].src.stride,
- CONVERT_TO_BYTEPTR(av1_highbd_all_zeros), 0, &sse) /
- 16);
- } else {
- var +=
- log(1.0 + cpi->fn_ptr[BLOCK_4X4].vf(
- x->plane[0].src.buf + i * x->plane[0].src.stride + j,
- x->plane[0].src.stride, av1_all_zeros, 0, &sse) /
- 16);
- }
- }
- }
- // Use average of 4x4 log variance. The range for 8 bit 0 - 9.704121561.
- var /= (bw / 4 * bh / 4);
- if (var > 7) var = 7;
-
- aom_clear_system_state();
- return (int)(var);
-}
-
-#define DEFAULT_E_MIDPOINT 10.0
-
-unsigned int haar_ac_energy(MACROBLOCK *x, BLOCK_SIZE bs) {
- MACROBLOCKD *xd = &x->e_mbd;
- int stride = x->plane[0].src.stride;
- uint8_t *buf = x->plane[0].src.buf;
- const int bw = MI_SIZE * mi_size_wide[bs];
- const int bh = MI_SIZE * mi_size_high[bs];
- int hbd = xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH;
-
- int var = 0;
- for (int r = 0; r < bh; r += 8)
- for (int c = 0; c < bw; c += 8) {
- var += av1_haar_ac_sad_8x8_uint8_input(buf + c + r * stride, stride, hbd);
- }
-
- return (unsigned int)((uint64_t)var * 256) >> num_pels_log2_lookup[bs];
-}
-
-double av1_log_block_wavelet_energy(MACROBLOCK *x, BLOCK_SIZE bs) {
- unsigned int haar_sad = haar_ac_energy(x, bs);
- aom_clear_system_state();
- return log(haar_sad + 1.0);
-}
-
-int av1_block_wavelet_energy_level(const AV1_COMP *cpi, MACROBLOCK *x,
- BLOCK_SIZE bs) {
- double energy, energy_midpoint;
- aom_clear_system_state();
- energy_midpoint = (cpi->oxcf.pass == 2) ? cpi->twopass.frame_avg_haar_energy
- : DEFAULT_E_MIDPOINT;
- energy = av1_log_block_wavelet_energy(x, bs) - energy_midpoint;
- return clamp((int)round(energy), ENERGY_MIN, ENERGY_MAX);
-}
-
-int av1_compute_deltaq_from_energy_level(const AV1_COMP *const cpi,
- int block_var_level) {
- int rate_level;
- const AV1_COMMON *const cm = &cpi->common;
-
- if (DELTAQ_MODULATION == 1) {
- ENERGY_IN_BOUNDS(block_var_level);
- rate_level = SEGMENT_ID(block_var_level);
- } else {
- rate_level = block_var_level;
- }
- int qindex_delta = av1_compute_qdelta_by_rate(
- &cpi->rc, cm->frame_type, cm->base_qindex, deltaq_rate_ratio[rate_level],
- cm->seq_params.bit_depth);
-
- if ((cm->base_qindex != 0) && ((cm->base_qindex + qindex_delta) == 0)) {
- qindex_delta = -cm->base_qindex + 1;
- }
- return qindex_delta;
-}
diff --git a/third_party/aom/av1/encoder/aq_variance.h b/third_party/aom/av1/encoder/aq_variance.h
deleted file mode 100644
index 2d22b663e..000000000
--- a/third_party/aom/av1/encoder/aq_variance.h
+++ /dev/null
@@ -1,33 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#ifndef AOM_AV1_ENCODER_AQ_VARIANCE_H_
-#define AOM_AV1_ENCODER_AQ_VARIANCE_H_
-
-#include "av1/encoder/encoder.h"
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-void av1_vaq_frame_setup(AV1_COMP *cpi);
-
-int av1_log_block_var(const AV1_COMP *cpi, MACROBLOCK *x, BLOCK_SIZE bs);
-int av1_compute_deltaq_from_energy_level(const AV1_COMP *const cpi,
- int block_var_level);
-int av1_block_wavelet_energy_level(const AV1_COMP *cpi, MACROBLOCK *x,
- BLOCK_SIZE bs);
-
-#ifdef __cplusplus
-} // extern "C"
-#endif
-
-#endif // AOM_AV1_ENCODER_AQ_VARIANCE_H_
diff --git a/third_party/aom/av1/encoder/arm/neon/quantize_neon.c b/third_party/aom/av1/encoder/arm/neon/quantize_neon.c
deleted file mode 100644
index 36e7d3370..000000000
--- a/third_party/aom/av1/encoder/arm/neon/quantize_neon.c
+++ /dev/null
@@ -1,118 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#include <arm_neon.h>
-
-#include <math.h>
-
-#include "aom_mem/aom_mem.h"
-
-#include "av1/common/quant_common.h"
-#include "av1/common/seg_common.h"
-
-#include "av1/encoder/av1_quantize.h"
-#include "av1/encoder/encoder.h"
-#include "av1/encoder/rd.h"
-
-void av1_quantize_fp_neon(const int16_t *coeff_ptr, intptr_t count,
- int skip_block, const int16_t *zbin_ptr,
- const int16_t *round_ptr, const int16_t *quant_ptr,
- const int16_t *quant_shift_ptr, int16_t *qcoeff_ptr,
- int16_t *dqcoeff_ptr, const int16_t *dequant_ptr,
- uint16_t *eob_ptr, const int16_t *scan,
- const int16_t *iscan) {
- // TODO(jingning) Decide the need of these arguments after the
- // quantization process is completed.
- (void)zbin_ptr;
- (void)quant_shift_ptr;
- (void)scan;
-
- if (!skip_block) {
- // Quantization pass: All coefficients with index >= zero_flag are
- // skippable. Note: zero_flag can be zero.
- int i;
- const int16x8_t v_zero = vdupq_n_s16(0);
- const int16x8_t v_one = vdupq_n_s16(1);
- int16x8_t v_eobmax_76543210 = vdupq_n_s16(-1);
- int16x8_t v_round = vmovq_n_s16(round_ptr[1]);
- int16x8_t v_quant = vmovq_n_s16(quant_ptr[1]);
- int16x8_t v_dequant = vmovq_n_s16(dequant_ptr[1]);
- // adjust for dc
- v_round = vsetq_lane_s16(round_ptr[0], v_round, 0);
- v_quant = vsetq_lane_s16(quant_ptr[0], v_quant, 0);
- v_dequant = vsetq_lane_s16(dequant_ptr[0], v_dequant, 0);
- // process dc and the first seven ac coeffs
- {
- const int16x8_t v_iscan = vld1q_s16(&iscan[0]);
- const int16x8_t v_coeff = vld1q_s16(&coeff_ptr[0]);
- const int16x8_t v_coeff_sign = vshrq_n_s16(v_coeff, 15);
- const int16x8_t v_tmp = vabaq_s16(v_round, v_coeff, v_zero);
- const int32x4_t v_tmp_lo =
- vmull_s16(vget_low_s16(v_tmp), vget_low_s16(v_quant));
- const int32x4_t v_tmp_hi =
- vmull_s16(vget_high_s16(v_tmp), vget_high_s16(v_quant));
- const int16x8_t v_tmp2 =
- vcombine_s16(vshrn_n_s32(v_tmp_lo, 16), vshrn_n_s32(v_tmp_hi, 16));
- const uint16x8_t v_nz_mask = vceqq_s16(v_tmp2, v_zero);
- const int16x8_t v_iscan_plus1 = vaddq_s16(v_iscan, v_one);
- const int16x8_t v_nz_iscan = vbslq_s16(v_nz_mask, v_zero, v_iscan_plus1);
- const int16x8_t v_qcoeff_a = veorq_s16(v_tmp2, v_coeff_sign);
- const int16x8_t v_qcoeff = vsubq_s16(v_qcoeff_a, v_coeff_sign);
- const int16x8_t v_dqcoeff = vmulq_s16(v_qcoeff, v_dequant);
- v_eobmax_76543210 = vmaxq_s16(v_eobmax_76543210, v_nz_iscan);
- vst1q_s16(&qcoeff_ptr[0], v_qcoeff);
- vst1q_s16(&dqcoeff_ptr[0], v_dqcoeff);
- v_round = vmovq_n_s16(round_ptr[1]);
- v_quant = vmovq_n_s16(quant_ptr[1]);
- v_dequant = vmovq_n_s16(dequant_ptr[1]);
- }
- // now process the rest of the ac coeffs
- for (i = 8; i < count; i += 8) {
- const int16x8_t v_iscan = vld1q_s16(&iscan[i]);
- const int16x8_t v_coeff = vld1q_s16(&coeff_ptr[i]);
- const int16x8_t v_coeff_sign = vshrq_n_s16(v_coeff, 15);
- const int16x8_t v_tmp = vabaq_s16(v_round, v_coeff, v_zero);
- const int32x4_t v_tmp_lo =
- vmull_s16(vget_low_s16(v_tmp), vget_low_s16(v_quant));
- const int32x4_t v_tmp_hi =
- vmull_s16(vget_high_s16(v_tmp), vget_high_s16(v_quant));
- const int16x8_t v_tmp2 =
- vcombine_s16(vshrn_n_s32(v_tmp_lo, 16), vshrn_n_s32(v_tmp_hi, 16));
- const uint16x8_t v_nz_mask = vceqq_s16(v_tmp2, v_zero);
- const int16x8_t v_iscan_plus1 = vaddq_s16(v_iscan, v_one);
- const int16x8_t v_nz_iscan = vbslq_s16(v_nz_mask, v_zero, v_iscan_plus1);
- const int16x8_t v_qcoeff_a = veorq_s16(v_tmp2, v_coeff_sign);
- const int16x8_t v_qcoeff = vsubq_s16(v_qcoeff_a, v_coeff_sign);
- const int16x8_t v_dqcoeff = vmulq_s16(v_qcoeff, v_dequant);
- v_eobmax_76543210 = vmaxq_s16(v_eobmax_76543210, v_nz_iscan);
- vst1q_s16(&qcoeff_ptr[i], v_qcoeff);
- vst1q_s16(&dqcoeff_ptr[i], v_dqcoeff);
- }
- {
- const int16x4_t v_eobmax_3210 = vmax_s16(
- vget_low_s16(v_eobmax_76543210), vget_high_s16(v_eobmax_76543210));
- const int64x1_t v_eobmax_xx32 =
- vshr_n_s64(vreinterpret_s64_s16(v_eobmax_3210), 32);
- const int16x4_t v_eobmax_tmp =
- vmax_s16(v_eobmax_3210, vreinterpret_s16_s64(v_eobmax_xx32));
- const int64x1_t v_eobmax_xxx3 =
- vshr_n_s64(vreinterpret_s64_s16(v_eobmax_tmp), 16);
- const int16x4_t v_eobmax_final =
- vmax_s16(v_eobmax_tmp, vreinterpret_s16_s64(v_eobmax_xxx3));
-
- *eob_ptr = (uint16_t)vget_lane_s16(v_eobmax_final, 0);
- }
- } else {
- memset(qcoeff_ptr, 0, count * sizeof(int16_t));
- memset(dqcoeff_ptr, 0, count * sizeof(int16_t));
- *eob_ptr = 0;
- }
-}
diff --git a/third_party/aom/av1/encoder/av1_fwd_txfm1d.c b/third_party/aom/av1/encoder/av1_fwd_txfm1d.c
deleted file mode 100644
index 98505e0b1..000000000
--- a/third_party/aom/av1/encoder/av1_fwd_txfm1d.c
+++ /dev/null
@@ -1,1885 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#include <stdlib.h>
-#include "av1/encoder/av1_fwd_txfm1d.h"
-#include "av1/common/av1_txfm.h"
-
-void av1_fdct4_new(const int32_t *input, int32_t *output, int8_t cos_bit,
- const int8_t *stage_range) {
- const int32_t size = 4;
- const int32_t *cospi;
-
- int32_t stage = 0;
- int32_t *bf0, *bf1;
- int32_t step[4];
-
- // stage 0;
- av1_range_check_buf(stage, input, input, size, stage_range[stage]);
-
- // stage 1;
- stage++;
- bf1 = output;
- bf1[0] = input[0] + input[3];
- bf1[1] = input[1] + input[2];
- bf1[2] = -input[2] + input[1];
- bf1[3] = -input[3] + input[0];
- av1_range_check_buf(stage, input, bf1, size, stage_range[stage]);
-
- // stage 2
- stage++;
- cospi = cospi_arr(cos_bit);
- bf0 = output;
- bf1 = step;
- bf1[0] = half_btf(cospi[32], bf0[0], cospi[32], bf0[1], cos_bit);
- bf1[1] = half_btf(-cospi[32], bf0[1], cospi[32], bf0[0], cos_bit);
- bf1[2] = half_btf(cospi[48], bf0[2], cospi[16], bf0[3], cos_bit);
- bf1[3] = half_btf(cospi[48], bf0[3], -cospi[16], bf0[2], cos_bit);
- av1_range_check_buf(stage, input, bf1, size, stage_range[stage]);
-
- // stage 3
- stage++;
- bf0 = step;
- bf1 = output;
- bf1[0] = bf0[0];
- bf1[1] = bf0[2];
- bf1[2] = bf0[1];
- bf1[3] = bf0[3];
- av1_range_check_buf(stage, input, bf1, size, stage_range[stage]);
-}
-
-void av1_fdct8_new(const int32_t *input, int32_t *output, int8_t cos_bit,
- const int8_t *stage_range) {
- const int32_t size = 8;
- const int32_t *cospi;
-
- int32_t stage = 0;
- int32_t *bf0, *bf1;
- int32_t step[8];
-
- // stage 0;
- av1_range_check_buf(stage, input, input, size, stage_range[stage]);
-
- // stage 1;
- stage++;
- bf1 = output;
- bf1[0] = input[0] + input[7];
- bf1[1] = input[1] + input[6];
- bf1[2] = input[2] + input[5];
- bf1[3] = input[3] + input[4];
- bf1[4] = -input[4] + input[3];
- bf1[5] = -input[5] + input[2];
- bf1[6] = -input[6] + input[1];
- bf1[7] = -input[7] + input[0];
- av1_range_check_buf(stage, input, bf1, size, stage_range[stage]);
-
- // stage 2
- stage++;
- cospi = cospi_arr(cos_bit);
- bf0 = output;
- bf1 = step;
- bf1[0] = bf0[0] + bf0[3];
- bf1[1] = bf0[1] + bf0[2];
- bf1[2] = -bf0[2] + bf0[1];
- bf1[3] = -bf0[3] + bf0[0];
- bf1[4] = bf0[4];
- bf1[5] = half_btf(-cospi[32], bf0[5], cospi[32], bf0[6], cos_bit);
- bf1[6] = half_btf(cospi[32], bf0[6], cospi[32], bf0[5], cos_bit);
- bf1[7] = bf0[7];
- av1_range_check_buf(stage, input, bf1, size, stage_range[stage]);
-
- // stage 3
- stage++;
- cospi = cospi_arr(cos_bit);
- bf0 = step;
- bf1 = output;
- bf1[0] = half_btf(cospi[32], bf0[0], cospi[32], bf0[1], cos_bit);
- bf1[1] = half_btf(-cospi[32], bf0[1], cospi[32], bf0[0], cos_bit);
- bf1[2] = half_btf(cospi[48], bf0[2], cospi[16], bf0[3], cos_bit);
- bf1[3] = half_btf(cospi[48], bf0[3], -cospi[16], bf0[2], cos_bit);
- bf1[4] = bf0[4] + bf0[5];
- bf1[5] = -bf0[5] + bf0[4];
- bf1[6] = -bf0[6] + bf0[7];
- bf1[7] = bf0[7] + bf0[6];
- av1_range_check_buf(stage, input, bf1, size, stage_range[stage]);
-
- // stage 4
- stage++;
- cospi = cospi_arr(cos_bit);
- bf0 = output;
- bf1 = step;
- bf1[0] = bf0[0];
- bf1[1] = bf0[1];
- bf1[2] = bf0[2];
- bf1[3] = bf0[3];
- bf1[4] = half_btf(cospi[56], bf0[4], cospi[8], bf0[7], cos_bit);
- bf1[5] = half_btf(cospi[24], bf0[5], cospi[40], bf0[6], cos_bit);
- bf1[6] = half_btf(cospi[24], bf0[6], -cospi[40], bf0[5], cos_bit);
- bf1[7] = half_btf(cospi[56], bf0[7], -cospi[8], bf0[4], cos_bit);
- av1_range_check_buf(stage, input, bf1, size, stage_range[stage]);
-
- // stage 5
- stage++;
- bf0 = step;
- bf1 = output;
- bf1[0] = bf0[0];
- bf1[1] = bf0[4];
- bf1[2] = bf0[2];
- bf1[3] = bf0[6];
- bf1[4] = bf0[1];
- bf1[5] = bf0[5];
- bf1[6] = bf0[3];
- bf1[7] = bf0[7];
- av1_range_check_buf(stage, input, bf1, size, stage_range[stage]);
-}
-
-void av1_fdct16_new(const int32_t *input, int32_t *output, int8_t cos_bit,
- const int8_t *stage_range) {
- const int32_t size = 16;
- const int32_t *cospi;
-
- int32_t stage = 0;
- int32_t *bf0, *bf1;
- int32_t step[16];
-
- // stage 0;
- av1_range_check_buf(stage, input, input, size, stage_range[stage]);
-
- // stage 1;
- stage++;
- bf1 = output;
- bf1[0] = input[0] + input[15];
- bf1[1] = input[1] + input[14];
- bf1[2] = input[2] + input[13];
- bf1[3] = input[3] + input[12];
- bf1[4] = input[4] + input[11];
- bf1[5] = input[5] + input[10];
- bf1[6] = input[6] + input[9];
- bf1[7] = input[7] + input[8];
- bf1[8] = -input[8] + input[7];
- bf1[9] = -input[9] + input[6];
- bf1[10] = -input[10] + input[5];
- bf1[11] = -input[11] + input[4];
- bf1[12] = -input[12] + input[3];
- bf1[13] = -input[13] + input[2];
- bf1[14] = -input[14] + input[1];
- bf1[15] = -input[15] + input[0];
- av1_range_check_buf(stage, input, bf1, size, stage_range[stage]);
-
- // stage 2
- stage++;
- cospi = cospi_arr(cos_bit);
- bf0 = output;
- bf1 = step;
- bf1[0] = bf0[0] + bf0[7];
- bf1[1] = bf0[1] + bf0[6];
- bf1[2] = bf0[2] + bf0[5];
- bf1[3] = bf0[3] + bf0[4];
- bf1[4] = -bf0[4] + bf0[3];
- bf1[5] = -bf0[5] + bf0[2];
- bf1[6] = -bf0[6] + bf0[1];
- bf1[7] = -bf0[7] + bf0[0];
- bf1[8] = bf0[8];
- bf1[9] = bf0[9];
- bf1[10] = half_btf(-cospi[32], bf0[10], cospi[32], bf0[13], cos_bit);
- bf1[11] = half_btf(-cospi[32], bf0[11], cospi[32], bf0[12], cos_bit);
- bf1[12] = half_btf(cospi[32], bf0[12], cospi[32], bf0[11], cos_bit);
- bf1[13] = half_btf(cospi[32], bf0[13], cospi[32], bf0[10], cos_bit);
- bf1[14] = bf0[14];
- bf1[15] = bf0[15];
- av1_range_check_buf(stage, input, bf1, size, stage_range[stage]);
-
- // stage 3
- stage++;
- cospi = cospi_arr(cos_bit);
- bf0 = step;
- bf1 = output;
- bf1[0] = bf0[0] + bf0[3];
- bf1[1] = bf0[1] + bf0[2];
- bf1[2] = -bf0[2] + bf0[1];
- bf1[3] = -bf0[3] + bf0[0];
- bf1[4] = bf0[4];
- bf1[5] = half_btf(-cospi[32], bf0[5], cospi[32], bf0[6], cos_bit);
- bf1[6] = half_btf(cospi[32], bf0[6], cospi[32], bf0[5], cos_bit);
- bf1[7] = bf0[7];
- bf1[8] = bf0[8] + bf0[11];
- bf1[9] = bf0[9] + bf0[10];
- bf1[10] = -bf0[10] + bf0[9];
- bf1[11] = -bf0[11] + bf0[8];
- bf1[12] = -bf0[12] + bf0[15];
- bf1[13] = -bf0[13] + bf0[14];
- bf1[14] = bf0[14] + bf0[13];
- bf1[15] = bf0[15] + bf0[12];
- av1_range_check_buf(stage, input, bf1, size, stage_range[stage]);
-
- // stage 4
- stage++;
- cospi = cospi_arr(cos_bit);
- bf0 = output;
- bf1 = step;
- bf1[0] = half_btf(cospi[32], bf0[0], cospi[32], bf0[1], cos_bit);
- bf1[1] = half_btf(-cospi[32], bf0[1], cospi[32], bf0[0], cos_bit);
- bf1[2] = half_btf(cospi[48], bf0[2], cospi[16], bf0[3], cos_bit);
- bf1[3] = half_btf(cospi[48], bf0[3], -cospi[16], bf0[2], cos_bit);
- bf1[4] = bf0[4] + bf0[5];
- bf1[5] = -bf0[5] + bf0[4];
- bf1[6] = -bf0[6] + bf0[7];
- bf1[7] = bf0[7] + bf0[6];
- bf1[8] = bf0[8];
- bf1[9] = half_btf(-cospi[16], bf0[9], cospi[48], bf0[14], cos_bit);
- bf1[10] = half_btf(-cospi[48], bf0[10], -cospi[16], bf0[13], cos_bit);
- bf1[11] = bf0[11];
- bf1[12] = bf0[12];
- bf1[13] = half_btf(cospi[48], bf0[13], -cospi[16], bf0[10], cos_bit);
- bf1[14] = half_btf(cospi[16], bf0[14], cospi[48], bf0[9], cos_bit);
- bf1[15] = bf0[15];
- av1_range_check_buf(stage, input, bf1, size, stage_range[stage]);
-
- // stage 5
- stage++;
- cospi = cospi_arr(cos_bit);
- bf0 = step;
- bf1 = output;
- bf1[0] = bf0[0];
- bf1[1] = bf0[1];
- bf1[2] = bf0[2];
- bf1[3] = bf0[3];
- bf1[4] = half_btf(cospi[56], bf0[4], cospi[8], bf0[7], cos_bit);
- bf1[5] = half_btf(cospi[24], bf0[5], cospi[40], bf0[6], cos_bit);
- bf1[6] = half_btf(cospi[24], bf0[6], -cospi[40], bf0[5], cos_bit);
- bf1[7] = half_btf(cospi[56], bf0[7], -cospi[8], bf0[4], cos_bit);
- bf1[8] = bf0[8] + bf0[9];
- bf1[9] = -bf0[9] + bf0[8];
- bf1[10] = -bf0[10] + bf0[11];
- bf1[11] = bf0[11] + bf0[10];
- bf1[12] = bf0[12] + bf0[13];
- bf1[13] = -bf0[13] + bf0[12];
- bf1[14] = -bf0[14] + bf0[15];
- bf1[15] = bf0[15] + bf0[14];
- av1_range_check_buf(stage, input, bf1, size, stage_range[stage]);
-
- // stage 6
- stage++;
- cospi = cospi_arr(cos_bit);
- bf0 = output;
- bf1 = step;
- bf1[0] = bf0[0];
- bf1[1] = bf0[1];
- bf1[2] = bf0[2];
- bf1[3] = bf0[3];
- bf1[4] = bf0[4];
- bf1[5] = bf0[5];
- bf1[6] = bf0[6];
- bf1[7] = bf0[7];
- bf1[8] = half_btf(cospi[60], bf0[8], cospi[4], bf0[15], cos_bit);
- bf1[9] = half_btf(cospi[28], bf0[9], cospi[36], bf0[14], cos_bit);
- bf1[10] = half_btf(cospi[44], bf0[10], cospi[20], bf0[13], cos_bit);
- bf1[11] = half_btf(cospi[12], bf0[11], cospi[52], bf0[12], cos_bit);
- bf1[12] = half_btf(cospi[12], bf0[12], -cospi[52], bf0[11], cos_bit);
- bf1[13] = half_btf(cospi[44], bf0[13], -cospi[20], bf0[10], cos_bit);
- bf1[14] = half_btf(cospi[28], bf0[14], -cospi[36], bf0[9], cos_bit);
- bf1[15] = half_btf(cospi[60], bf0[15], -cospi[4], bf0[8], cos_bit);
- av1_range_check_buf(stage, input, bf1, size, stage_range[stage]);
-
- // stage 7
- stage++;
- bf0 = step;
- bf1 = output;
- bf1[0] = bf0[0];
- bf1[1] = bf0[8];
- bf1[2] = bf0[4];
- bf1[3] = bf0[12];
- bf1[4] = bf0[2];
- bf1[5] = bf0[10];
- bf1[6] = bf0[6];
- bf1[7] = bf0[14];
- bf1[8] = bf0[1];
- bf1[9] = bf0[9];
- bf1[10] = bf0[5];
- bf1[11] = bf0[13];
- bf1[12] = bf0[3];
- bf1[13] = bf0[11];
- bf1[14] = bf0[7];
- bf1[15] = bf0[15];
- av1_range_check_buf(stage, input, bf1, size, stage_range[stage]);
-}
-
-void av1_fdct32_new(const int32_t *input, int32_t *output, int8_t cos_bit,
- const int8_t *stage_range) {
- const int32_t size = 32;
- const int32_t *cospi;
-
- int32_t stage = 0;
- int32_t *bf0, *bf1;
- int32_t step[32];
-
- // stage 0;
- av1_range_check_buf(stage, input, input, size, stage_range[stage]);
-
- // stage 1;
- stage++;
- bf1 = output;
- bf1[0] = input[0] + input[31];
- bf1[1] = input[1] + input[30];
- bf1[2] = input[2] + input[29];
- bf1[3] = input[3] + input[28];
- bf1[4] = input[4] + input[27];
- bf1[5] = input[5] + input[26];
- bf1[6] = input[6] + input[25];
- bf1[7] = input[7] + input[24];
- bf1[8] = input[8] + input[23];
- bf1[9] = input[9] + input[22];
- bf1[10] = input[10] + input[21];
- bf1[11] = input[11] + input[20];
- bf1[12] = input[12] + input[19];
- bf1[13] = input[13] + input[18];
- bf1[14] = input[14] + input[17];
- bf1[15] = input[15] + input[16];
- bf1[16] = -input[16] + input[15];
- bf1[17] = -input[17] + input[14];
- bf1[18] = -input[18] + input[13];
- bf1[19] = -input[19] + input[12];
- bf1[20] = -input[20] + input[11];
- bf1[21] = -input[21] + input[10];
- bf1[22] = -input[22] + input[9];
- bf1[23] = -input[23] + input[8];
- bf1[24] = -input[24] + input[7];
- bf1[25] = -input[25] + input[6];
- bf1[26] = -input[26] + input[5];
- bf1[27] = -input[27] + input[4];
- bf1[28] = -input[28] + input[3];
- bf1[29] = -input[29] + input[2];
- bf1[30] = -input[30] + input[1];
- bf1[31] = -input[31] + input[0];
- av1_range_check_buf(stage, input, bf1, size, stage_range[stage]);
-
- // stage 2
- stage++;
- cospi = cospi_arr(cos_bit);
- bf0 = output;
- bf1 = step;
- bf1[0] = bf0[0] + bf0[15];
- bf1[1] = bf0[1] + bf0[14];
- bf1[2] = bf0[2] + bf0[13];
- bf1[3] = bf0[3] + bf0[12];
- bf1[4] = bf0[4] + bf0[11];
- bf1[5] = bf0[5] + bf0[10];
- bf1[6] = bf0[6] + bf0[9];
- bf1[7] = bf0[7] + bf0[8];
- bf1[8] = -bf0[8] + bf0[7];
- bf1[9] = -bf0[9] + bf0[6];
- bf1[10] = -bf0[10] + bf0[5];
- bf1[11] = -bf0[11] + bf0[4];
- bf1[12] = -bf0[12] + bf0[3];
- bf1[13] = -bf0[13] + bf0[2];
- bf1[14] = -bf0[14] + bf0[1];
- bf1[15] = -bf0[15] + bf0[0];
- bf1[16] = bf0[16];
- bf1[17] = bf0[17];
- bf1[18] = bf0[18];
- bf1[19] = bf0[19];
- bf1[20] = half_btf(-cospi[32], bf0[20], cospi[32], bf0[27], cos_bit);
- bf1[21] = half_btf(-cospi[32], bf0[21], cospi[32], bf0[26], cos_bit);
- bf1[22] = half_btf(-cospi[32], bf0[22], cospi[32], bf0[25], cos_bit);
- bf1[23] = half_btf(-cospi[32], bf0[23], cospi[32], bf0[24], cos_bit);
- bf1[24] = half_btf(cospi[32], bf0[24], cospi[32], bf0[23], cos_bit);
- bf1[25] = half_btf(cospi[32], bf0[25], cospi[32], bf0[22], cos_bit);
- bf1[26] = half_btf(cospi[32], bf0[26], cospi[32], bf0[21], cos_bit);
- bf1[27] = half_btf(cospi[32], bf0[27], cospi[32], bf0[20], cos_bit);
- bf1[28] = bf0[28];
- bf1[29] = bf0[29];
- bf1[30] = bf0[30];
- bf1[31] = bf0[31];
- av1_range_check_buf(stage, input, bf1, size, stage_range[stage]);
-
- // stage 3
- stage++;
- cospi = cospi_arr(cos_bit);
- bf0 = step;
- bf1 = output;
- bf1[0] = bf0[0] + bf0[7];
- bf1[1] = bf0[1] + bf0[6];
- bf1[2] = bf0[2] + bf0[5];
- bf1[3] = bf0[3] + bf0[4];
- bf1[4] = -bf0[4] + bf0[3];
- bf1[5] = -bf0[5] + bf0[2];
- bf1[6] = -bf0[6] + bf0[1];
- bf1[7] = -bf0[7] + bf0[0];
- bf1[8] = bf0[8];
- bf1[9] = bf0[9];
- bf1[10] = half_btf(-cospi[32], bf0[10], cospi[32], bf0[13], cos_bit);
- bf1[11] = half_btf(-cospi[32], bf0[11], cospi[32], bf0[12], cos_bit);
- bf1[12] = half_btf(cospi[32], bf0[12], cospi[32], bf0[11], cos_bit);
- bf1[13] = half_btf(cospi[32], bf0[13], cospi[32], bf0[10], cos_bit);
- bf1[14] = bf0[14];
- bf1[15] = bf0[15];
- bf1[16] = bf0[16] + bf0[23];
- bf1[17] = bf0[17] + bf0[22];
- bf1[18] = bf0[18] + bf0[21];
- bf1[19] = bf0[19] + bf0[20];
- bf1[20] = -bf0[20] + bf0[19];
- bf1[21] = -bf0[21] + bf0[18];
- bf1[22] = -bf0[22] + bf0[17];
- bf1[23] = -bf0[23] + bf0[16];
- bf1[24] = -bf0[24] + bf0[31];
- bf1[25] = -bf0[25] + bf0[30];
- bf1[26] = -bf0[26] + bf0[29];
- bf1[27] = -bf0[27] + bf0[28];
- bf1[28] = bf0[28] + bf0[27];
- bf1[29] = bf0[29] + bf0[26];
- bf1[30] = bf0[30] + bf0[25];
- bf1[31] = bf0[31] + bf0[24];
- av1_range_check_buf(stage, input, bf1, size, stage_range[stage]);
-
- // stage 4
- stage++;
- cospi = cospi_arr(cos_bit);
- bf0 = output;
- bf1 = step;
- bf1[0] = bf0[0] + bf0[3];
- bf1[1] = bf0[1] + bf0[2];
- bf1[2] = -bf0[2] + bf0[1];
- bf1[3] = -bf0[3] + bf0[0];
- bf1[4] = bf0[4];
- bf1[5] = half_btf(-cospi[32], bf0[5], cospi[32], bf0[6], cos_bit);
- bf1[6] = half_btf(cospi[32], bf0[6], cospi[32], bf0[5], cos_bit);
- bf1[7] = bf0[7];
- bf1[8] = bf0[8] + bf0[11];
- bf1[9] = bf0[9] + bf0[10];
- bf1[10] = -bf0[10] + bf0[9];
- bf1[11] = -bf0[11] + bf0[8];
- bf1[12] = -bf0[12] + bf0[15];
- bf1[13] = -bf0[13] + bf0[14];
- bf1[14] = bf0[14] + bf0[13];
- bf1[15] = bf0[15] + bf0[12];
- bf1[16] = bf0[16];
- bf1[17] = bf0[17];
- bf1[18] = half_btf(-cospi[16], bf0[18], cospi[48], bf0[29], cos_bit);
- bf1[19] = half_btf(-cospi[16], bf0[19], cospi[48], bf0[28], cos_bit);
- bf1[20] = half_btf(-cospi[48], bf0[20], -cospi[16], bf0[27], cos_bit);
- bf1[21] = half_btf(-cospi[48], bf0[21], -cospi[16], bf0[26], cos_bit);
- bf1[22] = bf0[22];
- bf1[23] = bf0[23];
- bf1[24] = bf0[24];
- bf1[25] = bf0[25];
- bf1[26] = half_btf(cospi[48], bf0[26], -cospi[16], bf0[21], cos_bit);
- bf1[27] = half_btf(cospi[48], bf0[27], -cospi[16], bf0[20], cos_bit);
- bf1[28] = half_btf(cospi[16], bf0[28], cospi[48], bf0[19], cos_bit);
- bf1[29] = half_btf(cospi[16], bf0[29], cospi[48], bf0[18], cos_bit);
- bf1[30] = bf0[30];
- bf1[31] = bf0[31];
- av1_range_check_buf(stage, input, bf1, size, stage_range[stage]);
-
- // stage 5
- stage++;
- cospi = cospi_arr(cos_bit);
- bf0 = step;
- bf1 = output;
- bf1[0] = half_btf(cospi[32], bf0[0], cospi[32], bf0[1], cos_bit);
- bf1[1] = half_btf(-cospi[32], bf0[1], cospi[32], bf0[0], cos_bit);
- bf1[2] = half_btf(cospi[48], bf0[2], cospi[16], bf0[3], cos_bit);
- bf1[3] = half_btf(cospi[48], bf0[3], -cospi[16], bf0[2], cos_bit);
- bf1[4] = bf0[4] + bf0[5];
- bf1[5] = -bf0[5] + bf0[4];
- bf1[6] = -bf0[6] + bf0[7];
- bf1[7] = bf0[7] + bf0[6];
- bf1[8] = bf0[8];
- bf1[9] = half_btf(-cospi[16], bf0[9], cospi[48], bf0[14], cos_bit);
- bf1[10] = half_btf(-cospi[48], bf0[10], -cospi[16], bf0[13], cos_bit);
- bf1[11] = bf0[11];
- bf1[12] = bf0[12];
- bf1[13] = half_btf(cospi[48], bf0[13], -cospi[16], bf0[10], cos_bit);
- bf1[14] = half_btf(cospi[16], bf0[14], cospi[48], bf0[9], cos_bit);
- bf1[15] = bf0[15];
- bf1[16] = bf0[16] + bf0[19];
- bf1[17] = bf0[17] + bf0[18];
- bf1[18] = -bf0[18] + bf0[17];
- bf1[19] = -bf0[19] + bf0[16];
- bf1[20] = -bf0[20] + bf0[23];
- bf1[21] = -bf0[21] + bf0[22];
- bf1[22] = bf0[22] + bf0[21];
- bf1[23] = bf0[23] + bf0[20];
- bf1[24] = bf0[24] + bf0[27];
- bf1[25] = bf0[25] + bf0[26];
- bf1[26] = -bf0[26] + bf0[25];
- bf1[27] = -bf0[27] + bf0[24];
- bf1[28] = -bf0[28] + bf0[31];
- bf1[29] = -bf0[29] + bf0[30];
- bf1[30] = bf0[30] + bf0[29];
- bf1[31] = bf0[31] + bf0[28];
- av1_range_check_buf(stage, input, bf1, size, stage_range[stage]);
-
- // stage 6
- stage++;
- cospi = cospi_arr(cos_bit);
- bf0 = output;
- bf1 = step;
- bf1[0] = bf0[0];
- bf1[1] = bf0[1];
- bf1[2] = bf0[2];
- bf1[3] = bf0[3];
- bf1[4] = half_btf(cospi[56], bf0[4], cospi[8], bf0[7], cos_bit);
- bf1[5] = half_btf(cospi[24], bf0[5], cospi[40], bf0[6], cos_bit);
- bf1[6] = half_btf(cospi[24], bf0[6], -cospi[40], bf0[5], cos_bit);
- bf1[7] = half_btf(cospi[56], bf0[7], -cospi[8], bf0[4], cos_bit);
- bf1[8] = bf0[8] + bf0[9];
- bf1[9] = -bf0[9] + bf0[8];
- bf1[10] = -bf0[10] + bf0[11];
- bf1[11] = bf0[11] + bf0[10];
- bf1[12] = bf0[12] + bf0[13];
- bf1[13] = -bf0[13] + bf0[12];
- bf1[14] = -bf0[14] + bf0[15];
- bf1[15] = bf0[15] + bf0[14];
- bf1[16] = bf0[16];
- bf1[17] = half_btf(-cospi[8], bf0[17], cospi[56], bf0[30], cos_bit);
- bf1[18] = half_btf(-cospi[56], bf0[18], -cospi[8], bf0[29], cos_bit);
- bf1[19] = bf0[19];
- bf1[20] = bf0[20];
- bf1[21] = half_btf(-cospi[40], bf0[21], cospi[24], bf0[26], cos_bit);
- bf1[22] = half_btf(-cospi[24], bf0[22], -cospi[40], bf0[25], cos_bit);
- bf1[23] = bf0[23];
- bf1[24] = bf0[24];
- bf1[25] = half_btf(cospi[24], bf0[25], -cospi[40], bf0[22], cos_bit);
- bf1[26] = half_btf(cospi[40], bf0[26], cospi[24], bf0[21], cos_bit);
- bf1[27] = bf0[27];
- bf1[28] = bf0[28];
- bf1[29] = half_btf(cospi[56], bf0[29], -cospi[8], bf0[18], cos_bit);
- bf1[30] = half_btf(cospi[8], bf0[30], cospi[56], bf0[17], cos_bit);
- bf1[31] = bf0[31];
- av1_range_check_buf(stage, input, bf1, size, stage_range[stage]);
-
- // stage 7
- stage++;
- cospi = cospi_arr(cos_bit);
- bf0 = step;
- bf1 = output;
- bf1[0] = bf0[0];
- bf1[1] = bf0[1];
- bf1[2] = bf0[2];
- bf1[3] = bf0[3];
- bf1[4] = bf0[4];
- bf1[5] = bf0[5];
- bf1[6] = bf0[6];
- bf1[7] = bf0[7];
- bf1[8] = half_btf(cospi[60], bf0[8], cospi[4], bf0[15], cos_bit);
- bf1[9] = half_btf(cospi[28], bf0[9], cospi[36], bf0[14], cos_bit);
- bf1[10] = half_btf(cospi[44], bf0[10], cospi[20], bf0[13], cos_bit);
- bf1[11] = half_btf(cospi[12], bf0[11], cospi[52], bf0[12], cos_bit);
- bf1[12] = half_btf(cospi[12], bf0[12], -cospi[52], bf0[11], cos_bit);
- bf1[13] = half_btf(cospi[44], bf0[13], -cospi[20], bf0[10], cos_bit);
- bf1[14] = half_btf(cospi[28], bf0[14], -cospi[36], bf0[9], cos_bit);
- bf1[15] = half_btf(cospi[60], bf0[15], -cospi[4], bf0[8], cos_bit);
- bf1[16] = bf0[16] + bf0[17];
- bf1[17] = -bf0[17] + bf0[16];
- bf1[18] = -bf0[18] + bf0[19];
- bf1[19] = bf0[19] + bf0[18];
- bf1[20] = bf0[20] + bf0[21];
- bf1[21] = -bf0[21] + bf0[20];
- bf1[22] = -bf0[22] + bf0[23];
- bf1[23] = bf0[23] + bf0[22];
- bf1[24] = bf0[24] + bf0[25];
- bf1[25] = -bf0[25] + bf0[24];
- bf1[26] = -bf0[26] + bf0[27];
- bf1[27] = bf0[27] + bf0[26];
- bf1[28] = bf0[28] + bf0[29];
- bf1[29] = -bf0[29] + bf0[28];
- bf1[30] = -bf0[30] + bf0[31];
- bf1[31] = bf0[31] + bf0[30];
- av1_range_check_buf(stage, input, bf1, size, stage_range[stage]);
-
- // stage 8
- stage++;
- cospi = cospi_arr(cos_bit);
- bf0 = output;
- bf1 = step;
- bf1[0] = bf0[0];
- bf1[1] = bf0[1];
- bf1[2] = bf0[2];
- bf1[3] = bf0[3];
- bf1[4] = bf0[4];
- bf1[5] = bf0[5];
- bf1[6] = bf0[6];
- bf1[7] = bf0[7];
- bf1[8] = bf0[8];
- bf1[9] = bf0[9];
- bf1[10] = bf0[10];
- bf1[11] = bf0[11];
- bf1[12] = bf0[12];
- bf1[13] = bf0[13];
- bf1[14] = bf0[14];
- bf1[15] = bf0[15];
- bf1[16] = half_btf(cospi[62], bf0[16], cospi[2], bf0[31], cos_bit);
- bf1[17] = half_btf(cospi[30], bf0[17], cospi[34], bf0[30], cos_bit);
- bf1[18] = half_btf(cospi[46], bf0[18], cospi[18], bf0[29], cos_bit);
- bf1[19] = half_btf(cospi[14], bf0[19], cospi[50], bf0[28], cos_bit);
- bf1[20] = half_btf(cospi[54], bf0[20], cospi[10], bf0[27], cos_bit);
- bf1[21] = half_btf(cospi[22], bf0[21], cospi[42], bf0[26], cos_bit);
- bf1[22] = half_btf(cospi[38], bf0[22], cospi[26], bf0[25], cos_bit);
- bf1[23] = half_btf(cospi[6], bf0[23], cospi[58], bf0[24], cos_bit);
- bf1[24] = half_btf(cospi[6], bf0[24], -cospi[58], bf0[23], cos_bit);
- bf1[25] = half_btf(cospi[38], bf0[25], -cospi[26], bf0[22], cos_bit);
- bf1[26] = half_btf(cospi[22], bf0[26], -cospi[42], bf0[21], cos_bit);
- bf1[27] = half_btf(cospi[54], bf0[27], -cospi[10], bf0[20], cos_bit);
- bf1[28] = half_btf(cospi[14], bf0[28], -cospi[50], bf0[19], cos_bit);
- bf1[29] = half_btf(cospi[46], bf0[29], -cospi[18], bf0[18], cos_bit);
- bf1[30] = half_btf(cospi[30], bf0[30], -cospi[34], bf0[17], cos_bit);
- bf1[31] = half_btf(cospi[62], bf0[31], -cospi[2], bf0[16], cos_bit);
- av1_range_check_buf(stage, input, bf1, size, stage_range[stage]);
-
- // stage 9
- stage++;
- bf0 = step;
- bf1 = output;
- bf1[0] = bf0[0];
- bf1[1] = bf0[16];
- bf1[2] = bf0[8];
- bf1[3] = bf0[24];
- bf1[4] = bf0[4];
- bf1[5] = bf0[20];
- bf1[6] = bf0[12];
- bf1[7] = bf0[28];
- bf1[8] = bf0[2];
- bf1[9] = bf0[18];
- bf1[10] = bf0[10];
- bf1[11] = bf0[26];
- bf1[12] = bf0[6];
- bf1[13] = bf0[22];
- bf1[14] = bf0[14];
- bf1[15] = bf0[30];
- bf1[16] = bf0[1];
- bf1[17] = bf0[17];
- bf1[18] = bf0[9];
- bf1[19] = bf0[25];
- bf1[20] = bf0[5];
- bf1[21] = bf0[21];
- bf1[22] = bf0[13];
- bf1[23] = bf0[29];
- bf1[24] = bf0[3];
- bf1[25] = bf0[19];
- bf1[26] = bf0[11];
- bf1[27] = bf0[27];
- bf1[28] = bf0[7];
- bf1[29] = bf0[23];
- bf1[30] = bf0[15];
- bf1[31] = bf0[31];
- av1_range_check_buf(stage, input, bf1, size, stage_range[stage]);
-}
-
-// 4-point forward transform built on the sinpi_arr() table.
-//
-//   input        4 values, read only.
-//   output       receives 4 values; set to all zeros when every input is zero.
-//   cos_bit      selects the sinpi table and is the final round_shift amount.
-//   stage_range  per-stage bit widths forwarded to the range checks.
-void av1_fadst4_new(const int32_t *input, int32_t *output, int8_t cos_bit,
-                    const int8_t *stage_range) {
-  const int bit = cos_bit;
-  const int32_t *const sinpi = sinpi_arr(bit);
-
-  // stage 0
-  av1_range_check_buf(0, input, input, 4, stage_range[0]);
-  const int32_t in0 = input[0];
-  const int32_t in1 = input[1];
-  const int32_t in2 = input[2];
-  const int32_t in3 = input[3];
-
-  // Nothing to compute for an all-zero input.
-  if ((in0 | in1 | in2 | in3) == 0) {
-    output[3] = 0;
-    output[2] = 0;
-    output[1] = 0;
-    output[0] = 0;
-    return;
-  }
-
-  // stage 1: weighted copies of the inputs, plus the running sum in0 + in1.
-  const int32_t p1_in0 = range_check_value(sinpi[1] * in0, bit + stage_range[1]);
-  const int32_t p4_in0 = range_check_value(sinpi[4] * in0, bit + stage_range[1]);
-  const int32_t p2_in1 = range_check_value(sinpi[2] * in1, bit + stage_range[1]);
-  const int32_t p1_in1 = range_check_value(sinpi[1] * in1, bit + stage_range[1]);
-  const int32_t p3_in2 = range_check_value(sinpi[3] * in2, bit + stage_range[1]);
-  const int32_t p4_in3 = range_check_value(sinpi[4] * in3, bit + stage_range[1]);
-  const int32_t p2_in3 = range_check_value(sinpi[2] * in3, bit + stage_range[1]);
-  int32_t mix = range_check_value(in0 + in1, stage_range[1]);
-
-  // stage 2: mix = in0 + in1 - in3
-  mix = range_check_value(mix - in3, stage_range[2]);
-
-  // stage 3
-  int32_t acc_a = range_check_value(p1_in0 + p2_in1, bit + stage_range[3]);
-  const int32_t mid = range_check_value(sinpi[3] * mix, bit + stage_range[3]);
-  int32_t acc_b = range_check_value(p4_in0 - p1_in1, bit + stage_range[3]);
-  const int32_t term2 = range_check_value(p3_in2, bit + stage_range[3]);
-
-  // stage 4
-  acc_a = range_check_value(acc_a + p4_in3, bit + stage_range[4]);
-  acc_b = range_check_value(acc_b + p2_in3, bit + stage_range[4]);
-
-  // stage 5
-  const int32_t res0 = range_check_value(acc_a + term2, bit + stage_range[5]);
-  const int32_t res1 = range_check_value(mid, bit + stage_range[5]);
-  const int32_t res2 = range_check_value(acc_b - term2, bit + stage_range[5]);
-  int32_t res3 = range_check_value(acc_b - acc_a, bit + stage_range[5]);
-
-  // stage 6
-  res3 = range_check_value(res3 + term2, bit + stage_range[6]);
-
-  // 1-D transform scaling factor is sqrt(2).
-  output[0] = round_shift(res0, bit);
-  output[1] = round_shift(res1, bit);
-  output[2] = round_shift(res2, bit);
-  output[3] = round_shift(res3, bit);
-  av1_range_check_buf(6, input, output, 4, stage_range[6]);
-}
-
-void av1_fadst8_new(const int32_t *input, int32_t *output, int8_t cos_bit,
- const int8_t *stage_range) { /* 8-point forward transform (an ADST-8,
-  * going by the function name) computed in seven stages.
-  *
-  * The stages ping-pong between `output` and the local `step` buffer:
-  * odd stages write `output`, even stages write `step`, so the final
-  * stage leaves the result in `output`.
-  *
-  * input:       8 values, only read.
-  * output:      8 values, written; must not be the same pointer as input.
-  * cos_bit:     passed to cospi_arr() and to every half_btf() call.
-  * stage_range: indexed 0..7; entry [stage] is handed to
-  *              av1_range_check_buf() after that stage.
-  */
- const int32_t size = 8;
- const int32_t *cospi;
-
- int32_t stage = 0;
- int32_t *bf0, *bf1;
- int32_t step[8];
-
- // stage 0: range-check the unmodified input.
- av1_range_check_buf(stage, input, input, size, stage_range[stage]);
-
- // stage 1: input -> output. Permute the inputs, negating some of them.
- stage++;
- assert(output != input);  // the scatter below would clobber unread inputs
- bf1 = output;
- bf1[0] = input[0];
- bf1[1] = -input[7];
- bf1[2] = -input[3];
- bf1[3] = input[4];
- bf1[4] = -input[1];
- bf1[5] = input[6];
- bf1[6] = input[2];
- bf1[7] = -input[5];
- av1_range_check_buf(stage, input, bf1, size, stage_range[stage]);
-
- // stage 2: output -> step. Pairs (2,3) and (6,7) go through half_btf with
- // +/-cospi[32]; the other entries are copied unchanged.
- stage++;
- cospi = cospi_arr(cos_bit);
- bf0 = output;
- bf1 = step;
- bf1[0] = bf0[0];
- bf1[1] = bf0[1];
- bf1[2] = half_btf(cospi[32], bf0[2], cospi[32], bf0[3], cos_bit);
- bf1[3] = half_btf(cospi[32], bf0[2], -cospi[32], bf0[3], cos_bit);
- bf1[4] = bf0[4];
- bf1[5] = bf0[5];
- bf1[6] = half_btf(cospi[32], bf0[6], cospi[32], bf0[7], cos_bit);
- bf1[7] = half_btf(cospi[32], bf0[6], -cospi[32], bf0[7], cos_bit);
- av1_range_check_buf(stage, input, bf1, size, stage_range[stage]);
-
- // stage 3: step -> output. Within each group of four, add/subtract the
- // entries that sit two apart.
- stage++;
- bf0 = step;
- bf1 = output;
- bf1[0] = bf0[0] + bf0[2];
- bf1[1] = bf0[1] + bf0[3];
- bf1[2] = bf0[0] - bf0[2];
- bf1[3] = bf0[1] - bf0[3];
- bf1[4] = bf0[4] + bf0[6];
- bf1[5] = bf0[5] + bf0[7];
- bf1[6] = bf0[4] - bf0[6];
- bf1[7] = bf0[5] - bf0[7];
- av1_range_check_buf(stage, input, bf1, size, stage_range[stage]);
-
- // stage 4: output -> step. Entries 0..3 are copied; pairs (4,5) and (6,7)
- // go through half_btf with cospi[16] / cospi[48] weights.
- stage++;
- cospi = cospi_arr(cos_bit);
- bf0 = output;
- bf1 = step;
- bf1[0] = bf0[0];
- bf1[1] = bf0[1];
- bf1[2] = bf0[2];
- bf1[3] = bf0[3];
- bf1[4] = half_btf(cospi[16], bf0[4], cospi[48], bf0[5], cos_bit);
- bf1[5] = half_btf(cospi[48], bf0[4], -cospi[16], bf0[5], cos_bit);
- bf1[6] = half_btf(-cospi[48], bf0[6], cospi[16], bf0[7], cos_bit);
- bf1[7] = half_btf(cospi[16], bf0[6], cospi[48], bf0[7], cos_bit);
- av1_range_check_buf(stage, input, bf1, size, stage_range[stage]);
-
- // stage 5: step -> output. Add/subtract the entries that sit four apart.
- stage++;
- bf0 = step;
- bf1 = output;
- bf1[0] = bf0[0] + bf0[4];
- bf1[1] = bf0[1] + bf0[5];
- bf1[2] = bf0[2] + bf0[6];
- bf1[3] = bf0[3] + bf0[7];
- bf1[4] = bf0[0] - bf0[4];
- bf1[5] = bf0[1] - bf0[5];
- bf1[6] = bf0[2] - bf0[6];
- bf1[7] = bf0[3] - bf0[7];
- av1_range_check_buf(stage, input, bf1, size, stage_range[stage]);
-
- // stage 6: output -> step. Every adjacent pair goes through half_btf, each
- // pair with its own cospi weights.
- stage++;
- cospi = cospi_arr(cos_bit);
- bf0 = output;
- bf1 = step;
- bf1[0] = half_btf(cospi[4], bf0[0], cospi[60], bf0[1], cos_bit);
- bf1[1] = half_btf(cospi[60], bf0[0], -cospi[4], bf0[1], cos_bit);
- bf1[2] = half_btf(cospi[20], bf0[2], cospi[44], bf0[3], cos_bit);
- bf1[3] = half_btf(cospi[44], bf0[2], -cospi[20], bf0[3], cos_bit);
- bf1[4] = half_btf(cospi[36], bf0[4], cospi[28], bf0[5], cos_bit);
- bf1[5] = half_btf(cospi[28], bf0[4], -cospi[36], bf0[5], cos_bit);
- bf1[6] = half_btf(cospi[52], bf0[6], cospi[12], bf0[7], cos_bit);
- bf1[7] = half_btf(cospi[12], bf0[6], -cospi[52], bf0[7], cos_bit);
- av1_range_check_buf(stage, input, bf1, size, stage_range[stage]);
-
- // stage 7: step -> output. Final reordering; the result is left in output.
- stage++;
- bf0 = step;
- bf1 = output;
- bf1[0] = bf0[1];
- bf1[1] = bf0[6];
- bf1[2] = bf0[3];
- bf1[3] = bf0[4];
- bf1[4] = bf0[5];
- bf1[5] = bf0[2];
- bf1[6] = bf0[7];
- bf1[7] = bf0[0];
- av1_range_check_buf(stage, input, bf1, size, stage_range[stage]);
-}
-
-void av1_fadst16_new(const int32_t *input, int32_t *output, int8_t cos_bit,
- const int8_t *stage_range) { /* 16-point forward transform (an ADST-16,
-  * going by the function name) computed in nine stages.
-  *
-  * The stages ping-pong between `output` and the local `step` buffer:
-  * odd stages write `output`, even stages write `step`, so the final
-  * stage leaves the result in `output`.
-  *
-  * input:       16 values, only read.
-  * output:      16 values, written; must not be the same pointer as input.
-  * cos_bit:     passed to cospi_arr() and to every half_btf() call.
-  * stage_range: indexed 0..9; entry [stage] is handed to
-  *              av1_range_check_buf() after that stage.
-  */
- const int32_t size = 16;
- const int32_t *cospi;
-
- int32_t stage = 0;
- int32_t *bf0, *bf1;
- int32_t step[16];
-
- // stage 0: range-check the unmodified input.
- av1_range_check_buf(stage, input, input, size, stage_range[stage]);
-
- // stage 1: input -> output. Permute the inputs, negating some of them.
- stage++;
- assert(output != input);  // the scatter below would clobber unread inputs
- bf1 = output;
- bf1[0] = input[0];
- bf1[1] = -input[15];
- bf1[2] = -input[7];
- bf1[3] = input[8];
- bf1[4] = -input[3];
- bf1[5] = input[12];
- bf1[6] = input[4];
- bf1[7] = -input[11];
- bf1[8] = -input[1];
- bf1[9] = input[14];
- bf1[10] = input[6];
- bf1[11] = -input[9];
- bf1[12] = input[2];
- bf1[13] = -input[13];
- bf1[14] = -input[5];
- bf1[15] = input[10];
- av1_range_check_buf(stage, input, bf1, size, stage_range[stage]);
-
- // stage 2: output -> step. In each group of four, the first two entries are
- // copied and the last two go through half_btf with +/-cospi[32].
- stage++;
- cospi = cospi_arr(cos_bit);
- bf0 = output;
- bf1 = step;
- bf1[0] = bf0[0];
- bf1[1] = bf0[1];
- bf1[2] = half_btf(cospi[32], bf0[2], cospi[32], bf0[3], cos_bit);
- bf1[3] = half_btf(cospi[32], bf0[2], -cospi[32], bf0[3], cos_bit);
- bf1[4] = bf0[4];
- bf1[5] = bf0[5];
- bf1[6] = half_btf(cospi[32], bf0[6], cospi[32], bf0[7], cos_bit);
- bf1[7] = half_btf(cospi[32], bf0[6], -cospi[32], bf0[7], cos_bit);
- bf1[8] = bf0[8];
- bf1[9] = bf0[9];
- bf1[10] = half_btf(cospi[32], bf0[10], cospi[32], bf0[11], cos_bit);
- bf1[11] = half_btf(cospi[32], bf0[10], -cospi[32], bf0[11], cos_bit);
- bf1[12] = bf0[12];
- bf1[13] = bf0[13];
- bf1[14] = half_btf(cospi[32], bf0[14], cospi[32], bf0[15], cos_bit);
- bf1[15] = half_btf(cospi[32], bf0[14], -cospi[32], bf0[15], cos_bit);
- av1_range_check_buf(stage, input, bf1, size, stage_range[stage]);
-
- // stage 3: step -> output. Within each group of four, add/subtract the
- // entries that sit two apart.
- stage++;
- bf0 = step;
- bf1 = output;
- bf1[0] = bf0[0] + bf0[2];
- bf1[1] = bf0[1] + bf0[3];
- bf1[2] = bf0[0] - bf0[2];
- bf1[3] = bf0[1] - bf0[3];
- bf1[4] = bf0[4] + bf0[6];
- bf1[5] = bf0[5] + bf0[7];
- bf1[6] = bf0[4] - bf0[6];
- bf1[7] = bf0[5] - bf0[7];
- bf1[8] = bf0[8] + bf0[10];
- bf1[9] = bf0[9] + bf0[11];
- bf1[10] = bf0[8] - bf0[10];
- bf1[11] = bf0[9] - bf0[11];
- bf1[12] = bf0[12] + bf0[14];
- bf1[13] = bf0[13] + bf0[15];
- bf1[14] = bf0[12] - bf0[14];
- bf1[15] = bf0[13] - bf0[15];
- av1_range_check_buf(stage, input, bf1, size, stage_range[stage]);
-
- // stage 4: output -> step. In each group of eight, the first four entries
- // are copied and the last four go through half_btf with cospi[16] /
- // cospi[48] weights.
- stage++;
- cospi = cospi_arr(cos_bit);
- bf0 = output;
- bf1 = step;
- bf1[0] = bf0[0];
- bf1[1] = bf0[1];
- bf1[2] = bf0[2];
- bf1[3] = bf0[3];
- bf1[4] = half_btf(cospi[16], bf0[4], cospi[48], bf0[5], cos_bit);
- bf1[5] = half_btf(cospi[48], bf0[4], -cospi[16], bf0[5], cos_bit);
- bf1[6] = half_btf(-cospi[48], bf0[6], cospi[16], bf0[7], cos_bit);
- bf1[7] = half_btf(cospi[16], bf0[6], cospi[48], bf0[7], cos_bit);
- bf1[8] = bf0[8];
- bf1[9] = bf0[9];
- bf1[10] = bf0[10];
- bf1[11] = bf0[11];
- bf1[12] = half_btf(cospi[16], bf0[12], cospi[48], bf0[13], cos_bit);
- bf1[13] = half_btf(cospi[48], bf0[12], -cospi[16], bf0[13], cos_bit);
- bf1[14] = half_btf(-cospi[48], bf0[14], cospi[16], bf0[15], cos_bit);
- bf1[15] = half_btf(cospi[16], bf0[14], cospi[48], bf0[15], cos_bit);
- av1_range_check_buf(stage, input, bf1, size, stage_range[stage]);
-
- // stage 5: step -> output. Within each group of eight, add/subtract the
- // entries that sit four apart.
- stage++;
- bf0 = step;
- bf1 = output;
- bf1[0] = bf0[0] + bf0[4];
- bf1[1] = bf0[1] + bf0[5];
- bf1[2] = bf0[2] + bf0[6];
- bf1[3] = bf0[3] + bf0[7];
- bf1[4] = bf0[0] - bf0[4];
- bf1[5] = bf0[1] - bf0[5];
- bf1[6] = bf0[2] - bf0[6];
- bf1[7] = bf0[3] - bf0[7];
- bf1[8] = bf0[8] + bf0[12];
- bf1[9] = bf0[9] + bf0[13];
- bf1[10] = bf0[10] + bf0[14];
- bf1[11] = bf0[11] + bf0[15];
- bf1[12] = bf0[8] - bf0[12];
- bf1[13] = bf0[9] - bf0[13];
- bf1[14] = bf0[10] - bf0[14];
- bf1[15] = bf0[11] - bf0[15];
- av1_range_check_buf(stage, input, bf1, size, stage_range[stage]);
-
- // stage 6: output -> step. Entries 0..7 are copied; adjacent pairs in 8..15
- // go through half_btf with cospi[8]/[56] and cospi[40]/[24] weights.
- stage++;
- cospi = cospi_arr(cos_bit);
- bf0 = output;
- bf1 = step;
- bf1[0] = bf0[0];
- bf1[1] = bf0[1];
- bf1[2] = bf0[2];
- bf1[3] = bf0[3];
- bf1[4] = bf0[4];
- bf1[5] = bf0[5];
- bf1[6] = bf0[6];
- bf1[7] = bf0[7];
- bf1[8] = half_btf(cospi[8], bf0[8], cospi[56], bf0[9], cos_bit);
- bf1[9] = half_btf(cospi[56], bf0[8], -cospi[8], bf0[9], cos_bit);
- bf1[10] = half_btf(cospi[40], bf0[10], cospi[24], bf0[11], cos_bit);
- bf1[11] = half_btf(cospi[24], bf0[10], -cospi[40], bf0[11], cos_bit);
- bf1[12] = half_btf(-cospi[56], bf0[12], cospi[8], bf0[13], cos_bit);
- bf1[13] = half_btf(cospi[8], bf0[12], cospi[56], bf0[13], cos_bit);
- bf1[14] = half_btf(-cospi[24], bf0[14], cospi[40], bf0[15], cos_bit);
- bf1[15] = half_btf(cospi[40], bf0[14], cospi[24], bf0[15], cos_bit);
- av1_range_check_buf(stage, input, bf1, size, stage_range[stage]);
-
- // stage 7: step -> output. Add/subtract the entries that sit eight apart.
- stage++;
- bf0 = step;
- bf1 = output;
- bf1[0] = bf0[0] + bf0[8];
- bf1[1] = bf0[1] + bf0[9];
- bf1[2] = bf0[2] + bf0[10];
- bf1[3] = bf0[3] + bf0[11];
- bf1[4] = bf0[4] + bf0[12];
- bf1[5] = bf0[5] + bf0[13];
- bf1[6] = bf0[6] + bf0[14];
- bf1[7] = bf0[7] + bf0[15];
- bf1[8] = bf0[0] - bf0[8];
- bf1[9] = bf0[1] - bf0[9];
- bf1[10] = bf0[2] - bf0[10];
- bf1[11] = bf0[3] - bf0[11];
- bf1[12] = bf0[4] - bf0[12];
- bf1[13] = bf0[5] - bf0[13];
- bf1[14] = bf0[6] - bf0[14];
- bf1[15] = bf0[7] - bf0[15];
- av1_range_check_buf(stage, input, bf1, size, stage_range[stage]);
-
- // stage 8: output -> step. Every adjacent pair goes through half_btf, each
- // pair with its own cospi weights.
- stage++;
- cospi = cospi_arr(cos_bit);
- bf0 = output;
- bf1 = step;
- bf1[0] = half_btf(cospi[2], bf0[0], cospi[62], bf0[1], cos_bit);
- bf1[1] = half_btf(cospi[62], bf0[0], -cospi[2], bf0[1], cos_bit);
- bf1[2] = half_btf(cospi[10], bf0[2], cospi[54], bf0[3], cos_bit);
- bf1[3] = half_btf(cospi[54], bf0[2], -cospi[10], bf0[3], cos_bit);
- bf1[4] = half_btf(cospi[18], bf0[4], cospi[46], bf0[5], cos_bit);
- bf1[5] = half_btf(cospi[46], bf0[4], -cospi[18], bf0[5], cos_bit);
- bf1[6] = half_btf(cospi[26], bf0[6], cospi[38], bf0[7], cos_bit);
- bf1[7] = half_btf(cospi[38], bf0[6], -cospi[26], bf0[7], cos_bit);
- bf1[8] = half_btf(cospi[34], bf0[8], cospi[30], bf0[9], cos_bit);
- bf1[9] = half_btf(cospi[30], bf0[8], -cospi[34], bf0[9], cos_bit);
- bf1[10] = half_btf(cospi[42], bf0[10], cospi[22], bf0[11], cos_bit);
- bf1[11] = half_btf(cospi[22], bf0[10], -cospi[42], bf0[11], cos_bit);
- bf1[12] = half_btf(cospi[50], bf0[12], cospi[14], bf0[13], cos_bit);
- bf1[13] = half_btf(cospi[14], bf0[12], -cospi[50], bf0[13], cos_bit);
- bf1[14] = half_btf(cospi[58], bf0[14], cospi[6], bf0[15], cos_bit);
- bf1[15] = half_btf(cospi[6], bf0[14], -cospi[58], bf0[15], cos_bit);
- av1_range_check_buf(stage, input, bf1, size, stage_range[stage]);
-
- // stage 9: step -> output. Final reordering; the result is left in output.
- stage++;
- bf0 = step;
- bf1 = output;
- bf1[0] = bf0[1];
- bf1[1] = bf0[14];
- bf1[2] = bf0[3];
- bf1[3] = bf0[12];
- bf1[4] = bf0[5];
- bf1[5] = bf0[10];
- bf1[6] = bf0[7];
- bf1[7] = bf0[8];
- bf1[8] = bf0[9];
- bf1[9] = bf0[6];
- bf1[10] = bf0[11];
- bf1[11] = bf0[4];
- bf1[12] = bf0[13];
- bf1[13] = bf0[2];
- bf1[14] = bf0[15];
- bf1[15] = bf0[0];
- av1_range_check_buf(stage, input, bf1, size, stage_range[stage]);
-}
-
-// 4-point identity transform: each input is multiplied by NewSqrt2 in 64-bit
-// arithmetic and reduced with round_shift(..., NewSqrt2Bits).
-// cos_bit is accepted for signature compatibility and ignored.
-void av1_fidentity4_c(const int32_t *input, int32_t *output, int8_t cos_bit,
-                      const int8_t *stage_range) {
-  (void)cos_bit;
-  int idx = 0;
-  while (idx < 4) {
-    output[idx] = round_shift((int64_t)input[idx] * NewSqrt2, NewSqrt2Bits);
-    ++idx;
-  }
-  // The declared stage range plus the multiplier width must fit in 32 bits.
-  assert(stage_range[0] + NewSqrt2Bits <= 32);
-  av1_range_check_buf(0, input, output, 4, stage_range[0]);
-}
-
-// 8-point identity transform: every output is twice the matching input.
-// cos_bit is accepted for signature compatibility and ignored.
-void av1_fidentity8_c(const int32_t *input, int32_t *output, int8_t cos_bit,
-                      const int8_t *stage_range) {
-  (void)cos_bit;
-  int idx = 0;
-  while (idx < 8) {
-    output[idx] = input[idx] * 2;
-    ++idx;
-  }
-  av1_range_check_buf(0, input, output, 8, stage_range[0]);
-}
-
-// 16-point identity transform: each input is multiplied by 2 * NewSqrt2 in
-// 64-bit arithmetic and reduced with round_shift(..., NewSqrt2Bits).
-// cos_bit is accepted for signature compatibility and ignored.
-void av1_fidentity16_c(const int32_t *input, int32_t *output, int8_t cos_bit,
-                       const int8_t *stage_range) {
-  (void)cos_bit;
-  int idx = 0;
-  while (idx < 16) {
-    output[idx] =
-        round_shift((int64_t)input[idx] * 2 * NewSqrt2, NewSqrt2Bits);
-    ++idx;
-  }
-  // The declared stage range plus the multiplier width must fit in 32 bits.
-  assert(stage_range[0] + NewSqrt2Bits <= 32);
-  av1_range_check_buf(0, input, output, 16, stage_range[0]);
-}
-
-// 32-point identity transform: every output is four times the matching input.
-// cos_bit is accepted for signature compatibility and ignored.
-void av1_fidentity32_c(const int32_t *input, int32_t *output, int8_t cos_bit,
-                       const int8_t *stage_range) {
-  (void)cos_bit;
-  int idx = 0;
-  while (idx < 32) {
-    output[idx] = input[idx] * 4;
-    ++idx;
-  }
-  av1_range_check_buf(0, input, output, 32, stage_range[0]);
-}
-
-void av1_fdct64_new(const int32_t *input, int32_t *output, int8_t cos_bit,
- const int8_t *stage_range) {
- const int32_t size = 64;
- const int32_t *cospi;
-
- int32_t stage = 0;
- int32_t *bf0, *bf1;
- int32_t step[64];
-
- // stage 0;
- av1_range_check_buf(stage, input, input, size, stage_range[stage]);
-
- // stage 1;
- stage++;
- bf1 = output;
- bf1[0] = input[0] + input[63];
- bf1[1] = input[1] + input[62];
- bf1[2] = input[2] + input[61];
- bf1[3] = input[3] + input[60];
- bf1[4] = input[4] + input[59];
- bf1[5] = input[5] + input[58];
- bf1[6] = input[6] + input[57];
- bf1[7] = input[7] + input[56];
- bf1[8] = input[8] + input[55];
- bf1[9] = input[9] + input[54];
- bf1[10] = input[10] + input[53];
- bf1[11] = input[11] + input[52];
- bf1[12] = input[12] + input[51];
- bf1[13] = input[13] + input[50];
- bf1[14] = input[14] + input[49];
- bf1[15] = input[15] + input[48];
- bf1[16] = input[16] + input[47];
- bf1[17] = input[17] + input[46];
- bf1[18] = input[18] + input[45];
- bf1[19] = input[19] + input[44];
- bf1[20] = input[20] + input[43];
- bf1[21] = input[21] + input[42];
- bf1[22] = input[22] + input[41];
- bf1[23] = input[23] + input[40];
- bf1[24] = input[24] + input[39];
- bf1[25] = input[25] + input[38];
- bf1[26] = input[26] + input[37];
- bf1[27] = input[27] + input[36];
- bf1[28] = input[28] + input[35];
- bf1[29] = input[29] + input[34];
- bf1[30] = input[30] + input[33];
- bf1[31] = input[31] + input[32];
- bf1[32] = -input[32] + input[31];
- bf1[33] = -input[33] + input[30];
- bf1[34] = -input[34] + input[29];
- bf1[35] = -input[35] + input[28];
- bf1[36] = -input[36] + input[27];
- bf1[37] = -input[37] + input[26];
- bf1[38] = -input[38] + input[25];
- bf1[39] = -input[39] + input[24];
- bf1[40] = -input[40] + input[23];
- bf1[41] = -input[41] + input[22];
- bf1[42] = -input[42] + input[21];
- bf1[43] = -input[43] + input[20];
- bf1[44] = -input[44] + input[19];
- bf1[45] = -input[45] + input[18];
- bf1[46] = -input[46] + input[17];
- bf1[47] = -input[47] + input[16];
- bf1[48] = -input[48] + input[15];
- bf1[49] = -input[49] + input[14];
- bf1[50] = -input[50] + input[13];
- bf1[51] = -input[51] + input[12];
- bf1[52] = -input[52] + input[11];
- bf1[53] = -input[53] + input[10];
- bf1[54] = -input[54] + input[9];
- bf1[55] = -input[55] + input[8];
- bf1[56] = -input[56] + input[7];
- bf1[57] = -input[57] + input[6];
- bf1[58] = -input[58] + input[5];
- bf1[59] = -input[59] + input[4];
- bf1[60] = -input[60] + input[3];
- bf1[61] = -input[61] + input[2];
- bf1[62] = -input[62] + input[1];
- bf1[63] = -input[63] + input[0];
- av1_range_check_buf(stage, input, bf1, size, stage_range[stage]);
-
- // stage 2
- stage++;
- cospi = cospi_arr(cos_bit);
- bf0 = output;
- bf1 = step;
- bf1[0] = bf0[0] + bf0[31];
- bf1[1] = bf0[1] + bf0[30];
- bf1[2] = bf0[2] + bf0[29];
- bf1[3] = bf0[3] + bf0[28];
- bf1[4] = bf0[4] + bf0[27];
- bf1[5] = bf0[5] + bf0[26];
- bf1[6] = bf0[6] + bf0[25];
- bf1[7] = bf0[7] + bf0[24];
- bf1[8] = bf0[8] + bf0[23];
- bf1[9] = bf0[9] + bf0[22];
- bf1[10] = bf0[10] + bf0[21];
- bf1[11] = bf0[11] + bf0[20];
- bf1[12] = bf0[12] + bf0[19];
- bf1[13] = bf0[13] + bf0[18];
- bf1[14] = bf0[14] + bf0[17];
- bf1[15] = bf0[15] + bf0[16];
- bf1[16] = -bf0[16] + bf0[15];
- bf1[17] = -bf0[17] + bf0[14];
- bf1[18] = -bf0[18] + bf0[13];
- bf1[19] = -bf0[19] + bf0[12];
- bf1[20] = -bf0[20] + bf0[11];
- bf1[21] = -bf0[21] + bf0[10];
- bf1[22] = -bf0[22] + bf0[9];
- bf1[23] = -bf0[23] + bf0[8];
- bf1[24] = -bf0[24] + bf0[7];
- bf1[25] = -bf0[25] + bf0[6];
- bf1[26] = -bf0[26] + bf0[5];
- bf1[27] = -bf0[27] + bf0[4];
- bf1[28] = -bf0[28] + bf0[3];
- bf1[29] = -bf0[29] + bf0[2];
- bf1[30] = -bf0[30] + bf0[1];
- bf1[31] = -bf0[31] + bf0[0];
- bf1[32] = bf0[32];
- bf1[33] = bf0[33];
- bf1[34] = bf0[34];
- bf1[35] = bf0[35];
- bf1[36] = bf0[36];
- bf1[37] = bf0[37];
- bf1[38] = bf0[38];
- bf1[39] = bf0[39];
- bf1[40] = half_btf(-cospi[32], bf0[40], cospi[32], bf0[55], cos_bit);
- bf1[41] = half_btf(-cospi[32], bf0[41], cospi[32], bf0[54], cos_bit);
- bf1[42] = half_btf(-cospi[32], bf0[42], cospi[32], bf0[53], cos_bit);
- bf1[43] = half_btf(-cospi[32], bf0[43], cospi[32], bf0[52], cos_bit);
- bf1[44] = half_btf(-cospi[32], bf0[44], cospi[32], bf0[51], cos_bit);
- bf1[45] = half_btf(-cospi[32], bf0[45], cospi[32], bf0[50], cos_bit);
- bf1[46] = half_btf(-cospi[32], bf0[46], cospi[32], bf0[49], cos_bit);
- bf1[47] = half_btf(-cospi[32], bf0[47], cospi[32], bf0[48], cos_bit);
- bf1[48] = half_btf(cospi[32], bf0[48], cospi[32], bf0[47], cos_bit);
- bf1[49] = half_btf(cospi[32], bf0[49], cospi[32], bf0[46], cos_bit);
- bf1[50] = half_btf(cospi[32], bf0[50], cospi[32], bf0[45], cos_bit);
- bf1[51] = half_btf(cospi[32], bf0[51], cospi[32], bf0[44], cos_bit);
- bf1[52] = half_btf(cospi[32], bf0[52], cospi[32], bf0[43], cos_bit);
- bf1[53] = half_btf(cospi[32], bf0[53], cospi[32], bf0[42], cos_bit);
- bf1[54] = half_btf(cospi[32], bf0[54], cospi[32], bf0[41], cos_bit);
- bf1[55] = half_btf(cospi[32], bf0[55], cospi[32], bf0[40], cos_bit);
- bf1[56] = bf0[56];
- bf1[57] = bf0[57];
- bf1[58] = bf0[58];
- bf1[59] = bf0[59];
- bf1[60] = bf0[60];
- bf1[61] = bf0[61];
- bf1[62] = bf0[62];
- bf1[63] = bf0[63];
- av1_range_check_buf(stage, input, bf1, size, stage_range[stage]);
-
- // stage 3
- stage++;
- cospi = cospi_arr(cos_bit);
- bf0 = step;
- bf1 = output;
- bf1[0] = bf0[0] + bf0[15];
- bf1[1] = bf0[1] + bf0[14];
- bf1[2] = bf0[2] + bf0[13];
- bf1[3] = bf0[3] + bf0[12];
- bf1[4] = bf0[4] + bf0[11];
- bf1[5] = bf0[5] + bf0[10];
- bf1[6] = bf0[6] + bf0[9];
- bf1[7] = bf0[7] + bf0[8];
- bf1[8] = -bf0[8] + bf0[7];
- bf1[9] = -bf0[9] + bf0[6];
- bf1[10] = -bf0[10] + bf0[5];
- bf1[11] = -bf0[11] + bf0[4];
- bf1[12] = -bf0[12] + bf0[3];
- bf1[13] = -bf0[13] + bf0[2];
- bf1[14] = -bf0[14] + bf0[1];
- bf1[15] = -bf0[15] + bf0[0];
- bf1[16] = bf0[16];
- bf1[17] = bf0[17];
- bf1[18] = bf0[18];
- bf1[19] = bf0[19];
- bf1[20] = half_btf(-cospi[32], bf0[20], cospi[32], bf0[27], cos_bit);
- bf1[21] = half_btf(-cospi[32], bf0[21], cospi[32], bf0[26], cos_bit);
- bf1[22] = half_btf(-cospi[32], bf0[22], cospi[32], bf0[25], cos_bit);
- bf1[23] = half_btf(-cospi[32], bf0[23], cospi[32], bf0[24], cos_bit);
- bf1[24] = half_btf(cospi[32], bf0[24], cospi[32], bf0[23], cos_bit);
- bf1[25] = half_btf(cospi[32], bf0[25], cospi[32], bf0[22], cos_bit);
- bf1[26] = half_btf(cospi[32], bf0[26], cospi[32], bf0[21], cos_bit);
- bf1[27] = half_btf(cospi[32], bf0[27], cospi[32], bf0[20], cos_bit);
- bf1[28] = bf0[28];
- bf1[29] = bf0[29];
- bf1[30] = bf0[30];
- bf1[31] = bf0[31];
- bf1[32] = bf0[32] + bf0[47];
- bf1[33] = bf0[33] + bf0[46];
- bf1[34] = bf0[34] + bf0[45];
- bf1[35] = bf0[35] + bf0[44];
- bf1[36] = bf0[36] + bf0[43];
- bf1[37] = bf0[37] + bf0[42];
- bf1[38] = bf0[38] + bf0[41];
- bf1[39] = bf0[39] + bf0[40];
- bf1[40] = -bf0[40] + bf0[39];
- bf1[41] = -bf0[41] + bf0[38];
- bf1[42] = -bf0[42] + bf0[37];
- bf1[43] = -bf0[43] + bf0[36];
- bf1[44] = -bf0[44] + bf0[35];
- bf1[45] = -bf0[45] + bf0[34];
- bf1[46] = -bf0[46] + bf0[33];
- bf1[47] = -bf0[47] + bf0[32];
- bf1[48] = -bf0[48] + bf0[63];
- bf1[49] = -bf0[49] + bf0[62];
- bf1[50] = -bf0[50] + bf0[61];
- bf1[51] = -bf0[51] + bf0[60];
- bf1[52] = -bf0[52] + bf0[59];
- bf1[53] = -bf0[53] + bf0[58];
- bf1[54] = -bf0[54] + bf0[57];
- bf1[55] = -bf0[55] + bf0[56];
- bf1[56] = bf0[56] + bf0[55];
- bf1[57] = bf0[57] + bf0[54];
- bf1[58] = bf0[58] + bf0[53];
- bf1[59] = bf0[59] + bf0[52];
- bf1[60] = bf0[60] + bf0[51];
- bf1[61] = bf0[61] + bf0[50];
- bf1[62] = bf0[62] + bf0[49];
- bf1[63] = bf0[63] + bf0[48];
- av1_range_check_buf(stage, input, bf1, size, stage_range[stage]);
-
- // stage 4
- stage++;
- cospi = cospi_arr(cos_bit);
- bf0 = output;
- bf1 = step;
- bf1[0] = bf0[0] + bf0[7];
- bf1[1] = bf0[1] + bf0[6];
- bf1[2] = bf0[2] + bf0[5];
- bf1[3] = bf0[3] + bf0[4];
- bf1[4] = -bf0[4] + bf0[3];
- bf1[5] = -bf0[5] + bf0[2];
- bf1[6] = -bf0[6] + bf0[1];
- bf1[7] = -bf0[7] + bf0[0];
- bf1[8] = bf0[8];
- bf1[9] = bf0[9];
- bf1[10] = half_btf(-cospi[32], bf0[10], cospi[32], bf0[13], cos_bit);
- bf1[11] = half_btf(-cospi[32], bf0[11], cospi[32], bf0[12], cos_bit);
- bf1[12] = half_btf(cospi[32], bf0[12], cospi[32], bf0[11], cos_bit);
- bf1[13] = half_btf(cospi[32], bf0[13], cospi[32], bf0[10], cos_bit);
- bf1[14] = bf0[14];
- bf1[15] = bf0[15];
- bf1[16] = bf0[16] + bf0[23];
- bf1[17] = bf0[17] + bf0[22];
- bf1[18] = bf0[18] + bf0[21];
- bf1[19] = bf0[19] + bf0[20];
- bf1[20] = -bf0[20] + bf0[19];
- bf1[21] = -bf0[21] + bf0[18];
- bf1[22] = -bf0[22] + bf0[17];
- bf1[23] = -bf0[23] + bf0[16];
- bf1[24] = -bf0[24] + bf0[31];
- bf1[25] = -bf0[25] + bf0[30];
- bf1[26] = -bf0[26] + bf0[29];
- bf1[27] = -bf0[27] + bf0[28];
- bf1[28] = bf0[28] + bf0[27];
- bf1[29] = bf0[29] + bf0[26];
- bf1[30] = bf0[30] + bf0[25];
- bf1[31] = bf0[31] + bf0[24];
- bf1[32] = bf0[32];
- bf1[33] = bf0[33];
- bf1[34] = bf0[34];
- bf1[35] = bf0[35];
- bf1[36] = half_btf(-cospi[16], bf0[36], cospi[48], bf0[59], cos_bit);
- bf1[37] = half_btf(-cospi[16], bf0[37], cospi[48], bf0[58], cos_bit);
- bf1[38] = half_btf(-cospi[16], bf0[38], cospi[48], bf0[57], cos_bit);
- bf1[39] = half_btf(-cospi[16], bf0[39], cospi[48], bf0[56], cos_bit);
- bf1[40] = half_btf(-cospi[48], bf0[40], -cospi[16], bf0[55], cos_bit);
- bf1[41] = half_btf(-cospi[48], bf0[41], -cospi[16], bf0[54], cos_bit);
- bf1[42] = half_btf(-cospi[48], bf0[42], -cospi[16], bf0[53], cos_bit);
- bf1[43] = half_btf(-cospi[48], bf0[43], -cospi[16], bf0[52], cos_bit);
- bf1[44] = bf0[44];
- bf1[45] = bf0[45];
- bf1[46] = bf0[46];
- bf1[47] = bf0[47];
- bf1[48] = bf0[48];
- bf1[49] = bf0[49];
- bf1[50] = bf0[50];
- bf1[51] = bf0[51];
- bf1[52] = half_btf(cospi[48], bf0[52], -cospi[16], bf0[43], cos_bit);
- bf1[53] = half_btf(cospi[48], bf0[53], -cospi[16], bf0[42], cos_bit);
- bf1[54] = half_btf(cospi[48], bf0[54], -cospi[16], bf0[41], cos_bit);
- bf1[55] = half_btf(cospi[48], bf0[55], -cospi[16], bf0[40], cos_bit);
- bf1[56] = half_btf(cospi[16], bf0[56], cospi[48], bf0[39], cos_bit);
- bf1[57] = half_btf(cospi[16], bf0[57], cospi[48], bf0[38], cos_bit);
- bf1[58] = half_btf(cospi[16], bf0[58], cospi[48], bf0[37], cos_bit);
- bf1[59] = half_btf(cospi[16], bf0[59], cospi[48], bf0[36], cos_bit);
- bf1[60] = bf0[60];
- bf1[61] = bf0[61];
- bf1[62] = bf0[62];
- bf1[63] = bf0[63];
- av1_range_check_buf(stage, input, bf1, size, stage_range[stage]);
-
- // stage 5
- stage++;
- cospi = cospi_arr(cos_bit);
- bf0 = step;
- bf1 = output;
- bf1[0] = bf0[0] + bf0[3];
- bf1[1] = bf0[1] + bf0[2];
- bf1[2] = -bf0[2] + bf0[1];
- bf1[3] = -bf0[3] + bf0[0];
- bf1[4] = bf0[4];
- bf1[5] = half_btf(-cospi[32], bf0[5], cospi[32], bf0[6], cos_bit);
- bf1[6] = half_btf(cospi[32], bf0[6], cospi[32], bf0[5], cos_bit);
- bf1[7] = bf0[7];
- bf1[8] = bf0[8] + bf0[11];
- bf1[9] = bf0[9] + bf0[10];
- bf1[10] = -bf0[10] + bf0[9];
- bf1[11] = -bf0[11] + bf0[8];
- bf1[12] = -bf0[12] + bf0[15];
- bf1[13] = -bf0[13] + bf0[14];
- bf1[14] = bf0[14] + bf0[13];
- bf1[15] = bf0[15] + bf0[12];
- bf1[16] = bf0[16];
- bf1[17] = bf0[17];
- bf1[18] = half_btf(-cospi[16], bf0[18], cospi[48], bf0[29], cos_bit);
- bf1[19] = half_btf(-cospi[16], bf0[19], cospi[48], bf0[28], cos_bit);
- bf1[20] = half_btf(-cospi[48], bf0[20], -cospi[16], bf0[27], cos_bit);
- bf1[21] = half_btf(-cospi[48], bf0[21], -cospi[16], bf0[26], cos_bit);
- bf1[22] = bf0[22];
- bf1[23] = bf0[23];
- bf1[24] = bf0[24];
- bf1[25] = bf0[25];
- bf1[26] = half_btf(cospi[48], bf0[26], -cospi[16], bf0[21], cos_bit);
- bf1[27] = half_btf(cospi[48], bf0[27], -cospi[16], bf0[20], cos_bit);
- bf1[28] = half_btf(cospi[16], bf0[28], cospi[48], bf0[19], cos_bit);
- bf1[29] = half_btf(cospi[16], bf0[29], cospi[48], bf0[18], cos_bit);
- bf1[30] = bf0[30];
- bf1[31] = bf0[31];
- bf1[32] = bf0[32] + bf0[39];
- bf1[33] = bf0[33] + bf0[38];
- bf1[34] = bf0[34] + bf0[37];
- bf1[35] = bf0[35] + bf0[36];
- bf1[36] = -bf0[36] + bf0[35];
- bf1[37] = -bf0[37] + bf0[34];
- bf1[38] = -bf0[38] + bf0[33];
- bf1[39] = -bf0[39] + bf0[32];
- bf1[40] = -bf0[40] + bf0[47];
- bf1[41] = -bf0[41] + bf0[46];
- bf1[42] = -bf0[42] + bf0[45];
- bf1[43] = -bf0[43] + bf0[44];
- bf1[44] = bf0[44] + bf0[43];
- bf1[45] = bf0[45] + bf0[42];
- bf1[46] = bf0[46] + bf0[41];
- bf1[47] = bf0[47] + bf0[40];
- bf1[48] = bf0[48] + bf0[55];
- bf1[49] = bf0[49] + bf0[54];
- bf1[50] = bf0[50] + bf0[53];
- bf1[51] = bf0[51] + bf0[52];
- bf1[52] = -bf0[52] + bf0[51];
- bf1[53] = -bf0[53] + bf0[50];
- bf1[54] = -bf0[54] + bf0[49];
- bf1[55] = -bf0[55] + bf0[48];
- bf1[56] = -bf0[56] + bf0[63];
- bf1[57] = -bf0[57] + bf0[62];
- bf1[58] = -bf0[58] + bf0[61];
- bf1[59] = -bf0[59] + bf0[60];
- bf1[60] = bf0[60] + bf0[59];
- bf1[61] = bf0[61] + bf0[58];
- bf1[62] = bf0[62] + bf0[57];
- bf1[63] = bf0[63] + bf0[56];
- av1_range_check_buf(stage, input, bf1, size, stage_range[stage]);
-
- // stage 6
- stage++;
- cospi = cospi_arr(cos_bit);
- bf0 = output;
- bf1 = step;
- bf1[0] = half_btf(cospi[32], bf0[0], cospi[32], bf0[1], cos_bit);
- bf1[1] = half_btf(-cospi[32], bf0[1], cospi[32], bf0[0], cos_bit);
- bf1[2] = half_btf(cospi[48], bf0[2], cospi[16], bf0[3], cos_bit);
- bf1[3] = half_btf(cospi[48], bf0[3], -cospi[16], bf0[2], cos_bit);
- bf1[4] = bf0[4] + bf0[5];
- bf1[5] = -bf0[5] + bf0[4];
- bf1[6] = -bf0[6] + bf0[7];
- bf1[7] = bf0[7] + bf0[6];
- bf1[8] = bf0[8];
- bf1[9] = half_btf(-cospi[16], bf0[9], cospi[48], bf0[14], cos_bit);
- bf1[10] = half_btf(-cospi[48], bf0[10], -cospi[16], bf0[13], cos_bit);
- bf1[11] = bf0[11];
- bf1[12] = bf0[12];
- bf1[13] = half_btf(cospi[48], bf0[13], -cospi[16], bf0[10], cos_bit);
- bf1[14] = half_btf(cospi[16], bf0[14], cospi[48], bf0[9], cos_bit);
- bf1[15] = bf0[15];
- bf1[16] = bf0[16] + bf0[19];
- bf1[17] = bf0[17] + bf0[18];
- bf1[18] = -bf0[18] + bf0[17];
- bf1[19] = -bf0[19] + bf0[16];
- bf1[20] = -bf0[20] + bf0[23];
- bf1[21] = -bf0[21] + bf0[22];
- bf1[22] = bf0[22] + bf0[21];
- bf1[23] = bf0[23] + bf0[20];
- bf1[24] = bf0[24] + bf0[27];
- bf1[25] = bf0[25] + bf0[26];
- bf1[26] = -bf0[26] + bf0[25];
- bf1[27] = -bf0[27] + bf0[24];
- bf1[28] = -bf0[28] + bf0[31];
- bf1[29] = -bf0[29] + bf0[30];
- bf1[30] = bf0[30] + bf0[29];
- bf1[31] = bf0[31] + bf0[28];
- bf1[32] = bf0[32];
- bf1[33] = bf0[33];
- bf1[34] = half_btf(-cospi[8], bf0[34], cospi[56], bf0[61], cos_bit);
- bf1[35] = half_btf(-cospi[8], bf0[35], cospi[56], bf0[60], cos_bit);
- bf1[36] = half_btf(-cospi[56], bf0[36], -cospi[8], bf0[59], cos_bit);
- bf1[37] = half_btf(-cospi[56], bf0[37], -cospi[8], bf0[58], cos_bit);
- bf1[38] = bf0[38];
- bf1[39] = bf0[39];
- bf1[40] = bf0[40];
- bf1[41] = bf0[41];
- bf1[42] = half_btf(-cospi[40], bf0[42], cospi[24], bf0[53], cos_bit);
- bf1[43] = half_btf(-cospi[40], bf0[43], cospi[24], bf0[52], cos_bit);
- bf1[44] = half_btf(-cospi[24], bf0[44], -cospi[40], bf0[51], cos_bit);
- bf1[45] = half_btf(-cospi[24], bf0[45], -cospi[40], bf0[50], cos_bit);
- bf1[46] = bf0[46];
- bf1[47] = bf0[47];
- bf1[48] = bf0[48];
- bf1[49] = bf0[49];
- bf1[50] = half_btf(cospi[24], bf0[50], -cospi[40], bf0[45], cos_bit);
- bf1[51] = half_btf(cospi[24], bf0[51], -cospi[40], bf0[44], cos_bit);
- bf1[52] = half_btf(cospi[40], bf0[52], cospi[24], bf0[43], cos_bit);
- bf1[53] = half_btf(cospi[40], bf0[53], cospi[24], bf0[42], cos_bit);
- bf1[54] = bf0[54];
- bf1[55] = bf0[55];
- bf1[56] = bf0[56];
- bf1[57] = bf0[57];
- bf1[58] = half_btf(cospi[56], bf0[58], -cospi[8], bf0[37], cos_bit);
- bf1[59] = half_btf(cospi[56], bf0[59], -cospi[8], bf0[36], cos_bit);
- bf1[60] = half_btf(cospi[8], bf0[60], cospi[56], bf0[35], cos_bit);
- bf1[61] = half_btf(cospi[8], bf0[61], cospi[56], bf0[34], cos_bit);
- bf1[62] = bf0[62];
- bf1[63] = bf0[63];
- av1_range_check_buf(stage, input, bf1, size, stage_range[stage]);
-
- // stage 7
- stage++;
- cospi = cospi_arr(cos_bit);
- bf0 = step;
- bf1 = output;
- bf1[0] = bf0[0];
- bf1[1] = bf0[1];
- bf1[2] = bf0[2];
- bf1[3] = bf0[3];
- bf1[4] = half_btf(cospi[56], bf0[4], cospi[8], bf0[7], cos_bit);
- bf1[5] = half_btf(cospi[24], bf0[5], cospi[40], bf0[6], cos_bit);
- bf1[6] = half_btf(cospi[24], bf0[6], -cospi[40], bf0[5], cos_bit);
- bf1[7] = half_btf(cospi[56], bf0[7], -cospi[8], bf0[4], cos_bit);
- bf1[8] = bf0[8] + bf0[9];
- bf1[9] = -bf0[9] + bf0[8];
- bf1[10] = -bf0[10] + bf0[11];
- bf1[11] = bf0[11] + bf0[10];
- bf1[12] = bf0[12] + bf0[13];
- bf1[13] = -bf0[13] + bf0[12];
- bf1[14] = -bf0[14] + bf0[15];
- bf1[15] = bf0[15] + bf0[14];
- bf1[16] = bf0[16];
- bf1[17] = half_btf(-cospi[8], bf0[17], cospi[56], bf0[30], cos_bit);
- bf1[18] = half_btf(-cospi[56], bf0[18], -cospi[8], bf0[29], cos_bit);
- bf1[19] = bf0[19];
- bf1[20] = bf0[20];
- bf1[21] = half_btf(-cospi[40], bf0[21], cospi[24], bf0[26], cos_bit);
- bf1[22] = half_btf(-cospi[24], bf0[22], -cospi[40], bf0[25], cos_bit);
- bf1[23] = bf0[23];
- bf1[24] = bf0[24];
- bf1[25] = half_btf(cospi[24], bf0[25], -cospi[40], bf0[22], cos_bit);
- bf1[26] = half_btf(cospi[40], bf0[26], cospi[24], bf0[21], cos_bit);
- bf1[27] = bf0[27];
- bf1[28] = bf0[28];
- bf1[29] = half_btf(cospi[56], bf0[29], -cospi[8], bf0[18], cos_bit);
- bf1[30] = half_btf(cospi[8], bf0[30], cospi[56], bf0[17], cos_bit);
- bf1[31] = bf0[31];
- bf1[32] = bf0[32] + bf0[35];
- bf1[33] = bf0[33] + bf0[34];
- bf1[34] = -bf0[34] + bf0[33];
- bf1[35] = -bf0[35] + bf0[32];
- bf1[36] = -bf0[36] + bf0[39];
- bf1[37] = -bf0[37] + bf0[38];
- bf1[38] = bf0[38] + bf0[37];
- bf1[39] = bf0[39] + bf0[36];
- bf1[40] = bf0[40] + bf0[43];
- bf1[41] = bf0[41] + bf0[42];
- bf1[42] = -bf0[42] + bf0[41];
- bf1[43] = -bf0[43] + bf0[40];
- bf1[44] = -bf0[44] + bf0[47];
- bf1[45] = -bf0[45] + bf0[46];
- bf1[46] = bf0[46] + bf0[45];
- bf1[47] = bf0[47] + bf0[44];
- bf1[48] = bf0[48] + bf0[51];
- bf1[49] = bf0[49] + bf0[50];
- bf1[50] = -bf0[50] + bf0[49];
- bf1[51] = -bf0[51] + bf0[48];
- bf1[52] = -bf0[52] + bf0[55];
- bf1[53] = -bf0[53] + bf0[54];
- bf1[54] = bf0[54] + bf0[53];
- bf1[55] = bf0[55] + bf0[52];
- bf1[56] = bf0[56] + bf0[59];
- bf1[57] = bf0[57] + bf0[58];
- bf1[58] = -bf0[58] + bf0[57];
- bf1[59] = -bf0[59] + bf0[56];
- bf1[60] = -bf0[60] + bf0[63];
- bf1[61] = -bf0[61] + bf0[62];
- bf1[62] = bf0[62] + bf0[61];
- bf1[63] = bf0[63] + bf0[60];
- av1_range_check_buf(stage, input, bf1, size, stage_range[stage]);
-
- // stage 8
- stage++;
- cospi = cospi_arr(cos_bit);
- bf0 = output;
- bf1 = step;
- bf1[0] = bf0[0];
- bf1[1] = bf0[1];
- bf1[2] = bf0[2];
- bf1[3] = bf0[3];
- bf1[4] = bf0[4];
- bf1[5] = bf0[5];
- bf1[6] = bf0[6];
- bf1[7] = bf0[7];
- bf1[8] = half_btf(cospi[60], bf0[8], cospi[4], bf0[15], cos_bit);
- bf1[9] = half_btf(cospi[28], bf0[9], cospi[36], bf0[14], cos_bit);
- bf1[10] = half_btf(cospi[44], bf0[10], cospi[20], bf0[13], cos_bit);
- bf1[11] = half_btf(cospi[12], bf0[11], cospi[52], bf0[12], cos_bit);
- bf1[12] = half_btf(cospi[12], bf0[12], -cospi[52], bf0[11], cos_bit);
- bf1[13] = half_btf(cospi[44], bf0[13], -cospi[20], bf0[10], cos_bit);
- bf1[14] = half_btf(cospi[28], bf0[14], -cospi[36], bf0[9], cos_bit);
- bf1[15] = half_btf(cospi[60], bf0[15], -cospi[4], bf0[8], cos_bit);
- bf1[16] = bf0[16] + bf0[17];
- bf1[17] = -bf0[17] + bf0[16];
- bf1[18] = -bf0[18] + bf0[19];
- bf1[19] = bf0[19] + bf0[18];
- bf1[20] = bf0[20] + bf0[21];
- bf1[21] = -bf0[21] + bf0[20];
- bf1[22] = -bf0[22] + bf0[23];
- bf1[23] = bf0[23] + bf0[22];
- bf1[24] = bf0[24] + bf0[25];
- bf1[25] = -bf0[25] + bf0[24];
- bf1[26] = -bf0[26] + bf0[27];
- bf1[27] = bf0[27] + bf0[26];
- bf1[28] = bf0[28] + bf0[29];
- bf1[29] = -bf0[29] + bf0[28];
- bf1[30] = -bf0[30] + bf0[31];
- bf1[31] = bf0[31] + bf0[30];
- bf1[32] = bf0[32];
- bf1[33] = half_btf(-cospi[4], bf0[33], cospi[60], bf0[62], cos_bit);
- bf1[34] = half_btf(-cospi[60], bf0[34], -cospi[4], bf0[61], cos_bit);
- bf1[35] = bf0[35];
- bf1[36] = bf0[36];
- bf1[37] = half_btf(-cospi[36], bf0[37], cospi[28], bf0[58], cos_bit);
- bf1[38] = half_btf(-cospi[28], bf0[38], -cospi[36], bf0[57], cos_bit);
- bf1[39] = bf0[39];
- bf1[40] = bf0[40];
- bf1[41] = half_btf(-cospi[20], bf0[41], cospi[44], bf0[54], cos_bit);
- bf1[42] = half_btf(-cospi[44], bf0[42], -cospi[20], bf0[53], cos_bit);
- bf1[43] = bf0[43];
- bf1[44] = bf0[44];
- bf1[45] = half_btf(-cospi[52], bf0[45], cospi[12], bf0[50], cos_bit);
- bf1[46] = half_btf(-cospi[12], bf0[46], -cospi[52], bf0[49], cos_bit);
- bf1[47] = bf0[47];
- bf1[48] = bf0[48];
- bf1[49] = half_btf(cospi[12], bf0[49], -cospi[52], bf0[46], cos_bit);
- bf1[50] = half_btf(cospi[52], bf0[50], cospi[12], bf0[45], cos_bit);
- bf1[51] = bf0[51];
- bf1[52] = bf0[52];
- bf1[53] = half_btf(cospi[44], bf0[53], -cospi[20], bf0[42], cos_bit);
- bf1[54] = half_btf(cospi[20], bf0[54], cospi[44], bf0[41], cos_bit);
- bf1[55] = bf0[55];
- bf1[56] = bf0[56];
- bf1[57] = half_btf(cospi[28], bf0[57], -cospi[36], bf0[38], cos_bit);
- bf1[58] = half_btf(cospi[36], bf0[58], cospi[28], bf0[37], cos_bit);
- bf1[59] = bf0[59];
- bf1[60] = bf0[60];
- bf1[61] = half_btf(cospi[60], bf0[61], -cospi[4], bf0[34], cos_bit);
- bf1[62] = half_btf(cospi[4], bf0[62], cospi[60], bf0[33], cos_bit);
- bf1[63] = bf0[63];
- av1_range_check_buf(stage, input, bf1, size, stage_range[stage]);
-
- // stage 9
- stage++;
- cospi = cospi_arr(cos_bit);
- bf0 = step;
- bf1 = output;
- bf1[0] = bf0[0];
- bf1[1] = bf0[1];
- bf1[2] = bf0[2];
- bf1[3] = bf0[3];
- bf1[4] = bf0[4];
- bf1[5] = bf0[5];
- bf1[6] = bf0[6];
- bf1[7] = bf0[7];
- bf1[8] = bf0[8];
- bf1[9] = bf0[9];
- bf1[10] = bf0[10];
- bf1[11] = bf0[11];
- bf1[12] = bf0[12];
- bf1[13] = bf0[13];
- bf1[14] = bf0[14];
- bf1[15] = bf0[15];
- bf1[16] = half_btf(cospi[62], bf0[16], cospi[2], bf0[31], cos_bit);
- bf1[17] = half_btf(cospi[30], bf0[17], cospi[34], bf0[30], cos_bit);
- bf1[18] = half_btf(cospi[46], bf0[18], cospi[18], bf0[29], cos_bit);
- bf1[19] = half_btf(cospi[14], bf0[19], cospi[50], bf0[28], cos_bit);
- bf1[20] = half_btf(cospi[54], bf0[20], cospi[10], bf0[27], cos_bit);
- bf1[21] = half_btf(cospi[22], bf0[21], cospi[42], bf0[26], cos_bit);
- bf1[22] = half_btf(cospi[38], bf0[22], cospi[26], bf0[25], cos_bit);
- bf1[23] = half_btf(cospi[6], bf0[23], cospi[58], bf0[24], cos_bit);
- bf1[24] = half_btf(cospi[6], bf0[24], -cospi[58], bf0[23], cos_bit);
- bf1[25] = half_btf(cospi[38], bf0[25], -cospi[26], bf0[22], cos_bit);
- bf1[26] = half_btf(cospi[22], bf0[26], -cospi[42], bf0[21], cos_bit);
- bf1[27] = half_btf(cospi[54], bf0[27], -cospi[10], bf0[20], cos_bit);
- bf1[28] = half_btf(cospi[14], bf0[28], -cospi[50], bf0[19], cos_bit);
- bf1[29] = half_btf(cospi[46], bf0[29], -cospi[18], bf0[18], cos_bit);
- bf1[30] = half_btf(cospi[30], bf0[30], -cospi[34], bf0[17], cos_bit);
- bf1[31] = half_btf(cospi[62], bf0[31], -cospi[2], bf0[16], cos_bit);
- bf1[32] = bf0[32] + bf0[33];
- bf1[33] = -bf0[33] + bf0[32];
- bf1[34] = -bf0[34] + bf0[35];
- bf1[35] = bf0[35] + bf0[34];
- bf1[36] = bf0[36] + bf0[37];
- bf1[37] = -bf0[37] + bf0[36];
- bf1[38] = -bf0[38] + bf0[39];
- bf1[39] = bf0[39] + bf0[38];
- bf1[40] = bf0[40] + bf0[41];
- bf1[41] = -bf0[41] + bf0[40];
- bf1[42] = -bf0[42] + bf0[43];
- bf1[43] = bf0[43] + bf0[42];
- bf1[44] = bf0[44] + bf0[45];
- bf1[45] = -bf0[45] + bf0[44];
- bf1[46] = -bf0[46] + bf0[47];
- bf1[47] = bf0[47] + bf0[46];
- bf1[48] = bf0[48] + bf0[49];
- bf1[49] = -bf0[49] + bf0[48];
- bf1[50] = -bf0[50] + bf0[51];
- bf1[51] = bf0[51] + bf0[50];
- bf1[52] = bf0[52] + bf0[53];
- bf1[53] = -bf0[53] + bf0[52];
- bf1[54] = -bf0[54] + bf0[55];
- bf1[55] = bf0[55] + bf0[54];
- bf1[56] = bf0[56] + bf0[57];
- bf1[57] = -bf0[57] + bf0[56];
- bf1[58] = -bf0[58] + bf0[59];
- bf1[59] = bf0[59] + bf0[58];
- bf1[60] = bf0[60] + bf0[61];
- bf1[61] = -bf0[61] + bf0[60];
- bf1[62] = -bf0[62] + bf0[63];
- bf1[63] = bf0[63] + bf0[62];
- av1_range_check_buf(stage, input, bf1, size, stage_range[stage]);
-
- // stage 10
- stage++;
- cospi = cospi_arr(cos_bit);
- bf0 = output;
- bf1 = step;
- bf1[0] = bf0[0];
- bf1[1] = bf0[1];
- bf1[2] = bf0[2];
- bf1[3] = bf0[3];
- bf1[4] = bf0[4];
- bf1[5] = bf0[5];
- bf1[6] = bf0[6];
- bf1[7] = bf0[7];
- bf1[8] = bf0[8];
- bf1[9] = bf0[9];
- bf1[10] = bf0[10];
- bf1[11] = bf0[11];
- bf1[12] = bf0[12];
- bf1[13] = bf0[13];
- bf1[14] = bf0[14];
- bf1[15] = bf0[15];
- bf1[16] = bf0[16];
- bf1[17] = bf0[17];
- bf1[18] = bf0[18];
- bf1[19] = bf0[19];
- bf1[20] = bf0[20];
- bf1[21] = bf0[21];
- bf1[22] = bf0[22];
- bf1[23] = bf0[23];
- bf1[24] = bf0[24];
- bf1[25] = bf0[25];
- bf1[26] = bf0[26];
- bf1[27] = bf0[27];
- bf1[28] = bf0[28];
- bf1[29] = bf0[29];
- bf1[30] = bf0[30];
- bf1[31] = bf0[31];
- bf1[32] = half_btf(cospi[63], bf0[32], cospi[1], bf0[63], cos_bit);
- bf1[33] = half_btf(cospi[31], bf0[33], cospi[33], bf0[62], cos_bit);
- bf1[34] = half_btf(cospi[47], bf0[34], cospi[17], bf0[61], cos_bit);
- bf1[35] = half_btf(cospi[15], bf0[35], cospi[49], bf0[60], cos_bit);
- bf1[36] = half_btf(cospi[55], bf0[36], cospi[9], bf0[59], cos_bit);
- bf1[37] = half_btf(cospi[23], bf0[37], cospi[41], bf0[58], cos_bit);
- bf1[38] = half_btf(cospi[39], bf0[38], cospi[25], bf0[57], cos_bit);
- bf1[39] = half_btf(cospi[7], bf0[39], cospi[57], bf0[56], cos_bit);
- bf1[40] = half_btf(cospi[59], bf0[40], cospi[5], bf0[55], cos_bit);
- bf1[41] = half_btf(cospi[27], bf0[41], cospi[37], bf0[54], cos_bit);
- bf1[42] = half_btf(cospi[43], bf0[42], cospi[21], bf0[53], cos_bit);
- bf1[43] = half_btf(cospi[11], bf0[43], cospi[53], bf0[52], cos_bit);
- bf1[44] = half_btf(cospi[51], bf0[44], cospi[13], bf0[51], cos_bit);
- bf1[45] = half_btf(cospi[19], bf0[45], cospi[45], bf0[50], cos_bit);
- bf1[46] = half_btf(cospi[35], bf0[46], cospi[29], bf0[49], cos_bit);
- bf1[47] = half_btf(cospi[3], bf0[47], cospi[61], bf0[48], cos_bit);
- bf1[48] = half_btf(cospi[3], bf0[48], -cospi[61], bf0[47], cos_bit);
- bf1[49] = half_btf(cospi[35], bf0[49], -cospi[29], bf0[46], cos_bit);
- bf1[50] = half_btf(cospi[19], bf0[50], -cospi[45], bf0[45], cos_bit);
- bf1[51] = half_btf(cospi[51], bf0[51], -cospi[13], bf0[44], cos_bit);
- bf1[52] = half_btf(cospi[11], bf0[52], -cospi[53], bf0[43], cos_bit);
- bf1[53] = half_btf(cospi[43], bf0[53], -cospi[21], bf0[42], cos_bit);
- bf1[54] = half_btf(cospi[27], bf0[54], -cospi[37], bf0[41], cos_bit);
- bf1[55] = half_btf(cospi[59], bf0[55], -cospi[5], bf0[40], cos_bit);
- bf1[56] = half_btf(cospi[7], bf0[56], -cospi[57], bf0[39], cos_bit);
- bf1[57] = half_btf(cospi[39], bf0[57], -cospi[25], bf0[38], cos_bit);
- bf1[58] = half_btf(cospi[23], bf0[58], -cospi[41], bf0[37], cos_bit);
- bf1[59] = half_btf(cospi[55], bf0[59], -cospi[9], bf0[36], cos_bit);
- bf1[60] = half_btf(cospi[15], bf0[60], -cospi[49], bf0[35], cos_bit);
- bf1[61] = half_btf(cospi[47], bf0[61], -cospi[17], bf0[34], cos_bit);
- bf1[62] = half_btf(cospi[31], bf0[62], -cospi[33], bf0[33], cos_bit);
- bf1[63] = half_btf(cospi[63], bf0[63], -cospi[1], bf0[32], cos_bit);
- av1_range_check_buf(stage, input, bf1, size, stage_range[stage]);
-
- // stage 11
- stage++;
- bf0 = step;
- bf1 = output;
- bf1[0] = bf0[0];
- bf1[1] = bf0[32];
- bf1[2] = bf0[16];
- bf1[3] = bf0[48];
- bf1[4] = bf0[8];
- bf1[5] = bf0[40];
- bf1[6] = bf0[24];
- bf1[7] = bf0[56];
- bf1[8] = bf0[4];
- bf1[9] = bf0[36];
- bf1[10] = bf0[20];
- bf1[11] = bf0[52];
- bf1[12] = bf0[12];
- bf1[13] = bf0[44];
- bf1[14] = bf0[28];
- bf1[15] = bf0[60];
- bf1[16] = bf0[2];
- bf1[17] = bf0[34];
- bf1[18] = bf0[18];
- bf1[19] = bf0[50];
- bf1[20] = bf0[10];
- bf1[21] = bf0[42];
- bf1[22] = bf0[26];
- bf1[23] = bf0[58];
- bf1[24] = bf0[6];
- bf1[25] = bf0[38];
- bf1[26] = bf0[22];
- bf1[27] = bf0[54];
- bf1[28] = bf0[14];
- bf1[29] = bf0[46];
- bf1[30] = bf0[30];
- bf1[31] = bf0[62];
- bf1[32] = bf0[1];
- bf1[33] = bf0[33];
- bf1[34] = bf0[17];
- bf1[35] = bf0[49];
- bf1[36] = bf0[9];
- bf1[37] = bf0[41];
- bf1[38] = bf0[25];
- bf1[39] = bf0[57];
- bf1[40] = bf0[5];
- bf1[41] = bf0[37];
- bf1[42] = bf0[21];
- bf1[43] = bf0[53];
- bf1[44] = bf0[13];
- bf1[45] = bf0[45];
- bf1[46] = bf0[29];
- bf1[47] = bf0[61];
- bf1[48] = bf0[3];
- bf1[49] = bf0[35];
- bf1[50] = bf0[19];
- bf1[51] = bf0[51];
- bf1[52] = bf0[11];
- bf1[53] = bf0[43];
- bf1[54] = bf0[27];
- bf1[55] = bf0[59];
- bf1[56] = bf0[7];
- bf1[57] = bf0[39];
- bf1[58] = bf0[23];
- bf1[59] = bf0[55];
- bf1[60] = bf0[15];
- bf1[61] = bf0[47];
- bf1[62] = bf0[31];
- bf1[63] = bf0[63];
- av1_range_check_buf(stage, input, bf1, size, stage_range[stage]);
-}
diff --git a/third_party/aom/av1/encoder/av1_fwd_txfm1d.h b/third_party/aom/av1/encoder/av1_fwd_txfm1d.h
deleted file mode 100644
index 9dcf16552..000000000
--- a/third_party/aom/av1/encoder/av1_fwd_txfm1d.h
+++ /dev/null
@@ -1,49 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#ifndef AOM_AV1_ENCODER_AV1_FWD_TXFM1D_H_
-#define AOM_AV1_ENCODER_AV1_FWD_TXFM1D_H_
-
-#include "av1/common/av1_txfm.h"
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-void av1_fdct4_new(const int32_t *input, int32_t *output, int8_t cos_bit,
- const int8_t *stage_range);
-void av1_fdct8_new(const int32_t *input, int32_t *output, int8_t cos_bit,
- const int8_t *stage_range);
-void av1_fdct16_new(const int32_t *input, int32_t *output, int8_t cos_bit,
- const int8_t *stage_range);
-void av1_fdct32_new(const int32_t *input, int32_t *output, int8_t cos_bit,
- const int8_t *stage_range);
-void av1_fdct64_new(const int32_t *input, int32_t *output, int8_t cos_bit,
- const int8_t *stage_range);
-void av1_fadst4_new(const int32_t *input, int32_t *output, int8_t cos_bit,
- const int8_t *stage_range);
-void av1_fadst8_new(const int32_t *input, int32_t *output, int8_t cos_bit,
- const int8_t *stage_range);
-void av1_fadst16_new(const int32_t *input, int32_t *output, int8_t cos_bit,
- const int8_t *stage_range);
-void av1_fidentity4_c(const int32_t *input, int32_t *output, int8_t cos_bit,
- const int8_t *stage_range);
-void av1_fidentity8_c(const int32_t *input, int32_t *output, int8_t cos_bit,
- const int8_t *stage_range);
-void av1_fidentity16_c(const int32_t *input, int32_t *output, int8_t cos_bit,
- const int8_t *stage_range);
-void av1_fidentity32_c(const int32_t *input, int32_t *output, int8_t cos_bit,
- const int8_t *stage_range);
-#ifdef __cplusplus
-}
-#endif
-
-#endif // AOM_AV1_ENCODER_AV1_FWD_TXFM1D_H_
diff --git a/third_party/aom/av1/encoder/av1_fwd_txfm1d_cfg.h b/third_party/aom/av1/encoder/av1_fwd_txfm1d_cfg.h
deleted file mode 100644
index 98b6530db..000000000
--- a/third_party/aom/av1/encoder/av1_fwd_txfm1d_cfg.h
+++ /dev/null
@@ -1,19 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#ifndef AOM_AV1_ENCODER_AV1_FWD_TXFM1D_CFG_H_
-#define AOM_AV1_ENCODER_AV1_FWD_TXFM1D_CFG_H_
-#include "av1/common/enums.h"
-#include "av1/encoder/av1_fwd_txfm1d.h"
-extern const int8_t *fwd_txfm_shift_ls[TX_SIZES_ALL];
-extern const int8_t fwd_cos_bit_col[5][5];
-extern const int8_t fwd_cos_bit_row[5][5];
-#endif // AOM_AV1_ENCODER_AV1_FWD_TXFM1D_CFG_H_
diff --git a/third_party/aom/av1/encoder/av1_fwd_txfm2d.c b/third_party/aom/av1/encoder/av1_fwd_txfm2d.c
deleted file mode 100644
index f25a667cf..000000000
--- a/third_party/aom/av1/encoder/av1_fwd_txfm2d.c
+++ /dev/null
@@ -1,431 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#include <assert.h>
-
-#include "config/aom_dsp_rtcd.h"
-#include "config/av1_rtcd.h"
-
-#include "aom_dsp/txfm_common.h"
-#include "av1/common/enums.h"
-#include "av1/common/av1_txfm.h"
-#include "av1/encoder/av1_fwd_txfm1d.h"
-#include "av1/encoder/av1_fwd_txfm1d_cfg.h"
-
-static INLINE TxfmFunc fwd_txfm_type_to_func(TXFM_TYPE txfm_type) {
- switch (txfm_type) {
- case TXFM_TYPE_DCT4: return av1_fdct4_new;
- case TXFM_TYPE_DCT8: return av1_fdct8_new;
- case TXFM_TYPE_DCT16: return av1_fdct16_new;
- case TXFM_TYPE_DCT32: return av1_fdct32_new;
- case TXFM_TYPE_DCT64: return av1_fdct64_new;
- case TXFM_TYPE_ADST4: return av1_fadst4_new;
- case TXFM_TYPE_ADST8: return av1_fadst8_new;
- case TXFM_TYPE_ADST16: return av1_fadst16_new;
- case TXFM_TYPE_IDENTITY4: return av1_fidentity4_c;
- case TXFM_TYPE_IDENTITY8: return av1_fidentity8_c;
- case TXFM_TYPE_IDENTITY16: return av1_fidentity16_c;
- case TXFM_TYPE_IDENTITY32: return av1_fidentity32_c;
- default: assert(0); return NULL;
- }
-}
-
-void av1_gen_fwd_stage_range(int8_t *stage_range_col, int8_t *stage_range_row,
- const TXFM_2D_FLIP_CFG *cfg, int bd) {
- // Take the shift from the larger dimension in the rectangular case.
- const int8_t *shift = cfg->shift;
- // i < MAX_TXFM_STAGE_NUM will mute above array bounds warning
- for (int i = 0; i < cfg->stage_num_col && i < MAX_TXFM_STAGE_NUM; ++i) {
- stage_range_col[i] = cfg->stage_range_col[i] + shift[0] + bd + 1;
- }
-
- // i < MAX_TXFM_STAGE_NUM will mute above array bounds warning
- for (int i = 0; i < cfg->stage_num_row && i < MAX_TXFM_STAGE_NUM; ++i) {
- stage_range_row[i] = cfg->stage_range_row[i] + shift[0] + shift[1] + bd + 1;
- }
-}
-
-static INLINE void fwd_txfm2d_c(const int16_t *input, int32_t *output,
- const int stride, const TXFM_2D_FLIP_CFG *cfg,
- int32_t *buf, int bd) {
- int c, r;
- // Note when assigning txfm_size_col, we use the txfm_size from the
- // row configuration and vice versa. This is intentionally done to
- // accurately perform rectangular transforms. When the transform is
- // rectangular, the number of columns will be the same as the
- // txfm_size stored in the row cfg struct. It will make no difference
- // for square transforms.
- const int txfm_size_col = tx_size_wide[cfg->tx_size];
- const int txfm_size_row = tx_size_high[cfg->tx_size];
- // Take the shift from the larger dimension in the rectangular case.
- const int8_t *shift = cfg->shift;
- const int rect_type = get_rect_tx_log_ratio(txfm_size_col, txfm_size_row);
- int8_t stage_range_col[MAX_TXFM_STAGE_NUM];
- int8_t stage_range_row[MAX_TXFM_STAGE_NUM];
- assert(cfg->stage_num_col <= MAX_TXFM_STAGE_NUM);
- assert(cfg->stage_num_row <= MAX_TXFM_STAGE_NUM);
- av1_gen_fwd_stage_range(stage_range_col, stage_range_row, cfg, bd);
-
- const int8_t cos_bit_col = cfg->cos_bit_col;
- const int8_t cos_bit_row = cfg->cos_bit_row;
- const TxfmFunc txfm_func_col = fwd_txfm_type_to_func(cfg->txfm_type_col);
- const TxfmFunc txfm_func_row = fwd_txfm_type_to_func(cfg->txfm_type_row);
-
- // use output buffer as temp buffer
- int32_t *temp_in = output;
- int32_t *temp_out = output + txfm_size_row;
-
- // Columns
- for (c = 0; c < txfm_size_col; ++c) {
- if (cfg->ud_flip == 0) {
- for (r = 0; r < txfm_size_row; ++r) temp_in[r] = input[r * stride + c];
- } else {
- for (r = 0; r < txfm_size_row; ++r)
- // flip upside down
- temp_in[r] = input[(txfm_size_row - r - 1) * stride + c];
- }
- av1_round_shift_array(temp_in, txfm_size_row, -shift[0]);
- txfm_func_col(temp_in, temp_out, cos_bit_col, stage_range_col);
- av1_round_shift_array(temp_out, txfm_size_row, -shift[1]);
- if (cfg->lr_flip == 0) {
- for (r = 0; r < txfm_size_row; ++r)
- buf[r * txfm_size_col + c] = temp_out[r];
- } else {
- for (r = 0; r < txfm_size_row; ++r)
- // flip from left to right
- buf[r * txfm_size_col + (txfm_size_col - c - 1)] = temp_out[r];
- }
- }
-
- // Rows
- for (r = 0; r < txfm_size_row; ++r) {
- txfm_func_row(buf + r * txfm_size_col, output + r * txfm_size_col,
- cos_bit_row, stage_range_row);
- av1_round_shift_array(output + r * txfm_size_col, txfm_size_col, -shift[2]);
- if (abs(rect_type) == 1) {
- // Multiply everything by Sqrt2 if the transform is rectangular and the
- // size difference is a factor of 2.
- for (c = 0; c < txfm_size_col; ++c) {
- output[r * txfm_size_col + c] = round_shift(
- (int64_t)output[r * txfm_size_col + c] * NewSqrt2, NewSqrt2Bits);
- }
- }
- }
-}
-
-void av1_fwd_txfm2d_4x8_c(const int16_t *input, int32_t *output, int stride,
- TX_TYPE tx_type, int bd) {
- DECLARE_ALIGNED(32, int32_t, txfm_buf[4 * 8]);
- TXFM_2D_FLIP_CFG cfg;
- av1_get_fwd_txfm_cfg(tx_type, TX_4X8, &cfg);
- fwd_txfm2d_c(input, output, stride, &cfg, txfm_buf, bd);
-}
-
-void av1_fwd_txfm2d_8x4_c(const int16_t *input, int32_t *output, int stride,
- TX_TYPE tx_type, int bd) {
- int32_t txfm_buf[8 * 4];
- TXFM_2D_FLIP_CFG cfg;
- av1_get_fwd_txfm_cfg(tx_type, TX_8X4, &cfg);
- fwd_txfm2d_c(input, output, stride, &cfg, txfm_buf, bd);
-}
-
-void av1_fwd_txfm2d_8x16_c(const int16_t *input, int32_t *output, int stride,
- TX_TYPE tx_type, int bd) {
- DECLARE_ALIGNED(32, int32_t, txfm_buf[8 * 16]);
- TXFM_2D_FLIP_CFG cfg;
- av1_get_fwd_txfm_cfg(tx_type, TX_8X16, &cfg);
- fwd_txfm2d_c(input, output, stride, &cfg, txfm_buf, bd);
-}
-
-void av1_fwd_txfm2d_16x8_c(const int16_t *input, int32_t *output, int stride,
- TX_TYPE tx_type, int bd) {
- int32_t txfm_buf[16 * 8];
- TXFM_2D_FLIP_CFG cfg;
- av1_get_fwd_txfm_cfg(tx_type, TX_16X8, &cfg);
- fwd_txfm2d_c(input, output, stride, &cfg, txfm_buf, bd);
-}
-
-void av1_fwd_txfm2d_16x32_c(const int16_t *input, int32_t *output, int stride,
- TX_TYPE tx_type, int bd) {
- DECLARE_ALIGNED(32, int32_t, txfm_buf[16 * 32]);
- TXFM_2D_FLIP_CFG cfg;
- av1_get_fwd_txfm_cfg(tx_type, TX_16X32, &cfg);
- fwd_txfm2d_c(input, output, stride, &cfg, txfm_buf, bd);
-}
-
-void av1_fwd_txfm2d_32x16_c(const int16_t *input, int32_t *output, int stride,
- TX_TYPE tx_type, int bd) {
- int32_t txfm_buf[32 * 16];
- TXFM_2D_FLIP_CFG cfg;
- av1_get_fwd_txfm_cfg(tx_type, TX_32X16, &cfg);
- fwd_txfm2d_c(input, output, stride, &cfg, txfm_buf, bd);
-}
-
-void av1_fwd_txfm2d_4x16_c(const int16_t *input, int32_t *output, int stride,
- TX_TYPE tx_type, int bd) {
- DECLARE_ALIGNED(32, int32_t, txfm_buf[4 * 16]);
- TXFM_2D_FLIP_CFG cfg;
- av1_get_fwd_txfm_cfg(tx_type, TX_4X16, &cfg);
- fwd_txfm2d_c(input, output, stride, &cfg, txfm_buf, bd);
-}
-
-void av1_fwd_txfm2d_16x4_c(const int16_t *input, int32_t *output, int stride,
- TX_TYPE tx_type, int bd) {
- int32_t txfm_buf[16 * 4];
- TXFM_2D_FLIP_CFG cfg;
- av1_get_fwd_txfm_cfg(tx_type, TX_16X4, &cfg);
- fwd_txfm2d_c(input, output, stride, &cfg, txfm_buf, bd);
-}
-
-void av1_fwd_txfm2d_8x32_c(const int16_t *input, int32_t *output, int stride,
- TX_TYPE tx_type, int bd) {
- DECLARE_ALIGNED(32, int32_t, txfm_buf[32 * 8]);
- TXFM_2D_FLIP_CFG cfg;
- av1_get_fwd_txfm_cfg(tx_type, TX_8X32, &cfg);
- fwd_txfm2d_c(input, output, stride, &cfg, txfm_buf, bd);
-}
-
-void av1_fwd_txfm2d_32x8_c(const int16_t *input, int32_t *output, int stride,
- TX_TYPE tx_type, int bd) {
- int32_t txfm_buf[32 * 8];
- TXFM_2D_FLIP_CFG cfg;
- av1_get_fwd_txfm_cfg(tx_type, TX_32X8, &cfg);
- fwd_txfm2d_c(input, output, stride, &cfg, txfm_buf, bd);
-}
-
-void av1_fwd_txfm2d_4x4_c(const int16_t *input, int32_t *output, int stride,
- TX_TYPE tx_type, int bd) {
- int32_t txfm_buf[4 * 4];
- TXFM_2D_FLIP_CFG cfg;
- av1_get_fwd_txfm_cfg(tx_type, TX_4X4, &cfg);
- fwd_txfm2d_c(input, output, stride, &cfg, txfm_buf, bd);
-}
-
-void av1_fwd_txfm2d_8x8_c(const int16_t *input, int32_t *output, int stride,
- TX_TYPE tx_type, int bd) {
- int32_t txfm_buf[8 * 8];
- TXFM_2D_FLIP_CFG cfg;
- av1_get_fwd_txfm_cfg(tx_type, TX_8X8, &cfg);
- fwd_txfm2d_c(input, output, stride, &cfg, txfm_buf, bd);
-}
-
-void av1_fwd_txfm2d_16x16_c(const int16_t *input, int32_t *output, int stride,
- TX_TYPE tx_type, int bd) {
- int32_t txfm_buf[16 * 16];
- TXFM_2D_FLIP_CFG cfg;
- av1_get_fwd_txfm_cfg(tx_type, TX_16X16, &cfg);
- fwd_txfm2d_c(input, output, stride, &cfg, txfm_buf, bd);
-}
-
-void av1_fwd_txfm2d_32x32_c(const int16_t *input, int32_t *output, int stride,
- TX_TYPE tx_type, int bd) {
- int32_t txfm_buf[32 * 32];
- TXFM_2D_FLIP_CFG cfg;
- av1_get_fwd_txfm_cfg(tx_type, TX_32X32, &cfg);
- fwd_txfm2d_c(input, output, stride, &cfg, txfm_buf, bd);
-}
-
-void av1_fwd_txfm2d_64x64_c(const int16_t *input, int32_t *output, int stride,
- TX_TYPE tx_type, int bd) {
- int32_t txfm_buf[64 * 64];
- TXFM_2D_FLIP_CFG cfg;
- av1_get_fwd_txfm_cfg(tx_type, TX_64X64, &cfg);
- fwd_txfm2d_c(input, output, stride, &cfg, txfm_buf, bd);
-
- // Zero out top-right 32x32 area.
- for (int row = 0; row < 32; ++row) {
- memset(output + row * 64 + 32, 0, 32 * sizeof(*output));
- }
- // Zero out the bottom 64x32 area.
- memset(output + 32 * 64, 0, 32 * 64 * sizeof(*output));
- // Re-pack non-zero coeffs in the first 32x32 indices.
- for (int row = 1; row < 32; ++row) {
- memcpy(output + row * 32, output + row * 64, 32 * sizeof(*output));
- }
-}
-
-void av1_fwd_txfm2d_32x64_c(const int16_t *input, int32_t *output, int stride,
- TX_TYPE tx_type, int bd) {
- DECLARE_ALIGNED(32, int32_t, txfm_buf[32 * 64]);
- TXFM_2D_FLIP_CFG cfg;
- av1_get_fwd_txfm_cfg(tx_type, TX_32X64, &cfg);
- fwd_txfm2d_c(input, output, stride, &cfg, txfm_buf, bd);
- // Zero out the bottom 32x32 area.
- memset(output + 32 * 32, 0, 32 * 32 * sizeof(*output));
- // Note: no repacking needed here.
-}
-
-void av1_fwd_txfm2d_64x32_c(const int16_t *input, int32_t *output, int stride,
- TX_TYPE tx_type, int bd) {
- int32_t txfm_buf[64 * 32];
- TXFM_2D_FLIP_CFG cfg;
- av1_get_fwd_txfm_cfg(tx_type, TX_64X32, &cfg);
- fwd_txfm2d_c(input, output, stride, &cfg, txfm_buf, bd);
-
- // Zero out right 32x32 area.
- for (int row = 0; row < 32; ++row) {
- memset(output + row * 64 + 32, 0, 32 * sizeof(*output));
- }
- // Re-pack non-zero coeffs in the first 32x32 indices.
- for (int row = 1; row < 32; ++row) {
- memcpy(output + row * 32, output + row * 64, 32 * sizeof(*output));
- }
-}
-
-void av1_fwd_txfm2d_16x64_c(const int16_t *input, int32_t *output, int stride,
- TX_TYPE tx_type, int bd) {
- DECLARE_ALIGNED(32, int32_t, txfm_buf[64 * 16]);
- TXFM_2D_FLIP_CFG cfg;
- av1_get_fwd_txfm_cfg(tx_type, TX_16X64, &cfg);
- fwd_txfm2d_c(input, output, stride, &cfg, txfm_buf, bd);
- // Zero out the bottom 16x32 area.
- memset(output + 16 * 32, 0, 16 * 32 * sizeof(*output));
- // Note: no repacking needed here.
-}
-
-// C implementation of the forward 2D transform for TX_64X16.
-// After the generic kernel has run, the right 32 columns of every 64-wide
-// output row are cleared and the left 32 columns of each row are packed
-// back-to-back at the start of the output buffer.
-void av1_fwd_txfm2d_64x16_c(const int16_t *input, int32_t *output, int stride,
-                            TX_TYPE tx_type, int bd) {
-  TXFM_2D_FLIP_CFG txfm_cfg;
-  int32_t scratch[64 * 16];
-
-  av1_get_fwd_txfm_cfg(tx_type, TX_64X16, &txfm_cfg);
-  fwd_txfm2d_c(input, output, stride, &txfm_cfg, scratch, bd);
-
-  // Pass 1: zero the right 32x16 area (columns 32..63 of each row).
-  for (int r = 0; r < 16; ++r) {
-    int32_t *const right_half = output + r * 64 + 32;
-    memset(right_half, 0, 32 * sizeof(*right_half));
-  }
-
-  // Pass 2: re-pack the kept coefficients into the first 32x16 indices.
-  // Row 0 is already in place, so packing starts at row 1.
-  for (int r = 1; r < 16; ++r) {
-    const int32_t *const src_row = output + r * 64;
-    int32_t *const dst_row = output + r * 32;
-    memcpy(dst_row, src_row, 32 * sizeof(*dst_row));
-  }
-}
-
-// Per-transform-size shift triples for the forward transform. Each array is
-// named fwd_shift_<width>x<height> and holds three int8_t values.
-// NOTE(review): the meaning of the three entries is not visible in this file
-// section - presumably one shift per stage of the 2D transform; confirm
-// against fwd_txfm2d_c().
-static const int8_t fwd_shift_4x4[3] = { 2, 0, 0 };
-static const int8_t fwd_shift_8x8[3] = { 2, -1, 0 };
-static const int8_t fwd_shift_16x16[3] = { 2, -2, 0 };
-static const int8_t fwd_shift_32x32[3] = { 2, -4, 0 };
-static const int8_t fwd_shift_64x64[3] = { 0, -2, -2 };
-static const int8_t fwd_shift_4x8[3] = { 2, -1, 0 };
-static const int8_t fwd_shift_8x4[3] = { 2, -1, 0 };
-static const int8_t fwd_shift_8x16[3] = { 2, -2, 0 };
-static const int8_t fwd_shift_16x8[3] = { 2, -2, 0 };
-static const int8_t fwd_shift_16x32[3] = { 2, -4, 0 };
-static const int8_t fwd_shift_32x16[3] = { 2, -4, 0 };
-static const int8_t fwd_shift_32x64[3] = { 0, -2, -2 };
-static const int8_t fwd_shift_64x32[3] = { 2, -4, -2 };
-static const int8_t fwd_shift_4x16[3] = { 2, -1, 0 };
-static const int8_t fwd_shift_16x4[3] = { 2, -1, 0 };
-static const int8_t fwd_shift_8x32[3] = { 2, -2, 0 };
-static const int8_t fwd_shift_32x8[3] = { 2, -2, 0 };
-static const int8_t fwd_shift_16x64[3] = { 0, -2, 0 };
-static const int8_t fwd_shift_64x16[3] = { 2, -4, 0 };
-
-// Lookup from TX_SIZE to the shift triple above; av1_get_fwd_txfm_cfg()
-// indexes it directly with tx_size, so the entry order has to follow the
-// TX_SIZE enumeration.
-const int8_t *fwd_txfm_shift_ls[TX_SIZES_ALL] = {
- fwd_shift_4x4, fwd_shift_8x8, fwd_shift_16x16, fwd_shift_32x32,
- fwd_shift_64x64, fwd_shift_4x8, fwd_shift_8x4, fwd_shift_8x16,
- fwd_shift_16x8, fwd_shift_16x32, fwd_shift_32x16, fwd_shift_32x64,
- fwd_shift_64x32, fwd_shift_4x16, fwd_shift_16x4, fwd_shift_8x32,
- fwd_shift_32x8, fwd_shift_16x64, fwd_shift_64x16,
-};
-
-// Value stored in cfg->cos_bit_col by av1_get_fwd_txfm_cfg(), looked up as
-// fwd_cos_bit_col[txw_idx][txh_idx].
-// NOTE(review): the 0 entries presumably mark width/height combinations that
-// have no transform size - confirm.
-const int8_t fwd_cos_bit_col[MAX_TXWH_IDX /*txw_idx*/]
- [MAX_TXWH_IDX /*txh_idx*/] = {
- { 13, 13, 13, 0, 0 },
- { 13, 13, 13, 12, 0 },
- { 13, 13, 13, 12, 13 },
- { 0, 13, 13, 12, 13 },
- { 0, 0, 13, 12, 13 }
- };
-
-// Value stored in cfg->cos_bit_row by av1_get_fwd_txfm_cfg(), looked up as
-// fwd_cos_bit_row[txw_idx][txh_idx].
-const int8_t fwd_cos_bit_row[MAX_TXWH_IDX /*txw_idx*/]
- [MAX_TXWH_IDX /*txh_idx*/] = {
- { 13, 13, 12, 0, 0 },
- { 13, 13, 13, 12, 0 },
- { 13, 13, 12, 13, 12 },
- { 0, 12, 13, 12, 11 },
- { 0, 0, 12, 11, 10 }
- };
-
-// Per-stage range values for each 1D transform type. As the "_mult2" suffix
-// suggests, set_fwd_txfm_non_scale_range() halves them with rounding up,
-// (v + 1) >> 1, before storing them in the stage_range arrays.
-static const int8_t fdct4_range_mult2[4] = { 0, 2, 3, 3 };
-static const int8_t fdct8_range_mult2[6] = { 0, 2, 4, 5, 5, 5 };
-static const int8_t fdct16_range_mult2[8] = { 0, 2, 4, 6, 7, 7, 7, 7 };
-static const int8_t fdct32_range_mult2[10] = { 0, 2, 4, 6, 8, 9, 9, 9, 9, 9 };
-static const int8_t fdct64_range_mult2[12] = { 0, 2, 4, 6, 8, 10,
- 11, 11, 11, 11, 11, 11 };
-
-static const int8_t fadst4_range_mult2[7] = { 0, 2, 4, 3, 3, 3, 3 };
-static const int8_t fadst8_range_mult2[8] = { 0, 0, 1, 3, 3, 5, 5, 5 };
-static const int8_t fadst16_range_mult2[10] = { 0, 0, 1, 3, 3, 5, 5, 7, 7, 7 };
-
-// Indexed by txh_idx; added to every row-stage value in
-// set_fwd_txfm_non_scale_range() before halving.
-static const int8_t max_fwd_range_mult2_col[5] = { 3, 5, 7, 9, 11 };
-
-static const int8_t fidtx4_range_mult2[1] = { 1 };
-static const int8_t fidtx8_range_mult2[1] = { 2 };
-static const int8_t fidtx16_range_mult2[1] = { 3 };
-static const int8_t fidtx32_range_mult2[1] = { 4 };
-
-// Compiled out: this table is not referenced by any code in this section.
-#if 0
-const int8_t fwd_idtx_range_row[MAX_TXWH_IDX /*txw_idx*/]
- [MAX_TXWH_IDX /*txh_idx*/] = { { 2, 4, 5, 0, 0 },
- { 3, 4, 5, 6, 0 },
- { 4, 5, 6, 7, 8 },
- { 0, 5, 6, 7, 8 },
- { 0, 0, 7, 8,
- 9 } };
-#endif
-
-// Lookup from 1D transform type (cfg->txfm_type_col / cfg->txfm_type_row) to
-// its per-stage range table. The entry order presumably follows the TXFM_TYPE
-// enumeration - confirm when adding a type.
-const int8_t *fwd_txfm_range_mult2_list[TXFM_TYPES] = {
- fdct4_range_mult2, fdct8_range_mult2, fdct16_range_mult2,
- fdct32_range_mult2, fdct64_range_mult2, fadst4_range_mult2,
- fadst8_range_mult2, fadst16_range_mult2, fidtx4_range_mult2,
- fidtx8_range_mult2, fidtx16_range_mult2, fidtx32_range_mult2
-};
-
-// Fills cfg->stage_range_col and cfg->stage_range_row from the *_range_mult2
-// tables. Both arrays are cleared first; a pass whose transform type is
-// TXFM_TYPE_INVALID is left all-zero.
-static INLINE void set_fwd_txfm_non_scale_range(TXFM_2D_FLIP_CFG *cfg) {
-  const int height_idx = get_txh_idx(cfg->tx_size);
-
-  av1_zero(cfg->stage_range_col);
-  av1_zero(cfg->stage_range_row);
-
-  // Column pass: half of the table value, rounded up.
-  if (cfg->txfm_type_col != TXFM_TYPE_INVALID) {
-    const int8_t *const col_mult2 =
-        fwd_txfm_range_mult2_list[cfg->txfm_type_col];
-    const int num_col_stages = cfg->stage_num_col;
-    for (int stage = 0; stage < num_col_stages; ++stage) {
-      cfg->stage_range_col[stage] = (col_mult2[stage] + 1) >> 1;
-    }
-  }
-
-  // Row pass: the column-pass maximum for this height is added to the table
-  // value before halving.
-  if (cfg->txfm_type_row != TXFM_TYPE_INVALID) {
-    const int8_t *const row_mult2 =
-        fwd_txfm_range_mult2_list[cfg->txfm_type_row];
-    const int num_row_stages = cfg->stage_num_row;
-    for (int stage = 0; stage < num_row_stages; ++stage) {
-      cfg->stage_range_row[stage] =
-          (max_fwd_range_mult2_col[height_idx] + row_mult2[stage] + 1) >> 1;
-    }
-  }
-}
-
-// Populates *cfg with everything the forward 2D transform needs for the
-// given (tx_type, tx_size) pair: flip flags, shift table, cos bits, the 1D
-// transform type and stage count of each pass, and the per-stage ranges.
-// cfg must not be NULL.
-void av1_get_fwd_txfm_cfg(TX_TYPE tx_type, TX_SIZE tx_size,
-                          TXFM_2D_FLIP_CFG *cfg) {
-  assert(cfg != NULL);
-
-  cfg->tx_size = tx_size;
-  set_flip_cfg(tx_type, cfg);
-
-  // Width/height indices are log2 sizes relative to the first TX_SIZE entry.
-  const int w_idx = tx_size_wide_log2[tx_size] - tx_size_wide_log2[0];
-  const int h_idx = tx_size_high_log2[tx_size] - tx_size_high_log2[0];
-  const TX_TYPE_1D col_type_1d = vtx_tab[tx_type];
-  const TX_TYPE_1D row_type_1d = htx_tab[tx_type];
-
-  cfg->shift = fwd_txfm_shift_ls[tx_size];
-  cfg->cos_bit_col = fwd_cos_bit_col[w_idx][h_idx];
-  cfg->cos_bit_row = fwd_cos_bit_row[w_idx][h_idx];
-
-  // The column pass is selected by the block height, the row pass by the
-  // block width.
-  cfg->txfm_type_col = av1_txfm_type_ls[h_idx][col_type_1d];
-  cfg->stage_num_col = av1_txfm_stage_num_list[cfg->txfm_type_col];
-  cfg->txfm_type_row = av1_txfm_type_ls[w_idx][row_type_1d];
-  cfg->stage_num_row = av1_txfm_stage_num_list[cfg->txfm_type_row];
-
-  set_fwd_txfm_non_scale_range(cfg);
-}
diff --git a/third_party/aom/av1/encoder/av1_quantize.c b/third_party/aom/av1/encoder/av1_quantize.c
deleted file mode 100644
index a0a926005..000000000
--- a/third_party/aom/av1/encoder/av1_quantize.c
+++ /dev/null
@@ -1,738 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#include <math.h>
-
-#include "config/aom_dsp_rtcd.h"
-
-#include "aom_dsp/quantize.h"
-#include "aom_mem/aom_mem.h"
-#include "aom_ports/mem.h"
-
-#include "av1/common/idct.h"
-#include "av1/common/quant_common.h"
-#include "av1/common/scan.h"
-#include "av1/common/seg_common.h"
-
-#include "av1/encoder/av1_quantize.h"
-#include "av1/encoder/encoder.h"
-#include "av1/encoder/rd.h"
-
-// Produces an all-zero quantization result: both coefficient buffers are
-// cleared over n_coeffs entries and the end-of-block position is set to 0.
-void av1_quantize_skip(intptr_t n_coeffs, tran_low_t *qcoeff_ptr,
-                       tran_low_t *dqcoeff_ptr, uint16_t *eob_ptr) {
-  *eob_ptr = 0;
-  memset(dqcoeff_ptr, 0, n_coeffs * sizeof(*dqcoeff_ptr));
-  memset(qcoeff_ptr, 0, n_coeffs * sizeof(*qcoeff_ptr));
-}
-
-// Shared C implementation behind the av1_quantize_fp*_c() entry points.
-//
-// Clears qcoeff_ptr/dqcoeff_ptr over n_coeffs entries, then quantizes every
-// coefficient and writes the quantized and dequantized values. *eob_ptr
-// receives 1 + the highest scan position with a non-zero quantized value
-// (0 when everything quantizes to zero).
-//
-// Index 0 of round_ptr/quant_ptr/dequant_ptr is used for the coefficient at
-// position 0 and index 1 for all others. zbin_ptr and quant_shift_ptr are
-// accepted for signature compatibility and ignored. log_scale reduces the
-// rounding term and the right shifts; callers in this file pass 0, 1 or 2.
-//
-// Two paths:
-//  - qm_ptr and iqm_ptr both NULL: coefficients are visited in buffer order
-//    and `scan` is not read; iscan[] supplies the scan position for the eob.
-//  - otherwise: coefficients are visited in scan order, `iscan` is not read,
-//    and a NULL matrix is treated as a flat weight of (1 << AOM_QM_BITS).
-static void quantize_fp_helper_c(
- const tran_low_t *coeff_ptr, intptr_t n_coeffs, const int16_t *zbin_ptr,
- const int16_t *round_ptr, const int16_t *quant_ptr,
- const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr,
- tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr,
- const int16_t *scan, const int16_t *iscan, const qm_val_t *qm_ptr,
- const qm_val_t *iqm_ptr, int log_scale) {
- int i, eob = -1;
- // TODO(jingning) Decide the need of these arguments after the
- // quantization process is completed.
- (void)zbin_ptr;
- (void)quant_shift_ptr;
-
- memset(qcoeff_ptr, 0, n_coeffs * sizeof(*qcoeff_ptr));
- memset(dqcoeff_ptr, 0, n_coeffs * sizeof(*dqcoeff_ptr));
-
- if (qm_ptr == NULL && iqm_ptr == NULL) {
- const int rounding0 = ROUND_POWER_OF_TWO(round_ptr[0], log_scale);
- { // rc == 0
- const int coeff = coeff_ptr[0];
- // coeff_sign is 0 for non-negative and -1 for negative values, so
- // (v ^ sign) - sign yields |v| here and re-applies the sign below.
- const int coeff_sign = (coeff >> 31);
- int64_t abs_coeff = (coeff ^ coeff_sign) - coeff_sign;
- // Coefficients whose scaled magnitude is below the dequant step are
- // left at the zero written by the memset above.
- if ((abs_coeff << (1 + log_scale)) >= (int32_t)(dequant_ptr[0])) {
- abs_coeff = clamp64(abs_coeff + rounding0, INT16_MIN, INT16_MAX);
- const int tmp32 = (int)((abs_coeff * quant_ptr[0]) >> (16 - log_scale));
- if (tmp32) {
- qcoeff_ptr[0] = (tmp32 ^ coeff_sign) - coeff_sign;
- const tran_low_t abs_dqcoeff = (tmp32 * dequant_ptr[0]) >> log_scale;
- dqcoeff_ptr[0] = (abs_dqcoeff ^ coeff_sign) - coeff_sign;
- eob = 0;
- }
- }
- }
- // Remaining coefficients all use the index-1 parameters.
- const int rounding1 = ROUND_POWER_OF_TWO(round_ptr[1], log_scale);
- const int32_t thresh1 = (int32_t)(dequant_ptr[1]);
- for (i = 1; i < n_coeffs; i++) {
- const int coeff = coeff_ptr[i];
- const int coeff_sign = (coeff >> 31);
- int64_t abs_coeff = (coeff ^ coeff_sign) - coeff_sign;
- if ((abs_coeff << (1 + log_scale)) >= thresh1) {
- abs_coeff = clamp64(abs_coeff + rounding1, INT16_MIN, INT16_MAX);
- const int tmp32 = (int)((abs_coeff * quant_ptr[1]) >> (16 - log_scale));
- if (tmp32) {
- qcoeff_ptr[i] = (tmp32 ^ coeff_sign) - coeff_sign;
- const tran_low_t abs_dqcoeff = (tmp32 * dequant_ptr[1]) >> log_scale;
- dqcoeff_ptr[i] = (abs_dqcoeff ^ coeff_sign) - coeff_sign;
- // i is a buffer index here; iscan[] maps it to its scan position.
- eob = AOMMAX(iscan[i], eob);
- }
- }
- }
- } else {
- // Quantization-matrix path: visit the coefficients in scan order and
- // weight each one by its matrix entry (flat weight when a matrix is NULL).
- for (i = 0; i < n_coeffs; i++) {
- const int rc = scan[i];
- const int coeff = coeff_ptr[rc];
- const qm_val_t wt = qm_ptr ? qm_ptr[rc] : (1 << AOM_QM_BITS);
- const qm_val_t iwt = iqm_ptr ? iqm_ptr[rc] : (1 << AOM_QM_BITS);
- // Dequant step scaled by the inverse weight, rounded to nearest.
- const int dequant =
- (dequant_ptr[rc != 0] * iwt + (1 << (AOM_QM_BITS - 1))) >>
- AOM_QM_BITS;
- const int coeff_sign = (coeff >> 31);
- int64_t abs_coeff = (coeff ^ coeff_sign) - coeff_sign;
- int tmp32 = 0;
- if (abs_coeff * wt >=
- (dequant_ptr[rc != 0] << (AOM_QM_BITS - (1 + log_scale)))) {
- abs_coeff += ROUND_POWER_OF_TWO(round_ptr[rc != 0], log_scale);
- abs_coeff = clamp64(abs_coeff, INT16_MIN, INT16_MAX);
- tmp32 = (int)((abs_coeff * wt * quant_ptr[rc != 0]) >>
- (16 - log_scale + AOM_QM_BITS));
- qcoeff_ptr[rc] = (tmp32 ^ coeff_sign) - coeff_sign;
- const tran_low_t abs_dqcoeff = (tmp32 * dequant) >> log_scale;
- dqcoeff_ptr[rc] = (abs_dqcoeff ^ coeff_sign) - coeff_sign;
- }
-
- // i is already a scan position in this path.
- if (tmp32) eob = i;
- }
- }
- *eob_ptr = eob + 1;
-}
-
-// Shared C implementation behind av1_highbd_quantize_fp_c() and the
-// quantization-matrix path of av1_highbd_quantize_fp_facade().
-//
-// Visits `count` coefficients in scan order and writes a quantized and a
-// dequantized value for every one of them (explicit zeros for coefficients
-// below the threshold; there is no up-front memset). *eob_ptr receives
-// 1 + the last scan position with a non-zero quantized value.
-//
-// Index 0 of round_ptr/quant_ptr/dequant_ptr is used for the coefficient at
-// position 0 and index 1 for all others. zbin_ptr, quant_shift_ptr and iscan
-// are accepted for signature compatibility and ignored. Unlike
-// quantize_fp_helper_c(), the rounded magnitude is not clamped to the
-// int16_t range.
-static void highbd_quantize_fp_helper_c(
- const tran_low_t *coeff_ptr, intptr_t count, const int16_t *zbin_ptr,
- const int16_t *round_ptr, const int16_t *quant_ptr,
- const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr,
- tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr,
- const int16_t *scan, const int16_t *iscan, const qm_val_t *qm_ptr,
- const qm_val_t *iqm_ptr, int log_scale) {
- int i;
- int eob = -1;
- const int shift = 16 - log_scale;
- // TODO(jingning) Decide the need of these arguments after the
- // quantization process is completed.
- (void)zbin_ptr;
- (void)quant_shift_ptr;
- (void)iscan;
-
- if (qm_ptr || iqm_ptr) {
- // Quantization-matrix path: taken when at least one matrix is supplied;
- // a NULL matrix is treated as a flat weight of (1 << AOM_QM_BITS).
- for (i = 0; i < count; i++) {
- const int rc = scan[i];
- const int coeff = coeff_ptr[rc];
- const qm_val_t wt = qm_ptr != NULL ? qm_ptr[rc] : (1 << AOM_QM_BITS);
- const qm_val_t iwt = iqm_ptr != NULL ? iqm_ptr[rc] : (1 << AOM_QM_BITS);
- // Dequant step scaled by the inverse weight, rounded to nearest.
- const int dequant =
- (dequant_ptr[rc != 0] * iwt + (1 << (AOM_QM_BITS - 1))) >>
- AOM_QM_BITS;
- // coeff_sign is 0 or -1; (v ^ sign) - sign gives |v| / restores the sign.
- const int coeff_sign = (coeff >> 31);
- const int64_t abs_coeff = (coeff ^ coeff_sign) - coeff_sign;
- int abs_qcoeff = 0;
- if (abs_coeff * wt >=
- (dequant_ptr[rc != 0] << (AOM_QM_BITS - (1 + log_scale)))) {
- const int64_t tmp =
- abs_coeff + ROUND_POWER_OF_TWO(round_ptr[rc != 0], log_scale);
- abs_qcoeff =
- (int)((tmp * quant_ptr[rc != 0] * wt) >> (shift + AOM_QM_BITS));
- qcoeff_ptr[rc] = (tran_low_t)((abs_qcoeff ^ coeff_sign) - coeff_sign);
- const tran_low_t abs_dqcoeff = (abs_qcoeff * dequant) >> log_scale;
- dqcoeff_ptr[rc] = (tran_low_t)((abs_dqcoeff ^ coeff_sign) - coeff_sign);
- if (abs_qcoeff) eob = i;
- } else {
- qcoeff_ptr[rc] = 0;
- dqcoeff_ptr[rc] = 0;
- }
- }
- } else {
- // Flat path: pre-scale both rounding terms once, outside the loop.
- const int log_scaled_round_arr[2] = {
- ROUND_POWER_OF_TWO(round_ptr[0], log_scale),
- ROUND_POWER_OF_TWO(round_ptr[1], log_scale),
- };
- for (i = 0; i < count; i++) {
- const int rc = scan[i];
- const int coeff = coeff_ptr[rc];
- // 0 selects the position-0 parameters, 1 the parameters for the rest.
- const int rc01 = (rc != 0);
- const int coeff_sign = (coeff >> 31);
- const int abs_coeff = (coeff ^ coeff_sign) - coeff_sign;
- const int log_scaled_round = log_scaled_round_arr[rc01];
- if ((abs_coeff << (1 + log_scale)) >= dequant_ptr[rc01]) {
- const int quant = quant_ptr[rc01];
- const int dequant = dequant_ptr[rc01];
- const int64_t tmp = (int64_t)abs_coeff + log_scaled_round;
- const int abs_qcoeff = (int)((tmp * quant) >> shift);
- qcoeff_ptr[rc] = (tran_low_t)((abs_qcoeff ^ coeff_sign) - coeff_sign);
- const tran_low_t abs_dqcoeff = (abs_qcoeff * dequant) >> log_scale;
- if (abs_qcoeff) eob = i;
- dqcoeff_ptr[rc] = (tran_low_t)((abs_dqcoeff ^ coeff_sign) - coeff_sign);
- } else {
- qcoeff_ptr[rc] = 0;
- dqcoeff_ptr[rc] = 0;
- }
- }
- }
- *eob_ptr = eob + 1;
-}
-
-// C reference quantizer for log_scale 0 without quantization matrices; a
-// thin wrapper around quantize_fp_helper_c().
-void av1_quantize_fp_c(const tran_low_t *coeff_ptr, intptr_t n_coeffs,
-                       const int16_t *zbin_ptr, const int16_t *round_ptr,
-                       const int16_t *quant_ptr, const int16_t *quant_shift_ptr,
-                       tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr,
-                       const int16_t *dequant_ptr, uint16_t *eob_ptr,
-                       const int16_t *scan, const int16_t *iscan) {
-  const int log_scale = 0;
-  quantize_fp_helper_c(coeff_ptr, n_coeffs, zbin_ptr, round_ptr, quant_ptr,
-                       quant_shift_ptr, qcoeff_ptr, dqcoeff_ptr, dequant_ptr,
-                       eob_ptr, scan, iscan, /*qm_ptr=*/NULL, /*iqm_ptr=*/NULL,
-                       log_scale);
-}
-
-// C reference quantizer for log_scale 1 without quantization matrices; a
-// thin wrapper around quantize_fp_helper_c().
-void av1_quantize_fp_32x32_c(const tran_low_t *coeff_ptr, intptr_t n_coeffs,
-                             const int16_t *zbin_ptr, const int16_t *round_ptr,
-                             const int16_t *quant_ptr,
-                             const int16_t *quant_shift_ptr,
-                             tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr,
-                             const int16_t *dequant_ptr, uint16_t *eob_ptr,
-                             const int16_t *scan, const int16_t *iscan) {
-  const int log_scale = 1;
-  quantize_fp_helper_c(coeff_ptr, n_coeffs, zbin_ptr, round_ptr, quant_ptr,
-                       quant_shift_ptr, qcoeff_ptr, dqcoeff_ptr, dequant_ptr,
-                       eob_ptr, scan, iscan, /*qm_ptr=*/NULL, /*iqm_ptr=*/NULL,
-                       log_scale);
-}
-
-// C reference quantizer for log_scale 2 without quantization matrices; a
-// thin wrapper around quantize_fp_helper_c().
-void av1_quantize_fp_64x64_c(const tran_low_t *coeff_ptr, intptr_t n_coeffs,
-                             const int16_t *zbin_ptr, const int16_t *round_ptr,
-                             const int16_t *quant_ptr,
-                             const int16_t *quant_shift_ptr,
-                             tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr,
-                             const int16_t *dequant_ptr, uint16_t *eob_ptr,
-                             const int16_t *scan, const int16_t *iscan) {
-  const int log_scale = 2;
-  quantize_fp_helper_c(coeff_ptr, n_coeffs, zbin_ptr, round_ptr, quant_ptr,
-                       quant_shift_ptr, qcoeff_ptr, dqcoeff_ptr, dequant_ptr,
-                       eob_ptr, scan, iscan, /*qm_ptr=*/NULL, /*iqm_ptr=*/NULL,
-                       log_scale);
-}
-
-// Dispatches the "fp" quantizer for one transform block.
-//  - Both quantization matrices present: always the C helper.
-//  - Otherwise: the implementation is chosen by qparam->log_scale (0, 1 or
-//    2); for log_scale 0, blocks of fewer than 16 coefficients use the C
-//    helper instead of av1_quantize_fp().
-// Any other log_scale trips assert(0).
-void av1_quantize_fp_facade(const tran_low_t *coeff_ptr, intptr_t n_coeffs,
-                            const MACROBLOCK_PLANE *p, tran_low_t *qcoeff_ptr,
-                            tran_low_t *dqcoeff_ptr, uint16_t *eob_ptr,
-                            const SCAN_ORDER *sc, const QUANT_PARAM *qparam) {
-  const qm_val_t *const qmatrix = qparam->qmatrix;
-  const qm_val_t *const iqmatrix = qparam->iqmatrix;
-
-  if (qmatrix != NULL && iqmatrix != NULL) {
-    quantize_fp_helper_c(coeff_ptr, n_coeffs, p->zbin_QTX, p->round_fp_QTX,
-                         p->quant_fp_QTX, p->quant_shift_QTX, qcoeff_ptr,
-                         dqcoeff_ptr, p->dequant_QTX, eob_ptr, sc->scan,
-                         sc->iscan, qmatrix, iqmatrix, qparam->log_scale);
-    return;
-  }
-
-  const int log_scale = qparam->log_scale;
-  if (log_scale == 0) {
-    if (n_coeffs < 16) {
-      // TODO(jingning): Need SIMD implementation for smaller block size
-      // quantization.
-      quantize_fp_helper_c(coeff_ptr, n_coeffs, p->zbin_QTX, p->round_fp_QTX,
-                           p->quant_fp_QTX, p->quant_shift_QTX, qcoeff_ptr,
-                           dqcoeff_ptr, p->dequant_QTX, eob_ptr, sc->scan,
-                           sc->iscan, NULL, NULL, 0);
-    } else {
-      av1_quantize_fp(coeff_ptr, n_coeffs, p->zbin_QTX, p->round_fp_QTX,
-                      p->quant_fp_QTX, p->quant_shift_QTX, qcoeff_ptr,
-                      dqcoeff_ptr, p->dequant_QTX, eob_ptr, sc->scan,
-                      sc->iscan);
-    }
-  } else if (log_scale == 1) {
-    av1_quantize_fp_32x32(coeff_ptr, n_coeffs, p->zbin_QTX, p->round_fp_QTX,
-                          p->quant_fp_QTX, p->quant_shift_QTX, qcoeff_ptr,
-                          dqcoeff_ptr, p->dequant_QTX, eob_ptr, sc->scan,
-                          sc->iscan);
-  } else if (log_scale == 2) {
-    av1_quantize_fp_64x64(coeff_ptr, n_coeffs, p->zbin_QTX, p->round_fp_QTX,
-                          p->quant_fp_QTX, p->quant_shift_QTX, qcoeff_ptr,
-                          dqcoeff_ptr, p->dequant_QTX, eob_ptr, sc->scan,
-                          sc->iscan);
-  } else {
-    assert(0);
-  }
-}
-
-// Dispatches the "b" quantizer for one transform block.
-//  - Both quantization matrices present: quantize_b_helper_c().
-//  - Otherwise: aom_quantize_b(), aom_quantize_b_32x32() or
-//    aom_quantize_b_64x64() for qparam->log_scale 0, 1 or 2 respectively.
-// Any other log_scale trips assert(0).
-void av1_quantize_b_facade(const tran_low_t *coeff_ptr, intptr_t n_coeffs,
-                           const MACROBLOCK_PLANE *p, tran_low_t *qcoeff_ptr,
-                           tran_low_t *dqcoeff_ptr, uint16_t *eob_ptr,
-                           const SCAN_ORDER *sc, const QUANT_PARAM *qparam) {
-  const qm_val_t *const qmatrix = qparam->qmatrix;
-  const qm_val_t *const iqmatrix = qparam->iqmatrix;
-
-  if (qmatrix != NULL && iqmatrix != NULL) {
-    quantize_b_helper_c(coeff_ptr, n_coeffs, p->zbin_QTX, p->round_QTX,
-                        p->quant_QTX, p->quant_shift_QTX, qcoeff_ptr,
-                        dqcoeff_ptr, p->dequant_QTX, eob_ptr, sc->scan,
-                        sc->iscan, qmatrix, iqmatrix, qparam->log_scale);
-    return;
-  }
-
-  const int log_scale = qparam->log_scale;
-  if (log_scale == 0) {
-    aom_quantize_b(coeff_ptr, n_coeffs, p->zbin_QTX, p->round_QTX,
-                   p->quant_QTX, p->quant_shift_QTX, qcoeff_ptr, dqcoeff_ptr,
-                   p->dequant_QTX, eob_ptr, sc->scan, sc->iscan);
-  } else if (log_scale == 1) {
-    aom_quantize_b_32x32(coeff_ptr, n_coeffs, p->zbin_QTX, p->round_QTX,
-                         p->quant_QTX, p->quant_shift_QTX, qcoeff_ptr,
-                         dqcoeff_ptr, p->dequant_QTX, eob_ptr, sc->scan,
-                         sc->iscan);
-  } else if (log_scale == 2) {
-    aom_quantize_b_64x64(coeff_ptr, n_coeffs, p->zbin_QTX, p->round_QTX,
-                         p->quant_QTX, p->quant_shift_QTX, qcoeff_ptr,
-                         dqcoeff_ptr, p->dequant_QTX, eob_ptr, sc->scan,
-                         sc->iscan);
-  } else {
-    assert(0);
-  }
-}
-
-// Quantizes only the coefficient at position 0 and zeroes the remaining
-// n_coeffs - 1 output entries.
-//
-// `quant` and `dequant_ptr` are scalar values (despite the latter's name).
-// A NULL qm_ptr/iqm_ptr means a flat weight of (1 << AOM_QM_BITS). When
-// skip_block is non-zero nothing is quantized. *eob_ptr is set to 1 if the
-// quantized value is non-zero and to 0 otherwise.
-static void quantize_dc(const tran_low_t *coeff_ptr, int n_coeffs,
-                        int skip_block, const int16_t *round_ptr,
-                        const int16_t quant, tran_low_t *qcoeff_ptr,
-                        tran_low_t *dqcoeff_ptr, const int16_t dequant_ptr,
-                        uint16_t *eob_ptr, const qm_val_t *qm_ptr,
-                        const qm_val_t *iqm_ptr, const int log_scale) {
-  // The input is read before the outputs are cleared.
-  const int dc_coeff = coeff_ptr[0];
-  const int sign_mask = (dc_coeff >> 31);  // 0 or -1
-  const int magnitude = (dc_coeff ^ sign_mask) - sign_mask;
-
-  memset(qcoeff_ptr, 0, n_coeffs * sizeof(*qcoeff_ptr));
-  memset(dqcoeff_ptr, 0, n_coeffs * sizeof(*dqcoeff_ptr));
-
-  if (skip_block) {
-    *eob_ptr = 0;
-    return;
-  }
-
-  const int wt = qm_ptr != NULL ? qm_ptr[0] : (1 << AOM_QM_BITS);
-  const int iwt = iqm_ptr != NULL ? iqm_ptr[0] : (1 << AOM_QM_BITS);
-
-  // Rounded magnitude, limited to the int16_t range; kept in 64 bits so the
-  // product with the weight and the quantizer is evaluated in 64 bits.
-  const int64_t rounded =
-      clamp(magnitude + ROUND_POWER_OF_TWO(round_ptr[0], log_scale), INT16_MIN,
-            INT16_MAX);
-  const int32_t abs_q =
-      (int32_t)((rounded * wt * quant) >> (16 - log_scale + AOM_QM_BITS));
-
-  // Dequant step scaled by the inverse weight, rounded to nearest.
-  const int dequant =
-      (dequant_ptr * iwt + (1 << (AOM_QM_BITS - 1))) >> AOM_QM_BITS;
-  const tran_low_t abs_dq = (abs_q * dequant) >> log_scale;
-
-  qcoeff_ptr[0] = (abs_q ^ sign_mask) - sign_mask;
-  dqcoeff_ptr[0] = (tran_low_t)((abs_dq ^ sign_mask) - sign_mask);
-  *eob_ptr = abs_q ? 1 : 0;
-}
-
-// Facade for DC-only quantization: forwards the plane's position-0
-// quantizer/dequantizer values and the optional quantization matrices to
-// quantize_dc(). The scan order is not needed and is ignored.
-void av1_quantize_dc_facade(const tran_low_t *coeff_ptr, intptr_t n_coeffs,
-                            const MACROBLOCK_PLANE *p, tran_low_t *qcoeff_ptr,
-                            tran_low_t *dqcoeff_ptr, uint16_t *eob_ptr,
-                            const SCAN_ORDER *sc, const QUANT_PARAM *qparam) {
-  (void)sc;
-  assert(qparam->log_scale >= 0 && qparam->log_scale < (3));
-
-  // skip_block is obsolete; quantization is always performed.
-  const int skip_block = 0;
-  const qm_val_t *const qmatrix = qparam->qmatrix;
-  const qm_val_t *const iqmatrix = qparam->iqmatrix;
-
-  quantize_dc(coeff_ptr, (int)n_coeffs, skip_block, p->round_QTX,
-              p->quant_fp_QTX[0], qcoeff_ptr, dqcoeff_ptr, p->dequant_QTX[0],
-              eob_ptr, qmatrix, iqmatrix, qparam->log_scale);
-}
-
-// Dispatches the high-bit-depth "fp" quantizer for one transform block.
-//  - Both quantization matrices present: the C helper with matrices.
-//  - Fewer than 16 coefficients: av1_highbd_quantize_fp_c().
-//  - Otherwise: av1_highbd_quantize_fp().
-void av1_highbd_quantize_fp_facade(const tran_low_t *coeff_ptr,
-                                   intptr_t n_coeffs, const MACROBLOCK_PLANE *p,
-                                   tran_low_t *qcoeff_ptr,
-                                   tran_low_t *dqcoeff_ptr, uint16_t *eob_ptr,
-                                   const SCAN_ORDER *sc,
-                                   const QUANT_PARAM *qparam) {
-  const qm_val_t *const qmatrix = qparam->qmatrix;
-  const qm_val_t *const iqmatrix = qparam->iqmatrix;
-
-  if (qmatrix != NULL && iqmatrix != NULL) {
-    highbd_quantize_fp_helper_c(
-        coeff_ptr, n_coeffs, p->zbin_QTX, p->round_fp_QTX, p->quant_fp_QTX,
-        p->quant_shift_QTX, qcoeff_ptr, dqcoeff_ptr, p->dequant_QTX, eob_ptr,
-        sc->scan, sc->iscan, qmatrix, iqmatrix, qparam->log_scale);
-    return;
-  }
-
-  if (n_coeffs < 16) {
-    // TODO(jingning): Need SIMD implementation for smaller block size
-    // quantization.
-    av1_highbd_quantize_fp_c(
-        coeff_ptr, n_coeffs, p->zbin_QTX, p->round_fp_QTX, p->quant_fp_QTX,
-        p->quant_shift_QTX, qcoeff_ptr, dqcoeff_ptr, p->dequant_QTX, eob_ptr,
-        sc->scan, sc->iscan, qparam->log_scale);
-    return;
-  }
-
-  av1_highbd_quantize_fp(coeff_ptr, n_coeffs, p->zbin_QTX, p->round_fp_QTX,
-                         p->quant_fp_QTX, p->quant_shift_QTX, qcoeff_ptr,
-                         dqcoeff_ptr, p->dequant_QTX, eob_ptr, sc->scan,
-                         sc->iscan, qparam->log_scale);
-}
-
-// Dispatches the high-bit-depth "b" quantizer for one transform block.
-//  - Both quantization matrices present: highbd_quantize_b_helper_c().
-//  - Otherwise the implementation is chosen by qparam->log_scale (0, 1 or
-//    2); for log_scale 0, blocks of fewer than 8 coefficients use the plain
-//    C version. Any other log_scale trips assert(0).
-void av1_highbd_quantize_b_facade(const tran_low_t *coeff_ptr,
-                                  intptr_t n_coeffs, const MACROBLOCK_PLANE *p,
-                                  tran_low_t *qcoeff_ptr,
-                                  tran_low_t *dqcoeff_ptr, uint16_t *eob_ptr,
-                                  const SCAN_ORDER *sc,
-                                  const QUANT_PARAM *qparam) {
-  const qm_val_t *const qmatrix = qparam->qmatrix;
-  const qm_val_t *const iqmatrix = qparam->iqmatrix;
-
-  if (qmatrix != NULL && iqmatrix != NULL) {
-    highbd_quantize_b_helper_c(coeff_ptr, n_coeffs, p->zbin_QTX, p->round_QTX,
-                               p->quant_QTX, p->quant_shift_QTX, qcoeff_ptr,
-                               dqcoeff_ptr, p->dequant_QTX, eob_ptr, sc->scan,
-                               sc->iscan, qmatrix, iqmatrix, qparam->log_scale);
-    return;
-  }
-
-  const int log_scale = qparam->log_scale;
-  if (log_scale == 0) {
-    if (LIKELY(n_coeffs >= 8)) {
-      aom_highbd_quantize_b(coeff_ptr, n_coeffs, p->zbin_QTX, p->round_QTX,
-                            p->quant_QTX, p->quant_shift_QTX, qcoeff_ptr,
-                            dqcoeff_ptr, p->dequant_QTX, eob_ptr, sc->scan,
-                            sc->iscan);
-    } else {
-      // TODO(luoyi): Need SIMD (e.g. sse2) for smaller block size
-      // quantization
-      aom_highbd_quantize_b_c(coeff_ptr, n_coeffs, p->zbin_QTX, p->round_QTX,
-                              p->quant_QTX, p->quant_shift_QTX, qcoeff_ptr,
-                              dqcoeff_ptr, p->dequant_QTX, eob_ptr, sc->scan,
-                              sc->iscan);
-    }
-  } else if (log_scale == 1) {
-    aom_highbd_quantize_b_32x32(
-        coeff_ptr, n_coeffs, p->zbin_QTX, p->round_QTX, p->quant_QTX,
-        p->quant_shift_QTX, qcoeff_ptr, dqcoeff_ptr, p->dequant_QTX, eob_ptr,
-        sc->scan, sc->iscan);
-  } else if (log_scale == 2) {
-    aom_highbd_quantize_b_64x64(
-        coeff_ptr, n_coeffs, p->zbin_QTX, p->round_QTX, p->quant_QTX,
-        p->quant_shift_QTX, qcoeff_ptr, dqcoeff_ptr, p->dequant_QTX, eob_ptr,
-        sc->scan, sc->iscan);
-  } else {
-    assert(0);
-  }
-}
-
-// High-bit-depth counterpart of quantize_dc(): quantizes only the
-// coefficient at position 0 and zeroes the rest of both output buffers.
-//
-// `quant` and `dequant_ptr` are scalar values. A NULL qm_ptr/iqm_ptr means a
-// flat weight of (1 << AOM_QM_BITS). The rounded magnitude is not clamped.
-// *eob_ptr is set to 1 if the quantized value is non-zero, else 0.
-static INLINE void highbd_quantize_dc(
-    const tran_low_t *coeff_ptr, int n_coeffs, int skip_block,
-    const int16_t *round_ptr, const int16_t quant, tran_low_t *qcoeff_ptr,
-    tran_low_t *dqcoeff_ptr, const int16_t dequant_ptr, uint16_t *eob_ptr,
-    const qm_val_t *qm_ptr, const qm_val_t *iqm_ptr, const int log_scale) {
-  memset(qcoeff_ptr, 0, n_coeffs * sizeof(*qcoeff_ptr));
-  memset(dqcoeff_ptr, 0, n_coeffs * sizeof(*dqcoeff_ptr));
-
-  if (skip_block) {
-    *eob_ptr = 0;
-    return;
-  }
-
-  const qm_val_t wt = qm_ptr != NULL ? qm_ptr[0] : (1 << AOM_QM_BITS);
-  const qm_val_t iwt = iqm_ptr != NULL ? iqm_ptr[0] : (1 << AOM_QM_BITS);
-
-  const int dc_coeff = coeff_ptr[0];
-  const int sign_mask = (dc_coeff >> 31);  // 0 or -1
-  const int magnitude = (dc_coeff ^ sign_mask) - sign_mask;
-
-  // Widen to 64 bits before multiplying by the weight and the quantizer.
-  const int64_t rounded =
-      magnitude + ROUND_POWER_OF_TWO(round_ptr[0], log_scale);
-  const int64_t weighted = rounded * wt;
-  const int abs_q = (int)((weighted * quant) >> (16 - log_scale + AOM_QM_BITS));
-
-  // Dequant step scaled by the inverse weight, rounded to nearest.
-  const int dequant =
-      (dequant_ptr * iwt + (1 << (AOM_QM_BITS - 1))) >> AOM_QM_BITS;
-  const tran_low_t abs_dq = (abs_q * dequant) >> log_scale;
-
-  qcoeff_ptr[0] = (tran_low_t)((abs_q ^ sign_mask) - sign_mask);
-  dqcoeff_ptr[0] = (tran_low_t)((abs_dq ^ sign_mask) - sign_mask);
-  *eob_ptr = abs_q ? 1 : 0;
-}
-
-// Facade for high-bit-depth DC-only quantization: forwards the plane's
-// position-0 quantizer/dequantizer values and the optional quantization
-// matrices to highbd_quantize_dc(). The scan order is ignored.
-void av1_highbd_quantize_dc_facade(const tran_low_t *coeff_ptr,
-                                   intptr_t n_coeffs, const MACROBLOCK_PLANE *p,
-                                   tran_low_t *qcoeff_ptr,
-                                   tran_low_t *dqcoeff_ptr, uint16_t *eob_ptr,
-                                   const SCAN_ORDER *sc,
-                                   const QUANT_PARAM *qparam) {
-  (void)sc;
-
-  // skip_block is obsolete; quantization is always performed.
-  const int skip_block = 0;
-  const qm_val_t *const qmatrix = qparam->qmatrix;
-  const qm_val_t *const iqmatrix = qparam->iqmatrix;
-
-  highbd_quantize_dc(coeff_ptr, (int)n_coeffs, skip_block, p->round_QTX,
-                     p->quant_fp_QTX[0], qcoeff_ptr, dqcoeff_ptr,
-                     p->dequant_QTX[0], eob_ptr, qmatrix, iqmatrix,
-                     qparam->log_scale);
-}
-
-// C reference high-bit-depth "fp" quantizer without quantization matrices;
-// a thin wrapper around highbd_quantize_fp_helper_c() that forwards the
-// caller's log_scale.
-void av1_highbd_quantize_fp_c(const tran_low_t *coeff_ptr, intptr_t count,
-                              const int16_t *zbin_ptr, const int16_t *round_ptr,
-                              const int16_t *quant_ptr,
-                              const int16_t *quant_shift_ptr,
-                              tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr,
-                              const int16_t *dequant_ptr, uint16_t *eob_ptr,
-                              const int16_t *scan, const int16_t *iscan,
-                              int log_scale) {
-  highbd_quantize_fp_helper_c(coeff_ptr, count, zbin_ptr, round_ptr, quant_ptr,
-                              quant_shift_ptr, qcoeff_ptr, dqcoeff_ptr,
-                              dequant_ptr, eob_ptr, scan, iscan,
-                              /*qm_ptr=*/NULL, /*iqm_ptr=*/NULL, log_scale);
-}
-
-// Derives a fixed-point reciprocal of the quantizer step d.
-// With l = floor(log2(d)):
-//   *quant = 1 + 2^(16 + l) / d - 2^16   (narrowed to int16_t)
-//   *shift = 2^(16 - l)
-// d must be positive (it is used as a divisor).
-static void invert_quant(int16_t *quant, int16_t *shift, int d) {
-  int log2_floor = 0;
-  for (uint32_t rem = d; rem > 1; rem >>= 1) ++log2_floor;
-
-  const int reciprocal = 1 + (1 << (16 + log2_floor)) / d;
-  *quant = (int16_t)(reciprocal - (1 << 16));
-  *shift = 1 << (16 - log2_floor);
-}
-
-// Returns the zero-bin factor for quantizer index q: 64 when q is 0,
-// otherwise 84 if the DC step is below a bit-depth-specific threshold and
-// 80 if not. An unsupported bit depth asserts and returns -1.
-static int get_qzbin_factor(int q, aom_bit_depth_t bit_depth) {
-  const int dc_step = av1_dc_quant_Q3(q, 0, bit_depth);
-  int small_step_limit;
-
-  switch (bit_depth) {
-    case AOM_BITS_8: small_step_limit = 148; break;
-    case AOM_BITS_10: small_step_limit = 592; break;
-    case AOM_BITS_12: small_step_limit = 2368; break;
-    default:
-      assert(0 && "bit_depth should be AOM_BITS_8, AOM_BITS_10 or AOM_BITS_12");
-      return -1;
-  }
-
-  if (q == 0) return 64;
-  return dc_step < small_step_limit ? 84 : 80;
-}
-
-// Fills the quantizer (`quants`) and dequantizer (`deq`) lookup tables for
-// every quantizer index in [0, QINDEX_RANGE) and for the Y, U and V planes.
-//
-// For each qindex, lane 0 holds the DC parameters and lane 1 the AC
-// parameters; lanes 2..7 replicate the AC value of the SAME plane so that
-// 8-wide SIMD code can load a full vector.
-//
-// bit_depth       coding bit depth, forwarded to the step-size lookups.
-// y_dc_delta_q    qindex offset applied to the luma DC step (luma AC uses 0).
-// u_*/v_*_delta_q qindex offsets for the chroma DC and AC steps.
-// quants, deq     output tables; every [q][0..7] entry is written.
-void av1_build_quantizer(aom_bit_depth_t bit_depth, int y_dc_delta_q,
-                         int u_dc_delta_q, int u_ac_delta_q, int v_dc_delta_q,
-                         int v_ac_delta_q, QUANTS *const quants,
-                         Dequants *const deq) {
-  int i, q, quant_Q3, quant_QTX;
-
-  for (q = 0; q < QINDEX_RANGE; q++) {
-    const int qzbin_factor = get_qzbin_factor(q, bit_depth);
-    const int qrounding_factor = q == 0 ? 64 : 48;
-
-    // i == 0: DC, i == 1: AC.
-    for (i = 0; i < 2; ++i) {
-      int qrounding_factor_fp = 64;
-      // y quantizer setup with original coeff shift of Q3
-      quant_Q3 = i == 0 ? av1_dc_quant_Q3(q, y_dc_delta_q, bit_depth)
-                        : av1_ac_quant_Q3(q, 0, bit_depth);
-      // y quantizer with TX scale
-      quant_QTX = i == 0 ? av1_dc_quant_QTX(q, y_dc_delta_q, bit_depth)
-                         : av1_ac_quant_QTX(q, 0, bit_depth);
-      invert_quant(&quants->y_quant[q][i], &quants->y_quant_shift[q][i],
-                   quant_QTX);
-      quants->y_quant_fp[q][i] = (1 << 16) / quant_QTX;
-      quants->y_round_fp[q][i] = (qrounding_factor_fp * quant_QTX) >> 7;
-      quants->y_zbin[q][i] = ROUND_POWER_OF_TWO(qzbin_factor * quant_QTX, 7);
-      quants->y_round[q][i] = (qrounding_factor * quant_QTX) >> 7;
-      deq->y_dequant_QTX[q][i] = quant_QTX;
-      deq->y_dequant_Q3[q][i] = quant_Q3;
-
-      // u quantizer setup with original coeff shift of Q3
-      quant_Q3 = i == 0 ? av1_dc_quant_Q3(q, u_dc_delta_q, bit_depth)
-                        : av1_ac_quant_Q3(q, u_ac_delta_q, bit_depth);
-      // u quantizer with TX scale
-      quant_QTX = i == 0 ? av1_dc_quant_QTX(q, u_dc_delta_q, bit_depth)
-                         : av1_ac_quant_QTX(q, u_ac_delta_q, bit_depth);
-      invert_quant(&quants->u_quant[q][i], &quants->u_quant_shift[q][i],
-                   quant_QTX);
-      quants->u_quant_fp[q][i] = (1 << 16) / quant_QTX;
-      quants->u_round_fp[q][i] = (qrounding_factor_fp * quant_QTX) >> 7;
-      quants->u_zbin[q][i] = ROUND_POWER_OF_TWO(qzbin_factor * quant_QTX, 7);
-      quants->u_round[q][i] = (qrounding_factor * quant_QTX) >> 7;
-      deq->u_dequant_QTX[q][i] = quant_QTX;
-      deq->u_dequant_Q3[q][i] = quant_Q3;
-
-      // v quantizer setup with original coeff shift of Q3
-      quant_Q3 = i == 0 ? av1_dc_quant_Q3(q, v_dc_delta_q, bit_depth)
-                        : av1_ac_quant_Q3(q, v_ac_delta_q, bit_depth);
-      // v quantizer with TX scale
-      quant_QTX = i == 0 ? av1_dc_quant_QTX(q, v_dc_delta_q, bit_depth)
-                         : av1_ac_quant_QTX(q, v_ac_delta_q, bit_depth);
-      invert_quant(&quants->v_quant[q][i], &quants->v_quant_shift[q][i],
-                   quant_QTX);
-      quants->v_quant_fp[q][i] = (1 << 16) / quant_QTX;
-      quants->v_round_fp[q][i] = (qrounding_factor_fp * quant_QTX) >> 7;
-      quants->v_zbin[q][i] = ROUND_POWER_OF_TWO(qzbin_factor * quant_QTX, 7);
-      quants->v_round[q][i] = (qrounding_factor * quant_QTX) >> 7;
-      deq->v_dequant_QTX[q][i] = quant_QTX;
-      deq->v_dequant_Q3[q][i] = quant_Q3;
-    }
-
-    // Replicate each plane's AC entry (lane 1) into lanes 2..7.
-    for (i = 2; i < 8; i++) {  // 8: SIMD width
-      quants->y_quant[q][i] = quants->y_quant[q][1];
-      quants->y_quant_fp[q][i] = quants->y_quant_fp[q][1];
-      quants->y_round_fp[q][i] = quants->y_round_fp[q][1];
-      quants->y_quant_shift[q][i] = quants->y_quant_shift[q][1];
-      quants->y_zbin[q][i] = quants->y_zbin[q][1];
-      quants->y_round[q][i] = quants->y_round[q][1];
-      deq->y_dequant_QTX[q][i] = deq->y_dequant_QTX[q][1];
-      deq->y_dequant_Q3[q][i] = deq->y_dequant_Q3[q][1];
-
-      quants->u_quant[q][i] = quants->u_quant[q][1];
-      quants->u_quant_fp[q][i] = quants->u_quant_fp[q][1];
-      quants->u_round_fp[q][i] = quants->u_round_fp[q][1];
-      quants->u_quant_shift[q][i] = quants->u_quant_shift[q][1];
-      quants->u_zbin[q][i] = quants->u_zbin[q][1];
-      quants->u_round[q][i] = quants->u_round[q][1];
-      deq->u_dequant_QTX[q][i] = deq->u_dequant_QTX[q][1];
-      deq->u_dequant_Q3[q][i] = deq->u_dequant_Q3[q][1];
-
-      // The V lanes must replicate the V-plane AC quantizer. Copying the
-      // U-plane value here would give lanes 2..7 the wrong multiplier
-      // whenever u_ac_delta_q and v_ac_delta_q differ.
-      quants->v_quant[q][i] = quants->v_quant[q][1];
-      quants->v_quant_fp[q][i] = quants->v_quant_fp[q][1];
-      quants->v_round_fp[q][i] = quants->v_round_fp[q][1];
-      quants->v_quant_shift[q][i] = quants->v_quant_shift[q][1];
-      quants->v_zbin[q][i] = quants->v_zbin[q][1];
-      quants->v_round[q][i] = quants->v_round[q][1];
-      deq->v_dequant_QTX[q][i] = deq->v_dequant_QTX[q][1];
-      deq->v_dequant_Q3[q][i] = deq->v_dequant_Q3[q][1];
-    }
-  }
-}
-
-// Rebuilds the encoder's quantizer and dequantizer tables from the bit depth
-// and the per-plane delta-q values currently stored in cpi->common.
-void av1_init_quantizer(AV1_COMP *cpi) {
-  AV1_COMMON *const cm = &cpi->common;
-
-  av1_build_quantizer(cm->seq_params.bit_depth, cm->y_dc_delta_q,
-                      cm->u_dc_delta_q, cm->u_ac_delta_q, cm->v_dc_delta_q,
-                      cm->v_ac_delta_q, &cpi->quants, &cpi->dequants);
-}
-
-// Points the per-plane quantizer tables of macroblock `x` at the rows that
-// match the effective qindex of `segment_id`, copies the matching
-// quantization matrices into the segment slot of each plane, and refreshes
-// the state derived from the qindex (skip flag, x->qindex, error-per-bit and
-// motion-estimation constants).
-void av1_init_plane_quantizers(const AV1_COMP *cpi, MACROBLOCK *x,
-                               int segment_id) {
-  const AV1_COMMON *const cm = &cpi->common;
-  MACROBLOCKD *const xd = &x->e_mbd;
-  const QUANTS *const quants = &cpi->quants;
-
-  // Base qindex, plus the block-level delta when delta-q is enabled, clamped
-  // to the valid qindex range.
-  int current_qindex = AOMMAX(
-      0, AOMMIN(QINDEX_RANGE - 1, cpi->oxcf.deltaq_mode != NO_DELTA_Q
-                                      ? cm->base_qindex + xd->delta_qindex
-                                      : cm->base_qindex));
-  const int qindex = av1_get_qindex(&cm->seg, segment_id, current_qindex);
-  const int rdmult = av1_compute_rd_mult(cpi, qindex + cm->y_dc_delta_q);
-
-  // Lossless segments, and frames without quantization matrices, use the
-  // last matrix level for every plane.
-  const int use_last_qmlevel =
-      (xd->lossless[segment_id] || cm->using_qmatrix == 0);
-  int qmlevel;
-
-  // Y
-  qmlevel = use_last_qmlevel ? NUM_QM_LEVELS - 1 : cm->qm_y;
-  x->plane[0].quant_QTX = quants->y_quant[qindex];
-  x->plane[0].quant_fp_QTX = quants->y_quant_fp[qindex];
-  x->plane[0].round_fp_QTX = quants->y_round_fp[qindex];
-  x->plane[0].quant_shift_QTX = quants->y_quant_shift[qindex];
-  x->plane[0].zbin_QTX = quants->y_zbin[qindex];
-  x->plane[0].round_QTX = quants->y_round[qindex];
-  x->plane[0].dequant_QTX = cpi->dequants.y_dequant_QTX[qindex];
-  memcpy(&xd->plane[0].seg_qmatrix[segment_id], cm->gqmatrix[qmlevel][0],
-         sizeof(cm->gqmatrix[qmlevel][0]));
-  memcpy(&xd->plane[0].seg_iqmatrix[segment_id], cm->giqmatrix[qmlevel][0],
-         sizeof(cm->giqmatrix[qmlevel][0]));
-  xd->plane[0].dequant_Q3 = cpi->dequants.y_dequant_Q3[qindex];
-
-  // U
-  qmlevel = use_last_qmlevel ? NUM_QM_LEVELS - 1 : cm->qm_u;
-  x->plane[1].quant_QTX = quants->u_quant[qindex];
-  x->plane[1].quant_fp_QTX = quants->u_quant_fp[qindex];
-  x->plane[1].round_fp_QTX = quants->u_round_fp[qindex];
-  x->plane[1].quant_shift_QTX = quants->u_quant_shift[qindex];
-  x->plane[1].zbin_QTX = quants->u_zbin[qindex];
-  x->plane[1].round_QTX = quants->u_round[qindex];
-  x->plane[1].dequant_QTX = cpi->dequants.u_dequant_QTX[qindex];
-  memcpy(&xd->plane[1].seg_qmatrix[segment_id], cm->gqmatrix[qmlevel][1],
-         sizeof(cm->gqmatrix[qmlevel][1]));
-  memcpy(&xd->plane[1].seg_iqmatrix[segment_id], cm->giqmatrix[qmlevel][1],
-         sizeof(cm->giqmatrix[qmlevel][1]));
-  xd->plane[1].dequant_Q3 = cpi->dequants.u_dequant_Q3[qindex];
-
-  // V
-  qmlevel = use_last_qmlevel ? NUM_QM_LEVELS - 1 : cm->qm_v;
-  x->plane[2].quant_QTX = quants->v_quant[qindex];
-  x->plane[2].quant_fp_QTX = quants->v_quant_fp[qindex];
-  x->plane[2].round_fp_QTX = quants->v_round_fp[qindex];
-  x->plane[2].quant_shift_QTX = quants->v_quant_shift[qindex];
-  x->plane[2].zbin_QTX = quants->v_zbin[qindex];
-  x->plane[2].round_QTX = quants->v_round[qindex];
-  x->plane[2].dequant_QTX = cpi->dequants.v_dequant_QTX[qindex];
-  memcpy(&xd->plane[2].seg_qmatrix[segment_id], cm->gqmatrix[qmlevel][2],
-         sizeof(cm->gqmatrix[qmlevel][2]));
-  memcpy(&xd->plane[2].seg_iqmatrix[segment_id], cm->giqmatrix[qmlevel][2],
-         sizeof(cm->giqmatrix[qmlevel][2]));
-  xd->plane[2].dequant_Q3 = cpi->dequants.v_dequant_Q3[qindex];
-
-  x->skip_block = segfeature_active(&cm->seg, segment_id, SEG_LVL_SKIP);
-  x->qindex = qindex;
-
-  set_error_per_bit(x, rdmult);
-
-  av1_initialize_me_consts(cpi, x, qindex);
-}
-
-// Initializes the plane quantizers of the encoder's main-thread macroblock
-// (cpi->td.mb) for the segment of its first mode-info entry.
-void av1_frame_init_quantizer(AV1_COMP *cpi) {
-  MACROBLOCK *const mb = &cpi->td.mb;
-  const MACROBLOCKD *const mbd = &mb->e_mbd;
-
-  av1_init_plane_quantizers(cpi, mb, mbd->mi[0]->segment_id);
-}
-
-// Sets the frame-level base qindex, resets all per-plane delta-q values to
-// zero and derives the quantization-matrix level of each plane.
-//
-// The base qindex is max(cm->delta_q_present_flag, q). The quantizer tables
-// are NOT rebuilt here: av1_init_quantizer() has to be called again if any
-// delta_q changes.
-void av1_set_quantizer(AV1_COMMON *cm, int q) {
-  cm->base_qindex = AOMMAX(cm->delta_q_present_flag, q);
-
-  cm->y_dc_delta_q = 0;
-  cm->u_dc_delta_q = 0;
-  cm->u_ac_delta_q = 0;
-  cm->v_dc_delta_q = 0;
-  cm->v_ac_delta_q = 0;
-
-  cm->qm_y = aom_get_qmlevel(cm->base_qindex, cm->min_qmlevel, cm->max_qmlevel);
-  cm->qm_u = aom_get_qmlevel(cm->base_qindex + cm->u_ac_delta_q,
-                             cm->min_qmlevel, cm->max_qmlevel);
-
-  // V shares the U level unless the sequence signals separate U/V deltas.
-  if (cm->seq_params.separate_uv_delta_q) {
-    cm->qm_v = aom_get_qmlevel(cm->base_qindex + cm->v_ac_delta_q,
-                               cm->min_qmlevel, cm->max_qmlevel);
-  } else {
-    cm->qm_v = cm->qm_u;
-  }
-}
-
-// Table that converts 0-63 Q-range values passed in outside to the Qindex
-// range used internally.
-static const int quantizer_to_qindex[] = {
-  0,   4,   8,   12,  16,  20,  24,  28,  32,  36,  40,  44,  48,
-  52,  56,  60,  64,  68,  72,  76,  80,  84,  88,  92,  96,  100,
-  104, 108, 112, 116, 120, 124, 128, 132, 136, 140, 144, 148, 152,
-  156, 160, 164, 168, 172, 176, 180, 184, 188, 192, 196, 200, 204,
-  208, 212, 216, 220, 224, 228, 232, 236, 240, 244, 249, 255,
-};
-
-// Maps an external quantizer value to the internal qindex.
-// `quantizer` must be a valid table index (0..63); this is asserted in debug
-// builds because an out-of-range value would read past the table.
-int av1_quantizer_to_qindex(int quantizer) {
-  const int num_quantizers =
-      (int)(sizeof(quantizer_to_qindex) / sizeof(quantizer_to_qindex[0]));
-  assert(quantizer >= 0 && quantizer < num_quantizers);
-  (void)num_quantizers;  // Only referenced by the assert.
-  return quantizer_to_qindex[quantizer];
-}
-
-// Maps an internal qindex back to the smallest external quantizer whose
-// table entry is >= qindex. Values above the last table entry map to the
-// last quantizer.
-int av1_qindex_to_quantizer(int qindex) {
-  // Derived from the table so the bound cannot drift from its length.
-  const int num_quantizers =
-      (int)(sizeof(quantizer_to_qindex) / sizeof(quantizer_to_qindex[0]));
-
-  for (int quantizer = 0; quantizer < num_quantizers; ++quantizer)
-    if (quantizer_to_qindex[quantizer] >= qindex) return quantizer;
-
-  return num_quantizers - 1;
-}
diff --git a/third_party/aom/av1/encoder/av1_quantize.h b/third_party/aom/av1/encoder/av1_quantize.h
deleted file mode 100644
index 35af9a67a..000000000
--- a/third_party/aom/av1/encoder/av1_quantize.h
+++ /dev/null
@@ -1,148 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#ifndef AOM_AV1_ENCODER_AV1_QUANTIZE_H_
-#define AOM_AV1_ENCODER_AV1_QUANTIZE_H_
-
-#include "config/aom_config.h"
-
-#include "av1/common/quant_common.h"
-#include "av1/common/scan.h"
-#include "av1/encoder/block.h"
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-typedef struct QUANT_PARAM {
- int log_scale;
- TX_SIZE tx_size;
- const qm_val_t *qmatrix;
- const qm_val_t *iqmatrix;
-} QUANT_PARAM;
-
-typedef void (*AV1_QUANT_FACADE)(const tran_low_t *coeff_ptr, intptr_t n_coeffs,
- const MACROBLOCK_PLANE *p,
- tran_low_t *qcoeff_ptr,
- tran_low_t *dqcoeff_ptr, uint16_t *eob_ptr,
- const SCAN_ORDER *sc,
- const QUANT_PARAM *qparam);
-
-// The QUANTS structure is used only for internal quantizer setup in
-// av1_quantize.c.
-// All of its fields use the same coefficient shift/scaling at TX.
-typedef struct {
- // 0: dc 1: ac 2-8: ac repeated to SIMD width
- DECLARE_ALIGNED(16, int16_t, y_quant[QINDEX_RANGE][8]);
- DECLARE_ALIGNED(16, int16_t, y_quant_shift[QINDEX_RANGE][8]);
- DECLARE_ALIGNED(16, int16_t, y_zbin[QINDEX_RANGE][8]);
- DECLARE_ALIGNED(16, int16_t, y_round[QINDEX_RANGE][8]);
-
- // TODO(jingning): in progress of re-working the quantization. will decide
- // if we want to deprecate the current use of y_quant.
- DECLARE_ALIGNED(16, int16_t, y_quant_fp[QINDEX_RANGE][8]);
- DECLARE_ALIGNED(16, int16_t, u_quant_fp[QINDEX_RANGE][8]);
- DECLARE_ALIGNED(16, int16_t, v_quant_fp[QINDEX_RANGE][8]);
- DECLARE_ALIGNED(16, int16_t, y_round_fp[QINDEX_RANGE][8]);
- DECLARE_ALIGNED(16, int16_t, u_round_fp[QINDEX_RANGE][8]);
- DECLARE_ALIGNED(16, int16_t, v_round_fp[QINDEX_RANGE][8]);
-
- DECLARE_ALIGNED(16, int16_t, u_quant[QINDEX_RANGE][8]);
- DECLARE_ALIGNED(16, int16_t, v_quant[QINDEX_RANGE][8]);
- DECLARE_ALIGNED(16, int16_t, u_quant_shift[QINDEX_RANGE][8]);
- DECLARE_ALIGNED(16, int16_t, v_quant_shift[QINDEX_RANGE][8]);
- DECLARE_ALIGNED(16, int16_t, u_zbin[QINDEX_RANGE][8]);
- DECLARE_ALIGNED(16, int16_t, v_zbin[QINDEX_RANGE][8]);
- DECLARE_ALIGNED(16, int16_t, u_round[QINDEX_RANGE][8]);
- DECLARE_ALIGNED(16, int16_t, v_round[QINDEX_RANGE][8]);
-} QUANTS;
-
-// The Dequants structure is used only for internal quantizer setup in
-// av1_quantize.c.
-// Fields are sufffixed according to whether or not they're expressed in
-// the same coefficient shift/precision as TX or a fixed Q3 format.
-typedef struct {
- DECLARE_ALIGNED(16, int16_t,
- y_dequant_QTX[QINDEX_RANGE][8]); // 8: SIMD width
- DECLARE_ALIGNED(16, int16_t,
- u_dequant_QTX[QINDEX_RANGE][8]); // 8: SIMD width
- DECLARE_ALIGNED(16, int16_t,
- v_dequant_QTX[QINDEX_RANGE][8]); // 8: SIMD width
- DECLARE_ALIGNED(16, int16_t, y_dequant_Q3[QINDEX_RANGE][8]); // 8: SIMD width
- DECLARE_ALIGNED(16, int16_t, u_dequant_Q3[QINDEX_RANGE][8]); // 8: SIMD width
- DECLARE_ALIGNED(16, int16_t, v_dequant_Q3[QINDEX_RANGE][8]); // 8: SIMD width
-} Dequants;
-
-struct AV1_COMP;
-struct AV1Common;
-
-void av1_frame_init_quantizer(struct AV1_COMP *cpi);
-
-void av1_init_plane_quantizers(const struct AV1_COMP *cpi, MACROBLOCK *x,
- int segment_id);
-
-void av1_build_quantizer(aom_bit_depth_t bit_depth, int y_dc_delta_q,
- int u_dc_delta_q, int u_ac_delta_q, int v_dc_delta_q,
- int v_ac_delta_q, QUANTS *const quants,
- Dequants *const deq);
-
-void av1_init_quantizer(struct AV1_COMP *cpi);
-
-void av1_set_quantizer(struct AV1Common *cm, int q);
-
-int av1_quantizer_to_qindex(int quantizer);
-
-int av1_qindex_to_quantizer(int qindex);
-
-void av1_quantize_skip(intptr_t n_coeffs, tran_low_t *qcoeff_ptr,
- tran_low_t *dqcoeff_ptr, uint16_t *eob_ptr);
-
-void av1_quantize_fp_facade(const tran_low_t *coeff_ptr, intptr_t n_coeffs,
- const MACROBLOCK_PLANE *p, tran_low_t *qcoeff_ptr,
- tran_low_t *dqcoeff_ptr, uint16_t *eob_ptr,
- const SCAN_ORDER *sc, const QUANT_PARAM *qparam);
-
-void av1_quantize_b_facade(const tran_low_t *coeff_ptr, intptr_t n_coeffs,
- const MACROBLOCK_PLANE *p, tran_low_t *qcoeff_ptr,
- tran_low_t *dqcoeff_ptr, uint16_t *eob_ptr,
- const SCAN_ORDER *sc, const QUANT_PARAM *qparam);
-
-void av1_quantize_dc_facade(const tran_low_t *coeff_ptr, intptr_t n_coeffs,
- const MACROBLOCK_PLANE *p, tran_low_t *qcoeff_ptr,
- tran_low_t *dqcoeff_ptr, uint16_t *eob_ptr,
- const SCAN_ORDER *sc, const QUANT_PARAM *qparam);
-
-void av1_highbd_quantize_fp_facade(const tran_low_t *coeff_ptr,
- intptr_t n_coeffs, const MACROBLOCK_PLANE *p,
- tran_low_t *qcoeff_ptr,
- tran_low_t *dqcoeff_ptr, uint16_t *eob_ptr,
- const SCAN_ORDER *sc,
- const QUANT_PARAM *qparam);
-
-void av1_highbd_quantize_b_facade(const tran_low_t *coeff_ptr,
- intptr_t n_coeffs, const MACROBLOCK_PLANE *p,
- tran_low_t *qcoeff_ptr,
- tran_low_t *dqcoeff_ptr, uint16_t *eob_ptr,
- const SCAN_ORDER *sc,
- const QUANT_PARAM *qparam);
-
-void av1_highbd_quantize_dc_facade(const tran_low_t *coeff_ptr,
- intptr_t n_coeffs, const MACROBLOCK_PLANE *p,
- tran_low_t *qcoeff_ptr,
- tran_low_t *dqcoeff_ptr, uint16_t *eob_ptr,
- const SCAN_ORDER *sc,
- const QUANT_PARAM *qparam);
-
-#ifdef __cplusplus
-} // extern "C"
-#endif
-
-#endif // AOM_AV1_ENCODER_AV1_QUANTIZE_H_
diff --git a/third_party/aom/av1/encoder/bitstream.c b/third_party/aom/av1/encoder/bitstream.c
deleted file mode 100644
index 2c4acdb02..000000000
--- a/third_party/aom/av1/encoder/bitstream.c
+++ /dev/null
@@ -1,3999 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#include <assert.h>
-#include <limits.h>
-#include <stdio.h>
-
-#include "aom/aom_encoder.h"
-#include "aom_dsp/aom_dsp_common.h"
-#include "aom_dsp/binary_codes_writer.h"
-#include "aom_dsp/bitwriter_buffer.h"
-#include "aom_mem/aom_mem.h"
-#include "aom_ports/bitops.h"
-#include "aom_ports/mem_ops.h"
-#include "aom_ports/system_state.h"
-#if CONFIG_BITSTREAM_DEBUG
-#include "aom_util/debug_util.h"
-#endif // CONFIG_BITSTREAM_DEBUG
-
-#include "av1/common/cdef.h"
-#include "av1/common/cfl.h"
-#include "av1/common/entropy.h"
-#include "av1/common/entropymode.h"
-#include "av1/common/entropymv.h"
-#include "av1/common/mvref_common.h"
-#include "av1/common/pred_common.h"
-#include "av1/common/reconinter.h"
-#include "av1/common/reconintra.h"
-#include "av1/common/seg_common.h"
-#include "av1/common/tile_common.h"
-
-#include "av1/encoder/bitstream.h"
-#include "av1/encoder/cost.h"
-#include "av1/encoder/encodemv.h"
-#include "av1/encoder/encodetxb.h"
-#include "av1/encoder/mcomp.h"
-#include "av1/encoder/palette.h"
-#include "av1/encoder/segmentation.h"
-#include "av1/encoder/tokenize.h"
-
-#define ENC_MISMATCH_DEBUG 0
-
-static INLINE void write_uniform(aom_writer *w, int n, int v) {
- const int l = get_unsigned_bits(n);
- const int m = (1 << l) - n;
- if (l == 0) return;
- if (v < m) {
- aom_write_literal(w, v, l - 1);
- } else {
- aom_write_literal(w, m + ((v - m) >> 1), l - 1);
- aom_write_literal(w, (v - m) & 1, 1);
- }
-}
-
-static void loop_restoration_write_sb_coeffs(const AV1_COMMON *const cm,
- MACROBLOCKD *xd,
- const RestorationUnitInfo *rui,
- aom_writer *const w, int plane,
- FRAME_COUNTS *counts);
-
-static void write_intra_y_mode_kf(FRAME_CONTEXT *frame_ctx,
- const MB_MODE_INFO *mi,
- const MB_MODE_INFO *above_mi,
- const MB_MODE_INFO *left_mi,
- PREDICTION_MODE mode, aom_writer *w) {
- assert(!is_intrabc_block(mi));
- (void)mi;
- aom_write_symbol(w, mode, get_y_mode_cdf(frame_ctx, above_mi, left_mi),
- INTRA_MODES);
-}
-
-static void write_inter_mode(aom_writer *w, PREDICTION_MODE mode,
- FRAME_CONTEXT *ec_ctx, const int16_t mode_ctx) {
- const int16_t newmv_ctx = mode_ctx & NEWMV_CTX_MASK;
-
- aom_write_symbol(w, mode != NEWMV, ec_ctx->newmv_cdf[newmv_ctx], 2);
-
- if (mode != NEWMV) {
- const int16_t zeromv_ctx =
- (mode_ctx >> GLOBALMV_OFFSET) & GLOBALMV_CTX_MASK;
- aom_write_symbol(w, mode != GLOBALMV, ec_ctx->zeromv_cdf[zeromv_ctx], 2);
-
- if (mode != GLOBALMV) {
- int16_t refmv_ctx = (mode_ctx >> REFMV_OFFSET) & REFMV_CTX_MASK;
- aom_write_symbol(w, mode != NEARESTMV, ec_ctx->refmv_cdf[refmv_ctx], 2);
- }
- }
-}
-
-static void write_drl_idx(FRAME_CONTEXT *ec_ctx, const MB_MODE_INFO *mbmi,
- const MB_MODE_INFO_EXT *mbmi_ext, aom_writer *w) {
- uint8_t ref_frame_type = av1_ref_frame_type(mbmi->ref_frame);
-
- assert(mbmi->ref_mv_idx < 3);
-
- const int new_mv = mbmi->mode == NEWMV || mbmi->mode == NEW_NEWMV;
- if (new_mv) {
- int idx;
- for (idx = 0; idx < 2; ++idx) {
- if (mbmi_ext->ref_mv_count[ref_frame_type] > idx + 1) {
- uint8_t drl_ctx =
- av1_drl_ctx(mbmi_ext->ref_mv_stack[ref_frame_type], idx);
-
- aom_write_symbol(w, mbmi->ref_mv_idx != idx, ec_ctx->drl_cdf[drl_ctx],
- 2);
- if (mbmi->ref_mv_idx == idx) return;
- }
- }
- return;
- }
-
- if (have_nearmv_in_inter_mode(mbmi->mode)) {
- int idx;
- // TODO(jingning): Temporary solution to compensate the NEARESTMV offset.
- for (idx = 1; idx < 3; ++idx) {
- if (mbmi_ext->ref_mv_count[ref_frame_type] > idx + 1) {
- uint8_t drl_ctx =
- av1_drl_ctx(mbmi_ext->ref_mv_stack[ref_frame_type], idx);
- aom_write_symbol(w, mbmi->ref_mv_idx != (idx - 1),
- ec_ctx->drl_cdf[drl_ctx], 2);
- if (mbmi->ref_mv_idx == (idx - 1)) return;
- }
- }
- return;
- }
-}
-
-static void write_inter_compound_mode(MACROBLOCKD *xd, aom_writer *w,
- PREDICTION_MODE mode,
- const int16_t mode_ctx) {
- assert(is_inter_compound_mode(mode));
- aom_write_symbol(w, INTER_COMPOUND_OFFSET(mode),
- xd->tile_ctx->inter_compound_mode_cdf[mode_ctx],
- INTER_COMPOUND_MODES);
-}
-
-static void write_tx_size_vartx(MACROBLOCKD *xd, const MB_MODE_INFO *mbmi,
- TX_SIZE tx_size, int depth, int blk_row,
- int blk_col, aom_writer *w) {
- FRAME_CONTEXT *ec_ctx = xd->tile_ctx;
- const int max_blocks_high = max_block_high(xd, mbmi->sb_type, 0);
- const int max_blocks_wide = max_block_wide(xd, mbmi->sb_type, 0);
-
- if (blk_row >= max_blocks_high || blk_col >= max_blocks_wide) return;
-
- if (depth == MAX_VARTX_DEPTH) {
- txfm_partition_update(xd->above_txfm_context + blk_col,
- xd->left_txfm_context + blk_row, tx_size, tx_size);
- return;
- }
-
- const int ctx = txfm_partition_context(xd->above_txfm_context + blk_col,
- xd->left_txfm_context + blk_row,
- mbmi->sb_type, tx_size);
- const int txb_size_index =
- av1_get_txb_size_index(mbmi->sb_type, blk_row, blk_col);
- const int write_txfm_partition =
- tx_size == mbmi->inter_tx_size[txb_size_index];
- if (write_txfm_partition) {
- aom_write_symbol(w, 0, ec_ctx->txfm_partition_cdf[ctx], 2);
-
- txfm_partition_update(xd->above_txfm_context + blk_col,
- xd->left_txfm_context + blk_row, tx_size, tx_size);
- // TODO(yuec): set correct txfm partition update for qttx
- } else {
- const TX_SIZE sub_txs = sub_tx_size_map[tx_size];
- const int bsw = tx_size_wide_unit[sub_txs];
- const int bsh = tx_size_high_unit[sub_txs];
-
- aom_write_symbol(w, 1, ec_ctx->txfm_partition_cdf[ctx], 2);
-
- if (sub_txs == TX_4X4) {
- txfm_partition_update(xd->above_txfm_context + blk_col,
- xd->left_txfm_context + blk_row, sub_txs, tx_size);
- return;
- }
-
- assert(bsw > 0 && bsh > 0);
- for (int row = 0; row < tx_size_high_unit[tx_size]; row += bsh)
- for (int col = 0; col < tx_size_wide_unit[tx_size]; col += bsw) {
- int offsetr = blk_row + row;
- int offsetc = blk_col + col;
- write_tx_size_vartx(xd, mbmi, sub_txs, depth + 1, offsetr, offsetc, w);
- }
- }
-}
-
-static void write_selected_tx_size(const MACROBLOCKD *xd, aom_writer *w) {
- const MB_MODE_INFO *const mbmi = xd->mi[0];
- const BLOCK_SIZE bsize = mbmi->sb_type;
- FRAME_CONTEXT *ec_ctx = xd->tile_ctx;
- if (block_signals_txsize(bsize)) {
- const TX_SIZE tx_size = mbmi->tx_size;
- const int tx_size_ctx = get_tx_size_context(xd);
- const int depth = tx_size_to_depth(tx_size, bsize);
- const int max_depths = bsize_to_max_depth(bsize);
- const int32_t tx_size_cat = bsize_to_tx_size_cat(bsize);
-
- assert(depth >= 0 && depth <= max_depths);
- assert(!is_inter_block(mbmi));
- assert(IMPLIES(is_rect_tx(tx_size), is_rect_tx_allowed(xd, mbmi)));
-
- aom_write_symbol(w, depth, ec_ctx->tx_size_cdf[tx_size_cat][tx_size_ctx],
- max_depths + 1);
- }
-}
-
-static int write_skip(const AV1_COMMON *cm, const MACROBLOCKD *xd,
- int segment_id, const MB_MODE_INFO *mi, aom_writer *w) {
- if (segfeature_active(&cm->seg, segment_id, SEG_LVL_SKIP)) {
- return 1;
- } else {
- const int skip = mi->skip;
- const int ctx = av1_get_skip_context(xd);
- FRAME_CONTEXT *ec_ctx = xd->tile_ctx;
- aom_write_symbol(w, skip, ec_ctx->skip_cdfs[ctx], 2);
- return skip;
- }
-}
-
-static int write_skip_mode(const AV1_COMMON *cm, const MACROBLOCKD *xd,
- int segment_id, const MB_MODE_INFO *mi,
- aom_writer *w) {
- if (!cm->skip_mode_flag) return 0;
- if (segfeature_active(&cm->seg, segment_id, SEG_LVL_SKIP)) {
- return 0;
- }
- const int skip_mode = mi->skip_mode;
- if (!is_comp_ref_allowed(mi->sb_type)) {
- assert(!skip_mode);
- return 0;
- }
- if (segfeature_active(&cm->seg, segment_id, SEG_LVL_REF_FRAME) ||
- segfeature_active(&cm->seg, segment_id, SEG_LVL_GLOBALMV)) {
- // These features imply single-reference mode, while skip mode implies
- // compound reference. Hence, the two are mutually exclusive.
- // In other words, skip_mode is implicitly 0 here.
- assert(!skip_mode);
- return 0;
- }
- const int ctx = av1_get_skip_mode_context(xd);
- aom_write_symbol(w, skip_mode, xd->tile_ctx->skip_mode_cdfs[ctx], 2);
- return skip_mode;
-}
-
-static void write_is_inter(const AV1_COMMON *cm, const MACROBLOCKD *xd,
- int segment_id, aom_writer *w, const int is_inter) {
- if (!segfeature_active(&cm->seg, segment_id, SEG_LVL_REF_FRAME)) {
- if (segfeature_active(&cm->seg, segment_id, SEG_LVL_GLOBALMV)) {
- assert(is_inter);
- return;
- }
- const int ctx = av1_get_intra_inter_context(xd);
- FRAME_CONTEXT *ec_ctx = xd->tile_ctx;
- aom_write_symbol(w, is_inter, ec_ctx->intra_inter_cdf[ctx], 2);
- }
-}
-
-static void write_motion_mode(const AV1_COMMON *cm, MACROBLOCKD *xd,
- const MB_MODE_INFO *mbmi, aom_writer *w) {
- MOTION_MODE last_motion_mode_allowed =
- cm->switchable_motion_mode
- ? motion_mode_allowed(cm->global_motion, xd, mbmi,
- cm->allow_warped_motion)
- : SIMPLE_TRANSLATION;
- assert(mbmi->motion_mode <= last_motion_mode_allowed);
- switch (last_motion_mode_allowed) {
- case SIMPLE_TRANSLATION: break;
- case OBMC_CAUSAL:
- aom_write_symbol(w, mbmi->motion_mode == OBMC_CAUSAL,
- xd->tile_ctx->obmc_cdf[mbmi->sb_type], 2);
- break;
- default:
- aom_write_symbol(w, mbmi->motion_mode,
- xd->tile_ctx->motion_mode_cdf[mbmi->sb_type],
- MOTION_MODES);
- }
-}
-
-static void write_delta_qindex(const MACROBLOCKD *xd, int delta_qindex,
- aom_writer *w) {
- int sign = delta_qindex < 0;
- int abs = sign ? -delta_qindex : delta_qindex;
- int rem_bits, thr;
- int smallval = abs < DELTA_Q_SMALL ? 1 : 0;
- FRAME_CONTEXT *ec_ctx = xd->tile_ctx;
-
- aom_write_symbol(w, AOMMIN(abs, DELTA_Q_SMALL), ec_ctx->delta_q_cdf,
- DELTA_Q_PROBS + 1);
-
- if (!smallval) {
- rem_bits = get_msb(abs - 1);
- thr = (1 << rem_bits) + 1;
- aom_write_literal(w, rem_bits - 1, 3);
- aom_write_literal(w, abs - thr, rem_bits);
- }
- if (abs > 0) {
- aom_write_bit(w, sign);
- }
-}
-
-static void write_delta_lflevel(const AV1_COMMON *cm, const MACROBLOCKD *xd,
- int lf_id, int delta_lflevel, aom_writer *w) {
- int sign = delta_lflevel < 0;
- int abs = sign ? -delta_lflevel : delta_lflevel;
- int rem_bits, thr;
- int smallval = abs < DELTA_LF_SMALL ? 1 : 0;
- FRAME_CONTEXT *ec_ctx = xd->tile_ctx;
-
- if (cm->delta_lf_multi) {
- assert(lf_id >= 0 && lf_id < (av1_num_planes(cm) > 1 ? FRAME_LF_COUNT
- : FRAME_LF_COUNT - 2));
- aom_write_symbol(w, AOMMIN(abs, DELTA_LF_SMALL),
- ec_ctx->delta_lf_multi_cdf[lf_id], DELTA_LF_PROBS + 1);
- } else {
- aom_write_symbol(w, AOMMIN(abs, DELTA_LF_SMALL), ec_ctx->delta_lf_cdf,
- DELTA_LF_PROBS + 1);
- }
-
- if (!smallval) {
- rem_bits = get_msb(abs - 1);
- thr = (1 << rem_bits) + 1;
- aom_write_literal(w, rem_bits - 1, 3);
- aom_write_literal(w, abs - thr, rem_bits);
- }
- if (abs > 0) {
- aom_write_bit(w, sign);
- }
-}
-
-static void pack_map_tokens(aom_writer *w, const TOKENEXTRA **tp, int n,
- int num) {
- const TOKENEXTRA *p = *tp;
- write_uniform(w, n, p->token); // The first color index.
- ++p;
- --num;
- for (int i = 0; i < num; ++i) {
- aom_write_symbol(w, p->token, p->color_map_cdf, n);
- ++p;
- }
- *tp = p;
-}
-
-static void pack_txb_tokens(aom_writer *w, AV1_COMMON *cm, MACROBLOCK *const x,
- const TOKENEXTRA **tp,
- const TOKENEXTRA *const tok_end, MACROBLOCKD *xd,
- MB_MODE_INFO *mbmi, int plane,
- BLOCK_SIZE plane_bsize, aom_bit_depth_t bit_depth,
- int block, int blk_row, int blk_col,
- TX_SIZE tx_size, TOKEN_STATS *token_stats) {
- const int max_blocks_high = max_block_high(xd, plane_bsize, plane);
- const int max_blocks_wide = max_block_wide(xd, plane_bsize, plane);
-
- if (blk_row >= max_blocks_high || blk_col >= max_blocks_wide) return;
-
- const struct macroblockd_plane *const pd = &xd->plane[plane];
- const TX_SIZE plane_tx_size =
- plane ? av1_get_max_uv_txsize(mbmi->sb_type, pd->subsampling_x,
- pd->subsampling_y)
- : mbmi->inter_tx_size[av1_get_txb_size_index(plane_bsize, blk_row,
- blk_col)];
-
- if (tx_size == plane_tx_size || plane) {
- tran_low_t *tcoeff = BLOCK_OFFSET(x->mbmi_ext->tcoeff[plane], block);
- const uint16_t eob = x->mbmi_ext->eobs[plane][block];
- TXB_CTX txb_ctx = { x->mbmi_ext->txb_skip_ctx[plane][block],
- x->mbmi_ext->dc_sign_ctx[plane][block] };
- av1_write_coeffs_txb(cm, xd, w, blk_row, blk_col, plane, tx_size, tcoeff,
- eob, &txb_ctx);
-#if CONFIG_RD_DEBUG
- TOKEN_STATS tmp_token_stats;
- init_token_stats(&tmp_token_stats);
- token_stats->txb_coeff_cost_map[blk_row][blk_col] = tmp_token_stats.cost;
- token_stats->cost += tmp_token_stats.cost;
-#endif
- } else {
- const TX_SIZE sub_txs = sub_tx_size_map[tx_size];
- const int bsw = tx_size_wide_unit[sub_txs];
- const int bsh = tx_size_high_unit[sub_txs];
- const int step = bsh * bsw;
-
- assert(bsw > 0 && bsh > 0);
-
- for (int r = 0; r < tx_size_high_unit[tx_size]; r += bsh) {
- for (int c = 0; c < tx_size_wide_unit[tx_size]; c += bsw) {
- const int offsetr = blk_row + r;
- const int offsetc = blk_col + c;
- if (offsetr >= max_blocks_high || offsetc >= max_blocks_wide) continue;
- pack_txb_tokens(w, cm, x, tp, tok_end, xd, mbmi, plane, plane_bsize,
- bit_depth, block, offsetr, offsetc, sub_txs,
- token_stats);
- block += step;
- }
- }
- }
-}
-
-static INLINE void set_spatial_segment_id(const AV1_COMMON *const cm,
- uint8_t *segment_ids,
- BLOCK_SIZE bsize, int mi_row,
- int mi_col, int segment_id) {
- const int mi_offset = mi_row * cm->mi_cols + mi_col;
- const int bw = mi_size_wide[bsize];
- const int bh = mi_size_high[bsize];
- const int xmis = AOMMIN(cm->mi_cols - mi_col, bw);
- const int ymis = AOMMIN(cm->mi_rows - mi_row, bh);
- int x, y;
-
- for (y = 0; y < ymis; ++y)
- for (x = 0; x < xmis; ++x)
- segment_ids[mi_offset + y * cm->mi_cols + x] = segment_id;
-}
-
-int av1_neg_interleave(int x, int ref, int max) {
- assert(x < max);
- const int diff = x - ref;
- if (!ref) return x;
- if (ref >= (max - 1)) return -x + max - 1;
- if (2 * ref < max) {
- if (abs(diff) <= ref) {
- if (diff > 0)
- return (diff << 1) - 1;
- else
- return ((-diff) << 1);
- }
- return x;
- } else {
- if (abs(diff) < (max - ref)) {
- if (diff > 0)
- return (diff << 1) - 1;
- else
- return ((-diff) << 1);
- }
- return (max - x) - 1;
- }
-}
-
-static void write_segment_id(AV1_COMP *cpi, const MB_MODE_INFO *const mbmi,
- aom_writer *w, const struct segmentation *seg,
- struct segmentation_probs *segp, int mi_row,
- int mi_col, int skip) {
- if (!seg->enabled || !seg->update_map) return;
-
- AV1_COMMON *const cm = &cpi->common;
- MACROBLOCKD *const xd = &cpi->td.mb.e_mbd;
- int cdf_num;
- const int pred = av1_get_spatial_seg_pred(cm, xd, mi_row, mi_col, &cdf_num);
-
- if (skip) {
- // Still need to transmit tx size for intra blocks even if skip is
- // true. Changing segment_id may make the tx size become invalid, e.g
- // changing from lossless to lossy.
- assert(is_inter_block(mbmi) || !cpi->has_lossless_segment);
-
- set_spatial_segment_id(cm, cm->current_frame_seg_map, mbmi->sb_type, mi_row,
- mi_col, pred);
- set_spatial_segment_id(cm, cpi->segmentation_map, mbmi->sb_type, mi_row,
- mi_col, pred);
- /* mbmi is read only but we need to update segment_id */
- ((MB_MODE_INFO *)mbmi)->segment_id = pred;
- return;
- }
-
- const int coded_id =
- av1_neg_interleave(mbmi->segment_id, pred, seg->last_active_segid + 1);
- aom_cdf_prob *pred_cdf = segp->spatial_pred_seg_cdf[cdf_num];
- aom_write_symbol(w, coded_id, pred_cdf, MAX_SEGMENTS);
- set_spatial_segment_id(cm, cm->current_frame_seg_map, mbmi->sb_type, mi_row,
- mi_col, mbmi->segment_id);
-}
-
-#define WRITE_REF_BIT(bname, pname) \
- aom_write_symbol(w, bname, av1_get_pred_cdf_##pname(xd), 2)
-
-// This function encodes the reference frame
-static void write_ref_frames(const AV1_COMMON *cm, const MACROBLOCKD *xd,
- aom_writer *w) {
- const MB_MODE_INFO *const mbmi = xd->mi[0];
- const int is_compound = has_second_ref(mbmi);
- const int segment_id = mbmi->segment_id;
-
- // If segment level coding of this signal is disabled...
- // or the segment allows multiple reference frame options
- if (segfeature_active(&cm->seg, segment_id, SEG_LVL_REF_FRAME)) {
- assert(!is_compound);
- assert(mbmi->ref_frame[0] ==
- get_segdata(&cm->seg, segment_id, SEG_LVL_REF_FRAME));
- } else if (segfeature_active(&cm->seg, segment_id, SEG_LVL_SKIP) ||
- segfeature_active(&cm->seg, segment_id, SEG_LVL_GLOBALMV)) {
- assert(!is_compound);
- assert(mbmi->ref_frame[0] == LAST_FRAME);
- } else {
- // does the feature use compound prediction or not
- // (if not specified at the frame/segment level)
- if (cm->reference_mode == REFERENCE_MODE_SELECT) {
- if (is_comp_ref_allowed(mbmi->sb_type))
- aom_write_symbol(w, is_compound, av1_get_reference_mode_cdf(xd), 2);
- } else {
- assert((!is_compound) == (cm->reference_mode == SINGLE_REFERENCE));
- }
-
- if (is_compound) {
- const COMP_REFERENCE_TYPE comp_ref_type = has_uni_comp_refs(mbmi)
- ? UNIDIR_COMP_REFERENCE
- : BIDIR_COMP_REFERENCE;
- aom_write_symbol(w, comp_ref_type, av1_get_comp_reference_type_cdf(xd),
- 2);
-
- if (comp_ref_type == UNIDIR_COMP_REFERENCE) {
- const int bit = mbmi->ref_frame[0] == BWDREF_FRAME;
- WRITE_REF_BIT(bit, uni_comp_ref_p);
-
- if (!bit) {
- assert(mbmi->ref_frame[0] == LAST_FRAME);
- const int bit1 = mbmi->ref_frame[1] == LAST3_FRAME ||
- mbmi->ref_frame[1] == GOLDEN_FRAME;
- WRITE_REF_BIT(bit1, uni_comp_ref_p1);
- if (bit1) {
- const int bit2 = mbmi->ref_frame[1] == GOLDEN_FRAME;
- WRITE_REF_BIT(bit2, uni_comp_ref_p2);
- }
- } else {
- assert(mbmi->ref_frame[1] == ALTREF_FRAME);
- }
-
- return;
- }
-
- assert(comp_ref_type == BIDIR_COMP_REFERENCE);
-
- const int bit = (mbmi->ref_frame[0] == GOLDEN_FRAME ||
- mbmi->ref_frame[0] == LAST3_FRAME);
- WRITE_REF_BIT(bit, comp_ref_p);
-
- if (!bit) {
- const int bit1 = mbmi->ref_frame[0] == LAST2_FRAME;
- WRITE_REF_BIT(bit1, comp_ref_p1);
- } else {
- const int bit2 = mbmi->ref_frame[0] == GOLDEN_FRAME;
- WRITE_REF_BIT(bit2, comp_ref_p2);
- }
-
- const int bit_bwd = mbmi->ref_frame[1] == ALTREF_FRAME;
- WRITE_REF_BIT(bit_bwd, comp_bwdref_p);
-
- if (!bit_bwd) {
- WRITE_REF_BIT(mbmi->ref_frame[1] == ALTREF2_FRAME, comp_bwdref_p1);
- }
-
- } else {
- const int bit0 = (mbmi->ref_frame[0] <= ALTREF_FRAME &&
- mbmi->ref_frame[0] >= BWDREF_FRAME);
- WRITE_REF_BIT(bit0, single_ref_p1);
-
- if (bit0) {
- const int bit1 = mbmi->ref_frame[0] == ALTREF_FRAME;
- WRITE_REF_BIT(bit1, single_ref_p2);
-
- if (!bit1) {
- WRITE_REF_BIT(mbmi->ref_frame[0] == ALTREF2_FRAME, single_ref_p6);
- }
- } else {
- const int bit2 = (mbmi->ref_frame[0] == LAST3_FRAME ||
- mbmi->ref_frame[0] == GOLDEN_FRAME);
- WRITE_REF_BIT(bit2, single_ref_p3);
-
- if (!bit2) {
- const int bit3 = mbmi->ref_frame[0] != LAST_FRAME;
- WRITE_REF_BIT(bit3, single_ref_p4);
- } else {
- const int bit4 = mbmi->ref_frame[0] != LAST3_FRAME;
- WRITE_REF_BIT(bit4, single_ref_p5);
- }
- }
- }
- }
-}
-
-static void write_filter_intra_mode_info(const AV1_COMMON *cm,
- const MACROBLOCKD *xd,
- const MB_MODE_INFO *const mbmi,
- aom_writer *w) {
- if (av1_filter_intra_allowed(cm, mbmi)) {
- aom_write_symbol(w, mbmi->filter_intra_mode_info.use_filter_intra,
- xd->tile_ctx->filter_intra_cdfs[mbmi->sb_type], 2);
- if (mbmi->filter_intra_mode_info.use_filter_intra) {
- const FILTER_INTRA_MODE mode =
- mbmi->filter_intra_mode_info.filter_intra_mode;
- aom_write_symbol(w, mode, xd->tile_ctx->filter_intra_mode_cdf,
- FILTER_INTRA_MODES);
- }
- }
-}
-
-static void write_angle_delta(aom_writer *w, int angle_delta,
- aom_cdf_prob *cdf) {
- aom_write_symbol(w, angle_delta + MAX_ANGLE_DELTA, cdf,
- 2 * MAX_ANGLE_DELTA + 1);
-}
-
-static void write_mb_interp_filter(AV1_COMP *cpi, const MACROBLOCKD *xd,
- aom_writer *w) {
- AV1_COMMON *const cm = &cpi->common;
- const MB_MODE_INFO *const mbmi = xd->mi[0];
- FRAME_CONTEXT *ec_ctx = xd->tile_ctx;
-
- if (!av1_is_interp_needed(xd)) {
- assert(mbmi->interp_filters ==
- av1_broadcast_interp_filter(
- av1_unswitchable_filter(cm->interp_filter)));
- return;
- }
- if (cm->interp_filter == SWITCHABLE) {
- int dir;
- for (dir = 0; dir < 2; ++dir) {
- const int ctx = av1_get_pred_context_switchable_interp(xd, dir);
- InterpFilter filter =
- av1_extract_interp_filter(mbmi->interp_filters, dir);
- aom_write_symbol(w, filter, ec_ctx->switchable_interp_cdf[ctx],
- SWITCHABLE_FILTERS);
- ++cpi->interp_filter_selected[0][filter];
- if (cm->seq_params.enable_dual_filter == 0) return;
- }
- }
-}
-
-// Transmit color values with delta encoding. Write the first value as
-// literal, and the deltas between each value and the previous one. "min_val" is
-// the smallest possible value of the deltas.
-static void delta_encode_palette_colors(const int *colors, int num,
- int bit_depth, int min_val,
- aom_writer *w) {
- if (num <= 0) return;
- assert(colors[0] < (1 << bit_depth));
- aom_write_literal(w, colors[0], bit_depth);
- if (num == 1) return;
- int max_delta = 0;
- int deltas[PALETTE_MAX_SIZE];
- memset(deltas, 0, sizeof(deltas));
- for (int i = 1; i < num; ++i) {
- assert(colors[i] < (1 << bit_depth));
- const int delta = colors[i] - colors[i - 1];
- deltas[i - 1] = delta;
- assert(delta >= min_val);
- if (delta > max_delta) max_delta = delta;
- }
- const int min_bits = bit_depth - 3;
- int bits = AOMMAX(av1_ceil_log2(max_delta + 1 - min_val), min_bits);
- assert(bits <= bit_depth);
- int range = (1 << bit_depth) - colors[0] - min_val;
- aom_write_literal(w, bits - min_bits, 2);
- for (int i = 0; i < num - 1; ++i) {
- aom_write_literal(w, deltas[i] - min_val, bits);
- range -= deltas[i];
- bits = AOMMIN(bits, av1_ceil_log2(range));
- }
-}
-
-// Transmit luma palette color values. First signal if each color in the color
-// cache is used. Those colors that are not in the cache are transmitted with
-// delta encoding.
-static void write_palette_colors_y(const MACROBLOCKD *const xd,
- const PALETTE_MODE_INFO *const pmi,
- int bit_depth, aom_writer *w) {
- const int n = pmi->palette_size[0];
- uint16_t color_cache[2 * PALETTE_MAX_SIZE];
- const int n_cache = av1_get_palette_cache(xd, 0, color_cache);
- int out_cache_colors[PALETTE_MAX_SIZE];
- uint8_t cache_color_found[2 * PALETTE_MAX_SIZE];
- const int n_out_cache =
- av1_index_color_cache(color_cache, n_cache, pmi->palette_colors, n,
- cache_color_found, out_cache_colors);
- int n_in_cache = 0;
- for (int i = 0; i < n_cache && n_in_cache < n; ++i) {
- const int found = cache_color_found[i];
- aom_write_bit(w, found);
- n_in_cache += found;
- }
- assert(n_in_cache + n_out_cache == n);
- delta_encode_palette_colors(out_cache_colors, n_out_cache, bit_depth, 1, w);
-}
-
-// Write chroma palette color values. U channel is handled similarly to the luma
-// channel. For v channel, either use delta encoding or transmit raw values
-// directly, whichever costs less.
-static void write_palette_colors_uv(const MACROBLOCKD *const xd,
- const PALETTE_MODE_INFO *const pmi,
- int bit_depth, aom_writer *w) {
- const int n = pmi->palette_size[1];
- const uint16_t *colors_u = pmi->palette_colors + PALETTE_MAX_SIZE;
- const uint16_t *colors_v = pmi->palette_colors + 2 * PALETTE_MAX_SIZE;
- // U channel colors.
- uint16_t color_cache[2 * PALETTE_MAX_SIZE];
- const int n_cache = av1_get_palette_cache(xd, 1, color_cache);
- int out_cache_colors[PALETTE_MAX_SIZE];
- uint8_t cache_color_found[2 * PALETTE_MAX_SIZE];
- const int n_out_cache = av1_index_color_cache(
- color_cache, n_cache, colors_u, n, cache_color_found, out_cache_colors);
- int n_in_cache = 0;
- for (int i = 0; i < n_cache && n_in_cache < n; ++i) {
- const int found = cache_color_found[i];
- aom_write_bit(w, found);
- n_in_cache += found;
- }
- delta_encode_palette_colors(out_cache_colors, n_out_cache, bit_depth, 0, w);
-
- // V channel colors. Don't use color cache as the colors are not sorted.
- const int max_val = 1 << bit_depth;
- int zero_count = 0, min_bits_v = 0;
- int bits_v =
- av1_get_palette_delta_bits_v(pmi, bit_depth, &zero_count, &min_bits_v);
- const int rate_using_delta =
- 2 + bit_depth + (bits_v + 1) * (n - 1) - zero_count;
- const int rate_using_raw = bit_depth * n;
- if (rate_using_delta < rate_using_raw) { // delta encoding
- assert(colors_v[0] < (1 << bit_depth));
- aom_write_bit(w, 1);
- aom_write_literal(w, bits_v - min_bits_v, 2);
- aom_write_literal(w, colors_v[0], bit_depth);
- for (int i = 1; i < n; ++i) {
- assert(colors_v[i] < (1 << bit_depth));
- if (colors_v[i] == colors_v[i - 1]) { // No need to signal sign bit.
- aom_write_literal(w, 0, bits_v);
- continue;
- }
- const int delta = abs((int)colors_v[i] - colors_v[i - 1]);
- const int sign_bit = colors_v[i] < colors_v[i - 1];
- if (delta <= max_val - delta) {
- aom_write_literal(w, delta, bits_v);
- aom_write_bit(w, sign_bit);
- } else {
- aom_write_literal(w, max_val - delta, bits_v);
- aom_write_bit(w, !sign_bit);
- }
- }
- } else { // Transmit raw values.
- aom_write_bit(w, 0);
- for (int i = 0; i < n; ++i) {
- assert(colors_v[i] < (1 << bit_depth));
- aom_write_literal(w, colors_v[i], bit_depth);
- }
- }
-}
-
-// Codes the palette syntax of an intra block: for luma and then for chroma a
-// "palette used" flag, followed (when the flag is set) by the palette size
-// relative to PALETTE_MIN_SIZE and by the palette colors themselves.
-static void write_palette_mode_info(const AV1_COMMON *cm, const MACROBLOCKD *xd,
-                                    const MB_MODE_INFO *const mbmi, int mi_row,
-                                    int mi_col, aom_writer *w) {
-  const int plane_count = av1_num_planes(cm);
-  const BLOCK_SIZE block_size = mbmi->sb_type;
-  assert(av1_allow_palette(cm->allow_screen_content_tools, block_size));
-  const PALETTE_MODE_INFO *const palette = &mbmi->palette_mode_info;
-  const int size_ctx = av1_get_palette_bsize_ctx(block_size);
-  FRAME_CONTEXT *const ec_ctx = xd->tile_ctx;
-
-  // The luma flag is only coded when the Y mode is DC_PRED.
-  if (mbmi->mode == DC_PRED) {
-    const int luma_colors = palette->palette_size[0];
-    const int has_luma_palette = luma_colors > 0;
-    const int y_mode_ctx = av1_get_palette_mode_ctx(xd);
-    aom_write_symbol(w, has_luma_palette,
-                     ec_ctx->palette_y_mode_cdf[size_ctx][y_mode_ctx], 2);
-    if (has_luma_palette) {
-      aom_write_symbol(w, luma_colors - PALETTE_MIN_SIZE,
-                       ec_ctx->palette_y_size_cdf[size_ctx], PALETTE_SIZES);
-      write_palette_colors_y(xd, palette, cm->seq_params.bit_depth, w);
-    }
-  }
-
-  // The chroma flag is only coded when chroma planes exist, the UV mode is
-  // UV_DC_PRED and the block passes is_chroma_reference().
-  if (plane_count <= 1 || mbmi->uv_mode != UV_DC_PRED) return;
-  if (!is_chroma_reference(mi_row, mi_col, block_size,
-                           xd->plane[1].subsampling_x,
-                           xd->plane[1].subsampling_y)) {
-    return;
-  }
-
-  const int chroma_colors = palette->palette_size[1];
-  const int has_chroma_palette = chroma_colors > 0;
-  // The chroma flag is conditioned on whether a luma palette is in use.
-  const int uv_mode_ctx = palette->palette_size[0] > 0;
-  aom_write_symbol(w, has_chroma_palette,
-                   ec_ctx->palette_uv_mode_cdf[uv_mode_ctx], 2);
-  if (!has_chroma_palette) return;
-
-  aom_write_symbol(w, chroma_colors - PALETTE_MIN_SIZE,
-                   ec_ctx->palette_uv_size_cdf[size_ctx], PALETTE_SIZES);
-  write_palette_colors_uv(xd, palette, cm->seq_params.bit_depth, w);
-}
-
-// Codes the transform type of a luma transform block. Nothing is written for
-// chroma planes, when only one transform type is available, when the
-// applicable qindex is not positive, or when the block is skipped (through
-// its skip flag or an active SEG_LVL_SKIP segment feature).
-void av1_write_tx_type(const AV1_COMMON *const cm, const MACROBLOCKD *xd,
-                       int blk_row, int blk_col, int plane, TX_SIZE tx_size,
-                       aom_writer *w) {
-  const MB_MODE_INFO *const mbmi = xd->mi[0];
-  const int inter_block = is_inter_block(mbmi);
-  FRAME_CONTEXT *const ec_ctx = xd->tile_ctx;
-
-  // Only the Y plane's tx_type is transmitted.
-  if (plane > 0) return;
-
-  const PLANE_TYPE plane_type = get_plane_type(plane);
-  const TX_TYPE tx_type = av1_get_tx_type(plane_type, xd, blk_row, blk_col,
-                                          tx_size, cm->reduced_tx_set_used);
-  const TX_SIZE sqr_tx_size = txsize_sqr_map[tx_size];
-
-  if (get_ext_tx_types(tx_size, inter_block, cm->reduced_tx_set_used) <= 1)
-    return;
-
-  // With segmentation enabled the per-segment qindex applies, otherwise the
-  // frame-level base qindex does.
-  const int qindex =
-      cm->seg.enabled ? xd->qindex[mbmi->segment_id] : cm->base_qindex;
-  if (qindex <= 0) return;
-  if (mbmi->skip) return;
-  if (segfeature_active(&cm->seg, mbmi->segment_id, SEG_LVL_SKIP)) return;
-
-  const TxSetType set_type =
-      av1_get_ext_tx_set_type(tx_size, inter_block, cm->reduced_tx_set_used);
-  const int eset =
-      get_ext_tx_set(tx_size, inter_block, cm->reduced_tx_set_used);
-  // eset == 0 would be the DCT_DCT-only set, for which no tx_type is sent.
-  assert(eset > 0);
-  assert(av1_ext_tx_used[set_type][tx_type]);
-
-  const int symbol = av1_ext_tx_ind[set_type][tx_type];
-  const int num_symbols = av1_num_ext_tx_set[set_type];
-  if (inter_block) {
-    aom_write_symbol(w, symbol, ec_ctx->inter_ext_tx_cdf[eset][sqr_tx_size],
-                     num_symbols);
-    return;
-  }
-
-  // Intra blocks condition the CDF on the prediction direction; filter-intra
-  // blocks map their filter mode to a direction first.
-  const PREDICTION_MODE intra_dir =
-      mbmi->filter_intra_mode_info.use_filter_intra
-          ? fimode_to_intradir[mbmi->filter_intra_mode_info.filter_intra_mode]
-          : mbmi->mode;
-  aom_write_symbol(w, symbol,
-                   ec_ctx->intra_ext_tx_cdf[eset][sqr_tx_size][intra_dir],
-                   num_symbols);
-}
-
-// Codes the luma intra mode using the y_mode CDF selected by the block's
-// size group.
-static void write_intra_y_mode_nonkf(FRAME_CONTEXT *frame_ctx, BLOCK_SIZE bsize,
-                                     PREDICTION_MODE mode, aom_writer *w) {
-  aom_cdf_prob *const y_mode_cdf =
-      frame_ctx->y_mode_cdf[size_group_lookup[bsize]];
-  aom_write_symbol(w, mode, y_mode_cdf, INTRA_MODES);
-}
-
-// Codes the chroma intra mode. The CDF is selected by whether CfL is allowed
-// and by the luma mode; the alphabet has one symbol fewer when CfL is not
-// allowed.
-static void write_intra_uv_mode(FRAME_CONTEXT *frame_ctx,
-                                UV_PREDICTION_MODE uv_mode,
-                                PREDICTION_MODE y_mode,
-                                CFL_ALLOWED_TYPE cfl_allowed, aom_writer *w) {
-  aom_cdf_prob *const uv_cdf = frame_ctx->uv_mode_cdf[cfl_allowed][y_mode];
-  const int num_symbols = cfl_allowed ? UV_INTRA_MODES : UV_INTRA_MODES - 1;
-  aom_write_symbol(w, uv_mode, uv_cdf, num_symbols);
-}
-
-// Codes the CfL parameters: the joint U/V sign first, then one magnitude
-// index per chroma component whose sign is not CFL_SIGN_ZERO.
-static void write_cfl_alphas(FRAME_CONTEXT *const ec_ctx, int idx,
-                             int joint_sign, aom_writer *w) {
-  aom_write_symbol(w, joint_sign, ec_ctx->cfl_sign_cdf, CFL_JOINT_SIGNS);
-
-  // A zero sign means the magnitude is implied and therefore not coded.
-  const int code_u = CFL_SIGN_U(joint_sign) != CFL_SIGN_ZERO;
-  if (code_u) {
-    aom_write_symbol(w, CFL_IDX_U(idx),
-                     ec_ctx->cfl_alpha_cdf[CFL_CONTEXT_U(joint_sign)],
-                     CFL_ALPHABET_SIZE);
-  }
-
-  const int code_v = CFL_SIGN_V(joint_sign) != CFL_SIGN_ZERO;
-  if (code_v) {
-    aom_write_symbol(w, CFL_IDX_V(idx),
-                     ec_ctx->cfl_alpha_cdf[CFL_CONTEXT_V(joint_sign)],
-                     CFL_ALPHABET_SIZE);
-  }
-}
-
-// Codes the CDEF strength index once per 64x64 unit, at the first coding
-// block of that unit that is not skipped. xd->cdef_preset[] remembers which
-// units of the current superblock have already been signalled (-1 = not yet).
-// When the frame is coded lossless or allows intrabc, nothing is written and
-// the frame-level CDEF parameters are reset to "no CDEF".
-static void write_cdef(AV1_COMMON *cm, MACROBLOCKD *const xd, aom_writer *w,
-                       int skip, int mi_col, int mi_row) {
-  if (cm->coded_lossless || cm->allow_intrabc) {
-    // Initialize to indicate no CDEF for safety.
-    cm->cdef_bits = 0;
-    cm->cdef_strengths[0] = 0;
-    cm->nb_cdef_strengths = 1;
-    cm->cdef_uv_strengths[0] = 0;
-    return;
-  }
-
-  // Size of a 64x64 unit in mode-info units, and the mask that rounds a
-  // position down to the start of its unit.
-  const int unit64 = 1 << (6 - MI_SIZE_LOG2);
-  const int align64 = ~(unit64 - 1);
-  const int anchor_offset =
-      (mi_row & align64) * cm->mi_stride + (mi_col & align64);
-  const MB_MODE_INFO *const anchor_mbmi = cm->mi_grid_visible[anchor_offset];
-
-  // Forget all presets when starting a new superblock (its top-left block).
-  const int sb_mask = cm->seq_params.mib_size - 1;
-  if ((mi_row & sb_mask) == 0 && (mi_col & sb_mask) == 0) {
-    for (int i = 0; i < 4; ++i) xd->cdef_preset[i] = -1;
-  }
-
-  // A 128x128 superblock holds four 64x64 units, indexed in raster order;
-  // smaller superblocks use slot 0 only.
-  int preset_idx = 0;
-  if (cm->seq_params.sb_size == BLOCK_128X128) {
-    if (mi_col & unit64) preset_idx += 1;
-    if (mi_row & unit64) preset_idx += 2;
-  }
-
-  // Emit the CDEF parameter at the first non-skip coding block of the unit.
-  if (skip || xd->cdef_preset[preset_idx] != -1) return;
-  aom_write_literal(w, anchor_mbmi->cdef_strength, cm->cdef_bits);
-  xd->cdef_preset[preset_idx] = anchor_mbmi->cdef_strength;
-}
-
-// Codes the segment id of a block in an inter frame. The function is called
-// twice per block: once before the skip flag (preskip != 0) and once after
-// it (preskip == 0); only the call matching seg->segid_preskip writes
-// anything. Nothing is written unless the segmentation map is updated.
-static void write_inter_segment_id(AV1_COMP *cpi, aom_writer *w,
-                                   const struct segmentation *const seg,
-                                   struct segmentation_probs *const segp,
-                                   int mi_row, int mi_col, int skip,
-                                   int preskip) {
-  MACROBLOCKD *const xd = &cpi->td.mb.e_mbd;
-  const MB_MODE_INFO *const mbmi = xd->mi[0];
-  AV1_COMMON *const cm = &cpi->common;
-
-  if (!seg->update_map) return;
-
-  // Only act in the pass selected by segid_preskip.
-  const int in_preskip_pass = preskip != 0;
-  const int id_precedes_skip = seg->segid_preskip != 0;
-  if (in_preskip_pass != id_precedes_skip) return;
-
-  // Post-skip pass on a skipped block: write_segment_id() is invoked with
-  // its last argument set and the temporal prediction flag is cleared.
-  if (!in_preskip_pass && skip) {
-    write_segment_id(cpi, mbmi, w, seg, segp, mi_row, mi_col, 1);
-    if (seg->temporal_update) ((MB_MODE_INFO *)mbmi)->seg_id_predicted = 0;
-    return;
-  }
-
-  if (!seg->temporal_update) {
-    write_segment_id(cpi, mbmi, w, seg, segp, mi_row, mi_col, 0);
-    return;
-  }
-
-  // Temporal update: code the "predicted" flag, then either record the
-  // predicted id in the current frame's map or code the id explicitly.
-  const int predicted = mbmi->seg_id_predicted;
-  aom_cdf_prob *const pred_cdf = av1_get_pred_cdf_seg_id(segp, xd);
-  aom_write_symbol(w, predicted, pred_cdf, 2);
-  if (predicted) {
-    set_spatial_segment_id(cm, cm->current_frame_seg_map, mbmi->sb_type,
-                           mi_row, mi_col, mbmi->segment_id);
-  } else {
-    write_segment_id(cpi, mbmi, w, seg, segp, mi_row, mi_col, 0);
-  }
-}
-
-// Codes the delta-q index and, when enabled, the delta loop-filter levels.
-// Values are written only for the block at the top-left corner of a
-// superblock, and not when that block covers the whole superblock and is
-// skipped. The running values kept in xd are updated after each write.
-static void write_delta_q_params(AV1_COMP *cpi, const int mi_row,
-                                 const int mi_col, int skip, aom_writer *w) {
-  AV1_COMMON *const cm = &cpi->common;
-  if (!cm->delta_q_present_flag) return;
-
-  MACROBLOCKD *const xd = &cpi->td.mb.e_mbd;
-  const MB_MODE_INFO *const mbmi = xd->mi[0];
-
-  const int sb_mask = cm->seq_params.mib_size - 1;
-  const int at_sb_origin = (mi_row & sb_mask) == 0 && (mi_col & sb_mask) == 0;
-  if (!at_sb_origin) return;
-  if (mbmi->sb_type == cm->seq_params.sb_size && skip != 0) return;
-
-  assert(mbmi->current_qindex > 0);
-  const int qindex_step =
-      (mbmi->current_qindex - xd->current_qindex) / cm->delta_q_res;
-  write_delta_qindex(xd, qindex_step, w);
-  xd->current_qindex = mbmi->current_qindex;
-
-  if (!cm->delta_lf_present_flag) return;
-
-  if (!cm->delta_lf_multi) {
-    // Single delta shared by all loop-filter levels (lf_id of -1).
-    const int lf_step = (mbmi->delta_lf_from_base - xd->delta_lf_from_base) /
-                        cm->delta_lf_res;
-    write_delta_lflevel(cm, xd, -1, lf_step, w);
-    xd->delta_lf_from_base = mbmi->delta_lf_from_base;
-    return;
-  }
-
-  // One delta per loop-filter level; two fewer levels without chroma planes.
-  const int lf_count =
-      av1_num_planes(cm) > 1 ? FRAME_LF_COUNT : FRAME_LF_COUNT - 2;
-  for (int lf_id = 0; lf_id < lf_count; ++lf_id) {
-    const int lf_step =
-        (mbmi->delta_lf[lf_id] - xd->delta_lf[lf_id]) / cm->delta_lf_res;
-    write_delta_lflevel(cm, xd, lf_id, lf_step, w);
-    xd->delta_lf[lf_id] = mbmi->delta_lf[lf_id];
-  }
-}
-
-// Codes all intra prediction syntax of a block, in this order: luma mode,
-// luma angle delta, chroma mode (with CfL alphas and chroma angle delta),
-// palette info and filter-intra info.
-static void write_intra_prediction_modes(AV1_COMP *cpi, const int mi_row,
-                                         const int mi_col, int is_keyframe,
-                                         aom_writer *w) {
-  const AV1_COMMON *const cm = &cpi->common;
-  MACROBLOCKD *const xd = &cpi->td.mb.e_mbd;
-  FRAME_CONTEXT *const ec_ctx = xd->tile_ctx;
-  const MB_MODE_INFO *const mbmi = xd->mi[0];
-  const PREDICTION_MODE y_mode = mbmi->mode;
-  const BLOCK_SIZE block_size = mbmi->sb_type;
-
-  // Luma mode: the key-frame variant is given the above/left neighbours, the
-  // non-key-frame variant the block size.
-  if (is_keyframe) {
-    write_intra_y_mode_kf(ec_ctx, mbmi, xd->above_mbmi, xd->left_mbmi, y_mode,
-                          w);
-  } else {
-    write_intra_y_mode_nonkf(ec_ctx, block_size, y_mode, w);
-  }
-
-  // Luma angle delta, for directional modes only.
-  const int angle_delta_allowed = av1_use_angle_delta(block_size);
-  if (angle_delta_allowed && av1_is_directional_mode(y_mode)) {
-    write_angle_delta(w, mbmi->angle_delta[PLANE_TYPE_Y],
-                      ec_ctx->angle_delta_cdf[y_mode - V_PRED]);
-  }
-
-  // Chroma mode, CfL alphas and chroma angle delta.
-  const int codes_chroma =
-      !cm->seq_params.monochrome &&
-      is_chroma_reference(mi_row, mi_col, block_size,
-                          xd->plane[1].subsampling_x,
-                          xd->plane[1].subsampling_y);
-  if (codes_chroma) {
-    const UV_PREDICTION_MODE uv_mode = mbmi->uv_mode;
-    write_intra_uv_mode(ec_ctx, uv_mode, y_mode, is_cfl_allowed(xd), w);
-    if (uv_mode == UV_CFL_PRED) {
-      write_cfl_alphas(ec_ctx, mbmi->cfl_alpha_idx, mbmi->cfl_alpha_signs, w);
-    }
-    if (angle_delta_allowed &&
-        av1_is_directional_mode(get_uv_mode(uv_mode))) {
-      write_angle_delta(w, mbmi->angle_delta[PLANE_TYPE_UV],
-                        ec_ctx->angle_delta_cdf[uv_mode - V_PRED]);
-    }
-  }
-
-  // Palette.
-  if (av1_allow_palette(cm->allow_screen_content_tools, block_size)) {
-    write_palette_mode_info(cm, xd, mbmi, mi_row, mi_col, w);
-  }
-
-  // Filter intra.
-  write_filter_intra_mode_info(cm, xd, mbmi, w);
-}
-
-// Codes the mode info of one block in a frame that is not intra-only:
-// segment id, skip mode, skip flag, CDEF, delta q/lf, the intra/inter flag
-// and then either the intra modes or the full inter syntax (reference
-// frames, inter mode, DRL index, motion vectors, interintra, motion mode,
-// compound type and interpolation filter). The order of the writes below
-// defines the bitstream layout and must not be changed.
-static void pack_inter_mode_mvs(AV1_COMP *cpi, const int mi_row,
-                                const int mi_col, aom_writer *w) {
-  AV1_COMMON *const cm = &cpi->common;
-  MACROBLOCK *const x = &cpi->td.mb;
-  MACROBLOCKD *const xd = &x->e_mbd;
-  FRAME_CONTEXT *const ec_ctx = xd->tile_ctx;
-  const struct segmentation *const seg = &cm->seg;
-  struct segmentation_probs *const segp = &ec_ctx->seg;
-  const MB_MODE_INFO *const mbmi = xd->mi[0];
-  const MB_MODE_INFO_EXT *const mbmi_ext = x->mbmi_ext;
-  // These values are sampled once on entry, before any syntax is written.
-  const PREDICTION_MODE pred_mode = mbmi->mode;
-  const int segment_id = mbmi->segment_id;
-  const BLOCK_SIZE block_size = mbmi->sb_type;
-  const int allow_hp = cm->allow_high_precision_mv;
-  const int inter_coded = is_inter_block(mbmi);
-  const int two_refs = has_second_ref(mbmi);
-
-  // Segment id (pre-skip pass), skip mode and skip flag.
-  write_inter_segment_id(cpi, w, seg, segp, mi_row, mi_col, 0, 1);
-  write_skip_mode(cm, xd, segment_id, mbmi, w);
-
-  assert(IMPLIES(mbmi->skip_mode, mbmi->skip));
-  // In skip mode the skip flag is implied and not coded.
-  int skip = 1;
-  if (!mbmi->skip_mode) skip = write_skip(cm, xd, segment_id, mbmi, w);
-
-  // Segment id (post-skip pass), CDEF and delta q / delta lf.
-  write_inter_segment_id(cpi, w, seg, segp, mi_row, mi_col, skip, 0);
-  write_cdef(cm, xd, w, skip, mi_col, mi_row);
-  write_delta_q_params(cpi, mi_row, mi_col, skip, w);
-
-  // Skip-mode blocks carry no further mode info.
-  if (mbmi->skip_mode) return;
-
-  write_is_inter(cm, xd, mbmi->segment_id, w, inter_coded);
-
-  if (!inter_coded) {
-    write_intra_prediction_modes(cpi, mi_row, mi_col, 0, w);
-    return;
-  }
-
-  av1_collect_neighbors_ref_counts(xd);
-  write_ref_frames(cm, xd, w);
-
-  const int16_t mode_ctx =
-      av1_mode_context_analyzer(mbmi_ext->mode_context, mbmi->ref_frame);
-
-  // If segment skip is not enabled code the mode.
-  if (!segfeature_active(seg, segment_id, SEG_LVL_SKIP)) {
-    if (is_inter_compound_mode(pred_mode)) {
-      write_inter_compound_mode(xd, w, pred_mode, mode_ctx);
-    } else if (is_inter_singleref_mode(pred_mode)) {
-      write_inter_mode(w, pred_mode, ec_ctx, mode_ctx);
-    }
-
-    const int codes_drl_idx = pred_mode == NEWMV || pred_mode == NEW_NEWMV ||
-                              have_nearmv_in_inter_mode(pred_mode);
-    if (codes_drl_idx) {
-      write_drl_idx(ec_ctx, mbmi, mbmi_ext, w);
-    } else {
-      assert(mbmi->ref_mv_idx == 0);
-    }
-  }
-
-  // Motion vectors: pick the range of reference indices whose MV is coded
-  // for this mode (the range stays empty when no new MV is present).
-  int first_mv_ref = 0;
-  int last_mv_ref = -1;
-  if (pred_mode == NEWMV || pred_mode == NEW_NEWMV) {
-    last_mv_ref = two_refs;
-  } else if (pred_mode == NEAREST_NEWMV || pred_mode == NEAR_NEWMV) {
-    first_mv_ref = 1;
-    last_mv_ref = 1;
-  } else if (pred_mode == NEW_NEARESTMV || pred_mode == NEW_NEARMV) {
-    last_mv_ref = 0;
-  }
-  nmv_context *const nmvc = &ec_ctx->nmvc;
-  for (int ref = first_mv_ref; ref <= last_mv_ref; ++ref) {
-    const int_mv ref_mv = av1_get_ref_mv(x, ref);
-    av1_encode_mv(cpi, w, &mbmi->mv[ref].as_mv, &ref_mv.as_mv, nmvc,
-                  allow_hp);
-  }
-
-  // Interintra: flag, mode and optional wedge parameters.
-  if (cm->reference_mode != COMPOUND_REFERENCE &&
-      cm->seq_params.enable_interintra_compound &&
-      is_interintra_allowed(mbmi)) {
-    const int uses_interintra = mbmi->ref_frame[1] == INTRA_FRAME;
-    const int size_group = size_group_lookup[block_size];
-    aom_write_symbol(w, uses_interintra, ec_ctx->interintra_cdf[size_group],
-                     2);
-    if (uses_interintra) {
-      aom_write_symbol(w, mbmi->interintra_mode,
-                       ec_ctx->interintra_mode_cdf[size_group],
-                       INTERINTRA_MODES);
-      if (is_interintra_wedge_used(block_size)) {
-        aom_write_symbol(w, mbmi->use_wedge_interintra,
-                         ec_ctx->wedge_interintra_cdf[block_size], 2);
-        if (mbmi->use_wedge_interintra) {
-          aom_write_symbol(w, mbmi->interintra_wedge_index,
-                           ec_ctx->wedge_idx_cdf[block_size], 16);
-          assert(mbmi->interintra_wedge_sign == 0);
-        }
-      }
-    }
-  }
-
-  if (mbmi->ref_frame[1] != INTRA_FRAME) write_motion_mode(cm, xd, mbmi, w);
-
-  // First write idx to indicate current compound inter prediction mode group
-  // Group A (0): jnt_comp, compound_average
-  // Group B (1): interintra, compound_diffwtd, wedge
-  if (has_second_ref(mbmi)) {
-    const int masked_compound_used =
-        is_any_masked_compound_used(block_size) &&
-        cm->seq_params.enable_masked_compound;
-
-    if (masked_compound_used) {
-      const int group_ctx = get_comp_group_idx_context(xd);
-      aom_write_symbol(w, mbmi->comp_group_idx,
-                       ec_ctx->comp_group_idx_cdf[group_ctx], 2);
-    } else {
-      assert(mbmi->comp_group_idx == 0);
-    }
-
-    if (mbmi->comp_group_idx == 0) {
-      assert(IMPLIES(mbmi->compound_idx,
-                     mbmi->interinter_comp.type == COMPOUND_AVERAGE));
-
-      if (cm->seq_params.enable_jnt_comp) {
-        const int index_ctx = get_comp_index_context(cm, xd);
-        aom_write_symbol(w, mbmi->compound_idx,
-                         ec_ctx->compound_index_cdf[index_ctx], 2);
-      } else {
-        assert(mbmi->compound_idx == 1);
-      }
-    } else {
-      assert(cm->reference_mode != SINGLE_REFERENCE &&
-             is_inter_compound_mode(mbmi->mode) &&
-             mbmi->motion_mode == SIMPLE_TRANSLATION);
-      assert(masked_compound_used);
-      // compound_diffwtd, wedge
-      assert(mbmi->interinter_comp.type == COMPOUND_WEDGE ||
-             mbmi->interinter_comp.type == COMPOUND_DIFFWTD);
-
-      // The type is only coded when wedge is available for this block size.
-      if (is_interinter_compound_used(COMPOUND_WEDGE, block_size)) {
-        aom_write_symbol(w, mbmi->interinter_comp.type - 1,
-                         ec_ctx->compound_type_cdf[block_size],
-                         COMPOUND_TYPES - 1);
-      }
-
-      if (mbmi->interinter_comp.type == COMPOUND_WEDGE) {
-        assert(is_interinter_compound_used(COMPOUND_WEDGE, block_size));
-        aom_write_symbol(w, mbmi->interinter_comp.wedge_index,
-                         ec_ctx->wedge_idx_cdf[block_size], 16);
-        aom_write_bit(w, mbmi->interinter_comp.wedge_sign);
-      } else {
-        assert(mbmi->interinter_comp.type == COMPOUND_DIFFWTD);
-        aom_write_literal(w, mbmi->interinter_comp.mask_type,
-                          MAX_DIFFWTD_MASK_BITS);
-      }
-    }
-  }
-
-  write_mb_interp_filter(cpi, xd, w);
-}
-
-// Codes the intrabc flag of the current block and, for intrabc blocks, the
-// displacement vector relative to the first entry of the INTRA_FRAME
-// reference MV stack.
-static void write_intrabc_info(MACROBLOCKD *xd,
-                               const MB_MODE_INFO_EXT *mbmi_ext,
-                               aom_writer *w) {
-  const MB_MODE_INFO *const mbmi = xd->mi[0];
-  FRAME_CONTEXT *const ec_ctx = xd->tile_ctx;
-  const int intrabc_used = is_intrabc_block(mbmi);
-
-  aom_write_symbol(w, intrabc_used, ec_ctx->intrabc_cdf, 2);
-  if (!intrabc_used) return;
-
-  assert(mbmi->mode == DC_PRED);
-  assert(mbmi->uv_mode == UV_DC_PRED);
-  assert(mbmi->motion_mode == SIMPLE_TRANSLATION);
-  int_mv dv_ref = mbmi_ext->ref_mv_stack[INTRA_FRAME][0].this_mv;
-  av1_encode_dv(w, &mbmi->mv[0].as_mv, &dv_ref.as_mv, &ec_ctx->ndvc);
-}
-
-// Codes the mode info of one block in an intra-only frame: segment id
-// (before or after the skip flag, as selected by segid_preskip), skip flag,
-// CDEF, delta q/lf, intrabc info and finally the intra prediction modes.
-static void write_mb_modes_kf(AV1_COMP *cpi, MACROBLOCKD *xd,
-                              const MB_MODE_INFO_EXT *mbmi_ext,
-                              const int mi_row, const int mi_col,
-                              aom_writer *w) {
-  AV1_COMMON *const cm = &cpi->common;
-  FRAME_CONTEXT *const ec_ctx = xd->tile_ctx;
-  const struct segmentation *const seg = &cm->seg;
-  struct segmentation_probs *const segp = &ec_ctx->seg;
-  const MB_MODE_INFO *const mbmi = xd->mi[0];
-
-  // Segment id ahead of the skip flag.
-  if (seg->update_map && seg->segid_preskip) {
-    write_segment_id(cpi, mbmi, w, seg, segp, mi_row, mi_col, 0);
-  }
-
-  const int skip = write_skip(cm, xd, mbmi->segment_id, mbmi, w);
-
-  // Segment id after the skip flag; the skip value is forwarded.
-  if (seg->update_map && !seg->segid_preskip) {
-    write_segment_id(cpi, mbmi, w, seg, segp, mi_row, mi_col, skip);
-  }
-
-  write_cdef(cm, xd, w, skip, mi_col, mi_row);
-  write_delta_q_params(cpi, mi_row, mi_col, skip, w);
-
-  // An intrabc block carries no intra prediction modes.
-  if (av1_allow_intrabc(cm)) {
-    write_intrabc_info(xd, mbmi_ext, w);
-    if (is_intrabc_block(mbmi)) return;
-  }
-
-  write_intra_prediction_modes(cpi, mi_row, mi_col, 1, w);
-}
-
-#if CONFIG_RD_DEBUG
-// Debug helper: prints position, block size, transform size and mode of a
-// mode-info entry to stdout, one "&&"-joined field per line.
-static void dump_mode_info(MODE_INFO *mi) {
-  printf(
-      "\nmi->mi_row == %d\n"
-      "&& mi->mi_col == %d\n"
-      "&& mi->sb_type == %d\n"
-      "&& mi->tx_size == %d\n"
-      "&& mi->mode == %d\n",
-      mi->mi_row, mi->mi_col, mi->sb_type, mi->tx_size, mi->mode);
-}
-// Debug helper: compares the coefficient cost recorded during RD search for
-// `plane` with the cost accumulated while packing. Returns 0 when they
-// agree; otherwise prints both totals and both cost maps and returns 1.
-static int rd_token_stats_mismatch(RD_STATS *rd_stats, TOKEN_STATS *token_stats,
-                                   int plane) {
-  if (rd_stats->txb_coeff_cost[plane] == token_stats->cost) return 0;
-
-  printf("\nplane %d rd_stats->txb_coeff_cost %d token_stats->cost %d\n",
-         plane, rd_stats->txb_coeff_cost[plane], token_stats->cost);
-
-  printf("rd txb_coeff_cost_map\n");
-  for (int row = 0; row < TXB_COEFF_COST_MAP_SIZE; ++row) {
-    for (int col = 0; col < TXB_COEFF_COST_MAP_SIZE; ++col) {
-      printf("%d ", rd_stats->txb_coeff_cost_map[plane][row][col]);
-    }
-    printf("\n");
-  }
-
-  printf("pack txb_coeff_cost_map\n");
-  for (int row = 0; row < TXB_COEFF_COST_MAP_SIZE; ++row) {
-    for (int col = 0; col < TXB_COEFF_COST_MAP_SIZE; ++col) {
-      printf("%d ", token_stats->txb_coeff_cost_map[row][col]);
-    }
-    printf("\n");
-  }
-  return 1;
-}
-#endif
-
-#if ENC_MISMATCH_DEBUG
-// Debug helper (ENC_MISMATCH_DEBUG only): for inter blocks of the shown
-// frame whose index is FRAME_TO_CHECK, prints the block's mode info and the
-// derived mode contexts to stdout.
-// Side effect: repoints xd->mi at the block located at (mi_row, mi_col).
-static void enc_dump_logs(AV1_COMP *cpi, int mi_row, int mi_col) {
-  AV1_COMMON *const cm = &cpi->common;
-  MACROBLOCKD *const xd = &cpi->td.mb.e_mbd;
-  xd->mi = cm->mi_grid_visible + (mi_row * cm->mi_stride + mi_col);
-  // xd->mi[0] is a pointer to the block's mode info, so a single level of
-  // indirection is required here; every use below is of the form
-  // mbmi->field.
-  const MB_MODE_INFO *const mbmi = xd->mi[0];
-  if (is_inter_block(mbmi)) {
-#define FRAME_TO_CHECK 11
-    if (cm->current_video_frame == FRAME_TO_CHECK && cm->show_frame == 1) {
-      const BLOCK_SIZE bsize = mbmi->sb_type;
-
-      int_mv mv[2];
-      int is_comp_ref = has_second_ref(mbmi);
-      int ref;
-
-      for (ref = 0; ref < 1 + is_comp_ref; ++ref)
-        mv[ref].as_mv = mbmi->mv[ref].as_mv;
-
-      // Single-reference blocks report a zero second motion vector.
-      if (!is_comp_ref) {
-        mv[1].as_int = 0;
-      }
-
-      MACROBLOCK *const x = &cpi->td.mb;
-      const MB_MODE_INFO_EXT *const mbmi_ext = x->mbmi_ext;
-      // NOTE(review): the compound branch reads compound_mode_context, while
-      // pack_inter_mode_mvs() derives its context from mode_context alone -
-      // confirm that field still exists in MB_MODE_INFO_EXT.
-      const int16_t mode_ctx =
-          is_comp_ref ? mbmi_ext->compound_mode_context[mbmi->ref_frame[0]]
-                      : av1_mode_context_analyzer(mbmi_ext->mode_context,
-                                                  mbmi->ref_frame);
-
-      const int16_t newmv_ctx = mode_ctx & NEWMV_CTX_MASK;
-      // -1 marks a context that is not derived for this mode.
-      int16_t zeromv_ctx = -1;
-      int16_t refmv_ctx = -1;
-
-      if (mbmi->mode != NEWMV) {
-        zeromv_ctx = (mode_ctx >> GLOBALMV_OFFSET) & GLOBALMV_CTX_MASK;
-        if (mbmi->mode != GLOBALMV)
-          refmv_ctx = (mode_ctx >> REFMV_OFFSET) & REFMV_CTX_MASK;
-      }
-
-      printf(
-          "=== ENCODER ===: "
-          "Frame=%d, (mi_row,mi_col)=(%d,%d), skip_mode=%d, mode=%d, bsize=%d, "
-          "show_frame=%d, mv[0]=(%d,%d), mv[1]=(%d,%d), ref[0]=%d, "
-          "ref[1]=%d, motion_mode=%d, mode_ctx=%d, "
-          "newmv_ctx=%d, zeromv_ctx=%d, refmv_ctx=%d, tx_size=%d\n",
-          cm->current_video_frame, mi_row, mi_col, mbmi->skip_mode, mbmi->mode,
-          bsize, cm->show_frame, mv[0].as_mv.row, mv[0].as_mv.col,
-          mv[1].as_mv.row, mv[1].as_mv.col, mbmi->ref_frame[0],
-          mbmi->ref_frame[1], mbmi->motion_mode, mode_ctx, newmv_ctx,
-          zeromv_ctx, refmv_ctx, mbmi->tx_size);
-    }
-  }
-}
-#endif // ENC_MISMATCH_DEBUG
-
-// Sets up the encoder's block state (xd->mi, mbmi_ext, position and
-// transform contexts) for the block at (mi_row, mi_col) and writes its mode
-// info, using the key-frame path for intra-only frames and the inter path
-// otherwise.
-static void write_mbmi_b(AV1_COMP *cpi, const TileInfo *const tile,
-                         aom_writer *w, int mi_row, int mi_col) {
-  AV1_COMMON *const cm = &cpi->common;
-  MACROBLOCK *const x = &cpi->td.mb;
-  MACROBLOCKD *const xd = &x->e_mbd;
-
-  xd->mi = cm->mi_grid_visible + (mi_row * cm->mi_stride + mi_col);
-  MB_MODE_INFO *const mbmi = xd->mi[0];
-
-  assert(mbmi->sb_type <= cm->seq_params.sb_size ||
-         (mbmi->sb_type >= BLOCK_SIZES && mbmi->sb_type < BLOCK_SIZES_ALL));
-
-  const int block_mi_high = mi_size_high[mbmi->sb_type];
-  const int block_mi_wide = mi_size_wide[mbmi->sb_type];
-
-  // Note: mbmi_ext is indexed with mi_cols, the mode-info grid with mi_stride.
-  x->mbmi_ext = cpi->mbmi_ext_base + (mi_row * cm->mi_cols + mi_col);
-
-  set_mi_row_col(xd, tile, mi_row, block_mi_high, mi_col, block_mi_wide,
-                 cm->mi_rows, cm->mi_cols);
-
-  xd->above_txfm_context = cm->above_txfm_context[tile->tile_row] + mi_col;
-  xd->left_txfm_context =
-      xd->left_txfm_context_buffer + (mi_row & MAX_MIB_MASK);
-
-  if (frame_is_intra_only(cm)) {
-    write_mb_modes_kf(cpi, xd, x->mbmi_ext, mi_row, mi_col, w);
-    return;
-  }
-
-  // has_subpel_mv_component needs the ref frame buffers set up to look
-  // up if they are scaled. has_subpel_mv_component is in turn needed by
-  // write_switchable_interp_filter, which is called by pack_inter_mode_mvs.
-  set_ref_ptrs(cm, xd, mbmi->ref_frame[0], mbmi->ref_frame[1]);
-
-#if ENC_MISMATCH_DEBUG
-  enc_dump_logs(cpi, mi_row, mi_col);
-#endif  // ENC_MISMATCH_DEBUG
-
-  pack_inter_mode_mvs(cpi, mi_row, mi_col, w);
-}
-
-// Packs the coefficient tokens of one plane of an inter block for the
-// processing unit that starts at luma position (row, col), both given in
-// 4x4 units. The plane is walked in steps of the largest transform size and
-// *block is advanced by the number of 4x4 units in each step.
-static void write_inter_txb_coeff(AV1_COMMON *const cm, MACROBLOCK *const x,
-                                  MB_MODE_INFO *const mbmi, aom_writer *w,
-                                  const TOKENEXTRA **tok,
-                                  const TOKENEXTRA *const tok_end,
-                                  TOKEN_STATS *token_stats, const int row,
-                                  const int col, int *block, const int plane) {
-  MACROBLOCKD *const xd = &x->e_mbd;
-  const struct macroblockd_plane *const pd = &xd->plane[plane];
-  const int ss_x = pd->subsampling_x;
-  const int ss_y = pd->subsampling_y;
-
-  const BLOCK_SIZE bsizec = scale_chroma_bsize(mbmi->sb_type, ss_x, ss_y);
-  const BLOCK_SIZE plane_bsize = get_plane_block_size(bsizec, ss_x, ss_y);
-
-  // Step sizes: the largest transform for this plane, in 4x4 units.
-  const TX_SIZE max_tx_size = get_vartx_max_txsize(xd, plane_bsize, plane);
-  const int tx_units_wide = tx_size_wide_unit[max_tx_size];
-  const int tx_units_high = tx_size_high_unit[max_tx_size];
-  const int units_per_tx = tx_units_wide * tx_units_high;
-
-  // Extent of a 64x64 luma unit in this plane, in 4x4 units.
-  const BLOCK_SIZE max_unit_bsize = get_plane_block_size(BLOCK_64X64, ss_x, ss_y);
-  const int unit_wide = block_size_wide[max_unit_bsize] >> tx_size_wide_log2[0];
-  const int unit_high = block_size_high[max_unit_bsize] >> tx_size_high_log2[0];
-
-  // Extent of the whole plane block, in 4x4 units.
-  const int plane_wide = block_size_wide[plane_bsize] >> tx_size_wide_log2[0];
-  const int plane_high = block_size_high[plane_bsize] >> tx_size_high_log2[0];
-
-  // Walk the unit, clipped to the plane block.
-  const int row_begin = row >> ss_y;
-  const int col_begin = col >> ss_x;
-  const int row_end = AOMMIN(unit_high + row_begin, plane_high);
-  const int col_end = AOMMIN(unit_wide + col_begin, plane_wide);
-
-  for (int blk_row = row_begin; blk_row < row_end; blk_row += tx_units_high) {
-    for (int blk_col = col_begin; blk_col < col_end;
-         blk_col += tx_units_wide) {
-      pack_txb_tokens(w, cm, x, tok, tok_end, xd, mbmi, plane, plane_bsize,
-                      cm->seq_params.bit_depth, *block, blk_row, blk_col,
-                      max_tx_size, token_stats);
-      *block += units_per_tx;
-    }
-  }
-}
-
-// Writes the coefficient data of the block at (mi_row, mi_col). Skipped
-// blocks write nothing. Intra blocks are delegated to av1_write_coeffs_mb();
-// inter blocks are walked in units of at most 64x64 luma samples and, within
-// each unit, plane by plane.
-// Side effects: repoints xd->mi and cpi->td.mb.mbmi_ext at this block and
-// refreshes the position state through set_mi_row_col().
-static void write_tokens_b(AV1_COMP *cpi, const TileInfo *const tile,
-                           aom_writer *w, const TOKENEXTRA **tok,
-                           const TOKENEXTRA *const tok_end, int mi_row,
-                           int mi_col) {
-  AV1_COMMON *const cm = &cpi->common;
-  const int num_planes = av1_num_planes(cm);
-  MACROBLOCKD *const xd = &cpi->td.mb.e_mbd;
-  const int mi_offset = mi_row * cm->mi_stride + mi_col;
-  MB_MODE_INFO *const mbmi = *(cm->mi_grid_visible + mi_offset);
-  int plane;
-  int bh, bw;
-  MACROBLOCK *const x = &cpi->td.mb;
-  (void)tok;
-  (void)tok_end;
-  xd->mi = cm->mi_grid_visible + mi_offset;
-
-  assert(mbmi->sb_type <= cm->seq_params.sb_size ||
-         (mbmi->sb_type >= BLOCK_SIZES && mbmi->sb_type < BLOCK_SIZES_ALL));
-
-  bh = mi_size_high[mbmi->sb_type];
-  bw = mi_size_wide[mbmi->sb_type];
-  cpi->td.mb.mbmi_ext = cpi->mbmi_ext_base + (mi_row * cm->mi_cols + mi_col);
-
-  set_mi_row_col(xd, tile, mi_row, bh, mi_col, bw, cm->mi_rows, cm->mi_cols);
-
-  if (!mbmi->skip) {
-    if (!is_inter_block(mbmi))
-      av1_write_coeffs_mb(cm, x, mi_row, mi_col, w, mbmi->sb_type);
-
-    if (is_inter_block(mbmi)) {
-      // Running 4x4-unit offset per plane, advanced by
-      // write_inter_txb_coeff().
-      int block[MAX_MB_PLANE] = { 0 };
-      const BLOCK_SIZE plane_bsize = mbmi->sb_type;
-      assert(plane_bsize == get_plane_block_size(mbmi->sb_type,
-                                                 xd->plane[0].subsampling_x,
-                                                 xd->plane[0].subsampling_y));
-      const int num_4x4_w =
-          block_size_wide[plane_bsize] >> tx_size_wide_log2[0];
-      const int num_4x4_h =
-          block_size_high[plane_bsize] >> tx_size_high_log2[0];
-      int row, col;
-      TOKEN_STATS token_stats;
-      init_token_stats(&token_stats);
-
-      const BLOCK_SIZE max_unit_bsize = BLOCK_64X64;
-      assert(max_unit_bsize ==
-             get_plane_block_size(BLOCK_64X64, xd->plane[0].subsampling_x,
-                                  xd->plane[0].subsampling_y));
-      int mu_blocks_wide =
-          block_size_wide[max_unit_bsize] >> tx_size_wide_log2[0];
-      int mu_blocks_high =
-          block_size_high[max_unit_bsize] >> tx_size_high_log2[0];
-
-      // Never step further than the block itself.
-      mu_blocks_wide = AOMMIN(num_4x4_w, mu_blocks_wide);
-      mu_blocks_high = AOMMIN(num_4x4_h, mu_blocks_high);
-
-      for (row = 0; row < num_4x4_h; row += mu_blocks_high) {
-        for (col = 0; col < num_4x4_w; col += mu_blocks_wide) {
-          for (plane = 0; plane < num_planes && is_inter_block(mbmi); ++plane) {
-            const struct macroblockd_plane *const pd = &xd->plane[plane];
-            // Planes for which is_chroma_reference() fails carry no
-            // coefficients for this block.
-            if (!is_chroma_reference(mi_row, mi_col, mbmi->sb_type,
-                                     pd->subsampling_x, pd->subsampling_y)) {
-              continue;
-            }
-            write_inter_txb_coeff(cm, x, mbmi, w, tok, tok_end, &token_stats,
-                                  row, col, &block[plane], plane);
-          }
-        }
-#if CONFIG_RD_DEBUG
-        // The mode info of this block is `mbmi`; the previously referenced
-        // identifier `m` is not declared in this function.
-        // NOTE(review): `plane` holds the plane loop's exit value at this
-        // point, so it no longer names a coded plane - confirm which plane
-        // index the mismatch check is meant to use.
-        if (mbmi->sb_type >= BLOCK_8X8 &&
-            rd_token_stats_mismatch(&mbmi->rd_stats, &token_stats, plane)) {
-          dump_mode_info(mbmi);
-          assert(0);
-        }
-#endif  // CONFIG_RD_DEBUG
-      }
-    }
-  }
-}
-
-// Writes everything belonging to one coding block, in bitstream order: mode
-// info, palette color-index maps, transform size information and finally the
-// coefficient tokens.
-static void write_modes_b(AV1_COMP *cpi, const TileInfo *const tile,
-                          aom_writer *w, const TOKENEXTRA **tok,
-                          const TOKENEXTRA *const tok_end, int mi_row,
-                          int mi_col) {
-  // Must run first: it also points xd->mi at this block, which the reads
-  // below rely on.
-  write_mbmi_b(cpi, tile, w, mi_row, mi_col);
-
-  AV1_COMMON *const cm = &cpi->common;
-  MACROBLOCKD *const xd = &cpi->td.mb.e_mbd;
-  MB_MODE_INFO *const mbmi = xd->mi[0];
-
-  // Palette color-index maps, for luma and (if present) chroma.
-  const int palette_planes = AOMMIN(2, av1_num_planes(cm));
-  for (int plane = 0; plane < palette_planes; ++plane) {
-    const uint8_t num_colors = mbmi->palette_mode_info.palette_size[plane];
-    assert(!mbmi->skip_mode || !num_colors);
-    if (num_colors == 0) continue;
-
-    assert(mbmi->use_intrabc == 0);
-    assert(av1_allow_palette(cm->allow_screen_content_tools, mbmi->sb_type));
-    int rows, cols;
-    av1_get_block_dimensions(mbmi->sb_type, plane, xd, NULL, NULL, &rows,
-                             &cols);
-    assert(*tok < tok_end);
-    pack_map_tokens(w, tok, num_colors, rows * cols);
-  }
-
-  const BLOCK_SIZE block_size = mbmi->sb_type;
-  const int is_inter_tx = is_inter_block(mbmi) || is_intrabc_block(mbmi);
-  const int skip = mbmi->skip;
-  const int segment_id = mbmi->segment_id;
-
-  // The transform size is explicit only in TX_MODE_SELECT, for block sizes
-  // that signal it, outside lossless segments, and not for skipped
-  // inter/intrabc blocks.
-  const int tx_size_coded =
-      cm->tx_mode == TX_MODE_SELECT && block_signals_txsize(block_size) &&
-      (!is_inter_tx || !skip) && !xd->lossless[segment_id];
-
-  if (!tx_size_coded) {
-    set_txfm_ctxs(mbmi->tx_size, xd->n4_w, xd->n4_h,
-                  skip && is_inter_block(mbmi), xd);
-  } else if (!is_inter_tx) {
-    write_selected_tx_size(xd, w);
-    set_txfm_ctxs(mbmi->tx_size, xd->n4_w, xd->n4_h, 0, xd);
-  } else {
-    // Inter/intrabc with skip == 0: code the transform partitioning for
-    // every largest-transform-sized cell of the block.
-    const TX_SIZE max_tx_size = get_vartx_max_txsize(xd, block_size, 0);
-    const int step_high = tx_size_high_unit[max_tx_size];
-    const int step_wide = tx_size_wide_unit[max_tx_size];
-    const int units_wide = block_size_wide[block_size] >> tx_size_wide_log2[0];
-    const int units_high = block_size_high[block_size] >> tx_size_high_log2[0];
-    for (int idy = 0; idy < units_high; idy += step_high) {
-      for (int idx = 0; idx < units_wide; idx += step_wide) {
-        write_tx_size_vartx(xd, mbmi, max_tx_size, 0, idy, idx, w);
-      }
-    }
-  }
-
-  write_tokens_b(cpi, tile, w, tok, tok_end, mi_row, mi_col);
-}
-
-// Codes the partition type of a block. Blocks smaller than 8x8 code nothing.
-// When the block's lower or right half lies outside the frame, only a binary
-// split / no-split decision is coded (derived from the full partition CDF),
-// and when both halves are outside, the partition is implied to be
-// PARTITION_SPLIT and nothing is written.
-static void write_partition(const AV1_COMMON *const cm,
-                            const MACROBLOCKD *const xd, int hbs, int mi_row,
-                            int mi_col, PARTITION_TYPE p, BLOCK_SIZE bsize,
-                            aom_writer *w) {
-  if (bsize < BLOCK_8X8) return;
-
-  const int has_rows = (mi_row + hbs) < cm->mi_rows;
-  const int has_cols = (mi_col + hbs) < cm->mi_cols;
-  const int ctx = partition_plane_context(xd, mi_row, mi_col, bsize);
-  FRAME_CONTEXT *const ec_ctx = xd->tile_ctx;
-
-  if (!has_rows && !has_cols) {
-    assert(p == PARTITION_SPLIT);
-    return;
-  }
-
-  if (has_rows && has_cols) {
-    aom_write_symbol(w, p, ec_ctx->partition_cdf[ctx],
-                     partition_cdf_length(bsize));
-    return;
-  }
-
-  // Exactly one of the two halves is missing from here on.
-  aom_cdf_prob split_cdf[2];
-  if (has_cols) {
-    // Lower half outside the frame.
-    assert(p == PARTITION_SPLIT || p == PARTITION_HORZ);
-    assert(bsize > BLOCK_8X8);
-    partition_gather_vert_alike(split_cdf, ec_ctx->partition_cdf[ctx], bsize);
-  } else {
-    // Right half outside the frame.
-    assert(has_rows && !has_cols);
-    assert(p == PARTITION_SPLIT || p == PARTITION_VERT);
-    assert(bsize > BLOCK_8X8);
-    partition_gather_horz_alike(split_cdf, ec_ctx->partition_cdf[ctx], bsize);
-  }
-  aom_write_cdf(w, p == PARTITION_SPLIT, split_cdf, 2);
-}
-
-// Recursively writes one partition-tree node of size bsize located at
-// (mi_row, mi_col): loop-restoration coefficients of the restoration units
-// reported for this block, the partition symbol, and then every sub-block in
-// the order dictated by the partition type. Blocks that start outside the
-// frame write nothing.
-static void write_modes_sb(AV1_COMP *const cpi, const TileInfo *const tile,
-                           aom_writer *const w, const TOKENEXTRA **tok,
-                           const TOKENEXTRA *const tok_end, int mi_row,
-                           int mi_col, BLOCK_SIZE bsize) {
-  const AV1_COMMON *const cm = &cpi->common;
-  MACROBLOCKD *const xd = &cpi->td.mb.e_mbd;
-  const int half = mi_size_wide[bsize] / 2;
-  const int quarter = mi_size_wide[bsize] / 4;
-  const PARTITION_TYPE partition = get_partition(cm, mi_row, mi_col, bsize);
-  const BLOCK_SIZE subsize = get_partition_subsize(bsize, partition);
-
-  if (mi_row >= cm->mi_rows || mi_col >= cm->mi_cols) return;
-
-  // Loop-restoration coefficients, per plane and restoration unit.
-  const int num_planes = av1_num_planes(cm);
-  for (int plane = 0; plane < num_planes; ++plane) {
-    int rcol0, rcol1, rrow0, rrow1;
-    if (!av1_loop_restoration_corners_in_sb(cm, plane, mi_row, mi_col, bsize,
-                                            &rcol0, &rcol1, &rrow0, &rrow1)) {
-      continue;
-    }
-    const int units_per_row = cm->rst_info[plane].horz_units_per_tile;
-    for (int rrow = rrow0; rrow < rrow1; ++rrow) {
-      for (int rcol = rcol0; rcol < rcol1; ++rcol) {
-        const RestorationUnitInfo *const rui =
-            &cm->rst_info[plane].unit_info[rcol + rrow * units_per_row];
-        loop_restoration_write_sb_coeffs(cm, xd, rui, w, plane,
-                                         cpi->td.counts);
-      }
-    }
-  }
-
-  write_partition(cm, xd, half, mi_row, mi_col, partition, bsize, w);
-
-  // Position of the block's second half in each direction.
-  const int mid_row = mi_row + half;
-  const int mid_col = mi_col + half;
-
-  switch (partition) {
-    case PARTITION_NONE:
-      write_modes_b(cpi, tile, w, tok, tok_end, mi_row, mi_col);
-      break;
-    case PARTITION_HORZ:
-      write_modes_b(cpi, tile, w, tok, tok_end, mi_row, mi_col);
-      // The lower half is only coded when it starts inside the frame.
-      if (mid_row < cm->mi_rows)
-        write_modes_b(cpi, tile, w, tok, tok_end, mid_row, mi_col);
-      break;
-    case PARTITION_VERT:
-      write_modes_b(cpi, tile, w, tok, tok_end, mi_row, mi_col);
-      // The right half is only coded when it starts inside the frame.
-      if (mid_col < cm->mi_cols)
-        write_modes_b(cpi, tile, w, tok, tok_end, mi_row, mid_col);
-      break;
-    case PARTITION_SPLIT:
-      write_modes_sb(cpi, tile, w, tok, tok_end, mi_row, mi_col, subsize);
-      write_modes_sb(cpi, tile, w, tok, tok_end, mi_row, mid_col, subsize);
-      write_modes_sb(cpi, tile, w, tok, tok_end, mid_row, mi_col, subsize);
-      write_modes_sb(cpi, tile, w, tok, tok_end, mid_row, mid_col, subsize);
-      break;
-    case PARTITION_HORZ_A:
-      write_modes_b(cpi, tile, w, tok, tok_end, mi_row, mi_col);
-      write_modes_b(cpi, tile, w, tok, tok_end, mi_row, mid_col);
-      write_modes_b(cpi, tile, w, tok, tok_end, mid_row, mi_col);
-      break;
-    case PARTITION_HORZ_B:
-      write_modes_b(cpi, tile, w, tok, tok_end, mi_row, mi_col);
-      write_modes_b(cpi, tile, w, tok, tok_end, mid_row, mi_col);
-      write_modes_b(cpi, tile, w, tok, tok_end, mid_row, mid_col);
-      break;
-    case PARTITION_VERT_A:
-      write_modes_b(cpi, tile, w, tok, tok_end, mi_row, mi_col);
-      write_modes_b(cpi, tile, w, tok, tok_end, mid_row, mi_col);
-      write_modes_b(cpi, tile, w, tok, tok_end, mi_row, mid_col);
-      break;
-    case PARTITION_VERT_B:
-      write_modes_b(cpi, tile, w, tok, tok_end, mi_row, mi_col);
-      write_modes_b(cpi, tile, w, tok, tok_end, mi_row, mid_col);
-      write_modes_b(cpi, tile, w, tok, tok_end, mid_row, mid_col);
-      break;
-    case PARTITION_HORZ_4:
-      // Four stripes; stripes after the first stop at the frame edge.
-      for (int stripe = 0; stripe < 4; ++stripe) {
-        const int stripe_row = mi_row + stripe * quarter;
-        if (stripe > 0 && stripe_row >= cm->mi_rows) break;
-
-        write_modes_b(cpi, tile, w, tok, tok_end, stripe_row, mi_col);
-      }
-      break;
-    case PARTITION_VERT_4:
-      // Four columns; columns after the first stop at the frame edge.
-      for (int stripe = 0; stripe < 4; ++stripe) {
-        const int stripe_col = mi_col + stripe * quarter;
-        if (stripe > 0 && stripe_col >= cm->mi_cols) break;
-
-        write_modes_b(cpi, tile, w, tok, tok_end, mi_row, stripe_col);
-      }
-      break;
-    default: assert(0);
-  }
-
-  // update partition context
-  update_ext_partition_context(xd, mi_row, mi_col, subsize, bsize, partition);
-}
-
-// Writes all superblocks of one tile in raster order. The above context is
-// reset once per tile and the left context once per superblock row; each
-// superblock row consumes its own token list, which must be fully used up
-// by the end of the row.
-static void write_modes(AV1_COMP *const cpi, const TileInfo *const tile,
-                        aom_writer *const w, int tile_row, int tile_col) {
-  AV1_COMMON *const cm = &cpi->common;
-  MACROBLOCKD *const xd = &cpi->td.mb.e_mbd;
-  const int row_begin = tile->mi_row_start;
-  const int row_end = tile->mi_row_end;
-  const int col_begin = tile->mi_col_start;
-  const int col_end = tile->mi_col_end;
-  const int sb_step = cm->seq_params.mib_size;
-
-  av1_zero_above_context(cm, xd, col_begin, col_end, tile->tile_row);
-  av1_init_above_context(cm, xd, tile->tile_row);
-
-  // Delta-q / delta-lf prediction restarts from the frame-level values.
-  if (cm->delta_q_present_flag) {
-    xd->current_qindex = cm->base_qindex;
-    if (cm->delta_lf_present_flag) {
-      av1_reset_loop_filter_delta(xd, av1_num_planes(cm));
-    }
-  }
-
-  for (int mi_row = row_begin; mi_row < row_end; mi_row += sb_step) {
-    const int sb_row_in_tile =
-        (mi_row - tile->mi_row_start) >> cm->seq_params.mib_size_log2;
-    const TOKENEXTRA *tok =
-        cpi->tplist[tile_row][tile_col][sb_row_in_tile].start;
-    const TOKENEXTRA *const tok_end =
-        tok + cpi->tplist[tile_row][tile_col][sb_row_in_tile].count;
-
-    av1_zero_left_context(xd);
-
-    for (int mi_col = col_begin; mi_col < col_end; mi_col += sb_step) {
-      write_modes_sb(cpi, tile, w, &tok, tok_end, mi_row, mi_col,
-                     cm->seq_params.sb_size);
-    }
-    // Every token of the row's list must have been consumed.
-    assert(tok == cpi->tplist[tile_row][tile_col][sb_row_in_tile].stop);
-  }
-}
-
-// Writes the frame-level loop-restoration signalling: a 2-bit restoration
-// type per plane, followed by the restoration unit size bits when at least
-// one plane uses restoration. Nothing is written when restoration is disabled
-// in the sequence header or when intra block copy is allowed.
-static void encode_restoration_mode(AV1_COMMON *cm,
-                                    struct aom_write_bit_buffer *wb) {
-  assert(!cm->all_lossless);
-  if (!cm->seq_params.enable_restoration) return;
-  if (cm->allow_intrabc) return;
-
-  const int num_planes = av1_num_planes(cm);
-  int all_none = 1;
-  int chroma_none = 1;
-
-  for (int plane = 0; plane < num_planes; ++plane) {
-    const RestorationInfo *const plane_rsi = &cm->rst_info[plane];
-
-    if (plane_rsi->frame_restoration_type != RESTORE_NONE) {
-      all_none = 0;
-      if (plane != 0) chroma_none = 0;
-    }
-
-    // Bit pairs: NONE = 00, WIENER = 10, SGRPROJ = 11, SWITCHABLE = 01.
-    switch (plane_rsi->frame_restoration_type) {
-      case RESTORE_NONE:
-      case RESTORE_SWITCHABLE:
-        aom_wb_write_bit(wb, 0);
-        aom_wb_write_bit(
-            wb, plane_rsi->frame_restoration_type == RESTORE_SWITCHABLE);
-        break;
-      case RESTORE_WIENER:
-      case RESTORE_SGRPROJ:
-        aom_wb_write_bit(wb, 1);
-        aom_wb_write_bit(wb,
-                         plane_rsi->frame_restoration_type == RESTORE_SGRPROJ);
-        break;
-      default: assert(0);
-    }
-  }
-
-  if (!all_none) {
-    assert(cm->seq_params.sb_size == BLOCK_64X64 ||
-           cm->seq_params.sb_size == BLOCK_128X128);
-    const int sb_size = (cm->seq_params.sb_size == BLOCK_128X128) ? 128 : 64;
-    const RestorationInfo *const luma_rsi = &cm->rst_info[0];
-
-    assert(luma_rsi->restoration_unit_size >= sb_size);
-    assert(RESTORATION_UNITSIZE_MAX == 256);
-
-    // The "larger than 64" bit is only sent for 64x64 superblocks.
-    if (sb_size == 64) {
-      aom_wb_write_bit(wb, luma_rsi->restoration_unit_size > 64);
-    }
-    if (luma_rsi->restoration_unit_size > 64) {
-      aom_wb_write_bit(wb, luma_rsi->restoration_unit_size > 128);
-    }
-  }
-
-  if (num_planes <= 1) return;
-
-  const int s =
-      AOMMIN(cm->seq_params.subsampling_x, cm->seq_params.subsampling_y);
-  if (s && !chroma_none) {
-    // One bit tells whether the chroma unit size differs from luma.
-    aom_wb_write_bit(wb, cm->rst_info[1].restoration_unit_size !=
-                             cm->rst_info[0].restoration_unit_size);
-    assert(cm->rst_info[1].restoration_unit_size ==
-               cm->rst_info[0].restoration_unit_size ||
-           cm->rst_info[1].restoration_unit_size ==
-               (cm->rst_info[0].restoration_unit_size >> s));
-    assert(cm->rst_info[2].restoration_unit_size ==
-           cm->rst_info[1].restoration_unit_size);
-  } else if (!s) {
-    assert(cm->rst_info[1].restoration_unit_size ==
-           cm->rst_info[0].restoration_unit_size);
-    assert(cm->rst_info[2].restoration_unit_size ==
-           cm->rst_info[1].restoration_unit_size);
-  }
-}
-
-// Codes taps 0..2 of the vertical and then the horizontal Wiener filter in
-// `wiener_info`, each relative to the matching tap in `ref_wiener_info`, and
-// finally copies `wiener_info` into `ref_wiener_info`. Tap 0 is only coded
-// when `wiener_win` equals WIENER_WIN; otherwise the outer taps are asserted
-// to be zero.
-static void write_wiener_filter(int wiener_win, const WienerInfo *wiener_info,
-                                WienerInfo *ref_wiener_info, aom_writer *wb) {
-  const int code_tap0 = (wiener_win == WIENER_WIN);
-
-  // Vertical filter.
-  if (!code_tap0) {
-    assert(wiener_info->vfilter[0] == 0 &&
-           wiener_info->vfilter[WIENER_WIN - 1] == 0);
-  } else {
-    aom_write_primitive_refsubexpfin(
-        wb, WIENER_FILT_TAP0_MAXV - WIENER_FILT_TAP0_MINV + 1,
-        WIENER_FILT_TAP0_SUBEXP_K,
-        ref_wiener_info->vfilter[0] - WIENER_FILT_TAP0_MINV,
-        wiener_info->vfilter[0] - WIENER_FILT_TAP0_MINV);
-  }
-  aom_write_primitive_refsubexpfin(
-      wb, WIENER_FILT_TAP1_MAXV - WIENER_FILT_TAP1_MINV + 1,
-      WIENER_FILT_TAP1_SUBEXP_K,
-      ref_wiener_info->vfilter[1] - WIENER_FILT_TAP1_MINV,
-      wiener_info->vfilter[1] - WIENER_FILT_TAP1_MINV);
-  aom_write_primitive_refsubexpfin(
-      wb, WIENER_FILT_TAP2_MAXV - WIENER_FILT_TAP2_MINV + 1,
-      WIENER_FILT_TAP2_SUBEXP_K,
-      ref_wiener_info->vfilter[2] - WIENER_FILT_TAP2_MINV,
-      wiener_info->vfilter[2] - WIENER_FILT_TAP2_MINV);
-
-  // Horizontal filter.
-  if (!code_tap0) {
-    assert(wiener_info->hfilter[0] == 0 &&
-           wiener_info->hfilter[WIENER_WIN - 1] == 0);
-  } else {
-    aom_write_primitive_refsubexpfin(
-        wb, WIENER_FILT_TAP0_MAXV - WIENER_FILT_TAP0_MINV + 1,
-        WIENER_FILT_TAP0_SUBEXP_K,
-        ref_wiener_info->hfilter[0] - WIENER_FILT_TAP0_MINV,
-        wiener_info->hfilter[0] - WIENER_FILT_TAP0_MINV);
-  }
-  aom_write_primitive_refsubexpfin(
-      wb, WIENER_FILT_TAP1_MAXV - WIENER_FILT_TAP1_MINV + 1,
-      WIENER_FILT_TAP1_SUBEXP_K,
-      ref_wiener_info->hfilter[1] - WIENER_FILT_TAP1_MINV,
-      wiener_info->hfilter[1] - WIENER_FILT_TAP1_MINV);
-  aom_write_primitive_refsubexpfin(
-      wb, WIENER_FILT_TAP2_MAXV - WIENER_FILT_TAP2_MINV + 1,
-      WIENER_FILT_TAP2_SUBEXP_K,
-      ref_wiener_info->hfilter[2] - WIENER_FILT_TAP2_MINV,
-      wiener_info->hfilter[2] - WIENER_FILT_TAP2_MINV);
-
-  // The filter just coded becomes the reference for the next one.
-  memcpy(ref_wiener_info, wiener_info, sizeof(*wiener_info));
-}
-
-// Codes the self-guided filter parameter set index `ep` followed by the
-// projection coefficients xqd[0] / xqd[1], each relative to the reference in
-// `ref_sgrproj_info`. A coefficient is skipped when the parameter set makes
-// it implicit: xqd[0] when r[0] == 0, xqd[1] when only r[1] == 0. The coded
-// info is then copied into `ref_sgrproj_info`.
-static void write_sgrproj_filter(const SgrprojInfo *sgrproj_info,
-                                 SgrprojInfo *ref_sgrproj_info,
-                                 aom_writer *wb) {
-  aom_write_literal(wb, sgrproj_info->ep, SGRPROJ_PARAMS_BITS);
-  const sgr_params_type *const params = &sgr_params[sgrproj_info->ep];
-  const int r0_off = (params->r[0] == 0);
-  const int r1_off = (params->r[1] == 0);
-
-  if (r0_off) {
-    assert(sgrproj_info->xqd[0] == 0);
-  } else {
-    aom_write_primitive_refsubexpfin(
-        wb, SGRPROJ_PRJ_MAX0 - SGRPROJ_PRJ_MIN0 + 1, SGRPROJ_PRJ_SUBEXP_K,
-        ref_sgrproj_info->xqd[0] - SGRPROJ_PRJ_MIN0,
-        sgrproj_info->xqd[0] - SGRPROJ_PRJ_MIN0);
-  }
-
-  // xqd[1] is written unless r[0] is active and r[1] is not.
-  if (r0_off || !r1_off) {
-    aom_write_primitive_refsubexpfin(
-        wb, SGRPROJ_PRJ_MAX1 - SGRPROJ_PRJ_MIN1 + 1, SGRPROJ_PRJ_SUBEXP_K,
-        ref_sgrproj_info->xqd[1] - SGRPROJ_PRJ_MIN1,
-        sgrproj_info->xqd[1] - SGRPROJ_PRJ_MIN1);
-  }
-
-  memcpy(ref_sgrproj_info, sgrproj_info, sizeof(*sgrproj_info));
-}
-
-// Writes the restoration choice and filter coefficients of one restoration
-// unit `rui` for `plane`. What is coded depends on the plane's frame-level
-// restoration type: a symbol selecting the unit type (SWITCHABLE) or an
-// on/off flag (WIENER / SGRPROJ), followed by the filter when one is used.
-// `counts` is only touched when CONFIG_ENTROPY_STATS is enabled.
-static void loop_restoration_write_sb_coeffs(const AV1_COMMON *const cm,
-                                             MACROBLOCKD *xd,
-                                             const RestorationUnitInfo *rui,
-                                             aom_writer *const w, int plane,
-                                             FRAME_COUNTS *counts) {
-  const RestorationInfo *rsi = cm->rst_info + plane;
-  const RestorationType frame_rtype = rsi->frame_restoration_type;
-  if (frame_rtype == RESTORE_NONE) return;
-
-  (void)counts;
-  assert(!cm->all_lossless);
-
-  const int wiener_win = (plane > 0) ? WIENER_WIN_CHROMA : WIENER_WIN;
-  // Per-plane reference filters used for differential coding.
-  WienerInfo *const ref_wiener = xd->wiener_info + plane;
-  SgrprojInfo *const ref_sgrproj = xd->sgrproj_info + plane;
-  const RestorationType unit_rtype = rui->restoration_type;
-  const int unit_enabled = (unit_rtype != RESTORE_NONE);
-
-  switch (frame_rtype) {
-    case RESTORE_SWITCHABLE:
-      aom_write_symbol(w, unit_rtype, xd->tile_ctx->switchable_restore_cdf,
-                       RESTORE_SWITCHABLE_TYPES);
-#if CONFIG_ENTROPY_STATS
-      ++counts->switchable_restore[unit_rtype];
-#endif
-      if (unit_rtype == RESTORE_WIENER) {
-        write_wiener_filter(wiener_win, &rui->wiener_info, ref_wiener, w);
-      } else if (unit_rtype == RESTORE_SGRPROJ) {
-        write_sgrproj_filter(&rui->sgrproj_info, ref_sgrproj, w);
-      } else {
-        assert(unit_rtype == RESTORE_NONE);
-      }
-      break;
-    case RESTORE_WIENER:
-      aom_write_symbol(w, unit_enabled, xd->tile_ctx->wiener_restore_cdf, 2);
-#if CONFIG_ENTROPY_STATS
-      ++counts->wiener_restore[unit_enabled];
-#endif
-      if (unit_enabled) {
-        write_wiener_filter(wiener_win, &rui->wiener_info, ref_wiener, w);
-      }
-      break;
-    case RESTORE_SGRPROJ:
-      aom_write_symbol(w, unit_enabled, xd->tile_ctx->sgrproj_restore_cdf, 2);
-#if CONFIG_ENTROPY_STATS
-      ++counts->sgrproj_restore[unit_enabled];
-#endif
-      if (unit_enabled) {
-        write_sgrproj_filter(&rui->sgrproj_info, ref_sgrproj, w);
-      }
-      break;
-    default: break;
-  }
-}
-
-// Writes the frame-level deblocking filter parameters: filter levels,
-// sharpness, and (when enabled and updated) the per-reference and per-mode
-// delta values, each coded only if it differs from the value inherited from
-// the primary reference frame (or from the defaults when there is none).
-// Nothing is written when intra block copy is allowed.
-static void encode_loopfilter(AV1_COMMON *cm, struct aom_write_bit_buffer *wb) {
-  assert(!cm->coded_lossless);
-  if (cm->allow_intrabc) return;
-
-  const int num_planes = av1_num_planes(cm);
-  struct loopfilter *const lf = &cm->lf;
-
-  // Filter levels and sharpness.
-  aom_wb_write_literal(wb, lf->filter_level[0], 6);
-  aom_wb_write_literal(wb, lf->filter_level[1], 6);
-  if (num_planes > 1 && (lf->filter_level[0] || lf->filter_level[1])) {
-    aom_wb_write_literal(wb, lf->filter_level_u, 6);
-    aom_wb_write_literal(wb, lf->filter_level_v, 6);
-  }
-  aom_wb_write_literal(wb, lf->sharpness_level, 3);
-
-  // Mode / reference-frame deltas: enabled flag, update flag, then values.
-  aom_wb_write_bit(wb, lf->mode_ref_delta_enabled);
-  if (!lf->mode_ref_delta_enabled) return;
-
-  aom_wb_write_bit(wb, lf->mode_ref_delta_update);
-  if (!lf->mode_ref_delta_update) return;
-
-  const int prime_idx = cm->primary_ref_frame;
-  const int buf_idx =
-      (prime_idx != PRIMARY_REF_NONE) ? cm->frame_refs[prime_idx].idx : -1;
-  // Without a usable primary reference the baseline is the default table.
-  const int use_defaults = (prime_idx == PRIMARY_REF_NONE) || (buf_idx < 0);
-  int k;
-
-  int8_t last_ref_deltas[REF_FRAMES];
-  if (!use_defaults) {
-    memcpy(last_ref_deltas, cm->buffer_pool->frame_bufs[buf_idx].ref_deltas,
-           REF_FRAMES);
-  } else {
-    av1_set_default_ref_deltas(last_ref_deltas);
-  }
-  for (k = 0; k < REF_FRAMES; k++) {
-    const int delta = lf->ref_deltas[k];
-    const int changed = (delta != last_ref_deltas[k]);
-    aom_wb_write_bit(wb, changed);
-    if (changed) aom_wb_write_inv_signed_literal(wb, delta, 6);
-  }
-
-  int8_t last_mode_deltas[MAX_MODE_LF_DELTAS];
-  if (!use_defaults) {
-    memcpy(last_mode_deltas, cm->buffer_pool->frame_bufs[buf_idx].mode_deltas,
-           MAX_MODE_LF_DELTAS);
-  } else {
-    av1_set_default_mode_deltas(last_mode_deltas);
-  }
-  for (k = 0; k < MAX_MODE_LF_DELTAS; k++) {
-    const int delta = lf->mode_deltas[k];
-    const int changed = (delta != last_mode_deltas[k]);
-    aom_wb_write_bit(wb, changed);
-    if (changed) aom_wb_write_inv_signed_literal(wb, delta, 6);
-  }
-}
-
-// Writes the frame-level CDEF parameters: damping (minus 3), the number of
-// bits used for the strength index, and each luma (and, when chroma planes
-// exist, chroma) strength. Nothing is written when CDEF is disabled in the
-// sequence header or when intra block copy is allowed.
-static void encode_cdef(const AV1_COMMON *cm, struct aom_write_bit_buffer *wb) {
-  assert(!cm->coded_lossless);
-  if (!cm->seq_params.enable_cdef || cm->allow_intrabc) return;
-
-  const int has_chroma = av1_num_planes(cm) > 1;
-
-  aom_wb_write_literal(wb, cm->cdef_pri_damping - 3, 2);
-  // Only one damping value is coded, so both must agree.
-  assert(cm->cdef_pri_damping == cm->cdef_sec_damping);
-  aom_wb_write_literal(wb, cm->cdef_bits, 2);
-
-  for (int idx = 0; idx < cm->nb_cdef_strengths; idx++) {
-    aom_wb_write_literal(wb, cm->cdef_strengths[idx], CDEF_STRENGTH_BITS);
-    if (has_chroma) {
-      aom_wb_write_literal(wb, cm->cdef_uv_strengths[idx], CDEF_STRENGTH_BITS);
-    }
-  }
-}
-
-// Writes a quantizer delta: a single 0 bit when `delta_q` is zero, otherwise
-// a 1 bit followed by the value as a 6-bit inverse signed literal.
-static void write_delta_q(struct aom_write_bit_buffer *wb, int delta_q) {
-  if (delta_q == 0) {
-    aom_wb_write_bit(wb, 0);
-    return;
-  }
-  aom_wb_write_bit(wb, 1);
-  aom_wb_write_inv_signed_literal(wb, delta_q, 6);
-}
-
-// Writes the frame quantization parameters: base q index, the luma DC delta,
-// the chroma DC/AC deltas (V separately only when it differs from U), and the
-// quantizer-matrix flag with its per-plane levels.
-static void encode_quantization(const AV1_COMMON *const cm,
-                                struct aom_write_bit_buffer *wb) {
-  const int num_planes = av1_num_planes(cm);
-  const int separate_uv = cm->seq_params.separate_uv_delta_q;
-
-  aom_wb_write_literal(wb, cm->base_qindex, QINDEX_BITS);
-  write_delta_q(wb, cm->y_dc_delta_q);
-
-  if (num_planes > 1) {
-    const int diff_uv_delta = (cm->u_dc_delta_q != cm->v_dc_delta_q) ||
-                              (cm->u_ac_delta_q != cm->v_ac_delta_q);
-    // The "U and V differ" flag is only coded when the sequence allows it.
-    if (separate_uv) aom_wb_write_bit(wb, diff_uv_delta);
-    write_delta_q(wb, cm->u_dc_delta_q);
-    write_delta_q(wb, cm->u_ac_delta_q);
-    if (diff_uv_delta) {
-      write_delta_q(wb, cm->v_dc_delta_q);
-      write_delta_q(wb, cm->v_ac_delta_q);
-    }
-  }
-
-  aom_wb_write_bit(wb, cm->using_qmatrix);
-  if (!cm->using_qmatrix) return;
-
-  aom_wb_write_literal(wb, cm->qm_y, QM_LEVEL_BITS);
-  aom_wb_write_literal(wb, cm->qm_u, QM_LEVEL_BITS);
-  if (separate_uv) {
-    aom_wb_write_literal(wb, cm->qm_v, QM_LEVEL_BITS);
-  } else {
-    // The V level is not coded, so it must match the U level.
-    assert(cm->qm_u == cm->qm_v);
-  }
-}
-
-// Writes the segmentation header: the enabled flag, the map/data update
-// flags (implicit when there is no primary reference frame), and, when the
-// data is updated, every feature of every segment as an active bit plus its
-// clamped value.
-static void encode_segmentation(AV1_COMMON *cm, MACROBLOCKD *xd,
-                                struct aom_write_bit_buffer *wb) {
-  struct segmentation *const seg = &cm->seg;
-
-  aom_wb_write_bit(wb, seg->enabled);
-  if (!seg->enabled) return;
-
-  // Update flags.
-  if (cm->primary_ref_frame != PRIMARY_REF_NONE) {
-    aom_wb_write_bit(wb, seg->update_map);
-    if (seg->update_map) {
-      // Decide between temporal and spatial coding of the segment map.
-      av1_choose_segmap_coding_method(cm, xd);
-      aom_wb_write_bit(wb, seg->temporal_update);
-    }
-    aom_wb_write_bit(wb, seg->update_data);
-  } else {
-    // No flags are coded here; map and data updates are expected to be on.
-    assert(seg->update_map == 1);
-    seg->temporal_update = 0;
-    assert(seg->update_data == 1);
-  }
-
-  if (!seg->update_data) return;
-
-  // Segmentation feature data.
-  for (int segment = 0; segment < MAX_SEGMENTS; segment++) {
-    for (int feature = 0; feature < SEG_LVL_MAX; feature++) {
-      const int active = segfeature_active(seg, segment, feature);
-      aom_wb_write_bit(wb, active);
-      if (!active) continue;
-
-      const int data_max = av1_seg_feature_data_max(feature);
-      const int data_min = -data_max;
-      const int ubits = get_unsigned_bits(data_max);
-      const int data =
-          clamp(get_segdata(seg, segment, feature), data_min, data_max);
-
-      if (av1_is_segfeature_signed(feature)) {
-        aom_wb_write_inv_signed_literal(wb, data, ubits);
-      } else {
-        aom_wb_write_literal(wb, data, ubits);
-      }
-    }
-  }
-}
-
-// Writes one bit telling whether `*mode` is TX_MODE_SELECT. For coded
-// lossless frames nothing is written and `*mode` is forced to ONLY_4X4.
-static void write_tx_mode(AV1_COMMON *cm, TX_MODE *mode,
-                          struct aom_write_bit_buffer *wb) {
-  if (!cm->coded_lossless) {
-    aom_wb_write_bit(wb, *mode == TX_MODE_SELECT);
-  } else {
-    *mode = ONLY_4X4;
-  }
-}
-
-// Writes the frame-level interpolation filter: a "switchable" flag, followed
-// by the explicit filter index when the filter is not switchable.
-static void write_frame_interp_filter(InterpFilter filter,
-                                      struct aom_write_bit_buffer *wb) {
-  const int is_switchable = (filter == SWITCHABLE);
-  aom_wb_write_bit(wb, is_switchable);
-  if (!is_switchable) {
-    aom_wb_write_literal(wb, filter, LOG_SWITCHABLE_FILTERS);
-  }
-}
-
-// When the frame filter is SWITCHABLE but the counts show that exactly one
-// filter was actually used, and that filter is EIGHTTAP_REGULAR, sets it as
-// the frame-level filter. Any other single filter leaves the frame filter
-// untouched.
-static void fix_interp_filter(AV1_COMMON *cm, FRAME_COUNTS *counts) {
-  if (cm->interp_filter != SWITCHABLE) return;
-
-  int num_used = 0;    // how many distinct filters have a non-zero count
-  int only_used = -1;  // index of a used filter; unique when num_used == 1
-
-  for (int f = 0; f < SWITCHABLE_FILTERS; ++f) {
-    int total = 0;
-    for (int ctx = 0; ctx < SWITCHABLE_FILTER_CONTEXTS; ++ctx)
-      total += counts->switchable_interp[ctx][f];
-    if (total > 0) ++num_used;
-    if (total) only_used = f;
-  }
-
-  if (num_used == 1 && only_used == EIGHTTAP_REGULAR) {
-    cm->interp_filter = only_used;
-  }
-}
-
-// Same function as write_uniform, but writing to the uncompressed header
-// bit buffer `wb`. Codes `v` out of `n` possible values: the first
-// m = (1 << l) - n values use l - 1 bits, the rest use one extra bit, where
-// l = get_unsigned_bits(n). Writes nothing when l is 0.
-static void wb_write_uniform(struct aom_write_bit_buffer *wb, int n, int v) {
-  const int l = get_unsigned_bits(n);
-  if (l == 0) return;
-
-  const int m = (1 << l) - n;
-  if (v >= m) {
-    const int excess = v - m;
-    aom_wb_write_literal(wb, m + (excess >> 1), l - 1);
-    aom_wb_write_literal(wb, excess & 1, 1);
-  } else {
-    aom_wb_write_literal(wb, v, l - 1);
-  }
-}
-
-// Writes the tile layout. With uniform spacing, the log2 tile column and row
-// counts are coded as a run of 1 bits above the minimum, terminated by a 0
-// bit unless the maximum is reached. Otherwise each tile's width and height
-// in superblocks (minus one) is coded with wb_write_uniform().
-static void write_tile_info_max_tile(const AV1_COMMON *const cm,
-                                     struct aom_write_bit_buffer *wb) {
-  const int width_mi =
-      ALIGN_POWER_OF_TWO(cm->mi_cols, cm->seq_params.mib_size_log2);
-  const int height_mi =
-      ALIGN_POWER_OF_TWO(cm->mi_rows, cm->seq_params.mib_size_log2);
-  // Superblocks not yet assigned to a tile, per dimension.
-  int width_sb = width_mi >> cm->seq_params.mib_size_log2;
-  int height_sb = height_mi >> cm->seq_params.mib_size_log2;
-  int n;
-
-  aom_wb_write_bit(wb, cm->uniform_tile_spacing_flag);
-
-  if (!cm->uniform_tile_spacing_flag) {
-    // Explicit tile widths.
-    for (n = 0; n < cm->tile_cols; n++) {
-      const int size_sb =
-          cm->tile_col_start_sb[n + 1] - cm->tile_col_start_sb[n];
-      wb_write_uniform(wb, AOMMIN(width_sb, cm->max_tile_width_sb),
-                       size_sb - 1);
-      width_sb -= size_sb;
-    }
-    assert(width_sb == 0);
-
-    // Explicit tile heights.
-    for (n = 0; n < cm->tile_rows; n++) {
-      const int size_sb =
-          cm->tile_row_start_sb[n + 1] - cm->tile_row_start_sb[n];
-      wb_write_uniform(wb, AOMMIN(height_sb, cm->max_tile_height_sb),
-                       size_sb - 1);
-      height_sb -= size_sb;
-    }
-    assert(height_sb == 0);
-    return;
-  }
-
-  // Uniformly spaced tiles: columns first.
-  for (n = cm->log2_tile_cols - cm->min_log2_tile_cols; n != 0; --n) {
-    aom_wb_write_bit(wb, 1);
-  }
-  if (cm->log2_tile_cols < cm->max_log2_tile_cols) {
-    aom_wb_write_bit(wb, 0);
-  }
-
-  // Then rows.
-  for (n = cm->log2_tile_rows - cm->min_log2_tile_rows; n != 0; --n) {
-    aom_wb_write_bit(wb, 1);
-  }
-  if (cm->log2_tile_rows < cm->max_log2_tile_rows) {
-    aom_wb_write_bit(wb, 0);
-  }
-}
-
-// Writes the tile layout and, for multi-tile frames, placeholder values for
-// the context-update tile id and the tile-size byte count. The bit buffer
-// position just before those placeholders is saved in `*saved_wb`.
-static void write_tile_info(const AV1_COMMON *const cm,
-                            struct aom_write_bit_buffer *saved_wb,
-                            struct aom_write_bit_buffer *wb) {
-  write_tile_info_max_tile(cm, wb);
-
-  *saved_wb = *wb;
-
-  const int num_tiles = cm->tile_rows * cm->tile_cols;
-  if (num_tiles > 1) {
-    // Tile id used for the CDF update.
-    aom_wb_write_literal(wb, 0, cm->log2_tile_cols + cm->log2_tile_rows);
-    // Number of bytes in tile size - 1.
-    aom_wb_write_literal(wb, 3, 2);
-  }
-}
-
-// Pads the bit buffer with zero bits up to the next byte boundary, saves the
-// resulting position in `*saved_wb`, and, for multi-tile frames, writes two
-// 2-bit placeholder fields for the tile column size and tile size byte
-// counts.
-static void write_ext_tile_info(const AV1_COMMON *const cm,
-                                struct aom_write_bit_buffer *saved_wb,
-                                struct aom_write_bit_buffer *wb) {
-  // This information is stored as a separate byte.
-  const int bits_into_byte = wb->bit_offset % CHAR_BIT;
-  if (bits_into_byte > 0) {
-    aom_wb_write_literal(wb, 0, CHAR_BIT - bits_into_byte);
-  }
-  assert(aom_wb_is_byte_aligned(wb));
-
-  *saved_wb = *wb;
-
-  const int num_tiles = cm->tile_rows * cm->tile_cols;
-  if (num_tiles > 1) {
-    // Note that the last item in the uncompressed header is the data
-    // describing tile configuration.
-    aom_wb_write_literal(wb, 0, 2);  // Number of bytes in tile column size - 1
-    aom_wb_write_literal(wb, 0, 2);  // Number of bytes in tile size - 1
-  }
-}
-
-// Builds the bitmask of reference buffer slots the current frame refreshes.
-// Shown key frames and S-frames refresh every slot (0xFF). Otherwise one bit
-// is set per enabled refresh flag, at the slot index held in
-// cpi->ref_fb_idx[] for the corresponding reference.
-static int get_refresh_mask(AV1_COMP *cpi) {
-  const int shown_key_frame =
-      cpi->common.frame_type == KEY_FRAME && cpi->common.show_frame;
-  if (shown_key_frame || frame_is_sframe(&cpi->common)) return 0xFF;
-
-  // NOTE(zoeliu): When LAST_FRAME is to get refreshed, the decoder will be
-  // notified to get LAST3_FRAME refreshed and then the virtual indexes for all
-  // the 3 LAST reference frames will be updated accordingly, i.e.:
-  // (1) The original virtual index for LAST3_FRAME will become the new virtual
-  //     index for LAST_FRAME; and
-  // (2) The original virtual indexes for LAST_FRAME and LAST2_FRAME will be
-  //     shifted and become the new virtual indexes for LAST2_FRAME and
-  //     LAST3_FRAME.
-  int mask = cpi->refresh_last_frame << cpi->ref_fb_idx[LAST_REF_FRAMES - 1];
-
-#if USE_SYMM_MULTI_LAYER
-  if (cpi->new_bwdref_update_rule == 1) {
-    mask |= cpi->refresh_bwd_ref_frame << cpi->ref_fb_idx[EXTREF_FRAME - 1];
-  } else {
-    mask |= cpi->refresh_bwd_ref_frame << cpi->ref_fb_idx[BWDREF_FRAME - 1];
-  }
-#else
-  mask |= cpi->refresh_bwd_ref_frame << cpi->ref_fb_idx[BWDREF_FRAME - 1];
-#endif
-  mask |= cpi->refresh_alt2_ref_frame << cpi->ref_fb_idx[ALTREF2_FRAME - 1];
-
-  if (!av1_preserve_existing_gf(cpi)) {
-    const int gld_idx = cpi->ref_fb_idx[GOLDEN_FRAME - 1];
-    const int arf_idx = cpi->ref_fb_idx[ALTREF_FRAME - 1];
-    return mask | (cpi->refresh_golden_frame << gld_idx) |
-           (cpi->refresh_alt_ref_frame << arf_idx);
-  }
-
-  // We have decided to preserve the previously existing golden frame as our
-  // new ARF frame. However, in the short term we leave it in the GF slot and,
-  // if we're updating the GF with the current decoded frame, we save it
-  // instead to the ARF slot.
-  // Later, in the function av1_encoder.c:av1_update_reference_frames() we
-  // will swap gld_fb_idx and alt_fb_idx to achieve our objective. We do it
-  // there so that it can be done outside of the recode loop.
-  // Note: This is highly specific to the use of ARF as a forward reference,
-  // and this needs to be generalized as other uses are implemented
-  // (like RTC/temporal scalability).
-  if (cpi->preserve_arf_as_gld) return mask;
-
-  return mask |
-         (cpi->refresh_golden_frame << cpi->ref_fb_idx[ALTREF_FRAME - 1]);
-}
-
-// Looks for an already-written tile whose payload is byte-identical to the
-// tile at (tile_row, tile_col).
-//
-// Returns the row distance (> 0) to the matching tile, or 0 when no match is
-// found or when the tile is in the first tile row.
-//
-// The payload of each tile buffer is compared starting 4 bytes into `data`
-// (presumably skipping a tile header; confirm against the tile writer). The
-// first byte of a candidate's buffer is read as a header: when its top bit is
-// set, the candidate is itself a copy and the low 7 bits give the additional
-// row distance to the tile it copies.
-static INLINE int find_identical_tile(
-    const int tile_row, const int tile_col,
-    TileBufferEnc (*const tile_buffers)[MAX_TILE_COLS]) {
-  const MV32 candidate_offset[1] = { { 1, 0 } };
-  const int num_candidates =
-      (int)(sizeof(candidate_offset) / sizeof(candidate_offset[0]));
-  const uint8_t *const cur_tile_data =
-      tile_buffers[tile_row][tile_col].data + 4;
-  const size_t cur_tile_size = tile_buffers[tile_row][tile_col].size;
-
-  int i;
-
-  if (tile_row == 0) return 0;
-
-  // (TODO: yunqingwang) For now, only above tile is checked and used.
-  // More candidates such as left tile can be added later.
-  for (i = 0; i < num_candidates; i++) {
-    // Index with the loop variable so that added candidates are honoured.
-    int row_offset = candidate_offset[i].row;
-    int col_offset = candidate_offset[i].col;
-    int row = tile_row - row_offset;
-    int col = tile_col - col_offset;
-    uint8_t tile_hdr;
-    const uint8_t *tile_data;
-    TileBufferEnc *candidate;
-
-    if (row < 0 || col < 0) continue;
-
-    tile_hdr = *(tile_buffers[row][col].data);
-
-    // Read out tcm bit
-    if ((tile_hdr >> 7) == 1) {
-      // The candidate is a copy tile itself
-      row_offset += tile_hdr & 0x7f;
-      row = tile_row - row_offset;
-      // Re-validate: the redirected row must still be inside the frame
-      // before it is used to index tile_buffers.
-      if (row < 0) continue;
-    }
-
-    candidate = &tile_buffers[row][col];
-
-    // The offset must fit in the 7-bit field of a copy-tile header.
-    if (row_offset >= 128 || candidate->size != cur_tile_size) continue;
-
-    tile_data = candidate->data + 4;
-
-    if (memcmp(tile_data, cur_tile_data, cur_tile_size) != 0) continue;
-
-    // Identical tile found
-    assert(row_offset > 0);
-    return row_offset;
-  }
-
-  // No identical tile found
-  return 0;
-}
-
-// Writes a flag telling whether an explicit render size follows (set when
-// av1_resize_scaled() reports scaling) and, if so, the render width and
-// height minus one as 16-bit values.
-static void write_render_size(const AV1_COMMON *cm,
-                              struct aom_write_bit_buffer *wb) {
-  const int scaling_active = av1_resize_scaled(cm);
-  aom_wb_write_bit(wb, scaling_active);
-  if (!scaling_active) return;
-
-  aom_wb_write_literal(wb, cm->render_width - 1, 16);
-  aom_wb_write_literal(wb, cm->render_height - 1, 16);
-}
-
-// Writes the super-resolution signalling for the frame. Nothing is written
-// when super-resolution is disabled in the sequence header. Otherwise a flag
-// says whether scaling is used and, if so, the scale denominator follows as
-// an offset from SUPERRES_SCALE_DENOMINATOR_MIN.
-static void write_superres_scale(const AV1_COMMON *const cm,
-                                 struct aom_write_bit_buffer *wb) {
-  if (!cm->seq_params.enable_superres) {
-    assert(cm->superres_scale_denominator == SCALE_NUMERATOR);
-    return;
-  }
-
-  // First bit is whether to scale or not.
-  const int use_superres = (cm->superres_scale_denominator != SCALE_NUMERATOR);
-  if (!use_superres) {
-    aom_wb_write_bit(wb, 0);  // no scaling
-    return;
-  }
-
-  aom_wb_write_bit(wb, 1);  // scaling, write scale factor
-  assert(cm->superres_scale_denominator >= SUPERRES_SCALE_DENOMINATOR_MIN);
-  assert(cm->superres_scale_denominator <
-         SUPERRES_SCALE_DENOMINATOR_MIN + (1 << SUPERRES_SCALE_BITS));
-  aom_wb_write_literal(
-      wb, cm->superres_scale_denominator - SUPERRES_SCALE_DENOMINATOR_MIN,
-      SUPERRES_SCALE_BITS);
-}
-
-// Writes the frame size syntax: when `frame_size_override` is set, the
-// upscaled frame width and height minus one (using the bit widths from the
-// sequence header); then the super-resolution scale and the render size.
-static void write_frame_size(const AV1_COMMON *cm, int frame_size_override,
-                             struct aom_write_bit_buffer *wb) {
-  if (frame_size_override) {
-    const SequenceHeader *const seq_params = &cm->seq_params;
-    const int width_minus_1 = cm->superres_upscaled_width - 1;
-    const int height_minus_1 = cm->superres_upscaled_height - 1;
-    aom_wb_write_literal(wb, width_minus_1, seq_params->num_bits_width);
-    aom_wb_write_literal(wb, height_minus_1, seq_params->num_bits_height);
-  }
-
-  write_superres_scale(cm, wb);
-  write_render_size(cm, wb);
-}
-
-// Writes the frame size for a frame that may inherit it from a reference.
-// For each reference from LAST_FRAME to ALTREF_FRAME a "found" bit is
-// written; the first reference whose upscaled and render dimensions all match
-// the current frame ends the search, followed only by the super-resolution
-// scale. When no reference matches, the full frame size is written.
-static void write_frame_size_with_refs(AV1_COMP *cpi,
-                                       struct aom_write_bit_buffer *wb) {
-  AV1_COMMON *const cm = &cpi->common;
-  int found = 0;
-
-  for (MV_REFERENCE_FRAME ref_frame = LAST_FRAME; ref_frame <= ALTREF_FRAME;
-       ++ref_frame) {
-    const YV12_BUFFER_CONFIG *const cfg = get_ref_frame_buffer(cpi, ref_frame);
-
-    if (cfg != NULL) {
-      found = cm->superres_upscaled_width == cfg->y_crop_width &&
-              cm->superres_upscaled_height == cfg->y_crop_height &&
-              cm->render_width == cfg->render_width &&
-              cm->render_height == cfg->render_height;
-    }
-    aom_wb_write_bit(wb, found);
-    if (found) {
-      write_superres_scale(cm, wb);
-      return;
-    }
-  }
-
-  // No reference matched: code the size explicitly (override always 1 here).
-  write_frame_size(cm, 1, wb);
-}
-
-// Writes the bitstream profile as a PROFILE_BITS-wide literal.
-static void write_profile(BITSTREAM_PROFILE profile,
-                          struct aom_write_bit_buffer *wb) {
-  // Only profiles in [PROFILE_0, MAX_PROFILES) are valid.
-  assert(profile >= PROFILE_0 && profile < MAX_PROFILES);
-
-  aom_wb_write_literal(wb, profile, PROFILE_BITS);
-}
-
-// Writes the bit depth signalling:
-//   Profile 0/1: [0] for 8-bit, [1] for 10-bit
-//   Profile 2:   [0] for 8-bit, [10] for 10-bit, [11] for 12-bit
-static void write_bitdepth(const SequenceHeader *const seq_params,
-                           struct aom_write_bit_buffer *wb) {
-  const int is_8bit = (seq_params->bit_depth == AOM_BITS_8);
-
-  aom_wb_write_bit(wb, is_8bit ? 0 : 1);
-  if (!is_8bit && seq_params->profile == PROFILE_2) {
-    // Second bit separates 10-bit from 12-bit.
-    aom_wb_write_bit(wb, seq_params->bit_depth == AOM_BITS_10 ? 0 : 1);
-  }
-}
-
-// Writes the colour configuration of the sequence header: bit depth, the
-// monochrome flag (not coded for PROFILE_1), the optional colour description
-// triple, colour range, chroma subsampling (only where the profile leaves a
-// choice), chroma sample position for 4:2:0, and the separate_uv_delta_q
-// flag. For monochrome streams only the colour range follows the colour
-// description.
-static void write_color_config(const SequenceHeader *const seq_params,
-                               struct aom_write_bit_buffer *wb) {
-  write_bitdepth(seq_params, wb);
-
-  // monochrome bit
-  const int is_monochrome = seq_params->monochrome;
-  if (seq_params->profile == PROFILE_1) {
-    assert(!is_monochrome);
-  } else {
-    aom_wb_write_bit(wb, is_monochrome);
-  }
-
-  const int color_description_present =
-      !(seq_params->color_primaries == AOM_CICP_CP_UNSPECIFIED &&
-        seq_params->transfer_characteristics == AOM_CICP_TC_UNSPECIFIED &&
-        seq_params->matrix_coefficients == AOM_CICP_MC_UNSPECIFIED);
-  if (color_description_present) {
-    aom_wb_write_bit(wb, 1);  // Color description present
-    aom_wb_write_literal(wb, seq_params->color_primaries, 8);
-    aom_wb_write_literal(wb, seq_params->transfer_characteristics, 8);
-    aom_wb_write_literal(wb, seq_params->matrix_coefficients, 8);
-  } else {
-    aom_wb_write_bit(wb, 0);  // No color description present
-  }
-
-  if (is_monochrome) {
-    // 0: [16, 235] (i.e. xvYCC), 1: [0, 255]
-    aom_wb_write_bit(wb, seq_params->color_range);
-    return;
-  }
-
-  // it would be better to remove this dependency too
-  const int is_srgb_identity =
-      seq_params->color_primaries == AOM_CICP_CP_BT_709 &&
-      seq_params->transfer_characteristics == AOM_CICP_TC_SRGB &&
-      seq_params->matrix_coefficients == AOM_CICP_MC_IDENTITY;
-
-  if (is_srgb_identity) {
-    // Nothing is coded here; only 4:4:4 in a profile that supports it is valid.
-    assert(seq_params->subsampling_x == 0 && seq_params->subsampling_y == 0);
-    assert(seq_params->profile == PROFILE_1 ||
-           (seq_params->profile == PROFILE_2 &&
-            seq_params->bit_depth == AOM_BITS_12));
-  } else {
-    // 0: [16, 235] (i.e. xvYCC), 1: [0, 255]
-    aom_wb_write_bit(wb, seq_params->color_range);
-
-    switch (seq_params->profile) {
-      case PROFILE_0:
-        // 420 only
-        assert(seq_params->subsampling_x == 1 &&
-               seq_params->subsampling_y == 1);
-        break;
-      case PROFILE_1:
-        // 444 only
-        assert(seq_params->subsampling_x == 0 &&
-               seq_params->subsampling_y == 0);
-        break;
-      case PROFILE_2:
-        if (seq_params->bit_depth != AOM_BITS_12) {
-          // 422 only
-          assert(seq_params->subsampling_x == 1 &&
-                 seq_params->subsampling_y == 0);
-        } else {
-          // 420, 444 or 422
-          aom_wb_write_bit(wb, seq_params->subsampling_x);
-          if (seq_params->subsampling_x != 0) {
-            aom_wb_write_bit(wb, seq_params->subsampling_y);
-          } else {
-            assert(seq_params->subsampling_y == 0 &&
-                   "4:4:0 subsampling not allowed in AV1");
-          }
-        }
-        break;
-      default: break;
-    }
-
-    if (seq_params->matrix_coefficients == AOM_CICP_MC_IDENTITY) {
-      assert(seq_params->subsampling_x == 0 && seq_params->subsampling_y == 0);
-    }
-    if (seq_params->subsampling_x == 1 && seq_params->subsampling_y == 1) {
-      aom_wb_write_literal(wb, seq_params->chroma_sample_position, 2);
-    }
-  }
-
-  aom_wb_write_bit(wb, seq_params->separate_uv_delta_q);
-}
-
-// Writes the timing info: display tick units and time scale (32 bits each),
-// the equal-picture-interval flag and, when that flag is set, the number of
-// ticks per picture minus one as a uvlc value.
-static void write_timing_info_header(AV1_COMMON *const cm,
-                                     struct aom_write_bit_buffer *wb) {
-  // Number of units in tick
-  aom_wb_write_unsigned_literal(wb, cm->timing_info.num_units_in_display_tick,
-                                32);
-  // Time scale
-  aom_wb_write_unsigned_literal(wb, cm->timing_info.time_scale, 32);
-  // Equal picture interval bit
-  aom_wb_write_bit(wb, cm->timing_info.equal_picture_interval);
-
-  if (!cm->timing_info.equal_picture_interval) return;
-
-  // Ticks per picture
-  aom_wb_write_uvlc(wb, cm->timing_info.num_ticks_per_picture - 1);
-}
-
-// Writes the decoder model info: the three field lengths (each minus one,
-// in 5 bits) and the 32-bit number of units in a decoding tick.
-static void write_decoder_model_info(AV1_COMMON *const cm,
-                                     struct aom_write_bit_buffer *wb) {
-  const int delay_len_minus_1 =
-      cm->buffer_model.encoder_decoder_buffer_delay_length - 1;
-  aom_wb_write_literal(wb, delay_len_minus_1, 5);
-
-  // Number of units in decoding tick
-  aom_wb_write_unsigned_literal(wb, cm->buffer_model.num_units_in_decoding_tick,
-                                32);
-
-  const int removal_len_minus_1 =
-      cm->buffer_model.buffer_removal_time_length - 1;
-  aom_wb_write_literal(wb, removal_len_minus_1, 5);
-
-  const int presentation_len_minus_1 =
-      cm->buffer_model.frame_presentation_time_length - 1;
-  aom_wb_write_literal(wb, presentation_len_minus_1, 5);
-}
-
-// Writes the decoder model parameters of operating point `op_num`: decoder
-// and encoder buffer delays (with the bit length taken from the buffer
-// model) and the low-delay-mode flag. Also resets that operating point's
-// buffer removal time to 0. Reports AOM_CODEC_UNSUP_BITSTREAM through
-// aom_internal_error() when `op_num` exceeds MAX_NUM_OPERATING_POINTS.
-static void write_dec_model_op_parameters(AV1_COMMON *const cm,
-                                          struct aom_write_bit_buffer *wb,
-                                          int op_num) {
-  if (op_num > MAX_NUM_OPERATING_POINTS) {
-    aom_internal_error(
-        &cm->error, AOM_CODEC_UNSUP_BITSTREAM,
-        "Encoder does not support %d decoder model operating points", op_num);
-  }
-
-  //  aom_wb_write_bit(wb, cm->op_params[op_num].has_parameters);
-  //  if (!cm->op_params[op_num].has_parameters) return;
-
-  const int delay_bits = cm->buffer_model.encoder_decoder_buffer_delay_length;
-
-  aom_wb_write_unsigned_literal(wb, cm->op_params[op_num].decoder_buffer_delay,
-                                delay_bits);
-  aom_wb_write_unsigned_literal(wb, cm->op_params[op_num].encoder_buffer_delay,
-                                delay_bits);
-  aom_wb_write_bit(wb, cm->op_params[op_num].low_delay_mode_flag);
-
-  // Reset the decoded frame counter.
-  cm->op_frame_timing[op_num].buffer_removal_time = 0;
-}
-
-// Writes the frame presentation time, using the bit length stored in the
-// buffer model.
-static void write_tu_pts_info(AV1_COMMON *const cm,
-                              struct aom_write_bit_buffer *wb) {
-  const int pts_bits = cm->buffer_model.frame_presentation_time_length;
-  aom_wb_write_unsigned_literal(wb, cm->frame_presentation_time, pts_bits);
-}
-
-// Writes the film grain parameters of the current frame.
-//
-// The current parameters are first stored on cm->cur_frame. After the
-// apply_grain flag and the 16-bit random seed are written, the seed in `pars`
-// is advanced (and kept non-zero) for the next frame.
-//
-// For inter frames an update_parameters flag is coded. When it is 0, only a
-// 3-bit index of a reference frame carrying the same grain parameters is
-// written. Otherwise the full parameter set follows: luma/chroma scaling
-// points, scaling shift, AR coefficients, and the chroma multipliers, overlap
-// and clipping flags.
-//
-// Side effects on `pars`: random_seed is advanced; update_parameters is
-// forced to 1 for non-inter frames; chroma_scaling_from_luma is forced to 0
-// for monochrome; num_cb_points / num_cr_points are zeroed when chroma
-// scaling points are not coded.
-static void write_film_grain_params(AV1_COMP *cpi,
-                                    struct aom_write_bit_buffer *wb) {
-  AV1_COMMON *const cm = &cpi->common;
-  aom_film_grain_t *pars = &cm->film_grain_params;
-
-  cm->cur_frame->film_grain_params = *pars;
-
-  aom_wb_write_bit(wb, pars->apply_grain);
-  if (!pars->apply_grain) return;
-
-  aom_wb_write_literal(wb, pars->random_seed, 16);
-
-  pars->random_seed += 3381;  // Changing random seed for film grain
-  if (!pars->random_seed)     // Random seed should not be zero
-    pars->random_seed += 7391;
-  if (cm->frame_type == INTER_FRAME)
-    aom_wb_write_bit(wb, pars->update_parameters);
-  else
-    pars->update_parameters = 1;
-  if (!pars->update_parameters) {
-    RefCntBuffer *const frame_bufs = cm->buffer_pool->frame_bufs;
-    int ref_frame, ref_idx, buf_idx;
-    for (ref_frame = LAST_FRAME; ref_frame < REF_FRAMES; ref_frame++) {
-      ref_idx = get_ref_frame_map_idx(cpi, ref_frame);
-      assert(ref_idx != INVALID_IDX);
-      buf_idx = cm->ref_frame_map[ref_idx];
-      if (!frame_bufs[buf_idx].film_grain_params_present) continue;
-
-      // Pick the reference whose parameters MATCH the current ones. The
-      // random seed is coded per frame and update_parameters is a per-frame
-      // flag, so neither takes part in the comparison: they are overwritten
-      // in a scratch copy of the reference parameters before comparing.
-      aom_film_grain_t ref_pars;
-      memcpy(&ref_pars, &frame_bufs[buf_idx].film_grain_params,
-             sizeof(ref_pars));
-      ref_pars.random_seed = pars->random_seed;
-      ref_pars.update_parameters = pars->update_parameters;
-      if (memcmp(pars, &ref_pars, sizeof(*pars)) == 0) {
-        break;
-      }
-    }
-    assert(ref_frame < REF_FRAMES);
-    aom_wb_write_literal(wb, ref_idx, 3);
-    return;
-  }
-
-  // Scaling functions parameters
-  aom_wb_write_literal(wb, pars->num_y_points, 4);  // max 14
-  for (int i = 0; i < pars->num_y_points; i++) {
-    aom_wb_write_literal(wb, pars->scaling_points_y[i][0], 8);
-    aom_wb_write_literal(wb, pars->scaling_points_y[i][1], 8);
-  }
-
-  if (!cm->seq_params.monochrome)
-    aom_wb_write_bit(wb, pars->chroma_scaling_from_luma);
-  else
-    pars->chroma_scaling_from_luma = 0;  // for monochrome override to 0
-
-  if (cm->seq_params.monochrome || pars->chroma_scaling_from_luma ||
-      ((cm->seq_params.subsampling_x == 1) &&
-       (cm->seq_params.subsampling_y == 1) && (pars->num_y_points == 0))) {
-    // Chroma scaling points are not coded in these cases.
-    pars->num_cb_points = 0;
-    pars->num_cr_points = 0;
-  } else {
-    aom_wb_write_literal(wb, pars->num_cb_points, 4);  // max 10
-    for (int i = 0; i < pars->num_cb_points; i++) {
-      aom_wb_write_literal(wb, pars->scaling_points_cb[i][0], 8);
-      aom_wb_write_literal(wb, pars->scaling_points_cb[i][1], 8);
-    }
-
-    aom_wb_write_literal(wb, pars->num_cr_points, 4);  // max 10
-    for (int i = 0; i < pars->num_cr_points; i++) {
-      aom_wb_write_literal(wb, pars->scaling_points_cr[i][0], 8);
-      aom_wb_write_literal(wb, pars->scaling_points_cr[i][1], 8);
-    }
-  }
-
-  aom_wb_write_literal(wb, pars->scaling_shift - 8, 2);  // 8 + value
-
-  // AR coefficients
-  // Only sent if the corresponding scaling function has
-  // more than 0 points
-
-  aom_wb_write_literal(wb, pars->ar_coeff_lag, 2);
-
-  int num_pos_luma = 2 * pars->ar_coeff_lag * (pars->ar_coeff_lag + 1);
-  int num_pos_chroma = num_pos_luma;
-  // Chroma gets one extra coefficient when luma grain is present.
-  if (pars->num_y_points > 0) ++num_pos_chroma;
-
-  if (pars->num_y_points)
-    for (int i = 0; i < num_pos_luma; i++)
-      aom_wb_write_literal(wb, pars->ar_coeffs_y[i] + 128, 8);
-
-  if (pars->num_cb_points || pars->chroma_scaling_from_luma)
-    for (int i = 0; i < num_pos_chroma; i++)
-      aom_wb_write_literal(wb, pars->ar_coeffs_cb[i] + 128, 8);
-
-  if (pars->num_cr_points || pars->chroma_scaling_from_luma)
-    for (int i = 0; i < num_pos_chroma; i++)
-      aom_wb_write_literal(wb, pars->ar_coeffs_cr[i] + 128, 8);
-
-  aom_wb_write_literal(wb, pars->ar_coeff_shift - 6, 2);  // 6 + value
-
-  aom_wb_write_literal(wb, pars->grain_scale_shift, 2);
-
-  if (pars->num_cb_points) {
-    aom_wb_write_literal(wb, pars->cb_mult, 8);
-    aom_wb_write_literal(wb, pars->cb_luma_mult, 8);
-    aom_wb_write_literal(wb, pars->cb_offset, 9);
-  }
-
-  if (pars->num_cr_points) {
-    aom_wb_write_literal(wb, pars->cr_mult, 8);
-    aom_wb_write_literal(wb, pars->cr_luma_mult, 8);
-    aom_wb_write_literal(wb, pars->cr_offset, 9);
-  }
-
-  aom_wb_write_bit(wb, pars->overlap_flag);
-
-  aom_wb_write_bit(wb, pars->clip_to_restricted_range);
-}
-
-// Signals the superblock size with a single bit: 1 when sb_size is
-// BLOCK_128X128 and 0 otherwise. The assertions restrict sb_size to
-// BLOCK_128X128 or BLOCK_64X64 and cross-check the derived mib_size fields.
-static void write_sb_size(SequenceHeader *seq_params,
-                          struct aom_write_bit_buffer *wb) {
-  // Debug-only consistency checks; they compile away under NDEBUG.
-  assert(seq_params->mib_size == mi_size_wide[seq_params->sb_size]);
-  assert(seq_params->mib_size == 1 << seq_params->mib_size_log2);
-  assert(seq_params->sb_size == BLOCK_128X128 ||
-         seq_params->sb_size == BLOCK_64X64);
-
-  const int uses_128x128 = (seq_params->sb_size == BLOCK_128X128);
-  aom_wb_write_bit(wb, uses_128x128);
-}
-
-// Writes the frame-size limits, frame-id signalling, superblock size and the
-// sequence-level tool enable flags to `wb`.
-//
-// Side effects: stores the derived num_bits_width / num_bits_height and
-// max_frame_width / max_frame_height in cm->seq_params, and, when the reduced
-// still-picture header is not in use, also sets the frame-id fields
-// (frame_id_numbers_present_flag, frame_id_length, delta_frame_id_length).
-static void write_sequence_header(AV1_COMP *cpi,
- struct aom_write_bit_buffer *wb) {
- AV1_COMMON *const cm = &cpi->common;
- SequenceHeader *seq_params = &cm->seq_params;
-
- // A non-zero forced maximum overrides the configured frame dimension.
- int max_frame_width = cpi->oxcf.forced_max_frame_width
- ? cpi->oxcf.forced_max_frame_width
- : cpi->oxcf.width;
- int max_frame_height = cpi->oxcf.forced_max_frame_height
- ? cpi->oxcf.forced_max_frame_height
- : cpi->oxcf.height;
- // max((int)ceil(log2(max_frame_width)), 1)
- const int num_bits_width =
- (max_frame_width > 1) ? get_msb(max_frame_width - 1) + 1 : 1;
- // max((int)ceil(log2(max_frame_height)), 1)
- const int num_bits_height =
- (max_frame_height > 1) ? get_msb(max_frame_height - 1) + 1 : 1;
- // The bit counts are coded minus 1 in 4 bits below, so they must be <= 16.
- assert(num_bits_width <= 16);
- assert(num_bits_height <= 16);
-
- seq_params->num_bits_width = num_bits_width;
- seq_params->num_bits_height = num_bits_height;
- seq_params->max_frame_width = max_frame_width;
- seq_params->max_frame_height = max_frame_height;
-
- aom_wb_write_literal(wb, num_bits_width - 1, 4);
- aom_wb_write_literal(wb, num_bits_height - 1, 4);
- aom_wb_write_literal(wb, max_frame_width - 1, num_bits_width);
- aom_wb_write_literal(wb, max_frame_height - 1, num_bits_height);
-
- // Frame-id signalling; skipped entirely for the reduced still-picture header.
- if (!seq_params->reduced_still_picture_hdr) {
- // Frame ids are signalled only in error-resilient mode, and never with
- // large-scale tile.
- seq_params->frame_id_numbers_present_flag =
- cm->large_scale_tile ? 0 : cm->error_resilient_mode;
- seq_params->frame_id_length = FRAME_ID_LENGTH;
- seq_params->delta_frame_id_length = DELTA_FRAME_ID_LENGTH;
-
- aom_wb_write_bit(wb, seq_params->frame_id_numbers_present_flag);
- if (seq_params->frame_id_numbers_present_flag) {
- // We must always have delta_frame_id_length < frame_id_length,
- // in order for a frame to be referenced with a unique delta.
- // Avoid wasting bits by using a coding that enforces this restriction.
- aom_wb_write_literal(wb, seq_params->delta_frame_id_length - 2, 4);
- aom_wb_write_literal(
- wb,
- seq_params->frame_id_length - seq_params->delta_frame_id_length - 1,
- 3);
- }
- }
-
- write_sb_size(seq_params, wb);
-
- aom_wb_write_bit(wb, seq_params->enable_filter_intra);
- aom_wb_write_bit(wb, seq_params->enable_intra_edge_filter);
-
- // Inter-coding tool flags are not sent with the reduced still-picture header.
- if (!seq_params->reduced_still_picture_hdr) {
- aom_wb_write_bit(wb, seq_params->enable_interintra_compound);
- aom_wb_write_bit(wb, seq_params->enable_masked_compound);
- aom_wb_write_bit(wb, seq_params->enable_warped_motion);
- aom_wb_write_bit(wb, seq_params->enable_dual_filter);
-
- aom_wb_write_bit(wb, seq_params->enable_order_hint);
-
- if (seq_params->enable_order_hint) {
- aom_wb_write_bit(wb, seq_params->enable_jnt_comp);
- aom_wb_write_bit(wb, seq_params->enable_ref_frame_mvs);
- }
- // A value of 2 is coded as a single 1 bit; otherwise a 0 bit is followed
- // by the value itself (0 or 1).
- if (seq_params->force_screen_content_tools == 2) {
- aom_wb_write_bit(wb, 1);
- } else {
- aom_wb_write_bit(wb, 0);
- aom_wb_write_bit(wb, seq_params->force_screen_content_tools);
- }
- // force_integer_mv uses the same escape coding, and is only sent when
- // screen content tools are not forced off.
- if (seq_params->force_screen_content_tools > 0) {
- if (seq_params->force_integer_mv == 2) {
- aom_wb_write_bit(wb, 1);
- } else {
- aom_wb_write_bit(wb, 0);
- aom_wb_write_bit(wb, seq_params->force_integer_mv);
- }
- } else {
- assert(seq_params->force_integer_mv == 2);
- }
- if (seq_params->enable_order_hint)
- aom_wb_write_literal(wb, seq_params->order_hint_bits_minus_1, 3);
- }
-
- aom_wb_write_bit(wb, seq_params->enable_superres);
- aom_wb_write_bit(wb, seq_params->enable_cdef);
- aom_wb_write_bit(wb, seq_params->enable_restoration);
-}
-
-// Writes one global-motion model to `wb`.
-//
-// The warp type is sent first, then the matrix entries the type requires, each
-// coded with aom_wb_write_signed_primitive_refsubexpfin() against the matching
-// entry of `ref_params`. `allow_hp` only affects the precision used for
-// translation-only models.
-static void write_global_motion_params(const WarpedMotionParams *params,
- const WarpedMotionParams *ref_params,
- struct aom_write_bit_buffer *wb,
- int allow_hp) {
- const TransformationType type = params->wmtype;
-
- // Type code: 0 = IDENTITY; 1,1 = ROTZOOM; 1,0,1 = TRANSLATION; 1,0,0 = any
- // remaining type (presumably AFFINE -- confirm against TransformationType).
- aom_wb_write_bit(wb, type != IDENTITY);
- if (type != IDENTITY) {
- aom_wb_write_bit(wb, type == ROTZOOM);
- if (type != ROTZOOM) aom_wb_write_bit(wb, type == TRANSLATION);
- }
-
- // NOTE(review): the >= comparisons below rely on the enum ordering
- // TRANSLATION <= ROTZOOM <= AFFINE -- confirm against the enum definition.
- if (type >= ROTZOOM) {
- // wmmat[2] is coded with (1 << GM_ALPHA_PREC_BITS) subtracted from both the
- // reference and the value; wmmat[3] is coded without that offset.
- aom_wb_write_signed_primitive_refsubexpfin(
- wb, GM_ALPHA_MAX + 1, SUBEXPFIN_K,
- (ref_params->wmmat[2] >> GM_ALPHA_PREC_DIFF) -
- (1 << GM_ALPHA_PREC_BITS),
- (params->wmmat[2] >> GM_ALPHA_PREC_DIFF) - (1 << GM_ALPHA_PREC_BITS));
- aom_wb_write_signed_primitive_refsubexpfin(
- wb, GM_ALPHA_MAX + 1, SUBEXPFIN_K,
- (ref_params->wmmat[3] >> GM_ALPHA_PREC_DIFF),
- (params->wmmat[3] >> GM_ALPHA_PREC_DIFF));
- }
-
- if (type >= AFFINE) {
- // Same scheme for the second row: wmmat[4] without the offset, wmmat[5]
- // with it.
- aom_wb_write_signed_primitive_refsubexpfin(
- wb, GM_ALPHA_MAX + 1, SUBEXPFIN_K,
- (ref_params->wmmat[4] >> GM_ALPHA_PREC_DIFF),
- (params->wmmat[4] >> GM_ALPHA_PREC_DIFF));
- aom_wb_write_signed_primitive_refsubexpfin(
- wb, GM_ALPHA_MAX + 1, SUBEXPFIN_K,
- (ref_params->wmmat[5] >> GM_ALPHA_PREC_DIFF) -
- (1 << GM_ALPHA_PREC_BITS),
- (params->wmmat[5] >> GM_ALPHA_PREC_DIFF) - (1 << GM_ALPHA_PREC_BITS));
- }
-
- if (type >= TRANSLATION) {
- // Translation-only models use their own bit budget and precision, with one
- // bit less (and one more bit of shift) when high precision is disabled.
- const int trans_bits = (type == TRANSLATION)
- ? GM_ABS_TRANS_ONLY_BITS - !allow_hp
- : GM_ABS_TRANS_BITS;
- const int trans_prec_diff = (type == TRANSLATION)
- ? GM_TRANS_ONLY_PREC_DIFF + !allow_hp
- : GM_TRANS_PREC_DIFF;
- aom_wb_write_signed_primitive_refsubexpfin(
- wb, (1 << trans_bits) + 1, SUBEXPFIN_K,
- (ref_params->wmmat[0] >> trans_prec_diff),
- (params->wmmat[0] >> trans_prec_diff));
- aom_wb_write_signed_primitive_refsubexpfin(
- wb, (1 << trans_bits) + 1, SUBEXPFIN_K,
- (ref_params->wmmat[1] >> trans_prec_diff),
- (params->wmmat[1] >> trans_prec_diff));
- }
-}
-
-// Writes the global-motion model of every reference from LAST_FRAME through
-// ALTREF_FRAME. Each model is coded relative to the previous frame's model for
-// the same reference, or to default_warp_params when there is no previous
-// frame.
-static void write_global_motion(AV1_COMP *cpi,
-                                struct aom_write_bit_buffer *wb) {
-  AV1_COMMON *const cm = &cpi->common;
-
-  for (int ref = LAST_FRAME; ref <= ALTREF_FRAME; ++ref) {
-    const WarpedMotionParams *baseline = &default_warp_params;
-    if (cm->prev_frame) baseline = &cm->prev_frame->global_motion[ref];
-
-    write_global_motion_params(&cm->global_motion[ref], baseline, wb,
-                               cm->allow_high_precision_mv);
-    // TODO(sarahparker, debargha): The logic in the commented out code below
-    // does not work currently and causes mismatches when resize is on.
-    // Fix it before turning the optimization back on.
-    /*
-    YV12_BUFFER_CONFIG *ref_buf = get_ref_frame_buffer(cpi, frame);
-    if (cpi->source->y_crop_width == ref_buf->y_crop_width &&
-        cpi->source->y_crop_height == ref_buf->y_crop_height) {
-      write_global_motion_params(&cm->global_motion[frame],
-                                 &cm->prev_frame->global_motion[frame], wb,
-                                 cm->allow_high_precision_mv);
-    } else {
-      assert(cm->global_motion[frame].wmtype == IDENTITY &&
-             "Invalid warp type for frames of different resolutions");
-    }
-    */
-    /*
-    printf("Frame %d/%d: Enc Ref %d: %d %d %d %d\n",
-           cm->current_video_frame, cm->show_frame, frame,
-           cm->global_motion[frame].wmmat[0],
-           cm->global_motion[frame].wmmat[1], cm->global_motion[frame].wmmat[2],
-           cm->global_motion[frame].wmmat[3]);
-    */
-  }
-}
-
-// Decides whether cm->frame_refs_short_signaling can stay enabled.
-//
-// Does nothing if the flag is already 0. Otherwise the flag is cleared when
-// (a) the references do not all map to distinct buffers, or (b) the reference
-// set produced by av1_set_frame_refs() from LAST and GOLDEN differs from the
-// encoder's own choice. cm->frame_refs is left holding the av1_set_frame_refs()
-// result only when the flag survives; otherwise it is restored.
-static void check_frame_refs_short_signaling(AV1_COMP *const cpi) {
- AV1_COMMON *const cm = &cpi->common;
- if (!cm->frame_refs_short_signaling) return;
-
- // Check whether all references are distinct frames.
- int buf_markers[FRAME_BUFFERS] = { 0 };
- for (int ref_frame = LAST_FRAME; ref_frame <= ALTREF_FRAME; ++ref_frame) {
- const int buf_idx = get_ref_frame_buf_idx(cpi, ref_frame);
- if (buf_idx != INVALID_IDX) {
- assert(buf_idx >= 0 && buf_idx < FRAME_BUFFERS);
- buf_markers[buf_idx] = 1;
- }
- }
-
- // Count the distinct buffers that were marked above.
- int num_refs = 0;
- for (int buf_idx = 0; buf_idx < FRAME_BUFFERS; ++buf_idx) {
- num_refs += buf_markers[buf_idx];
- }
-
- // We only turn on frame_refs_short_signaling when all references are
- // distinct.
- if (num_refs < INTER_REFS_PER_FRAME) {
- // Fewer distinct buffers than references means that two or more
- // references point to the same buffer (or a reference is invalid).
- cm->frame_refs_short_signaling = 0;
- return;
- }
-
- // Check whether the encoder side ref frame choices are aligned with that to
- // be derived at the decoder side.
- RefBuffer frame_refs_copy[INTER_REFS_PER_FRAME];
-
- // Backup the frame refs info
- memcpy(frame_refs_copy, cm->frame_refs,
- INTER_REFS_PER_FRAME * sizeof(RefBuffer));
-
- const int lst_map_idx = get_ref_frame_map_idx(cpi, LAST_FRAME);
- const int gld_map_idx = get_ref_frame_map_idx(cpi, GOLDEN_FRAME);
-
- // Set up the frame refs mapping indexes according to the
- // frame_refs_short_signaling policy.
- av1_set_frame_refs(cm, lst_map_idx, gld_map_idx);
-
- // We only turn on frame_refs_short_signaling when the encoder side decision
- // on ref frames is identical to that at the decoder side.
- for (int ref_idx = 0; ref_idx < INTER_REFS_PER_FRAME; ++ref_idx) {
- // Compare the buffer index between two reference frames indexed
- // respectively by the encoder and the decoder side decisions.
- if (cm->frame_refs[ref_idx].idx != frame_refs_copy[ref_idx].idx) {
- cm->frame_refs_short_signaling = 0;
- break;
- }
- }
-
-#if 0 // For debug
- printf("\nFrame=%d: \n", cm->current_video_frame);
- printf("***frame_refs_short_signaling=%d\n", cm->frame_refs_short_signaling);
- for (int ref_frame = LAST_FRAME; ref_frame <= ALTREF_FRAME; ++ref_frame) {
- printf("enc_ref(map_idx=%d, buf_idx=%d)=%d, vs. "
- "dec_ref(map_idx=%d, buf_idx=%d)=%d\n",
- get_ref_frame_map_idx(cpi, ref_frame),
- get_ref_frame_buf_idx(cpi, ref_frame), ref_frame,
- cm->frame_refs[ref_frame - LAST_FRAME].map_idx,
- cm->frame_refs[ref_frame - LAST_FRAME].idx, ref_frame);
- }
-#endif // 0
-
- // Restore the frame refs info if frame_refs_short_signaling is off.
- if (!cm->frame_refs_short_signaling)
- memcpy(cm->frame_refs, frame_refs_copy,
- INTER_REFS_PER_FRAME * sizeof(RefBuffer));
-}
-
-// New function based on HLS R18
-//
-// Writes the uncompressed frame header to `wb`.
-//
-// `saved_wb` is not written here directly; it is only forwarded to
-// write_tile_info() and write_ext_tile_info().
-//
-// Besides emitting bits this function updates encoder state, including
-// cm->is_reference_frame, cm->frame_type, cpi->refresh_frame_mask,
-// cm->fb_of_context_type[], cm->frame_refs_short_signaling and
-// cm->invalid_delta_frame_id_minus_1. When an existing frame is shown it
-// returns right after the short show_existing_frame header.
-static void write_uncompressed_header_obu(AV1_COMP *cpi,
- struct aom_write_bit_buffer *saved_wb,
- struct aom_write_bit_buffer *wb) {
- AV1_COMMON *const cm = &cpi->common;
- const SequenceHeader *const seq_params = &cm->seq_params;
- MACROBLOCKD *const xd = &cpi->td.mb.e_mbd;
-
- // NOTE: By default all coded frames to be used as a reference
- cm->is_reference_frame = 1;
- cm->frame_type = cm->intra_only ? INTRA_ONLY_FRAME : cm->frame_type;
-
- if (seq_params->still_picture) {
- assert(cm->show_existing_frame == 0);
- assert(cm->show_frame == 1);
- assert(cm->frame_type == KEY_FRAME);
- }
- // ---- show_existing_frame, frame type, show flags, error resilience ----
- if (!seq_params->reduced_still_picture_hdr) {
- if (encode_show_existing_frame(cm)) {
- RefCntBuffer *const frame_bufs = cm->buffer_pool->frame_bufs;
- const int frame_to_show = cm->ref_frame_map[cpi->existing_fb_idx_to_show];
-
- if (frame_to_show < 0 || frame_bufs[frame_to_show].ref_count < 1) {
- aom_internal_error(&cm->error, AOM_CODEC_UNSUP_BITSTREAM,
- "Buffer %d does not contain a reconstructed frame",
- frame_to_show);
- }
- ref_cnt_fb(frame_bufs, &cm->new_fb_idx, frame_to_show);
-
- aom_wb_write_bit(wb, 1); // show_existing_frame
- aom_wb_write_literal(wb, cpi->existing_fb_idx_to_show, 3);
-
- if (seq_params->decoder_model_info_present_flag &&
- cm->timing_info.equal_picture_interval == 0) {
- write_tu_pts_info(cm, wb);
- }
- if (seq_params->frame_id_numbers_present_flag) {
- int frame_id_len = seq_params->frame_id_length;
- int display_frame_id = cm->ref_frame_id[cpi->existing_fb_idx_to_show];
- aom_wb_write_literal(wb, display_frame_id, frame_id_len);
- }
-
- if (cm->reset_decoder_state &&
- frame_bufs[frame_to_show].frame_type != KEY_FRAME) {
- aom_internal_error(
- &cm->error, AOM_CODEC_UNSUP_BITSTREAM,
- "show_existing_frame to reset state on KEY_FRAME only");
- }
-
- // Nothing else is coded when an existing frame is shown.
- return;
- } else {
- aom_wb_write_bit(wb, 0); // show_existing_frame
- }
-
- aom_wb_write_literal(wb, cm->frame_type, 2);
-
- aom_wb_write_bit(wb, cm->show_frame);
- if (cm->show_frame) {
- if (seq_params->decoder_model_info_present_flag &&
- cm->timing_info.equal_picture_interval == 0)
- write_tu_pts_info(cm, wb);
- } else {
- aom_wb_write_bit(wb, cm->showable_frame);
- }
- // error_resilient_mode is not coded for S-frames or shown key frames.
- if (frame_is_sframe(cm)) {
- assert(cm->error_resilient_mode);
- } else if (!(cm->frame_type == KEY_FRAME && cm->show_frame)) {
- aom_wb_write_bit(wb, cm->error_resilient_mode);
- }
- }
- aom_wb_write_bit(wb, cm->disable_cdf_update);
-
- // The per-frame flag is only coded when the sequence-level value is 2;
- // otherwise it must equal the sequence-level value.
- if (seq_params->force_screen_content_tools == 2) {
- aom_wb_write_bit(wb, cm->allow_screen_content_tools);
- } else {
- assert(cm->allow_screen_content_tools ==
- seq_params->force_screen_content_tools);
- }
-
- if (cm->allow_screen_content_tools) {
- if (seq_params->force_integer_mv == 2) {
- aom_wb_write_bit(wb, cm->cur_frame_force_integer_mv);
- } else {
- assert(cm->cur_frame_force_integer_mv == seq_params->force_integer_mv);
- }
- } else {
- assert(cm->cur_frame_force_integer_mv == 0);
- }
-
- cm->invalid_delta_frame_id_minus_1 = 0;
- int frame_size_override_flag = 0;
- cm->frame_refs_short_signaling = 0;
-
- // ---- frame id, size override flag, order hint, primary reference ----
- if (seq_params->reduced_still_picture_hdr) {
- assert(cm->width == seq_params->max_frame_width &&
- cm->height == seq_params->max_frame_height);
- } else {
- if (seq_params->frame_id_numbers_present_flag) {
- int frame_id_len = seq_params->frame_id_length;
- aom_wb_write_literal(wb, cm->current_frame_id, frame_id_len);
- }
-
- if (cm->width > seq_params->max_frame_width ||
- cm->height > seq_params->max_frame_height) {
- aom_internal_error(&cm->error, AOM_CODEC_UNSUP_BITSTREAM,
- "Frame dimensions are larger than the maximum values");
- }
-
- // S-frames always override the frame size and do not code the flag.
- frame_size_override_flag =
- frame_is_sframe(cm) ? 1
- : (cm->width != seq_params->max_frame_width ||
- cm->height != seq_params->max_frame_height);
- if (!frame_is_sframe(cm)) aom_wb_write_bit(wb, frame_size_override_flag);
-
- if (seq_params->enable_order_hint)
- aom_wb_write_literal(wb, cm->frame_offset,
- seq_params->order_hint_bits_minus_1 + 1);
-
- if (!cm->error_resilient_mode && !frame_is_intra_only(cm)) {
- aom_wb_write_literal(wb, cm->primary_ref_frame, PRIMARY_REF_BITS);
- }
- }
-
- // ---- decoder model: buffer removal times ----
- if (seq_params->decoder_model_info_present_flag) {
- aom_wb_write_bit(wb, cm->buffer_removal_time_present);
- if (cm->buffer_removal_time_present) {
- for (int op_num = 0;
- op_num < seq_params->operating_points_cnt_minus_1 + 1; op_num++) {
- if (cm->op_params[op_num].decoder_model_param_present_flag) {
- // Written when the operating point's idc has both the bit for the
- // current temporal layer and the bit for the current spatial layer
- // (offset by 8) set, or when the idc is 0.
- if (((seq_params->operating_point_idc[op_num] >>
- cm->temporal_layer_id) &
- 0x1 &&
- (seq_params->operating_point_idc[op_num] >>
- (cm->spatial_layer_id + 8)) &
- 0x1) ||
- seq_params->operating_point_idc[op_num] == 0) {
- aom_wb_write_unsigned_literal(
- wb, cm->op_frame_timing[op_num].buffer_removal_time,
- cm->buffer_model.buffer_removal_time_length);
- // Advance the counter for the next frame; wrapping to 0 is an error.
- cm->op_frame_timing[op_num].buffer_removal_time++;
- if (cm->op_frame_timing[op_num].buffer_removal_time == 0) {
- aom_internal_error(&cm->error, AOM_CODEC_UNSUP_BITSTREAM,
- "buffer_removal_time overflowed");
- }
- }
- }
- }
- }
- }
- // ---- refresh mask ----
- cpi->refresh_frame_mask = get_refresh_mask(cpi);
- if (cm->frame_type == KEY_FRAME) {
- if (!cm->show_frame) { // unshown keyframe (forward keyframe)
- aom_wb_write_literal(wb, cpi->refresh_frame_mask, REF_FRAMES);
- } else {
- assert(cpi->refresh_frame_mask == 0xFF);
- }
- } else {
- if (cm->frame_type == INTRA_ONLY_FRAME) {
- assert(cpi->refresh_frame_mask != 0xFF);
- int updated_fb = -1;
- for (int i = 0; i < REF_FRAMES; i++) {
- // If more than one frame is refreshed, it doesn't matter which one
- // we pick, so pick the first.
- if (cpi->refresh_frame_mask & (1 << i)) {
- updated_fb = i;
- break;
- }
- }
- assert(updated_fb >= 0);
- cm->fb_of_context_type[cm->frame_context_idx] = updated_fb;
- aom_wb_write_literal(wb, cpi->refresh_frame_mask, REF_FRAMES);
- } else if (cm->frame_type == INTER_FRAME || frame_is_sframe(cm)) {
- if (cm->frame_type == INTER_FRAME) {
- aom_wb_write_literal(wb, cpi->refresh_frame_mask, REF_FRAMES);
- } else {
- assert(frame_is_sframe(cm) && cpi->refresh_frame_mask == 0xFF);
- }
- int updated_fb = -1;
- for (int i = 0; i < REF_FRAMES; i++) {
- // If more than one frame is refreshed, it doesn't matter which one
- // we pick, so pick the first.
- if (cpi->refresh_frame_mask & (1 << i)) {
- updated_fb = i;
- break;
- }
- }
- // large scale tile sometimes won't refresh any fbs
- if (updated_fb >= 0) {
- cm->fb_of_context_type[cm->frame_context_idx] = updated_fb;
- }
-
- if (!cpi->refresh_frame_mask) {
- // NOTE: "cpi->refresh_frame_mask == 0" indicates that the coded frame
- // will not be used as a reference
- cm->is_reference_frame = 0;
- }
- }
- }
-
- if (!frame_is_intra_only(cm) || cpi->refresh_frame_mask != 0xFF) {
- // Write all ref frame order hints if error_resilient_mode == 1
- if (cm->error_resilient_mode && seq_params->enable_order_hint) {
- RefCntBuffer *const frame_bufs = cm->buffer_pool->frame_bufs;
- for (int ref_idx = 0; ref_idx < REF_FRAMES; ref_idx++) {
- // Get buffer index
- const int buf_idx = cm->ref_frame_map[ref_idx];
- assert(buf_idx >= 0 && buf_idx < FRAME_BUFFERS);
-
- // Write order hint to bit stream
- aom_wb_write_literal(wb, frame_bufs[buf_idx].cur_frame_offset,
- seq_params->order_hint_bits_minus_1 + 1);
- }
- }
- }
-
- // ---- frame size, intrabc, reference selection, motion settings ----
- if (cm->frame_type == KEY_FRAME) {
- write_frame_size(cm, frame_size_override_flag, wb);
- assert(!av1_superres_scaled(cm) || !cm->allow_intrabc);
- if (cm->allow_screen_content_tools && !av1_superres_scaled(cm))
- aom_wb_write_bit(wb, cm->allow_intrabc);
- // all eight fbs are refreshed, pick one that will live long enough
- cm->fb_of_context_type[REGULAR_FRAME] = 0;
- } else {
- if (cm->frame_type == INTRA_ONLY_FRAME) {
- write_frame_size(cm, frame_size_override_flag, wb);
- assert(!av1_superres_scaled(cm) || !cm->allow_intrabc);
- if (cm->allow_screen_content_tools && !av1_superres_scaled(cm))
- aom_wb_write_bit(wb, cm->allow_intrabc);
- } else if (cm->frame_type == INTER_FRAME || frame_is_sframe(cm)) {
- MV_REFERENCE_FRAME ref_frame;
-
- // NOTE: Error resilient mode turns off frame_refs_short_signaling
- // automatically.
- // With FRAME_REFS_SHORT_SIGNALING defined as 0 the flag keeps the value 0
- // assigned earlier in this function.
-#define FRAME_REFS_SHORT_SIGNALING 0
-#if FRAME_REFS_SHORT_SIGNALING
- cm->frame_refs_short_signaling = seq_params->enable_order_hint;
-#endif // FRAME_REFS_SHORT_SIGNALING
-
- if (cm->frame_refs_short_signaling) {
- // NOTE(zoeliu@google.com):
- // An example solution for encoder-side implementation on frame refs
- // short signaling, which is only turned on when the encoder side
- // decision on ref frames is identical to that at the decoder side.
- check_frame_refs_short_signaling(cpi);
- }
-
- if (seq_params->enable_order_hint)
- aom_wb_write_bit(wb, cm->frame_refs_short_signaling);
-
- if (cm->frame_refs_short_signaling) {
- const int lst_ref = get_ref_frame_map_idx(cpi, LAST_FRAME);
- aom_wb_write_literal(wb, lst_ref, REF_FRAMES_LOG2);
-
- const int gld_ref = get_ref_frame_map_idx(cpi, GOLDEN_FRAME);
- aom_wb_write_literal(wb, gld_ref, REF_FRAMES_LOG2);
- }
-
- for (ref_frame = LAST_FRAME; ref_frame <= ALTREF_FRAME; ++ref_frame) {
- assert(get_ref_frame_map_idx(cpi, ref_frame) != INVALID_IDX);
- if (!cm->frame_refs_short_signaling)
- aom_wb_write_literal(wb, get_ref_frame_map_idx(cpi, ref_frame),
- REF_FRAMES_LOG2);
- if (seq_params->frame_id_numbers_present_flag) {
- int i = get_ref_frame_map_idx(cpi, ref_frame);
- int frame_id_len = seq_params->frame_id_length;
- int diff_len = seq_params->delta_frame_id_length;
- // Distance from the reference's frame id to the current one, modulo
- // 2^frame_id_len, minus 1.
- int delta_frame_id_minus_1 =
- ((cm->current_frame_id - cm->ref_frame_id[i] +
- (1 << frame_id_len)) %
- (1 << frame_id_len)) -
- 1;
- // An out-of-range delta is only flagged here; the value is still
- // written.
- if (delta_frame_id_minus_1 < 0 ||
- delta_frame_id_minus_1 >= (1 << diff_len))
- cm->invalid_delta_frame_id_minus_1 = 1;
- aom_wb_write_literal(wb, delta_frame_id_minus_1, diff_len);
- }
- }
-
- if (!cm->error_resilient_mode && frame_size_override_flag) {
- write_frame_size_with_refs(cpi, wb);
- } else {
- write_frame_size(cm, frame_size_override_flag, wb);
- }
-
- if (cm->cur_frame_force_integer_mv) {
- cm->allow_high_precision_mv = 0;
- } else {
- aom_wb_write_bit(wb, cm->allow_high_precision_mv);
- }
- fix_interp_filter(cm, cpi->td.counts);
- write_frame_interp_filter(cm->interp_filter, wb);
- aom_wb_write_bit(wb, cm->switchable_motion_mode);
- if (frame_might_allow_ref_frame_mvs(cm)) {
- aom_wb_write_bit(wb, cm->allow_ref_frame_mvs);
- } else {
- assert(cm->allow_ref_frame_mvs == 0);
- }
- }
- }
-
- // ---- CDF refresh, tile info, quantization, segmentation ----
- const int might_bwd_adapt =
- !(seq_params->reduced_still_picture_hdr) && !(cm->disable_cdf_update);
- if (cm->large_scale_tile)
- cm->refresh_frame_context = REFRESH_FRAME_CONTEXT_DISABLED;
-
- if (might_bwd_adapt) {
- aom_wb_write_bit(
- wb, cm->refresh_frame_context == REFRESH_FRAME_CONTEXT_DISABLED);
- }
-
- write_tile_info(cm, saved_wb, wb);
- encode_quantization(cm, wb);
- encode_segmentation(cm, xd, wb);
-
- // ---- delta q / delta loop filter ----
- if (cm->delta_q_present_flag) assert(cm->base_qindex > 0);
- if (cm->base_qindex > 0) {
- aom_wb_write_bit(wb, cm->delta_q_present_flag);
- if (cm->delta_q_present_flag) {
- aom_wb_write_literal(wb, get_msb(cm->delta_q_res), 2);
- xd->current_qindex = cm->base_qindex;
- if (cm->allow_intrabc)
- assert(cm->delta_lf_present_flag == 0);
- else
- aom_wb_write_bit(wb, cm->delta_lf_present_flag);
- if (cm->delta_lf_present_flag) {
- aom_wb_write_literal(wb, get_msb(cm->delta_lf_res), 2);
- aom_wb_write_bit(wb, cm->delta_lf_multi);
- av1_reset_loop_filter_delta(xd, av1_num_planes(cm));
- }
- }
- }
-
- // ---- in-loop filters ----
- if (cm->all_lossless) {
- assert(!av1_superres_scaled(cm));
- } else {
- if (!cm->coded_lossless) {
- encode_loopfilter(cm, wb);
- encode_cdef(cm, wb);
- }
- encode_restoration_mode(cm, wb);
- }
-
- write_tx_mode(cm, &cm->tx_mode, wb);
-
- if (cpi->allow_comp_inter_inter) {
- const int use_hybrid_pred = cm->reference_mode == REFERENCE_MODE_SELECT;
-
- aom_wb_write_bit(wb, use_hybrid_pred);
- }
-
- if (cm->is_skip_mode_allowed) aom_wb_write_bit(wb, cm->skip_mode_flag);
-
- if (frame_might_allow_warped_motion(cm))
- aom_wb_write_bit(wb, cm->allow_warped_motion);
- else
- assert(!cm->allow_warped_motion);
-
- aom_wb_write_bit(wb, cm->reduced_tx_set_used);
-
- if (!frame_is_intra_only(cm)) write_global_motion(cpi, wb);
-
- // ---- film grain ----
- if (seq_params->film_grain_params_present &&
- (cm->show_frame || cm->showable_frame)) {
- // For frames that are not INTER_FRAME, update_parameters is forced to 1
- // while the parameters are written and restored afterwards.
- int flip_back_update_parameters_flag = 0;
- if (cm->frame_type != INTER_FRAME &&
- cm->film_grain_params.update_parameters == 0) {
- cm->film_grain_params.update_parameters = 1;
- flip_back_update_parameters_flag = 1;
- }
- write_film_grain_params(cpi, wb);
-
- if (flip_back_update_parameters_flag)
- cm->film_grain_params.update_parameters = 0;
- }
-
- if (cm->large_scale_tile) write_ext_tile_info(cm, saved_wb, wb);
-}
-
-// Returns how many bytes (1..4) are needed to hold `size` when the top
-// `spare_msbs` bits of the field are reserved for other use, or -1 when
-// `size` does not fit in the remaining 32 - spare_msbs bits.
-static int choose_size_bytes(uint32_t size, int spare_msbs) {
-  // Reject values that already occupy the reserved high bits. The guard on
-  // spare_msbs avoids a shift by the full 32-bit width.
-  if (spare_msbs > 0 && (size >> (32 - spare_msbs)) != 0) return -1;
-
-  // Shift the value up so that the reserved bits count against the width.
-  const uint32_t widened = size << spare_msbs;
-
-  if (widened >= (UINT32_C(1) << 24)) return 4;
-  if (widened >= (UINT32_C(1) << 16)) return 3;
-  if (widened >= (UINT32_C(1) << 8)) return 2;
-  return 1;
-}
-
-// Stores `val` at `dst` using `sz` bytes (1 to 4). One byte is written
-// directly; wider sizes go through the mem_put_le16/24/32 helpers. Any other
-// size trips an assertion.
-static void mem_put_varsize(uint8_t *const dst, const int sz, const int val) {
-  if (sz == 1) {
-    dst[0] = (uint8_t)(val & 0xff);
-  } else if (sz == 2) {
-    mem_put_le16(dst, val);
-  } else if (sz == 3) {
-    mem_put_le24(dst, val);
-  } else if (sz == 4) {
-    mem_put_le32(dst, val);
-  } else {
-    assert(0 && "Invalid size");
-  }
-}
-
-// Compacts, in place, the tile data in `dst` by shrinking its size fields.
-//
-// On entry every tile size field (and, for large-scale tile, every tile column
-// size field) is 4 bytes wide. The smallest widths able to hold
-// `max_tile_size` / `max_tile_col_size` are chosen, reported through
-// `tile_size_bytes` / `tile_col_size_bytes`, and the buffer is rewritten with
-// fields of those widths.
-//
-// Returns the new total size in bytes; `data_size` is returned unchanged when
-// both widths remain 4.
-static int remux_tiles(const AV1_COMMON *const cm, uint8_t *dst,
- const uint32_t data_size, const uint32_t max_tile_size,
- const uint32_t max_tile_col_size,
- int *const tile_size_bytes,
- int *const tile_col_size_bytes) {
- // Choose the tile size bytes (tsb) and tile column size bytes (tcsb)
- int tsb;
- int tcsb;
-
- if (cm->large_scale_tile) {
- // The top bit in the tile size field indicates tile copy mode, so we
- // have 1 less bit to code the tile size
- tsb = choose_size_bytes(max_tile_size, 1);
- tcsb = choose_size_bytes(max_tile_col_size, 0);
- } else {
- tsb = choose_size_bytes(max_tile_size, 0);
- tcsb = 4; // This is ignored
- (void)max_tile_col_size;
- }
-
- // choose_size_bytes() returns -1 when the size does not fit.
- assert(tsb > 0);
- assert(tcsb > 0);
-
- *tile_size_bytes = tsb;
- *tile_col_size_bytes = tcsb;
- // Nothing to shrink: the data is already in its final layout.
- if (tsb == 4 && tcsb == 4) return data_size;
-
- // Write and read cursors into dst; wpos never overtakes rpos because fields
- // only get narrower.
- uint32_t wpos = 0;
- uint32_t rpos = 0;
-
- if (cm->large_scale_tile) {
- int tile_row;
- int tile_col;
-
- for (tile_col = 0; tile_col < cm->tile_cols; tile_col++) {
- // All but the last column has a column header
- if (tile_col < cm->tile_cols - 1) {
- uint32_t tile_col_size = mem_get_le32(dst + rpos);
- rpos += 4;
-
- // Adjust the tile column size by the number of bytes removed
- // from the tile size fields.
- tile_col_size -= (4 - tsb) * cm->tile_rows;
-
- mem_put_varsize(dst + wpos, tcsb, tile_col_size);
- wpos += tcsb;
- }
-
- for (tile_row = 0; tile_row < cm->tile_rows; tile_row++) {
- // All, including the last row has a header
- uint32_t tile_header = mem_get_le32(dst + rpos);
- rpos += 4;
-
- // If this is a copy tile, we need to shift the MSB to the
- // top bit of the new width, and there is no data to copy.
- if (tile_header >> 31 != 0) {
- if (tsb < 4) tile_header >>= 32 - 8 * tsb;
- mem_put_varsize(dst + wpos, tsb, tile_header);
- wpos += tsb;
- } else {
- mem_put_varsize(dst + wpos, tsb, tile_header);
- wpos += tsb;
-
- // The stored size is biased: the payload is AV1_MIN_TILE_SIZE_BYTES
- // longer than the coded value.
- tile_header += AV1_MIN_TILE_SIZE_BYTES;
- memmove(dst + wpos, dst + rpos, tile_header);
- rpos += tile_header;
- wpos += tile_header;
- }
- }
- }
-
- assert(rpos > wpos);
- assert(rpos == data_size);
-
- return wpos;
- }
- // Regular (non large-scale) tiles: every tile except the last carries a size
- // field; the last tile extends to the end of the data.
- const int n_tiles = cm->tile_cols * cm->tile_rows;
- int n;
-
- for (n = 0; n < n_tiles; n++) {
- int tile_size;
-
- if (n == n_tiles - 1) {
- tile_size = data_size - rpos;
- } else {
- tile_size = mem_get_le32(dst + rpos);
- rpos += 4;
- mem_put_varsize(dst + wpos, tsb, tile_size);
- // Payload length is the coded value plus AV1_MIN_TILE_SIZE_BYTES.
- tile_size += AV1_MIN_TILE_SIZE_BYTES;
- wpos += tsb;
- }
-
- memmove(dst + wpos, dst + rpos, tile_size);
-
- rpos += tile_size;
- wpos += tile_size;
- }
-
- // NOTE(review): rpos > wpos only holds if at least one size field was
- // shrunk, i.e. there is more than one tile -- confirm that callers never
- // reach this point with a single tile.
- assert(rpos > wpos);
- assert(rpos == data_size);
-
- return wpos;
-}
-
-// Writes an OBU header at `dst` and returns the number of bytes written.
-//
-// The first byte holds: forbidden bit (0), 4-bit obu_type, extension flag,
-// obu_has_payload_length_field (always 1) and a reserved 0 bit. When
-// `obu_extension` is non-zero its low 8 bits follow as a second byte.
-uint32_t write_obu_header(OBU_TYPE obu_type, int obu_extension,
-                          uint8_t *const dst) {
-  struct aom_write_bit_buffer hdr = { dst, 0 };
-  const int has_extension = obu_extension ? 1 : 0;
-
-  aom_wb_write_literal(&hdr, 0, 1);  // forbidden bit.
-  aom_wb_write_literal(&hdr, (int)obu_type, 4);
-  aom_wb_write_literal(&hdr, has_extension, 1);
-  aom_wb_write_literal(&hdr, 1, 1);  // obu_has_payload_length_field
-  aom_wb_write_literal(&hdr, 0, 1);  // reserved
-
-  if (obu_extension) aom_wb_write_literal(&hdr, obu_extension & 0xFF, 8);
-
-  const uint32_t bytes_written = aom_wb_bytes_written(&hdr);
-  return bytes_written;
-}
-
-// Encodes `obu_payload_size` with aom_uleb_encode() into `dest`, starting
-// `obu_header_size` bytes in (directly after the OBU header).
-// Returns AOM_CODEC_OK on success and AOM_CODEC_ERROR if the encode fails.
-int write_uleb_obu_size(uint32_t obu_header_size, uint32_t obu_payload_size,
-                        uint8_t *dest) {
-  uint8_t *const length_field = dest + obu_header_size;
-  size_t coded_len = 0;  // filled in by the encoder, not used here
-
-  const int encode_failed =
-      aom_uleb_encode(obu_payload_size, sizeof(obu_payload_size), length_field,
-                      &coded_len) != 0;
-
-  return encode_failed ? AOM_CODEC_ERROR : AOM_CODEC_OK;
-}
-
-// Opens a gap between the OBU header and its payload so that the payload
-// length field can be stored there: the `obu_payload_size` bytes that start at
-// data + obu_header_size are moved up by the size of that field.
-// Returns the size of the length field in bytes, as reported by
-// aom_uleb_size_in_bytes().
-static size_t obu_memmove(uint32_t obu_header_size, uint32_t obu_payload_size,
-                          uint8_t *data) {
-  const size_t gap = aom_uleb_size_in_bytes(obu_payload_size);
-  const uint32_t src_off = obu_header_size;
-  const uint32_t dst_off = (uint32_t)gap + obu_header_size;
-
-  // Source and destination overlap, hence memmove rather than memcpy.
-  memmove(data + dst_off, data + src_off, obu_payload_size);
-  return gap;
-}
-
-// Appends the trailing-bits marker to `wb`: a single 1 bit when the buffer is
-// not byte aligned, or a whole 0x80 byte when it already is.
-static void add_trailing_bits(struct aom_write_bit_buffer *wb) {
-  if (!aom_wb_is_byte_aligned(wb)) {
-    // assumes that the other bits are already 0s
-    aom_wb_write_bit(wb, 1);
-    return;
-  }
-  aom_wb_write_literal(wb, 0x80, 8);
-}
-
-// Converts the major/minor level `bl` to a seq_level_idx and writes it to
-// `wb` using LEVEL_BITS bits. The index is validated by assertion only.
-static void write_bitstream_level(BitstreamLevel bl,
-                                  struct aom_write_bit_buffer *wb) {
-  const uint8_t level_idx = major_minor_to_seq_level_idx(bl);
-
-  assert(is_valid_seq_level_idx(level_idx));
-  aom_wb_write_literal(wb, level_idx, LEVEL_BITS);
-}
-
-// Writes the payload of a sequence header OBU to `dst` and returns its size
-// in bytes, including the trailing bits.
-//
-// Order of the payload: profile, still-picture flags, timing / decoder-model /
-// operating-point information (or just one level for the reduced still-picture
-// header), the fields emitted by write_sequence_header(), the color config,
-// the film-grain-present flag, and the trailing bits.
-uint32_t write_sequence_header_obu(AV1_COMP *cpi, uint8_t *const dst) {
- AV1_COMMON *const cm = &cpi->common;
- struct aom_write_bit_buffer wb = { dst, 0 };
- uint32_t size = 0;
-
- write_profile(cm->seq_params.profile, &wb);
-
- // Still picture or not
- aom_wb_write_bit(&wb, cm->seq_params.still_picture);
- // The reduced header may only be used for still pictures.
- assert(IMPLIES(!cm->seq_params.still_picture,
- !cm->seq_params.reduced_still_picture_hdr));
- // whether to use reduced still picture header
- aom_wb_write_bit(&wb, cm->seq_params.reduced_still_picture_hdr);
-
- if (cm->seq_params.reduced_still_picture_hdr) {
- // Reduced header: no timing or model information, a single level only.
- assert(cm->timing_info_present == 0);
- assert(cm->seq_params.decoder_model_info_present_flag == 0);
- assert(cm->seq_params.display_model_info_present_flag == 0);
- write_bitstream_level(cm->seq_params.level[0], &wb);
- } else {
- aom_wb_write_bit(&wb, cm->timing_info_present); // timing info present flag
-
- if (cm->timing_info_present) {
- // timing_info
- write_timing_info_header(cm, &wb);
- aom_wb_write_bit(&wb, cm->seq_params.decoder_model_info_present_flag);
- if (cm->seq_params.decoder_model_info_present_flag) {
- write_decoder_model_info(cm, &wb);
- }
- }
- aom_wb_write_bit(&wb, cm->seq_params.display_model_info_present_flag);
- aom_wb_write_literal(&wb, cm->seq_params.operating_points_cnt_minus_1,
- OP_POINTS_CNT_MINUS_1_BITS);
- // One entry per operating point: idc, level, optional tier and optional
- // decoder / display model parameters.
- int i;
- for (i = 0; i < cm->seq_params.operating_points_cnt_minus_1 + 1; i++) {
- aom_wb_write_literal(&wb, cm->seq_params.operating_point_idc[i],
- OP_POINTS_IDC_BITS);
- write_bitstream_level(cm->seq_params.level[i], &wb);
- // The tier bit is only coded for levels with major version above 3.
- if (cm->seq_params.level[i].major > 3)
- aom_wb_write_bit(&wb, cm->seq_params.tier[i]);
- if (cm->seq_params.decoder_model_info_present_flag) {
- aom_wb_write_bit(&wb,
- cm->op_params[i].decoder_model_param_present_flag);
- if (cm->op_params[i].decoder_model_param_present_flag)
- write_dec_model_op_parameters(cm, &wb, i);
- }
- if (cm->seq_params.display_model_info_present_flag) {
- aom_wb_write_bit(&wb,
- cm->op_params[i].display_model_param_present_flag);
- if (cm->op_params[i].display_model_param_present_flag) {
- // Coded minus 1 in 4 bits.
- assert(cm->op_params[i].initial_display_delay <= 10);
- aom_wb_write_literal(&wb, cm->op_params[i].initial_display_delay - 1,
- 4);
- }
- }
- }
- }
- write_sequence_header(cpi, &wb);
-
- write_color_config(&cm->seq_params, &wb);
-
- aom_wb_write_bit(&wb, cm->seq_params.film_grain_params_present);
-
- add_trailing_bits(&wb);
-
- size = aom_wb_bytes_written(&wb);
- return size;
-}
-
-// Writes the uncompressed frame header at `dst` through a fresh bit buffer,
-// optionally followed by trailing bits, and returns the number of bytes
-// written. `saved_wb` is passed through to write_uncompressed_header_obu().
-static uint32_t write_frame_header_obu(AV1_COMP *cpi,
-                                       struct aom_write_bit_buffer *saved_wb,
-                                       uint8_t *const dst,
-                                       int append_trailing_bits) {
-  struct aom_write_bit_buffer header_wb = { dst, 0 };
-
-  write_uncompressed_header_obu(cpi, saved_wb, &header_wb);
-  if (append_trailing_bits) {
-    add_trailing_bits(&header_wb);
-  }
-
-  const uint32_t bytes_written = aom_wb_bytes_written(&header_wb);
-  return bytes_written;
-}
-
-// Writes a tile group header at `dst` and returns its size in bytes.
-//
-// Nothing is written (size 0) when `tiles_log2` is 0. Otherwise the
-// tile_start_and_end_present_flag is written, followed by `startTile` and
-// `endTile` in `tiles_log2` bits each when that flag is set.
-static uint32_t write_tile_group_header(uint8_t *const dst, int startTile,
-                                        int endTile, int tiles_log2,
-                                        int tile_start_and_end_present_flag) {
-  if (tiles_log2 == 0) return 0;
-
-  struct aom_write_bit_buffer tg_wb = { dst, 0 };
-  aom_wb_write_bit(&tg_wb, tile_start_and_end_present_flag);
-
-  if (tile_start_and_end_present_flag) {
-    aom_wb_write_literal(&tg_wb, startTile, tiles_log2);
-    aom_wb_write_literal(&tg_wb, endTile, tiles_log2);
-  }
-
-  return aom_wb_bytes_written(&tg_wb);
-}
-
-// Bookkeeping about a frame header; write_tiles_in_tg_obus() takes a const
-// pointer to one.
-// NOTE(review): the field notes below are inferred from the names only --
-// confirm against the code that fills and reads this struct.
-typedef struct {
- uint8_t *frame_header; // presumably the start of the frame header bytes
- size_t obu_header_byte_offset; // presumably a byte offset to the OBU header
- size_t total_length; // presumably the total length in bytes
-} FrameHeaderInfo;
-
-static uint32_t write_tiles_in_tg_obus(AV1_COMP *const cpi, uint8_t *const dst,
- struct aom_write_bit_buffer *saved_wb,
- uint8_t obu_extension_header,
- const FrameHeaderInfo *fh_info) {
- AV1_COMMON *const cm = &cpi->common;
- aom_writer mode_bc;
- int tile_row, tile_col;
- TileBufferEnc(*const tile_buffers)[MAX_TILE_COLS] = cpi->tile_buffers;
- uint32_t total_size = 0;
- const int tile_cols = cm->tile_cols;
- const int tile_rows = cm->tile_rows;
- unsigned int tile_size = 0;
- unsigned int max_tile_size = 0;
- unsigned int max_tile_col_size = 0;
- const int n_log2_tiles = cm->log2_tile_rows + cm->log2_tile_cols;
- // Fixed size tile groups for the moment
- const int num_tg_hdrs = cm->num_tg;
- const int tg_size =
- (cm->large_scale_tile)
- ? 1
- : (tile_rows * tile_cols + num_tg_hdrs - 1) / num_tg_hdrs;
- int tile_count = 0;
- int curr_tg_data_size = 0;
- uint8_t *data = dst;
- int new_tg = 1;
- const int have_tiles = tile_cols * tile_rows > 1;
- int first_tg = 1;
-
- cm->largest_tile_id = 0;
-
- if (cm->large_scale_tile) {
- // For large_scale_tile case, we always have only one tile group, so it can
- // be written as an OBU_FRAME.
- const OBU_TYPE obu_type = OBU_FRAME;
- const uint32_t tg_hdr_size = write_obu_header(obu_type, 0, data);
- data += tg_hdr_size;
-
- const uint32_t frame_header_size =
- write_frame_header_obu(cpi, saved_wb, data, 0);
- data += frame_header_size;
- total_size += frame_header_size;
-
-#define EXT_TILE_DEBUG 0
-#if EXT_TILE_DEBUG
- {
- char fn[20] = "./fh";
- fn[4] = cm->current_video_frame / 100 + '0';
- fn[5] = (cm->current_video_frame % 100) / 10 + '0';
- fn[6] = (cm->current_video_frame % 10) + '0';
- fn[7] = '\0';
- av1_print_uncompressed_frame_header(data - frame_header_size,
- frame_header_size, fn);
- }
-#endif // EXT_TILE_DEBUG
-#undef EXT_TILE_DEBUG
-
- int tile_size_bytes = 0;
- int tile_col_size_bytes = 0;
-
- for (tile_col = 0; tile_col < tile_cols; tile_col++) {
- TileInfo tile_info;
- const int is_last_col = (tile_col == tile_cols - 1);
- const uint32_t col_offset = total_size;
-
- av1_tile_set_col(&tile_info, cm, tile_col);
-
- // The last column does not have a column header
- if (!is_last_col) total_size += 4;
-
- for (tile_row = 0; tile_row < tile_rows; tile_row++) {
- TileBufferEnc *const buf = &tile_buffers[tile_row][tile_col];
- const int data_offset = have_tiles ? 4 : 0;
- const int tile_idx = tile_row * tile_cols + tile_col;
- TileDataEnc *this_tile = &cpi->tile_data[tile_idx];
- av1_tile_set_row(&tile_info, cm, tile_row);
-
- buf->data = dst + total_size + tg_hdr_size;
-
- // Is CONFIG_EXT_TILE = 1, every tile in the row has a header,
- // even for the last one, unless no tiling is used at all.
- total_size += data_offset;
- // Initialise tile context from the frame context
- this_tile->tctx = *cm->fc;
- cpi->td.mb.e_mbd.tile_ctx = &this_tile->tctx;
- mode_bc.allow_update_cdf = !cm->large_scale_tile;
- mode_bc.allow_update_cdf =
- mode_bc.allow_update_cdf && !cm->disable_cdf_update;
- aom_start_encode(&mode_bc, buf->data + data_offset);
- write_modes(cpi, &tile_info, &mode_bc, tile_row, tile_col);
- aom_stop_encode(&mode_bc);
- tile_size = mode_bc.pos;
- buf->size = tile_size;
-
- // Record the maximum tile size we see, so we can compact headers later.
- if (tile_size > max_tile_size) {
- max_tile_size = tile_size;
- cm->largest_tile_id = tile_cols * tile_row + tile_col;
- }
-
- if (have_tiles) {
- // tile header: size of this tile, or copy offset
- uint32_t tile_header = tile_size - AV1_MIN_TILE_SIZE_BYTES;
- const int tile_copy_mode =
- ((AOMMAX(cm->tile_width, cm->tile_height) << MI_SIZE_LOG2) <= 256)
- ? 1
- : 0;
-
- // If tile_copy_mode = 1, check if this tile is a copy tile.
- // Very low chances to have copy tiles on the key frames, so don't
- // search on key frames to reduce unnecessary search.
- if (cm->frame_type != KEY_FRAME && tile_copy_mode) {
- const int identical_tile_offset =
- find_identical_tile(tile_row, tile_col, tile_buffers);
-
- if (identical_tile_offset > 0) {
- tile_size = 0;
- tile_header = identical_tile_offset | 0x80;
- tile_header <<= 24;
- }
- }
-
- mem_put_le32(buf->data, tile_header);
- }
-
- total_size += tile_size;
- }
-
- if (!is_last_col) {
- uint32_t col_size = total_size - col_offset - 4;
- mem_put_le32(dst + col_offset + tg_hdr_size, col_size);
-
- // Record the maximum tile column size we see.
- max_tile_col_size = AOMMAX(max_tile_col_size, col_size);
- }
- }
-
- if (have_tiles) {
- total_size = remux_tiles(cm, data, total_size - frame_header_size,
- max_tile_size, max_tile_col_size,
- &tile_size_bytes, &tile_col_size_bytes);
- total_size += frame_header_size;
- }
-
- // In EXT_TILE case, only use 1 tile group. Follow the obu syntax, write
- // current tile group size before tile data(include tile column header).
- // Tile group size doesn't include the bytes storing tg size.
- total_size += tg_hdr_size;
- const uint32_t obu_payload_size = total_size - tg_hdr_size;
- const size_t length_field_size =
- obu_memmove(tg_hdr_size, obu_payload_size, dst);
- if (write_uleb_obu_size(tg_hdr_size, obu_payload_size, dst) !=
- AOM_CODEC_OK) {
- assert(0);
- }
- total_size += (uint32_t)length_field_size;
- saved_wb->bit_buffer += length_field_size;
-
- // Now fill in the gaps in the uncompressed header.
- if (have_tiles) {
- assert(tile_col_size_bytes >= 1 && tile_col_size_bytes <= 4);
- aom_wb_overwrite_literal(saved_wb, tile_col_size_bytes - 1, 2);
-
- assert(tile_size_bytes >= 1 && tile_size_bytes <= 4);
- aom_wb_overwrite_literal(saved_wb, tile_size_bytes - 1, 2);
- }
- return total_size;
- }
-
- uint32_t obu_header_size = 0;
- uint8_t *tile_data_start = dst + total_size;
- for (tile_row = 0; tile_row < tile_rows; tile_row++) {
- TileInfo tile_info;
- av1_tile_set_row(&tile_info, cm, tile_row);
-
- for (tile_col = 0; tile_col < tile_cols; tile_col++) {
- const int tile_idx = tile_row * tile_cols + tile_col;
- TileBufferEnc *const buf = &tile_buffers[tile_row][tile_col];
- TileDataEnc *this_tile = &cpi->tile_data[tile_idx];
- int is_last_tile_in_tg = 0;
-
- if (new_tg) {
- data = dst + total_size;
-
- // A new tile group begins at this tile. Write the obu header and
- // tile group header
- const OBU_TYPE obu_type =
- (num_tg_hdrs == 1) ? OBU_FRAME : OBU_TILE_GROUP;
- curr_tg_data_size =
- write_obu_header(obu_type, obu_extension_header, data);
- obu_header_size = curr_tg_data_size;
-
- if (num_tg_hdrs == 1) {
- curr_tg_data_size += write_frame_header_obu(
- cpi, saved_wb, data + curr_tg_data_size, 0);
- }
- curr_tg_data_size += write_tile_group_header(
- data + curr_tg_data_size, tile_idx,
- AOMMIN(tile_idx + tg_size - 1, tile_cols * tile_rows - 1),
- n_log2_tiles, cm->num_tg > 1);
- total_size += curr_tg_data_size;
- tile_data_start += curr_tg_data_size;
- new_tg = 0;
- tile_count = 0;
- }
- tile_count++;
- av1_tile_set_col(&tile_info, cm, tile_col);
-
- if (tile_count == tg_size || tile_idx == (tile_cols * tile_rows - 1)) {
- is_last_tile_in_tg = 1;
- new_tg = 1;
- } else {
- is_last_tile_in_tg = 0;
- }
-
- buf->data = dst + total_size;
-
- // The last tile of the tile group does not have a header.
- if (!is_last_tile_in_tg) total_size += 4;
-
- // Initialise tile context from the frame context
- this_tile->tctx = *cm->fc;
- cpi->td.mb.e_mbd.tile_ctx = &this_tile->tctx;
- mode_bc.allow_update_cdf = 1;
- mode_bc.allow_update_cdf =
- mode_bc.allow_update_cdf && !cm->disable_cdf_update;
- const int num_planes = av1_num_planes(cm);
- av1_reset_loop_restoration(&cpi->td.mb.e_mbd, num_planes);
-
- aom_start_encode(&mode_bc, dst + total_size);
- write_modes(cpi, &tile_info, &mode_bc, tile_row, tile_col);
- aom_stop_encode(&mode_bc);
- tile_size = mode_bc.pos;
- assert(tile_size >= AV1_MIN_TILE_SIZE_BYTES);
-
- curr_tg_data_size += (tile_size + (is_last_tile_in_tg ? 0 : 4));
- buf->size = tile_size;
- if (tile_size > max_tile_size) {
- cm->largest_tile_id = tile_cols * tile_row + tile_col;
- max_tile_size = tile_size;
- }
-
- if (!is_last_tile_in_tg) {
- // size of this tile
- mem_put_le32(buf->data, tile_size - AV1_MIN_TILE_SIZE_BYTES);
- } else {
- // write current tile group size
- const uint32_t obu_payload_size = curr_tg_data_size - obu_header_size;
- const size_t length_field_size =
- obu_memmove(obu_header_size, obu_payload_size, data);
- if (write_uleb_obu_size(obu_header_size, obu_payload_size, data) !=
- AOM_CODEC_OK) {
- assert(0);
- }
- curr_tg_data_size += (int)length_field_size;
- total_size += (uint32_t)length_field_size;
- tile_data_start += length_field_size;
- if (num_tg_hdrs == 1) {
- // if this tg is combined with the frame header then update saved
- // frame header base offset accroding to length field size
- saved_wb->bit_buffer += length_field_size;
- }
-
- if (!first_tg && cm->error_resilient_mode) {
- // Make room for a duplicate Frame Header OBU.
- memmove(data + fh_info->total_length, data, curr_tg_data_size);
-
- // Insert a copy of the Frame Header OBU.
- memcpy(data, fh_info->frame_header, fh_info->total_length);
-
- // Force context update tile to be the first tile in error
- // resiliant mode as the duplicate frame headers will have
- // context_update_tile_id set to 0
- cm->largest_tile_id = 0;
-
- // Rewrite the OBU header to change the OBU type to Redundant Frame
- // Header.
- write_obu_header(OBU_REDUNDANT_FRAME_HEADER, obu_extension_header,
- &data[fh_info->obu_header_byte_offset]);
-
- data += fh_info->total_length;
-
- curr_tg_data_size += (int)(fh_info->total_length);
- total_size += (uint32_t)(fh_info->total_length);
- }
- first_tg = 0;
- }
-
- total_size += tile_size;
- }
- }
-
- if (have_tiles) {
- // Fill in context_update_tile_id indicating the tile to use for the
- // cdf update. The encoder currently sets it to the largest tile
- // (but is up to the encoder)
- aom_wb_overwrite_literal(saved_wb, cm->largest_tile_id,
- cm->log2_tile_cols + cm->log2_tile_rows);
- // If more than one tile group. tile_size_bytes takes the default value 4
- // and does not need to be set. For a single tile group it is set in the
- // section below.
- if (num_tg_hdrs == 1) {
- int tile_size_bytes = 4, unused;
- const uint32_t tile_data_offset = (uint32_t)(tile_data_start - dst);
- const uint32_t tile_data_size = total_size - tile_data_offset;
-
- total_size =
- remux_tiles(cm, tile_data_start, tile_data_size, max_tile_size,
- max_tile_col_size, &tile_size_bytes, &unused);
- total_size += tile_data_offset;
- assert(tile_size_bytes >= 1 && tile_size_bytes <= 4);
-
- aom_wb_overwrite_literal(saved_wb, tile_size_bytes - 1, 2);
-
- // Update the OBU length if remux_tiles() reduced the size.
- uint64_t payload_size;
- size_t length_field_size;
- int res =
- aom_uleb_decode(dst + obu_header_size, total_size - obu_header_size,
- &payload_size, &length_field_size);
- assert(res == 0);
- (void)res;
-
- const uint64_t new_payload_size =
- total_size - obu_header_size - length_field_size;
- if (new_payload_size != payload_size) {
- size_t new_length_field_size;
- res = aom_uleb_encode(new_payload_size, length_field_size,
- dst + obu_header_size, &new_length_field_size);
- assert(res == 0);
- if (new_length_field_size < length_field_size) {
- const size_t src_offset = obu_header_size + length_field_size;
- const size_t dst_offset = obu_header_size + new_length_field_size;
- memmove(dst + dst_offset, dst + src_offset, (size_t)payload_size);
- total_size -= (int)(length_field_size - new_length_field_size);
- }
- }
- }
- }
- return total_size;
-}
-
-int av1_pack_bitstream(AV1_COMP *const cpi, uint8_t *dst, size_t *size) {
- uint8_t *data = dst;
- uint32_t data_size;
- AV1_COMMON *const cm = &cpi->common;
- uint32_t obu_header_size = 0;
- uint32_t obu_payload_size = 0;
- FrameHeaderInfo fh_info = { NULL, 0, 0 };
- const uint8_t obu_extension_header =
- cm->temporal_layer_id << 5 | cm->spatial_layer_id << 3 | 0;
-
-#if CONFIG_BITSTREAM_DEBUG
- bitstream_queue_reset_write();
-#endif
-
- // The TD is now written outside the frame encode loop
-
- // write sequence header obu if KEY_FRAME, preceded by 4-byte size
- if (cm->frame_type == KEY_FRAME && cm->show_frame) {
- obu_header_size = write_obu_header(OBU_SEQUENCE_HEADER, 0, data);
-
- obu_payload_size = write_sequence_header_obu(cpi, data + obu_header_size);
- const size_t length_field_size =
- obu_memmove(obu_header_size, obu_payload_size, data);
- if (write_uleb_obu_size(obu_header_size, obu_payload_size, data) !=
- AOM_CODEC_OK) {
- return AOM_CODEC_ERROR;
- }
-
- data += obu_header_size + obu_payload_size + length_field_size;
- }
-
- const int write_frame_header =
- (cm->num_tg > 1 || encode_show_existing_frame(cm));
- struct aom_write_bit_buffer saved_wb;
- if (write_frame_header) {
- // Write Frame Header OBU.
- fh_info.frame_header = data;
- obu_header_size =
- write_obu_header(OBU_FRAME_HEADER, obu_extension_header, data);
- obu_payload_size =
- write_frame_header_obu(cpi, &saved_wb, data + obu_header_size, 1);
-
- const size_t length_field_size =
- obu_memmove(obu_header_size, obu_payload_size, data);
- if (write_uleb_obu_size(obu_header_size, obu_payload_size, data) !=
- AOM_CODEC_OK) {
- return AOM_CODEC_ERROR;
- }
-
- fh_info.obu_header_byte_offset = 0;
- fh_info.total_length =
- obu_header_size + obu_payload_size + length_field_size;
- data += fh_info.total_length;
-
- // Since length_field_size is determined adaptively after frame header
- // encoding, saved_wb must be adjusted accordingly.
- saved_wb.bit_buffer += length_field_size;
- }
-
- if (encode_show_existing_frame(cm)) {
- data_size = 0;
- } else {
- // Each tile group obu will be preceded by 4-byte size of the tile group
- // obu
- data_size = write_tiles_in_tg_obus(cpi, data, &saved_wb,
- obu_extension_header, &fh_info);
- }
- data += data_size;
- *size = data - dst;
- return AOM_CODEC_OK;
-}
diff --git a/third_party/aom/av1/encoder/bitstream.h b/third_party/aom/av1/encoder/bitstream.h
deleted file mode 100644
index 465ccaed5..000000000
--- a/third_party/aom/av1/encoder/bitstream.h
+++ /dev/null
@@ -1,51 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#ifndef AOM_AV1_ENCODER_BITSTREAM_H_
-#define AOM_AV1_ENCODER_BITSTREAM_H_
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-#include "av1/encoder/encoder.h"
-
-struct aom_write_bit_buffer;
-
-// Writes only the OBU Sequence Header payload, and returns the size of the
-// payload written to 'dst'. This function does not write the OBU header, the
-// optional extension, or the OBU size to 'dst'.
-uint32_t write_sequence_header_obu(AV1_COMP *cpi, uint8_t *const dst);
-
-// Writes the OBU header byte, and the OBU header extension byte when
-// 'obu_extension' is non-zero. Returns number of bytes written to 'dst'.
-uint32_t write_obu_header(OBU_TYPE obu_type, int obu_extension,
- uint8_t *const dst);
-
-int write_uleb_obu_size(uint32_t obu_header_size, uint32_t obu_payload_size,
- uint8_t *dest);
-
-int av1_pack_bitstream(AV1_COMP *const cpi, uint8_t *dest, size_t *size);
-
-static INLINE int av1_preserve_existing_gf(AV1_COMP *cpi) {
- // Do not swap gf and arf indices for internal overlay frames
- return cpi->rc.is_src_frame_alt_ref && !cpi->rc.is_src_frame_ext_arf;
-}
-
-void av1_write_tx_type(const AV1_COMMON *const cm, const MACROBLOCKD *xd,
- int blk_row, int blk_col, int plane, TX_SIZE tx_size,
- aom_writer *w);
-
-#ifdef __cplusplus
-} // extern "C"
-#endif
-
-#endif // AOM_AV1_ENCODER_BITSTREAM_H_
diff --git a/third_party/aom/av1/encoder/block.h b/third_party/aom/av1/encoder/block.h
deleted file mode 100644
index 0bc5dea82..000000000
--- a/third_party/aom/av1/encoder/block.h
+++ /dev/null
@@ -1,452 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#ifndef AOM_AV1_ENCODER_BLOCK_H_
-#define AOM_AV1_ENCODER_BLOCK_H_
-
-#include "av1/common/entropymv.h"
-#include "av1/common/entropy.h"
-#include "av1/common/mvref_common.h"
-#include "av1/encoder/hash.h"
-#if CONFIG_DIST_8X8
-#include "aom/aomcx.h"
-#endif
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-typedef struct {
- unsigned int sse;
- int sum;
- unsigned int var;
-} DIFF;
-
-typedef struct macroblock_plane {
- DECLARE_ALIGNED(16, int16_t, src_diff[MAX_SB_SQUARE]);
- tran_low_t *qcoeff;
- tran_low_t *coeff;
- uint16_t *eobs;
- uint8_t *txb_entropy_ctx;
- struct buf_2d src;
-
- // Quantizer setings
- // These are used/accessed only in the quantization process
- // RDO does not / must not depend on any of these values
- // All values below share the coefficient scale/shift used in TX
- const int16_t *quant_fp_QTX;
- const int16_t *round_fp_QTX;
- const int16_t *quant_QTX;
- const int16_t *quant_shift_QTX;
- const int16_t *zbin_QTX;
- const int16_t *round_QTX;
- const int16_t *dequant_QTX;
-} MACROBLOCK_PLANE;
-
-typedef struct {
- int txb_skip_cost[TXB_SKIP_CONTEXTS][2];
- int base_eob_cost[SIG_COEF_CONTEXTS_EOB][3];
- int base_cost[SIG_COEF_CONTEXTS][4];
- int eob_extra_cost[EOB_COEF_CONTEXTS][2];
- int dc_sign_cost[DC_SIGN_CONTEXTS][2];
- int lps_cost[LEVEL_CONTEXTS][COEFF_BASE_RANGE + 1];
-} LV_MAP_COEFF_COST;
-
-typedef struct {
- int eob_cost[2][11];
-} LV_MAP_EOB_COST;
-
-typedef struct {
- tran_low_t tcoeff[MAX_MB_PLANE][MAX_SB_SQUARE];
- uint16_t eobs[MAX_MB_PLANE][MAX_SB_SQUARE / (TX_SIZE_W_MIN * TX_SIZE_H_MIN)];
- uint8_t txb_skip_ctx[MAX_MB_PLANE]
- [MAX_SB_SQUARE / (TX_SIZE_W_MIN * TX_SIZE_H_MIN)];
- int dc_sign_ctx[MAX_MB_PLANE]
- [MAX_SB_SQUARE / (TX_SIZE_W_MIN * TX_SIZE_H_MIN)];
-} CB_COEFF_BUFFER;
-
-typedef struct {
- int16_t mode_context[MODE_CTX_REF_FRAMES];
- // TODO(angiebird): Reduce the buffer size according to sb_type
- tran_low_t *tcoeff[MAX_MB_PLANE];
- uint16_t *eobs[MAX_MB_PLANE];
- uint8_t *txb_skip_ctx[MAX_MB_PLANE];
- int *dc_sign_ctx[MAX_MB_PLANE];
- uint8_t ref_mv_count[MODE_CTX_REF_FRAMES];
- CANDIDATE_MV ref_mv_stack[MODE_CTX_REF_FRAMES][MAX_REF_MV_STACK_SIZE];
- int_mv global_mvs[REF_FRAMES];
- int16_t compound_mode_context[MODE_CTX_REF_FRAMES];
-} MB_MODE_INFO_EXT;
-
-typedef struct {
- int col_min;
- int col_max;
- int row_min;
- int row_max;
-} MvLimits;
-
-typedef struct {
- uint8_t best_palette_color_map[MAX_PALETTE_SQUARE];
- int kmeans_data_buf[2 * MAX_PALETTE_SQUARE];
-} PALETTE_BUFFER;
-
-typedef struct {
- TX_SIZE tx_size;
- TX_SIZE inter_tx_size[INTER_TX_SIZE_BUF_LEN];
- uint8_t blk_skip[MAX_MIB_SIZE * MAX_MIB_SIZE];
- TX_TYPE txk_type[TXK_TYPE_BUF_LEN];
- RD_STATS rd_stats;
- uint32_t hash_value;
-} MB_RD_INFO;
-
-#define RD_RECORD_BUFFER_LEN 8
-typedef struct {
- MB_RD_INFO tx_rd_info[RD_RECORD_BUFFER_LEN]; // Circular buffer.
- int index_start;
- int num;
- CRC32C crc_calculator; // Hash function.
-} MB_RD_RECORD;
-
-typedef struct {
- int64_t dist;
- int64_t sse;
- int rate;
- uint16_t eob;
- TX_TYPE tx_type;
- uint16_t entropy_context;
- uint8_t txb_entropy_ctx;
- uint8_t valid;
- uint8_t fast; // This is not being used now.
-} TXB_RD_INFO;
-
-#define TX_SIZE_RD_RECORD_BUFFER_LEN 256
-typedef struct {
- uint32_t hash_vals[TX_SIZE_RD_RECORD_BUFFER_LEN];
- TXB_RD_INFO tx_rd_info[TX_SIZE_RD_RECORD_BUFFER_LEN];
- int index_start;
- int num;
-} TXB_RD_RECORD;
-
-typedef struct tx_size_rd_info_node {
- TXB_RD_INFO *rd_info_array; // Points to array of size TX_TYPES.
- struct tx_size_rd_info_node *children[4];
-} TXB_RD_INFO_NODE;
-
-// Region size for mode decision sampling in the first pass of partition
-// search(two_pass_partition_search speed feature), in units of mi size(4).
-// Used by the mode_pruning_based_on_two_pass_partition_search speed feature.
-#define FIRST_PARTITION_PASS_SAMPLE_REGION 8
-#define FIRST_PARTITION_PASS_SAMPLE_REGION_LOG2 3
-#define FIRST_PARTITION_PASS_STATS_TABLES \
- (MAX_MIB_SIZE >> FIRST_PARTITION_PASS_SAMPLE_REGION_LOG2) * \
- (MAX_MIB_SIZE >> FIRST_PARTITION_PASS_SAMPLE_REGION_LOG2)
-#define FIRST_PARTITION_PASS_STATS_STRIDE \
- (MAX_MIB_SIZE_LOG2 - FIRST_PARTITION_PASS_SAMPLE_REGION_LOG2)
-
-static INLINE int av1_first_partition_pass_stats_index(int mi_row, int mi_col) {
- const int row =
- (mi_row & MAX_MIB_MASK) >> FIRST_PARTITION_PASS_SAMPLE_REGION_LOG2;
- const int col =
- (mi_col & MAX_MIB_MASK) >> FIRST_PARTITION_PASS_SAMPLE_REGION_LOG2;
- return (row << FIRST_PARTITION_PASS_STATS_STRIDE) + col;
-}
-
-typedef struct {
- uint8_t ref0_counts[REF_FRAMES]; // Counters for ref_frame[0].
- uint8_t ref1_counts[REF_FRAMES]; // Counters for ref_frame[1].
- int sample_counts; // Number of samples collected.
-} FIRST_PARTITION_PASS_STATS;
-
-#define MAX_INTERP_FILTER_STATS 64
-typedef struct {
- InterpFilters filters;
- int_mv mv[2];
- int8_t ref_frames[2];
- COMPOUND_TYPE comp_type;
-} INTERPOLATION_FILTER_STATS;
-
-typedef struct macroblock MACROBLOCK;
-struct macroblock {
- struct macroblock_plane plane[MAX_MB_PLANE];
-
- // Determine if one would go with reduced complexity transform block
- // search model to select prediction modes, or full complexity model
- // to select transform kernel.
- int rd_model;
-
- // Indicate if the encoder is running in the first pass partition search.
- // In that case, apply certain speed features therein to reduce the overhead
- // cost in the first pass search.
- int cb_partition_scan;
-
- FIRST_PARTITION_PASS_STATS
- first_partition_pass_stats[FIRST_PARTITION_PASS_STATS_TABLES];
-
- // [comp_idx][saved stat_idx]
- INTERPOLATION_FILTER_STATS interp_filter_stats[2][MAX_INTERP_FILTER_STATS];
- int interp_filter_stats_idx[2];
-
- // Activate constrained coding block partition search range.
- int use_cb_search_range;
-
- // Inter macroblock RD search info.
- MB_RD_RECORD mb_rd_record;
-
- // Inter transform block RD search info. for square TX sizes.
- TXB_RD_RECORD txb_rd_record_8X8[(MAX_MIB_SIZE >> 1) * (MAX_MIB_SIZE >> 1)];
- TXB_RD_RECORD txb_rd_record_16X16[(MAX_MIB_SIZE >> 2) * (MAX_MIB_SIZE >> 2)];
- TXB_RD_RECORD txb_rd_record_32X32[(MAX_MIB_SIZE >> 3) * (MAX_MIB_SIZE >> 3)];
- TXB_RD_RECORD txb_rd_record_64X64[(MAX_MIB_SIZE >> 4) * (MAX_MIB_SIZE >> 4)];
-
- // Intra transform block RD search info. for square TX sizes.
- TXB_RD_RECORD txb_rd_record_intra;
-
- MACROBLOCKD e_mbd;
- MB_MODE_INFO_EXT *mbmi_ext;
- int skip_block;
- int qindex;
-
- // The equivalent error at the current rdmult of one whole bit (not one
- // bitcost unit).
- int errorperbit;
- // The equivalend SAD error of one (whole) bit at the current quantizer
- // for large blocks.
- int sadperbit16;
- // The equivalend SAD error of one (whole) bit at the current quantizer
- // for sub-8x8 blocks.
- int sadperbit4;
- int rdmult;
- int mb_energy;
- int sb_energy_level;
- int *m_search_count_ptr;
- int *ex_search_count_ptr;
-
- unsigned int txb_split_count;
-
- // These are set to their default values at the beginning, and then adjusted
- // further in the encoding process.
- BLOCK_SIZE min_partition_size;
- BLOCK_SIZE max_partition_size;
-
- unsigned int max_mv_context[REF_FRAMES];
- unsigned int source_variance;
- unsigned int pred_sse[REF_FRAMES];
- int pred_mv_sad[REF_FRAMES];
-
- int *nmvjointcost;
- int nmv_vec_cost[MV_JOINTS];
- int *nmvcost[2];
- int *nmvcost_hp[2];
- int **mv_cost_stack;
- int **mvcost;
-
- int32_t *wsrc_buf;
- int32_t *mask_buf;
- uint8_t *above_pred_buf;
- uint8_t *left_pred_buf;
-
- PALETTE_BUFFER *palette_buffer;
-
- CONV_BUF_TYPE *tmp_conv_dst;
- uint8_t *tmp_obmc_bufs[2];
-
- // buffer for hash value calculation of a block
- // used only in av1_get_block_hash_value()
- // [first hash/second hash]
- // [two buffers used ping-pong]
- uint32_t *hash_value_buffer[2][2];
-
- CRC_CALCULATOR crc_calculator1;
- CRC_CALCULATOR crc_calculator2;
- int g_crc_initialized;
-
- // These define limits to motion vector components to prevent them
- // from extending outside the UMV borders
- MvLimits mv_limits;
-
- uint8_t blk_skip[MAX_MIB_SIZE * MAX_MIB_SIZE];
-
- int skip;
- int skip_chroma_rd;
- int skip_cost[SKIP_CONTEXTS][2];
-
- int skip_mode; // 0: off; 1: on
- int skip_mode_cost[SKIP_CONTEXTS][2];
-
- int compound_idx;
-
- LV_MAP_COEFF_COST coeff_costs[TX_SIZES][PLANE_TYPES];
- LV_MAP_EOB_COST eob_costs[7][2];
- uint16_t cb_offset;
-
- // mode costs
- int intra_inter_cost[INTRA_INTER_CONTEXTS][2];
-
- int mbmode_cost[BLOCK_SIZE_GROUPS][INTRA_MODES];
- int newmv_mode_cost[NEWMV_MODE_CONTEXTS][2];
- int zeromv_mode_cost[GLOBALMV_MODE_CONTEXTS][2];
- int refmv_mode_cost[REFMV_MODE_CONTEXTS][2];
- int drl_mode_cost0[DRL_MODE_CONTEXTS][2];
-
- int comp_inter_cost[COMP_INTER_CONTEXTS][2];
- int single_ref_cost[REF_CONTEXTS][SINGLE_REFS - 1][2];
- int comp_ref_type_cost[COMP_REF_TYPE_CONTEXTS]
- [CDF_SIZE(COMP_REFERENCE_TYPES)];
- int uni_comp_ref_cost[UNI_COMP_REF_CONTEXTS][UNIDIR_COMP_REFS - 1]
- [CDF_SIZE(2)];
- // Cost for signaling ref_frame[0] (LAST_FRAME, LAST2_FRAME, LAST3_FRAME or
- // GOLDEN_FRAME) in bidir-comp mode.
- int comp_ref_cost[REF_CONTEXTS][FWD_REFS - 1][2];
- // Cost for signaling ref_frame[1] (ALTREF_FRAME, ALTREF2_FRAME, or
- // BWDREF_FRAME) in bidir-comp mode.
- int comp_bwdref_cost[REF_CONTEXTS][BWD_REFS - 1][2];
- int inter_compound_mode_cost[INTER_MODE_CONTEXTS][INTER_COMPOUND_MODES];
- int compound_type_cost[BLOCK_SIZES_ALL][COMPOUND_TYPES - 1];
- int wedge_idx_cost[BLOCK_SIZES_ALL][16];
- int interintra_cost[BLOCK_SIZE_GROUPS][2];
- int wedge_interintra_cost[BLOCK_SIZES_ALL][2];
- int interintra_mode_cost[BLOCK_SIZE_GROUPS][INTERINTRA_MODES];
- int motion_mode_cost[BLOCK_SIZES_ALL][MOTION_MODES];
- int motion_mode_cost1[BLOCK_SIZES_ALL][2];
- int intra_uv_mode_cost[CFL_ALLOWED_TYPES][INTRA_MODES][UV_INTRA_MODES];
- int y_mode_costs[INTRA_MODES][INTRA_MODES][INTRA_MODES];
- int filter_intra_cost[BLOCK_SIZES_ALL][2];
- int filter_intra_mode_cost[FILTER_INTRA_MODES];
- int switchable_interp_costs[SWITCHABLE_FILTER_CONTEXTS][SWITCHABLE_FILTERS];
- int partition_cost[PARTITION_CONTEXTS][EXT_PARTITION_TYPES];
- int palette_y_size_cost[PALATTE_BSIZE_CTXS][PALETTE_SIZES];
- int palette_uv_size_cost[PALATTE_BSIZE_CTXS][PALETTE_SIZES];
- int palette_y_color_cost[PALETTE_SIZES][PALETTE_COLOR_INDEX_CONTEXTS]
- [PALETTE_COLORS];
- int palette_uv_color_cost[PALETTE_SIZES][PALETTE_COLOR_INDEX_CONTEXTS]
- [PALETTE_COLORS];
- int palette_y_mode_cost[PALATTE_BSIZE_CTXS][PALETTE_Y_MODE_CONTEXTS][2];
- int palette_uv_mode_cost[PALETTE_UV_MODE_CONTEXTS][2];
- // The rate associated with each alpha codeword
- int cfl_cost[CFL_JOINT_SIGNS][CFL_PRED_PLANES][CFL_ALPHABET_SIZE];
- int tx_size_cost[TX_SIZES - 1][TX_SIZE_CONTEXTS][TX_SIZES];
- int txfm_partition_cost[TXFM_PARTITION_CONTEXTS][2];
- int inter_tx_type_costs[EXT_TX_SETS_INTER][EXT_TX_SIZES][TX_TYPES];
- int intra_tx_type_costs[EXT_TX_SETS_INTRA][EXT_TX_SIZES][INTRA_MODES]
- [TX_TYPES];
- int angle_delta_cost[DIRECTIONAL_MODES][2 * MAX_ANGLE_DELTA + 1];
- int switchable_restore_cost[RESTORE_SWITCHABLE_TYPES];
- int wiener_restore_cost[2];
- int sgrproj_restore_cost[2];
- int intrabc_cost[2];
-
- // Used to store sub partition's choices.
- MV pred_mv[REF_FRAMES];
-
- // Store the best motion vector during motion search
- int_mv best_mv;
- // Store the second best motion vector during full-pixel motion search
- int_mv second_best_mv;
-
- // use default transform and skip transform type search for intra modes
- int use_default_intra_tx_type;
- // use default transform and skip transform type search for inter modes
- int use_default_inter_tx_type;
-#if CONFIG_DIST_8X8
- int using_dist_8x8;
- aom_tune_metric tune_metric;
-#endif // CONFIG_DIST_8X8
- int comp_idx_cost[COMP_INDEX_CONTEXTS][2];
- int comp_group_idx_cost[COMP_GROUP_IDX_CONTEXTS][2];
- // Bit flags for pruning tx type search, tx split, etc.
- int tx_search_prune[EXT_TX_SET_TYPES];
- int must_find_valid_partition;
- int tx_split_prune_flag; // Flag to skip tx split RD search.
- int recalc_luma_mc_data; // Flag to indicate recalculation of MC data during
- // interpolation filter search
-};
-
-static INLINE int is_rect_tx_allowed_bsize(BLOCK_SIZE bsize) {
- static const char LUT[BLOCK_SIZES_ALL] = {
- 0, // BLOCK_4X4
- 1, // BLOCK_4X8
- 1, // BLOCK_8X4
- 0, // BLOCK_8X8
- 1, // BLOCK_8X16
- 1, // BLOCK_16X8
- 0, // BLOCK_16X16
- 1, // BLOCK_16X32
- 1, // BLOCK_32X16
- 0, // BLOCK_32X32
- 1, // BLOCK_32X64
- 1, // BLOCK_64X32
- 0, // BLOCK_64X64
- 0, // BLOCK_64X128
- 0, // BLOCK_128X64
- 0, // BLOCK_128X128
- 1, // BLOCK_4X16
- 1, // BLOCK_16X4
- 1, // BLOCK_8X32
- 1, // BLOCK_32X8
- 1, // BLOCK_16X64
- 1, // BLOCK_64X16
- };
-
- return LUT[bsize];
-}
-
-static INLINE int is_rect_tx_allowed(const MACROBLOCKD *xd,
- const MB_MODE_INFO *mbmi) {
- return is_rect_tx_allowed_bsize(mbmi->sb_type) &&
- !xd->lossless[mbmi->segment_id];
-}
-
-static INLINE int tx_size_to_depth(TX_SIZE tx_size, BLOCK_SIZE bsize) {
- TX_SIZE ctx_size = max_txsize_rect_lookup[bsize];
- int depth = 0;
- while (tx_size != ctx_size) {
- depth++;
- ctx_size = sub_tx_size_map[ctx_size];
- assert(depth <= MAX_TX_DEPTH);
- }
- return depth;
-}
-
-static INLINE void set_blk_skip(MACROBLOCK *x, int plane, int blk_idx,
- int skip) {
- if (skip)
- x->blk_skip[blk_idx] |= 1UL << plane;
- else
- x->blk_skip[blk_idx] &= ~(1UL << plane);
-#ifndef NDEBUG
- // Set chroma planes to uninitialized states when luma is set to check if
- // it will be set later
- if (plane == 0) {
- x->blk_skip[blk_idx] |= 1UL << (1 + 4);
- x->blk_skip[blk_idx] |= 1UL << (2 + 4);
- }
-
- // Clear the initialization checking bit
- x->blk_skip[blk_idx] &= ~(1UL << (plane + 4));
-#endif
-}
-
-static INLINE int is_blk_skip(MACROBLOCK *x, int plane, int blk_idx) {
-#ifndef NDEBUG
- // Check if this is initialized
- assert(!(x->blk_skip[blk_idx] & (1UL << (plane + 4))));
-
- // The magic number is 0x77, this is to test if there is garbage data
- assert((x->blk_skip[blk_idx] & 0x88) == 0);
-#endif
- return (x->blk_skip[blk_idx] >> plane) & 1;
-}
-
-#ifdef __cplusplus
-} // extern "C"
-#endif
-
-#endif // AOM_AV1_ENCODER_BLOCK_H_
diff --git a/third_party/aom/av1/encoder/blockiness.c b/third_party/aom/av1/encoder/blockiness.c
deleted file mode 100644
index f7cff9e53..000000000
--- a/third_party/aom/av1/encoder/blockiness.c
+++ /dev/null
@@ -1,142 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#include "config/av1_rtcd.h"
-#include "config/aom_config.h"
-#include "config/aom_dsp_rtcd.h"
-
-#include "av1/common/common.h"
-#include "av1/common/filter.h"
-#include "aom/aom_integer.h"
-#include "aom_dsp/aom_filter.h"
-#include "aom_ports/mem.h"
-#include "aom_ports/system_state.h"
-
-static int horizontal_filter(const uint8_t *s) {
- return (s[1] - s[-2]) * 2 + (s[-1] - s[0]) * 6;
-}
-
-static int vertical_filter(const uint8_t *s, int p) {
- return (s[p] - s[-2 * p]) * 2 + (s[-p] - s[0]) * 6;
-}
-
-static int variance(int sum, int sum_squared, int size) {
- return sum_squared / size - (sum / size) * (sum / size);
-}
-// Calculate a blockiness level for a vertical block edge.
-// This function returns a new blockiness metric that's defined as
-
-// p0 p1 p2 p3
-// q0 q1 q2 q3
-// block edge ->
-// r0 r1 r2 r3
-// s0 s1 s2 s3
-
-// blockiness = p0*-2+q0*6+r0*-6+s0*2 +
-// p1*-2+q1*6+r1*-6+s1*2 +
-// p2*-2+q2*6+r2*-6+s2*2 +
-// p3*-2+q3*6+r3*-6+s3*2 ;
-
-// reconstructed_blockiness = abs(blockiness from reconstructed buffer -
-// blockiness from source buffer,0)
-//
-// I make the assumption that flat blocks are much more visible than high
-// contrast blocks. As such, I scale the result of the blockiness calc
-// by dividing the blockiness by the variance of the pixels on either side
-// of the edge as follows:
-// var_0 = (q0^2+q1^2+q2^2+q3^2) - ((q0 + q1 + q2 + q3) / 4 )^2
-// var_1 = (r0^2+r1^2+r2^2+r3^2) - ((r0 + r1 + r2 + r3) / 4 )^2
-// The returned blockiness is the scaled value
-// Reconstructed blockiness / ( 1 + var_0 + var_1 ) ;
-static int blockiness_vertical(const uint8_t *s, int sp, const uint8_t *r,
- int rp, int size) {
- int s_blockiness = 0;
- int r_blockiness = 0;
- int sum_0 = 0;
- int sum_sq_0 = 0;
- int sum_1 = 0;
- int sum_sq_1 = 0;
- int i;
- int var_0;
- int var_1;
- for (i = 0; i < size; ++i, s += sp, r += rp) {
- s_blockiness += horizontal_filter(s);
- r_blockiness += horizontal_filter(r);
- sum_0 += s[0];
- sum_sq_0 += s[0] * s[0];
- sum_1 += s[-1];
- sum_sq_1 += s[-1] * s[-1];
- }
- var_0 = variance(sum_0, sum_sq_0, size);
- var_1 = variance(sum_1, sum_sq_1, size);
- r_blockiness = abs(r_blockiness);
- s_blockiness = abs(s_blockiness);
-
- if (r_blockiness > s_blockiness)
- return (r_blockiness - s_blockiness) / (1 + var_0 + var_1);
- else
- return 0;
-}
-
-// Calculate a blockiness level for a horizontal block edge
-// same as above.
-static int blockiness_horizontal(const uint8_t *s, int sp, const uint8_t *r,
- int rp, int size) {
- int s_blockiness = 0;
- int r_blockiness = 0;
- int sum_0 = 0;
- int sum_sq_0 = 0;
- int sum_1 = 0;
- int sum_sq_1 = 0;
- int i;
- int var_0;
- int var_1;
- for (i = 0; i < size; ++i, ++s, ++r) {
- s_blockiness += vertical_filter(s, sp);
- r_blockiness += vertical_filter(r, rp);
- sum_0 += s[0];
- sum_sq_0 += s[0] * s[0];
- sum_1 += s[-sp];
- sum_sq_1 += s[-sp] * s[-sp];
- }
- var_0 = variance(sum_0, sum_sq_0, size);
- var_1 = variance(sum_1, sum_sq_1, size);
- r_blockiness = abs(r_blockiness);
- s_blockiness = abs(s_blockiness);
-
- if (r_blockiness > s_blockiness)
- return (r_blockiness - s_blockiness) / (1 + var_0 + var_1);
- else
- return 0;
-}
-
-// This function returns the blockiness for the entire frame currently by
-// looking at all borders in steps of 4.
-double av1_get_blockiness(const unsigned char *img1, int img1_pitch,
- const unsigned char *img2, int img2_pitch, int width,
- int height) {
- double blockiness = 0;
- int i, j;
- aom_clear_system_state();
- for (i = 0; i < height;
- i += 4, img1 += img1_pitch * 4, img2 += img2_pitch * 4) {
- for (j = 0; j < width; j += 4) {
- if (i > 0 && i < height && j > 0 && j < width) {
- blockiness +=
- blockiness_vertical(img1 + j, img1_pitch, img2 + j, img2_pitch, 4);
- blockiness += blockiness_horizontal(img1 + j, img1_pitch, img2 + j,
- img2_pitch, 4);
- }
- }
- }
- blockiness /= width * height / 16;
- return blockiness;
-}
diff --git a/third_party/aom/av1/encoder/context_tree.c b/third_party/aom/av1/encoder/context_tree.c
deleted file mode 100644
index 57f59f304..000000000
--- a/third_party/aom/av1/encoder/context_tree.c
+++ /dev/null
@@ -1,215 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#include "av1/encoder/context_tree.h"
-#include "av1/encoder/encoder.h"
-
-static const BLOCK_SIZE square[MAX_SB_SIZE_LOG2 - 1] = {
- BLOCK_4X4, BLOCK_8X8, BLOCK_16X16, BLOCK_32X32, BLOCK_64X64, BLOCK_128X128,
-};
-
-static void alloc_mode_context(AV1_COMMON *cm, int num_pix,
- PICK_MODE_CONTEXT *ctx) {
- const int num_planes = av1_num_planes(cm);
- int i;
- const int num_blk = num_pix / 16;
- ctx->num_4x4_blk = num_blk;
-
- CHECK_MEM_ERROR(cm, ctx->blk_skip, aom_calloc(num_blk, sizeof(uint8_t)));
- for (i = 0; i < num_planes; ++i) {
- CHECK_MEM_ERROR(cm, ctx->coeff[i],
- aom_memalign(32, num_pix * sizeof(*ctx->coeff[i])));
- CHECK_MEM_ERROR(cm, ctx->qcoeff[i],
- aom_memalign(32, num_pix * sizeof(*ctx->qcoeff[i])));
- CHECK_MEM_ERROR(cm, ctx->dqcoeff[i],
- aom_memalign(32, num_pix * sizeof(*ctx->dqcoeff[i])));
- CHECK_MEM_ERROR(cm, ctx->eobs[i],
- aom_memalign(32, num_blk * sizeof(*ctx->eobs[i])));
- CHECK_MEM_ERROR(
- cm, ctx->txb_entropy_ctx[i],
- aom_memalign(32, num_blk * sizeof(*ctx->txb_entropy_ctx[i])));
- }
-
- if (num_pix <= MAX_PALETTE_SQUARE) {
- for (i = 0; i < 2; ++i) {
- CHECK_MEM_ERROR(
- cm, ctx->color_index_map[i],
- aom_memalign(32, num_pix * sizeof(*ctx->color_index_map[i])));
- }
- }
-}
-
-static void free_mode_context(PICK_MODE_CONTEXT *ctx, const int num_planes) {
- int i;
- aom_free(ctx->blk_skip);
- ctx->blk_skip = 0;
- for (i = 0; i < num_planes; ++i) {
- aom_free(ctx->coeff[i]);
- ctx->coeff[i] = 0;
- aom_free(ctx->qcoeff[i]);
- ctx->qcoeff[i] = 0;
- aom_free(ctx->dqcoeff[i]);
- ctx->dqcoeff[i] = 0;
- aom_free(ctx->eobs[i]);
- ctx->eobs[i] = 0;
- aom_free(ctx->txb_entropy_ctx[i]);
- ctx->txb_entropy_ctx[i] = 0;
- }
-
- for (i = 0; i < 2; ++i) {
- aom_free(ctx->color_index_map[i]);
- ctx->color_index_map[i] = 0;
- }
-}
-
-static void alloc_tree_contexts(AV1_COMMON *cm, PC_TREE *tree, int num_pix,
- int is_leaf) {
- alloc_mode_context(cm, num_pix, &tree->none);
-
- if (is_leaf) return;
-
- alloc_mode_context(cm, num_pix / 2, &tree->horizontal[0]);
- alloc_mode_context(cm, num_pix / 2, &tree->vertical[0]);
-
- alloc_mode_context(cm, num_pix / 2, &tree->horizontal[1]);
- alloc_mode_context(cm, num_pix / 2, &tree->vertical[1]);
-
- alloc_mode_context(cm, num_pix / 4, &tree->horizontala[0]);
- alloc_mode_context(cm, num_pix / 4, &tree->horizontala[1]);
- alloc_mode_context(cm, num_pix / 2, &tree->horizontala[2]);
-
- alloc_mode_context(cm, num_pix / 2, &tree->horizontalb[0]);
- alloc_mode_context(cm, num_pix / 4, &tree->horizontalb[1]);
- alloc_mode_context(cm, num_pix / 4, &tree->horizontalb[2]);
-
- alloc_mode_context(cm, num_pix / 4, &tree->verticala[0]);
- alloc_mode_context(cm, num_pix / 4, &tree->verticala[1]);
- alloc_mode_context(cm, num_pix / 2, &tree->verticala[2]);
-
- alloc_mode_context(cm, num_pix / 2, &tree->verticalb[0]);
- alloc_mode_context(cm, num_pix / 4, &tree->verticalb[1]);
- alloc_mode_context(cm, num_pix / 4, &tree->verticalb[2]);
-
- for (int i = 0; i < 4; ++i) {
- alloc_mode_context(cm, num_pix / 4, &tree->horizontal4[i]);
- alloc_mode_context(cm, num_pix / 4, &tree->vertical4[i]);
- }
-}
-
-static void free_tree_contexts(PC_TREE *tree, const int num_planes) {
- int i;
- for (i = 0; i < 3; i++) {
- free_mode_context(&tree->horizontala[i], num_planes);
- free_mode_context(&tree->horizontalb[i], num_planes);
- free_mode_context(&tree->verticala[i], num_planes);
- free_mode_context(&tree->verticalb[i], num_planes);
- }
- for (i = 0; i < 4; ++i) {
- free_mode_context(&tree->horizontal4[i], num_planes);
- free_mode_context(&tree->vertical4[i], num_planes);
- }
- free_mode_context(&tree->none, num_planes);
- free_mode_context(&tree->horizontal[0], num_planes);
- free_mode_context(&tree->horizontal[1], num_planes);
- free_mode_context(&tree->vertical[0], num_planes);
- free_mode_context(&tree->vertical[1], num_planes);
-}
-
-// This function sets up a tree of contexts such that at each square
-// partition level. There are contexts for none, horizontal, vertical, and
-// split. Along with a block_size value and a selected block_size which
-// represents the state of our search.
-void av1_setup_pc_tree(AV1_COMMON *cm, ThreadData *td) {
- int i, j;
- const int tree_nodes_inc = 1024;
- const int leaf_factor = 4;
- const int leaf_nodes = 256 * leaf_factor;
- const int tree_nodes = tree_nodes_inc + 256 + 64 + 16 + 4 + 1;
- int pc_tree_index = 0;
- PC_TREE *this_pc;
- int square_index = 1;
- int nodes;
-
- aom_free(td->pc_tree);
- CHECK_MEM_ERROR(cm, td->pc_tree,
- aom_calloc(tree_nodes, sizeof(*td->pc_tree)));
- this_pc = &td->pc_tree[0];
-
- // Sets up all the leaf nodes in the tree.
- for (pc_tree_index = 0; pc_tree_index < leaf_nodes; ++pc_tree_index) {
- PC_TREE *const tree = &td->pc_tree[pc_tree_index];
- tree->block_size = square[0];
- alloc_tree_contexts(cm, tree, 16, 1);
- }
-
- // Each node has 4 leaf nodes, fill each block_size level of the tree
- // from leafs to the root.
- for (nodes = leaf_nodes >> 2; nodes > 0; nodes >>= 2) {
- for (i = 0; i < nodes; ++i) {
- PC_TREE *const tree = &td->pc_tree[pc_tree_index];
- alloc_tree_contexts(cm, tree, 16 << (2 * square_index), 0);
- tree->block_size = square[square_index];
- for (j = 0; j < 4; j++) tree->split[j] = this_pc++;
- ++pc_tree_index;
- }
- ++square_index;
- }
-
- // Set up the root node for the largest superblock size
- i = MAX_MIB_SIZE_LOG2 - MIN_MIB_SIZE_LOG2;
- td->pc_root[i] = &td->pc_tree[tree_nodes - 1];
- td->pc_root[i]->none.best_mode_index = 2;
- // Set up the root nodes for the rest of the possible superblock sizes
- while (--i >= 0) {
- td->pc_root[i] = td->pc_root[i + 1]->split[0];
- td->pc_root[i]->none.best_mode_index = 2;
- }
-}
-
-void av1_free_pc_tree(ThreadData *td, const int num_planes) {
- if (td->pc_tree != NULL) {
- const int tree_nodes_inc = 1024;
- const int tree_nodes = tree_nodes_inc + 256 + 64 + 16 + 4 + 1;
- for (int i = 0; i < tree_nodes; ++i) {
- free_tree_contexts(&td->pc_tree[i], num_planes);
- }
- aom_free(td->pc_tree);
- td->pc_tree = NULL;
- }
-}
-
-void av1_copy_tree_context(PICK_MODE_CONTEXT *dst_ctx,
- PICK_MODE_CONTEXT *src_ctx) {
- dst_ctx->mic = src_ctx->mic;
- dst_ctx->mbmi_ext = src_ctx->mbmi_ext;
-
- dst_ctx->num_4x4_blk = src_ctx->num_4x4_blk;
- dst_ctx->skip = src_ctx->skip;
- dst_ctx->skippable = src_ctx->skippable;
- dst_ctx->best_mode_index = src_ctx->best_mode_index;
-
- memcpy(dst_ctx->blk_skip, src_ctx->blk_skip,
- sizeof(uint8_t) * src_ctx->num_4x4_blk);
-
- dst_ctx->hybrid_pred_diff = src_ctx->hybrid_pred_diff;
- dst_ctx->comp_pred_diff = src_ctx->comp_pred_diff;
- dst_ctx->single_pred_diff = src_ctx->single_pred_diff;
-
- dst_ctx->rate = src_ctx->rate;
- dst_ctx->dist = src_ctx->dist;
- dst_ctx->rdcost = src_ctx->rdcost;
- dst_ctx->rd_mode_is_ready = src_ctx->rd_mode_is_ready;
-
- memcpy(dst_ctx->pred_mv, src_ctx->pred_mv, sizeof(MV) * REF_FRAMES);
- dst_ctx->pred_interp_filter = src_ctx->pred_interp_filter;
-
- dst_ctx->partition = src_ctx->partition;
-}
diff --git a/third_party/aom/av1/encoder/context_tree.h b/third_party/aom/av1/encoder/context_tree.h
deleted file mode 100644
index 4efc34985..000000000
--- a/third_party/aom/av1/encoder/context_tree.h
+++ /dev/null
@@ -1,114 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#ifndef AOM_AV1_ENCODER_CONTEXT_TREE_H_
-#define AOM_AV1_ENCODER_CONTEXT_TREE_H_
-
-#include "av1/common/blockd.h"
-#include "av1/encoder/block.h"
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-struct AV1_COMP;
-struct AV1Common;
-struct ThreadData;
-
-typedef enum {
- // Search all the partition types in this plane.
- SEARCH_FULL_PLANE = 0,
- // Only search none_partition coding block.
- NONE_PARTITION_PLANE = 1,
- // Search all the partition types in this plane except split.
- SEARCH_SAME_PLANE = 2,
- // Skip search partition on this plane. Go split directly.
- SPLIT_PLANE = 3,
-} CB_TREE_SEARCH;
-
-// Structure to hold snapshot of coding context during the mode picking process
-typedef struct {
- MB_MODE_INFO mic;
- MB_MODE_INFO_EXT mbmi_ext;
- uint8_t *color_index_map[2];
- uint8_t *blk_skip;
-
- tran_low_t *coeff[MAX_MB_PLANE];
- tran_low_t *qcoeff[MAX_MB_PLANE];
- tran_low_t *dqcoeff[MAX_MB_PLANE];
- uint16_t *eobs[MAX_MB_PLANE];
- uint8_t *txb_entropy_ctx[MAX_MB_PLANE];
-
- int num_4x4_blk;
- int skip;
- // For current partition, only if all Y, U, and V transform blocks'
- // coefficients are quantized to 0, skippable is set to 1.
- int skippable;
- int best_mode_index;
- int hybrid_pred_diff;
- int comp_pred_diff;
- int single_pred_diff;
- // Skip certain ref frames during RD search of rectangular partitions.
- int skip_ref_frame_mask;
-
- // TODO(jingning) Use RD_COST struct here instead. This involves a boarder
- // scope of refactoring.
- int rate;
- int64_t dist;
- int64_t rdcost;
- int rd_mode_is_ready; // Flag to indicate whether rd pick mode decision has
- // been made.
-
- // motion vector cache for adaptive motion search control in partition
- // search loop
- MV pred_mv[REF_FRAMES];
- InterpFilter pred_interp_filter;
- PARTITION_TYPE partition;
-} PICK_MODE_CONTEXT;
-
-typedef struct {
- int valid;
- int split;
- int skip;
- int64_t rdcost;
- int sub_block_split[4];
- int sub_block_skip[4];
- int64_t sub_block_rdcost[4];
-} PC_TREE_STATS;
-
-typedef struct PC_TREE {
- int index;
- PARTITION_TYPE partitioning;
- BLOCK_SIZE block_size;
- PICK_MODE_CONTEXT none;
- PICK_MODE_CONTEXT horizontal[2];
- PICK_MODE_CONTEXT vertical[2];
- PICK_MODE_CONTEXT horizontala[3];
- PICK_MODE_CONTEXT horizontalb[3];
- PICK_MODE_CONTEXT verticala[3];
- PICK_MODE_CONTEXT verticalb[3];
- PICK_MODE_CONTEXT horizontal4[4];
- PICK_MODE_CONTEXT vertical4[4];
- CB_TREE_SEARCH cb_search_range;
- struct PC_TREE *split[4];
- PC_TREE_STATS pc_tree_stats;
-} PC_TREE;
-
-void av1_setup_pc_tree(struct AV1Common *cm, struct ThreadData *td);
-void av1_free_pc_tree(struct ThreadData *td, const int num_planes);
-void av1_copy_tree_context(PICK_MODE_CONTEXT *dst_ctx,
- PICK_MODE_CONTEXT *src_ctx);
-
-#ifdef __cplusplus
-} // extern "C"
-#endif
-
-#endif // AOM_AV1_ENCODER_CONTEXT_TREE_H_
diff --git a/third_party/aom/av1/encoder/corner_detect.c b/third_party/aom/av1/encoder/corner_detect.c
deleted file mode 100644
index e4c59dd9c..000000000
--- a/third_party/aom/av1/encoder/corner_detect.c
+++ /dev/null
@@ -1,37 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#include <stdlib.h>
-#include <stdio.h>
-#include <memory.h>
-#include <math.h>
-#include <assert.h>
-
-#include "third_party/fastfeat/fast.h"
-
-#include "av1/encoder/corner_detect.h"
-
-// Fast_9 wrapper
-#define FAST_BARRIER 18
-int fast_corner_detect(unsigned char *buf, int width, int height, int stride,
- int *points, int max_points) {
- int num_points;
- xy *const frm_corners_xy = fast9_detect_nonmax(buf, width, height, stride,
- FAST_BARRIER, &num_points);
- num_points = (num_points <= max_points ? num_points : max_points);
- if (num_points > 0 && frm_corners_xy) {
- memcpy(points, frm_corners_xy, sizeof(*frm_corners_xy) * num_points);
- free(frm_corners_xy);
- return num_points;
- }
- free(frm_corners_xy);
- return 0;
-}
diff --git a/third_party/aom/av1/encoder/corner_detect.h b/third_party/aom/av1/encoder/corner_detect.h
deleted file mode 100644
index cab59a774..000000000
--- a/third_party/aom/av1/encoder/corner_detect.h
+++ /dev/null
@@ -1,22 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#ifndef AOM_AV1_ENCODER_CORNER_DETECT_H_
-#define AOM_AV1_ENCODER_CORNER_DETECT_H_
-
-#include <stdio.h>
-#include <stdlib.h>
-#include <memory.h>
-
-int fast_corner_detect(unsigned char *buf, int width, int height, int stride,
- int *points, int max_points);
-
-#endif // AOM_AV1_ENCODER_CORNER_DETECT_H_
diff --git a/third_party/aom/av1/encoder/corner_match.c b/third_party/aom/av1/encoder/corner_match.c
deleted file mode 100644
index 29e934deb..000000000
--- a/third_party/aom/av1/encoder/corner_match.c
+++ /dev/null
@@ -1,191 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#include <stdlib.h>
-#include <memory.h>
-#include <math.h>
-
-#include "config/av1_rtcd.h"
-
-#include "av1/encoder/corner_match.h"
-
-#define SEARCH_SZ 9
-#define SEARCH_SZ_BY2 ((SEARCH_SZ - 1) / 2)
-
-#define THRESHOLD_NCC 0.75
-
-/* Compute var(im) * MATCH_SZ_SQ over a MATCH_SZ by MATCH_SZ window of im,
- centered at (x, y).
-*/
-static double compute_variance(unsigned char *im, int stride, int x, int y) {
- int sum = 0;
- int sumsq = 0;
- int var;
- int i, j;
- for (i = 0; i < MATCH_SZ; ++i)
- for (j = 0; j < MATCH_SZ; ++j) {
- sum += im[(i + y - MATCH_SZ_BY2) * stride + (j + x - MATCH_SZ_BY2)];
- sumsq += im[(i + y - MATCH_SZ_BY2) * stride + (j + x - MATCH_SZ_BY2)] *
- im[(i + y - MATCH_SZ_BY2) * stride + (j + x - MATCH_SZ_BY2)];
- }
- var = sumsq * MATCH_SZ_SQ - sum * sum;
- return (double)var;
-}
-
-/* Compute corr(im1, im2) * MATCH_SZ * stddev(im1), where the
- correlation/standard deviation are taken over MATCH_SZ by MATCH_SZ windows
- of each image, centered at (x1, y1) and (x2, y2) respectively.
-*/
-double compute_cross_correlation_c(unsigned char *im1, int stride1, int x1,
- int y1, unsigned char *im2, int stride2,
- int x2, int y2) {
- int v1, v2;
- int sum1 = 0;
- int sum2 = 0;
- int sumsq2 = 0;
- int cross = 0;
- int var2, cov;
- int i, j;
- for (i = 0; i < MATCH_SZ; ++i)
- for (j = 0; j < MATCH_SZ; ++j) {
- v1 = im1[(i + y1 - MATCH_SZ_BY2) * stride1 + (j + x1 - MATCH_SZ_BY2)];
- v2 = im2[(i + y2 - MATCH_SZ_BY2) * stride2 + (j + x2 - MATCH_SZ_BY2)];
- sum1 += v1;
- sum2 += v2;
- sumsq2 += v2 * v2;
- cross += v1 * v2;
- }
- var2 = sumsq2 * MATCH_SZ_SQ - sum2 * sum2;
- cov = cross * MATCH_SZ_SQ - sum1 * sum2;
- return cov / sqrt((double)var2);
-}
-
-static int is_eligible_point(int pointx, int pointy, int width, int height) {
- return (pointx >= MATCH_SZ_BY2 && pointy >= MATCH_SZ_BY2 &&
- pointx + MATCH_SZ_BY2 < width && pointy + MATCH_SZ_BY2 < height);
-}
-
-static int is_eligible_distance(int point1x, int point1y, int point2x,
- int point2y, int width, int height) {
- const int thresh = (width < height ? height : width) >> 4;
- return ((point1x - point2x) * (point1x - point2x) +
- (point1y - point2y) * (point1y - point2y)) <= thresh * thresh;
-}
-
-static void improve_correspondence(unsigned char *frm, unsigned char *ref,
- int width, int height, int frm_stride,
- int ref_stride,
- Correspondence *correspondences,
- int num_correspondences) {
- int i;
- for (i = 0; i < num_correspondences; ++i) {
- int x, y, best_x = 0, best_y = 0;
- double best_match_ncc = 0.0;
- for (y = -SEARCH_SZ_BY2; y <= SEARCH_SZ_BY2; ++y) {
- for (x = -SEARCH_SZ_BY2; x <= SEARCH_SZ_BY2; ++x) {
- double match_ncc;
- if (!is_eligible_point(correspondences[i].rx + x,
- correspondences[i].ry + y, width, height))
- continue;
- if (!is_eligible_distance(correspondences[i].x, correspondences[i].y,
- correspondences[i].rx + x,
- correspondences[i].ry + y, width, height))
- continue;
- match_ncc = compute_cross_correlation(
- frm, frm_stride, correspondences[i].x, correspondences[i].y, ref,
- ref_stride, correspondences[i].rx + x, correspondences[i].ry + y);
- if (match_ncc > best_match_ncc) {
- best_match_ncc = match_ncc;
- best_y = y;
- best_x = x;
- }
- }
- }
- correspondences[i].rx += best_x;
- correspondences[i].ry += best_y;
- }
- for (i = 0; i < num_correspondences; ++i) {
- int x, y, best_x = 0, best_y = 0;
- double best_match_ncc = 0.0;
- for (y = -SEARCH_SZ_BY2; y <= SEARCH_SZ_BY2; ++y)
- for (x = -SEARCH_SZ_BY2; x <= SEARCH_SZ_BY2; ++x) {
- double match_ncc;
- if (!is_eligible_point(correspondences[i].x + x,
- correspondences[i].y + y, width, height))
- continue;
- if (!is_eligible_distance(
- correspondences[i].x + x, correspondences[i].y + y,
- correspondences[i].rx, correspondences[i].ry, width, height))
- continue;
- match_ncc = compute_cross_correlation(
- ref, ref_stride, correspondences[i].rx, correspondences[i].ry, frm,
- frm_stride, correspondences[i].x + x, correspondences[i].y + y);
- if (match_ncc > best_match_ncc) {
- best_match_ncc = match_ncc;
- best_y = y;
- best_x = x;
- }
- }
- correspondences[i].x += best_x;
- correspondences[i].y += best_y;
- }
-}
-
-int determine_correspondence(unsigned char *frm, int *frm_corners,
- int num_frm_corners, unsigned char *ref,
- int *ref_corners, int num_ref_corners, int width,
- int height, int frm_stride, int ref_stride,
- int *correspondence_pts) {
- // TODO(sarahparker) Improve this to include 2-way match
- int i, j;
- Correspondence *correspondences = (Correspondence *)correspondence_pts;
- int num_correspondences = 0;
- for (i = 0; i < num_frm_corners; ++i) {
- double best_match_ncc = 0.0;
- double template_norm;
- int best_match_j = -1;
- if (!is_eligible_point(frm_corners[2 * i], frm_corners[2 * i + 1], width,
- height))
- continue;
- for (j = 0; j < num_ref_corners; ++j) {
- double match_ncc;
- if (!is_eligible_point(ref_corners[2 * j], ref_corners[2 * j + 1], width,
- height))
- continue;
- if (!is_eligible_distance(frm_corners[2 * i], frm_corners[2 * i + 1],
- ref_corners[2 * j], ref_corners[2 * j + 1],
- width, height))
- continue;
- match_ncc = compute_cross_correlation(
- frm, frm_stride, frm_corners[2 * i], frm_corners[2 * i + 1], ref,
- ref_stride, ref_corners[2 * j], ref_corners[2 * j + 1]);
- if (match_ncc > best_match_ncc) {
- best_match_ncc = match_ncc;
- best_match_j = j;
- }
- }
- // Note: We want to test if the best correlation is >= THRESHOLD_NCC,
- // but need to account for the normalization in compute_cross_correlation.
- template_norm = compute_variance(frm, frm_stride, frm_corners[2 * i],
- frm_corners[2 * i + 1]);
- if (best_match_ncc > THRESHOLD_NCC * sqrt(template_norm)) {
- correspondences[num_correspondences].x = frm_corners[2 * i];
- correspondences[num_correspondences].y = frm_corners[2 * i + 1];
- correspondences[num_correspondences].rx = ref_corners[2 * best_match_j];
- correspondences[num_correspondences].ry =
- ref_corners[2 * best_match_j + 1];
- num_correspondences++;
- }
- }
- improve_correspondence(frm, ref, width, height, frm_stride, ref_stride,
- correspondences, num_correspondences);
- return num_correspondences;
-}
diff --git a/third_party/aom/av1/encoder/corner_match.h b/third_party/aom/av1/encoder/corner_match.h
deleted file mode 100644
index 535d2faed..000000000
--- a/third_party/aom/av1/encoder/corner_match.h
+++ /dev/null
@@ -1,33 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-#ifndef AOM_AV1_ENCODER_CORNER_MATCH_H_
-#define AOM_AV1_ENCODER_CORNER_MATCH_H_
-
-#include <stdio.h>
-#include <stdlib.h>
-#include <memory.h>
-
-#define MATCH_SZ 13
-#define MATCH_SZ_BY2 ((MATCH_SZ - 1) / 2)
-#define MATCH_SZ_SQ (MATCH_SZ * MATCH_SZ)
-
-typedef struct {
- int x, y;
- int rx, ry;
-} Correspondence;
-
-int determine_correspondence(unsigned char *frm, int *frm_corners,
- int num_frm_corners, unsigned char *ref,
- int *ref_corners, int num_ref_corners, int width,
- int height, int frm_stride, int ref_stride,
- int *correspondence_pts);
-
-#endif // AOM_AV1_ENCODER_CORNER_MATCH_H_
diff --git a/third_party/aom/av1/encoder/cost.c b/third_party/aom/av1/encoder/cost.c
deleted file mode 100644
index 323e2aed5..000000000
--- a/third_party/aom/av1/encoder/cost.c
+++ /dev/null
@@ -1,46 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-#include <assert.h>
-
-#include "av1/encoder/cost.h"
-#include "av1/common/entropy.h"
-
-// round(-log2(i/256.) * (1 << AV1_PROB_COST_SHIFT)); i = 128~255.
-const uint16_t av1_prob_cost[128] = {
- 512, 506, 501, 495, 489, 484, 478, 473, 467, 462, 456, 451, 446, 441, 435,
- 430, 425, 420, 415, 410, 405, 400, 395, 390, 385, 380, 375, 371, 366, 361,
- 356, 352, 347, 343, 338, 333, 329, 324, 320, 316, 311, 307, 302, 298, 294,
- 289, 285, 281, 277, 273, 268, 264, 260, 256, 252, 248, 244, 240, 236, 232,
- 228, 224, 220, 216, 212, 209, 205, 201, 197, 194, 190, 186, 182, 179, 175,
- 171, 168, 164, 161, 157, 153, 150, 146, 143, 139, 136, 132, 129, 125, 122,
- 119, 115, 112, 109, 105, 102, 99, 95, 92, 89, 86, 82, 79, 76, 73,
- 70, 66, 63, 60, 57, 54, 51, 48, 45, 42, 38, 35, 32, 29, 26,
- 23, 20, 18, 15, 12, 9, 6, 3,
-};
-
-void av1_cost_tokens_from_cdf(int *costs, const aom_cdf_prob *cdf,
- const int *inv_map) {
- int i;
- aom_cdf_prob prev_cdf = 0;
- for (i = 0;; ++i) {
- aom_cdf_prob p15 = AOM_ICDF(cdf[i]) - prev_cdf;
- p15 = (p15 < EC_MIN_PROB) ? EC_MIN_PROB : p15;
- prev_cdf = AOM_ICDF(cdf[i]);
-
- if (inv_map)
- costs[inv_map[i]] = av1_cost_symbol(p15);
- else
- costs[i] = av1_cost_symbol(p15);
-
- // Stop once we reach the end of the CDF
- if (cdf[i] == AOM_ICDF(CDF_PROB_TOP)) break;
- }
-}
diff --git a/third_party/aom/av1/encoder/cost.h b/third_party/aom/av1/encoder/cost.h
deleted file mode 100644
index af5b09837..000000000
--- a/third_party/aom/av1/encoder/cost.h
+++ /dev/null
@@ -1,47 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#ifndef AOM_AV1_ENCODER_COST_H_
-#define AOM_AV1_ENCODER_COST_H_
-
-#include "aom_dsp/prob.h"
-#include "aom/aom_integer.h"
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-extern const uint16_t av1_prob_cost[128];
-
-// The factor to scale from cost in bits to cost in av1_prob_cost units.
-#define AV1_PROB_COST_SHIFT 9
-
-// Cost of coding an n bit literal, using 128 (i.e. 50%) probability
-// for each bit.
-#define av1_cost_literal(n) ((n) * (1 << AV1_PROB_COST_SHIFT))
-
-// Calculate the cost of a symbol with probability p15 / 2^15
-static INLINE int av1_cost_symbol(aom_cdf_prob p15) {
- assert(0 < p15 && p15 < CDF_PROB_TOP);
- const int shift = CDF_PROB_BITS - 1 - get_msb(p15);
- const int prob = get_prob(p15 << shift, CDF_PROB_TOP);
- assert(prob >= 128);
- return av1_prob_cost[prob - 128] + av1_cost_literal(shift);
-}
-
-void av1_cost_tokens_from_cdf(int *costs, const aom_cdf_prob *cdf,
- const int *inv_map);
-
-#ifdef __cplusplus
-} // extern "C"
-#endif
-
-#endif // AOM_AV1_ENCODER_COST_H_
diff --git a/third_party/aom/av1/encoder/dwt.c b/third_party/aom/av1/encoder/dwt.c
deleted file mode 100644
index 04088b25f..000000000
--- a/third_party/aom/av1/encoder/dwt.c
+++ /dev/null
@@ -1,155 +0,0 @@
-/*
- * Copyright (c) 2018, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#include <assert.h>
-#include <stdlib.h>
-#include <math.h>
-
-#include "config/av1_rtcd.h"
-#include "av1/encoder/dwt.h"
-
-// Note: block length must be even for this implementation
-static void analysis_53_row(int length, tran_low_t *x, tran_low_t *lowpass,
- tran_low_t *highpass) {
- int n;
- tran_low_t r, *a, *b;
-
- n = length >> 1;
- b = highpass;
- a = lowpass;
- while (--n) {
- *a++ = (r = *x++) * 2;
- *b++ = *x - ((r + x[1] + 1) >> 1);
- x++;
- }
- *a = (r = *x++) * 2;
- *b = *x - r;
-
- n = length >> 1;
- b = highpass;
- a = lowpass;
- r = *highpass;
- while (n--) {
- *a++ += (r + (*b) + 1) >> 1;
- r = *b++;
- }
-}
-
-static void analysis_53_col(int length, tran_low_t *x, tran_low_t *lowpass,
- tran_low_t *highpass) {
- int n;
- tran_low_t r, *a, *b;
-
- n = length >> 1;
- b = highpass;
- a = lowpass;
- while (--n) {
- *a++ = (r = *x++);
- *b++ = (((*x) * 2) - (r + x[1]) + 2) >> 2;
- x++;
- }
- *a = (r = *x++);
- *b = (*x - r + 1) >> 1;
-
- n = length >> 1;
- b = highpass;
- a = lowpass;
- r = *highpass;
- while (n--) {
- *a++ += (r + (*b) + 1) >> 1;
- r = *b++;
- }
-}
-
-static void dyadic_analyze_53_uint8_input(int levels, int width, int height,
- uint8_t *x, int pitch_x,
- tran_low_t *c, int pitch_c,
- int dwt_scale_bits, int hbd) {
- int lv, i, j, nh, nw, hh = height, hw = width;
- tran_low_t buffer[2 * DWT_MAX_LENGTH];
-
- if (hbd) {
- uint16_t *x16 = CONVERT_TO_SHORTPTR(x);
- for (i = 0; i < height; i++) {
- for (j = 0; j < width; j++) {
- c[i * pitch_c + j] = x16[i * pitch_x + j] << dwt_scale_bits;
- }
- }
- } else {
- for (i = 0; i < height; i++) {
- for (j = 0; j < width; j++) {
- c[i * pitch_c + j] = x[i * pitch_x + j] << dwt_scale_bits;
- }
- }
- }
-
- for (lv = 0; lv < levels; lv++) {
- nh = hh;
- hh = (hh + 1) >> 1;
- nw = hw;
- hw = (hw + 1) >> 1;
- if ((nh < 2) || (nw < 2)) return;
- for (i = 0; i < nh; i++) {
- memcpy(buffer, &c[i * pitch_c], nw * sizeof(tran_low_t));
- analysis_53_row(nw, buffer, &c[i * pitch_c], &c[i * pitch_c] + hw);
- }
- for (j = 0; j < nw; j++) {
- for (i = 0; i < nh; i++) buffer[i + nh] = c[i * pitch_c + j];
- analysis_53_col(nh, buffer + nh, buffer, buffer + hh);
- for (i = 0; i < nh; i++) c[i * pitch_c + j] = buffer[i];
- }
- }
-}
-
-void av1_fdwt8x8_uint8_input_c(uint8_t *input, tran_low_t *output, int stride,
- int hbd) {
- dyadic_analyze_53_uint8_input(4, 8, 8, input, stride, output, 8, 2, hbd);
-}
-
-int av1_haar_ac_sad(tran_low_t *output, int bw, int bh, int stride) {
- int acsad = 0;
-
- for (int r = 0; r < bh; ++r)
- for (int c = 0; c < bw; ++c) {
- if (r >= bh / 2 || c >= bw / 2) acsad += abs(output[r * stride + c]);
- }
- return acsad;
-}
-
-uint64_t av1_dct_ac_sad(tran_low_t *output, int bw, int bh, int stride) {
- uint64_t acsad = 0;
-
- for (int r = 0; r < bh; ++r)
- for (int c = 0; c < bw; ++c) {
- if (r > 0 || c > 0) acsad += abs(output[r * stride + c]);
- }
-
- return acsad;
-}
-
-uint32_t av1_variance(uint8_t *input, int bw, int bh, int stride) {
- int sum = 0;
- uint32_t sse = 0;
-
- for (int r = 0; r < bh; ++r)
- for (int c = 0; c < bw; ++c) {
- sum += input[r * stride + c];
- sse += input[r * stride + c] * input[r * stride + c];
- }
- return sse - (uint32_t)(((int64_t)sum * sum) / (bw * bh));
-}
-
-int av1_haar_ac_sad_8x8_uint8_input(uint8_t *input, int stride, int hbd) {
- tran_low_t output[64];
-
- av1_fdwt8x8_uint8_input_c(input, output, stride, hbd);
- return av1_haar_ac_sad(output, 8, 8, 8);
-}
diff --git a/third_party/aom/av1/encoder/dwt.h b/third_party/aom/av1/encoder/dwt.h
deleted file mode 100644
index 37306c6a5..000000000
--- a/third_party/aom/av1/encoder/dwt.h
+++ /dev/null
@@ -1,25 +0,0 @@
-/*
- * Copyright (c) 2018, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#ifndef AOM_AV1_ENCODER_DWT_H_
-#define AOM_AV1_ENCODER_DWT_H_
-
-#include "av1/common/common.h"
-#include "av1/common/enums.h"
-
-#define DWT_MAX_LENGTH 64
-
-void av1_fdwt8x8(tran_low_t *input, tran_low_t *output, int stride);
-void av1_fdwt8x8_uint8_input_c(uint8_t *input, tran_low_t *output, int stride,
- int hbd);
-int av1_haar_ac_sad_8x8_uint8_input(uint8_t *input, int stride, int hbd);
-
-#endif // AOM_AV1_ENCODER_DWT_H_
diff --git a/third_party/aom/av1/encoder/encodeframe.c b/third_party/aom/av1/encoder/encodeframe.c
deleted file mode 100644
index cb226c59e..000000000
--- a/third_party/aom/av1/encoder/encodeframe.c
+++ /dev/null
@@ -1,5739 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#include <limits.h>
-#include <math.h>
-#include <stdio.h>
-
-#include "config/aom_config.h"
-#include "config/aom_dsp_rtcd.h"
-#include "config/av1_rtcd.h"
-
-#include "aom_dsp/aom_dsp_common.h"
-#include "aom_dsp/binary_codes_writer.h"
-#include "aom_ports/mem.h"
-#include "aom_ports/aom_timer.h"
-#include "aom_ports/system_state.h"
-
-#if CONFIG_MISMATCH_DEBUG
-#include "aom_util/debug_util.h"
-#endif // CONFIG_MISMATCH_DEBUG
-
-#include "av1/common/cfl.h"
-#include "av1/common/common.h"
-#include "av1/common/entropy.h"
-#include "av1/common/entropymode.h"
-#include "av1/common/idct.h"
-#include "av1/common/mv.h"
-#include "av1/common/mvref_common.h"
-#include "av1/common/pred_common.h"
-#include "av1/common/quant_common.h"
-#include "av1/common/reconintra.h"
-#include "av1/common/reconinter.h"
-#include "av1/common/seg_common.h"
-#include "av1/common/tile_common.h"
-#include "av1/common/warped_motion.h"
-
-#include "av1/encoder/aq_complexity.h"
-#include "av1/encoder/aq_cyclicrefresh.h"
-#include "av1/encoder/aq_variance.h"
-#include "av1/encoder/global_motion.h"
-#include "av1/encoder/encodeframe.h"
-#include "av1/encoder/encodemb.h"
-#include "av1/encoder/encodemv.h"
-#include "av1/encoder/encodetxb.h"
-#include "av1/encoder/ethread.h"
-#include "av1/encoder/extend.h"
-#include "av1/encoder/ml.h"
-#include "av1/encoder/partition_model_weights.h"
-#include "av1/encoder/rd.h"
-#include "av1/encoder/rdopt.h"
-#include "av1/encoder/reconinter_enc.h"
-#include "av1/encoder/segmentation.h"
-#include "av1/encoder/tokenize.h"
-
-static void encode_superblock(const AV1_COMP *const cpi, TileDataEnc *tile_data,
- ThreadData *td, TOKENEXTRA **t, RUN_TYPE dry_run,
- int mi_row, int mi_col, BLOCK_SIZE bsize,
- int *rate);
-
-// This is used as a reference when computing the source variance for the
-// purposes of activity masking.
-// Eventually this should be replaced by custom no-reference routines,
-// which will be faster.
-static const uint8_t AV1_VAR_OFFS[MAX_SB_SIZE] = {
- 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128,
- 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128,
- 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128,
- 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128,
- 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128,
- 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128,
- 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128,
- 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128,
- 128, 128, 128, 128, 128, 128, 128, 128
-};
-
-static const uint16_t AV1_HIGH_VAR_OFFS_8[MAX_SB_SIZE] = {
- 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128,
- 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128,
- 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128,
- 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128,
- 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128,
- 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128,
- 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128,
- 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128,
- 128, 128, 128, 128, 128, 128, 128, 128
-};
-
-static const uint16_t AV1_HIGH_VAR_OFFS_10[MAX_SB_SIZE] = {
- 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4,
- 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4,
- 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4,
- 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4,
- 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4,
- 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4,
- 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4,
- 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4,
- 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4,
- 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4,
- 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4,
- 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4,
- 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4,
- 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4,
- 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4,
- 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4
-};
-
-static const uint16_t AV1_HIGH_VAR_OFFS_12[MAX_SB_SIZE] = {
- 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16,
- 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16,
- 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16,
- 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16,
- 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16,
- 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16,
- 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16,
- 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16,
- 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16,
- 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16,
- 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16,
- 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16,
- 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16,
- 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16,
- 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16,
- 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16,
- 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16,
- 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16,
- 128 * 16, 128 * 16
-};
-
-#if CONFIG_FP_MB_STATS
-static const uint8_t num_16x16_blocks_wide_lookup[BLOCK_SIZES_ALL] = {
- 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 4, 4, 4, 8, 8, 1, 1, 1, 2, 2, 4
-};
-static const uint8_t num_16x16_blocks_high_lookup[BLOCK_SIZES_ALL] = {
- 1, 1, 1, 1, 1, 1, 1, 2, 1, 2, 4, 2, 4, 8, 4, 8, 1, 1, 2, 1, 4, 2
-};
-#endif // CONFIG_FP_MB_STATS
-
-unsigned int av1_get_sby_perpixel_variance(const AV1_COMP *cpi,
- const struct buf_2d *ref,
- BLOCK_SIZE bs) {
- unsigned int sse;
- const unsigned int var =
- cpi->fn_ptr[bs].vf(ref->buf, ref->stride, AV1_VAR_OFFS, 0, &sse);
- return ROUND_POWER_OF_TWO(var, num_pels_log2_lookup[bs]);
-}
-
-unsigned int av1_high_get_sby_perpixel_variance(const AV1_COMP *cpi,
- const struct buf_2d *ref,
- BLOCK_SIZE bs, int bd) {
- unsigned int var, sse;
- switch (bd) {
- case 10:
- var =
- cpi->fn_ptr[bs].vf(ref->buf, ref->stride,
- CONVERT_TO_BYTEPTR(AV1_HIGH_VAR_OFFS_10), 0, &sse);
- break;
- case 12:
- var =
- cpi->fn_ptr[bs].vf(ref->buf, ref->stride,
- CONVERT_TO_BYTEPTR(AV1_HIGH_VAR_OFFS_12), 0, &sse);
- break;
- case 8:
- default:
- var =
- cpi->fn_ptr[bs].vf(ref->buf, ref->stride,
- CONVERT_TO_BYTEPTR(AV1_HIGH_VAR_OFFS_8), 0, &sse);
- break;
- }
- return ROUND_POWER_OF_TWO(var, num_pels_log2_lookup[bs]);
-}
-
-static unsigned int get_sby_perpixel_diff_variance(const AV1_COMP *const cpi,
- const struct buf_2d *ref,
- int mi_row, int mi_col,
- BLOCK_SIZE bs) {
- unsigned int sse, var;
- uint8_t *last_y;
- const YV12_BUFFER_CONFIG *last = get_ref_frame_buffer(cpi, LAST_FRAME);
-
- assert(last != NULL);
- last_y =
- &last->y_buffer[mi_row * MI_SIZE * last->y_stride + mi_col * MI_SIZE];
- var = cpi->fn_ptr[bs].vf(ref->buf, ref->stride, last_y, last->y_stride, &sse);
- return ROUND_POWER_OF_TWO(var, num_pels_log2_lookup[bs]);
-}
-
-static BLOCK_SIZE get_rd_var_based_fixed_partition(AV1_COMP *cpi, MACROBLOCK *x,
- int mi_row, int mi_col) {
- unsigned int var = get_sby_perpixel_diff_variance(
- cpi, &x->plane[0].src, mi_row, mi_col, BLOCK_64X64);
- if (var < 8)
- return BLOCK_64X64;
- else if (var < 128)
- return BLOCK_32X32;
- else if (var < 2048)
- return BLOCK_16X16;
- else
- return BLOCK_8X8;
-}
-
-// Lighter version of set_offsets that only sets the mode info
-// pointers.
-static void set_mode_info_offsets(const AV1_COMP *const cpi,
- MACROBLOCK *const x, MACROBLOCKD *const xd,
- int mi_row, int mi_col) {
- const AV1_COMMON *const cm = &cpi->common;
- const int idx_str = xd->mi_stride * mi_row + mi_col;
- xd->mi = cm->mi_grid_visible + idx_str;
- xd->mi[0] = cm->mi + idx_str;
- x->mbmi_ext = cpi->mbmi_ext_base + (mi_row * cm->mi_cols + mi_col);
-}
-
-static void set_offsets_without_segment_id(const AV1_COMP *const cpi,
- const TileInfo *const tile,
- MACROBLOCK *const x, int mi_row,
- int mi_col, BLOCK_SIZE bsize) {
- const AV1_COMMON *const cm = &cpi->common;
- const int num_planes = av1_num_planes(cm);
- MACROBLOCKD *const xd = &x->e_mbd;
- const int mi_width = mi_size_wide[bsize];
- const int mi_height = mi_size_high[bsize];
-
- set_mode_info_offsets(cpi, x, xd, mi_row, mi_col);
-
- set_skip_context(xd, mi_row, mi_col, num_planes);
- xd->above_txfm_context = cm->above_txfm_context[tile->tile_row] + mi_col;
- xd->left_txfm_context =
- xd->left_txfm_context_buffer + (mi_row & MAX_MIB_MASK);
-
- // Set up destination pointers.
- av1_setup_dst_planes(xd->plane, bsize, get_frame_new_buffer(cm), mi_row,
- mi_col, 0, num_planes);
-
- // Set up limit values for MV components.
- // Mv beyond the range do not produce new/different prediction block.
- x->mv_limits.row_min =
- -(((mi_row + mi_height) * MI_SIZE) + AOM_INTERP_EXTEND);
- x->mv_limits.col_min = -(((mi_col + mi_width) * MI_SIZE) + AOM_INTERP_EXTEND);
- x->mv_limits.row_max = (cm->mi_rows - mi_row) * MI_SIZE + AOM_INTERP_EXTEND;
- x->mv_limits.col_max = (cm->mi_cols - mi_col) * MI_SIZE + AOM_INTERP_EXTEND;
-
- set_plane_n4(xd, mi_width, mi_height, num_planes);
-
- // Set up distance of MB to edge of frame in 1/8th pel units.
- assert(!(mi_col & (mi_width - 1)) && !(mi_row & (mi_height - 1)));
- set_mi_row_col(xd, tile, mi_row, mi_height, mi_col, mi_width, cm->mi_rows,
- cm->mi_cols);
-
- // Set up source buffers.
- av1_setup_src_planes(x, cpi->source, mi_row, mi_col, num_planes);
-
- // R/D setup.
- x->rdmult = cpi->rd.RDMULT;
-
- // required by av1_append_sub8x8_mvs_for_idx() and av1_find_best_ref_mvs()
- xd->tile = *tile;
-}
-
-static void set_offsets(const AV1_COMP *const cpi, const TileInfo *const tile,
- MACROBLOCK *const x, int mi_row, int mi_col,
- BLOCK_SIZE bsize) {
- const AV1_COMMON *const cm = &cpi->common;
- MACROBLOCKD *const xd = &x->e_mbd;
- MB_MODE_INFO *mbmi;
- const struct segmentation *const seg = &cm->seg;
-
- set_offsets_without_segment_id(cpi, tile, x, mi_row, mi_col, bsize);
-
- mbmi = xd->mi[0];
- xd->cfl.mi_row = mi_row;
- xd->cfl.mi_col = mi_col;
-
- mbmi->segment_id = 0;
-
- // Setup segment ID.
- if (seg->enabled) {
- if (seg->enabled && !cpi->vaq_refresh) {
- const uint8_t *const map =
- seg->update_map ? cpi->segmentation_map : cm->last_frame_seg_map;
- mbmi->segment_id =
- map ? get_segment_id(cm, map, bsize, mi_row, mi_col) : 0;
- }
- av1_init_plane_quantizers(cpi, x, mbmi->segment_id);
- }
-}
-
-static void reset_intmv_filter_type(MB_MODE_INFO *mbmi) {
- InterpFilter filters[2];
-
- for (int dir = 0; dir < 2; ++dir) {
- filters[dir] = av1_extract_interp_filter(mbmi->interp_filters, dir);
- }
- mbmi->interp_filters = av1_make_interp_filters(filters[0], filters[1]);
-}
-
-static void update_filter_type_count(uint8_t allow_update_cdf,
- FRAME_COUNTS *counts,
- const MACROBLOCKD *xd,
- const MB_MODE_INFO *mbmi) {
- int dir;
- for (dir = 0; dir < 2; ++dir) {
- const int ctx = av1_get_pred_context_switchable_interp(xd, dir);
- InterpFilter filter = av1_extract_interp_filter(mbmi->interp_filters, dir);
- ++counts->switchable_interp[ctx][filter];
- if (allow_update_cdf) {
- update_cdf(xd->tile_ctx->switchable_interp_cdf[ctx], filter,
- SWITCHABLE_FILTERS);
- }
- }
-}
-
-static void update_global_motion_used(PREDICTION_MODE mode, BLOCK_SIZE bsize,
- const MB_MODE_INFO *mbmi,
- RD_COUNTS *rdc) {
- if (mode == GLOBALMV || mode == GLOBAL_GLOBALMV) {
- const int num_4x4s = mi_size_wide[bsize] * mi_size_high[bsize];
- int ref;
- for (ref = 0; ref < 1 + has_second_ref(mbmi); ++ref) {
- rdc->global_motion_used[mbmi->ref_frame[ref]] += num_4x4s;
- }
- }
-}
-
-static void reset_tx_size(MACROBLOCK *x, MB_MODE_INFO *mbmi,
- const TX_MODE tx_mode) {
- MACROBLOCKD *const xd = &x->e_mbd;
- if (xd->lossless[mbmi->segment_id]) {
- mbmi->tx_size = TX_4X4;
- } else if (tx_mode != TX_MODE_SELECT) {
- mbmi->tx_size = tx_size_from_tx_mode(mbmi->sb_type, tx_mode);
- } else {
- BLOCK_SIZE bsize = mbmi->sb_type;
- TX_SIZE min_tx_size = depth_to_tx_size(MAX_TX_DEPTH, bsize);
- mbmi->tx_size = (TX_SIZE)TXSIZEMAX(mbmi->tx_size, min_tx_size);
- }
- if (is_inter_block(mbmi)) {
- memset(mbmi->inter_tx_size, mbmi->tx_size, sizeof(mbmi->inter_tx_size));
- }
- memset(mbmi->txk_type, DCT_DCT, sizeof(mbmi->txk_type[0]) * TXK_TYPE_BUF_LEN);
- av1_zero(x->blk_skip);
- x->skip = 0;
-}
-
-static void update_state(const AV1_COMP *const cpi,
- const TileDataEnc *const tile_data, ThreadData *td,
- const PICK_MODE_CONTEXT *const ctx, int mi_row,
- int mi_col, BLOCK_SIZE bsize, RUN_TYPE dry_run) {
- int i, x_idx, y;
- const AV1_COMMON *const cm = &cpi->common;
- const int num_planes = av1_num_planes(cm);
- RD_COUNTS *const rdc = &td->rd_counts;
- MACROBLOCK *const x = &td->mb;
- MACROBLOCKD *const xd = &x->e_mbd;
- struct macroblock_plane *const p = x->plane;
- struct macroblockd_plane *const pd = xd->plane;
- const MB_MODE_INFO *const mi = &ctx->mic;
- MB_MODE_INFO *const mi_addr = xd->mi[0];
- const struct segmentation *const seg = &cm->seg;
- const int bw = mi_size_wide[mi->sb_type];
- const int bh = mi_size_high[mi->sb_type];
- const int mis = cm->mi_stride;
- const int mi_width = mi_size_wide[bsize];
- const int mi_height = mi_size_high[bsize];
-
- assert(mi->sb_type == bsize);
-
- *mi_addr = *mi;
- *x->mbmi_ext = ctx->mbmi_ext;
-
- reset_intmv_filter_type(mi_addr);
-
- memcpy(x->blk_skip, ctx->blk_skip, sizeof(x->blk_skip[0]) * ctx->num_4x4_blk);
-
- x->skip = ctx->skip;
-
- // If segmentation in use
- if (seg->enabled) {
- // For in frame complexity AQ copy the segment id from the segment map.
- if (cpi->oxcf.aq_mode == COMPLEXITY_AQ) {
- const uint8_t *const map =
- seg->update_map ? cpi->segmentation_map : cm->last_frame_seg_map;
- mi_addr->segment_id =
- map ? get_segment_id(cm, map, bsize, mi_row, mi_col) : 0;
- reset_tx_size(x, mi_addr, cm->tx_mode);
- }
- // Else for cyclic refresh mode update the segment map, set the segment id
- // and then update the quantizer.
- if (cpi->oxcf.aq_mode == CYCLIC_REFRESH_AQ) {
- av1_cyclic_refresh_update_segment(cpi, mi_addr, mi_row, mi_col, bsize,
- ctx->rate, ctx->dist, x->skip);
- reset_tx_size(x, mi_addr, cm->tx_mode);
- }
- if (mi_addr->uv_mode == UV_CFL_PRED && !is_cfl_allowed(xd))
- mi_addr->uv_mode = UV_DC_PRED;
- }
-
- for (i = 0; i < num_planes; ++i) {
- p[i].coeff = ctx->coeff[i];
- p[i].qcoeff = ctx->qcoeff[i];
- pd[i].dqcoeff = ctx->dqcoeff[i];
- p[i].eobs = ctx->eobs[i];
- p[i].txb_entropy_ctx = ctx->txb_entropy_ctx[i];
- }
- for (i = 0; i < 2; ++i) pd[i].color_index_map = ctx->color_index_map[i];
- // Restore the coding context of the MB to that that was in place
- // when the mode was picked for it
- for (y = 0; y < mi_height; y++)
- for (x_idx = 0; x_idx < mi_width; x_idx++)
- if ((xd->mb_to_right_edge >> (3 + MI_SIZE_LOG2)) + mi_width > x_idx &&
- (xd->mb_to_bottom_edge >> (3 + MI_SIZE_LOG2)) + mi_height > y) {
- xd->mi[x_idx + y * mis] = mi_addr;
- }
-
- if (cpi->oxcf.aq_mode) av1_init_plane_quantizers(cpi, x, mi_addr->segment_id);
-
- if (dry_run) return;
-
-#if CONFIG_INTERNAL_STATS
- {
- unsigned int *const mode_chosen_counts =
- (unsigned int *)cpi->mode_chosen_counts; // Cast const away.
- if (frame_is_intra_only(cm)) {
- static const int kf_mode_index[] = {
- THR_DC /*DC_PRED*/,
- THR_V_PRED /*V_PRED*/,
- THR_H_PRED /*H_PRED*/,
- THR_D45_PRED /*D45_PRED*/,
- THR_D135_PRED /*D135_PRED*/,
- THR_D113_PRED /*D113_PRED*/,
- THR_D157_PRED /*D157_PRED*/,
- THR_D203_PRED /*D203_PRED*/,
- THR_D67_PRED /*D67_PRED*/,
- THR_SMOOTH, /*SMOOTH_PRED*/
- THR_SMOOTH_V, /*SMOOTH_V_PRED*/
- THR_SMOOTH_H, /*SMOOTH_H_PRED*/
- THR_PAETH /*PAETH_PRED*/,
- };
- ++mode_chosen_counts[kf_mode_index[mi_addr->mode]];
- } else {
- // Note how often each mode chosen as best
- ++mode_chosen_counts[ctx->best_mode_index];
- }
- }
-#endif
- if (!frame_is_intra_only(cm)) {
- if (is_inter_block(mi_addr)) {
- // TODO(sarahparker): global motion stats need to be handled per-tile
- // to be compatible with tile-based threading.
- update_global_motion_used(mi_addr->mode, bsize, mi_addr, rdc);
- }
-
- if (cm->interp_filter == SWITCHABLE &&
- mi_addr->motion_mode != WARPED_CAUSAL &&
- !is_nontrans_global_motion(xd, xd->mi[0])) {
- update_filter_type_count(tile_data->allow_update_cdf, td->counts, xd,
- mi_addr);
- }
-
- rdc->comp_pred_diff[SINGLE_REFERENCE] += ctx->single_pred_diff;
- rdc->comp_pred_diff[COMPOUND_REFERENCE] += ctx->comp_pred_diff;
- rdc->comp_pred_diff[REFERENCE_MODE_SELECT] += ctx->hybrid_pred_diff;
- }
-
- const int x_mis = AOMMIN(bw, cm->mi_cols - mi_col);
- const int y_mis = AOMMIN(bh, cm->mi_rows - mi_row);
- av1_copy_frame_mvs(cm, mi, mi_row, mi_col, x_mis, y_mis);
-}
-
-void av1_setup_src_planes(MACROBLOCK *x, const YV12_BUFFER_CONFIG *src,
- int mi_row, int mi_col, const int num_planes) {
- // Set current frame pointer.
- x->e_mbd.cur_buf = src;
-
- // We use AOMMIN(num_planes, MAX_MB_PLANE) instead of num_planes to quiet
- // the static analysis warnings.
- for (int i = 0; i < AOMMIN(num_planes, MAX_MB_PLANE); i++) {
- const int is_uv = i > 0;
- setup_pred_plane(&x->plane[i].src, x->e_mbd.mi[0]->sb_type, src->buffers[i],
- src->crop_widths[is_uv], src->crop_heights[is_uv],
- src->strides[is_uv], mi_row, mi_col, NULL,
- x->e_mbd.plane[i].subsampling_x,
- x->e_mbd.plane[i].subsampling_y);
- }
-}
-
-static int set_segment_rdmult(const AV1_COMP *const cpi, MACROBLOCK *const x,
- int8_t segment_id) {
- const AV1_COMMON *const cm = &cpi->common;
- av1_init_plane_quantizers(cpi, x, segment_id);
- aom_clear_system_state();
- int segment_qindex = av1_get_qindex(&cm->seg, segment_id, cm->base_qindex);
- return av1_compute_rd_mult(cpi, segment_qindex + cm->y_dc_delta_q);
-}
-
-static int set_deltaq_rdmult(const AV1_COMP *const cpi, MACROBLOCKD *const xd) {
- const AV1_COMMON *const cm = &cpi->common;
-
- return av1_compute_rd_mult(
- cpi, cm->base_qindex + xd->delta_qindex + cm->y_dc_delta_q);
-}
-
-static void rd_pick_sb_modes(AV1_COMP *const cpi, TileDataEnc *tile_data,
- MACROBLOCK *const x, int mi_row, int mi_col,
- RD_STATS *rd_cost, PARTITION_TYPE partition,
- BLOCK_SIZE bsize, PICK_MODE_CONTEXT *ctx,
- int64_t best_rd) {
- AV1_COMMON *const cm = &cpi->common;
- const int num_planes = av1_num_planes(cm);
- TileInfo *const tile_info = &tile_data->tile_info;
- MACROBLOCKD *const xd = &x->e_mbd;
- MB_MODE_INFO *mbmi;
- MB_MODE_INFO *ctx_mbmi = &ctx->mic;
- struct macroblock_plane *const p = x->plane;
- struct macroblockd_plane *const pd = xd->plane;
- const AQ_MODE aq_mode = cpi->oxcf.aq_mode;
- const DELTAQ_MODE deltaq_mode = cpi->oxcf.deltaq_mode;
- int i, orig_rdmult;
-
- if (best_rd < 0) {
- ctx->rdcost = INT64_MAX;
- ctx->skip = 0;
- av1_invalid_rd_stats(rd_cost);
- return;
- }
-
- aom_clear_system_state();
-
- set_offsets(cpi, tile_info, x, mi_row, mi_col, bsize);
-
- mbmi = xd->mi[0];
-
- if (ctx->rd_mode_is_ready) {
- assert(ctx_mbmi->sb_type == bsize);
- assert(ctx_mbmi->partition == partition);
- *mbmi = *ctx_mbmi;
- rd_cost->rate = ctx->rate;
- rd_cost->dist = ctx->dist;
- rd_cost->rdcost = ctx->rdcost;
- } else {
- mbmi->sb_type = bsize;
- mbmi->partition = partition;
- }
-
-#if CONFIG_RD_DEBUG
- mbmi->mi_row = mi_row;
- mbmi->mi_col = mi_col;
-#endif
-
- for (i = 0; i < num_planes; ++i) {
- p[i].coeff = ctx->coeff[i];
- p[i].qcoeff = ctx->qcoeff[i];
- pd[i].dqcoeff = ctx->dqcoeff[i];
- p[i].eobs = ctx->eobs[i];
- p[i].txb_entropy_ctx = ctx->txb_entropy_ctx[i];
- }
-
- for (i = 0; i < 2; ++i) pd[i].color_index_map = ctx->color_index_map[i];
-
- if (!ctx->rd_mode_is_ready) {
- ctx->skippable = 0;
-
- // Set to zero to make sure we do not use the previous encoded frame stats
- mbmi->skip = 0;
-
- // Reset skip mode flag.
- mbmi->skip_mode = 0;
- }
-
- x->skip_chroma_rd =
- !is_chroma_reference(mi_row, mi_col, bsize, xd->plane[1].subsampling_x,
- xd->plane[1].subsampling_y);
-
- if (ctx->rd_mode_is_ready) {
- x->skip = ctx->skip;
- *x->mbmi_ext = ctx->mbmi_ext;
- return;
- }
-
- if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
- x->source_variance = av1_high_get_sby_perpixel_variance(
- cpi, &x->plane[0].src, bsize, xd->bd);
- } else {
- x->source_variance =
- av1_get_sby_perpixel_variance(cpi, &x->plane[0].src, bsize);
- }
-
- // Save rdmult before it might be changed, so it can be restored later.
- orig_rdmult = x->rdmult;
-
- if (aq_mode == VARIANCE_AQ) {
- if (cpi->vaq_refresh) {
- const int energy = bsize <= BLOCK_16X16
- ? x->mb_energy
- : av1_log_block_var(cpi, x, bsize);
- mbmi->segment_id = energy;
- }
- x->rdmult = set_segment_rdmult(cpi, x, mbmi->segment_id);
- } else if (aq_mode == COMPLEXITY_AQ) {
- x->rdmult = set_segment_rdmult(cpi, x, mbmi->segment_id);
- } else if (aq_mode == CYCLIC_REFRESH_AQ) {
- // If segment is boosted, use rdmult for that segment.
- if (cyclic_refresh_segment_id_boosted(mbmi->segment_id))
- x->rdmult = av1_cyclic_refresh_get_rdmult(cpi->cyclic_refresh);
- }
-
- if (deltaq_mode > 0) x->rdmult = set_deltaq_rdmult(cpi, xd);
-
- // Find best coding mode & reconstruct the MB so it is available
- // as a predictor for MBs that follow in the SB
- if (frame_is_intra_only(cm)) {
- av1_rd_pick_intra_mode_sb(cpi, x, mi_row, mi_col, rd_cost, bsize, ctx,
- best_rd);
- } else {
- if (segfeature_active(&cm->seg, mbmi->segment_id, SEG_LVL_SKIP)) {
- av1_rd_pick_inter_mode_sb_seg_skip(cpi, tile_data, x, mi_row, mi_col,
- rd_cost, bsize, ctx, best_rd);
- } else {
- av1_rd_pick_inter_mode_sb(cpi, tile_data, x, mi_row, mi_col, rd_cost,
- bsize, ctx, best_rd);
- }
- }
-
- // Examine the resulting rate and for AQ mode 2 make a segment choice.
- if ((rd_cost->rate != INT_MAX) && (aq_mode == COMPLEXITY_AQ) &&
- (bsize >= BLOCK_16X16) &&
- (cm->frame_type == KEY_FRAME || cpi->refresh_alt_ref_frame ||
- cpi->refresh_alt2_ref_frame ||
- (cpi->refresh_golden_frame && !cpi->rc.is_src_frame_alt_ref))) {
- av1_caq_select_segment(cpi, x, bsize, mi_row, mi_col, rd_cost->rate);
- }
-
- x->rdmult = orig_rdmult;
-
- // TODO(jingning) The rate-distortion optimization flow needs to be
- // refactored to provide proper exit/return handle.
- if (rd_cost->rate == INT_MAX) rd_cost->rdcost = INT64_MAX;
-
- ctx->rate = rd_cost->rate;
- ctx->dist = rd_cost->dist;
- ctx->rdcost = rd_cost->rdcost;
-}
-
-static void update_inter_mode_stats(FRAME_CONTEXT *fc, FRAME_COUNTS *counts,
- PREDICTION_MODE mode, int16_t mode_context,
- uint8_t allow_update_cdf) {
- (void)counts;
-
- int16_t mode_ctx = mode_context & NEWMV_CTX_MASK;
- if (mode == NEWMV) {
-#if CONFIG_ENTROPY_STATS
- ++counts->newmv_mode[mode_ctx][0];
-#endif
- if (allow_update_cdf) update_cdf(fc->newmv_cdf[mode_ctx], 0, 2);
- return;
- } else {
-#if CONFIG_ENTROPY_STATS
- ++counts->newmv_mode[mode_ctx][1];
-#endif
- if (allow_update_cdf) update_cdf(fc->newmv_cdf[mode_ctx], 1, 2);
-
- mode_ctx = (mode_context >> GLOBALMV_OFFSET) & GLOBALMV_CTX_MASK;
- if (mode == GLOBALMV) {
-#if CONFIG_ENTROPY_STATS
- ++counts->zeromv_mode[mode_ctx][0];
-#endif
- if (allow_update_cdf) update_cdf(fc->zeromv_cdf[mode_ctx], 0, 2);
- return;
- } else {
-#if CONFIG_ENTROPY_STATS
- ++counts->zeromv_mode[mode_ctx][1];
-#endif
- if (allow_update_cdf) update_cdf(fc->zeromv_cdf[mode_ctx], 1, 2);
- mode_ctx = (mode_context >> REFMV_OFFSET) & REFMV_CTX_MASK;
-#if CONFIG_ENTROPY_STATS
- ++counts->refmv_mode[mode_ctx][mode != NEARESTMV];
-#endif
- if (allow_update_cdf)
- update_cdf(fc->refmv_cdf[mode_ctx], mode != NEARESTMV, 2);
- }
- }
-}
-
-static void update_palette_cdf(MACROBLOCKD *xd, const MB_MODE_INFO *const mbmi,
- FRAME_COUNTS *counts, uint8_t allow_update_cdf) {
- FRAME_CONTEXT *fc = xd->tile_ctx;
- const BLOCK_SIZE bsize = mbmi->sb_type;
- const PALETTE_MODE_INFO *const pmi = &mbmi->palette_mode_info;
- const int palette_bsize_ctx = av1_get_palette_bsize_ctx(bsize);
-
- (void)counts;
-
- if (mbmi->mode == DC_PRED) {
- const int n = pmi->palette_size[0];
- const int palette_mode_ctx = av1_get_palette_mode_ctx(xd);
-
-#if CONFIG_ENTROPY_STATS
- ++counts->palette_y_mode[palette_bsize_ctx][palette_mode_ctx][n > 0];
-#endif
- if (allow_update_cdf)
- update_cdf(fc->palette_y_mode_cdf[palette_bsize_ctx][palette_mode_ctx],
- n > 0, 2);
- if (n > 0) {
-#if CONFIG_ENTROPY_STATS
- ++counts->palette_y_size[palette_bsize_ctx][n - PALETTE_MIN_SIZE];
-#endif
- if (allow_update_cdf) {
- update_cdf(fc->palette_y_size_cdf[palette_bsize_ctx],
- n - PALETTE_MIN_SIZE, PALETTE_SIZES);
- }
- }
- }
-
- if (mbmi->uv_mode == UV_DC_PRED) {
- const int n = pmi->palette_size[1];
- const int palette_uv_mode_ctx = (pmi->palette_size[0] > 0);
-
-#if CONFIG_ENTROPY_STATS
- ++counts->palette_uv_mode[palette_uv_mode_ctx][n > 0];
-#endif
- if (allow_update_cdf)
- update_cdf(fc->palette_uv_mode_cdf[palette_uv_mode_ctx], n > 0, 2);
-
- if (n > 0) {
-#if CONFIG_ENTROPY_STATS
- ++counts->palette_uv_size[palette_bsize_ctx][n - PALETTE_MIN_SIZE];
-#endif
- if (allow_update_cdf) {
- update_cdf(fc->palette_uv_size_cdf[palette_bsize_ctx],
- n - PALETTE_MIN_SIZE, PALETTE_SIZES);
- }
- }
- }
-}
-/* Accumulates the entropy-coding statistics for the intra symbols of one
- * block. Tile-context CDFs (xd->tile_ctx) are adapted through update_cdf()
- * only when allow_update_cdf is non-zero; `counts` is incremented only in
- * CONFIG_ENTROPY_STATS builds. Covered, in order: luma mode, filter-intra,
- * luma angle delta, then (only if is_chroma_reference() holds) chroma mode,
- * CfL sign/alpha, chroma angle delta and the palette symbols.
- *
- * intraonly selects the luma-mode model: neighbour-conditioned
- * (above_mi/left_mi) when non-zero, block-size-group conditioned otherwise.
- */
-static void sum_intra_stats(const AV1_COMMON *const cm, FRAME_COUNTS *counts,
- MACROBLOCKD *xd, const MB_MODE_INFO *const mbmi,
- const MB_MODE_INFO *above_mi,
- const MB_MODE_INFO *left_mi, const int intraonly,
- const int mi_row, const int mi_col,
- uint8_t allow_update_cdf) {
- FRAME_CONTEXT *fc = xd->tile_ctx;
- const PREDICTION_MODE y_mode = mbmi->mode;
- const UV_PREDICTION_MODE uv_mode = mbmi->uv_mode;
- (void)counts;  // only dereferenced inside CONFIG_ENTROPY_STATS sections
- const BLOCK_SIZE bsize = mbmi->sb_type;
-
- if (intraonly) {  // luma mode conditioned on the above/left neighbours
-#if CONFIG_ENTROPY_STATS
- const PREDICTION_MODE above = av1_above_block_mode(above_mi);
- const PREDICTION_MODE left = av1_left_block_mode(left_mi);
- const int above_ctx = intra_mode_context[above];
- const int left_ctx = intra_mode_context[left];
- ++counts->kf_y_mode[above_ctx][left_ctx][y_mode];
-#endif // CONFIG_ENTROPY_STATS
- if (allow_update_cdf)
- update_cdf(get_y_mode_cdf(fc, above_mi, left_mi), y_mode, INTRA_MODES);
- } else {  // luma mode conditioned on the block-size group
-#if CONFIG_ENTROPY_STATS
- ++counts->y_mode[size_group_lookup[bsize]][y_mode];
-#endif // CONFIG_ENTROPY_STATS
- if (allow_update_cdf)
- update_cdf(fc->y_mode_cdf[size_group_lookup[bsize]], y_mode, INTRA_MODES);
- }
-
- if (av1_filter_intra_allowed(cm, mbmi)) {
- const int use_filter_intra_mode =
- mbmi->filter_intra_mode_info.use_filter_intra;
-#if CONFIG_ENTROPY_STATS
- ++counts->filter_intra[mbmi->sb_type][use_filter_intra_mode];
- if (use_filter_intra_mode) {
- ++counts
- ->filter_intra_mode[mbmi->filter_intra_mode_info.filter_intra_mode];
- }
-#endif // CONFIG_ENTROPY_STATS
- if (allow_update_cdf) {
- update_cdf(fc->filter_intra_cdfs[mbmi->sb_type], use_filter_intra_mode,
- 2);
- if (use_filter_intra_mode) {  // the mode symbol follows only when the flag is set
- update_cdf(fc->filter_intra_mode_cdf,
- mbmi->filter_intra_mode_info.filter_intra_mode,
- FILTER_INTRA_MODES);
- }
- }
- }
- if (av1_is_directional_mode(mbmi->mode) && av1_use_angle_delta(bsize)) {
-#if CONFIG_ENTROPY_STATS
- ++counts->angle_delta[mbmi->mode - V_PRED]
- [mbmi->angle_delta[PLANE_TYPE_Y] + MAX_ANGLE_DELTA];
-#endif
- if (allow_update_cdf) {
- update_cdf(fc->angle_delta_cdf[mbmi->mode - V_PRED],
- mbmi->angle_delta[PLANE_TYPE_Y] + MAX_ANGLE_DELTA,  // offset by MAX_ANGLE_DELTA to form the symbol index
- 2 * MAX_ANGLE_DELTA + 1);
- }
- }
-
- if (!is_chroma_reference(mi_row, mi_col, bsize,
- xd->plane[AOM_PLANE_U].subsampling_x,
- xd->plane[AOM_PLANE_U].subsampling_y))
- return;  // everything below (chroma, CfL, palette) is skipped for this block
-
-#if CONFIG_ENTROPY_STATS
- ++counts->uv_mode[is_cfl_allowed(xd)][y_mode][uv_mode];
-#endif // CONFIG_ENTROPY_STATS
- if (allow_update_cdf) {
- const CFL_ALLOWED_TYPE cfl_allowed = is_cfl_allowed(xd);
- update_cdf(fc->uv_mode_cdf[cfl_allowed][y_mode], uv_mode,
- UV_INTRA_MODES - !cfl_allowed);  // alphabet is one symbol shorter when CfL is not allowed
- }
- if (uv_mode == UV_CFL_PRED) {
- const int joint_sign = mbmi->cfl_alpha_signs;
- const int idx = mbmi->cfl_alpha_idx;
-
-#if CONFIG_ENTROPY_STATS
- ++counts->cfl_sign[joint_sign];
-#endif
- if (allow_update_cdf)
- update_cdf(fc->cfl_sign_cdf, joint_sign, CFL_JOINT_SIGNS);
- if (CFL_SIGN_U(joint_sign) != CFL_SIGN_ZERO) {  // U alpha is recorded only for a non-zero U sign
- aom_cdf_prob *cdf_u = fc->cfl_alpha_cdf[CFL_CONTEXT_U(joint_sign)];
-
-#if CONFIG_ENTROPY_STATS
- ++counts->cfl_alpha[CFL_CONTEXT_U(joint_sign)][CFL_IDX_U(idx)];
-#endif
- if (allow_update_cdf)
- update_cdf(cdf_u, CFL_IDX_U(idx), CFL_ALPHABET_SIZE);
- }
- if (CFL_SIGN_V(joint_sign) != CFL_SIGN_ZERO) {  // same for V
- aom_cdf_prob *cdf_v = fc->cfl_alpha_cdf[CFL_CONTEXT_V(joint_sign)];
-
-#if CONFIG_ENTROPY_STATS
- ++counts->cfl_alpha[CFL_CONTEXT_V(joint_sign)][CFL_IDX_V(idx)];
-#endif
- if (allow_update_cdf)
- update_cdf(cdf_v, CFL_IDX_V(idx), CFL_ALPHABET_SIZE);
- }
- }
- if (av1_is_directional_mode(get_uv_mode(uv_mode)) &&
- av1_use_angle_delta(bsize)) {
-#if CONFIG_ENTROPY_STATS
- ++counts->angle_delta[uv_mode - UV_V_PRED]
- [mbmi->angle_delta[PLANE_TYPE_UV] + MAX_ANGLE_DELTA];
-#endif
- if (allow_update_cdf) {
- update_cdf(fc->angle_delta_cdf[uv_mode - UV_V_PRED],  // chroma shares the angle_delta_cdf table used for luma above
- mbmi->angle_delta[PLANE_TYPE_UV] + MAX_ANGLE_DELTA,
- 2 * MAX_ANGLE_DELTA + 1);
- }
- }
- if (av1_allow_palette(cm->allow_screen_content_tools, bsize))
- update_palette_cdf(xd, mbmi, counts, allow_update_cdf);
-}
-/* Accumulates symbol statistics for the block at xd->mi[0] once its coding
- * decisions are final. CDFs in xd->tile_ctx are adapted via update_cdf()
- * only when tile_data->allow_update_cdf is non-zero; td->counts is
- * incremented only in CONFIG_ENTROPY_STATS builds. Besides statistics, the
- * function carries xd->current_qindex / xd->delta_lf* forward from the
- * block, sets the skip_mode_used_flag / compound_ref_used_flag in
- * td->rd_counts, and calls set_ref_ptrs() for inter frames.
- */
-static void update_stats(const AV1_COMMON *const cm, TileDataEnc *tile_data,
- ThreadData *td, int mi_row, int mi_col) {
- MACROBLOCK *x = &td->mb;
- MACROBLOCKD *const xd = &x->e_mbd;
- const MB_MODE_INFO *const mbmi = xd->mi[0];
- const MB_MODE_INFO_EXT *const mbmi_ext = x->mbmi_ext;
- const BLOCK_SIZE bsize = mbmi->sb_type;
- FRAME_CONTEXT *fc = xd->tile_ctx;
- const uint8_t allow_update_cdf = tile_data->allow_update_cdf;
-
- // delta quant applies to both intra and inter
- const int super_block_upper_left =  // low bits (mask mib_size - 1) of both mi_row and mi_col are zero
- ((mi_row & (cm->seq_params.mib_size - 1)) == 0) &&
- ((mi_col & (cm->seq_params.mib_size - 1)) == 0);
-
- const int seg_ref_active =
- segfeature_active(&cm->seg, mbmi->segment_id, SEG_LVL_REF_FRAME);
-
- if (cm->skip_mode_flag && !seg_ref_active && is_comp_ref_allowed(bsize)) {
- const int skip_mode_ctx = av1_get_skip_mode_context(xd);
-#if CONFIG_ENTROPY_STATS
- td->counts->skip_mode[skip_mode_ctx][mbmi->skip_mode]++;
-#endif
- if (allow_update_cdf)
- update_cdf(fc->skip_mode_cdfs[skip_mode_ctx], mbmi->skip_mode, 2);
- }
-
- if (!mbmi->skip_mode) {  // the skip flag is recorded only for non-skip-mode blocks
- if (!seg_ref_active) {
- const int skip_ctx = av1_get_skip_context(xd);
-#if CONFIG_ENTROPY_STATS
- td->counts->skip[skip_ctx][mbmi->skip]++;
-#endif
- if (allow_update_cdf) update_cdf(fc->skip_cdfs[skip_ctx], mbmi->skip, 2);
- }
- }
-
- if (cm->delta_q_present_flag &&
- (bsize != cm->seq_params.sb_size || !mbmi->skip) &&
- super_block_upper_left) {
-#if CONFIG_ENTROPY_STATS
- const int dq =
- (mbmi->current_qindex - xd->current_qindex) / cm->delta_q_res;
- const int absdq = abs(dq);
- for (int i = 0; i < AOMMIN(absdq, DELTA_Q_SMALL); ++i) {
- td->counts->delta_q[i][1]++;
- }
- if (absdq < DELTA_Q_SMALL) td->counts->delta_q[absdq][0]++;
-#endif
- xd->current_qindex = mbmi->current_qindex;  // state update happens regardless of CONFIG_ENTROPY_STATS
- if (cm->delta_lf_present_flag) {
- if (cm->delta_lf_multi) {
- const int frame_lf_count =
- av1_num_planes(cm) > 1 ? FRAME_LF_COUNT : FRAME_LF_COUNT - 2;  // two fewer deltas when there is a single plane
- for (int lf_id = 0; lf_id < frame_lf_count; ++lf_id) {
-#if CONFIG_ENTROPY_STATS
- const int delta_lf =
- (mbmi->delta_lf[lf_id] - xd->delta_lf[lf_id]) / cm->delta_lf_res;
- const int abs_delta_lf = abs(delta_lf);
- for (int i = 0; i < AOMMIN(abs_delta_lf, DELTA_LF_SMALL); ++i) {
- td->counts->delta_lf_multi[lf_id][i][1]++;
- }
- if (abs_delta_lf < DELTA_LF_SMALL)
- td->counts->delta_lf_multi[lf_id][abs_delta_lf][0]++;
-#endif
- xd->delta_lf[lf_id] = mbmi->delta_lf[lf_id];
- }
- } else {
-#if CONFIG_ENTROPY_STATS
- const int delta_lf =
- (mbmi->delta_lf_from_base - xd->delta_lf_from_base) /
- cm->delta_lf_res;
- const int abs_delta_lf = abs(delta_lf);
- for (int i = 0; i < AOMMIN(abs_delta_lf, DELTA_LF_SMALL); ++i) {
- td->counts->delta_lf[i][1]++;
- }
- if (abs_delta_lf < DELTA_LF_SMALL)
- td->counts->delta_lf[abs_delta_lf][0]++;
-#endif
- xd->delta_lf_from_base = mbmi->delta_lf_from_base;
- }
- }
- }
-
- if (!is_inter_block(mbmi)) {
- sum_intra_stats(cm, td->counts, xd, mbmi, xd->above_mbmi, xd->left_mbmi,
- frame_is_intra_only(cm), mi_row, mi_col,
- tile_data->allow_update_cdf);
- }
-
- if (av1_allow_intrabc(cm)) {
- if (allow_update_cdf)
- update_cdf(fc->intrabc_cdf, is_intrabc_block(mbmi), 2);
-#if CONFIG_ENTROPY_STATS
- ++td->counts->intrabc[is_intrabc_block(mbmi)];
-#endif // CONFIG_ENTROPY_STATS
- }
-
- if (!frame_is_intra_only(cm)) {
- RD_COUNTS *rdc = &td->rd_counts;
-
- FRAME_COUNTS *const counts = td->counts;
-
- if (mbmi->skip_mode) {
- rdc->skip_mode_used_flag = 1;
- if (cm->reference_mode == REFERENCE_MODE_SELECT) {
- assert(has_second_ref(mbmi));
- rdc->compound_ref_used_flag = 1;
- }
- set_ref_ptrs(cm, xd, mbmi->ref_frame[0], mbmi->ref_frame[1]);
- return;  // skip-mode blocks: none of the inter symbols below are recorded
- }
-
- const int inter_block = is_inter_block(mbmi);
-
- if (!seg_ref_active) {
-#if CONFIG_ENTROPY_STATS
- counts->intra_inter[av1_get_intra_inter_context(xd)][inter_block]++;
-#endif
- if (allow_update_cdf) {
- update_cdf(fc->intra_inter_cdf[av1_get_intra_inter_context(xd)],
- inter_block, 2);
- }
- // If the segment reference feature is enabled we have only a single
- // reference frame allowed for the segment so exclude it from
- // the reference frame counts used to work out probabilities.
- if (inter_block) {
- const MV_REFERENCE_FRAME ref0 = mbmi->ref_frame[0];
- const MV_REFERENCE_FRAME ref1 = mbmi->ref_frame[1];
-
- av1_collect_neighbors_ref_counts(xd);  // called before any of the reference-frame context/CDF getters below
-
- if (cm->reference_mode == REFERENCE_MODE_SELECT) {
- if (has_second_ref(mbmi))
- // This flag is also updated for 4x4 blocks
- rdc->compound_ref_used_flag = 1;
- if (is_comp_ref_allowed(bsize)) {
-#if CONFIG_ENTROPY_STATS
- counts->comp_inter[av1_get_reference_mode_context(xd)]
- [has_second_ref(mbmi)]++;
-#endif // CONFIG_ENTROPY_STATS
- if (allow_update_cdf) {
- update_cdf(av1_get_reference_mode_cdf(xd), has_second_ref(mbmi),
- 2);
- }
- }
- }
-
- if (has_second_ref(mbmi)) {  // compound prediction: record the reference pair as a chain of binary decisions
- const COMP_REFERENCE_TYPE comp_ref_type = has_uni_comp_refs(mbmi)
- ? UNIDIR_COMP_REFERENCE
- : BIDIR_COMP_REFERENCE;
- if (allow_update_cdf) {
- update_cdf(av1_get_comp_reference_type_cdf(xd), comp_ref_type,
- COMP_REFERENCE_TYPES);
- }
-#if CONFIG_ENTROPY_STATS
- counts->comp_ref_type[av1_get_comp_reference_type_context(xd)]
- [comp_ref_type]++;
-#endif // CONFIG_ENTROPY_STATS
-
- if (comp_ref_type == UNIDIR_COMP_REFERENCE) {
- const int bit = (ref0 == BWDREF_FRAME);
- if (allow_update_cdf)
- update_cdf(av1_get_pred_cdf_uni_comp_ref_p(xd), bit, 2);
-#if CONFIG_ENTROPY_STATS
- counts->uni_comp_ref[av1_get_pred_context_uni_comp_ref_p(xd)][0]
- [bit]++;
-#endif // CONFIG_ENTROPY_STATS
- if (!bit) {
- const int bit1 = (ref1 == LAST3_FRAME || ref1 == GOLDEN_FRAME);
- if (allow_update_cdf)
- update_cdf(av1_get_pred_cdf_uni_comp_ref_p1(xd), bit1, 2);
-#if CONFIG_ENTROPY_STATS
- counts->uni_comp_ref[av1_get_pred_context_uni_comp_ref_p1(xd)][1]
- [bit1]++;
-#endif // CONFIG_ENTROPY_STATS
- if (bit1) {
- if (allow_update_cdf) {
- update_cdf(av1_get_pred_cdf_uni_comp_ref_p2(xd),
- ref1 == GOLDEN_FRAME, 2);
- }
-#if CONFIG_ENTROPY_STATS
- counts->uni_comp_ref[av1_get_pred_context_uni_comp_ref_p2(xd)]
- [2][ref1 == GOLDEN_FRAME]++;
-#endif // CONFIG_ENTROPY_STATS
- }
- }
- } else {
- const int bit = (ref0 == GOLDEN_FRAME || ref0 == LAST3_FRAME);
- if (allow_update_cdf)
- update_cdf(av1_get_pred_cdf_comp_ref_p(xd), bit, 2);
-#if CONFIG_ENTROPY_STATS
- counts->comp_ref[av1_get_pred_context_comp_ref_p(xd)][0][bit]++;
-#endif // CONFIG_ENTROPY_STATS
- if (!bit) {
- if (allow_update_cdf) {
- update_cdf(av1_get_pred_cdf_comp_ref_p1(xd),
- ref0 == LAST2_FRAME, 2);
- }
-#if CONFIG_ENTROPY_STATS
- counts->comp_ref[av1_get_pred_context_comp_ref_p1(xd)][1]
- [ref0 == LAST2_FRAME]++;
-#endif // CONFIG_ENTROPY_STATS
- } else {
- if (allow_update_cdf) {
- update_cdf(av1_get_pred_cdf_comp_ref_p2(xd),
- ref0 == GOLDEN_FRAME, 2);
- }
-#if CONFIG_ENTROPY_STATS
- counts->comp_ref[av1_get_pred_context_comp_ref_p2(xd)][2]
- [ref0 == GOLDEN_FRAME]++;
-#endif // CONFIG_ENTROPY_STATS
- }
- if (allow_update_cdf) {
- update_cdf(av1_get_pred_cdf_comp_bwdref_p(xd),
- ref1 == ALTREF_FRAME, 2);
- }
-#if CONFIG_ENTROPY_STATS
- counts->comp_bwdref[av1_get_pred_context_comp_bwdref_p(xd)][0]
- [ref1 == ALTREF_FRAME]++;
-#endif // CONFIG_ENTROPY_STATS
- if (ref1 != ALTREF_FRAME) {
- if (allow_update_cdf) {
- update_cdf(av1_get_pred_cdf_comp_bwdref_p1(xd),
- ref1 == ALTREF2_FRAME, 2);
- }
-#if CONFIG_ENTROPY_STATS
- counts->comp_bwdref[av1_get_pred_context_comp_bwdref_p1(xd)][1]
- [ref1 == ALTREF2_FRAME]++;
-#endif // CONFIG_ENTROPY_STATS
- }
- }
- } else {  // single reference: record ref0 as a chain of binary decisions
- const int bit = (ref0 >= BWDREF_FRAME);
- if (allow_update_cdf)
- update_cdf(av1_get_pred_cdf_single_ref_p1(xd), bit, 2);
-#if CONFIG_ENTROPY_STATS
- counts->single_ref[av1_get_pred_context_single_ref_p1(xd)][0][bit]++;
-#endif // CONFIG_ENTROPY_STATS
- if (bit) {
- assert(ref0 <= ALTREF_FRAME);
- if (allow_update_cdf) {
- update_cdf(av1_get_pred_cdf_single_ref_p2(xd),
- ref0 == ALTREF_FRAME, 2);
- }
-#if CONFIG_ENTROPY_STATS
- counts->single_ref[av1_get_pred_context_single_ref_p2(xd)][1]
- [ref0 == ALTREF_FRAME]++;
-#endif // CONFIG_ENTROPY_STATS
- if (ref0 != ALTREF_FRAME) {
- if (allow_update_cdf) {
- update_cdf(av1_get_pred_cdf_single_ref_p6(xd),
- ref0 == ALTREF2_FRAME, 2);
- }
-#if CONFIG_ENTROPY_STATS
- counts->single_ref[av1_get_pred_context_single_ref_p6(xd)][5]
- [ref0 == ALTREF2_FRAME]++;
-#endif // CONFIG_ENTROPY_STATS
- }
- } else {
- const int bit1 = !(ref0 == LAST2_FRAME || ref0 == LAST_FRAME);
- if (allow_update_cdf)
- update_cdf(av1_get_pred_cdf_single_ref_p3(xd), bit1, 2);
-#if CONFIG_ENTROPY_STATS
- counts
- ->single_ref[av1_get_pred_context_single_ref_p3(xd)][2][bit1]++;
-#endif // CONFIG_ENTROPY_STATS
- if (!bit1) {
- if (allow_update_cdf) {
- update_cdf(av1_get_pred_cdf_single_ref_p4(xd),
- ref0 != LAST_FRAME, 2);
- }
-#if CONFIG_ENTROPY_STATS
- counts->single_ref[av1_get_pred_context_single_ref_p4(xd)][3]
- [ref0 != LAST_FRAME]++;
-#endif // CONFIG_ENTROPY_STATS
- } else {
- if (allow_update_cdf) {
- update_cdf(av1_get_pred_cdf_single_ref_p5(xd),
- ref0 != LAST3_FRAME, 2);
- }
-#if CONFIG_ENTROPY_STATS
- counts->single_ref[av1_get_pred_context_single_ref_p5(xd)][4]
- [ref0 != LAST3_FRAME]++;
-#endif // CONFIG_ENTROPY_STATS
- }
- }
- }
-
- if (cm->seq_params.enable_interintra_compound &&
- is_interintra_allowed(mbmi)) {
- const int bsize_group = size_group_lookup[bsize];
- if (mbmi->ref_frame[1] == INTRA_FRAME) {  // inter-intra is identified by INTRA_FRAME as the second reference
-#if CONFIG_ENTROPY_STATS
- counts->interintra[bsize_group][1]++;
-#endif
- if (allow_update_cdf)
- update_cdf(fc->interintra_cdf[bsize_group], 1, 2);
-#if CONFIG_ENTROPY_STATS
- counts->interintra_mode[bsize_group][mbmi->interintra_mode]++;
-#endif
- if (allow_update_cdf) {
- update_cdf(fc->interintra_mode_cdf[bsize_group],
- mbmi->interintra_mode, INTERINTRA_MODES);
- }
- if (is_interintra_wedge_used(bsize)) {
-#if CONFIG_ENTROPY_STATS
- counts->wedge_interintra[bsize][mbmi->use_wedge_interintra]++;
-#endif
- if (allow_update_cdf) {
- update_cdf(fc->wedge_interintra_cdf[bsize],
- mbmi->use_wedge_interintra, 2);
- }
- if (mbmi->use_wedge_interintra) {
-#if CONFIG_ENTROPY_STATS
- counts->wedge_idx[bsize][mbmi->interintra_wedge_index]++;
-#endif
- if (allow_update_cdf) {
- update_cdf(fc->wedge_idx_cdf[bsize],
- mbmi->interintra_wedge_index, 16);
- }
- }
- }
- } else {
-#if CONFIG_ENTROPY_STATS
- counts->interintra[bsize_group][0]++;
-#endif
- if (allow_update_cdf)
- update_cdf(fc->interintra_cdf[bsize_group], 0, 2);
- }
- }
-
- set_ref_ptrs(cm, xd, mbmi->ref_frame[0], mbmi->ref_frame[1]);
- const MOTION_MODE motion_allowed =
- cm->switchable_motion_mode
- ? motion_mode_allowed(xd->global_motion, xd, mbmi,
- cm->allow_warped_motion)
- : SIMPLE_TRANSLATION;
- if (mbmi->ref_frame[1] != INTRA_FRAME) {
- if (motion_allowed == WARPED_CAUSAL) {  // full motion-mode symbol
-#if CONFIG_ENTROPY_STATS
- counts->motion_mode[bsize][mbmi->motion_mode]++;
-#endif
- if (allow_update_cdf) {
- update_cdf(fc->motion_mode_cdf[bsize], mbmi->motion_mode,
- MOTION_MODES);
- }
- } else if (motion_allowed == OBMC_CAUSAL) {  // binary OBMC on/off symbol
-#if CONFIG_ENTROPY_STATS
- counts->obmc[bsize][mbmi->motion_mode == OBMC_CAUSAL]++;
-#endif
- if (allow_update_cdf) {
- update_cdf(fc->obmc_cdf[bsize], mbmi->motion_mode == OBMC_CAUSAL,
- 2);
- }
- }
- }
-
- if (has_second_ref(mbmi)) {
- assert(cm->reference_mode != SINGLE_REFERENCE &&
- is_inter_compound_mode(mbmi->mode) &&
- mbmi->motion_mode == SIMPLE_TRANSLATION);
-
- const int masked_compound_used =
- is_any_masked_compound_used(bsize) &&
- cm->seq_params.enable_masked_compound;
- if (masked_compound_used) {
- const int comp_group_idx_ctx = get_comp_group_idx_context(xd);
-#if CONFIG_ENTROPY_STATS
- ++counts->comp_group_idx[comp_group_idx_ctx][mbmi->comp_group_idx];
-#endif
- if (allow_update_cdf) {
- update_cdf(fc->comp_group_idx_cdf[comp_group_idx_ctx],
- mbmi->comp_group_idx, 2);
- }
- }
-
- if (mbmi->comp_group_idx == 0) {
- const int comp_index_ctx = get_comp_index_context(cm, xd);
-#if CONFIG_ENTROPY_STATS
- ++counts->compound_index[comp_index_ctx][mbmi->compound_idx];
-#endif
- if (allow_update_cdf) {
- update_cdf(fc->compound_index_cdf[comp_index_ctx],
- mbmi->compound_idx, 2);
- }
- } else {
- assert(masked_compound_used);
- if (is_interinter_compound_used(COMPOUND_WEDGE, bsize)) {
-#if CONFIG_ENTROPY_STATS
- ++counts->compound_type[bsize][mbmi->interinter_comp.type - 1];
-#endif
- if (allow_update_cdf) {
- update_cdf(fc->compound_type_cdf[bsize],
- mbmi->interinter_comp.type - 1, COMPOUND_TYPES - 1);  // symbol is the type minus 1, over COMPOUND_TYPES - 1 values
- }
- }
- }
- }
- if (mbmi->interinter_comp.type == COMPOUND_WEDGE) {
- if (is_interinter_compound_used(COMPOUND_WEDGE, bsize)) {
-#if CONFIG_ENTROPY_STATS
- counts->wedge_idx[bsize][mbmi->interinter_comp.wedge_index]++;
-#endif
- if (allow_update_cdf) {
- update_cdf(fc->wedge_idx_cdf[bsize],
- mbmi->interinter_comp.wedge_index, 16);
- }
- }
- }
- }
- }
-
- if (inter_block &&
- !segfeature_active(&cm->seg, mbmi->segment_id, SEG_LVL_SKIP)) {
- int16_t mode_ctx;
- const PREDICTION_MODE mode = mbmi->mode;
-
- mode_ctx =
- av1_mode_context_analyzer(mbmi_ext->mode_context, mbmi->ref_frame);
- if (has_second_ref(mbmi)) {
-#if CONFIG_ENTROPY_STATS
- ++counts->inter_compound_mode[mode_ctx][INTER_COMPOUND_OFFSET(mode)];
-#endif
- if (allow_update_cdf)
- update_cdf(fc->inter_compound_mode_cdf[mode_ctx],
- INTER_COMPOUND_OFFSET(mode), INTER_COMPOUND_MODES);
- } else {
- update_inter_mode_stats(fc, counts, mode, mode_ctx, allow_update_cdf);
- }
-
- // NOTE(review): the two DRL loops below only touch counters (and only in
- // CONFIG_ENTROPY_STATS builds); no update_cdf() call is made for the DRL
- // symbol in this function - confirm that is intended.
- int mode_allowed = (mbmi->mode == NEWMV);
- mode_allowed |= (mbmi->mode == NEW_NEWMV);
- if (mode_allowed) {
- uint8_t ref_frame_type = av1_ref_frame_type(mbmi->ref_frame);
- int idx;
-
- for (idx = 0; idx < 2; ++idx) {
- if (mbmi_ext->ref_mv_count[ref_frame_type] > idx + 1) {
-#if CONFIG_ENTROPY_STATS
- uint8_t drl_ctx =
- av1_drl_ctx(mbmi_ext->ref_mv_stack[ref_frame_type], idx);
- ++counts->drl_mode[drl_ctx][mbmi->ref_mv_idx != idx];
-#endif
-
- if (mbmi->ref_mv_idx == idx) break;  // stop at the chosen candidate index
- }
- }
- }
-
- if (have_nearmv_in_inter_mode(mbmi->mode)) {
- uint8_t ref_frame_type = av1_ref_frame_type(mbmi->ref_frame);
- int idx;
-
- for (idx = 1; idx < 3; ++idx) {  // same walk as above, with ref_mv_idx compared against idx - 1
- if (mbmi_ext->ref_mv_count[ref_frame_type] > idx + 1) {
-#if CONFIG_ENTROPY_STATS
- uint8_t drl_ctx =
- av1_drl_ctx(mbmi_ext->ref_mv_stack[ref_frame_type], idx);
- ++counts->drl_mode[drl_ctx][mbmi->ref_mv_idx != idx - 1];
-#endif
-
- if (mbmi->ref_mv_idx == idx - 1) break;
- }
- }
- }
- }
- }
-}
-/* Snapshot of the neighbour contexts touched while a block is searched.
- * Filled by save_context() and written back by restore_context().
- */
-typedef struct {
- ENTROPY_CONTEXT a[MAX_MIB_SIZE * MAX_MB_PLANE];  // copy of xd->above_context, one slice per plane
- ENTROPY_CONTEXT l[MAX_MIB_SIZE * MAX_MB_PLANE];  // copy of xd->left_context, one slice per plane
- PARTITION_CONTEXT sa[MAX_MIB_SIZE];  // copy of xd->above_seg_context starting at mi_col
- PARTITION_CONTEXT sl[MAX_MIB_SIZE];  // copy of xd->left_seg_context starting at mi_row & MAX_MIB_MASK
- TXFM_CONTEXT *p_ta;  // value of xd->above_txfm_context at save time
- TXFM_CONTEXT *p_tl;  // value of xd->left_txfm_context at save time
- TXFM_CONTEXT ta[MAX_MIB_SIZE];  // contents read through xd->above_txfm_context
- TXFM_CONTEXT tl[MAX_MIB_SIZE];  // contents read through xd->left_txfm_context
-} RD_SEARCH_MACROBLOCK_CONTEXT;
-
-// Writes a snapshot taken by save_context() back into the macroblock state:
-// per-plane above/left entropy contexts, above/left partition contexts, and
-// the transform-size contexts (both the saved pointers and the data they
-// point at). mi_row, mi_col and bsize must describe the same block that was
-// passed to save_context().
-static void restore_context(MACROBLOCK *x,
-                            const RD_SEARCH_MACROBLOCK_CONTEXT *ctx, int mi_row,
-                            int mi_col, BLOCK_SIZE bsize,
-                            const int num_planes) {
-  MACROBLOCKD *const xd = &x->e_mbd;
-  // Block dimensions in 4x4 units (entropy contexts) and in mi units.
-  const int cols_4x4 = block_size_wide[bsize] >> tx_size_wide_log2[0];
-  const int rows_4x4 = block_size_high[bsize] >> tx_size_high_log2[0];
-  const int mi_wide = mi_size_wide[bsize];
-  const int mi_high = mi_size_high[bsize];
-  // Left-side arrays are indexed by the row masked with MAX_MIB_MASK.
-  const int sb_row = mi_row & MAX_MIB_MASK;
-
-  for (int plane = 0; plane < num_planes; ++plane) {
-    const int ss_x = xd->plane[plane].subsampling_x;
-    const int ss_y = xd->plane[plane].subsampling_y;
-
-    // Offsets and byte counts are both scaled by the plane's subsampling.
-    memcpy(xd->above_context[plane] + (mi_col >> ss_x),
-           &ctx->a[cols_4x4 * plane],
-           (sizeof(ENTROPY_CONTEXT) * cols_4x4) >> ss_x);
-    memcpy(xd->left_context[plane] + (sb_row >> ss_y),
-           &ctx->l[rows_4x4 * plane],
-           (sizeof(ENTROPY_CONTEXT) * rows_4x4) >> ss_y);
-  }
-
-  memcpy(&xd->above_seg_context[mi_col], ctx->sa,
-         sizeof(xd->above_seg_context[0]) * mi_wide);
-  memcpy(&xd->left_seg_context[sb_row], ctx->sl,
-         sizeof(xd->left_seg_context[0]) * mi_high);
-
-  // Re-point the transform contexts first, then refill what they point at.
-  xd->above_txfm_context = ctx->p_ta;
-  xd->left_txfm_context = ctx->p_tl;
-  memcpy(xd->above_txfm_context, ctx->ta,
-         sizeof(xd->above_txfm_context[0]) * mi_wide);
-  memcpy(xd->left_txfm_context, ctx->tl,
-         sizeof(xd->left_txfm_context[0]) * mi_high);
-}
-
-// Captures the neighbour contexts of the block at (mi_row, mi_col) into ctx
-// so that restore_context() can later put them back: per-plane above/left
-// entropy contexts, above/left partition contexts, and the transform-size
-// contexts together with the pointers currently held in xd.
-static void save_context(const MACROBLOCK *x, RD_SEARCH_MACROBLOCK_CONTEXT *ctx,
-                         int mi_row, int mi_col, BLOCK_SIZE bsize,
-                         const int num_planes) {
-  const MACROBLOCKD *const xd = &x->e_mbd;
-  // Block dimensions in 4x4 units (entropy contexts) and in mi units.
-  const int cols_4x4 = block_size_wide[bsize] >> tx_size_wide_log2[0];
-  const int rows_4x4 = block_size_high[bsize] >> tx_size_high_log2[0];
-  const int mi_wide = mi_size_wide[bsize];
-  const int mi_high = mi_size_high[bsize];
-  // Left-side arrays are indexed by the row masked with MAX_MIB_MASK.
-  const int sb_row = mi_row & MAX_MIB_MASK;
-
-  // Buffer the above/left entropy contexts of every plane.
-  for (int plane = 0; plane < num_planes; ++plane) {
-    const int ss_x = xd->plane[plane].subsampling_x;
-    const int ss_y = xd->plane[plane].subsampling_y;
-
-    memcpy(&ctx->a[cols_4x4 * plane],
-           xd->above_context[plane] + (mi_col >> ss_x),
-           (sizeof(ENTROPY_CONTEXT) * cols_4x4) >> ss_x);
-    memcpy(&ctx->l[rows_4x4 * plane],
-           xd->left_context[plane] + (sb_row >> ss_y),
-           (sizeof(ENTROPY_CONTEXT) * rows_4x4) >> ss_y);
-  }
-
-  memcpy(ctx->sa, &xd->above_seg_context[mi_col],
-         sizeof(xd->above_seg_context[0]) * mi_wide);
-  memcpy(ctx->sl, &xd->left_seg_context[sb_row],
-         sizeof(xd->left_seg_context[0]) * mi_high);
-
-  memcpy(ctx->ta, xd->above_txfm_context,
-         sizeof(xd->above_txfm_context[0]) * mi_wide);
-  memcpy(ctx->tl, xd->left_txfm_context,
-         sizeof(xd->left_txfm_context[0]) * mi_high);
-  // Remember where the transform contexts pointed so they can be re-aimed.
-  ctx->p_ta = xd->above_txfm_context;
-  ctx->p_tl = xd->left_txfm_context;
-}
-
-// Encodes one block of a partition using the mode decision stored in ctx.
-// With a non-zero dry_run only set_offsets(), update_state() and
-// encode_superblock() run; on the real pass (dry_run == 0) the coefficient
-// buffer is set up beforehand and, afterwards, x->cb_offset is advanced, the
-// block's loop-filter deltas / comp_group_idx are finalized and
-// update_stats() is called.
-static void encode_b(const AV1_COMP *const cpi, TileDataEnc *tile_data,
-                     ThreadData *td, TOKENEXTRA **tp, int mi_row, int mi_col,
-                     RUN_TYPE dry_run, BLOCK_SIZE bsize,
-                     PARTITION_TYPE partition,
-                     const PICK_MODE_CONTEXT *const ctx, int *rate) {
-  TileInfo *const tile = &tile_data->tile_info;
-  MACROBLOCK *const x = &td->mb;
-  MACROBLOCKD *const xd = &x->e_mbd;
-
-  set_offsets(cpi, tile, x, mi_row, mi_col, bsize);
-  // xd->mi[0] is read only after set_offsets() has positioned xd.
-  MB_MODE_INFO *const mbmi = xd->mi[0];
-  mbmi->partition = partition;
-  update_state(cpi, tile_data, td, ctx, mi_row, mi_col, bsize, dry_run);
-
-  if (!dry_run) {
-    av1_set_coeff_buffer(cpi, x, mi_row, mi_col);
-  }
-
-  encode_superblock(cpi, tile_data, td, tp, dry_run, mi_row, mi_col, bsize,
-                    rate);
-
-  // Everything below is bookkeeping for the real encoding pass only.
-  if (dry_run) return;
-
-  x->cb_offset += block_size_wide[bsize] * block_size_high[bsize];
-
-  const AV1_COMMON *const cm = &cpi->common;
-  if (bsize == cm->seq_params.sb_size && mbmi->skip == 1 &&
-      cm->delta_lf_present_flag) {
-    // A skipped superblock-sized block takes over the running deltas in xd.
-    const int frame_lf_count =
-        av1_num_planes(cm) > 1 ? FRAME_LF_COUNT : FRAME_LF_COUNT - 2;
-    for (int lf_id = 0; lf_id < frame_lf_count; ++lf_id) {
-      mbmi->delta_lf[lf_id] = xd->delta_lf[lf_id];
-    }
-    mbmi->delta_lf_from_base = xd->delta_lf_from_base;
-  }
-
-  if (has_second_ref(mbmi)) {
-    // Group 1 only when compound_idx is non-zero and the type is not
-    // COMPOUND_AVERAGE; group 0 otherwise.
-    mbmi->comp_group_idx =
-        (mbmi->compound_idx != 0 &&
-         mbmi->interinter_comp.type != COMPOUND_AVERAGE);
-  }
-
-  update_stats(cm, tile_data, td, mi_row, mi_col);
-}
-
-// Walks the partition tree rooted at pc_tree for the bsize square at
-// (mi_row, mi_col) and encodes every leaf through encode_b(); SPLIT nodes
-// recurse. On the real pass (dry_run == 0) the partition symbol is also
-// recorded, but only when the block has a partition context and both
-// mi_row + half and mi_col + half lie inside the frame. Finishes by
-// calling update_ext_partition_context() for the square.
-static void encode_sb(const AV1_COMP *const cpi, ThreadData *td,
-                      TileDataEnc *tile_data, TOKENEXTRA **tp, int mi_row,
-                      int mi_col, RUN_TYPE dry_run, BLOCK_SIZE bsize,
-                      PC_TREE *pc_tree, int *rate) {
-  const AV1_COMMON *const cm = &cpi->common;
-  MACROBLOCKD *const xd = &td->mb.e_mbd;
-  const int half = mi_size_wide[bsize] / 2;
-  const int quarter = mi_size_wide[bsize] / 4;
-  const int mid_row = mi_row + half;
-  const int mid_col = mi_col + half;
-  // Blocks below 8x8 carry no partition symbol; -1 marks that case.
-  int part_ctx = -1;
-  if (bsize >= BLOCK_8X8) {
-    part_ctx = partition_plane_context(xd, mi_row, mi_col, bsize);
-  }
-  const PARTITION_TYPE partition = pc_tree->partitioning;
-  const BLOCK_SIZE subsize = get_partition_subsize(bsize, partition);
-  // Size of the two smaller pieces in the *_A / *_B partition types.
-  const BLOCK_SIZE split_size = get_partition_subsize(bsize, PARTITION_SPLIT);
-
-  if (mi_row >= cm->mi_rows || mi_col >= cm->mi_cols) return;
-
-  if (!dry_run && part_ctx >= 0 && mid_row < cm->mi_rows &&
-      mid_col < cm->mi_cols) {
-#if CONFIG_ENTROPY_STATS
-    td->counts->partition[part_ctx][partition]++;
-#endif
-
-    if (tile_data->allow_update_cdf) {
-      FRAME_CONTEXT *const tile_fc = xd->tile_ctx;
-      update_cdf(tile_fc->partition_cdf[part_ctx], partition,
-                 partition_cdf_length(bsize));
-    }
-  }
-
-  switch (partition) {
-    case PARTITION_NONE:
-      encode_b(cpi, tile_data, td, tp, mi_row, mi_col, dry_run, subsize,
-               partition, &pc_tree->none, rate);
-      break;
-
-    case PARTITION_VERT:
-      encode_b(cpi, tile_data, td, tp, mi_row, mi_col, dry_run, subsize,
-               partition, &pc_tree->vertical[0], rate);
-      // The right half exists only if it starts inside the frame.
-      if (mid_col < cm->mi_cols) {
-        encode_b(cpi, tile_data, td, tp, mi_row, mid_col, dry_run, subsize,
-                 partition, &pc_tree->vertical[1], rate);
-      }
-      break;
-
-    case PARTITION_HORZ:
-      encode_b(cpi, tile_data, td, tp, mi_row, mi_col, dry_run, subsize,
-               partition, &pc_tree->horizontal[0], rate);
-      // The bottom half exists only if it starts inside the frame.
-      if (mid_row < cm->mi_rows) {
-        encode_b(cpi, tile_data, td, tp, mid_row, mi_col, dry_run, subsize,
-                 partition, &pc_tree->horizontal[1], rate);
-      }
-      break;
-
-    case PARTITION_SPLIT:
-      encode_sb(cpi, td, tile_data, tp, mi_row, mi_col, dry_run, subsize,
-                pc_tree->split[0], rate);
-      encode_sb(cpi, td, tile_data, tp, mi_row, mid_col, dry_run, subsize,
-                pc_tree->split[1], rate);
-      encode_sb(cpi, td, tile_data, tp, mid_row, mi_col, dry_run, subsize,
-                pc_tree->split[2], rate);
-      encode_sb(cpi, td, tile_data, tp, mid_row, mid_col, dry_run, subsize,
-                pc_tree->split[3], rate);
-      break;
-
-    case PARTITION_HORZ_A:
-      encode_b(cpi, tile_data, td, tp, mi_row, mi_col, dry_run, split_size,
-               partition, &pc_tree->horizontala[0], rate);
-      encode_b(cpi, tile_data, td, tp, mi_row, mid_col, dry_run, split_size,
-               partition, &pc_tree->horizontala[1], rate);
-      encode_b(cpi, tile_data, td, tp, mid_row, mi_col, dry_run, subsize,
-               partition, &pc_tree->horizontala[2], rate);
-      break;
-
-    case PARTITION_HORZ_B:
-      encode_b(cpi, tile_data, td, tp, mi_row, mi_col, dry_run, subsize,
-               partition, &pc_tree->horizontalb[0], rate);
-      encode_b(cpi, tile_data, td, tp, mid_row, mi_col, dry_run, split_size,
-               partition, &pc_tree->horizontalb[1], rate);
-      encode_b(cpi, tile_data, td, tp, mid_row, mid_col, dry_run, split_size,
-               partition, &pc_tree->horizontalb[2], rate);
-      break;
-
-    case PARTITION_VERT_A:
-      encode_b(cpi, tile_data, td, tp, mi_row, mi_col, dry_run, split_size,
-               partition, &pc_tree->verticala[0], rate);
-      encode_b(cpi, tile_data, td, tp, mid_row, mi_col, dry_run, split_size,
-               partition, &pc_tree->verticala[1], rate);
-      encode_b(cpi, tile_data, td, tp, mi_row, mid_col, dry_run, subsize,
-               partition, &pc_tree->verticala[2], rate);
-      break;
-
-    case PARTITION_VERT_B:
-      encode_b(cpi, tile_data, td, tp, mi_row, mi_col, dry_run, subsize,
-               partition, &pc_tree->verticalb[0], rate);
-      encode_b(cpi, tile_data, td, tp, mi_row, mid_col, dry_run, split_size,
-               partition, &pc_tree->verticalb[1], rate);
-      encode_b(cpi, tile_data, td, tp, mid_row, mid_col, dry_run, split_size,
-               partition, &pc_tree->verticalb[2], rate);
-      break;
-
-    case PARTITION_HORZ_4: {
-      // Four horizontal strips; the first is always coded, later ones only
-      // while they still start inside the frame.
-      int strip_row = mi_row;
-      for (int strip = 0; strip < 4; ++strip, strip_row += quarter) {
-        if (strip > 0 && strip_row >= cm->mi_rows) break;
-
-        encode_b(cpi, tile_data, td, tp, strip_row, mi_col, dry_run, subsize,
-                 partition, &pc_tree->horizontal4[strip], rate);
-      }
-      break;
-    }
-
-    case PARTITION_VERT_4: {
-      // Four vertical strips, same rule as above applied to columns.
-      int strip_col = mi_col;
-      for (int strip = 0; strip < 4; ++strip, strip_col += quarter) {
-        if (strip > 0 && strip_col >= cm->mi_cols) break;
-
-        encode_b(cpi, tile_data, td, tp, mi_row, strip_col, dry_run, subsize,
-                 partition, &pc_tree->vertical4[strip], rate);
-      }
-      break;
-    }
-
-    default: assert(0 && "Invalid partition type."); break;
-  }
-
-  update_ext_partition_context(xd, mi_row, mi_col, subsize, bsize, partition);
-}
-
-// Check to see if the given partition size is allowed for a specified number
-// of mi block rows and columns remaining in the image.
-// If not then return the largest allowed partition size.
-//
-// bsize      requested block size; it is stepped down 3 enum values at a
-//            time until a size fits.
-// rows_left  mi rows remaining; cols_left likewise for columns.
-// bh, bw     out: mi height/width of the last size that was tried. They are
-//            left untouched when rows_left or cols_left is <= 0.
-//
-// The search runs on a plain int rather than on the BLOCK_SIZE parameter.
-// If BLOCK_SIZE is an unsigned (e.g. packed) enum and bsize is not a
-// multiple of 3, "bsize -= 3" would wrap past zero, keep "bsize > 0" true
-// and index mi_size_high[] / mi_size_wide[] out of bounds. With an int the
-// loop simply ends, and the result is clamped to 0, the value the loop
-// already ends on for sizes that are a multiple of 3.
-static BLOCK_SIZE find_partition_size(BLOCK_SIZE bsize, int rows_left,
-                                      int cols_left, int *bh, int *bw) {
-  if (rows_left <= 0 || cols_left <= 0) {
-    return AOMMIN(bsize, BLOCK_8X8);
-  }
-
-  int size = (int)bsize;
-  for (; size > 0; size -= 3) {
-    *bh = mi_size_high[size];
-    *bw = mi_size_wide[size];
-    if ((*bh <= rows_left) && (*bw <= cols_left)) {
-      break;
-    }
-  }
-  // Only reachable with a negative value when bsize was not a multiple of 3.
-  if (size < 0) size = 0;
-  return (BLOCK_SIZE)size;
-}
-
-// Fills the mode-info grid of a superblock that is only partly inside the
-// tile. Each visited position is pointed at its MB_MODE_INFO (mi + offset)
-// and given the block size find_partition_size() selects for the rows and
-// columns still available there. The row/column strides start at bh_in/bw_in
-// and are updated by find_partition_size() through its out-parameters; the
-// column stride is reset to bw_in at the start of every row.
-static void set_partial_sb_partition(const AV1_COMMON *const cm,
-                                     MB_MODE_INFO *mi, int bh_in, int bw_in,
-                                     int mi_rows_remaining,
-                                     int mi_cols_remaining, BLOCK_SIZE bsize,
-                                     MB_MODE_INFO **mib) {
-  const int sb_mi = cm->seq_params.mib_size;
-  int row_step = bh_in;
-  int row = 0;
-
-  while (row < sb_mi) {
-    int col_step = bw_in;
-    int col = 0;
-
-    while (col < sb_mi) {
-      const int offset = row * cm->mi_stride + col;
-      MB_MODE_INFO *const entry = mi + offset;
-
-      mib[offset] = entry;
-      entry->sb_type =
-          find_partition_size(bsize, mi_rows_remaining - row,
-                              mi_cols_remaining - col, &row_step, &col_step);
-      col += col_step;
-    }
-    row += row_step;
-  }
-}
-
-// Gives every mode-info entry of the superblock at (mi_row, mi_col) the same
-// block size, bsize. A superblock that reaches past the tile's bottom or
-// right edge cannot always use that size; such partial superblocks are
-// handed to set_partial_sb_partition(), which picks the largest size that
-// still fits at each position.
-static void set_fixed_partitioning(AV1_COMP *cpi, const TileInfo *const tile,
-                                   MB_MODE_INFO **mib, int mi_row, int mi_col,
-                                   BLOCK_SIZE bsize) {
-  AV1_COMMON *const cm = &cpi->common;
-  const int mi_rows_remaining = tile->mi_row_end - mi_row;
-  const int mi_cols_remaining = tile->mi_col_end - mi_col;
-  MB_MODE_INFO *const sb_origin = cm->mi + mi_row * cm->mi_stride + mi_col;
-  const int step_rows = mi_size_high[bsize];
-  const int step_cols = mi_size_wide[bsize];
-
-  assert((mi_rows_remaining > 0) && (mi_cols_remaining > 0));
-
-  const int sb_mi = cm->seq_params.mib_size;
-
-  // Partial superblock: not all of it is "in image".
-  if (mi_cols_remaining < sb_mi || mi_rows_remaining < sb_mi) {
-    set_partial_sb_partition(cm, sb_origin, step_rows, step_cols,
-                             mi_rows_remaining, mi_cols_remaining, bsize, mib);
-    return;
-  }
-
-  // Whole superblock available: apply the requested size everywhere.
-  for (int row = 0; row < sb_mi; row += step_rows) {
-    for (int col = 0; col < sb_mi; col += step_cols) {
-      const int offset = row * cm->mi_stride + col;
-      MB_MODE_INFO *const entry = sb_origin + offset;
-
-      mib[offset] = entry;
-      entry->sb_type = bsize;
-    }
-  }
-}
-
-static void rd_use_partition(AV1_COMP *cpi, ThreadData *td,
- TileDataEnc *tile_data, MB_MODE_INFO **mib,
- TOKENEXTRA **tp, int mi_row, int mi_col,
- BLOCK_SIZE bsize, int *rate, int64_t *dist,
- int do_recon, PC_TREE *pc_tree) {
- AV1_COMMON *const cm = &cpi->common;
- const int num_planes = av1_num_planes(cm);
- TileInfo *const tile_info = &tile_data->tile_info;
- MACROBLOCK *const x = &td->mb;
- MACROBLOCKD *const xd = &x->e_mbd;
- const int bs = mi_size_wide[bsize];
- const int hbs = bs / 2;
- int i;
- const int pl = (bsize >= BLOCK_8X8)
- ? partition_plane_context(xd, mi_row, mi_col, bsize)
- : 0;
- const PARTITION_TYPE partition =
- (bsize >= BLOCK_8X8) ? get_partition(cm, mi_row, mi_col, bsize)
- : PARTITION_NONE;
- const BLOCK_SIZE subsize = get_partition_subsize(bsize, partition);
- RD_SEARCH_MACROBLOCK_CONTEXT x_ctx;
- RD_STATS last_part_rdc, none_rdc, chosen_rdc;
- BLOCK_SIZE sub_subsize = BLOCK_4X4;
- int splits_below = 0;
- BLOCK_SIZE bs_type = mib[0]->sb_type;
- int do_partition_search = 1;
- PICK_MODE_CONTEXT *ctx_none = &pc_tree->none;
-
- if (mi_row >= cm->mi_rows || mi_col >= cm->mi_cols) return;
-
- assert(mi_size_wide[bsize] == mi_size_high[bsize]);
-
- av1_invalid_rd_stats(&last_part_rdc);
- av1_invalid_rd_stats(&none_rdc);
- av1_invalid_rd_stats(&chosen_rdc);
-
- pc_tree->partitioning = partition;
-
- xd->above_txfm_context = cm->above_txfm_context[tile_info->tile_row] + mi_col;
- xd->left_txfm_context =
- xd->left_txfm_context_buffer + (mi_row & MAX_MIB_MASK);
- save_context(x, &x_ctx, mi_row, mi_col, bsize, num_planes);
-
- if (bsize == BLOCK_16X16 && cpi->vaq_refresh) {
- set_offsets(cpi, tile_info, x, mi_row, mi_col, bsize);
- x->mb_energy = av1_log_block_var(cpi, x, bsize);
- }
-
- if (do_partition_search &&
- cpi->sf.partition_search_type == SEARCH_PARTITION &&
- cpi->sf.adjust_partitioning_from_last_frame) {
- // Check if any of the sub blocks are further split.
- if (partition == PARTITION_SPLIT && subsize > BLOCK_8X8) {
- sub_subsize = get_partition_subsize(subsize, PARTITION_SPLIT);
- splits_below = 1;
- for (i = 0; i < 4; i++) {
- int jj = i >> 1, ii = i & 0x01;
- MB_MODE_INFO *this_mi = mib[jj * hbs * cm->mi_stride + ii * hbs];
- if (this_mi && this_mi->sb_type >= sub_subsize) {
- splits_below = 0;
- }
- }
- }
-
- // If partition is not none try none unless each of the 4 splits are split
- // even further..
- if (partition != PARTITION_NONE && !splits_below &&
- mi_row + hbs < cm->mi_rows && mi_col + hbs < cm->mi_cols) {
- pc_tree->partitioning = PARTITION_NONE;
- rd_pick_sb_modes(cpi, tile_data, x, mi_row, mi_col, &none_rdc,
- PARTITION_NONE, bsize, ctx_none, INT64_MAX);
-
- if (none_rdc.rate < INT_MAX) {
- none_rdc.rate += x->partition_cost[pl][PARTITION_NONE];
- none_rdc.rdcost = RDCOST(x->rdmult, none_rdc.rate, none_rdc.dist);
- }
-
- restore_context(x, &x_ctx, mi_row, mi_col, bsize, num_planes);
- mib[0]->sb_type = bs_type;
- pc_tree->partitioning = partition;
- }
- }
- for (int b = 0; b < 2; ++b) {
- pc_tree->horizontal[b].skip_ref_frame_mask = 0;
- pc_tree->vertical[b].skip_ref_frame_mask = 0;
- }
- for (int b = 0; b < 3; ++b) {
- pc_tree->horizontala[b].skip_ref_frame_mask = 0;
- pc_tree->horizontalb[b].skip_ref_frame_mask = 0;
- pc_tree->verticala[b].skip_ref_frame_mask = 0;
- pc_tree->verticalb[b].skip_ref_frame_mask = 0;
- }
- for (int b = 0; b < 4; ++b) {
- pc_tree->horizontal4[b].skip_ref_frame_mask = 0;
- pc_tree->vertical4[b].skip_ref_frame_mask = 0;
- }
- switch (partition) {
- case PARTITION_NONE:
- rd_pick_sb_modes(cpi, tile_data, x, mi_row, mi_col, &last_part_rdc,
- PARTITION_NONE, bsize, ctx_none, INT64_MAX);
- break;
- case PARTITION_HORZ:
- rd_pick_sb_modes(cpi, tile_data, x, mi_row, mi_col, &last_part_rdc,
- PARTITION_HORZ, subsize, &pc_tree->horizontal[0],
- INT64_MAX);
- if (last_part_rdc.rate != INT_MAX && bsize >= BLOCK_8X8 &&
- mi_row + hbs < cm->mi_rows) {
- RD_STATS tmp_rdc;
- const PICK_MODE_CONTEXT *const ctx_h = &pc_tree->horizontal[0];
- av1_init_rd_stats(&tmp_rdc);
- update_state(cpi, tile_data, td, ctx_h, mi_row, mi_col, subsize, 1);
- encode_superblock(cpi, tile_data, td, tp, DRY_RUN_NORMAL, mi_row,
- mi_col, subsize, NULL);
- rd_pick_sb_modes(cpi, tile_data, x, mi_row + hbs, mi_col, &tmp_rdc,
- PARTITION_HORZ, subsize, &pc_tree->horizontal[1],
- INT64_MAX);
- if (tmp_rdc.rate == INT_MAX || tmp_rdc.dist == INT64_MAX) {
- av1_invalid_rd_stats(&last_part_rdc);
- break;
- }
- last_part_rdc.rate += tmp_rdc.rate;
- last_part_rdc.dist += tmp_rdc.dist;
- last_part_rdc.rdcost += tmp_rdc.rdcost;
- }
- break;
- case PARTITION_VERT:
- rd_pick_sb_modes(cpi, tile_data, x, mi_row, mi_col, &last_part_rdc,
- PARTITION_VERT, subsize, &pc_tree->vertical[0],
- INT64_MAX);
- if (last_part_rdc.rate != INT_MAX && bsize >= BLOCK_8X8 &&
- mi_col + hbs < cm->mi_cols) {
- RD_STATS tmp_rdc;
- const PICK_MODE_CONTEXT *const ctx_v = &pc_tree->vertical[0];
- av1_init_rd_stats(&tmp_rdc);
- update_state(cpi, tile_data, td, ctx_v, mi_row, mi_col, subsize, 1);
- encode_superblock(cpi, tile_data, td, tp, DRY_RUN_NORMAL, mi_row,
- mi_col, subsize, NULL);
- rd_pick_sb_modes(cpi, tile_data, x, mi_row, mi_col + hbs, &tmp_rdc,
- PARTITION_VERT, subsize,
- &pc_tree->vertical[bsize > BLOCK_8X8], INT64_MAX);
- if (tmp_rdc.rate == INT_MAX || tmp_rdc.dist == INT64_MAX) {
- av1_invalid_rd_stats(&last_part_rdc);
- break;
- }
- last_part_rdc.rate += tmp_rdc.rate;
- last_part_rdc.dist += tmp_rdc.dist;
- last_part_rdc.rdcost += tmp_rdc.rdcost;
- }
- break;
- case PARTITION_SPLIT:
- last_part_rdc.rate = 0;
- last_part_rdc.dist = 0;
- last_part_rdc.rdcost = 0;
- for (i = 0; i < 4; i++) {
- int x_idx = (i & 1) * hbs;
- int y_idx = (i >> 1) * hbs;
- int jj = i >> 1, ii = i & 0x01;
- RD_STATS tmp_rdc;
- if ((mi_row + y_idx >= cm->mi_rows) || (mi_col + x_idx >= cm->mi_cols))
- continue;
-
- av1_init_rd_stats(&tmp_rdc);
- rd_use_partition(cpi, td, tile_data,
- mib + jj * hbs * cm->mi_stride + ii * hbs, tp,
- mi_row + y_idx, mi_col + x_idx, subsize, &tmp_rdc.rate,
- &tmp_rdc.dist, i != 3, pc_tree->split[i]);
- if (tmp_rdc.rate == INT_MAX || tmp_rdc.dist == INT64_MAX) {
- av1_invalid_rd_stats(&last_part_rdc);
- break;
- }
- last_part_rdc.rate += tmp_rdc.rate;
- last_part_rdc.dist += tmp_rdc.dist;
- }
- break;
- case PARTITION_VERT_A:
- case PARTITION_VERT_B:
- case PARTITION_HORZ_A:
- case PARTITION_HORZ_B:
- case PARTITION_HORZ_4:
- case PARTITION_VERT_4:
- assert(0 && "Cannot handle extended partition types");
- default: assert(0); break;
- }
-
- if (last_part_rdc.rate < INT_MAX) {
- last_part_rdc.rate += x->partition_cost[pl][partition];
- last_part_rdc.rdcost =
- RDCOST(x->rdmult, last_part_rdc.rate, last_part_rdc.dist);
- }
-
- if (do_partition_search && cpi->sf.adjust_partitioning_from_last_frame &&
- cpi->sf.partition_search_type == SEARCH_PARTITION &&
- partition != PARTITION_SPLIT && bsize > BLOCK_8X8 &&
- (mi_row + bs < cm->mi_rows || mi_row + hbs == cm->mi_rows) &&
- (mi_col + bs < cm->mi_cols || mi_col + hbs == cm->mi_cols)) {
- BLOCK_SIZE split_subsize = get_partition_subsize(bsize, PARTITION_SPLIT);
- chosen_rdc.rate = 0;
- chosen_rdc.dist = 0;
-
- restore_context(x, &x_ctx, mi_row, mi_col, bsize, num_planes);
- pc_tree->partitioning = PARTITION_SPLIT;
-
- // Split partition.
- for (i = 0; i < 4; i++) {
- int x_idx = (i & 1) * hbs;
- int y_idx = (i >> 1) * hbs;
- RD_STATS tmp_rdc;
-
- if ((mi_row + y_idx >= cm->mi_rows) || (mi_col + x_idx >= cm->mi_cols))
- continue;
-
- save_context(x, &x_ctx, mi_row, mi_col, bsize, num_planes);
- pc_tree->split[i]->partitioning = PARTITION_NONE;
- rd_pick_sb_modes(cpi, tile_data, x, mi_row + y_idx, mi_col + x_idx,
- &tmp_rdc, PARTITION_SPLIT, split_subsize,
- &pc_tree->split[i]->none, INT64_MAX);
-
- restore_context(x, &x_ctx, mi_row, mi_col, bsize, num_planes);
- if (tmp_rdc.rate == INT_MAX || tmp_rdc.dist == INT64_MAX) {
- av1_invalid_rd_stats(&chosen_rdc);
- break;
- }
-
- chosen_rdc.rate += tmp_rdc.rate;
- chosen_rdc.dist += tmp_rdc.dist;
-
- if (i != 3)
- encode_sb(cpi, td, tile_data, tp, mi_row + y_idx, mi_col + x_idx,
- OUTPUT_ENABLED, split_subsize, pc_tree->split[i], NULL);
-
- chosen_rdc.rate += x->partition_cost[pl][PARTITION_NONE];
- }
- if (chosen_rdc.rate < INT_MAX) {
- chosen_rdc.rate += x->partition_cost[pl][PARTITION_SPLIT];
- chosen_rdc.rdcost = RDCOST(x->rdmult, chosen_rdc.rate, chosen_rdc.dist);
- }
- }
-
- // If last_part is better set the partitioning to that.
- if (last_part_rdc.rdcost < chosen_rdc.rdcost) {
- mib[0]->sb_type = bsize;
- if (bsize >= BLOCK_8X8) pc_tree->partitioning = partition;
- chosen_rdc = last_part_rdc;
- }
- // If none was better set the partitioning to that.
- if (none_rdc.rdcost < chosen_rdc.rdcost) {
- if (bsize >= BLOCK_8X8) pc_tree->partitioning = PARTITION_NONE;
- chosen_rdc = none_rdc;
- }
-
- restore_context(x, &x_ctx, mi_row, mi_col, bsize, num_planes);
-
- // We must have chosen a partitioning and encoding or we'll fail later on.
- // No other opportunities for success.
- if (bsize == cm->seq_params.sb_size)
- assert(chosen_rdc.rate < INT_MAX && chosen_rdc.dist < INT64_MAX);
-
- if (do_recon) {
- if (bsize == cm->seq_params.sb_size) {
- // NOTE: To get estimate for rate due to the tokens, use:
- // int rate_coeffs = 0;
- // encode_sb(cpi, td, tile_data, tp, mi_row, mi_col, DRY_RUN_COSTCOEFFS,
- // bsize, pc_tree, &rate_coeffs);
- x->cb_offset = 0;
- encode_sb(cpi, td, tile_data, tp, mi_row, mi_col, OUTPUT_ENABLED, bsize,
- pc_tree, NULL);
- } else {
- encode_sb(cpi, td, tile_data, tp, mi_row, mi_col, DRY_RUN_NORMAL, bsize,
- pc_tree, NULL);
- }
- }
-
- *rate = chosen_rdc.rate;
- *dist = chosen_rdc.dist;
-}
-
-/* clang-format off */
-static const BLOCK_SIZE min_partition_size[BLOCK_SIZES_ALL] = { // Relaxed lower search bound, looked up by the smallest block size observed (index named at the end of each row).
- BLOCK_4X4, // 4x4
- BLOCK_4X4, BLOCK_4X4, BLOCK_4X4, // 4x8, 8x4, 8x8
- BLOCK_4X4, BLOCK_4X4, BLOCK_8X8, // 8x16, 16x8, 16x16
- BLOCK_8X8, BLOCK_8X8, BLOCK_16X16, // 16x32, 32x16, 32x32
- BLOCK_16X16, BLOCK_16X16, BLOCK_16X16, // 32x64, 64x32, 64x64
- BLOCK_16X16, BLOCK_16X16, BLOCK_16X16, // 64x128, 128x64, 128x128
- BLOCK_4X4, BLOCK_4X4, BLOCK_8X8, // 4x16, 16x4, 8x32
- BLOCK_8X8, BLOCK_16X16, BLOCK_16X16, // 32x8, 16x64, 64x16
-};
-
-static const BLOCK_SIZE max_partition_size[BLOCK_SIZES_ALL] = { // Relaxed upper search bound, looked up by the largest block size observed (index named at the end of each row).
- BLOCK_8X8, // 4x4
- BLOCK_16X16, BLOCK_16X16, BLOCK_16X16, // 4x8, 8x4, 8x8
- BLOCK_32X32, BLOCK_32X32, BLOCK_32X32, // 8x16, 16x8, 16x16
- BLOCK_64X64, BLOCK_64X64, BLOCK_64X64, // 16x32, 32x16, 32x32
- BLOCK_LARGEST, BLOCK_LARGEST, BLOCK_LARGEST, // 32x64, 64x32, 64x64
- BLOCK_LARGEST, BLOCK_LARGEST, BLOCK_LARGEST, // 64x128, 128x64, 128x128
- BLOCK_16X16, BLOCK_16X16, BLOCK_32X32, // 4x16, 16x4, 8x32
- BLOCK_32X32, BLOCK_LARGEST, BLOCK_LARGEST, // 32x8, 16x64, 64x16
-};
-
-// Square block size that is no larger than the indexed block size in either dimension (index named at the end of each row).
-static const BLOCK_SIZE next_square_size[BLOCK_SIZES_ALL] = {
- BLOCK_4X4, // 4x4
- BLOCK_4X4, BLOCK_4X4, BLOCK_8X8, // 4x8, 8x4, 8x8
- BLOCK_8X8, BLOCK_8X8, BLOCK_16X16, // 8x16, 16x8, 16x16
- BLOCK_16X16, BLOCK_16X16, BLOCK_32X32, // 16x32, 32x16, 32x32
- BLOCK_32X32, BLOCK_32X32, BLOCK_64X64, // 32x64, 64x32, 64x64
- BLOCK_64X64, BLOCK_64X64, BLOCK_128X128, // 64x128, 128x64, 128x128
- BLOCK_4X4, BLOCK_4X4, BLOCK_8X8, // 4x16, 16x4, 8x32
- BLOCK_8X8, BLOCK_16X16, BLOCK_16X16, // 32x8, 16x64, 64x16
-};
-/* clang-format on */
-
-// Widens [*min_block_size, *max_block_size] so that it covers the sb_type of
-// every mode-info entry in the mib_size x mib_size grid that starts at mib
-// (rows are xd->mi_stride entries apart). A NULL entry counts as BLOCK_4X4.
-//
-// Both outputs are read before they are written, so the caller must
-// initialise them; calling this repeatedly accumulates a range over several
-// superblocks.
-static void get_sb_partition_size_range(const AV1_COMMON *const cm,
-                                        MACROBLOCKD *xd, MB_MODE_INFO **mib,
-                                        BLOCK_SIZE *min_block_size,
-                                        BLOCK_SIZE *max_block_size) {
-  const int sb_mi_size = cm->seq_params.mib_size;
-
-  for (int row = 0; row < sb_mi_size; ++row) {
-    MB_MODE_INFO **const row_base = mib + row * xd->mi_stride;
-    for (int col = 0; col < sb_mi_size; ++col) {
-      const MB_MODE_INFO *const entry = row_base[col];
-      const BLOCK_SIZE seen = entry ? entry->sb_type : BLOCK_4X4;
-      if (seen < *min_block_size) *min_block_size = seen;
-      if (seen > *max_block_size) *max_block_size = seen;
-    }
-  }
-}
-
-// Returns 1 when the row span [mi_row, mi_row + mi_step) contains the top or
-// the bottom boundary of the active picture area, otherwise 0.
-// The boundaries start as row 0 and cpi->common.mi_rows. In pass 2 both are
-// moved inwards by twice the first-pass inactive_zone_rows figure (the
-// inactive zone is counted in MBs, not mi units), and the bottom boundary is
-// never allowed to end up above the top one.
-// NOTE(review): the factor of 2 assumes one MB spans two mi units - confirm
-// against the mi unit size used by this encoder.
-static int active_h_edge(const AV1_COMP *cpi, int mi_row, int mi_step) {
-  int first_row = 0;
-  int last_row = cpi->common.mi_rows;
-
-  if (cpi->oxcf.pass == 2) {
-    // Formatting bars detected by the first pass shrink the active area.
-    const int bar_rows =
-        (int)(cpi->twopass.this_frame_stats.inactive_zone_rows * 2);
-    first_row += bar_rows;
-    last_row -= bar_rows;
-    if (last_row < first_row) last_row = first_row;
-  }
-
-  const int span_end = mi_row + mi_step;
-  const int holds_top = mi_row <= first_row && first_row < span_end;
-  const int holds_bottom = mi_row <= last_row && last_row < span_end;
-  return holds_top || holds_bottom;
-}
-
-// Returns 1 when the column span [mi_col, mi_col + mi_step) contains the left
-// or the right boundary of the active picture area, otherwise 0.
-// The boundaries start as column 0 and cpi->common.mi_cols. In pass 2 both
-// are moved inwards by twice the first-pass inactive_zone_cols figure (the
-// inactive zone is counted in MBs, not mi units), and the right boundary is
-// never allowed to end up left of the left one.
-// NOTE(review): the factor of 2 assumes one MB spans two mi units - confirm
-// against the mi unit size used by this encoder.
-static int active_v_edge(const AV1_COMP *cpi, int mi_col, int mi_step) {
-  int first_col = 0;
-  int last_col = cpi->common.mi_cols;
-
-  if (cpi->oxcf.pass == 2) {
-    // Formatting bars detected by the first pass shrink the active area.
-    const int bar_cols =
-        (int)(cpi->twopass.this_frame_stats.inactive_zone_cols * 2);
-    first_col += bar_cols;
-    last_col -= bar_cols;
-    if (last_col < first_col) last_col = first_col;
-  }
-
-  const int span_end = mi_col + mi_step;
-  const int holds_left = mi_col <= first_col && first_col < span_end;
-  const int holds_right = mi_col <= last_col && last_col < span_end;
-  return holds_left || holds_right;
-}
-
-// Returns 1 when the superblock at (mi_row, mi_col) touches a horizontal or
-// a vertical boundary of the active picture area, otherwise 0. The span
-// tested in each direction is seq_params.mib_size mi units long.
-static int active_edge_sb(const AV1_COMP *cpi, int mi_row, int mi_col) {
-  const int sb_mi_size = cpi->common.seq_params.mib_size;
-
-  if (active_h_edge(cpi, mi_row, sb_mi_size)) return 1;
-  return active_v_edge(cpi, mi_col, sb_mi_size) != 0;
-}
-
-// Look at neighboring blocks (co-located previous-frame, left and above superblocks) and set a min and max
-// partition size based on what they chose; results are written to *min_block_size and *max_block_size.
-static void rd_auto_partition_range(AV1_COMP *cpi, const TileInfo *const tile,
- MACROBLOCKD *const xd, int mi_row,
- int mi_col, BLOCK_SIZE *min_block_size,
- BLOCK_SIZE *max_block_size) {
- AV1_COMMON *const cm = &cpi->common;
- MB_MODE_INFO **mi = xd->mi;
- const int left_in_image = xd->left_available && mi[-1]; // left neighbour flagged available and its entry is non-NULL
- const int above_in_image = xd->up_available && mi[-xd->mi_stride]; // same test for the entry one row up
- const int mi_rows_remaining = tile->mi_row_end - mi_row;
- const int mi_cols_remaining = tile->mi_col_end - mi_col;
- int bh, bw; // out-arguments of find_partition_size(); not read afterwards in this function
- BLOCK_SIZE min_size = BLOCK_4X4; // widest possible range, used when there is no prediction
- BLOCK_SIZE max_size = BLOCK_LARGEST;
-
- // Trap case where we do not have a prediction.
- if (left_in_image || above_in_image || cm->frame_type != KEY_FRAME) {
- // Default "min to max" and "max to min"
- min_size = BLOCK_LARGEST;
- max_size = BLOCK_4X4;
-
- // NOTE: each call to get_sb_partition_size_range() uses the previous
- // passed in values for min and max as a starting point.
- // Find the min and max partition used in previous frame at this location
- if (cm->frame_type != KEY_FRAME) {
- MB_MODE_INFO **prev_mi =
- &cm->prev_mi_grid_visible[mi_row * xd->mi_stride + mi_col];
- get_sb_partition_size_range(cm, xd, prev_mi, &min_size, &max_size);
- }
- // Find the min and max partition sizes used in the left superblock
- if (left_in_image) {
- MB_MODE_INFO **left_sb_mi = &mi[-cm->seq_params.mib_size];
- get_sb_partition_size_range(cm, xd, left_sb_mi, &min_size, &max_size);
- }
- // Find the min and max partition sizes used in the above superblock.
- if (above_in_image) {
- MB_MODE_INFO **above_sb_mi =
- &mi[-xd->mi_stride * cm->seq_params.mib_size];
- get_sb_partition_size_range(cm, xd, above_sb_mi, &min_size, &max_size);
- }
-
- // Adjust observed min and max for "relaxed" auto partition case.
- if (cpi->sf.auto_min_max_partition_size == RELAXED_NEIGHBORING_MIN_MAX) {
- min_size = min_partition_size[min_size];
- max_size = max_partition_size[max_size];
- }
- }
-
- // Check border cases where max and min from neighbors may not be legal.
- max_size = find_partition_size(max_size, mi_rows_remaining, mi_cols_remaining,
- &bh, &bw);
- min_size = AOMMIN(min_size, max_size); // keep min <= max after max was adjusted
-
- // Test for blocks at the edge of the active image.
- // This may be the actual edge of the image or where there are formatting
- // bars.
- if (active_edge_sb(cpi, mi_row, mi_col)) {
- min_size = BLOCK_4X4;
- } else {
- min_size = AOMMIN(cpi->sf.rd_auto_partition_min_limit, min_size);
- }
-
- // When use_square_partition_only is true, make sure at least one square
- // partition is allowed by selecting the next smaller square size as
- // *min_block_size.
- if (min_size >= cpi->sf.use_square_partition_only_threshold) {
- min_size = AOMMIN(min_size, next_square_size[max_size]);
- }
-
- *min_block_size = AOMMIN(min_size, cm->seq_params.sb_size); // neither bound may exceed the superblock size
- *max_block_size = AOMMIN(max_size, cm->seq_params.sb_size);
-}
-
-// TODO(jingning) refactor functions setting partition search range
-//
-// Computes the block-size search range [*min_bs, *max_bs] for the block of
-// size bsize at (mi_row, mi_col).
-//
-// The range starts inverted (min = superblock size, max = BLOCK_4X4) and is
-// widened with the sb_type of:
-//   - every previous-frame entry covered by the block, when the previous
-//     frame's mode-info grid exists;
-//   - the column of entries immediately to the left (if xd->left_available);
-//   - the row of entries immediately above (if xd->up_available).
-// A NULL entry contributes bsize itself. If the observed range collapses to a
-// single size it is relaxed through min_partition_size / max_partition_size.
-// Both results are finally clamped to the superblock size.
-static void set_partition_range(const AV1_COMMON *const cm,
-                                const MACROBLOCKD *const xd, int mi_row,
-                                int mi_col, BLOCK_SIZE bsize,
-                                BLOCK_SIZE *const min_bs,
-                                BLOCK_SIZE *const max_bs) {
-  const int mi_width = mi_size_wide[bsize];
-  const int mi_height = mi_size_high[bsize];
-  int idx, idy;
-
-  BLOCK_SIZE min_size = cm->seq_params.sb_size;  // default values
-  BLOCK_SIZE max_size = BLOCK_4X4;
-
-  // Test the grid base pointer rather than the address of an element inside
-  // it: the element address is non-NULL for any non-zero offset even when the
-  // grid itself is absent, so the old check could not protect the reads below.
-  if (cm->prev_mi_grid_visible) {
-    MB_MODE_INFO **const prev_mi =
-        &cm->prev_mi_grid_visible[cm->mi_stride * mi_row + mi_col];
-    for (idy = 0; idy < mi_height; ++idy) {
-      for (idx = 0; idx < mi_width; ++idx) {
-        const MB_MODE_INFO *const mi = prev_mi[idy * cm->mi_stride + idx];
-        const BLOCK_SIZE bs = mi ? mi->sb_type : bsize;
-        min_size = AOMMIN(min_size, bs);
-        max_size = AOMMAX(max_size, bs);
-      }
-    }
-  }
-
-  // Left neighbours: one entry per row of the block, one column to the left.
-  if (xd->left_available) {
-    for (idy = 0; idy < mi_height; ++idy) {
-      const MB_MODE_INFO *const mi = xd->mi[idy * cm->mi_stride - 1];
-      const BLOCK_SIZE bs = mi ? mi->sb_type : bsize;
-      min_size = AOMMIN(min_size, bs);
-      max_size = AOMMAX(max_size, bs);
-    }
-  }
-
-  // Above neighbours: one entry per column of the block, one row up.
-  if (xd->up_available) {
-    for (idx = 0; idx < mi_width; ++idx) {
-      const MB_MODE_INFO *const mi = xd->mi[idx - cm->mi_stride];
-      const BLOCK_SIZE bs = mi ? mi->sb_type : bsize;
-      min_size = AOMMIN(min_size, bs);
-      max_size = AOMMAX(max_size, bs);
-    }
-  }
-
-  // A single observed size gives no room to search; widen it via the tables.
-  if (min_size == max_size) {
-    min_size = min_partition_size[min_size];
-    max_size = max_partition_size[max_size];
-  }
-
-  *min_bs = AOMMIN(min_size, cm->seq_params.sb_size);
-  *max_bs = AOMMIN(max_size, cm->seq_params.sb_size);
-}
-
-// Saves the macroblock's pred_mv array into the mode context ctx. The number
-// of bytes copied is the size of x->pred_mv.
-static INLINE void store_pred_mv(MACROBLOCK *x, PICK_MODE_CONTEXT *ctx) {
-  const size_t num_bytes = sizeof(x->pred_mv);
-  memcpy(ctx->pred_mv, x->pred_mv, num_bytes);
-}
-
-// Restores the macroblock's pred_mv array from the mode context ctx. The
-// number of bytes copied is the size of x->pred_mv.
-static INLINE void load_pred_mv(MACROBLOCK *x,
-                                const PICK_MODE_CONTEXT *const ctx) {
-  const size_t num_bytes = sizeof(x->pred_mv);
-  memcpy(x->pred_mv, ctx->pred_mv, num_bytes);
-}
-
-#if CONFIG_FP_MB_STATS
-const int qindex_skip_threshold_lookup[BLOCK_SIZES] = { // NOTE(review): presumably one qindex threshold per BLOCK_SIZE; no user is visible in this part of the file - confirm at the use site
- 0, 10, 10, 30, 40, 40, 60, 80, 80, 90, 100, 100, 120,
- // TODO(debargha): What are the correct numbers here?
- 130, 130, 150
-};
-const int qindex_split_threshold_lookup[BLOCK_SIZES] = { // NOTE(review): presumably one qindex threshold per BLOCK_SIZE - confirm at the use site
- 0, 3, 3, 7, 15, 15, 30, 40, 40, 60, 80, 80, 120,
- // TODO(debargha): What are the correct numbers here?
- 160, 160, 240
-};
-const int complexity_16x16_blocks_threshold[BLOCK_SIZES] = { // NOTE(review): presumably a count of 16x16 blocks per BLOCK_SIZE - confirm at the use site
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 4, 4, 6,
- // TODO(debargha): What are the correct numbers here?
- 8, 8, 10
-};
-
-typedef enum { // Motion classes. The numeric values matter: get_motion_inconsistency() scores a pair 2 when the values differ by exactly 2 (LEFT/RIGHT, UP/DOWN, and also ZERO/UP).
- MV_ZERO = 0,
- MV_LEFT = 1,
- MV_UP = 2,
- MV_RIGHT = 3,
- MV_DOWN = 4,
- MV_INVALID
-} MOTION_DIRECTION;
-
-// Maps the flag bits of fp_byte to a MOTION_DIRECTION. The masks are tested
-// in the order zero, left, right, up, and the first one that is set decides
-// the result; MV_DOWN is returned when none of the four is set.
-static INLINE MOTION_DIRECTION get_motion_direction_fp(uint8_t fp_byte) {
-  if (fp_byte & FPMB_MOTION_ZERO_MASK) return MV_ZERO;
-  if (fp_byte & FPMB_MOTION_LEFT_MASK) return MV_LEFT;
-  if (fp_byte & FPMB_MOTION_RIGHT_MASK) return MV_RIGHT;
-  return (fp_byte & FPMB_MOTION_UP_MASK) ? MV_UP : MV_DOWN;
-}
-
-// Scores how much two motion classes disagree: 0 when they are equal, 2 when
-// their enum values are exactly two apart, and 1 for any other pair.
-static INLINE int get_motion_inconsistency(MOTION_DIRECTION this_mv,
-                                           MOTION_DIRECTION that_mv) {
-  const int gap = abs(this_mv - that_mv);
-
-  if (gap == 0) return 0;
-  return gap == 2 ? 2 : 1;
-}
-#endif
-
-// Try searching for an encoding for the given subblock. Returns zero if the
-// rdcost is already too high (to tell the caller not to bother searching for
-// encodings of further subblocks). The subblock's result is written to *this_rdc and accumulated into *sum_rdc.
-static int rd_try_subblock(AV1_COMP *const cpi, ThreadData *td,
- TileDataEnc *tile_data, TOKENEXTRA **tp, int is_last,
- int mi_row, int mi_col, BLOCK_SIZE subsize,
- RD_STATS *best_rdc, RD_STATS *sum_rdc,
- RD_STATS *this_rdc, PARTITION_TYPE partition,
- PICK_MODE_CONTEXT *prev_ctx,
- PICK_MODE_CONTEXT *this_ctx) {
-#define RTS_X_RATE_NOCOEF_ARG
-#define RTS_MAX_RDCOST best_rdc->rdcost
-
- MACROBLOCK *const x = &td->mb;
-
- if (cpi->sf.adaptive_motion_search) load_pred_mv(x, prev_ctx); // start from the pred_mv stored in the previous context
-
- const int64_t rdcost_remaining = best_rdc->rdcost == INT64_MAX // budget left for this subblock; INT64_MAX means "no limit" and avoids the subtraction
- ? INT64_MAX
- : (best_rdc->rdcost - sum_rdc->rdcost);
-
- rd_pick_sb_modes(cpi, tile_data, x, mi_row, mi_col, this_rdc,
- RTS_X_RATE_NOCOEF_ARG partition, subsize, this_ctx,
- rdcost_remaining);
-
- if (this_rdc->rate == INT_MAX) { // rate == INT_MAX marks a failed search: poison the running total
- sum_rdc->rdcost = INT64_MAX;
- } else {
- sum_rdc->rate += this_rdc->rate;
- sum_rdc->dist += this_rdc->dist;
- sum_rdc->rdcost += this_rdc->rdcost;
- }
-
- if (sum_rdc->rdcost >= RTS_MAX_RDCOST) return 0; // already no better than the best known candidate
-
- if (!is_last) { // NOTE(review): presumably commits this subblock as a dry run so the next subblock sees its state - confirm in update_state()/encode_superblock()
- update_state(cpi, tile_data, td, this_ctx, mi_row, mi_col, subsize, 1);
- encode_superblock(cpi, tile_data, td, tp, DRY_RUN_NORMAL, mi_row, mi_col,
- subsize, NULL);
- }
-
- return 1;
-
-#undef RTS_X_RATE_NOCOEF_ARG
-#undef RTS_MAX_RDCOST
-}
-/* Evaluates one partition type made of three sub-blocks and adopts it if it
- * beats the current best.
- *
- * The running cost starts with the signalling cost of `partition`, then each
- * sub-block (position mi_rowN/mi_colN, size subsizeN, context ctxs[N]) is
- * searched in order through rd_try_subblock(); the function returns without
- * changing anything as soon as one of them reports that the total is no
- * longer competitive. On success *best_rdc is overwritten with the summed
- * stats and pc_tree->partitioning is set to `partition`.
- * `ctx` is the context handed to the first sub-block as its predecessor.
- */
-static void rd_test_partition3(AV1_COMP *const cpi, ThreadData *td,
- TileDataEnc *tile_data, TOKENEXTRA **tp,
- PC_TREE *pc_tree, RD_STATS *best_rdc,
- PICK_MODE_CONTEXT ctxs[3],
- PICK_MODE_CONTEXT *ctx, int mi_row, int mi_col,
- BLOCK_SIZE bsize, PARTITION_TYPE partition,
- int mi_row0, int mi_col0, BLOCK_SIZE subsize0,
- int mi_row1, int mi_col1, BLOCK_SIZE subsize1,
- int mi_row2, int mi_col2, BLOCK_SIZE subsize2) {
- MACROBLOCK *const x = &td->mb;
- MACROBLOCKD *const xd = &x->e_mbd;
- RD_STATS sum_rdc, this_rdc;
-#define RTP_STX_TRY_ARGS
- int pl = partition_plane_context(xd, mi_row, mi_col, bsize);
- av1_init_rd_stats(&sum_rdc);
- sum_rdc.rate = x->partition_cost[pl][partition]; // start from the cost of signalling this partition type
- sum_rdc.rdcost = RDCOST(x->rdmult, sum_rdc.rate, 0);
- if (!rd_try_subblock(cpi, td, tile_data, tp, 0, mi_row0, mi_col0, subsize0,
- best_rdc, &sum_rdc, &this_rdc,
- RTP_STX_TRY_ARGS partition, ctx, &ctxs[0]))
- return;
-
- if (!rd_try_subblock(cpi, td, tile_data, tp, 0, mi_row1, mi_col1, subsize1,
- best_rdc, &sum_rdc, &this_rdc,
- RTP_STX_TRY_ARGS partition, &ctxs[0], &ctxs[1]))
- return;
-
- // With the new layout of mixed partitions for PARTITION_HORZ_B and
- // PARTITION_VERT_B, the last subblock might start past halfway through the
- // main block, so we might signal it even though the subblock lies strictly
- // outside the image. In that case, we won't spend any bits coding it and the
- // difference (obviously) doesn't contribute to the error.
- const int try_block2 = 1; // always true here, so the third sub-block is always searched
- if (try_block2 &&
- !rd_try_subblock(cpi, td, tile_data, tp, 1, mi_row2, mi_col2, subsize2,
- best_rdc, &sum_rdc, &this_rdc,
- RTP_STX_TRY_ARGS partition, &ctxs[1], &ctxs[2]))
- return;
-
- if (sum_rdc.rdcost >= best_rdc->rdcost) return;
-
- sum_rdc.rdcost = RDCOST(x->rdmult, sum_rdc.rate, sum_rdc.dist); // recompute from the summed rate/dist rather than the sum of per-block costs
-
- if (sum_rdc.rdcost >= best_rdc->rdcost) return;
-
- *best_rdc = sum_rdc;
- pc_tree->partitioning = partition;
-
-#undef RTP_STX_TRY_ARGS
-}
-
-// Puts pc_tree back into its initial search state: partitioning is
-// PARTITION_NONE, cb_search_range is SEARCH_FULL_PLANE and none.skip is
-// cleared. For blocks of at least 8x8 the four split children are reset
-// recursively with the PARTITION_SPLIT sub-size.
-static void reset_partition(PC_TREE *pc_tree, BLOCK_SIZE bsize) {
-  pc_tree->none.skip = 0;
-  pc_tree->cb_search_range = SEARCH_FULL_PLANE;
-  pc_tree->partitioning = PARTITION_NONE;
-
-  if (bsize < BLOCK_8X8) return;
-
-  const BLOCK_SIZE child_size = get_partition_subsize(bsize, PARTITION_SPLIT);
-  int quadrant = 0;
-  while (quadrant < 4) {
-    reset_partition(pc_tree->split[quadrant], child_size);
-    ++quadrant;
-  }
-}
-/* Recursive RD search restricted to square partitions: for the block of size
- * bsize at (mi_row, mi_col) it compares PARTITION_NONE against
- * PARTITION_SPLIT (searching each quadrant with a recursive call) and records
- * the winner in pc_tree->partitioning.
- *
- * rd_cost  receives the stats of the best candidate (invalid stats if
- *          best_rd is negative).
- * best_rd  RD cost the result has to beat.
- * none_rd  optional; receives the RD cost of the PARTITION_NONE candidate
- *          as returned by rd_pick_sb_modes(), or 0 if NONE is not tried.
- *
- * Along the way it fills pc_tree->pc_tree_stats and classifies the outcome
- * in pc_tree->cb_search_range.
- */
-static void rd_pick_sqr_partition(AV1_COMP *const cpi, ThreadData *td,
- TileDataEnc *tile_data, TOKENEXTRA **tp,
- int mi_row, int mi_col, BLOCK_SIZE bsize,
- RD_STATS *rd_cost, int64_t best_rd,
- PC_TREE *pc_tree, int64_t *none_rd) {
- const AV1_COMMON *const cm = &cpi->common;
- TileInfo *const tile_info = &tile_data->tile_info;
- MACROBLOCK *const x = &td->mb;
- MACROBLOCKD *const xd = &x->e_mbd;
- const int mi_step = mi_size_wide[bsize] / 2; // half the block width, i.e. the offset of the second quadrant
- RD_SEARCH_MACROBLOCK_CONTEXT x_ctx;
- const TOKENEXTRA *const tp_orig = *tp;
- PICK_MODE_CONTEXT *ctx_none = &pc_tree->none;
- int tmp_partition_cost[PARTITION_TYPES];
- BLOCK_SIZE subsize;
- RD_STATS this_rdc, sum_rdc, best_rdc, pn_rdc;
- const int bsize_at_least_8x8 = (bsize >= BLOCK_8X8);
- int do_square_split = bsize_at_least_8x8;
- const int pl = bsize_at_least_8x8
- ? partition_plane_context(xd, mi_row, mi_col, bsize)
- : 0;
- const int *partition_cost =
- pl >= 0 ? x->partition_cost[pl] : x->partition_cost[0];
- const int num_planes = av1_num_planes(cm);
-
- int64_t split_rd[4] = { 0, 0, 0, 0 };
-
- // Override skipping rectangular partition operations for edge blocks
- const int has_rows = (mi_row + mi_step < cm->mi_rows);
- const int has_cols = (mi_col + mi_step < cm->mi_cols);
-
- if (none_rd) *none_rd = 0; // optional output; stays 0 when PARTITION_NONE is not tried
-
- int partition_none_allowed = has_rows && has_cols;
-
- (void)*tp_orig;
- (void)split_rd;
-
- if (best_rd < 0) { // nothing can beat a negative target: report failure without searching
- pc_tree->none.rdcost = INT64_MAX;
- pc_tree->none.skip = 0;
- av1_invalid_rd_stats(rd_cost);
- return;
- }
- pc_tree->pc_tree_stats.valid = 1;
-
- // Override partition costs at the edges of the frame in the same
- // way as in read_partition (see decodeframe.c)
- if (!(has_rows && has_cols)) {
- assert(bsize_at_least_8x8 && pl >= 0);
- const aom_cdf_prob *partition_cdf = cm->fc->partition_cdf[pl];
- for (int i = 0; i < PARTITION_TYPES; ++i) tmp_partition_cost[i] = INT_MAX;
- if (has_cols) {
- // At the bottom, the two possibilities are HORZ and SPLIT
- aom_cdf_prob bot_cdf[2];
- partition_gather_vert_alike(bot_cdf, partition_cdf, bsize);
- static const int bot_inv_map[2] = { PARTITION_HORZ, PARTITION_SPLIT };
- av1_cost_tokens_from_cdf(tmp_partition_cost, bot_cdf, bot_inv_map);
- } else if (has_rows) {
- // At the right, the two possibilities are VERT and SPLIT
- aom_cdf_prob rhs_cdf[2];
- partition_gather_horz_alike(rhs_cdf, partition_cdf, bsize);
- static const int rhs_inv_map[2] = { PARTITION_VERT, PARTITION_SPLIT };
- av1_cost_tokens_from_cdf(tmp_partition_cost, rhs_cdf, rhs_inv_map);
- } else {
- // At the bottom right, we always split
- tmp_partition_cost[PARTITION_SPLIT] = 0;
- }
-
- partition_cost = tmp_partition_cost;
- }
-
-#ifndef NDEBUG
- // Nothing should rely on the default value of this array (which is just
- // leftover from encoding the previous block). Setting it to a fixed pattern
- // when debugging.
- // bit 0, 1, 2 are blk_skip of each plane
- // bit 4, 5, 6 are initialization checking of each plane
- memset(x->blk_skip, 0x77, sizeof(x->blk_skip));
-#endif // NDEBUG
-
- assert(mi_size_wide[bsize] == mi_size_high[bsize]);
-
- av1_init_rd_stats(&this_rdc);
- av1_init_rd_stats(&sum_rdc);
- av1_invalid_rd_stats(&best_rdc);
- best_rdc.rdcost = best_rd;
-
- set_offsets(cpi, tile_info, x, mi_row, mi_col, bsize);
-
- if (bsize == BLOCK_16X16 && cpi->vaq_refresh)
- x->mb_energy = av1_log_block_var(cpi, x, bsize);
-
- xd->above_txfm_context = cm->above_txfm_context[tile_info->tile_row] + mi_col;
- xd->left_txfm_context =
- xd->left_txfm_context_buffer + (mi_row & MAX_MIB_MASK);
- save_context(x, &x_ctx, mi_row, mi_col, bsize, num_planes);
-
-#if CONFIG_DIST_8X8
- if (x->using_dist_8x8) {
- if (block_size_high[bsize] <= 8 || block_size_wide[bsize] <= 8)
- do_square_split = 0;
- }
-#endif
-
- // PARTITION_NONE
- if (partition_none_allowed) {
- int pt_cost = 0;
- if (bsize_at_least_8x8) {
- pc_tree->partitioning = PARTITION_NONE;
- pt_cost = partition_cost[PARTITION_NONE] < INT_MAX
- ? partition_cost[PARTITION_NONE]
- : 0;
- }
- int64_t partition_rd_cost = RDCOST(x->rdmult, pt_cost, 0);
- int64_t best_remain_rdcost = best_rdc.rdcost == INT64_MAX
- ? INT64_MAX
- : (best_rdc.rdcost - partition_rd_cost);
- rd_pick_sb_modes(cpi, tile_data, x, mi_row, mi_col, &this_rdc,
- PARTITION_NONE, bsize, ctx_none, best_remain_rdcost);
-
- pc_tree->pc_tree_stats.rdcost = ctx_none->rdcost;
- pc_tree->pc_tree_stats.skip = ctx_none->skip;
-
- if (none_rd) *none_rd = this_rdc.rdcost; // reported before the partition signalling cost is added below
- if (this_rdc.rate != INT_MAX) {
- if (bsize_at_least_8x8) {
- this_rdc.rate += pt_cost;
- this_rdc.rdcost = RDCOST(x->rdmult, this_rdc.rate, this_rdc.dist);
- }
-
- if (this_rdc.rdcost < best_rdc.rdcost) {
- // Adjust dist breakout threshold according to the partition size.
- const int64_t dist_breakout_thr =
- cpi->sf.partition_search_breakout_dist_thr >>
- ((2 * (MAX_SB_SIZE_LOG2 - 2)) -
- (mi_size_wide_log2[bsize] + mi_size_high_log2[bsize]));
- const int rate_breakout_thr =
- cpi->sf.partition_search_breakout_rate_thr *
- num_pels_log2_lookup[bsize];
-
- best_rdc = this_rdc;
- if (bsize_at_least_8x8) pc_tree->partitioning = PARTITION_NONE;
-
- pc_tree->cb_search_range = SEARCH_FULL_PLANE;
-
- // If all y, u, v transform blocks in this partition are skippable, and
- // the dist & rate are within the thresholds, the partition search is
- // terminated for current branch of the partition search tree.
- // The dist & rate thresholds are set to 0 at speed 0 to disable the
- // early termination at that speed.
- if (!x->e_mbd.lossless[xd->mi[0]->segment_id] &&
- (ctx_none->skippable && best_rdc.dist < dist_breakout_thr &&
- best_rdc.rate < rate_breakout_thr)) {
- do_square_split = 0;
- }
- }
- }
-
- restore_context(x, &x_ctx, mi_row, mi_col, bsize, num_planes);
- }
-
- // store estimated motion vector
- if (cpi->sf.adaptive_motion_search) store_pred_mv(x, ctx_none);
-
- int64_t temp_best_rdcost = best_rdc.rdcost;
- pn_rdc = best_rdc; // snapshot of the best result before PARTITION_SPLIT is tried
-
- // PARTITION_SPLIT
- if (do_square_split) {
- int reached_last_index = 0;
- subsize = get_partition_subsize(bsize, PARTITION_SPLIT);
- int idx;
-
- for (idx = 0; idx < 4 && sum_rdc.rdcost < temp_best_rdcost; ++idx) { // stops early once the running total can no longer win
- const int x_idx = (idx & 1) * mi_step;
- const int y_idx = (idx >> 1) * mi_step;
-
- if (mi_row + y_idx >= cm->mi_rows || mi_col + x_idx >= cm->mi_cols)
- continue;
-
- if (cpi->sf.adaptive_motion_search) load_pred_mv(x, ctx_none);
-
- pc_tree->split[idx]->index = idx;
- int64_t *p_split_rd = &split_rd[idx];
- // TODO(Cherma) : Account for partition cost while passing best rd to
- // rd_pick_sqr_partition()
- rd_pick_sqr_partition(cpi, td, tile_data, tp, mi_row + y_idx,
- mi_col + x_idx, subsize, &this_rdc,
- temp_best_rdcost - sum_rdc.rdcost,
- pc_tree->split[idx], p_split_rd);
-
- pc_tree->pc_tree_stats.sub_block_rdcost[idx] = this_rdc.rdcost;
- pc_tree->pc_tree_stats.sub_block_skip[idx] =
- pc_tree->split[idx]->none.skip;
-
- if (this_rdc.rate == INT_MAX) {
- sum_rdc.rdcost = INT64_MAX;
- break;
- } else {
- sum_rdc.rate += this_rdc.rate;
- sum_rdc.dist += this_rdc.dist;
- sum_rdc.rdcost += this_rdc.rdcost;
- }
- }
- reached_last_index = (idx == 4); // false if the loop stopped early or a quadrant failed
-
- if (reached_last_index && sum_rdc.rdcost < best_rdc.rdcost) {
- sum_rdc.rate += partition_cost[PARTITION_SPLIT];
- sum_rdc.rdcost = RDCOST(x->rdmult, sum_rdc.rate, sum_rdc.dist);
-
- if (sum_rdc.rdcost < best_rdc.rdcost) {
- best_rdc = sum_rdc;
- pc_tree->partitioning = PARTITION_SPLIT;
- }
- }
-
- int has_split = 0; // number of quadrants that themselves chose PARTITION_SPLIT
- if (pc_tree->partitioning == PARTITION_SPLIT) {
- for (int cb_idx = 0; cb_idx <= AOMMIN(idx, 3); ++cb_idx) {
- if (pc_tree->split[cb_idx]->partitioning == PARTITION_SPLIT)
- ++has_split;
- }
-
- if (has_split >= 3 || sum_rdc.rdcost < (pn_rdc.rdcost >> 1)) {
- pc_tree->cb_search_range = SPLIT_PLANE;
- }
- }
-
- if (pc_tree->partitioning == PARTITION_NONE) {
- pc_tree->cb_search_range = SEARCH_SAME_PLANE;
- if (pn_rdc.dist <= sum_rdc.dist)
- pc_tree->cb_search_range = NONE_PARTITION_PLANE;
- }
-
- if (pn_rdc.rate == INT_MAX) pc_tree->cb_search_range = NONE_PARTITION_PLANE;
-
- restore_context(x, &x_ctx, mi_row, mi_col, bsize, num_planes);
- } // if (do_square_split)
-
- pc_tree->pc_tree_stats.split = pc_tree->partitioning == PARTITION_SPLIT;
- if (do_square_split) {
- for (int i = 0; i < 4; ++i) {
- pc_tree->pc_tree_stats.sub_block_split[i] =
- pc_tree->split[i]->partitioning == PARTITION_SPLIT;
- }
- }
-
- // TODO(jbb): This code added so that we avoid static analysis
- // warning related to the fact that best_rd isn't used after this
- // point. This code should be refactored so that the duplicate
- // checks occur in some sub function and thus are used...
- (void)best_rd;
- *rd_cost = best_rdc;
-
- if (best_rdc.rate < INT_MAX && best_rdc.dist < INT64_MAX &&
- pc_tree->index != 3) { // the fourth quadrant (index 3) is not dry-run encoded here
- if (bsize == cm->seq_params.sb_size) {
- restore_context(x, &x_ctx, mi_row, mi_col, bsize, num_planes);
- } else {
- encode_sb(cpi, td, tile_data, tp, mi_row, mi_col, DRY_RUN_NORMAL, bsize,
- pc_tree, NULL);
- }
- }
-
- if (bsize == cm->seq_params.sb_size) {
- assert(best_rdc.rate < INT_MAX);
- assert(best_rdc.dist < INT64_MAX);
- } else {
- assert(tp_orig == *tp);
- }
-}
-/* Coefficient tables for the two-pass partition decision, one "split" and
- * one "none" table per square block size (128 down to 8 for split).
- * Every table holds FEATURE_SIZE + 1 = 20 floats.
- * NOTE(review): presumably FEATURE_SIZE feature weights followed by one bias
- * term - confirm at the use site, which is not visible here.
- */
-#define FEATURE_SIZE 19
-static const float two_pass_split_partition_weights_128[FEATURE_SIZE + 1] = {
- 2.683936f, -0.193620f, -4.106470f, -0.141320f, -0.282289f,
- 0.125296f, -1.134961f, 0.862757f, -0.418799f, -0.637666f,
- 0.016232f, 0.345013f, 0.018823f, -0.393394f, -1.130700f,
- 0.695357f, 0.112569f, -0.341975f, -0.513882f, 5.7488966f,
-};
-
-static const float two_pass_split_partition_weights_64[FEATURE_SIZE + 1] = {
- 2.990993f, 0.423273f, -0.926544f, 0.454646f, -0.292698f,
- -1.311632f, -0.284432f, 0.717141f, -0.419257f, -0.574760f,
- -0.674444f, 0.669047f, -0.374255f, 0.380624f, -0.804036f,
- 0.264021f, 0.004163f, 1.896802f, 0.924287f, 0.13490619f,
-};
-
-static const float two_pass_split_partition_weights_32[FEATURE_SIZE + 1] = {
- 2.795181f, -0.136943f, -0.924842f, 0.405330f, -0.463505f,
- -0.584076f, -0.831472f, 0.382985f, -0.597544f, -0.138915f,
- -1.354350f, 0.466035f, -0.553961f, 0.213202f, -1.166429f,
- 0.010776f, -0.096236f, 2.335084f, 1.699857f, -0.58178353f,
-};
-
-static const float two_pass_split_partition_weights_16[FEATURE_SIZE + 1] = {
- 1.987888f, -0.431100f, -1.687703f, 0.262602f, -0.425298f,
- -0.463870f, -1.493457f, 0.470917f, -0.528457f, -0.087700f,
- -1.815092f, 0.152883f, -0.337908f, 0.093679f, -1.548267f,
- -0.042387f, -0.000861f, 2.556746f, 1.619192f, 0.03643292f,
-};
-
-static const float two_pass_split_partition_weights_8[FEATURE_SIZE + 1] = {
- 2.188344f, -0.817528f, -2.119219f, 0.000000f, -0.348167f,
- -0.658074f, -1.960362f, 0.000000f, -0.403080f, 0.282699f,
- -2.061088f, 0.000000f, -0.431919f, -0.127960f, -1.099550f,
- 0.000000f, 0.121622f, 2.017455f, 2.058228f, -0.15475988f,
-};
-
-static const float two_pass_none_partition_weights_128[FEATURE_SIZE + 1] = {
- -1.006689f, 0.777908f, 4.461072f, -0.395782f, -0.014610f,
- -0.853863f, 0.729997f, -0.420477f, 0.282429f, -1.194595f,
- 3.181220f, -0.511416f, 0.117084f, -1.149348f, 1.507990f,
- -0.477212f, 0.202963f, -1.469581f, 0.624461f, -0.89081228f,
-};
-
-static const float two_pass_none_partition_weights_64[FEATURE_SIZE + 1] = {
- -1.241117f, 0.844878f, 5.638803f, -0.489780f, -0.108796f,
- -4.576821f, 1.540624f, -0.477519f, 0.227791f, -1.443968f,
- 1.586911f, -0.505125f, 0.140764f, -0.464194f, 1.466658f,
- -0.641166f, 0.195412f, 1.427905f, 2.080007f, -1.98272777f,
-};
-
-static const float two_pass_none_partition_weights_32[FEATURE_SIZE + 1] = {
- -2.130825f, 0.476023f, 5.907343f, -0.516002f, -0.097471f,
- -2.662754f, 0.614858f, -0.576728f, 0.085261f, -0.031901f,
- 0.727842f, -0.600034f, 0.079326f, 0.324328f, 0.504502f,
- -0.547105f, -0.037670f, 0.304995f, 0.369018f, -2.66299987f,
-};
-
-static const float two_pass_none_partition_weights_16[FEATURE_SIZE + 1] = {
- -1.626410f, 0.872047f, 5.414965f, -0.554781f, -0.084514f,
- -3.020550f, 0.467632f, -0.382280f, 0.199568f, 0.426220f,
- 0.829426f, -0.467100f, 0.153098f, 0.662994f, 0.327545f,
- -0.560106f, -0.141610f, 0.403372f, 0.523991f, -3.02891231f,
-};
-
-static const float two_pass_none_partition_weights_8[FEATURE_SIZE + 1] = {
- -1.463349f, 0.375376f, 4.751430f, 0.000000f, -0.184451f,
- -1.655447f, 0.443214f, 0.000000f, 0.127961f, 0.152435f,
- 0.083288f, 0.000000f, 0.143105f, 0.438012f, 0.073238f,
- 0.000000f, -0.278137f, 0.186134f, 0.073737f, -1.6494962f,
-};
-
-// Scores the SPLIT and NONE partitions of a square block with two linear models.
-/* split_score indicates confidence of picking split partition;
- * none_score indicates confidence of picking none partition;
- * both are the model output multiplied by 100 and truncated to int.
- *
- * The inputs are built from pc_tree_stats: split, skip and an RD-validity
- * flag for the whole block, then split, skip, RD-validity and an RD ratio
- * for each of the four sub-blocks.
- *
- * Returns 1 when both scores were written. Returns 0, leaving the outputs
- * untouched, when pc_tree_stats is not marked valid or when no weight table
- * is selected for bsize (BLOCK_4X4, or an unexpected size when asserts are
- * compiled out).
- */
-static int ml_prune_2pass_split_partition(const PC_TREE_STATS *pc_tree_stats,
- BLOCK_SIZE bsize, int *split_score,
- int *none_score) {
- if (!pc_tree_stats->valid) return 0;
- const float *split_weights = NULL;
- const float *none_weights = NULL;
- switch (bsize) {
- case BLOCK_4X4: break;  // No model: both pointers stay NULL and 0 is returned below.
- case BLOCK_8X8:
- split_weights = two_pass_split_partition_weights_8;
- none_weights = two_pass_none_partition_weights_8;
- break;
- case BLOCK_16X16:
- split_weights = two_pass_split_partition_weights_16;
- none_weights = two_pass_none_partition_weights_16;
- break;
- case BLOCK_32X32:
- split_weights = two_pass_split_partition_weights_32;
- none_weights = two_pass_none_partition_weights_32;
- break;
- case BLOCK_64X64:
- split_weights = two_pass_split_partition_weights_64;
- none_weights = two_pass_none_partition_weights_64;
- break;
- case BLOCK_128X128:
- split_weights = two_pass_split_partition_weights_128;
- none_weights = two_pass_none_partition_weights_128;
- break;
- default: assert(0 && "Unexpected bsize.");
- }
- if (!split_weights || !none_weights) return 0;
-
- aom_clear_system_state();  // NOTE(review): presumably resets FPU/SIMD state before the float math below - confirm.
-
- float features[FEATURE_SIZE];
- int feature_index = 0;
- features[feature_index++] = (float)pc_tree_stats->split;
- features[feature_index++] = (float)pc_tree_stats->skip;
- const int rdcost = (int)AOMMIN(INT_MAX, pc_tree_stats->rdcost);  // Clamp into int range.
- const int rd_valid = rdcost > 0 && rdcost < 1000000000;  // Usable only inside (0, 1e9).
- features[feature_index++] = (float)rd_valid;
- for (int i = 0; i < 4; ++i) {
- features[feature_index++] = (float)pc_tree_stats->sub_block_split[i];
- features[feature_index++] = (float)pc_tree_stats->sub_block_skip[i];
- const int sub_rdcost =
- (int)AOMMIN(INT_MAX, pc_tree_stats->sub_block_rdcost[i]);
- const int sub_rd_valid = sub_rdcost > 0 && sub_rdcost < 1000000000;
- features[feature_index++] = (float)sub_rd_valid;
- // Ratio between the sub-block RD and the whole-block RD; stays 1.0 unless both RDs are valid and the sub-block RD is the smaller one.
- float rd_ratio = 1.0f;
- if (rd_valid && sub_rd_valid && sub_rdcost < rdcost)
- rd_ratio = (float)sub_rdcost / (float)rdcost;
- features[feature_index++] = rd_ratio;
- }
- assert(feature_index == FEATURE_SIZE);
-
- float score_1 = split_weights[FEATURE_SIZE];  // Start from the bias term.
- float score_2 = none_weights[FEATURE_SIZE];  // Start from the bias term.
- for (int i = 0; i < FEATURE_SIZE; ++i) {
- score_1 += features[i] * split_weights[i];
- score_2 += features[i] * none_weights[i];
- }
- *split_score = (int)(score_1 * 100);
- *none_score = (int)(score_2 * 100);
- return 1;
-}
-#undef FEATURE_SIZE
-/* Uses a neural network to decide whether the horizontal and/or vertical
- * rectangular partition of a square block can be pruned.
- *
- * cpi, x: encoder and macroblock state; the luma source plane of x
- *   (x->plane[0].src) supplies the pixels for the variance features.
- * bsize: block size; sizes below BLOCK_8X8 return immediately.
- * best_rd: best RD cost so far; nothing is done when it is >= 1000000000.
- * none_rd: RD cost of PARTITION_NONE.
- * split_rd: RD costs of the four PARTITION_SPLIT sub-blocks.
- * dst_prune_horz, dst_prune_vert: set to 1 when the matching predicted
- *   probability is at or below the per-size threshold; never cleared here.
- */
-static void ml_prune_rect_partition(const AV1_COMP *const cpi,
- const MACROBLOCK *const x, BLOCK_SIZE bsize,
- int64_t best_rd, int64_t none_rd,
- int64_t *split_rd,
- int *const dst_prune_horz,
- int *const dst_prune_vert) {
- if (bsize < BLOCK_8X8 || best_rd >= 1000000000) return;
- best_rd = AOMMAX(best_rd, 1);  // Keeps the divisor used below at least 1.
- const NN_CONFIG *nn_config = NULL;
- const float prob_thresholds[5] = { 0.01f, 0.01f, 0.004f, 0.002f, 0.002f };  // Indexed by size: 8x8, 16x16, 32x32, 64x64, 128x128.
- float cur_thresh = 0.0f;
- switch (bsize) {
- case BLOCK_8X8:
- nn_config = &av1_rect_partition_nnconfig_8;
- cur_thresh = prob_thresholds[0];
- break;
- case BLOCK_16X16:
- nn_config = &av1_rect_partition_nnconfig_16;
- cur_thresh = prob_thresholds[1];
- break;
- case BLOCK_32X32:
- nn_config = &av1_rect_partition_nnconfig_32;
- cur_thresh = prob_thresholds[2];
- break;
- case BLOCK_64X64:
- nn_config = &av1_rect_partition_nnconfig_64;
- cur_thresh = prob_thresholds[3];
- break;
- case BLOCK_128X128:
- nn_config = &av1_rect_partition_nnconfig_128;
- cur_thresh = prob_thresholds[4];
- break;
- default: assert(0 && "Unexpected bsize.");
- }
- if (!nn_config) return;
- aom_clear_system_state();
-
- // 1. Compute input features
- float features[9];
-
- // RD cost ratios (features[0..4]); a feature stays 1.0 when its RD value is outside (0, 1e9).
- for (int i = 0; i < 5; i++) features[i] = 1.0f;
- if (none_rd > 0 && none_rd < 1000000000)
- features[0] = (float)none_rd / (float)best_rd;
- for (int i = 0; i < 4; i++) {
- if (split_rd[i] > 0 && split_rd[i] < 1000000000)
- features[1 + i] = (float)split_rd[i] / (float)best_rd;
- }
-
- // Variance ratios (features[5..8]): each quadrant's variance over the whole block's.
- const MACROBLOCKD *const xd = &x->e_mbd;
- int whole_block_variance;
- if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
- whole_block_variance = av1_high_get_sby_perpixel_variance(
- cpi, &x->plane[0].src, bsize, xd->bd);
- } else {
- whole_block_variance =
- av1_get_sby_perpixel_variance(cpi, &x->plane[0].src, bsize);
- }
- whole_block_variance = AOMMAX(whole_block_variance, 1);  // Avoid dividing by zero.
-
- int split_variance[4];
- const BLOCK_SIZE subsize = get_partition_subsize(bsize, PARTITION_SPLIT);
- struct buf_2d buf;
- buf.stride = x->plane[0].src.stride;
- const int bw = block_size_wide[bsize];
- for (int i = 0; i < 4; ++i) {
- const int x_idx = (i & 1) * bw / 2;  // Bit 0 of i selects the right half.
- const int y_idx = (i >> 1) * bw / 2;  // Bit 1 of i selects the bottom half; bw is reused because only square sizes reach here.
- buf.buf = x->plane[0].src.buf + x_idx + y_idx * buf.stride;
- if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
- split_variance[i] =
- av1_high_get_sby_perpixel_variance(cpi, &buf, subsize, xd->bd);
- } else {
- split_variance[i] = av1_get_sby_perpixel_variance(cpi, &buf, subsize);
- }
- }
-
- for (int i = 0; i < 4; i++)
- features[5 + i] = (float)split_variance[i] / (float)whole_block_variance;
-
- // 2. Do the prediction and prune 0-2 partitions based on their probabilities
- float raw_scores[3] = { 0.0f };
- av1_nn_predict(features, nn_config, raw_scores);
- float probs[3] = { 0.0f };
- av1_nn_softmax(raw_scores, probs, 3);
-
- // probs[0] is the probability of the fact that both rectangular partitions
- // are worse than current best_rd; probs[1] gates HORZ and probs[2] gates VERT.
- if (probs[1] <= cur_thresh) (*dst_prune_horz) = 1;
- if (probs[2] <= cur_thresh) (*dst_prune_vert) = 1;
-}
-
-// Use a ML model to predict which of horz_a, horz_b, vert_a and vert_b should be considered.
-/* Inputs (10): part_ctx, var_ctx and eight RD ratios taken from horz_rd[0..1],
- * vert_rd[0..1] and split_rd[0..3], each divided by best_rd. A ratio stays
- * 1.0 when the RD value is outside (0, 1e9) or is not smaller than best_rd.
- *
- * Output: 16 scores, one per 4-bit mask in which bit 0 = horz_a,
- * bit 1 = horz_b, bit 2 = vert_a and bit 3 = vert_b. All four
- * *_partition_allowed flags are cleared, then every mask whose score
- * (scaled by 100) is within a per-size margin of the best score re-enables
- * the partitions whose bits it has set.
- *
- * Returns without touching the flags when bsize < BLOCK_8X8, when
- * best_rd >= 1000000000, or when bsize has no model (BLOCK_8X8).
- */
-static void ml_prune_ab_partition(BLOCK_SIZE bsize, int part_ctx, int var_ctx,
- int64_t best_rd, int64_t horz_rd[2],
- int64_t vert_rd[2], int64_t split_rd[4],
- int *const horza_partition_allowed,
- int *const horzb_partition_allowed,
- int *const verta_partition_allowed,
- int *const vertb_partition_allowed) {
- if (bsize < BLOCK_8X8 || best_rd >= 1000000000) return;
- const NN_CONFIG *nn_config = NULL;
- switch (bsize) {
- case BLOCK_8X8: nn_config = NULL; break;  // No model for 8x8: the function returns just below.
- case BLOCK_16X16: nn_config = &av1_ab_partition_nnconfig_16; break;
- case BLOCK_32X32: nn_config = &av1_ab_partition_nnconfig_32; break;
- case BLOCK_64X64: nn_config = &av1_ab_partition_nnconfig_64; break;
- case BLOCK_128X128: nn_config = &av1_ab_partition_nnconfig_128; break;
- default: assert(0 && "Unexpected bsize.");
- }
- if (!nn_config) return;
-
- aom_clear_system_state();
-
- // Generate features.
- float features[10];
- int feature_index = 0;
- features[feature_index++] = (float)part_ctx;
- features[feature_index++] = (float)var_ctx;
- const int rdcost = (int)AOMMIN(INT_MAX, best_rd);
- int sub_block_rdcost[8] = { 0 };  // Order: horz[0..1], vert[0..1], split[0..3]; 0 marks "not usable".
- int rd_index = 0;
- for (int i = 0; i < 2; ++i) {
- if (horz_rd[i] > 0 && horz_rd[i] < 1000000000)
- sub_block_rdcost[rd_index] = (int)horz_rd[i];
- ++rd_index;
- }
- for (int i = 0; i < 2; ++i) {
- if (vert_rd[i] > 0 && vert_rd[i] < 1000000000)
- sub_block_rdcost[rd_index] = (int)vert_rd[i];
- ++rd_index;
- }
- for (int i = 0; i < 4; ++i) {
- if (split_rd[i] > 0 && split_rd[i] < 1000000000)
- sub_block_rdcost[rd_index] = (int)split_rd[i];
- ++rd_index;
- }
- for (int i = 0; i < 8; ++i) {
- // Ratio between the sub-block RD and the whole-block RD.
- float rd_ratio = 1.0f;
- if (sub_block_rdcost[i] > 0 && sub_block_rdcost[i] < rdcost)
- rd_ratio = (float)sub_block_rdcost[i] / (float)rdcost;
- features[feature_index++] = rd_ratio;
- }
- assert(feature_index == 10);
-
- // Calculate scores using the NN model.
- float score[16] = { 0.0f };
- av1_nn_predict(features, nn_config, score);
- int int_score[16];
- int max_score = -1000;  // Floor: the best score is never taken to be below -1000.
- for (int i = 0; i < 16; ++i) {
- int_score[i] = (int)(100 * score[i]);
- max_score = AOMMAX(int_score[i], max_score);
- }
-
- // Make decisions based on the model scores.
- int thresh = max_score;
- switch (bsize) {
- case BLOCK_16X16: thresh -= 150; break;
- case BLOCK_32X32: thresh -= 100; break;
- default: break;  // Other sizes: only masks that reach max_score qualify.
- }
- *horza_partition_allowed = 0;
- *horzb_partition_allowed = 0;
- *verta_partition_allowed = 0;
- *vertb_partition_allowed = 0;
- for (int i = 0; i < 16; ++i) {
- if (int_score[i] >= thresh) {
- if ((i >> 0) & 1) *horza_partition_allowed = 1;
- if ((i >> 1) & 1) *horzb_partition_allowed = 1;
- if ((i >> 2) & 1) *verta_partition_allowed = 1;
- if ((i >> 3) & 1) *vertb_partition_allowed = 1;
- }
- }
-}
-
-#define FEATURES 18  // 2 context inputs + 8 RD ratios + 4 horz4 + 4 vert4 variance ratios.
-#define LABELS 4  // One score per (horz4, vert4) on/off combination.
-/* Use a ML model to predict if horz4 and vert4 should be considered.
- *
- * Inputs: part_ctx, the bit length of pb_source_variance, eight RD ratios
- * (horz_rd, vert_rd, split_rd relative to best_rd) and, for each of the four
- * horz4 and four vert4 sub-blocks, the ratio of its source variance to
- * pb_source_variance, clamped to [0.1, 10].
- *
- * Both *_allowed flags are cleared, then every label whose score (scaled by
- * 100) is within a per-size margin of the best score enables horz4 when
- * bit 0 of its index is set and vert4 when bit 1 is set.
- *
- * Returns without touching the flags when best_rd >= 1000000000 or when
- * bsize is not 16x16, 32x32 or 64x64 (which also trips the assert when
- * asserts are enabled). mi_row and mi_col are passed to
- * av1_setup_src_planes() before the source variances are measured.
- */
-static void ml_prune_4_partition(const AV1_COMP *const cpi, MACROBLOCK *const x,
- BLOCK_SIZE bsize, int part_ctx,
- int64_t best_rd, int64_t horz_rd[2],
- int64_t vert_rd[2], int64_t split_rd[4],
- int *const partition_horz4_allowed,
- int *const partition_vert4_allowed,
- unsigned int pb_source_variance, int mi_row,
- int mi_col) {
- if (best_rd >= 1000000000) return;
- const NN_CONFIG *nn_config = NULL;
- switch (bsize) {
- case BLOCK_16X16: nn_config = &av1_4_partition_nnconfig_16; break;
- case BLOCK_32X32: nn_config = &av1_4_partition_nnconfig_32; break;
- case BLOCK_64X64: nn_config = &av1_4_partition_nnconfig_64; break;
- default: assert(0 && "Unexpected bsize.");
- }
- if (!nn_config) return;
-
- aom_clear_system_state();
-
- // Generate features.
- float features[FEATURES];
- int feature_index = 0;
- features[feature_index++] = (float)part_ctx;
- features[feature_index++] = (float)get_unsigned_bits(pb_source_variance);
-
- const int rdcost = (int)AOMMIN(INT_MAX, best_rd);
- int sub_block_rdcost[8] = { 0 };  // Order: horz[0..1], vert[0..1], split[0..3]; 0 marks "not usable".
- int rd_index = 0;
- for (int i = 0; i < 2; ++i) {
- if (horz_rd[i] > 0 && horz_rd[i] < 1000000000)
- sub_block_rdcost[rd_index] = (int)horz_rd[i];
- ++rd_index;
- }
- for (int i = 0; i < 2; ++i) {
- if (vert_rd[i] > 0 && vert_rd[i] < 1000000000)
- sub_block_rdcost[rd_index] = (int)vert_rd[i];
- ++rd_index;
- }
- for (int i = 0; i < 4; ++i) {
- if (split_rd[i] > 0 && split_rd[i] < 1000000000)
- sub_block_rdcost[rd_index] = (int)split_rd[i];
- ++rd_index;
- }
- for (int i = 0; i < 8; ++i) {
- // Ratio between the sub-block RD and the whole-block RD.
- float rd_ratio = 1.0f;
- if (sub_block_rdcost[i] > 0 && sub_block_rdcost[i] < rdcost)
- rd_ratio = (float)sub_block_rdcost[i] / (float)rdcost;
- features[feature_index++] = rd_ratio;
- }
-
- // Get variance of the 1:4 and 4:1 sub-blocks.
- unsigned int horz_4_source_var[4] = { 0 };
- unsigned int vert_4_source_var[4] = { 0 };
- {
- BLOCK_SIZE horz_4_bs = get_partition_subsize(bsize, PARTITION_HORZ_4);
- BLOCK_SIZE vert_4_bs = get_partition_subsize(bsize, PARTITION_VERT_4);
- av1_setup_src_planes(x, cpi->source, mi_row, mi_col,
- av1_num_planes(&cpi->common));
- const int src_stride = x->plane[0].src.stride;
- const uint8_t *src = x->plane[0].src.buf;
- const MACROBLOCKD *const xd = &x->e_mbd;
- for (int i = 0; i < 4; ++i) {
- const uint8_t *horz_src =
- src + i * block_size_high[horz_4_bs] * src_stride;
- const uint8_t *vert_src = src + i * block_size_wide[vert_4_bs];
- unsigned int horz_var, vert_var, sse;  // sse only receives vf()'s extra output; it is not read afterwards.
- if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
- switch (xd->bd) {  // Pick the reference-offset buffer that matches the bit depth.
- case 10:
- horz_var = cpi->fn_ptr[horz_4_bs].vf(
- horz_src, src_stride, CONVERT_TO_BYTEPTR(AV1_HIGH_VAR_OFFS_10),
- 0, &sse);
- vert_var = cpi->fn_ptr[vert_4_bs].vf(
- vert_src, src_stride, CONVERT_TO_BYTEPTR(AV1_HIGH_VAR_OFFS_10),
- 0, &sse);
- break;
- case 12:
- horz_var = cpi->fn_ptr[horz_4_bs].vf(
- horz_src, src_stride, CONVERT_TO_BYTEPTR(AV1_HIGH_VAR_OFFS_12),
- 0, &sse);
- vert_var = cpi->fn_ptr[vert_4_bs].vf(
- vert_src, src_stride, CONVERT_TO_BYTEPTR(AV1_HIGH_VAR_OFFS_12),
- 0, &sse);
- break;
- case 8:
- default:
- horz_var = cpi->fn_ptr[horz_4_bs].vf(
- horz_src, src_stride, CONVERT_TO_BYTEPTR(AV1_HIGH_VAR_OFFS_8),
- 0, &sse);
- vert_var = cpi->fn_ptr[vert_4_bs].vf(
- vert_src, src_stride, CONVERT_TO_BYTEPTR(AV1_HIGH_VAR_OFFS_8),
- 0, &sse);
- break;
- }
- horz_4_source_var[i] =
- ROUND_POWER_OF_TWO(horz_var, num_pels_log2_lookup[horz_4_bs]);
- vert_4_source_var[i] =
- ROUND_POWER_OF_TWO(vert_var, num_pels_log2_lookup[vert_4_bs]);
- } else {
- horz_var = cpi->fn_ptr[horz_4_bs].vf(horz_src, src_stride, AV1_VAR_OFFS,
- 0, &sse);
- vert_var = cpi->fn_ptr[vert_4_bs].vf(vert_src, src_stride, AV1_VAR_OFFS,
- 0, &sse);
- horz_4_source_var[i] =
- ROUND_POWER_OF_TWO(horz_var, num_pels_log2_lookup[horz_4_bs]);
- vert_4_source_var[i] =
- ROUND_POWER_OF_TWO(vert_var, num_pels_log2_lookup[vert_4_bs]);
- }
- }
- }
-
- const float denom = (float)(pb_source_variance + 1);  // NOTE(review): the unsigned +1 wraps to 0 if pb_source_variance is UINT_MAX - confirm callers never pass that.
- const float low_b = 0.1f;
- const float high_b = 10.0f;
- for (int i = 0; i < 4; ++i) {
- // Ratio between the 4:1 sub-block variance and the whole-block variance.
- float var_ratio = (float)(horz_4_source_var[i] + 1) / denom;
- if (var_ratio < low_b) var_ratio = low_b;
- if (var_ratio > high_b) var_ratio = high_b;
- features[feature_index++] = var_ratio;
- }
- for (int i = 0; i < 4; ++i) {
- // Ratio between the 1:4 sub-block variance and the whole-block variance.
- float var_ratio = (float)(vert_4_source_var[i] + 1) / denom;
- if (var_ratio < low_b) var_ratio = low_b;
- if (var_ratio > high_b) var_ratio = high_b;
- features[feature_index++] = var_ratio;
- }
- assert(feature_index == FEATURES);
-
- // Calculate scores using the NN model.
- float score[LABELS] = { 0.0f };
- av1_nn_predict(features, nn_config, score);
- int int_score[LABELS];
- int max_score = -1000;  // Floor: the best score is never taken to be below -1000.
- for (int i = 0; i < LABELS; ++i) {
- int_score[i] = (int)(100 * score[i]);
- max_score = AOMMAX(int_score[i], max_score);
- }
-
- // Make decisions based on the model scores.
- int thresh = max_score;
- switch (bsize) {
- case BLOCK_16X16: thresh -= 500; break;
- case BLOCK_32X32: thresh -= 500; break;
- case BLOCK_64X64: thresh -= 200; break;
- default: break;
- }
- *partition_horz4_allowed = 0;
- *partition_vert4_allowed = 0;
- for (int i = 0; i < LABELS; ++i) {
- if (int_score[i] >= thresh) {
- if ((i >> 0) & 1) *partition_horz4_allowed = 1;
- if ((i >> 1) & 1) *partition_vert4_allowed = 1;
- }
- }
-}
-#undef FEATURES
-#undef LABELS
-
-#define FEATURES 4  // Scaled rate, per-pixel distortion, source variance, squared DC quantizer / 256.
-/* ML-based partition search breakout.
- *
- * Feeds four features derived from rd_stats, x and pb_source_variance to
- * the per-size NN and compares the score (scaled by 100) with
- * cpi->sf.ml_partition_search_breakout_thresh[] for that size.
- *
- * Returns nonzero when the scaled score reaches the threshold. Returns 0
- * otherwise, including when bsize has no model or the threshold for the
- * size is negative.
- */
-static int ml_predict_breakout(const AV1_COMP *const cpi, BLOCK_SIZE bsize,
- const MACROBLOCK *const x,
- const RD_STATS *const rd_stats,
- unsigned int pb_source_variance) {
- const NN_CONFIG *nn_config = NULL;
- int thresh = 0;
- switch (bsize) {
- case BLOCK_8X8:
- nn_config = &av1_partition_breakout_nnconfig_8;
- thresh = cpi->sf.ml_partition_search_breakout_thresh[0];
- break;
- case BLOCK_16X16:
- nn_config = &av1_partition_breakout_nnconfig_16;
- thresh = cpi->sf.ml_partition_search_breakout_thresh[1];
- break;
- case BLOCK_32X32:
- nn_config = &av1_partition_breakout_nnconfig_32;
- thresh = cpi->sf.ml_partition_search_breakout_thresh[2];
- break;
- case BLOCK_64X64:
- nn_config = &av1_partition_breakout_nnconfig_64;
- thresh = cpi->sf.ml_partition_search_breakout_thresh[3];
- break;
- case BLOCK_128X128:
- nn_config = &av1_partition_breakout_nnconfig_128;
- thresh = cpi->sf.ml_partition_search_breakout_thresh[4];
- break;
- default: assert(0 && "Unexpected bsize.");
- }
- if (!nn_config || thresh < 0) return 0;  // A negative threshold disables the breakout for this size.
-
- // Generate feature values.
- float features[FEATURES];
- int feature_index = 0;
- aom_clear_system_state();
-
- const int num_pels_log2 = num_pels_log2_lookup[bsize];
- float rate_f = (float)AOMMIN(rd_stats->rate, INT_MAX);
- rate_f = ((float)x->rdmult / 128.0f / 512.0f / (float)(1 << num_pels_log2)) *
- rate_f;
- features[feature_index++] = rate_f;  // Rate weighted by rdmult and divided by the pixel count.
-
- const float dist_f =
- (float)(AOMMIN(rd_stats->dist, INT_MAX) >> num_pels_log2);
- features[feature_index++] = dist_f;  // Distortion clamped to INT_MAX, then divided by the pixel count.
-
- features[feature_index++] = (float)pb_source_variance;
-
- const int dc_q = (int)x->plane[0].dequant_QTX[0];
- features[feature_index++] = (float)(dc_q * dc_q) / 256.0f;
- assert(feature_index == FEATURES);
-
- // Calculate score using the NN model.
- float score = 0.0f;
- av1_nn_predict(features, nn_config, &score);
-
- // Make decision.
- return (int)(score * 100) >= thresh;
-}
-#undef FEATURES
-
-// TODO(jingning,jimbankoski,rbultje): properly skip partition types that are
-// unlikely to be selected depending on previous rate-distortion optimization
-// results, for encoding speed-up.
-static void rd_pick_partition(AV1_COMP *const cpi, ThreadData *td,
- TileDataEnc *tile_data, TOKENEXTRA **tp,
- int mi_row, int mi_col, BLOCK_SIZE bsize,
- RD_STATS *rd_cost, int64_t best_rd,
- PC_TREE *pc_tree, int64_t *none_rd) {
- const AV1_COMMON *const cm = &cpi->common;
- const int num_planes = av1_num_planes(cm);
- TileInfo *const tile_info = &tile_data->tile_info;
- MACROBLOCK *const x = &td->mb;
- MACROBLOCKD *const xd = &x->e_mbd;
- const int mi_step = mi_size_wide[bsize] / 2;
- RD_SEARCH_MACROBLOCK_CONTEXT x_ctx;
- const TOKENEXTRA *const tp_orig = *tp;
- PICK_MODE_CONTEXT *ctx_none = &pc_tree->none;
- int tmp_partition_cost[PARTITION_TYPES];
- BLOCK_SIZE subsize;
- RD_STATS this_rdc, sum_rdc, best_rdc;
- const int bsize_at_least_8x8 = (bsize >= BLOCK_8X8);
- int do_square_split = bsize_at_least_8x8;
- const int pl = bsize_at_least_8x8
- ? partition_plane_context(xd, mi_row, mi_col, bsize)
- : 0;
- const int *partition_cost =
- pl >= 0 ? x->partition_cost[pl] : x->partition_cost[0];
-
- int do_rectangular_split = 1;
- int64_t cur_none_rd = 0;
- int64_t split_rd[4] = { 0, 0, 0, 0 };
- int64_t horz_rd[2] = { 0, 0 };
- int64_t vert_rd[2] = { 0, 0 };
-
- int split_ctx_is_ready[2] = { 0, 0 };
- int horz_ctx_is_ready = 0;
- int vert_ctx_is_ready = 0;
- BLOCK_SIZE bsize2 = get_partition_subsize(bsize, PARTITION_SPLIT);
-
- if (best_rd < 0) {
- pc_tree->none.rdcost = INT64_MAX;
- pc_tree->none.skip = 0;
- av1_invalid_rd_stats(rd_cost);
- return;
- }
- if (bsize == cm->seq_params.sb_size) x->must_find_valid_partition = 0;
-
- // Override skipping rectangular partition operations for edge blocks
- const int has_rows = (mi_row + mi_step < cm->mi_rows);
- const int has_cols = (mi_col + mi_step < cm->mi_cols);
- const int xss = x->e_mbd.plane[1].subsampling_x;
- const int yss = x->e_mbd.plane[1].subsampling_y;
-
- BLOCK_SIZE min_size = x->min_partition_size;
- BLOCK_SIZE max_size = x->max_partition_size;
-
- if (none_rd) *none_rd = 0;
-
-#if CONFIG_FP_MB_STATS
- unsigned int src_diff_var = UINT_MAX;
- int none_complexity = 0;
-#endif
-
- int partition_none_allowed = has_rows && has_cols;
- int partition_horz_allowed = has_cols && yss <= xss && bsize_at_least_8x8;
- int partition_vert_allowed = has_rows && xss <= yss && bsize_at_least_8x8;
-
- (void)*tp_orig;
-
- // Override partition costs at the edges of the frame in the same
- // way as in read_partition (see decodeframe.c)
- if (!(has_rows && has_cols)) {
- assert(bsize_at_least_8x8 && pl >= 0);
- const aom_cdf_prob *partition_cdf = cm->fc->partition_cdf[pl];
- for (int i = 0; i < PARTITION_TYPES; ++i) tmp_partition_cost[i] = INT_MAX;
- if (has_cols) {
- // At the bottom, the two possibilities are HORZ and SPLIT
- aom_cdf_prob bot_cdf[2];
- partition_gather_vert_alike(bot_cdf, partition_cdf, bsize);
- static const int bot_inv_map[2] = { PARTITION_HORZ, PARTITION_SPLIT };
- av1_cost_tokens_from_cdf(tmp_partition_cost, bot_cdf, bot_inv_map);
- } else if (has_rows) {
- // At the right, the two possibilities are VERT and SPLIT
- aom_cdf_prob rhs_cdf[2];
- partition_gather_horz_alike(rhs_cdf, partition_cdf, bsize);
- static const int rhs_inv_map[2] = { PARTITION_VERT, PARTITION_SPLIT };
- av1_cost_tokens_from_cdf(tmp_partition_cost, rhs_cdf, rhs_inv_map);
- } else {
- // At the bottom right, we always split
- tmp_partition_cost[PARTITION_SPLIT] = 0;
- }
-
- partition_cost = tmp_partition_cost;
- }
-
-#ifndef NDEBUG
- // Nothing should rely on the default value of this array (which is just
- // leftover from encoding the previous block. Setting it to fixed pattern
- // when debugging.
- // bit 0, 1, 2 are blk_skip of each plane
- // bit 4, 5, 6 are initialization checking of each plane
- memset(x->blk_skip, 0x77, sizeof(x->blk_skip));
-#endif // NDEBUG
-
- assert(mi_size_wide[bsize] == mi_size_high[bsize]);
-
- av1_init_rd_stats(&this_rdc);
- av1_invalid_rd_stats(&best_rdc);
- best_rdc.rdcost = best_rd;
-
- set_offsets(cpi, tile_info, x, mi_row, mi_col, bsize);
-
- if (bsize == BLOCK_16X16 && cpi->vaq_refresh)
- x->mb_energy = av1_log_block_var(cpi, x, bsize);
-
- if (cpi->sf.cb_partition_search && bsize == BLOCK_16X16) {
- const int cb_partition_search_ctrl =
- ((pc_tree->index == 0 || pc_tree->index == 3) +
- get_chessboard_index(cm->current_video_frame)) &
- 0x1;
-
- if (cb_partition_search_ctrl && bsize > min_size && bsize < max_size)
- set_partition_range(cm, xd, mi_row, mi_col, bsize, &min_size, &max_size);
- }
-
- // Determine partition types in search according to the speed features.
- // The threshold set here has to be of square block size.
- if (cpi->sf.auto_min_max_partition_size) {
- const int no_partition_allowed = (bsize <= max_size && bsize >= min_size);
- // Note: Further partitioning is NOT allowed when bsize == min_size already.
- const int partition_allowed = (bsize <= max_size && bsize > min_size);
- partition_none_allowed &= no_partition_allowed;
- partition_horz_allowed &= partition_allowed || !has_rows;
- partition_vert_allowed &= partition_allowed || !has_cols;
- do_square_split &= bsize > min_size;
- }
-
- if (bsize > cpi->sf.use_square_partition_only_threshold) {
- partition_horz_allowed &= !has_rows;
- partition_vert_allowed &= !has_cols;
- }
-
- if (bsize > BLOCK_4X4 && x->use_cb_search_range &&
- cpi->sf.auto_min_max_partition_size == 0) {
- int split_score = 0;
- int none_score = 0;
- const int score_valid = ml_prune_2pass_split_partition(
- &pc_tree->pc_tree_stats, bsize, &split_score, &none_score);
- if (score_valid) {
- {
- const int only_split_thresh = 300;
- const int no_none_thresh = 250;
- const int no_split_thresh = 0;
- if (split_score > only_split_thresh) {
- partition_none_allowed = 0;
- partition_horz_allowed = 0;
- partition_vert_allowed = 0;
- } else if (split_score > no_none_thresh) {
- partition_none_allowed = 0;
- }
- if (split_score < no_split_thresh) do_square_split = 0;
- }
- {
- const int no_split_thresh = 120;
- const int no_none_thresh = -120;
- if (none_score > no_split_thresh && partition_none_allowed)
- do_square_split = 0;
- if (none_score < no_none_thresh) partition_none_allowed = 0;
- }
- } else {
- if (pc_tree->cb_search_range == SPLIT_PLANE) {
- partition_none_allowed = 0;
- partition_horz_allowed = 0;
- partition_vert_allowed = 0;
- }
- if (pc_tree->cb_search_range == SEARCH_SAME_PLANE) do_square_split = 0;
- if (pc_tree->cb_search_range == NONE_PARTITION_PLANE) {
- do_square_split = 0;
- partition_horz_allowed = 0;
- partition_vert_allowed = 0;
- }
- }
-
- // Fall back to default values in case all partition modes are rejected.
- if (partition_none_allowed == 0 && do_square_split == 0 &&
- partition_horz_allowed == 0 && partition_vert_allowed == 0) {
- do_square_split = bsize_at_least_8x8;
- partition_none_allowed = has_rows && has_cols;
- partition_horz_allowed = has_cols && yss <= xss && bsize_at_least_8x8;
- partition_vert_allowed = has_rows && xss <= yss && bsize_at_least_8x8;
- }
- }
-
- xd->above_txfm_context = cm->above_txfm_context[tile_info->tile_row] + mi_col;
- xd->left_txfm_context =
- xd->left_txfm_context_buffer + (mi_row & MAX_MIB_MASK);
- save_context(x, &x_ctx, mi_row, mi_col, bsize, num_planes);
-
-#if CONFIG_FP_MB_STATS
- if (cpi->use_fp_mb_stats) {
- set_offsets(cpi, tile_info, x, mi_row, mi_col, bsize);
- src_diff_var = get_sby_perpixel_diff_variance(cpi, &x->plane[0].src, mi_row,
- mi_col, bsize);
- }
-
- // Decide whether we shall split directly and skip searching NONE by using
- // the first pass block statistics
- if (cpi->use_fp_mb_stats && bsize >= BLOCK_32X32 && do_square_split &&
- partition_none_allowed && src_diff_var > 4 &&
- cm->base_qindex < qindex_split_threshold_lookup[bsize]) {
- int mb_row = mi_row >> 1;
- int mb_col = mi_col >> 1;
- int mb_row_end =
- AOMMIN(mb_row + num_16x16_blocks_high_lookup[bsize], cm->mb_rows);
- int mb_col_end =
- AOMMIN(mb_col + num_16x16_blocks_wide_lookup[bsize], cm->mb_cols);
- int r, c;
-
- // compute a complexity measure, basically measure inconsistency of motion
- // vectors obtained from the first pass in the current block
- for (r = mb_row; r < mb_row_end; r++) {
- for (c = mb_col; c < mb_col_end; c++) {
- const int mb_index = r * cm->mb_cols + c;
-
- MOTION_DIRECTION this_mv;
- MOTION_DIRECTION right_mv;
- MOTION_DIRECTION bottom_mv;
-
- this_mv =
- get_motion_direction_fp(cpi->twopass.this_frame_mb_stats[mb_index]);
-
- // to its right
- if (c != mb_col_end - 1) {
- right_mv = get_motion_direction_fp(
- cpi->twopass.this_frame_mb_stats[mb_index + 1]);
- none_complexity += get_motion_inconsistency(this_mv, right_mv);
- }
-
- // to its bottom
- if (r != mb_row_end - 1) {
- bottom_mv = get_motion_direction_fp(
- cpi->twopass.this_frame_mb_stats[mb_index + cm->mb_cols]);
- none_complexity += get_motion_inconsistency(this_mv, bottom_mv);
- }
-
- // do not count its left and top neighbors to avoid double counting
- }
- }
-
- if (none_complexity > complexity_16x16_blocks_threshold[bsize]) {
- partition_none_allowed = 0;
- }
- }
-#endif
-
- // Ref frames picked in the [i_th] quarter subblock during square partition
- // RD search. It may be used to prune ref frame selection of rect partitions.
- int ref_frames_used[4] = {
- 0,
- };
-
-BEGIN_PARTITION_SEARCH:
- if (x->must_find_valid_partition) {
- partition_none_allowed = has_rows && has_cols;
- partition_horz_allowed = has_cols && yss <= xss && bsize_at_least_8x8;
- partition_vert_allowed = has_rows && xss <= yss && bsize_at_least_8x8;
- }
-
- // Partition block source pixel variance.
- unsigned int pb_source_variance = UINT_MAX;
-
-#if CONFIG_DIST_8X8
- if (x->using_dist_8x8) {
- if (block_size_high[bsize] <= 8) partition_horz_allowed = 0;
- if (block_size_wide[bsize] <= 8) partition_vert_allowed = 0;
- if (block_size_high[bsize] <= 8 || block_size_wide[bsize] <= 8)
- do_square_split = 0;
- }
-#endif
-
- // PARTITION_NONE
- if (partition_none_allowed) {
- int pt_cost = 0;
- if (bsize_at_least_8x8) {
- pt_cost = partition_cost[PARTITION_NONE] < INT_MAX
- ? partition_cost[PARTITION_NONE]
- : 0;
- }
- int64_t partition_rd_cost = RDCOST(x->rdmult, pt_cost, 0);
- int64_t best_remain_rdcost = (best_rdc.rdcost == INT64_MAX)
- ? INT64_MAX
- : (best_rdc.rdcost - partition_rd_cost);
- rd_pick_sb_modes(cpi, tile_data, x, mi_row, mi_col, &this_rdc,
- PARTITION_NONE, bsize, ctx_none, best_remain_rdcost);
- pb_source_variance = x->source_variance;
- if (none_rd) *none_rd = this_rdc.rdcost;
- cur_none_rd = this_rdc.rdcost;
- if (this_rdc.rate != INT_MAX) {
- if (cpi->sf.prune_ref_frame_for_rect_partitions) {
- const int ref_type = av1_ref_frame_type(ctx_none->mic.ref_frame);
- for (int i = 0; i < 4; ++i) {
- ref_frames_used[i] |= (1 << ref_type);
- }
- }
- if (bsize_at_least_8x8) {
- this_rdc.rate += pt_cost;
- this_rdc.rdcost = RDCOST(x->rdmult, this_rdc.rate, this_rdc.dist);
- }
-
- if (this_rdc.rdcost < best_rdc.rdcost) {
- // Adjust dist breakout threshold according to the partition size.
- const int64_t dist_breakout_thr =
- cpi->sf.partition_search_breakout_dist_thr >>
- ((2 * (MAX_SB_SIZE_LOG2 - 2)) -
- (mi_size_wide_log2[bsize] + mi_size_high_log2[bsize]));
- const int rate_breakout_thr =
- cpi->sf.partition_search_breakout_rate_thr *
- num_pels_log2_lookup[bsize];
-
- best_rdc = this_rdc;
- if (bsize_at_least_8x8) pc_tree->partitioning = PARTITION_NONE;
-
- if ((do_square_split || do_rectangular_split) &&
- !x->e_mbd.lossless[xd->mi[0]->segment_id] && ctx_none->skippable) {
- const int use_ml_based_breakout =
- bsize <= cpi->sf.use_square_partition_only_threshold &&
- bsize > BLOCK_4X4 && xd->bd == 8;
- if (use_ml_based_breakout) {
- if (ml_predict_breakout(cpi, bsize, x, &this_rdc,
- pb_source_variance)) {
- do_square_split = 0;
- do_rectangular_split = 0;
- }
- }
-
- // If all y, u, v transform blocks in this partition are skippable,
- // and the dist & rate are within the thresholds, the partition
- // search is terminated for current branch of the partition search
- // tree. The dist & rate thresholds are set to 0 at speed 0 to
- // disable the early termination at that speed.
- if (best_rdc.dist < dist_breakout_thr &&
- best_rdc.rate < rate_breakout_thr) {
- do_square_split = 0;
- do_rectangular_split = 0;
- }
- }
-
-#if CONFIG_FP_MB_STATS
- // Check if every 16x16 first pass block statistics has zero
- // motion and the corresponding first pass residue is small enough.
- // If that is the case, check the difference variance between the
- // current frame and the last frame. If the variance is small enough,
- // stop further splitting in RD optimization
- if (cpi->use_fp_mb_stats && do_square_split &&
- cm->base_qindex > qindex_skip_threshold_lookup[bsize]) {
- int mb_row = mi_row >> 1;
- int mb_col = mi_col >> 1;
- int mb_row_end =
- AOMMIN(mb_row + num_16x16_blocks_high_lookup[bsize], cm->mb_rows);
- int mb_col_end =
- AOMMIN(mb_col + num_16x16_blocks_wide_lookup[bsize], cm->mb_cols);
- int r, c;
-
- int skip = 1;
- for (r = mb_row; r < mb_row_end; r++) {
- for (c = mb_col; c < mb_col_end; c++) {
- const int mb_index = r * cm->mb_cols + c;
- if (!(cpi->twopass.this_frame_mb_stats[mb_index] &
- FPMB_MOTION_ZERO_MASK) ||
- !(cpi->twopass.this_frame_mb_stats[mb_index] &
- FPMB_ERROR_SMALL_MASK)) {
- skip = 0;
- break;
- }
- }
- if (skip == 0) {
- break;
- }
- }
- if (skip) {
- if (src_diff_var == UINT_MAX) {
- set_offsets(cpi, tile_info, x, mi_row, mi_col, bsize);
- src_diff_var = get_sby_perpixel_diff_variance(
- cpi, &x->plane[0].src, mi_row, mi_col, bsize);
- }
- if (src_diff_var < 8) {
- do_square_split = 0;
- do_rectangular_split = 0;
- }
- }
- }
-#endif
- }
- }
-
- restore_context(x, &x_ctx, mi_row, mi_col, bsize, num_planes);
- }
-
- // store estimated motion vector
- if (cpi->sf.adaptive_motion_search) store_pred_mv(x, ctx_none);
-
- // PARTITION_SPLIT
- if (do_square_split) {
- av1_init_rd_stats(&sum_rdc);
- subsize = get_partition_subsize(bsize, PARTITION_SPLIT);
- sum_rdc.rate = partition_cost[PARTITION_SPLIT];
- sum_rdc.rdcost = RDCOST(x->rdmult, sum_rdc.rate, 0);
-
- int idx;
- for (idx = 0; idx < 4 && sum_rdc.rdcost < best_rdc.rdcost; ++idx) {
- const int x_idx = (idx & 1) * mi_step;
- const int y_idx = (idx >> 1) * mi_step;
-
- if (mi_row + y_idx >= cm->mi_rows || mi_col + x_idx >= cm->mi_cols)
- continue;
-
- if (cpi->sf.adaptive_motion_search) load_pred_mv(x, ctx_none);
-
- pc_tree->split[idx]->index = idx;
- int64_t *p_split_rd = &split_rd[idx];
- int64_t best_remain_rdcost = best_rdc.rdcost == INT64_MAX
- ? INT64_MAX
- : (best_rdc.rdcost - sum_rdc.rdcost);
- if (cpi->sf.prune_ref_frame_for_rect_partitions)
- pc_tree->split[idx]->none.rate = INT_MAX;
- rd_pick_partition(cpi, td, tile_data, tp, mi_row + y_idx, mi_col + x_idx,
- subsize, &this_rdc, best_remain_rdcost,
- pc_tree->split[idx], p_split_rd);
-
- if (this_rdc.rate == INT_MAX) {
- sum_rdc.rdcost = INT64_MAX;
- break;
- } else {
- sum_rdc.rate += this_rdc.rate;
- sum_rdc.dist += this_rdc.dist;
- sum_rdc.rdcost += this_rdc.rdcost;
- if (cpi->sf.prune_ref_frame_for_rect_partitions &&
- pc_tree->split[idx]->none.rate != INT_MAX) {
- const int ref_type =
- av1_ref_frame_type(pc_tree->split[idx]->none.mic.ref_frame);
- ref_frames_used[idx] |= (1 << ref_type);
- }
- if (idx <= 1 && (bsize <= BLOCK_8X8 ||
- pc_tree->split[idx]->partitioning == PARTITION_NONE)) {
- const MB_MODE_INFO *const mbmi = &pc_tree->split[idx]->none.mic;
- const PALETTE_MODE_INFO *const pmi = &mbmi->palette_mode_info;
- // Neither palette mode nor cfl predicted
- if (pmi->palette_size[0] == 0 && pmi->palette_size[1] == 0) {
- if (mbmi->uv_mode != UV_CFL_PRED) split_ctx_is_ready[idx] = 1;
- }
- }
- }
- }
- const int reached_last_index = (idx == 4);
-
- if (reached_last_index && sum_rdc.rdcost < best_rdc.rdcost) {
- sum_rdc.rdcost = RDCOST(x->rdmult, sum_rdc.rate, sum_rdc.dist);
-
- if (sum_rdc.rdcost < best_rdc.rdcost) {
- best_rdc = sum_rdc;
- pc_tree->partitioning = PARTITION_SPLIT;
- }
- } else if (cpi->sf.less_rectangular_check_level > 0) {
- // skip rectangular partition test when larger block size
- // gives better rd cost
- if (cpi->sf.less_rectangular_check_level == 2 || idx <= 2)
- do_rectangular_split &= !partition_none_allowed;
- }
-
- restore_context(x, &x_ctx, mi_row, mi_col, bsize, num_planes);
- } // if (do_split)
-
- pc_tree->horizontal[0].skip_ref_frame_mask = 0;
- pc_tree->horizontal[1].skip_ref_frame_mask = 0;
- pc_tree->vertical[0].skip_ref_frame_mask = 0;
- pc_tree->vertical[1].skip_ref_frame_mask = 0;
- if (cpi->sf.prune_ref_frame_for_rect_partitions) {
- int used_frames;
- used_frames = ref_frames_used[0] | ref_frames_used[1];
- if (used_frames) pc_tree->horizontal[0].skip_ref_frame_mask = ~used_frames;
- used_frames = ref_frames_used[2] | ref_frames_used[3];
- if (used_frames) pc_tree->horizontal[1].skip_ref_frame_mask = ~used_frames;
- used_frames = ref_frames_used[0] | ref_frames_used[2];
- if (used_frames) pc_tree->vertical[0].skip_ref_frame_mask = ~used_frames;
- used_frames = ref_frames_used[1] | ref_frames_used[3];
- if (used_frames) pc_tree->vertical[1].skip_ref_frame_mask = ~used_frames;
- }
-
- int prune_horz = 0;
- int prune_vert = 0;
- if (cpi->sf.ml_prune_rect_partition && !frame_is_intra_only(cm) &&
- (partition_horz_allowed || partition_vert_allowed)) {
- av1_setup_src_planes(x, cpi->source, mi_row, mi_col, num_planes);
- ml_prune_rect_partition(cpi, x, bsize, best_rdc.rdcost, cur_none_rd,
- split_rd, &prune_horz, &prune_vert);
- }
-
- // PARTITION_HORZ
- if (partition_horz_allowed && !prune_horz &&
- (do_rectangular_split || active_h_edge(cpi, mi_row, mi_step))) {
- av1_init_rd_stats(&sum_rdc);
- subsize = get_partition_subsize(bsize, PARTITION_HORZ);
- if (cpi->sf.adaptive_motion_search) load_pred_mv(x, ctx_none);
- if (cpi->sf.adaptive_pred_interp_filter && bsize == BLOCK_8X8 &&
- partition_none_allowed) {
- pc_tree->horizontal[0].pred_interp_filter =
- av1_extract_interp_filter(ctx_none->mic.interp_filters, 0);
- }
- int64_t best_remain_rdcost = best_rdc.rdcost == INT64_MAX
- ? INT64_MAX
- : (best_rdc.rdcost - sum_rdc.rdcost);
- sum_rdc.rate = partition_cost[PARTITION_HORZ];
- sum_rdc.rdcost = RDCOST(x->rdmult, sum_rdc.rate, 0);
- rd_pick_sb_modes(cpi, tile_data, x, mi_row, mi_col, &this_rdc,
- PARTITION_HORZ, subsize, &pc_tree->horizontal[0],
- best_remain_rdcost);
-
- if (this_rdc.rate == INT_MAX) {
- sum_rdc.rdcost = INT64_MAX;
- } else {
- sum_rdc.rate += this_rdc.rate;
- sum_rdc.dist += this_rdc.dist;
- sum_rdc.rdcost += this_rdc.rdcost;
- }
- horz_rd[0] = this_rdc.rdcost;
-
- if (sum_rdc.rdcost < best_rdc.rdcost && has_rows) {
- const PICK_MODE_CONTEXT *const ctx_h = &pc_tree->horizontal[0];
- const MB_MODE_INFO *const mbmi = &pc_tree->horizontal[0].mic;
- const PALETTE_MODE_INFO *const pmi = &mbmi->palette_mode_info;
- // Neither palette mode nor cfl predicted
- if (pmi->palette_size[0] == 0 && pmi->palette_size[1] == 0) {
- if (mbmi->uv_mode != UV_CFL_PRED) horz_ctx_is_ready = 1;
- }
- update_state(cpi, tile_data, td, ctx_h, mi_row, mi_col, subsize, 1);
- encode_superblock(cpi, tile_data, td, tp, DRY_RUN_NORMAL, mi_row, mi_col,
- subsize, NULL);
-
- if (cpi->sf.adaptive_motion_search) load_pred_mv(x, ctx_h);
-
- if (cpi->sf.adaptive_pred_interp_filter && bsize == BLOCK_8X8 &&
- partition_none_allowed) {
- pc_tree->horizontal[1].pred_interp_filter =
- av1_extract_interp_filter(ctx_h->mic.interp_filters, 0);
- }
- rd_pick_sb_modes(cpi, tile_data, x, mi_row + mi_step, mi_col, &this_rdc,
- PARTITION_HORZ, subsize, &pc_tree->horizontal[1],
- best_rdc.rdcost - sum_rdc.rdcost);
- horz_rd[1] = this_rdc.rdcost;
-
- if (this_rdc.rate == INT_MAX) {
- sum_rdc.rdcost = INT64_MAX;
- } else {
- sum_rdc.rate += this_rdc.rate;
- sum_rdc.dist += this_rdc.dist;
- sum_rdc.rdcost += this_rdc.rdcost;
- }
- }
-
- if (sum_rdc.rdcost < best_rdc.rdcost) {
- sum_rdc.rdcost = RDCOST(x->rdmult, sum_rdc.rate, sum_rdc.dist);
- if (sum_rdc.rdcost < best_rdc.rdcost) {
- best_rdc = sum_rdc;
- pc_tree->partitioning = PARTITION_HORZ;
- }
- }
-
- restore_context(x, &x_ctx, mi_row, mi_col, bsize, num_planes);
- }
-
- // PARTITION_VERT
- if (partition_vert_allowed && !prune_vert &&
- (do_rectangular_split || active_v_edge(cpi, mi_col, mi_step))) {
- av1_init_rd_stats(&sum_rdc);
- subsize = get_partition_subsize(bsize, PARTITION_VERT);
-
- if (cpi->sf.adaptive_motion_search) load_pred_mv(x, ctx_none);
-
- if (cpi->sf.adaptive_pred_interp_filter && bsize == BLOCK_8X8 &&
- partition_none_allowed) {
- pc_tree->vertical[0].pred_interp_filter =
- av1_extract_interp_filter(ctx_none->mic.interp_filters, 0);
- }
- sum_rdc.rate = partition_cost[PARTITION_VERT];
- sum_rdc.rdcost = RDCOST(x->rdmult, sum_rdc.rate, 0);
- int64_t best_remain_rdcost = best_rdc.rdcost == INT64_MAX
- ? INT64_MAX
- : (best_rdc.rdcost - sum_rdc.rdcost);
- rd_pick_sb_modes(cpi, tile_data, x, mi_row, mi_col, &this_rdc,
- PARTITION_VERT, subsize, &pc_tree->vertical[0],
- best_remain_rdcost);
-
- if (this_rdc.rate == INT_MAX) {
- sum_rdc.rdcost = INT64_MAX;
- } else {
- sum_rdc.rate += this_rdc.rate;
- sum_rdc.dist += this_rdc.dist;
- sum_rdc.rdcost += this_rdc.rdcost;
- }
- vert_rd[0] = this_rdc.rdcost;
- const int64_t vert_max_rdcost = best_rdc.rdcost;
- if (sum_rdc.rdcost < vert_max_rdcost && has_cols) {
- const MB_MODE_INFO *const mbmi = &pc_tree->vertical[0].mic;
- const PALETTE_MODE_INFO *const pmi = &mbmi->palette_mode_info;
- // Neither palette mode nor cfl predicted
- if (pmi->palette_size[0] == 0 && pmi->palette_size[1] == 0) {
- if (mbmi->uv_mode != UV_CFL_PRED) vert_ctx_is_ready = 1;
- }
- update_state(cpi, tile_data, td, &pc_tree->vertical[0], mi_row, mi_col,
- subsize, 1);
- encode_superblock(cpi, tile_data, td, tp, DRY_RUN_NORMAL, mi_row, mi_col,
- subsize, NULL);
-
- if (cpi->sf.adaptive_motion_search) load_pred_mv(x, ctx_none);
-
- if (cpi->sf.adaptive_pred_interp_filter && bsize == BLOCK_8X8 &&
- partition_none_allowed) {
- pc_tree->vertical[1].pred_interp_filter =
- av1_extract_interp_filter(ctx_none->mic.interp_filters, 0);
- }
- rd_pick_sb_modes(cpi, tile_data, x, mi_row, mi_col + mi_step, &this_rdc,
- PARTITION_VERT, subsize, &pc_tree->vertical[1],
- best_rdc.rdcost - sum_rdc.rdcost);
- vert_rd[1] = this_rdc.rdcost;
-
- if (this_rdc.rate == INT_MAX) {
- sum_rdc.rdcost = INT64_MAX;
- } else {
- sum_rdc.rate += this_rdc.rate;
- sum_rdc.dist += this_rdc.dist;
- sum_rdc.rdcost += this_rdc.rdcost;
- }
- }
-
- if (sum_rdc.rdcost < best_rdc.rdcost) {
- sum_rdc.rdcost = RDCOST(x->rdmult, sum_rdc.rate, sum_rdc.dist);
- if (sum_rdc.rdcost < best_rdc.rdcost) {
- best_rdc = sum_rdc;
- pc_tree->partitioning = PARTITION_VERT;
- }
- }
-
- restore_context(x, &x_ctx, mi_row, mi_col, bsize, num_planes);
- }
-
- if (pb_source_variance == UINT_MAX) {
- av1_setup_src_planes(x, cpi->source, mi_row, mi_col, num_planes);
- if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
- pb_source_variance = av1_high_get_sby_perpixel_variance(
- cpi, &x->plane[0].src, bsize, xd->bd);
- } else {
- pb_source_variance =
- av1_get_sby_perpixel_variance(cpi, &x->plane[0].src, bsize);
- }
- }
-
- const int ext_partition_allowed =
- do_rectangular_split && bsize > BLOCK_8X8 && partition_none_allowed;
-
- // The standard AB partitions are allowed whenever ext-partition-types are
- // allowed
- int horzab_partition_allowed = ext_partition_allowed;
- int vertab_partition_allowed = ext_partition_allowed;
-
-#if CONFIG_DIST_8X8
- if (x->using_dist_8x8) {
- if (block_size_high[bsize] <= 8 || block_size_wide[bsize] <= 8) {
- horzab_partition_allowed = 0;
- vertab_partition_allowed = 0;
- }
- }
-#endif
-
- if (cpi->sf.prune_ext_partition_types_search_level) {
- if (cpi->sf.prune_ext_partition_types_search_level == 1) {
- // TODO(debargha,huisu@google.com): may need to tune the threshold for
- // pb_source_variance.
- horzab_partition_allowed &= (pc_tree->partitioning == PARTITION_HORZ ||
- (pc_tree->partitioning == PARTITION_NONE &&
- pb_source_variance < 32) ||
- pc_tree->partitioning == PARTITION_SPLIT);
- vertab_partition_allowed &= (pc_tree->partitioning == PARTITION_VERT ||
- (pc_tree->partitioning == PARTITION_NONE &&
- pb_source_variance < 32) ||
- pc_tree->partitioning == PARTITION_SPLIT);
- } else {
- horzab_partition_allowed &= (pc_tree->partitioning == PARTITION_HORZ ||
- pc_tree->partitioning == PARTITION_SPLIT);
- vertab_partition_allowed &= (pc_tree->partitioning == PARTITION_VERT ||
- pc_tree->partitioning == PARTITION_SPLIT);
- }
- horz_rd[0] = (horz_rd[0] < INT64_MAX ? horz_rd[0] : 0);
- horz_rd[1] = (horz_rd[1] < INT64_MAX ? horz_rd[1] : 0);
- vert_rd[0] = (vert_rd[0] < INT64_MAX ? vert_rd[0] : 0);
- vert_rd[1] = (vert_rd[1] < INT64_MAX ? vert_rd[1] : 0);
- split_rd[0] = (split_rd[0] < INT64_MAX ? split_rd[0] : 0);
- split_rd[1] = (split_rd[1] < INT64_MAX ? split_rd[1] : 0);
- split_rd[2] = (split_rd[2] < INT64_MAX ? split_rd[2] : 0);
- split_rd[3] = (split_rd[3] < INT64_MAX ? split_rd[3] : 0);
- }
- int horza_partition_allowed = horzab_partition_allowed;
- int horzb_partition_allowed = horzab_partition_allowed;
- if (cpi->sf.prune_ext_partition_types_search_level) {
- const int64_t horz_a_rd = horz_rd[1] + split_rd[0] + split_rd[1];
- const int64_t horz_b_rd = horz_rd[0] + split_rd[2] + split_rd[3];
- switch (cpi->sf.prune_ext_partition_types_search_level) {
- case 1:
- horza_partition_allowed &= (horz_a_rd / 16 * 14 < best_rdc.rdcost);
- horzb_partition_allowed &= (horz_b_rd / 16 * 14 < best_rdc.rdcost);
- break;
- case 2:
- default:
- horza_partition_allowed &= (horz_a_rd / 16 * 15 < best_rdc.rdcost);
- horzb_partition_allowed &= (horz_b_rd / 16 * 15 < best_rdc.rdcost);
- break;
- }
- }
-
- int verta_partition_allowed = vertab_partition_allowed;
- int vertb_partition_allowed = vertab_partition_allowed;
- if (cpi->sf.prune_ext_partition_types_search_level) {
- const int64_t vert_a_rd = vert_rd[1] + split_rd[0] + split_rd[2];
- const int64_t vert_b_rd = vert_rd[0] + split_rd[1] + split_rd[3];
- switch (cpi->sf.prune_ext_partition_types_search_level) {
- case 1:
- verta_partition_allowed &= (vert_a_rd / 16 * 14 < best_rdc.rdcost);
- vertb_partition_allowed &= (vert_b_rd / 16 * 14 < best_rdc.rdcost);
- break;
- case 2:
- default:
- verta_partition_allowed &= (vert_a_rd / 16 * 15 < best_rdc.rdcost);
- vertb_partition_allowed &= (vert_b_rd / 16 * 15 < best_rdc.rdcost);
- break;
- }
- }
-
- if (cpi->sf.ml_prune_ab_partition && ext_partition_allowed &&
- partition_horz_allowed && partition_vert_allowed) {
- // TODO(huisu@google.com): x->source_variance may not be the current block's
- // variance. The correct one to use is pb_source_variance.
- // Need to re-train the model to fix it.
- ml_prune_ab_partition(bsize, pc_tree->partitioning,
- get_unsigned_bits(x->source_variance),
- best_rdc.rdcost, horz_rd, vert_rd, split_rd,
- &horza_partition_allowed, &horzb_partition_allowed,
- &verta_partition_allowed, &vertb_partition_allowed);
- }
-
- // PARTITION_HORZ_A
- if (partition_horz_allowed && horza_partition_allowed) {
- subsize = get_partition_subsize(bsize, PARTITION_HORZ_A);
- pc_tree->horizontala[0].rd_mode_is_ready = 0;
- pc_tree->horizontala[1].rd_mode_is_ready = 0;
- pc_tree->horizontala[2].rd_mode_is_ready = 0;
- if (split_ctx_is_ready[0]) {
- av1_copy_tree_context(&pc_tree->horizontala[0], &pc_tree->split[0]->none);
- pc_tree->horizontala[0].mic.partition = PARTITION_HORZ_A;
- pc_tree->horizontala[0].rd_mode_is_ready = 1;
- if (split_ctx_is_ready[1]) {
- av1_copy_tree_context(&pc_tree->horizontala[1],
- &pc_tree->split[1]->none);
- pc_tree->horizontala[1].mic.partition = PARTITION_HORZ_A;
- pc_tree->horizontala[1].rd_mode_is_ready = 1;
- }
- }
- pc_tree->horizontala[0].skip_ref_frame_mask = 0;
- pc_tree->horizontala[1].skip_ref_frame_mask = 0;
- pc_tree->horizontala[2].skip_ref_frame_mask = 0;
- if (cpi->sf.prune_ref_frame_for_rect_partitions) {
- int used_frames;
- used_frames = ref_frames_used[0];
- if (used_frames)
- pc_tree->horizontala[0].skip_ref_frame_mask = ~used_frames;
- used_frames = ref_frames_used[1];
- if (used_frames)
- pc_tree->horizontala[1].skip_ref_frame_mask = ~used_frames;
- used_frames = ref_frames_used[2] | ref_frames_used[3];
- if (used_frames)
- pc_tree->horizontala[2].skip_ref_frame_mask = ~used_frames;
- }
- rd_test_partition3(cpi, td, tile_data, tp, pc_tree, &best_rdc,
- pc_tree->horizontala, ctx_none, mi_row, mi_col, bsize,
- PARTITION_HORZ_A, mi_row, mi_col, bsize2, mi_row,
- mi_col + mi_step, bsize2, mi_row + mi_step, mi_col,
- subsize);
- restore_context(x, &x_ctx, mi_row, mi_col, bsize, num_planes);
- }
- // PARTITION_HORZ_B
- if (partition_horz_allowed && horzb_partition_allowed) {
- subsize = get_partition_subsize(bsize, PARTITION_HORZ_B);
- pc_tree->horizontalb[0].rd_mode_is_ready = 0;
- pc_tree->horizontalb[1].rd_mode_is_ready = 0;
- pc_tree->horizontalb[2].rd_mode_is_ready = 0;
- if (horz_ctx_is_ready) {
- av1_copy_tree_context(&pc_tree->horizontalb[0], &pc_tree->horizontal[0]);
- pc_tree->horizontalb[0].mic.partition = PARTITION_HORZ_B;
- pc_tree->horizontalb[0].rd_mode_is_ready = 1;
- }
- pc_tree->horizontalb[0].skip_ref_frame_mask = 0;
- pc_tree->horizontalb[1].skip_ref_frame_mask = 0;
- pc_tree->horizontalb[2].skip_ref_frame_mask = 0;
- if (cpi->sf.prune_ref_frame_for_rect_partitions) {
- int used_frames;
- used_frames = ref_frames_used[0] | ref_frames_used[1];
- if (used_frames)
- pc_tree->horizontalb[0].skip_ref_frame_mask = ~used_frames;
- used_frames = ref_frames_used[2];
- if (used_frames)
- pc_tree->horizontalb[1].skip_ref_frame_mask = ~used_frames;
- used_frames = ref_frames_used[3];
- if (used_frames)
- pc_tree->horizontalb[2].skip_ref_frame_mask = ~used_frames;
- }
- rd_test_partition3(cpi, td, tile_data, tp, pc_tree, &best_rdc,
- pc_tree->horizontalb, ctx_none, mi_row, mi_col, bsize,
- PARTITION_HORZ_B, mi_row, mi_col, subsize,
- mi_row + mi_step, mi_col, bsize2, mi_row + mi_step,
- mi_col + mi_step, bsize2);
- restore_context(x, &x_ctx, mi_row, mi_col, bsize, num_planes);
- }
-
- // PARTITION_VERT_A
- if (partition_vert_allowed && verta_partition_allowed) {
- subsize = get_partition_subsize(bsize, PARTITION_VERT_A);
- pc_tree->verticala[0].rd_mode_is_ready = 0;
- pc_tree->verticala[1].rd_mode_is_ready = 0;
- pc_tree->verticala[2].rd_mode_is_ready = 0;
- if (split_ctx_is_ready[0]) {
- av1_copy_tree_context(&pc_tree->verticala[0], &pc_tree->split[0]->none);
- pc_tree->verticala[0].mic.partition = PARTITION_VERT_A;
- pc_tree->verticala[0].rd_mode_is_ready = 1;
- }
- pc_tree->verticala[0].skip_ref_frame_mask = 0;
- pc_tree->verticala[1].skip_ref_frame_mask = 0;
- pc_tree->verticala[2].skip_ref_frame_mask = 0;
- if (cpi->sf.prune_ref_frame_for_rect_partitions) {
- int used_frames;
- used_frames = ref_frames_used[0];
- if (used_frames) pc_tree->verticala[0].skip_ref_frame_mask = ~used_frames;
- used_frames = ref_frames_used[2];
- if (used_frames) pc_tree->verticala[1].skip_ref_frame_mask = ~used_frames;
- used_frames = ref_frames_used[1] | ref_frames_used[3];
- if (used_frames) pc_tree->verticala[2].skip_ref_frame_mask = ~used_frames;
- }
- rd_test_partition3(cpi, td, tile_data, tp, pc_tree, &best_rdc,
- pc_tree->verticala, ctx_none, mi_row, mi_col, bsize,
- PARTITION_VERT_A, mi_row, mi_col, bsize2,
- mi_row + mi_step, mi_col, bsize2, mi_row,
- mi_col + mi_step, subsize);
- restore_context(x, &x_ctx, mi_row, mi_col, bsize, num_planes);
- }
- // PARTITION_VERT_B
- if (partition_vert_allowed && vertb_partition_allowed) {
- subsize = get_partition_subsize(bsize, PARTITION_VERT_B);
- pc_tree->verticalb[0].rd_mode_is_ready = 0;
- pc_tree->verticalb[1].rd_mode_is_ready = 0;
- pc_tree->verticalb[2].rd_mode_is_ready = 0;
- if (vert_ctx_is_ready) {
- av1_copy_tree_context(&pc_tree->verticalb[0], &pc_tree->vertical[0]);
- pc_tree->verticalb[0].mic.partition = PARTITION_VERT_B;
- pc_tree->verticalb[0].rd_mode_is_ready = 1;
- }
- pc_tree->verticalb[0].skip_ref_frame_mask = 0;
- pc_tree->verticalb[1].skip_ref_frame_mask = 0;
- pc_tree->verticalb[2].skip_ref_frame_mask = 0;
- if (cpi->sf.prune_ref_frame_for_rect_partitions) {
- int used_frames;
- used_frames = ref_frames_used[0] | ref_frames_used[2];
- if (used_frames) pc_tree->verticalb[0].skip_ref_frame_mask = ~used_frames;
- used_frames = ref_frames_used[1];
- if (used_frames) pc_tree->verticalb[1].skip_ref_frame_mask = ~used_frames;
- used_frames = ref_frames_used[3];
- if (used_frames) pc_tree->verticalb[2].skip_ref_frame_mask = ~used_frames;
- }
- rd_test_partition3(cpi, td, tile_data, tp, pc_tree, &best_rdc,
- pc_tree->verticalb, ctx_none, mi_row, mi_col, bsize,
- PARTITION_VERT_B, mi_row, mi_col, subsize, mi_row,
- mi_col + mi_step, bsize2, mi_row + mi_step,
- mi_col + mi_step, bsize2);
- restore_context(x, &x_ctx, mi_row, mi_col, bsize, num_planes);
- }
-
- // partition4_allowed is 1 if we can use a PARTITION_HORZ_4 or
- // PARTITION_VERT_4 for this block. This is almost the same as
- // ext_partition_allowed, except that we don't allow 128x32 or 32x128 blocks,
- // so we require that bsize is not BLOCK_128X128.
- const int partition4_allowed =
- ext_partition_allowed && bsize != BLOCK_128X128;
- int partition_horz4_allowed = partition4_allowed && partition_horz_allowed;
- int partition_vert4_allowed = partition4_allowed && partition_vert_allowed;
- if (cpi->sf.prune_ext_partition_types_search_level == 2) {
- partition_horz4_allowed &= (pc_tree->partitioning == PARTITION_HORZ ||
- pc_tree->partitioning == PARTITION_HORZ_A ||
- pc_tree->partitioning == PARTITION_HORZ_B ||
- pc_tree->partitioning == PARTITION_SPLIT ||
- pc_tree->partitioning == PARTITION_NONE);
- partition_vert4_allowed &= (pc_tree->partitioning == PARTITION_VERT ||
- pc_tree->partitioning == PARTITION_VERT_A ||
- pc_tree->partitioning == PARTITION_VERT_B ||
- pc_tree->partitioning == PARTITION_SPLIT ||
- pc_tree->partitioning == PARTITION_NONE);
- }
- if (cpi->sf.ml_prune_4_partition && partition4_allowed &&
- partition_horz_allowed && partition_vert_allowed) {
- ml_prune_4_partition(cpi, x, bsize, pc_tree->partitioning, best_rdc.rdcost,
- horz_rd, vert_rd, split_rd, &partition_horz4_allowed,
- &partition_vert4_allowed, pb_source_variance, mi_row,
- mi_col);
- }
-
-#if CONFIG_DIST_8X8
- if (x->using_dist_8x8) {
- if (block_size_high[bsize] <= 16 || block_size_wide[bsize] <= 16) {
- partition_horz4_allowed = 0;
- partition_vert4_allowed = 0;
- }
- }
-#endif
-
- // PARTITION_HORZ_4
- if (partition_horz4_allowed && has_rows &&
- (do_rectangular_split || active_h_edge(cpi, mi_row, mi_step))) {
- av1_init_rd_stats(&sum_rdc);
- const int quarter_step = mi_size_high[bsize] / 4;
- PICK_MODE_CONTEXT *ctx_prev = ctx_none;
-
- subsize = get_partition_subsize(bsize, PARTITION_HORZ_4);
- sum_rdc.rate = partition_cost[PARTITION_HORZ_4];
- sum_rdc.rdcost = RDCOST(x->rdmult, sum_rdc.rate, 0);
-
- for (int i = 0; i < 4; ++i) {
- const int this_mi_row = mi_row + i * quarter_step;
-
- if (i > 0 && this_mi_row >= cm->mi_rows) break;
-
- PICK_MODE_CONTEXT *ctx_this = &pc_tree->horizontal4[i];
-
- ctx_this->rd_mode_is_ready = 0;
- ctx_this->skip_ref_frame_mask = 0;
- if (cpi->sf.prune_ref_frame_for_rect_partitions) {
- const int used_frames = i <= 1
- ? (ref_frames_used[0] | ref_frames_used[1])
- : (ref_frames_used[2] | ref_frames_used[3]);
- if (used_frames) ctx_this->skip_ref_frame_mask = ~used_frames;
- }
- if (!rd_try_subblock(cpi, td, tile_data, tp, (i == 3), this_mi_row,
- mi_col, subsize, &best_rdc, &sum_rdc, &this_rdc,
- PARTITION_HORZ_4, ctx_prev, ctx_this))
- break;
-
- ctx_prev = ctx_this;
- }
-
- if (sum_rdc.rdcost < best_rdc.rdcost) {
- sum_rdc.rdcost = RDCOST(x->rdmult, sum_rdc.rate, sum_rdc.dist);
- if (sum_rdc.rdcost < best_rdc.rdcost) {
- best_rdc = sum_rdc;
- pc_tree->partitioning = PARTITION_HORZ_4;
- }
- }
- restore_context(x, &x_ctx, mi_row, mi_col, bsize, num_planes);
- }
-
- // PARTITION_VERT_4
- if (partition_vert4_allowed && has_cols &&
- (do_rectangular_split || active_v_edge(cpi, mi_row, mi_step))) {
- av1_init_rd_stats(&sum_rdc);
- const int quarter_step = mi_size_wide[bsize] / 4;
- PICK_MODE_CONTEXT *ctx_prev = ctx_none;
-
- subsize = get_partition_subsize(bsize, PARTITION_VERT_4);
- sum_rdc.rate = partition_cost[PARTITION_VERT_4];
- sum_rdc.rdcost = RDCOST(x->rdmult, sum_rdc.rate, 0);
-
- for (int i = 0; i < 4; ++i) {
- const int this_mi_col = mi_col + i * quarter_step;
-
- if (i > 0 && this_mi_col >= cm->mi_cols) break;
-
- PICK_MODE_CONTEXT *ctx_this = &pc_tree->vertical4[i];
-
- ctx_this->rd_mode_is_ready = 0;
- ctx_this->skip_ref_frame_mask = 0;
- if (cpi->sf.prune_ref_frame_for_rect_partitions) {
- const int used_frames = i <= 1
- ? (ref_frames_used[0] | ref_frames_used[2])
- : (ref_frames_used[1] | ref_frames_used[3]);
- if (used_frames) ctx_this->skip_ref_frame_mask = ~used_frames;
- }
- if (!rd_try_subblock(cpi, td, tile_data, tp, (i == 3), mi_row,
- this_mi_col, subsize, &best_rdc, &sum_rdc, &this_rdc,
- PARTITION_VERT_4, ctx_prev, ctx_this))
- break;
-
- ctx_prev = ctx_this;
- }
-
- if (sum_rdc.rdcost < best_rdc.rdcost) {
- sum_rdc.rdcost = RDCOST(x->rdmult, sum_rdc.rate, sum_rdc.dist);
- if (sum_rdc.rdcost < best_rdc.rdcost) {
- best_rdc = sum_rdc;
- pc_tree->partitioning = PARTITION_VERT_4;
- }
- }
- restore_context(x, &x_ctx, mi_row, mi_col, bsize, num_planes);
- }
-
- if (bsize == cm->seq_params.sb_size && best_rdc.rate == INT_MAX) {
- // Did not find a valid partition, go back and search again, with less
- // constraint on which partition types to search.
- x->must_find_valid_partition = 1;
- goto BEGIN_PARTITION_SEARCH;
- }
-
- // TODO(jbb): This code added so that we avoid static analysis
- // warning related to the fact that best_rd isn't used after this
- // point. This code should be refactored so that the duplicate
- // checks occur in some sub function and thus are used...
- (void)best_rd;
- *rd_cost = best_rdc;
-
- if (best_rdc.rate < INT_MAX && best_rdc.dist < INT64_MAX &&
- pc_tree->index != 3) {
- if (bsize == cm->seq_params.sb_size) {
- x->cb_offset = 0;
- encode_sb(cpi, td, tile_data, tp, mi_row, mi_col, OUTPUT_ENABLED, bsize,
- pc_tree, NULL);
- } else {
- encode_sb(cpi, td, tile_data, tp, mi_row, mi_col, DRY_RUN_NORMAL, bsize,
- pc_tree, NULL);
- }
- }
-
- if (bsize == cm->seq_params.sb_size) {
- assert(best_rdc.rate < INT_MAX);
- assert(best_rdc.dist < INT64_MAX);
- } else {
- assert(tp_orig == *tp);
- }
-}
-
-// Saturate every first-partition-pass stats table: each byte of the
-// ref0/ref1 count arrays is set to 0xff and sample_counts to INT_MAX.
-// 'stats' must point at FIRST_PARTITION_PASS_STATS_TABLES entries.
-static void init_first_partition_pass_stats_tables(
-    FIRST_PARTITION_PASS_STATS *stats) {
-  FIRST_PARTITION_PASS_STATS *table = stats;
-  int remaining = FIRST_PARTITION_PASS_STATS_TABLES;
-  while (remaining-- > 0) {
-    table->sample_counts = INT_MAX;
-    memset(table->ref0_counts, 0xff, sizeof(table->ref0_counts));
-    memset(table->ref1_counts, 0xff, sizeof(table->ref1_counts));
-    ++table;
-  }
-}
-
-// Mark pc_tree_stats as invalid on this node and, recursively, on each of
-// its four split children. A NULL node is silently ignored.
-static INLINE void clear_pc_tree_stats(PC_TREE *pt) {
-  if (pt != NULL) {
-    pt->pc_tree_stats.valid = 0;
-    int child = 0;
-    while (child < 4) {
-      clear_pc_tree_stats(pt->split[child]);
-      ++child;
-    }
-  }
-}
-
-// Minimum number of samples to trigger the
-// mode_pruning_based_on_two_pass_partition_search feature.
-#define FIRST_PARTITION_PASS_MIN_SAMPLES 16
-
-static void encode_rd_sb_row(AV1_COMP *cpi, ThreadData *td,
- TileDataEnc *tile_data, int mi_row,
- TOKENEXTRA **tp) {
- AV1_COMMON *const cm = &cpi->common;
- const int num_planes = av1_num_planes(cm);
- const TileInfo *const tile_info = &tile_data->tile_info;
- MACROBLOCK *const x = &td->mb;
- MACROBLOCKD *const xd = &x->e_mbd;
- SPEED_FEATURES *const sf = &cpi->sf;
- const int leaf_nodes = 256;
-
- // Initialize the left context for the new SB row
- av1_zero_left_context(xd);
-
- // Reset delta for every tile
- if (mi_row == tile_info->mi_row_start) {
- if (cm->delta_q_present_flag) xd->current_qindex = cm->base_qindex;
- if (cm->delta_lf_present_flag) {
- av1_reset_loop_filter_delta(xd, av1_num_planes(cm));
- }
- }
-
- PC_TREE *const pc_root =
- td->pc_root[cm->seq_params.mib_size_log2 - MIN_MIB_SIZE_LOG2];
- // Code each SB in the row
- for (int mi_col = tile_info->mi_col_start; mi_col < tile_info->mi_col_end;
- mi_col += cm->seq_params.mib_size) {
- av1_fill_coeff_costs(&td->mb, xd->tile_ctx, num_planes);
- av1_fill_mode_rates(cm, x, xd->tile_ctx);
-
- if (sf->adaptive_pred_interp_filter) {
- for (int i = 0; i < leaf_nodes; ++i) {
- td->pc_tree[i].vertical[0].pred_interp_filter = SWITCHABLE;
- td->pc_tree[i].vertical[1].pred_interp_filter = SWITCHABLE;
- td->pc_tree[i].horizontal[0].pred_interp_filter = SWITCHABLE;
- td->pc_tree[i].horizontal[1].pred_interp_filter = SWITCHABLE;
- }
- }
-
- x->mb_rd_record.num = x->mb_rd_record.index_start = 0;
-
- av1_zero(x->txb_rd_record_8X8);
- av1_zero(x->txb_rd_record_16X16);
- av1_zero(x->txb_rd_record_32X32);
- av1_zero(x->txb_rd_record_64X64);
- av1_zero(x->txb_rd_record_intra);
-
- av1_zero(x->pred_mv);
- pc_root->index = 0;
-
- const struct segmentation *const seg = &cm->seg;
- int seg_skip = 0;
- if (seg->enabled) {
- const uint8_t *const map =
- seg->update_map ? cpi->segmentation_map : cm->last_frame_seg_map;
- const int segment_id =
- map ? get_segment_id(cm, map, cm->seq_params.sb_size, mi_row, mi_col)
- : 0;
- seg_skip = segfeature_active(seg, segment_id, SEG_LVL_SKIP);
- }
- xd->cur_frame_force_integer_mv = cm->cur_frame_force_integer_mv;
-
- x->sb_energy_level = 0;
- if (cm->delta_q_present_flag) {
- // Delta-q modulation based on variance
- av1_setup_src_planes(x, cpi->source, mi_row, mi_col, num_planes);
-
- int offset_qindex;
- if (DELTAQ_MODULATION == 1) {
- const int block_wavelet_energy_level =
- av1_block_wavelet_energy_level(cpi, x, cm->seq_params.sb_size);
- x->sb_energy_level = block_wavelet_energy_level;
- offset_qindex = av1_compute_deltaq_from_energy_level(
- cpi, block_wavelet_energy_level);
- } else {
- const int block_var_level =
- av1_log_block_var(cpi, x, cm->seq_params.sb_size);
- x->sb_energy_level = block_var_level;
- offset_qindex =
- av1_compute_deltaq_from_energy_level(cpi, block_var_level);
- }
- const int qmask = ~(cm->delta_q_res - 1);
- int current_qindex = clamp(cm->base_qindex + offset_qindex,
- cm->delta_q_res, 256 - cm->delta_q_res);
- current_qindex =
- ((current_qindex - cm->base_qindex + cm->delta_q_res / 2) & qmask) +
- cm->base_qindex;
- assert(current_qindex > 0);
-
- xd->delta_qindex = current_qindex - cm->base_qindex;
- set_offsets(cpi, tile_info, x, mi_row, mi_col, cm->seq_params.sb_size);
- xd->mi[0]->current_qindex = current_qindex;
- av1_init_plane_quantizers(cpi, x, xd->mi[0]->segment_id);
- if (cpi->oxcf.deltaq_mode == DELTA_Q_LF) {
- const int lfmask = ~(cm->delta_lf_res - 1);
- const int delta_lf_from_base =
- ((offset_qindex / 2 + cm->delta_lf_res / 2) & lfmask);
-
- // pre-set the delta lf for loop filter. Note that this value is set
- // before mi is assigned for each block in current superblock
- for (int j = 0;
- j < AOMMIN(cm->seq_params.mib_size, cm->mi_rows - mi_row); j++) {
- for (int k = 0;
- k < AOMMIN(cm->seq_params.mib_size, cm->mi_cols - mi_col); k++) {
- cm->mi[(mi_row + j) * cm->mi_stride + (mi_col + k)]
- .delta_lf_from_base =
- clamp(delta_lf_from_base, -MAX_LOOP_FILTER, MAX_LOOP_FILTER);
- const int frame_lf_count =
- av1_num_planes(cm) > 1 ? FRAME_LF_COUNT : FRAME_LF_COUNT - 2;
- for (int lf_id = 0; lf_id < frame_lf_count; ++lf_id) {
- cm->mi[(mi_row + j) * cm->mi_stride + (mi_col + k)]
- .delta_lf[lf_id] =
- clamp(delta_lf_from_base, -MAX_LOOP_FILTER, MAX_LOOP_FILTER);
- }
- }
- }
- }
- }
-
- int dummy_rate;
- int64_t dummy_dist;
- RD_STATS dummy_rdc;
- const int idx_str = cm->mi_stride * mi_row + mi_col;
- MB_MODE_INFO **mi = cm->mi_grid_visible + idx_str;
- x->source_variance = UINT_MAX;
- if (sf->partition_search_type == FIXED_PARTITION || seg_skip) {
- set_offsets(cpi, tile_info, x, mi_row, mi_col, cm->seq_params.sb_size);
- const BLOCK_SIZE bsize =
- seg_skip ? cm->seq_params.sb_size : sf->always_this_block_size;
- set_fixed_partitioning(cpi, tile_info, mi, mi_row, mi_col, bsize);
- rd_use_partition(cpi, td, tile_data, mi, tp, mi_row, mi_col,
- cm->seq_params.sb_size, &dummy_rate, &dummy_dist, 1,
- pc_root);
- } else if (cpi->partition_search_skippable_frame) {
- set_offsets(cpi, tile_info, x, mi_row, mi_col, cm->seq_params.sb_size);
- const BLOCK_SIZE bsize =
- get_rd_var_based_fixed_partition(cpi, x, mi_row, mi_col);
- set_fixed_partitioning(cpi, tile_info, mi, mi_row, mi_col, bsize);
- rd_use_partition(cpi, td, tile_data, mi, tp, mi_row, mi_col,
- cm->seq_params.sb_size, &dummy_rate, &dummy_dist, 1,
- pc_root);
- } else {
- // If required set upper and lower partition size limits
- if (sf->auto_min_max_partition_size) {
- set_offsets(cpi, tile_info, x, mi_row, mi_col, cm->seq_params.sb_size);
- rd_auto_partition_range(cpi, tile_info, xd, mi_row, mi_col,
- &x->min_partition_size, &x->max_partition_size);
- }
-
- reset_partition(pc_root, cm->seq_params.sb_size);
- x->use_cb_search_range = 0;
- init_first_partition_pass_stats_tables(x->first_partition_pass_stats);
- // Do the first pass if we need two pass partition search
- if (cpi->sf.two_pass_partition_search &&
- cpi->sf.use_square_partition_only_threshold > BLOCK_4X4 &&
- mi_row + mi_size_high[cm->seq_params.sb_size] < cm->mi_rows &&
- mi_col + mi_size_wide[cm->seq_params.sb_size] < cm->mi_cols &&
- cm->frame_type != KEY_FRAME) {
- x->cb_partition_scan = 1;
- // Reset the stats tables.
- if (sf->mode_pruning_based_on_two_pass_partition_search)
- av1_zero(x->first_partition_pass_stats);
- clear_pc_tree_stats(pc_root);
- rd_pick_sqr_partition(cpi, td, tile_data, tp, mi_row, mi_col,
- cm->seq_params.sb_size, &dummy_rdc, INT64_MAX,
- pc_root, NULL);
- x->cb_partition_scan = 0;
-
- x->source_variance = UINT_MAX;
- if (sf->adaptive_pred_interp_filter) {
- for (int i = 0; i < leaf_nodes; ++i) {
- td->pc_tree[i].vertical[0].pred_interp_filter = SWITCHABLE;
- td->pc_tree[i].vertical[1].pred_interp_filter = SWITCHABLE;
- td->pc_tree[i].horizontal[0].pred_interp_filter = SWITCHABLE;
- td->pc_tree[i].horizontal[1].pred_interp_filter = SWITCHABLE;
- }
- }
-
- x->mb_rd_record.num = x->mb_rd_record.index_start = 0;
- av1_zero(x->txb_rd_record_8X8);
- av1_zero(x->txb_rd_record_16X16);
- av1_zero(x->txb_rd_record_32X32);
- av1_zero(x->txb_rd_record_64X64);
- av1_zero(x->txb_rd_record_intra);
- av1_zero(x->pred_mv);
- pc_root->index = 0;
-
- for (int idy = 0; idy < mi_size_high[cm->seq_params.sb_size]; ++idy) {
- for (int idx = 0; idx < mi_size_wide[cm->seq_params.sb_size]; ++idx) {
- const int offset = cm->mi_stride * (mi_row + idy) + (mi_col + idx);
- cm->mi_grid_visible[offset] = 0;
- }
- }
-
- x->use_cb_search_range = 1;
-
- if (sf->mode_pruning_based_on_two_pass_partition_search) {
- for (int i = 0; i < FIRST_PARTITION_PASS_STATS_TABLES; ++i) {
- FIRST_PARTITION_PASS_STATS *const stat =
- &x->first_partition_pass_stats[i];
- if (stat->sample_counts < FIRST_PARTITION_PASS_MIN_SAMPLES) {
- // If there are not enough samples collected, make all available.
- memset(stat->ref0_counts, 0xff, sizeof(stat->ref0_counts));
- memset(stat->ref1_counts, 0xff, sizeof(stat->ref1_counts));
- } else if (sf->selective_ref_frame < 2) {
- // ALTREF2_FRAME and BWDREF_FRAME may be skipped during the
- // initial partition scan, so we don't eliminate them.
- stat->ref0_counts[ALTREF2_FRAME] = 0xff;
- stat->ref1_counts[ALTREF2_FRAME] = 0xff;
- stat->ref0_counts[BWDREF_FRAME] = 0xff;
- stat->ref1_counts[BWDREF_FRAME] = 0xff;
- }
- }
- }
- }
-
- rd_pick_partition(cpi, td, tile_data, tp, mi_row, mi_col,
- cm->seq_params.sb_size, &dummy_rdc, INT64_MAX, pc_root,
- NULL);
- }
-#if CONFIG_COLLECT_INTER_MODE_RD_STATS
- // TODO(angiebird): Let inter_mode_rd_model_estimation support multi-tile.
- if (cpi->sf.inter_mode_rd_model_estimation && cm->tile_cols == 1 &&
- cm->tile_rows == 1) {
- av1_inter_mode_data_fit(tile_data, x->rdmult);
- }
-#endif
- }
-}
-
-// Prepares the main-thread macroblock (cpi->td.mb) for a new frame: hooks up
-// the source frame's planes and then configures the block planes from the
-// sequence-level chroma subsampling.
-static void init_encode_frame_mb_context(AV1_COMP *cpi) {
-  AV1_COMMON *const cm = &cpi->common;
-  const int planes = av1_num_planes(cm);
-  MACROBLOCK *const mb = &cpi->td.mb;
-
-  // Source planes first; both position arguments are zero.
-  av1_setup_src_planes(mb, cpi->source, 0, 0, planes);
-
-  // Then the per-plane layout of the embedded MACROBLOCKD.
-  av1_setup_block_planes(&mb->e_mbd, cm->seq_params.subsampling_x,
-                         cm->seq_params.subsampling_y, planes);
-}
-
-// Classifies the frame being coded as INTRA_FRAME, ALTREF_FRAME, GOLDEN_FRAME
-// or LAST_FRAME from the intra-only test, the rate-control overlay flags and
-// the refresh flags in cpi. The checks are ordered; the first match wins.
-static MV_REFERENCE_FRAME get_frame_type(const AV1_COMP *cpi) {
-  if (frame_is_intra_only(&cpi->common)) return INTRA_FRAME;
-
-  // We will not update the golden frame with an internal overlay frame.
-  const int golden_overlay =
-      cpi->rc.is_src_frame_alt_ref && cpi->refresh_golden_frame;
-  if (golden_overlay || cpi->rc.is_src_frame_ext_arf) return ALTREF_FRAME;
-
-  const int refreshes_a_ref = cpi->refresh_golden_frame ||
-                              cpi->refresh_alt2_ref_frame ||
-                              cpi->refresh_alt_ref_frame;
-  if (refreshes_a_ref) return GOLDEN_FRAME;
-
-  // TODO(zoeliu): To investigate whether a frame_type other than
-  // INTRA/ALTREF/GOLDEN/LAST needs to be specified separately.
-  return LAST_FRAME;
-}
-
-// Picks the frame-level transform mode: ONLY_4X4 when the frame is coded
-// lossless, otherwise a mode derived from the tx-size search speed feature,
-// falling back to the mode already stored in cpi->common.
-static TX_MODE select_tx_mode(const AV1_COMP *cpi) {
-  if (cpi->common.coded_lossless) return ONLY_4X4;
-
-  switch (cpi->sf.tx_size_search_method) {
-    case USE_LARGESTALL: return TX_MODE_LARGEST;
-    case USE_FULL_RD:
-    case USE_FAST_RD: return TX_MODE_SELECT;
-    default: return cpi->common.tx_mode;
-  }
-}
-
-// (Re)allocates cpi->tile_data with one 32-byte-aligned TileDataEnc per tile
-// of the current tile grid, records the new capacity in
-// cpi->allocated_tiles, and seeds each tile's thresh_freq_fact / mode_map
-// tables (32 and the identity mapping respectively).
-void av1_alloc_tile_data(AV1_COMP *cpi) {
-  AV1_COMMON *const cm = &cpi->common;
-  const int tile_cols = cm->tile_cols;
-  const int tile_rows = cm->tile_rows;
-
-  if (cpi->tile_data != NULL) aom_free(cpi->tile_data);
-  CHECK_MEM_ERROR(
-      cm, cpi->tile_data,
-      aom_memalign(32, tile_cols * tile_rows * sizeof(*cpi->tile_data)));
-  cpi->allocated_tiles = tile_cols * tile_rows;
-
-  // Tiles are stored row-major, so a running pointer visits them in the same
-  // order as indexing with tile_row * tile_cols + tile_col.
-  TileDataEnc *tile = cpi->tile_data;
-  for (int row = 0; row < tile_rows; ++row) {
-    for (int col = 0; col < tile_cols; ++col, ++tile) {
-      for (int bsize = 0; bsize < BLOCK_SIZES_ALL; ++bsize) {
-        for (int mode = 0; mode < MAX_MODES; ++mode) {
-          tile->thresh_freq_fact[bsize][mode] = 32;
-          tile->mode_map[bsize][mode] = mode;
-        }
-      }
-    }
-  }
-}
-
-// Initialises the per-tile encoder state for the current frame. For every
-// tile, in raster order, it:
-//   * fills in the tile's TileInfo via av1_tile_init();
-//   * points cpi->tile_tok[row][col] and cpi->tplist[row][col] just past the
-//     space taken by the previous tile (the first tile keeps the existing
-//     [0][0] pointers);
-//   * enables CDF updates unless large-scale-tile mode or disable_cdf_update
-//     is set.
-void av1_init_tile_data(AV1_COMP *cpi) {
-  AV1_COMMON *const cm = &cpi->common;
-  const int num_planes = av1_num_planes(cm);
-  const int tile_cols = cm->tile_cols;
-  const int tile_rows = cm->tile_rows;
-
-  // Start of the previous tile's storage and how much of it that tile uses.
-  TOKENEXTRA *prev_tok_base = cpi->tile_tok[0][0];
-  unsigned int prev_tok_count = 0;
-  TOKENLIST *prev_list_base = cpi->tplist[0][0];
-  int prev_list_count = 0;
-
-  for (int row = 0; row < tile_rows; ++row) {
-    for (int col = 0; col < tile_cols; ++col) {
-      TileDataEnc *const tile = &cpi->tile_data[row * tile_cols + col];
-      av1_tile_init(&tile->tile_info, cm, row, col);
-
-      prev_tok_base += prev_tok_count;
-      cpi->tile_tok[row][col] = prev_tok_base;
-      prev_tok_count = allocated_tokens(
-          tile->tile_info, cm->seq_params.mib_size_log2 + MI_SIZE_LOG2,
-          num_planes);
-
-      prev_list_base += prev_list_count;
-      cpi->tplist[row][col] = prev_list_base;
-      prev_list_count = av1_get_sb_rows_in_tile(cm, tile->tile_info);
-
-      tile->allow_update_cdf =
-          !cm->large_scale_tile && !cm->disable_cdf_update;
-    }
-  }
-}
-
-// Encodes the superblock row that starts at mi_row inside tile
-// (tile_row, tile_col). The tile's token-list entry for that row receives
-// the first token position, the position after the last token written, and
-// the resulting token count.
-void av1_encode_sb_row(AV1_COMP *cpi, ThreadData *td, int tile_row,
-                       int tile_col, int mi_row) {
-  AV1_COMMON *const cm = &cpi->common;
-  const int num_planes = av1_num_planes(cm);
-  TileDataEnc *const tile =
-      &cpi->tile_data[tile_row * cm->tile_cols + tile_col];
-  const TileInfo *const tile_info = &tile->tile_info;
-  const int sb_size_log2 = cm->seq_params.mib_size_log2 + MI_SIZE_LOG2;
-
-  // These two values only feed the assert at the end.
-  const int tile_mb_cols =
-      (tile_info->mi_col_end - tile_info->mi_col_start + 2) >> 2;
-  const int num_mb_rows_in_sb = ((1 << sb_size_log2) + 8) >> 4;
-
-  const int sb_row_in_tile =
-      (mi_row - tile_info->mi_row_start) >> cm->seq_params.mib_size_log2;
-  TOKENLIST *const row_tokens =
-      &cpi->tplist[tile_row][tile_col][sb_row_in_tile];
-
-  TOKENEXTRA *tok = NULL;
-  get_start_tok(cpi, tile_row, tile_col, mi_row, &tok, sb_size_log2,
-                num_planes);
-  row_tokens->start = tok;
-
-  encode_rd_sb_row(cpi, td, tile, mi_row, &tok);
-
-  row_tokens->stop = tok;
-  row_tokens->count = (unsigned int)(row_tokens->stop - row_tokens->start);
-
-  // The row must not have produced more tokens than were budgeted for it.
-  assert((unsigned int)(tok - row_tokens->start) <=
-         get_token_alloc(num_mb_rows_in_sb, tile_mb_cols, sb_size_log2,
-                         num_planes));
-
-  (void)tile_mb_cols;
-  (void)num_mb_rows_in_sb;
-}
-
-// Encodes one whole tile on the given thread context: resets the above
-// context and the per-tile search counters, gives the tile its own copy of
-// the frame entropy context, then encodes the tile one superblock row at a
-// time.
-void av1_encode_tile(AV1_COMP *cpi, ThreadData *td, int tile_row,
-                     int tile_col) {
-  AV1_COMMON *const cm = &cpi->common;
-  TileDataEnc *const tile =
-      &cpi->tile_data[tile_row * cm->tile_cols + tile_col];
-  const TileInfo *const bounds = &tile->tile_info;
-  MACROBLOCK *const mb = &td->mb;
-  MACROBLOCKD *const mbd = &mb->e_mbd;
-
-#if CONFIG_COLLECT_INTER_MODE_RD_STATS
-  av1_inter_mode_data_init(tile);
-#endif
-
-  av1_zero_above_context(cm, mbd, bounds->mi_col_start, bounds->mi_col_end,
-                         tile_row);
-  av1_init_above_context(cm, mbd, tile_row);
-
-  // The thread's motion-search counters live in the tile and start at zero.
-  tile->m_search_count = 0;   // Count of motion search hits.
-  tile->ex_search_count = 0;  // Exhaustive mesh search hits.
-  mb->m_search_count_ptr = &tile->m_search_count;
-  mb->ex_search_count_ptr = &tile->ex_search_count;
-
-  // Work on a tile-private copy of the frame context.
-  tile->tctx = *cm->fc;
-  mbd->tile_ctx = &tile->tctx;
-
-  cfl_init(&mbd->cfl, &cm->seq_params);
-
-  av1_crc32c_calculator_init(&mb->mb_rd_record.crc_calculator);
-
-  td->intrabc_used_this_tile = 0;
-
-  int mi_row = bounds->mi_row_start;
-  while (mi_row < bounds->mi_row_end) {
-    av1_encode_sb_row(cpi, td, tile_row, tile_col, mi_row);
-    mi_row += cm->seq_params.mib_size;
-  }
-}
-
-// Single-threaded tile encoding: makes sure cpi->tile_data can hold the
-// current tile grid, initialises it, then encodes the tiles in raster order
-// on cpi->td, folding each tile's intrabc usage into cpi->intrabc_used.
-static void encode_tiles(AV1_COMP *cpi) {
-  AV1_COMMON *const cm = &cpi->common;
-  const int cols = cm->tile_cols;
-  const int rows = cm->tile_rows;
-
-  const int have_enough_tiles =
-      cpi->tile_data != NULL && cpi->allocated_tiles >= cols * rows;
-  if (!have_enough_tiles) av1_alloc_tile_data(cpi);
-
-  av1_init_tile_data(cpi);
-
-  for (int row = 0; row < rows; ++row) {
-    for (int col = 0; col < cols; ++col) {
-      av1_encode_tile(cpi, &cpi->td, row, col);
-      cpi->intrabc_used |= cpi->td.intrabc_used_this_tile;
-    }
-  }
-}
-
-#if CONFIG_FP_MB_STATS
-// Locates the first-pass per-macroblock stats of the current frame inside the
-// stats buffer (one byte per MB, frames laid out back to back). On success
-// stores the pointer in *this_frame_mb_stats and returns 1; returns EOF when
-// the computed position lies beyond mb_stats_end.
-static int input_fpmb_stats(FIRSTPASS_MB_STATS *firstpass_mb_stats,
-                            AV1_COMMON *cm, uint8_t **this_frame_mb_stats) {
-  uint8_t *const frame_stats =
-      firstpass_mb_stats->mb_stats_start +
-      cm->current_video_frame * cm->MBs * sizeof(uint8_t);
-
-  if (!(frame_stats > firstpass_mb_stats->mb_stats_end)) {
-    *this_frame_mb_stats = frame_stats;
-    return 1;
-  }
-  return EOF;
-}
-#endif
-
-#define GLOBAL_TRANS_TYPES_ENC 3 // highest motion model to search
-// Returns the cost of signalling global-motion model `gm` when `ref_gm` is
-// the reference model: the sum of aom_count_signed_primitive_refsubexpfin()
-// counts for each parameter the model type carries, shifted left by
-// AV1_PROB_COST_SHIFT.
-//   AFFINE / ROTZOOM: wmmat[2] and wmmat[3], plus wmmat[4] and wmmat[5] when
-//                     wmtype >= AFFINE, then the translation terms below.
-//   TRANSLATION:      wmmat[0] and wmmat[1] only.
-//   IDENTITY:         nothing (cost 0).
-// `allow_hp` only affects pure TRANSLATION models: !allow_hp removes one bit
-// from the coded range and adds one to the precision shift.
-static int gm_get_params_cost(const WarpedMotionParams *gm,
- const WarpedMotionParams *ref_gm, int allow_hp) {
- int params_cost = 0;
- int trans_bits, trans_prec_diff;
- switch (gm->wmtype) {
- case AFFINE:
- case ROTZOOM:
- // wmmat[2] (and wmmat[5] below) have (1 << GM_ALPHA_PREC_BITS) subtracted
- // after the precision shift; wmmat[3] and wmmat[4] are only shifted.
- params_cost += aom_count_signed_primitive_refsubexpfin(
- GM_ALPHA_MAX + 1, SUBEXPFIN_K,
- (ref_gm->wmmat[2] >> GM_ALPHA_PREC_DIFF) - (1 << GM_ALPHA_PREC_BITS),
- (gm->wmmat[2] >> GM_ALPHA_PREC_DIFF) - (1 << GM_ALPHA_PREC_BITS));
- params_cost += aom_count_signed_primitive_refsubexpfin(
- GM_ALPHA_MAX + 1, SUBEXPFIN_K,
- (ref_gm->wmmat[3] >> GM_ALPHA_PREC_DIFF),
- (gm->wmmat[3] >> GM_ALPHA_PREC_DIFF));
- if (gm->wmtype >= AFFINE) {
- params_cost += aom_count_signed_primitive_refsubexpfin(
- GM_ALPHA_MAX + 1, SUBEXPFIN_K,
- (ref_gm->wmmat[4] >> GM_ALPHA_PREC_DIFF),
- (gm->wmmat[4] >> GM_ALPHA_PREC_DIFF));
- params_cost += aom_count_signed_primitive_refsubexpfin(
- GM_ALPHA_MAX + 1, SUBEXPFIN_K,
- (ref_gm->wmmat[5] >> GM_ALPHA_PREC_DIFF) -
- (1 << GM_ALPHA_PREC_BITS),
- (gm->wmmat[5] >> GM_ALPHA_PREC_DIFF) - (1 << GM_ALPHA_PREC_BITS));
- }
- AOM_FALLTHROUGH_INTENDED;
- case TRANSLATION:
- // Range and precision of the translation terms depend on whether the
- // model is translation-only (this case is also reached by fall-through).
- trans_bits = (gm->wmtype == TRANSLATION)
- ? GM_ABS_TRANS_ONLY_BITS - !allow_hp
- : GM_ABS_TRANS_BITS;
- trans_prec_diff = (gm->wmtype == TRANSLATION)
- ? GM_TRANS_ONLY_PREC_DIFF + !allow_hp
- : GM_TRANS_PREC_DIFF;
- params_cost += aom_count_signed_primitive_refsubexpfin(
- (1 << trans_bits) + 1, SUBEXPFIN_K,
- (ref_gm->wmmat[0] >> trans_prec_diff),
- (gm->wmmat[0] >> trans_prec_diff));
- params_cost += aom_count_signed_primitive_refsubexpfin(
- (1 << trans_bits) + 1, SUBEXPFIN_K,
- (ref_gm->wmmat[1] >> trans_prec_diff),
- (gm->wmmat[1] >> trans_prec_diff));
- AOM_FALLTHROUGH_INTENDED;
- case IDENTITY: break;
- default: assert(0);
- }
- return (params_cost << AV1_PROB_COST_SHIFT);
-}
-
-// Decides, from the gm_search_type speed feature, whether a global-motion
-// search should be run for reference `frame`:
-//   GM_FULL_SEARCH        -> always;
-//   GM_REDUCED_REF_SEARCH -> every reference except LAST2_FRAME/LAST3_FRAME;
-//   GM_DISABLE_SEARCH     -> never.
-// Any other value asserts and, in release builds, returns 1.
-// num_refs_using_gm is currently unused.
-static int do_gm_search_logic(SPEED_FEATURES *const sf, int num_refs_using_gm,
-                              int frame) {
-  (void)num_refs_using_gm;
-
-  if (sf->gm_search_type == GM_FULL_SEARCH) return 1;
-  if (sf->gm_search_type == GM_REDUCED_REF_SEARCH)
-    return frame != LAST2_FRAME && frame != LAST3_FRAME;
-  if (sf->gm_search_type == GM_DISABLE_SEARCH) return 0;
-
-  assert(0);
-  return 1;
-}
-
-// Estimate if the source frame is screen content, based on the portion of
-// blocks that have no more than 4 (experimentally selected) luma colors.
-//
-// `src`, `stride`, `width` and `height` describe the plane to analyse;
-// `use_hbd` selects av1_count_colors_highbd() (which also receives `bd`)
-// instead of av1_count_colors(). Only whole 16x16 blocks are examined, so a
-// partial block at the right or bottom edge is ignored.
-//
-// Returns nonzero when the blocks having between 2 and 4 distinct colors
-// cover more than 10% of width * height.
-static int is_screen_content(const uint8_t *src, int use_hbd, int bd,
-                             int stride, int width, int height) {
-  assert(src != NULL);
-  int counts = 0;
-  const int blk_w = 16;
-  const int blk_h = 16;
-  const int limit = 4;
-  for (int r = 0; r + blk_h <= height; r += blk_h) {
-    for (int c = 0; c + blk_w <= width; c += blk_w) {
-      int count_buf[1 << 12];  // Maximum (1 << 12) color levels.
-      const int n_colors =
-          use_hbd ? av1_count_colors_highbd(src + r * stride + c, stride, blk_w,
-                                            blk_h, bd, count_buf)
-                  : av1_count_colors(src + r * stride + c, stride, blk_w, blk_h,
-                                     count_buf);
-      // Single-color (flat) blocks are deliberately not counted.
-      if (n_colors > 1 && n_colors <= limit) counts++;
-    }
-  }
-  // The threshold is 10%. Both sides are evaluated in 64 bits: in plain int,
-  // counts * 2560 and width * height can overflow (undefined behaviour) for
-  // very large frames.
-  return (int64_t)counts * blk_h * blk_w * 10 > (int64_t)width * height;
-}
-
-// Lookup table from a reference-frame index to the AOM_*_FLAG bit that is
-// tested against cpi->ref_frame_flags for that reference. Entry 0 carries no
-// flag; the code in this file indexes it with LAST_FRAME..ALTREF_FRAME.
-static const uint8_t ref_frame_flag_list[REF_FRAMES] = { 0,
- AOM_LAST_FLAG,
- AOM_LAST2_FLAG,
- AOM_LAST3_FLAG,
- AOM_GOLD_FLAG,
- AOM_BWD_FLAG,
- AOM_ALT2_FLAG,
- AOM_ALT_FLAG };
-
-// Enforce the number of references for each arbitrary frame limited to
-// (INTER_REFS_PER_FRAME - 1).
-//
-// If all INTER_REFS_PER_FRAME references are currently enabled in
-// cpi->ref_frame_flags, exactly one of them is disabled by clearing its flag
-// bit; otherwise the flags are left untouched. GOLDEN_FRAME and ALTREF_FRAME
-// are never candidates for removal.
-static void enforce_max_ref_frames(AV1_COMP *cpi) {
- AV1_COMMON *const cm = &cpi->common;
- MV_REFERENCE_FRAME ref_frame;
- int total_valid_refs = 0;
- // Count the references whose flag bit is set.
- for (ref_frame = LAST_FRAME; ref_frame <= ALTREF_FRAME; ++ref_frame) {
- if (cpi->ref_frame_flags & ref_frame_flag_list[ref_frame])
- total_valid_refs++;
- }
-
- // NOTE(zoeliu): When all the possible reference frames are available, we
- // reduce the number of reference frames by 1, following the rules of:
- // (1) Retain GOLDEN_FRAME/ALTREF_FRAME;
- // (2) Check the earliest 2 remaining reference frames, and remove the one
- // with the lower quality factor, otherwise if both have been coded at
- // the same quality level, remove the earliest reference frame.
-
- if (total_valid_refs == INTER_REFS_PER_FRAME) {
- // UINT_MAX marks "no candidate recorded yet" for both offsets.
- unsigned int min_ref_offset = UINT_MAX;
- unsigned int second_min_ref_offset = UINT_MAX;
- // Index 0 holds the earliest candidate, index 1 the second earliest.
- MV_REFERENCE_FRAME earliest_ref_frames[2] = { LAST3_FRAME, LAST2_FRAME };
- int earliest_buf_idxes[2] = { 0 };
-
- // Locate the earliest two reference frames except GOLDEN/ALTREF.
- for (ref_frame = LAST_FRAME; ref_frame <= ALTREF_FRAME; ++ref_frame) {
- // Retain GOLDEN/ALTREF
- if (ref_frame == GOLDEN_FRAME || ref_frame == ALTREF_FRAME) continue;
-
- const int buf_idx = cm->frame_refs[ref_frame - LAST_FRAME].idx;
- if (buf_idx >= 0) {
- const unsigned int ref_offset =
- cm->buffer_pool->frame_bufs[buf_idx].cur_frame_offset;
-
- if (min_ref_offset == UINT_MAX) {
- // First candidate seen: it is the earliest so far.
- min_ref_offset = ref_offset;
- earliest_ref_frames[0] = ref_frame;
- earliest_buf_idxes[0] = buf_idx;
- } else {
- if (get_relative_dist(cm, ref_offset, min_ref_offset) < 0) {
- // New earliest: the previous earliest becomes the second earliest.
- second_min_ref_offset = min_ref_offset;
- earliest_ref_frames[1] = earliest_ref_frames[0];
- earliest_buf_idxes[1] = earliest_buf_idxes[0];
-
- min_ref_offset = ref_offset;
- earliest_ref_frames[0] = ref_frame;
- earliest_buf_idxes[0] = buf_idx;
- } else if (second_min_ref_offset == UINT_MAX ||
- get_relative_dist(cm, ref_offset, second_min_ref_offset) <
- 0) {
- // Not earlier than the earliest, but earlier than (or the first
- // candidate for) the second earliest.
- second_min_ref_offset = ref_offset;
- earliest_ref_frames[1] = ref_frame;
- earliest_buf_idxes[1] = buf_idx;
- }
- }
- }
- }
- // Check the coding quality factors of the two earliest reference frames.
- RATE_FACTOR_LEVEL ref_rf_level[2];
- double ref_rf_deltas[2];
- for (int i = 0; i < 2; ++i) {
- ref_rf_level[i] = cpi->frame_rf_level[earliest_buf_idxes[i]];
- ref_rf_deltas[i] = rate_factor_deltas[ref_rf_level[i]];
- }
- // Silence unused-variable warnings when USE_RF_LEVEL_TO_ENFORCE is 0.
- (void)ref_rf_level;
- (void)ref_rf_deltas;
-
-#define USE_RF_LEVEL_TO_ENFORCE 1
-#if USE_RF_LEVEL_TO_ENFORCE
- // If both earliest two reference frames are coded using the same rate-
- // factor, disable the earliest reference frame; Otherwise disable the
- // reference frame that uses a lower rate-factor delta.
- const MV_REFERENCE_FRAME ref_frame_to_disable =
- (ref_rf_deltas[0] <= ref_rf_deltas[1]) ? earliest_ref_frames[0]
- : earliest_ref_frames[1];
-#else
- // Always disable the earliest reference frame
- const MV_REFERENCE_FRAME ref_frame_to_disable = earliest_ref_frames[0];
-#endif // USE_RF_LEVEL_TO_ENFORCE
-#undef USE_RF_LEVEL_TO_ENFORCE
-
- // Clear the flag bit of the chosen reference; any other value is ignored.
- switch (ref_frame_to_disable) {
- case LAST_FRAME: cpi->ref_frame_flags &= ~AOM_LAST_FLAG; break;
- case LAST2_FRAME: cpi->ref_frame_flags &= ~AOM_LAST2_FLAG; break;
- case LAST3_FRAME: cpi->ref_frame_flags &= ~AOM_LAST3_FLAG; break;
- case BWDREF_FRAME: cpi->ref_frame_flags &= ~AOM_BWD_FLAG; break;
- case ALTREF2_FRAME: cpi->ref_frame_flags &= ~AOM_ALT2_FLAG; break;
- default: break;
- }
- }
-}
-
-// Returns 1 when none of the valid references of the current (inter) frame
-// has a frame offset that get_relative_dist() places after cm->frame_offset,
-// and 0 as soon as one such backward reference is found.
-static INLINE int av1_refs_are_one_sided(const AV1_COMMON *cm) {
-  assert(!frame_is_intra_only(cm));
-
-  for (int i = 0; i < INTER_REFS_PER_FRAME; ++i) {
-    const int buf_idx = cm->frame_refs[i].idx;
-    if (buf_idx != INVALID_IDX) {
-      const int ref_offset =
-          cm->buffer_pool->frame_bufs[buf_idx].cur_frame_offset;
-      // A positive distance means this is a bwd reference.
-      if (get_relative_dist(cm, ref_offset, (int)cm->frame_offset) > 0)
-        return 0;
-    }
-  }
-  return 1;
-}
-
-// Writes the frame offsets of the two skip-mode reference frames
-// (cm->ref_frame_idx_0 / cm->ref_frame_idx_1) into ref_offset[0..1].
-// Both entries are 0 when skip mode is not allowed for the frame.
-static INLINE void get_skip_mode_ref_offsets(const AV1_COMMON *cm,
-                                             int ref_offset[2]) {
-  ref_offset[0] = 0;
-  ref_offset[1] = 0;
-
-  if (cm->is_skip_mode_allowed) {
-    const int buf_idx_0 = cm->frame_refs[cm->ref_frame_idx_0].idx;
-    const int buf_idx_1 = cm->frame_refs[cm->ref_frame_idx_1].idx;
-    assert(buf_idx_0 != INVALID_IDX && buf_idx_1 != INVALID_IDX);
-
-    ref_offset[0] = cm->buffer_pool->frame_bufs[buf_idx_0].cur_frame_offset;
-    ref_offset[1] = cm->buffer_pool->frame_bufs[buf_idx_1].cur_frame_offset;
-  }
-}
-
-// Decides whether skip mode is used for the current frame. Runs
-// av1_setup_skip_mode_allowed() first, then returns 0 if any of these hold:
-//   * skip mode is not allowed for the frame;
-//   * the two skip-mode references differ by more than one frame in their
-//     distance to the current frame;
-//   * all references are one-sided and the encoder runs with lag;
-//   * either skip-mode reference is disabled in cpi->ref_frame_flags.
-// Returns 1 otherwise.
-static int check_skip_mode_enabled(AV1_COMP *const cpi) {
-  AV1_COMMON *const cm = &cpi->common;
-
-  av1_setup_skip_mode_allowed(cm);
-  if (!cm->is_skip_mode_allowed) return 0;
-
-  // Compare how far each reference of the pair is from the current frame.
-  const int now = (int)cm->frame_offset;
-  int pair_offset[2];
-  get_skip_mode_ref_offsets(cm, pair_offset);
-  const int dist_to_ref0 = get_relative_dist(cm, now, pair_offset[0]);
-  const int dist_to_ref1 = abs(get_relative_dist(cm, now, pair_offset[1]));
-  if (abs(dist_to_ref0 - dist_to_ref1) > 1) return 0;
-
-  // High latency with forward-only references: no skip mode.
-  if (cpi->all_one_sided_refs && cpi->oxcf.lag_in_frames > 0) return 0;
-
-  static const int flag_of_ref[REF_FRAMES] = { 0,
-                                               AOM_LAST_FLAG,
-                                               AOM_LAST2_FLAG,
-                                               AOM_LAST3_FLAG,
-                                               AOM_GOLD_FLAG,
-                                               AOM_BWD_FLAG,
-                                               AOM_ALT2_FLAG,
-                                               AOM_ALT_FLAG };
-  const int first_ref = cm->ref_frame_idx_0 + LAST_FRAME;
-  const int second_ref = cm->ref_frame_idx_1 + LAST_FRAME;
-
-  // Both references of the pair must still be enabled.
-  return (cpi->ref_frame_flags & flag_of_ref[first_ref]) &&
-         (cpi->ref_frame_flags & flag_of_ref[second_ref]);
-}
-
-// Function to decide if we can skip the global motion parameter computation
-// for a particular ref frame.
-// Only LAST2_FRAME and LAST3_FRAME can be skipped, and only when GOLDEN_FRAME
-// already has a non-IDENTITY model; the result is then nonzero when the
-// reference's offset is not after GOLDEN_FRAME's according to
-// get_relative_dist().
-static INLINE int skip_gm_frame(AV1_COMMON *const cm, int ref_frame) {
-  const int is_last2_or_last3 =
-      ref_frame == LAST3_FRAME || ref_frame == LAST2_FRAME;
-  if (!is_last2_or_last3 ||
-      cm->global_motion[GOLDEN_FRAME].wmtype == IDENTITY) {
-    return 0;
-  }
-
-  return get_relative_dist(
-             cm, cm->cur_frame->ref_frame_offset[ref_frame - LAST_FRAME],
-             cm->cur_frame->ref_frame_offset[GOLDEN_FRAME - LAST_FRAME]) <= 0;
-}
-
-// Selects the default interpolation-filter skip flags: the luma-only set for
-// single-plane (monochrome) content, the regular set otherwise.
-static void set_default_interp_skip_flags(AV1_COMP *cpi) {
-  if (av1_num_planes(&cpi->common) == 1) {
-    cpi->default_interp_skip_flags = DEFAULT_LUMA_INTERP_SKIP_FLAG;
-  } else {
-    cpi->default_interp_skip_flags = DEFAULT_INTERP_SKIP_FLAG;
-  }
-}
-
-// Runs one encode of the current frame with the configuration held in `cpi`.
-// In order, it:
-//   1. clamps the partition size limits and resets mode-info and counters;
-//   2. decides screen-content tools / intrabc for the frame;
-//   3. optionally builds the block-hash table of the source frame;
-//   4. derives per-segment qindex and lossless state, the tx mode and the
-//      delta-q / delta-lf flags;
-//   5. calls the quantizer, RD and motion-estimation initialisers and sets up
-//      segmentation maps and loop-filter deltas;
-//   6. runs the global-motion search if it has not been done yet;
-//   7. encodes all tiles (multi-threaded when possible) and adds the elapsed
-//      time to cpi->time_encode_sb_row;
-//   8. clears allow_intrabc if no block ended up using intrabc.
-static void encode_frame_internal(AV1_COMP *cpi) {
- ThreadData *const td = &cpi->td;
- MACROBLOCK *const x = &td->mb;
- AV1_COMMON *const cm = &cpi->common;
- MACROBLOCKD *const xd = &x->e_mbd;
- RD_COUNTS *const rdc = &cpi->td.rd_counts;
- int i;
-
- // Neither partition size limit may exceed the superblock size.
- x->min_partition_size = AOMMIN(x->min_partition_size, cm->seq_params.sb_size);
- x->max_partition_size = AOMMIN(x->max_partition_size, cm->seq_params.sb_size);
-#if CONFIG_DIST_8X8
- x->using_dist_8x8 = cpi->oxcf.using_dist_8x8;
- x->tune_metric = cpi->oxcf.tuning;
-#endif
- cm->setup_mi(cm);
-
- xd->mi = cm->mi_grid_visible;
- xd->mi[0] = cm->mi;
-
- av1_zero(*td->counts);
- av1_zero(rdc->comp_pred_diff);
-
- // On intra-only frames decide whether screen content tools are allowed:
- // force_screen_content_tools == 2 means "decide per frame" (configured
- // content type or the is_screen_content() estimate); any other value is
- // used directly.
- if (frame_is_intra_only(cm)) {
- if (cm->seq_params.force_screen_content_tools == 2) {
- cm->allow_screen_content_tools =
- cpi->oxcf.content == AOM_CONTENT_SCREEN ||
- is_screen_content(cpi->source->y_buffer,
- cpi->source->flags & YV12_FLAG_HIGHBITDEPTH, xd->bd,
- cpi->source->y_stride, cpi->source->y_width,
- cpi->source->y_height);
- } else {
- cm->allow_screen_content_tools =
- cm->seq_params.force_screen_content_tools;
- }
- }
-
- // Allow intrabc when screen content tools are enabled.
- cm->allow_intrabc = cm->allow_screen_content_tools;
- // Reset the flag.
- cpi->intrabc_used = 0;
- // Need to disable intrabc when superres is selected
- if (av1_superres_scaled(cm)) {
- cm->allow_intrabc = 0;
- }
-
- if (cpi->oxcf.pass != 1 && av1_use_hash_me(cm)) {
- // add to hash table
- const int pic_width = cpi->source->y_crop_width;
- const int pic_height = cpi->source->y_crop_height;
- uint32_t *block_hash_values[2][2];
- int8_t *is_block_same[2][3];
- int k, j;
-
- // Two sets of scratch buffers, each sized for one entry per source pixel.
- for (k = 0; k < 2; k++) {
- for (j = 0; j < 2; j++) {
- CHECK_MEM_ERROR(cm, block_hash_values[k][j],
- aom_malloc(sizeof(uint32_t) * pic_width * pic_height));
- }
-
- for (j = 0; j < 3; j++) {
- CHECK_MEM_ERROR(cm, is_block_same[k][j],
- aom_malloc(sizeof(int8_t) * pic_width * pic_height));
- }
- }
-
- // The two buffer sets are used alternately: each call for sizes 4, 8, 16,
- // 32, 64 and 128 takes the set produced by the previous step and fills the
- // other one, which is then added to the frame's hash table.
- av1_hash_table_create(&cm->cur_frame->hash_table);
- av1_generate_block_2x2_hash_value(cpi->source, block_hash_values[0],
- is_block_same[0], &cpi->td.mb);
- av1_generate_block_hash_value(cpi->source, 4, block_hash_values[0],
- block_hash_values[1], is_block_same[0],
- is_block_same[1], &cpi->td.mb);
- av1_add_to_hash_map_by_row_with_precal_data(
- &cm->cur_frame->hash_table, block_hash_values[1], is_block_same[1][2],
- pic_width, pic_height, 4);
- av1_generate_block_hash_value(cpi->source, 8, block_hash_values[1],
- block_hash_values[0], is_block_same[1],
- is_block_same[0], &cpi->td.mb);
- av1_add_to_hash_map_by_row_with_precal_data(
- &cm->cur_frame->hash_table, block_hash_values[0], is_block_same[0][2],
- pic_width, pic_height, 8);
- av1_generate_block_hash_value(cpi->source, 16, block_hash_values[0],
- block_hash_values[1], is_block_same[0],
- is_block_same[1], &cpi->td.mb);
- av1_add_to_hash_map_by_row_with_precal_data(
- &cm->cur_frame->hash_table, block_hash_values[1], is_block_same[1][2],
- pic_width, pic_height, 16);
- av1_generate_block_hash_value(cpi->source, 32, block_hash_values[1],
- block_hash_values[0], is_block_same[1],
- is_block_same[0], &cpi->td.mb);
- av1_add_to_hash_map_by_row_with_precal_data(
- &cm->cur_frame->hash_table, block_hash_values[0], is_block_same[0][2],
- pic_width, pic_height, 32);
- av1_generate_block_hash_value(cpi->source, 64, block_hash_values[0],
- block_hash_values[1], is_block_same[0],
- is_block_same[1], &cpi->td.mb);
- av1_add_to_hash_map_by_row_with_precal_data(
- &cm->cur_frame->hash_table, block_hash_values[1], is_block_same[1][2],
- pic_width, pic_height, 64);
-
- av1_generate_block_hash_value(cpi->source, 128, block_hash_values[1],
- block_hash_values[0], is_block_same[1],
- is_block_same[0], &cpi->td.mb);
- av1_add_to_hash_map_by_row_with_precal_data(
- &cm->cur_frame->hash_table, block_hash_values[0], is_block_same[0][2],
- pic_width, pic_height, 128);
-
- // Release the scratch buffers; only the hash table is kept.
- for (k = 0; k < 2; k++) {
- for (j = 0; j < 2; j++) {
- aom_free(block_hash_values[k][j]);
- }
-
- for (j = 0; j < 3; j++) {
- aom_free(is_block_same[k][j]);
- }
- }
- }
-
- // Per-segment quantizer index and lossless flag. A segment is lossless when
- // its qindex and all DC/AC delta-q values are zero; trellis optimisation
- // (optimize_seg_arr) is switched off for lossless segments.
- for (i = 0; i < MAX_SEGMENTS; ++i) {
- const int qindex = cm->seg.enabled
- ? av1_get_qindex(&cm->seg, i, cm->base_qindex)
- : cm->base_qindex;
- xd->lossless[i] = qindex == 0 && cm->y_dc_delta_q == 0 &&
- cm->u_dc_delta_q == 0 && cm->u_ac_delta_q == 0 &&
- cm->v_dc_delta_q == 0 && cm->v_ac_delta_q == 0;
- if (xd->lossless[i]) cpi->has_lossless_segment = 1;
- xd->qindex[i] = qindex;
- if (xd->lossless[i]) {
- cpi->optimize_seg_arr[i] = 0;
- } else {
- cpi->optimize_seg_arr[i] = cpi->optimize_speed_feature;
- }
- }
- cm->coded_lossless = is_coded_lossless(cm, xd);
- cm->all_lossless = cm->coded_lossless && !av1_superres_scaled(cm);
-
- cm->tx_mode = select_tx_mode(cpi);
-
- // Fix delta q resolution for the moment
- cm->delta_q_res = DEFAULT_DELTA_Q_RES;
- // Set delta_q_present_flag before it is used for the first time
- cm->delta_lf_res = DEFAULT_DELTA_LF_RES;
- cm->delta_q_present_flag = cpi->oxcf.deltaq_mode != NO_DELTA_Q;
- cm->delta_lf_present_flag = cpi->oxcf.deltaq_mode == DELTA_Q_LF;
- cm->delta_lf_multi = DEFAULT_DELTA_LF_MULTI;
- // update delta_q_present_flag and delta_lf_present_flag based on base_qindex
- cm->delta_q_present_flag &= cm->base_qindex > 0;
- cm->delta_lf_present_flag &= cm->base_qindex > 0;
-
- av1_frame_init_quantizer(cpi);
-
- av1_initialize_rd_consts(cpi);
- av1_initialize_me_consts(cpi, x, cm->base_qindex);
- init_encode_frame_mb_context(cpi);
- set_default_interp_skip_flags(cpi);
- if (cm->prev_frame)
- cm->last_frame_seg_map = cm->prev_frame->seg_map;
- else
- cm->last_frame_seg_map = NULL;
- cm->current_frame_seg_map = cm->cur_frame->seg_map;
- // Loop-filter deltas: defaults when intrabc is allowed or the frame is
- // coded lossless, otherwise inherited from the previous frame if there is
- // one. The result is stored in the current frame buffer.
- if (cm->allow_intrabc || cm->coded_lossless) {
- av1_set_default_ref_deltas(cm->lf.ref_deltas);
- av1_set_default_mode_deltas(cm->lf.mode_deltas);
- } else if (cm->prev_frame) {
- memcpy(cm->lf.ref_deltas, cm->prev_frame->ref_deltas, REF_FRAMES);
- memcpy(cm->lf.mode_deltas, cm->prev_frame->mode_deltas, MAX_MODE_LF_DELTAS);
- }
- memcpy(cm->cur_frame->ref_deltas, cm->lf.ref_deltas, REF_FRAMES);
- memcpy(cm->cur_frame->mode_deltas, cm->lf.mode_deltas, MAX_MODE_LF_DELTAS);
-
- // Special case: set prev_mi to NULL when the previous mode info
- // context cannot be used.
- cm->prev_mi = cm->allow_ref_frame_mvs ? cm->prev_mip : NULL;
-
- x->txb_split_count = 0;
-
- av1_zero(rdc->global_motion_used);
- av1_zero(cpi->gmparams_cost);
-#if !CONFIG_GLOBAL_MOTION_SEARCH
- cpi->global_motion_search_done = 1;
-#endif // !CONFIG_GLOBAL_MOTION_SEARCH
- // Global-motion search: only for inter frames with a source, and only once
- // (global_motion_search_done is set at the end of this block).
- if (cpi->common.frame_type == INTER_FRAME && cpi->source &&
- !cpi->global_motion_search_done) {
- YV12_BUFFER_CONFIG *ref_buf[REF_FRAMES];
- int frame;
- double params_by_motion[RANSAC_NUM_MOTIONS * (MAX_PARAMDIM - 1)];
- const double *params_this_motion;
- int inliers_by_motion[RANSAC_NUM_MOTIONS];
- WarpedMotionParams tmp_wm_params;
- static const double kIdentityParams[MAX_PARAMDIM - 1] = {
- 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0
- };
- int num_refs_using_gm = 0;
-
- // References are visited from ALTREF_FRAME down to LAST_FRAME so that the
- // duplicate-buffer check below can reuse a model computed earlier in the
- // loop.
- for (frame = ALTREF_FRAME; frame >= LAST_FRAME; --frame) {
- ref_buf[frame] = get_ref_frame_buffer(cpi, frame);
- int pframe;
- cm->global_motion[frame] = default_warp_params;
- const WarpedMotionParams *ref_params =
- cm->prev_frame ? &cm->prev_frame->global_motion[frame]
- : &default_warp_params;
- // check for duplicate buffer
- for (pframe = ALTREF_FRAME; pframe > frame; --pframe) {
- if (ref_buf[frame] == ref_buf[pframe]) break;
- }
- if (pframe > frame) {
- // Same buffer as an already processed reference: copy its model.
- memcpy(&cm->global_motion[frame], &cm->global_motion[pframe],
- sizeof(WarpedMotionParams));
- } else if (ref_buf[frame] &&
- ref_buf[frame]->y_crop_width == cpi->source->y_crop_width &&
- ref_buf[frame]->y_crop_height == cpi->source->y_crop_height &&
- do_gm_search_logic(&cpi->sf, num_refs_using_gm, frame) &&
- !(cpi->sf.selective_ref_gm && skip_gm_frame(cm, frame))) {
- TransformationType model;
- const int64_t ref_frame_error =
- av1_frame_error(xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH, xd->bd,
- ref_buf[frame]->y_buffer, ref_buf[frame]->y_stride,
- cpi->source->y_buffer, cpi->source->y_width,
- cpi->source->y_height, cpi->source->y_stride);
-
- // A zero error leaves the default model in place; `continue` also skips
- // the gmparams_cost update at the bottom of the loop for this frame.
- if (ref_frame_error == 0) continue;
-
- aom_clear_system_state();
- // Try model types from ROTZOOM up to (not including)
- // GLOBAL_TRANS_TYPES_ENC and stop at the first one that is kept.
- for (model = ROTZOOM; model < GLOBAL_TRANS_TYPES_ENC; ++model) {
- int64_t best_warp_error = INT64_MAX;
- // Initially set all params to identity.
- for (i = 0; i < RANSAC_NUM_MOTIONS; ++i) {
- memcpy(params_by_motion + (MAX_PARAMDIM - 1) * i, kIdentityParams,
- (MAX_PARAMDIM - 1) * sizeof(*params_by_motion));
- }
-
- compute_global_motion_feature_based(
- model, cpi->source, ref_buf[frame],
- cpi->common.seq_params.bit_depth, inliers_by_motion,
- params_by_motion, RANSAC_NUM_MOTIONS);
-
- // Refine every candidate motion that has inliers and keep the one with
- // the smallest warp error.
- for (i = 0; i < RANSAC_NUM_MOTIONS; ++i) {
- if (inliers_by_motion[i] == 0) continue;
-
- params_this_motion = params_by_motion + (MAX_PARAMDIM - 1) * i;
- convert_model_to_params(params_this_motion, &tmp_wm_params);
-
- if (tmp_wm_params.wmtype != IDENTITY) {
- const int64_t warp_error = refine_integerized_param(
- &tmp_wm_params, tmp_wm_params.wmtype,
- xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH, xd->bd,
- ref_buf[frame]->y_buffer, ref_buf[frame]->y_width,
- ref_buf[frame]->y_height, ref_buf[frame]->y_stride,
- cpi->source->y_buffer, cpi->source->y_width,
- cpi->source->y_height, cpi->source->y_stride, 5,
- best_warp_error);
- if (warp_error < best_warp_error) {
- best_warp_error = warp_error;
- // Save the wm_params modified by refine_integerized_param()
- // rather than motion index to avoid rerunning refine() below.
- memcpy(&(cm->global_motion[frame]), &tmp_wm_params,
- sizeof(WarpedMotionParams));
- }
- }
- }
- // Fall back to the default model when get_shear_params() rejects it.
- if (cm->global_motion[frame].wmtype <= AFFINE)
- if (!get_shear_params(&cm->global_motion[frame]))
- cm->global_motion[frame] = default_warp_params;
-
- if (cm->global_motion[frame].wmtype == TRANSLATION) {
- cm->global_motion[frame].wmmat[0] =
- convert_to_trans_prec(cm->allow_high_precision_mv,
- cm->global_motion[frame].wmmat[0]) *
- GM_TRANS_ONLY_DECODE_FACTOR;
- cm->global_motion[frame].wmmat[1] =
- convert_to_trans_prec(cm->allow_high_precision_mv,
- cm->global_motion[frame].wmmat[1]) *
- GM_TRANS_ONLY_DECODE_FACTOR;
- }
-
- // If the best error advantage found doesn't meet the threshold for
- // this motion type, revert to IDENTITY.
- if (!is_enough_erroradvantage(
- (double)best_warp_error / ref_frame_error,
- gm_get_params_cost(&cm->global_motion[frame], ref_params,
- cm->allow_high_precision_mv),
- cpi->sf.gm_erroradv_type)) {
- cm->global_motion[frame] = default_warp_params;
- }
- if (cm->global_motion[frame].wmtype != IDENTITY) break;
- }
- aom_clear_system_state();
- }
- if (cm->global_motion[frame].wmtype != IDENTITY) num_refs_using_gm++;
- // Cost of this model relative to signalling IDENTITY.
- cpi->gmparams_cost[frame] =
- gm_get_params_cost(&cm->global_motion[frame], ref_params,
- cm->allow_high_precision_mv) +
- cpi->gmtype_cost[cm->global_motion[frame].wmtype] -
- cpi->gmtype_cost[IDENTITY];
- }
- // clear disabled ref_frames
- for (frame = LAST_FRAME; frame <= ALTREF_FRAME; ++frame) {
- const int ref_disabled =
- !(cpi->ref_frame_flags & ref_frame_flag_list[frame]);
- if (ref_disabled && cpi->sf.recode_loop != DISALLOW_RECODE) {
- cpi->gmparams_cost[frame] = 0;
- cm->global_motion[frame] = default_warp_params;
- }
- }
- cpi->global_motion_search_done = 1;
- }
- // Keep a copy of the final models in the current frame buffer.
- memcpy(cm->cur_frame->global_motion, cm->global_motion,
- REF_FRAMES * sizeof(WarpedMotionParams));
-
- av1_setup_motion_field(cm);
-
- cpi->all_one_sided_refs =
- frame_is_intra_only(cm) ? 0 : av1_refs_are_one_sided(cm);
-
- cm->skip_mode_flag = check_skip_mode_enabled(cpi);
-
- {
- // Time the tile encoding stage.
- struct aom_usec_timer emr_timer;
- aom_usec_timer_start(&emr_timer);
-
-#if CONFIG_FP_MB_STATS
- if (cpi->use_fp_mb_stats) {
- input_fpmb_stats(&cpi->twopass.firstpass_mb_stats, cm,
- &cpi->twopass.this_frame_mb_stats);
- }
-#endif
-
- // Multi-threaded path when row-MT is on with more than one thread, or when
- // min(max_threads, number of tiles) exceeds 1; serial path otherwise.
- if (cpi->row_mt && (cpi->oxcf.max_threads > 1))
- av1_encode_tiles_mt(cpi);
- else if (AOMMIN(cpi->oxcf.max_threads, cm->tile_cols * cm->tile_rows) > 1)
- av1_encode_tiles_mt(cpi);
- else
- encode_tiles(cpi);
-
- aom_usec_timer_mark(&emr_timer);
- cpi->time_encode_sb_row += aom_usec_timer_elapsed(&emr_timer);
- }
-
- // If intrabc is allowed but never selected, reset the allow_intrabc flag.
- if (cm->allow_intrabc && !cpi->intrabc_used) cm->allow_intrabc = 0;
- if (cm->allow_intrabc) cm->delta_lf_present_flag = 0;
-}
-
-void av1_encode_frame(AV1_COMP *cpi) {
- AV1_COMMON *const cm = &cpi->common;
- const int num_planes = av1_num_planes(cm);
- // Indicates whether or not to use a default reduced set for ext-tx
- // rather than the potential full set of 16 transforms
- cm->reduced_tx_set_used = 0;
-
- if (cm->show_frame == 0) {
- int arf_offset = AOMMIN(
- (MAX_GF_INTERVAL - 1),
- cpi->twopass.gf_group.arf_src_offset[cpi->twopass.gf_group.index]);
- int brf_offset =
- cpi->twopass.gf_group.brf_src_offset[cpi->twopass.gf_group.index];
- arf_offset = AOMMIN((MAX_GF_INTERVAL - 1), arf_offset + brf_offset);
- cm->frame_offset = cm->current_video_frame + arf_offset;
- } else {
- cm->frame_offset = cm->current_video_frame;
- }
- cm->frame_offset %= (1 << (cm->seq_params.order_hint_bits_minus_1 + 1));
-
- // Make sure segment_id is no larger than last_active_segid.
- if (cm->seg.enabled && cm->seg.update_map) {
- const int mi_rows = cm->mi_rows;
- const int mi_cols = cm->mi_cols;
- const int last_active_segid = cm->seg.last_active_segid;
- uint8_t *map = cpi->segmentation_map;
- for (int mi_row = 0; mi_row < mi_rows; ++mi_row) {
- for (int mi_col = 0; mi_col < mi_cols; ++mi_col) {
- map[mi_col] = AOMMIN(map[mi_col], last_active_segid);
- }
- map += mi_cols;
- }
- }
-
- av1_setup_frame_buf_refs(cm);
- if (cpi->sf.selective_ref_frame >= 2) enforce_max_ref_frames(cpi);
- av1_setup_frame_sign_bias(cm);
-
-#if CONFIG_MISMATCH_DEBUG
- mismatch_reset_frame(num_planes);
-#else
- (void)num_planes;
-#endif
-
- cpi->allow_comp_inter_inter = !frame_is_intra_only(cm);
-
- if (cpi->sf.frame_parameter_update) {
- int i;
- RD_OPT *const rd_opt = &cpi->rd;
- RD_COUNTS *const rdc = &cpi->td.rd_counts;
-
- // This code does a single RD pass over the whole frame assuming
- // either compound, single or hybrid prediction as per whatever has
- // worked best for that type of frame in the past.
- // It also predicts whether another coding mode would have worked
- // better than this coding mode. If that is the case, it remembers
- // that for subsequent frames.
- // It does the same analysis for transform size selection also.
- //
- // TODO(zoeliu): To investigate whether a frame_type other than
- // INTRA/ALTREF/GOLDEN/LAST needs to be specified seperately.
- const MV_REFERENCE_FRAME frame_type = get_frame_type(cpi);
- int64_t *const mode_thrs = rd_opt->prediction_type_threshes[frame_type];
- const int is_alt_ref = frame_type == ALTREF_FRAME;
-
- /* prediction (compound, single or hybrid) mode selection */
- // NOTE: "is_alt_ref" is true only for OVERLAY/INTNL_OVERLAY frames
- if (is_alt_ref || !cpi->allow_comp_inter_inter)
- cm->reference_mode = SINGLE_REFERENCE;
- else
- cm->reference_mode = REFERENCE_MODE_SELECT;
-
- cm->interp_filter = SWITCHABLE;
- if (cm->large_scale_tile) cm->interp_filter = EIGHTTAP_REGULAR;
-
- cm->switchable_motion_mode = 1;
-
- rdc->compound_ref_used_flag = 0;
- rdc->skip_mode_used_flag = 0;
-
- encode_frame_internal(cpi);
-
- for (i = 0; i < REFERENCE_MODES; ++i)
- mode_thrs[i] = (mode_thrs[i] + rdc->comp_pred_diff[i] / cm->MBs) / 2;
-
- if (cm->reference_mode == REFERENCE_MODE_SELECT) {
- // Use a flag that includes 4x4 blocks
- if (rdc->compound_ref_used_flag == 0) {
- cm->reference_mode = SINGLE_REFERENCE;
-#if CONFIG_ENTROPY_STATS
- av1_zero(cpi->td.counts->comp_inter);
-#endif // CONFIG_ENTROPY_STATS
- }
- }
- // Re-check on the skip mode status as reference mode may have been changed.
- if (frame_is_intra_only(cm) || cm->reference_mode == SINGLE_REFERENCE) {
- cm->is_skip_mode_allowed = 0;
- cm->skip_mode_flag = 0;
- }
- if (cm->skip_mode_flag && rdc->skip_mode_used_flag == 0)
- cm->skip_mode_flag = 0;
-
- if (!cm->large_scale_tile) {
- if (cm->tx_mode == TX_MODE_SELECT && cpi->td.mb.txb_split_count == 0)
- cm->tx_mode = TX_MODE_LARGEST;
- }
- } else {
- encode_frame_internal(cpi);
- }
-}
-
-static void update_txfm_count(MACROBLOCK *x, MACROBLOCKD *xd,
- FRAME_COUNTS *counts, TX_SIZE tx_size, int depth,
- int blk_row, int blk_col,
- uint8_t allow_update_cdf) {
- MB_MODE_INFO *mbmi = xd->mi[0];
- const BLOCK_SIZE bsize = mbmi->sb_type;
- const int max_blocks_high = max_block_high(xd, bsize, 0);
- const int max_blocks_wide = max_block_wide(xd, bsize, 0);
- int ctx = txfm_partition_context(xd->above_txfm_context + blk_col,
- xd->left_txfm_context + blk_row,
- mbmi->sb_type, tx_size);
- const int txb_size_index = av1_get_txb_size_index(bsize, blk_row, blk_col);
- const TX_SIZE plane_tx_size = mbmi->inter_tx_size[txb_size_index];
-
- if (blk_row >= max_blocks_high || blk_col >= max_blocks_wide) return;
- assert(tx_size > TX_4X4);
-
- if (depth == MAX_VARTX_DEPTH) {
- // Don't add to counts in this case
- mbmi->tx_size = tx_size;
- txfm_partition_update(xd->above_txfm_context + blk_col,
- xd->left_txfm_context + blk_row, tx_size, tx_size);
- return;
- }
-
- if (tx_size == plane_tx_size) {
-#if CONFIG_ENTROPY_STATS
- ++counts->txfm_partition[ctx][0];
-#endif
- if (allow_update_cdf)
- update_cdf(xd->tile_ctx->txfm_partition_cdf[ctx], 0, 2);
- mbmi->tx_size = tx_size;
- txfm_partition_update(xd->above_txfm_context + blk_col,
- xd->left_txfm_context + blk_row, tx_size, tx_size);
- } else {
- const TX_SIZE sub_txs = sub_tx_size_map[tx_size];
- const int bsw = tx_size_wide_unit[sub_txs];
- const int bsh = tx_size_high_unit[sub_txs];
-
-#if CONFIG_ENTROPY_STATS
- ++counts->txfm_partition[ctx][1];
-#endif
- if (allow_update_cdf)
- update_cdf(xd->tile_ctx->txfm_partition_cdf[ctx], 1, 2);
- ++x->txb_split_count;
-
- if (sub_txs == TX_4X4) {
- mbmi->inter_tx_size[txb_size_index] = TX_4X4;
- mbmi->tx_size = TX_4X4;
- txfm_partition_update(xd->above_txfm_context + blk_col,
- xd->left_txfm_context + blk_row, TX_4X4, tx_size);
- return;
- }
-
- for (int row = 0; row < tx_size_high_unit[tx_size]; row += bsh) {
- for (int col = 0; col < tx_size_wide_unit[tx_size]; col += bsw) {
- int offsetr = row;
- int offsetc = col;
-
- update_txfm_count(x, xd, counts, sub_txs, depth + 1, blk_row + offsetr,
- blk_col + offsetc, allow_update_cdf);
- }
- }
- }
-}
-
-static void tx_partition_count_update(const AV1_COMMON *const cm, MACROBLOCK *x,
- BLOCK_SIZE plane_bsize, int mi_row,
- int mi_col, FRAME_COUNTS *td_counts,
- uint8_t allow_update_cdf) {
- MACROBLOCKD *xd = &x->e_mbd;
- const int mi_width = block_size_wide[plane_bsize] >> tx_size_wide_log2[0];
- const int mi_height = block_size_high[plane_bsize] >> tx_size_high_log2[0];
- const TX_SIZE max_tx_size = get_vartx_max_txsize(xd, plane_bsize, 0);
- const int bh = tx_size_high_unit[max_tx_size];
- const int bw = tx_size_wide_unit[max_tx_size];
- int idx, idy;
-
- xd->above_txfm_context = cm->above_txfm_context[xd->tile.tile_row] + mi_col;
- xd->left_txfm_context =
- xd->left_txfm_context_buffer + (mi_row & MAX_MIB_MASK);
-
- for (idy = 0; idy < mi_height; idy += bh)
- for (idx = 0; idx < mi_width; idx += bw)
- update_txfm_count(x, xd, td_counts, max_tx_size, 0, idy, idx,
- allow_update_cdf);
-}
-
-static void set_txfm_context(MACROBLOCKD *xd, TX_SIZE tx_size, int blk_row,
- int blk_col) {
- MB_MODE_INFO *mbmi = xd->mi[0];
- const BLOCK_SIZE bsize = mbmi->sb_type;
- const int max_blocks_high = max_block_high(xd, bsize, 0);
- const int max_blocks_wide = max_block_wide(xd, bsize, 0);
- const int txb_size_index = av1_get_txb_size_index(bsize, blk_row, blk_col);
- const TX_SIZE plane_tx_size = mbmi->inter_tx_size[txb_size_index];
-
- if (blk_row >= max_blocks_high || blk_col >= max_blocks_wide) return;
-
- if (tx_size == plane_tx_size) {
- mbmi->tx_size = tx_size;
- txfm_partition_update(xd->above_txfm_context + blk_col,
- xd->left_txfm_context + blk_row, tx_size, tx_size);
-
- } else {
- if (tx_size == TX_8X8) {
- mbmi->inter_tx_size[txb_size_index] = TX_4X4;
- mbmi->tx_size = TX_4X4;
- txfm_partition_update(xd->above_txfm_context + blk_col,
- xd->left_txfm_context + blk_row, TX_4X4, tx_size);
- return;
- }
- const TX_SIZE sub_txs = sub_tx_size_map[tx_size];
- const int bsw = tx_size_wide_unit[sub_txs];
- const int bsh = tx_size_high_unit[sub_txs];
- for (int row = 0; row < tx_size_high_unit[tx_size]; row += bsh) {
- for (int col = 0; col < tx_size_wide_unit[tx_size]; col += bsw) {
- const int offsetr = blk_row + row;
- const int offsetc = blk_col + col;
- if (offsetr >= max_blocks_high || offsetc >= max_blocks_wide) continue;
- set_txfm_context(xd, sub_txs, offsetr, offsetc);
- }
- }
- }
-}
-
-static void tx_partition_set_contexts(const AV1_COMMON *const cm,
- MACROBLOCKD *xd, BLOCK_SIZE plane_bsize,
- int mi_row, int mi_col) {
- const int mi_width = block_size_wide[plane_bsize] >> tx_size_wide_log2[0];
- const int mi_height = block_size_high[plane_bsize] >> tx_size_high_log2[0];
- const TX_SIZE max_tx_size = get_vartx_max_txsize(xd, plane_bsize, 0);
- const int bh = tx_size_high_unit[max_tx_size];
- const int bw = tx_size_wide_unit[max_tx_size];
- int idx, idy;
-
- xd->above_txfm_context = cm->above_txfm_context[xd->tile.tile_row] + mi_col;
- xd->left_txfm_context =
- xd->left_txfm_context_buffer + (mi_row & MAX_MIB_MASK);
-
- for (idy = 0; idy < mi_height; idy += bh)
- for (idx = 0; idx < mi_width; idx += bw)
- set_txfm_context(xd, max_tx_size, idy, idx);
-}
-
-static void encode_superblock(const AV1_COMP *const cpi, TileDataEnc *tile_data,
- ThreadData *td, TOKENEXTRA **t, RUN_TYPE dry_run,
- int mi_row, int mi_col, BLOCK_SIZE bsize,
- int *rate) {
- const AV1_COMMON *const cm = &cpi->common;
- const int num_planes = av1_num_planes(cm);
- MACROBLOCK *const x = &td->mb;
- MACROBLOCKD *const xd = &x->e_mbd;
- MB_MODE_INFO **mi_4x4 = xd->mi;
- MB_MODE_INFO *mbmi = mi_4x4[0];
- const int seg_skip =
- segfeature_active(&cm->seg, mbmi->segment_id, SEG_LVL_SKIP);
- const int mis = cm->mi_stride;
- const int mi_width = mi_size_wide[bsize];
- const int mi_height = mi_size_high[bsize];
- const int is_inter = is_inter_block(mbmi);
-
- if (cpi->sf.mode_pruning_based_on_two_pass_partition_search &&
- x->cb_partition_scan) {
- for (int row = mi_row; row < mi_row + mi_width;
- row += FIRST_PARTITION_PASS_SAMPLE_REGION) {
- for (int col = mi_col; col < mi_col + mi_height;
- col += FIRST_PARTITION_PASS_SAMPLE_REGION) {
- const int index = av1_first_partition_pass_stats_index(row, col);
- FIRST_PARTITION_PASS_STATS *const stats =
- &x->first_partition_pass_stats[index];
- // Increase the counter of data samples.
- ++stats->sample_counts;
- // Increase the counter for ref_frame[0] and ref_frame[1].
- if (stats->ref0_counts[mbmi->ref_frame[0]] < 255)
- ++stats->ref0_counts[mbmi->ref_frame[0]];
- if (mbmi->ref_frame[1] >= 0 &&
- stats->ref1_counts[mbmi->ref_frame[0]] < 255)
- ++stats->ref1_counts[mbmi->ref_frame[1]];
- }
- }
- }
-
- if (!is_inter) {
- xd->cfl.is_chroma_reference =
- is_chroma_reference(mi_row, mi_col, bsize, cm->seq_params.subsampling_x,
- cm->seq_params.subsampling_y);
- xd->cfl.store_y = store_cfl_required(cm, xd);
- mbmi->skip = 1;
- for (int plane = 0; plane < num_planes; ++plane) {
- av1_encode_intra_block_plane(cpi, x, bsize, plane,
- cpi->optimize_seg_arr[mbmi->segment_id],
- mi_row, mi_col);
- }
-
- // If there is at least one lossless segment, force the skip for intra
- // block to be 0, in order to avoid the segment_id to be changed by in
- // write_segment_id().
- if (!cpi->common.seg.segid_preskip && cpi->common.seg.update_map &&
- cpi->has_lossless_segment)
- mbmi->skip = 0;
-
- xd->cfl.store_y = 0;
- if (av1_allow_palette(cm->allow_screen_content_tools, bsize)) {
- for (int plane = 0; plane < AOMMIN(2, num_planes); ++plane) {
- if (mbmi->palette_mode_info.palette_size[plane] > 0) {
- if (!dry_run) {
- av1_tokenize_color_map(x, plane, t, bsize, mbmi->tx_size,
- PALETTE_MAP, tile_data->allow_update_cdf,
- td->counts);
- } else if (dry_run == DRY_RUN_COSTCOEFFS) {
- rate +=
- av1_cost_color_map(x, plane, bsize, mbmi->tx_size, PALETTE_MAP);
- }
- }
- }
- }
-
- av1_update_txb_context(cpi, td, dry_run, bsize, rate, mi_row, mi_col,
- tile_data->allow_update_cdf);
- } else {
- int ref;
- const int is_compound = has_second_ref(mbmi);
-
- set_ref_ptrs(cm, xd, mbmi->ref_frame[0], mbmi->ref_frame[1]);
- for (ref = 0; ref < 1 + is_compound; ++ref) {
- YV12_BUFFER_CONFIG *cfg = get_ref_frame_buffer(cpi, mbmi->ref_frame[ref]);
- assert(IMPLIES(!is_intrabc_block(mbmi), cfg));
- av1_setup_pre_planes(xd, ref, cfg, mi_row, mi_col,
- &xd->block_refs[ref]->sf, num_planes);
- }
-
- av1_build_inter_predictors_sb(cm, xd, mi_row, mi_col, NULL, bsize);
- if (mbmi->motion_mode == OBMC_CAUSAL)
- av1_build_obmc_inter_predictors_sb(cm, xd, mi_row, mi_col);
-
-#if CONFIG_MISMATCH_DEBUG
- if (dry_run == OUTPUT_ENABLED) {
- for (int plane = 0; plane < num_planes; ++plane) {
- const struct macroblockd_plane *pd = &xd->plane[plane];
- int pixel_c, pixel_r;
- mi_to_pixel_loc(&pixel_c, &pixel_r, mi_col, mi_row, 0, 0,
- pd->subsampling_x, pd->subsampling_y);
- if (!is_chroma_reference(mi_row, mi_col, bsize, pd->subsampling_x,
- pd->subsampling_y))
- continue;
- mismatch_record_block_pre(pd->dst.buf, pd->dst.stride, cm->frame_offset,
- plane, pixel_c, pixel_r, pd->width,
- pd->height,
- xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH);
- }
- }
-#else
- (void)num_planes;
-#endif
-
- av1_encode_sb(cpi, x, bsize, mi_row, mi_col, dry_run);
- av1_tokenize_sb_vartx(cpi, td, t, dry_run, mi_row, mi_col, bsize, rate,
- tile_data->allow_update_cdf);
- }
-
- if (!dry_run) {
- if (av1_allow_intrabc(cm) && is_intrabc_block(mbmi))
- td->intrabc_used_this_tile = 1;
- if (cm->tx_mode == TX_MODE_SELECT && !xd->lossless[mbmi->segment_id] &&
- mbmi->sb_type > BLOCK_4X4 && !(is_inter && (mbmi->skip || seg_skip))) {
- if (is_inter) {
- tx_partition_count_update(cm, x, bsize, mi_row, mi_col, td->counts,
- tile_data->allow_update_cdf);
- } else {
- if (mbmi->tx_size != max_txsize_rect_lookup[bsize])
- ++x->txb_split_count;
- if (block_signals_txsize(bsize)) {
- const int tx_size_ctx = get_tx_size_context(xd);
- const int32_t tx_size_cat = bsize_to_tx_size_cat(bsize);
- const int depth = tx_size_to_depth(mbmi->tx_size, bsize);
- const int max_depths = bsize_to_max_depth(bsize);
-
- if (tile_data->allow_update_cdf)
- update_cdf(xd->tile_ctx->tx_size_cdf[tx_size_cat][tx_size_ctx],
- depth, max_depths + 1);
-#if CONFIG_ENTROPY_STATS
- ++td->counts->intra_tx_size[tx_size_cat][tx_size_ctx][depth];
-#endif
- }
- }
- assert(IMPLIES(is_rect_tx(mbmi->tx_size), is_rect_tx_allowed(xd, mbmi)));
- } else {
- int i, j;
- TX_SIZE intra_tx_size;
- // The new intra coding scheme requires no change of transform size
- if (is_inter) {
- if (xd->lossless[mbmi->segment_id]) {
- intra_tx_size = TX_4X4;
- } else {
- intra_tx_size = tx_size_from_tx_mode(bsize, cm->tx_mode);
- }
- } else {
- intra_tx_size = mbmi->tx_size;
- }
-
- for (j = 0; j < mi_height; j++)
- for (i = 0; i < mi_width; i++)
- if (mi_col + i < cm->mi_cols && mi_row + j < cm->mi_rows)
- mi_4x4[mis * j + i]->tx_size = intra_tx_size;
-
- if (intra_tx_size != max_txsize_rect_lookup[bsize]) ++x->txb_split_count;
- }
- }
-
- if (cm->tx_mode == TX_MODE_SELECT && block_signals_txsize(mbmi->sb_type) &&
- is_inter && !(mbmi->skip || seg_skip) &&
- !xd->lossless[mbmi->segment_id]) {
- if (dry_run) tx_partition_set_contexts(cm, xd, bsize, mi_row, mi_col);
- } else {
- TX_SIZE tx_size = mbmi->tx_size;
- // The new intra coding scheme requires no change of transform size
- if (is_inter) {
- if (xd->lossless[mbmi->segment_id]) {
- tx_size = TX_4X4;
- } else {
- tx_size = tx_size_from_tx_mode(bsize, cm->tx_mode);
- }
- } else {
- tx_size = (bsize > BLOCK_4X4) ? tx_size : TX_4X4;
- }
- mbmi->tx_size = tx_size;
- set_txfm_ctxs(tx_size, xd->n4_w, xd->n4_h,
- (mbmi->skip || seg_skip) && is_inter_block(mbmi), xd);
- }
- CFL_CTX *const cfl = &xd->cfl;
- if (is_inter_block(mbmi) &&
- !is_chroma_reference(mi_row, mi_col, bsize, cfl->subsampling_x,
- cfl->subsampling_y) &&
- is_cfl_allowed(xd)) {
- cfl_store_block(xd, mbmi->sb_type, mbmi->tx_size);
- }
-}
diff --git a/third_party/aom/av1/encoder/encodeframe.h b/third_party/aom/av1/encoder/encodeframe.h
deleted file mode 100644
index e8cf9b468..000000000
--- a/third_party/aom/av1/encoder/encodeframe.h
+++ /dev/null
@@ -1,47 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#ifndef AOM_AV1_ENCODER_ENCODEFRAME_H_
-#define AOM_AV1_ENCODER_ENCODEFRAME_H_
-
-#include "aom/aom_integer.h"
-#include "av1/common/blockd.h"
-#include "av1/common/enums.h"
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-#define DELTAQ_MODULATION 1 // 0: variance based, 1: wavelet AC energy based
-
-struct macroblock;
-struct yv12_buffer_config;
-struct AV1_COMP;
-struct ThreadData;
-
-void av1_setup_src_planes(struct macroblock *x,
- const struct yv12_buffer_config *src, int mi_row,
- int mi_col, const int num_planes);
-
-void av1_encode_frame(struct AV1_COMP *cpi);
-
-void av1_alloc_tile_data(struct AV1_COMP *cpi);
-void av1_init_tile_data(struct AV1_COMP *cpi);
-void av1_encode_tile(struct AV1_COMP *cpi, struct ThreadData *td, int tile_row,
- int tile_col);
-void av1_encode_sb_row(struct AV1_COMP *cpi, struct ThreadData *td,
- int tile_row, int tile_col, int mi_row);
-
-#ifdef __cplusplus
-} // extern "C"
-#endif
-
-#endif // AOM_AV1_ENCODER_ENCODEFRAME_H_
diff --git a/third_party/aom/av1/encoder/encodemb.c b/third_party/aom/av1/encoder/encodemb.c
deleted file mode 100644
index ad12577e6..000000000
--- a/third_party/aom/av1/encoder/encodemb.c
+++ /dev/null
@@ -1,649 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#include "config/aom_config.h"
-#include "config/av1_rtcd.h"
-#include "config/aom_dsp_rtcd.h"
-
-#include "aom_dsp/bitwriter.h"
-#include "aom_dsp/quantize.h"
-#include "aom_mem/aom_mem.h"
-#include "aom_ports/mem.h"
-
-#if CONFIG_BITSTREAM_DEBUG || CONFIG_MISMATCH_DEBUG
-#include "aom_util/debug_util.h"
-#endif // CONFIG_BITSTREAM_DEBUG || CONFIG_MISMATCH_DEBUG
-
-#include "av1/common/cfl.h"
-#include "av1/common/idct.h"
-#include "av1/common/reconinter.h"
-#include "av1/common/reconintra.h"
-#include "av1/common/scan.h"
-
-#include "av1/encoder/av1_quantize.h"
-#include "av1/encoder/encodemb.h"
-#include "av1/encoder/encodetxb.h"
-#include "av1/encoder/hybrid_fwd_txfm.h"
-#include "av1/encoder/rd.h"
-#include "av1/encoder/rdopt.h"
-
-// Check if one needs to use c version subtraction.
-static int check_subtract_block_size(int w, int h) { return w < 4 || h < 4; }
-
-static void subtract_block(const MACROBLOCKD *xd, int rows, int cols,
- int16_t *diff, ptrdiff_t diff_stride,
- const uint8_t *src8, ptrdiff_t src_stride,
- const uint8_t *pred8, ptrdiff_t pred_stride) {
- if (check_subtract_block_size(rows, cols)) {
- if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
- aom_highbd_subtract_block_c(rows, cols, diff, diff_stride, src8,
- src_stride, pred8, pred_stride, xd->bd);
- return;
- }
- aom_subtract_block_c(rows, cols, diff, diff_stride, src8, src_stride, pred8,
- pred_stride);
-
- return;
- }
-
- if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
- aom_highbd_subtract_block(rows, cols, diff, diff_stride, src8, src_stride,
- pred8, pred_stride, xd->bd);
- return;
- }
- aom_subtract_block(rows, cols, diff, diff_stride, src8, src_stride, pred8,
- pred_stride);
-}
-
-void av1_subtract_txb(MACROBLOCK *x, int plane, BLOCK_SIZE plane_bsize,
- int blk_col, int blk_row, TX_SIZE tx_size) {
- MACROBLOCKD *const xd = &x->e_mbd;
- struct macroblock_plane *const p = &x->plane[plane];
- const struct macroblockd_plane *const pd = &x->e_mbd.plane[plane];
- const int diff_stride = block_size_wide[plane_bsize];
- const int src_stride = p->src.stride;
- const int dst_stride = pd->dst.stride;
- const int tx1d_width = tx_size_wide[tx_size];
- const int tx1d_height = tx_size_high[tx_size];
- uint8_t *dst =
- &pd->dst.buf[(blk_row * dst_stride + blk_col) << tx_size_wide_log2[0]];
- uint8_t *src =
- &p->src.buf[(blk_row * src_stride + blk_col) << tx_size_wide_log2[0]];
- int16_t *src_diff =
- &p->src_diff[(blk_row * diff_stride + blk_col) << tx_size_wide_log2[0]];
- subtract_block(xd, tx1d_height, tx1d_width, src_diff, diff_stride, src,
- src_stride, dst, dst_stride);
-}
-
-void av1_subtract_plane(MACROBLOCK *x, BLOCK_SIZE bsize, int plane) {
- struct macroblock_plane *const p = &x->plane[plane];
- const struct macroblockd_plane *const pd = &x->e_mbd.plane[plane];
- const BLOCK_SIZE plane_bsize =
- get_plane_block_size(bsize, pd->subsampling_x, pd->subsampling_y);
- const int bw = block_size_wide[plane_bsize];
- const int bh = block_size_high[plane_bsize];
- const MACROBLOCKD *xd = &x->e_mbd;
-
- subtract_block(xd, bh, bw, p->src_diff, bw, p->src.buf, p->src.stride,
- pd->dst.buf, pd->dst.stride);
-}
-
-int av1_optimize_b(const struct AV1_COMP *cpi, MACROBLOCK *mb, int plane,
- int block, TX_SIZE tx_size, TX_TYPE tx_type,
- const TXB_CTX *const txb_ctx, int fast_mode,
- int *rate_cost) {
- MACROBLOCKD *const xd = &mb->e_mbd;
- struct macroblock_plane *const p = &mb->plane[plane];
- const int eob = p->eobs[block];
- const int segment_id = xd->mi[0]->segment_id;
-
- if (eob == 0 || !cpi->optimize_seg_arr[segment_id] ||
- xd->lossless[segment_id]) {
- *rate_cost = av1_cost_skip_txb(mb, txb_ctx, plane, tx_size);
- return eob;
- }
-
- (void)fast_mode;
- return av1_optimize_txb_new(cpi, mb, plane, block, tx_size, tx_type, txb_ctx,
- rate_cost, cpi->oxcf.sharpness);
-}
-
-typedef enum QUANT_FUNC {
- QUANT_FUNC_LOWBD = 0,
- QUANT_FUNC_HIGHBD = 1,
- QUANT_FUNC_TYPES = 2
-} QUANT_FUNC;
-
-static AV1_QUANT_FACADE
- quant_func_list[AV1_XFORM_QUANT_TYPES][QUANT_FUNC_TYPES] = {
- { av1_quantize_fp_facade, av1_highbd_quantize_fp_facade },
- { av1_quantize_b_facade, av1_highbd_quantize_b_facade },
- { av1_quantize_dc_facade, av1_highbd_quantize_dc_facade },
- { NULL, NULL }
- };
-
-void av1_xform_quant(const AV1_COMMON *cm, MACROBLOCK *x, int plane, int block,
- int blk_row, int blk_col, BLOCK_SIZE plane_bsize,
- TX_SIZE tx_size, TX_TYPE tx_type,
- AV1_XFORM_QUANT xform_quant_idx) {
- MACROBLOCKD *const xd = &x->e_mbd;
- MB_MODE_INFO *const mbmi = xd->mi[0];
- const struct macroblock_plane *const p = &x->plane[plane];
- const struct macroblockd_plane *const pd = &xd->plane[plane];
- const SCAN_ORDER *const scan_order = get_scan(tx_size, tx_type);
-
- tran_low_t *const coeff = BLOCK_OFFSET(p->coeff, block);
- tran_low_t *const qcoeff = BLOCK_OFFSET(p->qcoeff, block);
- tran_low_t *const dqcoeff = BLOCK_OFFSET(pd->dqcoeff, block);
- uint16_t *const eob = &p->eobs[block];
- const int diff_stride = block_size_wide[plane_bsize];
- int seg_id = mbmi->segment_id;
- const TX_SIZE qm_tx_size = av1_get_adjusted_tx_size(tx_size);
- // Use a flat matrix (i.e. no weighting) for 1D and Identity transforms
- const qm_val_t *qmatrix =
- IS_2D_TRANSFORM(tx_type) ? pd->seg_qmatrix[seg_id][qm_tx_size]
- : cm->gqmatrix[NUM_QM_LEVELS - 1][0][qm_tx_size];
- const qm_val_t *iqmatrix =
- IS_2D_TRANSFORM(tx_type)
- ? pd->seg_iqmatrix[seg_id][qm_tx_size]
- : cm->giqmatrix[NUM_QM_LEVELS - 1][0][qm_tx_size];
-
- const int src_offset = (blk_row * diff_stride + blk_col);
- const int16_t *src_diff = &p->src_diff[src_offset << tx_size_wide_log2[0]];
- QUANT_PARAM qparam;
- qparam.log_scale = av1_get_tx_scale(tx_size);
- qparam.tx_size = tx_size;
- qparam.qmatrix = qmatrix;
- qparam.iqmatrix = iqmatrix;
- TxfmParam txfm_param;
- txfm_param.tx_type = tx_type;
- txfm_param.tx_size = tx_size;
- txfm_param.lossless = xd->lossless[mbmi->segment_id];
- txfm_param.tx_set_type = av1_get_ext_tx_set_type(
- txfm_param.tx_size, is_inter_block(mbmi), cm->reduced_tx_set_used);
-
- txfm_param.bd = xd->bd;
- txfm_param.is_hbd = get_bitdepth_data_path_index(xd);
-
- av1_fwd_txfm(src_diff, coeff, diff_stride, &txfm_param);
-
- if (xform_quant_idx != AV1_XFORM_QUANT_SKIP_QUANT) {
- const int n_coeffs = av1_get_max_eob(tx_size);
- if (LIKELY(!x->skip_block)) {
- quant_func_list[xform_quant_idx][txfm_param.is_hbd](
- coeff, n_coeffs, p, qcoeff, dqcoeff, eob, scan_order, &qparam);
- } else {
- av1_quantize_skip(n_coeffs, qcoeff, dqcoeff, eob);
- }
- }
- // NOTE: optimize_b_following is ture means av1_optimze_b will be called
- // When the condition of doing optimize_b is changed,
- // this flag need update simultaneously
- const int optimize_b_following =
- (xform_quant_idx != AV1_XFORM_QUANT_FP) || (txfm_param.lossless);
- if (optimize_b_following) {
- p->txb_entropy_ctx[block] =
- (uint8_t)av1_get_txb_entropy_context(qcoeff, scan_order, *eob);
- } else {
- p->txb_entropy_ctx[block] = 0;
- }
- return;
-}
-
-static void encode_block(int plane, int block, int blk_row, int blk_col,
- BLOCK_SIZE plane_bsize, TX_SIZE tx_size, void *arg,
- int mi_row, int mi_col, RUN_TYPE dry_run) {
- (void)mi_row;
- (void)mi_col;
- (void)dry_run;
- struct encode_b_args *const args = arg;
- const AV1_COMMON *const cm = &args->cpi->common;
- MACROBLOCK *const x = args->x;
- MACROBLOCKD *const xd = &x->e_mbd;
- MB_MODE_INFO *mbmi = xd->mi[0];
- struct macroblock_plane *const p = &x->plane[plane];
- struct macroblockd_plane *const pd = &xd->plane[plane];
- tran_low_t *const dqcoeff = BLOCK_OFFSET(pd->dqcoeff, block);
- uint8_t *dst;
- ENTROPY_CONTEXT *a, *l;
- int dummy_rate_cost = 0;
-
- const int bw = block_size_wide[plane_bsize] >> tx_size_wide_log2[0];
- dst = &pd->dst
- .buf[(blk_row * pd->dst.stride + blk_col) << tx_size_wide_log2[0]];
-
- a = &args->ta[blk_col];
- l = &args->tl[blk_row];
-
- if (!is_blk_skip(x, plane, blk_row * bw + blk_col) && !mbmi->skip_mode) {
- TX_TYPE tx_type = av1_get_tx_type(pd->plane_type, xd, blk_row, blk_col,
- tx_size, cm->reduced_tx_set_used);
- if (args->enable_optimize_b) {
- av1_xform_quant(cm, x, plane, block, blk_row, blk_col, plane_bsize,
- tx_size, tx_type, AV1_XFORM_QUANT_FP);
- TXB_CTX txb_ctx;
- get_txb_ctx(plane_bsize, tx_size, plane, a, l, &txb_ctx);
- av1_optimize_b(args->cpi, x, plane, block, tx_size, tx_type, &txb_ctx, 1,
- &dummy_rate_cost);
- } else {
- av1_xform_quant(
- cm, x, plane, block, blk_row, blk_col, plane_bsize, tx_size, tx_type,
- USE_B_QUANT_NO_TRELLIS ? AV1_XFORM_QUANT_B : AV1_XFORM_QUANT_FP);
- }
- } else {
- p->eobs[block] = 0;
- p->txb_entropy_ctx[block] = 0;
- }
-
- av1_set_txb_context(x, plane, block, tx_size, a, l);
-
- if (p->eobs[block]) {
- *(args->skip) = 0;
-
- TX_TYPE tx_type = av1_get_tx_type(pd->plane_type, xd, blk_row, blk_col,
- tx_size, cm->reduced_tx_set_used);
- av1_inverse_transform_block(xd, dqcoeff, plane, tx_type, tx_size, dst,
- pd->dst.stride, p->eobs[block],
- cm->reduced_tx_set_used);
- }
-
- if (p->eobs[block] == 0 && plane == 0) {
- // TODO(debargha, jingning): Temporarily disable txk_type check for eob=0
- // case. It is possible that certain collision in hash index would cause
- // the assertion failure. To further optimize the rate-distortion
- // performance, we need to re-visit this part and enable this assert
- // again.
-#if 0
- if (args->cpi->oxcf.aq_mode == NO_AQ &&
- args->cpi->oxcf.deltaq_mode == NO_DELTA_Q) {
- // TODO(jingning,angiebird,huisu@google.com): enable txk_check when
- // enable_optimize_b is true to detect potential RD bug.
- const uint8_t disable_txk_check = args->enable_optimize_b;
- if (!disable_txk_check) {
- assert(mbmi->txk_type[av1_get_txk_type_index(plane_bsize, blk_row,
- blk_col)] == DCT_DCT);
- }
- }
-#endif
- update_txk_array(mbmi->txk_type, plane_bsize, blk_row, blk_col, tx_size,
- DCT_DCT);
- }
-
-#if CONFIG_MISMATCH_DEBUG
- if (dry_run == OUTPUT_ENABLED) {
- int pixel_c, pixel_r;
- BLOCK_SIZE bsize = txsize_to_bsize[tx_size];
- int blk_w = block_size_wide[bsize];
- int blk_h = block_size_high[bsize];
- mi_to_pixel_loc(&pixel_c, &pixel_r, mi_col, mi_row, blk_col, blk_row,
- pd->subsampling_x, pd->subsampling_y);
- mismatch_record_block_tx(dst, pd->dst.stride, cm->frame_offset, plane,
- pixel_c, pixel_r, blk_w, blk_h,
- xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH);
- }
-#endif
-}
-
-static void encode_block_inter(int plane, int block, int blk_row, int blk_col,
- BLOCK_SIZE plane_bsize, TX_SIZE tx_size,
- void *arg, int mi_row, int mi_col,
- RUN_TYPE dry_run) {
- (void)mi_row;
- (void)mi_col;
- struct encode_b_args *const args = arg;
- MACROBLOCK *const x = args->x;
- MACROBLOCKD *const xd = &x->e_mbd;
- MB_MODE_INFO *const mbmi = xd->mi[0];
- const struct macroblockd_plane *const pd = &xd->plane[plane];
- const int max_blocks_high = max_block_high(xd, plane_bsize, plane);
- const int max_blocks_wide = max_block_wide(xd, plane_bsize, plane);
-
- if (blk_row >= max_blocks_high || blk_col >= max_blocks_wide) return;
-
- const TX_SIZE plane_tx_size =
- plane ? av1_get_max_uv_txsize(mbmi->sb_type, pd->subsampling_x,
- pd->subsampling_y)
- : mbmi->inter_tx_size[av1_get_txb_size_index(plane_bsize, blk_row,
- blk_col)];
- if (!plane) {
- assert(tx_size_wide[tx_size] >= tx_size_wide[plane_tx_size] &&
- tx_size_high[tx_size] >= tx_size_high[plane_tx_size]);
- }
-
- if (tx_size == plane_tx_size || plane) {
- encode_block(plane, block, blk_row, blk_col, plane_bsize, tx_size, arg,
- mi_row, mi_col, dry_run);
- } else {
- assert(tx_size < TX_SIZES_ALL);
- const TX_SIZE sub_txs = sub_tx_size_map[tx_size];
- assert(IMPLIES(tx_size <= TX_4X4, sub_txs == tx_size));
- assert(IMPLIES(tx_size > TX_4X4, sub_txs < tx_size));
- // This is the square transform block partition entry point.
- const int bsw = tx_size_wide_unit[sub_txs];
- const int bsh = tx_size_high_unit[sub_txs];
- const int step = bsh * bsw;
- assert(bsw > 0 && bsh > 0);
-
- for (int row = 0; row < tx_size_high_unit[tx_size]; row += bsh) {
- for (int col = 0; col < tx_size_wide_unit[tx_size]; col += bsw) {
- const int offsetr = blk_row + row;
- const int offsetc = blk_col + col;
-
- if (offsetr >= max_blocks_high || offsetc >= max_blocks_wide) continue;
-
- encode_block_inter(plane, block, offsetr, offsetc, plane_bsize, sub_txs,
- arg, mi_row, mi_col, dry_run);
- block += step;
- }
- }
- }
-}
-
-void av1_foreach_transformed_block_in_plane(
- const MACROBLOCKD *const xd, BLOCK_SIZE bsize, int plane,
- foreach_transformed_block_visitor visit, void *arg) {
- const struct macroblockd_plane *const pd = &xd->plane[plane];
- // block and transform sizes, in number of 4x4 blocks log 2 ("*_b")
- // 4x4=0, 8x8=2, 16x16=4, 32x32=6, 64x64=8
- // transform size varies per plane, look it up in a common way.
- const TX_SIZE tx_size = av1_get_tx_size(plane, xd);
- const BLOCK_SIZE plane_bsize =
- get_plane_block_size(bsize, pd->subsampling_x, pd->subsampling_y);
- const uint8_t txw_unit = tx_size_wide_unit[tx_size];
- const uint8_t txh_unit = tx_size_high_unit[tx_size];
- const int step = txw_unit * txh_unit;
- int i = 0, r, c;
-
- // If mb_to_right_edge is < 0 we are in a situation in which
- // the current block size extends into the UMV and we won't
- // visit the sub blocks that are wholly within the UMV.
- const int max_blocks_wide = max_block_wide(xd, plane_bsize, plane);
- const int max_blocks_high = max_block_high(xd, plane_bsize, plane);
-
- int blk_row, blk_col;
-
- const BLOCK_SIZE max_unit_bsize =
- get_plane_block_size(BLOCK_64X64, pd->subsampling_x, pd->subsampling_y);
- int mu_blocks_wide = block_size_wide[max_unit_bsize] >> tx_size_wide_log2[0];
- int mu_blocks_high = block_size_high[max_unit_bsize] >> tx_size_high_log2[0];
- mu_blocks_wide = AOMMIN(max_blocks_wide, mu_blocks_wide);
- mu_blocks_high = AOMMIN(max_blocks_high, mu_blocks_high);
-
- // Keep track of the row and column of the blocks we use so that we know
- // if we are in the unrestricted motion border.
- for (r = 0; r < max_blocks_high; r += mu_blocks_high) {
- const int unit_height = AOMMIN(mu_blocks_high + r, max_blocks_high);
- // Skip visiting the sub blocks that are wholly within the UMV.
- for (c = 0; c < max_blocks_wide; c += mu_blocks_wide) {
- const int unit_width = AOMMIN(mu_blocks_wide + c, max_blocks_wide);
- for (blk_row = r; blk_row < unit_height; blk_row += txh_unit) {
- for (blk_col = c; blk_col < unit_width; blk_col += txw_unit) {
- visit(plane, i, blk_row, blk_col, plane_bsize, tx_size, arg);
- i += step;
- }
- }
- }
- }
-}
-
-void av1_foreach_transformed_block(const MACROBLOCKD *const xd,
- BLOCK_SIZE bsize, int mi_row, int mi_col,
- foreach_transformed_block_visitor visit,
- void *arg, const int num_planes) {
- for (int plane = 0; plane < num_planes; ++plane) {
- if (!is_chroma_reference(mi_row, mi_col, bsize,
- xd->plane[plane].subsampling_x,
- xd->plane[plane].subsampling_y))
- continue;
- av1_foreach_transformed_block_in_plane(xd, bsize, plane, visit, arg);
- }
-}
-
-typedef struct encode_block_pass1_args {
- AV1_COMMON *cm;
- MACROBLOCK *x;
-} encode_block_pass1_args;
-
-static void encode_block_pass1(int plane, int block, int blk_row, int blk_col,
- BLOCK_SIZE plane_bsize, TX_SIZE tx_size,
- void *arg) {
- encode_block_pass1_args *args = (encode_block_pass1_args *)arg;
- AV1_COMMON *cm = args->cm;
- MACROBLOCK *const x = args->x;
- MACROBLOCKD *const xd = &x->e_mbd;
- struct macroblock_plane *const p = &x->plane[plane];
- struct macroblockd_plane *const pd = &xd->plane[plane];
- tran_low_t *const dqcoeff = BLOCK_OFFSET(pd->dqcoeff, block);
- TxfmParam txfm_param;
- uint8_t *dst;
- dst = &pd->dst
- .buf[(blk_row * pd->dst.stride + blk_col) << tx_size_wide_log2[0]];
- av1_xform_quant(cm, x, plane, block, blk_row, blk_col, plane_bsize, tx_size,
- DCT_DCT, AV1_XFORM_QUANT_B);
-
- if (p->eobs[block] > 0) {
- txfm_param.bd = xd->bd;
- txfm_param.is_hbd = get_bitdepth_data_path_index(xd);
- txfm_param.tx_type = DCT_DCT;
- txfm_param.tx_size = tx_size;
- txfm_param.eob = p->eobs[block];
- txfm_param.lossless = xd->lossless[xd->mi[0]->segment_id];
- txfm_param.tx_set_type = av1_get_ext_tx_set_type(
- txfm_param.tx_size, is_inter_block(xd->mi[0]), cm->reduced_tx_set_used);
- if (txfm_param.is_hbd) {
- av1_highbd_inv_txfm_add(dqcoeff, dst, pd->dst.stride, &txfm_param);
- return;
- }
- av1_inv_txfm_add(dqcoeff, dst, pd->dst.stride, &txfm_param);
- }
-}
-
-void av1_encode_sby_pass1(AV1_COMMON *cm, MACROBLOCK *x, BLOCK_SIZE bsize) {
- encode_block_pass1_args args = { cm, x };
- av1_subtract_plane(x, bsize, 0);
- av1_foreach_transformed_block_in_plane(&x->e_mbd, bsize, 0,
- encode_block_pass1, &args);
-}
-
-void av1_encode_sb(const struct AV1_COMP *cpi, MACROBLOCK *x, BLOCK_SIZE bsize,
- int mi_row, int mi_col, RUN_TYPE dry_run) {
- (void)dry_run;
- const AV1_COMMON *const cm = &cpi->common;
- const int num_planes = av1_num_planes(cm);
- MACROBLOCKD *const xd = &x->e_mbd;
- struct optimize_ctx ctx;
- MB_MODE_INFO *mbmi = xd->mi[0];
- struct encode_b_args arg = { cpi,
- x,
- &ctx,
- &mbmi->skip,
- NULL,
- NULL,
- cpi->optimize_seg_arr[mbmi->segment_id] };
- int plane;
-
- mbmi->skip = 1;
-
- if (x->skip) return;
-
- for (plane = 0; plane < num_planes; ++plane) {
- const int subsampling_x = xd->plane[plane].subsampling_x;
- const int subsampling_y = xd->plane[plane].subsampling_y;
-
- if (!is_chroma_reference(mi_row, mi_col, bsize, subsampling_x,
- subsampling_y))
- continue;
-
- const BLOCK_SIZE bsizec =
- scale_chroma_bsize(bsize, subsampling_x, subsampling_y);
-
- // TODO(jingning): Clean this up.
- const struct macroblockd_plane *const pd = &xd->plane[plane];
- const BLOCK_SIZE plane_bsize =
- get_plane_block_size(bsizec, pd->subsampling_x, pd->subsampling_y);
- const int mi_width = block_size_wide[plane_bsize] >> tx_size_wide_log2[0];
- const int mi_height = block_size_high[plane_bsize] >> tx_size_high_log2[0];
- const TX_SIZE max_tx_size = get_vartx_max_txsize(xd, plane_bsize, plane);
-
- const BLOCK_SIZE txb_size = txsize_to_bsize[max_tx_size];
- const int bw = block_size_wide[txb_size] >> tx_size_wide_log2[0];
- const int bh = block_size_high[txb_size] >> tx_size_high_log2[0];
- int idx, idy;
- int block = 0;
- int step = tx_size_wide_unit[max_tx_size] * tx_size_high_unit[max_tx_size];
- av1_get_entropy_contexts(bsizec, pd, ctx.ta[plane], ctx.tl[plane]);
-
- av1_subtract_plane(x, bsizec, plane);
-
- arg.ta = ctx.ta[plane];
- arg.tl = ctx.tl[plane];
-
- const BLOCK_SIZE max_unit_bsize =
- get_plane_block_size(BLOCK_64X64, pd->subsampling_x, pd->subsampling_y);
- int mu_blocks_wide =
- block_size_wide[max_unit_bsize] >> tx_size_wide_log2[0];
- int mu_blocks_high =
- block_size_high[max_unit_bsize] >> tx_size_high_log2[0];
-
- mu_blocks_wide = AOMMIN(mi_width, mu_blocks_wide);
- mu_blocks_high = AOMMIN(mi_height, mu_blocks_high);
-
- for (idy = 0; idy < mi_height; idy += mu_blocks_high) {
- for (idx = 0; idx < mi_width; idx += mu_blocks_wide) {
- int blk_row, blk_col;
- const int unit_height = AOMMIN(mu_blocks_high + idy, mi_height);
- const int unit_width = AOMMIN(mu_blocks_wide + idx, mi_width);
- for (blk_row = idy; blk_row < unit_height; blk_row += bh) {
- for (blk_col = idx; blk_col < unit_width; blk_col += bw) {
- encode_block_inter(plane, block, blk_row, blk_col, plane_bsize,
- max_tx_size, &arg, mi_row, mi_col, dry_run);
- block += step;
- }
- }
- }
- }
- }
-}
-
-static void encode_block_intra_and_set_context(int plane, int block,
- int blk_row, int blk_col,
- BLOCK_SIZE plane_bsize,
- TX_SIZE tx_size, void *arg) {
- av1_encode_block_intra(plane, block, blk_row, blk_col, plane_bsize, tx_size,
- arg);
-
- struct encode_b_args *const args = arg;
- MACROBLOCK *x = args->x;
- ENTROPY_CONTEXT *a = &args->ta[blk_col];
- ENTROPY_CONTEXT *l = &args->tl[blk_row];
- av1_set_txb_context(x, plane, block, tx_size, a, l);
-}
-
-void av1_encode_block_intra(int plane, int block, int blk_row, int blk_col,
- BLOCK_SIZE plane_bsize, TX_SIZE tx_size,
- void *arg) {
- struct encode_b_args *const args = arg;
- const AV1_COMMON *const cm = &args->cpi->common;
- MACROBLOCK *const x = args->x;
- MACROBLOCKD *const xd = &x->e_mbd;
- MB_MODE_INFO *mbmi = xd->mi[0];
- struct macroblock_plane *const p = &x->plane[plane];
- struct macroblockd_plane *const pd = &xd->plane[plane];
- tran_low_t *dqcoeff = BLOCK_OFFSET(pd->dqcoeff, block);
- PLANE_TYPE plane_type = get_plane_type(plane);
- const TX_TYPE tx_type = av1_get_tx_type(plane_type, xd, blk_row, blk_col,
- tx_size, cm->reduced_tx_set_used);
- uint16_t *eob = &p->eobs[block];
- const int dst_stride = pd->dst.stride;
- uint8_t *dst =
- &pd->dst.buf[(blk_row * dst_stride + blk_col) << tx_size_wide_log2[0]];
- int dummy_rate_cost = 0;
-
- av1_predict_intra_block_facade(cm, xd, plane, blk_col, blk_row, tx_size);
-
- const int bw = block_size_wide[plane_bsize] >> tx_size_wide_log2[0];
- if (plane == 0 && is_blk_skip(x, plane, blk_row * bw + blk_col)) {
- *eob = 0;
- p->txb_entropy_ctx[block] = 0;
- } else {
- av1_subtract_txb(x, plane, plane_bsize, blk_col, blk_row, tx_size);
-
- const ENTROPY_CONTEXT *a = &args->ta[blk_col];
- const ENTROPY_CONTEXT *l = &args->tl[blk_row];
- if (args->enable_optimize_b) {
- av1_xform_quant(cm, x, plane, block, blk_row, blk_col, plane_bsize,
- tx_size, tx_type, AV1_XFORM_QUANT_FP);
- TXB_CTX txb_ctx;
- get_txb_ctx(plane_bsize, tx_size, plane, a, l, &txb_ctx);
- av1_optimize_b(args->cpi, x, plane, block, tx_size, tx_type, &txb_ctx, 1,
- &dummy_rate_cost);
- } else {
- av1_xform_quant(
- cm, x, plane, block, blk_row, blk_col, plane_bsize, tx_size, tx_type,
- USE_B_QUANT_NO_TRELLIS ? AV1_XFORM_QUANT_B : AV1_XFORM_QUANT_FP);
- }
- }
-
- if (*eob) {
- av1_inverse_transform_block(xd, dqcoeff, plane, tx_type, tx_size, dst,
- dst_stride, *eob, cm->reduced_tx_set_used);
- }
-
- if (*eob == 0 && plane == 0) {
- // TODO(jingning): Temporarily disable txk_type check for eob=0 case.
- // It is possible that certain collision in hash index would cause
- // the assertion failure. To further optimize the rate-distortion
- // performance, we need to re-visit this part and enable this assert
- // again.
-#if 0
- if (args->cpi->oxcf.aq_mode == NO_AQ
- && args->cpi->oxcf.deltaq_mode == NO_DELTA_Q) {
- assert(mbmi->txk_type[av1_get_txk_type_index(plane_bsize, blk_row,
- blk_col)] == DCT_DCT);
- }
-#endif
- update_txk_array(mbmi->txk_type, plane_bsize, blk_row, blk_col, tx_size,
- DCT_DCT);
- }
-
- // For intra mode, skipped blocks are so rare that transmitting skip=1 is
- // very expensive.
- *(args->skip) = 0;
-
- if (plane == AOM_PLANE_Y && xd->cfl.store_y) {
- cfl_store_tx(xd, blk_row, blk_col, tx_size, plane_bsize);
- }
-}
-
-void av1_encode_intra_block_plane(const struct AV1_COMP *cpi, MACROBLOCK *x,
- BLOCK_SIZE bsize, int plane,
- int enable_optimize_b, int mi_row,
- int mi_col) {
- const MACROBLOCKD *const xd = &x->e_mbd;
- ENTROPY_CONTEXT ta[MAX_MIB_SIZE] = { 0 };
- ENTROPY_CONTEXT tl[MAX_MIB_SIZE] = { 0 };
-
- struct encode_b_args arg = {
- cpi, x, NULL, &(xd->mi[0]->skip), ta, tl, enable_optimize_b
- };
-
- if (!is_chroma_reference(mi_row, mi_col, bsize,
- xd->plane[plane].subsampling_x,
- xd->plane[plane].subsampling_y))
- return;
-
- if (enable_optimize_b) {
- const struct macroblockd_plane *const pd = &xd->plane[plane];
- av1_get_entropy_contexts(bsize, pd, ta, tl);
- }
- av1_foreach_transformed_block_in_plane(
- xd, bsize, plane, encode_block_intra_and_set_context, &arg);
-}
diff --git a/third_party/aom/av1/encoder/encodemb.h b/third_party/aom/av1/encoder/encodemb.h
deleted file mode 100644
index 39080de59..000000000
--- a/third_party/aom/av1/encoder/encodemb.h
+++ /dev/null
@@ -1,96 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#ifndef AOM_AV1_ENCODER_ENCODEMB_H_
-#define AOM_AV1_ENCODER_ENCODEMB_H_
-
-#include "config/aom_config.h"
-
-#include "av1/common/onyxc_int.h"
-#include "av1/common/txb_common.h"
-#include "av1/encoder/block.h"
-#include "av1/encoder/tokenize.h"
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-struct optimize_ctx {
- ENTROPY_CONTEXT ta[MAX_MB_PLANE][MAX_MIB_SIZE];
- ENTROPY_CONTEXT tl[MAX_MB_PLANE][MAX_MIB_SIZE];
-};
-
-struct encode_b_args {
- const struct AV1_COMP *cpi;
- MACROBLOCK *x;
- struct optimize_ctx *ctx;
- int8_t *skip;
- ENTROPY_CONTEXT *ta;
- ENTROPY_CONTEXT *tl;
- int8_t enable_optimize_b;
-};
-
-typedef enum AV1_XFORM_QUANT {
- AV1_XFORM_QUANT_FP = 0,
- AV1_XFORM_QUANT_B = 1,
- AV1_XFORM_QUANT_DC = 2,
- AV1_XFORM_QUANT_SKIP_QUANT,
- AV1_XFORM_QUANT_TYPES,
-} AV1_XFORM_QUANT;
-
-void av1_encode_sb(const struct AV1_COMP *cpi, MACROBLOCK *x, BLOCK_SIZE bsize,
- int mi_row, int mi_col, RUN_TYPE dry_run);
-
-void av1_foreach_transformed_block_in_plane(
- const MACROBLOCKD *const xd, BLOCK_SIZE bsize, int plane,
- foreach_transformed_block_visitor visit, void *arg);
-
-void av1_foreach_transformed_block(const MACROBLOCKD *const xd,
- BLOCK_SIZE bsize, int mi_row, int mi_col,
- foreach_transformed_block_visitor visit,
- void *arg, const int num_planes);
-
-void av1_encode_sby_pass1(AV1_COMMON *cm, MACROBLOCK *x, BLOCK_SIZE bsize);
-
-void av1_xform_quant(const AV1_COMMON *cm, MACROBLOCK *x, int plane, int block,
- int blk_row, int blk_col, BLOCK_SIZE plane_bsize,
- TX_SIZE tx_size, TX_TYPE tx_type,
- AV1_XFORM_QUANT xform_quant_idx);
-
-int av1_optimize_b(const struct AV1_COMP *cpi, MACROBLOCK *mb, int plane,
- int block, TX_SIZE tx_size, TX_TYPE tx_type,
- const TXB_CTX *const txb_ctx, int fast_mode, int *rate_cost);
-
-void av1_subtract_txb(MACROBLOCK *x, int plane, BLOCK_SIZE plane_bsize,
- int blk_col, int blk_row, TX_SIZE tx_size);
-
-void av1_subtract_plane(MACROBLOCK *x, BLOCK_SIZE bsize, int plane);
-
-static INLINE void av1_set_txb_context(MACROBLOCK *x, int plane, int block,
- TX_SIZE tx_size, ENTROPY_CONTEXT *a,
- ENTROPY_CONTEXT *l) {
- const uint8_t ctx = x->plane[plane].txb_entropy_ctx[block];
- memset(a, ctx, tx_size_wide_unit[tx_size] * sizeof(*a));
- memset(l, ctx, tx_size_high_unit[tx_size] * sizeof(*l));
-}
-
-void av1_encode_block_intra(int plane, int block, int blk_row, int blk_col,
- BLOCK_SIZE plane_bsize, TX_SIZE tx_size, void *arg);
-
-void av1_encode_intra_block_plane(const struct AV1_COMP *cpi, MACROBLOCK *x,
- BLOCK_SIZE bsize, int plane,
- int enable_optimize_b, int mi_row,
- int mi_col);
-
-#ifdef __cplusplus
-} // extern "C"
-#endif
-
-#endif // AOM_AV1_ENCODER_ENCODEMB_H_
diff --git a/third_party/aom/av1/encoder/encodemv.c b/third_party/aom/av1/encoder/encodemv.c
deleted file mode 100644
index 42eb5abf6..000000000
--- a/third_party/aom/av1/encoder/encodemv.c
+++ /dev/null
@@ -1,239 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#include <math.h>
-
-#include "av1/common/common.h"
-#include "av1/common/entropymode.h"
-
-#include "av1/encoder/cost.h"
-#include "av1/encoder/encodemv.h"
-
-#include "aom_dsp/aom_dsp_common.h"
-#include "aom_ports/bitops.h"
-
-static INLINE int mv_class_base(MV_CLASS_TYPE c) {
- return c ? CLASS0_SIZE << (c + 2) : 0;
-}
-
-// If n != 0, returns the floor of log base 2 of n. If n == 0, returns 0.
-static INLINE uint8_t log_in_base_2(unsigned int n) {
- // get_msb() is only valid when n != 0.
- return n == 0 ? 0 : get_msb(n);
-}
-
-static INLINE MV_CLASS_TYPE get_mv_class(int z, int *offset) {
- const MV_CLASS_TYPE c = (z >= CLASS0_SIZE * 4096)
- ? MV_CLASS_10
- : (MV_CLASS_TYPE)log_in_base_2(z >> 3);
- if (offset) *offset = z - mv_class_base(c);
- return c;
-}
-
-static void encode_mv_component(aom_writer *w, int comp, nmv_component *mvcomp,
- MvSubpelPrecision precision) {
- assert(comp != 0);
- int offset;
- const int sign = comp < 0;
- const int mag = sign ? -comp : comp;
- const int mv_class = get_mv_class(mag - 1, &offset);
- const int d = offset >> 3; // int mv data
- const int fr = (offset >> 1) & 3; // fractional mv data
- const int hp = offset & 1; // high precision mv data
-
- // Sign
- aom_write_symbol(w, sign, mvcomp->sign_cdf, 2);
-
- // Class
- aom_write_symbol(w, mv_class, mvcomp->classes_cdf, MV_CLASSES);
-
- // Integer bits
- if (mv_class == MV_CLASS_0) {
- aom_write_symbol(w, d, mvcomp->class0_cdf, CLASS0_SIZE);
- } else {
- int i;
- const int n = mv_class + CLASS0_BITS - 1; // number of bits
- for (i = 0; i < n; ++i)
- aom_write_symbol(w, (d >> i) & 1, mvcomp->bits_cdf[i], 2);
- }
- // Fractional bits
- if (precision > MV_SUBPEL_NONE) {
- aom_write_symbol(
- w, fr,
- mv_class == MV_CLASS_0 ? mvcomp->class0_fp_cdf[d] : mvcomp->fp_cdf,
- MV_FP_SIZE);
- }
-
- // High precision bit
- if (precision > MV_SUBPEL_LOW_PRECISION)
- aom_write_symbol(
- w, hp, mv_class == MV_CLASS_0 ? mvcomp->class0_hp_cdf : mvcomp->hp_cdf,
- 2);
-}
-
-static void build_nmv_component_cost_table(int *mvcost,
- const nmv_component *const mvcomp,
- MvSubpelPrecision precision) {
- int i, v;
- int sign_cost[2], class_cost[MV_CLASSES], class0_cost[CLASS0_SIZE];
- int bits_cost[MV_OFFSET_BITS][2];
- int class0_fp_cost[CLASS0_SIZE][MV_FP_SIZE], fp_cost[MV_FP_SIZE];
- int class0_hp_cost[2], hp_cost[2];
-
- av1_cost_tokens_from_cdf(sign_cost, mvcomp->sign_cdf, NULL);
- av1_cost_tokens_from_cdf(class_cost, mvcomp->classes_cdf, NULL);
- av1_cost_tokens_from_cdf(class0_cost, mvcomp->class0_cdf, NULL);
- for (i = 0; i < MV_OFFSET_BITS; ++i) {
- av1_cost_tokens_from_cdf(bits_cost[i], mvcomp->bits_cdf[i], NULL);
- }
-
- for (i = 0; i < CLASS0_SIZE; ++i)
- av1_cost_tokens_from_cdf(class0_fp_cost[i], mvcomp->class0_fp_cdf[i], NULL);
- av1_cost_tokens_from_cdf(fp_cost, mvcomp->fp_cdf, NULL);
-
- if (precision > MV_SUBPEL_LOW_PRECISION) {
- av1_cost_tokens_from_cdf(class0_hp_cost, mvcomp->class0_hp_cdf, NULL);
- av1_cost_tokens_from_cdf(hp_cost, mvcomp->hp_cdf, NULL);
- }
- mvcost[0] = 0;
- for (v = 1; v <= MV_MAX; ++v) {
- int z, c, o, d, e, f, cost = 0;
- z = v - 1;
- c = get_mv_class(z, &o);
- cost += class_cost[c];
- d = (o >> 3); /* int mv data */
- f = (o >> 1) & 3; /* fractional pel mv data */
- e = (o & 1); /* high precision mv data */
- if (c == MV_CLASS_0) {
- cost += class0_cost[d];
- } else {
- const int b = c + CLASS0_BITS - 1; /* number of bits */
- for (i = 0; i < b; ++i) cost += bits_cost[i][((d >> i) & 1)];
- }
- if (precision > MV_SUBPEL_NONE) {
- if (c == MV_CLASS_0) {
- cost += class0_fp_cost[d][f];
- } else {
- cost += fp_cost[f];
- }
- if (precision > MV_SUBPEL_LOW_PRECISION) {
- if (c == MV_CLASS_0) {
- cost += class0_hp_cost[e];
- } else {
- cost += hp_cost[e];
- }
- }
- }
- mvcost[v] = cost + sign_cost[0];
- mvcost[-v] = cost + sign_cost[1];
- }
-}
-
-void av1_encode_mv(AV1_COMP *cpi, aom_writer *w, const MV *mv, const MV *ref,
- nmv_context *mvctx, int usehp) {
- const MV diff = { mv->row - ref->row, mv->col - ref->col };
- const MV_JOINT_TYPE j = av1_get_mv_joint(&diff);
- if (cpi->common.cur_frame_force_integer_mv) {
- usehp = MV_SUBPEL_NONE;
- }
- aom_write_symbol(w, j, mvctx->joints_cdf, MV_JOINTS);
- if (mv_joint_vertical(j))
- encode_mv_component(w, diff.row, &mvctx->comps[0], usehp);
-
- if (mv_joint_horizontal(j))
- encode_mv_component(w, diff.col, &mvctx->comps[1], usehp);
-
- // If auto_mv_step_size is enabled then keep track of the largest
- // motion vector component used.
- if (cpi->sf.mv.auto_mv_step_size) {
- unsigned int maxv = AOMMAX(abs(mv->row), abs(mv->col)) >> 3;
- cpi->max_mv_magnitude = AOMMAX(maxv, cpi->max_mv_magnitude);
- }
-}
-
-void av1_encode_dv(aom_writer *w, const MV *mv, const MV *ref,
- nmv_context *mvctx) {
- // DV and ref DV should not have sub-pel.
- assert((mv->col & 7) == 0);
- assert((mv->row & 7) == 0);
- assert((ref->col & 7) == 0);
- assert((ref->row & 7) == 0);
- const MV diff = { mv->row - ref->row, mv->col - ref->col };
- const MV_JOINT_TYPE j = av1_get_mv_joint(&diff);
-
- aom_write_symbol(w, j, mvctx->joints_cdf, MV_JOINTS);
- if (mv_joint_vertical(j))
- encode_mv_component(w, diff.row, &mvctx->comps[0], MV_SUBPEL_NONE);
-
- if (mv_joint_horizontal(j))
- encode_mv_component(w, diff.col, &mvctx->comps[1], MV_SUBPEL_NONE);
-}
-
-void av1_build_nmv_cost_table(int *mvjoint, int *mvcost[2],
- const nmv_context *ctx,
- MvSubpelPrecision precision) {
- av1_cost_tokens_from_cdf(mvjoint, ctx->joints_cdf, NULL);
- build_nmv_component_cost_table(mvcost[0], &ctx->comps[0], precision);
- build_nmv_component_cost_table(mvcost[1], &ctx->comps[1], precision);
-}
-
-int_mv av1_get_ref_mv_from_stack(int ref_idx,
- const MV_REFERENCE_FRAME *ref_frame,
- int ref_mv_idx,
- const MB_MODE_INFO_EXT *mbmi_ext) {
- const int8_t ref_frame_type = av1_ref_frame_type(ref_frame);
- const CANDIDATE_MV *curr_ref_mv_stack =
- mbmi_ext->ref_mv_stack[ref_frame_type];
- int_mv ref_mv;
- ref_mv.as_int = INVALID_MV;
-
- if (ref_frame[1] > INTRA_FRAME) {
- if (ref_idx == 0) {
- ref_mv = curr_ref_mv_stack[ref_mv_idx].this_mv;
- } else {
- assert(ref_idx == 1);
- ref_mv = curr_ref_mv_stack[ref_mv_idx].comp_mv;
- }
- } else {
- assert(ref_idx == 0);
- if (ref_mv_idx < mbmi_ext->ref_mv_count[ref_frame_type]) {
- ref_mv = curr_ref_mv_stack[ref_mv_idx].this_mv;
- } else {
- ref_mv = mbmi_ext->global_mvs[ref_frame_type];
- }
- }
- return ref_mv;
-}
-
-int_mv av1_get_ref_mv(const MACROBLOCK *x, int ref_idx) {
- const MACROBLOCKD *xd = &x->e_mbd;
- const MB_MODE_INFO *mbmi = xd->mi[0];
- int ref_mv_idx = mbmi->ref_mv_idx;
- if (mbmi->mode == NEAR_NEWMV || mbmi->mode == NEW_NEARMV) {
- assert(has_second_ref(mbmi));
- ref_mv_idx += 1;
- }
- return av1_get_ref_mv_from_stack(ref_idx, mbmi->ref_frame, ref_mv_idx,
- x->mbmi_ext);
-}
-
-void av1_find_best_ref_mvs_from_stack(int allow_hp,
- const MB_MODE_INFO_EXT *mbmi_ext,
- MV_REFERENCE_FRAME ref_frame,
- int_mv *nearest_mv, int_mv *near_mv,
- int is_integer) {
- const int ref_idx = 0;
- MV_REFERENCE_FRAME ref_frames[2] = { ref_frame, NONE_FRAME };
- *nearest_mv = av1_get_ref_mv_from_stack(ref_idx, ref_frames, 0, mbmi_ext);
- lower_mv_precision(&nearest_mv->as_mv, allow_hp, is_integer);
- *near_mv = av1_get_ref_mv_from_stack(ref_idx, ref_frames, 1, mbmi_ext);
- lower_mv_precision(&near_mv->as_mv, allow_hp, is_integer);
-}
diff --git a/third_party/aom/av1/encoder/encodemv.h b/third_party/aom/av1/encoder/encodemv.h
deleted file mode 100644
index 37ff547c8..000000000
--- a/third_party/aom/av1/encoder/encodemv.h
+++ /dev/null
@@ -1,55 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#ifndef AOM_AV1_ENCODER_ENCODEMV_H_
-#define AOM_AV1_ENCODER_ENCODEMV_H_
-
-#include "av1/encoder/encoder.h"
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-void av1_encode_mv(AV1_COMP *cpi, aom_writer *w, const MV *mv, const MV *ref,
- nmv_context *mvctx, int usehp);
-
-void av1_build_nmv_cost_table(int *mvjoint, int *mvcost[2],
- const nmv_context *mvctx,
- MvSubpelPrecision precision);
-
-void av1_update_mv_count(ThreadData *td);
-
-void av1_encode_dv(aom_writer *w, const MV *mv, const MV *ref,
- nmv_context *mvctx);
-int_mv av1_get_ref_mv(const MACROBLOCK *x, int ref_idx);
-int_mv av1_get_ref_mv_from_stack(int ref_idx,
- const MV_REFERENCE_FRAME *ref_frame,
- int ref_mv_idx,
- const MB_MODE_INFO_EXT *mbmi_ext);
-void av1_find_best_ref_mvs_from_stack(int allow_hp,
- const MB_MODE_INFO_EXT *mbmi_ext,
- MV_REFERENCE_FRAME ref_frame,
- int_mv *nearest_mv, int_mv *near_mv,
- int is_integer);
-
-static INLINE MV_JOINT_TYPE av1_get_mv_joint(const MV *mv) {
- if (mv->row == 0) {
- return mv->col == 0 ? MV_JOINT_ZERO : MV_JOINT_HNZVZ;
- } else {
- return mv->col == 0 ? MV_JOINT_HZVNZ : MV_JOINT_HNZVNZ;
- }
-}
-
-#ifdef __cplusplus
-} // extern "C"
-#endif
-
-#endif // AOM_AV1_ENCODER_ENCODEMV_H_
diff --git a/third_party/aom/av1/encoder/encoder.c b/third_party/aom/av1/encoder/encoder.c
deleted file mode 100644
index a2da2df89..000000000
--- a/third_party/aom/av1/encoder/encoder.c
+++ /dev/null
@@ -1,6437 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#include <limits.h>
-#include <math.h>
-#include <stdio.h>
-
-#include "config/aom_config.h"
-#include "config/aom_dsp_rtcd.h"
-#include "config/aom_scale_rtcd.h"
-#include "config/av1_rtcd.h"
-
-#include "aom_dsp/aom_dsp_common.h"
-#include "aom_dsp/aom_filter.h"
-#if CONFIG_DENOISE
-#include "aom_dsp/grain_table.h"
-#include "aom_dsp/noise_util.h"
-#include "aom_dsp/noise_model.h"
-#endif
-#include "aom_dsp/psnr.h"
-#if CONFIG_INTERNAL_STATS
-#include "aom_dsp/ssim.h"
-#endif
-#include "aom_ports/aom_timer.h"
-#include "aom_ports/mem.h"
-#include "aom_ports/system_state.h"
-#include "aom_scale/aom_scale.h"
-#if CONFIG_BITSTREAM_DEBUG || CONFIG_MISMATCH_DEBUG
-#include "aom_util/debug_util.h"
-#endif // CONFIG_BITSTREAM_DEBUG || CONFIG_MISMATCH_DEBUG
-
-#include "av1/common/alloccommon.h"
-#include "av1/common/cdef.h"
-#include "av1/common/filter.h"
-#include "av1/common/idct.h"
-#include "av1/common/reconinter.h"
-#include "av1/common/reconintra.h"
-#include "av1/common/resize.h"
-#include "av1/common/tile_common.h"
-
-#include "av1/encoder/aq_complexity.h"
-#include "av1/encoder/aq_cyclicrefresh.h"
-#include "av1/encoder/aq_variance.h"
-#include "av1/encoder/bitstream.h"
-#include "av1/encoder/context_tree.h"
-#include "av1/encoder/encodeframe.h"
-#include "av1/encoder/encodemv.h"
-#include "av1/encoder/encoder.h"
-#include "av1/encoder/encodetxb.h"
-#include "av1/encoder/ethread.h"
-#include "av1/encoder/firstpass.h"
-#include "av1/encoder/grain_test_vectors.h"
-#include "av1/encoder/hash_motion.h"
-#include "av1/encoder/mbgraph.h"
-#include "av1/encoder/picklpf.h"
-#include "av1/encoder/pickrst.h"
-#include "av1/encoder/random.h"
-#include "av1/encoder/ratectrl.h"
-#include "av1/encoder/rd.h"
-#include "av1/encoder/segmentation.h"
-#include "av1/encoder/speed_features.h"
-#include "av1/encoder/temporal_filter.h"
-
-#define DEFAULT_EXPLICIT_ORDER_HINT_BITS 7
-
-// av1 uses 10,000,000 ticks/second as time stamp
-#define TICKS_PER_SEC 10000000LL
-
-#if CONFIG_ENTROPY_STATS
-FRAME_COUNTS aggregate_fc;
-#endif // CONFIG_ENTROPY_STATS
-
-#define AM_SEGMENT_ID_INACTIVE 7
-#define AM_SEGMENT_ID_ACTIVE 0
-
-// Whether to use high precision mv for altref computation.
-#define ALTREF_HIGH_PRECISION_MV 1
-
-// Q threshold for high precision mv. Choose a very high value for now so that
-// HIGH_PRECISION is always chosen.
-#define HIGH_PRECISION_MV_QTHRESH 200
-
-// #define OUTPUT_YUV_REC
-#ifdef OUTPUT_YUV_SKINMAP
-FILE *yuv_skinmap_file = NULL;
-#endif
-#ifdef OUTPUT_YUV_REC
-FILE *yuv_rec_file;
-#define FILE_NAME_LEN 100
-#endif
-
-static INLINE void Scale2Ratio(AOM_SCALING mode, int *hr, int *hs) {
- switch (mode) {
- case NORMAL:
- *hr = 1;
- *hs = 1;
- break;
- case FOURFIVE:
- *hr = 4;
- *hs = 5;
- break;
- case THREEFIVE:
- *hr = 3;
- *hs = 5;
- break;
- case ONETWO:
- *hr = 1;
- *hs = 2;
- break;
- default:
- *hr = 1;
- *hs = 1;
- assert(0);
- break;
- }
-}
-
-// Mark all inactive blocks as active. Other segmentation features may be set
-// so memset cannot be used, instead only inactive blocks should be reset.
-static void suppress_active_map(AV1_COMP *cpi) {
- unsigned char *const seg_map = cpi->segmentation_map;
- int i;
- if (cpi->active_map.enabled || cpi->active_map.update)
- for (i = 0; i < cpi->common.mi_rows * cpi->common.mi_cols; ++i)
- if (seg_map[i] == AM_SEGMENT_ID_INACTIVE)
- seg_map[i] = AM_SEGMENT_ID_ACTIVE;
-}
-
-static void apply_active_map(AV1_COMP *cpi) {
- struct segmentation *const seg = &cpi->common.seg;
- unsigned char *const seg_map = cpi->segmentation_map;
- const unsigned char *const active_map = cpi->active_map.map;
- int i;
-
- assert(AM_SEGMENT_ID_ACTIVE == CR_SEGMENT_ID_BASE);
-
- if (frame_is_intra_only(&cpi->common)) {
- cpi->active_map.enabled = 0;
- cpi->active_map.update = 1;
- }
-
- if (cpi->active_map.update) {
- if (cpi->active_map.enabled) {
- for (i = 0; i < cpi->common.mi_rows * cpi->common.mi_cols; ++i)
- if (seg_map[i] == AM_SEGMENT_ID_ACTIVE) seg_map[i] = active_map[i];
- av1_enable_segmentation(seg);
- av1_enable_segfeature(seg, AM_SEGMENT_ID_INACTIVE, SEG_LVL_SKIP);
- av1_enable_segfeature(seg, AM_SEGMENT_ID_INACTIVE, SEG_LVL_ALT_LF_Y_H);
- av1_enable_segfeature(seg, AM_SEGMENT_ID_INACTIVE, SEG_LVL_ALT_LF_Y_V);
- av1_enable_segfeature(seg, AM_SEGMENT_ID_INACTIVE, SEG_LVL_ALT_LF_U);
- av1_enable_segfeature(seg, AM_SEGMENT_ID_INACTIVE, SEG_LVL_ALT_LF_V);
-
- av1_set_segdata(seg, AM_SEGMENT_ID_INACTIVE, SEG_LVL_ALT_LF_Y_H,
- -MAX_LOOP_FILTER);
- av1_set_segdata(seg, AM_SEGMENT_ID_INACTIVE, SEG_LVL_ALT_LF_Y_V,
- -MAX_LOOP_FILTER);
- av1_set_segdata(seg, AM_SEGMENT_ID_INACTIVE, SEG_LVL_ALT_LF_U,
- -MAX_LOOP_FILTER);
- av1_set_segdata(seg, AM_SEGMENT_ID_INACTIVE, SEG_LVL_ALT_LF_V,
- -MAX_LOOP_FILTER);
- } else {
- av1_disable_segfeature(seg, AM_SEGMENT_ID_INACTIVE, SEG_LVL_SKIP);
- av1_disable_segfeature(seg, AM_SEGMENT_ID_INACTIVE, SEG_LVL_ALT_LF_Y_H);
- av1_disable_segfeature(seg, AM_SEGMENT_ID_INACTIVE, SEG_LVL_ALT_LF_Y_V);
- av1_disable_segfeature(seg, AM_SEGMENT_ID_INACTIVE, SEG_LVL_ALT_LF_U);
- av1_disable_segfeature(seg, AM_SEGMENT_ID_INACTIVE, SEG_LVL_ALT_LF_V);
- if (seg->enabled) {
- seg->update_data = 1;
- seg->update_map = 1;
- }
- }
- cpi->active_map.update = 0;
- }
-}
-
-int av1_set_active_map(AV1_COMP *cpi, unsigned char *new_map_16x16, int rows,
- int cols) {
- if (rows == cpi->common.mb_rows && cols == cpi->common.mb_cols) {
- unsigned char *const active_map_8x8 = cpi->active_map.map;
- const int mi_rows = cpi->common.mi_rows;
- const int mi_cols = cpi->common.mi_cols;
- const int row_scale = mi_size_high[BLOCK_16X16] == 2 ? 1 : 2;
- const int col_scale = mi_size_wide[BLOCK_16X16] == 2 ? 1 : 2;
- cpi->active_map.update = 1;
- if (new_map_16x16) {
- int r, c;
- for (r = 0; r < mi_rows; ++r) {
- for (c = 0; c < mi_cols; ++c) {
- active_map_8x8[r * mi_cols + c] =
- new_map_16x16[(r >> row_scale) * cols + (c >> col_scale)]
- ? AM_SEGMENT_ID_ACTIVE
- : AM_SEGMENT_ID_INACTIVE;
- }
- }
- cpi->active_map.enabled = 1;
- } else {
- cpi->active_map.enabled = 0;
- }
- return 0;
- } else {
- return -1;
- }
-}
-
-int av1_get_active_map(AV1_COMP *cpi, unsigned char *new_map_16x16, int rows,
- int cols) {
- if (rows == cpi->common.mb_rows && cols == cpi->common.mb_cols &&
- new_map_16x16) {
- unsigned char *const seg_map_8x8 = cpi->segmentation_map;
- const int mi_rows = cpi->common.mi_rows;
- const int mi_cols = cpi->common.mi_cols;
- const int row_scale = mi_size_high[BLOCK_16X16] == 2 ? 1 : 2;
- const int col_scale = mi_size_wide[BLOCK_16X16] == 2 ? 1 : 2;
-
- memset(new_map_16x16, !cpi->active_map.enabled, rows * cols);
- if (cpi->active_map.enabled) {
- int r, c;
- for (r = 0; r < mi_rows; ++r) {
- for (c = 0; c < mi_cols; ++c) {
- // Cyclic refresh segments are considered active despite not having
- // AM_SEGMENT_ID_ACTIVE
- new_map_16x16[(r >> row_scale) * cols + (c >> col_scale)] |=
- seg_map_8x8[r * mi_cols + c] != AM_SEGMENT_ID_INACTIVE;
- }
- }
- }
- return 0;
- } else {
- return -1;
- }
-}
-
-static void set_high_precision_mv(AV1_COMP *cpi, int allow_high_precision_mv,
- int cur_frame_force_integer_mv) {
- MACROBLOCK *const mb = &cpi->td.mb;
- cpi->common.allow_high_precision_mv =
- allow_high_precision_mv && cur_frame_force_integer_mv == 0;
- const int copy_hp =
- cpi->common.allow_high_precision_mv && cur_frame_force_integer_mv == 0;
- int *(*src)[2] = copy_hp ? &mb->nmvcost_hp : &mb->nmvcost;
- mb->mv_cost_stack = *src;
-}
-
-// Picks the superblock size for the sequence. An explicit 64x64 or 128x128
-// request in the encoder configuration is honoured (the latter subject to
-// the ext_partition file option when CONFIG_FILEOPTIONS is enabled);
-// otherwise the size is chosen dynamically.
-static BLOCK_SIZE select_sb_size(const AV1_COMP *const cpi) {
-  const AV1_COMMON *const cm = &cpi->common;
-  const AV1EncoderConfig *const oxcf = &cpi->oxcf;
-
-  if (oxcf->superblock_size == AOM_SUPERBLOCK_SIZE_64X64) return BLOCK_64X64;
-
-#if CONFIG_FILEOPTIONS
-  if (cm->options && cm->options->ext_partition)
-#endif
-  {
-    if (oxcf->superblock_size == AOM_SUPERBLOCK_SIZE_128X128)
-      return BLOCK_128X128;
-  }
-
-  assert(oxcf->superblock_size == AOM_SUPERBLOCK_SIZE_DYNAMIC);
-
-// TODO(any): Possibly could improve this with a heuristic.
-#if CONFIG_FILEOPTIONS
-  if (cm->options && !cm->options->ext_partition) return BLOCK_64X64;
-#endif
-
-  // 'cm->width / height' can change between calls when superres or resize
-  // is active, so the resolution heuristic is only applied when both are
-  // off. It also only gives a compression gain for speed >= 2.
-  const int fixed_frame_size = oxcf->superres_mode == SUPERRES_NONE &&
-                               oxcf->resize_mode == RESIZE_NONE;
-  if (fixed_frame_size && oxcf->speed >= 2) {
-    if (cm->width >= 480 && cm->height >= 360) return BLOCK_128X128;
-    return BLOCK_64X64;
-  }
-
-  return BLOCK_128X128;
-}
-
-// Per-frame setup performed before encoding. Chooses cm->frame_context_idx
-// and cm->primary_ref_frame, applies the key-frame / S-frame refresh flags
-// and superblock size, and either resets the coding context through
-// av1_setup_past_independence() or copies it from the saved context of the
-// primary reference frame.
-static void setup_frame(AV1_COMP *cpi) {
- AV1_COMMON *const cm = &cpi->common;
- // Set up entropy context depending on frame type. The decoder mandates
- // the use of the default context, index 0, for keyframes and inter
- // frames where the error_resilient_mode or intra_only flag is set. For
- // other inter-frames the encoder currently uses only two contexts;
- // context 1 for ALTREF frames and context 0 for the others.
-
- cm->primary_ref_frame = PRIMARY_REF_NONE;
- if (frame_is_intra_only(cm) || cm->error_resilient_mode ||
- cm->force_primary_ref_none) {
- av1_setup_past_independence(cm);
- // Forget which buffer holds each context type, then record one for
- // REGULAR_FRAME only.
- for (int i = 0; i < REF_FRAMES; i++) {
- cm->fb_of_context_type[i] = -1;
- }
- cm->fb_of_context_type[REGULAR_FRAME] =
- cm->show_frame ? get_ref_frame_map_idx(cpi, GOLDEN_FRAME)
- : get_ref_frame_map_idx(cpi, ALTREF_FRAME);
- cm->frame_context_idx = REGULAR_FRAME;
- } else {
- // Classify the frame by its GF-group update type and refresh flags.
- const GF_GROUP *gf_group = &cpi->twopass.gf_group;
- if (gf_group->update_type[gf_group->index] == INTNL_ARF_UPDATE)
- cm->frame_context_idx = EXT_ARF_FRAME;
- else if (cpi->refresh_alt_ref_frame)
- cm->frame_context_idx = ARF_FRAME;
- else if (cpi->rc.is_src_frame_alt_ref)
- cm->frame_context_idx = OVERLAY_FRAME;
- else if (cpi->refresh_golden_frame)
- cm->frame_context_idx = GLD_FRAME;
- else if (cpi->refresh_bwd_ref_frame)
- cm->frame_context_idx = BRF_FRAME;
- else
- cm->frame_context_idx = REGULAR_FRAME;
- // Use as primary reference a frame whose buffer index matches the one
- // recorded for this context type. The loop does not break, so when
- // several references match, the last one in LAST_FRAME..ALTREF_FRAME
- // order is kept.
- int wanted_fb = cm->fb_of_context_type[cm->frame_context_idx];
- for (int ref_frame = LAST_FRAME; ref_frame <= ALTREF_FRAME; ref_frame++) {
- int fb = get_ref_frame_map_idx(cpi, ref_frame);
- if (fb == wanted_fb) {
- cm->primary_ref_frame = ref_frame - LAST_FRAME;
- }
- }
- }
-
- if (cm->frame_type == KEY_FRAME && cm->show_frame) {
- cpi->refresh_golden_frame = 1;
- cpi->refresh_alt_ref_frame = 1;
- av1_zero(cpi->interp_filter_selected);
- set_sb_size(&cm->seq_params, select_sb_size(cpi));
- set_use_reference_buffer(cm, 0);
- } else if (frame_is_sframe(cm)) {
- cpi->refresh_golden_frame = 1;
- cpi->refresh_alt_ref_frame = 1;
- av1_zero(cpi->interp_filter_selected);
- set_sb_size(&cm->seq_params, select_sb_size(cpi));
- } else {
- // No usable primary reference: reset via av1_setup_past_independence()
- // and raise both segmentation update flags. Otherwise inherit the frame
- // context saved for the primary reference.
- if (cm->primary_ref_frame == PRIMARY_REF_NONE ||
- cm->frame_refs[cm->primary_ref_frame].idx < 0) {
- av1_setup_past_independence(cm);
- cm->seg.update_map = 1;
- cm->seg.update_data = 1;
- } else {
- *cm->fc = cm->frame_contexts[cm->frame_refs[cm->primary_ref_frame].idx];
- }
- av1_zero(cpi->interp_filter_selected[0]);
- }
-
- cm->prev_frame = get_prev_frame(cm);
- cpi->vaq_refresh = 0;
-}
-
-// Resets the encoder's mode-info storage for a new frame: points the visible
-// mi / grid pointers at their base arrays, zeroes the current mode-info
-// array and grid, and clears the top row and left column of the previous
-// frame's mode-info array.
-static void enc_setup_mi(AV1_COMMON *cm) {
-  const int aligned_rows = calc_mi_size(cm->mi_rows);
-
-  // Current frame: start from an all-zero mode-info array.
-  cm->mi = cm->mip;
-  memset(cm->mip, 0, cm->mi_stride * aligned_rows * sizeof(*cm->mip));
-
-  // Previous frame: only the borders are cleared.
-  cm->prev_mi = cm->prev_mip;
-  // Top border row.
-  memset(cm->prev_mip, 0, sizeof(*cm->prev_mip) * cm->mi_stride);
-  // Left border column: the first entry of every row.
-  for (int row = 0; row < aligned_rows; ++row) {
-    memset(&cm->prev_mip[row * cm->mi_stride], 0, sizeof(*cm->prev_mip));
-  }
-
-  cm->mi_grid_visible = cm->mi_grid_base;
-  cm->prev_mi_grid_visible = cm->prev_mi_grid_base;
-
-  memset(cm->mi_grid_base, 0,
-         cm->mi_stride * aligned_rows * sizeof(*cm->mi_grid_base));
-}
-
-// Allocates zero-initialized storage for `mi_size` mode-info entries: the
-// current and previous mode-info arrays plus their pointer grids.
-// Returns 0 on success and 1 as soon as any allocation fails; buffers that
-// were already allocated are left in place for the caller to release.
-static int enc_alloc_mi(AV1_COMMON *cm, int mi_size) {
-  cm->mip = aom_calloc(mi_size, sizeof(*cm->mip));
-  if (cm->mip == NULL) return 1;
-
-  cm->prev_mip = aom_calloc(mi_size, sizeof(*cm->prev_mip));
-  if (cm->prev_mip == NULL) return 1;
-
-  cm->mi_alloc_size = mi_size;
-
-  cm->mi_grid_base =
-      (MB_MODE_INFO **)aom_calloc(mi_size, sizeof(*cm->mi_grid_base));
-  if (cm->mi_grid_base == NULL) return 1;
-
-  cm->prev_mi_grid_base =
-      (MB_MODE_INFO **)aom_calloc(mi_size, sizeof(*cm->prev_mi_grid_base));
-  if (cm->prev_mi_grid_base == NULL) return 1;
-
-  return 0;
-}
-
-// Releases the four buffers allocated by enc_alloc_mi() and resets the
-// corresponding pointers and the recorded allocation size.
-static void enc_free_mi(AV1_COMMON *cm) {
-  // Mode-info arrays.
-  aom_free(cm->mip);
-  aom_free(cm->prev_mip);
-  cm->mip = NULL;
-  cm->prev_mip = NULL;
-
-  // Mode-info pointer grids.
-  aom_free(cm->mi_grid_base);
-  aom_free(cm->prev_mi_grid_base);
-  cm->mi_grid_base = NULL;
-  cm->prev_mi_grid_base = NULL;
-
-  cm->mi_alloc_size = 0;
-}
-
-// Exchanges the current and previous mode-info arrays (and their pointer
-// grids) so that the frame just coded becomes "previous" for the next one,
-// then refreshes the visible pointers derived from them.
-static void swap_mi_and_prev_mi(AV1_COMMON *cm) {
-  MB_MODE_INFO *const old_prev_mip = cm->prev_mip;
-  MB_MODE_INFO **const old_prev_grid = cm->prev_mi_grid_base;
-
-  // Swap the mode-info arrays.
-  cm->prev_mip = cm->mip;
-  cm->mip = old_prev_mip;
-
-  // Swap the pointer grids.
-  cm->prev_mi_grid_base = cm->mi_grid_base;
-  cm->mi_grid_base = old_prev_grid;
-
-  // Re-derive the upper-left visible pointers from the swapped bases.
-  cm->mi = cm->mip;
-  cm->prev_mi = cm->prev_mip;
-  cm->mi_grid_visible = cm->mi_grid_base;
-  cm->prev_mi_grid_visible = cm->prev_mi_grid_base;
-}
-
-// One-time global initialization for the encoder: runs the three *_rtcd()
-// setup routines and then builds the lookup tables / predictors used by the
-// encoder (intra predictors, motion-estimation LUTs, rate-control min-Q
-// LUTs and wedge masks).
-// NOTE(review): the rtcd calls presumably install CPU-specific function
-// pointers and therefore need to stay first -- confirm before reordering.
-void av1_initialize_enc(void) {
- av1_rtcd();
- aom_dsp_rtcd();
- aom_scale_rtcd();
- av1_init_intra_predictors();
- av1_init_me_luts();
- av1_rc_init_minq_luts();
- av1_init_wedge_masks();
-}
-
-// Frees the extended mode-info buffer, if one is currently allocated, and
-// clears the pointer.
-static void dealloc_context_buffers_ext(AV1_COMP *cpi) {
-  if (cpi->mbmi_ext_base == NULL) return;
-
-  aom_free(cpi->mbmi_ext_base);
-  cpi->mbmi_ext_base = NULL;
-}
-
-// (Re)allocates the zero-initialized extended mode-info buffer with one
-// entry per mode-info unit of the current frame (mi_cols * mi_rows). Any
-// previous buffer is released first; allocation failure is handled by
-// CHECK_MEM_ERROR.
-static void alloc_context_buffers_ext(AV1_COMP *cpi) {
-  AV1_COMMON *const cm = &cpi->common;
-  const int num_mi_units = cm->mi_cols * cm->mi_rows;
-
-  dealloc_context_buffers_ext(cpi);
-
-  CHECK_MEM_ERROR(cm, cpi->mbmi_ext_base,
-                  aom_calloc(num_mi_units, sizeof(*cpi->mbmi_ext_base)));
-}
-
-// Copies `oxcf` into cpi->oxcf and rebuilds the film grain state from it.
-//
-// Any previously loaded film grain table is released first. Then, in order
-// of priority:
-//  - a non-zero film_grain_test_vector marks film grain as present and, on
-//    key frames, loads the selected built-in test vector (1-based index);
-//  - otherwise a film_grain_table_filename causes a table to be allocated
-//    and read from that file, with read errors reported through cm->error;
-//  - otherwise film grain is disabled and its parameters are zeroed.
-static void update_film_grain_parameters(struct AV1_COMP *cpi,
-                                         const AV1EncoderConfig *oxcf) {
-  AV1_COMMON *const cm = &cpi->common;
-  cpi->oxcf = *oxcf;
-
-  // Drop the table left over from a previous configuration, if any.
-  if (cpi->film_grain_table) {
-    aom_film_grain_table_free(cpi->film_grain_table);
-    aom_free(cpi->film_grain_table);
-    cpi->film_grain_table = NULL;
-  }
-
-  if (oxcf->film_grain_test_vector) {
-    cm->seq_params.film_grain_params_present = 1;
-    if (cm->frame_type == KEY_FRAME) {
-      memcpy(&cm->film_grain_params,
-             film_grain_test_vectors + oxcf->film_grain_test_vector - 1,
-             sizeof(cm->film_grain_params));
-
-      cm->film_grain_params.bit_depth = cm->seq_params.bit_depth;
-      if (cm->seq_params.color_range == AOM_CR_FULL_RANGE) {
-        cm->film_grain_params.clip_to_restricted_range = 0;
-      }
-    }
-  } else if (oxcf->film_grain_table_filename) {
-    // Zero-initialized allocation, checked with the same CHECK_MEM_ERROR
-    // macro used for the other buffers in this file. The previous
-    // aom_malloc() + memset() pair wrote through a NULL pointer when the
-    // allocation failed.
-    CHECK_MEM_ERROR(cm, cpi->film_grain_table,
-                    aom_calloc(1, sizeof(*cpi->film_grain_table)));
-
-    aom_film_grain_table_read(cpi->film_grain_table,
-                              oxcf->film_grain_table_filename, &cm->error);
-  } else {
-    cm->seq_params.film_grain_params_present = 0;
-    memset(&cm->film_grain_params, 0, sizeof(cm->film_grain_params));
-  }
-}
-
-// Releases the buffers owned by the compressor instance: context and
-// segmentation buffers, per-thread scratch buffers in cpi->td.mb, reference
-// and utility frame buffers, the lookahead queue, token storage, the
-// partition tree and the optional denoiser / film grain table.
-// Pointers freed directly here are reset to NULL so that a later call (or a
-// stray use) does not see dangling pointers.
-static void dealloc_compressor_data(AV1_COMP *cpi) {
-  AV1_COMMON *const cm = &cpi->common;
-  const int num_planes = av1_num_planes(cm);
-
-  dealloc_context_buffers_ext(cpi);
-
-  aom_free(cpi->tile_data);
-  cpi->tile_data = NULL;
-
-  // Delete segmentation map
-  aom_free(cpi->segmentation_map);
-  cpi->segmentation_map = NULL;
-
-  av1_cyclic_refresh_free(cpi->cyclic_refresh);
-  cpi->cyclic_refresh = NULL;
-
-  aom_free(cpi->active_map.map);
-  cpi->active_map.map = NULL;
-
-  aom_free(cpi->td.mb.above_pred_buf);
-  cpi->td.mb.above_pred_buf = NULL;
-
-  aom_free(cpi->td.mb.left_pred_buf);
-  cpi->td.mb.left_pred_buf = NULL;
-
-  aom_free(cpi->td.mb.wsrc_buf);
-  cpi->td.mb.wsrc_buf = NULL;
-
-  for (int i = 0; i < 2; i++)
-    for (int j = 0; j < 2; j++) {
-      aom_free(cpi->td.mb.hash_value_buffer[i][j]);
-      cpi->td.mb.hash_value_buffer[i][j] = NULL;
-    }
-  aom_free(cpi->td.mb.mask_buf);
-  cpi->td.mb.mask_buf = NULL;
-
-  aom_free(cm->tpl_mvs);
-  cm->tpl_mvs = NULL;
-
-  av1_free_ref_frame_buffers(cm->buffer_pool);
-  av1_free_txb_buf(cpi);
-  av1_free_context_buffers(cm);
-
-  aom_free_frame_buffer(&cpi->last_frame_uf);
-  av1_free_restoration_buffers(cm);
-  aom_free_frame_buffer(&cpi->trial_frame_rst);
-  aom_free_frame_buffer(&cpi->scaled_source);
-  aom_free_frame_buffer(&cpi->scaled_last_source);
-  aom_free_frame_buffer(&cpi->alt_ref_buffer);
-  av1_lookahead_destroy(cpi->lookahead);
-
-  aom_free(cpi->tile_tok[0][0]);
-  cpi->tile_tok[0][0] = NULL;
-
-  aom_free(cpi->tplist[0][0]);
-  cpi->tplist[0][0] = NULL;
-
-  av1_free_pc_tree(&cpi->td, num_planes);
-
-  aom_free(cpi->td.mb.palette_buffer);
-  cpi->td.mb.palette_buffer = NULL;
-
-  aom_free(cpi->td.mb.tmp_conv_dst);
-  cpi->td.mb.tmp_conv_dst = NULL;
-  for (int j = 0; j < 2; ++j) {
-    aom_free(cpi->td.mb.tmp_obmc_bufs[j]);
-    cpi->td.mb.tmp_obmc_bufs[j] = NULL;
-  }
-
-#if CONFIG_DENOISE
-  if (cpi->denoise_and_model) {
-    aom_denoise_and_model_free(cpi->denoise_and_model);
-    cpi->denoise_and_model = NULL;
-  }
-#endif
-  if (cpi->film_grain_table) {
-    // The table struct itself is heap-allocated in
-    // update_film_grain_parameters(), so it has to be freed in addition to
-    // its contents, exactly as that function does when replacing a table.
-    aom_film_grain_table_free(cpi->film_grain_table);
-    aom_free(cpi->film_grain_table);
-    cpi->film_grain_table = NULL;
-  }
-}
-
-// Snapshots the state that a re-code iteration may modify -- the MV cost
-// tables and the frame entropy context -- into cpi->coding_context, so that
-// restore_coding_context() can roll it back before the frame is encoded
-// again with a different quantizer.
-static void save_coding_context(AV1_COMP *cpi) {
-  AV1_COMMON *const cm = &cpi->common;
-  CODING_CONTEXT *const snapshot = &cpi->coding_context;
-
-  // Motion vector cost tables.
-  av1_copy(snapshot->nmv_vec_cost, cpi->td.mb.nmv_vec_cost);
-  av1_copy(snapshot->nmv_costs, cpi->nmv_costs);
-  av1_copy(snapshot->nmv_costs_hp, cpi->nmv_costs_hp);
-
-  // Frame entropy context.
-  snapshot->fc = *cm->fc;
-}
-
-// Restores the MV cost tables and the frame entropy context from the
-// snapshot taken by the most recent save_coding_context() call.
-static void restore_coding_context(AV1_COMP *cpi) {
-  AV1_COMMON *const cm = &cpi->common;
-  CODING_CONTEXT *const snapshot = &cpi->coding_context;
-
-  // Motion vector cost tables.
-  av1_copy(cpi->td.mb.nmv_vec_cost, snapshot->nmv_vec_cost);
-  av1_copy(cpi->nmv_costs, snapshot->nmv_costs);
-  av1_copy(cpi->nmv_costs_hp, snapshot->nmv_costs_hp);
-
-  // Frame entropy context.
-  *cm->fc = snapshot->fc;
-}
-
-// Configures segmentation for the current frame: sets the map/data update
-// flags and enables or clears per-segment features (alternate Q, loop
-// filter deltas, reference frame, skip) depending on the frame type and the
-// rate-control state. Four cases are handled: key frames, alt-ref frames,
-// other frames while segmentation is enabled, and (implicitly) everything
-// else, which is left untouched.
-static void configure_static_seg_features(AV1_COMP *cpi) {
- AV1_COMMON *const cm = &cpi->common;
- const RATE_CONTROL *const rc = &cpi->rc;
- struct segmentation *const seg = &cm->seg;
-
- // "High Q" is an average quantizer above 48; it gates the skip / forced
- // reference features below.
- int high_q = (int)(rc->avg_q > 48.0);
- int qi_delta;
-
- // Disable and clear down for KF
- if (cm->frame_type == KEY_FRAME) {
- // Clear down the global segmentation map
- memset(cpi->segmentation_map, 0, cm->mi_rows * cm->mi_cols);
- seg->update_map = 0;
- seg->update_data = 0;
- cpi->static_mb_pct = 0;
-
- // Disable segmentation
- av1_disable_segmentation(seg);
-
- // Clear down the segment features.
- av1_clearall_segfeatures(seg);
- } else if (cpi->refresh_alt_ref_frame) {
- // If this is an alt ref frame
- // Clear down the global segmentation map
- memset(cpi->segmentation_map, 0, cm->mi_rows * cm->mi_cols);
- seg->update_map = 0;
- seg->update_data = 0;
- cpi->static_mb_pct = 0;
-
- // Disable segmentation and individual segment features by default
- av1_disable_segmentation(seg);
- av1_clearall_segfeatures(seg);
-
- // Scan frames from current to arf frame.
- // This function re-enables segmentation if appropriate.
- av1_update_mbgraph_stats(cpi);
-
- // If segmentation was enabled set those features needed for the
- // arf itself.
- if (seg->enabled) {
- seg->update_map = 1;
- seg->update_data = 1;
-
- // Segment 1 gets a Q delta computed for 0.875 * avg_q, lowered by a
- // further 2, and loop filter deltas of -2 on every plane/direction.
- qi_delta = av1_compute_qdelta(rc, rc->avg_q, rc->avg_q * 0.875,
- cm->seq_params.bit_depth);
- av1_set_segdata(seg, 1, SEG_LVL_ALT_Q, qi_delta - 2);
- av1_set_segdata(seg, 1, SEG_LVL_ALT_LF_Y_H, -2);
- av1_set_segdata(seg, 1, SEG_LVL_ALT_LF_Y_V, -2);
- av1_set_segdata(seg, 1, SEG_LVL_ALT_LF_U, -2);
- av1_set_segdata(seg, 1, SEG_LVL_ALT_LF_V, -2);
-
- av1_enable_segfeature(seg, 1, SEG_LVL_ALT_LF_Y_H);
- av1_enable_segfeature(seg, 1, SEG_LVL_ALT_LF_Y_V);
- av1_enable_segfeature(seg, 1, SEG_LVL_ALT_LF_U);
- av1_enable_segfeature(seg, 1, SEG_LVL_ALT_LF_V);
-
- av1_enable_segfeature(seg, 1, SEG_LVL_ALT_Q);
- }
- } else if (seg->enabled) {
- // All other frames if segmentation has been enabled
-
- // First normal frame in a valid gf or alt ref group
- if (rc->frames_since_golden == 0) {
- // Set up segment features for normal frames in an arf group
- if (rc->source_alt_ref_active) {
- seg->update_map = 0;
- seg->update_data = 1;
-
- // Segment 1 gets a Q delta computed for 1.125 * avg_q, raised by a
- // further 2.
- qi_delta = av1_compute_qdelta(rc, rc->avg_q, rc->avg_q * 1.125,
- cm->seq_params.bit_depth);
- av1_set_segdata(seg, 1, SEG_LVL_ALT_Q, qi_delta + 2);
- av1_enable_segfeature(seg, 1, SEG_LVL_ALT_Q);
-
- av1_set_segdata(seg, 1, SEG_LVL_ALT_LF_Y_H, -2);
- av1_set_segdata(seg, 1, SEG_LVL_ALT_LF_Y_V, -2);
- av1_set_segdata(seg, 1, SEG_LVL_ALT_LF_U, -2);
- av1_set_segdata(seg, 1, SEG_LVL_ALT_LF_V, -2);
-
- av1_enable_segfeature(seg, 1, SEG_LVL_ALT_LF_Y_H);
- av1_enable_segfeature(seg, 1, SEG_LVL_ALT_LF_Y_V);
- av1_enable_segfeature(seg, 1, SEG_LVL_ALT_LF_U);
- av1_enable_segfeature(seg, 1, SEG_LVL_ALT_LF_V);
-
- // Segment coding disabled for compred testing
- if (high_q || (cpi->static_mb_pct == 100)) {
- av1_set_segdata(seg, 1, SEG_LVL_REF_FRAME, ALTREF_FRAME);
- av1_enable_segfeature(seg, 1, SEG_LVL_REF_FRAME);
- av1_enable_segfeature(seg, 1, SEG_LVL_SKIP);
- }
- } else {
- // Disable segmentation and clear down features if alt ref
- // is not active for this group
-
- av1_disable_segmentation(seg);
-
- memset(cpi->segmentation_map, 0, cm->mi_rows * cm->mi_cols);
-
- seg->update_map = 0;
- seg->update_data = 0;
-
- av1_clearall_segfeatures(seg);
- }
- } else if (rc->is_src_frame_alt_ref) {
- // Special case where we are coding over the top of a previous
- // alt ref frame.
- // Segment coding disabled for compred testing
-
- // Enable ref frame features for segment 0 as well
- av1_enable_segfeature(seg, 0, SEG_LVL_REF_FRAME);
- av1_enable_segfeature(seg, 1, SEG_LVL_REF_FRAME);
-
- // All mbs should use ALTREF_FRAME
- av1_clear_segdata(seg, 0, SEG_LVL_REF_FRAME);
- av1_set_segdata(seg, 0, SEG_LVL_REF_FRAME, ALTREF_FRAME);
- av1_clear_segdata(seg, 1, SEG_LVL_REF_FRAME);
- av1_set_segdata(seg, 1, SEG_LVL_REF_FRAME, ALTREF_FRAME);
-
- // Skip all MBs if high Q (0,0 mv and skip coeffs)
- if (high_q) {
- av1_enable_segfeature(seg, 0, SEG_LVL_SKIP);
- av1_enable_segfeature(seg, 1, SEG_LVL_SKIP);
- }
- // Enable data update
- seg->update_data = 1;
- } else {
- // All other frames.
-
- // No updates.. leave things as they are.
- seg->update_map = 0;
- seg->update_data = 0;
- }
- }
-}
-
-// Copies the segment id of every visible mode-info unit into
-// cm->current_frame_seg_map. The grid is walked with a row pitch of
-// cm->mi_stride, while the destination map is packed with cm->mi_cols
-// entries per row.
-static void update_reference_segmentation_map(AV1_COMP *cpi) {
-  AV1_COMMON *const cm = &cpi->common;
-
-  for (int row = 0; row < cm->mi_rows; ++row) {
-    MB_MODE_INFO **const grid_row = cm->mi_grid_visible + row * cm->mi_stride;
-    uint8_t *const seg_row = cm->current_frame_seg_map + row * cm->mi_cols;
-
-    for (int col = 0; col < cm->mi_cols; ++col) {
-      seg_row[col] = grid_row[col]->segment_id;
-    }
-  }
-}
-
-// Allocates the buffers that hold source-resolution frames: the lookahead
-// queue (created only if it does not exist yet) and the alt-ref buffer.
-// Allocation failures are reported through aom_internal_error() as
-// AOM_CODEC_MEM_ERROR.
-static void alloc_raw_frame_buffers(AV1_COMP *cpi) {
-  AV1_COMMON *const cm = &cpi->common;
-  const AV1EncoderConfig *const oxcf = &cpi->oxcf;
-  const SequenceHeader *const seq = &cm->seq_params;
-
-  if (cpi->lookahead == NULL) {
-    cpi->lookahead = av1_lookahead_init(
-        oxcf->width, oxcf->height, seq->subsampling_x, seq->subsampling_y,
-        seq->use_highbitdepth, oxcf->lag_in_frames);
-  }
-  if (cpi->lookahead == NULL) {
-    aom_internal_error(&cm->error, AOM_CODEC_MEM_ERROR,
-                       "Failed to allocate lag buffers");
-  }
-
-  // TODO(agrange) Check if ARF is enabled and skip allocation if not.
-  const int altref_alloc_failed = aom_realloc_frame_buffer(
-      &cpi->alt_ref_buffer, oxcf->width, oxcf->height, seq->subsampling_x,
-      seq->subsampling_y, seq->use_highbitdepth, AOM_BORDER_IN_PIXELS,
-      cm->byte_alignment, NULL, NULL, NULL);
-  if (altref_alloc_failed) {
-    aom_internal_error(&cm->error, AOM_CODEC_MEM_ERROR,
-                       "Failed to allocate altref buffer");
-  }
-}
-
-// (Re)allocates the encoder's working frame buffers: the unfiltered copy of
-// the last frame, the trial loop-restoration frame (sized to the
-// superres-upscaled dimensions) and the two scaled source buffers.
-// Each failure is reported through aom_internal_error() as
-// AOM_CODEC_MEM_ERROR.
-static void alloc_util_frame_buffers(AV1_COMP *cpi) {
-  AV1_COMMON *const cm = &cpi->common;
-  const SequenceHeader *const seq = &cm->seq_params;
-  int failed;
-
-  failed = aom_realloc_frame_buffer(
-      &cpi->last_frame_uf, cm->width, cm->height, seq->subsampling_x,
-      seq->subsampling_y, seq->use_highbitdepth, AOM_BORDER_IN_PIXELS,
-      cm->byte_alignment, NULL, NULL, NULL);
-  if (failed) {
-    aom_internal_error(&cm->error, AOM_CODEC_MEM_ERROR,
-                       "Failed to allocate last frame buffer");
-  }
-
-  failed = aom_realloc_frame_buffer(
-      &cpi->trial_frame_rst, cm->superres_upscaled_width,
-      cm->superres_upscaled_height, seq->subsampling_x, seq->subsampling_y,
-      seq->use_highbitdepth, AOM_BORDER_IN_PIXELS, cm->byte_alignment, NULL,
-      NULL, NULL);
-  if (failed) {
-    aom_internal_error(&cm->error, AOM_CODEC_MEM_ERROR,
-                       "Failed to allocate trial restored frame buffer");
-  }
-
-  failed = aom_realloc_frame_buffer(
-      &cpi->scaled_source, cm->width, cm->height, seq->subsampling_x,
-      seq->subsampling_y, seq->use_highbitdepth, AOM_BORDER_IN_PIXELS,
-      cm->byte_alignment, NULL, NULL, NULL);
-  if (failed) {
-    aom_internal_error(&cm->error, AOM_CODEC_MEM_ERROR,
-                       "Failed to allocate scaled source buffer");
-  }
-
-  failed = aom_realloc_frame_buffer(
-      &cpi->scaled_last_source, cm->width, cm->height, seq->subsampling_x,
-      seq->subsampling_y, seq->use_highbitdepth, AOM_BORDER_IN_PIXELS,
-      cm->byte_alignment, NULL, NULL, NULL);
-  if (failed) {
-    aom_internal_error(&cm->error, AOM_CODEC_MEM_ERROR,
-                       "Failed to allocate scaled last source buffer");
-  }
-}
-
-// Allocates the per-frame-size data owned by the compressor: common context
-// buffers, transform-block buffers, the extended mode-info buffer, token
-// storage, the per-superblock-row token list and the partition tree.
-// Existing token buffers are freed before being reallocated.
-static void alloc_compressor_data(AV1_COMP *cpi) {
-  AV1_COMMON *const cm = &cpi->common;
-  const int num_planes = av1_num_planes(cm);
-
-  av1_alloc_context_buffers(cm, cm->width, cm->height);
-
-  // Number of superblock rows, rounding mi_rows up to a whole superblock.
-  const int sb_rows =
-      ALIGN_POWER_OF_TWO(cm->mi_rows, cm->seq_params.mib_size_log2) >>
-      cm->seq_params.mib_size_log2;
-
-  av1_alloc_txb_buf(cpi);
-
-  alloc_context_buffers_ext(cpi);
-
-  // Token storage.
-  aom_free(cpi->tile_tok[0][0]);
-  const unsigned int token_count =
-      get_token_alloc(cm->mb_rows, cm->mb_cols, MAX_SB_SIZE_LOG2, num_planes);
-  CHECK_MEM_ERROR(cm, cpi->tile_tok[0][0],
-                  aom_calloc(token_count, sizeof(*cpi->tile_tok[0][0])));
-
-  // One token-list entry per superblock row for every possible tile.
-  aom_free(cpi->tplist[0][0]);
-  CHECK_MEM_ERROR(cm, cpi->tplist[0][0],
-                  aom_calloc(sb_rows * MAX_TILE_ROWS * MAX_TILE_COLS,
-                             sizeof(*cpi->tplist[0][0])));
-
-  av1_setup_pc_tree(cm, &cpi->td);
-}
-
-// Sets the encoder frame rate and lets rate control recompute its
-// frame-rate-dependent values. Rates below 0.1 fps are replaced by 30 fps.
-void av1_new_framerate(AV1_COMP *cpi, double framerate) {
-  if (framerate < 0.1) {
-    cpi->framerate = 30;
-  } else {
-    cpi->framerate = framerate;
-  }
-  av1_rc_update_framerate(cpi, cpi->common.width, cpi->common.height);
-}
-
-// Configures the tile layout of the current frame from the encoder
-// configuration. When no explicit tile widths or heights are configured,
-// uniform tile spacing is used and the requested log2 tile counts are
-// clamped to the limits computed by av1_get_tile_limits(). Otherwise tile
-// boundaries are laid out in superblock units from the configured
-// tile_widths[] / tile_heights[] arrays.
-static void set_tile_info(AV1_COMP *cpi) {
- AV1_COMMON *const cm = &cpi->common;
- int i, start_sb;
-
- av1_get_tile_limits(cm);
-
- // configure tile columns
- if (cpi->oxcf.tile_width_count == 0 || cpi->oxcf.tile_height_count == 0) {
- cm->uniform_tile_spacing_flag = 1;
- cm->log2_tile_cols = AOMMAX(cpi->oxcf.tile_columns, cm->min_log2_tile_cols);
- cm->log2_tile_cols = AOMMIN(cm->log2_tile_cols, cm->max_log2_tile_cols);
- } else {
- int mi_cols = ALIGN_POWER_OF_TWO(cm->mi_cols, cm->seq_params.mib_size_log2);
- int sb_cols = mi_cols >> cm->seq_params.mib_size_log2;
- int size_sb, j = 0;
- cm->uniform_tile_spacing_flag = 0;
- // Walk across the frame in superblocks. The configured widths are reused
- // cyclically (j wraps to 0) and each is capped at max_tile_width_sb.
- for (i = 0, start_sb = 0; start_sb < sb_cols && i < MAX_TILE_COLS; i++) {
- cm->tile_col_start_sb[i] = start_sb;
- size_sb = cpi->oxcf.tile_widths[j++];
- if (j >= cpi->oxcf.tile_width_count) j = 0;
- start_sb += AOMMIN(size_sb, cm->max_tile_width_sb);
- }
- cm->tile_cols = i;
- // Terminating entry: the end of the last tile column.
- cm->tile_col_start_sb[i] = sb_cols;
- }
- av1_calculate_tile_cols(cm);
-
- // configure tile rows
- if (cm->uniform_tile_spacing_flag) {
- cm->log2_tile_rows = AOMMAX(cpi->oxcf.tile_rows, cm->min_log2_tile_rows);
- cm->log2_tile_rows = AOMMIN(cm->log2_tile_rows, cm->max_log2_tile_rows);
- } else {
- int mi_rows = ALIGN_POWER_OF_TWO(cm->mi_rows, cm->seq_params.mib_size_log2);
- int sb_rows = mi_rows >> cm->seq_params.mib_size_log2;
- int size_sb, j = 0;
- // Same scheme as for columns, using tile_heights[] and
- // max_tile_height_sb.
- for (i = 0, start_sb = 0; start_sb < sb_rows && i < MAX_TILE_ROWS; i++) {
- cm->tile_row_start_sb[i] = start_sb;
- size_sb = cpi->oxcf.tile_heights[j++];
- if (j >= cpi->oxcf.tile_height_count) j = 0;
- start_sb += AOMMIN(size_sb, cm->max_tile_height_sb);
- }
- cm->tile_rows = i;
- // Terminating entry: the end of the last tile row.
- cm->tile_row_start_sb[i] = sb_rows;
- }
- av1_calculate_tile_rows(cm);
-}
-
-// Re-derives everything that depends on the coded frame size after
-// cm->width / cm->height change: mode-info dimensions, context buffers,
-// the macroblock descriptor, the (zeroed) extended mode-info buffer and the
-// tile layout.
-static void update_frame_size(AV1_COMP *cpi) {
-  AV1_COMMON *const cm = &cpi->common;
-  MACROBLOCKD *const mbd = &cpi->td.mb.e_mbd;
-
-  av1_set_mb_mi(cm, cm->width, cm->height);
-  av1_init_context_buffers(cm);
-  av1_init_macroblockd(cm, mbd, NULL);
-
-  // Clear one extended mode-info entry per mode-info unit of the new size.
-  memset(cpi->mbmi_ext_base, 0,
-         cm->mi_rows * cm->mi_cols * sizeof(*cpi->mbmi_ext_base));
-
-  set_tile_info(cpi);
-}
-
-// Resets the reference-buffer index table to the identity mapping and
-// clears the rate bookkeeping fields and the current POC.
-static void init_buffer_indices(AV1_COMP *cpi) {
-  for (int idx = 0; idx < REF_FRAMES; ++idx) {
-    cpi->ref_fb_idx[idx] = idx;
-  }
-
-  cpi->rate_index = 0;
-  cpi->rate_size = 0;
-  cpi->cur_poc = -1;
-}
-
-// Returns non-zero when a stream of `width` x `height` at `fps` frames per
-// second fits within a level described by `lvl_width` x `lvl_height` at
-// `lvl_fps`:
-//  - the picture area must not exceed the level's picture area,
-//  - the display sample rate (area * fps) must not exceed the level's, and
-//  - each dimension must not exceed the level dimension times
-//    `lvl_dim_mult`.
-// All products are computed in 64 bits. Multiplying two `int` dimensions
-// before widening overflows (undefined behaviour) for very large frames.
-static INLINE int does_level_match(int width, int height, double fps,
-                                   int lvl_width, int lvl_height,
-                                   double lvl_fps, int lvl_dim_mult) {
-  const int64_t lvl_luma_pels = (int64_t)lvl_width * lvl_height;
-  const double lvl_display_sample_rate = lvl_luma_pels * lvl_fps;
-  const int64_t luma_pels = (int64_t)width * height;
-  const double display_sample_rate = luma_pels * fps;
-  return luma_pels <= lvl_luma_pels &&
-         display_sample_rate <= lvl_display_sample_rate &&
-         (int64_t)width <= (int64_t)lvl_width * lvl_dim_mult &&
-         (int64_t)height <= (int64_t)lvl_height * lvl_dim_mult;
-}
-
-// Chooses the bitstream level for every operating point from the configured
-// frame size and initial frame rate, selects the main tier, and derives the
-// per-operating-point maximum bitrate and buffer size from that choice.
-// The first (lowest) level whose limits are satisfied is used; if none
-// matches, level 9.3 is kept. An unsupported profile/level/tier combination
-// is reported through aom_internal_error().
-static void set_bitstream_level_tier(SequenceHeader *seq, AV1_COMMON *cm,
-                                     const AV1EncoderConfig *oxcf) {
-  // TODO(any): This is a placeholder function that only addresses dimensions
-  // and max display sample rates.
-  // Need to add checks for max bit rate, max decoded luma sample rate, header
-  // rate, etc. that are not covered by this function.
-
-  // Level limits, in ascending order. Each row is passed to
-  // does_level_match() as (lvl_width, lvl_height, lvl_fps, lvl_dim_mult).
-  static const struct {
-    int width;
-    int height;
-    double fps;
-    int dim_mult;
-    int major;
-    int minor;
-  } level_limits[] = {
-    { 512, 288, 30.0, 4, 2, 0 },      { 704, 396, 30.0, 4, 2, 1 },
-    { 1088, 612, 30.0, 4, 3, 0 },     { 1376, 774, 30.0, 4, 3, 1 },
-    { 2048, 1152, 30.0, 3, 4, 0 },    { 2048, 1152, 60.0, 3, 4, 1 },
-    { 4096, 2176, 30.0, 2, 5, 0 },    { 4096, 2176, 60.0, 2, 5, 1 },
-    { 4096, 2176, 120.0, 2, 5, 2 },   { 8192, 4352, 30.0, 2, 6, 0 },
-    { 8192, 4352, 60.0, 2, 6, 1 },    { 8192, 4352, 120.0, 2, 6, 2 },
-    { 16384, 8704, 30.0, 2, 7, 0 },   { 16384, 8704, 60.0, 2, 7, 1 },
-    { 16384, 8704, 120.0, 2, 7, 2 },
-  };
-  const int num_levels =
-      (int)(sizeof(level_limits) / sizeof(level_limits[0]));
-
-  BitstreamLevel bl = { 9, 3 };
-  for (int l = 0; l < num_levels; ++l) {
-    if (does_level_match(oxcf->width, oxcf->height, oxcf->init_framerate,
-                         level_limits[l].width, level_limits[l].height,
-                         level_limits[l].fps, level_limits[l].dim_mult)) {
-      bl.major = level_limits[l].major;
-      bl.minor = level_limits[l].minor;
-      break;
-    }
-  }
-
-  for (int i = 0; i < MAX_NUM_OPERATING_POINTS; ++i) {
-    seq->level[i] = bl;
-    seq->tier[i] = 0;  // setting main tier by default
-    // Set the maximum parameters for bitrate and buffer size for this profile,
-    // level, and tier
-    cm->op_params[i].bitrate = max_level_bitrate(
-        cm->seq_params.profile, major_minor_to_seq_level_idx(seq->level[i]),
-        seq->tier[i]);
-    // Level with seq_level_idx = 31 returns a high "dummy" bitrate to pass the
-    // check
-    if (cm->op_params[i].bitrate == 0)
-      aom_internal_error(
-          &cm->error, AOM_CODEC_UNSUP_BITSTREAM,
-          "AV1 does not support this combination of profile, level, and tier.");
-    // Buffer size in bits/s is bitrate in bits/s * 1 s
-    cm->op_params[i].buffer_size = cm->op_params[i].bitrate;
-  }
-}
-
-// Fills in the sequence-header coding-tool flags from the encoder
-// configuration, selects the bitstream level/tier, and sets up
-// operating_point_idc[] for the configured number of operating points.
-static void init_seq_coding_tools(SequenceHeader *seq, AV1_COMMON *cm,
- const AV1EncoderConfig *oxcf) {
- // A single-frame encode (limit == 1) is signalled as a still picture; the
- // reduced header is used unless the full header was requested.
- seq->still_picture = (oxcf->limit == 1);
- seq->reduced_still_picture_hdr = seq->still_picture;
- seq->reduced_still_picture_hdr &= !oxcf->full_still_picture_hdr;
- seq->force_screen_content_tools = 2;
- seq->force_integer_mv = 2;
- seq->enable_order_hint = oxcf->enable_order_hint;
- seq->frame_id_numbers_present_flag = oxcf->large_scale_tile;
- if (seq->still_picture && seq->reduced_still_picture_hdr) {
- seq->enable_order_hint = 0;
- seq->frame_id_numbers_present_flag = 0;
- // NOTE(review): the two assignments below repeat the values already set
- // unconditionally above.
- seq->force_screen_content_tools = 2;
- seq->force_integer_mv = 2;
- }
- seq->order_hint_bits_minus_1 =
- seq->enable_order_hint ? DEFAULT_EXPLICIT_ORDER_HINT_BITS - 1 : -1;
-
- seq->enable_dual_filter = oxcf->enable_dual_filter;
- // Joint compound and reference-frame MVs are only enabled together with
- // order hints.
- seq->enable_jnt_comp = oxcf->enable_jnt_comp;
- seq->enable_jnt_comp &= seq->enable_order_hint;
- seq->enable_ref_frame_mvs = oxcf->enable_ref_frame_mvs;
- seq->enable_ref_frame_mvs &= seq->enable_order_hint;
- seq->enable_superres = oxcf->enable_superres;
- seq->enable_cdef = oxcf->enable_cdef;
- seq->enable_restoration = oxcf->enable_restoration;
- seq->enable_warped_motion = oxcf->enable_warped_motion;
- // These tools are always enabled here, independent of the configuration.
- seq->enable_interintra_compound = 1;
- seq->enable_masked_compound = 1;
- seq->enable_intra_edge_filter = 1;
- seq->enable_filter_intra = 1;
-
- set_bitstream_level_tier(seq, cm, oxcf);
-
- if (seq->operating_points_cnt_minus_1 == 0) {
- seq->operating_point_idc[0] = 0;
- } else {
- // Set operating_point_idc[] such that for the i-th operating point the
- // first (operating_points_cnt-i) spatial layers and the first temporal
- // layer are decoded Note that highest quality operating point should come
- // first
- // The expression builds a mask of (operating_points_cnt - i) low bits,
- // shifts it into bits 8 and up, and sets bit 0.
- for (int i = 0; i < seq->operating_points_cnt_minus_1 + 1; i++)
- seq->operating_point_idc[i] =
- (~(~0u << (seq->operating_points_cnt_minus_1 + 1 - i)) << 8) | 1;
- }
-}
-
-// Initial configuration of a compressor instance from `oxcf`: copies the
-// configuration, fills in the sequence-level colour / timing / decoder-model
-// parameters, derives chroma subsampling, sets the frame and superblock
-// size, allocates the size-dependent data, and then hands over to
-// av1_change_config() for everything shared with reconfiguration.
-static void init_config(struct AV1_COMP *cpi, AV1EncoderConfig *oxcf) {
- AV1_COMMON *const cm = &cpi->common;
-
- cpi->oxcf = *oxcf;
- cpi->framerate = oxcf->init_framerate;
-
- cm->seq_params.profile = oxcf->profile;
- cm->seq_params.bit_depth = oxcf->bit_depth;
- cm->seq_params.use_highbitdepth = oxcf->use_highbitdepth;
- cm->seq_params.color_primaries = oxcf->color_primaries;
- cm->seq_params.transfer_characteristics = oxcf->transfer_characteristics;
- cm->seq_params.matrix_coefficients = oxcf->matrix_coefficients;
- cm->seq_params.monochrome = oxcf->monochrome;
- cm->seq_params.chroma_sample_position = oxcf->chroma_sample_position;
- cm->seq_params.color_range = oxcf->color_range;
- cm->timing_info_present = oxcf->timing_info_present;
- cm->timing_info.num_units_in_display_tick =
- oxcf->timing_info.num_units_in_display_tick;
- cm->timing_info.time_scale = oxcf->timing_info.time_scale;
- cm->timing_info.equal_picture_interval =
- oxcf->timing_info.equal_picture_interval;
- cm->timing_info.num_ticks_per_picture =
- oxcf->timing_info.num_ticks_per_picture;
-
- cm->seq_params.display_model_info_present_flag =
- oxcf->display_model_info_present_flag;
- cm->seq_params.decoder_model_info_present_flag =
- oxcf->decoder_model_info_present_flag;
- if (oxcf->decoder_model_info_present_flag) {
- // set the decoder model parameters in schedule mode
- cm->buffer_model.num_units_in_decoding_tick =
- oxcf->buffer_model.num_units_in_decoding_tick;
- cm->buffer_removal_time_present = 1;
- set_aom_dec_model_info(&cm->buffer_model);
- set_dec_model_op_parameters(&cm->op_params[0]);
- } else if (cm->timing_info_present &&
- cm->timing_info.equal_picture_interval &&
- !cm->seq_params.decoder_model_info_present_flag) {
- // set the decoder model parameters in resource availability mode
- set_resource_availability_parameters(&cm->op_params[0]);
- } else {
- cm->op_params[0].initial_display_delay =
- 10; // Default value (not signaled)
- }
-
- // Derive chroma subsampling. Monochrome uses (1, 1); the BT.709 / sRGB /
- // identity-matrix combination uses (0, 0); otherwise it follows the
- // profile, and only profile 2 at 12 bits takes the values from the
- // configuration.
- if (cm->seq_params.monochrome) {
- cm->seq_params.subsampling_x = 1;
- cm->seq_params.subsampling_y = 1;
- } else if (cm->seq_params.color_primaries == AOM_CICP_CP_BT_709 &&
- cm->seq_params.transfer_characteristics == AOM_CICP_TC_SRGB &&
- cm->seq_params.matrix_coefficients == AOM_CICP_MC_IDENTITY) {
- cm->seq_params.subsampling_x = 0;
- cm->seq_params.subsampling_y = 0;
- } else {
- if (cm->seq_params.profile == 0) {
- cm->seq_params.subsampling_x = 1;
- cm->seq_params.subsampling_y = 1;
- } else if (cm->seq_params.profile == 1) {
- cm->seq_params.subsampling_x = 0;
- cm->seq_params.subsampling_y = 0;
- } else {
- if (cm->seq_params.bit_depth == AOM_BITS_12) {
- cm->seq_params.subsampling_x = oxcf->chroma_subsampling_x;
- cm->seq_params.subsampling_y = oxcf->chroma_subsampling_y;
- } else {
- cm->seq_params.subsampling_x = 1;
- cm->seq_params.subsampling_y = 0;
- }
- }
- }
-
- cm->width = oxcf->width;
- cm->height = oxcf->height;
- set_sb_size(&cm->seq_params,
- select_sb_size(cpi)); // set sb size before allocations
- alloc_compressor_data(cpi);
-
- update_film_grain_parameters(cpi, oxcf);
-
- // Single thread case: use counts in common.
- cpi->td.counts = &cpi->counts;
-
- // change includes all joint functionality
- av1_change_config(cpi, oxcf);
-
- cpi->static_mb_pct = 0;
- cpi->ref_frame_flags = 0;
-
- // Reset resize pending flags
- cpi->resize_pending_width = 0;
- cpi->resize_pending_height = 0;
-
- init_buffer_indices(cpi);
-}
-
-// Converts the configured buffer levels from milliseconds into bits using
-// the target bandwidth (level_ms * bandwidth / 1000). An optimal or maximum
-// level of 0 selects the default of bandwidth / 8.
-static void set_rc_buffer_sizes(RATE_CONTROL *rc,
-                                const AV1EncoderConfig *oxcf) {
-  const int64_t bits_per_sec = oxcf->target_bandwidth;
-  const int64_t starting_ms = oxcf->starting_buffer_level_ms;
-  const int64_t optimal_ms = oxcf->optimal_buffer_level_ms;
-  const int64_t maximum_ms = oxcf->maximum_buffer_size_ms;
-  const int64_t default_level = bits_per_sec / 8;
-
-  rc->starting_buffer_level = starting_ms * bits_per_sec / 1000;
-
-  if (optimal_ms == 0) {
-    rc->optimal_buffer_level = default_level;
-  } else {
-    rc->optimal_buffer_level = optimal_ms * bits_per_sec / 1000;
-  }
-
-  if (maximum_ms == 0) {
-    rc->maximum_buffer_size = default_level;
-  } else {
-    rc->maximum_buffer_size = maximum_ms * bits_per_sec / 1000;
-  }
-}
-
-// Installs the given function pointers into the cpi->fn_ptr[BT] entry
-// (sdf, sdaf, vf, svf, svaf, sdx4df, jsdaf, jsvaf). Requires a variable
-// named `cpi` in scope. The expansion is a plain sequence of statements,
-// not a do/while block, so it must not be used as the unbraced body of an
-// if/else or loop.
-#define HIGHBD_BFP(BT, SDF, SDAF, VF, SVF, SVAF, SDX4DF, JSDAF, JSVAF) \
- cpi->fn_ptr[BT].sdf = SDF; \
- cpi->fn_ptr[BT].sdaf = SDAF; \
- cpi->fn_ptr[BT].vf = VF; \
- cpi->fn_ptr[BT].svf = SVF; \
- cpi->fn_ptr[BT].svaf = SVAF; \
- cpi->fn_ptr[BT].sdx4df = SDX4DF; \
- cpi->fn_ptr[BT].jsdaf = JSDAF; \
- cpi->fn_ptr[BT].jsvaf = JSVAF;
-
-// Defines three static wrappers around the SAD function `fnname`:
-// fnname_bits8 returns its result unchanged, while fnname_bits10 and
-// fnname_bits12 return it right-shifted by 2 and 4 respectively.
-#define MAKE_BFP_SAD_WRAPPER(fnname) \
- static unsigned int fnname##_bits8(const uint8_t *src_ptr, \
- int source_stride, \
- const uint8_t *ref_ptr, int ref_stride) { \
- return fnname(src_ptr, source_stride, ref_ptr, ref_stride); \
- } \
- static unsigned int fnname##_bits10( \
- const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, \
- int ref_stride) { \
- return fnname(src_ptr, source_stride, ref_ptr, ref_stride) >> 2; \
- } \
- static unsigned int fnname##_bits12( \
- const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, \
- int ref_stride) { \
- return fnname(src_ptr, source_stride, ref_ptr, ref_stride) >> 4; \
- }
-
-// Same as MAKE_BFP_SAD_WRAPPER, for SAD functions that take an additional
-// `second_pred` argument: defines fnname_bits8 / _bits10 / _bits12, which
-// return the result of `fnname` shifted right by 0 / 2 / 4.
-#define MAKE_BFP_SADAVG_WRAPPER(fnname) \
- static unsigned int fnname##_bits8( \
- const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, \
- int ref_stride, const uint8_t *second_pred) { \
- return fnname(src_ptr, source_stride, ref_ptr, ref_stride, second_pred); \
- } \
- static unsigned int fnname##_bits10( \
- const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, \
- int ref_stride, const uint8_t *second_pred) { \
- return fnname(src_ptr, source_stride, ref_ptr, ref_stride, second_pred) >> \
- 2; \
- } \
- static unsigned int fnname##_bits12( \
- const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, \
- int ref_stride, const uint8_t *second_pred) { \
- return fnname(src_ptr, source_stride, ref_ptr, ref_stride, second_pred) >> \
- 4; \
- }
-
-// Defines fnname_bits8 / _bits10 / _bits12 wrappers for a 4-way SAD
-// function that writes its results into `sad_array`. The 10- and 12-bit
-// variants shift each of the four outputs right by 2 and 4 respectively
-// after the call.
-#define MAKE_BFP_SAD4D_WRAPPER(fnname) \
- static void fnname##_bits8(const uint8_t *src_ptr, int source_stride, \
- const uint8_t *const ref_ptr[], int ref_stride, \
- unsigned int *sad_array) { \
- fnname(src_ptr, source_stride, ref_ptr, ref_stride, sad_array); \
- } \
- static void fnname##_bits10(const uint8_t *src_ptr, int source_stride, \
- const uint8_t *const ref_ptr[], int ref_stride, \
- unsigned int *sad_array) { \
- int i; \
- fnname(src_ptr, source_stride, ref_ptr, ref_stride, sad_array); \
- for (i = 0; i < 4; i++) sad_array[i] >>= 2; \
- } \
- static void fnname##_bits12(const uint8_t *src_ptr, int source_stride, \
- const uint8_t *const ref_ptr[], int ref_stride, \
- unsigned int *sad_array) { \
- int i; \
- fnname(src_ptr, source_stride, ref_ptr, ref_stride, sad_array); \
- for (i = 0; i < 4; i++) sad_array[i] >>= 4; \
- }
-
-#define MAKE_BFP_JSADAVG_WRAPPER(fnname) \
- static unsigned int fnname##_bits8( \
- const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, \
- int ref_stride, const uint8_t *second_pred, \
- const JNT_COMP_PARAMS *jcp_param) { \
- return fnname(src_ptr, source_stride, ref_ptr, ref_stride, second_pred, \
- jcp_param); \
- } \
- static unsigned int fnname##_bits10( \
- const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, \
- int ref_stride, const uint8_t *second_pred, \
- const JNT_COMP_PARAMS *jcp_param) { \
- return fnname(src_ptr, source_stride, ref_ptr, ref_stride, second_pred, \
- jcp_param) >> \
- 2; \
- } \
- static unsigned int fnname##_bits12( \
- const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, \
- int ref_stride, const uint8_t *second_pred, \
- const JNT_COMP_PARAMS *jcp_param) { \
- return fnname(src_ptr, source_stride, ref_ptr, ref_stride, second_pred, \
- jcp_param) >> \
- 4; \
- }
-
-MAKE_BFP_SAD_WRAPPER(aom_highbd_sad128x128)
-MAKE_BFP_SADAVG_WRAPPER(aom_highbd_sad128x128_avg)
-MAKE_BFP_SAD4D_WRAPPER(aom_highbd_sad128x128x4d)
-MAKE_BFP_SAD_WRAPPER(aom_highbd_sad128x64)
-MAKE_BFP_SADAVG_WRAPPER(aom_highbd_sad128x64_avg)
-MAKE_BFP_SAD4D_WRAPPER(aom_highbd_sad128x64x4d)
-MAKE_BFP_SAD_WRAPPER(aom_highbd_sad64x128)
-MAKE_BFP_SADAVG_WRAPPER(aom_highbd_sad64x128_avg)
-MAKE_BFP_SAD4D_WRAPPER(aom_highbd_sad64x128x4d)
-MAKE_BFP_SAD_WRAPPER(aom_highbd_sad32x16)
-MAKE_BFP_SADAVG_WRAPPER(aom_highbd_sad32x16_avg)
-MAKE_BFP_SAD4D_WRAPPER(aom_highbd_sad32x16x4d)
-MAKE_BFP_SAD_WRAPPER(aom_highbd_sad16x32)
-MAKE_BFP_SADAVG_WRAPPER(aom_highbd_sad16x32_avg)
-MAKE_BFP_SAD4D_WRAPPER(aom_highbd_sad16x32x4d)
-MAKE_BFP_SAD_WRAPPER(aom_highbd_sad64x32)
-MAKE_BFP_SADAVG_WRAPPER(aom_highbd_sad64x32_avg)
-MAKE_BFP_SAD4D_WRAPPER(aom_highbd_sad64x32x4d)
-MAKE_BFP_SAD_WRAPPER(aom_highbd_sad32x64)
-MAKE_BFP_SADAVG_WRAPPER(aom_highbd_sad32x64_avg)
-MAKE_BFP_SAD4D_WRAPPER(aom_highbd_sad32x64x4d)
-MAKE_BFP_SAD_WRAPPER(aom_highbd_sad32x32)
-MAKE_BFP_SADAVG_WRAPPER(aom_highbd_sad32x32_avg)
-MAKE_BFP_SAD4D_WRAPPER(aom_highbd_sad32x32x4d)
-MAKE_BFP_SAD_WRAPPER(aom_highbd_sad64x64)
-MAKE_BFP_SADAVG_WRAPPER(aom_highbd_sad64x64_avg)
-MAKE_BFP_SAD4D_WRAPPER(aom_highbd_sad64x64x4d)
-MAKE_BFP_SAD_WRAPPER(aom_highbd_sad16x16)
-MAKE_BFP_SADAVG_WRAPPER(aom_highbd_sad16x16_avg)
-MAKE_BFP_SAD4D_WRAPPER(aom_highbd_sad16x16x4d)
-MAKE_BFP_SAD_WRAPPER(aom_highbd_sad16x8)
-MAKE_BFP_SADAVG_WRAPPER(aom_highbd_sad16x8_avg)
-MAKE_BFP_SAD4D_WRAPPER(aom_highbd_sad16x8x4d)
-MAKE_BFP_SAD_WRAPPER(aom_highbd_sad8x16)
-MAKE_BFP_SADAVG_WRAPPER(aom_highbd_sad8x16_avg)
-MAKE_BFP_SAD4D_WRAPPER(aom_highbd_sad8x16x4d)
-MAKE_BFP_SAD_WRAPPER(aom_highbd_sad8x8)
-MAKE_BFP_SADAVG_WRAPPER(aom_highbd_sad8x8_avg)
-MAKE_BFP_SAD4D_WRAPPER(aom_highbd_sad8x8x4d)
-MAKE_BFP_SAD_WRAPPER(aom_highbd_sad8x4)
-MAKE_BFP_SADAVG_WRAPPER(aom_highbd_sad8x4_avg)
-MAKE_BFP_SAD4D_WRAPPER(aom_highbd_sad8x4x4d)
-MAKE_BFP_SAD_WRAPPER(aom_highbd_sad4x8)
-MAKE_BFP_SADAVG_WRAPPER(aom_highbd_sad4x8_avg)
-MAKE_BFP_SAD4D_WRAPPER(aom_highbd_sad4x8x4d)
-MAKE_BFP_SAD_WRAPPER(aom_highbd_sad4x4)
-MAKE_BFP_SADAVG_WRAPPER(aom_highbd_sad4x4_avg)
-MAKE_BFP_SAD4D_WRAPPER(aom_highbd_sad4x4x4d)
-
-MAKE_BFP_SAD_WRAPPER(aom_highbd_sad4x16)
-MAKE_BFP_SADAVG_WRAPPER(aom_highbd_sad4x16_avg)
-MAKE_BFP_SAD4D_WRAPPER(aom_highbd_sad4x16x4d)
-MAKE_BFP_SAD_WRAPPER(aom_highbd_sad16x4)
-MAKE_BFP_SADAVG_WRAPPER(aom_highbd_sad16x4_avg)
-MAKE_BFP_SAD4D_WRAPPER(aom_highbd_sad16x4x4d)
-MAKE_BFP_SAD_WRAPPER(aom_highbd_sad8x32)
-MAKE_BFP_SADAVG_WRAPPER(aom_highbd_sad8x32_avg)
-MAKE_BFP_SAD4D_WRAPPER(aom_highbd_sad8x32x4d)
-MAKE_BFP_SAD_WRAPPER(aom_highbd_sad32x8)
-MAKE_BFP_SADAVG_WRAPPER(aom_highbd_sad32x8_avg)
-MAKE_BFP_SAD4D_WRAPPER(aom_highbd_sad32x8x4d)
-MAKE_BFP_SAD_WRAPPER(aom_highbd_sad16x64)
-MAKE_BFP_SADAVG_WRAPPER(aom_highbd_sad16x64_avg)
-MAKE_BFP_SAD4D_WRAPPER(aom_highbd_sad16x64x4d)
-MAKE_BFP_SAD_WRAPPER(aom_highbd_sad64x16)
-MAKE_BFP_SADAVG_WRAPPER(aom_highbd_sad64x16_avg)
-MAKE_BFP_SAD4D_WRAPPER(aom_highbd_sad64x16x4d)
-
-MAKE_BFP_JSADAVG_WRAPPER(aom_highbd_jnt_sad128x128_avg)
-MAKE_BFP_JSADAVG_WRAPPER(aom_highbd_jnt_sad128x64_avg)
-MAKE_BFP_JSADAVG_WRAPPER(aom_highbd_jnt_sad64x128_avg)
-MAKE_BFP_JSADAVG_WRAPPER(aom_highbd_jnt_sad32x16_avg)
-MAKE_BFP_JSADAVG_WRAPPER(aom_highbd_jnt_sad16x32_avg)
-MAKE_BFP_JSADAVG_WRAPPER(aom_highbd_jnt_sad64x32_avg)
-MAKE_BFP_JSADAVG_WRAPPER(aom_highbd_jnt_sad32x64_avg)
-MAKE_BFP_JSADAVG_WRAPPER(aom_highbd_jnt_sad32x32_avg)
-MAKE_BFP_JSADAVG_WRAPPER(aom_highbd_jnt_sad64x64_avg)
-MAKE_BFP_JSADAVG_WRAPPER(aom_highbd_jnt_sad16x16_avg)
-MAKE_BFP_JSADAVG_WRAPPER(aom_highbd_jnt_sad16x8_avg)
-MAKE_BFP_JSADAVG_WRAPPER(aom_highbd_jnt_sad8x16_avg)
-MAKE_BFP_JSADAVG_WRAPPER(aom_highbd_jnt_sad8x8_avg)
-MAKE_BFP_JSADAVG_WRAPPER(aom_highbd_jnt_sad8x4_avg)
-MAKE_BFP_JSADAVG_WRAPPER(aom_highbd_jnt_sad4x8_avg)
-MAKE_BFP_JSADAVG_WRAPPER(aom_highbd_jnt_sad4x4_avg)
-MAKE_BFP_JSADAVG_WRAPPER(aom_highbd_jnt_sad4x16_avg)
-MAKE_BFP_JSADAVG_WRAPPER(aom_highbd_jnt_sad16x4_avg)
-MAKE_BFP_JSADAVG_WRAPPER(aom_highbd_jnt_sad8x32_avg)
-MAKE_BFP_JSADAVG_WRAPPER(aom_highbd_jnt_sad32x8_avg)
-MAKE_BFP_JSADAVG_WRAPPER(aom_highbd_jnt_sad16x64_avg)
-MAKE_BFP_JSADAVG_WRAPPER(aom_highbd_jnt_sad64x16_avg)
-
-#define HIGHBD_MBFP(BT, MCSDF, MCSVF) \
- cpi->fn_ptr[BT].msdf = MCSDF; \
- cpi->fn_ptr[BT].msvf = MCSVF;
-
-#define MAKE_MBFP_COMPOUND_SAD_WRAPPER(fnname) \
- static unsigned int fnname##_bits8( \
- const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, \
- int ref_stride, const uint8_t *second_pred_ptr, const uint8_t *m, \
- int m_stride, int invert_mask) { \
- return fnname(src_ptr, source_stride, ref_ptr, ref_stride, \
- second_pred_ptr, m, m_stride, invert_mask); \
- } \
- static unsigned int fnname##_bits10( \
- const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, \
- int ref_stride, const uint8_t *second_pred_ptr, const uint8_t *m, \
- int m_stride, int invert_mask) { \
- return fnname(src_ptr, source_stride, ref_ptr, ref_stride, \
- second_pred_ptr, m, m_stride, invert_mask) >> \
- 2; \
- } \
- static unsigned int fnname##_bits12( \
- const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, \
- int ref_stride, const uint8_t *second_pred_ptr, const uint8_t *m, \
- int m_stride, int invert_mask) { \
- return fnname(src_ptr, source_stride, ref_ptr, ref_stride, \
- second_pred_ptr, m, m_stride, invert_mask) >> \
- 4; \
- }
-
-MAKE_MBFP_COMPOUND_SAD_WRAPPER(aom_highbd_masked_sad128x128)
-MAKE_MBFP_COMPOUND_SAD_WRAPPER(aom_highbd_masked_sad128x64)
-MAKE_MBFP_COMPOUND_SAD_WRAPPER(aom_highbd_masked_sad64x128)
-MAKE_MBFP_COMPOUND_SAD_WRAPPER(aom_highbd_masked_sad64x64)
-MAKE_MBFP_COMPOUND_SAD_WRAPPER(aom_highbd_masked_sad64x32)
-MAKE_MBFP_COMPOUND_SAD_WRAPPER(aom_highbd_masked_sad32x64)
-MAKE_MBFP_COMPOUND_SAD_WRAPPER(aom_highbd_masked_sad32x32)
-MAKE_MBFP_COMPOUND_SAD_WRAPPER(aom_highbd_masked_sad32x16)
-MAKE_MBFP_COMPOUND_SAD_WRAPPER(aom_highbd_masked_sad16x32)
-MAKE_MBFP_COMPOUND_SAD_WRAPPER(aom_highbd_masked_sad16x16)
-MAKE_MBFP_COMPOUND_SAD_WRAPPER(aom_highbd_masked_sad16x8)
-MAKE_MBFP_COMPOUND_SAD_WRAPPER(aom_highbd_masked_sad8x16)
-MAKE_MBFP_COMPOUND_SAD_WRAPPER(aom_highbd_masked_sad8x8)
-MAKE_MBFP_COMPOUND_SAD_WRAPPER(aom_highbd_masked_sad8x4)
-MAKE_MBFP_COMPOUND_SAD_WRAPPER(aom_highbd_masked_sad4x8)
-MAKE_MBFP_COMPOUND_SAD_WRAPPER(aom_highbd_masked_sad4x4)
-MAKE_MBFP_COMPOUND_SAD_WRAPPER(aom_highbd_masked_sad4x16)
-MAKE_MBFP_COMPOUND_SAD_WRAPPER(aom_highbd_masked_sad16x4)
-MAKE_MBFP_COMPOUND_SAD_WRAPPER(aom_highbd_masked_sad8x32)
-MAKE_MBFP_COMPOUND_SAD_WRAPPER(aom_highbd_masked_sad32x8)
-MAKE_MBFP_COMPOUND_SAD_WRAPPER(aom_highbd_masked_sad16x64)
-MAKE_MBFP_COMPOUND_SAD_WRAPPER(aom_highbd_masked_sad64x16)
-
-#define HIGHBD_OBFP(BT, OSDF, OVF, OSVF) \
- cpi->fn_ptr[BT].osdf = OSDF; \
- cpi->fn_ptr[BT].ovf = OVF; \
- cpi->fn_ptr[BT].osvf = OSVF;
-
-#define MAKE_OBFP_SAD_WRAPPER(fnname) \
- static unsigned int fnname##_bits8(const uint8_t *ref, int ref_stride, \
- const int32_t *wsrc, \
- const int32_t *msk) { \
- return fnname(ref, ref_stride, wsrc, msk); \
- } \
- static unsigned int fnname##_bits10(const uint8_t *ref, int ref_stride, \
- const int32_t *wsrc, \
- const int32_t *msk) { \
- return fnname(ref, ref_stride, wsrc, msk) >> 2; \
- } \
- static unsigned int fnname##_bits12(const uint8_t *ref, int ref_stride, \
- const int32_t *wsrc, \
- const int32_t *msk) { \
- return fnname(ref, ref_stride, wsrc, msk) >> 4; \
- }
-
-MAKE_OBFP_SAD_WRAPPER(aom_highbd_obmc_sad128x128)
-MAKE_OBFP_SAD_WRAPPER(aom_highbd_obmc_sad128x64)
-MAKE_OBFP_SAD_WRAPPER(aom_highbd_obmc_sad64x128)
-MAKE_OBFP_SAD_WRAPPER(aom_highbd_obmc_sad64x64)
-MAKE_OBFP_SAD_WRAPPER(aom_highbd_obmc_sad64x32)
-MAKE_OBFP_SAD_WRAPPER(aom_highbd_obmc_sad32x64)
-MAKE_OBFP_SAD_WRAPPER(aom_highbd_obmc_sad32x32)
-MAKE_OBFP_SAD_WRAPPER(aom_highbd_obmc_sad32x16)
-MAKE_OBFP_SAD_WRAPPER(aom_highbd_obmc_sad16x32)
-MAKE_OBFP_SAD_WRAPPER(aom_highbd_obmc_sad16x16)
-MAKE_OBFP_SAD_WRAPPER(aom_highbd_obmc_sad16x8)
-MAKE_OBFP_SAD_WRAPPER(aom_highbd_obmc_sad8x16)
-MAKE_OBFP_SAD_WRAPPER(aom_highbd_obmc_sad8x8)
-MAKE_OBFP_SAD_WRAPPER(aom_highbd_obmc_sad8x4)
-MAKE_OBFP_SAD_WRAPPER(aom_highbd_obmc_sad4x8)
-MAKE_OBFP_SAD_WRAPPER(aom_highbd_obmc_sad4x4)
-MAKE_OBFP_SAD_WRAPPER(aom_highbd_obmc_sad4x16)
-MAKE_OBFP_SAD_WRAPPER(aom_highbd_obmc_sad16x4)
-MAKE_OBFP_SAD_WRAPPER(aom_highbd_obmc_sad8x32)
-MAKE_OBFP_SAD_WRAPPER(aom_highbd_obmc_sad32x8)
-MAKE_OBFP_SAD_WRAPPER(aom_highbd_obmc_sad16x64)
-MAKE_OBFP_SAD_WRAPPER(aom_highbd_obmc_sad64x16)
-
-static void highbd_set_var_fns(AV1_COMP *const cpi) {
- AV1_COMMON *const cm = &cpi->common;
- if (cm->seq_params.use_highbitdepth) {
- switch (cm->seq_params.bit_depth) {
- case AOM_BITS_8:
- HIGHBD_BFP(BLOCK_64X16, aom_highbd_sad64x16_bits8,
- aom_highbd_sad64x16_avg_bits8, aom_highbd_8_variance64x16,
- aom_highbd_8_sub_pixel_variance64x16,
- aom_highbd_8_sub_pixel_avg_variance64x16,
- aom_highbd_sad64x16x4d_bits8,
- aom_highbd_jnt_sad64x16_avg_bits8,
- aom_highbd_8_jnt_sub_pixel_avg_variance64x16)
-
- HIGHBD_BFP(BLOCK_16X64, aom_highbd_sad16x64_bits8,
- aom_highbd_sad16x64_avg_bits8, aom_highbd_8_variance16x64,
- aom_highbd_8_sub_pixel_variance16x64,
- aom_highbd_8_sub_pixel_avg_variance16x64,
- aom_highbd_sad16x64x4d_bits8,
- aom_highbd_jnt_sad16x64_avg_bits8,
- aom_highbd_8_jnt_sub_pixel_avg_variance16x64)
-
- HIGHBD_BFP(
- BLOCK_32X8, aom_highbd_sad32x8_bits8, aom_highbd_sad32x8_avg_bits8,
- aom_highbd_8_variance32x8, aom_highbd_8_sub_pixel_variance32x8,
- aom_highbd_8_sub_pixel_avg_variance32x8,
- aom_highbd_sad32x8x4d_bits8, aom_highbd_jnt_sad32x8_avg_bits8,
- aom_highbd_8_jnt_sub_pixel_avg_variance32x8)
-
- HIGHBD_BFP(
- BLOCK_8X32, aom_highbd_sad8x32_bits8, aom_highbd_sad8x32_avg_bits8,
- aom_highbd_8_variance8x32, aom_highbd_8_sub_pixel_variance8x32,
- aom_highbd_8_sub_pixel_avg_variance8x32,
- aom_highbd_sad8x32x4d_bits8, aom_highbd_jnt_sad8x32_avg_bits8,
- aom_highbd_8_jnt_sub_pixel_avg_variance8x32)
-
- HIGHBD_BFP(
- BLOCK_16X4, aom_highbd_sad16x4_bits8, aom_highbd_sad16x4_avg_bits8,
- aom_highbd_8_variance16x4, aom_highbd_8_sub_pixel_variance16x4,
- aom_highbd_8_sub_pixel_avg_variance16x4,
- aom_highbd_sad16x4x4d_bits8, aom_highbd_jnt_sad16x4_avg_bits8,
- aom_highbd_8_jnt_sub_pixel_avg_variance16x4)
-
- HIGHBD_BFP(
- BLOCK_4X16, aom_highbd_sad4x16_bits8, aom_highbd_sad4x16_avg_bits8,
- aom_highbd_8_variance4x16, aom_highbd_8_sub_pixel_variance4x16,
- aom_highbd_8_sub_pixel_avg_variance4x16,
- aom_highbd_sad4x16x4d_bits8, aom_highbd_jnt_sad4x16_avg_bits8,
- aom_highbd_8_jnt_sub_pixel_avg_variance4x16)
-
- HIGHBD_BFP(BLOCK_32X16, aom_highbd_sad32x16_bits8,
- aom_highbd_sad32x16_avg_bits8, aom_highbd_8_variance32x16,
- aom_highbd_8_sub_pixel_variance32x16,
- aom_highbd_8_sub_pixel_avg_variance32x16,
- aom_highbd_sad32x16x4d_bits8,
- aom_highbd_jnt_sad32x16_avg_bits8,
- aom_highbd_8_jnt_sub_pixel_avg_variance32x16)
-
- HIGHBD_BFP(BLOCK_16X32, aom_highbd_sad16x32_bits8,
- aom_highbd_sad16x32_avg_bits8, aom_highbd_8_variance16x32,
- aom_highbd_8_sub_pixel_variance16x32,
- aom_highbd_8_sub_pixel_avg_variance16x32,
- aom_highbd_sad16x32x4d_bits8,
- aom_highbd_jnt_sad16x32_avg_bits8,
- aom_highbd_8_jnt_sub_pixel_avg_variance16x32)
-
- HIGHBD_BFP(BLOCK_64X32, aom_highbd_sad64x32_bits8,
- aom_highbd_sad64x32_avg_bits8, aom_highbd_8_variance64x32,
- aom_highbd_8_sub_pixel_variance64x32,
- aom_highbd_8_sub_pixel_avg_variance64x32,
- aom_highbd_sad64x32x4d_bits8,
- aom_highbd_jnt_sad64x32_avg_bits8,
- aom_highbd_8_jnt_sub_pixel_avg_variance64x32)
-
- HIGHBD_BFP(BLOCK_32X64, aom_highbd_sad32x64_bits8,
- aom_highbd_sad32x64_avg_bits8, aom_highbd_8_variance32x64,
- aom_highbd_8_sub_pixel_variance32x64,
- aom_highbd_8_sub_pixel_avg_variance32x64,
- aom_highbd_sad32x64x4d_bits8,
- aom_highbd_jnt_sad32x64_avg_bits8,
- aom_highbd_8_jnt_sub_pixel_avg_variance32x64)
-
- HIGHBD_BFP(BLOCK_32X32, aom_highbd_sad32x32_bits8,
- aom_highbd_sad32x32_avg_bits8, aom_highbd_8_variance32x32,
- aom_highbd_8_sub_pixel_variance32x32,
- aom_highbd_8_sub_pixel_avg_variance32x32,
- aom_highbd_sad32x32x4d_bits8,
- aom_highbd_jnt_sad32x32_avg_bits8,
- aom_highbd_8_jnt_sub_pixel_avg_variance32x32)
-
- HIGHBD_BFP(BLOCK_64X64, aom_highbd_sad64x64_bits8,
- aom_highbd_sad64x64_avg_bits8, aom_highbd_8_variance64x64,
- aom_highbd_8_sub_pixel_variance64x64,
- aom_highbd_8_sub_pixel_avg_variance64x64,
- aom_highbd_sad64x64x4d_bits8,
- aom_highbd_jnt_sad64x64_avg_bits8,
- aom_highbd_8_jnt_sub_pixel_avg_variance64x64)
-
- HIGHBD_BFP(BLOCK_16X16, aom_highbd_sad16x16_bits8,
- aom_highbd_sad16x16_avg_bits8, aom_highbd_8_variance16x16,
- aom_highbd_8_sub_pixel_variance16x16,
- aom_highbd_8_sub_pixel_avg_variance16x16,
- aom_highbd_sad16x16x4d_bits8,
- aom_highbd_jnt_sad16x16_avg_bits8,
- aom_highbd_8_jnt_sub_pixel_avg_variance16x16)
-
- HIGHBD_BFP(
- BLOCK_16X8, aom_highbd_sad16x8_bits8, aom_highbd_sad16x8_avg_bits8,
- aom_highbd_8_variance16x8, aom_highbd_8_sub_pixel_variance16x8,
- aom_highbd_8_sub_pixel_avg_variance16x8,
- aom_highbd_sad16x8x4d_bits8, aom_highbd_jnt_sad16x8_avg_bits8,
- aom_highbd_8_jnt_sub_pixel_avg_variance16x8)
-
- HIGHBD_BFP(
- BLOCK_8X16, aom_highbd_sad8x16_bits8, aom_highbd_sad8x16_avg_bits8,
- aom_highbd_8_variance8x16, aom_highbd_8_sub_pixel_variance8x16,
- aom_highbd_8_sub_pixel_avg_variance8x16,
- aom_highbd_sad8x16x4d_bits8, aom_highbd_jnt_sad8x16_avg_bits8,
- aom_highbd_8_jnt_sub_pixel_avg_variance8x16)
-
- HIGHBD_BFP(BLOCK_8X8, aom_highbd_sad8x8_bits8,
- aom_highbd_sad8x8_avg_bits8, aom_highbd_8_variance8x8,
- aom_highbd_8_sub_pixel_variance8x8,
- aom_highbd_8_sub_pixel_avg_variance8x8,
- aom_highbd_sad8x8x4d_bits8, aom_highbd_jnt_sad8x8_avg_bits8,
- aom_highbd_8_jnt_sub_pixel_avg_variance8x8)
-
- HIGHBD_BFP(BLOCK_8X4, aom_highbd_sad8x4_bits8,
- aom_highbd_sad8x4_avg_bits8, aom_highbd_8_variance8x4,
- aom_highbd_8_sub_pixel_variance8x4,
- aom_highbd_8_sub_pixel_avg_variance8x4,
- aom_highbd_sad8x4x4d_bits8, aom_highbd_jnt_sad8x4_avg_bits8,
- aom_highbd_8_jnt_sub_pixel_avg_variance8x4)
-
- HIGHBD_BFP(BLOCK_4X8, aom_highbd_sad4x8_bits8,
- aom_highbd_sad4x8_avg_bits8, aom_highbd_8_variance4x8,
- aom_highbd_8_sub_pixel_variance4x8,
- aom_highbd_8_sub_pixel_avg_variance4x8,
- aom_highbd_sad4x8x4d_bits8, aom_highbd_jnt_sad4x8_avg_bits8,
- aom_highbd_8_jnt_sub_pixel_avg_variance4x8)
-
- HIGHBD_BFP(BLOCK_4X4, aom_highbd_sad4x4_bits8,
- aom_highbd_sad4x4_avg_bits8, aom_highbd_8_variance4x4,
- aom_highbd_8_sub_pixel_variance4x4,
- aom_highbd_8_sub_pixel_avg_variance4x4,
- aom_highbd_sad4x4x4d_bits8, aom_highbd_jnt_sad4x4_avg_bits8,
- aom_highbd_8_jnt_sub_pixel_avg_variance4x4)
-
- HIGHBD_BFP(
- BLOCK_128X128, aom_highbd_sad128x128_bits8,
- aom_highbd_sad128x128_avg_bits8, aom_highbd_8_variance128x128,
- aom_highbd_8_sub_pixel_variance128x128,
- aom_highbd_8_sub_pixel_avg_variance128x128,
- aom_highbd_sad128x128x4d_bits8, aom_highbd_jnt_sad128x128_avg_bits8,
- aom_highbd_8_jnt_sub_pixel_avg_variance128x128)
-
- HIGHBD_BFP(BLOCK_128X64, aom_highbd_sad128x64_bits8,
- aom_highbd_sad128x64_avg_bits8, aom_highbd_8_variance128x64,
- aom_highbd_8_sub_pixel_variance128x64,
- aom_highbd_8_sub_pixel_avg_variance128x64,
- aom_highbd_sad128x64x4d_bits8,
- aom_highbd_jnt_sad128x64_avg_bits8,
- aom_highbd_8_jnt_sub_pixel_avg_variance128x64)
-
- HIGHBD_BFP(BLOCK_64X128, aom_highbd_sad64x128_bits8,
- aom_highbd_sad64x128_avg_bits8, aom_highbd_8_variance64x128,
- aom_highbd_8_sub_pixel_variance64x128,
- aom_highbd_8_sub_pixel_avg_variance64x128,
- aom_highbd_sad64x128x4d_bits8,
- aom_highbd_jnt_sad64x128_avg_bits8,
- aom_highbd_8_jnt_sub_pixel_avg_variance64x128)
-
- HIGHBD_MBFP(BLOCK_128X128, aom_highbd_masked_sad128x128_bits8,
- aom_highbd_8_masked_sub_pixel_variance128x128)
- HIGHBD_MBFP(BLOCK_128X64, aom_highbd_masked_sad128x64_bits8,
- aom_highbd_8_masked_sub_pixel_variance128x64)
- HIGHBD_MBFP(BLOCK_64X128, aom_highbd_masked_sad64x128_bits8,
- aom_highbd_8_masked_sub_pixel_variance64x128)
- HIGHBD_MBFP(BLOCK_64X64, aom_highbd_masked_sad64x64_bits8,
- aom_highbd_8_masked_sub_pixel_variance64x64)
- HIGHBD_MBFP(BLOCK_64X32, aom_highbd_masked_sad64x32_bits8,
- aom_highbd_8_masked_sub_pixel_variance64x32)
- HIGHBD_MBFP(BLOCK_32X64, aom_highbd_masked_sad32x64_bits8,
- aom_highbd_8_masked_sub_pixel_variance32x64)
- HIGHBD_MBFP(BLOCK_32X32, aom_highbd_masked_sad32x32_bits8,
- aom_highbd_8_masked_sub_pixel_variance32x32)
- HIGHBD_MBFP(BLOCK_32X16, aom_highbd_masked_sad32x16_bits8,
- aom_highbd_8_masked_sub_pixel_variance32x16)
- HIGHBD_MBFP(BLOCK_16X32, aom_highbd_masked_sad16x32_bits8,
- aom_highbd_8_masked_sub_pixel_variance16x32)
- HIGHBD_MBFP(BLOCK_16X16, aom_highbd_masked_sad16x16_bits8,
- aom_highbd_8_masked_sub_pixel_variance16x16)
- HIGHBD_MBFP(BLOCK_8X16, aom_highbd_masked_sad8x16_bits8,
- aom_highbd_8_masked_sub_pixel_variance8x16)
- HIGHBD_MBFP(BLOCK_16X8, aom_highbd_masked_sad16x8_bits8,
- aom_highbd_8_masked_sub_pixel_variance16x8)
- HIGHBD_MBFP(BLOCK_8X8, aom_highbd_masked_sad8x8_bits8,
- aom_highbd_8_masked_sub_pixel_variance8x8)
- HIGHBD_MBFP(BLOCK_4X8, aom_highbd_masked_sad4x8_bits8,
- aom_highbd_8_masked_sub_pixel_variance4x8)
- HIGHBD_MBFP(BLOCK_8X4, aom_highbd_masked_sad8x4_bits8,
- aom_highbd_8_masked_sub_pixel_variance8x4)
- HIGHBD_MBFP(BLOCK_4X4, aom_highbd_masked_sad4x4_bits8,
- aom_highbd_8_masked_sub_pixel_variance4x4)
- HIGHBD_MBFP(BLOCK_64X16, aom_highbd_masked_sad64x16_bits8,
- aom_highbd_8_masked_sub_pixel_variance64x16)
- HIGHBD_MBFP(BLOCK_16X64, aom_highbd_masked_sad16x64_bits8,
- aom_highbd_8_masked_sub_pixel_variance16x64)
- HIGHBD_MBFP(BLOCK_32X8, aom_highbd_masked_sad32x8_bits8,
- aom_highbd_8_masked_sub_pixel_variance32x8)
- HIGHBD_MBFP(BLOCK_8X32, aom_highbd_masked_sad8x32_bits8,
- aom_highbd_8_masked_sub_pixel_variance8x32)
- HIGHBD_MBFP(BLOCK_16X4, aom_highbd_masked_sad16x4_bits8,
- aom_highbd_8_masked_sub_pixel_variance16x4)
- HIGHBD_MBFP(BLOCK_4X16, aom_highbd_masked_sad4x16_bits8,
- aom_highbd_8_masked_sub_pixel_variance4x16)
- HIGHBD_OBFP(BLOCK_128X128, aom_highbd_obmc_sad128x128_bits8,
- aom_highbd_obmc_variance128x128,
- aom_highbd_obmc_sub_pixel_variance128x128)
- HIGHBD_OBFP(BLOCK_128X64, aom_highbd_obmc_sad128x64_bits8,
- aom_highbd_obmc_variance128x64,
- aom_highbd_obmc_sub_pixel_variance128x64)
- HIGHBD_OBFP(BLOCK_64X128, aom_highbd_obmc_sad64x128_bits8,
- aom_highbd_obmc_variance64x128,
- aom_highbd_obmc_sub_pixel_variance64x128)
- HIGHBD_OBFP(BLOCK_64X64, aom_highbd_obmc_sad64x64_bits8,
- aom_highbd_obmc_variance64x64,
- aom_highbd_obmc_sub_pixel_variance64x64)
- HIGHBD_OBFP(BLOCK_64X32, aom_highbd_obmc_sad64x32_bits8,
- aom_highbd_obmc_variance64x32,
- aom_highbd_obmc_sub_pixel_variance64x32)
- HIGHBD_OBFP(BLOCK_32X64, aom_highbd_obmc_sad32x64_bits8,
- aom_highbd_obmc_variance32x64,
- aom_highbd_obmc_sub_pixel_variance32x64)
- HIGHBD_OBFP(BLOCK_32X32, aom_highbd_obmc_sad32x32_bits8,
- aom_highbd_obmc_variance32x32,
- aom_highbd_obmc_sub_pixel_variance32x32)
- HIGHBD_OBFP(BLOCK_32X16, aom_highbd_obmc_sad32x16_bits8,
- aom_highbd_obmc_variance32x16,
- aom_highbd_obmc_sub_pixel_variance32x16)
- HIGHBD_OBFP(BLOCK_16X32, aom_highbd_obmc_sad16x32_bits8,
- aom_highbd_obmc_variance16x32,
- aom_highbd_obmc_sub_pixel_variance16x32)
- HIGHBD_OBFP(BLOCK_16X16, aom_highbd_obmc_sad16x16_bits8,
- aom_highbd_obmc_variance16x16,
- aom_highbd_obmc_sub_pixel_variance16x16)
- HIGHBD_OBFP(BLOCK_8X16, aom_highbd_obmc_sad8x16_bits8,
- aom_highbd_obmc_variance8x16,
- aom_highbd_obmc_sub_pixel_variance8x16)
- HIGHBD_OBFP(BLOCK_16X8, aom_highbd_obmc_sad16x8_bits8,
- aom_highbd_obmc_variance16x8,
- aom_highbd_obmc_sub_pixel_variance16x8)
- HIGHBD_OBFP(BLOCK_8X8, aom_highbd_obmc_sad8x8_bits8,
- aom_highbd_obmc_variance8x8,
- aom_highbd_obmc_sub_pixel_variance8x8)
- HIGHBD_OBFP(BLOCK_4X8, aom_highbd_obmc_sad4x8_bits8,
- aom_highbd_obmc_variance4x8,
- aom_highbd_obmc_sub_pixel_variance4x8)
- HIGHBD_OBFP(BLOCK_8X4, aom_highbd_obmc_sad8x4_bits8,
- aom_highbd_obmc_variance8x4,
- aom_highbd_obmc_sub_pixel_variance8x4)
- HIGHBD_OBFP(BLOCK_4X4, aom_highbd_obmc_sad4x4_bits8,
- aom_highbd_obmc_variance4x4,
- aom_highbd_obmc_sub_pixel_variance4x4)
- HIGHBD_OBFP(BLOCK_64X16, aom_highbd_obmc_sad64x16_bits8,
- aom_highbd_obmc_variance64x16,
- aom_highbd_obmc_sub_pixel_variance64x16)
- HIGHBD_OBFP(BLOCK_16X64, aom_highbd_obmc_sad16x64_bits8,
- aom_highbd_obmc_variance16x64,
- aom_highbd_obmc_sub_pixel_variance16x64)
- HIGHBD_OBFP(BLOCK_32X8, aom_highbd_obmc_sad32x8_bits8,
- aom_highbd_obmc_variance32x8,
- aom_highbd_obmc_sub_pixel_variance32x8)
- HIGHBD_OBFP(BLOCK_8X32, aom_highbd_obmc_sad8x32_bits8,
- aom_highbd_obmc_variance8x32,
- aom_highbd_obmc_sub_pixel_variance8x32)
- HIGHBD_OBFP(BLOCK_16X4, aom_highbd_obmc_sad16x4_bits8,
- aom_highbd_obmc_variance16x4,
- aom_highbd_obmc_sub_pixel_variance16x4)
- HIGHBD_OBFP(BLOCK_4X16, aom_highbd_obmc_sad4x16_bits8,
- aom_highbd_obmc_variance4x16,
- aom_highbd_obmc_sub_pixel_variance4x16)
- break;
-
- case AOM_BITS_10:
- HIGHBD_BFP(BLOCK_64X16, aom_highbd_sad64x16_bits10,
- aom_highbd_sad64x16_avg_bits10, aom_highbd_10_variance64x16,
- aom_highbd_10_sub_pixel_variance64x16,
- aom_highbd_10_sub_pixel_avg_variance64x16,
- aom_highbd_sad64x16x4d_bits10,
- aom_highbd_jnt_sad64x16_avg_bits10,
- aom_highbd_10_jnt_sub_pixel_avg_variance64x16);
-
- HIGHBD_BFP(BLOCK_16X64, aom_highbd_sad16x64_bits10,
- aom_highbd_sad16x64_avg_bits10, aom_highbd_10_variance16x64,
- aom_highbd_10_sub_pixel_variance16x64,
- aom_highbd_10_sub_pixel_avg_variance16x64,
- aom_highbd_sad16x64x4d_bits10,
- aom_highbd_jnt_sad16x64_avg_bits10,
- aom_highbd_10_jnt_sub_pixel_avg_variance16x64);
-
- HIGHBD_BFP(BLOCK_32X8, aom_highbd_sad32x8_bits10,
- aom_highbd_sad32x8_avg_bits10, aom_highbd_10_variance32x8,
- aom_highbd_10_sub_pixel_variance32x8,
- aom_highbd_10_sub_pixel_avg_variance32x8,
- aom_highbd_sad32x8x4d_bits10,
- aom_highbd_jnt_sad32x8_avg_bits10,
- aom_highbd_10_jnt_sub_pixel_avg_variance32x8);
-
- HIGHBD_BFP(BLOCK_8X32, aom_highbd_sad8x32_bits10,
- aom_highbd_sad8x32_avg_bits10, aom_highbd_10_variance8x32,
- aom_highbd_10_sub_pixel_variance8x32,
- aom_highbd_10_sub_pixel_avg_variance8x32,
- aom_highbd_sad8x32x4d_bits10,
- aom_highbd_jnt_sad8x32_avg_bits10,
- aom_highbd_10_jnt_sub_pixel_avg_variance8x32);
-
- HIGHBD_BFP(BLOCK_16X4, aom_highbd_sad16x4_bits10,
- aom_highbd_sad16x4_avg_bits10, aom_highbd_10_variance16x4,
- aom_highbd_10_sub_pixel_variance16x4,
- aom_highbd_10_sub_pixel_avg_variance16x4,
- aom_highbd_sad16x4x4d_bits10,
- aom_highbd_jnt_sad16x4_avg_bits10,
- aom_highbd_10_jnt_sub_pixel_avg_variance16x4);
-
- HIGHBD_BFP(BLOCK_4X16, aom_highbd_sad4x16_bits10,
- aom_highbd_sad4x16_avg_bits10, aom_highbd_10_variance4x16,
- aom_highbd_10_sub_pixel_variance4x16,
- aom_highbd_10_sub_pixel_avg_variance4x16,
- aom_highbd_sad4x16x4d_bits10,
- aom_highbd_jnt_sad4x16_avg_bits10,
- aom_highbd_10_jnt_sub_pixel_avg_variance4x16);
-
- HIGHBD_BFP(BLOCK_32X16, aom_highbd_sad32x16_bits10,
- aom_highbd_sad32x16_avg_bits10, aom_highbd_10_variance32x16,
- aom_highbd_10_sub_pixel_variance32x16,
- aom_highbd_10_sub_pixel_avg_variance32x16,
- aom_highbd_sad32x16x4d_bits10,
- aom_highbd_jnt_sad32x16_avg_bits10,
- aom_highbd_10_jnt_sub_pixel_avg_variance32x16);
-
- HIGHBD_BFP(BLOCK_16X32, aom_highbd_sad16x32_bits10,
- aom_highbd_sad16x32_avg_bits10, aom_highbd_10_variance16x32,
- aom_highbd_10_sub_pixel_variance16x32,
- aom_highbd_10_sub_pixel_avg_variance16x32,
- aom_highbd_sad16x32x4d_bits10,
- aom_highbd_jnt_sad16x32_avg_bits10,
- aom_highbd_10_jnt_sub_pixel_avg_variance16x32);
-
- HIGHBD_BFP(BLOCK_64X32, aom_highbd_sad64x32_bits10,
- aom_highbd_sad64x32_avg_bits10, aom_highbd_10_variance64x32,
- aom_highbd_10_sub_pixel_variance64x32,
- aom_highbd_10_sub_pixel_avg_variance64x32,
- aom_highbd_sad64x32x4d_bits10,
- aom_highbd_jnt_sad64x32_avg_bits10,
- aom_highbd_10_jnt_sub_pixel_avg_variance64x32);
-
- HIGHBD_BFP(BLOCK_32X64, aom_highbd_sad32x64_bits10,
- aom_highbd_sad32x64_avg_bits10, aom_highbd_10_variance32x64,
- aom_highbd_10_sub_pixel_variance32x64,
- aom_highbd_10_sub_pixel_avg_variance32x64,
- aom_highbd_sad32x64x4d_bits10,
- aom_highbd_jnt_sad32x64_avg_bits10,
- aom_highbd_10_jnt_sub_pixel_avg_variance32x64);
-
- HIGHBD_BFP(BLOCK_32X32, aom_highbd_sad32x32_bits10,
- aom_highbd_sad32x32_avg_bits10, aom_highbd_10_variance32x32,
- aom_highbd_10_sub_pixel_variance32x32,
- aom_highbd_10_sub_pixel_avg_variance32x32,
- aom_highbd_sad32x32x4d_bits10,
- aom_highbd_jnt_sad32x32_avg_bits10,
- aom_highbd_10_jnt_sub_pixel_avg_variance32x32);
-
- HIGHBD_BFP(BLOCK_64X64, aom_highbd_sad64x64_bits10,
- aom_highbd_sad64x64_avg_bits10, aom_highbd_10_variance64x64,
- aom_highbd_10_sub_pixel_variance64x64,
- aom_highbd_10_sub_pixel_avg_variance64x64,
- aom_highbd_sad64x64x4d_bits10,
- aom_highbd_jnt_sad64x64_avg_bits10,
- aom_highbd_10_jnt_sub_pixel_avg_variance64x64);
-
- HIGHBD_BFP(BLOCK_16X16, aom_highbd_sad16x16_bits10,
- aom_highbd_sad16x16_avg_bits10, aom_highbd_10_variance16x16,
- aom_highbd_10_sub_pixel_variance16x16,
- aom_highbd_10_sub_pixel_avg_variance16x16,
- aom_highbd_sad16x16x4d_bits10,
- aom_highbd_jnt_sad16x16_avg_bits10,
- aom_highbd_10_jnt_sub_pixel_avg_variance16x16);
-
- HIGHBD_BFP(BLOCK_16X8, aom_highbd_sad16x8_bits10,
- aom_highbd_sad16x8_avg_bits10, aom_highbd_10_variance16x8,
- aom_highbd_10_sub_pixel_variance16x8,
- aom_highbd_10_sub_pixel_avg_variance16x8,
- aom_highbd_sad16x8x4d_bits10,
- aom_highbd_jnt_sad16x8_avg_bits10,
- aom_highbd_10_jnt_sub_pixel_avg_variance16x8);
-
- HIGHBD_BFP(BLOCK_8X16, aom_highbd_sad8x16_bits10,
- aom_highbd_sad8x16_avg_bits10, aom_highbd_10_variance8x16,
- aom_highbd_10_sub_pixel_variance8x16,
- aom_highbd_10_sub_pixel_avg_variance8x16,
- aom_highbd_sad8x16x4d_bits10,
- aom_highbd_jnt_sad8x16_avg_bits10,
- aom_highbd_10_jnt_sub_pixel_avg_variance8x16);
-
- HIGHBD_BFP(
- BLOCK_8X8, aom_highbd_sad8x8_bits10, aom_highbd_sad8x8_avg_bits10,
- aom_highbd_10_variance8x8, aom_highbd_10_sub_pixel_variance8x8,
- aom_highbd_10_sub_pixel_avg_variance8x8,
- aom_highbd_sad8x8x4d_bits10, aom_highbd_jnt_sad8x8_avg_bits10,
- aom_highbd_10_jnt_sub_pixel_avg_variance8x8);
-
- HIGHBD_BFP(
- BLOCK_8X4, aom_highbd_sad8x4_bits10, aom_highbd_sad8x4_avg_bits10,
- aom_highbd_10_variance8x4, aom_highbd_10_sub_pixel_variance8x4,
- aom_highbd_10_sub_pixel_avg_variance8x4,
- aom_highbd_sad8x4x4d_bits10, aom_highbd_jnt_sad8x4_avg_bits10,
- aom_highbd_10_jnt_sub_pixel_avg_variance8x4);
-
- HIGHBD_BFP(
- BLOCK_4X8, aom_highbd_sad4x8_bits10, aom_highbd_sad4x8_avg_bits10,
- aom_highbd_10_variance4x8, aom_highbd_10_sub_pixel_variance4x8,
- aom_highbd_10_sub_pixel_avg_variance4x8,
- aom_highbd_sad4x8x4d_bits10, aom_highbd_jnt_sad4x8_avg_bits10,
- aom_highbd_10_jnt_sub_pixel_avg_variance4x8);
-
- HIGHBD_BFP(
- BLOCK_4X4, aom_highbd_sad4x4_bits10, aom_highbd_sad4x4_avg_bits10,
- aom_highbd_10_variance4x4, aom_highbd_10_sub_pixel_variance4x4,
- aom_highbd_10_sub_pixel_avg_variance4x4,
- aom_highbd_sad4x4x4d_bits10, aom_highbd_jnt_sad4x4_avg_bits10,
- aom_highbd_10_jnt_sub_pixel_avg_variance4x4);
-
- HIGHBD_BFP(BLOCK_128X128, aom_highbd_sad128x128_bits10,
- aom_highbd_sad128x128_avg_bits10,
- aom_highbd_10_variance128x128,
- aom_highbd_10_sub_pixel_variance128x128,
- aom_highbd_10_sub_pixel_avg_variance128x128,
- aom_highbd_sad128x128x4d_bits10,
- aom_highbd_jnt_sad128x128_avg_bits10,
- aom_highbd_10_jnt_sub_pixel_avg_variance128x128);
-
- HIGHBD_BFP(
- BLOCK_128X64, aom_highbd_sad128x64_bits10,
- aom_highbd_sad128x64_avg_bits10, aom_highbd_10_variance128x64,
- aom_highbd_10_sub_pixel_variance128x64,
- aom_highbd_10_sub_pixel_avg_variance128x64,
- aom_highbd_sad128x64x4d_bits10, aom_highbd_jnt_sad128x64_avg_bits10,
- aom_highbd_10_jnt_sub_pixel_avg_variance128x64);
-
- HIGHBD_BFP(
- BLOCK_64X128, aom_highbd_sad64x128_bits10,
- aom_highbd_sad64x128_avg_bits10, aom_highbd_10_variance64x128,
- aom_highbd_10_sub_pixel_variance64x128,
- aom_highbd_10_sub_pixel_avg_variance64x128,
- aom_highbd_sad64x128x4d_bits10, aom_highbd_jnt_sad64x128_avg_bits10,
- aom_highbd_10_jnt_sub_pixel_avg_variance64x128);
-
- HIGHBD_MBFP(BLOCK_128X128, aom_highbd_masked_sad128x128_bits10,
- aom_highbd_10_masked_sub_pixel_variance128x128)
- HIGHBD_MBFP(BLOCK_128X64, aom_highbd_masked_sad128x64_bits10,
- aom_highbd_10_masked_sub_pixel_variance128x64)
- HIGHBD_MBFP(BLOCK_64X128, aom_highbd_masked_sad64x128_bits10,
- aom_highbd_10_masked_sub_pixel_variance64x128)
- HIGHBD_MBFP(BLOCK_64X64, aom_highbd_masked_sad64x64_bits10,
- aom_highbd_10_masked_sub_pixel_variance64x64)
- HIGHBD_MBFP(BLOCK_64X32, aom_highbd_masked_sad64x32_bits10,
- aom_highbd_10_masked_sub_pixel_variance64x32)
- HIGHBD_MBFP(BLOCK_32X64, aom_highbd_masked_sad32x64_bits10,
- aom_highbd_10_masked_sub_pixel_variance32x64)
- HIGHBD_MBFP(BLOCK_32X32, aom_highbd_masked_sad32x32_bits10,
- aom_highbd_10_masked_sub_pixel_variance32x32)
- HIGHBD_MBFP(BLOCK_32X16, aom_highbd_masked_sad32x16_bits10,
- aom_highbd_10_masked_sub_pixel_variance32x16)
- HIGHBD_MBFP(BLOCK_16X32, aom_highbd_masked_sad16x32_bits10,
- aom_highbd_10_masked_sub_pixel_variance16x32)
- HIGHBD_MBFP(BLOCK_16X16, aom_highbd_masked_sad16x16_bits10,
- aom_highbd_10_masked_sub_pixel_variance16x16)
- HIGHBD_MBFP(BLOCK_8X16, aom_highbd_masked_sad8x16_bits10,
- aom_highbd_10_masked_sub_pixel_variance8x16)
- HIGHBD_MBFP(BLOCK_16X8, aom_highbd_masked_sad16x8_bits10,
- aom_highbd_10_masked_sub_pixel_variance16x8)
- HIGHBD_MBFP(BLOCK_8X8, aom_highbd_masked_sad8x8_bits10,
- aom_highbd_10_masked_sub_pixel_variance8x8)
- HIGHBD_MBFP(BLOCK_4X8, aom_highbd_masked_sad4x8_bits10,
- aom_highbd_10_masked_sub_pixel_variance4x8)
- HIGHBD_MBFP(BLOCK_8X4, aom_highbd_masked_sad8x4_bits10,
- aom_highbd_10_masked_sub_pixel_variance8x4)
- HIGHBD_MBFP(BLOCK_4X4, aom_highbd_masked_sad4x4_bits10,
- aom_highbd_10_masked_sub_pixel_variance4x4)
- HIGHBD_MBFP(BLOCK_64X16, aom_highbd_masked_sad64x16_bits10,
- aom_highbd_10_masked_sub_pixel_variance64x16)
- HIGHBD_MBFP(BLOCK_16X64, aom_highbd_masked_sad16x64_bits10,
- aom_highbd_10_masked_sub_pixel_variance16x64)
- HIGHBD_MBFP(BLOCK_32X8, aom_highbd_masked_sad32x8_bits10,
- aom_highbd_10_masked_sub_pixel_variance32x8)
- HIGHBD_MBFP(BLOCK_8X32, aom_highbd_masked_sad8x32_bits10,
- aom_highbd_10_masked_sub_pixel_variance8x32)
- HIGHBD_MBFP(BLOCK_16X4, aom_highbd_masked_sad16x4_bits10,
- aom_highbd_10_masked_sub_pixel_variance16x4)
- HIGHBD_MBFP(BLOCK_4X16, aom_highbd_masked_sad4x16_bits10,
- aom_highbd_10_masked_sub_pixel_variance4x16)
- HIGHBD_OBFP(BLOCK_128X128, aom_highbd_obmc_sad128x128_bits10,
- aom_highbd_10_obmc_variance128x128,
- aom_highbd_10_obmc_sub_pixel_variance128x128)
- HIGHBD_OBFP(BLOCK_128X64, aom_highbd_obmc_sad128x64_bits10,
- aom_highbd_10_obmc_variance128x64,
- aom_highbd_10_obmc_sub_pixel_variance128x64)
- HIGHBD_OBFP(BLOCK_64X128, aom_highbd_obmc_sad64x128_bits10,
- aom_highbd_10_obmc_variance64x128,
- aom_highbd_10_obmc_sub_pixel_variance64x128)
- HIGHBD_OBFP(BLOCK_64X64, aom_highbd_obmc_sad64x64_bits10,
- aom_highbd_10_obmc_variance64x64,
- aom_highbd_10_obmc_sub_pixel_variance64x64)
- HIGHBD_OBFP(BLOCK_64X32, aom_highbd_obmc_sad64x32_bits10,
- aom_highbd_10_obmc_variance64x32,
- aom_highbd_10_obmc_sub_pixel_variance64x32)
- HIGHBD_OBFP(BLOCK_32X64, aom_highbd_obmc_sad32x64_bits10,
- aom_highbd_10_obmc_variance32x64,
- aom_highbd_10_obmc_sub_pixel_variance32x64)
- HIGHBD_OBFP(BLOCK_32X32, aom_highbd_obmc_sad32x32_bits10,
- aom_highbd_10_obmc_variance32x32,
- aom_highbd_10_obmc_sub_pixel_variance32x32)
- HIGHBD_OBFP(BLOCK_32X16, aom_highbd_obmc_sad32x16_bits10,
- aom_highbd_10_obmc_variance32x16,
- aom_highbd_10_obmc_sub_pixel_variance32x16)
- HIGHBD_OBFP(BLOCK_16X32, aom_highbd_obmc_sad16x32_bits10,
- aom_highbd_10_obmc_variance16x32,
- aom_highbd_10_obmc_sub_pixel_variance16x32)
- HIGHBD_OBFP(BLOCK_16X16, aom_highbd_obmc_sad16x16_bits10,
- aom_highbd_10_obmc_variance16x16,
- aom_highbd_10_obmc_sub_pixel_variance16x16)
- HIGHBD_OBFP(BLOCK_8X16, aom_highbd_obmc_sad8x16_bits10,
- aom_highbd_10_obmc_variance8x16,
- aom_highbd_10_obmc_sub_pixel_variance8x16)
- HIGHBD_OBFP(BLOCK_16X8, aom_highbd_obmc_sad16x8_bits10,
- aom_highbd_10_obmc_variance16x8,
- aom_highbd_10_obmc_sub_pixel_variance16x8)
- HIGHBD_OBFP(BLOCK_8X8, aom_highbd_obmc_sad8x8_bits10,
- aom_highbd_10_obmc_variance8x8,
- aom_highbd_10_obmc_sub_pixel_variance8x8)
- HIGHBD_OBFP(BLOCK_4X8, aom_highbd_obmc_sad4x8_bits10,
- aom_highbd_10_obmc_variance4x8,
- aom_highbd_10_obmc_sub_pixel_variance4x8)
- HIGHBD_OBFP(BLOCK_8X4, aom_highbd_obmc_sad8x4_bits10,
- aom_highbd_10_obmc_variance8x4,
- aom_highbd_10_obmc_sub_pixel_variance8x4)
- HIGHBD_OBFP(BLOCK_4X4, aom_highbd_obmc_sad4x4_bits10,
- aom_highbd_10_obmc_variance4x4,
- aom_highbd_10_obmc_sub_pixel_variance4x4)
-
- HIGHBD_OBFP(BLOCK_64X16, aom_highbd_obmc_sad64x16_bits10,
- aom_highbd_10_obmc_variance64x16,
- aom_highbd_10_obmc_sub_pixel_variance64x16)
-
- HIGHBD_OBFP(BLOCK_16X64, aom_highbd_obmc_sad16x64_bits10,
- aom_highbd_10_obmc_variance16x64,
- aom_highbd_10_obmc_sub_pixel_variance16x64)
-
- HIGHBD_OBFP(BLOCK_32X8, aom_highbd_obmc_sad32x8_bits10,
- aom_highbd_10_obmc_variance32x8,
- aom_highbd_10_obmc_sub_pixel_variance32x8)
-
- HIGHBD_OBFP(BLOCK_8X32, aom_highbd_obmc_sad8x32_bits10,
- aom_highbd_10_obmc_variance8x32,
- aom_highbd_10_obmc_sub_pixel_variance8x32)
-
- HIGHBD_OBFP(BLOCK_16X4, aom_highbd_obmc_sad16x4_bits10,
- aom_highbd_10_obmc_variance16x4,
- aom_highbd_10_obmc_sub_pixel_variance16x4)
-
- HIGHBD_OBFP(BLOCK_4X16, aom_highbd_obmc_sad4x16_bits10,
- aom_highbd_10_obmc_variance4x16,
- aom_highbd_10_obmc_sub_pixel_variance4x16)
- break;
-
- case AOM_BITS_12:
- HIGHBD_BFP(BLOCK_64X16, aom_highbd_sad64x16_bits12,
- aom_highbd_sad64x16_avg_bits12, aom_highbd_12_variance64x16,
- aom_highbd_12_sub_pixel_variance64x16,
- aom_highbd_12_sub_pixel_avg_variance64x16,
- aom_highbd_sad64x16x4d_bits12,
- aom_highbd_jnt_sad64x16_avg_bits12,
- aom_highbd_12_jnt_sub_pixel_avg_variance64x16);
-
- HIGHBD_BFP(BLOCK_16X64, aom_highbd_sad16x64_bits12,
- aom_highbd_sad16x64_avg_bits12, aom_highbd_12_variance16x64,
- aom_highbd_12_sub_pixel_variance16x64,
- aom_highbd_12_sub_pixel_avg_variance16x64,
- aom_highbd_sad16x64x4d_bits12,
- aom_highbd_jnt_sad16x64_avg_bits12,
- aom_highbd_12_jnt_sub_pixel_avg_variance16x64);
-
- HIGHBD_BFP(BLOCK_32X8, aom_highbd_sad32x8_bits12,
- aom_highbd_sad32x8_avg_bits12, aom_highbd_12_variance32x8,
- aom_highbd_12_sub_pixel_variance32x8,
- aom_highbd_12_sub_pixel_avg_variance32x8,
- aom_highbd_sad32x8x4d_bits12,
- aom_highbd_jnt_sad32x8_avg_bits12,
- aom_highbd_12_jnt_sub_pixel_avg_variance32x8);
-
- HIGHBD_BFP(BLOCK_8X32, aom_highbd_sad8x32_bits12,
- aom_highbd_sad8x32_avg_bits12, aom_highbd_12_variance8x32,
- aom_highbd_12_sub_pixel_variance8x32,
- aom_highbd_12_sub_pixel_avg_variance8x32,
- aom_highbd_sad8x32x4d_bits12,
- aom_highbd_jnt_sad8x32_avg_bits12,
- aom_highbd_12_jnt_sub_pixel_avg_variance8x32);
-
- HIGHBD_BFP(BLOCK_16X4, aom_highbd_sad16x4_bits12,
- aom_highbd_sad16x4_avg_bits12, aom_highbd_12_variance16x4,
- aom_highbd_12_sub_pixel_variance16x4,
- aom_highbd_12_sub_pixel_avg_variance16x4,
- aom_highbd_sad16x4x4d_bits12,
- aom_highbd_jnt_sad16x4_avg_bits12,
- aom_highbd_12_jnt_sub_pixel_avg_variance16x4);
-
- HIGHBD_BFP(BLOCK_4X16, aom_highbd_sad4x16_bits12,
- aom_highbd_sad4x16_avg_bits12, aom_highbd_12_variance4x16,
- aom_highbd_12_sub_pixel_variance4x16,
- aom_highbd_12_sub_pixel_avg_variance4x16,
- aom_highbd_sad4x16x4d_bits12,
- aom_highbd_jnt_sad4x16_avg_bits12,
- aom_highbd_12_jnt_sub_pixel_avg_variance4x16);
-
- HIGHBD_BFP(BLOCK_32X16, aom_highbd_sad32x16_bits12,
- aom_highbd_sad32x16_avg_bits12, aom_highbd_12_variance32x16,
- aom_highbd_12_sub_pixel_variance32x16,
- aom_highbd_12_sub_pixel_avg_variance32x16,
- aom_highbd_sad32x16x4d_bits12,
- aom_highbd_jnt_sad32x16_avg_bits12,
- aom_highbd_12_jnt_sub_pixel_avg_variance32x16);
-
- HIGHBD_BFP(BLOCK_16X32, aom_highbd_sad16x32_bits12,
- aom_highbd_sad16x32_avg_bits12, aom_highbd_12_variance16x32,
- aom_highbd_12_sub_pixel_variance16x32,
- aom_highbd_12_sub_pixel_avg_variance16x32,
- aom_highbd_sad16x32x4d_bits12,
- aom_highbd_jnt_sad16x32_avg_bits12,
- aom_highbd_12_jnt_sub_pixel_avg_variance16x32);
-
- HIGHBD_BFP(BLOCK_64X32, aom_highbd_sad64x32_bits12,
- aom_highbd_sad64x32_avg_bits12, aom_highbd_12_variance64x32,
- aom_highbd_12_sub_pixel_variance64x32,
- aom_highbd_12_sub_pixel_avg_variance64x32,
- aom_highbd_sad64x32x4d_bits12,
- aom_highbd_jnt_sad64x32_avg_bits12,
- aom_highbd_12_jnt_sub_pixel_avg_variance64x32);
-
- HIGHBD_BFP(BLOCK_32X64, aom_highbd_sad32x64_bits12,
- aom_highbd_sad32x64_avg_bits12, aom_highbd_12_variance32x64,
- aom_highbd_12_sub_pixel_variance32x64,
- aom_highbd_12_sub_pixel_avg_variance32x64,
- aom_highbd_sad32x64x4d_bits12,
- aom_highbd_jnt_sad32x64_avg_bits12,
- aom_highbd_12_jnt_sub_pixel_avg_variance32x64);
-
- HIGHBD_BFP(BLOCK_32X32, aom_highbd_sad32x32_bits12,
- aom_highbd_sad32x32_avg_bits12, aom_highbd_12_variance32x32,
- aom_highbd_12_sub_pixel_variance32x32,
- aom_highbd_12_sub_pixel_avg_variance32x32,
- aom_highbd_sad32x32x4d_bits12,
- aom_highbd_jnt_sad32x32_avg_bits12,
- aom_highbd_12_jnt_sub_pixel_avg_variance32x32);
-
- HIGHBD_BFP(BLOCK_64X64, aom_highbd_sad64x64_bits12,
- aom_highbd_sad64x64_avg_bits12, aom_highbd_12_variance64x64,
- aom_highbd_12_sub_pixel_variance64x64,
- aom_highbd_12_sub_pixel_avg_variance64x64,
- aom_highbd_sad64x64x4d_bits12,
- aom_highbd_jnt_sad64x64_avg_bits12,
- aom_highbd_12_jnt_sub_pixel_avg_variance64x64);
-
- HIGHBD_BFP(BLOCK_16X16, aom_highbd_sad16x16_bits12,
- aom_highbd_sad16x16_avg_bits12, aom_highbd_12_variance16x16,
- aom_highbd_12_sub_pixel_variance16x16,
- aom_highbd_12_sub_pixel_avg_variance16x16,
- aom_highbd_sad16x16x4d_bits12,
- aom_highbd_jnt_sad16x16_avg_bits12,
- aom_highbd_12_jnt_sub_pixel_avg_variance16x16);
-
- HIGHBD_BFP(BLOCK_16X8, aom_highbd_sad16x8_bits12,
- aom_highbd_sad16x8_avg_bits12, aom_highbd_12_variance16x8,
- aom_highbd_12_sub_pixel_variance16x8,
- aom_highbd_12_sub_pixel_avg_variance16x8,
- aom_highbd_sad16x8x4d_bits12,
- aom_highbd_jnt_sad16x8_avg_bits12,
- aom_highbd_12_jnt_sub_pixel_avg_variance16x8);
-
- HIGHBD_BFP(BLOCK_8X16, aom_highbd_sad8x16_bits12,
- aom_highbd_sad8x16_avg_bits12, aom_highbd_12_variance8x16,
- aom_highbd_12_sub_pixel_variance8x16,
- aom_highbd_12_sub_pixel_avg_variance8x16,
- aom_highbd_sad8x16x4d_bits12,
- aom_highbd_jnt_sad8x16_avg_bits12,
- aom_highbd_12_jnt_sub_pixel_avg_variance8x16);
-
- HIGHBD_BFP(
- BLOCK_8X8, aom_highbd_sad8x8_bits12, aom_highbd_sad8x8_avg_bits12,
- aom_highbd_12_variance8x8, aom_highbd_12_sub_pixel_variance8x8,
- aom_highbd_12_sub_pixel_avg_variance8x8,
- aom_highbd_sad8x8x4d_bits12, aom_highbd_jnt_sad8x8_avg_bits12,
- aom_highbd_12_jnt_sub_pixel_avg_variance8x8);
-
- HIGHBD_BFP(
- BLOCK_8X4, aom_highbd_sad8x4_bits12, aom_highbd_sad8x4_avg_bits12,
- aom_highbd_12_variance8x4, aom_highbd_12_sub_pixel_variance8x4,
- aom_highbd_12_sub_pixel_avg_variance8x4,
- aom_highbd_sad8x4x4d_bits12, aom_highbd_jnt_sad8x4_avg_bits12,
- aom_highbd_12_jnt_sub_pixel_avg_variance8x4);
-
- HIGHBD_BFP(
- BLOCK_4X8, aom_highbd_sad4x8_bits12, aom_highbd_sad4x8_avg_bits12,
- aom_highbd_12_variance4x8, aom_highbd_12_sub_pixel_variance4x8,
- aom_highbd_12_sub_pixel_avg_variance4x8,
- aom_highbd_sad4x8x4d_bits12, aom_highbd_jnt_sad4x8_avg_bits12,
- aom_highbd_12_jnt_sub_pixel_avg_variance4x8);
-
- HIGHBD_BFP(
- BLOCK_4X4, aom_highbd_sad4x4_bits12, aom_highbd_sad4x4_avg_bits12,
- aom_highbd_12_variance4x4, aom_highbd_12_sub_pixel_variance4x4,
- aom_highbd_12_sub_pixel_avg_variance4x4,
- aom_highbd_sad4x4x4d_bits12, aom_highbd_jnt_sad4x4_avg_bits12,
- aom_highbd_12_jnt_sub_pixel_avg_variance4x4);
-
- HIGHBD_BFP(BLOCK_128X128, aom_highbd_sad128x128_bits12,
- aom_highbd_sad128x128_avg_bits12,
- aom_highbd_12_variance128x128,
- aom_highbd_12_sub_pixel_variance128x128,
- aom_highbd_12_sub_pixel_avg_variance128x128,
- aom_highbd_sad128x128x4d_bits12,
- aom_highbd_jnt_sad128x128_avg_bits12,
- aom_highbd_12_jnt_sub_pixel_avg_variance128x128);
-
- HIGHBD_BFP(
- BLOCK_128X64, aom_highbd_sad128x64_bits12,
- aom_highbd_sad128x64_avg_bits12, aom_highbd_12_variance128x64,
- aom_highbd_12_sub_pixel_variance128x64,
- aom_highbd_12_sub_pixel_avg_variance128x64,
- aom_highbd_sad128x64x4d_bits12, aom_highbd_jnt_sad128x64_avg_bits12,
- aom_highbd_12_jnt_sub_pixel_avg_variance128x64);
-
- HIGHBD_BFP(
- BLOCK_64X128, aom_highbd_sad64x128_bits12,
- aom_highbd_sad64x128_avg_bits12, aom_highbd_12_variance64x128,
- aom_highbd_12_sub_pixel_variance64x128,
- aom_highbd_12_sub_pixel_avg_variance64x128,
- aom_highbd_sad64x128x4d_bits12, aom_highbd_jnt_sad64x128_avg_bits12,
- aom_highbd_12_jnt_sub_pixel_avg_variance64x128);
-
- HIGHBD_MBFP(BLOCK_128X128, aom_highbd_masked_sad128x128_bits12,
- aom_highbd_12_masked_sub_pixel_variance128x128)
- HIGHBD_MBFP(BLOCK_128X64, aom_highbd_masked_sad128x64_bits12,
- aom_highbd_12_masked_sub_pixel_variance128x64)
- HIGHBD_MBFP(BLOCK_64X128, aom_highbd_masked_sad64x128_bits12,
- aom_highbd_12_masked_sub_pixel_variance64x128)
- HIGHBD_MBFP(BLOCK_64X64, aom_highbd_masked_sad64x64_bits12,
- aom_highbd_12_masked_sub_pixel_variance64x64)
- HIGHBD_MBFP(BLOCK_64X32, aom_highbd_masked_sad64x32_bits12,
- aom_highbd_12_masked_sub_pixel_variance64x32)
- HIGHBD_MBFP(BLOCK_32X64, aom_highbd_masked_sad32x64_bits12,
- aom_highbd_12_masked_sub_pixel_variance32x64)
- HIGHBD_MBFP(BLOCK_32X32, aom_highbd_masked_sad32x32_bits12,
- aom_highbd_12_masked_sub_pixel_variance32x32)
- HIGHBD_MBFP(BLOCK_32X16, aom_highbd_masked_sad32x16_bits12,
- aom_highbd_12_masked_sub_pixel_variance32x16)
- HIGHBD_MBFP(BLOCK_16X32, aom_highbd_masked_sad16x32_bits12,
- aom_highbd_12_masked_sub_pixel_variance16x32)
- HIGHBD_MBFP(BLOCK_16X16, aom_highbd_masked_sad16x16_bits12,
- aom_highbd_12_masked_sub_pixel_variance16x16)
- HIGHBD_MBFP(BLOCK_8X16, aom_highbd_masked_sad8x16_bits12,
- aom_highbd_12_masked_sub_pixel_variance8x16)
- HIGHBD_MBFP(BLOCK_16X8, aom_highbd_masked_sad16x8_bits12,
- aom_highbd_12_masked_sub_pixel_variance16x8)
- HIGHBD_MBFP(BLOCK_8X8, aom_highbd_masked_sad8x8_bits12,
- aom_highbd_12_masked_sub_pixel_variance8x8)
- HIGHBD_MBFP(BLOCK_4X8, aom_highbd_masked_sad4x8_bits12,
- aom_highbd_12_masked_sub_pixel_variance4x8)
- HIGHBD_MBFP(BLOCK_8X4, aom_highbd_masked_sad8x4_bits12,
- aom_highbd_12_masked_sub_pixel_variance8x4)
- HIGHBD_MBFP(BLOCK_4X4, aom_highbd_masked_sad4x4_bits12,
- aom_highbd_12_masked_sub_pixel_variance4x4)
- HIGHBD_MBFP(BLOCK_64X16, aom_highbd_masked_sad64x16_bits12,
- aom_highbd_12_masked_sub_pixel_variance64x16)
- HIGHBD_MBFP(BLOCK_16X64, aom_highbd_masked_sad16x64_bits12,
- aom_highbd_12_masked_sub_pixel_variance16x64)
- HIGHBD_MBFP(BLOCK_32X8, aom_highbd_masked_sad32x8_bits12,
- aom_highbd_12_masked_sub_pixel_variance32x8)
- HIGHBD_MBFP(BLOCK_8X32, aom_highbd_masked_sad8x32_bits12,
- aom_highbd_12_masked_sub_pixel_variance8x32)
- HIGHBD_MBFP(BLOCK_16X4, aom_highbd_masked_sad16x4_bits12,
- aom_highbd_12_masked_sub_pixel_variance16x4)
- HIGHBD_MBFP(BLOCK_4X16, aom_highbd_masked_sad4x16_bits12,
- aom_highbd_12_masked_sub_pixel_variance4x16)
- HIGHBD_OBFP(BLOCK_128X128, aom_highbd_obmc_sad128x128_bits12,
- aom_highbd_12_obmc_variance128x128,
- aom_highbd_12_obmc_sub_pixel_variance128x128)
- HIGHBD_OBFP(BLOCK_128X64, aom_highbd_obmc_sad128x64_bits12,
- aom_highbd_12_obmc_variance128x64,
- aom_highbd_12_obmc_sub_pixel_variance128x64)
- HIGHBD_OBFP(BLOCK_64X128, aom_highbd_obmc_sad64x128_bits12,
- aom_highbd_12_obmc_variance64x128,
- aom_highbd_12_obmc_sub_pixel_variance64x128)
- HIGHBD_OBFP(BLOCK_64X64, aom_highbd_obmc_sad64x64_bits12,
- aom_highbd_12_obmc_variance64x64,
- aom_highbd_12_obmc_sub_pixel_variance64x64)
- HIGHBD_OBFP(BLOCK_64X32, aom_highbd_obmc_sad64x32_bits12,
- aom_highbd_12_obmc_variance64x32,
- aom_highbd_12_obmc_sub_pixel_variance64x32)
- HIGHBD_OBFP(BLOCK_32X64, aom_highbd_obmc_sad32x64_bits12,
- aom_highbd_12_obmc_variance32x64,
- aom_highbd_12_obmc_sub_pixel_variance32x64)
- HIGHBD_OBFP(BLOCK_32X32, aom_highbd_obmc_sad32x32_bits12,
- aom_highbd_12_obmc_variance32x32,
- aom_highbd_12_obmc_sub_pixel_variance32x32)
- HIGHBD_OBFP(BLOCK_32X16, aom_highbd_obmc_sad32x16_bits12,
- aom_highbd_12_obmc_variance32x16,
- aom_highbd_12_obmc_sub_pixel_variance32x16)
- HIGHBD_OBFP(BLOCK_16X32, aom_highbd_obmc_sad16x32_bits12,
- aom_highbd_12_obmc_variance16x32,
- aom_highbd_12_obmc_sub_pixel_variance16x32)
- HIGHBD_OBFP(BLOCK_16X16, aom_highbd_obmc_sad16x16_bits12,
- aom_highbd_12_obmc_variance16x16,
- aom_highbd_12_obmc_sub_pixel_variance16x16)
- HIGHBD_OBFP(BLOCK_8X16, aom_highbd_obmc_sad8x16_bits12,
- aom_highbd_12_obmc_variance8x16,
- aom_highbd_12_obmc_sub_pixel_variance8x16)
- HIGHBD_OBFP(BLOCK_16X8, aom_highbd_obmc_sad16x8_bits12,
- aom_highbd_12_obmc_variance16x8,
- aom_highbd_12_obmc_sub_pixel_variance16x8)
- HIGHBD_OBFP(BLOCK_8X8, aom_highbd_obmc_sad8x8_bits12,
- aom_highbd_12_obmc_variance8x8,
- aom_highbd_12_obmc_sub_pixel_variance8x8)
- HIGHBD_OBFP(BLOCK_4X8, aom_highbd_obmc_sad4x8_bits12,
- aom_highbd_12_obmc_variance4x8,
- aom_highbd_12_obmc_sub_pixel_variance4x8)
- HIGHBD_OBFP(BLOCK_8X4, aom_highbd_obmc_sad8x4_bits12,
- aom_highbd_12_obmc_variance8x4,
- aom_highbd_12_obmc_sub_pixel_variance8x4)
- HIGHBD_OBFP(BLOCK_4X4, aom_highbd_obmc_sad4x4_bits12,
- aom_highbd_12_obmc_variance4x4,
- aom_highbd_12_obmc_sub_pixel_variance4x4)
- HIGHBD_OBFP(BLOCK_64X16, aom_highbd_obmc_sad64x16_bits12,
- aom_highbd_12_obmc_variance64x16,
- aom_highbd_12_obmc_sub_pixel_variance64x16)
- HIGHBD_OBFP(BLOCK_16X64, aom_highbd_obmc_sad16x64_bits12,
- aom_highbd_12_obmc_variance16x64,
- aom_highbd_12_obmc_sub_pixel_variance16x64)
- HIGHBD_OBFP(BLOCK_32X8, aom_highbd_obmc_sad32x8_bits12,
- aom_highbd_12_obmc_variance32x8,
- aom_highbd_12_obmc_sub_pixel_variance32x8)
- HIGHBD_OBFP(BLOCK_8X32, aom_highbd_obmc_sad8x32_bits12,
- aom_highbd_12_obmc_variance8x32,
- aom_highbd_12_obmc_sub_pixel_variance8x32)
- HIGHBD_OBFP(BLOCK_16X4, aom_highbd_obmc_sad16x4_bits12,
- aom_highbd_12_obmc_variance16x4,
- aom_highbd_12_obmc_sub_pixel_variance16x4)
- HIGHBD_OBFP(BLOCK_4X16, aom_highbd_obmc_sad4x16_bits12,
- aom_highbd_12_obmc_variance4x16,
- aom_highbd_12_obmc_sub_pixel_variance4x16)
- break;
-
- default:
- assert(0 &&
- "cm->seq_params.bit_depth should be AOM_BITS_8, "
- "AOM_BITS_10 or AOM_BITS_12");
- }
- }
-}
-
-// Releases and re-creates the three maps kept on the encoder instance: the
-// segmentation map, the cyclic refresh state and the active map. The two
-// byte maps are sized mi_rows * mi_cols and come back zeroed from
-// aom_calloc. Every allocation goes through CHECK_MEM_ERROR.
-static void realloc_segmentation_maps(AV1_COMP *cpi) {
-  AV1_COMMON *const common = &cpi->common;
-
-  // Encoder segmentation map: drop the old buffer, then allocate a zeroed one.
-  aom_free(cpi->segmentation_map);
-  CHECK_MEM_ERROR(common, cpi->segmentation_map,
-                  aom_calloc(common->mi_rows * common->mi_cols, 1));
-
-  // Cyclic background refresh state: only free it when one already exists.
-  if (cpi->cyclic_refresh != NULL) {
-    av1_cyclic_refresh_free(cpi->cyclic_refresh);
-  }
-  CHECK_MEM_ERROR(common, cpi->cyclic_refresh,
-                  av1_cyclic_refresh_alloc(common->mi_rows, common->mi_cols));
-
-  // Map used to mark inactive areas, same size as the segmentation map.
-  aom_free(cpi->active_map.map);
-  CHECK_MEM_ERROR(common, cpi->active_map.map,
-                  aom_calloc(common->mi_rows * common->mi_cols, 1));
-}
-/*
- * Applies the encoder configuration in oxcf to the compressor instance cpi.
- *
- * cpi  - compressor instance, updated in place.
- * oxcf - new configuration; it is only read here and a copy of it is stored
- *        in cpi->oxcf part way through the function.
- *
- * In order, the function: copies the profile, bit depth, color description
- * and timing information into the common state; fills in op_params[0] for
- * the decoder model in use; updates the film grain parameters; stores the
- * configuration copy; allocates the palette, convolve and OBMC scratch
- * buffers if they are still NULL; refreshes the rate control buffer sizes,
- * frame rate and quality limits; sets the render and frame dimensions;
- * reallocates the context buffers when the frame exceeds the initial size
- * or the superblock size changed; and calls set_tile_info() and
- * highbd_set_var_fns(). Sequence level settings (superblock size, operating
- * point count, coding tools) are only written while cpi->seq_params_locked
- * is zero. Allocations are wrapped in CHECK_MEM_ERROR.
- */
-void av1_change_config(struct AV1_COMP *cpi, const AV1EncoderConfig *oxcf) {
- AV1_COMMON *const cm = &cpi->common;
- SequenceHeader *const seq_params = &cm->seq_params;
- const int num_planes = av1_num_planes(cm);
- RATE_CONTROL *const rc = &cpi->rc;
- MACROBLOCK *const x = &cpi->td.mb;
- // Mirror the profile, bit depth and color description of oxcf into the sequence header.
- if (seq_params->profile != oxcf->profile) seq_params->profile = oxcf->profile;
- seq_params->bit_depth = oxcf->bit_depth;
- seq_params->color_primaries = oxcf->color_primaries;
- seq_params->transfer_characteristics = oxcf->transfer_characteristics;
- seq_params->matrix_coefficients = oxcf->matrix_coefficients;
- seq_params->monochrome = oxcf->monochrome;
- seq_params->chroma_sample_position = oxcf->chroma_sample_position;
- seq_params->color_range = oxcf->color_range;
- // A profile no greater than PROFILE_1 must not be paired with a bit depth above AOM_BITS_10.
- assert(IMPLIES(seq_params->profile <= PROFILE_1,
- seq_params->bit_depth <= AOM_BITS_10));
- // Copy the timing information fields.
- cm->timing_info_present = oxcf->timing_info_present;
- cm->timing_info.num_units_in_display_tick =
- oxcf->timing_info.num_units_in_display_tick;
- cm->timing_info.time_scale = oxcf->timing_info.time_scale;
- cm->timing_info.equal_picture_interval =
- oxcf->timing_info.equal_picture_interval;
- cm->timing_info.num_ticks_per_picture =
- oxcf->timing_info.num_ticks_per_picture;
- // Record which model info is present, then fill in op_params[0] for the model in use.
- seq_params->display_model_info_present_flag =
- oxcf->display_model_info_present_flag;
- seq_params->decoder_model_info_present_flag =
- oxcf->decoder_model_info_present_flag;
- if (oxcf->decoder_model_info_present_flag) {
- // Set the decoder model parameters in schedule mode.
- cm->buffer_model.num_units_in_decoding_tick =
- oxcf->buffer_model.num_units_in_decoding_tick;
- cm->buffer_removal_time_present = 1;
- set_aom_dec_model_info(&cm->buffer_model);
- set_dec_model_op_parameters(&cm->op_params[0]);
- } else if (cm->timing_info_present &&
- cm->timing_info.equal_picture_interval &&
- !seq_params->decoder_model_info_present_flag) { // NOTE(review): last test looks always true here, the flag was just copied from oxcf and the first branch was not taken.
- // Set the decoder model parameters in resource availability mode.
- set_resource_availability_parameters(&cm->op_params[0]);
- } else {
- cm->op_params[0].initial_display_delay =
- 10; // Default value (not signaled)
- }
-
- update_film_grain_parameters(cpi, oxcf);
- // Store a copy of the configuration; several statements below read it back through cpi->oxcf.
- cpi->oxcf = *oxcf;
- cpi->common.options = oxcf->cfg;
- cpi->row_mt = oxcf->row_mt;
- x->e_mbd.bd = (int)seq_params->bit_depth;
- x->e_mbd.global_motion = cm->global_motion;
- // Baseline GF interval: fixed for pass 0 with AOM_Q rate control, otherwise the midpoint of the MIN/MAX range.
- if ((oxcf->pass == 0) && (oxcf->rc_mode == AOM_Q)) {
- rc->baseline_gf_interval = FIXED_GF_INTERVAL;
- } else {
- rc->baseline_gf_interval = (MIN_GF_INTERVAL + MAX_GF_INTERVAL) / 2;
- }
- // Only the last-frame refresh flag starts out set.
- cpi->refresh_last_frame = 1;
- cpi->refresh_golden_frame = 0;
- cpi->refresh_bwd_ref_frame = 0;
- cpi->refresh_alt2_ref_frame = 0;
- // Frame context refresh is backward unless frame parallel decoding or large scale tile mode disables it.
- cm->refresh_frame_context = (oxcf->frame_parallel_decoding_mode)
- ? REFRESH_FRAME_CONTEXT_DISABLED
- : REFRESH_FRAME_CONTEXT_BACKWARD;
- if (oxcf->large_scale_tile)
- cm->refresh_frame_context = REFRESH_FRAME_CONTEXT_DISABLED;
- // The scratch buffers below are allocated only while their pointer is still NULL.
- if (x->palette_buffer == NULL) {
- CHECK_MEM_ERROR(cm, x->palette_buffer,
- aom_memalign(16, sizeof(*x->palette_buffer)));
- }
-
- if (x->tmp_conv_dst == NULL) {
- CHECK_MEM_ERROR(
- cm, x->tmp_conv_dst,
- aom_memalign(32, MAX_SB_SIZE * MAX_SB_SIZE * sizeof(*x->tmp_conv_dst)));
- x->e_mbd.tmp_conv_dst = x->tmp_conv_dst; // Share the same buffer with the macroblockd state.
- }
- for (int i = 0; i < 2; ++i) {
- if (x->tmp_obmc_bufs[i] == NULL) {
- CHECK_MEM_ERROR(cm, x->tmp_obmc_bufs[i],
- aom_memalign(16, 2 * MAX_MB_PLANE * MAX_SB_SQUARE *
- sizeof(*x->tmp_obmc_bufs[i])));
- x->e_mbd.tmp_obmc_bufs[i] = x->tmp_obmc_bufs[i];
- }
- }
-
- av1_reset_segment_features(cm);
- set_high_precision_mv(cpi, 1, 0);
-
- set_rc_buffer_sizes(rc, &cpi->oxcf);
-
- // Under a configuration change, where maximum_buffer_size may change,
- // keep buffer level clipped to the maximum allowed buffer size.
- rc->bits_off_target = AOMMIN(rc->bits_off_target, rc->maximum_buffer_size);
- rc->buffer_level = AOMMIN(rc->buffer_level, rc->maximum_buffer_size);
-
- // Set up the frame rate and the related rate control values.
- av1_new_framerate(cpi, cpi->framerate);
-
- // Set absolute upper and lower quality limits
- rc->worst_quality = cpi->oxcf.worst_allowed_q;
- rc->best_quality = cpi->oxcf.best_allowed_q;
- // Large scale tile mode pins the interpolation filter to EIGHTTAP_REGULAR; otherwise it is SWITCHABLE.
- cm->interp_filter = oxcf->large_scale_tile ? EIGHTTAP_REGULAR : SWITCHABLE;
- cm->switchable_motion_mode = 1;
- // Use the configured render size when both dimensions are positive, else fall back to the configured width and height.
- if (cpi->oxcf.render_width > 0 && cpi->oxcf.render_height > 0) {
- cm->render_width = cpi->oxcf.render_width;
- cm->render_height = cpi->oxcf.render_height;
- } else {
- cm->render_width = cpi->oxcf.width;
- cm->render_height = cpi->oxcf.height;
- }
- cm->width = cpi->oxcf.width;
- cm->height = cpi->oxcf.height;
-
- int sb_size = seq_params->sb_size; // Value before any update, compared against below.
- // Superblock size should not be updated after the first key frame.
- if (!cpi->seq_params_locked) {
- set_sb_size(&cm->seq_params, select_sb_size(cpi));
- }
- // Reallocate when the frame exceeds the initial dimensions or the superblock size changed.
- if (cpi->initial_width || sb_size != seq_params->sb_size) {
- if (cm->width > cpi->initial_width || cm->height > cpi->initial_height ||
- seq_params->sb_size != sb_size) {
- av1_free_context_buffers(cm);
- av1_free_pc_tree(&cpi->td, num_planes);
- alloc_compressor_data(cpi);
- realloc_segmentation_maps(cpi);
- cpi->initial_width = cpi->initial_height = 0; // NOTE(review): presumably makes the initial size get set up again later - confirm.
- }
- }
- update_frame_size(cpi);
-
- cpi->alt_ref_source = NULL;
- rc->is_src_frame_alt_ref = 0;
-
- rc->is_bwd_ref_frame = 0;
- rc->is_last_bipred_frame = 0;
- rc->is_bipred_frame = 0;
-
- set_tile_info(cpi);
- // Clear any pending externally requested refresh settings.
- cpi->ext_refresh_frame_flags_pending = 0;
- cpi->ext_refresh_frame_context_pending = 0;
-
- highbd_set_var_fns(cpi);
-
- // Init sequence level coding tools
- // This should not be called after the first key frame.
- if (!cpi->seq_params_locked) {
- seq_params->operating_points_cnt_minus_1 =
- cm->number_spatial_layers > 1 ? cm->number_spatial_layers - 1 : 0;
- init_seq_coding_tools(&cm->seq_params, cm, oxcf);
- }
-}
-
-AV1_COMP *av1_create_compressor(AV1EncoderConfig *oxcf,
- BufferPool *const pool) {
- unsigned int i;
- AV1_COMP *volatile const cpi = aom_memalign(32, sizeof(AV1_COMP));
- AV1_COMMON *volatile const cm = cpi != NULL ? &cpi->common : NULL;
-
- if (!cm) return NULL;
-
- av1_zero(*cpi);
-
- // The jmp_buf is valid only for the duration of the function that calls
- // setjmp(). Therefore, this function must reset the 'setjmp' field to 0
- // before it returns.
- if (setjmp(cm->error.jmp)) {
- cm->error.setjmp = 0;
- av1_remove_compressor(cpi);
- return 0;
- }
-
- cm->error.setjmp = 1;
- cm->alloc_mi = enc_alloc_mi;
- cm->free_mi = enc_free_mi;
- cm->setup_mi = enc_setup_mi;
-
- CHECK_MEM_ERROR(cm, cm->fc,
- (FRAME_CONTEXT *)aom_memalign(32, sizeof(*cm->fc)));
- CHECK_MEM_ERROR(cm, cm->frame_contexts,
- (FRAME_CONTEXT *)aom_memalign(
- 32, FRAME_CONTEXTS * sizeof(*cm->frame_contexts)));
- memset(cm->fc, 0, sizeof(*cm->fc));
- memset(cm->frame_contexts, 0, FRAME_CONTEXTS * sizeof(*cm->frame_contexts));
-
- cpi->resize_state = 0;
- cpi->resize_avg_qp = 0;
- cpi->resize_buffer_underflow = 0;
-
- cpi->common.buffer_pool = pool;
-
- init_config(cpi, oxcf);
- av1_rc_init(&cpi->oxcf, oxcf->pass, &cpi->rc);
-
- cm->current_video_frame = 0;
- cpi->seq_params_locked = 0;
- cpi->partition_search_skippable_frame = 0;
- cpi->tile_data = NULL;
- cpi->last_show_frame_buf_idx = INVALID_IDX;
-
- realloc_segmentation_maps(cpi);
-
- memset(cpi->nmv_costs, 0, sizeof(cpi->nmv_costs));
- memset(cpi->nmv_costs_hp, 0, sizeof(cpi->nmv_costs_hp));
-
- for (i = 0; i < (sizeof(cpi->mbgraph_stats) / sizeof(cpi->mbgraph_stats[0]));
- i++) {
- CHECK_MEM_ERROR(
- cm, cpi->mbgraph_stats[i].mb_stats,
- aom_calloc(cm->MBs * sizeof(*cpi->mbgraph_stats[i].mb_stats), 1));
- }
-
-#if CONFIG_FP_MB_STATS
- cpi->use_fp_mb_stats = 0;
- if (cpi->use_fp_mb_stats) {
- // a place holder used to store the first pass mb stats in the first pass
- CHECK_MEM_ERROR(cm, cpi->twopass.frame_mb_stats_buf,
- aom_calloc(cm->MBs * sizeof(uint8_t), 1));
- } else {
- cpi->twopass.frame_mb_stats_buf = NULL;
- }
-#endif
-
- cpi->refresh_alt_ref_frame = 0;
-
- cpi->b_calculate_psnr = CONFIG_INTERNAL_STATS;
-#if CONFIG_INTERNAL_STATS
- cpi->b_calculate_blockiness = 1;
- cpi->b_calculate_consistency = 1;
- cpi->total_inconsistency = 0;
- cpi->psnr.worst = 100.0;
- cpi->worst_ssim = 100.0;
-
- cpi->count = 0;
- cpi->bytes = 0;
-
- if (cpi->b_calculate_psnr) {
- cpi->total_sq_error = 0;
- cpi->total_samples = 0;
- cpi->tot_recode_hits = 0;
- cpi->summed_quality = 0;
- cpi->summed_weights = 0;
- }
-
- cpi->fastssim.worst = 100.0;
- cpi->psnrhvs.worst = 100.0;
-
- if (cpi->b_calculate_blockiness) {
- cpi->total_blockiness = 0;
- cpi->worst_blockiness = 0.0;
- }
-
- if (cpi->b_calculate_consistency) {
- CHECK_MEM_ERROR(cm, cpi->ssim_vars,
- aom_malloc(sizeof(*cpi->ssim_vars) * 4 *
- cpi->common.mi_rows * cpi->common.mi_cols));
- cpi->worst_consistency = 100.0;
- }
-#endif
-#if CONFIG_ENTROPY_STATS
- av1_zero(aggregate_fc);
-#endif // CONFIG_ENTROPY_STATS
-
- cpi->first_time_stamp_ever = INT64_MAX;
-
- cpi->td.mb.nmvcost[0] = &cpi->nmv_costs[0][MV_MAX];
- cpi->td.mb.nmvcost[1] = &cpi->nmv_costs[1][MV_MAX];
- cpi->td.mb.nmvcost_hp[0] = &cpi->nmv_costs_hp[0][MV_MAX];
- cpi->td.mb.nmvcost_hp[1] = &cpi->nmv_costs_hp[1][MV_MAX];
-
-#ifdef OUTPUT_YUV_SKINMAP
- yuv_skinmap_file = fopen("skinmap.yuv", "ab");
-#endif
-#ifdef OUTPUT_YUV_REC
- yuv_rec_file = fopen("rec.yuv", "wb");
-#endif
-
- if (oxcf->pass == 1) {
- av1_init_first_pass(cpi);
- } else if (oxcf->pass == 2) {
- const size_t packet_sz = sizeof(FIRSTPASS_STATS);
- const int packets = (int)(oxcf->two_pass_stats_in.sz / packet_sz);
-
-#if CONFIG_FP_MB_STATS
- if (cpi->use_fp_mb_stats) {
- const size_t psz = cpi->common.MBs * sizeof(uint8_t);
- const int ps = (int)(oxcf->firstpass_mb_stats_in.sz / psz);
-
- cpi->twopass.firstpass_mb_stats.mb_stats_start =
- oxcf->firstpass_mb_stats_in.buf;
- cpi->twopass.firstpass_mb_stats.mb_stats_end =
- cpi->twopass.firstpass_mb_stats.mb_stats_start +
- (ps - 1) * cpi->common.MBs * sizeof(uint8_t);
- }
-#endif
-
- cpi->twopass.stats_in_start = oxcf->two_pass_stats_in.buf;
- cpi->twopass.stats_in = cpi->twopass.stats_in_start;
- cpi->twopass.stats_in_end = &cpi->twopass.stats_in[packets - 1];
-
- av1_init_second_pass(cpi);
- }
-
- CHECK_MEM_ERROR(
- cm, cpi->td.mb.above_pred_buf,
- (uint8_t *)aom_memalign(16, MAX_MB_PLANE * MAX_SB_SQUARE *
- sizeof(*cpi->td.mb.above_pred_buf)));
- CHECK_MEM_ERROR(
- cm, cpi->td.mb.left_pred_buf,
- (uint8_t *)aom_memalign(16, MAX_MB_PLANE * MAX_SB_SQUARE *
- sizeof(*cpi->td.mb.left_pred_buf)));
-
- CHECK_MEM_ERROR(cm, cpi->td.mb.wsrc_buf,
- (int32_t *)aom_memalign(
- 16, MAX_SB_SQUARE * sizeof(*cpi->td.mb.wsrc_buf)));
-
- for (int x = 0; x < 2; x++)
- for (int y = 0; y < 2; y++)
- CHECK_MEM_ERROR(
- cm, cpi->td.mb.hash_value_buffer[x][y],
- (uint32_t *)aom_malloc(AOM_BUFFER_SIZE_FOR_BLOCK_HASH *
- sizeof(*cpi->td.mb.hash_value_buffer[0][0])));
-
- cpi->td.mb.g_crc_initialized = 0;
-
- CHECK_MEM_ERROR(cm, cpi->td.mb.mask_buf,
- (int32_t *)aom_memalign(
- 16, MAX_SB_SQUARE * sizeof(*cpi->td.mb.mask_buf)));
-
- av1_set_speed_features_framesize_independent(cpi);
- av1_set_speed_features_framesize_dependent(cpi);
-
-#define BFP(BT, SDF, SDAF, VF, SVF, SVAF, SDX4DF, JSDAF, JSVAF) \
- cpi->fn_ptr[BT].sdf = SDF; \
- cpi->fn_ptr[BT].sdaf = SDAF; \
- cpi->fn_ptr[BT].vf = VF; \
- cpi->fn_ptr[BT].svf = SVF; \
- cpi->fn_ptr[BT].svaf = SVAF; \
- cpi->fn_ptr[BT].sdx4df = SDX4DF; \
- cpi->fn_ptr[BT].jsdaf = JSDAF; \
- cpi->fn_ptr[BT].jsvaf = JSVAF;
-
- BFP(BLOCK_4X16, aom_sad4x16, aom_sad4x16_avg, aom_variance4x16,
- aom_sub_pixel_variance4x16, aom_sub_pixel_avg_variance4x16,
- aom_sad4x16x4d, aom_jnt_sad4x16_avg, aom_jnt_sub_pixel_avg_variance4x16)
-
- BFP(BLOCK_16X4, aom_sad16x4, aom_sad16x4_avg, aom_variance16x4,
- aom_sub_pixel_variance16x4, aom_sub_pixel_avg_variance16x4,
- aom_sad16x4x4d, aom_jnt_sad16x4_avg, aom_jnt_sub_pixel_avg_variance16x4)
-
- BFP(BLOCK_8X32, aom_sad8x32, aom_sad8x32_avg, aom_variance8x32,
- aom_sub_pixel_variance8x32, aom_sub_pixel_avg_variance8x32,
- aom_sad8x32x4d, aom_jnt_sad8x32_avg, aom_jnt_sub_pixel_avg_variance8x32)
-
- BFP(BLOCK_32X8, aom_sad32x8, aom_sad32x8_avg, aom_variance32x8,
- aom_sub_pixel_variance32x8, aom_sub_pixel_avg_variance32x8,
- aom_sad32x8x4d, aom_jnt_sad32x8_avg, aom_jnt_sub_pixel_avg_variance32x8)
-
- BFP(BLOCK_16X64, aom_sad16x64, aom_sad16x64_avg, aom_variance16x64,
- aom_sub_pixel_variance16x64, aom_sub_pixel_avg_variance16x64,
- aom_sad16x64x4d, aom_jnt_sad16x64_avg,
- aom_jnt_sub_pixel_avg_variance16x64)
-
- BFP(BLOCK_64X16, aom_sad64x16, aom_sad64x16_avg, aom_variance64x16,
- aom_sub_pixel_variance64x16, aom_sub_pixel_avg_variance64x16,
- aom_sad64x16x4d, aom_jnt_sad64x16_avg,
- aom_jnt_sub_pixel_avg_variance64x16)
-
- BFP(BLOCK_128X128, aom_sad128x128, aom_sad128x128_avg, aom_variance128x128,
- aom_sub_pixel_variance128x128, aom_sub_pixel_avg_variance128x128,
- aom_sad128x128x4d, aom_jnt_sad128x128_avg,
- aom_jnt_sub_pixel_avg_variance128x128)
-
- BFP(BLOCK_128X64, aom_sad128x64, aom_sad128x64_avg, aom_variance128x64,
- aom_sub_pixel_variance128x64, aom_sub_pixel_avg_variance128x64,
- aom_sad128x64x4d, aom_jnt_sad128x64_avg,
- aom_jnt_sub_pixel_avg_variance128x64)
-
- BFP(BLOCK_64X128, aom_sad64x128, aom_sad64x128_avg, aom_variance64x128,
- aom_sub_pixel_variance64x128, aom_sub_pixel_avg_variance64x128,
- aom_sad64x128x4d, aom_jnt_sad64x128_avg,
- aom_jnt_sub_pixel_avg_variance64x128)
-
- BFP(BLOCK_32X16, aom_sad32x16, aom_sad32x16_avg, aom_variance32x16,
- aom_sub_pixel_variance32x16, aom_sub_pixel_avg_variance32x16,
- aom_sad32x16x4d, aom_jnt_sad32x16_avg,
- aom_jnt_sub_pixel_avg_variance32x16)
-
- BFP(BLOCK_16X32, aom_sad16x32, aom_sad16x32_avg, aom_variance16x32,
- aom_sub_pixel_variance16x32, aom_sub_pixel_avg_variance16x32,
- aom_sad16x32x4d, aom_jnt_sad16x32_avg,
- aom_jnt_sub_pixel_avg_variance16x32)
-
- BFP(BLOCK_64X32, aom_sad64x32, aom_sad64x32_avg, aom_variance64x32,
- aom_sub_pixel_variance64x32, aom_sub_pixel_avg_variance64x32,
- aom_sad64x32x4d, aom_jnt_sad64x32_avg,
- aom_jnt_sub_pixel_avg_variance64x32)
-
- BFP(BLOCK_32X64, aom_sad32x64, aom_sad32x64_avg, aom_variance32x64,
- aom_sub_pixel_variance32x64, aom_sub_pixel_avg_variance32x64,
- aom_sad32x64x4d, aom_jnt_sad32x64_avg,
- aom_jnt_sub_pixel_avg_variance32x64)
-
- BFP(BLOCK_32X32, aom_sad32x32, aom_sad32x32_avg, aom_variance32x32,
- aom_sub_pixel_variance32x32, aom_sub_pixel_avg_variance32x32,
- aom_sad32x32x4d, aom_jnt_sad32x32_avg,
- aom_jnt_sub_pixel_avg_variance32x32)
-
- BFP(BLOCK_64X64, aom_sad64x64, aom_sad64x64_avg, aom_variance64x64,
- aom_sub_pixel_variance64x64, aom_sub_pixel_avg_variance64x64,
- aom_sad64x64x4d, aom_jnt_sad64x64_avg,
- aom_jnt_sub_pixel_avg_variance64x64)
-
- BFP(BLOCK_16X16, aom_sad16x16, aom_sad16x16_avg, aom_variance16x16,
- aom_sub_pixel_variance16x16, aom_sub_pixel_avg_variance16x16,
- aom_sad16x16x4d, aom_jnt_sad16x16_avg,
- aom_jnt_sub_pixel_avg_variance16x16)
-
- BFP(BLOCK_16X8, aom_sad16x8, aom_sad16x8_avg, aom_variance16x8,
- aom_sub_pixel_variance16x8, aom_sub_pixel_avg_variance16x8,
- aom_sad16x8x4d, aom_jnt_sad16x8_avg, aom_jnt_sub_pixel_avg_variance16x8)
-
- BFP(BLOCK_8X16, aom_sad8x16, aom_sad8x16_avg, aom_variance8x16,
- aom_sub_pixel_variance8x16, aom_sub_pixel_avg_variance8x16,
- aom_sad8x16x4d, aom_jnt_sad8x16_avg, aom_jnt_sub_pixel_avg_variance8x16)
-
- BFP(BLOCK_8X8, aom_sad8x8, aom_sad8x8_avg, aom_variance8x8,
- aom_sub_pixel_variance8x8, aom_sub_pixel_avg_variance8x8, aom_sad8x8x4d,
- aom_jnt_sad8x8_avg, aom_jnt_sub_pixel_avg_variance8x8)
-
- BFP(BLOCK_8X4, aom_sad8x4, aom_sad8x4_avg, aom_variance8x4,
- aom_sub_pixel_variance8x4, aom_sub_pixel_avg_variance8x4, aom_sad8x4x4d,
- aom_jnt_sad8x4_avg, aom_jnt_sub_pixel_avg_variance8x4)
-
- BFP(BLOCK_4X8, aom_sad4x8, aom_sad4x8_avg, aom_variance4x8,
- aom_sub_pixel_variance4x8, aom_sub_pixel_avg_variance4x8, aom_sad4x8x4d,
- aom_jnt_sad4x8_avg, aom_jnt_sub_pixel_avg_variance4x8)
-
- BFP(BLOCK_4X4, aom_sad4x4, aom_sad4x4_avg, aom_variance4x4,
- aom_sub_pixel_variance4x4, aom_sub_pixel_avg_variance4x4, aom_sad4x4x4d,
- aom_jnt_sad4x4_avg, aom_jnt_sub_pixel_avg_variance4x4)
-
-#define OBFP(BT, OSDF, OVF, OSVF) \
- cpi->fn_ptr[BT].osdf = OSDF; \
- cpi->fn_ptr[BT].ovf = OVF; \
- cpi->fn_ptr[BT].osvf = OSVF;
-
- OBFP(BLOCK_128X128, aom_obmc_sad128x128, aom_obmc_variance128x128,
- aom_obmc_sub_pixel_variance128x128)
- OBFP(BLOCK_128X64, aom_obmc_sad128x64, aom_obmc_variance128x64,
- aom_obmc_sub_pixel_variance128x64)
- OBFP(BLOCK_64X128, aom_obmc_sad64x128, aom_obmc_variance64x128,
- aom_obmc_sub_pixel_variance64x128)
- OBFP(BLOCK_64X64, aom_obmc_sad64x64, aom_obmc_variance64x64,
- aom_obmc_sub_pixel_variance64x64)
- OBFP(BLOCK_64X32, aom_obmc_sad64x32, aom_obmc_variance64x32,
- aom_obmc_sub_pixel_variance64x32)
- OBFP(BLOCK_32X64, aom_obmc_sad32x64, aom_obmc_variance32x64,
- aom_obmc_sub_pixel_variance32x64)
- OBFP(BLOCK_32X32, aom_obmc_sad32x32, aom_obmc_variance32x32,
- aom_obmc_sub_pixel_variance32x32)
- OBFP(BLOCK_32X16, aom_obmc_sad32x16, aom_obmc_variance32x16,
- aom_obmc_sub_pixel_variance32x16)
- OBFP(BLOCK_16X32, aom_obmc_sad16x32, aom_obmc_variance16x32,
- aom_obmc_sub_pixel_variance16x32)
- OBFP(BLOCK_16X16, aom_obmc_sad16x16, aom_obmc_variance16x16,
- aom_obmc_sub_pixel_variance16x16)
- OBFP(BLOCK_16X8, aom_obmc_sad16x8, aom_obmc_variance16x8,
- aom_obmc_sub_pixel_variance16x8)
- OBFP(BLOCK_8X16, aom_obmc_sad8x16, aom_obmc_variance8x16,
- aom_obmc_sub_pixel_variance8x16)
- OBFP(BLOCK_8X8, aom_obmc_sad8x8, aom_obmc_variance8x8,
- aom_obmc_sub_pixel_variance8x8)
- OBFP(BLOCK_4X8, aom_obmc_sad4x8, aom_obmc_variance4x8,
- aom_obmc_sub_pixel_variance4x8)
- OBFP(BLOCK_8X4, aom_obmc_sad8x4, aom_obmc_variance8x4,
- aom_obmc_sub_pixel_variance8x4)
- OBFP(BLOCK_4X4, aom_obmc_sad4x4, aom_obmc_variance4x4,
- aom_obmc_sub_pixel_variance4x4)
- OBFP(BLOCK_4X16, aom_obmc_sad4x16, aom_obmc_variance4x16,
- aom_obmc_sub_pixel_variance4x16)
- OBFP(BLOCK_16X4, aom_obmc_sad16x4, aom_obmc_variance16x4,
- aom_obmc_sub_pixel_variance16x4)
- OBFP(BLOCK_8X32, aom_obmc_sad8x32, aom_obmc_variance8x32,
- aom_obmc_sub_pixel_variance8x32)
- OBFP(BLOCK_32X8, aom_obmc_sad32x8, aom_obmc_variance32x8,
- aom_obmc_sub_pixel_variance32x8)
- OBFP(BLOCK_16X64, aom_obmc_sad16x64, aom_obmc_variance16x64,
- aom_obmc_sub_pixel_variance16x64)
- OBFP(BLOCK_64X16, aom_obmc_sad64x16, aom_obmc_variance64x16,
- aom_obmc_sub_pixel_variance64x16)
-
-#define MBFP(BT, MCSDF, MCSVF) \
- cpi->fn_ptr[BT].msdf = MCSDF; \
- cpi->fn_ptr[BT].msvf = MCSVF;
-
- MBFP(BLOCK_128X128, aom_masked_sad128x128,
- aom_masked_sub_pixel_variance128x128)
- MBFP(BLOCK_128X64, aom_masked_sad128x64, aom_masked_sub_pixel_variance128x64)
- MBFP(BLOCK_64X128, aom_masked_sad64x128, aom_masked_sub_pixel_variance64x128)
- MBFP(BLOCK_64X64, aom_masked_sad64x64, aom_masked_sub_pixel_variance64x64)
- MBFP(BLOCK_64X32, aom_masked_sad64x32, aom_masked_sub_pixel_variance64x32)
- MBFP(BLOCK_32X64, aom_masked_sad32x64, aom_masked_sub_pixel_variance32x64)
- MBFP(BLOCK_32X32, aom_masked_sad32x32, aom_masked_sub_pixel_variance32x32)
- MBFP(BLOCK_32X16, aom_masked_sad32x16, aom_masked_sub_pixel_variance32x16)
- MBFP(BLOCK_16X32, aom_masked_sad16x32, aom_masked_sub_pixel_variance16x32)
- MBFP(BLOCK_16X16, aom_masked_sad16x16, aom_masked_sub_pixel_variance16x16)
- MBFP(BLOCK_16X8, aom_masked_sad16x8, aom_masked_sub_pixel_variance16x8)
- MBFP(BLOCK_8X16, aom_masked_sad8x16, aom_masked_sub_pixel_variance8x16)
- MBFP(BLOCK_8X8, aom_masked_sad8x8, aom_masked_sub_pixel_variance8x8)
- MBFP(BLOCK_4X8, aom_masked_sad4x8, aom_masked_sub_pixel_variance4x8)
- MBFP(BLOCK_8X4, aom_masked_sad8x4, aom_masked_sub_pixel_variance8x4)
- MBFP(BLOCK_4X4, aom_masked_sad4x4, aom_masked_sub_pixel_variance4x4)
-
- MBFP(BLOCK_4X16, aom_masked_sad4x16, aom_masked_sub_pixel_variance4x16)
-
- MBFP(BLOCK_16X4, aom_masked_sad16x4, aom_masked_sub_pixel_variance16x4)
-
- MBFP(BLOCK_8X32, aom_masked_sad8x32, aom_masked_sub_pixel_variance8x32)
-
- MBFP(BLOCK_32X8, aom_masked_sad32x8, aom_masked_sub_pixel_variance32x8)
-
- MBFP(BLOCK_16X64, aom_masked_sad16x64, aom_masked_sub_pixel_variance16x64)
-
- MBFP(BLOCK_64X16, aom_masked_sad64x16, aom_masked_sub_pixel_variance64x16)
-
- highbd_set_var_fns(cpi);
-
- /* av1_init_quantizer() is first called here. Add check in
- * av1_frame_init_quantizer() so that av1_init_quantizer is only
- * called later when needed. This will avoid unnecessary calls of
- * av1_init_quantizer() for every frame.
- */
- av1_init_quantizer(cpi);
- av1_qm_init(cm);
-
- av1_loop_filter_init(cm);
- cm->superres_scale_denominator = SCALE_NUMERATOR;
- cm->superres_upscaled_width = oxcf->width;
- cm->superres_upscaled_height = oxcf->height;
- av1_loop_restoration_precal();
-
- cm->error.setjmp = 0;
-
- return cpi;
-}
-
-#if CONFIG_INTERNAL_STATS
-#define SNPRINT(H, T) snprintf((H) + strlen(H), sizeof(H) - strlen(H), (T))
-
-#define SNPRINT2(H, T, V) \
- snprintf((H) + strlen(H), sizeof(H) - strlen(H), (T), (V))
-#endif // CONFIG_INTERNAL_STATS
-
-void av1_remove_compressor(AV1_COMP *cpi) {
- AV1_COMMON *cm;
- unsigned int i;
- int t;
-
- if (!cpi) return;
-
- cm = &cpi->common;
- const int num_planes = av1_num_planes(cm);
-
- if (cm->current_video_frame > 0) {
-#if CONFIG_ENTROPY_STATS
- if (cpi->oxcf.pass != 1) {
- fprintf(stderr, "Writing counts.stt\n");
- FILE *f = fopen("counts.stt", "wb");
- fwrite(&aggregate_fc, sizeof(aggregate_fc), 1, f);
- fclose(f);
- }
-#endif // CONFIG_ENTROPY_STATS
-#if CONFIG_INTERNAL_STATS
- aom_clear_system_state();
-
- if (cpi->oxcf.pass != 1) {
- char headings[512] = { 0 };
- char results[512] = { 0 };
- FILE *f = fopen("opsnr.stt", "a");
- double time_encoded =
- (cpi->last_end_time_stamp_seen - cpi->first_time_stamp_ever) /
- 10000000.000;
- double total_encode_time =
- (cpi->time_receive_data + cpi->time_compress_data) / 1000.000;
- const double dr =
- (double)cpi->bytes * (double)8 / (double)1000 / time_encoded;
- const double peak = (double)((1 << cpi->oxcf.input_bit_depth) - 1);
- const double target_rate = (double)cpi->oxcf.target_bandwidth / 1000;
- const double rate_err = ((100.0 * (dr - target_rate)) / target_rate);
-
- if (cpi->b_calculate_psnr) {
- const double total_psnr = aom_sse_to_psnr(
- (double)cpi->total_samples, peak, (double)cpi->total_sq_error);
- const double total_ssim =
- 100 * pow(cpi->summed_quality / cpi->summed_weights, 8.0);
- snprintf(headings, sizeof(headings),
- "Bitrate\tAVGPsnr\tGLBPsnr\tAVPsnrP\tGLPsnrP\t"
- "AOMSSIM\tVPSSIMP\tFASTSIM\tPSNRHVS\t"
- "WstPsnr\tWstSsim\tWstFast\tWstHVS\t"
- "AVPsrnY\tAPsnrCb\tAPsnrCr");
- snprintf(results, sizeof(results),
- "%7.2f\t%7.3f\t%7.3f\t%7.3f\t%7.3f\t"
- "%7.3f\t%7.3f\t%7.3f\t%7.3f\t"
- "%7.3f\t%7.3f\t%7.3f\t%7.3f\t"
- "%7.3f\t%7.3f\t%7.3f",
- dr, cpi->psnr.stat[STAT_ALL] / cpi->count, total_psnr,
- cpi->psnr.stat[STAT_ALL] / cpi->count, total_psnr, total_ssim,
- total_ssim, cpi->fastssim.stat[STAT_ALL] / cpi->count,
- cpi->psnrhvs.stat[STAT_ALL] / cpi->count, cpi->psnr.worst,
- cpi->worst_ssim, cpi->fastssim.worst, cpi->psnrhvs.worst,
- cpi->psnr.stat[STAT_Y] / cpi->count,
- cpi->psnr.stat[STAT_U] / cpi->count,
- cpi->psnr.stat[STAT_V] / cpi->count);
-
- if (cpi->b_calculate_blockiness) {
- SNPRINT(headings, "\t Block\tWstBlck");
- SNPRINT2(results, "\t%7.3f", cpi->total_blockiness / cpi->count);
- SNPRINT2(results, "\t%7.3f", cpi->worst_blockiness);
- }
-
- if (cpi->b_calculate_consistency) {
- double consistency =
- aom_sse_to_psnr((double)cpi->total_samples, peak,
- (double)cpi->total_inconsistency);
-
- SNPRINT(headings, "\tConsist\tWstCons");
- SNPRINT2(results, "\t%7.3f", consistency);
- SNPRINT2(results, "\t%7.3f", cpi->worst_consistency);
- }
- fprintf(f, "%s\t Time\tRcErr\tAbsErr\n", headings);
- fprintf(f, "%s\t%8.0f\t%7.2f\t%7.2f\n", results, total_encode_time,
- rate_err, fabs(rate_err));
- }
-
- fclose(f);
- }
-#endif // CONFIG_INTERNAL_STATS
- }
-
- for (t = 0; t < cpi->num_workers; ++t) {
- AVxWorker *const worker = &cpi->workers[t];
- EncWorkerData *const thread_data = &cpi->tile_thr_data[t];
-
- // Deallocate allocated threads.
- aom_get_worker_interface()->end(worker);
-
- // Deallocate allocated thread data.
- if (t < cpi->num_workers - 1) {
- aom_free(thread_data->td->palette_buffer);
- aom_free(thread_data->td->tmp_conv_dst);
- for (int j = 0; j < 2; ++j) {
- aom_free(thread_data->td->tmp_obmc_bufs[j]);
- }
- aom_free(thread_data->td->above_pred_buf);
- aom_free(thread_data->td->left_pred_buf);
- aom_free(thread_data->td->wsrc_buf);
- for (int x = 0; x < 2; x++) {
- for (int y = 0; y < 2; y++) {
- aom_free(thread_data->td->hash_value_buffer[x][y]);
- thread_data->td->hash_value_buffer[x][y] = NULL;
- }
- }
- aom_free(thread_data->td->mask_buf);
- aom_free(thread_data->td->counts);
- av1_free_pc_tree(thread_data->td, num_planes);
- aom_free(thread_data->td);
- }
- }
- aom_free(cpi->tile_thr_data);
- aom_free(cpi->workers);
-
- if (cpi->num_workers > 1) {
- av1_loop_filter_dealloc(&cpi->lf_row_sync);
- av1_loop_restoration_dealloc(&cpi->lr_row_sync, cpi->num_workers);
- }
-
- dealloc_compressor_data(cpi);
-
- for (i = 0; i < sizeof(cpi->mbgraph_stats) / sizeof(cpi->mbgraph_stats[0]);
- ++i) {
- aom_free(cpi->mbgraph_stats[i].mb_stats);
- }
-
-#if CONFIG_FP_MB_STATS
- if (cpi->use_fp_mb_stats) {
- aom_free(cpi->twopass.frame_mb_stats_buf);
- cpi->twopass.frame_mb_stats_buf = NULL;
- }
-#endif
-#if CONFIG_INTERNAL_STATS
- aom_free(cpi->ssim_vars);
- cpi->ssim_vars = NULL;
-#endif // CONFIG_INTERNAL_STATS
-
- av1_remove_common(cm);
- for (i = 0; i < FRAME_BUFFERS; ++i) {
- av1_hash_table_destroy(&cm->buffer_pool->frame_bufs[i].hash_table);
- }
- if (cpi->sf.use_hash_based_trellis) hbt_destroy();
- av1_free_ref_frame_buffers(cm->buffer_pool);
- aom_free(cpi);
-
-#ifdef OUTPUT_YUV_SKINMAP
- fclose(yuv_skinmap_file);
-#endif
-#ifdef OUTPUT_YUV_REC
- fclose(yuv_rec_file);
-#endif
-}
-
-static void generate_psnr_packet(AV1_COMP *cpi) {
- struct aom_codec_cx_pkt pkt;
- int i;
- PSNR_STATS psnr;
- aom_calc_highbd_psnr(cpi->source, cpi->common.frame_to_show, &psnr,
- cpi->td.mb.e_mbd.bd, cpi->oxcf.input_bit_depth);
-
- for (i = 0; i < 4; ++i) {
- pkt.data.psnr.samples[i] = psnr.samples[i];
- pkt.data.psnr.sse[i] = psnr.sse[i];
- pkt.data.psnr.psnr[i] = psnr.psnr[i];
- }
- pkt.kind = AOM_CODEC_PSNR_PKT;
- aom_codec_pkt_list_add(cpi->output_pkt_list, &pkt);
-}
-
-int av1_use_as_reference(AV1_COMP *cpi, int ref_frame_flags) {
- if (ref_frame_flags > ((1 << INTER_REFS_PER_FRAME) - 1)) return -1;
-
- cpi->ext_ref_frame_flags = ref_frame_flags;
- return 0;
-}
-
-void av1_update_reference(AV1_COMP *cpi, int ref_frame_upd_flags) {
- cpi->ext_refresh_last_frame = (ref_frame_upd_flags & AOM_LAST_FLAG) != 0;
- cpi->ext_refresh_golden_frame = (ref_frame_upd_flags & AOM_GOLD_FLAG) != 0;
- cpi->ext_refresh_alt_ref_frame = (ref_frame_upd_flags & AOM_ALT_FLAG) != 0;
- cpi->ext_refresh_bwd_ref_frame = (ref_frame_upd_flags & AOM_BWD_FLAG) != 0;
- cpi->ext_refresh_alt2_ref_frame = (ref_frame_upd_flags & AOM_ALT2_FLAG) != 0;
- cpi->ext_refresh_frame_flags_pending = 1;
-}
-
-int av1_copy_reference_enc(AV1_COMP *cpi, int idx, YV12_BUFFER_CONFIG *sd) {
- AV1_COMMON *const cm = &cpi->common;
- const int num_planes = av1_num_planes(cm);
- YV12_BUFFER_CONFIG *cfg = get_ref_frame(cm, idx);
- if (cfg) {
- aom_yv12_copy_frame(cfg, sd, num_planes);
- return 0;
- } else {
- return -1;
- }
-}
-
-int av1_set_reference_enc(AV1_COMP *cpi, int idx, YV12_BUFFER_CONFIG *sd) {
- AV1_COMMON *const cm = &cpi->common;
- const int num_planes = av1_num_planes(cm);
- YV12_BUFFER_CONFIG *cfg = get_ref_frame(cm, idx);
- if (cfg) {
- aom_yv12_copy_frame(sd, cfg, num_planes);
- return 0;
- } else {
- return -1;
- }
-}
-
-int av1_update_entropy(AV1_COMP *cpi, int update) {
- cpi->ext_refresh_frame_context = update;
- cpi->ext_refresh_frame_context_pending = 1;
- return 0;
-}
-
-#if defined(OUTPUT_YUV_DENOISED) || defined(OUTPUT_YUV_SKINMAP)
-// The denoiser buffer is allocated as a YUV 440 buffer. This function writes it
-// as YUV 420. We simply use the top-left pixels of the UV buffers, since we do
-// not denoise the UV channels at this time. If ever we implement UV channel
-// denoising we will have to modify this.
-void aom_write_yuv_frame_420(YV12_BUFFER_CONFIG *s, FILE *f) {
- uint8_t *src = s->y_buffer;
- int h = s->y_height;
-
- do {
- fwrite(src, s->y_width, 1, f);
- src += s->y_stride;
- } while (--h);
-
- src = s->u_buffer;
- h = s->uv_height;
-
- do {
- fwrite(src, s->uv_width, 1, f);
- src += s->uv_stride;
- } while (--h);
-
- src = s->v_buffer;
- h = s->uv_height;
-
- do {
- fwrite(src, s->uv_width, 1, f);
- src += s->uv_stride;
- } while (--h);
-}
-#endif
-
-static void check_show_existing_frame(AV1_COMP *cpi) {
- const GF_GROUP *const gf_group = &cpi->twopass.gf_group;
- AV1_COMMON *const cm = &cpi->common;
- const FRAME_UPDATE_TYPE next_frame_update_type =
- gf_group->update_type[gf_group->index];
-#if USE_SYMM_MULTI_LAYER
- const int which_arf = (cpi->new_bwdref_update_rule == 1)
- ? gf_group->arf_update_idx[gf_group->index] > 0
- : gf_group->arf_update_idx[gf_group->index];
-#else
- const int which_arf = gf_group->arf_update_idx[gf_group->index];
-#endif
-
- if (cm->show_existing_frame == 1) {
- cm->show_existing_frame = 0;
- } else if (cpi->rc.is_last_bipred_frame) {
-#if USE_SYMM_MULTI_LAYER
- // NOTE: When new structure is used, every bwdref will have one overlay
- // frame. Therefore, there is no need to find out which frame to
- // show in advance.
- if (cpi->new_bwdref_update_rule == 0) {
-#endif
- // NOTE: If the current frame is a last bi-predictive frame, it is
- // needed next to show the BWDREF_FRAME, which is pointed by
- // the last_fb_idxes[0] after reference frame buffer update
- cpi->rc.is_last_bipred_frame = 0;
- cm->show_existing_frame = 1;
- cpi->existing_fb_idx_to_show = cpi->ref_fb_idx[0];
-#if USE_SYMM_MULTI_LAYER
- }
-#endif
- } else if (cpi->is_arf_filter_off[which_arf] &&
- (next_frame_update_type == OVERLAY_UPDATE ||
- next_frame_update_type == INTNL_OVERLAY_UPDATE)) {
-#if USE_SYMM_MULTI_LAYER
- const int bwdref_to_show =
- (cpi->new_bwdref_update_rule == 1) ? BWDREF_FRAME : ALTREF2_FRAME;
-#else
- const int bwdref_to_show = ALTREF2_FRAME;
-#endif
- // Other parameters related to OVERLAY_UPDATE will be taken care of
- // in av1_rc_get_second_pass_params(cpi)
- cm->show_existing_frame = 1;
- cpi->rc.is_src_frame_alt_ref = 1;
- cpi->existing_fb_idx_to_show = (next_frame_update_type == OVERLAY_UPDATE)
- ? cpi->ref_fb_idx[ALTREF_FRAME - 1]
- : cpi->ref_fb_idx[bwdref_to_show - 1];
-#if USE_SYMM_MULTI_LAYER
- if (cpi->new_bwdref_update_rule == 0)
-#endif
- cpi->is_arf_filter_off[which_arf] = 0;
- }
- cpi->rc.is_src_frame_ext_arf = 0;
-}
-
-#ifdef OUTPUT_YUV_REC
-void aom_write_one_yuv_frame(AV1_COMMON *cm, YV12_BUFFER_CONFIG *s) {
- uint8_t *src = s->y_buffer;
- int h = cm->height;
- if (yuv_rec_file == NULL) return;
- if (s->flags & YV12_FLAG_HIGHBITDEPTH) {
- uint16_t *src16 = CONVERT_TO_SHORTPTR(s->y_buffer);
-
- do {
- fwrite(src16, s->y_width, 2, yuv_rec_file);
- src16 += s->y_stride;
- } while (--h);
-
- src16 = CONVERT_TO_SHORTPTR(s->u_buffer);
- h = s->uv_height;
-
- do {
- fwrite(src16, s->uv_width, 2, yuv_rec_file);
- src16 += s->uv_stride;
- } while (--h);
-
- src16 = CONVERT_TO_SHORTPTR(s->v_buffer);
- h = s->uv_height;
-
- do {
- fwrite(src16, s->uv_width, 2, yuv_rec_file);
- src16 += s->uv_stride;
- } while (--h);
-
- fflush(yuv_rec_file);
- return;
- }
-
- do {
- fwrite(src, s->y_width, 1, yuv_rec_file);
- src += s->y_stride;
- } while (--h);
-
- src = s->u_buffer;
- h = s->uv_height;
-
- do {
- fwrite(src, s->uv_width, 1, yuv_rec_file);
- src += s->uv_stride;
- } while (--h);
-
- src = s->v_buffer;
- h = s->uv_height;
-
- do {
- fwrite(src, s->uv_width, 1, yuv_rec_file);
- src += s->uv_stride;
- } while (--h);
-
- fflush(yuv_rec_file);
-}
-#endif // OUTPUT_YUV_REC
-
-#define GM_RECODE_LOOP_NUM4X4_FACTOR 192
-static int recode_loop_test_global_motion(AV1_COMP *cpi) {
- int i;
- int recode = 0;
- RD_COUNTS *const rdc = &cpi->td.rd_counts;
- AV1_COMMON *const cm = &cpi->common;
- for (i = LAST_FRAME; i <= ALTREF_FRAME; ++i) {
- if (cm->global_motion[i].wmtype != IDENTITY &&
- rdc->global_motion_used[i] * GM_RECODE_LOOP_NUM4X4_FACTOR <
- cpi->gmparams_cost[i]) {
- cm->global_motion[i] = default_warp_params;
- assert(cm->global_motion[i].wmtype == IDENTITY);
- cpi->gmparams_cost[i] = 0;
- recode = 1;
- // TODO(sarahparker): The earlier condition for recoding here was:
- // "recode |= (rdc->global_motion_used[i] > 0);". Can we bring something
- // similar to that back to speed up global motion?
- }
- }
- return recode;
-}
-
-// Function to test for conditions that indicate we should loop
-// back and recode a frame.
-static int recode_loop_test(AV1_COMP *cpi, int high_limit, int low_limit, int q,
- int maxq, int minq) {
- const RATE_CONTROL *const rc = &cpi->rc;
- const AV1EncoderConfig *const oxcf = &cpi->oxcf;
- const int frame_is_kfgfarf = frame_is_kf_gf_arf(cpi);
- int force_recode = 0;
-
- if ((rc->projected_frame_size >= rc->max_frame_bandwidth) ||
- (cpi->sf.recode_loop == ALLOW_RECODE) ||
- (frame_is_kfgfarf && (cpi->sf.recode_loop == ALLOW_RECODE_KFARFGF))) {
- // TODO(agrange) high_limit could be greater than the scale-down threshold.
- if ((rc->projected_frame_size > high_limit && q < maxq) ||
- (rc->projected_frame_size < low_limit && q > minq)) {
- force_recode = 1;
- } else if (cpi->oxcf.rc_mode == AOM_CQ) {
- // Deal with frame undershoot and whether or not we are
- // below the automatically set cq level.
- if (q > oxcf->cq_level &&
- rc->projected_frame_size < ((rc->this_frame_target * 7) >> 3)) {
- force_recode = 1;
- }
- }
- }
- return force_recode;
-}
-
-#define DUMP_REF_FRAME_IMAGES 0
-
-#if DUMP_REF_FRAME_IMAGES == 1
-static int dump_one_image(AV1_COMMON *cm,
- const YV12_BUFFER_CONFIG *const ref_buf,
- char *file_name) {
- int h;
- FILE *f_ref = NULL;
-
- if (ref_buf == NULL) {
- printf("Frame data buffer is NULL.\n");
- return AOM_CODEC_MEM_ERROR;
- }
-
- if ((f_ref = fopen(file_name, "wb")) == NULL) {
- printf("Unable to open file %s to write.\n", file_name);
- return AOM_CODEC_MEM_ERROR;
- }
-
- // --- Y ---
- for (h = 0; h < cm->height; ++h) {
- fwrite(&ref_buf->y_buffer[h * ref_buf->y_stride], 1, cm->width, f_ref);
- }
- // --- U ---
- for (h = 0; h < (cm->height >> 1); ++h) {
- fwrite(&ref_buf->u_buffer[h * ref_buf->uv_stride], 1, (cm->width >> 1),
- f_ref);
- }
- // --- V ---
- for (h = 0; h < (cm->height >> 1); ++h) {
- fwrite(&ref_buf->v_buffer[h * ref_buf->uv_stride], 1, (cm->width >> 1),
- f_ref);
- }
-
- fclose(f_ref);
-
- return AOM_CODEC_OK;
-}
-
-static void dump_ref_frame_images(AV1_COMP *cpi) {
- AV1_COMMON *const cm = &cpi->common;
- MV_REFERENCE_FRAME ref_frame;
-
- for (ref_frame = LAST_FRAME; ref_frame <= ALTREF_FRAME; ++ref_frame) {
- char file_name[256] = "";
- snprintf(file_name, sizeof(file_name), "/tmp/enc_F%d_ref_%d.yuv",
- cm->current_video_frame, ref_frame);
- dump_one_image(cm, get_ref_frame_buffer(cpi, ref_frame), file_name);
- }
-}
-#endif // DUMP_REF_FRAME_IMAGES == 1
-
-// This function is used to shift the virtual indices of last reference frames
-// as follows:
-// LAST_FRAME -> LAST2_FRAME -> LAST3_FRAME
-// when the LAST_FRAME is updated.
-static INLINE void shift_last_ref_frames(AV1_COMP *cpi) {
- // TODO(isbs): shift the scaled indices as well
- int ref_frame;
- for (ref_frame = LAST_REF_FRAMES - 1; ref_frame > 0; --ref_frame) {
- cpi->ref_fb_idx[ref_frame] = cpi->ref_fb_idx[ref_frame - 1];
-
- // [0] is allocated to the current coded frame. The statistics for the
- // reference frames start at [LAST_FRAME], i.e. [1].
- if (!cpi->rc.is_src_frame_alt_ref) {
- memcpy(cpi->interp_filter_selected[ref_frame + LAST_FRAME],
- cpi->interp_filter_selected[ref_frame - 1 + LAST_FRAME],
- sizeof(cpi->interp_filter_selected[ref_frame - 1 + LAST_FRAME]));
- }
- }
-}
-
-#if USE_SYMM_MULTI_LAYER
-// This function is used to shift the virtual indices of bwd reference
-// frames as follows:
-// BWD_REF -> ALT2_REF -> EXT_REF
-// to clear a space to store the closest bwdref
-static INLINE void rshift_bwd_ref_frames(AV1_COMP *cpi) {
- // TODO(isbs): shift the scaled indices as well
- static const int ordered_bwd[3] = { BWDREF_FRAME - 1, ALTREF2_FRAME - 1,
- EXTREF_FRAME - 1 };
-
- for (int i = 2; i > 0; --i) {
- // [0] is allocated to the current coded frame, i.e. bwdref
- memcpy(
- cpi->interp_filter_selected[ordered_bwd[i] + LAST_FRAME],
- cpi->interp_filter_selected[ordered_bwd[i - 1] + LAST_FRAME],
- sizeof(cpi->interp_filter_selected[ordered_bwd[i - 1] + LAST_FRAME]));
-
- cpi->ref_fb_idx[ordered_bwd[i]] = cpi->ref_fb_idx[ordered_bwd[i - 1]];
- }
-}
-
-// This function is used to shift the virtual indices of bwd reference
-// frames as follows:
-// BWD_REF <- ALT2_REF <- EXT_REF
-// to update the bwd reference frame for coding the next frame.
-static INLINE void lshift_bwd_ref_frames(AV1_COMP *cpi) {
- // TODO(isbs): shift the scaled indices as well
- static const int ordered_bwd[3] = { BWDREF_FRAME - 1, ALTREF2_FRAME - 1,
- EXTREF_FRAME - 1 };
-
- for (int i = 0; i < 2; ++i) {
- // [0] is allocated to the current coded frame, i.e. bwdref
- memcpy(
- cpi->interp_filter_selected[ordered_bwd[i] + LAST_FRAME],
- cpi->interp_filter_selected[ordered_bwd[i + 1] + LAST_FRAME],
- sizeof(cpi->interp_filter_selected[ordered_bwd[i + 1] + LAST_FRAME]));
-
- cpi->ref_fb_idx[ordered_bwd[i]] = cpi->ref_fb_idx[ordered_bwd[i + 1]];
- }
-}
-#endif // USE_SYMM_MULTI_LAYER
-
-static void update_reference_frames(AV1_COMP *cpi) {
- AV1_COMMON *const cm = &cpi->common;
-
- // NOTE: Save the new show frame buffer index for --test-code=warn, i.e.,
- // for the purpose to verify no mismatch between encoder and decoder.
- if (cm->show_frame) cpi->last_show_frame_buf_idx = cm->new_fb_idx;
-
- // In the case of show_existing frame, we will not send fresh flag
- // to decoder. Any change in the reference frame buffer can be done by
- // switching the virtual indices.
- if (cm->show_existing_frame) {
- cpi->refresh_last_frame = 0;
- cpi->refresh_golden_frame = 0;
- cpi->refresh_bwd_ref_frame = 0;
- cpi->refresh_alt2_ref_frame = 0;
- cpi->refresh_alt_ref_frame = 0;
-
- cpi->rc.is_bwd_ref_frame = 0;
- cpi->rc.is_last_bipred_frame = 0;
- cpi->rc.is_bipred_frame = 0;
- }
-
- BufferPool *const pool = cm->buffer_pool;
-
- // At this point the new frame has been encoded.
- // If any buffer copy / swapping is signaled it should be done here.
-
- // Only update all of the reference buffers if a KEY_FRAME is also a
- // show_frame. This ensures a fwd keyframe does not update all of the buffers
- if ((cm->frame_type == KEY_FRAME && cm->show_frame) || frame_is_sframe(cm)) {
- for (int ref_frame = 0; ref_frame < REF_FRAMES; ++ref_frame) {
- ref_cnt_fb(pool->frame_bufs,
- &cm->ref_frame_map[cpi->ref_fb_idx[ref_frame]],
- cm->new_fb_idx);
- }
- return;
- }
-
- if (av1_preserve_existing_gf(cpi)) {
- // We have decided to preserve the previously existing golden frame as our
- // new ARF frame. However, in the short term in function
- // av1_bitstream.c::get_refresh_mask() we left it in the GF slot and, if
- // we're updating the GF with the current decoded frame, we save it to the
- // ARF slot instead.
- // We now have to update the ARF with the current frame and swap gld_fb_idx
- // and alt_fb_idx so that, overall, we've stored the old GF in the new ARF
- // slot and, if we're updating the GF, the current frame becomes the new GF.
- int tmp;
-
- // ARF in general is a better reference than overlay. We shouldkeep ARF as
- // reference instead of replacing it with overlay.
-
- if (!cpi->preserve_arf_as_gld) {
- ref_cnt_fb(pool->frame_bufs,
- &cm->ref_frame_map[cpi->ref_fb_idx[ALTREF_FRAME - 1]],
- cm->new_fb_idx);
- }
-
- tmp = cpi->ref_fb_idx[ALTREF_FRAME - 1];
- cpi->ref_fb_idx[ALTREF_FRAME - 1] = cpi->ref_fb_idx[GOLDEN_FRAME - 1];
- cpi->ref_fb_idx[GOLDEN_FRAME - 1] = tmp;
-
- // TODO(zoeliu): Do we need to copy cpi->interp_filter_selected[0] over to
- // cpi->interp_filter_selected[GOLDEN_FRAME]?
- } else if (cpi->rc.is_src_frame_ext_arf && cm->show_existing_frame) {
-#if CONFIG_DEBUG
- const GF_GROUP *const gf_group = &cpi->twopass.gf_group;
- assert(gf_group->update_type[gf_group->index] == INTNL_OVERLAY_UPDATE);
-#endif
-#if USE_SYMM_MULTI_LAYER
- const int bwdref_to_show =
- (cpi->new_bwdref_update_rule == 1) ? BWDREF_FRAME : ALTREF2_FRAME;
-#else
- const int bwdref_to_show = ALTREF2_FRAME;
-#endif
- // Deal with the special case for showing existing internal ALTREF_FRAME
- // Refresh the LAST_FRAME with the ALTREF_FRAME and retire the LAST3_FRAME
- // by updating the virtual indices.
- const int tmp = cpi->ref_fb_idx[LAST_REF_FRAMES - 1];
- shift_last_ref_frames(cpi);
-
- cpi->ref_fb_idx[LAST_FRAME - 1] = cpi->ref_fb_idx[bwdref_to_show - 1];
-
- memcpy(cpi->interp_filter_selected[LAST_FRAME],
- cpi->interp_filter_selected[bwdref_to_show],
- sizeof(cpi->interp_filter_selected[bwdref_to_show]));
-#if USE_SYMM_MULTI_LAYER
- if (cpi->new_bwdref_update_rule == 1) {
- lshift_bwd_ref_frames(cpi);
- // pass outdated forward reference frame (previous LAST3) to the
- // spared space
- cpi->ref_fb_idx[EXTREF_FRAME - 1] = tmp;
- } else {
-#endif
- cpi->ref_fb_idx[bwdref_to_show - 1] = tmp;
-#if USE_SYMM_MULTI_LAYER
- }
-#endif
- } else { /* For non key/golden frames */
- // === ALTREF_FRAME ===
- if (cpi->refresh_alt_ref_frame) {
- int arf_idx = cpi->ref_fb_idx[ALTREF_FRAME - 1];
- ref_cnt_fb(pool->frame_bufs, &cm->ref_frame_map[arf_idx], cm->new_fb_idx);
-
- memcpy(cpi->interp_filter_selected[ALTREF_FRAME],
- cpi->interp_filter_selected[0],
- sizeof(cpi->interp_filter_selected[0]));
- }
-
- // === GOLDEN_FRAME ===
- if (cpi->refresh_golden_frame) {
- ref_cnt_fb(pool->frame_bufs,
- &cm->ref_frame_map[cpi->ref_fb_idx[GOLDEN_FRAME - 1]],
- cm->new_fb_idx);
-
- memcpy(cpi->interp_filter_selected[GOLDEN_FRAME],
- cpi->interp_filter_selected[0],
- sizeof(cpi->interp_filter_selected[0]));
- }
-
- // === BWDREF_FRAME ===
- if (cpi->refresh_bwd_ref_frame) {
-#if USE_SYMM_MULTI_LAYER
- if (cpi->new_bwdref_update_rule) {
- // We shift the backward reference frame as follows:
- // BWDREF -> ALTREF2 -> EXTREF
- // and assign the newly coded frame to BWDREF so that it always
- // keeps the nearest future frame
- int tmp = cpi->ref_fb_idx[EXTREF_FRAME - 1];
- ref_cnt_fb(pool->frame_bufs, &cm->ref_frame_map[tmp], cm->new_fb_idx);
-
- rshift_bwd_ref_frames(cpi);
- cpi->ref_fb_idx[BWDREF_FRAME - 1] = tmp;
- } else {
-#endif // USE_SYMM_MULTI_LAYER
- ref_cnt_fb(pool->frame_bufs,
- &cm->ref_frame_map[cpi->ref_fb_idx[BWDREF_FRAME - 1]],
- cm->new_fb_idx);
-#if USE_SYMM_MULTI_LAYER
- }
-#endif
- memcpy(cpi->interp_filter_selected[BWDREF_FRAME],
- cpi->interp_filter_selected[0],
- sizeof(cpi->interp_filter_selected[0]));
- }
-
- // === ALTREF2_FRAME ===
- if (cpi->refresh_alt2_ref_frame) {
- ref_cnt_fb(pool->frame_bufs,
- &cm->ref_frame_map[cpi->ref_fb_idx[ALTREF2_FRAME - 1]],
- cm->new_fb_idx);
-
- memcpy(cpi->interp_filter_selected[ALTREF2_FRAME],
- cpi->interp_filter_selected[0],
- sizeof(cpi->interp_filter_selected[0]));
- }
- }
-
- if (cpi->refresh_last_frame) {
- // NOTE(zoeliu): We have two layers of mapping (1) from the per-frame
- // reference to the reference frame buffer virtual index; and then (2) from
- // the virtual index to the reference frame buffer physical index:
- //
- // LAST_FRAME, ..., LAST3_FRAME, ..., ALTREF_FRAME
- // | | |
- // v v v
- // ref_fb_idx[0], ..., ref_fb_idx[2], ..., ref_fb_idx[ALTREF_FRAME-1]
- // | | |
- // v v v
- // ref_frame_map[], ..., ref_frame_map[], ..., ref_frame_map[]
- //
- // When refresh_last_frame is set, it is intended to retire LAST3_FRAME,
- // have the other 2 LAST reference frames shifted as follows:
- // LAST_FRAME -> LAST2_FRAME -> LAST3_FRAME
- // , and then have LAST_FRAME refreshed by the newly coded frame.
- //
- // To fulfill it, the decoder will be notified to execute following 2 steps:
- //
- // (a) To change ref_frame_map[] and have the virtual index of LAST3_FRAME
- // to point to the newly coded frame, i.e.
- // ref_frame_map[lst_fb_idexes[2]] => new_fb_idx;
- //
- // (b) To change the 1st layer mapping to have LAST_FRAME mapped to the
- // original virtual index of LAST3_FRAME and have the other mappings
- // shifted as follows:
- // LAST_FRAME, LAST2_FRAME, LAST3_FRAME
- // | | |
- // v v v
- // ref_fb_idx[2], ref_fb_idx[0], ref_fb_idx[1]
- int tmp;
-
- ref_cnt_fb(pool->frame_bufs,
- &cm->ref_frame_map[cpi->ref_fb_idx[LAST_REF_FRAMES - 1]],
- cm->new_fb_idx);
-
- tmp = cpi->ref_fb_idx[LAST_REF_FRAMES - 1];
-
- shift_last_ref_frames(cpi);
- cpi->ref_fb_idx[0] = tmp;
-
- assert(cm->show_existing_frame == 0);
- memcpy(cpi->interp_filter_selected[LAST_FRAME],
- cpi->interp_filter_selected[0],
- sizeof(cpi->interp_filter_selected[0]));
-
- // If the new structure is used, we will always have overlay frames coupled
- // with bwdref frames. Therefore, we won't have to perform this update
- // in advance (we do this update when the overlay frame shows up).
-#if USE_SYMM_MULTI_LAYER
- if (cpi->new_bwdref_update_rule == 0 && cpi->rc.is_last_bipred_frame) {
-#else
- if (cpi->rc.is_last_bipred_frame) {
-#endif
- // Refresh the LAST_FRAME with the BWDREF_FRAME and retire the
- // LAST3_FRAME by updating the virtual indices.
- //
- // NOTE: The source frame for BWDREF does not have a holding position as
- // the OVERLAY frame for ALTREF's. Hence, to resolve the reference
- // virtual index reshuffling for BWDREF, the encoder always
- // specifies a LAST_BIPRED right before BWDREF and completes the
- // reshuffling job accordingly.
- tmp = cpi->ref_fb_idx[LAST_REF_FRAMES - 1];
-
- shift_last_ref_frames(cpi);
- cpi->ref_fb_idx[0] = cpi->ref_fb_idx[BWDREF_FRAME - 1];
- cpi->ref_fb_idx[BWDREF_FRAME - 1] = tmp;
-
- memcpy(cpi->interp_filter_selected[LAST_FRAME],
- cpi->interp_filter_selected[BWDREF_FRAME],
- sizeof(cpi->interp_filter_selected[BWDREF_FRAME]));
- }
- }
-
-#if DUMP_REF_FRAME_IMAGES == 1
- // Dump out all reference frame images.
- dump_ref_frame_images(cpi);
-#endif // DUMP_REF_FRAME_IMAGES
-}
-
-static INLINE void alloc_frame_mvs(AV1_COMMON *const cm, int buffer_idx) {
- assert(buffer_idx != INVALID_IDX);
- RefCntBuffer *const new_fb_ptr = &cm->buffer_pool->frame_bufs[buffer_idx];
- ensure_mv_buffer(new_fb_ptr, cm);
- new_fb_ptr->width = cm->width;
- new_fb_ptr->height = cm->height;
-}
-
-static void scale_references(AV1_COMP *cpi) {
- AV1_COMMON *cm = &cpi->common;
- const int num_planes = av1_num_planes(cm);
- MV_REFERENCE_FRAME ref_frame;
- const AOM_REFFRAME ref_mask[INTER_REFS_PER_FRAME] = {
- AOM_LAST_FLAG, AOM_LAST2_FLAG, AOM_LAST3_FLAG, AOM_GOLD_FLAG,
- AOM_BWD_FLAG, AOM_ALT2_FLAG, AOM_ALT_FLAG
- };
-
- for (ref_frame = LAST_FRAME; ref_frame <= ALTREF_FRAME; ++ref_frame) {
- // Need to convert from AOM_REFFRAME to index into ref_mask (subtract 1).
- if (cpi->ref_frame_flags & ref_mask[ref_frame - 1]) {
- BufferPool *const pool = cm->buffer_pool;
- const YV12_BUFFER_CONFIG *const ref =
- get_ref_frame_buffer(cpi, ref_frame);
-
- if (ref == NULL) {
- cpi->scaled_ref_idx[ref_frame - 1] = INVALID_IDX;
- continue;
- }
-
- if (ref->y_crop_width != cm->width || ref->y_crop_height != cm->height) {
- RefCntBuffer *new_fb_ptr = NULL;
- int force_scaling = 0;
- int new_fb = cpi->scaled_ref_idx[ref_frame - 1];
- if (new_fb == INVALID_IDX) {
- new_fb = get_free_fb(cm);
- force_scaling = 1;
- }
- if (new_fb == INVALID_IDX) return;
- new_fb_ptr = &pool->frame_bufs[new_fb];
- if (force_scaling || new_fb_ptr->buf.y_crop_width != cm->width ||
- new_fb_ptr->buf.y_crop_height != cm->height) {
- if (aom_realloc_frame_buffer(
- &new_fb_ptr->buf, cm->width, cm->height,
- cm->seq_params.subsampling_x, cm->seq_params.subsampling_y,
- cm->seq_params.use_highbitdepth, AOM_BORDER_IN_PIXELS,
- cm->byte_alignment, NULL, NULL, NULL))
- aom_internal_error(&cm->error, AOM_CODEC_MEM_ERROR,
- "Failed to allocate frame buffer");
- av1_resize_and_extend_frame(
- ref, &new_fb_ptr->buf, (int)cm->seq_params.bit_depth, num_planes);
- cpi->scaled_ref_idx[ref_frame - 1] = new_fb;
- alloc_frame_mvs(cm, new_fb);
- }
- } else {
- const int buf_idx = get_ref_frame_buf_idx(cpi, ref_frame);
- RefCntBuffer *const buf = &pool->frame_bufs[buf_idx];
- buf->buf.y_crop_width = ref->y_crop_width;
- buf->buf.y_crop_height = ref->y_crop_height;
- cpi->scaled_ref_idx[ref_frame - 1] = buf_idx;
- ++buf->ref_count;
- }
- } else {
- if (cpi->oxcf.pass != 0) cpi->scaled_ref_idx[ref_frame - 1] = INVALID_IDX;
- }
- }
-}
-
-static void release_scaled_references(AV1_COMP *cpi) {
- AV1_COMMON *cm = &cpi->common;
- int i;
- // TODO(isbs): only refresh the necessary frames, rather than all of them
- for (i = 0; i < REF_FRAMES; ++i) {
- const int idx = cpi->scaled_ref_idx[i];
- RefCntBuffer *const buf =
- idx != INVALID_IDX ? &cm->buffer_pool->frame_bufs[idx] : NULL;
- if (buf != NULL) {
- --buf->ref_count;
- cpi->scaled_ref_idx[i] = INVALID_IDX;
- }
- }
-}
-
-static void set_mv_search_params(AV1_COMP *cpi) {
- const AV1_COMMON *const cm = &cpi->common;
- const unsigned int max_mv_def = AOMMIN(cm->width, cm->height);
-
- // Default based on max resolution.
- cpi->mv_step_param = av1_init_search_range(max_mv_def);
-
- if (cpi->sf.mv.auto_mv_step_size) {
- if (frame_is_intra_only(cm)) {
- // Initialize max_mv_magnitude for use in the first INTER frame
- // after a key/intra-only frame.
- cpi->max_mv_magnitude = max_mv_def;
- } else {
- if (cm->show_frame) {
- // Allow mv_steps to correspond to twice the max mv magnitude found
- // in the previous frame, capped by the default max_mv_magnitude based
- // on resolution.
- cpi->mv_step_param = av1_init_search_range(
- AOMMIN(max_mv_def, 2 * cpi->max_mv_magnitude));
- }
- cpi->max_mv_magnitude = 0;
- }
- }
-}
-
-static void set_size_independent_vars(AV1_COMP *cpi) {
- int i;
- for (i = LAST_FRAME; i <= ALTREF_FRAME; ++i) {
- cpi->common.global_motion[i] = default_warp_params;
- }
- cpi->global_motion_search_done = 0;
- av1_set_speed_features_framesize_independent(cpi);
- av1_set_rd_speed_thresholds(cpi);
- av1_set_rd_speed_thresholds_sub8x8(cpi);
- cpi->common.interp_filter = SWITCHABLE;
- cpi->common.switchable_motion_mode = 1;
-}
-
-static void set_size_dependent_vars(AV1_COMP *cpi, int *q, int *bottom_index,
- int *top_index) {
- AV1_COMMON *const cm = &cpi->common;
- const AV1EncoderConfig *const oxcf = &cpi->oxcf;
-
- // Setup variables that depend on the dimensions of the frame.
- av1_set_speed_features_framesize_dependent(cpi);
-
- // Decide q and q bounds.
- *q = av1_rc_pick_q_and_bounds(cpi, cm->width, cm->height, bottom_index,
- top_index);
-
- if (!frame_is_intra_only(cm)) {
- set_high_precision_mv(cpi, (*q) < HIGH_PRECISION_MV_QTHRESH,
- cpi->common.cur_frame_force_integer_mv);
- }
-
- // Configure experimental use of segmentation for enhanced coding of
- // static regions if indicated.
- // Only allowed in the second pass of a two pass encode, as it requires
- // lagged coding, and if the relevant speed feature flag is set.
- if (oxcf->pass == 2 && cpi->sf.static_segmentation)
- configure_static_seg_features(cpi);
-}
-
-static void init_motion_estimation(AV1_COMP *cpi) {
- int y_stride = cpi->scaled_source.y_stride;
-
- if (cpi->sf.mv.search_method == NSTEP) {
- av1_init3smotion_compensation(&cpi->ss_cfg, y_stride);
- } else if (cpi->sf.mv.search_method == DIAMOND) {
- av1_init_dsmotion_compensation(&cpi->ss_cfg, y_stride);
- }
-}
-
-#define COUPLED_CHROMA_FROM_LUMA_RESTORATION 0
-static void set_restoration_unit_size(int width, int height, int sx, int sy,
- RestorationInfo *rst) {
- (void)width;
- (void)height;
- (void)sx;
- (void)sy;
-#if COUPLED_CHROMA_FROM_LUMA_RESTORATION
- int s = AOMMIN(sx, sy);
-#else
- int s = 0;
-#endif // !COUPLED_CHROMA_FROM_LUMA_RESTORATION
-
- if (width * height > 352 * 288)
- rst[0].restoration_unit_size = RESTORATION_UNITSIZE_MAX;
- else
- rst[0].restoration_unit_size = (RESTORATION_UNITSIZE_MAX >> 1);
- rst[1].restoration_unit_size = rst[0].restoration_unit_size >> s;
- rst[2].restoration_unit_size = rst[1].restoration_unit_size;
-}
-
-static void init_ref_frame_bufs(AV1_COMP *cpi) {
- AV1_COMMON *const cm = &cpi->common;
- int i;
- BufferPool *const pool = cm->buffer_pool;
- cm->new_fb_idx = INVALID_IDX;
- for (i = 0; i < REF_FRAMES; ++i) {
- cm->ref_frame_map[i] = INVALID_IDX;
- pool->frame_bufs[i].ref_count = 0;
- }
- if (cm->seq_params.force_screen_content_tools) {
- for (i = 0; i < FRAME_BUFFERS; ++i) {
- av1_hash_table_init(&pool->frame_bufs[i].hash_table, &cpi->td.mb);
- }
- }
-}
-
-static void check_initial_width(AV1_COMP *cpi, int use_highbitdepth,
- int subsampling_x, int subsampling_y) {
- AV1_COMMON *const cm = &cpi->common;
- SequenceHeader *const seq_params = &cm->seq_params;
-
- if (!cpi->initial_width || seq_params->use_highbitdepth != use_highbitdepth ||
- seq_params->subsampling_x != subsampling_x ||
- seq_params->subsampling_y != subsampling_y) {
- seq_params->subsampling_x = subsampling_x;
- seq_params->subsampling_y = subsampling_y;
- seq_params->use_highbitdepth = use_highbitdepth;
-
- alloc_raw_frame_buffers(cpi);
- init_ref_frame_bufs(cpi);
- alloc_util_frame_buffers(cpi);
-
- init_motion_estimation(cpi); // TODO(agrange) This can be removed.
-
- cpi->initial_width = cm->width;
- cpi->initial_height = cm->height;
- cpi->initial_mbs = cm->MBs;
- }
-}
-
-// Returns 1 if the assigned width or height was <= 0.
-static int set_size_literal(AV1_COMP *cpi, int width, int height) {
- AV1_COMMON *cm = &cpi->common;
- const int num_planes = av1_num_planes(cm);
- check_initial_width(cpi, cm->seq_params.use_highbitdepth,
- cm->seq_params.subsampling_x,
- cm->seq_params.subsampling_y);
-
- if (width <= 0 || height <= 0) return 1;
-
- cm->width = width;
- cm->height = height;
-
- if (cpi->initial_width && cpi->initial_height &&
- (cm->width > cpi->initial_width || cm->height > cpi->initial_height)) {
- av1_free_context_buffers(cm);
- av1_free_pc_tree(&cpi->td, num_planes);
- alloc_compressor_data(cpi);
- realloc_segmentation_maps(cpi);
- cpi->initial_width = cpi->initial_height = 0;
- }
- update_frame_size(cpi);
-
- return 0;
-}
-
-static void set_frame_size(AV1_COMP *cpi, int width, int height) {
- AV1_COMMON *const cm = &cpi->common;
- const SequenceHeader *const seq_params = &cm->seq_params;
- const int num_planes = av1_num_planes(cm);
- MACROBLOCKD *const xd = &cpi->td.mb.e_mbd;
- int ref_frame;
-
- if (width != cm->width || height != cm->height) {
- // There has been a change in the encoded frame size
- set_size_literal(cpi, width, height);
- set_mv_search_params(cpi);
- // Recalculate 'all_lossless' in case super-resolution was (un)selected.
- cm->all_lossless = cm->coded_lossless && !av1_superres_scaled(cm);
- }
-
- if (cpi->oxcf.pass == 2) {
- av1_set_target_rate(cpi, cm->width, cm->height);
- }
-
- alloc_frame_mvs(cm, cm->new_fb_idx);
-
- // Allocate above context buffers
- if (cm->num_allocated_above_context_planes < av1_num_planes(cm) ||
- cm->num_allocated_above_context_mi_col < cm->mi_cols ||
- cm->num_allocated_above_contexts < cm->tile_rows) {
- av1_free_above_context_buffers(cm, cm->num_allocated_above_contexts);
- if (av1_alloc_above_context_buffers(cm, cm->tile_rows))
- aom_internal_error(&cm->error, AOM_CODEC_MEM_ERROR,
- "Failed to allocate context buffers");
- }
-
- // Reset the frame pointers to the current frame size.
- if (aom_realloc_frame_buffer(
- get_frame_new_buffer(cm), cm->width, cm->height,
- seq_params->subsampling_x, seq_params->subsampling_y,
- seq_params->use_highbitdepth, AOM_BORDER_IN_PIXELS,
- cm->byte_alignment, NULL, NULL, NULL))
- aom_internal_error(&cm->error, AOM_CODEC_MEM_ERROR,
- "Failed to allocate frame buffer");
-
- const int frame_width = cm->superres_upscaled_width;
- const int frame_height = cm->superres_upscaled_height;
- set_restoration_unit_size(frame_width, frame_height,
- seq_params->subsampling_x,
- seq_params->subsampling_y, cm->rst_info);
- for (int i = 0; i < num_planes; ++i)
- cm->rst_info[i].frame_restoration_type = RESTORE_NONE;
-
- av1_alloc_restoration_buffers(cm);
- alloc_util_frame_buffers(cpi); // TODO(afergs): Remove? Gets called anyways.
- init_motion_estimation(cpi);
-
- for (ref_frame = LAST_FRAME; ref_frame <= ALTREF_FRAME; ++ref_frame) {
- RefBuffer *const ref_buf = &cm->frame_refs[ref_frame - LAST_FRAME];
- const int buf_idx = get_ref_frame_buf_idx(cpi, ref_frame);
-
- ref_buf->idx = buf_idx;
-
- if (buf_idx != INVALID_IDX) {
- YV12_BUFFER_CONFIG *const buf = &cm->buffer_pool->frame_bufs[buf_idx].buf;
- ref_buf->buf = buf;
- av1_setup_scale_factors_for_frame(&ref_buf->sf, buf->y_crop_width,
- buf->y_crop_height, cm->width,
- cm->height);
- if (av1_is_scaled(&ref_buf->sf))
- aom_extend_frame_borders(buf, num_planes);
- } else {
- ref_buf->buf = NULL;
- }
- }
-
- av1_setup_scale_factors_for_frame(&cm->sf_identity, cm->width, cm->height,
- cm->width, cm->height);
-
- set_ref_ptrs(cm, xd, LAST_FRAME, LAST_FRAME);
-}
-
-static uint8_t calculate_next_resize_scale(const AV1_COMP *cpi) {
- // Choose an arbitrary random number
- static unsigned int seed = 56789;
- const AV1EncoderConfig *oxcf = &cpi->oxcf;
- if (oxcf->pass == 1) return SCALE_NUMERATOR;
- uint8_t new_denom = SCALE_NUMERATOR;
-
- if (cpi->common.seq_params.reduced_still_picture_hdr) return SCALE_NUMERATOR;
- switch (oxcf->resize_mode) {
- case RESIZE_NONE: new_denom = SCALE_NUMERATOR; break;
- case RESIZE_FIXED:
- if (cpi->common.frame_type == KEY_FRAME)
- new_denom = oxcf->resize_kf_scale_denominator;
- else
- new_denom = oxcf->resize_scale_denominator;
- break;
- case RESIZE_RANDOM: new_denom = lcg_rand16(&seed) % 9 + 8; break;
- default: assert(0);
- }
- return new_denom;
-}
-
-static uint8_t calculate_next_superres_scale(AV1_COMP *cpi) {
- // Choose an arbitrary random number
- static unsigned int seed = 34567;
- const AV1EncoderConfig *oxcf = &cpi->oxcf;
- if (oxcf->pass == 1) return SCALE_NUMERATOR;
- uint8_t new_denom = SCALE_NUMERATOR;
-
- // Make sure that superres mode of the frame is consistent with the
- // sequence-level flag.
- assert(IMPLIES(oxcf->superres_mode != SUPERRES_NONE,
- cpi->common.seq_params.enable_superres));
- assert(IMPLIES(!cpi->common.seq_params.enable_superres,
- oxcf->superres_mode == SUPERRES_NONE));
-
- switch (oxcf->superres_mode) {
- case SUPERRES_NONE: new_denom = SCALE_NUMERATOR; break;
- case SUPERRES_FIXED:
- if (cpi->common.frame_type == KEY_FRAME)
- new_denom = oxcf->superres_kf_scale_denominator;
- else
- new_denom = oxcf->superres_scale_denominator;
- break;
- case SUPERRES_RANDOM: new_denom = lcg_rand16(&seed) % 9 + 8; break;
- case SUPERRES_QTHRESH: {
- const GF_GROUP *const gf_group = &cpi->twopass.gf_group;
- const RATE_FACTOR_LEVEL rf_level = gf_group->rf_level[gf_group->index];
- const double rate_factor_delta = rate_factor_deltas[rf_level];
- const int qthresh = (rate_factor_delta <= 1.0)
- ? oxcf->superres_qthresh
- : oxcf->superres_kf_qthresh;
- av1_set_target_rate(cpi, cpi->oxcf.width, cpi->oxcf.height);
- int bottom_index, top_index;
- const int q = av1_rc_pick_q_and_bounds(
- cpi, cpi->oxcf.width, cpi->oxcf.height, &bottom_index, &top_index);
- if (q < qthresh) {
- new_denom = SCALE_NUMERATOR;
- } else {
- const uint8_t min_denom = SCALE_NUMERATOR + 1;
- const uint8_t denom_step = (MAXQ - qthresh + 1) >> 3;
-
- if (q == qthresh) {
- new_denom = min_denom;
- } else if (denom_step == 0) {
- new_denom = SCALE_NUMERATOR << 1;
- } else {
- const uint8_t additional_denom = (q - qthresh) / denom_step;
- new_denom =
- AOMMIN(min_denom + additional_denom, SCALE_NUMERATOR << 1);
- }
- }
- break;
- }
- default: assert(0);
- }
- return new_denom;
-}
-
-static int dimension_is_ok(int orig_dim, int resized_dim, int denom) {
- return (resized_dim * SCALE_NUMERATOR >= orig_dim * denom / 2);
-}
-
-static int dimensions_are_ok(int owidth, int oheight, size_params_type *rsz) {
- // Only need to check the width, as scaling is horizontal only.
- (void)oheight;
- return dimension_is_ok(owidth, rsz->resize_width, rsz->superres_denom);
-}
-
-static int validate_size_scales(RESIZE_MODE resize_mode,
- SUPERRES_MODE superres_mode, int owidth,
- int oheight, size_params_type *rsz) {
- if (dimensions_are_ok(owidth, oheight, rsz)) { // Nothing to do.
- return 1;
- }
-
- // Calculate current resize scale.
- int resize_denom =
- AOMMAX(DIVIDE_AND_ROUND(owidth * SCALE_NUMERATOR, rsz->resize_width),
- DIVIDE_AND_ROUND(oheight * SCALE_NUMERATOR, rsz->resize_height));
-
- if (resize_mode != RESIZE_RANDOM && superres_mode == SUPERRES_RANDOM) {
- // Alter superres scale as needed to enforce conformity.
- rsz->superres_denom =
- (2 * SCALE_NUMERATOR * SCALE_NUMERATOR) / resize_denom;
- if (!dimensions_are_ok(owidth, oheight, rsz)) {
- if (rsz->superres_denom > SCALE_NUMERATOR) --rsz->superres_denom;
- }
- } else if (resize_mode == RESIZE_RANDOM && superres_mode != SUPERRES_RANDOM) {
- // Alter resize scale as needed to enforce conformity.
- resize_denom =
- (2 * SCALE_NUMERATOR * SCALE_NUMERATOR) / rsz->superres_denom;
- rsz->resize_width = owidth;
- rsz->resize_height = oheight;
- av1_calculate_scaled_size(&rsz->resize_width, &rsz->resize_height,
- resize_denom);
- if (!dimensions_are_ok(owidth, oheight, rsz)) {
- if (resize_denom > SCALE_NUMERATOR) {
- --resize_denom;
- rsz->resize_width = owidth;
- rsz->resize_height = oheight;
- av1_calculate_scaled_size(&rsz->resize_width, &rsz->resize_height,
- resize_denom);
- }
- }
- } else if (resize_mode == RESIZE_RANDOM && superres_mode == SUPERRES_RANDOM) {
- // Alter both resize and superres scales as needed to enforce conformity.
- do {
- if (resize_denom > rsz->superres_denom)
- --resize_denom;
- else
- --rsz->superres_denom;
- rsz->resize_width = owidth;
- rsz->resize_height = oheight;
- av1_calculate_scaled_size(&rsz->resize_width, &rsz->resize_height,
- resize_denom);
- } while (!dimensions_are_ok(owidth, oheight, rsz) &&
- (resize_denom > SCALE_NUMERATOR ||
- rsz->superres_denom > SCALE_NUMERATOR));
- } else { // We are allowed to alter neither resize scale nor superres
- // scale.
- return 0;
- }
- return dimensions_are_ok(owidth, oheight, rsz);
-}
-
-// Calculates resize and superres params for next frame
-size_params_type av1_calculate_next_size_params(AV1_COMP *cpi) {
- const AV1EncoderConfig *oxcf = &cpi->oxcf;
- size_params_type rsz = { oxcf->width, oxcf->height, SCALE_NUMERATOR };
- int resize_denom;
- if (oxcf->pass == 1) return rsz;
- if (cpi->resize_pending_width && cpi->resize_pending_height) {
- rsz.resize_width = cpi->resize_pending_width;
- rsz.resize_height = cpi->resize_pending_height;
- cpi->resize_pending_width = cpi->resize_pending_height = 0;
- } else {
- resize_denom = calculate_next_resize_scale(cpi);
- rsz.resize_width = cpi->oxcf.width;
- rsz.resize_height = cpi->oxcf.height;
- av1_calculate_scaled_size(&rsz.resize_width, &rsz.resize_height,
- resize_denom);
- }
- rsz.superres_denom = calculate_next_superres_scale(cpi);
- if (!validate_size_scales(oxcf->resize_mode, oxcf->superres_mode, oxcf->width,
- oxcf->height, &rsz))
- assert(0 && "Invalid scale parameters");
- return rsz;
-}
-
-static void setup_frame_size_from_params(AV1_COMP *cpi, size_params_type *rsz) {
- int encode_width = rsz->resize_width;
- int encode_height = rsz->resize_height;
-
- AV1_COMMON *cm = &cpi->common;
- cm->superres_upscaled_width = encode_width;
- cm->superres_upscaled_height = encode_height;
- cm->superres_scale_denominator = rsz->superres_denom;
- av1_calculate_scaled_superres_size(&encode_width, &encode_height,
- rsz->superres_denom);
- set_frame_size(cpi, encode_width, encode_height);
-}
-
-static void setup_frame_size(AV1_COMP *cpi) {
- size_params_type rsz = av1_calculate_next_size_params(cpi);
- setup_frame_size_from_params(cpi, &rsz);
-}
-
-static void superres_post_encode(AV1_COMP *cpi) {
- AV1_COMMON *cm = &cpi->common;
- const int num_planes = av1_num_planes(cm);
-
- if (!av1_superres_scaled(cm)) return;
-
- assert(cpi->oxcf.enable_superres);
- assert(!is_lossless_requested(&cpi->oxcf));
- assert(!cm->all_lossless);
-
- av1_superres_upscale(cm, NULL);
-
- // If regular resizing is occurring the source will need to be downscaled to
- // match the upscaled superres resolution. Otherwise the original source is
- // used.
- if (!av1_resize_scaled(cm)) {
- cpi->source = cpi->unscaled_source;
- if (cpi->last_source != NULL) cpi->last_source = cpi->unscaled_last_source;
- } else {
- assert(cpi->unscaled_source->y_crop_width != cm->superres_upscaled_width);
- assert(cpi->unscaled_source->y_crop_height != cm->superres_upscaled_height);
- // Do downscale. cm->(width|height) has been updated by
- // av1_superres_upscale
- if (aom_realloc_frame_buffer(
- &cpi->scaled_source, cm->superres_upscaled_width,
- cm->superres_upscaled_height, cm->seq_params.subsampling_x,
- cm->seq_params.subsampling_y, cm->seq_params.use_highbitdepth,
- AOM_BORDER_IN_PIXELS, cm->byte_alignment, NULL, NULL, NULL))
- aom_internal_error(
- &cm->error, AOM_CODEC_MEM_ERROR,
- "Failed to reallocate scaled source buffer for superres");
- assert(cpi->scaled_source.y_crop_width == cm->superres_upscaled_width);
- assert(cpi->scaled_source.y_crop_height == cm->superres_upscaled_height);
- av1_resize_and_extend_frame(cpi->unscaled_source, &cpi->scaled_source,
- (int)cm->seq_params.bit_depth, num_planes);
- cpi->source = &cpi->scaled_source;
- }
-}
-
-static void loopfilter_frame(AV1_COMP *cpi, AV1_COMMON *cm) {
- const int num_planes = av1_num_planes(cm);
- MACROBLOCKD *xd = &cpi->td.mb.e_mbd;
-
- assert(IMPLIES(is_lossless_requested(&cpi->oxcf),
- cm->coded_lossless && cm->all_lossless));
-
- const int no_loopfilter = cm->coded_lossless || cm->large_scale_tile;
- const int no_cdef =
- !cm->seq_params.enable_cdef || cm->coded_lossless || cm->large_scale_tile;
- const int no_restoration = !cm->seq_params.enable_restoration ||
- cm->all_lossless || cm->large_scale_tile;
-
- struct loopfilter *lf = &cm->lf;
-
- if (no_loopfilter) {
- lf->filter_level[0] = 0;
- lf->filter_level[1] = 0;
- } else {
- struct aom_usec_timer timer;
-
- aom_clear_system_state();
-
- aom_usec_timer_start(&timer);
-
- av1_pick_filter_level(cpi->source, cpi, cpi->sf.lpf_pick);
-
- aom_usec_timer_mark(&timer);
- cpi->time_pick_lpf += aom_usec_timer_elapsed(&timer);
- }
-
- if (lf->filter_level[0] || lf->filter_level[1]) {
-#if LOOP_FILTER_BITMASK
- av1_loop_filter_frame(cm->frame_to_show, cm, xd, 0, 0, num_planes, 0);
-#else
- if (cpi->num_workers > 1)
- av1_loop_filter_frame_mt(cm->frame_to_show, cm, xd, 0, num_planes, 0,
- cpi->workers, cpi->num_workers,
- &cpi->lf_row_sync);
- else
- av1_loop_filter_frame(cm->frame_to_show, cm, xd, 0, num_planes, 0);
-#endif
- }
-
- if (!no_restoration)
- av1_loop_restoration_save_boundary_lines(cm->frame_to_show, cm, 0);
-
- if (no_cdef) {
- cm->cdef_bits = 0;
- cm->cdef_strengths[0] = 0;
- cm->nb_cdef_strengths = 1;
- cm->cdef_uv_strengths[0] = 0;
- } else {
- // Find CDEF parameters
- av1_cdef_search(cm->frame_to_show, cpi->source, cm, xd,
- cpi->sf.fast_cdef_search);
-
- // Apply the filter
- av1_cdef_frame(cm->frame_to_show, cm, xd);
- }
-
- superres_post_encode(cpi);
-
- if (no_restoration) {
- cm->rst_info[0].frame_restoration_type = RESTORE_NONE;
- cm->rst_info[1].frame_restoration_type = RESTORE_NONE;
- cm->rst_info[2].frame_restoration_type = RESTORE_NONE;
- } else {
- av1_loop_restoration_save_boundary_lines(cm->frame_to_show, cm, 1);
- av1_pick_filter_restoration(cpi->source, cpi);
- if (cm->rst_info[0].frame_restoration_type != RESTORE_NONE ||
- cm->rst_info[1].frame_restoration_type != RESTORE_NONE ||
- cm->rst_info[2].frame_restoration_type != RESTORE_NONE) {
- if (cpi->num_workers > 1)
- av1_loop_restoration_filter_frame_mt(cm->frame_to_show, cm, 0,
- cpi->workers, cpi->num_workers,
- &cpi->lr_row_sync, &cpi->lr_ctxt);
- else
- av1_loop_restoration_filter_frame(cm->frame_to_show, cm, 0,
- &cpi->lr_ctxt);
- }
- }
-}
-
-static int encode_without_recode_loop(AV1_COMP *cpi) {
- AV1_COMMON *const cm = &cpi->common;
- int q = 0, bottom_index = 0, top_index = 0; // Dummy variables.
-
- aom_clear_system_state();
-
- set_size_independent_vars(cpi);
-
- setup_frame_size(cpi);
-
- assert(cm->width == cpi->scaled_source.y_crop_width);
- assert(cm->height == cpi->scaled_source.y_crop_height);
-
- set_size_dependent_vars(cpi, &q, &bottom_index, &top_index);
-
- cpi->source =
- av1_scale_if_required(cm, cpi->unscaled_source, &cpi->scaled_source);
- if (cpi->unscaled_last_source != NULL)
- cpi->last_source = av1_scale_if_required(cm, cpi->unscaled_last_source,
- &cpi->scaled_last_source);
- cpi->source->buf_8bit_valid = 0;
- if (frame_is_intra_only(cm) == 0) {
- scale_references(cpi);
- }
-
- av1_set_quantizer(cm, q);
- setup_frame(cpi);
- suppress_active_map(cpi);
-
- // Variance adaptive and in frame q adjustment experiments are mutually
- // exclusive.
- if (cpi->oxcf.aq_mode == VARIANCE_AQ) {
- av1_vaq_frame_setup(cpi);
- } else if (cpi->oxcf.aq_mode == COMPLEXITY_AQ) {
- av1_setup_in_frame_q_adj(cpi);
- } else if (cpi->oxcf.aq_mode == CYCLIC_REFRESH_AQ) {
- av1_cyclic_refresh_setup(cpi);
- }
- apply_active_map(cpi);
- if (cm->seg.enabled) {
- if (!cm->seg.update_data && cm->prev_frame) {
- segfeatures_copy(&cm->seg, &cm->prev_frame->seg);
- } else {
- calculate_segdata(&cm->seg);
- }
- } else {
- memset(&cm->seg, 0, sizeof(cm->seg));
- }
- segfeatures_copy(&cm->cur_frame->seg, &cm->seg);
-
- // transform / motion compensation build reconstruction frame
- av1_encode_frame(cpi);
-
- // Update some stats from cyclic refresh, and check if we should not update
- // golden reference, for 1 pass CBR.
- if (cpi->oxcf.aq_mode == CYCLIC_REFRESH_AQ && cm->frame_type != KEY_FRAME &&
- (cpi->oxcf.pass == 0 && cpi->oxcf.rc_mode == AOM_CBR))
- av1_cyclic_refresh_check_golden_update(cpi);
-
- // Update the skip mb flag probabilities based on the distribution
- // seen in the last encoder iteration.
- // update_base_skip_probs(cpi);
- aom_clear_system_state();
- return AOM_CODEC_OK;
-}
-
-static int encode_with_recode_loop(AV1_COMP *cpi, size_t *size, uint8_t *dest) {
- AV1_COMMON *const cm = &cpi->common;
- RATE_CONTROL *const rc = &cpi->rc;
- int bottom_index, top_index;
- int loop_count = 0;
- int loop_at_this_size = 0;
- int loop = 0;
- int overshoot_seen = 0;
- int undershoot_seen = 0;
- int frame_over_shoot_limit;
- int frame_under_shoot_limit;
- int q = 0, q_low = 0, q_high = 0;
-
- set_size_independent_vars(cpi);
-
- cpi->source->buf_8bit_valid = 0;
-
- aom_clear_system_state();
- setup_frame_size(cpi);
- set_size_dependent_vars(cpi, &q, &bottom_index, &top_index);
-
- do {
- aom_clear_system_state();
-
- if (loop_count == 0) {
- // TODO(agrange) Scale cpi->max_mv_magnitude if frame-size has changed.
- set_mv_search_params(cpi);
-
- // Reset the loop state for new frame size.
- overshoot_seen = 0;
- undershoot_seen = 0;
-
- q_low = bottom_index;
- q_high = top_index;
-
- loop_at_this_size = 0;
-
- // Decide frame size bounds first time through.
- av1_rc_compute_frame_size_bounds(cpi, rc->this_frame_target,
- &frame_under_shoot_limit,
- &frame_over_shoot_limit);
- }
-
- // if frame was scaled calculate global_motion_search again if already
- // done
- if (loop_count > 0 && cpi->source && cpi->global_motion_search_done)
- if (cpi->source->y_crop_width != cm->width ||
- cpi->source->y_crop_height != cm->height)
- cpi->global_motion_search_done = 0;
- cpi->source =
- av1_scale_if_required(cm, cpi->unscaled_source, &cpi->scaled_source);
- if (cpi->unscaled_last_source != NULL)
- cpi->last_source = av1_scale_if_required(cm, cpi->unscaled_last_source,
- &cpi->scaled_last_source);
-
- if (frame_is_intra_only(cm) == 0) {
- if (loop_count > 0) {
- release_scaled_references(cpi);
- }
- scale_references(cpi);
- }
- av1_set_quantizer(cm, q);
- // printf("Frame %d/%d: q = %d, frame_type = %d\n", cm->current_video_frame,
- // cm->show_frame, q, cm->frame_type);
-
- if (loop_count == 0) setup_frame(cpi);
-
- // Base q-index may have changed, so we need to assign proper default coef
- // probs before every iteration.
- if (cm->primary_ref_frame == PRIMARY_REF_NONE ||
- cm->frame_refs[cm->primary_ref_frame].idx < 0) {
- av1_default_coef_probs(cm);
- av1_setup_frame_contexts(cm);
- }
-
- // Variance adaptive and in frame q adjustment experiments are mutually
- // exclusive.
- if (cpi->oxcf.aq_mode == VARIANCE_AQ) {
- av1_vaq_frame_setup(cpi);
- } else if (cpi->oxcf.aq_mode == COMPLEXITY_AQ) {
- av1_setup_in_frame_q_adj(cpi);
- }
- if (cm->seg.enabled) {
- if (!cm->seg.update_data && cm->prev_frame) {
- segfeatures_copy(&cm->seg, &cm->prev_frame->seg);
- } else {
- calculate_segdata(&cm->seg);
- }
- } else {
- memset(&cm->seg, 0, sizeof(cm->seg));
- }
- segfeatures_copy(&cm->cur_frame->seg, &cm->seg);
-
- // transform / motion compensation build reconstruction frame
- save_coding_context(cpi);
- av1_encode_frame(cpi);
-
- // Update the skip mb flag probabilities based on the distribution
- // seen in the last encoder iteration.
- // update_base_skip_probs(cpi);
-
- aom_clear_system_state();
-
- // Dummy pack of the bitstream using up to date stats to get an
- // accurate estimate of output frame size to determine if we need
- // to recode.
- if (cpi->sf.recode_loop >= ALLOW_RECODE_KFARFGF) {
- restore_coding_context(cpi);
-
- if (av1_pack_bitstream(cpi, dest, size) != AOM_CODEC_OK)
- return AOM_CODEC_ERROR;
-
- rc->projected_frame_size = (int)(*size) << 3;
- restore_coding_context(cpi);
-
- if (frame_over_shoot_limit == 0) frame_over_shoot_limit = 1;
- }
-
- if (cpi->oxcf.rc_mode == AOM_Q) {
- loop = 0;
- } else {
- if ((cm->frame_type == KEY_FRAME) && rc->this_key_frame_forced &&
- (rc->projected_frame_size < rc->max_frame_bandwidth)) {
- int last_q = q;
- int64_t kf_err;
-
- int64_t high_err_target = cpi->ambient_err;
- int64_t low_err_target = cpi->ambient_err >> 1;
-
- if (cm->seq_params.use_highbitdepth) {
- kf_err = aom_highbd_get_y_sse(cpi->source, get_frame_new_buffer(cm));
- } else {
- kf_err = aom_get_y_sse(cpi->source, get_frame_new_buffer(cm));
- }
- // Prevent possible divide by zero error below for perfect KF
- kf_err += !kf_err;
-
- // The key frame is not good enough or we can afford
- // to make it better without undue risk of popping.
- if ((kf_err > high_err_target &&
- rc->projected_frame_size <= frame_over_shoot_limit) ||
- (kf_err > low_err_target &&
- rc->projected_frame_size <= frame_under_shoot_limit)) {
- // Lower q_high
- q_high = q > q_low ? q - 1 : q_low;
-
- // Adjust Q
- q = (int)((q * high_err_target) / kf_err);
- q = AOMMIN(q, (q_high + q_low) >> 1);
- } else if (kf_err < low_err_target &&
- rc->projected_frame_size >= frame_under_shoot_limit) {
- // The key frame is much better than the previous frame
- // Raise q_low
- q_low = q < q_high ? q + 1 : q_high;
-
- // Adjust Q
- q = (int)((q * low_err_target) / kf_err);
- q = AOMMIN(q, (q_high + q_low + 1) >> 1);
- }
-
- // Clamp Q to upper and lower limits:
- q = clamp(q, q_low, q_high);
-
- loop = q != last_q;
- } else if (recode_loop_test(cpi, frame_over_shoot_limit,
- frame_under_shoot_limit, q,
- AOMMAX(q_high, top_index), bottom_index)) {
- // Is the projected frame size out of range and are we allowed
- // to attempt to recode.
- int last_q = q;
- int retries = 0;
-
- // Frame size out of permitted range:
- // Update correction factor & compute new Q to try...
- // Frame is too large
- if (rc->projected_frame_size > rc->this_frame_target) {
- // Special case if the projected size is > the max allowed.
- if (rc->projected_frame_size >= rc->max_frame_bandwidth)
- q_high = rc->worst_quality;
-
- // Raise Qlow as to at least the current value
- q_low = q < q_high ? q + 1 : q_high;
-
- if (undershoot_seen || loop_at_this_size > 1) {
- // Update rate_correction_factor unless
- av1_rc_update_rate_correction_factors(cpi, cm->width, cm->height);
-
- q = (q_high + q_low + 1) / 2;
- } else {
- // Update rate_correction_factor unless
- av1_rc_update_rate_correction_factors(cpi, cm->width, cm->height);
-
- q = av1_rc_regulate_q(cpi, rc->this_frame_target, bottom_index,
- AOMMAX(q_high, top_index), cm->width,
- cm->height);
-
- while (q < q_low && retries < 10) {
- av1_rc_update_rate_correction_factors(cpi, cm->width, cm->height);
- q = av1_rc_regulate_q(cpi, rc->this_frame_target, bottom_index,
- AOMMAX(q_high, top_index), cm->width,
- cm->height);
- retries++;
- }
- }
-
- overshoot_seen = 1;
- } else {
- // Frame is too small
- q_high = q > q_low ? q - 1 : q_low;
-
- if (overshoot_seen || loop_at_this_size > 1) {
- av1_rc_update_rate_correction_factors(cpi, cm->width, cm->height);
- q = (q_high + q_low) / 2;
- } else {
- av1_rc_update_rate_correction_factors(cpi, cm->width, cm->height);
- q = av1_rc_regulate_q(cpi, rc->this_frame_target, bottom_index,
- top_index, cm->width, cm->height);
- // Special case reset for qlow for constrained quality.
- // This should only trigger where there is very substantial
- // undershoot on a frame and the auto cq level is above
- // the user passsed in value.
- if (cpi->oxcf.rc_mode == AOM_CQ && q < q_low) {
- q_low = q;
- }
-
- while (q > q_high && retries < 10) {
- av1_rc_update_rate_correction_factors(cpi, cm->width, cm->height);
- q = av1_rc_regulate_q(cpi, rc->this_frame_target, bottom_index,
- top_index, cm->width, cm->height);
- retries++;
- }
- }
-
- undershoot_seen = 1;
- }
-
- // Clamp Q to upper and lower limits:
- q = clamp(q, q_low, q_high);
-
- loop = (q != last_q);
- } else {
- loop = 0;
- }
- }
-
- // Special case for overlay frame.
- if (rc->is_src_frame_alt_ref &&
- rc->projected_frame_size < rc->max_frame_bandwidth)
- loop = 0;
-
- if (!cpi->sf.gm_disable_recode) {
- if (recode_loop_test_global_motion(cpi)) loop = 1;
- }
-
- if (loop) {
- ++loop_count;
- ++loop_at_this_size;
-
-#if CONFIG_INTERNAL_STATS
- ++cpi->tot_recode_hits;
-#endif
- }
- } while (loop);
-
- return AOM_CODEC_OK;
-}
-
-static int get_ref_frame_flags(const AV1_COMP *cpi) {
- const int *const map = cpi->common.ref_frame_map;
-
- // No.1 Priority: LAST_FRAME
- const int last2_is_last = map[cpi->ref_fb_idx[1]] == map[cpi->ref_fb_idx[0]];
- const int last3_is_last = map[cpi->ref_fb_idx[2]] == map[cpi->ref_fb_idx[0]];
- const int gld_is_last =
- map[cpi->ref_fb_idx[GOLDEN_FRAME - 1]] == map[cpi->ref_fb_idx[0]];
- const int bwd_is_last =
- map[cpi->ref_fb_idx[BWDREF_FRAME - 1]] == map[cpi->ref_fb_idx[0]];
- const int alt2_is_last =
- map[cpi->ref_fb_idx[ALTREF2_FRAME - 1]] == map[cpi->ref_fb_idx[0]];
- const int alt_is_last =
- map[cpi->ref_fb_idx[ALTREF_FRAME - 1]] == map[cpi->ref_fb_idx[0]];
-
- // No.2 Priority: ALTREF_FRAME
- const int last2_is_alt =
- map[cpi->ref_fb_idx[1]] == map[cpi->ref_fb_idx[ALTREF_FRAME - 1]];
- const int last3_is_alt =
- map[cpi->ref_fb_idx[2]] == map[cpi->ref_fb_idx[ALTREF_FRAME - 1]];
- const int gld_is_alt = map[cpi->ref_fb_idx[GOLDEN_FRAME - 1]] ==
- map[cpi->ref_fb_idx[ALTREF_FRAME - 1]];
- const int bwd_is_alt = map[cpi->ref_fb_idx[BWDREF_FRAME - 1]] ==
- map[cpi->ref_fb_idx[ALTREF_FRAME - 1]];
- const int alt2_is_alt = map[cpi->ref_fb_idx[ALTREF2_FRAME - 1]] ==
- map[cpi->ref_fb_idx[ALTREF_FRAME - 1]];
-
- // No.3 Priority: LAST2_FRAME
- const int last3_is_last2 = map[cpi->ref_fb_idx[2]] == map[cpi->ref_fb_idx[1]];
- const int gld_is_last2 =
- map[cpi->ref_fb_idx[GOLDEN_FRAME - 1]] == map[cpi->ref_fb_idx[1]];
- const int bwd_is_last2 =
- map[cpi->ref_fb_idx[BWDREF_FRAME - 1]] == map[cpi->ref_fb_idx[1]];
- const int alt2_is_last2 =
- map[cpi->ref_fb_idx[ALTREF2_FRAME - 1]] == map[cpi->ref_fb_idx[1]];
-
- // No.4 Priority: LAST3_FRAME
- const int gld_is_last3 =
- map[cpi->ref_fb_idx[GOLDEN_FRAME - 1]] == map[cpi->ref_fb_idx[2]];
- const int bwd_is_last3 =
- map[cpi->ref_fb_idx[BWDREF_FRAME - 1]] == map[cpi->ref_fb_idx[2]];
- const int alt2_is_last3 =
- map[cpi->ref_fb_idx[ALTREF2_FRAME - 1]] == map[cpi->ref_fb_idx[2]];
-
- // No.5 Priority: GOLDEN_FRAME
- const int bwd_is_gld = map[cpi->ref_fb_idx[BWDREF_FRAME - 1]] ==
- map[cpi->ref_fb_idx[GOLDEN_FRAME - 1]];
- const int alt2_is_gld = map[cpi->ref_fb_idx[ALTREF2_FRAME - 1]] ==
- map[cpi->ref_fb_idx[GOLDEN_FRAME - 1]];
-
- // No.6 Priority: BWDREF_FRAME
- const int alt2_is_bwd = map[cpi->ref_fb_idx[ALTREF2_FRAME - 1]] ==
- map[cpi->ref_fb_idx[BWDREF_FRAME - 1]];
-
- // No.7 Priority: ALTREF2_FRAME
-
- // After av1_apply_encoding_flags() is called, cpi->ref_frame_flags might be
- // adjusted according to external encoder flags.
- int flags = cpi->ext_ref_frame_flags;
-
- if (cpi->rc.frames_till_gf_update_due == INT_MAX) flags &= ~AOM_GOLD_FLAG;
-
- if (alt_is_last) flags &= ~AOM_ALT_FLAG;
-
- if (last2_is_last || last2_is_alt) flags &= ~AOM_LAST2_FLAG;
-
- if (last3_is_last || last3_is_alt || last3_is_last2) flags &= ~AOM_LAST3_FLAG;
-
- if (gld_is_last || gld_is_alt || gld_is_last2 || gld_is_last3)
- flags &= ~AOM_GOLD_FLAG;
-
- if ((bwd_is_last || bwd_is_alt || bwd_is_last2 || bwd_is_last3 ||
- bwd_is_gld) &&
- (flags & AOM_BWD_FLAG))
- flags &= ~AOM_BWD_FLAG;
-
- if ((alt2_is_last || alt2_is_alt || alt2_is_last2 || alt2_is_last3 ||
- alt2_is_gld || alt2_is_bwd) &&
- (flags & AOM_ALT2_FLAG))
- flags &= ~AOM_ALT2_FLAG;
-
- return flags;
-}
-
-static void set_ext_overrides(AV1_COMP *cpi) {
- // Overrides the defaults with the externally supplied values with
- // av1_update_reference() and av1_update_entropy() calls
- // Note: The overrides are valid only for the next frame passed
- // to encode_frame_to_data_rate() function
- if (cpi->ext_use_s_frame) cpi->common.frame_type = S_FRAME;
- cpi->common.force_primary_ref_none = cpi->ext_use_primary_ref_none;
-
- if (cpi->ext_refresh_frame_context_pending) {
- cpi->common.refresh_frame_context = cpi->ext_refresh_frame_context;
- cpi->ext_refresh_frame_context_pending = 0;
- }
- if (cpi->ext_refresh_frame_flags_pending) {
- cpi->refresh_last_frame = cpi->ext_refresh_last_frame;
- cpi->refresh_golden_frame = cpi->ext_refresh_golden_frame;
- cpi->refresh_alt_ref_frame = cpi->ext_refresh_alt_ref_frame;
- cpi->refresh_bwd_ref_frame = cpi->ext_refresh_bwd_ref_frame;
- cpi->refresh_alt2_ref_frame = cpi->ext_refresh_alt2_ref_frame;
- cpi->ext_refresh_frame_flags_pending = 0;
- }
- cpi->common.allow_ref_frame_mvs = cpi->ext_use_ref_frame_mvs;
- // A keyframe is already error resilient and keyframes with
- // error_resilient_mode interferes with the use of show_existing_frame
- // when forward reference keyframes are enabled.
- cpi->common.error_resilient_mode =
- cpi->ext_use_error_resilient && cpi->common.frame_type != KEY_FRAME;
-}
-
-#define DUMP_RECON_FRAMES 0
-
-#if DUMP_RECON_FRAMES == 1
-// NOTE(zoeliu): For debug - Output the filtered reconstructed video.
-static void dump_filtered_recon_frames(AV1_COMP *cpi) {
- AV1_COMMON *const cm = &cpi->common;
- const YV12_BUFFER_CONFIG *recon_buf = cm->frame_to_show;
-
- if (recon_buf == NULL) {
- printf("Frame %d is not ready.\n", cm->current_video_frame);
- return;
- }
-
- static const int flag_list[REF_FRAMES] = { 0,
- AOM_LAST_FLAG,
- AOM_LAST2_FLAG,
- AOM_LAST3_FLAG,
- AOM_GOLD_FLAG,
- AOM_BWD_FLAG,
- AOM_ALT2_FLAG,
- AOM_ALT_FLAG };
- printf(
- "\n***Frame=%d (frame_offset=%d, show_frame=%d, "
- "show_existing_frame=%d) "
- "[LAST LAST2 LAST3 GOLDEN BWD ALT2 ALT]=[",
- cm->current_video_frame, cm->frame_offset, cm->show_frame,
- cm->show_existing_frame);
- for (int ref_frame = LAST_FRAME; ref_frame <= ALTREF_FRAME; ++ref_frame) {
- const int buf_idx = cm->frame_refs[ref_frame - LAST_FRAME].idx;
- const int ref_offset =
- (buf_idx >= 0)
- ? (int)cm->buffer_pool->frame_bufs[buf_idx].cur_frame_offset
- : -1;
- printf(
- " %d(%c-%d-%4.2f)", ref_offset,
- (cpi->ref_frame_flags & flag_list[ref_frame]) ? 'Y' : 'N',
- (buf_idx >= 0) ? (int)cpi->frame_rf_level[buf_idx] : -1,
- (buf_idx >= 0) ? rate_factor_deltas[cpi->frame_rf_level[buf_idx]] : -1);
- }
- printf(" ]\n");
-
- if (!cm->show_frame) {
- printf("Frame %d is a no show frame, so no image dump.\n",
- cm->current_video_frame);
- return;
- }
-
- int h;
- char file_name[256] = "/tmp/enc_filtered_recon.yuv";
- FILE *f_recon = NULL;
-
- if (cm->current_video_frame == 0) {
- if ((f_recon = fopen(file_name, "wb")) == NULL) {
- printf("Unable to open file %s to write.\n", file_name);
- return;
- }
- } else {
- if ((f_recon = fopen(file_name, "ab")) == NULL) {
- printf("Unable to open file %s to append.\n", file_name);
- return;
- }
- }
- printf(
- "\nFrame=%5d, encode_update_type[%5d]=%1d, frame_offset=%d, "
- "show_frame=%d, show_existing_frame=%d, source_alt_ref_active=%d, "
- "refresh_alt_ref_frame=%d, rf_level=%d, "
- "y_stride=%4d, uv_stride=%4d, cm->width=%4d, cm->height=%4d\n\n",
- cm->current_video_frame, cpi->twopass.gf_group.index,
- cpi->twopass.gf_group.update_type[cpi->twopass.gf_group.index],
- cm->frame_offset, cm->show_frame, cm->show_existing_frame,
- cpi->rc.source_alt_ref_active, cpi->refresh_alt_ref_frame,
- cpi->twopass.gf_group.rf_level[cpi->twopass.gf_group.index],
- recon_buf->y_stride, recon_buf->uv_stride, cm->width, cm->height);
-#if 0
- int ref_frame;
- printf("get_ref_frame_map_idx: [");
- for (ref_frame = LAST_FRAME; ref_frame <= ALTREF_FRAME; ++ref_frame)
- printf(" %d", get_ref_frame_map_idx(cpi, ref_frame));
- printf(" ]\n");
- printf("cm->new_fb_idx = %d\n", cm->new_fb_idx);
- printf("cm->ref_frame_map = [");
- for (ref_frame = LAST_FRAME; ref_frame <= ALTREF_FRAME; ++ref_frame) {
- printf(" %d", cm->ref_frame_map[ref_frame - LAST_FRAME]);
- }
- printf(" ]\n");
-#endif // 0
-
- // --- Y ---
- for (h = 0; h < cm->height; ++h) {
- fwrite(&recon_buf->y_buffer[h * recon_buf->y_stride], 1, cm->width,
- f_recon);
- }
- // --- U ---
- for (h = 0; h < (cm->height >> 1); ++h) {
- fwrite(&recon_buf->u_buffer[h * recon_buf->uv_stride], 1, (cm->width >> 1),
- f_recon);
- }
- // --- V ---
- for (h = 0; h < (cm->height >> 1); ++h) {
- fwrite(&recon_buf->v_buffer[h * recon_buf->uv_stride], 1, (cm->width >> 1),
- f_recon);
- }
-
- fclose(f_recon);
-}
-#endif // DUMP_RECON_FRAMES
-
-static INLINE int is_frame_droppable(AV1_COMP *cpi) {
- return !(cpi->refresh_alt_ref_frame || cpi->refresh_alt2_ref_frame ||
- cpi->refresh_bwd_ref_frame || cpi->refresh_golden_frame ||
- cpi->refresh_last_frame);
-}
-
-static int encode_frame_to_data_rate(AV1_COMP *cpi, size_t *size, uint8_t *dest,
- int skip_adapt,
- unsigned int *frame_flags) {
- AV1_COMMON *const cm = &cpi->common;
- SequenceHeader *const seq_params = &cm->seq_params;
- const AV1EncoderConfig *const oxcf = &cpi->oxcf;
- struct segmentation *const seg = &cm->seg;
-
- set_ext_overrides(cpi);
- aom_clear_system_state();
-
- // frame type has been decided outside of this function call
- cm->cur_frame->intra_only = frame_is_intra_only(cm);
- cm->cur_frame->frame_type = cm->frame_type;
-
- // S_FRAMEs are always error resilient
- cm->error_resilient_mode |= frame_is_sframe(cm);
-
- cm->large_scale_tile = cpi->oxcf.large_scale_tile;
- cm->single_tile_decoding = cpi->oxcf.single_tile_decoding;
- if (cm->large_scale_tile) seq_params->frame_id_numbers_present_flag = 0;
-
- cm->allow_ref_frame_mvs &= frame_might_allow_ref_frame_mvs(cm);
- // cm->allow_ref_frame_mvs needs to be written into the frame header while
- // cm->large_scale_tile is 1, therefore, "cm->large_scale_tile=1" case is
- // separated from frame_might_allow_ref_frame_mvs().
- cm->allow_ref_frame_mvs &= !cm->large_scale_tile;
-
- cm->allow_warped_motion =
- cpi->oxcf.allow_warped_motion && frame_might_allow_warped_motion(cm);
-
- // Reset the frame packet stamp index.
- if (cm->frame_type == KEY_FRAME && cm->show_frame)
- cm->current_video_frame = 0;
-
- // NOTE:
- // (1) Move the setup of the ref_frame_flags upfront as it would be
- // determined by the current frame properties;
- // (2) The setup of the ref_frame_flags applies to both
- // show_existing_frame's
- // and the other cases.
- if (cm->current_video_frame > 0)
- cpi->ref_frame_flags = get_ref_frame_flags(cpi);
-
- if (encode_show_existing_frame(cm)) {
- // NOTE(zoeliu): In BIDIR_PRED, the existing frame to show is the current
- // BWDREF_FRAME in the reference frame buffer.
- if (cm->frame_type == KEY_FRAME) {
- cm->reset_decoder_state = 1;
- } else {
- cm->frame_type = INTER_FRAME;
- }
- cm->show_frame = 1;
- cpi->frame_flags = *frame_flags;
-
- restore_coding_context(cpi);
-
- // Build the bitstream
- if (av1_pack_bitstream(cpi, dest, size) != AOM_CODEC_OK)
- return AOM_CODEC_ERROR;
-
- cpi->seq_params_locked = 1;
-
- // Set up frame to show to get ready for stats collection.
- cm->frame_to_show = get_frame_new_buffer(cm);
-
- // Update current frame offset.
- cm->frame_offset =
- cm->buffer_pool->frame_bufs[cm->new_fb_idx].cur_frame_offset;
-
-#if DUMP_RECON_FRAMES == 1
- // NOTE(zoeliu): For debug - Output the filtered reconstructed video.
- dump_filtered_recon_frames(cpi);
-#endif // DUMP_RECON_FRAMES
-
- // Update the LAST_FRAME in the reference frame buffer.
- // NOTE:
- // (1) For BWDREF_FRAME as the show_existing_frame, the reference frame
- // update has been done previously when handling the LAST_BIPRED_FRAME
- // right before BWDREF_FRAME (in the display order);
- // (2) For INTNL_OVERLAY as the show_existing_frame, the reference frame
- // update will be done when the following is called, which will
- // exchange
- // the virtual indexes between LAST_FRAME and ALTREF2_FRAME, so that
- // LAST3 will get retired, LAST2 becomes LAST3, LAST becomes LAST2,
- // and
- // ALTREF2_FRAME will serve as the new LAST_FRAME.
- update_reference_frames(cpi);
-
- // Update frame flags
- cpi->frame_flags &= ~FRAMEFLAGS_GOLDEN;
- cpi->frame_flags &= ~FRAMEFLAGS_BWDREF;
- cpi->frame_flags &= ~FRAMEFLAGS_ALTREF;
-
- *frame_flags = cpi->frame_flags & ~FRAMEFLAGS_KEY;
-
- // Update the frame type
- cm->last_frame_type = cm->frame_type;
-
- // Since we allocate a spot for the OVERLAY frame in the gf group, we need
- // to do post-encoding update accordingly.
- if (cpi->rc.is_src_frame_alt_ref) {
- av1_set_target_rate(cpi, cm->width, cm->height);
- av1_rc_postencode_update(cpi, *size);
- }
-
- ++cm->current_video_frame;
-
- return AOM_CODEC_OK;
- }
-
- // Set default state for segment based loop filter update flags.
- cm->lf.mode_ref_delta_update = 0;
-
- // Set various flags etc to special state if it is a key frame.
- if (frame_is_intra_only(cm) || frame_is_sframe(cm)) {
- // Reset the loop filter deltas and segmentation map.
- av1_reset_segment_features(cm);
-
- // If segmentation is enabled force a map update for key frames.
- if (seg->enabled) {
- seg->update_map = 1;
- seg->update_data = 1;
- }
-
- // The alternate reference frame cannot be active for a key frame.
- cpi->rc.source_alt_ref_active = 0;
- }
- if (cpi->oxcf.mtu == 0) {
- cm->num_tg = cpi->oxcf.num_tile_groups;
- } else {
- // Use a default value for the purposes of weighting costs in probability
- // updates
- cm->num_tg = DEFAULT_MAX_NUM_TG;
- }
-
- // For 1 pass CBR, check if we are dropping this frame.
- // Never drop on key frame.
- if (oxcf->pass == 0 && oxcf->rc_mode == AOM_CBR &&
- cm->frame_type != KEY_FRAME) {
- if (av1_rc_drop_frame(cpi)) {
- av1_rc_postencode_update_drop_frame(cpi);
- return AOM_CODEC_OK;
- }
- }
-
- aom_clear_system_state();
-
-#if CONFIG_INTERNAL_STATS
- memset(cpi->mode_chosen_counts, 0,
- MAX_MODES * sizeof(*cpi->mode_chosen_counts));
-#endif
-
- if (seq_params->frame_id_numbers_present_flag) {
- /* Non-normative definition of current_frame_id ("frame counter" with
- * wraparound) */
- const int frame_id_length = FRAME_ID_LENGTH;
- if (cm->current_frame_id == -1) {
- int lsb, msb;
- /* quasi-random initialization of current_frame_id for a key frame */
- if (cpi->source->flags & YV12_FLAG_HIGHBITDEPTH) {
- lsb = CONVERT_TO_SHORTPTR(cpi->source->y_buffer)[0] & 0xff;
- msb = CONVERT_TO_SHORTPTR(cpi->source->y_buffer)[1] & 0xff;
- } else {
- lsb = cpi->source->y_buffer[0] & 0xff;
- msb = cpi->source->y_buffer[1] & 0xff;
- }
- cm->current_frame_id = ((msb << 8) + lsb) % (1 << frame_id_length);
-
- // S_frame is meant for stitching different streams of different
- // resolutions together, so current_frame_id must be the
- // same across different streams of the same content current_frame_id
- // should be the same and not random. 0x37 is a chosen number as start
- // point
- if (cpi->oxcf.sframe_enabled) cm->current_frame_id = 0x37;
- } else {
- cm->current_frame_id =
- (cm->current_frame_id + 1 + (1 << frame_id_length)) %
- (1 << frame_id_length);
- }
- }
-
- switch (cpi->oxcf.cdf_update_mode) {
- case 0: // No CDF update for any frames(4~6% compression loss).
- cm->disable_cdf_update = 1;
- break;
- case 1: // Enable CDF update for all frames.
- cm->disable_cdf_update = 0;
- break;
- case 2:
- // Strategically determine at which frames to do CDF update.
- // Currently only enable CDF update for all-intra and no-show frames(1.5%
- // compression loss).
- // TODO(huisu@google.com): design schemes for various trade-offs between
- // compression quality and decoding speed.
- cm->disable_cdf_update =
- (frame_is_intra_only(cm) || !cm->show_frame) ? 0 : 1;
- break;
- }
- cm->timing_info_present &= !seq_params->reduced_still_picture_hdr;
-
- if (cpi->sf.recode_loop == DISALLOW_RECODE) {
- if (encode_without_recode_loop(cpi) != AOM_CODEC_OK) return AOM_CODEC_ERROR;
- } else {
- if (encode_with_recode_loop(cpi, size, dest) != AOM_CODEC_OK)
- return AOM_CODEC_ERROR;
- }
-
- cm->last_tile_cols = cm->tile_cols;
- cm->last_tile_rows = cm->tile_rows;
-
-#ifdef OUTPUT_YUV_SKINMAP
- if (cpi->common.current_video_frame > 1) {
- av1_compute_skin_map(cpi, yuv_skinmap_file);
- }
-#endif // OUTPUT_YUV_SKINMAP
-
- // Special case code to reduce pulsing when key frames are forced at a
- // fixed interval. Note the reconstruction error if it is the frame before
- // the force key frame
- if (cpi->rc.next_key_frame_forced && cpi->rc.frames_to_key == 1) {
- if (seq_params->use_highbitdepth) {
- cpi->ambient_err =
- aom_highbd_get_y_sse(cpi->source, get_frame_new_buffer(cm));
- } else {
- cpi->ambient_err = aom_get_y_sse(cpi->source, get_frame_new_buffer(cm));
- }
- }
-
- // If the encoder forced a KEY_FRAME decision or if frame is an S_FRAME
- if ((cm->frame_type == KEY_FRAME && cm->show_frame) || frame_is_sframe(cm)) {
- cpi->refresh_last_frame = 1;
- }
-
- cm->frame_to_show = get_frame_new_buffer(cm);
- cm->frame_to_show->color_primaries = seq_params->color_primaries;
- cm->frame_to_show->transfer_characteristics =
- seq_params->transfer_characteristics;
- cm->frame_to_show->matrix_coefficients = seq_params->matrix_coefficients;
- cm->frame_to_show->monochrome = seq_params->monochrome;
- cm->frame_to_show->chroma_sample_position =
- seq_params->chroma_sample_position;
- cm->frame_to_show->color_range = seq_params->color_range;
- cm->frame_to_show->render_width = cm->render_width;
- cm->frame_to_show->render_height = cm->render_height;
-
- // TODO(zoeliu): For non-ref frames, loop filtering may need to be turned
- // off.
-
- // Pick the loop filter level for the frame.
- if (!cm->allow_intrabc) {
- loopfilter_frame(cpi, cm);
- } else {
- cm->lf.filter_level[0] = 0;
- cm->lf.filter_level[1] = 0;
- cm->cdef_bits = 0;
- cm->cdef_strengths[0] = 0;
- cm->nb_cdef_strengths = 1;
- cm->cdef_uv_strengths[0] = 0;
- cm->rst_info[0].frame_restoration_type = RESTORE_NONE;
- cm->rst_info[1].frame_restoration_type = RESTORE_NONE;
- cm->rst_info[2].frame_restoration_type = RESTORE_NONE;
- }
-
- // TODO(debargha): Fix mv search range on encoder side
- // aom_extend_frame_inner_borders(cm->frame_to_show, av1_num_planes(cm));
- aom_extend_frame_borders(cm->frame_to_show, av1_num_planes(cm));
-
-#ifdef OUTPUT_YUV_REC
- aom_write_one_yuv_frame(cm, cm->frame_to_show);
-#endif
-
- // Build the bitstream
- if (av1_pack_bitstream(cpi, dest, size) != AOM_CODEC_OK)
- return AOM_CODEC_ERROR;
-
- cpi->seq_params_locked = 1;
-
- if (skip_adapt) return AOM_CODEC_OK;
-
- if (seq_params->frame_id_numbers_present_flag) {
- int i;
- // Update reference frame id values based on the value of refresh_frame_mask
- for (i = 0; i < REF_FRAMES; i++) {
- if ((cpi->refresh_frame_mask >> i) & 1) {
- cm->ref_frame_id[i] = cm->current_frame_id;
- }
- }
- }
-
-#if DUMP_RECON_FRAMES == 1
- // NOTE(zoeliu): For debug - Output the filtered reconstructed video.
- dump_filtered_recon_frames(cpi);
-#endif // DUMP_RECON_FRAMES
-
- if (cm->seg.enabled) {
- if (cm->seg.update_map) {
- update_reference_segmentation_map(cpi);
- } else if (cm->last_frame_seg_map) {
- memcpy(cm->current_frame_seg_map, cm->last_frame_seg_map,
- cm->mi_cols * cm->mi_rows * sizeof(uint8_t));
- }
- }
-
- if (frame_is_intra_only(cm) == 0) {
- release_scaled_references(cpi);
- }
-
- update_reference_frames(cpi);
-
-#if CONFIG_ENTROPY_STATS
- av1_accumulate_frame_counts(&aggregate_fc, &cpi->counts);
-#endif // CONFIG_ENTROPY_STATS
-
- if (cm->refresh_frame_context == REFRESH_FRAME_CONTEXT_BACKWARD) {
- *cm->fc = cpi->tile_data[cm->largest_tile_id].tctx;
- av1_reset_cdf_symbol_counters(cm->fc);
- }
-
- if (cpi->refresh_golden_frame == 1)
- cpi->frame_flags |= FRAMEFLAGS_GOLDEN;
- else
- cpi->frame_flags &= ~FRAMEFLAGS_GOLDEN;
-
- if (cpi->refresh_alt_ref_frame == 1)
- cpi->frame_flags |= FRAMEFLAGS_ALTREF;
- else
- cpi->frame_flags &= ~FRAMEFLAGS_ALTREF;
-
- if (cpi->refresh_bwd_ref_frame == 1)
- cpi->frame_flags |= FRAMEFLAGS_BWDREF;
- else
- cpi->frame_flags &= ~FRAMEFLAGS_BWDREF;
-
- cm->last_frame_type = cm->frame_type;
-
- av1_rc_postencode_update(cpi, *size);
-
- if (cm->frame_type == KEY_FRAME) {
- // Tell the caller that the frame was coded as a key frame
- *frame_flags = cpi->frame_flags | FRAMEFLAGS_KEY;
- } else {
- *frame_flags = cpi->frame_flags & ~FRAMEFLAGS_KEY;
- }
-
- // Clear the one shot update flags for segmentation map and mode/ref loop
- // filter deltas.
- cm->seg.update_map = 0;
- cm->seg.update_data = 0;
- cm->lf.mode_ref_delta_update = 0;
-
- // A droppable frame might not be shown but it always
- // takes a space in the gf group. Therefore, even when
- // it is not shown, we still need update the count down.
-
- if (cm->show_frame) {
- // TODO(zoeliu): We may only swamp mi and prev_mi for those frames that
- // are
- // being used as reference.
- swap_mi_and_prev_mi(cm);
- // Don't increment frame counters if this was an altref buffer
- // update not a real frame
-
- ++cm->current_video_frame;
- }
-
- // NOTE: Shall not refer to any frame not used as reference.
- if (cm->is_reference_frame) {
- // keep track of the last coded dimensions
- cm->last_width = cm->width;
- cm->last_height = cm->height;
-
- // reset to normal state now that we are done.
- cm->last_show_frame = cm->show_frame;
- }
-
- return AOM_CODEC_OK;
-}
-
-static INLINE void update_keyframe_counters(AV1_COMP *cpi) {
- // TODO(zoeliu): To investigate whether we should treat BWDREF_FRAME
- // differently here for rc->avg_frame_bandwidth.
- if (cpi->common.show_frame || cpi->rc.is_bwd_ref_frame) {
- if (!cpi->common.show_existing_frame || cpi->rc.is_src_frame_alt_ref ||
- cpi->common.frame_type == KEY_FRAME) {
- // If this is a show_existing_frame with a source other than altref,
- // or if it is not a displayed forward keyframe, the keyframe update
- // counters were incremented when it was originally encoded.
- cpi->rc.frames_since_key++;
- cpi->rc.frames_to_key--;
- }
- }
-}
-
-static INLINE void update_frames_till_gf_update(AV1_COMP *cpi) {
- // TODO(weitinglin): Updating this counter for is_frame_droppable
- // is a work-around to handle the condition when a frame is drop.
- // We should fix the cpi->common.show_frame flag
- // instead of checking the other condition to update the counter properly.
- if (cpi->common.show_frame || is_frame_droppable(cpi)) {
- // Decrement count down till next gf
- if (cpi->rc.frames_till_gf_update_due > 0)
- cpi->rc.frames_till_gf_update_due--;
- }
-}
-
-static INLINE void update_twopass_gf_group_index(AV1_COMP *cpi) {
- // Increment the gf group index ready for the next frame. If this is
- // a show_existing_frame with a source other than altref, or if it is not
- // a displayed forward keyframe, the index was incremented when it was
- // originally encoded.
- if (!cpi->common.show_existing_frame || cpi->rc.is_src_frame_alt_ref ||
- cpi->common.frame_type == KEY_FRAME) {
- ++cpi->twopass.gf_group.index;
- }
-}
-
-static void update_rc_counts(AV1_COMP *cpi) {
- update_keyframe_counters(cpi);
- update_frames_till_gf_update(cpi);
- if (cpi->oxcf.pass == 2) update_twopass_gf_group_index(cpi);
-}
-
-static int Pass0Encode(AV1_COMP *cpi, size_t *size, uint8_t *dest,
- int skip_adapt, unsigned int *frame_flags) {
- if (cpi->oxcf.rc_mode == AOM_CBR) {
- av1_rc_get_one_pass_cbr_params(cpi);
- } else {
- av1_rc_get_one_pass_vbr_params(cpi);
- }
- if (encode_frame_to_data_rate(cpi, size, dest, skip_adapt, frame_flags) !=
- AOM_CODEC_OK) {
- return AOM_CODEC_ERROR;
- }
- update_rc_counts(cpi);
- check_show_existing_frame(cpi);
- return AOM_CODEC_OK;
-}
-
-static int Pass2Encode(AV1_COMP *cpi, size_t *size, uint8_t *dest,
- unsigned int *frame_flags) {
-#if CONFIG_MISMATCH_DEBUG
- mismatch_move_frame_idx_w();
-#endif
-#if TXCOEFF_COST_TIMER
- AV1_COMMON *cm = &cpi->common;
- cm->txcoeff_cost_timer = 0;
- cm->txcoeff_cost_count = 0;
-#endif
-
- if (encode_frame_to_data_rate(cpi, size, dest, 0, frame_flags) !=
- AOM_CODEC_OK) {
- return AOM_CODEC_ERROR;
- }
-
-#if TXCOEFF_COST_TIMER
- cm->cum_txcoeff_cost_timer += cm->txcoeff_cost_timer;
- fprintf(stderr,
- "\ntxb coeff cost block number: %ld, frame time: %ld, cum time %ld "
- "in us\n",
- cm->txcoeff_cost_count, cm->txcoeff_cost_timer,
- cm->cum_txcoeff_cost_timer);
-#endif
-
- av1_twopass_postencode_update(cpi);
- update_rc_counts(cpi);
- check_show_existing_frame(cpi);
- return AOM_CODEC_OK;
-}
-
-#if CONFIG_DENOISE
-static int apply_denoise_2d(AV1_COMP *cpi, YV12_BUFFER_CONFIG *sd,
- int block_size, float noise_level,
- int64_t time_stamp, int64_t end_time) {
- AV1_COMMON *const cm = &cpi->common;
- if (!cpi->denoise_and_model) {
- cpi->denoise_and_model = aom_denoise_and_model_alloc(
- cm->seq_params.bit_depth, block_size, noise_level);
- if (!cpi->denoise_and_model) {
- aom_internal_error(&cm->error, AOM_CODEC_MEM_ERROR,
- "Error allocating denoise and model");
- return -1;
- }
- }
- if (!cpi->film_grain_table) {
- cpi->film_grain_table = aom_malloc(sizeof(*cpi->film_grain_table));
- if (!cpi->film_grain_table) {
- aom_internal_error(&cm->error, AOM_CODEC_MEM_ERROR,
- "Error allocating grain table");
- return -1;
- }
- memset(cpi->film_grain_table, 0, sizeof(*cpi->film_grain_table));
- }
- if (aom_denoise_and_model_run(cpi->denoise_and_model, sd,
- &cm->film_grain_params)) {
- if (cm->film_grain_params.apply_grain) {
- aom_film_grain_table_append(cpi->film_grain_table, time_stamp, end_time,
- &cm->film_grain_params);
- }
- }
- return 0;
-}
-#endif
-
-int av1_receive_raw_frame(AV1_COMP *cpi, aom_enc_frame_flags_t frame_flags,
- YV12_BUFFER_CONFIG *sd, int64_t time_stamp,
- int64_t end_time) {
- AV1_COMMON *const cm = &cpi->common;
- const SequenceHeader *const seq_params = &cm->seq_params;
- struct aom_usec_timer timer;
- int res = 0;
- const int subsampling_x = sd->subsampling_x;
- const int subsampling_y = sd->subsampling_y;
- const int use_highbitdepth = (sd->flags & YV12_FLAG_HIGHBITDEPTH) != 0;
-
- check_initial_width(cpi, use_highbitdepth, subsampling_x, subsampling_y);
-
- aom_usec_timer_start(&timer);
-
-#if CONFIG_DENOISE
- if (cpi->oxcf.noise_level > 0)
- if (apply_denoise_2d(cpi, sd, cpi->oxcf.noise_block_size,
- cpi->oxcf.noise_level, time_stamp, end_time) < 0)
- res = -1;
-#endif // CONFIG_DENOISE
-
- if (av1_lookahead_push(cpi->lookahead, sd, time_stamp, end_time,
- use_highbitdepth, frame_flags))
- res = -1;
- aom_usec_timer_mark(&timer);
- cpi->time_receive_data += aom_usec_timer_elapsed(&timer);
-
- if ((seq_params->profile == PROFILE_0) && !seq_params->monochrome &&
- (subsampling_x != 1 || subsampling_y != 1)) {
- aom_internal_error(&cm->error, AOM_CODEC_INVALID_PARAM,
- "Non-4:2:0 color format requires profile 1 or 2");
- res = -1;
- }
- if ((seq_params->profile == PROFILE_1) &&
- !(subsampling_x == 0 && subsampling_y == 0)) {
- aom_internal_error(&cm->error, AOM_CODEC_INVALID_PARAM,
- "Profile 1 requires 4:4:4 color format");
- res = -1;
- }
- if ((seq_params->profile == PROFILE_2) &&
- (seq_params->bit_depth <= AOM_BITS_10) &&
- !(subsampling_x == 1 && subsampling_y == 0)) {
- aom_internal_error(&cm->error, AOM_CODEC_INVALID_PARAM,
- "Profile 2 bit-depth < 10 requires 4:2:2 color format");
- res = -1;
- }
-
- return res;
-}
-
-static int frame_is_reference(const AV1_COMP *cpi) {
- const AV1_COMMON *cm = &cpi->common;
-
- return cm->frame_type == KEY_FRAME || cpi->refresh_last_frame ||
- cpi->refresh_golden_frame || cpi->refresh_bwd_ref_frame ||
- cpi->refresh_alt2_ref_frame || cpi->refresh_alt_ref_frame ||
- !cm->error_resilient_mode || cm->lf.mode_ref_delta_update ||
- cm->seg.update_map || cm->seg.update_data;
-}
-
-static void adjust_frame_rate(AV1_COMP *cpi,
- const struct lookahead_entry *source) {
- int64_t this_duration;
- int step = 0;
-
- if (source->ts_start == cpi->first_time_stamp_ever) {
- this_duration = source->ts_end - source->ts_start;
- step = 1;
- } else {
- int64_t last_duration =
- cpi->last_end_time_stamp_seen - cpi->last_time_stamp_seen;
-
- this_duration = source->ts_end - cpi->last_end_time_stamp_seen;
-
- // do a step update if the duration changes by 10%
- if (last_duration)
- step = (int)((this_duration - last_duration) * 10 / last_duration);
- }
-
- if (this_duration) {
- if (step) {
- av1_new_framerate(cpi, 10000000.0 / this_duration);
- } else {
- // Average this frame's rate into the last second's average
- // frame rate. If we haven't seen 1 second yet, then average
- // over the whole interval seen.
- const double interval = AOMMIN(
- (double)(source->ts_end - cpi->first_time_stamp_ever), 10000000.0);
- double avg_duration = 10000000.0 / cpi->framerate;
- avg_duration *= (interval - avg_duration + this_duration);
- avg_duration /= interval;
-
- av1_new_framerate(cpi, 10000000.0 / avg_duration);
- }
- }
- cpi->last_time_stamp_seen = source->ts_start;
- cpi->last_end_time_stamp_seen = source->ts_end;
-}
-
-// Returns 0 if this is not an alt ref else the offset of the source frame
-// used as the arf midpoint.
-static int get_arf_src_index(AV1_COMP *cpi) {
- RATE_CONTROL *const rc = &cpi->rc;
- int arf_src_index = 0;
- if (is_altref_enabled(cpi)) {
- if (cpi->oxcf.pass == 2) {
- const GF_GROUP *const gf_group = &cpi->twopass.gf_group;
- if (gf_group->update_type[gf_group->index] == ARF_UPDATE) {
- arf_src_index = gf_group->arf_src_offset[gf_group->index];
- }
- } else if (rc->source_alt_ref_pending) {
- arf_src_index = rc->frames_till_gf_update_due;
- }
- }
- return arf_src_index;
-}
-
-static int get_brf_src_index(AV1_COMP *cpi) {
- int brf_src_index = 0;
- const GF_GROUP *const gf_group = &cpi->twopass.gf_group;
-
- // TODO(zoeliu): We need to add the check on the -bwd_ref command line setup
- // flag.
- if (gf_group->bidir_pred_enabled[gf_group->index]) {
- if (cpi->oxcf.pass == 2) {
- if (gf_group->update_type[gf_group->index] == BRF_UPDATE)
- brf_src_index = gf_group->brf_src_offset[gf_group->index];
- } else {
- // TODO(zoeliu): To re-visit the setup for this scenario
- brf_src_index = cpi->rc.bipred_group_interval - 1;
- }
- }
-
- return brf_src_index;
-}
-
-// Returns 0 if this is not an alt ref else the offset of the source frame
-// used as the arf midpoint.
-static int get_arf2_src_index(AV1_COMP *cpi) {
- int arf2_src_index = 0;
- if (is_altref_enabled(cpi) && cpi->num_extra_arfs) {
- if (cpi->oxcf.pass == 2) {
- const GF_GROUP *const gf_group = &cpi->twopass.gf_group;
- if (gf_group->update_type[gf_group->index] == INTNL_ARF_UPDATE) {
- arf2_src_index = gf_group->arf_src_offset[gf_group->index];
- }
- }
- }
- return arf2_src_index;
-}
-
-static void check_src_altref(AV1_COMP *cpi,
- const struct lookahead_entry *source) {
- RATE_CONTROL *const rc = &cpi->rc;
-
- // If pass == 2, the parameters set here will be reset in
- // av1_rc_get_second_pass_params()
-
- if (cpi->oxcf.pass == 2) {
- const GF_GROUP *const gf_group = &cpi->twopass.gf_group;
- rc->is_src_frame_alt_ref =
- (gf_group->update_type[gf_group->index] == INTNL_OVERLAY_UPDATE) ||
- (gf_group->update_type[gf_group->index] == OVERLAY_UPDATE);
- rc->is_src_frame_ext_arf =
- gf_group->update_type[gf_group->index] == INTNL_OVERLAY_UPDATE;
- } else {
- rc->is_src_frame_alt_ref =
- cpi->alt_ref_source && (source == cpi->alt_ref_source);
- }
-
- if (rc->is_src_frame_alt_ref) {
- // Current frame is an ARF overlay frame.
- cpi->alt_ref_source = NULL;
-
- if (rc->is_src_frame_ext_arf && !cpi->common.show_existing_frame) {
- // For INTNL_OVERLAY, when show_existing_frame == 0, they do need to
- // refresh the LAST_FRAME, i.e. LAST3 gets retired, LAST2 becomes LAST3,
- // LAST becomes LAST2, and INTNL_OVERLAY becomes LAST.
- cpi->refresh_last_frame = 1;
- } else {
- // Don't refresh the last buffer for an ARF overlay frame. It will
- // become the GF so preserve last as an alternative prediction option.
- cpi->refresh_last_frame = 0;
- }
- }
-}
-
-#if CONFIG_INTERNAL_STATS
-extern double av1_get_blockiness(const unsigned char *img1, int img1_pitch,
- const unsigned char *img2, int img2_pitch,
- int width, int height);
-
-static void adjust_image_stat(double y, double u, double v, double all,
- ImageStat *s) {
- s->stat[STAT_Y] += y;
- s->stat[STAT_U] += u;
- s->stat[STAT_V] += v;
- s->stat[STAT_ALL] += all;
- s->worst = AOMMIN(s->worst, all);
-}
-
-static void compute_internal_stats(AV1_COMP *cpi, int frame_bytes) {
- AV1_COMMON *const cm = &cpi->common;
- double samples = 0.0;
- uint32_t in_bit_depth = 8;
- uint32_t bit_depth = 8;
-
-#if CONFIG_INTER_STATS_ONLY
- if (cm->frame_type == KEY_FRAME) return; // skip key frame
-#endif
- cpi->bytes += frame_bytes;
-
- if (cm->seq_params.use_highbitdepth) {
- in_bit_depth = cpi->oxcf.input_bit_depth;
- bit_depth = cm->seq_params.bit_depth;
- }
- if (cm->show_frame) {
- const YV12_BUFFER_CONFIG *orig = cpi->source;
- const YV12_BUFFER_CONFIG *recon = cpi->common.frame_to_show;
- double y, u, v, frame_all;
-
- cpi->count++;
- if (cpi->b_calculate_psnr) {
- PSNR_STATS psnr;
- double frame_ssim2 = 0.0, weight = 0.0;
- aom_clear_system_state();
- // TODO(yaowu): unify these two versions into one.
- aom_calc_highbd_psnr(orig, recon, &psnr, bit_depth, in_bit_depth);
-
- adjust_image_stat(psnr.psnr[1], psnr.psnr[2], psnr.psnr[3], psnr.psnr[0],
- &cpi->psnr);
- cpi->total_sq_error += psnr.sse[0];
- cpi->total_samples += psnr.samples[0];
- samples = psnr.samples[0];
- // TODO(yaowu): unify these two versions into one.
- if (cm->seq_params.use_highbitdepth)
- frame_ssim2 =
- aom_highbd_calc_ssim(orig, recon, &weight, bit_depth, in_bit_depth);
- else
- frame_ssim2 = aom_calc_ssim(orig, recon, &weight);
-
- cpi->worst_ssim = AOMMIN(cpi->worst_ssim, frame_ssim2);
- cpi->summed_quality += frame_ssim2 * weight;
- cpi->summed_weights += weight;
-
-#if 0
- {
- FILE *f = fopen("q_used.stt", "a");
- double y2 = psnr.psnr[1];
- double u2 = psnr.psnr[2];
- double v2 = psnr.psnr[3];
- double frame_psnr2 = psnr.psnr[0];
- fprintf(f, "%5d : Y%f7.3:U%f7.3:V%f7.3:F%f7.3:S%7.3f\n",
- cm->current_video_frame, y2, u2, v2,
- frame_psnr2, frame_ssim2);
- fclose(f);
- }
-#endif
- }
- if (cpi->b_calculate_blockiness) {
- if (!cm->seq_params.use_highbitdepth) {
- const double frame_blockiness =
- av1_get_blockiness(orig->y_buffer, orig->y_stride, recon->y_buffer,
- recon->y_stride, orig->y_width, orig->y_height);
- cpi->worst_blockiness = AOMMAX(cpi->worst_blockiness, frame_blockiness);
- cpi->total_blockiness += frame_blockiness;
- }
-
- if (cpi->b_calculate_consistency) {
- if (!cm->seq_params.use_highbitdepth) {
- const double this_inconsistency = aom_get_ssim_metrics(
- orig->y_buffer, orig->y_stride, recon->y_buffer, recon->y_stride,
- orig->y_width, orig->y_height, cpi->ssim_vars, &cpi->metrics, 1);
-
- const double peak = (double)((1 << in_bit_depth) - 1);
- const double consistency =
- aom_sse_to_psnr(samples, peak, cpi->total_inconsistency);
- if (consistency > 0.0)
- cpi->worst_consistency =
- AOMMIN(cpi->worst_consistency, consistency);
- cpi->total_inconsistency += this_inconsistency;
- }
- }
- }
-
- frame_all =
- aom_calc_fastssim(orig, recon, &y, &u, &v, bit_depth, in_bit_depth);
- adjust_image_stat(y, u, v, frame_all, &cpi->fastssim);
- frame_all = aom_psnrhvs(orig, recon, &y, &u, &v, bit_depth, in_bit_depth);
- adjust_image_stat(y, u, v, frame_all, &cpi->psnrhvs);
- }
-}
-#endif // CONFIG_INTERNAL_STATS
-
-static int is_integer_mv(AV1_COMP *cpi, const YV12_BUFFER_CONFIG *cur_picture,
- const YV12_BUFFER_CONFIG *last_picture,
- hash_table *last_hash_table) {
- aom_clear_system_state();
- // check use hash ME
- int k;
- uint32_t hash_value_1;
- uint32_t hash_value_2;
-
- const int block_size = 8;
- const double threshold_current = 0.8;
- const double threshold_average = 0.95;
- const int max_history_size = 32;
- int T = 0; // total block
- int C = 0; // match with collocated block
- int S = 0; // smooth region but not match with collocated block
- int M = 0; // match with other block
-
- const int pic_width = cur_picture->y_width;
- const int pic_height = cur_picture->y_height;
- for (int i = 0; i + block_size <= pic_height; i += block_size) {
- for (int j = 0; j + block_size <= pic_width; j += block_size) {
- const int x_pos = j;
- const int y_pos = i;
- int match = 1;
- T++;
-
- // check whether collocated block match with current
- uint8_t *p_cur = cur_picture->y_buffer;
- uint8_t *p_ref = last_picture->y_buffer;
- int stride_cur = cur_picture->y_stride;
- int stride_ref = last_picture->y_stride;
- p_cur += (y_pos * stride_cur + x_pos);
- p_ref += (y_pos * stride_ref + x_pos);
-
- if (cur_picture->flags & YV12_FLAG_HIGHBITDEPTH) {
- uint16_t *p16_cur = CONVERT_TO_SHORTPTR(p_cur);
- uint16_t *p16_ref = CONVERT_TO_SHORTPTR(p_ref);
- for (int tmpY = 0; tmpY < block_size && match; tmpY++) {
- for (int tmpX = 0; tmpX < block_size && match; tmpX++) {
- if (p16_cur[tmpX] != p16_ref[tmpX]) {
- match = 0;
- }
- }
- p16_cur += stride_cur;
- p16_ref += stride_ref;
- }
- } else {
- for (int tmpY = 0; tmpY < block_size && match; tmpY++) {
- for (int tmpX = 0; tmpX < block_size && match; tmpX++) {
- if (p_cur[tmpX] != p_ref[tmpX]) {
- match = 0;
- }
- }
- p_cur += stride_cur;
- p_ref += stride_ref;
- }
- }
-
- if (match) {
- C++;
- continue;
- }
-
- if (av1_hash_is_horizontal_perfect(cur_picture, block_size, x_pos,
- y_pos) ||
- av1_hash_is_vertical_perfect(cur_picture, block_size, x_pos, y_pos)) {
- S++;
- continue;
- }
-
- av1_get_block_hash_value(
- cur_picture->y_buffer + y_pos * stride_cur + x_pos, stride_cur,
- block_size, &hash_value_1, &hash_value_2,
- (cur_picture->flags & YV12_FLAG_HIGHBITDEPTH), &cpi->td.mb);
- // Hashing does not work for highbitdepth currently.
- // TODO(Roger): Make it work for highbitdepth.
- if (av1_use_hash_me(&cpi->common)) {
- if (av1_has_exact_match(last_hash_table, hash_value_1, hash_value_2)) {
- M++;
- }
- }
- }
- }
-
- assert(T > 0);
- double csm_rate = ((double)(C + S + M)) / ((double)(T));
- double m_rate = ((double)(M)) / ((double)(T));
-
- cpi->csm_rate_array[cpi->rate_index] = csm_rate;
- cpi->m_rate_array[cpi->rate_index] = m_rate;
-
- cpi->rate_index = (cpi->rate_index + 1) % max_history_size;
- cpi->rate_size++;
- cpi->rate_size = AOMMIN(cpi->rate_size, max_history_size);
-
- if (csm_rate < threshold_current) {
- return 0;
- }
-
- if (C == T) {
- return 1;
- }
-
- double csm_average = 0.0;
- double m_average = 0.0;
-
- for (k = 0; k < cpi->rate_size; k++) {
- csm_average += cpi->csm_rate_array[k];
- m_average += cpi->m_rate_array[k];
- }
- csm_average /= cpi->rate_size;
- m_average /= cpi->rate_size;
-
- if (csm_average < threshold_average) {
- return 0;
- }
-
- if (M > (T - C - S) / 3) {
- return 1;
- }
-
- if (csm_rate > 0.99 && m_rate > 0.01) {
- return 1;
- }
-
- if (csm_average + m_average > 1.01) {
- return 1;
- }
-
- return 0;
-}
-
-int av1_get_compressed_data(AV1_COMP *cpi, unsigned int *frame_flags,
- size_t *size, uint8_t *dest, int64_t *time_stamp,
- int64_t *time_end, int flush,
- const aom_rational_t *timebase) {
- const AV1EncoderConfig *const oxcf = &cpi->oxcf;
- AV1_COMMON *const cm = &cpi->common;
- const int num_planes = av1_num_planes(cm);
- BufferPool *const pool = cm->buffer_pool;
- RATE_CONTROL *const rc = &cpi->rc;
- struct aom_usec_timer cmptimer;
- YV12_BUFFER_CONFIG *force_src_buffer = NULL;
- struct lookahead_entry *last_source = NULL;
- struct lookahead_entry *source = NULL;
- int arf_src_index;
- int brf_src_index;
- int i;
-
-#if CONFIG_BITSTREAM_DEBUG
- assert(cpi->oxcf.max_threads == 0 &&
- "bitstream debug tool does not support multithreading");
- bitstream_queue_record_write();
- bitstream_queue_set_frame_write(cm->current_video_frame * 2 + cm->show_frame);
-#endif
-
- cm->showable_frame = 0;
- aom_usec_timer_start(&cmptimer);
-
- set_high_precision_mv(cpi, ALTREF_HIGH_PRECISION_MV, 0);
-
- // Normal defaults
- cm->refresh_frame_context = oxcf->frame_parallel_decoding_mode
- ? REFRESH_FRAME_CONTEXT_DISABLED
- : REFRESH_FRAME_CONTEXT_BACKWARD;
- if (oxcf->large_scale_tile)
- cm->refresh_frame_context = REFRESH_FRAME_CONTEXT_DISABLED;
-
- // default reference buffers update config
- av1_configure_buffer_updates_firstpass(cpi, LF_UPDATE);
-
- // Initialize fields related to forward keyframes
- cpi->no_show_kf = 0;
- cm->reset_decoder_state = 0;
-
- // Don't allow a show_existing_frame to coincide with an error resilient or
- // S-Frame. An exception can be made in the case of a keyframe, since it
- // does not depend on any previous frames. We must make this exception here
- // because of the use of show_existing_frame with forward coded keyframes.
- struct lookahead_entry *lookahead_src = NULL;
- if (cm->current_video_frame > 0)
- lookahead_src = av1_lookahead_peek(cpi->lookahead, 0);
-
- int use_show_existing = 1;
- if (lookahead_src != NULL) {
- const int is_error_resilient =
- cpi->oxcf.error_resilient_mode ||
- (lookahead_src->flags & AOM_EFLAG_ERROR_RESILIENT);
- const int is_s_frame = cpi->oxcf.s_frame_mode ||
- (lookahead_src->flags & AOM_EFLAG_SET_S_FRAME);
- const int is_key_frame =
- (rc->frames_to_key == 0) || (cpi->frame_flags & FRAMEFLAGS_KEY);
- use_show_existing = !(is_error_resilient || is_s_frame) || is_key_frame;
- }
-
- if (oxcf->pass == 2 && cm->show_existing_frame && use_show_existing) {
- // Manage the source buffer and flush out the source frame that has been
- // coded already; Also get prepared for PSNR calculation if needed.
- if ((source = av1_lookahead_pop(cpi->lookahead, flush)) == NULL) {
- *size = 0;
- return -1;
- }
- av1_apply_encoding_flags(cpi, source->flags);
- cpi->source = &source->img;
- // TODO(zoeliu): To track down to determine whether it's needed to adjust
- // the frame rate.
- *time_stamp = source->ts_start;
- *time_end = source->ts_end;
-
- // We need to adjust frame rate for an overlay frame
- if (cpi->rc.is_src_frame_alt_ref) adjust_frame_rate(cpi, source);
-
- // Find a free buffer for the new frame, releasing the reference
- // previously
- // held.
- if (cm->new_fb_idx != INVALID_IDX) {
- --pool->frame_bufs[cm->new_fb_idx].ref_count;
- }
- cm->new_fb_idx = get_free_fb(cm);
-
- if (cm->new_fb_idx == INVALID_IDX) return -1;
-
- // Clear down mmx registers
- aom_clear_system_state();
-
- // Start with a 0 size frame.
- *size = 0;
-
- // We need to update the gf_group for show_existing overlay frame
- if (cpi->rc.is_src_frame_alt_ref) av1_rc_get_second_pass_params(cpi);
-
- if (Pass2Encode(cpi, size, dest, frame_flags) != AOM_CODEC_OK)
- return AOM_CODEC_ERROR;
-
- if (cpi->b_calculate_psnr) generate_psnr_packet(cpi);
-
-#if CONFIG_INTERNAL_STATS
- compute_internal_stats(cpi, (int)(*size));
-#endif // CONFIG_INTERNAL_STATS
-
- // Clear down mmx registers
- aom_clear_system_state();
-
- cm->show_existing_frame = 0;
- return 0;
- }
-
- // Should we encode an arf frame.
- arf_src_index = get_arf_src_index(cpi);
- if (arf_src_index) {
- for (i = 0; i <= arf_src_index; ++i) {
- struct lookahead_entry *e = av1_lookahead_peek(cpi->lookahead, i);
- // Avoid creating an alt-ref if there's a forced keyframe pending.
- if (e == NULL) {
- break;
- } else if (e->flags == AOM_EFLAG_FORCE_KF) {
- arf_src_index = 0;
- flush = 1;
- break;
- }
- }
- }
-
- if (arf_src_index) {
- assert(arf_src_index <= rc->frames_to_key);
-
- if ((source = av1_lookahead_peek(cpi->lookahead, arf_src_index)) != NULL) {
- cm->showable_frame = 1;
- cpi->alt_ref_source = source;
- // When arf_src_index == rc->frames_to_key, it indicates a fwd_kf
- if (arf_src_index == rc->frames_to_key) {
- // Skip temporal filtering and mark as intra_only if we have a fwd_kf
- const GF_GROUP *const gf_group = &cpi->twopass.gf_group;
- int which_arf = gf_group->arf_update_idx[gf_group->index];
- cpi->is_arf_filter_off[which_arf] = 1;
- cpi->no_show_kf = 1;
- } else {
- if (oxcf->arnr_max_frames > 0) {
- // Produce the filtered ARF frame.
- av1_temporal_filter(cpi, arf_src_index);
- aom_extend_frame_borders(&cpi->alt_ref_buffer, num_planes);
- force_src_buffer = &cpi->alt_ref_buffer;
- }
- }
- cm->show_frame = 0;
- cm->intra_only = 0;
-
- if (oxcf->pass < 2) {
- // In second pass, the buffer updates configure will be set
- // in the function av1_rc_get_second_pass_params
- av1_configure_buffer_updates_firstpass(cpi, ARF_UPDATE);
- }
- }
- rc->source_alt_ref_pending = 0;
- }
-
- // Should we encode an arf2 frame.
- arf_src_index = get_arf2_src_index(cpi);
- if (arf_src_index) {
- for (i = 0; i <= arf_src_index; ++i) {
- struct lookahead_entry *e = av1_lookahead_peek(cpi->lookahead, i);
- // Avoid creating an alt-ref if there's a forced keyframe pending.
- if (e == NULL) {
- break;
- } else if (e->flags == AOM_EFLAG_FORCE_KF) {
- arf_src_index = 0;
- flush = 1;
- break;
- }
- }
- }
-
- if (arf_src_index) {
- assert(arf_src_index <= rc->frames_to_key);
-
- if ((source = av1_lookahead_peek(cpi->lookahead, arf_src_index)) != NULL) {
- cm->showable_frame = 1;
- cpi->alt_ref_source = source;
-
- if (oxcf->arnr_max_frames > 0) {
- // Produce the filtered ARF frame.
- av1_temporal_filter(cpi, arf_src_index);
- aom_extend_frame_borders(&cpi->alt_ref_buffer, num_planes);
- force_src_buffer = &cpi->alt_ref_buffer;
- }
-
- cm->show_frame = 0;
- cm->intra_only = 0;
-
- if (oxcf->pass < 2) {
- // In second pass, the buffer updates configure will be set
- // in the function av1_rc_get_second_pass_params
- av1_configure_buffer_updates_firstpass(cpi, INTNL_ARF_UPDATE);
- }
- }
- rc->source_alt_ref_pending = 0;
- }
-
- rc->is_bwd_ref_frame = 0;
- brf_src_index = get_brf_src_index(cpi);
- if (brf_src_index) {
- assert(brf_src_index <= rc->frames_to_key);
- if ((source = av1_lookahead_peek(cpi->lookahead, brf_src_index)) != NULL) {
- cm->showable_frame = 1;
- cm->show_frame = 0;
- cm->intra_only = 0;
-
- if (oxcf->pass < 2) {
- // In second pass, the buffer updates configure will be set
- // in the function av1_rc_get_second_pass_params
- av1_configure_buffer_updates_firstpass(cpi, BIPRED_UPDATE);
- }
- }
- }
-
- if (!source) {
- // Get last frame source.
- if (cm->current_video_frame > 0) {
- if ((last_source = av1_lookahead_peek(cpi->lookahead, -1)) == NULL)
- return -1;
- }
- if (cm->current_video_frame > 0) assert(last_source != NULL);
- // Read in the source frame.
- source = av1_lookahead_pop(cpi->lookahead, flush);
-
- if (source != NULL) {
- cm->show_frame = 1;
- cm->intra_only = 0;
-
- // Check to see if the frame should be encoded as an arf overlay.
- check_src_altref(cpi, source);
- }
- }
- if (source) {
- cpi->unscaled_source = cpi->source =
- force_src_buffer ? force_src_buffer : &source->img;
- cpi->unscaled_last_source = last_source != NULL ? &last_source->img : NULL;
-
- *time_stamp = source->ts_start;
- *time_end = source->ts_end;
- av1_apply_encoding_flags(cpi, source->flags);
- *frame_flags = (source->flags & AOM_EFLAG_FORCE_KF) ? FRAMEFLAGS_KEY : 0;
-
- } else {
- *size = 0;
- if (flush && oxcf->pass == 1 && !cpi->twopass.first_pass_done) {
- av1_end_first_pass(cpi); /* get last stats packet */
- cpi->twopass.first_pass_done = 1;
- }
- return -1;
- }
-
- if (source->ts_start < cpi->first_time_stamp_ever) {
- cpi->first_time_stamp_ever = source->ts_start;
- cpi->last_end_time_stamp_seen = source->ts_start;
- }
-
- // Clear down mmx registers
- aom_clear_system_state();
-
- // adjust frame rates based on timestamps given
- if (cm->show_frame) adjust_frame_rate(cpi, source);
-
- // Find a free buffer for the new frame, releasing the reference previously
- // held.
- if (cm->new_fb_idx != INVALID_IDX) {
- --pool->frame_bufs[cm->new_fb_idx].ref_count;
- }
- cm->new_fb_idx = get_free_fb(cm);
-
- if (cm->new_fb_idx == INVALID_IDX) return -1;
-
- // Retain the RF_LEVEL for the current newly coded frame.
- cpi->frame_rf_level[cm->new_fb_idx] =
- cpi->twopass.gf_group.rf_level[cpi->twopass.gf_group.index];
-
- cm->cur_frame = &pool->frame_bufs[cm->new_fb_idx];
- cm->cur_frame->buf.buf_8bit_valid = 0;
-
- if (cpi->film_grain_table) {
- cm->seq_params.film_grain_params_present = aom_film_grain_table_lookup(
- cpi->film_grain_table, *time_stamp, *time_end, 0 /* =erase */,
- &cm->film_grain_params);
- }
- cm->cur_frame->film_grain_params_present =
- cm->seq_params.film_grain_params_present;
-
- // only one operating point supported now
- const int64_t pts64 = ticks_to_timebase_units(timebase, *time_stamp);
- if (pts64 < 0 || pts64 > UINT32_MAX) return AOM_CODEC_ERROR;
- cpi->common.frame_presentation_time = (uint32_t)pts64;
-
- // Start with a 0 size frame.
- *size = 0;
-
- cpi->frame_flags = *frame_flags;
-
- if (oxcf->pass == 2) {
- av1_rc_get_second_pass_params(cpi);
- } else if (oxcf->pass == 1) {
- setup_frame_size(cpi);
- }
-
- if (cpi->oxcf.pass != 0 || frame_is_intra_only(cm) == 1) {
- for (i = 0; i < REF_FRAMES; ++i) cpi->scaled_ref_idx[i] = INVALID_IDX;
- }
-
- cm->using_qmatrix = cpi->oxcf.using_qm;
- cm->min_qmlevel = cpi->oxcf.qm_minlevel;
- cm->max_qmlevel = cpi->oxcf.qm_maxlevel;
-
- if (cm->seq_params.frame_id_numbers_present_flag) {
- if (*time_stamp == 0) {
- cpi->common.current_frame_id = -1;
- }
- }
-
- cpi->cur_poc++;
- if (oxcf->pass != 1 && cpi->common.allow_screen_content_tools &&
- !frame_is_intra_only(cm)) {
- if (cpi->common.seq_params.force_integer_mv == 2) {
- struct lookahead_entry *previous_entry =
- av1_lookahead_peek(cpi->lookahead, cpi->previous_index);
- if (!previous_entry)
- cpi->common.cur_frame_force_integer_mv = 0;
- else
- cpi->common.cur_frame_force_integer_mv = is_integer_mv(
- cpi, cpi->source, &previous_entry->img, cpi->previous_hash_table);
- } else {
- cpi->common.cur_frame_force_integer_mv =
- cpi->common.seq_params.force_integer_mv;
- }
- } else {
- cpi->common.cur_frame_force_integer_mv = 0;
- }
-
- if (oxcf->pass == 1) {
- cpi->td.mb.e_mbd.lossless[0] = is_lossless_requested(oxcf);
- av1_first_pass(cpi, source);
- } else if (oxcf->pass == 2) {
- if (Pass2Encode(cpi, size, dest, frame_flags) != AOM_CODEC_OK)
- return AOM_CODEC_ERROR;
- } else {
- // One pass encode
- if (Pass0Encode(cpi, size, dest, 0, frame_flags) != AOM_CODEC_OK)
- return AOM_CODEC_ERROR;
- }
- if (oxcf->pass != 1 && cpi->common.allow_screen_content_tools) {
- cpi->previous_hash_table = &cm->cur_frame->hash_table;
- {
- int l;
- for (l = -MAX_PRE_FRAMES; l < cpi->lookahead->max_sz; l++) {
- if ((cpi->lookahead->buf + l) == source) {
- cpi->previous_index = l;
- break;
- }
- }
-
- if (l == cpi->lookahead->max_sz) {
- aom_internal_error(&cm->error, AOM_CODEC_MEM_ERROR,
- "Failed to find last frame original buffer");
- }
- }
- }
-
- if (!cm->large_scale_tile) {
- cm->frame_contexts[cm->new_fb_idx] = *cm->fc;
- }
-
-#define EXT_TILE_DEBUG 0
-#if EXT_TILE_DEBUG
- if (cm->large_scale_tile && oxcf->pass == 2) {
- char fn[20] = "./fc";
- fn[4] = cm->current_video_frame / 100 + '0';
- fn[5] = (cm->current_video_frame % 100) / 10 + '0';
- fn[6] = (cm->current_video_frame % 10) + '0';
- fn[7] = '\0';
- av1_print_frame_contexts(cm->fc, fn);
- }
-#endif // EXT_TILE_DEBUG
-#undef EXT_TILE_DEBUG
-
- cm->showable_frame = !cm->show_frame && cm->showable_frame;
-
- // No frame encoded, or frame was dropped, release scaled references.
- if ((*size == 0) && (frame_is_intra_only(cm) == 0)) {
- release_scaled_references(cpi);
- }
-
- if (*size > 0) {
- cpi->droppable = !frame_is_reference(cpi);
- }
-
- aom_usec_timer_mark(&cmptimer);
- cpi->time_compress_data += aom_usec_timer_elapsed(&cmptimer);
-
- if (cpi->b_calculate_psnr && oxcf->pass != 1 && cm->show_frame)
- generate_psnr_packet(cpi);
-
-#if CONFIG_INTERNAL_STATS
- if (oxcf->pass != 1) {
- compute_internal_stats(cpi, (int)(*size));
- }
-#endif // CONFIG_INTERNAL_STATS
-
- aom_clear_system_state();
-
- return 0;
-}
-
-int av1_get_preview_raw_frame(AV1_COMP *cpi, YV12_BUFFER_CONFIG *dest) {
- AV1_COMMON *cm = &cpi->common;
- if (!cm->show_frame) {
- return -1;
- } else {
- int ret;
- if (cm->frame_to_show) {
- *dest = *cm->frame_to_show;
- dest->y_width = cm->width;
- dest->y_height = cm->height;
- dest->uv_width = cm->width >> cm->seq_params.subsampling_x;
- dest->uv_height = cm->height >> cm->seq_params.subsampling_y;
- ret = 0;
- } else {
- ret = -1;
- }
- aom_clear_system_state();
- return ret;
- }
-}
-
-int av1_get_last_show_frame(AV1_COMP *cpi, YV12_BUFFER_CONFIG *frame) {
- if (cpi->last_show_frame_buf_idx == INVALID_IDX) return -1;
-
- *frame =
- cpi->common.buffer_pool->frame_bufs[cpi->last_show_frame_buf_idx].buf;
- return 0;
-}
-
-static int equal_dimensions_and_border(const YV12_BUFFER_CONFIG *a,
- const YV12_BUFFER_CONFIG *b) {
- return a->y_height == b->y_height && a->y_width == b->y_width &&
- a->uv_height == b->uv_height && a->uv_width == b->uv_width &&
- a->y_stride == b->y_stride && a->uv_stride == b->uv_stride &&
- a->border == b->border &&
- (a->flags & YV12_FLAG_HIGHBITDEPTH) ==
- (b->flags & YV12_FLAG_HIGHBITDEPTH);
-}
-
-aom_codec_err_t av1_copy_new_frame_enc(AV1_COMMON *cm,
- YV12_BUFFER_CONFIG *new_frame,
- YV12_BUFFER_CONFIG *sd) {
- const int num_planes = av1_num_planes(cm);
- if (!equal_dimensions_and_border(new_frame, sd))
- aom_internal_error(&cm->error, AOM_CODEC_ERROR,
- "Incorrect buffer dimensions");
- else
- aom_yv12_copy_frame(new_frame, sd, num_planes);
-
- return cm->error.error_code;
-}
-
-int av1_set_internal_size(AV1_COMP *cpi, AOM_SCALING horiz_mode,
- AOM_SCALING vert_mode) {
- int hr = 0, hs = 0, vr = 0, vs = 0;
-
- if (horiz_mode > ONETWO || vert_mode > ONETWO) return -1;
-
- Scale2Ratio(horiz_mode, &hr, &hs);
- Scale2Ratio(vert_mode, &vr, &vs);
-
- // always go to the next whole number
- cpi->resize_pending_width = (hs - 1 + cpi->oxcf.width * hr) / hs;
- cpi->resize_pending_height = (vs - 1 + cpi->oxcf.height * vr) / vs;
-
- return 0;
-}
-
-int av1_get_quantizer(AV1_COMP *cpi) { return cpi->common.base_qindex; }
-
-int av1_convert_sect5obus_to_annexb(uint8_t *buffer, size_t *frame_size) {
- size_t output_size = 0;
- size_t total_bytes_read = 0;
- size_t remaining_size = *frame_size;
- uint8_t *buff_ptr = buffer;
-
- // go through each OBUs
- while (total_bytes_read < *frame_size) {
- uint8_t saved_obu_header[2];
- uint64_t obu_payload_size;
- size_t length_of_payload_size;
- size_t length_of_obu_size;
- uint32_t obu_header_size = (buff_ptr[0] >> 2) & 0x1 ? 2 : 1;
- size_t obu_bytes_read = obu_header_size; // bytes read for current obu
-
- // save the obu header (1 or 2 bytes)
- memmove(saved_obu_header, buff_ptr, obu_header_size);
- // clear the obu_has_size_field
- saved_obu_header[0] = saved_obu_header[0] & (~0x2);
-
- // get the payload_size and length of payload_size
- if (aom_uleb_decode(buff_ptr + obu_header_size, remaining_size,
- &obu_payload_size, &length_of_payload_size) != 0) {
- return AOM_CODEC_ERROR;
- }
- obu_bytes_read += length_of_payload_size;
-
- // calculate the length of size of the obu header plus payload
- length_of_obu_size =
- aom_uleb_size_in_bytes((uint64_t)(obu_header_size + obu_payload_size));
-
- // move the rest of data to new location
- memmove(buff_ptr + length_of_obu_size + obu_header_size,
- buff_ptr + obu_bytes_read, remaining_size - obu_bytes_read);
- obu_bytes_read += (size_t)obu_payload_size;
-
- // write the new obu size
- const uint64_t obu_size = obu_header_size + obu_payload_size;
- size_t coded_obu_size;
- if (aom_uleb_encode(obu_size, sizeof(obu_size), buff_ptr,
- &coded_obu_size) != 0) {
- return AOM_CODEC_ERROR;
- }
-
- // write the saved (modified) obu_header following obu size
- memmove(buff_ptr + length_of_obu_size, saved_obu_header, obu_header_size);
-
- total_bytes_read += obu_bytes_read;
- remaining_size -= obu_bytes_read;
- buff_ptr += length_of_obu_size + obu_size;
- output_size += length_of_obu_size + (size_t)obu_size;
- }
-
- *frame_size = output_size;
- return AOM_CODEC_OK;
-}
-
-void av1_apply_encoding_flags(AV1_COMP *cpi, aom_enc_frame_flags_t flags) {
- // TODO(yunqingwang): For what references to use, external encoding flags
- // should be consistent with internal reference frame selection. Need to
- // ensure that there is not conflict between the two. In AV1 encoder, the
- // priority rank for 7 reference frames are: LAST, ALTREF, LAST2, LAST3,
- // GOLDEN, BWDREF, ALTREF2. If only one reference frame is used, it must be
- // LAST.
- cpi->ext_ref_frame_flags = AOM_REFFRAME_ALL;
- if (flags &
- (AOM_EFLAG_NO_REF_LAST | AOM_EFLAG_NO_REF_LAST2 | AOM_EFLAG_NO_REF_LAST3 |
- AOM_EFLAG_NO_REF_GF | AOM_EFLAG_NO_REF_ARF | AOM_EFLAG_NO_REF_BWD |
- AOM_EFLAG_NO_REF_ARF2)) {
- if (flags & AOM_EFLAG_NO_REF_LAST) {
- cpi->ext_ref_frame_flags = 0;
- } else {
- int ref = AOM_REFFRAME_ALL;
-
- if (flags & AOM_EFLAG_NO_REF_LAST2) ref ^= AOM_LAST2_FLAG;
- if (flags & AOM_EFLAG_NO_REF_LAST3) ref ^= AOM_LAST3_FLAG;
-
- if (flags & AOM_EFLAG_NO_REF_GF) ref ^= AOM_GOLD_FLAG;
-
- if (flags & AOM_EFLAG_NO_REF_ARF) {
- ref ^= AOM_ALT_FLAG;
- ref ^= AOM_BWD_FLAG;
- ref ^= AOM_ALT2_FLAG;
- } else {
- if (flags & AOM_EFLAG_NO_REF_BWD) ref ^= AOM_BWD_FLAG;
- if (flags & AOM_EFLAG_NO_REF_ARF2) ref ^= AOM_ALT2_FLAG;
- }
-
- av1_use_as_reference(cpi, ref);
- }
- }
-
- if (flags &
- (AOM_EFLAG_NO_UPD_LAST | AOM_EFLAG_NO_UPD_GF | AOM_EFLAG_NO_UPD_ARF)) {
- int upd = AOM_REFFRAME_ALL;
-
- // Refreshing LAST/LAST2/LAST3 is handled by 1 common flag.
- if (flags & AOM_EFLAG_NO_UPD_LAST) upd ^= AOM_LAST_FLAG;
-
- if (flags & AOM_EFLAG_NO_UPD_GF) upd ^= AOM_GOLD_FLAG;
-
- if (flags & AOM_EFLAG_NO_UPD_ARF) {
- upd ^= AOM_ALT_FLAG;
- upd ^= AOM_BWD_FLAG;
- upd ^= AOM_ALT2_FLAG;
- }
-
- av1_update_reference(cpi, upd);
- }
-
- cpi->ext_use_ref_frame_mvs = cpi->oxcf.allow_ref_frame_mvs &
- ((flags & AOM_EFLAG_NO_REF_FRAME_MVS) == 0);
- cpi->ext_use_error_resilient = cpi->oxcf.error_resilient_mode |
- ((flags & AOM_EFLAG_ERROR_RESILIENT) != 0);
- cpi->ext_use_s_frame =
- cpi->oxcf.s_frame_mode | ((flags & AOM_EFLAG_SET_S_FRAME) != 0);
- cpi->ext_use_primary_ref_none = (flags & AOM_EFLAG_SET_PRIMARY_REF_NONE) != 0;
-
- if (flags & AOM_EFLAG_NO_UPD_ENTROPY) {
- av1_update_entropy(cpi, 0);
- }
-}
-
-int64_t timebase_units_to_ticks(const aom_rational_t *timebase, int64_t n) {
- return n * TICKS_PER_SEC * timebase->num / timebase->den;
-}
-
-int64_t ticks_to_timebase_units(const aom_rational_t *timebase, int64_t n) {
- const int64_t round = TICKS_PER_SEC * timebase->num / 2 - 1;
- return (n * timebase->den + round) / timebase->num / TICKS_PER_SEC;
-}
-
-aom_fixed_buf_t *av1_get_global_headers(AV1_COMP *cpi) {
- if (!cpi) return NULL;
-
- uint8_t header_buf[512] = { 0 };
- const uint32_t sequence_header_size =
- write_sequence_header_obu(cpi, &header_buf[0]);
- assert(sequence_header_size <= sizeof(header_buf));
- if (sequence_header_size == 0) return NULL;
-
- const size_t obu_header_size = 1;
- const size_t size_field_size = aom_uleb_size_in_bytes(sequence_header_size);
- const size_t payload_offset = obu_header_size + size_field_size;
-
- if (payload_offset + sequence_header_size > sizeof(header_buf)) return NULL;
- memmove(&header_buf[payload_offset], &header_buf[0], sequence_header_size);
-
- if (write_obu_header(OBU_SEQUENCE_HEADER, 0, &header_buf[0]) !=
- obu_header_size) {
- return NULL;
- }
-
- size_t coded_size_field_size = 0;
- if (aom_uleb_encode(sequence_header_size, size_field_size,
- &header_buf[obu_header_size],
- &coded_size_field_size) != 0) {
- return NULL;
- }
- assert(coded_size_field_size == size_field_size);
-
- aom_fixed_buf_t *global_headers =
- (aom_fixed_buf_t *)malloc(sizeof(*global_headers));
- if (!global_headers) return NULL;
-
- const size_t global_header_buf_size =
- obu_header_size + size_field_size + sequence_header_size;
-
- global_headers->buf = malloc(global_header_buf_size);
- if (!global_headers->buf) {
- free(global_headers);
- return NULL;
- }
-
- memcpy(global_headers->buf, &header_buf[0], global_header_buf_size);
- global_headers->sz = global_header_buf_size;
- return global_headers;
-}
diff --git a/third_party/aom/av1/encoder/encoder.h b/third_party/aom/av1/encoder/encoder.h
deleted file mode 100644
index ee7fc4637..000000000
--- a/third_party/aom/av1/encoder/encoder.h
+++ /dev/null
@@ -1,985 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#ifndef AOM_AV1_ENCODER_ENCODER_H_
-#define AOM_AV1_ENCODER_ENCODER_H_
-
-#include <stdio.h>
-
-#include "config/aom_config.h"
-
-#include "aom/aomcx.h"
-
-#include "av1/common/alloccommon.h"
-#include "av1/common/entropymode.h"
-#include "av1/common/thread_common.h"
-#include "av1/common/onyxc_int.h"
-#include "av1/common/resize.h"
-#include "av1/common/timing.h"
-#include "av1/encoder/aq_cyclicrefresh.h"
-#include "av1/encoder/av1_quantize.h"
-#include "av1/encoder/context_tree.h"
-#include "av1/encoder/encodemb.h"
-#include "av1/encoder/firstpass.h"
-#include "av1/encoder/lookahead.h"
-#include "av1/encoder/mbgraph.h"
-#include "av1/encoder/mcomp.h"
-#include "av1/encoder/ratectrl.h"
-#include "av1/encoder/rd.h"
-#include "av1/encoder/speed_features.h"
-#include "av1/encoder/tokenize.h"
-
-#if CONFIG_INTERNAL_STATS
-#include "aom_dsp/ssim.h"
-#endif
-#include "aom_dsp/variance.h"
-#if CONFIG_DENOISE
-#include "aom_dsp/noise_model.h"
-#endif
-#include "aom/internal/aom_codec_internal.h"
-#include "aom_util/aom_thread.h"
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-typedef struct {
- int nmv_vec_cost[MV_JOINTS];
- int nmv_costs[2][MV_VALS];
- int nmv_costs_hp[2][MV_VALS];
-
- FRAME_CONTEXT fc;
-} CODING_CONTEXT;
-
-typedef enum {
- // regular inter frame
- REGULAR_FRAME = 0,
- // alternate reference frame
- ARF_FRAME = 1,
- // overlay frame
- OVERLAY_FRAME = 2,
- // golden frame
- GLD_FRAME = 3,
- // backward reference frame
- BRF_FRAME = 4,
- // extra alternate reference frame
- EXT_ARF_FRAME = 5,
- FRAME_CONTEXT_INDEXES
-} FRAME_CONTEXT_INDEX;
-
-typedef enum {
- NORMAL = 0,
- FOURFIVE = 1,
- THREEFIVE = 2,
- ONETWO = 3
-} AOM_SCALING;
-
-typedef enum {
- // Good Quality Fast Encoding. The encoder balances quality with the amount of
- // time it takes to encode the output. Speed setting controls how fast.
- GOOD
-} MODE;
-
-typedef enum {
- FRAMEFLAGS_KEY = 1 << 0,
- FRAMEFLAGS_GOLDEN = 1 << 1,
- FRAMEFLAGS_BWDREF = 1 << 2,
- // TODO(zoeliu): To determine whether a frame flag is needed for ALTREF2_FRAME
- FRAMEFLAGS_ALTREF = 1 << 3,
-} FRAMETYPE_FLAGS;
-
-typedef enum {
- NO_AQ = 0,
- VARIANCE_AQ = 1,
- COMPLEXITY_AQ = 2,
- CYCLIC_REFRESH_AQ = 3,
- AQ_MODE_COUNT // This should always be the last member of the enum
-} AQ_MODE;
-typedef enum {
- NO_DELTA_Q = 0,
- DELTA_Q_ONLY = 1,
- DELTA_Q_LF = 2,
- DELTAQ_MODE_COUNT // This should always be the last member of the enum
-} DELTAQ_MODE;
-
-typedef enum {
- RESIZE_NONE = 0, // No frame resizing allowed.
- RESIZE_FIXED = 1, // All frames are coded at the specified scale.
- RESIZE_RANDOM = 2, // All frames are coded at a random scale.
- RESIZE_MODES
-} RESIZE_MODE;
-
-typedef enum {
- SUPERRES_NONE = 0, // No frame superres allowed
- SUPERRES_FIXED = 1, // All frames are coded at the specified scale,
- // and super-resolved.
- SUPERRES_RANDOM = 2, // All frames are coded at a random scale,
- // and super-resolved.
- SUPERRES_QTHRESH = 3, // Superres scale for a frame is determined based on
- // q_index
- SUPERRES_MODES
-} SUPERRES_MODE;
-
-typedef struct AV1EncoderConfig {
- BITSTREAM_PROFILE profile;
- aom_bit_depth_t bit_depth; // Codec bit-depth.
- int width; // width of data passed to the compressor
- int height; // height of data passed to the compressor
- int forced_max_frame_width; // forced maximum width of frame (if != 0)
- int forced_max_frame_height; // forced maximum height of frame (if != 0)
- unsigned int input_bit_depth; // Input bit depth.
- double init_framerate; // set to passed in framerate
- int64_t target_bandwidth; // bandwidth to be used in bits per second
-
- int noise_sensitivity; // pre processing blur: recommendation 0
- int sharpness; // sharpening output: recommendation 0:
- int speed;
- // maximum allowed bitrate for any intra frame in % of bitrate target.
- unsigned int rc_max_intra_bitrate_pct;
- // maximum allowed bitrate for any inter frame in % of bitrate target.
- unsigned int rc_max_inter_bitrate_pct;
- // percent of rate boost for golden frame in CBR mode.
- unsigned int gf_cbr_boost_pct;
-
- MODE mode;
- int pass;
-
- // Key Framing Operations
- int auto_key; // autodetect cut scenes and set the keyframes
- int key_freq; // maximum distance to key frame.
- int sframe_dist;
- int sframe_mode;
- int sframe_enabled;
- int lag_in_frames; // how many frames lag before we start encoding
- int fwd_kf_enabled;
-
- // ----------------------------------------------------------------
- // DATARATE CONTROL OPTIONS
-
- // vbr, cbr, constrained quality or constant quality
- enum aom_rc_mode rc_mode;
-
- // buffer targeting aggressiveness
- int under_shoot_pct;
- int over_shoot_pct;
-
- // buffering parameters
- int64_t starting_buffer_level_ms;
- int64_t optimal_buffer_level_ms;
- int64_t maximum_buffer_size_ms;
-
- // Frame drop threshold.
- int drop_frames_water_mark;
-
- // controlling quality
- int fixed_q;
- int worst_allowed_q;
- int best_allowed_q;
- int cq_level;
- AQ_MODE aq_mode; // Adaptive Quantization mode
- DELTAQ_MODE deltaq_mode;
- int enable_cdef;
- int enable_restoration;
- int disable_trellis_quant;
- int using_qm;
- int qm_y;
- int qm_u;
- int qm_v;
- int qm_minlevel;
- int qm_maxlevel;
-#if CONFIG_DIST_8X8
- int using_dist_8x8;
-#endif
- unsigned int num_tile_groups;
- unsigned int mtu;
-
- // Internal frame size scaling.
- RESIZE_MODE resize_mode;
- uint8_t resize_scale_denominator;
- uint8_t resize_kf_scale_denominator;
-
- // Frame Super-Resolution size scaling.
- SUPERRES_MODE superres_mode;
- uint8_t superres_scale_denominator;
- uint8_t superres_kf_scale_denominator;
- int superres_qthresh;
- int superres_kf_qthresh;
-
- // Enable feature to reduce the frame quantization every x frames.
- int frame_periodic_boost;
-
- // two pass datarate control
- int two_pass_vbrbias; // two pass datarate control tweaks
- int two_pass_vbrmin_section;
- int two_pass_vbrmax_section;
- // END DATARATE CONTROL OPTIONS
- // ----------------------------------------------------------------
-
- int enable_auto_arf;
- int enable_auto_brf; // (b)ackward (r)ef (f)rame
-
- /* Bitfield defining the error resiliency features to enable.
- * Can provide decodable frames after losses in previous
- * frames and decodable partitions after losses in the same frame.
- */
- unsigned int error_resilient_mode;
-
- unsigned int s_frame_mode;
-
- /* Bitfield defining the parallel decoding mode where the
- * decoding in successive frames may be conducted in parallel
- * just by decoding the frame headers.
- */
- unsigned int frame_parallel_decoding_mode;
-
- unsigned int limit;
-
- int arnr_max_frames;
- int arnr_strength;
-
- int min_gf_interval;
- int max_gf_interval;
-
- int row_mt;
- int tile_columns;
- int tile_rows;
- int tile_width_count;
- int tile_height_count;
- int tile_widths[MAX_TILE_COLS];
- int tile_heights[MAX_TILE_ROWS];
-
- int max_threads;
-
- aom_fixed_buf_t two_pass_stats_in;
- struct aom_codec_pkt_list *output_pkt_list;
-
-#if CONFIG_FP_MB_STATS
- aom_fixed_buf_t firstpass_mb_stats_in;
-#endif
-
- aom_tune_metric tuning;
- aom_tune_content content;
- int use_highbitdepth;
- aom_color_primaries_t color_primaries;
- aom_transfer_characteristics_t transfer_characteristics;
- aom_matrix_coefficients_t matrix_coefficients;
- aom_chroma_sample_position_t chroma_sample_position;
- int color_range;
- int render_width;
- int render_height;
- aom_timing_info_type_t timing_info_type;
- int timing_info_present;
- aom_timing_info_t timing_info;
- int decoder_model_info_present_flag;
- int display_model_info_present_flag;
- int buffer_removal_time_present;
- aom_dec_model_info_t buffer_model;
- aom_dec_model_op_parameters_t op_params[MAX_NUM_OPERATING_POINTS + 1];
- aom_op_timing_info_t op_frame_timing[MAX_NUM_OPERATING_POINTS + 1];
- int film_grain_test_vector;
- const char *film_grain_table_filename;
-
- uint8_t cdf_update_mode;
- aom_superblock_size_t superblock_size;
- unsigned int large_scale_tile;
- unsigned int single_tile_decoding;
- int monochrome;
- unsigned int full_still_picture_hdr;
- int enable_dual_filter;
- unsigned int motion_vector_unit_test;
- const cfg_options_t *cfg;
- int enable_order_hint;
- int enable_jnt_comp;
- int enable_ref_frame_mvs;
- unsigned int allow_ref_frame_mvs;
- int enable_warped_motion;
- int allow_warped_motion;
- int enable_superres;
- unsigned int save_as_annexb;
-
-#if CONFIG_DENOISE
- float noise_level;
- int noise_block_size;
-#endif
-
- unsigned int chroma_subsampling_x;
- unsigned int chroma_subsampling_y;
-} AV1EncoderConfig;
-
-// Returns 1 when both the best and the worst allowed quantizer in the
-// configuration are zero, and 0 otherwise.
-static INLINE int is_lossless_requested(const AV1EncoderConfig *cfg) {
-  if (cfg->best_allowed_q != 0) return 0;
-  return cfg->worst_allowed_q == 0;
-}
-
-typedef struct FRAME_COUNTS {
-// Note: This structure should only contain 'unsigned int' fields, or
-// aggregates built solely from 'unsigned int' fields/elements
-#if CONFIG_ENTROPY_STATS
- unsigned int kf_y_mode[KF_MODE_CONTEXTS][KF_MODE_CONTEXTS][INTRA_MODES];
- unsigned int angle_delta[DIRECTIONAL_MODES][2 * MAX_ANGLE_DELTA + 1];
- unsigned int y_mode[BLOCK_SIZE_GROUPS][INTRA_MODES];
- unsigned int uv_mode[CFL_ALLOWED_TYPES][INTRA_MODES][UV_INTRA_MODES];
- unsigned int cfl_sign[CFL_JOINT_SIGNS];
- unsigned int cfl_alpha[CFL_ALPHA_CONTEXTS][CFL_ALPHABET_SIZE];
- unsigned int palette_y_mode[PALATTE_BSIZE_CTXS][PALETTE_Y_MODE_CONTEXTS][2];
- unsigned int palette_uv_mode[PALETTE_UV_MODE_CONTEXTS][2];
- unsigned int palette_y_size[PALATTE_BSIZE_CTXS][PALETTE_SIZES];
- unsigned int palette_uv_size[PALATTE_BSIZE_CTXS][PALETTE_SIZES];
- unsigned int palette_y_color_index[PALETTE_SIZES]
- [PALETTE_COLOR_INDEX_CONTEXTS]
- [PALETTE_COLORS];
- unsigned int palette_uv_color_index[PALETTE_SIZES]
- [PALETTE_COLOR_INDEX_CONTEXTS]
- [PALETTE_COLORS];
- unsigned int partition[PARTITION_CONTEXTS][EXT_PARTITION_TYPES];
- unsigned int txb_skip[TOKEN_CDF_Q_CTXS][TX_SIZES][TXB_SKIP_CONTEXTS][2];
- unsigned int eob_extra[TOKEN_CDF_Q_CTXS][TX_SIZES][PLANE_TYPES]
- [EOB_COEF_CONTEXTS][2];
- unsigned int dc_sign[PLANE_TYPES][DC_SIGN_CONTEXTS][2];
- unsigned int coeff_lps[TX_SIZES][PLANE_TYPES][BR_CDF_SIZE - 1][LEVEL_CONTEXTS]
- [2];
- unsigned int eob_flag[TX_SIZES][PLANE_TYPES][EOB_COEF_CONTEXTS][2];
- unsigned int eob_multi16[TOKEN_CDF_Q_CTXS][PLANE_TYPES][2][5];
- unsigned int eob_multi32[TOKEN_CDF_Q_CTXS][PLANE_TYPES][2][6];
- unsigned int eob_multi64[TOKEN_CDF_Q_CTXS][PLANE_TYPES][2][7];
- unsigned int eob_multi128[TOKEN_CDF_Q_CTXS][PLANE_TYPES][2][8];
- unsigned int eob_multi256[TOKEN_CDF_Q_CTXS][PLANE_TYPES][2][9];
- unsigned int eob_multi512[TOKEN_CDF_Q_CTXS][PLANE_TYPES][2][10];
- unsigned int eob_multi1024[TOKEN_CDF_Q_CTXS][PLANE_TYPES][2][11];
- unsigned int coeff_lps_multi[TOKEN_CDF_Q_CTXS][TX_SIZES][PLANE_TYPES]
- [LEVEL_CONTEXTS][BR_CDF_SIZE];
- unsigned int coeff_base_multi[TOKEN_CDF_Q_CTXS][TX_SIZES][PLANE_TYPES]
- [SIG_COEF_CONTEXTS][NUM_BASE_LEVELS + 2];
- unsigned int coeff_base_eob_multi[TOKEN_CDF_Q_CTXS][TX_SIZES][PLANE_TYPES]
- [SIG_COEF_CONTEXTS_EOB][NUM_BASE_LEVELS + 1];
- unsigned int newmv_mode[NEWMV_MODE_CONTEXTS][2];
- unsigned int zeromv_mode[GLOBALMV_MODE_CONTEXTS][2];
- unsigned int refmv_mode[REFMV_MODE_CONTEXTS][2];
- unsigned int drl_mode[DRL_MODE_CONTEXTS][2];
- unsigned int inter_compound_mode[INTER_MODE_CONTEXTS][INTER_COMPOUND_MODES];
- unsigned int wedge_idx[BLOCK_SIZES_ALL][16];
- unsigned int interintra[BLOCK_SIZE_GROUPS][2];
- unsigned int interintra_mode[BLOCK_SIZE_GROUPS][INTERINTRA_MODES];
- unsigned int wedge_interintra[BLOCK_SIZES_ALL][2];
- unsigned int compound_type[BLOCK_SIZES_ALL][COMPOUND_TYPES - 1];
- unsigned int motion_mode[BLOCK_SIZES_ALL][MOTION_MODES];
- unsigned int obmc[BLOCK_SIZES_ALL][2];
- unsigned int intra_inter[INTRA_INTER_CONTEXTS][2];
- unsigned int comp_inter[COMP_INTER_CONTEXTS][2];
- unsigned int comp_ref_type[COMP_REF_TYPE_CONTEXTS][2];
- unsigned int uni_comp_ref[UNI_COMP_REF_CONTEXTS][UNIDIR_COMP_REFS - 1][2];
- unsigned int single_ref[REF_CONTEXTS][SINGLE_REFS - 1][2];
- unsigned int comp_ref[REF_CONTEXTS][FWD_REFS - 1][2];
- unsigned int comp_bwdref[REF_CONTEXTS][BWD_REFS - 1][2];
- unsigned int intrabc[2];
-
- unsigned int txfm_partition[TXFM_PARTITION_CONTEXTS][2];
- unsigned int intra_tx_size[MAX_TX_CATS][TX_SIZE_CONTEXTS][MAX_TX_DEPTH + 1];
- unsigned int skip_mode[SKIP_MODE_CONTEXTS][2];
- unsigned int skip[SKIP_CONTEXTS][2];
- unsigned int compound_index[COMP_INDEX_CONTEXTS][2];
- unsigned int comp_group_idx[COMP_GROUP_IDX_CONTEXTS][2];
- unsigned int delta_q[DELTA_Q_PROBS][2];
- unsigned int delta_lf_multi[FRAME_LF_COUNT][DELTA_LF_PROBS][2];
- unsigned int delta_lf[DELTA_LF_PROBS][2];
-
- unsigned int inter_ext_tx[EXT_TX_SETS_INTER][EXT_TX_SIZES][TX_TYPES];
- unsigned int intra_ext_tx[EXT_TX_SETS_INTRA][EXT_TX_SIZES][INTRA_MODES]
- [TX_TYPES];
- unsigned int filter_intra_mode[FILTER_INTRA_MODES];
- unsigned int filter_intra[BLOCK_SIZES_ALL][2];
- unsigned int switchable_restore[RESTORE_SWITCHABLE_TYPES];
- unsigned int wiener_restore[2];
- unsigned int sgrproj_restore[2];
-#endif // CONFIG_ENTROPY_STATS
-
- unsigned int switchable_interp[SWITCHABLE_FILTER_CONTEXTS]
- [SWITCHABLE_FILTERS];
-} FRAME_COUNTS;
-
-#if CONFIG_COLLECT_INTER_MODE_RD_STATS
-#define INTER_MODE_RD_DATA_OVERALL_SIZE 6400
-
-typedef struct {
- int ready;
- double a;
- double b;
- double dist_mean;
- double ld_mean;
- double sse_mean;
- double sse_sse_mean;
- double sse_ld_mean;
- int num;
- double dist_sum;
- double ld_sum;
- double sse_sum;
- double sse_sse_sum;
- double sse_ld_sum;
-} InterModeRdModel;
-
-typedef struct {
- int idx;
- int64_t rd;
-} RdIdxPair;
-// TODO(angiebird): This is an estimated size. We still need to figure what is
-// the maximum number of modes.
-#define MAX_INTER_MODES 1024
-typedef struct inter_modes_info {
- int num;
- MB_MODE_INFO mbmi_arr[MAX_INTER_MODES];
- int mode_rate_arr[MAX_INTER_MODES];
- int64_t sse_arr[MAX_INTER_MODES];
- int64_t est_rd_arr[MAX_INTER_MODES];
- RdIdxPair rd_idx_pair_arr[MAX_INTER_MODES];
-} InterModesInfo;
-#endif
-
-// TODO(jingning) All spatially adaptive variables should go to TileDataEnc.
-typedef struct TileDataEnc {
- TileInfo tile_info;
- int thresh_freq_fact[BLOCK_SIZES_ALL][MAX_MODES];
- int mode_map[BLOCK_SIZES_ALL][MAX_MODES];
- int m_search_count;
- int ex_search_count;
- CFL_CTX cfl;
- DECLARE_ALIGNED(16, FRAME_CONTEXT, tctx);
- uint8_t allow_update_cdf;
-#if CONFIG_COLLECT_INTER_MODE_RD_STATS
- InterModeRdModel inter_mode_rd_models[BLOCK_SIZES_ALL];
- InterModesInfo inter_modes_info;
-#endif
-} TileDataEnc;
-
-typedef struct {
- TOKENEXTRA *start;
- TOKENEXTRA *stop;
- unsigned int count;
-} TOKENLIST;
-
-typedef struct RD_COUNTS {
- int64_t comp_pred_diff[REFERENCE_MODES];
- // Stores number of 4x4 blocks using global motion per reference frame.
- int global_motion_used[REF_FRAMES];
- int compound_ref_used_flag;
- int skip_mode_used_flag;
-} RD_COUNTS;
-
-typedef struct ThreadData {
- MACROBLOCK mb;
- RD_COUNTS rd_counts;
- FRAME_COUNTS *counts;
- PC_TREE *pc_tree;
- PC_TREE *pc_root[MAX_MIB_SIZE_LOG2 - MIN_MIB_SIZE_LOG2 + 1];
- uint32_t *hash_value_buffer[2][2];
- int32_t *wsrc_buf;
- int32_t *mask_buf;
- uint8_t *above_pred_buf;
- uint8_t *left_pred_buf;
- PALETTE_BUFFER *palette_buffer;
- CONV_BUF_TYPE *tmp_conv_dst;
- uint8_t *tmp_obmc_bufs[2];
- int intrabc_used_this_tile;
-} ThreadData;
-
-struct EncWorkerData;
-
-typedef struct ActiveMap {
- int enabled;
- int update;
- unsigned char *map;
-} ActiveMap;
-
-#if CONFIG_INTERNAL_STATS
-// types of stats
-typedef enum {
- STAT_Y,
- STAT_U,
- STAT_V,
- STAT_ALL,
- NUM_STAT_TYPES // This should always be the last member of the enum
-} StatType;
-
-typedef struct IMAGE_STAT {
- double stat[NUM_STAT_TYPES];
- double worst;
-} ImageStat;
-#endif // CONFIG_INTERNAL_STATS
-
-// A frame buffer paired with a reference count. uref_cnt_fb() increments
-// ref_count on the buffer being switched to and decrements it (never below
-// zero) on the buffer being switched away from.
-typedef struct {
- int ref_count;
- YV12_BUFFER_CONFIG buf;
-} EncRefCntBuffer;
-
-typedef struct TileBufferEnc {
- uint8_t *data;
- size_t size;
-} TileBufferEnc;
-
-typedef struct AV1_COMP {
- QUANTS quants;
- ThreadData td;
- FRAME_COUNTS counts;
- MB_MODE_INFO_EXT *mbmi_ext_base;
- CB_COEFF_BUFFER *coeff_buffer_base;
- Dequants dequants;
- AV1_COMMON common;
- AV1EncoderConfig oxcf;
- struct lookahead_ctx *lookahead;
- struct lookahead_entry *alt_ref_source;
- int no_show_kf;
-
- int optimize_speed_feature;
- int optimize_seg_arr[MAX_SEGMENTS];
-
- YV12_BUFFER_CONFIG *source;
- YV12_BUFFER_CONFIG *last_source; // NULL for first frame and alt_ref frames
- YV12_BUFFER_CONFIG *unscaled_source;
- YV12_BUFFER_CONFIG scaled_source;
- YV12_BUFFER_CONFIG *unscaled_last_source;
- YV12_BUFFER_CONFIG scaled_last_source;
-
- // For a still frame, this flag is set to 1 to skip partition search.
- int partition_search_skippable_frame;
- double csm_rate_array[32];
- double m_rate_array[32];
- int rate_size;
- int rate_index;
- hash_table *previous_hash_table;
- int previous_index;
- int cur_poc; // DebugInfo
-
- unsigned int row_mt;
- int scaled_ref_idx[REF_FRAMES];
- int ref_fb_idx[REF_FRAMES];
- int refresh_fb_idx; // ref frame buffer index to refresh
-
- int last_show_frame_buf_idx; // last show frame buffer index
-
- int refresh_last_frame;
- int refresh_golden_frame;
- int refresh_bwd_ref_frame;
- int refresh_alt2_ref_frame;
- int refresh_alt_ref_frame;
-#if USE_SYMM_MULTI_LAYER
- int new_bwdref_update_rule;
-#endif
-
- int ext_refresh_frame_flags_pending;
- int ext_refresh_last_frame;
- int ext_refresh_golden_frame;
- int ext_refresh_bwd_ref_frame;
- int ext_refresh_alt2_ref_frame;
- int ext_refresh_alt_ref_frame;
-
- int ext_refresh_frame_context_pending;
- int ext_refresh_frame_context;
- int ext_use_ref_frame_mvs;
- int ext_use_error_resilient;
- int ext_use_s_frame;
- int ext_use_primary_ref_none;
-
- YV12_BUFFER_CONFIG last_frame_uf;
- YV12_BUFFER_CONFIG trial_frame_rst;
-
- // Ambient reconstruction err target for force key frames
- int64_t ambient_err;
-
- RD_OPT rd;
-
- CODING_CONTEXT coding_context;
-
- int gmtype_cost[TRANS_TYPES];
- int gmparams_cost[REF_FRAMES];
-
- int nmv_costs[2][MV_VALS];
- int nmv_costs_hp[2][MV_VALS];
-
- int64_t last_time_stamp_seen;
- int64_t last_end_time_stamp_seen;
- int64_t first_time_stamp_ever;
-
- RATE_CONTROL rc;
- double framerate;
-
- // NOTE(zoeliu): Any inter frame allows maximum of REF_FRAMES inter
- // references; Plus the currently coded frame itself, it is needed to allocate
- // sufficient space to the size of the maximum possible number of frames.
- int interp_filter_selected[REF_FRAMES + 1][SWITCHABLE];
-
- struct aom_codec_pkt_list *output_pkt_list;
-
- MBGRAPH_FRAME_STATS mbgraph_stats[MAX_LAG_BUFFERS];
- int mbgraph_n_frames; // number of frames filled in the above
- int static_mb_pct; // % forced skip mbs by segmentation
- int ref_frame_flags;
- int ext_ref_frame_flags;
- RATE_FACTOR_LEVEL frame_rf_level[FRAME_BUFFERS];
-
- SPEED_FEATURES sf;
-
- unsigned int max_mv_magnitude;
- int mv_step_param;
-
- int allow_comp_inter_inter;
- int all_one_sided_refs;
-
- uint8_t *segmentation_map;
-
- CYCLIC_REFRESH *cyclic_refresh;
- ActiveMap active_map;
-
- fractional_mv_step_fp *find_fractional_mv_step;
- av1_diamond_search_fn_t diamond_search_sad;
- aom_variance_fn_ptr_t fn_ptr[BLOCK_SIZES_ALL];
- uint64_t time_receive_data;
- uint64_t time_compress_data;
- uint64_t time_pick_lpf;
- uint64_t time_encode_sb_row;
-
-#if CONFIG_FP_MB_STATS
- int use_fp_mb_stats;
-#endif
-
- TWO_PASS twopass;
-
- YV12_BUFFER_CONFIG alt_ref_buffer;
-
-#if CONFIG_INTERNAL_STATS
- unsigned int mode_chosen_counts[MAX_MODES];
-
- int count;
- uint64_t total_sq_error;
- uint64_t total_samples;
- ImageStat psnr;
-
- double total_blockiness;
- double worst_blockiness;
-
- int bytes;
- double summed_quality;
- double summed_weights;
- unsigned int tot_recode_hits;
- double worst_ssim;
-
- ImageStat fastssim;
- ImageStat psnrhvs;
-
- int b_calculate_blockiness;
- int b_calculate_consistency;
-
- double total_inconsistency;
- double worst_consistency;
- Ssimv *ssim_vars;
- Metrics metrics;
-#endif
- int b_calculate_psnr;
-
- int droppable;
-
- int initial_width;
- int initial_height;
- int initial_mbs; // Number of MBs in the full-size frame; to be used to
- // normalize the firstpass stats. This will differ from the
- // number of MBs in the current frame when the frame is
- // scaled.
-
- // When resize is triggered through external control, the desired width/height
- // are stored here until use in the next frame coded. They are effective only
- // for one frame and are reset after use.
- int resize_pending_width;
- int resize_pending_height;
-
- int frame_flags;
-
- search_site_config ss_cfg;
-
- TileDataEnc *tile_data;
- int allocated_tiles; // Keep track of memory allocated for tiles.
-
- TOKENEXTRA *tile_tok[MAX_TILE_ROWS][MAX_TILE_COLS];
- unsigned int tok_count[MAX_TILE_ROWS][MAX_TILE_COLS];
- TOKENLIST *tplist[MAX_TILE_ROWS][MAX_TILE_COLS];
-
- TileBufferEnc tile_buffers[MAX_TILE_ROWS][MAX_TILE_COLS];
-
- int resize_state;
- int resize_avg_qp;
- int resize_buffer_underflow;
- int resize_count;
-
- // Sequence parameters have been transmitted already and locked
- // or not. Once locked av1_change_config cannot change the seq
- // parameters.
- int seq_params_locked;
-
- // VARIANCE_AQ segment map refresh
- int vaq_refresh;
-
- // Multi-threading
- int num_workers;
- AVxWorker *workers;
- struct EncWorkerData *tile_thr_data;
- int refresh_frame_mask;
- int existing_fb_idx_to_show;
- int is_arf_filter_off[MAX_EXT_ARFS + 1];
- int num_extra_arfs;
- int arf_pos_in_gf[MAX_EXT_ARFS + 1];
- int arf_pos_for_ovrly[MAX_EXT_ARFS + 1];
- int global_motion_search_done;
- tran_low_t *tcoeff_buf[MAX_MB_PLANE];
- int extra_arf_allowed;
- // A flag to indicate if intrabc is ever used in current frame.
- int intrabc_used;
- int dv_cost[2][MV_VALS];
- // TODO(huisu@google.com): we can update dv_joint_cost per SB.
- int dv_joint_cost[MV_JOINTS];
- int has_lossless_segment;
-
- // For frame refs short signaling:
- // A mapping of each reference frame from its encoder side value to the
- // decoder side value obtained following the short signaling procedure.
- int ref_conv[REF_FRAMES];
-
- AV1LfSync lf_row_sync;
- AV1LrSync lr_row_sync;
- AV1LrStruct lr_ctxt;
-
- aom_film_grain_table_t *film_grain_table;
-#if CONFIG_DENOISE
- struct aom_denoise_and_model_t *denoise_and_model;
-#endif
- // Stores the default value of skip flag depending on chroma format
- // Set as 1 for monochrome and 3 for other color formats
- int default_interp_skip_flags;
- int preserve_arf_as_gld;
-} AV1_COMP;
-
-// Must not be called more than once.
-void av1_initialize_enc(void);
-
-struct AV1_COMP *av1_create_compressor(AV1EncoderConfig *oxcf,
- BufferPool *const pool);
-void av1_remove_compressor(AV1_COMP *cpi);
-
-void av1_change_config(AV1_COMP *cpi, const AV1EncoderConfig *oxcf);
-
-// Receive a frame's worth of data. The caller can assume that a copy of this
-// frame is made, and not just a copy of the pointer.
-int av1_receive_raw_frame(AV1_COMP *cpi, aom_enc_frame_flags_t frame_flags,
- YV12_BUFFER_CONFIG *sd, int64_t time_stamp,
- int64_t end_time_stamp);
-
-int av1_get_compressed_data(AV1_COMP *cpi, unsigned int *frame_flags,
- size_t *size, uint8_t *dest, int64_t *time_stamp,
- int64_t *time_end, int flush,
- const aom_rational_t *timebase);
-
-int av1_get_preview_raw_frame(AV1_COMP *cpi, YV12_BUFFER_CONFIG *dest);
-
-int av1_get_last_show_frame(AV1_COMP *cpi, YV12_BUFFER_CONFIG *frame);
-
-aom_codec_err_t av1_copy_new_frame_enc(AV1_COMMON *cm,
- YV12_BUFFER_CONFIG *new_frame,
- YV12_BUFFER_CONFIG *sd);
-
-int av1_use_as_reference(AV1_COMP *cpi, int ref_frame_flags);
-
-void av1_update_reference(AV1_COMP *cpi, int ref_frame_flags);
-
-int av1_copy_reference_enc(AV1_COMP *cpi, int idx, YV12_BUFFER_CONFIG *sd);
-
-int av1_set_reference_enc(AV1_COMP *cpi, int idx, YV12_BUFFER_CONFIG *sd);
-
-int av1_update_entropy(AV1_COMP *cpi, int update);
-
-int av1_set_active_map(AV1_COMP *cpi, unsigned char *map, int rows, int cols);
-
-int av1_get_active_map(AV1_COMP *cpi, unsigned char *map, int rows, int cols);
-
-int av1_set_internal_size(AV1_COMP *cpi, AOM_SCALING horiz_mode,
- AOM_SCALING vert_mode);
-
-int av1_get_quantizer(struct AV1_COMP *cpi);
-
-int av1_convert_sect5obus_to_annexb(uint8_t *buffer, size_t *input_size);
-
-int64_t timebase_units_to_ticks(const aom_rational_t *timebase, int64_t n);
-int64_t ticks_to_timebase_units(const aom_rational_t *timebase, int64_t n);
-
-// Returns 1 when the current frame is intra-only, refreshes the alt-ref
-// buffer, or refreshes the golden buffer while rc.is_src_frame_alt_ref is
-// clear; returns 0 otherwise.
-static INLINE int frame_is_kf_gf_arf(const AV1_COMP *cpi) {
-  if (frame_is_intra_only(&cpi->common)) return 1;
-  if (cpi->refresh_alt_ref_frame) return 1;
-  if (!cpi->refresh_golden_frame) return 0;
-  return !cpi->rc.is_src_frame_alt_ref;
-}
-
-// Looks up cpi->ref_fb_idx for ref_frame (stored at ref_frame - 1).
-// Any ref_frame below 1 yields INVALID_IDX.
-static INLINE int get_ref_frame_map_idx(const AV1_COMP *cpi,
-                                        MV_REFERENCE_FRAME ref_frame) {
-  if (ref_frame < 1) return INVALID_IDX;
-  return cpi->ref_fb_idx[ref_frame - 1];
-}
-
-// Resolves ref_frame to an entry of cm->ref_frame_map, going through
-// get_ref_frame_map_idx(). Propagates INVALID_IDX when no map index exists.
-static INLINE int get_ref_frame_buf_idx(const AV1_COMP *cpi,
-                                        MV_REFERENCE_FRAME ref_frame) {
-  const int map_idx = get_ref_frame_map_idx(cpi, ref_frame);
-  if (map_idx == INVALID_IDX) return INVALID_IDX;
-  return cpi->common.ref_frame_map[map_idx];
-}
-
-// TODO(huisu@google.com, youzhou@microsoft.com): enable hash-me for HBD.
-static INLINE int av1_use_hash_me(const AV1_COMMON *const cm) {
-  // The decision follows the frame's allow_screen_content_tools setting.
-  const int screen_content_tools = cm->allow_screen_content_tools;
-  return screen_content_tools;
-}
-
-// Returns the hash_table stored in the buffer-pool entry backing ref_frame,
-// or NULL when ref_frame does not resolve to a valid buffer index.
-static INLINE hash_table *av1_get_ref_frame_hash_map(
-    const AV1_COMP *cpi, MV_REFERENCE_FRAME ref_frame) {
-  const int buf_idx = get_ref_frame_buf_idx(cpi, ref_frame);
-  if (buf_idx == INVALID_IDX) return NULL;
-  return &cpi->common.buffer_pool->frame_bufs[buf_idx].hash_table;
-}
-
-// Returns the YV12 buffer stored in the buffer-pool entry backing ref_frame,
-// or NULL when ref_frame does not resolve to a valid buffer index.
-static INLINE YV12_BUFFER_CONFIG *get_ref_frame_buffer(
-    const AV1_COMP *cpi, MV_REFERENCE_FRAME ref_frame) {
-  const int buf_idx = get_ref_frame_buf_idx(cpi, ref_frame);
-  if (buf_idx == INVALID_IDX) return NULL;
-  return &cpi->common.buffer_pool->frame_bufs[buf_idx].buf;
-}
-
-// Returns 1 if frame_buf is the buffer-pool entry currently mapped to any
-// reference from LAST_FRAME through ALTREF_FRAME, and 0 otherwise.
-static INLINE int enc_is_ref_frame_buf(AV1_COMP *cpi, RefCntBuffer *frame_buf) {
-  AV1_COMMON *const cm = &cpi->common;
-  for (MV_REFERENCE_FRAME ref = LAST_FRAME; ref <= ALTREF_FRAME; ++ref) {
-    const int buf_idx = get_ref_frame_buf_idx(cpi, ref);
-    // References without a valid buffer index cannot match.
-    if (buf_idx != INVALID_IDX &&
-        frame_buf == &cm->buffer_pool->frame_bufs[buf_idx]) {
-      return 1;
-    }
-  }
-  return 0;
-}
-
-// Token buffer is only used for palette tokens.
-static INLINE unsigned int get_token_alloc(int mb_rows, int mb_cols,
-                                           int sb_size_log2,
-                                           const int num_planes) {
-  // Convert the MB-unit dimensions into a superblock count, rounding up.
-  // The "- 4" presumably reflects 16-pixel MB units -- confirm with callers.
-  const int mb_to_sb_shift = sb_size_log2 - 4;
-  const int sb_rows =
-      ALIGN_POWER_OF_TWO(mb_rows, mb_to_sb_shift) >> mb_to_sb_shift;
-  const int sb_cols =
-      ALIGN_POWER_OF_TWO(mb_cols, mb_to_sb_shift) >> mb_to_sb_shift;
-
-  // One palette token per pixel of a superblock, on at most two planes.
-  const int sb_dim = 1 << sb_size_log2;
-  const int pels_per_sb = sb_dim * sb_dim;
-  const int toks_per_sb = AOMMIN(2, num_planes) * pels_per_sb;
-
-  return sb_rows * sb_cols * toks_per_sb;
-}
-
-// Get the allocated token size for a tile. It does the same calculation as in
-// the frame token allocation.
-static INLINE unsigned int allocated_tokens(TileInfo tile, int sb_size_log2,
-                                            int num_planes) {
-  // Tile extent in mi units, then rounded to MB units with (x + 2) >> 2.
-  const int tile_mi_rows = tile.mi_row_end - tile.mi_row_start;
-  const int tile_mi_cols = tile.mi_col_end - tile.mi_col_start;
-  const int tile_mb_rows = (tile_mi_rows + 2) >> 2;
-  const int tile_mb_cols = (tile_mi_cols + 2) >> 2;
-
-  return get_token_alloc(tile_mb_rows, tile_mb_cols, sb_size_log2, num_planes);
-}
-
-// Stores in *tok the position inside cpi->tile_tok[tile_row][tile_col] that
-// corresponds to mi_row: the tile's token base advanced by the allocation
-// get_token_alloc() reports for the rows between the tile start and mi_row.
-static INLINE void get_start_tok(AV1_COMP *cpi, int tile_row, int tile_col,
-                                 int mi_row, TOKENEXTRA **tok, int sb_size_log2,
-                                 int num_planes) {
-  const int tile_cols = cpi->common.tile_cols;
-  const TileInfo *const tile_info =
-      &cpi->tile_data[tile_row * tile_cols + tile_col].tile_info;
-
-  // mi units rounded to MB units with (x + 2) >> 2.
-  const int mb_cols_in_tile =
-      (tile_info->mi_col_end - tile_info->mi_col_start + 2) >> 2;
-  const int mb_rows_before = (mi_row - tile_info->mi_row_start + 2) >> 2;
-
-  TOKENEXTRA *const tile_base = cpi->tile_tok[tile_row][tile_col];
-  *tok = tile_base + get_token_alloc(mb_rows_before, mb_cols_in_tile,
-                                     sb_size_log2, num_planes);
-}
-
-void av1_apply_encoding_flags(AV1_COMP *cpi, aom_enc_frame_flags_t flags);
-
-#define ALT_MIN_LAG 3
-// Returns 1 when the configured lag is at least ALT_MIN_LAG frames and
-// enable_auto_arf is set; 0 otherwise.
-static INLINE int is_altref_enabled(const AV1_COMP *const cpi) {
-  if (cpi->oxcf.lag_in_frames < ALT_MIN_LAG) return 0;
-  return cpi->oxcf.enable_auto_arf != 0;
-}
-
-// TODO(zoeliu): To set up cpi->oxcf.enable_auto_brf
-
-// Points xd->block_refs[0] and [1] at the cm->frame_refs entries for ref0 and
-// ref1. A reference below LAST_FRAME falls back to entry 0.
-static INLINE void set_ref_ptrs(const AV1_COMMON *cm, MACROBLOCKD *xd,
-                                MV_REFERENCE_FRAME ref0,
-                                MV_REFERENCE_FRAME ref1) {
-  const int slot0 = (ref0 >= LAST_FRAME) ? ref0 - LAST_FRAME : 0;
-  const int slot1 = (ref1 >= LAST_FRAME) ? ref1 - LAST_FRAME : 0;
-  xd->block_refs[0] = &cm->frame_refs[slot0];
-  xd->block_refs[1] = &cm->frame_refs[slot1];
-}
-
-// Returns the lowest bit of frame_index (0 or 1).
-static INLINE int get_chessboard_index(int frame_index) {
-  const int low_bit = frame_index & 1;
-  return low_bit;
-}
-
-// Passes cost_list through unless the sub-pel search method is SUBPEL_TREE,
-// in which case NULL is returned instead.
-static INLINE int *cond_cost_list(const struct AV1_COMP *cpi, int *cost_list) {
-  if (cpi->sf.mv.subpel_search_method == SUBPEL_TREE) return NULL;
-  return cost_list;
-}
-
-void av1_new_framerate(AV1_COMP *cpi, double framerate);
-
-#define LAYER_IDS_TO_IDX(sl, tl, num_tl) ((sl) * (num_tl) + (tl))
-
-// Update up-sampled reference frame index.
-static INLINE void uref_cnt_fb(EncRefCntBuffer *ubufs, int *uidx,
-                               int new_uidx) {
-  const int old_uidx = *uidx;
-
-  // Drop the reference held on the previous buffer; a negative index means
-  // there was none, and the count is never taken below zero.
-  if (old_uidx >= 0) {
-    EncRefCntBuffer *const old_buf = &ubufs[old_uidx];
-    if (old_buf->ref_count > 0) --old_buf->ref_count;
-  }
-
-  // Switch to the new buffer and take a reference on it.
-  *uidx = new_uidx;
-  ++ubufs[new_uidx].ref_count;
-}
-
-// Returns 1 if a frame is scaled and 0 otherwise.
-static INLINE int av1_resize_scaled(const AV1_COMMON *cm) {
-  // Scaled as soon as either super-res upscaled dimension differs from the
-  // corresponding render dimension.
-  return cm->superres_upscaled_width != cm->render_width ||
-         cm->superres_upscaled_height != cm->render_height;
-}
-
-// Returns 1 only when the frame is resize-scaled and not super-res scaled.
-static INLINE int av1_frame_scaled(const AV1_COMMON *cm) {
-  if (av1_superres_scaled(cm)) return 0;
-  return av1_resize_scaled(cm) != 0;
-}
-
-// Don't allow a show_existing_frame to coincide with an error resilient
-// frame. An exception can be made for a forward keyframe since it has no
-// previous dependencies.
-static INLINE int encode_show_existing_frame(const AV1_COMMON *cm) {
-  if (!cm->show_existing_frame) return 0;
-  // Allowed when error resilience is off, or for a key frame.
-  return !cm->error_resilient_mode || cm->frame_type == KEY_FRAME;
-}
-
-// Returns a Sequence Header OBU stored in an aom_fixed_buf_t, or NULL upon
-// failure. When a non-NULL aom_fixed_buf_t pointer is returned by this
-// function, the memory must be freed by the caller. Both the buf member of the
-// aom_fixed_buf_t, and the aom_fixed_buf_t pointer itself must be freed. Memory
-// returned must be freed via call to free().
-//
-// Note: The OBU returned is in Low Overhead Bitstream Format. Specifically,
-// the obu_has_size_field bit is set, and the buffer contains the obu_size
-// field.
-aom_fixed_buf_t *av1_get_global_headers(AV1_COMP *cpi);
-
-#ifdef __cplusplus
-} // extern "C"
-#endif
-
-#endif // AOM_AV1_ENCODER_ENCODER_H_
diff --git a/third_party/aom/av1/encoder/encodetxb.c b/third_party/aom/av1/encoder/encodetxb.c
deleted file mode 100644
index 5a31d93d7..000000000
--- a/third_party/aom/av1/encoder/encodetxb.c
+++ /dev/null
@@ -1,2062 +0,0 @@
-/*
- * Copyright (c) 2017, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#include "av1/encoder/encodetxb.h"
-
-#include "aom_ports/mem.h"
-#include "av1/common/blockd.h"
-#include "av1/common/idct.h"
-#include "av1/common/pred_common.h"
-#include "av1/common/scan.h"
-#include "av1/encoder/bitstream.h"
-#include "av1/encoder/cost.h"
-#include "av1/encoder/encodeframe.h"
-#include "av1/encoder/hash.h"
-#include "av1/encoder/rdopt.h"
-#include "av1/encoder/tokenize.h"
-
-static int hbt_needs_init = 1;
-static CRC32C crc_calculator;
-static const int HBT_EOB = 16; // also the length in opt_qcoeff
-static const int HBT_TABLE_SIZE = 65536; // 16 bit: holds 65536 'arrays'
-static const int HBT_ARRAY_LENGTH = 256; // 8 bit: 256 entries
-// If removed in hbt_create_hashes or increased beyond int8_t, widen deltas type
-static const int HBT_KICKOUT = 3;
-
-// Element type of the table pointed to by hbt_hash_table.
-// NOTE(review): field roles below are inferred from names and the HBT_*
-// constants above; confirm against the code that fills the table.
-typedef struct OptTxbQcoeff {
- // Use larger type if larger/no kickout value is used in hbt_create_hashes
- // 16 entries, matching HBT_EOB ("also the length in opt_qcoeff").
- int8_t deltas[16];
- uint32_t hbt_qc_hash;
- uint32_t hbt_ctx_hash;
- int init;
- int rate_cost;
-} OptTxbQcoeff;
-
-OptTxbQcoeff *hbt_hash_table;
-
-typedef struct LevelDownStats {
- int update;
- tran_low_t low_qc;
- tran_low_t low_dqc;
- int64_t dist0;
- int rate;
- int rate_low;
- int64_t dist;
- int64_t dist_low;
- int64_t rd;
- int64_t rd_low;
- int64_t nz_rd;
- int64_t rd_diff;
- int cost_diff;
- int64_t dist_diff;
- int new_eob;
-} LevelDownStats;
-
-// (Re)allocates cpi->coeff_buffer_base with one CB_COEFF_BUFFER per grid
-// cell, where the grid is ((mi_rows >> mib_size_log2) + 1) by
-// ((mi_cols >> mib_size_log2) + 1). Any previous buffer is released first;
-// allocation failure is reported through CHECK_MEM_ERROR.
-void av1_alloc_txb_buf(AV1_COMP *cpi) {
-  AV1_COMMON *cm = &cpi->common;
-  const int mib_size_log2 = cm->seq_params.mib_size_log2;
-  const int grid_rows = (cm->mi_rows >> mib_size_log2) + 1;
-  const int grid_cols = (cm->mi_cols >> mib_size_log2) + 1;
-  int size = grid_rows * grid_cols;
-
-  av1_free_txb_buf(cpi);
-  // TODO(jingning): This should be further reduced.
-  CHECK_MEM_ERROR(cm, cpi->coeff_buffer_base,
-                  aom_memalign(32, sizeof(*cpi->coeff_buffer_base) * size));
-}
-
-// Releases the coefficient buffer allocated by av1_alloc_txb_buf().
-// The pointer is reset afterwards so that calling this again -- which
-// av1_alloc_txb_buf() does unconditionally before allocating -- never frees
-// the same block twice.
-void av1_free_txb_buf(AV1_COMP *cpi) {
-  aom_free(cpi->coeff_buffer_base);
-  cpi->coeff_buffer_base = NULL;
-}
-
-// Points the per-plane tcoeff / eobs / txb_skip_ctx / dc_sign_ctx pointers in
-// x->mbmi_ext at the CB_COEFF_BUFFER cell covering (mi_row, mi_col).
-// tcoeff is offset by x->cb_offset; the other three are offset by cb_offset
-// divided by (TX_SIZE_W_MIN * TX_SIZE_H_MIN).
-void av1_set_coeff_buffer(const AV1_COMP *const cpi, MACROBLOCK *const x,
-                          int mi_row, int mi_col) {
-  const AV1_COMMON *const cm = &cpi->common;
-  const int num_planes = av1_num_planes(cm);
-  const int mib_size_log2 = cm->seq_params.mib_size_log2;
-
-  // Locate the cell in the grid allocated by av1_alloc_txb_buf().
-  const int grid_stride = (cm->mi_cols >> mib_size_log2) + 1;
-  const int grid_row = mi_row >> mib_size_log2;
-  const int grid_col = mi_col >> mib_size_log2;
-  CB_COEFF_BUFFER *coeff_buf =
-      &cpi->coeff_buffer_base[grid_row * grid_stride + grid_col];
-
-  const int txb_offset = x->cb_offset / (TX_SIZE_W_MIN * TX_SIZE_H_MIN);
-  assert(x->cb_offset < (1 << num_pels_log2_lookup[cm->seq_params.sb_size]));
-
-  int plane = 0;
-  while (plane < num_planes) {
-    x->mbmi_ext->tcoeff[plane] = coeff_buf->tcoeff[plane] + x->cb_offset;
-    x->mbmi_ext->eobs[plane] = coeff_buf->eobs[plane] + txb_offset;
-    x->mbmi_ext->txb_skip_ctx[plane] =
-        coeff_buf->txb_skip_ctx[plane] + txb_offset;
-    x->mbmi_ext->dc_sign_ctx[plane] =
-        coeff_buf->dc_sign_ctx[plane] + txb_offset;
-    ++plane;
-  }
-}
-
-// Writes level + 1 as a run of (length - 1) zero bits followed by the value
-// itself, most significant bit first, where length is its bit width.
-static void write_golomb(aom_writer *w, int level) {
-  const int x = level + 1;
-
-  // Bit width of x.
-  int length = 0;
-  for (int rest = x; rest != 0; rest >>= 1) ++length;
-  assert(length > 0);
-
-  // Zero prefix: one bit fewer than the width.
-  for (int zeros = length - 1; zeros > 0; --zeros) aom_write_bit(w, 0);
-
-  // Payload, MSB first.
-  for (int pos = length - 1; pos >= 0; --pos)
-    aom_write_bit(w, (x >> pos) & 0x01);
-}
-
-// Moves qc one step toward zero; zero stays zero.
-static INLINE tran_low_t get_lower_coeff(tran_low_t qc) {
-  if (qc > 0) return qc - 1;
-  if (qc < 0) return qc + 1;
-  return 0;
-}
-
-// Dequantizes qc: |qc| * dqv >> shift, with the sign of qc restored.
-// When iqmatrix is non-NULL, dqv is first scaled by iqmatrix[coeff_idx] and
-// rounded back down by AOM_QM_BITS.
-static INLINE tran_low_t qcoeff_to_dqcoeff(tran_low_t qc, int coeff_idx,
-                                           int dqv, int shift,
-                                           const qm_val_t *iqmatrix) {
-  if (iqmatrix != NULL) {
-    const int rounding = 1 << (AOM_QM_BITS - 1);
-    dqv = ((iqmatrix[coeff_idx] * dqv) + rounding) >> AOM_QM_BITS;
-  }
-  const int magnitude = (abs(qc) * dqv) >> shift;
-  return (qc < 0) ? -magnitude : magnitude;
-}
-
-static INLINE int64_t get_coeff_dist(tran_low_t tcoeff, tran_low_t dqcoeff,
- int shift) {
- const int64_t diff = (tcoeff - dqcoeff) * (1 << shift);
- const int64_t error = diff * diff;
- return error;
-}
-
-static const int8_t eob_to_pos_small[33] = {
- 0, 1, 2, // 0-2
- 3, 3, // 3-4
- 4, 4, 4, 4, // 5-8
- 5, 5, 5, 5, 5, 5, 5, 5, // 9-16
- 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6 // 17-32
-};
-
-static const int8_t eob_to_pos_large[17] = {
- 6, // place holder
- 7, // 33-64
- 8, 8, // 65-128
- 9, 9, 9, 9, // 129-256
- 10, 10, 10, 10, 10, 10, 10, 10, // 257-512
- 11 // 513-
-};
-
-// Maps eob to its position token and stores in *extra the offset of eob from
-// k_eob_group_start[token]. Values below 33 use eob_to_pos_small directly;
-// larger values index eob_to_pos_large by (eob - 1) >> 5, capped at 16.
-static INLINE int get_eob_pos_token(const int eob, int *const extra) {
-  const int t = (eob < 33) ? eob_to_pos_small[eob]
-                           : eob_to_pos_large[AOMMIN((eob - 1) >> 5, 16)];
-
-  *extra = eob - k_eob_group_start[t];
-  return t;
-}
-
-// Records one end-of-block position in the adaptive entropy state.
-//
-// eob is split by get_eob_pos_token() into a position token (eob_pt) and
-// extra bits. The token updates the eob_flag CDF selected by
-// txsize_log2_minus4[tx_size] and by whether tx_class is TX_CLASS_2D. When
-// the token carries offset bits, the most significant extra bit updates
-// eob_extra_cdf. CDFs are only modified when allow_update_cdf is non-zero.
-// With CONFIG_ENTROPY_STATS the same symbols are also tallied in counts,
-// using cdf_idx as the leading index.
-#if CONFIG_ENTROPY_STATS
-void av1_update_eob_context(int cdf_idx, int eob, TX_SIZE tx_size,
-                            TX_CLASS tx_class, PLANE_TYPE plane,
-                            FRAME_CONTEXT *ec_ctx, FRAME_COUNTS *counts,
-                            uint8_t allow_update_cdf) {
-#else
-void av1_update_eob_context(int eob, TX_SIZE tx_size, TX_CLASS tx_class,
-                            PLANE_TYPE plane, FRAME_CONTEXT *ec_ctx,
-                            uint8_t allow_update_cdf) {
-#endif
-  int eob_extra;
-  const int eob_pt = get_eob_pos_token(eob, &eob_extra);
-  TX_SIZE txs_ctx = get_txsize_entropy_ctx(tx_size);
-
-  const int eob_multi_size = txsize_log2_minus4[tx_size];
-  const int eob_multi_ctx = (tx_class == TX_CLASS_2D) ? 0 : 1;
-
-  // Each case handles one CDF size; the symbol is always eob_pt - 1.
-  switch (eob_multi_size) {
-    case 0:
-#if CONFIG_ENTROPY_STATS
-      ++counts->eob_multi16[cdf_idx][plane][eob_multi_ctx][eob_pt - 1];
-#endif
-      if (allow_update_cdf)
-        update_cdf(ec_ctx->eob_flag_cdf16[plane][eob_multi_ctx], eob_pt - 1, 5);
-      break;
-    case 1:
-#if CONFIG_ENTROPY_STATS
-      ++counts->eob_multi32[cdf_idx][plane][eob_multi_ctx][eob_pt - 1];
-#endif
-      if (allow_update_cdf)
-        update_cdf(ec_ctx->eob_flag_cdf32[plane][eob_multi_ctx], eob_pt - 1, 6);
-      break;
-    case 2:
-#if CONFIG_ENTROPY_STATS
-      ++counts->eob_multi64[cdf_idx][plane][eob_multi_ctx][eob_pt - 1];
-#endif
-      if (allow_update_cdf)
-        update_cdf(ec_ctx->eob_flag_cdf64[plane][eob_multi_ctx], eob_pt - 1, 7);
-      break;
-    case 3:
-#if CONFIG_ENTROPY_STATS
-      ++counts->eob_multi128[cdf_idx][plane][eob_multi_ctx][eob_pt - 1];
-#endif
-      if (allow_update_cdf) {
-        update_cdf(ec_ctx->eob_flag_cdf128[plane][eob_multi_ctx], eob_pt - 1,
-                   8);
-      }
-      break;
-    case 4:
-#if CONFIG_ENTROPY_STATS
-      ++counts->eob_multi256[cdf_idx][plane][eob_multi_ctx][eob_pt - 1];
-#endif
-      if (allow_update_cdf) {
-        update_cdf(ec_ctx->eob_flag_cdf256[plane][eob_multi_ctx], eob_pt - 1,
-                   9);
-      }
-      break;
-    case 5:
-#if CONFIG_ENTROPY_STATS
-      ++counts->eob_multi512[cdf_idx][plane][eob_multi_ctx][eob_pt - 1];
-#endif
-      if (allow_update_cdf) {
-        update_cdf(ec_ctx->eob_flag_cdf512[plane][eob_multi_ctx], eob_pt - 1,
-                   10);
-      }
-      break;
-    case 6:
-    default:
-#if CONFIG_ENTROPY_STATS
-      ++counts->eob_multi1024[cdf_idx][plane][eob_multi_ctx][eob_pt - 1];
-#endif
-      if (allow_update_cdf) {
-        update_cdf(ec_ctx->eob_flag_cdf1024[plane][eob_multi_ctx], eob_pt - 1,
-                   11);
-      }
-      break;
-  }
-
-  if (k_eob_offset_bits[eob_pt] > 0) {
-    int eob_ctx = eob_pt - 3;
-    int eob_shift = k_eob_offset_bits[eob_pt] - 1;
-    int bit = (eob_extra & (1 << eob_shift)) ? 1 : 0;
-#if CONFIG_ENTROPY_STATS
-    // Tally under eob_ctx, the same index the CDF below uses. The counter
-    // array shares the CDF's EOB_COEF_CONTEXTS dimension, so indexing it with
-    // the unshifted eob_pt misaligned the counts by three slots and could
-    // step past the end of that dimension.
-    counts->eob_extra[cdf_idx][txs_ctx][plane][eob_ctx][bit]++;
-#endif  // CONFIG_ENTROPY_STATS
-    if (allow_update_cdf)
-      update_cdf(ec_ctx->eob_extra_cdf[txs_ctx][plane][eob_ctx], bit, 2);
-  }
-}
-
-// Returns the rate of signalling the end-of-block position `eob`: the cost of
-// its position token plus, for tokens that carry offset bits, the table cost of
-// the first offset bit and a literal cost for the remaining ones.
-static int get_eob_cost(int eob, const LV_MAP_EOB_COST *txb_eob_costs,
-                        const LV_MAP_COEFF_COST *txb_costs, TX_CLASS tx_class) {
-  int extra;
-  const int token = get_eob_pos_token(eob, &extra);
-  const int num_offset_bits = k_eob_offset_bits[token];
-  // One cost row for the 2D class, another shared by all other classes.
-  const int multi_ctx = (tx_class == TX_CLASS_2D) ? 0 : 1;
-  int rate = txb_eob_costs->eob_cost[multi_ctx][token - 1];
-
-  if (num_offset_bits <= 0) return rate;
-
-  // Only the most significant offset bit is priced from a cost table.
-  const int top_bit = (extra & (1 << (num_offset_bits - 1))) ? 1 : 0;
-  rate += txb_costs->eob_extra_cost[token - 3][top_bit];
-  if (num_offset_bits > 1) rate += av1_cost_literal(num_offset_bits - 1);
-  return rate;
-}
-
-// Rate of the sign of coefficient `qc`. Index 0 is looked up in
-// `dc_sign_cost` by context and sign; every other index costs one literal bit.
-static INLINE int get_sign_bit_cost(tran_low_t qc, int coeff_idx,
-                                    const int (*dc_sign_cost)[2],
-                                    int dc_sign_ctx) {
-  if (coeff_idx != 0) return av1_cost_literal(1);
-  const int is_negative = (qc < 0) ? 1 : 0;
-  return dc_sign_cost[dc_sign_ctx][is_negative];
-}
-
-// Rate of the base-range part of a coefficient magnitude. Magnitudes below
-// 1 + NUM_BASE_LEVELS cost nothing here; larger ones index `coeff_lps` by their
-// distance from that level, saturating at COEFF_BASE_RANGE. `ctx` is unused.
-static INLINE int get_br_cost(tran_low_t abs_qc, int ctx,
-                              const int *coeff_lps) {
-  const tran_low_t first_br_level = 1 + NUM_BASE_LEVELS;
-  (void)ctx;
-  if (abs_qc < first_br_level) return 0;
-
-  const tran_low_t steps = abs_qc - first_br_level;
-  if (steps >= COEFF_BASE_RANGE) return coeff_lps[COEFF_BASE_RANGE];
-  return coeff_lps[steps];
-}
-
-// Rate of the Golomb-coded remainder of a magnitude. It is zero unless the
-// magnitude reaches 1 + NUM_BASE_LEVELS + COEFF_BASE_RANGE.
-static INLINE int get_golomb_cost(int abs_qc) {
-  const int golomb_start = 1 + NUM_BASE_LEVELS + COEFF_BASE_RANGE;
-  if (abs_qc < golomb_start) return 0;
-
-  const int remainder = abs_qc - COEFF_BASE_RANGE - NUM_BASE_LEVELS;
-  const int num_bits = get_msb(remainder) + 1;
-  return av1_cost_literal(2 * num_bits - 1);
-}
-
-// Rate of coding quantized coefficient `qc` at scan position `scan_idx` with
-// base-level context `coeff_ctx`: base symbol (EOB or regular table), then for
-// non-zero values the sign, and for magnitudes above NUM_BASE_LEVELS the
-// base-range and Golomb parts.
-static int get_coeff_cost(const tran_low_t qc, const int scan_idx,
-                          const int is_eob, const TxbInfo *const txb_info,
-                          const LV_MAP_COEFF_COST *const txb_costs,
-                          const int coeff_ctx, const TX_CLASS tx_class) {
-  const tran_low_t abs_qc = abs(qc);
-  const int base_sym = AOMMIN(abs_qc, 3);
-  // The EOB table has no entry for level 0, hence the -1 offset.
-  int cost = is_eob ? txb_costs->base_eob_cost[coeff_ctx][base_sym - 1]
-                    : txb_costs->base_cost[coeff_ctx][base_sym];
-  if (qc == 0) return cost;
-
-  cost += get_sign_bit_cost(qc, scan_idx, txb_costs->dc_sign_cost,
-                            txb_info->txb_ctx->dc_sign_ctx);
-  if (abs_qc <= NUM_BASE_LEVELS) return cost;
-
-  const int pos = txb_info->scan_order->scan[scan_idx];
-  const int br_ctx =
-      get_br_ctx(txb_info->levels, pos, txb_info->bwl, tx_class);
-  cost += get_br_cost(abs_qc, br_ctx, txb_costs->lps_cost[br_ctx]);
-  cost += get_golomb_cost(abs_qc);
-  return cost;
-}
-
-// Base-level context of the coefficient at raster position `coeff_idx`.
-// For the end-of-block coefficient the context depends only on how far into
-// the scan it sits (0, first eighth, first quarter, rest); otherwise it is
-// derived from the neighbouring magnitudes in `levels`.
-static INLINE int get_nz_map_ctx(const uint8_t *const levels,
-                                 const int coeff_idx, const int bwl,
-                                 const int height, const int scan_idx,
-                                 const int is_eob, const TX_SIZE tx_size,
-                                 const TX_CLASS tx_class) {
-  if (!is_eob) {
-    const int mag =
-        get_nz_mag(levels + get_padded_idx(coeff_idx, bwl), bwl, tx_class);
-    return get_nz_map_ctx_from_stats(mag, coeff_idx, bwl, tx_size, tx_class);
-  }
-
-  const int num_coeffs = height << bwl;
-  if (scan_idx == 0) return 0;
-  if (scan_idx <= num_coeffs / 8) return 1;
-  return (scan_idx <= num_coeffs / 4) ? 2 : 3;
-}
-/* Fills `stats` with rate, distortion and RD figures for the coefficient at
- * scan position `scan_idx`, both at its current quantized level and at the
- * alternative level returned by get_lower_coeff().
- *
- * Distortions are stored relative to dist0, the distortion obtained when the
- * coefficient is dequantized from level 0. `is_eob` is forwarded to the
- * context and cost helpers, which use separate tables for the EOB coefficient.
- * The coefficient must be non-zero (asserted). new_eob, update and nz_rd are
- * only reset here.
- */
-static void get_dist_cost_stats(LevelDownStats *const stats, const int scan_idx,
- const int is_eob,
- const LV_MAP_COEFF_COST *const txb_costs,
- const TxbInfo *const txb_info,
- const TX_CLASS tx_class) {
- const int16_t *const scan = txb_info->scan_order->scan;
- const int coeff_idx = scan[scan_idx];
- const tran_low_t qc = txb_info->qcoeff[coeff_idx];
- const uint8_t *const levels = txb_info->levels;
- stats->new_eob = -1;
- stats->update = 0;
- stats->rd_low = 0;
- stats->rd = 0;
- stats->nz_rd = 0;
- stats->dist_low = 0;
- stats->rate_low = 0;
- stats->low_qc = 0;
- // Rate and distortion of the coefficient at its current level.
- const tran_low_t tqc = txb_info->tcoeff[coeff_idx];
- const int dqv = txb_info->dequant[coeff_idx != 0];
- const int coeff_ctx =
- get_nz_map_ctx(levels, coeff_idx, txb_info->bwl, txb_info->height,
- scan_idx, is_eob, txb_info->tx_size, tx_class);
- const int qc_cost = get_coeff_cost(qc, scan_idx, is_eob, txb_info, txb_costs,
- coeff_ctx, tx_class);
- assert(qc != 0);
- const tran_low_t dqc = qcoeff_to_dqcoeff(qc, coeff_idx, dqv, txb_info->shift,
- txb_info->iqmatrix);
- const int64_t dqc_dist = get_coeff_dist(tqc, dqc, txb_info->shift);
-
- // Reference distortion: the coefficient dequantized from level 0.
- const tran_low_t dqc0 =
- qcoeff_to_dqcoeff(0, coeff_idx, dqv, txb_info->shift, txb_info->iqmatrix);
-
- stats->dist0 = get_coeff_dist(tqc, dqc0, txb_info->shift);
- stats->dist = dqc_dist - stats->dist0;
- stats->rate = qc_cost;
-
- stats->rd = RDCOST(txb_info->rdmult, stats->rate, stats->dist);
- // Same figures for the alternative (lower) level.
- stats->low_qc = get_lower_coeff(qc);
-
- if (is_eob && stats->low_qc == 0) {
- stats->rd_low = stats->rd; // disable selection of low_qc in this case.
- } else {
- if (stats->low_qc == 0) {
- stats->dist_low = 0;
- } else {
- stats->low_dqc = qcoeff_to_dqcoeff(stats->low_qc, coeff_idx, dqv,
- txb_info->shift, txb_info->iqmatrix);
- const int64_t low_dqc_dist =
- get_coeff_dist(tqc, stats->low_dqc, txb_info->shift);
- stats->dist_low = low_dqc_dist - stats->dist0;
- }
- const int low_qc_cost =
- get_coeff_cost(stats->low_qc, scan_idx, is_eob, txb_info, txb_costs,
- coeff_ctx, tx_class);
- stats->rate_low = low_qc_cost;
- stats->rd_low = RDCOST(txb_info->rdmult, stats->rate_low, stats->dist_low);
- }
-}
-
-// Same as get_dist_cost_stats() with is_eob == 0, and additionally sets
-// stats->nz_rd to the difference between the best RD cost of the coefficient
-// coded as a regular coefficient and its best RD cost coded as the EOB
-// coefficient (current or lower level, whichever is cheaper).
-static void get_dist_cost_stats_with_eob(
-    LevelDownStats *const stats, const int scan_idx,
-    const LV_MAP_COEFF_COST *const txb_costs, const TxbInfo *const txb_info,
-    const TX_CLASS tx_class) {
-  get_dist_cost_stats(stats, scan_idx, /*is_eob=*/0, txb_costs, txb_info,
-                      tx_class);
-
-  const int coeff_idx = txb_info->scan_order->scan[scan_idx];
-  const tran_low_t qc = txb_info->qcoeff[coeff_idx];
-  const int ctx_as_eob = get_nz_map_ctx(
-      txb_info->levels, coeff_idx, txb_info->bwl, txb_info->height, scan_idx,
-      /*is_eob=*/1, txb_info->tx_size, tx_class);
-
-  const int rate_as_eob = get_coeff_cost(qc, scan_idx, 1, txb_info, txb_costs,
-                                         ctx_as_eob, tx_class);
-  int64_t best_rd_as_eob = RDCOST(txb_info->rdmult, rate_as_eob, stats->dist);
-
-  if (stats->low_qc != 0) {
-    const int low_rate_as_eob =
-        get_coeff_cost(stats->low_qc, scan_idx, 1, txb_info, txb_costs,
-                       ctx_as_eob, tx_class);
-    const int64_t low_rd_as_eob =
-        RDCOST(txb_info->rdmult, low_rate_as_eob, stats->dist_low);
-    if (low_rd_as_eob < best_rd_as_eob) best_rd_as_eob = low_rd_as_eob;
-  }
-
-  stats->nz_rd = AOMMIN(stats->rd_low, stats->rd) - best_rd_as_eob;
-}
-
-// Stores `qc` as the quantized coefficient at `coeff_idx` and mirrors its
-// magnitude, saturated to INT8_MAX, into the padded `levels` map.
-static INLINE void update_qcoeff(const int coeff_idx, const tran_low_t qc,
-                                 const TxbInfo *const txb_info) {
-  const int padded_idx = get_padded_idx(coeff_idx, txb_info->bwl);
-  txb_info->qcoeff[coeff_idx] = qc;
-  txb_info->levels[padded_idx] = (uint8_t)clamp(abs(qc), 0, INT8_MAX);
-}
-
-// Sets the quantized coefficient at `coeff_idx` to `qc` (see update_qcoeff)
-// and refreshes the matching dequantized value. dequant[0] is used for index
-// 0 and dequant[1] for every other index.
-static INLINE void update_coeff(const int coeff_idx, const tran_low_t qc,
-                                const TxbInfo *const txb_info) {
-  const int dqv = txb_info->dequant[coeff_idx != 0];
-
-  update_qcoeff(coeff_idx, qc, txb_info);
-
-  const tran_low_t dqc = qcoeff_to_dqcoeff(qc, coeff_idx, dqv, txb_info->shift,
-                                           txb_info->iqmatrix);
-  txb_info->dqcoeff[coeff_idx] = dqc;
-}
-
-// Builds the padded magnitude map for a width x height block of coefficients.
-// Each row of `levels` holds `width` magnitudes (saturated to INT8_MAX)
-// followed by TX_PAD_HOR zeros; TX_PAD_TOP rows before the block and the
-// bottom padding after it are zeroed as well, so `levels` must point past the
-// top padding of its buffer.
-void av1_txb_init_levels_c(const tran_low_t *const coeff, const int width,
-                           const int height, uint8_t *const levels) {
-  const int stride = width + TX_PAD_HOR;
-
-  // Padding rows above and below the block.
-  memset(levels - TX_PAD_TOP * stride, 0,
-         sizeof(*levels) * TX_PAD_TOP * stride);
-  memset(levels + stride * height, 0,
-         sizeof(*levels) * (TX_PAD_BOTTOM * stride + TX_PAD_END));
-
-  for (int row = 0; row < height; row++) {
-    const tran_low_t *const src = coeff + row * width;
-    uint8_t *const dst = levels + row * stride;
-    int col = 0;
-    for (; col < width; col++) {
-      dst[col] = (uint8_t)clamp(abs(src[col]), 0, INT8_MAX);
-    }
-    // Horizontal padding at the end of the row.
-    for (; col < stride; col++) {
-      dst[col] = 0;
-    }
-  }
-}
-
-// Computes the base-level context of each of the first `eob` coefficients in
-// scan order and stores it in `coeff_contexts` at the coefficient's raster
-// position. The last of them is treated as the EOB coefficient.
-void av1_get_nz_map_contexts_c(const uint8_t *const levels,
-                               const int16_t *const scan, const uint16_t eob,
-                               const TX_SIZE tx_size, const TX_CLASS tx_class,
-                               int8_t *const coeff_contexts) {
-  const int bwl = get_txb_bwl(tx_size);
-  const int height = get_txb_high(tx_size);
-  const int last_scan_idx = eob - 1;
-
-  int scan_idx = 0;
-  while (scan_idx < eob) {
-    const int pos = scan[scan_idx];
-    const int is_last = (scan_idx == last_scan_idx);
-    coeff_contexts[pos] = get_nz_map_ctx(levels, pos, bwl, height, scan_idx,
-                                         is_last, tx_size, tx_class);
-    ++scan_idx;
-  }
-}
-/* Writes the coefficients of one transform block to the bitstream via `w`.
- *
- * The symbols are emitted in this order: the all-zero flag (eob == 0), and
- * then, only when eob is non-zero, the transform type, the end-of-block
- * position, the coefficient magnitudes walking the scan backwards from
- * eob - 1, and finally the signs walking the scan forwards, each followed by
- * a Golomb-coded remainder for magnitudes above
- * COEFF_BASE_RANGE + NUM_BASE_LEVELS.
- *
- * `tcoeff` holds the quantized coefficients in raster order and `txb_ctx`
- * supplies the contexts for the all-zero flag and the DC sign.
- */
-void av1_write_coeffs_txb(const AV1_COMMON *const cm, MACROBLOCKD *xd,
- aom_writer *w, int blk_row, int blk_col, int plane,
- TX_SIZE tx_size, const tran_low_t *tcoeff,
- uint16_t eob, TXB_CTX *txb_ctx) {
- const TX_SIZE txs_ctx = get_txsize_entropy_ctx(tx_size);
- FRAME_CONTEXT *ec_ctx = xd->tile_ctx;
- aom_write_symbol(w, eob == 0,
- ec_ctx->txb_skip_cdf[txs_ctx][txb_ctx->txb_skip_ctx], 2);
- if (eob == 0) return;
- const PLANE_TYPE plane_type = get_plane_type(plane);
- const TX_TYPE tx_type = av1_get_tx_type(plane_type, xd, blk_row, blk_col,
- tx_size, cm->reduced_tx_set_used);
- const TX_CLASS tx_class = tx_type_to_class[tx_type];
- const SCAN_ORDER *const scan_order = get_scan(tx_size, tx_type);
- const int16_t *const scan = scan_order->scan;
- int c;
- const int bwl = get_txb_bwl(tx_size);
- const int width = get_txb_wide(tx_size);
- const int height = get_txb_high(tx_size);
- // Padded magnitude map; the context helpers below read it.
- uint8_t levels_buf[TX_PAD_2D];
- uint8_t *const levels = set_levels(levels_buf, width);
- DECLARE_ALIGNED(16, int8_t, coeff_contexts[MAX_TX_SQUARE]);
- av1_txb_init_levels(tcoeff, width, height, levels);
-
- av1_write_tx_type(cm, xd, blk_row, blk_col, plane, tx_size, w);
- // EOB position token; the CDF (and its symbol count) depends on tx_size.
- int eob_extra;
- const int eob_pt = get_eob_pos_token(eob, &eob_extra);
- const int eob_multi_size = txsize_log2_minus4[tx_size];
- const int eob_multi_ctx = (tx_class == TX_CLASS_2D) ? 0 : 1;
- switch (eob_multi_size) {
- case 0:
- aom_write_symbol(w, eob_pt - 1,
- ec_ctx->eob_flag_cdf16[plane_type][eob_multi_ctx], 5);
- break;
- case 1:
- aom_write_symbol(w, eob_pt - 1,
- ec_ctx->eob_flag_cdf32[plane_type][eob_multi_ctx], 6);
- break;
- case 2:
- aom_write_symbol(w, eob_pt - 1,
- ec_ctx->eob_flag_cdf64[plane_type][eob_multi_ctx], 7);
- break;
- case 3:
- aom_write_symbol(w, eob_pt - 1,
- ec_ctx->eob_flag_cdf128[plane_type][eob_multi_ctx], 8);
- break;
- case 4:
- aom_write_symbol(w, eob_pt - 1,
- ec_ctx->eob_flag_cdf256[plane_type][eob_multi_ctx], 9);
- break;
- case 5:
- aom_write_symbol(w, eob_pt - 1,
- ec_ctx->eob_flag_cdf512[plane_type][eob_multi_ctx], 10);
- break;
- default:
- aom_write_symbol(w, eob_pt - 1,
- ec_ctx->eob_flag_cdf1024[plane_type][eob_multi_ctx], 11);
- break;
- }
- // EOB offset bits: the top bit is CDF-coded, the rest are raw bits.
- if (k_eob_offset_bits[eob_pt] > 0) {
- const int eob_ctx = eob_pt - 3;
- int eob_shift = k_eob_offset_bits[eob_pt] - 1;
- int bit = (eob_extra & (1 << eob_shift)) ? 1 : 0;
- aom_write_symbol(w, bit,
- ec_ctx->eob_extra_cdf[txs_ctx][plane_type][eob_ctx], 2);
- for (int i = 1; i < k_eob_offset_bits[eob_pt]; i++) {
- eob_shift = k_eob_offset_bits[eob_pt] - 1 - i;
- bit = (eob_extra & (1 << eob_shift)) ? 1 : 0;
- aom_write_bit(w, bit);
- }
- }
-
- av1_get_nz_map_contexts(levels, scan, eob, tx_size, tx_class, coeff_contexts);
- // Magnitudes, from the last coded coefficient back to scan position 0.
- for (c = eob - 1; c >= 0; --c) {
- const int pos = scan[c];
- const int coeff_ctx = coeff_contexts[pos];
- const tran_low_t v = tcoeff[pos];
- const tran_low_t level = abs(v);
-
- if (c == eob - 1) {
- aom_write_symbol(
- w, AOMMIN(level, 3) - 1,
- ec_ctx->coeff_base_eob_cdf[txs_ctx][plane_type][coeff_ctx], 3);
- } else {
- aom_write_symbol(w, AOMMIN(level, 3),
- ec_ctx->coeff_base_cdf[txs_ctx][plane_type][coeff_ctx],
- 4);
- }
- if (level > NUM_BASE_LEVELS) {
- // level is above NUM_BASE_LEVELS: code the base range in chunks of up to
- // BR_CDF_SIZE - 1, stopping at the first chunk that is not full.
- const int base_range = level - 1 - NUM_BASE_LEVELS;
- const int br_ctx = get_br_ctx(levels, pos, bwl, tx_class);
- for (int idx = 0; idx < COEFF_BASE_RANGE; idx += BR_CDF_SIZE - 1) {
- const int k = AOMMIN(base_range - idx, BR_CDF_SIZE - 1);
- aom_write_symbol(
- w, k,
- ec_ctx->coeff_br_cdf[AOMMIN(txs_ctx, TX_32X32)][plane_type][br_ctx],
- BR_CDF_SIZE);
- if (k < BR_CDF_SIZE - 1) break;
- }
- }
- }
-
- // Loop to code all signs in the transform block,
- // starting with the sign of DC (if applicable)
- for (c = 0; c < eob; ++c) {
- const tran_low_t v = tcoeff[scan[c]];
- const tran_low_t level = abs(v);
- const int sign = (v < 0) ? 1 : 0;
- if (level) {
- if (c == 0) {
- aom_write_symbol(
- w, sign, ec_ctx->dc_sign_cdf[plane_type][txb_ctx->dc_sign_ctx], 2);
- } else {
- aom_write_bit(w, sign);
- }
- if (level > COEFF_BASE_RANGE + NUM_BASE_LEVELS)
- write_golomb(w, level - COEFF_BASE_RANGE - 1 - NUM_BASE_LEVELS);
- }
- }
-}
-/* Bundle of the common state, macroblock and bitstream writer used when
- * writing transform blocks. NOTE(review): no user of this struct is visible
- * in this part of the file; presumably it is passed through a per-block
- * callback - confirm against callers.
- */
-typedef struct encode_txb_args {
- const AV1_COMMON *cm;
- MACROBLOCK *x;
- aom_writer *w;
-} ENCODE_TXB_ARGS;
-
-// Looks up the coefficients, eob and entropy contexts stored in x->mbmi_ext
-// for (plane, block) and writes that transform block with
-// av1_write_coeffs_txb().
-static void write_coeffs_txb_wrap(const AV1_COMMON *cm, MACROBLOCK *x,
-                                  aom_writer *w, int plane, int block,
-                                  int blk_row, int blk_col, TX_SIZE tx_size) {
-  TXB_CTX txb_ctx = { x->mbmi_ext->txb_skip_ctx[plane][block],
-                      x->mbmi_ext->dc_sign_ctx[plane][block] };
-  const uint16_t eob = x->mbmi_ext->eobs[plane][block];
-  tran_low_t *const coeffs = BLOCK_OFFSET(x->mbmi_ext->tcoeff[plane], block);
-
-  av1_write_coeffs_txb(cm, &x->e_mbd, w, blk_row, blk_col, plane, tx_size,
-                       coeffs, eob, &txb_ctx);
-}
-
-// Writes the coefficients of every transform block of the coding block of
-// size `bsize` at (mi_row, mi_col). The block is processed in units of at most
-// 64x64 luma samples; within a unit all transform blocks of one plane are
-// written before moving to the next plane. Planes for which this block is not
-// the chroma reference are skipped.
-void av1_write_coeffs_mb(const AV1_COMMON *const cm, MACROBLOCK *x, int mi_row,
-                         int mi_col, aom_writer *w, BLOCK_SIZE bsize) {
-  MACROBLOCKD *const xd = &x->e_mbd;
-  const int num_planes = av1_num_planes(cm);
-  assert(bsize == get_plane_block_size(bsize, xd->plane[0].subsampling_x,
-                                       xd->plane[0].subsampling_y));
-
-  // Block extent and unit extent, both in smallest-transform units.
-  const int luma_cols = max_block_wide(xd, bsize, 0);
-  const int luma_rows = max_block_high(xd, bsize, 0);
-  const BLOCK_SIZE max_unit_bsize = BLOCK_64X64;
-  const int unit_cols = AOMMIN(
-      luma_cols, block_size_wide[max_unit_bsize] >> tx_size_wide_log2[0]);
-  const int unit_rows = AOMMIN(
-      luma_rows, block_size_high[max_unit_bsize] >> tx_size_high_log2[0]);
-
-  // Running per-plane index of the next transform block to write.
-  int next_block[MAX_MB_PLANE] = { 0 };
-
-  for (int unit_row = 0; unit_row < luma_rows; unit_row += unit_rows) {
-    const int unit_row_end = AOMMIN(unit_rows + unit_row, luma_rows);
-    for (int unit_col = 0; unit_col < luma_cols; unit_col += unit_cols) {
-      const int unit_col_end = AOMMIN(unit_cols + unit_col, luma_cols);
-      for (int plane = 0; plane < num_planes; ++plane) {
-        const struct macroblockd_plane *const pd = &xd->plane[plane];
-        if (!is_chroma_reference(mi_row, mi_col, bsize, pd->subsampling_x,
-                                 pd->subsampling_y))
-          continue;
-
-        const TX_SIZE tx_size = av1_get_tx_size(plane, xd);
-        const int tx_rows = tx_size_high_unit[tx_size];
-        const int tx_cols = tx_size_wide_unit[tx_size];
-        const int row_end =
-            ROUND_POWER_OF_TWO(unit_row_end, pd->subsampling_y);
-        const int col_end =
-            ROUND_POWER_OF_TWO(unit_col_end, pd->subsampling_x);
-
-        int blk_row = unit_row >> pd->subsampling_y;
-        for (; blk_row < row_end; blk_row += tx_rows) {
-          int blk_col = unit_col >> pd->subsampling_x;
-          for (; blk_col < col_end; blk_col += tx_cols) {
-            write_coeffs_txb_wrap(cm, x, w, plane, next_block[plane], blk_row,
-                                  blk_col, tx_size);
-            next_block[plane] += tx_rows * tx_cols;
-          }
-        }
-      }
-    }
-  }
-}
-
-// TODO(angiebird): use this function whenever it's possible
-// Rate of signalling `tx_type` for a transform block. It is zero for chroma
-// planes, when at most one transform type is available, for lossless
-// segments, and when the extended transform set index is not positive.
-static int get_tx_type_cost(const AV1_COMMON *cm, const MACROBLOCK *x,
-                            const MACROBLOCKD *xd, int plane, TX_SIZE tx_size,
-                            TX_TYPE tx_type) {
-  if (plane > 0) return 0;
-
-  const MB_MODE_INFO *const mbmi = xd->mi[0];
-  const int is_inter = is_inter_block(mbmi);
-
-  if (get_ext_tx_types(tx_size, is_inter, cm->reduced_tx_set_used) <= 1)
-    return 0;
-  if (xd->lossless[mbmi->segment_id]) return 0;
-
-  const int ext_tx_set =
-      get_ext_tx_set(tx_size, is_inter, cm->reduced_tx_set_used);
-  if (ext_tx_set <= 0) return 0;
-
-  const TX_SIZE square_tx_size = txsize_sqr_map[tx_size];
-  if (is_inter)
-    return x->inter_tx_type_costs[ext_tx_set][square_tx_size][tx_type];
-
-  // Intra costs are conditioned on the prediction direction; filter-intra
-  // modes are first mapped to a regular intra direction.
-  const PREDICTION_MODE intra_dir =
-      mbmi->filter_intra_mode_info.use_filter_intra
-          ? fimode_to_intradir[mbmi->filter_intra_mode_info.filter_intra_mode]
-          : mbmi->mode;
-  return x->intra_tx_type_costs[ext_tx_set][square_tx_size][intra_dir]
-                               [tx_type];
-}
-/* Returns the rate of coding the transform block `block` of plane `plane`
- * with its current quantized coefficients: the "not all-zero" flag, the
- * transform type, the end-of-block position and every coefficient up to eob.
- *
- * The coefficients are handled in three parts: the one at scan position
- * eob - 1 (priced from base_eob_cost), the ones strictly between it and scan
- * position 0, and the one at scan position 0, whose sign is priced from
- * dc_sign_cost instead of as a literal bit. The function reads scan[eob - 1]
- * unconditionally, so eob must be at least 1; the visible caller,
- * av1_cost_coeffs_txb(), returns early when eob is 0.
- */
-static AOM_FORCE_INLINE int warehouse_efficients_txb(
- const AV1_COMMON *const cm, const MACROBLOCK *x, const int plane,
- const int block, const TX_SIZE tx_size, const TXB_CTX *const txb_ctx,
- const struct macroblock_plane *p, const int eob,
- const PLANE_TYPE plane_type, const LV_MAP_COEFF_COST *const coeff_costs,
- const MACROBLOCKD *const xd, const TX_TYPE tx_type,
- const TX_CLASS tx_class) {
- const tran_low_t *const qcoeff = BLOCK_OFFSET(p->qcoeff, block);
- const int txb_skip_ctx = txb_ctx->txb_skip_ctx;
- const int bwl = get_txb_bwl(tx_size);
- const int width = get_txb_wide(tx_size);
- const int height = get_txb_high(tx_size);
- const SCAN_ORDER *const scan_order = get_scan(tx_size, tx_type);
- const int16_t *const scan = scan_order->scan;
- uint8_t levels_buf[TX_PAD_2D];
- uint8_t *const levels = set_levels(levels_buf, width);
- DECLARE_ALIGNED(16, int8_t, coeff_contexts[MAX_TX_SQUARE]);
- const int eob_multi_size = txsize_log2_minus4[tx_size];
- const LV_MAP_EOB_COST *const eob_costs =
- &x->eob_costs[eob_multi_size][plane_type];
- int cost = coeff_costs->txb_skip_cost[txb_skip_ctx][0];
- // Padded magnitude map; the context helpers below read it.
- av1_txb_init_levels(qcoeff, width, height, levels);
-
- cost += get_tx_type_cost(cm, x, xd, plane, tx_size, tx_type);
-
- cost += get_eob_cost(eob, eob_costs, coeff_costs, tx_class);
-
- av1_get_nz_map_contexts(levels, scan, eob, tx_size, tx_class, coeff_contexts);
- // Part 1: the coefficient at scan position eob - 1.
- const int(*lps_cost)[COEFF_BASE_RANGE + 1] = coeff_costs->lps_cost;
- int c = eob - 1;
- {
- const int pos = scan[c];
- const tran_low_t v = qcoeff[pos];
- const int sign = v >> 31;
- const int level = (v ^ sign) - sign;
- const int coeff_ctx = coeff_contexts[pos];
- cost += coeff_costs->base_eob_cost[coeff_ctx][AOMMIN(level, 3) - 1];
-
- if (v) {
- // base-range and golomb cost first; the sign cost follows below
- if (level > NUM_BASE_LEVELS) {
- const int ctx = get_br_ctx(levels, pos, bwl, tx_class);
- const int base_range =
- AOMMIN(level - 1 - NUM_BASE_LEVELS, COEFF_BASE_RANGE);
- cost += lps_cost[ctx][base_range];
- cost += get_golomb_cost(level);
- }
- if (c) {
- cost += av1_cost_literal(1);
- } else {
- const int sign01 = (sign ^ sign) - sign;
- const int dc_sign_ctx = txb_ctx->dc_sign_ctx;
- cost += coeff_costs->dc_sign_cost[dc_sign_ctx][sign01];
- return cost;
- }
- }
- }
- const int(*base_cost)[4] = coeff_costs->base_cost;
- for (c = eob - 2; c >= 1; --c) {
- const int pos = scan[c];
- const int coeff_ctx = coeff_contexts[pos];
- const tran_low_t v = qcoeff[pos];
- const int level = abs(v);
- const int cost0 = base_cost[coeff_ctx][AOMMIN(level, 3)];
- if (v) {
- // sign bit cost
- cost += av1_cost_literal(1);
- if (level > NUM_BASE_LEVELS) {
- const int ctx = get_br_ctx(levels, pos, bwl, tx_class);
- const int base_range =
- AOMMIN(level - 1 - NUM_BASE_LEVELS, COEFF_BASE_RANGE);
- cost += lps_cost[ctx][base_range];
- cost += get_golomb_cost(level);
- }
- }
- cost += cost0;
- }
- if (c == 0) {
- const int pos = scan[c];
- const tran_low_t v = qcoeff[pos];
- const int coeff_ctx = coeff_contexts[pos];
- const int sign = v >> 31;
- const int level = (v ^ sign) - sign;
- cost += base_cost[coeff_ctx][AOMMIN(level, 3)];
-
- if (v) {
- // sign cost: (sign ^ sign) is always 0, so sign01 is simply -sign
- const int sign01 = (sign ^ sign) - sign;
- const int dc_sign_ctx = txb_ctx->dc_sign_ctx;
- cost += coeff_costs->dc_sign_cost[dc_sign_ctx][sign01];
- if (level > NUM_BASE_LEVELS) {
- const int ctx = get_br_ctx(levels, pos, bwl, tx_class);
- const int base_range =
- AOMMIN(level - 1 - NUM_BASE_LEVELS, COEFF_BASE_RANGE);
- cost += lps_cost[ctx][base_range];
- cost += get_golomb_cost(level);
- }
- }
- }
- return cost;
-}
-
-// Returns the rate of coding transform block `block` of plane `plane` with
-// transform type `tx_type`. An empty block (eob == 0) costs only the all-zero
-// flag. Otherwise the work is done by warehouse_efficients_txb(), called with
-// the transform class spelled as a literal in each case.
-int av1_cost_coeffs_txb(const AV1_COMMON *const cm, const MACROBLOCK *x,
-                        const int plane, const int block, const TX_SIZE tx_size,
-                        const TX_TYPE tx_type, const TXB_CTX *const txb_ctx) {
-  const struct macroblock_plane *p = &x->plane[plane];
-  const PLANE_TYPE plane_type = get_plane_type(plane);
-  const TX_SIZE txs_ctx = get_txsize_entropy_ctx(tx_size);
-  const LV_MAP_COEFF_COST *const coeff_costs =
-      &x->coeff_costs[txs_ctx][plane_type];
-  const int eob = p->eobs[block];
-
-  if (eob == 0) {
-    return coeff_costs->txb_skip_cost[txb_ctx->txb_skip_ctx][1];
-  }
-
-  const MACROBLOCKD *const xd = &x->e_mbd;
-
-  switch (tx_type_to_class[tx_type]) {
-    case TX_CLASS_2D:
-      return warehouse_efficients_txb(cm, x, plane, block, tx_size, txb_ctx, p,
-                                      eob, plane_type, coeff_costs, xd, tx_type,
-                                      TX_CLASS_2D);
-    case TX_CLASS_HORIZ:
-      return warehouse_efficients_txb(cm, x, plane, block, tx_size, txb_ctx, p,
-                                      eob, plane_type, coeff_costs, xd, tx_type,
-                                      TX_CLASS_HORIZ);
-    case TX_CLASS_VERT:
-      return warehouse_efficients_txb(cm, x, plane, block, tx_size, txb_ctx, p,
-                                      eob, plane_type, coeff_costs, xd, tx_type,
-                                      TX_CLASS_VERT);
-    default: assert(false); return 0;
-  }
-}
-/* Greedy rate-distortion optimization of the quantized coefficients described
- * by `txb_info`.
- *
- * Starting at the last coded coefficient and walking the scan backwards, each
- * non-zero coefficient may be replaced by its lower level when that has a
- * smaller RD cost. While fewer than two coefficients have been kept unchanged
- * since the last eob change, the walk also tests whether ending the block at
- * the current coefficient is cheaper, and if so zeroes everything after it
- * and shrinks txb_info->eob. Finally the whole block is zeroed when coding it
- * as all-zero is no more expensive.
- *
- * Returns 1 if any coefficient or the eob was changed, 0 otherwise. The total
- * rate of the result is written to *rate_cost, except when the block is
- * already empty on entry: then the function returns 0 without writing it.
- */
-static int optimize_txb(TxbInfo *txb_info, const LV_MAP_COEFF_COST *txb_costs,
- const LV_MAP_EOB_COST *txb_eob_costs, int *rate_cost) {
- int update = 0;
- if (txb_info->eob == 0) return update;
- const int16_t *const scan = txb_info->scan_order->scan;
- // forward optimize the nz_map
- const int init_eob = txb_info->eob;
- const TX_CLASS tx_class = tx_type_to_class[txb_info->tx_type];
- const int eob_cost =
- get_eob_cost(init_eob, txb_eob_costs, txb_costs, tx_class);
-
- // backward optimize the level-k map
- int accu_rate = eob_cost;
- int64_t accu_dist = 0;
- int64_t prev_eob_rd_cost = INT64_MAX;
- int64_t cur_eob_rd_cost = 0;
- // The coefficient at the initial eob may only be lowered to a non-zero level.
- {
- const int si = init_eob - 1;
- const int coeff_idx = scan[si];
- LevelDownStats stats;
- get_dist_cost_stats(&stats, si, si == init_eob - 1, txb_costs, txb_info,
- tx_class);
- if ((stats.rd_low < stats.rd) && (stats.low_qc != 0)) {
- update = 1;
- update_coeff(coeff_idx, stats.low_qc, txb_info);
- accu_rate += stats.rate_low;
- accu_dist += stats.dist_low;
- } else {
- accu_rate += stats.rate;
- accu_dist += stats.dist;
- }
- }
-
- int si = init_eob - 2;
- int8_t has_nz_tail = 0;
- // eob is not fixed
- for (; si >= 0 && has_nz_tail < 2; --si) {
- assert(si != init_eob - 1);
- const int coeff_idx = scan[si];
- tran_low_t qc = txb_info->qcoeff[coeff_idx];
-
- if (qc == 0) {
- const int coeff_ctx =
- get_lower_levels_ctx(txb_info->levels, coeff_idx, txb_info->bwl,
- txb_info->tx_size, tx_class);
- accu_rate += txb_costs->base_cost[coeff_ctx][0];
- } else {
- LevelDownStats stats;
- get_dist_cost_stats_with_eob(&stats, si, txb_costs, txb_info, tx_class);
- // check if it is better to make this the last significant coefficient
- int cur_eob_rate =
- get_eob_cost(si + 1, txb_eob_costs, txb_costs, tx_class);
- cur_eob_rd_cost = RDCOST(txb_info->rdmult, cur_eob_rate, 0);
- prev_eob_rd_cost =
- RDCOST(txb_info->rdmult, accu_rate, accu_dist) + stats.nz_rd;
- if (cur_eob_rd_cost <= prev_eob_rd_cost) {
- update = 1;
- for (int j = si + 1; j < txb_info->eob; j++) {
- const int coeff_pos_j = scan[j];
- update_coeff(coeff_pos_j, 0, txb_info);
- }
- txb_info->eob = si + 1;
-
- // rerun cost calculation due to change of eob
- accu_rate = cur_eob_rate;
- accu_dist = 0;
- get_dist_cost_stats(&stats, si, 1, txb_costs, txb_info, tx_class);
- if ((stats.rd_low < stats.rd) && (stats.low_qc != 0)) {
- update = 1;
- update_coeff(coeff_idx, stats.low_qc, txb_info);
- accu_rate += stats.rate_low;
- accu_dist += stats.dist_low;
- } else {
- accu_rate += stats.rate;
- accu_dist += stats.dist;
- }
-
- // reset non zero tail when new eob is found
- has_nz_tail = 0;
- } else {
- int bUpdCoeff = 0;
- if (stats.rd_low < stats.rd) {
- if ((si < txb_info->eob - 1)) {
- bUpdCoeff = 1;
- update = 1;
- }
- } else {
- ++has_nz_tail;
- }
-
- if (bUpdCoeff) {
- update_coeff(coeff_idx, stats.low_qc, txb_info);
- accu_rate += stats.rate_low;
- accu_dist += stats.dist_low;
- } else {
- accu_rate += stats.rate;
- accu_dist += stats.dist;
- }
- }
- }
- } // for (si)
-
- // eob is fixed
- for (; si >= 0; --si) {
- assert(si != init_eob - 1);
- const int coeff_idx = scan[si];
- tran_low_t qc = txb_info->qcoeff[coeff_idx];
-
- if (qc == 0) {
- const int coeff_ctx =
- get_lower_levels_ctx(txb_info->levels, coeff_idx, txb_info->bwl,
- txb_info->tx_size, tx_class);
- accu_rate += txb_costs->base_cost[coeff_ctx][0];
- } else {
- LevelDownStats stats;
- get_dist_cost_stats(&stats, si, 0, txb_costs, txb_info, tx_class);
-
- int bUpdCoeff = 0;
- if (stats.rd_low < stats.rd) {
- if ((si < txb_info->eob - 1)) {
- bUpdCoeff = 1;
- update = 1;
- }
- }
- if (bUpdCoeff) {
- update_coeff(coeff_idx, stats.low_qc, txb_info);
- accu_rate += stats.rate_low;
- accu_dist += stats.dist_low;
- } else {
- accu_rate += stats.rate;
- accu_dist += stats.dist;
- }
- }
- } // for (si)
- // Compare the optimized block against coding the block as all-zero.
- int non_zero_blk_rate =
- txb_costs->txb_skip_cost[txb_info->txb_ctx->txb_skip_ctx][0];
- prev_eob_rd_cost =
- RDCOST(txb_info->rdmult, accu_rate + non_zero_blk_rate, accu_dist);
-
- int zero_blk_rate =
- txb_costs->txb_skip_cost[txb_info->txb_ctx->txb_skip_ctx][1];
- int64_t zero_blk_rd_cost = RDCOST(txb_info->rdmult, zero_blk_rate, 0);
- if (zero_blk_rd_cost <= prev_eob_rd_cost) {
- update = 1;
- for (int j = 0; j < txb_info->eob; j++) {
- const int coeff_pos_j = scan[j];
- update_coeff(coeff_pos_j, 0, txb_info);
- }
- txb_info->eob = 0;
- }
-
- // record total rate cost
- *rate_cost = zero_blk_rd_cost <= prev_eob_rd_cost
- ? zero_blk_rate
- : accu_rate + non_zero_blk_rate;
- // The transform type is only signalled for non-empty blocks.
- if (txb_info->eob > 0) {
- *rate_cost += txb_info->tx_type_cost;
- }
-
- return update;
-}
-
-// These numbers are empirically obtained.
-// Indexed as [REF_TYPES][PLANE_TYPES]. NOTE(review): no use of this table is
-// visible in this part of the file; presumably it scales the RD multiplier
-// per reference type and plane type - confirm against the user.
-static const int plane_rd_mult[REF_TYPES][PLANE_TYPES] = {
- { 17, 13 },
- { 16, 10 },
-};
-
-// Allocates and zeroes the hash table (HBT_TABLE_SIZE rows of
-// HBT_ARRAY_LENGTH entries), initializes the CRC calculator used for hashing
-// and clears hbt_needs_init.
-// NOTE(review): the allocation result is not checked before it is used.
-void hbt_init() {
-  const size_t table_bytes =
-      sizeof(OptTxbQcoeff) * HBT_TABLE_SIZE * HBT_ARRAY_LENGTH;
-
-  hbt_hash_table = aom_malloc(table_bytes);
-  memset(hbt_hash_table, 0, table_bytes);
-
-  av1_crc32c_calculator_init(&crc_calculator);  // 31 bit: qc & ctx
-  hbt_needs_init = 0;
-}
-
-// Releases the hash table and returns the module to its "needs init" state.
-// Without the reset, hbt_hash_table would dangle and hbt_needs_init would stay
-// 0, so a later hbt_create_hashes() would skip hbt_init() and use freed
-// memory.
-void hbt_destroy() {
-  aom_free(hbt_hash_table);
-  hbt_hash_table = NULL;
-  hbt_needs_init = 1;
-}
-
-// Handles a hash-table miss: runs optimize_txb() on the block and records the
-// outcome in the table slot selected by the two hashes, overwriting whatever
-// was there. The slot stores the resulting rate, both hashes, and one delta
-// per scan position (negative = moved towards zero, 0 = unchanged).
-// If anything changed, p->eobs[block] and p->txb_entropy_ctx[block] are
-// refreshed. Returns the (possibly reduced) eob. `fast_mode` is unused.
-int hbt_hash_miss(uint32_t hbt_ctx_hash, uint32_t hbt_qc_hash,
-                  TxbInfo *txb_info, const LV_MAP_COEFF_COST *txb_costs,
-                  const LV_MAP_EOB_COST *txb_eob_costs,
-                  const struct macroblock_plane *p, int block, int fast_mode,
-                  int *rate_cost) {
-  (void)fast_mode;
-  const int16_t *scan = txb_info->scan_order->scan;
-  const int prev_eob = txb_info->eob;
-  assert(HBT_EOB <= 16);  // Lengthen array if allowing longer eob.
-  assert(prev_eob <= HBT_EOB);  // prev_coeff[] holds at most HBT_EOB values.
-  int32_t prev_coeff[16];
-  for (int i = 0; i < prev_eob; i++) {
-    prev_coeff[i] = txb_info->qcoeff[scan[i]];
-  }
-  for (int i = prev_eob; i < HBT_EOB; i++) {
-    prev_coeff[i] = 0;  // For compiler peace of mind.
-  }
-
-  av1_txb_init_levels(txb_info->qcoeff, txb_info->width, txb_info->height,
-                      txb_info->levels);
-
-  const int update =
-      optimize_txb(txb_info, txb_costs, txb_eob_costs, rate_cost);
-
-  // Overwrite old entry. The indices are kept at full width so the slot is
-  // the same one hbt_search_match() looks up, whatever the table dimensions.
-  const uint32_t hbt_table_index = hbt_ctx_hash % HBT_TABLE_SIZE;
-  const uint32_t hbt_array_index = hbt_qc_hash % HBT_ARRAY_LENGTH;
-  OptTxbQcoeff *const entry =
-      &hbt_hash_table[(size_t)hbt_table_index * HBT_ARRAY_LENGTH +
-                      hbt_array_index];
-  entry->rate_cost = *rate_cost;
-  entry->init = 1;
-  entry->hbt_qc_hash = hbt_qc_hash;
-  entry->hbt_ctx_hash = hbt_ctx_hash;
-  assert(prev_eob >= txb_info->eob);  // eob can't get longer
-  for (int i = 0; i < txb_info->eob; i++) {
-    // Record how coeff changed. Convention: towards zero is negative.
-    const tran_low_t new_qc = txb_info->qcoeff[scan[i]];
-    if (new_qc > 0)
-      entry->deltas[i] = new_qc - prev_coeff[i];
-    else
-      entry->deltas[i] = prev_coeff[i] - new_qc;
-  }
-  for (int i = txb_info->eob; i < prev_eob; i++) {
-    // If eob got shorter, record that all after it changed to zero.
-    if (prev_coeff[i] > 0)
-      entry->deltas[i] = -prev_coeff[i];
-    else
-      entry->deltas[i] = prev_coeff[i];
-  }
-  for (int i = prev_eob; i < HBT_EOB; i++) {
-    // Record 'no change' after optimized coefficients run out.
-    entry->deltas[i] = 0;
-  }
-
-  if (update) {
-    p->eobs[block] = txb_info->eob;
-    p->txb_entropy_ctx[block] = av1_get_txb_entropy_context(
-        txb_info->qcoeff, txb_info->scan_order, txb_info->eob);
-  }
-  return txb_info->eob;
-}
-
-// Handles a hash-table hit: replays the deltas stored in the selected slot
-// onto the block's quantized coefficients instead of re-running the
-// optimization. Only negative deltas (moves towards zero) are applied.
-// *rate_cost is taken from the slot. If any coefficient changed, the eob is
-// recomputed and p->eobs[block] / p->txb_entropy_ctx[block] are refreshed.
-// Returns the resulting eob.
-int hbt_hash_hit(uint32_t hbt_table_index, int hbt_array_index,
-                 TxbInfo *txb_info, const struct macroblock_plane *p, int block,
-                 int *rate_cost) {
-  const int16_t *scan = txb_info->scan_order->scan;
-  const OptTxbQcoeff *const entry =
-      &hbt_hash_table[hbt_table_index * HBT_ARRAY_LENGTH + hbt_array_index];
-  int last_nz_end = 0;
-  int changed = 0;
-
-  for (int i = 0; i < txb_info->eob; i++) {
-    const int pos = scan[i];
-    // Delta convention is negatives go towards zero, so only apply those ones.
-    if (entry->deltas[i] < 0) {
-      if (txb_info->qcoeff[pos] > 0)
-        txb_info->qcoeff[pos] += entry->deltas[i];
-      else
-        txb_info->qcoeff[pos] -= entry->deltas[i];
-
-      changed = 1;
-      update_coeff(pos, txb_info->qcoeff[pos], txb_info);
-    }
-    if (txb_info->qcoeff[pos]) last_nz_end = i + 1;
-  }
-
-  // Rate_cost can be calculated here instead (av1_cost_coeffs_txb), but
-  // it is expensive and gives little benefit as long as qc_hash is high bit
-  *rate_cost = entry->rate_cost;
-
-  if (changed) {
-    txb_info->eob = last_nz_end;
-    p->eobs[block] = txb_info->eob;
-    p->txb_entropy_ctx[block] = av1_get_txb_entropy_context(
-        txb_info->qcoeff, txb_info->scan_order, txb_info->eob);
-  }
-
-  return txb_info->eob;
-}
-
-// Looks up the slot selected by the two hashes. The lookup is a hit only when
-// the slot stores both hashes and has been initialized; a hit is served by
-// hbt_hash_hit(), anything else by hbt_hash_miss(). Returns the eob reported
-// by whichever handler ran.
-int hbt_search_match(uint32_t hbt_ctx_hash, uint32_t hbt_qc_hash,
-                     TxbInfo *txb_info, const LV_MAP_COEFF_COST *txb_costs,
-                     const LV_MAP_EOB_COST *txb_eob_costs,
-                     const struct macroblock_plane *p, int block, int fast_mode,
-                     int *rate_cost) {
-  // Check for qcoeff match
-  const int hbt_array_index = hbt_qc_hash % HBT_ARRAY_LENGTH;
-  const int hbt_table_index = hbt_ctx_hash % HBT_TABLE_SIZE;
-  const OptTxbQcoeff *const entry =
-      &hbt_hash_table[hbt_table_index * HBT_ARRAY_LENGTH + hbt_array_index];
-
-  const int is_hit = entry->hbt_qc_hash == hbt_qc_hash &&
-                     entry->hbt_ctx_hash == hbt_ctx_hash && entry->init;
-  if (!is_hit) {
-    return hbt_hash_miss(hbt_ctx_hash, hbt_qc_hash, txb_info, txb_costs,
-                         txb_eob_costs, p, block, fast_mode, rate_cost);
-  }
-  return hbt_hash_hit(hbt_table_index, hbt_array_index, txb_info, p, block,
-                      rate_cost);
-}
-
-int hbt_create_hashes(TxbInfo *txb_info, const LV_MAP_COEFF_COST *txb_costs,
- const LV_MAP_EOB_COST *txb_eob_costs,
- const struct macroblock_plane *p, int block,
- int fast_mode, int *rate_cost) {
- // Initialize hash table if needed.
- if (hbt_needs_init) {
- hbt_init();
- }
-
- //// Hash creation
- uint8_t txb_hash_data[256]; // Asserts below to ensure enough space.
- const int16_t *scan = txb_info->scan_order->scan;
- uint8_t chunk = 0;
- int hash_data_index = 0;
-
- // Make qc_hash.
- int packing_index = 0; // needed for packing.
- for (int i = 0; i < txb_info->eob; i++) {
- tran_low_t prechunk = txb_info->qcoeff[scan[i]];
-
- // Softening: Improves speed. Aligns with signed deltas.
- if (prechunk < 0) prechunk *= -1;
-
- // Early kick out: Don't apply feature if there are large coeffs:
- // If this kickout value is removed or raised beyond int8_t,
- // widen deltas type in OptTxbQcoeff struct.
- assert((int8_t)HBT_KICKOUT == HBT_KICKOUT); // If not, widen types.
- if (prechunk > HBT_KICKOUT) {
- av1_txb_init_levels(txb_info->qcoeff, txb_info->width, txb_info->height,
- txb_info->levels);
-
- const int update =
- optimize_txb(txb_info, txb_costs, txb_eob_costs, rate_cost);
-
- if (update) {
- p->eobs[block] = txb_info->eob;
- p->txb_entropy_ctx[block] = av1_get_txb_entropy_context(
- txb_info->qcoeff, txb_info->scan_order, txb_info->eob);
- }
- return txb_info->eob;
- }
-
- // Since coeffs are 0 to 3, only 2 bits are needed: pack into bytes
- if (packing_index == 0) txb_hash_data[hash_data_index] = 0;
- chunk = prechunk << packing_index;
- packing_index += 2;
- txb_hash_data[hash_data_index] |= chunk;
-
- // Full byte:
- if (packing_index == 8) {
- packing_index = 0;
- hash_data_index++;
- }
- }
- // Needed when packing_index != 0, to include final byte.
- hash_data_index++;
- assert(hash_data_index <= 64);
- // 31 bit qc_hash: index to array
- uint32_t hbt_qc_hash =
- av1_get_crc32c_value(&crc_calculator, txb_hash_data, hash_data_index);
-
- // Make ctx_hash.
- hash_data_index = 0;
- tran_low_t prechunk;
-
- for (int i = 0; i < txb_info->eob; i++) {
- // Save as magnitudes towards or away from zero.
- if (txb_info->tcoeff[scan[i]] >= 0)
- prechunk = txb_info->tcoeff[scan[i]] - txb_info->dqcoeff[scan[i]];
- else
- prechunk = txb_info->dqcoeff[scan[i]] - txb_info->tcoeff[scan[i]];
-
- chunk = prechunk & 0xff;
- txb_hash_data[hash_data_index++] = chunk;
- }
-
- // Extra ctx data:
- // Include dequants.
- txb_hash_data[hash_data_index++] = txb_info->dequant[0] & 0xff;
- txb_hash_data[hash_data_index++] = txb_info->dequant[1] & 0xff;
- chunk = txb_info->txb_ctx->txb_skip_ctx & 0xff;
- txb_hash_data[hash_data_index++] = chunk;
- chunk = txb_info->txb_ctx->dc_sign_ctx & 0xff;
- txb_hash_data[hash_data_index++] = chunk;
- // eob
- chunk = txb_info->eob & 0xff;
- txb_hash_data[hash_data_index++] = chunk;
- // rdmult (int64)
- chunk = txb_info->rdmult & 0xff;
- txb_hash_data[hash_data_index++] = chunk;
- // tx_type
- chunk = txb_info->tx_type & 0xff;
- txb_hash_data[hash_data_index++] = chunk;
- // base_eob_cost
- for (int i = 1; i < 3; i++) { // i = 0 are softened away
- for (int j = 0; j < SIG_COEF_CONTEXTS_EOB; j++) {
- chunk = (txb_costs->base_eob_cost[j][i] & 0xff00) >> 8;
- txb_hash_data[hash_data_index++] = chunk;
- }
- }
- // eob_cost
- for (int i = 0; i < 11; i++) {
- for (int j = 0; j < 2; j++) {
- chunk = (txb_eob_costs->eob_cost[j][i] & 0xff00) >> 8;
- txb_hash_data[hash_data_index++] = chunk;
- }
- }
- // dc_sign_cost
- for (int i = 0; i < 2; i++) {
- for (int j = 0; j < DC_SIGN_CONTEXTS; j++) {
- chunk = (txb_costs->dc_sign_cost[j][i] & 0xff00) >> 8;
- txb_hash_data[hash_data_index++] = chunk;
- }
- }
-
- assert(hash_data_index <= 256);
- // 31 bit ctx_hash: used to index table
- uint32_t hbt_ctx_hash =
- av1_get_crc32c_value(&crc_calculator, txb_hash_data, hash_data_index);
- //// End hash creation
-
- return hbt_search_match(hbt_ctx_hash, hbt_qc_hash, txb_info, txb_costs,
- txb_eob_costs, p, block, fast_mode, rate_cost);
-}
-
-static AOM_FORCE_INLINE int get_coeff_cost_simple(
- int ci, tran_low_t abs_qc, int coeff_ctx,
- const LV_MAP_COEFF_COST *txb_costs, int bwl, TX_CLASS tx_class,
- const uint8_t *levels) {
- // this simple version assumes the coeff's scan_idx is not DC (scan_idx != 0)
- // and not the last (scan_idx != eob - 1)
- assert(ci > 0);
- int cost = txb_costs->base_cost[coeff_ctx][AOMMIN(abs_qc, 3)];
- if (abs_qc) {
- cost += av1_cost_literal(1);
- if (abs_qc > NUM_BASE_LEVELS) {
- const int br_ctx = get_br_ctx(levels, ci, bwl, tx_class);
- cost += get_br_cost(abs_qc, br_ctx, txb_costs->lps_cost[br_ctx]);
- cost += get_golomb_cost(abs_qc);
- }
- }
- return cost;
-}
-
-static INLINE int get_coeff_cost_general(int is_last, int ci, tran_low_t abs_qc,
- int sign, int coeff_ctx,
- int dc_sign_ctx,
- const LV_MAP_COEFF_COST *txb_costs,
- int bwl, TX_CLASS tx_class,
- const uint8_t *levels) {
- int cost = 0;
- if (is_last) {
- cost += txb_costs->base_eob_cost[coeff_ctx][AOMMIN(abs_qc, 3) - 1];
- } else {
- cost += txb_costs->base_cost[coeff_ctx][AOMMIN(abs_qc, 3)];
- }
- if (abs_qc != 0) {
- if (ci == 0) {
- cost += txb_costs->dc_sign_cost[dc_sign_ctx][sign];
- } else {
- cost += av1_cost_literal(1);
- }
- if (abs_qc > NUM_BASE_LEVELS) {
- const int br_ctx = get_br_ctx(levels, ci, bwl, tx_class);
- cost += get_br_cost(abs_qc, br_ctx, txb_costs->lps_cost[br_ctx]);
- cost += get_golomb_cost(abs_qc);
- }
- }
- return cost;
-}
-
-static INLINE void get_qc_dqc_low(tran_low_t abs_qc, int sign, int dqv,
- int shift, tran_low_t *qc_low,
- tran_low_t *dqc_low) {
- tran_low_t abs_qc_low = abs_qc - 1;
- *qc_low = (-sign ^ abs_qc_low) + sign;
- assert((sign ? -abs_qc_low : abs_qc_low) == *qc_low);
- tran_low_t abs_dqc_low = (abs_qc_low * dqv) >> shift;
- *dqc_low = (-sign ^ abs_dqc_low) + sign;
- assert((sign ? -abs_dqc_low : abs_dqc_low) == *dqc_low);
-}
-
-static INLINE void update_coeff_general(
- int *accu_rate, int64_t *accu_dist, int si, int eob, TX_SIZE tx_size,
- TX_CLASS tx_class, int bwl, int height, int64_t rdmult, int shift,
- int dc_sign_ctx, const int16_t *dequant, const int16_t *scan,
- const LV_MAP_COEFF_COST *txb_costs, const tran_low_t *tcoeff,
- tran_low_t *qcoeff, tran_low_t *dqcoeff, uint8_t *levels) {
- const int dqv = dequant[si != 0];
- const int ci = scan[si];
- const tran_low_t qc = qcoeff[ci];
- const int is_last = si == (eob - 1);
- const int coeff_ctx = get_lower_levels_ctx_general(
- is_last, si, bwl, height, levels, ci, tx_size, tx_class);
- if (qc == 0) {
- *accu_rate += txb_costs->base_cost[coeff_ctx][0];
- } else {
- const int sign = (qc < 0) ? 1 : 0;
- const tran_low_t abs_qc = abs(qc);
- const tran_low_t tqc = tcoeff[ci];
- const tran_low_t dqc = dqcoeff[ci];
- const int64_t dist = get_coeff_dist(tqc, dqc, shift);
- const int64_t dist0 = get_coeff_dist(tqc, 0, shift);
- const int rate =
- get_coeff_cost_general(is_last, ci, abs_qc, sign, coeff_ctx,
- dc_sign_ctx, txb_costs, bwl, tx_class, levels);
- const int64_t rd = RDCOST(rdmult, rate, dist);
-
- tran_low_t qc_low, dqc_low;
- get_qc_dqc_low(abs_qc, sign, dqv, shift, &qc_low, &dqc_low);
- const tran_low_t abs_qc_low = abs_qc - 1;
- const int64_t dist_low = get_coeff_dist(tqc, dqc_low, shift);
- const int rate_low =
- get_coeff_cost_general(is_last, ci, abs_qc_low, sign, coeff_ctx,
- dc_sign_ctx, txb_costs, bwl, tx_class, levels);
- const int64_t rd_low = RDCOST(rdmult, rate_low, dist_low);
- if (rd_low < rd) {
- qcoeff[ci] = qc_low;
- dqcoeff[ci] = dqc_low;
- levels[get_padded_idx(ci, bwl)] = AOMMIN(abs_qc_low, INT8_MAX);
- *accu_rate += rate_low;
- *accu_dist += dist_low - dist0;
- } else {
- *accu_rate += rate;
- *accu_dist += dist - dist0;
- }
- }
-}
-
-static AOM_FORCE_INLINE void update_coeff_simple(
- int *accu_rate, int si, int eob, TX_SIZE tx_size, TX_CLASS tx_class,
- int bwl, int64_t rdmult, int shift, const int16_t *dequant,
- const int16_t *scan, const LV_MAP_COEFF_COST *txb_costs,
- const tran_low_t *tcoeff, tran_low_t *qcoeff, tran_low_t *dqcoeff,
- uint8_t *levels) {
- const int dqv = dequant[1];
- (void)eob;
- // this simple version assumes the coeff's scan_idx is not DC (scan_idx != 0)
- // and not the last (scan_idx != eob - 1)
- assert(si != eob - 1);
- assert(si > 0);
- const int ci = scan[si];
- const tran_low_t qc = qcoeff[ci];
- const int coeff_ctx =
- get_lower_levels_ctx(levels, ci, bwl, tx_size, tx_class);
- if (qc == 0) {
- *accu_rate += txb_costs->base_cost[coeff_ctx][0];
- } else {
- const tran_low_t abs_qc = abs(qc);
- const tran_low_t tqc = tcoeff[ci];
- const tran_low_t dqc = dqcoeff[ci];
- const int rate = get_coeff_cost_simple(ci, abs_qc, coeff_ctx, txb_costs,
- bwl, tx_class, levels);
- if (abs(dqc) < abs(tqc)) {
- *accu_rate += rate;
- return;
- }
- const int64_t dist = get_coeff_dist(tqc, dqc, shift);
- const int64_t rd = RDCOST(rdmult, rate, dist);
-
- const int sign = (qc < 0) ? 1 : 0;
- tran_low_t qc_low, dqc_low;
- get_qc_dqc_low(abs_qc, sign, dqv, shift, &qc_low, &dqc_low);
- const tran_low_t abs_qc_low = abs_qc - 1;
- const int64_t dist_low = get_coeff_dist(tqc, dqc_low, shift);
- const int rate_low = get_coeff_cost_simple(
- ci, abs_qc_low, coeff_ctx, txb_costs, bwl, tx_class, levels);
- const int64_t rd_low = RDCOST(rdmult, rate_low, dist_low);
- if (rd_low < rd) {
- qcoeff[ci] = qc_low;
- dqcoeff[ci] = dqc_low;
- levels[get_padded_idx(ci, bwl)] = AOMMIN(abs_qc_low, INT8_MAX);
- *accu_rate += rate_low;
- } else {
- *accu_rate += rate;
- }
- }
-}
-
-static AOM_FORCE_INLINE void update_coeff_eob(
- int *accu_rate, int64_t *accu_dist, int *eob, int *nz_num, int *nz_ci,
- int si, TX_SIZE tx_size, TX_CLASS tx_class, int bwl, int height,
- int dc_sign_ctx, int64_t rdmult, int shift, const int16_t *dequant,
- const int16_t *scan, const LV_MAP_EOB_COST *txb_eob_costs,
- const LV_MAP_COEFF_COST *txb_costs, const tran_low_t *tcoeff,
- tran_low_t *qcoeff, tran_low_t *dqcoeff, uint8_t *levels, int sharpness) {
- const int dqv = dequant[si != 0];
- assert(si != *eob - 1);
- const int ci = scan[si];
- const tran_low_t qc = qcoeff[ci];
- const int coeff_ctx =
- get_lower_levels_ctx(levels, ci, bwl, tx_size, tx_class);
- if (qc == 0) {
- *accu_rate += txb_costs->base_cost[coeff_ctx][0];
- } else {
- int lower_level = 0;
- const tran_low_t abs_qc = abs(qc);
- const tran_low_t tqc = tcoeff[ci];
- const tran_low_t dqc = dqcoeff[ci];
- const int sign = (qc < 0) ? 1 : 0;
- const int64_t dist0 = get_coeff_dist(tqc, 0, shift);
- int64_t dist = get_coeff_dist(tqc, dqc, shift) - dist0;
- int rate =
- get_coeff_cost_general(0, ci, abs_qc, sign, coeff_ctx, dc_sign_ctx,
- txb_costs, bwl, tx_class, levels);
- int64_t rd = RDCOST(rdmult, *accu_rate + rate, *accu_dist + dist);
-
- tran_low_t qc_low, dqc_low;
- get_qc_dqc_low(abs_qc, sign, dqv, shift, &qc_low, &dqc_low);
- const tran_low_t abs_qc_low = abs_qc - 1;
- const int64_t dist_low = get_coeff_dist(tqc, dqc_low, shift) - dist0;
- const int rate_low =
- get_coeff_cost_general(0, ci, abs_qc_low, sign, coeff_ctx, dc_sign_ctx,
- txb_costs, bwl, tx_class, levels);
- const int64_t rd_low =
- RDCOST(rdmult, *accu_rate + rate_low, *accu_dist + dist_low);
-
- int lower_level_new_eob = 0;
- const int new_eob = si + 1;
- uint8_t tmp_levels[3];
- for (int ni = 0; ni < *nz_num; ++ni) {
- const int last_ci = nz_ci[ni];
- tmp_levels[ni] = levels[get_padded_idx(last_ci, bwl)];
- levels[get_padded_idx(last_ci, bwl)] = 0;
- }
-
- const int coeff_ctx_new_eob = get_lower_levels_ctx_general(
- 1, si, bwl, height, levels, ci, tx_size, tx_class);
- const int new_eob_cost =
- get_eob_cost(new_eob, txb_eob_costs, txb_costs, tx_class);
- int rate_coeff_eob =
- new_eob_cost + get_coeff_cost_general(1, ci, abs_qc, sign,
- coeff_ctx_new_eob, dc_sign_ctx,
- txb_costs, bwl, tx_class, levels);
- int64_t dist_new_eob = dist;
- int64_t rd_new_eob = RDCOST(rdmult, rate_coeff_eob, dist_new_eob);
-
- if (abs_qc_low > 0) {
- const int rate_coeff_eob_low =
- new_eob_cost +
- get_coeff_cost_general(1, ci, abs_qc_low, sign, coeff_ctx_new_eob,
- dc_sign_ctx, txb_costs, bwl, tx_class, levels);
- const int64_t dist_new_eob_low = dist_low;
- const int64_t rd_new_eob_low =
- RDCOST(rdmult, rate_coeff_eob_low, dist_new_eob_low);
- if (rd_new_eob_low < rd_new_eob) {
- lower_level_new_eob = 1;
- rd_new_eob = rd_new_eob_low;
- rate_coeff_eob = rate_coeff_eob_low;
- dist_new_eob = dist_new_eob_low;
- }
- }
-
- if (rd_low < rd) {
- lower_level = 1;
- rd = rd_low;
- rate = rate_low;
- dist = dist_low;
- }
-
- if (sharpness == 0 && rd_new_eob < rd) {
- for (int ni = 0; ni < *nz_num; ++ni) {
- int last_ci = nz_ci[ni];
- // levels[get_padded_idx(last_ci, bwl)] = 0;
- qcoeff[last_ci] = 0;
- dqcoeff[last_ci] = 0;
- }
- *eob = new_eob;
- *nz_num = 0;
- *accu_rate = rate_coeff_eob;
- *accu_dist = dist_new_eob;
- lower_level = lower_level_new_eob;
- } else {
- for (int ni = 0; ni < *nz_num; ++ni) {
- const int last_ci = nz_ci[ni];
- levels[get_padded_idx(last_ci, bwl)] = tmp_levels[ni];
- }
- *accu_rate += rate;
- *accu_dist += dist;
- }
-
- if (lower_level) {
- qcoeff[ci] = qc_low;
- dqcoeff[ci] = dqc_low;
- levels[get_padded_idx(ci, bwl)] = AOMMIN(abs_qc_low, INT8_MAX);
- }
- if (qcoeff[ci]) {
- nz_ci[*nz_num] = ci;
- ++*nz_num;
- }
- }
-}
-
-static INLINE void update_skip(int *accu_rate, int64_t accu_dist, int *eob,
- int nz_num, int *nz_ci, int64_t rdmult,
- int skip_cost, int non_skip_cost,
- tran_low_t *qcoeff, tran_low_t *dqcoeff,
- int sharpness) {
- const int64_t rd = RDCOST(rdmult, *accu_rate + non_skip_cost, accu_dist);
- const int64_t rd_new_eob = RDCOST(rdmult, skip_cost, 0);
- if (sharpness == 0 && rd_new_eob < rd) {
- for (int i = 0; i < nz_num; ++i) {
- const int ci = nz_ci[i];
- qcoeff[ci] = 0;
- dqcoeff[ci] = 0;
- // no need to set up levels because this is the last step
- // levels[get_padded_idx(ci, bwl)] = 0;
- }
- *accu_rate = 0;
- *eob = 0;
- }
-}
-
-int av1_optimize_txb_new(const struct AV1_COMP *cpi, MACROBLOCK *x, int plane,
- int block, TX_SIZE tx_size, TX_TYPE tx_type,
- const TXB_CTX *const txb_ctx, int *rate_cost,
- int sharpness) {
- const AV1_COMMON *cm = &cpi->common;
- MACROBLOCKD *xd = &x->e_mbd;
- const PLANE_TYPE plane_type = get_plane_type(plane);
- const TX_SIZE txs_ctx = get_txsize_entropy_ctx(tx_size);
- const TX_CLASS tx_class = tx_type_to_class[tx_type];
- const MB_MODE_INFO *mbmi = xd->mi[0];
- const struct macroblock_plane *p = &x->plane[plane];
- struct macroblockd_plane *pd = &xd->plane[plane];
- tran_low_t *qcoeff = BLOCK_OFFSET(p->qcoeff, block);
- tran_low_t *dqcoeff = BLOCK_OFFSET(pd->dqcoeff, block);
- const tran_low_t *tcoeff = BLOCK_OFFSET(p->coeff, block);
- const int16_t *dequant = p->dequant_QTX;
- const int bwl = get_txb_bwl(tx_size);
- const int width = get_txb_wide(tx_size);
- const int height = get_txb_high(tx_size);
- assert(width == (1 << bwl));
- const int is_inter = is_inter_block(mbmi);
- const SCAN_ORDER *scan_order = get_scan(tx_size, tx_type);
- const int16_t *scan = scan_order->scan;
- const LV_MAP_COEFF_COST *txb_costs = &x->coeff_costs[txs_ctx][plane_type];
- const int eob_multi_size = txsize_log2_minus4[tx_size];
- const LV_MAP_EOB_COST *txb_eob_costs =
- &x->eob_costs[eob_multi_size][plane_type];
-
- const int shift = av1_get_tx_scale(tx_size);
- const int64_t rdmult =
- ((x->rdmult * plane_rd_mult[is_inter][plane_type] << (2 * (xd->bd - 8))) +
- 2) >>
- (sharpness +
- (cpi->oxcf.aq_mode == VARIANCE_AQ && mbmi->segment_id < 4
- ? 7 - mbmi->segment_id
- : 2) +
- (cpi->oxcf.aq_mode != VARIANCE_AQ &&
- cpi->oxcf.deltaq_mode > NO_DELTA_Q && x->sb_energy_level < 0
- ? (3 - x->sb_energy_level)
- : 0));
-
- uint8_t levels_buf[TX_PAD_2D];
- uint8_t *const levels = set_levels(levels_buf, width);
-
- av1_txb_init_levels(qcoeff, width, height, levels);
-
- // TODO(angirbird): check iqmatrix
-
- const int non_skip_cost = txb_costs->txb_skip_cost[txb_ctx->txb_skip_ctx][0];
- const int skip_cost = txb_costs->txb_skip_cost[txb_ctx->txb_skip_ctx][1];
- int eob = p->eobs[block];
- const int eob_cost = get_eob_cost(eob, txb_eob_costs, txb_costs, tx_class);
- int accu_rate = eob_cost;
- int64_t accu_dist = 0;
- int si = eob - 1;
- const int ci = scan[si];
- const tran_low_t qc = qcoeff[ci];
- const tran_low_t abs_qc = abs(qc);
- const int sign = qc < 0;
- const int max_nz_num = 2;
- int nz_num = 1;
- int nz_ci[3] = { ci, 0, 0 };
- if (abs_qc >= 2) {
- update_coeff_general(&accu_rate, &accu_dist, si, eob, tx_size, tx_class,
- bwl, height, rdmult, shift, txb_ctx->dc_sign_ctx,
- dequant, scan, txb_costs, tcoeff, qcoeff, dqcoeff,
- levels);
- --si;
- } else {
- assert(abs_qc == 1);
- const int coeff_ctx = get_lower_levels_ctx_general(
- 1, si, bwl, height, levels, ci, tx_size, tx_class);
- accu_rate += get_coeff_cost_general(1, ci, abs_qc, sign, coeff_ctx,
- txb_ctx->dc_sign_ctx, txb_costs, bwl,
- tx_class, levels);
- const tran_low_t tqc = tcoeff[ci];
- const tran_low_t dqc = dqcoeff[ci];
- const int64_t dist = get_coeff_dist(tqc, dqc, shift);
- const int64_t dist0 = get_coeff_dist(tqc, 0, shift);
- accu_dist += dist - dist0;
- --si;
- }
-
-#define UPDATE_COEFF_EOB_CASE(tx_class_literal) \
- case tx_class_literal: \
- for (; si >= 0 && nz_num <= max_nz_num; --si) { \
- update_coeff_eob(&accu_rate, &accu_dist, &eob, &nz_num, nz_ci, si, \
- tx_size, tx_class_literal, bwl, height, \
- txb_ctx->dc_sign_ctx, rdmult, shift, dequant, scan, \
- txb_eob_costs, txb_costs, tcoeff, qcoeff, dqcoeff, \
- levels, sharpness); \
- } \
- break;
- switch (tx_class) {
- UPDATE_COEFF_EOB_CASE(TX_CLASS_2D);
- UPDATE_COEFF_EOB_CASE(TX_CLASS_HORIZ);
- UPDATE_COEFF_EOB_CASE(TX_CLASS_VERT);
-#undef UPDATE_COEFF_EOB_CASE
- default: assert(false);
- }
-
- if (si == -1 && nz_num <= max_nz_num) {
- update_skip(&accu_rate, accu_dist, &eob, nz_num, nz_ci, rdmult, skip_cost,
- non_skip_cost, qcoeff, dqcoeff, sharpness);
- }
-
-#define UPDATE_COEFF_SIMPLE_CASE(tx_class_literal) \
- case tx_class_literal: \
- for (; si >= 1; --si) { \
- update_coeff_simple(&accu_rate, si, eob, tx_size, tx_class_literal, bwl, \
- rdmult, shift, dequant, scan, txb_costs, tcoeff, \
- qcoeff, dqcoeff, levels); \
- } \
- break;
- switch (tx_class) {
- UPDATE_COEFF_SIMPLE_CASE(TX_CLASS_2D);
- UPDATE_COEFF_SIMPLE_CASE(TX_CLASS_HORIZ);
- UPDATE_COEFF_SIMPLE_CASE(TX_CLASS_VERT);
-#undef UPDATE_COEFF_SIMPLE_CASE
- default: assert(false);
- }
-
- // DC position
- if (si == 0) {
- // no need to update accu_dist because it's not used after this point
- int64_t dummy_dist = 0;
- update_coeff_general(&accu_rate, &dummy_dist, si, eob, tx_size, tx_class,
- bwl, height, rdmult, shift, txb_ctx->dc_sign_ctx,
- dequant, scan, txb_costs, tcoeff, qcoeff, dqcoeff,
- levels);
- }
-
- const int tx_type_cost = get_tx_type_cost(cm, x, xd, plane, tx_size, tx_type);
- if (eob == 0)
- accu_rate += skip_cost;
- else
- accu_rate += non_skip_cost + tx_type_cost;
-
- p->eobs[block] = eob;
- p->txb_entropy_ctx[block] =
- av1_get_txb_entropy_context(qcoeff, scan_order, p->eobs[block]);
-
- *rate_cost = accu_rate;
- return eob;
-}
-
-// This function is deprecated, but we keep it here because hash trellis
-// is not integrated with av1_optimize_txb_new yet
-int av1_optimize_txb(const struct AV1_COMP *cpi, MACROBLOCK *x, int plane,
- int blk_row, int blk_col, int block, TX_SIZE tx_size,
- TXB_CTX *txb_ctx, int fast_mode, int *rate_cost) {
- const AV1_COMMON *cm = &cpi->common;
- MACROBLOCKD *const xd = &x->e_mbd;
- const PLANE_TYPE plane_type = get_plane_type(plane);
- const TX_SIZE txs_ctx = get_txsize_entropy_ctx(tx_size);
- const TX_TYPE tx_type = av1_get_tx_type(plane_type, xd, blk_row, blk_col,
- tx_size, cm->reduced_tx_set_used);
- const MB_MODE_INFO *mbmi = xd->mi[0];
- const struct macroblock_plane *p = &x->plane[plane];
- struct macroblockd_plane *pd = &xd->plane[plane];
- const int eob = p->eobs[block];
- tran_low_t *qcoeff = BLOCK_OFFSET(p->qcoeff, block);
- tran_low_t *dqcoeff = BLOCK_OFFSET(pd->dqcoeff, block);
- const tran_low_t *tcoeff = BLOCK_OFFSET(p->coeff, block);
- const int16_t *dequant = p->dequant_QTX;
- const int seg_eob = av1_get_max_eob(tx_size);
- const int bwl = get_txb_bwl(tx_size);
- const int width = get_txb_wide(tx_size);
- const int height = get_txb_high(tx_size);
- const int is_inter = is_inter_block(mbmi);
- const SCAN_ORDER *const scan_order = get_scan(tx_size, tx_type);
- const LV_MAP_COEFF_COST *txb_costs = &x->coeff_costs[txs_ctx][plane_type];
- const int eob_multi_size = txsize_log2_minus4[tx_size];
- const LV_MAP_EOB_COST txb_eob_costs =
- x->eob_costs[eob_multi_size][plane_type];
-
- const int shift = av1_get_tx_scale(tx_size);
- const int64_t rdmult =
- ((x->rdmult * plane_rd_mult[is_inter][plane_type] << (2 * (xd->bd - 8))) +
- 2) >>
- 2;
- uint8_t levels_buf[TX_PAD_2D];
- uint8_t *const levels = set_levels(levels_buf, width);
- const TX_SIZE qm_tx_size = av1_get_adjusted_tx_size(tx_size);
- const qm_val_t *iqmatrix =
- IS_2D_TRANSFORM(tx_type)
- ? pd->seg_iqmatrix[mbmi->segment_id][qm_tx_size]
- : cm->giqmatrix[NUM_QM_LEVELS - 1][0][qm_tx_size];
- assert(width == (1 << bwl));
- const int tx_type_cost = get_tx_type_cost(cm, x, xd, plane, tx_size, tx_type);
- TxbInfo txb_info = {
- qcoeff, levels, dqcoeff, tcoeff, dequant, shift,
- tx_size, txs_ctx, tx_type, bwl, width, height,
- eob, seg_eob, scan_order, txb_ctx, rdmult, &cm->coeff_ctx_table,
- iqmatrix, tx_type_cost,
- };
-
- // Hash based trellis (hbt) speed feature: avoid expensive optimize_txb calls
- // by storing the coefficient deltas in a hash table.
- // Currently disabled in speedfeatures.c
- if (eob <= HBT_EOB && eob > 0 && cpi->sf.use_hash_based_trellis) {
- return hbt_create_hashes(&txb_info, txb_costs, &txb_eob_costs, p, block,
- fast_mode, rate_cost);
- }
-
- av1_txb_init_levels(qcoeff, width, height, levels);
-
- const int update =
- optimize_txb(&txb_info, txb_costs, &txb_eob_costs, rate_cost);
-
- if (update) {
- p->eobs[block] = txb_info.eob;
- p->txb_entropy_ctx[block] =
- av1_get_txb_entropy_context(qcoeff, scan_order, txb_info.eob);
- }
- return txb_info.eob;
-}
-
-int av1_get_txb_entropy_context(const tran_low_t *qcoeff,
- const SCAN_ORDER *scan_order, int eob) {
- const int16_t *const scan = scan_order->scan;
- int cul_level = 0;
- int c;
-
- if (eob == 0) return 0;
- for (c = 0; c < eob; ++c) {
- cul_level += abs(qcoeff[scan[c]]);
- if (cul_level > COEFF_CONTEXT_MASK) break;
- }
-
- cul_level = AOMMIN(COEFF_CONTEXT_MASK, cul_level);
- set_dc_sign(&cul_level, qcoeff[0]);
-
- return cul_level;
-}
-
-void av1_update_txb_context_b(int plane, int block, int blk_row, int blk_col,
- BLOCK_SIZE plane_bsize, TX_SIZE tx_size,
- void *arg) {
- struct tokenize_b_args *const args = arg;
- const AV1_COMP *cpi = args->cpi;
- const AV1_COMMON *cm = &cpi->common;
- ThreadData *const td = args->td;
- MACROBLOCK *const x = &td->mb;
- MACROBLOCKD *const xd = &x->e_mbd;
- struct macroblock_plane *p = &x->plane[plane];
- struct macroblockd_plane *pd = &xd->plane[plane];
- const uint16_t eob = p->eobs[block];
- const tran_low_t *qcoeff = BLOCK_OFFSET(p->qcoeff, block);
- const PLANE_TYPE plane_type = pd->plane_type;
- const TX_TYPE tx_type = av1_get_tx_type(plane_type, xd, blk_row, blk_col,
- tx_size, cm->reduced_tx_set_used);
- const SCAN_ORDER *const scan_order = get_scan(tx_size, tx_type);
- const int cul_level = av1_get_txb_entropy_context(qcoeff, scan_order, eob);
- av1_set_contexts(xd, pd, plane, plane_bsize, tx_size, cul_level, blk_col,
- blk_row);
-}
-
-static void update_tx_type_count(const AV1_COMMON *cm, MACROBLOCKD *xd,
- int blk_row, int blk_col, int plane,
- TX_SIZE tx_size, FRAME_COUNTS *counts,
- uint8_t allow_update_cdf) {
- MB_MODE_INFO *mbmi = xd->mi[0];
- int is_inter = is_inter_block(mbmi);
- FRAME_CONTEXT *fc = xd->tile_ctx;
-#if !CONFIG_ENTROPY_STATS
- (void)counts;
-#endif // !CONFIG_ENTROPY_STATS
-
- // Only y plane's tx_type is updated
- if (plane > 0) return;
- TX_TYPE tx_type = av1_get_tx_type(PLANE_TYPE_Y, xd, blk_row, blk_col, tx_size,
- cm->reduced_tx_set_used);
- if (get_ext_tx_types(tx_size, is_inter, cm->reduced_tx_set_used) > 1 &&
- cm->base_qindex > 0 && !mbmi->skip &&
- !segfeature_active(&cm->seg, mbmi->segment_id, SEG_LVL_SKIP)) {
- const int eset = get_ext_tx_set(tx_size, is_inter, cm->reduced_tx_set_used);
- if (eset > 0) {
- const TxSetType tx_set_type =
- av1_get_ext_tx_set_type(tx_size, is_inter, cm->reduced_tx_set_used);
- if (is_inter) {
- if (allow_update_cdf) {
- update_cdf(fc->inter_ext_tx_cdf[eset][txsize_sqr_map[tx_size]],
- av1_ext_tx_ind[tx_set_type][tx_type],
- av1_num_ext_tx_set[tx_set_type]);
- }
-#if CONFIG_ENTROPY_STATS
- ++counts->inter_ext_tx[eset][txsize_sqr_map[tx_size]]
- [av1_ext_tx_ind[tx_set_type][tx_type]];
-#endif // CONFIG_ENTROPY_STATS
- } else {
- PREDICTION_MODE intra_dir;
- if (mbmi->filter_intra_mode_info.use_filter_intra)
- intra_dir = fimode_to_intradir[mbmi->filter_intra_mode_info
- .filter_intra_mode];
- else
- intra_dir = mbmi->mode;
-#if CONFIG_ENTROPY_STATS
- ++counts->intra_ext_tx[eset][txsize_sqr_map[tx_size]][intra_dir]
- [av1_ext_tx_ind[tx_set_type][tx_type]];
-#endif // CONFIG_ENTROPY_STATS
- if (allow_update_cdf) {
- update_cdf(
- fc->intra_ext_tx_cdf[eset][txsize_sqr_map[tx_size]][intra_dir],
- av1_ext_tx_ind[tx_set_type][tx_type],
- av1_num_ext_tx_set[tx_set_type]);
- }
- }
- }
- }
-}
-
-void av1_update_and_record_txb_context(int plane, int block, int blk_row,
- int blk_col, BLOCK_SIZE plane_bsize,
- TX_SIZE tx_size, void *arg) {
- struct tokenize_b_args *const args = arg;
- const AV1_COMP *cpi = args->cpi;
- const AV1_COMMON *cm = &cpi->common;
- ThreadData *const td = args->td;
- MACROBLOCK *const x = &td->mb;
- MACROBLOCKD *const xd = &x->e_mbd;
- struct macroblock_plane *p = &x->plane[plane];
- struct macroblockd_plane *pd = &xd->plane[plane];
- MB_MODE_INFO *mbmi = xd->mi[0];
- const int eob = p->eobs[block];
- TXB_CTX txb_ctx;
- get_txb_ctx(plane_bsize, tx_size, plane, pd->above_context + blk_col,
- pd->left_context + blk_row, &txb_ctx);
- const int bwl = get_txb_bwl(tx_size);
- const int width = get_txb_wide(tx_size);
- const int height = get_txb_high(tx_size);
- const uint8_t allow_update_cdf = args->allow_update_cdf;
- const TX_SIZE txsize_ctx = get_txsize_entropy_ctx(tx_size);
- FRAME_CONTEXT *ec_ctx = xd->tile_ctx;
-#if CONFIG_ENTROPY_STATS
- int cdf_idx = cm->coef_cdf_category;
-#endif // CONFIG_ENTROPY_STATS
-
-#if CONFIG_ENTROPY_STATS
- ++td->counts->txb_skip[cdf_idx][txsize_ctx][txb_ctx.txb_skip_ctx][eob == 0];
-#endif // CONFIG_ENTROPY_STATS
- if (allow_update_cdf) {
- update_cdf(ec_ctx->txb_skip_cdf[txsize_ctx][txb_ctx.txb_skip_ctx], eob == 0,
- 2);
- }
-
- x->mbmi_ext->txb_skip_ctx[plane][block] = txb_ctx.txb_skip_ctx;
- x->mbmi_ext->eobs[plane][block] = eob;
-
- if (eob == 0) {
- av1_set_contexts(xd, pd, plane, plane_bsize, tx_size, 0, blk_col, blk_row);
- return;
- }
-
- tran_low_t *tcoeff = BLOCK_OFFSET(x->mbmi_ext->tcoeff[plane], block);
- const int segment_id = mbmi->segment_id;
- const int seg_eob = av1_get_tx_eob(&cpi->common.seg, segment_id, tx_size);
- const tran_low_t *qcoeff = BLOCK_OFFSET(p->qcoeff, block);
- memcpy(tcoeff, qcoeff, sizeof(*tcoeff) * seg_eob);
-
- uint8_t levels_buf[TX_PAD_2D];
- uint8_t *const levels = set_levels(levels_buf, width);
- av1_txb_init_levels(tcoeff, width, height, levels);
- update_tx_type_count(cm, xd, blk_row, blk_col, plane, tx_size, td->counts,
- allow_update_cdf);
-
- const PLANE_TYPE plane_type = pd->plane_type;
- const TX_TYPE tx_type = av1_get_tx_type(plane_type, xd, blk_row, blk_col,
- tx_size, cm->reduced_tx_set_used);
- const TX_CLASS tx_class = tx_type_to_class[tx_type];
- const SCAN_ORDER *const scan_order = get_scan(tx_size, tx_type);
- const int16_t *const scan = scan_order->scan;
-#if CONFIG_ENTROPY_STATS
- av1_update_eob_context(cdf_idx, eob, tx_size, tx_class, plane_type, ec_ctx,
- td->counts, allow_update_cdf);
-#else
- av1_update_eob_context(eob, tx_size, tx_class, plane_type, ec_ctx,
- allow_update_cdf);
-#endif
-
- DECLARE_ALIGNED(16, int8_t, coeff_contexts[MAX_TX_SQUARE]);
- av1_get_nz_map_contexts(levels, scan, eob, tx_size, tx_class, coeff_contexts);
-
- for (int c = eob - 1; c >= 0; --c) {
- const int pos = scan[c];
- const int coeff_ctx = coeff_contexts[pos];
- const tran_low_t v = qcoeff[pos];
- const tran_low_t level = abs(v);
-
- if (allow_update_cdf) {
- if (c == eob - 1) {
- assert(coeff_ctx < 4);
- update_cdf(
- ec_ctx->coeff_base_eob_cdf[txsize_ctx][plane_type][coeff_ctx],
- AOMMIN(level, 3) - 1, 3);
- } else {
- update_cdf(ec_ctx->coeff_base_cdf[txsize_ctx][plane_type][coeff_ctx],
- AOMMIN(level, 3), 4);
- }
- }
- {
- if (c == eob - 1) {
- assert(coeff_ctx < 4);
-#if CONFIG_ENTROPY_STATS
- ++td->counts->coeff_base_eob_multi[cdf_idx][txsize_ctx][plane_type]
- [coeff_ctx][AOMMIN(level, 3) - 1];
- } else {
- ++td->counts->coeff_base_multi[cdf_idx][txsize_ctx][plane_type]
- [coeff_ctx][AOMMIN(level, 3)];
-#endif
- }
- }
- if (level > NUM_BASE_LEVELS) {
- const int base_range = level - 1 - NUM_BASE_LEVELS;
- const int br_ctx = get_br_ctx(levels, pos, bwl, tx_class);
- for (int idx = 0; idx < COEFF_BASE_RANGE; idx += BR_CDF_SIZE - 1) {
- const int k = AOMMIN(base_range - idx, BR_CDF_SIZE - 1);
- if (allow_update_cdf) {
- update_cdf(ec_ctx->coeff_br_cdf[AOMMIN(txsize_ctx, TX_32X32)]
- [plane_type][br_ctx],
- k, BR_CDF_SIZE);
- }
- for (int lps = 0; lps < BR_CDF_SIZE - 1; lps++) {
-#if CONFIG_ENTROPY_STATS
- ++td->counts->coeff_lps[AOMMIN(txsize_ctx, TX_32X32)][plane_type][lps]
- [br_ctx][lps == k];
-#endif // CONFIG_ENTROPY_STATS
- if (lps == k) break;
- }
-#if CONFIG_ENTROPY_STATS
- ++td->counts->coeff_lps_multi[cdf_idx][AOMMIN(txsize_ctx, TX_32X32)]
- [plane_type][br_ctx][k];
-#endif
- if (k < BR_CDF_SIZE - 1) break;
- }
- }
- }
-
- // Update the context needed to code the DC sign (if applicable)
- if (tcoeff[0] != 0) {
- const int dc_sign = (tcoeff[0] < 0) ? 1 : 0;
- const int dc_sign_ctx = txb_ctx.dc_sign_ctx;
-#if CONFIG_ENTROPY_STATS
- ++td->counts->dc_sign[plane_type][dc_sign_ctx][dc_sign];
-#endif // CONFIG_ENTROPY_STATS
- if (allow_update_cdf)
- update_cdf(ec_ctx->dc_sign_cdf[plane_type][dc_sign_ctx], dc_sign, 2);
- x->mbmi_ext->dc_sign_ctx[plane][block] = dc_sign_ctx;
- }
-
- const int cul_level = av1_get_txb_entropy_context(tcoeff, scan_order, eob);
- av1_set_contexts(xd, pd, plane, plane_bsize, tx_size, cul_level, blk_col,
- blk_row);
-}
-
-void av1_update_txb_context(const AV1_COMP *cpi, ThreadData *td,
- RUN_TYPE dry_run, BLOCK_SIZE bsize, int *rate,
- int mi_row, int mi_col, uint8_t allow_update_cdf) {
- const AV1_COMMON *const cm = &cpi->common;
- const int num_planes = av1_num_planes(cm);
- MACROBLOCK *const x = &td->mb;
- MACROBLOCKD *const xd = &x->e_mbd;
- MB_MODE_INFO *const mbmi = xd->mi[0];
- struct tokenize_b_args arg = { cpi, td, NULL, 0, allow_update_cdf };
- (void)rate;
- (void)mi_row;
- (void)mi_col;
- if (mbmi->skip) {
- av1_reset_skip_context(xd, mi_row, mi_col, bsize, num_planes);
- return;
- }
-
- if (!dry_run) {
- av1_foreach_transformed_block(xd, bsize, mi_row, mi_col,
- av1_update_and_record_txb_context, &arg,
- num_planes);
- } else if (dry_run == DRY_RUN_NORMAL) {
- av1_foreach_transformed_block(xd, bsize, mi_row, mi_col,
- av1_update_txb_context_b, &arg, num_planes);
- } else {
- printf("DRY_RUN_COSTCOEFFS is not supported yet\n");
- assert(0);
- }
-}
diff --git a/third_party/aom/av1/encoder/encodetxb.h b/third_party/aom/av1/encoder/encodetxb.h
deleted file mode 100644
index 40ae343b0..000000000
--- a/third_party/aom/av1/encoder/encodetxb.h
+++ /dev/null
@@ -1,87 +0,0 @@
-/*
- * Copyright (c) 2017, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#ifndef AOM_AV1_ENCODER_ENCODETXB_H_
-#define AOM_AV1_ENCODER_ENCODETXB_H_
-
-#include "config/aom_config.h"
-
-#include "av1/common/blockd.h"
-#include "av1/common/onyxc_int.h"
-#include "av1/common/txb_common.h"
-#include "av1/encoder/block.h"
-#include "av1/encoder/encoder.h"
-#include "aom_dsp/bitwriter.h"
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-typedef struct TxbInfo {
- tran_low_t *qcoeff;
- uint8_t *levels; // absolute values and clamped to 255.
- tran_low_t *dqcoeff;
- const tran_low_t *tcoeff;
- const int16_t *dequant;
- int shift;
- TX_SIZE tx_size;
- TX_SIZE txs_ctx;
- TX_TYPE tx_type;
- int bwl;
- int width;
- int height;
- int eob;
- int seg_eob;
- const SCAN_ORDER *scan_order;
- TXB_CTX *txb_ctx;
- int64_t rdmult;
- const LV_MAP_CTX_TABLE *coeff_ctx_table;
- const qm_val_t *iqmatrix;
- int tx_type_cost;
-} TxbInfo;
-
-void av1_alloc_txb_buf(AV1_COMP *cpi);
-void av1_free_txb_buf(AV1_COMP *cpi);
-int av1_cost_coeffs_txb(const AV1_COMMON *const cm, const MACROBLOCK *x,
- const int plane, const int block, const TX_SIZE tx_size,
- const TX_TYPE tx_type, const TXB_CTX *const txb_ctx);
-void av1_write_coeffs_txb(const AV1_COMMON *const cm, MACROBLOCKD *xd,
- aom_writer *w, int blk_row, int blk_col, int plane,
- TX_SIZE tx_size, const tran_low_t *tcoeff,
- uint16_t eob, TXB_CTX *txb_ctx);
-void av1_write_coeffs_mb(const AV1_COMMON *const cm, MACROBLOCK *x, int mi_row,
- int mi_col, aom_writer *w, BLOCK_SIZE bsize);
-int av1_get_txb_entropy_context(const tran_low_t *qcoeff,
- const SCAN_ORDER *scan_order, int eob);
-void av1_update_txb_context(const AV1_COMP *cpi, ThreadData *td,
- RUN_TYPE dry_run, BLOCK_SIZE bsize, int *rate,
- int mi_row, int mi_col, uint8_t allow_update_cdf);
-
-void av1_update_txb_context_b(int plane, int block, int blk_row, int blk_col,
- BLOCK_SIZE plane_bsize, TX_SIZE tx_size,
- void *arg);
-
-void av1_update_and_record_txb_context(int plane, int block, int blk_row,
- int blk_col, BLOCK_SIZE plane_bsize,
- TX_SIZE tx_size, void *arg);
-
-void av1_set_coeff_buffer(const AV1_COMP *const cpi, MACROBLOCK *const x,
- int mi_row, int mi_col);
-
-void hbt_destroy();
-int av1_optimize_txb_new(const struct AV1_COMP *cpi, MACROBLOCK *x, int plane,
- int block, TX_SIZE tx_size, TX_TYPE tx_type,
- const TXB_CTX *const txb_ctx, int *rate_cost,
- int sharpness);
-#ifdef __cplusplus
-}
-#endif
-
-#endif // AOM_AV1_ENCODER_ENCODETXB_H_
diff --git a/third_party/aom/av1/encoder/ethread.c b/third_party/aom/av1/encoder/ethread.c
deleted file mode 100644
index e8ac30bb5..000000000
--- a/third_party/aom/av1/encoder/ethread.c
+++ /dev/null
@@ -1,261 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#include "av1/encoder/encodeframe.h"
-#include "av1/encoder/encoder.h"
-#include "av1/encoder/ethread.h"
-#include "aom_dsp/aom_dsp_common.h"
-
-static void accumulate_rd_opt(ThreadData *td, ThreadData *td_t) {
- for (int i = 0; i < REFERENCE_MODES; i++)
- td->rd_counts.comp_pred_diff[i] += td_t->rd_counts.comp_pred_diff[i];
-
- for (int i = 0; i < REF_FRAMES; i++)
- td->rd_counts.global_motion_used[i] +=
- td_t->rd_counts.global_motion_used[i];
-
- td->rd_counts.compound_ref_used_flag |=
- td_t->rd_counts.compound_ref_used_flag;
- td->rd_counts.skip_mode_used_flag |= td_t->rd_counts.skip_mode_used_flag;
-}
-
-static int enc_worker_hook(void *arg1, void *unused) {
- EncWorkerData *const thread_data = (EncWorkerData *)arg1;
- AV1_COMP *const cpi = thread_data->cpi;
- const AV1_COMMON *const cm = &cpi->common;
- const int tile_cols = cm->tile_cols;
- const int tile_rows = cm->tile_rows;
- int t;
-
- (void)unused;
-
- for (t = thread_data->start; t < tile_rows * tile_cols;
- t += cpi->num_workers) {
- int tile_row = t / tile_cols;
- int tile_col = t % tile_cols;
-
- av1_encode_tile(cpi, thread_data->td, tile_row, tile_col);
- }
-
- return 1;
-}
-
-static void create_enc_workers(AV1_COMP *cpi, int num_workers) {
- AV1_COMMON *const cm = &cpi->common;
- const AVxWorkerInterface *const winterface = aom_get_worker_interface();
-
- CHECK_MEM_ERROR(cm, cpi->workers,
- aom_malloc(num_workers * sizeof(*cpi->workers)));
-
- CHECK_MEM_ERROR(cm, cpi->tile_thr_data,
- aom_calloc(num_workers, sizeof(*cpi->tile_thr_data)));
-
- for (int i = 0; i < num_workers; i++) {
- AVxWorker *const worker = &cpi->workers[i];
- EncWorkerData *const thread_data = &cpi->tile_thr_data[i];
-
- ++cpi->num_workers;
- winterface->init(worker);
-
- thread_data->cpi = cpi;
-
- if (i < num_workers - 1) {
- // Allocate thread data.
- CHECK_MEM_ERROR(cm, thread_data->td,
- aom_memalign(32, sizeof(*thread_data->td)));
- av1_zero(*thread_data->td);
-
- // Set up pc_tree.
- thread_data->td->pc_tree = NULL;
- av1_setup_pc_tree(cm, thread_data->td);
-
- CHECK_MEM_ERROR(cm, thread_data->td->above_pred_buf,
- (uint8_t *)aom_memalign(
- 16, MAX_MB_PLANE * MAX_SB_SQUARE *
- sizeof(*thread_data->td->above_pred_buf)));
- CHECK_MEM_ERROR(cm, thread_data->td->left_pred_buf,
- (uint8_t *)aom_memalign(
- 16, MAX_MB_PLANE * MAX_SB_SQUARE *
- sizeof(*thread_data->td->left_pred_buf)));
-
- CHECK_MEM_ERROR(
- cm, thread_data->td->wsrc_buf,
- (int32_t *)aom_memalign(
- 16, MAX_SB_SQUARE * sizeof(*thread_data->td->wsrc_buf)));
-
- for (int x = 0; x < 2; x++)
- for (int y = 0; y < 2; y++)
- CHECK_MEM_ERROR(
- cm, thread_data->td->hash_value_buffer[x][y],
- (uint32_t *)aom_malloc(
- AOM_BUFFER_SIZE_FOR_BLOCK_HASH *
- sizeof(*thread_data->td->hash_value_buffer[0][0])));
-
- CHECK_MEM_ERROR(
- cm, thread_data->td->mask_buf,
- (int32_t *)aom_memalign(
- 16, MAX_SB_SQUARE * sizeof(*thread_data->td->mask_buf)));
- // Allocate frame counters in thread data.
- CHECK_MEM_ERROR(cm, thread_data->td->counts,
- aom_calloc(1, sizeof(*thread_data->td->counts)));
-
- // Allocate buffers used by palette coding mode.
- CHECK_MEM_ERROR(
- cm, thread_data->td->palette_buffer,
- aom_memalign(16, sizeof(*thread_data->td->palette_buffer)));
-
- CHECK_MEM_ERROR(
- cm, thread_data->td->tmp_conv_dst,
- aom_memalign(32, MAX_SB_SIZE * MAX_SB_SIZE *
- sizeof(*thread_data->td->tmp_conv_dst)));
- for (int j = 0; j < 2; ++j) {
- CHECK_MEM_ERROR(
- cm, thread_data->td->tmp_obmc_bufs[j],
- aom_memalign(16, 2 * MAX_MB_PLANE * MAX_SB_SQUARE *
- sizeof(*thread_data->td->tmp_obmc_bufs[j])));
- }
-
- // Create threads
- if (!winterface->reset(worker))
- aom_internal_error(&cm->error, AOM_CODEC_ERROR,
- "Tile encoder thread creation failed");
- } else {
- // Main thread acts as a worker and uses the thread data in cpi.
- thread_data->td = &cpi->td;
- }
- winterface->sync(worker);
- }
-}
-
-static void launch_enc_workers(AV1_COMP *cpi, int num_workers) {
- const AVxWorkerInterface *const winterface = aom_get_worker_interface();
- // Encode a frame
- for (int i = 0; i < num_workers; i++) {
- AVxWorker *const worker = &cpi->workers[i];
- EncWorkerData *const thread_data = (EncWorkerData *)worker->data1;
-
- // Set the starting tile for each thread.
- thread_data->start = i;
-
- if (i == cpi->num_workers - 1)
- winterface->execute(worker);
- else
- winterface->launch(worker);
- }
-}
-
-static void sync_enc_workers(AV1_COMP *cpi, int num_workers) {
- const AVxWorkerInterface *const winterface = aom_get_worker_interface();
-
- // Encoding ends.
- for (int i = 0; i < num_workers; i++) {
- AVxWorker *const worker = &cpi->workers[i];
- winterface->sync(worker);
- }
-}
-
-static void accumulate_counters_enc_workers(AV1_COMP *cpi, int num_workers) {
- for (int i = 0; i < num_workers; i++) {
- AVxWorker *const worker = &cpi->workers[i];
- EncWorkerData *const thread_data = (EncWorkerData *)worker->data1;
- cpi->intrabc_used |= thread_data->td->intrabc_used_this_tile;
- // Accumulate counters.
- if (i < cpi->num_workers - 1) {
- av1_accumulate_frame_counts(&cpi->counts, thread_data->td->counts);
- accumulate_rd_opt(&cpi->td, thread_data->td);
- cpi->td.mb.txb_split_count += thread_data->td->mb.txb_split_count;
- }
- }
-}
-
-static void prepare_enc_workers(AV1_COMP *cpi, AVxWorkerHook hook,
- int num_workers) {
- for (int i = 0; i < num_workers; i++) {
- AVxWorker *const worker = &cpi->workers[i];
- EncWorkerData *const thread_data = &cpi->tile_thr_data[i];
-
- worker->hook = hook;
- worker->data1 = thread_data;
- worker->data2 = NULL;
-
- // Before encoding a frame, copy the thread data from cpi.
- if (thread_data->td != &cpi->td) {
- thread_data->td->mb = cpi->td.mb;
- thread_data->td->rd_counts = cpi->td.rd_counts;
- thread_data->td->mb.above_pred_buf = thread_data->td->above_pred_buf;
- thread_data->td->mb.left_pred_buf = thread_data->td->left_pred_buf;
- thread_data->td->mb.wsrc_buf = thread_data->td->wsrc_buf;
- for (int x = 0; x < 2; x++) {
- for (int y = 0; y < 2; y++) {
- memcpy(thread_data->td->hash_value_buffer[x][y],
- cpi->td.mb.hash_value_buffer[x][y],
- AOM_BUFFER_SIZE_FOR_BLOCK_HASH *
- sizeof(*thread_data->td->hash_value_buffer[0][0]));
- thread_data->td->mb.hash_value_buffer[x][y] =
- thread_data->td->hash_value_buffer[x][y];
- }
- }
- thread_data->td->mb.mask_buf = thread_data->td->mask_buf;
- }
- if (thread_data->td->counts != &cpi->counts) {
- memcpy(thread_data->td->counts, &cpi->counts, sizeof(cpi->counts));
- }
-
- if (i < num_workers - 1) {
- thread_data->td->mb.palette_buffer = thread_data->td->palette_buffer;
- thread_data->td->mb.tmp_conv_dst = thread_data->td->tmp_conv_dst;
- for (int j = 0; j < 2; ++j) {
- thread_data->td->mb.tmp_obmc_bufs[j] =
- thread_data->td->tmp_obmc_bufs[j];
- }
-
- thread_data->td->mb.e_mbd.tmp_conv_dst = thread_data->td->mb.tmp_conv_dst;
- for (int j = 0; j < 2; ++j) {
- thread_data->td->mb.e_mbd.tmp_obmc_bufs[j] =
- thread_data->td->mb.tmp_obmc_bufs[j];
- }
- }
- }
-}
-
-void av1_encode_tiles_mt(AV1_COMP *cpi) {
- AV1_COMMON *const cm = &cpi->common;
- const int tile_cols = cm->tile_cols;
- const int tile_rows = cm->tile_rows;
- int num_workers = AOMMIN(cpi->oxcf.max_threads, tile_cols * tile_rows);
-
- if (cpi->tile_data == NULL || cpi->allocated_tiles < tile_cols * tile_rows)
- av1_alloc_tile_data(cpi);
-
- av1_init_tile_data(cpi);
- // Only run once to create threads and allocate thread data.
- if (cpi->num_workers == 0) {
- create_enc_workers(cpi, num_workers);
- } else {
- num_workers = AOMMIN(num_workers, cpi->num_workers);
- }
- prepare_enc_workers(cpi, enc_worker_hook, num_workers);
- launch_enc_workers(cpi, num_workers);
- sync_enc_workers(cpi, num_workers);
- accumulate_counters_enc_workers(cpi, num_workers);
-}
-
-// Accumulate frame counts. FRAME_COUNTS consist solely of 'unsigned int'
-// members, so we treat it as an array, and sum over the whole length.
-void av1_accumulate_frame_counts(FRAME_COUNTS *acc_counts,
- const FRAME_COUNTS *counts) {
- unsigned int *const acc = (unsigned int *)acc_counts;
- const unsigned int *const cnt = (const unsigned int *)counts;
-
- const unsigned int n_counts = sizeof(FRAME_COUNTS) / sizeof(unsigned int);
-
- for (unsigned int i = 0; i < n_counts; i++) acc[i] += cnt[i];
-}
diff --git a/third_party/aom/av1/encoder/ethread.h b/third_party/aom/av1/encoder/ethread.h
deleted file mode 100644
index 5de4b4803..000000000
--- a/third_party/aom/av1/encoder/ethread.h
+++ /dev/null
@@ -1,37 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#ifndef AOM_AV1_ENCODER_ETHREAD_H_
-#define AOM_AV1_ENCODER_ETHREAD_H_
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-struct AV1_COMP;
-struct ThreadData;
-
-typedef struct EncWorkerData {
- struct AV1_COMP *cpi;
- struct ThreadData *td;
- int start;
-} EncWorkerData;
-
-void av1_encode_tiles_mt(struct AV1_COMP *cpi);
-
-void av1_accumulate_frame_counts(struct FRAME_COUNTS *acc_counts,
- const struct FRAME_COUNTS *counts);
-
-#ifdef __cplusplus
-} // extern "C"
-#endif
-
-#endif // AOM_AV1_ENCODER_ETHREAD_H_
diff --git a/third_party/aom/av1/encoder/extend.c b/third_party/aom/av1/encoder/extend.c
deleted file mode 100644
index e9621a574..000000000
--- a/third_party/aom/av1/encoder/extend.c
+++ /dev/null
@@ -1,188 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#include "aom_dsp/aom_dsp_common.h"
-#include "aom_mem/aom_mem.h"
-#include "aom_ports/mem.h"
-
-#include "av1/common/common.h"
-#include "av1/encoder/extend.h"
-
-static void copy_and_extend_plane(const uint8_t *src, int src_pitch,
- uint8_t *dst, int dst_pitch, int w, int h,
- int extend_top, int extend_left,
- int extend_bottom, int extend_right) {
- int i, linesize;
-
- // copy the left and right most columns out
- const uint8_t *src_ptr1 = src;
- const uint8_t *src_ptr2 = src + w - 1;
- uint8_t *dst_ptr1 = dst - extend_left;
- uint8_t *dst_ptr2 = dst + w;
-
- for (i = 0; i < h; i++) {
- memset(dst_ptr1, src_ptr1[0], extend_left);
- memcpy(dst_ptr1 + extend_left, src_ptr1, w);
- memset(dst_ptr2, src_ptr2[0], extend_right);
- src_ptr1 += src_pitch;
- src_ptr2 += src_pitch;
- dst_ptr1 += dst_pitch;
- dst_ptr2 += dst_pitch;
- }
-
- // Now copy the top and bottom lines into each line of the respective
- // borders
- src_ptr1 = dst - extend_left;
- src_ptr2 = dst + dst_pitch * (h - 1) - extend_left;
- dst_ptr1 = dst + dst_pitch * (-extend_top) - extend_left;
- dst_ptr2 = dst + dst_pitch * (h)-extend_left;
- linesize = extend_left + extend_right + w;
-
- for (i = 0; i < extend_top; i++) {
- memcpy(dst_ptr1, src_ptr1, linesize);
- dst_ptr1 += dst_pitch;
- }
-
- for (i = 0; i < extend_bottom; i++) {
- memcpy(dst_ptr2, src_ptr2, linesize);
- dst_ptr2 += dst_pitch;
- }
-}
-
-static void highbd_copy_and_extend_plane(const uint8_t *src8, int src_pitch,
- uint8_t *dst8, int dst_pitch, int w,
- int h, int extend_top, int extend_left,
- int extend_bottom, int extend_right) {
- int i, linesize;
- uint16_t *src = CONVERT_TO_SHORTPTR(src8);
- uint16_t *dst = CONVERT_TO_SHORTPTR(dst8);
-
- // copy the left and right most columns out
- const uint16_t *src_ptr1 = src;
- const uint16_t *src_ptr2 = src + w - 1;
- uint16_t *dst_ptr1 = dst - extend_left;
- uint16_t *dst_ptr2 = dst + w;
-
- for (i = 0; i < h; i++) {
- aom_memset16(dst_ptr1, src_ptr1[0], extend_left);
- memcpy(dst_ptr1 + extend_left, src_ptr1, w * sizeof(src_ptr1[0]));
- aom_memset16(dst_ptr2, src_ptr2[0], extend_right);
- src_ptr1 += src_pitch;
- src_ptr2 += src_pitch;
- dst_ptr1 += dst_pitch;
- dst_ptr2 += dst_pitch;
- }
-
- // Now copy the top and bottom lines into each line of the respective
- // borders
- src_ptr1 = dst - extend_left;
- src_ptr2 = dst + dst_pitch * (h - 1) - extend_left;
- dst_ptr1 = dst + dst_pitch * (-extend_top) - extend_left;
- dst_ptr2 = dst + dst_pitch * (h)-extend_left;
- linesize = extend_left + extend_right + w;
-
- for (i = 0; i < extend_top; i++) {
- memcpy(dst_ptr1, src_ptr1, linesize * sizeof(src_ptr1[0]));
- dst_ptr1 += dst_pitch;
- }
-
- for (i = 0; i < extend_bottom; i++) {
- memcpy(dst_ptr2, src_ptr2, linesize * sizeof(src_ptr2[0]));
- dst_ptr2 += dst_pitch;
- }
-}
-
-void av1_copy_and_extend_frame(const YV12_BUFFER_CONFIG *src,
- YV12_BUFFER_CONFIG *dst) {
- // Extend src frame in buffer
- // Altref filtering assumes 16 pixel extension
- const int et_y = 16;
- const int el_y = 16;
- // Motion estimation may use src block variance with the block size up
- // to 64x64, so the right and bottom need to be extended to 64 multiple
- // or up to 16, whichever is greater.
- const int er_y =
- AOMMAX(src->y_width + 16, ALIGN_POWER_OF_TWO(src->y_width, 6)) -
- src->y_crop_width;
- const int eb_y =
- AOMMAX(src->y_height + 16, ALIGN_POWER_OF_TWO(src->y_height, 6)) -
- src->y_crop_height;
- const int uv_width_subsampling = (src->uv_width != src->y_width);
- const int uv_height_subsampling = (src->uv_height != src->y_height);
- const int et_uv = et_y >> uv_height_subsampling;
- const int el_uv = el_y >> uv_width_subsampling;
- const int eb_uv = eb_y >> uv_height_subsampling;
- const int er_uv = er_y >> uv_width_subsampling;
-
- if (src->flags & YV12_FLAG_HIGHBITDEPTH) {
- highbd_copy_and_extend_plane(src->y_buffer, src->y_stride, dst->y_buffer,
- dst->y_stride, src->y_crop_width,
- src->y_crop_height, et_y, el_y, eb_y, er_y);
-
- highbd_copy_and_extend_plane(
- src->u_buffer, src->uv_stride, dst->u_buffer, dst->uv_stride,
- src->uv_crop_width, src->uv_crop_height, et_uv, el_uv, eb_uv, er_uv);
-
- highbd_copy_and_extend_plane(
- src->v_buffer, src->uv_stride, dst->v_buffer, dst->uv_stride,
- src->uv_crop_width, src->uv_crop_height, et_uv, el_uv, eb_uv, er_uv);
- return;
- }
-
- copy_and_extend_plane(src->y_buffer, src->y_stride, dst->y_buffer,
- dst->y_stride, src->y_crop_width, src->y_crop_height,
- et_y, el_y, eb_y, er_y);
-
- copy_and_extend_plane(src->u_buffer, src->uv_stride, dst->u_buffer,
- dst->uv_stride, src->uv_crop_width, src->uv_crop_height,
- et_uv, el_uv, eb_uv, er_uv);
-
- copy_and_extend_plane(src->v_buffer, src->uv_stride, dst->v_buffer,
- dst->uv_stride, src->uv_crop_width, src->uv_crop_height,
- et_uv, el_uv, eb_uv, er_uv);
-}
-
-void av1_copy_and_extend_frame_with_rect(const YV12_BUFFER_CONFIG *src,
- YV12_BUFFER_CONFIG *dst, int srcy,
- int srcx, int srch, int srcw) {
- // If the side is not touching the bounder then don't extend.
- const int et_y = srcy ? 0 : dst->border;
- const int el_y = srcx ? 0 : dst->border;
- const int eb_y = srcy + srch != src->y_height
- ? 0
- : dst->border + dst->y_height - src->y_height;
- const int er_y = srcx + srcw != src->y_width
- ? 0
- : dst->border + dst->y_width - src->y_width;
- const int src_y_offset = srcy * src->y_stride + srcx;
- const int dst_y_offset = srcy * dst->y_stride + srcx;
-
- const int et_uv = ROUND_POWER_OF_TWO(et_y, 1);
- const int el_uv = ROUND_POWER_OF_TWO(el_y, 1);
- const int eb_uv = ROUND_POWER_OF_TWO(eb_y, 1);
- const int er_uv = ROUND_POWER_OF_TWO(er_y, 1);
- const int src_uv_offset = ((srcy * src->uv_stride) >> 1) + (srcx >> 1);
- const int dst_uv_offset = ((srcy * dst->uv_stride) >> 1) + (srcx >> 1);
- const int srch_uv = ROUND_POWER_OF_TWO(srch, 1);
- const int srcw_uv = ROUND_POWER_OF_TWO(srcw, 1);
-
- copy_and_extend_plane(src->y_buffer + src_y_offset, src->y_stride,
- dst->y_buffer + dst_y_offset, dst->y_stride, srcw, srch,
- et_y, el_y, eb_y, er_y);
-
- copy_and_extend_plane(src->u_buffer + src_uv_offset, src->uv_stride,
- dst->u_buffer + dst_uv_offset, dst->uv_stride, srcw_uv,
- srch_uv, et_uv, el_uv, eb_uv, er_uv);
-
- copy_and_extend_plane(src->v_buffer + src_uv_offset, src->uv_stride,
- dst->v_buffer + dst_uv_offset, dst->uv_stride, srcw_uv,
- srch_uv, et_uv, el_uv, eb_uv, er_uv);
-}
diff --git a/third_party/aom/av1/encoder/extend.h b/third_party/aom/av1/encoder/extend.h
deleted file mode 100644
index e0432cc97..000000000
--- a/third_party/aom/av1/encoder/extend.h
+++ /dev/null
@@ -1,32 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#ifndef AOM_AV1_ENCODER_EXTEND_H_
-#define AOM_AV1_ENCODER_EXTEND_H_
-
-#include "aom_scale/yv12config.h"
-#include "aom/aom_integer.h"
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-void av1_copy_and_extend_frame(const YV12_BUFFER_CONFIG *src,
- YV12_BUFFER_CONFIG *dst);
-
-void av1_copy_and_extend_frame_with_rect(const YV12_BUFFER_CONFIG *src,
- YV12_BUFFER_CONFIG *dst, int srcy,
- int srcx, int srch, int srcw);
-#ifdef __cplusplus
-} // extern "C"
-#endif
-
-#endif // AOM_AV1_ENCODER_EXTEND_H_
diff --git a/third_party/aom/av1/encoder/firstpass.c b/third_party/aom/av1/encoder/firstpass.c
deleted file mode 100644
index 69dd20c52..000000000
--- a/third_party/aom/av1/encoder/firstpass.c
+++ /dev/null
@@ -1,3480 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#include <limits.h>
-#include <math.h>
-#include <stdio.h>
-
-#include "config/aom_dsp_rtcd.h"
-#include "config/aom_scale_rtcd.h"
-
-#include "aom_dsp/aom_dsp_common.h"
-#include "aom_mem/aom_mem.h"
-#include "aom_ports/mem.h"
-#include "aom_ports/system_state.h"
-#include "aom_scale/aom_scale.h"
-#include "aom_scale/yv12config.h"
-
-#include "aom_dsp/variance.h"
-#include "av1/common/entropymv.h"
-#include "av1/common/quant_common.h"
-#include "av1/common/reconinter.h" // av1_setup_dst_planes()
-#include "av1/common/txb_common.h"
-#include "av1/encoder/aq_variance.h"
-#include "av1/encoder/av1_quantize.h"
-#include "av1/encoder/block.h"
-#include "av1/encoder/dwt.h"
-#include "av1/encoder/encodeframe.h"
-#include "av1/encoder/encodemb.h"
-#include "av1/encoder/encodemv.h"
-#include "av1/encoder/encoder.h"
-#include "av1/encoder/extend.h"
-#include "av1/encoder/firstpass.h"
-#include "av1/encoder/mcomp.h"
-#include "av1/encoder/rd.h"
-#include "av1/encoder/reconinter_enc.h"
-
-#define OUTPUT_FPF 0
-#define ARF_STATS_OUTPUT 0
-
-#define GROUP_ADAPTIVE_MAXQ 1
-
-#define BOOST_BREAKOUT 12.5
-#define BOOST_FACTOR 12.5
-#define FACTOR_PT_LOW 0.70
-#define FACTOR_PT_HIGH 0.90
-#define FIRST_PASS_Q 10.0
-#define GF_MAX_BOOST 90.0
-#define INTRA_MODE_PENALTY 1024
-#define KF_MIN_FRAME_BOOST 80.0
-#define KF_MAX_FRAME_BOOST 128.0
-#define MIN_ARF_GF_BOOST 240
-#define MIN_DECAY_FACTOR 0.01
-#define MIN_KF_BOOST 300
-#define NEW_MV_MODE_PENALTY 32
-#define DARK_THRESH 64
-#define DEFAULT_GRP_WEIGHT 1.0
-#define RC_FACTOR_MIN 0.75
-#define RC_FACTOR_MAX 1.75
-#define MIN_FWD_KF_INTERVAL 8
-
-#define NCOUNT_INTRA_THRESH 8192
-#define NCOUNT_INTRA_FACTOR 3
-#define NCOUNT_FRAME_II_THRESH 5.0
-
-#define DOUBLE_DIVIDE_CHECK(x) ((x) < 0 ? (x)-0.000001 : (x) + 0.000001)
-
-#if ARF_STATS_OUTPUT
-unsigned int arf_count = 0;
-#endif
-
-// Resets the first pass file to the given position using a relative seek from
-// the current position.
-static void reset_fpf_position(TWO_PASS *p, const FIRSTPASS_STATS *position) {
- p->stats_in = position;
-}
-
-// Read frame stats at an offset from the current position.
-static const FIRSTPASS_STATS *read_frame_stats(const TWO_PASS *p, int offset) {
- if ((offset >= 0 && p->stats_in + offset >= p->stats_in_end) ||
- (offset < 0 && p->stats_in + offset < p->stats_in_start)) {
- return NULL;
- }
-
- return &p->stats_in[offset];
-}
-
-static int input_stats(TWO_PASS *p, FIRSTPASS_STATS *fps) {
- if (p->stats_in >= p->stats_in_end) return EOF;
-
- *fps = *p->stats_in;
- ++p->stats_in;
- return 1;
-}
-
-static void output_stats(FIRSTPASS_STATS *stats,
- struct aom_codec_pkt_list *pktlist) {
- struct aom_codec_cx_pkt pkt;
- pkt.kind = AOM_CODEC_STATS_PKT;
- pkt.data.twopass_stats.buf = stats;
- pkt.data.twopass_stats.sz = sizeof(FIRSTPASS_STATS);
- aom_codec_pkt_list_add(pktlist, &pkt);
-
-// TEMP debug code
-#if OUTPUT_FPF
- {
- FILE *fpfile;
- fpfile = fopen("firstpass.stt", "a");
-
- fprintf(fpfile,
- "%12.0lf %12.4lf %12.0lf %12.0lf %12.0lf %12.4lf %12.4lf"
- "%12.4lf %12.4lf %12.4lf %12.4lf %12.4lf %12.4lf %12.4lf %12.4lf"
- "%12.4lf %12.4lf %12.0lf %12.0lf %12.0lf %12.4lf %12.4lf\n",
- stats->frame, stats->weight, stats->intra_error, stats->coded_error,
- stats->sr_coded_error, stats->pcnt_inter, stats->pcnt_motion,
- stats->pcnt_second_ref, stats->pcnt_neutral, stats->intra_skip_pct,
- stats->inactive_zone_rows, stats->inactive_zone_cols, stats->MVr,
- stats->mvr_abs, stats->MVc, stats->mvc_abs, stats->MVrv,
- stats->MVcv, stats->mv_in_out_count, stats->new_mv_count,
- stats->count, stats->duration);
- fclose(fpfile);
- }
-#endif
-}
-
-#if CONFIG_FP_MB_STATS
-static void output_fpmb_stats(uint8_t *this_frame_mb_stats, int stats_size,
- struct aom_codec_pkt_list *pktlist) {
- struct aom_codec_cx_pkt pkt;
- pkt.kind = AOM_CODEC_FPMB_STATS_PKT;
- pkt.data.firstpass_mb_stats.buf = this_frame_mb_stats;
- pkt.data.firstpass_mb_stats.sz = stats_size * sizeof(*this_frame_mb_stats);
- aom_codec_pkt_list_add(pktlist, &pkt);
-}
-#endif
-
-static void zero_stats(FIRSTPASS_STATS *section) {
- section->frame = 0.0;
- section->weight = 0.0;
- section->intra_error = 0.0;
- section->frame_avg_wavelet_energy = 0.0;
- section->coded_error = 0.0;
- section->sr_coded_error = 0.0;
- section->pcnt_inter = 0.0;
- section->pcnt_motion = 0.0;
- section->pcnt_second_ref = 0.0;
- section->pcnt_neutral = 0.0;
- section->intra_skip_pct = 0.0;
- section->inactive_zone_rows = 0.0;
- section->inactive_zone_cols = 0.0;
- section->MVr = 0.0;
- section->mvr_abs = 0.0;
- section->MVc = 0.0;
- section->mvc_abs = 0.0;
- section->MVrv = 0.0;
- section->MVcv = 0.0;
- section->mv_in_out_count = 0.0;
- section->new_mv_count = 0.0;
- section->count = 0.0;
- section->duration = 1.0;
-}
-
-static void accumulate_stats(FIRSTPASS_STATS *section,
- const FIRSTPASS_STATS *frame) {
- section->frame += frame->frame;
- section->weight += frame->weight;
- section->intra_error += frame->intra_error;
- section->frame_avg_wavelet_energy += frame->frame_avg_wavelet_energy;
- section->coded_error += frame->coded_error;
- section->sr_coded_error += frame->sr_coded_error;
- section->pcnt_inter += frame->pcnt_inter;
- section->pcnt_motion += frame->pcnt_motion;
- section->pcnt_second_ref += frame->pcnt_second_ref;
- section->pcnt_neutral += frame->pcnt_neutral;
- section->intra_skip_pct += frame->intra_skip_pct;
- section->inactive_zone_rows += frame->inactive_zone_rows;
- section->inactive_zone_cols += frame->inactive_zone_cols;
- section->MVr += frame->MVr;
- section->mvr_abs += frame->mvr_abs;
- section->MVc += frame->MVc;
- section->mvc_abs += frame->mvc_abs;
- section->MVrv += frame->MVrv;
- section->MVcv += frame->MVcv;
- section->mv_in_out_count += frame->mv_in_out_count;
- section->new_mv_count += frame->new_mv_count;
- section->count += frame->count;
- section->duration += frame->duration;
-}
-
-static void subtract_stats(FIRSTPASS_STATS *section,
- const FIRSTPASS_STATS *frame) {
- section->frame -= frame->frame;
- section->weight -= frame->weight;
- section->intra_error -= frame->intra_error;
- section->frame_avg_wavelet_energy -= frame->frame_avg_wavelet_energy;
- section->coded_error -= frame->coded_error;
- section->sr_coded_error -= frame->sr_coded_error;
- section->pcnt_inter -= frame->pcnt_inter;
- section->pcnt_motion -= frame->pcnt_motion;
- section->pcnt_second_ref -= frame->pcnt_second_ref;
- section->pcnt_neutral -= frame->pcnt_neutral;
- section->intra_skip_pct -= frame->intra_skip_pct;
- section->inactive_zone_rows -= frame->inactive_zone_rows;
- section->inactive_zone_cols -= frame->inactive_zone_cols;
- section->MVr -= frame->MVr;
- section->mvr_abs -= frame->mvr_abs;
- section->MVc -= frame->MVc;
- section->mvc_abs -= frame->mvc_abs;
- section->MVrv -= frame->MVrv;
- section->MVcv -= frame->MVcv;
- section->mv_in_out_count -= frame->mv_in_out_count;
- section->new_mv_count -= frame->new_mv_count;
- section->count -= frame->count;
- section->duration -= frame->duration;
-}
-
-// Calculate the linear size relative to a baseline of 1080P
-#define BASE_SIZE 2073600.0 // 1920x1080
-static double get_linear_size_factor(const AV1_COMP *cpi) {
- const double this_area = cpi->initial_width * cpi->initial_height;
- return pow(this_area / BASE_SIZE, 0.5);
-}
-
-// Calculate an active area of the image that discounts formatting
-// bars and partially discounts other 0 energy areas.
-#define MIN_ACTIVE_AREA 0.5
-#define MAX_ACTIVE_AREA 1.0
-static double calculate_active_area(const AV1_COMP *cpi,
- const FIRSTPASS_STATS *this_frame) {
- double active_pct;
-
- active_pct =
- 1.0 -
- ((this_frame->intra_skip_pct / 2) +
- ((this_frame->inactive_zone_rows * 2) / (double)cpi->common.mb_rows));
- return fclamp(active_pct, MIN_ACTIVE_AREA, MAX_ACTIVE_AREA);
-}
-
-// Calculate a modified Error used in distributing bits between easier and
-// harder frames.
-#define ACT_AREA_CORRECTION 0.5
-static double calculate_modified_err(const AV1_COMP *cpi,
- const TWO_PASS *twopass,
- const AV1EncoderConfig *oxcf,
- const FIRSTPASS_STATS *this_frame) {
- const FIRSTPASS_STATS *const stats = &twopass->total_stats;
- const double av_weight = stats->weight / stats->count;
- const double av_err = (stats->coded_error * av_weight) / stats->count;
- double modified_error =
- av_err * pow(this_frame->coded_error * this_frame->weight /
- DOUBLE_DIVIDE_CHECK(av_err),
- oxcf->two_pass_vbrbias / 100.0);
-
- // Correction for active area. Frames with a reduced active area
- // (eg due to formatting bars) have a higher error per mb for the
- // remaining active MBs. The correction here assumes that coding
- // 0.5N blocks of complexity 2X is a little easier than coding N
- // blocks of complexity X.
- modified_error *=
- pow(calculate_active_area(cpi, this_frame), ACT_AREA_CORRECTION);
-
- return fclamp(modified_error, twopass->modified_error_min,
- twopass->modified_error_max);
-}
-
-// This function returns the maximum target rate per frame.
-static int frame_max_bits(const RATE_CONTROL *rc,
- const AV1EncoderConfig *oxcf) {
- int64_t max_bits = ((int64_t)rc->avg_frame_bandwidth *
- (int64_t)oxcf->two_pass_vbrmax_section) /
- 100;
- if (max_bits < 0)
- max_bits = 0;
- else if (max_bits > rc->max_frame_bandwidth)
- max_bits = rc->max_frame_bandwidth;
-
- return (int)max_bits;
-}
-
-void av1_init_first_pass(AV1_COMP *cpi) {
- zero_stats(&cpi->twopass.total_stats);
-}
-
-void av1_end_first_pass(AV1_COMP *cpi) {
- output_stats(&cpi->twopass.total_stats, cpi->output_pkt_list);
-}
-
-static aom_variance_fn_t get_block_variance_fn(BLOCK_SIZE bsize) {
- switch (bsize) {
- case BLOCK_8X8: return aom_mse8x8;
- case BLOCK_16X8: return aom_mse16x8;
- case BLOCK_8X16: return aom_mse8x16;
- default: return aom_mse16x16;
- }
-}
-
-static unsigned int get_prediction_error(BLOCK_SIZE bsize,
- const struct buf_2d *src,
- const struct buf_2d *ref) {
- unsigned int sse;
- const aom_variance_fn_t fn = get_block_variance_fn(bsize);
- fn(src->buf, src->stride, ref->buf, ref->stride, &sse);
- return sse;
-}
-
-static aom_variance_fn_t highbd_get_block_variance_fn(BLOCK_SIZE bsize,
- int bd) {
- switch (bd) {
- default:
- switch (bsize) {
- case BLOCK_8X8: return aom_highbd_8_mse8x8;
- case BLOCK_16X8: return aom_highbd_8_mse16x8;
- case BLOCK_8X16: return aom_highbd_8_mse8x16;
- default: return aom_highbd_8_mse16x16;
- }
- break;
- case 10:
- switch (bsize) {
- case BLOCK_8X8: return aom_highbd_10_mse8x8;
- case BLOCK_16X8: return aom_highbd_10_mse16x8;
- case BLOCK_8X16: return aom_highbd_10_mse8x16;
- default: return aom_highbd_10_mse16x16;
- }
- break;
- case 12:
- switch (bsize) {
- case BLOCK_8X8: return aom_highbd_12_mse8x8;
- case BLOCK_16X8: return aom_highbd_12_mse16x8;
- case BLOCK_8X16: return aom_highbd_12_mse8x16;
- default: return aom_highbd_12_mse16x16;
- }
- break;
- }
-}
-
-static unsigned int highbd_get_prediction_error(BLOCK_SIZE bsize,
- const struct buf_2d *src,
- const struct buf_2d *ref,
- int bd) {
- unsigned int sse;
- const aom_variance_fn_t fn = highbd_get_block_variance_fn(bsize, bd);
- fn(src->buf, src->stride, ref->buf, ref->stride, &sse);
- return sse;
-}
-
-// Refine the motion search range according to the frame dimension
-// for first pass test.
-static int get_search_range(const AV1_COMP *cpi) {
- int sr = 0;
- const int dim = AOMMIN(cpi->initial_width, cpi->initial_height);
-
- while ((dim << sr) < MAX_FULL_PEL_VAL) ++sr;
- return sr;
-}
-
-// First-pass motion search for the current block of `x`.
-// Runs one diamond search starting from `ref_mv` (scaled down by >> 3) and
-// then further searches with an increasing step parameter. After every search
-// the candidate is re-scored with av1_get_mvpred_var() plus
-// NEW_MV_MODE_PENALTY; whenever that score is below *best_motion_err, the
-// score and the candidate vector are written to *best_motion_err / *best_mv.
-static void first_pass_motion_search(AV1_COMP *cpi, MACROBLOCK *x,
-                                     const MV *ref_mv, MV *best_mv,
-                                     int *best_motion_err) {
-  MACROBLOCKD *const xd = &x->e_mbd;
-  const BLOCK_SIZE bsize = xd->mi[0]->sb_type;
-  const int mv_penalty = NEW_MV_MODE_PENALTY;
-  aom_variance_fn_ptr_t v_fn_ptr = cpi->fn_ptr[bsize];
-  MV ref_mv_full = { ref_mv->row >> 3, ref_mv->col >> 3 };
-  MV cand_mv = kZeroMv;
-  int skip_steps, cand_err, step;
-
-  // The frame-size dependent search range raises the starting step parameter
-  // and reduces the number of follow-up searches by the same amount.
-  const int sr = get_search_range(cpi);
-  const int step_param = 3 + sr;
-  const int further_steps = (MAX_MVSEARCH_STEPS - 1) - 3 - sr;
-
-  // Override the default variance function to use MSE.
-  v_fn_ptr.vf = get_block_variance_fn(bsize);
-  if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
-    v_fn_ptr.vf = highbd_get_block_variance_fn(bsize, xd->bd);
-  }
-
-  // Center the initial step/diamond search on best mv.
-  cand_err = cpi->diamond_search_sad(x, &cpi->ss_cfg, &ref_mv_full, &cand_mv,
-                                     step_param, x->sadperbit16, &skip_steps,
-                                     &v_fn_ptr, ref_mv);
-  if (cand_err < INT_MAX)
-    cand_err = av1_get_mvpred_var(x, &cand_mv, ref_mv, &v_fn_ptr, 1);
-  // Only add the penalty when it cannot overflow.
-  if (cand_err < INT_MAX - mv_penalty) cand_err += mv_penalty;
-
-  if (cand_err < *best_motion_err) {
-    *best_motion_err = cand_err;
-    *best_mv = cand_mv;
-  }
-
-  // Carry out further step/diamond searches as necessary. The count reported
-  // by the previous search is used to skip that many steps.
-  step = skip_steps;
-  skip_steps = 0;
-
-  while (step < further_steps) {
-    ++step;
-
-    if (skip_steps) {
-      --skip_steps;
-      continue;
-    }
-
-    cand_err = cpi->diamond_search_sad(x, &cpi->ss_cfg, &ref_mv_full, &cand_mv,
-                                       step_param + step, x->sadperbit16,
-                                       &skip_steps, &v_fn_ptr, ref_mv);
-    if (cand_err < INT_MAX)
-      cand_err = av1_get_mvpred_var(x, &cand_mv, ref_mv, &v_fn_ptr, 1);
-    if (cand_err < INT_MAX - mv_penalty) cand_err += mv_penalty;
-
-    if (cand_err < *best_motion_err) {
-      *best_motion_err = cand_err;
-      *best_mv = cand_mv;
-    }
-  }
-}
-
-// Chooses the block size used for the macroblock at (mb_row, mb_col).
-// A dimension is 16 when the macroblock's start position plus one 8x8 unit
-// (both measured with mi_size_wide) is still below cm->mi_cols / cm->mi_rows,
-// and 8 otherwise.
-static BLOCK_SIZE get_bsize(const AV1_COMMON *cm, int mb_row, int mb_col) {
-  const int mb_units = mi_size_wide[BLOCK_16X16];
-  const int half_mb_units = mi_size_wide[BLOCK_8X8];
-  const int full_width = mb_units * mb_col + half_mb_units < cm->mi_cols;
-  const int full_height = mb_units * mb_row + half_mb_units < cm->mi_rows;
-
-  if (full_width) return full_height ? BLOCK_16X16 : BLOCK_16X8;
-  return full_height ? BLOCK_8X16 : BLOCK_8X8;
-}
-
-// Returns the first qindex whose q value (for `bit_depth`) reaches
-// FIRST_PASS_Q, or the last valid index (QINDEX_RANGE - 1) if none does.
-static int find_fp_qindex(aom_bit_depth_t bit_depth) {
-  for (int qindex = 0; qindex < QINDEX_RANGE; ++qindex) {
-    if (av1_convert_qindex_to_q(qindex, bit_depth) >= FIRST_PASS_Q)
-      return qindex;
-  }
-  return QINDEX_RANGE - 1;
-}
-
-// Selects the frame type for the first pass: a key frame for frame 0 or when
-// FRAMEFLAGS_KEY is set, unless an alt-ref refresh is pending; an inter frame
-// otherwise. Also pushes the next key frame out to INT_MAX frames.
-static void set_first_pass_params(AV1_COMP *cpi) {
-  AV1_COMMON *const cm = &cpi->common;
-  const int key_requested = cm->current_video_frame == 0 ||
-                            (cpi->frame_flags & FRAMEFLAGS_KEY);
-
-  cm->frame_type =
-      (!cpi->refresh_alt_ref_frame && key_requested) ? KEY_FRAME : INTER_FRAME;
-
-  // Do not use periodic key frames.
-  cpi->rc.frames_to_key = INT_MAX;
-}
-
-// Calculate the (population) standard deviation of the first
-// `raw_motion_err_counts` entries of `raw_motion_err_list`, i.e. of the
-// motion error of all the inter blocks of the 0,0 motion using the last
-// source frame as the reference. Returns 0 when the count is 0.
-static double raw_motion_error_stdev(int *raw_motion_err_list,
-                                     int raw_motion_err_counts) {
-  if (raw_motion_err_counts == 0) return 0;
-
-  // Sum in 64 bits so that many int-sized errors cannot overflow.
-  int64_t total = 0;
-  for (int k = 0; k < raw_motion_err_counts; k++) {
-    total += raw_motion_err_list[k];
-  }
-  const double mean = (double)total / raw_motion_err_counts;
-
-  double squared_dev_sum = 0;
-  for (int k = 0; k < raw_motion_err_counts; k++) {
-    const double dev = raw_motion_err_list[k] - mean;
-    squared_dev_sum += dev * dev;
-  }
-  return sqrt(squared_dev_sum / raw_motion_err_counts);
-}
-
-#define UL_INTRA_THRESH 50
-#define INVALID_ROW -1
-// Runs the first-pass analysis of one frame.
-//
-// The frame is walked in 16x16 macroblock steps. Every macroblock is intra
-// coded with DC_PRED; on non-intra-only frames a motion search against the
-// last frame (and, after frame 1, the golden frame) is also done and the
-// cheaper of intra/inter is kept. The accumulated errors, motion vector sums
-// and counts are turned into a FIRSTPASS_STATS record that is stored in
-// cpi->twopass.this_frame_stats, written to cpi->output_pkt_list and added to
-// the running totals. Finally the reference buffers are updated and
-// cm->current_video_frame is incremented.
-//
-// `source` is only used for its time stamps (frame duration).
-void av1_first_pass(AV1_COMP *cpi, const struct lookahead_entry *source) {
-  int mb_row, mb_col;
-  MACROBLOCK *const x = &cpi->td.mb;
-  AV1_COMMON *const cm = &cpi->common;
-  const SequenceHeader *const seq_params = &cm->seq_params;
-  const int num_planes = av1_num_planes(cm);
-  MACROBLOCKD *const xd = &x->e_mbd;
-  TileInfo tile;
-  struct macroblock_plane *const p = x->plane;
-  struct macroblockd_plane *const pd = xd->plane;
-  const PICK_MODE_CONTEXT *ctx =
-      &cpi->td.pc_root[MAX_MIB_SIZE_LOG2 - MIN_MIB_SIZE_LOG2]->none;
-  int i;
-
-  int recon_yoffset, recon_uvoffset;
-  int64_t intra_error = 0;
-  int64_t frame_avg_wavelet_energy = 0;
-  int64_t coded_error = 0;
-  int64_t sr_coded_error = 0;
-
-  int sum_mvr = 0, sum_mvc = 0;
-  int sum_mvr_abs = 0, sum_mvc_abs = 0;
-  int64_t sum_mvrs = 0, sum_mvcs = 0;
-  int mvcount = 0;
-  int intercount = 0;
-  int second_ref_count = 0;
-  const int intrapenalty = INTRA_MODE_PENALTY;
-  double neutral_count;
-  int intra_skip_count = 0;
-  int image_data_start_row = INVALID_ROW;
-  int new_mv_count = 0;
-  int sum_in_vectors = 0;
-  MV lastmv = kZeroMv;
-  TWO_PASS *twopass = &cpi->twopass;
-  int recon_y_stride, recon_uv_stride, uv_mb_height;
-
-  YV12_BUFFER_CONFIG *const lst_yv12 = get_ref_frame_buffer(cpi, LAST_FRAME);
-  YV12_BUFFER_CONFIG *gld_yv12 = get_ref_frame_buffer(cpi, GOLDEN_FRAME);
-  YV12_BUFFER_CONFIG *const new_yv12 = get_frame_new_buffer(cm);
-  const YV12_BUFFER_CONFIG *first_ref_buf = lst_yv12;
-  double intra_factor;
-  double brightness_factor;
-  BufferPool *const pool = cm->buffer_pool;
-  const int qindex = find_fp_qindex(seq_params->bit_depth);
-  const int mb_scale = mi_size_wide[BLOCK_16X16];
-
-  // One raw (0,0) motion error slot per macroblock; freed before returning.
-  int *raw_motion_err_list;
-  int raw_motion_err_counts = 0;
-  CHECK_MEM_ERROR(
-      cm, raw_motion_err_list,
-      aom_calloc(cm->mb_rows * cm->mb_cols, sizeof(*raw_motion_err_list)));
-  // First pass code requires valid last and new frame buffers.
-  assert(new_yv12 != NULL);
-  assert(frame_is_intra_only(cm) || (lst_yv12 != NULL));
-
-#if CONFIG_FP_MB_STATS
-  if (cpi->use_fp_mb_stats) {
-    av1_zero_array(cpi->twopass.frame_mb_stats_buf, cpi->initial_mbs);
-  }
-#endif
-
-  aom_clear_system_state();
-
-  xd->mi = cm->mi_grid_visible;
-  xd->mi[0] = cm->mi;
-  x->e_mbd.mi[0]->sb_type = BLOCK_16X16;
-
-  intra_factor = 0.0;
-  brightness_factor = 0.0;
-  neutral_count = 0.0;
-
-  set_first_pass_params(cpi);
-  av1_set_quantizer(cm, qindex);
-
-  av1_setup_block_planes(&x->e_mbd, seq_params->subsampling_x,
-                         seq_params->subsampling_y, num_planes);
-
-  av1_setup_src_planes(x, cpi->source, 0, 0, num_planes);
-  av1_setup_dst_planes(xd->plane, seq_params->sb_size, new_yv12, 0, 0, 0,
-                       num_planes);
-
-  if (!frame_is_intra_only(cm)) {
-    av1_setup_pre_planes(xd, 0, first_ref_buf, 0, 0, NULL, num_planes);
-  }
-
-  xd->mi = cm->mi_grid_visible;
-  xd->mi[0] = cm->mi;
-
-  // Don't store luma on the first pass since chroma is not computed
-  xd->cfl.store_y = 0;
-  av1_frame_init_quantizer(cpi);
-
-  for (i = 0; i < num_planes; ++i) {
-    p[i].coeff = ctx->coeff[i];
-    p[i].qcoeff = ctx->qcoeff[i];
-    pd[i].dqcoeff = ctx->dqcoeff[i];
-    p[i].eobs = ctx->eobs[i];
-    p[i].txb_entropy_ctx = ctx->txb_entropy_ctx[i];
-  }
-
-  av1_init_mv_probs(cm);
-  av1_init_lv_map(cm);
-  av1_initialize_rd_consts(cpi);
-
-  // Tiling is ignored in the first pass.
-  av1_tile_init(&tile, cm, 0, 0);
-
-  recon_y_stride = new_yv12->y_stride;
-  recon_uv_stride = new_yv12->uv_stride;
-  // 8 when the chroma planes have fewer rows than luma, 16 otherwise.
-  uv_mb_height = 16 >> (new_yv12->y_height > new_yv12->uv_height);
-
-  for (mb_row = 0; mb_row < cm->mb_rows; ++mb_row) {
-    MV best_ref_mv = kZeroMv;
-
-    // Reset above block coeffs.
-    xd->up_available = (mb_row != 0);
-    recon_yoffset = (mb_row * recon_y_stride * 16);
-    recon_uvoffset = (mb_row * recon_uv_stride * uv_mb_height);
-
-    // Set up limit values for motion vectors to prevent them extending
-    // outside the UMV borders.
-    x->mv_limits.row_min = -((mb_row * 16) + BORDER_MV_PIXELS_B16);
-    x->mv_limits.row_max =
-        ((cm->mb_rows - 1 - mb_row) * 16) + BORDER_MV_PIXELS_B16;
-
-    for (mb_col = 0; mb_col < cm->mb_cols; ++mb_col) {
-      int this_error;
-      // True on the first row or first column, but not at the top-left MB.
-      const int use_dc_pred = (mb_col || mb_row) && (!mb_col || !mb_row);
-      const BLOCK_SIZE bsize = get_bsize(cm, mb_row, mb_col);
-      double log_intra;
-      int level_sample;
-
-#if CONFIG_FP_MB_STATS
-      const int mb_index = mb_row * cm->mb_cols + mb_col;
-#endif
-
-      aom_clear_system_state();
-
-      const int idx_str = xd->mi_stride * mb_row * mb_scale + mb_col * mb_scale;
-      xd->mi = cm->mi_grid_visible + idx_str;
-      xd->mi[0] = cm->mi + idx_str;
-      xd->plane[0].dst.buf = new_yv12->y_buffer + recon_yoffset;
-      xd->plane[1].dst.buf = new_yv12->u_buffer + recon_uvoffset;
-      xd->plane[2].dst.buf = new_yv12->v_buffer + recon_uvoffset;
-      xd->left_available = (mb_col != 0);
-      xd->mi[0]->sb_type = bsize;
-      xd->mi[0]->ref_frame[0] = INTRA_FRAME;
-      set_mi_row_col(xd, &tile, mb_row * mb_scale, mi_size_high[bsize],
-                     mb_col * mb_scale, mi_size_wide[bsize], cm->mi_rows,
-                     cm->mi_cols);
-
-      set_plane_n4(xd, mi_size_wide[bsize], mi_size_high[bsize], num_planes);
-
-      // Do intra 16x16 prediction.
-      xd->mi[0]->segment_id = 0;
-      xd->lossless[xd->mi[0]->segment_id] = (qindex == 0);
-      xd->mi[0]->mode = DC_PRED;
-      xd->mi[0]->tx_size =
-          use_dc_pred ? (bsize >= BLOCK_16X16 ? TX_16X16 : TX_8X8) : TX_4X4;
-      av1_encode_intra_block_plane(cpi, x, bsize, 0, 0, mb_row * 2, mb_col * 2);
-      this_error = aom_get_mb_ss(x->plane[0].src_diff);
-
-      // Keep a record of blocks that have almost no intra error residual
-      // (i.e. are in effect completely flat and untextured in the intra
-      // domain). In natural videos this is uncommon, but it is much more
-      // common in animations, graphics and screen content, so may be used
-      // as a signal to detect these types of content.
-      if (this_error < UL_INTRA_THRESH) {
-        ++intra_skip_count;
-      } else if ((mb_col > 0) && (image_data_start_row == INVALID_ROW)) {
-        image_data_start_row = mb_row;
-      }
-
-      if (seq_params->use_highbitdepth) {
-        switch (seq_params->bit_depth) {
-          case AOM_BITS_8: break;
-          case AOM_BITS_10: this_error >>= 4; break;
-          case AOM_BITS_12: this_error >>= 8; break;
-          default:
-            assert(0 &&
-                   "seq_params->bit_depth should be AOM_BITS_8, "
-                   "AOM_BITS_10 or AOM_BITS_12");
-            // Release the per-frame scratch list before bailing out so the
-            // early return does not leak it.
-            aom_free(raw_motion_err_list);
-            return;
-        }
-      }
-
-      aom_clear_system_state();
-      log_intra = log(this_error + 1.0);
-      if (log_intra < 10.0)
-        intra_factor += 1.0 + ((10.0 - log_intra) * 0.05);
-      else
-        intra_factor += 1.0;
-
-      if (seq_params->use_highbitdepth)
-        level_sample = CONVERT_TO_SHORTPTR(x->plane[0].src.buf)[0];
-      else
-        level_sample = x->plane[0].src.buf[0];
-      if ((level_sample < DARK_THRESH) && (log_intra < 9.0))
-        brightness_factor += 1.0 + (0.01 * (DARK_THRESH - level_sample));
-      else
-        brightness_factor += 1.0;
-
-      // Intrapenalty below deals with situations where the intra and inter
-      // error scores are very low (e.g. a plain black frame).
-      // We do not have special cases in first pass for 0,0 and nearest etc so
-      // all inter modes carry an overhead cost estimate for the mv.
-      // When the error score is very low this causes us to pick all or lots of
-      // INTRA modes and throw lots of key frames.
-      // This penalty adds a cost matching that of a 0,0 mv to the intra case.
-      this_error += intrapenalty;
-
-      // Accumulate the intra error.
-      intra_error += (int64_t)this_error;
-
-      // Accumulate the Haar AC energy of the four 8x8 quadrants of the MB.
-      int stride = x->plane[0].src.stride;
-      uint8_t *buf = x->plane[0].src.buf;
-      for (int r8 = 0; r8 < 2; ++r8)
-        for (int c8 = 0; c8 < 2; ++c8) {
-          int hbd = xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH;
-          frame_avg_wavelet_energy += av1_haar_ac_sad_8x8_uint8_input(
-              buf + c8 * 8 + r8 * 8 * stride, stride, hbd);
-        }
-
-#if CONFIG_FP_MB_STATS
-      if (cpi->use_fp_mb_stats) {
-        // initialization
-        cpi->twopass.frame_mb_stats_buf[mb_index] = 0;
-      }
-#endif
-
-      // Set up limit values for motion vectors to prevent them extending
-      // outside the UMV borders.
-      x->mv_limits.col_min = -((mb_col * 16) + BORDER_MV_PIXELS_B16);
-      x->mv_limits.col_max =
-          ((cm->mb_cols - 1 - mb_col) * 16) + BORDER_MV_PIXELS_B16;
-
-      if (!frame_is_intra_only(cm)) {  // Do a motion search
-        int tmp_err, motion_error, raw_motion_error;
-        // Assume 0,0 motion with no mv overhead.
-        MV mv = kZeroMv, tmp_mv = kZeroMv;
-        struct buf_2d unscaled_last_source_buf_2d;
-
-        xd->plane[0].pre[0].buf = first_ref_buf->y_buffer + recon_yoffset;
-        if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
-          motion_error = highbd_get_prediction_error(
-              bsize, &x->plane[0].src, &xd->plane[0].pre[0], xd->bd);
-        } else {
-          motion_error = get_prediction_error(bsize, &x->plane[0].src,
-                                              &xd->plane[0].pre[0]);
-        }
-
-        // Compute the motion error of the 0,0 motion using the last source
-        // frame as the reference. Skip the further motion search on
-        // reconstructed frame if this error is small.
-        unscaled_last_source_buf_2d.buf =
-            cpi->unscaled_last_source->y_buffer + recon_yoffset;
-        unscaled_last_source_buf_2d.stride =
-            cpi->unscaled_last_source->y_stride;
-        if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
-          raw_motion_error = highbd_get_prediction_error(
-              bsize, &x->plane[0].src, &unscaled_last_source_buf_2d, xd->bd);
-        } else {
-          raw_motion_error = get_prediction_error(bsize, &x->plane[0].src,
-                                                  &unscaled_last_source_buf_2d);
-        }
-
-        // TODO(pengchong): Replace the hard-coded threshold
-        if (raw_motion_error > 25) {
-          // Test last reference frame using the previous best mv as the
-          // starting point (best reference) for the search.
-          first_pass_motion_search(cpi, x, &best_ref_mv, &mv, &motion_error);
-
-          // If the current best reference mv is not centered on 0,0 then do a
-          // 0,0 based search as well.
-          if (!is_zero_mv(&best_ref_mv)) {
-            tmp_err = INT_MAX;
-            first_pass_motion_search(cpi, x, &kZeroMv, &tmp_mv, &tmp_err);
-
-            if (tmp_err < motion_error) {
-              motion_error = tmp_err;
-              mv = tmp_mv;
-            }
-          }
-
-          // Search in an older reference frame.
-          if ((cm->current_video_frame > 1) && gld_yv12 != NULL) {
-            // Assume 0,0 motion with no mv overhead.
-            int gf_motion_error;
-
-            xd->plane[0].pre[0].buf = gld_yv12->y_buffer + recon_yoffset;
-            if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
-              gf_motion_error = highbd_get_prediction_error(
-                  bsize, &x->plane[0].src, &xd->plane[0].pre[0], xd->bd);
-            } else {
-              gf_motion_error = get_prediction_error(bsize, &x->plane[0].src,
-                                                     &xd->plane[0].pre[0]);
-            }
-
-            first_pass_motion_search(cpi, x, &kZeroMv, &tmp_mv,
-                                     &gf_motion_error);
-
-            if (gf_motion_error < motion_error && gf_motion_error < this_error)
-              ++second_ref_count;
-
-            // Reset to last frame as reference buffer.
-            xd->plane[0].pre[0].buf = first_ref_buf->y_buffer + recon_yoffset;
-            xd->plane[1].pre[0].buf = first_ref_buf->u_buffer + recon_uvoffset;
-            xd->plane[2].pre[0].buf = first_ref_buf->v_buffer + recon_uvoffset;
-
-            // In accumulating a score for the older reference frame take the
-            // best of the motion predicted score and the intra coded error
-            // (just as will be done for) accumulation of "coded_error" for
-            // the last frame.
-            if (gf_motion_error < this_error)
-              sr_coded_error += gf_motion_error;
-            else
-              sr_coded_error += this_error;
-          } else {
-            sr_coded_error += motion_error;
-          }
-        } else {
-          sr_coded_error += motion_error;
-        }
-
-        // Start by assuming that intra mode is best.
-        best_ref_mv.row = 0;
-        best_ref_mv.col = 0;
-
-#if CONFIG_FP_MB_STATS
-        if (cpi->use_fp_mb_stats) {
-          // intra prediction statistics
-          cpi->twopass.frame_mb_stats_buf[mb_index] = 0;
-          cpi->twopass.frame_mb_stats_buf[mb_index] |= FPMB_DCINTRA_MASK;
-          cpi->twopass.frame_mb_stats_buf[mb_index] |= FPMB_MOTION_ZERO_MASK;
-          if (this_error > FPMB_ERROR_LARGE_TH) {
-            cpi->twopass.frame_mb_stats_buf[mb_index] |= FPMB_ERROR_LARGE_MASK;
-          } else if (this_error < FPMB_ERROR_SMALL_TH) {
-            cpi->twopass.frame_mb_stats_buf[mb_index] |= FPMB_ERROR_SMALL_MASK;
-          }
-        }
-#endif
-
-        if (motion_error <= this_error) {
-          aom_clear_system_state();
-
-          // Keep a count of cases where the inter and intra were very close
-          // and very low. This helps with scene cut detection for example in
-          // cropped clips with black bars at the sides or top and bottom.
-          if (((this_error - intrapenalty) * 9 <= motion_error * 10) &&
-              (this_error < (2 * intrapenalty))) {
-            neutral_count += 1.0;
-            // Also track cases where the intra is not much worse than the inter
-            // and use this in limiting the GF/arf group length.
-          } else if ((this_error > NCOUNT_INTRA_THRESH) &&
-                     (this_error < (NCOUNT_INTRA_FACTOR * motion_error))) {
-            neutral_count +=
-                (double)motion_error / DOUBLE_DIVIDE_CHECK((double)this_error);
-          }
-
-          // Scale the vector found by the search by 8 before storing it.
-          mv.row *= 8;
-          mv.col *= 8;
-          this_error = motion_error;
-          xd->mi[0]->mode = NEWMV;
-          xd->mi[0]->mv[0].as_mv = mv;
-          xd->mi[0]->tx_size = TX_4X4;
-          xd->mi[0]->ref_frame[0] = LAST_FRAME;
-          xd->mi[0]->ref_frame[1] = NONE_FRAME;
-          av1_build_inter_predictors_sby(cm, xd, mb_row * mb_scale,
-                                         mb_col * mb_scale, NULL, bsize);
-          av1_encode_sby_pass1(cm, x, bsize);
-          sum_mvr += mv.row;
-          sum_mvr_abs += abs(mv.row);
-          sum_mvc += mv.col;
-          sum_mvc_abs += abs(mv.col);
-          sum_mvrs += mv.row * mv.row;
-          sum_mvcs += mv.col * mv.col;
-          ++intercount;
-
-          best_ref_mv = mv;
-
-#if CONFIG_FP_MB_STATS
-          if (cpi->use_fp_mb_stats) {
-            // inter prediction statistics
-            cpi->twopass.frame_mb_stats_buf[mb_index] = 0;
-            cpi->twopass.frame_mb_stats_buf[mb_index] &= ~FPMB_DCINTRA_MASK;
-            cpi->twopass.frame_mb_stats_buf[mb_index] |= FPMB_MOTION_ZERO_MASK;
-            if (this_error > FPMB_ERROR_LARGE_TH) {
-              cpi->twopass.frame_mb_stats_buf[mb_index] |=
-                  FPMB_ERROR_LARGE_MASK;
-            } else if (this_error < FPMB_ERROR_SMALL_TH) {
-              cpi->twopass.frame_mb_stats_buf[mb_index] |=
-                  FPMB_ERROR_SMALL_MASK;
-            }
-          }
-#endif
-
-          if (!is_zero_mv(&mv)) {
-            ++mvcount;
-
-#if CONFIG_FP_MB_STATS
-            if (cpi->use_fp_mb_stats) {
-              cpi->twopass.frame_mb_stats_buf[mb_index] &=
-                  ~FPMB_MOTION_ZERO_MASK;
-              // check estimated motion direction
-              if (mv.col > 0 && mv.col >= abs(mv.row)) {
-                // right direction
-                cpi->twopass.frame_mb_stats_buf[mb_index] |=
-                    FPMB_MOTION_RIGHT_MASK;
-              } else if (mv.row < 0 && abs(mv.row) >= abs(mv.col)) {
-                // up direction
-                cpi->twopass.frame_mb_stats_buf[mb_index] |=
-                    FPMB_MOTION_UP_MASK;
-              } else if (mv.col < 0 && abs(mv.col) >= abs(mv.row)) {
-                // left direction
-                cpi->twopass.frame_mb_stats_buf[mb_index] |=
-                    FPMB_MOTION_LEFT_MASK;
-              } else {
-                // down direction
-                cpi->twopass.frame_mb_stats_buf[mb_index] |=
-                    FPMB_MOTION_DOWN_MASK;
-              }
-            }
-#endif
-
-            // Non-zero vector, was it different from the last non zero vector?
-            if (!is_equal_mv(&mv, &lastmv)) ++new_mv_count;
-            lastmv = mv;
-
-            // Does the row vector point inwards or outwards?
-            if (mb_row < cm->mb_rows / 2) {
-              if (mv.row > 0)
-                --sum_in_vectors;
-              else if (mv.row < 0)
-                ++sum_in_vectors;
-            } else if (mb_row > cm->mb_rows / 2) {
-              if (mv.row > 0)
-                ++sum_in_vectors;
-              else if (mv.row < 0)
-                --sum_in_vectors;
-            }
-
-            // Does the col vector point inwards or outwards?
-            if (mb_col < cm->mb_cols / 2) {
-              if (mv.col > 0)
-                --sum_in_vectors;
-              else if (mv.col < 0)
-                ++sum_in_vectors;
-            } else if (mb_col > cm->mb_cols / 2) {
-              if (mv.col > 0)
-                ++sum_in_vectors;
-              else if (mv.col < 0)
-                --sum_in_vectors;
-            }
-          }
-        }
-        raw_motion_err_list[raw_motion_err_counts++] = raw_motion_error;
-      } else {
-        sr_coded_error += (int64_t)this_error;
-      }
-      coded_error += (int64_t)this_error;
-
-      // Adjust to the next column of MBs.
-      x->plane[0].src.buf += 16;
-      x->plane[1].src.buf += uv_mb_height;
-      x->plane[2].src.buf += uv_mb_height;
-
-      recon_yoffset += 16;
-      recon_uvoffset += uv_mb_height;
-    }
-    // Adjust to the next row of MBs. Each plane advances by its own stride.
-    x->plane[0].src.buf += 16 * x->plane[0].src.stride - 16 * cm->mb_cols;
-    x->plane[1].src.buf +=
-        uv_mb_height * x->plane[1].src.stride - uv_mb_height * cm->mb_cols;
-    x->plane[2].src.buf +=
-        uv_mb_height * x->plane[2].src.stride - uv_mb_height * cm->mb_cols;
-
-    aom_clear_system_state();
-  }
-  const double raw_err_stdev =
-      raw_motion_error_stdev(raw_motion_err_list, raw_motion_err_counts);
-  aom_free(raw_motion_err_list);
-
-  // Clamp the image start to rows/2. This number of rows is discarded top
-  // and bottom as dead data so rows / 2 means the frame is blank.
-  if ((image_data_start_row > cm->mb_rows / 2) ||
-      (image_data_start_row == INVALID_ROW)) {
-    image_data_start_row = cm->mb_rows / 2;
-  }
-  // Exclude any image dead zone
-  if (image_data_start_row > 0) {
-    intra_skip_count =
-        AOMMAX(0, intra_skip_count - (image_data_start_row * cm->mb_cols * 2));
-  }
-
-  {
-    FIRSTPASS_STATS fps;
-    // The minimum error here ensures some bit allocation to frames even
-    // in static regions. The allocation per MB declines for larger formats
-    // where the typical "real" energy per MB also falls.
-    // Initial estimate here uses sqrt(mbs) to define the min_err, where the
-    // number of mbs is proportional to the image area.
-    const int num_mbs = (cpi->oxcf.resize_mode != RESIZE_NONE)
-                            ? cpi->initial_mbs
-                            : cpi->common.MBs;
-    const double min_err = 200 * sqrt(num_mbs);
-
-    intra_factor = intra_factor / (double)num_mbs;
-    brightness_factor = brightness_factor / (double)num_mbs;
-    fps.weight = intra_factor * brightness_factor;
-
-    fps.frame = cm->current_video_frame;
-    fps.coded_error = (double)(coded_error >> 8) + min_err;
-    fps.sr_coded_error = (double)(sr_coded_error >> 8) + min_err;
-    fps.intra_error = (double)(intra_error >> 8) + min_err;
-    fps.frame_avg_wavelet_energy = (double)frame_avg_wavelet_energy;
-    fps.count = 1.0;
-    fps.pcnt_inter = (double)intercount / num_mbs;
-    fps.pcnt_second_ref = (double)second_ref_count / num_mbs;
-    fps.pcnt_neutral = (double)neutral_count / num_mbs;
-    fps.intra_skip_pct = (double)intra_skip_count / num_mbs;
-    fps.inactive_zone_rows = (double)image_data_start_row;
-    fps.inactive_zone_cols = (double)0;  // TODO(paulwilkins): fix
-    fps.raw_error_stdev = raw_err_stdev;
-
-    if (mvcount > 0) {
-      fps.MVr = (double)sum_mvr / mvcount;
-      fps.mvr_abs = (double)sum_mvr_abs / mvcount;
-      fps.MVc = (double)sum_mvc / mvcount;
-      fps.mvc_abs = (double)sum_mvc_abs / mvcount;
-      fps.MVrv =
-          ((double)sum_mvrs - ((double)sum_mvr * sum_mvr / mvcount)) / mvcount;
-      fps.MVcv =
-          ((double)sum_mvcs - ((double)sum_mvc * sum_mvc / mvcount)) / mvcount;
-      fps.mv_in_out_count = (double)sum_in_vectors / (mvcount * 2);
-      fps.new_mv_count = new_mv_count;
-      fps.pcnt_motion = (double)mvcount / num_mbs;
-    } else {
-      fps.MVr = 0.0;
-      fps.mvr_abs = 0.0;
-      fps.MVc = 0.0;
-      fps.mvc_abs = 0.0;
-      fps.MVrv = 0.0;
-      fps.MVcv = 0.0;
-      fps.mv_in_out_count = 0.0;
-      fps.new_mv_count = 0.0;
-      fps.pcnt_motion = 0.0;
-    }
-
-    // TODO(paulwilkins): Handle the case when duration is set to 0, or
-    // something less than the full time between subsequent values of
-    // cpi->source_time_stamp.
-    fps.duration = (double)(source->ts_end - source->ts_start);
-
-    // Don't want to do output stats with a stack variable!
-    twopass->this_frame_stats = fps;
-    output_stats(&twopass->this_frame_stats, cpi->output_pkt_list);
-    accumulate_stats(&twopass->total_stats, &fps);
-
-#if CONFIG_FP_MB_STATS
-    if (cpi->use_fp_mb_stats) {
-      output_fpmb_stats(twopass->frame_mb_stats_buf, cpi->initial_mbs,
-                        cpi->output_pkt_list);
-    }
-#endif
-  }
-
-  // Copy the previous Last Frame back into gf and arf buffers if
-  // the prediction is good enough... but also don't allow it to lag too far.
-  if ((twopass->sr_update_lag > 3) ||
-      ((cm->current_video_frame > 0) &&
-       (twopass->this_frame_stats.pcnt_inter > 0.20) &&
-       ((twopass->this_frame_stats.intra_error /
-         DOUBLE_DIVIDE_CHECK(twopass->this_frame_stats.coded_error)) > 2.0))) {
-    if (gld_yv12 != NULL) {
-      ref_cnt_fb(pool->frame_bufs,
-                 &cm->ref_frame_map[cpi->ref_fb_idx[GOLDEN_FRAME - 1]],
-                 cm->ref_frame_map[cpi->ref_fb_idx[LAST_FRAME - 1]]);
-    }
-    twopass->sr_update_lag = 1;
-  } else {
-    ++twopass->sr_update_lag;
-  }
-
-  aom_extend_frame_borders(new_yv12, num_planes);
-
-  // The frame we just compressed now becomes the last frame.
-  ref_cnt_fb(pool->frame_bufs,
-             &cm->ref_frame_map[cpi->ref_fb_idx[LAST_FRAME - 1]],
-             cm->new_fb_idx);
-
-  // Special case for the first frame. Copy into the GF buffer as a second
-  // reference.
-  if (cm->current_video_frame == 0 &&
-      cpi->ref_fb_idx[GOLDEN_FRAME - 1] != INVALID_IDX) {
-    ref_cnt_fb(pool->frame_bufs,
-               &cm->ref_frame_map[cpi->ref_fb_idx[GOLDEN_FRAME - 1]],
-               cm->ref_frame_map[cpi->ref_fb_idx[LAST_FRAME - 1]]);
-  }
-
-  // Use this to see what the first pass reconstruction looks like.
-  if (0) {
-    char filename[512];
-    FILE *recon_file;
-    snprintf(filename, sizeof(filename), "enc%04d.yuv",
-             (int)cm->current_video_frame);
-
-    if (cm->current_video_frame == 0)
-      recon_file = fopen(filename, "wb");
-    else
-      recon_file = fopen(filename, "ab");
-
-    // Skip the dump rather than dereference a NULL stream if the open failed.
-    if (recon_file != NULL) {
-      (void)fwrite(lst_yv12->buffer_alloc, lst_yv12->frame_size, 1, recon_file);
-      fclose(recon_file);
-    }
-  }
-
-  ++cm->current_video_frame;
-}
-
-// Returns (err_per_mb / err_divisor) raised to a q-dependent power, clamped
-// to [0.05, 5.0]. The power is q (converted from the q index for `bit_depth`)
-// times 0.01 plus `pt_low`, capped at `pt_high`.
-static double calc_correction_factor(double err_per_mb, double err_divisor,
-                                     double pt_low, double pt_high, int q,
-                                     aom_bit_depth_t bit_depth) {
-  const double base = err_per_mb / err_divisor;
-
-  // Adjustment based on actual quantizer to power term.
-  const double exponent =
-      AOMMIN(av1_convert_qindex_to_q(q, bit_depth) * 0.01 + pt_low, pt_high);
-
-  // A fractional power of a negative base is not a real number.
-  assert(!(exponent < 1.0) || base >= 0.0);
-
-  // Calculate correction factor.
-  return fclamp(pow(base, exponent), 0.05, 5.0);
-}
-
-#define ERR_DIVISOR 100.0
-// Picks the maximum q index for a section of the clip.
-//
-// Scans q upward from rc->best_quality and stops at the first q whose
-// estimated bits per MB (av1_rc_bits_per_mb() with a correction factor derived
-// from the section's average error per active MB) does not exceed the
-// normalized per-MB target. If no q qualifies the result is
-// rc->worst_quality. In AOM_CQ mode the result is not allowed to drop below
-// oxcf->cq_level.
-//
-//   section_err              error score of the section
-//   inactive_zone            inactive fraction of the frame; clamped to [0, 1]
-//   section_target_bandwidth bit target; <= 0 returns rc->worst_quality
-//   group_weight_factor      extra multiplier applied to the correction factor
-static int get_twopass_worst_quality(const AV1_COMP *cpi,
-                                     const double section_err,
-                                     double inactive_zone,
-                                     int section_target_bandwidth,
-                                     double group_weight_factor) {
-  const RATE_CONTROL *const rc = &cpi->rc;
-  const AV1EncoderConfig *const oxcf = &cpi->oxcf;
-
-  inactive_zone = fclamp(inactive_zone, 0.0, 1.0);
-
-  if (section_target_bandwidth <= 0) {
-    return rc->worst_quality;  // Highest value allowed
-  } else {
-    const int num_mbs = (cpi->oxcf.resize_mode != RESIZE_NONE)
-                            ? cpi->initial_mbs
-                            : cpi->common.MBs;
-    const int active_mbs = AOMMAX(1, num_mbs - (int)(num_mbs * inactive_zone));
-    const double av_err_per_mb = section_err / active_mbs;
-    const double speed_term = 1.0;
-    double ediv_size_correction;
-    // The division must happen in 64 bits: narrowing the shifted value to int
-    // first would truncate large bandwidth targets before they are divided.
-    const int target_norm_bits_per_mb =
-        (int)(((uint64_t)section_target_bandwidth << BPER_MB_NORMBITS) /
-              active_mbs);
-    int q;
-
-    // Larger image formats are expected to be a little harder to code
-    // relatively given the same prediction error score. This in part at
-    // least relates to the increased size and hence coding overheads of
-    // motion vectors. Some account of this is made through adjustment of
-    // the error divisor.
-    ediv_size_correction =
-        AOMMAX(0.2, AOMMIN(5.0, get_linear_size_factor(cpi)));
-    if (ediv_size_correction < 1.0)
-      ediv_size_correction = -(1.0 / ediv_size_correction);
-    ediv_size_correction *= 4.0;
-
-    // Try and pick a max Q that will be high enough to encode the
-    // content at the given rate.
-    for (q = rc->best_quality; q < rc->worst_quality; ++q) {
-      const double factor = calc_correction_factor(
-          av_err_per_mb, ERR_DIVISOR - ediv_size_correction, FACTOR_PT_LOW,
-          FACTOR_PT_HIGH, q, cpi->common.seq_params.bit_depth);
-      const int bits_per_mb = av1_rc_bits_per_mb(
-          INTER_FRAME, q, factor * speed_term * group_weight_factor,
-          cpi->common.seq_params.bit_depth);
-      if (bits_per_mb <= target_norm_bits_per_mb) break;
-    }
-
-    // Restriction on active max q for constrained quality mode.
-    if (cpi->oxcf.rc_mode == AOM_CQ) q = AOMMAX(q, oxcf->cq_level);
-    return q;
-  }
-}
-
-// Fills rc->rf_level_maxq[] for every rate factor level from INTER_NORMAL up:
-// worst_quality plus the level's q delta, but never below best_quality.
-static void setup_rf_level_maxq(AV1_COMP *cpi) {
-  RATE_CONTROL *const rc = &cpi->rc;
-
-  for (int level = INTER_NORMAL; level < RATE_FACTOR_LEVELS; ++level) {
-    const int qdelta = av1_frame_type_qdelta(cpi, level, rc->worst_quality);
-    rc->rf_level_maxq[level] =
-        AOMMAX(rc->worst_quality + qdelta, rc->best_quality);
-  }
-}
-
-// Prepares the two-pass state for the second pass.
-// Clears the running totals and, if first pass stats are available
-// (stats_in_end is non-NULL), loads the summary record found at stats_in_end,
-// derives the frame rate and the total bit budget from it, computes the total
-// modified error over all stats records and resets the VBR/static-section
-// monitoring variables.
-void av1_init_second_pass(AV1_COMP *cpi) {
-  const AV1EncoderConfig *const oxcf = &cpi->oxcf;
-  TWO_PASS *const twopass = &cpi->twopass;
-  FIRSTPASS_STATS *const total = &twopass->total_stats;
-
-  zero_stats(total);
-  zero_stats(&twopass->total_left_stats);
-
-  if (!twopass->stats_in_end) return;
-
-  *total = *twopass->stats_in_end;
-  twopass->total_left_stats = *total;
-
-  // Each frame can have a different duration, as the frame rate in the source
-  // isn't guaranteed to be constant. The frame rate prior to the first frame
-  // encoded in the second pass is a guess. However, the sum duration is not.
-  // It is calculated based on the actual durations of all frames from the
-  // first pass.
-  const double avg_frame_rate = 10000000.0 * total->count / total->duration;
-  av1_new_framerate(cpi, avg_frame_rate);
-  twopass->bits_left =
-      (int64_t)(total->duration * oxcf->target_bandwidth / 10000000.0);
-
-  // This variable monitors how far behind the second ref update is lagging.
-  twopass->sr_update_lag = 1;
-
-  // Scan the first pass file and calculate a modified total error based upon
-  // the bias/power function used to allocate bits.
-  {
-    const double avg_error =
-        total->coded_error / DOUBLE_DIVIDE_CHECK(total->count);
-    double modified_error_total = 0.0;
-
-    twopass->modified_error_min =
-        (avg_error * oxcf->two_pass_vbrmin_section) / 100;
-    twopass->modified_error_max =
-        (avg_error * oxcf->two_pass_vbrmax_section) / 100;
-
-    for (const FIRSTPASS_STATS *s = twopass->stats_in;
-         s < twopass->stats_in_end; ++s) {
-      modified_error_total += calculate_modified_err(cpi, twopass, oxcf, s);
-    }
-    twopass->modified_error_left = modified_error_total;
-  }
-
-  // Reset the vbr bits off target counters
-  cpi->rc.vbr_bits_off_target = 0;
-  cpi->rc.vbr_bits_off_target_fast = 0;
-
-  cpi->rc.rate_error_estimate = 0;
-
-  // Static sequence monitor variables.
-  twopass->kf_zeromotion_pct = 100;
-  twopass->last_kfgroup_zeromotion_pct = 100;
-
-  if (oxcf->resize_mode != RESIZE_NONE) {
-    setup_rf_level_maxq(cpi);
-  }
-}
-
-#define SR_DIFF_PART 0.0015
-#define MOTION_AMP_PART 0.003
-#define INTRA_PART 0.005
-#define DEFAULT_DECAY_LIMIT 0.75
-#define LOW_SR_DIFF_TRHESH 0.1
-#define SR_DIFF_MAX 128.0
-
-// Computes a decay rate from the per-MB gap between the second-reference
-// coded error and the coded error of `frame`. When that gap exceeds
-// LOW_SR_DIFF_TRHESH, 1.0 is reduced by weighted terms for the (capped) gap,
-// the motion amplitude and the intra percentage. The result is never lower
-// than min(DEFAULT_DECAY_LIMIT, modified inter percentage).
-static double get_sr_decay_rate(const AV1_COMP *cpi,
-                                const FIRSTPASS_STATS *frame) {
-  const int num_mbs = (cpi->oxcf.resize_mode != RESIZE_NONE) ? cpi->initial_mbs
-                                                             : cpi->common.MBs;
-  const double motion_amplitude_factor =
-      frame->pcnt_motion * ((frame->mvc_abs + frame->mvr_abs) / 2);
-  const double intra_inter_ratio =
-      frame->intra_error / DOUBLE_DIVIDE_CHECK(frame->coded_error);
-  double sr_diff = (frame->sr_coded_error - frame->coded_error) / num_mbs;
-  double pct_inter = frame->pcnt_inter;
-  double decay = 1.0;
-
-  // Discount the neutral blocks when intra is nearly as good as inter.
-  if (intra_inter_ratio < (double)NCOUNT_FRAME_II_THRESH) {
-    pct_inter = frame->pcnt_inter - frame->pcnt_neutral;
-  }
-
-  if (sr_diff > LOW_SR_DIFF_TRHESH) {
-    const double pcnt_intra = 100 * (1.0 - pct_inter);
-
-    sr_diff = AOMMIN(sr_diff, SR_DIFF_MAX);
-    decay = 1.0 - (SR_DIFF_PART * sr_diff) -
-            (MOTION_AMP_PART * motion_amplitude_factor) -
-            (INTRA_PART * pcnt_intra);
-  }
-  return AOMMAX(decay, AOMMIN(DEFAULT_DECAY_LIMIT, pct_inter));
-}
-
-// Returns the smaller of the second-reference decay rate of `frame` and the
-// share of its blocks that are inter coded without motion
-// (pcnt_inter - pcnt_motion).
-static double get_zero_motion_factor(const AV1_COMP *cpi,
-                                     const FIRSTPASS_STATS *frame) {
-  const double decay_rate = get_sr_decay_rate(cpi, frame);
-  const double still_pct = frame->pcnt_inter - frame->pcnt_motion;
-
-  return AOMMIN(decay_rate, still_pct);
-}
-
-#define ZM_POWER_FACTOR 0.75
-
-// Combines the second-reference decay rate of `next_frame` with a zero-motion
-// factor (0.95 times the zero-motion share raised to ZM_POWER_FACTOR). The
-// result is never below the zero-motion factor itself.
-static double get_prediction_decay_rate(const AV1_COMP *cpi,
-                                        const FIRSTPASS_STATS *next_frame) {
-  const double sr_decay_rate = get_sr_decay_rate(cpi, next_frame);
-  const double zero_motion_factor =
-      (0.95 * pow((next_frame->pcnt_inter - next_frame->pcnt_motion),
-                  ZM_POWER_FACTOR));
-  const double blended_rate =
-      sr_decay_rate + ((1.0 - sr_decay_rate) * zero_motion_factor);
-
-  return AOMMAX(zero_motion_factor, blended_rate);
-}
-
-// Function to test for a condition where a complex transition is followed
-// by a static section. For example in slide shows where there is a fade
-// between slides. This is to help with more optimal kf and gf positioning.
-// Returns 1 only when the trigger condition holds and each of the next
-// `still_interval` stats records is (almost) entirely zero-motion inter.
-static int detect_transition_to_still(AV1_COMP *cpi, int frame_interval,
-                                      int still_interval,
-                                      double loop_decay_rate,
-                                      double last_decay_rate) {
-  TWO_PASS *const twopass = &cpi->twopass;
-  RATE_CONTROL *const rc = &cpi->rc;
-  int still_frames = 0;
-
-  // Break clause to detect very still sections after motion
-  // For example a static image after a fade or other transition
-  // instead of a clean scene cut.
-  if (frame_interval <= rc->min_gf_interval || !(loop_decay_rate >= 0.999) ||
-      !(last_decay_rate < 0.9)) {
-    return 0;
-  }
-
-  // Look ahead a few frames to see if static condition persists...
-  while (still_frames < still_interval) {
-    const FIRSTPASS_STATS *stats = &twopass->stats_in[still_frames];
-    if (stats >= twopass->stats_in_end) break;
-
-    if (stats->pcnt_inter - stats->pcnt_motion < 0.999) break;
-    ++still_frames;
-  }
-
-  // Only if it does do we signal a transition to still.
-  return still_frames == still_interval;
-}
-
-// This function detects a flash through the high relative pcnt_second_ref
-// score in the frame following a flash frame. The offset passed in should
-// reflect this. Returns 0 when no stats record exists at `offset`.
-static int detect_flash(const TWO_PASS *twopass, int offset) {
-  const FIRSTPASS_STATS *const next_frame = read_frame_stats(twopass, offset);
-
-  if (next_frame == NULL) return 0;
-
-  // What we are looking for here is a situation where there is a
-  // brief break in prediction (such as a flash) but subsequent frames
-  // are reasonably well predicted by an earlier (pre flash) frame.
-  // The recovery after a flash is indicated by a high pcnt_second_ref
-  // compared to pcnt_inter.
-  return next_frame->pcnt_second_ref > next_frame->pcnt_inter &&
-         next_frame->pcnt_second_ref >= 0.5;
-}
-
-// Update the motion related elements to the GF arf boost calculation.
-// Writes this frame's weighted in/out motion to *mv_in_out and adds it (and
-// its absolute value) to the two in/out accumulators. When more than 5% of
-// the blocks have motion, *mv_ratio_accumulator is also increased.
-static void accumulate_frame_motion_stats(const FIRSTPASS_STATS *stats,
-                                          double *mv_in_out,
-                                          double *mv_in_out_accumulator,
-                                          double *abs_mv_in_out_accumulator,
-                                          double *mv_ratio_accumulator) {
-  const double motion_pct = stats->pcnt_motion;
-
-  // Accumulate Motion In/Out of frame stats.
-  *mv_in_out = stats->mv_in_out_count * motion_pct;
-  *mv_in_out_accumulator += *mv_in_out;
-  *abs_mv_in_out_accumulator += fabs(*mv_in_out);
-
-  if (!(motion_pct > 0.05)) return;
-
-  // Accumulate a measure of how uniform (or conversely how random) the motion
-  // field is (a ratio of abs(mv) / mv).
-  const double row_ratio =
-      fabs(stats->mvr_abs) / DOUBLE_DIVIDE_CHECK(fabs(stats->MVr));
-  const double col_ratio =
-      fabs(stats->mvc_abs) / DOUBLE_DIVIDE_CHECK(fabs(stats->MVc));
-
-  // Each ratio is capped at the corresponding mean absolute component.
-  *mv_ratio_accumulator += motion_pct * AOMMIN(row_ratio, stats->mvr_abs);
-  *mv_ratio_accumulator += motion_pct * AOMMIN(col_ratio, stats->mvc_abs);
-}
-
-#define BASELINE_ERR_PER_MB 1000.0
-// Computes the boost contribution of a single frame.
-// The base value is the ratio of a baseline per-MB error (scaled by the
-// active MB count) to the frame's coded error, multiplied by BOOST_FACTOR and
-// a correction derived from the average inter-frame q. It is then adjusted by
-// this_frame_mv_in_out and capped at max_boost times the same q correction.
-static double calc_frame_boost(AV1_COMP *cpi, const FIRSTPASS_STATS *this_frame,
-                               double this_frame_mv_in_out, double max_boost) {
-  const double lq = av1_convert_qindex_to_q(
-      cpi->rc.avg_frame_qindex[INTER_FRAME], cpi->common.seq_params.bit_depth);
-  const double boost_q_correction = AOMMIN((0.5 + (lq * 0.015)), 1.5);
-  double mv_scale;
-  double boost;
-  int mb_count;
-
-  if (cpi->oxcf.resize_mode != RESIZE_NONE)
-    mb_count = cpi->initial_mbs;
-  else
-    mb_count = cpi->common.MBs;
-
-  // Discount any inactive region of the image, keeping at least one MB.
-  mb_count =
-      (int)AOMMAX(1, mb_count * calculate_active_area(cpi, this_frame));
-
-  // The underlying boost is based on the inter error ratio.
-  boost = (BASELINE_ERR_PER_MB * mb_count) /
-          DOUBLE_DIVIDE_CHECK(this_frame->coded_error);
-  boost = boost * BOOST_FACTOR * boost_q_correction;
-
-  // New data entering the frame (e.g. zoom out) raises the boost; a net
-  // balance of motion out of the frame (zoom in) lowers it slightly, halving
-  // it in the extreme case. this_frame_mv_in_out ranges from -1.0 to +1.0.
-  if (this_frame_mv_in_out > 0.0)
-    mv_scale = this_frame_mv_in_out * 2.0;
-  else
-    mv_scale = this_frame_mv_in_out / 2.0;
-  boost += boost * mv_scale;
-
-  return AOMMIN(boost, max_boost * boost_q_correction);
-}
-
-// Estimates the boost for an ARF (or next GF) proposed at `offset` in the
-// first-pass stats. Up to f_frames records from `offset` onwards and up to
-// b_frames records before it are scored with calc_frame_boost(), each weighted
-// by the prediction decay accumulated so far in that direction. The partial
-// sums are stored in *f_boost and *b_boost. The return value is their sum,
-// raised to at least 20 * (b_frames + f_frames) and MIN_ARF_GF_BOOST.
-static int calc_arf_boost(AV1_COMP *cpi, int offset, int f_frames, int b_frames,
-                          int *f_boost, int *b_boost) {
-  TWO_PASS *const twopass = &cpi->twopass;
-  const int min_group_boost = (b_frames + f_frames) * 20;
-  double score = 0.0;
-  double decay = 1.0;
-  double frame_mv_in_out = 0.0;
-  double mv_ratio_acc = 0.0;
-  double mv_in_out_acc = 0.0;
-  double abs_mv_in_out_acc = 0.0;
-  int step;
-  int total;
-
-  // Forward pass, starting at the proposed arf/next gf position.
-  for (step = 0; step < f_frames; ++step) {
-    const int pos = offset + step;
-    const FIRSTPASS_STATS *const stats = read_frame_stats(twopass, pos);
-    if (stats == NULL) break;
-
-    // Refresh the motion related inputs of the boost calculation.
-    accumulate_frame_motion_stats(stats, &frame_mv_in_out, &mv_in_out_acc,
-                                  &abs_mv_in_out_acc, &mv_ratio_acc);
-
-    // A flash frame and the recovery frame after it both score poorly, so
-    // neither is allowed to drag the decay accumulator down.
-    if (!detect_flash(twopass, pos) && !detect_flash(twopass, pos + 1)) {
-      decay *= get_prediction_decay_rate(cpi, stats);
-      if (decay < MIN_DECAY_FACTOR) decay = MIN_DECAY_FACTOR;
-    }
-
-    score +=
-        decay * calc_frame_boost(cpi, stats, frame_mv_in_out, GF_MAX_BOOST);
-  }
-  *f_boost = (int)score;
-
-  // Start from a clean slate for the backward pass.
-  score = 0.0;
-  decay = 1.0;
-  frame_mv_in_out = 0.0;
-  mv_ratio_acc = 0.0;
-  mv_in_out_acc = 0.0;
-  abs_mv_in_out_acc = 0.0;
-
-  // Backward pass, walking towards the last gf position.
-  for (step = 1; step <= b_frames; ++step) {
-    const int pos = offset - step;
-    const FIRSTPASS_STATS *const stats = read_frame_stats(twopass, pos);
-    if (stats == NULL) break;
-
-    // Refresh the motion related inputs of the boost calculation.
-    accumulate_frame_motion_stats(stats, &frame_mv_in_out, &mv_in_out_acc,
-                                  &abs_mv_in_out_acc, &mv_ratio_acc);
-
-    // Same flash handling as in the forward pass.
-    if (!detect_flash(twopass, pos) && !detect_flash(twopass, pos + 1)) {
-      decay *= get_prediction_decay_rate(cpi, stats);
-      if (decay < MIN_DECAY_FACTOR) decay = MIN_DECAY_FACTOR;
-    }
-
-    score +=
-        decay * calc_frame_boost(cpi, stats, frame_mv_in_out, GF_MAX_BOOST);
-  }
-  *b_boost = (int)score;
-
-  // Combine both directions and apply the two lower bounds.
-  total = *f_boost + *b_boost;
-  if (total < min_group_boost) total = min_group_boost;
-  total = AOMMAX(total, MIN_ARF_GF_BOOST);
-
-  return total;
-}
-
-// Calculate a section intra ratio used in setting max loop filter.
-static int calculate_section_intra_ratio(const FIRSTPASS_STATS *begin,
- const FIRSTPASS_STATS *end,
- int section_length) {
- const FIRSTPASS_STATS *s = begin;
- double intra_error = 0.0;
- double coded_error = 0.0;
- int i = 0;
-
- while (s < end && i < section_length) {
- intra_error += s->intra_error;
- coded_error += s->coded_error;
- ++s;
- ++i;
- }
-
- return (int)(intra_error / DOUBLE_DIVIDE_CHECK(coded_error));
-}
-
-// Works out how many bits the whole GF/ARF group may spend.
-// The group receives the share of the remaining kf group bits that matches
-// its share of the remaining kf group error (gf_group_err). The result is
-// clamped at 0 from below, then at kf_group_bits from above, and finally at
-// the per-frame maximum times the baseline gf interval.
-static int64_t calculate_total_gf_group_bits(AV1_COMP *cpi,
-                                             double gf_group_err) {
-  const RATE_CONTROL *const rc = &cpi->rc;
-  const TWO_PASS *const twopass = &cpi->twopass;
-  const int max_bits = frame_max_bits(rc, &cpi->oxcf);
-  const int64_t interval_cap = (int64_t)max_bits * rc->baseline_gf_interval;
-  int64_t bits = 0;
-
-  // Proportional share of what is left in the kf group, if anything is left.
-  if ((twopass->kf_group_bits > 0) && (twopass->kf_group_error_left > 0)) {
-    bits = (int64_t)(twopass->kf_group_bits *
-                     (gf_group_err / twopass->kf_group_error_left));
-  }
-
-  // Clamp odd edge cases.
-  if (bits < 0) {
-    bits = 0;
-  } else if (bits > twopass->kf_group_bits) {
-    bits = twopass->kf_group_bits;
-  }
-
-  // Respect the user supplied data rate variability limit.
-  if (bits > interval_cap) bits = interval_cap;
-
-  return bits;
-}
-
-// Returns the number of extra bits to give to the boosted frame(s) of a
-// group. Each of the frame_count frames counts as 100 allocation chunks and
-// the boost adds `boost` chunks; the boosted share of total_group_bits is
-// boost / (frame_count * 100 + boost). Returns 0 for invalid inputs, which
-// can arise e.g. through rounding errors.
-static int calculate_boost_bits(int frame_count, int boost,
-                                int64_t total_group_bits) {
-  int chunks;
-  int64_t extra_bits;
-
-  if (boost == 0 || total_group_bits <= 0 || frame_count <= 0) return 0;
-
-  chunks = (frame_count * 100) + boost;
-
-  // Scale both terms down for large boosts to prevent overflow.
-  if (boost >= 1024) {
-    const int shrink = boost >> 10;
-    boost /= shrink;
-    chunks /= shrink;
-  }
-
-  extra_bits = ((int64_t)boost * total_group_bits) / chunks;
-  return AOMMAX((int)extra_bits, 0);
-}
-
-#if USE_SYMM_MULTI_LAYER
-// #define CHCEK_GF_PARAMETER
-#ifdef CHCEK_GF_PARAMETER
-// Debug helper: appends a textual dump of the GF group layout to
-// "GF_PARAMS.txt" in the current directory.
-//   gf_group    - group whose per-frame parameters are dumped
-//   gf_interval - printed as the header of this dump
-//   frame_nums  - index of the last entry to print (inclusive)
-// The dump is best-effort: if the file cannot be opened nothing is written.
-void check_frame_params(GF_GROUP *const gf_group, int gf_interval,
-                        int frame_nums) {
-  // Printable names, indexed by the numeric update_type value.
-  static const char *update_type_strings[] = {
-    "KF_UPDATE",          "LF_UPDATE",      "GF_UPDATE",
-    "ARF_UPDATE",         "OVERLAY_UPDATE", "BRF_UPDATE",
-    "LAST_BIPRED_UPDATE", "BIPRED_UPDATE",  "INTNL_OVERLAY_UPDATE",
-    "INTNL_ARF_UPDATE"
-  };
-  FILE *fid = fopen("GF_PARAMS.txt", "a");
-
-  // fopen() returns NULL on failure (e.g. read-only directory); passing that
-  // to fprintf()/fclose() would be undefined behavior.
-  if (fid == NULL) return;
-
-  fprintf(fid, "\n{%d}\n", gf_interval);
-  for (int i = 0; i <= frame_nums; ++i) {
-    fprintf(fid, "%s %d %d %d %d\n",
-            update_type_strings[gf_group->update_type[i]],
-            gf_group->arf_src_offset[i], gf_group->arf_pos_in_gf[i],
-            gf_group->arf_update_idx[i], gf_group->pyramid_level[i]);
-  }
-
-  fprintf(fid, "number of nodes in each level: \n");
-  for (int i = 0; i < MAX_PYRAMID_LVL; ++i) {
-    fprintf(fid, "lvl %d: %d ", i, gf_group->pyramid_lvl_nodes[i]);
-  }
-  fprintf(fid, "\n");
-  fclose(fid);
-}
-#endif // CHCEK_GF_PARAMETER
-// Maps a frame update type to the rate-factor level used for it:
-// the main ARF gets GF_ARF_STD, backward references and internal ARFs get
-// GF_ARF_LOW, and every other update type gets INTER_NORMAL.
-static int update_type_2_rf_level(FRAME_UPDATE_TYPE update_type) {
-  switch (update_type) {
-    case ARF_UPDATE: return GF_ARF_STD;
-
-    case BRF_UPDATE:
-    case INTNL_ARF_UPDATE: return GF_ARF_LOW;
-
-    case LF_UPDATE:
-    case OVERLAY_UPDATE:
-    case LAST_BIPRED_UPDATE:
-    case BIPRED_UPDATE:
-    case INTNL_OVERLAY_UPDATE:
-    default: return INTER_NORMAL;
-  }
-}
-
-// Recursively fills gf_group entries for the frames strictly between display
-// positions l and r, appending at *frame_ind and advancing it.
-// Spans shorter than 4 become plain leaf frames that use arf_ind. Longer
-// spans get an internal ARF at the midpoint on pyramid level `level`, then
-// the left half, then the matching internal overlay, then the right half,
-// with both halves handled one level lower.
-static void set_multi_layer_params(GF_GROUP *const gf_group, int l, int r,
-                                   int *frame_ind, int arf_ind, int level) {
-  if (r - l >= 4) {
-    const int mid = (l + r) / 2;
-    const int arf_slot = *frame_ind;
-
-    // Internal ARF for the midpoint frame.
-    gf_group->update_type[arf_slot] = INTNL_ARF_UPDATE;
-    gf_group->arf_src_offset[arf_slot] = mid - l - 1;
-    gf_group->arf_pos_in_gf[arf_slot] = 0;
-    gf_group->arf_update_idx[arf_slot] = 1;  // mark all internal ARF 1
-    gf_group->pyramid_level[arf_slot] = level;
-    ++gf_group->pyramid_lvl_nodes[level];
-    *frame_ind = arf_slot + 1;
-
-    // Frames displayed before the midpoint.
-    set_multi_layer_params(gf_group, l, mid, frame_ind, 1, level - 1);
-
-    // The overlay remembers where its ARF sits so that bit allocation can
-    // find it later.
-    {
-      const int overlay_slot = *frame_ind;
-      gf_group->update_type[overlay_slot] = INTNL_OVERLAY_UPDATE;
-      gf_group->arf_src_offset[overlay_slot] = 0;
-      gf_group->arf_pos_in_gf[overlay_slot] = arf_slot;
-      gf_group->arf_update_idx[overlay_slot] = 1;
-      gf_group->pyramid_level[overlay_slot] = 0;
-      *frame_ind = overlay_slot + 1;
-    }
-
-    // Frames displayed after the midpoint.
-    set_multi_layer_params(gf_group, mid, r, frame_ind, arf_ind, level - 1);
-    return;
-  }
-
-  // Short span: every frame in between is a leaf, not a look-ahead frame.
-  for (int display = l + 1; display < r; ++display) {
-    const int slot = (*frame_ind)++;
-    gf_group->update_type[slot] = LF_UPDATE;
-    gf_group->arf_src_offset[slot] = 0;
-    gf_group->arf_pos_in_gf[slot] = 0;
-    gf_group->arf_update_idx[slot] = arf_ind;
-    gf_group->pyramid_level[slot] = 0;
-    ++gf_group->pyramid_lvl_nodes[0];
-  }
-}
-
-// Picks the pyramid height for a GF interval of pyramid_width frames:
-// 4 for widths above 12, 3 for widths above 6, otherwise 2.
-// The width is asserted to lie in [4, 16].
-static INLINE unsigned char get_pyramid_height(int pyramid_width) {
-  assert(pyramid_width <= 16 && pyramid_width >= 4 &&
-         "invalid gf interval for pyramid structure");
-
-  if (pyramid_width > 12) return 4;
-  if (pyramid_width > 6) return 3;
-  return 2;
-}
-
-// Lays out a multi-layer (pyramid) GF group covering gf_interval frames.
-// Entry 0 is the leading key/overlay frame and entry 1 the main ARF (ALT0);
-// the remaining entries are produced by set_multi_layer_params().
-// Returns the number of entries written to gf_group.
-static int construct_multi_layer_gf_structure(GF_GROUP *const gf_group,
-                                              const int gf_interval) {
-  int next_slot;
-
-  gf_group->pyramid_height = get_pyramid_height(gf_interval);
-  assert(gf_group->pyramid_height <= MAX_PYRAMID_LVL);
-
-  av1_zero_array(gf_group->pyramid_lvl_nodes, MAX_PYRAMID_LVL);
-
-  // Entry 0: every GF group begins with a key or overlay frame.
-  gf_group->update_type[0] = OVERLAY_UPDATE;
-  gf_group->arf_src_offset[0] = 0;
-  gf_group->arf_pos_in_gf[0] = 0;
-  gf_group->arf_update_idx[0] = 0;
-  gf_group->pyramid_level[0] = 0;
-
-  // Entry 1: ALT0, sourced from the last frame of the interval and placed at
-  // the top of the pyramid.
-  gf_group->update_type[1] = ARF_UPDATE;
-  gf_group->arf_src_offset[1] = gf_interval - 1;
-  gf_group->arf_pos_in_gf[1] = 0;
-  gf_group->arf_update_idx[1] = 0;
-  gf_group->pyramid_level[1] = gf_group->pyramid_height;
-
-  // Everything else is filled in recursively, one level below the top.
-  next_slot = 2;
-  set_multi_layer_params(gf_group, 0, gf_interval, &next_slot, 0,
-                         gf_group->pyramid_height - 1);
-  return next_slot;
-}
-
-// Builds the multi-layer GF group structure for the current
-// rc->baseline_gf_interval (asserted to be in [4, MAX_PYRAMID_SIZE]).
-// After the pyramid layout is constructed, entry 0 receives its real update
-// type, internal ARFs are counted into cpi->num_extra_arfs, rf levels are
-// derived from the update types, and the entry that starts the next group is
-// given defined values.
-void define_customized_gf_group_structure(AV1_COMP *cpi) {
-  RATE_CONTROL *const rc = &cpi->rc;
-  GF_GROUP *const gf_group = &cpi->twopass.gf_group;
-  const int key_frame = cpi->common.frame_type == KEY_FRAME;
-
-  assert(rc->baseline_gf_interval >= 4 &&
-         rc->baseline_gf_interval <= MAX_PYRAMID_SIZE);
-
-  const int gf_update_frames =
-      construct_multi_layer_gf_structure(gf_group, rc->baseline_gf_interval);
-  int idx;
-
-  cpi->num_extra_arfs = 0;
-
-  for (idx = 0; idx < gf_update_frames; ++idx) {
-    // Fields that this structure does not use get their defaults.
-    gf_group->bidir_pred_enabled[idx] = 0;
-    gf_group->brf_src_offset[idx] = 0;
-
-    if (idx != 0) {
-      if (gf_group->update_type[idx] == INTNL_ARF_UPDATE)
-        ++cpi->num_extra_arfs;
-    } else if (key_frame) {
-      // For key frames the frame target rate is already set and it is also
-      // the golden frame, so nothing else is assigned here.
-      gf_group->update_type[idx] = KF_UPDATE;
-      continue;
-    } else {
-      gf_group->update_type[idx] =
-          rc->source_alt_ref_active ? OVERLAY_UPDATE : GF_UPDATE;
-    }
-
-    // The rf level follows from the update type.
-    gf_group->rf_level[idx] =
-        update_type_2_rf_level(gf_group->update_type[idx]);
-  }
-
-  // NOTE: idx now refers to the entry one past the end of the sequence, which
-  //       is the start frame of the next group. It must be configured here,
-  //       otherwise the data would be undefined prior to the call to
-  //       av1_rc_get_second_pass_params().
-  gf_group->bidir_pred_enabled[idx] = 0;
-  gf_group->brf_src_offset[idx] = 0;
-  gf_group->arf_update_idx[idx] = 0;
-  // This value is only used for INTNL_OVERLAY_UPDATE
-  gf_group->arf_pos_in_gf[idx] = 0;
-  // This parameter is useless?
-  gf_group->arf_ref_idx[idx] = 0;
-
-  if (rc->source_alt_ref_pending) {
-    gf_group->update_type[idx] = OVERLAY_UPDATE;
-    gf_group->rf_level[idx] = INTER_NORMAL;
-  } else {
-    gf_group->update_type[idx] = GF_UPDATE;
-    gf_group->rf_level[idx] = GF_ARF_STD;
-  }
-#ifdef CHCEK_GF_PARAMETER
-  check_frame_params(gf_group, rc->baseline_gf_interval, gf_update_frames);
-#endif
-}
-
-// It is an example of how to define a GF structure manually. The function will
-// result in exactly the same GF group structure as
-// define_customized_gf_group_structure() when rc->baseline_gf_interval == 4
-#if USE_MANUAL_GF4_STRUCT
-#define GF_INTERVAL_4 4
-static const unsigned char gf4_multi_layer_params[][GF_FRAME_PARAMS] = {  // one row per gf_group index; columns: update_type, arf_src_offset, arf_pos_in_gf, arf_update_idx
- {
- // gf_group->index == 0 (Frame 0)
- // It can also be a KEY frame; define_gf_group_structure_4() replaces
- // this default update type for entry 0.
- OVERLAY_UPDATE, // update_type (default value)
- 0, // arf_src_offset
- 0, // arf_pos_in_gf
- 0 // arf_update_idx
- },
- {
- // gf_group->index == 1 (Frame 4)
- ARF_UPDATE, // update_type
- GF_INTERVAL_4 - 1, // arf_src_offset
- 0, // arf_pos_in_gf
- 0 // arf_update_idx
- },
- {
- // gf_group->index == 2 (Frame 2)
- INTNL_ARF_UPDATE, // update_type
- (GF_INTERVAL_4 >> 1) - 1, // arf_src_offset
- 0, // arf_pos_in_gf
- 0 // arf_update_idx
- },
- {
- // gf_group->index == 3 (Frame 1)
- LAST_BIPRED_UPDATE, // update_type
- 0, // arf_src_offset
- 0, // arf_pos_in_gf
- 0 // arf_update_idx
- },
-
- {
- // gf_group->index == 4 (Frame 2 - OVERLAY)
- INTNL_OVERLAY_UPDATE, // update_type
- 0, // arf_src_offset
- 2, // arf_pos_in_gf (gf_group index of the matching INTNL_ARF_UPDATE row)
- 0 // arf_update_idx
- },
- {
- // gf_group->index == 5 (Frame 3)
- LF_UPDATE, // update_type
- 0, // arf_src_offset
- 0, // arf_pos_in_gf
- 1 // arf_update_idx
- }
-};
-
-// Builds the GF group structure for a baseline GF interval of exactly
-// GF_INTERVAL_4 frames from the gf4_multi_layer_params table.
-//
-// Each table row is read left to right: update_type, arf_src_offset,
-// arf_pos_in_gf, arf_update_idx. Entry 0 is special: for key frames nothing
-// but the bidir/brf defaults is written, otherwise its update type is chosen
-// from rc->source_alt_ref_active instead of the table default.
-//
-// Returns the number of group entries that were configured (interval + 2).
-static int define_gf_group_structure_4(AV1_COMP *cpi) {
-  RATE_CONTROL *const rc = &cpi->rc;
-  TWO_PASS *const twopass = &cpi->twopass;
-  GF_GROUP *const gf_group = &twopass->gf_group;
-  const int key_frame = cpi->common.frame_type == KEY_FRAME;
-
-  assert(rc->baseline_gf_interval == GF_INTERVAL_4);
-
-  const int gf_update_frames = rc->baseline_gf_interval + 2;
-  int frame_index;
-
-  for (frame_index = 0; frame_index < gf_update_frames; ++frame_index) {
-    // Column cursor into the current table row.
-    int param_idx = 0;
-
-    // Bi-directional prediction is not used by this structure.
-    gf_group->bidir_pred_enabled[frame_index] = 0;
-    gf_group->brf_src_offset[frame_index] = 0;
-
-    if (frame_index == 0) {
-      // For key frames the frame target rate is already set and it
-      // is also the golden frame.
-      if (key_frame) continue;
-
-      // The table only holds a default for this entry; the real type depends
-      // on whether an alt-ref is currently active.
-      if (rc->source_alt_ref_active) {
-        gf_group->update_type[frame_index] = OVERLAY_UPDATE;
-      } else {
-        gf_group->update_type[frame_index] = GF_UPDATE;
-      }
-      // Step over the update_type column exactly once so that the reads
-      // below stay aligned with their columns and inside the row.
-      param_idx++;
-    } else {
-      gf_group->update_type[frame_index] =
-          gf4_multi_layer_params[frame_index][param_idx++];
-    }
-
-    // setup other parameters
-    gf_group->rf_level[frame_index] =
-        update_type_2_rf_level(gf_group->update_type[frame_index]);
-
-    // == arf_src_offset ==
-    gf_group->arf_src_offset[frame_index] =
-        gf4_multi_layer_params[frame_index][param_idx++];
-
-    // == arf_pos_in_gf ==
-    gf_group->arf_pos_in_gf[frame_index] =
-        gf4_multi_layer_params[frame_index][param_idx++];
-
-    // == arf_update_idx ==
-    // This column belongs in arf_update_idx; storing it in brf_src_offset
-    // would leave arf_update_idx unset for the whole group.
-    gf_group->arf_update_idx[frame_index] =
-        gf4_multi_layer_params[frame_index][param_idx];
-  }
-
-  // NOTE: We need to configure the frame at the end of the sequence + 1 that
-  //       will be the start frame for the next group. Otherwise prior to the
-  //       call to av1_rc_get_second_pass_params() the data will be undefined.
-  gf_group->arf_update_idx[frame_index] = 0;
-  gf_group->arf_ref_idx[frame_index] = 0;
-
-  if (rc->source_alt_ref_pending) {
-    gf_group->update_type[frame_index] = OVERLAY_UPDATE;
-    gf_group->rf_level[frame_index] = INTER_NORMAL;
-  } else {
-    gf_group->update_type[frame_index] = GF_UPDATE;
-    gf_group->rf_level[frame_index] = GF_ARF_STD;
-  }
-
-  gf_group->bidir_pred_enabled[frame_index] = 0;
-  gf_group->brf_src_offset[frame_index] = 0;
-
-  // This value is only used for INTNL_OVERLAY_UPDATE
-  gf_group->arf_pos_in_gf[frame_index] = 0;
-
-  return gf_update_frames;
-}
-#endif // USE_MANUAL_GF4_STRUCT
-#endif // USE_SYMM_MULTI_LAYER
-
-static void define_gf_group_structure(AV1_COMP *cpi) {  // Fills cpi->twopass.gf_group with the per-frame layout of the coming GF group.
- RATE_CONTROL *const rc = &cpi->rc;
-
-#if USE_SYMM_MULTI_LAYER
- const int valid_customized_gf_length =
- rc->baseline_gf_interval >= 4 &&
- rc->baseline_gf_interval <= MAX_PYRAMID_SIZE;
- // used the new structure only if extra_arf is allowed
- if (valid_customized_gf_length && rc->source_alt_ref_pending &&
- cpi->extra_arf_allowed > 0) {
-#if USE_MANUAL_GF4_STRUCT
- if (rc->baseline_gf_interval == 4)
- define_gf_group_structure_4(cpi);
- else
-#endif
- define_customized_gf_group_structure(cpi);
- cpi->new_bwdref_update_rule = 1;
- return;
- } else {
- cpi->new_bwdref_update_rule = 0;
- }
-#endif
-
- TWO_PASS *const twopass = &cpi->twopass;
- GF_GROUP *const gf_group = &twopass->gf_group;
- int i;
- int frame_index = 0;
- const int key_frame = cpi->common.frame_type == KEY_FRAME;
-
- // The use of bi-predictive frames is only enabled when extra ARFs are
- // allowed and the following 3 conditions are met:
- // (1) An ALTREF is pending (rc->source_alt_ref_pending);
- // (2) The bi-predictive group interval is non-zero; and
- // (3) The bi-predictive group interval does not exceed the golden group
- // interval minus the pending ALTREF frame.
- const int is_bipred_enabled =
- cpi->extra_arf_allowed && rc->source_alt_ref_pending &&
- rc->bipred_group_interval &&
- rc->bipred_group_interval <=
- (rc->baseline_gf_interval - rc->source_alt_ref_pending);
- int bipred_group_end = 0;
- int bipred_frame_index = 0;
-
- const unsigned char ext_arf_interval =
- (unsigned char)(rc->baseline_gf_interval / (cpi->num_extra_arfs + 1) - 1);
- int which_arf = cpi->num_extra_arfs;  // counts down towards 0 as subgroups are completed
- int subgroup_interval[MAX_EXT_ARFS + 1];  // only written and read when an ALTREF is pending
- int is_sg_bipred_enabled = is_bipred_enabled;
- int accumulative_subgroup_interval = 0;
-
- // For key frames the frame target rate is already set and it
- // is also the golden frame.
- // === [frame_index == 0] ===
- if (!key_frame) {
- if (rc->source_alt_ref_active) {
- gf_group->update_type[frame_index] = OVERLAY_UPDATE;
- gf_group->rf_level[frame_index] = INTER_NORMAL;
- } else {
- gf_group->update_type[frame_index] = GF_UPDATE;
- gf_group->rf_level[frame_index] = GF_ARF_STD;
- }
- gf_group->arf_update_idx[frame_index] = 0;
- gf_group->arf_ref_idx[frame_index] = 0;
- }
-
- gf_group->bidir_pred_enabled[frame_index] = 0;
- gf_group->brf_src_offset[frame_index] = 0;
-
- frame_index++;
-
- bipred_frame_index++;
-
- // === [frame_index == 1] ===
- if (rc->source_alt_ref_pending) {
- gf_group->update_type[frame_index] = ARF_UPDATE;
- gf_group->rf_level[frame_index] = GF_ARF_STD;
- gf_group->arf_src_offset[frame_index] =
- (unsigned char)(rc->baseline_gf_interval - 1);
-
- gf_group->arf_update_idx[frame_index] = 0;
- gf_group->arf_ref_idx[frame_index] = 0;
-
- gf_group->bidir_pred_enabled[frame_index] = 0;
- gf_group->brf_src_offset[frame_index] = 0;
- // NOTE: "bipred_frame_index" stays unchanged for ARF_UPDATE frames.
-
- // Work out the ARFs' positions in this gf group
- // NOTE(weitinglin): ALT_REFs' are indexed inversely, but coded in display
- // order (except for the original ARF). In the example of three ALT_REF's,
- // We index ALTREF's as: KEY ----- ALT2 ----- ALT1 ----- ALT0
- // but code them in the following order:
- // KEY-ALT0-ALT2 ----- OVERLAY2-ALT1 ----- OVERLAY1 ----- OVERLAY0
- //
- // arf_pos_for_ovrly[]: Position for OVERLAY
- // arf_pos_in_gf[]: Position for ALTREF
- cpi->arf_pos_for_ovrly[0] = frame_index + cpi->num_extra_arfs +
- gf_group->arf_src_offset[frame_index] + 1;
- for (i = 0; i < cpi->num_extra_arfs; ++i) {
- cpi->arf_pos_for_ovrly[i + 1] =
- frame_index + (cpi->num_extra_arfs - i) * (ext_arf_interval + 2);
- subgroup_interval[i] = cpi->arf_pos_for_ovrly[i] -
- cpi->arf_pos_for_ovrly[i + 1] - (i == 0 ? 1 : 2);
- }
- subgroup_interval[cpi->num_extra_arfs] =
- cpi->arf_pos_for_ovrly[cpi->num_extra_arfs] - frame_index -
- (cpi->num_extra_arfs == 0 ? 1 : 2);
-
- ++frame_index;
-
- // Insert an extra ARF
- // === [frame_index == 2] ===
- if (cpi->num_extra_arfs) {
- gf_group->update_type[frame_index] = INTNL_ARF_UPDATE;
- gf_group->rf_level[frame_index] = GF_ARF_LOW;
- gf_group->arf_src_offset[frame_index] = ext_arf_interval;
-
- gf_group->arf_update_idx[frame_index] = which_arf;
- gf_group->arf_ref_idx[frame_index] = 0;
- ++frame_index;
- }
- accumulative_subgroup_interval += subgroup_interval[cpi->num_extra_arfs];
- }
-
- for (i = 0; i < rc->baseline_gf_interval - rc->source_alt_ref_pending; ++i) {  // remaining frames: the interval minus the ARF, if one is pending
- gf_group->arf_update_idx[frame_index] = which_arf;
- gf_group->arf_ref_idx[frame_index] = which_arf;
-
- // If we are going to have ARFs, check whether we can have BWDREF in this
- // subgroup, and further, whether we can have ARF subgroup which contains
- // the BWDREF subgroup but contained within the GF group:
- //
- // GF group --> ARF subgroup --> BWDREF subgroup
- if (rc->source_alt_ref_pending) {
- is_sg_bipred_enabled =
- is_bipred_enabled &&
- (subgroup_interval[which_arf] > rc->bipred_group_interval);
- }
-
- // NOTE: BIDIR_PRED is only enabled when the length of the bi-predictive
- // frame group interval is strictly smaller than that of the GOLDEN
- // FRAME group interval.
- // TODO(zoeliu): Currently BIDIR_PRED is only enabled when alt-ref is on.
- if (is_sg_bipred_enabled && !bipred_group_end) {
- const int cur_brf_src_offset = rc->bipred_group_interval - 1;
-
- if (bipred_frame_index == 1) {
- // --- BRF_UPDATE ---
- gf_group->update_type[frame_index] = BRF_UPDATE;
- gf_group->rf_level[frame_index] = GF_ARF_LOW;
- gf_group->brf_src_offset[frame_index] = cur_brf_src_offset;
- } else if (bipred_frame_index == rc->bipred_group_interval) {
- // --- LAST_BIPRED_UPDATE ---
- gf_group->update_type[frame_index] = LAST_BIPRED_UPDATE;
- gf_group->rf_level[frame_index] = INTER_NORMAL;
- gf_group->brf_src_offset[frame_index] = 0;
-
- // Reset the bi-predictive frame index.
- bipred_frame_index = 0;
- } else {
- // --- BIPRED_UPDATE ---
- gf_group->update_type[frame_index] = BIPRED_UPDATE;
- gf_group->rf_level[frame_index] = INTER_NORMAL;
- gf_group->brf_src_offset[frame_index] = 0;
- }
- gf_group->bidir_pred_enabled[frame_index] = 1;
-
- bipred_frame_index++;
- // Check whether the next bi-predictive frame group would entirely be
- // included within the current golden frame group.
- // In addition, we need to avoid coding a BRF right before an ARF.
- if (bipred_frame_index == 1 &&
- (i + 2 + cur_brf_src_offset) >= accumulative_subgroup_interval) {
- bipred_group_end = 1;
- }
- } else {
- gf_group->update_type[frame_index] = LF_UPDATE;
- gf_group->rf_level[frame_index] = INTER_NORMAL;
- gf_group->bidir_pred_enabled[frame_index] = 0;
- gf_group->brf_src_offset[frame_index] = 0;
- }
-
- ++frame_index;
-
- // Check if we need to update the ARF.
- if (is_sg_bipred_enabled && cpi->num_extra_arfs && which_arf > 0 &&
- frame_index > cpi->arf_pos_for_ovrly[which_arf]) {
- --which_arf;
- accumulative_subgroup_interval += subgroup_interval[which_arf] + 1;
-
- // Meet the new subgroup; Reset the bipred_group_end flag.
- bipred_group_end = 0;
- // Insert another extra ARF after the overlay frame
- if (which_arf) {
- gf_group->update_type[frame_index] = INTNL_ARF_UPDATE;
- gf_group->rf_level[frame_index] = GF_ARF_LOW;
- gf_group->arf_src_offset[frame_index] = ext_arf_interval;
-
- gf_group->arf_update_idx[frame_index] = which_arf;
- gf_group->arf_ref_idx[frame_index] = 0;
- ++frame_index;
- }
- }
- }
-
- // NOTE: We need to configure the frame at the end of the sequence + 1 that
- // will be the start frame for the next group. Otherwise prior to the
- // call to av1_rc_get_second_pass_params() the data will be undefined.
- gf_group->arf_update_idx[frame_index] = 0;
- gf_group->arf_ref_idx[frame_index] = 0;
-
- if (rc->source_alt_ref_pending) {
- gf_group->update_type[frame_index] = OVERLAY_UPDATE;
- gf_group->rf_level[frame_index] = INTER_NORMAL;
-
- cpi->arf_pos_in_gf[0] = 1;
- if (cpi->num_extra_arfs) {
- // Overwrite the update_type for extra-ARF's corresponding internal
- // OVERLAY's: Change from LF_UPDATE to INTNL_OVERLAY_UPDATE.
- for (i = cpi->num_extra_arfs; i > 0; --i) {
- cpi->arf_pos_in_gf[i] =
- (i == cpi->num_extra_arfs ? 2 : cpi->arf_pos_for_ovrly[i + 1] + 1);
-
- gf_group->update_type[cpi->arf_pos_for_ovrly[i]] = INTNL_OVERLAY_UPDATE;
- gf_group->rf_level[cpi->arf_pos_for_ovrly[i]] = INTER_NORMAL;
- }
- }
- } else {
- gf_group->update_type[frame_index] = GF_UPDATE;
- gf_group->rf_level[frame_index] = GF_ARF_STD;
- }
-
- gf_group->bidir_pred_enabled[frame_index] = 0;
- gf_group->brf_src_offset[frame_index] = 0;
-}
-
-#if USE_SYMM_MULTI_LAYER
-#define LEAF_REDUCTION_FACTOR 0.75f  // fraction of a leaf frame's target taken away in allocate_gf_group_bits()
-#define LVL_3_BOOST_FACTOR 0.8f
-#define LVL_2_BOOST_FACTOR 0.3f
-
-static float_t lvl_budget_factor[MAX_PYRAMID_LVL - 1][MAX_PYRAMID_LVL - 1] = {  // indexed as [pyramid_height - 2][pyramid_height - 1 - pyramid_level] by allocate_gf_group_bits()
- { 1, 0, 0 },
- { LVL_3_BOOST_FACTOR, 0, 0 }, // Leaking budget works better
- { LVL_3_BOOST_FACTOR, (1 - LVL_3_BOOST_FACTOR) * LVL_2_BOOST_FACTOR,
- (1 - LVL_3_BOOST_FACTOR) * (1 - LVL_2_BOOST_FACTOR) }
-};
-#endif // USE_SYMM_MULTI_LAYER
-static void allocate_gf_group_bits(AV1_COMP *cpi, int64_t gf_group_bits,
- double group_error, int gf_arf_bits) {  // Defines the GF group structure, then fills gf_group->bit_allocation[] for its frames.
- RATE_CONTROL *const rc = &cpi->rc;
- const AV1EncoderConfig *const oxcf = &cpi->oxcf;
- TWO_PASS *const twopass = &cpi->twopass;
- GF_GROUP *const gf_group = &twopass->gf_group;
- FIRSTPASS_STATS frame_stats;
- int i;
- int frame_index = 0;
- int target_frame_size;
- int key_frame;
- const int max_bits = frame_max_bits(&cpi->rc, &cpi->oxcf);
- int64_t total_group_bits = gf_group_bits;
- double modified_err = 0.0;
- double err_fraction;
- int ext_arf_boost[MAX_EXT_ARFS];
-
- define_gf_group_structure(cpi);
-
- av1_zero_array(ext_arf_boost, MAX_EXT_ARFS);
-
- key_frame = cpi->common.frame_type == KEY_FRAME;
-
- // For key frames the frame target rate is already set and it
- // is also the golden frame.
- // === [frame_index == 0] ===
- if (!key_frame) {
- if (rc->source_alt_ref_active)
- gf_group->bit_allocation[frame_index] = 0;
- else
- gf_group->bit_allocation[frame_index] = gf_arf_bits;
-
- // Step over the golden frame / overlay frame
- if (EOF == input_stats(twopass, &frame_stats)) return;
- }
-
- // Deduct the boost bits for arf (or gf if it is not a key frame)
- // from the group total.
- if (rc->source_alt_ref_pending || !key_frame) total_group_bits -= gf_arf_bits;
-
- frame_index++;
-
- // Store the bits to spend on the ARF if there is one.
- // === [frame_index == 1] ===
- if (rc->source_alt_ref_pending) {
- gf_group->bit_allocation[frame_index] = gf_arf_bits;
-
- ++frame_index;
-
- // Skip all the extra-ARF's right after ARF at the starting segment of
- // the current GF group.
- if (cpi->num_extra_arfs) {
- while (gf_group->update_type[frame_index] == INTNL_ARF_UPDATE)
- ++frame_index;
- }
- }
-
- // Allocate bits to the other frames in the group.
- for (i = 0; i < rc->baseline_gf_interval - rc->source_alt_ref_pending; ++i) {
- if (EOF == input_stats(twopass, &frame_stats)) break;
-
- modified_err = calculate_modified_err(cpi, twopass, oxcf, &frame_stats);
-
- if (group_error > 0)
- err_fraction = modified_err / DOUBLE_DIVIDE_CHECK(group_error);
- else
- err_fraction = 0.0;
-
- target_frame_size = (int)((double)total_group_bits * err_fraction);  // share proportional to this frame's modified error
-
- target_frame_size =
- clamp(target_frame_size, 0, AOMMIN(max_bits, (int)total_group_bits));
-
- if (gf_group->update_type[frame_index] == BRF_UPDATE) {
- // Boost up the allocated bits on BWDREF_FRAME
- gf_group->bit_allocation[frame_index] =
- target_frame_size + (target_frame_size >> 2);
- } else if (gf_group->update_type[frame_index] == LAST_BIPRED_UPDATE) {
- // Press down the allocated bits on LAST_BIPRED_UPDATE frames
- gf_group->bit_allocation[frame_index] =
- target_frame_size - (target_frame_size >> 1);
- } else if (gf_group->update_type[frame_index] == BIPRED_UPDATE) {
- // TODO(zoeliu): To investigate whether the allocated bits on
- // BIPRED_UPDATE frames need to be further adjusted.
- gf_group->bit_allocation[frame_index] = target_frame_size;
-#if USE_SYMM_MULTI_LAYER
- } else if (cpi->new_bwdref_update_rule &&
- gf_group->update_type[frame_index] == INTNL_OVERLAY_UPDATE) {
- assert(gf_group->pyramid_height <= MAX_PYRAMID_LVL &&
- gf_group->pyramid_height >= 0 &&
- "non-valid height for a pyramid structure");
-
- int arf_pos = gf_group->arf_pos_in_gf[frame_index];
- gf_group->bit_allocation[frame_index] = 0;
-
- gf_group->bit_allocation[arf_pos] = target_frame_size;  // the overlay's share is credited to its internal ARF entry
-#if MULTI_LVL_BOOST_VBR_CQ
- const int pyr_h = gf_group->pyramid_height - 2;
- const int this_lvl = gf_group->pyramid_level[arf_pos];
- const int dist2top = gf_group->pyramid_height - 1 - this_lvl;
-
- const float_t budget =
- LEAF_REDUCTION_FACTOR * gf_group->pyramid_lvl_nodes[0];
- const float_t lvl_boost = budget * lvl_budget_factor[pyr_h][dist2top] /
- gf_group->pyramid_lvl_nodes[this_lvl];
-
- gf_group->bit_allocation[arf_pos] += (int)(target_frame_size * lvl_boost);
-#endif // MULTI_LVL_BOOST_VBR_CQ
-#endif // USE_SYMM_MULTI_LAYER
- } else {
- assert(gf_group->update_type[frame_index] == LF_UPDATE ||
- gf_group->update_type[frame_index] == INTNL_OVERLAY_UPDATE);
- gf_group->bit_allocation[frame_index] = target_frame_size;
-#if MULTI_LVL_BOOST_VBR_CQ
- if (cpi->new_bwdref_update_rule) {
- gf_group->bit_allocation[frame_index] -=
- (int)(target_frame_size * LEAF_REDUCTION_FACTOR);
- }
-#endif // MULTI_LVL_BOOST_VBR_CQ
- }
-
- ++frame_index;
-
- // Skip all the extra-ARF's.
- if (cpi->num_extra_arfs) {
- while (gf_group->update_type[frame_index] == INTNL_ARF_UPDATE)
- ++frame_index;
- }
- }
-
-#if USE_SYMM_MULTI_LAYER
- if (cpi->new_bwdref_update_rule == 0 && rc->source_alt_ref_pending) {
-#else
- if (rc->source_alt_ref_pending) {
-#endif
- if (cpi->num_extra_arfs) {
- // NOTE: For bit allocation, move the allocated bits associated with
- // INTNL_OVERLAY_UPDATE to the corresponding INTNL_ARF_UPDATE.
- // i > 0 for extra-ARF's and i == 0 for ARF:
- // arf_pos_for_ovrly[i]: Position for INTNL_OVERLAY_UPDATE
- // arf_pos_in_gf[i]: Position for INTNL_ARF_UPDATE
- for (i = cpi->num_extra_arfs; i > 0; --i) {
- assert(gf_group->update_type[cpi->arf_pos_for_ovrly[i]] ==
- INTNL_OVERLAY_UPDATE);
-
- // Encoder's choice:
- // Set show_existing_frame == 1 for all extra-ARF's, and hence
- // allocate zero bits for all internal OVERLAY frames.
- gf_group->bit_allocation[cpi->arf_pos_in_gf[i]] =
- gf_group->bit_allocation[cpi->arf_pos_for_ovrly[i]];
- gf_group->bit_allocation[cpi->arf_pos_for_ovrly[i]] = 0;
- }
- }
- }
-}
-
-// Analyse and define a gf/arf group.
-//
-// Starting from the current first-pass stats position, scans forward to pick
-// the length of the next golden-frame (GF) / alt-ref (ARF) group, decides
-// whether the group ends in an ARF, derives the boost for the GF/ARF, and
-// finally hands the group's bit budget to allocate_gf_group_bits().
-//
-// cpi:        encoder instance; rc, twopass and several cpi fields
-//             (extra_arf_allowed, num_extra_arfs, preserve_arf_as_gld) are
-//             updated here.
-// this_frame: stats of the frame at the start of the group. It is overwritten
-//             with successive frames' stats while scanning.
-//
-// The stats read position (twopass->stats_in) is restored to its value on
-// entry before returning.
-static void define_gf_group(AV1_COMP *cpi, FIRSTPASS_STATS *this_frame) {
- AV1_COMMON *const cm = &cpi->common;
- RATE_CONTROL *const rc = &cpi->rc;
- AV1EncoderConfig *const oxcf = &cpi->oxcf;
- TWO_PASS *const twopass = &cpi->twopass;
- FIRSTPASS_STATS next_frame;
- // Remember where we started so the stats position can be rewound later.
- const FIRSTPASS_STATS *const start_pos = twopass->stats_in;
- int i;
-
- double boost_score = 0.0;
-#if !CONFIG_FIX_GF_LENGTH
- double old_boost_score = 0.0;
- double mv_ratio_accumulator_thresh;
- int active_max_gf_interval;
- int active_min_gf_interval;
-#endif
- double gf_group_err = 0.0;
-#if GROUP_ADAPTIVE_MAXQ
- double gf_group_raw_error = 0.0;
-#endif
- double gf_group_skip_pct = 0.0;
- double gf_group_inactive_zone_rows = 0.0;
- double gf_first_frame_err = 0.0;
- double mod_frame_err = 0.0;
-
- double mv_ratio_accumulator = 0.0;
- double decay_accumulator = 1.0;
- double zero_motion_accumulator = 1.0;
-
- double loop_decay_rate = 1.00;
- double last_loop_decay_rate = 1.00;
-
- double this_frame_mv_in_out = 0.0;
- double mv_in_out_accumulator = 0.0;
- double abs_mv_in_out_accumulator = 0.0;
-
- unsigned int allow_alt_ref = is_altref_enabled(cpi);
-
- int f_boost = 0;
- int b_boost = 0;
- int flash_detected;
- int64_t gf_group_bits;
- double gf_group_error_left;
- int gf_arf_bits;
- const int is_key_frame = frame_is_intra_only(cm);
- // 1 when the first frame's cost has already been accounted for elsewhere
- // (key frame, or overlay of a previous ARF); also used as a +1 offset on
- // the interval limits in the breakout test below.
- const int arf_active_or_kf = is_key_frame || rc->source_alt_ref_active;
-
- cpi->extra_arf_allowed = 1;
-
- // Reset the GF group data structures unless this is a key
- // frame in which case it will already have been done.
- if (is_key_frame == 0) {
- av1_zero(twopass->gf_group);
- }
-
- aom_clear_system_state();
- av1_zero(next_frame);
-
- // Load stats for the current frame.
- mod_frame_err = calculate_modified_err(cpi, twopass, oxcf, this_frame);
-
- // Note the error of the frame at the start of the group. This will be
- // the GF frame error if we code a normal gf.
- gf_first_frame_err = mod_frame_err;
-
- // If this is a key frame or the overlay from a previous arf then
- // the error score / cost of this frame has already been accounted for.
- // The accumulators are pre-decremented so that the first loop iteration
- // below, which adds this frame's values, nets out to zero.
- if (arf_active_or_kf) {
- gf_group_err -= gf_first_frame_err;
-#if GROUP_ADAPTIVE_MAXQ
- gf_group_raw_error -= this_frame->coded_error;
-#endif
- gf_group_skip_pct -= this_frame->intra_skip_pct;
- gf_group_inactive_zone_rows -= this_frame->inactive_zone_rows;
- }
-#if !CONFIG_FIX_GF_LENGTH
- // Motion breakout threshold for loop below depends on image size.
- mv_ratio_accumulator_thresh =
- (cpi->initial_height + cpi->initial_width) / 4.0;
- // Set a maximum and minimum interval for the GF group.
- // If the image appears almost completely static we can extend beyond this.
- {
- int int_max_q = (int)(av1_convert_qindex_to_q(
- twopass->active_worst_quality, cpi->common.seq_params.bit_depth));
- int int_lbq = (int)(av1_convert_qindex_to_q(
- rc->last_boosted_qindex, cpi->common.seq_params.bit_depth));
-
- active_min_gf_interval = rc->min_gf_interval + AOMMIN(2, int_max_q / 200);
- if (active_min_gf_interval > rc->max_gf_interval)
- active_min_gf_interval = rc->max_gf_interval;
-
- // The value chosen depends on the active Q range. At low Q we have
- // bits to spare and are better with a smaller interval and smaller boost.
- // At high Q when there are few bits to spare we are better with a longer
- // interval to spread the cost of the GF.
- active_max_gf_interval = 12 + AOMMIN(4, (int_lbq / 6));
-
- // We have: active_min_gf_interval <= rc->max_gf_interval
- if (active_max_gf_interval < active_min_gf_interval)
- active_max_gf_interval = active_min_gf_interval;
- else if (active_max_gf_interval > rc->max_gf_interval)
- active_max_gf_interval = rc->max_gf_interval;
- }
-#endif // !CONFIG_FIX_GF_LENGTH
- double avg_sr_coded_error = 0;
- double avg_raw_err_stdev = 0;
- int non_zero_stdev_count = 0;
-
- // Scan forward one frame per iteration; on exit 'i' is the number of
- // frames examined, which becomes the candidate group length.
- i = 0;
- while (i < rc->static_scene_max_gf_interval && i < rc->frames_to_key) {
- ++i;
-
- // Accumulate error score of frames in this gf group.
- mod_frame_err = calculate_modified_err(cpi, twopass, oxcf, this_frame);
- gf_group_err += mod_frame_err;
-#if GROUP_ADAPTIVE_MAXQ
- gf_group_raw_error += this_frame->coded_error;
-#endif
- gf_group_skip_pct += this_frame->intra_skip_pct;
- gf_group_inactive_zone_rows += this_frame->inactive_zone_rows;
-
- if (EOF == input_stats(twopass, &next_frame)) break;
-
- // Test for the case where there is a brief flash but the prediction
- // quality back to an earlier frame is then restored.
- flash_detected = detect_flash(twopass, 0);
-
- // Update the motion related elements to the boost calculation.
- accumulate_frame_motion_stats(
- &next_frame, &this_frame_mv_in_out, &mv_in_out_accumulator,
- &abs_mv_in_out_accumulator, &mv_ratio_accumulator);
- // sum up the metric values of current gf group
- avg_sr_coded_error += next_frame.sr_coded_error;
- // Only frames with a non-negligible stdev contribute to the average.
- if (fabs(next_frame.raw_error_stdev) > 0.000001) {
- non_zero_stdev_count++;
- avg_raw_err_stdev += next_frame.raw_error_stdev;
- }
-
- // Accumulate the effect of prediction quality decay.
- if (!flash_detected) {
- last_loop_decay_rate = loop_decay_rate;
- loop_decay_rate = get_prediction_decay_rate(cpi, &next_frame);
-
- decay_accumulator = decay_accumulator * loop_decay_rate;
-
- // Monitor for static sections.
- zero_motion_accumulator = AOMMIN(
- zero_motion_accumulator, get_zero_motion_factor(cpi, &next_frame));
-
- // Break clause to detect very still sections after motion. For example,
- // a static image after a fade or other transition.
- if (detect_transition_to_still(cpi, i, 5, loop_decay_rate,
- last_loop_decay_rate)) {
- allow_alt_ref = 0;
- break;
- }
- }
-
- // Calculate a boost number for this frame.
- boost_score +=
- decay_accumulator *
- calc_frame_boost(cpi, &next_frame, this_frame_mv_in_out, GF_MAX_BOOST);
-#if CONFIG_FIX_GF_LENGTH
- if (i == (FIXED_GF_LENGTH + 1)) break;
-#else
- // Skip breaking condition for CONFIG_FIX_GF_LENGTH
- // Break out conditions.
- if (
- // Break at active_max_gf_interval unless almost totally static.
- (i >= (active_max_gf_interval + arf_active_or_kf) &&
- zero_motion_accumulator < 0.995) ||
- (
- // Don't break out with a very short interval.
- (i >= active_min_gf_interval + arf_active_or_kf) &&
- (!flash_detected) &&
- ((mv_ratio_accumulator > mv_ratio_accumulator_thresh) ||
- (abs_mv_in_out_accumulator > 3.0) ||
- (mv_in_out_accumulator < -2.0) ||
- ((boost_score - old_boost_score) < BOOST_BREAKOUT)))) {
- // If GF group interval is < 12, we force it to be 8. Otherwise,
- // if it is >= 12, we keep it as is.
- // NOTE: 'i' is 1 more than the GF group interval candidate that is being
- // checked.
- if (i == (8 + 1) || i >= (12 + 1)) {
- boost_score = old_boost_score;
- break;
- }
- }
- old_boost_score = boost_score;
-#endif // CONFIG_FIX_GF_LENGTH
- // Advance: the frame just read becomes the current frame.
- *this_frame = next_frame;
- }
- twopass->gf_zeromotion_pct = (int)(zero_motion_accumulator * 1000.0);
-
- // Was the group length constrained by the requirement for a new KF?
- rc->constrained_gf_group = (i >= rc->frames_to_key) ? 1 : 0;
-
- const int num_mbs = (cpi->oxcf.resize_mode != RESIZE_NONE) ? cpi->initial_mbs
- : cpi->common.MBs;
- assert(num_mbs > 0);
- // Turn the accumulated sums into per-frame averages (guarding against
- // division by zero).
- if (i) avg_sr_coded_error /= i;
-
- if (non_zero_stdev_count) avg_raw_err_stdev /= non_zero_stdev_count;
-
- // Disable extra altrefs and backward refs for "still" gf group:
- // zero_motion_accumulator: minimum percentage of (0,0) motion;
- // avg_sr_coded_error: average of the SSE per pixel of each frame;
- // avg_raw_err_stdev: average of the standard deviation of (0,0)
- // motion error per block of each frame.
- const int disable_bwd_extarf =
- (zero_motion_accumulator > MIN_ZERO_MOTION &&
- avg_sr_coded_error / num_mbs < MAX_SR_CODED_ERROR &&
- avg_raw_err_stdev < MAX_RAW_ERR_VAR);
-
- if (disable_bwd_extarf) cpi->extra_arf_allowed = 0;
-
-#define REDUCE_GF_LENGTH_THRESH 4
-#define REDUCE_GF_LENGTH_TO_KEY_THRESH 9
-#define REDUCE_GF_LENGTH_BY 1
- // Offset passed to calc_arf_boost(); set to -roll_back when the group is
- // shortened below.
- int alt_offset = 0;
-#if REDUCE_LAST_GF_LENGTH
- // TODO(weitinglin): The length reduction strategy is tuned using AOM_Q
- // mode, and hurting the performance of VBR mode. We need to investigate how
- // to adjust GF length for other modes.
-
- int allow_gf_length_reduction =
- cpi->oxcf.rc_mode == AOM_Q || cpi->extra_arf_allowed == 0;
-
- // We are going to have an alt ref, but we don't need to do the adjustment
- // for lossless mode
- if (allow_alt_ref && allow_gf_length_reduction &&
- (i < cpi->oxcf.lag_in_frames) && (i >= rc->min_gf_interval) &&
- !is_lossless_requested(&cpi->oxcf)) {
- // adjust length of this gf group if one of the following condition met
- // 1: only one overlay frame left and this gf is too long
- // 2: next gf group is too short to have arf compared to the current gf
-
- // maximum length of next gf group
- const int next_gf_len = rc->frames_to_key - i;
- const int single_overlay_left =
- next_gf_len == 0 && i > REDUCE_GF_LENGTH_THRESH;
- // the next gf is probably going to have a ARF but it will be shorter than
- // this gf
- const int unbalanced_gf =
- i > REDUCE_GF_LENGTH_TO_KEY_THRESH &&
- next_gf_len + 1 < REDUCE_GF_LENGTH_TO_KEY_THRESH &&
- next_gf_len + 1 >= rc->min_gf_interval;
-
- if (single_overlay_left || unbalanced_gf) {
- // Note: Tried roll_back = DIVIDE_AND_ROUND(i, 8), but it does not work
- // better in the current setting
- const int roll_back = REDUCE_GF_LENGTH_BY;
- alt_offset = -roll_back;
- i -= roll_back;
- }
- }
-#endif
-
- // Should we use the alternate reference frame.
- if (allow_alt_ref && (i < cpi->oxcf.lag_in_frames) &&
- (i >= rc->min_gf_interval)) {
- // Calculate the boost for alt ref.
- rc->gfu_boost =
- calc_arf_boost(cpi, alt_offset, (i - 1), (i - 1), &f_boost, &b_boost);
- rc->source_alt_ref_pending = 1;
-
- // do not replace ARFs with overlay frames, and keep it as GOLDEN_REF
- cpi->preserve_arf_as_gld = 1;
- } else {
- rc->gfu_boost = AOMMAX((int)boost_score, MIN_ARF_GF_BOOST);
- rc->source_alt_ref_pending = 0;
- cpi->preserve_arf_as_gld = 0;
- }
-
- // Set the interval until the next gf.
- // If forward keyframes are enabled, ensure the final gf group obeys the
- // MIN_FWD_KF_INTERVAL.
- if (cpi->oxcf.fwd_kf_enabled &&
- ((twopass->stats_in - i + rc->frames_to_key) < twopass->stats_in_end)) {
- if (i == rc->frames_to_key) {
- rc->baseline_gf_interval = i;
- // if the last gf group will be smaller than MIN_FWD_KF_INTERVAL
- } else if ((rc->frames_to_key - i <
- AOMMAX(MIN_FWD_KF_INTERVAL, rc->min_gf_interval)) &&
- (rc->frames_to_key != i)) {
- // if possible, merge the last two gf groups
- if (rc->frames_to_key <= MAX_PYRAMID_SIZE) {
- rc->baseline_gf_interval = rc->frames_to_key;
- // if merging the last two gf groups creates a group that is too long,
- // split them and force the last gf group to be the MIN_FWD_KF_INTERVAL
- } else {
- rc->baseline_gf_interval = rc->frames_to_key - MIN_FWD_KF_INTERVAL;
- }
- } else {
- rc->baseline_gf_interval =
- i - (is_key_frame || rc->source_alt_ref_pending);
- }
- } else {
- // The interval is one shorter than 'i' for a key frame or when an ARF is
- // pending.
- rc->baseline_gf_interval = i - (is_key_frame || rc->source_alt_ref_pending);
- }
-
-#if REDUCE_LAST_ALT_BOOST
-#define LAST_ALR_BOOST_FACTOR 0.2f
- rc->arf_boost_factor = 1.0;
- if (rc->source_alt_ref_pending && !is_lossless_requested(&cpi->oxcf)) {
- // Reduce the boost of altref in the last gf group
- if (rc->frames_to_key - i == REDUCE_GF_LENGTH_BY ||
- rc->frames_to_key - i == 0) {
- rc->arf_boost_factor = LAST_ALR_BOOST_FACTOR;
- }
- }
-#endif
-
- if (!cpi->extra_arf_allowed) {
- cpi->num_extra_arfs = 0;
- } else {
-#if USE_SYMM_MULTI_LAYER
- if (rc->baseline_gf_interval == 4 && rc->source_alt_ref_pending)
- cpi->num_extra_arfs = 1;
- else
- cpi->num_extra_arfs = get_number_of_extra_arfs(
- rc->baseline_gf_interval, rc->source_alt_ref_pending);
-#else
- // Compute how many extra alt_refs we can have
- cpi->num_extra_arfs = get_number_of_extra_arfs(rc->baseline_gf_interval,
- rc->source_alt_ref_pending);
-#endif // USE_SYMM_MULTI_LAYER
- }
-
-#if !USE_SYMM_MULTI_LAYER
- // Currently at maximum two extra ARFs' are allowed
- assert(cpi->num_extra_arfs <= MAX_EXT_ARFS);
-#endif
-
- rc->frames_till_gf_update_due = rc->baseline_gf_interval;
-
- rc->bipred_group_interval = BFG_INTERVAL;
- // The minimum bi-predictive frame group interval is 2.
- // Anything smaller is replaced by 0.
- if (rc->bipred_group_interval < 2) rc->bipred_group_interval = 0;
-
- // Reset the file position.
- reset_fpf_position(twopass, start_pos);
-
- // Calculate the bits to be allocated to the gf/arf group as a whole
- gf_group_bits = calculate_total_gf_group_bits(cpi, gf_group_err);
-
-#if GROUP_ADAPTIVE_MAXQ
- // Calculate an estimate of the maxq needed for the group.
- // We are more aggressive about correcting for sections
- // where there could be significant overshoot than for easier
- // sections where we do not wish to risk creating an overshoot
- // of the allocated bit budget.
- if ((cpi->oxcf.rc_mode != AOM_Q) && (rc->baseline_gf_interval > 1)) {
- const int vbr_group_bits_per_frame =
- (int)(gf_group_bits / rc->baseline_gf_interval);
- const double group_av_err = gf_group_raw_error / rc->baseline_gf_interval;
- const double group_av_skip_pct =
- gf_group_skip_pct / rc->baseline_gf_interval;
- const double group_av_inactive_zone =
- ((gf_group_inactive_zone_rows * 2) /
- (rc->baseline_gf_interval * (double)cm->mb_rows));
-
- int tmp_q;
- // rc factor is a weight factor that corrects for local rate control drift.
- // It is clamped to RC_FACTOR_MIN / RC_FACTOR_MAX depending on the sign of
- // the rate error estimate.
- double rc_factor = 1.0;
- if (rc->rate_error_estimate > 0) {
- rc_factor = AOMMAX(RC_FACTOR_MIN,
- (double)(100 - rc->rate_error_estimate) / 100.0);
- } else {
- rc_factor = AOMMIN(RC_FACTOR_MAX,
- (double)(100 - rc->rate_error_estimate) / 100.0);
- }
- tmp_q = get_twopass_worst_quality(
- cpi, group_av_err, (group_av_skip_pct + group_av_inactive_zone),
- vbr_group_bits_per_frame, twopass->kfgroup_inter_fraction * rc_factor);
- // Never drop below half of the previous active worst quality.
- twopass->active_worst_quality =
- AOMMAX(tmp_q, twopass->active_worst_quality >> 1);
- }
-#endif
-
- // Calculate the extra bits to be used for boosted frame(s)
- gf_arf_bits = calculate_boost_bits(rc->baseline_gf_interval, rc->gfu_boost,
- gf_group_bits);
-
- // Adjust KF group bits and error remaining.
- twopass->kf_group_error_left -= (int64_t)gf_group_err;
-
- // If this is an arf update we want to remove the score for the overlay
- // frame at the end which will usually be very cheap to code.
- // The overlay frame has already, in effect, been coded so we want to spread
- // the remaining bits among the other frames.
- // For normal GFs remove the score for the GF itself unless this is
- // also a key frame in which case it has already been accounted for.
- if (rc->source_alt_ref_pending) {
- gf_group_error_left = gf_group_err - mod_frame_err;
- } else if (is_key_frame == 0) {
- gf_group_error_left = gf_group_err - gf_first_frame_err;
- } else {
- gf_group_error_left = gf_group_err;
- }
-
- // Allocate bits to each of the frames in the GF group.
- allocate_gf_group_bits(cpi, gf_group_bits, gf_group_error_left, gf_arf_bits);
-
- // Reset the file position.
- reset_fpf_position(twopass, start_pos);
-
- // Calculate a section intra ratio used in setting max loop filter.
- if (cpi->common.frame_type != KEY_FRAME) {
- twopass->section_intra_rating = calculate_section_intra_ratio(
- start_pos, twopass->stats_in_end, rc->baseline_gf_interval);
- }
-}
-
-// Threshold for use of the lagging second reference frame. High second ref
-// usage may point to a transient event like a flash or occlusion rather than
-// a real scene cut.
-#define SECOND_REF_USEAGE_THRESH 0.1
-// Minimum % intra coding observed in first pass (1.0 = 100%)
-#define MIN_INTRA_LEVEL 0.25
-// Minimum ratio between the % of intra coding and inter coding in the first
-// pass after discounting neutral blocks (discounting neutral blocks in this
-// way helps catch scene cuts in clips with very flat areas or letter box
-// format clips with image padding).
-#define INTRA_VS_INTER_THRESH 2.0
-// Hard threshold where the first pass chooses intra for almost all blocks.
-// In such a case even if the frame is not a scene cut coding a key frame
-// may be a good option.
-#define VERY_LOW_INTER_THRESH 0.05
-// Maximum threshold for the relative ratio of intra error score vs best
-// inter error score.
-#define KF_II_ERR_THRESHOLD 2.5
-// In real scene cuts there is almost always a sharp change in the intra
-// or inter error score.
-#define ERR_CHANGE_THRESHOLD 0.4
-// For real scene cuts we expect an improvement in the intra inter error
-// ratio in the next frame.
-#define II_IMPROVEMENT_THRESHOLD 3.5
-#define KF_II_MAX 128.0
-
-// Decides whether 'this_frame' is a plausible key frame (scene cut).
-//
-// The frame must first pass the threshold tests described by the macros
-// above. If it does, up to 16 following frames are read from the stats
-// stream to check how well they are predicted from the candidate.
-//
-// Returns 1 if the candidate is viable; in that case the stats read position
-// is left where the look-ahead stopped. Returns 0 otherwise; if the look-ahead
-// ran and failed, the stats read position is restored first.
-static int test_candidate_kf(TWO_PASS *twopass,
-                             const FIRSTPASS_STATS *last_frame,
-                             const FIRSTPASS_STATS *this_frame,
-                             const FIRSTPASS_STATS *next_frame) {
-  const double intra_pct = 1.0 - this_frame->pcnt_inter;
-  const double adj_inter_pct =
-      this_frame->pcnt_inter - this_frame->pcnt_neutral;
-
-  // Heavy use of the second reference in this or the next frame rules the
-  // candidate out immediately.
-  if (!(this_frame->pcnt_second_ref < SECOND_REF_USEAGE_THRESH)) return 0;
-  if (!(next_frame->pcnt_second_ref < SECOND_REF_USEAGE_THRESH)) return 0;
-
-  // A frame with almost no inter coding qualifies without further checks;
-  // anything else has to satisfy every one of the secondary criteria.
-  if (!(this_frame->pcnt_inter < VERY_LOW_INTER_THRESH)) {
-    if (!(intra_pct > MIN_INTRA_LEVEL)) return 0;
-    if (!(intra_pct > (INTRA_VS_INTER_THRESH * adj_inter_pct))) return 0;
-    if (!((this_frame->intra_error /
-           DOUBLE_DIVIDE_CHECK(this_frame->coded_error)) <
-          KF_II_ERR_THRESHOLD))
-      return 0;
-
-    // At least one of: sharp change in coded error, sharp change in intra
-    // error, or a large intra/inter ratio in the next frame.
-    const int coded_err_changed =
-        fabs(last_frame->coded_error - this_frame->coded_error) /
-            DOUBLE_DIVIDE_CHECK(this_frame->coded_error) >
-        ERR_CHANGE_THRESHOLD;
-    const int intra_err_changed =
-        fabs(last_frame->intra_error - this_frame->intra_error) /
-            DOUBLE_DIVIDE_CHECK(this_frame->intra_error) >
-        ERR_CHANGE_THRESHOLD;
-    const int next_ratio_high =
-        (next_frame->intra_error /
-         DOUBLE_DIVIDE_CHECK(next_frame->coded_error)) >
-        II_IMPROVEMENT_THRESHOLD;
-    if (!(coded_err_changed || intra_err_changed || next_ratio_high)) return 0;
-  }
-
-  // Primary criteria met: examine how well the key frame predicts subsequent
-  // frames.
-  const FIRSTPASS_STATS *const rewind_pos = twopass->stats_in;
-  FIRSTPASS_STATS lookahead = *next_frame;
-  double total_boost = 0.0;
-  double prev_total_boost = 0.0;
-  double decay = 1.0;
-  int frames_passed = 0;
-
-  while (frames_passed < 16) {
-    double ii_ratio = (BOOST_FACTOR * lookahead.intra_error /
-                       DOUBLE_DIVIDE_CHECK(lookahead.coded_error));
-
-    if (ii_ratio > KF_II_MAX) ii_ratio = KF_II_MAX;
-
-    // Cumulative effect of decay in prediction quality.
-    decay *= (lookahead.pcnt_inter > 0.85)
-                 ? lookahead.pcnt_inter
-                 : (0.85 + lookahead.pcnt_inter) / 2.0;
-
-    // Keep a running total.
-    total_boost += (decay * ii_ratio);
-
-    // Test various breakout clauses.
-    if ((lookahead.pcnt_inter < 0.05) || (ii_ratio < 1.5) ||
-        (((lookahead.pcnt_inter - lookahead.pcnt_neutral) < 0.20) &&
-         (ii_ratio < 3.0)) ||
-        ((total_boost - prev_total_boost) < 3.0) ||
-        (lookahead.intra_error < 200)) {
-      break;
-    }
-
-    prev_total_boost = total_boost;
-
-    // Get the next frame details
-    if (EOF == input_stats(twopass, &lookahead)) break;
-
-    ++frames_passed;
-  }
-
-  // Accept only if prediction stayed tolerable for more than 3 frames and
-  // enough boost accumulated.
-  if (total_boost > 30.0 && (frames_passed > 3)) return 1;
-
-  // Otherwise discard this potential key frame: reset the file position.
-  reset_fpf_position(twopass, rewind_pos);
-  return 0;
-}
-
-#define FRAMES_TO_CHECK_DECAY 8
-
-// Locates the next key frame and sets up the key-frame group.
-//
-// Scans the first-pass stats forward from the current position to determine
-// rc->frames_to_key, then computes the bit budget for the key-frame group
-// (twopass->kf_group_bits), the key frame boost (rc->kf_boost) and the bits
-// assigned to the key frame itself (gf_group->bit_allocation[0]).
-//
-// cpi:        encoder instance; rc, twopass and cpi->common.frame_type are
-//             updated here.
-// this_frame: stats of the frame that will be coded as the key frame. It is
-//             overwritten with successive frames' stats during the scan.
-//
-// The stats read position (twopass->stats_in) is restored to its value on
-// entry before returning.
-static void find_next_key_frame(AV1_COMP *cpi, FIRSTPASS_STATS *this_frame) {
- int i, j;
- RATE_CONTROL *const rc = &cpi->rc;
- TWO_PASS *const twopass = &cpi->twopass;
- GF_GROUP *const gf_group = &twopass->gf_group;
- const AV1EncoderConfig *const oxcf = &cpi->oxcf;
- // Copy of the starting frame's stats, used if the group has to be rescanned.
- const FIRSTPASS_STATS first_frame = *this_frame;
- const FIRSTPASS_STATS *const start_position = twopass->stats_in;
- FIRSTPASS_STATS next_frame;
- FIRSTPASS_STATS last_frame;
- int kf_bits = 0;
- int loop_decay_counter = 0;
- double decay_accumulator = 1.0;
- double av_decay_accumulator = 0.0;
- double zero_motion_accumulator = 1.0;
- double boost_score = 0.0;
- double kf_mod_err = 0.0;
- double kf_group_err = 0.0;
- // Circular buffer of the most recent per-frame decay rates.
- double recent_loop_decay[FRAMES_TO_CHECK_DECAY];
-
- av1_zero(next_frame);
-
- cpi->common.frame_type = KEY_FRAME;
-
- // Reset the GF group data structures.
- av1_zero(*gf_group);
-
- // Is this a forced key frame by interval.
- rc->this_key_frame_forced = rc->next_key_frame_forced;
-
- // Clear the alt ref active flag and last group multi arf flags as they
- // can never be set for a key frame.
- rc->source_alt_ref_active = 0;
-
- // KF is always a GF so clear frames till next gf counter.
- rc->frames_till_gf_update_due = 0;
-
- rc->frames_to_key = 1;
-
- twopass->kf_group_bits = 0; // Total bits available to kf group
- twopass->kf_group_error_left = 0; // Group modified error score.
-
- kf_mod_err = calculate_modified_err(cpi, twopass, oxcf, this_frame);
-
- // Initialize the decay rates for the recent frames to check
- for (j = 0; j < FRAMES_TO_CHECK_DECAY; ++j) recent_loop_decay[j] = 1.0;
-
- // Find the next keyframe.
- i = 0;
- while (twopass->stats_in < twopass->stats_in_end &&
- rc->frames_to_key < cpi->oxcf.key_freq) {
- // Accumulate kf group error.
- kf_group_err += calculate_modified_err(cpi, twopass, oxcf, this_frame);
-
- // Load the next frame's stats.
- last_frame = *this_frame;
- input_stats(twopass, this_frame);
-
- // Provided that we are not at the end of the file...
- if (cpi->oxcf.auto_key && twopass->stats_in < twopass->stats_in_end) {
- double loop_decay_rate;
-
- // Check for a scene cut.
- if (test_candidate_kf(twopass, &last_frame, this_frame,
- twopass->stats_in))
- break;
-
- // How fast is the prediction quality decaying?
- loop_decay_rate = get_prediction_decay_rate(cpi, twopass->stats_in);
-
- // We want to know something about the recent past... rather than
- // as used elsewhere where we are concerned with decay in prediction
- // quality since the last GF or KF.
- recent_loop_decay[i % FRAMES_TO_CHECK_DECAY] = loop_decay_rate;
- decay_accumulator = 1.0;
- for (j = 0; j < FRAMES_TO_CHECK_DECAY; ++j)
- decay_accumulator *= recent_loop_decay[j];
-
- // Special check for transition or high motion followed by a
- // static scene.
- if (detect_transition_to_still(cpi, i, cpi->oxcf.key_freq - i,
- loop_decay_rate, decay_accumulator))
- break;
-
- // Step on to the next frame.
- ++rc->frames_to_key;
-
- // If we don't have a real key frame within the next two
- // key_freq intervals then break out of the loop.
- if (rc->frames_to_key >= 2 * cpi->oxcf.key_freq) break;
- } else {
- ++rc->frames_to_key;
- }
- ++i;
- }
-
- // If there is a max kf interval set by the user we must obey it.
- // We already breakout of the loop above at 2x max.
- // This code centers the extra kf if the actual natural interval
- // is between 1x and 2x.
- if (cpi->oxcf.auto_key && rc->frames_to_key > cpi->oxcf.key_freq) {
- FIRSTPASS_STATS tmp_frame = first_frame;
-
- rc->frames_to_key /= 2;
-
- // Reset to the start of the group.
- reset_fpf_position(twopass, start_position);
-
- kf_group_err = 0.0;
-
- // Rescan to get the correct error data for the forced kf group.
- for (i = 0; i < rc->frames_to_key; ++i) {
- kf_group_err += calculate_modified_err(cpi, twopass, oxcf, &tmp_frame);
- input_stats(twopass, &tmp_frame);
- }
- rc->next_key_frame_forced = 1;
- } else if (twopass->stats_in == twopass->stats_in_end ||
- rc->frames_to_key >= cpi->oxcf.key_freq) {
- rc->next_key_frame_forced = 1;
- } else {
- rc->next_key_frame_forced = 0;
- }
-
- // Special case for the last key frame of the file.
- if (twopass->stats_in >= twopass->stats_in_end) {
- // Accumulate kf group error.
- kf_group_err += calculate_modified_err(cpi, twopass, oxcf, this_frame);
- }
-
- // Calculate the number of bits that should be assigned to the kf group.
- if (twopass->bits_left > 0 && twopass->modified_error_left > 0.0) {
- // Maximum number of bits for a single normal frame (not key frame).
- const int max_bits = frame_max_bits(rc, &cpi->oxcf);
-
- // Maximum number of bits allocated to the key frame group.
- int64_t max_grp_bits;
-
- // Default allocation based on bits left and relative
- // complexity of the section.
- twopass->kf_group_bits = (int64_t)(
- twopass->bits_left * (kf_group_err / twopass->modified_error_left));
-
- // Clip based on maximum per frame rate defined by the user.
- max_grp_bits = (int64_t)max_bits * (int64_t)rc->frames_to_key;
- if (twopass->kf_group_bits > max_grp_bits)
- twopass->kf_group_bits = max_grp_bits;
- } else {
- twopass->kf_group_bits = 0;
- }
- twopass->kf_group_bits = AOMMAX(0, twopass->kf_group_bits);
-
- // Reset the first pass file position.
- reset_fpf_position(twopass, start_position);
-
- // Scan through the kf group collating various stats used to determine
- // how many bits to spend on it.
- decay_accumulator = 1.0;
- boost_score = 0.0;
- // In AOM_Q mode the per-frame boost cap scales with the group length,
- // clamped to [KF_MIN_FRAME_BOOST, KF_MAX_FRAME_BOOST]; otherwise the cap is
- // KF_MAX_FRAME_BOOST.
- const double kf_max_boost =
- cpi->oxcf.rc_mode == AOM_Q
- ? AOMMIN(AOMMAX(rc->frames_to_key * 2.0, KF_MIN_FRAME_BOOST),
- KF_MAX_FRAME_BOOST)
- : KF_MAX_FRAME_BOOST;
- for (i = 0; i < (rc->frames_to_key - 1); ++i) {
- if (EOF == input_stats(twopass, &next_frame)) break;
-
- // Monitor for static sections.
- zero_motion_accumulator = AOMMIN(zero_motion_accumulator,
- get_zero_motion_factor(cpi, &next_frame));
-
- // Not all frames in the group are necessarily used in calculating boost.
- if ((i <= rc->max_gf_interval) ||
- ((i <= (rc->max_gf_interval * 4)) && (decay_accumulator > 0.5))) {
- // NOTE(review): this passes 'this_frame' (not updated inside this loop)
- // rather than '&next_frame'; left as is -- confirm this is intended.
- const double frame_boost =
- calc_frame_boost(cpi, this_frame, 0, kf_max_boost);
-
- // How fast is prediction quality decaying.
- if (!detect_flash(twopass, 0)) {
- const double loop_decay_rate =
- get_prediction_decay_rate(cpi, &next_frame);
- decay_accumulator *= loop_decay_rate;
- decay_accumulator = AOMMAX(decay_accumulator, MIN_DECAY_FACTOR);
- av_decay_accumulator += decay_accumulator;
- ++loop_decay_counter;
- }
- boost_score += (decay_accumulator * frame_boost);
- }
- }
- if (loop_decay_counter > 0)
- av_decay_accumulator /= (double)loop_decay_counter;
-
- reset_fpf_position(twopass, start_position);
-
- // Store the zero motion percentage
- twopass->kf_zeromotion_pct = (int)(zero_motion_accumulator * 100.0);
-
- // Calculate a section intra ratio used in setting max loop filter.
- twopass->section_intra_rating = calculate_section_intra_ratio(
- start_position, twopass->stats_in_end, rc->frames_to_key);
-
- // Apply various clamps for min and max boost
- rc->kf_boost = (int)(av_decay_accumulator * boost_score);
- rc->kf_boost = AOMMAX(rc->kf_boost, (rc->frames_to_key * 3));
- rc->kf_boost = AOMMAX(rc->kf_boost, MIN_KF_BOOST);
-
- // Work out how many bits to allocate for the key frame itself.
- kf_bits = calculate_boost_bits((rc->frames_to_key - 1), rc->kf_boost,
- twopass->kf_group_bits);
- // printf("kf boost = %d kf_bits = %d kf_zeromotion_pct = %d\n", rc->kf_boost,
- // kf_bits, twopass->kf_zeromotion_pct);
-
- // Work out the fraction of the kf group bits reserved for the inter frames
- // within the group after discounting the bits for the kf itself.
- if (twopass->kf_group_bits) {
- twopass->kfgroup_inter_fraction =
- (double)(twopass->kf_group_bits - kf_bits) /
- (double)twopass->kf_group_bits;
- } else {
- twopass->kfgroup_inter_fraction = 1.0;
- }
-
- twopass->kf_group_bits -= kf_bits;
-
- // Save the bits to spend on the key frame.
- gf_group->bit_allocation[0] = kf_bits;
- gf_group->update_type[0] = KF_UPDATE;
- gf_group->rf_level[0] = KF_STD;
-
- // Note the total error score of the kf group minus the key frame itself.
- // define_gf_group() updates this field through an (int64_t) cast; use the
- // same width here so a large error total is not narrowed through 'int'
- // (an out-of-range double-to-int conversion is undefined behaviour).
- twopass->kf_group_error_left = (int64_t)(kf_group_err - kf_mod_err);
-
- // Adjust the count of total modified error left.
- // The count of bits left is adjusted elsewhere based on real coded frame
- // sizes.
- twopass->modified_error_left -= kf_group_err;
-}
-
-// Selects which reference buffers get refreshed after the current frame is
-// encoded, based on the update type stored for the current GF group index.
-// Also sets the rc.is_* source-type flags that correspond to that type.
-static void configure_buffer_updates(AV1_COMP *cpi) {
-  TWO_PASS *const twopass = &cpi->twopass;
-  RATE_CONTROL *const rc = &cpi->rc;
-
-  // Refresh decisions are gathered here and written to cpi in one place at
-  // the end; a flag left at 0 means "do not refresh".
-  int upd_last = 0;
-  int upd_golden = 0;
-  int upd_bwd = 0;
-  int upd_alt2 = 0;
-  int upd_alt = 0;
-
-  // NOTE(weitinglin): Should we define another function to take care of
-  // cpi->rc.is_$Source_Type to make this function as it is in the comment?
-
-  rc->is_src_frame_alt_ref = 0;
-  rc->is_bwd_ref_frame = 0;
-  rc->is_last_bipred_frame = 0;
-  rc->is_bipred_frame = 0;
-  rc->is_src_frame_ext_arf = 0;
-
-  switch (twopass->gf_group.update_type[twopass->gf_group.index]) {
-    case KF_UPDATE:
-      // A key frame refreshes every reference buffer.
-      upd_last = 1;
-      upd_golden = 1;
-      upd_bwd = 1;
-      upd_alt2 = 1;
-      upd_alt = 1;
-      break;
-
-    case LF_UPDATE: upd_last = 1; break;
-
-    case GF_UPDATE:
-      // TODO(zoeliu): To further investigate whether 'refresh_last_frame' is
-      // needed.
-      upd_last = 1;
-      upd_golden = 1;
-      break;
-
-    case OVERLAY_UPDATE:
-      upd_golden = 1;
-      rc->is_src_frame_alt_ref = 1;
-      break;
-
-    case ARF_UPDATE:
-      // NOTE: BWDREF does not get updated along with ALTREF_FRAME.
-      upd_alt = 1;
-      break;
-
-    case BRF_UPDATE:
-      upd_bwd = 1;
-      rc->is_bwd_ref_frame = 1;
-      break;
-
-    case LAST_BIPRED_UPDATE:
-      upd_last = 1;
-      rc->is_last_bipred_frame = 1;
-      break;
-
-    case BIPRED_UPDATE:
-      upd_last = 1;
-      rc->is_bipred_frame = 1;
-      break;
-
-    case INTNL_OVERLAY_UPDATE:
-      upd_last = 1;
-      rc->is_src_frame_alt_ref = 1;
-      rc->is_src_frame_ext_arf = 1;
-      break;
-
-    case INTNL_ARF_UPDATE:
-      // The internal ARF goes to BWDREF under the new bwdref update rule,
-      // and to ALTREF2 otherwise.
-#if USE_SYMM_MULTI_LAYER
-      if (cpi->new_bwdref_update_rule == 1)
-        upd_bwd = 1;
-      else
-#endif
-        upd_alt2 = 1;
-      break;
-
-    default:
-      // Unknown update type: leave the refresh flags as they were.
-      assert(0);
-      return;
-  }
-
-  cpi->refresh_last_frame = upd_last;
-  cpi->refresh_golden_frame = upd_golden;
-  cpi->refresh_bwd_ref_frame = upd_bwd;
-  cpi->refresh_alt2_ref_frame = upd_alt2;
-  cpi->refresh_alt_ref_frame = upd_alt;
-}
-
-// Sets the reference-buffer refresh flags for a frame of the given update
-// type. ARF_UPDATE, INTNL_ARF_UPDATE and BIPRED_UPDATE each refresh exactly
-// one buffer (ALTREF, ALTREF2 and BWDREF respectively); every other type
-// refreshes LAST only.
-void av1_configure_buffer_updates_firstpass(AV1_COMP *cpi,
-                                            FRAME_UPDATE_TYPE update_type) {
-  RATE_CONTROL *const rate_ctrl = &cpi->rc;
-  const int is_arf = (update_type == ARF_UPDATE);
-  const int is_intnl_arf = (update_type == INTNL_ARF_UPDATE);
-  const int is_bipred = (update_type == BIPRED_UPDATE);
-
-  // LAST is refreshed unless one of the three special types applies.
-  cpi->refresh_last_frame = !(is_arf || is_intnl_arf || is_bipred);
-  cpi->refresh_golden_frame = 0;
-  cpi->refresh_bwd_ref_frame = is_bipred;
-  cpi->refresh_alt2_ref_frame = is_intnl_arf;
-  cpi->refresh_alt_ref_frame = is_arf;
-
-  rate_ctrl->is_bwd_ref_frame = is_bipred;
-
-  // The source-frame flags are only cleared for the (internal) ARF types;
-  // for any other type they keep whatever value they already had.
-  if (is_arf || is_intnl_arf) rate_ctrl->is_src_frame_alt_ref = 0;
-  if (is_intnl_arf) rate_ctrl->is_src_frame_ext_arf = 0;
-}
-
-static int is_skippable_frame(const AV1_COMP *cpi) {
- // If the current frame does not have non-zero motion vector detected in the
- // first pass, and so do its previous and forward frames, then this frame
- // can be skipped for partition check, and the partition size is assigned
- // according to the variance
- const TWO_PASS *const twopass = &cpi->twopass;
-
- return (!frame_is_intra_only(&cpi->common) &&
- twopass->stats_in - 2 > twopass->stats_in_start &&
- twopass->stats_in < twopass->stats_in_end &&
- (twopass->stats_in - 1)->pcnt_inter -
- (twopass->stats_in - 1)->pcnt_motion ==
- 1 &&
- (twopass->stats_in - 2)->pcnt_inter -
- (twopass->stats_in - 2)->pcnt_motion ==
- 1 &&
- twopass->stats_in->pcnt_inter - twopass->stats_in->pcnt_motion == 1);
-}
-
-void av1_rc_get_second_pass_params(AV1_COMP *cpi) {
- AV1_COMMON *const cm = &cpi->common;
- RATE_CONTROL *const rc = &cpi->rc;
- TWO_PASS *const twopass = &cpi->twopass;
- GF_GROUP *const gf_group = &twopass->gf_group;
- int frames_left;
- FIRSTPASS_STATS this_frame;
-
- int target_rate;
-
- frames_left = (int)(twopass->total_stats.count - cm->current_video_frame);
-
- if (!twopass->stats_in) return;
-
- // If this is an arf frame then we dont want to read the stats file or
- // advance the input pointer as we already have what we need.
- if (gf_group->update_type[gf_group->index] == ARF_UPDATE ||
- gf_group->update_type[gf_group->index] == INTNL_ARF_UPDATE) {
- configure_buffer_updates(cpi);
- target_rate = gf_group->bit_allocation[gf_group->index];
- target_rate = av1_rc_clamp_pframe_target_size(cpi, target_rate);
- rc->base_frame_target = target_rate;
-
- if (cpi->no_show_kf) {
- assert(gf_group->update_type[gf_group->index] == ARF_UPDATE);
- cm->frame_type = KEY_FRAME;
- } else {
- cm->frame_type = INTER_FRAME;
- }
-
- // Do the firstpass stats indicate that this frame is skippable for the
- // partition search?
- if (cpi->sf.allow_partition_search_skip && cpi->oxcf.pass == 2) {
- cpi->partition_search_skippable_frame = is_skippable_frame(cpi);
- }
-
- return;
- }
-
- aom_clear_system_state();
-
- if (cpi->oxcf.rc_mode == AOM_Q) {
- twopass->active_worst_quality = cpi->oxcf.cq_level;
- } else if (cm->current_video_frame == 0) {
- // Special case code for first frame.
- const int section_target_bandwidth =
- (int)(twopass->bits_left / frames_left);
- const double section_length = twopass->total_left_stats.count;
- const double section_error =
- twopass->total_left_stats.coded_error / section_length;
- const double section_intra_skip =
- twopass->total_left_stats.intra_skip_pct / section_length;
- const double section_inactive_zone =
- (twopass->total_left_stats.inactive_zone_rows * 2) /
- ((double)cm->mb_rows * section_length);
- const int tmp_q = get_twopass_worst_quality(
- cpi, section_error, section_intra_skip + section_inactive_zone,
- section_target_bandwidth, DEFAULT_GRP_WEIGHT);
-
- twopass->active_worst_quality = tmp_q;
- twopass->baseline_active_worst_quality = tmp_q;
- rc->ni_av_qi = tmp_q;
- rc->last_q[INTER_FRAME] = tmp_q;
- rc->avg_q = av1_convert_qindex_to_q(tmp_q, cm->seq_params.bit_depth);
- rc->avg_frame_qindex[INTER_FRAME] = tmp_q;
- rc->last_q[KEY_FRAME] = (tmp_q + cpi->oxcf.best_allowed_q) / 2;
- rc->avg_frame_qindex[KEY_FRAME] = rc->last_q[KEY_FRAME];
- }
-
- av1_zero(this_frame);
- if (EOF == input_stats(twopass, &this_frame)) return;
-
- // Set the frame content type flag.
- if (this_frame.intra_skip_pct >= FC_ANIMATION_THRESH)
- twopass->fr_content_type = FC_GRAPHICS_ANIMATION;
- else
- twopass->fr_content_type = FC_NORMAL;
-
- // Keyframe and section processing.
- if (rc->frames_to_key == 0 || (cpi->frame_flags & FRAMEFLAGS_KEY)) {
- FIRSTPASS_STATS this_frame_copy;
- this_frame_copy = this_frame;
- // Define next KF group and assign bits to it.
- find_next_key_frame(cpi, &this_frame);
- this_frame = this_frame_copy;
- } else {
- cm->frame_type = INTER_FRAME;
- }
-
- // Define a new GF/ARF group. (Should always enter here for key frames).
- if (rc->frames_till_gf_update_due == 0) {
- define_gf_group(cpi, &this_frame);
-
- rc->frames_till_gf_update_due = rc->baseline_gf_interval;
-
-#if ARF_STATS_OUTPUT
- {
- FILE *fpfile;
- fpfile = fopen("arf.stt", "a");
- ++arf_count;
- fprintf(fpfile, "%10d %10d %10d %10d %10d\n", cm->current_video_frame,
- rc->frames_till_gf_update_due, rc->kf_boost, arf_count,
- rc->gfu_boost);
-
- fclose(fpfile);
- }
-#endif
- }
-
- configure_buffer_updates(cpi);
-
- // Do the firstpass stats indicate that this frame is skippable for the
- // partition search?
- if (cpi->sf.allow_partition_search_skip && cpi->oxcf.pass == 2) {
- cpi->partition_search_skippable_frame = is_skippable_frame(cpi);
- }
-
- target_rate = gf_group->bit_allocation[gf_group->index];
-
- if (cpi->common.frame_type == KEY_FRAME)
- target_rate = av1_rc_clamp_iframe_target_size(cpi, target_rate);
- else
- target_rate = av1_rc_clamp_pframe_target_size(cpi, target_rate);
-
- rc->base_frame_target = target_rate;
-
- {
- const int num_mbs = (cpi->oxcf.resize_mode != RESIZE_NONE)
- ? cpi->initial_mbs
- : cpi->common.MBs;
- // The multiplication by 256 reverses a scaling factor of (>> 8)
- // applied when combining MB error values for the frame.
- twopass->mb_av_energy = log((this_frame.intra_error / num_mbs) + 1.0);
- twopass->frame_avg_haar_energy =
- log((this_frame.frame_avg_wavelet_energy / num_mbs) + 1.0);
- }
-
- // Update the total stats remaining structure.
- subtract_stats(&twopass->total_left_stats, &this_frame);
-}
-
-#define MINQ_ADJ_LIMIT 48
-#define MINQ_ADJ_LIMIT_CQ 20
-#define HIGH_UNDERSHOOT_RATIO 2
-void av1_twopass_postencode_update(AV1_COMP *cpi) {
- TWO_PASS *const twopass = &cpi->twopass;
- RATE_CONTROL *const rc = &cpi->rc;
- const int bits_used = rc->base_frame_target;
-
- // VBR correction is done through rc->vbr_bits_off_target. Based on the
- // sign of this value, a limited % adjustment is made to the target rate
- // of subsequent frames, to try and push it back towards 0. This method
- // is designed to prevent extreme behaviour at the end of a clip
- // or group of frames.
- rc->vbr_bits_off_target += rc->base_frame_target - rc->projected_frame_size;
- twopass->bits_left = AOMMAX(twopass->bits_left - bits_used, 0);
-
- // Calculate the pct rc error.
- if (rc->total_actual_bits) {
- rc->rate_error_estimate =
- (int)((rc->vbr_bits_off_target * 100) / rc->total_actual_bits);
- rc->rate_error_estimate = clamp(rc->rate_error_estimate, -100, 100);
- } else {
- rc->rate_error_estimate = 0;
- }
-
- if (cpi->common.frame_type != KEY_FRAME) {
- twopass->kf_group_bits -= bits_used;
- twopass->last_kfgroup_zeromotion_pct = twopass->kf_zeromotion_pct;
- }
- twopass->kf_group_bits = AOMMAX(twopass->kf_group_bits, 0);
-
- // If the rate control is drifting consider adjustment to min or maxq.
- if ((cpi->oxcf.rc_mode != AOM_Q) &&
- (cpi->twopass.gf_zeromotion_pct < VLOW_MOTION_THRESHOLD) &&
- !cpi->rc.is_src_frame_alt_ref) {
- const int maxq_adj_limit =
- rc->worst_quality - twopass->active_worst_quality;
- const int minq_adj_limit =
- (cpi->oxcf.rc_mode == AOM_CQ ? MINQ_ADJ_LIMIT_CQ : MINQ_ADJ_LIMIT);
-
- // Undershoot.
- if (rc->rate_error_estimate > cpi->oxcf.under_shoot_pct) {
- --twopass->extend_maxq;
- if (rc->rolling_target_bits >= rc->rolling_actual_bits)
- ++twopass->extend_minq;
- // Overshoot.
- } else if (rc->rate_error_estimate < -cpi->oxcf.over_shoot_pct) {
- --twopass->extend_minq;
- if (rc->rolling_target_bits < rc->rolling_actual_bits)
- ++twopass->extend_maxq;
- } else {
- // Adjustment for extreme local overshoot.
- if (rc->projected_frame_size > (2 * rc->base_frame_target) &&
- rc->projected_frame_size > (2 * rc->avg_frame_bandwidth))
- ++twopass->extend_maxq;
-
- // Unwind undershoot or overshoot adjustment.
- if (rc->rolling_target_bits < rc->rolling_actual_bits)
- --twopass->extend_minq;
- else if (rc->rolling_target_bits > rc->rolling_actual_bits)
- --twopass->extend_maxq;
- }
-
- twopass->extend_minq = clamp(twopass->extend_minq, 0, minq_adj_limit);
- twopass->extend_maxq = clamp(twopass->extend_maxq, 0, maxq_adj_limit);
-
- // If there is a big and undexpected undershoot then feed the extra
- // bits back in quickly. One situation where this may happen is if a
- // frame is unexpectedly almost perfectly predicted by the ARF or GF
- // but not very well predcited by the previous frame.
- if (!frame_is_kf_gf_arf(cpi) && !cpi->rc.is_src_frame_alt_ref) {
- int fast_extra_thresh = rc->base_frame_target / HIGH_UNDERSHOOT_RATIO;
- if (rc->projected_frame_size < fast_extra_thresh) {
- rc->vbr_bits_off_target_fast +=
- fast_extra_thresh - rc->projected_frame_size;
- rc->vbr_bits_off_target_fast =
- AOMMIN(rc->vbr_bits_off_target_fast, (4 * rc->avg_frame_bandwidth));
-
- // Fast adaptation of minQ if necessary to use up the extra bits.
- if (rc->avg_frame_bandwidth) {
- twopass->extend_minq_fast =
- (int)(rc->vbr_bits_off_target_fast * 8 / rc->avg_frame_bandwidth);
- }
- twopass->extend_minq_fast = AOMMIN(
- twopass->extend_minq_fast, minq_adj_limit - twopass->extend_minq);
- } else if (rc->vbr_bits_off_target_fast) {
- twopass->extend_minq_fast = AOMMIN(
- twopass->extend_minq_fast, minq_adj_limit - twopass->extend_minq);
- } else {
- twopass->extend_minq_fast = 0;
- }
- }
- }
-}
diff --git a/third_party/aom/av1/encoder/firstpass.h b/third_party/aom/av1/encoder/firstpass.h
deleted file mode 100644
index 4b7325ae2..000000000
--- a/third_party/aom/av1/encoder/firstpass.h
+++ /dev/null
@@ -1,208 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#ifndef AOM_AV1_ENCODER_FIRSTPASS_H_
-#define AOM_AV1_ENCODER_FIRSTPASS_H_
-
-#include "av1/common/enums.h"
-#include "av1/common/onyxc_int.h"
-#include "av1/encoder/lookahead.h"
-#include "av1/encoder/ratectrl.h"
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-#if CONFIG_FP_MB_STATS
-
-#define FPMB_DCINTRA_MASK 0x01
-
-#define FPMB_MOTION_ZERO_MASK 0x02
-#define FPMB_MOTION_LEFT_MASK 0x04
-#define FPMB_MOTION_RIGHT_MASK 0x08
-#define FPMB_MOTION_UP_MASK 0x10
-#define FPMB_MOTION_DOWN_MASK 0x20
-
-#define FPMB_ERROR_SMALL_MASK 0x40
-#define FPMB_ERROR_LARGE_MASK 0x80
-#define FPMB_ERROR_SMALL_TH 2000
-#define FPMB_ERROR_LARGE_TH 48000
-
-typedef struct {
- uint8_t *mb_stats_start;
- uint8_t *mb_stats_end;
-} FIRSTPASS_MB_STATS;
-#endif
-
-// Length of the bi-predictive frame group (BFG)
-// NOTE: Currently each BFG contains one backward ref (BWF) frame plus a certain
-// number of bi-predictive frames.
-#define BFG_INTERVAL 2
-// The maximum number of extra ALTREF's except ALTREF_FRAME
-#define MAX_EXT_ARFS (REF_FRAMES - BWDREF_FRAME - 1)
-
-#define MIN_EXT_ARF_INTERVAL 4
-
-#define MIN_ZERO_MOTION 0.95
-#define MAX_SR_CODED_ERROR 40
-#define MAX_RAW_ERR_VAR 2000
-#define MIN_MV_IN_OUT 0.4
-
-#define VLOW_MOTION_THRESHOLD 950
-
-typedef struct {
- double frame;
- double weight;
- double intra_error;
- double frame_avg_wavelet_energy;
- double coded_error;
- double sr_coded_error;
- double pcnt_inter;
- double pcnt_motion;
- double pcnt_second_ref;
- double pcnt_neutral;
- double intra_skip_pct;
- double inactive_zone_rows; // Image mask rows top and bottom.
- double inactive_zone_cols; // Image mask columns at left and right edges.
- double MVr;
- double mvr_abs;
- double MVc;
- double mvc_abs;
- double MVrv;
- double MVcv;
- double mv_in_out_count;
- double new_mv_count;
- double duration;
- double count;
- // standard deviation for (0, 0) motion prediction error
- double raw_error_stdev;
-} FIRSTPASS_STATS;
-
-typedef enum {
- KF_UPDATE = 0,
- LF_UPDATE = 1,
- GF_UPDATE = 2,
- ARF_UPDATE = 3,
- OVERLAY_UPDATE = 4,
- BRF_UPDATE = 5, // Backward Reference Frame
- LAST_BIPRED_UPDATE = 6, // Last Bi-predictive Frame
- BIPRED_UPDATE = 7, // Bi-predictive Frame, but not the last one
- INTNL_OVERLAY_UPDATE = 8, // Internal Overlay Frame
- INTNL_ARF_UPDATE = 9, // Internal Altref Frame (candidate for ALTREF2)
- FRAME_UPDATE_TYPES = 10
-} FRAME_UPDATE_TYPE;
-
-#define FC_ANIMATION_THRESH 0.15
-typedef enum {
- FC_NORMAL = 0,
- FC_GRAPHICS_ANIMATION = 1,
- FRAME_CONTENT_TYPES = 2
-} FRAME_CONTENT_TYPE;
-
-typedef struct {
- unsigned char index;
- RATE_FACTOR_LEVEL rf_level[(MAX_LAG_BUFFERS * 2) + 1];
- FRAME_UPDATE_TYPE update_type[(MAX_LAG_BUFFERS * 2) + 1];
- unsigned char arf_src_offset[(MAX_LAG_BUFFERS * 2) + 1];
- unsigned char arf_update_idx[(MAX_LAG_BUFFERS * 2) + 1];
- unsigned char arf_ref_idx[(MAX_LAG_BUFFERS * 2) + 1];
-#if USE_SYMM_MULTI_LAYER
- unsigned char arf_pos_in_gf[(MAX_LAG_BUFFERS * 2) + 1];
- unsigned char pyramid_level[(MAX_LAG_BUFFERS * 2) + 1];
- unsigned char pyramid_height;
- unsigned char pyramid_lvl_nodes[MAX_PYRAMID_LVL];
-#endif
- unsigned char brf_src_offset[(MAX_LAG_BUFFERS * 2) + 1];
- unsigned char bidir_pred_enabled[(MAX_LAG_BUFFERS * 2) + 1];
- unsigned char ref_fb_idx_map[(MAX_LAG_BUFFERS * 2) + 1][REF_FRAMES];
- unsigned char refresh_idx[(MAX_LAG_BUFFERS * 2) + 1];
- unsigned char refresh_flag[(MAX_LAG_BUFFERS * 2) + 1];
- int bit_allocation[(MAX_LAG_BUFFERS * 2) + 1];
-} GF_GROUP;
-
-typedef struct {
- unsigned int section_intra_rating;
- FIRSTPASS_STATS total_stats;
- FIRSTPASS_STATS this_frame_stats;
- const FIRSTPASS_STATS *stats_in;
- const FIRSTPASS_STATS *stats_in_start;
- const FIRSTPASS_STATS *stats_in_end;
- FIRSTPASS_STATS total_left_stats;
- int first_pass_done;
- int64_t bits_left;
- double modified_error_min;
- double modified_error_max;
- double modified_error_left;
- double mb_av_energy;
- double frame_avg_haar_energy;
-
-#if CONFIG_FP_MB_STATS
- uint8_t *frame_mb_stats_buf;
- uint8_t *this_frame_mb_stats;
- FIRSTPASS_MB_STATS firstpass_mb_stats;
-#endif
- // An indication of the content type of the current frame
- FRAME_CONTENT_TYPE fr_content_type;
-
- // Projected total bits available for a key frame group of frames
- int64_t kf_group_bits;
-
- // Error score of frames still to be coded in kf group
- int64_t kf_group_error_left;
-
- // The fraction for a kf groups total bits allocated to the inter frames
- double kfgroup_inter_fraction;
-
- int sr_update_lag;
-
- int kf_zeromotion_pct;
- int last_kfgroup_zeromotion_pct;
- int gf_zeromotion_pct;
- int active_worst_quality;
- int baseline_active_worst_quality;
- int extend_minq;
- int extend_maxq;
- int extend_minq_fast;
-
- GF_GROUP gf_group;
-} TWO_PASS;
-
-struct AV1_COMP;
-
-void av1_init_first_pass(struct AV1_COMP *cpi);
-void av1_rc_get_first_pass_params(struct AV1_COMP *cpi);
-void av1_first_pass(struct AV1_COMP *cpi, const struct lookahead_entry *source);
-void av1_end_first_pass(struct AV1_COMP *cpi);
-
-void av1_init_second_pass(struct AV1_COMP *cpi);
-void av1_rc_get_second_pass_params(struct AV1_COMP *cpi);
-void av1_configure_buffer_updates_firstpass(struct AV1_COMP *cpi,
- FRAME_UPDATE_TYPE update_type);
-
-// Post encode update of the rate control parameters for 2-pass
-void av1_twopass_postencode_update(struct AV1_COMP *cpi);
-
-static INLINE int get_number_of_extra_arfs(int interval, int arf_pending) {
- if (arf_pending && MAX_EXT_ARFS > 0)
- return interval >= MIN_EXT_ARF_INTERVAL * (MAX_EXT_ARFS + 1)
- ? MAX_EXT_ARFS
- : interval >= MIN_EXT_ARF_INTERVAL * MAX_EXT_ARFS
- ? MAX_EXT_ARFS - 1
- : 0;
- else
- return 0;
-}
-
-#ifdef __cplusplus
-} // extern "C"
-#endif
-
-#endif // AOM_AV1_ENCODER_FIRSTPASS_H_
diff --git a/third_party/aom/av1/encoder/global_motion.c b/third_party/aom/av1/encoder/global_motion.c
deleted file mode 100644
index e9f8b0bb4..000000000
--- a/third_party/aom/av1/encoder/global_motion.c
+++ /dev/null
@@ -1,298 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#include <stdio.h>
-#include <stdlib.h>
-#include <memory.h>
-#include <math.h>
-#include <assert.h>
-
-#include "av1/encoder/global_motion.h"
-
-#include "av1/common/warped_motion.h"
-
-#include "av1/encoder/segmentation.h"
-#include "av1/encoder/corner_detect.h"
-#include "av1/encoder/corner_match.h"
-#include "av1/encoder/ransac.h"
-
-#define MAX_CORNERS 4096
-#define MIN_INLIER_PROB 0.1
-
-#define MIN_TRANS_THRESH (1 * GM_TRANS_DECODE_FACTOR)
-
-// Border over which to compute the global motion
-#define ERRORADV_BORDER 0
-
-static const double erroradv_tr[] = { 0.65, 0.60, 0.55 };
-static const double erroradv_prod_tr[] = { 20000, 18000, 16000 };
-
-int is_enough_erroradvantage(double best_erroradvantage, int params_cost,
- int erroradv_type) {
- assert(erroradv_type < GM_ERRORADV_TR_TYPES);
- return best_erroradvantage < erroradv_tr[erroradv_type] &&
- best_erroradvantage * params_cost < erroradv_prod_tr[erroradv_type];
-}
-
-static void convert_to_params(const double *params, int32_t *model) {
- int i;
- int alpha_present = 0;
- model[0] = (int32_t)floor(params[0] * (1 << GM_TRANS_PREC_BITS) + 0.5);
- model[1] = (int32_t)floor(params[1] * (1 << GM_TRANS_PREC_BITS) + 0.5);
- model[0] = (int32_t)clamp(model[0], GM_TRANS_MIN, GM_TRANS_MAX) *
- GM_TRANS_DECODE_FACTOR;
- model[1] = (int32_t)clamp(model[1], GM_TRANS_MIN, GM_TRANS_MAX) *
- GM_TRANS_DECODE_FACTOR;
-
- for (i = 2; i < 6; ++i) {
- const int diag_value = ((i == 2 || i == 5) ? (1 << GM_ALPHA_PREC_BITS) : 0);
- model[i] = (int32_t)floor(params[i] * (1 << GM_ALPHA_PREC_BITS) + 0.5);
- model[i] =
- (int32_t)clamp(model[i] - diag_value, GM_ALPHA_MIN, GM_ALPHA_MAX);
- alpha_present |= (model[i] != 0);
- model[i] = (model[i] + diag_value) * GM_ALPHA_DECODE_FACTOR;
- }
- for (; i < 8; ++i) {
- model[i] = (int32_t)floor(params[i] * (1 << GM_ROW3HOMO_PREC_BITS) + 0.5);
- model[i] = (int32_t)clamp(model[i], GM_ROW3HOMO_MIN, GM_ROW3HOMO_MAX) *
- GM_ROW3HOMO_DECODE_FACTOR;
- alpha_present |= (model[i] != 0);
- }
-
- if (!alpha_present) {
- if (abs(model[0]) < MIN_TRANS_THRESH && abs(model[1]) < MIN_TRANS_THRESH) {
- model[0] = 0;
- model[1] = 0;
- }
- }
-}
-
-void convert_model_to_params(const double *params, WarpedMotionParams *model) {
- convert_to_params(params, model->wmmat);
- model->wmtype = get_gmtype(model);
- model->invalid = 0;
-}
-
-// Adds some offset to a global motion parameter and handles
-// all of the necessary precision shifts, clamping, and
-// zero-centering.
-static int32_t add_param_offset(int param_index, int32_t param_value,
- int32_t offset) {
- const int scale_vals[3] = { GM_TRANS_PREC_DIFF, GM_ALPHA_PREC_DIFF,
- GM_ROW3HOMO_PREC_DIFF };
- const int clamp_vals[3] = { GM_TRANS_MAX, GM_ALPHA_MAX, GM_ROW3HOMO_MAX };
- // type of param: 0 - translation, 1 - affine, 2 - homography
- const int param_type = (param_index < 2 ? 0 : (param_index < 6 ? 1 : 2));
- const int is_one_centered = (param_index == 2 || param_index == 5);
-
- // Make parameter zero-centered and offset the shift that was done to make
- // it compatible with the warped model
- param_value = (param_value - (is_one_centered << WARPEDMODEL_PREC_BITS)) >>
- scale_vals[param_type];
- // Add desired offset to the rescaled/zero-centered parameter
- param_value += offset;
- // Clamp the parameter so it does not overflow the number of bits allotted
- // to it in the bitstream
- param_value = (int32_t)clamp(param_value, -clamp_vals[param_type],
- clamp_vals[param_type]);
- // Rescale the parameter to WARPEDMODEL_PRECISION_BITS so it is compatible
- // with the warped motion library
- param_value *= (1 << scale_vals[param_type]);
-
- // Undo the zero-centering step if necessary
- return param_value + (is_one_centered << WARPEDMODEL_PREC_BITS);
-}
-
-static void force_wmtype(WarpedMotionParams *wm, TransformationType wmtype) {
- switch (wmtype) {
- case IDENTITY:
- wm->wmmat[0] = 0;
- wm->wmmat[1] = 0;
- AOM_FALLTHROUGH_INTENDED;
- case TRANSLATION:
- wm->wmmat[2] = 1 << WARPEDMODEL_PREC_BITS;
- wm->wmmat[3] = 0;
- AOM_FALLTHROUGH_INTENDED;
- case ROTZOOM:
- wm->wmmat[4] = -wm->wmmat[3];
- wm->wmmat[5] = wm->wmmat[2];
- AOM_FALLTHROUGH_INTENDED;
- case AFFINE: wm->wmmat[6] = wm->wmmat[7] = 0; break;
- default: assert(0);
- }
- wm->wmtype = wmtype;
-}
-
-int64_t refine_integerized_param(WarpedMotionParams *wm,
- TransformationType wmtype, int use_hbd, int bd,
- uint8_t *ref, int r_width, int r_height,
- int r_stride, uint8_t *dst, int d_width,
- int d_height, int d_stride, int n_refinements,
- int64_t best_frame_error) {
- static const int max_trans_model_params[TRANS_TYPES] = { 0, 2, 4, 6 };
- const int border = ERRORADV_BORDER;
- int i = 0, p;
- int n_params = max_trans_model_params[wmtype];
- int32_t *param_mat = wm->wmmat;
- int64_t step_error, best_error;
- int32_t step;
- int32_t *param;
- int32_t curr_param;
- int32_t best_param;
-
- force_wmtype(wm, wmtype);
- best_error = av1_warp_error(wm, use_hbd, bd, ref, r_width, r_height, r_stride,
- dst + border * d_stride + border, border, border,
- d_width - 2 * border, d_height - 2 * border,
- d_stride, 0, 0, best_frame_error);
- best_error = AOMMIN(best_error, best_frame_error);
- step = 1 << (n_refinements - 1);
- for (i = 0; i < n_refinements; i++, step >>= 1) {
- for (p = 0; p < n_params; ++p) {
- int step_dir = 0;
- // Skip searches for parameters that are forced to be 0
- param = param_mat + p;
- curr_param = *param;
- best_param = curr_param;
- // look to the left
- *param = add_param_offset(p, curr_param, -step);
- step_error =
- av1_warp_error(wm, use_hbd, bd, ref, r_width, r_height, r_stride,
- dst + border * d_stride + border, border, border,
- d_width - 2 * border, d_height - 2 * border, d_stride,
- 0, 0, best_error);
- if (step_error < best_error) {
- best_error = step_error;
- best_param = *param;
- step_dir = -1;
- }
-
- // look to the right
- *param = add_param_offset(p, curr_param, step);
- step_error =
- av1_warp_error(wm, use_hbd, bd, ref, r_width, r_height, r_stride,
- dst + border * d_stride + border, border, border,
- d_width - 2 * border, d_height - 2 * border, d_stride,
- 0, 0, best_error);
- if (step_error < best_error) {
- best_error = step_error;
- best_param = *param;
- step_dir = 1;
- }
- *param = best_param;
-
- // look to the direction chosen above repeatedly until error increases
- // for the biggest step size
- while (step_dir) {
- *param = add_param_offset(p, best_param, step * step_dir);
- step_error =
- av1_warp_error(wm, use_hbd, bd, ref, r_width, r_height, r_stride,
- dst + border * d_stride + border, border, border,
- d_width - 2 * border, d_height - 2 * border,
- d_stride, 0, 0, best_error);
- if (step_error < best_error) {
- best_error = step_error;
- best_param = *param;
- } else {
- *param = best_param;
- step_dir = 0;
- }
- }
- }
- }
- force_wmtype(wm, wmtype);
- wm->wmtype = get_gmtype(wm);
- return best_error;
-}
-
-static INLINE RansacFunc get_ransac_type(TransformationType type) {
- switch (type) {
- case AFFINE: return ransac_affine;
- case ROTZOOM: return ransac_rotzoom;
- case TRANSLATION: return ransac_translation;
- default: assert(0); return NULL;
- }
-}
-
-static unsigned char *downconvert_frame(YV12_BUFFER_CONFIG *frm,
- int bit_depth) {
- int i, j;
- uint16_t *orig_buf = CONVERT_TO_SHORTPTR(frm->y_buffer);
- uint8_t *buf_8bit = frm->y_buffer_8bit;
- assert(buf_8bit);
- if (!frm->buf_8bit_valid) {
- for (i = 0; i < frm->y_height; ++i) {
- for (j = 0; j < frm->y_width; ++j) {
- buf_8bit[i * frm->y_stride + j] =
- orig_buf[i * frm->y_stride + j] >> (bit_depth - 8);
- }
- }
- frm->buf_8bit_valid = 1;
- }
- return buf_8bit;
-}
-
-int compute_global_motion_feature_based(TransformationType type,
- YV12_BUFFER_CONFIG *frm,
- YV12_BUFFER_CONFIG *ref, int bit_depth,
- int *num_inliers_by_motion,
- double *params_by_motion,
- int num_motions) {
- int i;
- int num_frm_corners, num_ref_corners;
- int num_correspondences;
- int *correspondences;
- int frm_corners[2 * MAX_CORNERS], ref_corners[2 * MAX_CORNERS];
- unsigned char *frm_buffer = frm->y_buffer;
- unsigned char *ref_buffer = ref->y_buffer;
- RansacFunc ransac = get_ransac_type(type);
-
- if (frm->flags & YV12_FLAG_HIGHBITDEPTH) {
- // The frame buffer is 16-bit, so we need to convert to 8 bits for the
- // following code. We cache the result until the frame is released.
- frm_buffer = downconvert_frame(frm, bit_depth);
- }
- if (ref->flags & YV12_FLAG_HIGHBITDEPTH) {
- ref_buffer = downconvert_frame(ref, bit_depth);
- }
-
- // compute interest points in images using FAST features
- num_frm_corners = fast_corner_detect(frm_buffer, frm->y_width, frm->y_height,
- frm->y_stride, frm_corners, MAX_CORNERS);
- num_ref_corners = fast_corner_detect(ref_buffer, ref->y_width, ref->y_height,
- ref->y_stride, ref_corners, MAX_CORNERS);
-
- // find correspondences between the two images
- correspondences =
- (int *)malloc(num_frm_corners * 4 * sizeof(*correspondences));
- num_correspondences = determine_correspondence(
- frm_buffer, (int *)frm_corners, num_frm_corners, ref_buffer,
- (int *)ref_corners, num_ref_corners, frm->y_width, frm->y_height,
- frm->y_stride, ref->y_stride, correspondences);
-
- ransac(correspondences, num_correspondences, num_inliers_by_motion,
- params_by_motion, num_motions);
-
- free(correspondences);
-
- // Set num_inliers = 0 for motions with too few inliers so they are ignored.
- for (i = 0; i < num_motions; ++i) {
- if (num_inliers_by_motion[i] < MIN_INLIER_PROB * num_correspondences) {
- num_inliers_by_motion[i] = 0;
- }
- }
-
- // Return true if any one of the motions has inliers.
- for (i = 0; i < num_motions; ++i) {
- if (num_inliers_by_motion[i] > 0) return 1;
- }
- return 0;
-}
diff --git a/third_party/aom/av1/encoder/global_motion.h b/third_party/aom/av1/encoder/global_motion.h
deleted file mode 100644
index c7c016c43..000000000
--- a/third_party/aom/av1/encoder/global_motion.h
+++ /dev/null
@@ -1,64 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#ifndef AOM_AV1_ENCODER_GLOBAL_MOTION_H_
-#define AOM_AV1_ENCODER_GLOBAL_MOTION_H_
-
-#include "aom/aom_integer.h"
-#include "aom_scale/yv12config.h"
-#include "av1/common/mv.h"
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-#define RANSAC_NUM_MOTIONS 1
-
-void convert_model_to_params(const double *params, WarpedMotionParams *model);
-
-int is_enough_erroradvantage(double best_erroradvantage, int params_cost,
- int erroradv_type);
-
-// Returns the av1_warp_error between "dst" and the result of applying the
-// motion params that result from fine-tuning "wm" to "ref". Note that "wm" is
-// modified in place.
-int64_t refine_integerized_param(WarpedMotionParams *wm,
- TransformationType wmtype, int use_hbd, int bd,
- uint8_t *ref, int r_width, int r_height,
- int r_stride, uint8_t *dst, int d_width,
- int d_height, int d_stride, int n_refinements,
- int64_t best_frame_error);
-
-/*
- Computes "num_motions" candidate global motion parameters between two frames.
- The array "params_by_motion" should be length 8 * "num_motions". The ordering
- of each set of parameters is best described by the homography:
-
- [x' (m2 m3 m0 [x
- z . y' = m4 m5 m1 * y
- 1] m6 m7 1) 1]
-
- where m{i} represents the ith value in any given set of parameters.
-
- "num_inliers" should be length "num_motions", and will be populated with the
- number of inlier feature points for each motion. Params for which the
- num_inliers entry is 0 should be ignored by the caller.
-*/
-int compute_global_motion_feature_based(TransformationType type,
- YV12_BUFFER_CONFIG *frm,
- YV12_BUFFER_CONFIG *ref, int bit_depth,
- int *num_inliers_by_motion,
- double *params_by_motion,
- int num_motions);
-#ifdef __cplusplus
-} // extern "C"
-#endif
-#endif // AOM_AV1_ENCODER_GLOBAL_MOTION_H_
diff --git a/third_party/aom/av1/encoder/grain_test_vectors.h b/third_party/aom/av1/encoder/grain_test_vectors.h
deleted file mode 100644
index 945dc3733..000000000
--- a/third_party/aom/av1/encoder/grain_test_vectors.h
+++ /dev/null
@@ -1,781 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-#ifndef AOM_AV1_ENCODER_GRAIN_TEST_VECTORS_H_
-#define AOM_AV1_ENCODER_GRAIN_TEST_VECTORS_H_
-
-/* Test vectors for emulation of different film grain types.
- * Note that bit depth would be derived from the bitstream and
- * not signaled in film grain metadata. The parameters are valid
- * for any bit depth.
- */
-static aom_film_grain_t film_grain_test_vectors[16] = {
- /* Test 1 */
- {
- 1 /* apply_grain */,
- 1 /* update_parameters */,
- { { 16, 0 },
- { 25, 136 },
- { 33, 144 },
- { 41, 160 },
- { 48, 168 },
- { 56, 136 },
- { 67, 128 },
- { 82, 144 },
- { 97, 152 },
- { 113, 144 },
- { 128, 176 },
- { 143, 168 },
- { 158, 176 },
- { 178, 184 } },
- 14 /* num_points_y */,
- { { 16, 0 },
- { 20, 64 },
- { 28, 88 },
- { 60, 104 },
- { 90, 136 },
- { 105, 160 },
- { 134, 168 },
- { 168, 208 } },
- 8 /* num_cb_points */,
- { { 16, 0 },
- { 28, 96 },
- { 56, 80 },
- { 66, 96 },
- { 80, 104 },
- { 108, 96 },
- { 122, 112 },
- { 137, 112 },
- { 169, 176 } },
- 9 /* num_cr_points */,
- 11 /* scaling_shift */,
- 2 /* ar_coeff_lag */,
- { 0, 0, -58, 0, 0, 0, -76, 100, -43, 0, -51, 82 },
- { 0, 0, -49, 0, 0, 0, -36, 22, -30, 0, -38, 7, 39 },
- { 0, 0, -47, 0, 0, 0, -31, 31, -25, 0, -32, 13, -100 },
- 8 /* ar_coeff_shift */,
- 247 /* cb_mult */,
- 192 /* cb_luma_mult */,
- 18 /* cb_offset */,
- 229 /* cr_mult */,
- 192 /* cr_luma_mult */,
- 54 /* cr_offset */,
- 0 /* overlap_flag */,
- 1 /* clip_to_restricted_range */,
- 8 /* bit_depth */,
- 0 /* chroma_scaling_from_luma*/,
- 0 /* grain_scale_shift*/,
- 45231 /* random_seed */
- },
- /* Test 2 */
- {
- 1 /* apply_grain */,
- 1 /* update_parameters */,
- { { 0, 96 }, { 255, 96 } },
- 2 /* num_points_y */,
- { { 0, 64 }, { 255, 64 } },
- 2 /* num_cb_points */,
- { { 0, 64 }, { 255, 64 } },
- 2 /* num_cr_points */,
- 11 /* scaling_shift */,
- 3 /* ar_coeff_lag */,
- {
- 4, 1, 3, 0, 1, -3, 8, -3, 7, -23, 1, -25,
- 0, -10, 6, -17, -4, 53, 36, 5, -5, -17, 8, 66,
- },
- {
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 127,
- },
- {
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 127,
- },
- 7 /* ar_coeff_shift */,
- 128 /* cb_mult */,
- 192 /* cb_luma_mult */,
- 256 /* cb_offset */,
- 128 /* cr_mult */,
- 192 /* cr_luma_mult */,
- 256 /* cr_offset */,
- 1 /* overlap_flag */,
- 0 /* clip_to_restricted_range */,
- 8 /* bit_depth */,
- 0 /*chroma_scaling_from_luma*/,
- 0 /* grain_scale_shift*/,
- 45231 /* random_seed */
- },
- /* Test 3 */
- {
- 1 /* apply_grain */,
- 1 /* update_parameters */,
- { { 0, 192 }, { 255, 192 } },
- 2 /* num_points_y */,
- { { 0, 128 }, { 255, 128 } },
- 2 /* num_cb_points */,
- { { 0, 128 }, { 255, 128 } },
- 2 /* num_cr_points */,
- 11 /* scaling_shift */,
- 3 /* ar_coeff_lag */,
- {
- 4, 1, 3, 0, 1, -3, 8, -3, 7, -23, 1, -25,
- 0, -10, 6, -17, -4, 53, 36, 5, -5, -17, 8, 66,
- },
- {
- 4, -7, 2, 4, 12, -12, 5, -8, 6, 8, -19, -16, 19,
- -10, -2, 17, -42, 58, -2, -13, 9, 14, -36, 67, 0,
- },
- {
- 4, -7, 2, 4, 12, -12, 5, -8, 6, 8, -19, -16, 19,
- -10, -2, 17, -42, 58, -2, -13, 9, 14, -36, 67, 0,
- },
- 7 /* ar_coeff_shift */,
- 128 /* cb_mult */,
- 192 /* cb_luma_mult */,
- 256 /* cb_offset */,
- 128 /* cr_mult */,
- 192 /* cr_luma_mult */,
- 256 /* cr_offset */,
- 1 /* overlap_flag */,
- 1 /* clip_to_restricted_range */,
- 8 /* bit_depth */,
- 0 /*chroma_scaling_from_luma*/,
- 1 /* grain_scale_shift*/,
- 45231 /* random_seed */
- },
- /* Test 4 */
- {
- 1 /* apply_grain */,
- 1 /* update_parameters */,
- {
- { 16, 0 },
- { 24, 137 },
- { 53, 146 },
- { 63, 155 },
- { 78, 155 },
- { 107, 150 },
- { 122, 147 },
- { 136, 147 },
- { 166, 153 },
- },
- 9 /* num_points_y */,
- {
- { 16, 0 },
- { 20, 72 },
- { 27, 82 },
- { 33, 91 },
- { 69, 121 },
- { 95, 143 },
- { 108, 154 },
- { 134, 169 },
- { 147, 177 },
- },
- 9 /* num_cb_points */,
- {
- { 16, 0 },
- { 24, 95 },
- { 54, 93 },
- { 65, 94 },
- { 79, 98 },
- { 109, 107 },
- { 124, 119 },
- { 139, 136 },
- { 169, 170 },
- },
- 9 /* num_cr_points */,
- 11 /* scaling_shift */,
- 3 /* ar_coeff_lag */,
- {
- 7, -9, 2, 4, 7, -12, 7, -18, 18, -30, -27, -42,
- 13, -20, 7, -18, 6, 107, 55, -2, -4, -9, -22, 113,
- },
- {
- -3, -1, -4, 3, -6, -2, 3, 1, -4, -10, -10, -5, -5,
- -3, -1, -13, -28, -25, -31, -6, -4, 14, -64, 66, 0,
- },
- {
- 0, 4, -3, 13, 0, 1, -3, 0, -3, -10, -68, -4, -2,
- -5, 2, -3, -20, 62, -31, 0, -4, -1, -8, -29, 0,
- },
- 8 /* ar_coeff_shift */,
- 128 /* cb_mult */,
- 192 /* cb_luma_mult */,
- 256 /* cb_offset */,
- 128 /* cr_mult */,
- 192 /* cr_luma_mult */,
- 256 /* cr_offset */,
- 1 /* overlap_flag */,
- 0 /* clip_to_restricted_range */,
- 8 /* bit_depth */,
- 0 /*chroma_scaling_from_luma*/,
- 0 /* grain_scale_shift*/,
- 45231 /* random_seed */
- },
- /* Test 5 */
- {
- 1 /* apply_grain */,
- 0 /* update_parameters */,
- { { 0, 64 }, { 255, 64 } },
- 2 /* num_points_y */,
- {
- { 0, 96 },
- { 32, 90 },
- { 64, 83 },
- { 96, 76 },
- { 128, 68 },
- { 159, 59 },
- { 191, 48 },
- { 223, 34 },
- { 255, 0 },
- },
- 9 /* num_cb_points */,
- {
- { 0, 0 },
- { 32, 34 },
- { 64, 48 },
- { 96, 59 },
- { 128, 68 },
- { 159, 76 },
- { 191, 83 },
- { 223, 90 },
- { 255, 96 },
- },
- 9 /* num_cr_points */,
- 11 /* scaling_shift */,
- 3 /* ar_coeff_lag */,
- {
- 4, 1, 3, 0, 1, -3, 8, -3, 7, -23, 1, -25,
- 0, -10, 6, -17, -4, 53, 36, 5, -5, -17, 8, 66,
- },
- {
- -2, 2, -5, 7, -6, 4, -2, -1, 1, -2, 0, -2, 2,
- -3, -5, 13, -13, 6, -14, 8, -1, 18, -36, 58, 0,
- },
- {
- -2, -1, -3, 14, -4, -1, -3, 0, -1, 7, -31, 7, 2,
- 0, 1, 0, -7, 50, -8, -2, 2, 2, 2, -4, 0,
- },
- 7 /* ar_coeff_shift */,
- 128 /* cb_mult */,
- 192 /* cb_luma_mult */,
- 256 /* cb_offset */,
- 128 /* cr_mult */,
- 192 /* cr_luma_mult */,
- 256 /* cr_offset */,
- 1 /* overlap_flag */,
- 1 /* clip_to_restricted_range */,
- 8 /* bit_depth */,
- 0 /*chroma_scaling_from_luma*/,
- 0 /* grain_scale_shift*/,
- 1063 /* random_seed */
- },
- /* Test 6 */
- {
- 1 /* apply_grain */,
- 1 /* update_parameters */,
- {
- { 0, 96 },
- { 20, 92 },
- { 39, 88 },
- { 59, 84 },
- { 78, 80 },
- { 98, 75 },
- { 118, 70 },
- { 137, 65 },
- { 157, 60 },
- { 177, 53 },
- { 196, 46 },
- { 216, 38 },
- { 235, 27 },
- { 255, 0 },
- },
- 14 /* num_points_y */,
- { { 0, 0 } },
- 0 /* num_cb_points */,
- { { 0, 0 } },
- 0 /* num_cr_points */,
- 11 /* scaling_shift */,
- 3 /* ar_coeff_lag */,
- {
- 4, 1, 3, 0, 1, -3, 8, -3, 7, -23, 1, -25,
- 0, -10, 6, -17, -4, 53, 36, 5, -5, -17, 8, 66,
- },
- {
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- },
- {
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- },
- 7 /* ar_coeff_shift */,
- 128 /* cb_mult */,
- 192 /* cb_luma_mult */,
- 256 /* cb_offset */,
- 128 /* cr_mult */,
- 192 /* cr_luma_mult */,
- 256 /* cr_offset */,
- 1 /* overlap_flag */,
- 1 /* clip_to_restricted_range */,
- 8 /* bit_depth */,
- 0 /*chroma_scaling_from_luma*/,
- 0 /* grain_scale_shift*/,
- 2754 /* random_seed */
- },
- /* Test 7 */
- {
- 1 /* apply_grain */,
- 1 /* update_parameters */,
- {
- { 0, 0 },
- { 20, 27 },
- { 39, 38 },
- { 59, 46 },
- { 78, 53 },
- { 98, 60 },
- { 118, 65 },
- { 137, 70 },
- { 157, 75 },
- { 177, 80 },
- { 196, 84 },
- { 216, 88 },
- { 235, 92 },
- { 255, 96 },
- },
- 14 /* num_points_y */,
- { { 0, 0 }, { 255, 0 } },
- 2 /* num_cb_points */,
- { { 0, 0 }, { 255, 0 } },
- 2 /* num_cr_points */,
- 11 /* scaling_shift */,
- 3 /* ar_coeff_lag */,
- {
- 4, 1, 3, 0, 1, -3, 8, -3, 7, -23, 1, -25,
- 0, -10, 6, -17, -4, 53, 36, 5, -5, -17, 8, 66,
- },
- {
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- },
- {
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- },
- 7 /* ar_coeff_shift */,
- 128 /* cb_mult */,
- 192 /* cb_luma_mult */,
- 256 /* cb_offset */,
- 128 /* cr_mult */,
- 192 /* cr_luma_mult */,
- 256 /* cr_offset */,
- 1 /* overlap_flag */,
- 1 /* clip_to_restricted_range */,
- 8 /* bit_depth */,
- 0 /*chroma_scaling_from_luma*/,
- 0 /* grain_scale_shift*/,
- 45231 /* random_seed */
- },
- /* Test 8 */
- {
- 1 /* apply_grain */,
- 1 /* update_parameters */,
- { { 0, 96 }, { 255, 96 } },
- 2 /* num_points_y */,
- { { 0, 62 }, { 255, 62 } },
- 2 /* num_cb_points */,
- { { 0, 62 }, { 255, 62 } },
- 2 /* num_cr_points */,
- 11 /* scaling_shift */,
- 3 /* ar_coeff_lag */,
- {
- 4, 1, 3, 0, 1, -3, 8, -3, 7, -23, 1, -25,
- 0, -10, 6, -17, -4, 53, 36, 5, -5, -17, 8, 66,
- },
- {
- 0, -2, -2, 8, 5, -1, 1, -1, 5, 16, -33, -9, 6,
- -1, -3, 10, -47, 63, 0, -15, 3, 11, -42, 75, -69,
- },
- {
- 1, -1, -1, 9, 5, 0, 1, -1, 5, 15, -32, -10, 8,
- -2, -4, 11, -46, 62, 1, -16, 3, 13, -43, 75, -55,
- },
- 7 /* ar_coeff_shift */,
- 128 /* cb_mult */,
- 192 /* cb_luma_mult */,
- 256 /* cb_offset */,
- 128 /* cr_mult */,
- 192 /* cr_luma_mult */,
- 256 /* cr_offset */,
- 1 /* overlap_flag */,
- 0 /* clip_to_restricted_range */,
- 8 /* bit_depth */,
- 0 /*chroma_scaling_from_luma*/,
- 0 /* grain_scale_shift*/,
- 45231 /* random_seed */
- },
- /* Test 9 */
- {
- 1 /* apply_grain */,
- 0 /* update_parameters */,
- { { 0, 48 }, { 255, 48 } },
- 2 /* num_points_y */,
- { { 0, 32 }, { 255, 32 } },
- 2 /* num_cb_points */,
- { { 0, 32 }, { 255, 32 } },
- 2 /* num_cr_points */,
- 10 /* scaling_shift */,
- 2 /* ar_coeff_lag */,
- { 10, -30, -20, -39, 1, -24, 12, 103, 60, -9, -24, 113 },
- { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 127 },
- { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 127 },
- 8 /* ar_coeff_shift */,
- 128 /* cb_mult */,
- 192 /* cb_luma_mult */,
- 256 /* cb_offset */,
- 128 /* cr_mult */,
- 192 /* cr_luma_mult */,
- 256 /* cr_offset */,
- 1 /* overlap_flag */,
- 0 /* clip_to_restricted_range */,
- 8 /* bit_depth */,
- 0 /*chroma_scaling_from_luma*/,
- 0 /* grain_scale_shift*/,
- 45231 /* random_seed */
- },
- /* Test 10 */
- {
- 1 /* apply_grain */,
- 1 /* update_parameters */,
- { { 0, 48 }, { 255, 48 } },
- 2 /* num_points_y */,
- { { 0, 32 }, { 255, 32 } },
- 2 /* num_cb_points */,
- { { 0, 32 }, { 255, 32 } },
- 2 /* num_cr_points */,
- 10 /* scaling_shift */,
- 2 /* ar_coeff_lag */,
- { 10, -30, -20, -39, 1, -24, 12, 103, 60, -9, -24, 113 },
- { -7, -6, -48, -22, 2, -3, -45, 73, -11, -26, -52, 76, 0 },
- { -7, -6, -48, -22, 2, -3, -45, 73, -11, -26, -52, 76, 0 },
- 8 /* ar_coeff_shift */,
- 128 /* cb_mult */,
- 192 /* cb_luma_mult */,
- 256 /* cb_offset */,
- 128 /* cr_mult */,
- 192 /* cr_luma_mult */,
- 256 /* cr_offset */,
- 1 /* overlap_flag */,
- 0 /* clip_to_restricted_range */,
- 8 /* bit_depth */,
- 0 /*chroma_scaling_from_luma*/,
- 0 /* grain_scale_shift*/,
- 45231 /* random_seed */
- },
- /* Test 11 */
- {
- 1 /* apply_grain */,
- 0 /* update_parameters */,
- { { 0, 32 }, { 255, 32 } },
- 2 /* num_points_y */,
- {
- { 0, 48 },
- { 32, 45 },
- { 64, 42 },
- { 96, 38 },
- { 128, 34 },
- { 159, 29 },
- { 191, 24 },
- { 223, 17 },
- { 255, 0 },
- },
- 9 /* num_cb_points */,
- {
- { 0, 0 },
- { 32, 17 },
- { 64, 24 },
- { 96, 29 },
- { 128, 34 },
- { 159, 38 },
- { 191, 42 },
- { 223, 45 },
- { 255, 48 },
- },
- 9 /* num_cr_points */,
- 10 /* scaling_shift */,
- 3 /* ar_coeff_lag */,
- {
- 7, -9, 2, 4, 7, -12, 7, -18, 18, -30, -27, -42,
- 13, -20, 7, -18, 6, 107, 55, -2, -4, -9, -22, 113,
- },
- {
- -3, -1, -4, 3, -6, -2, 3, 1, -4, -10, -10, -5, -5,
- -3, -1, -13, -28, -25, -31, -6, -4, 14, -64, 66, 0,
- },
- {
- 0, 4, -3, 13, 0, 1, -3, 0, -3, -10, -68, -4, -2,
- -5, 2, -3, -20, 62, -31, 0, -4, -1, -8, -29, 0,
- },
- 8 /* ar_coeff_shift */,
- 128 /* cb_mult */,
- 192 /* cb_luma_mult */,
- 256 /* cb_offset */,
- 128 /* cr_mult */,
- 192 /* cr_luma_mult */,
- 256 /* cr_offset */,
- 1 /* overlap_flag */,
- 1 /* clip_to_restricted_range */,
- 8 /* bit_depth */,
- 0 /*chroma_scaling_from_luma*/,
- 0 /* grain_scale_shift*/,
- 1357 /* random_seed */
- },
- /* Test 12 */
- {
- 1 /* apply_grain */,
- 1 /* update_parameters */,
- {
- { 16, 0 },
- { 24, 49 },
- { 39, 69 },
- { 46, 84 },
- { 53, 91 },
- { 63, 100 },
- { 78, 114 },
- { 92, 134 },
- { 164, 139 },
- },
- 9 /* num_points_y */,
- {
- { 16, 0 },
- { 20, 31 },
- { 26, 42 },
- { 33, 54 },
- { 40, 65 },
- { 47, 72 },
- { 56, 85 },
- { 84, 123 },
- { 152, 157 },
- },
- 9 /* num_cb_points */,
- {
- { 16, 0 },
- { 25, 14 },
- { 39, 33 },
- { 47, 40 },
- { 54, 47 },
- { 64, 62 },
- { 79, 76 },
- { 94, 83 },
- { 167, 101 },
- },
- 9 /* num_cr_points */,
- 10 /* scaling_shift */,
- 2 /* ar_coeff_lag */,
- { 0, 0, -58, 0, 0, 0, -76, 100, -43, 0, -51, 82 },
- { 0, 0, -49, 0, 0, 0, -36, 22, -30, 0, -38, 7, 39 },
- { 0, 0, -47, 0, 0, 0, -31, 31, -25, 0, -32, 13, -100 },
- 8 /* ar_coeff_shift */,
- 128 /* cb_mult */,
- 192 /* cb_luma_mult */,
- 256 /* cb_offset */,
- 128 /* cr_mult */,
- 192 /* cr_luma_mult */,
- 256 /* cr_offset */,
- 0 /* overlap_flag */,
- 0 /* clip_to_restricted_range */,
- 8 /* bit_depth */,
- 0 /*chroma_scaling_from_luma*/,
- 0 /* grain_scale_shift*/,
- 45231 /* random_seed */
- },
- /* Test 13 */
- {
- 1 /* apply_grain */,
- 1 /* update_parameters */,
- {
- { 0, 48 },
- { 20, 46 },
- { 39, 44 },
- { 59, 42 },
- { 78, 40 },
- { 98, 38 },
- { 118, 35 },
- { 137, 33 },
- { 157, 30 },
- { 177, 27 },
- { 196, 23 },
- { 216, 19 },
- { 235, 13 },
- { 255, 0 },
- },
- 14 /* num_points_y */,
- { { 0, 0 }, { 255, 0 } },
- 0 /* num_cb_points */,
- { { 0, 0 }, { 255, 0 } },
- 0 /* num_cr_points */,
- 10 /* scaling_shift */,
- 2 /* ar_coeff_lag */,
- { 10, -30, -20, -39, 1, -24, 12, 103, 60, -9, -24, 113 },
- { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 },
- { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 },
- 8 /* ar_coeff_shift */,
- 128 /* cb_mult */,
- 192 /* cb_luma_mult */,
- 256 /* cb_offset */,
- 128 /* cr_mult */,
- 192 /* cr_luma_mult */,
- 256 /* cr_offset */,
- 1 /* overlap_flag */,
- 0 /* clip_to_restricted_range */,
- 8 /* bit_depth */,
- 0 /*chroma_scaling_from_luma*/,
- 0 /* grain_scale_shift*/,
- 45231 /* random_seed */
- },
- /* Test 14 */
- {
- 1 /* apply_grain */,
- 1 /* update_parameters */,
- {
- { 0, 0 },
- { 20, 13 },
- { 39, 19 },
- { 59, 23 },
- { 78, 27 },
- { 98, 30 },
- { 118, 33 },
- { 137, 35 },
- { 157, 38 },
- { 177, 40 },
- { 196, 42 },
- { 216, 44 },
- { 235, 46 },
- { 255, 48 },
- },
- 14 /* num_points_y */,
- { { 0, 0 }, { 255, 0 } },
- 0 /* num_cb_points */,
- { { 0, 0 }, { 255, 0 } },
- 0 /* num_cr_points */,
- 10 /* scaling_shift */,
- 2 /* ar_coeff_lag */,
- { 10, -30, -20, -39, 1, -24, 12, 103, 60, -9, -24, 113 },
- { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 },
- { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 },
- 8 /* ar_coeff_shift */,
- 128 /* cb_mult */,
- 192 /* cb_luma_mult */,
- 256 /* cb_offset */,
- 128 /* cr_mult */,
- 192 /* cr_luma_mult */,
- 256 /* cr_offset */,
- 1 /* overlap_flag */,
- 1 /* clip_to_restricted_range */,
- 8 /* bit_depth */,
- 0 /*chroma_scaling_from_luma*/,
- 0 /* grain_scale_shift*/,
- 45231 /* random_seed */
- },
- /* Test 15 */
- {
- 1 /* apply_grain */,
- 1 /* update_parameters */,
- { { 0, 96 }, { 255, 96 } },
- 1 /* num_points_y */,
- { { 0, 96 }, { 255, 96 } },
- 0 /* num_cb_points */,
- { { 0, 96 }, { 255, 96 } },
- 0 /* num_cr_points */,
- 11 /* scaling_shift */,
- 2 /* ar_coeff_lag */,
- { 5, -15, -10, -19, 0, -12, 6, 51, 30, -5, -12, 56 },
- { 2, 2, -24, -5, 1, 1, -18, 37, -2, 0, -15, 39, -70 },
- { 2, 3, -24, -5, -1, 0, -18, 38, -2, 0, -15, 39, -55 },
- 7 /* ar_coeff_shift */,
- 128 /* cb_mult */,
- 192 /* cb_luma_mult */,
- 256 /* cb_offset */,
- 128 /* cr_mult */,
- 192 /* cr_luma_mult */,
- 256 /* cr_offset */,
- 1 /* overlap_flag */,
- 0 /* clip_to_restricted_range */,
- 8 /* bit_depth */,
- 1 /*chroma_scaling_from_luma*/,
- 0 /* grain_scale_shift*/,
- 45231 /* random_seed */
- },
- /* Test 16 */
- {
- 1 /* apply_grain */,
- 1 /* update_parameters */,
- {
- { 16, 0 },
- { 58, 126 },
- { 87, 120 },
- { 97, 122 },
- { 112, 125 },
- { 126, 131 },
- { 141, 139 },
- { 199, 153 },
- },
- 8 /* num_points_y */,
- {
- { 16, 0 },
- { 59, 68 },
- { 66, 76 },
- { 73, 82 },
- { 79, 85 },
- { 86, 86 },
- { 151, 95 },
- { 192, 101 },
- },
- 8 /* num_cb_points */,
- {
- { 16, 0 },
- { 59, 64 },
- { 89, 80 },
- { 99, 86 },
- { 114, 90 },
- { 129, 93 },
- { 144, 97 },
- { 203, 85 },
- },
- 8 /* num_cr_points */,
- 10 /* scaling_shift */,
- 3 /* ar_coeff_lag */,
- {
- 4, 1, 3, 0, 1, -3, 8, -3, 7, -23, 1, -25,
- 0, -10, 6, -17, -4, 53, 36, 5, -5, -17, 8, 66,
- },
- {
- 0, -2, -2, 8, 5, -1, 1, -1, 5, 16, -33, -9, 6,
- -1, -3, 10, -47, 63, 0, -15, 3, 11, -42, 75, -69,
- },
- {
- 1, -1, -1, 9, 5, 0, 1, -1, 5, 15, -32, -10, 8,
- -2, -4, 11, -46, 62, 1, -16, 3, 13, -43, 75, -55,
- },
- 7 /* ar_coeff_shift */,
- 128 /* cb_mult */,
- 192 /* cb_luma_mult */,
- 256 /* cb_offset */,
- 128 /* cr_mult */,
- 192 /* cr_luma_mult */,
- 256 /* cr_offset */,
- 1 /* overlap_flag */,
- 0 /* clip_to_restricted_range */,
- 8 /* bit_depth */,
- 0 /*chroma_scaling_from_luma*/,
- 2 /* grain_scale_shift*/,
- 45231 /* random_seed */
- },
-};
-#endif // AOM_AV1_ENCODER_GRAIN_TEST_VECTORS_H_
diff --git a/third_party/aom/av1/encoder/hash.c b/third_party/aom/av1/encoder/hash.c
deleted file mode 100644
index 180115d9f..000000000
--- a/third_party/aom/av1/encoder/hash.c
+++ /dev/null
@@ -1,125 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#include "av1/encoder/hash.h"
-
-static void crc_calculator_process_data(CRC_CALCULATOR *p_crc_calculator,
- uint8_t *pData, uint32_t dataLength) {
- for (uint32_t i = 0; i < dataLength; i++) {
- const uint8_t index =
- (p_crc_calculator->remainder >> (p_crc_calculator->bits - 8)) ^
- pData[i];
- p_crc_calculator->remainder <<= 8;
- p_crc_calculator->remainder ^= p_crc_calculator->table[index];
- }
-}
-
-static void crc_calculator_reset(CRC_CALCULATOR *p_crc_calculator) {
- p_crc_calculator->remainder = 0;
-}
-
-static uint32_t crc_calculator_get_crc(CRC_CALCULATOR *p_crc_calculator) {
- return p_crc_calculator->remainder & p_crc_calculator->final_result_mask;
-}
-
-static void crc_calculator_init_table(CRC_CALCULATOR *p_crc_calculator) {
- const uint32_t high_bit = 1 << (p_crc_calculator->bits - 1);
- const uint32_t byte_high_bit = 1 << (8 - 1);
-
- for (uint32_t value = 0; value < 256; value++) {
- uint32_t remainder = 0;
- for (uint8_t mask = byte_high_bit; mask != 0; mask >>= 1) {
- if (value & mask) {
- remainder ^= high_bit;
- }
-
- if (remainder & high_bit) {
- remainder <<= 1;
- remainder ^= p_crc_calculator->trunc_poly;
- } else {
- remainder <<= 1;
- }
- }
- p_crc_calculator->table[value] = remainder;
- }
-}
-
-void av1_crc_calculator_init(CRC_CALCULATOR *p_crc_calculator, uint32_t bits,
- uint32_t truncPoly) {
- p_crc_calculator->remainder = 0;
- p_crc_calculator->bits = bits;
- p_crc_calculator->trunc_poly = truncPoly;
- p_crc_calculator->final_result_mask = (1 << bits) - 1;
- crc_calculator_init_table(p_crc_calculator);
-}
-
-uint32_t av1_get_crc_value(void *crc_calculator, uint8_t *p, int length) {
- CRC_CALCULATOR *p_crc_calculator = (CRC_CALCULATOR *)crc_calculator;
- crc_calculator_reset(p_crc_calculator);
- crc_calculator_process_data(p_crc_calculator, p, length);
- return crc_calculator_get_crc(p_crc_calculator);
-}
-
-/* CRC-32C (iSCSI) polynomial in reversed bit order. */
-#define POLY 0x82f63b78
-
-/* Construct table for software CRC-32C calculation. */
-void av1_crc32c_calculator_init(CRC32C *p_crc32c) {
- uint32_t crc;
-
- for (int n = 0; n < 256; n++) {
- crc = n;
- crc = crc & 1 ? (crc >> 1) ^ POLY : crc >> 1;
- crc = crc & 1 ? (crc >> 1) ^ POLY : crc >> 1;
- crc = crc & 1 ? (crc >> 1) ^ POLY : crc >> 1;
- crc = crc & 1 ? (crc >> 1) ^ POLY : crc >> 1;
- crc = crc & 1 ? (crc >> 1) ^ POLY : crc >> 1;
- crc = crc & 1 ? (crc >> 1) ^ POLY : crc >> 1;
- crc = crc & 1 ? (crc >> 1) ^ POLY : crc >> 1;
- crc = crc & 1 ? (crc >> 1) ^ POLY : crc >> 1;
- p_crc32c->table[0][n] = crc;
- }
- for (int n = 0; n < 256; n++) {
- crc = p_crc32c->table[0][n];
- for (int k = 1; k < 8; k++) {
- crc = p_crc32c->table[0][crc & 0xff] ^ (crc >> 8);
- p_crc32c->table[k][n] = crc;
- }
- }
-}
-
-/* Table-driven software version as a fall-back. This is about 15 times slower
- than using the hardware instructions. This assumes little-endian integers,
- as is the case on Intel processors that the assembler code here is for. */
-uint32_t av1_get_crc32c_value_c(CRC32C *p, uint8_t *buf, size_t len) {
- const uint8_t *next = (const uint8_t *)(buf);
- uint64_t crc;
-
- crc = 0 ^ 0xffffffff;
- while (len && ((uintptr_t)next & 7) != 0) {
- crc = p->table[0][(crc ^ *next++) & 0xff] ^ (crc >> 8);
- len--;
- }
- while (len >= 8) {
- crc ^= *(uint64_t *)next;
- crc = p->table[7][crc & 0xff] ^ p->table[6][(crc >> 8) & 0xff] ^
- p->table[5][(crc >> 16) & 0xff] ^ p->table[4][(crc >> 24) & 0xff] ^
- p->table[3][(crc >> 32) & 0xff] ^ p->table[2][(crc >> 40) & 0xff] ^
- p->table[1][(crc >> 48) & 0xff] ^ p->table[0][crc >> 56];
- next += 8;
- len -= 8;
- }
- while (len) {
- crc = p->table[0][(crc ^ *next++) & 0xff] ^ (crc >> 8);
- len--;
- }
- return (uint32_t)crc ^ 0xffffffff;
-}
diff --git a/third_party/aom/av1/encoder/hash.h b/third_party/aom/av1/encoder/hash.h
deleted file mode 100644
index 826c004d6..000000000
--- a/third_party/aom/av1/encoder/hash.h
+++ /dev/null
@@ -1,52 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#ifndef AOM_AV1_ENCODER_HASH_H_
-#define AOM_AV1_ENCODER_HASH_H_
-
-#include "config/aom_config.h"
-
-#include "aom/aom_integer.h"
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-typedef struct _crc_calculator {
- uint32_t remainder;
- uint32_t trunc_poly;
- uint32_t bits;
- uint32_t table[256];
- uint32_t final_result_mask;
-} CRC_CALCULATOR;
-
-// Initialize the crc calculator. It must be executed at least once before
-// calling av1_get_crc_value().
-void av1_crc_calculator_init(CRC_CALCULATOR *p_crc_calculator, uint32_t bits,
- uint32_t truncPoly);
-uint32_t av1_get_crc_value(void *crc_calculator, uint8_t *p, int length);
-
-// CRC32C: POLY = 0x82f63b78;
-typedef struct _CRC32C {
- /* Table for a quadword-at-a-time software crc. */
- uint32_t table[8][256];
-} CRC32C;
-
-// init table for software version crc32c
-void av1_crc32c_calculator_init(CRC32C *p_crc32c);
-
-#define AOM_BUFFER_SIZE_FOR_BLOCK_HASH (4096)
-
-#ifdef __cplusplus
-} // extern "C"
-#endif
-
-#endif // AOM_AV1_ENCODER_HASH_H_
diff --git a/third_party/aom/av1/encoder/hash_motion.c b/third_party/aom/av1/encoder/hash_motion.c
deleted file mode 100644
index e85a516e8..000000000
--- a/third_party/aom/av1/encoder/hash_motion.c
+++ /dev/null
@@ -1,482 +0,0 @@
-/*
- * Copyright (c) 2018, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#include <assert.h>
-
-#include "config/av1_rtcd.h"
-
-#include "av1/encoder/block.h"
-#include "av1/encoder/hash.h"
-#include "av1/encoder/hash_motion.h"
-
-static const int crc_bits = 16;
-static const int block_size_bits = 3;
-
-static void hash_table_clear_all(hash_table *p_hash_table) {
- if (p_hash_table->p_lookup_table == NULL) {
- return;
- }
- int max_addr = 1 << (crc_bits + block_size_bits);
- for (int i = 0; i < max_addr; i++) {
- if (p_hash_table->p_lookup_table[i] != NULL) {
- aom_vector_destroy(p_hash_table->p_lookup_table[i]);
- aom_free(p_hash_table->p_lookup_table[i]);
- p_hash_table->p_lookup_table[i] = NULL;
- }
- }
-}
-
-// TODO(youzhou@microsoft.com): is higher than 8 bits screen content supported?
-// If yes, fix this function
-static void get_pixels_in_1D_char_array_by_block_2x2(uint8_t *y_src, int stride,
- uint8_t *p_pixels_in1D) {
- uint8_t *p_pel = y_src;
- int index = 0;
- for (int i = 0; i < 2; i++) {
- for (int j = 0; j < 2; j++) {
- p_pixels_in1D[index++] = p_pel[j];
- }
- p_pel += stride;
- }
-}
-
-static void get_pixels_in_1D_short_array_by_block_2x2(uint16_t *y_src,
- int stride,
- uint16_t *p_pixels_in1D) {
- uint16_t *p_pel = y_src;
- int index = 0;
- for (int i = 0; i < 2; i++) {
- for (int j = 0; j < 2; j++) {
- p_pixels_in1D[index++] = p_pel[j];
- }
- p_pel += stride;
- }
-}
-
-static int is_block_2x2_row_same_value(uint8_t *p) {
- if (p[0] != p[1] || p[2] != p[3]) {
- return 0;
- }
- return 1;
-}
-
-static int is_block16_2x2_row_same_value(uint16_t *p) {
- if (p[0] != p[1] || p[2] != p[3]) {
- return 0;
- }
- return 1;
-}
-
-static int is_block_2x2_col_same_value(uint8_t *p) {
- if ((p[0] != p[2]) || (p[1] != p[3])) {
- return 0;
- }
- return 1;
-}
-
-static int is_block16_2x2_col_same_value(uint16_t *p) {
- if ((p[0] != p[2]) || (p[1] != p[3])) {
- return 0;
- }
- return 1;
-}
-
-// the hash value (hash_value1 consists two parts, the first 3 bits relate to
-// the block size and the remaining 16 bits are the crc values. This fuction
-// is used to get the first 3 bits.
-static int hash_block_size_to_index(int block_size) {
- switch (block_size) {
- case 4: return 0;
- case 8: return 1;
- case 16: return 2;
- case 32: return 3;
- case 64: return 4;
- case 128: return 5;
- default: return -1;
- }
-}
-
-void av1_hash_table_init(hash_table *p_hash_table, MACROBLOCK *x) {
- if (x->g_crc_initialized == 0) {
- av1_crc_calculator_init(&x->crc_calculator1, 24, 0x5D6DCB);
- av1_crc_calculator_init(&x->crc_calculator2, 24, 0x864CFB);
- x->g_crc_initialized = 1;
- }
- p_hash_table->p_lookup_table = NULL;
-}
-
-void av1_hash_table_destroy(hash_table *p_hash_table) {
- hash_table_clear_all(p_hash_table);
- aom_free(p_hash_table->p_lookup_table);
- p_hash_table->p_lookup_table = NULL;
-}
-
-void av1_hash_table_create(hash_table *p_hash_table) {
- if (p_hash_table->p_lookup_table != NULL) {
- hash_table_clear_all(p_hash_table);
- return;
- }
- const int max_addr = 1 << (crc_bits + block_size_bits);
- p_hash_table->p_lookup_table =
- (Vector **)aom_malloc(sizeof(p_hash_table->p_lookup_table[0]) * max_addr);
- memset(p_hash_table->p_lookup_table, 0,
- sizeof(p_hash_table->p_lookup_table[0]) * max_addr);
-}
-
-static void hash_table_add_to_table(hash_table *p_hash_table,
- uint32_t hash_value,
- block_hash *curr_block_hash) {
- if (p_hash_table->p_lookup_table[hash_value] == NULL) {
- p_hash_table->p_lookup_table[hash_value] =
- aom_malloc(sizeof(p_hash_table->p_lookup_table[0][0]));
- aom_vector_setup(p_hash_table->p_lookup_table[hash_value], 10,
- sizeof(curr_block_hash[0]));
- aom_vector_push_back(p_hash_table->p_lookup_table[hash_value],
- curr_block_hash);
- } else {
- aom_vector_push_back(p_hash_table->p_lookup_table[hash_value],
- curr_block_hash);
- }
-}
-
-int32_t av1_hash_table_count(hash_table *p_hash_table, uint32_t hash_value) {
- if (p_hash_table->p_lookup_table[hash_value] == NULL) {
- return 0;
- } else {
- return (int32_t)(p_hash_table->p_lookup_table[hash_value]->size);
- }
-}
-
-Iterator av1_hash_get_first_iterator(hash_table *p_hash_table,
- uint32_t hash_value) {
- assert(av1_hash_table_count(p_hash_table, hash_value) > 0);
- return aom_vector_begin(p_hash_table->p_lookup_table[hash_value]);
-}
-
-int32_t av1_has_exact_match(hash_table *p_hash_table, uint32_t hash_value1,
- uint32_t hash_value2) {
- if (p_hash_table->p_lookup_table[hash_value1] == NULL) {
- return 0;
- }
- Iterator iterator =
- aom_vector_begin(p_hash_table->p_lookup_table[hash_value1]);
- Iterator last = aom_vector_end(p_hash_table->p_lookup_table[hash_value1]);
- for (; !iterator_equals(&iterator, &last); iterator_increment(&iterator)) {
- if ((*(block_hash *)iterator_get(&iterator)).hash_value2 == hash_value2) {
- return 1;
- }
- }
- return 0;
-}
-
-void av1_generate_block_2x2_hash_value(const YV12_BUFFER_CONFIG *picture,
- uint32_t *pic_block_hash[2],
- int8_t *pic_block_same_info[3],
- MACROBLOCK *x) {
- const int width = 2;
- const int height = 2;
- const int x_end = picture->y_crop_width - width + 1;
- const int y_end = picture->y_crop_height - height + 1;
-
- const int length = width * 2;
- if (picture->flags & YV12_FLAG_HIGHBITDEPTH) {
- uint16_t p[4];
- int pos = 0;
- for (int y_pos = 0; y_pos < y_end; y_pos++) {
- for (int x_pos = 0; x_pos < x_end; x_pos++) {
- get_pixels_in_1D_short_array_by_block_2x2(
- CONVERT_TO_SHORTPTR(picture->y_buffer) + y_pos * picture->y_stride +
- x_pos,
- picture->y_stride, p);
- pic_block_same_info[0][pos] = is_block16_2x2_row_same_value(p);
- pic_block_same_info[1][pos] = is_block16_2x2_col_same_value(p);
-
- pic_block_hash[0][pos] = av1_get_crc_value(
- &x->crc_calculator1, (uint8_t *)p, length * sizeof(p[0]));
- pic_block_hash[1][pos] = av1_get_crc_value(
- &x->crc_calculator2, (uint8_t *)p, length * sizeof(p[0]));
- pos++;
- }
- pos += width - 1;
- }
- } else {
- uint8_t p[4];
- int pos = 0;
- for (int y_pos = 0; y_pos < y_end; y_pos++) {
- for (int x_pos = 0; x_pos < x_end; x_pos++) {
- get_pixels_in_1D_char_array_by_block_2x2(
- picture->y_buffer + y_pos * picture->y_stride + x_pos,
- picture->y_stride, p);
- pic_block_same_info[0][pos] = is_block_2x2_row_same_value(p);
- pic_block_same_info[1][pos] = is_block_2x2_col_same_value(p);
-
- pic_block_hash[0][pos] =
- av1_get_crc_value(&x->crc_calculator1, p, length * sizeof(p[0]));
- pic_block_hash[1][pos] =
- av1_get_crc_value(&x->crc_calculator2, p, length * sizeof(p[0]));
- pos++;
- }
- pos += width - 1;
- }
- }
-}
-
-void av1_generate_block_hash_value(const YV12_BUFFER_CONFIG *picture,
- int block_size,
- uint32_t *src_pic_block_hash[2],
- uint32_t *dst_pic_block_hash[2],
- int8_t *src_pic_block_same_info[3],
- int8_t *dst_pic_block_same_info[3],
- MACROBLOCK *x) {
- const int pic_width = picture->y_crop_width;
- const int x_end = picture->y_crop_width - block_size + 1;
- const int y_end = picture->y_crop_height - block_size + 1;
-
- const int src_size = block_size >> 1;
- const int quad_size = block_size >> 2;
-
- uint32_t p[4];
- const int length = sizeof(p);
-
- int pos = 0;
- for (int y_pos = 0; y_pos < y_end; y_pos++) {
- for (int x_pos = 0; x_pos < x_end; x_pos++) {
- p[0] = src_pic_block_hash[0][pos];
- p[1] = src_pic_block_hash[0][pos + src_size];
- p[2] = src_pic_block_hash[0][pos + src_size * pic_width];
- p[3] = src_pic_block_hash[0][pos + src_size * pic_width + src_size];
- dst_pic_block_hash[0][pos] =
- av1_get_crc_value(&x->crc_calculator1, (uint8_t *)p, length);
-
- p[0] = src_pic_block_hash[1][pos];
- p[1] = src_pic_block_hash[1][pos + src_size];
- p[2] = src_pic_block_hash[1][pos + src_size * pic_width];
- p[3] = src_pic_block_hash[1][pos + src_size * pic_width + src_size];
- dst_pic_block_hash[1][pos] =
- av1_get_crc_value(&x->crc_calculator2, (uint8_t *)p, length);
-
- dst_pic_block_same_info[0][pos] =
- src_pic_block_same_info[0][pos] &&
- src_pic_block_same_info[0][pos + quad_size] &&
- src_pic_block_same_info[0][pos + src_size] &&
- src_pic_block_same_info[0][pos + src_size * pic_width] &&
- src_pic_block_same_info[0][pos + src_size * pic_width + quad_size] &&
- src_pic_block_same_info[0][pos + src_size * pic_width + src_size];
-
- dst_pic_block_same_info[1][pos] =
- src_pic_block_same_info[1][pos] &&
- src_pic_block_same_info[1][pos + src_size] &&
- src_pic_block_same_info[1][pos + quad_size * pic_width] &&
- src_pic_block_same_info[1][pos + quad_size * pic_width + src_size] &&
- src_pic_block_same_info[1][pos + src_size * pic_width] &&
- src_pic_block_same_info[1][pos + src_size * pic_width + src_size];
- pos++;
- }
- pos += block_size - 1;
- }
-
- if (block_size >= 4) {
- const int size_minus_1 = block_size - 1;
- pos = 0;
- for (int y_pos = 0; y_pos < y_end; y_pos++) {
- for (int x_pos = 0; x_pos < x_end; x_pos++) {
- dst_pic_block_same_info[2][pos] =
- (!dst_pic_block_same_info[0][pos] &&
- !dst_pic_block_same_info[1][pos]) ||
- (((x_pos & size_minus_1) == 0) && ((y_pos & size_minus_1) == 0));
- pos++;
- }
- pos += block_size - 1;
- }
- }
-}
-
-void av1_add_to_hash_map_by_row_with_precal_data(hash_table *p_hash_table,
- uint32_t *pic_hash[2],
- int8_t *pic_is_same,
- int pic_width, int pic_height,
- int block_size) {
- const int x_end = pic_width - block_size + 1;
- const int y_end = pic_height - block_size + 1;
-
- const int8_t *src_is_added = pic_is_same;
- const uint32_t *src_hash[2] = { pic_hash[0], pic_hash[1] };
-
- int add_value = hash_block_size_to_index(block_size);
- assert(add_value >= 0);
- add_value <<= crc_bits;
- const int crc_mask = (1 << crc_bits) - 1;
-
- for (int x_pos = 0; x_pos < x_end; x_pos++) {
- for (int y_pos = 0; y_pos < y_end; y_pos++) {
- const int pos = y_pos * pic_width + x_pos;
- // valid data
- if (src_is_added[pos]) {
- block_hash curr_block_hash;
- curr_block_hash.x = x_pos;
- curr_block_hash.y = y_pos;
-
- const uint32_t hash_value1 = (src_hash[0][pos] & crc_mask) + add_value;
- curr_block_hash.hash_value2 = src_hash[1][pos];
-
- hash_table_add_to_table(p_hash_table, hash_value1, &curr_block_hash);
- }
- }
- }
-}
-
-int av1_hash_is_horizontal_perfect(const YV12_BUFFER_CONFIG *picture,
- int block_size, int x_start, int y_start) {
- const int stride = picture->y_stride;
- const uint8_t *p = picture->y_buffer + y_start * stride + x_start;
-
- if (picture->flags & YV12_FLAG_HIGHBITDEPTH) {
- const uint16_t *p16 = CONVERT_TO_SHORTPTR(p);
- for (int i = 0; i < block_size; i++) {
- for (int j = 1; j < block_size; j++) {
- if (p16[j] != p16[0]) {
- return 0;
- }
- }
- p16 += stride;
- }
- } else {
- for (int i = 0; i < block_size; i++) {
- for (int j = 1; j < block_size; j++) {
- if (p[j] != p[0]) {
- return 0;
- }
- }
- p += stride;
- }
- }
-
- return 1;
-}
-
-int av1_hash_is_vertical_perfect(const YV12_BUFFER_CONFIG *picture,
- int block_size, int x_start, int y_start) {
- const int stride = picture->y_stride;
- const uint8_t *p = picture->y_buffer + y_start * stride + x_start;
-
- if (picture->flags & YV12_FLAG_HIGHBITDEPTH) {
- const uint16_t *p16 = CONVERT_TO_SHORTPTR(p);
- for (int i = 0; i < block_size; i++) {
- for (int j = 1; j < block_size; j++) {
- if (p16[j * stride + i] != p16[i]) {
- return 0;
- }
- }
- }
- } else {
- for (int i = 0; i < block_size; i++) {
- for (int j = 1; j < block_size; j++) {
- if (p[j * stride + i] != p[i]) {
- return 0;
- }
- }
- }
- }
- return 1;
-}
-
-void av1_get_block_hash_value(uint8_t *y_src, int stride, int block_size,
- uint32_t *hash_value1, uint32_t *hash_value2,
- int use_highbitdepth, MACROBLOCK *x) {
- uint32_t to_hash[4];
- const int add_value = hash_block_size_to_index(block_size) << crc_bits;
- assert(add_value >= 0);
- const int crc_mask = (1 << crc_bits) - 1;
-
- // 2x2 subblock hash values in current CU
- int sub_block_in_width = (block_size >> 1);
- if (use_highbitdepth) {
- uint16_t pixel_to_hash[4];
- uint16_t *y16_src = CONVERT_TO_SHORTPTR(y_src);
- for (int y_pos = 0; y_pos < block_size; y_pos += 2) {
- for (int x_pos = 0; x_pos < block_size; x_pos += 2) {
- int pos = (y_pos >> 1) * sub_block_in_width + (x_pos >> 1);
- get_pixels_in_1D_short_array_by_block_2x2(
- y16_src + y_pos * stride + x_pos, stride, pixel_to_hash);
- assert(pos < AOM_BUFFER_SIZE_FOR_BLOCK_HASH);
- x->hash_value_buffer[0][0][pos] =
- av1_get_crc_value(&x->crc_calculator1, (uint8_t *)pixel_to_hash,
- sizeof(pixel_to_hash));
- x->hash_value_buffer[1][0][pos] =
- av1_get_crc_value(&x->crc_calculator2, (uint8_t *)pixel_to_hash,
- sizeof(pixel_to_hash));
- }
- }
- } else {
- uint8_t pixel_to_hash[4];
- for (int y_pos = 0; y_pos < block_size; y_pos += 2) {
- for (int x_pos = 0; x_pos < block_size; x_pos += 2) {
- int pos = (y_pos >> 1) * sub_block_in_width + (x_pos >> 1);
- get_pixels_in_1D_char_array_by_block_2x2(y_src + y_pos * stride + x_pos,
- stride, pixel_to_hash);
- assert(pos < AOM_BUFFER_SIZE_FOR_BLOCK_HASH);
- x->hash_value_buffer[0][0][pos] = av1_get_crc_value(
- &x->crc_calculator1, pixel_to_hash, sizeof(pixel_to_hash));
- x->hash_value_buffer[1][0][pos] = av1_get_crc_value(
- &x->crc_calculator2, pixel_to_hash, sizeof(pixel_to_hash));
- }
- }
- }
-
- int src_sub_block_in_width = sub_block_in_width;
- sub_block_in_width >>= 1;
-
- int src_idx = 1;
- int dst_idx = 0;
-
- // 4x4 subblock hash values to current block hash values
- for (int sub_width = 4; sub_width <= block_size; sub_width *= 2) {
- src_idx = 1 - src_idx;
- dst_idx = 1 - dst_idx;
-
- int dst_pos = 0;
- for (int y_pos = 0; y_pos < sub_block_in_width; y_pos++) {
- for (int x_pos = 0; x_pos < sub_block_in_width; x_pos++) {
- int srcPos = (y_pos << 1) * src_sub_block_in_width + (x_pos << 1);
-
- assert(srcPos + 1 < AOM_BUFFER_SIZE_FOR_BLOCK_HASH);
- assert(srcPos + src_sub_block_in_width + 1 <
- AOM_BUFFER_SIZE_FOR_BLOCK_HASH);
- assert(dst_pos < AOM_BUFFER_SIZE_FOR_BLOCK_HASH);
- to_hash[0] = x->hash_value_buffer[0][src_idx][srcPos];
- to_hash[1] = x->hash_value_buffer[0][src_idx][srcPos + 1];
- to_hash[2] =
- x->hash_value_buffer[0][src_idx][srcPos + src_sub_block_in_width];
- to_hash[3] = x->hash_value_buffer[0][src_idx]
- [srcPos + src_sub_block_in_width + 1];
-
- x->hash_value_buffer[0][dst_idx][dst_pos] = av1_get_crc_value(
- &x->crc_calculator1, (uint8_t *)to_hash, sizeof(to_hash));
-
- to_hash[0] = x->hash_value_buffer[1][src_idx][srcPos];
- to_hash[1] = x->hash_value_buffer[1][src_idx][srcPos + 1];
- to_hash[2] =
- x->hash_value_buffer[1][src_idx][srcPos + src_sub_block_in_width];
- to_hash[3] = x->hash_value_buffer[1][src_idx]
- [srcPos + src_sub_block_in_width + 1];
- x->hash_value_buffer[1][dst_idx][dst_pos] = av1_get_crc_value(
- &x->crc_calculator2, (uint8_t *)to_hash, sizeof(to_hash));
- dst_pos++;
- }
- }
-
- src_sub_block_in_width = sub_block_in_width;
- sub_block_in_width >>= 1;
- }
-
- *hash_value1 = (x->hash_value_buffer[0][dst_idx][0] & crc_mask) + add_value;
- *hash_value2 = x->hash_value_buffer[1][dst_idx][0];
-}
diff --git a/third_party/aom/av1/encoder/hash_motion.h b/third_party/aom/av1/encoder/hash_motion.h
deleted file mode 100644
index df3ec3215..000000000
--- a/third_party/aom/av1/encoder/hash_motion.h
+++ /dev/null
@@ -1,78 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#ifndef AOM_AV1_ENCODER_HASH_MOTION_H_
-#define AOM_AV1_ENCODER_HASH_MOTION_H_
-
-#include "config/aom_config.h"
-
-#include "aom/aom_integer.h"
-#include "aom_scale/yv12config.h"
-#include "third_party/vector/vector.h"
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-// store a block's hash info.
-// x and y are the position from the top left of the picture
-// hash_value2 is used to store the second hash value
-typedef struct _block_hash {
- int16_t x;
- int16_t y;
- uint32_t hash_value2;
-} block_hash;
-
-typedef struct _hash_table {
- Vector **p_lookup_table;
-} hash_table;
-
-void av1_hash_table_init(hash_table *p_hash_table, struct macroblock *x);
-void av1_hash_table_destroy(hash_table *p_hash_table);
-void av1_hash_table_create(hash_table *p_hash_table);
-int32_t av1_hash_table_count(hash_table *p_hash_table, uint32_t hash_value);
-Iterator av1_hash_get_first_iterator(hash_table *p_hash_table,
- uint32_t hash_value);
-int32_t av1_has_exact_match(hash_table *p_hash_table, uint32_t hash_value1,
- uint32_t hash_value2);
-void av1_generate_block_2x2_hash_value(const YV12_BUFFER_CONFIG *picture,
- uint32_t *pic_block_hash[2],
- int8_t *pic_block_same_info[3],
- struct macroblock *x);
-void av1_generate_block_hash_value(const YV12_BUFFER_CONFIG *picture,
- int block_size,
- uint32_t *src_pic_block_hash[2],
- uint32_t *dst_pic_block_hash[2],
- int8_t *src_pic_block_same_info[3],
- int8_t *dst_pic_block_same_info[3],
- struct macroblock *x);
-void av1_add_to_hash_map_by_row_with_precal_data(hash_table *p_hash_table,
- uint32_t *pic_hash[2],
- int8_t *pic_is_same,
- int pic_width, int pic_height,
- int block_size);
-
-// check whether the block starts from (x_start, y_start) with the size of
-// block_size x block_size has the same color in all rows
-int av1_hash_is_horizontal_perfect(const YV12_BUFFER_CONFIG *picture,
- int block_size, int x_start, int y_start);
-// check whether the block starts from (x_start, y_start) with the size of
-// block_size x block_size has the same color in all columns
-int av1_hash_is_vertical_perfect(const YV12_BUFFER_CONFIG *picture,
- int block_size, int x_start, int y_start);
-void av1_get_block_hash_value(uint8_t *y_src, int stride, int block_size,
- uint32_t *hash_value1, uint32_t *hash_value2,
- int use_highbitdepth, struct macroblock *x);
-
-#ifdef __cplusplus
-} // extern "C"
-#endif
-
-#endif // AOM_AV1_ENCODER_HASH_MOTION_H_
diff --git a/third_party/aom/av1/encoder/hybrid_fwd_txfm.c b/third_party/aom/av1/encoder/hybrid_fwd_txfm.c
deleted file mode 100644
index 67898fd18..000000000
--- a/third_party/aom/av1/encoder/hybrid_fwd_txfm.c
+++ /dev/null
@@ -1,390 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#include "config/aom_config.h"
-#include "config/av1_rtcd.h"
-#include "config/aom_dsp_rtcd.h"
-
-#include "av1/common/idct.h"
-#include "av1/encoder/hybrid_fwd_txfm.h"
-
-/* 4-point reversible, orthonormal Walsh-Hadamard in 3.5 adds, 0.5 shifts per
- pixel. */
-void av1_fwht4x4_c(const int16_t *input, tran_low_t *output, int stride) {
- int i;
- tran_high_t a1, b1, c1, d1, e1;
- const int16_t *ip_pass0 = input;
- const tran_low_t *ip = NULL;
- tran_low_t *op = output;
-
- for (i = 0; i < 4; i++) {
- a1 = ip_pass0[0 * stride];
- b1 = ip_pass0[1 * stride];
- c1 = ip_pass0[2 * stride];
- d1 = ip_pass0[3 * stride];
-
- a1 += b1;
- d1 = d1 - c1;
- e1 = (a1 - d1) >> 1;
- b1 = e1 - b1;
- c1 = e1 - c1;
- a1 -= c1;
- d1 += b1;
- op[0] = (tran_low_t)a1;
- op[4] = (tran_low_t)c1;
- op[8] = (tran_low_t)d1;
- op[12] = (tran_low_t)b1;
-
- ip_pass0++;
- op++;
- }
- ip = output;
- op = output;
-
- for (i = 0; i < 4; i++) {
- a1 = ip[0];
- b1 = ip[1];
- c1 = ip[2];
- d1 = ip[3];
-
- a1 += b1;
- d1 -= c1;
- e1 = (a1 - d1) >> 1;
- b1 = e1 - b1;
- c1 = e1 - c1;
- a1 -= c1;
- d1 += b1;
- op[0] = (tran_low_t)(a1 * UNIT_QUANT_FACTOR);
- op[1] = (tran_low_t)(c1 * UNIT_QUANT_FACTOR);
- op[2] = (tran_low_t)(d1 * UNIT_QUANT_FACTOR);
- op[3] = (tran_low_t)(b1 * UNIT_QUANT_FACTOR);
-
- ip += 4;
- op += 4;
- }
-}
-
-void av1_highbd_fwht4x4_c(const int16_t *input, tran_low_t *output,
- int stride) {
- av1_fwht4x4_c(input, output, stride);
-}
-
-static void highbd_fwd_txfm_4x4(const int16_t *src_diff, tran_low_t *coeff,
- int diff_stride, TxfmParam *txfm_param) {
- int32_t *dst_coeff = (int32_t *)coeff;
- const TX_TYPE tx_type = txfm_param->tx_type;
- const int bd = txfm_param->bd;
- if (txfm_param->lossless) {
- assert(tx_type == DCT_DCT);
- av1_highbd_fwht4x4(src_diff, coeff, diff_stride);
- return;
- }
- switch (tx_type) {
- // use the c version for anything including identity for now
- case V_DCT:
- case H_DCT:
- case V_ADST:
- case H_ADST:
- case V_FLIPADST:
- case H_FLIPADST:
- case IDTX:
- av1_fwd_txfm2d_4x4_c(src_diff, dst_coeff, diff_stride, tx_type, bd);
- break;
- default:
- av1_fwd_txfm2d_4x4(src_diff, dst_coeff, diff_stride, tx_type, bd);
- break;
- }
-}
-
-static void highbd_fwd_txfm_4x8(const int16_t *src_diff, tran_low_t *coeff,
- int diff_stride, TxfmParam *txfm_param) {
- int32_t *dst_coeff = (int32_t *)coeff;
- av1_fwd_txfm2d_4x8_c(src_diff, dst_coeff, diff_stride, txfm_param->tx_type,
- txfm_param->bd);
-}
-
-static void highbd_fwd_txfm_8x4(const int16_t *src_diff, tran_low_t *coeff,
- int diff_stride, TxfmParam *txfm_param) {
- int32_t *dst_coeff = (int32_t *)coeff;
- av1_fwd_txfm2d_8x4_c(src_diff, dst_coeff, diff_stride, txfm_param->tx_type,
- txfm_param->bd);
-}
-
-static void highbd_fwd_txfm_8x16(const int16_t *src_diff, tran_low_t *coeff,
- int diff_stride, TxfmParam *txfm_param) {
- int32_t *dst_coeff = (int32_t *)coeff;
- const TX_TYPE tx_type = txfm_param->tx_type;
- const int bd = txfm_param->bd;
- switch (tx_type) {
- // use the c version for anything including identity for now
- case V_DCT:
- case H_DCT:
- case V_ADST:
- case H_ADST:
- case V_FLIPADST:
- case H_FLIPADST:
- case IDTX:
- av1_fwd_txfm2d_8x16_c(src_diff, dst_coeff, diff_stride, tx_type, bd);
- break;
- default:
- av1_fwd_txfm2d_8x16(src_diff, dst_coeff, diff_stride, tx_type, bd);
- break;
- }
-}
-
-static void highbd_fwd_txfm_16x8(const int16_t *src_diff, tran_low_t *coeff,
- int diff_stride, TxfmParam *txfm_param) {
- int32_t *dst_coeff = (int32_t *)coeff;
- const TX_TYPE tx_type = txfm_param->tx_type;
- const int bd = txfm_param->bd;
- switch (tx_type) {
- // use the c version for anything including identity for now
- case V_DCT:
- case H_DCT:
- case V_ADST:
- case H_ADST:
- case V_FLIPADST:
- case H_FLIPADST:
- case IDTX:
- av1_fwd_txfm2d_16x8_c(src_diff, dst_coeff, diff_stride, tx_type, bd);
- break;
- default:
- av1_fwd_txfm2d_16x8(src_diff, dst_coeff, diff_stride, tx_type, bd);
- break;
- }
-}
-
-static void highbd_fwd_txfm_16x32(const int16_t *src_diff, tran_low_t *coeff,
- int diff_stride, TxfmParam *txfm_param) {
- int32_t *dst_coeff = (int32_t *)coeff;
- av1_fwd_txfm2d_16x32_c(src_diff, dst_coeff, diff_stride, txfm_param->tx_type,
- txfm_param->bd);
-}
-
-static void highbd_fwd_txfm_32x16(const int16_t *src_diff, tran_low_t *coeff,
- int diff_stride, TxfmParam *txfm_param) {
- int32_t *dst_coeff = (int32_t *)coeff;
- av1_fwd_txfm2d_32x16_c(src_diff, dst_coeff, diff_stride, txfm_param->tx_type,
- txfm_param->bd);
-}
-
-static void highbd_fwd_txfm_16x4(const int16_t *src_diff, tran_low_t *coeff,
- int diff_stride, TxfmParam *txfm_param) {
- int32_t *dst_coeff = (int32_t *)coeff;
- av1_fwd_txfm2d_16x4_c(src_diff, dst_coeff, diff_stride, txfm_param->tx_type,
- txfm_param->bd);
-}
-
-static void highbd_fwd_txfm_4x16(const int16_t *src_diff, tran_low_t *coeff,
- int diff_stride, TxfmParam *txfm_param) {
- int32_t *dst_coeff = (int32_t *)coeff;
- av1_fwd_txfm2d_4x16_c(src_diff, dst_coeff, diff_stride, txfm_param->tx_type,
- txfm_param->bd);
-}
-
-static void highbd_fwd_txfm_32x8(const int16_t *src_diff, tran_low_t *coeff,
- int diff_stride, TxfmParam *txfm_param) {
- int32_t *dst_coeff = (int32_t *)coeff;
- av1_fwd_txfm2d_32x8_c(src_diff, dst_coeff, diff_stride, txfm_param->tx_type,
- txfm_param->bd);
-}
-
-static void highbd_fwd_txfm_8x32(const int16_t *src_diff, tran_low_t *coeff,
- int diff_stride, TxfmParam *txfm_param) {
- int32_t *dst_coeff = (int32_t *)coeff;
- av1_fwd_txfm2d_8x32_c(src_diff, dst_coeff, diff_stride, txfm_param->tx_type,
- txfm_param->bd);
-}
-
-static void highbd_fwd_txfm_8x8(const int16_t *src_diff, tran_low_t *coeff,
- int diff_stride, TxfmParam *txfm_param) {
- int32_t *dst_coeff = (int32_t *)coeff;
- const TX_TYPE tx_type = txfm_param->tx_type;
- const int bd = txfm_param->bd;
- switch (tx_type) {
- // use the c version for anything including identity for now
- case V_DCT:
- case H_DCT:
- case V_ADST:
- case H_ADST:
- case V_FLIPADST:
- case H_FLIPADST:
- case IDTX:
- av1_fwd_txfm2d_8x8_c(src_diff, dst_coeff, diff_stride, tx_type, bd);
- break;
- default:
- av1_fwd_txfm2d_8x8(src_diff, dst_coeff, diff_stride, tx_type, bd);
- break;
- }
-}
-
-static void highbd_fwd_txfm_16x16(const int16_t *src_diff, tran_low_t *coeff,
- int diff_stride, TxfmParam *txfm_param) {
- int32_t *dst_coeff = (int32_t *)coeff;
- const TX_TYPE tx_type = txfm_param->tx_type;
- const int bd = txfm_param->bd;
- switch (tx_type) {
- // use the c version for anything including identity for now
- case V_DCT:
- case H_DCT:
- case V_ADST:
- case H_ADST:
- case V_FLIPADST:
- case H_FLIPADST:
- case IDTX:
- av1_fwd_txfm2d_16x16_c(src_diff, dst_coeff, diff_stride, tx_type, bd);
- break;
- default:
- av1_fwd_txfm2d_16x16(src_diff, dst_coeff, diff_stride, tx_type, bd);
- break;
- }
-}
-
-static void highbd_fwd_txfm_32x32(const int16_t *src_diff, tran_low_t *coeff,
- int diff_stride, TxfmParam *txfm_param) {
- int32_t *dst_coeff = (int32_t *)coeff;
- const TX_TYPE tx_type = txfm_param->tx_type;
- const int bd = txfm_param->bd;
- switch (tx_type) {
- // use the c version for anything including identity for now
- case V_DCT:
- case H_DCT:
- case V_ADST:
- case H_ADST:
- case V_FLIPADST:
- case H_FLIPADST:
- case IDTX:
- av1_fwd_txfm2d_32x32_c(src_diff, dst_coeff, diff_stride, tx_type, bd);
- break;
- default:
- av1_fwd_txfm2d_32x32(src_diff, dst_coeff, diff_stride, tx_type, bd);
- break;
- }
-}
-
-static void highbd_fwd_txfm_32x64(const int16_t *src_diff, tran_low_t *coeff,
- int diff_stride, TxfmParam *txfm_param) {
- assert(txfm_param->tx_type == DCT_DCT);
- int32_t *dst_coeff = (int32_t *)coeff;
- const int bd = txfm_param->bd;
- av1_fwd_txfm2d_32x64_c(src_diff, dst_coeff, diff_stride, DCT_DCT, bd);
-}
-
-static void highbd_fwd_txfm_64x32(const int16_t *src_diff, tran_low_t *coeff,
- int diff_stride, TxfmParam *txfm_param) {
- assert(txfm_param->tx_type == DCT_DCT);
- int32_t *dst_coeff = (int32_t *)coeff;
- const int bd = txfm_param->bd;
- av1_fwd_txfm2d_64x32_c(src_diff, dst_coeff, diff_stride, DCT_DCT, bd);
-}
-
-static void highbd_fwd_txfm_16x64(const int16_t *src_diff, tran_low_t *coeff,
- int diff_stride, TxfmParam *txfm_param) {
- assert(txfm_param->tx_type == DCT_DCT);
- int32_t *dst_coeff = (int32_t *)coeff;
- const int bd = txfm_param->bd;
- av1_fwd_txfm2d_16x64_c(src_diff, dst_coeff, diff_stride, DCT_DCT, bd);
-}
-
-static void highbd_fwd_txfm_64x16(const int16_t *src_diff, tran_low_t *coeff,
- int diff_stride, TxfmParam *txfm_param) {
- assert(txfm_param->tx_type == DCT_DCT);
- int32_t *dst_coeff = (int32_t *)coeff;
- const int bd = txfm_param->bd;
- av1_fwd_txfm2d_64x16_c(src_diff, dst_coeff, diff_stride, DCT_DCT, bd);
-}
-
-static void highbd_fwd_txfm_64x64(const int16_t *src_diff, tran_low_t *coeff,
- int diff_stride, TxfmParam *txfm_param) {
- assert(txfm_param->tx_type == DCT_DCT);
- int32_t *dst_coeff = (int32_t *)coeff;
- const int bd = txfm_param->bd;
- av1_fwd_txfm2d_64x64(src_diff, dst_coeff, diff_stride, DCT_DCT, bd);
-}
-
-void av1_fwd_txfm(const int16_t *src_diff, tran_low_t *coeff, int diff_stride,
- TxfmParam *txfm_param) {
- if (txfm_param->bd == 8)
- av1_lowbd_fwd_txfm(src_diff, coeff, diff_stride, txfm_param);
- else
- av1_highbd_fwd_txfm(src_diff, coeff, diff_stride, txfm_param);
-}
-
-void av1_lowbd_fwd_txfm_c(const int16_t *src_diff, tran_low_t *coeff,
- int diff_stride, TxfmParam *txfm_param) {
- av1_highbd_fwd_txfm(src_diff, coeff, diff_stride, txfm_param);
-}
-
-void av1_highbd_fwd_txfm(const int16_t *src_diff, tran_low_t *coeff,
- int diff_stride, TxfmParam *txfm_param) {
- assert(av1_ext_tx_used[txfm_param->tx_set_type][txfm_param->tx_type]);
- const TX_SIZE tx_size = txfm_param->tx_size;
- switch (tx_size) {
- case TX_64X64:
- highbd_fwd_txfm_64x64(src_diff, coeff, diff_stride, txfm_param);
- break;
- case TX_32X64:
- highbd_fwd_txfm_32x64(src_diff, coeff, diff_stride, txfm_param);
- break;
- case TX_64X32:
- highbd_fwd_txfm_64x32(src_diff, coeff, diff_stride, txfm_param);
- break;
- case TX_16X64:
- highbd_fwd_txfm_16x64(src_diff, coeff, diff_stride, txfm_param);
- break;
- case TX_64X16:
- highbd_fwd_txfm_64x16(src_diff, coeff, diff_stride, txfm_param);
- break;
- case TX_32X32:
- highbd_fwd_txfm_32x32(src_diff, coeff, diff_stride, txfm_param);
- break;
- case TX_16X16:
- highbd_fwd_txfm_16x16(src_diff, coeff, diff_stride, txfm_param);
- break;
- case TX_8X8:
- highbd_fwd_txfm_8x8(src_diff, coeff, diff_stride, txfm_param);
- break;
- case TX_4X8:
- highbd_fwd_txfm_4x8(src_diff, coeff, diff_stride, txfm_param);
- break;
- case TX_8X4:
- highbd_fwd_txfm_8x4(src_diff, coeff, diff_stride, txfm_param);
- break;
- case TX_8X16:
- highbd_fwd_txfm_8x16(src_diff, coeff, diff_stride, txfm_param);
- break;
- case TX_16X8:
- highbd_fwd_txfm_16x8(src_diff, coeff, diff_stride, txfm_param);
- break;
- case TX_16X32:
- highbd_fwd_txfm_16x32(src_diff, coeff, diff_stride, txfm_param);
- break;
- case TX_32X16:
- highbd_fwd_txfm_32x16(src_diff, coeff, diff_stride, txfm_param);
- break;
- case TX_4X4:
- highbd_fwd_txfm_4x4(src_diff, coeff, diff_stride, txfm_param);
- break;
- case TX_4X16:
- highbd_fwd_txfm_4x16(src_diff, coeff, diff_stride, txfm_param);
- break;
- case TX_16X4:
- highbd_fwd_txfm_16x4(src_diff, coeff, diff_stride, txfm_param);
- break;
- case TX_8X32:
- highbd_fwd_txfm_8x32(src_diff, coeff, diff_stride, txfm_param);
- break;
- case TX_32X8:
- highbd_fwd_txfm_32x8(src_diff, coeff, diff_stride, txfm_param);
- break;
- default: assert(0); break;
- }
-}
diff --git a/third_party/aom/av1/encoder/hybrid_fwd_txfm.h b/third_party/aom/av1/encoder/hybrid_fwd_txfm.h
deleted file mode 100644
index daabc7119..000000000
--- a/third_party/aom/av1/encoder/hybrid_fwd_txfm.h
+++ /dev/null
@@ -1,31 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#ifndef AOM_AV1_ENCODER_HYBRID_FWD_TXFM_H_
-#define AOM_AV1_ENCODER_HYBRID_FWD_TXFM_H_
-
-#include "config/aom_config.h"
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-void av1_fwd_txfm(const int16_t *src_diff, tran_low_t *coeff, int diff_stride,
- TxfmParam *txfm_param);
-
-void av1_highbd_fwd_txfm(const int16_t *src_diff, tran_low_t *coeff,
- int diff_stride, TxfmParam *txfm_param);
-
-#ifdef __cplusplus
-} // extern "C"
-#endif
-
-#endif // AOM_AV1_ENCODER_HYBRID_FWD_TXFM_H_
diff --git a/third_party/aom/av1/encoder/k_means_template.h b/third_party/aom/av1/encoder/k_means_template.h
deleted file mode 100644
index 9e526b88b..000000000
--- a/third_party/aom/av1/encoder/k_means_template.h
+++ /dev/null
@@ -1,123 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#include <assert.h>
-#include <stdint.h>
-#include <string.h>
-
-#include "av1/encoder/palette.h"
-#include "av1/encoder/random.h"
-
-#ifndef AV1_K_MEANS_DIM
-#error "This template requires AV1_K_MEANS_DIM to be defined"
-#endif
-
-#define RENAME_(x, y) AV1_K_MEANS_RENAME(x, y)
-#define RENAME(x) RENAME_(x, AV1_K_MEANS_DIM)
-
-static int RENAME(calc_dist)(const int *p1, const int *p2) {
- int dist = 0;
- for (int i = 0; i < AV1_K_MEANS_DIM; ++i) {
- const int diff = p1[i] - p2[i];
- dist += diff * diff;
- }
- return dist;
-}
-
-void RENAME(av1_calc_indices)(const int *data, const int *centroids,
- uint8_t *indices, int n, int k) {
- for (int i = 0; i < n; ++i) {
- int min_dist = RENAME(calc_dist)(data + i * AV1_K_MEANS_DIM, centroids);
- indices[i] = 0;
- for (int j = 1; j < k; ++j) {
- const int this_dist = RENAME(calc_dist)(data + i * AV1_K_MEANS_DIM,
- centroids + j * AV1_K_MEANS_DIM);
- if (this_dist < min_dist) {
- min_dist = this_dist;
- indices[i] = j;
- }
- }
- }
-}
-
-static void RENAME(calc_centroids)(const int *data, int *centroids,
- const uint8_t *indices, int n, int k) {
- int i, j;
- int count[PALETTE_MAX_SIZE] = { 0 };
- unsigned int rand_state = (unsigned int)data[0];
- assert(n <= 32768);
- memset(centroids, 0, sizeof(centroids[0]) * k * AV1_K_MEANS_DIM);
-
- for (i = 0; i < n; ++i) {
- const int index = indices[i];
- assert(index < k);
- ++count[index];
- for (j = 0; j < AV1_K_MEANS_DIM; ++j) {
- centroids[index * AV1_K_MEANS_DIM + j] += data[i * AV1_K_MEANS_DIM + j];
- }
- }
-
- for (i = 0; i < k; ++i) {
- if (count[i] == 0) {
- memcpy(centroids + i * AV1_K_MEANS_DIM,
- data + (lcg_rand16(&rand_state) % n) * AV1_K_MEANS_DIM,
- sizeof(centroids[0]) * AV1_K_MEANS_DIM);
- } else {
- for (j = 0; j < AV1_K_MEANS_DIM; ++j) {
- centroids[i * AV1_K_MEANS_DIM + j] =
- DIVIDE_AND_ROUND(centroids[i * AV1_K_MEANS_DIM + j], count[i]);
- }
- }
- }
-}
-
-static int64_t RENAME(calc_total_dist)(const int *data, const int *centroids,
- const uint8_t *indices, int n, int k) {
- int64_t dist = 0;
- (void)k;
- for (int i = 0; i < n; ++i) {
- dist += RENAME(calc_dist)(data + i * AV1_K_MEANS_DIM,
- centroids + indices[i] * AV1_K_MEANS_DIM);
- }
- return dist;
-}
-
-void RENAME(av1_k_means)(const int *data, int *centroids, uint8_t *indices,
- int n, int k, int max_itr) {
- int pre_centroids[2 * PALETTE_MAX_SIZE];
- uint8_t pre_indices[MAX_SB_SQUARE];
-
- RENAME(av1_calc_indices)(data, centroids, indices, n, k);
- int64_t this_dist = RENAME(calc_total_dist)(data, centroids, indices, n, k);
-
- for (int i = 0; i < max_itr; ++i) {
- const int64_t pre_dist = this_dist;
- memcpy(pre_centroids, centroids,
- sizeof(pre_centroids[0]) * k * AV1_K_MEANS_DIM);
- memcpy(pre_indices, indices, sizeof(pre_indices[0]) * n);
-
- RENAME(calc_centroids)(data, centroids, indices, n, k);
- RENAME(av1_calc_indices)(data, centroids, indices, n, k);
- this_dist = RENAME(calc_total_dist)(data, centroids, indices, n, k);
-
- if (this_dist > pre_dist) {
- memcpy(centroids, pre_centroids,
- sizeof(pre_centroids[0]) * k * AV1_K_MEANS_DIM);
- memcpy(indices, pre_indices, sizeof(pre_indices[0]) * n);
- break;
- }
- if (!memcmp(centroids, pre_centroids,
- sizeof(pre_centroids[0]) * k * AV1_K_MEANS_DIM))
- break;
- }
-}
-#undef RENAME_
-#undef RENAME
diff --git a/third_party/aom/av1/encoder/lookahead.c b/third_party/aom/av1/encoder/lookahead.c
deleted file mode 100644
index 1bf8ecbac..000000000
--- a/third_party/aom/av1/encoder/lookahead.c
+++ /dev/null
@@ -1,210 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-#include <assert.h>
-#include <stdlib.h>
-
-#include "config/aom_config.h"
-
-#include "av1/common/common.h"
-#include "av1/encoder/encoder.h"
-#include "av1/encoder/extend.h"
-#include "av1/encoder/lookahead.h"
-
-/* Return the buffer at the given absolute index and increment the index */
-static struct lookahead_entry *pop(struct lookahead_ctx *ctx, int *idx) {
- int index = *idx;
- struct lookahead_entry *buf = ctx->buf + index;
-
- assert(index < ctx->max_sz);
- if (++index >= ctx->max_sz) index -= ctx->max_sz;
- *idx = index;
- return buf;
-}
-
-void av1_lookahead_destroy(struct lookahead_ctx *ctx) {
- if (ctx) {
- if (ctx->buf) {
- int i;
-
- for (i = 0; i < ctx->max_sz; i++) aom_free_frame_buffer(&ctx->buf[i].img);
- free(ctx->buf);
- }
- free(ctx);
- }
-}
-
-struct lookahead_ctx *av1_lookahead_init(
- unsigned int width, unsigned int height, unsigned int subsampling_x,
- unsigned int subsampling_y, int use_highbitdepth, unsigned int depth) {
- struct lookahead_ctx *ctx = NULL;
-
- // Clamp the lookahead queue depth
- depth = clamp(depth, 1, MAX_LAG_BUFFERS);
-
- // Allocate memory to keep previous source frames available.
- depth += MAX_PRE_FRAMES;
-
- // Allocate the lookahead structures
- ctx = calloc(1, sizeof(*ctx));
- if (ctx) {
- const int legacy_byte_alignment = 0;
- unsigned int i;
- ctx->max_sz = depth;
- ctx->buf = calloc(depth, sizeof(*ctx->buf));
- if (!ctx->buf) goto bail;
- for (i = 0; i < depth; i++)
- if (aom_alloc_frame_buffer(&ctx->buf[i].img, width, height, subsampling_x,
- subsampling_y, use_highbitdepth,
- AOM_BORDER_IN_PIXELS, legacy_byte_alignment))
- goto bail;
- }
- return ctx;
-bail:
- av1_lookahead_destroy(ctx);
- return NULL;
-}
-
-#define USE_PARTIAL_COPY 0
-
-int av1_lookahead_push(struct lookahead_ctx *ctx, YV12_BUFFER_CONFIG *src,
- int64_t ts_start, int64_t ts_end, int use_highbitdepth,
- aom_enc_frame_flags_t flags) {
- struct lookahead_entry *buf;
-#if USE_PARTIAL_COPY
- int row, col, active_end;
- int mb_rows = (src->y_height + 15) >> 4;
- int mb_cols = (src->y_width + 15) >> 4;
-#endif
- int width = src->y_crop_width;
- int height = src->y_crop_height;
- int uv_width = src->uv_crop_width;
- int uv_height = src->uv_crop_height;
- int subsampling_x = src->subsampling_x;
- int subsampling_y = src->subsampling_y;
- int larger_dimensions, new_dimensions;
-
- if (ctx->sz + 1 + MAX_PRE_FRAMES > ctx->max_sz) return 1;
- ctx->sz++;
- buf = pop(ctx, &ctx->write_idx);
-
- new_dimensions = width != buf->img.y_crop_width ||
- height != buf->img.y_crop_height ||
- uv_width != buf->img.uv_crop_width ||
- uv_height != buf->img.uv_crop_height;
- larger_dimensions = width > buf->img.y_width || height > buf->img.y_height ||
- uv_width > buf->img.uv_width ||
- uv_height > buf->img.uv_height;
- assert(!larger_dimensions || new_dimensions);
-
-#if USE_PARTIAL_COPY
- // TODO(jkoleszar): This is disabled for now, as
- // av1_copy_and_extend_frame_with_rect is not subsampling/alpha aware.
-
- // Only do this partial copy if the following conditions are all met:
- // 1. Lookahead queue has has size of 1.
- // 2. Active map is provided.
- // 3. This is not a key frame, golden nor altref frame.
- if (!new_dimensions && ctx->max_sz == 1 && active_map && !flags) {
- for (row = 0; row < mb_rows; ++row) {
- col = 0;
-
- while (1) {
- // Find the first active macroblock in this row.
- for (; col < mb_cols; ++col) {
- if (active_map[col]) break;
- }
-
- // No more active macroblock in this row.
- if (col == mb_cols) break;
-
- // Find the end of active region in this row.
- active_end = col;
-
- for (; active_end < mb_cols; ++active_end) {
- if (!active_map[active_end]) break;
- }
-
- // Only copy this active region.
- av1_copy_and_extend_frame_with_rect(src, &buf->img, row << 4, col << 4,
- 16, (active_end - col) << 4);
-
- // Start again from the end of this active region.
- col = active_end;
- }
-
- active_map += mb_cols;
- }
- } else {
-#endif
- if (larger_dimensions) {
- YV12_BUFFER_CONFIG new_img;
- memset(&new_img, 0, sizeof(new_img));
- if (aom_alloc_frame_buffer(&new_img, width, height, subsampling_x,
- subsampling_y, use_highbitdepth,
- AOM_BORDER_IN_PIXELS, 0))
- return 1;
- aom_free_frame_buffer(&buf->img);
- buf->img = new_img;
- } else if (new_dimensions) {
- buf->img.y_crop_width = src->y_crop_width;
- buf->img.y_crop_height = src->y_crop_height;
- buf->img.uv_crop_width = src->uv_crop_width;
- buf->img.uv_crop_height = src->uv_crop_height;
- buf->img.subsampling_x = src->subsampling_x;
- buf->img.subsampling_y = src->subsampling_y;
- }
- // Partial copy not implemented yet
- av1_copy_and_extend_frame(src, &buf->img);
-#if USE_PARTIAL_COPY
- }
-#endif
-
- buf->ts_start = ts_start;
- buf->ts_end = ts_end;
- buf->flags = flags;
- return 0;
-}
-
-struct lookahead_entry *av1_lookahead_pop(struct lookahead_ctx *ctx,
- int drain) {
- struct lookahead_entry *buf = NULL;
-
- if (ctx && ctx->sz && (drain || ctx->sz == ctx->max_sz - MAX_PRE_FRAMES)) {
- buf = pop(ctx, &ctx->read_idx);
- ctx->sz--;
- }
- return buf;
-}
-
-struct lookahead_entry *av1_lookahead_peek(struct lookahead_ctx *ctx,
- int index) {
- struct lookahead_entry *buf = NULL;
-
- if (index >= 0) {
- // Forward peek
- if (index < ctx->sz) {
- index += ctx->read_idx;
- if (index >= ctx->max_sz) index -= ctx->max_sz;
- buf = ctx->buf + index;
- }
- } else if (index < 0) {
- // Backward peek
- if (-index <= MAX_PRE_FRAMES) {
- index += (int)(ctx->read_idx);
- if (index < 0) index += (int)(ctx->max_sz);
- buf = ctx->buf + index;
- }
- }
-
- return buf;
-}
-
-unsigned int av1_lookahead_depth(struct lookahead_ctx *ctx) { return ctx->sz; }
diff --git a/third_party/aom/av1/encoder/lookahead.h b/third_party/aom/av1/encoder/lookahead.h
deleted file mode 100644
index e55224cf7..000000000
--- a/third_party/aom/av1/encoder/lookahead.h
+++ /dev/null
@@ -1,106 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#ifndef AOM_AV1_ENCODER_LOOKAHEAD_H_
-#define AOM_AV1_ENCODER_LOOKAHEAD_H_
-
-#include "aom_scale/yv12config.h"
-#include "aom/aom_integer.h"
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-#define MAX_LAG_BUFFERS 25
-
-// One slot of the lookahead queue. av1_lookahead_push() copies the source
-// frame into img and stores the timestamps and flags it was called with.
-struct lookahead_entry {
- // Queue-owned copy of the pushed frame.
- YV12_BUFFER_CONFIG img;
- // Start/end timestamps passed to av1_lookahead_push() for this frame.
- int64_t ts_start;
- int64_t ts_end;
- // Frame flags passed to av1_lookahead_push().
- aom_enc_frame_flags_t flags;
-};
-
-// The max of past frames we want to keep in the queue.
-#define MAX_PRE_FRAMES 1
-
-// State of the lookahead queue. buf is used as a circular array: the peek
-// routine wraps indices that run past max_sz (or below 0) back into range.
-struct lookahead_ctx {
- int max_sz; /* Absolute size of the queue */
- int sz; /* Number of buffers currently in the queue */
- int read_idx; /* Read index */
- int write_idx; /* Write index */
- struct lookahead_entry *buf; /* Buffer list */
-};
-
-/**\brief Initializes the lookahead stage
- *
- * The lookahead stage is a queue of frame buffers on which some analysis
- * may be done when buffers are enqueued.
- */
-struct lookahead_ctx *av1_lookahead_init(
- unsigned int width, unsigned int height, unsigned int subsampling_x,
- unsigned int subsampling_y, int use_highbitdepth, unsigned int depth);
-
-/**\brief Destroys the lookahead stage
- */
-void av1_lookahead_destroy(struct lookahead_ctx *ctx);
-
-/**\brief Enqueue a source buffer
- *
- * This function will copy the source image into a new framebuffer with
- * the expected stride/border.
- *
- * The whole frame is copied. This prototype takes no active map; copying only
- * the active macroblocks is not part of this interface.
- *
- * \param[in] ctx              Pointer to the lookahead context
- * \param[in] src              Pointer to the image to enqueue
- * \param[in] ts_start         Timestamp for the start of this frame
- * \param[in] ts_end           Timestamp for the end of this frame
- * \param[in] use_highbitdepth Forwarded to the frame buffer allocator when
- *                             the queue slot has to be reallocated
- * \param[in] flags            Flags set on this frame
- *
- * \retval 0 on success
- * \retval non-zero on failure (e.g. the frame buffer could not be allocated)
- */
-int av1_lookahead_push(struct lookahead_ctx *ctx, YV12_BUFFER_CONFIG *src,
- int64_t ts_start, int64_t ts_end, int use_highbitdepth,
- aom_enc_frame_flags_t flags);
-
-/**\brief Get the next source buffer to encode
- *
- *
- * \param[in] ctx Pointer to the lookahead context
- * \param[in] drain Flag indicating the buffer should be drained
- * (return a buffer regardless of the current queue depth)
- *
- * \retval NULL, if drain set and queue is empty
- * \retval NULL, if drain not set and queue not of the configured depth
- */
-struct lookahead_entry *av1_lookahead_pop(struct lookahead_ctx *ctx, int drain);
-
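-/**\brief Get a future (or recent past) source buffer without dequeuing it
- *
- * \param[in] ctx Pointer to the lookahead context
- * \param[in] index Index of the frame to be returned, 0 == next frame.
- *                  A negative index looks backward from the read position
- *                  and may reach at most MAX_PRE_FRAMES slots back.
- *
- * \retval NULL, if no buffer exists at the specified index
- */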
-struct lookahead_entry *av1_lookahead_peek(struct lookahead_ctx *ctx,
- int index);
-
-/**\brief Get the number of frames currently in the lookahead queue
- *
- * \param[in] ctx Pointer to the lookahead context
- */
-unsigned int av1_lookahead_depth(struct lookahead_ctx *ctx);
-
-#ifdef __cplusplus
-} // extern "C"
-#endif
-
-#endif // AOM_AV1_ENCODER_LOOKAHEAD_H_
diff --git a/third_party/aom/av1/encoder/mathutils.h b/third_party/aom/av1/encoder/mathutils.h
deleted file mode 100644
index 64f936176..000000000
--- a/third_party/aom/av1/encoder/mathutils.h
+++ /dev/null
@@ -1,359 +0,0 @@
-/*
- * Copyright (c) 2017, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#ifndef AOM_AV1_ENCODER_MATHUTILS_H_
-#define AOM_AV1_ENCODER_MATHUTILS_H_
-
-#include <memory.h>
-#include <math.h>
-#include <stdio.h>
-#include <stdlib.h>
-#include <assert.h>
-
-// Magnitude below which linsolve() treats a pivot / diagonal entry as zero
-// and reports failure.
-static const double TINY_NEAR_ZERO = 1.0E-16;
-
-// Solves A * x = b for the n x n matrix A (row pitch `stride`) by Gaussian
-// elimination with row swaps. A and b are overwritten; the solution is
-// written to x. Returns 1 on success, 0 when a pivot or diagonal entry is
-// smaller in magnitude than TINY_NEAR_ZERO.
-static INLINE int linsolve(int n, double *A, int stride, double *b, double *x) {
-  // Forward elimination, one pivot column at a time.
-  for (int col = 0; col < n - 1; col++) {
-    // Walk up from the bottom row, swapping adjacent rows so the entry with
-    // the largest magnitude in this column ends up on the diagonal.
-    for (int row = n - 1; row > col; row--) {
-      double *const upper = A + (row - 1) * stride;
-      double *const lower = A + row * stride;
-      if (fabs(upper[col]) < fabs(lower[col])) {
-        for (int j = 0; j < n; j++) {
-          const double held = lower[j];
-          lower[j] = upper[j];
-          upper[j] = held;
-        }
-        const double held_b = b[row];
-        b[row] = b[row - 1];
-        b[row - 1] = held_b;
-      }
-    }
-    // Subtract a multiple of the pivot row from every row below it.
-    const double *const pivot_row = A + col * stride;
-    for (int row = col + 1; row < n; row++) {
-      if (fabs(pivot_row[col]) < TINY_NEAR_ZERO) return 0;
-      double *const target = A + row * stride;
-      const double factor = target[col] / pivot_row[col];
-      for (int j = 0; j < n; j++) target[j] -= factor * pivot_row[j];
-      b[row] -= factor * b[col];
-    }
-  }
-  // Backward substitution, last unknown first.
-  for (int row = n - 1; row >= 0; row--) {
-    const double diag = A[row * stride + row];
-    if (fabs(diag) < TINY_NEAR_ZERO) return 0;
-    double acc = 0;
-    for (int j = row + 1; j <= n - 1; j++) acc += A[row * stride + j] * x[j];
-    x[row] = (b[row] - acc) / diag;
-  }
-
-  return 1;
-}
-
-////////////////////////////////////////////////////////////////////////////////
-// Least-squares
-// Solves for n-dim x in a least squares sense to minimize |Ax - b|^2
-// The solution is simply x = (A'A)^-1 A'b or simply the solution for
-// the system: A'A x = A'b
-//
-// A has `rows` rows of n used columns with row pitch `stride`; b has `rows`
-// entries. `scratch` must hold n * (n + 1) doubles; pass NULL to have the
-// function allocate (and free) that workspace itself.
-// Returns the result of linsolve() on the normal equations (1 on success,
-// 0 on failure), or 0 when the internal workspace cannot be allocated.
-static INLINE int least_squares(int n, double *A, int rows, int stride,
-                                double *b, double *scratch, double *x) {
-  int i, j, k;
-  double *scratch_ = NULL;
-  double *AtA, *Atb;
-  if (!scratch) {
-    scratch_ = (double *)aom_malloc(sizeof(*scratch) * n * (n + 1));
-    // Report failure instead of writing through a NULL workspace.
-    if (!scratch_) return 0;
-    scratch = scratch_;
-  }
-  // Workspace layout: n x n matrix A'A followed by the n-vector A'b.
-  AtA = scratch;
-  Atb = scratch + n * n;
-
-  for (i = 0; i < n; ++i) {
-    // A'A is symmetric: compute the upper triangle and mirror it.
-    for (j = i; j < n; ++j) {
-      AtA[i * n + j] = 0.0;
-      for (k = 0; k < rows; ++k)
-        AtA[i * n + j] += A[k * stride + i] * A[k * stride + j];
-      AtA[j * n + i] = AtA[i * n + j];
-    }
-    Atb[i] = 0;
-    for (k = 0; k < rows; ++k) Atb[i] += A[k * stride + i] * b[k];
-  }
-  int ret = linsolve(n, AtA, n, Atb, x);
-  if (scratch_) aom_free(scratch_);
-  return ret;
-}
-
-// Row-major matrix product: res (m1_rows x m2_cols) = m1 (m1_rows x
-// inner_dim) * m2 (inner_dim x m2_cols). res is written sequentially.
-static INLINE void multiply_mat(const double *m1, const double *m2, double *res,
-                                const int m1_rows, const int inner_dim,
-                                const int m2_cols) {
-  int out = 0;  // next element of res to write
-
-  for (int r = 0; r < m1_rows; ++r) {
-    for (int c = 0; c < m2_cols; ++c) {
-      double acc = 0;
-      for (int k = 0; k < inner_dim; ++k)
-        acc += m1[r * inner_dim + k] * m2[k * m2_cols + c];
-      res[out++] = acc;
-    }
-  }
-}
-
-//
-// The functions below are needed only for homography computation
-// Remove if the homography models are not used.
-//
-///////////////////////////////////////////////////////////////////////////////
-// svdcmp
-// Adopted from Numerical Recipes in C
-
-// Returns |a| when b >= 0 and -|a| otherwise.
-static INLINE double sign(double a, double b) {
-  const double magnitude = fabs(a);
-  if (b >= 0) return magnitude;
-  return -magnitude;
-}
-
-// Computes sqrt(a^2 + b^2) by scaling with the larger magnitude, so the
-// squares of the inputs themselves are never formed.
-static INLINE double pythag(double a, double b) {
-  const double mag_a = fabs(a);
-  const double mag_b = fabs(b);
-  double ratio;
-
-  if (mag_a > mag_b) {
-    ratio = mag_b / mag_a;
-    return mag_a * sqrt(1.0 + ratio * ratio);
-  }
-
-  // mag_b is the larger (or equal) magnitude here; both inputs being zero
-  // yields 0.
-  ratio = mag_a / mag_b;
-  if (mag_b == 0) return 0;
-  return mag_b * sqrt(1.0 + ratio * ratio);
-}
-
-// Singular value decomposition following the Numerical Recipes routine of
-// the same name.
-//   u: m row pointers to rows of n doubles; holds the input matrix on entry
-//      and is overwritten in place.
-//   w: n output values (made non-negative before returning).
-//   v: n row pointers to rows of n doubles, written by this function.
-// Returns 0 on success and 1 on failure: either the iteration for some value
-// did not converge within max_its sweeps, or the scratch vector could not be
-// allocated.
-static INLINE int svdcmp(double **u, int m, int n, double w[], double **v) {
-  const int max_its = 30;
-  int flag, i, its, j, jj, k, l, nm;
-  double anorm, c, f, g, h, s, scale, x, y, z;
-  double *rv1 = (double *)aom_malloc(sizeof(*rv1) * (n + 1));
-  // Fail cleanly rather than writing through a NULL scratch vector.
-  if (!rv1) return 1;
-  g = scale = anorm = 0.0;
-  // Phase 1: Householder reduction to bidiagonal form.
-  for (i = 0; i < n; i++) {
-    l = i + 1;
-    rv1[i] = scale * g;
-    g = s = scale = 0.0;
-    if (i < m) {
-      for (k = i; k < m; k++) scale += fabs(u[k][i]);
-      if (scale != 0.) {
-        for (k = i; k < m; k++) {
-          u[k][i] /= scale;
-          s += u[k][i] * u[k][i];
-        }
-        f = u[i][i];
-        g = -sign(sqrt(s), f);
-        h = f * g - s;
-        u[i][i] = f - g;
-        for (j = l; j < n; j++) {
-          for (s = 0.0, k = i; k < m; k++) s += u[k][i] * u[k][j];
-          f = s / h;
-          for (k = i; k < m; k++) u[k][j] += f * u[k][i];
-        }
-        for (k = i; k < m; k++) u[k][i] *= scale;
-      }
-    }
-    w[i] = scale * g;
-    g = s = scale = 0.0;
-    if (i < m && i != n - 1) {
-      for (k = l; k < n; k++) scale += fabs(u[i][k]);
-      if (scale != 0.) {
-        for (k = l; k < n; k++) {
-          u[i][k] /= scale;
-          s += u[i][k] * u[i][k];
-        }
-        f = u[i][l];
-        g = -sign(sqrt(s), f);
-        h = f * g - s;
-        u[i][l] = f - g;
-        for (k = l; k < n; k++) rv1[k] = u[i][k] / h;
-        for (j = l; j < m; j++) {
-          for (s = 0.0, k = l; k < n; k++) s += u[j][k] * u[i][k];
-          for (k = l; k < n; k++) u[j][k] += s * rv1[k];
-        }
-        for (k = l; k < n; k++) u[i][k] *= scale;
-      }
-    }
-    anorm = fmax(anorm, (fabs(w[i]) + fabs(rv1[i])));
-  }
-
-  // Phase 2: accumulate the right-hand transformations into v. g and l carry
-  // over from the previous iteration (and from phase 1 on the first pass).
-  for (i = n - 1; i >= 0; i--) {
-    if (i < n - 1) {
-      if (g != 0.) {
-        for (j = l; j < n; j++) v[j][i] = (u[i][j] / u[i][l]) / g;
-        for (j = l; j < n; j++) {
-          for (s = 0.0, k = l; k < n; k++) s += u[i][k] * v[k][j];
-          for (k = l; k < n; k++) v[k][j] += s * v[k][i];
-        }
-      }
-      for (j = l; j < n; j++) v[i][j] = v[j][i] = 0.0;
-    }
-    v[i][i] = 1.0;
-    g = rv1[i];
-    l = i;
-  }
-  // Phase 3: accumulate the left-hand transformations in u.
-  for (i = AOMMIN(m, n) - 1; i >= 0; i--) {
-    l = i + 1;
-    g = w[i];
-    for (j = l; j < n; j++) u[i][j] = 0.0;
-    if (g != 0.) {
-      g = 1.0 / g;
-      for (j = l; j < n; j++) {
-        for (s = 0.0, k = l; k < m; k++) s += u[k][i] * u[k][j];
-        f = (s / u[i][i]) * g;
-        for (k = i; k < m; k++) u[k][j] += f * u[k][i];
-      }
-      for (j = i; j < m; j++) u[j][i] *= g;
-    } else {
-      for (j = i; j < m; j++) u[j][i] = 0.0;
-    }
-    ++u[i][i];
-  }
-  // Phase 4: diagonalize the bidiagonal form, one value w[k] at a time, with
-  // at most max_its sweeps each.
-  for (k = n - 1; k >= 0; k--) {
-    for (its = 0; its < max_its; its++) {
-      flag = 1;
-      // Look for a negligible off-diagonal entry at which to split.
-      for (l = k; l >= 0; l--) {
-        nm = l - 1;
-        if ((double)(fabs(rv1[l]) + anorm) == anorm || nm < 0) {
-          flag = 0;
-          break;
-        }
-        if ((double)(fabs(w[nm]) + anorm) == anorm) break;
-      }
-      if (flag) {
-        c = 0.0;
-        s = 1.0;
-        for (i = l; i <= k; i++) {
-          f = s * rv1[i];
-          rv1[i] = c * rv1[i];
-          if ((double)(fabs(f) + anorm) == anorm) break;
-          g = w[i];
-          h = pythag(f, g);
-          w[i] = h;
-          h = 1.0 / h;
-          c = g * h;
-          s = -f * h;
-          for (j = 0; j < m; j++) {
-            y = u[j][nm];
-            z = u[j][i];
-            u[j][nm] = y * c + z * s;
-            u[j][i] = z * c - y * s;
-          }
-        }
-      }
-      z = w[k];
-      if (l == k) {
-        // Converged for this k; flip the sign so the value is non-negative.
-        if (z < 0.0) {
-          w[k] = -z;
-          for (j = 0; j < n; j++) v[j][k] = -v[j][k];
-        }
-        break;
-      }
-      if (its == max_its - 1) {
-        // No convergence within the iteration budget.
-        aom_free(rv1);
-        return 1;
-      }
-      assert(k > 0);
-      // Shift computed from the trailing 2x2 block.
-      x = w[l];
-      nm = k - 1;
-      y = w[nm];
-      g = rv1[nm];
-      h = rv1[k];
-      f = ((y - z) * (y + z) + (g - h) * (g + h)) / (2.0 * h * y);
-      g = pythag(f, 1.0);
-      f = ((x - z) * (x + z) + h * ((y / (f + sign(g, f))) - h)) / x;
-      c = s = 1.0;
-      // Apply the rotations for this sweep to v and u.
-      for (j = l; j <= nm; j++) {
-        i = j + 1;
-        g = rv1[i];
-        y = w[i];
-        h = s * g;
-        g = c * g;
-        z = pythag(f, h);
-        rv1[j] = z;
-        c = f / z;
-        s = h / z;
-        f = x * c + g * s;
-        g = g * c - x * s;
-        h = y * s;
-        y *= c;
-        for (jj = 0; jj < n; jj++) {
-          x = v[jj][j];
-          z = v[jj][i];
-          v[jj][j] = x * c + z * s;
-          v[jj][i] = z * c - x * s;
-        }
-        z = pythag(f, h);
-        w[j] = z;
-        if (z != 0.) {
-          z = 1.0 / z;
-          c = f * z;
-          s = h * z;
-        }
-        f = c * g + s * y;
-        x = c * y - s * g;
-        for (jj = 0; jj < m; jj++) {
-          y = u[jj][j];
-          z = u[jj][i];
-          u[jj][j] = y * c + z * s;
-          u[jj][i] = z * c - y * s;
-        }
-      }
-      rv1[l] = 0.0;
-      rv1[k] = f;
-      w[k] = x;
-    }
-  }
-  aom_free(rv1);
-  return 0;
-}
-
-// Flat-array front end for svdcmp(). matx is an M x N row-major matrix that
-// is copied into U (also M x N) before decomposition; V is addressed as
-// N x N and W receives the N values written by svdcmp(). Returns 0 on
-// success and 1 when a row-pointer table cannot be allocated or svdcmp()
-// reports failure.
-static INLINE int SVD(double *U, double *W, double *V, double *matx, int M,
-                      int N) {
-  // Row-pointer views expected by the Numerical Recipes style routine.
-  double **nrU = (double **)aom_malloc((M) * sizeof(*nrU));
-  double **nrV = (double **)aom_malloc((N) * sizeof(*nrV));
-  int failed = 1;
-  int r;
-
-  if (nrU && nrV) {
-    for (r = 0; r < M; r++) {
-      nrU[r] = &U[r * N];
-    }
-    for (r = 0; r < N; r++) {
-      nrV[r] = &V[r * N];
-    }
-
-    /* copy from given matx into nrU */
-    for (r = 0; r < M; r++) {
-      memcpy(&(nrU[r][0]), matx + N * r, N * sizeof(*matx));
-    }
-
-    /* run the decomposition; any non-zero result counts as failure */
-    failed = svdcmp(nrU, M, N, W, nrV) ? 1 : 0;
-  }
-
-  /* release whichever row-pointer tables were obtained */
-  if (nrU) aom_free(nrU);
-  if (nrV) aom_free(nrV);
-
-  return failed;
-}
-
-#endif // AOM_AV1_ENCODER_MATHUTILS_H_
diff --git a/third_party/aom/av1/encoder/mbgraph.c b/third_party/aom/av1/encoder/mbgraph.c
deleted file mode 100644
index 1a35ff77c..000000000
--- a/third_party/aom/av1/encoder/mbgraph.c
+++ /dev/null
@@ -1,401 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#include <limits.h>
-
-#include "config/av1_rtcd.h"
-#include "config/aom_dsp_rtcd.h"
-
-#include "aom_dsp/aom_dsp_common.h"
-#include "aom_mem/aom_mem.h"
-#include "aom_ports/system_state.h"
-#include "av1/common/blockd.h"
-#include "av1/common/reconinter.h"
-#include "av1/common/reconintra.h"
-#include "av1/encoder/mcomp.h"
-#include "av1/encoder/reconinter_enc.h"
-#include "av1/encoder/segmentation.h"
-
-// Runs one motion search for the current 16x16 block starting from ref_mv:
-// a hex search, then (unless integer MVs are forced) a fractional
-// refinement. It then builds the luma inter prediction with the resulting
-// vector and returns the 16x16 SAD between the source block and that
-// prediction. The winning vector is left in x->best_mv; x->mv_limits is
-// saved on entry and restored before returning.
-static unsigned int do_16x16_motion_iteration(AV1_COMP *cpi, const MV *ref_mv,
- int mb_row, int mb_col) {
- MACROBLOCK *const x = &cpi->td.mb;
- MACROBLOCKD *const xd = &x->e_mbd;
- const MV_SPEED_FEATURES *const mv_sf = &cpi->sf.mv;
- const aom_variance_fn_ptr_t v_fn_ptr = cpi->fn_ptr[BLOCK_16X16];
-
- // Saved so the limits narrowed below can be restored at the end.
- const MvLimits tmp_mv_limits = x->mv_limits;
- MV ref_full;
- int cost_list[5];
-
- // Further step/diamond searches as necessary
- int step_param = mv_sf->reduce_first_step_size;
- step_param = AOMMIN(step_param, MAX_MVSEARCH_STEPS - 2);
-
- av1_set_mv_search_range(&x->mv_limits, ref_mv);
-
- // Drop the low three bits of ref_mv (presumably 1/8-pel -> full-pel units;
- // the integer-MV branch below scales by 8 in the other direction).
- ref_full.col = ref_mv->col >> 3;
- ref_full.row = ref_mv->row >> 3;
-
- /*cpi->sf.search_method == HEX*/
- av1_hex_search(x, &ref_full, step_param, x->errorperbit, 0,
- cond_cost_list(cpi, cost_list), &v_fn_ptr, 0, ref_mv);
-
- // Try sub-pixel MC
- // if (bestsme > error_thresh && bestsme < INT_MAX)
- if (cpi->common.cur_frame_force_integer_mv == 1) {
- // Integer-only MVs: just rescale the search result, no refinement.
- x->best_mv.as_mv.row *= 8;
- x->best_mv.as_mv.col *= 8;
- } else {
- // distortion and sse are required outputs of the call; they are not used
- // afterwards in this function.
- int distortion;
- unsigned int sse;
- cpi->find_fractional_mv_step(
- x, &cpi->common, mb_row, mb_col, ref_mv,
- cpi->common.allow_high_precision_mv, x->errorperbit, &v_fn_ptr, 0,
- mv_sf->subpel_iters_per_step, cond_cost_list(cpi, cost_list), NULL,
- NULL, &distortion, &sse, NULL, NULL, 0, 0, 0, 0, 0);
- }
-
- // NOTE(review): has_second_ref() is evaluated before ref_frame[1] is set to
- // NONE_FRAME just below, so the chosen mode reflects the mode info as it
- // was on entry — confirm this ordering is intended.
- if (has_second_ref(xd->mi[0]))
- xd->mi[0]->mode = NEW_NEWMV;
- else
- xd->mi[0]->mode = NEWMV;
-
- xd->mi[0]->mv[0] = x->best_mv;
- xd->mi[0]->ref_frame[1] = NONE_FRAME;
-
- av1_build_inter_predictors_sby(&cpi->common, xd, mb_row, mb_col, NULL,
- BLOCK_16X16);
-
- /* restore UMV window */
- x->mv_limits = tmp_mv_limits;
-
- return aom_sad16x16(x->plane[0].src.buf, x->plane[0].src.stride,
- xd->plane[0].dst.buf, xd->plane[0].dst.stride);
-}
-
-// Picks the best of up to three candidates for the current 16x16 block: the
-// zero vector, a search seeded with ref_mv and, when ref_mv is non-zero, a
-// search seeded with (0,0). The winning vector is stored in x->best_mv.as_mv
-// and its SAD is returned.
-static int do_16x16_motion_search(AV1_COMP *cpi, const MV *ref_mv, int mb_row,
-                                  int mb_col) {
-  MACROBLOCK *const x = &cpi->td.mb;
-  MACROBLOCKD *const xd = &x->e_mbd;
-  unsigned int lowest_sad, trial_sad;
-  MV winner;
-
-  // Candidate 1: zero motion, measured directly against the reference block.
-  // FIXME should really use something like near/nearest MV and/or MV prediction
-  lowest_sad = aom_sad16x16(x->plane[0].src.buf, x->plane[0].src.stride,
-                            xd->plane[0].pre[0].buf,
-                            xd->plane[0].pre[0].stride);
-  winner.row = 0;
-  winner.col = 0;
-
-  // Candidate 2: search around the supplied reference vector.
-  trial_sad = do_16x16_motion_iteration(cpi, ref_mv, mb_row, mb_col);
-  if (trial_sad < lowest_sad) {
-    lowest_sad = trial_sad;
-    winner = x->best_mv.as_mv;
-  }
-
-  // Candidate 3: search around (0,0), skipped when ref_mv already is (0,0).
-  if (!(ref_mv->row == 0 && ref_mv->col == 0)) {
-    MV zero_ref_mv = kZeroMv;
-
-    trial_sad = do_16x16_motion_iteration(cpi, &zero_ref_mv, mb_row, mb_col);
-    if (trial_sad < lowest_sad) {
-      lowest_sad = trial_sad;
-      winner = x->best_mv.as_mv;
-    }
-  }
-
-  x->best_mv.as_mv = winner;
-  return lowest_sad;
-}
-
-// Evaluates only the zero motion vector: returns the 16x16 SAD between the
-// source block and the co-located reference block and writes a zero vector
-// to *dst_mv.
-static int do_16x16_zerozero_search(AV1_COMP *cpi, int_mv *dst_mv) {
-  MACROBLOCK *const x = &cpi->td.mb;
-  MACROBLOCKD *const xd = &x->e_mbd;
-
-  // FIXME should really use something like near/nearest MV and/or MV prediction
-  const unsigned int zero_mv_sad =
-      aom_sad16x16(x->plane[0].src.buf, x->plane[0].src.stride,
-                   xd->plane[0].pre[0].buf, xd->plane[0].pre[0].stride);
-
-  dst_mv->as_int = 0;
-  return zero_mv_sad;
-}
-// Predicts the current 16x16 block with every intra mode in
-// [INTRA_MODE_START, INTRA_MODE_END) and returns the lowest SAD against the
-// source. The mode that produced it is stored through pbest_mode when that
-// pointer is non-NULL.
-static int find_best_16x16_intra(AV1_COMP *cpi, PREDICTION_MODE *pbest_mode) {
-  const AV1_COMMON *cm = &cpi->common;
-  MACROBLOCK *const x = &cpi->td.mb;
-  MACROBLOCKD *const xd = &x->e_mbd;
-  PREDICTION_MODE winning_mode = -1;
-  PREDICTION_MODE candidate;
-  unsigned int lowest_sad = INT_MAX;
-
-  // Only a rough estimate is wanted, so smaller block sizes are deliberately
-  // not tried.
-  for (candidate = INTRA_MODE_START; candidate < INTRA_MODE_END; candidate++) {
-    unsigned int sad;
-
-    xd->mi[0]->mode = candidate;
-    av1_predict_intra_block(cm, xd, 16, 16, TX_16X16, candidate, 0, 0,
-                            FILTER_INTRA_MODES, x->plane[0].src.buf,
-                            x->plane[0].src.stride, xd->plane[0].dst.buf,
-                            xd->plane[0].dst.stride, 0, 0, 0);
-    sad = aom_sad16x16(x->plane[0].src.buf, x->plane[0].src.stride,
-                       xd->plane[0].dst.buf, xd->plane[0].dst.stride);
-
-    // Keep the first mode that reaches the lowest error seen so far.
-    if (sad >= lowest_sad) continue;
-    lowest_sad = sad;
-    winning_mode = candidate;
-  }
-
-  if (pbest_mode != NULL) *pbest_mode = winning_mode;
-
-  return lowest_sad;
-}
-
-// Fills *stats for one 16x16 block located mb_y_offset samples into the luma
-// planes: the best intra SAD and mode, the golden-frame motion search result
-// and the alt-ref zero-motion SAD. A reference passed as NULL is recorded
-// with an error of INT_MAX and a zero vector.
-static void update_mbgraph_mb_stats(AV1_COMP *cpi, MBGRAPH_MB_STATS *stats,
-                                    YV12_BUFFER_CONFIG *buf, int mb_y_offset,
-                                    YV12_BUFFER_CONFIG *golden_ref,
-                                    const MV *prev_golden_ref_mv,
-                                    YV12_BUFFER_CONFIG *alt_ref, int mb_row,
-                                    int mb_col) {
-  MACROBLOCK *const x = &cpi->td.mb;
-  MACROBLOCKD *const xd = &x->e_mbd;
-  AV1_COMMON *cm = &cpi->common;
-  int intra_sad;
-
-  // Source block. FIXME in practice we're completely ignoring chroma here
-  x->plane[0].src.buf = buf->y_buffer + mb_y_offset;
-  x->plane[0].src.stride = buf->y_stride;
-
-  // Predictions are written into the new frame buffer at the same offset.
-  xd->plane[0].dst.buf = get_frame_new_buffer(cm)->y_buffer + mb_y_offset;
-  xd->plane[0].dst.stride = get_frame_new_buffer(cm)->y_stride;
-
-  // Intra 16x16 estimate; the stored error is kept strictly positive.
-  intra_sad = find_best_16x16_intra(cpi, &stats->ref[INTRA_FRAME].m.mode);
-  stats->ref[INTRA_FRAME].err = (intra_sad <= 0) ? 1 : intra_sad;
-
-  // Golden frame: full motion search seeded with the neighbouring vector.
-  if (!golden_ref) {
-    stats->ref[GOLDEN_FRAME].err = INT_MAX;
-    stats->ref[GOLDEN_FRAME].m.mv.as_int = 0;
-  } else {
-    xd->plane[0].pre[0].buf = golden_ref->y_buffer + mb_y_offset;
-    xd->plane[0].pre[0].stride = golden_ref->y_stride;
-    stats->ref[GOLDEN_FRAME].err =
-        do_16x16_motion_search(cpi, prev_golden_ref_mv, mb_row, mb_col);
-    stats->ref[GOLDEN_FRAME].m.mv = x->best_mv;
-  }
-
-  // Alt-ref frame: only the zero vector is evaluated.
-  if (!alt_ref) {
-    stats->ref[ALTREF_FRAME].err = INT_MAX;
-    stats->ref[ALTREF_FRAME].m.mv.as_int = 0;
-  } else {
-    xd->plane[0].pre[0].buf = alt_ref->y_buffer + mb_y_offset;
-    xd->plane[0].pre[0].stride = alt_ref->y_stride;
-    stats->ref[ALTREF_FRAME].err =
-        do_16x16_zerozero_search(cpi, &stats->ref[ALTREF_FRAME].m.mv);
-  }
-}
-
-// Collects per-macroblock statistics for one lookahead frame. Walks the
-// frame in 16x16 steps in raster order and calls update_mbgraph_mb_stats()
-// for each block, seeding the golden-frame search with the vector found for
-// the block to the left (or, in the first column, for the first block of the
-// row above).
-//
-// golden_ref and alt_ref may be NULL; both are only forwarded to the
-// per-block routine. The separate golden/alt-ref offsets the function used
-// to maintain were never read, and updating them dereferenced golden_ref
-// unconditionally, so they have been removed.
-static void update_mbgraph_frame_stats(AV1_COMP *cpi,
-                                       MBGRAPH_FRAME_STATS *stats,
-                                       YV12_BUFFER_CONFIG *buf,
-                                       YV12_BUFFER_CONFIG *golden_ref,
-                                       YV12_BUFFER_CONFIG *alt_ref) {
-  MACROBLOCK *const x = &cpi->td.mb;
-  MACROBLOCKD *const xd = &x->e_mbd;
-  AV1_COMMON *const cm = &cpi->common;
-
-  int mb_col, mb_row, offset = 0;
-  int mb_y_offset = 0;
-  MV gld_top_mv = kZeroMv;
-  MB_MODE_INFO mi_local;
-
-  av1_zero(mi_local);
-  // Set up limit values for motion vectors to prevent them extending outside
-  // the UMV borders.
-  x->mv_limits.row_min = -BORDER_MV_PIXELS_B16;
-  x->mv_limits.row_max = (cm->mb_rows - 1) * 8 + BORDER_MV_PIXELS_B16;
-  xd->up_available = 0;
-  xd->plane[0].dst.stride = buf->y_stride;
-  xd->plane[0].pre[0].stride = buf->y_stride;
-  xd->plane[1].dst.stride = buf->uv_stride;
-  // Every block shares one local mode-info record.
-  xd->mi[0] = &mi_local;
-  mi_local.sb_type = BLOCK_16X16;
-  mi_local.ref_frame[0] = LAST_FRAME;
-  mi_local.ref_frame[1] = NONE_FRAME;
-
-  for (mb_row = 0; mb_row < cm->mb_rows; mb_row++) {
-    MV gld_left_mv = gld_top_mv;
-    int mb_y_in_offset = mb_y_offset;
-
-    // Set up limit values for motion vectors to prevent them extending outside
-    // the UMV borders.
-    x->mv_limits.col_min = -BORDER_MV_PIXELS_B16;
-    x->mv_limits.col_max = (cm->mb_cols - 1) * 8 + BORDER_MV_PIXELS_B16;
-    xd->left_available = 0;
-
-    for (mb_col = 0; mb_col < cm->mb_cols; mb_col++) {
-      MBGRAPH_MB_STATS *mb_stats = &stats->mb_stats[offset + mb_col];
-
-      update_mbgraph_mb_stats(cpi, mb_stats, buf, mb_y_in_offset, golden_ref,
-                              &gld_left_mv, alt_ref, mb_row, mb_col);
-      // The vector just found seeds the next block in this row; the first
-      // column's vector also seeds the start of the next row.
-      gld_left_mv = mb_stats->ref[GOLDEN_FRAME].m.mv.as_mv;
-      if (mb_col == 0) {
-        gld_top_mv = gld_left_mv;
-      }
-      xd->left_available = 1;
-      mb_y_in_offset += 16;
-      x->mv_limits.col_min -= 16;
-      x->mv_limits.col_max -= 16;
-    }
-    xd->up_available = 1;
-    mb_y_offset += buf->y_stride * 16;
-    x->mv_limits.row_min -= 16;
-    x->mv_limits.row_max -= 16;
-    offset += cm->mb_cols;
-  }
-}
-
-// void separate_arf_mbs_byzz
-// Builds cpi->segmentation_map from the collected mbgraph statistics: a
-// position goes to segment 1 only when, in every analysed frame, its alt-ref
-// zero-motion error was at most 1000 and no worse than both the intra and
-// the golden error; otherwise it goes to segment 0. Also sets
-// cpi->static_mb_pct to the percentage of positions in segment 1 and enables
-// segmentation.
-static void separate_arf_mbs(AV1_COMP *cpi) {
- AV1_COMMON *const cm = &cpi->common;
- int mb_col, mb_row, offset, i;
- int mi_row, mi_col;
- // Only ncnt[0] and ncnt[1] are used below.
- int ncnt[4] = { 0 };
- int n_frames = cpi->mbgraph_n_frames;
-
- // Per-macroblock count of frames in which the alt-ref test failed.
- int *arf_not_zz;
-
- CHECK_MEM_ERROR(
- cm, arf_not_zz,
- aom_calloc(cm->mb_rows * cm->mb_cols * sizeof(*arf_not_zz), 1));
-
- // We are not interested in results beyond the alt ref itself.
- if (n_frames > cpi->rc.frames_till_gf_update_due)
- n_frames = cpi->rc.frames_till_gf_update_due;
-
- // defer cost to reference frames
- for (i = n_frames - 1; i >= 0; i--) {
- MBGRAPH_FRAME_STATS *frame_stats = &cpi->mbgraph_stats[i];
-
- for (offset = 0, mb_row = 0; mb_row < cm->mb_rows;
- offset += cm->mb_cols, mb_row++) {
- for (mb_col = 0; mb_col < cm->mb_cols; mb_col++) {
- MBGRAPH_MB_STATS *mb_stats = &frame_stats->mb_stats[offset + mb_col];
-
- int altref_err = mb_stats->ref[ALTREF_FRAME].err;
- int intra_err = mb_stats->ref[INTRA_FRAME].err;
- int golden_err = mb_stats->ref[GOLDEN_FRAME].err;
-
- // Test for altref vs intra and gf and that its mv was 0,0.
- if (altref_err > 1000 || altref_err > intra_err ||
- altref_err > golden_err) {
- arf_not_zz[offset + mb_col]++;
- }
- }
- }
- }
-
- // arf_not_zz is indexed by MB, but this loop is indexed by MI to avoid out
- // of bound access in segmentation_map
- // NOTE(review): the "/ 2" mapping assumes two MI units per macroblock in
- // each direction. If mi_rows / mi_cols exceed twice mb_rows / mb_cols, the
- // index below runs past the mb_rows * mb_cols allocation — verify against
- // how mb_rows and mb_cols are derived.
- for (mi_row = 0; mi_row < cm->mi_rows; mi_row++) {
- for (mi_col = 0; mi_col < cm->mi_cols; mi_col++) {
- // If any of the blocks in the sequence failed then the MB
- // goes in segment 0
- if (arf_not_zz[mi_row / 2 * cm->mb_cols + mi_col / 2]) {
- ncnt[0]++;
- cpi->segmentation_map[mi_row * cm->mi_cols + mi_col] = 0;
- } else {
- cpi->segmentation_map[mi_row * cm->mi_cols + mi_col] = 1;
- ncnt[1]++;
- }
- }
- }
-
- // Only bother with segmentation if over 10% of the MBs in static segment
- // if ( ncnt[1] && (ncnt[0] / ncnt[1] < 10) )
- // The threshold above is disabled: the condition is hard-wired to true, so
- // the else branch below is never taken.
- if (1) {
- // Note % of blocks that are marked as static
- // NOTE(review): the guard tests cm->MBs while the divisor is
- // mi_rows * mi_cols — presumably both are non-zero together; confirm.
- if (cm->MBs)
- cpi->static_mb_pct = (ncnt[1] * 100) / (cm->mi_rows * cm->mi_cols);
-
- // This error case should not be reachable as this function should
- // never be called with the common data structure uninitialized.
- else
- cpi->static_mb_pct = 0;
-
- av1_enable_segmentation(&cm->seg);
- } else {
- cpi->static_mb_pct = 0;
- av1_disable_segmentation(&cm->seg);
- }
-
- // Free locally allocated storage
- aom_free(arf_not_zz);
-}
-
-// Refreshes the macroblock graph statistics for the frames currently in the
-// lookahead queue (at most MAX_LAG_BUFFERS of them) and derives the
-// segmentation map from them. Does nothing when the queue does not extend
-// beyond the next golden-frame update.
-void av1_update_mbgraph_stats(AV1_COMP *cpi) {
-  AV1_COMMON *const cm = &cpi->common;
-  int frame_idx, n_frames = av1_lookahead_depth(cpi->lookahead);
-  YV12_BUFFER_CONFIG *golden_ref = get_ref_frame_buffer(cpi, GOLDEN_FRAME);
-
-  assert(golden_ref != NULL);
-
-  // we need to look ahead beyond where the ARF transitions into
-  // being a GF - so exit if we don't look ahead beyond that
-  if (n_frames <= cpi->rc.frames_till_gf_update_due) return;
-
-  n_frames = (n_frames > MAX_LAG_BUFFERS) ? MAX_LAG_BUFFERS : n_frames;
-  cpi->mbgraph_n_frames = n_frames;
-
-  // Pass 1: clear the per-macroblock records of every frame to be analysed.
-  for (frame_idx = 0; frame_idx < n_frames; frame_idx++) {
-    MBGRAPH_FRAME_STATS *const cleared = &cpi->mbgraph_stats[frame_idx];
-    memset(cleared->mb_stats, 0,
-           cm->mb_rows * cm->mb_cols * sizeof(*cleared->mb_stats));
-  }
-
-  // Pass 2: do motion search to find contribution of each reference to data
-  // later on in this GF group
-  // FIXME really, the GF/last MC search should be done forward, and
-  // the ARF MC search backwards, to get optimal results for MV caching
-  for (frame_idx = 0; frame_idx < n_frames; frame_idx++) {
-    struct lookahead_entry *queued =
-        av1_lookahead_peek(cpi->lookahead, frame_idx);
-
-    assert(queued != NULL);
-
-    // cpi->source is passed in the alt-ref slot.
-    update_mbgraph_frame_stats(cpi, &cpi->mbgraph_stats[frame_idx],
-                               &queued->img, golden_ref, cpi->source);
-  }
-
-  aom_clear_system_state();
-
-  separate_arf_mbs(cpi);
-}
diff --git a/third_party/aom/av1/encoder/mbgraph.h b/third_party/aom/av1/encoder/mbgraph.h
deleted file mode 100644
index ba08476f7..000000000
--- a/third_party/aom/av1/encoder/mbgraph.h
+++ /dev/null
@@ -1,41 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#ifndef AOM_AV1_ENCODER_MBGRAPH_H_
-#define AOM_AV1_ENCODER_MBGRAPH_H_
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-// Per-macroblock statistics: one record per reference frame slot.
-typedef struct {
- struct {
- // Prediction error (a 16x16 SAD) for this reference; mbgraph.c stores
- // INT_MAX when the reference is unavailable.
- int err;
- // mbgraph.c writes `mode` for the INTRA_FRAME slot and `mv` for the
- // GOLDEN_FRAME and ALTREF_FRAME slots.
- union {
- int_mv mv;
- PREDICTION_MODE mode;
- } m;
- } ref[REF_FRAMES];
-} MBGRAPH_MB_STATS;
-
-// Statistics for one frame: an array of per-macroblock records that
-// mbgraph.c indexes as mb_row * mb_cols + mb_col.
-typedef struct {
- MBGRAPH_MB_STATS *mb_stats;
-} MBGRAPH_FRAME_STATS;
-
-struct AV1_COMP;
-
-void av1_update_mbgraph_stats(struct AV1_COMP *cpi);
-
-#ifdef __cplusplus
-} // extern "C"
-#endif
-
-#endif // AOM_AV1_ENCODER_MBGRAPH_H_
diff --git a/third_party/aom/av1/encoder/mcomp.c b/third_party/aom/av1/encoder/mcomp.c
deleted file mode 100644
index 8f6de9b53..000000000
--- a/third_party/aom/av1/encoder/mcomp.c
+++ /dev/null
@@ -1,2885 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#include <limits.h>
-#include <math.h>
-#include <stdio.h>
-
-#include "config/aom_config.h"
-#include "config/aom_dsp_rtcd.h"
-
-#include "aom_dsp/aom_dsp_common.h"
-#include "aom_mem/aom_mem.h"
-#include "aom_ports/mem.h"
-
-#include "av1/common/common.h"
-#include "av1/common/mvref_common.h"
-#include "av1/common/onyxc_int.h"
-#include "av1/common/reconinter.h"
-
-#include "av1/encoder/encoder.h"
-#include "av1/encoder/encodemv.h"
-#include "av1/encoder/mcomp.h"
-#include "av1/encoder/rdopt.h"
-#include "av1/encoder/reconinter_enc.h"
-
-// #define NEW_DIAMOND_SEARCH
-
-static INLINE const uint8_t *get_buf_from_mv(const struct buf_2d *buf,
- const MV *mv) {
- return &buf->buf[mv->row * buf->stride + mv->col];
-}
-
-void av1_set_mv_search_range(MvLimits *mv_limits, const MV *mv) {
- int col_min = (mv->col >> 3) - MAX_FULL_PEL_VAL + (mv->col & 7 ? 1 : 0);
- int row_min = (mv->row >> 3) - MAX_FULL_PEL_VAL + (mv->row & 7 ? 1 : 0);
- int col_max = (mv->col >> 3) + MAX_FULL_PEL_VAL;
- int row_max = (mv->row >> 3) + MAX_FULL_PEL_VAL;
-
- col_min = AOMMAX(col_min, (MV_LOW >> 3) + 1);
- row_min = AOMMAX(row_min, (MV_LOW >> 3) + 1);
- col_max = AOMMIN(col_max, (MV_UPP >> 3) - 1);
- row_max = AOMMIN(row_max, (MV_UPP >> 3) - 1);
-
- // Get intersection of UMV window and valid MV window to reduce # of checks
- // in diamond search.
- if (mv_limits->col_min < col_min) mv_limits->col_min = col_min;
- if (mv_limits->col_max > col_max) mv_limits->col_max = col_max;
- if (mv_limits->row_min < row_min) mv_limits->row_min = row_min;
- if (mv_limits->row_max > row_max) mv_limits->row_max = row_max;
-}
-
-static void set_subpel_mv_search_range(const MvLimits *mv_limits, int *col_min,
- int *col_max, int *row_min, int *row_max,
- const MV *ref_mv) {
- const int max_mv = MAX_FULL_PEL_VAL * 8;
- const int minc = AOMMAX(mv_limits->col_min * 8, ref_mv->col - max_mv);
- const int maxc = AOMMIN(mv_limits->col_max * 8, ref_mv->col + max_mv);
- const int minr = AOMMAX(mv_limits->row_min * 8, ref_mv->row - max_mv);
- const int maxr = AOMMIN(mv_limits->row_max * 8, ref_mv->row + max_mv);
-
- *col_min = AOMMAX(MV_LOW + 1, minc);
- *col_max = AOMMIN(MV_UPP - 1, maxc);
- *row_min = AOMMAX(MV_LOW + 1, minr);
- *row_max = AOMMIN(MV_UPP - 1, maxr);
-}
-
-int av1_init_search_range(int size) {
- int sr = 0;
- // Minimum search size no matter what the passed in value.
- size = AOMMAX(16, size);
-
- while ((size << sr) < MAX_FULL_PEL_VAL) sr++;
-
- sr = AOMMIN(sr, MAX_MVSEARCH_STEPS - 2);
- return sr;
-}
-
-static INLINE int mv_cost(const MV *mv, const int *joint_cost,
- int *const comp_cost[2]) {
- return joint_cost[av1_get_mv_joint(mv)] + comp_cost[0][mv->row] +
- comp_cost[1][mv->col];
-}
-
-int av1_mv_bit_cost(const MV *mv, const MV *ref, const int *mvjcost,
- int *mvcost[2], int weight) {
- const MV diff = { mv->row - ref->row, mv->col - ref->col };
- return ROUND_POWER_OF_TWO(mv_cost(&diff, mvjcost, mvcost) * weight, 7);
-}
-
-#define PIXEL_TRANSFORM_ERROR_SCALE 4
-static int mv_err_cost(const MV *mv, const MV *ref, const int *mvjcost,
- int *mvcost[2], int error_per_bit) {
- if (mvcost) {
- const MV diff = { mv->row - ref->row, mv->col - ref->col };
- return (int)ROUND_POWER_OF_TWO_64(
- (int64_t)mv_cost(&diff, mvjcost, mvcost) * error_per_bit,
- RDDIV_BITS + AV1_PROB_COST_SHIFT - RD_EPB_SHIFT +
- PIXEL_TRANSFORM_ERROR_SCALE);
- }
- return 0;
-}
-
-static int mvsad_err_cost(const MACROBLOCK *x, const MV *mv, const MV *ref,
- int sad_per_bit) {
- const MV diff = { (mv->row - ref->row) * 8, (mv->col - ref->col) * 8 };
- return ROUND_POWER_OF_TWO(
- (unsigned)mv_cost(&diff, x->nmvjointcost, x->mvcost) * sad_per_bit,
- AV1_PROB_COST_SHIFT);
-}
-
-void av1_init_dsmotion_compensation(search_site_config *cfg, int stride) {
- int len, ss_count = 1;
-
- cfg->ss[0].mv.col = cfg->ss[0].mv.row = 0;
- cfg->ss[0].offset = 0;
-
- for (len = MAX_FIRST_STEP; len > 0; len /= 2) {
- // Generate offsets for 4 search sites per step.
- const MV ss_mvs[] = { { -len, 0 }, { len, 0 }, { 0, -len }, { 0, len } };
- int i;
- for (i = 0; i < 4; ++i) {
- search_site *const ss = &cfg->ss[ss_count++];
- ss->mv = ss_mvs[i];
- ss->offset = ss->mv.row * stride + ss->mv.col;
- }
- }
-
- cfg->ss_count = ss_count;
- cfg->searches_per_step = 4;
-}
-
-void av1_init3smotion_compensation(search_site_config *cfg, int stride) {
- int len, ss_count = 1;
-
- cfg->ss[0].mv.col = cfg->ss[0].mv.row = 0;
- cfg->ss[0].offset = 0;
-
- for (len = MAX_FIRST_STEP; len > 0; len /= 2) {
- // Generate offsets for 8 search sites per step.
- const MV ss_mvs[8] = { { -len, 0 }, { len, 0 }, { 0, -len },
- { 0, len }, { -len, -len }, { -len, len },
- { len, -len }, { len, len } };
- int i;
- for (i = 0; i < 8; ++i) {
- search_site *const ss = &cfg->ss[ss_count++];
- ss->mv = ss_mvs[i];
- ss->offset = ss->mv.row * stride + ss->mv.col;
- }
- }
-
- cfg->ss_count = ss_count;
- cfg->searches_per_step = 8;
-}
-
-/*
- * To avoid the penalty for crossing cache-line read, preload the reference
- * area in a small buffer, which is aligned to make sure there won't be crossing
- * cache-line read while reading from this buffer. This reduced the cpu
- * cycles spent on reading ref data in sub-pixel filter functions.
- * TODO: Currently, since sub-pixel search range here is -3 ~ 3, copy 22 rows x
- * 32 cols area that is enough for 16x16 macroblock. Later, for SPLITMV, we
- * could reduce the area.
- */
-
-// convert motion vector component to offset for sv[a]f calc
-static INLINE int sp(int x) { return x & 7; }
-
-static INLINE const uint8_t *pre(const uint8_t *buf, int stride, int r, int c) {
- const int offset = (r >> 3) * stride + (c >> 3);
- return buf + offset;
-}
-
-/* checks if (r, c) has better score than previous best */
-#define CHECK_BETTER(v, r, c) \
- if (c >= minc && c <= maxc && r >= minr && r <= maxr) { \
- MV this_mv = { r, c }; \
- v = mv_err_cost(&this_mv, ref_mv, mvjcost, mvcost, error_per_bit); \
- if (second_pred == NULL) { \
- thismse = vfp->svf(pre(y, y_stride, r, c), y_stride, sp(c), sp(r), \
- src_address, src_stride, &sse); \
- } else if (mask) { \
- thismse = vfp->msvf(pre(y, y_stride, r, c), y_stride, sp(c), sp(r), \
- src_address, src_stride, second_pred, mask, \
- mask_stride, invert_mask, &sse); \
- } else { \
- if (xd->jcp_param.use_jnt_comp_avg) \
- thismse = vfp->jsvaf(pre(y, y_stride, r, c), y_stride, sp(c), sp(r), \
- src_address, src_stride, &sse, second_pred, \
- &xd->jcp_param); \
- else \
- thismse = vfp->svaf(pre(y, y_stride, r, c), y_stride, sp(c), sp(r), \
- src_address, src_stride, &sse, second_pred); \
- } \
- v += thismse; \
- if (v < besterr) { \
- besterr = v; \
- br = r; \
- bc = c; \
- *distortion = thismse; \
- *sse1 = sse; \
- } \
- } else { \
- v = INT_MAX; \
- }
-
-#define CHECK_BETTER0(v, r, c) CHECK_BETTER(v, r, c)
-
-/* checks if (r, c) has better score than previous best */
-#define CHECK_BETTER1(v, r, c) \
- if (c >= minc && c <= maxc && r >= minr && r <= maxr) { \
- MV this_mv = { r, c }; \
- thismse = upsampled_pref_error( \
- xd, cm, mi_row, mi_col, &this_mv, vfp, src_address, src_stride, \
- pre(y, y_stride, r, c), y_stride, sp(c), sp(r), second_pred, mask, \
- mask_stride, invert_mask, w, h, &sse, use_accurate_subpel_search); \
- v = mv_err_cost(&this_mv, ref_mv, mvjcost, mvcost, error_per_bit); \
- v += thismse; \
- if (v < besterr) { \
- besterr = v; \
- br = r; \
- bc = c; \
- *distortion = thismse; \
- *sse1 = sse; \
- } \
- } else { \
- v = INT_MAX; \
- }
-
-#define FIRST_LEVEL_CHECKS \
- { \
- unsigned int left, right, up, down, diag; \
- CHECK_BETTER(left, tr, tc - hstep); \
- CHECK_BETTER(right, tr, tc + hstep); \
- CHECK_BETTER(up, tr - hstep, tc); \
- CHECK_BETTER(down, tr + hstep, tc); \
- whichdir = (left < right ? 0 : 1) + (up < down ? 0 : 2); \
- switch (whichdir) { \
- case 0: CHECK_BETTER(diag, tr - hstep, tc - hstep); break; \
- case 1: CHECK_BETTER(diag, tr - hstep, tc + hstep); break; \
- case 2: CHECK_BETTER(diag, tr + hstep, tc - hstep); break; \
- case 3: CHECK_BETTER(diag, tr + hstep, tc + hstep); break; \
- } \
- }
-
-#define SECOND_LEVEL_CHECKS \
- { \
- int kr, kc; \
- unsigned int second; \
- if (tr != br && tc != bc) { \
- kr = br - tr; \
- kc = bc - tc; \
- CHECK_BETTER(second, tr + kr, tc + 2 * kc); \
- CHECK_BETTER(second, tr + 2 * kr, tc + kc); \
- } else if (tr == br && tc != bc) { \
- kc = bc - tc; \
- CHECK_BETTER(second, tr + hstep, tc + 2 * kc); \
- CHECK_BETTER(second, tr - hstep, tc + 2 * kc); \
- switch (whichdir) { \
- case 0: \
- case 1: CHECK_BETTER(second, tr + hstep, tc + kc); break; \
- case 2: \
- case 3: CHECK_BETTER(second, tr - hstep, tc + kc); break; \
- } \
- } else if (tr != br && tc == bc) { \
- kr = br - tr; \
- CHECK_BETTER(second, tr + 2 * kr, tc + hstep); \
- CHECK_BETTER(second, tr + 2 * kr, tc - hstep); \
- switch (whichdir) { \
- case 0: \
- case 2: CHECK_BETTER(second, tr + kr, tc + hstep); break; \
- case 1: \
- case 3: CHECK_BETTER(second, tr + kr, tc - hstep); break; \
- } \
- } \
- }
-
-// TODO(yunqingwang): SECOND_LEVEL_CHECKS_BEST was a rewrote of
-// SECOND_LEVEL_CHECKS, and SECOND_LEVEL_CHECKS should be rewritten
-// later in the same way.
-#define SECOND_LEVEL_CHECKS_BEST(k) \
- { \
- unsigned int second; \
- int br0 = br; \
- int bc0 = bc; \
- assert(tr == br || tc == bc); \
- if (tr == br && tc != bc) { \
- kc = bc - tc; \
- } else if (tr != br && tc == bc) { \
- kr = br - tr; \
- } \
- CHECK_BETTER##k(second, br0 + kr, bc0); \
- CHECK_BETTER##k(second, br0, bc0 + kc); \
- if (br0 != br || bc0 != bc) { \
- CHECK_BETTER##k(second, br0 + kr, bc0 + kc); \
- } \
- }
-
-#define SETUP_SUBPEL_SEARCH \
- const uint8_t *const src_address = x->plane[0].src.buf; \
- const int src_stride = x->plane[0].src.stride; \
- const MACROBLOCKD *xd = &x->e_mbd; \
- unsigned int besterr = INT_MAX; \
- unsigned int sse; \
- unsigned int whichdir; \
- int thismse; \
- MV *bestmv = &x->best_mv.as_mv; \
- const unsigned int halfiters = iters_per_step; \
- const unsigned int quarteriters = iters_per_step; \
- const unsigned int eighthiters = iters_per_step; \
- const int y_stride = xd->plane[0].pre[0].stride; \
- const int offset = bestmv->row * y_stride + bestmv->col; \
- const uint8_t *const y = xd->plane[0].pre[0].buf; \
- \
- int br = bestmv->row * 8; \
- int bc = bestmv->col * 8; \
- int hstep = 4; \
- int minc, maxc, minr, maxr; \
- int tr = br; \
- int tc = bc; \
- \
- set_subpel_mv_search_range(&x->mv_limits, &minc, &maxc, &minr, &maxr, \
- ref_mv); \
- \
- bestmv->row *= 8; \
- bestmv->col *= 8;
-
-static unsigned int setup_center_error(
- const MACROBLOCKD *xd, const MV *bestmv, const MV *ref_mv,
- int error_per_bit, const aom_variance_fn_ptr_t *vfp,
- const uint8_t *const src, const int src_stride, const uint8_t *const y,
- int y_stride, const uint8_t *second_pred, const uint8_t *mask,
- int mask_stride, int invert_mask, int w, int h, int offset, int *mvjcost,
- int *mvcost[2], unsigned int *sse1, int *distortion) {
- unsigned int besterr;
- if (second_pred != NULL) {
- if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
- DECLARE_ALIGNED(16, uint16_t, comp_pred16[MAX_SB_SQUARE]);
- uint8_t *comp_pred = CONVERT_TO_BYTEPTR(comp_pred16);
- if (mask) {
- aom_highbd_comp_mask_pred(comp_pred, second_pred, w, h, y + offset,
- y_stride, mask, mask_stride, invert_mask);
- } else {
- if (xd->jcp_param.use_jnt_comp_avg)
- aom_highbd_jnt_comp_avg_pred(comp_pred, second_pred, w, h, y + offset,
- y_stride, &xd->jcp_param);
- else
- aom_highbd_comp_avg_pred(comp_pred, second_pred, w, h, y + offset,
- y_stride);
- }
- besterr = vfp->vf(comp_pred, w, src, src_stride, sse1);
- } else {
- DECLARE_ALIGNED(16, uint8_t, comp_pred[MAX_SB_SQUARE]);
- if (mask) {
- aom_comp_mask_pred(comp_pred, second_pred, w, h, y + offset, y_stride,
- mask, mask_stride, invert_mask);
- } else {
- if (xd->jcp_param.use_jnt_comp_avg)
- aom_jnt_comp_avg_pred(comp_pred, second_pred, w, h, y + offset,
- y_stride, &xd->jcp_param);
- else
- aom_comp_avg_pred(comp_pred, second_pred, w, h, y + offset, y_stride);
- }
- besterr = vfp->vf(comp_pred, w, src, src_stride, sse1);
- }
- } else {
- besterr = vfp->vf(y + offset, y_stride, src, src_stride, sse1);
- }
- *distortion = besterr;
- besterr += mv_err_cost(bestmv, ref_mv, mvjcost, mvcost, error_per_bit);
- return besterr;
-}
-
-static INLINE int divide_and_round(int n, int d) {
- return ((n < 0) ^ (d < 0)) ? ((n - d / 2) / d) : ((n + d / 2) / d);
-}
-
-static INLINE int is_cost_list_wellbehaved(int *cost_list) {
- return cost_list[0] < cost_list[1] && cost_list[0] < cost_list[2] &&
- cost_list[0] < cost_list[3] && cost_list[0] < cost_list[4];
-}
-
-// Returns surface minima estimate at given precision in 1/2^n bits.
-// Assume a model for the cost surface: S = A(x - x0)^2 + B(y - y0)^2 + C
-// For a given set of costs S0, S1, S2, S3, S4 at points
-// (y, x) = (0, 0), (0, -1), (1, 0), (0, 1) and (-1, 0) respectively,
-// the solution for the location of the minima (x0, y0) is given by:
-// x0 = 1/2 (S1 - S3)/(S1 + S3 - 2*S0),
-// y0 = 1/2 (S4 - S2)/(S4 + S2 - 2*S0).
-// The code below is an integerized version of that.
-static void get_cost_surf_min(int *cost_list, int *ir, int *ic, int bits) {
- *ic = divide_and_round((cost_list[1] - cost_list[3]) * (1 << (bits - 1)),
- (cost_list[1] - 2 * cost_list[0] + cost_list[3]));
- *ir = divide_and_round((cost_list[4] - cost_list[2]) * (1 << (bits - 1)),
- (cost_list[4] - 2 * cost_list[0] + cost_list[2]));
-}
-
-int av1_find_best_sub_pixel_tree_pruned_evenmore(
- MACROBLOCK *x, const AV1_COMMON *const cm, int mi_row, int mi_col,
- const MV *ref_mv, int allow_hp, int error_per_bit,
- const aom_variance_fn_ptr_t *vfp, int forced_stop, int iters_per_step,
- int *cost_list, int *mvjcost, int *mvcost[2], int *distortion,
- unsigned int *sse1, const uint8_t *second_pred, const uint8_t *mask,
- int mask_stride, int invert_mask, int w, int h,
- int use_accurate_subpel_search) {
- SETUP_SUBPEL_SEARCH;
- besterr = setup_center_error(xd, bestmv, ref_mv, error_per_bit, vfp,
- src_address, src_stride, y, y_stride,
- second_pred, mask, mask_stride, invert_mask, w,
- h, offset, mvjcost, mvcost, sse1, distortion);
- (void)halfiters;
- (void)quarteriters;
- (void)eighthiters;
- (void)whichdir;
- (void)allow_hp;
- (void)forced_stop;
- (void)hstep;
- (void)use_accurate_subpel_search;
- (void)cm;
- (void)mi_row;
- (void)mi_col;
-
- if (cost_list && cost_list[0] != INT_MAX && cost_list[1] != INT_MAX &&
- cost_list[2] != INT_MAX && cost_list[3] != INT_MAX &&
- cost_list[4] != INT_MAX && is_cost_list_wellbehaved(cost_list)) {
- int ir, ic;
- unsigned int minpt;
- get_cost_surf_min(cost_list, &ir, &ic, 2);
- if (ir != 0 || ic != 0) {
- CHECK_BETTER(minpt, tr + 2 * ir, tc + 2 * ic);
- }
- } else {
- FIRST_LEVEL_CHECKS;
- if (halfiters > 1) {
- SECOND_LEVEL_CHECKS;
- }
-
- tr = br;
- tc = bc;
-
- // Each subsequent iteration checks at least one point in common with
- // the last iteration could be 2 ( if diag selected) 1/4 pel
- // Note forced_stop: 0 - full, 1 - qtr only, 2 - half only
- if (forced_stop != 2) {
- hstep >>= 1;
- FIRST_LEVEL_CHECKS;
- if (quarteriters > 1) {
- SECOND_LEVEL_CHECKS;
- }
- }
- }
-
- tr = br;
- tc = bc;
-
- if (allow_hp && forced_stop == 0) {
- hstep >>= 1;
- FIRST_LEVEL_CHECKS;
- if (eighthiters > 1) {
- SECOND_LEVEL_CHECKS;
- }
- }
-
- bestmv->row = br;
- bestmv->col = bc;
-
- return besterr;
-}
-
-int av1_find_best_sub_pixel_tree_pruned_more(
- MACROBLOCK *x, const AV1_COMMON *const cm, int mi_row, int mi_col,
- const MV *ref_mv, int allow_hp, int error_per_bit,
- const aom_variance_fn_ptr_t *vfp, int forced_stop, int iters_per_step,
- int *cost_list, int *mvjcost, int *mvcost[2], int *distortion,
- unsigned int *sse1, const uint8_t *second_pred, const uint8_t *mask,
- int mask_stride, int invert_mask, int w, int h,
- int use_accurate_subpel_search) {
- SETUP_SUBPEL_SEARCH;
- (void)use_accurate_subpel_search;
- (void)cm;
- (void)mi_row;
- (void)mi_col;
-
- besterr = setup_center_error(xd, bestmv, ref_mv, error_per_bit, vfp,
- src_address, src_stride, y, y_stride,
- second_pred, mask, mask_stride, invert_mask, w,
- h, offset, mvjcost, mvcost, sse1, distortion);
- if (cost_list && cost_list[0] != INT_MAX && cost_list[1] != INT_MAX &&
- cost_list[2] != INT_MAX && cost_list[3] != INT_MAX &&
- cost_list[4] != INT_MAX && is_cost_list_wellbehaved(cost_list)) {
- unsigned int minpt;
- int ir, ic;
- get_cost_surf_min(cost_list, &ir, &ic, 1);
- if (ir != 0 || ic != 0) {
- CHECK_BETTER(minpt, tr + ir * hstep, tc + ic * hstep);
- }
- } else {
- FIRST_LEVEL_CHECKS;
- if (halfiters > 1) {
- SECOND_LEVEL_CHECKS;
- }
- }
-
- // Each subsequent iteration checks at least one point in common with
- // the last iteration could be 2 ( if diag selected) 1/4 pel
-
- // Note forced_stop: 0 - full, 1 - qtr only, 2 - half only
- if (forced_stop != 2) {
- tr = br;
- tc = bc;
- hstep >>= 1;
- FIRST_LEVEL_CHECKS;
- if (quarteriters > 1) {
- SECOND_LEVEL_CHECKS;
- }
- }
-
- if (allow_hp && forced_stop == 0) {
- tr = br;
- tc = bc;
- hstep >>= 1;
- FIRST_LEVEL_CHECKS;
- if (eighthiters > 1) {
- SECOND_LEVEL_CHECKS;
- }
- }
- // These lines insure static analysis doesn't warn that
- // tr and tc aren't used after the above point.
- (void)tr;
- (void)tc;
-
- bestmv->row = br;
- bestmv->col = bc;
-
- return besterr;
-}
-
-int av1_find_best_sub_pixel_tree_pruned(
- MACROBLOCK *x, const AV1_COMMON *const cm, int mi_row, int mi_col,
- const MV *ref_mv, int allow_hp, int error_per_bit,
- const aom_variance_fn_ptr_t *vfp, int forced_stop, int iters_per_step,
- int *cost_list, int *mvjcost, int *mvcost[2], int *distortion,
- unsigned int *sse1, const uint8_t *second_pred, const uint8_t *mask,
- int mask_stride, int invert_mask, int w, int h,
- int use_accurate_subpel_search) {
- SETUP_SUBPEL_SEARCH;
- (void)use_accurate_subpel_search;
- (void)cm;
- (void)mi_row;
- (void)mi_col;
-
- besterr = setup_center_error(xd, bestmv, ref_mv, error_per_bit, vfp,
- src_address, src_stride, y, y_stride,
- second_pred, mask, mask_stride, invert_mask, w,
- h, offset, mvjcost, mvcost, sse1, distortion);
- if (cost_list && cost_list[0] != INT_MAX && cost_list[1] != INT_MAX &&
- cost_list[2] != INT_MAX && cost_list[3] != INT_MAX &&
- cost_list[4] != INT_MAX) {
- unsigned int left, right, up, down, diag;
- whichdir = (cost_list[1] < cost_list[3] ? 0 : 1) +
- (cost_list[2] < cost_list[4] ? 0 : 2);
- switch (whichdir) {
- case 0:
- CHECK_BETTER(left, tr, tc - hstep);
- CHECK_BETTER(down, tr + hstep, tc);
- CHECK_BETTER(diag, tr + hstep, tc - hstep);
- break;
- case 1:
- CHECK_BETTER(right, tr, tc + hstep);
- CHECK_BETTER(down, tr + hstep, tc);
- CHECK_BETTER(diag, tr + hstep, tc + hstep);
- break;
- case 2:
- CHECK_BETTER(left, tr, tc - hstep);
- CHECK_BETTER(up, tr - hstep, tc);
- CHECK_BETTER(diag, tr - hstep, tc - hstep);
- break;
- case 3:
- CHECK_BETTER(right, tr, tc + hstep);
- CHECK_BETTER(up, tr - hstep, tc);
- CHECK_BETTER(diag, tr - hstep, tc + hstep);
- break;
- }
- } else {
- FIRST_LEVEL_CHECKS;
- if (halfiters > 1) {
- SECOND_LEVEL_CHECKS;
- }
- }
-
- tr = br;
- tc = bc;
-
- // Each subsequent iteration checks at least one point in common with
- // the last iteration could be 2 ( if diag selected) 1/4 pel
-
- // Note forced_stop: 0 - full, 1 - qtr only, 2 - half only
- if (forced_stop != 2) {
- hstep >>= 1;
- FIRST_LEVEL_CHECKS;
- if (quarteriters > 1) {
- SECOND_LEVEL_CHECKS;
- }
- tr = br;
- tc = bc;
- }
-
- if (allow_hp && forced_stop == 0) {
- hstep >>= 1;
- FIRST_LEVEL_CHECKS;
- if (eighthiters > 1) {
- SECOND_LEVEL_CHECKS;
- }
- tr = br;
- tc = bc;
- }
- // These lines insure static analysis doesn't warn that
- // tr and tc aren't used after the above point.
- (void)tr;
- (void)tc;
-
- bestmv->row = br;
- bestmv->col = bc;
-
- return besterr;
-}
-
-/* clang-format off */
-static const MV search_step_table[12] = {
- // left, right, up, down
- { 0, -4 }, { 0, 4 }, { -4, 0 }, { 4, 0 },
- { 0, -2 }, { 0, 2 }, { -2, 0 }, { 2, 0 },
- { 0, -1 }, { 0, 1 }, { -1, 0 }, { 1, 0 }
-};
-/* clang-format on */
-
-static int upsampled_pref_error(MACROBLOCKD *xd, const AV1_COMMON *const cm,
- int mi_row, int mi_col, const MV *const mv,
- const aom_variance_fn_ptr_t *vfp,
- const uint8_t *const src, const int src_stride,
- const uint8_t *const y, int y_stride,
- int subpel_x_q3, int subpel_y_q3,
- const uint8_t *second_pred, const uint8_t *mask,
- int mask_stride, int invert_mask, int w, int h,
- unsigned int *sse, int subpel_search) {
- unsigned int besterr;
- if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
- DECLARE_ALIGNED(16, uint16_t, pred16[MAX_SB_SQUARE]);
- uint8_t *pred8 = CONVERT_TO_BYTEPTR(pred16);
- if (second_pred != NULL) {
- if (mask) {
- aom_highbd_comp_mask_upsampled_pred(
- xd, cm, mi_row, mi_col, mv, pred8, second_pred, w, h, subpel_x_q3,
- subpel_y_q3, y, y_stride, mask, mask_stride, invert_mask, xd->bd,
- subpel_search);
- } else {
- if (xd->jcp_param.use_jnt_comp_avg)
- aom_highbd_jnt_comp_avg_upsampled_pred(
- xd, cm, mi_row, mi_col, mv, pred8, second_pred, w, h, subpel_x_q3,
- subpel_y_q3, y, y_stride, xd->bd, &xd->jcp_param, subpel_search);
- else
- aom_highbd_comp_avg_upsampled_pred(
- xd, cm, mi_row, mi_col, mv, pred8, second_pred, w, h, subpel_x_q3,
- subpel_y_q3, y, y_stride, xd->bd, subpel_search);
- }
- } else {
- aom_highbd_upsampled_pred(xd, cm, mi_row, mi_col, mv, pred8, w, h,
- subpel_x_q3, subpel_y_q3, y, y_stride, xd->bd,
- subpel_search);
- }
- besterr = vfp->vf(pred8, w, src, src_stride, sse);
- } else {
- DECLARE_ALIGNED(16, uint8_t, pred[MAX_SB_SQUARE]);
- if (second_pred != NULL) {
- if (mask) {
- aom_comp_mask_upsampled_pred(xd, cm, mi_row, mi_col, mv, pred,
- second_pred, w, h, subpel_x_q3,
- subpel_y_q3, y, y_stride, mask,
- mask_stride, invert_mask, subpel_search);
- } else {
- if (xd->jcp_param.use_jnt_comp_avg)
- aom_jnt_comp_avg_upsampled_pred(
- xd, cm, mi_row, mi_col, mv, pred, second_pred, w, h, subpel_x_q3,
- subpel_y_q3, y, y_stride, &xd->jcp_param, subpel_search);
- else
- aom_comp_avg_upsampled_pred(xd, cm, mi_row, mi_col, mv, pred,
- second_pred, w, h, subpel_x_q3,
- subpel_y_q3, y, y_stride, subpel_search);
- }
- } else {
- aom_upsampled_pred(xd, cm, mi_row, mi_col, mv, pred, w, h, subpel_x_q3,
- subpel_y_q3, y, y_stride, subpel_search);
- }
-
- besterr = vfp->vf(pred, w, src, src_stride, sse);
- }
- return besterr;
-}
-
-static unsigned int upsampled_setup_center_error(
- MACROBLOCKD *xd, const AV1_COMMON *const cm, int mi_row, int mi_col,
- const MV *bestmv, const MV *ref_mv, int error_per_bit,
- const aom_variance_fn_ptr_t *vfp, const uint8_t *const src,
- const int src_stride, const uint8_t *const y, int y_stride,
- const uint8_t *second_pred, const uint8_t *mask, int mask_stride,
- int invert_mask, int w, int h, int offset, int *mvjcost, int *mvcost[2],
- unsigned int *sse1, int *distortion, int subpel_search) {
- unsigned int besterr =
- upsampled_pref_error(xd, cm, mi_row, mi_col, bestmv, vfp, src, src_stride,
- y + offset, y_stride, 0, 0, second_pred, mask,
- mask_stride, invert_mask, w, h, sse1, subpel_search);
- *distortion = besterr;
- besterr += mv_err_cost(bestmv, ref_mv, mvjcost, mvcost, error_per_bit);
- return besterr;
-}
-
-// when use_accurate_subpel_search == 0
-static INLINE unsigned int estimate_upsampled_pref_error(
- MACROBLOCKD *xd, const aom_variance_fn_ptr_t *vfp, const uint8_t *const src,
- const int src_stride, const uint8_t *const pre, int y_stride,
- int subpel_x_q3, int subpel_y_q3, const uint8_t *second_pred,
- const uint8_t *mask, int mask_stride, int invert_mask, unsigned int *sse) {
- if (second_pred == NULL) {
- return vfp->svf(pre, y_stride, subpel_x_q3, subpel_y_q3, src, src_stride,
- sse);
- } else if (mask) {
- return vfp->msvf(pre, y_stride, subpel_x_q3, subpel_y_q3, src, src_stride,
- second_pred, mask, mask_stride, invert_mask, sse);
- } else {
- if (xd->jcp_param.use_jnt_comp_avg)
- return vfp->jsvaf(pre, y_stride, subpel_x_q3, subpel_y_q3, src,
- src_stride, sse, second_pred, &xd->jcp_param);
- else
- return vfp->svaf(pre, y_stride, subpel_x_q3, subpel_y_q3, src, src_stride,
- sse, second_pred);
- }
-}
-
-int av1_find_best_sub_pixel_tree(
- MACROBLOCK *x, const AV1_COMMON *const cm, int mi_row, int mi_col,
- const MV *ref_mv, int allow_hp, int error_per_bit,
- const aom_variance_fn_ptr_t *vfp, int forced_stop, int iters_per_step,
- int *cost_list, int *mvjcost, int *mvcost[2], int *distortion,
- unsigned int *sse1, const uint8_t *second_pred, const uint8_t *mask,
- int mask_stride, int invert_mask, int w, int h,
- int use_accurate_subpel_search) {
- const uint8_t *const src_address = x->plane[0].src.buf;
- const int src_stride = x->plane[0].src.stride;
- MACROBLOCKD *xd = &x->e_mbd;
- unsigned int besterr = INT_MAX;
- unsigned int sse;
- unsigned int thismse;
- const int y_stride = xd->plane[0].pre[0].stride;
- MV *bestmv = &x->best_mv.as_mv;
- const int offset = bestmv->row * y_stride + bestmv->col;
- const uint8_t *const y = xd->plane[0].pre[0].buf;
-
- int br = bestmv->row * 8;
- int bc = bestmv->col * 8;
- int hstep = 4;
- int iter, round = 3 - forced_stop;
- int tr = br;
- int tc = bc;
- const MV *search_step = search_step_table;
- int idx, best_idx = -1;
- unsigned int cost_array[5];
- int kr, kc;
- int minc, maxc, minr, maxr;
-
- set_subpel_mv_search_range(&x->mv_limits, &minc, &maxc, &minr, &maxr, ref_mv);
-
- if (!allow_hp)
- if (round == 3) round = 2;
-
- bestmv->row *= 8;
- bestmv->col *= 8;
-
- if (use_accurate_subpel_search)
- besterr = upsampled_setup_center_error(
- xd, cm, mi_row, mi_col, bestmv, ref_mv, error_per_bit, vfp, src_address,
- src_stride, y, y_stride, second_pred, mask, mask_stride, invert_mask, w,
- h, offset, mvjcost, mvcost, sse1, distortion,
- use_accurate_subpel_search);
- else
- besterr = setup_center_error(xd, bestmv, ref_mv, error_per_bit, vfp,
- src_address, src_stride, y, y_stride,
- second_pred, mask, mask_stride, invert_mask, w,
- h, offset, mvjcost, mvcost, sse1, distortion);
-
- (void)cost_list; // to silence compiler warning
-
- for (iter = 0; iter < round; ++iter) {
- // Check vertical and horizontal sub-pixel positions.
- for (idx = 0; idx < 4; ++idx) {
- tr = br + search_step[idx].row;
- tc = bc + search_step[idx].col;
- if (tc >= minc && tc <= maxc && tr >= minr && tr <= maxr) {
- MV this_mv = { tr, tc };
-
- if (use_accurate_subpel_search) {
- thismse = upsampled_pref_error(
- xd, cm, mi_row, mi_col, &this_mv, vfp, src_address, src_stride,
- pre(y, y_stride, tr, tc), y_stride, sp(tc), sp(tr), second_pred,
- mask, mask_stride, invert_mask, w, h, &sse,
- use_accurate_subpel_search);
- } else {
- thismse = estimate_upsampled_pref_error(
- xd, vfp, src_address, src_stride, pre(y, y_stride, tr, tc),
- y_stride, sp(tc), sp(tr), second_pred, mask, mask_stride,
- invert_mask, &sse);
- }
-
- cost_array[idx] = thismse + mv_err_cost(&this_mv, ref_mv, mvjcost,
- mvcost, error_per_bit);
-
- if (cost_array[idx] < besterr) {
- best_idx = idx;
- besterr = cost_array[idx];
- *distortion = thismse;
- *sse1 = sse;
- }
- } else {
- cost_array[idx] = INT_MAX;
- }
- }
-
- // Check diagonal sub-pixel position
- kc = (cost_array[0] <= cost_array[1] ? -hstep : hstep);
- kr = (cost_array[2] <= cost_array[3] ? -hstep : hstep);
-
- tc = bc + kc;
- tr = br + kr;
- if (tc >= minc && tc <= maxc && tr >= minr && tr <= maxr) {
- MV this_mv = { tr, tc };
-
- if (use_accurate_subpel_search) {
- thismse = upsampled_pref_error(
- xd, cm, mi_row, mi_col, &this_mv, vfp, src_address, src_stride,
- pre(y, y_stride, tr, tc), y_stride, sp(tc), sp(tr), second_pred,
- mask, mask_stride, invert_mask, w, h, &sse,
- use_accurate_subpel_search);
- } else {
- thismse = estimate_upsampled_pref_error(
- xd, vfp, src_address, src_stride, pre(y, y_stride, tr, tc),
- y_stride, sp(tc), sp(tr), second_pred, mask, mask_stride,
- invert_mask, &sse);
- }
-
- cost_array[4] = thismse + mv_err_cost(&this_mv, ref_mv, mvjcost, mvcost,
- error_per_bit);
-
- if (cost_array[4] < besterr) {
- best_idx = 4;
- besterr = cost_array[4];
- *distortion = thismse;
- *sse1 = sse;
- }
- } else {
- cost_array[idx] = INT_MAX;
- }
-
- if (best_idx < 4 && best_idx >= 0) {
- br += search_step[best_idx].row;
- bc += search_step[best_idx].col;
- } else if (best_idx == 4) {
- br = tr;
- bc = tc;
- }
-
- if (iters_per_step > 1 && best_idx != -1) {
- if (use_accurate_subpel_search) {
- SECOND_LEVEL_CHECKS_BEST(1);
- } else {
- SECOND_LEVEL_CHECKS_BEST(0);
- }
- }
-
- search_step += 4;
- hstep >>= 1;
- best_idx = -1;
- }
-
- // These lines insure static analysis doesn't warn that
- // tr and tc aren't used after the above point.
- (void)tr;
- (void)tc;
-
- bestmv->row = br;
- bestmv->col = bc;
-
- return besterr;
-}
-
-#undef PRE
-#undef CHECK_BETTER
-
-unsigned int av1_compute_motion_cost(const AV1_COMP *cpi, MACROBLOCK *const x,
- BLOCK_SIZE bsize, int mi_row, int mi_col,
- const MV *this_mv) {
- const AV1_COMMON *const cm = &cpi->common;
- MACROBLOCKD *xd = &x->e_mbd;
- const uint8_t *const src = x->plane[0].src.buf;
- const int src_stride = x->plane[0].src.stride;
- uint8_t *const dst = xd->plane[0].dst.buf;
- const int dst_stride = xd->plane[0].dst.stride;
- const aom_variance_fn_ptr_t *vfp = &cpi->fn_ptr[bsize];
- const int_mv ref_mv = av1_get_ref_mv(x, 0);
- unsigned int mse;
- unsigned int sse;
-
- av1_build_inter_predictors_sby(cm, xd, mi_row, mi_col, NULL, bsize);
- mse = vfp->vf(dst, dst_stride, src, src_stride, &sse);
- mse += mv_err_cost(this_mv, &ref_mv.as_mv, x->nmvjointcost, x->mvcost,
- x->errorperbit);
- return mse;
-}
-
-// Refine the block's warped-motion MV with a small local search.
-//
-// Runs up to two passes. In each pass four neighbours of the current centre
-// are tried (one-unit offsets when cm->allow_high_precision_mv is set,
-// two-unit offsets otherwise). For every in-range neighbour the sample set is
-// re-selected, a warp model is fitted with find_projection(), and the
-// candidate is scored with av1_compute_motion_cost(). The centre moves to the
-// best improving neighbour; the search stops early when none improves.
-//
-// On return mbmi->mv[0], mbmi->wm_params and mbmi->num_proj_ref hold the best
-// combination found (or the original one), and the best cost is returned.
-unsigned int av1_refine_warped_mv(const AV1_COMP *cpi, MACROBLOCK *const x,
-                                  BLOCK_SIZE bsize, int mi_row, int mi_col,
-                                  int *pts0, int *pts_inref0,
-                                  int total_samples) {
-  // Entries 0..3 are one-unit steps, entries 4..7 are two-unit steps.
-  const MV neighbors[8] = { { 0, -1 }, { 1, 0 }, { 0, 1 }, { -1, 0 },
-                            { 0, -2 }, { 2, 0 }, { 0, 2 }, { -2, 0 } };
-  const AV1_COMMON *const cm = &cpi->common;
-  MACROBLOCKD *xd = &x->e_mbd;
-  MB_MODE_INFO *mbmi = xd->mi[0];
-  const int_mv ref_mv = av1_get_ref_mv(x, 0);
-  // Candidates are written straight into the block's mode info through this
-  // pointer, exactly where the final result is stored.
-  MV *const cur_mv = &mbmi->mv[0].as_mv;
-  int16_t center_row = cur_mv->row;
-  int16_t center_col = cur_mv->col;
-  WarpedMotionParams best_wm_params = mbmi->wm_params;
-  int best_num_proj_ref = mbmi->num_proj_ref;
-  const int first_idx = cm->allow_high_precision_mv ? 0 : 4;
-  int col_lo, col_hi, row_lo, row_hi;
-  unsigned int bestmse;
-  int pass;
-
-  set_subpel_mv_search_range(&x->mv_limits, &col_lo, &col_hi, &row_lo, &row_hi,
-                             &ref_mv.as_mv);
-
-  // Score the starting vector; it must already lie inside the search window.
-  assert(center_col >= col_lo && center_col <= col_hi &&
-         center_row >= row_lo && center_row <= row_hi);
-  bestmse = av1_compute_motion_cost(cpi, x, bsize, mi_row, mi_col, cur_mv);
-
-  for (pass = 0; pass < 2; ++pass) {
-    int best_idx = -1;
-    int idx;
-
-    for (idx = first_idx; idx < first_idx + 4; ++idx) {
-      int pts[SAMPLES_ARRAY_SIZE], pts_inref[SAMPLES_ARRAY_SIZE];
-      unsigned int thismse;
-      MV this_mv;
-
-      cur_mv->row = center_row + neighbors[idx].row;
-      cur_mv->col = center_col + neighbors[idx].col;
-
-      // Skip candidates that fall outside the allowed window.
-      if (cur_mv->col < col_lo || cur_mv->col > col_hi ||
-          cur_mv->row < row_lo || cur_mv->row > row_hi)
-        continue;
-
-      this_mv.row = cur_mv->row;
-      this_mv.col = cur_mv->col;
-
-      // Work on private copies: sample selection may reorder/prune them.
-      memcpy(pts, pts0, total_samples * 2 * sizeof(*pts0));
-      memcpy(pts_inref, pts_inref0, total_samples * 2 * sizeof(*pts_inref0));
-      if (total_samples > 1) {
-        mbmi->num_proj_ref =
-            selectSamples(&this_mv, pts, pts_inref, total_samples, bsize);
-      }
-
-      // A non-zero return from find_projection() rejects this candidate.
-      if (find_projection(mbmi->num_proj_ref, pts, pts_inref, bsize,
-                          cur_mv->row, cur_mv->col, &mbmi->wm_params, mi_row,
-                          mi_col))
-        continue;
-
-      thismse =
-          av1_compute_motion_cost(cpi, x, bsize, mi_row, mi_col, &this_mv);
-      if (thismse < bestmse) {
-        best_idx = idx;
-        best_wm_params = mbmi->wm_params;
-        best_num_proj_ref = mbmi->num_proj_ref;
-        bestmse = thismse;
-      }
-    }
-
-    // No neighbour improved on the centre: the search has converged.
-    if (best_idx == -1) break;
-
-    center_row += neighbors[best_idx].row;
-    center_col += neighbors[best_idx].col;
-  }
-
-  // Restore the winning vector and model (the loop left the last candidate
-  // tried in the mode info).
-  cur_mv->row = center_row;
-  cur_mv->col = center_col;
-  mbmi->wm_params = best_wm_params;
-  mbmi->num_proj_ref = best_num_proj_ref;
-  return bestmse;
-}
-
-// Returns 1 when the square of half-width `range` centred on (row, col) lies
-// entirely inside mv_limits (limits inclusive), 0 otherwise.
-static INLINE int check_bounds(const MvLimits *mv_limits, int row, int col,
-                               int range) {
-  // Each comparison yields 0 or 1; they are combined with bitwise AND, so all
-  // four are always evaluated.
-  const int top_ok = (row - range) >= mv_limits->row_min;
-  const int bottom_ok = (row + range) <= mv_limits->row_max;
-  const int left_ok = (col - range) >= mv_limits->col_min;
-  const int right_ok = (col + range) <= mv_limits->col_max;
-  return top_ok & bottom_ok & left_ok & right_ok;
-}
-
-// Returns 1 when *mv lies inside mv_limits (limits inclusive), 0 otherwise.
-static INLINE int is_mv_in(const MvLimits *mv_limits, const MV *mv) {
-  const int col_ok =
-      (mv->col >= mv_limits->col_min) && (mv->col <= mv_limits->col_max);
-  const int row_ok =
-      (mv->row >= mv_limits->row_min) && (mv->row <= mv_limits->row_max);
-  return col_ok && row_ok;
-}
-
-// Candidate-evaluation step shared by the pattern search loops.
-// Must be expanded in a scope that provides thissad, bestsad, best_site, i,
-// use_mvcost, x, this_mv, fcenter_mv and sad_per_bit.
-// If thissad beats bestsad, the motion-vector cost is added to thissad (only
-// when use_mvcost is non-zero); if the total still beats bestsad, bestsad is
-// updated and best_site is set to the current index i.
-// The expansion is a braced block, so call sites use it without a trailing
-// semicolon.
-#define CHECK_BETTER \
- { \
- if (thissad < bestsad) { \
- if (use_mvcost) \
- thissad += mvsad_err_cost(x, &this_mv, &fcenter_mv, sad_per_bit); \
- if (thissad < bestsad) { \
- bestsad = thissad; \
- best_site = i; \
- } \
- } \
- }
-
-#define MAX_PATTERN_SCALES 11
-#define MAX_PATTERN_CANDIDATES 8 // max number of candidates per scale
-#define PATTERN_CANDIDATES_REF 3 // number of refinement candidates
-
-// Fill cost_list[0..4] with variance-plus-MV-cost values for the best integer
-// position and its four one-away neighbours.
-//   cost_list[0]     : cost at *best_mv
-//   cost_list[1..4]  : cost at best_mv + {0,-1}, {1,0}, {0,1}, {-1,0}
-// A neighbour that lies outside x->mv_limits gets INT_MAX.
-static INLINE void calc_int_cost_list(const MACROBLOCK *x,
-                                      const MV *const ref_mv, int sadpb,
-                                      const aom_variance_fn_ptr_t *fn_ptr,
-                                      const MV *best_mv, int *cost_list) {
-  static const MV neighbors[4] = { { 0, -1 }, { 1, 0 }, { 0, 1 }, { -1, 0 } };
-  const struct buf_2d *const src = &x->plane[0].src;
-  const struct buf_2d *const ref = &x->e_mbd.plane[0].pre[0];
-  const MV fcenter_mv = { ref_mv->row >> 3, ref_mv->col >> 3 };
-  const int br = best_mv->row;
-  const int bc = best_mv->col;
-  // When the whole 1-away neighbourhood is in range, the per-neighbour limit
-  // test is skipped.
-  const int all_in = check_bounds(&x->mv_limits, br, bc, 1);
-  unsigned int sse;
-  int i;
-
-  cost_list[0] =
-      fn_ptr->vf(src->buf, src->stride, get_buf_from_mv(ref, best_mv),
-                 ref->stride, &sse) +
-      mvsad_err_cost(x, best_mv, &fcenter_mv, sadpb);
-
-  for (i = 0; i < 4; i++) {
-    const MV neighbor_mv = { br + neighbors[i].row, bc + neighbors[i].col };
-
-    if (!all_in && !is_mv_in(&x->mv_limits, &neighbor_mv)) {
-      cost_list[i + 1] = INT_MAX;
-      continue;
-    }
-
-    cost_list[i + 1] =
-        fn_ptr->vf(src->buf, src->stride, get_buf_from_mv(ref, &neighbor_mv),
-                   ref->stride, &sse) +
-        mv_err_cost(&neighbor_mv, &fcenter_mv, x->nmvjointcost, x->mvcost,
-                    x->errorperbit);
-  }
-}
-
-// Complete the SAD list around the best integer position.
-//
-// If cost_list[0] is still INT_MAX the list has not been populated yet:
-// cost_list[0] is set to bestsad and cost_list[1..4] receive the raw SAD of
-// the four one-away neighbours (INT_MAX for neighbours outside
-// x->mv_limits). Otherwise the existing entries are kept and, when use_mvcost
-// is non-zero, the MV cost is added to every neighbour entry that is not
-// INT_MAX.
-static INLINE void calc_int_sad_list(const MACROBLOCK *x,
-                                     const MV *const ref_mv, int sadpb,
-                                     const aom_variance_fn_ptr_t *fn_ptr,
-                                     const MV *best_mv, int *cost_list,
-                                     const int use_mvcost, const int bestsad) {
-  static const MV neighbors[4] = { { 0, -1 }, { 1, 0 }, { 0, 1 }, { -1, 0 } };
-  const struct buf_2d *const src = &x->plane[0].src;
-  const struct buf_2d *const ref = &x->e_mbd.plane[0].pre[0];
-  const MV fcenter_mv = { ref_mv->row >> 3, ref_mv->col >> 3 };
-  const int br = best_mv->row;
-  const int bc = best_mv->col;
-  int all_in;
-  int i;
-
-  if (cost_list[0] != INT_MAX) {
-    // List already holds SADs; only the optional MV cost remains to be added.
-    if (!use_mvcost) return;
-    for (i = 0; i < 4; i++) {
-      const MV this_mv = { br + neighbors[i].row, bc + neighbors[i].col };
-      if (cost_list[i + 1] == INT_MAX) continue;
-      cost_list[i + 1] += mvsad_err_cost(x, &this_mv, &fcenter_mv, sadpb);
-    }
-    return;
-  }
-
-  cost_list[0] = bestsad;
-  all_in = check_bounds(&x->mv_limits, br, bc, 1);
-  for (i = 0; i < 4; i++) {
-    const MV this_mv = { br + neighbors[i].row, bc + neighbors[i].col };
-
-    if (!all_in && !is_mv_in(&x->mv_limits, &this_mv)) {
-      cost_list[i + 1] = INT_MAX;
-      continue;
-    }
-
-    cost_list[i + 1] =
-        fn_ptr->sdf(src->buf, src->stride, get_buf_from_mv(ref, &this_mv),
-                    ref->stride);
-  }
-}
-
-// Generic pattern search function that searches over multiple scales.
-// Each scale can have a different number of candidates and shape of
-// candidates as indicated in the num_candidates and candidates arrays
-// passed into this function.
-//
-// Parameters:
-//   x              - macroblock state; supplies the source/reference buffers
-//                    and mv_limits, and receives the result in x->best_mv.
-//   start_mv       - starting vector; clamped in place to x->mv_limits.
-//   search_param   - index into search_param_to_steps selecting the largest
-//                    scale to consider (must be < MAX_MVSEARCH_STEPS).
-//   sad_per_bit    - weight passed to mvsad_err_cost().
-//   do_init_search - when non-zero, first probe every scale up to the
-//                    selected one around the start point to choose the
-//                    starting scale.
-//   cost_list      - optional (may be NULL) 5-entry output, see the comment
-//                    near the end of the function.
-//   vfp            - function table; sdf is used for the SAD computations.
-//   use_mvcost     - when non-zero, CHECK_BETTER adds the MV cost to each
-//                    candidate before accepting it.
-//   center_mv      - vector the MV cost is measured against (shifted right
-//                    by 3 before use).
-//   num_candidates - number of valid entries per scale in candidates.
-//   candidates     - per-scale offsets from the current centre.
-//
-// Returns the best SAD-based cost found; the best position is written to
-// x->best_mv.as_mv.
-static int pattern_search(
- MACROBLOCK *x, MV *start_mv, int search_param, int sad_per_bit,
- int do_init_search, int *cost_list, const aom_variance_fn_ptr_t *vfp,
- int use_mvcost, const MV *center_mv,
- const int num_candidates[MAX_PATTERN_SCALES],
- const MV candidates[MAX_PATTERN_SCALES][MAX_PATTERN_CANDIDATES]) {
- const MACROBLOCKD *const xd = &x->e_mbd;
- static const int search_param_to_steps[MAX_MVSEARCH_STEPS] = {
- 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
- };
- int i, s, t;
- const struct buf_2d *const what = &x->plane[0].src;
- const struct buf_2d *const in_what = &xd->plane[0].pre[0];
- // True when the finest scale (scale 0) has exactly four candidates.
- const int last_is_4 = num_candidates[0] == 4;
- int br, bc;
- int bestsad = INT_MAX;
- int thissad;
- // Index of the candidate that produced the most recent move of the centre.
- int k = -1;
- const MV fcenter_mv = { center_mv->row >> 3, center_mv->col >> 3 };
- assert(search_param < MAX_MVSEARCH_STEPS);
- int best_init_s = search_param_to_steps[search_param];
- // adjust ref_mv to make sure it is within MV range
- clamp_mv(start_mv, x->mv_limits.col_min, x->mv_limits.col_max,
- x->mv_limits.row_min, x->mv_limits.row_max);
- br = start_mv->row;
- bc = start_mv->col;
- if (cost_list != NULL) {
- cost_list[0] = cost_list[1] = cost_list[2] = cost_list[3] = cost_list[4] =
- INT_MAX;
- }
-
- // Work out the start point for the search
- bestsad = vfp->sdf(what->buf, what->stride,
- get_buf_from_mv(in_what, start_mv), in_what->stride) +
- mvsad_err_cost(x, start_mv, &fcenter_mv, sad_per_bit);
-
- // Search all possible scales up to the search param around the center point
- // pick the scale of the point that is best as the starting scale of
- // further steps around it.
- if (do_init_search) {
- s = best_init_s;
- // -1 means "no scale improved on the centre".
- best_init_s = -1;
- for (t = 0; t <= s; ++t) {
- int best_site = -1;
- // Fast path: the whole pattern at this scale is inside the MV limits, so
- // candidates need no individual range test.
- if (check_bounds(&x->mv_limits, br, bc, 1 << t)) {
- for (i = 0; i < num_candidates[t]; i++) {
- const MV this_mv = { br + candidates[t][i].row,
- bc + candidates[t][i].col };
- thissad =
- vfp->sdf(what->buf, what->stride,
- get_buf_from_mv(in_what, &this_mv), in_what->stride);
- CHECK_BETTER
- }
- } else {
- for (i = 0; i < num_candidates[t]; i++) {
- const MV this_mv = { br + candidates[t][i].row,
- bc + candidates[t][i].col };
- if (!is_mv_in(&x->mv_limits, &this_mv)) continue;
- thissad =
- vfp->sdf(what->buf, what->stride,
- get_buf_from_mv(in_what, &this_mv), in_what->stride);
- CHECK_BETTER
- }
- }
- if (best_site == -1) {
- continue;
- } else {
- best_init_s = t;
- k = best_site;
- }
- }
- if (best_init_s != -1) {
- br += candidates[best_init_s][k].row;
- bc += candidates[best_init_s][k].col;
- }
- }
-
- // If the center point is still the best, just skip this and move to
- // the refinement step.
- if (best_init_s != -1) {
- // When a cost list is requested and scale 0 has four points, scale 0 is
- // handled separately below so the neighbour SADs can be recorded.
- const int last_s = (last_is_4 && cost_list != NULL);
- int best_site = -1;
- s = best_init_s;
-
- for (; s >= last_s; s--) {
- // No need to search all points the 1st time if initial search was used
- if (!do_init_search || s != best_init_s) {
- if (check_bounds(&x->mv_limits, br, bc, 1 << s)) {
- for (i = 0; i < num_candidates[s]; i++) {
- const MV this_mv = { br + candidates[s][i].row,
- bc + candidates[s][i].col };
- thissad =
- vfp->sdf(what->buf, what->stride,
- get_buf_from_mv(in_what, &this_mv), in_what->stride);
- CHECK_BETTER
- }
- } else {
- for (i = 0; i < num_candidates[s]; i++) {
- const MV this_mv = { br + candidates[s][i].row,
- bc + candidates[s][i].col };
- if (!is_mv_in(&x->mv_limits, &this_mv)) continue;
- thissad =
- vfp->sdf(what->buf, what->stride,
- get_buf_from_mv(in_what, &this_mv), in_what->stride);
- CHECK_BETTER
- }
- }
-
- if (best_site == -1) {
- continue;
- } else {
- br += candidates[s][best_site].row;
- bc += candidates[s][best_site].col;
- k = best_site;
- }
- }
-
- // Keep stepping at this scale, each time testing only the candidate that
- // caused the last move (k) and its two neighbours in the candidate list
- // (with wrap-around), until no further improvement is found.
- do {
- int next_chkpts_indices[PATTERN_CANDIDATES_REF];
- best_site = -1;
- next_chkpts_indices[0] = (k == 0) ? num_candidates[s] - 1 : k - 1;
- next_chkpts_indices[1] = k;
- next_chkpts_indices[2] = (k == num_candidates[s] - 1) ? 0 : k + 1;
-
- if (check_bounds(&x->mv_limits, br, bc, 1 << s)) {
- for (i = 0; i < PATTERN_CANDIDATES_REF; i++) {
- const MV this_mv = {
- br + candidates[s][next_chkpts_indices[i]].row,
- bc + candidates[s][next_chkpts_indices[i]].col
- };
- thissad =
- vfp->sdf(what->buf, what->stride,
- get_buf_from_mv(in_what, &this_mv), in_what->stride);
- CHECK_BETTER
- }
- } else {
- for (i = 0; i < PATTERN_CANDIDATES_REF; i++) {
- const MV this_mv = {
- br + candidates[s][next_chkpts_indices[i]].row,
- bc + candidates[s][next_chkpts_indices[i]].col
- };
- if (!is_mv_in(&x->mv_limits, &this_mv)) continue;
- thissad =
- vfp->sdf(what->buf, what->stride,
- get_buf_from_mv(in_what, &this_mv), in_what->stride);
- CHECK_BETTER
- }
- }
-
- if (best_site != -1) {
- // best_site indexes next_chkpts_indices here, not the candidate list.
- k = next_chkpts_indices[best_site];
- br += candidates[s][k].row;
- bc += candidates[s][k].col;
- }
- } while (best_site != -1);
- }
-
- // Note: If we enter the if below, then cost_list must be non-NULL.
- // (The loop above exits with s == last_s - 1, so s == 0 only when last_s
- // was 1, which requires cost_list != NULL.)
- if (s == 0) {
- cost_list[0] = bestsad;
- if (!do_init_search || s != best_init_s) {
- if (check_bounds(&x->mv_limits, br, bc, 1 << s)) {
- for (i = 0; i < num_candidates[s]; i++) {
- const MV this_mv = { br + candidates[s][i].row,
- bc + candidates[s][i].col };
- cost_list[i + 1] = thissad =
- vfp->sdf(what->buf, what->stride,
- get_buf_from_mv(in_what, &this_mv), in_what->stride);
- CHECK_BETTER
- }
- } else {
- for (i = 0; i < num_candidates[s]; i++) {
- const MV this_mv = { br + candidates[s][i].row,
- bc + candidates[s][i].col };
- if (!is_mv_in(&x->mv_limits, &this_mv)) continue;
- cost_list[i + 1] = thissad =
- vfp->sdf(what->buf, what->stride,
- get_buf_from_mv(in_what, &this_mv), in_what->stride);
- CHECK_BETTER
- }
- }
-
- if (best_site != -1) {
- br += candidates[s][best_site].row;
- bc += candidates[s][best_site].col;
- k = best_site;
- }
- }
- while (best_site != -1) {
- int next_chkpts_indices[PATTERN_CANDIDATES_REF];
- best_site = -1;
- next_chkpts_indices[0] = (k == 0) ? num_candidates[s] - 1 : k - 1;
- next_chkpts_indices[1] = k;
- next_chkpts_indices[2] = (k == num_candidates[s] - 1) ? 0 : k + 1;
- // The centre just moved by candidate k: reset the neighbour entries and
- // store the previous centre's value in the slot for the opposite
- // direction ((k + 2) % 4).
- cost_list[1] = cost_list[2] = cost_list[3] = cost_list[4] = INT_MAX;
- cost_list[((k + 2) % 4) + 1] = cost_list[0];
- cost_list[0] = bestsad;
-
- if (check_bounds(&x->mv_limits, br, bc, 1 << s)) {
- for (i = 0; i < PATTERN_CANDIDATES_REF; i++) {
- const MV this_mv = {
- br + candidates[s][next_chkpts_indices[i]].row,
- bc + candidates[s][next_chkpts_indices[i]].col
- };
- cost_list[next_chkpts_indices[i] + 1] = thissad =
- vfp->sdf(what->buf, what->stride,
- get_buf_from_mv(in_what, &this_mv), in_what->stride);
- CHECK_BETTER
- }
- } else {
- for (i = 0; i < PATTERN_CANDIDATES_REF; i++) {
- const MV this_mv = {
- br + candidates[s][next_chkpts_indices[i]].row,
- bc + candidates[s][next_chkpts_indices[i]].col
- };
- if (!is_mv_in(&x->mv_limits, &this_mv)) {
- cost_list[next_chkpts_indices[i] + 1] = INT_MAX;
- continue;
- }
- cost_list[next_chkpts_indices[i] + 1] = thissad =
- vfp->sdf(what->buf, what->stride,
- get_buf_from_mv(in_what, &this_mv), in_what->stride);
- CHECK_BETTER
- }
- }
-
- if (best_site != -1) {
- k = next_chkpts_indices[best_site];
- br += candidates[s][k].row;
- bc += candidates[s][k].col;
- }
- }
- }
- }
-
- // Returns the one-away integer pel cost/sad around the best as follows:
- // cost_list[0]: cost/sad at the best integer pel
- // cost_list[1]: cost/sad at delta {0, -1} (left) from the best integer pel
- // cost_list[2]: cost/sad at delta { 1, 0} (bottom) from the best integer pel
- // cost_list[3]: cost/sad at delta { 0, 1} (right) from the best integer pel
- // cost_list[4]: cost/sad at delta {-1, 0} (top) from the best integer pel
- if (cost_list) {
- const MV best_int_mv = { br, bc };
- if (last_is_4) {
- calc_int_sad_list(x, center_mv, sad_per_bit, vfp, &best_int_mv, cost_list,
- use_mvcost, bestsad);
- } else {
- calc_int_cost_list(x, center_mv, sad_per_bit, vfp, &best_int_mv,
- cost_list);
- }
- }
- x->best_mv.as_mv.row = br;
- x->best_mv.as_mv.col = bc;
- return bestsad;
-}
-
-// Returns the variance between the source block and the reference block at
-// *best_mv, plus (when use_mvcost is non-zero) the cost of coding best_mv
-// relative to center_mv.
-int av1_get_mvpred_var(const MACROBLOCK *x, const MV *best_mv,
-                       const MV *center_mv, const aom_variance_fn_ptr_t *vfp,
-                       int use_mvcost) {
-  const struct buf_2d *const src = &x->plane[0].src;
-  const struct buf_2d *const ref = &x->e_mbd.plane[0].pre[0];
-  // best_mv is multiplied by 8 before being costed against center_mv.
-  const MV scaled_mv = { best_mv->row * 8, best_mv->col * 8 };
-  const int mv_rate = use_mvcost
-                          ? mv_err_cost(&scaled_mv, center_mv, x->nmvjointcost,
-                                        x->mvcost, x->errorperbit)
-                          : 0;
-  unsigned int sse_unused;
-
-  return vfp->vf(src->buf, src->stride, get_buf_from_mv(ref, best_mv),
-                 ref->stride, &sse_unused) +
-         mv_rate;
-}
-
-// Compound-prediction counterpart of av1_get_mvpred_var(): the reference
-// block at *best_mv is combined with second_pred by vfp->jsvaf (when
-// xd->jcp_param.use_jnt_comp_avg is set) or vfp->svaf (otherwise), and the
-// resulting variance is returned, plus the MV cost when use_mvcost is
-// non-zero.
-int av1_get_mvpred_av_var(const MACROBLOCK *x, const MV *best_mv,
-                          const MV *center_mv, const uint8_t *second_pred,
-                          const aom_variance_fn_ptr_t *vfp, int use_mvcost) {
-  const MACROBLOCKD *const xd = &x->e_mbd;
-  const struct buf_2d *const src = &x->plane[0].src;
-  const struct buf_2d *const ref = &xd->plane[0].pre[0];
-  // best_mv is multiplied by 8 before being costed against center_mv.
-  const MV scaled_mv = { best_mv->row * 8, best_mv->col * 8 };
-  const int mv_rate = use_mvcost
-                          ? mv_err_cost(&scaled_mv, center_mv, x->nmvjointcost,
-                                        x->mvcost, x->errorperbit)
-                          : 0;
-  unsigned int sse_unused;
-
-  if (xd->jcp_param.use_jnt_comp_avg) {
-    return vfp->jsvaf(get_buf_from_mv(ref, best_mv), ref->stride, 0, 0,
-                      src->buf, src->stride, &sse_unused, second_pred,
-                      &xd->jcp_param) +
-           mv_rate;
-  }
-
-  return vfp->svaf(get_buf_from_mv(ref, best_mv), ref->stride, 0, 0, src->buf,
-                   src->stride, &sse_unused, second_pred) +
-         mv_rate;
-}
-
-// Masked counterpart of av1_get_mvpred_var(): returns the value of
-// vfp->msvf for the source block and the reference block at *best_mv, given
-// second_pred and the mask (mask, mask_stride, invert_mask), plus the MV cost
-// when use_mvcost is non-zero.
-int av1_get_mvpred_mask_var(const MACROBLOCK *x, const MV *best_mv,
-                            const MV *center_mv, const uint8_t *second_pred,
-                            const uint8_t *mask, int mask_stride,
-                            int invert_mask, const aom_variance_fn_ptr_t *vfp,
-                            int use_mvcost) {
-  const struct buf_2d *const src = &x->plane[0].src;
-  const struct buf_2d *const ref = &x->e_mbd.plane[0].pre[0];
-  // best_mv is multiplied by 8 before being costed against center_mv.
-  const MV scaled_mv = { best_mv->row * 8, best_mv->col * 8 };
-  const int mv_rate = use_mvcost
-                          ? mv_err_cost(&scaled_mv, center_mv, x->nmvjointcost,
-                                        x->mvcost, x->errorperbit)
-                          : 0;
-  unsigned int sse_unused;
-
-  return vfp->msvf(src->buf, src->stride, 0, 0, get_buf_from_mv(ref, best_mv),
-                   ref->stride, second_pred, mask, mask_stride, invert_mask,
-                   &sse_unused) +
-         mv_rate;
-}
-
-// Hexagon-pattern integer motion search.
-// Supplies the hexagon candidate tables below to pattern_search() and
-// forwards every other argument unchanged; the return value is whatever
-// pattern_search() returns.
-int av1_hex_search(MACROBLOCK *x, MV *start_mv, int search_param,
- int sad_per_bit, int do_init_search, int *cost_list,
- const aom_variance_fn_ptr_t *vfp, int use_mvcost,
- const MV *center_mv) {
- // First scale has 8-closest points, the rest have 6 points in hex shape
- // at increasing scales
- static const int hex_num_candidates[MAX_PATTERN_SCALES] = { 8, 6, 6, 6, 6, 6,
- 6, 6, 6, 6, 6 };
- // Note that the largest candidate step at each scale is 2^scale
- // Rows with only 6 initialisers leave their last two entries
- // zero-initialised; hex_num_candidates keeps them from being visited.
- /* clang-format off */
- static const MV hex_candidates[MAX_PATTERN_SCALES][MAX_PATTERN_CANDIDATES] = {
- { { -1, -1 }, { 0, -1 }, { 1, -1 }, { 1, 0 }, { 1, 1 }, { 0, 1 }, { -1, 1 },
- { -1, 0 } },
- { { -1, -2 }, { 1, -2 }, { 2, 0 }, { 1, 2 }, { -1, 2 }, { -2, 0 } },
- { { -2, -4 }, { 2, -4 }, { 4, 0 }, { 2, 4 }, { -2, 4 }, { -4, 0 } },
- { { -4, -8 }, { 4, -8 }, { 8, 0 }, { 4, 8 }, { -4, 8 }, { -8, 0 } },
- { { -8, -16 }, { 8, -16 }, { 16, 0 }, { 8, 16 }, { -8, 16 }, { -16, 0 } },
- { { -16, -32 }, { 16, -32 }, { 32, 0 }, { 16, 32 }, { -16, 32 },
- { -32, 0 } },
- { { -32, -64 }, { 32, -64 }, { 64, 0 }, { 32, 64 }, { -32, 64 },
- { -64, 0 } },
- { { -64, -128 }, { 64, -128 }, { 128, 0 }, { 64, 128 }, { -64, 128 },
- { -128, 0 } },
- { { -128, -256 }, { 128, -256 }, { 256, 0 }, { 128, 256 }, { -128, 256 },
- { -256, 0 } },
- { { -256, -512 }, { 256, -512 }, { 512, 0 }, { 256, 512 }, { -256, 512 },
- { -512, 0 } },
- { { -512, -1024 }, { 512, -1024 }, { 1024, 0 }, { 512, 1024 },
- { -512, 1024 }, { -1024, 0 } },
- };
- /* clang-format on */
- return pattern_search(x, start_mv, search_param, sad_per_bit, do_init_search,
- cost_list, vfp, use_mvcost, center_mv,
- hex_num_candidates, hex_candidates);
-}
-
-// Big-diamond-pattern integer motion search.
-// Supplies the diamond candidate tables below to pattern_search() and
-// forwards every other argument unchanged; the return value is whatever
-// pattern_search() returns.
-static int bigdia_search(MACROBLOCK *x, MV *start_mv, int search_param,
- int sad_per_bit, int do_init_search, int *cost_list,
- const aom_variance_fn_ptr_t *vfp, int use_mvcost,
- const MV *center_mv) {
- // First scale has 4-closest points, the rest have 8 points in diamond
- // shape at increasing scales
- static const int bigdia_num_candidates[MAX_PATTERN_SCALES] = {
- 4, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
- };
- // Note that the largest candidate step at each scale is 2^scale
- // The first row has only 4 initialisers; its remaining entries are
- // zero-initialised and bigdia_num_candidates keeps them from being visited.
- /* clang-format off */
- static const MV
- bigdia_candidates[MAX_PATTERN_SCALES][MAX_PATTERN_CANDIDATES] = {
- { { 0, -1 }, { 1, 0 }, { 0, 1 }, { -1, 0 } },
- { { -1, -1 }, { 0, -2 }, { 1, -1 }, { 2, 0 }, { 1, 1 }, { 0, 2 },
- { -1, 1 }, { -2, 0 } },
- { { -2, -2 }, { 0, -4 }, { 2, -2 }, { 4, 0 }, { 2, 2 }, { 0, 4 },
- { -2, 2 }, { -4, 0 } },
- { { -4, -4 }, { 0, -8 }, { 4, -4 }, { 8, 0 }, { 4, 4 }, { 0, 8 },
- { -4, 4 }, { -8, 0 } },
- { { -8, -8 }, { 0, -16 }, { 8, -8 }, { 16, 0 }, { 8, 8 }, { 0, 16 },
- { -8, 8 }, { -16, 0 } },
- { { -16, -16 }, { 0, -32 }, { 16, -16 }, { 32, 0 }, { 16, 16 },
- { 0, 32 }, { -16, 16 }, { -32, 0 } },
- { { -32, -32 }, { 0, -64 }, { 32, -32 }, { 64, 0 }, { 32, 32 },
- { 0, 64 }, { -32, 32 }, { -64, 0 } },
- { { -64, -64 }, { 0, -128 }, { 64, -64 }, { 128, 0 }, { 64, 64 },
- { 0, 128 }, { -64, 64 }, { -128, 0 } },
- { { -128, -128 }, { 0, -256 }, { 128, -128 }, { 256, 0 }, { 128, 128 },
- { 0, 256 }, { -128, 128 }, { -256, 0 } },
- { { -256, -256 }, { 0, -512 }, { 256, -256 }, { 512, 0 }, { 256, 256 },
- { 0, 512 }, { -256, 256 }, { -512, 0 } },
- { { -512, -512 }, { 0, -1024 }, { 512, -512 }, { 1024, 0 },
- { 512, 512 }, { 0, 1024 }, { -512, 512 }, { -1024, 0 } },
- };
- /* clang-format on */
- return pattern_search(x, start_mv, search_param, sad_per_bit, do_init_search,
- cost_list, vfp, use_mvcost, center_mv,
- bigdia_num_candidates, bigdia_candidates);
-}
-
-// Square-pattern integer motion search.
-// Supplies the square candidate tables below to pattern_search() and
-// forwards every other argument unchanged; the return value is whatever
-// pattern_search() returns.
-static int square_search(MACROBLOCK *x, MV *start_mv, int search_param,
- int sad_per_bit, int do_init_search, int *cost_list,
- const aom_variance_fn_ptr_t *vfp, int use_mvcost,
- const MV *center_mv) {
- // All scales have 8 closest points in square shape
- static const int square_num_candidates[MAX_PATTERN_SCALES] = {
- 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
- };
- // Note that the largest candidate step at each scale is 2^scale
- /* clang-format off */
- static const MV
- square_candidates[MAX_PATTERN_SCALES][MAX_PATTERN_CANDIDATES] = {
- { { -1, -1 }, { 0, -1 }, { 1, -1 }, { 1, 0 }, { 1, 1 }, { 0, 1 },
- { -1, 1 }, { -1, 0 } },
- { { -2, -2 }, { 0, -2 }, { 2, -2 }, { 2, 0 }, { 2, 2 }, { 0, 2 },
- { -2, 2 }, { -2, 0 } },
- { { -4, -4 }, { 0, -4 }, { 4, -4 }, { 4, 0 }, { 4, 4 }, { 0, 4 },
- { -4, 4 }, { -4, 0 } },
- { { -8, -8 }, { 0, -8 }, { 8, -8 }, { 8, 0 }, { 8, 8 }, { 0, 8 },
- { -8, 8 }, { -8, 0 } },
- { { -16, -16 }, { 0, -16 }, { 16, -16 }, { 16, 0 }, { 16, 16 },
- { 0, 16 }, { -16, 16 }, { -16, 0 } },
- { { -32, -32 }, { 0, -32 }, { 32, -32 }, { 32, 0 }, { 32, 32 },
- { 0, 32 }, { -32, 32 }, { -32, 0 } },
- { { -64, -64 }, { 0, -64 }, { 64, -64 }, { 64, 0 }, { 64, 64 },
- { 0, 64 }, { -64, 64 }, { -64, 0 } },
- { { -128, -128 }, { 0, -128 }, { 128, -128 }, { 128, 0 }, { 128, 128 },
- { 0, 128 }, { -128, 128 }, { -128, 0 } },
- { { -256, -256 }, { 0, -256 }, { 256, -256 }, { 256, 0 }, { 256, 256 },
- { 0, 256 }, { -256, 256 }, { -256, 0 } },
- { { -512, -512 }, { 0, -512 }, { 512, -512 }, { 512, 0 }, { 512, 512 },
- { 0, 512 }, { -512, 512 }, { -512, 0 } },
- { { -1024, -1024 }, { 0, -1024 }, { 1024, -1024 }, { 1024, 0 },
- { 1024, 1024 }, { 0, 1024 }, { -1024, 1024 }, { -1024, 0 } },
- };
- /* clang-format on */
- return pattern_search(x, start_mv, search_param, sad_per_bit, do_init_search,
- cost_list, vfp, use_mvcost, center_mv,
- square_num_candidates, square_candidates);
-}
-
-// Hexagon search with search_param raised to at least
-// MAX_MVSEARCH_STEPS - 2 before av1_hex_search() is called; all other
-// arguments are forwarded unchanged.
-static int fast_hex_search(MACROBLOCK *x, MV *ref_mv, int search_param,
-                           int sad_per_bit,
-                           int do_init_search,  // must be zero for fast_hex
-                           int *cost_list, const aom_variance_fn_ptr_t *vfp,
-                           int use_mvcost, const MV *center_mv) {
-  const int raised_param = AOMMAX(MAX_MVSEARCH_STEPS - 2, search_param);
-
-  return av1_hex_search(x, ref_mv, raised_param, sad_per_bit, do_init_search,
-                        cost_list, vfp, use_mvcost, center_mv);
-}
-
-// Big-diamond search with search_param raised to at least
-// MAX_MVSEARCH_STEPS - 2 before bigdia_search() is called; all other
-// arguments are forwarded unchanged.
-static int fast_dia_search(MACROBLOCK *x, MV *ref_mv, int search_param,
-                           int sad_per_bit, int do_init_search, int *cost_list,
-                           const aom_variance_fn_ptr_t *vfp, int use_mvcost,
-                           const MV *center_mv) {
-  const int raised_param = AOMMAX(MAX_MVSEARCH_STEPS - 2, search_param);
-
-  return bigdia_search(x, ref_mv, raised_param, sad_per_bit, do_init_search,
-                       cost_list, vfp, use_mvcost, center_mv);
-}
-
-#undef CHECK_BETTER
-
-// Exhaustive motion search around a given centre position with a given
-// step size.
-//
-// Parameters:
-//   x           - macroblock state; supplies the source/reference buffers and
-//                 mv_limits. x->second_best_mv receives the previous best
-//                 vector every time a better one is found.
-//   ref_mv      - vector the MV cost is measured against (passed to
-//                 mvsad_err_cost()).
-//   best_mv     - out: best vector found. May alias center_mv; the centre is
-//                 copied before best_mv is first written.
-//   range       - maximum offset searched on each side of the centre, further
-//                 limited so that no candidate leaves x->mv_limits.
-//   step        - row step, and column step when > 1. Must be >= 1. With
-//                 step == 1 every column is visited, four at a time where
-//                 possible.
-//   sad_per_bit - weight passed to mvsad_err_cost().
-//   fn_ptr      - function table; sdf and sdx4df are used.
-//   center_mv   - search centre; a local copy is clamped to x->mv_limits.
-//
-// Returns the best SAD-plus-MV-cost found.
-static int exhuastive_mesh_search(MACROBLOCK *x, MV *ref_mv, MV *best_mv,
-                                  int range, int step, int sad_per_bit,
-                                  const aom_variance_fn_ptr_t *fn_ptr,
-                                  const MV *center_mv) {
-  const MACROBLOCKD *const xd = &x->e_mbd;
-  const struct buf_2d *const what = &x->plane[0].src;
-  const struct buf_2d *const in_what = &xd->plane[0].pre[0];
-  MV fcenter_mv = { center_mv->row, center_mv->col };
-  unsigned int best_sad;
-  int r, c, i;
-  int start_col, end_col, start_row, end_row;
-  // With step == 1 the column loop advances by 4 because each iteration
-  // handles a group of up to four adjacent columns.
-  const int col_step = (step > 1) ? step : 4;
-
-  assert(step >= 1);
-
-  clamp_mv(&fcenter_mv, x->mv_limits.col_min, x->mv_limits.col_max,
-           x->mv_limits.row_min, x->mv_limits.row_max);
-  *best_mv = fcenter_mv;
-  best_sad =
-      fn_ptr->sdf(what->buf, what->stride,
-                  get_buf_from_mv(in_what, &fcenter_mv), in_what->stride) +
-      mvsad_err_cost(x, &fcenter_mv, ref_mv, sad_per_bit);
-
-  // Inclusive offset window around the centre, clipped to the MV limits.
-  start_row = AOMMAX(-range, x->mv_limits.row_min - fcenter_mv.row);
-  start_col = AOMMAX(-range, x->mv_limits.col_min - fcenter_mv.col);
-  end_row = AOMMIN(range, x->mv_limits.row_max - fcenter_mv.row);
-  end_col = AOMMIN(range, x->mv_limits.col_max - fcenter_mv.col);
-
-  for (r = start_row; r <= end_row; r += step) {
-    for (c = start_col; c <= end_col; c += col_step) {
-      // Step > 1 means we are not checking every location in this pass.
-      if (step > 1) {
-        const MV mv = { fcenter_mv.row + r, fcenter_mv.col + c };
-        unsigned int sad =
-            fn_ptr->sdf(what->buf, what->stride, get_buf_from_mv(in_what, &mv),
-                        in_what->stride);
-        // Only pay for the MV cost when the raw SAD is already competitive.
-        if (sad < best_sad) {
-          sad += mvsad_err_cost(x, &mv, ref_mv, sad_per_bit);
-          if (sad < best_sad) {
-            best_sad = sad;
-            x->second_best_mv.as_mv = *best_mv;
-            *best_mv = mv;
-          }
-        }
-      } else {
-        // 4 sads in a single call if we are checking every location
-        if (c + 3 <= end_col) {
-          unsigned int sads[4];
-          const uint8_t *addrs[4];
-          for (i = 0; i < 4; ++i) {
-            const MV mv = { fcenter_mv.row + r, fcenter_mv.col + c + i };
-            addrs[i] = get_buf_from_mv(in_what, &mv);
-          }
-          fn_ptr->sdx4df(what->buf, what->stride, addrs, in_what->stride, sads);
-
-          for (i = 0; i < 4; ++i) {
-            if (sads[i] < best_sad) {
-              const MV mv = { fcenter_mv.row + r, fcenter_mv.col + c + i };
-              const unsigned int sad =
-                  sads[i] + mvsad_err_cost(x, &mv, ref_mv, sad_per_bit);
-              if (sad < best_sad) {
-                best_sad = sad;
-                x->second_best_mv.as_mv = *best_mv;
-                *best_mv = mv;
-              }
-            }
-          }
-        } else {
-          // Fewer than four columns remain: c .. end_col inclusive. The bound
-          // is `<=` so that column end_col itself is evaluated, matching the
-          // inclusive window used by the other paths (the previous `<` bound
-          // silently skipped the last column).
-          for (i = 0; i <= end_col - c; ++i) {
-            const MV mv = { fcenter_mv.row + r, fcenter_mv.col + c + i };
-            unsigned int sad =
-                fn_ptr->sdf(what->buf, what->stride,
-                            get_buf_from_mv(in_what, &mv), in_what->stride);
-            if (sad < best_sad) {
-              sad += mvsad_err_cost(x, &mv, ref_mv, sad_per_bit);
-              if (sad < best_sad) {
-                best_sad = sad;
-                x->second_best_mv.as_mv = *best_mv;
-                *best_mv = mv;
-              }
-            }
-          }
-        }
-      }
-    }
-  }
-
-  return best_sad;
-}
-
-// Step-wise integer motion search driven by the site table in cfg.
-//
-// Parameters:
-//   x            - macroblock state; supplies the source/reference buffers
-//                  and mv_limits. x->second_best_mv receives the previous
-//                  best vector whenever the best position moves.
-//   cfg          - search-site configuration (ss, ss_count,
-//                  searches_per_step).
-//   ref_mv       - starting vector; clamped in place to x->mv_limits.
-//   best_mv      - out: best vector found.
-//   search_param - number of leading steps of the site table to skip.
-//   sad_per_bit  - weight passed to mvsad_err_cost().
-//   num00        - out: reset to 0, then incremented for every step that
-//                  leaves the best position at the starting point.
-//   fn_ptr       - function table; sdf and sdx4df are used.
-//   center_mv    - vector the MV cost is measured against (shifted right by 3
-//                  before use).
-//
-// Returns the best SAD-plus-MV-cost found.
-int av1_diamond_search_sad_c(MACROBLOCK *x, const search_site_config *cfg,
- MV *ref_mv, MV *best_mv, int search_param,
- int sad_per_bit, int *num00,
- const aom_variance_fn_ptr_t *fn_ptr,
- const MV *center_mv) {
- int i, j, step;
-
- const MACROBLOCKD *const xd = &x->e_mbd;
- uint8_t *what = x->plane[0].src.buf;
- const int what_stride = x->plane[0].src.stride;
- const uint8_t *in_what;
- const int in_what_stride = xd->plane[0].pre[0].stride;
- const uint8_t *best_address;
-
- unsigned int bestsad = INT_MAX;
- int best_site = 0;
- int last_site = 0;
-
- int ref_row;
- int ref_col;
-
- // search_param determines the length of the initial step and hence the number
- // of iterations.
- // 0 = initial step (MAX_FIRST_STEP) pel
- // 1 = (MAX_FIRST_STEP/2) pel,
- // 2 = (MAX_FIRST_STEP/4) pel...
- const search_site *ss = &cfg->ss[search_param * cfg->searches_per_step];
- const int tot_steps = (cfg->ss_count / cfg->searches_per_step) - search_param;
-
- const MV fcenter_mv = { center_mv->row >> 3, center_mv->col >> 3 };
- clamp_mv(ref_mv, x->mv_limits.col_min, x->mv_limits.col_max,
- x->mv_limits.row_min, x->mv_limits.row_max);
- ref_row = ref_mv->row;
- ref_col = ref_mv->col;
- *num00 = 0;
- best_mv->row = ref_row;
- best_mv->col = ref_col;
-
- // Work out the start point for the search
- in_what = xd->plane[0].pre[0].buf + ref_row * in_what_stride + ref_col;
- best_address = in_what;
-
- // Check the starting position
- bestsad = fn_ptr->sdf(what, what_stride, in_what, in_what_stride) +
- mvsad_err_cost(x, best_mv, &fcenter_mv, sad_per_bit);
-
- // Site index runs continuously across steps and starts at 1, so ss[0] is
- // never evaluated (presumably it is the zero-offset centre site — confirm
- // against the code that builds cfg->ss).
- i = 1;
-
- for (step = 0; step < tot_steps; step++) {
- int all_in = 1, t;
-
- // All_in is true if every one of the points we are checking are within
- // the bounds of the image.
- // NOTE(review): only the row of sites i, i+1 and the column of sites i+2,
- // i+3 are tested, which assumes those four sites are the extreme
- // up/down/left/right offsets of this step — confirm against the site table.
- all_in &= ((best_mv->row + ss[i].mv.row) > x->mv_limits.row_min);
- all_in &= ((best_mv->row + ss[i + 1].mv.row) < x->mv_limits.row_max);
- all_in &= ((best_mv->col + ss[i + 2].mv.col) > x->mv_limits.col_min);
- all_in &= ((best_mv->col + ss[i + 3].mv.col) < x->mv_limits.col_max);
-
- // If all the pixels are within the bounds we don't check whether the
- // search point is valid in this loop, otherwise we check each point
- // for validity..
- if (all_in) {
- unsigned int sad_array[4];
-
- // Sites are processed in groups of four with a single sdx4df call.
- for (j = 0; j < cfg->searches_per_step; j += 4) {
- unsigned char const *block_offset[4];
-
- for (t = 0; t < 4; t++)
- block_offset[t] = ss[i + t].offset + best_address;
-
- fn_ptr->sdx4df(what, what_stride, block_offset, in_what_stride,
- sad_array);
-
- for (t = 0; t < 4; t++, i++) {
- if (sad_array[t] < bestsad) {
- const MV this_mv = { best_mv->row + ss[i].mv.row,
- best_mv->col + ss[i].mv.col };
- sad_array[t] +=
- mvsad_err_cost(x, &this_mv, &fcenter_mv, sad_per_bit);
- if (sad_array[t] < bestsad) {
- bestsad = sad_array[t];
- best_site = i;
- }
- }
- }
- }
- } else {
- for (j = 0; j < cfg->searches_per_step; j++) {
- // Trap illegal vectors
- const MV this_mv = { best_mv->row + ss[i].mv.row,
- best_mv->col + ss[i].mv.col };
-
- if (is_mv_in(&x->mv_limits, &this_mv)) {
- const uint8_t *const check_here = ss[i].offset + best_address;
- unsigned int thissad =
- fn_ptr->sdf(what, what_stride, check_here, in_what_stride);
-
- if (thissad < bestsad) {
- thissad += mvsad_err_cost(x, &this_mv, &fcenter_mv, sad_per_bit);
- if (thissad < bestsad) {
- bestsad = thissad;
- best_site = i;
- }
- }
- }
- i++;
- }
- }
- // best_site differs from last_site only when this step found a better
- // site; in that case move the centre (vector and buffer address) to it.
- if (best_site != last_site) {
- x->second_best_mv.as_mv = *best_mv;
- best_mv->row += ss[best_site].mv.row;
- best_mv->col += ss[best_site].mv.col;
- best_address += ss[best_site].offset;
- last_site = best_site;
-#if defined(NEW_DIAMOND_SEARCH)
- // Keep moving in the same direction while doing so keeps improving the
- // cost and stays inside the MV limits.
- while (1) {
- const MV this_mv = { best_mv->row + ss[best_site].mv.row,
- best_mv->col + ss[best_site].mv.col };
- if (is_mv_in(&x->mv_limits, &this_mv)) {
- const uint8_t *const check_here = ss[best_site].offset + best_address;
- unsigned int thissad =
- fn_ptr->sdf(what, what_stride, check_here, in_what_stride);
- if (thissad < bestsad) {
- thissad += mvsad_err_cost(x, &this_mv, &fcenter_mv, sad_per_bit);
- if (thissad < bestsad) {
- bestsad = thissad;
- best_mv->row += ss[best_site].mv.row;
- best_mv->col += ss[best_site].mv.col;
- best_address += ss[best_site].offset;
- continue;
- }
- }
- }
- break;
- }
-#endif
- } else if (best_address == in_what) {
- // No move in this step and the best is still the starting position.
- (*num00)++;
- }
- }
- return bestsad;
-}
-
-/* Full-pel diamond search: runs cpi->diamond_search_sad() at step_param and
-   then at successively larger step parameters (up to further_steps extra
-   runs), keeping the vector with the lowest av1_get_mvpred_var() score in
-   x->best_mv.
-
-   do_refine: If last step (1-away) of n-step search doesn't pick the center
-              point as the best match, we will do a final 1-away diamond
-              refining search.
-
-   When cost_list is non-NULL it is filled by calc_int_cost_list() for the
-   final best vector. Returns the best score found. */
-static int full_pixel_diamond(const AV1_COMP *const cpi, MACROBLOCK *x,
-                              MV *mvp_full, int step_param, int sadpb,
-                              int further_steps, int do_refine, int *cost_list,
-                              const aom_variance_fn_ptr_t *fn_ptr,
-                              const MV *ref_mv) {
-  MV step_mv;
-  int num00 = 0;
-  int n;
-  int sme;
-  // The first run reports its "stayed at the start" count directly into n,
-  // so that many follow-up runs are skipped by the loop condition below.
-  int bestsme = cpi->diamond_search_sad(x, &cpi->ss_cfg, mvp_full, &step_mv,
-                                        step_param, sadpb, &n, fn_ptr, ref_mv);
-  if (bestsme < INT_MAX) {
-    bestsme = av1_get_mvpred_var(x, &step_mv, ref_mv, fn_ptr, 1);
-  }
-  x->best_mv.as_mv = step_mv;
-
-  // If there won't be more n-step search, check to see if refining search is
-  // needed.
-  if (n > further_steps) do_refine = 0;
-
-  while (n < further_steps) {
-    ++n;
-
-    // Skip runs that the previous run already showed to be redundant.
-    if (num00) {
-      --num00;
-      continue;
-    }
-
-    sme = cpi->diamond_search_sad(x, &cpi->ss_cfg, mvp_full, &step_mv,
-                                  step_param + n, sadpb, &num00, fn_ptr,
-                                  ref_mv);
-    if (sme < INT_MAX) {
-      sme = av1_get_mvpred_var(x, &step_mv, ref_mv, fn_ptr, 1);
-    }
-
-    // check to see if refining search is needed.
-    if (num00 > further_steps - n) do_refine = 0;
-
-    if (sme < bestsme) {
-      bestsme = sme;
-      x->best_mv.as_mv = step_mv;
-    }
-  }
-
-  // final 1-away diamond refining search
-  if (do_refine) {
-    const int search_range = 8;
-    MV refined_mv = x->best_mv.as_mv;
-
-    sme = av1_refining_search_sad(x, &refined_mv, sadpb, search_range, fn_ptr,
-                                  ref_mv);
-    if (sme < INT_MAX) {
-      sme = av1_get_mvpred_var(x, &refined_mv, ref_mv, fn_ptr, 1);
-    }
-    if (sme < bestsme) {
-      bestsme = sme;
-      x->best_mv.as_mv = refined_mv;
-    }
-  }
-
-  // Return cost list.
-  if (cost_list) {
-    calc_int_cost_list(x, ref_mv, sadpb, fn_ptr, &x->best_mv.as_mv, cost_list);
-  }
-  return bestsme;
-}
-
-#define MIN_RANGE 7
-#define MAX_RANGE 256
-#define MIN_INTERVAL 1
-// Runs a limited-range exhaustive mesh search using the mesh patterns taken
-// from the speed features (cpi->sf.mesh_patterns).
-//
-// The first pattern's range is enlarged to 5/4 of the largest component of
-// the starting vector when that is bigger (capped at MAX_RANGE), and its
-// interval is scaled to keep the original range/interval ratio. If that first
-// pass is coarse, the remaining patterns are run in order, each centred on
-// the previous result, stopping after a pattern with interval 1.
-//
-// The winning vector is written to *dst_mv and, when cost_list is non-NULL,
-// the cost list around it is filled in. Returns the av1_get_mvpred_var()
-// score of the winner, or INT_MAX when the first pattern's range/interval
-// are invalid (in which case *dst_mv and cost_list are left untouched).
-static int full_pixel_exhaustive(const AV1_COMP *const cpi, MACROBLOCK *x,
-                                 const MV *centre_mv_full, int sadpb,
-                                 int *cost_list,
-                                 const aom_variance_fn_ptr_t *fn_ptr,
-                                 const MV *ref_mv, MV *dst_mv) {
-  const SPEED_FEATURES *const sf = &cpi->sf;
-  // Serves as both the centre passed in and the result received back for
-  // every mesh pass.
-  MV search_mv = { centre_mv_full->row, centre_mv_full->col };
-  MV f_ref_mv = { ref_mv->row >> 3, ref_mv->col >> 3 };
-  int interval = sf->mesh_patterns[0].interval;
-  int range = sf->mesh_patterns[0].range;
-  int baseline_interval_divisor;
-  int largest_component;
-  int bestsme;
-  int i;
-
-  // Keep track of number of exhaustive calls (this frame in this thread).
-  ++(*x->ex_search_count_ptr);
-
-  // Trap illegal values for interval and range for this function.
-  if (range < MIN_RANGE || range > MAX_RANGE || interval < MIN_INTERVAL ||
-      interval > range) {
-    return INT_MAX;
-  }
-
-  baseline_interval_divisor = range / interval;
-
-  // Check size of proposed first range against magnitude of the centre
-  // value used as a starting point.
-  largest_component = AOMMAX(abs(search_mv.row), abs(search_mv.col));
-  range = AOMMAX(range, (5 * largest_component) / 4);
-  range = AOMMIN(range, MAX_RANGE);
-  interval = AOMMAX(interval, range / baseline_interval_divisor);
-
-  // initial search
-  bestsme = exhuastive_mesh_search(x, &f_ref_mv, &search_mv, range, interval,
-                                   sadpb, fn_ptr, &search_mv);
-
-  if (interval > MIN_INTERVAL && range > MIN_RANGE) {
-    // Progressive searches with range and step size decreasing each time
-    // till we reach a step size of 1. Then break out.
-    for (i = 1; i < MAX_MESH_STEP; ++i) {
-      const int pass_range = sf->mesh_patterns[i].range;
-      const int pass_interval = sf->mesh_patterns[i].interval;
-
-      bestsme = exhuastive_mesh_search(x, &f_ref_mv, &search_mv, pass_range,
-                                       pass_interval, sadpb, fn_ptr,
-                                       &search_mv);
-
-      if (pass_interval == 1) break;
-    }
-  }
-
-  if (bestsme < INT_MAX) {
-    bestsme = av1_get_mvpred_var(x, &search_mv, ref_mv, fn_ptr, 1);
-  }
-  *dst_mv = search_mv;
-
-  // Return cost list.
-  if (cost_list) {
-    calc_int_cost_list(x, ref_mv, sadpb, fn_ptr, dst_mv, cost_list);
-  }
-  return bestsme;
-}
-
-int av1_refining_search_sad(MACROBLOCK *x, MV *ref_mv, int error_per_bit,
- int search_range,
- const aom_variance_fn_ptr_t *fn_ptr,
- const MV *center_mv) {
- const MACROBLOCKD *const xd = &x->e_mbd;
- const MV neighbors[4] = { { -1, 0 }, { 0, -1 }, { 0, 1 }, { 1, 0 } };
- const struct buf_2d *const what = &x->plane[0].src;
- const struct buf_2d *const in_what = &xd->plane[0].pre[0];
- const MV fcenter_mv = { center_mv->row >> 3, center_mv->col >> 3 };
- const uint8_t *best_address = get_buf_from_mv(in_what, ref_mv);
- unsigned int best_sad =
- fn_ptr->sdf(what->buf, what->stride, best_address, in_what->stride) +
- mvsad_err_cost(x, ref_mv, &fcenter_mv, error_per_bit);
- int i, j;
-
- for (i = 0; i < search_range; i++) {
- int best_site = -1;
- const int all_in = ((ref_mv->row - 1) > x->mv_limits.row_min) &
- ((ref_mv->row + 1) < x->mv_limits.row_max) &
- ((ref_mv->col - 1) > x->mv_limits.col_min) &
- ((ref_mv->col + 1) < x->mv_limits.col_max);
-
- if (all_in) {
- unsigned int sads[4];
- const uint8_t *const positions[4] = { best_address - in_what->stride,
- best_address - 1, best_address + 1,
- best_address + in_what->stride };
-
- fn_ptr->sdx4df(what->buf, what->stride, positions, in_what->stride, sads);
-
- for (j = 0; j < 4; ++j) {
- if (sads[j] < best_sad) {
- const MV mv = { ref_mv->row + neighbors[j].row,
- ref_mv->col + neighbors[j].col };
- sads[j] += mvsad_err_cost(x, &mv, &fcenter_mv, error_per_bit);
- if (sads[j] < best_sad) {
- best_sad = sads[j];
- best_site = j;
- }
- }
- }
- } else {
- for (j = 0; j < 4; ++j) {
- const MV mv = { ref_mv->row + neighbors[j].row,
- ref_mv->col + neighbors[j].col };
-
- if (is_mv_in(&x->mv_limits, &mv)) {
- unsigned int sad =
- fn_ptr->sdf(what->buf, what->stride,
- get_buf_from_mv(in_what, &mv), in_what->stride);
- if (sad < best_sad) {
- sad += mvsad_err_cost(x, &mv, &fcenter_mv, error_per_bit);
- if (sad < best_sad) {
- best_sad = sad;
- best_site = j;
- }
- }
- }
- }
- }
-
- if (best_site == -1) {
- break;
- } else {
- x->second_best_mv.as_mv = *ref_mv;
- ref_mv->row += neighbors[best_site].row;
- ref_mv->col += neighbors[best_site].col;
- best_address = get_buf_from_mv(in_what, ref_mv);
- }
- }
-
- return best_sad;
-}
-
-// This function is called when we do joint motion search in comp_inter_inter
-// mode, or when searching for one component of an ext-inter compound mode.
-int av1_refining_search_8p_c(MACROBLOCK *x, int error_per_bit, int search_range,
- const aom_variance_fn_ptr_t *fn_ptr,
- const uint8_t *mask, int mask_stride,
- int invert_mask, const MV *center_mv,
- const uint8_t *second_pred) {
- static const search_neighbors neighbors[8] = {
- { { -1, 0 }, -1 * SEARCH_GRID_STRIDE_8P + 0 },
- { { 0, -1 }, 0 * SEARCH_GRID_STRIDE_8P - 1 },
- { { 0, 1 }, 0 * SEARCH_GRID_STRIDE_8P + 1 },
- { { 1, 0 }, 1 * SEARCH_GRID_STRIDE_8P + 0 },
- { { -1, -1 }, -1 * SEARCH_GRID_STRIDE_8P - 1 },
- { { 1, -1 }, 1 * SEARCH_GRID_STRIDE_8P - 1 },
- { { -1, 1 }, -1 * SEARCH_GRID_STRIDE_8P + 1 },
- { { 1, 1 }, 1 * SEARCH_GRID_STRIDE_8P + 1 }
- };
- const MACROBLOCKD *const xd = &x->e_mbd;
- const struct buf_2d *const what = &x->plane[0].src;
- const struct buf_2d *const in_what = &xd->plane[0].pre[0];
- const MV fcenter_mv = { center_mv->row >> 3, center_mv->col >> 3 };
- MV *best_mv = &x->best_mv.as_mv;
- unsigned int best_sad = INT_MAX;
- int i, j;
- uint8_t do_refine_search_grid[SEARCH_GRID_STRIDE_8P * SEARCH_GRID_STRIDE_8P] =
- { 0 };
- int grid_center = SEARCH_GRID_CENTER_8P;
- int grid_coord = grid_center;
-
- clamp_mv(best_mv, x->mv_limits.col_min, x->mv_limits.col_max,
- x->mv_limits.row_min, x->mv_limits.row_max);
- if (mask) {
- best_sad = fn_ptr->msdf(what->buf, what->stride,
- get_buf_from_mv(in_what, best_mv), in_what->stride,
- second_pred, mask, mask_stride, invert_mask) +
- mvsad_err_cost(x, best_mv, &fcenter_mv, error_per_bit);
- } else {
- if (xd->jcp_param.use_jnt_comp_avg)
- best_sad = fn_ptr->jsdaf(what->buf, what->stride,
- get_buf_from_mv(in_what, best_mv),
- in_what->stride, second_pred, &xd->jcp_param) +
- mvsad_err_cost(x, best_mv, &fcenter_mv, error_per_bit);
- else
- best_sad = fn_ptr->sdaf(what->buf, what->stride,
- get_buf_from_mv(in_what, best_mv),
- in_what->stride, second_pred) +
- mvsad_err_cost(x, best_mv, &fcenter_mv, error_per_bit);
- }
-
- do_refine_search_grid[grid_coord] = 1;
-
- for (i = 0; i < search_range; ++i) {
- int best_site = -1;
-
- for (j = 0; j < 8; ++j) {
- grid_coord = grid_center + neighbors[j].coord_offset;
- if (do_refine_search_grid[grid_coord] == 1) {
- continue;
- }
- const MV mv = { best_mv->row + neighbors[j].coord.row,
- best_mv->col + neighbors[j].coord.col };
-
- do_refine_search_grid[grid_coord] = 1;
- if (is_mv_in(&x->mv_limits, &mv)) {
- unsigned int sad;
- if (mask) {
- sad = fn_ptr->msdf(what->buf, what->stride,
- get_buf_from_mv(in_what, &mv), in_what->stride,
- second_pred, mask, mask_stride, invert_mask);
- } else {
- if (xd->jcp_param.use_jnt_comp_avg)
- sad = fn_ptr->jsdaf(what->buf, what->stride,
- get_buf_from_mv(in_what, &mv), in_what->stride,
- second_pred, &xd->jcp_param);
- else
- sad = fn_ptr->sdaf(what->buf, what->stride,
- get_buf_from_mv(in_what, &mv), in_what->stride,
- second_pred);
- }
- if (sad < best_sad) {
- sad += mvsad_err_cost(x, &mv, &fcenter_mv, error_per_bit);
- if (sad < best_sad) {
- best_sad = sad;
- best_site = j;
- }
- }
- }
- }
-
- if (best_site == -1) {
- break;
- } else {
- best_mv->row += neighbors[best_site].coord.row;
- best_mv->col += neighbors[best_site].coord.col;
- grid_center += neighbors[best_site].coord_offset;
- }
- }
- return best_sad;
-}
-
-#define MIN_EX_SEARCH_LIMIT 128
-static int is_exhaustive_allowed(const AV1_COMP *const cpi, MACROBLOCK *x) {
- const SPEED_FEATURES *const sf = &cpi->sf;
- const int max_ex =
- AOMMAX(MIN_EX_SEARCH_LIMIT,
- (*x->m_search_count_ptr * sf->max_exaustive_pct) / 100);
-
- return sf->allow_exhaustive_searches &&
- (sf->exhaustive_searches_thresh < INT_MAX) &&
- (*x->ex_search_count_ptr <= max_ex) && !cpi->rc.is_src_frame_alt_ref;
-}
-
-int av1_full_pixel_search(const AV1_COMP *cpi, MACROBLOCK *x, BLOCK_SIZE bsize,
- MV *mvp_full, int step_param, int method,
- int run_mesh_search, int error_per_bit,
- int *cost_list, const MV *ref_mv, int var_max, int rd,
- int x_pos, int y_pos, int intra) {
- const SPEED_FEATURES *const sf = &cpi->sf;
- const aom_variance_fn_ptr_t *fn_ptr = &cpi->fn_ptr[bsize];
- int var = 0;
-
- if (cost_list) {
- cost_list[0] = INT_MAX;
- cost_list[1] = INT_MAX;
- cost_list[2] = INT_MAX;
- cost_list[3] = INT_MAX;
- cost_list[4] = INT_MAX;
- }
-
- // Keep track of number of searches (this frame in this thread).
- ++(*x->m_search_count_ptr);
-
- switch (method) {
- case FAST_DIAMOND:
- var = fast_dia_search(x, mvp_full, step_param, error_per_bit, 0,
- cost_list, fn_ptr, 1, ref_mv);
- break;
- case FAST_HEX:
- var = fast_hex_search(x, mvp_full, step_param, error_per_bit, 0,
- cost_list, fn_ptr, 1, ref_mv);
- break;
- case HEX:
- var = av1_hex_search(x, mvp_full, step_param, error_per_bit, 1, cost_list,
- fn_ptr, 1, ref_mv);
- break;
- case SQUARE:
- var = square_search(x, mvp_full, step_param, error_per_bit, 1, cost_list,
- fn_ptr, 1, ref_mv);
- break;
- case BIGDIA:
- var = bigdia_search(x, mvp_full, step_param, error_per_bit, 1, cost_list,
- fn_ptr, 1, ref_mv);
- break;
- case NSTEP:
- var = full_pixel_diamond(cpi, x, mvp_full, step_param, error_per_bit,
- MAX_MVSEARCH_STEPS - 1 - step_param, 1,
- cost_list, fn_ptr, ref_mv);
-
- // Should we allow a follow on exhaustive search?
- if (is_exhaustive_allowed(cpi, x)) {
- int exhuastive_thr = sf->exhaustive_searches_thresh;
- exhuastive_thr >>=
- 10 - (mi_size_wide_log2[bsize] + mi_size_high_log2[bsize]);
-
- // Threshold variance for an exhaustive full search.
- if (var > exhuastive_thr) {
- int var_ex;
- MV tmp_mv_ex;
- var_ex =
- full_pixel_exhaustive(cpi, x, &x->best_mv.as_mv, error_per_bit,
- cost_list, fn_ptr, ref_mv, &tmp_mv_ex);
-
- if (var_ex < var) {
- var = var_ex;
- x->best_mv.as_mv = tmp_mv_ex;
- }
- }
- }
- break;
- default: assert(0 && "Invalid search method.");
- }
-
- // Should we allow a follow on exhaustive search?
- if (!run_mesh_search) {
- if (method == NSTEP) {
- if (is_exhaustive_allowed(cpi, x)) {
- int exhuastive_thr = sf->exhaustive_searches_thresh;
- exhuastive_thr >>=
- 10 - (mi_size_wide_log2[bsize] + mi_size_high_log2[bsize]);
- // Threshold variance for an exhaustive full search.
- if (var > exhuastive_thr) run_mesh_search = 1;
- }
- }
- }
-
- if (run_mesh_search) {
- int var_ex;
- MV tmp_mv_ex;
- var_ex = full_pixel_exhaustive(cpi, x, &x->best_mv.as_mv, error_per_bit,
- cost_list, fn_ptr, ref_mv, &tmp_mv_ex);
- if (var_ex < var) {
- var = var_ex;
- x->best_mv.as_mv = tmp_mv_ex;
- }
- }
-
- if (method != NSTEP && rd && var < var_max)
- var = av1_get_mvpred_var(x, &x->best_mv.as_mv, ref_mv, fn_ptr, 1);
-
- do {
- if (!intra || !av1_use_hash_me(&cpi->common)) break;
-
- // already single ME
- // get block size and original buffer of current block
- const int block_height = block_size_high[bsize];
- const int block_width = block_size_wide[bsize];
- if (block_height == block_width && x_pos >= 0 && y_pos >= 0) {
- if (block_width == 4 || block_width == 8 || block_width == 16 ||
- block_width == 32 || block_width == 64 || block_width == 128) {
- uint8_t *what = x->plane[0].src.buf;
- const int what_stride = x->plane[0].src.stride;
- uint32_t hash_value1, hash_value2;
- MV best_hash_mv;
- int best_hash_cost = INT_MAX;
-
- // for the hashMap
- hash_table *ref_frame_hash =
- intra
- ? &cpi->common.cur_frame->hash_table
- : av1_get_ref_frame_hash_map(cpi, x->e_mbd.mi[0]->ref_frame[0]);
-
- av1_get_block_hash_value(
- what, what_stride, block_width, &hash_value1, &hash_value2,
- x->e_mbd.cur_buf->flags & YV12_FLAG_HIGHBITDEPTH, x);
-
- const int count = av1_hash_table_count(ref_frame_hash, hash_value1);
- // for intra, at lest one matching can be found, itself.
- if (count <= (intra ? 1 : 0)) {
- break;
- }
-
- Iterator iterator =
- av1_hash_get_first_iterator(ref_frame_hash, hash_value1);
- for (int i = 0; i < count; i++, iterator_increment(&iterator)) {
- block_hash ref_block_hash = *(block_hash *)(iterator_get(&iterator));
- if (hash_value2 == ref_block_hash.hash_value2) {
- // For intra, make sure the prediction is from valid area.
- if (intra) {
- const int mi_col = x_pos / MI_SIZE;
- const int mi_row = y_pos / MI_SIZE;
- const MV dv = { 8 * (ref_block_hash.y - y_pos),
- 8 * (ref_block_hash.x - x_pos) };
- if (!av1_is_dv_valid(dv, &cpi->common, &x->e_mbd, mi_row, mi_col,
- bsize, cpi->common.seq_params.mib_size_log2))
- continue;
- }
- MV hash_mv;
- hash_mv.col = ref_block_hash.x - x_pos;
- hash_mv.row = ref_block_hash.y - y_pos;
- if (!is_mv_in(&x->mv_limits, &hash_mv)) continue;
- const int refCost =
- av1_get_mvpred_var(x, &hash_mv, ref_mv, fn_ptr, 1);
- if (refCost < best_hash_cost) {
- best_hash_cost = refCost;
- best_hash_mv = hash_mv;
- }
- }
- }
- if (best_hash_cost < var) {
- x->second_best_mv = x->best_mv;
- x->best_mv.as_mv = best_hash_mv;
- var = best_hash_cost;
- }
- }
- }
- } while (0);
-
- return var;
-}
-
-/* returns subpixel variance error function */
-#define DIST(r, c) \
- vfp->osvf(pre(y, y_stride, r, c), y_stride, sp(c), sp(r), z, mask, &sse)
-
-/* checks if (r, c) has better score than previous best */
-#define MVC(r, c) \
- (unsigned int)(mvcost \
- ? ((mvjcost[((r) != rr) * 2 + ((c) != rc)] + \
- mvcost[0][((r)-rr)] + (int64_t)mvcost[1][((c)-rc)]) * \
- error_per_bit + \
- 4096) >> \
- 13 \
- : 0)
-
-#define CHECK_BETTER(v, r, c) \
- if (c >= minc && c <= maxc && r >= minr && r <= maxr) { \
- thismse = (DIST(r, c)); \
- if ((v = MVC(r, c) + thismse) < besterr) { \
- besterr = v; \
- br = r; \
- bc = c; \
- *distortion = thismse; \
- *sse1 = sse; \
- } \
- } else { \
- v = INT_MAX; \
- }
-
-#undef CHECK_BETTER0
-#define CHECK_BETTER0(v, r, c) CHECK_BETTER(v, r, c)
-
-#undef CHECK_BETTER1
-#define CHECK_BETTER1(v, r, c) \
- if (c >= minc && c <= maxc && r >= minr && r <= maxr) { \
- MV this_mv = { r, c }; \
- thismse = upsampled_obmc_pref_error(xd, cm, mi_row, mi_col, &this_mv, \
- mask, vfp, z, pre(y, y_stride, r, c), \
- y_stride, sp(c), sp(r), w, h, &sse, \
- use_accurate_subpel_search); \
- if ((v = MVC(r, c) + thismse) < besterr) { \
- besterr = v; \
- br = r; \
- bc = c; \
- *distortion = thismse; \
- *sse1 = sse; \
- } \
- } else { \
- v = INT_MAX; \
- }
-
-static unsigned int setup_obmc_center_error(
- const int32_t *mask, const MV *bestmv, const MV *ref_mv, int error_per_bit,
- const aom_variance_fn_ptr_t *vfp, const int32_t *const wsrc,
- const uint8_t *const y, int y_stride, int offset, int *mvjcost,
- int *mvcost[2], unsigned int *sse1, int *distortion) {
- unsigned int besterr;
- besterr = vfp->ovf(y + offset, y_stride, wsrc, mask, sse1);
- *distortion = besterr;
- besterr += mv_err_cost(bestmv, ref_mv, mvjcost, mvcost, error_per_bit);
- return besterr;
-}
-
-static int upsampled_obmc_pref_error(
- MACROBLOCKD *xd, const AV1_COMMON *const cm, int mi_row, int mi_col,
- const MV *const mv, const int32_t *mask, const aom_variance_fn_ptr_t *vfp,
- const int32_t *const wsrc, const uint8_t *const y, int y_stride,
- int subpel_x_q3, int subpel_y_q3, int w, int h, unsigned int *sse,
- int subpel_search) {
- unsigned int besterr;
-
- DECLARE_ALIGNED(16, uint8_t, pred[2 * MAX_SB_SQUARE]);
- if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
- uint8_t *pred8 = CONVERT_TO_BYTEPTR(pred);
- aom_highbd_upsampled_pred(xd, cm, mi_row, mi_col, mv, pred8, w, h,
- subpel_x_q3, subpel_y_q3, y, y_stride, xd->bd,
- subpel_search);
- besterr = vfp->ovf(pred8, w, wsrc, mask, sse);
- } else {
- aom_upsampled_pred(xd, cm, mi_row, mi_col, mv, pred, w, h, subpel_x_q3,
- subpel_y_q3, y, y_stride, subpel_search);
-
- besterr = vfp->ovf(pred, w, wsrc, mask, sse);
- }
- return besterr;
-}
-
-static unsigned int upsampled_setup_obmc_center_error(
- MACROBLOCKD *xd, const AV1_COMMON *const cm, int mi_row, int mi_col,
- const int32_t *mask, const MV *bestmv, const MV *ref_mv, int error_per_bit,
- const aom_variance_fn_ptr_t *vfp, const int32_t *const wsrc,
- const uint8_t *const y, int y_stride, int w, int h, int offset,
- int *mvjcost, int *mvcost[2], unsigned int *sse1, int *distortion,
- int subpel_search) {
- unsigned int besterr = upsampled_obmc_pref_error(
- xd, cm, mi_row, mi_col, bestmv, mask, vfp, wsrc, y + offset, y_stride, 0,
- 0, w, h, sse1, subpel_search);
- *distortion = besterr;
- besterr += mv_err_cost(bestmv, ref_mv, mvjcost, mvcost, error_per_bit);
- return besterr;
-}
-
-int av1_find_best_obmc_sub_pixel_tree_up(
- MACROBLOCK *x, const AV1_COMMON *const cm, int mi_row, int mi_col,
- MV *bestmv, const MV *ref_mv, int allow_hp, int error_per_bit,
- const aom_variance_fn_ptr_t *vfp, int forced_stop, int iters_per_step,
- int *mvjcost, int *mvcost[2], int *distortion, unsigned int *sse1,
- int is_second, int use_accurate_subpel_search) {
- const int32_t *wsrc = x->wsrc_buf;
- const int32_t *mask = x->mask_buf;
- const int *const z = wsrc;
- const int *const src_address = z;
- MACROBLOCKD *xd = &x->e_mbd;
- struct macroblockd_plane *const pd = &xd->plane[0];
- MB_MODE_INFO *mbmi = xd->mi[0];
- unsigned int besterr = INT_MAX;
- unsigned int sse;
- unsigned int thismse;
-
- int rr = ref_mv->row;
- int rc = ref_mv->col;
- int br = bestmv->row * 8;
- int bc = bestmv->col * 8;
- int hstep = 4;
- int iter;
- int round = 3 - forced_stop;
- int tr = br;
- int tc = bc;
- const MV *search_step = search_step_table;
- int idx, best_idx = -1;
- unsigned int cost_array[5];
- int kr, kc;
- const int w = block_size_wide[mbmi->sb_type];
- const int h = block_size_high[mbmi->sb_type];
- int offset;
- int y_stride;
- const uint8_t *y;
-
- int minc, maxc, minr, maxr;
-
- set_subpel_mv_search_range(&x->mv_limits, &minc, &maxc, &minr, &maxr, ref_mv);
-
- y = pd->pre[is_second].buf;
- y_stride = pd->pre[is_second].stride;
- offset = bestmv->row * y_stride + bestmv->col;
-
- if (!allow_hp)
- if (round == 3) round = 2;
-
- bestmv->row *= 8;
- bestmv->col *= 8;
- // use_accurate_subpel_search can be 0 or 1 or 2
- if (use_accurate_subpel_search)
- besterr = upsampled_setup_obmc_center_error(
- xd, cm, mi_row, mi_col, mask, bestmv, ref_mv, error_per_bit, vfp, z, y,
- y_stride, w, h, offset, mvjcost, mvcost, sse1, distortion,
- use_accurate_subpel_search);
- else
- besterr = setup_obmc_center_error(mask, bestmv, ref_mv, error_per_bit, vfp,
- z, y, y_stride, offset, mvjcost, mvcost,
- sse1, distortion);
-
- for (iter = 0; iter < round; ++iter) {
- // Check vertical and horizontal sub-pixel positions.
- for (idx = 0; idx < 4; ++idx) {
- tr = br + search_step[idx].row;
- tc = bc + search_step[idx].col;
- if (tc >= minc && tc <= maxc && tr >= minr && tr <= maxr) {
- MV this_mv = { tr, tc };
- if (use_accurate_subpel_search) {
- thismse = upsampled_obmc_pref_error(
- xd, cm, mi_row, mi_col, &this_mv, mask, vfp, src_address,
- pre(y, y_stride, tr, tc), y_stride, sp(tc), sp(tr), w, h, &sse,
- use_accurate_subpel_search);
- } else {
- thismse = vfp->osvf(pre(y, y_stride, tr, tc), y_stride, sp(tc),
- sp(tr), src_address, mask, &sse);
- }
-
- cost_array[idx] = thismse + mv_err_cost(&this_mv, ref_mv, mvjcost,
- mvcost, error_per_bit);
- if (cost_array[idx] < besterr) {
- best_idx = idx;
- besterr = cost_array[idx];
- *distortion = thismse;
- *sse1 = sse;
- }
- } else {
- cost_array[idx] = INT_MAX;
- }
- }
-
- // Check diagonal sub-pixel position
- kc = (cost_array[0] <= cost_array[1] ? -hstep : hstep);
- kr = (cost_array[2] <= cost_array[3] ? -hstep : hstep);
-
- tc = bc + kc;
- tr = br + kr;
- if (tc >= minc && tc <= maxc && tr >= minr && tr <= maxr) {
- MV this_mv = { tr, tc };
-
- if (use_accurate_subpel_search) {
- thismse = upsampled_obmc_pref_error(
- xd, cm, mi_row, mi_col, &this_mv, mask, vfp, src_address,
- pre(y, y_stride, tr, tc), y_stride, sp(tc), sp(tr), w, h, &sse,
- use_accurate_subpel_search);
- } else {
- thismse = vfp->osvf(pre(y, y_stride, tr, tc), y_stride, sp(tc), sp(tr),
- src_address, mask, &sse);
- }
-
- cost_array[4] = thismse + mv_err_cost(&this_mv, ref_mv, mvjcost, mvcost,
- error_per_bit);
-
- if (cost_array[4] < besterr) {
- best_idx = 4;
- besterr = cost_array[4];
- *distortion = thismse;
- *sse1 = sse;
- }
- } else {
- cost_array[idx] = INT_MAX;
- }
-
- if (best_idx < 4 && best_idx >= 0) {
- br += search_step[best_idx].row;
- bc += search_step[best_idx].col;
- } else if (best_idx == 4) {
- br = tr;
- bc = tc;
- }
-
- if (iters_per_step > 1 && best_idx != -1) {
- if (use_accurate_subpel_search) {
- SECOND_LEVEL_CHECKS_BEST(1);
- } else {
- SECOND_LEVEL_CHECKS_BEST(0);
- }
- }
-
- tr = br;
- tc = bc;
-
- search_step += 4;
- hstep >>= 1;
- best_idx = -1;
- }
-
- // These lines insure static analysis doesn't warn that
- // tr and tc aren't used after the above point.
- (void)tr;
- (void)tc;
-
- bestmv->row = br;
- bestmv->col = bc;
-
- return besterr;
-}
-
-#undef DIST
-#undef MVC
-#undef CHECK_BETTER
-
-static int get_obmc_mvpred_var(const MACROBLOCK *x, const int32_t *wsrc,
- const int32_t *mask, const MV *best_mv,
- const MV *center_mv,
- const aom_variance_fn_ptr_t *vfp, int use_mvcost,
- int is_second) {
- const MACROBLOCKD *const xd = &x->e_mbd;
- const struct buf_2d *const in_what = &xd->plane[0].pre[is_second];
- const MV mv = { best_mv->row * 8, best_mv->col * 8 };
- unsigned int unused;
-
- return vfp->ovf(get_buf_from_mv(in_what, best_mv), in_what->stride, wsrc,
- mask, &unused) +
- (use_mvcost ? mv_err_cost(&mv, center_mv, x->nmvjointcost, x->mvcost,
- x->errorperbit)
- : 0);
-}
-
-int obmc_refining_search_sad(const MACROBLOCK *x, const int32_t *wsrc,
- const int32_t *mask, MV *ref_mv, int error_per_bit,
- int search_range,
- const aom_variance_fn_ptr_t *fn_ptr,
- const MV *center_mv, int is_second) {
- const MV neighbors[4] = { { -1, 0 }, { 0, -1 }, { 0, 1 }, { 1, 0 } };
- const MACROBLOCKD *const xd = &x->e_mbd;
- const struct buf_2d *const in_what = &xd->plane[0].pre[is_second];
- const MV fcenter_mv = { center_mv->row >> 3, center_mv->col >> 3 };
- unsigned int best_sad = fn_ptr->osdf(get_buf_from_mv(in_what, ref_mv),
- in_what->stride, wsrc, mask) +
- mvsad_err_cost(x, ref_mv, &fcenter_mv, error_per_bit);
- int i, j;
-
- for (i = 0; i < search_range; i++) {
- int best_site = -1;
-
- for (j = 0; j < 4; j++) {
- const MV mv = { ref_mv->row + neighbors[j].row,
- ref_mv->col + neighbors[j].col };
- if (is_mv_in(&x->mv_limits, &mv)) {
- unsigned int sad = fn_ptr->osdf(get_buf_from_mv(in_what, &mv),
- in_what->stride, wsrc, mask);
- if (sad < best_sad) {
- sad += mvsad_err_cost(x, &mv, &fcenter_mv, error_per_bit);
- if (sad < best_sad) {
- best_sad = sad;
- best_site = j;
- }
- }
- }
- }
-
- if (best_site == -1) {
- break;
- } else {
- ref_mv->row += neighbors[best_site].row;
- ref_mv->col += neighbors[best_site].col;
- }
- }
- return best_sad;
-}
-
-int obmc_diamond_search_sad(const MACROBLOCK *x, const search_site_config *cfg,
- const int32_t *wsrc, const int32_t *mask,
- MV *ref_mv, MV *best_mv, int search_param,
- int sad_per_bit, int *num00,
- const aom_variance_fn_ptr_t *fn_ptr,
- const MV *center_mv, int is_second) {
- const MACROBLOCKD *const xd = &x->e_mbd;
- const struct buf_2d *const in_what = &xd->plane[0].pre[is_second];
- // search_param determines the length of the initial step and hence the number
- // of iterations
- // 0 = initial step (MAX_FIRST_STEP) pel : 1 = (MAX_FIRST_STEP/2) pel, 2 =
- // (MAX_FIRST_STEP/4) pel... etc.
- const search_site *const ss = &cfg->ss[search_param * cfg->searches_per_step];
- const int tot_steps = (cfg->ss_count / cfg->searches_per_step) - search_param;
- const MV fcenter_mv = { center_mv->row >> 3, center_mv->col >> 3 };
- const uint8_t *best_address, *in_what_ref;
- int best_sad = INT_MAX;
- int best_site = 0;
- int last_site = 0;
- int i, j, step;
-
- clamp_mv(ref_mv, x->mv_limits.col_min, x->mv_limits.col_max,
- x->mv_limits.row_min, x->mv_limits.row_max);
- in_what_ref = in_what->buf + ref_mv->row * in_what->stride + ref_mv->col;
- best_address = in_what_ref;
- *num00 = 0;
- *best_mv = *ref_mv;
-
- // Check the starting position
- best_sad = fn_ptr->osdf(best_address, in_what->stride, wsrc, mask) +
- mvsad_err_cost(x, best_mv, &fcenter_mv, sad_per_bit);
-
- i = 1;
-
- for (step = 0; step < tot_steps; step++) {
- for (j = 0; j < cfg->searches_per_step; j++) {
- const MV mv = { best_mv->row + ss[i].mv.row,
- best_mv->col + ss[i].mv.col };
- if (is_mv_in(&x->mv_limits, &mv)) {
- int sad = fn_ptr->osdf(best_address + ss[i].offset, in_what->stride,
- wsrc, mask);
- if (sad < best_sad) {
- sad += mvsad_err_cost(x, &mv, &fcenter_mv, sad_per_bit);
- if (sad < best_sad) {
- best_sad = sad;
- best_site = i;
- }
- }
- }
-
- i++;
- }
-
- if (best_site != last_site) {
- best_mv->row += ss[best_site].mv.row;
- best_mv->col += ss[best_site].mv.col;
- best_address += ss[best_site].offset;
- last_site = best_site;
-#if defined(NEW_DIAMOND_SEARCH)
- while (1) {
- const MV this_mv = { best_mv->row + ss[best_site].mv.row,
- best_mv->col + ss[best_site].mv.col };
- if (is_mv_in(&x->mv_limits, &this_mv)) {
- int sad = fn_ptr->osdf(best_address + ss[best_site].offset,
- in_what->stride, wsrc, mask);
- if (sad < best_sad) {
- sad += mvsad_err_cost(x, &this_mv, &fcenter_mv, sad_per_bit);
- if (sad < best_sad) {
- best_sad = sad;
- best_mv->row += ss[best_site].mv.row;
- best_mv->col += ss[best_site].mv.col;
- best_address += ss[best_site].offset;
- continue;
- }
- }
- }
- break;
- }
-#endif
- } else if (best_address == in_what_ref) {
- (*num00)++;
- }
- }
- return best_sad;
-}
-
-static int obmc_full_pixel_diamond(const AV1_COMP *cpi, MACROBLOCK *x,
- MV *mvp_full, int step_param, int sadpb,
- int further_steps, int do_refine,
- const aom_variance_fn_ptr_t *fn_ptr,
- const MV *ref_mv, MV *dst_mv,
- int is_second) {
- const int32_t *wsrc = x->wsrc_buf;
- const int32_t *mask = x->mask_buf;
- MV temp_mv;
- int thissme, n, num00 = 0;
- int bestsme =
- obmc_diamond_search_sad(x, &cpi->ss_cfg, wsrc, mask, mvp_full, &temp_mv,
- step_param, sadpb, &n, fn_ptr, ref_mv, is_second);
- if (bestsme < INT_MAX)
- bestsme = get_obmc_mvpred_var(x, wsrc, mask, &temp_mv, ref_mv, fn_ptr, 1,
- is_second);
- *dst_mv = temp_mv;
-
- // If there won't be more n-step search, check to see if refining search is
- // needed.
- if (n > further_steps) do_refine = 0;
-
- while (n < further_steps) {
- ++n;
-
- if (num00) {
- num00--;
- } else {
- thissme = obmc_diamond_search_sad(x, &cpi->ss_cfg, wsrc, mask, mvp_full,
- &temp_mv, step_param + n, sadpb, &num00,
- fn_ptr, ref_mv, is_second);
- if (thissme < INT_MAX)
- thissme = get_obmc_mvpred_var(x, wsrc, mask, &temp_mv, ref_mv, fn_ptr,
- 1, is_second);
-
- // check to see if refining search is needed.
- if (num00 > further_steps - n) do_refine = 0;
-
- if (thissme < bestsme) {
- bestsme = thissme;
- *dst_mv = temp_mv;
- }
- }
- }
-
- // final 1-away diamond refining search
- if (do_refine) {
- const int search_range = 8;
- MV best_mv = *dst_mv;
- thissme = obmc_refining_search_sad(x, wsrc, mask, &best_mv, sadpb,
- search_range, fn_ptr, ref_mv, is_second);
- if (thissme < INT_MAX)
- thissme = get_obmc_mvpred_var(x, wsrc, mask, &best_mv, ref_mv, fn_ptr, 1,
- is_second);
- if (thissme < bestsme) {
- bestsme = thissme;
- *dst_mv = best_mv;
- }
- }
- return bestsme;
-}
-
-int av1_obmc_full_pixel_search(const AV1_COMP *cpi, MACROBLOCK *x, MV *mvp_full,
- int step_param, int sadpb, int further_steps,
- int do_refine,
- const aom_variance_fn_ptr_t *fn_ptr,
- const MV *ref_mv, MV *dst_mv, int is_second) {
- if (cpi->sf.obmc_full_pixel_search_level == 0) {
- return obmc_full_pixel_diamond(cpi, x, mvp_full, step_param, sadpb,
- further_steps, do_refine, fn_ptr, ref_mv,
- dst_mv, is_second);
- } else {
- const int32_t *wsrc = x->wsrc_buf;
- const int32_t *mask = x->mask_buf;
- const int search_range = 8;
- *dst_mv = *mvp_full;
- clamp_mv(dst_mv, x->mv_limits.col_min, x->mv_limits.col_max,
- x->mv_limits.row_min, x->mv_limits.row_max);
- int thissme = obmc_refining_search_sad(
- x, wsrc, mask, dst_mv, sadpb, search_range, fn_ptr, ref_mv, is_second);
- if (thissme < INT_MAX)
- thissme = get_obmc_mvpred_var(x, wsrc, mask, dst_mv, ref_mv, fn_ptr, 1,
- is_second);
- return thissme;
- }
-}
-
-// Note(yunqingwang): The following 2 functions are only used in the motion
-// vector unit test, which return extreme motion vectors allowed by the MV
-// limits.
-#define COMMON_MV_TEST \
- SETUP_SUBPEL_SEARCH; \
- \
- (void)error_per_bit; \
- (void)vfp; \
- (void)src_address; \
- (void)src_stride; \
- (void)y; \
- (void)y_stride; \
- (void)second_pred; \
- (void)w; \
- (void)h; \
- (void)use_accurate_subpel_search; \
- (void)offset; \
- (void)mvjcost; \
- (void)mvcost; \
- (void)sse1; \
- (void)distortion; \
- \
- (void)halfiters; \
- (void)quarteriters; \
- (void)eighthiters; \
- (void)whichdir; \
- (void)forced_stop; \
- (void)hstep; \
- \
- (void)tr; \
- (void)tc; \
- (void)sse; \
- (void)thismse; \
- (void)cost_list;
-// Return the maximum MV.
-int av1_return_max_sub_pixel_mv(MACROBLOCK *x, const AV1_COMMON *const cm,
- int mi_row, int mi_col, const MV *ref_mv,
- int allow_hp, int error_per_bit,
- const aom_variance_fn_ptr_t *vfp,
- int forced_stop, int iters_per_step,
- int *cost_list, int *mvjcost, int *mvcost[2],
- int *distortion, unsigned int *sse1,
- const uint8_t *second_pred, const uint8_t *mask,
- int mask_stride, int invert_mask, int w, int h,
- int use_accurate_subpel_search) {
- COMMON_MV_TEST;
- (void)mask;
- (void)mask_stride;
- (void)invert_mask;
- (void)minr;
- (void)minc;
-
- (void)cm;
- (void)mi_row;
- (void)mi_col;
-
- bestmv->row = maxr;
- bestmv->col = maxc;
- besterr = 0;
- // In the sub-pel motion search, if hp is not used, then the last bit of mv
- // has to be 0.
- lower_mv_precision(bestmv, allow_hp, 0);
- return besterr;
-}
-// Return the minimum MV.
-int av1_return_min_sub_pixel_mv(MACROBLOCK *x, const AV1_COMMON *const cm,
- int mi_row, int mi_col, const MV *ref_mv,
- int allow_hp, int error_per_bit,
- const aom_variance_fn_ptr_t *vfp,
- int forced_stop, int iters_per_step,
- int *cost_list, int *mvjcost, int *mvcost[2],
- int *distortion, unsigned int *sse1,
- const uint8_t *second_pred, const uint8_t *mask,
- int mask_stride, int invert_mask, int w, int h,
- int use_accurate_subpel_search) {
- COMMON_MV_TEST;
- (void)maxr;
- (void)maxc;
- (void)mask;
- (void)mask_stride;
- (void)invert_mask;
-
- (void)cm;
- (void)mi_row;
- (void)mi_col;
-
- bestmv->row = minr;
- bestmv->col = minc;
- besterr = 0;
- // In the sub-pel motion search, if hp is not used, then the last bit of mv
- // has to be 0.
- lower_mv_precision(bestmv, allow_hp, 0);
- return besterr;
-}
diff --git a/third_party/aom/av1/encoder/mcomp.h b/third_party/aom/av1/encoder/mcomp.h
deleted file mode 100644
index a975218b0..000000000
--- a/third_party/aom/av1/encoder/mcomp.h
+++ /dev/null
@@ -1,161 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#ifndef AOM_AV1_ENCODER_MCOMP_H_
-#define AOM_AV1_ENCODER_MCOMP_H_
-
-#include "av1/encoder/block.h"
-#include "aom_dsp/variance.h"
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-// The maximum number of steps in a step search given the largest
-// allowed initial step
-#define MAX_MVSEARCH_STEPS 11
-// Max full pel mv specified in the unit of full pixel
-// Enable the use of motion vector in range [-1023, 1023].
-#define MAX_FULL_PEL_VAL ((1 << (MAX_MVSEARCH_STEPS - 1)) - 1)
-// Maximum size of the first step in full pel units
-#define MAX_FIRST_STEP (1 << (MAX_MVSEARCH_STEPS - 1))
-// Allowed motion vector pixel distance outside image border
-// for Block_16x16
-#define BORDER_MV_PIXELS_B16 (16 + AOM_INTERP_EXTEND)
-
-#define SEARCH_RANGE_8P 3
-#define SEARCH_GRID_STRIDE_8P (2 * SEARCH_RANGE_8P + 1)
-#define SEARCH_GRID_CENTER_8P \
- (SEARCH_RANGE_8P * SEARCH_GRID_STRIDE_8P + SEARCH_RANGE_8P)
-
-// motion search site
-typedef struct search_site {
- MV mv;
- int offset;
-} search_site;
-
-typedef struct search_site_config {
- search_site ss[8 * MAX_MVSEARCH_STEPS + 1];
- int ss_count;
- int searches_per_step;
-} search_site_config;
-
-typedef struct {
- MV coord;
- int coord_offset;
-} search_neighbors;
-
-void av1_init_dsmotion_compensation(search_site_config *cfg, int stride);
-void av1_init3smotion_compensation(search_site_config *cfg, int stride);
-
-void av1_set_mv_search_range(MvLimits *mv_limits, const MV *mv);
-
-int av1_mv_bit_cost(const MV *mv, const MV *ref, const int *mvjcost,
- int *mvcost[2], int weight);
-
-// Utility to compute variance + MV rate cost for a given MV
-int av1_get_mvpred_var(const MACROBLOCK *x, const MV *best_mv,
- const MV *center_mv, const aom_variance_fn_ptr_t *vfp,
- int use_mvcost);
-int av1_get_mvpred_av_var(const MACROBLOCK *x, const MV *best_mv,
- const MV *center_mv, const uint8_t *second_pred,
- const aom_variance_fn_ptr_t *vfp, int use_mvcost);
-int av1_get_mvpred_mask_var(const MACROBLOCK *x, const MV *best_mv,
- const MV *center_mv, const uint8_t *second_pred,
- const uint8_t *mask, int mask_stride,
- int invert_mask, const aom_variance_fn_ptr_t *vfp,
- int use_mvcost);
-
-struct AV1_COMP;
-struct SPEED_FEATURES;
-
-int av1_init_search_range(int size);
-
-int av1_refining_search_sad(struct macroblock *x, MV *ref_mv, int sad_per_bit,
- int distance, const aom_variance_fn_ptr_t *fn_ptr,
- const MV *center_mv);
-
-// Runs sequence of diamond searches in smaller steps for RD.
-int av1_full_pixel_diamond(const struct AV1_COMP *cpi, MACROBLOCK *x,
- MV *mvp_full, int step_param, int sadpb,
- int further_steps, int do_refine, int *cost_list,
- const aom_variance_fn_ptr_t *fn_ptr,
- const MV *ref_mv, MV *dst_mv);
-
-int av1_hex_search(MACROBLOCK *x, MV *start_mv, int search_param,
- int sad_per_bit, int do_init_search, int *cost_list,
- const aom_variance_fn_ptr_t *vfp, int use_mvcost,
- const MV *center_mv);
-
-typedef int(fractional_mv_step_fp)(
- MACROBLOCK *x, const AV1_COMMON *const cm, int mi_row, int mi_col,
- const MV *ref_mv, int allow_hp, int error_per_bit,
- const aom_variance_fn_ptr_t *vfp,
- int forced_stop, // 0 - full, 1 - qtr only, 2 - half only
- int iters_per_step, int *cost_list, int *mvjcost, int *mvcost[2],
- int *distortion, unsigned int *sse1, const uint8_t *second_pred,
- const uint8_t *mask, int mask_stride, int invert_mask, int w, int h,
- int use_accurate_subpel_search);
-
-extern fractional_mv_step_fp av1_find_best_sub_pixel_tree;
-extern fractional_mv_step_fp av1_find_best_sub_pixel_tree_pruned;
-extern fractional_mv_step_fp av1_find_best_sub_pixel_tree_pruned_more;
-extern fractional_mv_step_fp av1_find_best_sub_pixel_tree_pruned_evenmore;
-extern fractional_mv_step_fp av1_return_max_sub_pixel_mv;
-extern fractional_mv_step_fp av1_return_min_sub_pixel_mv;
-
-typedef int (*av1_full_search_fn_t)(const MACROBLOCK *x, const MV *ref_mv,
- int sad_per_bit, int distance,
- const aom_variance_fn_ptr_t *fn_ptr,
- const MV *center_mv, MV *best_mv);
-
-typedef int (*av1_diamond_search_fn_t)(
- MACROBLOCK *x, const search_site_config *cfg, MV *ref_mv, MV *best_mv,
- int search_param, int sad_per_bit, int *num00,
- const aom_variance_fn_ptr_t *fn_ptr, const MV *center_mv);
-
-int av1_refining_search_8p_c(MACROBLOCK *x, int error_per_bit, int search_range,
- const aom_variance_fn_ptr_t *fn_ptr,
- const uint8_t *mask, int mask_stride,
- int invert_mask, const MV *center_mv,
- const uint8_t *second_pred);
-
-int av1_full_pixel_search(const struct AV1_COMP *cpi, MACROBLOCK *x,
- BLOCK_SIZE bsize, MV *mvp_full, int step_param,
- int method, int run_mesh_search, int error_per_bit,
- int *cost_list, const MV *ref_mv, int var_max, int rd,
- int x_pos, int y_pos, int intra);
-
-int av1_obmc_full_pixel_search(const struct AV1_COMP *cpi, MACROBLOCK *x,
- MV *mvp_full, int step_param, int sadpb,
- int further_steps, int do_refine,
- const aom_variance_fn_ptr_t *fn_ptr,
- const MV *ref_mv, MV *dst_mv, int is_second);
-int av1_find_best_obmc_sub_pixel_tree_up(
- MACROBLOCK *x, const AV1_COMMON *const cm, int mi_row, int mi_col,
- MV *bestmv, const MV *ref_mv, int allow_hp, int error_per_bit,
- const aom_variance_fn_ptr_t *vfp, int forced_stop, int iters_per_step,
- int *mvjcost, int *mvcost[2], int *distortion, unsigned int *sse1,
- int is_second, int use_accurate_subpel_search);
-
-unsigned int av1_compute_motion_cost(const struct AV1_COMP *cpi,
- MACROBLOCK *const x, BLOCK_SIZE bsize,
- int mi_row, int mi_col, const MV *this_mv);
-unsigned int av1_refine_warped_mv(const struct AV1_COMP *cpi,
- MACROBLOCK *const x, BLOCK_SIZE bsize,
- int mi_row, int mi_col, int *pts0,
- int *pts_inref0, int total_samples);
-
-#ifdef __cplusplus
-} // extern "C"
-#endif
-
-#endif // AOM_AV1_ENCODER_MCOMP_H_
diff --git a/third_party/aom/av1/encoder/mips/msa/error_msa.c b/third_party/aom/av1/encoder/mips/msa/error_msa.c
deleted file mode 100644
index 2e86dee43..000000000
--- a/third_party/aom/av1/encoder/mips/msa/error_msa.c
+++ /dev/null
@@ -1,109 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#include "config/av1_rtcd.h"
-
-#include "aom_dsp/mips/macros_msa.h"
-
-#define BLOCK_ERROR_BLOCKSIZE_MSA(BSize) \
- static int64_t block_error_##BSize##size_msa( \
- const int16_t *coeff_ptr, const int16_t *dq_coeff_ptr, int64_t *ssz) { \
- int64_t err = 0; \
- uint32_t loop_cnt; \
- v8i16 coeff, dq_coeff, coeff_r_h, coeff_l_h; \
- v4i32 diff_r, diff_l, coeff_r_w, coeff_l_w; \
- v2i64 sq_coeff_r, sq_coeff_l; \
- v2i64 err0, err_dup0, err1, err_dup1; \
- \
- coeff = LD_SH(coeff_ptr); \
- dq_coeff = LD_SH(dq_coeff_ptr); \
- UNPCK_SH_SW(coeff, coeff_r_w, coeff_l_w); \
- ILVRL_H2_SH(coeff, dq_coeff, coeff_r_h, coeff_l_h); \
- HSUB_UH2_SW(coeff_r_h, coeff_l_h, diff_r, diff_l); \
- DOTP_SW2_SD(coeff_r_w, coeff_l_w, coeff_r_w, coeff_l_w, sq_coeff_r, \
- sq_coeff_l); \
- DOTP_SW2_SD(diff_r, diff_l, diff_r, diff_l, err0, err1); \
- \
- coeff = LD_SH(coeff_ptr + 8); \
- dq_coeff = LD_SH(dq_coeff_ptr + 8); \
- UNPCK_SH_SW(coeff, coeff_r_w, coeff_l_w); \
- ILVRL_H2_SH(coeff, dq_coeff, coeff_r_h, coeff_l_h); \
- HSUB_UH2_SW(coeff_r_h, coeff_l_h, diff_r, diff_l); \
- DPADD_SD2_SD(coeff_r_w, coeff_l_w, sq_coeff_r, sq_coeff_l); \
- DPADD_SD2_SD(diff_r, diff_l, err0, err1); \
- \
- coeff_ptr += 16; \
- dq_coeff_ptr += 16; \
- \
- for (loop_cnt = ((BSize >> 4) - 1); loop_cnt--;) { \
- coeff = LD_SH(coeff_ptr); \
- dq_coeff = LD_SH(dq_coeff_ptr); \
- UNPCK_SH_SW(coeff, coeff_r_w, coeff_l_w); \
- ILVRL_H2_SH(coeff, dq_coeff, coeff_r_h, coeff_l_h); \
- HSUB_UH2_SW(coeff_r_h, coeff_l_h, diff_r, diff_l); \
- DPADD_SD2_SD(coeff_r_w, coeff_l_w, sq_coeff_r, sq_coeff_l); \
- DPADD_SD2_SD(diff_r, diff_l, err0, err1); \
- \
- coeff = LD_SH(coeff_ptr + 8); \
- dq_coeff = LD_SH(dq_coeff_ptr + 8); \
- UNPCK_SH_SW(coeff, coeff_r_w, coeff_l_w); \
- ILVRL_H2_SH(coeff, dq_coeff, coeff_r_h, coeff_l_h); \
- HSUB_UH2_SW(coeff_r_h, coeff_l_h, diff_r, diff_l); \
- DPADD_SD2_SD(coeff_r_w, coeff_l_w, sq_coeff_r, sq_coeff_l); \
- DPADD_SD2_SD(diff_r, diff_l, err0, err1); \
- \
- coeff_ptr += 16; \
- dq_coeff_ptr += 16; \
- } \
- \
- err_dup0 = __msa_splati_d(sq_coeff_r, 1); \
- err_dup1 = __msa_splati_d(sq_coeff_l, 1); \
- sq_coeff_r += err_dup0; \
- sq_coeff_l += err_dup1; \
- *ssz = __msa_copy_s_d(sq_coeff_r, 0); \
- *ssz += __msa_copy_s_d(sq_coeff_l, 0); \
- \
- err_dup0 = __msa_splati_d(err0, 1); \
- err_dup1 = __msa_splati_d(err1, 1); \
- err0 += err_dup0; \
- err1 += err_dup1; \
- err = __msa_copy_s_d(err0, 0); \
- err += __msa_copy_s_d(err1, 0); \
- \
- return err; \
- }
-
-/* clang-format off */
-BLOCK_ERROR_BLOCKSIZE_MSA(16)
-BLOCK_ERROR_BLOCKSIZE_MSA(64)
-BLOCK_ERROR_BLOCKSIZE_MSA(256)
-BLOCK_ERROR_BLOCKSIZE_MSA(1024)
-/* clang-format on */
-
-int64_t av1_block_error_msa(const tran_low_t *coeff_ptr,
- const tran_low_t *dq_coeff_ptr, intptr_t blk_size,
- int64_t *ssz) {
- int64_t err;
- const int16_t *coeff = (const int16_t *)coeff_ptr;
- const int16_t *dq_coeff = (const int16_t *)dq_coeff_ptr;
-
- switch (blk_size) {
- case 16: err = block_error_16size_msa(coeff, dq_coeff, ssz); break;
- case 64: err = block_error_64size_msa(coeff, dq_coeff, ssz); break;
- case 256: err = block_error_256size_msa(coeff, dq_coeff, ssz); break;
- case 1024: err = block_error_1024size_msa(coeff, dq_coeff, ssz); break;
- default:
- err = av1_block_error_c(coeff_ptr, dq_coeff_ptr, blk_size, ssz);
- break;
- }
-
- return err;
-}
diff --git a/third_party/aom/av1/encoder/mips/msa/fdct4x4_msa.c b/third_party/aom/av1/encoder/mips/msa/fdct4x4_msa.c
deleted file mode 100644
index 085c08bfb..000000000
--- a/third_party/aom/av1/encoder/mips/msa/fdct4x4_msa.c
+++ /dev/null
@@ -1,46 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#include <assert.h>
-
-#include "av1/common/enums.h"
-
-void av1_fwht4x4_msa(const int16_t *input, int16_t *output,
- int32_t src_stride) {
- v8i16 in0, in1, in2, in3, in4;
-
- LD_SH4(input, src_stride, in0, in1, in2, in3);
-
- in0 += in1;
- in3 -= in2;
- in4 = (in0 - in3) >> 1;
- SUB2(in4, in1, in4, in2, in1, in2);
- in0 -= in2;
- in3 += in1;
-
- TRANSPOSE4x4_SH_SH(in0, in2, in3, in1, in0, in2, in3, in1);
-
- in0 += in2;
- in1 -= in3;
- in4 = (in0 - in1) >> 1;
- SUB2(in4, in2, in4, in3, in2, in3);
- in0 -= in3;
- in1 += in2;
-
- SLLI_4V(in0, in1, in2, in3, 2);
-
- TRANSPOSE4x4_SH_SH(in0, in3, in1, in2, in0, in3, in1, in2);
-
- ST4x2_UB(in0, output, 4);
- ST4x2_UB(in3, output + 4, 4);
- ST4x2_UB(in1, output + 8, 4);
- ST4x2_UB(in2, output + 12, 4);
-}
diff --git a/third_party/aom/av1/encoder/mips/msa/temporal_filter_msa.c b/third_party/aom/av1/encoder/mips/msa/temporal_filter_msa.c
deleted file mode 100644
index 531ae090a..000000000
--- a/third_party/aom/av1/encoder/mips/msa/temporal_filter_msa.c
+++ /dev/null
@@ -1,285 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#include "config/av1_rtcd.h"
-
-#include "aom_dsp/mips/macros_msa.h"
-
-static void temporal_filter_apply_8size_msa(uint8_t *frm1_ptr, uint32_t stride,
- uint8_t *frm2_ptr, int32_t filt_sth,
- int32_t filt_wgt, uint32_t *acc,
- uint16_t *cnt) {
- uint32_t row;
- uint64_t f0, f1, f2, f3;
- v16i8 frm2, frm1 = { 0 };
- v16i8 frm4, frm3 = { 0 };
- v16u8 frm_r, frm_l;
- v8i16 frm2_r, frm2_l;
- v8i16 diff0, diff1, mod0_h, mod1_h;
- v4i32 cnst3, cnst16, filt_wt, strength;
- v4i32 mod0_w, mod1_w, mod2_w, mod3_w;
- v4i32 diff0_r, diff0_l, diff1_r, diff1_l;
- v4i32 frm2_rr, frm2_rl, frm2_lr, frm2_ll;
- v4i32 acc0, acc1, acc2, acc3;
- v8i16 cnt0, cnt1;
-
- filt_wt = __msa_fill_w(filt_wgt);
- strength = __msa_fill_w(filt_sth);
- cnst3 = __msa_ldi_w(3);
- cnst16 = __msa_ldi_w(16);
-
- for (row = 2; row--;) {
- LD4(frm1_ptr, stride, f0, f1, f2, f3);
- frm1_ptr += (4 * stride);
-
- LD_SB2(frm2_ptr, 16, frm2, frm4);
- frm2_ptr += 32;
-
- LD_SW2(acc, 4, acc0, acc1);
- LD_SW2(acc + 8, 4, acc2, acc3);
- LD_SH2(cnt, 8, cnt0, cnt1);
-
- INSERT_D2_SB(f0, f1, frm1);
- INSERT_D2_SB(f2, f3, frm3);
- ILVRL_B2_UB(frm1, frm2, frm_r, frm_l);
- HSUB_UB2_SH(frm_r, frm_l, diff0, diff1);
- UNPCK_SH_SW(diff0, diff0_r, diff0_l);
- UNPCK_SH_SW(diff1, diff1_r, diff1_l);
- MUL4(diff0_r, diff0_r, diff0_l, diff0_l, diff1_r, diff1_r, diff1_l, diff1_l,
- mod0_w, mod1_w, mod2_w, mod3_w);
- MUL4(mod0_w, cnst3, mod1_w, cnst3, mod2_w, cnst3, mod3_w, cnst3, mod0_w,
- mod1_w, mod2_w, mod3_w);
- SRAR_W4_SW(mod0_w, mod1_w, mod2_w, mod3_w, strength);
-
- diff0_r = (mod0_w < cnst16);
- diff0_l = (mod1_w < cnst16);
- diff1_r = (mod2_w < cnst16);
- diff1_l = (mod3_w < cnst16);
-
- SUB4(cnst16, mod0_w, cnst16, mod1_w, cnst16, mod2_w, cnst16, mod3_w, mod0_w,
- mod1_w, mod2_w, mod3_w);
-
- mod0_w = diff0_r & mod0_w;
- mod1_w = diff0_l & mod1_w;
- mod2_w = diff1_r & mod2_w;
- mod3_w = diff1_l & mod3_w;
-
- MUL4(mod0_w, filt_wt, mod1_w, filt_wt, mod2_w, filt_wt, mod3_w, filt_wt,
- mod0_w, mod1_w, mod2_w, mod3_w);
- PCKEV_H2_SH(mod1_w, mod0_w, mod3_w, mod2_w, mod0_h, mod1_h);
- ADD2(mod0_h, cnt0, mod1_h, cnt1, mod0_h, mod1_h);
- ST_SH2(mod0_h, mod1_h, cnt, 8);
- cnt += 16;
-
- UNPCK_UB_SH(frm2, frm2_r, frm2_l);
- UNPCK_SH_SW(frm2_r, frm2_rr, frm2_rl);
- UNPCK_SH_SW(frm2_l, frm2_lr, frm2_ll);
- MUL4(mod0_w, frm2_rr, mod1_w, frm2_rl, mod2_w, frm2_lr, mod3_w, frm2_ll,
- mod0_w, mod1_w, mod2_w, mod3_w);
- ADD4(mod0_w, acc0, mod1_w, acc1, mod2_w, acc2, mod3_w, acc3, mod0_w, mod1_w,
- mod2_w, mod3_w);
-
- ST_SW2(mod0_w, mod1_w, acc, 4);
- acc += 8;
- ST_SW2(mod2_w, mod3_w, acc, 4);
- acc += 8;
-
- LD_SW2(acc, 4, acc0, acc1);
- LD_SW2(acc + 8, 4, acc2, acc3);
- LD_SH2(cnt, 8, cnt0, cnt1);
-
- ILVRL_B2_UB(frm3, frm4, frm_r, frm_l);
- HSUB_UB2_SH(frm_r, frm_l, diff0, diff1);
- UNPCK_SH_SW(diff0, diff0_r, diff0_l);
- UNPCK_SH_SW(diff1, diff1_r, diff1_l);
- MUL4(diff0_r, diff0_r, diff0_l, diff0_l, diff1_r, diff1_r, diff1_l, diff1_l,
- mod0_w, mod1_w, mod2_w, mod3_w);
- MUL4(mod0_w, cnst3, mod1_w, cnst3, mod2_w, cnst3, mod3_w, cnst3, mod0_w,
- mod1_w, mod2_w, mod3_w);
- SRAR_W4_SW(mod0_w, mod1_w, mod2_w, mod3_w, strength);
-
- diff0_r = (mod0_w < cnst16);
- diff0_l = (mod1_w < cnst16);
- diff1_r = (mod2_w < cnst16);
- diff1_l = (mod3_w < cnst16);
-
- SUB4(cnst16, mod0_w, cnst16, mod1_w, cnst16, mod2_w, cnst16, mod3_w, mod0_w,
- mod1_w, mod2_w, mod3_w);
-
- mod0_w = diff0_r & mod0_w;
- mod1_w = diff0_l & mod1_w;
- mod2_w = diff1_r & mod2_w;
- mod3_w = diff1_l & mod3_w;
-
- MUL4(mod0_w, filt_wt, mod1_w, filt_wt, mod2_w, filt_wt, mod3_w, filt_wt,
- mod0_w, mod1_w, mod2_w, mod3_w);
- PCKEV_H2_SH(mod1_w, mod0_w, mod3_w, mod2_w, mod0_h, mod1_h);
- ADD2(mod0_h, cnt0, mod1_h, cnt1, mod0_h, mod1_h);
- ST_SH2(mod0_h, mod1_h, cnt, 8);
- cnt += 16;
- UNPCK_UB_SH(frm4, frm2_r, frm2_l);
- UNPCK_SH_SW(frm2_r, frm2_rr, frm2_rl);
- UNPCK_SH_SW(frm2_l, frm2_lr, frm2_ll);
- MUL4(mod0_w, frm2_rr, mod1_w, frm2_rl, mod2_w, frm2_lr, mod3_w, frm2_ll,
- mod0_w, mod1_w, mod2_w, mod3_w);
- ADD4(mod0_w, acc0, mod1_w, acc1, mod2_w, acc2, mod3_w, acc3, mod0_w, mod1_w,
- mod2_w, mod3_w);
-
- ST_SW2(mod0_w, mod1_w, acc, 4);
- acc += 8;
- ST_SW2(mod2_w, mod3_w, acc, 4);
- acc += 8;
- }
-}
-
-static void temporal_filter_apply_16size_msa(uint8_t *frm1_ptr, uint32_t stride,
- uint8_t *frm2_ptr,
- int32_t filt_sth, int32_t filt_wgt,
- uint32_t *acc, uint16_t *cnt) {
- uint32_t row;
- v16i8 frm1, frm2, frm3, frm4;
- v16u8 frm_r, frm_l;
- v16i8 zero = { 0 };
- v8u16 frm2_r, frm2_l;
- v8i16 diff0, diff1, mod0_h, mod1_h;
- v4i32 cnst3, cnst16, filt_wt, strength;
- v4i32 mod0_w, mod1_w, mod2_w, mod3_w;
- v4i32 diff0_r, diff0_l, diff1_r, diff1_l;
- v4i32 frm2_rr, frm2_rl, frm2_lr, frm2_ll;
- v4i32 acc0, acc1, acc2, acc3;
- v8i16 cnt0, cnt1;
-
- filt_wt = __msa_fill_w(filt_wgt);
- strength = __msa_fill_w(filt_sth);
- cnst3 = __msa_ldi_w(3);
- cnst16 = __msa_ldi_w(16);
-
- for (row = 8; row--;) {
- LD_SB2(frm1_ptr, stride, frm1, frm3);
- frm1_ptr += stride;
-
- LD_SB2(frm2_ptr, 16, frm2, frm4);
- frm2_ptr += 16;
-
- LD_SW2(acc, 4, acc0, acc1);
- LD_SW2(acc, 4, acc2, acc3);
- LD_SH2(cnt, 8, cnt0, cnt1);
-
- ILVRL_B2_UB(frm1, frm2, frm_r, frm_l);
- HSUB_UB2_SH(frm_r, frm_l, diff0, diff1);
- UNPCK_SH_SW(diff0, diff0_r, diff0_l);
- UNPCK_SH_SW(diff1, diff1_r, diff1_l);
- MUL4(diff0_r, diff0_r, diff0_l, diff0_l, diff1_r, diff1_r, diff1_l, diff1_l,
- mod0_w, mod1_w, mod2_w, mod3_w);
- MUL4(mod0_w, cnst3, mod1_w, cnst3, mod2_w, cnst3, mod3_w, cnst3, mod0_w,
- mod1_w, mod2_w, mod3_w);
- SRAR_W4_SW(mod0_w, mod1_w, mod2_w, mod3_w, strength);
-
- diff0_r = (mod0_w < cnst16);
- diff0_l = (mod1_w < cnst16);
- diff1_r = (mod2_w < cnst16);
- diff1_l = (mod3_w < cnst16);
-
- SUB4(cnst16, mod0_w, cnst16, mod1_w, cnst16, mod2_w, cnst16, mod3_w, mod0_w,
- mod1_w, mod2_w, mod3_w);
-
- mod0_w = diff0_r & mod0_w;
- mod1_w = diff0_l & mod1_w;
- mod2_w = diff1_r & mod2_w;
- mod3_w = diff1_l & mod3_w;
-
- MUL4(mod0_w, filt_wt, mod1_w, filt_wt, mod2_w, filt_wt, mod3_w, filt_wt,
- mod0_w, mod1_w, mod2_w, mod3_w);
- PCKEV_H2_SH(mod1_w, mod0_w, mod3_w, mod2_w, mod0_h, mod1_h);
- ADD2(mod0_h, cnt0, mod1_h, cnt1, mod0_h, mod1_h);
- ST_SH2(mod0_h, mod1_h, cnt, 8);
- cnt += 16;
-
- ILVRL_B2_UH(zero, frm2, frm2_r, frm2_l);
- UNPCK_SH_SW(frm2_r, frm2_rr, frm2_rl);
- UNPCK_SH_SW(frm2_l, frm2_lr, frm2_ll);
- MUL4(mod0_w, frm2_rr, mod1_w, frm2_rl, mod2_w, frm2_lr, mod3_w, frm2_ll,
- mod0_w, mod1_w, mod2_w, mod3_w);
- ADD4(mod0_w, acc0, mod1_w, acc1, mod2_w, acc2, mod3_w, acc3, mod0_w, mod1_w,
- mod2_w, mod3_w);
-
- ST_SW2(mod0_w, mod1_w, acc, 4);
- acc += 8;
- ST_SW2(mod2_w, mod3_w, acc, 4);
- acc += 8;
-
- LD_SW2(acc, 4, acc0, acc1);
- LD_SW2(acc + 8, 4, acc2, acc3);
- LD_SH2(cnt, 8, cnt0, cnt1);
-
- ILVRL_B2_UB(frm3, frm4, frm_r, frm_l);
- HSUB_UB2_SH(frm_r, frm_l, diff0, diff1);
- UNPCK_SH_SW(diff0, diff0_r, diff0_l);
- UNPCK_SH_SW(diff1, diff1_r, diff1_l);
- MUL4(diff0_r, diff0_r, diff0_l, diff0_l, diff1_r, diff1_r, diff1_l, diff1_l,
- mod0_w, mod1_w, mod2_w, mod3_w);
- MUL4(mod0_w, cnst3, mod1_w, cnst3, mod2_w, cnst3, mod3_w, cnst3, mod0_w,
- mod1_w, mod2_w, mod3_w);
- SRAR_W4_SW(mod0_w, mod1_w, mod2_w, mod3_w, strength);
-
- diff0_r = (mod0_w < cnst16);
- diff0_l = (mod1_w < cnst16);
- diff1_r = (mod2_w < cnst16);
- diff1_l = (mod3_w < cnst16);
-
- SUB4(cnst16, mod0_w, cnst16, mod1_w, cnst16, mod2_w, cnst16, mod3_w, mod0_w,
- mod1_w, mod2_w, mod3_w);
-
- mod0_w = diff0_r & mod0_w;
- mod1_w = diff0_l & mod1_w;
- mod2_w = diff1_r & mod2_w;
- mod3_w = diff1_l & mod3_w;
-
- MUL4(mod0_w, filt_wt, mod1_w, filt_wt, mod2_w, filt_wt, mod3_w, filt_wt,
- mod0_w, mod1_w, mod2_w, mod3_w);
- PCKEV_H2_SH(mod1_w, mod0_w, mod3_w, mod2_w, mod0_h, mod1_h);
- ADD2(mod0_h, cnt0, mod1_h, cnt1, mod0_h, mod1_h);
- ST_SH2(mod0_h, mod1_h, cnt, 8);
- cnt += 16;
-
- ILVRL_B2_UH(zero, frm4, frm2_r, frm2_l);
- UNPCK_SH_SW(frm2_r, frm2_rr, frm2_rl);
- UNPCK_SH_SW(frm2_l, frm2_lr, frm2_ll);
- MUL4(mod0_w, frm2_rr, mod1_w, frm2_rl, mod2_w, frm2_lr, mod3_w, frm2_ll,
- mod0_w, mod1_w, mod2_w, mod3_w);
- ADD4(mod0_w, acc0, mod1_w, acc1, mod2_w, acc2, mod3_w, acc3, mod0_w, mod1_w,
- mod2_w, mod3_w);
- ST_SW2(mod0_w, mod1_w, acc, 4);
- acc += 8;
- ST_SW2(mod2_w, mod3_w, acc, 4);
- acc += 8;
-
- frm1_ptr += stride;
- frm2_ptr += 16;
- }
-}
-
-void av1_temporal_filter_apply_msa(uint8_t *frame1_ptr, uint32_t stride,
- uint8_t *frame2_ptr, uint32_t blk_w,
- uint32_t blk_h, int32_t strength,
- int32_t filt_wgt, uint32_t *accu,
- uint16_t *cnt) {
- if (8 == (blk_w * blk_h)) {
- temporal_filter_apply_8size_msa(frame1_ptr, stride, frame2_ptr, strength,
- filt_wgt, accu, cnt);
- } else if (16 == (blk_w * blk_h)) {
- temporal_filter_apply_16size_msa(frame1_ptr, stride, frame2_ptr, strength,
- filt_wgt, accu, cnt);
- } else {
- av1_temporal_filter_apply_c(frame1_ptr, stride, frame2_ptr, blk_w, blk_h,
- strength, filt_wgt, accu, cnt);
- }
-}
diff --git a/third_party/aom/av1/encoder/ml.c b/third_party/aom/av1/encoder/ml.c
deleted file mode 100644
index d21def43a..000000000
--- a/third_party/aom/av1/encoder/ml.c
+++ /dev/null
@@ -1,73 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#include <assert.h>
-#include <math.h>
-
-#include "aom_dsp/aom_dsp_common.h"
-#include "av1/encoder/ml.h"
-
-void av1_nn_predict(const float *features, const NN_CONFIG *nn_config,
- float *output) {
- int num_input_nodes = nn_config->num_inputs;
- int buf_index = 0;
- float buf[2][NN_MAX_NODES_PER_LAYER];
- const float *input_nodes = features;
-
- // Propagate hidden layers.
- const int num_layers = nn_config->num_hidden_layers;
- assert(num_layers <= NN_MAX_HIDDEN_LAYERS);
- for (int layer = 0; layer < num_layers; ++layer) {
- const float *weights = nn_config->weights[layer];
- const float *bias = nn_config->bias[layer];
- float *output_nodes = buf[buf_index];
- const int num_output_nodes = nn_config->num_hidden_nodes[layer];
- assert(num_output_nodes < NN_MAX_NODES_PER_LAYER);
- for (int node = 0; node < num_output_nodes; ++node) {
- float val = 0.0f;
- for (int i = 0; i < num_input_nodes; ++i)
- val += weights[i] * input_nodes[i];
- val += bias[node];
- // ReLU as activation function.
- val = val > 0.0f ? val : 0.0f; // Could use AOMMAX().
- output_nodes[node] = val;
- weights += num_input_nodes;
- }
- num_input_nodes = num_output_nodes;
- input_nodes = output_nodes;
- buf_index = 1 - buf_index;
- }
-
- // Final output layer.
- const float *weights = nn_config->weights[num_layers];
- for (int node = 0; node < nn_config->num_outputs; ++node) {
- const float *bias = nn_config->bias[num_layers];
- float val = 0.0f;
- for (int i = 0; i < num_input_nodes; ++i)
- val += weights[i] * input_nodes[i];
- output[node] = val + bias[node];
- weights += num_input_nodes;
- }
-}
-
-void av1_nn_softmax(const float *input, float *output, int n) {
- // Softmax function is invariant to adding the same constant
- // to all input values, so we subtract the maximum input to avoid
- // possible overflow.
- float max_inp = input[0];
- for (int i = 1; i < n; i++) max_inp = AOMMAX(max_inp, input[i]);
- float sum_out = 0.0f;
- for (int i = 0; i < n; i++) {
- output[i] = (float)exp(input[i] - max_inp);
- sum_out += output[i];
- }
- for (int i = 0; i < n; i++) output[i] /= sum_out;
-}
diff --git a/third_party/aom/av1/encoder/ml.h b/third_party/aom/av1/encoder/ml.h
deleted file mode 100644
index cb8ef2871..000000000
--- a/third_party/aom/av1/encoder/ml.h
+++ /dev/null
@@ -1,49 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#ifndef AOM_AV1_ENCODER_ML_H_
-#define AOM_AV1_ENCODER_ML_H_
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-#define NN_MAX_HIDDEN_LAYERS 10
-#define NN_MAX_NODES_PER_LAYER 128
-
-typedef struct {
- int num_inputs; // Number of input nodes, i.e. features.
- int num_outputs; // Number of output nodes.
- int num_hidden_layers; // Number of hidden layers, maximum 10.
- // Number of nodes for each hidden layer.
- int num_hidden_nodes[NN_MAX_HIDDEN_LAYERS];
- // Weight parameters, indexed by layer.
- const float *weights[NN_MAX_HIDDEN_LAYERS + 1];
- // Bias parameters, indexed by layer.
- const float *bias[NN_MAX_HIDDEN_LAYERS + 1];
-} NN_CONFIG;
-
-// Calculate prediction based on the given input features and neural net config.
-// Assume there are no more than NN_MAX_NODES_PER_LAYER nodes in each hidden
-// layer.
-void av1_nn_predict(const float *features, const NN_CONFIG *nn_config,
- float *output);
-
-// Applies the softmax normalization function to the input
-// to get a valid probability distribution in the output:
-// output[i] = exp(input[i]) / sum_{k \in [0,n)}(exp(input[k]))
-void av1_nn_softmax(const float *input, float *output, int n);
-
-#ifdef __cplusplus
-} // extern "C"
-#endif
-
-#endif // AOM_AV1_ENCODER_ML_H_
diff --git a/third_party/aom/av1/encoder/palette.c b/third_party/aom/av1/encoder/palette.c
deleted file mode 100644
index e61cd02ce..000000000
--- a/third_party/aom/av1/encoder/palette.c
+++ /dev/null
@@ -1,154 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#include <math.h>
-#include <stdlib.h>
-
-#include "av1/encoder/cost.h"
-#include "av1/encoder/palette.h"
-#include "av1/encoder/random.h"
-
-#define AV1_K_MEANS_DIM 1
-#include "av1/encoder/k_means_template.h"
-#undef AV1_K_MEANS_DIM
-#define AV1_K_MEANS_DIM 2
-#include "av1/encoder/k_means_template.h"
-#undef AV1_K_MEANS_DIM
-
-static int int_comparer(const void *a, const void *b) {
- return (*(int *)a - *(int *)b);
-}
-
-int av1_remove_duplicates(int *centroids, int num_centroids) {
- int num_unique; // number of unique centroids
- int i;
- qsort(centroids, num_centroids, sizeof(*centroids), int_comparer);
- // Remove duplicates.
- num_unique = 1;
- for (i = 1; i < num_centroids; ++i) {
- if (centroids[i] != centroids[i - 1]) { // found a new unique centroid
- centroids[num_unique++] = centroids[i];
- }
- }
- return num_unique;
-}
-
-static int delta_encode_cost(const int *colors, int num, int bit_depth,
- int min_val) {
- if (num <= 0) return 0;
- int bits_cost = bit_depth;
- if (num == 1) return bits_cost;
- bits_cost += 2;
- int max_delta = 0;
- int deltas[PALETTE_MAX_SIZE];
- const int min_bits = bit_depth - 3;
- for (int i = 1; i < num; ++i) {
- const int delta = colors[i] - colors[i - 1];
- deltas[i - 1] = delta;
- assert(delta >= min_val);
- if (delta > max_delta) max_delta = delta;
- }
- int bits_per_delta = AOMMAX(av1_ceil_log2(max_delta + 1 - min_val), min_bits);
- assert(bits_per_delta <= bit_depth);
- int range = (1 << bit_depth) - colors[0] - min_val;
- for (int i = 0; i < num - 1; ++i) {
- bits_cost += bits_per_delta;
- range -= deltas[i];
- bits_per_delta = AOMMIN(bits_per_delta, av1_ceil_log2(range));
- }
- return bits_cost;
-}
-
-int av1_index_color_cache(const uint16_t *color_cache, int n_cache,
- const uint16_t *colors, int n_colors,
- uint8_t *cache_color_found, int *out_cache_colors) {
- if (n_cache <= 0) {
- for (int i = 0; i < n_colors; ++i) out_cache_colors[i] = colors[i];
- return n_colors;
- }
- memset(cache_color_found, 0, n_cache * sizeof(*cache_color_found));
- int n_in_cache = 0;
- int in_cache_flags[PALETTE_MAX_SIZE];
- memset(in_cache_flags, 0, sizeof(in_cache_flags));
- for (int i = 0; i < n_cache && n_in_cache < n_colors; ++i) {
- for (int j = 0; j < n_colors; ++j) {
- if (colors[j] == color_cache[i]) {
- in_cache_flags[j] = 1;
- cache_color_found[i] = 1;
- ++n_in_cache;
- break;
- }
- }
- }
- int j = 0;
- for (int i = 0; i < n_colors; ++i)
- if (!in_cache_flags[i]) out_cache_colors[j++] = colors[i];
- assert(j == n_colors - n_in_cache);
- return j;
-}
-
-int av1_get_palette_delta_bits_v(const PALETTE_MODE_INFO *const pmi,
- int bit_depth, int *zero_count,
- int *min_bits) {
- const int n = pmi->palette_size[1];
- const int max_val = 1 << bit_depth;
- int max_d = 0;
- *min_bits = bit_depth - 4;
- *zero_count = 0;
- for (int i = 1; i < n; ++i) {
- const int delta = pmi->palette_colors[2 * PALETTE_MAX_SIZE + i] -
- pmi->palette_colors[2 * PALETTE_MAX_SIZE + i - 1];
- const int v = abs(delta);
- const int d = AOMMIN(v, max_val - v);
- if (d > max_d) max_d = d;
- if (d == 0) ++(*zero_count);
- }
- return AOMMAX(av1_ceil_log2(max_d + 1), *min_bits);
-}
-
-int av1_palette_color_cost_y(const PALETTE_MODE_INFO *const pmi,
- uint16_t *color_cache, int n_cache,
- int bit_depth) {
- const int n = pmi->palette_size[0];
- int out_cache_colors[PALETTE_MAX_SIZE];
- uint8_t cache_color_found[2 * PALETTE_MAX_SIZE];
- const int n_out_cache =
- av1_index_color_cache(color_cache, n_cache, pmi->palette_colors, n,
- cache_color_found, out_cache_colors);
- const int total_bits =
- n_cache + delta_encode_cost(out_cache_colors, n_out_cache, bit_depth, 1);
- return av1_cost_literal(total_bits);
-}
-
-int av1_palette_color_cost_uv(const PALETTE_MODE_INFO *const pmi,
- uint16_t *color_cache, int n_cache,
- int bit_depth) {
- const int n = pmi->palette_size[1];
- int total_bits = 0;
- // U channel palette color cost.
- int out_cache_colors[PALETTE_MAX_SIZE];
- uint8_t cache_color_found[2 * PALETTE_MAX_SIZE];
- const int n_out_cache = av1_index_color_cache(
- color_cache, n_cache, pmi->palette_colors + PALETTE_MAX_SIZE, n,
- cache_color_found, out_cache_colors);
- total_bits +=
- n_cache + delta_encode_cost(out_cache_colors, n_out_cache, bit_depth, 0);
-
- // V channel palette color cost.
- int zero_count = 0, min_bits_v = 0;
- const int bits_v =
- av1_get_palette_delta_bits_v(pmi, bit_depth, &zero_count, &min_bits_v);
- const int bits_using_delta =
- 2 + bit_depth + (bits_v + 1) * (n - 1) - zero_count;
- const int bits_using_raw = bit_depth * n;
- total_bits += 1 + AOMMIN(bits_using_delta, bits_using_raw);
- return av1_cost_literal(total_bits);
-}
diff --git a/third_party/aom/av1/encoder/palette.h b/third_party/aom/av1/encoder/palette.h
deleted file mode 100644
index 8b88c4755..000000000
--- a/third_party/aom/av1/encoder/palette.h
+++ /dev/null
@@ -1,96 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#ifndef AOM_AV1_ENCODER_PALETTE_H_
-#define AOM_AV1_ENCODER_PALETTE_H_
-
-#include "av1/common/blockd.h"
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-#define AV1_K_MEANS_RENAME(func, dim) func##_dim##dim
-
-void AV1_K_MEANS_RENAME(av1_calc_indices, 1)(const int *data,
- const int *centroids,
- uint8_t *indices, int n, int k);
-void AV1_K_MEANS_RENAME(av1_calc_indices, 2)(const int *data,
- const int *centroids,
- uint8_t *indices, int n, int k);
-void AV1_K_MEANS_RENAME(av1_k_means, 1)(const int *data, int *centroids,
- uint8_t *indices, int n, int k,
- int max_itr);
-void AV1_K_MEANS_RENAME(av1_k_means, 2)(const int *data, int *centroids,
- uint8_t *indices, int n, int k,
- int max_itr);
-
-// Given 'n' 'data' points and 'k' 'centroids' each of dimension 'dim',
-// calculate the centroid 'indices' for the data points.
-static INLINE void av1_calc_indices(const int *data, const int *centroids,
- uint8_t *indices, int n, int k, int dim) {
- if (dim == 1) {
- AV1_K_MEANS_RENAME(av1_calc_indices, 1)(data, centroids, indices, n, k);
- } else if (dim == 2) {
- AV1_K_MEANS_RENAME(av1_calc_indices, 2)(data, centroids, indices, n, k);
- } else {
- assert(0 && "Untemplated k means dimension");
- }
-}
-
-// Given 'n' 'data' points and an initial guess of 'k' 'centroids' each of
-// dimension 'dim', runs up to 'max_itr' iterations of k-means algorithm to get
-// updated 'centroids' and the centroid 'indices' for elements in 'data'.
-// Note: the output centroids are rounded off to nearest integers.
-static INLINE void av1_k_means(const int *data, int *centroids,
- uint8_t *indices, int n, int k, int dim,
- int max_itr) {
- if (dim == 1) {
- AV1_K_MEANS_RENAME(av1_k_means, 1)(data, centroids, indices, n, k, max_itr);
- } else if (dim == 2) {
- AV1_K_MEANS_RENAME(av1_k_means, 2)(data, centroids, indices, n, k, max_itr);
- } else {
- assert(0 && "Untemplated k means dimension");
- }
-}
-
-// Given a list of centroids, returns the unique number of centroids 'k', and
-// puts these unique centroids in first 'k' indices of 'centroids' array.
-// Ideally, the centroids should be rounded to integers before calling this
-// method.
-int av1_remove_duplicates(int *centroids, int num_centroids);
-
-// Given a color cache and a set of base colors, find if each cache color is
-// present in the base colors, record the binary results in "cache_color_found".
-// Record the colors that are not in the color cache in "out_cache_colors".
-int av1_index_color_cache(const uint16_t *color_cache, int n_cache,
- const uint16_t *colors, int n_colors,
- uint8_t *cache_color_found, int *out_cache_colors);
-
-// Return the number of bits used to transmit each v palette color delta;
-// assign zero_count with the number of deltas being 0.
-int av1_get_palette_delta_bits_v(const PALETTE_MODE_INFO *const pmi,
- int bit_depth, int *zero_count, int *min_bits);
-
-// Return the rate cost for transmitting luma palette color values.
-int av1_palette_color_cost_y(const PALETTE_MODE_INFO *const pmi,
- uint16_t *color_cache, int n_cache, int bit_depth);
-
-// Return the rate cost for transmitting chroma palette color values.
-int av1_palette_color_cost_uv(const PALETTE_MODE_INFO *const pmi,
- uint16_t *color_cache, int n_cache,
- int bit_depth);
-
-#ifdef __cplusplus
-} // extern "C"
-#endif
-
-#endif // AOM_AV1_ENCODER_PALETTE_H_
diff --git a/third_party/aom/av1/encoder/partition_model_weights.h b/third_party/aom/av1/encoder/partition_model_weights.h
deleted file mode 100644
index 437ea43f9..000000000
--- a/third_party/aom/av1/encoder/partition_model_weights.h
+++ /dev/null
@@ -1,2448 +0,0 @@
-/*
- * Copyright (c) 2018, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#ifndef AOM_AV1_ENCODER_PARTITION_MODEL_WEIGHTS_H_
-#define AOM_AV1_ENCODER_PARTITION_MODEL_WEIGHTS_H_
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-#include "av1/encoder/ml.h"
-
-#define FEATURE_SIZE 10
-#define LABEL_SIZE 16
-// nn model for ab partition pruning, 128x128.
-static const float av1_ab_partition_nn_weights_128_layer0[FEATURE_SIZE * 64] = {
- -0.715251f, -0.015767f, -0.667353f, -0.345255f, 0.177887f, -0.469759f,
- 0.426152f, 0.489798f, 0.469865f, 0.773821f, 0.088517f, 0.074585f,
- 0.838754f, 0.048449f, -0.007584f, 0.638968f, 0.233305f, -0.319236f,
- -0.257124f, -0.170869f, 0.137180f, 0.114852f, -0.721241f, -0.947962f,
- -0.411298f, 0.494306f, -0.060435f, -0.648421f, -0.126624f, 0.072686f,
- -0.143904f, -0.115839f, -0.175527f, -0.117728f, 0.040686f, -0.189925f,
- 0.134361f, -0.258070f, -0.177558f, 0.158049f, 0.168668f, -0.062919f,
- 0.341986f, 0.038100f, -0.435577f, -0.321255f, 0.203213f, 0.213061f,
- 0.533304f, 0.359296f, -0.079558f, 0.004637f, 0.663904f, 0.043779f,
- 0.383018f, 1.136559f, -0.084155f, 0.333057f, -0.199011f, 0.152059f,
- -0.078419f, -0.167752f, -0.093651f, 0.083171f, -0.190143f, 0.086195f,
- -0.280632f, -0.160663f, -0.017298f, 0.122628f, -0.138116f, 0.062927f,
- 0.222462f, 0.626979f, 0.426928f, 0.117170f, -0.240457f, 0.053750f,
- 0.038017f, 0.007359f, -0.017595f, 0.101407f, 0.332891f, 0.074933f,
- 0.306498f, 0.219380f, -0.151638f, -0.247976f, 0.343405f, 0.121256f,
- 0.049173f, 0.171474f, -0.139608f, -1.016599f, -0.345553f, -0.901138f,
- 0.243401f, 0.059928f, -0.089396f, -0.195565f, 0.364705f, -0.020400f,
- -1.383672f, 0.413018f, 0.536950f, -0.020904f, -1.335306f, -0.732290f,
- 0.102885f, 0.315290f, -0.208521f, -0.081811f, 0.182300f, 0.125712f,
- -0.593833f, -0.220639f, -0.314155f, 0.188327f, 0.118503f, 0.524427f,
- -1.083859f, -1.130640f, 0.390352f, -0.045591f, 0.113160f, -0.009149f,
- -0.096183f, 0.115829f, 0.377752f, 0.318396f, -0.591983f, 0.004797f,
- -0.497377f, -0.342248f, 0.079546f, -0.025249f, -0.295972f, 0.615501f,
- -0.464372f, 0.418315f, -0.173556f, 0.105217f, 0.298073f, 0.082478f,
- 0.033223f, 0.977341f, -0.372982f, -0.052337f, 0.154124f, 0.396787f,
- 0.536654f, -0.139061f, -0.223702f, 0.229666f, -0.846766f, 0.107723f,
- 0.563839f, -0.483141f, 0.304813f, -0.765283f, 0.070964f, 0.151101f,
- 0.275188f, 0.490303f, 1.175892f, 0.085377f, -0.191200f, 0.544532f,
- -0.365075f, 0.167546f, 0.052183f, -0.220529f, -0.212227f, -0.144988f,
- -0.273356f, -0.062023f, 0.103993f, -0.238493f, -0.161204f, -0.054611f,
- -0.166672f, 0.128327f, 0.461751f, -0.545822f, 0.739798f, 0.594386f,
- -0.163192f, -0.332501f, 0.363834f, -0.065043f, 0.474812f, -0.138811f,
- 0.170924f, -0.778142f, -0.316474f, -0.508065f, -0.039986f, -0.478001f,
- 0.340591f, 0.041783f, 0.055419f, 0.015155f, -0.981830f, -1.355237f,
- 0.347516f, 1.155327f, 0.081319f, 0.274163f, -0.327230f, -0.113478f,
- 0.556552f, -0.055986f, 0.217318f, -0.445351f, 0.325759f, 0.526547f,
- -0.657434f, -0.572214f, -0.037087f, 0.081384f, 0.064518f, 0.014892f,
- 0.215279f, 1.834504f, -0.242107f, 0.079810f, 0.129558f, 0.079588f,
- -0.035189f, -0.221745f, -0.163414f, 0.043978f, -1.028662f, -0.623609f,
- 1.130336f, 0.664661f, -0.063975f, -0.415863f, 0.018581f, 0.157758f,
- 0.200570f, 0.063420f, 0.901039f, -0.746286f, 0.196230f, -0.290592f,
- 0.042373f, -0.502500f, 0.183638f, 0.103394f, -0.298858f, 0.145436f,
- 0.196916f, 0.108319f, -0.448572f, -0.881385f, 0.302497f, 0.121679f,
- -0.021327f, 0.025150f, 0.481306f, -0.359634f, 0.350257f, -0.228647f,
- -0.669860f, 0.260025f, -0.034182f, 0.619247f, -0.158826f, -0.405864f,
- 0.674112f, -0.027885f, -0.325274f, -0.241492f, 0.036024f, -0.437685f,
- -0.091458f, -0.109295f, -0.350676f, 0.044706f, 0.297059f, 0.016290f,
- 1.121203f, 1.289062f, -1.299476f, -1.129221f, 0.103752f, 0.131302f,
- -0.263265f, 0.222155f, -0.229908f, 0.013922f, -0.226001f, -0.248383f,
- -0.004415f, -0.020958f, 0.055634f, 0.086200f, 0.114556f, -0.184061f,
- -0.096210f, -0.146466f, -0.249618f, -0.195998f, 0.088758f, 0.023781f,
- -0.264460f, 0.157026f, -0.235228f, -0.102564f, 0.043463f, -0.187823f,
- -0.257500f, -0.199049f, -0.242210f, 0.030448f, 0.221604f, 0.151804f,
- -0.100404f, -0.073931f, 0.144749f, -0.001572f, -1.438079f, -0.233716f,
- 0.733422f, 1.727080f, -0.036397f, 0.027551f, 0.425321f, 0.085703f,
- 0.031186f, 0.032333f, -0.675130f, 1.437733f, -0.202392f, -0.525003f,
- 0.087048f, 0.328194f, -0.079989f, -0.391088f, -0.238732f, -0.120660f,
- -0.139600f, 0.154665f, 0.026202f, -0.233501f, -0.009046f, -0.149187f,
- -0.199646f, 0.115375f, 0.209762f, -0.014875f, 0.124038f, -0.119985f,
- 1.079625f, -0.461513f, 0.614114f, 0.021003f, 0.439449f, -0.824834f,
- -0.299701f, 0.193817f, -0.870551f, -1.262313f, -0.079517f, 0.341570f,
- 0.305310f, -0.089721f, -0.317314f, -0.075631f, 0.127172f, -0.208635f,
- 1.191922f, 0.163141f, 0.564285f, 0.286352f, 0.480865f, 0.173094f,
- -0.094034f, -0.071339f, -0.328992f, -0.006382f, 0.314705f, 0.090258f,
- -0.016099f, 0.193230f, 0.188061f, 0.398144f, 0.722781f, 0.769949f,
- 0.025442f, -0.162016f, 0.070192f, -0.056946f, -0.100957f, -0.219934f,
- -0.203492f, -0.015454f, -0.013272f, -0.098008f, 0.051707f, -0.017493f,
- 0.527446f, 0.083605f, 0.588318f, 0.878215f, 0.028747f, -0.146479f,
- -0.345170f, -0.136059f, -0.152005f, -0.203634f, 0.232702f, -0.101340f,
- -0.027733f, -0.282611f, 0.265366f, 0.082362f, -0.265420f, -0.131124f,
- 0.166303f, 0.040194f, -0.100710f, 0.579151f, -0.530136f, 0.163422f,
- -0.998821f, -1.565311f, -1.774785f, -2.493372f, 0.116970f, -0.090302f,
- 1.723272f, 0.552370f, -0.295954f, -0.439095f, -0.266730f, 0.027936f,
- 0.539616f, -0.234902f, -0.167601f, -0.149877f, -0.242983f, 0.122353f,
- -0.121620f, -0.205517f, -0.180144f, -0.264208f, 0.151500f, -0.159378f,
- 0.029145f, -0.050892f, -0.223407f, -0.246239f, 0.043152f, -0.018460f,
- 0.169972f, -0.187769f, -0.034670f, -0.238330f, 0.288070f, -0.093243f,
- -0.437105f, -0.573376f, 0.660073f, 0.285727f, 0.408470f, 0.158475f,
- 0.032699f, 0.056280f, -0.237176f, -0.083003f, 0.105598f, -0.169522f,
- -0.260420f, -0.121100f, -0.173983f, -0.195693f, -0.232028f, 0.224940f,
- 0.029124f, 0.009580f, -0.252034f, 0.103087f, 1.156561f, 0.603848f,
- -0.562805f, -1.652742f, -0.568288f, -1.829395f, 0.046169f, 0.076095f,
- 1.490819f, 0.415893f, -0.277788f, -0.115787f, 0.093750f, 0.270726f,
- -0.395983f, -0.353742f, 0.034605f, 0.005342f, 0.184537f, 0.086445f,
- 0.156417f, 1.476367f, 0.122587f, 0.002145f, 0.431057f, -0.381184f,
- -1.646457f, -0.014009f, -0.671224f, 0.193726f, -0.019247f, -0.031267f,
- -0.046208f, 0.298733f, 0.064734f, 0.616984f, 0.039381f, 0.182722f,
- -0.116670f, 0.233093f, -1.214374f, -0.817970f, -0.064394f, -0.584783f,
- 0.077697f, -0.266720f, 0.130875f, -0.235295f, -0.265754f, -0.159999f,
- -0.250114f, -0.183017f, 0.194403f, -0.105808f, -0.169215f, -0.240866f,
- -0.026662f, -0.045123f, -0.036175f, -0.167471f, -0.192908f, -0.232602f,
- -0.267036f, -0.112500f, -0.257944f, -0.111909f, -0.802226f, -0.008800f,
- 0.881460f, -0.678603f, 0.008666f, -0.252053f, -0.341035f, -0.175290f,
- 0.183012f, 0.385991f, 0.079888f, -0.014039f, -0.148653f, 0.671778f,
- -0.130219f, 1.086467f, 0.129267f, -0.040400f, -0.201221f, -0.077005f,
- 0.015890f, 0.000781f, 0.137764f, 1.389546f, 0.172152f, 0.047279f,
- -0.042783f, 0.127740f, 0.141467f, -0.335738f, -1.396392f, 0.031496f,
- 0.357385f, 0.343602f, -0.714553f, 0.311014f, 0.132845f, 0.061149f,
- 0.006796f, 0.568106f, -0.255949f, 0.104134f, -0.993447f, 0.298135f,
- -0.406590f, -0.049228f, -0.578570f, -0.188561f, -0.107046f, 0.374095f,
- 0.068481f, 0.036240f, -0.495801f, 0.180574f, -0.766129f, 0.886967f,
- -0.568868f, -0.936062f, -0.418886f, -0.058735f, -0.511964f, -0.438596f,
- 0.019016f, -0.015837f, 0.600197f, 0.429773f, 0.315026f, 0.319667f,
- 0.214617f, -0.017316f, 0.270257f, -0.040524f, 0.695803f, -0.015223f,
- -1.554965f, 0.356997f, -1.472428f, 0.024637f, -0.562958f, 0.870351f,
- 0.193635f, 0.036063f, 0.328638f, 0.200274f, -1.634707f, 0.110534f,
- 0.420104f, -0.072042f, -0.006404f, 0.171680f,
-};
-
-static const float av1_ab_partition_nn_bias_128_layer0[64] = {
- 0.643147f, -1.348826f, 0.431627f, 0.000000f, 0.102717f, -0.772628f,
- -0.034351f, -0.761977f, -0.638397f, 0.541969f, -0.391311f, 0.563076f,
- 0.148553f, 0.267217f, -0.788092f, 0.544573f, -0.546280f, 0.000000f,
- -0.446945f, 0.127732f, 0.270624f, -0.219435f, -1.220203f, 0.324584f,
- 0.110885f, 0.276547f, 0.179726f, -0.375160f, 0.026401f, -0.032595f,
- 0.000000f, -0.047932f, -0.648602f, -0.512637f, -0.031661f, -0.236761f,
- 0.476453f, -0.028021f, -0.013673f, -0.015578f, -0.920077f, 0.000000f,
- 0.915351f, -0.209962f, 0.000000f, -0.025731f, 0.218288f, 0.000000f,
- 0.047726f, -0.813077f, -1.263281f, 0.239087f, 0.278614f, -0.030753f,
- 0.000000f, 0.346744f, -0.948543f, -1.174211f, 0.216377f, 0.498913f,
- 0.853918f, 0.002504f, -0.190403f, 0.452050f,
-};
-
-static const float av1_ab_partition_nn_weights_128_layer1[64 * LABEL_SIZE] = {
- 0.179769f, 1.499417f, -0.445135f, -0.142278f, -0.337661f, 0.682064f,
- -0.203213f, 0.302171f, 0.226877f, -0.422169f, 1.687586f, 0.783773f,
- 0.220995f, 0.253482f, 0.370435f, -1.342775f, 0.337229f, -0.271473f,
- 0.291796f, 1.362227f, -1.751397f, -0.086178f, 0.725496f, -0.118597f,
- 0.227963f, -0.501577f, 0.223849f, -0.122421f, -0.123437f, -0.051045f,
- -0.020115f, 0.212711f, 0.246025f, 0.088120f, -0.168995f, 1.740190f,
- -0.195098f, 0.680339f, -0.589572f, -0.075244f, 0.878766f, 0.064092f,
- -3.548527f, 0.001660f, 0.107926f, -0.169501f, -0.455212f, 0.123045f,
- -1.836998f, 0.330365f, 1.301475f, 0.454761f, -0.576552f, -0.190761f,
- 0.208459f, 0.618483f, 1.383364f, 0.970718f, 0.390174f, 0.406252f,
- -0.564519f, -0.312062f, 1.345712f, -0.151873f, 0.109290f, 0.408847f,
- 0.391243f, 0.152024f, 0.181764f, -0.036263f, -0.160466f, 0.153595f,
- 0.049163f, -0.753012f, -1.804062f, 0.347475f, -2.746580f, 0.575618f,
- 0.261799f, 0.210505f, -0.302054f, -0.109872f, 0.199506f, -1.182971f,
- 0.723668f, 0.177758f, -0.338202f, 0.254396f, -0.220023f, 0.043504f,
- 0.669866f, -0.040816f, -0.402730f, 0.017990f, 0.215523f, -0.216816f,
- 0.454826f, -0.726067f, -0.018750f, -0.928679f, 0.154315f, -0.465641f,
- 0.144566f, -0.030064f, -0.054667f, -0.154055f, 0.625384f, 1.323795f,
- -0.159496f, 0.097072f, -0.463197f, -0.057938f, 0.750290f, -0.233061f,
- 0.412631f, -0.535223f, -0.151423f, -0.154583f, 0.024721f, -0.494448f,
- 0.230594f, -0.980138f, -0.653968f, 0.126079f, 0.051814f, -0.053219f,
- -0.421708f, -0.228853f, 0.237885f, 0.888157f, 0.059655f, 0.241295f,
- 0.210443f, 0.228238f, 0.119127f, -0.051989f, -0.355408f, 0.182215f,
- 0.244277f, -0.104577f, -0.558035f, -0.023270f, 0.054571f, 0.700646f,
- -0.223006f, 0.115523f, 0.023391f, 0.437264f, 0.709477f, -0.531212f,
- -0.094731f, 0.328161f, -0.105418f, -0.133511f, 0.497168f, -0.030948f,
- -0.407132f, -0.043943f, 0.155505f, 0.251945f, 0.205010f, 0.167160f,
- 0.083654f, -0.636810f, 0.401315f, -0.398414f, 0.290046f, 0.206846f,
- 0.042218f, 0.168150f, 0.843181f, -0.671242f, -0.202392f, -0.073301f,
- 0.142895f, 0.237466f, 0.212145f, -0.091828f, 0.187038f, -0.720841f,
- -0.616069f, -0.238021f, 0.065365f, 0.434119f, 0.179023f, -0.040107f,
- -0.430734f, -0.297368f, 0.575954f, 0.382619f, -0.709787f, -0.320810f,
- 0.242342f, -0.047614f, 0.705216f, 0.098077f, 0.357179f, 0.046017f,
- 0.115074f, -0.412305f, -0.272304f, 0.048096f, -0.803811f, 0.275000f,
- 0.642198f, 0.180286f, -0.087178f, -0.112707f, -0.394443f, 0.201989f,
- 0.241759f, -1.038870f, 0.728124f, 0.800559f, -1.296268f, 0.198612f,
- -0.053478f, 0.414344f, -0.510529f, 0.124179f, -2.219115f, -0.074583f,
- -0.143055f, 0.001697f, 0.810811f, -0.657140f, 0.186818f, -0.936414f,
- 0.539578f, -0.308244f, -0.126624f, -0.204767f, 0.091145f, -0.049340f,
- 0.252014f, 0.394582f, 0.018764f, -0.060377f, -0.019133f, 0.064083f,
- 0.069211f, -0.526693f, 0.209850f, -0.481466f, -0.468302f, -0.100407f,
- 0.241018f, -1.037781f, 0.038539f, -2.113840f, -0.974895f, 0.163187f,
- 0.425132f, -0.772546f, -1.261254f, -0.217488f, -0.971748f, -0.805640f,
- -0.745175f, -0.177077f, 0.217658f, 0.381431f, -0.052338f, 0.087176f,
- -0.165972f, 0.085937f, 0.472564f, -0.796627f, -2.453307f, 0.569664f,
- -0.233010f, -0.192134f, 0.064339f, -0.111411f, -0.262469f, -0.410022f,
- 0.519993f, -0.684620f, 0.393460f, -0.277753f, -0.153624f, 0.528984f,
- -0.415558f, -0.445863f, 0.588512f, -0.142439f, -0.132127f, 0.199776f,
- -0.579284f, 0.119488f, -0.033590f, -0.503846f, -0.674979f, 0.335125f,
- 0.020519f, 0.233973f, -0.297998f, -0.051511f, 0.518626f, -0.412782f,
- -0.074045f, 0.130523f, 0.465751f, -0.117795f, 2.535813f, 0.352108f,
- -0.499228f, 0.379784f, 0.056699f, 0.173142f, -0.076519f, -0.026666f,
- 0.017834f, 0.492333f, 0.093364f, 0.037867f, -0.165420f, -0.356429f,
- -0.562334f, 0.057656f, -0.307544f, 0.085857f, -0.559851f, 0.107230f,
- -0.398633f, 0.152618f, -0.216835f, -0.024539f, 0.026044f, -0.249519f,
- -0.563594f, -0.746025f, 0.025265f, -0.298888f, -0.185243f, 0.058794f,
- 0.233696f, -0.115223f, 0.144617f, -0.864390f, 0.619944f, -0.023980f,
- 0.019481f, 0.225252f, 0.416552f, -0.115993f, 0.935387f, 0.744386f,
- 0.053353f, -0.052582f, -0.065650f, 0.228488f, -0.032042f, -0.371252f,
- -0.003638f, -0.736984f, -0.203776f, 0.030922f, -0.065577f, -0.031643f,
- -0.049253f, -0.054640f, 0.787134f, 0.545414f, -0.140297f, -0.124274f,
- -0.110011f, -0.029552f, 0.657005f, 0.214973f, -0.374300f, 0.251642f,
- 0.276591f, 0.030566f, -0.145470f, 0.350579f, -0.356436f, -0.052694f,
- -0.063966f, -0.751008f, -1.042392f, 0.328892f, -0.425058f, -0.421571f,
- -0.571889f, -1.141472f, -0.125216f, 0.212713f, -0.485170f, -0.088791f,
- 0.124589f, 0.023237f, 0.077635f, 0.020901f, -0.271402f, -0.321424f,
- -0.513946f, -0.867872f, -0.284593f, 0.106276f, 0.220192f, -0.143532f,
- -0.014648f, 0.073402f, 0.327256f, -0.139803f, 0.168763f, 0.048199f,
- -0.122526f, 0.111713f, -0.134257f, 0.810364f, -0.085222f, -0.259221f,
- -0.239349f, 0.044448f, 0.205031f, 0.413113f, -0.107720f, -0.018816f,
- -0.247741f, -0.004963f, 0.041170f, -0.158019f, 0.134839f, 0.129502f,
- 0.800488f, -1.041584f, -0.129336f, 0.170834f, 0.566586f, -0.230443f,
- 0.437937f, -0.149922f, -0.046665f, -0.094646f, 0.200070f, 0.072943f,
- -0.076943f, -0.084971f, -0.515843f, -0.146720f, 0.472869f, -0.444731f,
- -0.100877f, 0.545196f, -1.786626f, -0.482946f, 0.500509f, -0.843257f,
- 0.200374f, 0.045103f, -0.575718f, -0.164335f, -0.232522f, -0.021825f,
- -0.139490f, 0.356058f, -0.352075f, 0.061751f, -0.200616f, -1.180921f,
- -0.181355f, -0.137459f, 0.247574f, 0.181541f, 0.184314f, -0.961482f,
- 0.493615f, 0.910261f, -2.279238f, 0.648631f, -0.055526f, -0.037137f,
- 0.038643f, 0.136609f, -0.819373f, -0.040840f, -0.265989f, 0.006877f,
- 0.454651f, -0.595323f, -0.099500f, -0.263717f, 0.150456f, 0.245077f,
- -0.268666f, 0.162232f, -0.516451f, -0.024501f, 0.188046f, -0.002262f,
- 0.261319f, 0.004173f, 0.746982f, 0.174761f, 0.470447f, -0.159558f,
- -0.385240f, 0.023084f, -0.133520f, -0.220607f, -0.018731f, -0.373558f,
- -0.707763f, -1.850150f, -0.807404f, -0.168063f, -0.071435f, -0.160740f,
- -0.478789f, -1.070674f, -0.489740f, -0.255796f, 0.100486f, -0.153361f,
- 0.334394f, -0.569472f, -0.198118f, 0.255922f, 0.104717f, -0.065179f,
- 0.111879f, -0.447237f, 1.373623f, -0.190191f, -0.063311f, 0.337529f,
- -0.138800f, 0.057009f, -0.137006f, 0.641378f, 0.883147f, -0.679655f,
- 0.267717f, -0.351602f, -0.135225f, 0.229398f, -0.513225f, -1.120345f,
- 0.528786f, -0.051081f, 0.086653f, 0.140141f, -0.563969f, 0.333402f,
- -0.174745f, 0.321093f, -0.438641f, -0.005131f, 0.247415f, 0.110120f,
- -0.076308f, -0.083244f, 0.838944f, -0.113043f, -0.013258f, -0.175028f,
- -0.179941f, 0.272676f, -0.047946f, -0.088076f, -0.450031f, 0.053929f,
- -0.083549f, -0.089952f, -0.186253f, 0.257483f, 0.011019f, 0.586435f,
- 0.060580f, -0.052078f, 0.090277f, -0.780869f, 0.969811f, -0.025349f,
- -0.281917f, 0.014857f, 0.231863f, -0.228601f, -0.003861f, 0.226550f,
- 0.141825f, -0.102171f, -0.010387f, 0.220378f, -2.561975f, -0.497071f,
- -0.315117f, 0.371981f, 0.138247f, 0.625031f, -0.308133f, -0.217876f,
- 0.005615f, -0.860179f, 0.747491f, 0.006356f, -0.057024f, -0.483189f,
- 0.055592f, -0.316834f, 0.069858f, 0.218788f, -0.200044f, 0.227588f,
- 0.215496f, -0.055324f, -0.393147f, -0.394062f, -0.253264f, -0.075619f,
- -0.152512f, -0.332995f, 0.129053f, 0.178668f, -0.302694f, 0.030678f,
- 0.925896f, 0.964375f, 0.169021f, -0.218657f, -0.627204f, 0.206437f,
- -0.521336f, 0.176206f, 0.142733f, 0.139248f, 0.411682f, 0.181544f,
- 0.224850f, -0.935547f, -0.558208f, 0.348096f, 0.342129f, -0.389340f,
- -0.236308f, -0.132099f, 0.073642f, 0.089391f, -0.306901f, -0.397842f,
- 0.444282f, 0.074623f, -0.051075f, -0.106617f, -0.184037f, -0.239046f,
- -0.138761f, 0.120794f, -0.647577f, -0.336471f, 0.527899f, -0.164234f,
- -0.028354f, 1.083678f, -0.251534f, -0.145903f, -0.182783f, 0.070976f,
- -0.199590f, -0.400306f, -0.029763f, -0.548042f, -0.266270f, -0.118084f,
- -1.152632f, 0.383685f, -0.105895f, -0.096829f, 0.118382f, 0.047447f,
- -0.019051f, 0.310180f, -0.162793f, -0.029574f, 0.058054f, -0.636017f,
- 0.490639f, 0.158347f, -0.385701f, -0.147057f, 1.285825f, -1.276083f,
- -0.021795f, -0.101600f, 0.163254f, 0.267160f, -2.317864f, -0.098598f,
- -0.296337f, -0.309017f, 0.164127f, -0.270012f, -0.071187f, -0.262270f,
- 0.075415f, -0.368328f, 0.186728f, -0.158031f, 0.481663f, 0.515950f,
- -0.162551f, 0.497981f, 0.262196f, 0.168479f, 0.726066f, -0.243856f,
- -0.058998f, 0.140168f, 0.053242f, -0.624623f, -0.249480f, 0.055197f,
- -1.376804f, 0.417571f, 0.203784f, 0.174370f, -0.155531f, -0.029400f,
- -0.491473f, 0.079811f, -0.080123f, 1.345900f, 0.637077f, 0.434862f,
- -1.787438f, 0.005756f, -0.362706f, 0.179458f, -0.288263f, 0.516788f,
- -0.921248f, 0.043794f, -0.137729f, -0.196171f, -0.046295f, -0.793781f,
- -0.156532f, -0.132566f, 0.517989f, -0.154321f, -0.054174f, -0.077900f,
- -0.373316f, -0.117718f, 0.188986f, -0.476188f, -0.245312f, 0.181439f,
- -0.161024f, -0.229059f, -3.079907f, -0.225452f, -0.594355f, -0.558027f,
- -0.135429f, 0.125766f, -0.081314f, -0.350894f, -0.163165f, -1.936507f,
- -0.205966f, 0.031472f, 0.744446f, -0.006680f, -0.837551f, 0.605862f,
- -0.854929f, -1.543750f, -0.307704f, -0.240517f, 0.178240f, -0.183586f,
- -0.010307f, 0.099373f, -0.228278f, 0.175236f, -0.000133f, 0.104491f,
- -1.540545f, -0.570971f, -0.252885f, 0.483036f, 0.052531f, 0.260214f,
- -0.515016f, -0.602081f, -0.485690f, -0.730710f, 0.163719f, -1.775975f,
- -0.298634f, 0.323626f, -0.373579f, -0.872977f, 0.619574f, 0.026862f,
- -0.122531f, -0.084698f, -2.436297f, 0.483996f, -0.203640f, -0.302157f,
- -0.150666f, -0.238320f, 0.089250f, 0.236485f, -0.668654f, -0.122863f,
- 0.491152f, -0.226444f, -0.181248f, 0.120158f, 0.294027f, 0.250056f,
- 0.307601f, 0.357875f, -1.746455f, -0.175670f, 0.385447f, -0.108808f,
- -0.090235f, -0.642504f, -0.486004f, -0.055160f, -0.068692f, 0.009736f,
- 0.607555f, -0.489426f, 0.150624f, 0.598114f, -0.128816f, -0.445793f,
- -0.066524f, -0.254380f, 0.227106f, -0.406495f, -0.121632f, -0.275960f,
- -0.136494f, 0.339457f, -1.318132f, -0.417572f, -2.614077f, 0.324603f,
- -0.001211f, 0.375192f, -0.473448f, -0.162510f, 0.099329f, -0.277965f,
- 0.101221f, -0.060263f, 0.121867f, -1.042140f, 0.440851f, 0.078898f,
- -0.209007f, -0.243699f, 0.715197f, -0.093997f, 0.086022f, -0.178203f,
- -2.275496f, -0.098413f, 0.199352f, -0.526791f, -0.162086f, -0.197806f,
- -0.231657f, -0.269202f, -0.794294f, -0.223461f, 0.503584f, 0.416236f,
- 0.064082f, 0.197655f, 0.340871f, -0.186645f, -0.291498f, 0.433938f,
- -1.110063f, 0.003751f, 0.392738f, 0.069360f, 0.102088f, -0.302128f,
- -1.518457f, 0.106939f, 0.404527f, -0.306868f, -0.286928f, 0.729276f,
- -0.531710f, 0.745048f, -0.168837f, -1.953886f, -0.258828f, -0.190252f,
- 0.241877f, -0.916744f, -0.030326f, -0.070541f, -0.271037f, 0.211303f,
- -0.489957f, 0.100850f, 0.323999f, -0.802837f, -0.462408f, -0.079350f,
- -0.029374f, 0.131213f, -0.825032f, 0.040202f, 0.351821f, 0.002869f,
- -0.132516f, -0.471264f, -0.297002f, 0.263913f, 0.033478f, 0.146161f,
- 0.533229f, -0.228608f, -0.200639f, -0.170955f, -0.915037f, 0.724491f,
- 0.005151f, 0.018584f, -0.029771f, -0.396038f, -0.159236f, 0.038691f,
- -1.197056f, 0.146302f, 0.226840f, -0.852126f, 0.031214f, 0.108880f,
- 0.562000f, -0.134633f, -0.713343f, -0.342252f, -1.764521f, -0.114653f,
- 0.515073f, -0.080515f, -0.121155f, -0.865139f, -0.833694f, -0.368553f,
- 0.347673f, 0.623379f, 0.722067f, -0.492458f, -0.513263f, 0.585167f,
- 0.721518f, -0.693499f, 0.343725f, -0.273861f, -0.040230f, -0.785664f,
- -0.157500f, -0.308445f, 0.054062f, 0.600131f, -0.860887f, 0.434470f,
- -0.191382f, -0.306150f, -0.243965f, 0.705444f, 0.007789f, -0.146154f,
- -0.054499f, -0.073500f, -1.067364f, 0.404936f, -2.864590f, 0.182323f,
- 0.326126f, 0.102405f, -0.135800f, 1.128095f, -0.012267f, -0.023996f,
- -0.264834f, -0.108967f, -1.176746f, -0.926666f, 0.082999f, -0.498361f,
- 0.083560f, -0.210074f, 0.019225f, -0.201614f, -0.904760f, 0.181421f,
- 0.586384f, -0.177706f, 0.065471f, 0.168552f, 0.054705f, 0.045241f,
- 0.048057f, -0.410957f, -2.188854f, -0.169812f, 0.015521f, 0.176856f,
- -0.179331f, -0.352640f, -0.491735f, -1.743206f, 0.044227f, 0.010454f,
- 0.823643f, -0.119781f, -0.098359f, 0.093119f,
-};
-
-static const float av1_ab_partition_nn_bias_128_layer1[LABEL_SIZE] = {
- -0.433195f, -0.120488f, -0.116721f, 0.112134f, 0.118170f, -0.259769f,
- -0.077530f, 0.394044f, 0.279167f, -0.317988f, 0.189538f, 0.314776f,
- 0.325655f, -0.107123f, 0.591049f, 0.358744f,
-};
-
-static const NN_CONFIG av1_ab_partition_nnconfig_128 = {
- FEATURE_SIZE, // num_inputs
- LABEL_SIZE, // num_outputs
- 1, // num_hidden_layers
- {
- 64, // num_hidden_nodes
- },
- {
- av1_ab_partition_nn_weights_128_layer0,
- av1_ab_partition_nn_weights_128_layer1,
- },
- {
- av1_ab_partition_nn_bias_128_layer0,
- av1_ab_partition_nn_bias_128_layer1,
- },
-};
-
-// nn model for ab partition pruning, 64x64.
-static const float av1_ab_partition_nn_weights_64_layer0[FEATURE_SIZE * 64] = {
- -0.495347f, -0.049498f, -0.026804f, 0.030474f, -0.289308f, -0.264193f,
- -0.141121f, -0.072562f, -0.391665f, -0.051491f, -0.234761f, 0.027155f,
- -0.038217f, 0.014872f, -0.289728f, -0.233577f, -0.415875f, -0.343615f,
- -0.442543f, -0.482492f, 0.073510f, 0.007503f, 2.162329f, -0.362849f,
- 2.145915f, -0.883135f, 0.185636f, -0.062859f, -0.465574f, -0.486205f,
- -0.056710f, -0.330642f, -0.321860f, 0.042321f, -0.348965f, 0.003542f,
- -0.291365f, -0.078164f, -0.345093f, -0.220272f, -0.471270f, -0.763853f,
- 0.246622f, 0.199651f, -0.663420f, -0.154152f, -1.220383f, 0.047138f,
- 0.816811f, 0.083247f, -0.218839f, 0.038143f, -0.063436f, 0.015517f,
- -0.307320f, -0.166956f, -0.169499f, -0.399005f, -0.234638f, -0.162266f,
- 0.050425f, -0.221723f, -0.256942f, -0.287285f, 0.144011f, -0.033245f,
- 0.083649f, 0.119428f, -0.056706f, -0.117805f, 0.021866f, -0.257300f,
- -0.201378f, -0.217484f, -0.413780f, -0.145793f, 0.082792f, -0.347247f,
- 0.042539f, -0.302697f, 1.652316f, 0.000701f, -0.482843f, -0.160332f,
- -0.450099f, 0.212399f, -4.715360f, -5.336774f, -5.375758f, -6.048339f,
- 0.085956f, -0.037767f, 1.052409f, -0.931924f, -2.221907f, 0.268946f,
- 0.015512f, 1.237094f, -1.092185f, 0.418247f, -0.082143f, -0.076914f,
- -0.060749f, -0.325440f, -0.296960f, -0.066815f, -0.158477f, -0.373945f,
- -0.122322f, -0.113495f, -0.097978f, -0.192816f, -0.270418f, 0.035840f,
- -0.015458f, -0.121071f, -0.279582f, -0.067683f, 0.097855f, 0.019839f,
- 0.451127f, 0.004376f, 1.410392f, 3.255835f, -0.344815f, 0.145202f,
- 0.204132f, 0.171948f, -0.527736f, -0.110353f, 0.901448f, 0.003238f,
- -3.822090f, 0.235462f, 1.024823f, -0.821244f, 0.876056f, 2.553762f,
- -3.478597f, -2.076582f, -0.265515f, -0.055923f, -0.156980f, -0.164097f,
- -0.246040f, 0.039430f, -0.071769f, -0.118847f, -0.304053f, -0.281541f,
- -0.226021f, -0.263091f, -0.127359f, -0.249410f, -0.051023f, 0.083911f,
- 0.084721f, 0.168089f, -0.272169f, -0.204998f, -0.008303f, -0.173998f,
- 0.079376f, -0.197426f, -0.199052f, -0.118794f, -0.063753f, -0.094769f,
- 0.066176f, -0.175832f, -0.238752f, -0.287960f, -0.134307f, -0.185953f,
- -0.385845f, 0.119769f, -0.006567f, -0.382126f, -0.214221f, 0.038449f,
- -0.253484f, -0.282766f, -0.020249f, -0.193929f, 0.016281f, -0.114423f,
- -0.145940f, -0.281621f, -0.007588f, -0.131470f, -0.189012f, -0.185699f,
- -0.279011f, -0.008132f, 0.208463f, 0.020569f, -0.206803f, -0.213408f,
- -0.206131f, -0.290245f, 0.069701f, -0.000371f, -0.307572f, -0.451785f,
- -0.300838f, -0.453186f, -0.301691f, 0.046327f, -0.312668f, 0.058272f,
- -0.303131f, -0.376252f, 0.108384f, -0.086623f, -0.100630f, -0.027330f,
- -0.003969f, 0.089502f, -0.200722f, -0.107889f, 0.061843f, -0.008478f,
- -0.265057f, -0.271132f, -0.073562f, 0.129337f, -0.283698f, -0.353414f,
- 0.076420f, -0.244280f, -0.119537f, -0.105366f, -0.184692f, -0.038817f,
- -0.478507f, -0.118808f, -0.472979f, -0.305884f, -0.462813f, -0.189581f,
- -0.011932f, -0.585700f, 0.253212f, -1.061900f, -0.205116f, -0.336407f,
- -0.762199f, 0.577737f, 0.230832f, 0.434440f, -0.096713f, 0.038552f,
- -0.147800f, -0.213553f, 0.041740f, -0.281907f, -0.026154f, -0.082356f,
- -0.331871f, -0.408247f, -0.129022f, -0.037550f, -0.310233f, -0.320883f,
- -0.391963f, -0.467392f, 0.027453f, -0.394761f, -0.045544f, 0.076052f,
- 0.483985f, 0.067093f, 0.141361f, 0.576772f, 0.859718f, 2.566515f,
- -0.025476f, 0.769738f, -0.680235f, -1.683309f, -2.394131f, -0.000714f,
- -0.615021f, -0.195856f, -0.434035f, -0.295010f, -0.668659f, -0.245959f,
- 0.551148f, 1.777227f, -0.461630f, 0.043093f, 0.012293f, -0.255841f,
- -0.097070f, -0.371156f, -0.146323f, -0.015508f, -0.103873f, -0.087476f,
- -0.297266f, -0.128699f, -0.149555f, 0.016534f, -0.375498f, -0.346759f,
- -0.455156f, -0.147509f, -0.427076f, -0.354431f, -0.158025f, -0.164604f,
- -0.237038f, -0.010314f, -0.092884f, -0.397084f, -0.217980f, -0.127184f,
- -0.048421f, -0.144133f, 0.889073f, 0.012606f, 3.007608f, -0.602584f,
- -1.849480f, -0.373159f, -1.890695f, -3.609938f, 0.811923f, -1.867208f,
- -0.244326f, -0.018012f, -0.211192f, -0.220196f, 0.169363f, 0.119141f,
- -0.230715f, 0.083247f, 0.020367f, -0.128629f, -0.217455f, -0.159640f,
- 1.815952f, -0.369238f, -1.186447f, -0.658753f, -0.511026f, -0.096934f,
- 0.662971f, 0.486475f, 0.159746f, -0.018932f, 3.692397f, 1.384353f,
- -0.401984f, -0.248380f, -0.140861f, 0.215248f, -0.023711f, 0.059679f,
- -0.072260f, 0.004271f, 0.039545f, -0.347971f, -0.081851f, -0.474896f,
- -0.181572f, 0.066736f, -0.157822f, -0.163760f, -0.171113f, -0.089935f,
- -0.338281f, -0.421444f, -0.306687f, -0.085283f, -0.377953f, -0.138750f,
- -0.102701f, -0.312336f, 0.149831f, 0.007229f, -0.155700f, -0.173611f,
- 4.074261f, 1.342306f, -1.272712f, 1.570899f, -0.545093f, -0.317605f,
- -0.189440f, -0.133910f, -0.273190f, -0.108020f, -0.166107f, 0.021413f,
- -0.239130f, -0.067211f, 0.041957f, -0.039234f, -1.003587f, -0.094412f,
- 0.532512f, -0.870538f, -1.118023f, -1.160983f, -0.736307f, -0.418752f,
- 0.419466f, 0.492122f, -0.004368f, -0.022096f, -1.115132f, 0.150886f,
- 2.396852f, 2.660000f, -0.376537f, 0.468628f, 0.149413f, -0.074898f,
- -0.067154f, 0.021245f, 0.127857f, 0.294189f, 0.508056f, 0.390232f,
- -3.899177f, -3.414681f, -3.929195f, -4.160545f, -0.274323f, -0.052583f,
- -0.003545f, -0.433084f, -0.404891f, -0.145051f, -0.312367f, 0.004579f,
- -0.398724f, -0.372068f, -0.234279f, 0.017799f, -0.424760f, -0.646717f,
- -0.047568f, 2.924664f, -0.644165f, 0.359349f, -0.294800f, 0.591746f,
- -0.404710f, -0.092358f, -0.250729f, 0.030829f, -0.147149f, -0.476023f,
- -0.071803f, -0.482516f, -0.293117f, -0.215923f, -0.373122f, -0.085315f,
- -0.377052f, -0.449899f, -0.056452f, 0.138081f, -0.085350f, -0.308391f,
- 0.106661f, 0.176234f, 0.258869f, -0.230172f, -0.233029f, -0.241208f,
- -0.067509f, -0.223172f, -0.118353f, -0.302478f, -0.579632f, -0.561326f,
- -0.158114f, -0.223167f, -0.026689f, 0.051863f, 0.212834f, -0.304714f,
- -0.169071f, -0.193695f, -0.075682f, -0.170860f, -0.241008f, -0.044648f,
- 0.280815f, -0.002585f, -0.283552f, -0.037701f, -0.681169f, -0.274535f,
- -0.380595f, 0.109504f, -0.111141f, -0.437685f, -0.094459f, 0.144206f,
- -0.106139f, -0.211832f, -0.054742f, -0.172813f, -0.295905f, -0.071907f,
- -0.418429f, -0.183240f, 0.031319f, -0.095785f, -0.315447f, 0.069404f,
- -0.422910f, -0.029867f, -0.357321f, -0.199976f, -0.337707f, -0.070188f,
- -0.178198f, 0.177208f, 0.134688f, -0.081933f, -0.229452f, -0.208872f,
- 0.026287f, -0.364040f, -0.063696f, -0.227443f, -0.234401f, -0.205699f,
- -0.267238f, -0.494125f, -0.056255f, 0.053715f, -0.487754f, 0.014818f,
- 0.087383f, -0.077556f, -0.168085f, -0.436851f, -0.276286f, -0.137845f,
- -0.107606f, -0.103653f, -0.233766f, -0.419083f, 0.169185f, 0.010186f,
- -0.001587f, 0.086735f, -2.465718f, 1.482185f, 1.621193f, -2.081680f,
- 1.386553f, -3.204335f, -0.267111f, -0.004508f, 0.164712f, 0.274147f,
- 1.724306f, -2.273659f, 0.749574f, -0.891905f, 0.105965f, -0.030428f,
- -0.416018f, -0.300762f, 0.122911f, -0.316908f, -0.292504f, 0.138666f,
- -0.161327f, -0.042143f, -0.249128f, 0.149210f, -0.088987f, -0.654101f,
- -1.501843f, 0.216777f, 0.955914f, 0.524158f, -1.642561f, -1.643626f,
- 0.864797f, -0.425451f, -2.115764f, -0.012502f, 0.065172f, 1.297270f,
- 0.018845f, 1.167276f, -0.470970f, -0.244995f, 0.374782f, -1.811056f,
- -0.055430f, -0.024102f, -0.376519f, -0.339640f, -0.119177f, -0.277995f,
- -0.290095f, -0.081362f, -0.144139f, -0.118037f, -0.180357f, -0.217559f,
- -0.370683f, 0.172816f, -0.265069f, 0.194321f, -0.273478f, 0.037442f,
- -0.235552f, -0.078625f, -0.447541f, 0.016836f, -0.271123f, -0.171481f,
- -0.321477f, -0.184826f, -0.442981f, -0.227273f, -0.370666f, -0.237232f,
- -0.257493f, -0.225714f, -0.153716f, -0.283487f, -0.155399f, 0.067697f,
- 0.230343f, -0.034318f, -0.022687f, -0.047090f,
-};
-
-static const float av1_ab_partition_nn_bias_64_layer0[64] = {
- -0.212182f, -0.233725f, -0.758846f, -0.158162f, 0.614743f, -0.150944f,
- -0.075727f, -0.208414f, 1.054996f, 0.713758f, -0.300051f, -0.151482f,
- -2.443570f, 0.430590f, -0.129001f, -0.160733f, -0.230547f, -0.143228f,
- -0.140577f, -0.086812f, -0.212298f, -0.159557f, -0.055647f, -0.211423f,
- 0.578161f, -0.220318f, -0.210107f, -3.111584f, 0.604419f, -0.232622f,
- -0.209924f, -0.130794f, -0.084097f, -0.036005f, 0.294594f, -2.535531f,
- -0.209783f, -0.211189f, -2.766337f, 0.000000f, 0.450177f, -1.754884f,
- 3.262664f, -0.209691f, -0.614886f, -0.211257f, -0.109096f, -0.190492f,
- -0.109007f, -0.026910f, -0.136035f, -0.212321f, -0.139320f, -0.212233f,
- -0.305430f, 0.739171f, 0.991277f, -0.088150f, 0.086313f, -0.023379f,
- -0.125366f, -0.063576f, -0.212169f, -0.047463f,
-};
-
-static const float av1_ab_partition_nn_weights_64_layer1[64 * LABEL_SIZE] = {
- -0.036800f, 0.528721f, 0.490767f, 0.144409f, 1.103640f, 0.361910f,
- -0.180069f, 0.068033f, -14.868382f, 0.359013f, 0.322567f, -0.199212f,
- 0.906164f, -0.488254f, 0.149653f, -0.216394f, -0.099347f, 0.004936f,
- -0.111391f, 0.074848f, -0.041709f, 0.147627f, -0.018905f, 0.096116f,
- 0.184817f, -0.016241f, 0.115739f, 2.376754f, 0.637097f, 0.052954f,
- 0.136428f, 0.225267f, -0.181873f, -0.142876f, 0.684048f, 0.658791f,
- 0.105795f, 0.241705f, 1.381114f, -0.209379f, 1.145949f, 0.795293f,
- -9.361877f, 0.198302f, 0.539600f, 0.092317f, -0.081695f, 0.200777f,
- 0.102334f, 0.081583f, 0.060948f, -0.025110f, 0.160951f, -0.020170f,
- 0.234006f, -0.029369f, 0.375036f, 0.270209f, -0.556529f, 1.402949f,
- 0.101777f, -0.027331f, 0.004502f, -0.153166f, -0.116651f, 0.151573f,
- -0.022187f, 0.144044f, -0.108719f, -0.129942f, -0.270321f, 0.227363f,
- 1.892330f, -0.661052f, -0.219398f, -0.229417f, -0.856438f, -1.196988f,
- -0.081774f, 0.078847f, -0.207057f, -0.048947f, 0.152073f, -0.243056f,
- -0.233329f, -0.288689f, -0.158333f, -0.141177f, -0.715436f, 0.016947f,
- -0.093752f, 0.204984f, -1.209782f, 0.155683f, 0.092239f, 0.146495f,
- 0.813146f, -0.027757f, 0.330982f, 2.173948f, -0.028867f, -0.141815f,
- 0.292708f, -0.204794f, 0.014496f, 1.032799f, 1.312155f, 0.107020f,
- 0.824752f, -0.013945f, 0.184829f, -0.041633f, 0.215300f, -0.476088f,
- -0.053213f, 0.126862f, -0.020777f, 0.082893f, -0.223727f, -0.923063f,
- 0.466529f, 0.082140f, -0.845758f, -1.140791f, -0.262033f, 0.138491f,
- 0.151717f, -0.182479f, -0.131128f, 0.055411f, 0.106771f, 0.125552f,
- 0.297184f, -0.257403f, -0.059884f, -0.274903f, 2.694357f, -0.108244f,
- 0.025377f, 0.043092f, -0.558317f, 3.517159f, -0.270833f, -0.240676f,
- 0.205100f, -0.057068f, -0.140445f, -0.193449f, -0.030061f, -0.286762f,
- -0.467523f, -0.012647f, 0.190564f, 0.022394f, -0.101479f, 0.339684f,
- -0.902743f, -0.169578f, -0.178029f, -0.041836f, -3.952108f, -0.028298f,
- -0.221137f, -0.733895f, -0.223895f, 0.039012f, 0.687867f, 0.021423f,
- 0.113063f, 0.676087f, -0.961000f, -0.064847f, 0.712856f, -0.192765f,
- -0.001132f, 0.016689f, -0.236020f, -0.766186f, -0.175729f, 0.012879f,
- -0.251064f, -0.105523f, -0.039212f, -0.347584f, 0.304352f, -0.034174f,
- -0.364258f, -0.685252f, -0.266115f, -0.247345f, -0.155905f, 0.152283f,
- -0.156315f, 0.174082f, -0.757654f, 0.102303f, -2.192316f, -0.245815f,
- 0.119882f, -0.086542f, 1.987246f, -1.353163f, -0.374813f, -0.233504f,
- -1.980895f, 0.692093f, -0.168351f, 0.172700f, -0.009052f, -0.015734f,
- 0.106679f, -0.060472f, -0.256813f, -0.074874f, -0.207488f, -0.329515f,
- -0.418268f, -0.017940f, -0.036081f, 0.064719f, -1.488016f, 0.020591f,
- -0.176325f, -0.141074f, 0.944494f, 0.150237f, -0.249805f, -0.277280f,
- 0.012686f, 0.132483f, 0.116123f, 0.013737f, -0.116091f, 0.750340f,
- 3.251343f, -0.188864f, 1.096992f, 0.058467f, -0.041433f, -0.037937f,
- -0.133294f, -0.137908f, -0.171132f, 0.106362f, 0.069383f, -0.052662f,
- -0.177883f, -0.408049f, 0.680221f, -0.117035f, -0.904240f, -1.395228f,
- 0.154527f, 0.134427f, 0.022767f, -0.158886f, -0.230316f, 0.161096f,
- 0.362213f, -0.235060f, -0.941620f, 0.055912f, -0.049458f, -0.166632f,
- 0.481418f, 0.930146f, 0.041108f, 0.033674f, 1.372066f, -1.847709f,
- 0.003324f, 0.259534f, 0.177014f, -0.202761f, -0.262017f, -0.190852f,
- -0.102839f, 0.028338f, 0.187193f, -0.041684f, 0.123973f, -0.198576f,
- -0.110369f, -1.431400f, 0.208369f, -0.302370f, -0.248549f, 0.062985f,
- 0.673409f, 0.036662f, -0.711340f, -0.120584f, -0.189789f, 0.098812f,
- 2.947819f, 0.216567f, -0.414472f, -0.181742f, 1.873779f, -0.222726f,
- -0.782870f, 0.007889f, 0.015062f, -0.554328f, 0.182928f, -0.191430f,
- 0.123636f, -0.215460f, -0.225245f, 0.251516f, -0.013025f, -1.359595f,
- -0.750602f, 0.342667f, -0.141899f, -0.687493f, -0.072639f, 0.048018f,
- -0.242107f, -0.031917f, -0.287472f, -0.046088f, 0.832197f, -0.016576f,
- -1.553349f, -0.216341f, 0.023077f, -0.410867f, 4.243743f, -0.514878f,
- -0.066007f, -0.160696f, -0.262678f, -0.648790f, -0.430586f, 0.199940f,
- -0.202496f, -0.222241f, -0.016406f, -0.121473f, 0.000828f, -0.081584f,
- -0.152641f, -0.190166f, 0.644400f, 0.040196f, -0.302104f, -1.143654f,
- -0.160327f, -0.320780f, -0.187006f, 0.037311f, 0.440618f, -0.070733f,
- -0.117785f, 1.527539f, -0.419310f, 0.001300f, 1.389956f, -0.036366f,
- -0.269203f, 0.612265f, 2.721897f, -0.086836f, -0.446999f, 0.012525f,
- -0.078317f, -0.287052f, -0.111188f, -0.085181f, -0.164667f, -0.010466f,
- -0.569722f, -0.018888f, -0.101663f, -1.147130f, -0.465204f, 0.114524f,
- -2.192402f, -0.221325f, 0.375748f, 0.206284f, -0.261548f, -0.246257f,
- -0.143004f, -0.069981f, -0.057306f, -0.116481f, -0.435903f, -0.314970f,
- 0.013210f, -0.010175f, 4.630571f, -0.473226f, -0.197199f, -0.028204f,
- 0.122907f, 2.475548f, 0.025011f, -0.092603f, -0.127561f, -0.151330f,
- -0.077295f, 0.245016f, -0.045005f, 0.183396f, -0.330556f, -0.384887f,
- 0.356374f, -0.016618f, -0.463353f, -1.291546f, -0.071986f, -0.311599f,
- 0.072385f, -0.430786f, -2.094788f, 0.202733f, -0.910109f, -1.336543f,
- -0.086800f, -0.096413f, 1.544383f, 0.031860f, -0.796211f, 0.762786f,
- 3.250022f, -0.441798f, -0.698537f, 0.062839f, 0.033525f, -0.362996f,
- 0.027022f, -1.131264f, -0.228926f, 0.053885f, -0.338628f, 0.155037f,
- -0.046844f, -0.888172f, -0.241767f, 0.084965f, -0.617743f, -0.049896f,
- -0.036894f, -0.304783f, -0.002639f, 0.137957f, 0.052121f, -0.131161f,
- -0.117200f, -0.253380f, -0.205561f, -0.302450f, -0.047397f, -0.330518f,
- 3.613420f, -1.525951f, -0.026738f, 0.209150f, -2.103534f, 2.019689f,
- -0.366199f, -0.095260f, 0.027417f, -0.242512f, 0.162579f, 0.052113f,
- -0.293851f, -0.068138f, -0.005799f, -0.344696f, -0.114824f, -0.431107f,
- -0.120058f, -1.139926f, -1.048379f, 0.036446f, -0.323020f, -0.432945f,
- 0.454151f, -0.140058f, 0.050649f, -0.094900f, -0.017278f, -0.238719f,
- 1.193153f, 0.120447f, -0.496061f, 0.917431f, 2.936126f, -0.115521f,
- -0.347397f, -0.435325f, -0.004383f, -0.211864f, 0.162383f, -1.040726f,
- 0.089537f, -0.128579f, -0.133505f, 0.107129f, -0.435657f, -0.180388f,
- 0.043650f, 0.018709f, -0.773242f, -0.687192f, -0.120633f, -0.063626f,
- 0.029912f, 0.113972f, -0.403502f, -0.127640f, -0.269625f, 0.129794f,
- -0.188539f, 0.041641f, 0.029769f, -0.198374f, 1.401407f, 0.353887f,
- -0.219925f, 0.260515f, 1.157034f, -2.992044f, -0.097618f, -0.064417f,
- -0.203626f, -0.008217f, -0.112339f, -0.227407f, -0.155118f, 0.247705f,
- -0.012304f, -0.248447f, -0.913463f, -0.064788f, -0.214619f, -0.251761f,
- -0.386861f, -0.040574f, -0.163219f, -0.100700f, 1.488274f, -0.071684f,
- -0.033626f, -0.006497f, -0.246945f, -0.145221f, -3.747390f, 0.149609f,
- -0.263326f, -0.297385f, -1.039896f, -0.083174f, -0.025473f, -0.235586f,
- -0.001087f, 0.254286f, 0.265106f, 0.007325f, 0.199239f, 0.134103f,
- -0.578211f, -0.259801f, -0.062373f, 2.368348f, 0.560556f, -0.252260f,
- 0.889997f, -0.447872f, -0.059218f, -0.095315f, -0.061667f, 0.183580f,
- -0.157479f, 0.055387f, -0.831734f, 0.007606f, -1.104906f, 0.301180f,
- -0.117115f, 0.212959f, 4.727223f, -0.243833f, -0.397495f, -0.025021f,
- -0.367587f, -2.082058f, -0.217699f, 0.148111f, 0.252430f, 0.111088f,
- -0.260692f, 0.095124f, -0.407774f, -0.322169f, 0.002927f, 0.126169f,
- -1.272325f, -0.279772f, -0.373680f, -0.485177f, -0.605458f, 0.021225f,
- -0.092031f, -0.226585f, 1.895162f, 0.037866f, -0.275475f, 1.614360f,
- -0.014972f, -0.277679f, -3.449082f, -0.092060f, -0.747873f, 0.020716f,
- 2.776178f, -0.049963f, 0.183999f, -0.295259f, -0.028868f, 0.221895f,
- 0.001265f, 0.336823f, 0.219372f, 0.112824f, 0.408132f, -0.017940f,
- -0.311666f, 1.489606f, -0.058093f, -0.305659f, -0.491933f, -0.143847f,
- 0.166115f, 0.042867f, -0.123447f, -0.087099f, -0.305395f, -0.365079f,
- -0.755801f, -0.160649f, 0.736260f, -0.008611f, 0.095836f, -0.017345f,
- 5.697515f, -0.498971f, -0.125280f, 0.199907f, 0.300053f, 0.605026f,
- -0.228225f, -0.259523f, 0.016384f, 0.146973f, 0.210258f, 0.226766f,
- -0.075178f, -0.050924f, 0.188496f, -0.415266f, -0.484880f, -0.236384f,
- 0.071931f, -0.331863f, -0.601243f, -0.232479f, -0.285272f, 0.123789f,
- -1.341333f, 0.037082f, -0.315202f, -1.587215f, -0.271576f, 0.003216f,
- -4.437186f, -0.256205f, -0.576589f, -0.114147f, 2.153916f, -0.369618f,
- 0.271415f, 0.145036f, -0.158731f, -0.240938f, -0.187369f, 0.036325f,
- 0.254771f, 0.211488f, -0.240297f, 0.098417f, -0.415011f, 2.334793f,
- -0.127252f, 0.020069f, -0.168755f, -0.448922f, -0.219207f, 0.016232f,
- -0.221935f, -0.269500f, -0.100636f, 0.102545f, -0.809376f, -0.054979f,
- 0.360713f, -0.326541f, 0.112933f, 0.138073f, 4.229404f, -0.763801f,
- -0.305429f, 0.199955f, -1.787713f, 0.272866f, 0.109895f, 0.138466f,
- -0.250259f, -0.167162f, -0.212588f, -0.217589f, -0.067125f, -0.077490f,
- -0.208970f, -0.006863f, -0.671146f, -0.298320f, -0.165509f, 0.044597f,
- -1.408624f, -0.213957f, -0.220947f, 0.129718f, 1.316777f, -0.098928f,
- -0.008121f, -0.558293f, -0.297290f, -0.218873f, -4.346638f, -0.228174f,
- -0.204710f, -0.388864f, 2.697919f, 0.025260f, 0.857020f, 0.009921f,
- 0.036915f, -0.320275f, -0.087937f, 0.022636f, 0.236667f, 0.135496f,
- -0.059616f, -0.192955f, 0.009470f, 2.139589f, -0.200449f, 0.129818f,
- 1.017444f, -0.608299f, 0.257914f, -0.134306f, -0.033327f, 0.002855f,
- -0.338598f, 0.015559f, 0.117362f, -0.166760f, 0.086903f, -0.167666f,
- 0.193523f, 0.033852f, -1.147686f, 0.489468f, -0.006969f, 0.125630f,
- 1.557907f, -1.604449f, -0.071114f, 0.096178f, 0.007065f, 0.200013f,
- 0.213393f, 0.168466f, -0.100568f, -0.117861f, -0.161542f, -0.072561f,
- -1.069871f, -0.470138f, -0.352578f, -1.503513f, -0.001394f, -0.380109f,
- 0.065089f, -0.281668f, 0.988953f, -0.002778f, -0.659026f, -0.470692f,
- -0.407292f, 0.011710f, -1.362085f, 0.184738f, -0.135786f, -1.374241f,
- 4.487930f, -0.067274f, -0.956404f, -0.233995f, 0.224527f, -0.454556f,
- 0.037900f, -0.281658f, 0.208224f, -0.254753f, 0.045740f, 0.051444f,
- -0.388281f, 0.257112f, -0.485030f, -0.082659f, 0.148103f, -1.007456f,
- -0.022295f, 0.036984f, -0.369401f, -0.076943f, -0.007636f, -0.293022f,
- 0.470466f, 0.199012f, -2.158182f, 0.036577f, -0.014725f, -0.229516f,
- 2.236929f, 0.030945f, -0.400045f, 0.109348f, 0.214691f, -0.891516f,
- -0.251379f, -0.217358f, 0.013733f, 0.205573f, -0.151725f, -0.191782f,
- -0.339630f, -0.163905f, -0.119191f, -0.032516f, 0.503015f, 0.025772f,
- 0.029094f, -1.146153f, 0.216723f, -0.330023f, 0.064695f, -0.262521f,
- 0.425612f, -0.093080f, -0.489648f, 1.051293f, -0.092332f, 0.095557f,
- -0.874132f, 0.218483f, -0.127648f, -1.605802f, 2.763617f, -0.186734f,
- -1.243166f, -0.193514f, -0.173748f, 0.337822f, 0.183873f, -0.251594f,
- -0.211582f, 0.144081f, 0.029620f, -0.024853f, -0.385140f, 0.467341f,
- -0.928316f, -0.195442f, 0.917783f, 0.357084f, 0.174445f, -0.073659f,
- -0.012811f, -0.115420f, -0.181147f, -0.364449f, -0.567395f, -0.012969f,
- -1.680714f, 0.065323f, 0.198063f, -0.244201f, 1.428545f, -0.432539f,
- -0.208931f, -0.091205f, 0.957125f, 0.813519f, -0.262677f, 0.246852f,
- 0.015536f, 0.055026f, 0.067054f, 0.262103f, -0.358115f, -0.095206f,
- -0.267522f, -0.402710f, -0.680397f, -0.123627f, -0.385590f, -1.504680f,
- -0.169513f, -0.215338f, 0.043633f, -0.079052f, -0.464410f, 0.122894f,
- -0.278231f, -2.456445f, -0.159917f, -0.015597f, -0.735449f, -0.078854f,
- -0.400290f, -1.153870f, 3.657228f, -0.287093f, -1.174355f, -0.102001f,
- -0.288281f, 0.185209f, -0.145228f, -0.200449f, -0.099914f, -0.138354f,
- 0.254428f, -0.161751f, -0.118206f, 0.296043f, -0.482613f, 0.080932f,
- 1.097605f, -0.010190f, 0.232439f, 0.447617f, -0.133508f, 0.115763f,
- -0.388589f, 0.174695f, -0.236014f, 0.006284f, -1.374129f, 0.092015f,
- -0.241419f, -0.231667f, 2.763950f, -0.922932f, -0.061605f, 0.208740f,
- -1.597190f, 1.353325f, -0.198528f, 0.250498f, -0.013950f, -0.203861f,
- -0.254563f, 0.081931f, -0.413369f, 0.011844f, 0.080961f, -0.231161f,
- -1.234909f, -0.440843f, -0.174980f, -0.315283f, -0.337474f, -0.123243f,
- -0.310001f, -0.271028f, 0.364179f, 0.022845f, -0.535517f, -0.772936f,
- -0.188435f, 0.039667f, -0.807463f, 0.266550f, -0.288857f, -1.630789f,
- 1.280155f, 0.065712f, -0.279960f, -0.300056f, 0.258440f, -0.073781f,
- 0.213878f, 0.042196f, 0.021360f, 0.211698f, -0.003751f, -0.192673f,
- -0.137008f, 0.247878f, -0.470604f, 0.073164f, 1.523241f, 0.734755f,
- -0.114126f, -0.193834f, -0.025759f, 0.263183f,
-};
-
-static const float av1_ab_partition_nn_bias_64_layer1[LABEL_SIZE] = {
- -0.343508f, -0.706936f, -0.160676f, -0.877101f, -0.517567f, -0.253254f,
- -0.148074f, 0.923430f, -0.364770f, 0.203550f, 0.401216f, 0.938246f,
- -0.872737f, 0.718723f, 0.703398f, 2.560015f,
-};
-
-static const NN_CONFIG av1_ab_partition_nnconfig_64 = {
- FEATURE_SIZE, // num_inputs
- LABEL_SIZE, // num_outputs
- 1, // num_hidden_layers
- {
- 64, // num_hidden_nodes
- },
- {
- av1_ab_partition_nn_weights_64_layer0,
- av1_ab_partition_nn_weights_64_layer1,
- },
- {
- av1_ab_partition_nn_bias_64_layer0,
- av1_ab_partition_nn_bias_64_layer1,
- },
-};
-
-// nn model for ab partition pruning, 32x32.
-static const float av1_ab_partition_nn_weights_32_layer0[FEATURE_SIZE * 64] = {
- -0.323723f, -0.214013f, -0.007772f, -0.458851f, -0.125542f, -0.123860f,
- -0.410973f, -0.209389f, -0.087580f, -0.272881f, -0.168500f, -1.130845f,
- 0.344916f, -0.475017f, -0.362262f, -0.195662f, -0.566124f, 0.782163f,
- 0.411575f, -0.013378f, -0.318650f, -0.124678f, -0.612909f, -0.315788f,
- -0.263990f, -0.508783f, -0.048938f, -0.416407f, -0.402648f, -0.156644f,
- 0.225887f, -0.000493f, 2.682241f, 0.871204f, 0.059014f, 0.803542f,
- -1.407028f, -1.154669f, 1.388148f, -0.293348f, -0.003669f, -0.009607f,
- 1.330030f, -0.337841f, 2.118617f, 1.033059f, -0.084788f, 0.212904f,
- 0.082405f, -0.070579f, -0.494005f, -0.173392f, 0.039546f, -0.463865f,
- 0.077163f, -0.434066f, 0.030835f, -0.427139f, -0.560520f, -0.031606f,
- -0.368541f, -0.027458f, 0.370574f, 0.461418f, 1.087682f, -0.572137f,
- -1.509596f, -0.765697f, -0.499383f, -0.277998f, -0.106492f, -0.129564f,
- -0.169133f, -0.269834f, -0.114270f, -0.275431f, 0.016339f, -0.156744f,
- -0.267922f, 0.171216f, 0.110556f, 0.002954f, -0.200327f, -0.187663f,
- 3.691601f, 1.234152f, 0.186315f, -0.125370f, -0.211235f, -0.554432f,
- -0.131072f, -0.124982f, -0.130339f, -0.235350f, 0.018903f, 0.012896f,
- -0.159372f, -0.269571f, -0.025709f, -0.221251f, 0.061919f, 0.016307f,
- 0.384673f, -0.134525f, -1.599126f, -0.416459f, -0.743052f, 0.670249f,
- -0.169709f, 0.421681f, -0.033360f, -0.072817f, 0.003647f, -0.110632f,
- -0.158651f, -0.095136f, 0.223759f, 0.165767f, -0.269129f, -0.196075f,
- -0.023183f, -0.293420f, 0.014875f, 0.018688f, -0.153407f, -0.172009f,
- -0.259947f, -0.124015f, 0.173653f, -0.089103f, -0.021001f, -0.334230f,
- 0.027177f, 0.103371f, -0.183860f, -0.204051f, -0.023721f, -0.192297f,
- -0.143771f, -0.247106f, 0.218116f, -0.013240f, 2.831783f, 1.483928f,
- -0.877025f, -0.313462f, -0.411320f, -0.447825f, 0.605977f, 0.234684f,
- -0.119150f, -0.075182f, -0.330463f, 0.071503f, -0.254924f, -0.360071f,
- -0.037022f, 0.063261f, -0.148759f, -0.238254f, -0.462018f, -0.027166f,
- 0.065318f, -0.235743f, -0.257194f, -0.094784f, 0.022423f, 0.055925f,
- 0.086672f, -0.021010f, 0.009965f, -0.001648f, -0.104917f, -0.387443f,
- -0.102673f, -0.281706f, 0.145923f, -0.233391f, -0.378365f, -0.145584f,
- -0.077751f, -0.121166f, 1.134565f, -0.097500f, -0.749202f, -0.544566f,
- -1.361374f, -0.102494f, 1.089275f, 0.375299f, -0.105091f, 0.037641f,
- -0.054248f, -0.282691f, -0.377797f, -0.066427f, -0.253815f, -0.329677f,
- -0.339326f, -0.128217f, -0.282905f, 0.014937f, 1.067185f, -0.171764f,
- 0.484458f, 0.396706f, -0.557055f, -0.891596f, -0.257839f, -0.720879f,
- -0.218449f, -0.004755f, 1.572857f, 0.006229f, 1.962895f, -0.029746f,
- -4.137691f, -2.185991f, -2.763477f, -0.520437f, -0.208708f, 0.006444f,
- -1.263078f, -0.304560f, 1.072374f, 2.556429f, 0.312850f, 0.257488f,
- -0.634264f, 0.156769f, -0.188943f, 0.040295f, -0.389915f, 0.085250f,
- -0.248525f, 0.045667f, -0.776115f, -0.274680f, -0.448145f, -0.566161f,
- -1.285316f, 0.079060f, 0.389124f, -0.510401f, -0.015299f, -0.664661f,
- 0.099901f, -0.470694f, -0.051593f, -1.076381f, -0.442104f, -0.197867f,
- -0.330011f, -0.448523f, -0.301018f, -0.442093f, -0.491953f, -0.582091f,
- -0.064569f, -0.156516f, 0.543522f, -0.005924f, 0.161432f, 0.974793f,
- 0.273712f, 1.104850f, -0.290312f, 0.313417f, -0.125370f, 0.136234f,
- -0.191227f, -0.165054f, 0.011872f, -0.298871f, 0.095740f, 0.142760f,
- -0.215771f, -0.031437f, 0.101041f, -0.085620f, 0.435387f, 0.002786f,
- 1.971375f, 0.018392f, -1.771940f, -0.401433f, 0.808263f, -3.350013f,
- 2.296952f, -1.024403f, -0.041645f, -0.034799f, -0.024078f, -0.347301f,
- -0.276088f, -0.455907f, 0.266021f, 0.087348f, -0.146566f, 0.040492f,
- -0.539866f, -0.206851f, -0.387874f, -0.125508f, -0.496676f, -0.373845f,
- -0.472356f, -0.357082f, -0.081254f, -0.456466f, 0.554713f, 0.002185f,
- -4.225019f, 0.344025f, 0.728796f, -0.262936f, 1.383924f, 1.577300f,
- -2.653320f, -2.516156f, -0.301604f, -0.204105f, -0.138252f, -0.587536f,
- -0.097889f, -0.352414f, -0.288276f, -0.184340f, -0.122741f, -0.243376f,
- 0.031970f, -0.373402f, -0.396079f, 0.045566f, 0.072595f, -0.222681f,
- -0.243802f, -0.340129f, -0.258494f, -0.192041f, -0.386112f, -0.240940f,
- -0.047268f, -0.555802f, -0.032514f, -0.241341f, -0.167463f, -0.478308f,
- -0.205936f, -0.316275f, 0.103729f, -0.197893f, -0.128029f, -0.218796f,
- -0.167362f, -0.111814f, -0.126062f, -0.394260f, -0.025357f, -0.402697f,
- -0.587395f, -0.400385f, -0.259664f, -0.415588f, -0.338503f, -0.399166f,
- -0.270504f, 0.234505f, 0.272144f, 0.266938f, -0.392395f, -0.011717f,
- -0.384221f, -0.473446f, -0.038420f, -0.241101f, -0.234402f, -0.275567f,
- -0.410454f, -0.377599f, -0.179099f, -0.138432f, -0.248083f, -0.543026f,
- -0.428043f, -0.239895f, -0.333193f, -0.103346f, -0.039038f, -0.171109f,
- -0.119432f, -0.222351f, 0.000450f, 0.208724f, -0.510526f, -0.144656f,
- -0.316721f, -0.344846f, -0.244794f, -0.129134f, -0.045634f, -0.400183f,
- 0.043714f, -0.235414f, 0.115594f, -0.195616f, -0.106693f, -0.124242f,
- 0.083990f, 0.049110f, -0.196130f, -0.059860f, -0.464235f, -0.516443f,
- -0.101521f, -0.422379f, -0.413955f, -0.042991f, -0.345263f, -0.129264f,
- -0.106911f, -0.140156f, -0.457841f, -0.199848f, -0.218954f, -0.329850f,
- -0.364097f, -0.335262f, -0.312254f, -0.299331f, -0.052710f, -0.251019f,
- -0.023459f, -0.222538f, 0.028849f, -0.088038f, -0.301550f, -0.273566f,
- 0.067295f, -0.174608f, -0.445784f, -0.158366f, -0.567275f, -0.557652f,
- -0.353503f, -0.302092f, -0.302049f, -0.551793f, -0.034535f, -0.225190f,
- -0.210733f, -0.219377f, -0.057197f, -0.430933f, -0.025185f, -0.388150f,
- -0.086147f, -0.430088f, 0.058466f, -0.152129f, -0.058411f, -0.236392f,
- -0.547669f, -0.613849f, -0.893774f, -0.351715f, -0.399227f, -0.454909f,
- -0.324501f, 0.000490f, -0.282167f, -0.073163f, -0.281452f, 0.047932f,
- -0.175500f, 0.165220f, -0.276212f, 0.062153f, -0.217054f, -0.255487f,
- -0.146416f, -0.097718f, -0.173809f, -0.559328f, -0.055695f, -0.391193f,
- -0.132020f, -0.561184f, -0.308666f, -0.474053f, -0.219149f, -0.246558f,
- -0.158325f, 0.151907f, -0.266835f, -0.144697f, -0.193960f, -0.046587f,
- -0.220028f, -0.247355f, 0.135584f, 0.016511f, 0.367705f, -1.855877f,
- 0.435622f, 0.444710f, -3.372301f, -3.030489f, 1.013267f, 0.380951f,
- -0.170011f, -0.111415f, -0.456146f, -0.107254f, -0.095220f, -0.053078f,
- -0.135864f, -0.591949f, -0.252810f, -0.324799f, -0.094796f, -0.260969f,
- -0.391981f, -0.063170f, -0.336130f, -0.470127f, -0.405168f, -0.433219f,
- -0.309563f, -0.295462f, -0.552270f, -0.012300f, -0.057793f, -0.034494f,
- -0.446843f, -0.640160f, -1.188681f, -0.791361f, 0.543271f, 1.189112f,
- 1.458468f, -0.005876f, -0.927475f, 0.062038f, -1.170818f, 0.338227f,
- -3.007096f, -4.559296f, -4.045457f, -5.953635f, -0.228386f, -0.266890f,
- -0.092595f, -0.377440f, -0.044534f, -0.053565f, -0.349268f, -0.415030f,
- -0.310094f, 0.062721f, 0.251422f, -0.014350f, -1.282910f, 1.619560f,
- 1.180566f, -0.032163f, -1.322951f, -0.603601f, 1.443710f, 0.654650f,
- -0.393227f, 0.003536f, 0.029725f, -0.108925f, -0.053911f, 0.133977f,
- -0.036145f, -0.168438f, 0.046989f, -0.331463f, -0.176983f, -0.311922f,
- -0.272389f, -0.379592f, -0.399993f, -0.297873f, -0.193425f, -0.177524f,
- -0.258309f, -0.567312f, -0.260217f, -0.241869f, 0.024010f, -0.032867f,
- -0.039424f, -0.063670f, 0.193808f, -0.303514f, -0.013376f, -0.057761f,
- 0.187922f, 0.006938f, 0.031810f, 0.180594f, -1.198427f, 2.820662f,
- 0.154986f, -0.375518f, 0.116925f, -0.795782f, -0.085139f, -0.079365f,
- -0.197936f, -0.321468f, -0.205271f, -0.558203f, -0.296235f, -0.151193f,
- -0.158282f, -0.245402f, -0.208504f, -0.042335f, -0.087426f, -0.557129f,
- -0.381427f, -0.441551f, -0.541011f, -0.060567f, -0.469305f, -0.032326f,
- -2.453587f, -0.045568f, -0.296932f, 0.613061f, -0.320284f, 0.191620f,
- -0.827145f, -0.225277f, 0.275800f, 1.696635f,
-};
-
-static const float av1_ab_partition_nn_bias_32_layer0[64] = {
- -0.176206f, 0.660189f, -0.186156f, -2.481963f, -1.564218f, -0.280424f,
- 0.732684f, -0.135581f, -2.193132f, -0.172771f, 0.605001f, -0.060392f,
- -0.067190f, -0.132969f, -1.410812f, -0.298701f, -0.105963f, -0.086173f,
- 0.632779f, 0.005585f, 1.310169f, 1.392136f, -0.563860f, -0.051053f,
- 0.660998f, -0.214726f, -1.894342f, -0.128288f, -0.330721f, -0.053988f,
- -0.177726f, 1.200859f, -0.178902f, -0.172620f, -0.184476f, -0.175559f,
- 0.538503f, -0.322158f, -0.219080f, -0.058208f, -0.171347f, -0.216060f,
- -0.174950f, -0.295740f, -0.184820f, -0.213896f, 1.317728f, -0.020116f,
- -0.208096f, 0.000000f, 1.246166f, -0.225421f, -0.181555f, 0.861761f,
- 1.172429f, -0.172892f, -0.737092f, -0.189904f, -0.179385f, -0.114618f,
- -1.384604f, -0.201713f, -0.271948f, 0.372351f,
-};
-
-static const float av1_ab_partition_nn_weights_32_layer1[64 * 16] = {
- -0.037828f, 1.529029f, 0.004927f, 1.475763f, 0.627172f, 0.325872f,
- -0.990757f, 0.129476f, 0.889958f, -0.082031f, 0.332133f, 0.074422f,
- -0.176212f, -0.074355f, 0.774378f, 0.110987f, -0.155469f, 0.253310f,
- 0.882538f, 0.253605f, 0.332436f, -5.389474f, 0.278470f, 0.168644f,
- 0.914611f, 0.154165f, 0.809262f, -0.174734f, 0.923673f, 0.064716f,
- -0.070228f, -0.228735f, 0.002312f, 0.112222f, -0.045502f, -0.046004f,
- 0.514101f, 0.306480f, 0.021232f, -0.015955f, -0.288260f, 0.189177f,
- -0.104158f, 0.103273f, 0.096910f, -0.086328f, 1.327289f, -0.154247f,
- 0.056676f, -0.243327f, -0.646676f, 0.177221f, -0.086761f, 0.729729f,
- -14.710893f, -0.044881f, 0.339003f, -0.134737f, 0.073621f, -0.162913f,
- 1.215237f, 0.140723f, 0.138630f, 1.241719f, 0.204092f, -0.463080f,
- -0.176086f, 1.125868f, 1.034814f, 0.225455f, -0.203421f, -0.078787f,
- -0.527498f, 0.012491f, -0.563307f, -0.170792f, 0.002679f, 0.116153f,
- 0.211348f, -0.191900f, -0.212505f, 0.263445f, -0.074679f, -0.081441f,
- -0.815405f, 2.448215f, 0.781299f, 0.149542f, -1.045162f, 0.043014f,
- 0.217381f, -0.094500f, -0.090427f, 0.025784f, -0.228906f, -2.741798f,
- 0.230475f, -0.256112f, -0.103297f, 0.159121f, -0.229793f, -0.014883f,
- -0.104131f, -0.123816f, 0.164148f, -0.052279f, -0.071845f, -0.041197f,
- 0.208527f, -0.234197f, -0.542336f, 0.020053f, 0.088870f, 0.014346f,
- 2.502164f, -0.010244f, -0.267792f, 0.844394f, 2.711486f, -0.015262f,
- -0.868053f, -0.295704f, 0.222289f, -0.000286f, -0.352098f, -0.079000f,
- 0.021267f, -0.721739f, -0.240558f, -0.384775f, 0.065974f, -2.161058f,
- 0.195889f, 0.268966f, -0.009329f, 0.014949f, 0.314943f, 0.235885f,
- 0.072591f, -0.127120f, 0.150784f, 0.105697f, -1.297403f, -0.207509f,
- -0.217688f, -0.076752f, 0.170952f, -0.294235f, 0.449973f, -1.712690f,
- 0.860989f, 0.054757f, -0.812627f, -0.105316f, -0.736230f, -0.133192f,
- -3.741608f, 0.495660f, -0.288936f, 4.654852f, -0.021305f, -0.308916f,
- 0.049205f, -0.259996f, 0.114248f, -0.252647f, -0.253180f, -0.449314f,
- 0.022979f, 0.063281f, -0.196154f, 0.078295f, -0.322317f, -0.145142f,
- 0.300573f, 0.048385f, -0.254787f, 0.123939f, -1.263088f, -0.228565f,
- -0.389061f, 0.391084f, 2.322438f, 0.075009f, 0.225743f, -0.198808f,
- -0.280538f, -0.173939f, -0.120543f, -0.070792f, -0.417187f, -0.781056f,
- -0.102756f, -1.760965f, 0.019149f, -0.867342f, 0.347141f, 0.031588f,
- 0.302572f, -0.203573f, -0.357320f, -0.096078f, -0.527528f, 0.046699f,
- -0.108561f, -0.167077f, -2.851509f, -0.307116f, 0.202720f, -0.160280f,
- -0.215525f, 0.064355f, -0.427220f, 1.516230f, 0.634453f, 0.099400f,
- -1.013887f, -0.029740f, -0.093426f, -0.044272f, -1.297636f, -0.237614f,
- -0.160953f, 0.399036f, -0.030685f, -0.113619f, -0.184704f, 0.040519f,
- -0.588252f, -0.210235f, -0.067623f, -0.031841f, -0.107261f, -0.192582f,
- -0.253959f, -0.430821f, -0.103184f, -0.280185f, -0.357723f, 0.197761f,
- -0.175087f, -0.055171f, 1.642014f, -0.192559f, -0.288147f, 0.610311f,
- 4.688195f, -0.128728f, -0.914869f, -0.108286f, 0.013789f, 0.092125f,
- 0.019770f, -0.178386f, 0.074164f, -1.152658f, -0.216738f, -0.277286f,
- 0.012381f, 0.418259f, -0.680727f, -0.221917f, -0.485946f, 0.101672f,
- 2.009457f, 0.054302f, 1.019838f, -0.116170f, 0.165134f, -0.112567f,
- 0.852632f, -0.385796f, -0.108666f, 0.053181f, -0.311797f, -0.372875f,
- -0.675717f, 2.409268f, -0.514720f, -0.214245f, -0.646596f, 0.009756f,
- 0.203993f, 0.093617f, -0.301290f, 0.253551f, -0.128909f, -1.448442f,
- -0.186823f, -0.278001f, -0.294993f, -0.176928f, -0.473605f, 0.062049f,
- -0.212084f, -0.137326f, 0.012505f, 0.087850f, -0.200413f, -0.394119f,
- -0.132224f, 0.146917f, 0.155746f, 0.198725f, -0.322541f, 0.196391f,
- -0.945500f, 0.036736f, -0.155646f, -0.677341f, 1.130545f, -0.339554f,
- 0.411628f, -0.355813f, -0.249843f, 0.213694f, -2.035607f, 0.055694f,
- -0.111669f, 0.408696f, -0.067043f, -0.048182f, 0.398110f, -0.067542f,
- 1.459801f, 0.236833f, -0.178806f, 0.168758f, 0.492387f, 0.099691f,
- -0.776680f, -0.172865f, 0.204225f, 0.193982f, 0.575685f, -0.062248f,
- 0.011486f, 0.058571f, -0.493391f, 0.026893f, -0.900467f, 3.793129f,
- -0.634613f, -0.064660f, -0.048262f, 0.361905f, 0.033641f, 0.245171f,
- -0.064671f, 0.034954f, 0.204358f, -0.904023f, -0.052714f, -0.250134f,
- 0.136700f, 0.000734f, -0.371720f, 0.226483f, 0.217958f, 0.060559f,
- 0.180111f, 0.000970f, 0.079556f, -0.096775f, 0.093855f, -0.026224f,
- -0.243664f, 0.004290f, 0.123281f, -0.239476f, 1.230374f, -0.107826f,
- -0.101982f, -0.153917f, 5.464427f, 0.304375f, -0.809957f, 0.090564f,
- -0.278416f, -0.245555f, -2.078421f, 0.243093f, -0.127666f, 0.052451f,
- -0.126662f, -0.783505f, 0.025149f, -1.422675f, -0.207769f, -0.362547f,
- 0.115310f, 0.133390f, 1.264754f, -0.027055f, -0.485312f, -0.240717f,
- -0.239722f, 0.146818f, -1.265043f, -0.235553f, 0.267104f, -0.021357f,
- -0.435949f, -0.309371f, 0.049920f, 1.302721f, -0.233978f, -0.097551f,
- -0.240631f, -0.287821f, -0.378380f, -0.273131f, -3.075169f, 0.226404f,
- -0.029361f, 2.703590f, -0.430659f, 0.067927f, -0.387520f, -0.370630f,
- -0.229236f, 0.085653f, -0.370956f, -0.065556f, -0.187859f, 0.068309f,
- -0.109299f, -0.259898f, -0.103644f, -0.271199f, -0.209350f, 0.140993f,
- -0.196713f, -0.135508f, -1.423209f, -0.406385f, -0.019956f, -0.864694f,
- 5.963707f, -0.201157f, 0.726377f, -0.011076f, 0.010553f, -0.102918f,
- -2.230088f, -0.258098f, -0.039547f, -0.029262f, -0.082324f, -0.860222f,
- -0.094735f, -1.381839f, 0.587298f, -0.173048f, 0.721360f, 0.241900f,
- 0.764302f, -0.023609f, -1.173755f, 0.103912f, -0.185363f, 0.078435f,
- -2.245062f, -0.127269f, 0.202234f, 0.158975f, -0.260909f, 0.098608f,
- -0.348247f, 1.732502f, -0.412298f, -0.269602f, -0.425771f, -0.146243f,
- -0.530730f, 0.125716f, -1.004419f, 0.145109f, -0.059289f, 1.096304f,
- 0.012891f, 0.045033f, -0.306875f, 0.003514f, -0.176110f, 0.037544f,
- -0.441537f, -0.518921f, -0.262149f, -0.060407f, -0.379419f, -0.141245f,
- -0.128894f, -0.176537f, -1.161318f, -0.249100f, -0.118330f, 0.042816f,
- 1.173404f, 0.088312f, -0.393568f, -0.175134f, 6.529819f, -0.326652f,
- -0.631917f, -0.393476f, 0.057781f, -0.217748f, -1.781139f, -0.012614f,
- -0.212621f, -0.720322f, -0.218498f, -0.388556f, -0.254796f, -0.248399f,
- -0.608744f, -0.265146f, 0.238517f, 0.066882f, -2.916806f, 0.054642f,
- 0.282590f, 0.075248f, 0.010188f, -0.133486f, 0.985945f, -0.045849f,
- -0.347564f, 0.057320f, -0.417920f, 0.063664f, 0.387062f, -2.692059f,
- -0.535549f, 0.263736f, 0.327889f, -0.070273f, -0.775254f, 0.147250f,
- 3.309425f, -0.212191f, -0.067204f, -2.912663f, -0.061496f, 0.084233f,
- 0.022907f, 0.138421f, -0.112159f, -0.288447f, -0.010799f, 0.056049f,
- -0.036527f, 0.021525f, 0.106649f, -0.291883f, 0.088424f, -0.057773f,
- -0.086031f, 0.015277f, -0.318505f, -0.269049f, -1.008913f, -0.224785f,
- -0.025820f, -0.649037f, 0.706381f, 0.096410f, 0.643776f, -0.046743f,
- -0.009654f, -0.024246f, 1.469255f, -0.183536f, -0.370046f, -0.048442f,
- -0.376527f, -0.431264f, -0.245109f, -0.093951f, 0.203683f, -0.099872f,
- 0.087210f, 0.160692f, -3.527694f, -0.068891f, -0.228994f, -0.231817f,
- -0.241949f, 0.193613f, 0.979597f, -0.091259f, 0.414424f, -0.047341f,
- -0.209582f, -0.295134f, -0.016824f, 0.460327f, -0.072671f, 0.246234f,
- 0.235896f, 0.127238f, -1.068683f, 0.035648f, 2.254888f, 0.180105f,
- -0.260098f, -2.322120f, -0.184249f, -0.314801f, -0.099969f, -0.272117f,
- -0.237916f, 0.031103f, -0.274063f, -0.049384f, -0.044917f, 0.102477f,
- -0.342148f, -0.257558f, -0.346300f, 0.115333f, -0.115456f, 0.208354f,
- -0.359301f, -0.167395f, 1.146514f, -0.177861f, -0.098658f, -0.444570f,
- 6.759993f, -0.369772f, -0.831118f, 0.001866f, -0.073298f, -0.072095f,
- 0.811902f, -0.431997f, -0.286587f, -0.269500f, 0.111492f, -0.525364f,
- -0.351785f, -2.463474f, -1.852659f, 0.135325f, 0.138267f, 0.100643f,
- -2.373278f, -0.285514f, -0.395388f, -0.185016f, -0.030249f, -0.005767f,
- -0.716424f, -0.031674f, 0.011147f, 0.057405f, -0.215873f, -0.094401f,
- 0.573528f, -1.223820f, 0.414852f, -0.059053f, -0.076488f, -0.287168f,
- -0.842640f, 0.174084f, -0.567186f, 0.336629f, -0.062514f, 2.075448f,
- -0.061680f, -0.131529f, -0.098994f, -0.204111f, -0.347865f, 0.108516f,
- -0.049616f, -0.069212f, -0.273935f, -0.096545f, -0.210784f, -0.284698f,
- 0.141501f, -0.176924f, -0.361341f, -0.251197f, -0.286694f, 0.245569f,
- -1.521661f, -0.122639f, -0.015760f, -0.718912f, 5.877828f, 0.146916f,
- 0.151767f, 0.220785f, -0.032298f, 0.230902f, 0.663943f, -0.252613f,
- 0.057718f, -0.436038f, -0.323994f, -1.139787f, -0.042489f, -1.326298f,
- -1.031206f, -0.104136f, 0.389897f, 0.127602f, -2.667789f, -0.212366f,
- -0.506262f, -0.009115f, -0.213202f, 0.076167f, -1.629405f, 0.055129f,
- 0.375393f, -0.150272f, -0.241515f, -0.326497f, 0.100069f, 0.410703f,
- 0.340622f, 0.042437f, -0.349945f, 0.041176f, -1.178950f, 0.030992f,
- 0.933908f, -0.035844f, -0.098660f, 1.030584f, -0.092043f, -0.355739f,
- -0.305562f, 0.036161f, -0.049558f, -0.033225f, -0.403856f, -0.088276f,
- 0.215493f, -0.149105f, -0.013363f, 0.025886f, -0.101306f, -0.205781f,
- -1.072487f, -0.076019f, 0.077555f, 0.131003f, 1.267763f, -0.008954f,
- -0.327617f, -0.246539f, 6.664081f, -0.404403f, -1.442489f, 0.191301f,
- -0.336361f, 0.181156f, 0.833108f, 0.007879f, -0.194464f, -1.029408f,
- -0.036268f, -0.927110f, -0.379190f, -0.293443f, -1.848579f, -0.242548f,
- -0.065990f, 0.203160f, -0.291788f, 0.000680f, 0.587011f, -0.241289f,
- 0.037034f, 0.000552f, 1.072308f, -0.387230f, -0.230050f, 0.292322f,
- -0.720001f, 0.034109f, -0.467260f, 2.211644f, -1.839191f, -0.048797f,
- -0.083469f, -0.334686f, -0.269056f, 0.051295f, 1.319904f, -0.035603f,
- -0.018457f, -0.824915f, -0.212285f, -0.230516f, -0.035093f, -0.400843f,
- -0.305469f, -0.099011f, 0.014225f, -0.452772f, 0.170331f, -0.389312f,
- -0.115084f, -0.014770f, -0.429387f, -0.155961f, -0.568200f, -0.037853f,
- -0.125137f, 0.067228f, -1.329271f, -0.117874f, -0.132499f, -0.218376f,
- -0.588325f, -0.320024f, 0.085695f, -0.235047f, -0.217790f, 0.103015f,
- -0.698644f, 0.017766f, -0.058299f, 0.199411f, -0.122485f, -0.563949f,
- -0.349011f, -0.557045f, -0.131165f, 0.002281f, 0.118559f, -0.210302f,
- -1.153815f, 0.116738f, -0.236007f, -0.003487f, -0.006885f, -0.244816f,
- 0.953222f, 0.093748f, 0.266869f, 0.241869f, -0.860832f, -0.387012f,
- -0.338986f, 2.097515f, -1.942512f, -0.298021f, 0.543911f, -0.043214f,
- 0.082125f, -0.120242f, 0.712231f, 0.213327f, -0.301687f, -0.544011f,
- -0.392131f, 0.004302f, 0.004825f, -0.317440f, -0.107518f, -0.293407f,
- -0.159111f, -0.080367f, 0.132663f, -0.017726f, -0.237521f, -0.190297f,
- -0.361633f, 0.200518f, -0.538296f, -0.027975f, -0.381704f, -0.016963f,
- 0.630105f, -0.190997f, -0.287840f, -0.603488f, 3.605598f, -0.276614f,
- -1.346383f, 0.186912f, -0.047575f, -0.189232f, -1.519072f, 0.097816f,
- -0.223722f, 0.304924f, -0.213022f, -1.052433f, -0.322283f, -1.706734f,
- -2.458027f, 0.237976f, 0.171050f, -0.103139f, -0.278689f, 0.329824f,
- -0.262448f, -0.122916f, -0.236398f, -0.013848f, -0.969160f, -0.374907f,
- 0.091018f, -0.386471f, -0.723940f, 0.064956f, -0.057652f, 1.321024f,
- -1.397418f, -0.143136f, 0.272468f, -0.030749f, 0.037324f, 0.069316f,
- -0.904925f, -0.333693f, -0.117709f, 2.279598f, -0.428065f, -0.131157f,
- -0.014288f, -0.402862f, -0.666090f, 0.017070f, -0.028333f, 0.002481f,
- 0.197156f, -0.038120f, -0.271062f, -0.188275f, -0.021370f, -0.070849f,
- -0.905007f, -0.095886f, -0.093055f, -0.121821f, -1.239812f, -0.411799f,
- -0.089948f, -0.936827f, 1.437569f, -0.388908f, 0.126170f, 0.186162f,
- -0.018819f, -0.138364f, -1.066412f, -0.138222f, -0.022186f, 0.107331f,
- -0.230436f, -1.352605f, -0.161323f, -1.081810f, -0.933825f, -0.136675f,
- 0.378157f, 0.113377f, -0.850610f, 0.080245f, -0.087305f, -0.002852f,
- 0.044408f, -0.188172f, -1.891998f, 0.092189f, 0.125325f, -0.105090f,
- -0.848510f, -0.396308f, -0.384130f, 2.007509f, -1.480787f, -0.126946f,
- 0.314767f, 0.000195f, -0.285628f, -0.110442f, -0.293948f, 0.258559f,
- -0.417603f, 1.570705f, 0.092459f, -0.340974f, -0.284754f, -0.007801f,
- -0.324610f, -0.004734f, -0.207716f, -0.057175f, 0.055467f, -0.210830f,
- -0.113005f, -0.299177f, 0.068074f, 0.017929f, -2.897598f, -0.260074f,
- -0.014422f, -0.206467f, 1.246997f, -0.372863f, -0.214160f, -0.114035f,
- 5.805862f, 0.003611f, -1.340990f, -0.021085f, -0.260431f, -0.002720f,
- -1.251640f, -0.353531f, -0.304009f, -0.153376f,
-};
-
-static const float av1_ab_partition_nn_bias_32_layer1[LABEL_SIZE] = {
- -0.521497f, -1.061572f, -0.078756f, -0.660662f, -0.403741f, -0.960163f,
- 0.001427f, 0.523607f, 0.225068f, -0.055273f, 1.019519f, 1.181880f,
- -0.010198f, 0.130597f, 1.276752f, 2.028188f,
-};
-
-static const NN_CONFIG av1_ab_partition_nnconfig_32 = {
- FEATURE_SIZE, // num_inputs
- LABEL_SIZE, // num_outputs
- 1, // num_hidden_layers
- {
- 64, // num_hidden_nodes
- },
- {
- av1_ab_partition_nn_weights_32_layer0,
- av1_ab_partition_nn_weights_32_layer1,
- },
- {
- av1_ab_partition_nn_bias_32_layer0,
- av1_ab_partition_nn_bias_32_layer1,
- },
-};
-
-// nn model for ab partition pruning, 16x16.
-static const float av1_ab_partition_nn_weights_16_layer0[FEATURE_SIZE * 64] = {
- 0.151902f, 0.007947f, -1.788454f, 0.431869f, -2.971387f, 0.923566f,
- 1.632542f, -1.665136f, -0.338632f, -5.075884f, 0.398267f, 0.030467f,
- 2.263534f, -0.045532f, -1.066128f, 0.915139f, -0.560500f, -3.293125f,
- 2.072793f, -1.011414f, 0.122716f, -0.060169f, -0.388860f, 0.031019f,
- -0.381861f, 0.001551f, -0.328472f, 0.038296f, -0.060398f, -0.375556f,
- 0.209226f, 0.014764f, -1.443469f, -0.345486f, 2.409269f, 1.524846f,
- -0.640666f, 1.322139f, -2.074771f, -0.580944f, -0.203960f, -0.072893f,
- 0.329701f, 0.115339f, -1.339542f, 0.249024f, -0.421545f, -0.409151f,
- -0.258293f, 0.836288f, -0.073685f, -0.009624f, 0.895712f, 0.320639f,
- 0.451002f, -1.544558f, 0.193709f, -1.389012f, 1.305451f, 0.089795f,
- 0.050338f, -0.017433f, -0.304667f, 0.500729f, 0.504346f, 0.073757f,
- 0.582649f, -0.993623f, 1.766766f, -3.067265f, -0.415774f, -0.006036f,
- -1.245281f, 0.253205f, -0.591245f, -0.626238f, 0.551852f, 0.593755f,
- 0.491023f, 1.099384f, -0.348448f, 0.054564f, -0.451422f, -0.375781f,
- -0.248390f, -0.052548f, -0.380069f, -0.165391f, -0.297968f, -0.052142f,
- -0.316381f, -0.045246f, -0.243905f, -0.034169f, -0.247523f, -0.180773f,
- 0.068066f, -0.374920f, 0.057536f, -0.189748f, 0.058375f, -0.267749f,
- -0.147286f, -0.246153f, 0.006183f, -0.202029f, -0.059128f, 0.116852f,
- 0.134719f, -0.126900f, -0.064646f, -0.196458f, -0.182331f, 0.108029f,
- -0.264499f, 0.155816f, -0.107255f, -0.056983f, -0.209771f, -0.099070f,
- 0.007313f, -0.254124f, -0.231964f, -0.275972f, 0.032098f, -0.264564f,
- -0.208743f, 0.155599f, -0.121511f, -0.156145f, -0.162315f, -0.059788f,
- -0.257073f, -0.076654f, -0.110616f, -0.321675f, -0.051952f, 0.006301f,
- -0.154114f, 0.017032f, -0.017364f, -0.233247f, 0.009918f, -0.179289f,
- -0.190722f, 0.147106f, -0.063910f, -0.396872f, -0.263123f, -0.003850f,
- -0.040718f, -0.324699f, 0.118660f, -0.170727f, -0.316788f, 0.100886f,
- -0.202842f, 0.045371f, 0.150561f, -0.057054f, -0.308150f, 0.028346f,
- -0.381473f, -0.195365f, 0.026221f, -0.281795f, 0.087204f, 0.047689f,
- -0.027643f, -0.104724f, -0.089030f, -0.117661f, -0.349160f, 0.056982f,
- -0.340273f, 0.048086f, 0.046103f, -0.121527f, 0.021697f, 0.054109f,
- -0.002768f, -0.008461f, -2.297240f, 0.124651f, 3.621661f, -0.057120f,
- -1.151656f, 2.296894f, -3.678720f, -0.290240f, 0.087683f, -0.186389f,
- 0.007656f, -0.090236f, -0.245217f, 0.110389f, -0.251719f, -0.029084f,
- -0.128203f, -0.100005f, -0.032779f, 0.007281f, -0.366596f, -0.267870f,
- -0.215620f, 0.047687f, 0.010303f, 0.097980f, -0.191569f, -0.341162f,
- 0.119249f, 0.026279f, -2.161546f, 0.459591f, 1.290566f, 1.791797f,
- -0.409835f, 0.127081f, -1.156367f, 0.198286f, 0.099561f, -0.067445f,
- -0.034352f, 0.017966f, -0.277380f, -0.057220f, -0.174198f, -0.014164f,
- 0.146090f, -0.357530f, 0.097644f, -0.000932f, 0.446603f, -0.066793f,
- 2.448620f, 0.937617f, -1.232922f, 0.313183f, 0.816827f, -0.275115f,
- -0.245205f, -0.126895f, 0.156668f, -0.186977f, -0.273505f, 0.013315f,
- 0.168629f, -0.089084f, 0.006166f, -0.116107f, -0.199316f, -0.024010f,
- -0.242303f, 0.011612f, -0.218485f, -0.229661f, -0.123922f, 0.136699f,
- 0.006732f, -0.148718f, -0.164225f, 0.116063f, 1.587898f, 0.690519f,
- 0.360566f, 0.009739f, -0.678702f, -0.046003f, 0.126984f, 0.605212f,
- 1.240663f, -0.000228f, -1.119369f, -0.415589f, -0.721003f, 0.097936f,
- -1.410586f, -2.358833f, -2.773129f, -3.983361f, -0.087144f, -0.050029f,
- -0.242255f, 0.137424f, -0.307490f, -0.084637f, -0.023812f, -0.196582f,
- -0.078695f, 0.038257f, -0.012110f, -0.263521f, 0.009839f, -0.109125f,
- -0.226036f, 0.060712f, 0.093671f, 0.153143f, 0.039116f, -0.290891f,
- 0.227057f, -0.204633f, -0.207539f, -0.148242f, 0.046204f, -0.231268f,
- -0.209315f, -0.307579f, -0.436556f, 0.023475f, 0.131793f, -0.038301f,
- 1.650584f, 0.392570f, 1.446576f, 1.254380f, -0.516867f, -0.057116f,
- 0.149320f, 0.414424f, -0.246309f, 0.003877f, -0.480238f, -1.037035f,
- -0.830779f, -1.122244f, -0.408267f, -0.253956f, 0.382005f, 0.940609f,
- -1.113370f, -0.018554f, 0.141064f, -0.182504f, 1.270707f, 0.414904f,
- -0.216036f, 0.203831f, 0.450716f, -0.452909f, 0.139358f, -0.027143f,
- 1.956892f, 1.643732f, -0.867839f, -0.620520f, -0.334607f, -0.519982f,
- 0.205023f, 0.661159f, -0.000809f, 0.049033f, -0.348579f, -0.200338f,
- -0.362144f, -0.346590f, -0.230096f, 0.180746f, -0.149954f, -0.253429f,
- -0.378170f, -0.040724f, -0.041597f, 0.243659f, -0.472181f, 0.015401f,
- -0.180376f, 0.153139f, -0.247738f, -0.010485f, -0.157158f, 0.016825f,
- -0.238925f, -0.265798f, -0.318374f, 0.142352f, -0.210520f, 0.051928f,
- -0.352190f, -0.179052f, -0.185498f, 0.025540f, -0.111667f, -0.235187f,
- -0.215454f, 0.010931f, -0.238372f, -0.126659f, 0.075691f, -0.091167f,
- -2.462379f, -0.007950f, -0.637990f, 0.285554f, -0.051275f, 0.282279f,
- -0.744083f, -0.570646f, 0.592198f, 1.421332f, -0.256027f, -0.140315f,
- 0.160247f, -0.063185f, -0.055895f, -0.199864f, -0.287353f, -0.074561f,
- -0.071228f, 0.055864f, -1.084764f, -0.263409f, 0.779266f, 0.228187f,
- 0.375013f, 0.121204f, -0.656948f, 0.533561f, 0.272671f, -0.015423f,
- -0.124180f, -0.009127f, 2.934838f, -0.150998f, 1.163152f, 0.081997f,
- -4.715939f, -3.676595f, -1.524886f, -0.167593f, 0.281186f, 0.024046f,
- -1.451709f, 0.332558f, 0.990504f, 0.376290f, -1.466773f, -0.448439f,
- -2.929108f, -4.255188f, 0.065238f, 0.019950f, 1.372393f, 0.444052f,
- -2.538772f, 1.579767f, -0.464911f, -1.866114f, 1.053958f, 0.434467f,
- -0.125964f, 0.034671f, 0.077116f, -0.138466f, -0.413395f, -0.223453f,
- -0.172127f, -0.251265f, -0.048239f, -0.395519f, 0.023141f, 0.037459f,
- -0.249593f, -0.062215f, -0.047209f, -0.435189f, -0.164155f, -0.077590f,
- -0.241164f, -0.126128f, -0.038243f, -0.180888f, 0.198840f, -0.328036f,
- -0.169790f, 0.036506f, 0.052572f, -0.183570f, -0.073617f, -0.244959f,
- 0.266498f, 0.032846f, -1.902106f, 0.486078f, 2.414993f, 0.975182f,
- -0.382875f, 1.647810f, -2.197017f, -0.890107f, 0.221287f, 0.010889f,
- 3.817042f, 0.572728f, 0.092466f, 0.473337f, -1.634659f, -1.069455f,
- 1.486776f, -1.023850f, 0.088184f, 0.008842f, 0.518202f, 0.270259f,
- 1.757191f, -0.121839f, -2.912229f, -1.250866f, -2.381808f, 0.335309f,
- -0.120079f, -0.061294f, -0.058725f, -0.315169f, -0.262443f, 0.072434f,
- -0.267836f, -0.319354f, -0.274975f, 0.068970f, -0.406467f, 0.044074f,
- -0.152311f, -0.333656f, -0.228355f, -0.185613f, 0.017346f, -0.177674f,
- -0.090675f, -0.102047f, -0.011768f, -0.025280f, -0.271661f, 0.098099f,
- -0.312272f, -0.222217f, -0.100548f, 0.106260f, -0.034655f, 0.135109f,
- -0.021276f, 0.018177f, -0.353097f, -0.011128f, 0.061136f, -0.511662f,
- -0.223236f, -0.308841f, 0.118789f, -0.154628f, -0.053178f, -0.055973f,
- 0.013175f, -0.368337f, -0.090863f, -0.116920f, 0.178990f, -0.025278f,
- -0.190553f, -0.238092f, 0.303943f, -0.024944f, 0.719373f, 0.384332f,
- -0.378480f, -0.423316f, 0.709922f, 0.758514f, -1.559023f, -2.503173f,
- 0.068652f, -0.234741f, -0.182932f, 0.037878f, 0.020684f, -0.174142f,
- -0.182300f, -0.052796f, -0.219145f, 0.113028f, -1.041826f, 0.035317f,
- 0.919904f, -0.676011f, 0.652297f, 1.456447f, -0.166904f, -0.861823f,
- 0.895827f, 0.429821f, -0.180376f, -0.076587f, -0.273945f, -0.288990f,
- -0.206692f, -0.080745f, -0.085444f, 0.186953f, -0.050135f, 0.044243f,
- -0.391706f, -0.160498f, -0.292268f, 0.164060f, 0.412649f, 0.211611f,
- -0.327294f, -0.919399f, 0.320297f, 0.385284f, -0.088848f, -0.072556f,
- -0.384813f, -0.176267f, -0.065918f, 0.134724f, -0.231104f, -0.337707f,
- -0.195442f, -0.263569f, 0.098090f, -0.341411f, -0.189211f, -0.439276f,
- -0.404046f, 0.262491f, -0.311093f, -0.086454f, -0.013400f, -0.061447f,
- -0.026945f, -0.112036f, -0.322985f, 0.078500f, -0.230205f, -0.344535f,
- -0.021087f, 0.110220f, -0.128671f, 0.044219f,
-};
-
-static const float av1_ab_partition_nn_bias_16_layer0[64] = {
- 2.936406f, -0.396539f, -0.110456f, -1.254954f, 0.785350f, 0.516290f,
- -0.172341f, 0.254386f, -0.192465f, -0.106751f, -0.055518f, -0.094994f,
- 0.000000f, -0.065018f, -0.004908f, -0.130483f, -0.119580f, -0.142072f,
- 0.457446f, -0.125051f, -0.107712f, 0.714607f, -0.140809f, -1.788650f,
- -0.087199f, 0.000000f, -1.290050f, 0.443930f, -0.110634f, -0.109380f,
- -0.188213f, -1.414179f, 1.193579f, 0.388775f, -0.873193f, -0.110050f,
- -0.072565f, -0.117050f, -0.119132f, 0.456959f, -0.132069f, 0.131974f,
- 1.160474f, 1.746465f, 0.442628f, -0.188849f, -0.207794f, -0.108364f,
- -0.856655f, -2.141620f, 0.335476f, -0.105508f, -0.212162f, -0.109319f,
- -0.237213f, -0.109980f, -0.291044f, -0.137877f, 0.470191f, -0.023908f,
- 0.123809f, -0.109797f, 0.200510f, -0.147542f,
-};
-
-static const float av1_ab_partition_nn_weights_16_layer1[64 * LABEL_SIZE] = {
- -6.823716f, 1.406568f, -0.144009f, 2.228765f, 0.838336f, 0.738107f,
- -0.319014f, -0.148756f, 0.240862f, -0.111089f, -0.004241f, 0.025758f,
- -0.193820f, -0.246362f, -0.181363f, -0.201556f, 0.024268f, 0.252994f,
- -0.289443f, 0.194932f, 0.057467f, 0.724735f, 0.014063f, 1.361352f,
- 0.025191f, 0.024274f, 0.231462f, -7.227959f, -0.094515f, 0.039946f,
- 0.412719f, 0.812318f, 3.038903f, -0.286289f, 0.647482f, -0.115114f,
- 0.053590f, 0.066069f, 0.153134f, 0.996250f, -0.125700f, 0.951365f,
- -6.243494f, -4.827697f, 0.566320f, 0.239515f, -0.099702f, 0.054546f,
- 1.847330f, 3.680076f, -3.049829f, -0.127709f, 0.068469f, -0.017794f,
- 0.223864f, -0.106778f, -0.020425f, -0.040226f, -0.251890f, -0.168673f,
- -0.552073f, 0.043311f, 0.218668f, 0.033209f, -3.199210f, 0.193079f,
- 0.321406f, 0.718307f, -0.181418f, -0.459612f, -1.981170f, 0.968496f,
- -0.029757f, -0.130065f, 0.043782f, 0.072394f, -0.088686f, 0.025322f,
- 0.129882f, 0.101324f, 0.335707f, 0.072714f, -2.079774f, 0.203997f,
- 0.239321f, -0.301757f, 0.257845f, 1.288382f, -0.031275f, -0.234194f,
- 0.310722f, 2.045469f, 0.034716f, 0.135638f, -0.251388f, 0.320071f,
- -1.065301f, -0.322731f, -0.545028f, 0.226276f, 0.090799f, 0.019289f,
- 0.048950f, -1.079300f, 0.231938f, 0.083683f, 4.762127f, 0.145037f,
- -0.145549f, 0.075592f, 0.172336f, 0.108175f, 0.333751f, 1.090501f,
- 1.056114f, 0.047073f, 0.182052f, -0.081587f, 0.089900f, 0.339286f,
- 2.049988f, 0.073585f, 0.537355f, -0.243322f, -0.010179f, -0.052601f,
- -0.174915f, 0.117793f, 2.222990f, -2.520837f, -0.092699f, 1.199887f,
- 0.138720f, 0.679918f, -0.463155f, -0.659496f, -0.109913f, -0.003398f,
- 0.114633f, -0.128377f, 0.092970f, -0.107489f, -0.191078f, 0.185182f,
- 0.216980f, -0.019343f, 3.443133f, 0.287953f, 0.099314f, 0.985958f,
- 0.157268f, -0.606516f, 0.049418f, -0.221809f, -0.453081f, -0.344796f,
- -0.003735f, -0.107269f, -0.128541f, -0.259543f, -0.934806f, -0.542456f,
- -1.011192f, 0.022795f, 0.186363f, -0.076356f, -0.050932f, -0.165098f,
- 0.168177f, -0.101596f, -5.270886f, 2.553943f, -0.440870f, -0.017494f,
- 0.215208f, -0.017032f, 1.495915f, -4.304677f, 0.762211f, 0.182937f,
- 0.254406f, -0.029433f, -0.088364f, -0.110160f, -0.108257f, -0.036538f,
- 0.737697f, -0.234989f, 0.168095f, 0.245118f, -0.077262f, 0.195718f,
- 0.753302f, -1.637869f, 0.126227f, 0.982129f, -0.121444f, -0.295570f,
- -1.215799f, 0.147867f, -0.068496f, 0.132726f, -0.005772f, -0.181774f,
- 0.126513f, 0.204723f, -0.366123f, 0.103906f, -0.148053f, -0.075272f,
- 0.243884f, -0.104828f, 0.198988f, 0.501034f, -0.112671f, 0.111421f,
- 0.167508f, -0.117803f, -0.738624f, 2.046292f, 0.124011f, 0.057983f,
- -0.359154f, -0.648883f, -0.259462f, -0.459041f, -2.501223f, -0.065138f,
- 0.122417f, 0.060291f, -0.129033f, -0.843086f, 0.268241f, -0.399927f,
- 1.585888f, 1.816393f, -0.631427f, 0.127826f, 0.088105f, 0.073488f,
- 0.717694f, -1.497362f, 2.608528f, 0.066896f, -0.079230f, 0.223436f,
- -0.010530f, 0.175310f, 1.120365f, 0.034391f, 0.835312f, 0.071652f,
- -0.080615f, 0.111395f, 0.162742f, 0.079927f, -3.859582f, -0.638431f,
- -0.167880f, -0.992659f, -0.885355f, -1.276197f, 1.334344f, 0.931940f,
- -0.078244f, -0.149030f, -0.070974f, -0.133566f, 0.200034f, 0.102793f,
- -0.048546f, 0.063545f, 0.023864f, -0.190863f, 1.934257f, -0.136286f,
- -0.107916f, -0.637468f, 0.066449f, 1.089693f, -0.214047f, -0.265780f,
- 0.899660f, -0.130333f, 0.288311f, -0.049024f, 0.090202f, 0.487969f,
- 0.339704f, 0.858479f, 0.841253f, -0.184100f, -0.637070f, -0.125071f,
- -0.077650f, -0.087877f, 0.202268f, -0.027300f, 2.842862f, -0.100698f,
- -0.259080f, 0.260556f, 0.157912f, -0.070364f, 0.467190f, 1.200037f,
- 1.419317f, -0.033588f, -0.227824f, 0.292617f, 0.228574f, 0.213839f,
- -1.091099f, -0.022258f, -1.294681f, 0.136118f, 0.081652f, -0.185359f,
- -0.039706f, 0.191407f, -2.053219f, -0.261934f, 0.047812f, -0.029536f,
- -0.823869f, -1.090534f, -0.755890f, 0.441035f, -0.167945f, 0.231441f,
- -0.135013f, -0.260762f, 0.256872f, 0.130339f, -0.243751f, 0.189760f,
- -0.288454f, 0.145363f, 0.338490f, 0.403898f, -0.022814f, -1.263598f,
- -0.101315f, 0.860135f, 0.136511f, 0.028942f, 0.574047f, 2.656370f,
- 0.037587f, -0.188690f, -0.125312f, 1.100435f, -1.080402f, 0.380905f,
- 0.004635f, 0.097144f, -0.214309f, 0.085552f, -0.285066f, -0.705134f,
- -0.054704f, -0.319951f, 5.486626f, 0.958158f, -1.380585f, 0.223340f,
- -0.169167f, -0.170697f, -0.216748f, 0.324232f, 2.684204f, -0.008490f,
- -0.211052f, -0.201190f, 0.123466f, -0.000234f, 0.579907f, 0.096938f,
- -0.042745f, 0.201855f, 0.157195f, -0.261440f, 0.029699f, -0.046599f,
- 1.618216f, -2.596280f, -0.377420f, -0.526725f, -0.493592f, -0.579615f,
- 0.579699f, -0.100392f, 0.150694f, 0.061794f, 0.200425f, -0.062515f,
- -0.179122f, 0.250112f, -0.344675f, -0.118359f, -0.095670f, 0.152311f,
- 3.662276f, -0.154921f, -0.312991f, 0.972008f, -0.308596f, -0.190426f,
- 0.133889f, -0.238673f, -0.094726f, 1.683835f, -0.215629f, -0.198890f,
- -0.035278f, -0.367973f, -0.822435f, 0.240848f, -0.194656f, 0.034655f,
- -0.079424f, 0.146670f, 0.026646f, -0.034507f, 0.059467f, -0.153109f,
- -0.431033f, 2.552991f, -1.894091f, -0.180462f, -0.306839f, -0.025648f,
- 1.026326f, -3.096230f, 1.346935f, 0.033633f, -0.181827f, 0.094376f,
- 0.001696f, -0.379264f, -1.069503f, -0.140972f, -0.208769f, -0.195239f,
- 0.281795f, -0.127251f, 0.180776f, 0.067763f, 0.697124f, -1.040779f,
- 0.111280f, 0.188351f, -0.340234f, -0.207790f, -0.720075f, -0.137409f,
- -0.070310f, -0.032918f, -0.060787f, 0.131484f, -0.077845f, -0.258652f,
- 0.056911f, -0.062034f, 0.007663f, -0.185100f, 1.340361f, 0.014096f,
- -0.124602f, 0.194241f, 0.128383f, 0.360465f, 0.082979f, -0.050475f,
- -0.519294f, 3.323262f, 0.067014f, 0.221203f, -0.085082f, -0.228606f,
- -0.916668f, -0.022643f, -1.386737f, -0.131902f, -0.349952f, -0.032874f,
- -0.189190f, -0.898790f, -0.102394f, -1.017387f, 2.214050f, 1.790253f,
- -1.913561f, -0.043716f, -0.214924f, -0.194598f, -0.064723f, -1.671793f,
- 2.251166f, -0.146007f, 0.138527f, -0.003134f, 0.103665f, 0.006928f,
- -0.240253f, -0.227464f, 0.578437f, -0.214724f, 0.503085f, 0.158093f,
- 0.033091f, 0.008061f, 4.815371f, 2.132264f, 0.281850f, -2.288560f,
- -0.145012f, 1.296832f, -0.362401f, -0.403252f, 0.109873f, 0.185746f,
- 0.244764f, 0.172367f, -0.185588f, 0.139801f, -0.178254f, 0.068629f,
- 0.358488f, -0.153969f, -6.433524f, 0.225983f, -0.138123f, -0.095971f,
- -0.036089f, -1.400083f, 0.265908f, 0.257787f, 0.181144f, -1.647228f,
- -0.136289f, -0.074206f, 0.122988f, -0.088895f, -1.266717f, 0.006010f,
- 0.536681f, 0.263061f, -0.032207f, -0.155136f, 0.086431f, 0.441950f,
- -0.060755f, -0.280683f, -0.783475f, -2.567033f, 1.093221f, 0.117667f,
- -0.000408f, 0.225719f, -2.199698f, 0.141447f, -1.459051f, 0.051315f,
- 0.203228f, 0.354432f, -0.005775f, -0.028073f, -0.965817f, 0.231083f,
- -0.666884f, 0.026283f, -0.317486f, 0.210754f, 0.123897f, 0.223827f,
- 4.214405f, 1.457334f, -0.253945f, -1.306733f, -0.391235f, 0.451154f,
- -1.553888f, -0.353429f, 0.069533f, 0.159278f, -0.173836f, -0.004952f,
- -0.137033f, 0.127012f, 0.143600f, 0.051587f, -0.070549f, 0.066509f,
- -5.776547f, 0.180021f, -0.189183f, -1.288504f, -0.233575f, -1.473873f,
- 0.140940f, 0.144451f, -0.104534f, 2.089873f, -0.168168f, 0.110726f,
- 0.132134f, -0.215223f, -1.682754f, 0.157757f, -0.146163f, 0.064882f,
- 0.117313f, -0.038780f, -0.124720f, -0.501697f, 0.092047f, -0.233992f,
- 3.324976f, 0.516601f, 1.294202f, 0.119989f, 0.061055f, 0.043420f,
- -2.750727f, -0.382812f, -0.648496f, -0.115353f, -0.334205f, 0.024354f,
- -0.282998f, -0.282705f, 0.073798f, 0.169851f, 0.135651f, 0.182677f,
- -0.040220f, 0.132462f, -0.303120f, -0.230113f, 6.165739f, -0.258596f,
- 0.024127f, -1.388283f, -0.006042f, 0.572600f, 0.348411f, -0.387376f,
- -0.075845f, 0.122319f, -0.029616f, 0.077873f, 0.154763f, 0.049073f,
- 0.018597f, 0.102688f, -0.204165f, 0.020734f, -1.389133f, -0.032854f,
- -0.147561f, 0.853944f, 0.132100f, -3.259659f, 0.243745f, 0.181529f,
- -0.738414f, 1.509994f, 0.023470f, -0.005329f, 0.066115f, -1.345081f,
- -1.455402f, -0.172023f, -0.194625f, 0.071885f, -0.201742f, -0.262402f,
- 0.077601f, -0.048938f, 0.257993f, -0.504029f, -2.032415f, 1.158880f,
- 0.448647f, -0.025633f, 0.117586f, -0.072275f, -0.673744f, -3.854342f,
- -0.983843f, 0.047766f, -0.017193f, -0.215775f, -0.158743f, -0.232042f,
- -0.509112f, 0.148812f, 0.130122f, 0.006486f, -0.099016f, 0.022514f,
- -0.486850f, -0.059623f, 4.012731f, 0.025454f, 0.029059f, -0.783546f,
- -0.295260f, 0.322521f, -0.473201f, -0.172100f, -0.100087f, -0.076516f,
- -0.258367f, -0.112897f, 0.269364f, -0.065912f, 0.169022f, -0.178783f,
- -0.095114f, 0.122089f, -2.790099f, -0.100431f, -0.087963f, -0.009431f,
- -0.087819f, -2.774399f, -0.100757f, 0.013005f, -0.964533f, 3.236665f,
- -0.354903f, -0.144169f, -0.166869f, -1.396513f, -0.931271f, -0.046261f,
- -1.799262f, -0.365269f, 0.108611f, 0.037994f, 0.024747f, -1.073639f,
- -0.203158f, -0.935006f, 1.880891f, 1.578385f, 0.726272f, -0.024546f,
- -0.011626f, -0.151363f, -1.121716f, -1.787484f, 0.232806f, 0.075451f,
- 0.182899f, 0.092215f, -0.207347f, -0.030111f, 0.054316f, 0.192481f,
- 0.594639f, -0.247694f, 0.547471f, -0.032094f, -0.065000f, 0.007198f,
- 1.605377f, -0.155945f, -0.066200f, -2.343716f, -1.016283f, -0.079321f,
- 0.919365f, 0.599980f, 0.125545f, 0.265813f, 0.246884f, 0.095385f,
- -0.260374f, -0.202916f, -0.042770f, 0.234967f, -0.233139f, -0.326994f,
- -1.375256f, 0.121766f, 0.077433f, -1.103569f, 0.019497f, -1.029185f,
- 0.253905f, 0.206569f, 0.187334f, -0.237089f, -0.294351f, 0.164137f,
- 0.149696f, -0.749787f, -0.413433f, 0.976587f, 1.027976f, -0.285264f,
- 0.209273f, -0.124762f, 0.050884f, 0.250764f, -0.082031f, -0.646520f,
- 4.116680f, 0.437336f, 0.671684f, 0.129509f, -0.078462f, 0.014072f,
- -0.678232f, 0.094831f, 1.125624f, 0.207070f, -0.154750f, -0.025780f,
- -0.103030f, 0.118019f, -0.908186f, -0.263546f, -1.555324f, -0.236887f,
- -0.217854f, -0.051790f, 0.017915f, 0.171001f, 1.355562f, 0.094603f,
- -0.233929f, -1.282169f, -0.773183f, -0.161682f, -0.834565f, -0.286776f,
- -0.298901f, 0.038162f, 0.251899f, 0.039612f, -0.022935f, -0.232308f,
- -0.043855f, -0.192892f, -0.279009f, -0.182234f, -1.272808f, -0.070344f,
- -0.092432f, -1.915946f, -0.134373f, -1.405496f, -0.067071f, -0.131922f,
- 0.185269f, 1.465082f, 0.040240f, 0.112665f, 0.144329f, -0.286112f,
- -0.617649f, 0.916177f, 0.221044f, -0.079867f, 0.170251f, -0.093638f,
- -0.212620f, -0.305945f, -0.234356f, -0.482501f, 3.928472f, 1.241179f,
- 0.355922f, -0.170848f, -0.189168f, 0.080225f, -1.357793f, 0.190890f,
- 0.976800f, -0.068070f, -0.016295f, -0.088623f, -0.129560f, -0.212267f,
- -0.071537f, -0.219501f, -0.655198f, -0.225188f, -0.116024f, 0.224174f,
- -0.049715f, -0.178005f, 3.029985f, -1.141546f, 0.080066f, -1.932316f,
- -0.641137f, -0.189564f, 0.935080f, 0.136119f, 0.015558f, -0.179331f,
- 0.204571f, 0.020350f, 0.009362f, 0.108478f, 0.037076f, -0.049009f,
- 0.081090f, -0.180202f, 1.455561f, -0.081559f, 0.059361f, 0.484971f,
- 0.160923f, -2.170744f, -0.013204f, 0.126561f, -0.407122f, 1.223661f,
- 0.044262f, 0.118044f, 0.058274f, -1.747100f, -0.171318f, 0.971374f,
- 0.306995f, -0.103268f, -0.319443f, -0.333176f, -0.038608f, 0.119674f,
- -0.106479f, -0.907933f, 1.121231f, 1.673840f, -0.421458f, -0.021146f,
- -0.254838f, 0.097632f, 0.235109f, -2.901782f, 0.289518f, -0.355459f,
- -0.068264f, -0.179121f, 0.068560f, -0.047570f, -0.522523f, -0.228963f,
- -1.037158f, -0.163723f, 0.280563f, -0.000868f, -0.197220f, -0.239329f,
- 1.985274f, -0.256181f, -0.064341f, -0.822417f, -0.465140f, -0.010942f,
- -0.792024f, -0.114290f, 0.060969f, 0.104106f, -0.252123f, -0.150400f,
- -0.133277f, 0.267147f, 0.274413f, 0.223744f, -0.180223f, -0.345415f,
- -0.104883f, 0.119210f, -0.095041f, -0.301635f, 0.013175f, -2.128121f,
- -0.147208f, -0.151509f, -0.692013f, 3.418555f, -0.016541f, 0.171511f,
- 0.107159f, -1.516672f, 0.127408f, 0.687035f, -0.906486f, -0.145463f,
- -0.169382f, -0.143906f, 0.125091f, -0.960645f, -0.180869f, -0.716908f,
- 2.840951f, 1.904919f, -0.416268f, -0.425181f, -0.194697f, -0.075932f,
- -0.950604f, -1.599800f, 0.943671f, -0.022744f, -0.270492f, 0.080843f,
- -0.372916f, 0.047838f, -0.100300f, -0.026600f, 0.011733f, -0.226051f,
- 0.172790f, -0.172982f, 0.041258f, -0.299379f,
-};
-
-static const float av1_ab_partition_nn_bias_16_layer1[LABEL_SIZE] = {
- -0.053805f, -1.248639f, 0.520965f, -0.904962f, -0.126425f, -0.118798f,
- 0.748430f, 0.203096f, 0.059317f, 0.418219f, 0.841294f, 0.402693f,
- -0.658522f, 0.723479f, 0.544264f, 1.035225f,
-};
-
-static const NN_CONFIG av1_ab_partition_nnconfig_16 = {
- FEATURE_SIZE, // num_inputs
- LABEL_SIZE, // num_outputs
- 1, // num_hidden_layers
- {
- 64, // num_hidden_nodes
- },
- {
- av1_ab_partition_nn_weights_16_layer0,
- av1_ab_partition_nn_weights_16_layer1,
- },
- {
- av1_ab_partition_nn_bias_16_layer0,
- av1_ab_partition_nn_bias_16_layer1,
- },
-};
-
-#undef FEATURE_SIZE
-#undef LABEL_SIZE
-
-#define FEATURE_SIZE 18
-#define LABEL_SIZE 4
-
-static const float av1_4_partition_nn_weights_16_layer0[FEATURE_SIZE * 24] = {
- -2.032866f, 0.056691f, 0.495960f, 0.778785f, 0.548153f, -0.806942f,
- 0.481155f, 0.282298f, 0.584980f, 0.504688f, 0.209648f, 0.234616f,
- 0.213484f, 0.221969f, 0.205862f, 0.235054f, 0.317863f, 0.257139f,
- 0.529478f, 0.098122f, -0.657532f, 0.036296f, 0.327728f, 1.323180f,
- -0.813082f, 0.160216f, -0.702030f, 0.722733f, -0.270576f, -0.347416f,
- -0.264700f, -0.254248f, 0.159820f, 0.087995f, -0.184163f, 0.117357f,
- 0.074194f, -0.667369f, 0.498246f, 0.420506f, 0.072409f, -0.121581f,
- 0.315788f, 0.000525f, 0.414986f, 0.678166f, -0.011230f, 0.188131f,
- -0.227749f, 0.009564f, 0.108672f, 0.106923f, -0.080695f, -0.279382f,
- -0.061339f, -0.297835f, -0.134707f, 0.145865f, -0.009655f, -0.000842f,
- -0.047436f, -0.159149f, -0.320353f, -0.089646f, -0.344765f, 0.313416f,
- -0.143413f, 0.279668f, 0.000885f, -0.022380f, -0.140194f, -0.310473f,
- 0.252699f, 0.066204f, 0.477568f, 0.994609f, -0.276000f, 1.213182f,
- 0.277028f, -0.411570f, -0.211559f, 0.377815f, 0.121488f, -0.100559f,
- -0.317082f, -0.251039f, -0.335181f, -0.154114f, -0.052726f, -0.332558f,
- -0.143196f, -0.334035f, 0.162305f, 0.142279f, -0.001210f, -0.135252f,
- -0.033562f, 0.204307f, -0.039757f, -0.394174f, 0.126617f, -0.128648f,
- -0.410979f, 0.107641f, -0.117573f, -0.326512f, 0.235166f, 0.084959f,
- 0.290063f, -0.005838f, 0.459894f, 1.023709f, -0.196145f, 1.100137f,
- -0.319815f, -0.308526f, -0.443389f, -0.272769f, -0.035259f, -0.026932f,
- -0.029743f, 0.125113f, -0.131024f, -0.321458f, -0.143996f, 0.008714f,
- -0.101234f, 0.079706f, -1.128615f, -0.467381f, 0.220563f, -0.409900f,
- -0.435353f, 0.759499f, -0.465799f, -0.394309f, 0.176282f, -0.086275f,
- -0.161225f, -0.354814f, 0.562871f, 0.418253f, 0.414361f, 0.445480f,
- -0.995903f, -0.086632f, -0.230645f, 0.354656f, -0.317576f, 0.079926f,
- 0.424369f, 0.997232f, -0.304388f, 1.071667f, -0.023540f, 0.029677f,
- 0.108564f, 0.183581f, -0.201395f, -0.054854f, -0.193039f, -0.049899f,
- -0.271949f, -0.358483f, 0.304930f, 0.023823f, -0.009319f, -0.214247f,
- 0.100712f, -0.050162f, 0.327103f, -0.212999f, -0.030496f, 0.316380f,
- -0.439589f, -0.249959f, 0.229777f, -0.353664f, -0.384559f, 0.114236f,
- 0.023119f, 0.007927f, 0.618368f, 0.957759f, -0.019780f, -1.002389f,
- 0.564277f, -0.839531f, 1.040445f, 0.054340f, 0.031908f, -0.032893f,
- -0.019170f, -0.042011f, 0.568928f, 0.362567f, -0.559999f, -0.605344f,
- -0.586146f, -0.290778f, 0.195943f, -0.109580f, -0.088898f, -0.113054f,
- 0.293282f, 0.429019f, 0.306136f, 0.863025f, 0.021234f, 0.125770f,
- -0.097108f, -0.072659f, -0.137053f, -0.191631f, 0.106281f, 0.064151f,
- 0.029883f, 0.076287f, 0.757543f, 0.276713f, -2.529775f, -0.351727f,
- -1.832316f, 0.544780f, -0.944529f, 0.509705f, -0.010236f, -0.016181f,
- 0.021520f, 0.086417f, 0.041312f, 0.296853f, -0.372378f, 0.354446f,
- -1.366762f, 0.048875f, 0.464918f, -0.007450f, 0.750013f, -0.360261f,
- 0.518532f, 0.753776f, 0.641448f, 0.710746f, 0.250866f, 0.257063f,
- 0.283421f, 0.253585f, 0.170303f, 0.210426f, 0.208842f, 0.158000f,
- -0.033144f, 0.130748f, 0.907147f, 0.409248f, -0.854301f, -0.981307f,
- 0.294427f, -0.507137f, 1.079967f, 0.203203f, 0.383890f, 0.368278f,
- 0.305122f, 0.449288f, -0.044507f, -0.547263f, -0.298245f, -0.497834f,
- 0.007016f, -0.101982f, -0.073488f, -0.096111f, -0.479418f, -0.045497f,
- 0.033502f, -0.018578f, -0.231531f, 0.177949f, 0.099564f, -0.010233f,
- -0.333055f, -0.078586f, -0.417867f, 0.171271f, 0.013662f, -0.143599f,
- -0.117296f, 0.135382f, 0.048321f, 0.000924f, -0.055024f, -0.405595f,
- -0.068260f, -0.271011f, -0.436425f, 0.206751f, -0.899890f, 0.605510f,
- 0.535649f, -0.238919f, -0.037619f, -0.213734f, -0.391360f, -0.132344f,
- 0.004660f, 0.176644f, -1.008475f, -0.038895f, 0.155429f, -0.095229f,
- -0.680124f, -0.258063f, -0.261901f, 0.110380f, -0.337649f, -0.505870f,
- -1.428536f, 0.610629f, 0.254905f, 0.045098f, 0.044109f, 0.172329f,
- 0.060001f, -0.234009f, -0.184855f, -0.153028f, -0.140897f, -0.152006f,
- -0.312134f, 0.081261f, 0.160166f, 0.112690f, 0.266081f, 0.030175f,
- -0.242746f, 0.000754f, -0.341811f, -0.149774f, -0.017484f, -0.301342f,
- -0.121466f, 0.067300f, 0.342176f, 0.474538f, 0.085441f, -0.263935f,
- 0.479235f, -0.003713f, -0.784840f, 0.119480f, 0.456632f, -0.640082f,
- -0.080575f, -0.744403f, 0.259970f, 0.034667f, -0.274641f, -0.257594f,
- -1.121124f, -0.003745f, -0.420693f, 0.300441f, -0.100976f, -1.049016f,
- 0.201960f, 0.113054f, 0.187010f, 1.237427f, 0.054803f, -0.028673f,
- 0.003596f, -0.034724f, 0.117246f, 0.190977f, 0.278915f, 0.224307f,
- 0.017852f, -0.336233f, -0.372311f, -0.182284f, -0.143510f, 0.331466f,
- 0.045698f, -0.301095f, 0.184447f, 0.348240f, -0.017021f, -0.145064f,
- -0.000221f, -0.382256f, -0.302683f, -0.083927f, -0.008070f, 0.217907f,
- 0.647597f, -0.050490f, -0.572736f, -0.985748f, -0.289943f, 0.041391f,
- -0.795464f, -0.186680f, -0.354062f, -0.617400f, -0.282783f, -0.170450f,
- -0.197197f, -0.146496f, -0.173692f, -0.106277f, -0.071004f, -0.124405f,
- -0.971412f, 0.038542f, 0.705204f, 0.887113f, 0.150430f, -0.243676f,
- 0.638410f, 0.320953f, 0.776676f, 0.527584f, 0.070389f, 0.051554f,
- 0.177519f, 0.140451f, 0.128892f, 0.087771f, 0.197660f, 0.194764f,
-};
-
-static const float av1_4_partition_nn_bias_16_layer0[24] = {
- 0.614063f, -0.384872f, 0.084884f, -0.023980f, -0.378765f, -0.082312f,
- -0.458271f, 0.189578f, -0.046169f, -0.073308f, -0.372322f, 0.162793f,
- 0.148803f, 0.829214f, -0.221162f, -0.111157f, -0.017484f, -0.280596f,
- -0.031905f, -0.143459f, 0.078823f, -0.021940f, 0.026834f, 0.257472f,
-};
-
-static const float av1_4_partition_nn_weights_16_layer1[24 * LABEL_SIZE] = {
- -0.985391f, 0.587616f, 0.740683f, 0.192066f, 0.447080f, -0.016585f,
- 0.680449f, 0.028983f, 0.643111f, 0.234338f, 0.107148f, 0.328456f,
- -0.216394f, 1.106838f, -0.179062f, -0.129108f, -0.121655f, -0.151340f,
- -0.306017f, -0.350989f, 0.859284f, -0.372831f, -0.954419f, 0.250495f,
- 1.046732f, 0.287923f, -0.421088f, 0.326613f, -0.314396f, -0.084757f,
- -0.474228f, 0.687999f, 0.052334f, 0.441708f, -0.630698f, -0.350348f,
- -0.602067f, -0.434161f, -0.489824f, -0.313193f, 0.315568f, 0.603119f,
- 0.120245f, 0.182920f, -1.117797f, -0.239594f, -0.296296f, -0.718093f,
- 0.489497f, -0.527019f, 0.102453f, 0.426731f, 0.034606f, 0.311461f,
- -0.012723f, -0.229877f, -0.284290f, 0.383227f, 0.065696f, -0.222400f,
- 1.279248f, -0.862190f, 0.629766f, -0.250011f, -0.325060f, -0.360115f,
- -0.159540f, -0.291856f, -0.038348f, 0.224639f, 0.600934f, 0.030205f,
- 1.337615f, -0.286409f, -0.473710f, -0.418995f, -1.035249f, 0.004359f,
- -0.481860f, 0.563625f, -0.154709f, -0.101198f, -0.758796f, -0.507616f,
- -0.095253f, -0.711135f, 0.207759f, 0.076313f, -0.056087f, -0.162719f,
- -0.232918f, -0.128402f, -0.444620f, -0.447344f, 1.126012f, -1.504446f,
-};
-
-static const float av1_4_partition_nn_bias_16_layer1[LABEL_SIZE] = {
- -0.462133f,
- 0.465060f,
- 0.062211f,
- 0.401786f,
-};
-
-static const NN_CONFIG av1_4_partition_nnconfig_16 = {
- FEATURE_SIZE, // num_inputs
- LABEL_SIZE, // num_outputs
- 1, // num_hidden_layers
- {
- 24, // num_hidden_nodes
- },
- {
- av1_4_partition_nn_weights_16_layer0,
- av1_4_partition_nn_weights_16_layer1,
- },
- {
- av1_4_partition_nn_bias_16_layer0,
- av1_4_partition_nn_bias_16_layer1,
- },
-};
-
-static const float av1_4_partition_nn_weights_32_layer0[FEATURE_SIZE * 32] = {
- -0.219494f, -0.428273f, 0.471006f, 0.448210f, -0.152935f, 0.440435f,
- 0.922857f, -0.074436f, 1.002195f, 0.414176f, -0.327202f, -0.380066f,
- -0.212346f, 0.061868f, -0.056620f, 0.594134f, 0.617995f, 0.308358f,
- 0.232484f, 0.129849f, 1.483593f, -0.071460f, 1.984515f, 1.116422f,
- -1.141762f, -0.306220f, 0.089075f, -0.271845f, 0.187524f, 0.050396f,
- -0.061025f, 0.030809f, 0.172799f, -0.458151f, -0.318357f, 0.122052f,
- -0.414329f, 0.089366f, 0.118898f, -0.376213f, -0.206151f, -0.519946f,
- -0.463252f, -0.206694f, -0.254383f, -0.379487f, 0.093059f, -0.245280f,
- -0.205044f, -0.280060f, -0.171229f, -0.045389f, -0.179481f, -0.306245f,
- -0.500856f, 0.003388f, -0.527397f, -0.449330f, -0.174272f, 0.123769f,
- 0.023005f, 0.157273f, 0.073400f, 0.019099f, -0.113848f, -0.098601f,
- -0.290946f, -0.046770f, -0.314592f, -0.179914f, -0.391411f, -0.235631f,
- -1.282604f, 0.048505f, -0.746382f, 0.093740f, -0.706583f, -0.085729f,
- 0.947382f, -0.002961f, 1.175362f, 1.007309f, 0.141638f, -0.037608f,
- -0.118807f, -0.021474f, -0.146763f, 0.069363f, -0.074372f, -0.215713f,
- -0.004134f, -0.114110f, -0.330438f, -0.031136f, 0.111821f, -0.534598f,
- -0.357759f, -0.455950f, 0.139469f, 0.036582f, -0.384743f, -0.168828f,
- -0.239250f, 0.003520f, -0.049003f, 0.075702f, -0.025809f, -0.225972f,
- -0.228905f, -0.412489f, 0.060570f, -0.328819f, -0.206446f, -0.080231f,
- -0.372008f, -0.218118f, -0.011954f, 0.024155f, 0.156014f, 0.020679f,
- 0.194398f, -0.283491f, -0.024463f, -0.275099f, 0.028031f, 0.026340f,
- -0.254668f, 0.103637f, 2.178693f, 0.552284f, 0.109366f, -0.474806f,
- -0.379286f, -0.026315f, 2.487924f, -0.089466f, 0.206428f, 0.114578f,
- 0.152248f, 0.184050f, -0.631948f, -0.014793f, -0.283782f, -0.830353f,
- 0.009343f, -0.021029f, -0.060534f, -0.025164f, 1.841311f, 1.842748f,
- -1.979708f, 0.450985f, -1.606357f, -0.785454f, -0.212679f, -0.344342f,
- 0.198991f, -0.258070f, 0.055974f, 0.224069f, 0.453051f, 0.408053f,
- 0.027873f, -0.180538f, 0.056609f, 0.207654f, 0.104086f, -0.194426f,
- -0.359789f, -0.381143f, -0.331212f, -0.203973f, -0.324313f, -0.160825f,
- -0.160439f, -0.044856f, -0.346647f, 0.044859f, 0.231398f, -0.023643f,
- -0.140316f, -0.260177f, 0.206965f, -0.425386f, -0.420268f, -0.409748f,
- 0.006971f, 0.066186f, -0.034950f, -0.345518f, 0.018633f, -0.122489f,
- -0.038506f, -0.330942f, 0.161236f, -0.314119f, -0.050202f, -0.179597f,
- 0.731897f, -0.184481f, 0.153598f, -0.539501f, -0.301493f, -0.184967f,
- -0.883754f, -0.586959f, -0.136292f, -1.772065f, -0.196276f, -0.053272f,
- -0.101083f, -0.064142f, 0.161190f, 0.430826f, 0.355647f, 0.138266f,
- 0.051114f, -0.028893f, -0.477673f, -0.238663f, -0.354117f, -0.056747f,
- -0.334273f, -0.497688f, -0.486004f, -0.092033f, -0.241304f, -0.373250f,
- 0.120193f, 0.011360f, -0.010475f, -0.092739f, -0.159650f, -0.033129f,
- -0.259893f, -0.073217f, 0.200128f, 0.103407f, -0.229233f, 0.128831f,
- -0.063450f, -0.241732f, -0.408428f, -0.342239f, -0.264326f, -0.105403f,
- -0.442879f, -0.310456f, -0.112881f, 0.263696f, -0.205014f, -0.497936f,
- -0.261734f, -0.382312f, -0.426807f, -0.021995f, -0.152794f, -0.301494f,
- 0.117232f, -0.577809f, 0.154596f, -0.409522f, -0.413113f, -0.359199f,
- 0.307294f, -0.008746f, -0.310522f, 0.347620f, -0.384845f, -0.451398f,
- -0.226199f, 0.054154f, -0.167608f, 0.046836f, -0.013285f, -0.408119f,
- -0.177973f, -0.248293f, -0.465830f, 0.035827f, -0.222208f, -0.221717f,
- 0.066392f, -0.349769f, -0.428029f, -0.516692f, 0.022398f, -0.251682f,
- 0.134746f, 0.011167f, -2.078787f, 0.173592f, -1.948348f, 0.330060f,
- 1.993785f, -0.052859f, -0.004795f, -3.703177f, 0.013450f, -0.011687f,
- 0.073079f, 0.034803f, 0.025515f, 0.005994f, 0.101731f, 0.074303f,
- -0.109962f, -0.270825f, -0.068273f, -0.163268f, -0.252826f, 0.137190f,
- 0.007667f, -0.358453f, 0.027412f, 0.033492f, 0.021197f, -0.049991f,
- 0.104468f, -0.012157f, -0.056252f, -0.380756f, -0.338483f, 0.233235f,
- -0.048631f, -0.441209f, -0.158482f, -0.148108f, -0.263453f, 0.138847f,
- -0.304073f, -0.336312f, -0.017941f, -0.135563f, 0.075137f, -0.246475f,
- -0.229144f, -0.087744f, -0.346909f, 0.172611f, 0.004377f, -0.009386f,
- -0.023104f, 0.008000f, -0.029390f, -0.317842f, 0.549674f, -0.195337f,
- -0.863979f, 0.160889f, -0.269014f, -0.442104f, -1.799191f, 1.396533f,
- -0.112837f, 0.881303f, 0.000764f, -0.035415f, -0.141877f, 0.184831f,
- -0.363566f, -0.178569f, 0.254134f, -0.326893f, 0.127325f, 0.310620f,
- -0.384621f, 0.146058f, -0.287682f, -0.373447f, 0.026930f, 0.251650f,
- 0.053817f, 0.227509f, 0.121396f, 0.396514f, -0.278381f, -0.038969f,
- -1.538756f, -0.002856f, -0.892900f, 0.363426f, -1.257922f, 0.743795f,
- 0.941177f, 0.219345f, 0.684189f, 1.396858f, 0.026299f, -0.093433f,
- -0.066182f, 0.057868f, -0.089278f, -0.159680f, -0.262035f, -0.236656f,
- 0.005349f, -0.031314f, 0.027917f, -0.182113f, -0.212086f, -0.160774f,
- 0.051468f, 0.036787f, 0.183881f, -0.288205f, -0.349691f, 0.162511f,
- 0.117878f, -0.294534f, -0.365037f, -0.246313f, 0.073977f, -0.072378f,
- -0.173579f, -0.584560f, 0.547194f, 0.259853f, -0.405287f, -0.421146f,
- 0.165788f, -0.146964f, 0.257415f, 0.772394f, -0.475302f, -0.310906f,
- 0.058723f, 0.276833f, 0.586842f, 0.248998f, -0.061135f, 0.255779f,
- 0.152158f, -0.024781f, 2.821834f, 1.365141f, 0.914744f, 0.165752f,
- -1.048304f, -0.333891f, 1.804087f, -0.437028f, -0.120211f, -0.020443f,
- 0.040077f, 0.258600f, -0.598893f, -0.494579f, -0.281054f, -0.517041f,
- 0.005258f, 0.053986f, 0.322755f, 0.429495f, -1.992364f, -0.717192f,
- -1.774802f, 2.047362f, -0.016194f, 0.312606f, 0.019331f, 0.060950f,
- 0.116428f, 0.168458f, -0.307001f, -0.420734f, 0.475843f, 0.425346f,
- -0.107119f, 0.049892f, -1.168619f, 0.010878f, 0.354872f, 0.902717f,
- -0.391407f, 0.332772f, -1.335037f, -0.447100f, 0.481719f, -0.101069f,
- -1.806565f, 0.925280f, 0.346999f, 0.093809f, 0.006275f, 0.270814f,
- -0.691123f, 0.230748f, 0.137033f, 0.068228f, 1.555975f, -0.271637f,
- -0.370403f, 0.236131f, 0.367464f, -0.136562f, 0.428838f, 0.181750f,
- 0.338762f, 0.292449f, -0.748204f, -0.922731f, -0.959445f, -0.806418f,
- -0.140501f, 0.070525f, 1.248748f, 0.637990f, -1.307246f, -0.514055f,
- 0.393858f, -1.858727f, 0.713591f, -0.141044f, 0.080723f, 0.120220f,
- -0.031175f, 0.224488f, 0.753818f, -0.833351f, -1.099132f, 0.651100f,
- -0.135061f, -0.043820f, 0.026983f, -0.059259f, 0.001345f, -0.281775f,
- 0.006958f, 0.046103f, -0.246539f, 0.057630f, -0.360778f, -0.160681f,
- -0.414870f, -0.301979f, 0.000683f, 0.132957f, -0.477609f, 0.106110f,
- -0.637769f, -0.078374f, -0.229494f, 0.583108f, -0.822973f, -0.107540f,
- 1.063426f, -0.268346f, 1.105787f, 2.587550f, -0.020314f, -0.002161f,
- -0.063836f, -0.099990f, -0.103975f, -0.114078f, -0.094199f, -0.065181f,
- -0.019870f, -0.018920f, -0.219732f, 0.035608f, -1.789450f, 0.483032f,
- -0.464729f, 1.563277f, -1.054195f, 0.359991f, 0.065204f, 0.135623f,
- 0.158380f, -0.103815f, -1.398726f, -1.436666f, -0.356311f, 0.507752f,
-};
-
-static const float av1_4_partition_nn_bias_32_layer0[32] = {
- 0.421645f, -0.620548f, -0.187819f, -0.189414f, -0.204975f, -0.189600f,
- -0.174917f, -0.651928f, -0.799655f, -0.086105f, -0.163449f, -0.089212f,
- -0.214495f, -0.108500f, -0.065777f, -0.127704f, 1.544948f, -0.032831f,
- -0.165621f, 0.145844f, -0.032104f, -0.453246f, -0.113444f, 0.321589f,
- -0.862375f, -0.108826f, -0.486259f, 0.685325f, 0.072569f, -0.187961f,
- 0.109579f, -0.082685f,
-};
-
-static const float av1_4_partition_nn_weights_32_layer1[32 * LABEL_SIZE] = {
- 0.255012f, 0.658860f, 0.216907f, 0.165947f, 0.241182f, 0.340854f,
- 0.409445f, 0.165220f, 0.553373f, -0.242385f, -0.209571f, 0.255515f,
- 0.222500f, 0.037032f, 0.238590f, 0.061624f, -2.038693f, 0.264167f,
- -0.230144f, 0.129952f, -0.027979f, 0.847761f, 0.438922f, 0.462323f,
- 0.555345f, 0.030689f, 0.336357f, -0.357326f, -0.113137f, 0.272631f,
- 0.421022f, 0.367776f, -0.197094f, 0.157117f, -0.015008f, -0.056123f,
- -0.283913f, 0.186417f, 0.178561f, -0.763041f, 0.602038f, 0.341092f,
- 0.320453f, -0.312776f, -0.371240f, -0.356279f, 0.220117f, -0.131871f,
- 1.517429f, 0.162223f, -0.255069f, 0.451861f, 0.045071f, -0.223257f,
- 0.003257f, 0.015734f, -0.630447f, -0.672588f, 0.670164f, 0.571031f,
- -0.657948f, 0.034506f, -0.249076f, 0.790293f, 0.066491f, -0.131245f,
- 0.355173f, 0.564622f, 0.374048f, 0.033974f, 0.253970f, 0.495498f,
- -0.556321f, -0.104651f, 0.276947f, 0.057148f, -0.039126f, -0.170050f,
- -0.141542f, 0.158541f, 0.582763f, -0.100992f, 0.096705f, -0.209029f,
- 0.008449f, 0.255865f, 0.103565f, 0.317719f, 0.479499f, 0.599126f,
- -0.065613f, -0.268614f, 0.508736f, 0.180813f, -0.815868f, 0.051238f,
- 0.001223f, -0.305423f, -0.270079f, 0.036180f, 0.304342f, 0.202634f,
- 0.218348f, -0.304304f, -0.438297f, 0.241123f, 0.200230f, 0.151804f,
- 0.051944f, 0.160422f, -0.262981f, -0.417412f, 1.845729f, -0.086183f,
- 0.403517f, 0.059667f, 0.564543f, -0.081752f, 0.114907f, -0.284489f,
- -0.673943f, 0.056965f, 0.362221f, 0.403224f, -0.000233f, -0.209552f,
- -0.800926f, -0.134132f,
-};
-
-static const float av1_4_partition_nn_bias_32_layer1[LABEL_SIZE] = {
- -0.019518f,
- 0.198546f,
- 0.339015f,
- -0.261961f,
-};
-
-static const NN_CONFIG av1_4_partition_nnconfig_32 = {
- FEATURE_SIZE, // num_inputs
- LABEL_SIZE, // num_outputs
- 1, // num_hidden_layers
- {
- 32, // num_hidden_nodes
- },
- {
- av1_4_partition_nn_weights_32_layer0,
- av1_4_partition_nn_weights_32_layer1,
- },
- {
- av1_4_partition_nn_bias_32_layer0,
- av1_4_partition_nn_bias_32_layer1,
- },
-};
-
-static const float av1_4_partition_nn_weights_64_layer0[FEATURE_SIZE * 24] = {
- -0.152649f, 0.074509f, 1.000136f, 0.601661f, -1.416694f, -1.932396f,
- -1.163850f, 0.640931f, -0.888625f, -0.345711f, 0.161799f, 0.103165f,
- 0.147513f, 0.089956f, 0.204329f, 0.196922f, 0.014927f, 0.283714f,
- -0.110422f, 0.062005f, -0.531870f, -0.075287f, -0.448349f, -0.218881f,
- -0.005592f, -0.130490f, -0.015779f, 0.093521f, -0.158487f, 0.072241f,
- 0.066879f, -0.418566f, -0.206281f, 0.025634f, 0.048334f, -0.534750f,
- 0.302081f, 0.028707f, -1.543248f, 0.103799f, -1.214052f, 0.395870f,
- 0.394754f, -0.272170f, -0.702953f, -4.057464f, -0.033497f, -0.042142f,
- 0.014742f, 0.065263f, 0.000879f, -0.019768f, 0.101275f, 0.163059f,
- -0.371392f, -0.283484f, 0.241915f, 0.012684f, -0.210101f, -0.166534f,
- -0.024894f, 0.274696f, 0.098993f, 0.104086f, 0.055044f, -0.289378f,
- 0.146571f, -0.147441f, 0.004056f, 0.112244f, -0.416162f, -0.033176f,
- -0.214836f, -0.213787f, 0.023197f, -0.339043f, 0.301109f, -0.408551f,
- 0.284922f, -0.344418f, -0.039255f, 0.158748f, -0.344169f, 0.078286f,
- -0.043957f, -0.302162f, -0.310826f, 0.063425f, 0.198166f, -0.285324f,
- -0.108252f, 0.038992f, -1.053110f, -1.663290f, -0.417185f, 1.504443f,
- 0.643206f, -0.850240f, 0.889641f, -0.733214f, 0.147302f, 0.060291f,
- -0.052954f, 0.167453f, 0.111870f, 0.085471f, 0.035107f, 0.064361f,
- 0.176053f, 0.184373f, 0.676576f, 0.066164f, 1.455569f, 0.925111f,
- -0.640845f, 0.803795f, -0.653782f, -0.201038f, 0.060033f, 0.016964f,
- -0.047590f, 0.045908f, 0.354162f, 0.014812f, 0.156978f, 0.058792f,
- -0.238119f, 0.002450f, -0.094388f, -0.155229f, 0.194858f, -0.355429f,
- -0.187098f, -0.119264f, -0.088694f, -0.102845f, 0.184905f, -0.425339f,
- -0.157808f, -0.104599f, -0.393248f, -0.379842f, 0.027741f, -0.185816f,
- -0.317294f, 0.002453f, -0.498241f, -0.204302f, -0.079093f, 0.020646f,
- -0.412850f, -0.426039f, -0.177050f, -0.419304f, -0.064478f, -0.191802f,
- -0.146812f, 0.171111f, 0.090261f, -0.367033f, -0.299051f, -0.322132f,
- 0.428192f, -0.252613f, 0.488498f, -0.559682f, 0.486720f, -0.511084f,
- 0.992506f, 0.346765f, -0.118697f, -0.065127f, -0.376612f, -0.345137f,
- -0.426517f, -0.516836f, 0.307083f, 0.609362f, 0.369555f, 0.093775f,
- -0.375664f, -0.221595f, -0.025465f, 0.134374f, -0.387031f, 0.096236f,
- 0.337465f, -0.124029f, -0.157340f, -0.368790f, -0.104490f, -0.279507f,
- -0.247705f, 0.146559f, -0.236206f, -0.036073f, 0.064206f, -0.330919f,
- 0.516591f, -0.013492f, 1.269568f, 1.182530f, -0.455390f, -1.328091f,
- -0.200950f, -0.380513f, -0.195532f, -0.341479f, 0.016064f, 0.021176f,
- 0.169119f, 0.103707f, -0.174504f, -0.462719f, -0.079445f, -0.247128f,
- 0.459111f, 0.036129f, 0.769570f, -0.080405f, 1.667107f, 0.355567f,
- -2.433896f, 0.627572f, -0.600090f, -0.651872f, -0.059769f, -0.041945f,
- -0.009933f, 0.014864f, -0.049378f, -0.041561f, 0.075180f, 0.138307f,
- 0.122366f, -0.160756f, 0.215327f, 0.013572f, 0.198194f, -0.762650f,
- 0.054466f, 1.110332f, 1.692853f, 0.658654f, -0.409549f, 0.506085f,
- 0.330962f, -0.223008f, 0.007448f, -0.289062f, -0.476231f, -0.228359f,
- 0.013977f, -0.000609f, -0.673604f, 0.275996f, 0.405291f, 1.693561f,
- -1.079768f, 1.122516f, -0.203227f, 0.099265f, -0.165207f, -0.323899f,
- -0.269973f, -0.080122f, 0.127700f, 0.190201f, 0.219527f, 0.306194f,
- 0.026049f, -0.003779f, 1.107357f, 1.720315f, 1.017908f, 0.078664f,
- -1.599813f, -0.482636f, -0.117450f, 0.122249f, 0.030220f, 0.039794f,
- 0.176350f, 0.129715f, -0.305755f, -0.274044f, -0.299640f, -0.187335f,
- -0.073616f, -0.564507f, -0.127758f, 0.044855f, -0.191090f, 0.039095f,
- 0.115378f, 0.969352f, -0.088360f, 0.301443f, 0.065726f, -0.019740f,
- -0.102350f, -0.084913f, -0.194615f, 0.118582f, 0.920789f, -0.171615f,
- -1.436553f, -0.026419f, -0.730864f, 0.615697f, -0.795079f, 0.119701f,
- 0.601782f, 0.792902f, 0.184920f, 1.635090f, -0.085860f, -0.033187f,
- -0.166883f, 0.008487f, -0.128300f, -0.089923f, -0.108781f, -0.133719f,
- -0.011988f, -0.239816f, -0.092563f, -0.238471f, -0.339722f, 0.177432f,
- -0.063101f, -0.121002f, 0.058072f, -0.031166f, 0.086413f, -0.016203f,
- -0.305075f, -0.005420f, -0.168796f, 0.148745f, -0.116737f, -0.050222f,
- -0.287952f, -0.290982f, -0.090449f, 0.076098f, -0.345632f, -0.061309f,
- 0.142218f, 0.035692f, 0.304517f, -0.228031f, 0.119608f, -0.120350f,
- 0.163404f, -0.105605f, -0.305462f, -0.176657f, 0.210070f, -0.227600f,
- -0.081965f, -0.464027f, -0.053782f, -0.018367f, 0.119159f, 0.017162f,
- -0.069792f, 0.305768f, -0.421095f, 0.187740f, -0.032059f, 0.575115f,
- -0.064283f, -0.091828f, 0.772648f, -0.393189f, -0.297098f, 0.141420f,
- 0.826389f, -0.071586f, -0.893968f, -0.346793f, -1.151655f, 0.039393f,
- 1.546000f, -0.094029f, -0.005786f, -0.195764f, -0.169724f, -0.133167f,
- -0.129312f, -0.418860f, -0.026553f, -0.053667f, -0.091976f, -0.106275f,
- -0.492625f, 0.025350f, -0.332075f, -0.475638f, -0.076667f, -0.065779f,
- 0.108957f, 0.246298f, -0.289007f, -0.442552f, -0.206692f, -0.257453f,
- 0.073806f, -0.458606f, -0.410390f, -0.312674f, -0.144813f, 0.170128f,
- 0.018810f, -0.098241f, 1.027369f, 0.479328f, 1.129707f, 0.484813f,
- -0.085207f, 0.621873f, -0.520981f, 0.236175f, 0.273487f, 0.061426f,
- 0.306085f, 0.161487f, 0.220991f, 0.223783f, -0.091826f, 0.391031f,
-};
-
-static const float av1_4_partition_nn_bias_64_layer0[24] = {
- 0.580225f, -0.191304f, 1.091767f, -0.134522f, -0.089361f, 0.398750f,
- -0.882708f, -0.213102f, -0.119981f, 0.378296f, -0.075719f, 0.426598f,
- -2.015505f, 0.202534f, -1.044792f, -0.841519f, 0.266421f, -0.047115f,
- -0.131147f, -0.075066f, -0.009441f, 0.853007f, -0.175606f, -0.868306f,
-};
-
-static const float av1_4_partition_nn_weights_64_layer1[24 * LABEL_SIZE] = {
- -0.851937f, -0.211148f, -2.289513f, -0.275071f, 0.251340f, -0.340847f,
- 0.498032f, 0.308652f, -0.051574f, 0.323146f, -0.097547f, -0.040269f,
- 1.909655f, 0.098348f, 0.588136f, 0.568112f, 0.313297f, 0.920848f,
- -0.014486f, 0.386014f, 0.029199f, -0.537330f, -0.021502f, 0.349073f,
- -0.524715f, -0.351848f, 1.565454f, -0.297148f, 0.020177f, 0.648369f,
- 0.027321f, -0.096052f, -0.363163f, -0.132642f, 0.024292f, -0.734176f,
- -0.782700f, 0.408299f, 0.476945f, -0.489512f, -0.728318f, -0.632042f,
- 0.405417f, 0.184086f, -0.400730f, 0.359032f, 0.019710f, -0.217409f,
- 0.519159f, -0.136316f, 0.993592f, -0.147128f, 0.097495f, 0.426189f,
- -0.295233f, 0.278799f, 0.080667f, -0.025052f, -0.307757f, 0.418716f,
- -0.853388f, -0.374878f, -0.322725f, 0.696335f, -0.380649f, -0.160356f,
- -0.140060f, 0.502455f, 0.656728f, -0.095023f, -0.184198f, -0.347069f,
- 0.456372f, -0.029754f, 0.907923f, 0.265710f, -0.065505f, 0.226763f,
- -0.277798f, 0.413292f, -0.593899f, -0.060740f, -0.313358f, -0.249944f,
- -0.627329f, -0.327151f, -0.853788f, -1.163807f, -0.388944f, -0.228788f,
- -0.057382f, 0.334741f, -0.283083f, 0.368280f, -0.407197f, -0.441849f,
-};
-
-static const float av1_4_partition_nn_bias_64_layer1[LABEL_SIZE] = {
- -0.478735f,
- 0.292948f,
- 0.293172f,
- 0.040013f,
-};
-
-static const NN_CONFIG av1_4_partition_nnconfig_64 = {
- FEATURE_SIZE, // num_inputs
- LABEL_SIZE, // num_outputs
- 1, // num_hidden_layers
- {
- 24, // num_hidden_nodes
- },
- {
- av1_4_partition_nn_weights_64_layer0,
- av1_4_partition_nn_weights_64_layer1,
- },
- {
- av1_4_partition_nn_bias_64_layer0,
- av1_4_partition_nn_bias_64_layer1,
- },
-};
-
-#undef FEATURE_SIZE
-#undef LABEL_SIZE
-
-#define FEATURE_SIZE 4
-static const float
- av1_partition_breakout_nn_weights_128_layer0[FEATURE_SIZE * 32] = {
- -0.331785f, 0.068675f, -0.323814f, 0.033714f, -0.237835f, 0.166316f,
- -0.498766f, -0.545634f, -0.266173f, -0.476957f, -0.120409f, -0.021042f,
- 0.124056f, -0.278750f, -0.110120f, -0.372812f, 4.547939f, 0.097618f,
- -0.002710f, -0.064169f, -1.841173f, -0.403833f, 0.005536f, 0.067188f,
- -0.434935f, -0.227421f, -0.000011f, -0.139961f, -0.174056f, -0.652384f,
- -0.000015f, -0.262847f, -3.319706f, -0.947693f, 0.002981f, 0.016717f,
- -10.408850f, -0.014568f, -0.000018f, 0.019084f, 1.523383f, 0.074525f,
- -0.002076f, -0.020734f, 4.881495f, 0.002799f, 0.000342f, -0.019623f,
- 1.786154f, 0.037462f, -0.019037f, 0.052833f, 11.408153f, -0.044602f,
- 0.026155f, -0.518627f, -0.474499f, -0.427430f, -0.442733f, -0.011116f,
- -22.379410f, -0.000549f, -0.001418f, 0.008090f, -0.295090f, -0.230268f,
- -0.337278f, -0.001127f, -0.644282f, -0.598783f, -0.539417f, -0.003303f,
- 9.189824f, 0.038066f, -0.004097f, -0.460045f, -0.308858f, -0.242691f,
- -0.230835f, -0.273057f, 0.152226f, 0.179239f, -0.146382f, -0.004655f,
- -0.242940f, -0.718862f, -0.001685f, -0.214736f, 3.263186f, 0.079463f,
- -0.003854f, -0.187461f, -0.599144f, -0.419808f, -0.000597f, -0.136980f,
- 0.184813f, -0.319525f, -0.007246f, 0.079709f, -0.883229f, -0.343748f,
- -0.000077f, -0.172214f, -0.548759f, -0.194674f, -0.144786f, 0.043896f,
- -0.176364f, -0.248394f, -0.090215f, -0.294743f, -0.280980f, -0.181436f,
- -0.115681f, -0.071915f, -13.035494f, -0.075623f, 0.017052f, -0.171152f,
- 5.910803f, 0.128344f, 0.010256f, -1.073301f, 2.387826f, 0.166183f,
- -0.007193f, -0.257836f,
- };
-
-static const float av1_partition_breakout_nn_bias_128_layer0[32] = {
- 0.115591f, -0.100178f, -0.165523f, -0.122997f, 11.045759f, 1.034761f,
- -0.323672f, -0.189087f, 2.850950f, 7.010029f, -21.447067f, 1.877031f,
- 0.437442f, 5.929414f, -0.117274f, 4.462253f, -0.135198f, -0.145927f,
- 8.727211f, 0.000000f, -3.532987f, -0.405898f, 11.364439f, -0.141728f,
- -5.994947f, -0.362574f, 1.857687f, -0.100400f, -0.130312f, 0.006080f,
- 0.429660f, -8.439470f,
-};
-
-static const float av1_partition_breakout_nn_weights_128_layer1[32] = {
- -0.013738f, 0.022052f, -0.074437f, -0.211377f, -0.080433f, 0.015543f,
- 0.002091f, 0.014252f, 0.134834f, 0.190263f, 0.244175f, -0.031747f,
- 0.020068f, -0.068326f, 0.185471f, 0.660268f, -0.134898f, -0.010376f,
- -0.276023f, -0.282921f, -0.022769f, 0.007070f, -0.186235f, 0.024407f,
- -0.024837f, 0.005764f, 0.016599f, -0.040077f, 0.020990f, 0.095054f,
- -0.039662f, 0.131499f,
-};
-
-static const float av1_partition_breakout_nn_bias_128_layer1[1] = {
- 0.86678213f,
-};
-
-static const NN_CONFIG av1_partition_breakout_nnconfig_128 = {
- FEATURE_SIZE, // num_inputs
- 1, // num_outputs
- 1, // num_hidden_layers
- {
- 32, // num_hidden_nodes
- },
- {
- av1_partition_breakout_nn_weights_128_layer0,
- av1_partition_breakout_nn_weights_128_layer1,
- },
- {
- av1_partition_breakout_nn_bias_128_layer0,
- av1_partition_breakout_nn_bias_128_layer1,
- },
-};
-
-static const float
- av1_partition_breakout_nn_weights_64_layer0[FEATURE_SIZE * 16] = {
- 0.872892f, -0.235539f, -0.412159f, -0.142533f, -2.251479f, -0.057073f,
- -0.001373f, 0.112147f, 5.281734f, 0.060704f, 0.000838f, -0.961554f,
- 0.244995f, 0.154515f, -0.292654f, -0.167177f, -3.759112f, -0.486347f,
- 0.003208f, -0.418226f, 2.618152f, 0.026832f, 0.003988f, -0.404406f,
- -0.405434f, 0.102791f, -0.033406f, -0.029820f, -4.492342f, -0.154291f,
- 0.012947f, -0.195075f, 0.009311f, -0.411410f, -0.010986f, -0.554822f,
- 0.160576f, 0.020796f, -0.457230f, -0.191111f, -7.759542f, -0.065039f,
- -0.001322f, 0.055691f, 0.291924f, -0.053076f, -0.148379f, -0.298383f,
- 1.022023f, -0.033668f, -0.000804f, -0.825778f, -3.902254f, -0.085812f,
- -0.052520f, -0.035012f, -0.465468f, -0.319231f, -0.497529f, -0.183068f,
- -2.407131f, -0.062304f, 0.000874f, 0.108786f,
- };
-
-static const float av1_partition_breakout_nn_bias_64_layer0[16] = {
- 0.081425f, -14.404084f, 11.511393f, -0.930053f, 1.841889f, 15.020920f,
- -1.872288f, 5.392535f, -0.329335f, -0.005358f, 12.600776f, 0.000000f,
- -0.337413f, 4.492778f, 0.000000f, 17.043072f,
-};
-
-static const float av1_partition_breakout_nn_weights_64_layer1[16] = {
- -0.465338f, -0.103023f, -0.174808f, -0.005156f, -0.016366f, -0.172494f,
- 0.014185f, 0.067030f, -0.001939f, -0.175049f, 0.245992f, -0.181660f,
- -0.038572f, 0.307899f, -0.294283f, 0.118323f,
-};
-
-static const float av1_partition_breakout_nn_bias_64_layer1[1] = {
- -1.33438122f,
-};
-
-static const NN_CONFIG av1_partition_breakout_nnconfig_64 = {
- FEATURE_SIZE, // num_inputs
- 1, // num_outputs
- 1, // num_hidden_layers
- {
- 16, // num_hidden_nodes
- },
- {
- av1_partition_breakout_nn_weights_64_layer0,
- av1_partition_breakout_nn_weights_64_layer1,
- },
- {
- av1_partition_breakout_nn_bias_64_layer0,
- av1_partition_breakout_nn_bias_64_layer1,
- },
-};
-
-static const float
- av1_partition_breakout_nn_weights_32_layer0[FEATURE_SIZE * 16] = {
- -4.825528f, -0.145737f, 0.001907f, 0.145415f, -1.858153f, -0.080744f,
- 0.000601f, 0.211991f, 0.384265f, -0.043945f, -0.521332f, -0.170622f,
- -0.046866f, -0.600506f, -0.001216f, -0.332760f, -0.447677f, -0.605844f,
- -0.121008f, -0.119936f, -0.215739f, -0.269665f, -0.668587f, 0.071318f,
- -1.202551f, -0.729727f, -0.370084f, 0.088215f, -1.926800f, -0.086519f,
- 0.000359f, 0.215120f, 0.718749f, 0.022942f, 0.003840f, -0.176518f,
- 1.213451f, 0.080786f, 0.001557f, -1.053430f, 0.202698f, -0.583919f,
- -0.535512f, -0.239927f, -0.110151f, -0.128832f, -0.441087f, -0.145575f,
- -0.178518f, -0.585784f, 0.000029f, -0.833014f, -0.331358f, -0.520297f,
- -0.088676f, -0.178487f, -1.430755f, 0.022981f, -0.106931f, 0.015573f,
- -0.520814f, -0.045386f, -0.443123f, -0.484209f,
- };
-
-static const float av1_partition_breakout_nn_bias_32_layer0[16] = {
- 11.747026f, -9.337718f, 0.341648f, -0.155847f, -0.104005f, 4.666283f,
- 6.669584f, 16.625504f, 9.885626f, 15.439183f, -0.346080f, 0.000000f,
- -0.423808f, 0.000000f, 6.352258f, -0.155787f,
-};
-
-static const float av1_partition_breakout_nn_weights_32_layer1[16] = {
- 0.168561f, -0.122519f, 0.524667f, 0.032474f, 0.059097f, 0.011900f,
- 0.166445f, 0.127256f, -0.034838f, -0.212586f, -0.317973f, 0.348419f,
- -0.004171f, 0.157694f, 0.117845f, 0.272115f,
-};
-
-static const float av1_partition_breakout_nn_bias_32_layer1[1] = {
- 0.09049262f,
-};
-
-static const NN_CONFIG av1_partition_breakout_nnconfig_32 = {
- FEATURE_SIZE, // num_inputs
- 1, // num_outputs
- 1, // num_hidden_layers
- {
- 16, // num_hidden_nodes
- },
- {
- av1_partition_breakout_nn_weights_32_layer0,
- av1_partition_breakout_nn_weights_32_layer1,
- },
- {
- av1_partition_breakout_nn_bias_32_layer0,
- av1_partition_breakout_nn_bias_32_layer1,
- },
-};
-
-static const float
- av1_partition_breakout_nn_weights_16_layer0[FEATURE_SIZE * 16] = {
- 0.209371f, 0.028758f, 0.005764f, -0.384401f, -0.625777f, -0.005647f,
- -0.316867f, 0.042985f, 0.127344f, 0.025461f, 0.011465f, -0.071043f,
- -0.295977f, -0.076093f, -0.209681f, -0.311653f, -0.147538f, 0.009910f,
- -0.130997f, -0.012326f, 0.024124f, -0.323578f, -0.005790f, -0.085664f,
- -1.575066f, -0.119221f, 0.015018f, 0.187204f, 0.238117f, 0.084924f,
- -0.004444f, -1.271538f, -0.709860f, -0.006226f, -0.903111f, 0.090573f,
- -0.278642f, -0.011114f, 0.021162f, 0.081290f, -0.467486f, -0.040771f,
- -0.224069f, -0.714390f, -0.281905f, -0.001336f, -0.761212f, -0.060385f,
- -0.814479f, -0.050450f, -0.003666f, 0.085668f, -0.272589f, 0.057330f,
- -0.206540f, -0.303418f, 0.075335f, -0.180468f, -0.064872f, -0.755948f,
- -0.509287f, -0.048877f, -0.001512f, 0.077086f,
- };
-
-static const float av1_partition_breakout_nn_bias_16_layer0[16] = {
- 16.421495f, 4.012273f, -1.828571f, 0.000000f, -0.263564f, -0.201972f,
- 6.564987f, 14.651000f, -3.227779f, 2.241833f, -0.137116f, 0.762876f,
- 5.625762f, 0.615822f, 0.040057f, 16.668884f,
-};
-
-static const float av1_partition_breakout_nn_weights_16_layer1[16] = {
- -0.096440f, 0.184316f, -0.021148f, 0.424974f, 0.003743f, 0.006310f,
- 0.046266f, -0.219224f, -0.087004f, 0.024623f, -0.275798f, 0.120164f,
- 0.269773f, -0.021105f, -0.146698f, 0.188764f,
-};
-
-static const float av1_partition_breakout_nn_bias_16_layer1[1] = {
- 1.60751927f,
-};
-
-static const NN_CONFIG av1_partition_breakout_nnconfig_16 = {
- FEATURE_SIZE, // num_inputs
- 1, // num_outputs
- 1, // num_hidden_layers
- {
- 16, // num_hidden_nodes
- },
- {
- av1_partition_breakout_nn_weights_16_layer0,
- av1_partition_breakout_nn_weights_16_layer1,
- },
- {
- av1_partition_breakout_nn_bias_16_layer0,
- av1_partition_breakout_nn_bias_16_layer1,
- },
-};
-
-static const float
- av1_partition_breakout_nn_weights_8_layer0[FEATURE_SIZE * 16] = {
- -0.255885f, 0.109548f, -0.111054f, -0.476119f, -1.083031f, -0.342003f,
- 0.048241f, -0.356013f, -0.085054f, 0.124908f, 0.000084f, -0.149906f,
- -0.729829f, 0.133535f, -0.002125f, 0.207516f, -0.210163f, -0.567365f,
- -0.590103f, 0.045308f, -0.539406f, 0.130550f, -0.663879f, -0.170549f,
- 0.017587f, -0.054187f, 0.000550f, 0.038297f, -0.112891f, -0.012751f,
- -0.048067f, 0.095564f, 0.079892f, 0.077285f, -0.749708f, -0.286312f,
- -0.054334f, 0.132242f, -0.004152f, -0.209758f, -0.073407f, 0.082306f,
- -0.001034f, -0.090990f, 0.122823f, -0.109794f, -0.230066f, -0.391155f,
- -0.262245f, -0.004744f, -0.232246f, 0.099290f, -0.637484f, 0.111937f,
- -0.548556f, -0.598344f, 0.123265f, -0.281395f, -0.399711f, -0.525671f,
- -0.596269f, 0.098494f, -0.005765f, 0.173652f,
- };
-
-static const float av1_partition_breakout_nn_bias_8_layer0[16] = {
- 0.194141f, -0.111223f, 2.503733f, -7.155602f, -0.695068f, 0.114874f,
- 2.056990f, 5.284306f, 0.639643f, -2.792049f, -2.232339f, -0.232209f,
- 2.336705f, -0.278834f, 0.231905f, 7.954366f,
-};
-
-static const float av1_partition_breakout_nn_weights_8_layer1[16] = {
- -0.014439f, 0.010171f, 0.048116f, -0.090659f, -0.081235f, -0.021840f,
- -0.017360f, 0.031063f, -0.031737f, -0.023439f, -0.037725f, 0.021954f,
- 0.055858f, 0.230970f, -0.056466f, 0.119780f,
-};
-
-static const float av1_partition_breakout_nn_bias_8_layer1[1] = {
- 1.27784479f,
-};
-
-static const NN_CONFIG av1_partition_breakout_nnconfig_8 = {
- FEATURE_SIZE, // num_inputs
- 1, // num_outputs
- 1, // num_hidden_layers
- {
- 16, // num_hidden_nodes
- },
- {
- av1_partition_breakout_nn_weights_8_layer0,
- av1_partition_breakout_nn_weights_8_layer1,
- },
- {
- av1_partition_breakout_nn_bias_8_layer0,
- av1_partition_breakout_nn_bias_8_layer1,
- },
-};
-#undef FEATURE_SIZE
-
-#define FEATURE_SIZE 9 // Input layer size
-#define NUM_NODES 32 // Hidden layer size
-#define LABEL_SIZE 3 // Output layer size
-
-static const float av1_rect_partition_nn_weights_8_layer0[FEATURE_SIZE *
- NUM_NODES] = {
- 0.22151f, 0.99424f, 0.23415f, -1.13841f, -0.11277f, 0.09530f, 0.14769f,
- -1.18895f, -0.96640f, -0.21421f, -0.13974f, 0.03236f, 0.15777f, -0.03176f,
- 0.02729f, -0.37344f, -0.01727f, -0.05469f, 0.19402f, -3.45508f, 0.90106f,
- -2.91557f, 0.19379f, 0.14356f, -0.13291f, 0.05734f, -0.03032f, -0.13060f,
- 0.35744f, 1.31630f, -1.54493f, -0.20749f, -0.24413f, -0.04524f, -0.12400f,
- 1.08305f, -0.21596f, 0.76244f, 1.10616f, -1.71706f, 0.05768f, 0.10966f,
- 0.00949f, -0.12680f, 0.00699f, -0.11522f, -0.38566f, 0.34283f, -0.35266f,
- -0.40643f, -0.22462f, 0.32300f, -0.39737f, -0.20587f, -0.16096f, 1.07543f,
- 0.30314f, -1.35659f, -0.38212f, 0.45857f, 0.76615f, 0.16819f, -1.24459f,
- 0.39677f, 0.87436f, -2.33757f, 1.27471f, 0.27488f, 0.01019f, -0.01221f,
- -0.07461f, -0.14577f, -0.01231f, -0.64426f, -1.02733f, -1.96242f, 0.95143f,
- -0.06777f, -1.13868f, 0.01354f, -0.75590f, -0.78222f, -0.07453f, 0.61788f,
- 0.56899f, 1.17144f, 0.70899f, 0.48568f, 0.11266f, 0.81579f, -0.03929f,
- 0.01088f, 0.33599f, -0.22401f, -0.49654f, -0.02598f, 0.04509f, -0.08217f,
- -0.30687f, 0.19851f, -2.96860f, -2.30698f, 0.01848f, 0.11801f, 0.06614f,
- 0.01673f, -0.11002f, -0.08168f, 0.09204f, -0.06379f, 0.27972f, -0.31716f,
- -0.00566f, -0.13651f, -0.37276f, 0.01511f, -0.23697f, 0.21696f, -0.19480f,
- 0.60758f, -0.43506f, -0.02247f, -1.45073f, 0.84442f, -0.94018f, 0.32550f,
- 0.03985f, -0.06581f, 0.21665f, 0.79472f, -2.41080f, 0.04788f, -0.09492f,
- -0.10677f, 0.07250f, 0.14329f, -0.37319f, 0.53043f, -0.49108f, 0.25792f,
- -0.36569f, -0.28669f, -0.18416f, -0.52385f, -1.17081f, -1.32153f, -1.13403f,
- -0.26196f, 0.93379f, 0.72115f, 0.54464f, 0.27642f, 0.04757f, 2.01629f,
- 1.55787f, -0.11665f, 1.00722f, -0.24352f, 0.53308f, 0.57719f, 0.39344f,
- 0.19174f, 0.06339f, -0.02530f, 0.07724f, -0.32416f, -0.26992f, -0.35887f,
- -0.35285f, -0.33379f, -0.37475f, -0.77335f, 1.70027f, -1.52153f, -0.26503f,
- 0.97552f, -2.96705f, -0.91220f, -0.11827f, 0.00406f, -0.14514f, 0.18417f,
- -0.20874f, 0.27293f, -0.34072f, -0.34838f, -0.19054f, -0.29806f, -0.27960f,
- -0.19293f, -0.18275f, -0.05902f, 0.58625f, -0.05470f, -0.48814f, -0.45382f,
- -0.05959f, 2.01250f, -0.30014f, 0.69546f, -1.24180f, 1.34923f, 0.20337f,
- 0.16850f, 0.07187f, 0.72630f, -0.15380f, -2.40973f, -2.73561f, -1.71375f,
- -1.61695f, 0.50052f, 0.09730f, 0.00579f, 0.06133f, -0.06512f, -0.61439f,
- -1.16173f, -0.58716f, 1.60438f, 0.23242f, 0.91847f, 0.49041f, -0.16277f,
- -0.02574f, -0.64593f, 1.17028f, 0.46852f, 0.14926f, 0.73853f, -0.78521f,
- 0.05959f, -0.35590f, 0.02039f, 0.10812f, -0.28650f, 1.34038f, -0.72188f,
- 0.62385f, -0.35271f, -0.39599f, 0.41543f, 0.53124f, -0.23510f, -0.15480f,
- -0.05066f, -0.33529f, 0.05238f, -0.35311f, -0.26983f, -0.39764f, 0.01085f,
- 0.26593f, -0.18411f, -0.29945f, 0.50090f, -0.03397f, 0.78562f, -0.33068f,
- 1.21308f, -2.23273f, -0.33366f, -0.15164f, -1.13270f, 0.17394f, 0.65567f,
- 0.76496f, 0.44325f, 0.01368f, -0.33619f, -0.64256f, 0.64478f, 0.84553f,
- 1.74183f, 0.22563f, -0.14550f, -0.16258f, 0.03010f, 0.49922f, 0.64575f,
- -0.29187f, -0.10348f, -1.43619f, -0.56540f, -0.14779f, 0.04616f, 0.87411f,
- -1.08228f,
-};
-
-static const float av1_rect_partition_nn_bias_8_layer0[NUM_NODES] = {
- 0.33919f, -0.03003f, 0.79073f, -0.18508f, 0.00668f, -0.12017f, 0.35362f,
- -0.51642f, 0.06536f, 0.41668f, -0.06509f, 0.94606f, -0.15385f, 0.14936f,
- 1.46274f, -0.06961f, 2.82537f, -1.95576f, -0.09457f, 0.02042f, -0.07480f,
- -0.55083f, 0.26170f, 4.39883f, 0.33999f, -0.10502f, 0.70884f, -0.06992f,
- -0.22638f, 1.40940f, -0.09309f, 0.05828f,
-};
-
-static const float av1_rect_partition_nn_weights_8_layer1[NUM_NODES *
- LABEL_SIZE] = {
- 0.09209f, 0.26236f, 0.62136f, 0.76324f, -1.14678f, 0.42289f, -0.08895f,
- -0.97267f, 2.05958f, 0.00843f, 0.35335f, 1.12096f, -0.11679f, 0.07350f,
- -1.23231f, -0.61990f, 1.51379f, -1.99450f, 0.22441f, 2.41974f, -0.30488f,
- -0.37869f, 0.47168f, -3.70132f, 0.00061f, 0.19432f, 0.11512f, 0.26200f,
- -0.35285f, 0.37985f, 0.90571f, 0.27344f, 0.74840f, -0.17965f, -2.51433f,
- 0.59235f, 1.16670f, -0.53446f, 0.67897f, 0.04505f, -0.86874f, 0.45361f,
- -0.35033f, 1.21283f, 0.31426f, -0.20841f, 0.56757f, 0.45909f, -1.23683f,
- 0.09835f, -0.17214f, -0.96323f, 0.01138f, -0.50233f, 0.30104f, 2.01814f,
- 1.15821f, -0.11947f, 0.74574f, -0.30714f, -0.39646f, -1.30086f, -0.88541f,
- -0.12259f, -0.54977f, 0.30069f, 1.84299f, -0.95141f, -0.65887f, -0.25888f,
- -0.63265f, 1.29531f, -0.56672f, 0.10837f, -0.21297f, -2.19131f, 0.01156f,
- 0.51912f, 0.46704f, 0.42810f, -0.59271f, 0.98469f, -0.17914f, -1.91163f,
- -0.32807f, 0.48199f, -0.99525f, 1.67108f, -0.87631f, -0.60258f, -0.78731f,
- -0.32877f, 0.44237f, 0.01087f, 0.07489f, -0.28224f,
-};
-
-static const float av1_rect_partition_nn_bias_8_layer1[LABEL_SIZE] = {
- 1.70665f,
- -0.77954f,
- -0.92709f,
-};
-
-static const NN_CONFIG av1_rect_partition_nnconfig_8 = {
- FEATURE_SIZE, // num_inputs
- LABEL_SIZE, // num_outputs
- 1, // num_hidden_layers
- {
- NUM_NODES,
- }, // num_hidden_nodes
- { av1_rect_partition_nn_weights_8_layer0,
- av1_rect_partition_nn_weights_8_layer1 },
- { av1_rect_partition_nn_bias_8_layer0, av1_rect_partition_nn_bias_8_layer1 }
-};
-
-static const float av1_rect_partition_nn_weights_16_layer0[FEATURE_SIZE *
- NUM_NODES] = {
- -0.18480f, -0.05410f, -0.18957f, 0.15451f, -0.38649f, -0.26162f, -0.22727f,
- -0.38555f, -0.36738f, 0.74384f, -1.85999f, 0.98491f, -0.72119f, 1.77321f,
- 0.39983f, 0.96314f, 0.23695f, 0.30200f, 0.30629f, -0.47617f, -1.43320f,
- -1.81730f, 0.36554f, -0.07142f, -1.27242f, -1.27697f, 0.00110f, -0.32179f,
- 0.27460f, 0.45428f, 0.15308f, -0.73906f, -0.28577f, -0.01238f, -0.16958f,
- -0.85390f, 1.05484f, -1.62812f, 0.77632f, -0.27327f, -0.32527f, 0.32726f,
- 1.73255f, 0.53763f, 0.59121f, -0.39068f, -0.32451f, -0.31869f, 0.17777f,
- 0.07519f, -0.18066f, -0.11250f, -0.14616f, -0.16882f, -0.04099f, -0.67959f,
- 0.39674f, -0.08596f, 0.18587f, -2.04097f, -1.73993f, 1.57212f, 1.42410f,
- -1.36762f, -0.41485f, -1.12103f, 0.56959f, 0.11500f, 0.48945f, -0.13585f,
- 1.22125f, 0.67071f, -1.11812f, -0.20660f, -0.52856f, 0.70663f, 0.74382f,
- 0.61114f, -0.11454f, 1.14687f, 0.80322f, -0.45965f, -0.44466f, -0.05830f,
- 0.13206f, -0.53750f, -0.11324f, -0.37971f, -0.13491f, -0.21268f, 1.93407f,
- 1.34433f, 2.49427f, 2.91955f, 1.71730f, 0.03295f, 0.03587f, -0.14550f,
- 0.08189f, -0.38655f, -0.35432f, -0.62706f, -0.01849f, -0.57882f, -0.60438f,
- -1.01334f, -0.57302f, 0.22592f, 0.05916f, -0.05305f, -0.89824f, -0.52969f,
- -0.24542f, 0.27029f, -0.40924f, -0.82452f, -0.60665f, -5.03025f, 0.83302f,
- 1.83695f, 2.19716f, 2.31001f, 0.03657f, 0.00063f, -0.04379f, 0.05835f,
- -0.08623f, 0.20557f, -0.17791f, 0.07874f, -0.25456f, -0.19513f, -0.27753f,
- -0.31982f, 0.00245f, -0.33183f, 0.26059f, -0.22165f, 0.37582f, -0.30411f,
- -0.22639f, -0.14739f, -0.20201f, -0.37507f, -1.30653f, 0.49570f, 1.03673f,
- 0.66139f, 0.44941f, -0.44461f, -0.50376f, -0.49664f, 0.18608f, -0.26175f,
- 0.14844f, 0.78715f, -0.70344f, -0.87624f, -0.98535f, -0.35346f, 0.37094f,
- -0.43135f, -0.22571f, 3.46263f, 3.13580f, -1.33203f, -0.15247f, -0.15866f,
- -0.11214f, 0.12211f, 0.03964f, -1.87597f, -4.81597f, -4.80195f, -4.98096f,
- -5.62336f, -0.05337f, -0.00943f, 0.00792f, 0.02742f, 1.05679f, 2.41455f,
- 0.85382f, 1.42504f, 0.58096f, 0.21443f, 1.02694f, 1.06746f, 1.20242f,
- 0.60767f, 1.98667f, -0.80879f, -0.63495f, 1.95508f, 0.23952f, -0.15019f,
- -0.16097f, 0.30155f, -3.42407f, -1.34998f, 9.07689f, -2.22559f, 2.22562f,
- -0.03348f, -0.05229f, 0.05931f, 0.03042f, -0.18068f, -0.05732f, -0.33010f,
- -0.32279f, -0.26607f, -0.02723f, -0.04067f, 0.08700f, -0.16366f, -0.24935f,
- -0.69124f, 0.58508f, 0.50654f, 0.04492f, 1.38340f, -1.51487f, 1.72889f,
- -1.95618f, -3.65013f, -1.38525f, -3.05516f, -2.40448f, 2.47467f, 0.03784f,
- 0.08052f, -0.01971f, -0.08918f, -0.84997f, -0.55302f, -1.07861f, -0.62626f,
- 0.61751f, -0.11012f, -0.24185f, -0.39201f, -1.85390f, -0.31261f, -0.11927f,
- 0.15671f, -0.23450f, -0.14916f, -0.31715f, -0.19350f, 0.01795f, -0.11533f,
- -0.05799f, -0.03142f, 0.20218f, -0.39499f, -0.33859f, -0.13201f, -0.19527f,
- -0.28459f, -0.20346f, 0.89457f, -2.22103f, -2.37455f, -2.00221f, 2.44553f,
- 0.33915f, 0.50047f, -0.34625f, -0.19667f, -0.56333f, -0.84328f, 1.25767f,
- -1.70297f, 1.00482f, -0.00103f, -1.40813f, 0.21311f, 0.39230f, -0.07302f,
- -3.49100f, 1.60675f, -2.90692f, 0.11022f, 0.13507f, -0.13308f, 0.15201f,
- -0.05573f,
-};
-
-static const float av1_rect_partition_nn_bias_16_layer0[NUM_NODES] = {
- -0.16783f, -0.16023f, 0.52215f, -0.04109f, 2.00122f, -0.11633f, 0.25535f,
- 1.80638f, 1.69273f, -0.25998f, -6.83550f, -0.79682f, -1.03466f, 1.42721f,
- 0.00000f, -0.00000f, -0.11665f, -0.12047f, -1.01497f, 7.27181f, -0.78548f,
- -1.39335f, -5.42248f, -0.10388f, 0.07634f, 2.81012f, -0.57429f, -0.15629f,
- -0.12044f, 1.65478f, -0.75153f, 1.18441f,
-};
-
-static const float av1_rect_partition_nn_weights_16_layer1[NUM_NODES *
- LABEL_SIZE] = {
- -0.26407f, 0.06322f, 0.87932f, 0.17772f, 0.71686f, -0.12283f, 0.08454f,
- 0.20098f, -0.31763f, -0.33178f, -4.59535f, -0.04367f, 0.17099f, 3.80486f,
- 0.16750f, 0.29218f, 0.57234f, -0.96550f, -0.10599f, -4.91130f, -0.14658f,
- 0.95803f, -4.13925f, 0.24567f, 0.25708f, 1.60547f, -1.03251f, -0.31053f,
- -0.05659f, -0.94121f, -0.68926f, -0.24738f, -0.38019f, 0.98950f, 0.13689f,
- 0.24504f, 0.49623f, 0.19980f, 0.38349f, 0.37481f, 0.54540f, -0.02198f,
- 3.43385f, 1.02543f, -0.40921f, -3.07235f, 0.02996f, 0.00323f, -0.35414f,
- 0.71099f, 1.39334f, 2.43741f, -1.11007f, -0.22739f, -4.21757f, 0.11905f,
- 0.00353f, -1.69637f, 0.45944f, -0.19884f, 0.03624f, 0.25729f, 0.23659f,
- -2.08405f, 0.08573f, -0.53393f, -1.28103f, -0.53970f, -0.65465f, 0.31821f,
- -0.09884f, -0.69026f, -0.37284f, 0.04622f, 1.32973f, -0.15414f, 0.19138f,
- -0.67927f, -0.17658f, 0.36008f, -0.51832f, 0.09887f, -1.94414f, 2.95227f,
- 1.76937f, -0.26687f, 8.50976f, 0.26247f, 0.60262f, -0.27910f, 0.30061f,
- -0.05117f, 0.16018f, 0.71195f, 0.57871f, 1.57794f,
-};
-
-static const float av1_rect_partition_nn_bias_16_layer1[3] = {
- 2.68750f,
- -1.31894f,
- -1.36768f,
-};
-
-static const NN_CONFIG av1_rect_partition_nnconfig_16 = {
- FEATURE_SIZE, // num_inputs
- LABEL_SIZE, // num_outputs
- 1, // num_hidden_layers
- {
- NUM_NODES,
- }, // num_hidden_nodes
- { av1_rect_partition_nn_weights_16_layer0,
- av1_rect_partition_nn_weights_16_layer1 },
- { av1_rect_partition_nn_bias_16_layer0, av1_rect_partition_nn_bias_16_layer1 }
-};
-
-static const float av1_rect_partition_nn_weights_32_layer0[FEATURE_SIZE *
- NUM_NODES] = {
- -0.54654f, -0.43537f, -0.10620f, -0.48051f, -0.43543f, -0.22737f, -0.15429f,
- -0.09858f, -0.09438f, 0.37306f, 0.23934f, -1.86375f, -1.18307f, -0.32995f,
- -0.09745f, 0.05431f, -0.13799f, 0.14734f, -0.33219f, 0.18057f, -0.23792f,
- -0.28126f, 0.02977f, -0.07431f, 0.07860f, 0.00067f, -0.01927f, 1.01841f,
- -0.57739f, 0.08412f, -1.33843f, -1.05563f, -0.28693f, -0.39425f, -0.69572f,
- -0.16703f, 0.02808f, 0.11994f, -0.26267f, 0.19706f, -0.29707f, -0.25305f,
- -0.07050f, -0.02704f, -0.31528f, -0.42301f, 0.22496f, -0.37001f, -0.23319f,
- -0.11139f, -0.30513f, 0.04213f, -0.12550f, 0.02504f, 0.33245f, 0.01102f,
- -0.35950f, -0.05949f, -0.19590f, -0.27457f, -0.28339f, -0.15676f, -0.21538f,
- 0.65066f, 0.28443f, -1.24943f, -3.00246f, -1.01897f, 0.09304f, 0.70052f,
- -0.12877f, 0.21120f, -0.37476f, 0.23261f, -0.28401f, 0.09837f, 0.00020f,
- -0.12106f, -0.32354f, -0.02472f, -0.19772f, 1.01886f, 0.16596f, -0.06532f,
- 1.72938f, 1.57754f, 0.55963f, 0.33246f, -0.20023f, 0.30715f, 0.08629f,
- 0.18945f, -0.45988f, -1.22610f, -0.05152f, -0.48859f, -1.02104f, -0.27315f,
- -0.57698f, 0.04157f, -0.92428f, -1.31268f, 1.78210f, 0.10291f, 1.55042f,
- -1.26793f, 1.39042f, -1.43729f, 0.25600f, 5.21263f, 5.31955f, 5.19316f,
- 5.43430f, 0.00294f, -0.00970f, -0.02333f, 0.00250f, 1.17672f, 6.27544f,
- 4.95973f, 3.54009f, 4.51269f, 0.30750f, 0.78780f, -0.44741f, -0.76442f,
- 0.75050f, 0.58799f, 0.03400f, -2.09859f, 1.67313f, 0.12503f, 0.28609f,
- 1.15809f, 2.46530f, -0.04898f, 0.23072f, -0.12635f, -0.82097f, -0.63827f,
- 2.16779f, 1.77132f, 0.15434f, -1.06427f, 0.06206f, -0.87732f, -0.61897f,
- -0.44593f, -0.77131f, -0.15979f, -0.02282f, -0.74381f, 0.66052f, -0.22992f,
- 1.74638f, 1.29199f, -0.55464f, 0.98316f, 0.06665f, 0.50254f, -0.66292f,
- 0.17113f, -0.32633f, -1.85803f, -0.92759f, 4.44965f, 1.33057f, 0.02135f,
- -0.27446f, -0.26018f, -0.12613f, -0.14470f, -0.23355f, -0.09717f, -0.24123f,
- -0.05535f, -0.19146f, -0.36222f, -0.30458f, -0.40323f, 0.21779f, 0.14248f,
- -0.48630f, 0.18840f, 0.11040f, 0.17287f, -0.51880f, 1.12466f, -0.38888f,
- -0.16421f, -0.31784f, -0.36112f, -0.25386f, -0.01636f, 0.10029f, -0.26881f,
- -0.17051f, -0.30903f, -0.08573f, -0.28774f, -0.01173f, -0.09706f, -0.23089f,
- -0.12922f, -0.17463f, -0.12433f, -0.23074f, 0.15220f, 1.29826f, 0.23788f,
- 0.04189f, 2.66416f, 0.48815f, -0.06803f, 0.96742f, 1.27165f, -0.70348f,
- -0.09941f, -0.42948f, -0.20243f, -0.02364f, -0.26689f, -0.40629f, -0.68217f,
- -0.48073f, 2.43657f, -2.60191f, -1.82837f, 0.50440f, 0.71829f, 0.76491f,
- 0.28293f, 0.20568f, 0.92642f, -0.02496f, 1.43637f, -0.24474f, -1.21030f,
- 0.54084f, 1.05130f, 1.29572f, 0.03750f, -0.36894f, 0.74548f, -1.33857f,
- -0.84858f, 1.35230f, 0.80175f, 0.66136f, 1.06473f, 0.18701f, 1.42413f,
- 0.04661f, -0.07820f, 0.64990f, -0.43595f, 1.18304f, -0.11437f, -0.06365f,
- 0.03558f, 0.78260f, -1.74890f, 1.56217f, -1.23424f, 4.59193f, -3.35072f,
- 0.01180f, -0.18296f, -0.20870f, 0.04510f, 1.52595f, -1.37402f, -0.33123f,
- -0.85957f, 0.80598f, 0.03743f, 0.02354f, 0.37707f, 1.62095f, -0.29627f,
- -0.31778f, -0.45789f, -0.14906f, 0.25315f, -0.10817f, -0.32610f, -0.40890f,
- 0.33984f,
-};
-
-static const float av1_rect_partition_nn_bias_32_layer0[NUM_NODES] = {
- -0.17482f, 0.39042f, 0.00000f, 1.69677f, 0.08792f, -0.09301f, 0.13809f,
- 4.84061f, 0.00000f, 0.40515f, 0.46246f, 0.20644f, -5.77478f, -1.54510f,
- 0.05660f, -0.32013f, 0.23649f, 0.03778f, -2.53710f, -0.27869f, 0.45623f,
- -0.04155f, -0.18445f, -0.73405f, -0.50243f, 2.23191f, 1.93272f, -1.07032f,
- -0.27602f, -1.98063f, 0.20816f, -0.01315f,
-};
-
-static const float av1_rect_partition_nn_weights_32_layer1[NUM_NODES *
- LABEL_SIZE] = {
- 0.02827f, 1.02560f, -0.07137f, -0.31911f, 0.11365f, 0.13684f, -0.07816f,
- -5.23036f, -0.34340f, 0.84526f, -1.51845f, 0.07017f, -8.12570f, 6.24061f,
- 0.35739f, -0.09937f, -0.30978f, 0.22032f, 0.74968f, -0.34557f, 0.45547f,
- -0.16512f, 0.07118f, 1.66415f, 0.41320f, -1.81533f, -1.96004f, 1.04666f,
- 0.84049f, 4.31009f, 0.68850f, 0.26322f, -0.24634f, -1.25889f, 0.31952f,
- 0.63632f, 0.05801f, -0.10664f, -0.21992f, 2.44386f, 0.19526f, -0.09838f,
- 1.53049f, -0.26630f, 3.54126f, -3.40574f, 0.72730f, 0.04557f, 0.92652f,
- 0.15522f, 2.35895f, -0.13347f, 0.56907f, 0.15352f, 0.01823f, -0.73939f,
- 0.43104f, 1.90321f, 0.31267f, -0.51972f, 0.50094f, -3.98372f, -3.41518f,
- -0.48183f, 0.26661f, 0.64146f, 0.14500f, -0.01695f, 0.16653f, -0.37846f,
- 0.08412f, 2.69714f, -0.20258f, -0.75786f, 0.11201f, 0.61878f, 4.22231f,
- -3.55330f, -1.14137f, -0.37722f, -0.28000f, -0.72581f, -2.62827f, -0.19448f,
- -0.59398f, -0.30136f, -0.17725f, -0.69630f, -0.41132f, 0.12208f, 2.11441f,
- -1.08794f, -1.41694f, 0.02620f, 2.18792f, 0.04271f,
-};
-
-static const float av1_rect_partition_nn_bias_32_layer1[3] = {
- 2.47332f,
- -1.65756f,
- -0.81573f,
-};
-
-static const NN_CONFIG av1_rect_partition_nnconfig_32 = {
- FEATURE_SIZE, // num_inputs
- LABEL_SIZE, // num_outputs
- 1, // num_hidden_layers
- {
- NUM_NODES,
- }, // num_hidden_nodes
- { av1_rect_partition_nn_weights_32_layer0,
- av1_rect_partition_nn_weights_32_layer1 },
- { av1_rect_partition_nn_bias_32_layer0, av1_rect_partition_nn_bias_32_layer1 }
-};
-
-static const float av1_rect_partition_nn_weights_64_layer0[FEATURE_SIZE *
- NUM_NODES] = {
- 0.08972f, 4.09095f, -0.31398f, -2.43631f, -0.74767f, 1.42471f, 1.60926f,
- 1.44721f, 1.88259f, 2.35375f, 1.88299f, 2.01109f, 0.98679f, 2.24131f,
- 0.06279f, -0.08315f, 0.32107f, 0.91334f, -0.36569f, 5.55049f, 5.44943f,
- 5.20471f, 5.39099f, -0.01943f, -0.00284f, 0.02203f, -0.01309f, 1.41917f,
- 6.68460f, -6.15986f, 6.41341f, -3.20630f, -0.00567f, -0.00038f, 0.05960f,
- 0.04308f, 0.95366f, 3.48535f, 2.98266f, 4.11784f, 3.44255f, 0.61630f,
- 0.71405f, 0.63945f, -0.00713f, 0.39193f, 1.91621f, 3.32755f, 0.71674f,
- -0.11647f, 2.07090f, 2.64191f, 0.07949f, -0.05023f, 0.99935f, 0.83145f,
- 0.75898f, -0.98764f, -0.58731f, 1.21734f, -0.08076f, -3.26780f, 1.66278f,
- 0.04189f, -0.33177f, -1.58648f, 1.00883f, -0.56132f, -2.34877f, 0.67056f,
- -2.32297f, -0.91641f, -1.02909f, 4.19781f, 3.87484f, 4.32778f, -1.97171f,
- -0.24734f, 0.00822f, 0.05892f, 0.12697f, -3.62915f, -2.93127f, 7.94856f,
- -3.29311f, 3.26001f, -0.02231f, 0.02741f, 0.05919f, 0.08190f, -1.49344f,
- -0.64475f, -0.24627f, 4.03324f, -1.14799f, -0.18465f, -0.17829f, 0.10394f,
- 0.08580f, -5.74721f, 4.42467f, 3.63964f, 3.00258f, -1.22744f, -0.29408f,
- 0.00767f, 0.12305f, 0.05249f, -0.17166f, -0.20120f, -0.32941f, -0.31901f,
- 0.04628f, -0.35249f, -0.18272f, 0.03956f, -0.19329f, -0.33564f, 0.09856f,
- -0.00173f, -0.31751f, -0.05702f, -0.20558f, -0.31464f, -0.02488f, -0.00729f,
- -0.35854f, -0.14762f, -0.34897f, -0.12746f, 0.04011f, -0.24918f, -0.53516f,
- -0.28440f, -0.36789f, -1.34889f, -9.10044f, -9.19238f, 4.48042f, 6.54429f,
- -0.00226f, 0.00430f, 0.00321f, 0.00442f, 0.87551f, -0.16224f, -0.22832f,
- -0.60640f, -0.28738f, 0.18062f, 0.22008f, -0.47406f, 0.80302f, 0.12149f,
- 1.49530f, 1.05069f, -2.02985f, -0.92833f, 0.25616f, 0.12852f, 3.51840f,
- 0.25226f, -2.63283f, -4.04386f, 8.46300f, -2.93408f, 0.44069f, 0.08276f,
- 0.34482f, -0.22615f, 0.28666f, 3.02962f, -1.20055f, -1.04832f, -0.97632f,
- -0.99530f, 1.44196f, 1.68550f, 0.49360f, 1.08155f, -0.26059f, -0.02876f,
- -0.27492f, -0.06205f, -0.09496f, -0.12314f, -0.30228f, -0.07453f, -0.38857f,
- 1.17443f, 2.41497f, 1.90537f, 2.37716f, 2.91495f, -0.44455f, -0.51176f,
- 0.48195f, 0.53032f, 0.23696f, -1.06211f, 1.47459f, -0.89029f, 0.29521f,
- 0.66291f, -0.42653f, 1.82308f, -1.30372f, -0.36192f, -3.40388f, -1.61476f,
- -2.29745f, -0.66886f, -2.08252f, -0.54552f, -4.06849f, 0.02948f, 0.27297f,
- -4.81472f, 4.60404f, -0.11053f, 0.14765f, 0.02826f, -0.14688f, -0.07066f,
- -0.01224f, 1.20377f, 7.02725f, -6.02627f, 6.87255f, -3.14257f, 0.01074f,
- 0.02397f, -0.02359f, 0.01901f, 0.14956f, -1.67671f, 2.26714f, 2.57043f,
- -0.45888f, -1.60265f, -2.11475f, -2.74029f, -2.74658f, -0.35630f, -2.63013f,
- -2.14814f, -0.67266f, -1.56850f, 0.57137f, -1.14428f, -0.34265f, -0.12521f,
- 0.01220f, -0.74906f, -0.19270f, 0.68110f, -0.24737f, -0.70568f, -1.64826f,
- -0.35847f, -0.15984f, -1.17932f, -8.72306f, -8.72834f, 3.93701f, 6.17812f,
- -0.03191f, -0.00104f, 0.01402f, -0.00046f, -0.94517f, 1.51266f, -0.56318f,
- 0.72260f, -0.09253f, -0.09069f, -2.16695f, -0.23653f, 0.24418f, 2.21148f,
- -1.47954f, -1.01439f, 0.31536f, 0.77238f, -0.85083f, -0.15758f, -0.50886f,
- 0.09101f,
-};
-
-static const float av1_rect_partition_nn_bias_64_layer0[NUM_NODES] = {
- 0.91706f, -1.31328f, -5.16196f, 1.13191f, -0.98044f, -1.61122f, 1.03039f,
- -0.98537f, -4.45568f, -4.34802f, -0.92116f, 0.66836f, -0.10752f, -0.13065f,
- -0.35567f, -0.35693f, 1.74941f, 1.17379f, -3.45555f, 5.66321f, -0.24917f,
- -1.11940f, -0.73656f, -0.19299f, -0.04181f, 1.11010f, -2.97859f, -0.16774f,
- 0.59835f, -0.31269f, -0.30585f, -1.66212f,
-};
-
-static const float av1_rect_partition_nn_weights_64_layer1[NUM_NODES *
- LABEL_SIZE] = {
- 0.58963f, 4.20320f, -8.62465f, -6.54014f, 5.41108f, 2.33581f, -0.10354f,
- -1.17753f, -3.45909f, -2.24722f, 2.20881f, 3.21971f, -0.09087f, -0.21624f,
- 0.16529f, -8.40985f, -1.60205f, -1.41538f, 4.41826f, -4.63069f, -0.27742f,
- 4.08710f, 0.26439f, -1.46028f, 0.51234f, 6.25212f, -3.35650f, -1.21348f,
- 1.37201f, 8.89151f, 0.28859f, -0.97328f, -0.36196f, -2.71701f, 4.54196f,
- -0.62476f, -2.43814f, -1.34209f, 0.12850f, 1.73859f, 3.09809f, -4.42434f,
- -1.82552f, -3.66420f, -0.31535f, 0.00968f, -0.02019f, 9.66824f, 0.58835f,
- 1.50425f, 2.84487f, 2.55522f, 0.01409f, -2.27594f, -0.31800f, 0.91076f,
- -0.66808f, 0.33120f, -0.12460f, 0.64457f, -0.36416f, -10.30843f, 1.51013f,
- 2.06861f, -0.20989f, -0.87119f, 3.68642f, 7.33662f, -2.88037f, -0.52414f,
- -0.35036f, -0.45947f, -0.07406f, 6.46346f, -0.16031f, 0.27071f, 0.38845f,
- -0.21940f, 0.08583f, -1.39526f, 0.50554f, 0.45279f, -6.61856f, 1.84069f,
- -0.19149f, -1.77235f, 0.75136f, 1.11797f, 0.32677f, -7.10427f, 3.82908f,
- 1.04238f, -0.91435f, 1.93317f, -1.84946f, -0.48909f,
-};
-
-static const float av1_rect_partition_nn_bias_64_layer1[3] = {
- 0.32215f,
- -0.57522f,
- 0.25314f,
-};
-
-static const NN_CONFIG av1_rect_partition_nnconfig_64 = {
- FEATURE_SIZE, // num_inputs
- LABEL_SIZE, // num_outputs
- 1, // num_hidden_layers
- {
- NUM_NODES,
- }, // num_hidden_nodes
- { av1_rect_partition_nn_weights_64_layer0,
- av1_rect_partition_nn_weights_64_layer1 },
- { av1_rect_partition_nn_bias_64_layer0, av1_rect_partition_nn_bias_64_layer1 }
-};
-
-static const float av1_rect_partition_nn_weights_128_layer0[FEATURE_SIZE *
- NUM_NODES] = {
- -0.70901f, -3.03481f, 3.30604f, -1.28803f, -0.08610f, -0.33320f, -0.30716f,
- 0.25100f, 0.14323f, -0.98422f, -0.89084f, -0.24508f, -1.10785f, -0.82524f,
- 0.11766f, -0.42777f, 1.08965f, 4.35125f, -1.19388f, 4.22042f, 4.96306f,
- 6.32406f, 3.29899f, -0.90768f, 0.05203f, 0.38467f, 1.74257f, -0.19918f,
- -0.11335f, 0.00140f, -0.42303f, -0.04419f, 0.03583f, -0.05441f, -0.19586f,
- 0.01484f, -1.19964f, 0.25497f, 3.04502f, 0.05446f, -0.23253f, 0.00266f,
- 0.07117f, -2.78986f, -4.62953f, 1.45331f, 0.43923f, 0.92298f, -0.47736f,
- 1.49165f, 0.45942f, -1.99787f, 3.33510f, 0.17234f, 0.04024f, -1.42780f,
- 0.23566f, -0.90970f, 1.18041f, -1.45865f, 2.30878f, -1.28507f, 1.87290f,
- 1.91186f, 4.74826f, -3.70735f, 4.49808f, -4.72275f, -0.02696f, -0.02642f,
- -0.06093f, -0.01121f, -0.70683f, 2.69737f, -1.88563f, 2.48637f, 1.10922f,
- 0.74624f, 0.40308f, 2.06396f, 1.39289f, 0.00909f, -2.05271f, -1.53539f,
- -1.38323f, 0.83303f, -0.32250f, 0.51172f, 3.91249f, 1.66373f, 1.13184f,
- -2.22874f, -1.13448f, -0.11185f, 0.19387f, 0.36770f, -0.58933f, 0.22789f,
- 1.17307f, 0.77461f, 0.20817f, 0.33417f, 0.54037f, 0.32961f, -0.18456f,
- -9.78171f, -0.17216f, -3.44703f, -2.42158f, 0.51946f, 4.35949f, -0.73335f,
- -1.61515f, -0.29622f, -0.37617f, -0.42316f, 0.74922f, 1.44386f, 3.92704f,
- -3.76274f, 4.19775f, -3.86958f, 0.00074f, -0.02418f, -0.12944f, 0.05857f,
- -0.85507f, 5.42546f, 5.40338f, 5.54347f, 5.59791f, -0.01611f, 0.01618f,
- -0.01654f, -0.00270f, -0.39608f, -0.40410f, -0.24551f, 0.09124f, -0.34413f,
- -0.11504f, 0.12793f, -0.31523f, 0.09148f, -0.08567f, -0.05140f, -0.13310f,
- -0.81200f, 0.06882f, -0.52537f, -12.74048f, -0.45395f, -4.04775f, -1.84887f,
- -1.02573f, 0.32788f, 1.06828f, -1.25503f, -0.42693f, 2.01413f, -2.29103f,
- 0.62271f, 1.11764f, -1.83113f, -1.32325f, -1.65651f, -2.87826f, 1.46910f,
- 0.60885f, 0.16079f, 0.00171f, -0.25658f, -0.25465f, -0.14149f, 0.19497f,
- -0.07866f, -0.37080f, -0.05778f, -0.08870f, -0.20491f, 0.84521f, -0.18214f,
- -1.38441f, -1.08932f, -1.76627f, 0.73172f, 0.05967f, 1.28057f, 3.42722f,
- 1.69287f, 0.77169f, 0.44528f, 1.85513f, 0.07840f, 1.31252f, 2.89948f,
- 1.49489f, 0.15281f, 0.54708f, -1.14185f, -2.51063f, 0.36618f, -0.55322f,
- 0.96671f, 1.59470f, 1.38252f, 1.99697f, 0.03266f, -0.23200f, -0.01127f,
- -0.18918f, -0.37598f, -0.03119f, -0.36039f, -0.21192f, -0.11565f, -4.22635f,
- 1.41252f, 0.56608f, -0.08867f, 3.11924f, -0.54597f, -0.12504f, -0.05289f,
- -0.28665f, -0.58297f, -1.18362f, -0.76201f, -1.22011f, -0.58756f, 0.14740f,
- 1.43971f, 0.98381f, -0.02998f, -0.40678f, -0.23047f, -0.12979f, 0.04003f,
- -0.22081f, -0.09294f, -0.15955f, -0.10379f, -0.10192f, -1.51316f, 2.39482f,
- -1.69975f, 3.58976f, -0.91032f, -0.03498f, 0.48982f, -0.13418f, 0.76256f,
- 1.61003f, -2.01676f, -1.24430f, -3.25763f, 1.12314f, 2.00740f, 0.04613f,
- -0.14746f, -0.57374f, 3.44511f, -0.56767f, -4.08432f, -2.04894f, 2.35951f,
- -0.00458f, 0.18512f, 0.09916f, -0.04084f, -1.56207f, 1.38034f, 4.17302f,
- -1.47326f, -2.03530f, -0.00210f, 0.27469f, -0.17423f, 0.86860f, 2.76195f,
- 2.43269f, -3.57331f, 2.08715f, -1.44171f, -0.17389f, 2.26157f, -0.07852f,
- 2.02519f,
-};
-
-static const float av1_rect_partition_nn_bias_128_layer0[NUM_NODES] = {
- 2.53427f, 1.66678f, -0.84914f, -0.15070f, -1.74769f, 0.45218f, -0.26067f,
- 2.05916f, 0.08978f, 5.30984f, 2.66243f, -1.62740f, 0.70018f, 1.96403f,
- -4.97152f, -0.05425f, -3.84474f, -1.28006f, 3.47490f, -0.08373f, 0.00225f,
- -1.40692f, -0.27569f, -0.30253f, 0.77377f, -0.67636f, -0.26379f, 1.82348f,
- 0.66120f, 0.61119f, -1.42293f, 0.32676f,
-};
-
-static const float av1_rect_partition_nn_weights_128_layer1[NUM_NODES *
- LABEL_SIZE] = {
- 1.53453f, -0.23707f, 7.88368f, 0.33340f, 0.97523f, 1.38538f, -0.16746f,
- 4.42070f, 3.18678f, -5.03545f, -2.27029f, -3.75719f, -0.26850f, -4.93432f,
- -8.75673f, 0.27398f, -5.77882f, -0.91616f, -2.62725f, -0.23961f, 0.31249f,
- 3.32134f, 0.25375f, -0.00394f, 2.30213f, -0.14183f, 0.14544f, -1.42830f,
- 1.31101f, 3.99389f, -0.00017f, -2.90184f, -2.11444f, 2.16734f, -3.05133f,
- 0.39206f, 4.61489f, -2.88181f, -0.47745f, 2.86649f, -1.20621f, 3.70550f,
- 1.58029f, -4.58731f, -2.29350f, -0.76930f, 5.19135f, -0.22521f, -5.08782f,
- 2.17316f, 1.30563f, 0.16777f, -2.17767f, -2.09904f, 1.37001f, 0.25091f,
- -1.76743f, 1.57940f, 0.30544f, -2.39895f, -0.08532f, -1.77122f, 1.84010f,
- -0.88449f, 0.79299f, -1.35368f, -4.54110f, 0.02244f, -5.11580f, 1.60883f,
- 0.29352f, -6.47042f, -1.81426f, 1.24013f, 0.90980f, 7.93977f, 2.12555f,
- 5.24720f, 4.19508f, 0.21499f, 11.06045f, -0.74752f, 0.89396f, 0.26422f,
- 1.72332f, -1.25113f, -1.71136f, 0.13676f, -0.07867f, -0.96929f, 0.19911f,
- 3.58233f, -0.76470f, -2.24162f, -2.87465f, 3.18736f,
-};
-
-static const float av1_rect_partition_nn_bias_128_layer1[3] = {
- 1.09014f,
- -0.53317f,
- -0.55668f,
-};
-
-static const NN_CONFIG av1_rect_partition_nnconfig_128 = {
- FEATURE_SIZE, // num_inputs
- LABEL_SIZE, // num_outputs
- 1, // num_hidden_layers
- {
- NUM_NODES,
- }, // num_hidden_nodes
- { av1_rect_partition_nn_weights_128_layer0,
- av1_rect_partition_nn_weights_128_layer1 },
- { av1_rect_partition_nn_bias_128_layer0,
- av1_rect_partition_nn_bias_128_layer1 }
-};
-#undef FEATURE_SIZE
-#undef NUM_NODES
-#undef LABEL_SIZE
-
-#ifdef __cplusplus
-} // extern "C"
-#endif
-
-#endif // AOM_AV1_ENCODER_PARTITION_MODEL_WEIGHTS_H_
diff --git a/third_party/aom/av1/encoder/pickcdef.c b/third_party/aom/av1/encoder/pickcdef.c
deleted file mode 100644
index 6d154a7d2..000000000
--- a/third_party/aom/av1/encoder/pickcdef.c
+++ /dev/null
@@ -1,526 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#include <math.h>
-#include <string.h>
-
-#include "config/aom_scale_rtcd.h"
-
-#include "aom/aom_integer.h"
-#include "av1/common/cdef.h"
-#include "av1/common/onyxc_int.h"
-#include "av1/common/reconinter.h"
-#include "av1/encoder/encoder.h"
-
-#define REDUCED_PRI_STRENGTHS 8
-#define REDUCED_TOTAL_STRENGTHS (REDUCED_PRI_STRENGTHS * CDEF_SEC_STRENGTHS)
-#define TOTAL_STRENGTHS (CDEF_PRI_STRENGTHS * CDEF_SEC_STRENGTHS)
-
-static int priconv[REDUCED_PRI_STRENGTHS] = { 0, 1, 2, 3, 5, 7, 10, 13 };
-
-/* Search for the best strength to add as an option, knowing we
- already selected nb_strengths options. */
-static uint64_t search_one(int *lev, int nb_strengths,
- uint64_t mse[][TOTAL_STRENGTHS], int sb_count,
- int fast) {
- uint64_t tot_mse[TOTAL_STRENGTHS];
- const int total_strengths = fast ? REDUCED_TOTAL_STRENGTHS : TOTAL_STRENGTHS;
- int i, j;
- uint64_t best_tot_mse = (uint64_t)1 << 63;
- int best_id = 0;
- memset(tot_mse, 0, sizeof(tot_mse));
- for (i = 0; i < sb_count; i++) {
- int gi;
- uint64_t best_mse = (uint64_t)1 << 63;
- /* Find best mse among already selected options. */
- for (gi = 0; gi < nb_strengths; gi++) {
- if (mse[i][lev[gi]] < best_mse) {
- best_mse = mse[i][lev[gi]];
- }
- }
- /* Find best mse when adding each possible new option. */
- for (j = 0; j < total_strengths; j++) {
- uint64_t best = best_mse;
- if (mse[i][j] < best) best = mse[i][j];
- tot_mse[j] += best;
- }
- }
- for (j = 0; j < total_strengths; j++) {
- if (tot_mse[j] < best_tot_mse) {
- best_tot_mse = tot_mse[j];
- best_id = j;
- }
- }
- lev[nb_strengths] = best_id;
- return best_tot_mse;
-}
-
-/* Search for the best luma+chroma strength to add as an option, knowing we
- already selected nb_strengths options. */
-static uint64_t search_one_dual(int *lev0, int *lev1, int nb_strengths,
- uint64_t (**mse)[TOTAL_STRENGTHS], int sb_count,
- int fast) {
- uint64_t tot_mse[TOTAL_STRENGTHS][TOTAL_STRENGTHS];
- int i, j;
- uint64_t best_tot_mse = (uint64_t)1 << 63;
- int best_id0 = 0;
- int best_id1 = 0;
- const int total_strengths = fast ? REDUCED_TOTAL_STRENGTHS : TOTAL_STRENGTHS;
- memset(tot_mse, 0, sizeof(tot_mse));
- for (i = 0; i < sb_count; i++) {
- int gi;
- uint64_t best_mse = (uint64_t)1 << 63;
- /* Find best mse among already selected options. */
- for (gi = 0; gi < nb_strengths; gi++) {
- uint64_t curr = mse[0][i][lev0[gi]];
- curr += mse[1][i][lev1[gi]];
- if (curr < best_mse) {
- best_mse = curr;
- }
- }
- /* Find best mse when adding each possible new option. */
- for (j = 0; j < total_strengths; j++) {
- int k;
- for (k = 0; k < total_strengths; k++) {
- uint64_t best = best_mse;
- uint64_t curr = mse[0][i][j];
- curr += mse[1][i][k];
- if (curr < best) best = curr;
- tot_mse[j][k] += best;
- }
- }
- }
- for (j = 0; j < total_strengths; j++) {
- int k;
- for (k = 0; k < total_strengths; k++) {
- if (tot_mse[j][k] < best_tot_mse) {
- best_tot_mse = tot_mse[j][k];
- best_id0 = j;
- best_id1 = k;
- }
- }
- }
- lev0[nb_strengths] = best_id0;
- lev1[nb_strengths] = best_id1;
- return best_tot_mse;
-}
-
-/* Search for the set of strengths that minimizes mse. */
-static uint64_t joint_strength_search(int *best_lev, int nb_strengths,
- uint64_t mse[][TOTAL_STRENGTHS],
- int sb_count, int fast) {
- uint64_t best_tot_mse;
- int i;
- best_tot_mse = (uint64_t)1 << 63;
- /* Greedy search: add one strength options at a time. */
- for (i = 0; i < nb_strengths; i++) {
- best_tot_mse = search_one(best_lev, i, mse, sb_count, fast);
- }
- /* Trying to refine the greedy search by reconsidering each
- already-selected option. */
- if (!fast) {
- for (i = 0; i < 4 * nb_strengths; i++) {
- int j;
- for (j = 0; j < nb_strengths - 1; j++) best_lev[j] = best_lev[j + 1];
- best_tot_mse =
- search_one(best_lev, nb_strengths - 1, mse, sb_count, fast);
- }
- }
- return best_tot_mse;
-}
-
-/* Search for the set of luma+chroma strengths that minimizes mse. */
-static uint64_t joint_strength_search_dual(int *best_lev0, int *best_lev1,
- int nb_strengths,
- uint64_t (**mse)[TOTAL_STRENGTHS],
- int sb_count, int fast) {
- uint64_t best_tot_mse;
- int i;
- best_tot_mse = (uint64_t)1 << 63;
- /* Greedy search: add one strength options at a time. */
- for (i = 0; i < nb_strengths; i++) {
- best_tot_mse =
- search_one_dual(best_lev0, best_lev1, i, mse, sb_count, fast);
- }
- /* Trying to refine the greedy search by reconsidering each
- already-selected option. */
- for (i = 0; i < 4 * nb_strengths; i++) {
- int j;
- for (j = 0; j < nb_strengths - 1; j++) {
- best_lev0[j] = best_lev0[j + 1];
- best_lev1[j] = best_lev1[j + 1];
- }
- best_tot_mse = search_one_dual(best_lev0, best_lev1, nb_strengths - 1, mse,
- sb_count, fast);
- }
- return best_tot_mse;
-}
-
-/* FIXME: SSE-optimize this. */
-static void copy_sb16_16(uint16_t *dst, int dstride, const uint16_t *src,
- int src_voffset, int src_hoffset, int sstride,
- int vsize, int hsize) {
- int r, c;
- const uint16_t *base = &src[src_voffset * sstride + src_hoffset];
- for (r = 0; r < vsize; r++) {
- for (c = 0; c < hsize; c++) {
- dst[r * dstride + c] = base[r * sstride + c];
- }
- }
-}
-
-static INLINE uint64_t dist_8x8_16bit(uint16_t *dst, int dstride, uint16_t *src,
- int sstride, int coeff_shift) {
- uint64_t svar = 0;
- uint64_t dvar = 0;
- uint64_t sum_s = 0;
- uint64_t sum_d = 0;
- uint64_t sum_s2 = 0;
- uint64_t sum_d2 = 0;
- uint64_t sum_sd = 0;
- int i, j;
- for (i = 0; i < 8; i++) {
- for (j = 0; j < 8; j++) {
- sum_s += src[i * sstride + j];
- sum_d += dst[i * dstride + j];
- sum_s2 += src[i * sstride + j] * src[i * sstride + j];
- sum_d2 += dst[i * dstride + j] * dst[i * dstride + j];
- sum_sd += src[i * sstride + j] * dst[i * dstride + j];
- }
- }
- /* Compute the variance -- the calculation cannot go negative. */
- svar = sum_s2 - ((sum_s * sum_s + 32) >> 6);
- dvar = sum_d2 - ((sum_d * sum_d + 32) >> 6);
- return (uint64_t)floor(
- .5 + (sum_d2 + sum_s2 - 2 * sum_sd) * .5 *
- (svar + dvar + (400 << 2 * coeff_shift)) /
- (sqrt((20000 << 4 * coeff_shift) + svar * (double)dvar)));
-}
-
-static INLINE uint64_t mse_8x8_16bit(uint16_t *dst, int dstride, uint16_t *src,
- int sstride) {
- uint64_t sum = 0;
- int i, j;
- for (i = 0; i < 8; i++) {
- for (j = 0; j < 8; j++) {
- int e = dst[i * dstride + j] - src[i * sstride + j];
- sum += e * e;
- }
- }
- return sum;
-}
-
-static INLINE uint64_t mse_4x4_16bit(uint16_t *dst, int dstride, uint16_t *src,
- int sstride) {
- uint64_t sum = 0;
- int i, j;
- for (i = 0; i < 4; i++) {
- for (j = 0; j < 4; j++) {
- int e = dst[i * dstride + j] - src[i * sstride + j];
- sum += e * e;
- }
- }
- return sum;
-}
-
-/* Compute MSE only on the blocks we filtered. */
-uint64_t compute_cdef_dist(uint16_t *dst, int dstride, uint16_t *src,
- cdef_list *dlist, int cdef_count, BLOCK_SIZE bsize,
- int coeff_shift, int pli) {
- uint64_t sum = 0;
- int bi, bx, by;
- if (bsize == BLOCK_8X8) {
- for (bi = 0; bi < cdef_count; bi++) {
- by = dlist[bi].by;
- bx = dlist[bi].bx;
- if (pli == 0) {
- sum += dist_8x8_16bit(&dst[(by << 3) * dstride + (bx << 3)], dstride,
- &src[bi << (3 + 3)], 8, coeff_shift);
- } else {
- sum += mse_8x8_16bit(&dst[(by << 3) * dstride + (bx << 3)], dstride,
- &src[bi << (3 + 3)], 8);
- }
- }
- } else if (bsize == BLOCK_4X8) {
- for (bi = 0; bi < cdef_count; bi++) {
- by = dlist[bi].by;
- bx = dlist[bi].bx;
- sum += mse_4x4_16bit(&dst[(by << 3) * dstride + (bx << 2)], dstride,
- &src[bi << (3 + 2)], 4);
- sum += mse_4x4_16bit(&dst[((by << 3) + 4) * dstride + (bx << 2)], dstride,
- &src[(bi << (3 + 2)) + 4 * 4], 4);
- }
- } else if (bsize == BLOCK_8X4) {
- for (bi = 0; bi < cdef_count; bi++) {
- by = dlist[bi].by;
- bx = dlist[bi].bx;
- sum += mse_4x4_16bit(&dst[(by << 2) * dstride + (bx << 3)], dstride,
- &src[bi << (2 + 3)], 8);
- sum += mse_4x4_16bit(&dst[(by << 2) * dstride + (bx << 3) + 4], dstride,
- &src[(bi << (2 + 3)) + 4], 8);
- }
- } else {
- assert(bsize == BLOCK_4X4);
- for (bi = 0; bi < cdef_count; bi++) {
- by = dlist[bi].by;
- bx = dlist[bi].bx;
- sum += mse_4x4_16bit(&dst[(by << 2) * dstride + (bx << 2)], dstride,
- &src[bi << (2 + 2)], 4);
- }
- }
- return sum >> 2 * coeff_shift;
-}
-
-void av1_cdef_search(YV12_BUFFER_CONFIG *frame, const YV12_BUFFER_CONFIG *ref,
- AV1_COMMON *cm, MACROBLOCKD *xd, int fast) {
- int r, c;
- int fbr, fbc;
- uint16_t *src[3];
- uint16_t *ref_coeff[3];
- static cdef_list dlist[MI_SIZE_128X128 * MI_SIZE_128X128];
- int dir[CDEF_NBLOCKS][CDEF_NBLOCKS] = { { 0 } };
- int var[CDEF_NBLOCKS][CDEF_NBLOCKS] = { { 0 } };
- int stride[3];
- int bsize[3];
- int mi_wide_l2[3];
- int mi_high_l2[3];
- int xdec[3];
- int ydec[3];
- int pli;
- int cdef_count;
- int coeff_shift = AOMMAX(cm->seq_params.bit_depth - 8, 0);
- uint64_t best_tot_mse = (uint64_t)1 << 63;
- uint64_t tot_mse;
- int sb_count;
- int nvfb = (cm->mi_rows + MI_SIZE_64X64 - 1) / MI_SIZE_64X64;
- int nhfb = (cm->mi_cols + MI_SIZE_64X64 - 1) / MI_SIZE_64X64;
- int *sb_index = aom_malloc(nvfb * nhfb * sizeof(*sb_index));
- int *selected_strength = aom_malloc(nvfb * nhfb * sizeof(*sb_index));
- uint64_t(*mse[2])[TOTAL_STRENGTHS];
- int pri_damping = 3 + (cm->base_qindex >> 6);
- int sec_damping = 3 + (cm->base_qindex >> 6);
- int i;
- int nb_strengths;
- int nb_strength_bits;
- int quantizer;
- double lambda;
- const int num_planes = av1_num_planes(cm);
- const int total_strengths = fast ? REDUCED_TOTAL_STRENGTHS : TOTAL_STRENGTHS;
- DECLARE_ALIGNED(32, uint16_t, inbuf[CDEF_INBUF_SIZE]);
- uint16_t *in;
- DECLARE_ALIGNED(32, uint16_t, tmp_dst[1 << (MAX_SB_SIZE_LOG2 * 2)]);
- quantizer = av1_ac_quant_Q3(cm->base_qindex, 0, cm->seq_params.bit_depth) >>
- (cm->seq_params.bit_depth - 8);
- lambda = .12 * quantizer * quantizer / 256.;
-
- av1_setup_dst_planes(xd->plane, cm->seq_params.sb_size, frame, 0, 0, 0,
- num_planes);
- mse[0] = aom_malloc(sizeof(**mse) * nvfb * nhfb);
- mse[1] = aom_malloc(sizeof(**mse) * nvfb * nhfb);
- for (pli = 0; pli < num_planes; pli++) {
- uint8_t *ref_buffer;
- int ref_stride;
- switch (pli) {
- case 0:
- ref_buffer = ref->y_buffer;
- ref_stride = ref->y_stride;
- break;
- case 1:
- ref_buffer = ref->u_buffer;
- ref_stride = ref->uv_stride;
- break;
- case 2:
- ref_buffer = ref->v_buffer;
- ref_stride = ref->uv_stride;
- break;
- }
- src[pli] = aom_memalign(
- 32, sizeof(*src) * cm->mi_rows * cm->mi_cols * MI_SIZE * MI_SIZE);
- ref_coeff[pli] = aom_memalign(
- 32, sizeof(*ref_coeff) * cm->mi_rows * cm->mi_cols * MI_SIZE * MI_SIZE);
- xdec[pli] = xd->plane[pli].subsampling_x;
- ydec[pli] = xd->plane[pli].subsampling_y;
- bsize[pli] = ydec[pli] ? (xdec[pli] ? BLOCK_4X4 : BLOCK_8X4)
- : (xdec[pli] ? BLOCK_4X8 : BLOCK_8X8);
- stride[pli] = cm->mi_cols << MI_SIZE_LOG2;
- mi_wide_l2[pli] = MI_SIZE_LOG2 - xd->plane[pli].subsampling_x;
- mi_high_l2[pli] = MI_SIZE_LOG2 - xd->plane[pli].subsampling_y;
-
- const int frame_height =
- (cm->mi_rows * MI_SIZE) >> xd->plane[pli].subsampling_y;
- const int frame_width =
- (cm->mi_cols * MI_SIZE) >> xd->plane[pli].subsampling_x;
-
- for (r = 0; r < frame_height; ++r) {
- for (c = 0; c < frame_width; ++c) {
- if (cm->seq_params.use_highbitdepth) {
- src[pli][r * stride[pli] + c] = CONVERT_TO_SHORTPTR(
- xd->plane[pli].dst.buf)[r * xd->plane[pli].dst.stride + c];
- ref_coeff[pli][r * stride[pli] + c] =
- CONVERT_TO_SHORTPTR(ref_buffer)[r * ref_stride + c];
- } else {
- src[pli][r * stride[pli] + c] =
- xd->plane[pli].dst.buf[r * xd->plane[pli].dst.stride + c];
- ref_coeff[pli][r * stride[pli] + c] = ref_buffer[r * ref_stride + c];
- }
- }
- }
- }
- in = inbuf + CDEF_VBORDER * CDEF_BSTRIDE + CDEF_HBORDER;
- sb_count = 0;
- for (fbr = 0; fbr < nvfb; ++fbr) {
- for (fbc = 0; fbc < nhfb; ++fbc) {
- int nvb, nhb;
- int gi;
- int dirinit = 0;
- nhb = AOMMIN(MI_SIZE_64X64, cm->mi_cols - MI_SIZE_64X64 * fbc);
- nvb = AOMMIN(MI_SIZE_64X64, cm->mi_rows - MI_SIZE_64X64 * fbr);
- int hb_step = 1;
- int vb_step = 1;
- BLOCK_SIZE bs = BLOCK_64X64;
- MB_MODE_INFO *const mbmi =
- cm->mi_grid_visible[MI_SIZE_64X64 * fbr * cm->mi_stride +
- MI_SIZE_64X64 * fbc];
- if (((fbc & 1) &&
- (mbmi->sb_type == BLOCK_128X128 || mbmi->sb_type == BLOCK_128X64)) ||
- ((fbr & 1) &&
- (mbmi->sb_type == BLOCK_128X128 || mbmi->sb_type == BLOCK_64X128)))
- continue;
- if (mbmi->sb_type == BLOCK_128X128 || mbmi->sb_type == BLOCK_128X64 ||
- mbmi->sb_type == BLOCK_64X128)
- bs = mbmi->sb_type;
- if (bs == BLOCK_128X128 || bs == BLOCK_128X64) {
- nhb = AOMMIN(MI_SIZE_128X128, cm->mi_cols - MI_SIZE_64X64 * fbc);
- hb_step = 2;
- }
- if (bs == BLOCK_128X128 || bs == BLOCK_64X128) {
- nvb = AOMMIN(MI_SIZE_128X128, cm->mi_rows - MI_SIZE_64X64 * fbr);
- vb_step = 2;
- }
- // No filtering if the entire filter block is skipped
- if (sb_all_skip(cm, fbr * MI_SIZE_64X64, fbc * MI_SIZE_64X64)) continue;
- cdef_count = sb_compute_cdef_list(cm, fbr * MI_SIZE_64X64,
- fbc * MI_SIZE_64X64, dlist, bs);
- for (pli = 0; pli < num_planes; pli++) {
- for (i = 0; i < CDEF_INBUF_SIZE; i++) inbuf[i] = CDEF_VERY_LARGE;
- for (gi = 0; gi < total_strengths; gi++) {
- int threshold;
- uint64_t curr_mse;
- int sec_strength;
- threshold = gi / CDEF_SEC_STRENGTHS;
- if (fast) threshold = priconv[threshold];
- /* We avoid filtering the pixels for which some of the pixels to
- average
- are outside the frame. We could change the filter instead, but it
- would add special cases for any future vectorization. */
- int yoff = CDEF_VBORDER * (fbr != 0);
- int xoff = CDEF_HBORDER * (fbc != 0);
- int ysize = (nvb << mi_high_l2[pli]) +
- CDEF_VBORDER * (fbr + vb_step < nvfb) + yoff;
- int xsize = (nhb << mi_wide_l2[pli]) +
- CDEF_HBORDER * (fbc + hb_step < nhfb) + xoff;
- sec_strength = gi % CDEF_SEC_STRENGTHS;
- copy_sb16_16(&in[(-yoff * CDEF_BSTRIDE - xoff)], CDEF_BSTRIDE,
- src[pli],
- (fbr * MI_SIZE_64X64 << mi_high_l2[pli]) - yoff,
- (fbc * MI_SIZE_64X64 << mi_wide_l2[pli]) - xoff,
- stride[pli], ysize, xsize);
- cdef_filter_fb(NULL, tmp_dst, CDEF_BSTRIDE, in, xdec[pli], ydec[pli],
- dir, &dirinit, var, pli, dlist, cdef_count, threshold,
- sec_strength + (sec_strength == 3), pri_damping,
- sec_damping, coeff_shift);
- curr_mse = compute_cdef_dist(
- ref_coeff[pli] +
- (fbr * MI_SIZE_64X64 << mi_high_l2[pli]) * stride[pli] +
- (fbc * MI_SIZE_64X64 << mi_wide_l2[pli]),
- stride[pli], tmp_dst, dlist, cdef_count, bsize[pli], coeff_shift,
- pli);
- if (pli < 2)
- mse[pli][sb_count][gi] = curr_mse;
- else
- mse[1][sb_count][gi] += curr_mse;
- sb_index[sb_count] =
- MI_SIZE_64X64 * fbr * cm->mi_stride + MI_SIZE_64X64 * fbc;
- }
- }
- sb_count++;
- }
- }
- nb_strength_bits = 0;
- /* Search for different number of signalling bits. */
- for (i = 0; i <= 3; i++) {
- int j;
- int best_lev0[CDEF_MAX_STRENGTHS];
- int best_lev1[CDEF_MAX_STRENGTHS] = { 0 };
- nb_strengths = 1 << i;
- if (num_planes >= 3)
- tot_mse = joint_strength_search_dual(best_lev0, best_lev1, nb_strengths,
- mse, sb_count, fast);
- else
- tot_mse = joint_strength_search(best_lev0, nb_strengths, mse[0], sb_count,
- fast);
- /* Count superblock signalling cost. */
- tot_mse += (uint64_t)(sb_count * lambda * i);
- /* Count header signalling cost. */
- tot_mse += (uint64_t)(nb_strengths * lambda * CDEF_STRENGTH_BITS);
- if (tot_mse < best_tot_mse) {
- best_tot_mse = tot_mse;
- nb_strength_bits = i;
- for (j = 0; j < 1 << nb_strength_bits; j++) {
- cm->cdef_strengths[j] = best_lev0[j];
- cm->cdef_uv_strengths[j] = best_lev1[j];
- }
- }
- }
- nb_strengths = 1 << nb_strength_bits;
-
- cm->cdef_bits = nb_strength_bits;
- cm->nb_cdef_strengths = nb_strengths;
- for (i = 0; i < sb_count; i++) {
- int gi;
- int best_gi;
- uint64_t best_mse = (uint64_t)1 << 63;
- best_gi = 0;
- for (gi = 0; gi < cm->nb_cdef_strengths; gi++) {
- uint64_t curr = mse[0][i][cm->cdef_strengths[gi]];
- if (num_planes >= 3) curr += mse[1][i][cm->cdef_uv_strengths[gi]];
- if (curr < best_mse) {
- best_gi = gi;
- best_mse = curr;
- }
- }
- selected_strength[i] = best_gi;
- cm->mi_grid_visible[sb_index[i]]->cdef_strength = best_gi;
- }
-
- if (fast) {
- for (int j = 0; j < nb_strengths; j++) {
- cm->cdef_strengths[j] =
- priconv[cm->cdef_strengths[j] / CDEF_SEC_STRENGTHS] *
- CDEF_SEC_STRENGTHS +
- (cm->cdef_strengths[j] % CDEF_SEC_STRENGTHS);
- cm->cdef_uv_strengths[j] =
- priconv[cm->cdef_uv_strengths[j] / CDEF_SEC_STRENGTHS] *
- CDEF_SEC_STRENGTHS +
- (cm->cdef_uv_strengths[j] % CDEF_SEC_STRENGTHS);
- }
- }
- cm->cdef_pri_damping = pri_damping;
- cm->cdef_sec_damping = sec_damping;
- aom_free(mse[0]);
- aom_free(mse[1]);
- for (pli = 0; pli < num_planes; pli++) {
- aom_free(src[pli]);
- aom_free(ref_coeff[pli]);
- }
- aom_free(sb_index);
- aom_free(selected_strength);
-}
diff --git a/third_party/aom/av1/encoder/picklpf.c b/third_party/aom/av1/encoder/picklpf.c
deleted file mode 100644
index c5508e25c..000000000
--- a/third_party/aom/av1/encoder/picklpf.c
+++ /dev/null
@@ -1,263 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#include <assert.h>
-#include <limits.h>
-
-#include "config/aom_scale_rtcd.h"
-
-#include "aom_dsp/aom_dsp_common.h"
-#include "aom_dsp/psnr.h"
-#include "aom_mem/aom_mem.h"
-#include "aom_ports/mem.h"
-
-#include "av1/common/av1_loopfilter.h"
-#include "av1/common/onyxc_int.h"
-#include "av1/common/quant_common.h"
-
-#include "av1/encoder/av1_quantize.h"
-#include "av1/encoder/encoder.h"
-#include "av1/encoder/picklpf.h"
-
-static void yv12_copy_plane(const YV12_BUFFER_CONFIG *src_bc,
- YV12_BUFFER_CONFIG *dst_bc, int plane) {
- switch (plane) {
- case 0: aom_yv12_copy_y(src_bc, dst_bc); break;
- case 1: aom_yv12_copy_u(src_bc, dst_bc); break;
- case 2: aom_yv12_copy_v(src_bc, dst_bc); break;
- default: assert(plane >= 0 && plane <= 2); break;
- }
-}
-
-int av1_get_max_filter_level(const AV1_COMP *cpi) {
- if (cpi->oxcf.pass == 2) {
- return cpi->twopass.section_intra_rating > 8 ? MAX_LOOP_FILTER * 3 / 4
- : MAX_LOOP_FILTER;
- } else {
- return MAX_LOOP_FILTER;
- }
-}
-
-static int64_t try_filter_frame(const YV12_BUFFER_CONFIG *sd,
- AV1_COMP *const cpi, int filt_level,
- int partial_frame, int plane, int dir) {
- AV1_COMMON *const cm = &cpi->common;
- int64_t filt_err;
-
- assert(plane >= 0 && plane <= 2);
- int filter_level[2] = { filt_level, filt_level };
- if (plane == 0 && dir == 0) filter_level[1] = cm->lf.filter_level[1];
- if (plane == 0 && dir == 1) filter_level[0] = cm->lf.filter_level[0];
-
- // set base filters for use of get_filter_level when in DELTA_Q_LF mode
- switch (plane) {
- case 0:
- cm->lf.filter_level[0] = filter_level[0];
- cm->lf.filter_level[1] = filter_level[1];
- break;
- case 1: cm->lf.filter_level_u = filter_level[0]; break;
- case 2: cm->lf.filter_level_v = filter_level[0]; break;
- }
-
- // TODO(any): please enable multi-thread and remove the flag when loop
- // filter mask is compatible with multi-thread.
-#if LOOP_FILTER_BITMASK
- av1_loop_filter_frame(cm->frame_to_show, cm, &cpi->td.mb.e_mbd, 0, plane,
- plane + 1, partial_frame);
-#else
- if (cpi->num_workers > 1)
- av1_loop_filter_frame_mt(cm->frame_to_show, cm, &cpi->td.mb.e_mbd, plane,
- plane + 1, partial_frame, cpi->workers,
- cpi->num_workers, &cpi->lf_row_sync);
- else
- av1_loop_filter_frame(cm->frame_to_show, cm, &cpi->td.mb.e_mbd, plane,
- plane + 1, partial_frame);
-#endif
-
- filt_err = aom_get_sse_plane(sd, cm->frame_to_show, plane,
- cm->seq_params.use_highbitdepth);
-
- // Re-instate the unfiltered frame
- yv12_copy_plane(&cpi->last_frame_uf, cm->frame_to_show, plane);
-
- return filt_err;
-}
-
-static int search_filter_level(const YV12_BUFFER_CONFIG *sd, AV1_COMP *cpi,
- int partial_frame,
- const int *last_frame_filter_level,
- double *best_cost_ret, int plane, int dir) {
- const AV1_COMMON *const cm = &cpi->common;
- const int min_filter_level = 0;
- const int max_filter_level = av1_get_max_filter_level(cpi);
- int filt_direction = 0;
- int64_t best_err;
- int filt_best;
- MACROBLOCK *x = &cpi->td.mb;
-
- // Start the search at the previous frame filter level unless it is now out of
- // range.
- int lvl;
- switch (plane) {
- case 0: lvl = last_frame_filter_level[dir]; break;
- case 1: lvl = last_frame_filter_level[2]; break;
- case 2: lvl = last_frame_filter_level[3]; break;
- default: assert(plane >= 0 && plane <= 2); return 0;
- }
- int filt_mid = clamp(lvl, min_filter_level, max_filter_level);
- int filter_step = filt_mid < 16 ? 4 : filt_mid / 4;
- // Sum squared error at each filter level
- int64_t ss_err[MAX_LOOP_FILTER + 1];
-
- // Set each entry to -1
- memset(ss_err, 0xFF, sizeof(ss_err));
- yv12_copy_plane(cm->frame_to_show, &cpi->last_frame_uf, plane);
- best_err = try_filter_frame(sd, cpi, filt_mid, partial_frame, plane, dir);
- filt_best = filt_mid;
- ss_err[filt_mid] = best_err;
-
- while (filter_step > 0) {
- const int filt_high = AOMMIN(filt_mid + filter_step, max_filter_level);
- const int filt_low = AOMMAX(filt_mid - filter_step, min_filter_level);
-
- // Bias against raising loop filter in favor of lowering it.
- int64_t bias = (best_err >> (15 - (filt_mid / 8))) * filter_step;
-
- if ((cpi->oxcf.pass == 2) && (cpi->twopass.section_intra_rating < 20))
- bias = (bias * cpi->twopass.section_intra_rating) / 20;
-
- // yx, bias less for large block size
- if (cm->tx_mode != ONLY_4X4) bias >>= 1;
-
- if (filt_direction <= 0 && filt_low != filt_mid) {
- // Get Low filter error score
- if (ss_err[filt_low] < 0) {
- ss_err[filt_low] =
- try_filter_frame(sd, cpi, filt_low, partial_frame, plane, dir);
- }
- // If value is close to the best so far then bias towards a lower loop
- // filter value.
- if (ss_err[filt_low] < (best_err + bias)) {
- // Was it actually better than the previous best?
- if (ss_err[filt_low] < best_err) {
- best_err = ss_err[filt_low];
- }
- filt_best = filt_low;
- }
- }
-
- // Now look at filt_high
- if (filt_direction >= 0 && filt_high != filt_mid) {
- if (ss_err[filt_high] < 0) {
- ss_err[filt_high] =
- try_filter_frame(sd, cpi, filt_high, partial_frame, plane, dir);
- }
- // If value is significantly better than previous best, bias added against
- // raising filter value
- if (ss_err[filt_high] < (best_err - bias)) {
- best_err = ss_err[filt_high];
- filt_best = filt_high;
- }
- }
-
- // Half the step distance if the best filter value was the same as last time
- if (filt_best == filt_mid) {
- filter_step /= 2;
- filt_direction = 0;
- } else {
- filt_direction = (filt_best < filt_mid) ? -1 : 1;
- filt_mid = filt_best;
- }
- }
-
- // Update best error
- best_err = ss_err[filt_best];
-
- if (best_cost_ret) *best_cost_ret = RDCOST_DBL(x->rdmult, 0, best_err);
- return filt_best;
-}
-
-void av1_pick_filter_level(const YV12_BUFFER_CONFIG *sd, AV1_COMP *cpi,
- LPF_PICK_METHOD method) {
- AV1_COMMON *const cm = &cpi->common;
- const int num_planes = av1_num_planes(cm);
- struct loopfilter *const lf = &cm->lf;
- (void)sd;
-
- lf->sharpness_level = 0;
- cpi->td.mb.rdmult = cpi->rd.RDMULT;
-
- if (method == LPF_PICK_MINIMAL_LPF) {
- lf->filter_level[0] = 0;
- lf->filter_level[1] = 0;
- } else if (method >= LPF_PICK_FROM_Q) {
- const int min_filter_level = 0;
- const int max_filter_level = av1_get_max_filter_level(cpi);
- const int q = av1_ac_quant_Q3(cm->base_qindex, 0, cm->seq_params.bit_depth);
- // These values were determined by linear fitting the result of the
- // searched level for 8 bit depth:
- // Keyframes: filt_guess = q * 0.06699 - 1.60817
- // Other frames: filt_guess = q * 0.02295 + 2.48225
- //
- // And high bit depth separately:
- // filt_guess = q * 0.316206 + 3.87252
- int filt_guess;
- switch (cm->seq_params.bit_depth) {
- case AOM_BITS_8:
- filt_guess = (cm->frame_type == KEY_FRAME)
- ? ROUND_POWER_OF_TWO(q * 17563 - 421574, 18)
- : ROUND_POWER_OF_TWO(q * 6017 + 650707, 18);
- break;
- case AOM_BITS_10:
- filt_guess = ROUND_POWER_OF_TWO(q * 20723 + 4060632, 20);
- break;
- case AOM_BITS_12:
- filt_guess = ROUND_POWER_OF_TWO(q * 20723 + 16242526, 22);
- break;
- default:
- assert(0 &&
- "bit_depth should be AOM_BITS_8, AOM_BITS_10 "
- "or AOM_BITS_12");
- return;
- }
- if (cm->seq_params.bit_depth != AOM_BITS_8 && cm->frame_type == KEY_FRAME)
- filt_guess -= 4;
- // TODO(chengchen): retrain the model for Y, U, V filter levels
- lf->filter_level[0] = clamp(filt_guess, min_filter_level, max_filter_level);
- lf->filter_level[1] = clamp(filt_guess, min_filter_level, max_filter_level);
- lf->filter_level_u = clamp(filt_guess, min_filter_level, max_filter_level);
- lf->filter_level_v = clamp(filt_guess, min_filter_level, max_filter_level);
- } else {
- const int last_frame_filter_level[4] = { lf->filter_level[0],
- lf->filter_level[1],
- lf->filter_level_u,
- lf->filter_level_v };
-
- lf->filter_level[0] = lf->filter_level[1] =
- search_filter_level(sd, cpi, method == LPF_PICK_FROM_SUBIMAGE,
- last_frame_filter_level, NULL, 0, 2);
- lf->filter_level[0] =
- search_filter_level(sd, cpi, method == LPF_PICK_FROM_SUBIMAGE,
- last_frame_filter_level, NULL, 0, 0);
- lf->filter_level[1] =
- search_filter_level(sd, cpi, method == LPF_PICK_FROM_SUBIMAGE,
- last_frame_filter_level, NULL, 0, 1);
-
- if (num_planes > 1) {
- lf->filter_level_u =
- search_filter_level(sd, cpi, method == LPF_PICK_FROM_SUBIMAGE,
- last_frame_filter_level, NULL, 1, 0);
- lf->filter_level_v =
- search_filter_level(sd, cpi, method == LPF_PICK_FROM_SUBIMAGE,
- last_frame_filter_level, NULL, 2, 0);
- }
- }
-}
diff --git a/third_party/aom/av1/encoder/picklpf.h b/third_party/aom/av1/encoder/picklpf.h
deleted file mode 100644
index 357097ae1..000000000
--- a/third_party/aom/av1/encoder/picklpf.h
+++ /dev/null
@@ -1,30 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#ifndef AOM_AV1_ENCODER_PICKLPF_H_
-#define AOM_AV1_ENCODER_PICKLPF_H_
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-#include "av1/encoder/encoder.h"
-
-struct yv12_buffer_config;
-struct AV1_COMP;
-int av1_get_max_filter_level(const AV1_COMP *cpi);
-void av1_pick_filter_level(const struct yv12_buffer_config *sd,
- struct AV1_COMP *cpi, LPF_PICK_METHOD method);
-#ifdef __cplusplus
-} // extern "C"
-#endif
-
-#endif // AOM_AV1_ENCODER_PICKLPF_H_
diff --git a/third_party/aom/av1/encoder/pickrst.c b/third_party/aom/av1/encoder/pickrst.c
deleted file mode 100644
index e7804f6b4..000000000
--- a/third_party/aom/av1/encoder/pickrst.c
+++ /dev/null
@@ -1,1362 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#include <assert.h>
-#include <float.h>
-#include <limits.h>
-#include <math.h>
-
-#include "config/aom_scale_rtcd.h"
-#include "config/av1_rtcd.h"
-
-#include "aom_dsp/aom_dsp_common.h"
-#include "aom_dsp/binary_codes_writer.h"
-#include "aom_dsp/psnr.h"
-#include "aom_mem/aom_mem.h"
-#include "aom_ports/mem.h"
-#include "aom_ports/system_state.h"
-#include "av1/common/onyxc_int.h"
-#include "av1/common/quant_common.h"
-#include "av1/common/restoration.h"
-
-#include "av1/encoder/av1_quantize.h"
-#include "av1/encoder/encoder.h"
-#include "av1/encoder/mathutils.h"
-#include "av1/encoder/picklpf.h"
-#include "av1/encoder/pickrst.h"
-
-// When set to RESTORE_WIENER or RESTORE_SGRPROJ only those are allowed.
-// When set to RESTORE_TYPES we allow switchable.
-static const RestorationType force_restore_type = RESTORE_TYPES;
-
-// Number of Wiener iterations
-#define NUM_WIENER_ITERS 5
-
-// Penalty factor for use of dual sgr
-#define DUAL_SGR_PENALTY_MULT 0.01
-
-const int frame_level_restore_bits[RESTORE_TYPES] = { 2, 2, 2, 2 };
-
-typedef int64_t (*sse_extractor_type)(const YV12_BUFFER_CONFIG *a,
- const YV12_BUFFER_CONFIG *b);
-typedef int64_t (*sse_part_extractor_type)(const YV12_BUFFER_CONFIG *a,
- const YV12_BUFFER_CONFIG *b,
- int hstart, int width, int vstart,
- int height);
-
-#define NUM_EXTRACTORS (3 * (1 + 1))
-
-static const sse_part_extractor_type sse_part_extractors[NUM_EXTRACTORS] = {
- aom_get_y_sse_part, aom_get_u_sse_part,
- aom_get_v_sse_part, aom_highbd_get_y_sse_part,
- aom_highbd_get_u_sse_part, aom_highbd_get_v_sse_part,
-};
-
-static int64_t sse_restoration_unit(const RestorationTileLimits *limits,
- const YV12_BUFFER_CONFIG *src,
- const YV12_BUFFER_CONFIG *dst, int plane,
- int highbd) {
- return sse_part_extractors[3 * highbd + plane](
- src, dst, limits->h_start, limits->h_end - limits->h_start,
- limits->v_start, limits->v_end - limits->v_start);
-}
-
-typedef struct {
- // The best coefficients for Wiener or Sgrproj restoration
- WienerInfo wiener;
- SgrprojInfo sgrproj;
-
- // The sum of squared errors for this rtype.
- int64_t sse[RESTORE_SWITCHABLE_TYPES];
-
- // The rtype to use for this unit given a frame rtype as
- // index. Indices: WIENER, SGRPROJ, SWITCHABLE.
- RestorationType best_rtype[RESTORE_TYPES - 1];
-} RestUnitSearchInfo;
-
-typedef struct {
- const YV12_BUFFER_CONFIG *src;
- YV12_BUFFER_CONFIG *dst;
-
- const AV1_COMMON *cm;
- const MACROBLOCK *x;
- int plane;
- int plane_width;
- int plane_height;
- RestUnitSearchInfo *rusi;
-
- // Speed features
- const SPEED_FEATURES *sf;
-
- uint8_t *dgd_buffer;
- int dgd_stride;
- const uint8_t *src_buffer;
- int src_stride;
-
- // sse and bits are initialised by reset_rsc in search_rest_type
- int64_t sse;
- int64_t bits;
- int tile_y0, tile_stripe0;
-
- // sgrproj and wiener are initialised by rsc_on_tile when starting the first
- // tile in the frame.
- SgrprojInfo sgrproj;
- WienerInfo wiener;
- AV1PixelRect tile_rect;
-} RestSearchCtxt;
-
-static void rsc_on_tile(int tile_row, int tile_col, void *priv) {
- (void)tile_col;
-
- RestSearchCtxt *rsc = (RestSearchCtxt *)priv;
- set_default_sgrproj(&rsc->sgrproj);
- set_default_wiener(&rsc->wiener);
-
- rsc->tile_stripe0 =
- (tile_row == 0) ? 0 : rsc->cm->rst_end_stripe[tile_row - 1];
-}
-
-static void reset_rsc(RestSearchCtxt *rsc) {
- rsc->sse = 0;
- rsc->bits = 0;
-}
-
-static void init_rsc(const YV12_BUFFER_CONFIG *src, const AV1_COMMON *cm,
- const MACROBLOCK *x, const SPEED_FEATURES *sf, int plane,
- RestUnitSearchInfo *rusi, YV12_BUFFER_CONFIG *dst,
- RestSearchCtxt *rsc) {
- rsc->src = src;
- rsc->dst = dst;
- rsc->cm = cm;
- rsc->x = x;
- rsc->plane = plane;
- rsc->rusi = rusi;
- rsc->sf = sf;
-
- const YV12_BUFFER_CONFIG *dgd = cm->frame_to_show;
- const int is_uv = plane != AOM_PLANE_Y;
- rsc->plane_width = src->crop_widths[is_uv];
- rsc->plane_height = src->crop_heights[is_uv];
- rsc->src_buffer = src->buffers[plane];
- rsc->src_stride = src->strides[is_uv];
- rsc->dgd_buffer = dgd->buffers[plane];
- rsc->dgd_stride = dgd->strides[is_uv];
- rsc->tile_rect = av1_whole_frame_rect(cm, is_uv);
- assert(src->crop_widths[is_uv] == dgd->crop_widths[is_uv]);
- assert(src->crop_heights[is_uv] == dgd->crop_heights[is_uv]);
-}
-
-static int64_t try_restoration_unit(const RestSearchCtxt *rsc,
- const RestorationTileLimits *limits,
- const AV1PixelRect *tile_rect,
- const RestorationUnitInfo *rui) {
- const AV1_COMMON *const cm = rsc->cm;
- const int plane = rsc->plane;
- const int is_uv = plane > 0;
- const RestorationInfo *rsi = &cm->rst_info[plane];
- RestorationLineBuffers rlbs;
- const int bit_depth = cm->seq_params.bit_depth;
- const int highbd = cm->seq_params.use_highbitdepth;
-
- const YV12_BUFFER_CONFIG *fts = cm->frame_to_show;
- // TODO(yunqing): For now, only use optimized LR filter in decoder. Can be
- // also used in encoder.
- const int optimized_lr = 0;
-
- av1_loop_restoration_filter_unit(
- limits, rui, &rsi->boundaries, &rlbs, tile_rect, rsc->tile_stripe0,
- is_uv && cm->seq_params.subsampling_x,
- is_uv && cm->seq_params.subsampling_y, highbd, bit_depth,
- fts->buffers[plane], fts->strides[is_uv], rsc->dst->buffers[plane],
- rsc->dst->strides[is_uv], cm->rst_tmpbuf, optimized_lr);
-
- return sse_restoration_unit(limits, rsc->src, rsc->dst, plane, highbd);
-}
-
-int64_t av1_lowbd_pixel_proj_error_c(const uint8_t *src8, int width, int height,
- int src_stride, const uint8_t *dat8,
- int dat_stride, int32_t *flt0,
- int flt0_stride, int32_t *flt1,
- int flt1_stride, int xq[2],
- const sgr_params_type *params) {
- int i, j;
- const uint8_t *src = src8;
- const uint8_t *dat = dat8;
- int64_t err = 0;
- if (params->r[0] > 0 && params->r[1] > 0) {
- for (i = 0; i < height; ++i) {
- for (j = 0; j < width; ++j) {
- assert(flt1[j] < (1 << 15) && flt1[j] > -(1 << 15));
- assert(flt0[j] < (1 << 15) && flt0[j] > -(1 << 15));
- const int32_t u = (int32_t)(dat[j] << SGRPROJ_RST_BITS);
- int32_t v = u << SGRPROJ_PRJ_BITS;
- v += xq[0] * (flt0[j] - u) + xq[1] * (flt1[j] - u);
- const int32_t e =
- ROUND_POWER_OF_TWO(v, SGRPROJ_RST_BITS + SGRPROJ_PRJ_BITS) - src[j];
- err += e * e;
- }
- dat += dat_stride;
- src += src_stride;
- flt0 += flt0_stride;
- flt1 += flt1_stride;
- }
- } else if (params->r[0] > 0) {
- for (i = 0; i < height; ++i) {
- for (j = 0; j < width; ++j) {
- assert(flt0[j] < (1 << 15) && flt0[j] > -(1 << 15));
- const int32_t u = (int32_t)(dat[j] << SGRPROJ_RST_BITS);
- int32_t v = u << SGRPROJ_PRJ_BITS;
- v += xq[0] * (flt0[j] - u);
- const int32_t e =
- ROUND_POWER_OF_TWO(v, SGRPROJ_RST_BITS + SGRPROJ_PRJ_BITS) - src[j];
- err += e * e;
- }
- dat += dat_stride;
- src += src_stride;
- flt0 += flt0_stride;
- }
- } else if (params->r[1] > 0) {
- for (i = 0; i < height; ++i) {
- for (j = 0; j < width; ++j) {
- assert(flt1[j] < (1 << 15) && flt1[j] > -(1 << 15));
- const int32_t u = (int32_t)(dat[j] << SGRPROJ_RST_BITS);
- int32_t v = u << SGRPROJ_PRJ_BITS;
- v += xq[1] * (flt1[j] - u);
- const int32_t e =
- ROUND_POWER_OF_TWO(v, SGRPROJ_RST_BITS + SGRPROJ_PRJ_BITS) - src[j];
- err += e * e;
- }
- dat += dat_stride;
- src += src_stride;
- flt1 += flt1_stride;
- }
- } else {
- for (i = 0; i < height; ++i) {
- for (j = 0; j < width; ++j) {
- const int32_t e = (int32_t)(dat[j]) - src[j];
- err += e * e;
- }
- dat += dat_stride;
- src += src_stride;
- }
- }
-
- return err;
-}
-
-static int64_t get_pixel_proj_error(const uint8_t *src8, int width, int height,
- int src_stride, const uint8_t *dat8,
- int dat_stride, int use_highbitdepth,
- int32_t *flt0, int flt0_stride,
- int32_t *flt1, int flt1_stride, int *xqd,
- const sgr_params_type *params) {
- int i, j;
- int64_t err = 0;
- int xq[2];
- decode_xq(xqd, xq, params);
- if (!use_highbitdepth) {
- err = av1_lowbd_pixel_proj_error(src8, width, height, src_stride, dat8,
- dat_stride, flt0, flt0_stride, flt1,
- flt1_stride, xq, params);
- } else {
- const uint16_t *src = CONVERT_TO_SHORTPTR(src8);
- const uint16_t *dat = CONVERT_TO_SHORTPTR(dat8);
- const int32_t half = 1 << (SGRPROJ_RST_BITS + SGRPROJ_PRJ_BITS - 1);
- if (params->r[0] > 0 && params->r[1] > 0) {
- int xq0 = xq[0];
- int xq1 = xq[1];
- for (i = 0; i < height; ++i) {
- for (j = 0; j < width; ++j) {
- const int32_t d = dat[j];
- const int32_t s = src[j];
- const int32_t u = (int32_t)(d << SGRPROJ_RST_BITS);
- int32_t v0 = flt0[j] - u;
- int32_t v1 = flt1[j] - u;
- int32_t v = half;
- v += xq0 * v0;
- v += xq1 * v1;
- const int32_t e =
- (v >> (SGRPROJ_RST_BITS + SGRPROJ_PRJ_BITS)) + d - s;
- err += e * e;
- }
- dat += dat_stride;
- flt0 += flt0_stride;
- flt1 += flt1_stride;
- src += src_stride;
- }
- } else if (params->r[0] > 0 || params->r[1] > 0) {
- int exq;
- int32_t *flt;
- int flt_stride;
- if (params->r[0] > 0) {
- exq = xq[0];
- flt = flt0;
- flt_stride = flt0_stride;
- } else {
- exq = xq[1];
- flt = flt1;
- flt_stride = flt1_stride;
- }
- for (i = 0; i < height; ++i) {
- for (j = 0; j < width; ++j) {
- const int32_t d = dat[j];
- const int32_t s = src[j];
- const int32_t u = (int32_t)(d << SGRPROJ_RST_BITS);
- int32_t v = half;
- v += exq * (flt[j] - u);
- const int32_t e =
- (v >> (SGRPROJ_RST_BITS + SGRPROJ_PRJ_BITS)) + d - s;
- err += e * e;
- }
- dat += dat_stride;
- flt += flt_stride;
- src += src_stride;
- }
- } else {
- for (i = 0; i < height; ++i) {
- for (j = 0; j < width; ++j) {
- const int32_t d = dat[j];
- const int32_t s = src[j];
- const int32_t e = d - s;
- err += e * e;
- }
- dat += dat_stride;
- src += src_stride;
- }
- }
- }
- return err;
-}
-
-#define USE_SGRPROJ_REFINEMENT_SEARCH 1
-static int64_t finer_search_pixel_proj_error(
- const uint8_t *src8, int width, int height, int src_stride,
- const uint8_t *dat8, int dat_stride, int use_highbitdepth, int32_t *flt0,
- int flt0_stride, int32_t *flt1, int flt1_stride, int start_step, int *xqd,
- const sgr_params_type *params) {
- int64_t err = get_pixel_proj_error(
- src8, width, height, src_stride, dat8, dat_stride, use_highbitdepth, flt0,
- flt0_stride, flt1, flt1_stride, xqd, params);
- (void)start_step;
-#if USE_SGRPROJ_REFINEMENT_SEARCH
- int64_t err2;
- int tap_min[] = { SGRPROJ_PRJ_MIN0, SGRPROJ_PRJ_MIN1 };
- int tap_max[] = { SGRPROJ_PRJ_MAX0, SGRPROJ_PRJ_MAX1 };
- for (int s = start_step; s >= 1; s >>= 1) {
- for (int p = 0; p < 2; ++p) {
- if ((params->r[0] == 0 && p == 0) || (params->r[1] == 0 && p == 1)) {
- continue;
- }
- int skip = 0;
- do {
- if (xqd[p] - s >= tap_min[p]) {
- xqd[p] -= s;
- err2 =
- get_pixel_proj_error(src8, width, height, src_stride, dat8,
- dat_stride, use_highbitdepth, flt0,
- flt0_stride, flt1, flt1_stride, xqd, params);
- if (err2 > err) {
- xqd[p] += s;
- } else {
- err = err2;
- skip = 1;
- // At the highest step size continue moving in the same direction
- if (s == start_step) continue;
- }
- }
- break;
- } while (1);
- if (skip) break;
- do {
- if (xqd[p] + s <= tap_max[p]) {
- xqd[p] += s;
- err2 =
- get_pixel_proj_error(src8, width, height, src_stride, dat8,
- dat_stride, use_highbitdepth, flt0,
- flt0_stride, flt1, flt1_stride, xqd, params);
- if (err2 > err) {
- xqd[p] -= s;
- } else {
- err = err2;
- // At the highest step size continue moving in the same direction
- if (s == start_step) continue;
- }
- }
- break;
- } while (1);
- }
- }
-#endif // USE_SGRPROJ_REFINEMENT_SEARCH
- return err;
-}
-
-static void get_proj_subspace(const uint8_t *src8, int width, int height,
- int src_stride, const uint8_t *dat8,
- int dat_stride, int use_highbitdepth,
- int32_t *flt0, int flt0_stride, int32_t *flt1,
- int flt1_stride, int *xq,
- const sgr_params_type *params) {
- int i, j;
- double H[2][2] = { { 0, 0 }, { 0, 0 } };
- double C[2] = { 0, 0 };
- double Det;
- double x[2];
- const int size = width * height;
-
- aom_clear_system_state();
-
- // Default
- xq[0] = 0;
- xq[1] = 0;
- if (!use_highbitdepth) {
- const uint8_t *src = src8;
- const uint8_t *dat = dat8;
- for (i = 0; i < height; ++i) {
- for (j = 0; j < width; ++j) {
- const double u = (double)(dat[i * dat_stride + j] << SGRPROJ_RST_BITS);
- const double s =
- (double)(src[i * src_stride + j] << SGRPROJ_RST_BITS) - u;
- const double f1 =
- (params->r[0] > 0) ? (double)flt0[i * flt0_stride + j] - u : 0;
- const double f2 =
- (params->r[1] > 0) ? (double)flt1[i * flt1_stride + j] - u : 0;
- H[0][0] += f1 * f1;
- H[1][1] += f2 * f2;
- H[0][1] += f1 * f2;
- C[0] += f1 * s;
- C[1] += f2 * s;
- }
- }
- } else {
- const uint16_t *src = CONVERT_TO_SHORTPTR(src8);
- const uint16_t *dat = CONVERT_TO_SHORTPTR(dat8);
- for (i = 0; i < height; ++i) {
- for (j = 0; j < width; ++j) {
- const double u = (double)(dat[i * dat_stride + j] << SGRPROJ_RST_BITS);
- const double s =
- (double)(src[i * src_stride + j] << SGRPROJ_RST_BITS) - u;
- const double f1 =
- (params->r[0] > 0) ? (double)flt0[i * flt0_stride + j] - u : 0;
- const double f2 =
- (params->r[1] > 0) ? (double)flt1[i * flt1_stride + j] - u : 0;
- H[0][0] += f1 * f1;
- H[1][1] += f2 * f2;
- H[0][1] += f1 * f2;
- C[0] += f1 * s;
- C[1] += f2 * s;
- }
- }
- }
- H[0][0] /= size;
- H[0][1] /= size;
- H[1][1] /= size;
- H[1][0] = H[0][1];
- C[0] /= size;
- C[1] /= size;
- if (params->r[0] == 0) {
- // H matrix is now only the scalar H[1][1]
- // C vector is now only the scalar C[1]
- Det = H[1][1];
- if (Det < 1e-8) return; // ill-posed, return default values
- x[0] = 0;
- x[1] = C[1] / Det;
-
- xq[0] = 0;
- xq[1] = (int)rint(x[1] * (1 << SGRPROJ_PRJ_BITS));
- } else if (params->r[1] == 0) {
- // H matrix is now only the scalar H[0][0]
- // C vector is now only the scalar C[0]
- Det = H[0][0];
- if (Det < 1e-8) return; // ill-posed, return default values
- x[0] = C[0] / Det;
- x[1] = 0;
-
- xq[0] = (int)rint(x[0] * (1 << SGRPROJ_PRJ_BITS));
- xq[1] = 0;
- } else {
- Det = (H[0][0] * H[1][1] - H[0][1] * H[1][0]);
- if (Det < 1e-8) return; // ill-posed, return default values
- x[0] = (H[1][1] * C[0] - H[0][1] * C[1]) / Det;
- x[1] = (H[0][0] * C[1] - H[1][0] * C[0]) / Det;
-
- xq[0] = (int)rint(x[0] * (1 << SGRPROJ_PRJ_BITS));
- xq[1] = (int)rint(x[1] * (1 << SGRPROJ_PRJ_BITS));
- }
-}
-
-void encode_xq(int *xq, int *xqd, const sgr_params_type *params) {
- if (params->r[0] == 0) {
- xqd[0] = 0;
- xqd[1] = clamp((1 << SGRPROJ_PRJ_BITS) - xq[1], SGRPROJ_PRJ_MIN1,
- SGRPROJ_PRJ_MAX1);
- } else if (params->r[1] == 0) {
- xqd[0] = clamp(xq[0], SGRPROJ_PRJ_MIN0, SGRPROJ_PRJ_MAX0);
- xqd[1] = clamp((1 << SGRPROJ_PRJ_BITS) - xqd[0], SGRPROJ_PRJ_MIN1,
- SGRPROJ_PRJ_MAX1);
- } else {
- xqd[0] = clamp(xq[0], SGRPROJ_PRJ_MIN0, SGRPROJ_PRJ_MAX0);
- xqd[1] = clamp((1 << SGRPROJ_PRJ_BITS) - xqd[0] - xq[1], SGRPROJ_PRJ_MIN1,
- SGRPROJ_PRJ_MAX1);
- }
-}
-
-// Apply the self-guided filter across an entire restoration unit.
-static void apply_sgr(int sgr_params_idx, const uint8_t *dat8, int width,
- int height, int dat_stride, int use_highbd, int bit_depth,
- int pu_width, int pu_height, int32_t *flt0, int32_t *flt1,
- int flt_stride) {
- for (int i = 0; i < height; i += pu_height) {
- const int h = AOMMIN(pu_height, height - i);
- int32_t *flt0_row = flt0 + i * flt_stride;
- int32_t *flt1_row = flt1 + i * flt_stride;
- const uint8_t *dat8_row = dat8 + i * dat_stride;
-
- // Iterate over the stripe in blocks of width pu_width
- for (int j = 0; j < width; j += pu_width) {
- const int w = AOMMIN(pu_width, width - j);
- const int ret = av1_selfguided_restoration(
- dat8_row + j, w, h, dat_stride, flt0_row + j, flt1_row + j,
- flt_stride, sgr_params_idx, bit_depth, use_highbd);
- (void)ret;
- assert(!ret);
- }
- }
-}
-
-static SgrprojInfo search_selfguided_restoration(
- const uint8_t *dat8, int width, int height, int dat_stride,
- const uint8_t *src8, int src_stride, int use_highbitdepth, int bit_depth,
- int pu_width, int pu_height, int32_t *rstbuf) {
- int32_t *flt0 = rstbuf;
- int32_t *flt1 = flt0 + RESTORATION_UNITPELS_MAX;
- int ep, bestep = 0;
- int64_t besterr = -1;
- int exqd[2], bestxqd[2] = { 0, 0 };
- int flt_stride = ((width + 7) & ~7) + 8;
- assert(pu_width == (RESTORATION_PROC_UNIT_SIZE >> 1) ||
- pu_width == RESTORATION_PROC_UNIT_SIZE);
- assert(pu_height == (RESTORATION_PROC_UNIT_SIZE >> 1) ||
- pu_height == RESTORATION_PROC_UNIT_SIZE);
-
- for (ep = 0; ep < SGRPROJ_PARAMS; ep++) {
- int exq[2];
- apply_sgr(ep, dat8, width, height, dat_stride, use_highbitdepth, bit_depth,
- pu_width, pu_height, flt0, flt1, flt_stride);
- aom_clear_system_state();
- const sgr_params_type *const params = &sgr_params[ep];
- get_proj_subspace(src8, width, height, src_stride, dat8, dat_stride,
- use_highbitdepth, flt0, flt_stride, flt1, flt_stride, exq,
- params);
- aom_clear_system_state();
- encode_xq(exq, exqd, params);
- int64_t err = finer_search_pixel_proj_error(
- src8, width, height, src_stride, dat8, dat_stride, use_highbitdepth,
- flt0, flt_stride, flt1, flt_stride, 2, exqd, params);
- if (besterr == -1 || err < besterr) {
- bestep = ep;
- besterr = err;
- bestxqd[0] = exqd[0];
- bestxqd[1] = exqd[1];
- }
- }
-
- SgrprojInfo ret;
- ret.ep = bestep;
- ret.xqd[0] = bestxqd[0];
- ret.xqd[1] = bestxqd[1];
- return ret;
-}
-
-static int count_sgrproj_bits(SgrprojInfo *sgrproj_info,
- SgrprojInfo *ref_sgrproj_info) {
- int bits = SGRPROJ_PARAMS_BITS;
- const sgr_params_type *params = &sgr_params[sgrproj_info->ep];
- if (params->r[0] > 0)
- bits += aom_count_primitive_refsubexpfin(
- SGRPROJ_PRJ_MAX0 - SGRPROJ_PRJ_MIN0 + 1, SGRPROJ_PRJ_SUBEXP_K,
- ref_sgrproj_info->xqd[0] - SGRPROJ_PRJ_MIN0,
- sgrproj_info->xqd[0] - SGRPROJ_PRJ_MIN0);
- if (params->r[1] > 0)
- bits += aom_count_primitive_refsubexpfin(
- SGRPROJ_PRJ_MAX1 - SGRPROJ_PRJ_MIN1 + 1, SGRPROJ_PRJ_SUBEXP_K,
- ref_sgrproj_info->xqd[1] - SGRPROJ_PRJ_MIN1,
- sgrproj_info->xqd[1] - SGRPROJ_PRJ_MIN1);
- return bits;
-}
-
-static void search_sgrproj(const RestorationTileLimits *limits,
- const AV1PixelRect *tile, int rest_unit_idx,
- void *priv, int32_t *tmpbuf,
- RestorationLineBuffers *rlbs) {
- (void)rlbs;
- RestSearchCtxt *rsc = (RestSearchCtxt *)priv;
- RestUnitSearchInfo *rusi = &rsc->rusi[rest_unit_idx];
-
- const MACROBLOCK *const x = rsc->x;
- const AV1_COMMON *const cm = rsc->cm;
- const int highbd = cm->seq_params.use_highbitdepth;
- const int bit_depth = cm->seq_params.bit_depth;
-
- uint8_t *dgd_start =
- rsc->dgd_buffer + limits->v_start * rsc->dgd_stride + limits->h_start;
- const uint8_t *src_start =
- rsc->src_buffer + limits->v_start * rsc->src_stride + limits->h_start;
-
- const int is_uv = rsc->plane > 0;
- const int ss_x = is_uv && cm->seq_params.subsampling_x;
- const int ss_y = is_uv && cm->seq_params.subsampling_y;
- const int procunit_width = RESTORATION_PROC_UNIT_SIZE >> ss_x;
- const int procunit_height = RESTORATION_PROC_UNIT_SIZE >> ss_y;
-
- rusi->sgrproj = search_selfguided_restoration(
- dgd_start, limits->h_end - limits->h_start,
- limits->v_end - limits->v_start, rsc->dgd_stride, src_start,
- rsc->src_stride, highbd, bit_depth, procunit_width, procunit_height,
- tmpbuf);
-
- RestorationUnitInfo rui;
- rui.restoration_type = RESTORE_SGRPROJ;
- rui.sgrproj_info = rusi->sgrproj;
-
- rusi->sse[RESTORE_SGRPROJ] = try_restoration_unit(rsc, limits, tile, &rui);
-
- const int64_t bits_none = x->sgrproj_restore_cost[0];
- const int64_t bits_sgr = x->sgrproj_restore_cost[1] +
- (count_sgrproj_bits(&rusi->sgrproj, &rsc->sgrproj)
- << AV1_PROB_COST_SHIFT);
-
- double cost_none =
- RDCOST_DBL(x->rdmult, bits_none >> 4, rusi->sse[RESTORE_NONE]);
- double cost_sgr =
- RDCOST_DBL(x->rdmult, bits_sgr >> 4, rusi->sse[RESTORE_SGRPROJ]);
- if (rusi->sgrproj.ep < 10)
- cost_sgr *= (1 + DUAL_SGR_PENALTY_MULT * rsc->sf->dual_sgr_penalty_level);
-
- RestorationType rtype =
- (cost_sgr < cost_none) ? RESTORE_SGRPROJ : RESTORE_NONE;
- rusi->best_rtype[RESTORE_SGRPROJ - 1] = rtype;
-
- rsc->sse += rusi->sse[rtype];
- rsc->bits += (cost_sgr < cost_none) ? bits_sgr : bits_none;
- if (cost_sgr < cost_none) rsc->sgrproj = rusi->sgrproj;
-}
-
-void av1_compute_stats_c(int wiener_win, const uint8_t *dgd, const uint8_t *src,
- int h_start, int h_end, int v_start, int v_end,
- int dgd_stride, int src_stride, double *M, double *H) {
- int i, j, k, l;
- double Y[WIENER_WIN2];
- const int wiener_win2 = wiener_win * wiener_win;
- const int wiener_halfwin = (wiener_win >> 1);
- const double avg =
- find_average(dgd, h_start, h_end, v_start, v_end, dgd_stride);
-
- memset(M, 0, sizeof(*M) * wiener_win2);
- memset(H, 0, sizeof(*H) * wiener_win2 * wiener_win2);
- for (i = v_start; i < v_end; i++) {
- for (j = h_start; j < h_end; j++) {
- const double X = (double)src[i * src_stride + j] - avg;
- int idx = 0;
- for (k = -wiener_halfwin; k <= wiener_halfwin; k++) {
- for (l = -wiener_halfwin; l <= wiener_halfwin; l++) {
- Y[idx] = (double)dgd[(i + l) * dgd_stride + (j + k)] - avg;
- idx++;
- }
- }
- assert(idx == wiener_win2);
- for (k = 0; k < wiener_win2; ++k) {
- M[k] += Y[k] * X;
- for (l = k; l < wiener_win2; ++l) {
- // H is a symmetric matrix, so we only need to fill out the upper
- // triangle here. We can copy it down to the lower triangle outside
- // the (i, j) loops.
- H[k * wiener_win2 + l] += Y[k] * Y[l];
- }
- }
- }
- }
- for (k = 0; k < wiener_win2; ++k) {
- for (l = k + 1; l < wiener_win2; ++l) {
- H[l * wiener_win2 + k] = H[k * wiener_win2 + l];
- }
- }
-}
-
-static double find_average_highbd(const uint16_t *src, int h_start, int h_end,
- int v_start, int v_end, int stride) {
- uint64_t sum = 0;
- double avg = 0;
- int i, j;
- aom_clear_system_state();
- for (i = v_start; i < v_end; i++)
- for (j = h_start; j < h_end; j++) sum += src[i * stride + j];
- avg = (double)sum / ((v_end - v_start) * (h_end - h_start));
- return avg;
-}
-
-static AOM_FORCE_INLINE void compute_stats_highbd(
- int wiener_win, const uint8_t *dgd8, const uint8_t *src8, int h_start,
- int h_end, int v_start, int v_end, int dgd_stride, int src_stride,
- double *M, double *H) {
- int i, j, k, l;
- double Y[WIENER_WIN2];
- const int wiener_win2 = wiener_win * wiener_win;
- const int wiener_halfwin = (wiener_win >> 1);
- const uint16_t *src = CONVERT_TO_SHORTPTR(src8);
- const uint16_t *dgd = CONVERT_TO_SHORTPTR(dgd8);
- const double avg =
- find_average_highbd(dgd, h_start, h_end, v_start, v_end, dgd_stride);
-
- memset(M, 0, sizeof(*M) * wiener_win2);
- memset(H, 0, sizeof(*H) * wiener_win2 * wiener_win2);
- for (i = v_start; i < v_end; i++) {
- for (j = h_start; j < h_end; j++) {
- const double X = (double)src[i * src_stride + j] - avg;
- int idx = 0;
- for (k = -wiener_halfwin; k <= wiener_halfwin; k++) {
- for (l = -wiener_halfwin; l <= wiener_halfwin; l++) {
- Y[idx] = (double)dgd[(i + l) * dgd_stride + (j + k)] - avg;
- idx++;
- }
- }
- assert(idx == wiener_win2);
- for (k = 0; k < wiener_win2; ++k) {
- double Yk = Y[k];
- M[k] += Yk * X;
- double *H2 = &H[k * wiener_win2];
- H2[k] += Yk * Yk;
- for (l = k + 1; l < wiener_win2; ++l) {
- // H is a symmetric matrix, so we only need to fill out the upper
- // triangle here. We can copy it down to the lower triangle outside
- // the (i, j) loops.
- H2[l] += Yk * Y[l];
- }
- }
- }
- }
- for (k = 0; k < wiener_win2; ++k) {
- for (l = k + 1; l < wiener_win2; ++l) {
- H[l * wiener_win2 + k] = H[k * wiener_win2 + l];
- }
- }
-}
-
-static INLINE int wrap_index(int i, int wiener_win) {
- const int wiener_halfwin1 = (wiener_win >> 1) + 1;
- return (i >= wiener_halfwin1 ? wiener_win - 1 - i : i);
-}
-
-// Fix vector b, update vector a
-static void update_a_sep_sym(int wiener_win, double **Mc, double **Hc,
- double *a, double *b) {
- int i, j;
- double S[WIENER_WIN];
- double A[WIENER_HALFWIN1], B[WIENER_HALFWIN1 * WIENER_HALFWIN1];
- const int wiener_win2 = wiener_win * wiener_win;
- const int wiener_halfwin1 = (wiener_win >> 1) + 1;
- memset(A, 0, sizeof(A));
- memset(B, 0, sizeof(B));
- for (i = 0; i < wiener_win; i++) {
- for (j = 0; j < wiener_win; ++j) {
- const int jj = wrap_index(j, wiener_win);
- A[jj] += Mc[i][j] * b[i];
- }
- }
- for (i = 0; i < wiener_win; i++) {
- for (j = 0; j < wiener_win; j++) {
- int k, l;
- for (k = 0; k < wiener_win; ++k)
- for (l = 0; l < wiener_win; ++l) {
- const int kk = wrap_index(k, wiener_win);
- const int ll = wrap_index(l, wiener_win);
- B[ll * wiener_halfwin1 + kk] +=
- Hc[j * wiener_win + i][k * wiener_win2 + l] * b[i] * b[j];
- }
- }
- }
- // Normalization enforcement in the system of equations itself
- for (i = 0; i < wiener_halfwin1 - 1; ++i)
- A[i] -=
- A[wiener_halfwin1 - 1] * 2 +
- B[i * wiener_halfwin1 + wiener_halfwin1 - 1] -
- 2 * B[(wiener_halfwin1 - 1) * wiener_halfwin1 + (wiener_halfwin1 - 1)];
- for (i = 0; i < wiener_halfwin1 - 1; ++i)
- for (j = 0; j < wiener_halfwin1 - 1; ++j)
- B[i * wiener_halfwin1 + j] -=
- 2 * (B[i * wiener_halfwin1 + (wiener_halfwin1 - 1)] +
- B[(wiener_halfwin1 - 1) * wiener_halfwin1 + j] -
- 2 * B[(wiener_halfwin1 - 1) * wiener_halfwin1 +
- (wiener_halfwin1 - 1)]);
- if (linsolve(wiener_halfwin1 - 1, B, wiener_halfwin1, A, S)) {
- S[wiener_halfwin1 - 1] = 1.0;
- for (i = wiener_halfwin1; i < wiener_win; ++i) {
- S[i] = S[wiener_win - 1 - i];
- S[wiener_halfwin1 - 1] -= 2 * S[i];
- }
- memcpy(a, S, wiener_win * sizeof(*a));
- }
-}
-
-// Fix vector a, update vector b
-static void update_b_sep_sym(int wiener_win, double **Mc, double **Hc,
- double *a, double *b) {
- int i, j;
- double S[WIENER_WIN];
- double A[WIENER_HALFWIN1], B[WIENER_HALFWIN1 * WIENER_HALFWIN1];
- const int wiener_win2 = wiener_win * wiener_win;
- const int wiener_halfwin1 = (wiener_win >> 1) + 1;
- memset(A, 0, sizeof(A));
- memset(B, 0, sizeof(B));
- for (i = 0; i < wiener_win; i++) {
- const int ii = wrap_index(i, wiener_win);
- for (j = 0; j < wiener_win; j++) A[ii] += Mc[i][j] * a[j];
- }
-
- for (i = 0; i < wiener_win; i++) {
- for (j = 0; j < wiener_win; j++) {
- const int ii = wrap_index(i, wiener_win);
- const int jj = wrap_index(j, wiener_win);
- int k, l;
- for (k = 0; k < wiener_win; ++k)
- for (l = 0; l < wiener_win; ++l)
- B[jj * wiener_halfwin1 + ii] +=
- Hc[i * wiener_win + j][k * wiener_win2 + l] * a[k] * a[l];
- }
- }
- // Normalization enforcement in the system of equations itself
- for (i = 0; i < wiener_halfwin1 - 1; ++i)
- A[i] -=
- A[wiener_halfwin1 - 1] * 2 +
- B[i * wiener_halfwin1 + wiener_halfwin1 - 1] -
- 2 * B[(wiener_halfwin1 - 1) * wiener_halfwin1 + (wiener_halfwin1 - 1)];
- for (i = 0; i < wiener_halfwin1 - 1; ++i)
- for (j = 0; j < wiener_halfwin1 - 1; ++j)
- B[i * wiener_halfwin1 + j] -=
- 2 * (B[i * wiener_halfwin1 + (wiener_halfwin1 - 1)] +
- B[(wiener_halfwin1 - 1) * wiener_halfwin1 + j] -
- 2 * B[(wiener_halfwin1 - 1) * wiener_halfwin1 +
- (wiener_halfwin1 - 1)]);
- if (linsolve(wiener_halfwin1 - 1, B, wiener_halfwin1, A, S)) {
- S[wiener_halfwin1 - 1] = 1.0;
- for (i = wiener_halfwin1; i < wiener_win; ++i) {
- S[i] = S[wiener_win - 1 - i];
- S[wiener_halfwin1 - 1] -= 2 * S[i];
- }
- memcpy(b, S, wiener_win * sizeof(*b));
- }
-}
-
-static int wiener_decompose_sep_sym(int wiener_win, double *M, double *H,
- double *a, double *b) {
- static const int init_filt[WIENER_WIN] = {
- WIENER_FILT_TAP0_MIDV, WIENER_FILT_TAP1_MIDV, WIENER_FILT_TAP2_MIDV,
- WIENER_FILT_TAP3_MIDV, WIENER_FILT_TAP2_MIDV, WIENER_FILT_TAP1_MIDV,
- WIENER_FILT_TAP0_MIDV,
- };
- double *Hc[WIENER_WIN2];
- double *Mc[WIENER_WIN];
- int i, j, iter;
- const int plane_off = (WIENER_WIN - wiener_win) >> 1;
- const int wiener_win2 = wiener_win * wiener_win;
- for (i = 0; i < wiener_win; i++) {
- a[i] = b[i] = (double)init_filt[i + plane_off] / WIENER_FILT_STEP;
- }
- for (i = 0; i < wiener_win; i++) {
- Mc[i] = M + i * wiener_win;
- for (j = 0; j < wiener_win; j++) {
- Hc[i * wiener_win + j] =
- H + i * wiener_win * wiener_win2 + j * wiener_win;
- }
- }
-
- iter = 1;
- while (iter < NUM_WIENER_ITERS) {
- update_a_sep_sym(wiener_win, Mc, Hc, a, b);
- update_b_sep_sym(wiener_win, Mc, Hc, a, b);
- iter++;
- }
- return 1;
-}
-
-// Computes the function x'*H*x - x'*M for the learned 2D filter x, and compares
-// against identity filters; Final score is defined as the difference between
-// the function values
-static double compute_score(int wiener_win, double *M, double *H,
- InterpKernel vfilt, InterpKernel hfilt) {
- double ab[WIENER_WIN * WIENER_WIN];
- int i, k, l;
- double P = 0, Q = 0;
- double iP = 0, iQ = 0;
- double Score, iScore;
- double a[WIENER_WIN], b[WIENER_WIN];
- const int plane_off = (WIENER_WIN - wiener_win) >> 1;
- const int wiener_win2 = wiener_win * wiener_win;
-
- aom_clear_system_state();
-
- a[WIENER_HALFWIN] = b[WIENER_HALFWIN] = 1.0;
- for (i = 0; i < WIENER_HALFWIN; ++i) {
- a[i] = a[WIENER_WIN - i - 1] = (double)vfilt[i] / WIENER_FILT_STEP;
- b[i] = b[WIENER_WIN - i - 1] = (double)hfilt[i] / WIENER_FILT_STEP;
- a[WIENER_HALFWIN] -= 2 * a[i];
- b[WIENER_HALFWIN] -= 2 * b[i];
- }
- memset(ab, 0, sizeof(ab));
- for (k = 0; k < wiener_win; ++k) {
- for (l = 0; l < wiener_win; ++l)
- ab[k * wiener_win + l] = a[l + plane_off] * b[k + plane_off];
- }
- for (k = 0; k < wiener_win2; ++k) {
- P += ab[k] * M[k];
- for (l = 0; l < wiener_win2; ++l)
- Q += ab[k] * H[k * wiener_win2 + l] * ab[l];
- }
- Score = Q - 2 * P;
-
- iP = M[wiener_win2 >> 1];
- iQ = H[(wiener_win2 >> 1) * wiener_win2 + (wiener_win2 >> 1)];
- iScore = iQ - 2 * iP;
-
- return Score - iScore;
-}
-
-static void quantize_sym_filter(int wiener_win, double *f, InterpKernel fi) {
- int i;
- const int wiener_halfwin = (wiener_win >> 1);
- for (i = 0; i < wiener_halfwin; ++i) {
- fi[i] = RINT(f[i] * WIENER_FILT_STEP);
- }
- // Specialize for 7-tap filter
- if (wiener_win == WIENER_WIN) {
- fi[0] = CLIP(fi[0], WIENER_FILT_TAP0_MINV, WIENER_FILT_TAP0_MAXV);
- fi[1] = CLIP(fi[1], WIENER_FILT_TAP1_MINV, WIENER_FILT_TAP1_MAXV);
- fi[2] = CLIP(fi[2], WIENER_FILT_TAP2_MINV, WIENER_FILT_TAP2_MAXV);
- } else {
- fi[2] = CLIP(fi[1], WIENER_FILT_TAP2_MINV, WIENER_FILT_TAP2_MAXV);
- fi[1] = CLIP(fi[0], WIENER_FILT_TAP1_MINV, WIENER_FILT_TAP1_MAXV);
- fi[0] = 0;
- }
- // Satisfy filter constraints
- fi[WIENER_WIN - 1] = fi[0];
- fi[WIENER_WIN - 2] = fi[1];
- fi[WIENER_WIN - 3] = fi[2];
- // The central element has an implicit +WIENER_FILT_STEP
- fi[3] = -2 * (fi[0] + fi[1] + fi[2]);
-}
-
-static int count_wiener_bits(int wiener_win, WienerInfo *wiener_info,
- WienerInfo *ref_wiener_info) {
- int bits = 0;
- if (wiener_win == WIENER_WIN)
- bits += aom_count_primitive_refsubexpfin(
- WIENER_FILT_TAP0_MAXV - WIENER_FILT_TAP0_MINV + 1,
- WIENER_FILT_TAP0_SUBEXP_K,
- ref_wiener_info->vfilter[0] - WIENER_FILT_TAP0_MINV,
- wiener_info->vfilter[0] - WIENER_FILT_TAP0_MINV);
- bits += aom_count_primitive_refsubexpfin(
- WIENER_FILT_TAP1_MAXV - WIENER_FILT_TAP1_MINV + 1,
- WIENER_FILT_TAP1_SUBEXP_K,
- ref_wiener_info->vfilter[1] - WIENER_FILT_TAP1_MINV,
- wiener_info->vfilter[1] - WIENER_FILT_TAP1_MINV);
- bits += aom_count_primitive_refsubexpfin(
- WIENER_FILT_TAP2_MAXV - WIENER_FILT_TAP2_MINV + 1,
- WIENER_FILT_TAP2_SUBEXP_K,
- ref_wiener_info->vfilter[2] - WIENER_FILT_TAP2_MINV,
- wiener_info->vfilter[2] - WIENER_FILT_TAP2_MINV);
- if (wiener_win == WIENER_WIN)
- bits += aom_count_primitive_refsubexpfin(
- WIENER_FILT_TAP0_MAXV - WIENER_FILT_TAP0_MINV + 1,
- WIENER_FILT_TAP0_SUBEXP_K,
- ref_wiener_info->hfilter[0] - WIENER_FILT_TAP0_MINV,
- wiener_info->hfilter[0] - WIENER_FILT_TAP0_MINV);
- bits += aom_count_primitive_refsubexpfin(
- WIENER_FILT_TAP1_MAXV - WIENER_FILT_TAP1_MINV + 1,
- WIENER_FILT_TAP1_SUBEXP_K,
- ref_wiener_info->hfilter[1] - WIENER_FILT_TAP1_MINV,
- wiener_info->hfilter[1] - WIENER_FILT_TAP1_MINV);
- bits += aom_count_primitive_refsubexpfin(
- WIENER_FILT_TAP2_MAXV - WIENER_FILT_TAP2_MINV + 1,
- WIENER_FILT_TAP2_SUBEXP_K,
- ref_wiener_info->hfilter[2] - WIENER_FILT_TAP2_MINV,
- wiener_info->hfilter[2] - WIENER_FILT_TAP2_MINV);
- return bits;
-}
-
-#define USE_WIENER_REFINEMENT_SEARCH 1
-static int64_t finer_tile_search_wiener(const RestSearchCtxt *rsc,
- const RestorationTileLimits *limits,
- const AV1PixelRect *tile,
- RestorationUnitInfo *rui,
- int wiener_win) {
- const int plane_off = (WIENER_WIN - wiener_win) >> 1;
- int64_t err = try_restoration_unit(rsc, limits, tile, rui);
-#if USE_WIENER_REFINEMENT_SEARCH
- int64_t err2;
- int tap_min[] = { WIENER_FILT_TAP0_MINV, WIENER_FILT_TAP1_MINV,
- WIENER_FILT_TAP2_MINV };
- int tap_max[] = { WIENER_FILT_TAP0_MAXV, WIENER_FILT_TAP1_MAXV,
- WIENER_FILT_TAP2_MAXV };
-
- WienerInfo *plane_wiener = &rui->wiener_info;
-
- // printf("err pre = %"PRId64"\n", err);
- const int start_step = 4;
- for (int s = start_step; s >= 1; s >>= 1) {
- for (int p = plane_off; p < WIENER_HALFWIN; ++p) {
- int skip = 0;
- do {
- if (plane_wiener->hfilter[p] - s >= tap_min[p]) {
- plane_wiener->hfilter[p] -= s;
- plane_wiener->hfilter[WIENER_WIN - p - 1] -= s;
- plane_wiener->hfilter[WIENER_HALFWIN] += 2 * s;
- err2 = try_restoration_unit(rsc, limits, tile, rui);
- if (err2 > err) {
- plane_wiener->hfilter[p] += s;
- plane_wiener->hfilter[WIENER_WIN - p - 1] += s;
- plane_wiener->hfilter[WIENER_HALFWIN] -= 2 * s;
- } else {
- err = err2;
- skip = 1;
- // At the highest step size continue moving in the same direction
- if (s == start_step) continue;
- }
- }
- break;
- } while (1);
- if (skip) break;
- do {
- if (plane_wiener->hfilter[p] + s <= tap_max[p]) {
- plane_wiener->hfilter[p] += s;
- plane_wiener->hfilter[WIENER_WIN - p - 1] += s;
- plane_wiener->hfilter[WIENER_HALFWIN] -= 2 * s;
- err2 = try_restoration_unit(rsc, limits, tile, rui);
- if (err2 > err) {
- plane_wiener->hfilter[p] -= s;
- plane_wiener->hfilter[WIENER_WIN - p - 1] -= s;
- plane_wiener->hfilter[WIENER_HALFWIN] += 2 * s;
- } else {
- err = err2;
- // At the highest step size continue moving in the same direction
- if (s == start_step) continue;
- }
- }
- break;
- } while (1);
- }
- for (int p = plane_off; p < WIENER_HALFWIN; ++p) {
- int skip = 0;
- do {
- if (plane_wiener->vfilter[p] - s >= tap_min[p]) {
- plane_wiener->vfilter[p] -= s;
- plane_wiener->vfilter[WIENER_WIN - p - 1] -= s;
- plane_wiener->vfilter[WIENER_HALFWIN] += 2 * s;
- err2 = try_restoration_unit(rsc, limits, tile, rui);
- if (err2 > err) {
- plane_wiener->vfilter[p] += s;
- plane_wiener->vfilter[WIENER_WIN - p - 1] += s;
- plane_wiener->vfilter[WIENER_HALFWIN] -= 2 * s;
- } else {
- err = err2;
- skip = 1;
- // At the highest step size continue moving in the same direction
- if (s == start_step) continue;
- }
- }
- break;
- } while (1);
- if (skip) break;
- do {
- if (plane_wiener->vfilter[p] + s <= tap_max[p]) {
- plane_wiener->vfilter[p] += s;
- plane_wiener->vfilter[WIENER_WIN - p - 1] += s;
- plane_wiener->vfilter[WIENER_HALFWIN] -= 2 * s;
- err2 = try_restoration_unit(rsc, limits, tile, rui);
- if (err2 > err) {
- plane_wiener->vfilter[p] -= s;
- plane_wiener->vfilter[WIENER_WIN - p - 1] -= s;
- plane_wiener->vfilter[WIENER_HALFWIN] += 2 * s;
- } else {
- err = err2;
- // At the highest step size continue moving in the same direction
- if (s == start_step) continue;
- }
- }
- break;
- } while (1);
- }
- }
-// printf("err post = %"PRId64"\n", err);
-#endif // USE_WIENER_REFINEMENT_SEARCH
- return err;
-}
-
-static void search_wiener(const RestorationTileLimits *limits,
- const AV1PixelRect *tile_rect, int rest_unit_idx,
- void *priv, int32_t *tmpbuf,
- RestorationLineBuffers *rlbs) {
- (void)tmpbuf;
- (void)rlbs;
- RestSearchCtxt *rsc = (RestSearchCtxt *)priv;
- RestUnitSearchInfo *rusi = &rsc->rusi[rest_unit_idx];
-
- const int wiener_win =
- (rsc->plane == AOM_PLANE_Y) ? WIENER_WIN : WIENER_WIN_CHROMA;
-
- double M[WIENER_WIN2];
- double H[WIENER_WIN2 * WIENER_WIN2];
- double vfilterd[WIENER_WIN], hfilterd[WIENER_WIN];
-
- const AV1_COMMON *const cm = rsc->cm;
- if (cm->seq_params.use_highbitdepth) {
- compute_stats_highbd(wiener_win, rsc->dgd_buffer, rsc->src_buffer,
- limits->h_start, limits->h_end, limits->v_start,
- limits->v_end, rsc->dgd_stride, rsc->src_stride, M, H);
- } else {
- av1_compute_stats(wiener_win, rsc->dgd_buffer, rsc->src_buffer,
- limits->h_start, limits->h_end, limits->v_start,
- limits->v_end, rsc->dgd_stride, rsc->src_stride, M, H);
- }
-
- const MACROBLOCK *const x = rsc->x;
- const int64_t bits_none = x->wiener_restore_cost[0];
-
- if (!wiener_decompose_sep_sym(wiener_win, M, H, vfilterd, hfilterd)) {
- rsc->bits += bits_none;
- rsc->sse += rusi->sse[RESTORE_NONE];
- rusi->best_rtype[RESTORE_WIENER - 1] = RESTORE_NONE;
- rusi->sse[RESTORE_WIENER] = INT64_MAX;
- return;
- }
-
- RestorationUnitInfo rui;
- memset(&rui, 0, sizeof(rui));
- rui.restoration_type = RESTORE_WIENER;
- quantize_sym_filter(wiener_win, vfilterd, rui.wiener_info.vfilter);
- quantize_sym_filter(wiener_win, hfilterd, rui.wiener_info.hfilter);
-
- // Filter score computes the value of the function x'*A*x - x'*b for the
- // learned filter and compares it against identity filer. If there is no
- // reduction in the function, the filter is reverted back to identity
- if (compute_score(wiener_win, M, H, rui.wiener_info.vfilter,
- rui.wiener_info.hfilter) > 0) {
- rsc->bits += bits_none;
- rsc->sse += rusi->sse[RESTORE_NONE];
- rusi->best_rtype[RESTORE_WIENER - 1] = RESTORE_NONE;
- rusi->sse[RESTORE_WIENER] = INT64_MAX;
- return;
- }
-
- aom_clear_system_state();
-
- rusi->sse[RESTORE_WIENER] =
- finer_tile_search_wiener(rsc, limits, tile_rect, &rui, wiener_win);
- rusi->wiener = rui.wiener_info;
-
- if (wiener_win != WIENER_WIN) {
- assert(rui.wiener_info.vfilter[0] == 0 &&
- rui.wiener_info.vfilter[WIENER_WIN - 1] == 0);
- assert(rui.wiener_info.hfilter[0] == 0 &&
- rui.wiener_info.hfilter[WIENER_WIN - 1] == 0);
- }
-
- const int64_t bits_wiener =
- x->wiener_restore_cost[1] +
- (count_wiener_bits(wiener_win, &rusi->wiener, &rsc->wiener)
- << AV1_PROB_COST_SHIFT);
-
- double cost_none =
- RDCOST_DBL(x->rdmult, bits_none >> 4, rusi->sse[RESTORE_NONE]);
- double cost_wiener =
- RDCOST_DBL(x->rdmult, bits_wiener >> 4, rusi->sse[RESTORE_WIENER]);
-
- RestorationType rtype =
- (cost_wiener < cost_none) ? RESTORE_WIENER : RESTORE_NONE;
- rusi->best_rtype[RESTORE_WIENER - 1] = rtype;
-
- rsc->sse += rusi->sse[rtype];
- rsc->bits += (cost_wiener < cost_none) ? bits_wiener : bits_none;
- if (cost_wiener < cost_none) rsc->wiener = rusi->wiener;
-}
-
-static void search_norestore(const RestorationTileLimits *limits,
- const AV1PixelRect *tile_rect, int rest_unit_idx,
- void *priv, int32_t *tmpbuf,
- RestorationLineBuffers *rlbs) {
- (void)tile_rect;
- (void)tmpbuf;
- (void)rlbs;
-
- RestSearchCtxt *rsc = (RestSearchCtxt *)priv;
- RestUnitSearchInfo *rusi = &rsc->rusi[rest_unit_idx];
-
- const int highbd = rsc->cm->seq_params.use_highbitdepth;
- rusi->sse[RESTORE_NONE] = sse_restoration_unit(
- limits, rsc->src, rsc->cm->frame_to_show, rsc->plane, highbd);
-
- rsc->sse += rusi->sse[RESTORE_NONE];
-}
-
-static void search_switchable(const RestorationTileLimits *limits,
- const AV1PixelRect *tile_rect, int rest_unit_idx,
- void *priv, int32_t *tmpbuf,
- RestorationLineBuffers *rlbs) {
- (void)limits;
- (void)tile_rect;
- (void)tmpbuf;
- (void)rlbs;
- RestSearchCtxt *rsc = (RestSearchCtxt *)priv;
- RestUnitSearchInfo *rusi = &rsc->rusi[rest_unit_idx];
-
- const MACROBLOCK *const x = rsc->x;
-
- const int wiener_win =
- (rsc->plane == AOM_PLANE_Y) ? WIENER_WIN : WIENER_WIN_CHROMA;
-
- double best_cost = 0;
- int64_t best_bits = 0;
- RestorationType best_rtype = RESTORE_NONE;
-
- for (RestorationType r = 0; r < RESTORE_SWITCHABLE_TYPES; ++r) {
- // Check for the condition that wiener or sgrproj search could not
- // find a solution or the solution was worse than RESTORE_NONE.
- // In either case the best_rtype will be set as RESTORE_NONE. These
- // should be skipped from the test below.
- if (r > RESTORE_NONE) {
- if (rusi->best_rtype[r - 1] == RESTORE_NONE) continue;
- }
-
- const int64_t sse = rusi->sse[r];
- int64_t coeff_pcost = 0;
- switch (r) {
- case RESTORE_NONE: coeff_pcost = 0; break;
- case RESTORE_WIENER:
- coeff_pcost =
- count_wiener_bits(wiener_win, &rusi->wiener, &rsc->wiener);
- break;
- case RESTORE_SGRPROJ:
- coeff_pcost = count_sgrproj_bits(&rusi->sgrproj, &rsc->sgrproj);
- break;
- default: assert(0); break;
- }
- const int64_t coeff_bits = coeff_pcost << AV1_PROB_COST_SHIFT;
- const int64_t bits = x->switchable_restore_cost[r] + coeff_bits;
- double cost = RDCOST_DBL(x->rdmult, bits >> 4, sse);
- if (r == RESTORE_SGRPROJ && rusi->sgrproj.ep < 10)
- cost *= (1 + DUAL_SGR_PENALTY_MULT * rsc->sf->dual_sgr_penalty_level);
- if (r == 0 || cost < best_cost) {
- best_cost = cost;
- best_bits = bits;
- best_rtype = r;
- }
- }
-
- rusi->best_rtype[RESTORE_SWITCHABLE - 1] = best_rtype;
-
- rsc->sse += rusi->sse[best_rtype];
- rsc->bits += best_bits;
- if (best_rtype == RESTORE_WIENER) rsc->wiener = rusi->wiener;
- if (best_rtype == RESTORE_SGRPROJ) rsc->sgrproj = rusi->sgrproj;
-}
-
-static void copy_unit_info(RestorationType frame_rtype,
- const RestUnitSearchInfo *rusi,
- RestorationUnitInfo *rui) {
- assert(frame_rtype > 0);
- rui->restoration_type = rusi->best_rtype[frame_rtype - 1];
- if (rui->restoration_type == RESTORE_WIENER)
- rui->wiener_info = rusi->wiener;
- else
- rui->sgrproj_info = rusi->sgrproj;
-}
-
-static double search_rest_type(RestSearchCtxt *rsc, RestorationType rtype) {
- static const rest_unit_visitor_t funs[RESTORE_TYPES] = {
- search_norestore, search_wiener, search_sgrproj, search_switchable
- };
-
- reset_rsc(rsc);
- rsc_on_tile(LR_TILE_ROW, LR_TILE_COL, rsc);
- av1_foreach_rest_unit_in_plane(rsc->cm, rsc->plane, funs[rtype], rsc,
- &rsc->tile_rect, rsc->cm->rst_tmpbuf, NULL);
- return RDCOST_DBL(rsc->x->rdmult, rsc->bits >> 4, rsc->sse);
-}
-
-static int rest_tiles_in_plane(const AV1_COMMON *cm, int plane) {
- const RestorationInfo *rsi = &cm->rst_info[plane];
- return rsi->units_per_tile;
-}
-
-void av1_pick_filter_restoration(const YV12_BUFFER_CONFIG *src, AV1_COMP *cpi) {
- AV1_COMMON *const cm = &cpi->common;
- const int num_planes = av1_num_planes(cm);
- assert(!cm->all_lossless);
-
- int ntiles[2];
- for (int is_uv = 0; is_uv < 2; ++is_uv)
- ntiles[is_uv] = rest_tiles_in_plane(cm, is_uv);
-
- assert(ntiles[1] <= ntiles[0]);
- RestUnitSearchInfo *rusi =
- (RestUnitSearchInfo *)aom_memalign(16, sizeof(*rusi) * ntiles[0]);
-
- // If the restoration unit dimensions are not multiples of
- // rsi->restoration_unit_size then some elements of the rusi array may be
- // left uninitialised when we reach copy_unit_info(...). This is not a
- // problem, as these elements are ignored later, but in order to quiet
- // Valgrind's warnings we initialise the array below.
- memset(rusi, 0, sizeof(*rusi) * ntiles[0]);
- cpi->td.mb.rdmult = cpi->rd.RDMULT;
-
- RestSearchCtxt rsc;
- const int plane_start = AOM_PLANE_Y;
- const int plane_end = num_planes > 1 ? AOM_PLANE_V : AOM_PLANE_Y;
- for (int plane = plane_start; plane <= plane_end; ++plane) {
- init_rsc(src, &cpi->common, &cpi->td.mb, &cpi->sf, plane, rusi,
- &cpi->trial_frame_rst, &rsc);
-
- const int plane_ntiles = ntiles[plane > 0];
- const RestorationType num_rtypes =
- (plane_ntiles > 1) ? RESTORE_TYPES : RESTORE_SWITCHABLE_TYPES;
-
- double best_cost = 0;
- RestorationType best_rtype = RESTORE_NONE;
-
- const int highbd = rsc.cm->seq_params.use_highbitdepth;
- extend_frame(rsc.dgd_buffer, rsc.plane_width, rsc.plane_height,
- rsc.dgd_stride, RESTORATION_BORDER, RESTORATION_BORDER,
- highbd);
-
- for (RestorationType r = 0; r < num_rtypes; ++r) {
- if ((force_restore_type != RESTORE_TYPES) && (r != RESTORE_NONE) &&
- (r != force_restore_type))
- continue;
-
- double cost = search_rest_type(&rsc, r);
-
- if (r == 0 || cost < best_cost) {
- best_cost = cost;
- best_rtype = r;
- }
- }
-
- cm->rst_info[plane].frame_restoration_type = best_rtype;
- if (force_restore_type != RESTORE_TYPES)
- assert(best_rtype == force_restore_type || best_rtype == RESTORE_NONE);
-
- if (best_rtype != RESTORE_NONE) {
- for (int u = 0; u < plane_ntiles; ++u) {
- copy_unit_info(best_rtype, &rusi[u], &cm->rst_info[plane].unit_info[u]);
- }
- }
- }
-
- aom_free(rusi);
-}
diff --git a/third_party/aom/av1/encoder/pickrst.h b/third_party/aom/av1/encoder/pickrst.h
deleted file mode 100644
index 3fec0c34b..000000000
--- a/third_party/aom/av1/encoder/pickrst.h
+++ /dev/null
@@ -1,46 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-#ifndef AOM_AV1_ENCODER_PICKRST_H_
-#define AOM_AV1_ENCODER_PICKRST_H_
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-#include "av1/encoder/encoder.h"
-#include "aom_ports/system_state.h"
-
-struct yv12_buffer_config;
-struct AV1_COMP;
-
-// 16-entry byte table: 0, then each of 1..7 twice, then 8.
-// NOTE(review): presumably a byte-shuffle mask for vectorised statistics code
-// that includes this header; no user is visible here -- confirm before editing.
-static const uint8_t g_shuffle_stats_data[16] = {
- 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, 8,
-};
-
-// Returns the arithmetic mean of the 8-bit samples of 'src' inside the
-// rectangle covering rows [v_start, v_end) and columns [h_start, h_end).
-// 'stride' is the distance, in samples, between the starts of adjacent rows.
-static INLINE double find_average(const uint8_t *src, int h_start, int h_end,
-                                  int v_start, int v_end, int stride) {
-  const int num_samples = (v_end - v_start) * (h_end - h_start);
-  uint64_t total = 0;
-  aom_clear_system_state();
-  for (int row = v_start; row < v_end; ++row) {
-    for (int col = h_start; col < h_end; ++col) {
-      total += src[row * stride + col];
-    }
-  }
-  return (double)total / num_samples;
-}
-
-void av1_pick_filter_restoration(const YV12_BUFFER_CONFIG *sd, AV1_COMP *cpi);
-
-#ifdef __cplusplus
-} // extern "C"
-#endif
-
-#endif // AOM_AV1_ENCODER_PICKRST_H_
diff --git a/third_party/aom/av1/encoder/pustats.h b/third_party/aom/av1/encoder/pustats.h
deleted file mode 100644
index 40dd46768..000000000
--- a/third_party/aom/av1/encoder/pustats.h
+++ /dev/null
@@ -1,198 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#ifndef AOM_AV1_ENCODER_PUSTATS_H_
-#define AOM_AV1_ENCODER_PUSTATS_H_
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-#include "av1/encoder/ml.h"
-
-#define NUM_FEATURES_PUSTATS 8
-#define NUM_HIDDEN_LAYERS 2
-#define HIDDEN_LAYERS_0_NODES 12
-#define HIDDEN_LAYERS_1_NODES 10
-#define LOGITS_NODES 1
-
-// Weights of hidden layer 0 of the "rate" network:
-// NUM_FEATURES_PUSTATS * HIDDEN_LAYERS_0_NODES (8 * 12 = 96) floats, referenced
-// as the first weight array of av1_pustats_rate_nnconfig.
-// NOTE(review): the element ordering (per node vs. per feature) is defined by
-// the NN evaluation code, which is not visible here -- confirm before editing.
-static const float
- av1_pustats_rate_hiddenlayer_0_kernel[NUM_FEATURES_PUSTATS *
- HIDDEN_LAYERS_0_NODES] = {
- -0.1758f, -0.0499f, -10.0069f, -2.2838f, -0.3359f, 0.3459f, -0.3285f,
- -0.0515f, -0.5417f, 0.2357f, -0.0575f, -69.0782f, 0.5348f, 1.4068f,
- 0.2213f, -1.0490f, -0.0636f, 0.1654f, 1.1002f, 33.4924f, 0.4358f,
- 1.2499f, 0.1143f, 0.0592f, -1.6335f, -0.0092f, 1.2207f, -28.4543f,
- -0.4973f, 0.4368f, 0.2341f, -0.1623f, -3.8986f, 0.1311f, -1.8789f,
- -3.9079f, -0.8158f, -0.8420f, 1.4295f, -2.3629f, -1.4825f, 0.6498f,
- -5.3669f, 6.4434f, 1.8393f, -35.0678f, 3.7459f, -2.8504f, 2.0502f,
- -0.1812f, -3.9011f, -1.0155f, 1.8375f, -1.4517f, 1.3917f, 3.8664f,
- 0.8345f, -0.3472f, 5.7740f, -1.1196f, -0.3264f, -1.2481f, -0.9284f,
- -4.9657f, 2.2831f, 0.7337f, 2.3176f, 0.6416f, 0.8804f, 1.9988f,
- -1.3426f, 1.2728f, 1.2249f, -0.1551f, 5.6045f, 0.2046f, -2.1464f,
- -2.4922f, -0.5334f, 12.1055f, 7.2467f, -0.0070f, 0.0234f, 0.0021f,
- 0.0215f, -0.0098f, -0.0682f, -6.1494f, -0.3176f, -1.6069f, -0.2119f,
- -1.0533f, -0.3566f, 0.5294f, -0.4335f, 0.1626f,
- };
-
-// Biases of hidden layer 0 of the "rate" network, one float per node
-// (HIDDEN_LAYERS_0_NODES = 12); first bias array of av1_pustats_rate_nnconfig.
-static const float av1_pustats_rate_hiddenlayer_0_bias[HIDDEN_LAYERS_0_NODES] =
- {
- 10.5266f, 5.3268f, -1.0678f, 7.7411f, 8.7164f, -0.3235f,
- 7.3028f, 9.0874f, -6.4594f, -1.0102f, -1.1146f, 10.8419f,
- };
-
-// Weights of hidden layer 1 of the "rate" network:
-// HIDDEN_LAYERS_0_NODES * HIDDEN_LAYERS_1_NODES (12 * 10 = 120) floats,
-// referenced as the second weight array of av1_pustats_rate_nnconfig.
-// NOTE(review): element ordering is defined by the NN evaluation code, which
-// is not visible here -- confirm before editing.
-static const float
- av1_pustats_rate_hiddenlayer_1_kernel[HIDDEN_LAYERS_0_NODES *
- HIDDEN_LAYERS_1_NODES] = {
- 10.5932f, 2.5192f, -0.0015f, 5.9479f, 5.2426f, -0.4091f, 5.3220f,
- 6.0469f, 0.7200f, 3.3241f, 5.5006f, 12.8290f, -1.6396f, 0.5743f,
- -0.8370f, 1.9956f, -4.9270f, -1.5295f, 2.1350f, -9.4415f, -0.7094f,
- 5.1822f, 19.7287f, -3.0444f, -0.3320f, 0.0031f, -0.2709f, -0.5249f,
- 0.3281f, -0.2240f, 0.2225f, -0.2386f, -0.4370f, -0.2438f, -0.4928f,
- -0.2842f, -2.1772f, 9.2570f, -17.6655f, 3.5448f, -2.8394f, -1.0167f,
- -0.5115f, -1.9260f, -0.2111f, -0.7528f, -1.2387f, -0.0401f, 5.0716f,
- -3.3763f, -0.2898f, -0.4956f, -7.9993f, 0.1526f, -0.0242f, 0.7354f,
- 6.0432f, 4.8043f, 7.4790f, -0.6295f, 1.7565f, 3.7197f, -2.3963f,
- 6.8945f, 2.9717f, -3.1623f, 3.4241f, 4.4676f, -1.8154f, -2.9401f,
- -8.5657f, -3.0240f, -1.4661f, 8.1145f, -12.7858f, 3.3624f, -1.0819f,
- -4.2856f, 1.1801f, -0.5587f, -1.6062f, -1.1813f, -3.5882f, -0.2490f,
- -24.9566f, -0.4140f, -0.1113f, 3.5537f, 4.4112f, 0.1367f, -1.5876f,
- 1.6605f, 1.3903f, -0.0253f, -2.1419f, -2.2197f, -0.7659f, -0.4249f,
- -0.0424f, 0.1486f, 0.4643f, -0.9068f, -0.3619f, -0.7624f, -0.9132f,
- -0.4947f, -0.3527f, -0.5445f, -0.4768f, -1.7761f, -1.0686f, 0.5462f,
- 1.3371f, 4.3116f, 0.0777f, -2.7216f, -1.8908f, 3.4989f, 7.7269f,
- -2.7566f,
- };
-
-// Biases of hidden layer 1 of the "rate" network, one float per node
-// (HIDDEN_LAYERS_1_NODES = 10); second bias array of av1_pustats_rate_nnconfig.
-static const float av1_pustats_rate_hiddenlayer_1_bias[HIDDEN_LAYERS_1_NODES] =
- {
- 13.2435f, -8.5477f, -0.0998f, -1.5131f, -12.0187f,
- 6.1715f, 0.5094f, 7.6433f, -0.3992f, -1.3555f,
- };
-
-// Output-layer weights of the "rate" network:
-// HIDDEN_LAYERS_1_NODES * LOGITS_NODES (10 * 1 = 10) floats; last weight array
-// of av1_pustats_rate_nnconfig.
-static const float
- av1_pustats_rate_logits_kernel[HIDDEN_LAYERS_1_NODES * LOGITS_NODES] = {
- 4.3078f, -17.3497f, 0.0195f, 34.6032f, -5.0127f,
- 5.3079f, 10.0077f, -13.129f, 0.0087f, -8.4009f,
- };
-
-// Output-layer bias of the "rate" network (LOGITS_NODES = 1 value); last bias
-// array of av1_pustats_rate_nnconfig.
-static const float av1_pustats_rate_logits_bias[LOGITS_NODES] = {
- 4.5103f,
-};
-
-// NN_CONFIG tying together the "rate" network tables above: 8 inputs, two
-// hidden layers of 12 and 10 nodes, and a single output. The weight and bias
-// arrays are listed in layer order (hidden 0, hidden 1, output).
-// NOTE(review): the exact quantity predicted (presumably a rate estimate
-// derived from prediction-unit statistics) is decided by the caller, which is
-// not visible here.
-static const NN_CONFIG av1_pustats_rate_nnconfig = {
- NUM_FEATURES_PUSTATS, // num_inputs
- LOGITS_NODES, // num_outputs
- NUM_HIDDEN_LAYERS, // num_hidden_layers
- { HIDDEN_LAYERS_0_NODES, HIDDEN_LAYERS_1_NODES }, // num_hidden_nodes
- {
- av1_pustats_rate_hiddenlayer_0_kernel,
- av1_pustats_rate_hiddenlayer_1_kernel,
- av1_pustats_rate_logits_kernel,
- },
- {
- av1_pustats_rate_hiddenlayer_0_bias,
- av1_pustats_rate_hiddenlayer_1_bias,
- av1_pustats_rate_logits_bias,
- },
-};
-
-// Weights of hidden layer 0 of the "dist" network:
-// NUM_FEATURES_PUSTATS * HIDDEN_LAYERS_0_NODES (8 * 12 = 96) floats, referenced
-// as the first weight array of av1_pustats_dist_nnconfig.
-// NOTE(review): the element ordering (per node vs. per feature) is defined by
-// the NN evaluation code, which is not visible here -- confirm before editing.
-static const float
- av1_pustats_dist_hiddenlayer_0_kernel[NUM_FEATURES_PUSTATS *
- HIDDEN_LAYERS_0_NODES] = {
- -0.2560f, 0.1105f, -0.8434f, -0.0132f, -8.9371f, -1.1176f, -0.3655f,
- 0.4885f, 1.7518f, 0.4985f, 0.5582f, -0.3739f, 0.9403f, 0.3874f,
- 0.3265f, 1.7383f, 3.1747f, 0.0285f, 3.3942f, -0.0123f, 0.5057f,
- 0.1584f, 0.2697f, 4.6151f, 3.6251f, -0.0121f, -1.0047f, -0.0037f,
- 0.0127f, 0.1935f, -0.5277f, -2.7144f, 0.0729f, -0.1457f, -0.0816f,
- -0.5462f, 0.4738f, 0.3599f, -0.0564f, 0.0910f, 0.0126f, -0.0310f,
- -2.1311f, -0.4666f, -0.0074f, -0.0765f, 0.0287f, -0.2662f, -0.0999f,
- -0.2983f, -0.4899f, -0.2314f, 0.2873f, -0.3614f, 0.1783f, -0.1210f,
- 0.3569f, 0.5436f, -8.0536f, -0.0044f, -1.5255f, -0.8247f, -0.4556f,
- 1.9045f, 0.5463f, 0.1102f, -0.9293f, -0.0185f, -0.8302f, -0.4378f,
- -0.3531f, -1.3095f, 0.6099f, 0.7977f, 4.1950f, -0.0067f, -0.2762f,
- -0.1574f, -0.2149f, 0.6104f, -1.7053f, 0.1904f, 4.2402f, -0.2671f,
- 0.8940f, 0.6820f, 0.2241f, -0.9459f, 1.4571f, 0.5255f, 2.3352f,
- -0.0806f, 0.5231f, 0.3928f, 0.4146f, 2.0956f,
- };
-
-// Biases of hidden layer 0 of the "dist" network, one float per node
-// (HIDDEN_LAYERS_0_NODES = 12); first bias array of av1_pustats_dist_nnconfig.
-static const float av1_pustats_dist_hiddenlayer_0_bias[HIDDEN_LAYERS_0_NODES] =
- {
- 1.1597f, 0.0836f, -0.7471f, -0.2439f, -0.0438f, 2.4626f,
- 0.f, 1.1485f, 2.7085f, -4.7897f, 1.4093f, -1.657f,
- };
-
-// Weights of hidden layer 1 of the "dist" network:
-// HIDDEN_LAYERS_0_NODES * HIDDEN_LAYERS_1_NODES (12 * 10 = 120) floats,
-// referenced as the second weight array of av1_pustats_dist_nnconfig.
-// NOTE(review): element ordering is defined by the NN evaluation code, which
-// is not visible here -- confirm before editing.
-static const float
- av1_pustats_dist_hiddenlayer_1_kernel[HIDDEN_LAYERS_0_NODES *
- HIDDEN_LAYERS_1_NODES] = {
- -0.5203f, -1.3468f, 0.3865f, -0.6859f, 0.0058f, 4.0682f, 0.4807f,
- -0.1380f, 0.6050f, 0.8958f, 0.7748f, -0.1311f, 1.7317f, 1.1265f,
- 0.0827f, 0.1407f, -0.3605f, 0.5429f, 0.1880f, -0.1439f, 0.2837f,
- 1.6477f, 0.0832f, 0.0593f, -1.8464f, -0.7241f, -1.0672f, -0.3546f,
- -0.3842f, -2.3637f, 0.2514f, 0.8263f, -0.1872f, 0.5774f, -0.3610f,
- -0.0205f, 1.3977f, -0.1083f, 0.6923f, 1.3039f, -0.2870f, 1.0622f,
- -0.0566f, 0.2697f, -0.5429f, -0.6193f, 1.7559f, 0.3246f, 1.9159f,
- 0.3744f, 0.0686f, 1.0191f, -0.4212f, 1.9591f, -0.0691f, -0.1085f,
- -1.2034f, 0.0606f, 1.0116f, 0.5565f, -0.1874f, -0.7898f, 0.4796f,
- 0.2290f, 0.4334f, -0.5817f, -0.2949f, 0.1367f, -0.2932f, -1.1265f,
- 0.0133f, -0.5309f, -3.3191f, 0.0939f, 0.3895f, -2.5812f, -0.0066f,
- -3.0063f, -0.2982f, 0.7309f, -0.2422f, -0.2770f, -0.7152f, 0.1700f,
- 1.9630f, 0.1988f, 0.4194f, 0.8762f, 0.3402f, 0.1051f, -0.1598f,
- 0.2405f, 0.0392f, 1.1256f, 1.5245f, 0.0950f, 0.2160f, -0.5023f,
- 0.2584f, 0.2074f, 0.2218f, 0.3966f, -0.0921f, -0.2435f, -0.4560f,
- -1.1923f, -0.3716f, -0.3286f, -1.3225f, 0.1896f, -0.3342f, -0.7888f,
- -0.4488f, -1.7168f, 0.3341f, 0.1146f, 0.5226f, 0.2610f, -0.4574f,
- -0.4164f,
- };
-
-// Biases of hidden layer 1 of the "dist" network, one float per node
-// (HIDDEN_LAYERS_1_NODES = 10); second bias array of av1_pustats_dist_nnconfig.
-static const float av1_pustats_dist_hiddenlayer_1_bias[HIDDEN_LAYERS_1_NODES] =
- {
- -2.3014f, -2.4292f, 1.3317f, -3.2361f, -1.918f,
- 2.7149f, -2.5649f, 2.7765f, 2.9617f, 2.7684f,
- };
-
-// Output-layer weights of the "dist" network:
-// HIDDEN_LAYERS_1_NODES * LOGITS_NODES (10 * 1 = 10) floats; last weight array
-// of av1_pustats_dist_nnconfig.
-static const float
- av1_pustats_dist_logits_kernel[HIDDEN_LAYERS_1_NODES * LOGITS_NODES] = {
- -0.6868f, -0.6715f, 0.449f, -1.293f, 0.6214f,
- 0.9894f, -0.4342f, 0.7002f, 1.4363f, 0.6951f,
- };
-
-// Output-layer bias of the "dist" network (LOGITS_NODES = 1 value); last bias
-// array of av1_pustats_dist_nnconfig.
-static const float av1_pustats_dist_logits_bias[LOGITS_NODES] = {
- 2.3371f,
-};
-
-// NN_CONFIG tying together the "dist" network tables above: 8 inputs, two
-// hidden layers of 12 and 10 nodes, and a single output. The weight and bias
-// arrays are listed in layer order (hidden 0, hidden 1, output).
-// NOTE(review): the exact quantity predicted (presumably a distortion estimate
-// derived from prediction-unit statistics) is decided by the caller, which is
-// not visible here.
-static const NN_CONFIG av1_pustats_dist_nnconfig = {
- NUM_FEATURES_PUSTATS, // num_inputs
- LOGITS_NODES, // num_outputs
- NUM_HIDDEN_LAYERS, // num_hidden_layers
- { HIDDEN_LAYERS_0_NODES, HIDDEN_LAYERS_1_NODES }, // num_hidden_nodes
- {
- av1_pustats_dist_hiddenlayer_0_kernel,
- av1_pustats_dist_hiddenlayer_1_kernel,
- av1_pustats_dist_logits_kernel,
- },
- {
- av1_pustats_dist_hiddenlayer_0_bias,
- av1_pustats_dist_hiddenlayer_1_bias,
- av1_pustats_dist_logits_bias,
- },
-};
-
-#undef NUM_HIDDEN_LAYERS
-#undef HIDDEN_LAYERS_0_NODES
-#undef HIDDEN_LAYERS_1_NODES
-#undef LOGITS_NODES
-
-#ifdef __cplusplus
-} // extern "C"
-#endif
-
-#endif // AOM_AV1_ENCODER_PUSTATS_H_
diff --git a/third_party/aom/av1/encoder/random.h b/third_party/aom/av1/encoder/random.h
deleted file mode 100644
index 0bca39102..000000000
--- a/third_party/aom/av1/encoder/random.h
+++ /dev/null
@@ -1,29 +0,0 @@
-/*
- * Copyright (c) 2017, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#ifndef AOM_AV1_ENCODER_RANDOM_H_
-#define AOM_AV1_ENCODER_RANDOM_H_
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-// Linear congruential generator. Advances '*state' by one step and returns a
-// pseudo-random value in the range [0, 32768), taken from bits 16..30 of the
-// updated state.
-static INLINE unsigned int lcg_rand16(unsigned int *state) {
-  const unsigned long long next = *state * 1103515245ULL + 12345;
-  *state = (unsigned int)next;
-  return (*state >> 16) & 0x7fff;
-}
-
-#ifdef __cplusplus
-} // extern "C"
-#endif
-
-#endif // AOM_AV1_ENCODER_RANDOM_H_
diff --git a/third_party/aom/av1/encoder/ransac.c b/third_party/aom/av1/encoder/ransac.c
deleted file mode 100644
index 781f528eb..000000000
--- a/third_party/aom/av1/encoder/ransac.c
+++ /dev/null
@@ -1,603 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-#include <memory.h>
-#include <math.h>
-#include <time.h>
-#include <stdio.h>
-#include <stdlib.h>
-#include <assert.h>
-
-#include "av1/encoder/ransac.h"
-#include "av1/encoder/mathutils.h"
-#include "av1/encoder/random.h"
-
-#define MAX_MINPTS 4
-#define MAX_DEGENERATE_ITER 10
-#define MINPTS_MULTIPLIER 5
-
-#define INLIER_THRESHOLD 1.0
-#define MIN_TRIALS 20
-
-////////////////////////////////////////////////////////////////////////////////
-// ransac
-// Returns non-zero when the sampled points 'p' (interleaved x, y doubles) form
-// a degenerate configuration; ransac() then draws a new sample.
-typedef int (*IsDegenerateFunc)(double *p);
-// Signature shared with normalize_homography(): normalises 'np' points in
-// place and writes the 3x3 normalising matrix to 'T'.
-typedef void (*NormalizeFunc)(double *p, int np, double *T);
-// Signature shared with the denormalize_*() helpers: rewrites 'params' in
-// place using the normalising matrices 'T1' and 'T2'.
-typedef void (*DenormalizeFunc)(double *params, double *T1, double *T2);
-// Fits a model mapping 'points1' onto 'points2' ('points' correspondences) and
-// writes it to 'params'. Returns 0 on success and non-zero on failure.
-typedef int (*FindTransformationFunc)(int points, double *points1,
- double *points2, double *params);
-// Applies the model 'mat' to 'n' points. Consecutive input points are
-// 'stride_points' doubles apart; consecutive outputs are 'stride_proj' apart.
-typedef void (*ProjectPointsDoubleFunc)(double *mat, double *points,
- double *proj, const int n,
- const int stride_points,
- const int stride_proj);
-
-// Applies the translation (mat[0], mat[1]) to 'n' (x, y) points. Consecutive
-// input points are 'stride_points' doubles apart and consecutive outputs are
-// 'stride_proj' doubles apart.
-static void project_points_double_translation(double *mat, double *points,
-                                              double *proj, const int n,
-                                              const int stride_points,
-                                              const int stride_proj) {
-  for (int k = 0; k < n; ++k) {
-    const double *const in = points + k * stride_points;
-    double *const out = proj + k * stride_proj;
-    // Read both coordinates before writing, as the output may alias the input.
-    const double x = in[0];
-    const double y = in[1];
-    out[0] = x + mat[0];
-    out[1] = y + mat[1];
-  }
-}
-
-// Applies the rotation+zoom model 'mat' to 'n' (x, y) points:
-//   x' =  mat[2] * x + mat[3] * y + mat[0]
-//   y' = -mat[3] * x + mat[2] * y + mat[1]
-// Consecutive input points are 'stride_points' doubles apart and consecutive
-// outputs are 'stride_proj' doubles apart.
-static void project_points_double_rotzoom(double *mat, double *points,
-                                          double *proj, const int n,
-                                          const int stride_points,
-                                          const int stride_proj) {
-  for (int k = 0; k < n; ++k) {
-    const double *const in = points + k * stride_points;
-    double *const out = proj + k * stride_proj;
-    // Read both coordinates before writing, as the output may alias the input.
-    const double x = in[0];
-    const double y = in[1];
-    out[0] = mat[2] * x + mat[3] * y + mat[0];
-    out[1] = -mat[3] * x + mat[2] * y + mat[1];
-  }
-}
-
-// Applies the affine model 'mat' to 'n' (x, y) points:
-//   x' = mat[2] * x + mat[3] * y + mat[0]
-//   y' = mat[4] * x + mat[5] * y + mat[1]
-// Consecutive input points are 'stride_points' doubles apart and consecutive
-// outputs are 'stride_proj' doubles apart.
-static void project_points_double_affine(double *mat, double *points,
-                                         double *proj, const int n,
-                                         const int stride_points,
-                                         const int stride_proj) {
-  for (int k = 0; k < n; ++k) {
-    const double *const in = points + k * stride_points;
-    double *const out = proj + k * stride_proj;
-    // Read both coordinates before writing, as the output may alias the input.
-    const double x = in[0];
-    const double y = in[1];
-    out[0] = mat[2] * x + mat[3] * y + mat[0];
-    out[1] = mat[4] * x + mat[5] * y + mat[1];
-  }
-}
-
-// Normalises the 'n' (x, y) points stored contiguously in 'pts', in place: the
-// points are shifted so that their mean is the origin and then scaled so that
-// their mean distance from the origin is sqrt(2) (no scaling when that
-// distance is zero). The equivalent 3x3 matrix
-//   [ s 0 -s*mean_x ; 0 s -s*mean_y ; 0 0 1 ]
-// is written to T[0..8].
-static void normalize_homography(double *pts, int n, double *T) {
-  double mean_x = 0;
-  double mean_y = 0;
-  double mean_dist = 0;
-  int k;
-
-  assert(n > 0);
-  for (k = 0; k < n; ++k) {
-    mean_x += pts[2 * k];
-    mean_y += pts[2 * k + 1];
-  }
-  mean_x /= n;
-  mean_y /= n;
-
-  // Centre the points and accumulate their distances from the new origin.
-  for (k = 0; k < n; ++k) {
-    double *const pt = pts + 2 * k;
-    pt[0] -= mean_x;
-    pt[1] -= mean_y;
-    mean_dist += sqrt(pt[0] * pt[0] + pt[1] * pt[1]);
-  }
-  mean_dist /= n;
-
-  const double scale = (mean_dist == 0) ? 1.0 : sqrt(2) / mean_dist;
-  T[0] = scale;
-  T[1] = 0;
-  T[2] = -scale * mean_x;
-  T[3] = 0;
-  T[4] = scale;
-  T[5] = -scale * mean_y;
-  T[6] = 0;
-  T[7] = 0;
-  T[8] = 1;
-
-  for (k = 0; k < n; ++k) {
-    pts[2 * k] *= scale;
-    pts[2 * k + 1] *= scale;
-  }
-}
-
-// Writes to 'iT' the inverse of a normalising matrix 'T' of the form
-// [ s 0 tx ; 0 s ty ; 0 0 1 ], i.e. [ 1/s 0 -tx/s ; 0 1/s -ty/s ; 0 0 1 ].
-// Only T[0], T[2] and T[5] are read.
-static void invnormalize_mat(double *T, double *iT) {
-  const double inv_scale = 1.0 / T[0];
-  const double shift_x = -T[2] * inv_scale;
-  const double shift_y = -T[5] * inv_scale;
-
-  iT[0] = inv_scale;
-  iT[4] = inv_scale;
-  iT[2] = shift_x;
-  iT[5] = shift_y;
-  iT[1] = 0;
-  iT[3] = 0;
-  iT[6] = 0;
-  iT[7] = 0;
-  iT[8] = 1;
-}
-
-// Converts the 3x3 model 'params', estimated between normalised point sets,
-// back to un-normalised coordinates, in place. 'T1' and 'T2' are the
-// normalising matrices of the source and destination points.
-// NOTE(review): presumably multiply_mat(a, b, out, 3, 3, 3) computes
-// out = a * b, making the result inverse(T2) * params * T1 -- confirm against
-// mathutils.h.
-static void denormalize_homography(double *params, double *T1, double *T2) {
-  double params_T1[9];
-  double inv_T2[9];
-  multiply_mat(params, T1, params_T1, 3, 3, 3);
-  invnormalize_mat(T2, inv_T2);
-  multiply_mat(inv_T2, params_T1, params, 3, 3, 3);
-}
-
-// Denormalises an affine least-squares solution, in place. On entry
-// params[0..5] are read as the matrix rows
-//   [ params[0] params[1] params[4] ; params[2] params[3] params[5] ].
-// On exit params[0..1] hold the translation column, params[2..5] the 2x2
-// linear part in row order, and params[6] = params[7] = 0.
-static void denormalize_affine_reorder(double *params, double *T1, double *T2) {
-  double h[MAX_PARAMDIM] = {
-    params[0], params[1], params[4],  // first row
-    params[2], params[3], params[5],  // second row
-    0,         0,         1,          // homogeneous row
-  };
-  denormalize_homography(h, T1, T2);
-  params[0] = h[2];
-  params[1] = h[5];
-  params[2] = h[0];
-  params[3] = h[1];
-  params[4] = h[3];
-  params[5] = h[4];
-  params[6] = 0;
-  params[7] = 0;
-}
-
-// Denormalises a rotation+zoom least-squares solution, in place. On entry
-// params[0..3] are read as the matrix rows
-//   [ params[0] params[1] params[2] ; -params[1] params[0] params[3] ].
-// On exit params[0..1] hold the translation column, params[2..3] the first
-// row of the 2x2 linear part, params[4] = -params[3], params[5] = params[2],
-// and params[6] = params[7] = 0.
-static void denormalize_rotzoom_reorder(double *params, double *T1,
-                                        double *T2) {
-  double h[MAX_PARAMDIM] = {
-    params[0],  params[1], params[2],  // first row
-    -params[1], params[0], params[3],  // second row
-    0,          0,         1,          // homogeneous row
-  };
-  denormalize_homography(h, T1, T2);
-  params[0] = h[2];
-  params[1] = h[5];
-  params[2] = h[0];
-  params[3] = h[1];
-  // The second row of the linear part is derived from the first.
-  params[4] = -h[1];
-  params[5] = h[0];
-  params[6] = 0;
-  params[7] = 0;
-}
-
-// Denormalises a translation-only solution, in place. On entry params[0..1]
-// are the translation between the normalised point sets. On exit params[0..1]
-// hold the denormalised translation column, the 2x2 linear part params[2..5]
-// is set to the identity, and params[6] = params[7] = 0.
-static void denormalize_translation_reorder(double *params, double *T1,
-                                            double *T2) {
-  double h[MAX_PARAMDIM] = {
-    1, 0, params[0],  // first row
-    0, 1, params[1],  // second row
-    0, 0, 1,          // homogeneous row
-  };
-  denormalize_homography(h, T1, T2);
-  params[0] = h[2];
-  params[1] = h[5];
-  params[2] = 1;
-  params[3] = 0;
-  params[4] = 0;
-  params[5] = 1;
-  params[6] = 0;
-  params[7] = 0;
-}
-
-// Estimates the translation taking 'pts1' onto 'pts2' ('np' interleaved
-// (x, y) points each) as the mean point difference, and writes the resulting
-// model to 'mat' via denormalize_translation_reorder(). Both point arrays are
-// normalised in place as a side effect. Always returns 0.
-static int find_translation(int np, double *pts1, double *pts2, double *mat) {
-  double T1[9], T2[9];
-  double sum_dx = 0;
-  double sum_dy = 0;
-
-  normalize_homography(pts1, np, T1);
-  normalize_homography(pts2, np, T2);
-
-  for (int k = 0; k < np; ++k) {
-    sum_dx += pts2[2 * k] - pts1[2 * k];
-    sum_dy += pts2[2 * k + 1] - pts1[2 * k + 1];
-  }
-  mat[0] = sum_dx / np;
-  mat[1] = sum_dy / np;
-  denormalize_translation_reorder(mat, T1, T2);
-  return 0;
-}
-
-// Fits a rotation+zoom model taking 'pts1' onto 'pts2' ('np' interleaved
-// (x, y) points each) by linear least squares and writes it to 'mat' via
-// denormalize_rotzoom_reorder(). Once the work buffer is allocated, both
-// point arrays are normalised in place as a side effect.
-// Returns 0 on success and 1 if the work buffer cannot be allocated or the
-// least-squares solve fails.
-static int find_rotzoom(int np, double *pts1, double *pts2, double *mat) {
-  const int np2 = np * 2;
-  // One allocation holds the np2 x 4 system matrix 'a', the np2-entry
-  // right-hand side 'b' and 20 doubles of solver scratch space 'temp'.
-  double *a = (double *)aom_malloc(sizeof(*a) * (np2 * 5 + 20));
-  double *b;
-  double *temp;
-  double T1[9], T2[9];
-  int i;
-
-  // Report allocation failure like any other failed fit instead of writing
-  // through a NULL pointer.
-  if (a == NULL) return 1;
-  b = a + np2 * 4;
-  temp = b + np2;
-
-  normalize_homography(pts1, np, T1);
-  normalize_homography(pts2, np, T2);
-
-  for (i = 0; i < np; ++i) {
-    const double dx = pts2[2 * i];
-    const double dy = pts2[2 * i + 1];
-    const double sx = pts1[2 * i];
-    const double sy = pts1[2 * i + 1];
-    double *const row_x = a + (2 * i) * 4;
-    double *const row_y = row_x + 4;
-
-    row_x[0] = sx;
-    row_x[1] = sy;
-    row_x[2] = 1;
-    row_x[3] = 0;
-    row_y[0] = sy;
-    row_y[1] = -sx;
-    row_y[2] = 0;
-    row_y[3] = 1;
-
-    b[2 * i] = dx;
-    b[2 * i + 1] = dy;
-  }
-  if (!least_squares(4, a, np2, 4, b, temp, mat)) {
-    aom_free(a);
-    return 1;
-  }
-  denormalize_rotzoom_reorder(mat, T1, T2);
-  aom_free(a);
-  return 0;
-}
-
-// Fits an affine model taking 'pts1' onto 'pts2' ('np' interleaved (x, y)
-// points each) by linear least squares and writes it to 'mat' via
-// denormalize_affine_reorder(). Once the work buffer is allocated, both point
-// arrays are normalised in place as a side effect.
-// Returns 0 on success and 1 if the work buffer cannot be allocated or the
-// least-squares solve fails.
-static int find_affine(int np, double *pts1, double *pts2, double *mat) {
-  const int np2 = np * 2;
-  // One allocation holds the np2 x 6 system matrix 'a', the np2-entry
-  // right-hand side 'b' and 42 doubles of solver scratch space 'temp'.
-  double *a = (double *)aom_malloc(sizeof(*a) * (np2 * 7 + 42));
-  double *b;
-  double *temp;
-  double T1[9], T2[9];
-  int i;
-
-  // Report allocation failure like any other failed fit instead of writing
-  // through a NULL pointer.
-  if (a == NULL) return 1;
-  b = a + np2 * 6;
-  temp = b + np2;
-
-  normalize_homography(pts1, np, T1);
-  normalize_homography(pts2, np, T2);
-
-  for (i = 0; i < np; ++i) {
-    const double dx = pts2[2 * i];
-    const double dy = pts2[2 * i + 1];
-    const double sx = pts1[2 * i];
-    const double sy = pts1[2 * i + 1];
-    double *const row_x = a + (2 * i) * 6;
-    double *const row_y = row_x + 6;
-
-    row_x[0] = sx;
-    row_x[1] = sy;
-    row_x[2] = 0;
-    row_x[3] = 0;
-    row_x[4] = 1;
-    row_x[5] = 0;
-    row_y[0] = 0;
-    row_y[1] = 0;
-    row_y[2] = sx;
-    row_y[3] = sy;
-    row_y[4] = 0;
-    row_y[5] = 1;
-
-    b[2 * i] = dx;
-    b[2 * i + 1] = dy;
-  }
-  if (!least_squares(6, a, np2, 6, b, temp, mat)) {
-    aom_free(a);
-    return 1;
-  }
-  denormalize_affine_reorder(mat, T1, T2);
-  aom_free(a);
-  return 0;
-}
-
-// Returns non-zero if 'value' occurs among the first 'count' entries of
-// 'indices'.
-static int rand_index_is_taken(const int *indices, int count, int value) {
-  int j;
-  for (j = 0; j < count; ++j) {
-    if (indices[j] == value) return 1;
-  }
-  return 0;
-}
-
-// Draws 'minpts' distinct pseudo-random indices in [0, npoints) into
-// 'indices', advancing the generator state '*seed'.
-// Returns 1 on success and 0 (leaving 'indices' and '*seed' untouched) when
-// 'npoints' is not positive or fewer than 'minpts' points are available.
-static int get_rand_indices(int npoints, int minpts, int *indices,
-                            unsigned int *seed) {
-  int i;
-  int ptr;
-
-  // Validate before the first '% npoints' so that npoints == 0 can never
-  // divide by zero.
-  if (npoints <= 0 || minpts > npoints) return 0;
-
-  ptr = lcg_rand16(seed) % npoints;
-  indices[0] = ptr;
-  ptr = (ptr == npoints - 1 ? 0 : ptr + 1);
-  i = 1;
-  while (i < minpts) {
-    // Skip over 'index' still-unused positions, walking cyclically.
-    int index = lcg_rand16(seed) % npoints;
-    while (index) {
-      ptr = (ptr == npoints - 1 ? 0 : ptr + 1);
-      if (!rand_index_is_taken(indices, i, ptr)) index--;
-    }
-    // When the skip count is 0, 'ptr' may still sit on the previous pick;
-    // move on to the next unused position so that no index is repeated. One
-    // always exists because i < minpts <= npoints.
-    while (rand_index_is_taken(indices, i, ptr)) {
-      ptr = (ptr == npoints - 1 ? 0 : ptr + 1);
-    }
-    indices[i++] = ptr;
-  }
-  return 1;
-}
-
-// One candidate motion tracked by ransac().
-typedef struct {
- int num_inliers; // Number of valid entries in inlier_indices.
- double variance; // Sample variance of the inlier distances;
- // kInfiniteVariance while the motion is empty.
- int *inlier_indices; // Indices of the inlier correspondences; ransac()
- // allocates room for one entry per input point.
-} RANSAC_MOTION;
-
-// qsort()-style comparison of two RANSAC_MOTIONs, best first: returns -1 if
-// 'a' is the better motion, 1 if 'b' is better and 0 if they tie. More inliers
-// is better; for equal inlier counts the lower variance is better.
-static int compare_motions(const void *arg_a, const void *arg_b) {
-  const RANSAC_MOTION *const lhs = (const RANSAC_MOTION *)arg_a;
-  const RANSAC_MOTION *const rhs = (const RANSAC_MOTION *)arg_b;
-
-  if (lhs->num_inliers != rhs->num_inliers) {
-    return lhs->num_inliers > rhs->num_inliers ? -1 : 1;
-  }
-  if (lhs->variance < rhs->variance) return -1;
-  if (lhs->variance > rhs->variance) return 1;
-  return 0;
-}
-
-// Returns non-zero if 'motion_a' is strictly better than 'motion_b': it has
-// more inliers, or the same number of inliers and a lower variance.
-static int is_better_motion(const RANSAC_MOTION *motion_a,
-                            const RANSAC_MOTION *motion_b) {
-  if (motion_a->num_inliers != motion_b->num_inliers) {
-    return motion_a->num_inliers > motion_b->num_inliers;
-  }
-  return motion_a->variance < motion_b->variance;
-}
-
-// Gathers 'num_points' (x, y) pairs from 'src' into 'dest': output pair k is
-// the input pair numbered indices[k]. Both arrays hold interleaved doubles.
-static void copy_points_at_indices(double *dest, const double *src,
-                                   const int *indices, int num_points) {
-  for (int k = 0; k < num_points; ++k) {
-    const double *const from = src + 2 * indices[k];
-    double *const to = dest + 2 * k;
-    to[0] = from[0];
-    to[1] = from[1];
-  }
-}
-
-// Variance assigned by clear_motion() to a motion that has no inliers yet, so
-// that any motion with a computed variance compares as better on a tie in
-// inlier count.
-static const double kInfiniteVariance = 1e12;
-
-// Resets 'motion' to the empty state: no inliers, kInfiniteVariance, and the
-// first 'num_points' entries of its inlier index array zeroed.
-// 'motion->inlier_indices' must point to at least 'num_points' ints.
-static void clear_motion(RANSAC_MOTION *motion, int num_points) {
-  motion->num_inliers = 0;
-  motion->variance = kInfiniteVariance;
-  // The element count must multiply sizeof() from the outside: written inside
-  // the parentheses it only contributes to the operand's type, and the size
-  // of a single int would be cleared.
-  memset(motion->inlier_indices, 0,
-         sizeof(*motion->inlier_indices) * num_points);
-}
-
-// Fits up to 'num_desired_motions' motion models to point correspondences
-// using RANSAC.
-//
-// matched_points:        'npoints' groups of four ints; the first two are a
-//                        point in the first image and the last two the
-//                        corresponding point in the second image.
-// num_inliers_by_motion: output, one inlier count per motion, best motion
-//                        first. All entries are zeroed on entry.
-// params_by_motion:      output; the model of motion i is written starting at
-//                        params_by_motion + (MAX_PARAMDIM - 1) * i, and only
-//                        if that motion has at least 'minpts' inliers.
-// minpts:                number of correspondences sampled per trial; must
-//                        not exceed MAX_MINPTS.
-// is_degenerate / find_transformation / projectpoints: model-specific hooks.
-//
-// Returns 0 on success and 1 on failure (too few points, allocation failure,
-// or too many consecutive degenerate samples).
-static int ransac(const int *matched_points, int npoints,
-                  int *num_inliers_by_motion, double *params_by_motion,
-                  int num_desired_motions, const int minpts,
-                  IsDegenerateFunc is_degenerate,
-                  FindTransformationFunc find_transformation,
-                  ProjectPointsDoubleFunc projectpoints) {
-  static const double PROBABILITY_REQUIRED = 0.9;
-  static const double EPS = 1e-12;
-
-  int N = 10000, trial_count = 0;
-  int i = 0;
-  int ret_val = 0;
-  int alloc_ok;
-
-  unsigned int seed = (unsigned int)npoints;
-
-  int indices[MAX_MINPTS] = { 0 };
-
-  double *points1, *points2;
-  double *corners1, *corners2;
-  double *image1_coord;
-
-  // Store information for the num_desired_motions best transformations found
-  // and the worst motion among them, as well as the motion currently under
-  // consideration.
-  RANSAC_MOTION *motions, *worst_kept_motion = NULL;
-  RANSAC_MOTION current_motion;
-
-  // Store the parameters and the indices of the inlier points for the motion
-  // currently under consideration.
-  double params_this_motion[MAX_PARAMDIM];
-
-  double *cnp1, *cnp2;
-
-  assert(minpts <= MAX_MINPTS);
-
-  for (i = 0; i < num_desired_motions; ++i) {
-    num_inliers_by_motion[i] = 0;
-  }
-  if (npoints < minpts * MINPTS_MULTIPLIER || npoints == 0) {
-    return 1;
-  }
-
-  points1 = (double *)aom_malloc(sizeof(*points1) * npoints * 2);
-  points2 = (double *)aom_malloc(sizeof(*points2) * npoints * 2);
-  corners1 = (double *)aom_malloc(sizeof(*corners1) * npoints * 2);
-  corners2 = (double *)aom_malloc(sizeof(*corners2) * npoints * 2);
-  image1_coord = (double *)aom_malloc(sizeof(*image1_coord) * npoints * 2);
-  current_motion.inlier_indices =
-      (int *)aom_malloc(sizeof(*current_motion.inlier_indices) * npoints);
-  motions =
-      (RANSAC_MOTION *)aom_malloc(sizeof(RANSAC_MOTION) * num_desired_motions);
-
-  alloc_ok = points1 && points2 && corners1 && corners2 && image1_coord &&
-             motions && current_motion.inlier_indices;
-  if (motions) {
-    // Assign every entry, even after a failure, so that the cleanup code can
-    // free all of them unconditionally.
-    for (i = 0; i < num_desired_motions; ++i) {
-      motions[i].inlier_indices =
-          (int *)aom_malloc(sizeof(*motions->inlier_indices) * npoints);
-      if (!motions[i].inlier_indices) alloc_ok = 0;
-    }
-  }
-  // Nothing allocated above may be touched before this check.
-  if (!alloc_ok) {
-    ret_val = 1;
-    goto finish_ransac;
-  }
-
-  for (i = 0; i < num_desired_motions; ++i) {
-    clear_motion(motions + i, npoints);
-  }
-  clear_motion(&current_motion, npoints);
-
-  worst_kept_motion = motions;
-
-  // Split the interleaved correspondences into the two point sets.
-  cnp1 = corners1;
-  cnp2 = corners2;
-  for (i = 0; i < npoints; ++i) {
-    *(cnp1++) = *(matched_points++);
-    *(cnp1++) = *(matched_points++);
-    *(cnp2++) = *(matched_points++);
-    *(cnp2++) = *(matched_points++);
-  }
-
-  while (N > trial_count) {
-    double sum_distance = 0.0;
-    double sum_distance_squared = 0.0;
-
-    clear_motion(&current_motion, npoints);
-
-    int degenerate = 1;
-    int num_degenerate_iter = 0;
-
-    // Draw minimal samples until one is non-degenerate, giving up after
-    // MAX_DEGENERATE_ITER attempts.
-    while (degenerate) {
-      num_degenerate_iter++;
-      if (!get_rand_indices(npoints, minpts, indices, &seed)) {
-        ret_val = 1;
-        goto finish_ransac;
-      }
-
-      copy_points_at_indices(points1, corners1, indices, minpts);
-      copy_points_at_indices(points2, corners2, indices, minpts);
-
-      degenerate = is_degenerate(points1);
-      if (num_degenerate_iter > MAX_DEGENERATE_ITER) {
-        ret_val = 1;
-        goto finish_ransac;
-      }
-    }
-
-    if (find_transformation(minpts, points1, points2, params_this_motion)) {
-      trial_count++;
-      continue;
-    }
-
-    projectpoints(params_this_motion, corners1, image1_coord, npoints, 2, 2);
-
-    // Collect the correspondences that the candidate model explains to within
-    // INLIER_THRESHOLD.
-    for (i = 0; i < npoints; ++i) {
-      double dx = image1_coord[i * 2] - corners2[i * 2];
-      double dy = image1_coord[i * 2 + 1] - corners2[i * 2 + 1];
-      double distance = sqrt(dx * dx + dy * dy);
-
-      if (distance < INLIER_THRESHOLD) {
-        current_motion.inlier_indices[current_motion.num_inliers++] = i;
-        sum_distance += distance;
-        sum_distance_squared += distance * distance;
-      }
-    }
-
-    if (current_motion.num_inliers >= worst_kept_motion->num_inliers &&
-        current_motion.num_inliers > 1) {
-      int temp;
-      double fracinliers, pNoOutliers, mean_distance, dtemp;
-      mean_distance = sum_distance / ((double)current_motion.num_inliers);
-      current_motion.variance =
-          sum_distance_squared / ((double)current_motion.num_inliers - 1.0) -
-          mean_distance * mean_distance * ((double)current_motion.num_inliers) /
-              ((double)current_motion.num_inliers - 1.0);
-      if (is_better_motion(&current_motion, worst_kept_motion)) {
-        // This motion is better than the worst currently kept motion. Remember
-        // the inlier points and variance. The parameters for each kept motion
-        // will be recomputed later using only the inliers.
-        worst_kept_motion->num_inliers = current_motion.num_inliers;
-        worst_kept_motion->variance = current_motion.variance;
-        memcpy(worst_kept_motion->inlier_indices, current_motion.inlier_indices,
-               sizeof(*current_motion.inlier_indices) * npoints);
-
-        // Re-estimate the number of trials needed to draw an all-inlier
-        // sample with probability PROBABILITY_REQUIRED.
-        assert(npoints > 0);
-        fracinliers = (double)current_motion.num_inliers / (double)npoints;
-        pNoOutliers = 1 - pow(fracinliers, minpts);
-        pNoOutliers = fmax(EPS, pNoOutliers);
-        pNoOutliers = fmin(1 - EPS, pNoOutliers);
-        dtemp = log(1.0 - PROBABILITY_REQUIRED) / log(pNoOutliers);
-        temp = (dtemp > (double)INT32_MAX)
-                   ? INT32_MAX
-                   : dtemp < (double)INT32_MIN ? INT32_MIN : (int)dtemp;
-
-        if (temp > 0 && temp < N) {
-          N = AOMMAX(temp, MIN_TRIALS);
-        }
-
-        // Determine the new worst kept motion and its num_inliers and variance.
-        for (i = 0; i < num_desired_motions; ++i) {
-          if (is_better_motion(worst_kept_motion, &motions[i])) {
-            worst_kept_motion = &motions[i];
-          }
-        }
-      }
-    }
-    trial_count++;
-  }
-
-  // Sort the motions, best first.
-  qsort(motions, num_desired_motions, sizeof(RANSAC_MOTION), compare_motions);
-
-  // Recompute the motions using only the inliers.
-  for (i = 0; i < num_desired_motions; ++i) {
-    if (motions[i].num_inliers >= minpts) {
-      copy_points_at_indices(points1, corners1, motions[i].inlier_indices,
-                             motions[i].num_inliers);
-      copy_points_at_indices(points2, corners2, motions[i].inlier_indices,
-                             motions[i].num_inliers);
-
-      find_transformation(motions[i].num_inliers, points1, points2,
-                          params_by_motion + (MAX_PARAMDIM - 1) * i);
-    }
-    num_inliers_by_motion[i] = motions[i].num_inliers;
-  }
-
-finish_ransac:
-  // Any of these may be NULL after a failed allocation; aom_free() is relied
-  // on to accept that.
-  aom_free(points1);
-  aom_free(points2);
-  aom_free(corners1);
-  aom_free(corners2);
-  aom_free(image1_coord);
-  aom_free(current_motion.inlier_indices);
-  if (motions) {
-    for (i = 0; i < num_desired_motions; ++i) {
-      aom_free(motions[i].inlier_indices);
-    }
-  }
-  aom_free(motions);
-
-  return ret_val;
-}
-
-// Returns non-zero if the three 2-D points are (nearly) collinear, i.e. the
-// magnitude of the cross product of (p2 - p1) and (p3 - p1) is below 1e-3.
-static int is_collinear3(double *p1, double *p2, double *p3) {
-  static const double collinear_eps = 1e-3;
-  const double ax = p2[0] - p1[0];
-  const double ay = p2[1] - p1[1];
-  const double bx = p3[0] - p1[0];
-  const double by = p3[1] - p1[1];
-  const double cross = ax * by - ay * bx;
-  return fabs(cross) < collinear_eps;
-}
-
-// Returns non-zero if the first two sampled points (p[0], p[1]) and
-// (p[2], p[3]) are too close together: squared distance of at most 2.
-static int is_degenerate_translation(double *p) {
-  const double dx = p[0] - p[2];
-  const double dy = p[1] - p[3];
-  return dx * dx + dy * dy <= 2;
-}
-
-// Returns non-zero if the first three sampled points, stored as interleaved
-// (x, y) doubles in p[0..5], are (nearly) collinear.
-static int is_degenerate_affine(double *p) {
- return is_collinear3(p, p + 2, p + 4);
-}
-
-// Fits translation-only motion models to 'npoints' correspondences by running
-// ransac() with the translation hooks and 3 points per sample (only the first
-// two are examined by the degeneracy test). Outputs and the return value
-// (0 on success, 1 on failure) are those of ransac().
-int ransac_translation(int *matched_points, int npoints,
- int *num_inliers_by_motion, double *params_by_motion,
- int num_desired_motions) {
- return ransac(matched_points, npoints, num_inliers_by_motion,
- params_by_motion, num_desired_motions, 3,
- is_degenerate_translation, find_translation,
- project_points_double_translation);
-}
-
-// Fits rotation+zoom motion models to 'npoints' correspondences by running
-// ransac() with the rotzoom hooks and 3 points per sample; samples whose three
-// points are collinear are rejected. Outputs and the return value (0 on
-// success, 1 on failure) are those of ransac().
-int ransac_rotzoom(int *matched_points, int npoints, int *num_inliers_by_motion,
- double *params_by_motion, int num_desired_motions) {
- return ransac(matched_points, npoints, num_inliers_by_motion,
- params_by_motion, num_desired_motions, 3, is_degenerate_affine,
- find_rotzoom, project_points_double_rotzoom);
-}
-
-// Fits affine motion models to 'npoints' correspondences by running ransac()
-// with the affine hooks and 3 points per sample; samples whose three points
-// are collinear are rejected. Outputs and the return value (0 on success, 1 on
-// failure) are those of ransac().
-int ransac_affine(int *matched_points, int npoints, int *num_inliers_by_motion,
- double *params_by_motion, int num_desired_motions) {
- return ransac(matched_points, npoints, num_inliers_by_motion,
- params_by_motion, num_desired_motions, 3, is_degenerate_affine,
- find_affine, project_points_double_affine);
-}
diff --git a/third_party/aom/av1/encoder/ransac.h b/third_party/aom/av1/encoder/ransac.h
deleted file mode 100644
index c429f2ce5..000000000
--- a/third_party/aom/av1/encoder/ransac.h
+++ /dev/null
@@ -1,35 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#ifndef AOM_AV1_ENCODER_RANSAC_H_
-#define AOM_AV1_ENCODER_RANSAC_H_
-
-#include <stdio.h>
-#include <stdlib.h>
-#include <math.h>
-#include <memory.h>
-
-#include "av1/common/warped_motion.h"
-
-typedef int (*RansacFunc)(int *matched_points, int npoints,
- int *num_inliers_by_motion, double *params_by_motion,
- int num_motions);
-
-/* Each of these functions fits a motion model from a set of
- corresponding points in 2 frames using RANSAC. */
-int ransac_affine(int *matched_points, int npoints, int *num_inliers_by_motion,
- double *params_by_motion, int num_motions);
-int ransac_rotzoom(int *matched_points, int npoints, int *num_inliers_by_motion,
- double *params_by_motion, int num_motions);
-int ransac_translation(int *matched_points, int npoints,
- int *num_inliers_by_motion, double *params_by_motion,
- int num_motions);
-#endif // AOM_AV1_ENCODER_RANSAC_H_
diff --git a/third_party/aom/av1/encoder/rate_distortion_model_params.h b/third_party/aom/av1/encoder/rate_distortion_model_params.h
deleted file mode 100644
index 7cd0962c5..000000000
--- a/third_party/aom/av1/encoder/rate_distortion_model_params.h
+++ /dev/null
@@ -1,591 +0,0 @@
-/*
- * Copyright (c) 2018, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#ifndef AOM_AV1_ENCODER_RATE_DISTORTION_MODEL_PARAMS_H_
-#define AOM_AV1_ENCODER_RATE_DISTORTION_MODEL_PARAMS_H_
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-#include "av1/encoder/ml.h"
-
-// 22 float features +
-// 2 categorical features with 4 possible values, converted to one-hot vectors.
-// So, total 22 + 2 * 4 = 30 features.
-#define NUM_FEATURES 30
-#define NUM_HIDDEN_LAYERS 1
-#define NUM_HIDDEN_NODES 96
-#define NUM_OUTPUTS 1
-
-//------------------------------------------------------------------------------
-// RDCost model
-
-static const float
- av1_rdcost_model_nn_weights_layer0[NUM_FEATURES * NUM_HIDDEN_NODES] = {
- -0.0699f, 0.2790f, 0.1915f, 0.2669f, 0.4637f, 0.4095f,
- 0.2129f, 0.0634f, 0.2306f, -0.2232f, -0.5711f, -0.6493f,
- -0.7406f, -0.8440f, 0.4105f, 0.1392f, 0.5218f, -0.1618f,
- -0.1719f, 0.3409f, 0.1111f, -0.3609f, -0.2929f, 0.3869f,
- -0.5373f, 0.0700f, 0.2572f, 0.2483f, -0.0314f, 0.5228f,
- 0.0169f, -0.1357f, 0.0419f, -0.1722f, 0.1303f, 0.1198f,
- -0.0013f, 0.1309f, 0.0293f, -0.1941f, 0.0668f, -0.0643f,
- -0.0381f, 0.1249f, -0.0731f, -0.1649f, 0.0964f, 0.0270f,
- 0.1354f, 0.0538f, -0.2064f, -0.2067f, -0.0569f, 0.0449f,
- 0.1680f, -0.0732f, -0.0785f, 0.1884f, -0.2137f, -0.0189f,
- 0.2976f, 0.2818f, -0.0222f, 0.2658f, 0.0488f, 0.2778f,
- -0.1110f, 0.2069f, -0.0072f, -0.0095f, -0.1105f, -0.1365f,
- -0.4245f, -0.4751f, -0.0736f, 0.2333f, 0.0653f, -0.0249f,
- 0.0055f, -0.0838f, -0.0489f, -0.2597f, 0.2621f, -0.0251f,
- -0.0545f, 0.0816f, -0.0816f, 0.3396f, -0.1047f, 0.3678f,
- 0.1487f, -0.0270f, 0.2574f, 0.1018f, 0.2560f, -0.0598f,
- -0.0446f, -0.1792f, 0.5336f, -0.1590f, -0.9820f, -0.6514f,
- -0.6304f, -0.8359f, -0.0699f, 0.0295f, -0.0057f, -0.3088f,
- -0.1466f, 0.2220f, -0.1980f, -0.3400f, -0.1228f, 0.2667f,
- -0.4816f, 0.0155f, -0.0194f, 0.2051f, 0.0513f, 0.1575f,
- -121.4240f, -126.6840f, -124.1106f, -127.6184f, -85.0333f, -26.6396f,
- 2.7020f, 102.0452f, -85.5128f, 0.0076f, 122.2206f, 107.5265f,
- 108.3773f, 93.4847f, 20.3705f, -89.6993f, -176.9070f, -41.7543f,
- -123.0293f, -91.6437f, -205.7099f, -62.5346f, -83.2987f, 21.3830f,
- 56.6341f, -120.8647f, -127.7562f, -121.6688f, -127.4225f, -74.8045f,
- -15.9247f, -14.6468f, -14.7788f, -15.4498f, -18.5514f, -11.1579f,
- -5.8164f, -3.4318f, 0.8100f, 0.0642f, 203.5111f, 189.6872f,
- 190.4776f, 176.4784f, -4.9427f, -12.5324f, -7.6861f, 21.9182f,
- -6.7864f, -7.1906f, -8.1292f, 21.4780f, -7.8016f, -5.2653f,
- 61.8526f, -15.5105f, -14.6900f, -14.1459f, -15.4350f, -19.1379f,
- -0.7876f, -1.8558f, -4.6035f, -6.8405f, -0.2904f, 2.3202f,
- 1.8127f, -2.9397f, -0.8187f, -0.6098f, 22.6173f, 10.3668f,
- 12.9363f, 2.4541f, 6.6700f, 0.3804f, -3.3117f, 8.5464f,
- -25.8348f, 1.8698f, -9.5753f, 8.5558f, -16.3576f, 7.2217f,
- 35.3115f, -1.1447f, -2.6530f, -4.7027f, -5.7024f, -0.9513f,
- 0.8393f, 0.7085f, 0.7879f, 0.3728f, 3.0574f, 1.1360f,
- 26.0531f, 4.1084f, -1.7340f, 0.1683f, -450.7927f, -444.5818f,
- -442.5239f, -438.1168f, 2.4924f, -0.0147f, -0.0797f, -47.5322f,
- -1.7638f, -0.8608f, -0.6500f, -44.4326f, -0.9027f, 2.5560f,
- -267.6517f, 0.2642f, 0.9457f, 0.7944f, 0.3609f, 3.2742f,
- -74.3400f, -81.6894f, -76.2162f, -69.2979f, -90.2476f, -39.7389f,
- 2.2545f, 36.5095f, -60.1129f, -1.0383f, 87.0348f, 83.9940f,
- 83.7199f, 80.8609f, 14.9075f, -78.7405f, -74.3549f, -4.2382f,
- -23.9739f, -91.8469f, -67.2654f, -21.5293f, -9.9857f, 11.8391f,
- 35.8223f, -74.2551f, -81.0729f, -73.8347f, -70.3798f, -86.8052f,
- 0.1701f, -0.1136f, 0.0060f, -0.0496f, -0.1727f, 0.0195f,
- -0.1040f, 0.1027f, 0.0467f, -0.2538f, -0.1322f, 0.0860f,
- 0.0093f, -0.2801f, -0.0958f, 0.0497f, -0.0582f, -0.0311f,
- 0.1840f, 0.0752f, 0.0282f, 0.0297f, 0.0607f, 0.0650f,
- 0.0893f, 0.1297f, 0.0373f, 0.0040f, -0.0973f, 0.0248f,
- -0.1419f, 0.0322f, -0.0712f, 0.0860f, -0.0426f, -0.1989f,
- 0.1393f, -0.1183f, 0.0735f, -0.1895f, 0.1447f, -0.0056f,
- -0.1833f, 0.0884f, 0.0949f, 0.0476f, 0.0551f, 0.2125f,
- -0.1537f, -0.0141f, -0.2182f, 0.1567f, 0.0457f, -0.1485f,
- -0.1177f, 0.0391f, 0.1982f, -0.1288f, 0.1165f, -0.2019f,
- 0.4550f, 0.5179f, 0.4311f, 0.1861f, 0.6199f, 0.4542f,
- 0.2034f, 0.1128f, 1.3489f, -0.2525f, -2.1139f, -2.2444f,
- -2.3679f, -2.3378f, 0.5682f, 0.1348f, 0.3032f, -1.5835f,
- 0.2883f, 0.1693f, 0.0439f, -1.4556f, 0.3818f, 0.4875f,
- -1.8899f, 0.2510f, 0.6450f, 0.6082f, 0.5962f, 0.8131f,
- 12.0281f, 13.3899f, 13.6249f, 15.8068f, -1.5453f, 6.7456f,
- -6.0877f, 26.2596f, 6.2223f, -0.5922f, 134.1428f, 128.8985f,
- 128.7538f, 123.0920f, 1.3207f, 18.3069f, 15.7436f, 46.5230f,
- 24.7455f, 15.0688f, 19.9965f, 34.7236f, 19.7171f, 1.2018f,
- 49.7274f, 11.8957f, 13.1578f, 14.0451f, 15.3544f, -3.5601f,
- 1.0048f, 0.9479f, 1.1832f, 2.0635f, -2.9808f, 2.0803f,
- -7.5815f, 8.4733f, -4.2008f, 0.1217f, 226.5257f, 210.7018f,
- 211.6235f, 195.2605f, 0.8283f, 1.0977f, 1.4858f, 41.1242f,
- 1.5822f, 0.8742f, 2.0440f, 33.6213f, 1.6177f, 0.9661f,
- 65.0014f, 1.4197f, 1.0109f, 1.3153f, 1.5470f, -3.2833f,
- 2.0858f, 2.0012f, 2.1088f, 2.5593f, -0.9422f, 1.8554f,
- -6.5378f, 0.6780f, 2.3186f, 0.0506f, 218.3285f, 203.4055f,
- 204.0362f, 188.7854f, 0.3701f, 2.5257f, 3.5172f, 28.8144f,
- 2.1511f, 3.4676f, 2.6337f, 28.5113f, 2.4254f, -0.0548f,
- 59.4511f, 2.0757f, 2.1551f, 2.2271f, 2.5300f, -1.4173f,
- 91.9240f, 88.2142f, 83.6155f, 82.2482f, -9.2566f, 10.9654f,
- -2.6974f, 62.6750f, -3.6298f, -0.1245f, 69.6721f, 67.1340f,
- 66.9162f, 64.1994f, -83.6778f, 76.8107f, 69.7832f, 64.9261f,
- 68.4901f, 76.3615f, 70.8108f, 63.5435f, 69.1973f, -83.6034f,
- 24.8275f, 90.1923f, 87.6831f, 82.9783f, 81.8558f, -7.1010f,
- 95.1656f, 88.3853f, 80.5835f, 79.5990f, -3.0720f, 8.1290f,
- -0.6151f, 63.6425f, -4.5833f, -0.0063f, 70.1861f, 66.6250f,
- 66.6148f, 63.0886f, -89.2863f, 74.7684f, 64.8897f, 60.4134f,
- 62.5241f, 78.7076f, 61.7234f, 60.1688f, 61.9509f, -89.4098f,
- 30.3361f, 92.9144f, 88.5954f, 79.6336f, 79.2453f, -0.4101f,
- 0.6287f, 0.8050f, 0.4417f, 0.5419f, 0.5972f, 1.3037f,
- 0.4316f, -0.0013f, -0.3673f, -0.4952f, 6.1773f, 5.7825f,
- 6.1705f, 5.3848f, 1.7607f, -0.0152f, -0.2924f, 0.8199f,
- 1.3326f, 0.7197f, -0.6332f, 1.1127f, 1.0472f, 1.8468f,
- 3.4419f, 0.8233f, 0.7175f, 0.8514f, 0.6372f, 0.9472f,
- -0.0813f, -0.0197f, -0.0096f, -0.2015f, 0.1133f, -0.0305f,
- 0.0578f, 0.1375f, -0.0750f, -0.1702f, 0.1246f, -0.1782f,
- 0.2017f, 0.0425f, -0.0602f, 0.1837f, 0.1044f, -0.1273f,
- -0.1431f, 0.0672f, -0.1807f, -0.1045f, -0.1355f, -0.0497f,
- -0.0561f, -0.0633f, 0.1907f, -0.0777f, 0.1203f, 0.0754f,
- 0.4079f, 0.2001f, 0.0558f, 0.0622f, 0.2951f, 0.6541f,
- -0.0068f, 0.1070f, 0.4469f, -0.1266f, -1.3035f, -1.3324f,
- -1.3612f, -0.9966f, 0.7986f, 0.3192f, -0.5028f, -0.3844f,
- -0.4079f, 0.6690f, -0.5109f, -0.2719f, -0.4958f, 1.0310f,
- -0.8044f, 0.1447f, 0.4221f, 0.3194f, 0.3063f, 0.5520f,
- 0.4667f, -5.7238f, -0.5602f, 12.6339f, -15.1865f, -14.9035f,
- -3.0726f, 9.5347f, -24.6225f, -2.7086f, 89.8557f, 95.0657f,
- 93.8693f, 99.1085f, -35.9483f, -18.0363f, -1.6298f, 25.3484f,
- 39.3975f, -15.3199f, 5.7664f, 17.2367f, 25.2788f, -36.5648f,
- 29.1426f, 0.3857f, -5.2117f, 0.0533f, 12.1707f, -11.1735f,
- 0.2673f, 0.0090f, 0.1574f, 0.0904f, 0.0281f, 0.1144f,
- 0.1123f, -0.0061f, 0.0954f, -0.0094f, -0.4387f, -0.5006f,
- -0.2560f, -0.2326f, -0.1769f, 0.0465f, 0.1273f, -0.1627f,
- 0.2987f, -0.3041f, 0.1131f, -0.3620f, 0.0932f, -0.0649f,
- -0.4597f, 0.2535f, -0.0994f, 0.1390f, 0.1279f, 0.4207f,
- -39.1159f, -42.6382f, -38.4225f, -31.2301f, -28.2382f, -28.1176f,
- -9.5822f, 1.1886f, -1.2964f, -0.7908f, 154.9819f, 147.1914f,
- 147.0482f, 138.7535f, -21.7014f, -35.7117f, -28.8802f, -3.8968f,
- -21.5007f, -28.2213f, -28.4878f, -3.7558f, -26.8317f, -22.8491f,
- 50.9464f, -37.0918f, -42.8811f, -39.3079f, -32.1904f, -26.6354f,
- -72.5346f, -75.5751f, -72.6896f, -71.3671f, -35.3279f, -21.6077f,
- -5.8259f, 38.7516f, -6.8012f, 0.0172f, 170.0685f, 157.4452f,
- 158.2334f, 145.0102f, 10.0653f, -45.1775f, -56.4571f, -5.1165f,
- -75.8980f, -46.8672f, -55.3642f, -6.5631f, -81.0258f, 10.1348f,
- 55.9786f, -70.8124f, -75.7040f, -73.9831f, -70.8786f, -34.9723f,
- 88.6239f, 86.5330f, 80.9333f, 79.6833f, -10.0096f, 10.6312f,
- -4.2350f, 62.6230f, -3.2991f, -0.0843f, 75.8659f, 72.7886f,
- 72.5301f, 68.8265f, -81.8276f, 70.3025f, 62.9511f, 62.5706f,
- 69.1842f, 69.3637f, 65.4820f, 65.4357f, 71.5347f, -82.1064f,
- 24.1925f, 86.2418f, 85.4985f, 80.4091f, 79.5378f, -9.3877f,
- -7.6594f, -4.9581f, -10.6385f, -20.2307f, -44.2261f, -13.7557f,
- -4.5344f, 18.1793f, -10.5522f, -1.5878f, 110.3187f, 102.4945f,
- 102.3305f, 94.1324f, -25.2665f, 9.8172f, -4.4791f, 69.4972f,
- -6.7571f, 5.8378f, -11.6101f, 70.7066f, -4.9327f, -24.0513f,
- 41.4598f, -7.0600f, -7.0940f, -10.2478f, -18.9616f, -46.7505f,
- 90.9365f, 86.0260f, 73.2934f, 69.3406f, 3.3863f, 3.8524f,
- 0.6536f, 63.2150f, -10.6304f, 0.0291f, 73.0071f, 69.7660f,
- 69.0457f, 65.5611f, -92.3379f, 74.2756f, 54.5025f, 84.3183f,
- 53.7481f, 73.5624f, 55.3827f, 82.3242f, 53.5432f, -92.5355f,
- 25.3457f, 89.1858f, 84.4763f, 72.9840f, 69.1889f, 4.6719f,
- -0.0129f, 0.1995f, 0.2069f, 0.0358f, 0.1209f, -0.1185f,
- -0.1217f, -0.1456f, 0.0125f, -0.1354f, 0.0510f, -0.0572f,
- 0.1397f, 0.1453f, -0.0086f, 0.0107f, 0.0232f, 0.1508f,
- 0.0884f, -0.0967f, -0.1786f, 0.1361f, -0.1399f, -0.2021f,
- -0.0242f, -0.2169f, 0.0133f, 0.0116f, -0.1489f, -0.0093f,
- -0.0796f, 0.1507f, 0.0906f, 0.0228f, -0.0166f, -0.1875f,
- 0.0471f, 0.1184f, -0.0007f, -0.2732f, -0.1386f, -0.2057f,
- -0.0213f, -0.1699f, 0.0996f, 0.1562f, 0.1850f, -0.0362f,
- -0.2059f, 0.0258f, -0.0135f, -0.1276f, 0.0034f, 0.2023f,
- 0.0857f, -0.0085f, -0.1955f, -0.1666f, -0.0920f, 0.0971f,
- -0.0292f, -0.0512f, -0.0753f, -0.0739f, -0.0873f, -0.1200f,
- 0.0220f, -0.1359f, 0.2013f, -0.0445f, 0.1143f, -0.1484f,
- -0.1556f, -0.0003f, 0.1711f, -0.0724f, -0.0531f, 0.1126f,
- 0.0476f, -0.0057f, 0.0088f, 0.0792f, -0.0438f, -0.1118f,
- -0.0244f, 0.0712f, 0.0930f, -0.0203f, 0.1662f, -0.0695f,
- -12.3872f, -18.7022f, -13.4237f, -1.4731f, -18.6843f, -14.1515f,
- -7.5057f, 40.2090f, -2.7774f, -1.8433f, 123.6006f, 119.0557f,
- 118.2758f, 113.6423f, -32.6216f, -19.5865f, -16.2897f, 17.2068f,
- 6.3559f, -17.8742f, 0.7098f, 11.5970f, -10.1104f, -33.1830f,
- 39.5617f, -10.5499f, -17.8137f, -14.7185f, -2.6172f, -14.6004f,
- 0.3893f, 0.4443f, 0.5305f, 0.3049f, 0.8316f, 0.8679f,
- 0.2265f, 0.2393f, 1.1970f, -0.2891f, -1.8666f, -1.8266f,
- -1.6984f, -1.8787f, 0.8706f, 0.4208f, 0.5076f, -0.8436f,
- -0.1623f, 0.8008f, 0.1512f, -1.0839f, -0.3002f, 0.9263f,
- -1.3031f, 0.5964f, 0.3413f, 0.5551f, 0.2618f, 0.7018f,
- -0.1320f, -0.1944f, -0.0209f, -0.0877f, 0.0721f, -0.0840f,
- 0.0589f, 0.1019f, 0.1927f, -0.2011f, -0.1117f, 0.1575f,
- 0.1080f, -0.0516f, 0.2154f, -0.1231f, 0.0426f, -0.0522f,
- -0.1824f, -0.1923f, -0.1206f, -0.1724f, -0.0798f, 0.0401f,
- -0.2170f, 0.0293f, -0.0853f, 0.1517f, 0.2128f, -0.1934f,
- 0.0406f, 0.0517f, 0.0822f, -0.0150f, 0.0943f, -0.0989f,
- -0.1802f, -0.1453f, -0.1967f, -0.1797f, 0.1545f, -0.1217f,
- 0.1755f, -0.1604f, -0.0515f, 0.0509f, 0.0310f, -0.1220f,
- -0.1770f, -0.0157f, 0.1989f, -0.0069f, 0.1766f, 0.1267f,
- -0.0517f, -0.0396f, 0.0346f, 0.1946f, 0.1162f, -0.1345f,
- -106.6179f, -110.5917f, -107.5476f, -108.0601f, -61.1687f, -22.4247f,
- 2.6632f, 109.5208f, -66.1177f, 0.0062f, 159.9339f, 144.7755f,
- 145.5032f, 128.9872f, 18.9180f, -75.3569f, -105.0866f, -52.0704f,
- -119.1299f, -74.7543f, -109.9468f, -59.0682f, -104.5754f, 19.2878f,
- 67.2573f, -104.8061f, -111.8610f, -106.6751f, -107.3537f, -56.4758f,
- -0.6967f, -0.8495f, -0.9586f, -1.0461f, 1.4522f, -0.2762f,
- 28.2828f, 2.9157f, -2.1062f, 0.1566f, -467.2388f, -461.0685f,
- -459.0092f, -453.8370f, 1.5422f, -0.8186f, -0.4884f, -53.0399f,
- -2.0255f, -1.1348f, -1.1039f, -50.2489f, -1.4821f, 1.8021f,
- -258.0319f, -1.0865f, -0.5542f, -1.0443f, -1.2732f, 1.8413f,
- 0.2377f, 0.1937f, -0.0116f, 0.0935f, -0.0599f, 0.0118f,
- -0.0875f, 0.0455f, -0.1301f, -0.1081f, -0.2622f, -0.1960f,
- 0.0393f, -0.1490f, 0.1852f, -0.0964f, -0.0741f, 0.0419f,
- 0.1162f, -0.0274f, 0.1200f, -0.0333f, -0.1337f, 0.2141f,
- 0.0664f, 0.1044f, -0.1744f, 0.1060f, -0.1468f, 0.0679f,
- 0.0218f, 0.0494f, 0.1064f, 0.1363f, 0.0013f, 0.1331f,
- -0.2095f, 0.2088f, -0.0399f, -0.1811f, 0.0678f, -0.1974f,
- 0.1855f, -0.0968f, -0.2008f, 0.0162f, -0.0096f, -0.1493f,
- 0.2170f, -0.1248f, -0.2055f, 0.1276f, -0.0269f, -0.1697f,
- -0.0662f, 0.1073f, -0.0029f, -0.1051f, -0.1573f, 0.2106f,
- -0.2020f, -0.1565f, 0.0335f, -0.1818f, -0.1665f, 0.2169f,
- 0.1974f, -0.1470f, -0.1738f, -0.2038f, 0.0558f, -0.0441f,
- 0.0065f, -0.1485f, -0.1366f, -0.2131f, 0.1042f, 0.0349f,
- -0.1804f, -0.1361f, -0.0116f, -0.1012f, -0.0860f, 0.0606f,
- -0.2077f, 0.1826f, -0.1014f, -0.0721f, -0.1517f, 0.1022f,
- -0.1110f, -0.0186f, 0.1505f, 0.1797f, 0.0911f, 0.0340f,
- 0.1702f, -0.1404f, -0.0566f, -0.2744f, -0.1943f, -0.1871f,
- 0.0046f, 0.0306f, -0.0436f, 0.1625f, -0.1302f, 0.0175f,
- 0.1570f, -0.1425f, 0.0779f, 0.1398f, 0.0929f, 0.0897f,
- 0.0458f, -0.0936f, 0.1321f, -0.1355f, 0.0974f, 0.0457f,
- -73.3516f, -75.0655f, -72.1062f, -72.4624f, -34.8640f, -14.3727f,
- -4.4720f, 66.4982f, -18.8358f, 0.0397f, 174.2172f, 160.4959f,
- 161.1034f, 147.3250f, 9.5507f, -45.0180f, -73.1609f, -1.5230f,
- -74.8677f, -43.8559f, -68.7622f, -4.8971f, -82.1922f, 9.6490f,
- 64.7115f, -71.8566f, -75.3879f, -72.5479f, -71.7161f, -34.8056f,
- 0.1442f, 0.1558f, 0.1267f, -0.1261f, -0.0506f, -0.0823f,
- -0.1807f, -0.0889f, -0.2098f, -0.1295f, -0.2046f, -0.1749f,
- -0.1197f, -0.1380f, 0.0799f, -0.0889f, -0.1209f, 0.1919f,
- 0.1947f, -0.2086f, -0.1042f, -0.0468f, 0.0232f, 0.1052f,
- -0.0535f, 0.1398f, 0.1713f, -0.1522f, 0.1453f, 0.0286f,
- -64.8503f, -67.6746f, -63.6497f, -60.4614f, -35.6091f, -20.1605f,
- -3.6082f, 84.2801f, -37.8552f, -2.2371f, 132.4947f, 123.5057f,
- 123.5776f, 113.9060f, -14.8772f, -40.7130f, -79.1391f, -10.7024f,
- -65.7831f, -43.6078f, -79.6847f, -13.0743f, -69.2533f, -16.0171f,
- 50.4868f, -64.3678f, -68.7061f, -64.0823f, -59.3413f, -28.9405f,
- 77.1601f, 75.4899f, 69.8696f, 67.8764f, -22.7548f, 5.9814f,
- -3.2826f, 57.9754f, -5.9500f, -0.0014f, 77.2251f, 74.0737f,
- 73.7004f, 70.5072f, -80.9661f, 69.3065f, 55.8337f, 76.8831f,
- 57.9902f, 63.4765f, 56.4748f, 70.0282f, 61.0874f, -81.3960f,
- 26.2594f, 76.0367f, 74.9115f, 69.2361f, 66.9262f, -20.1637f,
- 0.1886f, -0.1108f, 0.1262f, 0.0189f, 0.1382f, 0.0859f,
- -0.1874f, -0.1986f, -0.0171f, -0.1400f, -0.2944f, -0.0750f,
- -0.0395f, -0.2092f, -0.0878f, 0.1216f, -0.0870f, -0.1613f,
- 0.2495f, 0.0754f, 0.0244f, -0.1205f, -0.0196f, -0.1729f,
- 0.1170f, 0.1585f, 0.1482f, -0.1705f, -0.1337f, 0.0199f,
- 13.0897f, 9.1111f, 6.7413f, 6.3907f, -28.1187f, 0.4556f,
- -5.3116f, 30.7293f, -16.3644f, -0.0365f, 118.9118f, 111.6125f,
- 111.3227f, 103.4680f, -30.1883f, 8.9328f, -4.1876f, 79.3936f,
- -9.0522f, 12.7861f, -1.2736f, 78.0446f, -5.9485f, -30.5716f,
- 27.8951f, 13.9613f, 6.7173f, 5.2345f, 8.3271f, -27.3705f,
- 1.0488f, 1.0864f, 1.0710f, 1.7332f, -3.0561f, 1.1622f,
- -7.6688f, 3.0491f, -1.3865f, 0.0769f, 222.5451f, 207.8170f,
- 208.1767f, 193.1396f, 0.4447f, 2.1654f, 1.8929f, 35.1469f,
- 1.1783f, 2.6199f, 1.1611f, 26.2989f, 3.4446f, 0.1551f,
- 65.6529f, 1.2229f, 0.9851f, 1.0241f, 1.4373f, -3.3421f,
- 0.1388f, 0.0756f, 0.2047f, 0.1140f, 0.0945f, 0.2038f,
- 0.1038f, -0.2068f, -0.0626f, -0.1937f, 0.1347f, -0.0464f,
- -0.0866f, 0.0250f, 0.0264f, -0.1556f, -0.1625f, 0.1028f,
- -0.1255f, -0.0854f, 0.1033f, 0.0008f, -0.2133f, -0.0317f,
- 0.1725f, -0.1054f, -0.1900f, 0.0383f, 0.0440f, -0.1900f,
- -30.0811f, -30.9929f, -29.3194f, -26.8347f, -20.5957f, -4.1595f,
- -1.9066f, 42.4707f, -9.0435f, 0.0064f, 175.7328f, 163.1350f,
- 163.5085f, 151.1648f, 4.4620f, -20.6011f, -19.3402f, 1.5468f,
- -32.0920f, -25.4581f, -12.3706f, -2.1636f, -32.4569f, 3.9365f,
- 61.0117f, -28.4195f, -31.0837f, -30.2749f, -27.5522f, -22.8688f,
- -0.3000f, 0.0092f, -0.3675f, -0.4113f, 0.0033f, 0.1138f,
- 0.2182f, -0.5803f, 0.7507f, -0.2529f, -1.7724f, -1.4702f,
- -1.5805f, -1.4294f, 0.1435f, -0.0168f, 0.2356f, -0.4373f,
- -0.4500f, -0.4803f, -0.0041f, -0.3878f, 0.1321f, 0.2761f,
- -1.1975f, -0.3509f, -0.0465f, -0.4050f, -0.1110f, 0.2233f,
- 0.0950f, 0.0974f, -0.1600f, -0.1753f, -0.0328f, 0.0741f,
- -0.0706f, 0.1839f, -0.0833f, -0.1367f, -0.1094f, -0.1739f,
- -0.1069f, 0.0370f, -0.1404f, 0.1631f, -0.1570f, 0.2117f,
- -0.1891f, 0.0395f, 0.1081f, 0.1760f, 0.0997f, 0.0853f,
- -0.1018f, 0.1306f, -0.0924f, -0.2078f, 0.0801f, -0.0949f,
- 0.5803f, 0.5578f, 0.4089f, 0.1912f, 0.6774f, 0.3145f,
- 0.3992f, -0.1316f, 1.3142f, -0.2457f, -2.3536f, -2.4939f,
- -2.3165f, -2.4879f, 0.2321f, 0.1901f, 0.1789f, -1.5215f,
- 0.2645f, 0.2231f, 0.2411f, -1.2361f, 0.2971f, 0.1421f,
- -1.6715f, 0.3158f, 0.2476f, 0.3596f, 0.3029f, 0.9297f,
- -88.8401f, -89.5209f, -86.1926f, -87.4196f, -39.6504f, -17.9684f,
- -4.2702f, 80.2017f, -29.1676f, -0.4190f, 150.2820f, 138.4751f,
- 139.1087f, 126.6569f, 13.7188f, -57.0739f, -80.3383f, -18.8351f,
- -87.4103f, -56.0072f, -82.7707f, -23.1871f, -93.6787f, 13.9287f,
- 59.6213f, -87.4843f, -90.4227f, -86.2635f, -86.6841f, -37.9086f,
- 0.1184f, -0.2169f, -0.1915f, 0.0543f, 0.1253f, -0.1370f,
- 0.0836f, -0.1198f, 0.1544f, -0.2004f, -0.1118f, -0.0786f,
- 0.1517f, -0.1000f, -0.1055f, 0.0936f, -0.1579f, 0.1098f,
- -0.0234f, -0.0499f, 0.0951f, -0.1711f, 0.0186f, -0.2008f,
- 0.1777f, 0.1386f, -0.1495f, -0.0684f, -0.2149f, -0.1198f,
- -0.6205f, -0.7209f, -0.5487f, -0.9080f, 1.3400f, 0.0085f,
- 28.2837f, 3.2217f, -1.8463f, 0.1620f, -464.3599f, -458.4327f,
- -455.9967f, -451.0393f, 1.6619f, -0.6944f, -0.3167f, -52.3630f,
- -1.6971f, -0.7340f, -0.8923f, -49.2771f, -1.1177f, 1.8810f,
- -258.9386f, -1.0765f, -0.7279f, -0.5208f, -0.8839f, 1.8175f,
- -78.8510f, -80.5740f, -77.8843f, -77.9798f, -36.5560f, -16.0818f,
- -5.5362f, 66.4228f, -16.8150f, 0.0036f, 181.8365f, 167.7181f,
- 168.2344f, 153.9725f, 11.2659f, -47.5786f, -92.6978f, 6.7573f,
- -68.7704f, -48.3850f, -95.3637f, 8.8888f, -76.9497f, 11.2243f,
- 60.9020f, -77.6515f, -80.7610f, -78.4537f, -77.4659f, -36.2872f,
- -0.0936f, 0.1966f, -0.2121f, 0.0193f, 0.0489f, -0.1445f,
- 0.0060f, 0.0358f, -0.0783f, -0.0985f, -0.2072f, -0.0802f,
- -0.0185f, 0.1868f, -0.0631f, 0.1260f, -0.0675f, 0.2167f,
- -0.2174f, -0.1085f, 0.1483f, -0.1655f, -0.1040f, 0.1605f,
- -0.1673f, -0.0148f, -0.1856f, -0.1454f, 0.1603f, -0.1620f,
- -0.9205f, -1.2716f, -3.6561f, -5.0834f, -0.7934f, 1.8710f,
- 2.2999f, -2.9516f, -1.7631f, -0.3804f, 41.2998f, 26.2358f,
- 28.9763f, 15.7315f, 5.2164f, 3.2963f, -5.4457f, 18.6310f,
- -25.0076f, 5.4368f, -12.0085f, 17.1462f, -14.6992f, 5.6365f,
- 48.6207f, -1.0921f, -1.8723f, -3.5354f, -5.1774f, -1.0200f,
- -0.1065f, -0.2021f, 0.0332f, 0.1692f, -0.1239f, 0.1325f,
- -0.0660f, -0.0567f, 0.2107f, -0.2084f, -0.0263f, 0.1411f,
- 0.0178f, 0.0451f, 0.2024f, -0.1756f, -0.0771f, -0.1690f,
- -0.2097f, -0.2130f, 0.0714f, 0.0172f, -0.0310f, 0.0649f,
- -0.1550f, 0.0701f, 0.0306f, -0.1750f, -0.1988f, -0.2060f,
- 0.0005f, -0.1325f, -0.1823f, -0.0900f, -0.1291f, -0.1817f,
- 0.0144f, 0.0951f, -0.1954f, -0.0171f, -0.1985f, 0.0875f,
- 0.0901f, -0.0857f, 0.1681f, 0.0465f, 0.1023f, 0.0985f,
- -0.2152f, -0.1723f, -0.0825f, 0.0203f, -0.1206f, -0.1431f,
- -0.1552f, 0.1344f, 0.0398f, 0.0169f, 0.2180f, -0.1530f,
- 2.7964f, 2.7312f, 2.8831f, 3.4729f, -3.1366f, 2.4043f,
- -7.2004f, 1.4128f, 2.8648f, 0.0578f, 225.5640f, 210.3712f,
- 210.6907f, 195.0339f, 0.3140f, 1.8060f, 2.7355f, 33.6917f,
- 3.3542f, 3.3682f, 1.7371f, 31.2424f, 3.4094f, -0.1192f,
- 63.0864f, 3.0562f, 2.8633f, 2.6777f, 3.5495f, -4.2616f,
- -1.4034f, 0.3930f, -4.6756f, -9.9870f, -27.8511f, 5.6071f,
- -1.0862f, 34.4907f, -10.4831f, -0.0281f, 117.2617f, 104.9590f,
- 106.1515f, 93.9707f, -16.8801f, 5.3036f, -21.7458f, 98.5306f,
- -20.7596f, 6.4733f, -17.6440f, 98.3097f, -31.9540f, -17.0600f,
- 27.4543f, -0.6140f, -1.6182f, -4.9167f, -8.9017f, -26.2485f,
- -0.1952f, -0.0462f, -0.1958f, 0.1679f, -0.1592f, -0.1634f,
- -0.0507f, -0.0542f, 0.0038f, -0.0343f, 0.0567f, -0.1983f,
- 0.0250f, -0.0762f, 0.0902f, -0.0343f, 0.1240f, 0.1161f,
- 0.1237f, 0.1870f, 0.0346f, 0.0340f, 0.0625f, -0.0355f,
- 0.0278f, -0.1043f, 0.1755f, 0.0253f, 0.1750f, -0.2070f,
- -5.5531f, -5.3122f, -4.9348f, -4.4782f, -7.5686f, -1.5478f,
- -5.4341f, 0.5087f, -2.1382f, 0.0798f, 208.3677f, 194.0083f,
- 194.4168f, 179.3082f, 1.4443f, -1.5038f, -1.4021f, 25.9363f,
- -4.0635f, -2.6785f, -1.6640f, 22.2589f, -1.4910f, 1.4715f,
- 59.1972f, -4.9638f, -5.1920f, -4.9193f, -5.2649f, -8.0556f,
- 20.1226f, 12.0195f, 9.7385f, 10.7058f, -27.4201f, 8.4869f,
- -5.0826f, 32.9212f, -2.0674f, -0.0290f, 120.5002f, 112.3222f,
- 112.3287f, 104.1107f, -20.6293f, 14.8534f, -0.8748f, 103.1141f,
- -1.1368f, 15.3716f, 2.7653f, 91.7285f, -0.5991f, -20.7338f,
- 35.9363f, 20.5104f, 11.1988f, 9.0368f, 10.6355f, -26.5309f,
- -0.2058f, -0.2176f, 0.1331f, -0.1415f, -0.0825f, -0.0470f,
- -0.0615f, 0.1274f, 0.0076f, -0.0575f, -0.2065f, 0.0866f,
- 0.2166f, -0.1942f, -0.1952f, 0.1323f, -0.1016f, 0.1803f,
- -0.0424f, 0.1555f, 0.1118f, 0.1559f, 0.0337f, -0.0341f,
- -0.0430f, 0.1988f, -0.0553f, -0.0255f, 0.1817f, 0.0608f,
- 0.1431f, 0.0686f, -0.0245f, -0.2107f, 0.2001f, -0.0964f,
- -0.0090f, 0.1151f, -0.0365f, -0.1986f, 0.1740f, -0.2098f,
- 0.0013f, 0.1369f, 0.1910f, 0.1801f, -0.2019f, 0.0348f,
- -0.1175f, 0.0627f, -0.1929f, -0.0099f, 0.1349f, 0.1804f,
- -0.1071f, -0.1651f, -0.1146f, -0.0259f, 0.1626f, -0.0271f,
- 0.1393f, 0.1304f, -0.0200f, 0.0924f, -0.0839f, -0.0031f,
- -0.1311f, 0.0350f, -0.1330f, -0.0911f, 0.1949f, -0.0209f,
- -0.1883f, 0.0269f, 0.2040f, 0.1552f, 0.1532f, 0.1157f,
- -0.1102f, -0.1220f, -0.0808f, -0.1050f, 0.1716f, 0.0846f,
- -0.0180f, -0.1037f, 0.2063f, 0.1237f, 0.1253f, -0.0496f,
- -0.0183f, 0.0491f, 0.1703f, -0.0824f, -0.0702f, -0.1100f,
- -0.0965f, 0.0130f, -0.1222f, -0.1081f, 0.0329f, 0.2115f,
- -0.1438f, 0.0799f, -0.1602f, -0.0330f, 0.0501f, 0.1072f,
- -0.0744f, -0.1783f, -0.0240f, 0.0777f, -0.1944f, 0.0438f,
- -0.0033f, -0.1873f, 0.0984f, -0.0318f, 0.0773f, 0.1489f,
- 0.3966f, 0.4711f, 0.3972f, 0.0623f, 0.5970f, 0.1018f,
- 0.1375f, -0.1881f, 0.8921f, -0.1854f, -2.1138f, -2.1178f,
- -1.8295f, -2.1703f, 0.5784f, -0.1937f, -0.0728f, -0.9953f,
- 0.2442f, -0.4074f, -0.1591f, -1.1660f, 0.4832f, 0.2203f,
- -1.4957f, 0.1544f, 0.1810f, 0.2275f, 0.4075f, 0.8153f,
- 0.0715f, 0.0222f, 0.0463f, -0.0201f, 0.0396f, 0.5951f,
- -0.2779f, -0.0306f, 0.7532f, -0.1596f, -4.1080f, -3.7925f,
- -3.8522f, -3.2468f, 0.7728f, 0.0188f, -0.1448f, 0.4084f,
- -0.4666f, -0.1036f, -1.1469f, 0.4243f, 0.2778f, 0.9023f,
- -3.0216f, 0.0384f, -0.3348f, -0.0314f, -0.2788f, 0.0479f,
- 139.0773f, 131.6164f, 115.0392f, 111.1817f, 41.7596f, 9.5379f,
- 1.8542f, 46.9890f, -12.8221f, 0.0241f, 52.9779f, 51.5268f,
- 50.8060f, 48.7028f, -132.9665f, 118.3478f, 101.1239f, 81.4608f,
- 75.4251f, 121.0643f, 97.8947f, 86.8911f, 74.5576f, -133.7606f,
- 29.2657f, 135.8916f, 131.3661f, 114.1687f, 111.0784f, 31.3790f,
- -0.0807f, -0.0657f, -0.0027f, 0.0410f, 0.0765f, 0.1194f,
- 0.0953f, -0.0060f, 0.1531f, -0.2339f, 0.1488f, -0.0615f,
- -0.0579f, 0.0761f, 0.1250f, -0.0469f, 0.1480f, 0.0683f,
- -0.0049f, 0.1558f, 0.2168f, -0.0736f, 0.1135f, -0.1244f,
- 0.0725f, -0.1297f, -0.0215f, -0.0412f, -0.1632f, -0.0200f,
- -0.1346f, -0.1954f, 0.0053f, 0.0151f, 0.1379f, -0.1497f,
- -0.0102f, -0.0336f, 0.0900f, -0.1706f, -0.0932f, -0.2084f,
- 0.1242f, -0.2027f, 0.0849f, -0.2139f, -0.2015f, 0.0944f,
- -0.0984f, 0.2082f, 0.1625f, -0.0227f, -0.1676f, 0.1021f,
- 0.1516f, 0.0245f, 0.0955f, -0.1488f, -0.0057f, 0.1783f,
- -0.8568f, -0.8175f, -0.6282f, -1.3107f, 1.5712f, 0.1044f,
- 28.2289f, 3.0885f, -1.9829f, 0.1600f, -465.9583f, -459.5893f,
- -457.5055f, -452.7600f, 1.7229f, -0.6620f, -0.1065f, -52.8017f,
- -2.0293f, -0.8224f, -1.0389f, -49.9049f, -1.2250f, 1.7647f,
- -259.2465f, -1.0978f, -0.5169f, -0.8721f, -0.8197f, 1.9158f,
- 16.2234f, 15.8523f, 13.8343f, 9.8509f, -21.4326f, 15.7650f,
- -6.4451f, 34.8575f, 1.1387f, -0.0223f, 117.7213f, 109.8494f,
- 109.7624f, 101.8532f, -20.3275f, 16.0812f, 4.9165f, 92.4919f,
- 4.1615f, 13.8451f, 9.2112f, 97.1580f, -8.7037f, -20.4420f,
- 27.1105f, 17.4922f, 13.9998f, 12.3888f, 11.4705f, -20.9568f,
- 0.5457f, 0.5322f, 0.2823f, 0.3581f, 0.5359f, 0.1576f,
- 0.1969f, -0.0136f, -0.2748f, -0.3168f, -0.3918f, -0.2167f,
- -0.1797f, -0.1869f, 0.2986f, -0.2116f, -0.4226f, -0.2022f,
- 0.9452f, 0.5474f, -0.1218f, 0.2067f, -0.1600f, 0.1937f,
- 0.0808f, 0.4877f, 0.5106f, 0.2626f, 0.5076f, 0.6228f,
- 0.5124f, 0.4044f, 0.4023f, 0.1222f, 2.5446f, 0.9623f,
- 24.9875f, 4.7442f, -2.0551f, 0.1642f, -449.9478f, -444.1841f,
- -442.0153f, -437.1498f, 2.3209f, -0.6986f, -0.3456f, -47.4074f,
- -1.2374f, -1.0939f, -0.9112f, -41.1851f, -0.5064f, 2.4209f,
- -263.4446f, -0.0433f, 0.3460f, 0.1475f, 0.3770f, 2.9154f,
- 0.2032f, 0.1527f, 0.2161f, -0.1981f, 0.1893f, -0.2003f,
- 0.1734f, 0.1713f, 0.1207f, -0.2073f, -0.1018f, 0.0770f,
- 0.0728f, 0.1665f, 0.0689f, 0.1884f, -0.1399f, -0.1326f,
- -0.0518f, -0.1948f, 0.1576f, -0.1835f, 0.1436f, 0.0497f,
- 0.0883f, -0.1253f, -0.0417f, -0.0507f, -0.1555f, 0.2076f,
- -2.4080f, 6.1616f, -0.8564f, -13.6773f, -32.7238f, -16.3144f,
- -1.9828f, 20.5110f, -17.0191f, -1.7154f, 103.6642f, 95.3675f,
- 95.5662f, 86.9504f, -35.5340f, 19.6681f, -2.4900f, 65.0847f,
- -15.8119f, 13.7256f, -4.6753f, 63.4713f, -6.5992f, -34.2369f,
- 41.3959f, -1.5528f, 3.8106f, -0.7762f, -12.3204f, -35.1734f,
- -83.9509f, -87.4861f, -83.5925f, -81.5047f, -54.1256f, -45.7506f,
- -13.5325f, -6.0331f, -8.5062f, 0.0261f, 189.9450f, 177.7870f,
- 178.6945f, 164.9762f, 9.8521f, -68.0619f, -68.6145f, 6.5056f,
- -55.9651f, -66.9540f, -65.3349f, -2.1954f, -57.2408f, 8.6577f,
- 60.6966f, -82.1056f, -88.5245f, -83.3057f, -80.7283f, -50.5285f,
- -0.1397f, 0.1862f, -0.0691f, -0.0906f, 0.1560f, 0.1377f,
- -0.0066f, -0.0213f, 0.0708f, -0.0386f, -0.0015f, -0.0020f,
- -0.2122f, 0.0747f, 0.0795f, 0.0229f, 0.1923f, -0.1661f,
- 0.0895f, 0.1176f, 0.1398f, -0.0443f, 0.0934f, 0.0638f,
- -0.1924f, 0.0602f, 0.0404f, 0.1597f, 0.1387f, -0.0601f,
- -28.3967f, -21.8483f, -25.5175f, -29.9252f, 2.0161f, -3.0092f,
- 7.7435f, 28.2367f, -35.0188f, -0.1578f, 105.0164f, 93.4495f,
- 94.9134f, 81.0315f, 4.3602f, 8.1303f, -37.7665f, -16.6986f,
- -40.8902f, 8.2542f, -33.3215f, -2.0457f, -69.0245f, 4.1016f,
- 47.2770f, -25.8268f, -23.6034f, -26.4339f, -27.8305f, 8.4468f,
- 13.8742f, 8.3874f, 4.2044f, 1.4619f, -40.2909f, -0.6358f,
- -0.7982f, 36.1931f, -17.3147f, -0.3348f, 106.8135f, 96.5298f,
- 97.8829f, 86.9994f, -25.8170f, 15.0652f, -0.9181f, 85.8544f,
- 2.5475f, 9.8009f, -3.5931f, 89.2017f, -3.7252f, -25.2986f,
- 22.5505f, 14.0434f, 7.0708f, 4.6646f, 1.5807f, -39.4024f,
- -0.1436f, 0.0256f, 0.0274f, -0.2126f, 0.0401f, 0.0745f,
- -0.0379f, -0.0357f, 0.0777f, -0.0709f, -0.1093f, -0.2047f,
- -0.0713f, -0.0478f, -0.0908f, 0.1963f, 0.1282f, 0.0977f,
- 0.1304f, 0.2058f, 0.0700f, 0.0518f, 0.0239f, 0.0686f,
- -0.1909f, 0.0828f, -0.1243f, -0.1920f, 0.1908f, -0.0808f,
- 90.8028f, 89.2894f, 84.5339f, 83.3491f, -13.3838f, 12.0240f,
- -3.9443f, 63.0867f, -2.5321f, -0.0099f, 68.9140f, 66.3206f,
- 66.0278f, 63.1498f, -83.7261f, 74.3448f, 73.4998f, 64.8477f,
- 69.7701f, 74.5878f, 71.0331f, 63.2116f, 74.3162f, -83.9282f,
- 20.8163f, 89.6818f, 88.6452f, 83.7338f, 82.9360f, -13.2357f,
- 0.1299f, -0.1765f, -0.0168f, -0.1372f, -0.1183f, 0.0472f,
- 0.1312f, 0.0267f, 0.0194f, -0.1593f, 0.0059f, 0.1775f,
- 0.0668f, -0.1239f, -0.1982f, -0.1415f, -0.1659f, -0.1148f,
- 0.0136f, 0.0913f, -0.1254f, -0.0357f, 0.0892f, 0.0835f,
- -0.0554f, 0.1969f, -0.0888f, -0.0623f, -0.0236f, -0.1492f,
- 0.4196f, 0.3218f, 0.2287f, 0.5095f, 0.7210f, 0.2279f,
- 0.4523f, -0.1832f, 1.3095f, -0.2041f, -2.1443f, -2.1947f,
- -1.9292f, -2.1142f, 0.5840f, 0.1018f, 0.1011f, -1.6565f,
- 0.4325f, 0.0424f, 0.2836f, -1.7183f, 0.2595f, 0.2686f,
- -1.8784f, 0.3891f, 0.3050f, 0.6195f, 0.2896f, 0.5905f,
- -5.3024f, -3.2518f, -12.5192f, -29.1732f, 1.6538f, -1.8315f,
- 9.9788f, 10.5155f, 6.3234f, -0.3460f, 76.9925f, 51.3785f,
- 55.7120f, 29.0432f, 5.5901f, 25.6578f, -3.9565f, 13.0509f,
- -106.0371f, 23.2124f, -18.2004f, 8.4618f, -69.3585f, 5.5651f,
- 80.0565f, -6.4941f, -5.3742f, -14.4209f, -24.1565f, 6.6801f,
- -22.0585f, -20.9909f, -26.7939f, -29.6890f, -14.5085f, 2.1866f,
- -4.2608f, 17.3977f, -30.8824f, -0.4017f, 135.6957f, 126.9320f,
- 127.0044f, 118.1835f, -1.8768f, -0.8629f, -32.0882f, 44.7862f,
- -23.9174f, 1.6485f, -27.9940f, 51.9078f, -48.5279f, -1.7550f,
- 49.9230f, -19.9785f, -22.4647f, -27.6911f, -27.3197f, -10.6545f,
- -0.1922f, -0.1999f, -0.1396f, 0.1065f, 0.0085f, -0.1940f,
- 0.0351f, 0.1285f, -0.0292f, -0.1296f, 0.1543f, -0.2082f,
- -0.1758f, 0.0719f, 0.0764f, 0.1394f, -0.0255f, -0.0370f,
- 0.1615f, -0.0568f, 0.1920f, -0.1631f, 0.0199f, 0.1884f,
- 0.0693f, 0.1074f, -0.0273f, 0.1540f, 0.0098f, 0.2111f,
- 0.1805f, -0.0555f, 0.1159f, 0.0469f, 0.1789f, -0.1711f,
- -0.1304f, 0.1912f, -0.0737f, -0.1408f, 0.1804f, -0.2023f,
- -0.0467f, -0.1019f, -0.0136f, 0.0691f, 0.1454f, -0.0213f,
- 0.0929f, -0.0958f, 0.1299f, 0.1137f, 0.1175f, 0.1042f,
- -0.2081f, -0.0737f, 0.0582f, 0.1640f, 0.2120f, -0.0646f,
- -0.0326f, 0.1976f, 0.1182f, -0.1365f, -0.1784f, 0.2113f,
- 0.0469f, 0.0763f, -0.0197f, -0.1902f, 0.1259f, 0.1598f,
- -0.0180f, -0.1339f, -0.1675f, -0.1884f, -0.1973f, 0.1529f,
- 0.1160f, 0.2154f, -0.1446f, -0.1395f, 0.0355f, 0.1513f,
- -0.2086f, -0.1135f, -0.1502f, -0.0018f, 0.0486f, -0.0110f,
- -0.0843f, -0.0716f, -0.1367f, 0.0753f, 0.0114f, 0.0475f,
- -0.0632f, 0.2045f, -0.0512f, -0.0906f, -0.1071f, -0.1957f,
- 0.1361f, 0.1821f, -0.1684f, -0.1383f, 0.1059f, 0.1579f,
- -0.0064f, -0.1205f, -0.0718f, -0.1323f, -0.0174f, -0.1092f,
- -0.1915f, 0.1978f, -0.1245f, 0.1297f, -0.1542f, 0.1556f,
- -0.1752f, 0.0718f, -0.1020f, -0.1970f, 0.0518f, -0.0888f,
- 0.0541f, -0.1922f, -0.1467f, -0.0653f, -0.1940f, -0.0800f,
- -0.1096f, -0.0796f, -0.1310f, 0.0191f, -0.1077f, -0.0973f,
- 0.1566f, 0.0074f, 0.0500f, -0.0415f, -0.2116f, 0.0227f,
- 0.0895f, 0.1528f, 0.1404f, 0.0467f, 0.0462f, -0.0973f,
- -0.1669f, 0.0551f, 0.1167f, -0.1470f, -0.0542f, -0.1006f,
- 0.2104f, 0.1039f, -0.0211f, -0.1726f, -0.0694f, -0.0270f,
- 0.0277f, -0.0715f, -0.2055f, -0.1502f, -0.1718f, -0.0043f,
- 0.0174f, 0.1019f, -0.0233f, -0.1518f, -0.1331f, -0.0001f,
- -0.1483f, -0.2115f, 0.0666f, 0.0014f, 0.1601f, -0.0690f,
- };
-
-static const float av1_rdcost_model_nn_biases_layer0[NUM_HIDDEN_NODES] = {
- 0.156824f, 0.f, 0.130013f, 0.084482f, -129.058197f, -15.090252f,
- -3.859116f, 0.736356f, -81.361557f, -0.001922f, -0.000713f, 0.440181f,
- 14.982646f, 1.282223f, 2.23122f, 94.26635f, 93.920929f, 0.614672f,
- 0.f, 0.315858f, 4.746014f, 0.116901f, -35.661354f, -75.148285f,
- 92.006989f, -14.112332f, 86.673157f, -0.000307f, -0.000544f, 0.f,
- -7.851313f, 0.505186f, 0.f, 0.f, -111.681091f, -0.937782f,
- 0.035789f, 0.f, 0.f, -0.00102f, -75.180527f, 0.f,
- -63.821148f, 79.592392f, 0.085068f, 11.184906f, 1.25406f, 0.f,
- -29.779242f, -0.181732f, 0.f, 0.425554f, -90.78405f, 0.f,
- -0.828326f, -81.132179f, 0.f, -2.757063f, 0.f, 0.f,
- 2.967951f, -4.440599f, 0.f, -5.105355f, 14.734543f, 0.f,
- 0.f, 0.f, 0.f, 0.295342f, -0.026907f, 133.375412f,
- -0.000855f, 0.f, -0.875029f, 15.665165f, 0.437296f, 0.321257f,
- -0.001932f, -4.235782f, -87.187782f, 0.f, -28.84696f, 7.055514f,
- 0.f, 95.548302f, -0.000425f, 0.38969f, -13.88008f, -27.347931f,
- 0.f, 0.f, 0.f, -0.000026f, 0.f, 0.f,
-};
-
-static const float
- av1_rdcost_model_nn_weights_layer1[NUM_HIDDEN_NODES * NUM_OUTPUTS] = {
- -0.101706f, -0.14411f, -0.139118f, -0.132945f, 118.811302f,
- 3.137232f, -32.969776f, -4.150725f, 26.263071f, 0.092841f,
- 0.174125f, -0.028195f, 15.712872f, 17.722702f, 5.666006f,
- -121.143929f, -131.933731f, -3.000318f, -0.032063f, -0.380065f,
- -1.660653f, -0.164802f, 7.177527f, 87.759155f, -119.564224f,
- -98.051651f, -110.581116f, -0.069982f, 0.023906f, 0.183792f,
- 40.606274f, -0.080804f, -0.053744f, -0.187848f, 157.44313f,
- -4.820149f, 0.089499f, 0.070232f, -0.043038f, 0.072996f,
- 93.347313f, 0.225259f, 103.223228f, -110.682541f, 0.14314f,
- -89.827538f, 6.505952f, -0.076949f, 73.816132f, -0.063416f,
- -0.23736f, -0.066059f, 116.049599f, 0.120871f, -4.708246f,
- 107.501671f, -0.206708f, -32.688675f, 0.047608f, -0.105907f,
- 6.505825f, -75.461891f, -0.160341f, 6.532121f, -84.868111f,
- -0.065622f, 0.044756f, 0.008672f, 0.017155f, 0.046108f,
- -0.218818f, -126.507957f, 0.028271f, 0.180625f, -4.707376f,
- -121.524307f, -0.03853f, -4.103166f, -0.018947f, -95.768463f,
- 15.941695f, 0.147154f, -102.863029f, -72.521698f, -0.037133f,
- -138.1492f, 0.210016f, -0.084692f, -68.693665f, -52.523472f,
- -0.133385f, -0.17438f, 0.008654f, -0.035642f, -0.145202f,
- 0.211135f,
- };
-
-static const float av1_rdcost_model_nn_biases_layer1[NUM_OUTPUTS] = {
- 0.251909f
-};
-
-static const NN_CONFIG av1_rdcost_model_nnconfig = {
- NUM_FEATURES,
- NUM_OUTPUTS,
- NUM_HIDDEN_LAYERS,
- {
- NUM_HIDDEN_NODES,
- },
- {
- av1_rdcost_model_nn_weights_layer0,
- av1_rdcost_model_nn_weights_layer1,
- },
- {
- av1_rdcost_model_nn_biases_layer0,
- av1_rdcost_model_nn_biases_layer1,
- },
-};
-
-//------------------------------------------------------------------------------
-
-#undef NUM_FEATURES
-#undef NUM_HIDDEN_LAYERS
-#undef NUM_HIDDEN_NODES
-#undef NUM_OUTPUTS
-
-#ifdef __cplusplus
-} // extern "C"
-#endif
-
-#endif // AOM_AV1_ENCODER_RATE_DISTORTION_MODEL_PARAMS_H_
diff --git a/third_party/aom/av1/encoder/ratectrl.c b/third_party/aom/av1/encoder/ratectrl.c
deleted file mode 100644
index 2597fb990..000000000
--- a/third_party/aom/av1/encoder/ratectrl.c
+++ /dev/null
@@ -1,1776 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#include <assert.h>
-#include <limits.h>
-#include <math.h>
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-
-#include "aom_dsp/aom_dsp_common.h"
-#include "aom_mem/aom_mem.h"
-#include "aom_ports/mem.h"
-#include "aom_ports/system_state.h"
-
-#include "av1/common/alloccommon.h"
-#include "av1/encoder/aq_cyclicrefresh.h"
-#include "av1/common/common.h"
-#include "av1/common/entropymode.h"
-#include "av1/common/quant_common.h"
-#include "av1/common/seg_common.h"
-
-#include "av1/encoder/encodemv.h"
-#include "av1/encoder/random.h"
-#include "av1/encoder/ratectrl.h"
-
-// Max rate target for 1080P and below encodes under normal circumstances
-// (1920 * 1080 / (16 * 16)) * MAX_MB_RATE bits per MB
-#define MAX_MB_RATE 250
-#define MAXRATE_1080P 2025000
-
-#define DEFAULT_KF_BOOST 2000
-#define DEFAULT_GF_BOOST 2000
-
-#define MIN_BPB_FACTOR 0.005
-#define MAX_BPB_FACTOR 50
-
-#define FRAME_OVERHEAD_BITS 200
-// Points the pointer variable `name` at the table `name##_8`, `name##_10` or
-// `name##_12` that matches `bit_depth`. Because the table name is formed by
-// token pasting, the pointer passed as `name` must carry exactly the base
-// name of the table family (e.g. inter_minq -> inter_minq_8).
-// Any other bit depth trips the assert and sets `name` to NULL.
-#define ASSIGN_MINQ_TABLE(bit_depth, name) \
- do { \
- switch (bit_depth) { \
- case AOM_BITS_8: name = name##_8; break; \
- case AOM_BITS_10: name = name##_10; break; \
- case AOM_BITS_12: name = name##_12; break; \
- default: \
- assert(0 && \
- "bit_depth should be AOM_BITS_8, AOM_BITS_10" \
- " or AOM_BITS_12"); \
- name = NULL; \
- } \
- } while (0)
-
-// Tables relating active max Q to active min Q
-// One table per frame category and bit depth, each indexed by qindex.
-// They are filled in by av1_rc_init_minq_luts() and selected at run time
-// through ASSIGN_MINQ_TABLE().
-
-// 8-bit tables.
-static int kf_low_motion_minq_8[QINDEX_RANGE];
-static int kf_high_motion_minq_8[QINDEX_RANGE];
-static int arfgf_low_motion_minq_8[QINDEX_RANGE];
-static int arfgf_high_motion_minq_8[QINDEX_RANGE];
-static int inter_minq_8[QINDEX_RANGE];
-static int rtc_minq_8[QINDEX_RANGE];
-
-// 10-bit tables.
-static int kf_low_motion_minq_10[QINDEX_RANGE];
-static int kf_high_motion_minq_10[QINDEX_RANGE];
-static int arfgf_low_motion_minq_10[QINDEX_RANGE];
-static int arfgf_high_motion_minq_10[QINDEX_RANGE];
-static int inter_minq_10[QINDEX_RANGE];
-static int rtc_minq_10[QINDEX_RANGE];
-// 12-bit tables.
-static int kf_low_motion_minq_12[QINDEX_RANGE];
-static int kf_high_motion_minq_12[QINDEX_RANGE];
-static int arfgf_low_motion_minq_12[QINDEX_RANGE];
-static int arfgf_high_motion_minq_12[QINDEX_RANGE];
-static int inter_minq_12[QINDEX_RANGE];
-static int rtc_minq_12[QINDEX_RANGE];
-
-// Boost thresholds handed to get_active_quality() as its low/high bounds:
-// gf_* for golden/ARF frames (compared against gfu_boost) and kf_* for key
-// frames (compared against kf_boost).
-static int gf_high = 2000;
-static int gf_low = 400;
-static int kf_high = 5000;
-static int kf_low = 400;
-
-// Ratio of the configured frame area (oxcf width x height) to the area of the
-// given coded size, i.e. how many times fewer pixels there are to encode
-// under the current scaling.
-// Temporary replacement for rcf_mult and rate_thresh_mult.
-//
-// Both areas are formed in double so that very large dimensions cannot
-// overflow int arithmetic before the division takes place.
-static double resize_rate_factor(const AV1_COMP *cpi, int width, int height) {
-  const double configured_area = (double)cpi->oxcf.width * cpi->oxcf.height;
-  const double coded_area = (double)width * height;
-  return configured_area / coded_area;
-}
-
-// Functions to compute the active minq lookup table entries based on a
-// formulaic approach to facilitate easier adjustment of the Q tables.
-// The formulae were derived from computing a 3rd order polynomial best
-// fit to the original data (after plotting real maxq vs minq (not q index))
-// Returns the first qindex whose real Q value reaches the min-Q target
-// obtained from the cubic (x3, x2, x1; no constant term) evaluated at maxq.
-// The target is capped at maxq itself.
-static int get_minq_index(double maxq, double x3, double x2, double x1,
-                          aom_bit_depth_t bit_depth) {
-  const double minqtarget = AOMMIN(((x3 * maxq + x2) * maxq + x1) * maxq, maxq);
-  int qindex = 0;
-
-  // Special case: the step from q 2.0 down to lossless mode (q 1.0) maps
-  // straight to index 0.
-  if (minqtarget <= 2.0) return 0;
-
-  while (qindex < QINDEX_RANGE) {
-    if (minqtarget <= av1_convert_qindex_to_q(qindex, bit_depth)) return qindex;
-    ++qindex;
-  }
-
-  // Target lies above every representable Q: use the last index.
-  return QINDEX_RANGE - 1;
-}
-
-// Fills the six min-Q lookup tables for one bit depth. For every qindex the
-// matching real Q is taken as "max Q" and each table stores the qindex
-// returned by get_minq_index() for that category's polynomial.
-static void init_minq_luts(int *kf_low_m, int *kf_high_m, int *arfgf_low,
-                           int *arfgf_high, int *inter, int *rtc,
-                           aom_bit_depth_t bit_depth) {
-  int qindex;
-  for (qindex = 0; qindex < QINDEX_RANGE; ++qindex) {
-    const double max_q = av1_convert_qindex_to_q(qindex, bit_depth);
-
-    // Key frames.
-    kf_low_m[qindex] =
-        get_minq_index(max_q, 0.000001, -0.0004, 0.150, bit_depth);
-    kf_high_m[qindex] =
-        get_minq_index(max_q, 0.0000021, -0.00125, 0.45, bit_depth);
-
-    // Golden / alt-ref frames.
-    arfgf_low[qindex] =
-        get_minq_index(max_q, 0.0000015, -0.0009, 0.30, bit_depth);
-    arfgf_high[qindex] =
-        get_minq_index(max_q, 0.0000021, -0.00125, 0.55, bit_depth);
-
-    // Ordinary inter frames and the rtc table.
-    inter[qindex] = get_minq_index(max_q, 0.00000271, -0.00113, 0.90, bit_depth);
-    rtc[qindex] = get_minq_index(max_q, 0.00000271, -0.00113, 0.70, bit_depth);
-  }
-}
-
-// Populates every min-Q lookup table (8, 10 and 12 bit).
-void av1_rc_init_minq_luts(void) {
-  // One row per supported bit depth: its six tables plus the depth itself.
-  static const struct {
-    int *kf_low;
-    int *kf_high;
-    int *arfgf_low;
-    int *arfgf_high;
-    int *inter;
-    int *rtc;
-    aom_bit_depth_t depth;
-  } lut_sets[] = {
-    { kf_low_motion_minq_8, kf_high_motion_minq_8, arfgf_low_motion_minq_8,
-      arfgf_high_motion_minq_8, inter_minq_8, rtc_minq_8, AOM_BITS_8 },
-    { kf_low_motion_minq_10, kf_high_motion_minq_10, arfgf_low_motion_minq_10,
-      arfgf_high_motion_minq_10, inter_minq_10, rtc_minq_10, AOM_BITS_10 },
-    { kf_low_motion_minq_12, kf_high_motion_minq_12, arfgf_low_motion_minq_12,
-      arfgf_high_motion_minq_12, inter_minq_12, rtc_minq_12, AOM_BITS_12 },
-  };
-  unsigned int row;
-
-  for (row = 0; row < sizeof(lut_sets) / sizeof(lut_sets[0]); ++row) {
-    init_minq_luts(lut_sets[row].kf_low, lut_sets[row].kf_high,
-                   lut_sets[row].arfgf_low, lut_sets[row].arfgf_high,
-                   lut_sets[row].inter, lut_sets[row].rtc,
-                   lut_sets[row].depth);
-  }
-}
-
-// These functions use formulaic calculations to make playing with the
-// quantizer tables easier. If necessary they can be replaced by lookup
-// tables if and when things settle down in the experimental bitstream
-// Converts a quantizer index to a real Q value: the AC quantizer returned by
-// av1_ac_quant_Q3() divided by a depth-dependent constant (4 / 16 / 64 for
-// 8 / 10 / 12 bit) so the result is scaled down to match old Q values.
-// An unsupported bit depth asserts and yields -1.0.
-double av1_convert_qindex_to_q(int qindex, aom_bit_depth_t bit_depth) {
-  double divisor;
-
-  if (bit_depth == AOM_BITS_8) {
-    divisor = 4.0;
-  } else if (bit_depth == AOM_BITS_10) {
-    divisor = 16.0;
-  } else if (bit_depth == AOM_BITS_12) {
-    divisor = 64.0;
-  } else {
-    assert(0 && "bit_depth should be AOM_BITS_8, AOM_BITS_10 or AOM_BITS_12");
-    return -1.0;
-  }
-
-  return av1_ac_quant_Q3(qindex, 0, bit_depth) / divisor;
-}
-
-// Estimates the bits per macroblock (in the BPER_MB_NORMBITS-scaled domain
-// used by the callers) for a frame of the given type coded at `qindex`,
-// scaled by `correction_factor`. The correction factor is asserted to lie
-// within [MIN_BPB_FACTOR, MAX_BPB_FACTOR].
-int av1_rc_bits_per_mb(FRAME_TYPE frame_type, int qindex,
-                       double correction_factor, aom_bit_depth_t bit_depth) {
-  const double real_q = av1_convert_qindex_to_q(qindex, bit_depth);
-  int base_enumerator;
-  int enumerator;
-
-  if (frame_type == KEY_FRAME)
-    base_enumerator = 2700000;
-  else
-    base_enumerator = 1800000;
-
-  assert(correction_factor <= MAX_BPB_FACTOR &&
-         correction_factor >= MIN_BPB_FACTOR);
-
-  // Q based adjustment to the baseline enumerator.
-  enumerator = base_enumerator + ((int)(base_enumerator * real_q) >> 12);
-  return (int)(enumerator * correction_factor / real_q);
-}
-
-// Estimates the size in bits of a frame of `mbs` macroblocks coded at qindex
-// `q`, never returning less than FRAME_OVERHEAD_BITS.
-//
-// The per-MB estimate is scaled up by BPER_MB_NORMBITS, so the product with
-// the macroblock count is formed and shifted back down in 64 bits and only
-// then narrowed to int. (A cast binds tighter than >>, so casting first
-// would truncate the product before the shift.)
-int av1_estimate_bits_at_q(FRAME_TYPE frame_type, int q, int mbs,
-                           double correction_factor,
-                           aom_bit_depth_t bit_depth) {
-  const int bpm =
-      (int)(av1_rc_bits_per_mb(frame_type, q, correction_factor, bit_depth));
-  const int frame_bits = (int)(((uint64_t)bpm * mbs) >> BPER_MB_NORMBITS);
-  return AOMMAX(FRAME_OVERHEAD_BITS, frame_bits);
-}
-
-// Clamps an inter-frame bit target to the configured limits and returns the
-// clamped value:
-//  - lower bound: max(min_frame_bandwidth, avg_frame_bandwidth / 32);
-//  - upper bound: max_frame_bandwidth and, when rc_max_inter_bitrate_pct is
-//    non-zero, that percentage of avg_frame_bandwidth.
-int av1_rc_clamp_pframe_target_size(const AV1_COMP *const cpi, int target) {
-  const RATE_CONTROL *rc = &cpi->rc;
-  const AV1EncoderConfig *oxcf = &cpi->oxcf;
-  const int min_frame_target =
-      AOMMAX(rc->min_frame_bandwidth, rc->avg_frame_bandwidth >> 5);
-
-  // Clip the frame target to the minimum setup value.
-  // If there is an active ARF at this location use the minimum bits on this
-  // frame even if it is a constructed arf. The active maximum quantizer
-  // ensures that an appropriate number of bits will be spent if needed for
-  // constructed ARFs.
-  if (rc->is_src_frame_alt_ref || target < min_frame_target)
-    target = min_frame_target;
-
-  // Clip the frame target to the maximum allowed value.
-  if (target > rc->max_frame_bandwidth) target = rc->max_frame_bandwidth;
-  if (oxcf->rc_max_inter_bitrate_pct) {
-    // Widen before multiplying: bandwidth * percentage can exceed 32 bits.
-    const int64_t max_rate = (int64_t)rc->avg_frame_bandwidth *
-                             oxcf->rc_max_inter_bitrate_pct / 100;
-    // Only ever lowers target, so the narrowing cast is always in range.
-    if (max_rate < target) target = (int)max_rate;
-  }
-
-  return target;
-}
-
-// Clamps an intra-frame bit target and returns the clamped value: first to
-// rc_max_intra_bitrate_pct percent of avg_frame_bandwidth (when that setting
-// is non-zero), then to max_frame_bandwidth.
-int av1_rc_clamp_iframe_target_size(const AV1_COMP *const cpi, int target) {
-  const RATE_CONTROL *rc = &cpi->rc;
-  const AV1EncoderConfig *oxcf = &cpi->oxcf;
-  if (oxcf->rc_max_intra_bitrate_pct) {
-    // Widen before multiplying: bandwidth * percentage can exceed 32 bits.
-    const int64_t max_rate = (int64_t)rc->avg_frame_bandwidth *
-                             oxcf->rc_max_intra_bitrate_pct / 100;
-    // Only ever lowers target, so the narrowing cast is always in range.
-    if (max_rate < target) target = (int)max_rate;
-  }
-  if (target > rc->max_frame_bandwidth) target = rc->max_frame_bandwidth;
-  return target;
-}
-
-// Update the buffer level: leaky bucket model.
-// Leaky-bucket bookkeeping after a frame has been coded: adjusts
-// bits_off_target by the frame's cost, caps it at maximum_buffer_size and
-// mirrors the result into buffer_level.
-static void update_buffer_level(AV1_COMP *cpi, int encoded_frame_size) {
-  RATE_CONTROL *const rc = &cpi->rc;
-  // Non-viewable frames are a special case and are treated as pure overhead.
-  // TODO(zoeliu): To further explore whether we should treat BWDREF_FRAME
-  //               differently, since it is a no-show frame.
-  const int pure_overhead = !cpi->common.show_frame && !rc->is_bwd_ref_frame;
-
-  if (pure_overhead) {
-    rc->bits_off_target -= encoded_frame_size;
-  } else {
-    rc->bits_off_target += rc->avg_frame_bandwidth - encoded_frame_size;
-  }
-
-  // Clip the buffer level to the maximum specified buffer size.
-  if (!(rc->bits_off_target < rc->maximum_buffer_size))
-    rc->bits_off_target = rc->maximum_buffer_size;
-
-  rc->buffer_level = rc->bits_off_target;
-}
-
-// Returns the default minimum golden-frame interval for a stream of the given
-// size and frame rate: framerate / 8 clamped to
-// [MIN_GF_INTERVAL, MAX_GF_INTERVAL], raised for pixel rates above 4K at
-// 20 fps in proportion to how far the pixel rate exceeds that level.
-int av1_rc_get_default_min_gf_interval(int width, int height,
-                                       double framerate) {
-  // Assume we do not need any constraint lower than 4K 20 fps
-  static const double factor_safe = 3840 * 2160 * 20.0;
-  // Promote to double before multiplying so that width * height cannot
-  // overflow int for very large frames.
-  const double factor = (double)width * height * framerate;
-  const int default_interval =
-      clamp((int)(framerate * 0.125), MIN_GF_INTERVAL, MAX_GF_INTERVAL);
-
-  if (factor <= factor_safe)
-    return default_interval;
-  else
-    return AOMMAX(default_interval,
-                  (int)(MIN_GF_INTERVAL * factor / factor_safe + 0.5));
-  // Note this logic makes:
-  // 4K24: 5
-  // 4K30: 6
-  // 4K60: 12
-}
-
-// Returns the default maximum golden-frame interval: 0.75 * framerate capped
-// at MAX_GF_INTERVAL and rounded up to an even value, but never below
-// min_gf_interval (nor below FIXED_GF_LENGTH when CONFIG_FIX_GF_LENGTH is
-// enabled).
-int av1_rc_get_default_max_gf_interval(double framerate, int min_gf_interval) {
-  int interval = (int)(framerate * 0.75);
-  if (!(MAX_GF_INTERVAL < interval)) {
-    // Already within the cap.
-  } else {
-    interval = MAX_GF_INTERVAL;
-  }
-  if (interval & 0x01) ++interval;  // Round to even value
-#if CONFIG_FIX_GF_LENGTH
-  interval = AOMMAX(FIXED_GF_LENGTH, interval);
-#endif
-  return AOMMAX(interval, min_gf_interval);
-}
-
-// Resets the rate-control state `rc` from the encoder configuration `oxcf`
-// for the given pass number.
-void av1_rc_init(const AV1EncoderConfig *oxcf, int pass, RATE_CONTROL *rc) {
-  int level;
-  int start_qindex;
-
-  // One-pass CBR starts from the worst allowed Q; every other mode starts
-  // from the midpoint of the allowed range.
-  if (pass == 0 && oxcf->rc_mode == AOM_CBR)
-    start_qindex = oxcf->worst_allowed_q;
-  else
-    start_qindex = (oxcf->worst_allowed_q + oxcf->best_allowed_q) / 2;
-  rc->avg_frame_qindex[KEY_FRAME] = start_qindex;
-  rc->avg_frame_qindex[INTER_FRAME] = start_qindex;
-
-  rc->last_q[KEY_FRAME] = oxcf->best_allowed_q;
-  rc->last_q[INTER_FRAME] = oxcf->worst_allowed_q;
-
-  // Buffer model starts at the configured starting level.
-  rc->buffer_level = rc->starting_buffer_level;
-  rc->bits_off_target = rc->starting_buffer_level;
-
-  // Rolling monitors start at the average per-frame bandwidth.
-  rc->rolling_target_bits = rc->avg_frame_bandwidth;
-  rc->rolling_actual_bits = rc->avg_frame_bandwidth;
-  rc->long_rolling_target_bits = rc->avg_frame_bandwidth;
-  rc->long_rolling_actual_bits = rc->avg_frame_bandwidth;
-
-  // Running totals.
-  rc->total_actual_bits = 0;
-  rc->total_target_bits = 0;
-  rc->total_target_vs_actual = 0;
-
-  // Key-frame / alt-ref state.
-  rc->frames_since_key = 8;  // Sensible default for first frame.
-  rc->this_key_frame_forced = 0;
-  rc->next_key_frame_forced = 0;
-  rc->source_alt_ref_pending = 0;
-  rc->source_alt_ref_active = 0;
-
-  rc->frames_till_gf_update_due = 0;
-  rc->ni_av_qi = oxcf->worst_allowed_q;
-  rc->ni_tot_qi = 0;
-  rc->ni_frames = 0;
-
-  rc->tot_q = 0.0;
-  rc->avg_q = av1_convert_qindex_to_q(oxcf->worst_allowed_q, oxcf->bit_depth);
-
-  // Every rate factor level starts at 0.7 except KF_STD, which starts at 1.0.
-  for (level = 0; level < RATE_FACTOR_LEVELS; ++level) {
-    rc->rate_correction_factors[level] = (level == KF_STD) ? 1.0 : 0.7;
-  }
-
-  // A configured interval of 0 means "derive a default".
-  rc->min_gf_interval = oxcf->min_gf_interval;
-  if (rc->min_gf_interval == 0) {
-    rc->min_gf_interval = av1_rc_get_default_min_gf_interval(
-        oxcf->width, oxcf->height, oxcf->init_framerate);
-  }
-  rc->max_gf_interval = oxcf->max_gf_interval;
-  if (rc->max_gf_interval == 0) {
-    rc->max_gf_interval = av1_rc_get_default_max_gf_interval(
-        oxcf->init_framerate, rc->min_gf_interval);
-  }
-  rc->baseline_gf_interval = (rc->min_gf_interval + rc->max_gf_interval) / 2;
-}
-
-// Decides whether the current frame should be dropped (returns 1) or coded
-// (returns 0), updating the decimation factor/counter in cpi->rc.
-// Dropping is disabled when drop_frames_water_mark is 0.
-int av1_rc_drop_frame(AV1_COMP *cpi) {
-  const AV1EncoderConfig *oxcf = &cpi->oxcf;
-  RATE_CONTROL *const rc = &cpi->rc;
-  int drop_mark;
-
-  if (!oxcf->drop_frames_water_mark) return 0;
-
-  // Always drop if buffer is below 0.
-  if (rc->buffer_level < 0) return 1;
-
-  // If buffer is below drop_mark, for now just drop every other frame
-  // (starting with the next frame) until it increases back over drop_mark.
-  drop_mark =
-      (int)(oxcf->drop_frames_water_mark * rc->optimal_buffer_level / 100);
-  if (rc->buffer_level > drop_mark) {
-    if (rc->decimation_factor > 0) --rc->decimation_factor;
-  } else if (rc->decimation_factor == 0) {
-    rc->decimation_factor = 1;
-  }
-
-  // Not decimating: reset the counter and keep the frame.
-  if (!(rc->decimation_factor > 0)) {
-    rc->decimation_count = 0;
-    return 0;
-  }
-
-  // Decimating: drop while the counter runs down, then code one frame and
-  // re-arm the counter.
-  if (rc->decimation_count > 0) {
-    --rc->decimation_count;
-    return 1;
-  }
-  rc->decimation_count = rc->decimation_factor;
-  return 0;
-}
-
-// Selects the rate_correction_factors[] slot that applies to the frame being
-// coded. Shared by the getter and the setter below so the two always agree:
-//  - key frames use KF_STD;
-//  - in pass 2 the slot is the current gf_group rf_level entry;
-//  - otherwise a golden/alt-ref refresh that is not an ARF overlay uses
-//    GF_ARF_STD (in CBR only when gf_cbr_boost_pct > 20), and everything
-//    else uses INTER_NORMAL.
-static int rate_correction_factor_index(const AV1_COMP *cpi) {
-  const RATE_CONTROL *const rc = &cpi->rc;
-
-  if (cpi->common.frame_type == KEY_FRAME) return KF_STD;
-  if (cpi->oxcf.pass == 2)
-    return cpi->twopass.gf_group.rf_level[cpi->twopass.gf_group.index];
-
-  if ((cpi->refresh_alt_ref_frame || cpi->refresh_golden_frame) &&
-      !rc->is_src_frame_alt_ref &&
-      (cpi->oxcf.rc_mode != AOM_CBR || cpi->oxcf.gf_cbr_boost_pct > 20))
-    return GF_ARF_STD;
-  return INTER_NORMAL;
-}
-
-// Returns the stored correction factor for the current frame's slot,
-// multiplied by the resize factor for width x height and clamped to
-// [MIN_BPB_FACTOR, MAX_BPB_FACTOR].
-static double get_rate_correction_factor(const AV1_COMP *cpi, int width,
-                                         int height) {
-  double rcf = cpi->rc.rate_correction_factors[rate_correction_factor_index(cpi)];
-  rcf *= resize_rate_factor(cpi, width, height);
-  return fclamp(rcf, MIN_BPB_FACTOR, MAX_BPB_FACTOR);
-}
-
-// Stores `factor` into the current frame's slot after dividing out the
-// resize factor for width x height and clamping the result to
-// [MIN_BPB_FACTOR, MAX_BPB_FACTOR].
-static void set_rate_correction_factor(AV1_COMP *cpi, double factor, int width,
-                                       int height) {
-  RATE_CONTROL *const rc = &cpi->rc;
-
-  // Normalize RCF to account for the size-dependent scaling factor.
-  factor /= resize_rate_factor(cpi, width, height);
-
-  factor = fclamp(factor, MIN_BPB_FACTOR, MAX_BPB_FACTOR);
-
-  rc->rate_correction_factors[rate_correction_factor_index(cpi)] = factor;
-}
-
-// Post-encode update of the rate correction factor for the current frame's
-// slot. Compares cpi->rc.projected_frame_size with the size predicted at
-// cm->base_qindex, then moves the factor part of the way toward the observed
-// ratio. Also records the q / over-undershoot history (q_1_frame, q_2_frame,
-// rc_1_frame, rc_2_frame) that av1_rc_regulate_q() reads.
-// ARF overlay frames leave everything untouched.
-void av1_rc_update_rate_correction_factors(AV1_COMP *cpi, int width,
- int height) {
- const AV1_COMMON *const cm = &cpi->common;
- int correction_factor = 100;
- double rate_correction_factor =
- get_rate_correction_factor(cpi, width, height);
- double adjustment_limit;
- const int MBs = av1_get_MBs(width, height);
-
- int projected_size_based_on_q = 0;
-
- // Do not update the rate factors for arf overlay frames.
- if (cpi->rc.is_src_frame_alt_ref) return;
-
- // Clear down mmx registers to allow floating point in what follows
- aom_clear_system_state();
-
- // Work out how big we would have expected the frame to be at this Q given
- // the current correction factor.
- // Stay in double to avoid int overflow when values are large
- // NOTE(review): both estimators below are stored into an int, so the
- // "stay in double" remark above looks stale - confirm.
- if (cpi->oxcf.aq_mode == CYCLIC_REFRESH_AQ && cpi->common.seg.enabled) {
- projected_size_based_on_q =
- av1_cyclic_refresh_estimate_bits_at_q(cpi, rate_correction_factor);
- } else {
- projected_size_based_on_q = av1_estimate_bits_at_q(
- cpi->common.frame_type, cm->base_qindex, MBs, rate_correction_factor,
- cm->seq_params.bit_depth);
- }
- // Work out a size correction factor.
- // (actual size as a percentage of the projection; stays at 100 when the
- // projection is not above FRAME_OVERHEAD_BITS)
- if (projected_size_based_on_q > FRAME_OVERHEAD_BITS)
- correction_factor = (int)((100 * (int64_t)cpi->rc.projected_frame_size) /
- projected_size_based_on_q);
-
- // More heavily damped adjustment used if we have been oscillating either side
- // of target.
- // The limit is 0.25 plus up to 0.5, growing with |log10| of the ratio.
- if (correction_factor > 0) {
- adjustment_limit =
- 0.25 + 0.5 * AOMMIN(1, fabs(log10(0.01 * correction_factor)));
- } else {
- adjustment_limit = 0.75;
- }
-
- // Shift the two-frame history: rc_1_frame is -1 above 110%, 1 below 90%,
- // and 0 in between.
- cpi->rc.q_2_frame = cpi->rc.q_1_frame;
- cpi->rc.q_1_frame = cm->base_qindex;
- cpi->rc.rc_2_frame = cpi->rc.rc_1_frame;
- if (correction_factor > 110)
- cpi->rc.rc_1_frame = -1;
- else if (correction_factor < 90)
- cpi->rc.rc_1_frame = 1;
- else
- cpi->rc.rc_1_frame = 0;
-
- // Ratios from 99 to 102 percent leave the factor unchanged.
- if (correction_factor > 102) {
- // We are not already at the worst allowable quality
- correction_factor =
- (int)(100 + ((correction_factor - 100) * adjustment_limit));
- rate_correction_factor = (rate_correction_factor * correction_factor) / 100;
- // Keep rate_correction_factor within limits
- if (rate_correction_factor > MAX_BPB_FACTOR)
- rate_correction_factor = MAX_BPB_FACTOR;
- } else if (correction_factor < 99) {
- // We are not already at the best allowable quality
- correction_factor =
- (int)(100 - ((100 - correction_factor) * adjustment_limit));
- rate_correction_factor = (rate_correction_factor * correction_factor) / 100;
-
- // Keep rate_correction_factor within limits
- if (rate_correction_factor < MIN_BPB_FACTOR)
- rate_correction_factor = MIN_BPB_FACTOR;
- }
-
- set_rate_correction_factor(cpi, rate_correction_factor, width, height);
-}
-
-// Searches [active_best_quality, active_worst_quality] for the qindex whose
-// estimated bits-per-MB is closest to the per-MB target derived from
-// target_bits_per_frame, and returns it. If no qindex in the range gets down
-// to the target, active_worst_quality is returned.
-//
-// In CBR mode, when the last two frames over- and undershot in opposite
-// directions at different Qs, the result is clamped between those two Qs.
-int av1_rc_regulate_q(const AV1_COMP *cpi, int target_bits_per_frame,
-                      int active_best_quality, int active_worst_quality,
-                      int width, int height) {
-  const AV1_COMMON *const cm = &cpi->common;
-  int q = active_worst_quality;
-  int last_error = INT_MAX;
-  int i, target_bits_per_mb, bits_per_mb_at_this_q;
-  const int MBs = av1_get_MBs(width, height);
-  const double correction_factor =
-      get_rate_correction_factor(cpi, width, height);
-
-  // Calculate required scaling factor based on target frame size and size of
-  // frame produced using previous Q.
-  // The shift and the division are both done in 64 bits; narrowing to int
-  // happens last so a large frame target cannot be truncated mid-way.
-  target_bits_per_mb =
-      (int)(((uint64_t)target_bits_per_frame << BPER_MB_NORMBITS) / MBs);
-
-  i = active_best_quality;
-
-  do {
-    if (cpi->oxcf.aq_mode == CYCLIC_REFRESH_AQ && cm->seg.enabled) {
-      bits_per_mb_at_this_q =
-          (int)av1_cyclic_refresh_rc_bits_per_mb(cpi, i, correction_factor);
-    } else {
-      bits_per_mb_at_this_q = (int)av1_rc_bits_per_mb(
-          cm->frame_type, i, correction_factor, cm->seq_params.bit_depth);
-    }
-
-    if (bits_per_mb_at_this_q <= target_bits_per_mb) {
-      // First qindex at or below the target: keep it unless the previous
-      // qindex overshot by less than this one undershoots.
-      if ((target_bits_per_mb - bits_per_mb_at_this_q) <= last_error)
-        q = i;
-      else
-        q = i - 1;
-
-      break;
-    } else {
-      last_error = bits_per_mb_at_this_q - target_bits_per_mb;
-    }
-  } while (++i <= active_worst_quality);
-
-  // In CBR mode, this makes sure q is between oscillating Qs to prevent
-  // resonance.
-  if (cpi->oxcf.rc_mode == AOM_CBR &&
-      (cpi->rc.rc_1_frame * cpi->rc.rc_2_frame == -1) &&
-      cpi->rc.q_1_frame != cpi->rc.q_2_frame) {
-    q = clamp(q, AOMMIN(cpi->rc.q_1_frame, cpi->rc.q_2_frame),
-              AOMMAX(cpi->rc.q_1_frame, cpi->rc.q_2_frame));
-  }
-  return q;
-}
-
-// Maps a boost value to a min-Q entry for qindex `q`: boosts above `high`
-// use the low-motion table, boosts below `low` use the high-motion table,
-// and anything in between is linearly interpolated (rounded to nearest)
-// between the two table entries.
-static int get_active_quality(int q, int gfu_boost, int low, int high,
-                              int *low_motion_minq, int *high_motion_minq) {
-  int span, distance_from_high, table_delta;
-
-  if (gfu_boost > high) return low_motion_minq[q];
-  if (gfu_boost < low) return high_motion_minq[q];
-
-  span = high - low;
-  distance_from_high = high - gfu_boost;
-  table_delta = high_motion_minq[q] - low_motion_minq[q];
-  return low_motion_minq[q] +
-         ((distance_from_high * table_delta) + (span >> 1)) / span;
-}
-
-// Key-frame wrapper around get_active_quality(): selects the kf low/high
-// motion tables for `bit_depth` and interpolates on rc->kf_boost between the
-// kf_low and kf_high thresholds.
-static int get_kf_active_quality(const RATE_CONTROL *const rc, int q,
- aom_bit_depth_t bit_depth) {
- // These locals must keep the table base names: ASSIGN_MINQ_TABLE pastes
- // the bit-depth suffix onto the identifier it is given.
- int *kf_low_motion_minq;
- int *kf_high_motion_minq;
- ASSIGN_MINQ_TABLE(bit_depth, kf_low_motion_minq);
- ASSIGN_MINQ_TABLE(bit_depth, kf_high_motion_minq);
- return get_active_quality(q, rc->kf_boost, kf_low, kf_high,
- kf_low_motion_minq, kf_high_motion_minq);
-}
-
-// Golden/alt-ref wrapper around get_active_quality(): selects the arfgf
-// low/high motion tables for `bit_depth` and interpolates on rc->gfu_boost
-// between the gf_low and gf_high thresholds.
-static int get_gf_active_quality(const RATE_CONTROL *const rc, int q,
- aom_bit_depth_t bit_depth) {
- // These locals must keep the table base names: ASSIGN_MINQ_TABLE pastes
- // the bit-depth suffix onto the identifier it is given.
- int *arfgf_low_motion_minq;
- int *arfgf_high_motion_minq;
- ASSIGN_MINQ_TABLE(bit_depth, arfgf_low_motion_minq);
- ASSIGN_MINQ_TABLE(bit_depth, arfgf_high_motion_minq);
- return get_active_quality(q, rc->gfu_boost, gf_low, gf_high,
- arfgf_low_motion_minq, arfgf_high_motion_minq);
-}
-
-#if REDUCE_LAST_ALT_BOOST
-// Returns the arfgf high-motion min-Q entry for qindex `q` at `bit_depth`,
-// with no boost interpolation. Only compiled when REDUCE_LAST_ALT_BOOST is
-// set.
-static int get_gf_high_motion_quality(int q, aom_bit_depth_t bit_depth) {
- // The local must keep the table base name used by ASSIGN_MINQ_TABLE.
- int *arfgf_high_motion_minq;
- ASSIGN_MINQ_TABLE(bit_depth, arfgf_high_motion_minq);
- return arfgf_high_motion_minq[q];
-}
-#endif
-
-// One-pass VBR ceiling on the qindex for the current frame, derived from the
-// last Q used for key / inter frames and capped at rc->worst_quality.
-static int calc_active_worst_quality_one_pass_vbr(const AV1_COMP *cpi) {
-  const RATE_CONTROL *const rc = &cpi->rc;
-  const unsigned int frame_no = cpi->common.current_video_frame;
-  int ceiling;
-
-  if (cpi->common.frame_type == KEY_FRAME) {
-    // The very first frame gets the full range; later key frames get twice
-    // the previous key-frame Q.
-    if (frame_no == 0)
-      ceiling = rc->worst_quality;
-    else
-      ceiling = rc->last_q[KEY_FRAME] * 2;
-  } else if (!rc->is_src_frame_alt_ref &&
-             (cpi->refresh_golden_frame || cpi->refresh_alt2_ref_frame ||
-              cpi->refresh_alt_ref_frame)) {
-    // Golden / alt-ref refresh that is not an ARF overlay.
-    if (frame_no == 1)
-      ceiling = rc->last_q[KEY_FRAME] * 5 / 4;
-    else
-      ceiling = rc->last_q[INTER_FRAME];
-  } else {
-    // Ordinary inter frame.
-    if (frame_no == 1)
-      ceiling = rc->last_q[KEY_FRAME] * 2;
-    else
-      ceiling = rc->last_q[INTER_FRAME] * 2;
-  }
-
-  return AOMMIN(ceiling, rc->worst_quality);
-}
-
-// Adjust active_worst_quality level based on buffer level.
-static int calc_active_worst_quality_one_pass_cbr(const AV1_COMP *cpi) {
- // Adjust active_worst_quality: If buffer is above the optimal/target level,
- // bring active_worst_quality down depending on fullness of buffer.
- // If buffer is below the optimal level, let the active_worst_quality go from
- // ambient Q (at buffer = optimal level) to worst_quality level
- // (at buffer = critical level).
- const AV1_COMMON *const cm = &cpi->common;
- const RATE_CONTROL *rc = &cpi->rc;
- // Buffer level below which we push active_worst to worst_quality.
- // (one eighth of the optimal buffer level)
- int64_t critical_level = rc->optimal_buffer_level >> 3;
- int64_t buff_lvl_step = 0;
- int adjustment = 0;
- int active_worst_quality;
- int ambient_qp;
- // Key frames always get the full worst_quality ceiling.
- if (cm->frame_type == KEY_FRAME) return rc->worst_quality;
- // For ambient_qp we use minimum of avg_frame_qindex[KEY_FRAME/INTER_FRAME]
- // for the first few frames following key frame. These are both initialized
- // to worst_quality and updated with (3/4, 1/4) average in postencode_update.
- // So for first few frames following key, the qp of that key frame is weighted
- // into the active_worst_quality setting.
- ambient_qp = (cm->current_video_frame < 5)
- ? AOMMIN(rc->avg_frame_qindex[INTER_FRAME],
- rc->avg_frame_qindex[KEY_FRAME])
- : rc->avg_frame_qindex[INTER_FRAME];
- // Start from 5/4 of the ambient qindex, capped at worst_quality.
- active_worst_quality = AOMMIN(rc->worst_quality, ambient_qp * 5 / 4);
- if (rc->buffer_level > rc->optimal_buffer_level) {
- // Adjust down.
- // Maximum limit for down adjustment, ~30%.
- int max_adjustment_down = active_worst_quality / 3;
- if (max_adjustment_down) {
- // buff_lvl_step is the buffer surplus that buys one qindex step down;
- // a zero step leaves adjustment at 0.
- buff_lvl_step = ((rc->maximum_buffer_size - rc->optimal_buffer_level) /
- max_adjustment_down);
- if (buff_lvl_step)
- adjustment = (int)((rc->buffer_level - rc->optimal_buffer_level) /
- buff_lvl_step);
- active_worst_quality -= adjustment;
- }
- } else if (rc->buffer_level > critical_level) {
- // Adjust up from ambient Q.
- // When critical_level is 0 the 5/4 starting value above is kept as is.
- if (critical_level) {
- buff_lvl_step = (rc->optimal_buffer_level - critical_level);
- if (buff_lvl_step) {
- adjustment = (int)((rc->worst_quality - ambient_qp) *
- (rc->optimal_buffer_level - rc->buffer_level) /
- buff_lvl_step);
- }
- active_worst_quality = ambient_qp + adjustment;
- }
- } else {
- // Set to worst_quality if buffer is below critical level.
- active_worst_quality = rc->worst_quality;
- }
- return active_worst_quality;
-}
-
-// Chooses the qindex for the current frame in one-pass CBR mode and returns
-// it. The range left to the caller is written to *bottom_index (active best
-// quality) and *top_index (active worst quality, possibly adjusted for key
-// frames). All three values are asserted to lie within
-// [rc->best_quality, rc->worst_quality].
-static int rc_pick_q_and_bounds_one_pass_cbr(const AV1_COMP *cpi, int width,
- int height, int *bottom_index,
- int *top_index) {
- const AV1_COMMON *const cm = &cpi->common;
- const RATE_CONTROL *const rc = &cpi->rc;
- int active_best_quality;
- int active_worst_quality = calc_active_worst_quality_one_pass_cbr(cpi);
- int q;
- // Must keep the table base name used by ASSIGN_MINQ_TABLE.
- int *rtc_minq;
- const int bit_depth = cm->seq_params.bit_depth;
- ASSIGN_MINQ_TABLE(bit_depth, rtc_minq);
-
- // Step 1: pick active_best_quality according to the frame category.
- if (frame_is_intra_only(cm)) {
- active_best_quality = rc->best_quality;
- // Handle the special case for key frames forced when we have reached
- // the maximum key frame interval. Here force the Q to a range
- // based on the ambient Q to reduce the risk of popping.
- if (rc->this_key_frame_forced) {
- int qindex = rc->last_boosted_qindex;
- double last_boosted_q = av1_convert_qindex_to_q(qindex, bit_depth);
- int delta_qindex = av1_compute_qdelta(rc, last_boosted_q,
- (last_boosted_q * 0.75), bit_depth);
- active_best_quality = AOMMAX(qindex + delta_qindex, rc->best_quality);
- } else if (cm->current_video_frame > 0) {
- // not first frame of one pass and kf_boost is set
- double q_adj_factor = 1.0;
- double q_val;
-
- active_best_quality =
- get_kf_active_quality(rc, rc->avg_frame_qindex[KEY_FRAME], bit_depth);
-
- // Allow somewhat lower kf minq with small image formats.
- if ((width * height) <= (352 * 288)) {
- q_adj_factor -= 0.25;
- }
-
- // Convert the adjustment factor to a qindex delta
- // on active_best_quality.
- q_val = av1_convert_qindex_to_q(active_best_quality, bit_depth);
- active_best_quality +=
- av1_compute_qdelta(rc, q_val, q_val * q_adj_factor, bit_depth);
- }
- } else if (!rc->is_src_frame_alt_ref &&
- (cpi->refresh_golden_frame || cpi->refresh_alt_ref_frame)) {
- // Use the lower of active_worst_quality and recent
- // average Q as basis for GF/ARF best Q limit unless last frame was
- // a key frame.
- if (rc->frames_since_key > 1 &&
- rc->avg_frame_qindex[INTER_FRAME] < active_worst_quality) {
- q = rc->avg_frame_qindex[INTER_FRAME];
- } else {
- q = active_worst_quality;
- }
- active_best_quality = get_gf_active_quality(rc, q, bit_depth);
- } else {
- // Use the lower of active_worst_quality and recent/average Q.
- // The first inter frames (current_video_frame <= 1) compare against the
- // key-frame average instead of the inter-frame average.
- if (cm->current_video_frame > 1) {
- if (rc->avg_frame_qindex[INTER_FRAME] < active_worst_quality)
- active_best_quality = rtc_minq[rc->avg_frame_qindex[INTER_FRAME]];
- else
- active_best_quality = rtc_minq[active_worst_quality];
- } else {
- if (rc->avg_frame_qindex[KEY_FRAME] < active_worst_quality)
- active_best_quality = rtc_minq[rc->avg_frame_qindex[KEY_FRAME]];
- else
- active_best_quality = rtc_minq[active_worst_quality];
- }
- }
-
- // Clip the active best and worst quality values to limits
- active_best_quality =
- clamp(active_best_quality, rc->best_quality, rc->worst_quality);
- active_worst_quality =
- clamp(active_worst_quality, active_best_quality, rc->worst_quality);
-
- *top_index = active_worst_quality;
- *bottom_index = active_best_quality;
-
- // Limit Q range for the adaptive loop.
- // Applies only to non-forced key frames after the first frame.
- if (cm->frame_type == KEY_FRAME && !rc->this_key_frame_forced &&
- !(cm->current_video_frame == 0)) {
- int qdelta = 0;
- aom_clear_system_state();
- qdelta = av1_compute_qdelta_by_rate(&cpi->rc, cm->frame_type,
- active_worst_quality, 2.0, bit_depth);
- *top_index = active_worst_quality + qdelta;
- *top_index = AOMMAX(*top_index, *bottom_index);
- }
-
- // Special case code to try and match quality with forced key frames
- if (cm->frame_type == KEY_FRAME && rc->this_key_frame_forced) {
- q = rc->last_boosted_qindex;
- } else {
- q = av1_rc_regulate_q(cpi, rc->this_frame_target, active_best_quality,
- active_worst_quality, width, height);
- if (q > *top_index) {
- // Special case when we are targeting the max allowed rate
- // (raise the ceiling to q; otherwise pull q down to the ceiling)
- if (rc->this_frame_target >= rc->max_frame_bandwidth)
- *top_index = q;
- else
- q = *top_index;
- }
- }
-
- assert(*top_index <= rc->worst_quality && *top_index >= rc->best_quality);
- assert(*bottom_index <= rc->worst_quality &&
- *bottom_index >= rc->best_quality);
- assert(q <= rc->worst_quality && q >= rc->best_quality);
- return q;
-}
-
-// Returns the CQ level to use for the current frame. This is normally
-// oxcf->cq_level; in AOM_CQ mode, when the bits spent so far are under 10%
-// of the bits targeted so far, the level is scaled down in proportion to
-// that spend ratio.
-static int get_active_cq_level(const RATE_CONTROL *rc,
-                               const AV1EncoderConfig *const oxcf) {
-  static const double cq_adjust_threshold = 0.1;
-  const int configured_level = oxcf->cq_level;
-  double spend_ratio;
-
-  if (oxcf->rc_mode != AOM_CQ) return configured_level;
-  if (!(rc->total_target_bits > 0)) return configured_level;
-
-  spend_ratio = (double)rc->total_actual_bits / rc->total_target_bits;
-  if (!(spend_ratio < cq_adjust_threshold)) return configured_level;
-
-  return (int)(configured_level * spend_ratio / cq_adjust_threshold);
-}
-
-// Chooses the qindex for the current frame in one-pass VBR / CQ / Q mode and
-// returns it. The range left to the caller is written to *bottom_index
-// (active best quality) and *top_index (active worst quality plus a
-// rate-based delta for key and golden/alt-ref frames). All three values are
-// asserted to lie within [rc->best_quality, rc->worst_quality].
-static int rc_pick_q_and_bounds_one_pass_vbr(const AV1_COMP *cpi, int width,
- int height, int *bottom_index,
- int *top_index) {
- const AV1_COMMON *const cm = &cpi->common;
- const RATE_CONTROL *const rc = &cpi->rc;
- const AV1EncoderConfig *const oxcf = &cpi->oxcf;
- const int cq_level = get_active_cq_level(rc, oxcf);
- int active_best_quality;
- int active_worst_quality = calc_active_worst_quality_one_pass_vbr(cpi);
- int q;
- // Must keep the table base name used by ASSIGN_MINQ_TABLE.
- int *inter_minq;
- const int bit_depth = cm->seq_params.bit_depth;
- ASSIGN_MINQ_TABLE(bit_depth, inter_minq);
-
- // Step 1: pick active_best_quality according to frame category and rc_mode.
- if (frame_is_intra_only(cm)) {
- if (oxcf->rc_mode == AOM_Q) {
- // Fixed-Q mode: derive the limit from cq_level at a quarter of its Q.
- const int qindex = cq_level;
- const double q_val = av1_convert_qindex_to_q(qindex, bit_depth);
- const int delta_qindex =
- av1_compute_qdelta(rc, q_val, q_val * 0.25, bit_depth);
- active_best_quality = AOMMAX(qindex + delta_qindex, rc->best_quality);
- } else if (rc->this_key_frame_forced) {
- // Forced key frame: stay near the last boosted Q (75% of it).
- const int qindex = rc->last_boosted_qindex;
- const double last_boosted_q = av1_convert_qindex_to_q(qindex, bit_depth);
- const int delta_qindex = av1_compute_qdelta(
- rc, last_boosted_q, last_boosted_q * 0.75, bit_depth);
- active_best_quality = AOMMAX(qindex + delta_qindex, rc->best_quality);
- } else { // not first frame of one pass and kf_boost is set
- double q_adj_factor = 1.0;
-
- active_best_quality =
- get_kf_active_quality(rc, rc->avg_frame_qindex[KEY_FRAME], bit_depth);
-
- // Allow somewhat lower kf minq with small image formats.
- if ((width * height) <= (352 * 288)) {
- q_adj_factor -= 0.25;
- }
-
- // Convert the adjustment factor to a qindex delta on active_best_quality.
- {
- const double q_val =
- av1_convert_qindex_to_q(active_best_quality, bit_depth);
- active_best_quality +=
- av1_compute_qdelta(rc, q_val, q_val * q_adj_factor, bit_depth);
- }
- }
- } else if (!rc->is_src_frame_alt_ref &&
- (cpi->refresh_golden_frame || cpi->refresh_alt_ref_frame)) {
- // Use the lower of active_worst_quality and recent
- // average Q as basis for GF/ARF best Q limit unless last frame was
- // a key frame.
- q = (rc->frames_since_key > 1 &&
- rc->avg_frame_qindex[INTER_FRAME] < active_worst_quality)
- ? rc->avg_frame_qindex[INTER_FRAME]
- : rc->avg_frame_qindex[KEY_FRAME];
- // For constrained quality don't allow Q less than the cq level
- if (oxcf->rc_mode == AOM_CQ) {
- if (q < cq_level) q = cq_level;
- active_best_quality = get_gf_active_quality(rc, q, bit_depth);
- // Constrained quality use slightly lower active best.
- active_best_quality = active_best_quality * 15 / 16;
- } else if (oxcf->rc_mode == AOM_Q) {
- // Fixed-Q mode: alt-ref refreshes target 40% of the cq-level Q, other
- // refreshes 50%.
- const int qindex = cq_level;
- const double q_val = av1_convert_qindex_to_q(qindex, bit_depth);
- const int delta_qindex =
- (cpi->refresh_alt_ref_frame)
- ? av1_compute_qdelta(rc, q_val, q_val * 0.40, bit_depth)
- : av1_compute_qdelta(rc, q_val, q_val * 0.50, bit_depth);
- active_best_quality = AOMMAX(qindex + delta_qindex, rc->best_quality);
- } else {
- active_best_quality = get_gf_active_quality(rc, q, bit_depth);
- }
- } else {
- if (oxcf->rc_mode == AOM_Q) {
- // Fixed-Q mode: the Q scale cycles through delta_rate[] by frame number.
- const int qindex = cq_level;
- const double q_val = av1_convert_qindex_to_q(qindex, bit_depth);
- const double delta_rate[FIXED_GF_INTERVAL] = { 0.50, 1.0, 0.85, 1.0,
- 0.70, 1.0, 0.85, 1.0 };
- const int delta_qindex = av1_compute_qdelta(
- rc, q_val,
- q_val * delta_rate[cm->current_video_frame % FIXED_GF_INTERVAL],
- bit_depth);
- active_best_quality = AOMMAX(qindex + delta_qindex, rc->best_quality);
- } else {
- // Use the lower of active_worst_quality and recent/average Q.
- active_best_quality = (cm->current_video_frame > 1)
- ? inter_minq[rc->avg_frame_qindex[INTER_FRAME]]
- : inter_minq[rc->avg_frame_qindex[KEY_FRAME]];
- // For the constrained quality mode we don't want
- // q to fall below the cq level.
- if ((oxcf->rc_mode == AOM_CQ) && (active_best_quality < cq_level)) {
- active_best_quality = cq_level;
- }
- }
- }
-
- // Clip the active best and worst quality values to limits
- active_best_quality =
- clamp(active_best_quality, rc->best_quality, rc->worst_quality);
- active_worst_quality =
- clamp(active_worst_quality, active_best_quality, rc->worst_quality);
-
- *top_index = active_worst_quality;
- *bottom_index = active_best_quality;
-
- // Limit Q range for the adaptive loop.
- // qdelta stays 0 for frames that match neither case below.
- {
- int qdelta = 0;
- aom_clear_system_state();
- if (cm->frame_type == KEY_FRAME && !rc->this_key_frame_forced &&
- !(cm->current_video_frame == 0)) {
- qdelta = av1_compute_qdelta_by_rate(&cpi->rc, cm->frame_type,
- active_worst_quality, 2.0, bit_depth);
- } else if (!rc->is_src_frame_alt_ref &&
- (cpi->refresh_golden_frame || cpi->refresh_alt_ref_frame)) {
- qdelta = av1_compute_qdelta_by_rate(
- &cpi->rc, cm->frame_type, active_worst_quality, 1.75, bit_depth);
- }
- *top_index = active_worst_quality + qdelta;
- *top_index = AOMMAX(*top_index, *bottom_index);
- }
-
- // Step 2: choose q itself.
- if (oxcf->rc_mode == AOM_Q) {
- q = active_best_quality;
- // Special case code to try and match quality with forced key frames
- } else if ((cm->frame_type == KEY_FRAME) && rc->this_key_frame_forced) {
- q = rc->last_boosted_qindex;
- } else {
- q = av1_rc_regulate_q(cpi, rc->this_frame_target, active_best_quality,
- active_worst_quality, width, height);
- if (q > *top_index) {
- // Special case when we are targeting the max allowed rate
- // (raise the ceiling to q; otherwise pull q down to the ceiling)
- if (rc->this_frame_target >= rc->max_frame_bandwidth)
- *top_index = q;
- else
- q = *top_index;
- }
- }
-
- assert(*top_index <= rc->worst_quality && *top_index >= rc->best_quality);
- assert(*bottom_index <= rc->worst_quality &&
- *bottom_index >= rc->best_quality);
- assert(q <= rc->worst_quality && q >= rc->best_quality);
- return q;
-}
-
-// Returns the qindex delta, relative to |q|, for the rate-factor level
-// |rf_level|. The level selects both the frame type and the rate ratio that
-// are handed to av1_compute_qdelta_by_rate().
-int av1_frame_type_qdelta(const AV1_COMP *cpi, int rf_level, int q) {
-  // Frame type used for each RATE_FACTOR_LEVEL entry; only the last level is
-  // looked up as a key frame.
-  static const FRAME_TYPE level_frame_type[RATE_FACTOR_LEVELS] = {
-    INTER_FRAME, INTER_FRAME, INTER_FRAME, INTER_FRAME, INTER_FRAME, KEY_FRAME
-  };
-
-  return av1_compute_qdelta_by_rate(&cpi->rc, level_frame_type[rf_level], q,
-                                    rate_factor_deltas[rf_level],
-                                    cpi->common.seq_params.bit_depth);
-}
-
-#define STATIC_MOTION_THRESH 95
-
-// Picks the qindex for the current frame in two-pass mode together with the
-// range the recode loop may search.
-//
-//   cpi           encoder instance (read only).
-//   width/height  frame dimensions used for the small-format adjustment and
-//                 for av1_rc_regulate_q().
-//   bottom_index  out: lowest qindex allowed (active best quality).
-//   top_index     out: highest qindex allowed (active worst quality).
-//   arf_q         out: written only on the paths that derive a fresh ARF
-//                 quality; left untouched otherwise.
-//
-// Returns the chosen qindex, which lies in [*bottom_index, *top_index].
-static int rc_pick_q_and_bounds_two_pass(const AV1_COMP *cpi, int width,
-                                         int height, int *bottom_index,
-                                         int *top_index, int *arf_q) {
-  const AV1_COMMON *const cm = &cpi->common;
-  const RATE_CONTROL *const rc = &cpi->rc;
-  const AV1EncoderConfig *const oxcf = &cpi->oxcf;
-  const GF_GROUP *gf_group = &cpi->twopass.gf_group;
-  const int cq_level = get_active_cq_level(rc, oxcf);
-  int active_best_quality;
-  int active_worst_quality = cpi->twopass.active_worst_quality;
-  int q;
-  int *inter_minq;
-  const int bit_depth = cm->seq_params.bit_depth;
-  ASSIGN_MINQ_TABLE(bit_depth, inter_minq);
-
-#if CUSTOMIZED_GF
-  const int is_intrl_arf_boost =
-      gf_group->update_type[gf_group->index] == INTNL_ARF_UPDATE;
-#else
-  const int is_intrl_arf_boost = cpi->refresh_alt2_ref_frame;
-#endif  // CUSTOMIZED_GF
-
-  if (frame_is_intra_only(cm)) {
-    // Handle the special case for key frames forced when we have reached
-    // the maximum key frame interval. Here force the Q to a range
-    // based on the ambient Q to reduce the risk of popping.
-    if (rc->this_key_frame_forced) {
-      double last_boosted_q;
-      int delta_qindex;
-      int qindex;
-
-      if (cpi->twopass.last_kfgroup_zeromotion_pct >= STATIC_MOTION_THRESH) {
-        qindex = AOMMIN(rc->last_kf_qindex, rc->last_boosted_qindex);
-        active_best_quality = qindex;
-        last_boosted_q = av1_convert_qindex_to_q(qindex, bit_depth);
-        delta_qindex = av1_compute_qdelta(rc, last_boosted_q,
-                                          last_boosted_q * 1.25, bit_depth);
-        active_worst_quality =
-            AOMMIN(qindex + delta_qindex, active_worst_quality);
-      } else {
-        qindex = rc->last_boosted_qindex;
-        last_boosted_q = av1_convert_qindex_to_q(qindex, bit_depth);
-        delta_qindex = av1_compute_qdelta(rc, last_boosted_q,
-                                          last_boosted_q * 0.5, bit_depth);
-        active_best_quality = AOMMAX(qindex + delta_qindex, rc->best_quality);
-      }
-    } else {
-      // Not forced keyframe.
-      double q_adj_factor = 1.0;
-      double q_val;
-
-      // Baseline value derived from cpi->active_worst_quality and kf boost.
-      active_best_quality =
-          get_kf_active_quality(rc, active_worst_quality, bit_depth);
-
-      // Allow somewhat lower kf minq with small image formats.
-      if ((width * height) <= (352 * 288)) {
-        q_adj_factor -= 0.25;
-      }
-
-      // Make a further adjustment based on the kf zero motion measure.
-      q_adj_factor += 0.05 - (0.001 * (double)cpi->twopass.kf_zeromotion_pct);
-
-      // Convert the adjustment factor to a qindex delta
-      // on active_best_quality.
-      q_val = av1_convert_qindex_to_q(active_best_quality, bit_depth);
-      active_best_quality +=
-          av1_compute_qdelta(rc, q_val, q_val * q_adj_factor, bit_depth);
-    }
-  } else if (!rc->is_src_frame_alt_ref &&
-             (cpi->refresh_golden_frame || is_intrl_arf_boost ||
-              cpi->refresh_alt_ref_frame)) {
-    // Use the lower of active_worst_quality and recent
-    // average Q as basis for GF/ARF best Q limit unless last frame was
-    // a key frame.
-    if (rc->frames_since_key > 1 &&
-        rc->avg_frame_qindex[INTER_FRAME] < active_worst_quality) {
-      q = rc->avg_frame_qindex[INTER_FRAME];
-    } else {
-      q = active_worst_quality;
-    }
-    // For constrained quality dont allow Q less than the cq level
-    if (oxcf->rc_mode == AOM_CQ) {
-      if (q < cq_level) q = cq_level;
-#if USE_SYMM_MULTI_LAYER && MULTI_LVL_BOOST_VBR_CQ
-      if (gf_group->update_type[gf_group->index] == ARF_UPDATE ||
-          (is_intrl_arf_boost && !cpi->new_bwdref_update_rule)) {
-#endif  // USE_SYMM_MULTI_LAYER && MULTI_LVL_BOOST_VBR_CQ
-        active_best_quality = get_gf_active_quality(rc, q, bit_depth);
-
-        // Constrained quality use slightly lower active best.
-        active_best_quality = active_best_quality * 15 / 16;
-#if REDUCE_LAST_ALT_BOOST
-        if (gf_group->update_type[gf_group->index] == ARF_UPDATE) {
-          const int min_boost = get_gf_high_motion_quality(q, bit_depth);
-          const int boost = min_boost - active_best_quality;
-
-          active_best_quality = min_boost - (int)(boost * rc->arf_boost_factor);
-        }
-#endif
-        *arf_q = active_best_quality;
-#if USE_SYMM_MULTI_LAYER && MULTI_LVL_BOOST_VBR_CQ
-      } else {
-        active_best_quality = rc->arf_q;
-        int this_height = gf_group->pyramid_level[gf_group->index];
-        while (this_height < gf_group->pyramid_height) {
-          active_best_quality = (active_best_quality + cq_level + 1) / 2;
-          ++this_height;
-        }
-      }
-#endif  // USE_SYMM_MULTI_LAYER && MULTI_LVL_BOOST_VBR_CQ
-    } else if (oxcf->rc_mode == AOM_Q) {
-      if (!cpi->refresh_alt_ref_frame && !is_intrl_arf_boost) {
-        active_best_quality = cq_level;
-      } else {
-        if (gf_group->update_type[gf_group->index] == ARF_UPDATE) {
-          active_best_quality = get_gf_active_quality(rc, q, bit_depth);
-          *arf_q = active_best_quality;
-#if REDUCE_LAST_ALT_BOOST
-          const int min_boost = get_gf_high_motion_quality(q, bit_depth);
-          const int boost = min_boost - active_best_quality;
-
-          active_best_quality = min_boost - (int)(boost * rc->arf_boost_factor);
-#endif
-        } else {
-          active_best_quality = rc->arf_q;
-        }
-#if USE_SYMM_MULTI_LAYER
-        if (cpi->new_bwdref_update_rule && is_intrl_arf_boost) {
-          int this_height = gf_group->pyramid_level[gf_group->index];
-          while (this_height < gf_group->pyramid_height) {
-            active_best_quality = (active_best_quality + cq_level + 1) / 2;
-            ++this_height;
-          }
-        } else {
-#endif
-          // Modify best quality for second level arfs. For mode AOM_Q this
-          // becomes the baseline frame q.
-          if (gf_group->rf_level[gf_group->index] == GF_ARF_LOW)
-            active_best_quality = (active_best_quality + cq_level + 1) / 2;
-#if USE_SYMM_MULTI_LAYER
-        }
-#endif
-      }
-    } else {
-      active_best_quality = get_gf_active_quality(rc, q, bit_depth);
-#if REDUCE_LAST_ALT_BOOST
-      const int min_boost = get_gf_high_motion_quality(q, bit_depth);
-      const int boost = min_boost - active_best_quality;
-
-      active_best_quality = min_boost - (int)(boost * rc->arf_boost_factor);
-#endif
-#if USE_SYMM_MULTI_LAYER
-      if (cpi->new_bwdref_update_rule && is_intrl_arf_boost) {
-        int this_height = gf_group->pyramid_level[gf_group->index];
-        while (this_height < gf_group->pyramid_height) {
-          active_best_quality =
-              (active_best_quality + active_worst_quality + 1) / 2;
-          ++this_height;
-        }
-      }
-#endif
-    }
-  } else {
-    if (oxcf->rc_mode == AOM_Q) {
-      active_best_quality = cq_level;
-    } else {
-      active_best_quality = inter_minq[active_worst_quality];
-
-      // For the constrained quality mode we don't want
-      // q to fall below the cq level.
-      if ((oxcf->rc_mode == AOM_CQ) && (active_best_quality < cq_level)) {
-        active_best_quality = cq_level;
-      }
-    }
-  }
-
-  // Extension to max or min Q if undershoot or overshoot is outside
-  // the permitted range.
-  if ((cpi->oxcf.rc_mode != AOM_Q) &&
-      (cpi->twopass.gf_zeromotion_pct < VLOW_MOTION_THRESHOLD)) {
-    if (frame_is_intra_only(cm) ||
-        (!rc->is_src_frame_alt_ref &&
-         (cpi->refresh_golden_frame || is_intrl_arf_boost ||
-          cpi->refresh_alt_ref_frame))) {
-      active_best_quality -=
-          (cpi->twopass.extend_minq + cpi->twopass.extend_minq_fast);
-      active_worst_quality += (cpi->twopass.extend_maxq / 2);
-    } else {
-      active_best_quality -=
-          (cpi->twopass.extend_minq + cpi->twopass.extend_minq_fast) / 2;
-      active_worst_quality += cpi->twopass.extend_maxq;
-    }
-  }
-
-  aom_clear_system_state();
-  // Static forced key frames Q restrictions dealt with elsewhere.
-  if (!(frame_is_intra_only(cm)) || !rc->this_key_frame_forced ||
-      (cpi->twopass.last_kfgroup_zeromotion_pct < STATIC_MOTION_THRESH)) {
-    int qdelta = av1_frame_type_qdelta(cpi, gf_group->rf_level[gf_group->index],
-                                       active_worst_quality);
-    active_worst_quality =
-        AOMMAX(active_worst_quality + qdelta, active_best_quality);
-  }
-
-  // Modify active_best_quality for downscaled normal frames.
-  if (av1_frame_scaled(cm) && !frame_is_kf_gf_arf(cpi)) {
-    int qdelta = av1_compute_qdelta_by_rate(
-        rc, cm->frame_type, active_best_quality, 2.0, bit_depth);
-    active_best_quality =
-        AOMMAX(active_best_quality + qdelta, rc->best_quality);
-  }
-
-  active_best_quality =
-      clamp(active_best_quality, rc->best_quality, rc->worst_quality);
-  active_worst_quality =
-      clamp(active_worst_quality, active_best_quality, rc->worst_quality);
-
-  if (oxcf->rc_mode == AOM_Q) {
-    q = active_best_quality;
-    // Special case code to try and match quality with forced key frames.
-  } else if (frame_is_intra_only(cm) && rc->this_key_frame_forced) {
-    // If static since last kf use better of last boosted and last kf q.
-    if (cpi->twopass.last_kfgroup_zeromotion_pct >= STATIC_MOTION_THRESH) {
-      q = AOMMIN(rc->last_kf_qindex, rc->last_boosted_qindex);
-    } else {
-      q = AOMMIN(rc->last_boosted_qindex,
-                 (active_best_quality + active_worst_quality) / 2);
-    }
-  } else {
-    q = av1_rc_regulate_q(cpi, rc->this_frame_target, active_best_quality,
-                          active_worst_quality, width, height);
-    if (q > active_worst_quality) {
-      // Special case when we are targeting the max allowed rate.
-      if (rc->this_frame_target >= rc->max_frame_bandwidth)
-        active_worst_quality = q;
-      else
-        q = active_worst_quality;
-    }
-  }
-  // Keep q inside the active range. clamp() hands back the clamped value (q
-  // is passed by value), so the result has to be stored; discarding it left
-  // q unclamped on the forced key frame paths above.
-  q = clamp(q, active_best_quality, active_worst_quality);
-
-  *top_index = active_worst_quality;
-  *bottom_index = active_best_quality;
-
-  assert(*top_index <= rc->worst_quality && *top_index >= rc->best_quality);
-  assert(*bottom_index <= rc->worst_quality &&
-         *bottom_index >= rc->best_quality);
-  assert(q <= rc->worst_quality && q >= rc->best_quality);
-  return q;
-}
-
-// Chooses the qindex for the current frame and reports the search range via
-// |bottom_index| / |top_index|, dispatching on the encode pass and rate
-// control mode. In two-pass mode the ARF quality is stored in cpi->rc.arf_q
-// when the current GF group entry is an ARF update.
-int av1_rc_pick_q_and_bounds(AV1_COMP *cpi, int width, int height,
-                             int *bottom_index, int *top_index) {
-  if (cpi->oxcf.pass == 0) {
-    // One-pass encode: CBR has its own picker, every other mode uses VBR's.
-    return (cpi->oxcf.rc_mode == AOM_CBR)
-               ? rc_pick_q_and_bounds_one_pass_cbr(cpi, width, height,
-                                                   bottom_index, top_index)
-               : rc_pick_q_and_bounds_one_pass_vbr(cpi, width, height,
-                                                   bottom_index, top_index);
-  }
-
-  assert(cpi->oxcf.pass == 2 && "invalid encode pass");
-
-  GF_GROUP *group = &cpi->twopass.gf_group;
-  int picked_arf_q = 0;
-  const int q = rc_pick_q_and_bounds_two_pass(cpi, width, height, bottom_index,
-                                              top_index, &picked_arf_q);
-
-  // Only an ARF update is allowed to refresh the stored ARF quality.
-  if (group->update_type[group->index] == ARF_UPDATE) {
-    cpi->rc.arf_q = picked_arf_q;
-  }
-
-  return q;
-}
-
-// Computes the frame size window, in the same units as |frame_target|,
-// written to |frame_under_shoot_limit| and |frame_over_shoot_limit|.
-// In AOM_Q mode the window is unbounded: [0, INT_MAX].
-void av1_rc_compute_frame_size_bounds(const AV1_COMP *cpi, int frame_target,
-                                      int *frame_under_shoot_limit,
-                                      int *frame_over_shoot_limit) {
-  if (cpi->oxcf.rc_mode == AOM_Q) {
-    *frame_under_shoot_limit = 0;
-    *frame_over_shoot_limit = INT_MAX;
-    return;
-  }
-
-  // The fixed 200 on each side guarantees a minimum range for very small
-  // rate targets, where the percentage based tolerance may be tiny.
-  const int tolerance = (cpi->sf.recode_tolerance * frame_target) / 100;
-  const int lower = frame_target - tolerance - 200;
-  const int upper = frame_target + tolerance + 200;
-
-  *frame_under_shoot_limit = AOMMAX(lower, 0);
-  *frame_over_shoot_limit = AOMMIN(upper, cpi->rc.max_frame_bandwidth);
-}
-
-// Stores |target| as the bit target for the current frame in
-// rc->this_frame_target (scaled by resize_rate_factor() when the frame is
-// coded at a scaled size) and derives rc->sb64_target_rate from it.
-//
-//   cpi     encoder instance whose rate control state is updated.
-//   target  frame bit target before any scaling adjustment.
-//   width   coded frame width.
-//   height  coded frame height.
-static void rc_set_frame_target(AV1_COMP *cpi, int target, int width,
-                                int height) {
-  const AV1_COMMON *const cm = &cpi->common;
-  RATE_CONTROL *const rc = &cpi->rc;
-
-  rc->this_frame_target = target;
-
-  // Modify frame size target when down-scaled.
-  if (av1_frame_scaled(cm))
-    rc->this_frame_target =
-        (int)(rc->this_frame_target * resize_rate_factor(cpi, width, height));
-
-  // Target rate per SB64 (including partial SB64s).
-  // The cast binds tighter than the division, so the whole quotient is
-  // parenthesized: the 64-bit product must be divided first and only the
-  // result narrowed to int. Narrowing the product itself would truncate it
-  // for frame targets of 2^31 / 4096 bits and above.
-  rc->sb64_target_rate =
-      (int)(((int64_t)rc->this_frame_target * 64 * 64) / (width * height));
-}
-
-// Resets the golden/alt-ref bookkeeping after a frame that refreshed the
-// alternate reference.
-static void update_alt_ref_frame_stats(AV1_COMP *cpi) {
-  // This frame was a refresh, so following frames are not unless the user
-  // asks for one: restart the golden frame distance counter.
-  cpi->rc.frames_since_golden = 0;
-
-  // No further alt ref is pending (0 marks the alt ref as done).
-  cpi->rc.source_alt_ref_pending = 0;
-
-  // The alternate reference frame is now active.
-  cpi->rc.source_alt_ref_active = 1;
-}
-
-// Updates the golden frame usage counters and the alt-ref active flag after
-// a frame that did not go through update_alt_ref_frame_stats().
-static void update_golden_frame_stats(AV1_COMP *cpi) {
-  RATE_CONTROL *const rc = &cpi->rc;
-#if CUSTOMIZED_GF
-  const TWO_PASS *const twopass = &cpi->twopass;
-  const GF_GROUP *const gf_group = &twopass->gf_group;
-  const int is_intrnl_arf =
-      cpi->oxcf.pass == 2
-          ? gf_group->update_type[gf_group->index] == INTNL_ARF_UPDATE
-          : cpi->refresh_alt2_ref_frame;
-#else
-  // Must carry the same name as in the CUSTOMIZED_GF branch: the code below
-  // reads is_intrnl_arf in both configurations.
-  const int is_intrnl_arf = cpi->refresh_alt2_ref_frame;
-#endif
-
-  // Update the Golden frame usage counts.
-  // NOTE(weitinglin): If we use show_existing_frame for an OVERLAY frame,
-  //                   only the virtual indices for the reference frame will be
-  //                   updated and cpi->refresh_golden_frame will still be zero.
-  if (cpi->refresh_golden_frame || rc->is_src_frame_alt_ref) {
-    // We will not use internal overlay frames to replace the golden frame
-    if (!rc->is_src_frame_ext_arf)
-      // this frame refreshes means next frames don't unless specified by user
-      rc->frames_since_golden = 0;
-
-    // If we are not using alt ref in the up and coming group clear the arf
-    // active flag. In multi arf group case, if the index is not 0 then
-    // we are overlaying a mid group arf so should not reset the flag.
-    if (cpi->oxcf.pass == 2) {
-      if (!rc->source_alt_ref_pending && (cpi->twopass.gf_group.index == 0))
-        rc->source_alt_ref_active = 0;
-    } else if (!rc->source_alt_ref_pending) {
-      rc->source_alt_ref_active = 0;
-    }
-  } else if (!cpi->refresh_alt_ref_frame && !is_intrnl_arf) {
-    // Neither a golden nor any kind of alt-ref refresh: one more frame since
-    // the last golden frame.
-    rc->frames_since_golden++;
-  }
-}
-
-// Updates the rate control state after a frame has been encoded.
-// |bytes_used| is the coded size of the frame in bytes; it is converted to
-// bits and stored in rc->projected_frame_size. The function then refreshes
-// the rate correction factors, the Q history, the buffer level, the rolling
-// and total bit counters and the golden / alt-ref statistics.
-void av1_rc_postencode_update(AV1_COMP *cpi, uint64_t bytes_used) {
- const AV1_COMMON *const cm = &cpi->common;
- RATE_CONTROL *const rc = &cpi->rc;
-#if CUSTOMIZED_GF
- const TWO_PASS *const twopass = &cpi->twopass;
- const GF_GROUP *const gf_group = &twopass->gf_group;
- // In two-pass mode the GF group entry decides whether this frame is an
- // internal ARF; otherwise the refresh flag is used.
- const int is_intrnl_arf =
- cpi->oxcf.pass == 2
- ? gf_group->update_type[gf_group->index] == INTNL_ARF_UPDATE
- : cpi->refresh_alt2_ref_frame;
-#else
- const int is_intrnl_arf = cpi->refresh_alt2_ref_frame;
-#endif
-
- const int qindex = cm->base_qindex;
-
- if (cpi->oxcf.aq_mode == CYCLIC_REFRESH_AQ && cm->seg.enabled) {
- av1_cyclic_refresh_postencode(cpi);
- }
-
- // Update rate control heuristics
- // (bytes -> bits; the result is narrowed to int).
- rc->projected_frame_size = (int)(bytes_used << 3);
-
- // Post encode loop adjustment of Q prediction.
- av1_rc_update_rate_correction_factors(cpi, cm->width, cm->height);
-
- // Keep a record of last Q and ambient average Q.
- // The averages are updated as (3 * previous + qindex) passed through
- // ROUND_POWER_OF_TWO(..., 2), i.e. presumably a rounded divide by 4.
- if (cm->frame_type == KEY_FRAME) {
- rc->last_q[KEY_FRAME] = qindex;
- rc->avg_frame_qindex[KEY_FRAME] =
- ROUND_POWER_OF_TWO(3 * rc->avg_frame_qindex[KEY_FRAME] + qindex, 2);
- } else {
- // Only plain inter frames (no golden / ARF refresh, not an overlay of
- // the alt ref source) contribute to the inter-frame statistics.
- if (!rc->is_src_frame_alt_ref &&
- !(cpi->refresh_golden_frame || is_intrnl_arf ||
- cpi->refresh_alt_ref_frame)) {
- rc->last_q[INTER_FRAME] = qindex;
- rc->avg_frame_qindex[INTER_FRAME] =
- ROUND_POWER_OF_TWO(3 * rc->avg_frame_qindex[INTER_FRAME] + qindex, 2);
- rc->ni_frames++;
- rc->tot_q += av1_convert_qindex_to_q(qindex, cm->seq_params.bit_depth);
- rc->avg_q = rc->tot_q / rc->ni_frames;
- // Calculate the average Q for normal inter frames (not key or GFU
- // frames).
- rc->ni_tot_qi += qindex;
- rc->ni_av_qi = rc->ni_tot_qi / rc->ni_frames;
- }
- }
-
- // Keep record of last boosted (KF/GF/ARF) Q value.
- // If the current frame is coded at a lower Q then we also update it.
- // If all mbs in this group are skipped only update if the Q value is
- // better than that already stored.
- // This is used to help set quality in forced key frames to reduce popping
- if ((qindex < rc->last_boosted_qindex) || (cm->frame_type == KEY_FRAME) ||
- (!rc->constrained_gf_group &&
- (cpi->refresh_alt_ref_frame || is_intrnl_arf ||
- (cpi->refresh_golden_frame && !rc->is_src_frame_alt_ref)))) {
- rc->last_boosted_qindex = qindex;
- }
- if (cm->frame_type == KEY_FRAME) rc->last_kf_qindex = qindex;
-
- update_buffer_level(cpi, rc->projected_frame_size);
-
- // Rolling monitors of whether we are over or underspending used to help
- // regulate min and Max Q in two pass.
- // For scaled frames the target is first divided by resize_rate_factor(),
- // the factor rc_set_frame_target() multiplies by.
- if (av1_frame_scaled(cm))
- rc->this_frame_target =
- (int)(rc->this_frame_target /
- resize_rate_factor(cpi, cm->width, cm->height));
- if (cm->frame_type != KEY_FRAME) {
- rc->rolling_target_bits = ROUND_POWER_OF_TWO(
- rc->rolling_target_bits * 3 + rc->this_frame_target, 2);
- rc->rolling_actual_bits = ROUND_POWER_OF_TWO(
- rc->rolling_actual_bits * 3 + rc->projected_frame_size, 2);
- rc->long_rolling_target_bits = ROUND_POWER_OF_TWO(
- rc->long_rolling_target_bits * 31 + rc->this_frame_target, 5);
- rc->long_rolling_actual_bits = ROUND_POWER_OF_TWO(
- rc->long_rolling_actual_bits * 31 + rc->projected_frame_size, 5);
- }
-
- // Actual bits spent
- rc->total_actual_bits += rc->projected_frame_size;
- // TODO(zoeliu): To investigate whether we should treat BWDREF_FRAME
- // differently here for rc->avg_frame_bandwidth.
- // Frames that are neither shown nor backward references add nothing to
- // the target total.
- rc->total_target_bits +=
- (cm->show_frame || rc->is_bwd_ref_frame) ? rc->avg_frame_bandwidth : 0;
-
- rc->total_target_vs_actual = rc->total_actual_bits - rc->total_target_bits;
-
- if (is_altref_enabled(cpi) && cpi->refresh_alt_ref_frame &&
- (cm->frame_type != KEY_FRAME))
- // Update the alternate reference frame stats as appropriate.
- update_alt_ref_frame_stats(cpi);
- else
- // Update the Golden frame stats as appropriate.
- update_golden_frame_stats(cpi);
-
- if (cm->frame_type == KEY_FRAME) rc->frames_since_key = 0;
- // NOTE(review): the block below is disabled code; the same rescale is
- // already applied unconditionally for scaled frames further up.
- // if (cm->current_video_frame == 1 && cm->show_frame)
- /*
- rc->this_frame_target =
- (int)(rc->this_frame_target / resize_rate_factor(cpi, cm->width,
- cm->height));
- */
-}
-
-// Rate control bookkeeping for a dropped frame: the buffer level is updated
-// as if a zero-sized frame had been coded, the key frame counters advance and
-// the rc_1_frame / rc_2_frame state is cleared.
-void av1_rc_postencode_update_drop_frame(AV1_COMP *cpi) {
-  RATE_CONTROL *const rc = &cpi->rc;
-
-  update_buffer_level(cpi, 0);
-
-  rc->frames_since_key++;
-  rc->frames_to_key--;
-  rc->rc_2_frame = 0;
-  rc->rc_1_frame = 0;
-}
-
-// Use this macro to turn on/off use of alt-refs in one-pass mode.
-#define USE_ALTREF_FOR_ONE_PASS 1
-
-// Returns the clamped bit target for a non-key frame in one-pass VBR mode.
-// With alt-refs enabled, golden / alt-ref refresh frames (that are not an
-// overlay of the alt-ref source) receive af_ratio times the share of an
-// ordinary frame of the same golden frame interval.
-static int calc_pframe_target_size_one_pass_vbr(const AV1_COMP *const cpi) {
-  static const int af_ratio = 10;
-  const RATE_CONTROL *const rc = &cpi->rc;
-  int target;
-#if USE_ALTREF_FOR_ONE_PASS
-  const int is_boosted =
-      !rc->is_src_frame_alt_ref &&
-      (cpi->refresh_golden_frame || cpi->refresh_alt_ref_frame);
-
-  if (is_boosted) {
-    target = (rc->avg_frame_bandwidth * rc->baseline_gf_interval * af_ratio) /
-             (rc->baseline_gf_interval + af_ratio - 1);
-  } else {
-    target = (rc->avg_frame_bandwidth * rc->baseline_gf_interval) /
-             (rc->baseline_gf_interval + af_ratio - 1);
-  }
-#else
-  target = rc->avg_frame_bandwidth;
-#endif
-  return av1_rc_clamp_pframe_target_size(cpi, target);
-}
-
-// Returns the clamped bit target for a key frame in one-pass VBR mode:
-// kf_ratio times the average per-frame bandwidth.
-static int calc_iframe_target_size_one_pass_vbr(const AV1_COMP *const cpi) {
-  static const int kf_ratio = 25;
-
-  return av1_rc_clamp_iframe_target_size(
-      cpi, cpi->rc.avg_frame_bandwidth * kf_ratio);
-}
-
-// Prepares the per-frame parameters for one-pass VBR encoding: decides the
-// frame type (key, inter or S-frame), restarts the golden frame interval
-// when it has run out, and sets the frame bit target through
-// rc_set_frame_target().
-void av1_rc_get_one_pass_vbr_params(AV1_COMP *cpi) {
- AV1_COMMON *const cm = &cpi->common;
- RATE_CONTROL *const rc = &cpi->rc;
- int target;
- int altref_enabled = is_altref_enabled(cpi);
- int sframe_dist = cpi->oxcf.sframe_dist;
- int sframe_mode = cpi->oxcf.sframe_mode;
- int sframe_enabled = cpi->oxcf.sframe_enabled;
- // TODO(yaowu): replace the "auto_key && 0" below with proper decision logic.
- // A key frame is coded for the first frame, on an explicit key frame flag,
- // or when the key frame interval has run out, but never on an alt-ref
- // refresh.
- if (!cpi->refresh_alt_ref_frame &&
- (cm->current_video_frame == 0 || (cpi->frame_flags & FRAMEFLAGS_KEY) ||
- rc->frames_to_key == 0 || (cpi->oxcf.auto_key && 0))) {
- cm->frame_type = KEY_FRAME;
- // "Forced" means the key frame was triggered by the interval expiring
- // rather than by being the first frame.
- rc->this_key_frame_forced =
- cm->current_video_frame != 0 && rc->frames_to_key == 0;
- rc->frames_to_key = cpi->oxcf.key_freq;
- rc->kf_boost = DEFAULT_KF_BOOST;
- rc->source_alt_ref_active = 0;
- } else {
- cm->frame_type = INTER_FRAME;
- // NOTE(review): sframe_dist is used as a modulus below; presumably it is
- // validated to be non-zero whenever sframe_enabled is set - confirm.
- // The cm->frame_type != KEY_FRAME tests below always hold here, since
- // frame_type was just set to INTER_FRAME.
- if (sframe_enabled) {
- if (altref_enabled) {
- if (sframe_mode == 1) {
- // sframe_mode == 1: insert sframe if it matches altref frame.
-
- if (cm->current_video_frame % sframe_dist == 0 &&
- cm->frame_type != KEY_FRAME && cm->current_video_frame != 0 &&
- cpi->refresh_alt_ref_frame) {
- cm->frame_type = S_FRAME;
- }
- } else {
- // sframe_mode != 1: if sframe will be inserted at the next available
- // altref frame
-
- if (cm->current_video_frame % sframe_dist == 0 &&
- cm->frame_type != KEY_FRAME && cm->current_video_frame != 0) {
- rc->sframe_due = 1;
- }
-
- if (rc->sframe_due && cpi->refresh_alt_ref_frame) {
- cm->frame_type = S_FRAME;
- rc->sframe_due = 0;
- }
- }
- } else {
- // Alt-refs disabled: the S-frame is placed directly on the
- // sframe_dist boundary.
- if (cm->current_video_frame % sframe_dist == 0 &&
- cm->frame_type != KEY_FRAME && cm->current_video_frame != 0) {
- cm->frame_type = S_FRAME;
- }
- }
- }
- }
- // Start a new golden frame group once the previous one is used up.
- if (rc->frames_till_gf_update_due == 0) {
- rc->baseline_gf_interval = (rc->min_gf_interval + rc->max_gf_interval) / 2;
- rc->frames_till_gf_update_due = rc->baseline_gf_interval;
- // NOTE: frames_till_gf_update_due must be <= frames_to_key.
- if (rc->frames_till_gf_update_due > rc->frames_to_key) {
- rc->frames_till_gf_update_due = rc->frames_to_key;
- rc->constrained_gf_group = 1;
- } else {
- rc->constrained_gf_group = 0;
- }
- cpi->refresh_golden_frame = 1;
- rc->source_alt_ref_pending = USE_ALTREF_FOR_ONE_PASS;
- rc->gfu_boost = DEFAULT_GF_BOOST;
- }
-
- if (cpi->oxcf.aq_mode == CYCLIC_REFRESH_AQ)
- av1_cyclic_refresh_update_parameters(cpi);
-
- // S-frames take the non-key-frame target.
- if (cm->frame_type == KEY_FRAME)
- target = calc_iframe_target_size_one_pass_vbr(cpi);
- else
- target = calc_pframe_target_size_one_pass_vbr(cpi);
- rc_set_frame_target(cpi, target, cm->width, cm->height);
-}
-
-// Returns the bit target for a non-key frame in one-pass CBR mode.
-// The base target (optionally boosted for golden frames through
-// gf_cbr_boost_pct) is lowered when the buffer is below its optimal level and
-// raised when it is above, then capped by rc_max_inter_bitrate_pct and
-// floored at max(avg_frame_bandwidth / 16, FRAME_OVERHEAD_BITS).
-static int calc_pframe_target_size_one_pass_cbr(const AV1_COMP *cpi) {
-  const AV1EncoderConfig *oxcf = &cpi->oxcf;
-  const RATE_CONTROL *rc = &cpi->rc;
-  // Positive when the buffer is below its optimal level.
-  const int64_t diff = rc->optimal_buffer_level - rc->buffer_level;
-  const int64_t one_pct_bits = 1 + rc->optimal_buffer_level / 100;
-  const int floor_target =
-      AOMMAX(rc->avg_frame_bandwidth >> 4, FRAME_OVERHEAD_BITS);
-  int target = rc->avg_frame_bandwidth;
-
-  if (oxcf->gf_cbr_boost_pct) {
-    const int af_ratio_pct = oxcf->gf_cbr_boost_pct + 100;
-    // Golden frames are weighted af_ratio_pct, all other frames 100.
-    const int weight = cpi->refresh_golden_frame ? af_ratio_pct : 100;
-    target = (rc->avg_frame_bandwidth * rc->baseline_gf_interval * weight) /
-             (rc->baseline_gf_interval * 100 + af_ratio_pct - 100);
-  }
-
-  if (diff > 0) {
-    // Lower the target bandwidth for this frame.
-    const int pct_low = (int)AOMMIN(diff / one_pct_bits, oxcf->under_shoot_pct);
-    target -= (target * pct_low) / 200;
-  } else if (diff < 0) {
-    // Increase the target bandwidth for this frame.
-    const int pct_high =
-        (int)AOMMIN(-diff / one_pct_bits, oxcf->over_shoot_pct);
-    target += (target * pct_high) / 200;
-  }
-
-  if (oxcf->rc_max_inter_bitrate_pct) {
-    const int max_rate =
-        rc->avg_frame_bandwidth * oxcf->rc_max_inter_bitrate_pct / 100;
-    target = AOMMIN(target, max_rate);
-  }
-
-  return AOMMAX(floor_target, target);
-}
-
-// Returns the clamped bit target for a key frame in one-pass CBR mode.
-// The very first frame gets half of the starting buffer level (saturated at
-// INT_MAX); later key frames get a boost that grows with the frame rate and
-// is scaled down when the previous key frame is less than framerate / 2
-// frames back.
-static int calc_iframe_target_size_one_pass_cbr(const AV1_COMP *cpi) {
-  const RATE_CONTROL *rc = &cpi->rc;
-  int target;
-
-  if (cpi->common.current_video_frame != 0) {
-    const double framerate = cpi->framerate;
-    const double half_fps = framerate / 2;
-    int kf_boost = AOMMAX(32, (int)(2 * framerate - 16));
-
-    if (rc->frames_since_key < half_fps) {
-      kf_boost = (int)(kf_boost * rc->frames_since_key / half_fps);
-    }
-    target = ((16 + kf_boost) * rc->avg_frame_bandwidth) >> 4;
-  } else if ((rc->starting_buffer_level / 2) > INT_MAX) {
-    target = INT_MAX;
-  } else {
-    target = (int)(rc->starting_buffer_level / 2);
-  }
-
-  return av1_rc_clamp_iframe_target_size(cpi, target);
-}
-
-// Prepares the per-frame parameters for one-pass CBR encoding: decides
-// between key and inter frame, restarts the golden frame interval when it
-// has run out, and sets the frame bit target through rc_set_frame_target().
-void av1_rc_get_one_pass_cbr_params(AV1_COMP *cpi) {
- AV1_COMMON *const cm = &cpi->common;
- RATE_CONTROL *const rc = &cpi->rc;
- int target;
- // TODO(yaowu): replace the "auto_key && 0" below with proper decision logic.
- // A key frame is coded for the first frame, on an explicit key frame flag,
- // or when the key frame interval has run out.
- if ((cm->current_video_frame == 0 || (cpi->frame_flags & FRAMEFLAGS_KEY) ||
- rc->frames_to_key == 0 || (cpi->oxcf.auto_key && 0))) {
- cm->frame_type = KEY_FRAME;
- // "Forced" means the key frame was triggered by the interval expiring
- // rather than by being the first frame.
- rc->this_key_frame_forced =
- cm->current_video_frame != 0 && rc->frames_to_key == 0;
- rc->frames_to_key = cpi->oxcf.key_freq;
- rc->kf_boost = DEFAULT_KF_BOOST;
- rc->source_alt_ref_active = 0;
- } else {
- cm->frame_type = INTER_FRAME;
- }
- // Start a new golden frame group once the previous one is used up.
- if (rc->frames_till_gf_update_due == 0) {
- // NOTE(review): with cyclic refresh AQ the helper below presumably sets
- // rc->baseline_gf_interval itself, since it is read right after - confirm.
- if (cpi->oxcf.aq_mode == CYCLIC_REFRESH_AQ)
- av1_cyclic_refresh_set_golden_update(cpi);
- else
- rc->baseline_gf_interval =
- (rc->min_gf_interval + rc->max_gf_interval) / 2;
- rc->frames_till_gf_update_due = rc->baseline_gf_interval;
- // NOTE: frames_till_gf_update_due must be <= frames_to_key.
- if (rc->frames_till_gf_update_due > rc->frames_to_key)
- rc->frames_till_gf_update_due = rc->frames_to_key;
- cpi->refresh_golden_frame = 1;
- rc->gfu_boost = DEFAULT_GF_BOOST;
- }
-
- // Any update/change of global cyclic refresh parameters (amount/delta-qp)
- // should be done here, before the frame qp is selected.
- if (cpi->oxcf.aq_mode == CYCLIC_REFRESH_AQ)
- av1_cyclic_refresh_update_parameters(cpi);
-
- if (cm->frame_type == KEY_FRAME)
- target = calc_iframe_target_size_one_pass_cbr(cpi);
- else
- target = calc_pframe_target_size_one_pass_cbr(cpi);
-
- rc_set_frame_target(cpi, target, cm->width, cm->height);
- // TODO(afergs): Decide whether to scale up, down, or not at all
-}
-
-// Scans qindex values from rc->best_quality up to, but excluding,
-// rc->worst_quality and returns the first one whose real q value is at least
-// |q|. Falls back to the last index scanned when none qualifies, and to
-// rc->worst_quality when the scan range is empty.
-static int qdelta_lookup_qindex(const RATE_CONTROL *rc, double q,
-                                aom_bit_depth_t bit_depth) {
-  int index = rc->worst_quality;
-  int i;
-
-  for (i = rc->best_quality; i < rc->worst_quality; ++i) {
-    index = i;
-    if (av1_convert_qindex_to_q(i, bit_depth) >= q) break;
-  }
-  return index;
-}
-
-// Returns the qindex difference that corresponds to moving from the real q
-// value |qstart| to |qtarget| (negative when the target is the lower q).
-int av1_compute_qdelta(const RATE_CONTROL *rc, double qstart, double qtarget,
-                       aom_bit_depth_t bit_depth) {
-  // Convert both q values to an index, then take the difference.
-  const int start_index = qdelta_lookup_qindex(rc, qstart, bit_depth);
-  const int target_index = qdelta_lookup_qindex(rc, qtarget, bit_depth);
-
-  return target_index - start_index;
-}
-
-// Returns the qindex delta, relative to |qindex|, needed to change the
-// projected bits per MB by the factor |rate_target_ratio| for frames of type
-// |frame_type|. The search covers [rc->best_quality, rc->worst_quality) and
-// falls back to rc->worst_quality when no index meets the target.
-int av1_compute_qdelta_by_rate(const RATE_CONTROL *rc, FRAME_TYPE frame_type,
-                               int qindex, double rate_target_ratio,
-                               aom_bit_depth_t bit_depth) {
-  // Projected bits per MB at the base index, and the scaled value to reach.
-  const int base_bits_per_mb =
-      av1_rc_bits_per_mb(frame_type, qindex, 1.0, bit_depth);
-  const int target_bits_per_mb = (int)(rate_target_ratio * base_bits_per_mb);
-
-  // Advance until the first index whose projected bits per MB is at or
-  // below the target.
-  int candidate = rc->best_quality;
-  while (candidate < rc->worst_quality &&
-         av1_rc_bits_per_mb(frame_type, candidate, 1.0, bit_depth) >
-             target_bits_per_mb) {
-    ++candidate;
-  }
-
-  // Reaching the end of the range without a match means worst_quality.
-  const int target_index =
-      (candidate < rc->worst_quality) ? candidate : rc->worst_quality;
-  return target_index - qindex;
-}
-
-// Fills in rc->min_gf_interval, rc->max_gf_interval and
-// rc->static_scene_max_gf_interval from the encoder configuration.
-// One-pass fixed Q mode pins all three to FIXED_GF_INTERVAL; otherwise the
-// configured values are used (0 selects a computed default) and limited by
-// the static scene maximum.
-void av1_rc_set_gf_interval_range(const AV1_COMP *const cpi,
-                                  RATE_CONTROL *const rc) {
-  const AV1EncoderConfig *const oxcf = &cpi->oxcf;
-
-  // Special case code for 1 pass fixed Q mode tests
-  if ((oxcf->pass == 0) && (oxcf->rc_mode == AOM_Q)) {
-    rc->max_gf_interval = FIXED_GF_INTERVAL;
-    rc->min_gf_interval = FIXED_GF_INTERVAL;
-    rc->static_scene_max_gf_interval = FIXED_GF_INTERVAL;
-    return;
-  }
-
-  // Start from the configured gf/arf interval limits.
-  rc->max_gf_interval = oxcf->max_gf_interval;
-  rc->min_gf_interval = oxcf->min_gf_interval;
-  if (rc->min_gf_interval == 0) {
-    rc->min_gf_interval = av1_rc_get_default_min_gf_interval(
-        oxcf->width, oxcf->height, cpi->framerate);
-  }
-  if (rc->max_gf_interval == 0) {
-    rc->max_gf_interval = av1_rc_get_default_max_gf_interval(
-        cpi->framerate, rc->min_gf_interval);
-  }
-
-  // Extended interval for genuinely static scenes, limited to
-  // lag_in_frames - 1 when alt-refs are in use.
-  rc->static_scene_max_gf_interval = MAX_LAG_BUFFERS * 2;
-  if (is_altref_enabled(cpi) &&
-      rc->static_scene_max_gf_interval > oxcf->lag_in_frames - 1) {
-    rc->static_scene_max_gf_interval = oxcf->lag_in_frames - 1;
-  }
-
-  if (rc->max_gf_interval > rc->static_scene_max_gf_interval) {
-    rc->max_gf_interval = rc->static_scene_max_gf_interval;
-  }
-
-  // Clamp min to max
-  rc->min_gf_interval = AOMMIN(rc->min_gf_interval, rc->max_gf_interval);
-}
-
-// Recomputes the per-frame bandwidth figures (average, minimum, maximum)
-// from the target bandwidth and cpi->framerate for a frame of the given
-// |width| x |height|, then refreshes the golden frame interval range.
-void av1_rc_update_framerate(AV1_COMP *cpi, int width, int height) {
- const AV1EncoderConfig *const oxcf = &cpi->oxcf;
- RATE_CONTROL *const rc = &cpi->rc;
- int vbr_max_bits;
- const int MBs = av1_get_MBs(width, height);
-
- rc->avg_frame_bandwidth = (int)(oxcf->target_bandwidth / cpi->framerate);
- // Minimum is a percentage (two_pass_vbrmin_section) of the average...
- rc->min_frame_bandwidth =
- (int)(rc->avg_frame_bandwidth * oxcf->two_pass_vbrmin_section / 100);
-
- // ...but never less than FRAME_OVERHEAD_BITS.
- rc->min_frame_bandwidth =
- AOMMAX(rc->min_frame_bandwidth, FRAME_OVERHEAD_BITS);
-
- // A maximum bitrate for a frame is defined.
- // The baseline for this aligns with HW implementations that
- // can support decode of 1080P content up to a bitrate of MAX_MB_RATE bits
- // per 16x16 MB (averaged over a frame). However this limit is extended if
- // a very high rate is given on the command line or the rate cannot
- // be achieved because of a user specified max q (e.g. when the user
- // specifies lossless encode).
- vbr_max_bits =
- (int)(((int64_t)rc->avg_frame_bandwidth * oxcf->two_pass_vbrmax_section) /
- 100);
- // The largest of the three candidate limits wins.
- rc->max_frame_bandwidth =
- AOMMAX(AOMMAX((MBs * MAX_MB_RATE), MAXRATE_1080P), vbr_max_bits);
-
- av1_rc_set_gf_interval_range(cpi, rc);
-}
-
-// Upper bound, as a percentage of the frame target, on the per-frame
-// correction applied by vbr_rate_correction().
-#define VBR_PCT_ADJUSTMENT_LIMIT 50
-// For VBR...adjustment to the frame target based on error from previous frames
-// |this_frame_target| is updated in place. rc->vbr_bits_off_target_fast is
-// reduced by whatever fast redistribution bits are handed to this frame.
-static void vbr_rate_correction(AV1_COMP *cpi, int *this_frame_target) {
- RATE_CONTROL *const rc = &cpi->rc;
- int64_t vbr_bits_off_target = rc->vbr_bits_off_target;
- int max_delta;
- double position_factor = 1.0;
-
- // How far through the clip are we.
- // This number is used to damp the per frame rate correction.
- // Range 0 - 1.0
- // (stays at 1.0 when the total frame count is zero).
- if (cpi->twopass.total_stats.count != 0.) {
- position_factor = sqrt((double)cpi->common.current_video_frame /
- cpi->twopass.total_stats.count);
- }
- // Largest correction allowed for this frame, in the units of the target.
- max_delta = (int)(position_factor *
- ((*this_frame_target * VBR_PCT_ADJUSTMENT_LIMIT) / 100));
-
- // vbr_bits_off_target > 0 means we have extra bits to spend
- if (vbr_bits_off_target > 0) {
- *this_frame_target += (vbr_bits_off_target > max_delta)
- ? max_delta
- : (int)vbr_bits_off_target;
- } else {
- // Otherwise take bits back, again limited to max_delta.
- *this_frame_target -= (vbr_bits_off_target < -max_delta)
- ? max_delta
- : (int)-vbr_bits_off_target;
- }
-
- // Fast redistribution of bits arising from massive local undershoot.
- // Dont do it for kf,arf,gf or overlay frames.
- if (!frame_is_kf_gf_arf(cpi) && !rc->is_src_frame_alt_ref &&
- rc->vbr_bits_off_target_fast) {
- int one_frame_bits = AOMMAX(rc->avg_frame_bandwidth, *this_frame_target);
- int fast_extra_bits;
- // At most one frame's worth of bits...
- fast_extra_bits = (int)AOMMIN(rc->vbr_bits_off_target_fast, one_frame_bits);
- // ...and at most the larger of 1/8 of a frame and 1/8 of the pending
- // fast pool.
- fast_extra_bits = (int)AOMMIN(
- fast_extra_bits,
- AOMMAX(one_frame_bits / 8, rc->vbr_bits_off_target_fast / 8));
- *this_frame_target += (int)fast_extra_bits;
- rc->vbr_bits_off_target_fast -= fast_extra_bits;
- }
-}
-
-// Sets the bit target for the current frame from rc->base_frame_target,
-// applying the VBR over/undershoot correction first in AOM_VBR and AOM_CQ
-// modes.
-void av1_set_target_rate(AV1_COMP *cpi, int width, int height) {
-  int target_rate = cpi->rc.base_frame_target;
-  const int needs_vbr_correction =
-      cpi->oxcf.rc_mode == AOM_VBR || cpi->oxcf.rc_mode == AOM_CQ;
-
-  // Correction to rate target based on prior over or under shoot.
-  if (needs_vbr_correction) {
-    vbr_rate_correction(cpi, &target_rate);
-  }
-
-  rc_set_frame_target(cpi, target_rate, width, height);
-}
diff --git a/third_party/aom/av1/encoder/ratectrl.h b/third_party/aom/av1/encoder/ratectrl.h
deleted file mode 100644
index 198ecab97..000000000
--- a/third_party/aom/av1/encoder/ratectrl.h
+++ /dev/null
@@ -1,295 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#ifndef AOM_AV1_ENCODER_RATECTRL_H_
-#define AOM_AV1_ENCODER_RATECTRL_H_
-
-#include "aom/aom_codec.h"
-#include "aom/aom_integer.h"
-
-#include "av1/common/blockd.h"
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-// Bits Per MB at different Q (Multiplied by 512)
-#define BPER_MB_NORMBITS 9 // 1 << 9 == 512, the multiplier named above
-
-#define CUSTOMIZED_GF 1
-
-#if CONFIG_FIX_GF_LENGTH
-#define FIXED_GF_LENGTH 16
-#define MAX_PYRAMID_LVL 4
-// We allow a frame to have at most two left/right descendants before changing
-// them into a subtree, i.e., we allow the following structure:
-/* OUT_OF_ORDER_FRAME
- / / \ \
-(two left children) F F F F (two right children) */
-// Therefore the max gf size supported by a 4-layer structure is
-// 1 (KEY/OVERLAY) + 1 + 2 + 4 + 16 (two children on both sides of their parent)
-#define MAX_PYRAMID_SIZE 24
-#define USE_SYMM_MULTI_LAYER 1
-#define REDUCE_LAST_ALT_BOOST 1
-#define REDUCE_LAST_GF_LENGTH 1
-#define MULTI_LVL_BOOST_VBR_CQ 1
-#else // !CONFIG_FIX_GF_LENGTH: the four switches above are all forced to 0
-#define USE_SYMM_MULTI_LAYER 0
-#define REDUCE_LAST_ALT_BOOST 0
-#define REDUCE_LAST_GF_LENGTH 0
-#define MULTI_LVL_BOOST_VBR_CQ 0
-#endif // CONFIG_FIX_GF_LENGTH
-
-#if USE_SYMM_MULTI_LAYER
-#define USE_MANUAL_GF4_STRUCT 0 // Only defined when USE_SYMM_MULTI_LAYER is nonzero
-#endif
-
-#define MIN_GF_INTERVAL 4 // presumably in frames, like the *_gf_interval fields of RATE_CONTROL; TODO confirm
-#define MAX_GF_INTERVAL 16
-#define FIXED_GF_INTERVAL 8 // Used in some testing modes only
-
-typedef enum { // Frame categories; used in this header to size and index per-level rate-control tables.
- INTER_NORMAL = 0,
- INTER_LOW = 1,
- INTER_HIGH = 2,
- GF_ARF_LOW = 3,
- GF_ARF_STD = 4,
- KF_STD = 5,
- RATE_FACTOR_LEVELS = 6 // Count of the levels above; used as an array dimension.
-} RATE_FACTOR_LEVEL;
-
-static const double rate_factor_deltas[RATE_FACTOR_LEVELS] = { // One factor per RATE_FACTOR_LEVEL, listed in enum order.
- 1.00, // INTER_NORMAL
- 0.80, // INTER_LOW
- 1.50, // INTER_HIGH
- 1.25, // GF_ARF_LOW
- 2.00, // GF_ARF_STD
- 2.00, // KF_STD
-}; // NOTE(review): the code that applies these factors is not in this header; confirm usage before changing values.
-
-typedef struct { // Frame-size parameters. NOTE(review): producers and consumers are not visible in this header.
- int resize_width;
- int resize_height;
- uint8_t superres_denom; // presumably the super-resolution scaling denominator; TODO confirm
-} size_params_type;
-
-typedef struct { // Rate-control state; AV1_COMP holds one instance as cpi->rc.
- // Rate targeting variables
- int base_frame_target; // A baseline frame target before adjustment
- // for previous under or over shoot.
- int this_frame_target; // Actual frame target after rc adjustment.
- int projected_frame_size;
- int sb64_target_rate;
- int last_q[FRAME_TYPES]; // Separate values for Intra/Inter
- int last_boosted_qindex; // Last boosted GF/KF/ARF q
- int last_kf_qindex; // Q index of the last key frame coded.
-
- int gfu_boost; // av1_compute_rd_mult() uses gfu_boost / 100 (capped at 15) as a table index.
- int last_boost;
- int kf_boost;
-
- double rate_correction_factors[RATE_FACTOR_LEVELS]; // One factor per RATE_FACTOR_LEVEL.
-
- int frames_since_golden;
- int frames_till_gf_update_due;
- int min_gf_interval;
- int max_gf_interval;
- int static_scene_max_gf_interval;
- int baseline_gf_interval;
- int constrained_gf_group;
- int frames_to_key;
- int frames_since_key;
- int this_key_frame_forced;
- int next_key_frame_forced;
- int source_alt_ref_pending;
- int source_alt_ref_active;
- int is_src_frame_alt_ref;
- int sframe_due;
-
- // Length of the bi-predictive frame group interval
- int bipred_group_interval;
-
- // NOTE: Different types of frames may have different bits allocated
- // accordingly, aiming to achieve the overall optimal RD performance.
- int is_bwd_ref_frame;
- int is_last_bipred_frame;
- int is_bipred_frame;
- int is_src_frame_ext_arf;
-
- int avg_frame_bandwidth; // Average frame size target for clip
- int min_frame_bandwidth; // Minimum allocation used for any frame
- int max_frame_bandwidth; // Maximum burst rate allowed for a frame.
-
- int ni_av_qi;
- int ni_tot_qi;
- int ni_frames;
- int avg_frame_qindex[FRAME_TYPES];
- double tot_q;
- double avg_q;
-
- int64_t buffer_level;
- int64_t bits_off_target;
- int64_t vbr_bits_off_target; // > 0 means there are extra bits to spend (see vbr_rate_correction()).
- int64_t vbr_bits_off_target_fast; // Bits from massive local undershoot, redistributed quickly by vbr_rate_correction().
-
- int decimation_factor;
- int decimation_count; // Per the av1_rc_drop_frame() declaration comment, that function changes only this count.
-
- int rolling_target_bits;
- int rolling_actual_bits;
-
- int long_rolling_target_bits;
- int long_rolling_actual_bits;
-
- int rate_error_estimate;
-
- int64_t total_actual_bits;
- int64_t total_target_bits;
- int64_t total_target_vs_actual;
-
- int worst_quality;
- int best_quality;
-
- int64_t starting_buffer_level;
- int64_t optimal_buffer_level;
- int64_t maximum_buffer_size;
-
- // Rate control history for the last frame (1) and the frame before it (2).
- // -1: undershoot
- // 1: overshoot
- // 0: not initialized.
- int rc_1_frame;
- int rc_2_frame;
- int q_1_frame;
- int q_2_frame;
-
- // Auto frame-scaling variables.
- int rf_level_maxq[RATE_FACTOR_LEVELS];
- float_t arf_boost_factor;
- // Q index used for ALT frame
- int arf_q;
-} RATE_CONTROL;
-
-struct AV1_COMP;
-struct AV1EncoderConfig;
-
-void av1_rc_init(const struct AV1EncoderConfig *oxcf, int pass,
- RATE_CONTROL *rc);
-
-int av1_estimate_bits_at_q(FRAME_TYPE frame_kind, int q, int mbs,
- double correction_factor, aom_bit_depth_t bit_depth);
-
-double av1_convert_qindex_to_q(int qindex, aom_bit_depth_t bit_depth);
-
-void av1_rc_init_minq_luts(void);
-
-int av1_rc_get_default_min_gf_interval(int width, int height, double framerate);
-// Note av1_rc_get_default_max_gf_interval() requires the min_gf_interval to
-// be passed in to ensure that the max_gf_interval returned is at least as big
-// as that.
-int av1_rc_get_default_max_gf_interval(double framerate, int min_frame_rate);
-
-// Generally at the high level, the following flow is expected
-// to be enforced for rate control:
-// First call per frame, one of:
-// av1_rc_get_one_pass_vbr_params()
-// av1_rc_get_one_pass_cbr_params()
-// av1_rc_get_first_pass_params()
-// av1_rc_get_second_pass_params()
-// depending on the usage to set the rate control encode parameters desired.
-//
-// Then, call encode_frame_to_data_rate() to perform the
-// actual encode. This function will in turn call encode_frame()
-// one or more times, followed by one of:
-// av1_rc_postencode_update()
-// av1_rc_postencode_update_drop_frame()
-//
-// The majority of rate control parameters are only expected
-// to be set in the av1_rc_get_..._params() functions and
-// updated during the av1_rc_postencode_update...() functions.
-// The only exceptions are av1_rc_drop_frame() and
-// av1_rc_update_rate_correction_factors() functions.
-
-// Functions to set parameters for encoding before the actual
-// encode_frame_to_data_rate() function.
-void av1_rc_get_one_pass_vbr_params(struct AV1_COMP *cpi);
-void av1_rc_get_one_pass_cbr_params(struct AV1_COMP *cpi);
-
-// Post encode update of the rate control parameters based
-// on bytes used
-void av1_rc_postencode_update(struct AV1_COMP *cpi, uint64_t bytes_used);
-// Post encode update of the rate control parameters for dropped frames
-void av1_rc_postencode_update_drop_frame(struct AV1_COMP *cpi);
-
-// Updates rate correction factors
-// Changes only the rate correction factors in the rate control structure.
-void av1_rc_update_rate_correction_factors(struct AV1_COMP *cpi, int width,
- int height);
-
-// Decide if we should drop this frame: For 1-pass CBR.
-// Changes only the decimation count in the rate control structure
-int av1_rc_drop_frame(struct AV1_COMP *cpi);
-
-// Computes frame size bounds.
-void av1_rc_compute_frame_size_bounds(const struct AV1_COMP *cpi,
- int this_frame_target,
- int *frame_under_shoot_limit,
- int *frame_over_shoot_limit);
-
-// Picks q and q bounds given the target for bits
-int av1_rc_pick_q_and_bounds(struct AV1_COMP *cpi, int width, int height,
- int *bottom_index, int *top_index);
-
-// Estimates q to achieve a target bits per frame
-int av1_rc_regulate_q(const struct AV1_COMP *cpi, int target_bits_per_frame,
- int active_best_quality, int active_worst_quality,
- int width, int height);
-
-// Estimates bits per mb for a given qindex and correction factor.
-int av1_rc_bits_per_mb(FRAME_TYPE frame_type, int qindex,
- double correction_factor, aom_bit_depth_t bit_depth);
-
-// Clamping utilities for bitrate targets for iframes and pframes.
-int av1_rc_clamp_iframe_target_size(const struct AV1_COMP *const cpi,
- int target);
-int av1_rc_clamp_pframe_target_size(const struct AV1_COMP *const cpi,
- int target);
-// Utility to set frame_target into the RATE_CONTROL structure
-// This function is called only from the av1_rc_get_..._params() functions.
-void av1_rc_set_frame_target(struct AV1_COMP *cpi, int target);
-
-// Computes a q delta (in "q index" terms) to get from a starting q value
-// to a target q value
-int av1_compute_qdelta(const RATE_CONTROL *rc, double qstart, double qtarget,
- aom_bit_depth_t bit_depth);
-
-// Computes a q delta (in "q index" terms) to get from a starting q value
-// to a value that should equate to the given rate ratio.
-int av1_compute_qdelta_by_rate(const RATE_CONTROL *rc, FRAME_TYPE frame_type,
- int qindex, double rate_target_ratio,
- aom_bit_depth_t bit_depth);
-
-int av1_frame_type_qdelta(const struct AV1_COMP *cpi, int rf_level, int q);
-
-void av1_rc_update_framerate(struct AV1_COMP *cpi, int width, int height);
-
-void av1_rc_set_gf_interval_range(const struct AV1_COMP *const cpi,
- RATE_CONTROL *const rc);
-
-void av1_set_target_rate(struct AV1_COMP *cpi, int width, int height);
-
-int av1_resize_one_pass_cbr(struct AV1_COMP *cpi);
-
-#ifdef __cplusplus
-} // extern "C"
-#endif
-
-#endif // AOM_AV1_ENCODER_RATECTRL_H_
diff --git a/third_party/aom/av1/encoder/rd.c b/third_party/aom/av1/encoder/rd.c
deleted file mode 100644
index b87d89e50..000000000
--- a/third_party/aom/av1/encoder/rd.c
+++ /dev/null
@@ -1,1512 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#include <assert.h>
-#include <math.h>
-#include <stdio.h>
-
-#include "config/av1_rtcd.h"
-
-#include "aom_dsp/aom_dsp_common.h"
-#include "aom_mem/aom_mem.h"
-#include "aom_ports/bitops.h"
-#include "aom_ports/mem.h"
-#include "aom_ports/system_state.h"
-
-#include "av1/common/common.h"
-#include "av1/common/entropy.h"
-#include "av1/common/entropymode.h"
-#include "av1/common/mvref_common.h"
-#include "av1/common/pred_common.h"
-#include "av1/common/quant_common.h"
-#include "av1/common/reconinter.h"
-#include "av1/common/reconintra.h"
-#include "av1/common/seg_common.h"
-
-#include "av1/encoder/av1_quantize.h"
-#include "av1/encoder/cost.h"
-#include "av1/encoder/encodemb.h"
-#include "av1/encoder/encodemv.h"
-#include "av1/encoder/encoder.h"
-#include "av1/encoder/encodetxb.h"
-#include "av1/encoder/mcomp.h"
-#include "av1/encoder/ratectrl.h"
-#include "av1/encoder/rd.h"
-#include "av1/encoder/tokenize.h"
-
-#define RD_THRESH_POW 1.25 // Exponent applied to q in compute_rd_thresh_factor().
-
-// The baseline rd thresholds for breaking out of the rd loop for
-// certain modes are assumed to be based on 8x8 blocks.
-// This table is used to correct for block size.
-// The factors here are << 2 (2 = x0.5, 32 = x8 etc); set_block_thresholds() divides by 4.
-static const uint8_t rd_thresh_block_size_factor[BLOCK_SIZES_ALL] = {
- 2, 3, 3, 4, 6, 6, 8, 12, 12, 16, 24, 24, 32, 48, 48, 64, 4, 4, 8, 8, 16, 16
-};
-
-static const int use_intra_ext_tx_for_txsize[EXT_TX_SETS_INTRA][EXT_TX_SIZES] = // [tx set][tx size]; nonzero => av1_fill_mode_rates() builds costs for the pair.
- {
- { 1, 1, 1, 1 }, // unused
- { 1, 1, 0, 0 },
- { 0, 0, 1, 0 },
- };
-
-static const int use_inter_ext_tx_for_txsize[EXT_TX_SETS_INTER][EXT_TX_SIZES] = // Same layout as the intra table above.
- {
- { 1, 1, 1, 1 }, // unused
- { 1, 1, 0, 0 },
- { 0, 0, 1, 0 },
- { 0, 0, 0, 1 },
- };
-
-static const int av1_ext_tx_set_idx_to_type[2][AOMMAX(EXT_TX_SETS_INTRA, // Row 0 = intra, row 1 = inter; maps a set index to its set type.
- EXT_TX_SETS_INTER)] = {
- {
- // Intra
- EXT_TX_SET_DCTONLY,
- EXT_TX_SET_DTT4_IDTX_1DDCT,
- EXT_TX_SET_DTT4_IDTX,
- },
- {
- // Inter
- EXT_TX_SET_DCTONLY,
- EXT_TX_SET_ALL16,
- EXT_TX_SET_DTT9_IDTX_1DDCT,
- EXT_TX_SET_DCT_IDTX,
- },
-};
-
-void av1_fill_mode_rates(AV1_COMMON *const cm, MACROBLOCK *x, // Fill the mode cost tables in *x from the matching CDFs in *fc.
- FRAME_CONTEXT *fc) {
- int i, j;
-
- for (i = 0; i < PARTITION_CONTEXTS; ++i)
- av1_cost_tokens_from_cdf(x->partition_cost[i], fc->partition_cdf[i], NULL);
-
- if (cm->skip_mode_flag) { // skip_mode costs are built only when the frame sets skip_mode_flag.
- for (i = 0; i < SKIP_CONTEXTS; ++i) {
- av1_cost_tokens_from_cdf(x->skip_mode_cost[i], fc->skip_mode_cdfs[i],
- NULL);
- }
- }
-
- for (i = 0; i < SKIP_CONTEXTS; ++i) {
- av1_cost_tokens_from_cdf(x->skip_cost[i], fc->skip_cdfs[i], NULL);
- }
-
- for (i = 0; i < KF_MODE_CONTEXTS; ++i)
- for (j = 0; j < KF_MODE_CONTEXTS; ++j)
- av1_cost_tokens_from_cdf(x->y_mode_costs[i][j], fc->kf_y_cdf[i][j], NULL);
-
- for (i = 0; i < BLOCK_SIZE_GROUPS; ++i)
- av1_cost_tokens_from_cdf(x->mbmode_cost[i], fc->y_mode_cdf[i], NULL);
- for (i = 0; i < CFL_ALLOWED_TYPES; ++i)
- for (j = 0; j < INTRA_MODES; ++j)
- av1_cost_tokens_from_cdf(x->intra_uv_mode_cost[i][j],
- fc->uv_mode_cdf[i][j], NULL);
-
- av1_cost_tokens_from_cdf(x->filter_intra_mode_cost, fc->filter_intra_mode_cdf,
- NULL);
- for (i = 0; i < BLOCK_SIZES_ALL; ++i) {
- if (av1_filter_intra_allowed_bsize(cm, i)) // Entries for disallowed block sizes are left untouched.
- av1_cost_tokens_from_cdf(x->filter_intra_cost[i],
- fc->filter_intra_cdfs[i], NULL);
- }
-
- for (i = 0; i < SWITCHABLE_FILTER_CONTEXTS; ++i)
- av1_cost_tokens_from_cdf(x->switchable_interp_costs[i],
- fc->switchable_interp_cdf[i], NULL);
-
- for (i = 0; i < PALATTE_BSIZE_CTXS; ++i) {
- av1_cost_tokens_from_cdf(x->palette_y_size_cost[i],
- fc->palette_y_size_cdf[i], NULL);
- av1_cost_tokens_from_cdf(x->palette_uv_size_cost[i],
- fc->palette_uv_size_cdf[i], NULL);
- for (j = 0; j < PALETTE_Y_MODE_CONTEXTS; ++j) {
- av1_cost_tokens_from_cdf(x->palette_y_mode_cost[i][j],
- fc->palette_y_mode_cdf[i][j], NULL);
- }
- }
-
- for (i = 0; i < PALETTE_UV_MODE_CONTEXTS; ++i) {
- av1_cost_tokens_from_cdf(x->palette_uv_mode_cost[i],
- fc->palette_uv_mode_cdf[i], NULL);
- }
-
- for (i = 0; i < PALETTE_SIZES; ++i) {
- for (j = 0; j < PALETTE_COLOR_INDEX_CONTEXTS; ++j) {
- av1_cost_tokens_from_cdf(x->palette_y_color_cost[i][j],
- fc->palette_y_color_index_cdf[i][j], NULL);
- av1_cost_tokens_from_cdf(x->palette_uv_color_cost[i][j],
- fc->palette_uv_color_index_cdf[i][j], NULL);
- }
- }
-
- int sign_cost[CFL_JOINT_SIGNS]; // Cost of each CFL joint sign, folded into cfl_cost below.
- av1_cost_tokens_from_cdf(sign_cost, fc->cfl_sign_cdf, NULL);
- for (int joint_sign = 0; joint_sign < CFL_JOINT_SIGNS; joint_sign++) {
- int *cost_u = x->cfl_cost[joint_sign][CFL_PRED_U];
- int *cost_v = x->cfl_cost[joint_sign][CFL_PRED_V];
- if (CFL_SIGN_U(joint_sign) == CFL_SIGN_ZERO) { // Zero U sign: the U alpha costs are cleared to 0.
- memset(cost_u, 0, CFL_ALPHABET_SIZE * sizeof(*cost_u));
- } else {
- const aom_cdf_prob *cdf_u = fc->cfl_alpha_cdf[CFL_CONTEXT_U(joint_sign)];
- av1_cost_tokens_from_cdf(cost_u, cdf_u, NULL);
- }
- if (CFL_SIGN_V(joint_sign) == CFL_SIGN_ZERO) { // Same handling for V.
- memset(cost_v, 0, CFL_ALPHABET_SIZE * sizeof(*cost_v));
- } else {
- const aom_cdf_prob *cdf_v = fc->cfl_alpha_cdf[CFL_CONTEXT_V(joint_sign)];
- av1_cost_tokens_from_cdf(cost_v, cdf_v, NULL);
- }
- for (int u = 0; u < CFL_ALPHABET_SIZE; u++) // The joint-sign cost is added to the U costs only.
- cost_u[u] += sign_cost[joint_sign];
- }
-
- for (i = 0; i < MAX_TX_CATS; ++i)
- for (j = 0; j < TX_SIZE_CONTEXTS; ++j)
- av1_cost_tokens_from_cdf(x->tx_size_cost[i][j], fc->tx_size_cdf[i][j],
- NULL);
-
- for (i = 0; i < TXFM_PARTITION_CONTEXTS; ++i) {
- av1_cost_tokens_from_cdf(x->txfm_partition_cost[i],
- fc->txfm_partition_cdf[i], NULL);
- }
-
- for (i = TX_4X4; i < EXT_TX_SIZES; ++i) {
- int s;
- for (s = 1; s < EXT_TX_SETS_INTER; ++s) { // Set 0 is skipped; it is marked unused in the lookup tables.
- if (use_inter_ext_tx_for_txsize[s][i]) {
- av1_cost_tokens_from_cdf(
- x->inter_tx_type_costs[s][i], fc->inter_ext_tx_cdf[s][i],
- av1_ext_tx_inv[av1_ext_tx_set_idx_to_type[1][s]]);
- }
- }
- for (s = 1; s < EXT_TX_SETS_INTRA; ++s) {
- if (use_intra_ext_tx_for_txsize[s][i]) {
- for (j = 0; j < INTRA_MODES; ++j) {
- av1_cost_tokens_from_cdf(
- x->intra_tx_type_costs[s][i][j], fc->intra_ext_tx_cdf[s][i][j],
- av1_ext_tx_inv[av1_ext_tx_set_idx_to_type[0][s]]);
- }
- }
- }
- }
- for (i = 0; i < DIRECTIONAL_MODES; ++i) {
- av1_cost_tokens_from_cdf(x->angle_delta_cost[i], fc->angle_delta_cdf[i],
- NULL);
- }
- av1_cost_tokens_from_cdf(x->switchable_restore_cost,
- fc->switchable_restore_cdf, NULL);
- av1_cost_tokens_from_cdf(x->wiener_restore_cost, fc->wiener_restore_cdf,
- NULL);
- av1_cost_tokens_from_cdf(x->sgrproj_restore_cost, fc->sgrproj_restore_cdf,
- NULL);
- av1_cost_tokens_from_cdf(x->intrabc_cost, fc->intrabc_cdf, NULL);
-
- if (!frame_is_intra_only(cm)) { // Everything below is built only for frames that are not intra-only.
- for (i = 0; i < COMP_INTER_CONTEXTS; ++i) {
- av1_cost_tokens_from_cdf(x->comp_inter_cost[i], fc->comp_inter_cdf[i],
- NULL);
- }
-
- for (i = 0; i < REF_CONTEXTS; ++i) {
- for (j = 0; j < SINGLE_REFS - 1; ++j) {
- av1_cost_tokens_from_cdf(x->single_ref_cost[i][j],
- fc->single_ref_cdf[i][j], NULL);
- }
- }
-
- for (i = 0; i < COMP_REF_TYPE_CONTEXTS; ++i) {
- av1_cost_tokens_from_cdf(x->comp_ref_type_cost[i],
- fc->comp_ref_type_cdf[i], NULL);
- }
-
- for (i = 0; i < UNI_COMP_REF_CONTEXTS; ++i) {
- for (j = 0; j < UNIDIR_COMP_REFS - 1; ++j) {
- av1_cost_tokens_from_cdf(x->uni_comp_ref_cost[i][j],
- fc->uni_comp_ref_cdf[i][j], NULL);
- }
- }
-
- for (i = 0; i < REF_CONTEXTS; ++i) {
- for (j = 0; j < FWD_REFS - 1; ++j) {
- av1_cost_tokens_from_cdf(x->comp_ref_cost[i][j], fc->comp_ref_cdf[i][j],
- NULL);
- }
- }
-
- for (i = 0; i < REF_CONTEXTS; ++i) {
- for (j = 0; j < BWD_REFS - 1; ++j) {
- av1_cost_tokens_from_cdf(x->comp_bwdref_cost[i][j],
- fc->comp_bwdref_cdf[i][j], NULL);
- }
- }
-
- for (i = 0; i < INTRA_INTER_CONTEXTS; ++i) {
- av1_cost_tokens_from_cdf(x->intra_inter_cost[i], fc->intra_inter_cdf[i],
- NULL);
- }
-
- for (i = 0; i < NEWMV_MODE_CONTEXTS; ++i) {
- av1_cost_tokens_from_cdf(x->newmv_mode_cost[i], fc->newmv_cdf[i], NULL);
- }
-
- for (i = 0; i < GLOBALMV_MODE_CONTEXTS; ++i) {
- av1_cost_tokens_from_cdf(x->zeromv_mode_cost[i], fc->zeromv_cdf[i], NULL);
- }
-
- for (i = 0; i < REFMV_MODE_CONTEXTS; ++i) {
- av1_cost_tokens_from_cdf(x->refmv_mode_cost[i], fc->refmv_cdf[i], NULL);
- }
-
- for (i = 0; i < DRL_MODE_CONTEXTS; ++i) {
- av1_cost_tokens_from_cdf(x->drl_mode_cost0[i], fc->drl_cdf[i], NULL);
- }
- for (i = 0; i < INTER_MODE_CONTEXTS; ++i)
- av1_cost_tokens_from_cdf(x->inter_compound_mode_cost[i],
- fc->inter_compound_mode_cdf[i], NULL);
- for (i = 0; i < BLOCK_SIZES_ALL; ++i)
- av1_cost_tokens_from_cdf(x->compound_type_cost[i],
- fc->compound_type_cdf[i], NULL);
- for (i = 0; i < BLOCK_SIZES_ALL; ++i) {
- if (get_interinter_wedge_bits(i)) { // Only block sizes with a nonzero wedge bit count get wedge index costs.
- av1_cost_tokens_from_cdf(x->wedge_idx_cost[i], fc->wedge_idx_cdf[i],
- NULL);
- }
- }
- for (i = 0; i < BLOCK_SIZE_GROUPS; ++i) {
- av1_cost_tokens_from_cdf(x->interintra_cost[i], fc->interintra_cdf[i],
- NULL);
- av1_cost_tokens_from_cdf(x->interintra_mode_cost[i],
- fc->interintra_mode_cdf[i], NULL);
- }
- for (i = 0; i < BLOCK_SIZES_ALL; ++i) {
- av1_cost_tokens_from_cdf(x->wedge_interintra_cost[i],
- fc->wedge_interintra_cdf[i], NULL);
- }
- for (i = BLOCK_8X8; i < BLOCK_SIZES_ALL; i++) { // Starts at BLOCK_8X8; entries below it are not written here.
- av1_cost_tokens_from_cdf(x->motion_mode_cost[i], fc->motion_mode_cdf[i],
- NULL);
- }
- for (i = BLOCK_8X8; i < BLOCK_SIZES_ALL; i++) { // Same lower bound; this table is built from obmc_cdf.
- av1_cost_tokens_from_cdf(x->motion_mode_cost1[i], fc->obmc_cdf[i], NULL);
- }
- for (i = 0; i < COMP_INDEX_CONTEXTS; ++i) {
- av1_cost_tokens_from_cdf(x->comp_idx_cost[i], fc->compound_index_cdf[i],
- NULL);
- }
- for (i = 0; i < COMP_GROUP_IDX_CONTEXTS; ++i) {
- av1_cost_tokens_from_cdf(x->comp_group_idx_cost[i],
- fc->comp_group_idx_cdf[i], NULL);
- }
- }
-}
-
-// SAD-per-bit tables indexed by qindex, one 16/4 pair per bit depth; values are correlated to the quantizer.
-static int sad_per_bit16lut_8[QINDEX_RANGE]; // All six tables are filled by av1_init_me_luts().
-static int sad_per_bit4lut_8[QINDEX_RANGE];
-static int sad_per_bit16lut_10[QINDEX_RANGE];
-static int sad_per_bit4lut_10[QINDEX_RANGE];
-static int sad_per_bit16lut_12[QINDEX_RANGE];
-static int sad_per_bit4lut_12[QINDEX_RANGE]; // Read back by av1_initialize_me_consts().
-
-static void init_me_luts_bd(int *bit16lut, int *bit4lut, int range, // Fill both tables for one bit depth; range = entries written per table.
- aom_bit_depth_t bit_depth) {
- int i;
- // Initialize the sad lut tables using a formulaic calculation for now.
- // This is to make it easier to resolve the impact of experimental changes
- // to the quantizer tables.
- for (i = 0; i < range; i++) {
- const double q = av1_convert_qindex_to_q(i, bit_depth); // The table index doubles as the qindex.
- bit16lut[i] = (int)(0.0418 * q + 2.4107); // Linear in q; the cast truncates toward zero.
- bit4lut[i] = (int)(0.063 * q + 2.742);
- }
-}
-
-void av1_init_me_luts(void) { // Fill the file-scope sad_per_bit* tables for 8-, 10- and 12-bit depths.
- init_me_luts_bd(sad_per_bit16lut_8, sad_per_bit4lut_8, QINDEX_RANGE,
- AOM_BITS_8);
- init_me_luts_bd(sad_per_bit16lut_10, sad_per_bit4lut_10, QINDEX_RANGE,
- AOM_BITS_10);
- init_me_luts_bd(sad_per_bit16lut_12, sad_per_bit4lut_12, QINDEX_RANGE,
- AOM_BITS_12);
-}
-
-static const int rd_boost_factor[16] = { 64, 32, 32, 32, 24, 16, 12, 12, // Indexed by min(15, gfu_boost / 100) in av1_compute_rd_mult(); applied with >> 7.
- 8, 8, 4, 4, 2, 2, 1, 0 };
-static const int rd_frame_type_factor[FRAME_UPDATE_TYPES] = { // Indexed by FRAME_UPDATE_TYPE; applied with >> 7, so 128 == x1.0.
- 128, 144, 128, 128, 144,
- // TODO(zoeliu): To adjust further following factor values.
- 128, 128, 128,
- // TODO(weitinglin): We should investigate if the values should be the same
- // as the value used by OVERLAY frame
- 144, // INTNL_OVERLAY_UPDATE
- 128 // INTNL_ARF_UPDATE
-};
-
-int av1_compute_rd_mult(const AV1_COMP *cpi, int qindex) { // Returns the RD multiplier for qindex (>= 1); asserts and returns -1 on an unknown bit depth.
- const int64_t q =
- av1_dc_quant_Q3(qindex, 0, cpi->common.seq_params.bit_depth);
- int64_t rdmult = 0;
- switch (cpi->common.seq_params.bit_depth) { // Same 88 * q * q / 24 base; 10/12-bit results are scaled down by 2^4 / 2^8.
- case AOM_BITS_8: rdmult = 88 * q * q / 24; break;
- case AOM_BITS_10: rdmult = ROUND_POWER_OF_TWO(88 * q * q / 24, 4); break;
- case AOM_BITS_12: rdmult = ROUND_POWER_OF_TWO(88 * q * q / 24, 8); break;
- default:
- assert(0 && "bit_depth should be AOM_BITS_8, AOM_BITS_10 or AOM_BITS_12");
- return -1;
- }
- if (cpi->oxcf.pass == 2 && (cpi->common.frame_type != KEY_FRAME)) { // Only pass-2, non-key frames get the adjustments below.
- const GF_GROUP *const gf_group = &cpi->twopass.gf_group;
- const FRAME_UPDATE_TYPE frame_type = gf_group->update_type[gf_group->index];
- const int boost_index = AOMMIN(15, (cpi->rc.gfu_boost / 100)); // Capped at the last rd_boost_factor entry.
-
- rdmult = (rdmult * rd_frame_type_factor[frame_type]) >> 7; // Factor is in units of 1/128.
- rdmult += ((rdmult * rd_boost_factor[boost_index]) >> 7); // Adds at most 64/128 of rdmult (index 0).
- }
- if (rdmult < 1) rdmult = 1; // Never return a multiplier below 1.
- return (int)rdmult;
-}
-
-static int compute_rd_thresh_factor(int qindex, aom_bit_depth_t bit_depth) { // Returns max((int)(q^RD_THRESH_POW * 5.12), 8); asserts and returns -1 on an unknown bit depth.
- double q;
- switch (bit_depth) { // The divisor grows 4x per bit-depth step (4, 16, 64).
- case AOM_BITS_8: q = av1_dc_quant_Q3(qindex, 0, AOM_BITS_8) / 4.0; break;
- case AOM_BITS_10: q = av1_dc_quant_Q3(qindex, 0, AOM_BITS_10) / 16.0; break;
- case AOM_BITS_12: q = av1_dc_quant_Q3(qindex, 0, AOM_BITS_12) / 64.0; break;
- default:
- assert(0 && "bit_depth should be AOM_BITS_8, AOM_BITS_10 or AOM_BITS_12");
- return -1;
- }
- // TODO(debargha): Adjust the function below.
- return AOMMAX((int)(pow(q, RD_THRESH_POW) * 5.12), 8); // Floor of 8 keeps the factor strictly positive.
-}
-
-void av1_initialize_me_consts(const AV1_COMP *cpi, MACROBLOCK *x, int qindex) { // Set x->sadperbit16 / x->sadperbit4 from the table pair for the sequence bit depth.
- switch (cpi->common.seq_params.bit_depth) {
- case AOM_BITS_8:
- x->sadperbit16 = sad_per_bit16lut_8[qindex]; // qindex is used unchecked as the table index.
- x->sadperbit4 = sad_per_bit4lut_8[qindex];
- break;
- case AOM_BITS_10:
- x->sadperbit16 = sad_per_bit16lut_10[qindex];
- x->sadperbit4 = sad_per_bit4lut_10[qindex];
- break;
- case AOM_BITS_12:
- x->sadperbit16 = sad_per_bit16lut_12[qindex];
- x->sadperbit4 = sad_per_bit4lut_12[qindex];
- break;
- default: // Unknown bit depth: asserts; with asserts compiled out, x is left unchanged.
- assert(0 && "bit_depth should be AOM_BITS_8, AOM_BITS_10 or AOM_BITS_12");
- }
-}
-
-static void set_block_thresholds(const AV1_COMMON *cm, RD_OPT *rd) { // Fill rd->threshes[segment][bsize][mode] from rd->thresh_mult[].
- int i, bsize, segment_id;
-
- for (segment_id = 0; segment_id < MAX_SEGMENTS; ++segment_id) {
- const int qindex =
- clamp(av1_get_qindex(&cm->seg, segment_id, cm->base_qindex) +
- cm->y_dc_delta_q,
- 0, MAXQ);
- const int q = compute_rd_thresh_factor(qindex, cm->seq_params.bit_depth);
-
- for (bsize = 0; bsize < BLOCK_SIZES_ALL; ++bsize) {
- // Threshold here seems unnecessarily harsh but fine given actual
- // range of values used for cpi->sf.thresh_mult[].
- const int t = q * rd_thresh_block_size_factor[bsize];
- const int thresh_max = INT_MAX / t; // Largest thresh_mult for which thresh_mult * t cannot overflow int.
-
- for (i = 0; i < MAX_MODES; ++i)
- rd->threshes[segment_id][bsize][i] = rd->thresh_mult[i] < thresh_max
- ? rd->thresh_mult[i] * t / 4 // The / 4 removes the << 2 scale of the block-size factor.
- : INT_MAX;
- }
- }
-}
-
-void av1_set_mvcost(MACROBLOCK *x, int ref, int ref_mv_idx) { // Point x->mvcost and x->nmvjointcost at the tables stored in x itself.
- (void)ref; // Both extra parameters are unused.
- (void)ref_mv_idx;
- x->mvcost = x->mv_cost_stack;
- x->nmvjointcost = x->nmv_vec_cost;
-}
-
-void av1_fill_coeff_costs(MACROBLOCK *x, FRAME_CONTEXT *fc, // Fill x->eob_costs and x->coeff_costs from the coefficient CDFs in *fc.
- const int num_planes) {
- const int nplanes = AOMMIN(num_planes, PLANE_TYPES); // At most PLANE_TYPES plane entries are filled.
- for (int eob_multi_size = 0; eob_multi_size < 7; ++eob_multi_size) { // 7 EOB CDF sets: eob_flag_cdf16 ... eob_flag_cdf1024.
- for (int plane = 0; plane < nplanes; ++plane) {
- LV_MAP_EOB_COST *pcost = &x->eob_costs[eob_multi_size][plane];
-
- for (int ctx = 0; ctx < 2; ++ctx) {
- aom_cdf_prob *pcdf;
- switch (eob_multi_size) {
- case 0: pcdf = fc->eob_flag_cdf16[plane][ctx]; break;
- case 1: pcdf = fc->eob_flag_cdf32[plane][ctx]; break;
- case 2: pcdf = fc->eob_flag_cdf64[plane][ctx]; break;
- case 3: pcdf = fc->eob_flag_cdf128[plane][ctx]; break;
- case 4: pcdf = fc->eob_flag_cdf256[plane][ctx]; break;
- case 5: pcdf = fc->eob_flag_cdf512[plane][ctx]; break;
- case 6:
- default: pcdf = fc->eob_flag_cdf1024[plane][ctx]; break;
- }
- av1_cost_tokens_from_cdf(pcost->eob_cost[ctx], pcdf, NULL);
- }
- }
- }
- for (int tx_size = 0; tx_size < TX_SIZES; ++tx_size) {
- for (int plane = 0; plane < nplanes; ++plane) {
- LV_MAP_COEFF_COST *pcost = &x->coeff_costs[tx_size][plane];
-
- for (int ctx = 0; ctx < TXB_SKIP_CONTEXTS; ++ctx)
- av1_cost_tokens_from_cdf(pcost->txb_skip_cost[ctx],
- fc->txb_skip_cdf[tx_size][ctx], NULL); // txb_skip_cdf has no plane index, unlike the CDFs below.
-
- for (int ctx = 0; ctx < SIG_COEF_CONTEXTS_EOB; ++ctx)
- av1_cost_tokens_from_cdf(pcost->base_eob_cost[ctx],
- fc->coeff_base_eob_cdf[tx_size][plane][ctx],
- NULL);
- for (int ctx = 0; ctx < SIG_COEF_CONTEXTS; ++ctx)
- av1_cost_tokens_from_cdf(pcost->base_cost[ctx],
- fc->coeff_base_cdf[tx_size][plane][ctx], NULL);
-
- for (int ctx = 0; ctx < EOB_COEF_CONTEXTS; ++ctx)
- av1_cost_tokens_from_cdf(pcost->eob_extra_cost[ctx],
- fc->eob_extra_cdf[tx_size][plane][ctx], NULL);
-
- for (int ctx = 0; ctx < DC_SIGN_CONTEXTS; ++ctx)
- av1_cost_tokens_from_cdf(pcost->dc_sign_cost[ctx],
- fc->dc_sign_cdf[plane][ctx], NULL);
-
- for (int ctx = 0; ctx < LEVEL_CONTEXTS; ++ctx) {
- int br_rate[BR_CDF_SIZE];
- int prev_cost = 0; // Accumulated cost of the last symbol from all earlier chunks.
- int i, j;
- av1_cost_tokens_from_cdf(br_rate, fc->coeff_br_cdf[tx_size][plane][ctx],
- NULL);
- // printf("br_rate: ");
- // for(j = 0; j < BR_CDF_SIZE; j++)
- // printf("%4d ", br_rate[j]);
- // printf("\n");
- for (i = 0; i < COEFF_BASE_RANGE; i += BR_CDF_SIZE - 1) { // Each pass fills one chunk of BR_CDF_SIZE - 1 entries.
- for (j = 0; j < BR_CDF_SIZE - 1; j++) {
- pcost->lps_cost[ctx][i + j] = prev_cost + br_rate[j];
- }
- prev_cost += br_rate[j]; // j == BR_CDF_SIZE - 1 here, i.e. the last symbol's cost.
- }
- pcost->lps_cost[ctx][i] = prev_cost; // i is now the first index the loop above did not write.
- // printf("lps_cost: %d %d %2d : ", tx_size, plane, ctx);
- // for (i = 0; i <= COEFF_BASE_RANGE; i++)
- // printf("%5d ", pcost->lps_cost[ctx][i]);
- // printf("\n");
- }
- }
- }
-}
-
-void av1_initialize_rd_consts(AV1_COMP *cpi) { // Set RDMULT, block thresholds, MV/DV cost tables and global-motion type costs.
- AV1_COMMON *const cm = &cpi->common;
- MACROBLOCK *const x = &cpi->td.mb;
- RD_OPT *const rd = &cpi->rd;
-
- aom_clear_system_state();
-
- rd->RDMULT = av1_compute_rd_mult(cpi, cm->base_qindex + cm->y_dc_delta_q);
-
- set_error_per_bit(x, rd->RDMULT);
-
- set_block_thresholds(cm, rd);
-
- if (cm->cur_frame_force_integer_mv) { // Forced integer MVs: build costs with MV_SUBPEL_NONE.
- av1_build_nmv_cost_table(x->nmv_vec_cost, x->nmvcost, &cm->fc->nmvc,
- MV_SUBPEL_NONE);
- } else {
- av1_build_nmv_cost_table(
- x->nmv_vec_cost,
- cm->allow_high_precision_mv ? x->nmvcost_hp : x->nmvcost, &cm->fc->nmvc,
- cm->allow_high_precision_mv);
- }
-
- x->mvcost = x->mv_cost_stack; // Same two assignments as av1_set_mvcost().
- x->nmvjointcost = x->nmv_vec_cost;
-
- if (frame_is_intra_only(cm) && cm->allow_screen_content_tools &&
- cpi->oxcf.pass != 1) {
- int *dvcost[2] = { &cpi->dv_cost[0][MV_MAX], &cpi->dv_cost[1][MV_MAX] }; // Each pointer targets offset MV_MAX within its row.
- av1_build_nmv_cost_table(cpi->dv_joint_cost, dvcost, &cm->fc->ndvc,
- MV_SUBPEL_NONE);
- }
-
- if (cpi->oxcf.pass != 1) { // Skipped when oxcf.pass == 1.
- for (int i = 0; i < TRANS_TYPES; ++i)
- // IDENTITY: 1 bit
- // TRANSLATION: 3 bits
- // ROTZOOM: 2 bits
- // AFFINE: 3 bits
- cpi->gmtype_cost[i] = (1 + (i > 0 ? (i == ROTZOOM ? 1 : 2) : 0)) // Bit count from the list above, scaled by AV1_PROB_COST_SHIFT.
- << AV1_PROB_COST_SHIFT;
- }
-}
-
-static void model_rd_norm(int xsq_q10, int *r_q10, int *d_q10) { // Look up and linearly interpolate normalized rate and distortion for xsq_q10.
- // NOTE: The tables below must be of the same size.
-
- // The functions described below are sampled at the four most significant
- // bits of x^2 + 8 / 256.
-
- // Normalized rate:
- // This table models the rate for a Laplacian source with given variance
- // when quantized with a uniform quantizer with given stepsize. The
- // closed form expression is:
- // Rn(x) = H(sqrt(r)) + sqrt(r)*[1 + H(r)/(1 - r)],
- // where r = exp(-sqrt(2) * x) and x = qpstep / sqrt(variance),
- // and H(x) is the binary entropy function.
- static const int rate_tab_q10[] = {
- 65536, 6086, 5574, 5275, 5063, 4899, 4764, 4651, 4553, 4389, 4255, 4142,
- 4044, 3958, 3881, 3811, 3748, 3635, 3538, 3453, 3376, 3307, 3244, 3186,
- 3133, 3037, 2952, 2877, 2809, 2747, 2690, 2638, 2589, 2501, 2423, 2353,
- 2290, 2232, 2179, 2130, 2084, 2001, 1928, 1862, 1802, 1748, 1698, 1651,
- 1608, 1530, 1460, 1398, 1342, 1290, 1243, 1199, 1159, 1086, 1021, 963,
- 911, 864, 821, 781, 745, 680, 623, 574, 530, 490, 455, 424,
- 395, 345, 304, 269, 239, 213, 190, 171, 154, 126, 104, 87,
- 73, 61, 52, 44, 38, 28, 21, 16, 12, 10, 8, 6,
- 5, 3, 2, 1, 1, 1, 0, 0,
- };
- // Normalized distortion:
- // This table models the normalized distortion for a Laplacian source
- // with given variance when quantized with a uniform quantizer
- // with given stepsize. The closed form expression is:
- // Dn(x) = 1 - 1/sqrt(2) * x / sinh(x/sqrt(2))
- // where x = qpstep / sqrt(variance).
- // Note the actual distortion is Dn * variance.
- static const int dist_tab_q10[] = {
- 0, 0, 1, 1, 1, 2, 2, 2, 3, 3, 4, 5,
- 5, 6, 7, 7, 8, 9, 11, 12, 13, 15, 16, 17,
- 18, 21, 24, 26, 29, 31, 34, 36, 39, 44, 49, 54,
- 59, 64, 69, 73, 78, 88, 97, 106, 115, 124, 133, 142,
- 151, 167, 184, 200, 215, 231, 245, 260, 274, 301, 327, 351,
- 375, 397, 418, 439, 458, 495, 528, 559, 587, 613, 637, 659,
- 680, 717, 749, 777, 801, 823, 842, 859, 874, 899, 919, 936,
- 949, 960, 969, 977, 983, 994, 1001, 1006, 1010, 1013, 1015, 1017,
- 1018, 1020, 1022, 1022, 1023, 1023, 1023, 1024,
- };
- static const int xsq_iq_q10[] = { // The xsq_q10 value at which each entry of the two tables above was sampled.
- 0, 4, 8, 12, 16, 20, 24, 28, 32,
- 40, 48, 56, 64, 72, 80, 88, 96, 112,
- 128, 144, 160, 176, 192, 208, 224, 256, 288,
- 320, 352, 384, 416, 448, 480, 544, 608, 672,
- 736, 800, 864, 928, 992, 1120, 1248, 1376, 1504,
- 1632, 1760, 1888, 2016, 2272, 2528, 2784, 3040, 3296,
- 3552, 3808, 4064, 4576, 5088, 5600, 6112, 6624, 7136,
- 7648, 8160, 9184, 10208, 11232, 12256, 13280, 14304, 15328,
- 16352, 18400, 20448, 22496, 24544, 26592, 28640, 30688, 32736,
- 36832, 40928, 45024, 49120, 53216, 57312, 61408, 65504, 73696,
- 81888, 90080, 98272, 106464, 114656, 122848, 131040, 147424, 163808,
- 180192, 196576, 212960, 229344, 245728,
- };
- const int tmp = (xsq_q10 >> 2) + 8;
- const int k = get_msb(tmp) - 3; // assumes get_msb() returns the index of the highest set bit; tmp >= 8 then gives k >= 0. TODO confirm
- const int xq = (k << 3) + ((tmp >> k) & 0x7); // Table index: 8 entries per value of k.
- const int one_q10 = 1 << 10;
- const int a_q10 = ((xsq_q10 - xsq_iq_q10[xq]) << 10) >> (2 + k); // Q10 weight of entry xq + 1.
- const int b_q10 = one_q10 - a_q10; // Q10 weight of entry xq.
- *r_q10 = (rate_tab_q10[xq] * b_q10 + rate_tab_q10[xq + 1] * a_q10) >> 10; // Reads xq + 1, so the caller must bound xsq_q10.
- *d_q10 = (dist_tab_q10[xq] * b_q10 + dist_tab_q10[xq + 1] * a_q10) >> 10;
-}
-
-void av1_model_rd_from_var_lapndz(int64_t var, unsigned int n_log2, // Writes modelled *rate and *dist for the given variance and quantizer step.
- unsigned int qstep, int *rate,
- int64_t *dist) {
- // This function models the rate and distortion for a Laplacian
- // source with given variance when quantized with a uniform quantizer
- // with given stepsize. The closed form expressions are in:
- // Hang and Chen, "Source Model for transform video coder and its
- // application - Part I: Fundamental Theory", IEEE Trans. Circ.
- // Sys. for Video Tech., April 1997.
- if (var == 0) { // Zero variance: report zero rate and distortion, and avoid dividing by var below.
- *rate = 0;
- *dist = 0;
- } else {
- int d_q10, r_q10;
- static const uint32_t MAX_XSQ_Q10 = 245727; // One below the last xsq_iq_q10 entry, so model_rd_norm()'s xq + 1 stays in range.
- const uint64_t xsq_q10_64 =
- (((uint64_t)qstep * qstep << (n_log2 + 10)) + (var >> 1)) / var; // (qstep^2 << (n_log2 + 10)) / var, rounded to nearest.
- const int xsq_q10 = (int)AOMMIN(xsq_q10_64, MAX_XSQ_Q10);
- model_rd_norm(xsq_q10, &r_q10, &d_q10);
- *rate = ROUND_POWER_OF_TWO(r_q10 << n_log2, 10 - AV1_PROB_COST_SHIFT); // Scale by 2^n_log2, then drop Q10 to the AV1_PROB_COST_SHIFT scale.
- *dist = (var * (int64_t)d_q10 + 512) >> 10; // Dn * variance, with Q10 rounding removed.
- }
-}
-
-// Catmull-Rom style cubic interpolation through four consecutive samples
-// p[0..3]. x is the fractional position between p[1] and p[2]: x == 0 returns
-// p[1], and at x == 1 the polynomial evaluates to p[2].
-static double interp_cubic(const double *p, double x) {
-  const double lin = p[2] - p[0];
-  const double quad = 2.0 * p[0] - 5.0 * p[1] + 4.0 * p[2] - p[3];
-  const double cub = 3.0 * (p[1] - p[2]) + p[3] - p[0];
-  // Horner evaluation of the polynomial around p[1].
-  return p[1] + 0.5 * x * (lin + x * (quad + x * cub));
-}
-
-// Bicubic interpolation on a 4x4 patch whose top-left sample is p[0] and
-// whose rows are p_stride doubles apart. Each of the four rows is
-// interpolated along x with interp_cubic(), then the four row results are
-// interpolated along y.
-static double interp_bicubic(const double *p, int p_stride, double x,
-                             double y) {
-  double row_val[4];
-  int r;
-  for (r = 0; r < 4; ++r) {
-    row_val[r] = interp_cubic(p + r * p_stride, x);
-  }
-  return interp_cubic(row_val, y);
-}
-
-// Rate samples read by av1_model_rd_surffit(). That function indexes the
-// table as [row * stride + col] with a stride that evaluates to 18, so the
-// 65 * 18 values form 65 rows of 18 columns: columns follow its xm axis
-// (-0.5 to 16.5, step 1) and rows follow its yl axis (-15.5 to 16.5,
-// step 0.5).
-static const double interp_rgrid_surf[65 * 18] = {
- 0.104019, 0.245714, 0.293686, 0.358635, 0.382167, 0.412446,
- 0.419955, 0.421388, 0.426672, 0.427990, 0.428531, 0.456868,
- 0.569880, 0.638822, 1.016319, 2.143453, 3.565229, 4.720880,
- 0.124618, 0.294211, 0.352023, 0.429991, 0.458206, 0.494510,
- 0.503513, 0.505232, 0.511566, 0.513234, 0.519365, 0.570225,
- 0.697373, 0.840624, 1.462198, 3.289054, 6.256517, 6.852788,
- 0.118630, 0.269669, 0.346620, 0.430999, 0.459385, 0.495783,
- 0.504808, 0.506532, 0.512884, 0.514988, 0.543437, 0.662772,
- 0.795876, 1.313596, 2.403841, 4.163098, 7.440589, 8.616275,
- 0.093329, 0.168205, 0.321320, 0.430607, 0.459385, 0.495783,
- 0.504813, 0.506548, 0.512975, 0.520662, 0.571659, 0.701841,
- 1.010727, 2.138851, 3.460626, 6.317955, 10.098127, 14.418553,
- 0.087021, 0.142905, 0.315011, 0.430509, 0.459385, 0.495787,
- 0.505075, 0.507599, 0.513584, 0.543182, 0.669941, 0.825620,
- 1.362800, 2.572187, 4.205047, 7.498399, 12.303118, 16.641735,
- 0.086923, 0.142513, 0.314913, 0.430508, 0.459385, 0.495803,
- 0.506126, 0.511816, 0.514810, 0.549705, 0.725350, 1.127334,
- 2.168597, 3.463686, 6.318605, 10.162284, 18.556041, 19.847042,
- 0.086923, 0.142513, 0.314913, 0.430506, 0.459376, 0.495805,
- 0.506388, 0.512954, 0.520772, 0.580215, 0.810474, 1.391548,
- 2.579442, 4.205160, 7.498399, 12.381597, 21.703618, 24.015457,
- 0.086923, 0.142513, 0.314911, 0.430353, 0.458765, 0.495652,
- 0.506391, 0.513406, 0.544098, 0.702950, 1.121860, 2.168961,
- 3.463798, 6.318607, 10.162284, 18.685361, 28.188192, 37.638872,
- 0.086923, 0.142513, 0.314901, 0.429742, 0.456313, 0.495045,
- 0.506484, 0.519195, 0.580104, 0.810126, 1.391462, 2.579441,
- 4.205160, 7.498399, 12.381597, 21.848607, 33.367199, 42.623190,
- 0.086923, 0.142513, 0.314899, 0.429589, 0.455706, 0.495155,
- 0.507882, 0.542426, 0.702360, 1.119921, 2.168478, 3.463791,
- 6.318607, 10.162284, 18.685361, 28.345760, 47.802028, 49.163533,
- 0.086924, 0.142548, 0.315086, 0.429842, 0.455870, 0.496336,
- 0.512412, 0.556953, 0.773373, 1.266396, 2.548277, 4.204676,
- 7.498399, 12.381597, 21.848607, 33.548250, 54.301011, 56.262859,
- 0.087067, 0.144957, 0.327436, 0.446616, 0.466362, 0.505706,
- 0.522077, 0.610747, 0.972543, 1.666916, 3.338812, 6.316669,
- 10.162284, 18.685361, 28.345760, 48.065311, 66.145302, 78.396020,
- 0.094295, 0.164235, 0.393722, 0.534219, 0.530922, 0.579308,
- 0.603889, 0.760870, 1.229961, 2.423214, 4.173513, 7.497916,
- 12.381597, 21.848607, 33.548250, 54.589585, 74.875848, 86.468182,
- 0.124096, 0.213005, 0.497188, 0.665176, 0.685973, 0.800200,
- 0.911394, 1.077971, 1.677290, 3.332129, 6.314960, 10.162257,
- 18.685361, 28.345760, 48.065311, 66.453506, 98.275189, 96.862588,
- 0.140999, 0.270140, 0.658212, 0.867661, 0.970183, 1.149516,
- 1.480599, 1.664833, 2.421893, 3.857981, 7.418830, 12.380371,
- 21.848607, 33.548250, 54.589585, 75.188867, 106.657971, 99.762997,
- 0.178353, 0.398001, 0.988462, 1.241473, 1.340967, 1.713568,
- 2.335030, 2.701432, 3.348532, 5.077158, 9.829903, 18.676528,
- 28.345700, 48.065311, 66.453506, 98.588283, 117.057193, 101.130722,
- 0.281079, 0.548300, 1.395825, 1.780770, 2.000508, 2.702964,
- 3.638454, 4.573843, 5.051641, 7.079129, 11.293332, 21.594861,
- 33.544335, 54.589585, 75.188867, 106.971065, 119.957601, 101.466632,
- 0.476762, 0.842189, 2.019678, 2.723895, 3.188467, 4.011610,
- 5.545111, 7.508984, 8.176339, 9.774504, 14.720782, 27.334416,
- 48.049609, 66.453506, 98.588283, 117.370357, 121.329855, 101.509242,
- 0.993999, 1.520111, 3.013605, 4.203530, 4.982992, 6.074944,
- 8.583581, 11.818375, 14.192544, 14.937517, 21.258160, 33.305953,
- 54.585735, 75.188867, 106.971135, 120.279824, 121.976055, 102.690130,
- 1.776487, 2.613655, 4.356487, 6.161726, 7.622196, 9.464193,
- 13.077233, 18.051656, 23.221051, 24.080068, 30.085038, 48.345269,
- 66.457698, 98.588353, 117.379415, 121.976128, 124.356210, 107.713202,
- 3.191085, 4.495201, 5.686033, 8.365566, 11.275339, 14.706437,
- 20.300969, 28.152237, 35.688355, 39.341382, 41.030743, 55.752262,
- 75.211764, 106.980285, 120.608403, 124.680746, 130.222528, 112.260098,
- 6.136611, 7.305215, 7.272532, 10.646713, 15.630815, 22.383168,
- 31.349131, 42.419822, 52.301680, 58.983454, 58.915405, 69.161305,
- 98.992460, 117.713855, 124.344836, 130.623638, 138.442401, 127.846670,
- 11.707980, 13.490761, 11.640845, 14.176132, 22.131124, 33.776462,
- 47.365711, 61.603834, 75.281056, 83.463985, 85.510533, 86.026513,
- 108.787480, 123.031136, 130.607284, 138.954406, 160.867784, 158.958882,
- 27.062874, 32.195139, 24.147297, 22.114632, 35.580506, 52.551674,
- 71.652956, 88.606776, 102.107193, 110.703186, 114.398733, 111.118539,
- 121.503578, 132.455924, 139.490806, 161.412674, 193.563210, 172.203945,
- 35.625692, 47.953028, 42.639820, 42.276254, 58.815664, 84.977282,
- 110.656412, 126.168446, 134.658126, 140.604482, 144.006012, 141.702382,
- 140.125323, 153.122630, 164.748041, 194.156197, 206.854650, 174.013079,
- 49.516447, 65.335381, 71.738306, 81.872819, 98.400740, 136.840488,
- 163.775802, 169.440078, 172.747876, 171.222919, 171.679604, 172.173550,
- 168.200129, 187.617133, 199.683394, 207.768200, 210.062520, 175.478356,
- 60.341673, 92.487135, 119.907299, 136.068010, 144.778950, 189.443534,
- 220.120077, 219.641635, 214.616503, 205.894657, 198.453924, 200.013069,
- 195.938103, 206.118661, 210.447375, 212.061379, 216.078218, 181.162805,
- 78.422159, 112.242899, 158.416312, 181.404320, 193.188690, 229.296967,
- 270.461799, 275.168977, 256.511701, 244.706786, 231.344608, 226.065087,
- 222.248618, 218.662324, 217.966722, 218.248574, 218.818588, 182.740573,
- 88.713664, 123.594164, 172.928179, 213.781414, 245.800351, 252.063414,
- 313.283141, 331.703831, 305.866639, 285.177142, 269.759635, 251.988739,
- 245.998388, 232.688076, 230.588702, 230.882657, 230.319053, 192.120741,
- 102.540561, 152.905927, 189.137131, 241.806756, 273.868497, 284.258017,
- 339.689853, 373.561104, 362.657463, 326.291984, 311.922687, 290.460189,
- 276.774381, 273.012072, 277.751792, 279.123748, 278.820447, 233.813798,
- 132.983118, 176.307242, 197.415684, 243.307787, 280.893995, 332.922370,
- 340.329043, 404.530166, 419.475405, 375.775209, 351.300889, 340.042759,
- 315.683832, 306.123530, 306.359319, 306.733063, 307.609556, 261.647847,
- 149.579109, 185.925581, 207.937033, 245.159084, 301.890957, 350.040480,
- 352.250771, 418.742329, 458.112686, 430.125208, 386.460441, 380.346839,
- 354.679150, 337.305620, 334.504124, 335.889932, 341.060725, 286.898578,
- 153.576812, 202.105624, 219.366967, 248.524506, 314.255692, 350.607526,
- 390.567688, 408.629209, 488.000213, 480.563823, 432.461799, 410.412624,
- 398.607371, 400.188740, 402.780916, 408.853470, 430.449735, 363.777088,
- 161.353129, 214.848904, 231.549852, 258.536466, 313.163177, 368.140577,
- 412.136393, 413.409032, 499.838438, 519.571063, 485.833867, 444.562715,
- 435.738129, 442.358549, 450.166531, 453.208524, 458.424358, 385.823139,
- 175.109034, 227.608058, 250.069563, 286.101747, 312.256740, 378.421485,
- 413.344147, 435.058646, 476.960941, 542.448886, 530.189154, 495.408402,
- 475.326752, 465.017144, 464.694045, 465.144689, 466.905382, 398.669138,
- 184.750180, 240.766694, 283.240772, 305.480150, 322.409001, 374.526162,
- 427.141326, 452.840323, 472.604139, 545.366105, 567.676694, 541.666203,
- 509.591873, 492.044219, 492.778569, 493.765684, 493.235693, 413.684325,
- 194.728357, 254.928927, 289.991157, 300.193195, 324.194589, 371.563147,
- 439.226438, 468.295088, 495.654854, 533.506353, 587.476353, 578.298989,
- 548.041942, 527.393885, 538.965146, 545.070442, 544.295454, 454.012211,
- 205.195287, 283.135677, 297.921431, 319.295927, 355.621830, 392.466463,
- 446.696167, 485.053519, 516.426615, 532.264584, 588.481600, 615.906737,
- 589.319634, 555.754316, 558.389367, 569.094521, 569.779764, 475.384946,
- 218.552054, 298.511016, 319.188338, 351.781666, 372.789510, 412.827434,
- 464.569387, 506.270203, 533.049810, 553.347364, 580.644599, 632.759854,
- 622.235843, 569.960552, 580.799340, 586.553714, 579.488366, 491.826482,
- 244.803348, 299.790203, 324.187975, 363.280782, 403.710443, 441.724083,
- 492.732682, 534.722691, 552.193622, 575.112647, 586.097705, 635.224970,
- 644.642944, 606.017786, 640.321218, 642.316989, 616.397020, 548.300111,
- 256.957358, 318.638991, 355.063346, 389.889307, 433.607315, 468.209001,
- 515.178157, 573.556591, 578.113115, 587.246475, 601.762801, 638.454644,
- 656.574853, 641.184609, 676.908189, 684.198162, 678.387412, 574.805864,
- 251.211502, 323.448532, 364.227424, 411.792704, 462.226488, 503.572288,
- 549.299249, 599.124071, 601.227977, 597.118176, 613.247552, 633.278532,
- 658.074755, 664.930719, 685.731531, 693.632845, 693.076350, 578.326477,
- 267.695377, 354.273736, 389.976833, 438.518178, 493.332686, 544.343027,
- 588.895829, 620.206193, 628.327410, 606.067827, 620.998532, 657.985256,
- 683.936059, 691.345257, 693.894723, 695.175306, 693.618786, 578.517148,
- 274.290725, 363.465288, 411.808596, 463.369805, 515.310226, 581.009306,
- 613.070738, 636.638714, 647.333929, 629.867603, 644.646319, 687.796202,
- 702.859596, 713.495479, 704.068069, 704.991807, 704.188594, 587.283658,
- 302.538449, 389.174737, 438.518422, 493.398902, 547.662399, 601.981814,
- 624.773046, 641.629484, 644.699451, 645.848784, 668.033340, 703.643523,
- 707.422408, 717.329600, 726.298973, 744.127507, 745.365167, 617.954068,
- 310.328188, 410.984766, 463.369805, 515.315010, 581.309832, 613.787792,
- 634.988538, 654.145284, 662.632978, 668.413496, 706.494057, 750.545471,
- 730.724808, 730.002100, 743.625262, 750.801609, 745.308457, 606.505800,
- 329.948756, 437.600191, 493.398902, 547.661910, 601.917884, 622.557745,
- 633.244395, 644.055898, 648.224221, 665.062911, 763.555733, 812.391078,
- 769.063582, 744.865168, 727.579796, 724.950408, 722.179707, 598.564510,
- 350.848328, 462.437458, 515.315010, 581.309823, 613.779123, 634.465309,
- 652.056257, 662.179143, 671.466297, 726.881256, 819.824030, 880.232789,
- 810.371672, 754.246481, 725.053473, 724.253390, 723.503395, 603.394909,
- 373.704088, 492.408266, 547.661910, 601.917884, 622.557620, 633.236320,
- 644.023513, 648.232514, 666.381639, 785.498283, 929.441612, 999.772800,
- 890.339033, 775.852504, 731.840181, 726.905100, 725.251844, 604.899901,
- 394.473422, 514.261306, 581.309823, 613.779123, 634.465309, 652.056257,
- 662.179143, 671.466557, 727.134512, 835.764144, 981.747089, 1018.462934,
- 939.686967, 811.276731, 739.398459, 727.365647, 725.285425, 604.923525,
- 419.976505, 546.538939, 601.917884, 622.557620, 633.236320, 644.023513,
- 648.232514, 666.381639, 785.545191, 932.841398, 1036.609617, 1026.945092,
- 963.822765, 840.827315, 755.532423, 730.241865, 725.366847, 604.924155,
- 437.281359, 580.116337, 613.779123, 634.465309, 652.056257, 662.179143,
- 671.466557, 727.134512, 835.764859, 981.996194, 1031.896881, 1002.544732,
- 881.157178, 828.151494, 799.340975, 751.314325, 728.316587, 605.005504,
- 464.713920, 600.649281, 622.557620, 633.236320, 644.023513, 648.232514,
- 666.381639, 785.545191, 932.841398, 1036.735329, 1035.037004, 995.478339,
- 858.093733, 823.471976, 819.881754, 798.749289, 749.440463, 607.955244,
- 495.880237, 612.473139, 634.465309, 652.056257, 662.179143, 671.466557,
- 727.134512, 835.764859, 981.996194, 1032.339788, 1031.105117, 995.303259,
- 857.733663, 823.435877, 822.822791, 819.873050, 796.882480, 629.038445,
- 510.391280, 621.158273, 633.236320, 644.023513, 648.232514, 666.381639,
- 785.545191, 932.841398, 1036.735329, 1035.566013, 1029.599350, 994.926093,
- 857.645648, 823.435143, 822.904139, 822.822791, 817.965681, 673.856962,
- 514.588176, 632.947715, 652.056257, 662.179143, 671.466557, 727.134512,
- 835.764859, 981.996194, 1032.339788, 1031.547475, 1023.835377, 972.158629,
- 851.968626, 823.347128, 822.904770, 822.904139, 820.752301, 684.418900,
- 520.013294, 631.668183, 644.023513, 648.232514, 666.381639, 785.545191,
- 932.841398, 1036.735329, 1035.567378, 1029.776746, 1001.044108, 880.853721,
- 829.201546, 822.994150, 822.904770, 822.904770, 820.792975, 684.582020,
- 531.253628, 650.479606, 662.179143, 671.466557, 727.134512, 835.764859,
- 981.996194, 1032.339788, 1031.636855, 1029.601779, 995.366703, 858.086641,
- 823.524524, 822.906135, 822.904770, 822.904770, 820.792975, 684.582020,
- 528.531744, 642.424501, 648.232514, 666.381639, 785.545191, 932.841398,
- 1036.735329, 1035.567378, 1030.219103, 1029.576226, 995.278687, 857.733663,
- 823.436508, 822.904770, 822.904770, 822.904770, 820.792975, 684.582020,
- 545.401164, 660.550678, 671.508859, 727.304161, 835.807162, 981.996850,
- 1032.339788, 1031.636855, 1030.130788, 1029.487827, 994.925709, 857.645648,
- 823.435143, 822.904770, 822.904770, 822.904770, 820.792975, 684.582020,
- 537.684760, 646.650947, 669.110131, 796.487512, 935.569890, 1036.777631,
- 1035.567378, 1030.219103, 1030.018584, 1023.810805, 972.158629, 851.968626,
- 823.347128, 822.904770, 822.904770, 822.904770, 820.792975, 684.582020,
- 552.408370, 670.001885, 738.246482, 879.690154, 992.939171, 1032.509436,
- 1031.636855, 1030.132153, 1029.665223, 1001.043724, 880.853721, 829.201546,
- 822.994150, 822.904770, 822.904770, 822.904770, 820.792975, 684.582020,
- 539.835902, 667.496388, 799.216004, 946.512211, 1039.506123, 1035.609680,
- 1030.219103, 1030.107964, 1029.577207, 995.366703, 858.086641, 823.524524,
- 822.906135, 822.904770, 822.904770, 822.904770, 820.792975, 684.582020,
- 558.362529, 734.277451, 877.197218, 990.478243, 1029.908393, 1028.993978,
- 1027.488620, 1027.464048, 1026.933674, 992.724534, 855.532488, 821.323349,
- 820.792975, 820.792975, 820.792975, 820.792975, 818.686600, 682.825198,
- 453.127195, 649.075095, 780.278390, 867.165890, 862.469711, 857.067460,
- 856.956321, 856.955937, 856.513579, 827.981461, 713.556496, 685.024378,
- 684.582020, 684.582020, 684.582020, 684.582020, 682.825198, 569.510056,
-};
-
-// Distortion samples read by av1_model_rd_surffit(), laid out like
-// interp_rgrid_surf: 65 rows of 18 columns addressed as [row * 18 + col].
-// The initializer below is wrapped at 7 values per source line, so source
-// lines do not line up with grid rows.
-static const double interp_dgrid_surf[65 * 18] = {
- 10.650434, 12.204694, 12.040917, 11.843008, 11.845578, 12.051535, 12.103583,
- 12.136780, 12.266709, 12.299107, 12.299673, 12.303120, 12.316337, 12.293431,
- 12.092165, 11.602421, 11.141559, 8.864495, 12.770003, 14.634889, 14.437149,
- 14.199413, 14.202487, 14.449423, 14.511827, 14.551629, 14.707410, 14.746265,
- 14.747610, 14.753705, 14.762194, 14.699395, 14.390525, 13.690970, 12.874168,
- 10.367121, 12.832328, 14.790730, 14.503765, 14.236403, 14.239028, 14.486600,
- 14.549164, 14.589069, 14.745250, 14.784258, 14.788320, 14.801930, 14.762798,
- 14.499088, 14.021544, 13.469684, 12.661560, 10.108384, 12.950520, 15.264726,
- 14.621957, 14.238236, 14.239028, 14.486601, 14.549264, 14.589469, 14.745361,
- 14.784949, 14.791572, 14.798652, 14.660251, 14.119394, 13.651131, 12.935657,
- 12.176082, 9.228999, 12.979992, 15.382918, 14.651428, 14.238693, 14.239028,
- 14.486701, 14.555710, 14.615321, 14.751849, 14.787700, 14.797104, 14.743189,
- 14.475057, 13.944406, 13.450468, 12.687876, 11.824993, 8.906683, 12.980449,
- 15.384750, 14.651885, 14.238700, 14.239028, 14.487102, 14.581562, 14.718998,
- 14.777721, 14.788445, 14.778661, 14.582790, 14.099785, 13.649637, 12.935359,
- 12.201859, 10.891931, 8.482221, 12.980449, 15.384750, 14.651886, 14.238801,
- 14.239434, 14.487303, 14.588010, 14.744860, 14.784773, 14.786094, 14.735647,
- 14.455704, 13.939591, 13.450393, 12.687876, 11.849334, 10.476658, 8.043672,
- 12.980449, 15.384750, 14.651987, 14.245320, 14.265579, 14.493824, 14.588211,
- 14.745312, 14.787263, 14.775934, 14.582036, 14.099475, 13.649563, 12.935358,
- 12.201859, 10.911285, 9.730570, 6.696921, 12.980449, 15.384750, 14.652393,
- 14.271466, 14.370434, 14.520069, 14.589027, 14.746028, 14.785482, 14.735605,
- 14.455693, 13.939590, 13.450393, 12.687876, 11.849334, 10.494514, 9.195398,
- 6.215460, 12.980449, 15.384750, 14.652494, 14.277985, 14.396679, 14.533035,
- 14.615021, 14.754825, 14.775610, 14.582796, 14.099664, 13.649565, 12.935358,
- 12.201859, 10.911285, 9.747361, 7.779960, 5.617541, 12.980448, 15.384731,
- 14.652415, 14.278078, 14.397578, 14.559053, 14.718657, 14.776398, 14.747044,
- 14.504690, 13.951810, 13.450583, 12.687876, 11.849334, 10.494514, 9.210817,
- 7.210003, 5.164575, 12.980446, 15.383448, 14.647073, 14.277541, 14.403813,
- 14.569546, 14.744956, 14.765103, 14.629073, 14.296161, 13.698573, 12.936118,
- 12.201859, 10.911285, 9.747361, 7.790897, 6.322998, 3.931551, 12.981550,
- 15.376916, 14.615597, 14.274820, 14.437479, 14.575942, 14.707492, 14.734111,
- 14.515975, 14.000806, 13.462803, 12.688066, 11.849334, 10.494514, 9.210817,
- 7.219566, 5.781392, 3.486081, 12.991899, 15.376201, 14.579444, 14.296898,
- 14.473361, 14.522910, 14.491600, 14.543267, 14.288580, 13.700311, 12.936579,
- 12.201867, 10.911285, 9.747361, 7.790897, 6.331506, 4.480348, 2.923138,
- 13.019848, 15.383477, 14.582260, 14.385262, 14.452673, 14.436019, 14.238174,
- 14.255993, 13.977481, 13.532342, 12.705591, 11.849605, 10.494514, 9.210817,
- 7.219566, 5.789642, 4.018194, 2.766222, 13.028558, 15.315782, 14.439141,
- 14.326286, 14.452429, 14.311731, 14.033235, 13.922587, 13.665868, 13.207897,
- 12.274375, 10.912967, 9.747371, 7.790897, 6.331506, 4.488594, 3.454993,
- 2.692682, 12.992752, 15.321471, 14.409573, 14.236340, 14.322969, 14.049072,
- 13.764823, 13.479242, 13.250105, 12.759133, 12.019174, 10.532951, 9.211409,
- 7.219566, 5.789642, 4.026440, 3.298077, 2.674624, 12.945493, 15.276596,
- 14.315745, 14.026198, 14.085774, 13.844563, 13.447576, 12.964935, 12.735525,
- 12.288592, 11.511693, 9.900227, 7.793270, 6.331506, 4.488594, 3.463236,
- 3.224318, 2.672433, 12.757570, 15.056661, 14.095011, 13.722362, 13.812624,
- 13.608480, 13.021206, 12.367627, 11.937931, 11.581049, 10.599552, 9.247860,
- 7.220151, 5.789642, 4.026437, 3.305882, 3.191260, 2.615317, 12.581293,
- 14.824658, 13.909074, 13.496158, 13.491402, 13.221550, 12.514140, 11.677229,
- 10.936895, 10.619912, 9.634779, 7.763570, 6.331082, 4.488590, 3.462798,
- 3.216460, 3.076315, 2.373499, 12.283499, 14.455760, 13.890593, 13.427587,
- 13.183783, 12.763833, 11.861006, 10.740618, 9.820756, 9.354945, 8.669862,
- 7.123268, 5.787860, 4.025994, 3.290000, 3.084410, 2.810905, 2.222916,
- 12.010893, 14.300919, 13.986624, 13.484026, 13.025385, 12.224281, 11.064265,
- 9.631040, 8.594396, 8.003736, 7.561587, 6.274418, 4.466637, 3.446574,
- 3.102467, 2.816989, 2.598688, 1.951541, 11.581477, 13.831132, 13.632027,
- 13.380414, 12.807880, 11.665651, 10.218236, 8.562237, 7.222614, 6.611808,
- 6.261676, 5.402793, 3.938544, 3.174375, 2.818166, 2.602758, 2.213911,
- 1.434763, 11.050735, 12.893449, 12.363152, 12.712829, 12.012961, 10.887854,
- 9.109699, 7.421701, 5.965603, 5.272129, 4.991435, 4.423000, 3.369988,
- 2.800371, 2.593901, 2.217431, 1.670917, 1.215265, 10.641194, 11.766277,
- 10.777082, 10.972917, 10.689298, 9.701545, 7.719947, 6.145654, 4.872442,
- 4.099600, 3.880934, 3.514159, 2.786474, 2.368963, 2.162376, 1.673670,
- 1.450770, 1.185424, 10.071964, 11.107701, 9.172361, 8.551313, 8.412080,
- 7.641397, 6.174246, 4.853916, 3.904549, 3.246810, 2.959903, 2.785066,
- 2.240001, 1.793166, 1.585520, 1.449824, 1.405368, 1.168856, 9.213182,
- 9.173278, 7.219231, 6.242951, 5.626013, 5.768007, 4.908666, 3.809589,
- 3.115109, 2.617899, 2.274793, 2.172960, 1.838597, 1.505915, 1.414333,
- 1.392666, 1.338173, 1.105611, 7.365015, 7.471370, 5.622346, 4.520127,
- 3.936272, 4.208822, 3.623024, 2.977794, 2.450003, 2.097261, 1.824090,
- 1.643270, 1.473525, 1.351388, 1.327504, 1.323865, 1.307894, 1.088234,
- 6.198210, 6.580712, 4.682511, 3.416952, 2.941929, 2.766637, 2.650686,
- 2.315439, 1.925838, 1.659784, 1.464419, 1.252806, 1.162722, 1.197518,
- 1.199875, 1.197365, 1.194040, 0.995797, 5.402507, 5.055466, 3.728724,
- 2.624359, 2.165810, 1.943189, 1.918190, 1.738078, 1.516328, 1.290520,
- 1.155793, 1.015962, 0.881900, 0.807203, 0.754242, 0.743378, 0.740288,
- 0.614158, 3.937867, 3.862507, 2.884664, 2.088147, 1.648496, 1.473584,
- 1.340123, 1.291769, 1.165381, 1.000224, 0.893316, 0.821333, 0.691363,
- 0.610501, 0.586766, 0.583762, 0.577840, 0.468733, 3.104660, 3.181078,
- 2.420208, 1.747442, 1.297956, 1.109835, 0.970385, 0.943229, 0.876923,
- 0.777584, 0.678183, 0.628623, 0.553745, 0.523430, 0.519490, 0.514394,
- 0.492259, 0.403172, 2.593833, 2.533720, 2.010452, 1.480944, 1.060302,
- 0.846383, 0.738703, 0.673144, 0.658010, 0.592449, 0.518236, 0.470335,
- 0.425088, 0.393168, 0.378116, 0.355846, 0.275469, 0.213128, 2.176988,
- 2.089575, 1.671284, 1.225008, 0.895382, 0.672008, 0.566241, 0.496746,
- 0.488005, 0.449874, 0.400899, 0.354002, 0.318150, 0.281533, 0.238545,
- 0.224159, 0.202399, 0.160681, 1.874679, 1.769165, 1.430124, 1.068727,
- 0.780272, 0.557801, 0.441643, 0.377256, 0.352957, 0.338452, 0.304965,
- 0.273172, 0.240052, 0.208724, 0.193431, 0.190845, 0.185025, 0.138166,
- 1.590226, 1.502830, 1.193127, 0.917885, 0.670432, 0.474546, 0.355420,
- 0.292305, 0.259035, 0.249937, 0.232079, 0.208943, 0.181936, 0.160038,
- 0.152257, 0.151235, 0.149583, 0.120747, 1.331730, 1.255907, 1.012871,
- 0.778422, 0.578977, 0.412432, 0.293155, 0.231824, 0.197187, 0.183921,
- 0.174876, 0.157252, 0.140263, 0.127050, 0.110244, 0.105041, 0.104323,
- 0.086944, 1.153994, 1.118771, 0.822355, 0.612321, 0.478249, 0.348222,
- 0.247408, 0.186141, 0.152714, 0.135445, 0.129810, 0.119994, 0.115619,
- 0.131626, 0.095612, 0.079343, 0.077502, 0.064550, 0.946317, 0.925894,
- 0.677969, 0.499906, 0.397101, 0.297931, 0.214467, 0.152333, 0.120731,
- 0.102686, 0.095062, 0.090361, 0.122319, 0.240194, 0.112687, 0.070690,
- 0.070461, 0.054194, 0.824155, 0.787241, 0.581856, 0.419228, 0.313167,
- 0.245582, 0.183500, 0.128101, 0.096577, 0.080267, 0.071022, 0.066851,
- 0.085754, 0.154163, 0.075884, 0.052401, 0.054270, 0.026656, 0.716310,
- 0.671378, 0.489580, 0.349569, 0.256155, 0.206343, 0.157853, 0.111950,
- 0.079271, 0.062518, 0.053441, 0.049660, 0.051400, 0.063778, 0.039993,
- 0.029133, 0.023382, 0.013725, 0.614125, 0.579096, 0.417126, 0.299465,
- 0.217849, 0.165515, 0.129040, 0.093127, 0.065612, 0.049543, 0.041429,
- 0.036850, 0.034416, 0.033989, 0.024216, 0.017377, 0.014833, 0.011987,
- 0.520407, 0.487239, 0.349473, 0.251741, 0.184897, 0.135813, 0.107098,
- 0.073607, 0.053938, 0.040531, 0.032931, 0.028876, 0.025759, 0.022168,
- 0.016739, 0.014638, 0.014333, 0.011947, 0.449954, 0.415124, 0.299452,
- 0.216942, 0.158874, 0.115334, 0.088821, 0.060105, 0.042610, 0.032566,
- 0.026903, 0.023123, 0.019913, 0.016835, 0.014306, 0.013625, 0.013535,
- 0.011284, 0.377618, 0.347773, 0.251741, 0.184839, 0.132857, 0.095439,
- 0.070462, 0.052244, 0.036078, 0.026025, 0.021518, 0.018487, 0.015361,
- 0.012905, 0.011470, 0.010569, 0.010283, 0.008297, 0.319953, 0.297976,
- 0.216942, 0.158842, 0.113280, 0.080426, 0.057367, 0.041987, 0.030135,
- 0.022295, 0.017901, 0.015121, 0.012224, 0.010035, 0.009353, 0.009108,
- 0.008695, 0.006139, 0.267864, 0.250502, 0.184839, 0.132851, 0.095039,
- 0.068220, 0.049135, 0.035315, 0.025144, 0.018237, 0.013857, 0.012094,
- 0.009715, 0.007743, 0.006937, 0.006446, 0.006243, 0.004929, 0.230449,
- 0.215895, 0.158842, 0.113280, 0.080417, 0.057174, 0.041304, 0.029959,
- 0.021866, 0.015673, 0.012133, 0.010083, 0.007801, 0.006053, 0.005401,
- 0.003834, 0.003429, 0.002851, 0.193984, 0.183963, 0.132851, 0.095039,
- 0.068220, 0.049133, 0.035305, 0.025140, 0.018150, 0.013175, 0.010422,
- 0.008491, 0.006397, 0.004567, 0.003494, 0.002933, 0.002825, 0.002355,
- 0.167298, 0.158088, 0.113280, 0.080417, 0.057174, 0.041304, 0.029959,
- 0.021866, 0.015669, 0.011955, 0.009257, 0.007051, 0.005543, 0.003905,
- 0.002984, 0.002825, 0.002814, 0.002347, 0.143228, 0.132220, 0.095039,
- 0.068220, 0.049133, 0.035305, 0.025140, 0.018150, 0.013174, 0.010394,
- 0.008403, 0.006661, 0.005378, 0.003545, 0.002876, 0.002818, 0.002814,
- 0.002347, 0.122934, 0.112735, 0.080417, 0.057174, 0.041304, 0.029959,
- 0.021866, 0.015669, 0.011955, 0.009258, 0.007182, 0.006012, 0.003762,
- 0.002866, 0.002739, 0.002788, 0.002810, 0.002347, 0.101934, 0.094569,
- 0.068220, 0.049133, 0.035305, 0.025140, 0.018150, 0.013174, 0.010394,
- 0.008405, 0.006797, 0.005845, 0.003333, 0.002703, 0.002695, 0.002723,
- 0.002781, 0.002343, 0.086702, 0.080014, 0.057174, 0.041304, 0.029959,
- 0.021866, 0.015669, 0.011955, 0.009258, 0.007190, 0.006533, 0.005839,
- 0.003326, 0.002700, 0.002690, 0.002694, 0.002716, 0.002314, 0.073040,
- 0.067886, 0.049133, 0.035305, 0.025140, 0.018150, 0.013174, 0.010394,
- 0.008405, 0.006807, 0.006468, 0.005831, 0.003325, 0.002700, 0.002690,
- 0.002690, 0.002687, 0.002253, 0.061685, 0.056890, 0.041304, 0.029959,
- 0.021866, 0.015669, 0.011955, 0.009258, 0.007190, 0.006542, 0.006360,
- 0.005416, 0.003221, 0.002698, 0.002690, 0.002690, 0.002683, 0.002238,
- 0.052465, 0.048894, 0.035305, 0.025140, 0.018150, 0.013174, 0.010394,
- 0.008405, 0.006807, 0.006472, 0.005943, 0.003748, 0.002805, 0.002692,
- 0.002690, 0.002690, 0.002683, 0.002238, 0.043838, 0.041101, 0.029959,
- 0.021866, 0.015669, 0.011955, 0.009258, 0.007190, 0.006543, 0.006465,
- 0.005839, 0.003333, 0.002702, 0.002690, 0.002690, 0.002690, 0.002683,
- 0.002238, 0.037824, 0.035133, 0.025140, 0.018150, 0.013174, 0.010394,
- 0.008405, 0.006807, 0.006480, 0.006464, 0.005838, 0.003326, 0.002700,
- 0.002690, 0.002690, 0.002690, 0.002683, 0.002238, 0.031865, 0.029815,
- 0.021866, 0.015668, 0.011955, 0.009258, 0.007190, 0.006543, 0.006475,
- 0.006462, 0.005831, 0.003325, 0.002700, 0.002690, 0.002690, 0.002690,
- 0.002683, 0.002238, 0.027150, 0.025016, 0.018128, 0.013083, 0.010371,
- 0.008405, 0.006807, 0.006480, 0.006472, 0.006359, 0.005416, 0.003221,
- 0.002698, 0.002690, 0.002690, 0.002690, 0.002683, 0.002238, 0.023094,
- 0.021760, 0.015577, 0.011590, 0.009167, 0.007188, 0.006543, 0.006475,
- 0.006466, 0.005943, 0.003748, 0.002805, 0.002692, 0.002690, 0.002690,
- 0.002690, 0.002683, 0.002238, 0.019269, 0.018038, 0.013060, 0.010280,
- 0.008382, 0.006806, 0.006480, 0.006474, 0.006464, 0.005839, 0.003333,
- 0.002702, 0.002690, 0.002690, 0.002690, 0.002690, 0.002683, 0.002238,
- 0.016874, 0.015472, 0.011566, 0.009148, 0.007171, 0.006527, 0.006458,
- 0.006457, 0.006447, 0.005823, 0.003318, 0.002693, 0.002683, 0.002683,
- 0.002683, 0.002683, 0.002676, 0.002232, 0.011968, 0.011056, 0.008762,
- 0.007219, 0.005717, 0.005391, 0.005386, 0.005386, 0.005377, 0.004856,
- 0.002767, 0.002246, 0.002238, 0.002238, 0.002238, 0.002238, 0.002232,
- 0.001862,
-};
-
-// Evaluates the rate and distortion surfaces stored in interp_rgrid_surf and
-// interp_dgrid_surf at (xm, yl) using bicubic interpolation, writing the
-// results to *rate_f and *dist_f.
-//
-// Both coordinates are clamped to lie at least one grid step (plus epsilon)
-// inside the sampled range, which keeps the 4x4 patch read by
-// interp_bicubic() within the 65 x 18 tables.
-void av1_model_rd_surffit(double xm, double yl, double *rate_f,
-                          double *dist_f) {
-  // Sampling grid of the tables.
-  const double x_start = -0.5;
-  const double x_end = 16.5;
-  const double x_step = 1;
-  const double y_start = -15.5;
-  const double y_end = 16.5;
-  const double y_step = 0.5;
-  const double epsilon = 1e-6;
-  // Number of samples per table row.
-  const int stride = (int)rint((x_end - x_start) / x_step) + 1;
-
-  const double x_lo = x_start + x_step + epsilon;
-  const double x_hi = x_end - x_step - epsilon;
-  const double y_lo = y_start + y_step + epsilon;
-  const double y_hi = y_end - y_step - epsilon;
-  xm = AOMMAX(xm, x_lo);
-  xm = AOMMIN(xm, x_hi);
-  yl = AOMMAX(yl, y_lo);
-  yl = AOMMIN(yl, y_hi);
-
-  // Position in grid units, split into integer cell and fractional offset.
-  const double grid_y = (yl - y_start) / y_step;
-  const double grid_x = (xm - x_start) / x_step;
-  const int row = (int)floor(grid_y);
-  const int col = (int)floor(grid_x);
-  assert(col > 0);
-  assert(row > 0);
-  const double frac_y = grid_y - row;
-  const double frac_x = grid_x - col;
-
-  // The interpolation patch starts one sample before (row, col).
-  const int patch = (row - 1) * stride + (col - 1);
-  *rate_f = interp_bicubic(&interp_rgrid_surf[patch], stride, frac_x, frac_y);
-  *dist_f = interp_bicubic(&interp_dgrid_surf[patch], stride, frac_x, frac_y);
-}
-
-// Rate samples read by av1_model_rd_curvfit(): 65 values, one per grid point
-// of that function's xqr axis (-15.5 to 16.5 in steps of 0.5).
-static const double interp_rgrid_curv[65] = {
- 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000,
- 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000,
- 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 4.759876,
- 8.132086, 13.651828, 21.908271, 33.522054, 48.782376, 71.530983,
- 106.728649, 151.942795, 199.893011, 242.850965, 283.933923, 322.154203,
- 360.684608, 394.801656, 426.879017, 460.234313, 484.103987, 508.261495,
- 536.486763, 558.196737, 586.285894, 614.764511, 634.166333, 647.706472,
- 658.211478, 681.360407, 701.052141, 727.007310, 768.663973, 804.407660,
- 884.627751, 1065.658131, 1238.875214, 1440.185176, 1678.377931, 1962.243390,
- 2300.571467, 2702.152072, 3175.775119, 3730.230519, 4374.308184, 5116.798028,
- 5966.489961, 6932.173897, 8022.639747, 9246.677424, 10613.076839,
-};
-
-// Samples read by av1_model_rd_curvfit() to produce its *distbysse_f output:
-// 65 values, one per grid point of that function's xqr axis (-15.5 to 16.5
-// in steps of 0.5).
-static const double interp_dgrid_curv[65] = {
- 14.604855, 14.604855, 14.604855, 14.604855, 14.604855, 14.604855, 14.604855,
- 14.604855, 14.604855, 14.604855, 14.604855, 14.604855, 14.555776, 14.533692,
- 14.439920, 14.257791, 13.977230, 13.623229, 13.064884, 12.355411, 11.560773,
- 10.728960, 9.861975, 8.643612, 6.916021, 5.154769, 3.734940, 2.680051,
- 1.925506, 1.408410, 1.042223, 0.767641, 0.565392, 0.420116, 0.310427,
- 0.231711, 0.172999, 0.128293, 0.094992, 0.072171, 0.052972, 0.039354,
- 0.029555, 0.022857, 0.016832, 0.013297, 0.000000, 0.000000, 0.000000,
- 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000,
- 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000,
- 0.000000, 0.000000,
-};
-
-// Evaluates the curves stored in interp_rgrid_curv and interp_dgrid_curv at
-// xqr using cubic interpolation, writing the results to *rate_f and
-// *distbysse_f.
-//
-// xqr is clamped to lie at least one grid step (plus epsilon) inside the
-// sampled range so the four samples read by interp_cubic() stay within the
-// 65-entry tables.
-void av1_model_rd_curvfit(double xqr, double *rate_f, double *distbysse_f) {
-  // Sampling grid of the tables.
-  const double x_start = -15.5;
-  const double x_end = 16.5;
-  const double x_step = 0.5;
-  const double epsilon = 1e-6;
-
-  const double x_lo = x_start + x_step + epsilon;
-  const double x_hi = x_end - x_step - epsilon;
-  xqr = AOMMAX(xqr, x_lo);
-  xqr = AOMMIN(xqr, x_hi);
-
-  // Position in grid units, split into integer cell and fractional offset.
-  const double grid_x = (xqr - x_start) / x_step;
-  const int cell = (int)floor(grid_x);
-  const double frac = grid_x - cell;
-
-  assert(cell > 0);
-
-  // The interpolation window starts one sample before the cell.
-  const int first = cell - 1;
-  *rate_f = interp_cubic(&interp_rgrid_curv[first], frac);
-  *distbysse_f = interp_cubic(&interp_dgrid_curv[first], frac);
-}
-
-// Copies the plane's above and left entropy contexts into the caller's
-// scratch arrays. The number of entries copied is the block width (for
-// t_above) and height (for t_left) of plane_bsize, shifted right by
-// tx_size_wide_log2[0] / tx_size_high_log2[0].
-static void get_entropy_contexts_plane(BLOCK_SIZE plane_bsize,
-                                       const struct macroblockd_plane *pd,
-                                       ENTROPY_CONTEXT t_above[MAX_MIB_SIZE],
-                                       ENTROPY_CONTEXT t_left[MAX_MIB_SIZE]) {
-  const int above_count =
-      block_size_wide[plane_bsize] >> tx_size_wide_log2[0];
-  const int left_count =
-      block_size_high[plane_bsize] >> tx_size_high_log2[0];
-
-  memcpy(t_above, pd->above_context, sizeof(ENTROPY_CONTEXT) * above_count);
-  memcpy(t_left, pd->left_context, sizeof(ENTROPY_CONTEXT) * left_count);
-}
-
-// Converts bsize to the plane's block size using the plane's subsampling
-// factors, then copies that plane's above/left entropy contexts into t_above
-// and t_left via get_entropy_contexts_plane().
-void av1_get_entropy_contexts(BLOCK_SIZE bsize,
-                              const struct macroblockd_plane *pd,
-                              ENTROPY_CONTEXT t_above[MAX_MIB_SIZE],
-                              ENTROPY_CONTEXT t_left[MAX_MIB_SIZE]) {
-  get_entropy_contexts_plane(
-      get_plane_block_size(bsize, pd->subsampling_x, pd->subsampling_y), pd,
-      t_above, t_left);
-}
-
-// Scores the candidate reference motion vectors for ref_frame against the
-// source block and stores two summaries in x:
-//   x->pred_mv_sad[ref_frame]    - lowest SAD over the evaluated candidates
-//   x->max_mv_context[ref_frame] - largest |row| or |col| over all
-//                                  candidates, shifted right by 3
-// Candidates are the first two entries from av1_get_ref_mv_from_stack() (the
-// second only if it differs from the first) plus, when adaptive motion search
-// is enabled and block_size is below x->max_partition_size,
-// x->pred_mv[ref_frame].
-void av1_mv_pred(const AV1_COMP *cpi, MACROBLOCK *x, uint8_t *ref_y_buffer,
-                 int ref_y_stride, int ref_frame, BLOCK_SIZE block_size) {
-  uint8_t *const src_y = x->plane[0].src.buf;
-  const MV_REFERENCE_FRAME ref_frames[2] = { ref_frame, NONE_FRAME };
-  const int_mv stack_mv0 =
-      av1_get_ref_mv_from_stack(0, ref_frames, 0, x->mbmi_ext);
-  const int_mv stack_mv1 =
-      av1_get_ref_mv_from_stack(0, ref_frames, 1, x->mbmi_ext);
-  MV candidates[MAX_MV_REF_CANDIDATES + 1];
-  int num_candidates = 0;
-  int lowest_sad = INT_MAX;
-  int largest_mv = 0;
-  int zero_done = 0;
-  int idx;
-
-  candidates[num_candidates++] = stack_mv0.as_mv;
-  if (stack_mv0.as_int != stack_mv1.as_int) {
-    candidates[num_candidates++] = stack_mv1.as_mv;
-  }
-  if (cpi->sf.adaptive_motion_search && block_size < x->max_partition_size) {
-    candidates[num_candidates++] = x->pred_mv[ref_frame];
-  }
-
-  assert(num_candidates <=
-         (int)(sizeof(candidates) / sizeof(candidates[0])));
-
-  // Compute the SAD at each candidate position.
-  for (idx = 0; idx < num_candidates; ++idx) {
-    const MV *const mv = &candidates[idx];
-    // Divide each component by 8 with rounding (the fp_ prefix presumably
-    // stands for full-pel).
-    const int fp_row = (mv->row + 3 + (mv->row >= 0)) >> 3;
-    const int fp_col = (mv->col + 3 + (mv->col >= 0)) >> 3;
-    const int is_zero = (fp_row == 0 && fp_col == 0);
-    int sad;
-
-    // The magnitude is tracked for every candidate, including skipped ones.
-    largest_mv =
-        AOMMAX(largest_mv, AOMMAX(abs(mv->row), abs(mv->col)) >> 3);
-
-    // Evaluate the (0, 0) position at most once.
-    if (is_zero) {
-      if (zero_done) continue;
-      zero_done = 1;
-    }
-
-    sad = cpi->fn_ptr[block_size].sdf(
-        src_y, x->plane[0].src.stride,
-        &ref_y_buffer[ref_y_stride * fp_row + fp_col], ref_y_stride);
-    if (sad < lowest_sad) {
-      lowest_sad = sad;
-    }
-  }
-
-  // Publish the largest vector magnitude and the best SAD for this reference.
-  x->max_mv_context[ref_frame] = largest_mv;
-  x->pred_mv_sad[ref_frame] = lowest_sad;
-}
-
-void av1_setup_pred_block(const MACROBLOCKD *xd,
- struct buf_2d dst[MAX_MB_PLANE],
- const YV12_BUFFER_CONFIG *src, int mi_row, int mi_col,
- const struct scale_factors *scale,
- const struct scale_factors *scale_uv,
- const int num_planes) {
- int i;
-
- dst[0].buf = src->y_buffer;
- dst[0].stride = src->y_stride;
- dst[1].buf = src->u_buffer;
- dst[2].buf = src->v_buffer;
- dst[1].stride = dst[2].stride = src->uv_stride;
-
- for (i = 0; i < num_planes; ++i) {
- setup_pred_plane(dst + i, xd->mi[0]->sb_type, dst[i].buf,
- i ? src->uv_crop_width : src->y_crop_width,
- i ? src->uv_crop_height : src->y_crop_height,
- dst[i].stride, mi_row, mi_col, i ? scale_uv : scale,
- xd->plane[i].subsampling_x, xd->plane[i].subsampling_y);
- }
-}
-
-int av1_raster_block_offset(BLOCK_SIZE plane_bsize, int raster_block,
- int stride) {
- const int bw = mi_size_wide_log2[plane_bsize];
- const int y = 4 * (raster_block >> bw);
- const int x = 4 * (raster_block & ((1 << bw) - 1));
- return y * stride + x;
-}
-
-int16_t *av1_raster_block_offset_int16(BLOCK_SIZE plane_bsize, int raster_block,
- int16_t *base) {
- const int stride = block_size_wide[plane_bsize];
- return base + av1_raster_block_offset(plane_bsize, raster_block, stride);
-}
-
-YV12_BUFFER_CONFIG *av1_get_scaled_ref_frame(const AV1_COMP *cpi,
- int ref_frame) {
- const AV1_COMMON *const cm = &cpi->common;
- const int scaled_idx = cpi->scaled_ref_idx[ref_frame - 1];
- const int ref_idx = get_ref_frame_buf_idx(cpi, ref_frame);
- return (scaled_idx != ref_idx && scaled_idx != INVALID_IDX)
- ? &cm->buffer_pool->frame_bufs[scaled_idx].buf
- : NULL;
-}
-
-int av1_get_switchable_rate(const AV1_COMMON *const cm, MACROBLOCK *x,
- const MACROBLOCKD *xd) {
- if (cm->interp_filter == SWITCHABLE) {
- const MB_MODE_INFO *const mbmi = xd->mi[0];
- int inter_filter_cost = 0;
- int dir;
-
- for (dir = 0; dir < 2; ++dir) {
- const int ctx = av1_get_pred_context_switchable_interp(xd, dir);
- const InterpFilter filter =
- av1_extract_interp_filter(mbmi->interp_filters, dir);
- inter_filter_cost += x->switchable_interp_costs[ctx][filter];
- }
- return SWITCHABLE_INTERP_RATE_FACTOR * inter_filter_cost;
- } else {
- return 0;
- }
-}
-
-void av1_set_rd_speed_thresholds(AV1_COMP *cpi) {
- int i;
- RD_OPT *const rd = &cpi->rd;
- SPEED_FEATURES *const sf = &cpi->sf;
-
- // Set baseline threshold values.
- for (i = 0; i < MAX_MODES; ++i) rd->thresh_mult[i] = cpi->oxcf.mode == 0;
-
- if (sf->adaptive_rd_thresh) {
- rd->thresh_mult[THR_NEARESTMV] = 300;
- rd->thresh_mult[THR_NEARESTL2] = 300;
- rd->thresh_mult[THR_NEARESTL3] = 300;
- rd->thresh_mult[THR_NEARESTB] = 300;
- rd->thresh_mult[THR_NEARESTA2] = 300;
- rd->thresh_mult[THR_NEARESTA] = 300;
- rd->thresh_mult[THR_NEARESTG] = 300;
- } else {
- rd->thresh_mult[THR_NEARESTMV] = 0;
- rd->thresh_mult[THR_NEARESTL2] = 0;
- rd->thresh_mult[THR_NEARESTL3] = 0;
- rd->thresh_mult[THR_NEARESTB] = 0;
- rd->thresh_mult[THR_NEARESTA2] = 0;
- rd->thresh_mult[THR_NEARESTA] = 0;
- rd->thresh_mult[THR_NEARESTG] = 0;
- }
-
- rd->thresh_mult[THR_NEWMV] += 1000;
- rd->thresh_mult[THR_NEWL2] += 1000;
- rd->thresh_mult[THR_NEWL3] += 1000;
- rd->thresh_mult[THR_NEWB] += 1000;
- rd->thresh_mult[THR_NEWA2] = 1000;
- rd->thresh_mult[THR_NEWA] += 1000;
- rd->thresh_mult[THR_NEWG] += 1000;
-
- rd->thresh_mult[THR_NEARMV] += 1000;
- rd->thresh_mult[THR_NEARL2] += 1000;
- rd->thresh_mult[THR_NEARL3] += 1000;
- rd->thresh_mult[THR_NEARB] += 1000;
- rd->thresh_mult[THR_NEARA2] = 1000;
- rd->thresh_mult[THR_NEARA] += 1000;
- rd->thresh_mult[THR_NEARG] += 1000;
-
- rd->thresh_mult[THR_GLOBALMV] += 2000;
- rd->thresh_mult[THR_GLOBALL2] += 2000;
- rd->thresh_mult[THR_GLOBALL3] += 2000;
- rd->thresh_mult[THR_GLOBALB] += 2000;
- rd->thresh_mult[THR_GLOBALA2] = 2000;
- rd->thresh_mult[THR_GLOBALG] += 2000;
- rd->thresh_mult[THR_GLOBALA] += 2000;
-
- rd->thresh_mult[THR_COMP_NEAREST_NEARESTLA] += 1000;
- rd->thresh_mult[THR_COMP_NEAREST_NEARESTL2A] += 1000;
- rd->thresh_mult[THR_COMP_NEAREST_NEARESTL3A] += 1000;
- rd->thresh_mult[THR_COMP_NEAREST_NEARESTGA] += 1000;
- rd->thresh_mult[THR_COMP_NEAREST_NEARESTLB] += 1000;
- rd->thresh_mult[THR_COMP_NEAREST_NEARESTL2B] += 1000;
- rd->thresh_mult[THR_COMP_NEAREST_NEARESTL3B] += 1000;
- rd->thresh_mult[THR_COMP_NEAREST_NEARESTGB] += 1000;
- rd->thresh_mult[THR_COMP_NEAREST_NEARESTLA2] += 1000;
- rd->thresh_mult[THR_COMP_NEAREST_NEARESTL2A2] += 1000;
- rd->thresh_mult[THR_COMP_NEAREST_NEARESTL3A2] += 1000;
- rd->thresh_mult[THR_COMP_NEAREST_NEARESTGA2] += 1000;
-
- rd->thresh_mult[THR_COMP_NEAREST_NEARESTLL2] += 2000;
- rd->thresh_mult[THR_COMP_NEAREST_NEARESTLL3] += 2000;
- rd->thresh_mult[THR_COMP_NEAREST_NEARESTLG] += 2000;
- rd->thresh_mult[THR_COMP_NEAREST_NEARESTBA] += 2000;
-
- rd->thresh_mult[THR_COMP_NEAR_NEARLA] += 1200;
- rd->thresh_mult[THR_COMP_NEAREST_NEWLA] += 1500;
- rd->thresh_mult[THR_COMP_NEW_NEARESTLA] += 1500;
- rd->thresh_mult[THR_COMP_NEAR_NEWLA] += 1700;
- rd->thresh_mult[THR_COMP_NEW_NEARLA] += 1700;
- rd->thresh_mult[THR_COMP_NEW_NEWLA] += 2000;
- rd->thresh_mult[THR_COMP_GLOBAL_GLOBALLA] += 2500;
-
- rd->thresh_mult[THR_COMP_NEAR_NEARL2A] += 1200;
- rd->thresh_mult[THR_COMP_NEAREST_NEWL2A] += 1500;
- rd->thresh_mult[THR_COMP_NEW_NEARESTL2A] += 1500;
- rd->thresh_mult[THR_COMP_NEAR_NEWL2A] += 1700;
- rd->thresh_mult[THR_COMP_NEW_NEARL2A] += 1700;
- rd->thresh_mult[THR_COMP_NEW_NEWL2A] += 2000;
- rd->thresh_mult[THR_COMP_GLOBAL_GLOBALL2A] += 2500;
-
- rd->thresh_mult[THR_COMP_NEAR_NEARL3A] += 1200;
- rd->thresh_mult[THR_COMP_NEAREST_NEWL3A] += 1500;
- rd->thresh_mult[THR_COMP_NEW_NEARESTL3A] += 1500;
- rd->thresh_mult[THR_COMP_NEAR_NEWL3A] += 1700;
- rd->thresh_mult[THR_COMP_NEW_NEARL3A] += 1700;
- rd->thresh_mult[THR_COMP_NEW_NEWL3A] += 2000;
- rd->thresh_mult[THR_COMP_GLOBAL_GLOBALL3A] += 2500;
-
- rd->thresh_mult[THR_COMP_NEAR_NEARGA] += 1200;
- rd->thresh_mult[THR_COMP_NEAREST_NEWGA] += 1500;
- rd->thresh_mult[THR_COMP_NEW_NEARESTGA] += 1500;
- rd->thresh_mult[THR_COMP_NEAR_NEWGA] += 1700;
- rd->thresh_mult[THR_COMP_NEW_NEARGA] += 1700;
- rd->thresh_mult[THR_COMP_NEW_NEWGA] += 2000;
- rd->thresh_mult[THR_COMP_GLOBAL_GLOBALGA] += 2500;
-
- rd->thresh_mult[THR_COMP_NEAR_NEARLB] += 1200;
- rd->thresh_mult[THR_COMP_NEAREST_NEWLB] += 1500;
- rd->thresh_mult[THR_COMP_NEW_NEARESTLB] += 1500;
- rd->thresh_mult[THR_COMP_NEAR_NEWLB] += 1700;
- rd->thresh_mult[THR_COMP_NEW_NEARLB] += 1700;
- rd->thresh_mult[THR_COMP_NEW_NEWLB] += 2000;
- rd->thresh_mult[THR_COMP_GLOBAL_GLOBALLB] += 2500;
-
- rd->thresh_mult[THR_COMP_NEAR_NEARL2B] += 1200;
- rd->thresh_mult[THR_COMP_NEAREST_NEWL2B] += 1500;
- rd->thresh_mult[THR_COMP_NEW_NEARESTL2B] += 1500;
- rd->thresh_mult[THR_COMP_NEAR_NEWL2B] += 1700;
- rd->thresh_mult[THR_COMP_NEW_NEARL2B] += 1700;
- rd->thresh_mult[THR_COMP_NEW_NEWL2B] += 2000;
- rd->thresh_mult[THR_COMP_GLOBAL_GLOBALL2B] += 2500;
-
- rd->thresh_mult[THR_COMP_NEAR_NEARL3B] += 1200;
- rd->thresh_mult[THR_COMP_NEAREST_NEWL3B] += 1500;
- rd->thresh_mult[THR_COMP_NEW_NEARESTL3B] += 1500;
- rd->thresh_mult[THR_COMP_NEAR_NEWL3B] += 1700;
- rd->thresh_mult[THR_COMP_NEW_NEARL3B] += 1700;
- rd->thresh_mult[THR_COMP_NEW_NEWL3B] += 2000;
- rd->thresh_mult[THR_COMP_GLOBAL_GLOBALL3B] += 2500;
-
- rd->thresh_mult[THR_COMP_NEAR_NEARGB] += 1200;
- rd->thresh_mult[THR_COMP_NEAREST_NEWGB] += 1500;
- rd->thresh_mult[THR_COMP_NEW_NEARESTGB] += 1500;
- rd->thresh_mult[THR_COMP_NEAR_NEWGB] += 1700;
- rd->thresh_mult[THR_COMP_NEW_NEARGB] += 1700;
- rd->thresh_mult[THR_COMP_NEW_NEWGB] += 2000;
- rd->thresh_mult[THR_COMP_GLOBAL_GLOBALGB] += 2500;
-
- rd->thresh_mult[THR_COMP_NEAR_NEARLA2] += 1200;
- rd->thresh_mult[THR_COMP_NEAREST_NEWLA2] += 1500;
- rd->thresh_mult[THR_COMP_NEW_NEARESTLA2] += 1500;
- rd->thresh_mult[THR_COMP_NEAR_NEWLA2] += 1700;
- rd->thresh_mult[THR_COMP_NEW_NEARLA2] += 1700;
- rd->thresh_mult[THR_COMP_NEW_NEWLA2] += 2000;
- rd->thresh_mult[THR_COMP_GLOBAL_GLOBALLA2] += 2500;
-
- rd->thresh_mult[THR_COMP_NEAR_NEARL2A2] += 1200;
- rd->thresh_mult[THR_COMP_NEAREST_NEWL2A2] += 1500;
- rd->thresh_mult[THR_COMP_NEW_NEARESTL2A2] += 1500;
- rd->thresh_mult[THR_COMP_NEAR_NEWL2A2] += 1700;
- rd->thresh_mult[THR_COMP_NEW_NEARL2A2] += 1700;
- rd->thresh_mult[THR_COMP_NEW_NEWL2A2] += 2000;
- rd->thresh_mult[THR_COMP_GLOBAL_GLOBALL2A2] += 2500;
-
- rd->thresh_mult[THR_COMP_NEAR_NEARL3A2] += 1200;
- rd->thresh_mult[THR_COMP_NEAREST_NEWL3A2] += 1500;
- rd->thresh_mult[THR_COMP_NEW_NEARESTL3A2] += 1500;
- rd->thresh_mult[THR_COMP_NEAR_NEWL3A2] += 1700;
- rd->thresh_mult[THR_COMP_NEW_NEARL3A2] += 1700;
- rd->thresh_mult[THR_COMP_NEW_NEWL3A2] += 2000;
- rd->thresh_mult[THR_COMP_GLOBAL_GLOBALL3A2] += 2500;
-
- rd->thresh_mult[THR_COMP_NEAR_NEARGA2] += 1200;
- rd->thresh_mult[THR_COMP_NEAREST_NEWGA2] += 1500;
- rd->thresh_mult[THR_COMP_NEW_NEARESTGA2] += 1500;
- rd->thresh_mult[THR_COMP_NEAR_NEWGA2] += 1700;
- rd->thresh_mult[THR_COMP_NEW_NEARGA2] += 1700;
- rd->thresh_mult[THR_COMP_NEW_NEWGA2] += 2000;
- rd->thresh_mult[THR_COMP_GLOBAL_GLOBALGA2] += 2500;
-
- rd->thresh_mult[THR_COMP_NEAR_NEARLL2] += 1600;
- rd->thresh_mult[THR_COMP_NEAREST_NEWLL2] += 2000;
- rd->thresh_mult[THR_COMP_NEW_NEARESTLL2] += 2000;
- rd->thresh_mult[THR_COMP_NEAR_NEWLL2] += 2200;
- rd->thresh_mult[THR_COMP_NEW_NEARLL2] += 2200;
- rd->thresh_mult[THR_COMP_NEW_NEWLL2] += 2400;
- rd->thresh_mult[THR_COMP_GLOBAL_GLOBALLL2] += 3200;
-
- rd->thresh_mult[THR_COMP_NEAR_NEARLL3] += 1600;
- rd->thresh_mult[THR_COMP_NEAREST_NEWLL3] += 2000;
- rd->thresh_mult[THR_COMP_NEW_NEARESTLL3] += 2000;
- rd->thresh_mult[THR_COMP_NEAR_NEWLL3] += 2200;
- rd->thresh_mult[THR_COMP_NEW_NEARLL3] += 2200;
- rd->thresh_mult[THR_COMP_NEW_NEWLL3] += 2400;
- rd->thresh_mult[THR_COMP_GLOBAL_GLOBALLL3] += 3200;
-
- rd->thresh_mult[THR_COMP_NEAR_NEARLG] += 1600;
- rd->thresh_mult[THR_COMP_NEAREST_NEWLG] += 2000;
- rd->thresh_mult[THR_COMP_NEW_NEARESTLG] += 2000;
- rd->thresh_mult[THR_COMP_NEAR_NEWLG] += 2200;
- rd->thresh_mult[THR_COMP_NEW_NEARLG] += 2200;
- rd->thresh_mult[THR_COMP_NEW_NEWLG] += 2400;
- rd->thresh_mult[THR_COMP_GLOBAL_GLOBALLG] += 3200;
-
- rd->thresh_mult[THR_COMP_NEAR_NEARBA] += 1600;
- rd->thresh_mult[THR_COMP_NEAREST_NEWBA] += 2000;
- rd->thresh_mult[THR_COMP_NEW_NEARESTBA] += 2000;
- rd->thresh_mult[THR_COMP_NEAR_NEWBA] += 2200;
- rd->thresh_mult[THR_COMP_NEW_NEARBA] += 2200;
- rd->thresh_mult[THR_COMP_NEW_NEWBA] += 2400;
- rd->thresh_mult[THR_COMP_GLOBAL_GLOBALBA] += 3200;
-
- rd->thresh_mult[THR_DC] += 1000;
- rd->thresh_mult[THR_PAETH] += 1000;
- rd->thresh_mult[THR_SMOOTH] += 2000;
- rd->thresh_mult[THR_SMOOTH_V] += 2000;
- rd->thresh_mult[THR_SMOOTH_H] += 2000;
- rd->thresh_mult[THR_H_PRED] += 2000;
- rd->thresh_mult[THR_V_PRED] += 2000;
- rd->thresh_mult[THR_D135_PRED] += 2500;
- rd->thresh_mult[THR_D203_PRED] += 2500;
- rd->thresh_mult[THR_D157_PRED] += 2500;
- rd->thresh_mult[THR_D67_PRED] += 2500;
- rd->thresh_mult[THR_D113_PRED] += 2500;
- rd->thresh_mult[THR_D45_PRED] += 2500;
-}
-
-void av1_set_rd_speed_thresholds_sub8x8(AV1_COMP *cpi) {
- static const int thresh_mult[MAX_REFS] = { 2500, 2500, 2500, 2500, 2500,
- 2500, 2500, 4500, 4500, 4500,
- 4500, 4500, 4500, 4500, 4500,
- 4500, 4500, 4500, 4500, 2500 };
- RD_OPT *const rd = &cpi->rd;
- memcpy(rd->thresh_mult_sub8x8, thresh_mult, sizeof(thresh_mult));
-}
-
-void av1_update_rd_thresh_fact(const AV1_COMMON *const cm,
- int (*factor_buf)[MAX_MODES], int rd_thresh,
- int bsize, int best_mode_index) {
- if (rd_thresh > 0) {
- const int top_mode = MAX_MODES;
- int mode;
- for (mode = 0; mode < top_mode; ++mode) {
- const BLOCK_SIZE min_size = AOMMAX(bsize - 1, BLOCK_4X4);
- const BLOCK_SIZE max_size =
- AOMMIN(bsize + 2, (int)cm->seq_params.sb_size);
- BLOCK_SIZE bs;
- for (bs = min_size; bs <= max_size; ++bs) {
- int *const fact = &factor_buf[bs][mode];
- if (mode == best_mode_index) {
- *fact -= (*fact >> 4);
- } else {
- *fact = AOMMIN(*fact + RD_THRESH_INC, rd_thresh * RD_THRESH_MAX_FACT);
- }
- }
- }
- }
-}
-
-int av1_get_intra_cost_penalty(int qindex, int qdelta,
- aom_bit_depth_t bit_depth) {
- const int q = av1_dc_quant_Q3(qindex, qdelta, bit_depth);
- switch (bit_depth) {
- case AOM_BITS_8: return 20 * q;
- case AOM_BITS_10: return 5 * q;
- case AOM_BITS_12: return ROUND_POWER_OF_TWO(5 * q, 2);
- default:
- assert(0 && "bit_depth should be AOM_BITS_8, AOM_BITS_10 or AOM_BITS_12");
- return -1;
- }
-}
diff --git a/third_party/aom/av1/encoder/rd.h b/third_party/aom/av1/encoder/rd.h
deleted file mode 100644
index 755b61df5..000000000
--- a/third_party/aom/av1/encoder/rd.h
+++ /dev/null
@@ -1,464 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#ifndef AOM_AV1_ENCODER_RD_H_
-#define AOM_AV1_ENCODER_RD_H_
-
-#include <limits.h>
-
-#include "av1/common/blockd.h"
-
-#include "av1/encoder/block.h"
-#include "av1/encoder/context_tree.h"
-#include "av1/encoder/cost.h"
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-#define RDDIV_BITS 7
-#define RD_EPB_SHIFT 6
-
-#define RDCOST(RM, R, D) \
- (ROUND_POWER_OF_TWO(((int64_t)(R)) * (RM), AV1_PROB_COST_SHIFT) + \
- ((D) * (1 << RDDIV_BITS)))
-
-#define RDCOST_DBL(RM, R, D) \
- (((((double)(R)) * (RM)) / (double)(1 << AV1_PROB_COST_SHIFT)) + \
- ((double)(D) * (1 << RDDIV_BITS)))
-
-#define QIDX_SKIP_THRESH 115
-
-#define MV_COST_WEIGHT 108
-#define MV_COST_WEIGHT_SUB 120
-
-#define RD_THRESH_MAX_FACT 64
-#define RD_THRESH_INC 1
-
-// Factor to weigh the rate for switchable interp filters.
-#define SWITCHABLE_INTERP_RATE_FACTOR 1
-
-// This enumerator type needs to be kept aligned with the mode order in
-// const MODE_DEFINITION av1_mode_order[MAX_MODES] used in the rd code.
-typedef enum {
- THR_NEARESTMV,
- THR_NEARESTL2,
- THR_NEARESTL3,
- THR_NEARESTB,
- THR_NEARESTA2,
- THR_NEARESTA,
- THR_NEARESTG,
-
- THR_NEWMV,
- THR_NEWL2,
- THR_NEWL3,
- THR_NEWB,
- THR_NEWA2,
- THR_NEWA,
- THR_NEWG,
-
- THR_NEARMV,
- THR_NEARL2,
- THR_NEARL3,
- THR_NEARB,
- THR_NEARA2,
- THR_NEARA,
- THR_NEARG,
-
- THR_GLOBALMV,
- THR_GLOBALL2,
- THR_GLOBALL3,
- THR_GLOBALB,
- THR_GLOBALA2,
- THR_GLOBALA,
- THR_GLOBALG,
-
- THR_COMP_NEAREST_NEARESTLA,
- THR_COMP_NEAREST_NEARESTL2A,
- THR_COMP_NEAREST_NEARESTL3A,
- THR_COMP_NEAREST_NEARESTGA,
- THR_COMP_NEAREST_NEARESTLB,
- THR_COMP_NEAREST_NEARESTL2B,
- THR_COMP_NEAREST_NEARESTL3B,
- THR_COMP_NEAREST_NEARESTGB,
- THR_COMP_NEAREST_NEARESTLA2,
- THR_COMP_NEAREST_NEARESTL2A2,
- THR_COMP_NEAREST_NEARESTL3A2,
- THR_COMP_NEAREST_NEARESTGA2,
- THR_COMP_NEAREST_NEARESTLL2,
- THR_COMP_NEAREST_NEARESTLL3,
- THR_COMP_NEAREST_NEARESTLG,
- THR_COMP_NEAREST_NEARESTBA,
-
- THR_COMP_NEAR_NEARLA,
- THR_COMP_NEW_NEARESTLA,
- THR_COMP_NEAREST_NEWLA,
- THR_COMP_NEW_NEARLA,
- THR_COMP_NEAR_NEWLA,
- THR_COMP_NEW_NEWLA,
- THR_COMP_GLOBAL_GLOBALLA,
-
- THR_COMP_NEAR_NEARL2A,
- THR_COMP_NEW_NEARESTL2A,
- THR_COMP_NEAREST_NEWL2A,
- THR_COMP_NEW_NEARL2A,
- THR_COMP_NEAR_NEWL2A,
- THR_COMP_NEW_NEWL2A,
- THR_COMP_GLOBAL_GLOBALL2A,
-
- THR_COMP_NEAR_NEARL3A,
- THR_COMP_NEW_NEARESTL3A,
- THR_COMP_NEAREST_NEWL3A,
- THR_COMP_NEW_NEARL3A,
- THR_COMP_NEAR_NEWL3A,
- THR_COMP_NEW_NEWL3A,
- THR_COMP_GLOBAL_GLOBALL3A,
-
- THR_COMP_NEAR_NEARGA,
- THR_COMP_NEW_NEARESTGA,
- THR_COMP_NEAREST_NEWGA,
- THR_COMP_NEW_NEARGA,
- THR_COMP_NEAR_NEWGA,
- THR_COMP_NEW_NEWGA,
- THR_COMP_GLOBAL_GLOBALGA,
-
- THR_COMP_NEAR_NEARLB,
- THR_COMP_NEW_NEARESTLB,
- THR_COMP_NEAREST_NEWLB,
- THR_COMP_NEW_NEARLB,
- THR_COMP_NEAR_NEWLB,
- THR_COMP_NEW_NEWLB,
- THR_COMP_GLOBAL_GLOBALLB,
-
- THR_COMP_NEAR_NEARL2B,
- THR_COMP_NEW_NEARESTL2B,
- THR_COMP_NEAREST_NEWL2B,
- THR_COMP_NEW_NEARL2B,
- THR_COMP_NEAR_NEWL2B,
- THR_COMP_NEW_NEWL2B,
- THR_COMP_GLOBAL_GLOBALL2B,
-
- THR_COMP_NEAR_NEARL3B,
- THR_COMP_NEW_NEARESTL3B,
- THR_COMP_NEAREST_NEWL3B,
- THR_COMP_NEW_NEARL3B,
- THR_COMP_NEAR_NEWL3B,
- THR_COMP_NEW_NEWL3B,
- THR_COMP_GLOBAL_GLOBALL3B,
-
- THR_COMP_NEAR_NEARGB,
- THR_COMP_NEW_NEARESTGB,
- THR_COMP_NEAREST_NEWGB,
- THR_COMP_NEW_NEARGB,
- THR_COMP_NEAR_NEWGB,
- THR_COMP_NEW_NEWGB,
- THR_COMP_GLOBAL_GLOBALGB,
-
- THR_COMP_NEAR_NEARLA2,
- THR_COMP_NEW_NEARESTLA2,
- THR_COMP_NEAREST_NEWLA2,
- THR_COMP_NEW_NEARLA2,
- THR_COMP_NEAR_NEWLA2,
- THR_COMP_NEW_NEWLA2,
- THR_COMP_GLOBAL_GLOBALLA2,
-
- THR_COMP_NEAR_NEARL2A2,
- THR_COMP_NEW_NEARESTL2A2,
- THR_COMP_NEAREST_NEWL2A2,
- THR_COMP_NEW_NEARL2A2,
- THR_COMP_NEAR_NEWL2A2,
- THR_COMP_NEW_NEWL2A2,
- THR_COMP_GLOBAL_GLOBALL2A2,
-
- THR_COMP_NEAR_NEARL3A2,
- THR_COMP_NEW_NEARESTL3A2,
- THR_COMP_NEAREST_NEWL3A2,
- THR_COMP_NEW_NEARL3A2,
- THR_COMP_NEAR_NEWL3A2,
- THR_COMP_NEW_NEWL3A2,
- THR_COMP_GLOBAL_GLOBALL3A2,
-
- THR_COMP_NEAR_NEARGA2,
- THR_COMP_NEW_NEARESTGA2,
- THR_COMP_NEAREST_NEWGA2,
- THR_COMP_NEW_NEARGA2,
- THR_COMP_NEAR_NEWGA2,
- THR_COMP_NEW_NEWGA2,
- THR_COMP_GLOBAL_GLOBALGA2,
-
- THR_COMP_NEAR_NEARLL2,
- THR_COMP_NEW_NEARESTLL2,
- THR_COMP_NEAREST_NEWLL2,
- THR_COMP_NEW_NEARLL2,
- THR_COMP_NEAR_NEWLL2,
- THR_COMP_NEW_NEWLL2,
- THR_COMP_GLOBAL_GLOBALLL2,
-
- THR_COMP_NEAR_NEARLL3,
- THR_COMP_NEW_NEARESTLL3,
- THR_COMP_NEAREST_NEWLL3,
- THR_COMP_NEW_NEARLL3,
- THR_COMP_NEAR_NEWLL3,
- THR_COMP_NEW_NEWLL3,
- THR_COMP_GLOBAL_GLOBALLL3,
-
- THR_COMP_NEAR_NEARLG,
- THR_COMP_NEW_NEARESTLG,
- THR_COMP_NEAREST_NEWLG,
- THR_COMP_NEW_NEARLG,
- THR_COMP_NEAR_NEWLG,
- THR_COMP_NEW_NEWLG,
- THR_COMP_GLOBAL_GLOBALLG,
-
- THR_COMP_NEAR_NEARBA,
- THR_COMP_NEW_NEARESTBA,
- THR_COMP_NEAREST_NEWBA,
- THR_COMP_NEW_NEARBA,
- THR_COMP_NEAR_NEWBA,
- THR_COMP_NEW_NEWBA,
- THR_COMP_GLOBAL_GLOBALBA,
-
- THR_DC,
- THR_PAETH,
- THR_SMOOTH,
- THR_SMOOTH_V,
- THR_SMOOTH_H,
- THR_H_PRED,
- THR_V_PRED,
- THR_D135_PRED,
- THR_D203_PRED,
- THR_D157_PRED,
- THR_D67_PRED,
- THR_D113_PRED,
- THR_D45_PRED,
-
- MAX_MODES,
-
- LAST_SINGLE_REF_MODES = THR_GLOBALG,
- MAX_SINGLE_REF_MODES = LAST_SINGLE_REF_MODES + 1,
- LAST_COMP_REF_MODES = THR_COMP_GLOBAL_GLOBALBA,
- MAX_COMP_REF_MODES = LAST_COMP_REF_MODES + 1
-} THR_MODES;
-
-typedef enum {
- THR_LAST,
- THR_LAST2,
- THR_LAST3,
- THR_BWDR,
- THR_ALTR2,
- THR_GOLD,
- THR_ALTR,
-
- THR_COMP_LA,
- THR_COMP_L2A,
- THR_COMP_L3A,
- THR_COMP_GA,
-
- THR_COMP_LB,
- THR_COMP_L2B,
- THR_COMP_L3B,
- THR_COMP_GB,
-
- THR_COMP_LA2,
- THR_COMP_L2A2,
- THR_COMP_L3A2,
- THR_COMP_GA2,
-
- THR_INTRA,
-
- MAX_REFS
-} THR_MODES_SUB8X8;
-
-typedef struct RD_OPT {
- // Thresh_mult is used to set a threshold for the rd score. A higher value
- // means that we will accept the best mode so far more often. This number
- // is used in combination with the current block size, and thresh_freq_fact
- // to pick a threshold.
- int thresh_mult[MAX_MODES];
- int thresh_mult_sub8x8[MAX_REFS];
-
- int threshes[MAX_SEGMENTS][BLOCK_SIZES_ALL][MAX_MODES];
-
- int64_t prediction_type_threshes[REF_FRAMES][REFERENCE_MODES];
-
- int RDMULT;
-} RD_OPT;
-
-static INLINE void av1_init_rd_stats(RD_STATS *rd_stats) {
-#if CONFIG_RD_DEBUG
- int plane;
-#endif
- rd_stats->rate = 0;
- rd_stats->dist = 0;
- rd_stats->rdcost = 0;
- rd_stats->sse = 0;
- rd_stats->skip = 1;
- rd_stats->zero_rate = 0;
- rd_stats->invalid_rate = 0;
- rd_stats->ref_rdcost = INT64_MAX;
-#if CONFIG_RD_DEBUG
- // This may run into problems when monochrome video is
- // encoded, as there will only be 1 plane
- for (plane = 0; plane < MAX_MB_PLANE; ++plane) {
- rd_stats->txb_coeff_cost[plane] = 0;
- {
- int r, c;
- for (r = 0; r < TXB_COEFF_COST_MAP_SIZE; ++r)
- for (c = 0; c < TXB_COEFF_COST_MAP_SIZE; ++c)
- rd_stats->txb_coeff_cost_map[plane][r][c] = 0;
- }
- }
-#endif
-}
-
-static INLINE void av1_invalid_rd_stats(RD_STATS *rd_stats) {
-#if CONFIG_RD_DEBUG
- int plane;
-#endif
- rd_stats->rate = INT_MAX;
- rd_stats->dist = INT64_MAX;
- rd_stats->rdcost = INT64_MAX;
- rd_stats->sse = INT64_MAX;
- rd_stats->skip = 0;
- rd_stats->zero_rate = 0;
- rd_stats->invalid_rate = 1;
- rd_stats->ref_rdcost = INT64_MAX;
-#if CONFIG_RD_DEBUG
- // This may run into problems when monochrome video is
- // encoded, as there will only be 1 plane
- for (plane = 0; plane < MAX_MB_PLANE; ++plane) {
- rd_stats->txb_coeff_cost[plane] = INT_MAX;
- {
- int r, c;
- for (r = 0; r < TXB_COEFF_COST_MAP_SIZE; ++r)
- for (c = 0; c < TXB_COEFF_COST_MAP_SIZE; ++c)
- rd_stats->txb_coeff_cost_map[plane][r][c] = INT_MAX;
- }
- }
-#endif
-}
-
-static INLINE void av1_merge_rd_stats(RD_STATS *rd_stats_dst,
- const RD_STATS *rd_stats_src) {
-#if CONFIG_RD_DEBUG
- int plane;
-#endif
- rd_stats_dst->rate += rd_stats_src->rate;
- if (!rd_stats_dst->zero_rate)
- rd_stats_dst->zero_rate = rd_stats_src->zero_rate;
- rd_stats_dst->dist += rd_stats_src->dist;
- rd_stats_dst->sse += rd_stats_src->sse;
- rd_stats_dst->skip &= rd_stats_src->skip;
- rd_stats_dst->invalid_rate &= rd_stats_src->invalid_rate;
-#if CONFIG_RD_DEBUG
- // This may run into problems when monochrome video is
- // encoded, as there will only be 1 plane
- for (plane = 0; plane < MAX_MB_PLANE; ++plane) {
- rd_stats_dst->txb_coeff_cost[plane] += rd_stats_src->txb_coeff_cost[plane];
- {
- // TODO(angiebird): optimize this part
- int r, c;
- int ref_txb_coeff_cost = 0;
- for (r = 0; r < TXB_COEFF_COST_MAP_SIZE; ++r)
- for (c = 0; c < TXB_COEFF_COST_MAP_SIZE; ++c) {
- rd_stats_dst->txb_coeff_cost_map[plane][r][c] +=
- rd_stats_src->txb_coeff_cost_map[plane][r][c];
- ref_txb_coeff_cost += rd_stats_dst->txb_coeff_cost_map[plane][r][c];
- }
- assert(ref_txb_coeff_cost == rd_stats_dst->txb_coeff_cost[plane]);
- }
- }
-#endif
-}
-
-struct TileInfo;
-struct TileDataEnc;
-struct AV1_COMP;
-struct macroblock;
-
-int av1_compute_rd_mult(const struct AV1_COMP *cpi, int qindex);
-
-void av1_initialize_rd_consts(struct AV1_COMP *cpi);
-
-void av1_initialize_me_consts(const struct AV1_COMP *cpi, MACROBLOCK *x,
- int qindex);
-
-void av1_model_rd_from_var_lapndz(int64_t var, unsigned int n,
- unsigned int qstep, int *rate, int64_t *dist);
-
-void av1_model_rd_curvfit(double xqr, double *rate_f, double *distbysse_f);
-void av1_model_rd_surffit(double xm, double yl, double *rate_f,
- double *distbysse_f);
-
-int av1_get_switchable_rate(const AV1_COMMON *const cm, MACROBLOCK *x,
- const MACROBLOCKD *xd);
-
-int av1_raster_block_offset(BLOCK_SIZE plane_bsize, int raster_block,
- int stride);
-
-int16_t *av1_raster_block_offset_int16(BLOCK_SIZE plane_bsize, int raster_block,
- int16_t *base);
-
-YV12_BUFFER_CONFIG *av1_get_scaled_ref_frame(const struct AV1_COMP *cpi,
- int ref_frame);
-
-void av1_init_me_luts(void);
-
-void av1_set_mvcost(MACROBLOCK *x, int ref, int ref_mv_idx);
-
-void av1_get_entropy_contexts(BLOCK_SIZE bsize,
- const struct macroblockd_plane *pd,
- ENTROPY_CONTEXT t_above[MAX_MIB_SIZE],
- ENTROPY_CONTEXT t_left[MAX_MIB_SIZE]);
-
-void av1_set_rd_speed_thresholds(struct AV1_COMP *cpi);
-
-void av1_set_rd_speed_thresholds_sub8x8(struct AV1_COMP *cpi);
-
-void av1_update_rd_thresh_fact(const AV1_COMMON *const cm,
- int (*fact)[MAX_MODES], int rd_thresh, int bsize,
- int best_mode_index);
-
-static INLINE int rd_less_than_thresh(int64_t best_rd, int thresh,
- int thresh_fact) {
- return best_rd < ((int64_t)thresh * thresh_fact >> 5) || thresh == INT_MAX;
-}
-
-void av1_mv_pred(const struct AV1_COMP *cpi, MACROBLOCK *x,
- uint8_t *ref_y_buffer, int ref_y_stride, int ref_frame,
- BLOCK_SIZE block_size);
-
-static INLINE void set_error_per_bit(MACROBLOCK *x, int rdmult) {
- x->errorperbit = rdmult >> RD_EPB_SHIFT;
- x->errorperbit += (x->errorperbit == 0);
-}
-
-void av1_setup_pred_block(const MACROBLOCKD *xd,
- struct buf_2d dst[MAX_MB_PLANE],
- const YV12_BUFFER_CONFIG *src, int mi_row, int mi_col,
- const struct scale_factors *scale,
- const struct scale_factors *scale_uv,
- const int num_planes);
-
-int av1_get_intra_cost_penalty(int qindex, int qdelta,
- aom_bit_depth_t bit_depth);
-
-void av1_fill_mode_rates(AV1_COMMON *const cm, MACROBLOCK *x,
- FRAME_CONTEXT *fc);
-
-void av1_fill_coeff_costs(MACROBLOCK *x, FRAME_CONTEXT *fc,
- const int num_planes);
-
-#ifdef __cplusplus
-} // extern "C"
-#endif
-
-#endif // AOM_AV1_ENCODER_RD_H_
diff --git a/third_party/aom/av1/encoder/rdopt.c b/third_party/aom/av1/encoder/rdopt.c
deleted file mode 100644
index c2d15534f..000000000
--- a/third_party/aom/av1/encoder/rdopt.c
+++ /dev/null
@@ -1,12199 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#include <assert.h>
-#include <math.h>
-
-#include "config/aom_dsp_rtcd.h"
-#include "config/av1_rtcd.h"
-
-#include "aom_dsp/aom_dsp_common.h"
-#include "aom_dsp/blend.h"
-#include "aom_mem/aom_mem.h"
-#include "aom_ports/aom_timer.h"
-#include "aom_ports/mem.h"
-#include "aom_ports/system_state.h"
-
-#include "av1/common/cfl.h"
-#include "av1/common/common.h"
-#include "av1/common/common_data.h"
-#include "av1/common/entropy.h"
-#include "av1/common/entropymode.h"
-#include "av1/common/idct.h"
-#include "av1/common/mvref_common.h"
-#include "av1/common/obmc.h"
-#include "av1/common/pred_common.h"
-#include "av1/common/quant_common.h"
-#include "av1/common/reconinter.h"
-#include "av1/common/reconintra.h"
-#include "av1/common/scan.h"
-#include "av1/common/seg_common.h"
-#include "av1/common/txb_common.h"
-#include "av1/common/warped_motion.h"
-
-#include "av1/encoder/aq_variance.h"
-#include "av1/encoder/av1_quantize.h"
-#include "av1/encoder/cost.h"
-#include "av1/encoder/encodemb.h"
-#include "av1/encoder/encodemv.h"
-#include "av1/encoder/encoder.h"
-#include "av1/encoder/encodetxb.h"
-#include "av1/encoder/hybrid_fwd_txfm.h"
-#include "av1/encoder/mcomp.h"
-#include "av1/encoder/ml.h"
-#include "av1/encoder/palette.h"
-#include "av1/encoder/pustats.h"
-#include "av1/encoder/random.h"
-#include "av1/encoder/ratectrl.h"
-#include "av1/encoder/rd.h"
-#include "av1/encoder/rdopt.h"
-#include "av1/encoder/reconinter_enc.h"
-#include "av1/encoder/tokenize.h"
-#include "av1/encoder/tx_prune_model_weights.h"
-
-typedef void (*model_rd_for_sb_type)(
- const AV1_COMP *const cpi, BLOCK_SIZE bsize, MACROBLOCK *x, MACROBLOCKD *xd,
- int plane_from, int plane_to, int mi_row, int mi_col, int *out_rate_sum,
- int64_t *out_dist_sum, int *skip_txfm_sb, int64_t *skip_sse_sb,
- int *plane_rate, int64_t *plane_sse, int64_t *plane_dist);
-typedef void (*model_rd_from_sse_type)(const AV1_COMP *const cpi,
- const MACROBLOCK *const x,
- BLOCK_SIZE plane_bsize, int plane,
- int64_t sse, int num_samples, int *rate,
- int64_t *dist);
-
-static void model_rd_for_sb(const AV1_COMP *const cpi, BLOCK_SIZE bsize,
- MACROBLOCK *x, MACROBLOCKD *xd, int plane_from,
- int plane_to, int mi_row, int mi_col,
- int *out_rate_sum, int64_t *out_dist_sum,
- int *skip_txfm_sb, int64_t *skip_sse_sb,
- int *plane_rate, int64_t *plane_sse,
- int64_t *plane_dist);
-static void model_rd_for_sb_with_curvfit(
- const AV1_COMP *const cpi, BLOCK_SIZE bsize, MACROBLOCK *x, MACROBLOCKD *xd,
- int plane_from, int plane_to, int mi_row, int mi_col, int *out_rate_sum,
- int64_t *out_dist_sum, int *skip_txfm_sb, int64_t *skip_sse_sb,
- int *plane_rate, int64_t *plane_sse, int64_t *plane_dist);
-static void model_rd_for_sb_with_surffit(
- const AV1_COMP *const cpi, BLOCK_SIZE bsize, MACROBLOCK *x, MACROBLOCKD *xd,
- int plane_from, int plane_to, int mi_row, int mi_col, int *out_rate_sum,
- int64_t *out_dist_sum, int *skip_txfm_sb, int64_t *skip_sse_sb,
- int *plane_rate, int64_t *plane_sse, int64_t *plane_dist);
-static void model_rd_for_sb_with_dnn(
- const AV1_COMP *const cpi, BLOCK_SIZE bsize, MACROBLOCK *x, MACROBLOCKD *xd,
- int plane_from, int plane_to, int mi_row, int mi_col, int *out_rate_sum,
- int64_t *out_dist_sum, int *skip_txfm_sb, int64_t *skip_sse_sb,
- int *plane_rate, int64_t *plane_sse, int64_t *plane_dist);
-static void model_rd_for_sb_with_fullrdy(
- const AV1_COMP *const cpi, BLOCK_SIZE bsize, MACROBLOCK *x, MACROBLOCKD *xd,
- int plane_from, int plane_to, int mi_row, int mi_col, int *out_rate_sum,
- int64_t *out_dist_sum, int *skip_txfm_sb, int64_t *skip_sse_sb,
- int *plane_rate, int64_t *plane_sse, int64_t *plane_dist);
-static void model_rd_from_sse(const AV1_COMP *const cpi,
- const MACROBLOCK *const x, BLOCK_SIZE plane_bsize,
- int plane, int64_t sse, int num_samples,
- int *rate, int64_t *dist);
-static void model_rd_with_dnn(const AV1_COMP *const cpi,
- const MACROBLOCK *const x, BLOCK_SIZE plane_bsize,
- int plane, int64_t sse, int num_samples,
- int *rate, int64_t *dist);
-static void model_rd_with_curvfit(const AV1_COMP *const cpi,
- const MACROBLOCK *const x,
- BLOCK_SIZE plane_bsize, int plane,
- int64_t sse, int num_samples, int *rate,
- int64_t *dist);
-static void model_rd_with_surffit(const AV1_COMP *const cpi,
- const MACROBLOCK *const x,
- BLOCK_SIZE plane_bsize, int plane,
- int64_t sse, int num_samples, int *rate,
- int64_t *dist);
-
-typedef enum {
- MODELRD_LEGACY,
- MODELRD_CURVFIT,
- MODELRD_SUFFIT,
- MODELRD_DNN,
- MODELRD_FULLRDY,
- MODELRD_TYPES
-} ModelRdType;
-
-static model_rd_for_sb_type model_rd_sb_fn[MODELRD_TYPES] = {
- model_rd_for_sb, model_rd_for_sb_with_curvfit, model_rd_for_sb_with_surffit,
- model_rd_for_sb_with_dnn, model_rd_for_sb_with_fullrdy
-};
-
-static model_rd_from_sse_type model_rd_sse_fn[MODELRD_TYPES] = {
- model_rd_from_sse, model_rd_with_curvfit, model_rd_with_surffit,
- model_rd_with_dnn, NULL
-};
-
-// 0: Legacy model
-// 1: Curve fit model
-// 2: Surface fit model
-// 3: DNN regression model
-// 4: Full rd model
-#define MODELRD_TYPE_INTERP_FILTER 1
-#define MODELRD_TYPE_TX_SEARCH_PRUNE 2
-#define MODELRD_TYPE_MASKED_COMPOUND 1
-#define MODELRD_TYPE_INTERINTRA 1
-#define MODELRD_TYPE_INTRA 1
-#define MODELRD_TYPE_JNT_COMPOUND 1
-
-#define DUAL_FILTER_SET_SIZE (SWITCHABLE_FILTERS * SWITCHABLE_FILTERS)
-static const InterpFilters filter_sets[DUAL_FILTER_SET_SIZE] = {
- 0x00000000, 0x00010000, 0x00020000, // y = 0
- 0x00000001, 0x00010001, 0x00020001, // y = 1
- 0x00000002, 0x00010002, 0x00020002, // y = 2
-};
-
-#define SECOND_REF_FRAME_MASK \
- ((1 << ALTREF_FRAME) | (1 << ALTREF2_FRAME) | (1 << BWDREF_FRAME) | \
- (1 << GOLDEN_FRAME) | (1 << LAST2_FRAME) | 0x01)
-
-#define ANGLE_SKIP_THRESH 10
-
-static const double ADST_FLIP_SVM[8] = {
- /* vertical */
- -6.6623, -2.8062, -3.2531, 3.1671,
- /* horizontal */
- -7.7051, -3.2234, -3.6193, 3.4533
-};
-
-typedef struct {
- PREDICTION_MODE mode;
- MV_REFERENCE_FRAME ref_frame[2];
-} MODE_DEFINITION;
-
-typedef struct {
- MV_REFERENCE_FRAME ref_frame[2];
-} REF_DEFINITION;
-
-typedef enum {
- FTXS_NONE = 0,
- FTXS_DCT_AND_1D_DCT_ONLY = 1 << 0,
- FTXS_DISABLE_TRELLIS_OPT = 1 << 1,
- FTXS_USE_TRANSFORM_DOMAIN = 1 << 2
-} FAST_TX_SEARCH_MODE;
-
-static void select_tx_type_yrd(const AV1_COMP *cpi, MACROBLOCK *x,
- RD_STATS *rd_stats, BLOCK_SIZE bsize, int mi_row,
- int mi_col, int64_t ref_best_rd);
-
-static int inter_block_uvrd(const AV1_COMP *cpi, MACROBLOCK *x,
- RD_STATS *rd_stats, BLOCK_SIZE bsize,
- int64_t non_skip_ref_best_rd,
- int64_t skip_ref_best_rd,
- FAST_TX_SEARCH_MODE ftxs_mode);
-
-struct rdcost_block_args {
- const AV1_COMP *cpi;
- MACROBLOCK *x;
- ENTROPY_CONTEXT t_above[MAX_MIB_SIZE];
- ENTROPY_CONTEXT t_left[MAX_MIB_SIZE];
- RD_STATS rd_stats;
- int64_t this_rd;
- int64_t best_rd;
- int exit_early;
- int incomplete_exit;
- int use_fast_coef_costing;
- FAST_TX_SEARCH_MODE ftxs_mode;
-};
-
-#define LAST_NEW_MV_INDEX 6
-static const MODE_DEFINITION av1_mode_order[MAX_MODES] = {
- { NEARESTMV, { LAST_FRAME, NONE_FRAME } },
- { NEARESTMV, { LAST2_FRAME, NONE_FRAME } },
- { NEARESTMV, { LAST3_FRAME, NONE_FRAME } },
- { NEARESTMV, { BWDREF_FRAME, NONE_FRAME } },
- { NEARESTMV, { ALTREF2_FRAME, NONE_FRAME } },
- { NEARESTMV, { ALTREF_FRAME, NONE_FRAME } },
- { NEARESTMV, { GOLDEN_FRAME, NONE_FRAME } },
-
- { NEWMV, { LAST_FRAME, NONE_FRAME } },
- { NEWMV, { LAST2_FRAME, NONE_FRAME } },
- { NEWMV, { LAST3_FRAME, NONE_FRAME } },
- { NEWMV, { BWDREF_FRAME, NONE_FRAME } },
- { NEWMV, { ALTREF2_FRAME, NONE_FRAME } },
- { NEWMV, { ALTREF_FRAME, NONE_FRAME } },
- { NEWMV, { GOLDEN_FRAME, NONE_FRAME } },
-
- { NEARMV, { LAST_FRAME, NONE_FRAME } },
- { NEARMV, { LAST2_FRAME, NONE_FRAME } },
- { NEARMV, { LAST3_FRAME, NONE_FRAME } },
- { NEARMV, { BWDREF_FRAME, NONE_FRAME } },
- { NEARMV, { ALTREF2_FRAME, NONE_FRAME } },
- { NEARMV, { ALTREF_FRAME, NONE_FRAME } },
- { NEARMV, { GOLDEN_FRAME, NONE_FRAME } },
-
- { GLOBALMV, { LAST_FRAME, NONE_FRAME } },
- { GLOBALMV, { LAST2_FRAME, NONE_FRAME } },
- { GLOBALMV, { LAST3_FRAME, NONE_FRAME } },
- { GLOBALMV, { BWDREF_FRAME, NONE_FRAME } },
- { GLOBALMV, { ALTREF2_FRAME, NONE_FRAME } },
- { GLOBALMV, { GOLDEN_FRAME, NONE_FRAME } },
- { GLOBALMV, { ALTREF_FRAME, NONE_FRAME } },
-
- // TODO(zoeliu): May need to reconsider the order on the modes to check
-
- { NEAREST_NEARESTMV, { LAST_FRAME, ALTREF_FRAME } },
- { NEAREST_NEARESTMV, { LAST2_FRAME, ALTREF_FRAME } },
- { NEAREST_NEARESTMV, { LAST3_FRAME, ALTREF_FRAME } },
- { NEAREST_NEARESTMV, { GOLDEN_FRAME, ALTREF_FRAME } },
- { NEAREST_NEARESTMV, { LAST_FRAME, BWDREF_FRAME } },
- { NEAREST_NEARESTMV, { LAST2_FRAME, BWDREF_FRAME } },
- { NEAREST_NEARESTMV, { LAST3_FRAME, BWDREF_FRAME } },
- { NEAREST_NEARESTMV, { GOLDEN_FRAME, BWDREF_FRAME } },
- { NEAREST_NEARESTMV, { LAST_FRAME, ALTREF2_FRAME } },
- { NEAREST_NEARESTMV, { LAST2_FRAME, ALTREF2_FRAME } },
- { NEAREST_NEARESTMV, { LAST3_FRAME, ALTREF2_FRAME } },
- { NEAREST_NEARESTMV, { GOLDEN_FRAME, ALTREF2_FRAME } },
-
- { NEAREST_NEARESTMV, { LAST_FRAME, LAST2_FRAME } },
- { NEAREST_NEARESTMV, { LAST_FRAME, LAST3_FRAME } },
- { NEAREST_NEARESTMV, { LAST_FRAME, GOLDEN_FRAME } },
- { NEAREST_NEARESTMV, { BWDREF_FRAME, ALTREF_FRAME } },
-
- { NEAR_NEARMV, { LAST_FRAME, ALTREF_FRAME } },
- { NEW_NEARESTMV, { LAST_FRAME, ALTREF_FRAME } },
- { NEAREST_NEWMV, { LAST_FRAME, ALTREF_FRAME } },
- { NEW_NEARMV, { LAST_FRAME, ALTREF_FRAME } },
- { NEAR_NEWMV, { LAST_FRAME, ALTREF_FRAME } },
- { NEW_NEWMV, { LAST_FRAME, ALTREF_FRAME } },
- { GLOBAL_GLOBALMV, { LAST_FRAME, ALTREF_FRAME } },
-
- { NEAR_NEARMV, { LAST2_FRAME, ALTREF_FRAME } },
- { NEW_NEARESTMV, { LAST2_FRAME, ALTREF_FRAME } },
- { NEAREST_NEWMV, { LAST2_FRAME, ALTREF_FRAME } },
- { NEW_NEARMV, { LAST2_FRAME, ALTREF_FRAME } },
- { NEAR_NEWMV, { LAST2_FRAME, ALTREF_FRAME } },
- { NEW_NEWMV, { LAST2_FRAME, ALTREF_FRAME } },
- { GLOBAL_GLOBALMV, { LAST2_FRAME, ALTREF_FRAME } },
-
- { NEAR_NEARMV, { LAST3_FRAME, ALTREF_FRAME } },
- { NEW_NEARESTMV, { LAST3_FRAME, ALTREF_FRAME } },
- { NEAREST_NEWMV, { LAST3_FRAME, ALTREF_FRAME } },
- { NEW_NEARMV, { LAST3_FRAME, ALTREF_FRAME } },
- { NEAR_NEWMV, { LAST3_FRAME, ALTREF_FRAME } },
- { NEW_NEWMV, { LAST3_FRAME, ALTREF_FRAME } },
- { GLOBAL_GLOBALMV, { LAST3_FRAME, ALTREF_FRAME } },
-
- { NEAR_NEARMV, { GOLDEN_FRAME, ALTREF_FRAME } },
- { NEW_NEARESTMV, { GOLDEN_FRAME, ALTREF_FRAME } },
- { NEAREST_NEWMV, { GOLDEN_FRAME, ALTREF_FRAME } },
- { NEW_NEARMV, { GOLDEN_FRAME, ALTREF_FRAME } },
- { NEAR_NEWMV, { GOLDEN_FRAME, ALTREF_FRAME } },
- { NEW_NEWMV, { GOLDEN_FRAME, ALTREF_FRAME } },
- { GLOBAL_GLOBALMV, { GOLDEN_FRAME, ALTREF_FRAME } },
-
- { NEAR_NEARMV, { LAST_FRAME, BWDREF_FRAME } },
- { NEW_NEARESTMV, { LAST_FRAME, BWDREF_FRAME } },
- { NEAREST_NEWMV, { LAST_FRAME, BWDREF_FRAME } },
- { NEW_NEARMV, { LAST_FRAME, BWDREF_FRAME } },
- { NEAR_NEWMV, { LAST_FRAME, BWDREF_FRAME } },
- { NEW_NEWMV, { LAST_FRAME, BWDREF_FRAME } },
- { GLOBAL_GLOBALMV, { LAST_FRAME, BWDREF_FRAME } },
-
- { NEAR_NEARMV, { LAST2_FRAME, BWDREF_FRAME } },
- { NEW_NEARESTMV, { LAST2_FRAME, BWDREF_FRAME } },
- { NEAREST_NEWMV, { LAST2_FRAME, BWDREF_FRAME } },
- { NEW_NEARMV, { LAST2_FRAME, BWDREF_FRAME } },
- { NEAR_NEWMV, { LAST2_FRAME, BWDREF_FRAME } },
- { NEW_NEWMV, { LAST2_FRAME, BWDREF_FRAME } },
- { GLOBAL_GLOBALMV, { LAST2_FRAME, BWDREF_FRAME } },
-
- { NEAR_NEARMV, { LAST3_FRAME, BWDREF_FRAME } },
- { NEW_NEARESTMV, { LAST3_FRAME, BWDREF_FRAME } },
- { NEAREST_NEWMV, { LAST3_FRAME, BWDREF_FRAME } },
- { NEW_NEARMV, { LAST3_FRAME, BWDREF_FRAME } },
- { NEAR_NEWMV, { LAST3_FRAME, BWDREF_FRAME } },
- { NEW_NEWMV, { LAST3_FRAME, BWDREF_FRAME } },
- { GLOBAL_GLOBALMV, { LAST3_FRAME, BWDREF_FRAME } },
-
- { NEAR_NEARMV, { GOLDEN_FRAME, BWDREF_FRAME } },
- { NEW_NEARESTMV, { GOLDEN_FRAME, BWDREF_FRAME } },
- { NEAREST_NEWMV, { GOLDEN_FRAME, BWDREF_FRAME } },
- { NEW_NEARMV, { GOLDEN_FRAME, BWDREF_FRAME } },
- { NEAR_NEWMV, { GOLDEN_FRAME, BWDREF_FRAME } },
- { NEW_NEWMV, { GOLDEN_FRAME, BWDREF_FRAME } },
- { GLOBAL_GLOBALMV, { GOLDEN_FRAME, BWDREF_FRAME } },
-
- { NEAR_NEARMV, { LAST_FRAME, ALTREF2_FRAME } },
- { NEW_NEARESTMV, { LAST_FRAME, ALTREF2_FRAME } },
- { NEAREST_NEWMV, { LAST_FRAME, ALTREF2_FRAME } },
- { NEW_NEARMV, { LAST_FRAME, ALTREF2_FRAME } },
- { NEAR_NEWMV, { LAST_FRAME, ALTREF2_FRAME } },
- { NEW_NEWMV, { LAST_FRAME, ALTREF2_FRAME } },
- { GLOBAL_GLOBALMV, { LAST_FRAME, ALTREF2_FRAME } },
-
- { NEAR_NEARMV, { LAST2_FRAME, ALTREF2_FRAME } },
- { NEW_NEARESTMV, { LAST2_FRAME, ALTREF2_FRAME } },
- { NEAREST_NEWMV, { LAST2_FRAME, ALTREF2_FRAME } },
- { NEW_NEARMV, { LAST2_FRAME, ALTREF2_FRAME } },
- { NEAR_NEWMV, { LAST2_FRAME, ALTREF2_FRAME } },
- { NEW_NEWMV, { LAST2_FRAME, ALTREF2_FRAME } },
- { GLOBAL_GLOBALMV, { LAST2_FRAME, ALTREF2_FRAME } },
-
- { NEAR_NEARMV, { LAST3_FRAME, ALTREF2_FRAME } },
- { NEW_NEARESTMV, { LAST3_FRAME, ALTREF2_FRAME } },
- { NEAREST_NEWMV, { LAST3_FRAME, ALTREF2_FRAME } },
- { NEW_NEARMV, { LAST3_FRAME, ALTREF2_FRAME } },
- { NEAR_NEWMV, { LAST3_FRAME, ALTREF2_FRAME } },
- { NEW_NEWMV, { LAST3_FRAME, ALTREF2_FRAME } },
- { GLOBAL_GLOBALMV, { LAST3_FRAME, ALTREF2_FRAME } },
-
- { NEAR_NEARMV, { GOLDEN_FRAME, ALTREF2_FRAME } },
- { NEW_NEARESTMV, { GOLDEN_FRAME, ALTREF2_FRAME } },
- { NEAREST_NEWMV, { GOLDEN_FRAME, ALTREF2_FRAME } },
- { NEW_NEARMV, { GOLDEN_FRAME, ALTREF2_FRAME } },
- { NEAR_NEWMV, { GOLDEN_FRAME, ALTREF2_FRAME } },
- { NEW_NEWMV, { GOLDEN_FRAME, ALTREF2_FRAME } },
- { GLOBAL_GLOBALMV, { GOLDEN_FRAME, ALTREF2_FRAME } },
-
- { NEAR_NEARMV, { LAST_FRAME, LAST2_FRAME } },
- { NEW_NEARESTMV, { LAST_FRAME, LAST2_FRAME } },
- { NEAREST_NEWMV, { LAST_FRAME, LAST2_FRAME } },
- { NEW_NEARMV, { LAST_FRAME, LAST2_FRAME } },
- { NEAR_NEWMV, { LAST_FRAME, LAST2_FRAME } },
- { NEW_NEWMV, { LAST_FRAME, LAST2_FRAME } },
- { GLOBAL_GLOBALMV, { LAST_FRAME, LAST2_FRAME } },
-
- { NEAR_NEARMV, { LAST_FRAME, LAST3_FRAME } },
- { NEW_NEARESTMV, { LAST_FRAME, LAST3_FRAME } },
- { NEAREST_NEWMV, { LAST_FRAME, LAST3_FRAME } },
- { NEW_NEARMV, { LAST_FRAME, LAST3_FRAME } },
- { NEAR_NEWMV, { LAST_FRAME, LAST3_FRAME } },
- { NEW_NEWMV, { LAST_FRAME, LAST3_FRAME } },
- { GLOBAL_GLOBALMV, { LAST_FRAME, LAST3_FRAME } },
-
- { NEAR_NEARMV, { LAST_FRAME, GOLDEN_FRAME } },
- { NEW_NEARESTMV, { LAST_FRAME, GOLDEN_FRAME } },
- { NEAREST_NEWMV, { LAST_FRAME, GOLDEN_FRAME } },
- { NEW_NEARMV, { LAST_FRAME, GOLDEN_FRAME } },
- { NEAR_NEWMV, { LAST_FRAME, GOLDEN_FRAME } },
- { NEW_NEWMV, { LAST_FRAME, GOLDEN_FRAME } },
- { GLOBAL_GLOBALMV, { LAST_FRAME, GOLDEN_FRAME } },
-
- { NEAR_NEARMV, { BWDREF_FRAME, ALTREF_FRAME } },
- { NEW_NEARESTMV, { BWDREF_FRAME, ALTREF_FRAME } },
- { NEAREST_NEWMV, { BWDREF_FRAME, ALTREF_FRAME } },
- { NEW_NEARMV, { BWDREF_FRAME, ALTREF_FRAME } },
- { NEAR_NEWMV, { BWDREF_FRAME, ALTREF_FRAME } },
- { NEW_NEWMV, { BWDREF_FRAME, ALTREF_FRAME } },
- { GLOBAL_GLOBALMV, { BWDREF_FRAME, ALTREF_FRAME } },
-
- // intra modes
- { DC_PRED, { INTRA_FRAME, NONE_FRAME } },
- { PAETH_PRED, { INTRA_FRAME, NONE_FRAME } },
- { SMOOTH_PRED, { INTRA_FRAME, NONE_FRAME } },
- { SMOOTH_V_PRED, { INTRA_FRAME, NONE_FRAME } },
- { SMOOTH_H_PRED, { INTRA_FRAME, NONE_FRAME } },
- { H_PRED, { INTRA_FRAME, NONE_FRAME } },
- { V_PRED, { INTRA_FRAME, NONE_FRAME } },
- { D135_PRED, { INTRA_FRAME, NONE_FRAME } },
- { D203_PRED, { INTRA_FRAME, NONE_FRAME } },
- { D157_PRED, { INTRA_FRAME, NONE_FRAME } },
- { D67_PRED, { INTRA_FRAME, NONE_FRAME } },
- { D113_PRED, { INTRA_FRAME, NONE_FRAME } },
- { D45_PRED, { INTRA_FRAME, NONE_FRAME } },
-};
-
-static const int16_t intra_to_mode_idx[INTRA_MODE_NUM] = {
- 7, // DC_PRED,
- 134, // V_PRED,
- 133, // H_PRED,
- 140, // D45_PRED,
- 135, // D135_PRED,
- 139, // D113_PRED,
- 137, // D157_PRED,
- 136, // D203_PRED,
- 138, // D67_PRED,
- 46, // SMOOTH_PRED,
- 47, // SMOOTH_V_PRED,
- 48, // SMOOTH_H_PRED,
- 45, // PAETH_PRED,
-};
-
-/* clang-format off */
-static const int16_t single_inter_to_mode_idx[SINGLE_INTER_MODE_NUM]
- [REF_FRAMES] = {
- // NEARESTMV,
- { -1, 0, 1, 2, 6, 3, 4, 5, },
- // NEARMV,
- { -1, 15, 16, 17, 21, 18, 19, 20, },
- // GLOBALMV,
- { -1, 22, 23, 24, 27, 25, 26, 28, },
- // NEWMV,
- { -1, 8, 9, 10, 14, 11, 12, 13, },
-};
-/* clang-format on */
-
-/* clang-format off */
-static const int16_t comp_inter_to_mode_idx[COMP_INTER_MODE_NUM][REF_FRAMES]
- [REF_FRAMES] = {
- // NEAREST_NEARESTMV,
- {
- { -1, -1, -1, -1, -1, -1, -1, -1, },
- { -1, -1, 41, 42, 43, 33, 37, 29, },
- { -1, -1, -1, -1, -1, 34, 38, 30, },
- { -1, -1, -1, -1, -1, 35, 39, 31, },
- { -1, -1, -1, -1, -1, 36, 40, 32, },
- { -1, -1, -1, -1, -1, -1, -1, 44, },
- { -1, -1, -1, -1, -1, -1, -1, -1, },
- { -1, -1, -1, -1, -1, -1, -1, -1, },
- },
- // NEAR_NEARMV,
- {
- { -1, -1, -1, -1, -1, -1, -1, -1, },
- { -1, -1, 141, 148, 155, 77, 105, 49, },
- { -1, -1, -1, -1, -1, 84, 112, 56, },
- { -1, -1, -1, -1, -1, 91, 119, 63, },
- { -1, -1, -1, -1, -1, 98, 126, 70, },
- { -1, -1, -1, -1, -1, -1, -1, 162, },
- { -1, -1, -1, -1, -1, -1, -1, -1, },
- { -1, -1, -1, -1, -1, -1, -1, -1, },
- },
- // NEAREST_NEWMV,
- {
- { -1, -1, -1, -1, -1, -1, -1, -1, },
- { -1, -1, 143, 150, 157, 79, 107, 51, },
- { -1, -1, -1, -1, -1, 86, 114, 58, },
- { -1, -1, -1, -1, -1, 93, 121, 65, },
- { -1, -1, -1, -1, -1, 100, 128, 72, },
- { -1, -1, -1, -1, -1, -1, -1, 164, },
- { -1, -1, -1, -1, -1, -1, -1, -1, },
- { -1, -1, -1, -1, -1, -1, -1, -1, },
- },
- // NEW_NEARESTMV,
- {
- { -1, -1, -1, -1, -1, -1, -1, -1, },
- { -1, -1, 142, 149, 156, 78, 106, 50, },
- { -1, -1, -1, -1, -1, 85, 113, 57, },
- { -1, -1, -1, -1, -1, 92, 120, 64, },
- { -1, -1, -1, -1, -1, 99, 127, 71, },
- { -1, -1, -1, -1, -1, -1, -1, 163, },
- { -1, -1, -1, -1, -1, -1, -1, -1, },
- { -1, -1, -1, -1, -1, -1, -1, -1, },
- },
- // NEAR_NEWMV,
- {
- { -1, -1, -1, -1, -1, -1, -1, -1, },
- { -1, -1, 145, 152, 159, 81, 109, 53, },
- { -1, -1, -1, -1, -1, 88, 116, 60, },
- { -1, -1, -1, -1, -1, 95, 123, 67, },
- { -1, -1, -1, -1, -1, 102, 130, 74, },
- { -1, -1, -1, -1, -1, -1, -1, 166, },
- { -1, -1, -1, -1, -1, -1, -1, -1, },
- { -1, -1, -1, -1, -1, -1, -1, -1, },
- },
- // NEW_NEARMV,
- {
- { -1, -1, -1, -1, -1, -1, -1, -1, },
- { -1, -1, 144, 151, 158, 80, 108, 52, },
- { -1, -1, -1, -1, -1, 87, 115, 59, },
- { -1, -1, -1, -1, -1, 94, 122, 66, },
- { -1, -1, -1, -1, -1, 101, 129, 73, },
- { -1, -1, -1, -1, -1, -1, -1, 165, },
- { -1, -1, -1, -1, -1, -1, -1, -1, },
- { -1, -1, -1, -1, -1, -1, -1, -1, },
- },
- // GLOBAL_GLOBALMV,
- {
- { -1, -1, -1, -1, -1, -1, -1, -1, },
- { -1, -1, 147, 154, 161, 83, 111, 55, },
- { -1, -1, -1, -1, -1, 90, 118, 62, },
- { -1, -1, -1, -1, -1, 97, 125, 69, },
- { -1, -1, -1, -1, -1, 104, 132, 76, },
- { -1, -1, -1, -1, -1, -1, -1, 168, },
- { -1, -1, -1, -1, -1, -1, -1, -1, },
- { -1, -1, -1, -1, -1, -1, -1, -1, },
- },
- // NEW_NEWMV,
- {
- { -1, -1, -1, -1, -1, -1, -1, -1, },
- { -1, -1, 146, 153, 160, 82, 110, 54, },
- { -1, -1, -1, -1, -1, 89, 117, 61, },
- { -1, -1, -1, -1, -1, 96, 124, 68, },
- { -1, -1, -1, -1, -1, 103, 131, 75, },
- { -1, -1, -1, -1, -1, -1, -1, 167, },
- { -1, -1, -1, -1, -1, -1, -1, -1, },
- { -1, -1, -1, -1, -1, -1, -1, -1, },
- },
-};
-/* clang-format on */
-
-static int get_prediction_mode_idx(PREDICTION_MODE this_mode,
- MV_REFERENCE_FRAME ref_frame,
- MV_REFERENCE_FRAME second_ref_frame) {
- if (this_mode < INTRA_MODE_END) {
- assert(ref_frame == INTRA_FRAME);
- assert(second_ref_frame == NONE_FRAME);
- return intra_to_mode_idx[this_mode - INTRA_MODE_START];
- }
- if (this_mode >= SINGLE_INTER_MODE_START &&
- this_mode < SINGLE_INTER_MODE_END) {
- assert((ref_frame > INTRA_FRAME) && (ref_frame <= ALTREF_FRAME));
- return single_inter_to_mode_idx[this_mode - SINGLE_INTER_MODE_START]
- [ref_frame];
- }
- if (this_mode >= COMP_INTER_MODE_START && this_mode < COMP_INTER_MODE_END) {
- assert((ref_frame > INTRA_FRAME) && (ref_frame <= ALTREF_FRAME));
- assert((second_ref_frame > INTRA_FRAME) &&
- (second_ref_frame <= ALTREF_FRAME));
- return comp_inter_to_mode_idx[this_mode - COMP_INTER_MODE_START][ref_frame]
- [second_ref_frame];
- }
- assert(0);
- return -1;
-}
-
-static const PREDICTION_MODE intra_rd_search_mode_order[INTRA_MODES] = {
- DC_PRED, H_PRED, V_PRED, SMOOTH_PRED, PAETH_PRED,
- SMOOTH_V_PRED, SMOOTH_H_PRED, D135_PRED, D203_PRED, D157_PRED,
- D67_PRED, D113_PRED, D45_PRED,
-};
-
-static const UV_PREDICTION_MODE uv_rd_search_mode_order[UV_INTRA_MODES] = {
- UV_DC_PRED, UV_CFL_PRED, UV_H_PRED, UV_V_PRED,
- UV_SMOOTH_PRED, UV_PAETH_PRED, UV_SMOOTH_V_PRED, UV_SMOOTH_H_PRED,
- UV_D135_PRED, UV_D203_PRED, UV_D157_PRED, UV_D67_PRED,
- UV_D113_PRED, UV_D45_PRED,
-};
-
-typedef struct SingleInterModeState {
- int64_t rd;
- MV_REFERENCE_FRAME ref_frame;
- int valid;
-} SingleInterModeState;
-
-typedef struct InterModeSearchState {
- int64_t best_rd;
- MB_MODE_INFO best_mbmode;
- int best_rate_y;
- int best_rate_uv;
- int best_mode_skippable;
- int best_skip2;
- int best_mode_index;
- int skip_intra_modes;
- int num_available_refs;
- int64_t dist_refs[REF_FRAMES];
- int dist_order_refs[REF_FRAMES];
- int64_t mode_threshold[MAX_MODES];
- PREDICTION_MODE best_intra_mode;
- int64_t best_intra_rd;
- int angle_stats_ready;
- uint8_t directional_mode_skip_mask[INTRA_MODES];
- unsigned int best_pred_sse;
- int rate_uv_intra[TX_SIZES_ALL];
- int rate_uv_tokenonly[TX_SIZES_ALL];
- int64_t dist_uvs[TX_SIZES_ALL];
- int skip_uvs[TX_SIZES_ALL];
- UV_PREDICTION_MODE mode_uv[TX_SIZES_ALL];
- PALETTE_MODE_INFO pmi_uv[TX_SIZES_ALL];
- int8_t uv_angle_delta[TX_SIZES_ALL];
- int64_t best_pred_rd[REFERENCE_MODES];
- int64_t best_pred_diff[REFERENCE_MODES];
- // Save a set of single_newmv for each checked ref_mv.
- int_mv single_newmv[MAX_REF_MV_SERCH][REF_FRAMES];
- int single_newmv_rate[MAX_REF_MV_SERCH][REF_FRAMES];
- int single_newmv_valid[MAX_REF_MV_SERCH][REF_FRAMES];
- int64_t modelled_rd[MB_MODE_COUNT][MAX_REF_MV_SERCH][REF_FRAMES];
- // The rd of simple translation in single inter modes
- int64_t simple_rd[MB_MODE_COUNT][MAX_REF_MV_SERCH][REF_FRAMES];
-
- // Single search results by [directions][modes][reference frames]
- SingleInterModeState single_state[2][SINGLE_INTER_MODE_NUM][FWD_REFS];
- int single_state_cnt[2][SINGLE_INTER_MODE_NUM];
- SingleInterModeState single_state_modelled[2][SINGLE_INTER_MODE_NUM]
- [FWD_REFS];
- int single_state_modelled_cnt[2][SINGLE_INTER_MODE_NUM];
-
- MV_REFERENCE_FRAME single_rd_order[2][SINGLE_INTER_MODE_NUM][FWD_REFS];
-} InterModeSearchState;
-
-#if CONFIG_COLLECT_INTER_MODE_RD_STATS
-int inter_mode_data_block_idx(BLOCK_SIZE bsize) {
- if (bsize == BLOCK_8X8) return 1;
- if (bsize == BLOCK_16X16) return 2;
- if (bsize == BLOCK_32X32) return 3;
- return -1;
-}
-
-void av1_inter_mode_data_init(TileDataEnc *tile_data) {
- for (int i = 0; i < BLOCK_SIZES_ALL; ++i) {
- InterModeRdModel *md = &tile_data->inter_mode_rd_models[i];
- md->ready = 0;
- md->num = 0;
- md->dist_sum = 0;
- md->ld_sum = 0;
- md->sse_sum = 0;
- md->sse_sse_sum = 0;
- md->sse_ld_sum = 0;
- }
-}
-
-static int get_est_rate_dist(TileDataEnc *tile_data, BLOCK_SIZE bsize,
- int64_t sse, int *est_residue_cost,
- int64_t *est_dist) {
- aom_clear_system_state();
- const InterModeRdModel *md = &tile_data->inter_mode_rd_models[bsize];
- if (md->ready) {
- const double est_ld = md->a * sse + md->b;
- if (sse < md->dist_mean) {
- *est_residue_cost = 0;
- *est_dist = sse;
- } else {
- *est_residue_cost = (int)round((sse - md->dist_mean) / est_ld);
- *est_dist = (int64_t)round(md->dist_mean);
- }
- return 1;
- }
- return 0;
-}
-
-static int64_t get_est_rd(TileDataEnc *tile_data, BLOCK_SIZE bsize, int rdmult,
- int64_t sse, int curr_cost) {
- int est_residue_cost;
- int64_t est_dist;
- if (get_est_rate_dist(tile_data, bsize, sse, &est_residue_cost, &est_dist)) {
- int rate = est_residue_cost + curr_cost;
- int64_t est_rd = RDCOST(rdmult, rate, est_dist);
- return est_rd;
- }
- return 0;
-}
-
-void av1_inter_mode_data_fit(TileDataEnc *tile_data, int rdmult) {
- aom_clear_system_state();
- for (int bsize = 0; bsize < BLOCK_SIZES_ALL; ++bsize) {
- const int block_idx = inter_mode_data_block_idx(bsize);
- InterModeRdModel *md = &tile_data->inter_mode_rd_models[bsize];
- if (block_idx == -1) continue;
- if ((md->ready == 0 && md->num < 200) || (md->ready == 1 && md->num < 64)) {
- continue;
- } else {
- if (md->ready == 0) {
- md->dist_mean = md->dist_sum / md->num;
- md->ld_mean = md->ld_sum / md->num;
- md->sse_mean = md->sse_sum / md->num;
- md->sse_sse_mean = md->sse_sse_sum / md->num;
- md->sse_ld_mean = md->sse_ld_sum / md->num;
- } else {
- const double factor = 3;
- md->dist_mean =
- (md->dist_mean * factor + (md->dist_sum / md->num)) / (factor + 1);
- md->ld_mean =
- (md->ld_mean * factor + (md->ld_sum / md->num)) / (factor + 1);
- md->sse_mean =
- (md->sse_mean * factor + (md->sse_sum / md->num)) / (factor + 1);
- md->sse_sse_mean =
- (md->sse_sse_mean * factor + (md->sse_sse_sum / md->num)) /
- (factor + 1);
- md->sse_ld_mean =
- (md->sse_ld_mean * factor + (md->sse_ld_sum / md->num)) /
- (factor + 1);
- }
-
- const double my = md->ld_mean;
- const double mx = md->sse_mean;
- const double dx = sqrt(md->sse_sse_mean);
- const double dxy = md->sse_ld_mean;
-
- md->a = (dxy - mx * my) / (dx * dx - mx * mx);
- md->b = my - md->a * mx;
- md->ready = 1;
-
- md->num = 0;
- md->dist_sum = 0;
- md->ld_sum = 0;
- md->sse_sum = 0;
- md->sse_sse_sum = 0;
- md->sse_ld_sum = 0;
- }
- (void)rdmult;
- }
-}
-
-static void inter_mode_data_push(TileDataEnc *tile_data, BLOCK_SIZE bsize,
- int64_t sse, int64_t dist, int residue_cost) {
- if (residue_cost == 0 || sse == dist) return;
- const int block_idx = inter_mode_data_block_idx(bsize);
- if (block_idx == -1) return;
- InterModeRdModel *rd_model = &tile_data->inter_mode_rd_models[bsize];
- if (rd_model->num < INTER_MODE_RD_DATA_OVERALL_SIZE) {
- aom_clear_system_state();
- const double ld = (sse - dist) * 1. / residue_cost;
- ++rd_model->num;
- rd_model->dist_sum += dist;
- rd_model->ld_sum += ld;
- rd_model->sse_sum += sse;
- rd_model->sse_sse_sum += sse * sse;
- rd_model->sse_ld_sum += sse * ld;
- }
-}
-
-static void inter_modes_info_push(InterModesInfo *inter_modes_info,
- int mode_rate, int64_t sse, int64_t est_rd,
- const MB_MODE_INFO *mbmi) {
- const int num = inter_modes_info->num;
- assert(num < MAX_INTER_MODES);
- inter_modes_info->mbmi_arr[num] = *mbmi;
- inter_modes_info->mode_rate_arr[num] = mode_rate;
- inter_modes_info->sse_arr[num] = sse;
- inter_modes_info->est_rd_arr[num] = est_rd;
- ++inter_modes_info->num;
-}
-
-static int compare_rd_idx_pair(const void *a, const void *b) {
- if (((RdIdxPair *)a)->rd == ((RdIdxPair *)b)->rd) {
- return 0;
- } else if (((const RdIdxPair *)a)->rd > ((const RdIdxPair *)b)->rd) {
- return 1;
- } else {
- return -1;
- }
-}
-
-static void inter_modes_info_sort(const InterModesInfo *inter_modes_info,
- RdIdxPair *rd_idx_pair_arr) {
- if (inter_modes_info->num == 0) {
- return;
- }
- for (int i = 0; i < inter_modes_info->num; ++i) {
- rd_idx_pair_arr[i].idx = i;
- rd_idx_pair_arr[i].rd = inter_modes_info->est_rd_arr[i];
- }
- qsort(rd_idx_pair_arr, inter_modes_info->num, sizeof(rd_idx_pair_arr[0]),
- compare_rd_idx_pair);
-}
-#endif // CONFIG_COLLECT_INTER_MODE_RD_STATS
-
-static INLINE int write_uniform_cost(int n, int v) {
- const int l = get_unsigned_bits(n);
- const int m = (1 << l) - n;
- if (l == 0) return 0;
- if (v < m)
- return av1_cost_literal(l - 1);
- else
- return av1_cost_literal(l);
-}
-
-// Similar to store_cfl_required(), but for use during the RDO process,
-// where we haven't yet determined whether this block uses CfL.
-static INLINE CFL_ALLOWED_TYPE store_cfl_required_rdo(const AV1_COMMON *cm,
- const MACROBLOCK *x) {
- const MACROBLOCKD *xd = &x->e_mbd;
-
- if (cm->seq_params.monochrome || x->skip_chroma_rd) return CFL_DISALLOWED;
-
- if (!xd->cfl.is_chroma_reference) {
- // For non-chroma-reference blocks, we should always store the luma pixels,
- // in case the corresponding chroma-reference block uses CfL.
- // Note that this can only happen for block sizes which are <8 on
- // their shortest side, as otherwise they would be chroma reference
- // blocks.
- return CFL_ALLOWED;
- }
-
- // For chroma reference blocks, we should store data in the encoder iff we're
- // allowed to try out CfL.
- return is_cfl_allowed(xd);
-}
-
-// constants for prune 1 and prune 2 decision boundaries
-#define FAST_EXT_TX_CORR_MID 0.0
-#define FAST_EXT_TX_EDST_MID 0.1
-#define FAST_EXT_TX_CORR_MARGIN 0.5
-#define FAST_EXT_TX_EDST_MARGIN 0.3
-
-static int inter_block_yrd(const AV1_COMP *cpi, MACROBLOCK *x,
- RD_STATS *rd_stats, BLOCK_SIZE bsize,
- int64_t ref_best_rd, FAST_TX_SEARCH_MODE ftxs_mode);
-
-static unsigned pixel_dist_visible_only(
- const AV1_COMP *const cpi, const MACROBLOCK *x, const uint8_t *src,
- const int src_stride, const uint8_t *dst, const int dst_stride,
- const BLOCK_SIZE tx_bsize, int txb_rows, int txb_cols, int visible_rows,
- int visible_cols) {
- unsigned sse;
-
- if (txb_rows == visible_rows && txb_cols == visible_cols) {
- cpi->fn_ptr[tx_bsize].vf(src, src_stride, dst, dst_stride, &sse);
- return sse;
- }
- const MACROBLOCKD *xd = &x->e_mbd;
-
- if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
- uint64_t sse64 = aom_highbd_sse_odd_size(src, src_stride, dst, dst_stride,
- visible_cols, visible_rows);
- return (unsigned int)ROUND_POWER_OF_TWO(sse64, (xd->bd - 8) * 2);
- }
- sse = aom_sse_odd_size(src, src_stride, dst, dst_stride, visible_cols,
- visible_rows);
- return sse;
-}
-
-#if CONFIG_DIST_8X8
-static uint64_t cdef_dist_8x8_16bit(uint16_t *dst, int dstride, uint16_t *src,
- int sstride, int coeff_shift) {
- uint64_t svar = 0;
- uint64_t dvar = 0;
- uint64_t sum_s = 0;
- uint64_t sum_d = 0;
- uint64_t sum_s2 = 0;
- uint64_t sum_d2 = 0;
- uint64_t sum_sd = 0;
- uint64_t dist = 0;
-
- int i, j;
- for (i = 0; i < 8; i++) {
- for (j = 0; j < 8; j++) {
- sum_s += src[i * sstride + j];
- sum_d += dst[i * dstride + j];
- sum_s2 += src[i * sstride + j] * src[i * sstride + j];
- sum_d2 += dst[i * dstride + j] * dst[i * dstride + j];
- sum_sd += src[i * sstride + j] * dst[i * dstride + j];
- }
- }
- /* Compute the variance -- the calculation cannot go negative. */
- svar = sum_s2 - ((sum_s * sum_s + 32) >> 6);
- dvar = sum_d2 - ((sum_d * sum_d + 32) >> 6);
-
- // Tuning of jm's original dering distortion metric used in CDEF tool,
- // suggested by jm
- const uint64_t a = 4;
- const uint64_t b = 2;
- const uint64_t c1 = (400 * a << 2 * coeff_shift);
- const uint64_t c2 = (b * 20000 * a * a << 4 * coeff_shift);
-
- dist = (uint64_t)floor(.5 + (sum_d2 + sum_s2 - 2 * sum_sd) * .5 *
- (svar + dvar + c1) /
- (sqrt(svar * (double)dvar + c2)));
-
- // Calibrate dist to have similar rate for the same QP with MSE only
- // distortion (as in master branch)
- dist = (uint64_t)((float)dist * 0.75);
-
- return dist;
-}
-
-static int od_compute_var_4x4(uint16_t *x, int stride) {
- int sum;
- int s2;
- int i;
- sum = 0;
- s2 = 0;
- for (i = 0; i < 4; i++) {
- int j;
- for (j = 0; j < 4; j++) {
- int t;
-
- t = x[i * stride + j];
- sum += t;
- s2 += t * t;
- }
- }
-
- return (s2 - (sum * sum >> 4)) >> 4;
-}
-
-/* OD_DIST_LP_MID controls the frequency weighting filter used for computing
- the distortion. For a value X, the filter is [1 X 1]/(X + 2) and
- is applied both horizontally and vertically. For X=5, the filter is
- a good approximation for the OD_QM8_Q4_HVS quantization matrix. */
-#define OD_DIST_LP_MID (5)
-#define OD_DIST_LP_NORM (OD_DIST_LP_MID + 2)
-
-static double od_compute_dist_8x8(int use_activity_masking, uint16_t *x,
- uint16_t *y, od_coeff *e_lp, int stride) {
- double sum;
- int min_var;
- double mean_var;
- double var_stat;
- double activity;
- double calibration;
- int i;
- int j;
- double vardist;
-
- vardist = 0;
-
-#if 1
- min_var = INT_MAX;
- mean_var = 0;
- for (i = 0; i < 3; i++) {
- for (j = 0; j < 3; j++) {
- int varx;
- int vary;
- varx = od_compute_var_4x4(x + 2 * i * stride + 2 * j, stride);
- vary = od_compute_var_4x4(y + 2 * i * stride + 2 * j, stride);
- min_var = OD_MINI(min_var, varx);
- mean_var += 1. / (1 + varx);
- /* The cast to (double) is to avoid an overflow before the sqrt.*/
- vardist += varx - 2 * sqrt(varx * (double)vary) + vary;
- }
- }
- /* We use a different variance statistic depending on whether activity
- masking is used, since the harmonic mean appeared slightly worse with
- masking off. The calibration constant just ensures that we preserve the
- rate compared to activity=1. */
- if (use_activity_masking) {
- calibration = 1.95;
- var_stat = 9. / mean_var;
- } else {
- calibration = 1.62;
- var_stat = min_var;
- }
- /* 1.62 is a calibration constant, 0.25 is a noise floor and 1/6 is the
- activity masking constant. */
- activity = calibration * pow(.25 + var_stat, -1. / 6);
-#else
- activity = 1;
-#endif // 1
- sum = 0;
- for (i = 0; i < 8; i++) {
- for (j = 0; j < 8; j++)
- sum += e_lp[i * stride + j] * (double)e_lp[i * stride + j];
- }
- /* Normalize the filter to unit DC response. */
- sum *= 1. / (OD_DIST_LP_NORM * OD_DIST_LP_NORM * OD_DIST_LP_NORM *
- OD_DIST_LP_NORM);
- return activity * activity * (sum + vardist);
-}
-
-// Note : Inputs x and y are in a pixel domain
-static double od_compute_dist_common(int activity_masking, uint16_t *x,
- uint16_t *y, int bsize_w, int bsize_h,
- int qindex, od_coeff *tmp,
- od_coeff *e_lp) {
- int i, j;
- double sum = 0;
- const int mid = OD_DIST_LP_MID;
-
- for (j = 0; j < bsize_w; j++) {
- e_lp[j] = mid * tmp[j] + 2 * tmp[bsize_w + j];
- e_lp[(bsize_h - 1) * bsize_w + j] = mid * tmp[(bsize_h - 1) * bsize_w + j] +
- 2 * tmp[(bsize_h - 2) * bsize_w + j];
- }
- for (i = 1; i < bsize_h - 1; i++) {
- for (j = 0; j < bsize_w; j++) {
- e_lp[i * bsize_w + j] = mid * tmp[i * bsize_w + j] +
- tmp[(i - 1) * bsize_w + j] +
- tmp[(i + 1) * bsize_w + j];
- }
- }
- for (i = 0; i < bsize_h; i += 8) {
- for (j = 0; j < bsize_w; j += 8) {
- sum += od_compute_dist_8x8(activity_masking, &x[i * bsize_w + j],
- &y[i * bsize_w + j], &e_lp[i * bsize_w + j],
- bsize_w);
- }
- }
- /* Scale according to linear regression against SSE, for 8x8 blocks. */
- if (activity_masking) {
- sum *= 2.2 + (1.7 - 2.2) * (qindex - 99) / (210 - 99) +
- (qindex < 99 ? 2.5 * (qindex - 99) / 99 * (qindex - 99) / 99 : 0);
- } else {
- sum *= qindex >= 128
- ? 1.4 + (0.9 - 1.4) * (qindex - 128) / (209 - 128)
- : qindex <= 43 ? 1.5 + (2.0 - 1.5) * (qindex - 43) / (16 - 43)
- : 1.5 + (1.4 - 1.5) * (qindex - 43) / (128 - 43);
- }
-
- return sum;
-}
-
-static double od_compute_dist(uint16_t *x, uint16_t *y, int bsize_w,
- int bsize_h, int qindex) {
- assert(bsize_w >= 8 && bsize_h >= 8);
-
- int activity_masking = 0;
-
- int i, j;
- DECLARE_ALIGNED(16, od_coeff, e[MAX_SB_SQUARE]);
- DECLARE_ALIGNED(16, od_coeff, tmp[MAX_SB_SQUARE]);
- DECLARE_ALIGNED(16, od_coeff, e_lp[MAX_SB_SQUARE]);
- for (i = 0; i < bsize_h; i++) {
- for (j = 0; j < bsize_w; j++) {
- e[i * bsize_w + j] = x[i * bsize_w + j] - y[i * bsize_w + j];
- }
- }
- int mid = OD_DIST_LP_MID;
- for (i = 0; i < bsize_h; i++) {
- tmp[i * bsize_w] = mid * e[i * bsize_w] + 2 * e[i * bsize_w + 1];
- tmp[i * bsize_w + bsize_w - 1] =
- mid * e[i * bsize_w + bsize_w - 1] + 2 * e[i * bsize_w + bsize_w - 2];
- for (j = 1; j < bsize_w - 1; j++) {
- tmp[i * bsize_w + j] = mid * e[i * bsize_w + j] + e[i * bsize_w + j - 1] +
- e[i * bsize_w + j + 1];
- }
- }
- return od_compute_dist_common(activity_masking, x, y, bsize_w, bsize_h,
- qindex, tmp, e_lp);
-}
-
-static double od_compute_dist_diff(uint16_t *x, int16_t *e, int bsize_w,
- int bsize_h, int qindex) {
- assert(bsize_w >= 8 && bsize_h >= 8);
-
- int activity_masking = 0;
-
- DECLARE_ALIGNED(16, uint16_t, y[MAX_SB_SQUARE]);
- DECLARE_ALIGNED(16, od_coeff, tmp[MAX_SB_SQUARE]);
- DECLARE_ALIGNED(16, od_coeff, e_lp[MAX_SB_SQUARE]);
- int i, j;
- for (i = 0; i < bsize_h; i++) {
- for (j = 0; j < bsize_w; j++) {
- y[i * bsize_w + j] = x[i * bsize_w + j] - e[i * bsize_w + j];
- }
- }
- int mid = OD_DIST_LP_MID;
- for (i = 0; i < bsize_h; i++) {
- tmp[i * bsize_w] = mid * e[i * bsize_w] + 2 * e[i * bsize_w + 1];
- tmp[i * bsize_w + bsize_w - 1] =
- mid * e[i * bsize_w + bsize_w - 1] + 2 * e[i * bsize_w + bsize_w - 2];
- for (j = 1; j < bsize_w - 1; j++) {
- tmp[i * bsize_w + j] = mid * e[i * bsize_w + j] + e[i * bsize_w + j - 1] +
- e[i * bsize_w + j + 1];
- }
- }
- return od_compute_dist_common(activity_masking, x, y, bsize_w, bsize_h,
- qindex, tmp, e_lp);
-}
-
-int64_t av1_dist_8x8(const AV1_COMP *const cpi, const MACROBLOCK *x,
- const uint8_t *src, int src_stride, const uint8_t *dst,
- int dst_stride, const BLOCK_SIZE tx_bsize, int bsw,
- int bsh, int visible_w, int visible_h, int qindex) {
- int64_t d = 0;
- int i, j;
- const MACROBLOCKD *xd = &x->e_mbd;
-
- DECLARE_ALIGNED(16, uint16_t, orig[MAX_SB_SQUARE]);
- DECLARE_ALIGNED(16, uint16_t, rec[MAX_SB_SQUARE]);
-
- assert(bsw >= 8);
- assert(bsh >= 8);
- assert((bsw & 0x07) == 0);
- assert((bsh & 0x07) == 0);
-
- if (x->tune_metric == AOM_TUNE_CDEF_DIST ||
- x->tune_metric == AOM_TUNE_DAALA_DIST) {
- if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
- for (j = 0; j < bsh; j++)
- for (i = 0; i < bsw; i++)
- orig[j * bsw + i] = CONVERT_TO_SHORTPTR(src)[j * src_stride + i];
-
- if ((bsw == visible_w) && (bsh == visible_h)) {
- for (j = 0; j < bsh; j++)
- for (i = 0; i < bsw; i++)
- rec[j * bsw + i] = CONVERT_TO_SHORTPTR(dst)[j * dst_stride + i];
- } else {
- for (j = 0; j < visible_h; j++)
- for (i = 0; i < visible_w; i++)
- rec[j * bsw + i] = CONVERT_TO_SHORTPTR(dst)[j * dst_stride + i];
-
- if (visible_w < bsw) {
- for (j = 0; j < bsh; j++)
- for (i = visible_w; i < bsw; i++)
- rec[j * bsw + i] = CONVERT_TO_SHORTPTR(src)[j * src_stride + i];
- }
-
- if (visible_h < bsh) {
- for (j = visible_h; j < bsh; j++)
- for (i = 0; i < bsw; i++)
- rec[j * bsw + i] = CONVERT_TO_SHORTPTR(src)[j * src_stride + i];
- }
- }
- } else {
- for (j = 0; j < bsh; j++)
- for (i = 0; i < bsw; i++) orig[j * bsw + i] = src[j * src_stride + i];
-
- if ((bsw == visible_w) && (bsh == visible_h)) {
- for (j = 0; j < bsh; j++)
- for (i = 0; i < bsw; i++) rec[j * bsw + i] = dst[j * dst_stride + i];
- } else {
- for (j = 0; j < visible_h; j++)
- for (i = 0; i < visible_w; i++)
- rec[j * bsw + i] = dst[j * dst_stride + i];
-
- if (visible_w < bsw) {
- for (j = 0; j < bsh; j++)
- for (i = visible_w; i < bsw; i++)
- rec[j * bsw + i] = src[j * src_stride + i];
- }
-
- if (visible_h < bsh) {
- for (j = visible_h; j < bsh; j++)
- for (i = 0; i < bsw; i++)
- rec[j * bsw + i] = src[j * src_stride + i];
- }
- }
- }
- }
-
- if (x->tune_metric == AOM_TUNE_DAALA_DIST) {
- d = (int64_t)od_compute_dist(orig, rec, bsw, bsh, qindex);
- } else if (x->tune_metric == AOM_TUNE_CDEF_DIST) {
- int coeff_shift = AOMMAX(xd->bd - 8, 0);
-
- for (i = 0; i < bsh; i += 8) {
- for (j = 0; j < bsw; j += 8) {
- d += cdef_dist_8x8_16bit(&rec[i * bsw + j], bsw, &orig[i * bsw + j],
- bsw, coeff_shift);
- }
- }
- if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH)
- d = ((uint64_t)d) >> 2 * coeff_shift;
- } else {
- // Otherwise, MSE by default
- d = pixel_dist_visible_only(cpi, x, src, src_stride, dst, dst_stride,
- tx_bsize, bsh, bsw, visible_h, visible_w);
- }
-
- return d;
-}
-
-static int64_t dist_8x8_diff(const MACROBLOCK *x, const uint8_t *src,
- int src_stride, const int16_t *diff,
- int diff_stride, int bsw, int bsh, int visible_w,
- int visible_h, int qindex) {
- int64_t d = 0;
- int i, j;
- const MACROBLOCKD *xd = &x->e_mbd;
-
- DECLARE_ALIGNED(16, uint16_t, orig[MAX_SB_SQUARE]);
- DECLARE_ALIGNED(16, int16_t, diff16[MAX_SB_SQUARE]);
-
- assert(bsw >= 8);
- assert(bsh >= 8);
- assert((bsw & 0x07) == 0);
- assert((bsh & 0x07) == 0);
-
- if (x->tune_metric == AOM_TUNE_CDEF_DIST ||
- x->tune_metric == AOM_TUNE_DAALA_DIST) {
- if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
- for (j = 0; j < bsh; j++)
- for (i = 0; i < bsw; i++)
- orig[j * bsw + i] = CONVERT_TO_SHORTPTR(src)[j * src_stride + i];
- } else {
- for (j = 0; j < bsh; j++)
- for (i = 0; i < bsw; i++) orig[j * bsw + i] = src[j * src_stride + i];
- }
-
- if ((bsw == visible_w) && (bsh == visible_h)) {
- for (j = 0; j < bsh; j++)
- for (i = 0; i < bsw; i++)
- diff16[j * bsw + i] = diff[j * diff_stride + i];
- } else {
- for (j = 0; j < visible_h; j++)
- for (i = 0; i < visible_w; i++)
- diff16[j * bsw + i] = diff[j * diff_stride + i];
-
- if (visible_w < bsw) {
- for (j = 0; j < bsh; j++)
- for (i = visible_w; i < bsw; i++) diff16[j * bsw + i] = 0;
- }
-
- if (visible_h < bsh) {
- for (j = visible_h; j < bsh; j++)
- for (i = 0; i < bsw; i++) diff16[j * bsw + i] = 0;
- }
- }
- }
-
- if (x->tune_metric == AOM_TUNE_DAALA_DIST) {
- d = (int64_t)od_compute_dist_diff(orig, diff16, bsw, bsh, qindex);
- } else if (x->tune_metric == AOM_TUNE_CDEF_DIST) {
- int coeff_shift = AOMMAX(xd->bd - 8, 0);
- DECLARE_ALIGNED(16, uint16_t, dst16[MAX_SB_SQUARE]);
-
- for (i = 0; i < bsh; i++) {
- for (j = 0; j < bsw; j++) {
- dst16[i * bsw + j] = orig[i * bsw + j] - diff16[i * bsw + j];
- }
- }
-
- for (i = 0; i < bsh; i += 8) {
- for (j = 0; j < bsw; j += 8) {
- d += cdef_dist_8x8_16bit(&dst16[i * bsw + j], bsw, &orig[i * bsw + j],
- bsw, coeff_shift);
- }
- }
- // Don't scale 'd' for HBD since it will be done by caller side for diff
- // input
- } else {
- // Otherwise, MSE by default
- d = aom_sum_squares_2d_i16(diff, diff_stride, visible_w, visible_h);
- }
-
- return d;
-}
-#endif // CONFIG_DIST_8X8
-
-static void get_energy_distribution_fine(const AV1_COMP *cpi, BLOCK_SIZE bsize,
- const uint8_t *src, int src_stride,
- const uint8_t *dst, int dst_stride,
- int need_4th, double *hordist,
- double *verdist) {
- const int bw = block_size_wide[bsize];
- const int bh = block_size_high[bsize];
- unsigned int esq[16] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 };
-
- if (bsize < BLOCK_16X16 || (bsize >= BLOCK_4X16 && bsize <= BLOCK_32X8)) {
- // Special cases: calculate 'esq' values manually, as we don't have 'vf'
- // functions for the 16 (very small) sub-blocks of this block.
- const int w_shift = (bw == 4) ? 0 : (bw == 8) ? 1 : (bw == 16) ? 2 : 3;
- const int h_shift = (bh == 4) ? 0 : (bh == 8) ? 1 : (bh == 16) ? 2 : 3;
- assert(bw <= 32);
- assert(bh <= 32);
- assert(((bw - 1) >> w_shift) + (((bh - 1) >> h_shift) << 2) == 15);
- if (cpi->common.seq_params.use_highbitdepth) {
- const uint16_t *src16 = CONVERT_TO_SHORTPTR(src);
- const uint16_t *dst16 = CONVERT_TO_SHORTPTR(dst);
- for (int i = 0; i < bh; ++i)
- for (int j = 0; j < bw; ++j) {
- const int index = (j >> w_shift) + ((i >> h_shift) << 2);
- esq[index] +=
- (src16[j + i * src_stride] - dst16[j + i * dst_stride]) *
- (src16[j + i * src_stride] - dst16[j + i * dst_stride]);
- }
- } else {
- for (int i = 0; i < bh; ++i)
- for (int j = 0; j < bw; ++j) {
- const int index = (j >> w_shift) + ((i >> h_shift) << 2);
- esq[index] += (src[j + i * src_stride] - dst[j + i * dst_stride]) *
- (src[j + i * src_stride] - dst[j + i * dst_stride]);
- }
- }
- } else { // Calculate 'esq' values using 'vf' functions on the 16 sub-blocks.
- const int f_index =
- (bsize < BLOCK_SIZES) ? bsize - BLOCK_16X16 : bsize - BLOCK_8X16;
- assert(f_index >= 0 && f_index < BLOCK_SIZES_ALL);
- const BLOCK_SIZE subsize = (BLOCK_SIZE)f_index;
- assert(block_size_wide[bsize] == 4 * block_size_wide[subsize]);
- assert(block_size_high[bsize] == 4 * block_size_high[subsize]);
- cpi->fn_ptr[subsize].vf(src, src_stride, dst, dst_stride, &esq[0]);
- cpi->fn_ptr[subsize].vf(src + bw / 4, src_stride, dst + bw / 4, dst_stride,
- &esq[1]);
- cpi->fn_ptr[subsize].vf(src + bw / 2, src_stride, dst + bw / 2, dst_stride,
- &esq[2]);
- cpi->fn_ptr[subsize].vf(src + 3 * bw / 4, src_stride, dst + 3 * bw / 4,
- dst_stride, &esq[3]);
- src += bh / 4 * src_stride;
- dst += bh / 4 * dst_stride;
-
- cpi->fn_ptr[subsize].vf(src, src_stride, dst, dst_stride, &esq[4]);
- cpi->fn_ptr[subsize].vf(src + bw / 4, src_stride, dst + bw / 4, dst_stride,
- &esq[5]);
- cpi->fn_ptr[subsize].vf(src + bw / 2, src_stride, dst + bw / 2, dst_stride,
- &esq[6]);
- cpi->fn_ptr[subsize].vf(src + 3 * bw / 4, src_stride, dst + 3 * bw / 4,
- dst_stride, &esq[7]);
- src += bh / 4 * src_stride;
- dst += bh / 4 * dst_stride;
-
- cpi->fn_ptr[subsize].vf(src, src_stride, dst, dst_stride, &esq[8]);
- cpi->fn_ptr[subsize].vf(src + bw / 4, src_stride, dst + bw / 4, dst_stride,
- &esq[9]);
- cpi->fn_ptr[subsize].vf(src + bw / 2, src_stride, dst + bw / 2, dst_stride,
- &esq[10]);
- cpi->fn_ptr[subsize].vf(src + 3 * bw / 4, src_stride, dst + 3 * bw / 4,
- dst_stride, &esq[11]);
- src += bh / 4 * src_stride;
- dst += bh / 4 * dst_stride;
-
- cpi->fn_ptr[subsize].vf(src, src_stride, dst, dst_stride, &esq[12]);
- cpi->fn_ptr[subsize].vf(src + bw / 4, src_stride, dst + bw / 4, dst_stride,
- &esq[13]);
- cpi->fn_ptr[subsize].vf(src + bw / 2, src_stride, dst + bw / 2, dst_stride,
- &esq[14]);
- cpi->fn_ptr[subsize].vf(src + 3 * bw / 4, src_stride, dst + 3 * bw / 4,
- dst_stride, &esq[15]);
- }
-
- double total = (double)esq[0] + esq[1] + esq[2] + esq[3] + esq[4] + esq[5] +
- esq[6] + esq[7] + esq[8] + esq[9] + esq[10] + esq[11] +
- esq[12] + esq[13] + esq[14] + esq[15];
- if (total > 0) {
- const double e_recip = 1.0 / total;
- hordist[0] = ((double)esq[0] + esq[4] + esq[8] + esq[12]) * e_recip;
- hordist[1] = ((double)esq[1] + esq[5] + esq[9] + esq[13]) * e_recip;
- hordist[2] = ((double)esq[2] + esq[6] + esq[10] + esq[14]) * e_recip;
- if (need_4th) {
- hordist[3] = ((double)esq[3] + esq[7] + esq[11] + esq[15]) * e_recip;
- }
- verdist[0] = ((double)esq[0] + esq[1] + esq[2] + esq[3]) * e_recip;
- verdist[1] = ((double)esq[4] + esq[5] + esq[6] + esq[7]) * e_recip;
- verdist[2] = ((double)esq[8] + esq[9] + esq[10] + esq[11]) * e_recip;
- if (need_4th) {
- verdist[3] = ((double)esq[12] + esq[13] + esq[14] + esq[15]) * e_recip;
- }
- } else {
- hordist[0] = verdist[0] = 0.25;
- hordist[1] = verdist[1] = 0.25;
- hordist[2] = verdist[2] = 0.25;
- if (need_4th) {
- hordist[3] = verdist[3] = 0.25;
- }
- }
-}
-
-static int adst_vs_flipadst(const AV1_COMP *cpi, BLOCK_SIZE bsize,
- const uint8_t *src, int src_stride,
- const uint8_t *dst, int dst_stride) {
- int prune_bitmask = 0;
- double svm_proj_h = 0, svm_proj_v = 0;
- double hdist[3] = { 0, 0, 0 }, vdist[3] = { 0, 0, 0 };
- get_energy_distribution_fine(cpi, bsize, src, src_stride, dst, dst_stride, 0,
- hdist, vdist);
-
- svm_proj_v = vdist[0] * ADST_FLIP_SVM[0] + vdist[1] * ADST_FLIP_SVM[1] +
- vdist[2] * ADST_FLIP_SVM[2] + ADST_FLIP_SVM[3];
- svm_proj_h = hdist[0] * ADST_FLIP_SVM[4] + hdist[1] * ADST_FLIP_SVM[5] +
- hdist[2] * ADST_FLIP_SVM[6] + ADST_FLIP_SVM[7];
- if (svm_proj_v > FAST_EXT_TX_EDST_MID + FAST_EXT_TX_EDST_MARGIN)
- prune_bitmask |= 1 << FLIPADST_1D;
- else if (svm_proj_v < FAST_EXT_TX_EDST_MID - FAST_EXT_TX_EDST_MARGIN)
- prune_bitmask |= 1 << ADST_1D;
-
- if (svm_proj_h > FAST_EXT_TX_EDST_MID + FAST_EXT_TX_EDST_MARGIN)
- prune_bitmask |= 1 << (FLIPADST_1D + 8);
- else if (svm_proj_h < FAST_EXT_TX_EDST_MID - FAST_EXT_TX_EDST_MARGIN)
- prune_bitmask |= 1 << (ADST_1D + 8);
-
- return prune_bitmask;
-}
-
-static void get_horver_correlation(const int16_t *diff, int stride, int w,
- int h, double *hcorr, double *vcorr) {
- // Returns hor/ver correlation coefficient
- const int num = (h - 1) * (w - 1);
- double num_r;
- int i, j;
- int64_t xy_sum = 0, xz_sum = 0;
- int64_t x_sum = 0, y_sum = 0, z_sum = 0;
- int64_t x2_sum = 0, y2_sum = 0, z2_sum = 0;
- double x_var_n, y_var_n, z_var_n, xy_var_n, xz_var_n;
- *hcorr = *vcorr = 1;
-
- assert(num > 0);
- num_r = 1.0 / num;
- for (i = 1; i < h; ++i) {
- for (j = 1; j < w; ++j) {
- const int16_t x = diff[i * stride + j];
- const int16_t y = diff[i * stride + j - 1];
- const int16_t z = diff[(i - 1) * stride + j];
- xy_sum += x * y;
- xz_sum += x * z;
- x_sum += x;
- y_sum += y;
- z_sum += z;
- x2_sum += x * x;
- y2_sum += y * y;
- z2_sum += z * z;
- }
- }
- x_var_n = x2_sum - (x_sum * x_sum) * num_r;
- y_var_n = y2_sum - (y_sum * y_sum) * num_r;
- z_var_n = z2_sum - (z_sum * z_sum) * num_r;
- xy_var_n = xy_sum - (x_sum * y_sum) * num_r;
- xz_var_n = xz_sum - (x_sum * z_sum) * num_r;
- if (x_var_n > 0 && y_var_n > 0) {
- *hcorr = xy_var_n / sqrt(x_var_n * y_var_n);
- *hcorr = *hcorr < 0 ? 0 : *hcorr;
- }
- if (x_var_n > 0 && z_var_n > 0) {
- *vcorr = xz_var_n / sqrt(x_var_n * z_var_n);
- *vcorr = *vcorr < 0 ? 0 : *vcorr;
- }
-}
-
-static int dct_vs_idtx(const int16_t *diff, int stride, int w, int h) {
- double hcorr, vcorr;
- int prune_bitmask = 0;
- get_horver_correlation(diff, stride, w, h, &hcorr, &vcorr);
-
- if (vcorr > FAST_EXT_TX_CORR_MID + FAST_EXT_TX_CORR_MARGIN)
- prune_bitmask |= 1 << IDTX_1D;
- else if (vcorr < FAST_EXT_TX_CORR_MID - FAST_EXT_TX_CORR_MARGIN)
- prune_bitmask |= 1 << DCT_1D;
-
- if (hcorr > FAST_EXT_TX_CORR_MID + FAST_EXT_TX_CORR_MARGIN)
- prune_bitmask |= 1 << (IDTX_1D + 8);
- else if (hcorr < FAST_EXT_TX_CORR_MID - FAST_EXT_TX_CORR_MARGIN)
- prune_bitmask |= 1 << (DCT_1D + 8);
- return prune_bitmask;
-}
-
-// Performance drop: 0.5%, Speed improvement: 24%
-static int prune_two_for_sby(const AV1_COMP *cpi, BLOCK_SIZE bsize,
- MACROBLOCK *x, const MACROBLOCKD *xd,
- int adst_flipadst, int dct_idtx) {
- int prune = 0;
-
- if (adst_flipadst) {
- const struct macroblock_plane *const p = &x->plane[0];
- const struct macroblockd_plane *const pd = &xd->plane[0];
- prune |= adst_vs_flipadst(cpi, bsize, p->src.buf, p->src.stride,
- pd->dst.buf, pd->dst.stride);
- }
- if (dct_idtx) {
- av1_subtract_plane(x, bsize, 0);
- const struct macroblock_plane *const p = &x->plane[0];
- const int bw = block_size_wide[bsize];
- const int bh = block_size_high[bsize];
- prune |= dct_vs_idtx(p->src_diff, bw, bw, bh);
- }
-
- return prune;
-}
-
-// Performance drop: 0.3%, Speed improvement: 5%
-static int prune_one_for_sby(const AV1_COMP *cpi, BLOCK_SIZE bsize,
- const MACROBLOCK *x, const MACROBLOCKD *xd) {
- const struct macroblock_plane *const p = &x->plane[0];
- const struct macroblockd_plane *const pd = &xd->plane[0];
- return adst_vs_flipadst(cpi, bsize, p->src.buf, p->src.stride, pd->dst.buf,
- pd->dst.stride);
-}
-
-// 1D Transforms used in inter set, this needs to be changed if
-// ext_tx_used_inter is changed
-static const int ext_tx_used_inter_1D[EXT_TX_SETS_INTER][TX_TYPES_1D] = {
- { 1, 0, 0, 0 },
- { 1, 1, 1, 1 },
- { 1, 1, 1, 1 },
- { 1, 0, 0, 1 },
-};
-
-static void get_energy_distribution_finer(const int16_t *diff, int stride,
- int bw, int bh, float *hordist,
- float *verdist) {
- // First compute downscaled block energy values (esq); downscale factors
- // are defined by w_shift and h_shift.
- unsigned int esq[256];
- const int w_shift = bw <= 8 ? 0 : 1;
- const int h_shift = bh <= 8 ? 0 : 1;
- const int esq_w = bw >> w_shift;
- const int esq_h = bh >> h_shift;
- const int esq_sz = esq_w * esq_h;
- int i, j;
- memset(esq, 0, esq_sz * sizeof(esq[0]));
- if (w_shift) {
- for (i = 0; i < bh; i++) {
- unsigned int *cur_esq_row = esq + (i >> h_shift) * esq_w;
- const int16_t *cur_diff_row = diff + i * stride;
- for (j = 0; j < bw; j += 2) {
- cur_esq_row[j >> 1] += (cur_diff_row[j] * cur_diff_row[j] +
- cur_diff_row[j + 1] * cur_diff_row[j + 1]);
- }
- }
- } else {
- for (i = 0; i < bh; i++) {
- unsigned int *cur_esq_row = esq + (i >> h_shift) * esq_w;
- const int16_t *cur_diff_row = diff + i * stride;
- for (j = 0; j < bw; j++) {
- cur_esq_row[j] += cur_diff_row[j] * cur_diff_row[j];
- }
- }
- }
-
- uint64_t total = 0;
- for (i = 0; i < esq_sz; i++) total += esq[i];
-
- // Output hordist and verdist arrays are normalized 1D projections of esq
- if (total == 0) {
- float hor_val = 1.0f / esq_w;
- for (j = 0; j < esq_w - 1; j++) hordist[j] = hor_val;
- float ver_val = 1.0f / esq_h;
- for (i = 0; i < esq_h - 1; i++) verdist[i] = ver_val;
- return;
- }
-
- const float e_recip = 1.0f / (float)total;
- memset(hordist, 0, (esq_w - 1) * sizeof(hordist[0]));
- memset(verdist, 0, (esq_h - 1) * sizeof(verdist[0]));
- const unsigned int *cur_esq_row;
- for (i = 0; i < esq_h - 1; i++) {
- cur_esq_row = esq + i * esq_w;
- for (j = 0; j < esq_w - 1; j++) {
- hordist[j] += (float)cur_esq_row[j];
- verdist[i] += (float)cur_esq_row[j];
- }
- verdist[i] += (float)cur_esq_row[j];
- }
- cur_esq_row = esq + i * esq_w;
- for (j = 0; j < esq_w - 1; j++) hordist[j] += (float)cur_esq_row[j];
-
- for (j = 0; j < esq_w - 1; j++) hordist[j] *= e_recip;
- for (i = 0; i < esq_h - 1; i++) verdist[i] *= e_recip;
-}
-
-// Similar to get_horver_correlation, but also takes into account first
-// row/column, when computing horizontal/vertical correlation.
-static void get_horver_correlation_full(const int16_t *diff, int stride, int w,
- int h, float *hcorr, float *vcorr) {
- const float num_hor = (float)(h * (w - 1));
- const float num_ver = (float)((h - 1) * w);
- int i, j;
-
- // The following notation is used:
- // x - current pixel
- // y - left neighbor pixel
- // z - top neighbor pixel
- int64_t xy_sum = 0, xz_sum = 0;
- int64_t xhor_sum = 0, xver_sum = 0, y_sum = 0, z_sum = 0;
- int64_t x2hor_sum = 0, x2ver_sum = 0, y2_sum = 0, z2_sum = 0;
-
- int16_t x, y, z;
- for (j = 1; j < w; ++j) {
- x = diff[j];
- y = diff[j - 1];
- xy_sum += x * y;
- xhor_sum += x;
- y_sum += y;
- x2hor_sum += x * x;
- y2_sum += y * y;
- }
- for (i = 1; i < h; ++i) {
- x = diff[i * stride];
- z = diff[(i - 1) * stride];
- xz_sum += x * z;
- xver_sum += x;
- z_sum += z;
- x2ver_sum += x * x;
- z2_sum += z * z;
- for (j = 1; j < w; ++j) {
- x = diff[i * stride + j];
- y = diff[i * stride + j - 1];
- z = diff[(i - 1) * stride + j];
- xy_sum += x * y;
- xz_sum += x * z;
- xhor_sum += x;
- xver_sum += x;
- y_sum += y;
- z_sum += z;
- x2hor_sum += x * x;
- x2ver_sum += x * x;
- y2_sum += y * y;
- z2_sum += z * z;
- }
- }
- const float xhor_var_n = x2hor_sum - (xhor_sum * xhor_sum) / num_hor;
- const float y_var_n = y2_sum - (y_sum * y_sum) / num_hor;
- const float xy_var_n = xy_sum - (xhor_sum * y_sum) / num_hor;
- const float xver_var_n = x2ver_sum - (xver_sum * xver_sum) / num_ver;
- const float z_var_n = z2_sum - (z_sum * z_sum) / num_ver;
- const float xz_var_n = xz_sum - (xver_sum * z_sum) / num_ver;
-
- *hcorr = *vcorr = 1;
- if (xhor_var_n > 0 && y_var_n > 0) {
- *hcorr = xy_var_n / sqrtf(xhor_var_n * y_var_n);
- *hcorr = *hcorr < 0 ? 0 : *hcorr;
- }
- if (xver_var_n > 0 && z_var_n > 0) {
- *vcorr = xz_var_n / sqrtf(xver_var_n * z_var_n);
- *vcorr = *vcorr < 0 ? 0 : *vcorr;
- }
-}
-
-// Transforms raw scores into a probability distribution across 16 TX types
-static void score_2D_transform_pow8(float *scores_2D, float shift) {
- float sum = 0.0f;
- int i;
-
- for (i = 0; i < 16; i++) {
- float v, v2, v4;
- v = AOMMAX(scores_2D[i] + shift, 0.0f);
- v2 = v * v;
- v4 = v2 * v2;
- scores_2D[i] = v4 * v4;
- sum += scores_2D[i];
- }
- for (i = 0; i < 16; i++) scores_2D[i] /= sum;
-}
-
-// These thresholds were calibrated to provide a certain number of TX types
-// pruned by the model on average, i.e. selecting a threshold with index i
-// will lead to pruning i+1 TX types on average
-static const float *prune_2D_adaptive_thresholds[] = {
- // TX_4X4
- (float[]){ 0.00549f, 0.01306f, 0.02039f, 0.02747f, 0.03406f, 0.04065f,
- 0.04724f, 0.05383f, 0.06067f, 0.06799f, 0.07605f, 0.08533f,
- 0.09778f, 0.11780f },
- // TX_8X8
- (float[]){ 0.00037f, 0.00183f, 0.00525f, 0.01038f, 0.01697f, 0.02502f,
- 0.03381f, 0.04333f, 0.05286f, 0.06287f, 0.07434f, 0.08850f,
- 0.10803f, 0.14124f },
- // TX_16X16
- (float[]){ 0.01404f, 0.02820f, 0.04211f, 0.05164f, 0.05798f, 0.06335f,
- 0.06897f, 0.07629f, 0.08875f, 0.11169f },
- // TX_32X32
- NULL,
- // TX_64X64
- NULL,
- // TX_4X8
- (float[]){ 0.00183f, 0.00745f, 0.01428f, 0.02185f, 0.02966f, 0.03723f,
- 0.04456f, 0.05188f, 0.05920f, 0.06702f, 0.07605f, 0.08704f,
- 0.10168f, 0.12585f },
- // TX_8X4
- (float[]){ 0.00085f, 0.00476f, 0.01135f, 0.01892f, 0.02698f, 0.03528f,
- 0.04358f, 0.05164f, 0.05994f, 0.06848f, 0.07849f, 0.09021f,
- 0.10583f, 0.13123f },
- // TX_8X16
- (float[]){ 0.00037f, 0.00232f, 0.00671f, 0.01257f, 0.01965f, 0.02722f,
- 0.03552f, 0.04382f, 0.05237f, 0.06189f, 0.07336f, 0.08728f,
- 0.10730f, 0.14221f },
- // TX_16X8
- (float[]){ 0.00061f, 0.00330f, 0.00818f, 0.01453f, 0.02185f, 0.02966f,
- 0.03772f, 0.04578f, 0.05383f, 0.06262f, 0.07288f, 0.08582f,
- 0.10339f, 0.13464f },
- // TX_16X32
- NULL,
- // TX_32X16
- NULL,
- // TX_32X64
- NULL,
- // TX_64X32
- NULL,
- // TX_4X16
- (float[]){ 0.00232f, 0.00671f, 0.01257f, 0.01941f, 0.02673f, 0.03430f,
- 0.04211f, 0.04968f, 0.05750f, 0.06580f, 0.07507f, 0.08655f,
- 0.10242f, 0.12878f },
- // TX_16X4
- (float[]){ 0.00110f, 0.00525f, 0.01208f, 0.01990f, 0.02795f, 0.03601f,
- 0.04358f, 0.05115f, 0.05896f, 0.06702f, 0.07629f, 0.08752f,
- 0.10217f, 0.12610f },
- // TX_8X32
- NULL,
- // TX_32X8
- NULL,
- // TX_16X64
- NULL,
- // TX_64X16
- NULL,
-};
-
-static uint16_t prune_tx_2D(MACROBLOCK *x, BLOCK_SIZE bsize, TX_SIZE tx_size,
- int blk_row, int blk_col, TxSetType tx_set_type,
- TX_TYPE_PRUNE_MODE prune_mode) {
- static const int tx_type_table_2D[16] = {
- DCT_DCT, DCT_ADST, DCT_FLIPADST, V_DCT,
- ADST_DCT, ADST_ADST, ADST_FLIPADST, V_ADST,
- FLIPADST_DCT, FLIPADST_ADST, FLIPADST_FLIPADST, V_FLIPADST,
- H_DCT, H_ADST, H_FLIPADST, IDTX
- };
- if (tx_set_type != EXT_TX_SET_ALL16 &&
- tx_set_type != EXT_TX_SET_DTT9_IDTX_1DDCT)
- return 0;
- const NN_CONFIG *nn_config_hor = av1_tx_type_nnconfig_map_hor[tx_size];
- const NN_CONFIG *nn_config_ver = av1_tx_type_nnconfig_map_ver[tx_size];
- if (!nn_config_hor || !nn_config_ver) return 0; // Model not established yet.
-
- aom_clear_system_state();
- float hfeatures[16], vfeatures[16];
- float hscores[4], vscores[4];
- float scores_2D[16];
- const int bw = tx_size_wide[tx_size];
- const int bh = tx_size_high[tx_size];
- const int hfeatures_num = bw <= 8 ? bw : bw / 2;
- const int vfeatures_num = bh <= 8 ? bh : bh / 2;
- assert(hfeatures_num <= 16);
- assert(vfeatures_num <= 16);
-
- const struct macroblock_plane *const p = &x->plane[0];
- const int diff_stride = block_size_wide[bsize];
- const int16_t *diff = p->src_diff + 4 * blk_row * diff_stride + 4 * blk_col;
- get_energy_distribution_finer(diff, diff_stride, bw, bh, hfeatures,
- vfeatures);
- get_horver_correlation_full(diff, diff_stride, bw, bh,
- &hfeatures[hfeatures_num - 1],
- &vfeatures[vfeatures_num - 1]);
- av1_nn_predict(hfeatures, nn_config_hor, hscores);
- av1_nn_predict(vfeatures, nn_config_ver, vscores);
-
- float score_2D_average = 0.0f;
- for (int i = 0; i < 4; i++) {
- float *cur_scores_2D = scores_2D + i * 4;
- cur_scores_2D[0] = vscores[i] * hscores[0];
- cur_scores_2D[1] = vscores[i] * hscores[1];
- cur_scores_2D[2] = vscores[i] * hscores[2];
- cur_scores_2D[3] = vscores[i] * hscores[3];
- score_2D_average += cur_scores_2D[0] + cur_scores_2D[1] + cur_scores_2D[2] +
- cur_scores_2D[3];
- }
- score_2D_average /= 16;
-
- const int prune_aggr_table[2][2] = { { 6, 4 }, { 10, 7 } };
- int pruning_aggressiveness = 1;
- if (tx_set_type == EXT_TX_SET_ALL16) {
- score_2D_transform_pow8(scores_2D, (10 - score_2D_average));
- pruning_aggressiveness =
- prune_aggr_table[prune_mode - PRUNE_2D_ACCURATE][0];
- } else if (tx_set_type == EXT_TX_SET_DTT9_IDTX_1DDCT) {
- score_2D_transform_pow8(scores_2D, (20 - score_2D_average));
- pruning_aggressiveness =
- prune_aggr_table[prune_mode - PRUNE_2D_ACCURATE][1];
- }
-
- // Always keep the TX type with the highest score, prune all others with
- // score below score_thresh.
- int max_score_i = 0;
- float max_score = 0.0f;
- for (int i = 0; i < 16; i++) {
- if (scores_2D[i] > max_score &&
- av1_ext_tx_used[tx_set_type][tx_type_table_2D[i]]) {
- max_score = scores_2D[i];
- max_score_i = i;
- }
- }
-
- const float score_thresh =
- prune_2D_adaptive_thresholds[tx_size][pruning_aggressiveness - 1];
-
- uint16_t prune_bitmask = 0;
- for (int i = 0; i < 16; i++) {
- if (scores_2D[i] < score_thresh && i != max_score_i)
- prune_bitmask |= (1 << tx_type_table_2D[i]);
- }
- return prune_bitmask;
-}
-
-// ((prune >> vtx_tab[tx_type]) & 1)
-static const uint16_t prune_v_mask[] = {
- 0x0000, 0x0425, 0x108a, 0x14af, 0x4150, 0x4575, 0x51da, 0x55ff,
- 0xaa00, 0xae25, 0xba8a, 0xbeaf, 0xeb50, 0xef75, 0xfbda, 0xffff,
-};
-
-// ((prune >> (htx_tab[tx_type] + 8)) & 1)
-static const uint16_t prune_h_mask[] = {
- 0x0000, 0x0813, 0x210c, 0x291f, 0x80e0, 0x88f3, 0xa1ec, 0xa9ff,
- 0x5600, 0x5e13, 0x770c, 0x7f1f, 0xd6e0, 0xdef3, 0xf7ec, 0xffff,
-};
-
-static INLINE uint16_t gen_tx_search_prune_mask(int tx_search_prune) {
- uint8_t prune_v = tx_search_prune & 0x0F;
- uint8_t prune_h = (tx_search_prune >> 8) & 0x0F;
- return (prune_v_mask[prune_v] & prune_h_mask[prune_h]);
-}
-
-static void prune_tx(const AV1_COMP *cpi, BLOCK_SIZE bsize, MACROBLOCK *x,
- const MACROBLOCKD *const xd, int tx_set_type) {
- x->tx_search_prune[tx_set_type] = 0;
- x->tx_split_prune_flag = 0;
- const MB_MODE_INFO *mbmi = xd->mi[0];
- if (!is_inter_block(mbmi) || cpi->sf.tx_type_search.prune_mode == NO_PRUNE ||
- x->use_default_inter_tx_type || xd->lossless[mbmi->segment_id] ||
- x->cb_partition_scan)
- return;
- int tx_set = ext_tx_set_index[1][tx_set_type];
- assert(tx_set >= 0);
- const int *tx_set_1D = ext_tx_used_inter_1D[tx_set];
- int prune = 0;
- switch (cpi->sf.tx_type_search.prune_mode) {
- case NO_PRUNE: return;
- case PRUNE_ONE:
- if (!(tx_set_1D[FLIPADST_1D] & tx_set_1D[ADST_1D])) return;
- prune = prune_one_for_sby(cpi, bsize, x, xd);
- x->tx_search_prune[tx_set_type] = gen_tx_search_prune_mask(prune);
- break;
- case PRUNE_TWO:
- if (!(tx_set_1D[FLIPADST_1D] & tx_set_1D[ADST_1D])) {
- if (!(tx_set_1D[DCT_1D] & tx_set_1D[IDTX_1D])) return;
- prune = prune_two_for_sby(cpi, bsize, x, xd, 0, 1);
- } else if (!(tx_set_1D[DCT_1D] & tx_set_1D[IDTX_1D])) {
- prune = prune_two_for_sby(cpi, bsize, x, xd, 1, 0);
- } else {
- prune = prune_two_for_sby(cpi, bsize, x, xd, 1, 1);
- }
- x->tx_search_prune[tx_set_type] = gen_tx_search_prune_mask(prune);
- break;
- case PRUNE_2D_ACCURATE:
- case PRUNE_2D_FAST: break;
- default: assert(0);
- }
-}
-
-static void model_rd_from_sse(const AV1_COMP *const cpi,
- const MACROBLOCK *const x, BLOCK_SIZE plane_bsize,
- int plane, int64_t sse, int num_samples,
- int *rate, int64_t *dist) {
- (void)num_samples;
- const MACROBLOCKD *const xd = &x->e_mbd;
- const struct macroblockd_plane *const pd = &xd->plane[plane];
- const int dequant_shift =
- (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) ? xd->bd - 5 : 3;
-
- // Fast approximate the modelling function.
- if (cpi->sf.simple_model_rd_from_var) {
- const int64_t square_error = sse;
- int quantizer = pd->dequant_Q3[1] >> dequant_shift;
- if (quantizer < 120)
- *rate = (int)AOMMIN(
- (square_error * (280 - quantizer)) >> (16 - AV1_PROB_COST_SHIFT),
- INT_MAX);
- else
- *rate = 0;
- assert(*rate >= 0);
- *dist = (square_error * quantizer) >> 8;
- } else {
- av1_model_rd_from_var_lapndz(sse, num_pels_log2_lookup[plane_bsize],
- pd->dequant_Q3[1] >> dequant_shift, rate,
- dist);
- }
- *dist <<= 4;
-}
-
-#if CONFIG_COLLECT_INTER_MODE_RD_STATS
-static int64_t get_sse(const AV1_COMP *cpi, const MACROBLOCK *x) {
- const AV1_COMMON *cm = &cpi->common;
- const int num_planes = av1_num_planes(cm);
- const MACROBLOCKD *xd = &x->e_mbd;
- const MB_MODE_INFO *mbmi = xd->mi[0];
- int64_t total_sse = 0;
- for (int plane = 0; plane < num_planes; ++plane) {
- const struct macroblock_plane *const p = &x->plane[plane];
- const struct macroblockd_plane *const pd = &xd->plane[plane];
- const BLOCK_SIZE bs = get_plane_block_size(mbmi->sb_type, pd->subsampling_x,
- pd->subsampling_y);
- unsigned int sse;
-
- if (x->skip_chroma_rd && plane) continue;
-
- cpi->fn_ptr[bs].vf(p->src.buf, p->src.stride, pd->dst.buf, pd->dst.stride,
- &sse);
- total_sse += sse;
- }
- total_sse <<= 4;
- return total_sse;
-}
-#endif
-
-static void model_rd_for_sb(const AV1_COMP *const cpi, BLOCK_SIZE bsize,
- MACROBLOCK *x, MACROBLOCKD *xd, int plane_from,
- int plane_to, int mi_row, int mi_col,
- int *out_rate_sum, int64_t *out_dist_sum,
- int *skip_txfm_sb, int64_t *skip_sse_sb,
- int *plane_rate, int64_t *plane_sse,
- int64_t *plane_dist) {
- // Note our transform coeffs are 8 times an orthogonal transform.
- // Hence quantizer step is also 8 times. To get effective quantizer
- // we need to divide by 8 before sending to modeling function.
- int plane;
- (void)mi_row;
- (void)mi_col;
- const int ref = xd->mi[0]->ref_frame[0];
-
- int64_t rate_sum = 0;
- int64_t dist_sum = 0;
- int64_t total_sse = 0;
-
- for (plane = plane_from; plane <= plane_to; ++plane) {
- struct macroblock_plane *const p = &x->plane[plane];
- struct macroblockd_plane *const pd = &xd->plane[plane];
- const BLOCK_SIZE plane_bsize =
- get_plane_block_size(bsize, pd->subsampling_x, pd->subsampling_y);
- const int bw = block_size_wide[plane_bsize];
- const int bh = block_size_high[plane_bsize];
- int64_t sse;
- int rate;
- int64_t dist;
-
- if (x->skip_chroma_rd && plane) continue;
-
- if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
- sse = aom_highbd_sse(p->src.buf, p->src.stride, pd->dst.buf,
- pd->dst.stride, bw, bh);
- } else {
- sse = aom_sse(p->src.buf, p->src.stride, pd->dst.buf, pd->dst.stride, bw,
- bh);
- }
- sse = ROUND_POWER_OF_TWO(sse, (xd->bd - 8) * 2);
-
- model_rd_from_sse(cpi, x, plane_bsize, plane, sse, bw * bh, &rate, &dist);
-
- if (plane == 0) x->pred_sse[ref] = (unsigned int)AOMMIN(sse, UINT_MAX);
-
- total_sse += sse;
- rate_sum += rate;
- dist_sum += dist;
- if (plane_rate) plane_rate[plane] = rate;
- if (plane_sse) plane_sse[plane] = sse;
- if (plane_dist) plane_dist[plane] = dist;
- assert(rate_sum >= 0);
- }
-
- if (skip_txfm_sb) *skip_txfm_sb = total_sse == 0;
- if (skip_sse_sb) *skip_sse_sb = total_sse << 4;
- rate_sum = AOMMIN(rate_sum, INT_MAX);
- *out_rate_sum = (int)rate_sum;
- *out_dist_sum = dist_sum;
-}
-
-static void check_block_skip(const AV1_COMP *const cpi, BLOCK_SIZE bsize,
- MACROBLOCK *x, MACROBLOCKD *xd, int plane_from,
- int plane_to, int *skip_txfm_sb) {
- *skip_txfm_sb = 1;
- for (int plane = plane_from; plane <= plane_to; ++plane) {
- struct macroblock_plane *const p = &x->plane[plane];
- struct macroblockd_plane *const pd = &xd->plane[plane];
- const BLOCK_SIZE bs =
- get_plane_block_size(bsize, pd->subsampling_x, pd->subsampling_y);
- unsigned int sse;
-
- if (x->skip_chroma_rd && plane) continue;
-
- // Since fast HBD variance functions scale down sse by 4 bit, we first use
- // fast vf implementation to rule out blocks with non-zero scaled sse. Then,
- // only if the source is HBD and the scaled sse is 0, accurate sse
- // computation is applied to determine if the sse is really 0. This step is
- // necessary for HBD lossless coding.
- cpi->fn_ptr[bs].vf(p->src.buf, p->src.stride, pd->dst.buf, pd->dst.stride,
- &sse);
- if (sse) {
- *skip_txfm_sb = 0;
- return;
- } else if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
- uint64_t sse64 = aom_highbd_sse_odd_size(
- p->src.buf, p->src.stride, pd->dst.buf, pd->dst.stride,
- block_size_wide[bs], block_size_high[bs]);
-
- if (sse64) {
- *skip_txfm_sb = 0;
- return;
- }
- }
- }
- return;
-}
-
-int64_t av1_block_error_c(const tran_low_t *coeff, const tran_low_t *dqcoeff,
- intptr_t block_size, int64_t *ssz) {
- int i;
- int64_t error = 0, sqcoeff = 0;
-
- for (i = 0; i < block_size; i++) {
- const int diff = coeff[i] - dqcoeff[i];
- error += diff * diff;
- sqcoeff += coeff[i] * coeff[i];
- }
-
- *ssz = sqcoeff;
- return error;
-}
-
-int64_t av1_highbd_block_error_c(const tran_low_t *coeff,
- const tran_low_t *dqcoeff, intptr_t block_size,
- int64_t *ssz, int bd) {
- int i;
- int64_t error = 0, sqcoeff = 0;
- int shift = 2 * (bd - 8);
- int rounding = shift > 0 ? 1 << (shift - 1) : 0;
-
- for (i = 0; i < block_size; i++) {
- const int64_t diff = coeff[i] - dqcoeff[i];
- error += diff * diff;
- sqcoeff += (int64_t)coeff[i] * (int64_t)coeff[i];
- }
- assert(error >= 0 && sqcoeff >= 0);
- error = (error + rounding) >> shift;
- sqcoeff = (sqcoeff + rounding) >> shift;
-
- *ssz = sqcoeff;
- return error;
-}
-
-// Get transform block visible dimensions cropped to the MI units.
-static void get_txb_dimensions(const MACROBLOCKD *xd, int plane,
- BLOCK_SIZE plane_bsize, int blk_row, int blk_col,
- BLOCK_SIZE tx_bsize, int *width, int *height,
- int *visible_width, int *visible_height) {
- assert(tx_bsize <= plane_bsize);
- int txb_height = block_size_high[tx_bsize];
- int txb_width = block_size_wide[tx_bsize];
- const int block_height = block_size_high[plane_bsize];
- const int block_width = block_size_wide[plane_bsize];
- const struct macroblockd_plane *const pd = &xd->plane[plane];
- // TODO(aconverse@google.com): Investigate using crop_width/height here rather
- // than the MI size
- const int block_rows =
- (xd->mb_to_bottom_edge >= 0)
- ? block_height
- : (xd->mb_to_bottom_edge >> (3 + pd->subsampling_y)) + block_height;
- const int block_cols =
- (xd->mb_to_right_edge >= 0)
- ? block_width
- : (xd->mb_to_right_edge >> (3 + pd->subsampling_x)) + block_width;
- const int tx_unit_size = tx_size_wide_log2[0];
- if (width) *width = txb_width;
- if (height) *height = txb_height;
- *visible_width = clamp(block_cols - (blk_col << tx_unit_size), 0, txb_width);
- *visible_height =
- clamp(block_rows - (blk_row << tx_unit_size), 0, txb_height);
-}
-
-// Compute the pixel domain distortion from src and dst on all visible 4x4s in
-// the
-// transform block.
-static unsigned pixel_dist(const AV1_COMP *const cpi, const MACROBLOCK *x,
- int plane, const uint8_t *src, const int src_stride,
- const uint8_t *dst, const int dst_stride,
- int blk_row, int blk_col,
- const BLOCK_SIZE plane_bsize,
- const BLOCK_SIZE tx_bsize) {
- int txb_rows, txb_cols, visible_rows, visible_cols;
- const MACROBLOCKD *xd = &x->e_mbd;
-
- get_txb_dimensions(xd, plane, plane_bsize, blk_row, blk_col, tx_bsize,
- &txb_cols, &txb_rows, &visible_cols, &visible_rows);
- assert(visible_rows > 0);
- assert(visible_cols > 0);
-
-#if CONFIG_DIST_8X8
- if (x->using_dist_8x8 && plane == 0)
- return (unsigned)av1_dist_8x8(cpi, x, src, src_stride, dst, dst_stride,
- tx_bsize, txb_cols, txb_rows, visible_cols,
- visible_rows, x->qindex);
-#endif // CONFIG_DIST_8X8
-
- unsigned sse = pixel_dist_visible_only(cpi, x, src, src_stride, dst,
- dst_stride, tx_bsize, txb_rows,
- txb_cols, visible_rows, visible_cols);
-
- return sse;
-}
-
-// Compute the pixel domain distortion from diff on all visible 4x4s in the
-// transform block.
-static INLINE int64_t pixel_diff_dist(const MACROBLOCK *x, int plane,
- int blk_row, int blk_col,
- const BLOCK_SIZE plane_bsize,
- const BLOCK_SIZE tx_bsize) {
- int visible_rows, visible_cols;
- const MACROBLOCKD *xd = &x->e_mbd;
- get_txb_dimensions(xd, plane, plane_bsize, blk_row, blk_col, tx_bsize, NULL,
- NULL, &visible_cols, &visible_rows);
- const int diff_stride = block_size_wide[plane_bsize];
- const int16_t *diff = x->plane[plane].src_diff;
-#if CONFIG_DIST_8X8
- int txb_height = block_size_high[tx_bsize];
- int txb_width = block_size_wide[tx_bsize];
- if (x->using_dist_8x8 && plane == 0) {
- const int src_stride = x->plane[plane].src.stride;
- const int src_idx = (blk_row * src_stride + blk_col)
- << tx_size_wide_log2[0];
- const int diff_idx = (blk_row * diff_stride + blk_col)
- << tx_size_wide_log2[0];
- const uint8_t *src = &x->plane[plane].src.buf[src_idx];
- return dist_8x8_diff(x, src, src_stride, diff + diff_idx, diff_stride,
- txb_width, txb_height, visible_cols, visible_rows,
- x->qindex);
- }
-#endif
- diff += ((blk_row * diff_stride + blk_col) << tx_size_wide_log2[0]);
- return aom_sum_squares_2d_i16(diff, diff_stride, visible_cols, visible_rows);
-}
-
-int av1_count_colors(const uint8_t *src, int stride, int rows, int cols,
- int *val_count) {
- const int max_pix_val = 1 << 8;
- memset(val_count, 0, max_pix_val * sizeof(val_count[0]));
- for (int r = 0; r < rows; ++r) {
- for (int c = 0; c < cols; ++c) {
- const int this_val = src[r * stride + c];
- assert(this_val < max_pix_val);
- ++val_count[this_val];
- }
- }
- int n = 0;
- for (int i = 0; i < max_pix_val; ++i) {
- if (val_count[i]) ++n;
- }
- return n;
-}
-
-int av1_count_colors_highbd(const uint8_t *src8, int stride, int rows, int cols,
- int bit_depth, int *val_count) {
- assert(bit_depth <= 12);
- const int max_pix_val = 1 << bit_depth;
- const uint16_t *src = CONVERT_TO_SHORTPTR(src8);
- memset(val_count, 0, max_pix_val * sizeof(val_count[0]));
- for (int r = 0; r < rows; ++r) {
- for (int c = 0; c < cols; ++c) {
- const int this_val = src[r * stride + c];
- assert(this_val < max_pix_val);
- if (this_val >= max_pix_val) return 0;
- ++val_count[this_val];
- }
- }
- int n = 0;
- for (int i = 0; i < max_pix_val; ++i) {
- if (val_count[i]) ++n;
- }
- return n;
-}
-
-static void inverse_transform_block_facade(MACROBLOCKD *xd, int plane,
- int block, int blk_row, int blk_col,
- int eob, int reduced_tx_set) {
- struct macroblockd_plane *const pd = &xd->plane[plane];
- tran_low_t *dqcoeff = BLOCK_OFFSET(pd->dqcoeff, block);
- const PLANE_TYPE plane_type = get_plane_type(plane);
- const TX_SIZE tx_size = av1_get_tx_size(plane, xd);
- const TX_TYPE tx_type = av1_get_tx_type(plane_type, xd, blk_row, blk_col,
- tx_size, reduced_tx_set);
- const int dst_stride = pd->dst.stride;
- uint8_t *dst =
- &pd->dst.buf[(blk_row * dst_stride + blk_col) << tx_size_wide_log2[0]];
- av1_inverse_transform_block(xd, dqcoeff, plane, tx_type, tx_size, dst,
- dst_stride, eob, reduced_tx_set);
-}
-
-static int find_tx_size_rd_info(TXB_RD_RECORD *cur_record, const uint32_t hash);
-
-static uint32_t get_intra_txb_hash(MACROBLOCK *x, int plane, int blk_row,
- int blk_col, BLOCK_SIZE plane_bsize,
- TX_SIZE tx_size) {
- int16_t tmp_data[64 * 64];
- const int diff_stride = block_size_wide[plane_bsize];
- const int16_t *diff = x->plane[plane].src_diff;
- const int16_t *cur_diff_row = diff + 4 * blk_row * diff_stride + 4 * blk_col;
- const int txb_w = tx_size_wide[tx_size];
- const int txb_h = tx_size_high[tx_size];
- uint8_t *hash_data = (uint8_t *)cur_diff_row;
- if (txb_w != diff_stride) {
- int16_t *cur_hash_row = tmp_data;
- for (int i = 0; i < txb_h; i++) {
- memcpy(cur_hash_row, cur_diff_row, sizeof(*diff) * txb_w);
- cur_hash_row += txb_w;
- cur_diff_row += diff_stride;
- }
- hash_data = (uint8_t *)tmp_data;
- }
- CRC32C *crc = &x->mb_rd_record.crc_calculator;
- const uint32_t hash = av1_get_crc32c_value(crc, hash_data, 2 * txb_w * txb_h);
- return (hash << 5) + tx_size;
-}
-
-static INLINE void dist_block_tx_domain(MACROBLOCK *x, int plane, int block,
- TX_SIZE tx_size, int64_t *out_dist,
- int64_t *out_sse) {
- MACROBLOCKD *const xd = &x->e_mbd;
- const struct macroblock_plane *const p = &x->plane[plane];
- const struct macroblockd_plane *const pd = &xd->plane[plane];
- // Transform domain distortion computation is more efficient as it does
- // not involve an inverse transform, but it is less accurate.
- const int buffer_length = av1_get_max_eob(tx_size);
- int64_t this_sse;
- // TX-domain results need to shift down to Q2/D10 to match pixel
- // domain distortion values which are in Q2^2
- int shift = (MAX_TX_SCALE - av1_get_tx_scale(tx_size)) * 2;
- tran_low_t *const coeff = BLOCK_OFFSET(p->coeff, block);
- tran_low_t *const dqcoeff = BLOCK_OFFSET(pd->dqcoeff, block);
-
- if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH)
- *out_dist = av1_highbd_block_error(coeff, dqcoeff, buffer_length, &this_sse,
- xd->bd);
- else
- *out_dist = av1_block_error(coeff, dqcoeff, buffer_length, &this_sse);
-
- *out_dist = RIGHT_SIGNED_SHIFT(*out_dist, shift);
- *out_sse = RIGHT_SIGNED_SHIFT(this_sse, shift);
-}
-
-static INLINE int64_t dist_block_px_domain(const AV1_COMP *cpi, MACROBLOCK *x,
- int plane, BLOCK_SIZE plane_bsize,
- int block, int blk_row, int blk_col,
- TX_SIZE tx_size) {
- MACROBLOCKD *const xd = &x->e_mbd;
- const struct macroblock_plane *const p = &x->plane[plane];
- const struct macroblockd_plane *const pd = &xd->plane[plane];
- const uint16_t eob = p->eobs[block];
- const BLOCK_SIZE tx_bsize = txsize_to_bsize[tx_size];
- const int bsw = block_size_wide[tx_bsize];
- const int bsh = block_size_high[tx_bsize];
- const int src_stride = x->plane[plane].src.stride;
- const int dst_stride = xd->plane[plane].dst.stride;
- // Scale the transform block index to pixel unit.
- const int src_idx = (blk_row * src_stride + blk_col) << tx_size_wide_log2[0];
- const int dst_idx = (blk_row * dst_stride + blk_col) << tx_size_wide_log2[0];
- const uint8_t *src = &x->plane[plane].src.buf[src_idx];
- const uint8_t *dst = &xd->plane[plane].dst.buf[dst_idx];
- const tran_low_t *dqcoeff = BLOCK_OFFSET(pd->dqcoeff, block);
-
- assert(cpi != NULL);
- assert(tx_size_wide_log2[0] == tx_size_high_log2[0]);
-
- uint8_t *recon;
- DECLARE_ALIGNED(16, uint16_t, recon16[MAX_TX_SQUARE]);
-
- if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
- recon = CONVERT_TO_BYTEPTR(recon16);
- av1_highbd_convolve_2d_copy_sr(CONVERT_TO_SHORTPTR(dst), dst_stride,
- CONVERT_TO_SHORTPTR(recon), MAX_TX_SIZE, bsw,
- bsh, NULL, NULL, 0, 0, NULL, xd->bd);
- } else {
- recon = (uint8_t *)recon16;
- av1_convolve_2d_copy_sr(dst, dst_stride, recon, MAX_TX_SIZE, bsw, bsh, NULL,
- NULL, 0, 0, NULL);
- }
-
- const PLANE_TYPE plane_type = get_plane_type(plane);
- TX_TYPE tx_type = av1_get_tx_type(plane_type, xd, blk_row, blk_col, tx_size,
- cpi->common.reduced_tx_set_used);
- av1_inverse_transform_block(xd, dqcoeff, plane, tx_type, tx_size, recon,
- MAX_TX_SIZE, eob,
- cpi->common.reduced_tx_set_used);
-
- return 16 * pixel_dist(cpi, x, plane, src, src_stride, recon, MAX_TX_SIZE,
- blk_row, blk_col, plane_bsize, tx_bsize);
-}
-
-static double get_mean(const int16_t *diff, int stride, int w, int h) {
- double sum = 0.0;
- for (int j = 0; j < h; ++j) {
- for (int i = 0; i < w; ++i) {
- sum += diff[j * stride + i];
- }
- }
- assert(w > 0 && h > 0);
- return sum / (w * h);
-}
-
-static double get_sse_norm(const int16_t *diff, int stride, int w, int h) {
- double sum = 0.0;
- for (int j = 0; j < h; ++j) {
- for (int i = 0; i < w; ++i) {
- const int err = diff[j * stride + i];
- sum += err * err;
- }
- }
- assert(w > 0 && h > 0);
- return sum / (w * h);
-}
-
-static double get_sad_norm(const int16_t *diff, int stride, int w, int h) {
- double sum = 0.0;
- for (int j = 0; j < h; ++j) {
- for (int i = 0; i < w; ++i) {
- sum += abs(diff[j * stride + i]);
- }
- }
- assert(w > 0 && h > 0);
- return sum / (w * h);
-}
-
-static void get_2x2_normalized_sses_and_sads(
- const AV1_COMP *const cpi, BLOCK_SIZE tx_bsize, const uint8_t *const src,
- int src_stride, const uint8_t *const dst, int dst_stride,
- const int16_t *const src_diff, int diff_stride, double *const sse_norm_arr,
- double *const sad_norm_arr) {
- const BLOCK_SIZE tx_bsize_half =
- get_partition_subsize(tx_bsize, PARTITION_SPLIT);
- if (tx_bsize_half == BLOCK_INVALID) { // manually calculate stats
- const int half_width = block_size_wide[tx_bsize] / 2;
- const int half_height = block_size_high[tx_bsize] / 2;
- for (int row = 0; row < 2; ++row) {
- for (int col = 0; col < 2; ++col) {
- const int16_t *const this_src_diff =
- src_diff + row * half_height * diff_stride + col * half_width;
- if (sse_norm_arr) {
- sse_norm_arr[row * 2 + col] =
- get_sse_norm(this_src_diff, diff_stride, half_width, half_height);
- }
- if (sad_norm_arr) {
- sad_norm_arr[row * 2 + col] =
- get_sad_norm(this_src_diff, diff_stride, half_width, half_height);
- }
- }
- }
- } else { // use function pointers to calculate stats
- const int half_width = block_size_wide[tx_bsize_half];
- const int half_height = block_size_high[tx_bsize_half];
- const int num_samples_half = half_width * half_height;
- for (int row = 0; row < 2; ++row) {
- for (int col = 0; col < 2; ++col) {
- const uint8_t *const this_src =
- src + row * half_height * src_stride + col * half_width;
- const uint8_t *const this_dst =
- dst + row * half_height * dst_stride + col * half_width;
-
- if (sse_norm_arr) {
- unsigned int this_sse;
- cpi->fn_ptr[tx_bsize_half].vf(this_src, src_stride, this_dst,
- dst_stride, &this_sse);
- sse_norm_arr[row * 2 + col] = (double)this_sse / num_samples_half;
- }
-
- if (sad_norm_arr) {
- const unsigned int this_sad = cpi->fn_ptr[tx_bsize_half].sdf(
- this_src, src_stride, this_dst, dst_stride);
- sad_norm_arr[row * 2 + col] = (double)this_sad / num_samples_half;
- }
- }
- }
- }
-}
-
-// NOTE: CONFIG_COLLECT_RD_STATS has 3 possible values
-// 0: Do not collect any RD stats
-// 1: Collect RD stats for transform units
-// 2: Collect RD stats for partition units
-#if CONFIG_COLLECT_RD_STATS
-
-#if CONFIG_COLLECT_RD_STATS == 1
-static void PrintTransformUnitStats(const AV1_COMP *const cpi, MACROBLOCK *x,
- const RD_STATS *const rd_stats, int blk_row,
- int blk_col, BLOCK_SIZE plane_bsize,
- TX_SIZE tx_size, TX_TYPE tx_type,
- int64_t rd) {
- if (rd_stats->rate == INT_MAX || rd_stats->dist == INT64_MAX) return;
-
- // Generate small sample to restrict output size.
- static unsigned int seed = 21743;
- if (lcg_rand16(&seed) % 256 > 0) return;
-
- const char output_file[] = "tu_stats.txt";
- FILE *fout = fopen(output_file, "a");
- if (!fout) return;
-
- const BLOCK_SIZE tx_bsize = txsize_to_bsize[tx_size];
- const MACROBLOCKD *const xd = &x->e_mbd;
- const int plane = 0;
- struct macroblock_plane *const p = &x->plane[plane];
- const struct macroblockd_plane *const pd = &xd->plane[plane];
- const int txw = tx_size_wide[tx_size];
- const int txh = tx_size_high[tx_size];
- const int dequant_shift =
- (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) ? xd->bd - 5 : 3;
- const int q_step = pd->dequant_Q3[1] >> dequant_shift;
- const double num_samples = txw * txh;
-
- const double rate_norm = (double)rd_stats->rate / num_samples;
- const double dist_norm = (double)rd_stats->dist / num_samples;
-
- fprintf(fout, "%g %g", rate_norm, dist_norm);
-
- const int src_stride = p->src.stride;
- const uint8_t *const src =
- &p->src.buf[(blk_row * src_stride + blk_col) << tx_size_wide_log2[0]];
- const int dst_stride = pd->dst.stride;
- const uint8_t *const dst =
- &pd->dst.buf[(blk_row * dst_stride + blk_col) << tx_size_wide_log2[0]];
- unsigned int sse;
- cpi->fn_ptr[tx_bsize].vf(src, src_stride, dst, dst_stride, &sse);
- const double sse_norm = (double)sse / num_samples;
-
- const unsigned int sad =
- cpi->fn_ptr[tx_bsize].sdf(src, src_stride, dst, dst_stride);
- const double sad_norm = (double)sad / num_samples;
-
- fprintf(fout, " %g %g", sse_norm, sad_norm);
-
- const int diff_stride = block_size_wide[plane_bsize];
- const int16_t *const src_diff =
- &p->src_diff[(blk_row * diff_stride + blk_col) << tx_size_wide_log2[0]];
-
- double sse_norm_arr[4], sad_norm_arr[4];
- get_2x2_normalized_sses_and_sads(cpi, tx_bsize, src, src_stride, dst,
- dst_stride, src_diff, diff_stride,
- sse_norm_arr, sad_norm_arr);
- for (int i = 0; i < 4; ++i) {
- fprintf(fout, " %g", sse_norm_arr[i]);
- }
- for (int i = 0; i < 4; ++i) {
- fprintf(fout, " %g", sad_norm_arr[i]);
- }
-
- const TX_TYPE_1D tx_type_1d_row = htx_tab[tx_type];
- const TX_TYPE_1D tx_type_1d_col = vtx_tab[tx_type];
-
- fprintf(fout, " %d %d %d %d %d", q_step, tx_size_wide[tx_size],
- tx_size_high[tx_size], tx_type_1d_row, tx_type_1d_col);
-
- int model_rate;
- int64_t model_dist;
- model_rd_sse_fn[MODELRD_CURVFIT](cpi, x, tx_bsize, plane, sse, num_samples,
- &model_rate, &model_dist);
- const double model_rate_norm = (double)model_rate / num_samples;
- const double model_dist_norm = (double)model_dist / num_samples;
- fprintf(fout, " %g %g", model_rate_norm, model_dist_norm);
-
- const double mean = get_mean(src_diff, diff_stride, txw, txh);
- double hor_corr, vert_corr;
- get_horver_correlation(src_diff, diff_stride, txw, txh, &hor_corr,
- &vert_corr);
- fprintf(fout, " %g %g %g", mean, hor_corr, vert_corr);
-
- double hdist[4] = { 0 }, vdist[4] = { 0 };
- get_energy_distribution_fine(cpi, tx_bsize, src, src_stride, dst, dst_stride,
- 1, hdist, vdist);
- fprintf(fout, " %g %g %g %g %g %g %g %g", hdist[0], hdist[1], hdist[2],
- hdist[3], vdist[0], vdist[1], vdist[2], vdist[3]);
-
- fprintf(fout, " %d %" PRId64, x->rdmult, rd);
-
- fprintf(fout, "\n");
- fclose(fout);
-}
-#endif // CONFIG_COLLECT_RD_STATS == 1
-
-#if CONFIG_COLLECT_RD_STATS >= 2
-static void PrintPredictionUnitStats(const AV1_COMP *const cpi, MACROBLOCK *x,
- const RD_STATS *const rd_stats,
- BLOCK_SIZE plane_bsize) {
- if (rd_stats->invalid_rate) return;
- if (rd_stats->rate == INT_MAX || rd_stats->dist == INT64_MAX) return;
-
- // Generate small sample to restrict output size.
- static unsigned int seed = 95014;
- if (lcg_rand16(&seed) % 256 > 0) return;
-
- const char output_file[] = "pu_stats.txt";
- FILE *fout = fopen(output_file, "a");
- if (!fout) return;
-
- const MACROBLOCKD *const xd = &x->e_mbd;
- const int plane = 0;
- struct macroblock_plane *const p = &x->plane[plane];
- const struct macroblockd_plane *const pd = &xd->plane[plane];
- const int diff_stride = block_size_wide[plane_bsize];
- int bw, bh;
- get_txb_dimensions(xd, plane, plane_bsize, 0, 0, plane_bsize, NULL, NULL, &bw,
- &bh);
- const int num_samples = bw * bh;
- const int dequant_shift =
- (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) ? xd->bd - 5 : 3;
- const int q_step = pd->dequant_Q3[1] >> dequant_shift;
-
- const double rate_norm = (double)rd_stats->rate / num_samples;
- const double dist_norm = (double)rd_stats->dist / num_samples;
- const double rdcost_norm =
- (double)RDCOST(x->rdmult, rd_stats->rate, rd_stats->dist) / num_samples;
-
- fprintf(fout, "%g %g %g", rate_norm, dist_norm, rdcost_norm);
-
- const int src_stride = p->src.stride;
- const uint8_t *const src = p->src.buf;
- const int dst_stride = pd->dst.stride;
- const uint8_t *const dst = pd->dst.buf;
- const int16_t *const src_diff = p->src_diff;
- const int shift = (xd->bd - 8);
-
- int64_t sse = aom_sum_squares_2d_i16(src_diff, diff_stride, bw, bh);
- sse = ROUND_POWER_OF_TWO(sse, shift * 2);
- const double sse_norm = (double)sse / num_samples;
-
- const unsigned int sad =
- cpi->fn_ptr[plane_bsize].sdf(src, src_stride, dst, dst_stride);
- const double sad_norm =
- (double)sad / (1 << num_pels_log2_lookup[plane_bsize]);
-
- fprintf(fout, " %g %g", sse_norm, sad_norm);
-
- double sse_norm_arr[4], sad_norm_arr[4];
- get_2x2_normalized_sses_and_sads(cpi, plane_bsize, src, src_stride, dst,
- dst_stride, src_diff, diff_stride,
- sse_norm_arr, sad_norm_arr);
- if (shift) {
- for (int k = 0; k < 4; ++k) sse_norm_arr[k] /= (1 << (2 * shift));
- for (int k = 0; k < 4; ++k) sad_norm_arr[k] /= (1 << shift);
- }
- for (int i = 0; i < 4; ++i) {
- fprintf(fout, " %g", sse_norm_arr[i]);
- }
- for (int i = 0; i < 4; ++i) {
- fprintf(fout, " %g", sad_norm_arr[i]);
- }
-
- fprintf(fout, " %d %d %d %d", q_step, x->rdmult, bw, bh);
-
- int model_rate;
- int64_t model_dist;
- model_rd_sse_fn[MODELRD_CURVFIT](cpi, x, plane_bsize, plane, sse, num_samples,
- &model_rate, &model_dist);
- const double model_rdcost_norm =
- (double)RDCOST(x->rdmult, model_rate, model_dist) / num_samples;
- const double model_rate_norm = (double)model_rate / num_samples;
- const double model_dist_norm = (double)model_dist / num_samples;
- fprintf(fout, " %g %g %g", model_rate_norm, model_dist_norm,
- model_rdcost_norm);
-
- double mean = get_mean(src_diff, diff_stride, bw, bh);
- mean /= (1 << shift);
- double hor_corr, vert_corr;
- get_horver_correlation(src_diff, diff_stride, bw, bh, &hor_corr, &vert_corr);
- fprintf(fout, " %g %g %g", mean, hor_corr, vert_corr);
-
- double hdist[4] = { 0 }, vdist[4] = { 0 };
- get_energy_distribution_fine(cpi, plane_bsize, src, src_stride, dst,
- dst_stride, 1, hdist, vdist);
- fprintf(fout, " %g %g %g %g %g %g %g %g", hdist[0], hdist[1], hdist[2],
- hdist[3], vdist[0], vdist[1], vdist[2], vdist[3]);
-
- fprintf(fout, "\n");
- fclose(fout);
-}
-#endif // CONFIG_COLLECT_RD_STATS >= 2
-#endif // CONFIG_COLLECT_RD_STATS
-
-static void model_rd_with_dnn(const AV1_COMP *const cpi,
- const MACROBLOCK *const x, BLOCK_SIZE plane_bsize,
- int plane, int64_t sse, int num_samples,
- int *rate, int64_t *dist) {
- const MACROBLOCKD *const xd = &x->e_mbd;
- const struct macroblockd_plane *const pd = &xd->plane[plane];
- const int log_numpels = num_pels_log2_lookup[plane_bsize];
-
- const int dequant_shift =
- (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) ? xd->bd - 5 : 3;
- const int q_step = AOMMAX(pd->dequant_Q3[1] >> dequant_shift, 1);
-
- const struct macroblock_plane *const p = &x->plane[plane];
- int bw, bh;
- get_txb_dimensions(xd, plane, plane_bsize, 0, 0, plane_bsize, NULL, NULL, &bw,
- &bh);
- const int src_stride = p->src.stride;
- const uint8_t *const src = p->src.buf;
- const int dst_stride = pd->dst.stride;
- const uint8_t *const dst = pd->dst.buf;
- const int16_t *const src_diff = p->src_diff;
- const int diff_stride = block_size_wide[plane_bsize];
- const int shift = (xd->bd - 8);
-
- if (sse == 0) {
- if (rate) *rate = 0;
- if (dist) *dist = 0;
- return;
- }
- if (plane) {
- int model_rate;
- int64_t model_dist;
- model_rd_with_curvfit(cpi, x, plane_bsize, plane, sse, num_samples,
- &model_rate, &model_dist);
- if (rate) *rate = model_rate;
- if (dist) *dist = model_dist;
- return;
- }
-
- aom_clear_system_state();
- const double sse_norm = (double)sse / num_samples;
-
- double sse_norm_arr[4];
- get_2x2_normalized_sses_and_sads(cpi, plane_bsize, src, src_stride, dst,
- dst_stride, src_diff, diff_stride,
- sse_norm_arr, NULL);
- double mean = get_mean(src_diff, bw, bw, bh);
- if (shift) {
- for (int k = 0; k < 4; ++k) sse_norm_arr[k] /= (1 << (2 * shift));
- mean /= (1 << shift);
- }
- double sse_norm_sum = 0.0, sse_frac_arr[3];
- for (int k = 0; k < 4; ++k) sse_norm_sum += sse_norm_arr[k];
- for (int k = 0; k < 3; ++k)
- sse_frac_arr[k] =
- sse_norm_sum > 0.0 ? sse_norm_arr[k] / sse_norm_sum : 0.25;
- const double q_sqr = (double)(q_step * q_step);
- const double q_sqr_by_sse_norm = q_sqr / (sse_norm + 1.0);
- const double mean_sqr_by_sse_norm = mean * mean / (sse_norm + 1.0);
- double hor_corr, vert_corr;
- get_horver_correlation(src_diff, diff_stride, bw, bh, &hor_corr, &vert_corr);
-
- float features[NUM_FEATURES_PUSTATS];
- features[0] = (float)hor_corr;
- features[1] = (float)log_numpels;
- features[2] = (float)mean_sqr_by_sse_norm;
- features[3] = (float)q_sqr_by_sse_norm;
- features[4] = (float)sse_frac_arr[0];
- features[5] = (float)sse_frac_arr[1];
- features[6] = (float)sse_frac_arr[2];
- features[7] = (float)vert_corr;
-
- float rate_f, dist_by_sse_norm_f;
- av1_nn_predict(features, &av1_pustats_dist_nnconfig, &dist_by_sse_norm_f);
- av1_nn_predict(features, &av1_pustats_rate_nnconfig, &rate_f);
- const float dist_f = (float)((double)dist_by_sse_norm_f * (1.0 + sse_norm));
- int rate_i = (int)(AOMMAX(0.0, rate_f * num_samples) + 0.5);
- int64_t dist_i = (int64_t)(AOMMAX(0.0, dist_f * num_samples) + 0.5);
- aom_clear_system_state();
-
- // Check if skip is better
- if (rate_i == 0) {
- dist_i = sse << 4;
- } else if (RDCOST(x->rdmult, rate_i, dist_i) >=
- RDCOST(x->rdmult, 0, sse << 4)) {
- rate_i = 0;
- dist_i = sse << 4;
- }
-
- if (rate) *rate = rate_i;
- if (dist) *dist = dist_i;
- return;
-}
-
-static void model_rd_for_sb_with_dnn(
- const AV1_COMP *const cpi, BLOCK_SIZE bsize, MACROBLOCK *x, MACROBLOCKD *xd,
- int plane_from, int plane_to, int mi_row, int mi_col, int *out_rate_sum,
- int64_t *out_dist_sum, int *skip_txfm_sb, int64_t *skip_sse_sb,
- int *plane_rate, int64_t *plane_sse, int64_t *plane_dist) {
- (void)mi_row;
- (void)mi_col;
- // Note our transform coeffs are 8 times an orthogonal transform.
- // Hence quantizer step is also 8 times. To get effective quantizer
- // we need to divide by 8 before sending to modeling function.
- const int ref = xd->mi[0]->ref_frame[0];
-
- int64_t rate_sum = 0;
- int64_t dist_sum = 0;
- int64_t total_sse = 0;
-
- for (int plane = plane_from; plane <= plane_to; ++plane) {
- struct macroblockd_plane *const pd = &xd->plane[plane];
- const BLOCK_SIZE plane_bsize =
- get_plane_block_size(bsize, pd->subsampling_x, pd->subsampling_y);
- int64_t dist, sse;
- int rate;
-
- if (x->skip_chroma_rd && plane) continue;
-
- const struct macroblock_plane *const p = &x->plane[plane];
- const int shift = (xd->bd - 8);
- int bw, bh;
- get_txb_dimensions(xd, plane, plane_bsize, 0, 0, plane_bsize, NULL, NULL,
- &bw, &bh);
- if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
- sse = aom_highbd_sse(p->src.buf, p->src.stride, pd->dst.buf,
- pd->dst.stride, bw, bh);
- } else {
- sse = aom_sse(p->src.buf, p->src.stride, pd->dst.buf, pd->dst.stride, bw,
- bh);
- }
- sse = ROUND_POWER_OF_TWO(sse, shift * 2);
-
- model_rd_with_dnn(cpi, x, plane_bsize, plane, sse, bw * bh, &rate, &dist);
-
- if (plane == 0) x->pred_sse[ref] = (unsigned int)AOMMIN(sse, UINT_MAX);
-
- total_sse += sse;
- rate_sum += rate;
- dist_sum += dist;
-
- if (plane_rate) plane_rate[plane] = rate;
- if (plane_sse) plane_sse[plane] = sse;
- if (plane_dist) plane_dist[plane] = dist;
- }
-
- if (skip_txfm_sb) *skip_txfm_sb = total_sse == 0;
- if (skip_sse_sb) *skip_sse_sb = total_sse << 4;
- *out_rate_sum = (int)rate_sum;
- *out_dist_sum = dist_sum;
-}
-
-// Fits a surface for rate and distortion using as features:
-// log2(sse_norm + 1) and log2(sse_norm/qstep^2)
-static void model_rd_with_surffit(const AV1_COMP *const cpi,
- const MACROBLOCK *const x,
- BLOCK_SIZE plane_bsize, int plane,
- int64_t sse, int num_samples, int *rate,
- int64_t *dist) {
- (void)cpi;
- (void)plane_bsize;
- const MACROBLOCKD *const xd = &x->e_mbd;
- const struct macroblockd_plane *const pd = &xd->plane[plane];
- const int dequant_shift =
- (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) ? xd->bd - 5 : 3;
- const int qstep = AOMMAX(pd->dequant_Q3[1] >> dequant_shift, 1);
- if (sse == 0) {
- if (rate) *rate = 0;
- if (dist) *dist = 0;
- return;
- }
- aom_clear_system_state();
- const double sse_norm = (double)sse / num_samples;
- const double qstepsqr = (double)qstep * qstep;
- const double xm = log(sse_norm + 1.0) / log(2.0);
- const double yl = log(sse_norm / qstepsqr) / log(2.0);
- double rate_f, dist_by_sse_norm_f;
-
- av1_model_rd_surffit(xm, yl, &rate_f, &dist_by_sse_norm_f);
-
- const double dist_f = dist_by_sse_norm_f * sse_norm;
- int rate_i = (int)(AOMMAX(0.0, rate_f * num_samples) + 0.5);
- int64_t dist_i = (int64_t)(AOMMAX(0.0, dist_f * num_samples) + 0.5);
- aom_clear_system_state();
-
- // Check if skip is better
- if (rate_i == 0) {
- dist_i = sse << 4;
- } else if (RDCOST(x->rdmult, rate_i, dist_i) >=
- RDCOST(x->rdmult, 0, sse << 4)) {
- rate_i = 0;
- dist_i = sse << 4;
- }
-
- if (rate) *rate = rate_i;
- if (dist) *dist = dist_i;
-}
-
-// Model-based RD estimate for planes [plane_from, plane_to] of a block. Each
-// plane's prediction SSE is fed to model_rd_with_surffit().
-//
-// Outputs:
-//   *out_rate_sum, *out_dist_sum   rate / distortion summed over the planes.
-//   *skip_txfm_sb  (optional)      1 when the summed SSE is zero, else 0.
-//   *skip_sse_sb   (optional)      summed SSE shifted left by 4.
-//   plane_rate / plane_sse / plane_dist (optional)
-//                                  per-plane results indexed by plane; entries
-//                                  of planes that are skipped are not written.
-// Side effect: x->pred_sse[] of the block's first reference frame is set from
-// the plane-0 SSE. mi_row and mi_col are unused.
-static void model_rd_for_sb_with_surffit(
-    const AV1_COMP *const cpi, BLOCK_SIZE bsize, MACROBLOCK *x, MACROBLOCKD *xd,
-    int plane_from, int plane_to, int mi_row, int mi_col, int *out_rate_sum,
-    int64_t *out_dist_sum, int *skip_txfm_sb, int64_t *skip_sse_sb,
-    int *plane_rate, int64_t *plane_sse, int64_t *plane_dist) {
-  (void)mi_row;
-  (void)mi_col;
-  const int ref_frame = xd->mi[0]->ref_frame[0];
-
-  int64_t sse_total = 0;
-  int64_t rate_total = 0;
-  int64_t dist_total = 0;
-
-  for (int plane = plane_from; plane <= plane_to; ++plane) {
-    struct macroblockd_plane *const pd = &xd->plane[plane];
-    const BLOCK_SIZE plane_bsize =
-        get_plane_block_size(bsize, pd->subsampling_x, pd->subsampling_y);
-
-    // Planes other than plane 0 are skipped when x->skip_chroma_rd is set.
-    if (plane != 0 && x->skip_chroma_rd) continue;
-
-    const struct macroblock_plane *const p = &x->plane[plane];
-    const int bd_shift = xd->bd - 8;
-    int width, height;
-    get_txb_dimensions(xd, plane, plane_bsize, 0, 0, plane_bsize, NULL, NULL,
-                       &width, &height);
-
-    int64_t this_sse;
-    if (!(xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH)) {
-      this_sse = aom_sse(p->src.buf, p->src.stride, pd->dst.buf,
-                         pd->dst.stride, width, height);
-    } else {
-      this_sse = aom_highbd_sse(p->src.buf, p->src.stride, pd->dst.buf,
-                                pd->dst.stride, width, height);
-    }
-    // Scale the SSE down by 2 * (bit depth - 8) bits, with rounding.
-    this_sse = ROUND_POWER_OF_TWO(this_sse, bd_shift * 2);
-
-    int this_rate;
-    int64_t this_dist;
-    model_rd_with_surffit(cpi, x, plane_bsize, plane, this_sse, width * height,
-                          &this_rate, &this_dist);
-
-    if (plane == 0) {
-      x->pred_sse[ref_frame] = (unsigned int)AOMMIN(this_sse, UINT_MAX);
-    }
-
-    sse_total += this_sse;
-    rate_total += this_rate;
-    dist_total += this_dist;
-
-    if (plane_rate) plane_rate[plane] = this_rate;
-    if (plane_sse) plane_sse[plane] = this_sse;
-    if (plane_dist) plane_dist[plane] = this_dist;
-  }
-
-  if (skip_txfm_sb) *skip_txfm_sb = (sse_total == 0);
-  if (skip_sse_sb) *skip_sse_sb = sse_total << 4;
-  *out_rate_sum = (int)rate_total;
-  *out_dist_sum = dist_total;
-}
-
-// Curve-fit rate/distortion model for one plane. The single model feature is
-// log2(sse_norm / qstep^2), where sse_norm is the SSE per sample.
-//
-// sse == 0 yields rate 0 and distortion 0. Otherwise the modelled result is
-// compared against coding nothing (rate 0, distortion sse << 4) and the cheaper
-// of the two in RD terms is reported. rate and dist may each be NULL.
-// cpi and plane_bsize are unused.
-static void model_rd_with_curvfit(const AV1_COMP *const cpi,
-                                  const MACROBLOCK *const x,
-                                  BLOCK_SIZE plane_bsize, int plane,
-                                  int64_t sse, int num_samples, int *rate,
-                                  int64_t *dist) {
-  (void)cpi;
-  (void)plane_bsize;
-  const MACROBLOCKD *const xd = &x->e_mbd;
-  const struct macroblockd_plane *const pd = &xd->plane[plane];
-  // Shift applied to the dequantizer entry to obtain the step size used by the
-  // model: 3 for 8-bit buffers, bd - 5 for high-bitdepth buffers.
-  const int dequant_shift =
-      (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) ? xd->bd - 5 : 3;
-  const int qstep = AOMMAX(pd->dequant_Q3[1] >> dequant_shift, 1);
-
-  if (sse == 0) {
-    if (rate) *rate = 0;
-    if (dist) *dist = 0;
-    return;
-  }
-
-  aom_clear_system_state();
-  const double sse_norm = (double)sse / num_samples;
-  const double qstepsqr = (double)qstep * qstep;
-  const double xqr = log(sse_norm / qstepsqr) / log(2.0);
-
-  double model_rate, model_dist_by_sse_norm;
-  av1_model_rd_curvfit(xqr, &model_rate, &model_dist_by_sse_norm);
-
-  const double model_dist = model_dist_by_sse_norm * sse_norm;
-  int est_rate = (int)(AOMMAX(0.0, model_rate * num_samples) + 0.5);
-  int64_t est_dist = (int64_t)(AOMMAX(0.0, model_dist * num_samples) + 0.5);
-  aom_clear_system_state();
-
-  // Fall back to "skip" when the model predicts zero rate or when skipping is
-  // at least as good in RD terms.
-  const int64_t skip_dist = sse << 4;
-  if (est_rate == 0 || RDCOST(x->rdmult, est_rate, est_dist) >=
-                           RDCOST(x->rdmult, 0, skip_dist)) {
-    est_rate = 0;
-    est_dist = skip_dist;
-  }
-
-  if (rate) *rate = est_rate;
-  if (dist) *dist = est_dist;
-}
-
-// Model-based RD estimate for planes [plane_from, plane_to] of a block. Each
-// plane's prediction SSE is fed to model_rd_with_curvfit().
-//
-// Outputs:
-//   *out_rate_sum, *out_dist_sum   rate / distortion summed over the planes.
-//   *skip_txfm_sb  (optional)      1 when the summed SSE is zero, else 0.
-//   *skip_sse_sb   (optional)      summed SSE shifted left by 4.
-//   plane_rate / plane_sse / plane_dist (optional)
-//                                  per-plane results indexed by plane; entries
-//                                  of planes that are skipped are not written.
-// Side effect: x->pred_sse[] of the block's first reference frame is set from
-// the plane-0 SSE. mi_row and mi_col are unused.
-static void model_rd_for_sb_with_curvfit(
-    const AV1_COMP *const cpi, BLOCK_SIZE bsize, MACROBLOCK *x, MACROBLOCKD *xd,
-    int plane_from, int plane_to, int mi_row, int mi_col, int *out_rate_sum,
-    int64_t *out_dist_sum, int *skip_txfm_sb, int64_t *skip_sse_sb,
-    int *plane_rate, int64_t *plane_sse, int64_t *plane_dist) {
-  (void)mi_row;
-  (void)mi_col;
-  const int ref_frame = xd->mi[0]->ref_frame[0];
-
-  int64_t sse_total = 0;
-  int64_t rate_total = 0;
-  int64_t dist_total = 0;
-
-  for (int plane = plane_from; plane <= plane_to; ++plane) {
-    struct macroblockd_plane *const pd = &xd->plane[plane];
-    const BLOCK_SIZE plane_bsize =
-        get_plane_block_size(bsize, pd->subsampling_x, pd->subsampling_y);
-
-    // Planes other than plane 0 are skipped when x->skip_chroma_rd is set.
-    if (plane != 0 && x->skip_chroma_rd) continue;
-
-    const struct macroblock_plane *const p = &x->plane[plane];
-    const int bd_shift = xd->bd - 8;
-    int width, height;
-    get_txb_dimensions(xd, plane, plane_bsize, 0, 0, plane_bsize, NULL, NULL,
-                       &width, &height);
-
-    int64_t this_sse;
-    if (!(xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH)) {
-      this_sse = aom_sse(p->src.buf, p->src.stride, pd->dst.buf,
-                         pd->dst.stride, width, height);
-    } else {
-      this_sse = aom_highbd_sse(p->src.buf, p->src.stride, pd->dst.buf,
-                                pd->dst.stride, width, height);
-    }
-    // Scale the SSE down by 2 * (bit depth - 8) bits, with rounding.
-    this_sse = ROUND_POWER_OF_TWO(this_sse, bd_shift * 2);
-
-    int this_rate;
-    int64_t this_dist;
-    model_rd_with_curvfit(cpi, x, plane_bsize, plane, this_sse, width * height,
-                          &this_rate, &this_dist);
-
-    if (plane == 0) {
-      x->pred_sse[ref_frame] = (unsigned int)AOMMIN(this_sse, UINT_MAX);
-    }
-
-    sse_total += this_sse;
-    rate_total += this_rate;
-    dist_total += this_dist;
-
-    if (plane_rate) plane_rate[plane] = this_rate;
-    if (plane_sse) plane_sse[plane] = this_sse;
-    if (plane_dist) plane_dist[plane] = this_dist;
-  }
-
-  if (skip_txfm_sb) *skip_txfm_sb = (sse_total == 0);
-  if (skip_sse_sb) *skip_sse_sb = sse_total << 4;
-  *out_rate_sum = (int)rate_total;
-  *out_dist_sum = dist_total;
-}
-
-// RD estimate for planes [plane_from, plane_to] of a block in which plane 0 is
-// evaluated with select_tx_type_yrd() and every other plane with the curve-fit
-// model (model_rd_with_curvfit).
-//
-// Outputs:
-//   *out_rate_sum, *out_dist_sum   rate / distortion summed over the planes.
-//   *skip_txfm_sb  (optional)      1 when the summed SSE is zero, else 0.
-//   *skip_sse_sb   (optional)      summed SSE shifted left by 4.
-//   plane_rate / plane_sse / plane_dist (optional)
-//                                  per-plane results indexed by plane; entries
-//                                  of planes that are skipped are not written.
-// Side effect: x->pred_sse[] of the block's first reference frame is set from
-// the plane-0 SSE.
-static void model_rd_for_sb_with_fullrdy(
-    const AV1_COMP *const cpi, BLOCK_SIZE bsize, MACROBLOCK *x, MACROBLOCKD *xd,
-    int plane_from, int plane_to, int mi_row, int mi_col, int *out_rate_sum,
-    int64_t *out_dist_sum, int *skip_txfm_sb, int64_t *skip_sse_sb,
-    int *plane_rate, int64_t *plane_sse, int64_t *plane_dist) {
-  const int ref_frame = xd->mi[0]->ref_frame[0];
-
-  int64_t sse_total = 0;
-  int64_t rate_total = 0;
-  int64_t dist_total = 0;
-
-  for (int plane = plane_from; plane <= plane_to; ++plane) {
-    struct macroblock_plane *const p = &x->plane[plane];
-    struct macroblockd_plane *const pd = &xd->plane[plane];
-    const BLOCK_SIZE plane_bsize =
-        get_plane_block_size(bsize, pd->subsampling_x, pd->subsampling_y);
-    const int width = block_size_wide[plane_bsize];
-    const int height = block_size_high[plane_bsize];
-
-    // Planes other than plane 0 are skipped when x->skip_chroma_rd is set.
-    if (plane != 0 && x->skip_chroma_rd) continue;
-
-    int64_t this_sse;
-    if (!(xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH)) {
-      this_sse = aom_sse(p->src.buf, p->src.stride, pd->dst.buf,
-                         pd->dst.stride, width, height);
-    } else {
-      this_sse = aom_highbd_sse(p->src.buf, p->src.stride, pd->dst.buf,
-                                pd->dst.stride, width, height);
-    }
-    // Scale the SSE down by 2 * (bit depth - 8) bits, with rounding.
-    this_sse = ROUND_POWER_OF_TWO(this_sse, (xd->bd - 8) * 2);
-
-    int this_rate;
-    int64_t this_dist;
-    if (plane != 0) {
-      model_rd_with_curvfit(cpi, x, plane_bsize, plane, this_sse,
-                            width * height, &this_rate, &this_dist);
-    } else {
-      RD_STATS rd_stats;
-      select_tx_type_yrd(cpi, x, &rd_stats, bsize, mi_row, mi_col, INT64_MAX);
-      if (!rd_stats.invalid_rate) {
-        this_rate = rd_stats.rate;
-        this_dist = rd_stats.dist;
-      } else {
-        // No valid rate: account for the plane as rate 0, distortion sse << 4.
-        this_rate = 0;
-        this_dist = this_sse << 4;
-      }
-      x->pred_sse[ref_frame] = (unsigned int)AOMMIN(this_sse, UINT_MAX);
-    }
-
-    sse_total += this_sse;
-    rate_total += this_rate;
-    dist_total += this_dist;
-
-    if (plane_rate) plane_rate[plane] = this_rate;
-    if (plane_sse) plane_sse[plane] = this_sse;
-    if (plane_dist) plane_dist[plane] = this_dist;
-  }
-
-  if (skip_txfm_sb) *skip_txfm_sb = (sse_total == 0);
-  if (skip_sse_sb) *skip_sse_sb = sse_total << 4;
-  *out_rate_sum = (int)rate_total;
-  *out_dist_sum = dist_total;
-}
-/*
- * Searches the candidate transform types of one transform block and keeps the
- * candidate with the lowest RD cost.
- *
- * Candidates run from txk_start to txk_end and are filtered by
- * allowed_tx_mask. Each candidate is transformed and quantized
- * (av1_xform_quant), optionally refined with av1_optimize_b(), then costed as
- * RDCOST(x->rdmult, rate, dist).
- *
- * On intra-only frames an intra hash record (x->txb_rd_record_intra) can
- * supply a previously stored result; on a usable hit the search loop is
- * skipped entirely via "goto RECON_INTRA".
- *
- * Outputs and side effects visible in this function:
- *   - *best_rd_stats receives the rate/dist/sse/skip of the winner.
- *   - x->plane[plane].eobs[block] and .txb_entropy_ctx[block] are set to the
- *     winner's values.
- *   - for plane 0 the chosen type is stored through update_txk_array().
- *   - pd->dqcoeff is temporarily swapped with a local buffer and is restored
- *     to its original pointer before returning.
- *   - for intra blocks with a non-zero eob that are not the last transform
- *     block in their row/column range, the block is inverse transformed so
- *     later transform blocks can predict from it.
- *
- * ref_best_rd is used only to prune candidates and to stop the search early.
- * Returns the best RD cost found.
- */
-static int64_t search_txk_type(const AV1_COMP *cpi, MACROBLOCK *x, int plane,
- int block, int blk_row, int blk_col,
- BLOCK_SIZE plane_bsize, TX_SIZE tx_size,
- const TXB_CTX *const txb_ctx,
- FAST_TX_SEARCH_MODE ftxs_mode,
- int use_fast_coef_costing, int64_t ref_best_rd,
- RD_STATS *best_rd_stats) {
- const AV1_COMMON *cm = &cpi->common;
- MACROBLOCKD *xd = &x->e_mbd;
- struct macroblockd_plane *const pd = &xd->plane[plane];
- MB_MODE_INFO *mbmi = xd->mi[0];
- const int is_inter = is_inter_block(mbmi);
- int64_t best_rd = INT64_MAX;  // INT64_MAX until a candidate has been costed
- uint16_t best_eob = 0;
- TX_TYPE best_tx_type = DCT_DCT;
- TX_TYPE last_tx_type = TX_TYPES;  // only updated when the loop records a new best
- const int fast_tx_search = ftxs_mode & FTXS_DCT_AND_1D_DCT_ONLY;
- // The buffer used to swap dqcoeff in macroblockd_plane so we can keep dqcoeff
- // of the best tx_type
- DECLARE_ALIGNED(32, tran_low_t, this_dqcoeff[MAX_SB_SQUARE]);
- tran_low_t *orig_dqcoeff = pd->dqcoeff;
- tran_low_t *best_dqcoeff = this_dqcoeff;
- const int txk_type_idx =
- av1_get_txk_type_index(plane_bsize, blk_row, blk_col);
- av1_invalid_rd_stats(best_rd_stats);
-
- TXB_RD_INFO *intra_txb_rd_info = NULL;  // stays NULL unless the intra hash path is taken
- uint16_t cur_joint_ctx = 0;
- const int mi_row = -xd->mb_to_top_edge >> (3 + MI_SIZE_LOG2);
- const int mi_col = -xd->mb_to_left_edge >> (3 + MI_SIZE_LOG2);
- const int within_border =
- mi_row >= xd->tile.mi_row_start &&
- (mi_row + mi_size_high[plane_bsize] < xd->tile.mi_row_end) &&
- mi_col >= xd->tile.mi_col_start &&
- (mi_col + mi_size_wide[plane_bsize] < xd->tile.mi_col_end);
- if (within_border && cpi->sf.use_intra_txb_hash && frame_is_intra_only(cm) &&
- !is_inter && plane == 0 &&
- tx_size_wide[tx_size] == tx_size_high[tx_size]) {
- const uint32_t intra_hash =
- get_intra_txb_hash(x, plane, blk_row, blk_col, plane_bsize, tx_size);
- const int intra_hash_idx =
- find_tx_size_rd_info(&x->txb_rd_record_intra, intra_hash);
- intra_txb_rd_info = &x->txb_rd_record_intra.tx_rd_info[intra_hash_idx];
-
- cur_joint_ctx = (txb_ctx->dc_sign_ctx << 8) + txb_ctx->txb_skip_ctx;
- if (intra_txb_rd_info->entropy_context == cur_joint_ctx &&
- x->txb_rd_record_intra.tx_rd_info[intra_hash_idx].valid) {
- mbmi->txk_type[txk_type_idx] = intra_txb_rd_info->tx_type;
- const TX_TYPE ref_tx_type =
- av1_get_tx_type(get_plane_type(plane), &x->e_mbd, blk_row, blk_col,
- tx_size, cpi->common.reduced_tx_set_used);
- if (ref_tx_type == intra_txb_rd_info->tx_type) {
- best_rd_stats->rate = intra_txb_rd_info->rate;
- best_rd_stats->dist = intra_txb_rd_info->dist;
- best_rd_stats->sse = intra_txb_rd_info->sse;
- best_rd_stats->skip = intra_txb_rd_info->eob == 0;
- x->plane[plane].eobs[block] = intra_txb_rd_info->eob;
- x->plane[plane].txb_entropy_ctx[block] =
- intra_txb_rd_info->txb_entropy_ctx;
- best_rd = RDCOST(x->rdmult, best_rd_stats->rate, best_rd_stats->dist);
- best_eob = intra_txb_rd_info->eob;
- best_tx_type = intra_txb_rd_info->tx_type;
- update_txk_array(mbmi->txk_type, plane_bsize, blk_row, blk_col, tx_size,
- best_tx_type);
- goto RECON_INTRA;  // stored result reused; last_tx_type is still TX_TYPES here
- }
- }
- }
-
- int rate_cost = 0;
- TX_TYPE txk_start = DCT_DCT;
- TX_TYPE txk_end = TX_TYPES - 1;
- if ((!is_inter && x->use_default_intra_tx_type) ||
- (is_inter && x->use_default_inter_tx_type)) {
- txk_start = txk_end = get_default_tx_type(0, xd, tx_size);  // single candidate
- } else if (x->rd_model == LOW_TXFM_RD || x->cb_partition_scan) {
- if (plane == 0) txk_end = DCT_DCT;
- }
-
- uint8_t best_txb_ctx = 0;
- const TxSetType tx_set_type =
- av1_get_ext_tx_set_type(tx_size, is_inter, cm->reduced_tx_set_used);
-
- TX_TYPE uv_tx_type = DCT_DCT;
- if (plane) {
- // tx_type of PLANE_TYPE_UV should be the same as PLANE_TYPE_Y
- uv_tx_type = txk_start = txk_end =
- av1_get_tx_type(get_plane_type(plane), xd, blk_row, blk_col, tx_size,
- cm->reduced_tx_set_used);
- }
- const uint16_t ext_tx_used_flag = av1_ext_tx_used_flag[tx_set_type];
- if (xd->lossless[mbmi->segment_id] || txsize_sqr_up_map[tx_size] > TX_32X32 ||
- ext_tx_used_flag == 0x0001) {
- txk_start = txk_end = DCT_DCT;
- }
- uint16_t allowed_tx_mask = 0; // 1: allow; 0: skip.
- if (txk_start == txk_end) {
- allowed_tx_mask = 1 << txk_start;
- allowed_tx_mask &= ext_tx_used_flag;
- } else if (fast_tx_search) {
- allowed_tx_mask = 0x0c01; // V_DCT, H_DCT, DCT_DCT
- allowed_tx_mask &= ext_tx_used_flag;
- } else {
- assert(plane == 0);
- allowed_tx_mask = ext_tx_used_flag;
- // !fast_tx_search && txk_end != txk_start && plane == 0
- const int do_prune = cpi->sf.tx_type_search.prune_mode > NO_PRUNE;
- if (do_prune && is_inter) {
- if (cpi->sf.tx_type_search.prune_mode >= PRUNE_2D_ACCURATE) {
- const uint16_t prune =
- prune_tx_2D(x, plane_bsize, tx_size, blk_row, blk_col, tx_set_type,
- cpi->sf.tx_type_search.prune_mode);
- allowed_tx_mask &= (~prune);
- } else {
- allowed_tx_mask &= (~x->tx_search_prune[tx_set_type]);
- }
- }
- }
- // Need to have at least one transform type allowed.
- if (allowed_tx_mask == 0) {
- txk_start = txk_end = (plane ? uv_tx_type : DCT_DCT);
- allowed_tx_mask = (1 << txk_start);
- }
-
- int use_transform_domain_distortion =
- (cpi->sf.use_transform_domain_distortion > 0) &&
- // Any 64-pt transforms only preserves half the coefficients.
- // Therefore transform domain distortion is not valid for these
- // transform sizes.
- txsize_sqr_up_map[tx_size] != TX_64X64;
-#if CONFIG_DIST_8X8
- if (x->using_dist_8x8) use_transform_domain_distortion = 0;
-#endif
- int calc_pixel_domain_distortion_final =
- cpi->sf.use_transform_domain_distortion == 1 &&
- use_transform_domain_distortion && x->rd_model != LOW_TXFM_RD &&
- !x->cb_partition_scan;
- if (calc_pixel_domain_distortion_final &&
- (txk_start == txk_end || allowed_tx_mask == 0x0001))
- calc_pixel_domain_distortion_final = use_transform_domain_distortion = 0;  // one candidate only: use pixel-domain distortion in the loop
-
- const uint16_t *eobs_ptr = x->plane[plane].eobs;
-
- const BLOCK_SIZE tx_bsize = txsize_to_bsize[tx_size];
- int64_t block_sse =
- pixel_diff_dist(x, plane, blk_row, blk_col, plane_bsize, tx_bsize);
- if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH)
- block_sse = ROUND_POWER_OF_TWO(block_sse, (xd->bd - 8) * 2);
- block_sse *= 16;  // NOTE(review): presumably matches the x16 distortion scale used elsewhere (sse << 4) - confirm
-
- for (TX_TYPE tx_type = txk_start; tx_type <= txk_end; ++tx_type) {
- if (!(allowed_tx_mask & (1 << tx_type))) continue;
- if (plane == 0) mbmi->txk_type[txk_type_idx] = tx_type;
- RD_STATS this_rd_stats;
- av1_invalid_rd_stats(&this_rd_stats);
-
- if (!cpi->optimize_seg_arr[mbmi->segment_id]) {
- av1_xform_quant(
- cm, x, plane, block, blk_row, blk_col, plane_bsize, tx_size, tx_type,
- USE_B_QUANT_NO_TRELLIS ? AV1_XFORM_QUANT_B : AV1_XFORM_QUANT_FP);
- rate_cost = av1_cost_coeffs(cm, x, plane, block, tx_size, tx_type,
- txb_ctx, use_fast_coef_costing);
- } else {
- av1_xform_quant(cm, x, plane, block, blk_row, blk_col, plane_bsize,
- tx_size, tx_type, AV1_XFORM_QUANT_FP);
- if (cpi->sf.optimize_b_precheck && best_rd < INT64_MAX &&
- eobs_ptr[block] >= 4) {
- // Calculate distortion quickly in transform domain.
- dist_block_tx_domain(x, plane, block, tx_size, &this_rd_stats.dist,
- &this_rd_stats.sse);
-
- const int64_t best_rd_ = AOMMIN(best_rd, ref_best_rd);
- const int64_t dist_cost_estimate =
- RDCOST(x->rdmult, 0, AOMMIN(this_rd_stats.dist, this_rd_stats.sse));
- if (dist_cost_estimate - (dist_cost_estimate >> 3) > best_rd_) continue;  // 7/8 of the distortion-only cost already exceeds the best
-
- rate_cost = av1_cost_coeffs(cm, x, plane, block, tx_size, tx_type,
- txb_ctx, use_fast_coef_costing);
- const int64_t rd_estimate =
- AOMMIN(RDCOST(x->rdmult, rate_cost, this_rd_stats.dist),
- RDCOST(x->rdmult, 0, this_rd_stats.sse));
- if (rd_estimate - (rd_estimate >> 3) > best_rd_) continue;  // same 7/8 test with the rate included
- }
- av1_optimize_b(cpi, x, plane, block, tx_size, tx_type, txb_ctx, 1,
- &rate_cost);
- }
- if (eobs_ptr[block] == 0) {
- // When eob is 0, pixel domain distortion is more efficient and accurate.
- this_rd_stats.dist = this_rd_stats.sse = block_sse;
- } else if (use_transform_domain_distortion) {
- dist_block_tx_domain(x, plane, block, tx_size, &this_rd_stats.dist,
- &this_rd_stats.sse);
- } else {
- this_rd_stats.dist = dist_block_px_domain(
- cpi, x, plane, plane_bsize, block, blk_row, blk_col, tx_size);
- this_rd_stats.sse = block_sse;
- }
-
- this_rd_stats.rate = rate_cost;
-
- const int64_t rd =
- RDCOST(x->rdmult, this_rd_stats.rate, this_rd_stats.dist);
-
- if (rd < best_rd) {
- best_rd = rd;
- *best_rd_stats = this_rd_stats;
- best_tx_type = tx_type;
- best_txb_ctx = x->plane[plane].txb_entropy_ctx[block];
- best_eob = x->plane[plane].eobs[block];
- last_tx_type = best_tx_type;
-
- // Swap the dqcoeff buffers so the best candidate's dqcoeff is preserved
- tran_low_t *const tmp_dqcoeff = best_dqcoeff;
- best_dqcoeff = pd->dqcoeff;
- pd->dqcoeff = tmp_dqcoeff;
- }
-
-#if CONFIG_COLLECT_RD_STATS == 1
- if (plane == 0) {
- PrintTransformUnitStats(cpi, x, &this_rd_stats, blk_row, blk_col,
- plane_bsize, tx_size, tx_type, rd);
- }
-#endif // CONFIG_COLLECT_RD_STATS == 1
-
- if (cpi->sf.adaptive_txb_search_level) {
- if ((best_rd - (best_rd >> cpi->sf.adaptive_txb_search_level)) >
- ref_best_rd) {
- break;
- }
- }
-
- // Skip transform type search when we found the block has been quantized to
- // all zero and at the same time, it has better rdcost than doing transform.
- if (cpi->sf.tx_type_search.skip_tx_search && !best_eob) break;
- }
-
- assert(best_rd != INT64_MAX);
-
- best_rd_stats->skip = best_eob == 0;
- if (plane == 0) {
- update_txk_array(mbmi->txk_type, plane_bsize, blk_row, blk_col, tx_size,
- best_tx_type);
- }
- x->plane[plane].txb_entropy_ctx[block] = best_txb_ctx;
- x->plane[plane].eobs[block] = best_eob;
-
- pd->dqcoeff = best_dqcoeff;  // point pd at the buffer holding the winner's dqcoeff
-
- if (calc_pixel_domain_distortion_final && best_eob) {
- best_rd_stats->dist = dist_block_px_domain(
- cpi, x, plane, plane_bsize, block, blk_row, blk_col, tx_size);
- best_rd_stats->sse = block_sse;
- }
-
- if (intra_txb_rd_info != NULL) {  // record the search result in the intra hash entry
- intra_txb_rd_info->valid = 1;
- intra_txb_rd_info->entropy_context = cur_joint_ctx;
- intra_txb_rd_info->rate = best_rd_stats->rate;
- intra_txb_rd_info->dist = best_rd_stats->dist;
- intra_txb_rd_info->sse = best_rd_stats->sse;
- intra_txb_rd_info->eob = best_eob;
- intra_txb_rd_info->txb_entropy_ctx = best_txb_ctx;
- if (plane == 0) intra_txb_rd_info->tx_type = best_tx_type;
- }
-
-RECON_INTRA:
- if (!is_inter && best_eob &&
- (blk_row + tx_size_high_unit[tx_size] < mi_size_high[plane_bsize] ||
- blk_col + tx_size_wide_unit[tx_size] < mi_size_wide[plane_bsize])) {
- // intra mode needs decoded result such that the next transform block
- // can use it for prediction.
- // if the last search tx_type is the best tx_type, we don't need to
- // do this again
- if (best_tx_type != last_tx_type) {
- if (!cpi->optimize_seg_arr[mbmi->segment_id]) {
- av1_xform_quant(
- cm, x, plane, block, blk_row, blk_col, plane_bsize, tx_size,
- best_tx_type,
- USE_B_QUANT_NO_TRELLIS ? AV1_XFORM_QUANT_B : AV1_XFORM_QUANT_FP);
- } else {
- av1_xform_quant(cm, x, plane, block, blk_row, blk_col, plane_bsize,
- tx_size, best_tx_type, AV1_XFORM_QUANT_FP);
- av1_optimize_b(cpi, x, plane, block, tx_size, best_tx_type, txb_ctx, 1,
- &rate_cost);
- }
- }
-
- inverse_transform_block_facade(xd, plane, block, blk_row, blk_col,
- x->plane[plane].eobs[block],
- cm->reduced_tx_set_used);
-
- // This may happen because of hash collision. The eob stored in the hash
- // table is non-zero, but the real eob is zero. We need to make sure tx_type
- // is DCT_DCT in this case.
- if (plane == 0 && x->plane[plane].eobs[block] == 0 &&
- best_tx_type != DCT_DCT) {
- update_txk_array(mbmi->txk_type, plane_bsize, blk_row, blk_col, tx_size,
- DCT_DCT);
- }
- }
- pd->dqcoeff = orig_dqcoeff;  // restore the buffer pointer saved on entry
-
- return best_rd;
-}
-
-// Per-transform-block callback (arg is a struct rdcost_block_args *).
-//
-// For intra blocks the transform block is predicted and the residual formed
-// first. search_txk_type() then provides rate/distortion for the block, which
-// is merged into args->rd_stats and added to args->this_rd. Once
-// args->this_rd exceeds args->best_rd, args->exit_early is set; any later
-// invocation only flags args->incomplete_exit and returns.
-static void block_rd_txfm(int plane, int block, int blk_row, int blk_col,
-                          BLOCK_SIZE plane_bsize, TX_SIZE tx_size, void *arg) {
-  struct rdcost_block_args *const args = arg;
-  MACROBLOCK *const x = args->x;
-  MACROBLOCKD *const xd = &x->e_mbd;
-  const MB_MODE_INFO *const mbmi = xd->mi[0];
-  const AV1_COMP *const cpi = args->cpi;
-  const AV1_COMMON *const cm = &cpi->common;
-  ENTROPY_CONTEXT *const above_ctx = args->t_above + blk_col;
-  ENTROPY_CONTEXT *const left_ctx = args->t_left + blk_row;
-
-  if (args->exit_early) {
-    args->incomplete_exit = 1;
-    return;
-  }
-
-  RD_STATS this_rd_stats;
-  av1_init_rd_stats(&this_rd_stats);
-
-  if (!is_inter_block(mbmi)) {
-    av1_predict_intra_block_facade(cm, xd, plane, blk_col, blk_row, tx_size);
-    av1_subtract_txb(x, plane, plane_bsize, blk_col, blk_row, tx_size);
-  }
-
-  TXB_CTX txb_ctx;
-  get_txb_ctx(plane_bsize, tx_size, plane, above_ctx, left_ctx, &txb_ctx);
-  // The RD budget handed down is what remains of best_rd after the blocks
-  // already accumulated in this_rd.
-  search_txk_type(cpi, x, plane, block, blk_row, blk_col, plane_bsize, tx_size,
-                  &txb_ctx, args->ftxs_mode, args->use_fast_coef_costing,
-                  args->best_rd - args->this_rd, &this_rd_stats);
-
-  if (plane == AOM_PLANE_Y && xd->cfl.store_y) {
-    assert(!is_inter_block(mbmi) || plane_bsize < BLOCK_8X8);
-    cfl_store_tx(xd, blk_row, blk_col, tx_size, plane_bsize);
-  }
-
-#if CONFIG_RD_DEBUG
-  av1_update_txb_coeff_cost(&this_rd_stats, plane, tx_size, blk_row, blk_col,
-                            this_rd_stats.rate);
-#endif  // CONFIG_RD_DEBUG
-  av1_set_txb_context(x, plane, block, tx_size, above_ctx, left_ctx);
-
-  const int blk_idx =
-      blk_row * (block_size_wide[plane_bsize] >> tx_size_wide_log2[0]) +
-      blk_col;
-
-  // Only plane 0 records a real skip flag; other planes always store 0.
-  if (plane != 0) {
-    set_blk_skip(x, plane, blk_idx, 0);
-  } else {
-    set_blk_skip(x, plane, blk_idx, x->plane[plane].eobs[block] == 0);
-  }
-
-  const int64_t coded_rd =
-      RDCOST(x->rdmult, this_rd_stats.rate, this_rd_stats.dist);
-  const int64_t skip_rd = RDCOST(x->rdmult, 0, this_rd_stats.sse);
-
-  // TODO(jingning): temporarily enabled only for luma component
-  const int64_t block_rd = AOMMIN(coded_rd, skip_rd);
-
-  this_rd_stats.skip &= !x->plane[plane].eobs[block];
-
-  av1_merge_rd_stats(&args->rd_stats, &this_rd_stats);
-
-  args->this_rd += block_rd;
-
-  if (args->this_rd > args->best_rd) args->exit_early = 1;
-}
-
-// Runs block_rd_txfm() over every transform block of one plane and returns the
-// accumulated stats in *rd_stats.
-//
-// *rd_stats is marked invalid (av1_invalid_rd_stats) when the per-block walk
-// was cut short: for inter blocks when a block was reached after the early
-// exit was triggered (incomplete_exit), for intra blocks as soon as the early
-// exit was triggered at all (exit_early). For plane 0 the block's tx_size is
-// set to the given tx_size first.
-static void txfm_rd_in_plane(MACROBLOCK *x, const AV1_COMP *cpi,
-                             RD_STATS *rd_stats, int64_t ref_best_rd, int plane,
-                             BLOCK_SIZE bsize, TX_SIZE tx_size,
-                             int use_fast_coef_costing,
-                             FAST_TX_SEARCH_MODE ftxs_mode) {
-  MACROBLOCKD *const xd = &x->e_mbd;
-  const struct macroblockd_plane *const pd = &xd->plane[plane];
-
-  struct rdcost_block_args block_args;
-  av1_zero(block_args);
-  block_args.x = x;
-  block_args.cpi = cpi;
-  block_args.best_rd = ref_best_rd;
-  block_args.use_fast_coef_costing = use_fast_coef_costing;
-  block_args.ftxs_mode = ftxs_mode;
-  av1_init_rd_stats(&block_args.rd_stats);
-
-  if (plane == 0) xd->mi[0]->tx_size = tx_size;
-
-  av1_get_entropy_contexts(bsize, pd, block_args.t_above, block_args.t_left);
-
-  av1_foreach_transformed_block_in_plane(xd, bsize, plane, block_rd_txfm,
-                                         &block_args);
-
-  MB_MODE_INFO *const mbmi = xd->mi[0];
-  int invalid_rd;
-  if (is_inter_block(mbmi)) {
-    invalid_rd = block_args.incomplete_exit;
-  } else {
-    invalid_rd = block_args.exit_early;
-  }
-
-  if (!invalid_rd) {
-    *rd_stats = block_args.rd_stats;
-  } else {
-    av1_invalid_rd_stats(rd_stats);
-  }
-}
-
-// Returns the rate of signalling tx_size for a block of size bsize, or 0 when
-// the frame is not in TX_MODE_SELECT or the block does not signal its
-// transform size.
-static int tx_size_cost(const AV1_COMMON *const cm, const MACROBLOCK *const x,
-                        BLOCK_SIZE bsize, TX_SIZE tx_size) {
-  const MACROBLOCKD *const xd = &x->e_mbd;
-  const MB_MODE_INFO *const mbmi = xd->mi[0];
-
-  if (cm->tx_mode != TX_MODE_SELECT || !block_signals_txsize(mbmi->sb_type)) {
-    return 0;
-  }
-
-  const int32_t size_cat = bsize_to_tx_size_cat(bsize);
-  const int depth = tx_size_to_depth(tx_size, bsize);
-  const int size_ctx = get_tx_size_context(xd);
-  return x->tx_size_cost[size_cat][size_ctx][depth];
-}
-
-// Computes the luma RD cost of coding block bs with transform size tx_size.
-//
-// The block's tx_size is set to tx_size and *rd_stats is filled by
-// txfm_rd_in_plane(). Returns INT64_MAX when that evaluation is invalid
-// (rate == INT_MAX). Otherwise returns the RD cost including the skip flag and,
-// where the transform size is signalled, the transform-size rate; that rate is
-// also added to rd_stats->rate. For non-lossless inter blocks that are not
-// skipped, the result is capped by the cost of skipping the residual.
-static int64_t txfm_yrd(const AV1_COMP *const cpi, MACROBLOCK *x,
-                        RD_STATS *rd_stats, int64_t ref_best_rd, BLOCK_SIZE bs,
-                        TX_SIZE tx_size, FAST_TX_SEARCH_MODE ftxs_mode) {
-  const AV1_COMMON *const cm = &cpi->common;
-  MACROBLOCKD *const xd = &x->e_mbd;
-  MB_MODE_INFO *const mbmi = xd->mi[0];
-  const int skip_ctx = av1_get_skip_context(xd);
-  const int is_inter = is_inter_block(mbmi);
-  const int tx_select =
-      cm->tx_mode == TX_MODE_SELECT && block_signals_txsize(mbmi->sb_type);
-  const int partition_ctx = txfm_partition_context(
-      xd->above_txfm_context, xd->left_txfm_context, mbmi->sb_type, tx_size);
-
-  int r_tx_size;
-  if (is_inter) {
-    r_tx_size = x->txfm_partition_cost[partition_ctx][0];
-  } else {
-    r_tx_size = tx_size_cost(cm, x, bs, tx_size);
-  }
-
-  assert(IMPLIES(is_rect_tx(tx_size), is_rect_tx_allowed_bsize(bs)));
-
-  const int no_skip_rate = x->skip_cost[skip_ctx][0];
-  const int skip_rate = x->skip_cost[skip_ctx][1];
-
-  mbmi->tx_size = tx_size;
-  txfm_rd_in_plane(x, cpi, rd_stats, ref_best_rd, AOM_PLANE_Y, bs, tx_size,
-                   cpi->sf.use_fast_coef_costing, ftxs_mode);
-  if (rd_stats->rate == INT_MAX) return INT64_MAX;
-
-  int64_t rd;
-  if (!rd_stats->skip) {
-    rd = RDCOST(x->rdmult, rd_stats->rate + no_skip_rate + r_tx_size * tx_select,
-                rd_stats->dist);
-  } else if (is_inter) {
-    // Skipped inter blocks are not charged the transform-size rate.
-    rd = RDCOST(x->rdmult, skip_rate, rd_stats->sse);
-  } else {
-    rd = RDCOST(x->rdmult, skip_rate + r_tx_size * tx_select, rd_stats->sse);
-  }
-
-  if (tx_select) rd_stats->rate += r_tx_size;
-
-  if (is_inter && !xd->lossless[xd->mi[0]->segment_id] && !(rd_stats->skip)) {
-    const int64_t skip_rd = RDCOST(x->rdmult, skip_rate, rd_stats->sse);
-    rd = AOMMIN(rd, skip_rd);
-  }
-
-  return rd;
-}
-
-// Estimates the luma RD cost of block bs with the largest rectangular
-// transform size allowed for it, evaluated while x->rd_model is LOW_TXFM_RD.
-// The residual of plane 0 is computed first; x->rd_model is set back to
-// FULL_TXFM_RD afterwards. Rate, distortion, skip flag and SSE are returned
-// through r, d, s and sse; the return value is the RD cost from txfm_yrd().
-static int64_t estimate_yrd_for_sb(const AV1_COMP *const cpi, BLOCK_SIZE bs,
-                                   MACROBLOCK *x, int *r, int64_t *d, int *s,
-                                   int64_t *sse, int64_t ref_best_rd) {
-  RD_STATS stats;
-
-  av1_subtract_plane(x, bs, 0);
-
-  x->rd_model = LOW_TXFM_RD;
-  const int64_t est_rd = txfm_yrd(cpi, x, &stats, ref_best_rd, bs,
-                                  max_txsize_rect_lookup[bs], FTXS_NONE);
-  x->rd_model = FULL_TXFM_RD;
-
-  *r = stats.rate;
-  *d = stats.dist;
-  *s = stats.skip;
-  *sse = stats.sse;
-  return est_rd;
-}
-
-// Evaluates luma RD for block bs using the transform size that
-// tx_size_from_tx_mode() selects for the frame's tx_mode. Transform-type
-// pruning is set up with prune_tx() before the evaluation and the pruning
-// state in x is cleared again afterwards.
-static void choose_largest_tx_size(const AV1_COMP *const cpi, MACROBLOCK *x,
-                                   RD_STATS *rd_stats, int64_t ref_best_rd,
-                                   BLOCK_SIZE bs) {
-  const AV1_COMMON *const cm = &cpi->common;
-  MACROBLOCKD *const xd = &x->e_mbd;
-  MB_MODE_INFO *const mbmi = xd->mi[0];
-  const int is_inter = is_inter_block(mbmi);
-
-  mbmi->tx_size = tx_size_from_tx_mode(bs, cm->tx_mode);
-
-  const TxSetType tx_set_type =
-      av1_get_ext_tx_set_type(mbmi->tx_size, is_inter, cm->reduced_tx_set_used);
-  prune_tx(cpi, bs, x, xd, tx_set_type);
-
-  txfm_rd_in_plane(x, cpi, rd_stats, ref_best_rd, AOM_PLANE_Y, bs,
-                   mbmi->tx_size, cpi->sf.use_fast_coef_costing, FTXS_NONE);
-
-  // Clear the pruning state so it does not leak into the next evaluation.
-  av1_zero(x->tx_search_prune);
-  x->tx_split_prune_flag = 0;
-}
-
-// Evaluates luma RD for block bs with the transform size fixed to TX_4X4.
-static void choose_smallest_tx_size(const AV1_COMP *const cpi, MACROBLOCK *x,
-                                    RD_STATS *rd_stats, int64_t ref_best_rd,
-                                    BLOCK_SIZE bs) {
-  MB_MODE_INFO *const mbmi = x->e_mbd.mi[0];
-
-  mbmi->tx_size = TX_4X4;
-  txfm_rd_in_plane(x, cpi, rd_stats, ref_best_rd, 0, bs, mbmi->tx_size,
-                   cpi->sf.use_fast_coef_costing, FTXS_NONE);
-}
-
-// Number of smallest-transform-sized units in a block of size bsize:
-// 2^(log2(pixel count) - 2 * tx_size_wide_log2[0]).
-static INLINE int bsize_to_num_blk(BLOCK_SIZE bsize) {
-  const int log2_num_blk =
-      num_pels_log2_lookup[bsize] - 2 * tx_size_wide_log2[0];
-  return 1 << log2_num_blk;
-}
-
-// Returns the depth at which the transform-size search starts.
-//
-// MAX_VARTX_DEPTH is returned when the search method is USE_LARGESTALL, or
-// when sf->tx_size_search_lgr_block is set and the block is wider or taller
-// than BLOCK_64X64. Otherwise the speed-feature depth matching the block's
-// inter/intra and square/rectangular class is returned.
-static int get_search_init_depth(int mi_width, int mi_height, int is_inter,
-                                 const SPEED_FEATURES *sf) {
-  if (sf->tx_size_search_method == USE_LARGESTALL) return MAX_VARTX_DEPTH;
-
-  if (sf->tx_size_search_lgr_block &&
-      (mi_width > mi_size_wide[BLOCK_64X64] ||
-       mi_height > mi_size_high[BLOCK_64X64])) {
-    return MAX_VARTX_DEPTH;
-  }
-
-  const int is_rect = (mi_height != mi_width);
-  if (!is_inter) {
-    return is_rect ? sf->intra_tx_size_search_init_depth_rect
-                   : sf->intra_tx_size_search_init_depth_sqr;
-  }
-  return is_rect ? sf->inter_tx_size_search_init_depth_rect
-                 : sf->inter_tx_size_search_init_depth_sqr;
-}
-
-// Searches luma transform sizes for block bs and keeps the one with the lowest
-// RD cost from txfm_yrd().
-//
-// With TX_MODE_SELECT the search starts at the largest rectangular size for
-// the block and at the depth given by get_search_init_depth(); otherwise only
-// the size implied by the frame's tx_mode is tried. Each step moves to
-// sub_tx_size_map[] of the current size and stops after TX_4X4 or once depth
-// exceeds MAX_TX_DEPTH. When a valid candidate was found, the block's tx_size,
-// txk_type[] and x->blk_skip[] are set to those of the winner and *rd_stats
-// holds its stats; otherwise *rd_stats stays invalid. The pruning state in x
-// is cleared before returning.
-static void choose_tx_size_type_from_rd(const AV1_COMP *const cpi,
-                                        MACROBLOCK *x, RD_STATS *rd_stats,
-                                        int64_t ref_best_rd, BLOCK_SIZE bs) {
-  const AV1_COMMON *const cm = &cpi->common;
-  MACROBLOCKD *const xd = &x->e_mbd;
-  MB_MODE_INFO *const mbmi = xd->mi[0];
-  const TX_SIZE max_rect_tx_size = max_txsize_rect_lookup[bs];
-  const int num_blk = bsize_to_num_blk(bs);
-  const int tx_select = cm->tx_mode == TX_MODE_SELECT;
-
-  // Snapshot of the best candidate's per-block state.
-  TX_SIZE best_tx_size = max_rect_tx_size;
-  TX_TYPE best_txk_type[TXK_TYPE_BUF_LEN];
-  uint8_t best_blk_skip[MAX_MIB_SIZE * MAX_MIB_SIZE];
-  int64_t best_rd = INT64_MAX;
-
-  av1_invalid_rd_stats(rd_stats);
-
-  int first_tx;
-  int depth;
-  if (!tx_select) {
-    first_tx = tx_size_from_tx_mode(bs, cm->tx_mode);
-    depth = MAX_TX_DEPTH;
-  } else {
-    first_tx = max_rect_tx_size;
-    depth = get_search_init_depth(mi_size_wide[bs], mi_size_high[bs],
-                                  is_inter_block(mbmi), &cpi->sf);
-  }
-
-  prune_tx(cpi, bs, x, xd, EXT_TX_SET_ALL16);
-
-  for (int tx = first_tx; depth <= MAX_TX_DEPTH;
-       ++depth, tx = sub_tx_size_map[tx]) {
-#if CONFIG_DIST_8X8
-    if (x->using_dist_8x8 && (tx_size_wide[tx] < 8 || tx_size_high[tx] < 8)) {
-      continue;
-    }
-#endif
-    RD_STATS cand_stats;
-    if (mbmi->ref_mv_idx > 0) x->rd_model = LOW_TXFM_RD;
-    const int64_t cand_rd =
-        txfm_yrd(cpi, x, &cand_stats, ref_best_rd, bs, tx, FTXS_NONE);
-    x->rd_model = FULL_TXFM_RD;
-
-    if (cand_rd < best_rd) {
-      memcpy(best_txk_type, mbmi->txk_type, sizeof(best_txk_type));
-      memcpy(best_blk_skip, x->blk_skip, sizeof(best_blk_skip[0]) * num_blk);
-      best_tx_size = tx;
-      best_rd = cand_rd;
-      *rd_stats = cand_stats;
-    }
-    if (tx == TX_4X4) break;
-  }
-
-  if (rd_stats->rate != INT_MAX) {
-    mbmi->tx_size = best_tx_size;
-    memcpy(mbmi->txk_type, best_txk_type, sizeof(best_txk_type));
-    memcpy(x->blk_skip, best_blk_skip, sizeof(best_blk_skip[0]) * num_blk);
-  }
-
-  // Clear the pruning state so it does not leak into the next evaluation.
-  av1_zero(x->tx_search_prune);
-  x->tx_split_prune_flag = 0;
-}
-
-// Luma RD evaluation entry point for block bs. Dispatches on how the transform
-// size is chosen: TX_4X4 only for lossless segments, the tx_mode-implied size
-// when the search method is USE_LARGESTALL, and an RD-based size search
-// otherwise.
-static void super_block_yrd(const AV1_COMP *const cpi, MACROBLOCK *x,
-                            RD_STATS *rd_stats, BLOCK_SIZE bs,
-                            int64_t ref_best_rd) {
-  MACROBLOCKD *const xd = &x->e_mbd;
-  av1_init_rd_stats(rd_stats);
-
-  assert(bs == xd->mi[0]->sb_type);
-
-  if (xd->lossless[xd->mi[0]->segment_id]) {
-    choose_smallest_tx_size(cpi, x, rd_stats, ref_best_rd, bs);
-    return;
-  }
-  if (cpi->sf.tx_size_search_method == USE_LARGESTALL) {
-    choose_largest_tx_size(cpi, x, rd_stats, ref_best_rd, bs);
-    return;
-  }
-  choose_tx_size_type_from_rd(cpi, x, rd_stats, ref_best_rd, bs);
-}
-
-// Returns the rate of the luma prediction mode information of an intra block:
-// mode_cost plus, where applicable, the palette flag and palette data, the
-// filter-intra flag and mode, the angle delta, and the intrabc flag.
-static int intra_mode_info_cost_y(const AV1_COMP *cpi, const MACROBLOCK *x,
-                                  const MB_MODE_INFO *mbmi, BLOCK_SIZE bsize,
-                                  int mode_cost) {
-  const int use_palette = mbmi->palette_mode_info.palette_size[0] > 0;
-  const int use_filter_intra = mbmi->filter_intra_mode_info.use_filter_intra;
-  const int use_intrabc = mbmi->use_intrabc;
-  int rate = mode_cost;
-
-  // Can only activate one mode.
-  assert(((mbmi->mode != DC_PRED) + use_palette + use_intrabc +
-          use_filter_intra) <= 1);
-
-  const int try_palette =
-      av1_allow_palette(cpi->common.allow_screen_content_tools, mbmi->sb_type);
-  if (try_palette && mbmi->mode == DC_PRED) {
-    const MACROBLOCKD *xd = &x->e_mbd;
-    const int bsize_ctx = av1_get_palette_bsize_ctx(bsize);
-    const int mode_ctx = av1_get_palette_mode_ctx(xd);
-
-    // Palette on/off flag.
-    rate += x->palette_y_mode_cost[bsize_ctx][mode_ctx][use_palette];
-
-    if (use_palette) {
-      const uint8_t *const color_map = xd->plane[0].color_index_map;
-      int block_width, block_height, rows, cols;
-      av1_get_block_dimensions(bsize, 0, xd, &block_width, &block_height, &rows,
-                               &cols);
-      const int plt_size = mbmi->palette_mode_info.palette_size[0];
-      uint16_t color_cache[2 * PALETTE_MAX_SIZE];
-
-      // Palette size plus the first colour index.
-      int palette_rate =
-          x->palette_y_size_cost[bsize_ctx][plt_size - PALETTE_MIN_SIZE] +
-          write_uniform_cost(plt_size, color_map[0]);
-      // Palette colours.
-      const int n_cache = av1_get_palette_cache(xd, 0, color_cache);
-      palette_rate +=
-          av1_palette_color_cost_y(&mbmi->palette_mode_info, color_cache,
-                                   n_cache, cpi->common.seq_params.bit_depth);
-      // Colour index map.
-      palette_rate +=
-          av1_cost_color_map(x, 0, bsize, mbmi->tx_size, PALETTE_MAP);
-      rate += palette_rate;
-    }
-  }
-
-  if (av1_filter_intra_allowed(&cpi->common, mbmi)) {
-    rate += x->filter_intra_cost[mbmi->sb_type][use_filter_intra];
-    if (use_filter_intra) {
-      rate += x->filter_intra_mode_cost[mbmi->filter_intra_mode_info
-                                            .filter_intra_mode];
-    }
-  }
-
-  if (av1_is_directional_mode(mbmi->mode) && av1_use_angle_delta(bsize)) {
-    rate += x->angle_delta_cost[mbmi->mode - V_PRED]
-                               [MAX_ANGLE_DELTA +
-                                mbmi->angle_delta[PLANE_TYPE_Y]];
-  }
-
-  if (av1_allow_intrabc(&cpi->common)) rate += x->intrabc_cost[use_intrabc];
-
-  return rate;
-}
-
-// Returns the rate of the chroma prediction mode information of an intra
-// block: mode_cost plus, where applicable, the palette flag and palette data
-// and the angle delta.
-static int intra_mode_info_cost_uv(const AV1_COMP *cpi, const MACROBLOCK *x,
-                                   const MB_MODE_INFO *mbmi, BLOCK_SIZE bsize,
-                                   int mode_cost) {
-  const int use_palette = mbmi->palette_mode_info.palette_size[1] > 0;
-  const UV_PREDICTION_MODE mode = mbmi->uv_mode;
-  int rate = mode_cost;
-
-  // Can only activate one mode.
-  assert(((mode != UV_DC_PRED) + use_palette + mbmi->use_intrabc) <= 1);
-
-  const int try_palette =
-      av1_allow_palette(cpi->common.allow_screen_content_tools, mbmi->sb_type);
-  if (try_palette && mode == UV_DC_PRED) {
-    const PALETTE_MODE_INFO *pmi = &mbmi->palette_mode_info;
-
-    // Palette on/off flag; its context is whether luma uses a palette.
-    rate += x->palette_uv_mode_cost[pmi->palette_size[0] > 0][use_palette];
-
-    if (use_palette) {
-      const int bsize_ctx = av1_get_palette_bsize_ctx(bsize);
-      const int plt_size = pmi->palette_size[1];
-      const MACROBLOCKD *xd = &x->e_mbd;
-      const uint8_t *const color_map = xd->plane[1].color_index_map;
-      uint16_t color_cache[2 * PALETTE_MAX_SIZE];
-
-      // Palette size plus the first colour index.
-      int palette_rate =
-          x->palette_uv_size_cost[bsize_ctx][plt_size - PALETTE_MIN_SIZE] +
-          write_uniform_cost(plt_size, color_map[0]);
-      // Palette colours.
-      const int n_cache = av1_get_palette_cache(xd, 1, color_cache);
-      palette_rate += av1_palette_color_cost_uv(
-          pmi, color_cache, n_cache, cpi->common.seq_params.bit_depth);
-      // Colour index map.
-      palette_rate +=
-          av1_cost_color_map(x, 1, bsize, mbmi->tx_size, PALETTE_MAP);
-      rate += palette_rate;
-    }
-  }
-
-  if (av1_is_directional_mode(get_uv_mode(mode)) &&
-      av1_use_angle_delta(bsize)) {
-    rate +=
-        x->angle_delta_cost[mode - V_PRED][mbmi->angle_delta[PLANE_TYPE_UV] +
-                                           MAX_ANGLE_DELTA];
-  }
-
-  return rate;
-}
-
-static int conditional_skipintra(PREDICTION_MODE mode,
- PREDICTION_MODE best_intra_mode) {
- if (mode == D113_PRED && best_intra_mode != V_PRED &&
- best_intra_mode != D135_PRED)
- return 1;
- if (mode == D67_PRED && best_intra_mode != V_PRED &&
- best_intra_mode != D45_PRED)
- return 1;
- if (mode == D203_PRED && best_intra_mode != H_PRED &&
- best_intra_mode != D45_PRED)
- return 1;
- if (mode == D157_PRED && best_intra_mode != H_PRED &&
- best_intra_mode != D135_PRED)
- return 1;
- return 0;
-}
-
-// Model based RD estimation for luma intra blocks.
-static int64_t intra_model_yrd(const AV1_COMP *const cpi, MACROBLOCK *const x,
- BLOCK_SIZE bsize, int mode_cost, int mi_row,
- int mi_col) {
- const AV1_COMMON *cm = &cpi->common;
- MACROBLOCKD *const xd = &x->e_mbd;
- MB_MODE_INFO *const mbmi = xd->mi[0];
- assert(!is_inter_block(mbmi));
- RD_STATS this_rd_stats;
- int row, col;
- int64_t temp_sse, this_rd;
- TX_SIZE tx_size = tx_size_from_tx_mode(bsize, cm->tx_mode);
- const int stepr = tx_size_high_unit[tx_size];
- const int stepc = tx_size_wide_unit[tx_size];
- const int max_blocks_wide = max_block_wide(xd, bsize, 0);
- const int max_blocks_high = max_block_high(xd, bsize, 0);
- mbmi->tx_size = tx_size;
- // Prediction.
- for (row = 0; row < max_blocks_high; row += stepr) {
- for (col = 0; col < max_blocks_wide; col += stepc) {
- av1_predict_intra_block_facade(cm, xd, 0, col, row, tx_size);
- }
- }
- // RD estimation.
- model_rd_sb_fn[MODELRD_TYPE_INTRA](
- cpi, bsize, x, xd, 0, 0, mi_row, mi_col, &this_rd_stats.rate,
- &this_rd_stats.dist, &this_rd_stats.skip, &temp_sse, NULL, NULL, NULL);
- if (av1_is_directional_mode(mbmi->mode) && av1_use_angle_delta(bsize)) {
- mode_cost +=
- x->angle_delta_cost[mbmi->mode - V_PRED]
- [MAX_ANGLE_DELTA + mbmi->angle_delta[PLANE_TYPE_Y]];
- }
- if (mbmi->mode == DC_PRED &&
- av1_filter_intra_allowed_bsize(cm, mbmi->sb_type)) {
- if (mbmi->filter_intra_mode_info.use_filter_intra) {
- const int mode = mbmi->filter_intra_mode_info.filter_intra_mode;
- mode_cost += x->filter_intra_cost[mbmi->sb_type][1] +
- x->filter_intra_mode_cost[mode];
- } else {
- mode_cost += x->filter_intra_cost[mbmi->sb_type][0];
- }
- }
- this_rd =
- RDCOST(x->rdmult, this_rd_stats.rate + mode_cost, this_rd_stats.dist);
- return this_rd;
-}
-
-// Extends 'color_map' array from 'orig_width x orig_height' to 'new_width x
-// new_height'. Extra rows and columns are filled in by copying last valid
-// row/column.
-static void extend_palette_color_map(uint8_t *const color_map, int orig_width,
- int orig_height, int new_width,
- int new_height) {
- int j;
- assert(new_width >= orig_width);
- assert(new_height >= orig_height);
- if (new_width == orig_width && new_height == orig_height) return;
-
- for (j = orig_height - 1; j >= 0; --j) {
- memmove(color_map + j * new_width, color_map + j * orig_width, orig_width);
- // Copy last column to extra columns.
- memset(color_map + j * new_width + orig_width,
- color_map[j * new_width + orig_width - 1], new_width - orig_width);
- }
- // Copy last row to extra rows.
- for (j = orig_height; j < new_height; ++j) {
- memcpy(color_map + j * new_width, color_map + (orig_height - 1) * new_width,
- new_width);
- }
-}
-
-// Bias toward using colors in the cache.
-// TODO(huisu): Try other schemes to improve compression.
-static void optimize_palette_colors(uint16_t *color_cache, int n_cache,
- int n_colors, int stride, int *centroids) {
- if (n_cache <= 0) return;
- for (int i = 0; i < n_colors * stride; i += stride) {
- int min_diff = abs(centroids[i] - (int)color_cache[0]);
- int idx = 0;
- for (int j = 1; j < n_cache; ++j) {
- const int this_diff = abs(centroids[i] - color_cache[j]);
- if (this_diff < min_diff) {
- min_diff = this_diff;
- idx = j;
- }
- }
- if (min_diff <= 1) centroids[i] = color_cache[idx];
- }
-}
-
-// Given the base colors as specified in centroids[], calculate the RD cost
-// of palette mode.
-static void palette_rd_y(const AV1_COMP *const cpi, MACROBLOCK *x,
- MB_MODE_INFO *mbmi, BLOCK_SIZE bsize, int mi_row,
- int mi_col, int dc_mode_cost, const int *data,
- int *centroids, int n, uint16_t *color_cache,
- int n_cache, MB_MODE_INFO *best_mbmi,
- uint8_t *best_palette_color_map, int64_t *best_rd,
- int64_t *best_model_rd, int *rate, int *rate_tokenonly,
- int *rate_overhead, int64_t *distortion,
- int *skippable, PICK_MODE_CONTEXT *ctx,
- uint8_t *blk_skip) {
- optimize_palette_colors(color_cache, n_cache, n, 1, centroids);
- int k = av1_remove_duplicates(centroids, n);
- if (k < PALETTE_MIN_SIZE) {
- // Too few unique colors to create a palette. And DC_PRED will work
- // well for that case anyway. So skip.
- return;
- }
- PALETTE_MODE_INFO *const pmi = &mbmi->palette_mode_info;
- if (cpi->common.seq_params.use_highbitdepth)
- for (int i = 0; i < k; ++i)
- pmi->palette_colors[i] = clip_pixel_highbd(
- (int)centroids[i], cpi->common.seq_params.bit_depth);
- else
- for (int i = 0; i < k; ++i)
- pmi->palette_colors[i] = clip_pixel(centroids[i]);
- pmi->palette_size[0] = k;
- MACROBLOCKD *const xd = &x->e_mbd;
- uint8_t *const color_map = xd->plane[0].color_index_map;
- int block_width, block_height, rows, cols;
- av1_get_block_dimensions(bsize, 0, xd, &block_width, &block_height, &rows,
- &cols);
- av1_calc_indices(data, centroids, color_map, rows * cols, k, 1);
- extend_palette_color_map(color_map, cols, rows, block_width, block_height);
- const int palette_mode_cost =
- intra_mode_info_cost_y(cpi, x, mbmi, bsize, dc_mode_cost);
- int64_t this_model_rd =
- intra_model_yrd(cpi, x, bsize, palette_mode_cost, mi_row, mi_col);
- if (*best_model_rd != INT64_MAX &&
- this_model_rd > *best_model_rd + (*best_model_rd >> 1))
- return;
- if (this_model_rd < *best_model_rd) *best_model_rd = this_model_rd;
- RD_STATS tokenonly_rd_stats;
- super_block_yrd(cpi, x, &tokenonly_rd_stats, bsize, *best_rd);
- if (tokenonly_rd_stats.rate == INT_MAX) return;
- int this_rate = tokenonly_rd_stats.rate + palette_mode_cost;
- int64_t this_rd = RDCOST(x->rdmult, this_rate, tokenonly_rd_stats.dist);
- if (!xd->lossless[mbmi->segment_id] && block_signals_txsize(mbmi->sb_type)) {
- tokenonly_rd_stats.rate -=
- tx_size_cost(&cpi->common, x, bsize, mbmi->tx_size);
- }
- if (this_rd < *best_rd) {
- *best_rd = this_rd;
- memcpy(best_palette_color_map, color_map,
- block_width * block_height * sizeof(color_map[0]));
- *best_mbmi = *mbmi;
- memcpy(blk_skip, x->blk_skip, sizeof(x->blk_skip[0]) * ctx->num_4x4_blk);
- *rate_overhead = this_rate - tokenonly_rd_stats.rate;
- if (rate) *rate = this_rate;
- if (rate_tokenonly) *rate_tokenonly = tokenonly_rd_stats.rate;
- if (distortion) *distortion = tokenonly_rd_stats.dist;
- if (skippable) *skippable = tokenonly_rd_stats.skip;
- }
-}
-
-static int rd_pick_palette_intra_sby(
- const AV1_COMP *const cpi, MACROBLOCK *x, BLOCK_SIZE bsize, int mi_row,
- int mi_col, int dc_mode_cost, MB_MODE_INFO *best_mbmi,
- uint8_t *best_palette_color_map, int64_t *best_rd, int64_t *best_model_rd,
- int *rate, int *rate_tokenonly, int64_t *distortion, int *skippable,
- PICK_MODE_CONTEXT *ctx, uint8_t *best_blk_skip) {
- int rate_overhead = 0;
- MACROBLOCKD *const xd = &x->e_mbd;
- MB_MODE_INFO *const mbmi = xd->mi[0];
- assert(!is_inter_block(mbmi));
- assert(av1_allow_palette(cpi->common.allow_screen_content_tools, bsize));
- const SequenceHeader *const seq_params = &cpi->common.seq_params;
- int colors, n;
- const int src_stride = x->plane[0].src.stride;
- const uint8_t *const src = x->plane[0].src.buf;
- uint8_t *const color_map = xd->plane[0].color_index_map;
- int block_width, block_height, rows, cols;
- av1_get_block_dimensions(bsize, 0, xd, &block_width, &block_height, &rows,
- &cols);
-
- int count_buf[1 << 12]; // Maximum (1 << 12) color levels.
- if (seq_params->use_highbitdepth)
- colors = av1_count_colors_highbd(src, src_stride, rows, cols,
- seq_params->bit_depth, count_buf);
- else
- colors = av1_count_colors(src, src_stride, rows, cols, count_buf);
- mbmi->filter_intra_mode_info.use_filter_intra = 0;
-
- if (colors > 1 && colors <= 64) {
- int r, c, i;
- const int max_itr = 50;
- int *const data = x->palette_buffer->kmeans_data_buf;
- int centroids[PALETTE_MAX_SIZE];
- int lb, ub, val;
- uint16_t *src16 = CONVERT_TO_SHORTPTR(src);
- if (seq_params->use_highbitdepth)
- lb = ub = src16[0];
- else
- lb = ub = src[0];
-
- if (seq_params->use_highbitdepth) {
- for (r = 0; r < rows; ++r) {
- for (c = 0; c < cols; ++c) {
- val = src16[r * src_stride + c];
- data[r * cols + c] = val;
- if (val < lb)
- lb = val;
- else if (val > ub)
- ub = val;
- }
- }
- } else {
- for (r = 0; r < rows; ++r) {
- for (c = 0; c < cols; ++c) {
- val = src[r * src_stride + c];
- data[r * cols + c] = val;
- if (val < lb)
- lb = val;
- else if (val > ub)
- ub = val;
- }
- }
- }
-
- mbmi->mode = DC_PRED;
- mbmi->filter_intra_mode_info.use_filter_intra = 0;
-
- uint16_t color_cache[2 * PALETTE_MAX_SIZE];
- const int n_cache = av1_get_palette_cache(xd, 0, color_cache);
-
- // Find the dominant colors, stored in top_colors[].
- int top_colors[PALETTE_MAX_SIZE] = { 0 };
- for (i = 0; i < AOMMIN(colors, PALETTE_MAX_SIZE); ++i) {
- int max_count = 0;
- for (int j = 0; j < (1 << seq_params->bit_depth); ++j) {
- if (count_buf[j] > max_count) {
- max_count = count_buf[j];
- top_colors[i] = j;
- }
- }
- assert(max_count > 0);
- count_buf[top_colors[i]] = 0;
- }
-
- // Try the dominant colors directly.
- // TODO(huisu@google.com): Try to avoid duplicate computation in cases
- // where the dominant colors and the k-means results are similar.
- for (n = AOMMIN(colors, PALETTE_MAX_SIZE); n >= 2; --n) {
- for (i = 0; i < n; ++i) centroids[i] = top_colors[i];
- palette_rd_y(cpi, x, mbmi, bsize, mi_row, mi_col, dc_mode_cost, data,
- centroids, n, color_cache, n_cache, best_mbmi,
- best_palette_color_map, best_rd, best_model_rd, rate,
- rate_tokenonly, &rate_overhead, distortion, skippable, ctx,
- best_blk_skip);
- }
-
- // K-means clustering.
- for (n = AOMMIN(colors, PALETTE_MAX_SIZE); n >= 2; --n) {
- if (colors == PALETTE_MIN_SIZE) {
- // Special case: These colors automatically become the centroids.
- assert(colors == n);
- assert(colors == 2);
- centroids[0] = lb;
- centroids[1] = ub;
- } else {
- for (i = 0; i < n; ++i) {
- centroids[i] = lb + (2 * i + 1) * (ub - lb) / n / 2;
- }
- av1_k_means(data, centroids, color_map, rows * cols, n, 1, max_itr);
- }
- palette_rd_y(cpi, x, mbmi, bsize, mi_row, mi_col, dc_mode_cost, data,
- centroids, n, color_cache, n_cache, best_mbmi,
- best_palette_color_map, best_rd, best_model_rd, rate,
- rate_tokenonly, &rate_overhead, distortion, skippable, ctx,
- best_blk_skip);
- }
- }
-
- if (best_mbmi->palette_mode_info.palette_size[0] > 0) {
- memcpy(color_map, best_palette_color_map,
- block_width * block_height * sizeof(best_palette_color_map[0]));
- }
- *mbmi = *best_mbmi;
- return rate_overhead;
-}
-
-// Return 1 if an filter intra mode is selected; return 0 otherwise.
-static int rd_pick_filter_intra_sby(const AV1_COMP *const cpi, MACROBLOCK *x,
- int mi_row, int mi_col, int *rate,
- int *rate_tokenonly, int64_t *distortion,
- int *skippable, BLOCK_SIZE bsize,
- int mode_cost, int64_t *best_rd,
- int64_t *best_model_rd,
- PICK_MODE_CONTEXT *ctx) {
- MACROBLOCKD *const xd = &x->e_mbd;
- MB_MODE_INFO *mbmi = xd->mi[0];
- int filter_intra_selected_flag = 0;
- FILTER_INTRA_MODE mode;
- TX_SIZE best_tx_size = TX_8X8;
- FILTER_INTRA_MODE_INFO filter_intra_mode_info;
- TX_TYPE best_txk_type[TXK_TYPE_BUF_LEN];
- (void)ctx;
- av1_zero(filter_intra_mode_info);
- mbmi->filter_intra_mode_info.use_filter_intra = 1;
- mbmi->mode = DC_PRED;
- mbmi->palette_mode_info.palette_size[0] = 0;
-
- for (mode = 0; mode < FILTER_INTRA_MODES; ++mode) {
- int64_t this_rd, this_model_rd;
- RD_STATS tokenonly_rd_stats;
- mbmi->filter_intra_mode_info.filter_intra_mode = mode;
- this_model_rd = intra_model_yrd(cpi, x, bsize, mode_cost, mi_row, mi_col);
- if (*best_model_rd != INT64_MAX &&
- this_model_rd > *best_model_rd + (*best_model_rd >> 1))
- continue;
- if (this_model_rd < *best_model_rd) *best_model_rd = this_model_rd;
- super_block_yrd(cpi, x, &tokenonly_rd_stats, bsize, *best_rd);
- if (tokenonly_rd_stats.rate == INT_MAX) continue;
- const int this_rate =
- tokenonly_rd_stats.rate +
- intra_mode_info_cost_y(cpi, x, mbmi, bsize, mode_cost);
- this_rd = RDCOST(x->rdmult, this_rate, tokenonly_rd_stats.dist);
-
- if (this_rd < *best_rd) {
- *best_rd = this_rd;
- best_tx_size = mbmi->tx_size;
- filter_intra_mode_info = mbmi->filter_intra_mode_info;
- memcpy(best_txk_type, mbmi->txk_type,
- sizeof(best_txk_type[0]) * TXK_TYPE_BUF_LEN);
- memcpy(ctx->blk_skip, x->blk_skip,
- sizeof(x->blk_skip[0]) * ctx->num_4x4_blk);
- *rate = this_rate;
- *rate_tokenonly = tokenonly_rd_stats.rate;
- *distortion = tokenonly_rd_stats.dist;
- *skippable = tokenonly_rd_stats.skip;
- filter_intra_selected_flag = 1;
- }
- }
-
- if (filter_intra_selected_flag) {
- mbmi->mode = DC_PRED;
- mbmi->tx_size = best_tx_size;
- mbmi->filter_intra_mode_info = filter_intra_mode_info;
- memcpy(mbmi->txk_type, best_txk_type,
- sizeof(best_txk_type[0]) * TXK_TYPE_BUF_LEN);
- return 1;
- } else {
- return 0;
- }
-}
-
-// Run RD calculation with given luma intra prediction angle., and return
-// the RD cost. Update the best mode info. if the RD cost is the best so far.
-static int64_t calc_rd_given_intra_angle(
- const AV1_COMP *const cpi, MACROBLOCK *x, BLOCK_SIZE bsize, int mi_row,
- int mi_col, int mode_cost, int64_t best_rd_in, int8_t angle_delta,
- int max_angle_delta, int *rate, RD_STATS *rd_stats, int *best_angle_delta,
- TX_SIZE *best_tx_size, int64_t *best_rd, int64_t *best_model_rd,
- TX_TYPE *best_txk_type, uint8_t *best_blk_skip) {
- RD_STATS tokenonly_rd_stats;
- int64_t this_rd, this_model_rd;
- MB_MODE_INFO *mbmi = x->e_mbd.mi[0];
- const int n4 = bsize_to_num_blk(bsize);
- assert(!is_inter_block(mbmi));
- mbmi->angle_delta[PLANE_TYPE_Y] = angle_delta;
- this_model_rd = intra_model_yrd(cpi, x, bsize, mode_cost, mi_row, mi_col);
- if (*best_model_rd != INT64_MAX &&
- this_model_rd > *best_model_rd + (*best_model_rd >> 1))
- return INT64_MAX;
- if (this_model_rd < *best_model_rd) *best_model_rd = this_model_rd;
- super_block_yrd(cpi, x, &tokenonly_rd_stats, bsize, best_rd_in);
- if (tokenonly_rd_stats.rate == INT_MAX) return INT64_MAX;
-
- int this_rate =
- mode_cost + tokenonly_rd_stats.rate +
- x->angle_delta_cost[mbmi->mode - V_PRED][max_angle_delta + angle_delta];
- this_rd = RDCOST(x->rdmult, this_rate, tokenonly_rd_stats.dist);
-
- if (this_rd < *best_rd) {
- memcpy(best_txk_type, mbmi->txk_type,
- sizeof(*best_txk_type) * TXK_TYPE_BUF_LEN);
- memcpy(best_blk_skip, x->blk_skip, sizeof(best_blk_skip[0]) * n4);
- *best_rd = this_rd;
- *best_angle_delta = mbmi->angle_delta[PLANE_TYPE_Y];
- *best_tx_size = mbmi->tx_size;
- *rate = this_rate;
- rd_stats->rate = tokenonly_rd_stats.rate;
- rd_stats->dist = tokenonly_rd_stats.dist;
- rd_stats->skip = tokenonly_rd_stats.skip;
- }
- return this_rd;
-}
-
-// With given luma directional intra prediction mode, pick the best angle delta
-// Return the RD cost corresponding to the best angle delta.
-static int64_t rd_pick_intra_angle_sby(const AV1_COMP *const cpi, MACROBLOCK *x,
- int mi_row, int mi_col, int *rate,
- RD_STATS *rd_stats, BLOCK_SIZE bsize,
- int mode_cost, int64_t best_rd,
- int64_t *best_model_rd) {
- MB_MODE_INFO *mbmi = x->e_mbd.mi[0];
- assert(!is_inter_block(mbmi));
-
- int best_angle_delta = 0;
- int64_t rd_cost[2 * (MAX_ANGLE_DELTA + 2)];
- TX_SIZE best_tx_size = mbmi->tx_size;
- TX_TYPE best_txk_type[TXK_TYPE_BUF_LEN];
- uint8_t best_blk_skip[MAX_MIB_SIZE * MAX_MIB_SIZE];
-
- for (int i = 0; i < 2 * (MAX_ANGLE_DELTA + 2); ++i) rd_cost[i] = INT64_MAX;
-
- int first_try = 1;
- for (int angle_delta = 0; angle_delta <= MAX_ANGLE_DELTA; angle_delta += 2) {
- for (int i = 0; i < 2; ++i) {
- const int64_t best_rd_in =
- (best_rd == INT64_MAX) ? INT64_MAX
- : (best_rd + (best_rd >> (first_try ? 3 : 5)));
- const int64_t this_rd = calc_rd_given_intra_angle(
- cpi, x, bsize, mi_row, mi_col, mode_cost, best_rd_in,
- (1 - 2 * i) * angle_delta, MAX_ANGLE_DELTA, rate, rd_stats,
- &best_angle_delta, &best_tx_size, &best_rd, best_model_rd,
- best_txk_type, best_blk_skip);
- rd_cost[2 * angle_delta + i] = this_rd;
- if (first_try && this_rd == INT64_MAX) return best_rd;
- first_try = 0;
- if (angle_delta == 0) {
- rd_cost[1] = this_rd;
- break;
- }
- }
- }
-
- assert(best_rd != INT64_MAX);
- for (int angle_delta = 1; angle_delta <= MAX_ANGLE_DELTA; angle_delta += 2) {
- for (int i = 0; i < 2; ++i) {
- int skip_search = 0;
- const int64_t rd_thresh = best_rd + (best_rd >> 5);
- if (rd_cost[2 * (angle_delta + 1) + i] > rd_thresh &&
- rd_cost[2 * (angle_delta - 1) + i] > rd_thresh)
- skip_search = 1;
- if (!skip_search) {
- calc_rd_given_intra_angle(cpi, x, bsize, mi_row, mi_col, mode_cost,
- best_rd, (1 - 2 * i) * angle_delta,
- MAX_ANGLE_DELTA, rate, rd_stats,
- &best_angle_delta, &best_tx_size, &best_rd,
- best_model_rd, best_txk_type, best_blk_skip);
- }
- }
- }
-
- if (rd_stats->rate != INT_MAX) {
- mbmi->tx_size = best_tx_size;
- mbmi->angle_delta[PLANE_TYPE_Y] = best_angle_delta;
- memcpy(mbmi->txk_type, best_txk_type,
- sizeof(*best_txk_type) * TXK_TYPE_BUF_LEN);
- memcpy(x->blk_skip, best_blk_skip,
- sizeof(best_blk_skip[0]) * bsize_to_num_blk(bsize));
- }
- return best_rd;
-}
-
-// Indices are sign, integer, and fractional part of the gradient value
-static const uint8_t gradient_to_angle_bin[2][7][16] = {
- {
- { 6, 6, 6, 6, 7, 7, 7, 7, 7, 7, 7, 7, 0, 0, 0, 0 },
- { 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1 },
- { 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 },
- { 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 },
- { 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 },
- { 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2 },
- { 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2 },
- },
- {
- { 6, 6, 6, 6, 5, 5, 5, 5, 5, 5, 5, 5, 4, 4, 4, 4 },
- { 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 3, 3, 3, 3, 3, 3 },
- { 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3 },
- { 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3 },
- { 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3 },
- { 3, 3, 3, 3, 3, 3, 3, 2, 2, 2, 2, 2, 2, 2, 2, 2 },
- { 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2 },
- },
-};
-
-/* clang-format off */
-static const uint8_t mode_to_angle_bin[INTRA_MODES] = {
- 0, 2, 6, 0, 4, 3, 5, 7, 1, 0,
- 0,
-};
-/* clang-format on */
-
-static void angle_estimation(const uint8_t *src, int src_stride, int rows,
- int cols, BLOCK_SIZE bsize,
- uint8_t *directional_mode_skip_mask) {
- memset(directional_mode_skip_mask, 0,
- INTRA_MODES * sizeof(*directional_mode_skip_mask));
- // Check if angle_delta is used
- if (!av1_use_angle_delta(bsize)) return;
- uint64_t hist[DIRECTIONAL_MODES];
- memset(hist, 0, DIRECTIONAL_MODES * sizeof(hist[0]));
- src += src_stride;
- int r, c, dx, dy;
- for (r = 1; r < rows; ++r) {
- for (c = 1; c < cols; ++c) {
- dx = src[c] - src[c - 1];
- dy = src[c] - src[c - src_stride];
- int index;
- const int temp = dx * dx + dy * dy;
- if (dy == 0) {
- index = 2;
- } else {
- const int sn = (dx > 0) ^ (dy > 0);
- dx = abs(dx);
- dy = abs(dy);
- const int remd = (dx % dy) * 16 / dy;
- const int quot = dx / dy;
- index = gradient_to_angle_bin[sn][AOMMIN(quot, 6)][AOMMIN(remd, 15)];
- }
- hist[index] += temp;
- }
- src += src_stride;
- }
-
- int i;
- uint64_t hist_sum = 0;
- for (i = 0; i < DIRECTIONAL_MODES; ++i) hist_sum += hist[i];
- for (i = 0; i < INTRA_MODES; ++i) {
- if (av1_is_directional_mode(i)) {
- const uint8_t angle_bin = mode_to_angle_bin[i];
- uint64_t score = 2 * hist[angle_bin];
- int weight = 2;
- if (angle_bin > 0) {
- score += hist[angle_bin - 1];
- ++weight;
- }
- if (angle_bin < DIRECTIONAL_MODES - 1) {
- score += hist[angle_bin + 1];
- ++weight;
- }
- if (score * ANGLE_SKIP_THRESH < hist_sum * weight)
- directional_mode_skip_mask[i] = 1;
- }
- }
-}
-
-static void highbd_angle_estimation(const uint8_t *src8, int src_stride,
- int rows, int cols, BLOCK_SIZE bsize,
- uint8_t *directional_mode_skip_mask) {
- memset(directional_mode_skip_mask, 0,
- INTRA_MODES * sizeof(*directional_mode_skip_mask));
- // Check if angle_delta is used
- if (!av1_use_angle_delta(bsize)) return;
- uint16_t *src = CONVERT_TO_SHORTPTR(src8);
- uint64_t hist[DIRECTIONAL_MODES];
- memset(hist, 0, DIRECTIONAL_MODES * sizeof(hist[0]));
- src += src_stride;
- int r, c, dx, dy;
- for (r = 1; r < rows; ++r) {
- for (c = 1; c < cols; ++c) {
- dx = src[c] - src[c - 1];
- dy = src[c] - src[c - src_stride];
- int index;
- const int temp = dx * dx + dy * dy;
- if (dy == 0) {
- index = 2;
- } else {
- const int sn = (dx > 0) ^ (dy > 0);
- dx = abs(dx);
- dy = abs(dy);
- const int remd = (dx % dy) * 16 / dy;
- const int quot = dx / dy;
- index = gradient_to_angle_bin[sn][AOMMIN(quot, 6)][AOMMIN(remd, 15)];
- }
- hist[index] += temp;
- }
- src += src_stride;
- }
-
- int i;
- uint64_t hist_sum = 0;
- for (i = 0; i < DIRECTIONAL_MODES; ++i) hist_sum += hist[i];
- for (i = 0; i < INTRA_MODES; ++i) {
- if (av1_is_directional_mode(i)) {
- const uint8_t angle_bin = mode_to_angle_bin[i];
- uint64_t score = 2 * hist[angle_bin];
- int weight = 2;
- if (angle_bin > 0) {
- score += hist[angle_bin - 1];
- ++weight;
- }
- if (angle_bin < DIRECTIONAL_MODES - 1) {
- score += hist[angle_bin + 1];
- ++weight;
- }
- if (score * ANGLE_SKIP_THRESH < hist_sum * weight)
- directional_mode_skip_mask[i] = 1;
- }
- }
-}
-
-// Given selected prediction mode, search for the best tx type and size.
-static void intra_block_yrd(const AV1_COMP *const cpi, MACROBLOCK *x,
- BLOCK_SIZE bsize, const int *bmode_costs,
- int64_t *best_rd, int *rate, int *rate_tokenonly,
- int64_t *distortion, int *skippable,
- MB_MODE_INFO *best_mbmi, PICK_MODE_CONTEXT *ctx) {
- MACROBLOCKD *const xd = &x->e_mbd;
- MB_MODE_INFO *const mbmi = xd->mi[0];
- RD_STATS rd_stats;
- super_block_yrd(cpi, x, &rd_stats, bsize, *best_rd);
- if (rd_stats.rate == INT_MAX) return;
- int this_rate_tokenonly = rd_stats.rate;
- if (!xd->lossless[mbmi->segment_id] && block_signals_txsize(mbmi->sb_type)) {
- // super_block_yrd above includes the cost of the tx_size in the
- // tokenonly rate, but for intra blocks, tx_size is always coded
- // (prediction granularity), so we account for it in the full rate,
- // not the tokenonly rate.
- this_rate_tokenonly -= tx_size_cost(&cpi->common, x, bsize, mbmi->tx_size);
- }
- const int this_rate =
- rd_stats.rate +
- intra_mode_info_cost_y(cpi, x, mbmi, bsize, bmode_costs[mbmi->mode]);
- const int64_t this_rd = RDCOST(x->rdmult, this_rate, rd_stats.dist);
- if (this_rd < *best_rd) {
- *best_mbmi = *mbmi;
- *best_rd = this_rd;
- *rate = this_rate;
- *rate_tokenonly = this_rate_tokenonly;
- *distortion = rd_stats.dist;
- *skippable = rd_stats.skip;
- memcpy(ctx->blk_skip, x->blk_skip,
- sizeof(x->blk_skip[0]) * ctx->num_4x4_blk);
- }
-}
-
-// This function is used only for intra_only frames
-static int64_t rd_pick_intra_sby_mode(const AV1_COMP *const cpi, MACROBLOCK *x,
- int mi_row, int mi_col, int *rate,
- int *rate_tokenonly, int64_t *distortion,
- int *skippable, BLOCK_SIZE bsize,
- int64_t best_rd, PICK_MODE_CONTEXT *ctx) {
- MACROBLOCKD *const xd = &x->e_mbd;
- MB_MODE_INFO *const mbmi = xd->mi[0];
- assert(!is_inter_block(mbmi));
- int64_t best_model_rd = INT64_MAX;
- const int rows = block_size_high[bsize];
- const int cols = block_size_wide[bsize];
- int is_directional_mode;
- uint8_t directional_mode_skip_mask[INTRA_MODES];
- const int src_stride = x->plane[0].src.stride;
- const uint8_t *src = x->plane[0].src.buf;
- int beat_best_rd = 0;
- const int *bmode_costs;
- PALETTE_MODE_INFO *const pmi = &mbmi->palette_mode_info;
- const int try_palette =
- av1_allow_palette(cpi->common.allow_screen_content_tools, mbmi->sb_type);
- uint8_t *best_palette_color_map =
- try_palette ? x->palette_buffer->best_palette_color_map : NULL;
- const MB_MODE_INFO *above_mi = xd->above_mbmi;
- const MB_MODE_INFO *left_mi = xd->left_mbmi;
- const PREDICTION_MODE A = av1_above_block_mode(above_mi);
- const PREDICTION_MODE L = av1_left_block_mode(left_mi);
- const int above_ctx = intra_mode_context[A];
- const int left_ctx = intra_mode_context[L];
- bmode_costs = x->y_mode_costs[above_ctx][left_ctx];
-
- mbmi->angle_delta[PLANE_TYPE_Y] = 0;
- if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH)
- highbd_angle_estimation(src, src_stride, rows, cols, bsize,
- directional_mode_skip_mask);
- else
- angle_estimation(src, src_stride, rows, cols, bsize,
- directional_mode_skip_mask);
- mbmi->filter_intra_mode_info.use_filter_intra = 0;
- pmi->palette_size[0] = 0;
-
- if (cpi->sf.tx_type_search.fast_intra_tx_type_search)
- x->use_default_intra_tx_type = 1;
- else
- x->use_default_intra_tx_type = 0;
-
- MB_MODE_INFO best_mbmi = *mbmi;
- /* Y Search for intra prediction mode */
- for (int mode_idx = INTRA_MODE_START; mode_idx < INTRA_MODE_END; ++mode_idx) {
- RD_STATS this_rd_stats;
- int this_rate, this_rate_tokenonly, s;
- int64_t this_distortion, this_rd, this_model_rd;
- mbmi->mode = intra_rd_search_mode_order[mode_idx];
- mbmi->angle_delta[PLANE_TYPE_Y] = 0;
- this_model_rd =
- intra_model_yrd(cpi, x, bsize, bmode_costs[mbmi->mode], mi_row, mi_col);
- if (best_model_rd != INT64_MAX &&
- this_model_rd > best_model_rd + (best_model_rd >> 1))
- continue;
- if (this_model_rd < best_model_rd) best_model_rd = this_model_rd;
- is_directional_mode = av1_is_directional_mode(mbmi->mode);
- if (is_directional_mode && directional_mode_skip_mask[mbmi->mode]) continue;
- if (is_directional_mode && av1_use_angle_delta(bsize)) {
- this_rd_stats.rate = INT_MAX;
- rd_pick_intra_angle_sby(cpi, x, mi_row, mi_col, &this_rate,
- &this_rd_stats, bsize, bmode_costs[mbmi->mode],
- best_rd, &best_model_rd);
- } else {
- super_block_yrd(cpi, x, &this_rd_stats, bsize, best_rd);
- }
- this_rate_tokenonly = this_rd_stats.rate;
- this_distortion = this_rd_stats.dist;
- s = this_rd_stats.skip;
-
- if (this_rate_tokenonly == INT_MAX) continue;
-
- if (!xd->lossless[mbmi->segment_id] &&
- block_signals_txsize(mbmi->sb_type)) {
- // super_block_yrd above includes the cost of the tx_size in the
- // tokenonly rate, but for intra blocks, tx_size is always coded
- // (prediction granularity), so we account for it in the full rate,
- // not the tokenonly rate.
- this_rate_tokenonly -=
- tx_size_cost(&cpi->common, x, bsize, mbmi->tx_size);
- }
- this_rate =
- this_rd_stats.rate +
- intra_mode_info_cost_y(cpi, x, mbmi, bsize, bmode_costs[mbmi->mode]);
- this_rd = RDCOST(x->rdmult, this_rate, this_distortion);
- if (this_rd < best_rd) {
- best_mbmi = *mbmi;
- best_rd = this_rd;
- beat_best_rd = 1;
- *rate = this_rate;
- *rate_tokenonly = this_rate_tokenonly;
- *distortion = this_distortion;
- *skippable = s;
- memcpy(ctx->blk_skip, x->blk_skip,
- sizeof(x->blk_skip[0]) * ctx->num_4x4_blk);
- }
- }
-
- if (try_palette) {
- rd_pick_palette_intra_sby(
- cpi, x, bsize, mi_row, mi_col, bmode_costs[DC_PRED], &best_mbmi,
- best_palette_color_map, &best_rd, &best_model_rd, rate, rate_tokenonly,
- distortion, skippable, ctx, ctx->blk_skip);
- }
-
- if (beat_best_rd && av1_filter_intra_allowed_bsize(&cpi->common, bsize)) {
- if (rd_pick_filter_intra_sby(
- cpi, x, mi_row, mi_col, rate, rate_tokenonly, distortion, skippable,
- bsize, bmode_costs[DC_PRED], &best_rd, &best_model_rd, ctx)) {
- best_mbmi = *mbmi;
- }
- }
-
- // If previous searches use only the default tx type, do an extra search for
- // the best tx type.
- if (x->use_default_intra_tx_type) {
- *mbmi = best_mbmi;
- x->use_default_intra_tx_type = 0;
- intra_block_yrd(cpi, x, bsize, bmode_costs, &best_rd, rate, rate_tokenonly,
- distortion, skippable, &best_mbmi, ctx);
- }
-
- *mbmi = best_mbmi;
- return best_rd;
-}
-
-// Return value 0: early termination triggered, no valid rd cost available;
-// 1: rd cost values are valid.
-static int super_block_uvrd(const AV1_COMP *const cpi, MACROBLOCK *x,
- RD_STATS *rd_stats, BLOCK_SIZE bsize,
- int64_t ref_best_rd) {
- MACROBLOCKD *const xd = &x->e_mbd;
- MB_MODE_INFO *const mbmi = xd->mi[0];
- struct macroblockd_plane *const pd = &xd->plane[AOM_PLANE_U];
- const TX_SIZE uv_tx_size = av1_get_tx_size(AOM_PLANE_U, xd);
- int plane;
- int is_cost_valid = 1;
- av1_init_rd_stats(rd_stats);
-
- if (ref_best_rd < 0) is_cost_valid = 0;
-
- if (x->skip_chroma_rd) return is_cost_valid;
-
- bsize = scale_chroma_bsize(bsize, pd->subsampling_x, pd->subsampling_y);
-
- if (is_inter_block(mbmi) && is_cost_valid) {
- for (plane = 1; plane < MAX_MB_PLANE; ++plane)
- av1_subtract_plane(x, bsize, plane);
- }
-
- if (is_cost_valid) {
- for (plane = 1; plane < MAX_MB_PLANE; ++plane) {
- RD_STATS pn_rd_stats;
- txfm_rd_in_plane(x, cpi, &pn_rd_stats, ref_best_rd, plane, bsize,
- uv_tx_size, cpi->sf.use_fast_coef_costing, FTXS_NONE);
- if (pn_rd_stats.rate == INT_MAX) {
- is_cost_valid = 0;
- break;
- }
- av1_merge_rd_stats(rd_stats, &pn_rd_stats);
- if (RDCOST(x->rdmult, rd_stats->rate, rd_stats->dist) > ref_best_rd &&
- RDCOST(x->rdmult, 0, rd_stats->sse) > ref_best_rd) {
- is_cost_valid = 0;
- break;
- }
- }
- }
-
- if (!is_cost_valid) {
- // reset cost value
- av1_invalid_rd_stats(rd_stats);
- }
-
- return is_cost_valid;
-}
-
-static void tx_block_rd_b(const AV1_COMP *cpi, MACROBLOCK *x, TX_SIZE tx_size,
- int blk_row, int blk_col, int plane, int block,
- int plane_bsize, TXB_CTX *txb_ctx, RD_STATS *rd_stats,
- FAST_TX_SEARCH_MODE ftxs_mode, int64_t ref_rdcost,
- TXB_RD_INFO *rd_info_array) {
- const struct macroblock_plane *const p = &x->plane[plane];
- const uint16_t cur_joint_ctx =
- (txb_ctx->dc_sign_ctx << 8) + txb_ctx->txb_skip_ctx;
- const int txk_type_idx =
- av1_get_txk_type_index(plane_bsize, blk_row, blk_col);
- // Look up RD and terminate early in case when we've already processed exactly
- // the same residual with exactly the same entropy context.
- if (rd_info_array != NULL && rd_info_array->valid &&
- rd_info_array->entropy_context == cur_joint_ctx) {
- if (plane == 0)
- x->e_mbd.mi[0]->txk_type[txk_type_idx] = rd_info_array->tx_type;
- const TX_TYPE ref_tx_type =
- av1_get_tx_type(get_plane_type(plane), &x->e_mbd, blk_row, blk_col,
- tx_size, cpi->common.reduced_tx_set_used);
- if (ref_tx_type == rd_info_array->tx_type) {
- rd_stats->rate += rd_info_array->rate;
- rd_stats->dist += rd_info_array->dist;
- rd_stats->sse += rd_info_array->sse;
- rd_stats->skip &= rd_info_array->eob == 0;
- p->eobs[block] = rd_info_array->eob;
- p->txb_entropy_ctx[block] = rd_info_array->txb_entropy_ctx;
- return;
- }
- }
-
- RD_STATS this_rd_stats;
- search_txk_type(cpi, x, plane, block, blk_row, blk_col, plane_bsize, tx_size,
- txb_ctx, ftxs_mode, 0, ref_rdcost, &this_rd_stats);
-
- av1_merge_rd_stats(rd_stats, &this_rd_stats);
-
- // Save RD results for possible reuse in future.
- if (rd_info_array != NULL) {
- rd_info_array->valid = 1;
- rd_info_array->entropy_context = cur_joint_ctx;
- rd_info_array->rate = this_rd_stats.rate;
- rd_info_array->dist = this_rd_stats.dist;
- rd_info_array->sse = this_rd_stats.sse;
- rd_info_array->eob = p->eobs[block];
- rd_info_array->txb_entropy_ctx = p->txb_entropy_ctx[block];
- if (plane == 0) {
- rd_info_array->tx_type = x->e_mbd.mi[0]->txk_type[txk_type_idx];
- }
- }
-}
-
-static void get_mean_and_dev(const int16_t *data, int stride, int bw, int bh,
- float *mean, float *dev) {
- int x_sum = 0;
- uint64_t x2_sum = 0;
- for (int i = 0; i < bh; ++i) {
- for (int j = 0; j < bw; ++j) {
- const int val = data[j];
- x_sum += val;
- x2_sum += val * val;
- }
- data += stride;
- }
-
- const int num = bw * bh;
- const float e_x = (float)x_sum / num;
- const float e_x2 = (float)((double)x2_sum / num);
- const float diff = e_x2 - e_x * e_x;
- *dev = (diff > 0) ? sqrtf(diff) : 0;
- *mean = e_x;
-}
-
-// Float counterpart of get_mean_and_dev(): mean and standard deviation of a
-// bw x bh block of float samples with the given row stride.
-static void get_mean_and_dev_float(const float *data, int stride, int bw,
-                                   int bh, float *mean, float *dev) {
-  float sum = 0;
-  float sum_sq = 0;
-  const float *row_ptr = data;
-  for (int r = 0; r < bh; ++r, row_ptr += stride) {
-    for (int c = 0; c < bw; ++c) {
-      const float sample = row_ptr[c];
-      sum += sample;
-      sum_sq += sample * sample;
-    }
-  }
-
-  const int count = bw * bh;
-  const float avg = sum / count;
-  const float avg_sq = sum_sq / count;
-  const float variance = avg_sq - avg * avg;
-  // A non-positive variance is reported as a deviation of 0.
-  if (variance > 0) {
-    *dev = sqrtf(variance);
-  } else {
-    *dev = 0;
-  }
-  *mean = avg;
-}
-
-// Builds the feature vector used by the tx-split model: for each of up to
-// `levels` levels, the mean and standard deviation of every sub-block at that
-// level, followed (when a level has more than one sub-block) by the deviation
-// of the sub-block means and the mean of the sub-block deviations.
-static void get_mean_dev_features(const int16_t *data, int stride, int bw,
-                                  int bh, int levels, float *feature) {
-  int out = 0;
-  int sub_w = bw;
-  int sub_h = bh;
-  // Stop as soon as the sub-block would be narrower or shorter than 2.
-  for (int level = 0; level < levels && sub_w >= 2 && sub_h >= 2; ++level) {
-    float means[16];
-    float devs[16];
-    int num_blocks = 0;
-    for (int y = 0; y < bh; y += sub_h) {
-      const int16_t *const row_base = data + y * stride;
-      for (int x = 0; x < bw; x += sub_w) {
-        float blk_mean, blk_dev;
-        get_mean_and_dev(row_base + x, stride, sub_w, sub_h, &blk_mean,
-                         &blk_dev);
-        feature[out] = blk_mean;
-        feature[out + 1] = blk_dev;
-        out += 2;
-        means[num_blocks] = blk_mean;
-        devs[num_blocks] = blk_dev;
-        ++num_blocks;
-      }
-    }
-    if (num_blocks > 1) {
-      float stat_mean, stat_dev;
-      // Deviation of the sub-block means.
-      get_mean_and_dev_float(means, 1, 1, num_blocks, &stat_mean, &stat_dev);
-      feature[out++] = stat_dev;
-      // Mean of the sub-block deviations.
-      get_mean_and_dev_float(devs, 1, 1, num_blocks, &stat_mean, &stat_dev);
-      feature[out++] = stat_mean;
-    }
-    // Next level: halve the longer side, or both sides of a square.
-    const int halve_h = sub_h >= sub_w;
-    const int halve_w = sub_w >= sub_h;
-    if (halve_h) sub_h >>= 1;
-    if (halve_w) sub_w >>= 1;
-  }
-}
-
-// Runs the tx-split neural network on the luma residual of one transform
-// block. Returns -1 when no model exists for tx_size, otherwise a score in
-// [0, 100] obtained by passing the network output through a sigmoid.
-static int ml_predict_tx_split(MACROBLOCK *x, BLOCK_SIZE bsize, int blk_row,
-                               int blk_col, TX_SIZE tx_size) {
-  const NN_CONFIG *const model = av1_tx_split_nnconfig_map[tx_size];
-  if (model == NULL) return -1;
-
-  // blk_row / blk_col are scaled by 4 to address the residual buffer.
-  const int residual_stride = block_size_wide[bsize];
-  const int16_t *const residual =
-      x->plane[0].src_diff + 4 * blk_row * residual_stride + 4 * blk_col;
-  const int tx_w = tx_size_wide[tx_size];
-  const int tx_h = tx_size_high[tx_size];
-  aom_clear_system_state();
-
-  float features[64] = { 0.0f };
-  get_mean_dev_features(residual, residual_stride, tx_w, tx_h, 2, features);
-
-  float score = 0.0f;
-  av1_nn_predict(features, model, &score);
-  // Saturate extreme outputs instead of evaluating the sigmoid.
-  if (score > 8.0f) return 100;
-  if (score < -8.0f) return 0;
-  const float prob = 1.0f / (1.0f + (float)exp(-score));
-  return (int)(prob * 100);
-}
-
-typedef struct {  // Result of evaluating a transform block without splitting it.
- int64_t rd;  // RD cost of the candidate; INT64_MAX until it has been evaluated.
- int txb_entropy_ctx;  // Copy of the plane's txb_entropy_ctx[] entry for the block.
- TX_TYPE tx_type;  // Transform type read back from mbmi->txk_type; TX_TYPES until evaluated.
-} TxCandidateInfo;
-
-// Evaluates coding the luma transform block at (blk_row, blk_col) with
-// tx_size as one unsplit transform.
-//
-// rd_stats receives the rate/distortion of the candidate (including the cost
-// of signalling "no split" when a split would have been possible), and
-// no_split receives its RD cost, entropy context and transform type.
-// When coding the coefficients is not cheaper than signalling an all-zero
-// block (and the segment is not lossless), the block is forced to skip.
-// rd_info_node may be NULL; otherwise its rd_info_array is handed to
-// tx_block_rd_b() so previously computed results can be reused.
-static void try_tx_block_no_split(
-    const AV1_COMP *cpi, MACROBLOCK *x, int blk_row, int blk_col, int block,
-    TX_SIZE tx_size, int depth, BLOCK_SIZE plane_bsize,
-    const ENTROPY_CONTEXT *ta, const ENTROPY_CONTEXT *tl,
-    int txfm_partition_ctx, RD_STATS *rd_stats, int64_t ref_best_rd,
-    FAST_TX_SEARCH_MODE ftxs_mode, TXB_RD_INFO_NODE *rd_info_node,
-    TxCandidateInfo *no_split) {
-  MACROBLOCKD *const xd = &x->e_mbd;
-  MB_MODE_INFO *const mbmi = xd->mi[0];
-  struct macroblock_plane *const p = &x->plane[0];
-  const int bw = block_size_wide[plane_bsize] >> tx_size_wide_log2[0];
-
-  // Start from an "invalid" candidate.
-  no_split->rd = INT64_MAX;
-  no_split->txb_entropy_ctx = 0;
-  no_split->tx_type = TX_TYPES;
-
-  const ENTROPY_CONTEXT *const pta = ta + blk_col;
-  const ENTROPY_CONTEXT *const ptl = tl + blk_row;
-
-  const TX_SIZE txs_ctx = get_txsize_entropy_ctx(tx_size);
-  TXB_CTX txb_ctx;
-  get_txb_ctx(plane_bsize, tx_size, 0, pta, ptl, &txb_ctx);
-  // Rate of signalling that the block has no coefficients.
-  const int zero_blk_rate = x->coeff_costs[txs_ctx][PLANE_TYPE_Y]
-                                .txb_skip_cost[txb_ctx.txb_skip_ctx][1];
-
-  rd_stats->ref_rdcost = ref_best_rd;
-  rd_stats->zero_rate = zero_blk_rate;
-  const int index = av1_get_txb_size_index(plane_bsize, blk_row, blk_col);
-  mbmi->inter_tx_size[index] = tx_size;
-  tx_block_rd_b(cpi, x, tx_size, blk_row, blk_col, 0, block, plane_bsize,
-                &txb_ctx, rd_stats, ftxs_mode, ref_best_rd,
-                rd_info_node != NULL ? rd_info_node->rd_info_array : NULL);
-  assert(rd_stats->rate < INT_MAX);
-
-  if ((RDCOST(x->rdmult, rd_stats->rate, rd_stats->dist) >=
-           RDCOST(x->rdmult, zero_blk_rate, rd_stats->sse) ||
-       rd_stats->skip == 1) &&
-      !xd->lossless[mbmi->segment_id]) {
-#if CONFIG_RD_DEBUG
-    // This function only handles luma, so the plane index is always 0
-    // (there is no `plane` variable in scope here).
-    av1_update_txb_coeff_cost(rd_stats, 0, tx_size, blk_row, blk_col,
-                              zero_blk_rate - rd_stats->rate);
-#endif  // CONFIG_RD_DEBUG
-    // Coding coefficients does not pay off: treat the block as all-zero.
-    rd_stats->rate = zero_blk_rate;
-    rd_stats->dist = rd_stats->sse;
-    rd_stats->skip = 1;
-    set_blk_skip(x, 0, blk_row * bw + blk_col, 1);
-    p->eobs[block] = 0;
-    update_txk_array(mbmi->txk_type, plane_bsize, blk_row, blk_col, tx_size,
-                     DCT_DCT);
-  } else {
-    set_blk_skip(x, 0, blk_row * bw + blk_col, 0);
-    rd_stats->skip = 0;
-  }
-
-  // A split flag is only signalled when a split would have been allowed.
-  if (tx_size > TX_4X4 && depth < MAX_VARTX_DEPTH)
-    rd_stats->rate += x->txfm_partition_cost[txfm_partition_ctx][0];
-
-  no_split->rd = RDCOST(x->rdmult, rd_stats->rate, rd_stats->dist);
-  no_split->txb_entropy_ctx = p->txb_entropy_ctx[block];
-  const int txk_type_idx =
-      av1_get_txk_type_index(plane_bsize, blk_row, blk_col);
-  no_split->tx_type = mbmi->txk_type[txk_type_idx];
-}
-
-static void select_tx_block(const AV1_COMP *cpi, MACROBLOCK *x, int blk_row,
- int blk_col, int block, TX_SIZE tx_size, int depth,
- BLOCK_SIZE plane_bsize, ENTROPY_CONTEXT *ta,
- ENTROPY_CONTEXT *tl, TXFM_CONTEXT *tx_above,
- TXFM_CONTEXT *tx_left, RD_STATS *rd_stats,
- int64_t ref_best_rd, int *is_cost_valid,
- FAST_TX_SEARCH_MODE ftxs_mode,
- TXB_RD_INFO_NODE *rd_info_node);  // Forward declaration: try_tx_block_split() and select_tx_block() call each other.
-
-static void try_tx_block_split(
- const AV1_COMP *cpi, MACROBLOCK *x, int blk_row, int blk_col, int block,
- TX_SIZE tx_size, int depth, BLOCK_SIZE plane_bsize, ENTROPY_CONTEXT *ta,
- ENTROPY_CONTEXT *tl, TXFM_CONTEXT *tx_above, TXFM_CONTEXT *tx_left,
- int txfm_partition_ctx, int64_t no_split_rd, int64_t ref_best_rd,
- FAST_TX_SEARCH_MODE ftxs_mode, TXB_RD_INFO_NODE *rd_info_node,
- RD_STATS *split_rd_stats, int64_t *split_rd) {  // Costs the "split" option by running select_tx_block() on every sub-block; *split_rd is written only when all sub-blocks produced a valid cost.
- MACROBLOCKD *const xd = &x->e_mbd;
- const int max_blocks_high = max_block_high(xd, plane_bsize, 0);
- const int max_blocks_wide = max_block_wide(xd, plane_bsize, 0);
- const TX_SIZE sub_txs = sub_tx_size_map[tx_size];  // Transform size of each sub-block.
- const int bsw = tx_size_wide_unit[sub_txs];
- const int bsh = tx_size_high_unit[sub_txs];
- const int sub_step = bsw * bsh;  // Amount `block` advances per evaluated sub-block.
- RD_STATS this_rd_stats;
- int this_cost_valid = 1;
- int64_t tmp_rd = 0;  // RD cost of the split accumulated so far.
-
- split_rd_stats->rate = x->txfm_partition_cost[txfm_partition_ctx][1];  // Start from the cost of signalling the split (entry [1]; the no-split path uses entry [0]).
-
- assert(tx_size < TX_SIZES_ALL);
-
- int blk_idx = 0;
- for (int r = 0; r < tx_size_high_unit[tx_size]; r += bsh) {
- for (int c = 0; c < tx_size_wide_unit[tx_size]; c += bsw, ++blk_idx) {
- const int offsetr = blk_row + r;
- const int offsetc = blk_col + c;
- if (offsetr >= max_blocks_high || offsetc >= max_blocks_wide) continue;  // Sub-block lies outside the limits; blk_idx still advances, `block` does not.
- assert(blk_idx < 4);
- select_tx_block(
- cpi, x, offsetr, offsetc, block, sub_txs, depth + 1, plane_bsize, ta,
- tl, tx_above, tx_left, &this_rd_stats, ref_best_rd - tmp_rd,
- &this_cost_valid, ftxs_mode,
- (rd_info_node != NULL) ? rd_info_node->children[blk_idx] : NULL);  // Budget passed down is what remains after the cost accumulated so far.
-
- if (!this_cost_valid) goto LOOP_EXIT;  // A sub-block found no valid cost: abandon the split.
-
- av1_merge_rd_stats(split_rd_stats, &this_rd_stats);
-
- tmp_rd = RDCOST(x->rdmult, split_rd_stats->rate, split_rd_stats->dist);
-
- if (no_split_rd < tmp_rd) {  // Already more expensive than not splitting: give up early.
- this_cost_valid = 0;
- goto LOOP_EXIT;
- }
- block += sub_step;
- }
- }
-
-LOOP_EXIT : {}
-
- if (this_cost_valid) *split_rd = tmp_rd;  // On failure *split_rd keeps the value the caller initialised it with.
-}
-
-// Search for the best tx partition/type for a given luma block: compares coding the block unsplit at tx_size against recursively splitting it, leaves the cheaper result in *rd_stats and clears *is_cost_valid when no valid cost was found.
-static void select_tx_block(const AV1_COMP *cpi, MACROBLOCK *x, int blk_row,
- int blk_col, int block, TX_SIZE tx_size, int depth,
- BLOCK_SIZE plane_bsize, ENTROPY_CONTEXT *ta,
- ENTROPY_CONTEXT *tl, TXFM_CONTEXT *tx_above,
- TXFM_CONTEXT *tx_left, RD_STATS *rd_stats,
- int64_t ref_best_rd, int *is_cost_valid,
- FAST_TX_SEARCH_MODE ftxs_mode,
- TXB_RD_INFO_NODE *rd_info_node) {
- assert(tx_size < TX_SIZES_ALL);
- av1_init_rd_stats(rd_stats);
- if (ref_best_rd < 0) {  // No RD budget left: report an invalid cost immediately.
- *is_cost_valid = 0;
- return;
- }
-
- MACROBLOCKD *const xd = &x->e_mbd;
- const int max_blocks_high = max_block_high(xd, plane_bsize, 0);
- const int max_blocks_wide = max_block_wide(xd, plane_bsize, 0);
- if (blk_row >= max_blocks_high || blk_col >= max_blocks_wide) return;  // Block is outside the limits: rd_stats stays freshly initialised, validity flag untouched.
-
- const int bw = block_size_wide[plane_bsize] >> tx_size_wide_log2[0];
- MB_MODE_INFO *const mbmi = xd->mi[0];
- const int ctx = txfm_partition_context(tx_above + blk_col, tx_left + blk_row,
- mbmi->sb_type, tx_size);
- struct macroblock_plane *const p = &x->plane[0];
-
- const int try_no_split = 1;  // The unsplit candidate is always evaluated.
- int try_split = tx_size > TX_4X4 && depth < MAX_VARTX_DEPTH;
-#if CONFIG_DIST_8X8
- if (x->using_dist_8x8)
- try_split &= tx_size_wide[tx_size] >= 16 && tx_size_high[tx_size] >= 16;
-#endif
- TxCandidateInfo no_split = { INT64_MAX, 0, TX_TYPES };
-
- // TX no split
- if (try_no_split) {
- try_tx_block_no_split(cpi, x, blk_row, blk_col, block, tx_size, depth,
- plane_bsize, ta, tl, ctx, rd_stats, ref_best_rd,
- ftxs_mode, rd_info_node, &no_split);
-
- if (cpi->sf.adaptive_txb_search_level &&
- (no_split.rd -
- (no_split.rd >> (1 + cpi->sf.adaptive_txb_search_level))) >
- ref_best_rd) {  // Even after discounting no_split.rd by rd >> (1 + level) it exceeds the budget: stop searching this block.
- *is_cost_valid = 0;
- return;
- }
-
- if (cpi->sf.txb_split_cap) {
- if (p->eobs[block] == 0) try_split = 0;  // The unsplit block has no coefficients: do not try splitting.
- }
- }
-
- if (x->e_mbd.bd == 8 && !x->cb_partition_scan && try_split) {  // Model-based pruning of the split search, only for bit depth 8.
- const int threshold = cpi->sf.tx_type_search.ml_tx_split_thresh;
- if (threshold >= 0) {
- const int split_score =
- ml_predict_tx_split(x, plane_bsize, blk_row, blk_col, tx_size);
- if (split_score >= 0 && split_score < threshold) try_split = 0;  // A negative score means no model was available for this tx_size.
- }
- }
-
- // TX split
- int64_t split_rd = INT64_MAX;
- RD_STATS split_rd_stats;
- av1_init_rd_stats(&split_rd_stats);
- if (try_split) {
- try_tx_block_split(cpi, x, blk_row, blk_col, block, tx_size, depth,
- plane_bsize, ta, tl, tx_above, tx_left, ctx, no_split.rd,
- AOMMIN(no_split.rd, ref_best_rd), ftxs_mode,
- rd_info_node, &split_rd_stats, &split_rd);
- }
-
- if (no_split.rd < split_rd) {  // Unsplit wins: commit its contexts, tx size, tx type and skip flag.
- ENTROPY_CONTEXT *pta = ta + blk_col;
- ENTROPY_CONTEXT *ptl = tl + blk_row;
- const TX_SIZE tx_size_selected = tx_size;
- p->txb_entropy_ctx[block] = no_split.txb_entropy_ctx;  // Restore the value saved before the split search ran.
- av1_set_txb_context(x, 0, block, tx_size_selected, pta, ptl);
- txfm_partition_update(tx_above + blk_col, tx_left + blk_row, tx_size,
- tx_size);
- for (int idy = 0; idy < tx_size_high_unit[tx_size]; ++idy) {
- for (int idx = 0; idx < tx_size_wide_unit[tx_size]; ++idx) {
- const int index =
- av1_get_txb_size_index(plane_bsize, blk_row + idy, blk_col + idx);
- mbmi->inter_tx_size[index] = tx_size_selected;
- }
- }
- mbmi->tx_size = tx_size_selected;
- update_txk_array(mbmi->txk_type, plane_bsize, blk_row, blk_col, tx_size,
- no_split.tx_type);
- set_blk_skip(x, 0, blk_row * bw + blk_col, rd_stats->skip);
- } else {  // Split wins, or neither candidate is valid.
- *rd_stats = split_rd_stats;
- if (split_rd == INT64_MAX) *is_cost_valid = 0;  // Neither candidate produced a usable cost.
- }
-}
-
-static void select_inter_block_yrd(const AV1_COMP *cpi, MACROBLOCK *x,
- RD_STATS *rd_stats, BLOCK_SIZE bsize,
- int64_t ref_best_rd,
- FAST_TX_SEARCH_MODE ftxs_mode,
- TXB_RD_INFO_NODE *rd_info_tree) {  // Runs select_tx_block() on every largest-size luma transform block of the coding block and accumulates the result in *rd_stats; *rd_stats is invalidated when the search fails.
- MACROBLOCKD *const xd = &x->e_mbd;
- int is_cost_valid = 1;
- int64_t this_rd = 0, skip_rd = 0;
-
- if (ref_best_rd < 0) is_cost_valid = 0;
-
- av1_init_rd_stats(rd_stats);
-
- if (is_cost_valid) {
- const struct macroblockd_plane *const pd = &xd->plane[0];
- const BLOCK_SIZE plane_bsize =
- get_plane_block_size(bsize, pd->subsampling_x, pd->subsampling_y);
- const int mi_width = mi_size_wide[plane_bsize];
- const int mi_height = mi_size_high[plane_bsize];
- const TX_SIZE max_tx_size = max_txsize_rect_lookup[plane_bsize];
- const int bh = tx_size_high_unit[max_tx_size];
- const int bw = tx_size_wide_unit[max_tx_size];
- int idx, idy;
- int block = 0;
- int step = tx_size_wide_unit[max_tx_size] * tx_size_high_unit[max_tx_size];
- ENTROPY_CONTEXT ctxa[MAX_MIB_SIZE];
- ENTROPY_CONTEXT ctxl[MAX_MIB_SIZE];
- TXFM_CONTEXT tx_above[MAX_MIB_SIZE];
- TXFM_CONTEXT tx_left[MAX_MIB_SIZE];
-
- RD_STATS pn_rd_stats;
- const int init_depth =
- get_search_init_depth(mi_width, mi_height, 1, &cpi->sf);
- av1_init_rd_stats(&pn_rd_stats);
-
- av1_get_entropy_contexts(bsize, pd, ctxa, ctxl);
- memcpy(tx_above, xd->above_txfm_context, sizeof(TXFM_CONTEXT) * mi_width);  // Work on local copies of the transform contexts.
- memcpy(tx_left, xd->left_txfm_context, sizeof(TXFM_CONTEXT) * mi_height);
- const int skip_ctx = av1_get_skip_context(xd);
- const int s0 = x->skip_cost[skip_ctx][0];
- const int s1 = x->skip_cost[skip_ctx][1];
-
- skip_rd = RDCOST(x->rdmult, s1, 0);
- this_rd = RDCOST(x->rdmult, s0, 0);
- for (idy = 0; idy < mi_height; idy += bh) {
- for (idx = 0; idx < mi_width; idx += bw) {
- int64_t best_rd_sofar = (ref_best_rd - (AOMMIN(skip_rd, this_rd)));  // Remaining budget after the cheaper of the skip / non-skip costs so far.
- select_tx_block(cpi, x, idy, idx, block, max_tx_size, init_depth,
- plane_bsize, ctxa, ctxl, tx_above, tx_left,
- &pn_rd_stats, best_rd_sofar, &is_cost_valid, ftxs_mode,
- rd_info_tree);
- if (!is_cost_valid || pn_rd_stats.rate == INT_MAX) {
- av1_invalid_rd_stats(rd_stats);
- return;
- }
- av1_merge_rd_stats(rd_stats, &pn_rd_stats);
- skip_rd = RDCOST(x->rdmult, s1, rd_stats->sse);
- this_rd = RDCOST(x->rdmult, rd_stats->rate + s0, rd_stats->dist);
- block += step;
- if (rd_info_tree != NULL) rd_info_tree += 1;  // Move on to the record node of the next top-level transform block.
- }
- }
- if (skip_rd <= this_rd) {  // Skipping the whole residual is at least as cheap: report rate 0 and dist = sse.
- rd_stats->rate = 0;
- rd_stats->dist = rd_stats->sse;
- rd_stats->skip = 1;
- } else {
- rd_stats->skip = 0;
- }
- }
-
- if (!is_cost_valid) {
- // reset cost value
- av1_invalid_rd_stats(rd_stats);
- }
-}
-
-static int64_t select_tx_size_fix_type(const AV1_COMP *cpi, MACROBLOCK *x,
- RD_STATS *rd_stats, BLOCK_SIZE bsize,
- int64_t ref_best_rd,
- TXB_RD_INFO_NODE *rd_info_tree) {  // Runs the transform partition search for the block and returns its RD cost including the skip flag cost, or INT64_MAX when no valid result was found.
- const int fast_tx_search = cpi->sf.tx_size_search_method > USE_FULL_RD;
- MACROBLOCKD *const xd = &x->e_mbd;
- MB_MODE_INFO *const mbmi = xd->mi[0];
- const int is_inter = is_inter_block(mbmi);
- const int skip_ctx = av1_get_skip_context(xd);
- int s0 = x->skip_cost[skip_ctx][0];
- int s1 = x->skip_cost[skip_ctx][1];
- int64_t rd;
-
- // TODO(debargha): enable this as a speed feature where the
- // select_inter_block_yrd() function above will use a simplified search
- // such as not using full optimize, but the inter_block_yrd() function
- // will use more complex search given that the transform partitions have
- // already been decided.
-
- int64_t rd_thresh = ref_best_rd;
- if (fast_tx_search && rd_thresh < INT64_MAX) {
- if (INT64_MAX - rd_thresh > (rd_thresh >> 3)) rd_thresh += (rd_thresh >> 3);  // Fast search: loosen the threshold by 1/8, guarding against int64 overflow.
- }
- assert(rd_thresh > 0);
-
- FAST_TX_SEARCH_MODE ftxs_mode =
- fast_tx_search ? FTXS_DCT_AND_1D_DCT_ONLY : FTXS_NONE;
- select_inter_block_yrd(cpi, x, rd_stats, bsize, rd_thresh, ftxs_mode,
- rd_info_tree);
- if (rd_stats->rate == INT_MAX) return INT64_MAX;  // The partition search found nothing valid.
-
- // If fast_tx_search is true, only DCT and 1D DCT were tested in
- // select_inter_block_yrd() above. Do a better search for tx type with
- // tx sizes already decided.
- if (fast_tx_search) {
- if (!inter_block_yrd(cpi, x, rd_stats, bsize, ref_best_rd, FTXS_NONE))
- return INT64_MAX;
- }
-
- if (rd_stats->skip)
- rd = RDCOST(x->rdmult, s1, rd_stats->sse);
- else
- rd = RDCOST(x->rdmult, rd_stats->rate + s0, rd_stats->dist);
-
- if (is_inter && !xd->lossless[xd->mi[0]->segment_id] && !(rd_stats->skip))
- rd = AOMMIN(rd, RDCOST(x->rdmult, s1, rd_stats->sse));  // Non-lossless inter block: the returned cost is capped by the cost of skipping the residual.
-
- return rd;
-}
-
-// Finds rd cost for a y block, given the transform size partitions already stored in mbmi->inter_tx_size; recurses into sub-blocks wherever the stored size differs from tx_size.
-static void tx_block_yrd(const AV1_COMP *cpi, MACROBLOCK *x, int blk_row,
- int blk_col, int block, TX_SIZE tx_size,
- BLOCK_SIZE plane_bsize, int depth,
- ENTROPY_CONTEXT *above_ctx, ENTROPY_CONTEXT *left_ctx,
- TXFM_CONTEXT *tx_above, TXFM_CONTEXT *tx_left,
- int64_t ref_best_rd, RD_STATS *rd_stats,
- FAST_TX_SEARCH_MODE ftxs_mode) {
- MACROBLOCKD *const xd = &x->e_mbd;
- MB_MODE_INFO *const mbmi = xd->mi[0];
- const int max_blocks_high = max_block_high(xd, plane_bsize, 0);
- const int max_blocks_wide = max_block_wide(xd, plane_bsize, 0);
-
- assert(tx_size < TX_SIZES_ALL);
-
- if (blk_row >= max_blocks_high || blk_col >= max_blocks_wide) return;  // Outside the limits: rd_stats is left exactly as the caller passed it.
-
- const TX_SIZE plane_tx_size = mbmi->inter_tx_size[av1_get_txb_size_index(
- plane_bsize, blk_row, blk_col)];  // Transform size previously chosen for this position.
-
- int ctx = txfm_partition_context(tx_above + blk_col, tx_left + blk_row,
- mbmi->sb_type, tx_size);
-
- av1_init_rd_stats(rd_stats);
- if (tx_size == plane_tx_size) {  // The stored partition stops here: cost this block directly.
- ENTROPY_CONTEXT *ta = above_ctx + blk_col;
- ENTROPY_CONTEXT *tl = left_ctx + blk_row;
- const TX_SIZE txs_ctx = get_txsize_entropy_ctx(tx_size);
- TXB_CTX txb_ctx;
- get_txb_ctx(plane_bsize, tx_size, 0, ta, tl, &txb_ctx);
-
- const int zero_blk_rate = x->coeff_costs[txs_ctx][get_plane_type(0)]
- .txb_skip_cost[txb_ctx.txb_skip_ctx][1];
- rd_stats->zero_rate = zero_blk_rate;
- rd_stats->ref_rdcost = ref_best_rd;
- tx_block_rd_b(cpi, x, tx_size, blk_row, blk_col, 0, block, plane_bsize,
- &txb_ctx, rd_stats, ftxs_mode, ref_best_rd, NULL);
- const int mi_width = block_size_wide[plane_bsize] >> tx_size_wide_log2[0];
- if (RDCOST(x->rdmult, rd_stats->rate, rd_stats->dist) >=
- RDCOST(x->rdmult, zero_blk_rate, rd_stats->sse) ||
- rd_stats->skip == 1) {  // Coding coefficients is not cheaper than an all-zero block: force skip.
- rd_stats->rate = zero_blk_rate;
- rd_stats->dist = rd_stats->sse;
- rd_stats->skip = 1;
- set_blk_skip(x, 0, blk_row * mi_width + blk_col, 1);
- x->plane[0].eobs[block] = 0;
- x->plane[0].txb_entropy_ctx[block] = 0;
- update_txk_array(mbmi->txk_type, plane_bsize, blk_row, blk_col, tx_size,
- DCT_DCT);
- } else {
- rd_stats->skip = 0;
- set_blk_skip(x, 0, blk_row * mi_width + blk_col, 0);
- }
- if (tx_size > TX_4X4 && depth < MAX_VARTX_DEPTH)
- rd_stats->rate += x->txfm_partition_cost[ctx][0];  // Cost of signalling "no split".
- av1_set_txb_context(x, 0, block, tx_size, ta, tl);
- txfm_partition_update(tx_above + blk_col, tx_left + blk_row, tx_size,
- tx_size);
- } else {  // The stored partition is finer: recurse into the sub-blocks.
- const TX_SIZE sub_txs = sub_tx_size_map[tx_size];
- const int bsw = tx_size_wide_unit[sub_txs];
- const int bsh = tx_size_high_unit[sub_txs];
- const int step = bsh * bsw;
- RD_STATS pn_rd_stats;
- int64_t this_rd = 0;  // RD cost accumulated over the sub-blocks processed so far.
- assert(bsw > 0 && bsh > 0);
-
- for (int row = 0; row < tx_size_high_unit[tx_size]; row += bsh) {
- for (int col = 0; col < tx_size_wide_unit[tx_size]; col += bsw) {
- const int offsetr = blk_row + row;
- const int offsetc = blk_col + col;
-
- if (offsetr >= max_blocks_high || offsetc >= max_blocks_wide) continue;
-
- av1_init_rd_stats(&pn_rd_stats);
- tx_block_yrd(cpi, x, offsetr, offsetc, block, sub_txs, plane_bsize,
- depth + 1, above_ctx, left_ctx, tx_above, tx_left,
- ref_best_rd - this_rd, &pn_rd_stats, ftxs_mode);
- if (pn_rd_stats.rate == INT_MAX) {  // A sub-block is invalid: the whole block is invalid.
- av1_invalid_rd_stats(rd_stats);
- return;
- }
- av1_merge_rd_stats(rd_stats, &pn_rd_stats);
- this_rd += RDCOST(x->rdmult, pn_rd_stats.rate, pn_rd_stats.dist);
- block += step;
- }
- }
-
- if (tx_size > TX_4X4 && depth < MAX_VARTX_DEPTH)
- rd_stats->rate += x->txfm_partition_cost[ctx][1];  // Cost of signalling the split.
- }
-}
-
-// Return value 0: early termination triggered, no valid rd cost available;
-// 1: rd cost values are valid. Costs the luma residual with tx_block_yrd() using the transform partition already stored in the mode info.
-static int inter_block_yrd(const AV1_COMP *cpi, MACROBLOCK *x,
- RD_STATS *rd_stats, BLOCK_SIZE bsize,
- int64_t ref_best_rd, FAST_TX_SEARCH_MODE ftxs_mode) {
- MACROBLOCKD *const xd = &x->e_mbd;
- int is_cost_valid = 1;
- int64_t this_rd = 0;
-
- if (ref_best_rd < 0) is_cost_valid = 0;
-
- av1_init_rd_stats(rd_stats);
-
- if (is_cost_valid) {
- const struct macroblockd_plane *const pd = &xd->plane[0];
- const BLOCK_SIZE plane_bsize =
- get_plane_block_size(bsize, pd->subsampling_x, pd->subsampling_y);
- const int mi_width = mi_size_wide[plane_bsize];
- const int mi_height = mi_size_high[plane_bsize];
- const TX_SIZE max_tx_size = get_vartx_max_txsize(xd, plane_bsize, 0);
- const int bh = tx_size_high_unit[max_tx_size];
- const int bw = tx_size_wide_unit[max_tx_size];
- const int init_depth =
- get_search_init_depth(mi_width, mi_height, 1, &cpi->sf);
- int idx, idy;
- int block = 0;
- int step = tx_size_wide_unit[max_tx_size] * tx_size_high_unit[max_tx_size];
- ENTROPY_CONTEXT ctxa[MAX_MIB_SIZE];
- ENTROPY_CONTEXT ctxl[MAX_MIB_SIZE];
- TXFM_CONTEXT tx_above[MAX_MIB_SIZE];
- TXFM_CONTEXT tx_left[MAX_MIB_SIZE];
- RD_STATS pn_rd_stats;
-
- av1_get_entropy_contexts(bsize, pd, ctxa, ctxl);
- memcpy(tx_above, xd->above_txfm_context, sizeof(TXFM_CONTEXT) * mi_width);  // Work on local copies of the transform contexts.
- memcpy(tx_left, xd->left_txfm_context, sizeof(TXFM_CONTEXT) * mi_height);
-
- for (idy = 0; idy < mi_height; idy += bh) {
- for (idx = 0; idx < mi_width; idx += bw) {
- av1_init_rd_stats(&pn_rd_stats);
- tx_block_yrd(cpi, x, idy, idx, block, max_tx_size, plane_bsize,
- init_depth, ctxa, ctxl, tx_above, tx_left,
- ref_best_rd - this_rd, &pn_rd_stats, ftxs_mode);
- if (pn_rd_stats.rate == INT_MAX) {
- av1_invalid_rd_stats(rd_stats);
- return 0;
- }
- av1_merge_rd_stats(rd_stats, &pn_rd_stats);
- this_rd +=
- AOMMIN(RDCOST(x->rdmult, pn_rd_stats.rate, pn_rd_stats.dist),
- RDCOST(x->rdmult, pn_rd_stats.zero_rate, pn_rd_stats.sse));  // Count the cheaper of coding the block and zeroing it towards the running cost.
- block += step;
- }
- }
- }
-
- const int skip_ctx = av1_get_skip_context(xd);
- const int s0 = x->skip_cost[skip_ctx][0];
- const int s1 = x->skip_cost[skip_ctx][1];
- int64_t skip_rd = RDCOST(x->rdmult, s1, rd_stats->sse);
- this_rd = RDCOST(x->rdmult, rd_stats->rate + s0, rd_stats->dist);
- if (skip_rd < this_rd) {  // Skipping the whole residual is cheaper: report rate 0 and dist = sse.
- this_rd = skip_rd;
- rd_stats->rate = 0;
- rd_stats->dist = rd_stats->sse;
- rd_stats->skip = 1;
- }
- if (this_rd > ref_best_rd) is_cost_valid = 0;  // Final cost exceeds the reference: treat as early termination.
-
- if (!is_cost_valid) {
- // reset cost value
- av1_invalid_rd_stats(rd_stats);
- }
- return is_cost_valid;
-}
-
-// Hashes the luma residual buffer of the block with CRC32C and folds the
-// block size into the low bits of the result.
-static INLINE uint32_t get_block_residue_hash(MACROBLOCK *x, BLOCK_SIZE bsize) {
-  const int16_t *const residual = x->plane[0].src_diff;
-  // Two bytes per 16-bit residual sample.
-  const int num_bytes = 2 * block_size_high[bsize] * block_size_wide[bsize];
-  const uint32_t crc = av1_get_crc32c_value(&x->mb_rd_record.crc_calculator,
-                                            (uint8_t *)residual, num_bytes);
-  return (crc << 5) + bsize;
-}
-
-// Stores the current transform search result (tx sizes, tx types, skip flags
-// and rd_stats) under `hash` in the circular buffer tx_rd_record. When the
-// buffer is full the oldest entry is overwritten.
-static void save_tx_rd_info(int n4, uint32_t hash, const MACROBLOCK *const x,
-                            const RD_STATS *const rd_stats,
-                            MB_RD_RECORD *tx_rd_record) {
-  const int has_room = tx_rd_record->num < RD_RECORD_BUFFER_LEN;
-  int slot;
-  if (has_room) {
-    // Append after the newest entry.
-    slot =
-        (tx_rd_record->index_start + tx_rd_record->num) % RD_RECORD_BUFFER_LEN;
-    ++tx_rd_record->num;
-  } else {
-    // Reuse the oldest slot and advance the start of the ring.
-    slot = tx_rd_record->index_start;
-    tx_rd_record->index_start =
-        (tx_rd_record->index_start + 1) % RD_RECORD_BUFFER_LEN;
-  }
-
-  MB_RD_INFO *const entry = &tx_rd_record->tx_rd_info[slot];
-  const MB_MODE_INFO *const mode_info = x->e_mbd.mi[0];
-  entry->hash_value = hash;
-  entry->tx_size = mode_info->tx_size;
-  memcpy(entry->blk_skip, x->blk_skip, sizeof(entry->blk_skip[0]) * n4);
-  av1_copy(entry->inter_tx_size, mode_info->inter_tx_size);
-  av1_copy(entry->txk_type, mode_info->txk_type);
-  entry->rd_stats = *rd_stats;
-}
-
-// Restores a previously saved transform search result: copies tx size, skip
-// flags, per-block tx sizes and tx types from tx_rd_info back into the
-// current block's state and returns the saved stats through rd_stats.
-static void fetch_tx_rd_info(int n4, const MB_RD_INFO *const tx_rd_info,
-                             RD_STATS *const rd_stats, MACROBLOCK *const x) {
-  MB_MODE_INFO *const mode_info = x->e_mbd.mi[0];
-  mode_info->tx_size = tx_rd_info->tx_size;
-  memcpy(x->blk_skip, tx_rd_info->blk_skip,
-         sizeof(tx_rd_info->blk_skip[0]) * n4);
-  av1_copy(mode_info->inter_tx_size, tx_rd_info->inter_tx_size);
-  av1_copy(mode_info->txk_type, tx_rd_info->txk_type);
-  *rd_stats = tx_rd_info->rd_stats;
-}
-
-// Looks up `hash` in the circular buffer cur_record and returns the index of
-// the matching entry. If there is no match, a zeroed entry is created for the
-// hash (overwriting the oldest one when the buffer is full) and its index is
-// returned.
-static int find_tx_size_rd_info(TXB_RD_RECORD *cur_record,
-                                const uint32_t hash) {
-  // Scan the slots below index_start, highest index first.
-  int pos = cur_record->index_start;
-  while (--pos >= 0) {
-    if (cur_record->hash_vals[pos] == hash) return pos;
-  }
-  // Then scan from num - 1 down to index_start.
-  pos = cur_record->num;
-  while (--pos >= cur_record->index_start) {
-    if (cur_record->hash_vals[pos] == hash) return pos;
-  }
-
-  // Not found: claim a slot for this hash.
-  int slot;
-  if (cur_record->num >= TX_SIZE_RD_RECORD_BUFFER_LEN) {
-    slot = cur_record->index_start;
-    cur_record->index_start =
-        (cur_record->index_start + 1) % TX_SIZE_RD_RECORD_BUFFER_LEN;
-  } else {
-    slot = (cur_record->index_start + cur_record->num) %
-           TX_SIZE_RD_RECORD_BUFFER_LEN;
-    cur_record->num++;
-  }
-
-  cur_record->hash_vals[slot] = hash;
-  av1_zero(cur_record->tx_rd_info[slot]);
-  return slot;
-}
-
-typedef struct {  // Index-based template of one node of the RD record tree; expanded into pointers by init_rd_record_tree().
- int leaf;  // Non-zero: the node has no children (its child pointers are zeroed).
- int8_t children[4];  // Indices of the child nodes within the tree array; values <= 0 become NULL child pointers.
-} RD_RECORD_IDX_NODE;
-
-static const RD_RECORD_IDX_NODE rd_record_tree_8x8[] = {  // Template for BLOCK_8X8: a single leaf node.
- { 1, { 0 } },
-};
-
-static const RD_RECORD_IDX_NODE rd_record_tree_8x16[] = {  // Template for BLOCK_8X16: node 0 with two leaf children (nodes 1 and 2).
- { 0, { 1, 2, -1, -1 } },
- { 1, { 0, 0, 0, 0 } },
- { 1, { 0, 0, 0, 0 } },
-};
-
-static const RD_RECORD_IDX_NODE rd_record_tree_16x8[] = {  // Template for BLOCK_16X8: node 0 with two leaf children (nodes 1 and 2).
- { 0, { 1, 2, -1, -1 } },
- { 1, { 0 } },
- { 1, { 0 } },
-};
-
-static const RD_RECORD_IDX_NODE rd_record_tree_16x16[] = {  // Template for BLOCK_16X16: node 0 with four leaf children (nodes 1..4).
- { 0, { 1, 2, 3, 4 } }, { 1, { 0 } }, { 1, { 0 } }, { 1, { 0 } }, { 1, { 0 } },
-};
-
-static const RD_RECORD_IDX_NODE rd_record_tree_1_2[] = {  // Shared by BLOCK_16X32 and BLOCK_32X64. Only non-leaf nodes are listed; nodes 3..10 get no template entry.
- { 0, { 1, 2, -1, -1 } },
- { 0, { 3, 4, 5, 6 } },
- { 0, { 7, 8, 9, 10 } },
-};
-
-static const RD_RECORD_IDX_NODE rd_record_tree_2_1[] = {  // Shared by BLOCK_32X16 and BLOCK_64X32. Only non-leaf nodes are listed.
- { 0, { 1, 2, -1, -1 } },
- { 0, { 3, 4, 7, 8 } },
- { 0, { 5, 6, 9, 10 } },
-};
-
-static const RD_RECORD_IDX_NODE rd_record_tree_sqr[] = {  // Shared by BLOCK_32X32 and BLOCK_64X64: node 0, its four children (1..4) and their children (5..20).
- { 0, { 1, 2, 3, 4 } }, { 0, { 5, 6, 9, 10 } }, { 0, { 7, 8, 11, 12 } },
- { 0, { 13, 14, 17, 18 } }, { 0, { 15, 16, 19, 20 } },
-};
-
-static const RD_RECORD_IDX_NODE rd_record_tree_64x128[] = {  // Template for BLOCK_64X128. No entry refers to nodes 0 or 1, so they appear to be two separate top-level nodes - TODO confirm.
- { 0, { 2, 3, 4, 5 } }, { 0, { 6, 7, 8, 9 } },
- { 0, { 10, 11, 14, 15 } }, { 0, { 12, 13, 16, 17 } },
- { 0, { 18, 19, 22, 23 } }, { 0, { 20, 21, 24, 25 } },
- { 0, { 26, 27, 30, 31 } }, { 0, { 28, 29, 32, 33 } },
- { 0, { 34, 35, 38, 39 } }, { 0, { 36, 37, 40, 41 } },
-};
-
-static const RD_RECORD_IDX_NODE rd_record_tree_128x64[] = {  // Template for BLOCK_128X64. No entry refers to nodes 0 or 1, so they appear to be two separate top-level nodes - TODO confirm.
- { 0, { 2, 3, 6, 7 } }, { 0, { 4, 5, 8, 9 } },
- { 0, { 10, 11, 18, 19 } }, { 0, { 12, 13, 20, 21 } },
- { 0, { 14, 15, 22, 23 } }, { 0, { 16, 17, 24, 25 } },
- { 0, { 26, 27, 34, 35 } }, { 0, { 28, 29, 36, 37 } },
- { 0, { 30, 31, 38, 39 } }, { 0, { 32, 33, 40, 41 } },
-};
-
-static const RD_RECORD_IDX_NODE rd_record_tree_128x128[] = {  // Template for BLOCK_128X128. No entry refers to nodes 0..3, so they appear to be four separate top-level nodes - TODO confirm.
- { 0, { 4, 5, 8, 9 } }, { 0, { 6, 7, 10, 11 } },
- { 0, { 12, 13, 16, 17 } }, { 0, { 14, 15, 18, 19 } },
- { 0, { 20, 21, 28, 29 } }, { 0, { 22, 23, 30, 31 } },
- { 0, { 24, 25, 32, 33 } }, { 0, { 26, 27, 34, 35 } },
- { 0, { 36, 37, 44, 45 } }, { 0, { 38, 39, 46, 47 } },
- { 0, { 40, 41, 48, 49 } }, { 0, { 42, 43, 50, 51 } },
- { 0, { 52, 53, 60, 61 } }, { 0, { 54, 55, 62, 63 } },
- { 0, { 56, 57, 64, 65 } }, { 0, { 58, 59, 66, 67 } },
- { 0, { 68, 69, 76, 77 } }, { 0, { 70, 71, 78, 79 } },
- { 0, { 72, 73, 80, 81 } }, { 0, { 74, 75, 82, 83 } },
-};
-
-static const RD_RECORD_IDX_NODE rd_record_tree_1_4[] = {  // Shared by BLOCK_8X32 and BLOCK_16X64; node 0 uses child slots 0 and 2.
- { 0, { 1, -1, 2, -1 } },
- { 0, { 3, 4, -1, -1 } },
- { 0, { 5, 6, -1, -1 } },
-};
-
-static const RD_RECORD_IDX_NODE rd_record_tree_4_1[] = {  // Shared by BLOCK_32X8 and BLOCK_64X16; node 0 uses child slots 0 and 1.
- { 0, { 1, 2, -1, -1 } },
- { 0, { 3, 4, -1, -1 } },
- { 0, { 5, 6, -1, -1 } },
-};
-
-static const RD_RECORD_IDX_NODE *rd_record_tree[BLOCK_SIZES_ALL] = {  // Tree template per BLOCK_SIZE; NULL where no template exists (the matching size entry is 0).
- NULL, // BLOCK_4X4
- NULL, // BLOCK_4X8
- NULL, // BLOCK_8X4
- rd_record_tree_8x8, // BLOCK_8X8
- rd_record_tree_8x16, // BLOCK_8X16
- rd_record_tree_16x8, // BLOCK_16X8
- rd_record_tree_16x16, // BLOCK_16X16
- rd_record_tree_1_2, // BLOCK_16X32
- rd_record_tree_2_1, // BLOCK_32X16
- rd_record_tree_sqr, // BLOCK_32X32
- rd_record_tree_1_2, // BLOCK_32X64
- rd_record_tree_2_1, // BLOCK_64X32
- rd_record_tree_sqr, // BLOCK_64X64
- rd_record_tree_64x128, // BLOCK_64X128
- rd_record_tree_128x64, // BLOCK_128X64
- rd_record_tree_128x128, // BLOCK_128X128
- NULL, // BLOCK_4X16
- NULL, // BLOCK_16X4
- rd_record_tree_1_4, // BLOCK_8X32
- rd_record_tree_4_1, // BLOCK_32X8
- rd_record_tree_1_4, // BLOCK_16X64
- rd_record_tree_4_1, // BLOCK_64X16
-};
-
-static const int rd_record_tree_size[BLOCK_SIZES_ALL] = {  // Number of template entries in the matching rd_record_tree[] array; must stay in step with that table.
- 0, // BLOCK_4X4
- 0, // BLOCK_4X8
- 0, // BLOCK_8X4
- sizeof(rd_record_tree_8x8) / sizeof(RD_RECORD_IDX_NODE), // BLOCK_8X8
- sizeof(rd_record_tree_8x16) / sizeof(RD_RECORD_IDX_NODE), // BLOCK_8X16
- sizeof(rd_record_tree_16x8) / sizeof(RD_RECORD_IDX_NODE), // BLOCK_16X8
- sizeof(rd_record_tree_16x16) / sizeof(RD_RECORD_IDX_NODE), // BLOCK_16X16
- sizeof(rd_record_tree_1_2) / sizeof(RD_RECORD_IDX_NODE), // BLOCK_16X32
- sizeof(rd_record_tree_2_1) / sizeof(RD_RECORD_IDX_NODE), // BLOCK_32X16
- sizeof(rd_record_tree_sqr) / sizeof(RD_RECORD_IDX_NODE), // BLOCK_32X32
- sizeof(rd_record_tree_1_2) / sizeof(RD_RECORD_IDX_NODE), // BLOCK_32X64
- sizeof(rd_record_tree_2_1) / sizeof(RD_RECORD_IDX_NODE), // BLOCK_64X32
- sizeof(rd_record_tree_sqr) / sizeof(RD_RECORD_IDX_NODE), // BLOCK_64X64
- sizeof(rd_record_tree_64x128) / sizeof(RD_RECORD_IDX_NODE), // BLOCK_64X128
- sizeof(rd_record_tree_128x64) / sizeof(RD_RECORD_IDX_NODE), // BLOCK_128X64
- sizeof(rd_record_tree_128x128) / sizeof(RD_RECORD_IDX_NODE), // BLOCK_128X128
- 0, // BLOCK_4X16
- 0, // BLOCK_16X4
- sizeof(rd_record_tree_1_4) / sizeof(RD_RECORD_IDX_NODE), // BLOCK_8X32
- sizeof(rd_record_tree_4_1) / sizeof(RD_RECORD_IDX_NODE), // BLOCK_32X8
- sizeof(rd_record_tree_1_4) / sizeof(RD_RECORD_IDX_NODE), // BLOCK_16X64
- sizeof(rd_record_tree_4_1) / sizeof(RD_RECORD_IDX_NODE), // BLOCK_64X16
-};
-
-// Wires up the child pointers of `tree` from the index template registered
-// for bsize: leaf nodes get all-zero children, other nodes point at the tree
-// entries named by the template (indices <= 0 become NULL).
-static INLINE void init_rd_record_tree(TXB_RD_INFO_NODE *tree,
-                                       BLOCK_SIZE bsize) {
-  const RD_RECORD_IDX_NODE *const tmpl = rd_record_tree[bsize];
-  const int num_nodes = rd_record_tree_size[bsize];
-  for (int n = 0; n < num_nodes; ++n) {
-    if (tmpl[n].leaf) {
-      av1_zero(tree[n].children);
-      continue;
-    }
-    for (int k = 0; k < 4; ++k) {
-      const int8_t child = tmpl[n].children[k];
-      if (child > 0) {
-        tree[n].children[k] = &tree[child];
-      } else {
-        tree[n].children[k] = NULL;
-      }
-    }
-  }
-}
-
-// Go through all TX blocks that could be used in TX size search, compute
-// residual hash values for them and find matching RD info that stores previous
-// RD search results for these TX blocks. The idea is to prevent repeated
-// rate/distortion computations that happen because of the combination of
-// partition and TX size search. The resulting RD info records are returned in
-// the form of a quadtree for easier access in actual TX size search.
-//
-// dst_rd_info is filled depth by depth, in raster order within each depth.
-// Rectangular transform sizes get a node whose rd_info_array is NULL.
-// Returns 0 (nothing filled in) when the largest square transform for bsize
-// is smaller than TX_8X8, and 1 otherwise.
-static int find_tx_size_rd_records(MACROBLOCK *x, BLOCK_SIZE bsize, int mi_row,
-                                   int mi_col, TXB_RD_INFO_NODE *dst_rd_info) {
-  TXB_RD_RECORD *rd_records_table[4] = { x->txb_rd_record_8X8,
-                                         x->txb_rd_record_16X16,
-                                         x->txb_rd_record_32X32,
-                                         x->txb_rd_record_64X64 };
-  const TX_SIZE max_square_tx_size = max_txsize_lookup[bsize];
-  const int bw = block_size_wide[bsize];
-  const int bh = block_size_high[bsize];
-
-  // Hashing is performed only for square TX sizes larger than TX_4X4
-  if (max_square_tx_size < TX_8X8) return 0;
-  const int diff_stride = bw;
-  const struct macroblock_plane *const p = &x->plane[0];
-  const int16_t *diff = &p->src_diff[0];
-  init_rd_record_tree(dst_rd_info, bsize);
-  // Coordinates of the top-left corner of current block within the superblock
-  // measured in pixels:
-  const int mi_row_in_sb = (mi_row % MAX_MIB_SIZE) << MI_SIZE_LOG2;
-  const int mi_col_in_sb = (mi_col % MAX_MIB_SIZE) << MI_SIZE_LOG2;
-  int cur_rd_info_idx = 0;
-  int cur_tx_depth = 0;
-  TX_SIZE cur_tx_size = max_txsize_rect_lookup[bsize];
-  while (cur_tx_depth <= MAX_VARTX_DEPTH) {
-    const int cur_tx_bw = tx_size_wide[cur_tx_size];
-    const int cur_tx_bh = tx_size_high[cur_tx_size];
-    if (cur_tx_bw < 8 || cur_tx_bh < 8) break;
-    const TX_SIZE next_tx_size = sub_tx_size_map[cur_tx_size];
-    // Index into rd_records_table; only meaningful for square sizes.
-    const int tx_size_idx = cur_tx_size - TX_8X8;
-    for (int row = 0; row < bh; row += cur_tx_bh) {
-      for (int col = 0; col < bw; col += cur_tx_bw) {
-        if (cur_tx_bw != cur_tx_bh) {
-          // Use dummy nodes for all rectangular transforms within the
-          // TX size search tree.
-          dst_rd_info[cur_rd_info_idx].rd_info_array = NULL;
-        } else {
-          // Get spatial location of this TX block within the superblock
-          // (measured in cur_tx_bsize units).
-          const int row_in_sb = (mi_row_in_sb + row) / cur_tx_bh;
-          const int col_in_sb = (mi_col_in_sb + col) / cur_tx_bw;
-
-          // Pack the strided residual into a contiguous buffer for hashing.
-          int16_t hash_data[MAX_SB_SQUARE];
-          int16_t *cur_hash_row = hash_data;
-          const int16_t *cur_diff_row = diff + row * diff_stride + col;
-          for (int i = 0; i < cur_tx_bh; i++) {
-            memcpy(cur_hash_row, cur_diff_row, sizeof(*hash_data) * cur_tx_bw);
-            cur_hash_row += cur_tx_bw;
-            cur_diff_row += diff_stride;
-          }
-          // Keep the CRC unsigned: find_tx_size_rd_info() takes a uint32_t,
-          // and a round trip through int is implementation-defined for
-          // values above INT_MAX.
-          const uint32_t hash = av1_get_crc32c_value(
-              &x->mb_rd_record.crc_calculator, (uint8_t *)hash_data,
-              2 * cur_tx_bw * cur_tx_bh);
-          // Find corresponding RD info based on the hash value.
-          const int record_idx =
-              row_in_sb * (MAX_MIB_SIZE >> (tx_size_idx + 1)) + col_in_sb;
-          TXB_RD_RECORD *records = &rd_records_table[tx_size_idx][record_idx];
-          int idx = find_tx_size_rd_info(records, hash);
-          dst_rd_info[cur_rd_info_idx].rd_info_array =
-              &records->tx_rd_info[idx];
-        }
-        ++cur_rd_info_idx;
-      }
-    }
-    cur_tx_size = next_tx_size;
-    ++cur_tx_depth;
-  }
-  return 1;
-}
-
-// origin_threshold * 128 / 100. The values are stored pre-scaled by 128, which matches the (|coef| << 7) scaling that predict_skip_flag() applies to coefficients before comparing them with threshold * quantizer.
-static const uint32_t skip_pred_threshold[3][BLOCK_SIZES_ALL] = {  // [bit-depth row][block size]; predict_skip_flag() picks row 0 for 8-bit, row 1 for 10-bit, row 2 otherwise
- {
- 64, 64, 64, 70, 60, 60, 68, 68, 68, 68, 68,
- 68, 68, 68, 68, 68, 64, 64, 70, 70, 68, 68,
- },
- {
- 88, 88, 88, 86, 87, 87, 68, 68, 68, 68, 68,
- 68, 68, 68, 68, 68, 88, 88, 86, 86, 68, 68,
- },
- {
- 90, 93, 93, 90, 93, 93, 74, 74, 74, 74, 74,
- 74, 74, 74, 74, 74, 90, 90, 90, 90, 74, 74,
- },
-};
-
-// lookup table for predict_skip_flag: the transform size used there for each block size.
-// int max_tx_size = max_txsize_rect_lookup[bsize];
-// if (tx_size_high[max_tx_size] > 16 || tx_size_wide[max_tx_size] > 16)
-// max_tx_size = AOMMIN(max_txsize_lookup[bsize], TX_16X16);
-static const TX_SIZE max_predict_sf_tx_size[BLOCK_SIZES_ALL] = {  // indexed by BLOCK_SIZE; per the pseudo-code above, no entry is larger than 16 in either dimension
- TX_4X4, TX_4X8, TX_8X4, TX_8X8, TX_8X16, TX_16X8,
- TX_16X16, TX_16X16, TX_16X16, TX_16X16, TX_16X16, TX_16X16,
- TX_16X16, TX_16X16, TX_16X16, TX_16X16, TX_4X16, TX_16X4,
- TX_8X8, TX_8X8, TX_16X16, TX_16X16,
-};
-
-// Uses simple features on top of DCT coefficients to quickly predict
-// whether optimal RD decision is to skip encoding the residual.
-// The sse value is stored in dist. Returns 1 when skipping is predicted and 0 otherwise; *dist is written before any return.
-static int predict_skip_flag(MACROBLOCK *x, BLOCK_SIZE bsize, int64_t *dist,
- int reduced_tx_set) {
- const int bw = block_size_wide[bsize];
- const int bh = block_size_high[bsize];
- const MACROBLOCKD *xd = &x->e_mbd;
- const int16_t dc_q = av1_dc_quant_QTX(x->qindex, 0, xd->bd);
-
- *dist = pixel_diff_dist(x, 0, 0, 0, bsize, bsize);
- const int64_t mse = *dist / bw / bh;  // per-pixel average of the block distortion
- // Normalized quantizer takes the transform upscaling factor (8 for tx size
- // smaller than 32) into account.
- const int16_t normalized_dc_q = dc_q >> 3;
- const int64_t mse_thresh = (int64_t)normalized_dc_q * normalized_dc_q / 8;
- // Predict not to skip when mse is larger than threshold.
- if (mse > mse_thresh) return 0;
-
- const int max_tx_size = max_predict_sf_tx_size[bsize];
- const int tx_h = tx_size_high[max_tx_size];
- const int tx_w = tx_size_wide[max_tx_size];
- DECLARE_ALIGNED(32, tran_low_t, coefs[32 * 32]);  // room for 32x32 coefficients, although max_predict_sf_tx_size never exceeds TX_16X16
- TxfmParam param;
- param.tx_type = DCT_DCT;  // only DCT_DCT is evaluated for the prediction
- param.tx_size = max_tx_size;
- param.bd = xd->bd;
- param.is_hbd = get_bitdepth_data_path_index(xd);
- param.lossless = 0;
- param.tx_set_type = av1_get_ext_tx_set_type(
- param.tx_size, is_inter_block(xd->mi[0]), reduced_tx_set);
- const int bd_idx = (xd->bd == 8) ? 0 : ((xd->bd == 10) ? 1 : 2);  // row of skip_pred_threshold
- const uint32_t max_qcoef_thresh = skip_pred_threshold[bd_idx][bsize];
- const int16_t *src_diff = x->plane[0].src_diff;
- const int n_coeff = tx_w * tx_h;
- const int16_t ac_q = av1_ac_quant_QTX(x->qindex, 0, xd->bd);
- const uint32_t dc_thresh = max_qcoef_thresh * dc_q;  // thresholds scale with the quantizer value
- const uint32_t ac_thresh = max_qcoef_thresh * ac_q;
- for (int row = 0; row < bh; row += tx_h) {  // walk the block in steps of the chosen transform size
- for (int col = 0; col < bw; col += tx_w) {
- av1_fwd_txfm(src_diff + col, coefs, bw, &param);  // the residual stride passed here is the block width
- // Operating on TX domain, not pixels; we want the QTX quantizers
- const uint32_t dc_coef = (((uint32_t)abs(coefs[0])) << 7);  // << 7 matches the *128 scale of skip_pred_threshold
- if (dc_coef >= dc_thresh) return 0;
- for (int i = 1; i < n_coeff; ++i) {
- const uint32_t ac_coef = (((uint32_t)abs(coefs[i])) << 7);
- if (ac_coef >= ac_thresh) return 0;  // a single large coefficient is enough to predict "do not skip"
- }
- }
- src_diff += tx_h * bw;  // advance to the next row of transform blocks
- }
- return 1;  // every coefficient of every transform block stayed below its threshold
-}
-
-// Fills in the mode info and RD stats of a block whose residual is going to
-// be skipped, so the caller can stop the transform search early.
-//   x        - macroblock being encoded; its current mode info is updated.
-//   rd_stats - receives skip = 1, rate = 0 and dist = sse = scaled distortion.
-//   bsize    - block size of the current block.
-//   dist     - distortion of the block (as produced by predict_skip_flag()).
-static void set_skip_flag(MACROBLOCK *x, RD_STATS *rd_stats, int bsize,
-                          int64_t dist) {
-  MACROBLOCKD *const xd = &x->e_mbd;
-  MB_MODE_INFO *const mbmi = xd->mi[0];
-  const int num_blks = bsize_to_num_blk(bsize);
-  const TX_SIZE largest_tx = max_txsize_rect_lookup[bsize];
-
-  // Every transform block is DCT_DCT at the largest rectangular size.
-  memset(mbmi->txk_type, DCT_DCT, sizeof(mbmi->txk_type[0]) * TXK_TYPE_BUF_LEN);
-  memset(mbmi->inter_tx_size, largest_tx, sizeof(mbmi->inter_tx_size));
-  mbmi->tx_size = largest_tx;
-
-  // Mark each luma unit of the block as skipped.
-  int blk = 0;
-  while (blk < num_blks) {
-    set_blk_skip(x, 0, blk, 1);
-    ++blk;
-  }
-
-  rd_stats->skip = 1;
-  rd_stats->rate = 0;
-
-  // High-bitdepth buffers: bring the distortion back by 2 * (bd - 8) bits.
-  int64_t scaled_dist = dist;
-  if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH)
-    scaled_dist = ROUND_POWER_OF_TWO(scaled_dist, (xd->bd - 8) * 2);
-  rd_stats->dist = rd_stats->sse = (scaled_dist << 4);
-}
-
-// Chooses the luma transform partitioning for the block and writes the
-// resulting RD stats to rd_stats. rd_stats is left invalid when the search is
-// pruned by the model-based check or when no candidate is found.
-//
-// Early exits, in order:
-//   1. model-based pruning against ref_best_rd,
-//   2. a hit in the whole-block RD record (x->mb_rd_record),
-//   3. skip prediction via predict_skip_flag().
-// Otherwise select_tx_size_fix_type() runs the actual search.
-//
-// The whole-block RD record is looked up and updated only when the block is
-// within the tile bounds (within_border) and cpi->sf.use_mb_rd_hash is set.
-// Previously the skip-prediction path stored a record even with the speed
-// feature off, although nothing in this function would ever read it back; the
-// residue hash is now also computed only when the record is in use.
-static void select_tx_type_yrd(const AV1_COMP *cpi, MACROBLOCK *x,
-                               RD_STATS *rd_stats, BLOCK_SIZE bsize, int mi_row,
-                               int mi_col, int64_t ref_best_rd) {
-  const AV1_COMMON *cm = &cpi->common;
-  MACROBLOCKD *const xd = &x->e_mbd;
-  MB_MODE_INFO *const mbmi = xd->mi[0];
-  int64_t rd = INT64_MAX;
-  int64_t best_rd = INT64_MAX;
-  const int is_inter = is_inter_block(mbmi);
-  const int n4 = bsize_to_num_blk(bsize);
-  // Get the tx_size 1 level down
-  const TX_SIZE min_tx_size = sub_tx_size_map[max_txsize_rect_lookup[bsize]];
-  const TxSetType tx_set_type =
-      av1_get_ext_tx_set_type(min_tx_size, is_inter, cm->reduced_tx_set_used);
-  const int within_border =
-      mi_row >= xd->tile.mi_row_start &&
-      (mi_row + mi_size_high[bsize] < xd->tile.mi_row_end) &&
-      mi_col >= xd->tile.mi_col_start &&
-      (mi_col + mi_size_wide[bsize] < xd->tile.mi_col_end);
-  // Single gate for every read and write of the whole-block RD record.
-  const int is_mb_rd_hash_enabled = within_border && cpi->sf.use_mb_rd_hash;
-
-  av1_invalid_rd_stats(rd_stats);
-
-  if (cpi->sf.model_based_prune_tx_search_level && ref_best_rd != INT64_MAX) {
-    int model_rate;
-    int64_t model_dist;
-    int model_skip;
-    model_rd_sb_fn[MODELRD_TYPE_TX_SEARCH_PRUNE](
-        cpi, bsize, x, xd, 0, 0, mi_row, mi_col, &model_rate, &model_dist,
-        &model_skip, NULL, NULL, NULL, NULL);
-    const int64_t model_rd = RDCOST(x->rdmult, model_rate, model_dist);
-    // If the modeled rd is a lot worse than the best so far, breakout.
-    // TODO(debargha, urvang): Improve the model and make the check below
-    // tighter.
-    assert(cpi->sf.model_based_prune_tx_search_level >= 0 &&
-           cpi->sf.model_based_prune_tx_search_level <= 2);
-    // The level is non-zero inside this branch, so (level - 1) is 0 or 1.
-    static const int prune_factor_by8[] = { 2 + MODELRD_TYPE_TX_SEARCH_PRUNE,
-                                            4 + MODELRD_TYPE_TX_SEARCH_PRUNE };
-    if (!model_skip &&
-        ((model_rd *
-          prune_factor_by8[cpi->sf.model_based_prune_tx_search_level - 1]) >>
-         3) > ref_best_rd)
-      return;
-  }
-
-  // The hash is only meaningful (and only computed) when the record is used.
-  const uint32_t hash =
-      is_mb_rd_hash_enabled ? get_block_residue_hash(x, bsize) : 0;
-  MB_RD_RECORD *mb_rd_record = &x->mb_rd_record;
-
-  if (ref_best_rd != INT64_MAX && is_mb_rd_hash_enabled) {
-    for (int i = 0; i < mb_rd_record->num; ++i) {
-      const int index = (mb_rd_record->index_start + i) % RD_RECORD_BUFFER_LEN;
-      // If there is a match in the tx_rd_record, fetch the RD decision and
-      // terminate early.
-      if (mb_rd_record->tx_rd_info[index].hash_value == hash) {
-        MB_RD_INFO *tx_rd_info = &mb_rd_record->tx_rd_info[index];
-        fetch_tx_rd_info(n4, tx_rd_info, rd_stats, x);
-        return;
-      }
-    }
-  }
-
-  // If we predict that skip is the optimal RD decision - set the respective
-  // context and terminate early.
-  int64_t dist;
-  if (is_inter && cpi->sf.tx_type_search.use_skip_flag_prediction &&
-      predict_skip_flag(x, bsize, &dist, cm->reduced_tx_set_used)) {
-    set_skip_flag(x, rd_stats, bsize, dist);
-    // Save the RD search results into tx_rd_record.
-    if (is_mb_rd_hash_enabled)
-      save_tx_rd_info(n4, hash, x, rd_stats, mb_rd_record);
-    return;
-  }
-
-  // Precompute residual hashes and find existing or add new RD records to
-  // store and reuse rate and distortion values to speed up TX size search.
-  TXB_RD_INFO_NODE matched_rd_info[4 + 16 + 64];
-  int found_rd_info = 0;
-  if (ref_best_rd != INT64_MAX && within_border && cpi->sf.use_inter_txb_hash) {
-    found_rd_info =
-        find_tx_size_rd_records(x, bsize, mi_row, mi_col, matched_rd_info);
-  }
-
-  prune_tx(cpi, bsize, x, xd, tx_set_type);
-
-  int found = 0;
-
-  RD_STATS this_rd_stats;
-  av1_init_rd_stats(&this_rd_stats);
-
-  rd = select_tx_size_fix_type(cpi, x, &this_rd_stats, bsize, ref_best_rd,
-                               found_rd_info ? matched_rd_info : NULL);
-  assert(IMPLIES(this_rd_stats.skip && !this_rd_stats.invalid_rate,
-                 this_rd_stats.rate == 0));
-
-  ref_best_rd = AOMMIN(rd, ref_best_rd);
-  // best_rd is still INT64_MAX here, so any finite rd is accepted.
-  if (rd < best_rd) {
-    *rd_stats = this_rd_stats;
-    found = 1;
-  }
-
-  // Reset the pruning flags.
-  av1_zero(x->tx_search_prune);
-  x->tx_split_prune_flag = 0;
-
-  // We should always find at least one candidate unless ref_best_rd is less
-  // than INT64_MAX (in which case, all the calls to select_tx_size_fix_type
-  // might have failed to find something better)
-  assert(IMPLIES(!found, ref_best_rd != INT64_MAX));
-  if (!found) return;
-
-  // Save the RD search results into tx_rd_record.
-  if (is_mb_rd_hash_enabled)
-    save_tx_rd_info(n4, hash, x, rd_stats, mb_rd_record);
-}
-
-static void tx_block_uvrd(const AV1_COMP *cpi, MACROBLOCK *x, int blk_row,
- int blk_col, int plane, int block, TX_SIZE tx_size,
- BLOCK_SIZE plane_bsize, ENTROPY_CONTEXT *above_ctx,
- ENTROPY_CONTEXT *left_ctx, RD_STATS *rd_stats,
- FAST_TX_SEARCH_MODE ftxs_mode) {  // Computes RD stats for one chroma transform block at (blk_row, blk_col) into rd_stats and updates the above/left entropy contexts at that position.
- assert(plane > 0);  // chroma planes only
- assert(tx_size < TX_SIZES_ALL);
- MACROBLOCKD *const xd = &x->e_mbd;
- MB_MODE_INFO *const mbmi = xd->mi[0];
- const int max_blocks_high = max_block_high(xd, plane_bsize, plane);
- const int max_blocks_wide = max_block_wide(xd, plane_bsize, plane);
- if (blk_row >= max_blocks_high || blk_col >= max_blocks_wide) return;  // position is beyond the block limits; rd_stats is left untouched
-
- ENTROPY_CONTEXT *ta = above_ctx + blk_col;
- ENTROPY_CONTEXT *tl = left_ctx + blk_row;
- TXB_CTX txb_ctx;
- get_txb_ctx(plane_bsize, tx_size, plane, ta, tl, &txb_ctx);
- const TX_SIZE txs_ctx = get_txsize_entropy_ctx(tx_size);
- const int zero_blk_rate = x->coeff_costs[txs_ctx][PLANE_TYPE_UV]
- .txb_skip_cost[txb_ctx.txb_skip_ctx][1];  // rate charged below when the block is treated as having no coefficients
- tx_block_rd_b(cpi, x, tx_size, blk_row, blk_col, plane, block, plane_bsize,
- &txb_ctx, rd_stats, ftxs_mode, INT64_MAX, NULL);  // INT64_MAX: no RD budget is imposed on this call
-
- const int mi_width = block_size_wide[plane_bsize] >> tx_size_wide_log2[0];
- const int blk_idx = blk_row * mi_width + blk_col;  // raster index of this position within the plane block
-
- av1_set_txb_context(x, plane, block, tx_size, ta, tl);
- if ((RDCOST(x->rdmult, rd_stats->rate, rd_stats->dist) >=
- RDCOST(x->rdmult, zero_blk_rate, rd_stats->sse) ||
- rd_stats->skip == 1) &&
- !xd->lossless[mbmi->segment_id]) {  // coding the coefficients is no better than dropping them (or the block is already skip), and the segment is not lossless
- rd_stats->rate = zero_blk_rate;
- rd_stats->dist = rd_stats->sse;
- }
-
- // Set chroma blk_skip to 0
- set_blk_skip(x, plane, blk_idx, 0);  // done unconditionally, even when the zero-block cost was substituted above
-}
-
-// Return value 0: early termination triggered, no valid rd cost available;
-// 1: rd cost values are valid. Accumulates the chroma (planes 1 and 2) RD stats of the block into rd_stats, one maximum-size transform block at a time.
-static int inter_block_uvrd(const AV1_COMP *cpi, MACROBLOCK *x,
- RD_STATS *rd_stats, BLOCK_SIZE bsize,
- int64_t non_skip_ref_best_rd,
- int64_t skip_ref_best_rd,
- FAST_TX_SEARCH_MODE ftxs_mode) {
- MACROBLOCKD *const xd = &x->e_mbd;
- MB_MODE_INFO *const mbmi = xd->mi[0];
- int plane;
- int is_cost_valid = 1;
- int64_t this_rd = 0;
- int64_t skip_rd = 0;
-
- if ((non_skip_ref_best_rd < 0) && (skip_ref_best_rd < 0)) is_cost_valid = 0;  // both budgets are already negative
-
- av1_init_rd_stats(rd_stats);
-
- if (x->skip_chroma_rd) {  // chroma RD is disabled for this block: no per-plane work is done
- if (!is_cost_valid) av1_invalid_rd_stats(rd_stats);
-
- return is_cost_valid;
- }
-
- const BLOCK_SIZE bsizec = scale_chroma_bsize(
- bsize, xd->plane[1].subsampling_x, xd->plane[1].subsampling_y);
-
- if (is_inter_block(mbmi) && is_cost_valid) {  // the residual is (re)computed here only for inter blocks
- for (plane = 1; plane < MAX_MB_PLANE; ++plane)
- av1_subtract_plane(x, bsizec, plane);
- }
-
- if (is_cost_valid) {
- for (plane = 1; plane < MAX_MB_PLANE; ++plane) {
- const struct macroblockd_plane *const pd = &xd->plane[plane];
- const BLOCK_SIZE plane_bsize =
- get_plane_block_size(bsizec, pd->subsampling_x, pd->subsampling_y);
- const int mi_width = block_size_wide[plane_bsize] >> tx_size_wide_log2[0];
- const int mi_height =
- block_size_high[plane_bsize] >> tx_size_high_log2[0];
- const TX_SIZE max_tx_size = get_vartx_max_txsize(xd, plane_bsize, plane);
- const int bh = tx_size_high_unit[max_tx_size];
- const int bw = tx_size_wide_unit[max_tx_size];
- int idx, idy;
- int block = 0;
- const int step = bh * bw;  // amount added to `block` per transform block
- ENTROPY_CONTEXT ta[MAX_MIB_SIZE];
- ENTROPY_CONTEXT tl[MAX_MIB_SIZE];
- av1_get_entropy_contexts(bsizec, pd, ta, tl);  // local context arrays handed to tx_block_uvrd for every block of this plane
-
- for (idy = 0; idy < mi_height; idy += bh) {
- for (idx = 0; idx < mi_width; idx += bw) {
- RD_STATS pn_rd_stats;
- av1_init_rd_stats(&pn_rd_stats);
- tx_block_uvrd(cpi, x, idy, idx, plane, block, max_tx_size,
- plane_bsize, ta, tl, &pn_rd_stats, ftxs_mode);
- if (pn_rd_stats.rate == INT_MAX) {  // this transform block produced no valid rate: give up on the whole block
- av1_invalid_rd_stats(rd_stats);
- return 0;
- }
- av1_merge_rd_stats(rd_stats, &pn_rd_stats);
- this_rd = RDCOST(x->rdmult, rd_stats->rate, rd_stats->dist);
- skip_rd = RDCOST(x->rdmult, 0, rd_stats->sse);
- if ((this_rd > non_skip_ref_best_rd) &&
- (skip_rd > skip_ref_best_rd)) {  // running totals exceed both the non-skip and the skip budget
- av1_invalid_rd_stats(rd_stats);
- return 0;
- }
- block += step;
- }
- }
- }
- } else {
- // reset cost value
- av1_invalid_rd_stats(rd_stats);
- }
-
- return is_cost_valid;
-}
-
-static void rd_pick_palette_intra_sbuv(const AV1_COMP *const cpi, MACROBLOCK *x,
- int dc_mode_cost,
- uint8_t *best_palette_color_map,
- MB_MODE_INFO *const best_mbmi,
- int64_t *best_rd, int *rate,
- int *rate_tokenonly, int64_t *distortion,
- int *skippable) {  // Tries chroma palettes of decreasing size; whenever one beats *best_rd it updates *best_rd, *best_mbmi, best_palette_color_map and the rate/distortion/skippable outputs.
- MACROBLOCKD *const xd = &x->e_mbd;
- MB_MODE_INFO *const mbmi = xd->mi[0];
- assert(!is_inter_block(mbmi));
- assert(
- av1_allow_palette(cpi->common.allow_screen_content_tools, mbmi->sb_type));
- PALETTE_MODE_INFO *const pmi = &mbmi->palette_mode_info;
- const BLOCK_SIZE bsize = mbmi->sb_type;
- const SequenceHeader *const seq_params = &cpi->common.seq_params;
- int this_rate;
- int64_t this_rd;
- int colors_u, colors_v, colors;
- const int src_stride = x->plane[1].src.stride;  // the plane-1 stride is also used to index the plane-2 source below
- const uint8_t *const src_u = x->plane[1].src.buf;
- const uint8_t *const src_v = x->plane[2].src.buf;
- uint8_t *const color_map = xd->plane[1].color_index_map;
- RD_STATS tokenonly_rd_stats;
- int plane_block_width, plane_block_height, rows, cols;
- av1_get_block_dimensions(bsize, 1, xd, &plane_block_width,
- &plane_block_height, &rows, &cols);
-
- mbmi->uv_mode = UV_DC_PRED;  // palette candidates are evaluated with UV_DC_PRED as the chroma mode
-
- int count_buf[1 << 12]; // Maximum (1 << 12) color levels.
- if (seq_params->use_highbitdepth) {
- colors_u = av1_count_colors_highbd(src_u, src_stride, rows, cols,
- seq_params->bit_depth, count_buf);
- colors_v = av1_count_colors_highbd(src_v, src_stride, rows, cols,
- seq_params->bit_depth, count_buf);
- } else {
- colors_u = av1_count_colors(src_u, src_stride, rows, cols, count_buf);
- colors_v = av1_count_colors(src_v, src_stride, rows, cols, count_buf);
- }
-
- uint16_t color_cache[2 * PALETTE_MAX_SIZE];
- const int n_cache = av1_get_palette_cache(xd, 1, color_cache);
-
- colors = colors_u > colors_v ? colors_u : colors_v;  // larger of the two per-plane color counts
- if (colors > 1 && colors <= 64) {  // the search runs only when that count is in 2..64
- int r, c, n, i, j;
- const int max_itr = 50;  // iteration limit passed to av1_k_means
- int lb_u, ub_u, val_u;
- int lb_v, ub_v, val_v;
- int *const data = x->palette_buffer->kmeans_data_buf;
- int centroids[2 * PALETTE_MAX_SIZE];
-
- uint16_t *src_u16 = CONVERT_TO_SHORTPTR(src_u);  // only dereferenced on the high-bitdepth paths
- uint16_t *src_v16 = CONVERT_TO_SHORTPTR(src_v);
- if (seq_params->use_highbitdepth) {
- lb_u = src_u16[0];
- ub_u = src_u16[0];
- lb_v = src_v16[0];
- ub_v = src_v16[0];
- } else {
- lb_u = src_u[0];
- ub_u = src_u[0];
- lb_v = src_v[0];
- ub_v = src_v[0];
- }
-
- for (r = 0; r < rows; ++r) {  // gather interleaved (U, V) samples into data[] and track the min/max of each channel
- for (c = 0; c < cols; ++c) {
- if (seq_params->use_highbitdepth) {
- val_u = src_u16[r * src_stride + c];
- val_v = src_v16[r * src_stride + c];
- data[(r * cols + c) * 2] = val_u;
- data[(r * cols + c) * 2 + 1] = val_v;
- } else {
- val_u = src_u[r * src_stride + c];
- val_v = src_v[r * src_stride + c];
- data[(r * cols + c) * 2] = val_u;
- data[(r * cols + c) * 2 + 1] = val_v;
- }
- if (val_u < lb_u)
- lb_u = val_u;
- else if (val_u > ub_u)
- ub_u = val_u;
- if (val_v < lb_v)
- lb_v = val_v;
- else if (val_v > ub_v)
- ub_v = val_v;
- }
- }
-
- for (n = colors > PALETTE_MAX_SIZE ? PALETTE_MAX_SIZE : colors; n >= 2;
- --n) {  // palette sizes from min(colors, PALETTE_MAX_SIZE) down to 2
- for (i = 0; i < n; ++i) {  // initial centroids: evenly spaced between the channel minimum and maximum
- centroids[i * 2] = lb_u + (2 * i + 1) * (ub_u - lb_u) / n / 2;
- centroids[i * 2 + 1] = lb_v + (2 * i + 1) * (ub_v - lb_v) / n / 2;
- }
- av1_k_means(data, centroids, color_map, rows * cols, n, 2, max_itr);
- optimize_palette_colors(color_cache, n_cache, n, 2, centroids);
- // Sort the U channel colors in ascending order.
- for (i = 0; i < 2 * (n - 1); i += 2) {  // selection sort over (U, V) pairs, keyed on U
- int min_idx = i;
- int min_val = centroids[i];
- for (j = i + 2; j < 2 * n; j += 2)
- if (centroids[j] < min_val) min_val = centroids[j], min_idx = j;
- if (min_idx != i) {
- int temp_u = centroids[i], temp_v = centroids[i + 1];
- centroids[i] = centroids[min_idx];
- centroids[i + 1] = centroids[min_idx + 1];
- centroids[min_idx] = temp_u, centroids[min_idx + 1] = temp_v;
- }
- }
- av1_calc_indices(data, centroids, color_map, rows * cols, n, 2);  // indices are recomputed after the centroids were adjusted and reordered
- extend_palette_color_map(color_map, cols, rows, plane_block_width,
- plane_block_height);
- pmi->palette_size[1] = n;
- for (i = 1; i < 3; ++i) {  // i = 1 stores the U colors, i = 2 the V colors (centroids are interleaved U, V)
- for (j = 0; j < n; ++j) {
- if (seq_params->use_highbitdepth)
- pmi->palette_colors[i * PALETTE_MAX_SIZE + j] = clip_pixel_highbd(
- (int)centroids[j * 2 + i - 1], seq_params->bit_depth);
- else
- pmi->palette_colors[i * PALETTE_MAX_SIZE + j] =
- clip_pixel((int)centroids[j * 2 + i - 1]);
- }
- }
-
- super_block_uvrd(cpi, x, &tokenonly_rd_stats, bsize, *best_rd);
- if (tokenonly_rd_stats.rate == INT_MAX) continue;  // no valid RD result for this palette size; try the next smaller one
- this_rate = tokenonly_rd_stats.rate +
- intra_mode_info_cost_uv(cpi, x, mbmi, bsize, dc_mode_cost);
- this_rd = RDCOST(x->rdmult, this_rate, tokenonly_rd_stats.dist);
- if (this_rd < *best_rd) {
- *best_rd = this_rd;
- *best_mbmi = *mbmi;
- memcpy(best_palette_color_map, color_map,
- plane_block_width * plane_block_height *
- sizeof(best_palette_color_map[0]));
- *rate = this_rate;
- *distortion = tokenonly_rd_stats.dist;
- *rate_tokenonly = tokenonly_rd_stats.rate;
- *skippable = tokenonly_rd_stats.skip;
- }
- }
- }
- if (best_mbmi->palette_mode_info.palette_size[1] > 0) {  // the best mode uses a chroma palette: put its color map back into the working buffer
- memcpy(color_map, best_palette_color_map,
- plane_block_width * plane_block_height *
- sizeof(best_palette_color_map[0]));
- }
-}
-
-// Evaluates the chroma RD cost for the angle delta currently stored in the
-// block's mode info. When the cost beats *best_rd, the best cost, angle delta,
-// total rate and token-only stats are updated through the output pointers.
-// Returns the RD cost, or INT64_MAX when super_block_uvrd() reports failure.
-static int64_t pick_intra_angle_routine_sbuv(
-    const AV1_COMP *const cpi, MACROBLOCK *x, BLOCK_SIZE bsize,
-    int rate_overhead, int64_t best_rd_in, int *rate, RD_STATS *rd_stats,
-    int *best_angle_delta, int64_t *best_rd) {
-  MB_MODE_INFO *mbmi = x->e_mbd.mi[0];
-  assert(!is_inter_block(mbmi));
-
-  RD_STATS uv_stats;
-  if (!super_block_uvrd(cpi, x, &uv_stats, bsize, best_rd_in)) {
-    return INT64_MAX;
-  }
-
-  const int total_rate =
-      uv_stats.rate +
-      intra_mode_info_cost_uv(cpi, x, mbmi, bsize, rate_overhead);
-  const int64_t cost = RDCOST(x->rdmult, total_rate, uv_stats.dist);
-
-  // Not an improvement: report the cost and leave the outputs alone.
-  if (cost >= *best_rd) return cost;
-
-  *best_rd = cost;
-  *best_angle_delta = mbmi->angle_delta[PLANE_TYPE_UV];
-  *rate = total_rate;
-  rd_stats->rate = uv_stats.rate;
-  rd_stats->dist = uv_stats.dist;
-  rd_stats->skip = uv_stats.skip;
-  return cost;
-}
-
-// With given chroma directional intra prediction mode, pick the best angle
-// delta. Return true if a RD cost that is smaller than the input one is found. The search runs in two passes: even deltas first, then odd deltas whose even neighbours looked promising.
-static int rd_pick_intra_angle_sbuv(const AV1_COMP *const cpi, MACROBLOCK *x,
- BLOCK_SIZE bsize, int rate_overhead,
- int64_t best_rd, int *rate,
- RD_STATS *rd_stats) {
- MACROBLOCKD *const xd = &x->e_mbd;
- MB_MODE_INFO *mbmi = xd->mi[0];
- assert(!is_inter_block(mbmi));
- int i, angle_delta, best_angle_delta = 0;
- int64_t this_rd, best_rd_in, rd_cost[2 * (MAX_ANGLE_DELTA + 2)];  // rd_cost[2 * delta + i]: cost for magnitude delta, i = 0 positive / i = 1 negative
-
- rd_stats->rate = INT_MAX;
- rd_stats->skip = 0;
- rd_stats->dist = INT64_MAX;
- for (i = 0; i < 2 * (MAX_ANGLE_DELTA + 2); ++i) rd_cost[i] = INT64_MAX;
-
- for (angle_delta = 0; angle_delta <= MAX_ANGLE_DELTA; angle_delta += 2) {  // first pass: even magnitudes
- for (i = 0; i < 2; ++i) {
- best_rd_in = (best_rd == INT64_MAX)
- ? INT64_MAX
- : (best_rd + (best_rd >> ((angle_delta == 0) ? 3 : 5)));  // budget: best_rd plus 1/8 for delta 0, plus 1/32 otherwise
- mbmi->angle_delta[PLANE_TYPE_UV] = (1 - 2 * i) * angle_delta;  // i = 0 -> +angle_delta, i = 1 -> -angle_delta
- this_rd = pick_intra_angle_routine_sbuv(cpi, x, bsize, rate_overhead,
- best_rd_in, rate, rd_stats,
- &best_angle_delta, &best_rd);
- rd_cost[2 * angle_delta + i] = this_rd;
- if (angle_delta == 0) {  // delta 0 has no sign: evaluate once and mirror the cost into the negative slot
- if (this_rd == INT64_MAX) return 0;
- rd_cost[1] = this_rd;
- break;
- }
- }
- }
-
- assert(best_rd != INT64_MAX);
- for (angle_delta = 1; angle_delta <= MAX_ANGLE_DELTA; angle_delta += 2) {  // second pass: odd magnitudes
- int64_t rd_thresh;
- for (i = 0; i < 2; ++i) {
- int skip_search = 0;
- rd_thresh = best_rd + (best_rd >> 5);
- if (rd_cost[2 * (angle_delta + 1) + i] > rd_thresh &&
- rd_cost[2 * (angle_delta - 1) + i] > rd_thresh)  // both even neighbours of the same sign exceed the threshold
- skip_search = 1;
- if (!skip_search) {
- mbmi->angle_delta[PLANE_TYPE_UV] = (1 - 2 * i) * angle_delta;
- pick_intra_angle_routine_sbuv(cpi, x, bsize, rate_overhead, best_rd,
- rate, rd_stats, &best_angle_delta,
- &best_rd);
- }
- }
- }
-
- mbmi->angle_delta[PLANE_TYPE_UV] = best_angle_delta;  // leave the winning delta in the mode info
- return rd_stats->rate != INT_MAX;
-}
-
-// Combines the alpha sign of `plane` (a) with the alpha sign of the other
-// chroma plane (b) into a joint sign index: the U sign is the high digit
-// (weight CFL_SIGNS), the V sign the low digit, and 1 is subtracted
-// (presumably because the zero/zero pair is not a valid combination).
-// Every parameter is parenthesised so that expression arguments expand with
-// the intended precedence.
-#define PLANE_SIGN_TO_JOINT_SIGN(plane, a, b)              \
-  ((plane) == CFL_PRED_U ? (a) * CFL_SIGNS + (b) - 1       \
-                         : (b) * CFL_SIGNS + (a) - 1)
-static int cfl_rd_pick_alpha(MACROBLOCK *const x, const AV1_COMP *const cpi,
- TX_SIZE tx_size, int64_t best_rd) {  // Searches the CfL alpha magnitudes and signs for both chroma planes, stores the winner in mbmi->cfl_alpha_idx / cfl_alpha_signs and returns its signalling rate, or INT_MAX when nothing beats best_rd.
- MACROBLOCKD *const xd = &x->e_mbd;
- MB_MODE_INFO *const mbmi = xd->mi[0];
-
- const BLOCK_SIZE bsize = mbmi->sb_type;
-#if CONFIG_DEBUG
- assert(is_cfl_allowed(xd));
- const int ssx = xd->plane[AOM_PLANE_U].subsampling_x;
- const int ssy = xd->plane[AOM_PLANE_U].subsampling_y;
- const BLOCK_SIZE plane_bsize = get_plane_block_size(mbmi->sb_type, ssx, ssy);
- (void)plane_bsize;
- assert(plane_bsize < BLOCK_SIZES_ALL);
- if (!xd->lossless[mbmi->segment_id]) {
- assert(block_size_wide[plane_bsize] == tx_size_wide[tx_size]);
- assert(block_size_high[plane_bsize] == tx_size_high[tx_size]);
- }
-#endif // CONFIG_DEBUG
-
- xd->cfl.use_dc_pred_cache = 1;  // enabled for the duration of the search; cleared again before returning
- const int64_t mode_rd =
- RDCOST(x->rdmult,
- x->intra_uv_mode_cost[CFL_ALLOWED][mbmi->mode][UV_CFL_PRED], 0);  // RD cost of the UV_CFL_PRED mode rate alone (zero distortion)
- int64_t best_rd_uv[CFL_JOINT_SIGNS][CFL_PRED_PLANES];  // best per-plane cost seen so far for each joint sign
- int best_c[CFL_JOINT_SIGNS][CFL_PRED_PLANES];  // alpha magnitude that achieved it
-#if CONFIG_DEBUG
- int best_rate_uv[CFL_JOINT_SIGNS][CFL_PRED_PLANES];
-#endif // CONFIG_DEBUG
-
- for (int plane = 0; plane < CFL_PRED_PLANES; plane++) {
- RD_STATS rd_stats;
- av1_init_rd_stats(&rd_stats);
- for (int joint_sign = 0; joint_sign < CFL_JOINT_SIGNS; joint_sign++) {
- best_rd_uv[joint_sign][plane] = INT64_MAX;
- best_c[joint_sign][plane] = 0;
- }
- // Collect RD stats for an alpha value of zero in this plane.
- // Skip i == CFL_SIGN_ZERO as (0, 0) is invalid.
- for (int i = CFL_SIGN_NEG; i < CFL_SIGNS; i++) {
- const int joint_sign = PLANE_SIGN_TO_JOINT_SIGN(plane, CFL_SIGN_ZERO, i);
- if (i == CFL_SIGN_NEG) {  // the transform RD is computed once and reused for the remaining sign of the other plane; only alpha_rate differs
- mbmi->cfl_alpha_idx = 0;
- mbmi->cfl_alpha_signs = joint_sign;
- txfm_rd_in_plane(x, cpi, &rd_stats, best_rd, plane + 1, bsize, tx_size,
- cpi->sf.use_fast_coef_costing, FTXS_NONE);
- if (rd_stats.rate == INT_MAX) break;
- }
- const int alpha_rate = x->cfl_cost[joint_sign][plane][0];
- best_rd_uv[joint_sign][plane] =
- RDCOST(x->rdmult, rd_stats.rate + alpha_rate, rd_stats.dist);
-#if CONFIG_DEBUG
- best_rate_uv[joint_sign][plane] = rd_stats.rate;
-#endif // CONFIG_DEBUG
- }
- }
-
- int best_joint_sign = -1;  // stays -1 until some complete U+V combination beats best_rd
-
- for (int plane = 0; plane < CFL_PRED_PLANES; plane++) {
- for (int pn_sign = CFL_SIGN_NEG; pn_sign < CFL_SIGNS; pn_sign++) {  // non-zero signs of this plane
- int progress = 0;
- for (int c = 0; c < CFL_ALPHABET_SIZE; c++) {
- int flag = 0;
- RD_STATS rd_stats;
- if (c > 2 && progress < c) break;  // stop trying larger magnitudes once improvements (each adds 2 to progress) have fallen behind c
- av1_init_rd_stats(&rd_stats);
- for (int i = 0; i < CFL_SIGNS; i++) {  // all signs of the other plane
- const int joint_sign = PLANE_SIGN_TO_JOINT_SIGN(plane, pn_sign, i);
- if (i == 0) {  // the transform RD is computed once per (pn_sign, c) and reused for the other values of i
- mbmi->cfl_alpha_idx = (c << CFL_ALPHABET_SIZE_LOG2) + c;  // the same magnitude c is placed in both halves of the index
- mbmi->cfl_alpha_signs = joint_sign;
- txfm_rd_in_plane(x, cpi, &rd_stats, best_rd, plane + 1, bsize,
- tx_size, cpi->sf.use_fast_coef_costing, FTXS_NONE);
- if (rd_stats.rate == INT_MAX) break;
- }
- const int alpha_rate = x->cfl_cost[joint_sign][plane][c];
- int64_t this_rd =
- RDCOST(x->rdmult, rd_stats.rate + alpha_rate, rd_stats.dist);
- if (this_rd >= best_rd_uv[joint_sign][plane]) continue;
- best_rd_uv[joint_sign][plane] = this_rd;
- best_c[joint_sign][plane] = c;
-#if CONFIG_DEBUG
- best_rate_uv[joint_sign][plane] = rd_stats.rate;
-#endif // CONFIG_DEBUG
- flag = 2;
- if (best_rd_uv[joint_sign][!plane] == INT64_MAX) continue;  // the other plane has no valid cost for this joint sign yet
- this_rd += mode_rd + best_rd_uv[joint_sign][!plane];  // combined cost: this plane + other plane + mode signalling
- if (this_rd >= best_rd) continue;
- best_rd = this_rd;
- best_joint_sign = joint_sign;
- }
- progress += flag;
- }
- }
- }
-
- int best_rate_overhead = INT_MAX;
- int ind = 0;
- if (best_joint_sign >= 0) {
- const int u = best_c[best_joint_sign][CFL_PRED_U];
- const int v = best_c[best_joint_sign][CFL_PRED_V];
- ind = (u << CFL_ALPHABET_SIZE_LOG2) + v;  // U magnitude in the high part of the index, V magnitude in the low part
- best_rate_overhead = x->cfl_cost[best_joint_sign][CFL_PRED_U][u] +
- x->cfl_cost[best_joint_sign][CFL_PRED_V][v];
-#if CONFIG_DEBUG
- xd->cfl.rate = x->intra_uv_mode_cost[CFL_ALLOWED][mbmi->mode][UV_CFL_PRED] +
- best_rate_overhead +
- best_rate_uv[best_joint_sign][CFL_PRED_U] +
- best_rate_uv[best_joint_sign][CFL_PRED_V];
-#endif // CONFIG_DEBUG
- } else {
- best_joint_sign = 0;  // nothing beat best_rd: store index 0 / sign 0 and return INT_MAX
- }
-
- mbmi->cfl_alpha_idx = ind;
- mbmi->cfl_alpha_signs = best_joint_sign;
- xd->cfl.use_dc_pred_cache = 0;
- xd->cfl.dc_pred_is_cached[0] = 0;
- xd->cfl.dc_pred_is_cached[1] = 0;
- return best_rate_overhead;
-}
-
-// Puts the chroma mode info into its search starting state: no chroma
-// palette and UV_DC_PRED as the chroma prediction mode.
-static void init_sbuv_mode(MB_MODE_INFO *const mbmi) {
-  mbmi->palette_mode_info.palette_size[1] = 0;
-  mbmi->uv_mode = UV_DC_PRED;
-}
-
-static int64_t rd_pick_intra_sbuv_mode(const AV1_COMP *const cpi, MACROBLOCK *x,
- int *rate, int *rate_tokenonly,
- int64_t *distortion, int *skippable,
- BLOCK_SIZE bsize, TX_SIZE max_tx_size) {  // Searches the chroma intra modes in uv_rd_search_mode_order (plus palette when allowed), leaves the best choice in the block's mode info, writes its rate/distortion/skippable outputs and returns its RD cost.
- MACROBLOCKD *xd = &x->e_mbd;
- MB_MODE_INFO *mbmi = xd->mi[0];
- assert(!is_inter_block(mbmi));
- MB_MODE_INFO best_mbmi = *mbmi;
- int64_t best_rd = INT64_MAX, this_rd;
-
- for (int mode_idx = 0; mode_idx < UV_INTRA_MODES; ++mode_idx) {
- int this_rate;
- RD_STATS tokenonly_rd_stats;
- UV_PREDICTION_MODE mode = uv_rd_search_mode_order[mode_idx];
- const int is_directional_mode = av1_is_directional_mode(get_uv_mode(mode));
- if (!(cpi->sf.intra_uv_mode_mask[txsize_sqr_up_map[max_tx_size]] &
- (1 << mode)))  // mode is masked out by the speed-feature mask for this transform size
- continue;
-
- mbmi->uv_mode = mode;
- int cfl_alpha_rate = 0;
- if (mode == UV_CFL_PRED) {
- if (!is_cfl_allowed(xd)) continue;
- assert(!is_directional_mode);
- const TX_SIZE uv_tx_size = av1_get_tx_size(AOM_PLANE_U, xd);
- cfl_alpha_rate = cfl_rd_pick_alpha(x, cpi, uv_tx_size, best_rd);
- if (cfl_alpha_rate == INT_MAX) continue;  // the alpha search found nothing better than best_rd
- }
- mbmi->angle_delta[PLANE_TYPE_UV] = 0;
- if (is_directional_mode && av1_use_angle_delta(mbmi->sb_type)) {
- const int rate_overhead =
- x->intra_uv_mode_cost[is_cfl_allowed(xd)][mbmi->mode][mode];
- if (!rd_pick_intra_angle_sbuv(cpi, x, bsize, rate_overhead, best_rd,
- &this_rate, &tokenonly_rd_stats))
- continue;
- } else {
- if (!super_block_uvrd(cpi, x, &tokenonly_rd_stats, bsize, best_rd)) {
- continue;
- }
- }
- const int mode_cost =
- x->intra_uv_mode_cost[is_cfl_allowed(xd)][mbmi->mode][mode] +
- cfl_alpha_rate;
- this_rate = tokenonly_rd_stats.rate +
- intra_mode_info_cost_uv(cpi, x, mbmi, bsize, mode_cost);  // overwrites any rate written by the angle search above
- if (mode == UV_CFL_PRED) {
- assert(is_cfl_allowed(xd));
-#if CONFIG_DEBUG
- if (!xd->lossless[mbmi->segment_id])
- assert(xd->cfl.rate == tokenonly_rd_stats.rate + mode_cost);
-#endif // CONFIG_DEBUG
- }
- this_rd = RDCOST(x->rdmult, this_rate, tokenonly_rd_stats.dist);
-
- if (this_rd < best_rd) {
- best_mbmi = *mbmi;
- best_rd = this_rd;
- *rate = this_rate;
- *rate_tokenonly = tokenonly_rd_stats.rate;
- *distortion = tokenonly_rd_stats.dist;
- *skippable = tokenonly_rd_stats.skip;
- }
- }
-
- const int try_palette =
- av1_allow_palette(cpi->common.allow_screen_content_tools, mbmi->sb_type);
- if (try_palette) {  // palette competes against the best non-palette mode found above
- uint8_t *best_palette_color_map = x->palette_buffer->best_palette_color_map;
- rd_pick_palette_intra_sbuv(
- cpi, x,
- x->intra_uv_mode_cost[is_cfl_allowed(xd)][mbmi->mode][UV_DC_PRED],
- best_palette_color_map, &best_mbmi, &best_rd, rate, rate_tokenonly,
- distortion, skippable);
- }
-
- *mbmi = best_mbmi;  // commit the winning mode info
- // Make sure we actually chose a mode
- assert(best_rd < INT64_MAX);
- return best_rd;
-}
-
-static void choose_intra_uv_mode(const AV1_COMP *const cpi, MACROBLOCK *const x,
- BLOCK_SIZE bsize, TX_SIZE max_tx_size,
- int *rate_uv, int *rate_uv_tokenonly,
- int64_t *dist_uv, int *skip_uv,
- UV_PREDICTION_MODE *mode_uv) {  // Selects the chroma intra mode for the current block and reports its rate, token-only rate, distortion, skip flag and mode through the output pointers.
- const AV1_COMMON *const cm = &cpi->common;
- MACROBLOCKD *xd = &x->e_mbd;
- MB_MODE_INFO *mbmi = xd->mi[0];
- const int mi_row = -xd->mb_to_top_edge >> (3 + MI_SIZE_LOG2);  // block position recovered from the stored edge distances
- const int mi_col = -xd->mb_to_left_edge >> (3 + MI_SIZE_LOG2);
- // Use an estimated rd for uv_intra based on DC_PRED if the
- // appropriate speed flag is set.
- init_sbuv_mode(mbmi);
- if (x->skip_chroma_rd) {  // chroma RD is disabled for this block: report a zero-cost, skipped UV_DC_PRED
- *rate_uv = 0;
- *rate_uv_tokenonly = 0;
- *dist_uv = 0;
- *skip_uv = 1;
- *mode_uv = UV_DC_PRED;
- return;
- }
- xd->cfl.is_chroma_reference =
- is_chroma_reference(mi_row, mi_col, bsize, cm->seq_params.subsampling_x,
- cm->seq_params.subsampling_y);
- bsize = scale_chroma_bsize(bsize, xd->plane[AOM_PLANE_U].subsampling_x,
- xd->plane[AOM_PLANE_U].subsampling_y);  // from here on bsize is the chroma-scaled size
- // Only store reconstructed luma when there's chroma RDO. When there's no
- // chroma RDO, the reconstructed luma will be stored in encode_superblock().
- xd->cfl.store_y = store_cfl_required_rdo(cm, x);
- if (xd->cfl.store_y) {
- // Restore reconstructed luma values.
- av1_encode_intra_block_plane(cpi, x, mbmi->sb_type, AOM_PLANE_Y,
- cpi->optimize_seg_arr[mbmi->segment_id],
- mi_row, mi_col);  // uses mbmi->sb_type, not the chroma-scaled bsize
- xd->cfl.store_y = 0;
- }
- rd_pick_intra_sbuv_mode(cpi, x, rate_uv, rate_uv_tokenonly, dist_uv, skip_uv,
- bsize, max_tx_size);
- *mode_uv = mbmi->uv_mode;
-}
-
-// Returns the rate of signalling inter prediction mode `mode` under
-// `mode_context`. Compound modes are looked up directly. Single-reference
-// modes are priced as a chain of binary decisions: NEWMV or not, then
-// GLOBALMV or not, then NEARESTMV versus the remaining mode.
-static int cost_mv_ref(const MACROBLOCK *const x, PREDICTION_MODE mode,
-                       int16_t mode_context) {
-  if (is_inter_compound_mode(mode)) {
-    return x
-        ->inter_compound_mode_cost[mode_context][INTER_COMPOUND_OFFSET(mode)];
-  }
-
-  assert(is_inter_mode(mode));
-
-  // First decision: NEWMV (bin 0) or something else (bin 1).
-  const int16_t newmv_ctx = mode_context & NEWMV_CTX_MASK;
-  if (mode == NEWMV) return x->newmv_mode_cost[newmv_ctx][0];
-
-  // Second decision: GLOBALMV (bin 0) or something else (bin 1).
-  const int16_t globalmv_ctx =
-      (mode_context >> GLOBALMV_OFFSET) & GLOBALMV_CTX_MASK;
-  int cost = x->newmv_mode_cost[newmv_ctx][1];
-  if (mode == GLOBALMV) return cost + x->zeromv_mode_cost[globalmv_ctx][0];
-
-  // Third decision: NEARESTMV (bin 0) or the remaining mode (bin 1).
-  const int16_t refmv_ctx = (mode_context >> REFMV_OFFSET) & REFMV_CTX_MASK;
-  cost += x->zeromv_mode_cost[globalmv_ctx][1];
-  cost += x->refmv_mode_cost[refmv_ctx][mode != NEARESTMV];
-  return cost;
-}
-
-// Returns the rate of the mask parameters for the block's inter-inter
-// compound type: nothing for COMPOUND_AVERAGE, one literal bit for
-// COMPOUND_DIFFWTD, and one literal bit plus the wedge index cost for
-// COMPOUND_WEDGE (zero when the block size has no wedge bits).
-static int get_interinter_compound_mask_rate(const MACROBLOCK *const x,
-                                             const MB_MODE_INFO *const mbmi) {
-  if (mbmi->interinter_comp.type == COMPOUND_WEDGE) {
-    if (!(get_interinter_wedge_bits(mbmi->sb_type) > 0)) return 0;
-    return av1_cost_literal(1) +
-           x->wedge_idx_cost[mbmi->sb_type][mbmi->interinter_comp.wedge_index];
-  }
-
-  if (mbmi->interinter_comp.type == COMPOUND_DIFFWTD) {
-    return av1_cost_literal(1);
-  }
-
-  // COMPOUND_AVERAGE costs nothing; any other type is unexpected.
-  if (mbmi->interinter_comp.type != COMPOUND_AVERAGE) assert(0);
-  return 0;
-}
-
-typedef struct {  // NOTE(review): no use of this type is visible in this part of the file; the field notes below are inferred from names only, confirm before relying on them
- int eobs;
- int brate;
- int byrate;
- int64_t bdist;
- int64_t bsse;
- int64_t brdcost;
- int_mv mvs[2];  // presumably one entry per reference frame
- int_mv pred_mv[2];
- int_mv ref_mv[2];
-
- ENTROPY_CONTEXT ta[2];  // presumably above entropy contexts
- ENTROPY_CONTEXT tl[2];  // presumably left entropy contexts
-} SEG_RDSTAT;
-
-typedef struct {  // NOTE(review): no use of this type is visible in this part of the file; the field notes below are inferred from names only, confirm before relying on them
- int_mv *ref_mv[2];
- int_mv mvp;
-
- int64_t segment_rd;
- int r;  // presumably rate
- int64_t d;  // presumably distortion
- int64_t sse;
- int segment_yrate;
- PREDICTION_MODE modes[4];
- SEG_RDSTAT rdstat[4][INTER_MODES + INTER_COMPOUND_MODES];  // one SEG_RDSTAT per (index 0..3, inter or compound mode)
- int mvthresh;
-} BEST_SEG_INFO;
-
-// Returns 1 when the motion vector, with its row and column shifted right by
-// 3 bits, falls outside [row_min, row_max] x [col_min, col_max] of
-// mv_limits, and 0 when it lies inside.
-static INLINE int mv_check_bounds(const MvLimits *mv_limits, const MV *mv) {
-  const int row = mv->row >> 3;
-  const int col = mv->col >> 3;
-
-  if (row < mv_limits->row_min) return 1;
-  if (row > mv_limits->row_max) return 1;
-  if (col < mv_limits->col_min) return 1;
-  return col > mv_limits->col_max;
-}
-
-// Returns the single-reference prediction mode that applies to reference
-// ref_idx: this_mode itself for non-compound prediction, otherwise the
-// per-reference component of the compound mode.
-static INLINE PREDICTION_MODE get_single_mode(PREDICTION_MODE this_mode,
-                                              int ref_idx, int is_comp_pred) {
-  if (!is_comp_pred) return this_mode;
-  return ref_idx ? compound_ref1_mode(this_mode)
-                 : compound_ref0_mode(this_mode);
-}
-
- // Jointly refines the two motion vectors of a compound-predicted block.
- //
- // The search alternates between the two references for up to four
- // iterations (even iterations refine reference 0, odd iterations reference
- // 1). In each iteration the predictor of the "other" reference is built into
- // a scratch buffer, then the current reference is refined with a small-range
- // full-pixel search followed (unless integer MVs are forced) by a sub-pixel
- // search. The loop stops as soon as an iteration fails to improve on the
- // best error recorded for that reference.
- //
- //   cpi, x          encoder and macroblock state
- //   bsize           block size being searched
- //   cur_mv          in/out: the two starting MVs; updated with the refined MVs
- //   mi_row, mi_col  block position in mode-info units
- //   ref_mv_sub8x8   unused
- //   mask            optional compound mask (NULL selects the averaging
- //                   variance function), with row stride |mask_stride|
- //   rate_mv         out: summed MV bit cost of both refined vectors
- //   block           index of the sub8x8 block, used to derive its 4x4 offset
- static void joint_motion_search(const AV1_COMP *cpi, MACROBLOCK *x,
-                                 BLOCK_SIZE bsize, int_mv *cur_mv, int mi_row,
-                                 int mi_col, int_mv *ref_mv_sub8x8[2],
-                                 const uint8_t *mask, int mask_stride,
-                                 int *rate_mv, const int block) {
-   const AV1_COMMON *const cm = &cpi->common;
-   const int num_planes = av1_num_planes(cm);
-   const int pw = block_size_wide[bsize];
-   const int ph = block_size_high[bsize];
-   const int plane = 0;
-   MACROBLOCKD *xd = &x->e_mbd;
-   MB_MODE_INFO *mbmi = xd->mi[0];
-   // This function should only ever be called for compound modes
-   assert(has_second_ref(mbmi));
-   const int_mv init_mv[2] = { cur_mv[0], cur_mv[1] };
-   const int refs[2] = { mbmi->ref_frame[0], mbmi->ref_frame[1] };
-   int_mv ref_mv[2];
-   int ite, ref;
-   // ic and ir are the 4x4 coordinates of the sub8x8 at index "block"
-   const int ic = block & 1;
-   const int ir = (block - ic) >> 1;
-   struct macroblockd_plane *const pd = &xd->plane[0];
-   const int p_col = ((mi_col * MI_SIZE) >> pd->subsampling_x) + 4 * ic;
-   const int p_row = ((mi_row * MI_SIZE) >> pd->subsampling_y) + 4 * ir;
-
-   ConvolveParams conv_params = get_conv_params(0, plane, xd->bd);
-   conv_params.use_jnt_comp_avg = 0;
-   WarpTypesAllowed warp_types[2];
-   for (ref = 0; ref < 2; ++ref) {
-     const WarpedMotionParams *const wm =
-         &xd->global_motion[xd->mi[0]->ref_frame[ref]];
-     const int is_global = is_global_mv_block(xd->mi[0], wm->wmtype);
-     warp_types[ref].global_warp_allowed = is_global;
-     warp_types[ref].local_warp_allowed = mbmi->motion_mode == WARPED_CAUSAL;
-   }
-
-   // Do joint motion search in compound mode to get more accurate mv.
-   struct buf_2d backup_yv12[2][MAX_MB_PLANE];
-   int last_besterr[2] = { INT_MAX, INT_MAX };
-   const YV12_BUFFER_CONFIG *const scaled_ref_frame[2] = {
-     av1_get_scaled_ref_frame(cpi, refs[0]),
-     av1_get_scaled_ref_frame(cpi, refs[1])
-   };
-
-   // Prediction buffer from second frame.
-   DECLARE_ALIGNED(16, uint8_t, second_pred16[MAX_SB_SQUARE * sizeof(uint16_t)]);
-   uint8_t *second_pred = get_buf_by_bd(xd, second_pred16);
-   (void)ref_mv_sub8x8;
-
-   // Modes that carry a NEARMV component use the next ref_mv_idx slot when
-   // selecting the MV cost tables.
-   const int have_nearmv = have_nearmv_in_inter_mode(mbmi->mode);
-   const int ref_mv_idx = mbmi->ref_mv_idx + (have_nearmv ? 1 : 0);
-   MV *const best_mv = &x->best_mv.as_mv;
-   const int search_range = SEARCH_RANGE_8P;
-   const int sadpb = x->sadperbit16;
-   // Allow joint search multiple times iteratively for each reference frame
-   // and break out of the search loop if it couldn't find a better mv.
-   for (ite = 0; ite < 4; ite++) {
-     struct buf_2d ref_yv12[2];
-     int bestsme = INT_MAX;
-     MvLimits tmp_mv_limits = x->mv_limits;
-     int id = ite % 2;  // Even iterations search in the first reference frame,
-                        // odd iterations search in the second. The predictor
-                        // found for the 'other' reference frame is factored in.
-     if (ite >= 2 && cur_mv[!id].as_int == init_mv[!id].as_int) {
-       if (cur_mv[id].as_int == init_mv[id].as_int) {
-         break;
-       } else {
-         // The other MV did not move. If this MV only changed in its
-         // fractional part, the full-pixel search below would start from the
-         // same integer position as before, so stop here.
-         int_mv cur_int_mv, init_int_mv;
-         cur_int_mv.as_mv.row = cur_mv[id].as_mv.row >> 3;
-         cur_int_mv.as_mv.col = cur_mv[id].as_mv.col >> 3;
-         init_int_mv.as_mv.row = init_mv[id].as_mv.row >> 3;
-         init_int_mv.as_mv.col = init_mv[id].as_mv.col >> 3;
-         if (cur_int_mv.as_int == init_int_mv.as_int) {
-           break;
-         }
-       }
-     }
-     for (ref = 0; ref < 2; ++ref) {
-       ref_mv[ref] = av1_get_ref_mv(x, ref);
-       // Swap out the reference frame for a version that's been scaled to
-       // match the resolution of the current frame, allowing the existing
-       // motion search code to be used without additional modifications.
-       if (scaled_ref_frame[ref]) {
-         int i;
-         for (i = 0; i < num_planes; i++)
-           backup_yv12[ref][i] = xd->plane[i].pre[ref];
-         av1_setup_pre_planes(xd, ref, scaled_ref_frame[ref], mi_row, mi_col,
-                              NULL, num_planes);
-       }
-     }
-
-     assert(IMPLIES(scaled_ref_frame[0] != NULL,
-                    cm->width == scaled_ref_frame[0]->y_crop_width &&
-                        cm->height == scaled_ref_frame[0]->y_crop_height));
-     assert(IMPLIES(scaled_ref_frame[1] != NULL,
-                    cm->width == scaled_ref_frame[1]->y_crop_width &&
-                        cm->height == scaled_ref_frame[1]->y_crop_height));
-
-     // Initialize based on (possibly scaled) prediction buffers.
-     ref_yv12[0] = xd->plane[plane].pre[0];
-     ref_yv12[1] = xd->plane[plane].pre[1];
-
-     // Get the prediction block from the 'other' reference frame.
-     const InterpFilters interp_filters = EIGHTTAP_REGULAR;
-
-     // Since we have scaled the reference frames to match the size of the
-     // current frame we must use a unit scaling factor during mode selection.
-     av1_build_inter_predictor(ref_yv12[!id].buf, ref_yv12[!id].stride,
-                               second_pred, pw, &cur_mv[!id].as_mv,
-                               &cm->sf_identity, pw, ph, &conv_params,
-                               interp_filters, &warp_types[!id], p_col, p_row,
-                               plane, !id, MV_PRECISION_Q3, mi_col * MI_SIZE,
-                               mi_row * MI_SIZE, xd, cm->allow_warped_motion);
-
-     const int order_idx = id != 0;
-     av1_jnt_comp_weight_assign(cm, mbmi, order_idx, &xd->jcp_param.fwd_offset,
-                                &xd->jcp_param.bck_offset,
-                                &xd->jcp_param.use_jnt_comp_avg, 1);
-
-     // Do full-pixel compound motion search on the current reference frame.
-     if (id) xd->plane[plane].pre[0] = ref_yv12[id];
-     av1_set_mv_search_range(&x->mv_limits, &ref_mv[id].as_mv);
-
-     // Use the mv result from the single mode as mv predictor.
-     *best_mv = cur_mv[id].as_mv;
-
-     best_mv->col >>= 3;
-     best_mv->row >>= 3;
-
-     av1_set_mvcost(x, id, ref_mv_idx);
-
-     // Small-range full-pixel motion search.
-     bestsme = av1_refining_search_8p_c(x, sadpb, search_range,
-                                        &cpi->fn_ptr[bsize], mask, mask_stride,
-                                        id, &ref_mv[id].as_mv, second_pred);
-     if (bestsme < INT_MAX) {
-       if (mask)
-         bestsme = av1_get_mvpred_mask_var(x, best_mv, &ref_mv[id].as_mv,
-                                           second_pred, mask, mask_stride, id,
-                                           &cpi->fn_ptr[bsize], 1);
-       else
-         bestsme = av1_get_mvpred_av_var(x, best_mv, &ref_mv[id].as_mv,
-                                         second_pred, &cpi->fn_ptr[bsize], 1);
-     }
-
-     x->mv_limits = tmp_mv_limits;
-
-     // Restore the pointer to the first (possibly scaled) prediction buffer.
-     if (id) xd->plane[plane].pre[0] = ref_yv12[0];
-
-     for (ref = 0; ref < 2; ++ref) {
-       if (scaled_ref_frame[ref]) {
-         // Swap back the original buffers for subpel motion search.
-         for (int i = 0; i < num_planes; i++) {
-           xd->plane[i].pre[ref] = backup_yv12[ref][i];
-         }
-         // Re-initialize based on unscaled prediction buffers.
-         ref_yv12[ref] = xd->plane[plane].pre[ref];
-       }
-     }
-
-     // Do sub-pixel compound motion search on the current reference frame.
-     if (id) xd->plane[plane].pre[0] = ref_yv12[id];
-
-     if (cpi->common.cur_frame_force_integer_mv) {
-       // No sub-pixel step will run: convert the full-pixel result back to
-       // 1/8-pel units here instead.
-       x->best_mv.as_mv.row *= 8;
-       x->best_mv.as_mv.col *= 8;
-     }
-     if (bestsme < INT_MAX && cpi->common.cur_frame_force_integer_mv == 0) {
-       int dis; /* TODO: use dis in distortion calculation later. */
-       unsigned int sse;
-       bestsme = cpi->find_fractional_mv_step(
-           x, cm, mi_row, mi_col, &ref_mv[id].as_mv,
-           cpi->common.allow_high_precision_mv, x->errorperbit,
-           &cpi->fn_ptr[bsize], 0, cpi->sf.mv.subpel_iters_per_step, NULL,
-           x->nmvjointcost, x->mvcost, &dis, &sse, second_pred, mask,
-           mask_stride, id, pw, ph, cpi->sf.use_accurate_subpel_search);
-     }
-
-     // Restore the pointer to the first prediction buffer.
-     if (id) xd->plane[plane].pre[0] = ref_yv12[0];
-     if (bestsme < last_besterr[id]) {
-       cur_mv[id].as_mv = *best_mv;
-       last_besterr[id] = bestsme;
-     } else {
-       break;
-     }
-   }
-
-   *rate_mv = 0;
-
-   // Charge the bit cost of both refined MVs against their reference MVs.
-   for (ref = 0; ref < 2; ++ref) {
-     av1_set_mvcost(x, ref, ref_mv_idx);
-     const int_mv curr_ref_mv = av1_get_ref_mv(x, ref);
-     *rate_mv += av1_mv_bit_cost(&cur_mv[ref].as_mv, &curr_ref_mv.as_mv,
-                                 x->nmvjointcost, x->mvcost, MV_COST_WEIGHT);
-   }
- }
-
- // Fills the per-reference-frame signalling cost tables for the current
- // block.
- //
- //   ref_costs_single[f]      cost of choosing single reference frame f
- //   ref_costs_comp[f0][f1]   cost of choosing the compound pair (f0, f1)
- //
- // When the segment has SEG_LVL_REF_FRAME active, all costs are zeroed.
- // Otherwise the single-reference costs are accumulated level by level along
- // the reference-selection tree from x->single_ref_cost[], and the compound
- // costs are built from x->comp_ref_cost[], x->comp_bwdref_cost[] and
- // x->uni_comp_ref_cost[]. If the frame's reference mode is SINGLE_REFERENCE,
- // the compound entries written here are set to the constant 512 instead.
- // Only the pairs assigned below are written in the non-segment path; other
- // ref_costs_comp entries are left untouched.
- static void estimate_ref_frame_costs(
- const AV1_COMMON *cm, const MACROBLOCKD *xd, const MACROBLOCK *x,
- int segment_id, unsigned int *ref_costs_single,
- unsigned int (*ref_costs_comp)[REF_FRAMES]) {
- int seg_ref_active =
- segfeature_active(&cm->seg, segment_id, SEG_LVL_REF_FRAME);
- if (seg_ref_active) {
- // Segment feature active: every reference cost is set to zero.
- memset(ref_costs_single, 0, REF_FRAMES * sizeof(*ref_costs_single));
- int ref_frame;
- for (ref_frame = 0; ref_frame < REF_FRAMES; ++ref_frame)
- memset(ref_costs_comp[ref_frame], 0,
- REF_FRAMES * sizeof((*ref_costs_comp)[0]));
- } else {
- int intra_inter_ctx = av1_get_intra_inter_context(xd);
- ref_costs_single[INTRA_FRAME] = x->intra_inter_cost[intra_inter_ctx][0];
- // Every inter reference starts from the cost of signalling "inter".
- unsigned int base_cost = x->intra_inter_cost[intra_inter_ctx][1];
-
- for (int i = LAST_FRAME; i <= ALTREF_FRAME; ++i)
- ref_costs_single[i] = base_cost;
-
- const int ctx_p1 = av1_get_pred_context_single_ref_p1(xd);
- const int ctx_p2 = av1_get_pred_context_single_ref_p2(xd);
- const int ctx_p3 = av1_get_pred_context_single_ref_p3(xd);
- const int ctx_p4 = av1_get_pred_context_single_ref_p4(xd);
- const int ctx_p5 = av1_get_pred_context_single_ref_p5(xd);
- const int ctx_p6 = av1_get_pred_context_single_ref_p6(xd);
-
- // Determine cost of a single ref frame, where frame types are represented
- // by a tree:
- // Level 0: add cost whether this ref is a forward or backward ref
- ref_costs_single[LAST_FRAME] += x->single_ref_cost[ctx_p1][0][0];
- ref_costs_single[LAST2_FRAME] += x->single_ref_cost[ctx_p1][0][0];
- ref_costs_single[LAST3_FRAME] += x->single_ref_cost[ctx_p1][0][0];
- ref_costs_single[GOLDEN_FRAME] += x->single_ref_cost[ctx_p1][0][0];
- ref_costs_single[BWDREF_FRAME] += x->single_ref_cost[ctx_p1][0][1];
- ref_costs_single[ALTREF2_FRAME] += x->single_ref_cost[ctx_p1][0][1];
- ref_costs_single[ALTREF_FRAME] += x->single_ref_cost[ctx_p1][0][1];
-
- // Level 1: if this ref is forward ref,
- // add cost whether it is last/last2 or last3/golden
- ref_costs_single[LAST_FRAME] += x->single_ref_cost[ctx_p3][2][0];
- ref_costs_single[LAST2_FRAME] += x->single_ref_cost[ctx_p3][2][0];
- ref_costs_single[LAST3_FRAME] += x->single_ref_cost[ctx_p3][2][1];
- ref_costs_single[GOLDEN_FRAME] += x->single_ref_cost[ctx_p3][2][1];
-
- // Level 1: if this ref is backward ref
- // then add cost whether this ref is altref or backward ref
- ref_costs_single[BWDREF_FRAME] += x->single_ref_cost[ctx_p2][1][0];
- ref_costs_single[ALTREF2_FRAME] += x->single_ref_cost[ctx_p2][1][0];
- ref_costs_single[ALTREF_FRAME] += x->single_ref_cost[ctx_p2][1][1];
-
- // Level 2: further add cost whether this ref is last or last2
- ref_costs_single[LAST_FRAME] += x->single_ref_cost[ctx_p4][3][0];
- ref_costs_single[LAST2_FRAME] += x->single_ref_cost[ctx_p4][3][1];
-
- // Level 2: last3 or golden
- ref_costs_single[LAST3_FRAME] += x->single_ref_cost[ctx_p5][4][0];
- ref_costs_single[GOLDEN_FRAME] += x->single_ref_cost[ctx_p5][4][1];
-
- // Level 2: bwdref or altref2
- ref_costs_single[BWDREF_FRAME] += x->single_ref_cost[ctx_p6][5][0];
- ref_costs_single[ALTREF2_FRAME] += x->single_ref_cost[ctx_p6][5][1];
-
- if (cm->reference_mode != SINGLE_REFERENCE) {
- // Similar to single ref, determine cost of compound ref frames.
- // cost_compound_refs = cost_first_ref + cost_second_ref
- const int bwdref_comp_ctx_p = av1_get_pred_context_comp_bwdref_p(xd);
- const int bwdref_comp_ctx_p1 = av1_get_pred_context_comp_bwdref_p1(xd);
- const int ref_comp_ctx_p = av1_get_pred_context_comp_ref_p(xd);
- const int ref_comp_ctx_p1 = av1_get_pred_context_comp_ref_p1(xd);
- const int ref_comp_ctx_p2 = av1_get_pred_context_comp_ref_p2(xd);
-
- const int comp_ref_type_ctx = av1_get_comp_reference_type_context(xd);
- unsigned int ref_bicomp_costs[REF_FRAMES] = { 0 };
-
- // Forward references carry the base cost plus the comp_ref_type cost
- // (index 1); backward references start from zero so the pair sum below
- // counts those terms once.
- ref_bicomp_costs[LAST_FRAME] = ref_bicomp_costs[LAST2_FRAME] =
- ref_bicomp_costs[LAST3_FRAME] = ref_bicomp_costs[GOLDEN_FRAME] =
- base_cost + x->comp_ref_type_cost[comp_ref_type_ctx][1];
- ref_bicomp_costs[BWDREF_FRAME] = ref_bicomp_costs[ALTREF2_FRAME] = 0;
- ref_bicomp_costs[ALTREF_FRAME] = 0;
-
- // cost of first ref frame
- ref_bicomp_costs[LAST_FRAME] += x->comp_ref_cost[ref_comp_ctx_p][0][0];
- ref_bicomp_costs[LAST2_FRAME] += x->comp_ref_cost[ref_comp_ctx_p][0][0];
- ref_bicomp_costs[LAST3_FRAME] += x->comp_ref_cost[ref_comp_ctx_p][0][1];
- ref_bicomp_costs[GOLDEN_FRAME] += x->comp_ref_cost[ref_comp_ctx_p][0][1];
-
- ref_bicomp_costs[LAST_FRAME] += x->comp_ref_cost[ref_comp_ctx_p1][1][0];
- ref_bicomp_costs[LAST2_FRAME] += x->comp_ref_cost[ref_comp_ctx_p1][1][1];
-
- ref_bicomp_costs[LAST3_FRAME] += x->comp_ref_cost[ref_comp_ctx_p2][2][0];
- ref_bicomp_costs[GOLDEN_FRAME] += x->comp_ref_cost[ref_comp_ctx_p2][2][1];
-
- // cost of second ref frame
- ref_bicomp_costs[BWDREF_FRAME] +=
- x->comp_bwdref_cost[bwdref_comp_ctx_p][0][0];
- ref_bicomp_costs[ALTREF2_FRAME] +=
- x->comp_bwdref_cost[bwdref_comp_ctx_p][0][0];
- ref_bicomp_costs[ALTREF_FRAME] +=
- x->comp_bwdref_cost[bwdref_comp_ctx_p][0][1];
-
- ref_bicomp_costs[BWDREF_FRAME] +=
- x->comp_bwdref_cost[bwdref_comp_ctx_p1][1][0];
- ref_bicomp_costs[ALTREF2_FRAME] +=
- x->comp_bwdref_cost[bwdref_comp_ctx_p1][1][1];
-
- // cost: if one ref frame is forward ref, the other ref is backward ref
- int ref0, ref1;
- for (ref0 = LAST_FRAME; ref0 <= GOLDEN_FRAME; ++ref0) {
- for (ref1 = BWDREF_FRAME; ref1 <= ALTREF_FRAME; ++ref1) {
- ref_costs_comp[ref0][ref1] =
- ref_bicomp_costs[ref0] + ref_bicomp_costs[ref1];
- }
- }
-
- // cost: if both ref frames are the same side.
- const int uni_comp_ref_ctx_p = av1_get_pred_context_uni_comp_ref_p(xd);
- const int uni_comp_ref_ctx_p1 = av1_get_pred_context_uni_comp_ref_p1(xd);
- const int uni_comp_ref_ctx_p2 = av1_get_pred_context_uni_comp_ref_p2(xd);
- ref_costs_comp[LAST_FRAME][LAST2_FRAME] =
- base_cost + x->comp_ref_type_cost[comp_ref_type_ctx][0] +
- x->uni_comp_ref_cost[uni_comp_ref_ctx_p][0][0] +
- x->uni_comp_ref_cost[uni_comp_ref_ctx_p1][1][0];
- ref_costs_comp[LAST_FRAME][LAST3_FRAME] =
- base_cost + x->comp_ref_type_cost[comp_ref_type_ctx][0] +
- x->uni_comp_ref_cost[uni_comp_ref_ctx_p][0][0] +
- x->uni_comp_ref_cost[uni_comp_ref_ctx_p1][1][1] +
- x->uni_comp_ref_cost[uni_comp_ref_ctx_p2][2][0];
- ref_costs_comp[LAST_FRAME][GOLDEN_FRAME] =
- base_cost + x->comp_ref_type_cost[comp_ref_type_ctx][0] +
- x->uni_comp_ref_cost[uni_comp_ref_ctx_p][0][0] +
- x->uni_comp_ref_cost[uni_comp_ref_ctx_p1][1][1] +
- x->uni_comp_ref_cost[uni_comp_ref_ctx_p2][2][1];
- ref_costs_comp[BWDREF_FRAME][ALTREF_FRAME] =
- base_cost + x->comp_ref_type_cost[comp_ref_type_ctx][0] +
- x->uni_comp_ref_cost[uni_comp_ref_ctx_p][0][1];
- } else {
- // Single-reference frames: the same set of compound pairs gets a fixed
- // cost of 512.
- // NOTE(review): 512 looks like a placeholder cost for pairs that cannot
- // be selected in this mode -- confirm its intended meaning.
- int ref0, ref1;
- for (ref0 = LAST_FRAME; ref0 <= GOLDEN_FRAME; ++ref0) {
- for (ref1 = BWDREF_FRAME; ref1 <= ALTREF_FRAME; ++ref1)
- ref_costs_comp[ref0][ref1] = 512;
- }
- ref_costs_comp[LAST_FRAME][LAST2_FRAME] = 512;
- ref_costs_comp[LAST_FRAME][LAST3_FRAME] = 512;
- ref_costs_comp[LAST_FRAME][GOLDEN_FRAME] = 512;
- ref_costs_comp[BWDREF_FRAME][ALTREF_FRAME] = 512;
- }
- }
- }
-
- // Snapshots the current coding decision into |ctx| so it can be restored
- // later if this way of encoding the block is chosen. Copies the skip flags,
- // the chosen mode index, the block's mode info and extended mode info, and
- // the three reference-mode prediction differences (narrowed to int).
- static void store_coding_context(MACROBLOCK *x, PICK_MODE_CONTEXT *ctx,
-                                  int mode_index,
-                                  int64_t comp_pred_diff[REFERENCE_MODES],
-                                  int skippable) {
-   // Mode information of the current block.
-   ctx->mic = *x->e_mbd.mi[0];
-   ctx->mbmi_ext = *x->mbmi_ext;
-
-   // Skip state and the winning mode index.
-   ctx->best_mode_index = mode_index;
-   ctx->skippable = skippable;
-   ctx->skip = x->skip;
-
-   // Per-reference-mode prediction differences.
-   ctx->hybrid_pred_diff = (int)comp_pred_diff[REFERENCE_MODE_SELECT];
-   ctx->comp_pred_diff = (int)comp_pred_diff[COMPOUND_REFERENCE];
-   ctx->single_pred_diff = (int)comp_pred_diff[SINGLE_REFERENCE];
- }
-
- // Prepares everything needed to search against |ref_frame|: sets up the
- // prediction planes in yv12_mb[ref_frame], collects the candidate reference
- // MVs into x->mbmi_ext, and runs av1_mv_pred() on the luma plane.
- static void setup_buffer_ref_mvs_inter(
-     const AV1_COMP *const cpi, MACROBLOCK *x, MV_REFERENCE_FRAME ref_frame,
-     BLOCK_SIZE block_size, int mi_row, int mi_col,
-     struct buf_2d yv12_mb[REF_FRAMES][MAX_MB_PLANE]) {
-   const AV1_COMMON *cm = &cpi->common;
-   MACROBLOCKD *const xd = &x->e_mbd;
-   MB_MODE_INFO_EXT *const ext = x->mbmi_ext;
-   const YV12_BUFFER_CONFIG *ref_buf = get_ref_frame_buffer(cpi, ref_frame);
-   const struct scale_factors *const scale =
-       &cm->frame_refs[ref_frame - 1].sf;
-
-   assert(ref_buf != NULL);
-
-   // TODO(jkoleszar): Is the UV buffer ever used here? If so, need to make this
-   // use the UV scaling factors.
-   av1_setup_pred_block(xd, yv12_mb[ref_frame], ref_buf, mi_row, mi_col, scale,
-                        scale, av1_num_planes(cm));
-
-   // Gets an initial list of candidate vectors from neighbours and orders them
-   av1_find_mv_refs(cm, xd, xd->mi[0], ref_frame, ext->ref_mv_count,
-                    ext->ref_mv_stack, NULL, ext->global_mvs, mi_row, mi_col,
-                    ext->mode_context);
-
-   // Further refinement that is encode side only to test the top few candidates
-   // in full and choose the best as the centre point for subsequent searches.
-   // The current implementation doesn't support scaling.
-   av1_mv_pred(cpi, x, yv12_mb[ref_frame][0].buf, ref_buf->y_stride, ref_frame,
-               block_size);
- }
-
- // Motion search for a single reference of the current block.
- //
- // Runs a full-pixel search (regular or OBMC, depending on
- // mbmi->motion_mode) followed, unless integer MVs are forced for the frame,
- // by a sub-pixel refinement. The result is left in x->best_mv and its bit
- // cost relative to the reference MV is written to |*rate_mv|.
- //
- //   cpi, x          encoder and macroblock state
- //   bsize           block size being searched
- //   mi_row, mi_col  block position in mode-info units
- //   ref_idx         which of the block's references (mbmi->ref_frame[]) to
- //                   search
- //   rate_mv         out: MV bit cost of x->best_mv; not written on the early
- //                   return below
- //
- // Early return: with adaptive motion search enabled, if this reference's
- // predicted-MV SAD (>> 3) exceeds that of any other reference, the search is
- // skipped, x->best_mv is set to INVALID_MV and x->pred_mv[ref] is zeroed.
- static void single_motion_search(const AV1_COMP *const cpi, MACROBLOCK *x,
-                                  BLOCK_SIZE bsize, int mi_row, int mi_col,
-                                  int ref_idx, int *rate_mv) {
-   MACROBLOCKD *xd = &x->e_mbd;
-   const AV1_COMMON *cm = &cpi->common;
-   const int num_planes = av1_num_planes(cm);
-   MB_MODE_INFO *mbmi = xd->mi[0];
-   struct buf_2d backup_yv12[MAX_MB_PLANE] = { { 0, 0, 0, 0, 0 } };
-   int bestsme = INT_MAX;
-   int step_param;
-   int sadpb = x->sadperbit16;
-   MV mvp_full;
-   int ref = mbmi->ref_frame[ref_idx];
-   MV ref_mv = av1_get_ref_mv(x, ref_idx).as_mv;
-
-   MvLimits tmp_mv_limits = x->mv_limits;
-   int cost_list[5];
-
-   const YV12_BUFFER_CONFIG *scaled_ref_frame =
-       av1_get_scaled_ref_frame(cpi, ref);
-
-   if (scaled_ref_frame) {
-     // Swap out the reference frame for a version that's been scaled to
-     // match the resolution of the current frame, allowing the existing
-     // full-pixel motion search code to be used without additional
-     // modifications.
-     for (int i = 0; i < num_planes; i++) {
-       backup_yv12[i] = xd->plane[i].pre[ref_idx];
-     }
-     av1_setup_pre_planes(xd, ref_idx, scaled_ref_frame, mi_row, mi_col, NULL,
-                          num_planes);
-   }
-
-   av1_set_mvcost(
-       x, ref_idx,
-       mbmi->ref_mv_idx + (have_nearmv_in_inter_mode(mbmi->mode) ? 1 : 0));
-
-   // Work out the size of the first step in the mv step search.
-   // 0 here is maximum length first step. 1 is AOMMAX >> 1 etc.
-   if (cpi->sf.mv.auto_mv_step_size && cm->show_frame) {
-     // Take the weighted average of the step_params based on the last frame's
-     // max mv magnitude and that based on the best ref mvs of the current
-     // block for the given reference.
-     step_param =
-         (av1_init_search_range(x->max_mv_context[ref]) + cpi->mv_step_param) /
-         2;
-   } else {
-     step_param = cpi->mv_step_param;
-   }
-
-   if (cpi->sf.adaptive_motion_search && bsize < cm->seq_params.sb_size) {
-     int boffset =
-         2 * (mi_size_wide_log2[cm->seq_params.sb_size] -
-              AOMMIN(mi_size_high_log2[bsize], mi_size_wide_log2[bsize]));
-     step_param = AOMMAX(step_param, boffset);
-   }
-
-   if (cpi->sf.adaptive_motion_search) {
-     int bwl = mi_size_wide_log2[bsize];
-     int bhl = mi_size_high_log2[bsize];
-     int tlevel = x->pred_mv_sad[ref] >> (bwl + bhl + 4);
-
-     if (tlevel < 5) {
-       step_param += 2;
-       step_param = AOMMIN(step_param, MAX_MVSEARCH_STEPS - 1);
-     }
-
-     // prev_mv_sad is not setup for dynamically scaled frames.
-     if (cpi->oxcf.resize_mode != RESIZE_RANDOM) {
-       int i;
-       for (i = LAST_FRAME; i <= ALTREF_FRAME && cm->show_frame; ++i) {
-         if ((x->pred_mv_sad[ref] >> 3) > x->pred_mv_sad[i]) {
-           x->pred_mv[ref].row = 0;
-           x->pred_mv[ref].col = 0;
-           x->best_mv.as_int = INVALID_MV;
-
-           if (scaled_ref_frame) {
-             // Swap back the original buffers before returning.
-             for (int j = 0; j < num_planes; ++j)
-               xd->plane[j].pre[ref_idx] = backup_yv12[j];
-           }
-           return;
-         }
-       }
-     }
-   }
-
-   // Note: MV limits are modified here. Always restore the original values
-   // after full-pixel motion search.
-   av1_set_mv_search_range(&x->mv_limits, &ref_mv);
-
-   if (mbmi->motion_mode != SIMPLE_TRANSLATION)
-     mvp_full = mbmi->mv[0].as_mv;
-   else
-     mvp_full = ref_mv;
-
-   // Convert the starting point from 1/8-pel to full-pel units.
-   mvp_full.col >>= 3;
-   mvp_full.row >>= 3;
-
-   x->best_mv.as_int = x->second_best_mv.as_int = INVALID_MV;
-
-   switch (mbmi->motion_mode) {
-     case SIMPLE_TRANSLATION:
-       bestsme = av1_full_pixel_search(
-           cpi, x, bsize, &mvp_full, step_param, cpi->sf.mv.search_method, 0,
-           sadpb, cond_cost_list(cpi, cost_list), &ref_mv, INT_MAX, 1,
-           (MI_SIZE * mi_col), (MI_SIZE * mi_row), 0);
-       break;
-     case OBMC_CAUSAL:
-       bestsme = av1_obmc_full_pixel_search(cpi, x, &mvp_full, step_param, sadpb,
-                                            MAX_MVSEARCH_STEPS - 1 - step_param,
-                                            1, &cpi->fn_ptr[bsize], &ref_mv,
-                                            &(x->best_mv.as_mv), 0);
-       break;
-     default: assert(0 && "Invalid motion mode!\n");
-   }
-
-   if (scaled_ref_frame) {
-     // Swap back the original buffers for subpel motion search.
-     for (int i = 0; i < num_planes; i++) {
-       xd->plane[i].pre[ref_idx] = backup_yv12[i];
-     }
-   }
-
-   x->mv_limits = tmp_mv_limits;
-
-   if (cpi->common.cur_frame_force_integer_mv) {
-     // No sub-pixel step will run: convert the full-pixel result back to
-     // 1/8-pel units here instead.
-     x->best_mv.as_mv.row *= 8;
-     x->best_mv.as_mv.col *= 8;
-   }
-   const int use_fractional_mv =
-       bestsme < INT_MAX && cpi->common.cur_frame_force_integer_mv == 0;
-   if (use_fractional_mv) {
-     int dis; /* TODO: use dis in distortion calculation later. */
-     switch (mbmi->motion_mode) {
-       case SIMPLE_TRANSLATION:
-         if (cpi->sf.use_accurate_subpel_search) {
-           int best_mv_var;
-           const int try_second = x->second_best_mv.as_int != INVALID_MV &&
-                                  x->second_best_mv.as_int != x->best_mv.as_int;
-           const int pw = block_size_wide[bsize];
-           const int ph = block_size_high[bsize];
-
-           best_mv_var = cpi->find_fractional_mv_step(
-               x, cm, mi_row, mi_col, &ref_mv, cm->allow_high_precision_mv,
-               x->errorperbit, &cpi->fn_ptr[bsize], cpi->sf.mv.subpel_force_stop,
-               cpi->sf.mv.subpel_iters_per_step, cond_cost_list(cpi, cost_list),
-               x->nmvjointcost, x->mvcost, &dis, &x->pred_sse[ref], NULL, NULL,
-               0, 0, pw, ph, 1);
-
-           if (try_second) {
-             // Also refine the second-best full-pixel candidate and keep
-             // whichever refined MV has the lower variance.
-             const int minc =
-                 AOMMAX(x->mv_limits.col_min * 8, ref_mv.col - MV_MAX);
-             const int maxc =
-                 AOMMIN(x->mv_limits.col_max * 8, ref_mv.col + MV_MAX);
-             const int minr =
-                 AOMMAX(x->mv_limits.row_min * 8, ref_mv.row - MV_MAX);
-             const int maxr =
-                 AOMMIN(x->mv_limits.row_max * 8, ref_mv.row + MV_MAX);
-             int this_var;
-             MV best_mv = x->best_mv.as_mv;
-
-             x->best_mv = x->second_best_mv;
-             if (x->best_mv.as_mv.row * 8 <= maxr &&
-                 x->best_mv.as_mv.row * 8 >= minr &&
-                 x->best_mv.as_mv.col * 8 <= maxc &&
-                 x->best_mv.as_mv.col * 8 >= minc) {
-               this_var = cpi->find_fractional_mv_step(
-                   x, cm, mi_row, mi_col, &ref_mv, cm->allow_high_precision_mv,
-                   x->errorperbit, &cpi->fn_ptr[bsize],
-                   cpi->sf.mv.subpel_force_stop,
-                   cpi->sf.mv.subpel_iters_per_step,
-                   cond_cost_list(cpi, cost_list), x->nmvjointcost, x->mvcost,
-                   &dis, &x->pred_sse[ref], NULL, NULL, 0, 0, pw, ph, 1);
-               if (this_var < best_mv_var) best_mv = x->best_mv.as_mv;
-             }
-             // Always write the winner back, including when the second-best
-             // candidate was out of range and never refined; otherwise
-             // x->best_mv would be left holding the unrefined full-pixel
-             // second-best vector.
-             x->best_mv.as_mv = best_mv;
-           }
-         } else {
-           cpi->find_fractional_mv_step(
-               x, cm, mi_row, mi_col, &ref_mv, cm->allow_high_precision_mv,
-               x->errorperbit, &cpi->fn_ptr[bsize], cpi->sf.mv.subpel_force_stop,
-               cpi->sf.mv.subpel_iters_per_step, cond_cost_list(cpi, cost_list),
-               x->nmvjointcost, x->mvcost, &dis, &x->pred_sse[ref], NULL, NULL,
-               0, 0, 0, 0, 0);
-         }
-         break;
-       case OBMC_CAUSAL:
-         av1_find_best_obmc_sub_pixel_tree_up(
-             x, cm, mi_row, mi_col, &x->best_mv.as_mv, &ref_mv,
-             cm->allow_high_precision_mv, x->errorperbit, &cpi->fn_ptr[bsize],
-             cpi->sf.mv.subpel_force_stop, cpi->sf.mv.subpel_iters_per_step,
-             x->nmvjointcost, x->mvcost, &dis, &x->pred_sse[ref], 0,
-             cpi->sf.use_accurate_subpel_search);
-         break;
-       default: assert(0 && "Invalid motion mode!\n");
-     }
-   }
-   *rate_mv = av1_mv_bit_cost(&x->best_mv.as_mv, &ref_mv, x->nmvjointcost,
-                              x->mvcost, MV_COST_WEIGHT);
-
-   // Remember the result as the predicted MV for this reference.
-   if (cpi->sf.adaptive_motion_search && mbmi->motion_mode == SIMPLE_TRANSLATION)
-     x->pred_mv[ref] = x->best_mv.as_mv;
- }
-
- // Points the destination buffer of the first |num_planes| planes of |xd|
- // back at the buffers and strides recorded in |dst|.
- static INLINE void restore_dst_buf(MACROBLOCKD *xd, BUFFER_SET dst,
-                                    const int num_planes) {
-   for (int plane = 0; plane < num_planes; ++plane) {
-     struct macroblockd_plane *const pd = &xd->plane[plane];
-     pd->dst.buf = dst.plane[plane];
-     pd->dst.stride = dst.stride[plane];
-   }
- }
-
- // Builds the luma prediction of the "other" reference (index !ref_idx) of a
- // compound block into |second_pred|, using |other_mv|, and then assigns the
- // joint-compound weights in xd->jcp_param. |block| is the sub8x8 index used
- // to derive the 4x4 offset inside the block.
- static void build_second_inter_pred(const AV1_COMP *cpi, MACROBLOCK *x,
-                                     BLOCK_SIZE bsize, const MV *other_mv,
-                                     int mi_row, int mi_col, const int block,
-                                     int ref_idx, uint8_t *second_pred) {
-   const AV1_COMMON *const cm = &cpi->common;
-   MACROBLOCKD *xd = &x->e_mbd;
-   MB_MODE_INFO *mbmi = xd->mi[0];
-   const int plane = 0;
-   const int other_idx = !ref_idx;
-   const int bw = block_size_wide[bsize];
-   const int bh = block_size_high[bsize];
-
-   // This function should only ever be called for compound modes
-   assert(has_second_ref(mbmi));
-
-   // 4x4 column/row of the sub8x8 at index "block", and the resulting pixel
-   // position of the prediction inside the plane.
-   const int sub_col = block & 1;
-   const int sub_row = (block - sub_col) >> 1;
-   struct macroblockd_plane *const pd = &xd->plane[0];
-   const int p_col = ((mi_col * MI_SIZE) >> pd->subsampling_x) + 4 * sub_col;
-   const int p_row = ((mi_row * MI_SIZE) >> pd->subsampling_y) + 4 * sub_row;
-
-   // Warp permissions for the other reference.
-   const WarpedMotionParams *const wm =
-       &xd->global_motion[mbmi->ref_frame[other_idx]];
-   WarpTypesAllowed warp_types;
-   warp_types.global_warp_allowed = is_global_mv_block(xd->mi[0], wm->wmtype);
-   warp_types.local_warp_allowed = mbmi->motion_mode == WARPED_CAUSAL;
-
-   // Scale factors are derived from the other reference's buffer dimensions
-   // relative to the current frame size.
-   struct buf_2d ref_yv12 = xd->plane[plane].pre[other_idx];
-   struct scale_factors sf;
-   av1_setup_scale_factors_for_frame(&sf, ref_yv12.width, ref_yv12.height,
-                                     cm->width, cm->height);
-
-   ConvolveParams conv_params = get_conv_params(0, plane, xd->bd);
-
-   // Get the prediction block from the 'other' reference frame.
-   av1_build_inter_predictor(ref_yv12.buf, ref_yv12.stride, second_pred, bw,
-                             other_mv, &sf, bw, bh, &conv_params,
-                             mbmi->interp_filters, &warp_types, p_col, p_row,
-                             plane, other_idx, MV_PRECISION_Q3,
-                             mi_col * MI_SIZE, mi_row * MI_SIZE, xd,
-                             cm->allow_warped_motion);
-
-   av1_jnt_comp_weight_assign(cm, mbmi, 0, &xd->jcp_param.fwd_offset,
-                              &xd->jcp_param.bck_offset,
-                              &xd->jcp_param.use_jnt_comp_avg, 1);
- }
-
- // Search for the best mv for one component of a compound,
- // given that the other component is fixed.
- //
- //   this_mv      in/out: starting MV for reference |ref_idx|; overwritten
- //                with the refined MV when the search yields a valid result
- //   second_pred  prediction already built for the fixed component
- //   mask         optional compound mask (NULL selects the averaging variance
- //                function), with row stride |mask_stride|
- //   rate_mv      out: MV bit cost of |*this_mv| relative to the reference MV
- //   ref_idx      which of the block's references is being refined
- //
- // The search is a small-range full-pixel refinement followed, unless integer
- // MVs are forced for the frame, by a sub-pixel step. While searching the
- // second reference, plane 0's pre[0] is temporarily pointed at that
- // reference's buffer and restored before returning.
- static void compound_single_motion_search(const AV1_COMP *cpi, MACROBLOCK *x,
- BLOCK_SIZE bsize, MV *this_mv,
- int mi_row, int mi_col,
- const uint8_t *second_pred,
- const uint8_t *mask, int mask_stride,
- int *rate_mv, int ref_idx) {
- const AV1_COMMON *const cm = &cpi->common;
- const int num_planes = av1_num_planes(cm);
- const int pw = block_size_wide[bsize];
- const int ph = block_size_high[bsize];
- MACROBLOCKD *xd = &x->e_mbd;
- MB_MODE_INFO *mbmi = xd->mi[0];
- const int ref = mbmi->ref_frame[ref_idx];
- const int_mv ref_mv = av1_get_ref_mv(x, ref_idx);
- struct macroblockd_plane *const pd = &xd->plane[0];
-
- struct buf_2d backup_yv12[MAX_MB_PLANE];
- const YV12_BUFFER_CONFIG *const scaled_ref_frame =
- av1_get_scaled_ref_frame(cpi, ref);
-
- // Check that this is either an interinter or an interintra block
- assert(has_second_ref(mbmi) || (ref_idx == 0 && is_interintra_mode(mbmi)));
-
- // Store the first prediction buffer.
- // orig_yv12 is only assigned (and only read back at the end) when ref_idx
- // is non-zero.
- struct buf_2d orig_yv12;
- if (ref_idx) {
- orig_yv12 = pd->pre[0];
- pd->pre[0] = pd->pre[ref_idx];
- }
-
- if (scaled_ref_frame) {
- int i;
- // Swap out the reference frame for a version that's been scaled to
- // match the resolution of the current frame, allowing the existing
- // full-pixel motion search code to be used without additional
- // modifications.
- for (i = 0; i < num_planes; i++) backup_yv12[i] = xd->plane[i].pre[ref_idx];
- av1_setup_pre_planes(xd, ref_idx, scaled_ref_frame, mi_row, mi_col, NULL,
- num_planes);
- }
-
- int bestsme = INT_MAX;
- int sadpb = x->sadperbit16;
- MV *const best_mv = &x->best_mv.as_mv;
- int search_range = SEARCH_RANGE_8P;
-
- // Saved so the limits can be restored after the full-pixel search.
- MvLimits tmp_mv_limits = x->mv_limits;
-
- // Do compound motion search on the current reference frame.
- av1_set_mv_search_range(&x->mv_limits, &ref_mv.as_mv);
-
- // Use the mv result from the single mode as mv predictor.
- *best_mv = *this_mv;
-
- // Convert the starting point from 1/8-pel to full-pel units.
- best_mv->col >>= 3;
- best_mv->row >>= 3;
-
- av1_set_mvcost(
- x, ref_idx,
- mbmi->ref_mv_idx + (have_nearmv_in_inter_mode(mbmi->mode) ? 1 : 0));
-
- // Small-range full-pixel motion search.
- bestsme = av1_refining_search_8p_c(x, sadpb, search_range,
- &cpi->fn_ptr[bsize], mask, mask_stride,
- ref_idx, &ref_mv.as_mv, second_pred);
- if (bestsme < INT_MAX) {
- if (mask)
- bestsme =
- av1_get_mvpred_mask_var(x, best_mv, &ref_mv.as_mv, second_pred, mask,
- mask_stride, ref_idx, &cpi->fn_ptr[bsize], 1);
- else
- bestsme = av1_get_mvpred_av_var(x, best_mv, &ref_mv.as_mv, second_pred,
- &cpi->fn_ptr[bsize], 1);
- }
-
- x->mv_limits = tmp_mv_limits;
-
- if (scaled_ref_frame) {
- // Swap back the original buffers for subpel motion search.
- for (int i = 0; i < num_planes; i++) {
- xd->plane[i].pre[ref_idx] = backup_yv12[i];
- }
- }
-
- if (cpi->common.cur_frame_force_integer_mv) {
- // No sub-pixel step will run: convert the full-pixel result back to
- // 1/8-pel units here instead.
- x->best_mv.as_mv.row *= 8;
- x->best_mv.as_mv.col *= 8;
- }
- const int use_fractional_mv =
- bestsme < INT_MAX && cpi->common.cur_frame_force_integer_mv == 0;
- if (use_fractional_mv) {
- int dis; /* TODO: use dis in distortion calculation later. */
- unsigned int sse;
- bestsme = cpi->find_fractional_mv_step(
- x, cm, mi_row, mi_col, &ref_mv.as_mv,
- cpi->common.allow_high_precision_mv, x->errorperbit,
- &cpi->fn_ptr[bsize], 0, cpi->sf.mv.subpel_iters_per_step, NULL,
- x->nmvjointcost, x->mvcost, &dis, &sse, second_pred, mask, mask_stride,
- ref_idx, pw, ph, cpi->sf.use_accurate_subpel_search);
- }
-
- // Restore the pointer to the first unscaled prediction buffer.
- if (ref_idx) pd->pre[0] = orig_yv12;
-
- // Only adopt the searched MV when the search produced a valid error.
- if (bestsme < INT_MAX) *this_mv = *best_mv;
-
- *rate_mv = 0;
-
- av1_set_mvcost(
- x, ref_idx,
- mbmi->ref_mv_idx + (have_nearmv_in_inter_mode(mbmi->mode) ? 1 : 0));
- *rate_mv += av1_mv_bit_cost(this_mv, &ref_mv.as_mv, x->nmvjointcost,
- x->mvcost, MV_COST_WEIGHT);
- }
-
- // Convenience wrapper around compound_single_motion_search() for the common
- // case where the fixed component is itself an inter prediction: the fixed
- // component's predictor is built first from cur_mv[!ref_idx], then
- // cur_mv[ref_idx] is refined against it.
- static void compound_single_motion_search_interinter(
-     const AV1_COMP *cpi, MACROBLOCK *x, BLOCK_SIZE bsize, int_mv *cur_mv,
-     int mi_row, int mi_col, const uint8_t *mask, int mask_stride, int *rate_mv,
-     const int block, int ref_idx) {
-   MACROBLOCKD *xd = &x->e_mbd;
-   // This function should only ever be called for compound modes
-   assert(has_second_ref(xd->mi[0]));
-
-   // Prediction buffer from second frame.
-   DECLARE_ALIGNED(16, uint16_t, second_pred_alloc_16[MAX_SB_SQUARE]);
-   const int is_highbd = xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH;
-   uint8_t *const second_pred = is_highbd
-                                    ? CONVERT_TO_BYTEPTR(second_pred_alloc_16)
-                                    : (uint8_t *)second_pred_alloc_16;
-
-   // Predictor of the component that stays fixed.
-   build_second_inter_pred(cpi, x, bsize, &cur_mv[!ref_idx].as_mv, mi_row,
-                           mi_col, block, ref_idx, second_pred);
-
-   // Refine the component selected by ref_idx.
-   compound_single_motion_search(cpi, x, bsize, &cur_mv[ref_idx].as_mv, mi_row,
-                                 mi_col, second_pred, mask, mask_stride,
-                                 rate_mv, ref_idx);
- }
-
- // Masked compound motion search. Copies |cur_mv| into |tmp_mv| and refines
- // it under the compound mask derived from |comp_data|.
- // |which| selects what is refined: 0 - reference 0 only, 1 - reference 1
- // only, 2 - both (joint search). Any other value leaves |tmp_mv| as the
- // plain copy and does not touch |rate_mv|.
- static void do_masked_motion_search_indexed(
-     const AV1_COMP *const cpi, MACROBLOCK *x, const int_mv *const cur_mv,
-     const INTERINTER_COMPOUND_DATA *const comp_data, BLOCK_SIZE bsize,
-     int mi_row, int mi_col, int_mv *tmp_mv, int *rate_mv, int which) {
-   const MACROBLOCKD *const xd = &x->e_mbd;
-   const int mask_stride = block_size_wide[bsize];
-   const uint8_t *const mask =
-       av1_get_compound_type_mask(comp_data, xd->mi[0]->sb_type);
-
-   for (int i = 0; i < 2; ++i) tmp_mv[i].as_int = cur_mv[i].as_int;
-
-   switch (which) {
-     case 0:
-     case 1:
-       compound_single_motion_search_interinter(cpi, x, bsize, tmp_mv, mi_row,
-                                                mi_col, mask, mask_stride,
-                                                rate_mv, 0, which);
-       break;
-     case 2:
-       joint_motion_search(cpi, x, bsize, tmp_mv, mi_row, mi_col, NULL, mask,
-                           mask_stride, rate_mv, 0);
-       break;
-     default: break;
-   }
- }
-
- // Compile-time switch for the NEWMV discount test below. It is defined as 0
- // here, so everything between #if and #endif is compiled out.
- #define USE_DISCOUNT_NEWMV_TEST 0
- #if USE_DISCOUNT_NEWMV_TEST
- // In some situations we want to discount the apparent cost of a new motion
- // vector. Where there is a subtle motion field and especially where there is
- // low spatial complexity then it can be hard to cover the cost of a new motion
- // vector in a single block, even if that motion vector reduces distortion.
- // However, once established that vector may be usable through the nearest and
- // near mv modes to reduce distortion in subsequent blocks and also improve
- // visual quality.
- #define NEW_MV_DISCOUNT_FACTOR 8
- // Forward declaration; the definition is not in this part of the file.
- static INLINE void get_this_mv(int_mv *this_mv, PREDICTION_MODE this_mode,
- int ref_idx, int ref_mv_idx,
- const MV_REFERENCE_FRAME *ref_frame,
- const MB_MODE_INFO_EXT *mbmi_ext);
- // Returns non-zero when the cost of |this_mv| should be discounted: the mode
- // is NEWMV, the MV is non-zero, the source frame is not an alt-ref, the
- // nearest MV and every near MV candidate are zero, and (when the global
- // motion type is at most TRANSLATION) the MV differs from the global MV.
- // Returns 0 otherwise.
- static int discount_newmv_test(const AV1_COMP *const cpi, const MACROBLOCK *x,
- PREDICTION_MODE this_mode, int_mv this_mv) {
- if (this_mode == NEWMV && this_mv.as_int != 0 &&
- !cpi->rc.is_src_frame_alt_ref) {
- // Only discount new_mv when nearest_mv and all near_mv are zero, and the
- // new_mv is not equal to global_mv
- const AV1_COMMON *const cm = &cpi->common;
- const MACROBLOCKD *const xd = &x->e_mbd;
- const MB_MODE_INFO *const mbmi = xd->mi[0];
- // Evaluate as a single-reference block on the first reference.
- const MV_REFERENCE_FRAME tmp_ref_frames[2] = { mbmi->ref_frame[0],
- NONE_FRAME };
- const uint8_t ref_frame_type = av1_ref_frame_type(tmp_ref_frames);
- int_mv nearest_mv;
- get_this_mv(&nearest_mv, NEARESTMV, 0, 0, tmp_ref_frames, x->mbmi_ext);
- int ret = nearest_mv.as_int == 0;
- for (int ref_mv_idx = 0;
- ref_mv_idx < x->mbmi_ext->ref_mv_count[ref_frame_type]; ++ref_mv_idx) {
- int_mv near_mv;
- get_this_mv(&near_mv, NEARMV, 0, ref_mv_idx, tmp_ref_frames, x->mbmi_ext);
- ret &= near_mv.as_int == 0;
- }
- if (cm->global_motion[tmp_ref_frames[0]].wmtype <= TRANSLATION) {
- int_mv global_mv;
- get_this_mv(&global_mv, GLOBALMV, 0, 0, tmp_ref_frames, x->mbmi_ext);
- ret &= global_mv.as_int != this_mv.as_int;
- }
- return ret;
- }
- return 0;
- }
- #endif
-
-#define LEFT_TOP_MARGIN ((AOM_BORDER_IN_PIXELS - AOM_INTERP_EXTEND) << 3)
-#define RIGHT_BOTTOM_MARGIN ((AOM_BORDER_IN_PIXELS - AOM_INTERP_EXTEND) << 3)
-
-// Clamps *mv to the block's edge distances, widened on every side by the
-// border margin defined above.
-// TODO(jingning): this mv clamping function should be block size dependent.
-static INLINE void clamp_mv2(MV *mv, const MACROBLOCKD *xd) {
-  const int left_limit = xd->mb_to_left_edge - LEFT_TOP_MARGIN;
-  const int right_limit = xd->mb_to_right_edge + RIGHT_BOTTOM_MARGIN;
-  const int top_limit = xd->mb_to_top_edge - LEFT_TOP_MARGIN;
-  const int bottom_limit = xd->mb_to_bottom_edge + RIGHT_BOTTOM_MARGIN;
-  clamp_mv(mv, left_limit, right_limit, top_limit, bottom_limit);
-}
-
-// Quick estimate of the wedge sign for a compound block.
-//
-// The luma block is split into four quadrants (0: top-left, 1: top-right,
-// 2: bottom-left, 3: bottom-right). For each quadrant the SSE of the source
-// against pred0 and against pred1 is measured with the variance function of
-// the quarter-size block. The sign is derived from how the two predictors'
-// errors compare in the top-left versus the bottom-right corner.
-//
-//   pred0/stride0, pred1/stride1 - the two single-reference predictions; for
-//                                  high bit-depth buffers they are converted
-//                                  with CONVERT_TO_BYTEPTR before use.
-//   returns                      - 1 when (tl + br) > 0, otherwise 0.
-//
-// bsize must have a valid quarter-size entry in split_qtr (asserted).
-static int estimate_wedge_sign(const AV1_COMP *cpi, const MACROBLOCK *x,
-                               const BLOCK_SIZE bsize, const uint8_t *pred0,
-                               int stride0, const uint8_t *pred1, int stride1) {
-  static const BLOCK_SIZE split_qtr[BLOCK_SIZES_ALL] = {
-    // 4X4
-    BLOCK_INVALID,
-    // 4X8, 8X4, 8X8
-    BLOCK_INVALID, BLOCK_INVALID, BLOCK_4X4,
-    // 8X16, 16X8, 16X16
-    BLOCK_4X8, BLOCK_8X4, BLOCK_8X8,
-    // 16X32, 32X16, 32X32
-    BLOCK_8X16, BLOCK_16X8, BLOCK_16X16,
-    // 32X64, 64X32, 64X64
-    BLOCK_16X32, BLOCK_32X16, BLOCK_32X32,
-    // 64x128, 128x64, 128x128
-    BLOCK_32X64, BLOCK_64X32, BLOCK_64X64,
-    // 4X16, 16X4, 8X32
-    BLOCK_INVALID, BLOCK_INVALID, BLOCK_4X16,
-    // 32X8, 16X64, 64X16
-    BLOCK_16X4, BLOCK_8X32, BLOCK_32X8
-  };
-  const struct macroblock_plane *const p = &x->plane[0];
-  const uint8_t *src = p->src.buf;
-  const int src_stride = p->src.stride;
-  const int half_bw = block_size_wide[bsize] / 2;
-  const int half_bh = block_size_high[bsize] / 2;
-  uint32_t esq[2][4];  // esq[predictor][quadrant]
-
-  const BLOCK_SIZE f_index = split_qtr[bsize];
-  assert(f_index != BLOCK_INVALID);
-
-  if (x->e_mbd.cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
-    pred0 = CONVERT_TO_BYTEPTR(pred0);
-    pred1 = CONVERT_TO_BYTEPTR(pred1);
-  }
-
-  // One pass over the quadrants keeps each predictor paired with its own
-  // stride. The previous hand-unrolled version passed stride0 for the two
-  // lower pred1 quadrants.
-  for (int q = 0; q < 4; ++q) {
-    const int col = (q & 1) ? half_bw : 0;
-    const int row = (q & 2) ? half_bh : 0;
-    const uint8_t *const src_q = src + row * src_stride + col;
-    cpi->fn_ptr[f_index].vf(src_q, src_stride, pred0 + row * stride0 + col,
-                            stride0, &esq[0][q]);
-    cpi->fn_ptr[f_index].vf(src_q, src_stride, pred1 + row * stride1 + col,
-                            stride1, &esq[1][q]);
-  }
-
-  const int64_t tl = ((int64_t)esq[0][0] + esq[0][1] + esq[0][2]) -
-                     ((int64_t)esq[1][0] + esq[1][1] + esq[1][2]);
-  const int64_t br = ((int64_t)esq[1][3] + esq[1][1] + esq[1][2]) -
-                     ((int64_t)esq[0][3] + esq[0][1] + esq[0][2]);
-  return (tl + br > 0);
-}
-
-// Choose the best wedge index and sign
-//
-// Tries every wedge index available for bsize. For each index the sign is
-// derived from the residuals, then the masked SSE is fed to the
-// MODELRD_TYPE_MASKED_COMPOUND rate/distortion model.
-//   p0               - prediction 0 (wrapped with CONVERT_TO_BYTEPTR when the
-//                      current buffer is high bit-depth), stride bw.
-//   residual1        - passed to the SSE helper together with diff10; callers
-//                      in this file compute it as src - pred1.
-//   diff10           - callers in this file compute it as pred1 - pred0.
-//   best_wedge_sign,
-//   best_wedge_index - outputs, written only when a candidate improves on the
-//                      running best.
-// Returns the best RD cost with the rate-only cost of signalling the chosen
-// wedge index subtracted again.
-static int64_t pick_wedge(const AV1_COMP *const cpi, const MACROBLOCK *const x,
- const BLOCK_SIZE bsize, const uint8_t *const p0,
- const int16_t *const residual1,
- const int16_t *const diff10,
- int *const best_wedge_sign,
- int *const best_wedge_index) {
- const MACROBLOCKD *const xd = &x->e_mbd;
- const struct buf_2d *const src = &x->plane[0].src;
- const int bw = block_size_wide[bsize];
- const int bh = block_size_high[bsize];
- const int N = bw * bh;
- assert(N >= 64);
- int rate;
- int64_t dist;
- int64_t rd, best_rd = INT64_MAX;
- int wedge_index;
- int wedge_sign;
- int wedge_types = (1 << get_wedge_bits_lookup(bsize));
- const uint8_t *mask;
- uint64_t sse;
- const int hbd = xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH;
- // For high bit-depth the SSE is rounded down by 2 * (bd - 8) bits below.
- const int bd_round = hbd ? (xd->bd - 8) * 2 : 0;
-
- DECLARE_ALIGNED(32, int16_t, residual0[MAX_SB_SQUARE]); // src - pred0
- if (hbd) {
- aom_highbd_subtract_block(bh, bw, residual0, bw, src->buf, src->stride,
- CONVERT_TO_BYTEPTR(p0), bw, xd->bd);
- } else {
- aom_subtract_block(bh, bw, residual0, bw, src->buf, src->stride, p0, bw);
- }
-
- // Threshold handed to av1_wedge_sign_from_residuals(): half the difference
- // of the two residual energies, scaled by (1 << WEDGE_WEIGHT_BITS).
- int64_t sign_limit = ((int64_t)aom_sum_squares_i16(residual0, N) -
- (int64_t)aom_sum_squares_i16(residual1, N)) *
- (1 << WEDGE_WEIGHT_BITS) / 2;
- // ds aliases residual0: the delta squares are written over the residual0
- // buffer, so residual0 must already have been consumed for sign_limit above.
- int16_t *ds = residual0;
-
- av1_wedge_compute_delta_squares(ds, residual0, residual1, N);
-
- for (wedge_index = 0; wedge_index < wedge_types; ++wedge_index) {
- // The sign is decided against the sign-0 mask ...
- mask = av1_get_contiguous_soft_mask(wedge_index, 0, bsize);
-
- wedge_sign = av1_wedge_sign_from_residuals(ds, mask, N, sign_limit);
-
- // ... and the SSE is then measured with the mask for the chosen sign.
- mask = av1_get_contiguous_soft_mask(wedge_index, wedge_sign, bsize);
- sse = av1_wedge_sse_from_residuals(residual1, diff10, mask, N);
- sse = ROUND_POWER_OF_TWO(sse, bd_round);
-
- model_rd_sse_fn[MODELRD_TYPE_MASKED_COMPOUND](cpi, x, bsize, 0, sse, N,
- &rate, &dist);
- // int rate2;
- // int64_t dist2;
- // model_rd_with_curvfit(cpi, x, bsize, 0, sse, N, &rate2, &dist2);
- // printf("sse %"PRId64": leagacy: %d %"PRId64", curvfit %d %"PRId64"\n",
- // sse, rate, dist, rate2, dist2); dist = dist2;
- // rate = rate2;
-
- // The index signalling cost takes part in the comparison ...
- rate += x->wedge_idx_cost[bsize][wedge_index];
- rd = RDCOST(x->rdmult, rate, dist);
-
- if (rd < best_rd) {
- *best_wedge_index = wedge_index;
- *best_wedge_sign = wedge_sign;
- best_rd = rd;
- }
- }
-
- // ... but its rate-only RD cost is subtracted again from the returned value.
- return best_rd -
- RDCOST(x->rdmult, x->wedge_idx_cost[bsize][*best_wedge_index], 0);
-}
-
-// Choose the best wedge index for the specified sign.
-//
-// Every wedge index available for bsize is evaluated with the caller-supplied
-// wedge_sign; the index with the lowest modelled RD cost is stored in
-// *best_wedge_index. The returned cost has the rate-only cost of signalling
-// that index subtracted again.
-static int64_t pick_wedge_fixed_sign(const AV1_COMP *const cpi,
-                                     const MACROBLOCK *const x,
-                                     const BLOCK_SIZE bsize,
-                                     const int16_t *const residual1,
-                                     const int16_t *const diff10,
-                                     const int wedge_sign,
-                                     int *const best_wedge_index) {
-  const MACROBLOCKD *const xd = &x->e_mbd;
-  const int N = block_size_wide[bsize] * block_size_high[bsize];
-  assert(N >= 64);
-  const int num_wedges = 1 << get_wedge_bits_lookup(bsize);
-  const int is_hbd = xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH;
-  const int bd_round = is_hbd ? (xd->bd - 8) * 2 : 0;
-  int64_t lowest_rd = INT64_MAX;
-  int rate;
-  int64_t dist;
-
-  int idx = 0;
-  while (idx < num_wedges) {
-    const uint8_t *const mask =
-        av1_get_contiguous_soft_mask(idx, wedge_sign, bsize);
-    uint64_t sse = av1_wedge_sse_from_residuals(residual1, diff10, mask, N);
-    sse = ROUND_POWER_OF_TWO(sse, bd_round);
-
-    model_rd_sse_fn[MODELRD_TYPE_MASKED_COMPOUND](cpi, x, bsize, 0, sse, N,
-                                                  &rate, &dist);
-    rate += x->wedge_idx_cost[bsize][idx];
-
-    const int64_t this_rd = RDCOST(x->rdmult, rate, dist);
-    if (this_rd < lowest_rd) {
-      lowest_rd = this_rd;
-      *best_wedge_index = idx;
-    }
-    ++idx;
-  }
-
-  return lowest_rd -
-         RDCOST(x->rdmult, x->wedge_idx_cost[bsize][*best_wedge_index], 0);
-}
-
-// Picks the wedge index and sign for an inter-inter wedge compound and stores
-// both in the current block's interinter_comp. With the
-// fast_wedge_sign_estimate speed feature the sign is estimated up front and
-// only the index is searched; otherwise sign and index are searched together.
-// Returns the RD cost reported by the search that was used.
-static int64_t pick_interinter_wedge(
-    const AV1_COMP *const cpi, MACROBLOCK *const x, const BLOCK_SIZE bsize,
-    const uint8_t *const p0, const uint8_t *const p1,
-    const int16_t *const residual1, const int16_t *const diff10) {
-  MB_MODE_INFO *const mbmi = x->e_mbd.mi[0];
-  const int pred_stride = block_size_wide[bsize];
-  int chosen_sign = 0;
-  int chosen_index = -1;
-  int64_t cost;
-
-  assert(is_interinter_compound_used(COMPOUND_WEDGE, bsize));
-  assert(cpi->common.seq_params.enable_masked_compound);
-
-  if (!cpi->sf.fast_wedge_sign_estimate) {
-    cost = pick_wedge(cpi, x, bsize, p0, residual1, diff10, &chosen_sign,
-                      &chosen_index);
-  } else {
-    chosen_sign = estimate_wedge_sign(cpi, x, bsize, p0, pred_stride, p1,
-                                      pred_stride);
-    cost = pick_wedge_fixed_sign(cpi, x, bsize, residual1, diff10, chosen_sign,
-                                 &chosen_index);
-  }
-
-  mbmi->interinter_comp.wedge_sign = chosen_sign;
-  mbmi->interinter_comp.wedge_index = chosen_index;
-  return cost;
-}
-
-// Picks the difference-weighted compound mask type with the lowest modelled
-// RD cost and records it in the current block's interinter_comp.mask_type.
-// The mask for type index 0 is built straight into xd->seg_mask; the mask for
-// index 1 is built in a local buffer and copied into xd->seg_mask only when
-// DIFFWTD_38_INV is the winner.
-// Returns the best RD cost found.
-static int64_t pick_interinter_seg(const AV1_COMP *const cpi,
-                                   MACROBLOCK *const x, const BLOCK_SIZE bsize,
-                                   const uint8_t *const p0,
-                                   const uint8_t *const p1,
-                                   const int16_t *const residual1,
-                                   const int16_t *const diff10) {
-  MACROBLOCKD *const xd = &x->e_mbd;
-  const int bw = block_size_wide[bsize];
-  const int bh = block_size_high[bsize];
-  const int N = 1 << num_pels_log2_lookup[bsize];
-  const int is_hbd = xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH;
-  const int bd_round = is_hbd ? (xd->bd - 8) * 2 : 0;
-  DECLARE_ALIGNED(16, uint8_t, seg_mask[2 * MAX_SB_SQUARE]);
-  uint8_t *const mask_bufs[2] = { xd->seg_mask, seg_mask };
-
-  DIFFWTD_MASK_TYPE winner = 0;
-  int64_t winner_rd = INT64_MAX;
-  int rate;
-  int64_t dist;
-  DIFFWTD_MASK_TYPE type;
-
-  for (type = 0; type < DIFFWTD_MASK_TYPES; type++) {
-    uint8_t *const mask = mask_bufs[type];
-
-    // Build the mask for this type.
-    if (is_hbd) {
-      av1_build_compound_diffwtd_mask_highbd(mask, type,
-                                             CONVERT_TO_BYTEPTR(p0), bw,
-                                             CONVERT_TO_BYTEPTR(p1), bw, bh,
-                                             bw, xd->bd);
-    } else {
-      av1_build_compound_diffwtd_mask(mask, type, p0, bw, p1, bw, bh, bw);
-    }
-
-    // Model the RD cost of blending with it.
-    uint64_t sse = av1_wedge_sse_from_residuals(residual1, diff10, mask, N);
-    sse = ROUND_POWER_OF_TWO(sse, bd_round);
-    model_rd_sse_fn[MODELRD_TYPE_MASKED_COMPOUND](cpi, x, bsize, 0, sse, N,
-                                                  &rate, &dist);
-    const int64_t type_rd = RDCOST(x->rdmult, rate, dist);
-
-    if (type_rd < winner_rd) {
-      winner_rd = type_rd;
-      winner = type;
-    }
-  }
-
-  xd->mi[0]->interinter_comp.mask_type = winner;
-  if (winner == DIFFWTD_38_INV) memcpy(xd->seg_mask, seg_mask, N * 2);
-  return winner_rd;
-}
-
-// Picks the wedge index for an inter-intra wedge prediction. The sign is
-// always 0, so only the index is searched. Two residual planes are derived
-// locally (src - p1 and p1 - p0, all with stride bw) and handed to
-// pick_wedge_fixed_sign(). The result is stored in the current block's
-// interintra_wedge_sign / interintra_wedge_index.
-// Returns the RD cost reported by the search.
-static int64_t pick_interintra_wedge(const AV1_COMP *const cpi,
-                                     const MACROBLOCK *const x,
-                                     const BLOCK_SIZE bsize,
-                                     const uint8_t *const p0,
-                                     const uint8_t *const p1) {
-  const MACROBLOCKD *const xd = &x->e_mbd;
-  MB_MODE_INFO *const mbmi = xd->mi[0];
-  assert(is_interintra_wedge_used(bsize));
-  assert(cpi->common.seq_params.enable_interintra_compound);
-
-  const struct buf_2d *const src = &x->plane[0].src;
-  const int w = block_size_wide[bsize];
-  const int h = block_size_high[bsize];
-  DECLARE_ALIGNED(32, int16_t, residual1[MAX_SB_SQUARE]);  // src - pred1
-  DECLARE_ALIGNED(32, int16_t, diff10[MAX_SB_SQUARE]);     // pred1 - pred0
-
-  if (!get_bitdepth_data_path_index(xd)) {
-    aom_subtract_block(h, w, residual1, w, src->buf, src->stride, p1, w);
-    aom_subtract_block(h, w, diff10, w, p1, w, p0, w);
-  } else {
-    aom_highbd_subtract_block(h, w, residual1, w, src->buf, src->stride,
-                              CONVERT_TO_BYTEPTR(p1), w, xd->bd);
-    aom_highbd_subtract_block(h, w, diff10, w, CONVERT_TO_BYTEPTR(p1), w,
-                              CONVERT_TO_BYTEPTR(p0), w, xd->bd);
-  }
-
-  int best_index = -1;
-  const int64_t cost =
-      pick_wedge_fixed_sign(cpi, x, bsize, residual1, diff10, 0, &best_index);
-
-  mbmi->interintra_wedge_sign = 0;
-  mbmi->interintra_wedge_index = best_index;
-  return cost;
-}
-
-// Dispatches mask selection on the current block's compound type:
-// COMPOUND_WEDGE goes to the wedge search, COMPOUND_DIFFWTD to the
-// difference-weighted mask search. Any other type trips an assert and
-// yields 0.
-static int64_t pick_interinter_mask(const AV1_COMP *const cpi, MACROBLOCK *x,
-                                    const BLOCK_SIZE bsize,
-                                    const uint8_t *const p0,
-                                    const uint8_t *const p1,
-                                    const int16_t *const residual1,
-                                    const int16_t *const diff10) {
-  const COMPOUND_TYPE type = x->e_mbd.mi[0]->interinter_comp.type;
-
-  if (type == COMPOUND_WEDGE) {
-    return pick_interinter_wedge(cpi, x, bsize, p0, p1, residual1, diff10);
-  }
-  if (type == COMPOUND_DIFFWTD) {
-    return pick_interinter_seg(cpi, x, bsize, p0, p1, residual1, diff10);
-  }
-
-  assert(0);
-  return 0;
-}
-
-// Runs the masked motion search that matches a compound mode and writes the
-// refined vector(s) back to the current block's mode info:
-//   NEW_NEWMV                  - both vectors, joint search
-//   NEW_NEARESTMV, NEW_NEARMV  - vector 0 only
-//   NEAREST_NEWMV, NEAR_NEWMV  - vector 1 only
-// For any other mode nothing is searched. Returns the mv rate reported by the
-// search (0 when no search ran).
-static int interinter_compound_motion_search(const AV1_COMP *const cpi,
-                                             MACROBLOCK *x,
-                                             const int_mv *const cur_mv,
-                                             const BLOCK_SIZE bsize,
-                                             const PREDICTION_MODE this_mode,
-                                             int mi_row, int mi_col) {
-  MACROBLOCKD *const xd = &x->e_mbd;
-  MB_MODE_INFO *const mbmi = xd->mi[0];
-  int_mv refined[2];
-  int mv_rate = 0;
-
-  // The compound data must reference the block's segmentation mask buffer
-  // before it is handed to the search.
-  mbmi->interinter_comp.seg_mask = xd->seg_mask;
-  const INTERINTER_COMPOUND_DATA *const comp = &mbmi->interinter_comp;
-
-  switch (this_mode) {
-    case NEW_NEWMV:
-      do_masked_motion_search_indexed(cpi, x, cur_mv, comp, bsize, mi_row,
-                                      mi_col, refined, &mv_rate, 2);
-      mbmi->mv[0].as_int = refined[0].as_int;
-      mbmi->mv[1].as_int = refined[1].as_int;
-      break;
-    case NEW_NEARESTMV:
-    case NEW_NEARMV:
-      do_masked_motion_search_indexed(cpi, x, cur_mv, comp, bsize, mi_row,
-                                      mi_col, refined, &mv_rate, 0);
-      mbmi->mv[0].as_int = refined[0].as_int;
-      break;
-    case NEAREST_NEWMV:
-    case NEAR_NEWMV:
-      do_masked_motion_search_indexed(cpi, x, cur_mv, comp, bsize, mi_row,
-                                      mi_col, refined, &mv_rate, 1);
-      mbmi->mv[1].as_int = refined[1].as_int;
-      break;
-    default: break;
-  }
-  return mv_rate;
-}
-
-// Builds the two single-reference predictors used by masked compound modes
-// (into preds0 / preds1) and derives two residual planes from them, both with
-// stride bw:
-//   residual1 = src - *preds1
-//   diff10    = *preds1 - *preds0
-static void get_inter_predictors_masked_compound(
-    const AV1_COMP *const cpi, MACROBLOCK *x, const BLOCK_SIZE bsize,
-    int mi_row, int mi_col, uint8_t **preds0, uint8_t **preds1,
-    int16_t *residual1, int16_t *diff10, int *strides) {
-  MACROBLOCKD *const xd = &x->e_mbd;
-  const int w = block_size_wide[bsize];
-  const int h = block_size_high[bsize];
-  const int can_use_previous = cpi->common.allow_warped_motion;
-  uint8_t **const pred_bufs[2] = { preds0, preds1 };
-
-  // One predictor per reference, reference 0 first.
-  for (int ref = 0; ref < 2; ++ref) {
-    av1_build_inter_predictors_for_planes_single_buf(
-        xd, bsize, 0, 0, mi_row, mi_col, ref, pred_bufs[ref], strides,
-        can_use_previous);
-  }
-
-  const struct buf_2d *const src = &x->plane[0].src;
-  if (!get_bitdepth_data_path_index(xd)) {
-    aom_subtract_block(h, w, residual1, w, src->buf, src->stride, *preds1, w);
-    aom_subtract_block(h, w, diff10, w, *preds1, w, *preds0, w);
-  } else {
-    aom_highbd_subtract_block(h, w, residual1, w, src->buf, src->stride,
-                              CONVERT_TO_BYTEPTR(*preds1), w, xd->bd);
-    aom_highbd_subtract_block(h, w, diff10, w, CONVERT_TO_BYTEPTR(*preds1), w,
-                              CONVERT_TO_BYTEPTR(*preds0), w, xd->bd);
-  }
-}
-
-// Selects the mask for the current block's masked compound type, optionally
-// refines the motion vectors, builds the blended predictor and returns an
-// estimated RD cost for it.
-//   rs2                       - in/out: the mask signalling rate is added.
-//   rate_mv                   - mv rate of the unrefined vectors in cur_mv.
-//   out_rate_mv               - written only on the wedge + new-mv path.
-//   preds0/preds1, residual1,
-//   diff10, strides           - predictor/residual buffers; (re)filled here
-//                               when *calc_pred_masked_compound is non-zero.
-//   calc_pred_masked_compound - in/out flag, cleared once the buffers have
-//                               been computed.
-// Returns INT64_MAX when the rate-only cost already exceeds ref_best_rd, or
-// when estimate_yrd_for_sb() reports INT64_MAX.
-static int64_t build_and_cost_compound_type(
- const AV1_COMP *const cpi, MACROBLOCK *x, const int_mv *const cur_mv,
- const BLOCK_SIZE bsize, const PREDICTION_MODE this_mode, int *rs2,
- int rate_mv, BUFFER_SET *ctx, int *out_rate_mv, uint8_t **preds0,
- uint8_t **preds1, int16_t *residual1, int16_t *diff10, int *strides,
- int mi_row, int mi_col, int mode_rate, int64_t ref_best_rd,
- int *calc_pred_masked_compound) {
- const AV1_COMMON *const cm = &cpi->common;
- MACROBLOCKD *xd = &x->e_mbd;
- MB_MODE_INFO *const mbmi = xd->mi[0];
- int rate_sum;
- int64_t dist_sum;
- int64_t best_rd_cur = INT64_MAX;
- int64_t rd = INT64_MAX;
- int tmp_skip_txfm_sb;
- int64_t tmp_skip_sse_sb;
- const COMPOUND_TYPE compound_type = mbmi->interinter_comp.type;
-
- // Build the predictors and residuals once; the cleared flag lets later
- // calls reuse them.
- if (*calc_pred_masked_compound) {
- get_inter_predictors_masked_compound(cpi, x, bsize, mi_row, mi_col, preds0,
- preds1, residual1, diff10, strides);
- *calc_pred_masked_compound = 0;
- }
-
- // Modelled cost of the best mask, plus the rate of signalling it and the
- // unrefined mv rate.
- best_rd_cur =
- pick_interinter_mask(cpi, x, bsize, *preds0, *preds1, residual1, diff10);
- *rs2 += get_interinter_compound_mask_rate(x, mbmi);
- best_rd_cur += RDCOST(x->rdmult, *rs2 + rate_mv, 0);
-
- // Although the true rate_mv might be different after motion search, but it
- // is unlikely to be the best mode considering the transform rd cost and other
- // mode overhead cost
- int64_t mode_rd = RDCOST(x->rdmult, *rs2 + mode_rate, 0);
- if (mode_rd > ref_best_rd) return INT64_MAX;
-
- if (have_newmv_in_inter_mode(this_mode) && compound_type == COMPOUND_WEDGE) {
- // Refine the new vector(s) under the wedge mask and model the result.
- *out_rate_mv = interinter_compound_motion_search(cpi, x, cur_mv, bsize,
- this_mode, mi_row, mi_col);
- av1_build_inter_predictors_sby(cm, xd, mi_row, mi_col, ctx, bsize);
- model_rd_sb_fn[MODELRD_TYPE_MASKED_COMPOUND](
- cpi, bsize, x, xd, 0, 0, mi_row, mi_col, &rate_sum, &dist_sum,
- &tmp_skip_txfm_sb, &tmp_skip_sse_sb, NULL, NULL, NULL);
- rd = RDCOST(x->rdmult, *rs2 + *out_rate_mv + rate_sum, dist_sum);
- // The refinement did not help: restore the original vectors and rate and
- // rebuild the predictor from the buffered single predictions.
- if (rd >= best_rd_cur) {
- mbmi->mv[0].as_int = cur_mv[0].as_int;
- mbmi->mv[1].as_int = cur_mv[1].as_int;
- *out_rate_mv = rate_mv;
- av1_build_wedge_inter_predictor_from_buf(xd, bsize, 0, 0, preds0, strides,
- preds1, strides);
- }
- // Re-cost whichever predictor is now in place with the luma RD estimate.
- rd = estimate_yrd_for_sb(cpi, bsize, x, &rate_sum, &dist_sum,
- &tmp_skip_txfm_sb, &tmp_skip_sse_sb, INT64_MAX);
- if (rd != INT64_MAX)
- rd = RDCOST(x->rdmult, *rs2 + *out_rate_mv + rate_sum, dist_sum);
- best_rd_cur = rd;
-
- } else {
- // No motion refinement: blend the buffered predictions and cost them.
- // Note that *out_rate_mv is left untouched on this path.
- av1_build_wedge_inter_predictor_from_buf(xd, bsize, 0, 0, preds0, strides,
- preds1, strides);
- rd = estimate_yrd_for_sb(cpi, bsize, x, &rate_sum, &dist_sum,
- &tmp_skip_txfm_sb, &tmp_skip_sse_sb, INT64_MAX);
- if (rd != INT64_MAX)
- rd = RDCOST(x->rdmult, *rs2 + rate_mv + rate_sum, dist_sum);
- best_rd_cur = rd;
- }
- return best_rd_cur;
-}
-
-// Bundle of buffers and lookup tables handed to the inter-mode search
-// helpers in this file (for example handle_newmv() and
-// interpolation_filter_search()).
-typedef struct {
- // OBMC secondary prediction buffers and respective strides
- uint8_t *above_pred_buf[MAX_MB_PLANE];
- int above_pred_stride[MAX_MB_PLANE];
- uint8_t *left_pred_buf[MAX_MB_PLANE];
- int left_pred_stride[MAX_MB_PLANE];
- // Motion vectors found by single-reference NEWMV searches; handle_newmv()
- // indexes this as [ref_mv_idx][ref_frame].
- int_mv (*single_newmv)[REF_FRAMES];
- // Pointer to array of motion vectors to use for each ref and their rates
- // Should point to first of 2 arrays in 2D array
- int (*single_newmv_rate)[REF_FRAMES];
- // Set to 1 by handle_newmv() once the matching single_newmv entry has been
- // written.
- int (*single_newmv_valid)[REF_FRAMES];
- // Pointer to array of predicted rate-distortion
- // Should point to first of 2 arrays in 2D array
- int64_t (*modelled_rd)[MAX_REF_MV_SERCH][REF_FRAMES];
- InterpFilter single_filter[MB_MODE_COUNT][REF_FRAMES];
- int ref_frame_cost;
- int single_comp_cost;
- // NOTE(review): same shape as modelled_rd; its use is not visible in this
- // part of the file.
- int64_t (*simple_rd)[MAX_REF_MV_SERCH][REF_FRAMES];
- int skip_motion_mode;
- INTERINTRA_MODE *inter_intra_mode;
-} HandleInterModeArgs;
-
-/* Returns 1 when this single-reference mode can be skipped because another
- * mode with the same motion vector has already been searched and is no more
- * expensive to signal; the other mode's modelled rd is then copied to this
- * mode. Returns 0 otherwise (always for compound prediction). */
-static int skip_repeated_mv(const AV1_COMMON *const cm,
-                            const MACROBLOCK *const x,
-                            PREDICTION_MODE this_mode,
-                            const MV_REFERENCE_FRAME ref_frames[2],
-                            InterModeSearchState *search_state) {
-  const int is_compound = ref_frames[1] > INTRA_FRAME;
-  const uint8_t ref_frame_type = av1_ref_frame_type(ref_frames);
-  const MB_MODE_INFO_EXT *const mbmi_ext = x->mbmi_ext;
-  const int mv_count = mbmi_ext->ref_mv_count[ref_frame_type];
-  if (is_compound) return 0;
-
-  // Find the mode, if any, that is known to carry the same motion vector.
-  PREDICTION_MODE twin_mode = MB_MODE_COUNT;
-  switch (this_mode) {
-    case NEARMV:
-      if (mv_count == 0) {
-        // NEARMV has the same motion vector as NEARESTMV
-        twin_mode = NEARESTMV;
-      } else if (mv_count == 1 &&
-                 cm->global_motion[ref_frames[0]].wmtype <= TRANSLATION) {
-        // NEARMV has the same motion vector as GLOBALMV
-        twin_mode = GLOBALMV;
-      }
-      break;
-    case GLOBALMV:
-      if (mv_count == 0 &&
-          cm->global_motion[ref_frames[0]].wmtype <= TRANSLATION) {
-        // GLOBALMV has the same motion vector as NEARESTMV
-        twin_mode = NEARESTMV;
-      } else if (mv_count == 1) {
-        // GLOBALMV has the same motion vector as NEARMV
-        twin_mode = NEARMV;
-      }
-      break;
-    default: break;
-  }
-  if (twin_mode == MB_MODE_COUNT) return 0;
-
-  // modelled_rd still at INT64_MAX means the twin mode was never searched.
-  if (search_state->modelled_rd[twin_mode][0][ref_frames[0]] == INT64_MAX) {
-    return 0;
-  }
-
-  const int16_t mode_ctx =
-      av1_mode_context_analyzer(mbmi_ext->mode_context, ref_frames);
-  const int twin_cost = cost_mv_ref(x, twin_mode, mode_ctx);
-  const int own_cost = cost_mv_ref(x, this_mode, mode_ctx);
-
-  // Only skip when signalling this mode costs strictly more than the twin.
-  if (own_cost <= twin_cost) return 0;
-
-  search_state->modelled_rd[this_mode][0][ref_frames[0]] =
-      search_state->modelled_rd[twin_mode][0][ref_frames[0]];
-  return 1;
-}
-
-// Copies in_mv to *out_mv, lowers its precision according to the frame's mv
-// precision settings, clamps it with clamp_mv2(), and returns non-zero when
-// the result passes mv_check_bounds() against x->mv_limits.
-static INLINE int clamp_and_check_mv(int_mv *out_mv, int_mv in_mv,
-                                     const AV1_COMMON *cm,
-                                     const MACROBLOCK *x) {
-  *out_mv = in_mv;
-
-  MV *const mv = &out_mv->as_mv;
-  lower_mv_precision(mv, cm->allow_high_precision_mv,
-                     cm->cur_frame_force_integer_mv);
-  clamp_mv2(mv, &x->e_mbd);
-
-  const int out_of_bounds = mv_check_bounds(&x->mv_limits, mv);
-  return !out_of_bounds;
-}
-
-// Produces the motion vector(s) and mv rate for a mode containing a NEWMV
-// component.
-//   cur_mv  - in/out: the vector(s) for the current mode.
-//   rate_mv - out: rate of the new motion vector(s).
-//   args    - single-reference search results. They are read for compound
-//             modes and written for single-reference modes, always indexed
-//             as [ref_mv_idx][ref_frame].
-// Returns INT64_MAX when the single-reference search produced INVALID_MV,
-// otherwise 0.
-static int64_t handle_newmv(const AV1_COMP *const cpi, MACROBLOCK *const x,
- const BLOCK_SIZE bsize, int_mv *cur_mv,
- const int mi_row, const int mi_col,
- int *const rate_mv,
- HandleInterModeArgs *const args) {
- const MACROBLOCKD *const xd = &x->e_mbd;
- const MB_MODE_INFO *const mbmi = xd->mi[0];
- const int is_comp_pred = has_second_ref(mbmi);
- const PREDICTION_MODE this_mode = mbmi->mode;
- // A negative second reference is mapped to index 0 so it can be used as an
- // array index.
- const int refs[2] = { mbmi->ref_frame[0],
- mbmi->ref_frame[1] < 0 ? 0 : mbmi->ref_frame[1] };
- const int ref_mv_idx = mbmi->ref_mv_idx;
- int i;
-
- // NOTE(review): args is used below, so this cast is redundant.
- (void)args;
-
- if (is_comp_pred) {
- // Compound modes start from the vectors stored by earlier single-reference
- // searches. When comp_inter_joint_search_thresh <= bsize they are refined
- // further; otherwise only their rate is computed.
- if (this_mode == NEW_NEWMV) {
- cur_mv[0].as_int = args->single_newmv[ref_mv_idx][refs[0]].as_int;
- cur_mv[1].as_int = args->single_newmv[ref_mv_idx][refs[1]].as_int;
-
- if (cpi->sf.comp_inter_joint_search_thresh <= bsize) {
- joint_motion_search(cpi, x, bsize, cur_mv, mi_row, mi_col, NULL, NULL,
- 0, rate_mv, 0);
- } else {
- *rate_mv = 0;
- for (i = 0; i < 2; ++i) {
- const int_mv ref_mv = av1_get_ref_mv(x, i);
- av1_set_mvcost(x, i, mbmi->ref_mv_idx);
- *rate_mv +=
- av1_mv_bit_cost(&cur_mv[i].as_mv, &ref_mv.as_mv, x->nmvjointcost,
- x->mvcost, MV_COST_WEIGHT);
- }
- }
- } else if (this_mode == NEAREST_NEWMV || this_mode == NEAR_NEWMV) {
- // Only the second vector is new.
- cur_mv[1].as_int = args->single_newmv[ref_mv_idx][refs[1]].as_int;
- if (cpi->sf.comp_inter_joint_search_thresh <= bsize) {
- compound_single_motion_search_interinter(
- cpi, x, bsize, cur_mv, mi_row, mi_col, NULL, 0, rate_mv, 0, 1);
- } else {
- av1_set_mvcost(x, 1,
- mbmi->ref_mv_idx + (this_mode == NEAR_NEWMV ? 1 : 0));
- const int_mv ref_mv = av1_get_ref_mv(x, 1);
- *rate_mv = av1_mv_bit_cost(&cur_mv[1].as_mv, &ref_mv.as_mv,
- x->nmvjointcost, x->mvcost, MV_COST_WEIGHT);
- }
- } else {
- // Only the first vector is new.
- assert(this_mode == NEW_NEARESTMV || this_mode == NEW_NEARMV);
- cur_mv[0].as_int = args->single_newmv[ref_mv_idx][refs[0]].as_int;
- if (cpi->sf.comp_inter_joint_search_thresh <= bsize) {
- compound_single_motion_search_interinter(
- cpi, x, bsize, cur_mv, mi_row, mi_col, NULL, 0, rate_mv, 0, 0);
- } else {
- const int_mv ref_mv = av1_get_ref_mv(x, 0);
- av1_set_mvcost(x, 0,
- mbmi->ref_mv_idx + (this_mode == NEW_NEARMV ? 1 : 0));
- *rate_mv = av1_mv_bit_cost(&cur_mv[0].as_mv, &ref_mv.as_mv,
- x->nmvjointcost, x->mvcost, MV_COST_WEIGHT);
- }
- }
- } else {
- // Single reference: run the full search and store the result for the
- // compound modes that are evaluated later.
- single_motion_search(cpi, x, bsize, mi_row, mi_col, 0, rate_mv);
- if (x->best_mv.as_int == INVALID_MV) return INT64_MAX;
-
- args->single_newmv[ref_mv_idx][refs[0]] = x->best_mv;
- args->single_newmv_rate[ref_mv_idx][refs[0]] = *rate_mv;
- args->single_newmv_valid[ref_mv_idx][refs[0]] = 1;
-
- cur_mv[0].as_int = x->best_mv.as_int;
-
-#if USE_DISCOUNT_NEWMV_TEST
- // Estimate the rate implications of a new mv but discount this
- // under certain circumstances where we want to help initiate a weak
- // motion field, where the distortion gain for a single block may not
- // be enough to overcome the cost of a new mv.
- if (discount_newmv_test(cpi, x, this_mode, x->best_mv)) {
- *rate_mv = AOMMAX(*rate_mv / NEW_MV_DISCOUNT_FACTOR, 1);
- }
-#endif
- }
-
- return 0;
-}
-
-// Exchanges the two entries of dst_bufs and then restores xd's destination
-// buffers from the entry that is now first.
-static INLINE void swap_dst_buf(MACROBLOCKD *xd, const BUFFER_SET *dst_bufs[2],
-                                int num_planes) {
-  const BUFFER_SET *const former_first = dst_bufs[0];
-  const BUFFER_SET *const former_second = dst_bufs[1];
-
-  dst_bufs[0] = former_second;
-  dst_bufs[1] = former_first;
-  restore_dst_buf(xd, *dst_bufs[0], num_planes);
-}
-
-// Rate of signalling a pair of interpolation filters: the costs of the two
-// filter components, each looked up under its own context, are summed and
-// scaled by SWITCHABLE_INTERP_RATE_FACTOR.
-static INLINE int get_switchable_rate(MACROBLOCK *const x,
-                                      const InterpFilters filters,
-                                      const int ctx[2]) {
-  const InterpFilter first = av1_extract_interp_filter(filters, 0);
-  const InterpFilter second = av1_extract_interp_filter(filters, 1);
-  int total_cost = x->switchable_interp_costs[ctx[0]][first];
-  total_cost += x->switchable_interp_costs[ctx[1]][second];
-  return SWITCHABLE_INTERP_RATE_FACTOR * total_cost;
-}
-
-// calculate the rdcost of given interpolation_filter
-//
-// Evaluates filter_sets[filter_idx] for the current block and keeps it if it
-// improves on *rd.
-//   rd, switchable_rate - in/out: best RD cost so far and the filter rate
-//                         that belongs to it.
-//   rate, dist, skip_txfm_sb, skip_sse_sb
-//                       - in/out two-element arrays: index 0 holds luma-only
-//                         data, index 1 the data accumulated over all planes.
-//   dst_bufs            - pair of destination buffer sets; swapped when the
-//                         filter wins and a prediction was rebuilt.
-//   skip_pred           - which motion-compensation work may be skipped:
-//                         0 = none, DEFAULT_LUMA_INTERP_SKIP_FLAG = luma only,
-//                         cpi->default_interp_skip_flags = luma and chroma.
-// Returns 1 when the filter was accepted. Otherwise returns 0 with
-// mbmi->interp_filters restored to its previous value.
-static INLINE int64_t interpolation_filter_rd(
- MACROBLOCK *const x, const AV1_COMP *const cpi, BLOCK_SIZE bsize,
- int mi_row, int mi_col, BUFFER_SET *const orig_dst, int64_t *const rd,
- int *const switchable_rate, int *const skip_txfm_sb,
- int64_t *const skip_sse_sb, const BUFFER_SET *dst_bufs[2], int filter_idx,
- const int switchable_ctx[2], const int skip_pred, int *rate,
- int64_t *dist) {
- const AV1_COMMON *cm = &cpi->common;
- const int num_planes = av1_num_planes(cm);
- MACROBLOCKD *const xd = &x->e_mbd;
- MB_MODE_INFO *const mbmi = xd->mi[0];
- int tmp_rate[2], tmp_skip_sb[2] = { 1, 1 };
- int64_t tmp_dist[2], tmp_skip_sse[2] = { 0, 0 };
-
- // Install the candidate filter; last_best allows it to be rolled back.
- const InterpFilters last_best = mbmi->interp_filters;
- mbmi->interp_filters = filter_sets[filter_idx];
- const int tmp_rs =
- get_switchable_rate(x, mbmi->interp_filters, switchable_ctx);
-
- assert(skip_pred != 2);
- assert((skip_pred >= 0) && (skip_pred <= cpi->default_interp_skip_flags));
- assert(rate[0] >= 0);
- assert(dist[0] >= 0);
- assert((skip_txfm_sb[0] == 0) || (skip_txfm_sb[0] == 1));
- assert(skip_sse_sb[0] >= 0);
- assert(rate[1] >= 0);
- assert(dist[1] >= 0);
- assert((skip_txfm_sb[1] == 0) || (skip_txfm_sb[1] == 1));
- assert(skip_sse_sb[1] >= 0);
-
- if (skip_pred != cpi->default_interp_skip_flags) {
- if (skip_pred != DEFAULT_LUMA_INTERP_SKIP_FLAG) {
- // Nothing can be skipped: rebuild the luma prediction and model it.
- av1_build_inter_predictors_sby(cm, xd, mi_row, mi_col, orig_dst, bsize);
-#if CONFIG_COLLECT_RD_STATS == 3
- RD_STATS rd_stats_y;
- select_tx_type_yrd(cpi, x, &rd_stats_y, bsize, mi_row, mi_col, INT64_MAX);
- PrintPredictionUnitStats(cpi, x, &rd_stats_y, bsize);
-#endif // CONFIG_COLLECT_RD_STATS == 3
- model_rd_sb_fn[MODELRD_TYPE_INTERP_FILTER](
- cpi, bsize, x, xd, 0, 0, mi_row, mi_col, &tmp_rate[0], &tmp_dist[0],
- &tmp_skip_sb[0], &tmp_skip_sse[0], NULL, NULL, NULL);
- tmp_rate[1] = tmp_rate[0];
- tmp_dist[1] = tmp_dist[0];
- } else {
- // only luma MC is skipped
- tmp_rate[1] = rate[0];
- tmp_dist[1] = dist[0];
- }
- if (num_planes > 1) {
- for (int plane = 1; plane < num_planes; ++plane) {
- int tmp_rate_uv, tmp_skip_sb_uv;
- int64_t tmp_dist_uv, tmp_skip_sse_uv;
- // Early out: the cost accumulated so far already fails to beat the
- // best, so the remaining planes need not be built.
- int64_t tmp_rd = RDCOST(x->rdmult, tmp_rs + tmp_rate[1], tmp_dist[1]);
- if (tmp_rd >= *rd) {
- mbmi->interp_filters = last_best;
- return 0;
- }
- av1_build_inter_predictors_sbp(cm, xd, mi_row, mi_col, orig_dst, bsize,
- plane);
- model_rd_sb_fn[MODELRD_TYPE_INTERP_FILTER](
- cpi, bsize, x, xd, plane, plane, mi_row, mi_col, &tmp_rate_uv,
- &tmp_dist_uv, &tmp_skip_sb_uv, &tmp_skip_sse_uv, NULL, NULL, NULL);
- // Saturate the accumulated rate at INT_MAX.
- tmp_rate[1] =
- (int)AOMMIN(((int64_t)tmp_rate[1] + (int64_t)tmp_rate_uv), INT_MAX);
- tmp_dist[1] += tmp_dist_uv;
- tmp_skip_sb[1] &= tmp_skip_sb_uv;
- tmp_skip_sse[1] += tmp_skip_sse_uv;
- }
- }
- } else {
- // both luma and chroma MC is skipped
- tmp_rate[1] = rate[1];
- tmp_dist[1] = dist[1];
- }
- int64_t tmp_rd = RDCOST(x->rdmult, tmp_rs + tmp_rate[1], tmp_dist[1]);
-
- if (tmp_rd < *rd) {
- *rd = tmp_rd;
- *switchable_rate = tmp_rs;
- if (skip_pred != cpi->default_interp_skip_flags) {
- if (skip_pred == 0) {
- // Overwrite the data as current filter is the best one
- tmp_skip_sb[1] = tmp_skip_sb[0] & tmp_skip_sb[1];
- tmp_skip_sse[1] = tmp_skip_sse[0] + tmp_skip_sse[1];
- memcpy(rate, tmp_rate, sizeof(*rate) * 2);
- memcpy(dist, tmp_dist, sizeof(*dist) * 2);
- memcpy(skip_txfm_sb, tmp_skip_sb, sizeof(*skip_txfm_sb) * 2);
- memcpy(skip_sse_sb, tmp_skip_sse, sizeof(*skip_sse_sb) * 2);
- // As luma MC data is computed, no need to recompute after the search
- x->recalc_luma_mc_data = 0;
- } else if (skip_pred == DEFAULT_LUMA_INTERP_SKIP_FLAG) {
- // As luma MC data is not computed, update of luma data can be skipped
- rate[1] = tmp_rate[1];
- dist[1] = tmp_dist[1];
- skip_txfm_sb[1] = skip_txfm_sb[0] & tmp_skip_sb[1];
- skip_sse_sb[1] = skip_sse_sb[0] + tmp_skip_sse[1];
- // As luma MC data is not recomputed and current filter is the best,
- // indicate the possibility of recomputing MC data
- // If current buffer contains valid MC data, toggle to indicate that
- // luma MC data needs to be recomputed
- x->recalc_luma_mc_data ^= 1;
- }
- // Make the freshly built prediction the current destination.
- swap_dst_buf(xd, dst_bufs, num_planes);
- }
- return 1;
- }
- mbmi->interp_filters = last_best;
- return 0;
-}
-
-// Find the best rd filter in horizontal direction
-//
-// Tries filter indices 1 .. SWITCHABLE_FILTERS - 1 through
-// interpolation_filter_rd() and returns the last index that was accepted.
-// If none is accepted the incoming best_dual_mode (asserted to be 0) is
-// returned. The remaining parameters are forwarded unchanged.
-static INLINE int find_best_horiz_interp_filter_rd(
- MACROBLOCK *const x, const AV1_COMP *const cpi, BLOCK_SIZE bsize,
- int mi_row, int mi_col, BUFFER_SET *const orig_dst, int64_t *const rd,
- int *const switchable_rate, int *const skip_txfm_sb,
- int64_t *const skip_sse_sb, const BUFFER_SET *dst_bufs[2],
- const int switchable_ctx[2], const int skip_hor, int *rate, int64_t *dist,
- int best_dual_mode) {
- int i;
- const int bw = block_size_wide[bsize];
- assert(best_dual_mode == 0);
- if ((bw <= 4) && (skip_hor != cpi->default_interp_skip_flags)) {
- // The first filter tried (the highest index) runs with the full skip flags
- // and so reuses the existing rate/distortion; later ones use skip_hor.
- int skip_pred = cpi->default_interp_skip_flags;
- // Process the filters in reverse order to enable reusing rate and
- // distortion (calculated during EIGHTTAP_REGULAR) for MULTITAP_SHARP
- for (i = (SWITCHABLE_FILTERS - 1); i >= 1; --i) {
- if (interpolation_filter_rd(x, cpi, bsize, mi_row, mi_col, orig_dst, rd,
- switchable_rate, skip_txfm_sb, skip_sse_sb,
- dst_bufs, i, switchable_ctx, skip_pred, rate,
- dist)) {
- best_dual_mode = i;
- }
- skip_pred = skip_hor;
- }
- } else {
- // Forward order, every filter with the caller's skip setting.
- for (i = 1; i < SWITCHABLE_FILTERS; ++i) {
- if (interpolation_filter_rd(x, cpi, bsize, mi_row, mi_col, orig_dst, rd,
- switchable_rate, skip_txfm_sb, skip_sse_sb,
- dst_bufs, i, switchable_ctx, skip_hor, rate,
- dist)) {
- best_dual_mode = i;
- }
- }
- }
- return best_dual_mode;
-}
-
-// Find the best rd filter in vertical direction
-//
-// Starting from best_dual_mode (the index returned by the horizontal search),
-// tries the filter-set indices best_dual_mode + k * SWITCHABLE_FILTERS for
-// k >= 1 that lie below filter_set_size. Accepted filters update *rd and the
-// other in/out arguments inside interpolation_filter_rd(); its return value
-// is not used here.
-static INLINE void find_best_vert_interp_filter_rd(
- MACROBLOCK *const x, const AV1_COMP *const cpi, BLOCK_SIZE bsize,
- int mi_row, int mi_col, BUFFER_SET *const orig_dst, int64_t *const rd,
- int *const switchable_rate, int *const skip_txfm_sb,
- int64_t *const skip_sse_sb, const BUFFER_SET *dst_bufs[2],
- const int switchable_ctx[2], const int skip_ver, int *rate, int64_t *dist,
- int best_dual_mode, int filter_set_size) {
- int i;
- const int bh = block_size_high[bsize];
- if ((bh <= 4) && (skip_ver != cpi->default_interp_skip_flags)) {
- // The first filter tried runs with the full skip flags and so reuses the
- // existing rate/distortion; later ones use skip_ver.
- int skip_pred = cpi->default_interp_skip_flags;
- // Process the filters in reverse order to enable reusing rate and
- // distortion (calculated during EIGHTTAP_REGULAR) for MULTITAP_SHARP
- assert(filter_set_size == DUAL_FILTER_SET_SIZE);
- for (i = (filter_set_size - SWITCHABLE_FILTERS + best_dual_mode);
- i >= (best_dual_mode + SWITCHABLE_FILTERS); i -= SWITCHABLE_FILTERS) {
- interpolation_filter_rd(x, cpi, bsize, mi_row, mi_col, orig_dst, rd,
- switchable_rate, skip_txfm_sb, skip_sse_sb,
- dst_bufs, i, switchable_ctx, skip_pred, rate,
- dist);
- skip_pred = skip_ver;
- }
- } else {
- // Forward order, every filter with the caller's skip setting.
- for (i = best_dual_mode + SWITCHABLE_FILTERS; i < filter_set_size;
- i += SWITCHABLE_FILTERS) {
- interpolation_filter_rd(x, cpi, bsize, mi_row, mi_col, orig_dst, rd,
- switchable_rate, skip_txfm_sb, skip_sse_sb,
- dst_bufs, i, switchable_ctx, skip_ver, rate,
- dist);
- }
- }
-}
-
-// Returns 1 when a saved filter-search record matches the current block:
-// both reference frames and both motion vectors are equal and, for a block
-// with a second reference, the compound type is equal too. Returns 0
-// otherwise.
-static INLINE int is_interp_filter_match(const INTERPOLATION_FILTER_STATS *st,
-                                         MB_MODE_INFO *const mi) {
-  int ref = 0;
-  while (ref < 2) {
-    const int same_ref = st->ref_frames[ref] == mi->ref_frame[ref];
-    if (!(same_ref && st->mv[ref].as_int == mi->mv[ref].as_int)) return 0;
-    ++ref;
-  }
-  return !(has_second_ref(mi) && st->comp_type != mi->interinter_comp.type);
-}
-// Searches the stats saved for mbmi's compound_idx; on a match copies the saved filters into mbmi.
-static INLINE int find_interp_filter_in_stats(MACROBLOCK *x,
- MB_MODE_INFO *const mbmi) {
- const int comp_idx = mbmi->compound_idx;
- const int offset = x->interp_filter_stats_idx[comp_idx];  // number of saved entries
- for (int j = 0; j < offset; ++j) {
- const INTERPOLATION_FILTER_STATS *st = &x->interp_filter_stats[comp_idx][j];
- if (is_interp_filter_match(st, mbmi)) {
- mbmi->interp_filters = st->filters;
- return j;  // index of the matching entry
- }
- }
- return -1; // no match result found
-}
-// Appends mbmi's filters, mvs, ref frames and compound type to the stats saved for its compound_idx.
-static INLINE void save_interp_filter_search_stat(MACROBLOCK *x,
- MB_MODE_INFO *const mbmi) {
- const int comp_idx = mbmi->compound_idx;
- const int offset = x->interp_filter_stats_idx[comp_idx];
- if (offset < MAX_INTERP_FILTER_STATS) {  // nothing is stored once the table is full
- INTERPOLATION_FILTER_STATS stat = { mbmi->interp_filters,
- { mbmi->mv[0], mbmi->mv[1] },
- { mbmi->ref_frame[0],
- mbmi->ref_frame[1] },
- mbmi->interinter_comp.type };
- x->interp_filter_stats[comp_idx][offset] = stat;
- x->interp_filter_stats_idx[comp_idx]++;
- }
-}
-// Selects mbmi->interp_filters from model_rd_sb_fn estimates; returns 0, or INT64_MAX on the compound early-out.
-static int64_t interpolation_filter_search(
- MACROBLOCK *const x, const AV1_COMP *const cpi, BLOCK_SIZE bsize,
- int mi_row, int mi_col, const BUFFER_SET *const tmp_dst,
- BUFFER_SET *const orig_dst, InterpFilter (*const single_filter)[REF_FRAMES],
- int64_t *const rd, int *const switchable_rate, int *const skip_txfm_sb,
- int64_t *const skip_sse_sb, const int skip_build_pred,
- HandleInterModeArgs *args, int64_t ref_best_rd) {
- const AV1_COMMON *cm = &cpi->common;
- const int num_planes = av1_num_planes(cm);
- MACROBLOCKD *const xd = &x->e_mbd;
- MB_MODE_INFO *const mbmi = xd->mi[0];
- const int need_search =
- av1_is_interp_needed(xd) && av1_is_interp_search_needed(xd);
- int i;
- // Index 0 corresponds to luma rd data and index 1 corresponds to cumulative
- // data of all planes
- int tmp_rate[2] = { 0, 0 };
- int64_t tmp_dist[2] = { 0, 0 };
- int best_skip_txfm_sb[2] = { 1, 1 };
- int64_t best_skip_sse_sb[2] = { 0, 0 };
- const int ref_frame = xd->mi[0]->ref_frame[0];
-
- (void)single_filter;  // not used by this function
- int match_found = -1;
- const InterpFilter assign_filter = cm->interp_filter;
- if (cpi->sf.skip_repeat_interpolation_filter_search && need_search) {
- match_found = find_interp_filter_in_stats(x, mbmi);
- }
- if (!need_search || match_found == -1) {
- set_default_interp_filters(mbmi, assign_filter);
- }
- int switchable_ctx[2];
- switchable_ctx[0] = av1_get_pred_context_switchable_interp(xd, 0);
- switchable_ctx[1] = av1_get_pred_context_switchable_interp(xd, 1);
- *switchable_rate =
- get_switchable_rate(x, mbmi->interp_filters, switchable_ctx);
- if (!skip_build_pred)
- av1_build_inter_predictors_sb(cm, xd, mi_row, mi_col, orig_dst, bsize);
-
-#if CONFIG_COLLECT_RD_STATS == 3
- RD_STATS rd_stats_y;
- select_tx_type_yrd(cpi, x, &rd_stats_y, bsize, mi_row, mi_col, INT64_MAX);
- PrintPredictionUnitStats(cpi, x, &rd_stats_y, bsize);
-#endif // CONFIG_COLLECT_RD_STATS == 3
- model_rd_sb_fn[MODELRD_TYPE_INTERP_FILTER](
- cpi, bsize, x, xd, 0, 0, mi_row, mi_col, &tmp_rate[0], &tmp_dist[0],
- &best_skip_txfm_sb[0], &best_skip_sse_sb[0], NULL, NULL, NULL);
- if (num_planes > 1)
- model_rd_sb_fn[MODELRD_TYPE_INTERP_FILTER](
- cpi, bsize, x, xd, 1, num_planes - 1, mi_row, mi_col, &tmp_rate[1],
- &tmp_dist[1], &best_skip_txfm_sb[1], &best_skip_sse_sb[1], NULL, NULL,
- NULL);
- tmp_rate[1] =
- (int)AOMMIN((int64_t)tmp_rate[0] + (int64_t)tmp_rate[1], INT_MAX);
- assert(tmp_rate[1] >= 0);
- tmp_dist[1] = tmp_dist[0] + tmp_dist[1];
- best_skip_txfm_sb[1] = best_skip_txfm_sb[0] & best_skip_txfm_sb[1];
- best_skip_sse_sb[1] = best_skip_sse_sb[0] + best_skip_sse_sb[1];
- *rd = RDCOST(x->rdmult, (*switchable_rate + tmp_rate[1]), tmp_dist[1]);
- *skip_txfm_sb = best_skip_txfm_sb[1];
- *skip_sse_sb = best_skip_sse_sb[1];
- x->pred_sse[ref_frame] = (unsigned int)(best_skip_sse_sb[0] >> 4);
-
- if (assign_filter != SWITCHABLE || match_found != -1) {  // fixed frame filter or reused saved result
- return 0;
- }
- if (!need_search) {
- assert(mbmi->interp_filters ==
- av1_broadcast_interp_filter(EIGHTTAP_REGULAR));
- return 0;
- }
- if (args->modelled_rd != NULL) {
- if (has_second_ref(mbmi)) {
- const int ref_mv_idx = mbmi->ref_mv_idx;
- int refs[2] = { mbmi->ref_frame[0],
- (mbmi->ref_frame[1] < 0 ? 0 : mbmi->ref_frame[1]) };
- const int mode0 = compound_ref0_mode(mbmi->mode);
- const int mode1 = compound_ref1_mode(mbmi->mode);
- const int64_t mrd = AOMMIN(args->modelled_rd[mode0][ref_mv_idx][refs[0]],
- args->modelled_rd[mode1][ref_mv_idx][refs[1]]);
- if ((*rd >> 1) > mrd && ref_best_rd < INT64_MAX) {  // rd exceeds twice the smaller per-reference modelled rd
- return INT64_MAX;
- }
- }
- }
-
- x->recalc_luma_mc_data = 0;
- // skip_flag=xx (in binary form)
- // Setting 0th flag corresponds to skipping luma MC and setting 1st bit
- // corresponds to skipping chroma MC skip_flag=0 corresponds to "Don't skip
- // luma and chroma MC" Skip flag=1 corresponds to "Skip Luma MC only"
- // Skip_flag=2 is not a valid case
- // skip_flag=3 corresponds to "Skip both luma and chroma MC"
- int skip_hor = cpi->default_interp_skip_flags;
- int skip_ver = cpi->default_interp_skip_flags;
- const int is_compound = has_second_ref(mbmi);
- assert(is_intrabc_block(mbmi) == 0);
- for (int j = 0; j < 1 + is_compound; ++j) {
- const RefBuffer *ref_buf = &cm->frame_refs[mbmi->ref_frame[j] - LAST_FRAME];
- const struct scale_factors *const sf = &ref_buf->sf;
- // TODO(any): Refine skip flag calculation considering scaling
- if (av1_is_scaled(sf)) {
- skip_hor = 0;
- skip_ver = 0;
- break;
- }
- const MV mv = mbmi->mv[j].as_mv;
- int skip_hor_plane = 0;
- int skip_ver_plane = 0;
- for (int k = 0; k < AOMMAX(1, (num_planes - 1)); ++k) {
- struct macroblockd_plane *const pd = &xd->plane[k];
- const int bw = pd->width;
- const int bh = pd->height;
- const MV mv_q4 = clamp_mv_to_umv_border_sb(
- xd, &mv, bw, bh, pd->subsampling_x, pd->subsampling_y);
- const int sub_x = (mv_q4.col & SUBPEL_MASK) << SCALE_EXTRA_BITS;
- const int sub_y = (mv_q4.row & SUBPEL_MASK) << SCALE_EXTRA_BITS;
- skip_hor_plane |= ((sub_x == 0) << k);  // bit k set when plane k has no horizontal sub-pel part
- skip_ver_plane |= ((sub_y == 0) << k);  // bit k set when plane k has no vertical sub-pel part
- }
- skip_hor = skip_hor & skip_hor_plane;
- skip_ver = skip_ver & skip_ver_plane;
- // It is not valid that "luma MV is sub-pel, whereas chroma MV is not"
- assert(skip_hor != 2);
- assert(skip_ver != 2);
- }
- // When compound prediction type is compound segment wedge, luma MC and chroma
- // MC need to go hand in hand as mask generated during luma MC is required for
- // chroma MC. If skip_hor = 0 and skip_ver = 1, mask used for chroma MC during
- // vertical filter decision may be incorrect as temporary MC evaluation
- // overwrites the mask. Make skip_ver as 0 for this case so that mask is
- // populated during luma MC
- if (is_compound && mbmi->compound_idx == 1 &&
- mbmi->interinter_comp.type == COMPOUND_DIFFWTD) {
- assert(mbmi->comp_group_idx == 1);
- if (skip_hor == 0 && skip_ver == 1) skip_ver = 0;
- }
- // do interp_filter search
- const int filter_set_size = DUAL_FILTER_SET_SIZE;
- restore_dst_buf(xd, *tmp_dst, num_planes);
- const BUFFER_SET *dst_bufs[2] = { tmp_dst, orig_dst };
- if (cpi->sf.use_fast_interpolation_filter_search &&
- cm->seq_params.enable_dual_filter) {
- // default to (R,R): EIGHTTAP_REGULARxEIGHTTAP_REGULAR
- int best_dual_mode = 0;
- // Find best of {R}x{R,Sm,Sh}
- // EIGHTTAP_REGULAR mode is calculated beforehand
- best_dual_mode = find_best_horiz_interp_filter_rd(
- x, cpi, bsize, mi_row, mi_col, orig_dst, rd, switchable_rate,
- best_skip_txfm_sb, best_skip_sse_sb, dst_bufs, switchable_ctx, skip_hor,
- tmp_rate, tmp_dist, best_dual_mode);
-
- // From best of horizontal EIGHTTAP_REGULAR modes, check vertical modes
- find_best_vert_interp_filter_rd(
- x, cpi, bsize, mi_row, mi_col, orig_dst, rd, switchable_rate,
- best_skip_txfm_sb, best_skip_sse_sb, dst_bufs, switchable_ctx, skip_ver,
- tmp_rate, tmp_dist, best_dual_mode, filter_set_size);
- } else {
- // EIGHTTAP_REGULAR mode is calculated beforehand
- for (i = 1; i < filter_set_size; ++i) {
- if (cm->seq_params.enable_dual_filter == 0) {
- // Without dual filter only sets with equal x and y filters are tried.
- const int16_t filter_y = filter_sets[i] & 0xffff;
- const int16_t filter_x = filter_sets[i] >> 16;
- if (filter_x != filter_y) continue;
- }
- interpolation_filter_rd(x, cpi, bsize, mi_row, mi_col, orig_dst, rd,
- switchable_rate, best_skip_txfm_sb,
- best_skip_sse_sb, dst_bufs, i, switchable_ctx, 0,
- tmp_rate, tmp_dist);
- assert(x->recalc_luma_mc_data == 0);
- }
- }
- swap_dst_buf(xd, dst_bufs, num_planes);
- // Recompute final MC data if required
- if (x->recalc_luma_mc_data == 1) {
- // Recomputing final luma MC data is required only if the same was skipped
- // in either of the directions. The condition below is necessary, but not
- // sufficient.
- assert((skip_hor == 1) || (skip_ver == 1));
- av1_build_inter_predictors_sby(cm, xd, mi_row, mi_col, orig_dst, bsize);
- }
- *skip_txfm_sb = best_skip_txfm_sb[1];
- *skip_sse_sb = best_skip_sse_sb[1];
- x->pred_sse[ref_frame] = (unsigned int)(best_skip_sse_sb[0] >> 4);
-
- // save search results
- if (cpi->sf.skip_repeat_interpolation_filter_search) {
- assert(match_found == -1);
- save_interp_filter_search_stat(x, mbmi);
- }
- return 0;
-}
-// Returns 1 when the transform search completes, or 0 on any of the early terminations below.
-static int txfm_search(const AV1_COMP *cpi, MACROBLOCK *x, BLOCK_SIZE bsize,
- int mi_row, int mi_col, RD_STATS *rd_stats,
- RD_STATS *rd_stats_y, RD_STATS *rd_stats_uv,
- int mode_rate, int64_t ref_best_rd) {
- /*
- * This function combines y and uv planes' transform search processes
- * together, when the prediction is generated. It first does subtraction to
- * obtain the prediction error. Then it calls
- * select_tx_type_yrd/super_block_yrd and inter_block_uvrd sequentially and
- * handles the early terminations that happen in those functions. At the end,
- * it computes the rd_stats/_y/_uv accordingly.
- */
- const AV1_COMMON *cm = &cpi->common;
- MACROBLOCKD *const xd = &x->e_mbd;
- MB_MODE_INFO *const mbmi = xd->mi[0];
- int skip_txfm_sb = 0;
- const int num_planes = av1_num_planes(cm);
- const int ref_frame_1 = mbmi->ref_frame[1];  // written back to mbmi on the early-return paths below
- const int64_t mode_rd = RDCOST(x->rdmult, mode_rate, 0);
- const int64_t rd_thresh =
- ref_best_rd == INT64_MAX ? INT64_MAX : ref_best_rd - mode_rd;
- const int skip_ctx = av1_get_skip_context(xd);
- const int64_t min_header_rate =
- mode_rate + AOMMIN(x->skip_cost[skip_ctx][0], x->skip_cost[skip_ctx][1]);
- // Account for minimum skip and non_skip rd.
- // Eventually either one of them will be added to mode_rate
- const int64_t min_header_rd_possible = RDCOST(x->rdmult, min_header_rate, 0);
-
- if (min_header_rd_possible > ref_best_rd) {  // header cost alone already exceeds the best rd
- av1_invalid_rd_stats(rd_stats_y);
- av1_invalid_rd_stats(rd_stats);
- return 0;
- }
-
- av1_init_rd_stats(rd_stats);
- av1_init_rd_stats(rd_stats_y);
- av1_init_rd_stats(rd_stats_uv);
- rd_stats->rate = mode_rate;
-
- if (!cpi->common.all_lossless)
- check_block_skip(cpi, bsize, x, xd, 0, num_planes - 1, &skip_txfm_sb);
- if (!skip_txfm_sb) {
- int64_t non_skip_rdcosty = INT64_MAX;
- int64_t skip_rdcosty = INT64_MAX;
- int64_t min_rdcosty = INT64_MAX;
- int is_cost_valid_uv = 0;
-
- // cost and distortion
- av1_subtract_plane(x, bsize, 0);
- if (cm->tx_mode == TX_MODE_SELECT && !xd->lossless[mbmi->segment_id]) {
- // Motion mode
- select_tx_type_yrd(cpi, x, rd_stats_y, bsize, mi_row, mi_col, rd_thresh);
-#if CONFIG_COLLECT_RD_STATS == 2
- PrintPredictionUnitStats(cpi, x, rd_stats_y, bsize);
-#endif // CONFIG_COLLECT_RD_STATS == 2
- } else {
- super_block_yrd(cpi, x, rd_stats_y, bsize, rd_thresh);
- memset(mbmi->inter_tx_size, mbmi->tx_size, sizeof(mbmi->inter_tx_size));
- for (int i = 0; i < xd->n4_h * xd->n4_w; ++i)
- set_blk_skip(x, 0, i, rd_stats_y->skip);
- }
-
- if (rd_stats_y->rate == INT_MAX) {
- av1_invalid_rd_stats(rd_stats);
- // TODO(angiebird): check if we need this
- // restore_dst_buf(xd, *orig_dst, num_planes);
- mbmi->ref_frame[1] = ref_frame_1;
- return 0;
- }
-
- av1_merge_rd_stats(rd_stats, rd_stats_y);
-
- non_skip_rdcosty = RDCOST(
- x->rdmult, rd_stats->rate + x->skip_cost[skip_ctx][0], rd_stats->dist);
- skip_rdcosty =
- RDCOST(x->rdmult, mode_rate + x->skip_cost[skip_ctx][1], rd_stats->sse);
- min_rdcosty = AOMMIN(non_skip_rdcosty, skip_rdcosty);
-
- if (min_rdcosty > ref_best_rd) {
- int64_t tokenonly_rdy =
- AOMMIN(RDCOST(x->rdmult, rd_stats_y->rate, rd_stats_y->dist),
- RDCOST(x->rdmult, 0, rd_stats_y->sse));
- // Invalidate rd_stats_y to skip the rest of the motion modes search
- if (tokenonly_rdy - (tokenonly_rdy >> cpi->sf.adaptive_txb_search_level) >
- rd_thresh)
- av1_invalid_rd_stats(rd_stats_y);
- mbmi->ref_frame[1] = ref_frame_1;
- return 0;
- }
-
- if (num_planes > 1) {
- /* clang-format off */
- is_cost_valid_uv =
- inter_block_uvrd(cpi, x, rd_stats_uv, bsize,
- ref_best_rd - non_skip_rdcosty,
- ref_best_rd - skip_rdcosty, FTXS_NONE);
- if (!is_cost_valid_uv) {
- mbmi->ref_frame[1] = ref_frame_1;
- return 0;
- }
- /* clang-format on */
- av1_merge_rd_stats(rd_stats, rd_stats_uv);
- } else {
- av1_init_rd_stats(rd_stats_uv);
- }
- if (rd_stats->skip) {
- // Replace the y/uv coefficient rates with the cost of signalling skip.
- rd_stats->rate -= rd_stats_uv->rate + rd_stats_y->rate;
- rd_stats_y->rate = 0;
- rd_stats_uv->rate = 0;
- rd_stats->rate += x->skip_cost[skip_ctx][1];
- mbmi->skip = 0;
- // here mbmi->skip temporarily plays a role as what this_skip2 does
-
- int64_t tmprd = RDCOST(x->rdmult, rd_stats->rate, rd_stats->dist);
- if (tmprd > ref_best_rd) {
- mbmi->ref_frame[1] = ref_frame_1;
- return 0;
- }
- } else if (!xd->lossless[mbmi->segment_id] &&
- (RDCOST(x->rdmult,
- rd_stats_y->rate + rd_stats_uv->rate +
- x->skip_cost[skip_ctx][0],
- rd_stats->dist) >=
- RDCOST(x->rdmult, x->skip_cost[skip_ctx][1], rd_stats->sse))) {
- // Coding the residual costs at least as much as skipping it: use skip.
- rd_stats->rate -= rd_stats_uv->rate + rd_stats_y->rate;
- rd_stats->rate += x->skip_cost[skip_ctx][1];
- rd_stats->dist = rd_stats->sse;
- rd_stats_y->rate = 0;
- rd_stats_uv->rate = 0;
- mbmi->skip = 1;
- } else {
- rd_stats->rate += x->skip_cost[skip_ctx][0];
- mbmi->skip = 0;
- }
- } else {
- x->skip = 1;
- mbmi->tx_size = tx_size_from_tx_mode(bsize, cm->tx_mode);
- // The cost of skip bit needs to be added.
- mbmi->skip = 0;
- rd_stats->rate += x->skip_cost[skip_ctx][1];
-
- rd_stats->dist = 0;
- rd_stats->sse = 0;
- rd_stats_y->rate = 0;
- rd_stats_uv->rate = 0;
- rd_stats->skip = 1;
- int64_t tmprd = RDCOST(x->rdmult, rd_stats->rate, rd_stats->dist);
- if (tmprd > ref_best_rd) {
- mbmi->ref_frame[1] = ref_frame_1;
- return 0;
- }
- }
- return 1;
-}
-// Evaluates inter-intra prediction (intra mode choice plus optional wedge); returns -1 to reject, else 0.
-static int handle_inter_intra_mode(const AV1_COMP *const cpi,
- MACROBLOCK *const x, BLOCK_SIZE bsize,
- int mi_row, int mi_col, MB_MODE_INFO *mbmi,
- HandleInterModeArgs *args,
- int64_t ref_best_rd, int *rate_mv,
- int *tmp_rate2, BUFFER_SET *orig_dst) {
- const AV1_COMMON *const cm = &cpi->common;
- const int num_planes = av1_num_planes(cm);
- MACROBLOCKD *xd = &x->e_mbd;
-
- INTERINTRA_MODE best_interintra_mode = II_DC_PRED;
- int64_t rd, best_interintra_rd = INT64_MAX;
- int rmode, rate_sum;
- int64_t dist_sum;
- int tmp_rate_mv = 0;
- int tmp_skip_txfm_sb;
- int bw = block_size_wide[bsize];
- int64_t tmp_skip_sse_sb;
- DECLARE_ALIGNED(16, uint8_t, tmp_buf_[2 * MAX_INTERINTRA_SB_SQUARE]);
- DECLARE_ALIGNED(16, uint8_t, intrapred_[2 * MAX_INTERINTRA_SB_SQUARE]);
- uint8_t *tmp_buf = get_buf_by_bd(xd, tmp_buf_);
- uint8_t *intrapred = get_buf_by_bd(xd, intrapred_);
- const int *const interintra_mode_cost =
- x->interintra_mode_cost[size_group_lookup[bsize]];
- const int_mv mv0 = mbmi->mv[0];  // original mv, restored when the refined one is not kept
- const int is_wedge_used = is_interintra_wedge_used(bsize);
- int rwedge = is_wedge_used ? x->wedge_interintra_cost[bsize][0] : 0;
- mbmi->ref_frame[1] = NONE_FRAME;  // build the luma inter predictor into tmp_buf first
- xd->plane[0].dst.buf = tmp_buf;
- xd->plane[0].dst.stride = bw;
- av1_build_inter_predictors_sby(cm, xd, mi_row, mi_col, NULL, bsize);
-
- restore_dst_buf(xd, *orig_dst, num_planes);
- mbmi->ref_frame[1] = INTRA_FRAME;
- mbmi->use_wedge_interintra = 0;
- best_interintra_mode = args->inter_intra_mode[mbmi->ref_frame[0]];
- int j = 0;  // stays 0 when the mode search loop below is not run
- if (cpi->sf.reuse_inter_intra_mode == 0 ||
- best_interintra_mode == INTERINTRA_MODES) {
- for (j = 0; j < INTERINTRA_MODES; ++j) {
- mbmi->interintra_mode = (INTERINTRA_MODE)j;
- rmode = interintra_mode_cost[mbmi->interintra_mode];
- av1_build_intra_predictors_for_interintra(cm, xd, bsize, 0, orig_dst,
- intrapred, bw);
- av1_combine_interintra(xd, bsize, 0, tmp_buf, bw, intrapred, bw);
- model_rd_sb_fn[MODELRD_TYPE_INTERINTRA](
- cpi, bsize, x, xd, 0, 0, mi_row, mi_col, &rate_sum, &dist_sum,
- &tmp_skip_txfm_sb, &tmp_skip_sse_sb, NULL, NULL, NULL);
- rd = RDCOST(x->rdmult, tmp_rate_mv + rate_sum + rmode, dist_sum);
- if (rd < best_interintra_rd) {
- best_interintra_rd = rd;
- best_interintra_mode = mbmi->interintra_mode;
- }
- }
- args->inter_intra_mode[mbmi->ref_frame[0]] = best_interintra_mode;  // cache the winner per reference frame
- }
- // Rebuild the prediction unless the loop ran and II_SMOOTH_PRED won.
- if (j == 0 || best_interintra_mode != II_SMOOTH_PRED) {
- mbmi->interintra_mode = best_interintra_mode;
- rmode = interintra_mode_cost[mbmi->interintra_mode];
- av1_build_intra_predictors_for_interintra(cm, xd, bsize, 0, orig_dst,
- intrapred, bw);
- av1_combine_interintra(xd, bsize, 0, tmp_buf, bw, intrapred, bw);
- }
- rd = estimate_yrd_for_sb(cpi, bsize, x, &rate_sum, &dist_sum,
- &tmp_skip_txfm_sb, &tmp_skip_sse_sb, INT64_MAX);
- if (rd != INT64_MAX)
- rd = RDCOST(x->rdmult, *rate_mv + rmode + rate_sum + rwedge, dist_sum);
- best_interintra_rd = rd;
- if (ref_best_rd < INT64_MAX && (best_interintra_rd >> 1) > ref_best_rd) {
- return -1;
- }
- if (is_wedge_used) {
- int64_t best_interintra_rd_nowedge = rd;
- int64_t best_interintra_rd_wedge = INT64_MAX;
- int_mv tmp_mv;
- // Disable wedge search if source variance is small
- if (x->source_variance > cpi->sf.disable_wedge_search_var_thresh) {
- mbmi->use_wedge_interintra = 1;
-
- rwedge = av1_cost_literal(get_interintra_wedge_bits(bsize)) +
- x->wedge_interintra_cost[bsize][1];
-
- best_interintra_rd_wedge =
- pick_interintra_wedge(cpi, x, bsize, intrapred_, tmp_buf_);
-
- best_interintra_rd_wedge +=
- RDCOST(x->rdmult, rmode + *rate_mv + rwedge, 0);
- rd = INT64_MAX;
- // Refine motion vector.
- if (have_newmv_in_inter_mode(mbmi->mode)) {
- // get negative of mask
- const uint8_t *mask = av1_get_contiguous_soft_mask(
- mbmi->interintra_wedge_index, 1, bsize);
- tmp_mv = mbmi->mv[0];
- compound_single_motion_search(cpi, x, bsize, &tmp_mv.as_mv, mi_row,
- mi_col, intrapred, mask, bw, &tmp_rate_mv,
- 0);
- if (mbmi->mv[0].as_int != tmp_mv.as_int) {
- mbmi->mv[0].as_int = tmp_mv.as_int;
- av1_build_inter_predictors_sby(cm, xd, mi_row, mi_col, orig_dst,
- bsize);
- model_rd_sb_fn[MODELRD_TYPE_MASKED_COMPOUND](
- cpi, bsize, x, xd, 0, 0, mi_row, mi_col, &rate_sum, &dist_sum,
- &tmp_skip_txfm_sb, &tmp_skip_sse_sb, NULL, NULL, NULL);
- rd = RDCOST(x->rdmult, tmp_rate_mv + rmode + rate_sum + rwedge,
- dist_sum);
- }
- }
- if (rd >= best_interintra_rd_wedge) {  // refinement absent or not better: use the original mv
- tmp_mv.as_int = mv0.as_int;
- tmp_rate_mv = *rate_mv;
- av1_combine_interintra(xd, bsize, 0, tmp_buf, bw, intrapred, bw);
- }
- // Evaluate closer to true rd
- rd = estimate_yrd_for_sb(cpi, bsize, x, &rate_sum, &dist_sum,
- &tmp_skip_txfm_sb, &tmp_skip_sse_sb, INT64_MAX);
- if (rd != INT64_MAX)
- rd = RDCOST(x->rdmult, rmode + tmp_rate_mv + rwedge + rate_sum,
- dist_sum);
- best_interintra_rd_wedge = rd;
- if (best_interintra_rd_wedge < best_interintra_rd_nowedge) {
- mbmi->use_wedge_interintra = 1;
- mbmi->mv[0].as_int = tmp_mv.as_int;
- *tmp_rate2 += tmp_rate_mv - *rate_mv;
- *rate_mv = tmp_rate_mv;
- } else {
- mbmi->use_wedge_interintra = 0;
- mbmi->mv[0].as_int = mv0.as_int;
- av1_build_inter_predictors_sby(cm, xd, mi_row, mi_col, orig_dst, bsize);
- }
- } else {
- mbmi->use_wedge_interintra = 0;
- }
- } // if (is_interintra_wedge_used(bsize))
- if (num_planes > 1) {
- av1_build_inter_predictors_sbuv(cm, xd, mi_row, mi_col, orig_dst, bsize);
- }
- return 0;
-}
-// Tries each allowed motion mode (plus inter-intra when allowed), keeps the lowest-rd one; returns 0 or INT64_MAX.
-// TODO(afergs): Refactor the MBMI references in here - there's four
-// TODO(afergs): Refactor optional args - add them to a struct or remove
-static int64_t motion_mode_rd(const AV1_COMP *const cpi, MACROBLOCK *const x,
- BLOCK_SIZE bsize, RD_STATS *rd_stats,
- RD_STATS *rd_stats_y, RD_STATS *rd_stats_uv,
- int *disable_skip, int mi_row, int mi_col,
- HandleInterModeArgs *const args,
- int64_t ref_best_rd, const int *refs,
- int *rate_mv, BUFFER_SET *orig_dst
-#if CONFIG_COLLECT_INTER_MODE_RD_STATS
- ,
- TileDataEnc *tile_data, int64_t *best_est_rd,
- int do_tx_search, InterModesInfo *inter_modes_info
-#endif
-) {
- const AV1_COMMON *const cm = &cpi->common;
- const int num_planes = av1_num_planes(cm);
- MACROBLOCKD *xd = &x->e_mbd;
- MB_MODE_INFO *mbmi = xd->mi[0];
- const int is_comp_pred = has_second_ref(mbmi);
- const PREDICTION_MODE this_mode = mbmi->mode;
- const int rate2_nocoeff = rd_stats->rate;
- int best_xskip, best_disable_skip = 0;
- RD_STATS best_rd_stats, best_rd_stats_y, best_rd_stats_uv;
- MB_MODE_INFO base_mbmi, best_mbmi;
- uint8_t best_blk_skip[MAX_MIB_SIZE * MAX_MIB_SIZE];
- const int rate_mv0 = *rate_mv;
-
- int interintra_allowed = cm->seq_params.enable_interintra_compound &&
- is_interintra_allowed(mbmi) && mbmi->compound_idx;
- int pts0[SAMPLES_ARRAY_SIZE], pts_inref0[SAMPLES_ARRAY_SIZE];
-
- assert(mbmi->ref_frame[1] != INTRA_FRAME);
- const MV_REFERENCE_FRAME ref_frame_1 = mbmi->ref_frame[1];
- av1_invalid_rd_stats(&best_rd_stats);
- aom_clear_system_state();
- mbmi->num_proj_ref = 1; // assume num_proj_ref >=1
- MOTION_MODE last_motion_mode_allowed = SIMPLE_TRANSLATION;
- if (cm->switchable_motion_mode) {
- last_motion_mode_allowed = motion_mode_allowed(xd->global_motion, xd, mbmi,
- cm->allow_warped_motion);
- }
- if (last_motion_mode_allowed == WARPED_CAUSAL) {
- mbmi->num_proj_ref = findSamples(cm, xd, mi_row, mi_col, pts0, pts_inref0);
- }
- int total_samples = mbmi->num_proj_ref;
- if (total_samples == 0) {
- last_motion_mode_allowed = OBMC_CAUSAL;
- }
- base_mbmi = *mbmi;  // snapshot restored at the start of every candidate
-
- const int switchable_rate =
- av1_is_interp_needed(xd) ? av1_get_switchable_rate(cm, x, xd) : 0;
- int64_t best_rd = INT64_MAX;
- int best_rate_mv = rate_mv0;
- for (int mode_index = (int)SIMPLE_TRANSLATION;
- mode_index <= (int)last_motion_mode_allowed + interintra_allowed;
- mode_index++) {
- if (args->skip_motion_mode && mode_index) continue;  // only mode_index 0 is evaluated
- int64_t tmp_rd = INT64_MAX;
- int tmp_rate2 = rate2_nocoeff;
- int is_interintra_mode = mode_index > (int)last_motion_mode_allowed;  // index past the last motion mode means inter-intra
- int skip_txfm_sb = 0;
- int tmp_rate_mv = rate_mv0;
-
- *mbmi = base_mbmi;
- if (is_interintra_mode) {
- mbmi->motion_mode = SIMPLE_TRANSLATION;
- } else {
- mbmi->motion_mode = (MOTION_MODE)mode_index;
- assert(mbmi->ref_frame[1] != INTRA_FRAME);
- }
-
- if (mbmi->motion_mode == SIMPLE_TRANSLATION && !is_interintra_mode) {
- // SIMPLE_TRANSLATION mode: no need to recalculate.
- // The prediction is calculated before motion_mode_rd() is called in
- // handle_inter_mode()
- } else if (mbmi->motion_mode == OBMC_CAUSAL) {
- uint32_t cur_mv = mbmi->mv[0].as_int;
- assert(!is_comp_pred);
- if (have_newmv_in_inter_mode(this_mode)) {
- single_motion_search(cpi, x, bsize, mi_row, mi_col, 0, &tmp_rate_mv);
- mbmi->mv[0].as_int = x->best_mv.as_int;
-#if USE_DISCOUNT_NEWMV_TEST
- if (discount_newmv_test(cpi, x, this_mode, mbmi->mv[0])) {
- tmp_rate_mv = AOMMAX((tmp_rate_mv / NEW_MV_DISCOUNT_FACTOR), 1);
- }
-#endif
- tmp_rate2 = rate2_nocoeff - rate_mv0 + tmp_rate_mv;
- }
- if (mbmi->mv[0].as_int != cur_mv) {  // mv changed: rebuild the inter predictor
- av1_build_inter_predictors_sb(cm, xd, mi_row, mi_col, orig_dst, bsize);
- }
- av1_build_obmc_inter_prediction(
- cm, xd, mi_row, mi_col, args->above_pred_buf, args->above_pred_stride,
- args->left_pred_buf, args->left_pred_stride);
- } else if (mbmi->motion_mode == WARPED_CAUSAL) {
- int pts[SAMPLES_ARRAY_SIZE], pts_inref[SAMPLES_ARRAY_SIZE];
- mbmi->motion_mode = WARPED_CAUSAL;
- mbmi->wm_params.wmtype = DEFAULT_WMTYPE;
- mbmi->interp_filters = av1_broadcast_interp_filter(
- av1_unswitchable_filter(cm->interp_filter));
-
- memcpy(pts, pts0, total_samples * 2 * sizeof(*pts0));
- memcpy(pts_inref, pts_inref0, total_samples * 2 * sizeof(*pts_inref0));
- // Select the samples according to motion vector difference
- if (mbmi->num_proj_ref > 1) {
- mbmi->num_proj_ref = selectSamples(&mbmi->mv[0].as_mv, pts, pts_inref,
- mbmi->num_proj_ref, bsize);
- }
-
- if (!find_projection(mbmi->num_proj_ref, pts, pts_inref, bsize,
- mbmi->mv[0].as_mv.row, mbmi->mv[0].as_mv.col,
- &mbmi->wm_params, mi_row, mi_col)) {
- // Refine MV for NEWMV mode
- assert(!is_comp_pred);
- if (have_newmv_in_inter_mode(this_mode)) {
- const int_mv mv0 = mbmi->mv[0];
- const WarpedMotionParams wm_params0 = mbmi->wm_params;
- int num_proj_ref0 = mbmi->num_proj_ref;
-
- // Refine MV in a small range.
- av1_refine_warped_mv(cpi, x, bsize, mi_row, mi_col, pts0, pts_inref0,
- total_samples);
-
- // Keep the refined MV and WM parameters.
- if (mv0.as_int != mbmi->mv[0].as_int) {
- const int ref = refs[0];
- const int_mv ref_mv = av1_get_ref_mv(x, 0);
- tmp_rate_mv =
- av1_mv_bit_cost(&mbmi->mv[0].as_mv, &ref_mv.as_mv,
- x->nmvjointcost, x->mvcost, MV_COST_WEIGHT);
-
- if (cpi->sf.adaptive_motion_search)
- x->pred_mv[ref] = mbmi->mv[0].as_mv;
-
-#if USE_DISCOUNT_NEWMV_TEST
- if (discount_newmv_test(cpi, x, this_mode, mbmi->mv[0])) {
- tmp_rate_mv = AOMMAX((tmp_rate_mv / NEW_MV_DISCOUNT_FACTOR), 1);
- }
-#endif
- tmp_rate2 = rate2_nocoeff - rate_mv0 + tmp_rate_mv;
- } else {
- // Restore the old MV and WM parameters.
- mbmi->mv[0] = mv0;
- mbmi->wm_params = wm_params0;
- mbmi->num_proj_ref = num_proj_ref0;
- }
- }
-
- av1_build_inter_predictors_sb(cm, xd, mi_row, mi_col, NULL, bsize);
- } else {
- continue;
- }
- } else if (is_interintra_mode) {
- const int ret = handle_inter_intra_mode(
- cpi, x, bsize, mi_row, mi_col, mbmi, args, ref_best_rd, &tmp_rate_mv,
- &tmp_rate2, orig_dst);
- if (ret < 0) continue;
- }
-
- if (!cpi->common.all_lossless)
- check_block_skip(cpi, bsize, x, xd, 0, num_planes - 1, &skip_txfm_sb);
-
- x->skip = 0;
-
- rd_stats->dist = 0;
- rd_stats->sse = 0;
- rd_stats->skip = 1;
- rd_stats->rate = tmp_rate2;
- if (mbmi->motion_mode != WARPED_CAUSAL) rd_stats->rate += switchable_rate;
- if (interintra_allowed) {
- rd_stats->rate += x->interintra_cost[size_group_lookup[bsize]]
- [mbmi->ref_frame[1] == INTRA_FRAME];
- if (mbmi->ref_frame[1] == INTRA_FRAME) {
- rd_stats->rate += x->interintra_mode_cost[size_group_lookup[bsize]]
- [mbmi->interintra_mode];
- if (is_interintra_wedge_used(bsize)) {
- rd_stats->rate +=
- x->wedge_interintra_cost[bsize][mbmi->use_wedge_interintra];
- if (mbmi->use_wedge_interintra) {
- rd_stats->rate +=
- av1_cost_literal(get_interintra_wedge_bits(bsize));
- }
- }
- }
- }
- if ((last_motion_mode_allowed > SIMPLE_TRANSLATION) &&
- (mbmi->ref_frame[1] != INTRA_FRAME)) {
- if (last_motion_mode_allowed == WARPED_CAUSAL) {
- rd_stats->rate += x->motion_mode_cost[bsize][mbmi->motion_mode];
- } else {
- rd_stats->rate += x->motion_mode_cost1[bsize][mbmi->motion_mode];
- }
- }
-
- if (!skip_txfm_sb) {
-#if CONFIG_COLLECT_INTER_MODE_RD_STATS
- int64_t est_rd = 0;
- int est_skip = 0;
- if (cpi->sf.inter_mode_rd_model_estimation && cm->tile_cols == 1 &&
- cm->tile_rows == 1) {
- InterModeRdModel *md = &tile_data->inter_mode_rd_models[mbmi->sb_type];
- if (md->ready) {
- const int64_t curr_sse = get_sse(cpi, x);
- est_rd = get_est_rd(tile_data, mbmi->sb_type, x->rdmult, curr_sse,
- rd_stats->rate);
- est_skip = est_rd * 0.8 > *best_est_rd;  // skip when 80% of the estimate still exceeds the best estimate
- if (est_skip) {
- mbmi->ref_frame[1] = ref_frame_1;
- continue;
- } else {
- if (est_rd < *best_est_rd) {
- *best_est_rd = est_rd;
- }
- }
- }
- }
-#endif // CONFIG_COLLECT_INTER_MODE_RD_STATS
- }
-
-#if CONFIG_COLLECT_INTER_MODE_RD_STATS
- if (!do_tx_search) {
- const int64_t curr_sse = get_sse(cpi, x);
- int est_residue_cost = 0;
- int64_t est_dist = 0;
- const int has_est_rd = get_est_rate_dist(tile_data, bsize, curr_sse,
- &est_residue_cost, &est_dist);
- (void)has_est_rd;
- assert(has_est_rd);
- const int mode_rate = rd_stats->rate;
- rd_stats->rate += est_residue_cost;
- rd_stats->dist = est_dist;
- rd_stats->rdcost = RDCOST(x->rdmult, rd_stats->rate, rd_stats->dist);
- if (cm->reference_mode == SINGLE_REFERENCE) {
- if (!is_comp_pred) {
- inter_modes_info_push(inter_modes_info, mode_rate, curr_sse,
- rd_stats->rdcost, mbmi);
- }
- } else {
- inter_modes_info_push(inter_modes_info, mode_rate, curr_sse,
- rd_stats->rdcost, mbmi);
- }
- } else {
-#endif
- int mode_rate = rd_stats->rate;
- if (!txfm_search(cpi, x, bsize, mi_row, mi_col, rd_stats, rd_stats_y,
- rd_stats_uv, mode_rate, ref_best_rd)) {
- if (rd_stats_y->rate == INT_MAX && mode_index == 0) {  // first candidate has no valid luma rate: abort
- return INT64_MAX;
- }
- continue;
- }
- if (!skip_txfm_sb) {
- const int64_t curr_rd =
- RDCOST(x->rdmult, rd_stats->rate, rd_stats->dist);
- if (curr_rd < ref_best_rd) {
- ref_best_rd = curr_rd;  // tighten the threshold for later candidates
- }
- *disable_skip = 0;
-#if CONFIG_COLLECT_INTER_MODE_RD_STATS
- if (cpi->sf.inter_mode_rd_model_estimation) {
- const int skip_ctx = av1_get_skip_context(xd);
- inter_mode_data_push(tile_data, mbmi->sb_type, rd_stats->sse,
- rd_stats->dist,
- rd_stats_y->rate + rd_stats_uv->rate +
- x->skip_cost[skip_ctx][mbmi->skip]);
- }
-#endif // CONFIG_COLLECT_INTER_MODE_RD_STATS
- } else {
- *disable_skip = 1;
- }
-#if CONFIG_COLLECT_INTER_MODE_RD_STATS
- }
-#endif
-
- if (this_mode == GLOBALMV || this_mode == GLOBAL_GLOBALMV) {
- if (is_nontrans_global_motion(xd, xd->mi[0])) {
- mbmi->interp_filters = av1_broadcast_interp_filter(
- av1_unswitchable_filter(cm->interp_filter));
- }
- }
-
- tmp_rd = RDCOST(x->rdmult, rd_stats->rate, rd_stats->dist);
- if (mode_index == 0)
- args->simple_rd[this_mode][mbmi->ref_mv_idx][mbmi->ref_frame[0]] = tmp_rd;
- if ((mode_index == 0) || (tmp_rd < best_rd)) {  // first candidate always becomes the initial best
- best_mbmi = *mbmi;
- best_rd = tmp_rd;
- best_rd_stats = *rd_stats;
- best_rd_stats_y = *rd_stats_y;
- best_rate_mv = tmp_rate_mv;
- if (num_planes > 1) best_rd_stats_uv = *rd_stats_uv;
- memcpy(best_blk_skip, x->blk_skip,
- sizeof(x->blk_skip[0]) * xd->n4_h * xd->n4_w);
- best_xskip = x->skip;
- best_disable_skip = *disable_skip;
- if (best_xskip) break;
- }
- }
- mbmi->ref_frame[1] = ref_frame_1;
- *rate_mv = best_rate_mv;
- if (best_rd == INT64_MAX) {
- av1_invalid_rd_stats(rd_stats);
- restore_dst_buf(xd, *orig_dst, num_planes);
- return INT64_MAX;
- }
- // Write the best candidate's state back to the outputs.
- *mbmi = best_mbmi;
- *rd_stats = best_rd_stats;
- *rd_stats_y = best_rd_stats_y;
- if (num_planes > 1) *rd_stats_uv = best_rd_stats_uv;
- memcpy(x->blk_skip, best_blk_skip,
- sizeof(x->blk_skip[0]) * xd->n4_h * xd->n4_w);
- x->skip = best_xskip;
- *disable_skip = best_disable_skip;
-
- restore_dst_buf(xd, *orig_dst, num_planes);
- return 0;
-}
-// Fills rd_stats for skip mode: dist and sse are the summed per-plane squared residual (<< 4); always returns 0.
-static int64_t skip_mode_rd(RD_STATS *rd_stats, const AV1_COMP *const cpi,
- MACROBLOCK *const x, BLOCK_SIZE bsize, int mi_row,
- int mi_col, BUFFER_SET *const orig_dst) {
- const AV1_COMMON *cm = &cpi->common;
- const int num_planes = av1_num_planes(cm);
- MACROBLOCKD *const xd = &x->e_mbd;
- av1_build_inter_predictors_sb(cm, xd, mi_row, mi_col, orig_dst, bsize);
-
- int64_t total_sse = 0;
- for (int plane = 0; plane < num_planes; ++plane) {
- const struct macroblock_plane *const p = &x->plane[plane];
- const struct macroblockd_plane *const pd = &xd->plane[plane];
- const BLOCK_SIZE plane_bsize =
- get_plane_block_size(bsize, pd->subsampling_x, pd->subsampling_y);
- const int bw = block_size_wide[plane_bsize];
- const int bh = block_size_high[plane_bsize];
-
- av1_subtract_plane(x, bsize, plane);
- int64_t sse = aom_sum_squares_2d_i16(p->src_diff, bw, bw, bh);
- sse = sse << 4;
- total_sse += sse;
- }
- const int skip_mode_ctx = av1_get_skip_mode_context(xd);
- rd_stats->dist = rd_stats->sse = total_sse;
- rd_stats->rate = x->skip_mode_cost[skip_mode_ctx][1];
- rd_stats->rdcost = RDCOST(x->rdmult, rd_stats->rate, rd_stats->dist);
-
- restore_dst_buf(xd, *orig_dst, num_planes);
- return 0;
-}
-// Returns 0 for NEARESTMV, ref_mv_idx + 1 for NEARMV, and -1 for any other single-reference mode.
-static INLINE int get_ref_mv_offset(PREDICTION_MODE single_mode,
- uint8_t ref_mv_idx) {
- assert(is_inter_singleref_mode(single_mode));
- int ref_mv_offset;
- if (single_mode == NEARESTMV) {
- ref_mv_offset = 0;
- } else if (single_mode == NEARMV) {
- ref_mv_offset = ref_mv_idx + 1;
- } else {
- ref_mv_offset = -1;
- }
- return ref_mv_offset;
-}
-
-static INLINE void get_this_mv(int_mv *this_mv, PREDICTION_MODE this_mode,
- int ref_idx, int ref_mv_idx,
- const MV_REFERENCE_FRAME *ref_frame,
- const MB_MODE_INFO_EXT *mbmi_ext) {
- const uint8_t ref_frame_type = av1_ref_frame_type(ref_frame);
- const int is_comp_pred = ref_frame[1] > INTRA_FRAME;
- const PREDICTION_MODE single_mode =
- get_single_mode(this_mode, ref_idx, is_comp_pred);
- assert(is_inter_singleref_mode(single_mode));
- if (single_mode == NEWMV) {
- this_mv->as_int = INVALID_MV;
- } else if (single_mode == GLOBALMV) {
- *this_mv = mbmi_ext->global_mvs[ref_frame[ref_idx]];
- } else {
- assert(single_mode == NEARMV || single_mode == NEARESTMV);
- const int ref_mv_offset = get_ref_mv_offset(single_mode, ref_mv_idx);
- if (ref_mv_offset < mbmi_ext->ref_mv_count[ref_frame_type]) {
- assert(ref_mv_offset >= 0);
- if (ref_idx == 0) {
- *this_mv =
- mbmi_ext->ref_mv_stack[ref_frame_type][ref_mv_offset].this_mv;
- } else {
- *this_mv =
- mbmi_ext->ref_mv_stack[ref_frame_type][ref_mv_offset].comp_mv;
- }
- } else {
- *this_mv = mbmi_ext->global_mvs[ref_frame[ref_idx]];
- }
- }
-}
-
-// This function update the non-new mv for the current prediction mode
-static INLINE int build_cur_mv(int_mv *cur_mv, PREDICTION_MODE this_mode,
- const AV1_COMMON *cm, const MACROBLOCK *x) {
- const MACROBLOCKD *xd = &x->e_mbd;
- const MB_MODE_INFO *mbmi = xd->mi[0];
- const int is_comp_pred = has_second_ref(mbmi);
- int ret = 1;
- for (int i = 0; i < is_comp_pred + 1; ++i) {
- int_mv this_mv;
- get_this_mv(&this_mv, this_mode, i, mbmi->ref_mv_idx, mbmi->ref_frame,
- x->mbmi_ext);
- const PREDICTION_MODE single_mode =
- get_single_mode(this_mode, i, is_comp_pred);
- if (single_mode == NEWMV) {
- cur_mv[i] = this_mv;
- } else {
- ret &= clamp_and_check_mv(cur_mv + i, this_mv, cm, x);
- }
- }
- return ret;
-}
-
-static INLINE int get_drl_cost(const MB_MODE_INFO *mbmi,
- const MB_MODE_INFO_EXT *mbmi_ext,
- int (*drl_mode_cost0)[2],
- int8_t ref_frame_type) {
- int cost = 0;
- if (mbmi->mode == NEWMV || mbmi->mode == NEW_NEWMV) {
- for (int idx = 0; idx < 2; ++idx) {
- if (mbmi_ext->ref_mv_count[ref_frame_type] > idx + 1) {
- uint8_t drl_ctx =
- av1_drl_ctx(mbmi_ext->ref_mv_stack[ref_frame_type], idx);
- cost += drl_mode_cost0[drl_ctx][mbmi->ref_mv_idx != idx];
- if (mbmi->ref_mv_idx == idx) return cost;
- }
- }
- return cost;
- }
-
- if (have_nearmv_in_inter_mode(mbmi->mode)) {
- for (int idx = 1; idx < 3; ++idx) {
- if (mbmi_ext->ref_mv_count[ref_frame_type] > idx + 1) {
- uint8_t drl_ctx =
- av1_drl_ctx(mbmi_ext->ref_mv_stack[ref_frame_type], idx);
- cost += drl_mode_cost0[drl_ctx][mbmi->ref_mv_idx != (idx - 1)];
- if (mbmi->ref_mv_idx == (idx - 1)) return cost;
- }
- }
- return cost;
- }
- return cost;
-}
-
-// Struct for buffers used by compound_type_rd() function.
-// For sizes and alignment of these arrays, refer to
-// alloc_compound_type_rd_buffers() function.
-typedef struct {
- uint8_t *pred0;
- uint8_t *pred1;
- int16_t *residual1; // src - pred1
- int16_t *diff10; // pred1 - pred0
- uint8_t *tmp_best_mask_buf; // backup of the best segmentation mask
-} CompoundTypeRdBuffers;
-
-static int compound_type_rd(const AV1_COMP *const cpi, MACROBLOCK *x,
- BLOCK_SIZE bsize, int mi_col, int mi_row,
- int_mv *cur_mv, int masked_compound_used,
- BUFFER_SET *orig_dst, const BUFFER_SET *tmp_dst,
- CompoundTypeRdBuffers *buffers, int *rate_mv,
- int64_t *rd, RD_STATS *rd_stats,
- int64_t ref_best_rd) {
- const AV1_COMMON *cm = &cpi->common;
- MACROBLOCKD *xd = &x->e_mbd;
- MB_MODE_INFO *mbmi = xd->mi[0];
- const PREDICTION_MODE this_mode = mbmi->mode;
- const int bw = block_size_wide[bsize];
- int rate_sum, rs2;
- int64_t dist_sum;
-
- int_mv best_mv[2];
- int best_tmp_rate_mv = *rate_mv;
- int tmp_skip_txfm_sb;
- int64_t tmp_skip_sse_sb;
- INTERINTER_COMPOUND_DATA best_compound_data;
- best_compound_data.type = COMPOUND_AVERAGE;
- uint8_t *preds0[1] = { buffers->pred0 };
- uint8_t *preds1[1] = { buffers->pred1 };
- int strides[1] = { bw };
- int tmp_rate_mv;
- const int num_pix = 1 << num_pels_log2_lookup[bsize];
- const int mask_len = 2 * num_pix * sizeof(uint8_t);
- COMPOUND_TYPE cur_type;
- int best_compmode_interinter_cost = 0;
- int calc_pred_masked_compound = 1;
-
- best_mv[0].as_int = cur_mv[0].as_int;
- best_mv[1].as_int = cur_mv[1].as_int;
- *rd = INT64_MAX;
- for (cur_type = COMPOUND_AVERAGE; cur_type < COMPOUND_TYPES; cur_type++) {
- if (cur_type != COMPOUND_AVERAGE && !masked_compound_used) break;
- if (!is_interinter_compound_used(cur_type, bsize)) continue;
- tmp_rate_mv = *rate_mv;
- int64_t best_rd_cur = INT64_MAX;
- mbmi->interinter_comp.type = cur_type;
- int masked_type_cost = 0;
-
- const int comp_group_idx_ctx = get_comp_group_idx_context(xd);
- const int comp_index_ctx = get_comp_index_context(cm, xd);
- mbmi->compound_idx = 1;
- if (cur_type == COMPOUND_AVERAGE) {
- mbmi->comp_group_idx = 0;
- if (masked_compound_used) {
- masked_type_cost += x->comp_group_idx_cost[comp_group_idx_ctx][0];
- }
- masked_type_cost += x->comp_idx_cost[comp_index_ctx][1];
- rs2 = masked_type_cost;
- const int64_t mode_rd = RDCOST(x->rdmult, rs2 + rd_stats->rate, 0);
- if (mode_rd < ref_best_rd) {
- av1_build_inter_predictors_sby(cm, xd, mi_row, mi_col, orig_dst, bsize);
- int64_t est_rd =
- estimate_yrd_for_sb(cpi, bsize, x, &rate_sum, &dist_sum,
- &tmp_skip_txfm_sb, &tmp_skip_sse_sb, INT64_MAX);
- if (est_rd != INT64_MAX)
- best_rd_cur = RDCOST(x->rdmult, rs2 + *rate_mv + rate_sum, dist_sum);
- }
- // use spare buffer for following compound type try
- restore_dst_buf(xd, *tmp_dst, 1);
- } else {
- mbmi->comp_group_idx = 1;
- masked_type_cost += x->comp_group_idx_cost[comp_group_idx_ctx][1];
- masked_type_cost += x->compound_type_cost[bsize][cur_type - 1];
- rs2 = masked_type_cost;
- if (x->source_variance > cpi->sf.disable_wedge_search_var_thresh &&
- *rd / 3 < ref_best_rd) {
- best_rd_cur = build_and_cost_compound_type(
- cpi, x, cur_mv, bsize, this_mode, &rs2, *rate_mv, orig_dst,
- &tmp_rate_mv, preds0, preds1, buffers->residual1, buffers->diff10,
- strides, mi_row, mi_col, rd_stats->rate, ref_best_rd,
- &calc_pred_masked_compound);
- }
- }
- if (best_rd_cur < *rd) {
- *rd = best_rd_cur;
- best_compound_data = mbmi->interinter_comp;
- if (masked_compound_used && cur_type != COMPOUND_TYPES - 1) {
- memcpy(buffers->tmp_best_mask_buf, xd->seg_mask, mask_len);
- }
- best_compmode_interinter_cost = rs2;
- if (have_newmv_in_inter_mode(this_mode)) {
- if (cur_type == COMPOUND_WEDGE) {
- best_tmp_rate_mv = tmp_rate_mv;
- best_mv[0].as_int = mbmi->mv[0].as_int;
- best_mv[1].as_int = mbmi->mv[1].as_int;
- } else {
- best_mv[0].as_int = cur_mv[0].as_int;
- best_mv[1].as_int = cur_mv[1].as_int;
- }
- }
- }
- // reset to original mvs for next iteration
- mbmi->mv[0].as_int = cur_mv[0].as_int;
- mbmi->mv[1].as_int = cur_mv[1].as_int;
- }
- if (mbmi->interinter_comp.type != best_compound_data.type) {
- mbmi->comp_group_idx =
- (best_compound_data.type == COMPOUND_AVERAGE) ? 0 : 1;
- mbmi->interinter_comp = best_compound_data;
- memcpy(xd->seg_mask, buffers->tmp_best_mask_buf, mask_len);
- }
- if (have_newmv_in_inter_mode(this_mode)) {
- mbmi->mv[0].as_int = best_mv[0].as_int;
- mbmi->mv[1].as_int = best_mv[1].as_int;
- if (mbmi->interinter_comp.type == COMPOUND_WEDGE) {
- rd_stats->rate += best_tmp_rate_mv - *rate_mv;
- *rate_mv = best_tmp_rate_mv;
- }
- }
- restore_dst_buf(xd, *orig_dst, 1);
- return best_compmode_interinter_cost;
-}
-
-static INLINE int is_single_newmv_valid(HandleInterModeArgs *args,
- MB_MODE_INFO *mbmi,
- PREDICTION_MODE this_mode) {
- for (int ref_idx = 0; ref_idx < 2; ++ref_idx) {
- const PREDICTION_MODE single_mode = get_single_mode(this_mode, ref_idx, 1);
- const MV_REFERENCE_FRAME ref = mbmi->ref_frame[ref_idx];
- if (single_mode == NEWMV &&
- args->single_newmv_valid[mbmi->ref_mv_idx][ref] == 0) {
- return 0;
- }
- }
- return 1;
-}
-
-static int get_drl_refmv_count(const MACROBLOCK *const x,
- const MV_REFERENCE_FRAME *ref_frame,
- PREDICTION_MODE mode) {
- MB_MODE_INFO_EXT *const mbmi_ext = x->mbmi_ext;
- const int8_t ref_frame_type = av1_ref_frame_type(ref_frame);
- const int has_nearmv = have_nearmv_in_inter_mode(mode) ? 1 : 0;
- const int ref_mv_count = mbmi_ext->ref_mv_count[ref_frame_type];
- const int only_newmv = (mode == NEWMV || mode == NEW_NEWMV);
- const int has_drl =
- (has_nearmv && ref_mv_count > 2) || (only_newmv && ref_mv_count > 1);
- const int ref_set =
- has_drl ? AOMMIN(MAX_REF_MV_SERCH, ref_mv_count - has_nearmv) : 1;
-
- return ref_set;
-}
-
-typedef struct {
- int64_t rd;
- int drl_cost;
- int rate_mv;
- int_mv mv;
-} inter_mode_info;
-
-static int64_t handle_inter_mode(const AV1_COMP *const cpi, MACROBLOCK *x,
- BLOCK_SIZE bsize, RD_STATS *rd_stats,
- RD_STATS *rd_stats_y, RD_STATS *rd_stats_uv,
- int *disable_skip, int mi_row, int mi_col,
- HandleInterModeArgs *args, int64_t ref_best_rd,
- uint8_t *const tmp_buf,
- CompoundTypeRdBuffers *rd_buffers
-#if CONFIG_COLLECT_INTER_MODE_RD_STATS
- ,
- TileDataEnc *tile_data, int64_t *best_est_rd,
- const int do_tx_search,
- InterModesInfo *inter_modes_info
-#endif
-) {
- const AV1_COMMON *cm = &cpi->common;
- const int num_planes = av1_num_planes(cm);
- MACROBLOCKD *xd = &x->e_mbd;
- MB_MODE_INFO *mbmi = xd->mi[0];
- MB_MODE_INFO_EXT *const mbmi_ext = x->mbmi_ext;
- const int is_comp_pred = has_second_ref(mbmi);
- const PREDICTION_MODE this_mode = mbmi->mode;
- int i;
- int refs[2] = { mbmi->ref_frame[0],
- (mbmi->ref_frame[1] < 0 ? 0 : mbmi->ref_frame[1]) };
- int rate_mv = 0;
- int64_t rd = INT64_MAX;
-
- // do first prediction into the destination buffer. Do the next
- // prediction into a temporary buffer. Then keep track of which one
- // of these currently holds the best predictor, and use the other
- // one for future predictions. In the end, copy from tmp_buf to
- // dst if necessary.
- struct macroblockd_plane *p = xd->plane;
- BUFFER_SET orig_dst = {
- { p[0].dst.buf, p[1].dst.buf, p[2].dst.buf },
- { p[0].dst.stride, p[1].dst.stride, p[2].dst.stride },
- };
- const BUFFER_SET tmp_dst = { { tmp_buf, tmp_buf + 1 * MAX_SB_SQUARE,
- tmp_buf + 2 * MAX_SB_SQUARE },
- { MAX_SB_SIZE, MAX_SB_SIZE, MAX_SB_SIZE } };
-
- int skip_txfm_sb = 0;
- int64_t skip_sse_sb = INT64_MAX;
- int16_t mode_ctx;
- const int masked_compound_used = is_any_masked_compound_used(bsize) &&
- cm->seq_params.enable_masked_compound;
- int64_t ret_val = INT64_MAX;
- const int8_t ref_frame_type = av1_ref_frame_type(mbmi->ref_frame);
- RD_STATS best_rd_stats, best_rd_stats_y, best_rd_stats_uv;
- int64_t best_rd = INT64_MAX;
- uint8_t best_blk_skip[MAX_MIB_SIZE * MAX_MIB_SIZE];
- MB_MODE_INFO best_mbmi = *mbmi;
- int best_disable_skip;
- int best_xskip;
- int64_t newmv_ret_val = INT64_MAX;
- int_mv backup_mv[2] = { { 0 } };
- int backup_rate_mv = 0;
- inter_mode_info mode_info[MAX_REF_MV_SERCH];
-
- int comp_idx;
- const int search_jnt_comp = is_comp_pred & cm->seq_params.enable_jnt_comp &
- (mbmi->mode != GLOBAL_GLOBALMV);
-
- // TODO(jingning): This should be deprecated shortly.
- const int has_nearmv = have_nearmv_in_inter_mode(mbmi->mode) ? 1 : 0;
- const int ref_set = get_drl_refmv_count(x, mbmi->ref_frame, this_mode);
-
- for (int ref_mv_idx = 0; ref_mv_idx < ref_set; ++ref_mv_idx) {
- mode_info[ref_mv_idx].mv.as_int = INVALID_MV;
- mode_info[ref_mv_idx].rd = INT64_MAX;
-
- if (cpi->sf.reduce_inter_modes && ref_mv_idx > 0) {
- if (mbmi->ref_frame[0] == LAST2_FRAME ||
- mbmi->ref_frame[0] == LAST3_FRAME ||
- mbmi->ref_frame[1] == LAST2_FRAME ||
- mbmi->ref_frame[1] == LAST3_FRAME) {
- if (mbmi_ext->ref_mv_stack[ref_frame_type][ref_mv_idx + has_nearmv]
- .weight < REF_CAT_LEVEL) {
- continue;
- }
- }
- }
-
- av1_init_rd_stats(rd_stats);
-
- mbmi->interinter_comp.type = COMPOUND_AVERAGE;
- mbmi->comp_group_idx = 0;
- mbmi->compound_idx = 1;
- if (mbmi->ref_frame[1] == INTRA_FRAME) mbmi->ref_frame[1] = NONE_FRAME;
-
- mode_ctx =
- av1_mode_context_analyzer(mbmi_ext->mode_context, mbmi->ref_frame);
-
- mbmi->num_proj_ref = 0;
- mbmi->motion_mode = SIMPLE_TRANSLATION;
- mbmi->ref_mv_idx = ref_mv_idx;
-
- if (is_comp_pred && (!is_single_newmv_valid(args, mbmi, this_mode))) {
- continue;
- }
-
- rd_stats->rate += args->ref_frame_cost + args->single_comp_cost;
- const int drl_cost =
- get_drl_cost(mbmi, mbmi_ext, x->drl_mode_cost0, ref_frame_type);
- rd_stats->rate += drl_cost;
- mode_info[ref_mv_idx].drl_cost = drl_cost;
-
- if (RDCOST(x->rdmult, rd_stats->rate, 0) > ref_best_rd &&
- mbmi->mode != NEARESTMV && mbmi->mode != NEAREST_NEARESTMV) {
- continue;
- }
-
- int64_t best_rd2 = INT64_MAX;
-
- const RD_STATS backup_rd_stats = *rd_stats;
- // If !search_jnt_comp, we need to force mbmi->compound_idx = 1.
- for (comp_idx = 1; comp_idx >= !search_jnt_comp; --comp_idx) {
- int rs = 0;
- int compmode_interinter_cost = 0;
- mbmi->compound_idx = comp_idx;
- if (is_comp_pred && comp_idx == 0) {
- *rd_stats = backup_rd_stats;
- mbmi->interinter_comp.type = COMPOUND_AVERAGE;
- if (mbmi->ref_frame[1] == INTRA_FRAME) mbmi->ref_frame[1] = NONE_FRAME;
- mbmi->num_proj_ref = 0;
- mbmi->motion_mode = SIMPLE_TRANSLATION;
- mbmi->comp_group_idx = 0;
-
- const int comp_group_idx_ctx = get_comp_group_idx_context(xd);
- const int comp_index_ctx = get_comp_index_context(cm, xd);
- if (masked_compound_used) {
- compmode_interinter_cost +=
- x->comp_group_idx_cost[comp_group_idx_ctx][0];
- }
- compmode_interinter_cost += x->comp_idx_cost[comp_index_ctx][0];
- }
-
- int_mv cur_mv[2];
- if (!build_cur_mv(cur_mv, this_mode, cm, x)) {
- continue;
- }
- if (have_newmv_in_inter_mode(this_mode)) {
- if (comp_idx == 0) {
- cur_mv[0] = backup_mv[0];
- cur_mv[1] = backup_mv[1];
- rate_mv = backup_rate_mv;
- }
-
- // when jnt_comp_skip_mv_search flag is on, new mv will be searched once
- if (!(search_jnt_comp && cpi->sf.jnt_comp_skip_mv_search &&
- comp_idx == 0)) {
- newmv_ret_val = handle_newmv(cpi, x, bsize, cur_mv, mi_row, mi_col,
- &rate_mv, args);
-
- // Store cur_mv and rate_mv so that they can be restored in the next
- // iteration of the loop
- backup_mv[0] = cur_mv[0];
- backup_mv[1] = cur_mv[1];
- backup_rate_mv = rate_mv;
- }
-
- if (newmv_ret_val != 0) {
- continue;
- } else {
- rd_stats->rate += rate_mv;
- }
-
- if (cpi->sf.skip_repeated_newmv) {
- if (!is_comp_pred && this_mode == NEWMV && ref_mv_idx > 0) {
- int skip = 0;
- int this_rate_mv = 0;
- for (i = 0; i < ref_mv_idx; ++i) {
- // Check if the motion search result same as previous results
- if (cur_mv[0].as_int == args->single_newmv[i][refs[0]].as_int) {
- // If the compared mode has no valid rd, it is unlikely this
- // mode will be the best mode
- if (mode_info[i].rd == INT64_MAX) {
- skip = 1;
- break;
- }
- // Compare the cost difference including drl cost and mv cost
- if (mode_info[i].mv.as_int != INVALID_MV) {
- const int compare_cost =
- mode_info[i].rate_mv + mode_info[i].drl_cost;
- const int_mv ref_mv = av1_get_ref_mv(x, 0);
- this_rate_mv = av1_mv_bit_cost(&mode_info[i].mv.as_mv,
- &ref_mv.as_mv, x->nmvjointcost,
- x->mvcost, MV_COST_WEIGHT);
- const int this_cost = this_rate_mv + drl_cost;
-
- if (compare_cost < this_cost) {
- skip = 1;
- break;
- } else {
- // If the cost is less than current best result, make this
- // the best and update corresponding variables
- if (best_mbmi.ref_mv_idx == i) {
- assert(best_rd != INT64_MAX);
- best_mbmi.ref_mv_idx = ref_mv_idx;
- best_rd_stats.rate += this_cost - compare_cost;
- best_rd = RDCOST(x->rdmult, best_rd_stats.rate,
- best_rd_stats.dist);
- if (best_rd < ref_best_rd) ref_best_rd = best_rd;
-
- skip = 1;
- break;
- }
- }
- }
- }
- }
- if (skip) {
- args->modelled_rd[this_mode][ref_mv_idx][refs[0]] =
- args->modelled_rd[this_mode][i][refs[0]];
- args->simple_rd[this_mode][ref_mv_idx][refs[0]] =
- args->simple_rd[this_mode][i][refs[0]];
- mode_info[ref_mv_idx].rd = mode_info[i].rd;
- mode_info[ref_mv_idx].rate_mv = this_rate_mv;
- mode_info[ref_mv_idx].mv.as_int = mode_info[i].mv.as_int;
-
- restore_dst_buf(xd, orig_dst, num_planes);
- continue;
- }
- }
- }
- }
- for (i = 0; i < is_comp_pred + 1; ++i) {
- mbmi->mv[i].as_int = cur_mv[i].as_int;
- }
- const int ref_mv_cost = cost_mv_ref(x, this_mode, mode_ctx);
-#if USE_DISCOUNT_NEWMV_TEST
- // We don't include the cost of the second reference here, because there
- // are only three options: Last/Golden, ARF/Last or Golden/ARF, or in
- // other words if you present them in that order, the second one is always
- // known if the first is known.
- //
- // Under some circumstances we discount the cost of new mv mode to
- // encourage initiation of a motion field.
- if (discount_newmv_test(cpi, x, this_mode, mbmi->mv[0])) {
- // discount_newmv_test only applies discount on NEWMV mode.
- assert(this_mode == NEWMV);
- rd_stats->rate += AOMMIN(cost_mv_ref(x, this_mode, mode_ctx),
- cost_mv_ref(x, NEARESTMV, mode_ctx));
- } else {
- rd_stats->rate += ref_mv_cost;
- }
-#else
- rd_stats->rate += ref_mv_cost;
-#endif
-
- if (RDCOST(x->rdmult, rd_stats->rate, 0) > ref_best_rd &&
- mbmi->mode != NEARESTMV && mbmi->mode != NEAREST_NEARESTMV) {
- continue;
- }
-
- int skip_build_pred = 0;
- if (is_comp_pred && comp_idx) {
- // Find matching interp filter or set to default interp filter
- const int need_search =
- av1_is_interp_needed(xd) && av1_is_interp_search_needed(xd);
- int match_found = -1;
- const InterpFilter assign_filter = cm->interp_filter;
- if (cpi->sf.skip_repeat_interpolation_filter_search && need_search) {
- match_found = find_interp_filter_in_stats(x, mbmi);
- }
- if (!need_search || match_found == -1) {
- set_default_interp_filters(mbmi, assign_filter);
- }
-
- int64_t best_rd_compound;
- compmode_interinter_cost = compound_type_rd(
- cpi, x, bsize, mi_col, mi_row, cur_mv, masked_compound_used,
- &orig_dst, &tmp_dst, rd_buffers, &rate_mv, &best_rd_compound,
- rd_stats, ref_best_rd);
- if (ref_best_rd < INT64_MAX && best_rd_compound / 3 > ref_best_rd) {
- restore_dst_buf(xd, orig_dst, num_planes);
- continue;
- }
- // No need to call av1_build_inter_predictors_sby if
- // COMPOUND_AVERAGE is selected because it is the first
- // candidate in compound_type_rd, and the following
- // compound types searching uses tmp_dst buffer
- if (mbmi->interinter_comp.type == COMPOUND_AVERAGE) {
- if (num_planes > 1)
- av1_build_inter_predictors_sbuv(cm, xd, mi_row, mi_col, &orig_dst,
- bsize);
- skip_build_pred = 1;
- }
- }
-
- ret_val = interpolation_filter_search(
- x, cpi, bsize, mi_row, mi_col, &tmp_dst, &orig_dst,
- args->single_filter, &rd, &rs, &skip_txfm_sb, &skip_sse_sb,
- skip_build_pred, args, ref_best_rd);
- if (args->modelled_rd != NULL && !is_comp_pred) {
- args->modelled_rd[this_mode][ref_mv_idx][refs[0]] = rd;
- }
- if (ret_val != 0) {
- restore_dst_buf(xd, orig_dst, num_planes);
- continue;
- } else if (cpi->sf.model_based_post_interp_filter_breakout &&
- ref_best_rd != INT64_MAX && (rd >> 3) * 3 > ref_best_rd) {
- restore_dst_buf(xd, orig_dst, num_planes);
- if ((rd >> 3) * 2 > ref_best_rd) break;
- continue;
- }
-
- if (search_jnt_comp) {
- // if 1/2 model rd is larger than best_rd in jnt_comp mode,
- // use jnt_comp mode, save additional search
- if ((rd >> 3) * 4 > best_rd) {
- restore_dst_buf(xd, orig_dst, num_planes);
- continue;
- }
- }
-
- if (!is_comp_pred)
- args->single_filter[this_mode][refs[0]] =
- av1_extract_interp_filter(mbmi->interp_filters, 0);
-
- if (args->modelled_rd != NULL) {
- if (is_comp_pred) {
- const int mode0 = compound_ref0_mode(this_mode);
- const int mode1 = compound_ref1_mode(this_mode);
- const int64_t mrd =
- AOMMIN(args->modelled_rd[mode0][ref_mv_idx][refs[0]],
- args->modelled_rd[mode1][ref_mv_idx][refs[1]]);
- if ((rd >> 3) * 6 > mrd && ref_best_rd < INT64_MAX) {
- restore_dst_buf(xd, orig_dst, num_planes);
- continue;
- }
- }
- }
- rd_stats->rate += compmode_interinter_cost;
-
- if (search_jnt_comp && cpi->sf.jnt_comp_fast_tx_search && comp_idx == 0) {
- // TODO(chengchen): this speed feature introduces big loss.
- // Need better estimation of rate distortion.
- int dummy_rate;
- int64_t dummy_dist;
- int plane_rate[MAX_MB_PLANE] = { 0 };
- int64_t plane_sse[MAX_MB_PLANE] = { 0 };
- int64_t plane_dist[MAX_MB_PLANE] = { 0 };
-
- model_rd_sb_fn[MODELRD_TYPE_JNT_COMPOUND](
- cpi, bsize, x, xd, 0, num_planes - 1, mi_row, mi_col, &dummy_rate,
- &dummy_dist, &skip_txfm_sb, &skip_sse_sb, plane_rate, plane_sse,
- plane_dist);
-
- rd_stats->rate += rs;
- rd_stats->rate += plane_rate[0] + plane_rate[1] + plane_rate[2];
- rd_stats_y->rate = plane_rate[0];
- rd_stats_uv->rate = plane_rate[1] + plane_rate[2];
- rd_stats->sse = plane_sse[0] + plane_sse[1] + plane_sse[2];
- rd_stats_y->sse = plane_sse[0];
- rd_stats_uv->sse = plane_sse[1] + plane_sse[2];
- rd_stats->dist = plane_dist[0] + plane_dist[1] + plane_dist[2];
- rd_stats_y->dist = plane_dist[0];
- rd_stats_uv->dist = plane_dist[1] + plane_dist[2];
- } else {
-#if CONFIG_COLLECT_INTER_MODE_RD_STATS
- ret_val = motion_mode_rd(
- cpi, x, bsize, rd_stats, rd_stats_y, rd_stats_uv, disable_skip,
- mi_row, mi_col, args, ref_best_rd, refs, &rate_mv, &orig_dst,
- tile_data, best_est_rd, do_tx_search, inter_modes_info);
-#else
- ret_val = motion_mode_rd(cpi, x, bsize, rd_stats, rd_stats_y,
- rd_stats_uv, disable_skip, mi_row, mi_col,
- args, ref_best_rd, refs, &rate_mv, &orig_dst);
-#endif
- }
- mode_info[ref_mv_idx].mv.as_int = mbmi->mv[0].as_int;
- mode_info[ref_mv_idx].rate_mv = rate_mv;
- if (ret_val != INT64_MAX) {
- int64_t tmp_rd = RDCOST(x->rdmult, rd_stats->rate, rd_stats->dist);
- mode_info[ref_mv_idx].rd = tmp_rd;
- if (tmp_rd < best_rd) {
- best_rd_stats = *rd_stats;
- best_rd_stats_y = *rd_stats_y;
- best_rd_stats_uv = *rd_stats_uv;
- best_rd = tmp_rd;
- best_mbmi = *mbmi;
- best_disable_skip = *disable_skip;
- best_xskip = x->skip;
- memcpy(best_blk_skip, x->blk_skip,
- sizeof(best_blk_skip[0]) * xd->n4_h * xd->n4_w);
- }
-
- if (tmp_rd < best_rd2) {
- best_rd2 = tmp_rd;
- }
-
- if (tmp_rd < ref_best_rd) {
- ref_best_rd = tmp_rd;
- }
- }
- restore_dst_buf(xd, orig_dst, num_planes);
- }
- }
-
- if (best_rd == INT64_MAX) return INT64_MAX;
-
- // re-instate status of the best choice
- *rd_stats = best_rd_stats;
- *rd_stats_y = best_rd_stats_y;
- *rd_stats_uv = best_rd_stats_uv;
- *mbmi = best_mbmi;
- *disable_skip = best_disable_skip;
- x->skip = best_xskip;
- assert(IMPLIES(mbmi->comp_group_idx == 1,
- mbmi->interinter_comp.type != COMPOUND_AVERAGE));
- memcpy(x->blk_skip, best_blk_skip,
- sizeof(best_blk_skip[0]) * xd->n4_h * xd->n4_w);
-
- return RDCOST(x->rdmult, rd_stats->rate, rd_stats->dist);
-}
-
-static int64_t rd_pick_intrabc_mode_sb(const AV1_COMP *cpi, MACROBLOCK *x,
- RD_STATS *rd_cost, BLOCK_SIZE bsize,
- int64_t best_rd) {
- const AV1_COMMON *const cm = &cpi->common;
- if (!av1_allow_intrabc(cm)) return INT64_MAX;
- const int num_planes = av1_num_planes(cm);
-
- MACROBLOCKD *const xd = &x->e_mbd;
- const TileInfo *tile = &xd->tile;
- MB_MODE_INFO *mbmi = xd->mi[0];
- const int mi_row = -xd->mb_to_top_edge / (8 * MI_SIZE);
- const int mi_col = -xd->mb_to_left_edge / (8 * MI_SIZE);
- const int w = block_size_wide[bsize];
- const int h = block_size_high[bsize];
- const int sb_row = mi_row >> cm->seq_params.mib_size_log2;
- const int sb_col = mi_col >> cm->seq_params.mib_size_log2;
-
- MB_MODE_INFO_EXT *const mbmi_ext = x->mbmi_ext;
- MV_REFERENCE_FRAME ref_frame = INTRA_FRAME;
- av1_find_mv_refs(cm, xd, mbmi, ref_frame, mbmi_ext->ref_mv_count,
- mbmi_ext->ref_mv_stack, NULL, mbmi_ext->global_mvs, mi_row,
- mi_col, mbmi_ext->mode_context);
-
- int_mv nearestmv, nearmv;
- av1_find_best_ref_mvs_from_stack(0, mbmi_ext, ref_frame, &nearestmv, &nearmv,
- 0);
-
- if (nearestmv.as_int == INVALID_MV) {
- nearestmv.as_int = 0;
- }
- if (nearmv.as_int == INVALID_MV) {
- nearmv.as_int = 0;
- }
-
- int_mv dv_ref = nearestmv.as_int == 0 ? nearmv : nearestmv;
- if (dv_ref.as_int == 0)
- av1_find_ref_dv(&dv_ref, tile, cm->seq_params.mib_size, mi_row, mi_col);
- // Ref DV should not have sub-pel.
- assert((dv_ref.as_mv.col & 7) == 0);
- assert((dv_ref.as_mv.row & 7) == 0);
- mbmi_ext->ref_mv_stack[INTRA_FRAME][0].this_mv = dv_ref;
-
- struct buf_2d yv12_mb[MAX_MB_PLANE];
- av1_setup_pred_block(xd, yv12_mb, xd->cur_buf, mi_row, mi_col, NULL, NULL,
- num_planes);
- for (int i = 0; i < num_planes; ++i) {
- xd->plane[i].pre[0] = yv12_mb[i];
- }
-
- enum IntrabcMotionDirection {
- IBC_MOTION_ABOVE,
- IBC_MOTION_LEFT,
- IBC_MOTION_DIRECTIONS
- };
-
- MB_MODE_INFO best_mbmi = *mbmi;
- RD_STATS best_rdcost = *rd_cost;
- int best_skip = x->skip;
-
- uint8_t best_blk_skip[MAX_MIB_SIZE * MAX_MIB_SIZE] = { 0 };
- for (enum IntrabcMotionDirection dir = IBC_MOTION_ABOVE;
- dir < IBC_MOTION_DIRECTIONS; ++dir) {
- const MvLimits tmp_mv_limits = x->mv_limits;
- switch (dir) {
- case IBC_MOTION_ABOVE:
- x->mv_limits.col_min = (tile->mi_col_start - mi_col) * MI_SIZE;
- x->mv_limits.col_max = (tile->mi_col_end - mi_col) * MI_SIZE - w;
- x->mv_limits.row_min = (tile->mi_row_start - mi_row) * MI_SIZE;
- x->mv_limits.row_max =
- (sb_row * cm->seq_params.mib_size - mi_row) * MI_SIZE - h;
- break;
- case IBC_MOTION_LEFT:
- x->mv_limits.col_min = (tile->mi_col_start - mi_col) * MI_SIZE;
- x->mv_limits.col_max =
- (sb_col * cm->seq_params.mib_size - mi_col) * MI_SIZE - w;
- // TODO(aconverse@google.com): Minimize the overlap between above and
- // left areas.
- x->mv_limits.row_min = (tile->mi_row_start - mi_row) * MI_SIZE;
- int bottom_coded_mi_edge =
- AOMMIN((sb_row + 1) * cm->seq_params.mib_size, tile->mi_row_end);
- x->mv_limits.row_max = (bottom_coded_mi_edge - mi_row) * MI_SIZE - h;
- break;
- default: assert(0);
- }
- assert(x->mv_limits.col_min >= tmp_mv_limits.col_min);
- assert(x->mv_limits.col_max <= tmp_mv_limits.col_max);
- assert(x->mv_limits.row_min >= tmp_mv_limits.row_min);
- assert(x->mv_limits.row_max <= tmp_mv_limits.row_max);
- av1_set_mv_search_range(&x->mv_limits, &dv_ref.as_mv);
-
- if (x->mv_limits.col_max < x->mv_limits.col_min ||
- x->mv_limits.row_max < x->mv_limits.row_min) {
- x->mv_limits = tmp_mv_limits;
- continue;
- }
-
- int step_param = cpi->mv_step_param;
- MV mvp_full = dv_ref.as_mv;
- mvp_full.col >>= 3;
- mvp_full.row >>= 3;
- int sadpb = x->sadperbit16;
- int cost_list[5];
- int bestsme = av1_full_pixel_search(
- cpi, x, bsize, &mvp_full, step_param, cpi->sf.mv.search_method, 0,
- sadpb, cond_cost_list(cpi, cost_list), &dv_ref.as_mv, INT_MAX, 1,
- (MI_SIZE * mi_col), (MI_SIZE * mi_row), 1);
-
- x->mv_limits = tmp_mv_limits;
- if (bestsme == INT_MAX) continue;
- mvp_full = x->best_mv.as_mv;
- MV dv = { .row = mvp_full.row * 8, .col = mvp_full.col * 8 };
- if (mv_check_bounds(&x->mv_limits, &dv)) continue;
- if (!av1_is_dv_valid(dv, cm, xd, mi_row, mi_col, bsize,
- cm->seq_params.mib_size_log2))
- continue;
-
- // DV should not have sub-pel.
- assert((dv.col & 7) == 0);
- assert((dv.row & 7) == 0);
- memset(&mbmi->palette_mode_info, 0, sizeof(mbmi->palette_mode_info));
- mbmi->filter_intra_mode_info.use_filter_intra = 0;
- mbmi->use_intrabc = 1;
- mbmi->mode = DC_PRED;
- mbmi->uv_mode = UV_DC_PRED;
- mbmi->motion_mode = SIMPLE_TRANSLATION;
- mbmi->mv[0].as_mv = dv;
- mbmi->interp_filters = av1_broadcast_interp_filter(BILINEAR);
- mbmi->skip = 0;
- x->skip = 0;
- av1_build_inter_predictors_sb(cm, xd, mi_row, mi_col, NULL, bsize);
-
- int *dvcost[2] = { (int *)&cpi->dv_cost[0][MV_MAX],
- (int *)&cpi->dv_cost[1][MV_MAX] };
- // TODO(aconverse@google.com): The full motion field defining discount
- // in MV_COST_WEIGHT is too large. Explore other values.
- int rate_mv = av1_mv_bit_cost(&dv, &dv_ref.as_mv, cpi->dv_joint_cost,
- dvcost, MV_COST_WEIGHT_SUB);
- const int rate_mode = x->intrabc_cost[1];
- RD_STATS rd_stats, rd_stats_uv;
- av1_subtract_plane(x, bsize, 0);
- if (cm->tx_mode == TX_MODE_SELECT && !xd->lossless[mbmi->segment_id]) {
- // Intrabc
- select_tx_type_yrd(cpi, x, &rd_stats, bsize, mi_row, mi_col, INT64_MAX);
- } else {
- super_block_yrd(cpi, x, &rd_stats, bsize, INT64_MAX);
- memset(mbmi->inter_tx_size, mbmi->tx_size, sizeof(mbmi->inter_tx_size));
- for (int i = 0; i < xd->n4_h * xd->n4_w; ++i)
- set_blk_skip(x, 0, i, rd_stats.skip);
- }
- if (num_planes > 1) {
- super_block_uvrd(cpi, x, &rd_stats_uv, bsize, INT64_MAX);
- av1_merge_rd_stats(&rd_stats, &rd_stats_uv);
- }
-#if CONFIG_RD_DEBUG
- mbmi->rd_stats = rd_stats;
-#endif
-
- const int skip_ctx = av1_get_skip_context(xd);
-
- RD_STATS rdc_noskip;
- av1_init_rd_stats(&rdc_noskip);
- rdc_noskip.rate =
- rate_mode + rate_mv + rd_stats.rate + x->skip_cost[skip_ctx][0];
- rdc_noskip.dist = rd_stats.dist;
- rdc_noskip.rdcost = RDCOST(x->rdmult, rdc_noskip.rate, rdc_noskip.dist);
- if (rdc_noskip.rdcost < best_rd) {
- best_rd = rdc_noskip.rdcost;
- best_mbmi = *mbmi;
- best_skip = x->skip;
- best_rdcost = rdc_noskip;
- memcpy(best_blk_skip, x->blk_skip,
- sizeof(x->blk_skip[0]) * xd->n4_h * xd->n4_w);
- }
-
- if (!xd->lossless[mbmi->segment_id]) {
- x->skip = 1;
- mbmi->skip = 1;
- RD_STATS rdc_skip;
- av1_init_rd_stats(&rdc_skip);
- rdc_skip.rate = rate_mode + rate_mv + x->skip_cost[skip_ctx][1];
- rdc_skip.dist = rd_stats.sse;
- rdc_skip.rdcost = RDCOST(x->rdmult, rdc_skip.rate, rdc_skip.dist);
- if (rdc_skip.rdcost < best_rd) {
- best_rd = rdc_skip.rdcost;
- best_mbmi = *mbmi;
- best_skip = x->skip;
- best_rdcost = rdc_skip;
- memcpy(best_blk_skip, x->blk_skip,
- sizeof(x->blk_skip[0]) * xd->n4_h * xd->n4_w);
- }
- }
- }
- *mbmi = best_mbmi;
- *rd_cost = best_rdcost;
- x->skip = best_skip;
- memcpy(x->blk_skip, best_blk_skip,
- sizeof(x->blk_skip[0]) * xd->n4_h * xd->n4_w);
- return best_rd;
-}
-
-void av1_rd_pick_intra_mode_sb(const AV1_COMP *cpi, MACROBLOCK *x, int mi_row,
- int mi_col, RD_STATS *rd_cost, BLOCK_SIZE bsize,
- PICK_MODE_CONTEXT *ctx, int64_t best_rd) {
- const AV1_COMMON *const cm = &cpi->common;
- MACROBLOCKD *const xd = &x->e_mbd;
- MB_MODE_INFO *const mbmi = xd->mi[0];
- const int num_planes = av1_num_planes(cm);
- int rate_y = 0, rate_uv = 0, rate_y_tokenonly = 0, rate_uv_tokenonly = 0;
- int y_skip = 0, uv_skip = 0;
- int64_t dist_y = 0, dist_uv = 0;
- TX_SIZE max_uv_tx_size;
-
- ctx->skip = 0;
- mbmi->ref_frame[0] = INTRA_FRAME;
- mbmi->ref_frame[1] = NONE_FRAME;
- mbmi->use_intrabc = 0;
- mbmi->mv[0].as_int = 0;
-
- const int64_t intra_yrd =
- rd_pick_intra_sby_mode(cpi, x, mi_row, mi_col, &rate_y, &rate_y_tokenonly,
- &dist_y, &y_skip, bsize, best_rd, ctx);
-
- if (intra_yrd < best_rd) {
- // Only store reconstructed luma when there's chroma RDO. When there's no
- // chroma RDO, the reconstructed luma will be stored in encode_superblock().
- xd->cfl.is_chroma_reference =
- is_chroma_reference(mi_row, mi_col, bsize, cm->seq_params.subsampling_x,
- cm->seq_params.subsampling_y);
- xd->cfl.store_y = store_cfl_required_rdo(cm, x);
- if (xd->cfl.store_y) {
- // Restore reconstructed luma values.
- memcpy(x->blk_skip, ctx->blk_skip,
- sizeof(x->blk_skip[0]) * ctx->num_4x4_blk);
- av1_encode_intra_block_plane(cpi, x, bsize, AOM_PLANE_Y,
- cpi->optimize_seg_arr[mbmi->segment_id],
- mi_row, mi_col);
- xd->cfl.store_y = 0;
- }
- if (num_planes > 1) {
- max_uv_tx_size = av1_get_tx_size(AOM_PLANE_U, xd);
- init_sbuv_mode(mbmi);
- if (!x->skip_chroma_rd)
- rd_pick_intra_sbuv_mode(cpi, x, &rate_uv, &rate_uv_tokenonly, &dist_uv,
- &uv_skip, bsize, max_uv_tx_size);
- }
-
- if (y_skip && (uv_skip || x->skip_chroma_rd)) {
- rd_cost->rate = rate_y + rate_uv - rate_y_tokenonly - rate_uv_tokenonly +
- x->skip_cost[av1_get_skip_context(xd)][1];
- rd_cost->dist = dist_y + dist_uv;
- } else {
- rd_cost->rate =
- rate_y + rate_uv + x->skip_cost[av1_get_skip_context(xd)][0];
- rd_cost->dist = dist_y + dist_uv;
- }
- rd_cost->rdcost = RDCOST(x->rdmult, rd_cost->rate, rd_cost->dist);
- } else {
- rd_cost->rate = INT_MAX;
- }
-
- if (rd_cost->rate != INT_MAX && rd_cost->rdcost < best_rd)
- best_rd = rd_cost->rdcost;
- if (rd_pick_intrabc_mode_sb(cpi, x, rd_cost, bsize, best_rd) < best_rd) {
- ctx->skip = x->skip;
- memcpy(ctx->blk_skip, x->blk_skip,
- sizeof(x->blk_skip[0]) * ctx->num_4x4_blk);
- assert(rd_cost->rate != INT_MAX);
- }
- if (rd_cost->rate == INT_MAX) return;
-
- ctx->mic = *xd->mi[0];
- ctx->mbmi_ext = *x->mbmi_ext;
-}
-
-// Rebuilds the color index map of plane 1 (chroma) for the current block from
-// the source samples of planes 1 and 2 and the chroma palette colors stored in
-// the block's palette mode info.
-//
-// Each pixel contributes an interleaved (U, V) pair to the k-means scratch
-// buffer; av1_calc_indices() then assigns every pair to one of the palette's
-// 2-D centroids, and the resulting map is extended to the full plane block
-// size.
-static void restore_uv_color_map(const AV1_COMP *const cpi, MACROBLOCK *x) {
-  MACROBLOCKD *const xd = &x->e_mbd;
-  const MB_MODE_INFO *const mbmi = xd->mi[0];
-  const PALETTE_MODE_INFO *const pmi = &mbmi->palette_mode_info;
-  const int n_colors = pmi->palette_size[1];
-  const int stride = x->plane[1].src.stride;
-  const uint8_t *const src_u = x->plane[1].src.buf;
-  const uint8_t *const src_v = x->plane[2].src.buf;
-  int *const samples = x->palette_buffer->kmeans_data_buf;
-  uint8_t *const color_map = xd->plane[1].color_index_map;
-  int centroids[2 * PALETTE_MAX_SIZE];
-  int plane_block_width, plane_block_height, rows, cols;
-
-  av1_get_block_dimensions(mbmi->sb_type, 1, xd, &plane_block_width,
-                           &plane_block_height, &rows, &cols);
-
-  // Gather the source chroma as interleaved (U, V) pairs.
-  if (cpi->common.seq_params.use_highbitdepth) {
-    const uint16_t *const src_u16 = CONVERT_TO_SHORTPTR(src_u);
-    const uint16_t *const src_v16 = CONVERT_TO_SHORTPTR(src_v);
-    for (int row = 0; row < rows; ++row) {
-      for (int col = 0; col < cols; ++col) {
-        int *const uv = &samples[(row * cols + col) * 2];
-        uv[0] = src_u16[row * stride + col];
-        uv[1] = src_v16[row * stride + col];
-      }
-    }
-  } else {
-    for (int row = 0; row < rows; ++row) {
-      for (int col = 0; col < cols; ++col) {
-        int *const uv = &samples[(row * cols + col) * 2];
-        uv[0] = src_u[row * stride + col];
-        uv[1] = src_v[row * stride + col];
-      }
-    }
-  }
-
-  // Palette colors are stored plane by plane; repack them as (U, V) pairs.
-  for (int i = 0; i < n_colors; ++i) {
-    centroids[2 * i] = pmi->palette_colors[PALETTE_MAX_SIZE + i];
-    centroids[2 * i + 1] = pmi->palette_colors[2 * PALETTE_MAX_SIZE + i];
-  }
-
-  av1_calc_indices(samples, centroids, color_map, rows * cols, n_colors, 2);
-  extend_palette_color_map(color_map, cols, rows, plane_block_width,
-                           plane_block_height);
-}
-/*
- * Forward declaration, needed because set_params_rd_pick_inter_mode() below
- * calls this function; being static, its definition lives elsewhere in this
- * file.
- */
-static void calc_target_weighted_pred(const AV1_COMMON *cm, const MACROBLOCK *x,
- const MACROBLOCKD *xd, int mi_row,
- int mi_col, const uint8_t *above,
- int above_stride, const uint8_t *left,
- int left_stride);
-/*
- * Flag bit for each reference frame, indexed by reference frame number and
- * tested against cpi->ref_frame_flags. Entry 0 carries no flag (presumably
- * the INTRA_FRAME slot - confirm); the remaining entries follow the order
- * LAST, LAST2, LAST3, GOLDEN, BWDREF, ALTREF2, ALTREF.
- */
-static const int ref_frame_flag_list[REF_FRAMES] = { 0,
- AOM_LAST_FLAG,
- AOM_LAST2_FLAG,
- AOM_LAST3_FLAG,
- AOM_GOLD_FLAG,
- AOM_BWD_FLAG,
- AOM_ALT2_FLAG,
- AOM_ALT_FLAG };
-/*
- * Evaluates skip mode for the current block and adopts it as the best mode
- * when its RD cost does not exceed that of the best intra/inter mode found so
- * far (passed in through rd_cost).
- *
- * The candidate is always NEAREST_NEARESTMV on the reference pair
- * LAST_FRAME + cm->ref_frame_idx_0 and LAST_FRAME + cm->ref_frame_idx_1.
- *
- * rd_cost       in: rate/dist of the best mode so far; out: overwritten with
- *               the skip-mode rate/dist/sse/rdcost if skip mode wins.
- * search_state  best_mbmode, best_mode_index, best_rd, best_skip2 and
- *               best_mode_skippable are updated if skip mode wins.
- * yv12_mb       per-reference plane buffers used as prediction sources.
- *
- * Returns early, leaving rd_cost and search_state untouched, when either
- * skip-mode reference index is INVALID_IDX, no mode index exists for the
- * candidate, the single-reference MV lists are unavailable, or build_cur_mv()
- * fails. Note that x->compound_idx (and, past the mode-index check, fields
- * of xd->mi[0]) are modified even on those early-return paths.
- */
-static void rd_pick_skip_mode(RD_STATS *rd_cost,
- InterModeSearchState *search_state,
- const AV1_COMP *const cpi, MACROBLOCK *const x,
- BLOCK_SIZE bsize, int mi_row, int mi_col,
- struct buf_2d yv12_mb[REF_FRAMES][MAX_MB_PLANE]) {
- const AV1_COMMON *const cm = &cpi->common;
- const int num_planes = av1_num_planes(cm);
- MACROBLOCKD *const xd = &x->e_mbd;
- MB_MODE_INFO *const mbmi = xd->mi[0];
-
- x->compound_idx = 1; // COMPOUND_AVERAGE
- RD_STATS skip_mode_rd_stats;
- av1_invalid_rd_stats(&skip_mode_rd_stats);
-
- if (cm->ref_frame_idx_0 == INVALID_IDX ||
- cm->ref_frame_idx_1 == INVALID_IDX) {
- return;
- }
-
- const MV_REFERENCE_FRAME ref_frame = LAST_FRAME + cm->ref_frame_idx_0;
- const MV_REFERENCE_FRAME second_ref_frame = LAST_FRAME + cm->ref_frame_idx_1;
- const PREDICTION_MODE this_mode = NEAREST_NEARESTMV;
- const int mode_index =
- get_prediction_mode_idx(this_mode, ref_frame, second_ref_frame);
-
- if (mode_index == -1) {
- return;
- }
-
- mbmi->mode = this_mode;
- mbmi->uv_mode = UV_DC_PRED;
- mbmi->ref_frame[0] = ref_frame;
- mbmi->ref_frame[1] = second_ref_frame;
- const uint8_t ref_frame_type = av1_ref_frame_type(mbmi->ref_frame);
- if (x->mbmi_ext->ref_mv_count[ref_frame_type] == UINT8_MAX) { // UINT8_MAX seems to mark "MV list not built yet" - confirm
- if (x->mbmi_ext->ref_mv_count[ref_frame] == UINT8_MAX ||
- x->mbmi_ext->ref_mv_count[second_ref_frame] == UINT8_MAX) {
- return;
- }
- MB_MODE_INFO_EXT *mbmi_ext = x->mbmi_ext;
- av1_find_mv_refs(cm, xd, mbmi, ref_frame_type, mbmi_ext->ref_mv_count,
- mbmi_ext->ref_mv_stack, NULL, mbmi_ext->global_mvs, mi_row,
- mi_col, mbmi_ext->mode_context);
- }
-
- assert(this_mode == NEAREST_NEARESTMV);
- if (!build_cur_mv(mbmi->mv, this_mode, cm, x)) {
- return;
- }
-
- mbmi->filter_intra_mode_info.use_filter_intra = 0;
- mbmi->interintra_mode = (INTERINTRA_MODE)(II_DC_PRED - 1);
- mbmi->comp_group_idx = 0;
- mbmi->compound_idx = x->compound_idx;
- mbmi->interinter_comp.type = COMPOUND_AVERAGE;
- mbmi->motion_mode = SIMPLE_TRANSLATION;
- mbmi->ref_mv_idx = 0;
- mbmi->skip_mode = mbmi->skip = 1;
-
- set_default_interp_filters(mbmi, cm->interp_filter);
-
- set_ref_ptrs(cm, xd, mbmi->ref_frame[0], mbmi->ref_frame[1]);
- for (int i = 0; i < num_planes; i++) {
- xd->plane[i].pre[0] = yv12_mb[mbmi->ref_frame[0]][i];
- xd->plane[i].pre[1] = yv12_mb[mbmi->ref_frame[1]][i];
- }
-
- BUFFER_SET orig_dst;
- for (int i = 0; i < num_planes; i++) {
- orig_dst.plane[i] = xd->plane[i].dst.buf;
- orig_dst.stride[i] = xd->plane[i].dst.stride;
- }
-
- // Obtain the rdcost for skip_mode.
- skip_mode_rd(&skip_mode_rd_stats, cpi, x, bsize, mi_row, mi_col, &orig_dst);
-
- // Compare the use of skip_mode with the best intra/inter mode obtained.
- // The competing mode is charged the cost of signalling "not skip mode".
- const int skip_mode_ctx = av1_get_skip_mode_context(xd);
- const int64_t best_intra_inter_mode_cost =
- (rd_cost->dist < INT64_MAX && rd_cost->rate < INT32_MAX)
- ? RDCOST(x->rdmult,
- rd_cost->rate + x->skip_mode_cost[skip_mode_ctx][0],
- rd_cost->dist)
- : INT64_MAX;
-
- if (skip_mode_rd_stats.rdcost <= best_intra_inter_mode_cost) {
- assert(mode_index != -1); // already guaranteed by the early return above
- search_state->best_mbmode.skip_mode = 1; // NOTE(review): dead store, overwritten by the struct copy below
- search_state->best_mbmode = *mbmi;
-
- search_state->best_mbmode.skip_mode = search_state->best_mbmode.skip = 1;
- search_state->best_mbmode.mode = NEAREST_NEARESTMV;
- search_state->best_mbmode.ref_frame[0] = mbmi->ref_frame[0];
- search_state->best_mbmode.ref_frame[1] = mbmi->ref_frame[1];
- search_state->best_mbmode.mv[0].as_int = mbmi->mv[0].as_int;
- search_state->best_mbmode.mv[1].as_int = mbmi->mv[1].as_int;
- search_state->best_mbmode.ref_mv_idx = 0;
-
- // Set up tx_size related variables for skip-specific loop filtering.
- search_state->best_mbmode.tx_size =
- block_signals_txsize(bsize) ? tx_size_from_tx_mode(bsize, cm->tx_mode)
- : max_txsize_rect_lookup[bsize];
- memset(search_state->best_mbmode.inter_tx_size,
- search_state->best_mbmode.tx_size,
- sizeof(search_state->best_mbmode.inter_tx_size));
- set_txfm_ctxs(search_state->best_mbmode.tx_size, xd->n4_w, xd->n4_h,
- search_state->best_mbmode.skip && is_inter_block(mbmi), xd);
-
- // Set up color-related variables for skip mode.
- search_state->best_mbmode.uv_mode = UV_DC_PRED;
- search_state->best_mbmode.palette_mode_info.palette_size[0] = 0;
- search_state->best_mbmode.palette_mode_info.palette_size[1] = 0;
-
- search_state->best_mbmode.comp_group_idx = 0;
- search_state->best_mbmode.compound_idx = x->compound_idx;
- search_state->best_mbmode.interinter_comp.type = COMPOUND_AVERAGE;
- search_state->best_mbmode.motion_mode = SIMPLE_TRANSLATION;
-
- search_state->best_mbmode.interintra_mode =
- (INTERINTRA_MODE)(II_DC_PRED - 1);
- search_state->best_mbmode.filter_intra_mode_info.use_filter_intra = 0;
-
- set_default_interp_filters(&search_state->best_mbmode, cm->interp_filter);
-
- search_state->best_mode_index = mode_index;
-
- // Update rd_cost
- rd_cost->rate = skip_mode_rd_stats.rate;
- rd_cost->dist = rd_cost->sse = skip_mode_rd_stats.dist;
- rd_cost->rdcost = skip_mode_rd_stats.rdcost;
-
- search_state->best_rd = rd_cost->rdcost;
- search_state->best_skip2 = 1;
- search_state->best_mode_skippable = (skip_mode_rd_stats.sse == 0);
-
- x->skip = 1;
- }
-}
-/*
- * Re-runs the transform search for the already chosen best mode with the
- * default-transform-type shortcuts switched off, and keeps the result if it
- * lowers the RD cost.
- *
- * Does nothing unless the segment is not lossless, best_mode_index >= 0 and
- * the fast tx-type search speed feature matching the best mode (inter or
- * intra) equals 1.
- *
- * best_mbmode   in: chosen mode; out: tx_size, inter_tx_size and txk_type are
- *               replaced when the refined search wins.
- * rd_cost       in/out: rate, dist and rdcost are updated on a win.
- * ctx           out: blk_skip is refreshed from x->blk_skip on a win.
- * yv12_mb       per-reference plane buffers used as prediction sources.
- * best_rate_y,
- * best_rate_uv  rates of the chosen mode, used as the baseline to beat.
- * best_skip2    out: set to the new skip decision on a win.
- *
- * Side effects when the refinement runs: clears
- * x->use_default_inter_tx_type and x->use_default_intra_tx_type and
- * overwrites xd->mi[0] with *best_mbmode.
- */
-// speed feature: fast intra/inter transform type search
-// Used for speed >= 2
-// When this speed feature is on, in rd mode search, only DCT is used.
-// After the mode is determined, this function is called, to select
-// transform types and get accurate rdcost.
-static void sf_refine_fast_tx_type_search(
- const AV1_COMP *cpi, MACROBLOCK *x, int mi_row, int mi_col,
- RD_STATS *rd_cost, BLOCK_SIZE bsize, PICK_MODE_CONTEXT *ctx,
- int best_mode_index, MB_MODE_INFO *best_mbmode,
- struct buf_2d yv12_mb[REF_FRAMES][MAX_MB_PLANE], int best_rate_y,
- int best_rate_uv, int *best_skip2) {
- const AV1_COMMON *const cm = &cpi->common;
- const SPEED_FEATURES *const sf = &cpi->sf;
- MACROBLOCKD *const xd = &x->e_mbd;
- MB_MODE_INFO *const mbmi = xd->mi[0];
- const int num_planes = av1_num_planes(cm);
-
- if (xd->lossless[mbmi->segment_id] == 0 && best_mode_index >= 0 &&
- ((sf->tx_type_search.fast_inter_tx_type_search == 1 &&
- is_inter_mode(best_mbmode->mode)) ||
- (sf->tx_type_search.fast_intra_tx_type_search == 1 &&
- !is_inter_mode(best_mbmode->mode)))) {
- int skip_blk = 0;
- RD_STATS rd_stats_y, rd_stats_uv;
-
- x->use_default_inter_tx_type = 0;
- x->use_default_intra_tx_type = 0;
-
- *mbmi = *best_mbmode;
-
- set_ref_ptrs(cm, xd, mbmi->ref_frame[0], mbmi->ref_frame[1]);
-
- // Select prediction reference frames.
- for (int i = 0; i < num_planes; i++) {
- xd->plane[i].pre[0] = yv12_mb[mbmi->ref_frame[0]][i];
- if (has_second_ref(mbmi))
- xd->plane[i].pre[1] = yv12_mb[mbmi->ref_frame[1]][i];
- }
-
- if (is_inter_mode(mbmi->mode)) {
- av1_build_inter_predictors_sb(cm, xd, mi_row, mi_col, NULL, bsize);
- if (mbmi->motion_mode == OBMC_CAUSAL)
- av1_build_obmc_inter_predictors_sb(cm, xd, mi_row, mi_col);
-
- av1_subtract_plane(x, bsize, 0);
- if (cm->tx_mode == TX_MODE_SELECT && !xd->lossless[mbmi->segment_id]) {
- // av1_rd_pick_inter_mode_sb
- select_tx_type_yrd(cpi, x, &rd_stats_y, bsize, mi_row, mi_col,
- INT64_MAX);
- assert(rd_stats_y.rate != INT_MAX);
- } else {
- super_block_yrd(cpi, x, &rd_stats_y, bsize, INT64_MAX);
- memset(mbmi->inter_tx_size, mbmi->tx_size, sizeof(mbmi->inter_tx_size));
- for (int i = 0; i < xd->n4_h * xd->n4_w; ++i)
- set_blk_skip(x, 0, i, rd_stats_y.skip);
- }
- if (num_planes > 1) {
- inter_block_uvrd(cpi, x, &rd_stats_uv, bsize, INT64_MAX, INT64_MAX,
- FTXS_NONE);
- } else {
- av1_init_rd_stats(&rd_stats_uv);
- }
- } else {
- super_block_yrd(cpi, x, &rd_stats_y, bsize, INT64_MAX);
- if (num_planes > 1) {
- super_block_uvrd(cpi, x, &rd_stats_uv, bsize, INT64_MAX);
- } else {
- av1_init_rd_stats(&rd_stats_uv);
- }
- }
- // Drop the residual when coding it costs more than leaving the SSE as is.
- if (RDCOST(x->rdmult, rd_stats_y.rate + rd_stats_uv.rate,
- (rd_stats_y.dist + rd_stats_uv.dist)) >
- RDCOST(x->rdmult, 0, (rd_stats_y.sse + rd_stats_uv.sse))) {
- skip_blk = 1;
- rd_stats_y.rate = x->skip_cost[av1_get_skip_context(xd)][1];
- rd_stats_uv.rate = 0;
- rd_stats_y.dist = rd_stats_y.sse;
- rd_stats_uv.dist = rd_stats_uv.sse;
- } else {
- skip_blk = 0;
- rd_stats_y.rate += x->skip_cost[av1_get_skip_context(xd)][0];
- }
- // Adopt the refined transform choice only if it beats the original rates.
- if (RDCOST(x->rdmult, best_rate_y + best_rate_uv, rd_cost->dist) >
- RDCOST(x->rdmult, rd_stats_y.rate + rd_stats_uv.rate,
- (rd_stats_y.dist + rd_stats_uv.dist))) {
- best_mbmode->tx_size = mbmi->tx_size;
- av1_copy(best_mbmode->inter_tx_size, mbmi->inter_tx_size);
- memcpy(ctx->blk_skip, x->blk_skip,
- sizeof(x->blk_skip[0]) * ctx->num_4x4_blk);
- av1_copy(best_mbmode->txk_type, mbmi->txk_type);
- rd_cost->rate +=
- (rd_stats_y.rate + rd_stats_uv.rate - best_rate_y - best_rate_uv);
- rd_cost->dist = rd_stats_y.dist + rd_stats_uv.dist;
- rd_cost->rdcost = RDCOST(x->rdmult, rd_cost->rate, rd_cost->dist);
- *best_skip2 = skip_blk;
- }
- }
-}
-/*
- * Sets up the parameters and skip masks used for inter mode RD selection on
- * one block.
- *
- * Outputs:
- *   args                 single_filter table reset to SWITCHABLE; above/left
- *                        prediction buffer pointers derived from x.
- *   ref_frame_skip_mask  [0] / [1]: bit masks of reference frames to skip.
- *   mode_skip_mask       per-reference bit masks of prediction modes to skip.
- *   ref_costs_single,
- *   ref_costs_comp       passed to estimate_ref_frame_costs().
- *   yv12_mb              passed to setup_buffer_ref_mvs_inter() for every
- *                        enabled single reference (presumably filled there).
- *
- * Also resets per-reference state in x and x->mbmi_ext (pred_mv_sad,
- * mode_context, compound_mode_context, ref_mv_count), builds the above/left
- * neighbor predictions when overlappable neighbors exist and the block size
- * allows motion variation, and sets x->use_default_intra_tx_type /
- * x->use_default_inter_tx_type from the speed features.
- *
- * skip_ref_frame_mask is only honoured when mbmi->partition is neither
- * PARTITION_NONE nor PARTITION_SPLIT.
- */
-// Please add/modify parameter setting in this function, making it consistent
-// and easy to read and maintain.
-static void set_params_rd_pick_inter_mode(
- const AV1_COMP *cpi, MACROBLOCK *x, HandleInterModeArgs *args,
- BLOCK_SIZE bsize, int mi_row, int mi_col, uint16_t ref_frame_skip_mask[2],
- uint32_t mode_skip_mask[REF_FRAMES], int skip_ref_frame_mask,
- unsigned int ref_costs_single[REF_FRAMES],
- unsigned int ref_costs_comp[REF_FRAMES][REF_FRAMES],
- struct buf_2d yv12_mb[REF_FRAMES][MAX_MB_PLANE]) {
- const AV1_COMMON *const cm = &cpi->common;
- const int num_planes = av1_num_planes(cm);
- MACROBLOCKD *const xd = &x->e_mbd;
- MB_MODE_INFO *const mbmi = xd->mi[0];
- MB_MODE_INFO_EXT *const mbmi_ext = x->mbmi_ext;
- const struct segmentation *const seg = &cm->seg;
- const SPEED_FEATURES *const sf = &cpi->sf;
- unsigned char segment_id = mbmi->segment_id;
- int dst_width1[MAX_MB_PLANE] = { MAX_SB_SIZE, MAX_SB_SIZE, MAX_SB_SIZE };
- int dst_width2[MAX_MB_PLANE] = { MAX_SB_SIZE >> 1, MAX_SB_SIZE >> 1,
- MAX_SB_SIZE >> 1 };
- int dst_height1[MAX_MB_PLANE] = { MAX_SB_SIZE >> 1, MAX_SB_SIZE >> 1,
- MAX_SB_SIZE >> 1 };
- int dst_height2[MAX_MB_PLANE] = { MAX_SB_SIZE, MAX_SB_SIZE, MAX_SB_SIZE };
-
- for (int i = 0; i < MB_MODE_COUNT; ++i)
- for (int k = 0; k < REF_FRAMES; ++k) args->single_filter[i][k] = SWITCHABLE;
-
- if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
- int len = sizeof(uint16_t); // plane offsets below are scaled by the sample size
- args->above_pred_buf[0] = CONVERT_TO_BYTEPTR(x->above_pred_buf);
- args->above_pred_buf[1] =
- CONVERT_TO_BYTEPTR(x->above_pred_buf + (MAX_SB_SQUARE >> 1) * len);
- args->above_pred_buf[2] =
- CONVERT_TO_BYTEPTR(x->above_pred_buf + MAX_SB_SQUARE * len);
- args->left_pred_buf[0] = CONVERT_TO_BYTEPTR(x->left_pred_buf);
- args->left_pred_buf[1] =
- CONVERT_TO_BYTEPTR(x->left_pred_buf + (MAX_SB_SQUARE >> 1) * len);
- args->left_pred_buf[2] =
- CONVERT_TO_BYTEPTR(x->left_pred_buf + MAX_SB_SQUARE * len);
- } else {
- args->above_pred_buf[0] = x->above_pred_buf;
- args->above_pred_buf[1] = x->above_pred_buf + (MAX_SB_SQUARE >> 1);
- args->above_pred_buf[2] = x->above_pred_buf + MAX_SB_SQUARE;
- args->left_pred_buf[0] = x->left_pred_buf;
- args->left_pred_buf[1] = x->left_pred_buf + (MAX_SB_SQUARE >> 1);
- args->left_pred_buf[2] = x->left_pred_buf + MAX_SB_SQUARE;
- }
-
- av1_collect_neighbors_ref_counts(xd);
-
- estimate_ref_frame_costs(cm, xd, x, segment_id, ref_costs_single,
- ref_costs_comp);
-
- MV_REFERENCE_FRAME ref_frame;
- for (ref_frame = LAST_FRAME; ref_frame <= ALTREF_FRAME; ++ref_frame) {
- x->pred_mv_sad[ref_frame] = INT_MAX;
- x->mbmi_ext->mode_context[ref_frame] = 0;
- x->mbmi_ext->compound_mode_context[ref_frame] = 0;
- mbmi_ext->ref_mv_count[ref_frame] = UINT8_MAX;
- if (cpi->ref_frame_flags & ref_frame_flag_list[ref_frame]) {
- if (mbmi->partition != PARTITION_NONE &&
- mbmi->partition != PARTITION_SPLIT) {
- if (skip_ref_frame_mask & (1 << ref_frame)) {
- int skip = 1;
- for (int r = ALTREF_FRAME + 1; r < MODE_CTX_REF_FRAMES; ++r) {
- if (!(skip_ref_frame_mask & (1 << r))) {
- const MV_REFERENCE_FRAME *rf = ref_frame_map[r - REF_FRAMES];
- if (rf[0] == ref_frame || rf[1] == ref_frame) {
- skip = 0;
- break;
- }
- }
- }
- if (skip) continue; // not needed by any unskipped compound pair either
- }
- }
- assert(get_ref_frame_buffer(cpi, ref_frame) != NULL);
- setup_buffer_ref_mvs_inter(cpi, x, ref_frame, bsize, mi_row, mi_col,
- yv12_mb);
- }
- }
- // ref_frame is ALTREF_FRAME + 1 here; continue over the compound reference pairs.
- for (; ref_frame < MODE_CTX_REF_FRAMES; ++ref_frame) {
- x->mbmi_ext->mode_context[ref_frame] = 0;
- mbmi_ext->ref_mv_count[ref_frame] = UINT8_MAX;
- const MV_REFERENCE_FRAME *rf = ref_frame_map[ref_frame - REF_FRAMES];
- if (!((cpi->ref_frame_flags & ref_frame_flag_list[rf[0]]) &&
- (cpi->ref_frame_flags & ref_frame_flag_list[rf[1]]))) {
- continue;
- }
-
- if (mbmi->partition != PARTITION_NONE &&
- mbmi->partition != PARTITION_SPLIT) {
- if (skip_ref_frame_mask & (1 << ref_frame)) {
- continue;
- }
- }
- av1_find_mv_refs(cm, xd, mbmi, ref_frame, mbmi_ext->ref_mv_count,
- mbmi_ext->ref_mv_stack, NULL, mbmi_ext->global_mvs, mi_row,
- mi_col, mbmi_ext->mode_context);
- }
-
- av1_count_overlappable_neighbors(cm, xd, mi_row, mi_col);
-
- if (check_num_overlappable_neighbors(mbmi) &&
- is_motion_variation_allowed_bsize(bsize)) {
- av1_build_prediction_by_above_preds(cm, xd, mi_row, mi_col,
- args->above_pred_buf, dst_width1,
- dst_height1, args->above_pred_stride);
- av1_build_prediction_by_left_preds(cm, xd, mi_row, mi_col,
- args->left_pred_buf, dst_width2,
- dst_height2, args->left_pred_stride);
- av1_setup_dst_planes(xd->plane, bsize, get_frame_new_buffer(cm), mi_row,
- mi_col, 0, num_planes);
- calc_target_weighted_pred(
- cm, x, xd, mi_row, mi_col, args->above_pred_buf[0],
- args->above_pred_stride[0], args->left_pred_buf[0],
- args->left_pred_stride[0]);
- }
-
- int min_pred_mv_sad = INT_MAX;
- for (ref_frame = LAST_FRAME; ref_frame <= ALTREF_FRAME; ++ref_frame)
- min_pred_mv_sad = AOMMIN(min_pred_mv_sad, x->pred_mv_sad[ref_frame]);
-
- for (int i = 0; i < 2; ++i) {
- ref_frame_skip_mask[i] = 0;
- }
- memset(mode_skip_mask, 0, REF_FRAMES * sizeof(*mode_skip_mask));
- for (ref_frame = LAST_FRAME; ref_frame <= ALTREF_FRAME; ++ref_frame) {
- if (!(cpi->ref_frame_flags & ref_frame_flag_list[ref_frame])) {
- // Skip checking missing references in both single and compound reference
- // modes. Note that a mode will be skipped iff both reference frames
- // are masked out.
- ref_frame_skip_mask[0] |= (1 << ref_frame);
- ref_frame_skip_mask[1] |= SECOND_REF_FRAME_MASK;
- } else {
- // Skip fixed mv modes for poor references
- if ((x->pred_mv_sad[ref_frame] >> 2) > min_pred_mv_sad) {
- mode_skip_mask[ref_frame] |= INTER_NEAREST_NEAR_ZERO;
- }
- }
- // If the segment reference frame feature is enabled....
- // then do nothing if the current ref frame is not allowed..
- if (segfeature_active(seg, segment_id, SEG_LVL_REF_FRAME) &&
- get_segdata(seg, segment_id, SEG_LVL_REF_FRAME) != (int)ref_frame) {
- ref_frame_skip_mask[0] |= (1 << ref_frame);
- ref_frame_skip_mask[1] |= SECOND_REF_FRAME_MASK;
- }
- }
-
- // Disable this drop out case if the ref frame
- // segment level feature is enabled for this segment. This is to
- // prevent the possibility that we end up unable to pick any mode.
- if (!segfeature_active(seg, segment_id, SEG_LVL_REF_FRAME)) {
- // Only consider GLOBALMV/ALTREF_FRAME for alt ref frame,
- // unless ARNR filtering is enabled in which case we want
- // an unfiltered alternative. We allow near/nearest as well
- // because they may result in zero-zero MVs but be cheaper.
- if (cpi->rc.is_src_frame_alt_ref && (cpi->oxcf.arnr_max_frames == 0)) {
- ref_frame_skip_mask[0] = (1 << LAST_FRAME) | (1 << LAST2_FRAME) |
- (1 << LAST3_FRAME) | (1 << BWDREF_FRAME) |
- (1 << ALTREF2_FRAME) | (1 << GOLDEN_FRAME);
- ref_frame_skip_mask[1] = SECOND_REF_FRAME_MASK;
- // TODO(zoeliu): To further explore whether following needs to be done for
- // BWDREF_FRAME as well.
- mode_skip_mask[ALTREF_FRAME] = ~INTER_NEAREST_NEAR_ZERO;
- const MV_REFERENCE_FRAME tmp_ref_frames[2] = { ALTREF_FRAME, NONE_FRAME };
- int_mv near_mv, nearest_mv, global_mv;
- get_this_mv(&nearest_mv, NEARESTMV, 0, 0, tmp_ref_frames, x->mbmi_ext);
- get_this_mv(&near_mv, NEARMV, 0, 0, tmp_ref_frames, x->mbmi_ext);
- get_this_mv(&global_mv, GLOBALMV, 0, 0, tmp_ref_frames, x->mbmi_ext);
-
- if (near_mv.as_int != global_mv.as_int)
- mode_skip_mask[ALTREF_FRAME] |= (1 << NEARMV);
- if (nearest_mv.as_int != global_mv.as_int)
- mode_skip_mask[ALTREF_FRAME] |= (1 << NEARESTMV);
- }
- }
-
- if (cpi->rc.is_src_frame_alt_ref) {
- if (sf->alt_ref_search_fp) {
- assert(cpi->ref_frame_flags & ref_frame_flag_list[ALTREF_FRAME]);
- mode_skip_mask[ALTREF_FRAME] = 0;
- ref_frame_skip_mask[0] = ~(1 << ALTREF_FRAME);
- ref_frame_skip_mask[1] = SECOND_REF_FRAME_MASK;
- }
- }
-
- if (sf->alt_ref_search_fp)
- if (!cm->show_frame && x->pred_mv_sad[GOLDEN_FRAME] < INT_MAX)
- if (x->pred_mv_sad[ALTREF_FRAME] > (x->pred_mv_sad[GOLDEN_FRAME] << 1))
- mode_skip_mask[ALTREF_FRAME] |= INTER_ALL;
-
- if (sf->adaptive_mode_search) {
- if (cm->show_frame && !cpi->rc.is_src_frame_alt_ref &&
- cpi->rc.frames_since_golden >= 3)
- if ((x->pred_mv_sad[GOLDEN_FRAME] >> 1) > x->pred_mv_sad[LAST_FRAME])
- mode_skip_mask[GOLDEN_FRAME] |= INTER_ALL;
- }
-
- if (bsize > sf->max_intra_bsize) {
- ref_frame_skip_mask[0] |= (1 << INTRA_FRAME);
- ref_frame_skip_mask[1] |= (1 << INTRA_FRAME);
- }
-
- mode_skip_mask[INTRA_FRAME] |=
- ~(sf->intra_y_mode_mask[max_txsize_lookup[bsize]]);
-
- if (cpi->sf.tx_type_search.fast_intra_tx_type_search)
- x->use_default_intra_tx_type = 1;
- else
- x->use_default_intra_tx_type = 0;
-
- if (cpi->sf.tx_type_search.fast_inter_tx_type_search)
- x->use_default_inter_tx_type = 1;
- else
- x->use_default_inter_tx_type = 0;
- if (cpi->sf.skip_repeat_interpolation_filter_search) {
- x->interp_filter_stats_idx[0] = 0;
- x->interp_filter_stats_idx[1] = 0;
- }
-}
-/*
- * Tries luma palette coding (DC_PRED on INTRA_FRAME) for the block and, if its
- * RD cost beats search_state->best_rd, records it as the best mode.
- *
- * mbmi and pmi are modified in place while searching (presumably pmi points
- * at mbmi->palette_mode_info - confirm against the caller).
- * ref_costs_single[INTRA_FRAME] is added to the candidate's rate.
- *
- * On a win, rd_cost (rate, dist, rdcost), search_state (best_mode_index,
- * best_rd, best_mbmode, best_skip2, best_mode_skippable) and ctx->blk_skip
- * are updated. Returns without recording anything when no luma palette was
- * selected (pmi->palette_size[0] == 0) or when the luma RD search reports
- * rate == INT_MAX.
- */
-static void search_palette_mode(const AV1_COMP *cpi, MACROBLOCK *x, int mi_row,
- int mi_col, RD_STATS *rd_cost,
- PICK_MODE_CONTEXT *ctx, BLOCK_SIZE bsize,
- MB_MODE_INFO *const mbmi,
- PALETTE_MODE_INFO *const pmi,
- unsigned int *ref_costs_single,
- InterModeSearchState *search_state) {
- const AV1_COMMON *const cm = &cpi->common;
- const int num_planes = av1_num_planes(cm);
- MACROBLOCKD *const xd = &x->e_mbd;
- int rate2 = 0;
- int64_t distortion2 = 0, best_rd_palette = search_state->best_rd, this_rd,
- best_model_rd_palette = INT64_MAX;
- int skippable = 0, rate_overhead_palette = 0;
- RD_STATS rd_stats_y;
- TX_SIZE uv_tx = TX_4X4;
- uint8_t *const best_palette_color_map =
- x->palette_buffer->best_palette_color_map;
- uint8_t *const color_map = xd->plane[0].color_index_map;
- MB_MODE_INFO best_mbmi_palette = *mbmi;
- uint8_t best_blk_skip[MAX_MIB_SIZE * MAX_MIB_SIZE];
- const int *const intra_mode_cost = x->mbmode_cost[size_group_lookup[bsize]];
- const int rows = block_size_high[bsize];
- const int cols = block_size_wide[bsize];
-
- mbmi->mode = DC_PRED;
- mbmi->uv_mode = UV_DC_PRED;
- mbmi->ref_frame[0] = INTRA_FRAME;
- mbmi->ref_frame[1] = NONE_FRAME;
- rate_overhead_palette = rd_pick_palette_intra_sby(
- cpi, x, bsize, mi_row, mi_col, intra_mode_cost[DC_PRED],
- &best_mbmi_palette, best_palette_color_map, &best_rd_palette,
- &best_model_rd_palette, NULL, NULL, NULL, NULL, ctx, best_blk_skip);
- if (pmi->palette_size[0] == 0) return;
-
- memcpy(x->blk_skip, best_blk_skip,
- sizeof(best_blk_skip[0]) * bsize_to_num_blk(bsize));
-
- memcpy(color_map, best_palette_color_map,
- rows * cols * sizeof(best_palette_color_map[0]));
- super_block_yrd(cpi, x, &rd_stats_y, bsize, search_state->best_rd);
- if (rd_stats_y.rate == INT_MAX) return;
-
- skippable = rd_stats_y.skip;
- distortion2 = rd_stats_y.dist;
- rate2 = rd_stats_y.rate + rate_overhead_palette;
- rate2 += ref_costs_single[INTRA_FRAME];
- if (num_planes > 1) {
- uv_tx = av1_get_tx_size(AOM_PLANE_U, xd);
- if (search_state->rate_uv_intra[uv_tx] == INT_MAX) { // chroma result for this tx size not cached yet
- choose_intra_uv_mode(
- cpi, x, bsize, uv_tx, &search_state->rate_uv_intra[uv_tx],
- &search_state->rate_uv_tokenonly[uv_tx],
- &search_state->dist_uvs[uv_tx], &search_state->skip_uvs[uv_tx],
- &search_state->mode_uv[uv_tx]);
- search_state->pmi_uv[uv_tx] = *pmi;
- search_state->uv_angle_delta[uv_tx] = mbmi->angle_delta[PLANE_TYPE_UV];
- }
- mbmi->uv_mode = search_state->mode_uv[uv_tx];
- pmi->palette_size[1] = search_state->pmi_uv[uv_tx].palette_size[1];
- if (pmi->palette_size[1] > 0) {
- memcpy(pmi->palette_colors + PALETTE_MAX_SIZE,
- search_state->pmi_uv[uv_tx].palette_colors + PALETTE_MAX_SIZE,
- 2 * PALETTE_MAX_SIZE * sizeof(pmi->palette_colors[0]));
- }
- mbmi->angle_delta[PLANE_TYPE_UV] = search_state->uv_angle_delta[uv_tx];
- skippable = skippable && search_state->skip_uvs[uv_tx];
- distortion2 += search_state->dist_uvs[uv_tx];
- rate2 += search_state->rate_uv_intra[uv_tx];
- }
- // When everything is skippable, swap the token rates for the skip-flag cost.
- if (skippable) {
- rate2 -= rd_stats_y.rate;
- if (num_planes > 1) rate2 -= search_state->rate_uv_tokenonly[uv_tx];
- rate2 += x->skip_cost[av1_get_skip_context(xd)][1];
- } else {
- rate2 += x->skip_cost[av1_get_skip_context(xd)][0];
- }
- this_rd = RDCOST(x->rdmult, rate2, distortion2);
- if (this_rd < search_state->best_rd) {
- search_state->best_mode_index = 3; // NOTE(review): magic index; which av1_mode_order entry is meant? - confirm
- mbmi->mv[0].as_int = 0;
- rd_cost->rate = rate2;
- rd_cost->dist = distortion2;
- rd_cost->rdcost = this_rd;
- search_state->best_rd = this_rd;
- search_state->best_mbmode = *mbmi;
- search_state->best_skip2 = 0;
- search_state->best_mode_skippable = skippable;
- memcpy(ctx->blk_skip, x->blk_skip,
- sizeof(x->blk_skip[0]) * ctx->num_4x4_blk);
- }
-}
-
-// Resets *search_state to its defaults ahead of an inter mode search.
-//
-// best_rd_so_far seeds search_state->best_rd. Every mode threshold up to and
-// including LAST_NEW_MV_INDEX is zero; the remaining ones are the per-segment,
-// per-block-size RD thresholds from cpi scaled by the tile's frequency factor
-// and shifted right by 5.
-static void init_inter_mode_search_state(InterModeSearchState *search_state,
-                                         const AV1_COMP *cpi,
-                                         const TileDataEnc *tile_data,
-                                         const MACROBLOCK *x, BLOCK_SIZE bsize,
-                                         int64_t best_rd_so_far) {
-  const unsigned char segment_id = x->e_mbd.mi[0]->segment_id;
-  const int *const rd_threshes = cpi->rd.threshes[segment_id][bsize];
-
-  // Best-mode bookkeeping.
-  search_state->best_rd = best_rd_so_far;
-  av1_zero(search_state->best_mbmode);
-  search_state->best_mode_index = -1;
-  search_state->best_rate_y = INT_MAX;
-  search_state->best_rate_uv = INT_MAX;
-  search_state->best_mode_skippable = 0;
-  search_state->best_skip2 = 0;
-  search_state->best_pred_sse = UINT_MAX;
-  for (int i = 0; i < REFERENCE_MODES; ++i) {
-    search_state->best_pred_rd[i] = INT64_MAX;
-  }
-
-  // Intra-related state.
-  search_state->skip_intra_modes = 0;
-  search_state->best_intra_mode = DC_PRED;
-  search_state->best_intra_rd = INT64_MAX;
-  search_state->angle_stats_ready = 0;
-  for (int i = 0; i < TX_SIZES_ALL; i++) {
-    search_state->rate_uv_intra[i] = INT_MAX;
-  }
-  av1_zero(search_state->pmi_uv);
-
-  // Reference distance tables: every byte set to 0xff.
-  search_state->num_available_refs = 0;
-  memset(search_state->dist_refs, -1, sizeof(search_state->dist_refs));
-  memset(search_state->dist_order_refs, -1,
-         sizeof(search_state->dist_order_refs));
-
-  // Per-mode RD thresholds.
-  int idx = 0;
-  for (; idx <= LAST_NEW_MV_INDEX; ++idx) {
-    search_state->mode_threshold[idx] = 0;
-  }
-  for (; idx < MAX_MODES; ++idx) {
-    search_state->mode_threshold[idx] =
-        ((int64_t)rd_threshes[idx] * tile_data->thresh_freq_fact[bsize][idx]) >>
-        5;
-  }
-
-  // Single-reference NEWMV results.
-  av1_zero(search_state->single_newmv);
-  av1_zero(search_state->single_newmv_rate);
-  av1_zero(search_state->single_newmv_valid);
-
-  // RD estimates per (mode, ref MV index, reference frame).
-  for (int mode = 0; mode < MB_MODE_COUNT; ++mode) {
-    for (int mv_idx = 0; mv_idx < MAX_REF_MV_SERCH; ++mv_idx) {
-      for (int ref = 0; ref < REF_FRAMES; ++ref) {
-        search_state->modelled_rd[mode][mv_idx][ref] = INT64_MAX;
-        search_state->simple_rd[mode][mv_idx][ref] = INT64_MAX;
-      }
-    }
-  }
-
-  // Per-direction single inter mode records and their RD ordering.
-  for (int dir = 0; dir < 2; ++dir) {
-    for (int mode = 0; mode < SINGLE_INTER_MODE_NUM; ++mode) {
-      for (int ref = 0; ref < FWD_REFS; ++ref) {
-        SingleInterModeState *const actual =
-            &search_state->single_state[dir][mode][ref];
-        SingleInterModeState *const modelled =
-            &search_state->single_state_modelled[dir][mode][ref];
-
-        actual->ref_frame = NONE_FRAME;
-        actual->rd = INT64_MAX;
-        modelled->ref_frame = NONE_FRAME;
-        modelled->rd = INT64_MAX;
-        search_state->single_rd_order[dir][mode][ref] = NONE_FRAME;
-      }
-    }
-  }
-  av1_zero(search_state->single_state_cnt);
-  av1_zero(search_state->single_state_modelled_cnt);
-}
-
-// Order-independent pruning test for the candidate at
-// av1_mode_order[mode_index].
-//
-// Return value:
-//   0 - do not skip this mode.
-//   1 - skip this mode completely.
-//   2 - the mode's single reference is marked in ctx->skip_ref_frame_mask but
-//       is still used by a compound reference that is not skipped: skip its
-//       motion mode search but still try simple translation.
-//
-// mode_skip_mask is indexed by the first reference frame and holds one bit per
-// prediction mode. ref_frame_skip_mask[0] / [1] hold one bit per first /
-// second reference frame; the reference masks reject a mode only when both of
-// its references are masked.
-static int inter_mode_search_order_independent_skip(
-    const AV1_COMP *cpi, const PICK_MODE_CONTEXT *ctx, const MACROBLOCK *x,
-    BLOCK_SIZE bsize, int mode_index, int mi_row, int mi_col,
-    uint32_t *mode_skip_mask, uint16_t *ref_frame_skip_mask,
-    InterModeSearchState *search_state) {
-  const SPEED_FEATURES *const sf = &cpi->sf;
-  const AV1_COMMON *const cm = &cpi->common;
-  const struct segmentation *const seg = &cm->seg;
-  const MACROBLOCKD *const xd = &x->e_mbd;
-  const MB_MODE_INFO *const mbmi = xd->mi[0];
-  const unsigned char segment_id = mbmi->segment_id;
-  const MV_REFERENCE_FRAME *ref_frame = av1_mode_order[mode_index].ref_frame;
-  const PREDICTION_MODE this_mode = av1_mode_order[mode_index].mode;
-  int skip_motion_mode = 0;
-  if (mbmi->partition != PARTITION_NONE && mbmi->partition != PARTITION_SPLIT) {
-    const int ref_type = av1_ref_frame_type(ref_frame);
-    int skip_ref = ctx->skip_ref_frame_mask & (1 << ref_type);
-    if (ref_type <= ALTREF_FRAME && skip_ref) {
-      // Compound reference modes depend on the motion estimation result of
-      // their two single reference modes (the best MV of each single reference
-      // is the starting point). If the current single reference is marked as
-      // skipped, check whether a compound reference still needs it.
-      for (int r = ALTREF_FRAME + 1; r < MODE_CTX_REF_FRAMES; ++r) {
-        if (!(ctx->skip_ref_frame_mask & (1 << r))) {
-          const MV_REFERENCE_FRAME *rf = ref_frame_map[r - REF_FRAMES];
-          if (rf[0] == ref_type || rf[1] == ref_type) {
-            // A compound reference that is not skipped contains this single
-            // reference, so it cannot be skipped completely. Skip only its
-            // motion mode search and still try simple translation.
-            skip_motion_mode = 1;
-            skip_ref = 0;
-            break;
-          }
-        }
-      }
-    }
-    if (skip_ref) return 1;
-  }
-
-  if (cpi->sf.mode_pruning_based_on_two_pass_partition_search &&
-      !x->cb_partition_scan) {
-    const int mi_width = mi_size_wide[bsize];
-    const int mi_height = mi_size_high[bsize];
-    int found = 0;
-    // Search the stats table to see if the ref frames have been used in the
-    // first pass of partition search. Rows span the block height and columns
-    // span its width, so non-square blocks scan exactly their own area.
-    for (int row = mi_row; row < mi_row + mi_height && !found;
-         row += FIRST_PARTITION_PASS_SAMPLE_REGION) {
-      for (int col = mi_col; col < mi_col + mi_width && !found;
-           col += FIRST_PARTITION_PASS_SAMPLE_REGION) {
-        const int index = av1_first_partition_pass_stats_index(row, col);
-        const FIRST_PARTITION_PASS_STATS *const stats =
-            &x->first_partition_pass_stats[index];
-        // A negative second reference means the mode has none.
-        if (stats->ref0_counts[ref_frame[0]] &&
-            (ref_frame[1] < 0 || stats->ref1_counts[ref_frame[1]])) {
-          found = 1;
-          break;
-        }
-      }
-    }
-    if (!found) return 1;
-  }
-
-  if (ref_frame[0] > INTRA_FRAME && ref_frame[1] == INTRA_FRAME) {
-    // Inter-intra: both the mode and the block size must allow it.
-    if (!is_interintra_allowed_mode(this_mode)) return 1;
-    if (!is_interintra_allowed_bsize(bsize)) return 1;
-  }
-
-  // This is only used in motion vector unit test.
-  if (cpi->oxcf.motion_vector_unit_test && ref_frame[0] == INTRA_FRAME)
-    return 1;
-
-  if (ref_frame[0] == INTRA_FRAME) {
-    if (this_mode != DC_PRED) {
-      // Disable intra modes other than DC_PRED for blocks with low variance
-      // Threshold for intra skipping based on source variance
-      // TODO(debargha): Specialize the threshold for super block sizes
-      const unsigned int skip_intra_var_thresh = 64;
-      if ((sf->mode_search_skip_flags & FLAG_SKIP_INTRA_LOWVAR) &&
-          x->source_variance < skip_intra_var_thresh)
-        return 1;
-    }
-  } else {
-    if (!is_comp_ref_allowed(bsize) && ref_frame[1] > INTRA_FRAME) return 1;
-  }
-
-  const int comp_pred = ref_frame[1] > INTRA_FRAME;
-  if (comp_pred) {
-    if (!cpi->allow_comp_inter_inter) return 1;
-
-    if (cm->reference_mode == SINGLE_REFERENCE) return 1;
-
-    // Skip compound inter modes if the second reference is not available.
-    if (!(cpi->ref_frame_flags & ref_frame_flag_list[ref_frame[1]])) return 1;
-
-    // Do not allow compound prediction if the segment level reference frame
-    // feature is in use as in this case there can only be one reference.
-    if (segfeature_active(seg, segment_id, SEG_LVL_REF_FRAME)) return 1;
-  }
-
-  if (sf->selective_ref_frame) {
-    if (sf->selective_ref_frame >= 2 || x->cb_partition_scan) {
-      if (ref_frame[0] == ALTREF2_FRAME || ref_frame[1] == ALTREF2_FRAME)
-        if (get_relative_dist(
-                cm, cm->cur_frame->ref_frame_offset[ALTREF2_FRAME - LAST_FRAME],
-                cm->frame_offset) < 0)
-          return 1;
-      if (ref_frame[0] == BWDREF_FRAME || ref_frame[1] == BWDREF_FRAME)
-        if (get_relative_dist(
-                cm, cm->cur_frame->ref_frame_offset[BWDREF_FRAME - LAST_FRAME],
-                cm->frame_offset) < 0)
-          return 1;
-    }
-    if (ref_frame[0] == LAST3_FRAME || ref_frame[1] == LAST3_FRAME)
-      if (get_relative_dist(
-              cm, cm->cur_frame->ref_frame_offset[LAST3_FRAME - LAST_FRAME],
-              cm->cur_frame->ref_frame_offset[GOLDEN_FRAME - LAST_FRAME]) <= 0)
-        return 1;
-    if (ref_frame[0] == LAST2_FRAME || ref_frame[1] == LAST2_FRAME)
-      if (get_relative_dist(
-              cm, cm->cur_frame->ref_frame_offset[LAST2_FRAME - LAST_FRAME],
-              cm->cur_frame->ref_frame_offset[GOLDEN_FRAME - LAST_FRAME]) <= 0)
-        return 1;
-  }
-
-  // One-sided compound is used only when all reference frames are one-sided.
-  if (sf->selective_ref_frame && comp_pred && !cpi->all_one_sided_refs) {
-    unsigned int ref_offsets[2];
-    for (int i = 0; i < 2; ++i) {
-      const int buf_idx = cm->frame_refs[ref_frame[i] - LAST_FRAME].idx;
-      assert(buf_idx >= 0);
-      ref_offsets[i] = cm->buffer_pool->frame_bufs[buf_idx].cur_frame_offset;
-    }
-    if ((get_relative_dist(cm, ref_offsets[0], cm->frame_offset) <= 0 &&
-         get_relative_dist(cm, ref_offsets[1], cm->frame_offset) <= 0) ||
-        (get_relative_dist(cm, ref_offsets[0], cm->frame_offset) > 0 &&
-         get_relative_dist(cm, ref_offsets[1], cm->frame_offset) > 0))
-      return 1;
-  }
-
-  if (mode_skip_mask[ref_frame[0]] & (1 << this_mode)) {
-    return 1;
-  }
-
-  // A negative second reference is tested against bit 0 of the second mask.
-  if ((ref_frame_skip_mask[0] & (1 << ref_frame[0])) &&
-      (ref_frame_skip_mask[1] & (1 << AOMMAX(0, ref_frame[1])))) {
-    return 1;
-  }
-
-  if (skip_repeated_mv(cm, x, this_mode, ref_frame, search_state)) {
-    return 1;
-  }
-  if (skip_motion_mode) {
-    return 2;
-  }
-  return 0;
-}
-
-// Resets the block's mode info to the defaults for entry `mode_index` of
-// av1_mode_order: the prediction mode and both reference frames come from
-// the table, every other field touched here gets its baseline value.
-static INLINE void init_mbmi(MB_MODE_INFO *mbmi, int mode_index,
-                             const AV1_COMMON *cm) {
-  const MODE_DEFINITION *const mode_def = &av1_mode_order[mode_index];
-
-  // Mode and references as listed in the mode table.
-  mbmi->mode = mode_def->mode;
-  mbmi->ref_frame[0] = mode_def->ref_frame[0];
-  mbmi->ref_frame[1] = mode_def->ref_frame[1];
-  mbmi->ref_mv_idx = 0;
-  mbmi->uv_mode = UV_DC_PRED;
-
-  // No palette on either plane type and no filter-intra.
-  mbmi->palette_mode_info.palette_size[0] = 0;
-  mbmi->palette_mode_info.palette_size[1] = 0;
-  mbmi->filter_intra_mode_info.use_filter_intra = 0;
-
-  // Zero motion vectors and plain translational motion.
-  mbmi->mv[1].as_int = 0;
-  mbmi->mv[0].as_int = 0;
-  mbmi->motion_mode = SIMPLE_TRANSLATION;
-  mbmi->interintra_mode = (INTERINTRA_MODE)(II_DC_PRED - 1);
-
-  // Interpolation filters follow the frame-level setting.
-  set_default_interp_filters(mbmi, cm->interp_filter);
-}
-
-static int64_t handle_intra_mode(InterModeSearchState *search_state,
- const AV1_COMP *cpi, MACROBLOCK *x,
- BLOCK_SIZE bsize, int mi_row, int mi_col,
- int ref_frame_cost,
- const PICK_MODE_CONTEXT *ctx, int disable_skip,
- RD_STATS *rd_stats, RD_STATS *rd_stats_y,
- RD_STATS *rd_stats_uv) { /* Purpose: rate-distortion evaluation of the intra
- * mode currently stored in xd->mi[0]->mode; the block must already be marked
- * as INTRA_FRAME (see the assert below).
- *
- * Steps: (1) cheap rate-only bound check, (2) luma search, with an angle
- * search for directional modes and an optional filter-intra search on top of
- * DC_PRED, (3) chroma result taken from (or stored into) the per-uv_tx cache
- * kept in search_state, (4) assembly of the total rate and distortion.
- *
- * Outputs: rd_stats (total), rd_stats_y (luma) and rd_stats_uv (chroma) are
- * written. search_state may get skip_intra_modes set, best_intra_rd and
- * best_intra_mode updated and, when disable_skip is zero, best_pred_rd
- * lowered. ctx is only read for num_4x4_blk.
- *
- * Returns the RD cost of the mode, or INT64_MAX when the mode is pruned or
- * no usable luma result exists.
- */
- const AV1_COMMON *cm = &cpi->common;
- const SPEED_FEATURES *const sf = &cpi->sf;
- MACROBLOCKD *const xd = &x->e_mbd;
- MB_MODE_INFO *const mbmi = xd->mi[0];
- assert(mbmi->ref_frame[0] == INTRA_FRAME);
- PALETTE_MODE_INFO *const pmi = &mbmi->palette_mode_info;
- const int try_palette =
- av1_allow_palette(cm->allow_screen_content_tools, mbmi->sb_type);
- const int *const intra_mode_cost = x->mbmode_cost[size_group_lookup[bsize]];
- const int intra_cost_penalty = av1_get_intra_cost_penalty(
- cm->base_qindex, cm->y_dc_delta_q, cm->seq_params.bit_depth);
- const int rows = block_size_high[bsize];
- const int cols = block_size_wide[bsize];
- const int num_planes = av1_num_planes(cm);
- const int skip_ctx = av1_get_skip_context(xd);
-
- int known_rate = intra_mode_cost[mbmi->mode];  // Rate bound: mode cost ...
- known_rate += ref_frame_cost;  // ... plus reference frame signalling ...
- if (mbmi->mode != DC_PRED && mbmi->mode != PAETH_PRED)
- known_rate += intra_cost_penalty;  // ... plus the penalty for other modes ...
- known_rate += AOMMIN(x->skip_cost[skip_ctx][0], x->skip_cost[skip_ctx][1]);  // ... plus the cheaper skip flag.
- const int64_t known_rd = RDCOST(x->rdmult, known_rate, 0);  // Cost of that rate at zero distortion.
- if (known_rd > search_state->best_rd) {
- search_state->skip_intra_modes = 1;  // Even the bound loses against best_rd.
- return INT64_MAX;
- }
-
- TX_SIZE uv_tx;  // Only assigned and used when num_planes > 1.
- int is_directional_mode = av1_is_directional_mode(mbmi->mode);
- if (is_directional_mode && av1_use_angle_delta(bsize)) {
- int rate_dummy;
- int64_t model_rd = INT64_MAX;
- if (!search_state->angle_stats_ready) {  // Angle estimation runs once; the mask is kept in search_state.
- const int src_stride = x->plane[0].src.stride;
- const uint8_t *src = x->plane[0].src.buf;
- if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH)
- highbd_angle_estimation(src, src_stride, rows, cols, bsize,
- search_state->directional_mode_skip_mask);
- else
- angle_estimation(src, src_stride, rows, cols, bsize,
- search_state->directional_mode_skip_mask);
- search_state->angle_stats_ready = 1;
- }
- if (search_state->directional_mode_skip_mask[mbmi->mode]) return INT64_MAX;  // Mode is flagged in the skip mask.
- av1_init_rd_stats(rd_stats_y);
- rd_stats_y->rate = INT_MAX;  // NOTE(review): presumably stays INT_MAX unless the angle search stores a result; confirm in rd_pick_intra_angle_sby.
- rd_pick_intra_angle_sby(cpi, x, mi_row, mi_col, &rate_dummy, rd_stats_y,
- bsize, intra_mode_cost[mbmi->mode],
- search_state->best_rd, &model_rd);
- } else {
- av1_init_rd_stats(rd_stats_y);
- mbmi->angle_delta[PLANE_TYPE_Y] = 0;
- super_block_yrd(cpi, x, rd_stats_y, bsize, search_state->best_rd);
- }
- uint8_t best_blk_skip[MAX_MIB_SIZE * MAX_MIB_SIZE];  // Snapshot of x->blk_skip for the best luma result so far.
- memcpy(best_blk_skip, x->blk_skip,
- sizeof(best_blk_skip[0]) * ctx->num_4x4_blk);
- int try_filter_intra = 0;
- int64_t best_rd_tmp = INT64_MAX;
- if (mbmi->mode == DC_PRED && av1_filter_intra_allowed_bsize(cm, bsize)) {  // Filter-intra is only tried on top of DC_PRED.
- if (rd_stats_y->rate != INT_MAX) {
- const int tmp_rate = rd_stats_y->rate + x->filter_intra_cost[bsize][0] +
- intra_mode_cost[mbmi->mode];
- best_rd_tmp = RDCOST(x->rdmult, tmp_rate, rd_stats_y->dist);
- try_filter_intra = !((best_rd_tmp / 2) > search_state->best_rd);  // Not tried when half the DC_PRED cost already exceeds best_rd.
- } else {
- try_filter_intra = !(search_state->best_mbmode.skip);  // No DC_PRED result: try unless the best mode so far has skip set.
- }
- }
- if (try_filter_intra) {
- RD_STATS rd_stats_y_fi;
- int filter_intra_selected_flag = 0;
- TX_SIZE best_tx_size = mbmi->tx_size;  // Start from the plain DC_PRED transform state.
- TX_TYPE best_txk_type[TXK_TYPE_BUF_LEN];
- memcpy(best_txk_type, mbmi->txk_type,
- sizeof(*best_txk_type) * TXK_TYPE_BUF_LEN);
- FILTER_INTRA_MODE best_fi_mode = FILTER_DC_PRED;
-
- mbmi->filter_intra_mode_info.use_filter_intra = 1;
- for (FILTER_INTRA_MODE fi_mode = FILTER_DC_PRED;
- fi_mode < FILTER_INTRA_MODES; ++fi_mode) {
- int64_t this_rd_tmp;
- mbmi->filter_intra_mode_info.filter_intra_mode = fi_mode;
- super_block_yrd(cpi, x, &rd_stats_y_fi, bsize, search_state->best_rd);
- if (rd_stats_y_fi.rate == INT_MAX) {  // No result for this filter mode.
- continue;
- }
- const int this_rate_tmp =
- rd_stats_y_fi.rate +
- intra_mode_info_cost_y(cpi, x, mbmi, bsize,
- intra_mode_cost[mbmi->mode]);
- this_rd_tmp = RDCOST(x->rdmult, this_rate_tmp, rd_stats_y_fi.dist);
-
- if (this_rd_tmp != INT64_MAX && this_rd_tmp / 2 > search_state->best_rd) {
- break;  // More than twice best_rd: give up on the remaining filter modes.
- }
- if (this_rd_tmp < best_rd_tmp) {  // New best: remember its transform state and luma stats.
- best_tx_size = mbmi->tx_size;
- memcpy(best_txk_type, mbmi->txk_type,
- sizeof(*best_txk_type) * TXK_TYPE_BUF_LEN);
- memcpy(best_blk_skip, x->blk_skip,
- sizeof(best_blk_skip[0]) * ctx->num_4x4_blk);
- best_fi_mode = fi_mode;
- *rd_stats_y = rd_stats_y_fi;
- filter_intra_selected_flag = 1;
- best_rd_tmp = this_rd_tmp;
- }
- }
-
- mbmi->tx_size = best_tx_size;  // Restore the winner's state (plain DC_PRED if no filter mode won).
- memcpy(mbmi->txk_type, best_txk_type,
- sizeof(*best_txk_type) * TXK_TYPE_BUF_LEN);
- memcpy(x->blk_skip, best_blk_skip,
- sizeof(x->blk_skip[0]) * ctx->num_4x4_blk);
-
- if (filter_intra_selected_flag) {
- mbmi->filter_intra_mode_info.use_filter_intra = 1;
- mbmi->filter_intra_mode_info.filter_intra_mode = best_fi_mode;
- } else {
- mbmi->filter_intra_mode_info.use_filter_intra = 0;
- }
- }
- if (rd_stats_y->rate == INT_MAX) return INT64_MAX;  // No usable luma result.
- const int mode_cost_y =
- intra_mode_info_cost_y(cpi, x, mbmi, bsize, intra_mode_cost[mbmi->mode]);
- av1_init_rd_stats(rd_stats);
- av1_init_rd_stats(rd_stats_uv);
- if (num_planes > 1) {
- uv_tx = av1_get_tx_size(AOM_PLANE_U, xd);
- if (search_state->rate_uv_intra[uv_tx] == INT_MAX) {  // NOTE(review): INT_MAX appears to mean "no chroma result cached for this uv_tx yet"; confirm where rate_uv_intra is initialised.
- int rate_y =
- rd_stats_y->skip ? x->skip_cost[skip_ctx][1] : rd_stats_y->rate;
- const int64_t rdy =
- RDCOST(x->rdmult, rate_y + mode_cost_y, rd_stats_y->dist);
- if (search_state->best_rd < (INT64_MAX / 2) &&
- rdy > (search_state->best_rd + (search_state->best_rd >> 2))) {  // Luma alone exceeds 1.25 * best_rd.
- search_state->skip_intra_modes = 1;
- return INT64_MAX;
- }
- choose_intra_uv_mode(
- cpi, x, bsize, uv_tx, &search_state->rate_uv_intra[uv_tx],
- &search_state->rate_uv_tokenonly[uv_tx],
- &search_state->dist_uvs[uv_tx], &search_state->skip_uvs[uv_tx],
- &search_state->mode_uv[uv_tx]);
- if (try_palette) search_state->pmi_uv[uv_tx] = *pmi;  // Keep the palette info alongside the cached chroma result.
- search_state->uv_angle_delta[uv_tx] = mbmi->angle_delta[PLANE_TYPE_UV];
-
- const int uv_rate = search_state->rate_uv_tokenonly[uv_tx];
- const int64_t uv_dist = search_state->dist_uvs[uv_tx];
- const int64_t uv_rd = RDCOST(x->rdmult, uv_rate, uv_dist);
- if (uv_rd > search_state->best_rd) {  // Chroma alone already loses against best_rd.
- search_state->skip_intra_modes = 1;
- return INT64_MAX;
- }
- }
-
- rd_stats_uv->rate = search_state->rate_uv_tokenonly[uv_tx];  // From here on the cached chroma result is used.
- rd_stats_uv->dist = search_state->dist_uvs[uv_tx];
- rd_stats_uv->skip = search_state->skip_uvs[uv_tx];
- rd_stats->skip = rd_stats_y->skip && rd_stats_uv->skip;
- mbmi->uv_mode = search_state->mode_uv[uv_tx];
- if (try_palette) {
- pmi->palette_size[1] = search_state->pmi_uv[uv_tx].palette_size[1];
- memcpy(pmi->palette_colors + PALETTE_MAX_SIZE,
- search_state->pmi_uv[uv_tx].palette_colors + PALETTE_MAX_SIZE,
- 2 * PALETTE_MAX_SIZE * sizeof(pmi->palette_colors[0]));
- }
- mbmi->angle_delta[PLANE_TYPE_UV] = search_state->uv_angle_delta[uv_tx];
- }
- rd_stats->rate = rd_stats_y->rate + mode_cost_y;
- if (!xd->lossless[mbmi->segment_id] && block_signals_txsize(bsize)) {
- // super_block_yrd above includes the cost of the tx_size in the
- // tokenonly rate, but for intra blocks, tx_size is always coded
- // (prediction granularity), so we account for it in the full rate,
- // not the tokenonly rate.
- rd_stats_y->rate -= tx_size_cost(cm, x, bsize, mbmi->tx_size);
- }
- if (num_planes > 1 && !x->skip_chroma_rd) {
- const int uv_mode_cost =
- x->intra_uv_mode_cost[is_cfl_allowed(xd)][mbmi->mode][mbmi->uv_mode];
- rd_stats->rate +=
- rd_stats_uv->rate +
- intra_mode_info_cost_uv(cpi, x, mbmi, bsize, uv_mode_cost);
- }
- if (mbmi->mode != DC_PRED && mbmi->mode != PAETH_PRED)
- rd_stats->rate += intra_cost_penalty;
- rd_stats->dist = rd_stats_y->dist + rd_stats_uv->dist;
-
- // Add the reference frame signaling cost passed in by the caller
- // to the rolling cost variable.
- rd_stats->rate += ref_frame_cost;
- if (rd_stats->skip) {
- // Back out the coefficient coding costs
- rd_stats->rate -= (rd_stats_y->rate + rd_stats_uv->rate);
- rd_stats_y->rate = 0;
- rd_stats_uv->rate = 0;
- // Cost the skip mb case
- rd_stats->rate += x->skip_cost[skip_ctx][1];
- } else {
- // Add in the cost of the no skip flag.
- rd_stats->rate += x->skip_cost[skip_ctx][0];
- }
- // Calculate the final RD estimate for this mode.
- const int64_t this_rd = RDCOST(x->rdmult, rd_stats->rate, rd_stats->dist);
- // Keep record of best intra rd
- if (this_rd < search_state->best_intra_rd) {
- search_state->best_intra_rd = this_rd;
- search_state->best_intra_mode = mbmi->mode;
- }
-
- if (sf->skip_intra_in_interframe) {
- if (search_state->best_rd < (INT64_MAX / 2) &&
- this_rd > (search_state->best_rd + (search_state->best_rd >> 1)))  // This mode costs more than 1.5 * best_rd.
- search_state->skip_intra_modes = 1;
- }
-
- if (!disable_skip) {
- for (int i = 0; i < REFERENCE_MODES; ++i)  // Lower every per-reference-mode best to this_rd where it is smaller.
- search_state->best_pred_rd[i] =
- AOMMIN(search_state->best_pred_rd[i], this_rd);
- }
- return this_rd;
-}
-
-// Records the result of the single-reference inter mode just evaluated for
-// `mbmi`. The smallest simple rd and the smallest modelled rd over the
-// ref_mv_idx candidates are each inserted into the matching per-direction,
-// per-mode list, keeping that list sorted by ascending rd.
-static void collect_single_states(MACROBLOCK *x,
-                                  InterModeSearchState *search_state,
-                                  const MB_MODE_INFO *const mbmi) {
-  const PREDICTION_MODE this_mode = mbmi->mode;
-  const MV_REFERENCE_FRAME ref_frame = mbmi->ref_frame[0];
-  // Direction 0: references up to GOLDEN_FRAME; direction 1: all others.
-  const int dir = (ref_frame > GOLDEN_FRAME) ? 1 : 0;
-  const int mode_offset = INTER_OFFSET(this_mode);
-  const int ref_set = get_drl_refmv_count(x, mbmi->ref_frame, this_mode);
-  int slot;
-
-  // Smallest rd of each kind across the ref_mv_idx candidates.
-  int64_t best_simple = search_state->simple_rd[this_mode][0][ref_frame];
-  int64_t best_modelled = search_state->modelled_rd[this_mode][0][ref_frame];
-  for (int ref_mv_idx = 1; ref_mv_idx < ref_set; ++ref_mv_idx) {
-    const int64_t cand_simple =
-        search_state->simple_rd[this_mode][ref_mv_idx][ref_frame];
-    const int64_t cand_modelled =
-        search_state->modelled_rd[this_mode][ref_mv_idx][ref_frame];
-    if (cand_simple < best_simple) best_simple = cand_simple;
-    if (cand_modelled < best_modelled) best_modelled = cand_modelled;
-  }
-
-  // Sorted insert into the simple-rd list: shift larger entries up by one.
-  SingleInterModeState *const list_s =
-      search_state->single_state[dir][mode_offset];
-  const SingleInterModeState entry_s = { best_simple, ref_frame, 1 };
-  slot = search_state->single_state_cnt[dir][mode_offset];
-  while (slot > 0 && list_s[slot - 1].rd > entry_s.rd) {
-    list_s[slot] = list_s[slot - 1];
-    --slot;
-  }
-  list_s[slot] = entry_s;
-  search_state->single_state_cnt[dir][mode_offset] += 1;
-
-  // Same sorted insert for the modelled-rd list.
-  SingleInterModeState *const list_m =
-      search_state->single_state_modelled[dir][mode_offset];
-  const SingleInterModeState entry_m = { best_modelled, ref_frame, 1 };
-  slot = search_state->single_state_modelled_cnt[dir][mode_offset];
-  while (slot > 0 && list_m[slot - 1].rd > entry_m.rd) {
-    list_m[slot] = list_m[slot - 1];
-    --slot;
-  }
-  list_m[slot] = entry_m;
-  search_state->single_state_modelled_cnt[dir][mode_offset] += 1;
-}
-
-// Post-processes the collected single-reference results.
-// With prune_comp_search_by_single_result >= 1, every entry except the best
-// one of each mode is marked invalid when half of its rd still exceeds the
-// better of the best NEWMV and best GLOBALMV rd (done separately for the
-// simple-rd and the modelled-rd lists).
-// Afterwards single_rd_order is filled for each direction and mode: valid
-// simple-rd entries first, then valid modelled-rd entries not yet listed.
-static void analyze_single_states(const AV1_COMP *cpi,
-                                  InterModeSearchState *search_state) {
-  int dir, mode, i, j;
-
-  if (cpi->sf.prune_comp_search_by_single_result >= 1) {
-    for (dir = 0; dir < 2; ++dir) {
-      SingleInterModeState(*state)[FWD_REFS];
-      int64_t newmv_rd, globalmv_rd, best_rd;
-
-      // The best rd of GLOBALMV or NEWMV is the yardstick for pruning
-      // unlikely reference frames in all modes (NEARESTMV and NEARMV may not
-      // share motion vectors). Index 0, the best of each mode, is never
-      // pruned because it might still form the best combination.
-      state = search_state->single_state[dir];
-      newmv_rd = state[INTER_OFFSET(NEWMV)][0].rd;
-      globalmv_rd = state[INTER_OFFSET(GLOBALMV)][0].rd;
-      best_rd = (newmv_rd < globalmv_rd) ? newmv_rd : globalmv_rd;
-      for (mode = 0; mode < SINGLE_INTER_MODE_NUM; ++mode) {
-        const int cnt = search_state->single_state_cnt[dir][mode];
-        for (i = 1; i < cnt; ++i) {
-          const int64_t rd = state[mode][i].rd;
-          if (rd == INT64_MAX) continue;
-          if ((rd >> 1) > best_rd) state[mode][i].valid = 0;
-        }
-      }
-
-      // Same pruning on the modelled-rd lists.
-      state = search_state->single_state_modelled[dir];
-      newmv_rd = state[INTER_OFFSET(NEWMV)][0].rd;
-      globalmv_rd = state[INTER_OFFSET(GLOBALMV)][0].rd;
-      best_rd = (newmv_rd < globalmv_rd) ? newmv_rd : globalmv_rd;
-      for (mode = 0; mode < SINGLE_INTER_MODE_NUM; ++mode) {
-        const int cnt = search_state->single_state_modelled_cnt[dir][mode];
-        for (i = 1; i < cnt; ++i) {
-          const int64_t rd = state[mode][i].rd;
-          if (rd == INT64_MAX) continue;
-          if ((rd >> 1) > best_rd) state[mode][i].valid = 0;
-        }
-      }
-    }
-  }
-
-  // Build the reference order: simple rd first, then modelled rd.
-  for (dir = 0; dir < 2; ++dir) {
-    for (mode = 0; mode < SINGLE_INTER_MODE_NUM; ++mode) {
-      const SingleInterModeState *const by_simple =
-          search_state->single_state[dir][mode];
-      const SingleInterModeState *const by_model =
-          search_state->single_state_modelled[dir][mode];
-      const int n_simple = search_state->single_state_cnt[dir][mode];
-      const int n_model = search_state->single_state_modelled_cnt[dir][mode];
-      const int limit = (n_simple > n_model) ? n_simple : n_model;
-      MV_REFERENCE_FRAME *const order =
-          search_state->single_rd_order[dir][mode];
-      int filled = 0;
-
-      // Valid simple-rd entries, stopping at the first INT64_MAX rd.
-      for (i = 0; i < n_simple && by_simple[i].rd != INT64_MAX; ++i) {
-        if (by_simple[i].valid) order[filled++] = by_simple[i].ref_frame;
-      }
-      if (filled >= limit) continue;
-
-      // Top up from the modelled-rd list.
-      for (i = 0; i < n_model && by_model[i].rd != INT64_MAX; ++i) {
-        if (!by_model[i].valid) continue;
-
-        const int ref_frame = by_model[i].ref_frame;
-        int listed = 0;
-        int pruned = 0;
-
-        // Already in the order?
-        for (j = 0; j < filled && !listed; ++j)
-          listed = (order[j] == ref_frame);
-
-        if (!listed) {
-          // Was this reference invalidated in the simple-rd list?
-          for (j = 0; j < n_simple && !pruned; ++j)
-            pruned =
-                (ref_frame == by_simple[j].ref_frame && !by_simple[j].valid);
-          if (!pruned) order[filled++] = ref_frame;
-        }
-        if (filled >= limit) break;
-      }
-    }
-  }
-}
-
-// Returns how many leading entries of single_rd_order (for direction `dir`
-// and single-reference mode `mode`) a compound mode's reference is compared
-// against. The starting value is the number of entries before the first
-// NONE_FRAME (at most FWD_REFS); higher values of
-// prune_comp_search_by_single_result reduce it to at most 2 and then to 1.
-static int compound_skip_get_candidates(
-    const AV1_COMP *cpi, const InterModeSearchState *search_state,
-    const int dir, const PREDICTION_MODE mode) {
-  const int prune_level = cpi->sf.prune_comp_search_by_single_result;
-  const int mode_offset = INTER_OFFSET(mode);
-  const SingleInterModeState *const best_simple =
-      search_state->single_state[dir][mode_offset];
-  const SingleInterModeState *const best_modelled =
-      search_state->single_state_modelled[dir][mode_offset];
-
-  // Count the populated head of the order list.
-  int listed = 0;
-  while (listed < FWD_REFS &&
-         search_state->single_rd_order[dir][mode_offset][listed] !=
-             NONE_FRAME) {
-    ++listed;
-  }
-
-  int candidates = listed;
-  if (prune_level >= 2 && candidates > 2) candidates = 2;
-  if (prune_level >= 3) {
-    // Simple and modelled rd agree on the best reference.
-    const int same_best = best_simple[0].rd != INT64_MAX &&
-                          best_modelled[0].rd != INT64_MAX &&
-                          best_simple[0].ref_frame == best_modelled[0].ref_frame;
-    if (same_best || mode == NEARMV || mode == GLOBALMV) candidates = 1;
-  }
-  return candidates;
-}
-
-// Decides whether a compound mode can be skipped based on the results of
-// the single-reference modes it is built from. Returns 1 (skip) when, for
-// either reference, the matching single mode was searched with identical
-// motion vectors but that reference is not among the leading candidates of
-// single_rd_order; returns 0 otherwise.
-static int compound_skip_by_single_states(
-    const AV1_COMP *cpi, const InterModeSearchState *search_state,
-    const PREDICTION_MODE this_mode, const MV_REFERENCE_FRAME ref_frame,
-    const MV_REFERENCE_FRAME second_ref_frame, const MACROBLOCK *x) {
-  const MV_REFERENCE_FRAME refs[2] = { ref_frame, second_ref_frame };
-  const int mode[2] = { compound_ref0_mode(this_mode),
-                        compound_ref1_mode(this_mode) };
-  const int mode_offset[2] = { INTER_OFFSET(mode[0]), INTER_OFFSET(mode[1]) };
-  const int mode_dir[2] = { (refs[0] > GOLDEN_FRAME) ? 1 : 0,
-                            (refs[1] > GOLDEN_FRAME) ? 1 : 0 };
-  int was_searched[2] = { 0, 0 };
-  int same_mv[2] = { 1, 1 };
-  int side, k;
-
-  // Step 1: was each reference evaluated with the corresponding single mode?
-  for (side = 0; side < 2; ++side) {
-    const SingleInterModeState *const state =
-        search_state->single_state[mode_dir[side]][mode_offset[side]];
-    const int state_cnt =
-        search_state->single_state_cnt[mode_dir[side]][mode_offset[side]];
-    k = 0;
-    while (k < state_cnt && state[k].ref_frame != refs[side]) ++k;
-    was_searched[side] = (k < state_cnt);
-  }
-
-  // Step 2: for NEARESTMV / NEARMV components, the single-mode result only
-  // applies if every ref_mv_idx yields the same vector in both set-ups.
-  const int ref_set = get_drl_refmv_count(x, refs, this_mode);
-  for (side = 0; side < 2; ++side) {
-    if (mode[side] != NEARESTMV && mode[side] != NEARMV) continue;
-
-    const MV_REFERENCE_FRAME single_refs[2] = { refs[side], NONE_FRAME };
-    for (int ref_mv_idx = 0; ref_mv_idx < ref_set; ref_mv_idx++) {
-      int_mv single_mv;
-      int_mv comp_mv;
-      get_this_mv(&single_mv, mode[side], 0, ref_mv_idx, single_refs,
-                  x->mbmi_ext);
-      get_this_mv(&comp_mv, this_mode, side, ref_mv_idx, refs, x->mbmi_ext);
-      if (single_mv.as_int != comp_mv.as_int) {
-        same_mv[side] = 0;
-        break;
-      }
-    }
-  }
-
-  // Step 3: skip when a comparable reference is not a leading candidate.
-  for (side = 0; side < 2; ++side) {
-    if (!was_searched[side] || !same_mv[side]) continue;
-
-    const int candidates = compound_skip_get_candidates(
-        cpi, search_state, mode_dir[side], mode[side]);
-    const MV_REFERENCE_FRAME *const ref_order =
-        search_state->single_rd_order[mode_dir[side]][mode_offset[side]];
-    k = 0;
-    while (k < candidates && ref_order[k] != refs[side]) ++k;
-    if (k >= candidates) return 1;
-  }
-
-  return 0;
-}
-
-// Returns 1 when more than two references are available and the reference
-// pair of `mode` equals the first two entries of dist_order_refs, in either
-// order; returns 0 otherwise.
-static INLINE int sf_check_is_drop_ref(const MODE_DEFINITION *mode,
-                                       InterModeSearchState *search_state) {
-  if (search_state->num_available_refs <= 2) return 0;
-
-  const MV_REFERENCE_FRAME first = mode->ref_frame[0];
-  const MV_REFERENCE_FRAME second = mode->ref_frame[1];
-  const int top0 = search_state->dist_order_refs[0];
-  const int top1 = search_state->dist_order_refs[1];
-
-  const int same_order = (first == top0) && (second == top1);
-  const int swapped = (first == top1) && (second == top0);
-  return (same_order || swapped) ? 1 : 0;  // 1: drop this pair of refs
-}
-
-// Collects per-reference distortion for the drop_ref speed feature.
-//
-// For the first reference of `mode`, the largest distortion2 seen so far is
-// kept in dist_refs[ref_frame - LAST_FRAME] and the reference is noted in
-// dist_order_refs at the same index. Once the ALTREF_FRAME / GLOBALMV mode
-// is reached, both arrays are sorted by descending distortion and
-// num_available_refs is derived from the position of the first -1 entry.
-//
-// search_state: updated in place (dist_refs, dist_order_refs,
-//               num_available_refs).
-// mode:         mode table entry that was just evaluated.
-// distortion2:  distortion measured for that mode.
-static INLINE void sf_drop_ref_analyze(InterModeSearchState *search_state,
-                                       const MODE_DEFINITION *mode,
-                                       int64_t distortion2) {
-  const PREDICTION_MODE this_mode = mode->mode;
-  const MV_REFERENCE_FRAME ref_frame = mode->ref_frame[0];
-  const int idx = ref_frame - LAST_FRAME;
-
-  // Index 0 is left untouched, exactly as before. The explicit range check
-  // additionally rejects a negative index (a reference below LAST_FRAME,
-  // e.g. an intra mode), which the former `idx &&` test let through and
-  // which would address memory in front of both arrays.
-  if (idx > 0 && idx < REF_FRAMES &&
-      distortion2 > search_state->dist_refs[idx]) {
-    search_state->dist_refs[idx] = distortion2;
-    search_state->dist_order_refs[idx] = ref_frame;
-  }
-
-  // Reach the last single ref prediction mode
-  if (ref_frame == ALTREF_FRAME && this_mode == GLOBALMV) {
-    // Exchange sort of dist_refs (descending), carrying the order index
-    // along so both arrays stay paired.
-    for (int i = 0; i < REF_FRAMES; ++i) {
-      for (int k = i + 1; k < REF_FRAMES; ++k) {
-        if (search_state->dist_refs[i] < search_state->dist_refs[k]) {
-          int64_t tmp_dist = search_state->dist_refs[i];
-          search_state->dist_refs[i] = search_state->dist_refs[k];
-          search_state->dist_refs[k] = tmp_dist;
-
-          int tmp_idx = search_state->dist_order_refs[i];
-          search_state->dist_order_refs[i] = search_state->dist_order_refs[k];
-          search_state->dist_order_refs[k] = tmp_idx;
-        }
-      }
-    }
-    // NOTE(review): -1 looks like the "no distortion recorded" marker set
-    // at initialisation; confirm in init_inter_mode_search_state.
-    // num_available_refs ends up as (index of the last non -1 entry) + 1.
-    for (int i = 0; i < REF_FRAMES; ++i) {
-      if (search_state->dist_refs[i] == -1) break;
-      search_state->num_available_refs = i;
-    }
-    search_state->num_available_refs++;
-  }
-}
-
-// Allocates the five scratch buffers of `bufs`. Each allocation goes through
-// CHECK_MEM_ERROR with `cm`. The two predictor buffers and the mask buffer
-// hold 2 * MAX_SB_SQUARE elements, the residual and difference buffers
-// MAX_SB_SQUARE elements.
-static void alloc_compound_type_rd_buffers(AV1_COMMON *const cm,
-                                           CompoundTypeRdBuffers *const bufs) {
-  const size_t pred0_bytes = 2 * MAX_SB_SQUARE * sizeof(*bufs->pred0);
-  const size_t pred1_bytes = 2 * MAX_SB_SQUARE * sizeof(*bufs->pred1);
-  const size_t residual1_bytes = MAX_SB_SQUARE * sizeof(*bufs->residual1);
-  const size_t diff10_bytes = MAX_SB_SQUARE * sizeof(*bufs->diff10);
-  const size_t mask_bytes =
-      2 * MAX_SB_SQUARE * sizeof(*bufs->tmp_best_mask_buf);
-
-  // Predictors: 16-byte aligned.
-  CHECK_MEM_ERROR(cm, bufs->pred0, (uint8_t *)aom_memalign(16, pred0_bytes));
-  CHECK_MEM_ERROR(cm, bufs->pred1, (uint8_t *)aom_memalign(16, pred1_bytes));
-
-  // 16-bit residual / difference buffers: 32-byte aligned.
-  CHECK_MEM_ERROR(cm, bufs->residual1,
-                  (int16_t *)aom_memalign(32, residual1_bytes));
-  CHECK_MEM_ERROR(cm, bufs->diff10,
-                  (int16_t *)aom_memalign(32, diff10_bytes));
-
-  // Mask buffer: plain allocation.
-  CHECK_MEM_ERROR(cm, bufs->tmp_best_mask_buf,
-                  (uint8_t *)aom_malloc(mask_bytes));
-}
-
-// Frees every buffer owned by `bufs` and then zeroes the structure so that
-// no stale pointer is left behind.
-static void release_compound_type_rd_buffers(
-    CompoundTypeRdBuffers *const bufs) {
-  void *const owned[] = { bufs->pred0, bufs->pred1, bufs->residual1,
-                          bufs->diff10, bufs->tmp_best_mask_buf };
-  for (size_t k = 0; k < sizeof(owned) / sizeof(owned[0]); ++k) {
-    aom_free(owned[k]);
-  }
-  av1_zero(*bufs);  // All pointers become NULL.
-}
-
-void av1_rd_pick_inter_mode_sb(AV1_COMP *cpi, TileDataEnc *tile_data,
- MACROBLOCK *x, int mi_row, int mi_col,
- RD_STATS *rd_cost, BLOCK_SIZE bsize,
- PICK_MODE_CONTEXT *ctx, int64_t best_rd_so_far) {
- AV1_COMMON *const cm = &cpi->common;
- const int num_planes = av1_num_planes(cm);
- const SPEED_FEATURES *const sf = &cpi->sf;
- MACROBLOCKD *const xd = &x->e_mbd;
- MB_MODE_INFO *const mbmi = xd->mi[0];
- const int try_palette =
- av1_allow_palette(cm->allow_screen_content_tools, mbmi->sb_type);
- PALETTE_MODE_INFO *const pmi = &mbmi->palette_mode_info;
- const struct segmentation *const seg = &cm->seg;
- PREDICTION_MODE this_mode;
- unsigned char segment_id = mbmi->segment_id;
- int i;
- struct buf_2d yv12_mb[REF_FRAMES][MAX_MB_PLANE];
- unsigned int ref_costs_single[REF_FRAMES];
- unsigned int ref_costs_comp[REF_FRAMES][REF_FRAMES];
- int *comp_inter_cost = x->comp_inter_cost[av1_get_reference_mode_context(xd)];
- int *mode_map = tile_data->mode_map[bsize];
- uint32_t mode_skip_mask[REF_FRAMES];
- uint16_t ref_frame_skip_mask[2];
-
- InterModeSearchState search_state;
- init_inter_mode_search_state(&search_state, cpi, tile_data, x, bsize,
- best_rd_so_far);
- INTERINTRA_MODE interintra_modes[REF_FRAMES] = {
- INTERINTRA_MODES, INTERINTRA_MODES, INTERINTRA_MODES, INTERINTRA_MODES,
- INTERINTRA_MODES, INTERINTRA_MODES, INTERINTRA_MODES, INTERINTRA_MODES
- };
- HandleInterModeArgs args = {
- { NULL }, { MAX_SB_SIZE, MAX_SB_SIZE, MAX_SB_SIZE },
- { NULL }, { MAX_SB_SIZE >> 1, MAX_SB_SIZE >> 1, MAX_SB_SIZE >> 1 },
- NULL, NULL,
- NULL, search_state.modelled_rd,
- { { 0 } }, INT_MAX,
- INT_MAX, search_state.simple_rd,
- 0, interintra_modes
- };
- for (i = 0; i < REF_FRAMES; ++i) x->pred_sse[i] = INT_MAX;
-
- av1_invalid_rd_stats(rd_cost);
-
- // init params, set frame modes, speed features
- set_params_rd_pick_inter_mode(
- cpi, x, &args, bsize, mi_row, mi_col, ref_frame_skip_mask, mode_skip_mask,
- ctx->skip_ref_frame_mask, ref_costs_single, ref_costs_comp, yv12_mb);
-
-#if CONFIG_COLLECT_INTER_MODE_RD_STATS
- int64_t best_est_rd = INT64_MAX;
- // TODO(angiebird): Turn this on when this speed feature is well tested
-#if 1
- const InterModeRdModel *md = &tile_data->inter_mode_rd_models[bsize];
- const int do_tx_search = !md->ready;
-#else
- const int do_tx_search = 1;
-#endif
- InterModesInfo *inter_modes_info = &tile_data->inter_modes_info;
- inter_modes_info->num = 0;
-#endif
-
- int intra_mode_num = 0;
- int intra_mode_idx_ls[MAX_MODES];
- int reach_first_comp_mode = 0;
-
- // Temporary buffers used by handle_inter_mode().
- // We allocate them once and reuse it in every call to that function.
- // Note: Must be allocated on the heap due to large size of the arrays.
- uint8_t *tmp_buf_orig;
- CHECK_MEM_ERROR(
- cm, tmp_buf_orig,
- (uint8_t *)aom_memalign(32, 2 * MAX_MB_PLANE * MAX_SB_SQUARE));
- uint8_t *const tmp_buf = get_buf_by_bd(xd, tmp_buf_orig);
-
- CompoundTypeRdBuffers rd_buffers;
- alloc_compound_type_rd_buffers(cm, &rd_buffers);
-
- for (int midx = 0; midx < MAX_MODES; ++midx) {
- int mode_index = mode_map[midx];
- int64_t this_rd = INT64_MAX;
- int disable_skip = 0;
- int rate2 = 0, rate_y = 0, rate_uv = 0;
- int64_t distortion2 = 0;
- int skippable = 0;
- int this_skip2 = 0;
- const MODE_DEFINITION *mode_order = &av1_mode_order[mode_index];
- const MV_REFERENCE_FRAME ref_frame = mode_order->ref_frame[0];
- const MV_REFERENCE_FRAME second_ref_frame = mode_order->ref_frame[1];
- const int comp_pred = second_ref_frame > INTRA_FRAME;
- this_mode = mode_order->mode;
-
- init_mbmi(mbmi, mode_index, cm);
-
- x->skip = 0;
- set_ref_ptrs(cm, xd, ref_frame, second_ref_frame);
-
- // Reach the first compound prediction mode
- if (sf->prune_comp_search_by_single_result > 0 && comp_pred &&
- reach_first_comp_mode == 0) {
- analyze_single_states(cpi, &search_state);
- reach_first_comp_mode = 1;
- }
- const int ret = inter_mode_search_order_independent_skip(
- cpi, ctx, x, bsize, mode_index, mi_row, mi_col, mode_skip_mask,
- ref_frame_skip_mask, &search_state);
- if (ret == 1) continue;
- args.skip_motion_mode = (ret == 2);
-
- if (sf->drop_ref && comp_pred) {
- if (sf_check_is_drop_ref(mode_order, &search_state)) {
- continue;
- }
- }
-
- if (search_state.best_rd < search_state.mode_threshold[mode_index])
- continue;
-
- if (sf->prune_comp_search_by_single_result > 0 && comp_pred) {
- if (compound_skip_by_single_states(cpi, &search_state, this_mode,
- ref_frame, second_ref_frame, x))
- continue;
- }
-
- const int ref_frame_cost = comp_pred
- ? ref_costs_comp[ref_frame][second_ref_frame]
- : ref_costs_single[ref_frame];
- const int compmode_cost =
- is_comp_ref_allowed(mbmi->sb_type) ? comp_inter_cost[comp_pred] : 0;
- const int real_compmode_cost =
- cm->reference_mode == REFERENCE_MODE_SELECT ? compmode_cost : 0;
-
- if (comp_pred) {
- if ((sf->mode_search_skip_flags & FLAG_SKIP_COMP_BESTINTRA) &&
- search_state.best_mode_index >= 0 &&
- search_state.best_mbmode.ref_frame[0] == INTRA_FRAME)
- continue;
- }
-
- if (ref_frame == INTRA_FRAME) {
- if (sf->adaptive_mode_search)
- if ((x->source_variance << num_pels_log2_lookup[bsize]) >
- search_state.best_pred_sse)
- continue;
-
- if (this_mode != DC_PRED) {
- // Only search the oblique modes if the best so far is
- // one of the neighboring directional modes
- if ((sf->mode_search_skip_flags & FLAG_SKIP_INTRA_BESTINTER) &&
- (this_mode >= D45_PRED && this_mode <= PAETH_PRED)) {
- if (search_state.best_mode_index >= 0 &&
- search_state.best_mbmode.ref_frame[0] > INTRA_FRAME)
- continue;
- }
- if (sf->mode_search_skip_flags & FLAG_SKIP_INTRA_DIRMISMATCH) {
- if (conditional_skipintra(this_mode, search_state.best_intra_mode))
- continue;
- }
- }
- }
-
- // Select prediction reference frames.
- for (i = 0; i < num_planes; i++) {
- xd->plane[i].pre[0] = yv12_mb[ref_frame][i];
- if (comp_pred) xd->plane[i].pre[1] = yv12_mb[second_ref_frame][i];
- }
-
- if (ref_frame == INTRA_FRAME) {
- intra_mode_idx_ls[intra_mode_num++] = mode_index;
- continue;
- } else {
- mbmi->angle_delta[PLANE_TYPE_Y] = 0;
- mbmi->angle_delta[PLANE_TYPE_UV] = 0;
- mbmi->filter_intra_mode_info.use_filter_intra = 0;
- mbmi->ref_mv_idx = 0;
- int64_t ref_best_rd = search_state.best_rd;
- {
- RD_STATS rd_stats, rd_stats_y, rd_stats_uv;
- av1_init_rd_stats(&rd_stats);
- rd_stats.rate = rate2;
-
- // Point to variables that are maintained between loop iterations
- args.single_newmv = search_state.single_newmv;
- args.single_newmv_rate = search_state.single_newmv_rate;
- args.single_newmv_valid = search_state.single_newmv_valid;
- args.single_comp_cost = real_compmode_cost;
- args.ref_frame_cost = ref_frame_cost;
-#if CONFIG_COLLECT_INTER_MODE_RD_STATS
- this_rd = handle_inter_mode(
- cpi, x, bsize, &rd_stats, &rd_stats_y, &rd_stats_uv, &disable_skip,
- mi_row, mi_col, &args, ref_best_rd, tmp_buf, &rd_buffers, tile_data,
- &best_est_rd, do_tx_search, inter_modes_info);
-#else
- this_rd = handle_inter_mode(cpi, x, bsize, &rd_stats, &rd_stats_y,
- &rd_stats_uv, &disable_skip, mi_row, mi_col,
- &args, ref_best_rd, tmp_buf, &rd_buffers);
-#endif
- rate2 = rd_stats.rate;
- skippable = rd_stats.skip;
- distortion2 = rd_stats.dist;
- rate_y = rd_stats_y.rate;
- rate_uv = rd_stats_uv.rate;
- }
-
- if (sf->prune_comp_search_by_single_result > 0 &&
- is_inter_singleref_mode(this_mode)) {
- collect_single_states(x, &search_state, mbmi);
- }
-
- if (this_rd == INT64_MAX) continue;
-
- this_skip2 = mbmi->skip;
- this_rd = RDCOST(x->rdmult, rate2, distortion2);
- if (this_skip2) {
- rate_y = 0;
- rate_uv = 0;
- }
- }
-
- // Did this mode help.. i.e. is it the new best mode
- if (this_rd < search_state.best_rd || x->skip) {
- int mode_excluded = 0;
- if (comp_pred) {
- mode_excluded = cm->reference_mode == SINGLE_REFERENCE;
- }
- if (!mode_excluded) {
- // Note index of best mode so far
- search_state.best_mode_index = mode_index;
-
- if (ref_frame == INTRA_FRAME) {
- /* required for left and above block mv */
- mbmi->mv[0].as_int = 0;
- } else {
- search_state.best_pred_sse = x->pred_sse[ref_frame];
- }
-
- rd_cost->rate = rate2;
- rd_cost->dist = distortion2;
- rd_cost->rdcost = this_rd;
- search_state.best_rd = this_rd;
- search_state.best_mbmode = *mbmi;
- search_state.best_skip2 = this_skip2;
- search_state.best_mode_skippable = skippable;
-#if CONFIG_COLLECT_INTER_MODE_RD_STATS
- if (do_tx_search) {
- // When do_tx_search == 0, handle_inter_mode won't provide correct
- // rate_y and rate_uv because txfm_search process is replaced by
- // rd estimation.
- // Therfore, we should avoid updating best_rate_y and best_rate_uv
- // here. These two values will be updated when txfm_search is called
- search_state.best_rate_y =
- rate_y +
- x->skip_cost[av1_get_skip_context(xd)][this_skip2 || skippable];
- search_state.best_rate_uv = rate_uv;
- }
-#else // CONFIG_COLLECT_INTER_MODE_RD_STATS
- search_state.best_rate_y =
- rate_y +
- x->skip_cost[av1_get_skip_context(xd)][this_skip2 || skippable];
- search_state.best_rate_uv = rate_uv;
-#endif // CONFIG_COLLECT_INTER_MODE_RD_STATS
- memcpy(ctx->blk_skip, x->blk_skip,
- sizeof(x->blk_skip[0]) * ctx->num_4x4_blk);
- }
- }
-
- /* keep record of best compound/single-only prediction */
- if (!disable_skip && ref_frame != INTRA_FRAME) {
- int64_t single_rd, hybrid_rd, single_rate, hybrid_rate;
-
- if (cm->reference_mode == REFERENCE_MODE_SELECT) {
- single_rate = rate2 - compmode_cost;
- hybrid_rate = rate2;
- } else {
- single_rate = rate2;
- hybrid_rate = rate2 + compmode_cost;
- }
-
- single_rd = RDCOST(x->rdmult, single_rate, distortion2);
- hybrid_rd = RDCOST(x->rdmult, hybrid_rate, distortion2);
-
- if (!comp_pred) {
- if (single_rd < search_state.best_pred_rd[SINGLE_REFERENCE])
- search_state.best_pred_rd[SINGLE_REFERENCE] = single_rd;
- } else {
- if (single_rd < search_state.best_pred_rd[COMPOUND_REFERENCE])
- search_state.best_pred_rd[COMPOUND_REFERENCE] = single_rd;
- }
- if (hybrid_rd < search_state.best_pred_rd[REFERENCE_MODE_SELECT])
- search_state.best_pred_rd[REFERENCE_MODE_SELECT] = hybrid_rd;
- }
- if (sf->drop_ref && second_ref_frame == NONE_FRAME) {
- // Collect data from single ref mode, and analyze data.
- sf_drop_ref_analyze(&search_state, mode_order, distortion2);
- }
-
- if (x->skip && !comp_pred) break;
- }
-
- aom_free(tmp_buf_orig);
- tmp_buf_orig = NULL;
- release_compound_type_rd_buffers(&rd_buffers);
-
-#if CONFIG_COLLECT_INTER_MODE_RD_STATS
- if (!do_tx_search) {
- inter_modes_info_sort(inter_modes_info, inter_modes_info->rd_idx_pair_arr);
- search_state.best_rd = INT64_MAX;
-
- int64_t top_est_rd =
- inter_modes_info->est_rd_arr[inter_modes_info->rd_idx_pair_arr[0].idx];
- for (int j = 0; j < inter_modes_info->num; ++j) {
- const int data_idx = inter_modes_info->rd_idx_pair_arr[j].idx;
- *mbmi = inter_modes_info->mbmi_arr[data_idx];
- int64_t curr_est_rd = inter_modes_info->est_rd_arr[data_idx];
- if (curr_est_rd * 0.9 > top_est_rd) {
- continue;
- }
- const int mode_rate = inter_modes_info->mode_rate_arr[data_idx];
-
- x->skip = 0;
- set_ref_ptrs(cm, xd, mbmi->ref_frame[0], mbmi->ref_frame[1]);
-
- // Select prediction reference frames.
- const int is_comp_pred = mbmi->ref_frame[1] > INTRA_FRAME;
- for (i = 0; i < num_planes; i++) {
- xd->plane[i].pre[0] = yv12_mb[mbmi->ref_frame[0]][i];
- if (is_comp_pred) xd->plane[i].pre[1] = yv12_mb[mbmi->ref_frame[1]][i];
- }
-
- RD_STATS rd_stats;
- RD_STATS rd_stats_y;
- RD_STATS rd_stats_uv;
-
- av1_build_inter_predictors_sb(cm, xd, mi_row, mi_col, NULL, bsize);
- if (mbmi->motion_mode == OBMC_CAUSAL)
- av1_build_obmc_inter_predictors_sb(cm, xd, mi_row, mi_col);
-
- if (!txfm_search(cpi, x, bsize, mi_row, mi_col, &rd_stats, &rd_stats_y,
- &rd_stats_uv, mode_rate, search_state.best_rd)) {
- continue;
- } else {
- const int skip_ctx = av1_get_skip_context(xd);
- inter_mode_data_push(tile_data, mbmi->sb_type, rd_stats.sse,
- rd_stats.dist,
- rd_stats_y.rate + rd_stats_uv.rate +
- x->skip_cost[skip_ctx][mbmi->skip]);
- }
- rd_stats.rdcost = RDCOST(x->rdmult, rd_stats.rate, rd_stats.dist);
-
- if (rd_stats.rdcost < search_state.best_rd) {
- search_state.best_rd = rd_stats.rdcost;
- // Note index of best mode so far
- const int mode_index = get_prediction_mode_idx(
- mbmi->mode, mbmi->ref_frame[0], mbmi->ref_frame[1]);
- search_state.best_mode_index = mode_index;
- *rd_cost = rd_stats;
- search_state.best_rd = rd_stats.rdcost;
- search_state.best_mbmode = *mbmi;
- search_state.best_skip2 = mbmi->skip;
- search_state.best_mode_skippable = rd_stats.skip;
- search_state.best_rate_y =
- rd_stats_y.rate +
- x->skip_cost[av1_get_skip_context(xd)][rd_stats.skip || mbmi->skip];
- search_state.best_rate_uv = rd_stats_uv.rate;
- memcpy(ctx->blk_skip, x->blk_skip,
- sizeof(x->blk_skip[0]) * ctx->num_4x4_blk);
- }
- }
- }
-#endif
-
- for (int j = 0; j < intra_mode_num; ++j) {
- const int mode_index = intra_mode_idx_ls[j];
- const MV_REFERENCE_FRAME ref_frame =
- av1_mode_order[mode_index].ref_frame[0];
- assert(av1_mode_order[mode_index].ref_frame[1] == NONE_FRAME);
- assert(ref_frame == INTRA_FRAME);
- if (sf->skip_intra_in_interframe && search_state.skip_intra_modes) break;
- init_mbmi(mbmi, mode_index, cm);
- x->skip = 0;
- set_ref_ptrs(cm, xd, INTRA_FRAME, NONE_FRAME);
-
- // Select prediction reference frames.
- for (i = 0; i < num_planes; i++) {
- xd->plane[i].pre[0] = yv12_mb[ref_frame][i];
- }
-
- RD_STATS intra_rd_stats, intra_rd_stats_y, intra_rd_stats_uv;
-
- const int ref_frame_cost = ref_costs_single[ref_frame];
- intra_rd_stats.rdcost = handle_intra_mode(
- &search_state, cpi, x, bsize, mi_row, mi_col, ref_frame_cost, ctx, 0,
- &intra_rd_stats, &intra_rd_stats_y, &intra_rd_stats_uv);
- if (intra_rd_stats.rdcost < search_state.best_rd) {
- search_state.best_rd = intra_rd_stats.rdcost;
- // Note index of best mode so far
- search_state.best_mode_index = mode_index;
- *rd_cost = intra_rd_stats;
- search_state.best_rd = intra_rd_stats.rdcost;
- search_state.best_mbmode = *mbmi;
- search_state.best_skip2 = 0;
- search_state.best_mode_skippable = intra_rd_stats.skip;
- search_state.best_rate_y =
- intra_rd_stats_y.rate +
- x->skip_cost[av1_get_skip_context(xd)][intra_rd_stats.skip];
- search_state.best_rate_uv = intra_rd_stats_uv.rate;
- memcpy(ctx->blk_skip, x->blk_skip,
- sizeof(x->blk_skip[0]) * ctx->num_4x4_blk);
- }
- }
-
- // In effect only when speed >= 2.
- sf_refine_fast_tx_type_search(
- cpi, x, mi_row, mi_col, rd_cost, bsize, ctx, search_state.best_mode_index,
- &search_state.best_mbmode, yv12_mb, search_state.best_rate_y,
- search_state.best_rate_uv, &search_state.best_skip2);
-
- // Only try palette mode when the best mode so far is an intra mode.
- if (try_palette && !is_inter_mode(search_state.best_mbmode.mode)) {
- search_palette_mode(cpi, x, mi_row, mi_col, rd_cost, ctx, bsize, mbmi, pmi,
- ref_costs_single, &search_state);
- }
-
- search_state.best_mbmode.skip_mode = 0;
- if (cm->skip_mode_flag &&
- !segfeature_active(seg, segment_id, SEG_LVL_REF_FRAME) &&
- is_comp_ref_allowed(bsize)) {
- rd_pick_skip_mode(rd_cost, &search_state, cpi, x, bsize, mi_row, mi_col,
- yv12_mb);
- }
-
- // Make sure that the ref_mv_idx is only nonzero when we're
- // using a mode which can support ref_mv_idx
- if (search_state.best_mbmode.ref_mv_idx != 0 &&
- !(search_state.best_mbmode.mode == NEWMV ||
- search_state.best_mbmode.mode == NEW_NEWMV ||
- have_nearmv_in_inter_mode(search_state.best_mbmode.mode))) {
- search_state.best_mbmode.ref_mv_idx = 0;
- }
-
- if (search_state.best_mode_index < 0 ||
- search_state.best_rd >= best_rd_so_far) {
- rd_cost->rate = INT_MAX;
- rd_cost->rdcost = INT64_MAX;
- return;
- }
-
- assert(
- (cm->interp_filter == SWITCHABLE) ||
- (cm->interp_filter ==
- av1_extract_interp_filter(search_state.best_mbmode.interp_filters, 0)) ||
- !is_inter_block(&search_state.best_mbmode));
- assert(
- (cm->interp_filter == SWITCHABLE) ||
- (cm->interp_filter ==
- av1_extract_interp_filter(search_state.best_mbmode.interp_filters, 1)) ||
- !is_inter_block(&search_state.best_mbmode));
-
- if (!cpi->rc.is_src_frame_alt_ref)
- av1_update_rd_thresh_fact(cm, tile_data->thresh_freq_fact,
- sf->adaptive_rd_thresh, bsize,
- search_state.best_mode_index);
-
- // macroblock modes
- *mbmi = search_state.best_mbmode;
- x->skip |= search_state.best_skip2;
-
- // Note: this section is needed since the mode may have been forced to
- // GLOBALMV by the all-zero mode handling of ref-mv.
- if (mbmi->mode == GLOBALMV || mbmi->mode == GLOBAL_GLOBALMV) {
- // Correct the interp filters for GLOBALMV
- if (is_nontrans_global_motion(xd, xd->mi[0])) {
- assert(mbmi->interp_filters ==
- av1_broadcast_interp_filter(
- av1_unswitchable_filter(cm->interp_filter)));
- }
- }
-
- for (i = 0; i < REFERENCE_MODES; ++i) {
- if (search_state.best_pred_rd[i] == INT64_MAX)
- search_state.best_pred_diff[i] = INT_MIN;
- else
- search_state.best_pred_diff[i] =
- search_state.best_rd - search_state.best_pred_rd[i];
- }
-
- x->skip |= search_state.best_mode_skippable;
-
- assert(search_state.best_mode_index >= 0);
-
- store_coding_context(x, ctx, search_state.best_mode_index,
- search_state.best_pred_diff,
- search_state.best_mode_skippable);
-
- if (pmi->palette_size[1] > 0) {
- assert(try_palette);
- restore_uv_color_map(cpi, x);
- }
-}
-
-void av1_rd_pick_inter_mode_sb_seg_skip(const AV1_COMP *cpi,
- TileDataEnc *tile_data, MACROBLOCK *x,
- int mi_row, int mi_col,
- RD_STATS *rd_cost, BLOCK_SIZE bsize,
- PICK_MODE_CONTEXT *ctx,
- int64_t best_rd_so_far) {
- const AV1_COMMON *const cm = &cpi->common;
- MACROBLOCKD *const xd = &x->e_mbd;
- MB_MODE_INFO *const mbmi = xd->mi[0];
- unsigned char segment_id = mbmi->segment_id;
- const int comp_pred = 0;
- int i;
- int64_t best_pred_diff[REFERENCE_MODES];
- unsigned int ref_costs_single[REF_FRAMES];
- unsigned int ref_costs_comp[REF_FRAMES][REF_FRAMES];
- int *comp_inter_cost = x->comp_inter_cost[av1_get_reference_mode_context(xd)];
- InterpFilter best_filter = SWITCHABLE;
- int64_t this_rd = INT64_MAX;
- int rate2 = 0;
- const int64_t distortion2 = 0;
- (void)mi_row;
- (void)mi_col;
-
- av1_collect_neighbors_ref_counts(xd);
-
- estimate_ref_frame_costs(cm, xd, x, segment_id, ref_costs_single,
- ref_costs_comp);
-
- for (i = 0; i < REF_FRAMES; ++i) x->pred_sse[i] = INT_MAX;
- for (i = LAST_FRAME; i < REF_FRAMES; ++i) x->pred_mv_sad[i] = INT_MAX;
-
- rd_cost->rate = INT_MAX;
-
- assert(segfeature_active(&cm->seg, segment_id, SEG_LVL_SKIP));
-
- mbmi->palette_mode_info.palette_size[0] = 0;
- mbmi->palette_mode_info.palette_size[1] = 0;
- mbmi->filter_intra_mode_info.use_filter_intra = 0;
- mbmi->mode = GLOBALMV;
- mbmi->motion_mode = SIMPLE_TRANSLATION;
- mbmi->uv_mode = UV_DC_PRED;
- if (segfeature_active(&cm->seg, segment_id, SEG_LVL_REF_FRAME))
- mbmi->ref_frame[0] = get_segdata(&cm->seg, segment_id, SEG_LVL_REF_FRAME);
- else
- mbmi->ref_frame[0] = LAST_FRAME;
- mbmi->ref_frame[1] = NONE_FRAME;
- mbmi->mv[0].as_int =
- gm_get_motion_vector(&cm->global_motion[mbmi->ref_frame[0]],
- cm->allow_high_precision_mv, bsize, mi_col, mi_row,
- cm->cur_frame_force_integer_mv)
- .as_int;
- mbmi->tx_size = max_txsize_lookup[bsize];
- x->skip = 1;
-
- mbmi->ref_mv_idx = 0;
-
- mbmi->motion_mode = SIMPLE_TRANSLATION;
- av1_count_overlappable_neighbors(cm, xd, mi_row, mi_col);
- if (is_motion_variation_allowed_bsize(bsize) && !has_second_ref(mbmi)) {
- int pts[SAMPLES_ARRAY_SIZE], pts_inref[SAMPLES_ARRAY_SIZE];
- mbmi->num_proj_ref = findSamples(cm, xd, mi_row, mi_col, pts, pts_inref);
- // Select the samples according to motion vector difference
- if (mbmi->num_proj_ref > 1)
- mbmi->num_proj_ref = selectSamples(&mbmi->mv[0].as_mv, pts, pts_inref,
- mbmi->num_proj_ref, bsize);
- }
-
- set_default_interp_filters(mbmi, cm->interp_filter);
-
- if (cm->interp_filter != SWITCHABLE) {
- best_filter = cm->interp_filter;
- } else {
- best_filter = EIGHTTAP_REGULAR;
- if (av1_is_interp_needed(xd) && av1_is_interp_search_needed(xd) &&
- x->source_variance >= cpi->sf.disable_filter_search_var_thresh) {
- int rs;
- int best_rs = INT_MAX;
- for (i = 0; i < SWITCHABLE_FILTERS; ++i) {
- mbmi->interp_filters = av1_broadcast_interp_filter(i);
- rs = av1_get_switchable_rate(cm, x, xd);
- if (rs < best_rs) {
- best_rs = rs;
- best_filter = av1_extract_interp_filter(mbmi->interp_filters, 0);
- }
- }
- }
- }
- // Set the appropriate filter
- mbmi->interp_filters = av1_broadcast_interp_filter(best_filter);
- rate2 += av1_get_switchable_rate(cm, x, xd);
-
- if (cm->reference_mode == REFERENCE_MODE_SELECT)
- rate2 += comp_inter_cost[comp_pred];
-
- // Estimate the reference frame signaling cost and add it
- // to the rolling cost variable.
- rate2 += ref_costs_single[LAST_FRAME];
- this_rd = RDCOST(x->rdmult, rate2, distortion2);
-
- rd_cost->rate = rate2;
- rd_cost->dist = distortion2;
- rd_cost->rdcost = this_rd;
-
- if (this_rd >= best_rd_so_far) {
- rd_cost->rate = INT_MAX;
- rd_cost->rdcost = INT64_MAX;
- return;
- }
-
- assert((cm->interp_filter == SWITCHABLE) ||
- (cm->interp_filter ==
- av1_extract_interp_filter(mbmi->interp_filters, 0)));
-
- av1_update_rd_thresh_fact(cm, tile_data->thresh_freq_fact,
- cpi->sf.adaptive_rd_thresh, bsize, THR_GLOBALMV);
-
- av1_zero(best_pred_diff);
-
- store_coding_context(x, ctx, THR_GLOBALMV, best_pred_diff, 0);
-}
-
-struct calc_target_weighted_pred_ctxt {
- const MACROBLOCK *x;
- const uint8_t *tmp;
- int tmp_stride;
- int overlap;
-};
-
-static INLINE void calc_target_weighted_pred_above(
- MACROBLOCKD *xd, int rel_mi_col, uint8_t nb_mi_width, MB_MODE_INFO *nb_mi,
- void *fun_ctxt, const int num_planes) {
- (void)nb_mi;
- (void)num_planes;
-
- struct calc_target_weighted_pred_ctxt *ctxt =
- (struct calc_target_weighted_pred_ctxt *)fun_ctxt;
-
- const int bw = xd->n4_w << MI_SIZE_LOG2;
- const uint8_t *const mask1d = av1_get_obmc_mask(ctxt->overlap);
-
- int32_t *wsrc = ctxt->x->wsrc_buf + (rel_mi_col * MI_SIZE);
- int32_t *mask = ctxt->x->mask_buf + (rel_mi_col * MI_SIZE);
- const uint8_t *tmp = ctxt->tmp + rel_mi_col * MI_SIZE;
- const int is_hbd = (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) ? 1 : 0;
-
- if (!is_hbd) {
- for (int row = 0; row < ctxt->overlap; ++row) {
- const uint8_t m0 = mask1d[row];
- const uint8_t m1 = AOM_BLEND_A64_MAX_ALPHA - m0;
- for (int col = 0; col < nb_mi_width * MI_SIZE; ++col) {
- wsrc[col] = m1 * tmp[col];
- mask[col] = m0;
- }
- wsrc += bw;
- mask += bw;
- tmp += ctxt->tmp_stride;
- }
- } else {
- const uint16_t *tmp16 = CONVERT_TO_SHORTPTR(tmp);
-
- for (int row = 0; row < ctxt->overlap; ++row) {
- const uint8_t m0 = mask1d[row];
- const uint8_t m1 = AOM_BLEND_A64_MAX_ALPHA - m0;
- for (int col = 0; col < nb_mi_width * MI_SIZE; ++col) {
- wsrc[col] = m1 * tmp16[col];
- mask[col] = m0;
- }
- wsrc += bw;
- mask += bw;
- tmp16 += ctxt->tmp_stride;
- }
- }
-}
-
-static INLINE void calc_target_weighted_pred_left(
- MACROBLOCKD *xd, int rel_mi_row, uint8_t nb_mi_height, MB_MODE_INFO *nb_mi,
- void *fun_ctxt, const int num_planes) {
- (void)nb_mi;
- (void)num_planes;
-
- struct calc_target_weighted_pred_ctxt *ctxt =
- (struct calc_target_weighted_pred_ctxt *)fun_ctxt;
-
- const int bw = xd->n4_w << MI_SIZE_LOG2;
- const uint8_t *const mask1d = av1_get_obmc_mask(ctxt->overlap);
-
- int32_t *wsrc = ctxt->x->wsrc_buf + (rel_mi_row * MI_SIZE * bw);
- int32_t *mask = ctxt->x->mask_buf + (rel_mi_row * MI_SIZE * bw);
- const uint8_t *tmp = ctxt->tmp + (rel_mi_row * MI_SIZE * ctxt->tmp_stride);
- const int is_hbd = (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) ? 1 : 0;
-
- if (!is_hbd) {
- for (int row = 0; row < nb_mi_height * MI_SIZE; ++row) {
- for (int col = 0; col < ctxt->overlap; ++col) {
- const uint8_t m0 = mask1d[col];
- const uint8_t m1 = AOM_BLEND_A64_MAX_ALPHA - m0;
- wsrc[col] = (wsrc[col] >> AOM_BLEND_A64_ROUND_BITS) * m0 +
- (tmp[col] << AOM_BLEND_A64_ROUND_BITS) * m1;
- mask[col] = (mask[col] >> AOM_BLEND_A64_ROUND_BITS) * m0;
- }
- wsrc += bw;
- mask += bw;
- tmp += ctxt->tmp_stride;
- }
- } else {
- const uint16_t *tmp16 = CONVERT_TO_SHORTPTR(tmp);
-
- for (int row = 0; row < nb_mi_height * MI_SIZE; ++row) {
- for (int col = 0; col < ctxt->overlap; ++col) {
- const uint8_t m0 = mask1d[col];
- const uint8_t m1 = AOM_BLEND_A64_MAX_ALPHA - m0;
- wsrc[col] = (wsrc[col] >> AOM_BLEND_A64_ROUND_BITS) * m0 +
- (tmp16[col] << AOM_BLEND_A64_ROUND_BITS) * m1;
- mask[col] = (mask[col] >> AOM_BLEND_A64_ROUND_BITS) * m0;
- }
- wsrc += bw;
- mask += bw;
- tmp16 += ctxt->tmp_stride;
- }
- }
-}
-
-// This function has a structure similar to av1_build_obmc_inter_prediction
-//
-// The OBMC predictor is computed as:
-//
-// PObmc(x,y) =
-// AOM_BLEND_A64(Mh(x),
-// AOM_BLEND_A64(Mv(y), P(x,y), PAbove(x,y)),
-// PLeft(x, y))
-//
-// Scaling up by AOM_BLEND_A64_MAX_ALPHA ** 2 and omitting the intermediate
-// rounding, this can be written as:
-//
-// AOM_BLEND_A64_MAX_ALPHA * AOM_BLEND_A64_MAX_ALPHA * Pobmc(x,y) =
-// Mh(x) * Mv(y) * P(x,y) +
-// Mh(x) * Cv(y) * Pabove(x,y) +
-// AOM_BLEND_A64_MAX_ALPHA * Ch(x) * PLeft(x, y)
-//
-// Where :
-//
-// Cv(y) = AOM_BLEND_A64_MAX_ALPHA - Mv(y)
-// Ch(y) = AOM_BLEND_A64_MAX_ALPHA - Mh(y)
-//
-// This function computes 'wsrc' and 'mask' as:
-//
-// wsrc(x, y) =
-// AOM_BLEND_A64_MAX_ALPHA * AOM_BLEND_A64_MAX_ALPHA * src(x, y) -
-// Mh(x) * Cv(y) * Pabove(x,y) +
-// AOM_BLEND_A64_MAX_ALPHA * Ch(x) * PLeft(x, y)
-//
-// mask(x, y) = Mh(x) * Mv(y)
-//
-// These can then be used to efficiently approximate the error for any
-// predictor P in the context of the provided neighbouring predictors by
-// computing:
-//
-// error(x, y) =
-// wsrc(x, y) - mask(x, y) * P(x, y) / (AOM_BLEND_A64_MAX_ALPHA ** 2)
-//
-static void calc_target_weighted_pred(const AV1_COMMON *cm, const MACROBLOCK *x,
- const MACROBLOCKD *xd, int mi_row,
- int mi_col, const uint8_t *above,
- int above_stride, const uint8_t *left,
- int left_stride) {
- const BLOCK_SIZE bsize = xd->mi[0]->sb_type;
- const int bw = xd->n4_w << MI_SIZE_LOG2;
- const int bh = xd->n4_h << MI_SIZE_LOG2;
- int32_t *mask_buf = x->mask_buf;
- int32_t *wsrc_buf = x->wsrc_buf;
-
- const int is_hbd = (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) ? 1 : 0;
- const int src_scale = AOM_BLEND_A64_MAX_ALPHA * AOM_BLEND_A64_MAX_ALPHA;
-
- // plane 0 should not be subsampled
- assert(xd->plane[0].subsampling_x == 0);
- assert(xd->plane[0].subsampling_y == 0);
-
- av1_zero_array(wsrc_buf, bw * bh);
- for (int i = 0; i < bw * bh; ++i) mask_buf[i] = AOM_BLEND_A64_MAX_ALPHA;
-
- // handle above row
- if (xd->up_available) {
- const int overlap =
- AOMMIN(block_size_high[bsize], block_size_high[BLOCK_64X64]) >> 1;
- struct calc_target_weighted_pred_ctxt ctxt = { x, above, above_stride,
- overlap };
- foreach_overlappable_nb_above(cm, (MACROBLOCKD *)xd, mi_col,
- max_neighbor_obmc[mi_size_wide_log2[bsize]],
- calc_target_weighted_pred_above, &ctxt);
- }
-
- for (int i = 0; i < bw * bh; ++i) {
- wsrc_buf[i] *= AOM_BLEND_A64_MAX_ALPHA;
- mask_buf[i] *= AOM_BLEND_A64_MAX_ALPHA;
- }
-
- // handle left column
- if (xd->left_available) {
- const int overlap =
- AOMMIN(block_size_wide[bsize], block_size_wide[BLOCK_64X64]) >> 1;
- struct calc_target_weighted_pred_ctxt ctxt = { x, left, left_stride,
- overlap };
- foreach_overlappable_nb_left(cm, (MACROBLOCKD *)xd, mi_row,
- max_neighbor_obmc[mi_size_high_log2[bsize]],
- calc_target_weighted_pred_left, &ctxt);
- }
-
- if (!is_hbd) {
- const uint8_t *src = x->plane[0].src.buf;
-
- for (int row = 0; row < bh; ++row) {
- for (int col = 0; col < bw; ++col) {
- wsrc_buf[col] = src[col] * src_scale - wsrc_buf[col];
- }
- wsrc_buf += bw;
- src += x->plane[0].src.stride;
- }
- } else {
- const uint16_t *src = CONVERT_TO_SHORTPTR(x->plane[0].src.buf);
-
- for (int row = 0; row < bh; ++row) {
- for (int col = 0; col < bw; ++col) {
- wsrc_buf[col] = src[col] * src_scale - wsrc_buf[col];
- }
- wsrc_buf += bw;
- src += x->plane[0].src.stride;
- }
- }
-}
diff --git a/third_party/aom/av1/encoder/rdopt.h b/third_party/aom/av1/encoder/rdopt.h
deleted file mode 100644
index 4c11f90b8..000000000
--- a/third_party/aom/av1/encoder/rdopt.h
+++ /dev/null
@@ -1,138 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#ifndef AOM_AV1_ENCODER_RDOPT_H_
-#define AOM_AV1_ENCODER_RDOPT_H_
-
-#include "av1/common/blockd.h"
-#include "av1/common/txb_common.h"
-
-#include "av1/encoder/block.h"
-#include "av1/encoder/context_tree.h"
-#include "av1/encoder/encoder.h"
-#include "av1/encoder/encodetxb.h"
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-#define MAX_REF_MV_SERCH 3
-#define DEFAULT_LUMA_INTERP_SKIP_FLAG 1
-#define DEFAULT_CHROMA_INTERP_SKIP_FLAG 2
-#define DEFAULT_INTERP_SKIP_FLAG \
- (DEFAULT_LUMA_INTERP_SKIP_FLAG | DEFAULT_CHROMA_INTERP_SKIP_FLAG)
-
-struct TileInfo;
-struct macroblock;
-struct RD_STATS;
-
-#if CONFIG_RD_DEBUG
-static INLINE void av1_update_txb_coeff_cost(RD_STATS *rd_stats, int plane,
- TX_SIZE tx_size, int blk_row,
- int blk_col, int txb_coeff_cost) {
- (void)blk_row;
- (void)blk_col;
- (void)tx_size;
- rd_stats->txb_coeff_cost[plane] += txb_coeff_cost;
-
- {
- const int txb_h = tx_size_high_unit[tx_size];
- const int txb_w = tx_size_wide_unit[tx_size];
- int idx, idy;
- for (idy = 0; idy < txb_h; ++idy)
- for (idx = 0; idx < txb_w; ++idx)
- rd_stats->txb_coeff_cost_map[plane][blk_row + idy][blk_col + idx] = 0;
-
- rd_stats->txb_coeff_cost_map[plane][blk_row][blk_col] = txb_coeff_cost;
- }
- assert(blk_row < TXB_COEFF_COST_MAP_SIZE);
- assert(blk_col < TXB_COEFF_COST_MAP_SIZE);
-}
-#endif
-
-// Returns the number of colors in 'src'.
-int av1_count_colors(const uint8_t *src, int stride, int rows, int cols,
- int *val_count);
-// Same as av1_count_colors(), but for high-bitdepth mode.
-int av1_count_colors_highbd(const uint8_t *src8, int stride, int rows, int cols,
- int bit_depth, int *val_count);
-
-#if CONFIG_DIST_8X8
-int64_t av1_dist_8x8(const struct AV1_COMP *const cpi, const MACROBLOCK *x,
- const uint8_t *src, int src_stride, const uint8_t *dst,
- int dst_stride, const BLOCK_SIZE tx_bsize, int bsw,
- int bsh, int visible_w, int visible_h, int qindex);
-#endif
-
-static INLINE int av1_cost_skip_txb(MACROBLOCK *x, const TXB_CTX *const txb_ctx,
- int plane, TX_SIZE tx_size) {
- const TX_SIZE txs_ctx = get_txsize_entropy_ctx(tx_size);
- const PLANE_TYPE plane_type = get_plane_type(plane);
- const LV_MAP_COEFF_COST *const coeff_costs =
- &x->coeff_costs[txs_ctx][plane_type];
- return coeff_costs->txb_skip_cost[txb_ctx->txb_skip_ctx][1];
-}
-
-static INLINE int av1_cost_coeffs(const AV1_COMMON *const cm, MACROBLOCK *x,
- int plane, int block, TX_SIZE tx_size,
- const TX_TYPE tx_type,
- const TXB_CTX *const txb_ctx,
- int use_fast_coef_costing) {
-#if TXCOEFF_COST_TIMER
- struct aom_usec_timer timer;
- aom_usec_timer_start(&timer);
-#endif
- (void)use_fast_coef_costing;
- const int cost =
- av1_cost_coeffs_txb(cm, x, plane, block, tx_size, tx_type, txb_ctx);
-#if TXCOEFF_COST_TIMER
- AV1_COMMON *tmp_cm = (AV1_COMMON *)&cpi->common;
- aom_usec_timer_mark(&timer);
- const int64_t elapsed_time = aom_usec_timer_elapsed(&timer);
- tmp_cm->txcoeff_cost_timer += elapsed_time;
- ++tmp_cm->txcoeff_cost_count;
-#endif
- return cost;
-}
-
-void av1_rd_pick_intra_mode_sb(const struct AV1_COMP *cpi, struct macroblock *x,
- int mi_row, int mi_col, struct RD_STATS *rd_cost,
- BLOCK_SIZE bsize, PICK_MODE_CONTEXT *ctx,
- int64_t best_rd);
-
-unsigned int av1_get_sby_perpixel_variance(const struct AV1_COMP *cpi,
- const struct buf_2d *ref,
- BLOCK_SIZE bs);
-unsigned int av1_high_get_sby_perpixel_variance(const struct AV1_COMP *cpi,
- const struct buf_2d *ref,
- BLOCK_SIZE bs, int bd);
-
-void av1_rd_pick_inter_mode_sb(struct AV1_COMP *cpi,
- struct TileDataEnc *tile_data,
- struct macroblock *x, int mi_row, int mi_col,
- struct RD_STATS *rd_cost, BLOCK_SIZE bsize,
- PICK_MODE_CONTEXT *ctx, int64_t best_rd_so_far);
-
-void av1_rd_pick_inter_mode_sb_seg_skip(
- const struct AV1_COMP *cpi, struct TileDataEnc *tile_data,
- struct macroblock *x, int mi_row, int mi_col, struct RD_STATS *rd_cost,
- BLOCK_SIZE bsize, PICK_MODE_CONTEXT *ctx, int64_t best_rd_so_far);
-
-#if CONFIG_COLLECT_INTER_MODE_RD_STATS
-void av1_inter_mode_data_init(struct TileDataEnc *tile_data);
-void av1_inter_mode_data_fit(TileDataEnc *tile_data, int rdmult);
-#endif
-
-#ifdef __cplusplus
-} // extern "C"
-#endif
-
-#endif // AOM_AV1_ENCODER_RDOPT_H_
diff --git a/third_party/aom/av1/encoder/reconinter_enc.c b/third_party/aom/av1/encoder/reconinter_enc.c
deleted file mode 100644
index 23d920fc3..000000000
--- a/third_party/aom/av1/encoder/reconinter_enc.c
+++ /dev/null
@@ -1,627 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#include <assert.h>
-#include <stdio.h>
-#include <limits.h>
-
-#include "config/aom_config.h"
-#include "config/aom_dsp_rtcd.h"
-#include "config/aom_scale_rtcd.h"
-
-#include "aom/aom_integer.h"
-#include "aom_dsp/blend.h"
-
-#include "av1/common/blockd.h"
-#include "av1/common/mvref_common.h"
-#include "av1/common/reconinter.h"
-#include "av1/common/reconintra.h"
-#include "av1/common/onyxc_int.h"
-#include "av1/common/obmc.h"
-#include "av1/encoder/reconinter_enc.h"
-
-static INLINE void calc_subpel_params(
- MACROBLOCKD *xd, const struct scale_factors *const sf, const MV mv,
- int plane, const int pre_x, const int pre_y, int x, int y,
- struct buf_2d *const pre_buf, uint8_t **pre, SubpelParams *subpel_params,
- int bw, int bh) {
- struct macroblockd_plane *const pd = &xd->plane[plane];
- const int is_scaled = av1_is_scaled(sf);
- if (is_scaled) {
- int ssx = pd->subsampling_x;
- int ssy = pd->subsampling_y;
- int orig_pos_y = (pre_y + y) << SUBPEL_BITS;
- orig_pos_y += mv.row * (1 << (1 - ssy));
- int orig_pos_x = (pre_x + x) << SUBPEL_BITS;
- orig_pos_x += mv.col * (1 << (1 - ssx));
- int pos_y = sf->scale_value_y(orig_pos_y, sf);
- int pos_x = sf->scale_value_x(orig_pos_x, sf);
- pos_x += SCALE_EXTRA_OFF;
- pos_y += SCALE_EXTRA_OFF;
-
- const int top = -AOM_LEFT_TOP_MARGIN_SCALED(ssy);
- const int left = -AOM_LEFT_TOP_MARGIN_SCALED(ssx);
- const int bottom = (pre_buf->height + AOM_INTERP_EXTEND)
- << SCALE_SUBPEL_BITS;
- const int right = (pre_buf->width + AOM_INTERP_EXTEND) << SCALE_SUBPEL_BITS;
- pos_y = clamp(pos_y, top, bottom);
- pos_x = clamp(pos_x, left, right);
-
- *pre = pre_buf->buf0 + (pos_y >> SCALE_SUBPEL_BITS) * pre_buf->stride +
- (pos_x >> SCALE_SUBPEL_BITS);
- subpel_params->subpel_x = pos_x & SCALE_SUBPEL_MASK;
- subpel_params->subpel_y = pos_y & SCALE_SUBPEL_MASK;
- subpel_params->xs = sf->x_step_q4;
- subpel_params->ys = sf->y_step_q4;
- } else {
- const MV mv_q4 = clamp_mv_to_umv_border_sb(
- xd, &mv, bw, bh, pd->subsampling_x, pd->subsampling_y);
- subpel_params->xs = subpel_params->ys = SCALE_SUBPEL_SHIFTS;
- subpel_params->subpel_x = (mv_q4.col & SUBPEL_MASK) << SCALE_EXTRA_BITS;
- subpel_params->subpel_y = (mv_q4.row & SUBPEL_MASK) << SCALE_EXTRA_BITS;
- *pre = pre_buf->buf + (y + (mv_q4.row >> SUBPEL_BITS)) * pre_buf->stride +
- (x + (mv_q4.col >> SUBPEL_BITS));
- }
-}
-
-static INLINE void build_inter_predictors(const AV1_COMMON *cm, MACROBLOCKD *xd,
- int plane, const MB_MODE_INFO *mi,
- int build_for_obmc, int bw, int bh,
- int mi_x, int mi_y) {
- struct macroblockd_plane *const pd = &xd->plane[plane];
- int is_compound = has_second_ref(mi);
- int ref;
- const int is_intrabc = is_intrabc_block(mi);
- assert(IMPLIES(is_intrabc, !is_compound));
- int is_global[2] = { 0, 0 };
- for (ref = 0; ref < 1 + is_compound; ++ref) {
- const WarpedMotionParams *const wm = &xd->global_motion[mi->ref_frame[ref]];
- is_global[ref] = is_global_mv_block(mi, wm->wmtype);
- }
-
- const BLOCK_SIZE bsize = mi->sb_type;
- const int ss_x = pd->subsampling_x;
- const int ss_y = pd->subsampling_y;
- int sub8x8_inter = (block_size_wide[bsize] < 8 && ss_x) ||
- (block_size_high[bsize] < 8 && ss_y);
-
- if (is_intrabc) sub8x8_inter = 0;
-
- // For sub8x8 chroma blocks, we may be covering more than one luma block's
- // worth of pixels. Thus (mi_x, mi_y) may not be the correct coordinates for
- // the top-left corner of the prediction source - the correct top-left corner
- // is at (pre_x, pre_y).
- const int row_start =
- (block_size_high[bsize] == 4) && ss_y && !build_for_obmc ? -1 : 0;
- const int col_start =
- (block_size_wide[bsize] == 4) && ss_x && !build_for_obmc ? -1 : 0;
- const int pre_x = (mi_x + MI_SIZE * col_start) >> ss_x;
- const int pre_y = (mi_y + MI_SIZE * row_start) >> ss_y;
-
- sub8x8_inter = sub8x8_inter && !build_for_obmc;
- if (sub8x8_inter) {
- for (int row = row_start; row <= 0 && sub8x8_inter; ++row) {
- for (int col = col_start; col <= 0; ++col) {
- const MB_MODE_INFO *this_mbmi = xd->mi[row * xd->mi_stride + col];
- if (!is_inter_block(this_mbmi)) sub8x8_inter = 0;
- if (is_intrabc_block(this_mbmi)) sub8x8_inter = 0;
- }
- }
- }
-
- if (sub8x8_inter) {
- // block size
- const int b4_w = block_size_wide[bsize] >> ss_x;
- const int b4_h = block_size_high[bsize] >> ss_y;
- const BLOCK_SIZE plane_bsize = scale_chroma_bsize(bsize, ss_x, ss_y);
- const int b8_w = block_size_wide[plane_bsize] >> ss_x;
- const int b8_h = block_size_high[plane_bsize] >> ss_y;
- assert(!is_compound);
-
- const struct buf_2d orig_pred_buf[2] = { pd->pre[0], pd->pre[1] };
-
- int row = row_start;
- for (int y = 0; y < b8_h; y += b4_h) {
- int col = col_start;
- for (int x = 0; x < b8_w; x += b4_w) {
- MB_MODE_INFO *this_mbmi = xd->mi[row * xd->mi_stride + col];
- is_compound = has_second_ref(this_mbmi);
- int tmp_dst_stride = 8;
- assert(bw < 8 || bh < 8);
- ConvolveParams conv_params = get_conv_params_no_round(
- 0, plane, xd->tmp_conv_dst, tmp_dst_stride, is_compound, xd->bd);
- conv_params.use_jnt_comp_avg = 0;
- struct buf_2d *const dst_buf = &pd->dst;
- uint8_t *dst = dst_buf->buf + dst_buf->stride * y + x;
-
- ref = 0;
- const RefBuffer *ref_buf =
- &cm->frame_refs[this_mbmi->ref_frame[ref] - LAST_FRAME];
-
- pd->pre[ref].buf0 =
- (plane == 1) ? ref_buf->buf->u_buffer : ref_buf->buf->v_buffer;
- pd->pre[ref].buf =
- pd->pre[ref].buf0 + scaled_buffer_offset(pre_x, pre_y,
- ref_buf->buf->uv_stride,
- &ref_buf->sf);
- pd->pre[ref].width = ref_buf->buf->uv_crop_width;
- pd->pre[ref].height = ref_buf->buf->uv_crop_height;
- pd->pre[ref].stride = ref_buf->buf->uv_stride;
-
- const struct scale_factors *const sf =
- is_intrabc ? &cm->sf_identity : &ref_buf->sf;
- struct buf_2d *const pre_buf = is_intrabc ? dst_buf : &pd->pre[ref];
-
- const MV mv = this_mbmi->mv[ref].as_mv;
-
- uint8_t *pre;
- SubpelParams subpel_params;
- WarpTypesAllowed warp_types;
- warp_types.global_warp_allowed = is_global[ref];
- warp_types.local_warp_allowed = this_mbmi->motion_mode == WARPED_CAUSAL;
-
- calc_subpel_params(xd, sf, mv, plane, pre_x, pre_y, x, y, pre_buf, &pre,
- &subpel_params, bw, bh);
- conv_params.do_average = ref;
- if (is_masked_compound_type(mi->interinter_comp.type)) {
- // masked compound type has its own average mechanism
- conv_params.do_average = 0;
- }
-
- av1_make_inter_predictor(
- pre, pre_buf->stride, dst, dst_buf->stride, &subpel_params, sf,
- b4_w, b4_h, &conv_params, this_mbmi->interp_filters, &warp_types,
- (mi_x >> pd->subsampling_x) + x, (mi_y >> pd->subsampling_y) + y,
- plane, ref, mi, build_for_obmc, xd, cm->allow_warped_motion);
-
- ++col;
- }
- ++row;
- }
-
- for (ref = 0; ref < 2; ++ref) pd->pre[ref] = orig_pred_buf[ref];
- return;
- }
-
- {
- ConvolveParams conv_params = get_conv_params_no_round(
- 0, plane, xd->tmp_conv_dst, MAX_SB_SIZE, is_compound, xd->bd);
- av1_jnt_comp_weight_assign(cm, mi, 0, &conv_params.fwd_offset,
- &conv_params.bck_offset,
- &conv_params.use_jnt_comp_avg, is_compound);
-
- struct buf_2d *const dst_buf = &pd->dst;
- uint8_t *const dst = dst_buf->buf;
- for (ref = 0; ref < 1 + is_compound; ++ref) {
- const struct scale_factors *const sf =
- is_intrabc ? &cm->sf_identity : &xd->block_refs[ref]->sf;
- struct buf_2d *const pre_buf = is_intrabc ? dst_buf : &pd->pre[ref];
- const MV mv = mi->mv[ref].as_mv;
-
- uint8_t *pre;
- SubpelParams subpel_params;
- calc_subpel_params(xd, sf, mv, plane, pre_x, pre_y, 0, 0, pre_buf, &pre,
- &subpel_params, bw, bh);
-
- WarpTypesAllowed warp_types;
- warp_types.global_warp_allowed = is_global[ref];
- warp_types.local_warp_allowed = mi->motion_mode == WARPED_CAUSAL;
-
- if (ref && is_masked_compound_type(mi->interinter_comp.type)) {
- // masked compound type has its own average mechanism
- conv_params.do_average = 0;
- av1_make_masked_inter_predictor(
- pre, pre_buf->stride, dst, dst_buf->stride, &subpel_params, sf, bw,
- bh, &conv_params, mi->interp_filters, plane, &warp_types,
- mi_x >> pd->subsampling_x, mi_y >> pd->subsampling_y, ref, xd,
- cm->allow_warped_motion);
- } else {
- conv_params.do_average = ref;
- av1_make_inter_predictor(
- pre, pre_buf->stride, dst, dst_buf->stride, &subpel_params, sf, bw,
- bh, &conv_params, mi->interp_filters, &warp_types,
- mi_x >> pd->subsampling_x, mi_y >> pd->subsampling_y, plane, ref,
- mi, build_for_obmc, xd, cm->allow_warped_motion);
- }
- }
- }
-}
-
-static void build_inter_predictors_for_planes(const AV1_COMMON *cm,
- MACROBLOCKD *xd, BLOCK_SIZE bsize,
- int mi_row, int mi_col,
- int plane_from, int plane_to) {
- int plane;
- const int mi_x = mi_col * MI_SIZE;
- const int mi_y = mi_row * MI_SIZE;
- for (plane = plane_from; plane <= plane_to; ++plane) {
- const struct macroblockd_plane *pd = &xd->plane[plane];
- const int bw = pd->width;
- const int bh = pd->height;
-
- if (!is_chroma_reference(mi_row, mi_col, bsize, pd->subsampling_x,
- pd->subsampling_y))
- continue;
-
- build_inter_predictors(cm, xd, plane, xd->mi[0], 0, bw, bh, mi_x, mi_y);
- }
-}
-
-void av1_build_inter_predictors_sby(const AV1_COMMON *cm, MACROBLOCKD *xd,
- int mi_row, int mi_col, BUFFER_SET *ctx,
- BLOCK_SIZE bsize) {
- av1_build_inter_predictors_sbp(cm, xd, mi_row, mi_col, ctx, bsize, 0);
-}
-
-void av1_build_inter_predictors_sbuv(const AV1_COMMON *cm, MACROBLOCKD *xd,
- int mi_row, int mi_col, BUFFER_SET *ctx,
- BLOCK_SIZE bsize) {
- for (int plane_idx = 1; plane_idx < MAX_MB_PLANE; plane_idx++) {
- av1_build_inter_predictors_sbp(cm, xd, mi_row, mi_col, ctx, bsize,
- plane_idx);
- }
-}
-
-void av1_build_inter_predictors_sbp(const AV1_COMMON *cm, MACROBLOCKD *xd,
- int mi_row, int mi_col, BUFFER_SET *ctx,
- BLOCK_SIZE bsize, int plane_idx) {
- build_inter_predictors_for_planes(cm, xd, bsize, mi_row, mi_col, plane_idx,
- plane_idx);
-
- if (is_interintra_pred(xd->mi[0])) {
- BUFFER_SET default_ctx = { { NULL, NULL, NULL }, { 0, 0, 0 } };
- if (!ctx) {
- default_ctx.plane[plane_idx] = xd->plane[plane_idx].dst.buf;
- default_ctx.stride[plane_idx] = xd->plane[plane_idx].dst.stride;
- ctx = &default_ctx;
- }
- av1_build_interintra_predictors_sbp(cm, xd, xd->plane[plane_idx].dst.buf,
- xd->plane[plane_idx].dst.stride, ctx,
- plane_idx, bsize);
- }
-}
-
-void av1_build_inter_predictors_sb(const AV1_COMMON *cm, MACROBLOCKD *xd,
- int mi_row, int mi_col, BUFFER_SET *ctx,
- BLOCK_SIZE bsize) {
- const int num_planes = av1_num_planes(cm);
- av1_build_inter_predictors_sby(cm, xd, mi_row, mi_col, ctx, bsize);
- if (num_planes > 1)
- av1_build_inter_predictors_sbuv(cm, xd, mi_row, mi_col, ctx, bsize);
-}
-
-// TODO(sarahparker):
-// av1_build_inter_predictor should be combined with
-// av1_make_inter_predictor
-void av1_build_inter_predictor(const uint8_t *src, int src_stride, uint8_t *dst,
- int dst_stride, const MV *src_mv,
- const struct scale_factors *sf, int w, int h,
- ConvolveParams *conv_params,
- InterpFilters interp_filters,
- const WarpTypesAllowed *warp_types, int p_col,
- int p_row, int plane, int ref,
- enum mv_precision precision, int x, int y,
- const MACROBLOCKD *xd, int can_use_previous) {
- const int is_q4 = precision == MV_PRECISION_Q4;
- const MV mv_q4 = { is_q4 ? src_mv->row : src_mv->row * 2,
- is_q4 ? src_mv->col : src_mv->col * 2 };
- MV32 mv = av1_scale_mv(&mv_q4, x, y, sf);
- mv.col += SCALE_EXTRA_OFF;
- mv.row += SCALE_EXTRA_OFF;
-
- const SubpelParams subpel_params = { sf->x_step_q4, sf->y_step_q4,
- mv.col & SCALE_SUBPEL_MASK,
- mv.row & SCALE_SUBPEL_MASK };
- src += (mv.row >> SCALE_SUBPEL_BITS) * src_stride +
- (mv.col >> SCALE_SUBPEL_BITS);
-
- av1_make_inter_predictor(src, src_stride, dst, dst_stride, &subpel_params, sf,
- w, h, conv_params, interp_filters, warp_types, p_col,
- p_row, plane, ref, xd->mi[0], 0, xd,
- can_use_previous);
-}
-
-static INLINE void build_prediction_by_above_pred(
- MACROBLOCKD *xd, int rel_mi_col, uint8_t above_mi_width,
- MB_MODE_INFO *above_mbmi, void *fun_ctxt, const int num_planes) {
- struct build_prediction_ctxt *ctxt = (struct build_prediction_ctxt *)fun_ctxt;
- const int above_mi_col = ctxt->mi_col + rel_mi_col;
- int mi_x, mi_y;
- MB_MODE_INFO backup_mbmi = *above_mbmi;
-
- av1_setup_build_prediction_by_above_pred(xd, rel_mi_col, above_mi_width,
- above_mbmi, ctxt, num_planes);
- mi_x = above_mi_col << MI_SIZE_LOG2;
- mi_y = ctxt->mi_row << MI_SIZE_LOG2;
-
- const BLOCK_SIZE bsize = xd->mi[0]->sb_type;
-
- for (int j = 0; j < num_planes; ++j) {
- const struct macroblockd_plane *pd = &xd->plane[j];
- int bw = (above_mi_width * MI_SIZE) >> pd->subsampling_x;
- int bh = clamp(block_size_high[bsize] >> (pd->subsampling_y + 1), 4,
- block_size_high[BLOCK_64X64] >> (pd->subsampling_y + 1));
-
- if (av1_skip_u4x4_pred_in_obmc(bsize, pd, 0)) continue;
- build_inter_predictors(ctxt->cm, xd, j, above_mbmi, 1, bw, bh, mi_x, mi_y);
- }
- *above_mbmi = backup_mbmi;
-}
-
-void av1_build_prediction_by_above_preds(const AV1_COMMON *cm, MACROBLOCKD *xd,
- int mi_row, int mi_col,
- uint8_t *tmp_buf[MAX_MB_PLANE],
- int tmp_width[MAX_MB_PLANE],
- int tmp_height[MAX_MB_PLANE],
- int tmp_stride[MAX_MB_PLANE]) {
- if (!xd->up_available) return;
-
- // Adjust mb_to_bottom_edge to have the correct value for the OBMC
- // prediction block. This is half the height of the original block,
- // except for 128-wide blocks, where we only use a height of 32.
- int this_height = xd->n4_h * MI_SIZE;
- int pred_height = AOMMIN(this_height / 2, 32);
- xd->mb_to_bottom_edge += (this_height - pred_height) * 8;
-
- struct build_prediction_ctxt ctxt = { cm, mi_row,
- mi_col, tmp_buf,
- tmp_width, tmp_height,
- tmp_stride, xd->mb_to_right_edge };
- BLOCK_SIZE bsize = xd->mi[0]->sb_type;
- foreach_overlappable_nb_above(cm, xd, mi_col,
- max_neighbor_obmc[mi_size_wide_log2[bsize]],
- build_prediction_by_above_pred, &ctxt);
-
- xd->mb_to_left_edge = -((mi_col * MI_SIZE) * 8);
- xd->mb_to_right_edge = ctxt.mb_to_far_edge;
- xd->mb_to_bottom_edge -= (this_height - pred_height) * 8;
-}
-
-static INLINE void build_prediction_by_left_pred(
- MACROBLOCKD *xd, int rel_mi_row, uint8_t left_mi_height,
- MB_MODE_INFO *left_mbmi, void *fun_ctxt, const int num_planes) {
- struct build_prediction_ctxt *ctxt = (struct build_prediction_ctxt *)fun_ctxt;
- const int left_mi_row = ctxt->mi_row + rel_mi_row;
- int mi_x, mi_y;
- MB_MODE_INFO backup_mbmi = *left_mbmi;
-
- av1_setup_build_prediction_by_left_pred(xd, rel_mi_row, left_mi_height,
- left_mbmi, ctxt, num_planes);
- mi_x = ctxt->mi_col << MI_SIZE_LOG2;
- mi_y = left_mi_row << MI_SIZE_LOG2;
- const BLOCK_SIZE bsize = xd->mi[0]->sb_type;
-
- for (int j = 0; j < num_planes; ++j) {
- const struct macroblockd_plane *pd = &xd->plane[j];
- int bw = clamp(block_size_wide[bsize] >> (pd->subsampling_x + 1), 4,
- block_size_wide[BLOCK_64X64] >> (pd->subsampling_x + 1));
- int bh = (left_mi_height << MI_SIZE_LOG2) >> pd->subsampling_y;
-
- if (av1_skip_u4x4_pred_in_obmc(bsize, pd, 1)) continue;
- build_inter_predictors(ctxt->cm, xd, j, left_mbmi, 1, bw, bh, mi_x, mi_y);
- }
- *left_mbmi = backup_mbmi;
-}
-
-void av1_build_prediction_by_left_preds(const AV1_COMMON *cm, MACROBLOCKD *xd,
- int mi_row, int mi_col,
- uint8_t *tmp_buf[MAX_MB_PLANE],
- int tmp_width[MAX_MB_PLANE],
- int tmp_height[MAX_MB_PLANE],
- int tmp_stride[MAX_MB_PLANE]) {
- if (!xd->left_available) return;
-
- // Adjust mb_to_right_edge to have the correct value for the OBMC
- // prediction block. This is half the width of the original block,
- // except for 128-wide blocks, where we only use a width of 32.
- int this_width = xd->n4_w * MI_SIZE;
- int pred_width = AOMMIN(this_width / 2, 32);
- xd->mb_to_right_edge += (this_width - pred_width) * 8;
-
- struct build_prediction_ctxt ctxt = { cm, mi_row,
- mi_col, tmp_buf,
- tmp_width, tmp_height,
- tmp_stride, xd->mb_to_bottom_edge };
- BLOCK_SIZE bsize = xd->mi[0]->sb_type;
- foreach_overlappable_nb_left(cm, xd, mi_row,
- max_neighbor_obmc[mi_size_high_log2[bsize]],
- build_prediction_by_left_pred, &ctxt);
-
- xd->mb_to_top_edge = -((mi_row * MI_SIZE) * 8);
- xd->mb_to_right_edge -= (this_width - pred_width) * 8;
- xd->mb_to_bottom_edge = ctxt.mb_to_far_edge;
-}
-
-void av1_build_obmc_inter_predictors_sb(const AV1_COMMON *cm, MACROBLOCKD *xd,
- int mi_row, int mi_col) {
- const int num_planes = av1_num_planes(cm);
- uint8_t *dst_buf1[MAX_MB_PLANE], *dst_buf2[MAX_MB_PLANE];
- int dst_stride1[MAX_MB_PLANE] = { MAX_SB_SIZE, MAX_SB_SIZE, MAX_SB_SIZE };
- int dst_stride2[MAX_MB_PLANE] = { MAX_SB_SIZE, MAX_SB_SIZE, MAX_SB_SIZE };
- int dst_width1[MAX_MB_PLANE] = { MAX_SB_SIZE, MAX_SB_SIZE, MAX_SB_SIZE };
- int dst_width2[MAX_MB_PLANE] = { MAX_SB_SIZE, MAX_SB_SIZE, MAX_SB_SIZE };
- int dst_height1[MAX_MB_PLANE] = { MAX_SB_SIZE, MAX_SB_SIZE, MAX_SB_SIZE };
- int dst_height2[MAX_MB_PLANE] = { MAX_SB_SIZE, MAX_SB_SIZE, MAX_SB_SIZE };
-
- if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
- int len = sizeof(uint16_t);
- dst_buf1[0] = CONVERT_TO_BYTEPTR(xd->tmp_obmc_bufs[0]);
- dst_buf1[1] =
- CONVERT_TO_BYTEPTR(xd->tmp_obmc_bufs[0] + MAX_SB_SQUARE * len);
- dst_buf1[2] =
- CONVERT_TO_BYTEPTR(xd->tmp_obmc_bufs[0] + MAX_SB_SQUARE * 2 * len);
- dst_buf2[0] = CONVERT_TO_BYTEPTR(xd->tmp_obmc_bufs[1]);
- dst_buf2[1] =
- CONVERT_TO_BYTEPTR(xd->tmp_obmc_bufs[1] + MAX_SB_SQUARE * len);
- dst_buf2[2] =
- CONVERT_TO_BYTEPTR(xd->tmp_obmc_bufs[1] + MAX_SB_SQUARE * 2 * len);
- } else {
- dst_buf1[0] = xd->tmp_obmc_bufs[0];
- dst_buf1[1] = xd->tmp_obmc_bufs[0] + MAX_SB_SQUARE;
- dst_buf1[2] = xd->tmp_obmc_bufs[0] + MAX_SB_SQUARE * 2;
- dst_buf2[0] = xd->tmp_obmc_bufs[1];
- dst_buf2[1] = xd->tmp_obmc_bufs[1] + MAX_SB_SQUARE;
- dst_buf2[2] = xd->tmp_obmc_bufs[1] + MAX_SB_SQUARE * 2;
- }
- av1_build_prediction_by_above_preds(cm, xd, mi_row, mi_col, dst_buf1,
- dst_width1, dst_height1, dst_stride1);
- av1_build_prediction_by_left_preds(cm, xd, mi_row, mi_col, dst_buf2,
- dst_width2, dst_height2, dst_stride2);
- av1_setup_dst_planes(xd->plane, xd->mi[0]->sb_type, get_frame_new_buffer(cm),
- mi_row, mi_col, 0, num_planes);
- av1_build_obmc_inter_prediction(cm, xd, mi_row, mi_col, dst_buf1, dst_stride1,
- dst_buf2, dst_stride2);
-}
-
-// Builds the inter-predictor for the single ref case
-// for use in the encoder to search the wedges efficiently.
-static void build_inter_predictors_single_buf(MACROBLOCKD *xd, int plane,
- int bw, int bh, int x, int y,
- int w, int h, int mi_x, int mi_y,
- int ref, uint8_t *const ext_dst,
- int ext_dst_stride,
- int can_use_previous) {
- struct macroblockd_plane *const pd = &xd->plane[plane];
- const MB_MODE_INFO *mi = xd->mi[0];
-
- const struct scale_factors *const sf = &xd->block_refs[ref]->sf;
- struct buf_2d *const pre_buf = &pd->pre[ref];
- uint8_t *const dst = get_buf_by_bd(xd, ext_dst) + ext_dst_stride * y + x;
- const MV mv = mi->mv[ref].as_mv;
-
- ConvolveParams conv_params = get_conv_params(0, plane, xd->bd);
- WarpTypesAllowed warp_types;
- const WarpedMotionParams *const wm = &xd->global_motion[mi->ref_frame[ref]];
- warp_types.global_warp_allowed = is_global_mv_block(mi, wm->wmtype);
- warp_types.local_warp_allowed = mi->motion_mode == WARPED_CAUSAL;
- const int pre_x = (mi_x) >> pd->subsampling_x;
- const int pre_y = (mi_y) >> pd->subsampling_y;
- uint8_t *pre;
- SubpelParams subpel_params;
- calc_subpel_params(xd, sf, mv, plane, pre_x, pre_y, x, y, pre_buf, &pre,
- &subpel_params, bw, bh);
-
- av1_make_inter_predictor(pre, pre_buf->stride, dst, ext_dst_stride,
- &subpel_params, sf, w, h, &conv_params,
- mi->interp_filters, &warp_types, pre_x + x,
- pre_y + y, plane, ref, mi, 0, xd, can_use_previous);
-}
-
-void av1_build_inter_predictors_for_planes_single_buf(
- MACROBLOCKD *xd, BLOCK_SIZE bsize, int plane_from, int plane_to, int mi_row,
- int mi_col, int ref, uint8_t *ext_dst[3], int ext_dst_stride[3],
- int can_use_previous) {
- int plane;
- const int mi_x = mi_col * MI_SIZE;
- const int mi_y = mi_row * MI_SIZE;
- for (plane = plane_from; plane <= plane_to; ++plane) {
- const BLOCK_SIZE plane_bsize = get_plane_block_size(
- bsize, xd->plane[plane].subsampling_x, xd->plane[plane].subsampling_y);
- const int bw = block_size_wide[plane_bsize];
- const int bh = block_size_high[plane_bsize];
- build_inter_predictors_single_buf(xd, plane, bw, bh, 0, 0, bw, bh, mi_x,
- mi_y, ref, ext_dst[plane],
- ext_dst_stride[plane], can_use_previous);
- }
-}
-
-static void build_masked_compound(
- uint8_t *dst, int dst_stride, const uint8_t *src0, int src0_stride,
- const uint8_t *src1, int src1_stride,
- const INTERINTER_COMPOUND_DATA *const comp_data, BLOCK_SIZE sb_type, int h,
- int w) {
- // Derive subsampling from h and w passed in. May be refactored to
- // pass in subsampling factors directly.
- const int subh = (2 << mi_size_high_log2[sb_type]) == h;
- const int subw = (2 << mi_size_wide_log2[sb_type]) == w;
- const uint8_t *mask = av1_get_compound_type_mask(comp_data, sb_type);
- aom_blend_a64_mask(dst, dst_stride, src0, src0_stride, src1, src1_stride,
- mask, block_size_wide[sb_type], w, h, subw, subh);
-}
-
-static void build_masked_compound_highbd(
- uint8_t *dst_8, int dst_stride, const uint8_t *src0_8, int src0_stride,
- const uint8_t *src1_8, int src1_stride,
- const INTERINTER_COMPOUND_DATA *const comp_data, BLOCK_SIZE sb_type, int h,
- int w, int bd) {
- // Derive subsampling from h and w passed in. May be refactored to
- // pass in subsampling factors directly.
- const int subh = (2 << mi_size_high_log2[sb_type]) == h;
- const int subw = (2 << mi_size_wide_log2[sb_type]) == w;
- const uint8_t *mask = av1_get_compound_type_mask(comp_data, sb_type);
- // const uint8_t *mask =
- // av1_get_contiguous_soft_mask(wedge_index, wedge_sign, sb_type);
- aom_highbd_blend_a64_mask(dst_8, dst_stride, src0_8, src0_stride, src1_8,
- src1_stride, mask, block_size_wide[sb_type], w, h,
- subw, subh, bd);
-}
-
-static void build_wedge_inter_predictor_from_buf(
- MACROBLOCKD *xd, int plane, int x, int y, int w, int h, uint8_t *ext_dst0,
- int ext_dst_stride0, uint8_t *ext_dst1, int ext_dst_stride1) {
- MB_MODE_INFO *const mbmi = xd->mi[0];
- const int is_compound = has_second_ref(mbmi);
- MACROBLOCKD_PLANE *const pd = &xd->plane[plane];
- struct buf_2d *const dst_buf = &pd->dst;
- uint8_t *const dst = dst_buf->buf + dst_buf->stride * y + x;
- mbmi->interinter_comp.seg_mask = xd->seg_mask;
- const INTERINTER_COMPOUND_DATA *comp_data = &mbmi->interinter_comp;
-
- if (is_compound && is_masked_compound_type(comp_data->type)) {
- if (!plane && comp_data->type == COMPOUND_DIFFWTD) {
- if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH)
- av1_build_compound_diffwtd_mask_highbd(
- comp_data->seg_mask, comp_data->mask_type,
- CONVERT_TO_BYTEPTR(ext_dst0), ext_dst_stride0,
- CONVERT_TO_BYTEPTR(ext_dst1), ext_dst_stride1, h, w, xd->bd);
- else
- av1_build_compound_diffwtd_mask(
- comp_data->seg_mask, comp_data->mask_type, ext_dst0,
- ext_dst_stride0, ext_dst1, ext_dst_stride1, h, w);
- }
-
- if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH)
- build_masked_compound_highbd(
- dst, dst_buf->stride, CONVERT_TO_BYTEPTR(ext_dst0), ext_dst_stride0,
- CONVERT_TO_BYTEPTR(ext_dst1), ext_dst_stride1, comp_data,
- mbmi->sb_type, h, w, xd->bd);
- else
- build_masked_compound(dst, dst_buf->stride, ext_dst0, ext_dst_stride0,
- ext_dst1, ext_dst_stride1, comp_data, mbmi->sb_type,
- h, w);
- } else {
- if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH)
- aom_highbd_convolve_copy(CONVERT_TO_BYTEPTR(ext_dst0), ext_dst_stride0,
- dst, dst_buf->stride, NULL, 0, NULL, 0, w, h,
- xd->bd);
- else
- aom_convolve_copy(ext_dst0, ext_dst_stride0, dst, dst_buf->stride, NULL,
- 0, NULL, 0, w, h);
- }
-}
-
-void av1_build_wedge_inter_predictor_from_buf(MACROBLOCKD *xd, BLOCK_SIZE bsize,
- int plane_from, int plane_to,
- uint8_t *ext_dst0[3],
- int ext_dst_stride0[3],
- uint8_t *ext_dst1[3],
- int ext_dst_stride1[3]) {
- int plane;
- for (plane = plane_from; plane <= plane_to; ++plane) {
- const BLOCK_SIZE plane_bsize = get_plane_block_size(
- bsize, xd->plane[plane].subsampling_x, xd->plane[plane].subsampling_y);
- const int bw = block_size_wide[plane_bsize];
- const int bh = block_size_high[plane_bsize];
- build_wedge_inter_predictor_from_buf(
- xd, plane, 0, 0, bw, bh, ext_dst0[plane], ext_dst_stride0[plane],
- ext_dst1[plane], ext_dst_stride1[plane]);
- }
-}
diff --git a/third_party/aom/av1/encoder/reconinter_enc.h b/third_party/aom/av1/encoder/reconinter_enc.h
deleted file mode 100644
index 10d5e8c28..000000000
--- a/third_party/aom/av1/encoder/reconinter_enc.h
+++ /dev/null
@@ -1,127 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#ifndef AOM_AV1_ENCODER_RECONINTER_ENC_H_
-#define AOM_AV1_ENCODER_RECONINTER_ENC_H_
-
-#include "aom/aom_integer.h"
-#include "av1/common/filter.h"
-#include "av1/common/blockd.h"
-#include "av1/common/onyxc_int.h"
-#include "av1/common/convolve.h"
-#include "av1/common/warped_motion.h"
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-void av1_build_inter_predictors_sby(const AV1_COMMON *cm, MACROBLOCKD *xd,
- int mi_row, int mi_col, BUFFER_SET *ctx,
- BLOCK_SIZE bsize);
-
-void av1_build_inter_predictors_sbuv(const AV1_COMMON *cm, MACROBLOCKD *xd,
- int mi_row, int mi_col, BUFFER_SET *ctx,
- BLOCK_SIZE bsize);
-
-void av1_build_inter_predictors_sbp(const AV1_COMMON *cm, MACROBLOCKD *xd,
- int mi_row, int mi_col, BUFFER_SET *ctx,
- BLOCK_SIZE bsize, int plane_idx);
-
-void av1_build_inter_predictors_sb(const AV1_COMMON *cm, MACROBLOCKD *xd,
- int mi_row, int mi_col, BUFFER_SET *ctx,
- BLOCK_SIZE bsize);
-
-void av1_build_inter_predictor(const uint8_t *src, int src_stride, uint8_t *dst,
- int dst_stride, const MV *src_mv,
- const struct scale_factors *sf, int w, int h,
- ConvolveParams *conv_params,
- InterpFilters interp_filters,
- const WarpTypesAllowed *warp_types, int p_col,
- int p_row, int plane, int ref,
- enum mv_precision precision, int x, int y,
- const MACROBLOCKD *xd, int can_use_previous);
-
-// Detect if the block have sub-pixel level motion vectors
-// per component.
-#define CHECK_SUBPEL 0
-static INLINE int has_subpel_mv_component(const MB_MODE_INFO *const mbmi,
- const MACROBLOCKD *const xd,
- int dir) {
-#if CHECK_SUBPEL
- const BLOCK_SIZE bsize = mbmi->sb_type;
- int plane;
- int ref = (dir >> 1);
-
- if (dir & 0x01) {
- if (mbmi->mv[ref].as_mv.col & SUBPEL_MASK) return 1;
- } else {
- if (mbmi->mv[ref].as_mv.row & SUBPEL_MASK) return 1;
- }
-
- return 0;
-#else
- (void)mbmi;
- (void)xd;
- (void)dir;
- return 1;
-#endif
-}
-
-static INLINE int av1_is_interp_search_needed(const MACROBLOCKD *const xd) {
- MB_MODE_INFO *const mi = xd->mi[0];
- const int is_compound = has_second_ref(mi);
- int ref;
- for (ref = 0; ref < 1 + is_compound; ++ref) {
- int row_col;
- for (row_col = 0; row_col < 2; ++row_col) {
- const int dir = (ref << 1) + row_col;
- if (has_subpel_mv_component(mi, xd, dir)) {
- return 1;
- }
- }
- }
- return 0;
-}
-
-void av1_build_prediction_by_above_preds(const AV1_COMMON *cm, MACROBLOCKD *xd,
- int mi_row, int mi_col,
- uint8_t *tmp_buf[MAX_MB_PLANE],
- int tmp_width[MAX_MB_PLANE],
- int tmp_height[MAX_MB_PLANE],
- int tmp_stride[MAX_MB_PLANE]);
-
-void av1_build_prediction_by_left_preds(const AV1_COMMON *cm, MACROBLOCKD *xd,
- int mi_row, int mi_col,
- uint8_t *tmp_buf[MAX_MB_PLANE],
- int tmp_width[MAX_MB_PLANE],
- int tmp_height[MAX_MB_PLANE],
- int tmp_stride[MAX_MB_PLANE]);
-
-void av1_build_obmc_inter_predictors_sb(const AV1_COMMON *cm, MACROBLOCKD *xd,
- int mi_row, int mi_col);
-
-void av1_build_inter_predictors_for_planes_single_buf(
- MACROBLOCKD *xd, BLOCK_SIZE bsize, int plane_from, int plane_to, int mi_row,
- int mi_col, int ref, uint8_t *ext_dst[3], int ext_dst_stride[3],
- int can_use_previous);
-
-void av1_build_wedge_inter_predictor_from_buf(MACROBLOCKD *xd, BLOCK_SIZE bsize,
- int plane_from, int plane_to,
- uint8_t *ext_dst0[3],
- int ext_dst_stride0[3],
- uint8_t *ext_dst1[3],
- int ext_dst_stride1[3]);
-
-#ifdef __cplusplus
-} // extern "C"
-#endif
-
-#endif // AOM_AV1_ENCODER_RECONINTER_ENC_H_
diff --git a/third_party/aom/av1/encoder/segmentation.c b/third_party/aom/av1/encoder/segmentation.c
deleted file mode 100644
index 2e9102745..000000000
--- a/third_party/aom/av1/encoder/segmentation.c
+++ /dev/null
@@ -1,244 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#include <limits.h>
-
-#include "aom_mem/aom_mem.h"
-
-#include "av1/common/pred_common.h"
-#include "av1/common/tile_common.h"
-
-#include "av1/encoder/cost.h"
-#include "av1/encoder/segmentation.h"
-
-void av1_enable_segmentation(struct segmentation *seg) {
- seg->enabled = 1;
- seg->update_map = 1;
- seg->update_data = 1;
- seg->temporal_update = 0;
-}
-
-void av1_disable_segmentation(struct segmentation *seg) {
- seg->enabled = 0;
- seg->update_map = 0;
- seg->update_data = 0;
- seg->temporal_update = 0;
-}
-
-void av1_disable_segfeature(struct segmentation *seg, int segment_id,
- SEG_LVL_FEATURES feature_id) {
- seg->feature_mask[segment_id] &= ~(1 << feature_id);
-}
-
-void av1_clear_segdata(struct segmentation *seg, int segment_id,
- SEG_LVL_FEATURES feature_id) {
- seg->feature_data[segment_id][feature_id] = 0;
-}
-
-static void count_segs(const AV1_COMMON *cm, MACROBLOCKD *xd,
- const TileInfo *tile, MB_MODE_INFO **mi,
- unsigned *no_pred_segcounts,
- unsigned (*temporal_predictor_count)[2],
- unsigned *t_unpred_seg_counts, int bw, int bh,
- int mi_row, int mi_col) {
- int segment_id;
-
- if (mi_row >= cm->mi_rows || mi_col >= cm->mi_cols) return;
-
- xd->mi = mi;
- segment_id = xd->mi[0]->segment_id;
-
- set_mi_row_col(xd, tile, mi_row, bh, mi_col, bw, cm->mi_rows, cm->mi_cols);
-
- // Count the number of hits on each segment with no prediction
- no_pred_segcounts[segment_id]++;
-
- // Temporal prediction not allowed on key frames
- if (cm->frame_type != KEY_FRAME) {
- const BLOCK_SIZE bsize = xd->mi[0]->sb_type;
- // Test to see if the segment id matches the predicted value.
- const int pred_segment_id =
- cm->last_frame_seg_map
- ? get_segment_id(cm, cm->last_frame_seg_map, bsize, mi_row, mi_col)
- : 0;
- const int pred_flag = pred_segment_id == segment_id;
- const int pred_context = av1_get_pred_context_seg_id(xd);
-
- // Store the prediction status for this mb and update counts
- // as appropriate
- xd->mi[0]->seg_id_predicted = pred_flag;
- temporal_predictor_count[pred_context][pred_flag]++;
-
- // Update the "unpredicted" segment count
- if (!pred_flag) t_unpred_seg_counts[segment_id]++;
- }
-}
-
-static void count_segs_sb(const AV1_COMMON *cm, MACROBLOCKD *xd,
- const TileInfo *tile, MB_MODE_INFO **mi,
- unsigned *no_pred_segcounts,
- unsigned (*temporal_predictor_count)[2],
- unsigned *t_unpred_seg_counts, int mi_row, int mi_col,
- BLOCK_SIZE bsize) {
- const int mis = cm->mi_stride;
- const int bs = mi_size_wide[bsize], hbs = bs / 2;
- PARTITION_TYPE partition;
- const int qbs = bs / 4;
-
- if (mi_row >= cm->mi_rows || mi_col >= cm->mi_cols) return;
-
-#define CSEGS(cs_bw, cs_bh, cs_rowoff, cs_coloff) \
- count_segs(cm, xd, tile, mi + mis * (cs_rowoff) + (cs_coloff), \
- no_pred_segcounts, temporal_predictor_count, t_unpred_seg_counts, \
- (cs_bw), (cs_bh), mi_row + (cs_rowoff), mi_col + (cs_coloff));
-
- if (bsize == BLOCK_8X8)
- partition = PARTITION_NONE;
- else
- partition = get_partition(cm, mi_row, mi_col, bsize);
- switch (partition) {
- case PARTITION_NONE: CSEGS(bs, bs, 0, 0); break;
- case PARTITION_HORZ:
- CSEGS(bs, hbs, 0, 0);
- CSEGS(bs, hbs, hbs, 0);
- break;
- case PARTITION_VERT:
- CSEGS(hbs, bs, 0, 0);
- CSEGS(hbs, bs, 0, hbs);
- break;
- case PARTITION_HORZ_A:
- CSEGS(hbs, hbs, 0, 0);
- CSEGS(hbs, hbs, 0, hbs);
- CSEGS(bs, hbs, hbs, 0);
- break;
- case PARTITION_HORZ_B:
- CSEGS(bs, hbs, 0, 0);
- CSEGS(hbs, hbs, hbs, 0);
- CSEGS(hbs, hbs, hbs, hbs);
- break;
- case PARTITION_VERT_A:
- CSEGS(hbs, hbs, 0, 0);
- CSEGS(hbs, hbs, hbs, 0);
- CSEGS(hbs, bs, 0, hbs);
- break;
- case PARTITION_VERT_B:
- CSEGS(hbs, bs, 0, 0);
- CSEGS(hbs, hbs, 0, hbs);
- CSEGS(hbs, hbs, hbs, hbs);
- break;
- case PARTITION_HORZ_4:
- CSEGS(bs, qbs, 0, 0);
- CSEGS(bs, qbs, qbs, 0);
- CSEGS(bs, qbs, 2 * qbs, 0);
- if (mi_row + 3 * qbs < cm->mi_rows) CSEGS(bs, qbs, 3 * qbs, 0);
- break;
-
- case PARTITION_VERT_4:
- CSEGS(qbs, bs, 0, 0);
- CSEGS(qbs, bs, 0, qbs);
- CSEGS(qbs, bs, 0, 2 * qbs);
- if (mi_col + 3 * qbs < cm->mi_cols) CSEGS(qbs, bs, 0, 3 * qbs);
- break;
-
- case PARTITION_SPLIT: {
- const BLOCK_SIZE subsize = get_partition_subsize(bsize, PARTITION_SPLIT);
- int n;
-
- for (n = 0; n < 4; n++) {
- const int mi_dc = hbs * (n & 1);
- const int mi_dr = hbs * (n >> 1);
-
- count_segs_sb(cm, xd, tile, &mi[mi_dr * mis + mi_dc], no_pred_segcounts,
- temporal_predictor_count, t_unpred_seg_counts,
- mi_row + mi_dr, mi_col + mi_dc, subsize);
- }
- } break;
- default: assert(0);
- }
-
-#undef CSEGS
-}
-
-void av1_choose_segmap_coding_method(AV1_COMMON *cm, MACROBLOCKD *xd) {
- struct segmentation *seg = &cm->seg;
- struct segmentation_probs *segp = &cm->fc->seg;
- int no_pred_cost;
- int t_pred_cost = INT_MAX;
- int tile_col, tile_row, mi_row, mi_col;
- unsigned temporal_predictor_count[SEG_TEMPORAL_PRED_CTXS][2] = { { 0 } };
- unsigned no_pred_segcounts[MAX_SEGMENTS] = { 0 };
- unsigned t_unpred_seg_counts[MAX_SEGMENTS] = { 0 };
- (void)xd;
-
- // First of all generate stats regarding how well the last segment map
- // predicts this one
- for (tile_row = 0; tile_row < cm->tile_rows; tile_row++) {
- TileInfo tile_info;
- av1_tile_set_row(&tile_info, cm, tile_row);
- for (tile_col = 0; tile_col < cm->tile_cols; tile_col++) {
- MB_MODE_INFO **mi_ptr;
- av1_tile_set_col(&tile_info, cm, tile_col);
- mi_ptr = cm->mi_grid_visible + tile_info.mi_row_start * cm->mi_stride +
- tile_info.mi_col_start;
- for (mi_row = tile_info.mi_row_start; mi_row < tile_info.mi_row_end;
- mi_row += cm->seq_params.mib_size,
- mi_ptr += cm->seq_params.mib_size * cm->mi_stride) {
- MB_MODE_INFO **mi = mi_ptr;
- for (mi_col = tile_info.mi_col_start; mi_col < tile_info.mi_col_end;
- mi_col += cm->seq_params.mib_size, mi += cm->seq_params.mib_size) {
- count_segs_sb(cm, xd, &tile_info, mi, no_pred_segcounts,
- temporal_predictor_count, t_unpred_seg_counts, mi_row,
- mi_col, cm->seq_params.sb_size);
- }
- }
- }
- }
-
- int seg_id_cost[MAX_SEGMENTS];
- av1_cost_tokens_from_cdf(seg_id_cost, segp->tree_cdf, NULL);
- no_pred_cost = 0;
- for (int i = 0; i < MAX_SEGMENTS; ++i)
- no_pred_cost += no_pred_segcounts[i] * seg_id_cost[i];
-
- // Frames without past dependency cannot use temporal prediction
- if (cm->primary_ref_frame != PRIMARY_REF_NONE) {
- int pred_flag_cost[SEG_TEMPORAL_PRED_CTXS][2];
- for (int i = 0; i < SEG_TEMPORAL_PRED_CTXS; ++i)
- av1_cost_tokens_from_cdf(pred_flag_cost[i], segp->pred_cdf[i], NULL);
- t_pred_cost = 0;
- // Cost for signaling the prediction flag.
- for (int i = 0; i < SEG_TEMPORAL_PRED_CTXS; ++i) {
- for (int j = 0; j < 2; ++j)
- t_pred_cost += temporal_predictor_count[i][j] * pred_flag_cost[i][j];
- }
- // Cost for signaling the unpredicted segment id.
- for (int i = 0; i < MAX_SEGMENTS; ++i)
- t_pred_cost += t_unpred_seg_counts[i] * seg_id_cost[i];
- }
-
- // Now choose which coding method to use.
- if (t_pred_cost < no_pred_cost) {
- assert(!cm->error_resilient_mode);
- seg->temporal_update = 1;
- } else {
- seg->temporal_update = 0;
- }
-}
-
-void av1_reset_segment_features(AV1_COMMON *cm) {
- struct segmentation *seg = &cm->seg;
-
- // Set up default state for MB feature flags
- seg->enabled = 0;
- seg->update_map = 0;
- seg->update_data = 0;
- av1_clearall_segfeatures(seg);
-}
diff --git a/third_party/aom/av1/encoder/segmentation.h b/third_party/aom/av1/encoder/segmentation.h
deleted file mode 100644
index 1ad13d66a..000000000
--- a/third_party/aom/av1/encoder/segmentation.h
+++ /dev/null
@@ -1,38 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#ifndef AOM_AV1_ENCODER_SEGMENTATION_H_
-#define AOM_AV1_ENCODER_SEGMENTATION_H_
-
-#include "av1/common/blockd.h"
-#include "av1/encoder/encoder.h"
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-void av1_enable_segmentation(struct segmentation *seg);
-void av1_disable_segmentation(struct segmentation *seg);
-
-void av1_disable_segfeature(struct segmentation *seg, int segment_id,
- SEG_LVL_FEATURES feature_id);
-void av1_clear_segdata(struct segmentation *seg, int segment_id,
- SEG_LVL_FEATURES feature_id);
-
-void av1_choose_segmap_coding_method(AV1_COMMON *cm, MACROBLOCKD *xd);
-
-void av1_reset_segment_features(AV1_COMMON *cm);
-
-#ifdef __cplusplus
-} // extern "C"
-#endif
-
-#endif // AOM_AV1_ENCODER_SEGMENTATION_H_
diff --git a/third_party/aom/av1/encoder/speed_features.c b/third_party/aom/av1/encoder/speed_features.c
deleted file mode 100644
index 4c35baae0..000000000
--- a/third_party/aom/av1/encoder/speed_features.c
+++ /dev/null
@@ -1,564 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#include <limits.h>
-
-#include "av1/encoder/encoder.h"
-#include "av1/encoder/speed_features.h"
-#include "av1/encoder/rdopt.h"
-
-#include "aom_dsp/aom_dsp_common.h"
-
-// Setting this to 1 will disable trellis optimization completely.
-// Setting this to 2 will disable trellis optimization within the
-// transform search. Trellis optimization will still be applied
-// in the final encode.
-#define DISABLE_TRELLISQ_SEARCH 0
-
-#define MAX_MESH_SPEED 5 // Max speed setting for mesh motion method
-static MESH_PATTERN
- good_quality_mesh_patterns[MAX_MESH_SPEED + 1][MAX_MESH_STEP] = {
- { { 64, 8 }, { 28, 4 }, { 15, 1 }, { 7, 1 } },
- { { 64, 8 }, { 28, 4 }, { 15, 1 }, { 7, 1 } },
- { { 64, 8 }, { 14, 2 }, { 7, 1 }, { 7, 1 } },
- { { 64, 16 }, { 24, 8 }, { 12, 4 }, { 7, 1 } },
- { { 64, 16 }, { 24, 8 }, { 12, 4 }, { 7, 1 } },
- { { 64, 16 }, { 24, 8 }, { 12, 4 }, { 7, 1 } },
- };
-static unsigned char good_quality_max_mesh_pct[MAX_MESH_SPEED + 1] = {
- 50, 50, 25, 15, 5, 1
-};
-
-// TODO(huisu@google.com): These settings are pretty relaxed, tune them for
-// each speed setting
-static MESH_PATTERN intrabc_mesh_patterns[MAX_MESH_SPEED + 1][MAX_MESH_STEP] = {
- { { 256, 1 }, { 256, 1 }, { 0, 0 }, { 0, 0 } },
- { { 256, 1 }, { 256, 1 }, { 0, 0 }, { 0, 0 } },
- { { 64, 1 }, { 64, 1 }, { 0, 0 }, { 0, 0 } },
- { { 64, 1 }, { 64, 1 }, { 0, 0 }, { 0, 0 } },
- { { 64, 4 }, { 16, 1 }, { 0, 0 }, { 0, 0 } },
- { { 64, 4 }, { 16, 1 }, { 0, 0 }, { 0, 0 } },
-};
-static uint8_t intrabc_max_mesh_pct[MAX_MESH_SPEED + 1] = { 100, 100, 100,
- 25, 25, 10 };
-
-// Intra only frames, golden frames (except alt ref overlays) and
-// alt ref frames tend to be coded at a higher than ambient quality
-static int frame_is_boosted(const AV1_COMP *cpi) {
- return frame_is_kf_gf_arf(cpi);
-}
-
-// Sets a partition size down to which the auto partition code will always
-// search (can go lower), based on the image dimensions. The logic here
-// is that the extent to which ringing artefacts are offensive, depends
-// partly on the screen area that over which they propogate. Propogation is
-// limited by transform block size but the screen area take up by a given block
-// size will be larger for a small image format stretched to full screen.
-static BLOCK_SIZE set_partition_min_limit(AV1_COMMON *const cm) {
- unsigned int screen_area = (cm->width * cm->height);
-
- // Select block size based on image format size.
- if (screen_area < 1280 * 720) {
- // Formats smaller in area than 720P
- return BLOCK_4X4;
- } else if (screen_area < 1920 * 1080) {
- // Format >= 720P and < 1080P
- return BLOCK_8X8;
- } else {
- // Formats 1080P and up
- return BLOCK_16X16;
- }
-}
-
-// Do we have an internal image edge (e.g. formatting bars).
-static int has_internal_image_edge(const AV1_COMP *cpi) {
- return (cpi->oxcf.pass == 2) &&
- ((cpi->twopass.this_frame_stats.inactive_zone_rows > 0) ||
- (cpi->twopass.this_frame_stats.inactive_zone_cols > 0));
-}
-
-static void set_good_speed_feature_framesize_dependent(AV1_COMP *cpi,
- SPEED_FEATURES *sf,
- int speed) {
- AV1_COMMON *const cm = &cpi->common;
- const int is_720p_or_larger = AOMMIN(cm->width, cm->height) >= 720;
- const int is_480p_or_larger = AOMMIN(cm->width, cm->height) >= 480;
-
- if (is_480p_or_larger) {
- sf->use_square_partition_only_threshold = BLOCK_128X128;
- } else {
- sf->use_square_partition_only_threshold = BLOCK_64X64;
- }
-
- // TODO(huisu@google.com): train models for 720P and above.
- if (!is_720p_or_larger) {
- sf->ml_partition_search_breakout_thresh[0] = 200; // BLOCK_8X8
- sf->ml_partition_search_breakout_thresh[1] = 250; // BLOCK_16X16
- sf->ml_partition_search_breakout_thresh[2] = 300; // BLOCK_32X32
- sf->ml_partition_search_breakout_thresh[3] = 500; // BLOCK_64X64
- sf->ml_partition_search_breakout_thresh[4] = -1; // BLOCK_128X128
- }
-
- if (speed >= 1) {
- if (is_720p_or_larger) {
- sf->use_square_partition_only_threshold = BLOCK_128X128;
- } else if (is_480p_or_larger) {
- sf->use_square_partition_only_threshold = BLOCK_64X64;
- } else {
- sf->use_square_partition_only_threshold = BLOCK_32X32;
- }
-
- if (!is_720p_or_larger) {
- sf->ml_partition_search_breakout_thresh[0] = 200; // BLOCK_8X8
- sf->ml_partition_search_breakout_thresh[1] = 250; // BLOCK_16X16
- sf->ml_partition_search_breakout_thresh[2] = 300; // BLOCK_32X32
- sf->ml_partition_search_breakout_thresh[3] = 300; // BLOCK_64X64
- sf->ml_partition_search_breakout_thresh[4] = -1; // BLOCK_128X128
- }
- }
-
- if (speed >= 2) {
- if (is_720p_or_larger) {
- sf->disable_split_mask =
- cm->show_frame ? DISABLE_ALL_SPLIT : DISABLE_ALL_INTER_SPLIT;
- sf->adaptive_pred_interp_filter = 0;
- sf->partition_search_breakout_dist_thr = (1 << 24);
- sf->partition_search_breakout_rate_thr = 120;
- } else {
- sf->disable_split_mask = LAST_AND_INTRA_SPLIT_ONLY;
- sf->partition_search_breakout_dist_thr = (1 << 22);
- sf->partition_search_breakout_rate_thr = 100;
- }
- sf->rd_auto_partition_min_limit = set_partition_min_limit(cm);
- }
-
- if (speed >= 3) {
- if (is_720p_or_larger) {
- sf->disable_split_mask = DISABLE_ALL_SPLIT;
- sf->partition_search_breakout_dist_thr = (1 << 25);
- sf->partition_search_breakout_rate_thr = 200;
- } else {
- sf->max_intra_bsize = BLOCK_32X32;
- sf->disable_split_mask = DISABLE_ALL_INTER_SPLIT;
- sf->partition_search_breakout_dist_thr = (1 << 23);
- sf->partition_search_breakout_rate_thr = 120;
- }
- }
-
- // If this is a two pass clip that fits the criteria for animated or
- // graphics content then reset disable_split_mask for speeds 2+.
- // Also if the image edge is internal to the coded area.
- if ((speed >= 2) && (cpi->oxcf.pass == 2) &&
- ((cpi->twopass.fr_content_type == FC_GRAPHICS_ANIMATION) ||
- (has_internal_image_edge(cpi)))) {
- sf->disable_split_mask = DISABLE_COMPOUND_SPLIT;
- }
-
- if (speed >= 4) {
- if (is_720p_or_larger) {
- sf->partition_search_breakout_dist_thr = (1 << 26);
- } else {
- sf->partition_search_breakout_dist_thr = (1 << 24);
- }
- sf->disable_split_mask = DISABLE_ALL_SPLIT;
- }
-}
-
-static void set_good_speed_features_framesize_independent(AV1_COMP *cpi,
- SPEED_FEATURES *sf,
- int speed) {
- AV1_COMMON *const cm = &cpi->common;
- const int boosted = frame_is_boosted(cpi);
-
- // Speed 0 for all speed features that give neutral coding performance change.
- sf->reduce_inter_modes = 1;
- sf->prune_ext_partition_types_search_level = 1;
- sf->ml_prune_rect_partition = 1;
- sf->ml_prune_ab_partition = 1;
- sf->ml_prune_4_partition = 1;
- sf->adaptive_txb_search_level = 1;
- sf->jnt_comp_skip_mv_search = 1;
- sf->model_based_prune_tx_search_level = 1;
- sf->model_based_post_interp_filter_breakout = 1;
- sf->inter_mode_rd_model_estimation = 1;
- sf->prune_ref_frame_for_rect_partitions =
- !(boosted || cpi->refresh_bwd_ref_frame || cpi->refresh_alt2_ref_frame);
- sf->less_rectangular_check_level = 1;
- sf->gm_search_type = GM_REDUCED_REF_SEARCH;
- sf->gm_disable_recode = 1;
-
- if (speed >= 1) {
- sf->gm_erroradv_type = GM_ERRORADV_TR_1;
- sf->selective_ref_frame = 1;
- sf->inter_tx_size_search_init_depth_rect = 1;
- sf->inter_tx_size_search_init_depth_sqr = 1;
- sf->intra_tx_size_search_init_depth_rect = 1;
- sf->intra_tx_size_search_init_depth_sqr = 1;
- sf->tx_size_search_lgr_block = 1;
- if (speed >= CONFIG_2PASS_PARTITION_SEARCH_LVL) {
- sf->two_pass_partition_search = 1;
- sf->mode_pruning_based_on_two_pass_partition_search = 1;
- }
- sf->prune_ext_partition_types_search_level = 2;
- sf->use_fast_interpolation_filter_search = 1;
- sf->skip_repeat_interpolation_filter_search = 1;
- sf->tx_type_search.skip_tx_search = 1;
- sf->tx_type_search.ml_tx_split_thresh = 40;
- sf->model_based_prune_tx_search_level = 0;
- sf->model_based_post_interp_filter_breakout = 0;
- // TODO(angiebird): Re-evaluate the impact of inter_mode_rd_model_estimation
- // on speed 1
- sf->inter_mode_rd_model_estimation = 0;
- sf->adaptive_txb_search_level = 2;
- sf->use_intra_txb_hash = 1;
- sf->optimize_b_precheck = 1;
- sf->dual_sgr_penalty_level = 1;
- sf->use_accurate_subpel_search = 1;
- sf->reuse_inter_intra_mode = 1;
- sf->prune_comp_search_by_single_result = 1;
- sf->skip_repeated_newmv = 1;
- sf->obmc_full_pixel_search_level = 1;
- }
-
- if (speed >= 2) {
- sf->gm_erroradv_type = GM_ERRORADV_TR_2;
-
- sf->selective_ref_frame = 2;
- sf->fast_cdef_search = 1;
-
- sf->adaptive_rd_thresh = 1;
- sf->mv.auto_mv_step_size = 1;
- sf->mv.subpel_iters_per_step = 1;
- sf->disable_filter_search_var_thresh = 100;
- sf->comp_inter_joint_search_thresh = BLOCK_SIZES_ALL;
-
- sf->partition_search_breakout_rate_thr = 80;
- // Note: This speed feature is disable as it seems to be worse in
- // compression/quality and is also slower.
- // sf->auto_min_max_partition_size = RELAXED_NEIGHBORING_MIN_MAX;
- sf->allow_partition_search_skip = 1;
- sf->disable_wedge_search_var_thresh = 100;
- sf->fast_wedge_sign_estimate = 1;
- }
-
- if (speed >= 3) {
- sf->tx_size_search_method = boosted ? USE_FULL_RD : USE_LARGESTALL;
- sf->less_rectangular_check_level = 2;
- sf->adaptive_pred_interp_filter = 1;
- // adaptive_motion_search breaks encoder multi-thread tests.
- // The values in x->pred_mv[] differ for single and multi-thread cases.
- // See aomedia:1778.
- // sf->adaptive_motion_search = 1;
- sf->recode_loop = ALLOW_RECODE_KFARFGF;
- sf->use_transform_domain_distortion = 1;
- sf->use_accurate_subpel_search = 0;
- sf->adaptive_rd_thresh = 2;
- sf->tx_type_search.prune_mode = PRUNE_2D_FAST;
- sf->gm_search_type = GM_DISABLE_SEARCH;
- sf->prune_comp_search_by_single_result = 2;
- }
-
- if (speed >= 4) {
- sf->tx_type_search.fast_intra_tx_type_search = 1;
- sf->tx_type_search.fast_inter_tx_type_search = 1;
- sf->use_square_partition_only_threshold =
- boosted ? BLOCK_128X128 : BLOCK_4X4;
- sf->tx_size_search_method =
- frame_is_intra_only(cm) ? USE_FULL_RD : USE_LARGESTALL;
- sf->mv.subpel_search_method = SUBPEL_TREE_PRUNED;
- sf->adaptive_pred_interp_filter = 0;
- sf->adaptive_mode_search = 1;
- sf->cb_partition_search = !boosted;
- sf->alt_ref_search_fp = 1;
- }
-
- if (speed >= 5) {
- sf->recode_loop = ALLOW_RECODE_KFMAXBW;
- sf->intra_y_mode_mask[TX_64X64] = INTRA_DC_H_V;
- sf->intra_uv_mode_mask[TX_64X64] = UV_INTRA_DC_H_V_CFL;
- sf->intra_y_mode_mask[TX_32X32] = INTRA_DC_H_V;
- sf->intra_uv_mode_mask[TX_32X32] = UV_INTRA_DC_H_V_CFL;
- sf->intra_y_mode_mask[TX_16X16] = INTRA_DC_H_V;
- sf->intra_uv_mode_mask[TX_16X16] = UV_INTRA_DC_H_V_CFL;
- sf->use_square_partition_only_threshold = BLOCK_4X4;
- sf->tx_size_search_method = USE_LARGESTALL;
- sf->mv.search_method = BIGDIA;
- sf->mv.subpel_search_method = SUBPEL_TREE_PRUNED_MORE;
- sf->adaptive_rd_thresh = 4;
- sf->mode_search_skip_flags =
- (cm->frame_type == KEY_FRAME)
- ? 0
- : FLAG_SKIP_INTRA_DIRMISMATCH | FLAG_SKIP_INTRA_BESTINTER |
- FLAG_SKIP_COMP_BESTINTRA | FLAG_SKIP_INTRA_LOWVAR |
- FLAG_EARLY_TERMINATE;
- sf->disable_filter_search_var_thresh = 200;
- sf->use_fast_coef_costing = 1;
- sf->partition_search_breakout_rate_thr = 300;
- sf->use_transform_domain_distortion = 2;
- }
-
- if (speed >= 6) {
- int i;
- sf->optimize_coefficients = NO_TRELLIS_OPT;
- sf->mv.search_method = HEX;
- sf->disable_filter_search_var_thresh = 500;
- for (i = 0; i < TX_SIZES; ++i) {
- sf->intra_y_mode_mask[i] = INTRA_DC;
- sf->intra_uv_mode_mask[i] = UV_INTRA_DC_CFL;
- }
- sf->partition_search_breakout_rate_thr = 500;
- sf->mv.reduce_first_step_size = 1;
- sf->simple_model_rd_from_var = 1;
- }
- if (speed >= 7) {
- sf->default_max_partition_size = BLOCK_32X32;
- sf->default_min_partition_size = BLOCK_8X8;
- sf->intra_y_mode_mask[TX_64X64] = INTRA_DC;
- sf->intra_y_mode_mask[TX_32X32] = INTRA_DC;
- sf->frame_parameter_update = 0;
- sf->mv.search_method = FAST_HEX;
- sf->partition_search_type = REFERENCE_PARTITION;
- sf->mode_search_skip_flags |= FLAG_SKIP_INTRA_DIRMISMATCH;
- }
- if (speed >= 8) {
- sf->mv.search_method = FAST_DIAMOND;
- sf->mv.subpel_force_stop = 2;
- sf->lpf_pick = LPF_PICK_MINIMAL_LPF;
- }
-}
-
-void av1_set_speed_features_framesize_dependent(AV1_COMP *cpi) {
- SPEED_FEATURES *const sf = &cpi->sf;
- const AV1EncoderConfig *const oxcf = &cpi->oxcf;
- RD_OPT *const rd = &cpi->rd;
- int i;
-
- if (oxcf->mode == GOOD) {
- set_good_speed_feature_framesize_dependent(cpi, sf, oxcf->speed);
- }
-
- if (sf->disable_split_mask == DISABLE_ALL_SPLIT) {
- sf->adaptive_pred_interp_filter = 0;
- }
-
- // Check for masked out split cases.
- for (i = 0; i < MAX_REFS; ++i) {
- if (sf->disable_split_mask & (1 << i)) {
- rd->thresh_mult_sub8x8[i] = INT_MAX;
- }
- }
-
- // This is only used in motion vector unit test.
- if (cpi->oxcf.motion_vector_unit_test == 1)
- cpi->find_fractional_mv_step = av1_return_max_sub_pixel_mv;
- else if (cpi->oxcf.motion_vector_unit_test == 2)
- cpi->find_fractional_mv_step = av1_return_min_sub_pixel_mv;
-}
-
-void av1_set_speed_features_framesize_independent(AV1_COMP *cpi) {
- AV1_COMMON *const cm = &cpi->common;
- SPEED_FEATURES *const sf = &cpi->sf;
- MACROBLOCK *const x = &cpi->td.mb;
- const AV1EncoderConfig *const oxcf = &cpi->oxcf;
- int i;
-
- // best quality defaults
- sf->frame_parameter_update = 1;
- sf->mv.search_method = NSTEP;
- sf->recode_loop = ALLOW_RECODE;
- sf->mv.subpel_search_method = SUBPEL_TREE;
- sf->mv.subpel_iters_per_step = 2;
- sf->mv.subpel_force_stop = 0;
-#if DISABLE_TRELLISQ_SEARCH == 2
- sf->optimize_coefficients = !is_lossless_requested(&cpi->oxcf)
- ? FINAL_PASS_TRELLIS_OPT
- : NO_TRELLIS_OPT;
-#elif DISABLE_TRELLISQ_SEARCH == 1
- sf->optimize_coefficients = NO_TRELLIS_OPT;
-#else
- if (is_lossless_requested(&cpi->oxcf))
- sf->optimize_coefficients = NO_TRELLIS_OPT;
- else
- sf->optimize_coefficients = FULL_TRELLIS_OPT;
-#endif // DISABLE_TRELLISQ_SEARCH
- sf->gm_erroradv_type = GM_ERRORADV_TR_0;
- sf->mv.reduce_first_step_size = 0;
- sf->mv.auto_mv_step_size = 0;
- sf->comp_inter_joint_search_thresh = BLOCK_4X4;
- sf->adaptive_rd_thresh = 0;
- sf->tx_size_search_method = USE_FULL_RD;
- sf->inter_tx_size_search_init_depth_sqr = 0;
- sf->inter_tx_size_search_init_depth_rect = 0;
- sf->intra_tx_size_search_init_depth_rect = 0;
- sf->intra_tx_size_search_init_depth_sqr = 0;
- sf->tx_size_search_lgr_block = 0;
- sf->model_based_prune_tx_search_level = 0;
- sf->model_based_post_interp_filter_breakout = 0;
- sf->reduce_inter_modes = 0;
- sf->selective_ref_gm = 1;
- sf->adaptive_motion_search = 0;
- sf->adaptive_pred_interp_filter = 0;
- sf->adaptive_mode_search = 0;
- sf->cb_partition_search = 0;
- sf->alt_ref_search_fp = 0;
- sf->partition_search_type = SEARCH_PARTITION;
- sf->tx_type_search.prune_mode = PRUNE_2D_ACCURATE;
- sf->tx_type_search.ml_tx_split_thresh = 30;
- sf->tx_type_search.use_skip_flag_prediction = 1;
- sf->tx_type_search.fast_intra_tx_type_search = 0;
- sf->tx_type_search.fast_inter_tx_type_search = 0;
- sf->tx_type_search.skip_tx_search = 0;
- sf->selective_ref_frame = 0;
- sf->less_rectangular_check_level = 0;
- sf->use_square_partition_only_threshold = BLOCK_128X128;
- sf->prune_ref_frame_for_rect_partitions = 0;
- sf->auto_min_max_partition_size = NOT_IN_USE;
- sf->rd_auto_partition_min_limit = BLOCK_4X4;
- sf->default_max_partition_size = BLOCK_LARGEST;
- sf->default_min_partition_size = BLOCK_4X4;
- sf->adjust_partitioning_from_last_frame = 0;
- sf->disable_split_mask = 0;
- sf->mode_search_skip_flags = 0;
- sf->disable_filter_search_var_thresh = 0;
- sf->allow_partition_search_skip = 0;
- sf->use_accurate_subpel_search = 2;
- sf->disable_wedge_search_var_thresh = 0;
- sf->fast_wedge_sign_estimate = 0;
- sf->drop_ref = 0;
- sf->skip_intra_in_interframe = 1;
- sf->txb_split_cap = 1;
- sf->adaptive_txb_search_level = 0;
- sf->two_pass_partition_search = 0;
- sf->mode_pruning_based_on_two_pass_partition_search = 0;
- sf->use_intra_txb_hash = 0;
- sf->use_inter_txb_hash = 1;
- sf->use_mb_rd_hash = 1;
- sf->optimize_b_precheck = 0;
- sf->jnt_comp_fast_tx_search = 0;
- sf->jnt_comp_skip_mv_search = 0;
- sf->reuse_inter_intra_mode = 0;
-
- for (i = 0; i < TX_SIZES; i++) {
- sf->intra_y_mode_mask[i] = INTRA_ALL;
- sf->intra_uv_mode_mask[i] = UV_INTRA_ALL;
- }
- sf->lpf_pick = LPF_PICK_FROM_FULL_IMAGE;
- sf->use_fast_coef_costing = 0;
- sf->max_intra_bsize = BLOCK_LARGEST;
- // This setting only takes effect when partition_search_type is set
- // to FIXED_PARTITION.
- sf->always_this_block_size = BLOCK_16X16;
- // Recode loop tolerance %.
- sf->recode_tolerance = 25;
- sf->partition_search_breakout_dist_thr = 0;
- sf->partition_search_breakout_rate_thr = 0;
- sf->simple_model_rd_from_var = 0;
- sf->prune_ext_partition_types_search_level = 0;
- sf->ml_prune_rect_partition = 0;
- sf->ml_prune_ab_partition = 0;
- sf->ml_prune_4_partition = 0;
- sf->fast_cdef_search = 0;
- for (i = 0; i < PARTITION_BLOCK_SIZES; ++i)
- sf->ml_partition_search_breakout_thresh[i] = -1; // -1 means not enabled.
-
- // Set this at the appropriate speed levels
- sf->use_transform_domain_distortion = 0;
- sf->gm_search_type = GM_FULL_SEARCH;
- sf->gm_disable_recode = 0;
- sf->use_fast_interpolation_filter_search = 0;
- sf->skip_repeat_interpolation_filter_search = 0;
- sf->use_hash_based_trellis = 0;
- sf->prune_comp_search_by_single_result = 0;
- sf->skip_repeated_newmv = 0;
-
- // Set decoder side speed feature to use less dual sgr modes
- sf->dual_sgr_penalty_level = 0;
-
- sf->inter_mode_rd_model_estimation = 0;
- sf->obmc_full_pixel_search_level = 0;
-
- if (oxcf->mode == GOOD)
- set_good_speed_features_framesize_independent(cpi, sf, oxcf->speed);
-
- // sf->partition_search_breakout_dist_thr is set assuming max 64x64
- // blocks. Normalise this if the blocks are bigger.
- if (MAX_SB_SIZE_LOG2 > 6) {
- sf->partition_search_breakout_dist_thr <<= 2 * (MAX_SB_SIZE_LOG2 - 6);
- }
-
- cpi->diamond_search_sad = av1_diamond_search_sad;
-
- sf->allow_exhaustive_searches = 1;
- int speed = (oxcf->speed > MAX_MESH_SPEED) ? MAX_MESH_SPEED : oxcf->speed;
- if (cpi->twopass.fr_content_type == FC_GRAPHICS_ANIMATION)
- sf->exhaustive_searches_thresh = (1 << 24);
- else
- sf->exhaustive_searches_thresh = (1 << 25);
- sf->max_exaustive_pct = good_quality_max_mesh_pct[speed];
- if (speed > 0)
- sf->exhaustive_searches_thresh = sf->exhaustive_searches_thresh << 1;
-
- for (i = 0; i < MAX_MESH_STEP; ++i) {
- sf->mesh_patterns[i].range = good_quality_mesh_patterns[speed][i].range;
- sf->mesh_patterns[i].interval =
- good_quality_mesh_patterns[speed][i].interval;
- }
- if ((frame_is_intra_only(cm) && cm->allow_screen_content_tools) &&
- (cpi->twopass.fr_content_type == FC_GRAPHICS_ANIMATION ||
- cpi->oxcf.content == AOM_CONTENT_SCREEN)) {
- for (i = 0; i < MAX_MESH_STEP; ++i) {
- sf->mesh_patterns[i].range = intrabc_mesh_patterns[speed][i].range;
- sf->mesh_patterns[i].interval = intrabc_mesh_patterns[speed][i].interval;
- }
- sf->max_exaustive_pct = intrabc_max_mesh_pct[speed];
- }
-
- // Slow quant, dct and trellis not worthwhile for first pass
- // so make sure they are always turned off.
- if (oxcf->pass == 1) sf->optimize_coefficients = NO_TRELLIS_OPT;
-
- // No recode for 1 pass.
- if (oxcf->pass == 0) {
- sf->recode_loop = DISALLOW_RECODE;
- sf->optimize_coefficients = NO_TRELLIS_OPT;
- }
-
- if (sf->mv.subpel_search_method == SUBPEL_TREE) {
- cpi->find_fractional_mv_step = av1_find_best_sub_pixel_tree;
- } else if (sf->mv.subpel_search_method == SUBPEL_TREE_PRUNED) {
- cpi->find_fractional_mv_step = av1_find_best_sub_pixel_tree_pruned;
- } else if (sf->mv.subpel_search_method == SUBPEL_TREE_PRUNED_MORE) {
- cpi->find_fractional_mv_step = av1_find_best_sub_pixel_tree_pruned_more;
- } else if (sf->mv.subpel_search_method == SUBPEL_TREE_PRUNED_EVENMORE) {
- cpi->find_fractional_mv_step = av1_find_best_sub_pixel_tree_pruned_evenmore;
- }
-
- cpi->optimize_speed_feature =
- oxcf->pass != 1 ? sf->optimize_coefficients : NO_TRELLIS_OPT;
- // FIXME: trellis not very efficient for quantisation matrices
- if (cm->using_qmatrix) cpi->optimize_speed_feature = NO_TRELLIS_OPT;
- if (oxcf->disable_trellis_quant) cpi->optimize_speed_feature = NO_TRELLIS_OPT;
-
- x->min_partition_size = sf->default_min_partition_size;
- x->max_partition_size = sf->default_max_partition_size;
-
- // This is only used in motion vector unit test.
- if (cpi->oxcf.motion_vector_unit_test == 1)
- cpi->find_fractional_mv_step = av1_return_max_sub_pixel_mv;
- else if (cpi->oxcf.motion_vector_unit_test == 2)
- cpi->find_fractional_mv_step = av1_return_min_sub_pixel_mv;
-
-#if CONFIG_DIST_8X8
- if (sf->use_transform_domain_distortion > 0) cpi->oxcf.using_dist_8x8 = 0;
-
- if (cpi->oxcf.using_dist_8x8) x->min_partition_size = BLOCK_8X8;
-#endif // CONFIG_DIST_8X8
-}
diff --git a/third_party/aom/av1/encoder/speed_features.h b/third_party/aom/av1/encoder/speed_features.h
deleted file mode 100644
index 41013b2e7..000000000
--- a/third_party/aom/av1/encoder/speed_features.h
+++ /dev/null
@@ -1,568 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#ifndef AOM_AV1_ENCODER_SPEED_FEATURES_H_
-#define AOM_AV1_ENCODER_SPEED_FEATURES_H_
-
-#include "av1/common/enums.h"
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-enum {
- INTRA_ALL = (1 << DC_PRED) | (1 << V_PRED) | (1 << H_PRED) | (1 << D45_PRED) |
- (1 << D135_PRED) | (1 << D113_PRED) | (1 << D157_PRED) |
- (1 << D203_PRED) | (1 << D67_PRED) | (1 << SMOOTH_PRED) |
- (1 << SMOOTH_V_PRED) | (1 << SMOOTH_H_PRED) | (1 << PAETH_PRED),
- UV_INTRA_ALL =
- (1 << UV_DC_PRED) | (1 << UV_V_PRED) | (1 << UV_H_PRED) |
- (1 << UV_D45_PRED) | (1 << UV_D135_PRED) | (1 << UV_D113_PRED) |
- (1 << UV_D157_PRED) | (1 << UV_D203_PRED) | (1 << UV_D67_PRED) |
- (1 << UV_SMOOTH_PRED) | (1 << UV_SMOOTH_V_PRED) |
- (1 << UV_SMOOTH_H_PRED) | (1 << UV_PAETH_PRED) | (1 << UV_CFL_PRED),
- UV_INTRA_DC = (1 << UV_DC_PRED),
- UV_INTRA_DC_CFL = (1 << UV_DC_PRED) | (1 << UV_CFL_PRED),
- UV_INTRA_DC_TM = (1 << UV_DC_PRED) | (1 << UV_PAETH_PRED),
- UV_INTRA_DC_PAETH_CFL =
- (1 << UV_DC_PRED) | (1 << UV_PAETH_PRED) | (1 << UV_CFL_PRED),
- UV_INTRA_DC_H_V = (1 << UV_DC_PRED) | (1 << UV_V_PRED) | (1 << UV_H_PRED),
- UV_INTRA_DC_H_V_CFL = (1 << UV_DC_PRED) | (1 << UV_V_PRED) |
- (1 << UV_H_PRED) | (1 << UV_CFL_PRED),
- UV_INTRA_DC_PAETH_H_V = (1 << UV_DC_PRED) | (1 << UV_PAETH_PRED) |
- (1 << UV_V_PRED) | (1 << UV_H_PRED),
- UV_INTRA_DC_PAETH_H_V_CFL = (1 << UV_DC_PRED) | (1 << UV_PAETH_PRED) |
- (1 << UV_V_PRED) | (1 << UV_H_PRED) |
- (1 << UV_CFL_PRED),
- INTRA_DC = (1 << DC_PRED),
- INTRA_DC_TM = (1 << DC_PRED) | (1 << PAETH_PRED),
- INTRA_DC_H_V = (1 << DC_PRED) | (1 << V_PRED) | (1 << H_PRED),
- INTRA_DC_PAETH_H_V =
- (1 << DC_PRED) | (1 << PAETH_PRED) | (1 << V_PRED) | (1 << H_PRED)
-};
-
-enum {
- INTER_ALL = (1 << NEARESTMV) | (1 << NEARMV) | (1 << GLOBALMV) |
- (1 << NEWMV) | (1 << NEAREST_NEARESTMV) | (1 << NEAR_NEARMV) |
- (1 << NEW_NEWMV) | (1 << NEAREST_NEWMV) | (1 << NEAR_NEWMV) |
- (1 << NEW_NEARMV) | (1 << NEW_NEARESTMV) | (1 << GLOBAL_GLOBALMV),
- INTER_NEAREST_NEAR_ZERO = (1 << NEARESTMV) | (1 << NEARMV) | (1 << GLOBALMV) |
- (1 << NEAREST_NEARESTMV) | (1 << GLOBAL_GLOBALMV) |
- (1 << NEAREST_NEWMV) | (1 << NEW_NEARESTMV) |
- (1 << NEW_NEARMV) | (1 << NEAR_NEWMV) |
- (1 << NEAR_NEARMV),
-};
-
-enum {
- DISABLE_ALL_INTER_SPLIT = (1 << THR_COMP_GA) | (1 << THR_COMP_LA) |
- (1 << THR_ALTR) | (1 << THR_GOLD) | (1 << THR_LAST),
-
- DISABLE_ALL_SPLIT = (1 << THR_INTRA) | DISABLE_ALL_INTER_SPLIT,
-
- DISABLE_COMPOUND_SPLIT = (1 << THR_COMP_GA) | (1 << THR_COMP_LA),
-
- LAST_AND_INTRA_SPLIT_ONLY = (1 << THR_COMP_GA) | (1 << THR_COMP_LA) |
- (1 << THR_ALTR) | (1 << THR_GOLD)
-};
-
-typedef enum {
- TXFM_CODING_SF = 1,
- INTER_PRED_SF = 2,
- INTRA_PRED_SF = 4,
- PARTITION_SF = 8,
- LOOP_FILTER_SF = 16,
- RD_SKIP_SF = 32,
- RESERVE_2_SF = 64,
- RESERVE_3_SF = 128,
-} DEV_SPEED_FEATURES;
-
-typedef enum {
- DIAMOND = 0,
- NSTEP = 1,
- HEX = 2,
- BIGDIA = 3,
- SQUARE = 4,
- FAST_HEX = 5,
- FAST_DIAMOND = 6
-} SEARCH_METHODS;
-
-typedef enum {
- // No recode.
- DISALLOW_RECODE = 0,
- // Allow recode for KF and exceeding maximum frame bandwidth.
- ALLOW_RECODE_KFMAXBW = 1,
- // Allow recode only for KF/ARF/GF frames.
- ALLOW_RECODE_KFARFGF = 2,
- // Allow recode for all frames based on bitrate constraints.
- ALLOW_RECODE = 3,
-} RECODE_LOOP_TYPE;
-
-typedef enum {
- SUBPEL_TREE = 0,
- SUBPEL_TREE_PRUNED = 1, // Prunes 1/2-pel searches
- SUBPEL_TREE_PRUNED_MORE = 2, // Prunes 1/2-pel searches more aggressively
- SUBPEL_TREE_PRUNED_EVENMORE = 3, // Prunes 1/2- and 1/4-pel searches
- // Other methods to come
-} SUBPEL_SEARCH_METHODS;
-
-typedef enum {
- USE_FULL_RD = 0,
- USE_FAST_RD,
- USE_LARGESTALL,
-} TX_SIZE_SEARCH_METHOD;
-
-typedef enum {
- NOT_IN_USE = 0,
- RELAXED_NEIGHBORING_MIN_MAX = 1
-} AUTO_MIN_MAX_MODE;
-
-typedef enum {
- // Try the full image with different values.
- LPF_PICK_FROM_FULL_IMAGE,
- // Try a small portion of the image with different values.
- LPF_PICK_FROM_SUBIMAGE,
- // Estimate the level based on quantizer and frame type
- LPF_PICK_FROM_Q,
- // Pick 0 to disable LPF if LPF was enabled last frame
- LPF_PICK_MINIMAL_LPF
-} LPF_PICK_METHOD;
-
-typedef enum {
- // Terminate search early based on distortion so far compared to
- // qp step, distortion in the neighborhood of the frame, etc.
- FLAG_EARLY_TERMINATE = 1 << 0,
-
- // Skips comp inter modes if the best so far is an intra mode.
- FLAG_SKIP_COMP_BESTINTRA = 1 << 1,
-
- // Skips oblique intra modes if the best so far is an inter mode.
- FLAG_SKIP_INTRA_BESTINTER = 1 << 3,
-
- // Skips oblique intra modes at angles 27, 63, 117, 153 if the best
- // intra so far is not one of the neighboring directions.
- FLAG_SKIP_INTRA_DIRMISMATCH = 1 << 4,
-
- // Skips intra modes other than DC_PRED if the source variance is small
- FLAG_SKIP_INTRA_LOWVAR = 1 << 5,
-} MODE_SEARCH_SKIP_LOGIC;
-
-typedef enum {
- NO_PRUNE = 0,
- // eliminates one tx type in vertical and horizontal direction
- PRUNE_ONE = 1,
- // eliminates two tx types in each direction
- PRUNE_TWO = 2,
- // adaptively prunes the least perspective tx types out of all 16
- // (tuned to provide negligible quality loss)
- PRUNE_2D_ACCURATE = 3,
- // similar, but applies much more aggressive pruning to get better speed-up
- PRUNE_2D_FAST = 4,
-} TX_TYPE_PRUNE_MODE;
-
-typedef struct {
- TX_TYPE_PRUNE_MODE prune_mode;
- int fast_intra_tx_type_search;
- int fast_inter_tx_type_search;
-
- // Use a skip flag prediction model to detect blocks with skip = 1 early
- // and avoid doing full TX type search for such blocks.
- int use_skip_flag_prediction;
-
- // Threshold used by the ML based method to predict TX block split decisions.
- int ml_tx_split_thresh;
-
- // skip remaining transform type search when we found the rdcost of skip is
- // better than applying transform
- int skip_tx_search;
-} TX_TYPE_SEARCH;
-
-typedef enum {
- // Search partitions using RD criterion
- SEARCH_PARTITION,
-
- // Always use a fixed size partition
- FIXED_PARTITION,
-
- REFERENCE_PARTITION
-} PARTITION_SEARCH_TYPE;
-
-typedef struct MV_SPEED_FEATURES {
- // Motion search method (Diamond, NSTEP, Hex, Big Diamond, Square, etc).
- SEARCH_METHODS search_method;
-
- // This parameter controls which step in the n-step process we start at.
- // It's changed adaptively based on circumstances.
- int reduce_first_step_size;
-
- // If this is set to 1, we limit the motion search range to 2 times the
- // largest motion vector found in the last frame.
- int auto_mv_step_size;
-
- // Subpel_search_method can only be subpel_tree which does a subpixel
- // logarithmic search that keeps stepping at 1/2 pixel units until
- // you stop getting a gain, and then goes on to 1/4 and repeats
- // the same process. Along the way it skips many diagonals.
- SUBPEL_SEARCH_METHODS subpel_search_method;
-
- // Maximum number of steps in logarithmic subpel search before giving up.
- int subpel_iters_per_step;
-
- // Control when to stop subpel search
- int subpel_force_stop;
-} MV_SPEED_FEATURES;
-
-#define MAX_MESH_STEP 4
-
-typedef struct MESH_PATTERN {
- int range;
- int interval;
-} MESH_PATTERN;
-
-typedef enum {
- GM_FULL_SEARCH,
- GM_REDUCED_REF_SEARCH,
- GM_DISABLE_SEARCH
-} GM_SEARCH_TYPE;
-
-typedef enum {
- GM_ERRORADV_TR_0,
- GM_ERRORADV_TR_1,
- GM_ERRORADV_TR_2,
- GM_ERRORADV_TR_TYPES,
-} GM_ERRORADV_TYPE;
-
-typedef enum {
- NO_TRELLIS_OPT, // No trellis optimization
- FULL_TRELLIS_OPT, // Trellis optimization in all stages
- FINAL_PASS_TRELLIS_OPT // Trellis optimization in only the final encode pass
-} TRELLIS_OPT_TYPE;
-
-typedef enum {
- FULL_TXFM_RD,
- LOW_TXFM_RD,
-} TXFM_RD_MODEL;
-
-typedef struct SPEED_FEATURES {
- MV_SPEED_FEATURES mv;
-
- // Frame level coding parameter update
- int frame_parameter_update;
-
- RECODE_LOOP_TYPE recode_loop;
-
- // Trellis (dynamic programming) optimization of quantized values
- TRELLIS_OPT_TYPE optimize_coefficients;
-
- // Global motion warp error threshold
- GM_ERRORADV_TYPE gm_erroradv_type;
-
- // Always set to 0. If on it enables 0 cost background transmission
- // (except for the initial transmission of the segmentation). The feature is
- // disabled because the addition of very large block sizes make the
- // backgrounds very to cheap to encode, and the segmentation we have
- // adds overhead.
- int static_segmentation;
-
- // Limit the inter mode tested in the RD loop
- int reduce_inter_modes;
-
- // Do not compute the global motion parameters for a LAST2_FRAME or
- // LAST3_FRAME if the GOLDEN_FRAME is closer and it has a non identity
- // global model.
- int selective_ref_gm;
-
- // If 1 we iterate finding a best reference for 2 ref frames together - via
- // a log search that iterates 4 times (check around mv for last for best
- // error of combined predictor then check around mv for alt). If 0 we
- // we just use the best motion vector found for each frame by itself.
- BLOCK_SIZE comp_inter_joint_search_thresh;
-
- // This variable is used to cap the maximum number of times we skip testing a
- // mode to be evaluated. A high value means we will be faster.
- int adaptive_rd_thresh;
-
- // Determine which method we use to determine transform size. We can choose
- // between options like full rd, largest for prediction size, largest
- // for intra and model coefs for the rest.
- TX_SIZE_SEARCH_METHOD tx_size_search_method;
-
- // Init search depth for square and rectangular transform partitions.
- // Values:
- // 0 - search full tree, 1: search 1 level, 2: search the highest level only
- int inter_tx_size_search_init_depth_sqr;
- int inter_tx_size_search_init_depth_rect;
- int intra_tx_size_search_init_depth_sqr;
- int intra_tx_size_search_init_depth_rect;
- // If any dimension of a coding block size above 64, always search the
- // largest transform only, since the largest transform block size is 64x64.
- int tx_size_search_lgr_block;
-
- PARTITION_SEARCH_TYPE partition_search_type;
-
- TX_TYPE_SEARCH tx_type_search;
-
- // Skip split transform block partition when the collocated bigger block
- // is selected as all zero coefficients.
- int txb_split_cap;
-
- // Shortcut the transform block partition and type search when the target
- // rdcost is relatively lower.
- // Values are 0 (not used) , or 1 - 2 with progressively increasing
- // aggressiveness
- int adaptive_txb_search_level;
-
- // Prune level for tx_size_type search for inter based on rd model
- // 0: no pruning
- // 1-2: progressively increasing aggressiveness of pruning
- int model_based_prune_tx_search_level;
-
- // Model based breakout after interpolation filter search
- // 0: no breakout
- // 1: use model based rd breakout
- int model_based_post_interp_filter_breakout;
-
- // Used if partition_search_type = FIXED_SIZE_PARTITION
- BLOCK_SIZE always_this_block_size;
-
- // Drop less likely to be picked reference frames in the RD search.
- // Has three levels for now: 0, 1 and 2, where higher levels prune more
- // aggressively than lower ones. (0 means no pruning).
- int selective_ref_frame;
-
- // Prune extended partition types search
- // Can take values 0 - 2, 0 referring to no pruning, and 1 - 2 increasing
- // aggressiveness of pruning in order.
- int prune_ext_partition_types_search_level;
-
- // Use a ML model to prune horz and vert partitions
- int ml_prune_rect_partition;
-
- // Use a ML model to prune horz_a, horz_b, vert_a and vert_b partitions.
- int ml_prune_ab_partition;
-
- // Use a ML model to prune horz4 and vert4 partitions.
- int ml_prune_4_partition;
-
- int fast_cdef_search;
-
- // 2-pass coding block partition search
- int two_pass_partition_search;
-
- // Use the mode decisions made in the initial partition search to prune mode
- // candidates, e.g. ref frames.
- int mode_pruning_based_on_two_pass_partition_search;
-
- // Skip rectangular partition test when partition type none gives better
- // rd than partition type split. Can take values 0 - 2, 0 referring to no
- // skipping, and 1 - 2 increasing aggressiveness of skipping in order.
- int less_rectangular_check_level;
-
- // Use square partition only beyond this block size.
- BLOCK_SIZE use_square_partition_only_threshold;
-
- // Prune reference frames for rectangular partitions.
- int prune_ref_frame_for_rect_partitions;
-
- // Sets min and max partition sizes for this superblock based on the
- // same superblock in last encoded frame, and the left and above neighbor.
- AUTO_MIN_MAX_MODE auto_min_max_partition_size;
- // Ensures the rd based auto partition search will always
- // go down at least to the specified level.
- BLOCK_SIZE rd_auto_partition_min_limit;
-
- // Min and max partition size we enable (block_size) as per auto
- // min max, but also used by adjust partitioning, and pick_partitioning.
- BLOCK_SIZE default_min_partition_size;
- BLOCK_SIZE default_max_partition_size;
-
- // Whether or not we allow partitions one smaller or one greater than the last
- // frame's partitioning. Only used if use_lastframe_partitioning is set.
- int adjust_partitioning_from_last_frame;
-
- // Disables sub 8x8 blocksizes in different scenarios: Choices are to disable
- // it always, to allow it for only Last frame and Intra, disable it for all
- // inter modes or to enable it always.
- int disable_split_mask;
-
- // TODO(jingning): combine the related motion search speed features
- // This allows us to use motion search at other sizes as a starting
- // point for this motion search and limits the search range around it.
- int adaptive_motion_search;
-
- // Flag for allowing some use of exhaustive searches;
- int allow_exhaustive_searches;
-
- // Threshold for allowing exhaistive motion search.
- int exhaustive_searches_thresh;
-
- // Maximum number of exhaustive searches for a frame.
- int max_exaustive_pct;
-
- // Pattern to be used for any exhaustive mesh searches.
- MESH_PATTERN mesh_patterns[MAX_MESH_STEP];
-
- // Allows sub 8x8 modes to use the prediction filter that was determined
- // best for 8x8 mode. If set to 0 we always re check all the filters for
- // sizes less than 8x8, 1 means we check all filter modes if no 8x8 filter
- // was selected, and 2 means we use 8 tap if no 8x8 filter mode was selected.
- int adaptive_pred_interp_filter;
-
- // Adaptive prediction mode search
- int adaptive_mode_search;
-
- int cb_partition_search;
-
- int alt_ref_search_fp;
-
- // Implements various heuristics to skip searching modes
- // The heuristics selected are based on flags
- // defined in the MODE_SEARCH_SKIP_HEURISTICS enum
- unsigned int mode_search_skip_flags;
-
- // A source variance threshold below which filter search is disabled
- // Choose a very large value (UINT_MAX) to use 8-tap always
- unsigned int disable_filter_search_var_thresh;
-
- // A source variance threshold below which wedge search is disabled
- unsigned int disable_wedge_search_var_thresh;
-
- // Whether fast wedge sign estimate is used
- int fast_wedge_sign_estimate;
-
- // These bit masks allow you to enable or disable intra modes for each
- // transform size separately.
- int intra_y_mode_mask[TX_SIZES];
- int intra_uv_mode_mask[TX_SIZES];
-
- // This feature controls how the loop filter level is determined.
- LPF_PICK_METHOD lpf_pick;
-
- // This feature controls whether we do the expensive context update and
- // calculation in the rd coefficient costing loop.
- int use_fast_coef_costing;
-
- // This feature controls the tolerence vs target used in deciding whether to
- // recode a frame. It has no meaning if recode is disabled.
- int recode_tolerance;
-
- // This variable controls the maximum block size where intra blocks can be
- // used in inter frames.
- // TODO(aconverse): Fold this into one of the other many mode skips
- BLOCK_SIZE max_intra_bsize;
-
- // Partition search early breakout thresholds.
- int64_t partition_search_breakout_dist_thr;
- int partition_search_breakout_rate_thr;
-
- // Thresholds for ML based partition search breakout.
- int ml_partition_search_breakout_thresh[PARTITION_BLOCK_SIZES];
-
- // Allow skipping partition search for still image frame
- int allow_partition_search_skip;
-
- // Fast approximation of av1_model_rd_from_var_lapndz
- int simple_model_rd_from_var;
-
- // If true, sub-pixel search uses the exact convolve function used for final
- // encoding and decoding; otherwise, it uses bilinear interpolation.
- int use_accurate_subpel_search;
-
- // Whether to compute distortion in the image domain (slower but
- // more accurate), or in the transform domain (faster but less acurate).
- // 0: use image domain
- // 1: use transform domain in tx_type search, and use image domain for
- // RD_STATS
- // 2: use transform domain
- int use_transform_domain_distortion;
-
- GM_SEARCH_TYPE gm_search_type;
-
- // whether to disable the global motion recode loop
- int gm_disable_recode;
-
- // Do limited interpolation filter search for dual filters, since best choice
- // usually includes EIGHTTAP_REGULAR.
- int use_fast_interpolation_filter_search;
-
- // Save results of interpolation_filter_search for a block
- // Check mv and ref_frames before search, if they are same with previous
- // saved results, it can be skipped.
- int skip_repeat_interpolation_filter_search;
-
- // Use a hash table to store previously computed optimized qcoeffs from
- // expensive calls to optimize_txb.
- int use_hash_based_trellis;
-
- // flag to drop some ref frames in compound motion search
- int drop_ref;
-
- // flag to allow skipping intra mode for inter frame prediction
- int skip_intra_in_interframe;
-
- // Use hash table to store intra(keyframe only) txb transform search results
- // to avoid repeated search on the same residue signal.
- int use_intra_txb_hash;
-
- // Use hash table to store inter txb transform search results
- // to avoid repeated search on the same residue signal.
- int use_inter_txb_hash;
-
- // Use hash table to store macroblock RD search results
- // to avoid repeated search on the same residue signal.
- int use_mb_rd_hash;
-
- // Calculate RD cost before doing optimize_b, and skip if the cost is large.
- int optimize_b_precheck;
-
- // Use model rd instead of transform search in jnt_comp
- int jnt_comp_fast_tx_search;
-
- // Skip mv search in jnt_comp
- int jnt_comp_skip_mv_search;
-
- // Decoder side speed feature to add penalty for use of dual-sgr filters.
- // Takes values 0 - 10, 0 indicating no penalty and each additional level
- // adding a penalty of 1%
- int dual_sgr_penalty_level;
-
- // Dynamically estimate final rd from prediction error and mode cost
- int inter_mode_rd_model_estimation;
-
- // Skip some ref frames in compound motion search by single motion search
- // result. Has three levels for now: 0 referring to no skipping, and 1 - 3
- // increasing aggressiveness of skipping in order.
- // Note: The search order might affect the result. It is better to search same
- // single inter mode as a group.
- int prune_comp_search_by_single_result;
-
- // Reuse the inter_intra_mode search result from NEARESTMV mode to other
- // single ref modes
- int reuse_inter_intra_mode;
-
- // Set the full pixel search level of obmc
- // 0: obmc_full_pixel_diamond
- // 1: obmc_refining_search_sad (faster)
- int obmc_full_pixel_search_level;
-
- // flag to skip NEWMV mode in drl if the motion search result is the same
- int skip_repeated_newmv;
-} SPEED_FEATURES;
-
-struct AV1_COMP;
-
-void av1_set_speed_features_framesize_independent(struct AV1_COMP *cpi);
-void av1_set_speed_features_framesize_dependent(struct AV1_COMP *cpi);
-
-#ifdef __cplusplus
-} // extern "C"
-#endif
-
-#endif // AOM_AV1_ENCODER_SPEED_FEATURES_H_
diff --git a/third_party/aom/av1/encoder/temporal_filter.c b/third_party/aom/av1/encoder/temporal_filter.c
deleted file mode 100644
index 75fdf02a5..000000000
--- a/third_party/aom/av1/encoder/temporal_filter.c
+++ /dev/null
@@ -1,602 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#include <math.h>
-#include <limits.h>
-
-#include "config/aom_config.h"
-
-#include "av1/common/alloccommon.h"
-#include "av1/common/onyxc_int.h"
-#include "av1/common/quant_common.h"
-#include "av1/common/reconinter.h"
-#include "av1/common/odintrin.h"
-#include "av1/encoder/av1_quantize.h"
-#include "av1/encoder/extend.h"
-#include "av1/encoder/firstpass.h"
-#include "av1/encoder/mcomp.h"
-#include "av1/encoder/encoder.h"
-#include "av1/encoder/ratectrl.h"
-#include "av1/encoder/reconinter_enc.h"
-#include "av1/encoder/segmentation.h"
-#include "av1/encoder/temporal_filter.h"
-#include "aom_dsp/aom_dsp_common.h"
-#include "aom_mem/aom_mem.h"
-#include "aom_ports/mem.h"
-#include "aom_ports/aom_timer.h"
-#include "aom_scale/aom_scale.h"
-
-static void temporal_filter_predictors_mb_c(
- MACROBLOCKD *xd, uint8_t *y_mb_ptr, uint8_t *u_mb_ptr, uint8_t *v_mb_ptr,
- int stride, int uv_block_width, int uv_block_height, int mv_row, int mv_col,
- uint8_t *pred, struct scale_factors *scale, int x, int y,
- int can_use_previous, int num_planes) {
- const MV mv = { mv_row, mv_col };
- enum mv_precision mv_precision_uv;
- int uv_stride;
- // TODO(angiebird): change plane setting accordingly
- ConvolveParams conv_params = get_conv_params(0, 0, xd->bd);
- const InterpFilters interp_filters = xd->mi[0]->interp_filters;
- WarpTypesAllowed warp_types;
- memset(&warp_types, 0, sizeof(WarpTypesAllowed));
-
- if (uv_block_width == 8) {
- uv_stride = (stride + 1) >> 1;
- mv_precision_uv = MV_PRECISION_Q4;
- } else {
- uv_stride = stride;
- mv_precision_uv = MV_PRECISION_Q3;
- }
- av1_build_inter_predictor(y_mb_ptr, stride, &pred[0], 16, &mv, scale, 16, 16,
- &conv_params, interp_filters, &warp_types, x, y, 0,
- 0, MV_PRECISION_Q3, x, y, xd, can_use_previous);
-
- if (num_planes > 1) {
- av1_build_inter_predictor(
- u_mb_ptr, uv_stride, &pred[256], uv_block_width, &mv, scale,
- uv_block_width, uv_block_height, &conv_params, interp_filters,
- &warp_types, x, y, 1, 0, mv_precision_uv, x, y, xd, can_use_previous);
-
- av1_build_inter_predictor(
- v_mb_ptr, uv_stride, &pred[512], uv_block_width, &mv, scale,
- uv_block_width, uv_block_height, &conv_params, interp_filters,
- &warp_types, x, y, 2, 0, mv_precision_uv, x, y, xd, can_use_previous);
- }
-}
-
-void av1_temporal_filter_apply_c(uint8_t *frame1, unsigned int stride,
- uint8_t *frame2, unsigned int block_width,
- unsigned int block_height, int strength,
- int filter_weight, unsigned int *accumulator,
- uint16_t *count) {
- unsigned int i, j, k;
- int modifier;
- int byte = 0;
- const int rounding = strength > 0 ? 1 << (strength - 1) : 0;
-
- for (i = 0, k = 0; i < block_height; i++) {
- for (j = 0; j < block_width; j++, k++) {
- int pixel_value = *frame2;
-
- // non-local mean approach
- int diff_sse[9] = { 0 };
- int idx, idy, index = 0;
-
- for (idy = -1; idy <= 1; ++idy) {
- for (idx = -1; idx <= 1; ++idx) {
- int row = (int)i + idy;
- int col = (int)j + idx;
-
- if (row >= 0 && row < (int)block_height && col >= 0 &&
- col < (int)block_width) {
- int diff = frame1[byte + idy * (int)stride + idx] -
- frame2[idy * (int)block_width + idx];
- diff_sse[index] = diff * diff;
- ++index;
- }
- }
- }
-
- assert(index > 0);
-
- modifier = 0;
- for (idx = 0; idx < 9; ++idx) modifier += diff_sse[idx];
-
- modifier *= 3;
- modifier /= index;
-
- ++frame2;
-
- modifier += rounding;
- modifier >>= strength;
-
- if (modifier > 16) modifier = 16;
-
- modifier = 16 - modifier;
- modifier *= filter_weight;
-
- count[k] += modifier;
- accumulator[k] += modifier * pixel_value;
-
- byte++;
- }
-
- byte += stride - block_width;
- }
-}
-
-void av1_highbd_temporal_filter_apply_c(
- uint8_t *frame1_8, unsigned int stride, uint8_t *frame2_8,
- unsigned int block_width, unsigned int block_height, int strength,
- int filter_weight, unsigned int *accumulator, uint16_t *count) {
- uint16_t *frame1 = CONVERT_TO_SHORTPTR(frame1_8);
- uint16_t *frame2 = CONVERT_TO_SHORTPTR(frame2_8);
- unsigned int i, j, k;
- int modifier;
- int byte = 0;
- const int rounding = strength > 0 ? 1 << (strength - 1) : 0;
-
- for (i = 0, k = 0; i < block_height; i++) {
- for (j = 0; j < block_width; j++, k++) {
- int pixel_value = *frame2;
-
- // non-local mean approach
- int diff_sse[9] = { 0 };
- int idx, idy, index = 0;
-
- for (idy = -1; idy <= 1; ++idy) {
- for (idx = -1; idx <= 1; ++idx) {
- int row = (int)i + idy;
- int col = (int)j + idx;
-
- if (row >= 0 && row < (int)block_height && col >= 0 &&
- col < (int)block_width) {
- int diff = frame1[byte + idy * (int)stride + idx] -
- frame2[idy * (int)block_width + idx];
- diff_sse[index] = diff * diff;
- ++index;
- }
- }
- }
-
- assert(index > 0);
-
- modifier = 0;
- for (idx = 0; idx < 9; ++idx) modifier += diff_sse[idx];
-
- modifier *= 3;
- modifier /= index;
-
- ++frame2;
-
- modifier += rounding;
- modifier >>= strength;
-
- if (modifier > 16) modifier = 16;
-
- modifier = 16 - modifier;
- modifier *= filter_weight;
-
- count[k] += modifier;
- accumulator[k] += modifier * pixel_value;
-
- byte++;
- }
-
- byte += stride - block_width;
- }
-}
-
-static int temporal_filter_find_matching_mb_c(AV1_COMP *cpi,
- uint8_t *arf_frame_buf,
- uint8_t *frame_ptr_buf,
- int stride, int x_pos,
- int y_pos) {
- MACROBLOCK *const x = &cpi->td.mb;
- MACROBLOCKD *const xd = &x->e_mbd;
- const MV_SPEED_FEATURES *const mv_sf = &cpi->sf.mv;
- int step_param;
- int sadpb = x->sadperbit16;
- int bestsme = INT_MAX;
- int distortion;
- unsigned int sse;
- int cost_list[5];
- MvLimits tmp_mv_limits = x->mv_limits;
-
- MV best_ref_mv1 = kZeroMv;
- MV best_ref_mv1_full; /* full-pixel value of best_ref_mv1 */
-
- // Save input state
- struct buf_2d src = x->plane[0].src;
- struct buf_2d pre = xd->plane[0].pre[0];
-
- best_ref_mv1_full.col = best_ref_mv1.col >> 3;
- best_ref_mv1_full.row = best_ref_mv1.row >> 3;
-
- // Setup frame pointers
- x->plane[0].src.buf = arf_frame_buf;
- x->plane[0].src.stride = stride;
- xd->plane[0].pre[0].buf = frame_ptr_buf;
- xd->plane[0].pre[0].stride = stride;
-
- step_param = mv_sf->reduce_first_step_size;
- step_param = AOMMIN(step_param, MAX_MVSEARCH_STEPS - 2);
-
- av1_set_mv_search_range(&x->mv_limits, &best_ref_mv1);
-
- x->mvcost = x->mv_cost_stack;
- x->nmvjointcost = x->nmv_vec_cost;
-
- av1_full_pixel_search(cpi, x, BLOCK_16X16, &best_ref_mv1_full, step_param,
- NSTEP, 1, sadpb, cond_cost_list(cpi, cost_list),
- &best_ref_mv1, 0, 0, x_pos, y_pos, 0);
- x->mv_limits = tmp_mv_limits;
-
- // Ignore mv costing by sending NULL pointer instead of cost array
- if (cpi->common.cur_frame_force_integer_mv == 1) {
- const uint8_t *const src_address = x->plane[0].src.buf;
- const int src_stride = x->plane[0].src.stride;
- const uint8_t *const y = xd->plane[0].pre[0].buf;
- const int y_stride = xd->plane[0].pre[0].stride;
- const int offset = x->best_mv.as_mv.row * y_stride + x->best_mv.as_mv.col;
-
- x->best_mv.as_mv.row *= 8;
- x->best_mv.as_mv.col *= 8;
-
- bestsme = cpi->fn_ptr[BLOCK_16X16].vf(y + offset, y_stride, src_address,
- src_stride, &sse);
- } else {
- bestsme = cpi->find_fractional_mv_step(
- x, &cpi->common, 0, 0, &best_ref_mv1,
- cpi->common.allow_high_precision_mv, x->errorperbit,
- &cpi->fn_ptr[BLOCK_16X16], 0, mv_sf->subpel_iters_per_step,
- cond_cost_list(cpi, cost_list), NULL, NULL, &distortion, &sse, NULL,
- NULL, 0, 0, 0, 0, 0);
- }
-
- x->e_mbd.mi[0]->mv[0] = x->best_mv;
-
- // Restore input state
- x->plane[0].src = src;
- xd->plane[0].pre[0] = pre;
-
- return bestsme;
-}
-
-static void temporal_filter_iterate_c(AV1_COMP *cpi,
- YV12_BUFFER_CONFIG **frames,
- int frame_count, int alt_ref_index,
- int strength,
- struct scale_factors *scale) {
- const AV1_COMMON *cm = &cpi->common;
- const int num_planes = av1_num_planes(cm);
- int byte;
- int frame;
- int mb_col, mb_row;
- unsigned int filter_weight;
- int mb_cols = (frames[alt_ref_index]->y_crop_width + 15) >> 4;
- int mb_rows = (frames[alt_ref_index]->y_crop_height + 15) >> 4;
- int mb_y_offset = 0;
- int mb_uv_offset = 0;
- DECLARE_ALIGNED(16, unsigned int, accumulator[16 * 16 * 3]);
- DECLARE_ALIGNED(16, uint16_t, count[16 * 16 * 3]);
- MACROBLOCKD *mbd = &cpi->td.mb.e_mbd;
- YV12_BUFFER_CONFIG *f = frames[alt_ref_index];
- uint8_t *dst1, *dst2;
- DECLARE_ALIGNED(32, uint16_t, predictor16[16 * 16 * 3]);
- DECLARE_ALIGNED(32, uint8_t, predictor8[16 * 16 * 3]);
- uint8_t *predictor;
- const int mb_uv_height = 16 >> mbd->plane[1].subsampling_y;
- const int mb_uv_width = 16 >> mbd->plane[1].subsampling_x;
-
- // Save input state
- uint8_t *input_buffer[MAX_MB_PLANE];
- int i;
- if (mbd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
- predictor = CONVERT_TO_BYTEPTR(predictor16);
- } else {
- predictor = predictor8;
- }
-
- for (i = 0; i < num_planes; i++) input_buffer[i] = mbd->plane[i].pre[0].buf;
-
- for (mb_row = 0; mb_row < mb_rows; mb_row++) {
- // Source frames are extended to 16 pixels. This is different than
- // L/A/G reference frames that have a border of 32 (AV1ENCBORDERINPIXELS)
- // A 6/8 tap filter is used for motion search. This requires 2 pixels
- // before and 3 pixels after. So the largest Y mv on a border would
- // then be 16 - AOM_INTERP_EXTEND. The UV blocks are half the size of the
- // Y and therefore only extended by 8. The largest mv that a UV block
- // can support is 8 - AOM_INTERP_EXTEND. A UV mv is half of a Y mv.
- // (16 - AOM_INTERP_EXTEND) >> 1 which is greater than
- // 8 - AOM_INTERP_EXTEND.
- // To keep the mv in play for both Y and UV planes the max that it
- // can be on a border is therefore 16 - (2*AOM_INTERP_EXTEND+1).
- cpi->td.mb.mv_limits.row_min =
- -((mb_row * 16) + (17 - 2 * AOM_INTERP_EXTEND));
- cpi->td.mb.mv_limits.row_max =
- ((mb_rows - 1 - mb_row) * 16) + (17 - 2 * AOM_INTERP_EXTEND);
-
- for (mb_col = 0; mb_col < mb_cols; mb_col++) {
- int j, k;
- int stride;
-
- memset(accumulator, 0, 16 * 16 * 3 * sizeof(accumulator[0]));
- memset(count, 0, 16 * 16 * 3 * sizeof(count[0]));
-
- cpi->td.mb.mv_limits.col_min =
- -((mb_col * 16) + (17 - 2 * AOM_INTERP_EXTEND));
- cpi->td.mb.mv_limits.col_max =
- ((mb_cols - 1 - mb_col) * 16) + (17 - 2 * AOM_INTERP_EXTEND);
-
- for (frame = 0; frame < frame_count; frame++) {
- const int thresh_low = 10000;
- const int thresh_high = 20000;
-
- if (frames[frame] == NULL) continue;
-
- mbd->mi[0]->mv[0].as_mv.row = 0;
- mbd->mi[0]->mv[0].as_mv.col = 0;
- mbd->mi[0]->motion_mode = SIMPLE_TRANSLATION;
-
- if (frame == alt_ref_index) {
- filter_weight = 2;
- } else {
- // Find best match in this frame by MC
- int err = temporal_filter_find_matching_mb_c(
- cpi, frames[alt_ref_index]->y_buffer + mb_y_offset,
- frames[frame]->y_buffer + mb_y_offset, frames[frame]->y_stride,
- mb_col * 16, mb_row * 16);
-
- // Assign higher weight to matching MB if it's error
- // score is lower. If not applying MC default behavior
- // is to weight all MBs equal.
- filter_weight = err < thresh_low ? 2 : err < thresh_high ? 1 : 0;
- }
-
- if (filter_weight != 0) {
- // Construct the predictors
- temporal_filter_predictors_mb_c(
- mbd, frames[frame]->y_buffer + mb_y_offset,
- frames[frame]->u_buffer + mb_uv_offset,
- frames[frame]->v_buffer + mb_uv_offset, frames[frame]->y_stride,
- mb_uv_width, mb_uv_height, mbd->mi[0]->mv[0].as_mv.row,
- mbd->mi[0]->mv[0].as_mv.col, predictor, scale, mb_col * 16,
- mb_row * 16, cm->allow_warped_motion, num_planes);
-
- // Apply the filter (YUV)
- if (mbd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
- int adj_strength = strength + 2 * (mbd->bd - 8);
- av1_highbd_temporal_filter_apply(
- f->y_buffer + mb_y_offset, f->y_stride, predictor, 16, 16,
- adj_strength, filter_weight, accumulator, count);
- if (num_planes > 1) {
- av1_highbd_temporal_filter_apply(
- f->u_buffer + mb_uv_offset, f->uv_stride, predictor + 256,
- mb_uv_width, mb_uv_height, adj_strength, filter_weight,
- accumulator + 256, count + 256);
- av1_highbd_temporal_filter_apply(
- f->v_buffer + mb_uv_offset, f->uv_stride, predictor + 512,
- mb_uv_width, mb_uv_height, adj_strength, filter_weight,
- accumulator + 512, count + 512);
- }
- } else {
- av1_temporal_filter_apply_c(f->y_buffer + mb_y_offset, f->y_stride,
- predictor, 16, 16, strength,
- filter_weight, accumulator, count);
- if (num_planes > 1) {
- av1_temporal_filter_apply_c(
- f->u_buffer + mb_uv_offset, f->uv_stride, predictor + 256,
- mb_uv_width, mb_uv_height, strength, filter_weight,
- accumulator + 256, count + 256);
- av1_temporal_filter_apply_c(
- f->v_buffer + mb_uv_offset, f->uv_stride, predictor + 512,
- mb_uv_width, mb_uv_height, strength, filter_weight,
- accumulator + 512, count + 512);
- }
- }
- }
- }
-
- // Normalize filter output to produce AltRef frame
- if (mbd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
- uint16_t *dst1_16;
- uint16_t *dst2_16;
- dst1 = cpi->alt_ref_buffer.y_buffer;
- dst1_16 = CONVERT_TO_SHORTPTR(dst1);
- stride = cpi->alt_ref_buffer.y_stride;
- byte = mb_y_offset;
- for (i = 0, k = 0; i < 16; i++) {
- for (j = 0; j < 16; j++, k++) {
- dst1_16[byte] =
- (uint16_t)OD_DIVU(accumulator[k] + (count[k] >> 1), count[k]);
-
- // move to next pixel
- byte++;
- }
-
- byte += stride - 16;
- }
- if (num_planes > 1) {
- dst1 = cpi->alt_ref_buffer.u_buffer;
- dst2 = cpi->alt_ref_buffer.v_buffer;
- dst1_16 = CONVERT_TO_SHORTPTR(dst1);
- dst2_16 = CONVERT_TO_SHORTPTR(dst2);
- stride = cpi->alt_ref_buffer.uv_stride;
- byte = mb_uv_offset;
- for (i = 0, k = 256; i < mb_uv_height; i++) {
- for (j = 0; j < mb_uv_width; j++, k++) {
- int m = k + 256;
- // U
- dst1_16[byte] =
- (uint16_t)OD_DIVU(accumulator[k] + (count[k] >> 1), count[k]);
- // V
- dst2_16[byte] =
- (uint16_t)OD_DIVU(accumulator[m] + (count[m] >> 1), count[m]);
- // move to next pixel
- byte++;
- }
- byte += stride - mb_uv_width;
- }
- }
- } else {
- dst1 = cpi->alt_ref_buffer.y_buffer;
- stride = cpi->alt_ref_buffer.y_stride;
- byte = mb_y_offset;
- for (i = 0, k = 0; i < 16; i++) {
- for (j = 0; j < 16; j++, k++) {
- dst1[byte] =
- (uint8_t)OD_DIVU(accumulator[k] + (count[k] >> 1), count[k]);
-
- // move to next pixel
- byte++;
- }
- byte += stride - 16;
- }
- if (num_planes > 1) {
- dst1 = cpi->alt_ref_buffer.u_buffer;
- dst2 = cpi->alt_ref_buffer.v_buffer;
- stride = cpi->alt_ref_buffer.uv_stride;
- byte = mb_uv_offset;
- for (i = 0, k = 256; i < mb_uv_height; i++) {
- for (j = 0; j < mb_uv_width; j++, k++) {
- int m = k + 256;
- // U
- dst1[byte] =
- (uint8_t)OD_DIVU(accumulator[k] + (count[k] >> 1), count[k]);
- // V
- dst2[byte] =
- (uint8_t)OD_DIVU(accumulator[m] + (count[m] >> 1), count[m]);
- // move to next pixel
- byte++;
- }
- byte += stride - mb_uv_width;
- }
- }
- }
- mb_y_offset += 16;
- mb_uv_offset += mb_uv_width;
- }
- mb_y_offset += 16 * (f->y_stride - mb_cols);
- mb_uv_offset += mb_uv_height * f->uv_stride - mb_uv_width * mb_cols;
- }
-
- // Restore input state
- for (i = 0; i < num_planes; i++) mbd->plane[i].pre[0].buf = input_buffer[i];
-}
-
-// Apply buffer limits and context specific adjustments to arnr filter.
-static void adjust_arnr_filter(AV1_COMP *cpi, int distance, int group_boost,
- int *arnr_frames, int *arnr_strength) {
- const AV1EncoderConfig *const oxcf = &cpi->oxcf;
- const int frames_after_arf =
- av1_lookahead_depth(cpi->lookahead) - distance - 1;
- int frames_fwd = (cpi->oxcf.arnr_max_frames - 1) >> 1;
- int frames_bwd;
- int q, frames, strength;
-
- // Define the forward and backwards filter limits for this arnr group.
- if (frames_fwd > frames_after_arf) frames_fwd = frames_after_arf;
- if (frames_fwd > distance) frames_fwd = distance;
-
- frames_bwd = frames_fwd;
-
- // For even length filter there is one more frame backward
- // than forward: e.g. len=6 ==> bbbAff, len=7 ==> bbbAfff.
- if (frames_bwd < distance) frames_bwd += (oxcf->arnr_max_frames + 1) & 0x1;
-
- // Set the baseline active filter size.
- frames = frames_bwd + 1 + frames_fwd;
-
- // Adjust the strength based on active max q.
- if (cpi->common.current_video_frame > 1)
- q = ((int)av1_convert_qindex_to_q(cpi->rc.avg_frame_qindex[INTER_FRAME],
- cpi->common.seq_params.bit_depth));
- else
- q = ((int)av1_convert_qindex_to_q(cpi->rc.avg_frame_qindex[KEY_FRAME],
- cpi->common.seq_params.bit_depth));
- if (q > 16) {
- strength = oxcf->arnr_strength;
- } else {
- strength = oxcf->arnr_strength - ((16 - q) / 2);
- if (strength < 0) strength = 0;
- }
-
- // Adjust number of frames in filter and strength based on gf boost level.
- if (frames > group_boost / 150) {
- frames = group_boost / 150;
- frames += !(frames & 1);
- }
-
- if (strength > group_boost / 300) {
- strength = group_boost / 300;
- }
-
- *arnr_frames = frames;
- *arnr_strength = strength;
-}
-
-void av1_temporal_filter(AV1_COMP *cpi, int distance) {
- RATE_CONTROL *const rc = &cpi->rc;
- int frame;
- int frames_to_blur;
- int start_frame;
- int strength;
- int frames_to_blur_backward;
- int frames_to_blur_forward;
- struct scale_factors sf;
- YV12_BUFFER_CONFIG *frames[MAX_LAG_BUFFERS] = { NULL };
- const GF_GROUP *const gf_group = &cpi->twopass.gf_group;
-
- // Apply context specific adjustments to the arnr filter parameters.
- adjust_arnr_filter(cpi, distance, rc->gfu_boost, &frames_to_blur, &strength);
- // TODO(weitinglin): Currently, we enforce the filtering strength on
- // extra ARFs' to be zeros. We should investigate in which
- // case it is more beneficial to use non-zero strength
- // filtering.
- if (gf_group->update_type[gf_group->index] == INTNL_ARF_UPDATE) {
- strength = 0;
- frames_to_blur = 1;
- }
-
- int which_arf = gf_group->arf_update_idx[gf_group->index];
-
- // Set the temporal filtering status for the corresponding OVERLAY frame
- if (strength == 0 && frames_to_blur == 1)
- cpi->is_arf_filter_off[which_arf] = 1;
- else
- cpi->is_arf_filter_off[which_arf] = 0;
- cpi->common.showable_frame = cpi->is_arf_filter_off[which_arf];
-
- frames_to_blur_backward = (frames_to_blur / 2);
- frames_to_blur_forward = ((frames_to_blur - 1) / 2);
- start_frame = distance + frames_to_blur_forward;
-
- // Setup frame pointers, NULL indicates frame not included in filter.
- for (frame = 0; frame < frames_to_blur; ++frame) {
- const int which_buffer = start_frame - frame;
- struct lookahead_entry *buf =
- av1_lookahead_peek(cpi->lookahead, which_buffer);
- frames[frames_to_blur - 1 - frame] = &buf->img;
- }
-
- if (frames_to_blur > 0) {
- // Setup scaling factors. Scaling on each of the arnr frames is not
- // supported.
- // ARF is produced at the native frame size and resized when coded.
- av1_setup_scale_factors_for_frame(
- &sf, frames[0]->y_crop_width, frames[0]->y_crop_height,
- frames[0]->y_crop_width, frames[0]->y_crop_height);
- }
-
- temporal_filter_iterate_c(cpi, frames, frames_to_blur,
- frames_to_blur_backward, strength, &sf);
-}
diff --git a/third_party/aom/av1/encoder/temporal_filter.h b/third_party/aom/av1/encoder/temporal_filter.h
deleted file mode 100644
index 2ddc68b2c..000000000
--- a/third_party/aom/av1/encoder/temporal_filter.h
+++ /dev/null
@@ -1,25 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#ifndef AOM_AV1_ENCODER_TEMPORAL_FILTER_H_
-#define AOM_AV1_ENCODER_TEMPORAL_FILTER_H_
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-void av1_temporal_filter(AV1_COMP *cpi, int distance);
-
-#ifdef __cplusplus
-} // extern "C"
-#endif
-
-#endif // AOM_AV1_ENCODER_TEMPORAL_FILTER_H_
diff --git a/third_party/aom/av1/encoder/tokenize.c b/third_party/aom/av1/encoder/tokenize.c
deleted file mode 100644
index 16a6a9a35..000000000
--- a/third_party/aom/av1/encoder/tokenize.c
+++ /dev/null
@@ -1,248 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#include <assert.h>
-#include <math.h>
-#include <stdio.h>
-#include <string.h>
-
-#include "aom_mem/aom_mem.h"
-
-#include "av1/common/entropy.h"
-#include "av1/common/pred_common.h"
-#include "av1/common/scan.h"
-#include "av1/common/seg_common.h"
-
-#include "av1/encoder/cost.h"
-#include "av1/encoder/encoder.h"
-#include "av1/encoder/encodetxb.h"
-#include "av1/encoder/rdopt.h"
-#include "av1/encoder/tokenize.h"
-
-static int cost_and_tokenize_map(Av1ColorMapParam *param, TOKENEXTRA **t,
- int plane, int calc_rate, int allow_update_cdf,
- FRAME_COUNTS *counts) {
- const uint8_t *const color_map = param->color_map;
- MapCdf map_cdf = param->map_cdf;
- ColorCost color_cost = param->color_cost;
- const int plane_block_width = param->plane_width;
- const int rows = param->rows;
- const int cols = param->cols;
- const int n = param->n_colors;
- const int palette_size_idx = n - PALETTE_MIN_SIZE;
- int this_rate = 0;
- uint8_t color_order[PALETTE_MAX_SIZE];
-
- (void)plane;
- (void)counts;
-
- for (int k = 1; k < rows + cols - 1; ++k) {
- for (int j = AOMMIN(k, cols - 1); j >= AOMMAX(0, k - rows + 1); --j) {
- int i = k - j;
- int color_new_idx;
- const int color_ctx = av1_get_palette_color_index_context(
- color_map, plane_block_width, i, j, n, color_order, &color_new_idx);
- assert(color_new_idx >= 0 && color_new_idx < n);
- if (calc_rate) {
- this_rate += (*color_cost)[palette_size_idx][color_ctx][color_new_idx];
- } else {
- (*t)->token = color_new_idx;
- (*t)->color_map_cdf = map_cdf[palette_size_idx][color_ctx];
- ++(*t);
- if (allow_update_cdf)
- update_cdf(map_cdf[palette_size_idx][color_ctx], color_new_idx, n);
-#if CONFIG_ENTROPY_STATS
- if (plane) {
- ++counts->palette_uv_color_index[palette_size_idx][color_ctx]
- [color_new_idx];
- } else {
- ++counts->palette_y_color_index[palette_size_idx][color_ctx]
- [color_new_idx];
- }
-#endif
- }
- }
- }
- if (calc_rate) return this_rate;
- return 0;
-}
-
-static void get_palette_params(const MACROBLOCK *const x, int plane,
- BLOCK_SIZE bsize, Av1ColorMapParam *params) {
- const MACROBLOCKD *const xd = &x->e_mbd;
- const MB_MODE_INFO *const mbmi = xd->mi[0];
- const PALETTE_MODE_INFO *const pmi = &mbmi->palette_mode_info;
- params->color_map = xd->plane[plane].color_index_map;
- params->map_cdf = plane ? xd->tile_ctx->palette_uv_color_index_cdf
- : xd->tile_ctx->palette_y_color_index_cdf;
- params->color_cost =
- plane ? &x->palette_uv_color_cost : &x->palette_y_color_cost;
- params->n_colors = pmi->palette_size[plane];
- av1_get_block_dimensions(bsize, plane, xd, &params->plane_width, NULL,
- &params->rows, &params->cols);
-}
-
-static void get_color_map_params(const MACROBLOCK *const x, int plane,
- BLOCK_SIZE bsize, TX_SIZE tx_size,
- COLOR_MAP_TYPE type,
- Av1ColorMapParam *params) {
- (void)tx_size;
- memset(params, 0, sizeof(*params));
- switch (type) {
- case PALETTE_MAP: get_palette_params(x, plane, bsize, params); break;
- default: assert(0 && "Invalid color map type"); return;
- }
-}
-
-int av1_cost_color_map(const MACROBLOCK *const x, int plane, BLOCK_SIZE bsize,
- TX_SIZE tx_size, COLOR_MAP_TYPE type) {
- assert(plane == 0 || plane == 1);
- Av1ColorMapParam color_map_params;
- get_color_map_params(x, plane, bsize, tx_size, type, &color_map_params);
- return cost_and_tokenize_map(&color_map_params, NULL, plane, 1, 0, NULL);
-}
-
-void av1_tokenize_color_map(const MACROBLOCK *const x, int plane,
- TOKENEXTRA **t, BLOCK_SIZE bsize, TX_SIZE tx_size,
- COLOR_MAP_TYPE type, int allow_update_cdf,
- FRAME_COUNTS *counts) {
- assert(plane == 0 || plane == 1);
- Av1ColorMapParam color_map_params;
- get_color_map_params(x, plane, bsize, tx_size, type, &color_map_params);
- // The first color index does not use context or entropy.
- (*t)->token = color_map_params.color_map[0];
- (*t)->color_map_cdf = NULL;
- ++(*t);
- cost_and_tokenize_map(&color_map_params, t, plane, 0, allow_update_cdf,
- counts);
-}
-
-void tokenize_vartx(ThreadData *td, TOKENEXTRA **t, RUN_TYPE dry_run,
- TX_SIZE tx_size, BLOCK_SIZE plane_bsize, int blk_row,
- int blk_col, int block, int plane, void *arg) {
- MACROBLOCK *const x = &td->mb;
- MACROBLOCKD *const xd = &x->e_mbd;
- MB_MODE_INFO *const mbmi = xd->mi[0];
- const struct macroblockd_plane *const pd = &xd->plane[plane];
- const int max_blocks_high = max_block_high(xd, plane_bsize, plane);
- const int max_blocks_wide = max_block_wide(xd, plane_bsize, plane);
-
- if (blk_row >= max_blocks_high || blk_col >= max_blocks_wide) return;
-
- const TX_SIZE plane_tx_size =
- plane ? av1_get_max_uv_txsize(mbmi->sb_type, pd->subsampling_x,
- pd->subsampling_y)
- : mbmi->inter_tx_size[av1_get_txb_size_index(plane_bsize, blk_row,
- blk_col)];
-
- if (tx_size == plane_tx_size || plane) {
- plane_bsize = get_plane_block_size(mbmi->sb_type, pd->subsampling_x,
- pd->subsampling_y);
- if (!dry_run) {
- av1_update_and_record_txb_context(plane, block, blk_row, blk_col,
- plane_bsize, tx_size, arg);
- } else if (dry_run == DRY_RUN_NORMAL) {
- av1_update_txb_context_b(plane, block, blk_row, blk_col, plane_bsize,
- tx_size, arg);
- } else {
- printf("DRY_RUN_COSTCOEFFS is not supported yet\n");
- assert(0);
- }
- } else {
- // Half the block size in transform block unit.
- const TX_SIZE sub_txs = sub_tx_size_map[tx_size];
- const int bsw = tx_size_wide_unit[sub_txs];
- const int bsh = tx_size_high_unit[sub_txs];
- const int step = bsw * bsh;
-
- assert(bsw > 0 && bsh > 0);
-
- for (int row = 0; row < tx_size_high_unit[tx_size]; row += bsh) {
- for (int col = 0; col < tx_size_wide_unit[tx_size]; col += bsw) {
- const int offsetr = blk_row + row;
- const int offsetc = blk_col + col;
-
- if (offsetr >= max_blocks_high || offsetc >= max_blocks_wide) continue;
-
- tokenize_vartx(td, t, dry_run, sub_txs, plane_bsize, offsetr, offsetc,
- block, plane, arg);
- block += step;
- }
- }
- }
-}
-
-void av1_tokenize_sb_vartx(const AV1_COMP *cpi, ThreadData *td, TOKENEXTRA **t,
- RUN_TYPE dry_run, int mi_row, int mi_col,
- BLOCK_SIZE bsize, int *rate,
- uint8_t allow_update_cdf) {
- const AV1_COMMON *const cm = &cpi->common;
- const int num_planes = av1_num_planes(cm);
- MACROBLOCK *const x = &td->mb;
- MACROBLOCKD *const xd = &x->e_mbd;
- MB_MODE_INFO *const mbmi = xd->mi[0];
- (void)t;
- struct tokenize_b_args arg = { cpi, td, t, 0, allow_update_cdf };
- if (mi_row >= cm->mi_rows || mi_col >= cm->mi_cols) return;
-
- if (mbmi->skip) {
- av1_reset_skip_context(xd, mi_row, mi_col, bsize, num_planes);
- return;
- }
-
- for (int plane = 0; plane < num_planes; ++plane) {
- if (!is_chroma_reference(mi_row, mi_col, bsize,
- xd->plane[plane].subsampling_x,
- xd->plane[plane].subsampling_y)) {
- continue;
- }
- const struct macroblockd_plane *const pd = &xd->plane[plane];
- const BLOCK_SIZE bsizec =
- scale_chroma_bsize(bsize, pd->subsampling_x, pd->subsampling_y);
- const BLOCK_SIZE plane_bsize =
- get_plane_block_size(bsizec, pd->subsampling_x, pd->subsampling_y);
- const int mi_width = block_size_wide[plane_bsize] >> tx_size_wide_log2[0];
- const int mi_height = block_size_high[plane_bsize] >> tx_size_high_log2[0];
- const TX_SIZE max_tx_size = get_vartx_max_txsize(xd, plane_bsize, plane);
- const BLOCK_SIZE txb_size = txsize_to_bsize[max_tx_size];
- int bw = block_size_wide[txb_size] >> tx_size_wide_log2[0];
- int bh = block_size_high[txb_size] >> tx_size_high_log2[0];
- int idx, idy;
- int block = 0;
- int step = tx_size_wide_unit[max_tx_size] * tx_size_high_unit[max_tx_size];
-
- const BLOCK_SIZE max_unit_bsize =
- get_plane_block_size(BLOCK_64X64, pd->subsampling_x, pd->subsampling_y);
- int mu_blocks_wide =
- block_size_wide[max_unit_bsize] >> tx_size_wide_log2[0];
- int mu_blocks_high =
- block_size_high[max_unit_bsize] >> tx_size_high_log2[0];
-
- mu_blocks_wide = AOMMIN(mi_width, mu_blocks_wide);
- mu_blocks_high = AOMMIN(mi_height, mu_blocks_high);
-
- for (idy = 0; idy < mi_height; idy += mu_blocks_high) {
- for (idx = 0; idx < mi_width; idx += mu_blocks_wide) {
- int blk_row, blk_col;
- const int unit_height = AOMMIN(mu_blocks_high + idy, mi_height);
- const int unit_width = AOMMIN(mu_blocks_wide + idx, mi_width);
- for (blk_row = idy; blk_row < unit_height; blk_row += bh) {
- for (blk_col = idx; blk_col < unit_width; blk_col += bw) {
- tokenize_vartx(td, t, dry_run, max_tx_size, plane_bsize, blk_row,
- blk_col, block, plane, &arg);
- block += step;
- }
- }
- }
- }
- }
- if (rate) *rate += arg.this_rate;
-}
diff --git a/third_party/aom/av1/encoder/tokenize.h b/third_party/aom/av1/encoder/tokenize.h
deleted file mode 100644
index 63b505f36..000000000
--- a/third_party/aom/av1/encoder/tokenize.h
+++ /dev/null
@@ -1,73 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#ifndef AOM_AV1_ENCODER_TOKENIZE_H_
-#define AOM_AV1_ENCODER_TOKENIZE_H_
-
-#include "av1/common/entropy.h"
-#include "av1/encoder/block.h"
-#include "aom_dsp/bitwriter.h"
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-typedef struct {
- aom_cdf_prob *color_map_cdf;
- // TODO(yaowu: use packed enum type if appropriate)
- uint8_t token;
-} TOKENEXTRA;
-
-struct AV1_COMP;
-struct ThreadData;
-struct FRAME_COUNTS;
-
-struct tokenize_b_args {
- const struct AV1_COMP *cpi;
- struct ThreadData *td;
- TOKENEXTRA **tp;
- int this_rate;
- uint8_t allow_update_cdf;
-};
-
-typedef enum {
- OUTPUT_ENABLED = 0,
- DRY_RUN_NORMAL,
- DRY_RUN_COSTCOEFFS,
-} RUN_TYPE;
-
-// Note in all the tokenize functions rate if non NULL is incremented
-// with the coefficient token cost only if dry_run = DRY_RUN_COSTCOEFS,
-// otherwise rate is not incremented.
-void av1_tokenize_sb_vartx(const struct AV1_COMP *cpi, struct ThreadData *td,
- TOKENEXTRA **t, RUN_TYPE dry_run, int mi_row,
- int mi_col, BLOCK_SIZE bsize, int *rate,
- uint8_t allow_update_cdf);
-
-int av1_cost_color_map(const MACROBLOCK *const x, int plane, BLOCK_SIZE bsize,
- TX_SIZE tx_size, COLOR_MAP_TYPE type);
-
-void av1_tokenize_color_map(const MACROBLOCK *const x, int plane,
- TOKENEXTRA **t, BLOCK_SIZE bsize, TX_SIZE tx_size,
- COLOR_MAP_TYPE type, int allow_update_cdf,
- struct FRAME_COUNTS *counts);
-
-static INLINE int av1_get_tx_eob(const struct segmentation *seg, int segment_id,
- TX_SIZE tx_size) {
- const int eob_max = av1_get_max_eob(tx_size);
- return segfeature_active(seg, segment_id, SEG_LVL_SKIP) ? 0 : eob_max;
-}
-
-#ifdef __cplusplus
-} // extern "C"
-#endif
-
-#endif // AOM_AV1_ENCODER_TOKENIZE_H_
diff --git a/third_party/aom/av1/encoder/tx_prune_model_weights.h b/third_party/aom/av1/encoder/tx_prune_model_weights.h
deleted file mode 100644
index 405bc9e6e..000000000
--- a/third_party/aom/av1/encoder/tx_prune_model_weights.h
+++ /dev/null
@@ -1,1944 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#ifndef AOM_AV1_ENCODER_TX_PRUNE_MODEL_WEIGHTS_H_
-#define AOM_AV1_ENCODER_TX_PRUNE_MODEL_WEIGHTS_H_
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-#include "av1/encoder/ml.h"
-
-// Tx type model for 4x4 block.
-static const float av1_tx_type_nn_weights_4x4_hor_layer0[32] = {
- -1.64947f, -1.54497f, -1.62832f, -0.17774f, -2.89498f, -0.72498f, 0.72036f,
- 0.17996f, 1.20000f, -0.27654f, 0.77396f, 1.21684f, -1.75909f, -0.51272f,
- -1.25923f, 0.35005f, -0.04257f, -0.23389f, -0.41841f, -0.08229f, 0.09503f,
- 2.73144f, -0.16875f, -0.23482f, 0.02194f, -0.26427f, 0.28049f, 0.21260f,
- 1.35792f, 0.27733f, 0.88660f, -0.68304f,
-};
-
-static const float av1_tx_type_nn_bias_4x4_hor_layer0[8] = {
- 1.38742f, 0.59540f, -1.37622f, 1.92114f,
- 0.00000f, -0.38998f, -0.32726f, -0.15650f,
-};
-
-static const float av1_tx_type_nn_weights_4x4_hor_layer1[32] = {
- 1.65254f, 1.00915f, -0.89318f, -2.05142f, -0.23235f, 0.96781f, -0.37145f,
- -0.21056f, 1.13891f, 0.38675f, 0.87739f, -1.42697f, 0.48015f, 0.61883f,
- -0.03979f, 0.11487f, 0.48042f, 0.45200f, -0.23242f, 0.75166f, 0.55458f,
- 0.39452f, -0.35285f, 1.59120f, -1.49221f, -0.48349f, -0.64692f, 1.49297f,
- -0.26782f, -0.65416f, -0.10648f, 0.05568f,
-};
-
-static const float av1_tx_type_nn_bias_4x4_hor_layer1[4] = {
- 4.07177f,
- 3.26961f,
- 0.58083f,
- 1.21199f,
-};
-
-static const NN_CONFIG av1_tx_type_nnconfig_4x4_hor = {
- 4, // num_inputs
- 4, // num_outputs
- 1, // num_hidden_layers
- {
- 8,
- }, // num_hidden_nodes
- { av1_tx_type_nn_weights_4x4_hor_layer0,
- av1_tx_type_nn_weights_4x4_hor_layer1 },
- { av1_tx_type_nn_bias_4x4_hor_layer0, av1_tx_type_nn_bias_4x4_hor_layer1 }
-};
-
-static const float av1_tx_type_nn_weights_4x4_ver_layer0[32] = {
- -0.02032f, 2.61610f, 0.02098f, -0.30217f, 0.12637f, 0.11017f, -3.01996f,
- 0.35144f, 1.93776f, -0.20463f, 1.64102f, -1.41986f, -3.66717f, -0.51655f,
- 0.43910f, 0.37778f, -1.02634f, 0.85337f, -0.69753f, 1.00206f, 2.11784f,
- 1.89427f, 1.92919f, 0.43201f, -1.67358f, -1.67035f, -1.54623f, 0.16714f,
- -0.06589f, -0.28142f, -0.33118f, 1.72227f,
-};
-
-static const float av1_tx_type_nn_bias_4x4_ver_layer0[8] = {
- -0.33685f, 0.22025f, 0.28140f, 0.56138f,
- 0.93489f, -1.77048f, 1.34989f, -0.93747f,
-};
-
-static const float av1_tx_type_nn_weights_4x4_ver_layer1[32] = {
- -1.39506f, -1.06271f, -1.10886f, -1.69719f, 0.19699f, -2.39850f, -1.26457f,
- 0.75328f, -1.26005f, -0.82738f, -0.12015f, -1.02702f, 1.40828f, -2.37739f,
- -0.65639f, -0.71992f, -0.90453f, -1.12510f, -2.41362f, -1.16061f, -1.85577f,
- -0.99165f, -1.91366f, 0.16785f, 0.34776f, 0.58154f, -0.18217f, -0.29257f,
- -0.86315f, -0.53336f, 0.30320f, -1.32331f,
-};
-
-static const float av1_tx_type_nn_bias_4x4_ver_layer1[4] = {
- -1.31519f,
- -3.26321f,
- 1.71794f,
- -1.90778f,
-};
-
-static const NN_CONFIG av1_tx_type_nnconfig_4x4_ver = {
- 4, // num_inputs
- 4, // num_outputs
- 1, // num_hidden_layers
- {
- 8,
- }, // num_hidden_nodes
- { av1_tx_type_nn_weights_4x4_ver_layer0,
- av1_tx_type_nn_weights_4x4_ver_layer1 },
- { av1_tx_type_nn_bias_4x4_ver_layer0, av1_tx_type_nn_bias_4x4_ver_layer1 }
-};
-/******************************************************************************/
-
-// Tx type model for 4x8 block.
-static const float av1_tx_type_nn_weights_4x8_hor_layer0[32] = {
- 0.00218f, -0.41880f, -0.61215f, -0.92588f, 0.54291f, -0.10898f, 0.70691f,
- 0.46819f, -1.61598f, -0.08834f, -0.96839f, 1.18489f, -0.45171f, -0.65445f,
- -0.32179f, -0.10399f, 1.04379f, 0.91895f, 0.85589f, 0.08267f, 1.35388f,
- -2.03096f, 0.08168f, -0.06372f, -0.26732f, -0.48262f, -0.08682f, 2.44071f,
- -1.35896f, -1.17121f, 1.68866f, 0.10357f,
-};
-
-static const float av1_tx_type_nn_bias_4x8_hor_layer0[8] = {
- 2.93391f, 0.66831f, -0.21419f, 0.00000f,
- -0.72878f, 0.15127f, -1.46755f, 0.16658f,
-};
-
-static const float av1_tx_type_nn_weights_4x8_hor_layer1[32] = {
- -1.52077f, -1.06243f, 0.35319f, -0.49207f, 0.54524f, 0.44271f, 1.37117f,
- -0.38957f, -1.28889f, -0.57133f, 0.04658f, 0.62278f, 0.37984f, 0.33247f,
- 1.65547f, -0.56806f, -1.38645f, -0.76258f, 0.67926f, 0.08783f, -0.01443f,
- 0.34950f, 1.45812f, -0.51332f, -1.41331f, -0.16453f, 0.05755f, 0.31405f,
- -0.50191f, 0.18219f, 1.83664f, -0.75276f,
-};
-
-static const float av1_tx_type_nn_bias_4x8_hor_layer1[4] = {
- -1.17455f,
- -2.26089f,
- -1.79863f,
- -2.26333f,
-};
-
-static const NN_CONFIG av1_tx_type_nnconfig_4x8_hor = {
- 4, // num_inputs
- 4, // num_outputs
- 1, // num_hidden_layers
- {
- 8,
- }, // num_hidden_nodes
- { av1_tx_type_nn_weights_4x8_hor_layer0,
- av1_tx_type_nn_weights_4x8_hor_layer1 },
- { av1_tx_type_nn_bias_4x8_hor_layer0, av1_tx_type_nn_bias_4x8_hor_layer1 }
-};
-
-static const float av1_tx_type_nn_weights_4x8_ver_layer0[128] = {
- -0.00952f, -0.98858f, -0.93181f, 1.39594f, 0.96559f, 0.18162f, -0.76064f,
- -0.06066f, 0.07907f, -0.09365f, -0.21313f, -0.02187f, -2.61707f, -2.68702f,
- -0.10982f, 0.18559f, 1.17049f, 1.11387f, 1.12697f, 1.05804f, 1.12764f,
- 1.06318f, 1.12052f, 0.17406f, 1.83157f, 0.19362f, 0.46910f, 0.39608f,
- 0.33342f, 0.40083f, 0.27645f, 1.06864f, -4.06645f, -0.38775f, -0.11070f,
- 0.03781f, -0.09141f, 0.06185f, -0.04852f, 0.20163f, 0.16784f, 0.16641f,
- -0.50941f, -0.61087f, 2.07008f, -0.82381f, -0.85558f, 0.05528f, -0.10535f,
- -2.81150f, 0.67038f, 0.43643f, 0.49062f, -0.04465f, 0.90438f, 0.00977f,
- 0.46272f, 1.59751f, 0.95234f, 0.35086f, 0.85624f, 0.73149f, 1.67779f,
- -2.21511f, -1.24746f, -1.09014f, -0.92441f, -1.22591f, -1.06961f, -0.95897f,
- -1.24956f, 0.73797f, 1.23275f, -0.60064f, -0.07851f, 0.14397f, 0.22110f,
- -0.04422f, 0.14350f, 0.75926f, 0.35032f, 0.48104f, 2.81408f, 0.34662f,
- 0.42090f, 0.35521f, -1.36804f, -0.14974f, -0.47696f, -0.07892f, 0.36910f,
- 0.32299f, 0.23916f, 0.06032f, -0.17844f, -0.17558f, -1.42746f, -0.55828f,
- -1.00418f, -0.64823f, -0.73654f, -0.85197f, -1.50989f, 1.69385f, -0.04973f,
- -0.09273f, 1.04249f, 0.79235f, 1.13229f, 0.99617f, 0.03851f, 0.56334f,
- 0.90795f, 1.08296f, 0.58519f, 1.74765f, 0.63971f, 1.35951f, 0.07803f,
- -0.05127f, 0.26514f, -0.84629f, -0.66343f, -2.10630f, 0.11017f, 2.18528f,
- -0.21958f, 0.05970f,
-};
-
-static const float av1_tx_type_nn_bias_4x8_ver_layer0[16] = {
- 0.04205f, 0.22260f, -1.03870f, -1.19568f, 0.44283f, 0.01143f,
- 0.00235f, 4.26772f, 0.44364f, -0.33199f, -0.39076f, -0.35129f,
- 0.08288f, 0.18195f, -0.79890f, 0.10047f,
-};
-
-static const float av1_tx_type_nn_weights_4x8_ver_layer1[64] = {
- -0.38193f, -0.12095f, 1.57802f, 0.34932f, -0.47333f, -0.12304f, -0.01736f,
- -2.52445f, 0.18983f, -0.64707f, -0.60889f, -0.53750f, 0.91666f, -0.62823f,
- -0.13377f, -0.43594f, -0.38618f, -0.01328f, 0.97457f, 1.48589f, -1.03238f,
- -0.33459f, -0.35108f, -2.42417f, 0.60229f, 0.06824f, -0.75495f, 0.26902f,
- 0.65311f, -0.23887f, -0.44604f, -0.55800f, -0.33842f, 0.04259f, -0.59589f,
- 0.49738f, -0.62301f, -0.30896f, -0.29602f, -2.57052f, 2.00943f, -0.66490f,
- -0.76312f, 0.28256f, 1.06311f, -0.38364f, -0.63508f, -0.57609f, -0.88765f,
- -1.04403f, -0.46531f, 0.34084f, -1.20498f, -0.68352f, -0.72251f, -2.63242f,
- -0.68736f, -0.37904f, -1.32371f, 0.47288f, 1.51904f, 0.78372f, -1.01830f,
- -1.01848f,
-};
-
-static const float av1_tx_type_nn_bias_4x8_ver_layer1[4] = {
- -1.45955f,
- -2.08949f,
- -1.24813f,
- -1.55368f,
-};
-
-static const NN_CONFIG av1_tx_type_nnconfig_4x8_ver = {
- 8, // num_inputs
- 4, // num_outputs
- 1, // num_hidden_layers
- {
- 16,
- }, // num_hidden_nodes
- { av1_tx_type_nn_weights_4x8_ver_layer0,
- av1_tx_type_nn_weights_4x8_ver_layer1 },
- { av1_tx_type_nn_bias_4x8_ver_layer0, av1_tx_type_nn_bias_4x8_ver_layer1 }
-};
-/******************************************************************************/
-
-// Tx type model for 8x4 block.
-static const float av1_tx_type_nn_weights_8x4_hor_layer0[128] = {
- -0.22492f, 0.13341f, -4.03243f, -0.64015f, 0.02783f, 0.60466f, -0.13335f,
- 0.16828f, 0.12336f, 0.52904f, 1.18455f, -0.32425f, 0.13052f, 0.93810f,
- -3.71165f, 0.02990f, -4.63558f, 0.05666f, 0.03524f, -0.07449f, -0.44006f,
- -0.33215f, -0.33713f, 0.08097f, 0.60873f, 0.29582f, 0.21696f, -0.78729f,
- -0.16757f, -0.26567f, -0.00720f, -1.11226f, 1.58189f, 1.58463f, 1.48536f,
- 1.54374f, 1.60069f, 1.46125f, 1.53932f, 0.05974f, -1.82192f, 0.47043f,
- 0.38090f, 0.20833f, -0.05637f, 0.05183f, 0.01323f, -0.25662f, 0.78634f,
- -0.55069f, -0.02975f, -1.29294f, -0.77192f, -2.34299f, -1.28074f, 0.77894f,
- -1.69740f, -1.66032f, -1.44323f, -1.55063f, -1.50845f, -1.23690f, -1.80663f,
- 0.75079f, 2.32551f, 0.05878f, 0.80438f, 0.88584f, 0.69153f, 0.89060f,
- 0.73660f, 0.87259f, -0.00745f, -1.30044f, -0.59430f, 2.07270f, 1.03307f,
- -0.84697f, -1.19393f, 0.17549f, -0.24978f, -3.67234f, 0.20781f, -0.53946f,
- -0.05068f, 0.88274f, 1.30371f, 0.10288f, 0.07585f, 0.12259f, -0.30815f,
- 0.25437f, -2.82096f, -2.69482f, 0.02370f, 0.12500f, -0.21019f, -0.49220f,
- 0.03638f, -0.29795f, 0.28645f, -0.48432f, -0.38584f, -0.32148f, -0.47197f,
- 0.32437f, 0.32528f, -0.19437f, 0.30383f, -0.31879f, 0.26359f, -0.12164f,
- -0.43647f, -0.08288f, -0.33438f, -0.63608f, -0.46647f, -0.46574f, 0.47806f,
- -0.49012f, -1.51234f, -1.13502f, -1.20470f, -1.02913f, -1.09182f, -0.93921f,
- -1.85523f, 0.92532f,
-};
-
-static const float av1_tx_type_nn_bias_8x4_hor_layer0[16] = {
- 0.36631f, 0.02901f, 0.64305f, 1.53074f, -1.40229f, 0.03852f,
- -0.05043f, 0.89632f, -1.23312f, 0.07036f, 0.17070f, 0.56250f,
- -0.28958f, -0.32869f, -0.01704f, 0.68171f,
-};
-
-static const float av1_tx_type_nn_weights_8x4_hor_layer1[64] = {
- -0.49441f, -0.31960f, -0.84946f, -0.85800f, -2.37767f, 0.81373f, -0.73172f,
- -0.69337f, 0.88807f, -0.49242f, -0.44717f, -0.11436f, 0.09978f, 0.15393f,
- 0.17083f, 1.44850f, -0.20582f, -0.04906f, 0.42990f, -0.61939f, -1.09692f,
- -1.14885f, -1.36879f, -1.30828f, -0.59558f, -0.30903f, -0.08906f, 0.06953f,
- 0.15383f, -0.04193f, -0.54858f, 1.82676f, -0.22411f, 0.05264f, -0.45848f,
- -0.72985f, 0.87553f, 0.04116f, -1.29774f, -2.63018f, 1.09089f, -0.36048f,
- -0.16725f, 0.11627f, 0.49918f, 0.07539f, 0.00763f, 0.73706f, 0.87800f,
- 0.57049f, 0.60969f, 1.02779f, 1.53339f, -0.35915f, 0.06410f, 1.44582f,
- 0.09698f, 0.71888f, 0.60594f, 0.84103f, -0.50440f, -0.38825f, 0.15626f,
- -1.10654f,
-};
-
-static const float av1_tx_type_nn_bias_8x4_hor_layer1[4] = {
- -0.92861f,
- -1.45151f,
- -1.33588f,
- -4.33853f,
-};
-
-static const NN_CONFIG av1_tx_type_nnconfig_8x4_hor = {
- 8, // num_inputs
- 4, // num_outputs
- 1, // num_hidden_layers
- {
- 16,
- }, // num_hidden_nodes
- { av1_tx_type_nn_weights_8x4_hor_layer0,
- av1_tx_type_nn_weights_8x4_hor_layer1 },
- { av1_tx_type_nn_bias_8x4_hor_layer0, av1_tx_type_nn_bias_8x4_hor_layer1 }
-};
-
-static const float av1_tx_type_nn_weights_8x4_ver_layer0[32] = {
- -1.10946f, 1.86574f, -1.59343f, 0.27018f, -1.70676f, -0.73982f, -0.19021f,
- -1.94208f, -2.29759f, -1.44402f, 0.28700f, -1.18340f, -1.50158f, -0.44175f,
- -1.36831f, 1.00374f, 2.59312f, 0.50291f, -0.71042f, -0.12238f, -0.15901f,
- -0.22807f, -0.67376f, -0.30215f, 0.54407f, -0.45538f, 1.18262f, 2.28687f,
- 1.66212f, 1.70826f, 1.55182f, 0.12230f,
-};
-
-static const float av1_tx_type_nn_bias_8x4_ver_layer0[8] = {
- 0.10943f, 2.09789f, 2.16578f, 0.15766f,
- -0.42461f, 0.00000f, 1.22090f, -1.28717f,
-};
-
-static const float av1_tx_type_nn_weights_8x4_ver_layer1[32] = {
- 1.20426f, -1.23237f, 2.41053f, -0.72488f, 1.25249f, 0.18018f, -0.09586f,
- 2.17901f, 0.15364f, 1.21535f, -0.38263f, -0.74309f, 0.50551f, -0.54208f,
- 0.59139f, 1.16095f, 0.55919f, -0.60183f, 1.18949f, 1.60787f, 0.54002f,
- -0.10712f, -0.16153f, 0.16207f, -0.32338f, 2.68712f, -2.83483f, -0.27086f,
- -1.15005f, -0.39311f, 1.51236f, -1.68973f,
-};
-
-static const float av1_tx_type_nn_bias_8x4_ver_layer1[4] = {
- 1.81013f,
- 1.10517f,
- 2.90059f,
- 0.95391f,
-};
-
-static const NN_CONFIG av1_tx_type_nnconfig_8x4_ver = {
- 4, // num_inputs
- 4, // num_outputs
- 1, // num_hidden_layers
- {
- 8,
- }, // num_hidden_nodes
- { av1_tx_type_nn_weights_8x4_ver_layer0,
- av1_tx_type_nn_weights_8x4_ver_layer1 },
- { av1_tx_type_nn_bias_8x4_ver_layer0, av1_tx_type_nn_bias_8x4_ver_layer1 }
-};
-/******************************************************************************/
-
-// Tx type model for 8x8 block.
-static const float av1_tx_type_nn_weights_8x8_hor_layer0[128] = {
- -0.85529f, 0.37619f, 0.12754f, 0.08622f, 0.45278f, 0.54929f, 1.60651f,
- -0.62654f, -0.54929f, -0.10131f, -0.17569f, 0.13948f, 0.31695f, -0.05616f,
- 0.20483f, -0.36448f, 2.27203f, -0.33087f, 0.47679f, 0.86888f, 0.39370f,
- 0.46239f, 0.01113f, 1.50327f, -1.48226f, -1.69621f, -1.49777f, -1.38885f,
- -1.37753f, -1.22681f, -1.70576f, 0.51329f, -1.65662f, 1.74197f, -0.13579f,
- -0.13133f, -0.58396f, -0.55510f, -1.10709f, -2.34975f, 0.22445f, -0.56491f,
- -0.83432f, 0.13492f, 1.32147f, 2.85285f, 0.13819f, 0.03792f, -1.30792f,
- 0.04155f, -0.70644f, -0.43430f, -0.16212f, -0.86945f, -1.16976f, 1.68339f,
- 0.29540f, 0.01137f, -0.25335f, -0.16856f, 0.12028f, 0.05207f, 0.39357f,
- -0.01545f, -0.21980f, -1.94091f, -1.01315f, -0.68270f, -0.40590f, -0.67111f,
- 2.08283f, 0.19291f, -4.81426f, -0.65044f, -0.24598f, 0.06371f, -0.10272f,
- -0.14502f, -0.06821f, 0.45202f, 0.21091f, -0.80864f, 0.39255f, 1.79189f,
- 1.80453f, 1.10484f, 1.17608f, 0.96901f, -0.35871f, -0.94311f, 0.63147f,
- 2.95157f, 0.45917f, -0.42849f, -0.55643f, -0.06097f, 3.49299f, -0.50972f,
- 0.11075f, -0.08405f, -0.09274f, -0.22694f, -0.42426f, 0.48632f, -1.61074f,
- 1.82998f, 0.37623f, -1.20330f, -0.01142f, -1.33307f, -0.27492f, -2.23621f,
- 1.38846f, 1.42085f, 1.42568f, 1.36152f, 1.46910f, 1.27473f, 1.34752f,
- 0.12753f, -1.08197f, -1.08280f, -0.79489f, -1.12338f, -1.06795f, -0.87857f,
- -0.99892f, 1.09823f,
-};
-
-static const float av1_tx_type_nn_bias_8x8_hor_layer0[16] = {
- -0.49232f, -0.29685f, -1.44020f, 1.10940f, 1.16452f, -0.34862f,
- -0.38761f, -0.36243f, 0.21776f, 0.28234f, 2.34269f, -0.04104f,
- -0.26319f, 2.65579f, -1.30137f, -0.01487f,
-};
-
-static const float av1_tx_type_nn_weights_8x8_hor_layer1[64] = {
- -0.38058f, -0.41295f, -1.26884f, -0.75560f, -1.57450f, 0.56072f, -1.42322f,
- -0.29106f, 0.07228f, 0.04391f, 1.61388f, -0.03055f, 0.81637f, 2.06045f,
- 0.27119f, -0.48328f, -0.45528f, -0.60534f, -1.61209f, -0.78157f, -1.65034f,
- 0.60958f, -1.30523f, 0.25143f, 0.11398f, 0.37860f, 1.54829f, 0.02309f,
- 0.67288f, 2.11447f, 0.44845f, -0.70406f, -0.67897f, -0.38759f, -1.30383f,
- -1.22646f, -1.54571f, 0.60552f, -1.52565f, 0.11469f, 0.17344f, 0.08622f,
- 1.57906f, -0.00909f, 0.81634f, 2.04909f, 1.26466f, -1.45741f, -0.75229f,
- 0.06200f, -1.05835f, -0.66257f, -1.73766f, 0.99923f, -1.87082f, 0.14580f,
- 0.49525f, 0.46839f, 1.32203f, 0.33923f, 0.97001f, 2.38584f, 1.58811f,
- 0.06161f,
-};
-
-static const float av1_tx_type_nn_bias_8x8_hor_layer1[4] = {
- 1.70385f,
- 1.82373f,
- 1.78496f,
- 1.80826f,
-};
-
-static const NN_CONFIG av1_tx_type_nnconfig_8x8_hor = {
- 8, // num_inputs
- 4, // num_outputs
- 1, // num_hidden_layers
- {
- 16,
- }, // num_hidden_nodes
- { av1_tx_type_nn_weights_8x8_hor_layer0,
- av1_tx_type_nn_weights_8x8_hor_layer1 },
- { av1_tx_type_nn_bias_8x8_hor_layer0, av1_tx_type_nn_bias_8x8_hor_layer1 }
-};
-
-static const float av1_tx_type_nn_weights_8x8_ver_layer0[128] = {
- -0.67016f, -1.72366f, -1.86576f, -1.50962f, -1.70419f, -1.73964f, -1.84615f,
- 2.09681f, -0.05081f, -0.61030f, 2.02541f, 0.60222f, 0.99936f, 2.02114f,
- -0.53893f, -0.23757f, 0.73566f, 0.25443f, 0.00132f, -0.74036f, -0.75351f,
- -0.76964f, -1.71007f, -0.15770f, 1.60982f, 2.17638f, 0.90681f, 0.64973f,
- 0.85914f, 0.58786f, -1.46228f, 0.05187f, 1.18804f, 0.30850f, 0.29512f,
- 0.40526f, 0.37635f, 0.32311f, 0.37471f, 1.12346f, 3.41856f, -0.36653f,
- 0.42537f, -0.19240f, 0.00155f, 0.30826f, -0.02116f, -0.53435f, -0.34829f,
- -0.52466f, -0.11521f, -0.29163f, -2.05689f, -2.87372f, -0.62626f, 0.09585f,
- -0.75257f, 0.10057f, 1.43474f, 0.89450f, 0.75900f, 1.11147f, 1.00558f,
- 0.25886f, 2.22095f, -0.17926f, 0.57161f, 0.39546f, 0.47846f, 0.40452f,
- 0.54298f, 0.45814f, -3.62788f, -3.02374f, 0.03716f, -0.13937f, -0.09415f,
- -0.12463f, 0.05682f, 0.03672f, 1.20746f, 1.25003f, 1.27071f, 1.31883f,
- 1.27473f, 1.34943f, 1.23158f, 0.09039f, 0.19388f, 0.63420f, 2.79612f,
- 0.93803f, -0.11323f, -0.02027f, 0.41286f, -0.05979f, -3.80705f, -0.52451f,
- -0.77098f, -0.68132f, -0.65559f, -0.60975f, -1.26165f, 0.25582f, 0.05346f,
- 0.61403f, 0.32140f, -2.39831f, -1.42355f, 1.30541f, 1.02361f, 0.12930f,
- -1.61469f, -0.77036f, -0.59144f, 1.27769f, 1.52068f, 0.82137f, 1.83159f,
- -0.66626f, -0.69806f, -1.00564f, -0.85995f, -0.90889f, -0.84412f, -0.85712f,
- -1.29848f, 0.39308f,
-};
-
-static const float av1_tx_type_nn_bias_8x8_ver_layer0[16] = {
- -0.14868f, -0.48343f, 3.94416f, -0.78037f, -1.33789f, -0.60611f,
- 0.51793f, 0.44030f, -0.71563f, 0.22561f, -1.19083f, -0.46149f,
- 0.83015f, 0.06024f, 1.17180f, 0.65122f,
-};
-
-static const float av1_tx_type_nn_weights_8x8_ver_layer1[64] = {
- -1.42711f, -0.21683f, 2.12061f, 0.20489f, -0.50228f, -0.24770f, 0.23391f,
- 1.03470f, -0.44847f, -0.63225f, -0.21583f, -0.06467f, -0.21892f, -0.07786f,
- 1.43322f, 0.00280f, -1.53057f, -0.18912f, 1.95333f, 0.31151f, -2.07601f,
- 0.06776f, 0.25529f, 0.94800f, -1.11453f, -0.20594f, -0.13281f, 0.01485f,
- 0.17650f, -0.07955f, 1.43734f, -0.23193f, -2.06463f, -0.21238f, 2.13707f,
- 0.30351f, 0.27594f, -0.36245f, 0.19539f, 0.91045f, -0.24068f, -0.37616f,
- 0.88792f, 0.02947f, -0.16903f, -0.04932f, 1.51293f, -0.95967f, -1.62903f,
- 0.05326f, 2.30703f, 0.64445f, -1.09464f, -0.16623f, 1.00240f, 0.07548f,
- -0.50406f, 0.63854f, 1.02340f, 0.49833f, 0.13671f, 0.26722f, 2.09516f,
- -0.41305f,
-};
-
-static const float av1_tx_type_nn_bias_8x8_ver_layer1[4] = {
- 2.14067f,
- 2.76699f,
- 2.04233f,
- 1.34803f,
-};
-
-static const NN_CONFIG av1_tx_type_nnconfig_8x8_ver = {
- 8, // num_inputs
- 4, // num_outputs
- 1, // num_hidden_layers
- {
- 16,
- }, // num_hidden_nodes
- { av1_tx_type_nn_weights_8x8_ver_layer0,
- av1_tx_type_nn_weights_8x8_ver_layer1 },
- { av1_tx_type_nn_bias_8x8_ver_layer0, av1_tx_type_nn_bias_8x8_ver_layer1 }
-};
-/******************************************************************************/
-
-// Tx type model for 8x16 block.
-static const float av1_tx_type_nn_weights_8x16_hor_layer0[128] = {
- -1.61872f, -1.58520f, -1.41236f, -1.53255f, -1.59794f, -1.25769f, -1.90043f,
- 0.73431f, 1.10135f, 0.47054f, 0.43230f, -0.43009f, -0.09135f, -0.07289f,
- -0.38785f, 1.23775f, -0.35312f, 0.73789f, 0.88864f, 0.75957f, 0.62579f,
- 0.46974f, 0.21851f, 1.63821f, -2.27289f, -0.68522f, -0.69814f, -0.84368f,
- -0.91320f, -0.63055f, -1.03296f, 0.55778f, -0.00071f, 1.27539f, 1.60068f,
- 1.40975f, 0.97372f, 0.92843f, 1.90853f, 0.12626f, 1.71953f, 1.41978f,
- -0.12234f, -1.27058f, 0.76207f, 0.02495f, -0.67038f, -0.05255f, 1.72923f,
- 1.47630f, 1.47058f, 1.47614f, 1.49354f, 1.66131f, 1.50801f, 0.17145f,
- -2.30947f, -2.10850f, -1.25636f, -0.24900f, 0.72602f, 1.26572f, 0.97865f,
- -0.65466f, 1.31129f, 0.26916f, 0.12139f, -0.12761f, -0.39143f, -0.28134f,
- 0.06584f, 2.24418f, 0.22516f, 0.05011f, -0.01671f, -0.29476f, -0.40326f,
- 0.21138f, -0.11573f, -0.31154f, -0.36828f, 0.03694f, -0.07172f, -0.63419f,
- -3.14351f, -1.23125f, 0.65311f, -0.11406f, 1.97287f, -0.10422f, 0.83896f,
- 0.85033f, 0.49724f, 0.80482f, 0.51454f, 1.06447f, 0.76693f, 0.72599f,
- -0.78573f, -0.53950f, 0.40894f, 0.00086f, 0.10784f, -0.70498f, 1.16395f,
- 1.14597f, 1.13496f, 1.12177f, 1.02100f, -1.37574f, -2.97144f, 0.33899f,
- 0.42013f, 0.86327f, 2.31983f, 2.04008f, 0.95503f, 0.15081f, 0.11530f,
- -0.02574f, -4.77119f, 0.13257f, -0.01704f, -0.23087f, -0.00825f, 0.07029f,
- -0.28136f, 0.42556f,
-};
-
-static const float av1_tx_type_nn_bias_8x16_hor_layer0[16] = {
- 0.93617f, -0.24000f, -1.26821f, 0.78780f, 0.13690f, -0.21948f,
- -1.45162f, 0.44584f, -1.92582f, -0.23169f, 0.56004f, -1.19937f,
- 1.81560f, -1.02643f, -0.81690f, 0.08302f,
-};
-
-static const float av1_tx_type_nn_weights_8x16_hor_layer1[64] = {
- 0.06696f, -0.11538f, -1.42029f, 0.32965f, 0.81046f, 0.01146f, 1.20945f,
- -0.16899f, 0.53224f, -0.40232f, 0.01786f, -0.73242f, 1.29750f, 1.95185f,
- 0.70143f, 1.43287f, 0.76220f, 0.79937f, -1.79011f, -1.15178f, 0.42526f,
- -0.67519f, 0.77267f, -0.30697f, 2.46004f, -0.49828f, 0.02875f, 1.09972f,
- 1.47662f, 0.61719f, 0.61417f, -0.12363f, 2.53048f, 0.00418f, -1.38964f,
- 0.88117f, 0.39239f, -0.19347f, -2.58600f, -0.33715f, 1.09323f, -0.32127f,
- 0.02456f, -0.19125f, 1.12728f, 0.66502f, 0.34296f, 1.14897f, 0.29967f,
- 1.19209f, 0.22108f, -0.11975f, 1.49776f, -1.34624f, -2.58478f, -1.34632f,
- 1.53207f, 0.45634f, -1.48476f, 0.17489f, 0.71790f, -2.12086f, -1.21778f,
- -1.31243f,
-};
-
-static const float av1_tx_type_nn_bias_8x16_hor_layer1[4] = {
- 0.83359f,
- 1.06875f,
- 1.77645f,
- 1.49570f,
-};
-
-static const NN_CONFIG av1_tx_type_nnconfig_8x16_hor = {
- 8, // num_inputs
- 4, // num_outputs
- 1, // num_hidden_layers
- {
- 16,
- }, // num_hidden_nodes
- { av1_tx_type_nn_weights_8x16_hor_layer0,
- av1_tx_type_nn_weights_8x16_hor_layer1 },
- { av1_tx_type_nn_bias_8x16_hor_layer0, av1_tx_type_nn_bias_8x16_hor_layer1 }
-};
-
-static const float av1_tx_type_nn_weights_8x16_ver_layer0[128] = {
- 0.32858f, -1.28887f, 0.25632f, -0.05262f, 2.69203f, -0.07004f, 1.37337f,
- -0.05725f, -0.05659f, 0.05592f, 0.01039f, -0.29343f, 1.58628f, -0.30003f,
- -3.43118f, 0.00272f, 1.70928f, -0.76348f, 0.05889f, -0.03263f, -0.07724f,
- 0.03523f, -0.19890f, 1.18005f, -0.03605f, -0.20530f, -4.00733f, 0.10210f,
- -0.05368f, -0.17650f, -0.15317f, 0.06499f, 0.56705f, 1.04341f, 0.62890f,
- 0.73451f, -0.22199f, 0.86659f, 0.78443f, -0.61664f, -0.50606f, 0.30247f,
- 0.14455f, 0.39276f, 0.49203f, 0.65019f, 0.12269f, 1.64080f, 1.68289f,
- 1.42694f, 1.60825f, 1.58501f, 1.47252f, 1.62589f, 1.48218f, 0.17726f,
- -0.04884f, 0.35376f, -0.04796f, 0.32589f, 0.35087f, 0.35258f, -0.46103f,
- -0.31176f, -0.05203f, 0.07247f, -0.26756f, 0.22019f, 0.03412f, 0.33773f,
- 0.29811f, -0.11140f, 0.12831f, -0.44673f, -0.09858f, 0.07889f, 0.15137f,
- 0.00347f, -0.23394f, 0.08886f, -0.31201f, -0.79912f, -0.51092f, 0.14123f,
- -1.09599f, -4.26020f, -0.68675f, -0.02842f, -1.54538f, -1.28977f, -1.30558f,
- -1.21074f, -1.37142f, -1.14743f, -1.85397f, 0.82985f, -0.30681f, 0.04494f,
- -0.24023f, -4.18053f, -0.16096f, -0.55492f, -0.27882f, 0.05829f, -0.41224f,
- -2.52088f, -0.56162f, -1.04547f, -1.70685f, -0.28842f, -1.43673f, -0.01468f,
- -3.20585f, -0.69120f, -0.43931f, -0.46270f, -0.65885f, -0.55884f, -0.75138f,
- 0.36381f, -5.70858f, -0.14548f, -0.15745f, -0.11812f, -0.07605f, -0.07693f,
- -0.12236f, 0.16075f,
-};
-
-static const float av1_tx_type_nn_bias_8x16_ver_layer0[16] = {
- -0.35385f, 0.30491f, -0.90011f, 0.42941f, 1.20928f, -0.88331f,
- -1.48818f, -0.34785f, -0.32668f, -0.22695f, 0.89188f, 0.65521f,
- 0.57598f, 0.99819f, 0.75175f, 0.17044f,
-};
-
-static const float av1_tx_type_nn_weights_8x16_ver_layer1[64] = {
- -0.62913f, -0.34304f, 0.42963f, -0.17440f, -1.44092f, 0.69142f, -1.36067f,
- 0.52211f, 0.44658f, -0.26501f, -0.41657f, 0.34428f, -0.34390f, -0.58567f,
- -0.84097f, -1.96311f, -0.37215f, -0.22250f, -1.23811f, -0.07247f, -0.81731f,
- 0.58755f, -1.30559f, 0.39551f, 0.41743f, -0.09940f, -0.33230f, 0.14458f,
- -0.25139f, -0.54517f, 0.13469f, -0.38157f, -0.39109f, -0.18205f, 0.06834f,
- -0.08395f, -0.92187f, 0.56724f, 1.44381f, 0.53226f, -0.22356f, 0.12285f,
- -0.29418f, -1.86749f, -0.22372f, -0.60204f, -0.87746f, -1.16936f, 0.56884f,
- 0.62641f, -0.11823f, 1.00395f, 1.64794f, -0.64535f, 2.29322f, -0.23397f,
- 0.17251f, -0.35927f, 0.65631f, -0.26812f, 0.80128f, 0.85748f, 0.47404f,
- 2.20547f,
-};
-
-static const float av1_tx_type_nn_bias_8x16_ver_layer1[4] = {
- -0.44080f,
- -1.67455f,
- -1.46332f,
- -6.13206f,
-};
-
-static const NN_CONFIG av1_tx_type_nnconfig_8x16_ver = {
- 8, // num_inputs
- 4, // num_outputs
- 1, // num_hidden_layers
- {
- 16,
- }, // num_hidden_nodes
- { av1_tx_type_nn_weights_8x16_ver_layer0,
- av1_tx_type_nn_weights_8x16_ver_layer1 },
- { av1_tx_type_nn_bias_8x16_ver_layer0, av1_tx_type_nn_bias_8x16_ver_layer1 }
-};
-/******************************************************************************/
-
-// Tx type model for 16x8 block.
-static const float av1_tx_type_nn_weights_16x8_hor_layer0[128] = {
- 0.02600f, 0.09786f, -1.05107f, -0.35594f, -0.15658f, 2.99828f, -0.07106f,
- -0.10101f, -0.14412f, -0.83790f, -0.19434f, 2.28368f, 1.91727f, -0.00956f,
- -0.90640f, 0.09174f, 1.58895f, 1.38945f, 1.49431f, 1.51381f, 1.44803f,
- 1.53544f, 1.44694f, 0.17753f, 1.69735f, -0.78652f, 0.31092f, -0.23736f,
- 0.02231f, -0.09884f, -0.00493f, 1.21189f, -1.94382f, -0.34629f, -0.58309f,
- 0.72291f, -0.30056f, 0.90660f, -0.57495f, 3.07809f, 0.73644f, 1.43050f,
- 1.34356f, -0.66554f, 0.50102f, -0.64305f, 0.42044f, -1.66165f, -0.05733f,
- -2.51402f, -1.01067f, -0.33390f, -0.32986f, -0.92431f, 1.86281f, -0.07290f,
- -0.26290f, -0.68941f, 1.81156f, 0.66125f, -2.09974f, 0.17032f, -0.67461f,
- -0.00876f, -1.50154f, 1.17153f, 1.00377f, 0.33022f, 0.74689f, 0.42878f,
- 0.61725f, -0.83967f, 0.09467f, -0.39892f, 0.33863f, 0.10656f, -0.09249f,
- -0.39757f, 0.48481f, -0.35162f, 1.47014f, 1.67827f, -1.84051f, 0.16291f,
- -0.50135f, -2.29911f, -0.42217f, -0.13358f, 1.45899f, -0.14743f, -0.02763f,
- -0.28003f, -0.01364f, 0.21014f, -0.29026f, -0.20198f, 1.38782f, 0.56731f,
- 0.27489f, 0.43227f, 0.41326f, 0.42721f, 0.87720f, -1.90067f, -5.04951f,
- -0.17638f, -0.58119f, -0.08954f, -0.13692f, -0.12325f, -0.38548f, 0.66462f,
- -1.42377f, -1.21917f, -1.38193f, -1.36539f, -1.39378f, -1.19629f, -1.59812f,
- 0.28689f, 0.32394f, 0.52128f, 0.01013f, -0.28948f, -0.26293f, -0.44331f,
- -0.36570f, -0.50757f,
-};
-
-static const float av1_tx_type_nn_bias_16x8_hor_layer0[16] = {
- -0.08696f, -0.22110f, -1.43604f, -1.00451f, -1.51029f, 0.63736f,
- 0.45260f, 0.16229f, 4.01393f, -0.21748f, 0.36411f, -0.08764f,
- -0.12329f, 0.08986f, 1.08117f, -0.00220f,
-};
-
-static const float av1_tx_type_nn_weights_16x8_hor_layer1[64] = {
- 0.55824f, -0.14648f, 0.81947f, -0.45867f, -1.86078f, -0.17291f, 0.34849f,
- 0.15153f, 1.75625f, -0.25760f, 0.72015f, -0.30059f, -0.57975f, 0.07609f,
- -0.02036f, 0.07912f, 0.57080f, -0.13792f, 0.74184f, -0.87669f, -1.87572f,
- -0.27270f, 0.39751f, 0.19652f, 2.03514f, -0.32944f, 0.76251f, 0.04399f,
- -0.63175f, 0.37420f, 0.08309f, 0.04466f, 0.60255f, -0.12820f, 1.66065f,
- -0.59496f, -1.94794f, -0.14847f, 0.39424f, 0.16273f, 1.80587f, 0.41197f,
- 0.74691f, -0.21217f, -0.63173f, 0.09510f, -0.35538f, -0.04407f, 0.92847f,
- 0.20141f, 1.68680f, -0.56528f, -2.26960f, 0.12978f, 0.73748f, 0.42438f,
- 2.00673f, -0.40189f, 0.95423f, 0.23234f, -0.80953f, 0.65814f, 0.49444f,
- -0.23347f,
-};
-
-static const float av1_tx_type_nn_bias_16x8_hor_layer1[4] = {
- 3.57175f,
- 2.42612f,
- 3.31259f,
- 2.08287f,
-};
-
-static const NN_CONFIG av1_tx_type_nnconfig_16x8_hor = {
- 8, // num_inputs
- 4, // num_outputs
- 1, // num_hidden_layers
- {
- 16,
- }, // num_hidden_nodes
- { av1_tx_type_nn_weights_16x8_hor_layer0,
- av1_tx_type_nn_weights_16x8_hor_layer1 },
- { av1_tx_type_nn_bias_16x8_hor_layer0, av1_tx_type_nn_bias_16x8_hor_layer1 }
-};
-
-static const float av1_tx_type_nn_weights_16x8_ver_layer0[128] = {
- 0.46633f, 1.55328f, -0.11230f, -0.29571f, 0.18814f, -1.52430f, -2.34660f,
- 0.08644f, -1.97718f, -1.29140f, -1.12262f, -1.12985f, -1.25911f, -0.96506f,
- -1.57129f, 0.96021f, 1.34192f, 1.28623f, 1.21655f, 1.28758f, 1.25482f,
- 1.30195f, 1.19190f, 0.09310f, 0.52072f, 0.91487f, 1.24100f, 1.61236f,
- 1.72166f, 2.20750f, 1.62379f, -1.43936f, 0.50665f, 0.40213f, 0.66502f,
- -1.66699f, -3.07618f, 0.05877f, 0.60987f, -0.09995f, -0.10916f, 0.48049f,
- 0.23812f, 0.39847f, -0.21682f, -0.63455f, 0.33453f, -0.67939f, -4.14355f,
- -0.62756f, -0.22502f, -0.17215f, 0.01062f, 0.27049f, -0.10748f, 0.30945f,
- 2.72445f, -0.89181f, -0.06800f, 0.20595f, -0.73385f, 0.04071f, -1.30294f,
- 1.83507f, 0.92570f, 0.69609f, 0.76285f, 0.69892f, 0.76409f, 0.63104f,
- 0.73397f, 1.09575f, -0.20129f, -0.24022f, -0.24599f, -0.59107f, -0.88755f,
- -0.68987f, -0.75495f, -1.31002f, -1.30237f, -0.94093f, -2.15678f, -1.49303f,
- -1.17498f, -1.39952f, -0.91270f, -0.05587f, 1.02381f, -0.75580f, -0.65263f,
- -0.78996f, -0.71075f, -0.71018f, -0.70350f, -1.26196f, 2.34208f, -0.53611f,
- 0.19752f, -0.16842f, -0.24828f, 0.21857f, 0.08222f, -2.55894f, -1.75702f,
- 0.11394f, 1.03083f, 0.79972f, -1.54112f, -1.82341f, -0.57597f, -0.02077f,
- -0.39616f, -0.00995f, -0.12809f, 0.01188f, -0.25117f, 0.09202f, 0.09336f,
- -0.05614f, -0.30039f, 0.25834f, 1.19944f, 1.22533f, 0.92330f, 0.75967f,
- -0.81945f, -0.41647f,
-};
-
-static const float av1_tx_type_nn_bias_16x8_ver_layer0[16] = {
- 0.17841f, 0.67315f, -1.24450f, 3.13859f, 0.16203f, -0.14992f,
- 0.29553f, -1.15567f, -0.71421f, 1.15977f, 1.14585f, 3.02460f,
- -0.04510f, 0.48000f, -0.09354f, -0.42422f,
-};
-
-static const float av1_tx_type_nn_weights_16x8_ver_layer1[64] = {
- 0.29912f, -0.10009f, -1.11478f, 1.76812f, -0.27719f, 0.52148f, 0.17622f,
- -1.17116f, 0.73397f, -0.69279f, -0.11080f, 1.53751f, -1.42003f, 0.14731f,
- 0.13592f, -0.04883f, 0.39186f, -0.13655f, -0.43994f, 1.82759f, -0.25601f,
- -0.15018f, 0.51920f, -1.56070f, 0.31683f, -0.79367f, -0.02904f, 1.28637f,
- -1.15203f, 0.26627f, 0.42828f, -0.24258f, 0.38647f, -0.83352f, 0.32553f,
- 2.09522f, -0.26822f, -0.42191f, 0.32825f, -1.30748f, 1.50551f, -0.52669f,
- 0.20045f, 1.69318f, -1.47839f, 0.30802f, -0.07290f, -0.28106f, 0.68192f,
- -0.15522f, 1.12579f, 2.21921f, 0.09720f, -0.50265f, 0.83165f, -1.31721f,
- 0.72422f, -1.24952f, 0.61653f, 2.04117f, -1.42406f, 0.52568f, -0.46180f,
- -0.00873f,
-};
-
-static const float av1_tx_type_nn_bias_16x8_ver_layer1[4] = {
- 3.34981f,
- 3.74710f,
- 1.38339f,
- 0.45176f,
-};
-
-static const NN_CONFIG av1_tx_type_nnconfig_16x8_ver = {
- 8, // num_inputs
- 4, // num_outputs
- 1, // num_hidden_layers
- {
- 16,
- }, // num_hidden_nodes
- { av1_tx_type_nn_weights_16x8_ver_layer0,
- av1_tx_type_nn_weights_16x8_ver_layer1 },
- { av1_tx_type_nn_bias_16x8_ver_layer0, av1_tx_type_nn_bias_16x8_ver_layer1 }
-};
-/******************************************************************************/
-
-// Tx type model for 16x16 block.
-static const float av1_tx_type_nn_weights_16x16_layer0[128] = {
- 1.26592f, 1.36313f, 1.30956f, 1.29926f, 1.48816f, 1.68851f, 1.32000f,
- 0.13321f, -0.22477f, -0.88906f, -0.19622f, 1.69605f, 1.22180f, -1.57771f,
- -1.15765f, 0.05710f, -1.13355f, -0.85486f, -0.99971f, -0.91571f, -1.06031f,
- -0.77952f, -1.15723f, 1.17809f, 1.35602f, -0.05243f, -0.37596f, 0.26108f,
- 0.17611f, -0.10323f, 0.77279f, -0.48911f, -0.79308f, 0.55112f, 0.43918f,
- 0.27872f, 0.28714f, 0.45830f, 1.05689f, 0.03705f, -2.49975f, -0.01940f,
- 0.05709f, 0.07942f, -0.13290f, -0.10359f, 0.00143f, 0.37303f, 0.96470f,
- 0.53293f, 1.14459f, 0.89185f, 0.43378f, 0.47764f, 0.90924f, 0.15279f,
- -0.15361f, 0.02949f, 0.42240f, 0.68143f, 0.89588f, 0.73754f, 0.10974f,
- 1.57755f, -0.39870f, -0.32914f, 0.35638f, 0.34991f, -0.00003f, -0.23373f,
- 0.29630f, -0.76699f, -0.01356f, 0.04234f, 0.84253f, 1.92078f, 0.93160f,
- 0.71993f, 0.71604f, 0.76455f, -1.59782f, 0.32332f, 1.11628f, 0.33062f,
- -0.03728f, -0.05710f, 0.80447f, -0.14719f, 1.34658f, -0.05718f, 0.64015f,
- 0.21926f, 0.41653f, 0.12720f, 0.54092f, 1.39411f, 1.81819f, -0.24513f,
- 0.00955f, 0.38011f, -0.57787f, -0.41759f, 0.68834f, -0.31783f, -0.40607f,
- -0.10107f, -0.79374f, 0.75599f, -0.16282f, -0.14490f, -0.20783f, -0.55019f,
- -0.13793f, -0.22293f, 0.18305f, 0.12445f, 0.56830f, 0.24567f, 0.09278f,
- 0.70803f, 0.35803f, -1.52676f, -0.89624f, 0.77665f, 0.19877f, 0.77175f,
- 0.50355f, 0.08592f,
-};
-
-static const float av1_tx_type_nn_bias_16x16_layer0[16] = {
- -1.31834f, 0.14346f, -0.10062f, 0.84489f, 0.95617f, -0.06720f,
- -0.68502f, -0.91442f, -0.31932f, 0.25276f, -0.15138f, -1.57661f,
- -0.14062f, -0.42120f, 0.94573f, -0.09287f,
-};
-
-static const float av1_tx_type_nn_weights_16x16_layer1[64] = {
- -1.80333f, -1.06353f, 0.55139f, 0.74644f, 0.13747f, -0.93018f, -0.10286f,
- 0.67133f, 0.24460f, 1.44583f, 0.02173f, 0.26037f, -0.73687f, 0.19566f,
- 0.61846f, -0.58601f, -1.03196f, -0.74415f, 0.30041f, -0.41967f, 1.08740f,
- 0.96224f, -0.59139f, 0.03813f, 0.05403f, 1.33427f, -0.54375f, -1.92181f,
- 0.54704f, 0.13608f, 0.22151f, -0.38076f, 1.18390f, -0.77508f, -1.84283f,
- 1.00894f, 0.62318f, -0.15296f, 1.27600f, 0.22822f, 0.12751f, 0.93910f,
- -0.28502f, 0.53912f, -0.96889f, 0.10182f, 0.81508f, -0.43028f, 2.67386f,
- 0.52204f, 0.49820f, -0.41711f, 1.05038f, 1.12192f, 0.74349f, -0.75417f,
- -0.03718f, -0.35769f, 0.89651f, 0.63236f, 0.54215f, -0.07894f, 0.48274f,
- 1.08829f,
-};
-
-static const float av1_tx_type_nn_bias_16x16_layer1[4] = {
- 0.81986f,
- 1.26865f,
- 0.11118f,
- 2.48404f,
-};
-
-static const NN_CONFIG av1_tx_type_nnconfig_16x16 = {
- 8, // num_inputs
- 4, // num_outputs
- 1, // num_hidden_layers
- {
- 16,
- }, // num_hidden_nodes
- {
- av1_tx_type_nn_weights_16x16_layer0,
- av1_tx_type_nn_weights_16x16_layer1,
- },
- {
- av1_tx_type_nn_bias_16x16_layer0,
- av1_tx_type_nn_bias_16x16_layer1,
- },
-};
-/******************************************************************************/
-
-// Tx type model for 4x16 block.
-static const float av1_tx_type_nn_weights_4x16_hor_layer0[32] = {
- 0.36539f, 0.25667f, 0.01491f, -0.21959f, 2.55105f, 0.17615f, 1.79884f,
- 1.65936f, -0.44363f, 0.00706f, -0.68004f, -0.64360f, 1.75760f, 1.91906f,
- 1.47682f, 0.09650f, -3.59244f, -0.35004f, 0.93295f, 0.25806f, -0.08154f,
- 0.79332f, 0.79535f, 1.09467f, 1.57855f, -0.51359f, 0.90553f, -1.67744f,
- -1.74563f, -0.88830f, -1.77603f, 2.15935f,
-};
-
-static const float av1_tx_type_nn_bias_4x16_hor_layer0[8] = {
- -0.36435f, -2.22731f, -0.00837f, -1.34546f,
- 0.62806f, -0.20675f, 4.91940f, -0.56079f,
-};
-
-static const float av1_tx_type_nn_weights_4x16_hor_layer1[32] = {
- -0.57191f, -1.46418f, 0.67331f, -1.15027f, 0.46288f, 0.81251f, 2.51768f,
- -0.27147f, 0.00761f, -2.15214f, -0.69650f, -0.50808f, 0.92832f, 0.45668f,
- 2.34201f, -0.52941f, 0.51008f, -1.55496f, -0.01371f, -0.12356f, 0.66624f,
- 0.88043f, 2.64862f, -1.28024f, -0.17578f, -1.80034f, -0.32217f, 0.89519f,
- 1.28413f, -0.30326f, 2.45329f, -0.83335f,
-};
-
-static const float av1_tx_type_nn_bias_4x16_hor_layer1[4] = {
- 2.33198f,
- 3.36245f,
- 1.62603f,
- 2.91056f,
-};
-
-static const NN_CONFIG av1_tx_type_nnconfig_4x16_hor = {
- 4, // num_inputs
- 4, // num_outputs
- 1, // num_hidden_layers
- {
- 8,
- }, // num_hidden_nodes
- { av1_tx_type_nn_weights_4x16_hor_layer0,
- av1_tx_type_nn_weights_4x16_hor_layer1 },
- { av1_tx_type_nn_bias_4x16_hor_layer0, av1_tx_type_nn_bias_4x16_hor_layer1 }
-};
-
-static const float av1_tx_type_nn_weights_4x16_ver_layer0[128] = {
- 1.61392f, 1.41239f, 1.47646f, 1.47325f, 1.46110f, 1.49208f, 1.49414f,
- 0.12835f, -0.76986f, 0.07087f, -0.24572f, -0.93168f, 3.07935f, -0.18183f,
- -0.09831f, -0.07703f, -0.03222f, -0.25473f, -0.06090f, 2.93713f, -0.38711f,
- -0.12884f, -0.18329f, -0.06262f, -0.00327f, -0.02930f, -0.01641f, -0.00622f,
- -0.03305f, -4.07069f, -2.76643f, 0.04413f, -1.03176f, -0.19217f, -0.44980f,
- -2.48615f, -2.58112f, -0.87695f, 0.16187f, -0.04891f, -0.06854f, 1.08104f,
- 0.75245f, 1.49302f, 0.63363f, 1.45715f, 0.92574f, 1.72029f, 0.33326f,
- 3.86646f, 0.04422f, 0.41019f, 0.36212f, 0.56600f, -1.01552f, 0.05128f,
- 0.40454f, -1.05100f, -0.47461f, -1.33168f, -0.46145f, -1.36870f, -0.88838f,
- -1.05358f, -0.18537f, -0.34357f, -0.03698f, 0.68905f, 0.41010f, 0.31223f,
- -0.43382f, -0.74715f, 2.03366f, -0.30419f, 0.45747f, 0.09526f, 0.31678f,
- 0.22915f, 0.21832f, 1.26385f, -0.06814f, -0.71417f, -1.18947f, 0.03762f,
- 0.10936f, 2.97396f, -0.42638f, -0.03123f, -5.49756f, -0.17029f, -0.11323f,
- 0.05173f, -0.44274f, -0.15738f, 0.11311f, 0.43872f, 0.16837f, -0.52849f,
- 2.90050f, -0.54735f, -0.29591f, 1.24030f, 0.21696f, -0.04443f, -1.60877f,
- -1.36365f, -1.27432f, -1.52060f, -1.34397f, -1.13371f, -1.87554f, 0.80123f,
- 0.42820f, -0.14157f, -2.73963f, -0.68040f, -0.35236f, 0.14490f, 2.23477f,
- 0.01370f, -0.20426f, -1.51411f, -0.72293f, 0.64516f, 0.97638f, 0.32616f,
- -0.27975f, -0.01149f,
-};
-
-static const float av1_tx_type_nn_bias_4x16_ver_layer0[16] = {
- -1.37863f, -0.05763f, -0.07041f, 0.15306f, 0.96026f, -1.42105f,
- -0.55822f, 1.04845f, -0.17662f, -1.25345f, -0.11927f, 0.49845f,
- -0.32530f, 0.73483f, 0.08322f, -0.23890f,
-};
-
-static const float av1_tx_type_nn_weights_4x16_ver_layer1[64] = {
- 0.27194f, 0.50607f, 0.49229f, -0.48192f, 0.15667f, -1.38891f, 0.38102f,
- -0.58825f, -0.07337f, -0.52909f, 0.36975f, 0.28710f, 0.34992f, -0.73630f,
- 0.30386f, -0.58822f, 0.36127f, 0.57950f, 0.55878f, -0.42796f, 0.19967f,
- -1.45517f, 0.42529f, -0.54630f, -0.38169f, -0.84899f, 0.41622f, 0.46935f,
- 0.39077f, -0.75448f, 0.31698f, -0.76187f, 0.97765f, 0.57052f, 0.55825f,
- -0.54273f, 0.20466f, -1.46347f, 0.41813f, -0.55019f, -0.19948f, -0.57982f,
- 0.41206f, 0.32373f, 0.38537f, -1.11657f, 0.32887f, -0.76911f, 1.12259f,
- 0.72163f, 0.82603f, 0.37786f, 0.34976f, -1.86642f, 0.59961f, -0.16329f,
- -0.36631f, -0.56814f, 0.60410f, 0.53158f, 0.56389f, -0.70508f, 0.51009f,
- -0.56513f,
-};
-
-static const float av1_tx_type_nn_bias_4x16_ver_layer1[4] = {
- 4.60896f,
- 4.53551f,
- 4.53124f,
- 4.27435f,
-};
-
-static const NN_CONFIG av1_tx_type_nnconfig_4x16_ver = {
- 8, // num_inputs
- 4, // num_outputs
- 1, // num_hidden_layers
- {
- 16,
- }, // num_hidden_nodes
- { av1_tx_type_nn_weights_4x16_ver_layer0,
- av1_tx_type_nn_weights_4x16_ver_layer1 },
- { av1_tx_type_nn_bias_4x16_ver_layer0, av1_tx_type_nn_bias_4x16_ver_layer1 }
-};
-/******************************************************************************/
-
-// Tx type model for 16x4 block.
-static const float av1_tx_type_nn_weights_16x4_hor_layer0[128] = {
- 1.45347f, -0.15743f, 0.44236f, 0.25808f, 0.33944f, 0.38678f, 0.24428f,
- 1.67287f, 0.09539f, -0.42940f, -0.31507f, -0.00154f, -2.98755f, -2.27744f,
- -0.49183f, 0.09333f, -0.99026f, -0.22157f, 0.53701f, 0.60447f, 0.15686f,
- -0.04646f, 0.26341f, 2.12361f, 0.27090f, -1.14716f, -0.64146f, -0.91604f,
- -0.75335f, -0.60056f, -1.25084f, 1.68473f, -3.24075f, -4.03867f, -2.07877f,
- -0.02347f, 0.00333f, -0.01259f, -0.00465f, 0.02526f, 0.36286f, -0.10324f,
- 2.12780f, -0.74584f, -1.05052f, 1.78467f, -0.55065f, -0.03326f, 2.46781f,
- 1.18349f, 0.96015f, 1.01696f, 1.10584f, 1.07263f, 1.11531f, -1.06413f,
- 0.32389f, -1.87360f, -0.14435f, 1.77926f, 1.09966f, -0.12680f, -0.61386f,
- -0.09724f, -0.33095f, 1.12122f, 1.00791f, 1.52416f, 1.35004f, 1.32657f,
- 0.60950f, -1.13538f, -0.38654f, 0.06473f, 2.10669f, 0.27734f, -0.38359f,
- -1.91455f, -1.22676f, 0.05786f, 0.97432f, 2.19967f, 0.50457f, 0.78976f,
- 0.95183f, -0.32414f, 0.49437f, -0.04506f, 0.18993f, -0.07971f, 0.23889f,
- -0.09872f, -0.66036f, 0.05377f, 2.69638f, -0.08259f, -0.69210f, -1.08296f,
- -1.96504f, -2.31947f, -0.80161f, -0.80456f, -1.35556f, -0.05323f, -4.42658f,
- -0.30732f, -0.12043f, 0.11126f, 0.10771f, -0.14956f, -0.02218f, 0.41016f,
- 1.16599f, 1.14629f, 1.12881f, 1.18676f, 1.24677f, 1.28695f, 1.11270f,
- 0.08233f, 1.75440f, 0.49228f, -0.34858f, -0.17032f, 0.29288f, 0.47175f,
- 0.19055f, -1.56413f,
-};
-
-static const float av1_tx_type_nn_bias_16x4_hor_layer0[16] = {
- -1.71227f, 0.47291f, -0.97536f, -0.66216f, 0.11729f, -0.21451f,
- 2.75281f, 0.04318f, 2.03965f, 0.14618f, -0.70483f, -0.24517f,
- 1.14048f, 0.33308f, -1.10886f, 0.41184f,
-};
-
-static const float av1_tx_type_nn_weights_16x4_hor_layer1[64] = {
- -1.17079f, 0.19096f, -1.05753f, -0.30803f, -1.21680f, -0.67255f, 1.60115f,
- 0.05972f, 1.44759f, -0.04068f, -0.26331f, 0.31400f, 0.96923f, 0.33443f,
- -0.77215f, -0.91316f, -1.78928f, 0.21483f, -1.24008f, -0.46190f, -0.12127f,
- -0.62144f, 1.37593f, 0.08373f, 1.56215f, 0.00279f, -0.14556f, 0.38710f,
- 0.96228f, 0.66433f, -0.51798f, -0.80738f, -0.18539f, 0.19377f, -1.03090f,
- -1.51044f, -0.59485f, -0.62589f, 1.90742f, 0.09078f, 1.49113f, 0.00205f,
- -0.15918f, 0.40827f, 1.08553f, 0.43431f, 0.33519f, -1.12669f, -1.10274f,
- 0.80004f, -1.83599f, -0.53134f, 2.00515f, -0.32670f, 1.37124f, 0.51136f,
- 1.62563f, 0.24787f, 0.31757f, 0.81751f, 1.57262f, 0.83214f, 1.04661f,
- -0.43819f,
-};
-
-static const float av1_tx_type_nn_bias_16x4_hor_layer1[4] = {
- 2.32575f,
- 2.75703f,
- 1.12304f,
- 2.15567f,
-};
-
-static const NN_CONFIG av1_tx_type_nnconfig_16x4_hor = {
- 8, // num_inputs
- 4, // num_outputs
- 1, // num_hidden_layers
- {
- 16,
- }, // num_hidden_nodes
- { av1_tx_type_nn_weights_16x4_hor_layer0,
- av1_tx_type_nn_weights_16x4_hor_layer1 },
- { av1_tx_type_nn_bias_16x4_hor_layer0, av1_tx_type_nn_bias_16x4_hor_layer1 }
-};
-
-static const float av1_tx_type_nn_weights_16x4_ver_layer0[32] = {
- 0.26047f, 0.99930f, 1.16484f, -0.28196f, -2.67483f, -0.21456f, -0.16854f,
- 0.46375f, 1.47951f, 1.13735f, 1.12356f, 0.27385f, 0.50978f, 2.09967f,
- -1.47386f, 0.01950f, -0.06362f, 0.26014f, 1.04544f, -0.03099f, 0.07478f,
- -0.39701f, 0.05545f, 2.73633f, -0.56305f, -0.02208f, -0.44517f, -0.00897f,
- -0.17967f, -0.96622f, 0.42635f, -1.04784f,
-};
-
-static const float av1_tx_type_nn_bias_16x4_ver_layer0[8] = {
- -0.52088f, 0.52844f, -1.03655f, -0.30974f,
- 2.59952f, -1.93604f, 0.00000f, 2.51787f,
-};
-
-static const float av1_tx_type_nn_weights_16x4_ver_layer1[32] = {
- 0.10916f, -0.21219f, -0.51340f, 0.69161f, 1.45988f, -1.36942f, -0.40899f,
- 1.05136f, -0.08486f, 0.10008f, -0.55304f, 0.88012f, 1.61177f, -1.64507f,
- 0.63428f, 1.15130f, -0.17287f, -0.18592f, -0.01143f, 0.88293f, 1.73326f,
- -1.63624f, 0.09359f, 1.18393f, 0.26531f, 0.22378f, 0.15170f, 1.06965f,
- 1.26814f, -1.93873f, -0.00768f, 1.58309f,
-};
-
-static const float av1_tx_type_nn_bias_16x4_ver_layer1[4] = {
- 2.34713f,
- 1.68667f,
- 1.25488f,
- 1.69812f,
-};
-
-static const NN_CONFIG av1_tx_type_nnconfig_16x4_ver = {
- 4, // num_inputs
- 4, // num_outputs
- 1, // num_hidden_layers
- {
- 8,
- }, // num_hidden_nodes
- { av1_tx_type_nn_weights_16x4_ver_layer0,
- av1_tx_type_nn_weights_16x4_ver_layer1 },
- { av1_tx_type_nn_bias_16x4_ver_layer0, av1_tx_type_nn_bias_16x4_ver_layer1 }
-};
-/******************************************************************************/
-
-// Map tx_size to its corresponding neural net model for tx type prediction.
-static const NN_CONFIG *av1_tx_type_nnconfig_map_hor[] = {
- &av1_tx_type_nnconfig_4x4_hor, // 4x4 transform
- &av1_tx_type_nnconfig_8x8_hor, // 8x8 transform
- &av1_tx_type_nnconfig_16x16, // 16x16 transform
- NULL, // 32x32 transform
- NULL, // 64x64 transform
- &av1_tx_type_nnconfig_4x8_hor, // 4x8 transform
- &av1_tx_type_nnconfig_8x4_hor, // 8x4 transform
- &av1_tx_type_nnconfig_8x16_hor, // 8x16 transform
- &av1_tx_type_nnconfig_16x8_hor, // 16x8 transform
- NULL, // 16x32 transform
- NULL, // 32x16 transform
- NULL, // 32x64 transform
- NULL, // 64x32 transform
- &av1_tx_type_nnconfig_4x16_hor, // 4x16 transform
- &av1_tx_type_nnconfig_16x4_hor, // 16x4 transform
- NULL, // 8x32 transform
- NULL, // 32x8 transform
- NULL, // 16x64 transform
- NULL, // 64x16 transform
-};
-
-static const NN_CONFIG *av1_tx_type_nnconfig_map_ver[] = {
- &av1_tx_type_nnconfig_4x4_ver, // 4x4 transform
- &av1_tx_type_nnconfig_8x8_ver, // 8x8 transform
- &av1_tx_type_nnconfig_16x16, // 16x16 transform
- NULL, // 32x32 transform
- NULL, // 64x64 transform
- &av1_tx_type_nnconfig_4x8_ver, // 4x8 transform
- &av1_tx_type_nnconfig_8x4_ver, // 8x4 transform
- &av1_tx_type_nnconfig_8x16_ver, // 8x16 transform
- &av1_tx_type_nnconfig_16x8_ver, // 16x8 transform
- NULL, // 16x32 transform
- NULL, // 32x16 transform
- NULL, // 32x64 transform
- NULL, // 64x32 transform
- &av1_tx_type_nnconfig_4x16_ver, // 4x16 transform
- &av1_tx_type_nnconfig_16x4_ver, // 16x4 transform
- NULL, // 8x32 transform
- NULL, // 32x8 transform
- NULL, // 16x64 transform
- NULL, // 64x16 transform
-};
-
-// Tx split model for 4x8 block.
-static const float av1_tx_split_nn_weights_4x8_layer0[8 * 16] = {
- 0.068650f, -0.732073f, -0.040361f, 0.322550f, -0.021123f, 0.212518f,
- -0.350546f, 0.435987f, -0.111756f, -0.401568f, 0.069548f, -0.313000f,
- 0.073918f, -0.373805f, -0.775810f, -0.124753f, 0.181094f, -0.602641f,
- -0.026219f, -0.350112f, 0.020599f, -0.311752f, -0.476482f, -0.669465f,
- -0.310921f, 0.348869f, -0.115984f, 0.154250f, 0.200485f, -0.016689f,
- 0.020392f, 0.413810f, 0.634064f, -0.627530f, 0.399178f, -0.012284f,
- 0.472030f, 0.091087f, -0.706100f, -0.447944f, -0.274226f, 0.445656f,
- 0.309339f, 0.505522f, 0.038496f, -0.152809f, 0.408684f, -0.068151f,
- 0.271612f, 0.353233f, -0.150365f, 0.075212f, -0.035096f, 0.346615f,
- 0.124382f, 0.477072f, 0.216288f, 0.070548f, -0.106362f, 0.681613f,
- -0.145502f, -0.218631f, -0.099248f, -0.001983f, -0.196819f, -0.969045f,
- 0.063009f, -0.123053f, 0.104875f, -0.137581f, -0.282933f, -0.003624f,
- -0.315659f, -0.333523f, -0.503000f, -0.100063f, -0.536711f, -0.059978f,
- -0.670248f, -0.353762f, 0.181109f, 0.289715f, -0.071206f, 0.261141f,
- 0.052796f, -0.114554f, -0.139214f, -0.261380f, 0.075984f, -0.647925f,
- -0.099528f, -0.677814f, 0.015712f, -0.389385f, -0.095622f, -0.165117f,
- -0.109454f, -0.175240f, -0.393914f, 0.212330f, 0.037822f, 0.248280f,
- 0.180197f, 0.110493f, -0.525727f, -0.092329f, -0.524029f, -0.407364f,
- -0.542373f, -0.435626f, -0.912194f, 0.062794f, 0.160433f, 0.741485f,
- -0.103659f, -0.119327f, -0.055275f, 0.334358f, 0.014713f, 0.046327f,
- 0.831114f, -0.576682f, 0.354369f, -0.082088f, 0.452331f, 0.039730f,
- -0.792429f, -0.385862f,
-};
-
-static const float av1_tx_split_nn_bias_4x8_layer0[16] = {
- 0.238621f, 2.186830f, 1.383035f, -0.867139f, 1.257119f, -0.351571f,
- -0.240650f, -0.971692f, 2.744843f, 1.116991f, 0.139062f, -0.165332f,
- 0.262171f, -1.598153f, -1.427340f, -1.602306f,
-};
-
-static const float av1_tx_split_nn_weights_4x8_layer1[16] = {
- -0.367134f, 1.373058f, -0.897039f, -0.326819f, -0.734030f, -0.290413f,
- -0.501249f, 0.505321f, -0.537692f, -0.767893f, 0.268697f, 0.278987f,
- 0.085082f, 0.614986f, 0.847904f, 0.637578f,
-};
-
-static const float av1_tx_split_nn_bias_4x8_layer1[1] = {
- 0.20586078f,
-};
-
-static const NN_CONFIG av1_tx_split_nnconfig_4x8 = {
- 8, // num_inputs
- 1, // num_outputs
- 1, // num_hidden_layers
- {
- 16,
- }, // num_hidden_nodes
- {
- av1_tx_split_nn_weights_4x8_layer0,
- av1_tx_split_nn_weights_4x8_layer1,
- },
- {
- av1_tx_split_nn_bias_4x8_layer0,
- av1_tx_split_nn_bias_4x8_layer1,
- },
-};
-/******************************************************************************/
-
-// Tx split model for 8x8 block.
-static const float av1_tx_split_nn_weights_8x8_layer0[144] = {
- 0.177983f, -0.938386f, -0.074460f, -0.221843f, -0.073182f, -0.295155f,
- -0.098202f, -0.279510f, 0.001054f, -0.119319f, -1.835282f, -0.581507f,
- -1.222222f, -1.049006f, -0.807508f, -0.454252f, -0.774879f, -0.180607f,
- -0.886976f, -0.231971f, -0.824677f, -0.351872f, -1.323819f, 0.235378f,
- 0.015331f, -0.341818f, 0.145549f, -0.348362f, 0.147647f, -0.323400f,
- 0.047558f, -0.553025f, -0.295485f, -0.330368f, -0.530605f, -0.407516f,
- 0.447740f, 0.782381f, -0.179164f, -0.584675f, -0.052645f, 0.038656f,
- -0.096783f, 0.038342f, -0.170762f, -0.405844f, -0.552665f, -0.509866f,
- 0.757204f, -1.296465f, 0.631015f, 0.009265f, 0.646192f, 0.044523f,
- 0.653161f, 0.033820f, 0.849639f, -0.068555f, -1.036085f, -0.511652f,
- 0.104693f, -1.458690f, 0.286051f, -0.089800f, 0.381564f, -0.302640f,
- 0.304465f, -0.268706f, 0.432603f, -0.117914f, -2.070031f, -0.565696f,
- -0.073027f, -1.783570f, -0.318144f, -0.320990f, -0.343966f, -0.140996f,
- -0.322977f, -0.232147f, -0.373210f, -0.158266f, -1.922305f, -0.634373f,
- 0.101894f, -0.221847f, 0.018412f, -0.423887f, -0.266684f, -0.444930f,
- -0.196237f, 0.106638f, -0.065834f, -0.538401f, -0.280772f, -0.620348f,
- 1.089957f, -0.799928f, 0.504112f, -0.165763f, 0.578741f, -0.172653f,
- 0.547316f, -0.143484f, 0.717220f, -0.297190f, -1.237854f, -0.074819f,
- -0.977304f, -0.484092f, -0.646427f, -0.451443f, -0.612126f, -0.224475f,
- -0.731608f, -0.257077f, -0.665857f, -0.346742f, -1.216372f, 0.227267f,
- 0.231249f, -1.693073f, -0.035899f, 0.380845f, -0.058476f, 0.409405f,
- -0.066679f, 0.406731f, -0.068501f, 0.396748f, 0.639462f, 0.150834f,
- -0.418659f, -1.421931f, 0.101889f, 0.083573f, 0.129746f, 0.134460f,
- 0.081185f, 0.127420f, 0.083664f, 0.051096f, 1.361688f, 0.386093f,
-};
-
-static const float av1_tx_split_nn_bias_8x8_layer0[12] = {
- 4.280443f, 2.218902f, -0.256953f, 3.161431f, 2.082548f, 2.506052f,
- 2.563224f, 1.421976f, -1.627813f, -1.436085f, 2.297265f, 1.500469f,
-};
-
-static const float av1_tx_split_nn_weights_8x8_layer1[12] = {
- 1.178833f, -0.428527f, -0.078737f, 0.381434f, -0.466895f, -0.901745f,
- -0.766968f, -0.356663f, 0.450146f, 0.509370f, -0.356604f, -0.443506f,
-};
-
-static const float av1_tx_split_nn_bias_8x8_layer1[1] = {
- -0.156294f,
-};
-
-static const NN_CONFIG av1_tx_split_nnconfig_8x8 = {
- 12, // num_inputs
- 1, // num_outputs
- 1, // num_hidden_layers
- {
- 12,
- }, // num_hidden_nodes
- {
- av1_tx_split_nn_weights_8x8_layer0,
- av1_tx_split_nn_weights_8x8_layer1,
- },
- {
- av1_tx_split_nn_bias_8x8_layer0,
- av1_tx_split_nn_bias_8x8_layer1,
- },
-};
-/******************************************************************************/
-
-// Tx split model for 8x16 block.
-static const float av1_tx_split_nn_weights_8x16_layer0[8 * 64] = {
- 0.374660f, 0.218905f, -0.139779f, 0.212141f, 0.056517f, 0.051114f,
- 0.042860f, -0.273258f, -0.340809f, 0.138983f, -0.216996f, -0.241519f,
- -0.123244f, 0.078577f, -0.472273f, -0.194201f, 0.125056f, 0.239761f,
- -0.332782f, 0.174782f, -0.211400f, -0.129795f, 0.062195f, 0.113176f,
- -0.008869f, 0.140764f, 0.059833f, 0.163826f, 0.359293f, -0.109797f,
- -0.022091f, -0.059536f, -0.188226f, 0.179709f, 0.031386f, 0.164790f,
- 0.214364f, 0.198555f, 0.152262f, -0.242980f, 0.319367f, -0.136902f,
- 0.046524f, -0.043591f, 0.342178f, -0.011757f, -0.014286f, 0.072871f,
- -0.278314f, -0.345303f, -0.252103f, -0.107154f, -0.235101f, -0.106739f,
- -0.120865f, -0.160042f, 0.240028f, 0.112902f, -0.141587f, -0.703012f,
- -0.136591f, 0.318993f, -0.154417f, -0.054668f, 0.192870f, 0.176166f,
- -0.029965f, 0.266942f, -0.178384f, 0.038680f, 0.134403f, -0.002426f,
- 0.534825f, -0.070923f, 0.413281f, 0.418148f, 0.093729f, 0.016454f,
- 0.305358f, -0.040512f, 0.069904f, -0.227588f, -0.362220f, -0.031604f,
- -0.394901f, 0.071506f, -0.342833f, -0.142550f, -0.164005f, 0.182600f,
- 0.213062f, 0.076805f, 0.278758f, 0.125613f, -0.035552f, 0.040971f,
- 0.182785f, -0.227961f, -0.105413f, -0.074949f, -0.084629f, -0.254767f,
- 0.114657f, 0.047121f, 0.195902f, 0.264759f, 0.017799f, 0.210230f,
- 0.150749f, -0.142142f, 0.182494f, -0.142415f, -0.259782f, -0.114830f,
- -0.198826f, 0.000061f, -0.375668f, -0.276656f, -0.373202f, 0.210298f,
- 0.422680f, 0.066960f, 0.351106f, -0.209034f, 0.367195f, -0.110274f,
- 0.115573f, -0.066642f, -0.389673f, -0.260447f, 0.056949f, -0.180425f,
- 0.069922f, -0.153506f, -0.097053f, -0.111757f, 0.094069f, 0.144837f,
- -0.052984f, -0.506681f, -0.034474f, 0.279057f, -0.105025f, 0.006656f,
- -0.125017f, -0.114096f, 0.103153f, -0.117402f, -0.359472f, 0.072534f,
- 0.110291f, 0.003088f, -0.456897f, 0.038331f, -0.322298f, 0.113942f,
- -0.119916f, -0.194392f, 0.093167f, 0.193459f, 0.074671f, 0.033602f,
- 0.004440f, -0.179578f, -0.036637f, -0.216172f, -0.296530f, -0.318992f,
- 0.319160f, -0.066218f, 0.291246f, 0.181292f, 0.089914f, 0.025273f,
- 0.303128f, 0.019063f, 0.078545f, -0.396919f, 0.014065f, -0.122121f,
- 0.037107f, -0.151886f, -0.299392f, -0.172207f, -0.124571f, -0.232553f,
- 0.102970f, -0.225040f, 0.061059f, -0.258188f, -0.469871f, -0.099607f,
- -0.061524f, -0.213700f, 0.070237f, -0.289134f, -0.238225f, 0.256403f,
- -0.119344f, 0.067782f, -0.398983f, -0.123975f, -0.200205f, -0.047038f,
- 0.026569f, 0.031037f, 0.094302f, -0.101239f, 0.433307f, -0.303612f,
- 0.088537f, -0.164436f, 0.202471f, -0.048592f, -0.251904f, 0.122577f,
- -0.309874f, -0.263405f, -0.292503f, 0.216589f, 0.035378f, 0.136599f,
- -0.145844f, -0.018211f, 0.174084f, -0.449941f, -0.001428f, 0.064134f,
- 0.039652f, 0.111083f, -0.246076f, -0.204733f, 0.056559f, -0.000123f,
- 0.104049f, 0.138512f, -0.128309f, 0.087855f, 0.232784f, 0.247138f,
- 0.162766f, 0.154829f, 0.313605f, -0.164115f, -0.050844f, 0.156549f,
- 0.185279f, -0.238962f, -0.308281f, -0.179592f, -0.193262f, 0.201670f,
- -0.203399f, -0.096831f, -0.127867f, 0.310674f, -0.008181f, 0.004078f,
- -0.211038f, -0.193480f, -0.185639f, -0.150202f, -0.204858f, -0.240758f,
- 0.114268f, -0.032535f, -0.052403f, -0.234333f, -0.064072f, -0.208444f,
- -0.352853f, -0.224001f, -0.156330f, 0.215436f, 0.171846f, 0.291849f,
- 0.108832f, 0.046991f, -0.127801f, 0.032485f, 0.141493f, 0.123319f,
- -0.057250f, 0.315346f, -0.061317f, -0.465086f, -0.130179f, -0.217841f,
- -0.239089f, -0.073251f, -0.327718f, 0.054905f, -0.283169f, -0.028900f,
- 0.071450f, 0.270072f, 0.248891f, 0.088052f, 0.253319f, 0.122808f,
- 0.175490f, -0.147805f, 0.089169f, -0.045457f, -0.330788f, 0.099791f,
- -0.137376f, -0.195977f, -0.350942f, -0.284930f, -0.559037f, 0.030504f,
- 0.162554f, -0.199100f, -0.050453f, -0.131320f, -0.077863f, -0.066253f,
- -0.379723f, -0.424047f, -0.081182f, -0.252261f, -0.102815f, 0.058240f,
- -0.182036f, 0.176772f, -0.070823f, 0.216054f, -0.211533f, -0.232992f,
- 0.279346f, 0.117984f, 0.236674f, 0.126625f, -0.046220f, 0.044919f,
- 0.278492f, 0.083944f, 0.180512f, 0.217994f, 0.401170f, -0.064417f,
- 0.011636f, -0.139597f, -0.050020f, -0.268438f, -0.032803f, 0.024908f,
- -0.085713f, -0.012984f, -0.055192f, -0.338657f, 0.045826f, -0.312849f,
- -0.023393f, -0.168800f, -0.030886f, -0.131816f, -0.253542f, -0.104812f,
- -0.354389f, 0.169464f, 0.094151f, -0.217122f, -0.456397f, 0.211478f,
- 0.219232f, -0.155519f, -0.353700f, -0.264759f, -0.034709f, 0.034409f,
- -0.148639f, -0.132850f, -0.216791f, -0.118492f, 0.173721f, -0.144181f,
- 0.335028f, 0.176439f, 0.105980f, 0.169390f, 0.155615f, -0.040618f,
- -0.176029f, 0.155569f, -0.184833f, -0.171099f, -0.178663f, -0.032051f,
- -0.434334f, 0.092238f, -0.263103f, 0.061804f, -0.172957f, 0.005962f,
- -0.100176f, 0.125898f, 0.048092f, -0.088141f, 0.247196f, -0.221601f,
- -0.114474f, -0.124410f, -0.156393f, -0.181782f, -0.083562f, 0.034937f,
- 0.403401f, -0.046200f, 0.322259f, 0.219678f, 0.109850f, 0.051837f,
- 0.196861f, -0.019118f, 0.248818f, -0.137567f, 0.127862f, 0.052293f,
- 0.298726f, 0.275788f, 0.015344f, 0.058714f, 0.283691f, -0.053794f,
- -0.123270f, -0.227761f, -0.141744f, -0.268515f, -0.007189f, -0.242117f,
- -0.252396f, -0.069017f, 0.034803f, -0.003388f, -0.262577f, 0.062115f,
- -0.298393f, 0.215415f, -0.153615f, 0.289902f, 0.085886f, -0.504290f,
- 0.077178f, 0.150861f, -0.228848f, -0.261020f, 0.198204f, 0.162113f,
- 0.346418f, -0.286950f, 0.354756f, -0.226419f, 0.024720f, 0.208037f,
- 0.107286f, -0.110849f, 0.104415f, -0.207725f, 0.063932f, -0.037748f,
- -0.167037f, -0.068282f, 0.320815f, -0.051884f, 0.099989f, -0.078388f,
- 0.127071f, 0.046675f, -0.336571f, -0.273080f, 0.264694f, -0.007352f,
- -0.093828f, 0.094773f, -0.144434f, 0.091795f, -0.031615f, 0.056914f,
- 0.064673f, -0.136669f, 0.344734f, 0.225926f, 0.283451f, -0.068354f,
- 0.030572f, 0.180784f, -0.378047f, -0.092962f, -0.083291f, 0.038970f,
- 0.052094f, -0.017932f, 0.216302f, -0.184396f, 0.079888f, 0.210406f,
- -0.020627f, 0.244744f, 0.336972f, -0.182914f, -0.220976f, -0.304225f,
- -0.330974f, -0.370868f, -0.084935f, -0.136489f, -0.210082f, -0.188088f,
- -0.408768f, 0.184693f,
-};
-
-static const float av1_tx_split_nn_bias_8x16_layer0[64] = {
- -0.274107f, 0.445751f, 0.234359f, 0.291593f, 0.163298f, 0.183707f,
- -0.548839f, -0.190779f, -0.163346f, -0.669028f, 0.399209f, -0.354974f,
- 0.000000f, -0.254630f, 0.220149f, 0.371104f, 0.789759f, 0.270300f,
- 0.195126f, -0.206958f, 0.917708f, -0.256232f, 1.131933f, 1.178944f,
- 0.461270f, 0.246169f, -0.818614f, -0.111986f, 0.759355f, 0.154889f,
- 0.470299f, -1.025250f, 0.678678f, 0.959346f, -0.164105f, 0.544079f,
- -0.448733f, 0.649221f, -0.536672f, 0.962758f, -0.256427f, 0.808664f,
- -0.118694f, 0.684873f, -0.015635f, -0.046469f, 0.075481f, 0.412647f,
- 0.454456f, -0.107169f, 0.775235f, -0.261629f, -1.194849f, 0.010093f,
- -0.231289f, 0.658286f, -0.769320f, 0.564545f, 0.482962f, -0.131378f,
- -0.255844f, -0.078400f, 0.476752f, 0.643001f,
-};
-
-static const float av1_tx_split_nn_weights_8x16_layer1[64] = {
- -0.145065f, -0.145101f, 0.174786f, 0.196692f, 0.102025f, -0.087735f,
- 0.386353f, -0.660539f, -0.183940f, 0.490045f, -0.276404f, -0.145669f,
- 0.209846f, -0.085574f, -0.156821f, -0.377450f, -0.950010f, 0.450709f,
- -0.108545f, -0.261181f, 1.435606f, -0.176621f, -1.158548f, 2.035680f,
- 0.218069f, -0.138629f, 0.305958f, -0.277194f, -0.602468f, 0.203873f,
- 0.120720f, 0.216095f, -0.434502f, -0.579746f, -0.239450f, 0.755529f,
- 0.545643f, 0.232091f, 0.330169f, 0.988136f, -0.070465f, -0.345584f,
- -0.162455f, -0.617064f, 0.123881f, -0.201098f, 0.222756f, 0.112932f,
- 0.048647f, -0.147890f, 0.394584f, -0.262148f, 0.280564f, -0.195432f,
- -0.047515f, 1.133410f, 0.255415f, -0.299032f, -0.397807f, -0.153246f,
- -0.256734f, 0.177370f, 0.213522f, -0.530158f,
-};
-
-static const float av1_tx_split_nn_bias_8x16_layer1[1] = {
- 0.14910713f,
-};
-
-static const NN_CONFIG av1_tx_split_nnconfig_8x16 = {
- 8, // num_inputs
- 1, // num_outputs
- 1, // num_hidden_layers
- {
- 64,
- }, // num_hidden_nodes
- {
- av1_tx_split_nn_weights_8x16_layer0,
- av1_tx_split_nn_weights_8x16_layer1,
- },
- {
- av1_tx_split_nn_bias_8x16_layer0,
- av1_tx_split_nn_bias_8x16_layer1,
- },
-};
-/******************************************************************************/
-
-// Tx split model for 16x16 block.
-static const float av1_tx_split_nn_weights_16x16_layer0[12 * 24] = {
- -0.177215f, -0.297166f, 0.299924f, 0.207878f, 0.216871f, 0.173264f,
- 0.295464f, 0.048395f, 0.154731f, 0.305880f, 0.056787f, -0.166617f,
- 0.115653f, -0.529477f, -0.073995f, -0.211746f, -0.018169f, 0.000788f,
- -0.024940f, -0.007055f, 0.001392f, 0.021678f, -1.594600f, -0.099593f,
- 0.332930f, 0.103574f, 0.158249f, 0.182601f, 0.332665f, 0.226207f,
- -0.139566f, 0.185531f, 0.099074f, -0.185654f, -0.203121f, -0.285678f,
- -0.313453f, -0.294452f, -0.143707f, -0.031265f, -0.453030f, -0.061874f,
- -0.066150f, -0.099058f, -0.458879f, 0.127544f, 0.338314f, -0.161350f,
- 0.030091f, -0.075528f, 0.004320f, 0.353690f, -0.013480f, -0.420402f,
- -0.004659f, -0.329401f, -0.001745f, 0.227384f, -0.055183f, 0.121405f,
- 0.160340f, 0.143603f, -0.221813f, 0.079107f, -0.657639f, -0.084348f,
- -0.303414f, 0.046774f, -0.367679f, 0.060005f, 0.168645f, 0.084421f,
- -0.133625f, 0.301375f, 0.079412f, -0.419303f, 0.017235f, 0.068637f,
- 0.018384f, -0.428325f, -0.019753f, 0.149444f, -0.474836f, -0.287162f,
- 0.198083f, 0.028292f, -0.299092f, -0.005849f, -0.256245f, 0.233277f,
- -0.217561f, -0.264003f, 0.269411f, 0.207032f, -0.339411f, -0.198431f,
- -0.028521f, 0.158076f, 0.177116f, 0.345702f, -0.145132f, 0.064623f,
- -0.090867f, 0.288816f, -0.263198f, -0.071028f, -0.044546f, 0.380017f,
- -0.014100f, -0.271192f, -0.318559f, 0.129015f, -0.050314f, -0.093355f,
- -0.578498f, 0.099090f, -0.133080f, -0.029975f, -0.059828f, -0.157765f,
- -0.321153f, -0.343671f, -0.242959f, 0.128304f, 0.017170f, 0.072787f,
- -0.475838f, -0.003806f, -0.068615f, 0.150556f, -0.159903f, -0.416513f,
- 0.218794f, -0.290456f, -0.084569f, -0.170014f, -0.044414f, -0.153069f,
- -0.077329f, -0.089747f, -0.096526f, 0.537952f, 0.134725f, -0.006469f,
- -0.323335f, -0.168183f, -0.107163f, -0.139954f, 0.011286f, -0.021712f,
- -0.513992f, 0.259135f, -0.319808f, 0.077811f, 0.104613f, 0.370571f,
- 0.185244f, 0.065530f, -0.091098f, -0.573741f, 0.111934f, 0.437417f,
- -0.123691f, 0.220641f, -0.024783f, -0.149460f, -0.354185f, -0.134127f,
- 0.038015f, -0.380596f, 0.250980f, 0.142208f, 0.135170f, -0.131129f,
- -0.357556f, -0.530945f, 0.159672f, -0.147025f, -0.377829f, -0.504508f,
- -0.492870f, 0.020753f, 0.142818f, 0.025172f, 0.086140f, 0.091283f,
- 0.087491f, -0.186415f, 0.177785f, -0.195121f, -1.191148f, -0.477102f,
- 0.023371f, 0.227004f, -0.023502f, -0.242913f, -0.074398f, -0.153480f,
- 0.162900f, 0.415509f, -0.162565f, -0.131709f, -0.258852f, -0.252027f,
- -0.080845f, -0.330274f, 0.021874f, 0.232398f, 0.069277f, 0.220567f,
- -0.024237f, -0.366771f, 0.081673f, -0.429906f, -0.302170f, 0.061045f,
- 0.352777f, -0.230376f, 0.408153f, 0.064758f, 0.142051f, 0.007219f,
- 0.622878f, 0.212577f, 0.036489f, 0.081150f, -0.284767f, 0.107763f,
- -0.529786f, -0.072190f, -0.300421f, -0.287959f, -0.568900f, 0.011547f,
- -0.131696f, -0.356854f, -0.587962f, -0.026598f, 0.405829f, 0.057565f,
- 0.414265f, -0.159155f, 0.221456f, 0.146314f, 0.265776f, -0.006516f,
- 0.473978f, -0.186431f, 0.288672f, -0.060437f, 0.083380f, -0.205641f,
- 0.360016f, 0.222041f, 0.420011f, 0.024579f, 0.377546f, 0.250380f,
- -0.069900f, 0.296743f, 0.073532f, -0.243225f, -0.374987f, -0.387288f,
- -0.237255f, -0.287013f, 0.417831f, -0.252988f, -0.257652f, -0.066775f,
- -0.253926f, 0.057841f, 0.346133f, -0.157797f, -0.406028f, -0.286893f,
- 0.274507f, -0.452561f, 0.143381f, -0.097755f, 0.021242f, 0.034561f,
- 0.044115f, 0.004065f, 0.066729f, 0.043558f, 0.102991f, -0.477574f,
-};
-
-static const float av1_tx_split_nn_bias_16x16_layer0[24] = {
- -0.479033f, 1.467402f, -0.366291f, 0.372511f, 0.715322f, -0.605500f,
- 0.176848f, 0.032318f, 0.237429f, -0.046047f, 0.452082f, 0.451805f,
- -0.822845f, 0.636762f, -0.057350f, 1.163978f, 0.728287f, 0.603654f,
- -0.245519f, -0.893569f, -1.428185f, 0.808870f, -0.076159f, 1.231976f,
-};
-
-static const float av1_tx_split_nn_weights_16x16_layer1[24] = {
- -0.176161f, 1.670188f, -0.180755f, -0.321326f, 0.249728f, -0.170504f,
- -0.538432f, 0.033893f, 0.149842f, 0.404140f, -0.377812f, 0.338838f,
- -0.176091f, 0.249844f, -0.362533f, 1.412460f, 0.196862f, 0.278194f,
- -0.140444f, 0.297746f, 0.172533f, 0.116470f, -0.151656f, -0.603250f,
-};
-
-static const float av1_tx_split_nn_bias_16x16_layer1[1] = {
- 0.184803f,
-};
-
-static const NN_CONFIG av1_tx_split_nnconfig_16x16 = {
- 12, // num_inputs
- 1, // num_outputs
- 1, // num_hidden_layers
- {
- 24,
- }, // num_hidden_nodes
- {
- av1_tx_split_nn_weights_16x16_layer0,
- av1_tx_split_nn_weights_16x16_layer1,
- },
- {
- av1_tx_split_nn_bias_16x16_layer0,
- av1_tx_split_nn_bias_16x16_layer1,
- },
-};
-/******************************************************************************/
-
-// Tx split model for 32x32 block.
-static const float av1_tx_split_nn_weights_32x32_layer0[12 * 32] = {
- -0.439303f, 0.004813f, -0.365052f, -0.116868f, -0.356716f, -0.196537f,
- -0.196770f, -0.076096f, 0.357004f, -0.044909f, -0.112910f, -0.129081f,
- 0.156725f, -0.386346f, 0.038971f, 0.160696f, 0.204923f, -0.384333f,
- -0.319546f, 0.028179f, -0.250524f, -0.289669f, -0.284138f, -0.258963f,
- -0.180854f, -0.000807f, -0.029620f, -0.353134f, 0.212408f, 0.141414f,
- 0.303016f, 0.098066f, 0.482455f, 0.036069f, -0.166279f, 0.210119f,
- -0.086337f, -0.023550f, -0.250796f, -0.183945f, -0.393856f, 0.170608f,
- -0.306403f, 0.026318f, -0.277296f, 0.092684f, -0.033584f, -0.018371f,
- -0.025043f, -0.257659f, -0.139163f, -0.206949f, -0.190105f, 0.028053f,
- 0.361851f, -0.364726f, -0.096771f, -0.184166f, -0.433228f, -0.182191f,
- -0.097051f, 0.259172f, 0.016432f, 0.259358f, 0.145059f, 0.037196f,
- 0.091581f, -0.219644f, 0.140384f, -0.446837f, -0.234531f, 0.149508f,
- -0.083429f, 0.186189f, -0.099890f, -0.111277f, 0.495214f, 0.085053f,
- -0.266613f, -0.051366f, 0.148593f, 0.111875f, 0.077787f, -0.371653f,
- -0.146157f, -0.229235f, 0.076203f, 0.488975f, 0.096771f, -0.009483f,
- 0.192985f, 0.246273f, -0.192671f, -0.557890f, -0.292650f, -0.088907f,
- -0.106892f, -0.329659f, 0.012105f, -0.359326f, 0.170723f, -0.004357f,
- 0.171593f, -0.478768f, -0.236016f, -0.035077f, 0.133731f, 0.137962f,
- -0.397926f, -0.155164f, -0.276709f, -0.186602f, -0.258301f, 0.036965f,
- -0.649359f, 0.127605f, 0.097930f, 0.182775f, -0.313324f, 0.053349f,
- 0.204203f, -0.222948f, -0.059008f, -0.049759f, -0.056848f, 0.087497f,
- -0.039987f, -0.055042f, -0.041623f, -0.078424f, -0.317291f, -0.191398f,
- 0.632147f, 0.221825f, 0.268394f, -0.096357f, 0.442545f, -0.007117f,
- -0.036125f, 0.000525f, 0.088092f, -0.203653f, 0.086925f, 0.439141f,
- 0.329889f, -0.370050f, -0.194306f, -0.207430f, 0.132779f, -0.217614f,
- -0.039444f, -0.053019f, -0.260725f, -0.116563f, -0.271048f, 0.283737f,
- -0.007300f, 0.062257f, -0.347865f, -0.296767f, -0.359123f, 0.230459f,
- -0.189117f, -0.087622f, -0.561091f, 0.184182f, -0.044980f, 0.012643f,
- 0.241672f, 0.050272f, -0.204851f, -0.159285f, -0.064081f, -0.118666f,
- -0.269471f, 0.231668f, 0.135749f, -0.131162f, 0.062760f, 0.100949f,
- 0.074967f, -0.056918f, 0.251707f, 0.034098f, 0.341290f, -0.105027f,
- 0.313246f, -0.092679f, -0.014632f, -0.390967f, 0.136881f, -0.241554f,
- 0.097674f, 0.110832f, -0.390245f, 0.017654f, -0.506222f, 0.065252f,
- 0.244834f, -0.171352f, -0.331702f, 0.111043f, 0.125217f, -0.058116f,
- -0.382595f, -0.052545f, 0.114261f, -0.493617f, 0.243984f, -0.171053f,
- 0.165009f, -0.063020f, 0.096502f, 0.341339f, -0.013443f, 0.056372f,
- 0.339284f, 0.398376f, 0.389409f, 0.257252f, 0.517368f, 0.078856f,
- 0.087716f, -0.171092f, 0.227461f, 0.125307f, -0.054423f, -0.143161f,
- 0.224041f, -0.086477f, -0.092548f, 0.072392f, -0.061608f, 0.258347f,
- 0.147033f, -0.478244f, -0.204869f, 0.038552f, -0.144563f, 0.224087f,
- -0.296705f, 0.153889f, -0.064624f, 0.085265f, -0.103826f, 0.127971f,
- 0.019965f, 0.111937f, -0.074187f, -0.029518f, -0.127305f, -0.012210f,
- 0.042714f, 0.070052f, -0.202360f, 0.348144f, -0.132097f, -0.209585f,
- -0.248286f, -0.065774f, -0.089482f, -0.133226f, 0.325430f, -0.013468f,
- -0.406090f, -0.144936f, 0.208620f, 0.343445f, -0.059639f, 0.114857f,
- -0.069431f, -0.218725f, 0.190575f, -0.368101f, 0.030030f, 0.062815f,
- -0.239369f, -0.537852f, 0.022487f, 0.023038f, 0.190788f, 0.040123f,
- -0.004304f, 0.060749f, -0.108929f, 0.136796f, -0.542875f, -0.227074f,
- -0.182244f, 0.082559f, 0.019149f, 0.178854f, 0.120284f, 0.009070f,
- 0.068268f, -0.544822f, 0.120536f, 0.354028f, -0.119890f, -0.122055f,
- -0.405335f, 0.122341f, -0.304412f, 0.062405f, -0.302568f, -0.276505f,
- -0.120915f, -0.221841f, 0.282007f, -0.253971f, 0.059517f, -0.144976f,
- 0.149391f, -0.047355f, -0.167742f, -0.392333f, -0.041132f, 0.342135f,
- 0.017485f, 0.021038f, -0.023728f, -0.192181f, -0.103996f, 0.092873f,
- -0.114365f, -0.397732f, -0.065421f, 0.053084f, 0.035201f, 0.053019f,
- -0.105377f, -0.039500f, 0.131904f, -0.123911f, -0.390328f, -0.125198f,
- -0.000126f, 0.014864f, -0.220187f, 0.084056f, -0.492155f, -0.164979f,
- 0.133592f, 0.121519f, -0.240813f, 0.186680f, 0.118673f, 0.235006f,
- -0.239894f, -0.185759f, -0.336992f, 0.209620f, -0.298845f, 0.127803f,
- -0.083992f, 0.194340f, -0.245378f, 0.212308f, 0.142512f, -0.163324f,
- 0.383495f, 0.291065f, 0.286620f, -0.239957f, 0.225127f, -0.174424f,
- 0.297231f, -0.045434f, 0.156444f, -0.184273f, -0.204567f, 0.202551f,
- 0.370019f, -0.073910f, 0.344897f, 0.063100f, 0.338547f, -0.099145f,
- 0.391863f, -0.214244f, -0.241734f, -0.281851f, -0.035133f, -0.153157f,
-};
-
-static const float av1_tx_split_nn_bias_32x32_layer0[32] = {
- 0.143343f, -0.021982f, -0.314939f, 0.170867f, -0.081248f, 0.125758f,
- -0.355762f, 0.279798f, 1.027712f, -0.434660f, 1.072005f, 0.668893f,
- -0.031216f, -0.528650f, 0.328349f, 0.543645f, -0.188810f, 0.221110f,
- -1.638637f, 0.058045f, -1.731105f, -0.444284f, 0.513693f, 0.890025f,
- 0.160288f, 0.393312f, 0.332856f, -0.080767f, 0.299822f, 0.235876f,
- 0.254942f, -0.017796f,
-};
-
-static const float av1_tx_split_nn_weights_32x32_layer1[32] = {
- -0.090326f, -0.267553f, -0.026071f, 0.100912f, 0.279137f, 0.079064f,
- -0.074885f, 0.053804f, 0.736810f, -0.031693f, -0.970514f, 0.174069f,
- 0.095940f, -0.065047f, 0.052911f, 0.176728f, -0.058274f, 0.148364f,
- -0.162210f, 0.093875f, -0.367663f, 0.020876f, 0.137280f, -1.099116f,
- 0.146854f, 0.075590f, 0.228534f, 0.141993f, 0.072143f, 0.101421f,
- -0.068547f, -0.154148f,
-};
-
-static const float av1_tx_split_nn_bias_32x32_layer1[1] = {
- 0.316622f,
-};
-
-static const NN_CONFIG av1_tx_split_nnconfig_32x32 = {
- 12, // num_inputs
- 1, // num_outputs
- 1, // num_hidden_layers
- {
- 32,
- }, // num_hidden_nodes
- {
- av1_tx_split_nn_weights_32x32_layer0,
- av1_tx_split_nn_weights_32x32_layer1,
- },
- {
- av1_tx_split_nn_bias_32x32_layer0,
- av1_tx_split_nn_bias_32x32_layer1,
- },
-};
-/******************************************************************************/
-
-// Tx split model for 64x64 block.
-static const float av1_tx_split_nn_weights_64x64_layer0[12 * 32] = {
- -0.006828f, 0.149944f, -0.017614f, -0.044599f, -0.024517f, 0.507698f,
- 0.001039f, 0.037164f, 0.015091f, -0.306620f, -0.162047f, -0.369440f,
- 0.396310f, 0.087121f, 0.208609f, -0.083068f, 0.493774f, 0.217682f,
- 0.377393f, 0.172879f, 0.397422f, 0.078919f, 0.741350f, 0.064169f,
- -0.099989f, -0.192983f, -0.278230f, -0.310048f, -0.439965f, -0.226698f,
- -0.436596f, -0.007551f, -0.396721f, 0.153570f, -0.190838f, -0.071869f,
- 0.048799f, -0.301301f, -0.005015f, 0.500480f, -0.030622f, -0.559095f,
- -0.032634f, -0.054160f, -0.056979f, -0.456545f, 0.306536f, -0.411323f,
- -0.005366f, -0.069496f, 0.019990f, 0.327931f, -0.002516f, 0.393190f,
- 0.001759f, 0.035093f, -0.030302f, -0.528984f, 0.174781f, 0.241462f,
- -0.415427f, -0.164502f, 0.143065f, -0.122595f, 0.082049f, -0.143346f,
- 0.055642f, -0.124701f, 0.004050f, -0.216235f, -2.681730f, 0.101658f,
- 0.381239f, 0.465936f, 0.331154f, 0.301708f, -0.360171f, 0.054886f,
- -0.118658f, 0.287921f, 0.277859f, 0.203784f, 0.247809f, 0.656924f,
- -0.354628f, 0.315081f, 0.105108f, -0.510179f, 0.059267f, 0.061386f,
- 0.076423f, 0.347119f, 0.100134f, 0.028402f, -0.118621f, -0.238689f,
- 0.080141f, -0.138863f, 0.009009f, -0.100526f, -0.138875f, 0.066992f,
- 0.005949f, 0.564336f, 0.046994f, 0.004655f, 0.366047f, 0.014695f,
- -0.146928f, -0.024665f, -0.440357f, -0.109395f, 0.527231f, -0.020925f,
- -0.227236f, -0.068141f, 0.282009f, 0.040192f, -0.267100f, 0.229228f,
- 0.133861f, 0.338706f, -0.030178f, -0.040919f, -0.026343f, -0.330338f,
- -0.066931f, -0.110580f, -0.072056f, 0.599457f, -0.020738f, 0.169200f,
- 0.836240f, -0.157548f, 0.386273f, 0.002404f, 0.329410f, -0.007020f,
- 0.351705f, -0.041259f, 0.388861f, 0.003899f, 0.582627f, 0.023572f,
- 0.409912f, -0.158472f, 0.536383f, 0.525093f, 0.604247f, 0.439159f,
- 0.692832f, 0.046272f, 0.590367f, -0.082166f, 0.262357f, 0.478671f,
- 0.031935f, 0.042675f, 0.120002f, 0.398616f, -0.078967f, 0.227986f,
- -0.044679f, 0.151061f, -0.085564f, 0.220205f, -0.265606f, -0.203623f,
- 0.204719f, -0.125922f, 0.038544f, -0.269379f, 0.025866f, 0.109967f,
- 0.019064f, -0.237297f, -0.309746f, -0.329118f, -0.278368f, -0.063859f,
- 0.278496f, 0.018620f, 0.209971f, 0.296250f, 0.142850f, 0.288689f,
- 0.137084f, 0.130517f, 0.128171f, -0.155396f, -0.008449f, -0.099845f,
- 0.173455f, -0.059909f, -0.147318f, 0.102851f, -0.251389f, -0.001448f,
- 0.103907f, 0.297273f, -0.027846f, 0.028260f, -0.382601f, 0.346695f,
- -0.601641f, 0.162366f, -0.477495f, -0.042731f, -0.387871f, -0.051791f,
- -0.401498f, -0.048446f, -0.456270f, -0.062287f, 0.493919f, 0.003008f,
- 0.099917f, -0.358525f, -0.094903f, -0.022811f, -0.062259f, 0.019455f,
- -0.050644f, 0.020041f, -0.132912f, -0.061578f, -3.083691f, -0.014961f,
- -0.129115f, -0.710559f, 0.157213f, -0.844037f, -0.121991f, -0.943386f,
- -0.231269f, -0.003462f, 0.331478f, -0.132703f, -1.285993f, -0.120957f,
- -0.373755f, -0.322609f, 0.309059f, -0.131523f, -0.118334f, -0.063805f,
- -0.104251f, 0.012166f, -0.094699f, -0.283753f, 0.128168f, -0.526929f,
- -0.050331f, 0.186153f, 0.005913f, -0.221236f, 0.036363f, 0.160909f,
- -0.001342f, -0.382749f, 0.037820f, 0.281689f, -0.024275f, 0.028854f,
- 0.318291f, 0.318526f, 0.035778f, 0.034031f, 0.189663f, -0.293367f,
- 0.082022f, 0.127923f, 0.078866f, -0.081361f, -0.268117f, 0.246675f,
- 0.248605f, -0.215479f, -0.073084f, 0.496140f, -0.067327f, 0.396237f,
- -0.120739f, 0.033752f, -0.044120f, -0.218941f, -0.028078f, 0.195132f,
- -0.040400f, 0.281604f, -0.100471f, 0.415207f, -0.258503f, -0.429749f,
- 0.150569f, -0.010859f, 0.136448f, 0.026589f, 0.148466f, 0.110764f,
- 0.380967f, 0.009177f, 0.103075f, 0.116417f, 0.226273f, -0.327746f,
- 0.169346f, 0.284553f, -0.094986f, 0.312745f, -0.147840f, 0.025062f,
- -0.494482f, 0.112388f, -0.213962f, 0.107050f, -0.433371f, -0.096276f,
- -0.244835f, -0.003518f, -0.459148f, -0.145080f, 0.017150f, 0.042846f,
- -0.237479f, 0.104746f, 0.158677f, 0.358937f, 0.099921f, 0.277109f,
- 0.012410f, -0.062897f, 0.116130f, 0.255309f, 0.341628f, 0.145002f,
- -0.429344f, -0.016433f, -0.068985f, 0.285194f, -0.286719f, -0.018298f,
- -0.179369f, -0.194655f, -0.165380f, 0.026071f, -0.428268f, -0.379929f,
- -0.727543f, 0.179610f, -0.963979f, -0.042026f, -0.616202f, 0.133401f,
- -0.784966f, 0.061205f, -0.713357f, 0.129795f, 0.120512f, -0.339545f,
- 0.353557f, 0.114906f, -0.329813f, -0.209987f, 0.085410f, 0.214313f,
- -0.122082f, 0.335770f, -0.020937f, 0.202456f, 0.289023f, -0.421186f,
- 0.337905f, 0.407663f, 0.132771f, 0.071734f, 0.213914f, 0.128595f,
- 0.302659f, -0.209501f, 0.217756f, 0.253079f, -0.089505f, -0.205614f,
-};
-
-static const float av1_tx_split_nn_bias_64x64_layer0[32] = {
- 0.296914f, -1.826816f, 0.346130f, 0.969520f, -0.528154f, 1.175862f,
- -0.075985f, -0.097323f, -0.233059f, 0.004846f, 0.401279f, -2.272435f,
- 0.086257f, 0.414162f, -0.194786f, -0.233887f, -0.113215f, -2.453546f,
- 0.861214f, 0.298361f, 0.267397f, -0.158557f, -0.119911f, -0.098134f,
- -0.339263f, 0.385871f, -0.678123f, 0.263218f, 0.251611f, -1.155773f,
- -0.365437f, 0.229255f,
-};
-
-static const float av1_tx_split_nn_weights_64x64_layer1[32] = {
- 0.502104f, -0.708023f, 0.419648f, 1.583418f, 0.419355f, -1.462981f,
- -0.439623f, 0.405691f, 0.823257f, 0.061654f, 0.750875f, 0.775031f,
- -0.387909f, 0.447385f, 0.284690f, 0.353262f, -0.224347f, 0.832864f,
- -1.708491f, -1.042447f, -0.272829f, 0.540640f, 0.310509f, 0.723745f,
- 0.245592f, -0.218417f, -0.597987f, -0.362301f, 0.702217f, -0.692614f,
- 0.207812f, 0.513560f,
-};
-
-static const float av1_tx_split_nn_bias_64x64_layer1[1] = { -0.2307045f };
-
-static const NN_CONFIG av1_tx_split_nnconfig_64x64 = {
- 12, // num_inputs
- 1, // num_outputs
- 1, // num_hidden_layers
- {
- 32,
- }, // num_hidden_nodes
- {
- av1_tx_split_nn_weights_64x64_layer0,
- av1_tx_split_nn_weights_64x64_layer1,
- },
- {
- av1_tx_split_nn_bias_64x64_layer0,
- av1_tx_split_nn_bias_64x64_layer1,
- },
-};
-/******************************************************************************/
-
-// Tx split model for 4x16 block.
-static const float av1_tx_split_nn_weights_4x16_layer0[8 * 16] = {
- -1.344184f, -1.454625f, -0.703110f, -0.140570f, -0.841536f, -0.068131f,
- -2.128968f, -0.655518f, 0.432180f, 0.879752f, -0.222211f, 0.061615f,
- -0.230969f, 0.569496f, 1.424188f, 0.598063f, -0.436005f, -0.737606f,
- -0.137875f, -0.085730f, -0.076512f, -0.583101f, -0.937377f, -0.203556f,
- -0.215797f, -0.015361f, -0.124098f, -0.411917f, 0.340441f, -0.331752f,
- -0.472607f, -0.097714f, -0.930572f, -1.354713f, -0.550724f, 0.176212f,
- -0.636060f, 0.183271f, -0.610212f, 0.345895f, -1.100906f, -1.605713f,
- 0.111888f, -0.140937f, 0.063013f, -0.013315f, -0.273472f, -0.255870f,
- 1.200328f, 0.274002f, 1.005776f, 0.322392f, 1.222373f, 0.158227f,
- 0.408810f, 0.145022f, 0.139842f, -1.249412f, 0.286672f, -0.635699f,
- 0.312562f, -0.495606f, -1.117034f, -0.085107f, -0.097484f, -0.341521f,
- -0.132199f, -0.863055f, 0.217579f, -1.161425f, -0.302087f, -1.357271f,
- -0.520724f, -1.211069f, -1.048729f, -0.333087f, -1.171527f, -0.280824f,
- -2.057684f, -0.228755f, 0.606278f, 0.101198f, -0.314847f, -1.303255f,
- -0.294964f, 1.301923f, 0.041712f, 0.077593f, -1.152746f, 0.495315f,
- -0.751566f, 0.230249f, -0.840661f, 0.100731f, 1.346269f, 0.649898f,
- -1.432258f, -0.456710f, -1.018123f, -0.348559f, -1.225226f, -0.170717f,
- -0.354072f, 0.068292f, -0.234168f, 0.277503f, 0.179134f, 0.907420f,
- 0.354626f, -0.627210f, 0.905779f, 0.512612f, 0.161190f, -0.843177f,
- 0.014953f, -0.354983f, 0.011116f, -0.429598f, -1.017138f, -0.211432f,
- 0.941840f, -0.281747f, 0.957776f, -0.541914f, 1.041880f, -0.433580f,
- -1.416451f, -0.166467f,
-};
-
-static const float av1_tx_split_nn_bias_4x16_layer0[16] = {
- 3.086118f, -3.235095f, 4.830956f, -0.165706f, 0.955031f, 4.055783f,
- -0.311489f, 4.660205f, -0.576277f, -0.248111f, -0.790519f, -1.686412f,
- -1.191704f, -3.800073f, 4.121552f, -1.399397f,
-};
-
-static const float av1_tx_split_nn_weights_4x16_layer1[16] = {
- -0.758677f, 0.388776f, 0.439906f, 0.011390f, -0.084319f, -0.667969f,
- -0.467316f, -0.875491f, -0.160668f, 0.805292f, 0.114393f, -0.549682f,
- 0.462109f, 0.343315f, 1.092593f, 0.483152f,
-};
-
-static const float av1_tx_split_nn_bias_4x16_layer1[1] = {
- 0.8205083f,
-};
-
-static const NN_CONFIG av1_tx_split_nnconfig_4x16 = {
- 8, // num_inputs
- 1, // num_outputs
- 1, // num_hidden_layers
- {
- 16,
- }, // num_hidden_nodes
- {
- av1_tx_split_nn_weights_4x16_layer0,
- av1_tx_split_nn_weights_4x16_layer1,
- },
- {
- av1_tx_split_nn_bias_4x16_layer0,
- av1_tx_split_nn_bias_4x16_layer1,
- },
-};
-/******************************************************************************/
-
-// Tx split model for 16x32 block.
-static const float av1_tx_split_nn_weights_16x32_layer0[8 * 32] = {
- 0.180713f, 0.033211f, 0.607561f, 0.138642f, 0.637204f, -0.000940f,
- 0.012630f, 0.358109f, 0.022238f, 0.190418f, 0.079088f, 0.065925f,
- 0.038242f, 0.162380f, -0.122728f, 0.379382f, -0.303283f, -0.327550f,
- 0.029120f, -0.284553f, 0.269588f, -0.309805f, -0.241036f, -0.161103f,
- -0.304887f, 0.239843f, -0.149146f, 0.311234f, -0.073640f, -0.132718f,
- 0.178901f, 0.474712f, 0.020280f, 0.063685f, -0.609170f, -0.013658f,
- -0.338074f, 0.250429f, 0.082978f, -0.186315f, -0.788959f, 0.039859f,
- -0.426461f, -0.001524f, -0.447211f, 0.378102f, 0.315617f, 0.017428f,
- 0.745494f, -0.219024f, 0.512836f, 0.200522f, 0.680449f, 0.313686f,
- -0.412569f, -0.132927f, 0.631120f, 0.042735f, 0.336153f, 0.044772f,
- 0.432606f, 0.175681f, -0.634411f, -0.073509f, -0.040643f, -0.559260f,
- -0.104034f, -0.570495f, -0.247365f, 0.063256f, -0.582021f, -0.492585f,
- -0.194955f, -0.207934f, -0.506627f, 0.021743f, -0.416518f, 0.320876f,
- 0.115889f, 0.149399f, -0.229376f, 0.095505f, 0.115191f, -0.471921f,
- 0.113068f, 0.343684f, -0.036831f, 0.021240f, 0.295112f, 0.031166f,
- 0.448201f, -0.132241f, 0.164032f, 0.355572f, 0.072154f, 0.017335f,
- -0.046113f, 0.178719f, -0.026881f, -0.242590f, 0.055073f, -0.012958f,
- 0.077904f, 0.351356f, 0.107655f, 0.260568f, -0.080052f, -0.197553f,
- 0.085763f, 0.263416f, -0.327741f, 0.158855f, 0.056899f, -0.162121f,
- 0.339518f, -0.571204f, 0.264966f, -0.252214f, -0.202560f, -0.134213f,
- -0.330188f, 0.009470f, -0.468376f, -0.065240f, -0.307957f, 0.116479f,
- -0.222238f, -0.458716f, 0.186493f, -0.391415f, 0.118649f, -0.104653f,
- -0.259958f, -0.332081f, -0.403785f, -0.050147f, -0.573511f, 0.177117f,
- -0.598358f, 0.164947f, -0.119694f, -0.058520f, 0.203829f, -0.267404f,
- -0.048202f, -0.600006f, 0.181594f, -0.731805f, 0.146417f, -0.687148f,
- -1.210525f, -0.450101f, -0.620635f, 0.208825f, -0.611357f, 0.112202f,
- -0.309468f, -0.323545f, 0.357770f, 0.308061f, 0.553199f, 0.049012f,
- 0.530093f, -0.208597f, 0.607882f, -0.058120f, -0.527634f, 0.018136f,
- 0.060753f, 0.118894f, 0.175649f, 0.014731f, 0.428318f, -0.106465f,
- -0.119077f, 0.080179f, 0.524997f, 0.368286f, 0.528286f, 0.213659f,
- 0.639286f, 0.195079f, -0.049815f, -0.092008f, -0.302958f, 0.298149f,
- -0.173870f, -0.145205f, -0.233589f, -0.303368f, 0.141275f, 0.325622f,
- -0.115293f, 0.155188f, 0.047225f, 0.231050f, -0.167447f, 0.349754f,
- 0.295544f, -0.319466f, 0.095144f, 0.174612f, -0.194652f, 0.305915f,
- -0.239008f, -0.037453f, 0.280696f, 0.125850f, 0.749196f, -0.101919f,
- 0.791808f, -0.236811f, 0.064157f, 0.032865f, -0.225911f, 0.350384f,
- 0.723183f, -0.103992f, 0.483085f, -0.123992f, 0.602138f, 0.023895f,
- -0.692601f, -0.118387f, 0.162527f, 0.145178f, -0.184702f, -0.017753f,
- -0.159436f, 0.124105f, -0.131067f, 0.310275f, 0.151499f, 0.138924f,
- 0.537459f, 0.263212f, 0.615896f, 0.281255f, 0.021293f, -0.473459f,
- 0.210145f, -0.056682f, 0.063658f, 0.377254f, -0.314410f, -0.183487f,
- 0.300384f, 0.328471f, 0.164694f, -0.159272f, -0.160942f, -0.502861f,
- -0.129147f, 0.045916f, -0.606865f, -0.101378f,
-};
-
-static const float av1_tx_split_nn_bias_16x32_layer0[32] = {
- 0.051664f, -0.212487f, -0.077596f, -0.818467f, 0.638475f, -0.759937f,
- 0.157198f, 0.989640f, 1.586035f, 0.431144f, 0.041605f, 0.543085f,
- 0.498379f, 0.320504f, 0.134233f, 0.670979f, -0.105562f, -1.574879f,
- 1.261812f, -0.287530f, -1.610592f, 0.730899f, -0.894240f, -0.657790f,
- 0.270806f, -0.181708f, 0.298578f, 0.817240f, -0.221508f, -0.201771f,
- -0.294389f, 1.456413f,
-};
-
-static const float av1_tx_split_nn_weights_16x32_layer1[32] = {
- 1.208914f, 0.324728f, 0.383352f, -0.874321f, 0.172565f, -0.580927f,
- -0.432927f, 0.433698f, -0.801935f, 0.672028f, 0.563493f, 0.260077f,
- -0.200557f, -0.121638f, 0.530735f, -0.525196f, 0.281799f, 0.624204f,
- -0.662775f, -0.230887f, 0.980989f, 0.223437f, -0.790591f, 0.600724f,
- -0.273445f, 0.427635f, -0.501641f, -0.878390f, 0.234731f, -0.172550f,
- 0.418904f, 1.792187f,
-};
-
-static const float av1_tx_split_nn_bias_16x32_layer1[1] = {
- -0.29233751f,
-};
-
-static const NN_CONFIG av1_tx_split_nnconfig_16x32 = {
- 8, // num_inputs
- 1, // num_outputs
- 1, // num_hidden_layers
- {
- 32,
- }, // num_hidden_nodes
- {
- av1_tx_split_nn_weights_16x32_layer0,
- av1_tx_split_nn_weights_16x32_layer1,
- },
- {
- av1_tx_split_nn_bias_16x32_layer0,
- av1_tx_split_nn_bias_16x32_layer1,
- },
-};
-/******************************************************************************/
-
-// Tx split model for 32x64 block.
-static const float av1_tx_split_nn_weights_32x64_layer0[8 * 32] = {
- 0.031614f, -0.110926f, 0.052418f, -0.702506f, 0.045708f, 0.238329f,
- -0.021806f, -0.208128f, 0.509745f, -0.293891f, 0.277788f, 0.113937f,
- 0.741576f, 0.062848f, 0.351878f, 0.212532f, 0.385842f, 0.081517f,
- 0.398502f, -0.015156f, 0.242616f, 0.214619f, -0.182678f, -0.170546f,
- 0.110605f, -0.236749f, -0.023831f, -0.285243f, 0.147156f, -0.257639f,
- 0.341355f, -0.571641f, -0.721797f, 0.139588f, -0.518494f, -0.206526f,
- -0.570560f, -0.184295f, 0.110271f, 0.210292f, -0.109132f, -0.001080f,
- 0.129251f, -0.204230f, -0.396312f, -0.183024f, 0.421243f, -0.013154f,
- 0.222627f, 0.169826f, 0.226037f, 0.218153f, -0.343528f, 0.274906f,
- -0.156632f, 0.250261f, -0.484020f, 0.019909f, -0.349575f, -0.286643f,
- -0.507396f, 0.202446f, -0.154110f, -0.292644f, 0.122666f, 0.306963f,
- 0.424895f, 0.005579f, 0.494094f, -0.079551f, 0.473740f, 0.352414f,
- -0.356917f, 0.264331f, -0.554487f, 0.119978f, 0.012291f, -0.141641f,
- -0.254714f, -0.213723f, -0.116701f, -0.011267f, 0.190025f, -0.118501f,
- 0.305151f, -0.316782f, -0.220801f, -0.308420f, -0.324285f, 0.421329f,
- -0.177066f, -0.055114f, 0.229698f, -0.199523f, 0.054278f, 0.365020f,
- -0.060586f, -0.300618f, 0.157563f, -0.064338f, -0.005711f, -0.176991f,
- -0.424502f, -0.111914f, 0.092608f, 0.126621f, 0.078547f, 0.148008f,
- 0.024221f, 0.124599f, 0.001343f, 0.059402f, 0.453753f, 0.047102f,
- 0.242544f, 0.055735f, -0.067451f, -0.170061f, -0.170469f, -0.232173f,
- 0.214908f, 0.248889f, 0.544348f, -0.084566f, 0.402478f, 0.298031f,
- 0.099038f, -0.238019f, -0.475085f, -0.070042f, -0.754955f, -0.049095f,
- -0.783801f, -0.099857f, -0.582008f, -0.055194f, -0.103655f, 0.143689f,
- 0.100219f, 0.293934f, 0.099271f, -0.036320f, 0.356626f, -0.261445f,
- 0.879544f, 0.000878f, 0.532920f, -0.093918f, 0.508867f, -0.040215f,
- -0.789042f, -0.145380f, -0.090040f, -0.066636f, 0.015212f, 0.352989f,
- -0.058831f, -0.164588f, 0.039890f, 0.122861f, 0.222508f, 0.061217f,
- 0.466487f, 0.022666f, 0.423777f, -0.002200f, -0.656835f, -0.099760f,
- -0.520606f, 0.303204f, -0.563620f, -0.160922f, -0.243203f, 0.313354f,
- -0.336516f, -0.206764f, -0.236040f, 0.325899f, -0.418748f, 0.163205f,
- -0.476242f, -0.121928f, 0.139178f, -0.157193f, -0.531766f, -0.180202f,
- -0.485254f, 0.187703f, -0.440072f, 0.137854f, 0.029139f, 0.109530f,
- -0.078475f, -0.360618f, -0.334672f, -0.350890f, -0.403976f, 0.180336f,
- -0.304542f, 0.005123f, 0.413995f, 0.314639f, 0.342648f, -0.293264f,
- 0.358135f, -0.180425f, -0.369530f, -0.048413f, 0.498366f, 0.121875f,
- 0.270948f, -0.187966f, 0.342503f, 0.174420f, -0.352105f, 0.088080f,
- 0.008277f, 0.020275f, -0.002381f, 0.504389f, -0.018832f, -0.366047f,
- -0.090947f, -0.168150f, 0.016184f, -0.328914f, 0.089579f, -0.017349f,
- 0.005844f, -0.005010f, -1.857514f, -0.282426f, 0.010177f, -0.214727f,
- -0.182529f, 0.156943f, -0.162032f, -0.472654f, 0.069432f, 0.016901f,
- -0.767905f, 0.137129f, -0.411463f, 0.049056f, -0.431657f, -0.037641f,
- 0.785500f, 0.046225f, 0.195831f, 0.245204f, 0.368614f, 0.212261f,
- 0.440626f, -0.158048f, -0.461031f, -0.146280f,
-};
-
-static const float av1_tx_split_nn_bias_32x64_layer0[32] = {
- 0.490777f, -1.894238f, 0.621333f, -0.076756f, 0.286298f, 0.286375f,
- -0.126431f, -0.350034f, -1.017572f, 0.620125f, 0.408128f, 0.238756f,
- -0.060728f, 0.210912f, 0.043124f, 0.445649f, 0.907025f, 0.360272f,
- 1.083101f, -0.068952f, 1.062348f, 0.396354f, 0.280075f, 0.501732f,
- 0.328422f, 0.066241f, 0.474697f, 0.126313f, 0.741206f, 0.314796f,
- 0.552712f, 0.299410f,
-};
-
-static const float av1_tx_split_nn_weights_32x64_layer1[32] = {
- 1.033823f, 0.603439f, 0.304591f, -0.279940f, -0.780909f, -0.132801f,
- 0.154059f, 0.662014f, -0.718368f, 0.198733f, 0.039766f, -0.208516f,
- -0.104909f, -0.394209f, 0.081617f, 0.365041f, -0.874960f, -0.063315f,
- -1.189897f, 0.337225f, 0.410893f, 0.307519f, 0.221323f, 0.233895f,
- 0.469536f, 0.438557f, 0.280144f, 0.422423f, -1.394513f, 0.781900f,
- 0.352981f, 0.111265f,
-};
-
-static const float av1_tx_split_nn_bias_32x64_layer1[1] = {
- -0.18160765f,
-};
-
-static const NN_CONFIG av1_tx_split_nnconfig_32x64 = {
- 8, // num_inputs
- 1, // num_outputs
- 1, // num_hidden_layers
- {
- 32,
- }, // num_hidden_nodes
- {
- av1_tx_split_nn_weights_32x64_layer0,
- av1_tx_split_nn_weights_32x64_layer1,
- },
- {
- av1_tx_split_nn_bias_32x64_layer0,
- av1_tx_split_nn_bias_32x64_layer1,
- },
-};
-/******************************************************************************/
-
-// Tx split model for 8x32 block.
-static const float av1_tx_split_nn_weights_8x32_layer0[8 * 24] = {
- -0.687846f, 0.121404f, -0.372905f, 0.126770f, -0.103298f, -0.101650f,
- -0.148490f, -0.271740f, 0.682915f, -0.079765f, 0.634347f, -0.151503f,
- 0.287692f, -0.079072f, -0.236948f, 0.065064f, 0.713383f, 0.397123f,
- 0.553621f, 0.368529f, 0.767663f, -0.046601f, -0.392402f, -0.294822f,
- -0.292325f, -0.010573f, -0.837945f, 0.050113f, -0.811360f, 0.199162f,
- 0.150832f, 0.011602f, 0.369694f, -0.225876f, 0.234113f, -0.269808f,
- 0.303805f, -0.190281f, -0.451136f, 0.209755f, -0.308894f, 0.326956f,
- 0.313591f, 0.089923f, -0.095754f, 0.390981f, 0.467366f, 0.169670f,
- 0.853322f, 0.054055f, 0.830319f, -0.121918f, 0.262019f, -0.093526f,
- 0.385558f, 0.419174f, 0.040198f, -0.347030f, -0.450492f, -0.106764f,
- 0.487502f, -0.204188f, 0.430374f, -0.116388f, 0.236407f, -0.157376f,
- 0.732294f, -0.651387f, 0.347446f, 0.342575f, 0.048406f, 0.187657f,
- 0.434899f, -0.447782f, 0.032728f, -0.071168f, -0.255327f, 0.104174f,
- 0.095689f, -0.431743f, 0.725694f, 0.031797f, 0.523171f, 0.061801f,
- 0.469804f, -0.071068f, -0.059024f, -0.211937f, 0.392134f, -0.321490f,
- 0.366060f, -0.427798f, 0.166771f, 0.299652f, 0.044660f, 0.205142f,
- 0.039133f, -0.051835f, -0.465475f, 0.216976f, -0.341156f, 0.095358f,
- 0.230807f, 0.201674f, 0.279266f, -0.713534f, -0.091690f, -0.569708f,
- -0.119001f, 0.252160f, -1.544578f, -0.284477f, 0.555348f, 0.226471f,
- 0.347690f, 0.034365f, 0.770835f, -0.241859f, -0.130241f, 0.292936f,
- 0.396622f, -0.417916f, 0.492224f, 0.125517f, 0.344824f, 0.232172f,
- -0.432106f, -0.278745f, 0.035069f, -0.307247f, -0.120760f, 0.170950f,
- 0.433601f, 0.044286f, 0.141463f, -0.041382f, 0.529346f, 0.010868f,
- -0.323674f, 0.185205f, 0.623459f, 0.232842f, -0.406693f, -0.142944f,
- 0.222988f, 0.343634f, 0.065401f, 0.002621f, 0.805335f, -0.426926f,
- 0.279181f, 0.131364f, 0.192339f, -0.402391f, 0.544120f, -0.060618f,
- 0.467780f, 0.165224f, -0.373131f, 0.002427f, 0.688064f, 0.322317f,
- 0.259713f, 0.130583f, 0.185032f, -0.189111f, -0.067821f, 0.010875f,
- 0.644724f, -0.179291f, 0.463222f, 0.155230f, 0.721384f, -0.046019f,
- 0.438501f, 0.440027f, -0.462090f, -0.002039f, -0.468026f, -0.008890f,
- -0.328530f, 0.370102f, 0.482531f, 0.043471f, -0.469732f, -0.532663f,
- 0.122081f, -0.379659f, 0.037219f, -0.519913f, -0.128975f, -0.404365f,
-};
-
-static const float av1_tx_split_nn_bias_8x32_layer0[24] = {
- -1.198965f, 0.395204f, -0.408627f, -0.021654f, -0.658355f, 0.154525f,
- -0.288354f, 1.207574f, 0.411608f, 0.964678f, -1.176893f, 1.059006f,
- -0.472969f, 2.087975f, 1.065536f, 0.595569f, 0.197907f, -0.349938f,
- 1.013651f, -0.931093f, -0.973595f, -0.459094f, -1.253062f, 1.624782f,
-};
-
-static const float av1_tx_split_nn_weights_8x32_layer1[24] = {
- 0.815787f, -0.393465f, -0.483427f, -0.565592f, 0.493494f, 0.430229f,
- -0.507073f, -0.251379f, -0.353418f, -0.495445f, 0.820029f, 0.649146f,
- -0.487383f, 1.844503f, 0.480324f, -0.982705f, -0.501446f, -0.220584f,
- 0.334299f, 0.802238f, 0.805838f, -0.487848f, 0.300772f, -1.232857f,
-};
-
-static const float av1_tx_split_nn_bias_8x32_layer1[1] = {
- 0.13435879f,
-};
-
-static const NN_CONFIG av1_tx_split_nnconfig_8x32 = {
- 8, // num_inputs
- 1, // num_outputs
- 1, // num_hidden_layers
- {
- 24,
- }, // num_hidden_nodes
- {
- av1_tx_split_nn_weights_8x32_layer0,
- av1_tx_split_nn_weights_8x32_layer1,
- },
- {
- av1_tx_split_nn_bias_8x32_layer0,
- av1_tx_split_nn_bias_8x32_layer1,
- },
-};
-/******************************************************************************/
-
-// Tx split model for 16x32 block.
-static const float av1_tx_split_nn_weights_16x64_layer0[8 * 16] = {
- -0.378223f, -0.124216f, -0.514089f, -0.110117f, -0.585801f, -0.094838f,
- -0.455385f, -0.220254f, -0.504568f, -0.082351f, -0.476420f, -0.253993f,
- -0.454709f, -0.059461f, 0.210313f, -0.155683f, 0.192968f, -0.127804f,
- 0.471996f, 0.253377f, 0.472625f, 0.485322f, 0.150560f, 0.164868f,
- -0.475587f, 0.447559f, -0.455759f, -0.306665f, -0.194866f, -0.283716f,
- -0.243897f, 0.293020f, -0.308298f, -0.191904f, -0.468568f, 0.014053f,
- -0.618848f, 0.096273f, -0.444586f, 0.347750f, -0.280643f, -0.062872f,
- 0.118661f, 0.540099f, 0.104141f, -0.279300f, -0.098721f, -0.173427f,
- -0.984558f, -0.424559f, -0.411928f, -0.120875f, -0.488999f, -0.050716f,
- -0.523103f, 0.093620f, -0.930396f, -0.431997f, -1.163297f, 0.190384f,
- -0.422581f, -0.005354f, 0.450552f, 0.369210f, 0.562484f, 0.679922f,
- 0.282099f, -0.039075f, 0.404196f, 0.006371f, 0.069679f, -0.196160f,
- -0.213675f, 0.275187f, -0.104235f, -0.193090f, 0.003116f, -0.252454f,
- -0.094591f, 0.210439f, -0.137070f, 0.145043f, 0.024558f, 0.121718f,
- 0.010138f, 0.301651f, -0.377990f, 0.444414f, 0.001845f, -0.095334f,
- 0.550259f, 0.087603f, 0.792492f, -0.044584f, 0.641706f, -0.328458f,
- -0.447791f, 0.135376f, 0.356385f, 0.135748f, 0.310370f, 0.293757f,
- -0.062000f, -0.056368f, 0.343930f, 0.312039f, 0.370763f, 0.452381f,
- -0.023630f, -0.185909f, 0.422277f, -0.006306f, 0.045166f, 0.423359f,
- -0.157735f, -0.084901f, 0.219527f, -0.209510f, 0.575057f, 0.249276f,
- 0.069267f, 0.233898f, -0.229392f, 0.117197f, -0.038551f, 0.293976f,
- 0.101996f, 0.120878f,
-};
-
-static const float av1_tx_split_nn_bias_16x64_layer0[16] = {
- 1.036995f, 0.160249f, 0.100264f, 0.694881f, 0.694677f, 0.128379f,
- -0.843405f, -0.405515f, 0.104139f, 0.182980f, -0.025472f, 0.901067f,
- -0.299866f, -0.103079f, -0.190352f, -0.048121f,
-};
-
-static const float av1_tx_split_nn_weights_16x64_layer1[16] = {
- -1.778868f, 0.174690f, 0.211991f, 0.712138f, 0.589352f, 0.466652f,
- 1.029146f, -0.490044f, 0.483015f, 0.600215f, -0.577776f, -0.755546f,
- 0.348337f, -0.205082f, 0.347129f, -0.322277f,
-};
-
-static const float av1_tx_split_nn_bias_16x64_layer1[1] = {
- 0.04230947f,
-};
-
-static const NN_CONFIG av1_tx_split_nnconfig_16x64 = {
- 8, // num_inputs
- 1, // num_outputs
- 1, // num_hidden_layers
- {
- 16,
- }, // num_hidden_nodes
- {
- av1_tx_split_nn_weights_16x64_layer0,
- av1_tx_split_nn_weights_16x64_layer1,
- },
- {
- av1_tx_split_nn_bias_16x64_layer0,
- av1_tx_split_nn_bias_16x64_layer1,
- },
-};
-/******************************************************************************/
-
-// Map block size to its corresponding neural net model for tx split prediction.
-static const NN_CONFIG *av1_tx_split_nnconfig_map[TX_SIZES_ALL] = {
- NULL, // TX_4X4,
- &av1_tx_split_nnconfig_8x8, // TX_8X8,
- &av1_tx_split_nnconfig_16x16, // TX_16X16,
- &av1_tx_split_nnconfig_32x32, // TX_32X32,
- &av1_tx_split_nnconfig_64x64, // TX_64X64,
- &av1_tx_split_nnconfig_4x8, // TX_4X8,
- &av1_tx_split_nnconfig_4x8, // TX_8X4,
- &av1_tx_split_nnconfig_8x16, // TX_8X16,
- &av1_tx_split_nnconfig_8x16, // TX_16X8,
- &av1_tx_split_nnconfig_16x32, // TX_16X32,
- &av1_tx_split_nnconfig_16x32, // TX_32X16,
- &av1_tx_split_nnconfig_32x64, // TX_32X64,
- &av1_tx_split_nnconfig_32x64, // TX_64X32,
- &av1_tx_split_nnconfig_4x16, // TX_4X16,
- &av1_tx_split_nnconfig_4x16, // TX_16X4,
- &av1_tx_split_nnconfig_8x32, // TX_8X32,
- &av1_tx_split_nnconfig_8x32, // TX_32X8,
- &av1_tx_split_nnconfig_16x64, // TX_16X64,
- &av1_tx_split_nnconfig_16x64, // TX_64X16,
-};
-
-#ifdef __cplusplus
-} // extern "C"
-#endif
-
-#endif // AOM_AV1_ENCODER_TX_PRUNE_MODEL_WEIGHTS_H_
diff --git a/third_party/aom/av1/encoder/wedge_utils.c b/third_party/aom/av1/encoder/wedge_utils.c
deleted file mode 100644
index e6edbb6af..000000000
--- a/third_party/aom/av1/encoder/wedge_utils.c
+++ /dev/null
@@ -1,125 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#include <assert.h>
-
-#include "aom/aom_integer.h"
-
-#include "aom_ports/mem.h"
-
-#include "aom_dsp/aom_dsp_common.h"
-
-#include "av1/common/reconinter.h"
-
-#define MAX_MASK_VALUE (1 << WEDGE_WEIGHT_BITS)
-
-/**
- * Computes SSE of a compound predictor constructed from 2 fundamental
- * predictors p0 and p1 using blending with mask.
- *
- * r1: Residuals of p1.
- * (source - p1)
- * d: Difference of p1 and p0.
- * (p1 - p0)
- * m: The blending mask
- * N: Number of pixels
- *
- * 'r1', 'd', and 'm' are contiguous.
- *
- * Computes:
- * Sum((MAX_MASK_VALUE*r1 + mask*d)**2), which is equivalent to:
- * Sum((mask*r0 + (MAX_MASK_VALUE-mask)*r1)**2),
- * where r0 is (source - p0), and r1 is (source - p1), which is in turn
- * is equivalent to:
- * Sum((source*MAX_MASK_VALUE - (mask*p0 + (MAX_MASK_VALUE-mask)*p1))**2),
- * which is the SSE of the residuals of the compound predictor scaled up by
- * MAX_MASK_VALUE**2.
- *
- * Note that we clamp the partial term in the loop to 16 bits signed. This is
- * to facilitate equivalent SIMD implementation. It should have no effect if
- * residuals are within 16 - WEDGE_WEIGHT_BITS (=10) signed, which always
- * holds for 8 bit input, and on real input, it should hold practically always,
- * as residuals are expected to be small.
- */
-uint64_t av1_wedge_sse_from_residuals_c(const int16_t *r1, const int16_t *d,
- const uint8_t *m, int N) {
- uint64_t csse = 0;
- int i;
-
- for (i = 0; i < N; i++) {
- int32_t t = MAX_MASK_VALUE * r1[i] + m[i] * d[i];
- t = clamp(t, INT16_MIN, INT16_MAX);
- csse += t * t;
- }
- return ROUND_POWER_OF_TWO(csse, 2 * WEDGE_WEIGHT_BITS);
-}
-
-/**
- * Choose the mask sign for a compound predictor.
- *
- * ds: Difference of the squares of the residuals.
- * r0**2 - r1**2
- * m: The blending mask
- * N: Number of pixels
- * limit: Pre-computed threshold value.
- * MAX_MASK_VALUE/2 * (sum(r0**2) - sum(r1**2))
- *
- * 'ds' and 'm' are contiguous.
- *
- * Returns true if the negated mask has lower SSE compared to the positive
- * mask. Computation is based on:
- * Sum((mask*r0 + (MAX_MASK_VALUE-mask)*r1)**2)
- * >
- * Sum(((MAX_MASK_VALUE-mask)*r0 + mask*r1)**2)
- *
- * which can be simplified to:
- *
- * Sum(mask*(r0**2 - r1**2)) > MAX_MASK_VALUE/2 * (sum(r0**2) - sum(r1**2))
- *
- * The right hand side does not depend on the mask, and needs to be passed as
- * the 'limit' parameter.
- *
- * After pre-computing (r0**2 - r1**2), which is passed in as 'ds', the left
- * hand side is simply a scalar product between an int16_t and uint8_t vector.
- *
- * Note that for efficiency, ds is stored on 16 bits. Real input residuals
- * being small, this should not cause a noticeable issue.
- */
-int av1_wedge_sign_from_residuals_c(const int16_t *ds, const uint8_t *m, int N,
- int64_t limit) {
- int64_t acc = 0;
-
- do {
- acc += *ds++ * *m++;
- } while (--N);
-
- return acc > limit;
-}
-
-/**
- * Compute the element-wise difference of the squares of 2 arrays.
- *
- * d: Difference of the squares of the inputs: a**2 - b**2
- * a: First input array
- * b: Second input array
- * N: Number of elements
- *
- * 'd', 'a', and 'b' are contiguous.
- *
- * The result is saturated to signed 16 bits.
- */
-void av1_wedge_compute_delta_squares_c(int16_t *d, const int16_t *a,
- const int16_t *b, int N) {
- int i;
-
- for (i = 0; i < N; i++)
- d[i] = clamp(a[i] * a[i] - b[i] * b[i], INT16_MIN, INT16_MAX);
-}
diff --git a/third_party/aom/av1/encoder/x86/av1_fwd_txfm1d_sse4.c b/third_party/aom/av1/encoder/x86/av1_fwd_txfm1d_sse4.c
deleted file mode 100644
index 07615543c..000000000
--- a/third_party/aom/av1/encoder/x86/av1_fwd_txfm1d_sse4.c
+++ /dev/null
@@ -1,1217 +0,0 @@
-/*
- * Copyright (c) 2018, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#include "av1/encoder/x86/av1_txfm1d_sse4.h"
-
-void av1_fdct32_new_sse4_1(const __m128i *input, __m128i *output,
- int8_t cos_bit) {
- __m128i buf0[32];
- __m128i buf1[32];
- const int32_t *cospi;
- // stage 0
- // stage 1
- buf1[0] = _mm_add_epi32(input[0], input[31]);
- buf1[31] = _mm_sub_epi32(input[0], input[31]);
- buf1[1] = _mm_add_epi32(input[1], input[30]);
- buf1[30] = _mm_sub_epi32(input[1], input[30]);
- buf1[2] = _mm_add_epi32(input[2], input[29]);
- buf1[29] = _mm_sub_epi32(input[2], input[29]);
- buf1[3] = _mm_add_epi32(input[3], input[28]);
- buf1[28] = _mm_sub_epi32(input[3], input[28]);
- buf1[4] = _mm_add_epi32(input[4], input[27]);
- buf1[27] = _mm_sub_epi32(input[4], input[27]);
- buf1[5] = _mm_add_epi32(input[5], input[26]);
- buf1[26] = _mm_sub_epi32(input[5], input[26]);
- buf1[6] = _mm_add_epi32(input[6], input[25]);
- buf1[25] = _mm_sub_epi32(input[6], input[25]);
- buf1[7] = _mm_add_epi32(input[7], input[24]);
- buf1[24] = _mm_sub_epi32(input[7], input[24]);
- buf1[8] = _mm_add_epi32(input[8], input[23]);
- buf1[23] = _mm_sub_epi32(input[8], input[23]);
- buf1[9] = _mm_add_epi32(input[9], input[22]);
- buf1[22] = _mm_sub_epi32(input[9], input[22]);
- buf1[10] = _mm_add_epi32(input[10], input[21]);
- buf1[21] = _mm_sub_epi32(input[10], input[21]);
- buf1[11] = _mm_add_epi32(input[11], input[20]);
- buf1[20] = _mm_sub_epi32(input[11], input[20]);
- buf1[12] = _mm_add_epi32(input[12], input[19]);
- buf1[19] = _mm_sub_epi32(input[12], input[19]);
- buf1[13] = _mm_add_epi32(input[13], input[18]);
- buf1[18] = _mm_sub_epi32(input[13], input[18]);
- buf1[14] = _mm_add_epi32(input[14], input[17]);
- buf1[17] = _mm_sub_epi32(input[14], input[17]);
- buf1[15] = _mm_add_epi32(input[15], input[16]);
- buf1[16] = _mm_sub_epi32(input[15], input[16]);
-
- // stage 2
- cospi = cospi_arr(cos_bit);
- buf0[0] = _mm_add_epi32(buf1[0], buf1[15]);
- buf0[15] = _mm_sub_epi32(buf1[0], buf1[15]);
- buf0[1] = _mm_add_epi32(buf1[1], buf1[14]);
- buf0[14] = _mm_sub_epi32(buf1[1], buf1[14]);
- buf0[2] = _mm_add_epi32(buf1[2], buf1[13]);
- buf0[13] = _mm_sub_epi32(buf1[2], buf1[13]);
- buf0[3] = _mm_add_epi32(buf1[3], buf1[12]);
- buf0[12] = _mm_sub_epi32(buf1[3], buf1[12]);
- buf0[4] = _mm_add_epi32(buf1[4], buf1[11]);
- buf0[11] = _mm_sub_epi32(buf1[4], buf1[11]);
- buf0[5] = _mm_add_epi32(buf1[5], buf1[10]);
- buf0[10] = _mm_sub_epi32(buf1[5], buf1[10]);
- buf0[6] = _mm_add_epi32(buf1[6], buf1[9]);
- buf0[9] = _mm_sub_epi32(buf1[6], buf1[9]);
- buf0[7] = _mm_add_epi32(buf1[7], buf1[8]);
- buf0[8] = _mm_sub_epi32(buf1[7], buf1[8]);
- buf0[16] = buf1[16];
- buf0[17] = buf1[17];
- buf0[18] = buf1[18];
- buf0[19] = buf1[19];
- btf_32_sse4_1_type0(-cospi[32], cospi[32], buf1[20], buf1[27], buf0[20],
- buf0[27], cos_bit);
- btf_32_sse4_1_type0(-cospi[32], cospi[32], buf1[21], buf1[26], buf0[21],
- buf0[26], cos_bit);
- btf_32_sse4_1_type0(-cospi[32], cospi[32], buf1[22], buf1[25], buf0[22],
- buf0[25], cos_bit);
- btf_32_sse4_1_type0(-cospi[32], cospi[32], buf1[23], buf1[24], buf0[23],
- buf0[24], cos_bit);
- buf0[28] = buf1[28];
- buf0[29] = buf1[29];
- buf0[30] = buf1[30];
- buf0[31] = buf1[31];
-
- // stage 3
- cospi = cospi_arr(cos_bit);
- buf1[0] = _mm_add_epi32(buf0[0], buf0[7]);
- buf1[7] = _mm_sub_epi32(buf0[0], buf0[7]);
- buf1[1] = _mm_add_epi32(buf0[1], buf0[6]);
- buf1[6] = _mm_sub_epi32(buf0[1], buf0[6]);
- buf1[2] = _mm_add_epi32(buf0[2], buf0[5]);
- buf1[5] = _mm_sub_epi32(buf0[2], buf0[5]);
- buf1[3] = _mm_add_epi32(buf0[3], buf0[4]);
- buf1[4] = _mm_sub_epi32(buf0[3], buf0[4]);
- buf1[8] = buf0[8];
- buf1[9] = buf0[9];
- btf_32_sse4_1_type0(-cospi[32], cospi[32], buf0[10], buf0[13], buf1[10],
- buf1[13], cos_bit);
- btf_32_sse4_1_type0(-cospi[32], cospi[32], buf0[11], buf0[12], buf1[11],
- buf1[12], cos_bit);
- buf1[14] = buf0[14];
- buf1[15] = buf0[15];
- buf1[16] = _mm_add_epi32(buf0[16], buf0[23]);
- buf1[23] = _mm_sub_epi32(buf0[16], buf0[23]);
- buf1[17] = _mm_add_epi32(buf0[17], buf0[22]);
- buf1[22] = _mm_sub_epi32(buf0[17], buf0[22]);
- buf1[18] = _mm_add_epi32(buf0[18], buf0[21]);
- buf1[21] = _mm_sub_epi32(buf0[18], buf0[21]);
- buf1[19] = _mm_add_epi32(buf0[19], buf0[20]);
- buf1[20] = _mm_sub_epi32(buf0[19], buf0[20]);
- buf1[24] = _mm_sub_epi32(buf0[31], buf0[24]);
- buf1[31] = _mm_add_epi32(buf0[31], buf0[24]);
- buf1[25] = _mm_sub_epi32(buf0[30], buf0[25]);
- buf1[30] = _mm_add_epi32(buf0[30], buf0[25]);
- buf1[26] = _mm_sub_epi32(buf0[29], buf0[26]);
- buf1[29] = _mm_add_epi32(buf0[29], buf0[26]);
- buf1[27] = _mm_sub_epi32(buf0[28], buf0[27]);
- buf1[28] = _mm_add_epi32(buf0[28], buf0[27]);
-
- // stage 4
- cospi = cospi_arr(cos_bit);
- buf0[0] = _mm_add_epi32(buf1[0], buf1[3]);
- buf0[3] = _mm_sub_epi32(buf1[0], buf1[3]);
- buf0[1] = _mm_add_epi32(buf1[1], buf1[2]);
- buf0[2] = _mm_sub_epi32(buf1[1], buf1[2]);
- buf0[4] = buf1[4];
- btf_32_sse4_1_type0(-cospi[32], cospi[32], buf1[5], buf1[6], buf0[5], buf0[6],
- cos_bit);
- buf0[7] = buf1[7];
- buf0[8] = _mm_add_epi32(buf1[8], buf1[11]);
- buf0[11] = _mm_sub_epi32(buf1[8], buf1[11]);
- buf0[9] = _mm_add_epi32(buf1[9], buf1[10]);
- buf0[10] = _mm_sub_epi32(buf1[9], buf1[10]);
- buf0[12] = _mm_sub_epi32(buf1[15], buf1[12]);
- buf0[15] = _mm_add_epi32(buf1[15], buf1[12]);
- buf0[13] = _mm_sub_epi32(buf1[14], buf1[13]);
- buf0[14] = _mm_add_epi32(buf1[14], buf1[13]);
- buf0[16] = buf1[16];
- buf0[17] = buf1[17];
- btf_32_sse4_1_type0(-cospi[16], cospi[48], buf1[18], buf1[29], buf0[18],
- buf0[29], cos_bit);
- btf_32_sse4_1_type0(-cospi[16], cospi[48], buf1[19], buf1[28], buf0[19],
- buf0[28], cos_bit);
- btf_32_sse4_1_type0(-cospi[48], -cospi[16], buf1[20], buf1[27], buf0[20],
- buf0[27], cos_bit);
- btf_32_sse4_1_type0(-cospi[48], -cospi[16], buf1[21], buf1[26], buf0[21],
- buf0[26], cos_bit);
- buf0[22] = buf1[22];
- buf0[23] = buf1[23];
- buf0[24] = buf1[24];
- buf0[25] = buf1[25];
- buf0[30] = buf1[30];
- buf0[31] = buf1[31];
-
- // stage 5
- cospi = cospi_arr(cos_bit);
- btf_32_sse4_1_type0(cospi[32], cospi[32], buf0[0], buf0[1], buf1[0], buf1[1],
- cos_bit);
- btf_32_sse4_1_type1(cospi[48], cospi[16], buf0[2], buf0[3], buf1[2], buf1[3],
- cos_bit);
- buf1[4] = _mm_add_epi32(buf0[4], buf0[5]);
- buf1[5] = _mm_sub_epi32(buf0[4], buf0[5]);
- buf1[6] = _mm_sub_epi32(buf0[7], buf0[6]);
- buf1[7] = _mm_add_epi32(buf0[7], buf0[6]);
- buf1[8] = buf0[8];
- btf_32_sse4_1_type0(-cospi[16], cospi[48], buf0[9], buf0[14], buf1[9],
- buf1[14], cos_bit);
- btf_32_sse4_1_type0(-cospi[48], -cospi[16], buf0[10], buf0[13], buf1[10],
- buf1[13], cos_bit);
- buf1[11] = buf0[11];
- buf1[12] = buf0[12];
- buf1[15] = buf0[15];
- buf1[16] = _mm_add_epi32(buf0[16], buf0[19]);
- buf1[19] = _mm_sub_epi32(buf0[16], buf0[19]);
- buf1[17] = _mm_add_epi32(buf0[17], buf0[18]);
- buf1[18] = _mm_sub_epi32(buf0[17], buf0[18]);
- buf1[20] = _mm_sub_epi32(buf0[23], buf0[20]);
- buf1[23] = _mm_add_epi32(buf0[23], buf0[20]);
- buf1[21] = _mm_sub_epi32(buf0[22], buf0[21]);
- buf1[22] = _mm_add_epi32(buf0[22], buf0[21]);
- buf1[24] = _mm_add_epi32(buf0[24], buf0[27]);
- buf1[27] = _mm_sub_epi32(buf0[24], buf0[27]);
- buf1[25] = _mm_add_epi32(buf0[25], buf0[26]);
- buf1[26] = _mm_sub_epi32(buf0[25], buf0[26]);
- buf1[28] = _mm_sub_epi32(buf0[31], buf0[28]);
- buf1[31] = _mm_add_epi32(buf0[31], buf0[28]);
- buf1[29] = _mm_sub_epi32(buf0[30], buf0[29]);
- buf1[30] = _mm_add_epi32(buf0[30], buf0[29]);
-
- // stage 6
- cospi = cospi_arr(cos_bit);
- buf0[0] = buf1[0];
- buf0[1] = buf1[1];
- buf0[2] = buf1[2];
- buf0[3] = buf1[3];
- btf_32_sse4_1_type1(cospi[56], cospi[8], buf1[4], buf1[7], buf0[4], buf0[7],
- cos_bit);
- btf_32_sse4_1_type1(cospi[24], cospi[40], buf1[5], buf1[6], buf0[5], buf0[6],
- cos_bit);
- buf0[8] = _mm_add_epi32(buf1[8], buf1[9]);
- buf0[9] = _mm_sub_epi32(buf1[8], buf1[9]);
- buf0[10] = _mm_sub_epi32(buf1[11], buf1[10]);
- buf0[11] = _mm_add_epi32(buf1[11], buf1[10]);
- buf0[12] = _mm_add_epi32(buf1[12], buf1[13]);
- buf0[13] = _mm_sub_epi32(buf1[12], buf1[13]);
- buf0[14] = _mm_sub_epi32(buf1[15], buf1[14]);
- buf0[15] = _mm_add_epi32(buf1[15], buf1[14]);
- buf0[16] = buf1[16];
- btf_32_sse4_1_type0(-cospi[8], cospi[56], buf1[17], buf1[30], buf0[17],
- buf0[30], cos_bit);
- btf_32_sse4_1_type0(-cospi[56], -cospi[8], buf1[18], buf1[29], buf0[18],
- buf0[29], cos_bit);
- buf0[19] = buf1[19];
- buf0[20] = buf1[20];
- btf_32_sse4_1_type0(-cospi[40], cospi[24], buf1[21], buf1[26], buf0[21],
- buf0[26], cos_bit);
- btf_32_sse4_1_type0(-cospi[24], -cospi[40], buf1[22], buf1[25], buf0[22],
- buf0[25], cos_bit);
- buf0[23] = buf1[23];
- buf0[24] = buf1[24];
- buf0[27] = buf1[27];
- buf0[28] = buf1[28];
- buf0[31] = buf1[31];
-
- // stage 7
- cospi = cospi_arr(cos_bit);
- buf1[0] = buf0[0];
- buf1[1] = buf0[1];
- buf1[2] = buf0[2];
- buf1[3] = buf0[3];
- buf1[4] = buf0[4];
- buf1[5] = buf0[5];
- buf1[6] = buf0[6];
- buf1[7] = buf0[7];
- btf_32_sse4_1_type1(cospi[60], cospi[4], buf0[8], buf0[15], buf1[8], buf1[15],
- cos_bit);
- btf_32_sse4_1_type1(cospi[28], cospi[36], buf0[9], buf0[14], buf1[9],
- buf1[14], cos_bit);
- btf_32_sse4_1_type1(cospi[44], cospi[20], buf0[10], buf0[13], buf1[10],
- buf1[13], cos_bit);
- btf_32_sse4_1_type1(cospi[12], cospi[52], buf0[11], buf0[12], buf1[11],
- buf1[12], cos_bit);
- buf1[16] = _mm_add_epi32(buf0[16], buf0[17]);
- buf1[17] = _mm_sub_epi32(buf0[16], buf0[17]);
- buf1[18] = _mm_sub_epi32(buf0[19], buf0[18]);
- buf1[19] = _mm_add_epi32(buf0[19], buf0[18]);
- buf1[20] = _mm_add_epi32(buf0[20], buf0[21]);
- buf1[21] = _mm_sub_epi32(buf0[20], buf0[21]);
- buf1[22] = _mm_sub_epi32(buf0[23], buf0[22]);
- buf1[23] = _mm_add_epi32(buf0[23], buf0[22]);
- buf1[24] = _mm_add_epi32(buf0[24], buf0[25]);
- buf1[25] = _mm_sub_epi32(buf0[24], buf0[25]);
- buf1[26] = _mm_sub_epi32(buf0[27], buf0[26]);
- buf1[27] = _mm_add_epi32(buf0[27], buf0[26]);
- buf1[28] = _mm_add_epi32(buf0[28], buf0[29]);
- buf1[29] = _mm_sub_epi32(buf0[28], buf0[29]);
- buf1[30] = _mm_sub_epi32(buf0[31], buf0[30]);
- buf1[31] = _mm_add_epi32(buf0[31], buf0[30]);
-
- // stage 8
- cospi = cospi_arr(cos_bit);
- buf0[0] = buf1[0];
- buf0[1] = buf1[1];
- buf0[2] = buf1[2];
- buf0[3] = buf1[3];
- buf0[4] = buf1[4];
- buf0[5] = buf1[5];
- buf0[6] = buf1[6];
- buf0[7] = buf1[7];
- buf0[8] = buf1[8];
- buf0[9] = buf1[9];
- buf0[10] = buf1[10];
- buf0[11] = buf1[11];
- buf0[12] = buf1[12];
- buf0[13] = buf1[13];
- buf0[14] = buf1[14];
- buf0[15] = buf1[15];
- btf_32_sse4_1_type1(cospi[62], cospi[2], buf1[16], buf1[31], buf0[16],
- buf0[31], cos_bit);
- btf_32_sse4_1_type1(cospi[30], cospi[34], buf1[17], buf1[30], buf0[17],
- buf0[30], cos_bit);
- btf_32_sse4_1_type1(cospi[46], cospi[18], buf1[18], buf1[29], buf0[18],
- buf0[29], cos_bit);
- btf_32_sse4_1_type1(cospi[14], cospi[50], buf1[19], buf1[28], buf0[19],
- buf0[28], cos_bit);
- btf_32_sse4_1_type1(cospi[54], cospi[10], buf1[20], buf1[27], buf0[20],
- buf0[27], cos_bit);
- btf_32_sse4_1_type1(cospi[22], cospi[42], buf1[21], buf1[26], buf0[21],
- buf0[26], cos_bit);
- btf_32_sse4_1_type1(cospi[38], cospi[26], buf1[22], buf1[25], buf0[22],
- buf0[25], cos_bit);
- btf_32_sse4_1_type1(cospi[6], cospi[58], buf1[23], buf1[24], buf0[23],
- buf0[24], cos_bit);
-
- // stage 9
- output[0] = buf0[0];
- output[1] = buf0[16];
- output[2] = buf0[8];
- output[3] = buf0[24];
- output[4] = buf0[4];
- output[5] = buf0[20];
- output[6] = buf0[12];
- output[7] = buf0[28];
- output[8] = buf0[2];
- output[9] = buf0[18];
- output[10] = buf0[10];
- output[11] = buf0[26];
- output[12] = buf0[6];
- output[13] = buf0[22];
- output[14] = buf0[14];
- output[15] = buf0[30];
- output[16] = buf0[1];
- output[17] = buf0[17];
- output[18] = buf0[9];
- output[19] = buf0[25];
- output[20] = buf0[5];
- output[21] = buf0[21];
- output[22] = buf0[13];
- output[23] = buf0[29];
- output[24] = buf0[3];
- output[25] = buf0[19];
- output[26] = buf0[11];
- output[27] = buf0[27];
- output[28] = buf0[7];
- output[29] = buf0[23];
- output[30] = buf0[15];
- output[31] = buf0[31];
-}
-
-void av1_fadst4_new_sse4_1(const __m128i *input, __m128i *output,
- const int8_t cos_bit, const int8_t *stage_range) {
- const int txfm_size = 4;
- const int num_per_128 = 4;
- const int32_t *cospi;
- __m128i buf0[4];
- __m128i buf1[4];
- int col_num = txfm_size / num_per_128;
- int col;
- (void)stage_range;
- for (col = 0; col < col_num; col++) {
- // stage 0;
- int32_t stage_idx = 0;
- int j;
- for (j = 0; j < 4; ++j) {
- buf0[j] = input[j * col_num + col];
- }
-
- // stage 1
- stage_idx++;
- buf1[0] = buf0[3];
- buf1[1] = buf0[0];
- buf1[2] = buf0[1];
- buf1[3] = buf0[2];
-
- // stage 2
- stage_idx++;
-
- cospi = cospi_arr(cos_bit);
- btf_32_sse4_1_type0(cospi[8], cospi[56], buf1[0], buf1[1], buf0[0], buf0[1],
- cos_bit);
- btf_32_sse4_1_type0(cospi[40], cospi[24], buf1[2], buf1[3], buf0[2],
- buf0[3], cos_bit);
-
- // stage 3
- stage_idx++;
- buf1[0] = _mm_add_epi32(buf0[0], buf0[2]);
- buf1[2] = _mm_sub_epi32(buf0[0], buf0[2]);
- buf1[1] = _mm_add_epi32(buf0[1], buf0[3]);
- buf1[3] = _mm_sub_epi32(buf0[1], buf0[3]);
-
- // stage 4
- stage_idx++;
-
- cospi = cospi_arr(cos_bit);
- buf0[0] = buf1[0];
- buf0[1] = buf1[1];
- btf_32_sse4_1_type0(cospi[32], cospi[32], buf1[2], buf1[3], buf0[2],
- buf0[3], cos_bit);
-
- // stage 5
- stage_idx++;
- buf1[0] = buf0[0];
- buf1[1] = _mm_sub_epi32(_mm_setzero_si128(), buf0[2]);
- buf1[2] = buf0[3];
- buf1[3] = _mm_sub_epi32(_mm_setzero_si128(), buf0[1]);
-
- for (j = 0; j < 4; ++j) {
- output[j * col_num + col] = buf1[j];
- }
- }
-}
-
-void av1_fdct64_new_sse4_1(const __m128i *input, __m128i *output,
- int8_t cos_bit, const int instride,
- const int outstride) {
- const int32_t *cospi = cospi_arr(cos_bit);
- const __m128i __rounding = _mm_set1_epi32(1 << (cos_bit - 1));
-
- __m128i cospi_m32 = _mm_set1_epi32(-cospi[32]);
- __m128i cospi_p32 = _mm_set1_epi32(cospi[32]);
- __m128i cospi_m16 = _mm_set1_epi32(-cospi[16]);
- __m128i cospi_p48 = _mm_set1_epi32(cospi[48]);
- __m128i cospi_m48 = _mm_set1_epi32(-cospi[48]);
- __m128i cospi_p16 = _mm_set1_epi32(cospi[16]);
- __m128i cospi_m08 = _mm_set1_epi32(-cospi[8]);
- __m128i cospi_p56 = _mm_set1_epi32(cospi[56]);
- __m128i cospi_m56 = _mm_set1_epi32(-cospi[56]);
- __m128i cospi_m40 = _mm_set1_epi32(-cospi[40]);
- __m128i cospi_p24 = _mm_set1_epi32(cospi[24]);
- __m128i cospi_m24 = _mm_set1_epi32(-cospi[24]);
- __m128i cospi_p08 = _mm_set1_epi32(cospi[8]);
- __m128i cospi_p40 = _mm_set1_epi32(cospi[40]);
- __m128i cospi_p60 = _mm_set1_epi32(cospi[60]);
- __m128i cospi_p04 = _mm_set1_epi32(cospi[4]);
- __m128i cospi_p28 = _mm_set1_epi32(cospi[28]);
- __m128i cospi_p36 = _mm_set1_epi32(cospi[36]);
- __m128i cospi_p44 = _mm_set1_epi32(cospi[44]);
- __m128i cospi_p20 = _mm_set1_epi32(cospi[20]);
- __m128i cospi_p12 = _mm_set1_epi32(cospi[12]);
- __m128i cospi_p52 = _mm_set1_epi32(cospi[52]);
- __m128i cospi_m04 = _mm_set1_epi32(-cospi[4]);
- __m128i cospi_m60 = _mm_set1_epi32(-cospi[60]);
- __m128i cospi_m36 = _mm_set1_epi32(-cospi[36]);
- __m128i cospi_m28 = _mm_set1_epi32(-cospi[28]);
- __m128i cospi_m20 = _mm_set1_epi32(-cospi[20]);
- __m128i cospi_m44 = _mm_set1_epi32(-cospi[44]);
- __m128i cospi_m52 = _mm_set1_epi32(-cospi[52]);
- __m128i cospi_m12 = _mm_set1_epi32(-cospi[12]);
- __m128i cospi_p62 = _mm_set1_epi32(cospi[62]);
- __m128i cospi_p02 = _mm_set1_epi32(cospi[2]);
- __m128i cospi_p30 = _mm_set1_epi32(cospi[30]);
- __m128i cospi_p34 = _mm_set1_epi32(cospi[34]);
- __m128i cospi_p46 = _mm_set1_epi32(cospi[46]);
- __m128i cospi_p18 = _mm_set1_epi32(cospi[18]);
- __m128i cospi_p14 = _mm_set1_epi32(cospi[14]);
- __m128i cospi_p50 = _mm_set1_epi32(cospi[50]);
- __m128i cospi_p54 = _mm_set1_epi32(cospi[54]);
- __m128i cospi_p10 = _mm_set1_epi32(cospi[10]);
- __m128i cospi_p22 = _mm_set1_epi32(cospi[22]);
- __m128i cospi_p42 = _mm_set1_epi32(cospi[42]);
- __m128i cospi_p38 = _mm_set1_epi32(cospi[38]);
- __m128i cospi_p26 = _mm_set1_epi32(cospi[26]);
- __m128i cospi_p06 = _mm_set1_epi32(cospi[6]);
- __m128i cospi_p58 = _mm_set1_epi32(cospi[58]);
- __m128i cospi_p63 = _mm_set1_epi32(cospi[63]);
- __m128i cospi_p01 = _mm_set1_epi32(cospi[1]);
- __m128i cospi_p31 = _mm_set1_epi32(cospi[31]);
- __m128i cospi_p33 = _mm_set1_epi32(cospi[33]);
- __m128i cospi_p47 = _mm_set1_epi32(cospi[47]);
- __m128i cospi_p17 = _mm_set1_epi32(cospi[17]);
- __m128i cospi_p15 = _mm_set1_epi32(cospi[15]);
- __m128i cospi_p49 = _mm_set1_epi32(cospi[49]);
- __m128i cospi_p55 = _mm_set1_epi32(cospi[55]);
- __m128i cospi_p09 = _mm_set1_epi32(cospi[9]);
- __m128i cospi_p23 = _mm_set1_epi32(cospi[23]);
- __m128i cospi_p41 = _mm_set1_epi32(cospi[41]);
- __m128i cospi_p39 = _mm_set1_epi32(cospi[39]);
- __m128i cospi_p25 = _mm_set1_epi32(cospi[25]);
- __m128i cospi_p07 = _mm_set1_epi32(cospi[7]);
- __m128i cospi_p57 = _mm_set1_epi32(cospi[57]);
- __m128i cospi_p59 = _mm_set1_epi32(cospi[59]);
- __m128i cospi_p05 = _mm_set1_epi32(cospi[5]);
- __m128i cospi_p27 = _mm_set1_epi32(cospi[27]);
- __m128i cospi_p37 = _mm_set1_epi32(cospi[37]);
- __m128i cospi_p43 = _mm_set1_epi32(cospi[43]);
- __m128i cospi_p21 = _mm_set1_epi32(cospi[21]);
- __m128i cospi_p11 = _mm_set1_epi32(cospi[11]);
- __m128i cospi_p53 = _mm_set1_epi32(cospi[53]);
- __m128i cospi_p51 = _mm_set1_epi32(cospi[51]);
- __m128i cospi_p13 = _mm_set1_epi32(cospi[13]);
- __m128i cospi_p19 = _mm_set1_epi32(cospi[19]);
- __m128i cospi_p45 = _mm_set1_epi32(cospi[45]);
- __m128i cospi_p35 = _mm_set1_epi32(cospi[35]);
- __m128i cospi_p29 = _mm_set1_epi32(cospi[29]);
- __m128i cospi_p03 = _mm_set1_epi32(cospi[3]);
- __m128i cospi_p61 = _mm_set1_epi32(cospi[61]);
-
- // stage 1
- __m128i x1[64];
- x1[0] = _mm_add_epi32(input[0 * instride], input[63 * instride]);
- x1[63] = _mm_sub_epi32(input[0 * instride], input[63 * instride]);
- x1[1] = _mm_add_epi32(input[1 * instride], input[62 * instride]);
- x1[62] = _mm_sub_epi32(input[1 * instride], input[62 * instride]);
- x1[2] = _mm_add_epi32(input[2 * instride], input[61 * instride]);
- x1[61] = _mm_sub_epi32(input[2 * instride], input[61 * instride]);
- x1[3] = _mm_add_epi32(input[3 * instride], input[60 * instride]);
- x1[60] = _mm_sub_epi32(input[3 * instride], input[60 * instride]);
- x1[4] = _mm_add_epi32(input[4 * instride], input[59 * instride]);
- x1[59] = _mm_sub_epi32(input[4 * instride], input[59 * instride]);
- x1[5] = _mm_add_epi32(input[5 * instride], input[58 * instride]);
- x1[58] = _mm_sub_epi32(input[5 * instride], input[58 * instride]);
- x1[6] = _mm_add_epi32(input[6 * instride], input[57 * instride]);
- x1[57] = _mm_sub_epi32(input[6 * instride], input[57 * instride]);
- x1[7] = _mm_add_epi32(input[7 * instride], input[56 * instride]);
- x1[56] = _mm_sub_epi32(input[7 * instride], input[56 * instride]);
- x1[8] = _mm_add_epi32(input[8 * instride], input[55 * instride]);
- x1[55] = _mm_sub_epi32(input[8 * instride], input[55 * instride]);
- x1[9] = _mm_add_epi32(input[9 * instride], input[54 * instride]);
- x1[54] = _mm_sub_epi32(input[9 * instride], input[54 * instride]);
- x1[10] = _mm_add_epi32(input[10 * instride], input[53 * instride]);
- x1[53] = _mm_sub_epi32(input[10 * instride], input[53 * instride]);
- x1[11] = _mm_add_epi32(input[11 * instride], input[52 * instride]);
- x1[52] = _mm_sub_epi32(input[11 * instride], input[52 * instride]);
- x1[12] = _mm_add_epi32(input[12 * instride], input[51 * instride]);
- x1[51] = _mm_sub_epi32(input[12 * instride], input[51 * instride]);
- x1[13] = _mm_add_epi32(input[13 * instride], input[50 * instride]);
- x1[50] = _mm_sub_epi32(input[13 * instride], input[50 * instride]);
- x1[14] = _mm_add_epi32(input[14 * instride], input[49 * instride]);
- x1[49] = _mm_sub_epi32(input[14 * instride], input[49 * instride]);
- x1[15] = _mm_add_epi32(input[15 * instride], input[48 * instride]);
- x1[48] = _mm_sub_epi32(input[15 * instride], input[48 * instride]);
- x1[16] = _mm_add_epi32(input[16 * instride], input[47 * instride]);
- x1[47] = _mm_sub_epi32(input[16 * instride], input[47 * instride]);
- x1[17] = _mm_add_epi32(input[17 * instride], input[46 * instride]);
- x1[46] = _mm_sub_epi32(input[17 * instride], input[46 * instride]);
- x1[18] = _mm_add_epi32(input[18 * instride], input[45 * instride]);
- x1[45] = _mm_sub_epi32(input[18 * instride], input[45 * instride]);
- x1[19] = _mm_add_epi32(input[19 * instride], input[44 * instride]);
- x1[44] = _mm_sub_epi32(input[19 * instride], input[44 * instride]);
- x1[20] = _mm_add_epi32(input[20 * instride], input[43 * instride]);
- x1[43] = _mm_sub_epi32(input[20 * instride], input[43 * instride]);
- x1[21] = _mm_add_epi32(input[21 * instride], input[42 * instride]);
- x1[42] = _mm_sub_epi32(input[21 * instride], input[42 * instride]);
- x1[22] = _mm_add_epi32(input[22 * instride], input[41 * instride]);
- x1[41] = _mm_sub_epi32(input[22 * instride], input[41 * instride]);
- x1[23] = _mm_add_epi32(input[23 * instride], input[40 * instride]);
- x1[40] = _mm_sub_epi32(input[23 * instride], input[40 * instride]);
- x1[24] = _mm_add_epi32(input[24 * instride], input[39 * instride]);
- x1[39] = _mm_sub_epi32(input[24 * instride], input[39 * instride]);
- x1[25] = _mm_add_epi32(input[25 * instride], input[38 * instride]);
- x1[38] = _mm_sub_epi32(input[25 * instride], input[38 * instride]);
- x1[26] = _mm_add_epi32(input[26 * instride], input[37 * instride]);
- x1[37] = _mm_sub_epi32(input[26 * instride], input[37 * instride]);
- x1[27] = _mm_add_epi32(input[27 * instride], input[36 * instride]);
- x1[36] = _mm_sub_epi32(input[27 * instride], input[36 * instride]);
- x1[28] = _mm_add_epi32(input[28 * instride], input[35 * instride]);
- x1[35] = _mm_sub_epi32(input[28 * instride], input[35 * instride]);
- x1[29] = _mm_add_epi32(input[29 * instride], input[34 * instride]);
- x1[34] = _mm_sub_epi32(input[29 * instride], input[34 * instride]);
- x1[30] = _mm_add_epi32(input[30 * instride], input[33 * instride]);
- x1[33] = _mm_sub_epi32(input[30 * instride], input[33 * instride]);
- x1[31] = _mm_add_epi32(input[31 * instride], input[32 * instride]);
- x1[32] = _mm_sub_epi32(input[31 * instride], input[32 * instride]);
-
- // stage 2
- __m128i x2[64];
- x2[0] = _mm_add_epi32(x1[0], x1[31]);
- x2[31] = _mm_sub_epi32(x1[0], x1[31]);
- x2[1] = _mm_add_epi32(x1[1], x1[30]);
- x2[30] = _mm_sub_epi32(x1[1], x1[30]);
- x2[2] = _mm_add_epi32(x1[2], x1[29]);
- x2[29] = _mm_sub_epi32(x1[2], x1[29]);
- x2[3] = _mm_add_epi32(x1[3], x1[28]);
- x2[28] = _mm_sub_epi32(x1[3], x1[28]);
- x2[4] = _mm_add_epi32(x1[4], x1[27]);
- x2[27] = _mm_sub_epi32(x1[4], x1[27]);
- x2[5] = _mm_add_epi32(x1[5], x1[26]);
- x2[26] = _mm_sub_epi32(x1[5], x1[26]);
- x2[6] = _mm_add_epi32(x1[6], x1[25]);
- x2[25] = _mm_sub_epi32(x1[6], x1[25]);
- x2[7] = _mm_add_epi32(x1[7], x1[24]);
- x2[24] = _mm_sub_epi32(x1[7], x1[24]);
- x2[8] = _mm_add_epi32(x1[8], x1[23]);
- x2[23] = _mm_sub_epi32(x1[8], x1[23]);
- x2[9] = _mm_add_epi32(x1[9], x1[22]);
- x2[22] = _mm_sub_epi32(x1[9], x1[22]);
- x2[10] = _mm_add_epi32(x1[10], x1[21]);
- x2[21] = _mm_sub_epi32(x1[10], x1[21]);
- x2[11] = _mm_add_epi32(x1[11], x1[20]);
- x2[20] = _mm_sub_epi32(x1[11], x1[20]);
- x2[12] = _mm_add_epi32(x1[12], x1[19]);
- x2[19] = _mm_sub_epi32(x1[12], x1[19]);
- x2[13] = _mm_add_epi32(x1[13], x1[18]);
- x2[18] = _mm_sub_epi32(x1[13], x1[18]);
- x2[14] = _mm_add_epi32(x1[14], x1[17]);
- x2[17] = _mm_sub_epi32(x1[14], x1[17]);
- x2[15] = _mm_add_epi32(x1[15], x1[16]);
- x2[16] = _mm_sub_epi32(x1[15], x1[16]);
- x2[32] = x1[32];
- x2[33] = x1[33];
- x2[34] = x1[34];
- x2[35] = x1[35];
- x2[36] = x1[36];
- x2[37] = x1[37];
- x2[38] = x1[38];
- x2[39] = x1[39];
- btf_32_type0_sse4_1_new(cospi_m32, cospi_p32, x1[40], x1[55], x2[40], x2[55],
- __rounding, cos_bit);
- btf_32_type0_sse4_1_new(cospi_m32, cospi_p32, x1[41], x1[54], x2[41], x2[54],
- __rounding, cos_bit);
- btf_32_type0_sse4_1_new(cospi_m32, cospi_p32, x1[42], x1[53], x2[42], x2[53],
- __rounding, cos_bit);
- btf_32_type0_sse4_1_new(cospi_m32, cospi_p32, x1[43], x1[52], x2[43], x2[52],
- __rounding, cos_bit);
- btf_32_type0_sse4_1_new(cospi_m32, cospi_p32, x1[44], x1[51], x2[44], x2[51],
- __rounding, cos_bit);
- btf_32_type0_sse4_1_new(cospi_m32, cospi_p32, x1[45], x1[50], x2[45], x2[50],
- __rounding, cos_bit);
- btf_32_type0_sse4_1_new(cospi_m32, cospi_p32, x1[46], x1[49], x2[46], x2[49],
- __rounding, cos_bit);
- btf_32_type0_sse4_1_new(cospi_m32, cospi_p32, x1[47], x1[48], x2[47], x2[48],
- __rounding, cos_bit);
- x2[56] = x1[56];
- x2[57] = x1[57];
- x2[58] = x1[58];
- x2[59] = x1[59];
- x2[60] = x1[60];
- x2[61] = x1[61];
- x2[62] = x1[62];
- x2[63] = x1[63];
-
- // stage 3
- __m128i x3[64];
- x3[0] = _mm_add_epi32(x2[0], x2[15]);
- x3[15] = _mm_sub_epi32(x2[0], x2[15]);
- x3[1] = _mm_add_epi32(x2[1], x2[14]);
- x3[14] = _mm_sub_epi32(x2[1], x2[14]);
- x3[2] = _mm_add_epi32(x2[2], x2[13]);
- x3[13] = _mm_sub_epi32(x2[2], x2[13]);
- x3[3] = _mm_add_epi32(x2[3], x2[12]);
- x3[12] = _mm_sub_epi32(x2[3], x2[12]);
- x3[4] = _mm_add_epi32(x2[4], x2[11]);
- x3[11] = _mm_sub_epi32(x2[4], x2[11]);
- x3[5] = _mm_add_epi32(x2[5], x2[10]);
- x3[10] = _mm_sub_epi32(x2[5], x2[10]);
- x3[6] = _mm_add_epi32(x2[6], x2[9]);
- x3[9] = _mm_sub_epi32(x2[6], x2[9]);
- x3[7] = _mm_add_epi32(x2[7], x2[8]);
- x3[8] = _mm_sub_epi32(x2[7], x2[8]);
- x3[16] = x2[16];
- x3[17] = x2[17];
- x3[18] = x2[18];
- x3[19] = x2[19];
- btf_32_type0_sse4_1_new(cospi_m32, cospi_p32, x2[20], x2[27], x3[20], x3[27],
- __rounding, cos_bit);
- btf_32_type0_sse4_1_new(cospi_m32, cospi_p32, x2[21], x2[26], x3[21], x3[26],
- __rounding, cos_bit);
- btf_32_type0_sse4_1_new(cospi_m32, cospi_p32, x2[22], x2[25], x3[22], x3[25],
- __rounding, cos_bit);
- btf_32_type0_sse4_1_new(cospi_m32, cospi_p32, x2[23], x2[24], x3[23], x3[24],
- __rounding, cos_bit);
- x3[28] = x2[28];
- x3[29] = x2[29];
- x3[30] = x2[30];
- x3[31] = x2[31];
- x3[32] = _mm_add_epi32(x2[32], x2[47]);
- x3[47] = _mm_sub_epi32(x2[32], x2[47]);
- x3[33] = _mm_add_epi32(x2[33], x2[46]);
- x3[46] = _mm_sub_epi32(x2[33], x2[46]);
- x3[34] = _mm_add_epi32(x2[34], x2[45]);
- x3[45] = _mm_sub_epi32(x2[34], x2[45]);
- x3[35] = _mm_add_epi32(x2[35], x2[44]);
- x3[44] = _mm_sub_epi32(x2[35], x2[44]);
- x3[36] = _mm_add_epi32(x2[36], x2[43]);
- x3[43] = _mm_sub_epi32(x2[36], x2[43]);
- x3[37] = _mm_add_epi32(x2[37], x2[42]);
- x3[42] = _mm_sub_epi32(x2[37], x2[42]);
- x3[38] = _mm_add_epi32(x2[38], x2[41]);
- x3[41] = _mm_sub_epi32(x2[38], x2[41]);
- x3[39] = _mm_add_epi32(x2[39], x2[40]);
- x3[40] = _mm_sub_epi32(x2[39], x2[40]);
- x3[48] = _mm_sub_epi32(x2[63], x2[48]);
- x3[63] = _mm_add_epi32(x2[63], x2[48]);
- x3[49] = _mm_sub_epi32(x2[62], x2[49]);
- x3[62] = _mm_add_epi32(x2[62], x2[49]);
- x3[50] = _mm_sub_epi32(x2[61], x2[50]);
- x3[61] = _mm_add_epi32(x2[61], x2[50]);
- x3[51] = _mm_sub_epi32(x2[60], x2[51]);
- x3[60] = _mm_add_epi32(x2[60], x2[51]);
- x3[52] = _mm_sub_epi32(x2[59], x2[52]);
- x3[59] = _mm_add_epi32(x2[59], x2[52]);
- x3[53] = _mm_sub_epi32(x2[58], x2[53]);
- x3[58] = _mm_add_epi32(x2[58], x2[53]);
- x3[54] = _mm_sub_epi32(x2[57], x2[54]);
- x3[57] = _mm_add_epi32(x2[57], x2[54]);
- x3[55] = _mm_sub_epi32(x2[56], x2[55]);
- x3[56] = _mm_add_epi32(x2[56], x2[55]);
-
- // stage 4
- __m128i x4[64];
- x4[0] = _mm_add_epi32(x3[0], x3[7]);
- x4[7] = _mm_sub_epi32(x3[0], x3[7]);
- x4[1] = _mm_add_epi32(x3[1], x3[6]);
- x4[6] = _mm_sub_epi32(x3[1], x3[6]);
- x4[2] = _mm_add_epi32(x3[2], x3[5]);
- x4[5] = _mm_sub_epi32(x3[2], x3[5]);
- x4[3] = _mm_add_epi32(x3[3], x3[4]);
- x4[4] = _mm_sub_epi32(x3[3], x3[4]);
- x4[8] = x3[8];
- x4[9] = x3[9];
- btf_32_type0_sse4_1_new(cospi_m32, cospi_p32, x3[10], x3[13], x4[10], x4[13],
- __rounding, cos_bit);
- btf_32_type0_sse4_1_new(cospi_m32, cospi_p32, x3[11], x3[12], x4[11], x4[12],
- __rounding, cos_bit);
- x4[14] = x3[14];
- x4[15] = x3[15];
- x4[16] = _mm_add_epi32(x3[16], x3[23]);
- x4[23] = _mm_sub_epi32(x3[16], x3[23]);
- x4[17] = _mm_add_epi32(x3[17], x3[22]);
- x4[22] = _mm_sub_epi32(x3[17], x3[22]);
- x4[18] = _mm_add_epi32(x3[18], x3[21]);
- x4[21] = _mm_sub_epi32(x3[18], x3[21]);
- x4[19] = _mm_add_epi32(x3[19], x3[20]);
- x4[20] = _mm_sub_epi32(x3[19], x3[20]);
- x4[24] = _mm_sub_epi32(x3[31], x3[24]);
- x4[31] = _mm_add_epi32(x3[31], x3[24]);
- x4[25] = _mm_sub_epi32(x3[30], x3[25]);
- x4[30] = _mm_add_epi32(x3[30], x3[25]);
- x4[26] = _mm_sub_epi32(x3[29], x3[26]);
- x4[29] = _mm_add_epi32(x3[29], x3[26]);
- x4[27] = _mm_sub_epi32(x3[28], x3[27]);
- x4[28] = _mm_add_epi32(x3[28], x3[27]);
- x4[32] = x3[32];
- x4[33] = x3[33];
- x4[34] = x3[34];
- x4[35] = x3[35];
- btf_32_type0_sse4_1_new(cospi_m16, cospi_p48, x3[36], x3[59], x4[36], x4[59],
- __rounding, cos_bit);
- btf_32_type0_sse4_1_new(cospi_m16, cospi_p48, x3[37], x3[58], x4[37], x4[58],
- __rounding, cos_bit);
- btf_32_type0_sse4_1_new(cospi_m16, cospi_p48, x3[38], x3[57], x4[38], x4[57],
- __rounding, cos_bit);
- btf_32_type0_sse4_1_new(cospi_m16, cospi_p48, x3[39], x3[56], x4[39], x4[56],
- __rounding, cos_bit);
- btf_32_type0_sse4_1_new(cospi_m48, cospi_m16, x3[40], x3[55], x4[40], x4[55],
- __rounding, cos_bit);
- btf_32_type0_sse4_1_new(cospi_m48, cospi_m16, x3[41], x3[54], x4[41], x4[54],
- __rounding, cos_bit);
- btf_32_type0_sse4_1_new(cospi_m48, cospi_m16, x3[42], x3[53], x4[42], x4[53],
- __rounding, cos_bit);
- btf_32_type0_sse4_1_new(cospi_m48, cospi_m16, x3[43], x3[52], x4[43], x4[52],
- __rounding, cos_bit);
- x4[44] = x3[44];
- x4[45] = x3[45];
- x4[46] = x3[46];
- x4[47] = x3[47];
- x4[48] = x3[48];
- x4[49] = x3[49];
- x4[50] = x3[50];
- x4[51] = x3[51];
- x4[60] = x3[60];
- x4[61] = x3[61];
- x4[62] = x3[62];
- x4[63] = x3[63];
-
- // stage 5
- __m128i x5[64];
- x5[0] = _mm_add_epi32(x4[0], x4[3]);
- x5[3] = _mm_sub_epi32(x4[0], x4[3]);
- x5[1] = _mm_add_epi32(x4[1], x4[2]);
- x5[2] = _mm_sub_epi32(x4[1], x4[2]);
- x5[4] = x4[4];
- btf_32_type0_sse4_1_new(cospi_m32, cospi_p32, x4[5], x4[6], x5[5], x5[6],
- __rounding, cos_bit);
- x5[7] = x4[7];
- x5[8] = _mm_add_epi32(x4[8], x4[11]);
- x5[11] = _mm_sub_epi32(x4[8], x4[11]);
- x5[9] = _mm_add_epi32(x4[9], x4[10]);
- x5[10] = _mm_sub_epi32(x4[9], x4[10]);
- x5[12] = _mm_sub_epi32(x4[15], x4[12]);
- x5[15] = _mm_add_epi32(x4[15], x4[12]);
- x5[13] = _mm_sub_epi32(x4[14], x4[13]);
- x5[14] = _mm_add_epi32(x4[14], x4[13]);
- x5[16] = x4[16];
- x5[17] = x4[17];
- btf_32_type0_sse4_1_new(cospi_m16, cospi_p48, x4[18], x4[29], x5[18], x5[29],
- __rounding, cos_bit);
- btf_32_type0_sse4_1_new(cospi_m16, cospi_p48, x4[19], x4[28], x5[19], x5[28],
- __rounding, cos_bit);
- btf_32_type0_sse4_1_new(cospi_m48, cospi_m16, x4[20], x4[27], x5[20], x5[27],
- __rounding, cos_bit);
- btf_32_type0_sse4_1_new(cospi_m48, cospi_m16, x4[21], x4[26], x5[21], x5[26],
- __rounding, cos_bit);
- x5[22] = x4[22];
- x5[23] = x4[23];
- x5[24] = x4[24];
- x5[25] = x4[25];
- x5[30] = x4[30];
- x5[31] = x4[31];
- x5[32] = _mm_add_epi32(x4[32], x4[39]);
- x5[39] = _mm_sub_epi32(x4[32], x4[39]);
- x5[33] = _mm_add_epi32(x4[33], x4[38]);
- x5[38] = _mm_sub_epi32(x4[33], x4[38]);
- x5[34] = _mm_add_epi32(x4[34], x4[37]);
- x5[37] = _mm_sub_epi32(x4[34], x4[37]);
- x5[35] = _mm_add_epi32(x4[35], x4[36]);
- x5[36] = _mm_sub_epi32(x4[35], x4[36]);
- x5[40] = _mm_sub_epi32(x4[47], x4[40]);
- x5[47] = _mm_add_epi32(x4[47], x4[40]);
- x5[41] = _mm_sub_epi32(x4[46], x4[41]);
- x5[46] = _mm_add_epi32(x4[46], x4[41]);
- x5[42] = _mm_sub_epi32(x4[45], x4[42]);
- x5[45] = _mm_add_epi32(x4[45], x4[42]);
- x5[43] = _mm_sub_epi32(x4[44], x4[43]);
- x5[44] = _mm_add_epi32(x4[44], x4[43]);
- x5[48] = _mm_add_epi32(x4[48], x4[55]);
- x5[55] = _mm_sub_epi32(x4[48], x4[55]);
- x5[49] = _mm_add_epi32(x4[49], x4[54]);
- x5[54] = _mm_sub_epi32(x4[49], x4[54]);
- x5[50] = _mm_add_epi32(x4[50], x4[53]);
- x5[53] = _mm_sub_epi32(x4[50], x4[53]);
- x5[51] = _mm_add_epi32(x4[51], x4[52]);
- x5[52] = _mm_sub_epi32(x4[51], x4[52]);
- x5[56] = _mm_sub_epi32(x4[63], x4[56]);
- x5[63] = _mm_add_epi32(x4[63], x4[56]);
- x5[57] = _mm_sub_epi32(x4[62], x4[57]);
- x5[62] = _mm_add_epi32(x4[62], x4[57]);
- x5[58] = _mm_sub_epi32(x4[61], x4[58]);
- x5[61] = _mm_add_epi32(x4[61], x4[58]);
- x5[59] = _mm_sub_epi32(x4[60], x4[59]);
- x5[60] = _mm_add_epi32(x4[60], x4[59]);
-
- // stage 6
- __m128i x6[64];
- btf_32_type0_sse4_1_new(cospi_p32, cospi_p32, x5[0], x5[1], x6[0], x6[1],
- __rounding, cos_bit);
- btf_32_type1_sse4_1_new(cospi_p48, cospi_p16, x5[2], x5[3], x6[2], x6[3],
- __rounding, cos_bit);
- x6[4] = _mm_add_epi32(x5[4], x5[5]);
- x6[5] = _mm_sub_epi32(x5[4], x5[5]);
- x6[6] = _mm_sub_epi32(x5[7], x5[6]);
- x6[7] = _mm_add_epi32(x5[7], x5[6]);
- x6[8] = x5[8];
- btf_32_type0_sse4_1_new(cospi_m16, cospi_p48, x5[9], x5[14], x6[9], x6[14],
- __rounding, cos_bit);
- btf_32_type0_sse4_1_new(cospi_m48, cospi_m16, x5[10], x5[13], x6[10], x6[13],
- __rounding, cos_bit);
- x6[11] = x5[11];
- x6[12] = x5[12];
- x6[15] = x5[15];
- x6[16] = _mm_add_epi32(x5[16], x5[19]);
- x6[19] = _mm_sub_epi32(x5[16], x5[19]);
- x6[17] = _mm_add_epi32(x5[17], x5[18]);
- x6[18] = _mm_sub_epi32(x5[17], x5[18]);
- x6[20] = _mm_sub_epi32(x5[23], x5[20]);
- x6[23] = _mm_add_epi32(x5[23], x5[20]);
- x6[21] = _mm_sub_epi32(x5[22], x5[21]);
- x6[22] = _mm_add_epi32(x5[22], x5[21]);
- x6[24] = _mm_add_epi32(x5[24], x5[27]);
- x6[27] = _mm_sub_epi32(x5[24], x5[27]);
- x6[25] = _mm_add_epi32(x5[25], x5[26]);
- x6[26] = _mm_sub_epi32(x5[25], x5[26]);
- x6[28] = _mm_sub_epi32(x5[31], x5[28]);
- x6[31] = _mm_add_epi32(x5[31], x5[28]);
- x6[29] = _mm_sub_epi32(x5[30], x5[29]);
- x6[30] = _mm_add_epi32(x5[30], x5[29]);
- x6[32] = x5[32];
- x6[33] = x5[33];
- btf_32_type0_sse4_1_new(cospi_m08, cospi_p56, x5[34], x5[61], x6[34], x6[61],
- __rounding, cos_bit);
- btf_32_type0_sse4_1_new(cospi_m08, cospi_p56, x5[35], x5[60], x6[35], x6[60],
- __rounding, cos_bit);
- btf_32_type0_sse4_1_new(cospi_m56, cospi_m08, x5[36], x5[59], x6[36], x6[59],
- __rounding, cos_bit);
- btf_32_type0_sse4_1_new(cospi_m56, cospi_m08, x5[37], x5[58], x6[37], x6[58],
- __rounding, cos_bit);
- x6[38] = x5[38];
- x6[39] = x5[39];
- x6[40] = x5[40];
- x6[41] = x5[41];
- btf_32_type0_sse4_1_new(cospi_m40, cospi_p24, x5[42], x5[53], x6[42], x6[53],
- __rounding, cos_bit);
- btf_32_type0_sse4_1_new(cospi_m40, cospi_p24, x5[43], x5[52], x6[43], x6[52],
- __rounding, cos_bit);
- btf_32_type0_sse4_1_new(cospi_m24, cospi_m40, x5[44], x5[51], x6[44], x6[51],
- __rounding, cos_bit);
- btf_32_type0_sse4_1_new(cospi_m24, cospi_m40, x5[45], x5[50], x6[45], x6[50],
- __rounding, cos_bit);
- x6[46] = x5[46];
- x6[47] = x5[47];
- x6[48] = x5[48];
- x6[49] = x5[49];
- x6[54] = x5[54];
- x6[55] = x5[55];
- x6[56] = x5[56];
- x6[57] = x5[57];
- x6[62] = x5[62];
- x6[63] = x5[63];
-
- // stage 7
- __m128i x7[64];
- x7[0] = x6[0];
- x7[1] = x6[1];
- x7[2] = x6[2];
- x7[3] = x6[3];
- btf_32_type1_sse4_1_new(cospi_p56, cospi_p08, x6[4], x6[7], x7[4], x7[7],
- __rounding, cos_bit);
- btf_32_type1_sse4_1_new(cospi_p24, cospi_p40, x6[5], x6[6], x7[5], x7[6],
- __rounding, cos_bit);
- x7[8] = _mm_add_epi32(x6[8], x6[9]);
- x7[9] = _mm_sub_epi32(x6[8], x6[9]);
- x7[10] = _mm_sub_epi32(x6[11], x6[10]);
- x7[11] = _mm_add_epi32(x6[11], x6[10]);
- x7[12] = _mm_add_epi32(x6[12], x6[13]);
- x7[13] = _mm_sub_epi32(x6[12], x6[13]);
- x7[14] = _mm_sub_epi32(x6[15], x6[14]);
- x7[15] = _mm_add_epi32(x6[15], x6[14]);
- x7[16] = x6[16];
- btf_32_type0_sse4_1_new(cospi_m08, cospi_p56, x6[17], x6[30], x7[17], x7[30],
- __rounding, cos_bit);
- btf_32_type0_sse4_1_new(cospi_m56, cospi_m08, x6[18], x6[29], x7[18], x7[29],
- __rounding, cos_bit);
- x7[19] = x6[19];
- x7[20] = x6[20];
- btf_32_type0_sse4_1_new(cospi_m40, cospi_p24, x6[21], x6[26], x7[21], x7[26],
- __rounding, cos_bit);
- btf_32_type0_sse4_1_new(cospi_m24, cospi_m40, x6[22], x6[25], x7[22], x7[25],
- __rounding, cos_bit);
- x7[23] = x6[23];
- x7[24] = x6[24];
- x7[27] = x6[27];
- x7[28] = x6[28];
- x7[31] = x6[31];
- x7[32] = _mm_add_epi32(x6[32], x6[35]);
- x7[35] = _mm_sub_epi32(x6[32], x6[35]);
- x7[33] = _mm_add_epi32(x6[33], x6[34]);
- x7[34] = _mm_sub_epi32(x6[33], x6[34]);
- x7[36] = _mm_sub_epi32(x6[39], x6[36]);
- x7[39] = _mm_add_epi32(x6[39], x6[36]);
- x7[37] = _mm_sub_epi32(x6[38], x6[37]);
- x7[38] = _mm_add_epi32(x6[38], x6[37]);
- x7[40] = _mm_add_epi32(x6[40], x6[43]);
- x7[43] = _mm_sub_epi32(x6[40], x6[43]);
- x7[41] = _mm_add_epi32(x6[41], x6[42]);
- x7[42] = _mm_sub_epi32(x6[41], x6[42]);
- x7[44] = _mm_sub_epi32(x6[47], x6[44]);
- x7[47] = _mm_add_epi32(x6[47], x6[44]);
- x7[45] = _mm_sub_epi32(x6[46], x6[45]);
- x7[46] = _mm_add_epi32(x6[46], x6[45]);
- x7[48] = _mm_add_epi32(x6[48], x6[51]);
- x7[51] = _mm_sub_epi32(x6[48], x6[51]);
- x7[49] = _mm_add_epi32(x6[49], x6[50]);
- x7[50] = _mm_sub_epi32(x6[49], x6[50]);
- x7[52] = _mm_sub_epi32(x6[55], x6[52]);
- x7[55] = _mm_add_epi32(x6[55], x6[52]);
- x7[53] = _mm_sub_epi32(x6[54], x6[53]);
- x7[54] = _mm_add_epi32(x6[54], x6[53]);
- x7[56] = _mm_add_epi32(x6[56], x6[59]);
- x7[59] = _mm_sub_epi32(x6[56], x6[59]);
- x7[57] = _mm_add_epi32(x6[57], x6[58]);
- x7[58] = _mm_sub_epi32(x6[57], x6[58]);
- x7[60] = _mm_sub_epi32(x6[63], x6[60]);
- x7[63] = _mm_add_epi32(x6[63], x6[60]);
- x7[61] = _mm_sub_epi32(x6[62], x6[61]);
- x7[62] = _mm_add_epi32(x6[62], x6[61]);
-
- // stage 8
- __m128i x8[64];
- x8[0] = x7[0];
- x8[1] = x7[1];
- x8[2] = x7[2];
- x8[3] = x7[3];
- x8[4] = x7[4];
- x8[5] = x7[5];
- x8[6] = x7[6];
- x8[7] = x7[7];
- btf_32_type1_sse4_1_new(cospi_p60, cospi_p04, x7[8], x7[15], x8[8], x8[15],
- __rounding, cos_bit);
- btf_32_type1_sse4_1_new(cospi_p28, cospi_p36, x7[9], x7[14], x8[9], x8[14],
- __rounding, cos_bit);
- btf_32_type1_sse4_1_new(cospi_p44, cospi_p20, x7[10], x7[13], x8[10], x8[13],
- __rounding, cos_bit);
- btf_32_type1_sse4_1_new(cospi_p12, cospi_p52, x7[11], x7[12], x8[11], x8[12],
- __rounding, cos_bit);
- x8[16] = _mm_add_epi32(x7[16], x7[17]);
- x8[17] = _mm_sub_epi32(x7[16], x7[17]);
- x8[18] = _mm_sub_epi32(x7[19], x7[18]);
- x8[19] = _mm_add_epi32(x7[19], x7[18]);
- x8[20] = _mm_add_epi32(x7[20], x7[21]);
- x8[21] = _mm_sub_epi32(x7[20], x7[21]);
- x8[22] = _mm_sub_epi32(x7[23], x7[22]);
- x8[23] = _mm_add_epi32(x7[23], x7[22]);
- x8[24] = _mm_add_epi32(x7[24], x7[25]);
- x8[25] = _mm_sub_epi32(x7[24], x7[25]);
- x8[26] = _mm_sub_epi32(x7[27], x7[26]);
- x8[27] = _mm_add_epi32(x7[27], x7[26]);
- x8[28] = _mm_add_epi32(x7[28], x7[29]);
- x8[29] = _mm_sub_epi32(x7[28], x7[29]);
- x8[30] = _mm_sub_epi32(x7[31], x7[30]);
- x8[31] = _mm_add_epi32(x7[31], x7[30]);
- x8[32] = x7[32];
- btf_32_type0_sse4_1_new(cospi_m04, cospi_p60, x7[33], x7[62], x8[33], x8[62],
- __rounding, cos_bit);
- btf_32_type0_sse4_1_new(cospi_m60, cospi_m04, x7[34], x7[61], x8[34], x8[61],
- __rounding, cos_bit);
- x8[35] = x7[35];
- x8[36] = x7[36];
- btf_32_type0_sse4_1_new(cospi_m36, cospi_p28, x7[37], x7[58], x8[37], x8[58],
- __rounding, cos_bit);
- btf_32_type0_sse4_1_new(cospi_m28, cospi_m36, x7[38], x7[57], x8[38], x8[57],
- __rounding, cos_bit);
- x8[39] = x7[39];
- x8[40] = x7[40];
- btf_32_type0_sse4_1_new(cospi_m20, cospi_p44, x7[41], x7[54], x8[41], x8[54],
- __rounding, cos_bit);
- btf_32_type0_sse4_1_new(cospi_m44, cospi_m20, x7[42], x7[53], x8[42], x8[53],
- __rounding, cos_bit);
- x8[43] = x7[43];
- x8[44] = x7[44];
- btf_32_type0_sse4_1_new(cospi_m52, cospi_p12, x7[45], x7[50], x8[45], x8[50],
- __rounding, cos_bit);
- btf_32_type0_sse4_1_new(cospi_m12, cospi_m52, x7[46], x7[49], x8[46], x8[49],
- __rounding, cos_bit);
- x8[47] = x7[47];
- x8[48] = x7[48];
- x8[51] = x7[51];
- x8[52] = x7[52];
- x8[55] = x7[55];
- x8[56] = x7[56];
- x8[59] = x7[59];
- x8[60] = x7[60];
- x8[63] = x7[63];
-
- // stage 9
- __m128i x9[64];
- x9[0] = x8[0];
- x9[1] = x8[1];
- x9[2] = x8[2];
- x9[3] = x8[3];
- x9[4] = x8[4];
- x9[5] = x8[5];
- x9[6] = x8[6];
- x9[7] = x8[7];
- x9[8] = x8[8];
- x9[9] = x8[9];
- x9[10] = x8[10];
- x9[11] = x8[11];
- x9[12] = x8[12];
- x9[13] = x8[13];
- x9[14] = x8[14];
- x9[15] = x8[15];
- btf_32_type1_sse4_1_new(cospi_p62, cospi_p02, x8[16], x8[31], x9[16], x9[31],
- __rounding, cos_bit);
- btf_32_type1_sse4_1_new(cospi_p30, cospi_p34, x8[17], x8[30], x9[17], x9[30],
- __rounding, cos_bit);
- btf_32_type1_sse4_1_new(cospi_p46, cospi_p18, x8[18], x8[29], x9[18], x9[29],
- __rounding, cos_bit);
- btf_32_type1_sse4_1_new(cospi_p14, cospi_p50, x8[19], x8[28], x9[19], x9[28],
- __rounding, cos_bit);
- btf_32_type1_sse4_1_new(cospi_p54, cospi_p10, x8[20], x8[27], x9[20], x9[27],
- __rounding, cos_bit);
- btf_32_type1_sse4_1_new(cospi_p22, cospi_p42, x8[21], x8[26], x9[21], x9[26],
- __rounding, cos_bit);
- btf_32_type1_sse4_1_new(cospi_p38, cospi_p26, x8[22], x8[25], x9[22], x9[25],
- __rounding, cos_bit);
- btf_32_type1_sse4_1_new(cospi_p06, cospi_p58, x8[23], x8[24], x9[23], x9[24],
- __rounding, cos_bit);
- x9[32] = _mm_add_epi32(x8[32], x8[33]);
- x9[33] = _mm_sub_epi32(x8[32], x8[33]);
- x9[34] = _mm_sub_epi32(x8[35], x8[34]);
- x9[35] = _mm_add_epi32(x8[35], x8[34]);
- x9[36] = _mm_add_epi32(x8[36], x8[37]);
- x9[37] = _mm_sub_epi32(x8[36], x8[37]);
- x9[38] = _mm_sub_epi32(x8[39], x8[38]);
- x9[39] = _mm_add_epi32(x8[39], x8[38]);
- x9[40] = _mm_add_epi32(x8[40], x8[41]);
- x9[41] = _mm_sub_epi32(x8[40], x8[41]);
- x9[42] = _mm_sub_epi32(x8[43], x8[42]);
- x9[43] = _mm_add_epi32(x8[43], x8[42]);
- x9[44] = _mm_add_epi32(x8[44], x8[45]);
- x9[45] = _mm_sub_epi32(x8[44], x8[45]);
- x9[46] = _mm_sub_epi32(x8[47], x8[46]);
- x9[47] = _mm_add_epi32(x8[47], x8[46]);
- x9[48] = _mm_add_epi32(x8[48], x8[49]);
- x9[49] = _mm_sub_epi32(x8[48], x8[49]);
- x9[50] = _mm_sub_epi32(x8[51], x8[50]);
- x9[51] = _mm_add_epi32(x8[51], x8[50]);
- x9[52] = _mm_add_epi32(x8[52], x8[53]);
- x9[53] = _mm_sub_epi32(x8[52], x8[53]);
- x9[54] = _mm_sub_epi32(x8[55], x8[54]);
- x9[55] = _mm_add_epi32(x8[55], x8[54]);
- x9[56] = _mm_add_epi32(x8[56], x8[57]);
- x9[57] = _mm_sub_epi32(x8[56], x8[57]);
- x9[58] = _mm_sub_epi32(x8[59], x8[58]);
- x9[59] = _mm_add_epi32(x8[59], x8[58]);
- x9[60] = _mm_add_epi32(x8[60], x8[61]);
- x9[61] = _mm_sub_epi32(x8[60], x8[61]);
- x9[62] = _mm_sub_epi32(x8[63], x8[62]);
- x9[63] = _mm_add_epi32(x8[63], x8[62]);
-
- // stage 10
- __m128i x10[64];
- x10[0] = x9[0];
- x10[1] = x9[1];
- x10[2] = x9[2];
- x10[3] = x9[3];
- x10[4] = x9[4];
- x10[5] = x9[5];
- x10[6] = x9[6];
- x10[7] = x9[7];
- x10[8] = x9[8];
- x10[9] = x9[9];
- x10[10] = x9[10];
- x10[11] = x9[11];
- x10[12] = x9[12];
- x10[13] = x9[13];
- x10[14] = x9[14];
- x10[15] = x9[15];
- x10[16] = x9[16];
- x10[17] = x9[17];
- x10[18] = x9[18];
- x10[19] = x9[19];
- x10[20] = x9[20];
- x10[21] = x9[21];
- x10[22] = x9[22];
- x10[23] = x9[23];
- x10[24] = x9[24];
- x10[25] = x9[25];
- x10[26] = x9[26];
- x10[27] = x9[27];
- x10[28] = x9[28];
- x10[29] = x9[29];
- x10[30] = x9[30];
- x10[31] = x9[31];
- btf_32_type1_sse4_1_new(cospi_p63, cospi_p01, x9[32], x9[63], x10[32],
- x10[63], __rounding, cos_bit);
- btf_32_type1_sse4_1_new(cospi_p31, cospi_p33, x9[33], x9[62], x10[33],
- x10[62], __rounding, cos_bit);
- btf_32_type1_sse4_1_new(cospi_p47, cospi_p17, x9[34], x9[61], x10[34],
- x10[61], __rounding, cos_bit);
- btf_32_type1_sse4_1_new(cospi_p15, cospi_p49, x9[35], x9[60], x10[35],
- x10[60], __rounding, cos_bit);
- btf_32_type1_sse4_1_new(cospi_p55, cospi_p09, x9[36], x9[59], x10[36],
- x10[59], __rounding, cos_bit);
- btf_32_type1_sse4_1_new(cospi_p23, cospi_p41, x9[37], x9[58], x10[37],
- x10[58], __rounding, cos_bit);
- btf_32_type1_sse4_1_new(cospi_p39, cospi_p25, x9[38], x9[57], x10[38],
- x10[57], __rounding, cos_bit);
- btf_32_type1_sse4_1_new(cospi_p07, cospi_p57, x9[39], x9[56], x10[39],
- x10[56], __rounding, cos_bit);
- btf_32_type1_sse4_1_new(cospi_p59, cospi_p05, x9[40], x9[55], x10[40],
- x10[55], __rounding, cos_bit);
- btf_32_type1_sse4_1_new(cospi_p27, cospi_p37, x9[41], x9[54], x10[41],
- x10[54], __rounding, cos_bit);
- btf_32_type1_sse4_1_new(cospi_p43, cospi_p21, x9[42], x9[53], x10[42],
- x10[53], __rounding, cos_bit);
- btf_32_type1_sse4_1_new(cospi_p11, cospi_p53, x9[43], x9[52], x10[43],
- x10[52], __rounding, cos_bit);
- btf_32_type1_sse4_1_new(cospi_p51, cospi_p13, x9[44], x9[51], x10[44],
- x10[51], __rounding, cos_bit);
- btf_32_type1_sse4_1_new(cospi_p19, cospi_p45, x9[45], x9[50], x10[45],
- x10[50], __rounding, cos_bit);
- btf_32_type1_sse4_1_new(cospi_p35, cospi_p29, x9[46], x9[49], x10[46],
- x10[49], __rounding, cos_bit);
- btf_32_type1_sse4_1_new(cospi_p03, cospi_p61, x9[47], x9[48], x10[47],
- x10[48], __rounding, cos_bit);
-
- // stage 11
- output[0 * outstride] = x10[0];
- output[1 * outstride] = x10[32];
- output[2 * outstride] = x10[16];
- output[3 * outstride] = x10[48];
- output[4 * outstride] = x10[8];
- output[5 * outstride] = x10[40];
- output[6 * outstride] = x10[24];
- output[7 * outstride] = x10[56];
- output[8 * outstride] = x10[4];
- output[9 * outstride] = x10[36];
- output[10 * outstride] = x10[20];
- output[11 * outstride] = x10[52];
- output[12 * outstride] = x10[12];
- output[13 * outstride] = x10[44];
- output[14 * outstride] = x10[28];
- output[15 * outstride] = x10[60];
- output[16 * outstride] = x10[2];
- output[17 * outstride] = x10[34];
- output[18 * outstride] = x10[18];
- output[19 * outstride] = x10[50];
- output[20 * outstride] = x10[10];
- output[21 * outstride] = x10[42];
- output[22 * outstride] = x10[26];
- output[23 * outstride] = x10[58];
- output[24 * outstride] = x10[6];
- output[25 * outstride] = x10[38];
- output[26 * outstride] = x10[22];
- output[27 * outstride] = x10[54];
- output[28 * outstride] = x10[14];
- output[29 * outstride] = x10[46];
- output[30 * outstride] = x10[30];
- output[31 * outstride] = x10[62];
- output[32 * outstride] = x10[1];
- output[33 * outstride] = x10[33];
- output[34 * outstride] = x10[17];
- output[35 * outstride] = x10[49];
- output[36 * outstride] = x10[9];
- output[37 * outstride] = x10[41];
- output[38 * outstride] = x10[25];
- output[39 * outstride] = x10[57];
- output[40 * outstride] = x10[5];
- output[41 * outstride] = x10[37];
- output[42 * outstride] = x10[21];
- output[43 * outstride] = x10[53];
- output[44 * outstride] = x10[13];
- output[45 * outstride] = x10[45];
- output[46 * outstride] = x10[29];
- output[47 * outstride] = x10[61];
- output[48 * outstride] = x10[3];
- output[49 * outstride] = x10[35];
- output[50 * outstride] = x10[19];
- output[51 * outstride] = x10[51];
- output[52 * outstride] = x10[11];
- output[53 * outstride] = x10[43];
- output[54 * outstride] = x10[27];
- output[55 * outstride] = x10[59];
- output[56 * outstride] = x10[7];
- output[57 * outstride] = x10[39];
- output[58 * outstride] = x10[23];
- output[59 * outstride] = x10[55];
- output[60 * outstride] = x10[15];
- output[61 * outstride] = x10[47];
- output[62 * outstride] = x10[31];
- output[63 * outstride] = x10[63];
-}
diff --git a/third_party/aom/av1/encoder/x86/av1_fwd_txfm2d_avx2.c b/third_party/aom/av1/encoder/x86/av1_fwd_txfm2d_avx2.c
deleted file mode 100644
index 592462e20..000000000
--- a/third_party/aom/av1/encoder/x86/av1_fwd_txfm2d_avx2.c
+++ /dev/null
@@ -1,2068 +0,0 @@
-/*
- * Copyright (c) 2018, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#include "config/av1_rtcd.h"
-
-#include "av1/common/enums.h"
-#include "av1/common/av1_txfm.h"
-#include "av1/encoder/x86/av1_fwd_txfm_avx2.h"
-#include "av1/common/x86/av1_txfm_sse2.h"
-#include "av1/encoder/av1_fwd_txfm1d_cfg.h"
-#include "av1/encoder/x86/av1_txfm1d_sse4.h"
-#include "av1/encoder/x86/av1_fwd_txfm_sse2.h"
-#include "aom_dsp/x86/txfm_common_avx2.h"
-
-static INLINE void fdct16x16_new_avx2(const __m256i *input, __m256i *output,
- int8_t cos_bit) {
- const int32_t *cospi = cospi_arr(cos_bit);
- const __m256i _r = _mm256_set1_epi32(1 << (cos_bit - 1));
-
- __m256i cospi_m32_p32 = pair_set_w16_epi16(-cospi[32], cospi[32]);
- __m256i cospi_p32_p32 = pair_set_w16_epi16(cospi[32], cospi[32]);
- __m256i cospi_p32_m32 = pair_set_w16_epi16(cospi[32], -cospi[32]);
- __m256i cospi_p48_p16 = pair_set_w16_epi16(cospi[48], cospi[16]);
- __m256i cospi_m16_p48 = pair_set_w16_epi16(-cospi[16], cospi[48]);
- __m256i cospi_m48_m16 = pair_set_w16_epi16(-cospi[48], -cospi[16]);
- __m256i cospi_p56_p08 = pair_set_w16_epi16(cospi[56], cospi[8]);
- __m256i cospi_m08_p56 = pair_set_w16_epi16(-cospi[8], cospi[56]);
- __m256i cospi_p24_p40 = pair_set_w16_epi16(cospi[24], cospi[40]);
- __m256i cospi_m40_p24 = pair_set_w16_epi16(-cospi[40], cospi[24]);
- __m256i cospi_p60_p04 = pair_set_w16_epi16(cospi[60], cospi[4]);
- __m256i cospi_m04_p60 = pair_set_w16_epi16(-cospi[4], cospi[60]);
- __m256i cospi_p28_p36 = pair_set_w16_epi16(cospi[28], cospi[36]);
- __m256i cospi_m36_p28 = pair_set_w16_epi16(-cospi[36], cospi[28]);
- __m256i cospi_p44_p20 = pair_set_w16_epi16(cospi[44], cospi[20]);
- __m256i cospi_m20_p44 = pair_set_w16_epi16(-cospi[20], cospi[44]);
- __m256i cospi_p12_p52 = pair_set_w16_epi16(cospi[12], cospi[52]);
- __m256i cospi_m52_p12 = pair_set_w16_epi16(-cospi[52], cospi[12]);
-
- // stage 1
- __m256i x1[16];
- btf_16_adds_subs_out_avx2(&x1[0], &x1[15], input[0], input[15]);
- btf_16_adds_subs_out_avx2(&x1[1], &x1[14], input[1], input[14]);
- btf_16_adds_subs_out_avx2(&x1[2], &x1[13], input[2], input[13]);
- btf_16_adds_subs_out_avx2(&x1[3], &x1[12], input[3], input[12]);
- btf_16_adds_subs_out_avx2(&x1[4], &x1[11], input[4], input[11]);
- btf_16_adds_subs_out_avx2(&x1[5], &x1[10], input[5], input[10]);
- btf_16_adds_subs_out_avx2(&x1[6], &x1[9], input[6], input[9]);
- btf_16_adds_subs_out_avx2(&x1[7], &x1[8], input[7], input[8]);
-
- // stage 2
- btf_16_adds_subs_avx2(&x1[0], &x1[7]);
- btf_16_adds_subs_avx2(&x1[1], &x1[6]);
- btf_16_adds_subs_avx2(&x1[2], &x1[5]);
- btf_16_adds_subs_avx2(&x1[3], &x1[4]);
- btf_16_w16_avx2(cospi_m32_p32, cospi_p32_p32, &x1[10], &x1[13], _r, cos_bit);
- btf_16_w16_avx2(cospi_m32_p32, cospi_p32_p32, &x1[11], &x1[12], _r, cos_bit);
-
- // stage 3
- btf_16_adds_subs_avx2(&x1[0], &x1[3]);
- btf_16_adds_subs_avx2(&x1[1], &x1[2]);
- btf_16_w16_avx2(cospi_m32_p32, cospi_p32_p32, &x1[5], &x1[6], _r, cos_bit);
- btf_16_adds_subs_avx2(&x1[8], &x1[11]);
- btf_16_adds_subs_avx2(&x1[9], &x1[10]);
- btf_16_adds_subs_avx2(&x1[15], &x1[12]);
- btf_16_adds_subs_avx2(&x1[14], &x1[13]);
-
- // stage 4
- btf_16_w16_avx2(cospi_p32_p32, cospi_p32_m32, &x1[0], &x1[1], _r, cos_bit);
- btf_16_w16_avx2(cospi_p48_p16, cospi_m16_p48, &x1[2], &x1[3], _r, cos_bit);
- btf_16_adds_subs_avx2(&x1[4], &x1[5]);
- btf_16_adds_subs_avx2(&x1[7], &x1[6]);
- btf_16_w16_avx2(cospi_m16_p48, cospi_p48_p16, &x1[9], &x1[14], _r, cos_bit);
- btf_16_w16_avx2(cospi_m48_m16, cospi_m16_p48, &x1[10], &x1[13], _r, cos_bit);
-
- // stage 5
- btf_16_w16_avx2(cospi_p56_p08, cospi_m08_p56, &x1[4], &x1[7], _r, cos_bit);
- btf_16_w16_avx2(cospi_p24_p40, cospi_m40_p24, &x1[5], &x1[6], _r, cos_bit);
- btf_16_adds_subs_avx2(&x1[8], &x1[9]);
- btf_16_adds_subs_avx2(&x1[11], &x1[10]);
- btf_16_adds_subs_avx2(&x1[12], &x1[13]);
- btf_16_adds_subs_avx2(&x1[15], &x1[14]);
-
- // stage 6
- btf_16_w16_avx2(cospi_p60_p04, cospi_m04_p60, &x1[8], &x1[15], _r, cos_bit);
- btf_16_w16_avx2(cospi_p28_p36, cospi_m36_p28, &x1[9], &x1[14], _r, cos_bit);
- btf_16_w16_avx2(cospi_p44_p20, cospi_m20_p44, &x1[10], &x1[13], _r, cos_bit);
- btf_16_w16_avx2(cospi_p12_p52, cospi_m52_p12, &x1[11], &x1[12], _r, cos_bit);
-
- // stage 7
- output[0] = x1[0];
- output[1] = x1[8];
- output[2] = x1[4];
- output[3] = x1[12];
- output[4] = x1[2];
- output[5] = x1[10];
- output[6] = x1[6];
- output[7] = x1[14];
- output[8] = x1[1];
- output[9] = x1[9];
- output[10] = x1[5];
- output[11] = x1[13];
- output[12] = x1[3];
- output[13] = x1[11];
- output[14] = x1[7];
- output[15] = x1[15];
-}
-
-static INLINE void fdct16x32_new_avx2(const __m256i *input, __m256i *output,
- int8_t cos_bit) {
- const int32_t *cospi = cospi_arr(cos_bit);
- const __m256i _r = _mm256_set1_epi32(1 << (cos_bit - 1));
-
- __m256i cospi_m32_p32 = pair_set_w16_epi16(-cospi[32], cospi[32]);
- __m256i cospi_p32_p32 = pair_set_w16_epi16(cospi[32], cospi[32]);
- __m256i cospi_m16_p48 = pair_set_w16_epi16(-cospi[16], cospi[48]);
- __m256i cospi_p48_p16 = pair_set_w16_epi16(cospi[48], cospi[16]);
- __m256i cospi_m48_m16 = pair_set_w16_epi16(-cospi[48], -cospi[16]);
- __m256i cospi_p32_m32 = pair_set_w16_epi16(cospi[32], -cospi[32]);
- __m256i cospi_p56_p08 = pair_set_w16_epi16(cospi[56], cospi[8]);
- __m256i cospi_m08_p56 = pair_set_w16_epi16(-cospi[8], cospi[56]);
- __m256i cospi_p24_p40 = pair_set_w16_epi16(cospi[24], cospi[40]);
- __m256i cospi_m40_p24 = pair_set_w16_epi16(-cospi[40], cospi[24]);
- __m256i cospi_m56_m08 = pair_set_w16_epi16(-cospi[56], -cospi[8]);
- __m256i cospi_m24_m40 = pair_set_w16_epi16(-cospi[24], -cospi[40]);
- __m256i cospi_p60_p04 = pair_set_w16_epi16(cospi[60], cospi[4]);
- __m256i cospi_m04_p60 = pair_set_w16_epi16(-cospi[4], cospi[60]);
- __m256i cospi_p28_p36 = pair_set_w16_epi16(cospi[28], cospi[36]);
- __m256i cospi_m36_p28 = pair_set_w16_epi16(-cospi[36], cospi[28]);
- __m256i cospi_p44_p20 = pair_set_w16_epi16(cospi[44], cospi[20]);
- __m256i cospi_m20_p44 = pair_set_w16_epi16(-cospi[20], cospi[44]);
- __m256i cospi_p12_p52 = pair_set_w16_epi16(cospi[12], cospi[52]);
- __m256i cospi_m52_p12 = pair_set_w16_epi16(-cospi[52], cospi[12]);
- __m256i cospi_p62_p02 = pair_set_w16_epi16(cospi[62], cospi[2]);
- __m256i cospi_m02_p62 = pair_set_w16_epi16(-cospi[2], cospi[62]);
- __m256i cospi_p30_p34 = pair_set_w16_epi16(cospi[30], cospi[34]);
- __m256i cospi_m34_p30 = pair_set_w16_epi16(-cospi[34], cospi[30]);
- __m256i cospi_p46_p18 = pair_set_w16_epi16(cospi[46], cospi[18]);
- __m256i cospi_m18_p46 = pair_set_w16_epi16(-cospi[18], cospi[46]);
- __m256i cospi_p14_p50 = pair_set_w16_epi16(cospi[14], cospi[50]);
- __m256i cospi_m50_p14 = pair_set_w16_epi16(-cospi[50], cospi[14]);
- __m256i cospi_p54_p10 = pair_set_w16_epi16(cospi[54], cospi[10]);
- __m256i cospi_m10_p54 = pair_set_w16_epi16(-cospi[10], cospi[54]);
- __m256i cospi_p22_p42 = pair_set_w16_epi16(cospi[22], cospi[42]);
- __m256i cospi_m42_p22 = pair_set_w16_epi16(-cospi[42], cospi[22]);
- __m256i cospi_p38_p26 = pair_set_w16_epi16(cospi[38], cospi[26]);
- __m256i cospi_m26_p38 = pair_set_w16_epi16(-cospi[26], cospi[38]);
- __m256i cospi_p06_p58 = pair_set_w16_epi16(cospi[6], cospi[58]);
- __m256i cospi_m58_p06 = pair_set_w16_epi16(-cospi[58], cospi[6]);
-
- // stage 1
- __m256i x1[32];
- btf_16_adds_subs_out_avx2(&x1[0], &x1[31], input[0], input[31]);
- btf_16_adds_subs_out_avx2(&x1[1], &x1[30], input[1], input[30]);
- btf_16_adds_subs_out_avx2(&x1[2], &x1[29], input[2], input[29]);
- btf_16_adds_subs_out_avx2(&x1[3], &x1[28], input[3], input[28]);
- btf_16_adds_subs_out_avx2(&x1[4], &x1[27], input[4], input[27]);
- btf_16_adds_subs_out_avx2(&x1[5], &x1[26], input[5], input[26]);
- btf_16_adds_subs_out_avx2(&x1[6], &x1[25], input[6], input[25]);
- btf_16_adds_subs_out_avx2(&x1[7], &x1[24], input[7], input[24]);
- btf_16_adds_subs_out_avx2(&x1[8], &x1[23], input[8], input[23]);
- btf_16_adds_subs_out_avx2(&x1[9], &x1[22], input[9], input[22]);
- btf_16_adds_subs_out_avx2(&x1[10], &x1[21], input[10], input[21]);
- btf_16_adds_subs_out_avx2(&x1[11], &x1[20], input[11], input[20]);
- btf_16_adds_subs_out_avx2(&x1[12], &x1[19], input[12], input[19]);
- btf_16_adds_subs_out_avx2(&x1[13], &x1[18], input[13], input[18]);
- btf_16_adds_subs_out_avx2(&x1[14], &x1[17], input[14], input[17]);
- btf_16_adds_subs_out_avx2(&x1[15], &x1[16], input[15], input[16]);
-
- // stage 2
- btf_16_adds_subs_avx2(&x1[0], &x1[15]);
- btf_16_adds_subs_avx2(&x1[1], &x1[14]);
- btf_16_adds_subs_avx2(&x1[2], &x1[13]);
- btf_16_adds_subs_avx2(&x1[3], &x1[12]);
- btf_16_adds_subs_avx2(&x1[4], &x1[11]);
- btf_16_adds_subs_avx2(&x1[5], &x1[10]);
- btf_16_adds_subs_avx2(&x1[6], &x1[9]);
- btf_16_adds_subs_avx2(&x1[7], &x1[8]);
- btf_16_w16_avx2(cospi_m32_p32, cospi_p32_p32, &x1[20], &x1[27], _r, cos_bit);
- btf_16_w16_avx2(cospi_m32_p32, cospi_p32_p32, &x1[21], &x1[26], _r, cos_bit);
- btf_16_w16_avx2(cospi_m32_p32, cospi_p32_p32, &x1[22], &x1[25], _r, cos_bit);
- btf_16_w16_avx2(cospi_m32_p32, cospi_p32_p32, &x1[23], &x1[24], _r, cos_bit);
-
- // stage 3
- btf_16_adds_subs_avx2(&x1[0], &x1[7]);
- btf_16_adds_subs_avx2(&x1[1], &x1[6]);
- btf_16_adds_subs_avx2(&x1[2], &x1[5]);
- btf_16_adds_subs_avx2(&x1[3], &x1[4]);
- btf_16_w16_avx2(cospi_m32_p32, cospi_p32_p32, &x1[10], &x1[13], _r, cos_bit);
- btf_16_w16_avx2(cospi_m32_p32, cospi_p32_p32, &x1[11], &x1[12], _r, cos_bit);
- btf_16_adds_subs_avx2(&x1[16], &x1[23]);
- btf_16_adds_subs_avx2(&x1[17], &x1[22]);
- btf_16_adds_subs_avx2(&x1[18], &x1[21]);
- btf_16_adds_subs_avx2(&x1[19], &x1[20]);
- btf_16_adds_subs_avx2(&x1[31], &x1[24]);
- btf_16_adds_subs_avx2(&x1[30], &x1[25]);
- btf_16_adds_subs_avx2(&x1[29], &x1[26]);
- btf_16_adds_subs_avx2(&x1[28], &x1[27]);
-
- // stage 4
- btf_16_adds_subs_avx2(&x1[0], &x1[3]);
- btf_16_adds_subs_avx2(&x1[1], &x1[2]);
- btf_16_w16_avx2(cospi_m32_p32, cospi_p32_p32, &x1[5], &x1[6], _r, cos_bit);
- btf_16_adds_subs_avx2(&x1[8], &x1[11]);
- btf_16_adds_subs_avx2(&x1[9], &x1[10]);
- btf_16_adds_subs_avx2(&x1[15], &x1[12]);
- btf_16_adds_subs_avx2(&x1[14], &x1[13]);
- btf_16_w16_avx2(cospi_m16_p48, cospi_p48_p16, &x1[18], &x1[29], _r, cos_bit);
- btf_16_w16_avx2(cospi_m16_p48, cospi_p48_p16, &x1[19], &x1[28], _r, cos_bit);
- btf_16_w16_avx2(cospi_m48_m16, cospi_m16_p48, &x1[20], &x1[27], _r, cos_bit);
- btf_16_w16_avx2(cospi_m48_m16, cospi_m16_p48, &x1[21], &x1[26], _r, cos_bit);
-
- // stage 5
- btf_16_w16_avx2(cospi_p32_p32, cospi_p32_m32, &x1[0], &x1[1], _r, cos_bit);
- btf_16_w16_avx2(cospi_p48_p16, cospi_m16_p48, &x1[2], &x1[3], _r, cos_bit);
- btf_16_adds_subs_avx2(&x1[4], &x1[5]);
- btf_16_adds_subs_avx2(&x1[7], &x1[6]);
- btf_16_w16_avx2(cospi_m16_p48, cospi_p48_p16, &x1[9], &x1[14], _r, cos_bit);
- btf_16_w16_avx2(cospi_m48_m16, cospi_m16_p48, &x1[10], &x1[13], _r, cos_bit);
- btf_16_adds_subs_avx2(&x1[16], &x1[19]);
- btf_16_adds_subs_avx2(&x1[17], &x1[18]);
- btf_16_adds_subs_avx2(&x1[23], &x1[20]);
- btf_16_adds_subs_avx2(&x1[22], &x1[21]);
- btf_16_adds_subs_avx2(&x1[24], &x1[27]);
- btf_16_adds_subs_avx2(&x1[25], &x1[26]);
- btf_16_adds_subs_avx2(&x1[31], &x1[28]);
- btf_16_adds_subs_avx2(&x1[30], &x1[29]);
-
- // stage 6
- btf_16_w16_avx2(cospi_p56_p08, cospi_m08_p56, &x1[4], &x1[7], _r, cos_bit);
- btf_16_w16_avx2(cospi_p24_p40, cospi_m40_p24, &x1[5], &x1[6], _r, cos_bit);
- btf_16_adds_subs_avx2(&x1[8], &x1[9]);
- btf_16_adds_subs_avx2(&x1[11], &x1[10]);
- btf_16_adds_subs_avx2(&x1[12], &x1[13]);
- btf_16_adds_subs_avx2(&x1[15], &x1[14]);
- btf_16_w16_avx2(cospi_m08_p56, cospi_p56_p08, &x1[17], &x1[30], _r, cos_bit);
- btf_16_w16_avx2(cospi_m56_m08, cospi_m08_p56, &x1[18], &x1[29], _r, cos_bit);
- btf_16_w16_avx2(cospi_m40_p24, cospi_p24_p40, &x1[21], &x1[26], _r, cos_bit);
- btf_16_w16_avx2(cospi_m24_m40, cospi_m40_p24, &x1[22], &x1[25], _r, cos_bit);
-
- // stage 7
- btf_16_w16_avx2(cospi_p60_p04, cospi_m04_p60, &x1[8], &x1[15], _r, cos_bit);
- btf_16_w16_avx2(cospi_p28_p36, cospi_m36_p28, &x1[9], &x1[14], _r, cos_bit);
- btf_16_w16_avx2(cospi_p44_p20, cospi_m20_p44, &x1[10], &x1[13], _r, cos_bit);
- btf_16_w16_avx2(cospi_p12_p52, cospi_m52_p12, &x1[11], &x1[12], _r, cos_bit);
- btf_16_adds_subs_avx2(&x1[16], &x1[17]);
- btf_16_adds_subs_avx2(&x1[19], &x1[18]);
- btf_16_adds_subs_avx2(&x1[20], &x1[21]);
- btf_16_adds_subs_avx2(&x1[23], &x1[22]);
- btf_16_adds_subs_avx2(&x1[24], &x1[25]);
- btf_16_adds_subs_avx2(&x1[27], &x1[26]);
- btf_16_adds_subs_avx2(&x1[28], &x1[29]);
- btf_16_adds_subs_avx2(&x1[31], &x1[30]);
-
- // stage 8
- btf_16_w16_avx2(cospi_p62_p02, cospi_m02_p62, &x1[16], &x1[31], _r, cos_bit);
- btf_16_w16_avx2(cospi_p30_p34, cospi_m34_p30, &x1[17], &x1[30], _r, cos_bit);
- btf_16_w16_avx2(cospi_p46_p18, cospi_m18_p46, &x1[18], &x1[29], _r, cos_bit);
- btf_16_w16_avx2(cospi_p14_p50, cospi_m50_p14, &x1[19], &x1[28], _r, cos_bit);
- btf_16_w16_avx2(cospi_p54_p10, cospi_m10_p54, &x1[20], &x1[27], _r, cos_bit);
- btf_16_w16_avx2(cospi_p22_p42, cospi_m42_p22, &x1[21], &x1[26], _r, cos_bit);
- btf_16_w16_avx2(cospi_p38_p26, cospi_m26_p38, &x1[22], &x1[25], _r, cos_bit);
- btf_16_w16_avx2(cospi_p06_p58, cospi_m58_p06, &x1[23], &x1[24], _r, cos_bit);
-
- // stage 9
- output[0] = x1[0];
- output[1] = x1[16];
- output[2] = x1[8];
- output[3] = x1[24];
- output[4] = x1[4];
- output[5] = x1[20];
- output[6] = x1[12];
- output[7] = x1[28];
- output[8] = x1[2];
- output[9] = x1[18];
- output[10] = x1[10];
- output[11] = x1[26];
- output[12] = x1[6];
- output[13] = x1[22];
- output[14] = x1[14];
- output[15] = x1[30];
- output[16] = x1[1];
- output[17] = x1[17];
- output[18] = x1[9];
- output[19] = x1[25];
- output[20] = x1[5];
- output[21] = x1[21];
- output[22] = x1[13];
- output[23] = x1[29];
- output[24] = x1[3];
- output[25] = x1[19];
- output[26] = x1[11];
- output[27] = x1[27];
- output[28] = x1[7];
- output[29] = x1[23];
- output[30] = x1[15];
- output[31] = x1[31];
-}
-
-static INLINE void fdct16x64_new_avx2(const __m256i *input, __m256i *output,
- int8_t cos_bit) {
- const int32_t *cospi = cospi_arr(cos_bit);
- const __m256i _r = _mm256_set1_epi32(1 << (cos_bit - 1));
-
- __m256i cospi_m32_p32 = pair_set_w16_epi16(-cospi[32], cospi[32]);
- __m256i cospi_p32_p32 = pair_set_w16_epi16(cospi[32], cospi[32]);
- __m256i cospi_m16_p48 = pair_set_w16_epi16(-cospi[16], cospi[48]);
- __m256i cospi_p48_p16 = pair_set_w16_epi16(cospi[48], cospi[16]);
- __m256i cospi_m48_m16 = pair_set_w16_epi16(-cospi[48], -cospi[16]);
- __m256i cospi_p32_m32 = pair_set_w16_epi16(cospi[32], -cospi[32]);
- __m256i cospi_m08_p56 = pair_set_w16_epi16(-cospi[8], cospi[56]);
- __m256i cospi_p56_p08 = pair_set_w16_epi16(cospi[56], cospi[8]);
- __m256i cospi_m56_m08 = pair_set_w16_epi16(-cospi[56], -cospi[8]);
- __m256i cospi_m40_p24 = pair_set_w16_epi16(-cospi[40], cospi[24]);
- __m256i cospi_p24_p40 = pair_set_w16_epi16(cospi[24], cospi[40]);
- __m256i cospi_m24_m40 = pair_set_w16_epi16(-cospi[24], -cospi[40]);
- __m256i cospi_p60_p04 = pair_set_w16_epi16(cospi[60], cospi[4]);
- __m256i cospi_m04_p60 = pair_set_w16_epi16(-cospi[4], cospi[60]);
- __m256i cospi_p28_p36 = pair_set_w16_epi16(cospi[28], cospi[36]);
- __m256i cospi_m36_p28 = pair_set_w16_epi16(-cospi[36], cospi[28]);
- __m256i cospi_p44_p20 = pair_set_w16_epi16(cospi[44], cospi[20]);
- __m256i cospi_m20_p44 = pair_set_w16_epi16(-cospi[20], cospi[44]);
- __m256i cospi_p12_p52 = pair_set_w16_epi16(cospi[12], cospi[52]);
- __m256i cospi_m52_p12 = pair_set_w16_epi16(-cospi[52], cospi[12]);
- __m256i cospi_m60_m04 = pair_set_w16_epi16(-cospi[60], -cospi[4]);
- __m256i cospi_m28_m36 = pair_set_w16_epi16(-cospi[28], -cospi[36]);
- __m256i cospi_m44_m20 = pair_set_w16_epi16(-cospi[44], -cospi[20]);
- __m256i cospi_m12_m52 = pair_set_w16_epi16(-cospi[12], -cospi[52]);
- __m256i cospi_p62_p02 = pair_set_w16_epi16(cospi[62], cospi[2]);
- __m256i cospi_m02_p62 = pair_set_w16_epi16(-cospi[2], cospi[62]);
- __m256i cospi_p30_p34 = pair_set_w16_epi16(cospi[30], cospi[34]);
- __m256i cospi_m34_p30 = pair_set_w16_epi16(-cospi[34], cospi[30]);
- __m256i cospi_p46_p18 = pair_set_w16_epi16(cospi[46], cospi[18]);
- __m256i cospi_m18_p46 = pair_set_w16_epi16(-cospi[18], cospi[46]);
- __m256i cospi_p14_p50 = pair_set_w16_epi16(cospi[14], cospi[50]);
- __m256i cospi_m50_p14 = pair_set_w16_epi16(-cospi[50], cospi[14]);
- __m256i cospi_p54_p10 = pair_set_w16_epi16(cospi[54], cospi[10]);
- __m256i cospi_m10_p54 = pair_set_w16_epi16(-cospi[10], cospi[54]);
- __m256i cospi_p22_p42 = pair_set_w16_epi16(cospi[22], cospi[42]);
- __m256i cospi_m42_p22 = pair_set_w16_epi16(-cospi[42], cospi[22]);
- __m256i cospi_p38_p26 = pair_set_w16_epi16(cospi[38], cospi[26]);
- __m256i cospi_m26_p38 = pair_set_w16_epi16(-cospi[26], cospi[38]);
- __m256i cospi_p06_p58 = pair_set_w16_epi16(cospi[6], cospi[58]);
- __m256i cospi_m58_p06 = pair_set_w16_epi16(-cospi[58], cospi[6]);
- __m256i cospi_p63_p01 = pair_set_w16_epi16(cospi[63], cospi[1]);
- __m256i cospi_m01_p63 = pair_set_w16_epi16(-cospi[1], cospi[63]);
- __m256i cospi_p31_p33 = pair_set_w16_epi16(cospi[31], cospi[33]);
- __m256i cospi_m33_p31 = pair_set_w16_epi16(-cospi[33], cospi[31]);
- __m256i cospi_p47_p17 = pair_set_w16_epi16(cospi[47], cospi[17]);
- __m256i cospi_m17_p47 = pair_set_w16_epi16(-cospi[17], cospi[47]);
- __m256i cospi_p15_p49 = pair_set_w16_epi16(cospi[15], cospi[49]);
- __m256i cospi_m49_p15 = pair_set_w16_epi16(-cospi[49], cospi[15]);
- __m256i cospi_p55_p09 = pair_set_w16_epi16(cospi[55], cospi[9]);
- __m256i cospi_m09_p55 = pair_set_w16_epi16(-cospi[9], cospi[55]);
- __m256i cospi_p23_p41 = pair_set_w16_epi16(cospi[23], cospi[41]);
- __m256i cospi_m41_p23 = pair_set_w16_epi16(-cospi[41], cospi[23]);
- __m256i cospi_p39_p25 = pair_set_w16_epi16(cospi[39], cospi[25]);
- __m256i cospi_m25_p39 = pair_set_w16_epi16(-cospi[25], cospi[39]);
- __m256i cospi_p07_p57 = pair_set_w16_epi16(cospi[7], cospi[57]);
- __m256i cospi_m57_p07 = pair_set_w16_epi16(-cospi[57], cospi[7]);
- __m256i cospi_p59_p05 = pair_set_w16_epi16(cospi[59], cospi[5]);
- __m256i cospi_m05_p59 = pair_set_w16_epi16(-cospi[5], cospi[59]);
- __m256i cospi_p27_p37 = pair_set_w16_epi16(cospi[27], cospi[37]);
- __m256i cospi_m37_p27 = pair_set_w16_epi16(-cospi[37], cospi[27]);
- __m256i cospi_p43_p21 = pair_set_w16_epi16(cospi[43], cospi[21]);
- __m256i cospi_m21_p43 = pair_set_w16_epi16(-cospi[21], cospi[43]);
- __m256i cospi_p11_p53 = pair_set_w16_epi16(cospi[11], cospi[53]);
- __m256i cospi_m53_p11 = pair_set_w16_epi16(-cospi[53], cospi[11]);
- __m256i cospi_p51_p13 = pair_set_w16_epi16(cospi[51], cospi[13]);
- __m256i cospi_m13_p51 = pair_set_w16_epi16(-cospi[13], cospi[51]);
- __m256i cospi_p19_p45 = pair_set_w16_epi16(cospi[19], cospi[45]);
- __m256i cospi_m45_p19 = pair_set_w16_epi16(-cospi[45], cospi[19]);
- __m256i cospi_p35_p29 = pair_set_w16_epi16(cospi[35], cospi[29]);
- __m256i cospi_m29_p35 = pair_set_w16_epi16(-cospi[29], cospi[35]);
- __m256i cospi_p03_p61 = pair_set_w16_epi16(cospi[3], cospi[61]);
- __m256i cospi_m61_p03 = pair_set_w16_epi16(-cospi[61], cospi[3]);
-
- // stage 1
- __m256i x1[64];
- btf_16_adds_subs_out_avx2(&x1[0], &x1[63], input[0], input[63]);
- btf_16_adds_subs_out_avx2(&x1[1], &x1[62], input[1], input[62]);
- btf_16_adds_subs_out_avx2(&x1[2], &x1[61], input[2], input[61]);
- btf_16_adds_subs_out_avx2(&x1[3], &x1[60], input[3], input[60]);
- btf_16_adds_subs_out_avx2(&x1[4], &x1[59], input[4], input[59]);
- btf_16_adds_subs_out_avx2(&x1[5], &x1[58], input[5], input[58]);
- btf_16_adds_subs_out_avx2(&x1[6], &x1[57], input[6], input[57]);
- btf_16_adds_subs_out_avx2(&x1[7], &x1[56], input[7], input[56]);
- btf_16_adds_subs_out_avx2(&x1[8], &x1[55], input[8], input[55]);
- btf_16_adds_subs_out_avx2(&x1[9], &x1[54], input[9], input[54]);
- btf_16_adds_subs_out_avx2(&x1[10], &x1[53], input[10], input[53]);
- btf_16_adds_subs_out_avx2(&x1[11], &x1[52], input[11], input[52]);
- btf_16_adds_subs_out_avx2(&x1[12], &x1[51], input[12], input[51]);
- btf_16_adds_subs_out_avx2(&x1[13], &x1[50], input[13], input[50]);
- btf_16_adds_subs_out_avx2(&x1[14], &x1[49], input[14], input[49]);
- btf_16_adds_subs_out_avx2(&x1[15], &x1[48], input[15], input[48]);
- btf_16_adds_subs_out_avx2(&x1[16], &x1[47], input[16], input[47]);
- btf_16_adds_subs_out_avx2(&x1[17], &x1[46], input[17], input[46]);
- btf_16_adds_subs_out_avx2(&x1[18], &x1[45], input[18], input[45]);
- btf_16_adds_subs_out_avx2(&x1[19], &x1[44], input[19], input[44]);
- btf_16_adds_subs_out_avx2(&x1[20], &x1[43], input[20], input[43]);
- btf_16_adds_subs_out_avx2(&x1[21], &x1[42], input[21], input[42]);
- btf_16_adds_subs_out_avx2(&x1[22], &x1[41], input[22], input[41]);
- btf_16_adds_subs_out_avx2(&x1[23], &x1[40], input[23], input[40]);
- btf_16_adds_subs_out_avx2(&x1[24], &x1[39], input[24], input[39]);
- btf_16_adds_subs_out_avx2(&x1[25], &x1[38], input[25], input[38]);
- btf_16_adds_subs_out_avx2(&x1[26], &x1[37], input[26], input[37]);
- btf_16_adds_subs_out_avx2(&x1[27], &x1[36], input[27], input[36]);
- btf_16_adds_subs_out_avx2(&x1[28], &x1[35], input[28], input[35]);
- btf_16_adds_subs_out_avx2(&x1[29], &x1[34], input[29], input[34]);
- btf_16_adds_subs_out_avx2(&x1[30], &x1[33], input[30], input[33]);
- btf_16_adds_subs_out_avx2(&x1[31], &x1[32], input[31], input[32]);
-
- // stage 2
- btf_16_adds_subs_avx2(&x1[0], &x1[31]);
- btf_16_adds_subs_avx2(&x1[1], &x1[30]);
- btf_16_adds_subs_avx2(&x1[2], &x1[29]);
- btf_16_adds_subs_avx2(&x1[3], &x1[28]);
- btf_16_adds_subs_avx2(&x1[4], &x1[27]);
- btf_16_adds_subs_avx2(&x1[5], &x1[26]);
- btf_16_adds_subs_avx2(&x1[6], &x1[25]);
- btf_16_adds_subs_avx2(&x1[7], &x1[24]);
- btf_16_adds_subs_avx2(&x1[8], &x1[23]);
- btf_16_adds_subs_avx2(&x1[9], &x1[22]);
- btf_16_adds_subs_avx2(&x1[10], &x1[21]);
- btf_16_adds_subs_avx2(&x1[11], &x1[20]);
- btf_16_adds_subs_avx2(&x1[12], &x1[19]);
- btf_16_adds_subs_avx2(&x1[13], &x1[18]);
- btf_16_adds_subs_avx2(&x1[14], &x1[17]);
- btf_16_adds_subs_avx2(&x1[15], &x1[16]);
- btf_16_w16_avx2(cospi_m32_p32, cospi_p32_p32, &x1[40], &x1[55], _r, cos_bit);
- btf_16_w16_avx2(cospi_m32_p32, cospi_p32_p32, &x1[41], &x1[54], _r, cos_bit);
- btf_16_w16_avx2(cospi_m32_p32, cospi_p32_p32, &x1[42], &x1[53], _r, cos_bit);
- btf_16_w16_avx2(cospi_m32_p32, cospi_p32_p32, &x1[43], &x1[52], _r, cos_bit);
- btf_16_w16_avx2(cospi_m32_p32, cospi_p32_p32, &x1[44], &x1[51], _r, cos_bit);
- btf_16_w16_avx2(cospi_m32_p32, cospi_p32_p32, &x1[45], &x1[50], _r, cos_bit);
- btf_16_w16_avx2(cospi_m32_p32, cospi_p32_p32, &x1[46], &x1[49], _r, cos_bit);
- btf_16_w16_avx2(cospi_m32_p32, cospi_p32_p32, &x1[47], &x1[48], _r, cos_bit);
-
- // stage 3
- btf_16_adds_subs_avx2(&x1[0], &x1[15]);
- btf_16_adds_subs_avx2(&x1[1], &x1[14]);
- btf_16_adds_subs_avx2(&x1[2], &x1[13]);
- btf_16_adds_subs_avx2(&x1[3], &x1[12]);
- btf_16_adds_subs_avx2(&x1[4], &x1[11]);
- btf_16_adds_subs_avx2(&x1[5], &x1[10]);
- btf_16_adds_subs_avx2(&x1[6], &x1[9]);
- btf_16_adds_subs_avx2(&x1[7], &x1[8]);
- btf_16_w16_avx2(cospi_m32_p32, cospi_p32_p32, &x1[20], &x1[27], _r, cos_bit);
- btf_16_w16_avx2(cospi_m32_p32, cospi_p32_p32, &x1[21], &x1[26], _r, cos_bit);
- btf_16_w16_avx2(cospi_m32_p32, cospi_p32_p32, &x1[22], &x1[25], _r, cos_bit);
- btf_16_w16_avx2(cospi_m32_p32, cospi_p32_p32, &x1[23], &x1[24], _r, cos_bit);
- btf_16_adds_subs_avx2(&x1[32], &x1[47]);
- btf_16_adds_subs_avx2(&x1[33], &x1[46]);
- btf_16_adds_subs_avx2(&x1[34], &x1[45]);
- btf_16_adds_subs_avx2(&x1[35], &x1[44]);
- btf_16_adds_subs_avx2(&x1[36], &x1[43]);
- btf_16_adds_subs_avx2(&x1[37], &x1[42]);
- btf_16_adds_subs_avx2(&x1[38], &x1[41]);
- btf_16_adds_subs_avx2(&x1[39], &x1[40]);
- btf_16_adds_subs_avx2(&x1[63], &x1[48]);
- btf_16_adds_subs_avx2(&x1[62], &x1[49]);
- btf_16_adds_subs_avx2(&x1[61], &x1[50]);
- btf_16_adds_subs_avx2(&x1[60], &x1[51]);
- btf_16_adds_subs_avx2(&x1[59], &x1[52]);
- btf_16_adds_subs_avx2(&x1[58], &x1[53]);
- btf_16_adds_subs_avx2(&x1[57], &x1[54]);
- btf_16_adds_subs_avx2(&x1[56], &x1[55]);
-
- // stage 4
- btf_16_adds_subs_avx2(&x1[0], &x1[7]);
- btf_16_adds_subs_avx2(&x1[1], &x1[6]);
- btf_16_adds_subs_avx2(&x1[2], &x1[5]);
- btf_16_adds_subs_avx2(&x1[3], &x1[4]);
- btf_16_w16_avx2(cospi_m32_p32, cospi_p32_p32, &x1[10], &x1[13], _r, cos_bit);
- btf_16_w16_avx2(cospi_m32_p32, cospi_p32_p32, &x1[11], &x1[12], _r, cos_bit);
- btf_16_adds_subs_avx2(&x1[16], &x1[23]);
- btf_16_adds_subs_avx2(&x1[17], &x1[22]);
- btf_16_adds_subs_avx2(&x1[18], &x1[21]);
- btf_16_adds_subs_avx2(&x1[19], &x1[20]);
- btf_16_adds_subs_avx2(&x1[31], &x1[24]);
- btf_16_adds_subs_avx2(&x1[30], &x1[25]);
- btf_16_adds_subs_avx2(&x1[29], &x1[26]);
- btf_16_adds_subs_avx2(&x1[28], &x1[27]);
- btf_16_w16_avx2(cospi_m16_p48, cospi_p48_p16, &x1[36], &x1[59], _r, cos_bit);
- btf_16_w16_avx2(cospi_m16_p48, cospi_p48_p16, &x1[37], &x1[58], _r, cos_bit);
- btf_16_w16_avx2(cospi_m16_p48, cospi_p48_p16, &x1[38], &x1[57], _r, cos_bit);
- btf_16_w16_avx2(cospi_m16_p48, cospi_p48_p16, &x1[39], &x1[56], _r, cos_bit);
- btf_16_w16_avx2(cospi_m48_m16, cospi_m16_p48, &x1[40], &x1[55], _r, cos_bit);
- btf_16_w16_avx2(cospi_m48_m16, cospi_m16_p48, &x1[41], &x1[54], _r, cos_bit);
- btf_16_w16_avx2(cospi_m48_m16, cospi_m16_p48, &x1[42], &x1[53], _r, cos_bit);
- btf_16_w16_avx2(cospi_m48_m16, cospi_m16_p48, &x1[43], &x1[52], _r, cos_bit);
-
- // stage 5
- btf_16_adds_subs_avx2(&x1[0], &x1[3]);
- btf_16_adds_subs_avx2(&x1[1], &x1[2]);
- btf_16_w16_avx2(cospi_m32_p32, cospi_p32_p32, &x1[5], &x1[6], _r, cos_bit);
- btf_16_adds_subs_avx2(&x1[8], &x1[11]);
- btf_16_adds_subs_avx2(&x1[9], &x1[10]);
- btf_16_adds_subs_avx2(&x1[15], &x1[12]);
- btf_16_adds_subs_avx2(&x1[14], &x1[13]);
- btf_16_w16_avx2(cospi_m16_p48, cospi_p48_p16, &x1[18], &x1[29], _r, cos_bit);
- btf_16_w16_avx2(cospi_m16_p48, cospi_p48_p16, &x1[19], &x1[28], _r, cos_bit);
- btf_16_w16_avx2(cospi_m48_m16, cospi_m16_p48, &x1[20], &x1[27], _r, cos_bit);
- btf_16_w16_avx2(cospi_m48_m16, cospi_m16_p48, &x1[21], &x1[26], _r, cos_bit);
- btf_16_adds_subs_avx2(&x1[32], &x1[39]);
- btf_16_adds_subs_avx2(&x1[33], &x1[38]);
- btf_16_adds_subs_avx2(&x1[34], &x1[37]);
- btf_16_adds_subs_avx2(&x1[35], &x1[36]);
- btf_16_adds_subs_avx2(&x1[47], &x1[40]);
- btf_16_adds_subs_avx2(&x1[46], &x1[41]);
- btf_16_adds_subs_avx2(&x1[45], &x1[42]);
- btf_16_adds_subs_avx2(&x1[44], &x1[43]);
- btf_16_adds_subs_avx2(&x1[48], &x1[55]);
- btf_16_adds_subs_avx2(&x1[49], &x1[54]);
- btf_16_adds_subs_avx2(&x1[50], &x1[53]);
- btf_16_adds_subs_avx2(&x1[51], &x1[52]);
- btf_16_adds_subs_avx2(&x1[63], &x1[56]);
- btf_16_adds_subs_avx2(&x1[62], &x1[57]);
- btf_16_adds_subs_avx2(&x1[61], &x1[58]);
- btf_16_adds_subs_avx2(&x1[60], &x1[59]);
-
- // stage 6
- btf_16_w16_avx2(cospi_p32_p32, cospi_p32_m32, &x1[0], &x1[1], _r, cos_bit);
- btf_16_w16_avx2(cospi_p48_p16, cospi_m16_p48, &x1[2], &x1[3], _r, cos_bit);
- btf_16_adds_subs_avx2(&x1[4], &x1[5]);
- btf_16_adds_subs_avx2(&x1[7], &x1[6]);
- btf_16_w16_avx2(cospi_m16_p48, cospi_p48_p16, &x1[9], &x1[14], _r, cos_bit);
- btf_16_w16_avx2(cospi_m48_m16, cospi_m16_p48, &x1[10], &x1[13], _r, cos_bit);
- btf_16_adds_subs_avx2(&x1[16], &x1[19]);
- btf_16_adds_subs_avx2(&x1[17], &x1[18]);
- btf_16_adds_subs_avx2(&x1[23], &x1[20]);
- btf_16_adds_subs_avx2(&x1[22], &x1[21]);
- btf_16_adds_subs_avx2(&x1[24], &x1[27]);
- btf_16_adds_subs_avx2(&x1[25], &x1[26]);
- btf_16_adds_subs_avx2(&x1[31], &x1[28]);
- btf_16_adds_subs_avx2(&x1[30], &x1[29]);
- btf_16_w16_avx2(cospi_m08_p56, cospi_p56_p08, &x1[34], &x1[61], _r, cos_bit);
- btf_16_w16_avx2(cospi_m08_p56, cospi_p56_p08, &x1[35], &x1[60], _r, cos_bit);
- btf_16_w16_avx2(cospi_m56_m08, cospi_m08_p56, &x1[36], &x1[59], _r, cos_bit);
- btf_16_w16_avx2(cospi_m56_m08, cospi_m08_p56, &x1[37], &x1[58], _r, cos_bit);
- btf_16_w16_avx2(cospi_m40_p24, cospi_p24_p40, &x1[42], &x1[53], _r, cos_bit);
- btf_16_w16_avx2(cospi_m40_p24, cospi_p24_p40, &x1[43], &x1[52], _r, cos_bit);
- btf_16_w16_avx2(cospi_m24_m40, cospi_m40_p24, &x1[44], &x1[51], _r, cos_bit);
- btf_16_w16_avx2(cospi_m24_m40, cospi_m40_p24, &x1[45], &x1[50], _r, cos_bit);
-
- // stage 7
- btf_16_w16_avx2(cospi_p56_p08, cospi_m08_p56, &x1[4], &x1[7], _r, cos_bit);
- btf_16_w16_avx2(cospi_p24_p40, cospi_m40_p24, &x1[5], &x1[6], _r, cos_bit);
- btf_16_adds_subs_avx2(&x1[8], &x1[9]);
- btf_16_adds_subs_avx2(&x1[11], &x1[10]);
- btf_16_adds_subs_avx2(&x1[12], &x1[13]);
- btf_16_adds_subs_avx2(&x1[15], &x1[14]);
- btf_16_w16_avx2(cospi_m08_p56, cospi_p56_p08, &x1[17], &x1[30], _r, cos_bit);
- btf_16_w16_avx2(cospi_m56_m08, cospi_m08_p56, &x1[18], &x1[29], _r, cos_bit);
- btf_16_w16_avx2(cospi_m40_p24, cospi_p24_p40, &x1[21], &x1[26], _r, cos_bit);
- btf_16_w16_avx2(cospi_m24_m40, cospi_m40_p24, &x1[22], &x1[25], _r, cos_bit);
- btf_16_adds_subs_avx2(&x1[32], &x1[35]);
- btf_16_adds_subs_avx2(&x1[33], &x1[34]);
- btf_16_adds_subs_avx2(&x1[39], &x1[36]);
- btf_16_adds_subs_avx2(&x1[38], &x1[37]);
- btf_16_adds_subs_avx2(&x1[40], &x1[43]);
- btf_16_adds_subs_avx2(&x1[41], &x1[42]);
- btf_16_adds_subs_avx2(&x1[47], &x1[44]);
- btf_16_adds_subs_avx2(&x1[46], &x1[45]);
- btf_16_adds_subs_avx2(&x1[48], &x1[51]);
- btf_16_adds_subs_avx2(&x1[49], &x1[50]);
- btf_16_adds_subs_avx2(&x1[55], &x1[52]);
- btf_16_adds_subs_avx2(&x1[54], &x1[53]);
- btf_16_adds_subs_avx2(&x1[56], &x1[59]);
- btf_16_adds_subs_avx2(&x1[57], &x1[58]);
- btf_16_adds_subs_avx2(&x1[63], &x1[60]);
- btf_16_adds_subs_avx2(&x1[62], &x1[61]);
-
- // stage 8
- btf_16_w16_avx2(cospi_p60_p04, cospi_m04_p60, &x1[8], &x1[15], _r, cos_bit);
- btf_16_w16_avx2(cospi_p28_p36, cospi_m36_p28, &x1[9], &x1[14], _r, cos_bit);
- btf_16_w16_avx2(cospi_p44_p20, cospi_m20_p44, &x1[10], &x1[13], _r, cos_bit);
- btf_16_w16_avx2(cospi_p12_p52, cospi_m52_p12, &x1[11], &x1[12], _r, cos_bit);
- btf_16_adds_subs_avx2(&x1[16], &x1[17]);
- btf_16_adds_subs_avx2(&x1[19], &x1[18]);
- btf_16_adds_subs_avx2(&x1[20], &x1[21]);
- btf_16_adds_subs_avx2(&x1[23], &x1[22]);
- btf_16_adds_subs_avx2(&x1[24], &x1[25]);
- btf_16_adds_subs_avx2(&x1[27], &x1[26]);
- btf_16_adds_subs_avx2(&x1[28], &x1[29]);
- btf_16_adds_subs_avx2(&x1[31], &x1[30]);
- btf_16_w16_avx2(cospi_m04_p60, cospi_p60_p04, &x1[33], &x1[62], _r, cos_bit);
- btf_16_w16_avx2(cospi_m60_m04, cospi_m04_p60, &x1[34], &x1[61], _r, cos_bit);
- btf_16_w16_avx2(cospi_m36_p28, cospi_p28_p36, &x1[37], &x1[58], _r, cos_bit);
- btf_16_w16_avx2(cospi_m28_m36, cospi_m36_p28, &x1[38], &x1[57], _r, cos_bit);
- btf_16_w16_avx2(cospi_m20_p44, cospi_p44_p20, &x1[41], &x1[54], _r, cos_bit);
- btf_16_w16_avx2(cospi_m44_m20, cospi_m20_p44, &x1[42], &x1[53], _r, cos_bit);
- btf_16_w16_avx2(cospi_m52_p12, cospi_p12_p52, &x1[45], &x1[50], _r, cos_bit);
- btf_16_w16_avx2(cospi_m12_m52, cospi_m52_p12, &x1[46], &x1[49], _r, cos_bit);
-
- // stage 9
- btf_16_w16_avx2(cospi_p62_p02, cospi_m02_p62, &x1[16], &x1[31], _r, cos_bit);
- btf_16_w16_avx2(cospi_p30_p34, cospi_m34_p30, &x1[17], &x1[30], _r, cos_bit);
- btf_16_w16_avx2(cospi_p46_p18, cospi_m18_p46, &x1[18], &x1[29], _r, cos_bit);
- btf_16_w16_avx2(cospi_p14_p50, cospi_m50_p14, &x1[19], &x1[28], _r, cos_bit);
- btf_16_w16_avx2(cospi_p54_p10, cospi_m10_p54, &x1[20], &x1[27], _r, cos_bit);
- btf_16_w16_avx2(cospi_p22_p42, cospi_m42_p22, &x1[21], &x1[26], _r, cos_bit);
- btf_16_w16_avx2(cospi_p38_p26, cospi_m26_p38, &x1[22], &x1[25], _r, cos_bit);
- btf_16_w16_avx2(cospi_p06_p58, cospi_m58_p06, &x1[23], &x1[24], _r, cos_bit);
- btf_16_adds_subs_avx2(&x1[32], &x1[33]);
- btf_16_adds_subs_avx2(&x1[35], &x1[34]);
- btf_16_adds_subs_avx2(&x1[36], &x1[37]);
- btf_16_adds_subs_avx2(&x1[39], &x1[38]);
- btf_16_adds_subs_avx2(&x1[40], &x1[41]);
- btf_16_adds_subs_avx2(&x1[43], &x1[42]);
- btf_16_adds_subs_avx2(&x1[44], &x1[45]);
- btf_16_adds_subs_avx2(&x1[47], &x1[46]);
- btf_16_adds_subs_avx2(&x1[48], &x1[49]);
- btf_16_adds_subs_avx2(&x1[51], &x1[50]);
- btf_16_adds_subs_avx2(&x1[52], &x1[53]);
- btf_16_adds_subs_avx2(&x1[55], &x1[54]);
- btf_16_adds_subs_avx2(&x1[56], &x1[57]);
- btf_16_adds_subs_avx2(&x1[59], &x1[58]);
- btf_16_adds_subs_avx2(&x1[60], &x1[61]);
- btf_16_adds_subs_avx2(&x1[63], &x1[62]);
-
- // stage 10
- btf_16_w16_avx2(cospi_p63_p01, cospi_m01_p63, &x1[32], &x1[63], _r, cos_bit);
- btf_16_w16_avx2(cospi_p31_p33, cospi_m33_p31, &x1[33], &x1[62], _r, cos_bit);
- btf_16_w16_avx2(cospi_p47_p17, cospi_m17_p47, &x1[34], &x1[61], _r, cos_bit);
- btf_16_w16_avx2(cospi_p15_p49, cospi_m49_p15, &x1[35], &x1[60], _r, cos_bit);
- btf_16_w16_avx2(cospi_p55_p09, cospi_m09_p55, &x1[36], &x1[59], _r, cos_bit);
- btf_16_w16_avx2(cospi_p23_p41, cospi_m41_p23, &x1[37], &x1[58], _r, cos_bit);
- btf_16_w16_avx2(cospi_p39_p25, cospi_m25_p39, &x1[38], &x1[57], _r, cos_bit);
- btf_16_w16_avx2(cospi_p07_p57, cospi_m57_p07, &x1[39], &x1[56], _r, cos_bit);
- btf_16_w16_avx2(cospi_p59_p05, cospi_m05_p59, &x1[40], &x1[55], _r, cos_bit);
- btf_16_w16_avx2(cospi_p27_p37, cospi_m37_p27, &x1[41], &x1[54], _r, cos_bit);
- btf_16_w16_avx2(cospi_p43_p21, cospi_m21_p43, &x1[42], &x1[53], _r, cos_bit);
- btf_16_w16_avx2(cospi_p11_p53, cospi_m53_p11, &x1[43], &x1[52], _r, cos_bit);
- btf_16_w16_avx2(cospi_p51_p13, cospi_m13_p51, &x1[44], &x1[51], _r, cos_bit);
- btf_16_w16_avx2(cospi_p19_p45, cospi_m45_p19, &x1[45], &x1[50], _r, cos_bit);
- btf_16_w16_avx2(cospi_p35_p29, cospi_m29_p35, &x1[46], &x1[49], _r, cos_bit);
- btf_16_w16_avx2(cospi_p03_p61, cospi_m61_p03, &x1[47], &x1[48], _r, cos_bit);
-
- // stage 11
- output[0] = x1[0];
- output[1] = x1[32];
- output[2] = x1[16];
- output[3] = x1[48];
- output[4] = x1[8];
- output[5] = x1[40];
- output[6] = x1[24];
- output[7] = x1[56];
- output[8] = x1[4];
- output[9] = x1[36];
- output[10] = x1[20];
- output[11] = x1[52];
- output[12] = x1[12];
- output[13] = x1[44];
- output[14] = x1[28];
- output[15] = x1[60];
- output[16] = x1[2];
- output[17] = x1[34];
- output[18] = x1[18];
- output[19] = x1[50];
- output[20] = x1[10];
- output[21] = x1[42];
- output[22] = x1[26];
- output[23] = x1[58];
- output[24] = x1[6];
- output[25] = x1[38];
- output[26] = x1[22];
- output[27] = x1[54];
- output[28] = x1[14];
- output[29] = x1[46];
- output[30] = x1[30];
- output[31] = x1[62];
- output[32] = x1[1];
- output[33] = x1[33];
- output[34] = x1[17];
- output[35] = x1[49];
- output[36] = x1[9];
- output[37] = x1[41];
- output[38] = x1[25];
- output[39] = x1[57];
- output[40] = x1[5];
- output[41] = x1[37];
- output[42] = x1[21];
- output[43] = x1[53];
- output[44] = x1[13];
- output[45] = x1[45];
- output[46] = x1[29];
- output[47] = x1[61];
- output[48] = x1[3];
- output[49] = x1[35];
- output[50] = x1[19];
- output[51] = x1[51];
- output[52] = x1[11];
- output[53] = x1[43];
- output[54] = x1[27];
- output[55] = x1[59];
- output[56] = x1[7];
- output[57] = x1[39];
- output[58] = x1[23];
- output[59] = x1[55];
- output[60] = x1[15];
- output[61] = x1[47];
- output[62] = x1[31];
- output[63] = x1[63];
-}
-
-static INLINE void av1_fdct32_new_avx2(const __m256i *input, __m256i *output,  // 32-point transform (forward DCT, going by the name) as a fixed 9-stage network: reads input[0..31], writes output[0..31]
- int8_t cos_bit) {  // cos_bit selects the cospi table and sets the rounding term below
- __m256i x1[32];  // working rows; every stage after stage 1 updates them in place
- const int32_t *cospi = cospi_arr(cos_bit);  // constant table for this cos_bit (cospi_arr is defined outside this chunk)
- const __m256i _r = _mm256_set1_epi32(1 << (cos_bit - 1));  // 1 << (cos_bit - 1) in every 32-bit lane; passed to each cospi-weighted step (presumably the rounding offset before a shift by cos_bit -- confirm in the btf helpers)
- // stage 0: nothing to do; stage 1 consumes input[] directly
- // stage 1: pair input[i] with input[31 - i]; results are stored to x1[i] and x1[31 - i]
- btf_32_add_sub_out_avx2(&x1[0], &x1[31], input[0], input[31]);
- btf_32_add_sub_out_avx2(&x1[1], &x1[30], input[1], input[30]);
- btf_32_add_sub_out_avx2(&x1[2], &x1[29], input[2], input[29]);
- btf_32_add_sub_out_avx2(&x1[3], &x1[28], input[3], input[28]);
- btf_32_add_sub_out_avx2(&x1[4], &x1[27], input[4], input[27]);
- btf_32_add_sub_out_avx2(&x1[5], &x1[26], input[5], input[26]);
- btf_32_add_sub_out_avx2(&x1[6], &x1[25], input[6], input[25]);
- btf_32_add_sub_out_avx2(&x1[7], &x1[24], input[7], input[24]);
- btf_32_add_sub_out_avx2(&x1[8], &x1[23], input[8], input[23]);
- btf_32_add_sub_out_avx2(&x1[9], &x1[22], input[9], input[22]);
- btf_32_add_sub_out_avx2(&x1[10], &x1[21], input[10], input[21]);
- btf_32_add_sub_out_avx2(&x1[11], &x1[20], input[11], input[20]);
- btf_32_add_sub_out_avx2(&x1[12], &x1[19], input[12], input[19]);
- btf_32_add_sub_out_avx2(&x1[13], &x1[18], input[13], input[18]);
- btf_32_add_sub_out_avx2(&x1[14], &x1[17], input[14], input[17]);
- btf_32_add_sub_out_avx2(&x1[15], &x1[16], input[15], input[16]);
-
- // stage 2: add_sub on x1[0..15] (i with 15 - i); cospi[32] steps on x1[20..23] paired with x1[27..24]; x1[16..19] and x1[28..31] are untouched
- btf_32_add_sub_avx2(&x1[0], &x1[15]);
- btf_32_add_sub_avx2(&x1[1], &x1[14]);
- btf_32_add_sub_avx2(&x1[2], &x1[13]);
- btf_32_add_sub_avx2(&x1[3], &x1[12]);
- btf_32_add_sub_avx2(&x1[4], &x1[11]);
- btf_32_add_sub_avx2(&x1[5], &x1[10]);
- btf_32_add_sub_avx2(&x1[6], &x1[9]);
- btf_32_add_sub_avx2(&x1[7], &x1[8]);
- btf_32_avx2_type0(-cospi[32], cospi[32], &x1[20], &x1[27], _r, cos_bit);
- btf_32_avx2_type0(-cospi[32], cospi[32], &x1[21], &x1[26], _r, cos_bit);
- btf_32_avx2_type0(-cospi[32], cospi[32], &x1[22], &x1[25], _r, cos_bit);
- btf_32_avx2_type0(-cospi[32], cospi[32], &x1[23], &x1[24], _r, cos_bit);
-
- // stage 3: add_sub on x1[0..7] (i with 7 - i); cospi[32] steps on x1[10..13]; add_sub across x1[16..31]
- btf_32_add_sub_avx2(&x1[0], &x1[7]);
- btf_32_add_sub_avx2(&x1[1], &x1[6]);
- btf_32_add_sub_avx2(&x1[2], &x1[5]);
- btf_32_add_sub_avx2(&x1[3], &x1[4]);
- btf_32_avx2_type0(-cospi[32], cospi[32], &x1[10], &x1[13], _r, cos_bit);
- btf_32_avx2_type0(-cospi[32], cospi[32], &x1[11], &x1[12], _r, cos_bit);
- btf_32_add_sub_avx2(&x1[16], &x1[23]);
- btf_32_add_sub_avx2(&x1[17], &x1[22]);
- btf_32_add_sub_avx2(&x1[18], &x1[21]);
- btf_32_add_sub_avx2(&x1[19], &x1[20]);
- btf_32_add_sub_avx2(&x1[31], &x1[24]);
- btf_32_add_sub_avx2(&x1[30], &x1[25]);
- btf_32_add_sub_avx2(&x1[29], &x1[26]);
- btf_32_add_sub_avx2(&x1[28], &x1[27]);
-
- // stage 4: add_sub on x1[0..3] and x1[8..15]; cospi[32] step on x1[5]/x1[6]; cospi[16]/cospi[48] steps on x1[18..21] paired with x1[29..26]
- btf_32_add_sub_avx2(&x1[0], &x1[3]);
- btf_32_add_sub_avx2(&x1[1], &x1[2]);
- btf_32_avx2_type0(-cospi[32], cospi[32], &x1[5], &x1[6], _r, cos_bit);
- btf_32_add_sub_avx2(&x1[8], &x1[11]);
- btf_32_add_sub_avx2(&x1[9], &x1[10]);
- btf_32_add_sub_avx2(&x1[15], &x1[12]);
- btf_32_add_sub_avx2(&x1[14], &x1[13]);
- btf_32_avx2_type0(-cospi[16], cospi[48], &x1[18], &x1[29], _r, cos_bit);
- btf_32_avx2_type0(-cospi[16], cospi[48], &x1[19], &x1[28], _r, cos_bit);
- btf_32_avx2_type0(-cospi[48], -cospi[16], &x1[20], &x1[27], _r, cos_bit);
- btf_32_avx2_type0(-cospi[48], -cospi[16], &x1[21], &x1[26], _r, cos_bit);
-
- // stage 5: last steps for x1[0..3] (not touched again before the output copy); add_sub on x1[4..7]; cospi[16]/cospi[48] steps on x1[9]/x1[14] and x1[10]/x1[13]; add_sub in groups of four across x1[16..31]
- btf_32_avx2_type0(cospi[32], cospi[32], &x1[0], &x1[1], _r, cos_bit);
- btf_32_avx2_type1(cospi[48], cospi[16], &x1[2], &x1[3], _r, cos_bit);
- btf_32_add_sub_avx2(&x1[4], &x1[5]);
- btf_32_add_sub_avx2(&x1[7], &x1[6]);
- btf_32_avx2_type0(-cospi[16], cospi[48], &x1[9], &x1[14], _r, cos_bit);
- btf_32_avx2_type0(-cospi[48], -cospi[16], &x1[10], &x1[13], _r, cos_bit);
- btf_32_add_sub_avx2(&x1[16], &x1[19]);
- btf_32_add_sub_avx2(&x1[17], &x1[18]);
- btf_32_add_sub_avx2(&x1[23], &x1[20]);
- btf_32_add_sub_avx2(&x1[22], &x1[21]);
- btf_32_add_sub_avx2(&x1[24], &x1[27]);
- btf_32_add_sub_avx2(&x1[25], &x1[26]);
- btf_32_add_sub_avx2(&x1[31], &x1[28]);
- btf_32_add_sub_avx2(&x1[30], &x1[29]);
-
- // stage 6: last steps for x1[4..7]; add_sub on adjacent pairs in x1[8..15]; cospi[8]/[56] and cospi[24]/[40] steps on x1[17,18,21,22] paired with x1[30,29,26,25]
- btf_32_avx2_type1(cospi[56], cospi[8], &x1[4], &x1[7], _r, cos_bit);
- btf_32_avx2_type1(cospi[24], cospi[40], &x1[5], &x1[6], _r, cos_bit);
- btf_32_add_sub_avx2(&x1[8], &x1[9]);
- btf_32_add_sub_avx2(&x1[11], &x1[10]);
- btf_32_add_sub_avx2(&x1[12], &x1[13]);
- btf_32_add_sub_avx2(&x1[15], &x1[14]);
- btf_32_avx2_type0(-cospi[8], cospi[56], &x1[17], &x1[30], _r, cos_bit);
- btf_32_avx2_type0(-cospi[56], -cospi[8], &x1[18], &x1[29], _r, cos_bit);
- btf_32_avx2_type0(-cospi[40], cospi[24], &x1[21], &x1[26], _r, cos_bit);
- btf_32_avx2_type0(-cospi[24], -cospi[40], &x1[22], &x1[25], _r, cos_bit);
-
- // stage 7: last steps for x1[8..15] (x1[8 + j] with x1[15 - j]); add_sub on adjacent pairs in x1[16..31]
- btf_32_avx2_type1(cospi[60], cospi[4], &x1[8], &x1[15], _r, cos_bit);
- btf_32_avx2_type1(cospi[28], cospi[36], &x1[9], &x1[14], _r, cos_bit);
- btf_32_avx2_type1(cospi[44], cospi[20], &x1[10], &x1[13], _r, cos_bit);
- btf_32_avx2_type1(cospi[12], cospi[52], &x1[11], &x1[12], _r, cos_bit);
- btf_32_add_sub_avx2(&x1[16], &x1[17]);
- btf_32_add_sub_avx2(&x1[19], &x1[18]);
- btf_32_add_sub_avx2(&x1[20], &x1[21]);
- btf_32_add_sub_avx2(&x1[23], &x1[22]);
- btf_32_add_sub_avx2(&x1[24], &x1[25]);
- btf_32_add_sub_avx2(&x1[27], &x1[26]);
- btf_32_add_sub_avx2(&x1[28], &x1[29]);
- btf_32_add_sub_avx2(&x1[31], &x1[30]);
-
- // stage 8: last steps for x1[16..31] (x1[16 + j] with x1[31 - j]), using the even-indexed cospi entries 2..62
- btf_32_avx2_type1(cospi[62], cospi[2], &x1[16], &x1[31], _r, cos_bit);
- btf_32_avx2_type1(cospi[30], cospi[34], &x1[17], &x1[30], _r, cos_bit);
- btf_32_avx2_type1(cospi[46], cospi[18], &x1[18], &x1[29], _r, cos_bit);
- btf_32_avx2_type1(cospi[14], cospi[50], &x1[19], &x1[28], _r, cos_bit);
- btf_32_avx2_type1(cospi[54], cospi[10], &x1[20], &x1[27], _r, cos_bit);
- btf_32_avx2_type1(cospi[22], cospi[42], &x1[21], &x1[26], _r, cos_bit);
- btf_32_avx2_type1(cospi[38], cospi[26], &x1[22], &x1[25], _r, cos_bit);
- btf_32_avx2_type1(cospi[6], cospi[58], &x1[23], &x1[24], _r, cos_bit);
-
- // stage 9: copy x1[] to output[] in bit-reversed order: output[k] = x1[r], where r is k with its 5 index bits reversed
- output[0] = x1[0];
- output[1] = x1[16];
- output[2] = x1[8];
- output[3] = x1[24];
- output[4] = x1[4];
- output[5] = x1[20];
- output[6] = x1[12];
- output[7] = x1[28];
- output[8] = x1[2];
- output[9] = x1[18];
- output[10] = x1[10];
- output[11] = x1[26];
- output[12] = x1[6];
- output[13] = x1[22];
- output[14] = x1[14];
- output[15] = x1[30];
- output[16] = x1[1];
- output[17] = x1[17];
- output[18] = x1[9];
- output[19] = x1[25];
- output[20] = x1[5];
- output[21] = x1[21];
- output[22] = x1[13];
- output[23] = x1[29];
- output[24] = x1[3];
- output[25] = x1[19];
- output[26] = x1[11];
- output[27] = x1[27];
- output[28] = x1[7];
- output[29] = x1[23];
- output[30] = x1[15];
- output[31] = x1[31];
-}
-
-static INLINE void av1_fdct64_new_avx2(const __m256i *input, __m256i *output,  // 64-point transform (forward DCT, going by the name) as a fixed 11-stage network: reads input[0..63], writes output[0..63]
- int8_t cos_bit) {  // cos_bit selects the cospi table and sets the rounding term below
- const int32_t *cospi = cospi_arr(cos_bit);  // constant table for this cos_bit (cospi_arr is defined outside this chunk)
- const __m256i _r = _mm256_set1_epi32(1 << (cos_bit - 1));  // 1 << (cos_bit - 1) in every 32-bit lane; passed to each cospi-weighted step (presumably the rounding offset before a shift by cos_bit -- confirm in the btf helpers)
-
- __m256i cospi_m32 = _mm256_set1_epi32(-cospi[32]);  // naming for this whole list: cospi_pNN holds +cospi[NN] and cospi_mNN holds -cospi[NN], each replicated across all 32-bit lanes
- __m256i cospi_p32 = _mm256_set1_epi32(cospi[32]);
- __m256i cospi_m16 = _mm256_set1_epi32(-cospi[16]);
- __m256i cospi_p48 = _mm256_set1_epi32(cospi[48]);
- __m256i cospi_m48 = _mm256_set1_epi32(-cospi[48]);
- __m256i cospi_p16 = _mm256_set1_epi32(cospi[16]);
- __m256i cospi_m08 = _mm256_set1_epi32(-cospi[8]);
- __m256i cospi_p56 = _mm256_set1_epi32(cospi[56]);
- __m256i cospi_m56 = _mm256_set1_epi32(-cospi[56]);
- __m256i cospi_m40 = _mm256_set1_epi32(-cospi[40]);
- __m256i cospi_p24 = _mm256_set1_epi32(cospi[24]);
- __m256i cospi_m24 = _mm256_set1_epi32(-cospi[24]);
- __m256i cospi_p08 = _mm256_set1_epi32(cospi[8]);
- __m256i cospi_p40 = _mm256_set1_epi32(cospi[40]);
- __m256i cospi_p60 = _mm256_set1_epi32(cospi[60]);
- __m256i cospi_p04 = _mm256_set1_epi32(cospi[4]);
- __m256i cospi_p28 = _mm256_set1_epi32(cospi[28]);
- __m256i cospi_p36 = _mm256_set1_epi32(cospi[36]);
- __m256i cospi_p44 = _mm256_set1_epi32(cospi[44]);
- __m256i cospi_p20 = _mm256_set1_epi32(cospi[20]);
- __m256i cospi_p12 = _mm256_set1_epi32(cospi[12]);
- __m256i cospi_p52 = _mm256_set1_epi32(cospi[52]);
- __m256i cospi_m04 = _mm256_set1_epi32(-cospi[4]);
- __m256i cospi_m60 = _mm256_set1_epi32(-cospi[60]);
- __m256i cospi_m36 = _mm256_set1_epi32(-cospi[36]);
- __m256i cospi_m28 = _mm256_set1_epi32(-cospi[28]);
- __m256i cospi_m20 = _mm256_set1_epi32(-cospi[20]);
- __m256i cospi_m44 = _mm256_set1_epi32(-cospi[44]);
- __m256i cospi_m52 = _mm256_set1_epi32(-cospi[52]);
- __m256i cospi_m12 = _mm256_set1_epi32(-cospi[12]);
- __m256i cospi_p62 = _mm256_set1_epi32(cospi[62]);
- __m256i cospi_p02 = _mm256_set1_epi32(cospi[2]);
- __m256i cospi_p30 = _mm256_set1_epi32(cospi[30]);
- __m256i cospi_p34 = _mm256_set1_epi32(cospi[34]);
- __m256i cospi_p46 = _mm256_set1_epi32(cospi[46]);
- __m256i cospi_p18 = _mm256_set1_epi32(cospi[18]);
- __m256i cospi_p14 = _mm256_set1_epi32(cospi[14]);
- __m256i cospi_p50 = _mm256_set1_epi32(cospi[50]);
- __m256i cospi_p54 = _mm256_set1_epi32(cospi[54]);
- __m256i cospi_p10 = _mm256_set1_epi32(cospi[10]);
- __m256i cospi_p22 = _mm256_set1_epi32(cospi[22]);
- __m256i cospi_p42 = _mm256_set1_epi32(cospi[42]);
- __m256i cospi_p38 = _mm256_set1_epi32(cospi[38]);
- __m256i cospi_p26 = _mm256_set1_epi32(cospi[26]);
- __m256i cospi_p06 = _mm256_set1_epi32(cospi[6]);
- __m256i cospi_p58 = _mm256_set1_epi32(cospi[58]);
- __m256i cospi_p63 = _mm256_set1_epi32(cospi[63]);
- __m256i cospi_p01 = _mm256_set1_epi32(cospi[1]);
- __m256i cospi_p31 = _mm256_set1_epi32(cospi[31]);
- __m256i cospi_p33 = _mm256_set1_epi32(cospi[33]);
- __m256i cospi_p47 = _mm256_set1_epi32(cospi[47]);
- __m256i cospi_p17 = _mm256_set1_epi32(cospi[17]);
- __m256i cospi_p15 = _mm256_set1_epi32(cospi[15]);
- __m256i cospi_p49 = _mm256_set1_epi32(cospi[49]);
- __m256i cospi_p55 = _mm256_set1_epi32(cospi[55]);
- __m256i cospi_p09 = _mm256_set1_epi32(cospi[9]);
- __m256i cospi_p23 = _mm256_set1_epi32(cospi[23]);
- __m256i cospi_p41 = _mm256_set1_epi32(cospi[41]);
- __m256i cospi_p39 = _mm256_set1_epi32(cospi[39]);
- __m256i cospi_p25 = _mm256_set1_epi32(cospi[25]);
- __m256i cospi_p07 = _mm256_set1_epi32(cospi[7]);
- __m256i cospi_p57 = _mm256_set1_epi32(cospi[57]);
- __m256i cospi_p59 = _mm256_set1_epi32(cospi[59]);
- __m256i cospi_p05 = _mm256_set1_epi32(cospi[5]);
- __m256i cospi_p27 = _mm256_set1_epi32(cospi[27]);
- __m256i cospi_p37 = _mm256_set1_epi32(cospi[37]);
- __m256i cospi_p43 = _mm256_set1_epi32(cospi[43]);
- __m256i cospi_p21 = _mm256_set1_epi32(cospi[21]);
- __m256i cospi_p11 = _mm256_set1_epi32(cospi[11]);
- __m256i cospi_p53 = _mm256_set1_epi32(cospi[53]);
- __m256i cospi_p51 = _mm256_set1_epi32(cospi[51]);
- __m256i cospi_p13 = _mm256_set1_epi32(cospi[13]);
- __m256i cospi_p19 = _mm256_set1_epi32(cospi[19]);
- __m256i cospi_p45 = _mm256_set1_epi32(cospi[45]);
- __m256i cospi_p35 = _mm256_set1_epi32(cospi[35]);
- __m256i cospi_p29 = _mm256_set1_epi32(cospi[29]);
- __m256i cospi_p03 = _mm256_set1_epi32(cospi[3]);
- __m256i cospi_p61 = _mm256_set1_epi32(cospi[61]);
-
- // stage 1: pair input[i] with input[63 - i]; results are stored to x1[i] and x1[63 - i]
- __m256i x1[64];  // working rows; every stage after stage 1 updates them in place
- btf_32_add_sub_out_avx2(&x1[0], &x1[63], input[0], input[63]);
- btf_32_add_sub_out_avx2(&x1[1], &x1[62], input[1], input[62]);
- btf_32_add_sub_out_avx2(&x1[2], &x1[61], input[2], input[61]);
- btf_32_add_sub_out_avx2(&x1[3], &x1[60], input[3], input[60]);
- btf_32_add_sub_out_avx2(&x1[4], &x1[59], input[4], input[59]);
- btf_32_add_sub_out_avx2(&x1[5], &x1[58], input[5], input[58]);
- btf_32_add_sub_out_avx2(&x1[6], &x1[57], input[6], input[57]);
- btf_32_add_sub_out_avx2(&x1[7], &x1[56], input[7], input[56]);
- btf_32_add_sub_out_avx2(&x1[8], &x1[55], input[8], input[55]);
- btf_32_add_sub_out_avx2(&x1[9], &x1[54], input[9], input[54]);
- btf_32_add_sub_out_avx2(&x1[10], &x1[53], input[10], input[53]);
- btf_32_add_sub_out_avx2(&x1[11], &x1[52], input[11], input[52]);
- btf_32_add_sub_out_avx2(&x1[12], &x1[51], input[12], input[51]);
- btf_32_add_sub_out_avx2(&x1[13], &x1[50], input[13], input[50]);
- btf_32_add_sub_out_avx2(&x1[14], &x1[49], input[14], input[49]);
- btf_32_add_sub_out_avx2(&x1[15], &x1[48], input[15], input[48]);
- btf_32_add_sub_out_avx2(&x1[16], &x1[47], input[16], input[47]);
- btf_32_add_sub_out_avx2(&x1[17], &x1[46], input[17], input[46]);
- btf_32_add_sub_out_avx2(&x1[18], &x1[45], input[18], input[45]);
- btf_32_add_sub_out_avx2(&x1[19], &x1[44], input[19], input[44]);
- btf_32_add_sub_out_avx2(&x1[20], &x1[43], input[20], input[43]);
- btf_32_add_sub_out_avx2(&x1[21], &x1[42], input[21], input[42]);
- btf_32_add_sub_out_avx2(&x1[22], &x1[41], input[22], input[41]);
- btf_32_add_sub_out_avx2(&x1[23], &x1[40], input[23], input[40]);
- btf_32_add_sub_out_avx2(&x1[24], &x1[39], input[24], input[39]);
- btf_32_add_sub_out_avx2(&x1[25], &x1[38], input[25], input[38]);
- btf_32_add_sub_out_avx2(&x1[26], &x1[37], input[26], input[37]);
- btf_32_add_sub_out_avx2(&x1[27], &x1[36], input[27], input[36]);
- btf_32_add_sub_out_avx2(&x1[28], &x1[35], input[28], input[35]);
- btf_32_add_sub_out_avx2(&x1[29], &x1[34], input[29], input[34]);
- btf_32_add_sub_out_avx2(&x1[30], &x1[33], input[30], input[33]);
- btf_32_add_sub_out_avx2(&x1[31], &x1[32], input[31], input[32]);
-
- // stage 2: add_sub on x1[0..31] (i with 31 - i); cospi[32] steps on x1[40..47] paired with x1[55..48]; x1[32..39] and x1[56..63] are untouched
- btf_32_add_sub_avx2(&x1[0], &x1[31]);
- btf_32_add_sub_avx2(&x1[1], &x1[30]);
- btf_32_add_sub_avx2(&x1[2], &x1[29]);
- btf_32_add_sub_avx2(&x1[3], &x1[28]);
- btf_32_add_sub_avx2(&x1[4], &x1[27]);
- btf_32_add_sub_avx2(&x1[5], &x1[26]);
- btf_32_add_sub_avx2(&x1[6], &x1[25]);
- btf_32_add_sub_avx2(&x1[7], &x1[24]);
- btf_32_add_sub_avx2(&x1[8], &x1[23]);
- btf_32_add_sub_avx2(&x1[9], &x1[22]);
- btf_32_add_sub_avx2(&x1[10], &x1[21]);
- btf_32_add_sub_avx2(&x1[11], &x1[20]);
- btf_32_add_sub_avx2(&x1[12], &x1[19]);
- btf_32_add_sub_avx2(&x1[13], &x1[18]);
- btf_32_add_sub_avx2(&x1[14], &x1[17]);
- btf_32_add_sub_avx2(&x1[15], &x1[16]);
- btf_32_avx2_type0_new(cospi_m32, cospi_p32, &x1[40], &x1[55], _r, cos_bit);
- btf_32_avx2_type0_new(cospi_m32, cospi_p32, &x1[41], &x1[54], _r, cos_bit);
- btf_32_avx2_type0_new(cospi_m32, cospi_p32, &x1[42], &x1[53], _r, cos_bit);
- btf_32_avx2_type0_new(cospi_m32, cospi_p32, &x1[43], &x1[52], _r, cos_bit);
- btf_32_avx2_type0_new(cospi_m32, cospi_p32, &x1[44], &x1[51], _r, cos_bit);
- btf_32_avx2_type0_new(cospi_m32, cospi_p32, &x1[45], &x1[50], _r, cos_bit);
- btf_32_avx2_type0_new(cospi_m32, cospi_p32, &x1[46], &x1[49], _r, cos_bit);
- btf_32_avx2_type0_new(cospi_m32, cospi_p32, &x1[47], &x1[48], _r, cos_bit);
-
- // stage 3: add_sub on x1[0..15] (i with 15 - i); cospi[32] steps on x1[20..23] paired with x1[27..24]; add_sub across x1[32..63]
- btf_32_add_sub_avx2(&x1[0], &x1[15]);
- btf_32_add_sub_avx2(&x1[1], &x1[14]);
- btf_32_add_sub_avx2(&x1[2], &x1[13]);
- btf_32_add_sub_avx2(&x1[3], &x1[12]);
- btf_32_add_sub_avx2(&x1[4], &x1[11]);
- btf_32_add_sub_avx2(&x1[5], &x1[10]);
- btf_32_add_sub_avx2(&x1[6], &x1[9]);
- btf_32_add_sub_avx2(&x1[7], &x1[8]);
- btf_32_avx2_type0_new(cospi_m32, cospi_p32, &x1[20], &x1[27], _r, cos_bit);
- btf_32_avx2_type0_new(cospi_m32, cospi_p32, &x1[21], &x1[26], _r, cos_bit);
- btf_32_avx2_type0_new(cospi_m32, cospi_p32, &x1[22], &x1[25], _r, cos_bit);
- btf_32_avx2_type0_new(cospi_m32, cospi_p32, &x1[23], &x1[24], _r, cos_bit);
- btf_32_add_sub_avx2(&x1[32], &x1[47]);
- btf_32_add_sub_avx2(&x1[33], &x1[46]);
- btf_32_add_sub_avx2(&x1[34], &x1[45]);
- btf_32_add_sub_avx2(&x1[35], &x1[44]);
- btf_32_add_sub_avx2(&x1[36], &x1[43]);
- btf_32_add_sub_avx2(&x1[37], &x1[42]);
- btf_32_add_sub_avx2(&x1[38], &x1[41]);
- btf_32_add_sub_avx2(&x1[39], &x1[40]);
- btf_32_add_sub_avx2(&x1[63], &x1[48]);
- btf_32_add_sub_avx2(&x1[62], &x1[49]);
- btf_32_add_sub_avx2(&x1[61], &x1[50]);
- btf_32_add_sub_avx2(&x1[60], &x1[51]);
- btf_32_add_sub_avx2(&x1[59], &x1[52]);
- btf_32_add_sub_avx2(&x1[58], &x1[53]);
- btf_32_add_sub_avx2(&x1[57], &x1[54]);
- btf_32_add_sub_avx2(&x1[56], &x1[55]);
-
- // stage 4: add_sub on x1[0..7] and across x1[16..31]; cospi[32] steps on x1[10..13]; cospi[16]/cospi[48] steps on x1[36..43] paired with x1[59..52]
- btf_32_add_sub_avx2(&x1[0], &x1[7]);
- btf_32_add_sub_avx2(&x1[1], &x1[6]);
- btf_32_add_sub_avx2(&x1[2], &x1[5]);
- btf_32_add_sub_avx2(&x1[3], &x1[4]);
- btf_32_avx2_type0_new(cospi_m32, cospi_p32, &x1[10], &x1[13], _r, cos_bit);
- btf_32_avx2_type0_new(cospi_m32, cospi_p32, &x1[11], &x1[12], _r, cos_bit);
- btf_32_add_sub_avx2(&x1[16], &x1[23]);
- btf_32_add_sub_avx2(&x1[17], &x1[22]);
- btf_32_add_sub_avx2(&x1[18], &x1[21]);
- btf_32_add_sub_avx2(&x1[19], &x1[20]);
- btf_32_add_sub_avx2(&x1[31], &x1[24]);
- btf_32_add_sub_avx2(&x1[30], &x1[25]);
- btf_32_add_sub_avx2(&x1[29], &x1[26]);
- btf_32_add_sub_avx2(&x1[28], &x1[27]);
- btf_32_avx2_type0_new(cospi_m16, cospi_p48, &x1[36], &x1[59], _r, cos_bit);
- btf_32_avx2_type0_new(cospi_m16, cospi_p48, &x1[37], &x1[58], _r, cos_bit);
- btf_32_avx2_type0_new(cospi_m16, cospi_p48, &x1[38], &x1[57], _r, cos_bit);
- btf_32_avx2_type0_new(cospi_m16, cospi_p48, &x1[39], &x1[56], _r, cos_bit);
- btf_32_avx2_type0_new(cospi_m48, cospi_m16, &x1[40], &x1[55], _r, cos_bit);
- btf_32_avx2_type0_new(cospi_m48, cospi_m16, &x1[41], &x1[54], _r, cos_bit);
- btf_32_avx2_type0_new(cospi_m48, cospi_m16, &x1[42], &x1[53], _r, cos_bit);
- btf_32_avx2_type0_new(cospi_m48, cospi_m16, &x1[43], &x1[52], _r, cos_bit);
-
- // stage 5: add_sub on x1[0..3] and x1[8..15]; cospi[32] step on x1[5]/x1[6]; cospi[16]/cospi[48] steps on x1[18..21] paired with x1[29..26]; add_sub in groups of eight across x1[32..63]
- btf_32_add_sub_avx2(&x1[0], &x1[3]);
- btf_32_add_sub_avx2(&x1[1], &x1[2]);
- btf_32_avx2_type0_new(cospi_m32, cospi_p32, &x1[5], &x1[6], _r, cos_bit);
- btf_32_add_sub_avx2(&x1[8], &x1[11]);
- btf_32_add_sub_avx2(&x1[9], &x1[10]);
- btf_32_add_sub_avx2(&x1[15], &x1[12]);
- btf_32_add_sub_avx2(&x1[14], &x1[13]);
- btf_32_avx2_type0_new(cospi_m16, cospi_p48, &x1[18], &x1[29], _r, cos_bit);
- btf_32_avx2_type0_new(cospi_m16, cospi_p48, &x1[19], &x1[28], _r, cos_bit);
- btf_32_avx2_type0_new(cospi_m48, cospi_m16, &x1[20], &x1[27], _r, cos_bit);
- btf_32_avx2_type0_new(cospi_m48, cospi_m16, &x1[21], &x1[26], _r, cos_bit);
- btf_32_add_sub_avx2(&x1[32], &x1[39]);
- btf_32_add_sub_avx2(&x1[33], &x1[38]);
- btf_32_add_sub_avx2(&x1[34], &x1[37]);
- btf_32_add_sub_avx2(&x1[35], &x1[36]);
- btf_32_add_sub_avx2(&x1[47], &x1[40]);
- btf_32_add_sub_avx2(&x1[46], &x1[41]);
- btf_32_add_sub_avx2(&x1[45], &x1[42]);
- btf_32_add_sub_avx2(&x1[44], &x1[43]);
- btf_32_add_sub_avx2(&x1[48], &x1[55]);
- btf_32_add_sub_avx2(&x1[49], &x1[54]);
- btf_32_add_sub_avx2(&x1[50], &x1[53]);
- btf_32_add_sub_avx2(&x1[51], &x1[52]);
- btf_32_add_sub_avx2(&x1[63], &x1[56]);
- btf_32_add_sub_avx2(&x1[62], &x1[57]);
- btf_32_add_sub_avx2(&x1[61], &x1[58]);
- btf_32_add_sub_avx2(&x1[60], &x1[59]);
-
- // stage 6: last steps for x1[0..3] (not touched again before the output copy); add_sub on x1[4..7] and in groups of four across x1[16..31]; cospi-weighted steps on x1[9]/x1[14], x1[10]/x1[13], x1[34..37] with x1[61..58], and x1[42..45] with x1[53..50]
- btf_32_avx2_type0_new(cospi_p32, cospi_p32, &x1[0], &x1[1], _r, cos_bit);
- btf_32_avx2_type1_new(cospi_p48, cospi_p16, &x1[2], &x1[3], _r, cos_bit);
- btf_32_add_sub_avx2(&x1[4], &x1[5]);
- btf_32_add_sub_avx2(&x1[7], &x1[6]);
- btf_32_avx2_type0_new(cospi_m16, cospi_p48, &x1[9], &x1[14], _r, cos_bit);
- btf_32_avx2_type0_new(cospi_m48, cospi_m16, &x1[10], &x1[13], _r, cos_bit);
- btf_32_add_sub_avx2(&x1[16], &x1[19]);
- btf_32_add_sub_avx2(&x1[17], &x1[18]);
- btf_32_add_sub_avx2(&x1[23], &x1[20]);
- btf_32_add_sub_avx2(&x1[22], &x1[21]);
- btf_32_add_sub_avx2(&x1[24], &x1[27]);
- btf_32_add_sub_avx2(&x1[25], &x1[26]);
- btf_32_add_sub_avx2(&x1[31], &x1[28]);
- btf_32_add_sub_avx2(&x1[30], &x1[29]);
- btf_32_avx2_type0_new(cospi_m08, cospi_p56, &x1[34], &x1[61], _r, cos_bit);
- btf_32_avx2_type0_new(cospi_m08, cospi_p56, &x1[35], &x1[60], _r, cos_bit);
- btf_32_avx2_type0_new(cospi_m56, cospi_m08, &x1[36], &x1[59], _r, cos_bit);
- btf_32_avx2_type0_new(cospi_m56, cospi_m08, &x1[37], &x1[58], _r, cos_bit);
- btf_32_avx2_type0_new(cospi_m40, cospi_p24, &x1[42], &x1[53], _r, cos_bit);
- btf_32_avx2_type0_new(cospi_m40, cospi_p24, &x1[43], &x1[52], _r, cos_bit);
- btf_32_avx2_type0_new(cospi_m24, cospi_m40, &x1[44], &x1[51], _r, cos_bit);
- btf_32_avx2_type0_new(cospi_m24, cospi_m40, &x1[45], &x1[50], _r, cos_bit);
-
- // stage 7: last steps for x1[4..7]; add_sub on adjacent pairs in x1[8..15]; cospi-weighted steps on x1[17,18,21,22] paired with x1[30,29,26,25]; add_sub in groups of four across x1[32..63]
- btf_32_avx2_type1_new(cospi_p56, cospi_p08, &x1[4], &x1[7], _r, cos_bit);
- btf_32_avx2_type1_new(cospi_p24, cospi_p40, &x1[5], &x1[6], _r, cos_bit);
- btf_32_add_sub_avx2(&x1[8], &x1[9]);
- btf_32_add_sub_avx2(&x1[11], &x1[10]);
- btf_32_add_sub_avx2(&x1[12], &x1[13]);
- btf_32_add_sub_avx2(&x1[15], &x1[14]);
- btf_32_avx2_type0_new(cospi_m08, cospi_p56, &x1[17], &x1[30], _r, cos_bit);
- btf_32_avx2_type0_new(cospi_m56, cospi_m08, &x1[18], &x1[29], _r, cos_bit);
- btf_32_avx2_type0_new(cospi_m40, cospi_p24, &x1[21], &x1[26], _r, cos_bit);
- btf_32_avx2_type0_new(cospi_m24, cospi_m40, &x1[22], &x1[25], _r, cos_bit);
- btf_32_add_sub_avx2(&x1[32], &x1[35]);
- btf_32_add_sub_avx2(&x1[33], &x1[34]);
- btf_32_add_sub_avx2(&x1[39], &x1[36]);
- btf_32_add_sub_avx2(&x1[38], &x1[37]);
- btf_32_add_sub_avx2(&x1[40], &x1[43]);
- btf_32_add_sub_avx2(&x1[41], &x1[42]);
- btf_32_add_sub_avx2(&x1[47], &x1[44]);
- btf_32_add_sub_avx2(&x1[46], &x1[45]);
- btf_32_add_sub_avx2(&x1[48], &x1[51]);
- btf_32_add_sub_avx2(&x1[49], &x1[50]);
- btf_32_add_sub_avx2(&x1[55], &x1[52]);
- btf_32_add_sub_avx2(&x1[54], &x1[53]);
- btf_32_add_sub_avx2(&x1[56], &x1[59]);
- btf_32_add_sub_avx2(&x1[57], &x1[58]);
- btf_32_add_sub_avx2(&x1[63], &x1[60]);
- btf_32_add_sub_avx2(&x1[62], &x1[61]);
-
- // stage 8: last steps for x1[8..15] (x1[8 + j] with x1[15 - j]); add_sub on adjacent pairs in x1[16..31]; cospi-weighted steps on x1[33,34,37,38,41,42,45,46] paired with x1[62,61,58,57,54,53,50,49]
- btf_32_avx2_type1_new(cospi_p60, cospi_p04, &x1[8], &x1[15], _r, cos_bit);
- btf_32_avx2_type1_new(cospi_p28, cospi_p36, &x1[9], &x1[14], _r, cos_bit);
- btf_32_avx2_type1_new(cospi_p44, cospi_p20, &x1[10], &x1[13], _r, cos_bit);
- btf_32_avx2_type1_new(cospi_p12, cospi_p52, &x1[11], &x1[12], _r, cos_bit);
- btf_32_add_sub_avx2(&x1[16], &x1[17]);
- btf_32_add_sub_avx2(&x1[19], &x1[18]);
- btf_32_add_sub_avx2(&x1[20], &x1[21]);
- btf_32_add_sub_avx2(&x1[23], &x1[22]);
- btf_32_add_sub_avx2(&x1[24], &x1[25]);
- btf_32_add_sub_avx2(&x1[27], &x1[26]);
- btf_32_add_sub_avx2(&x1[28], &x1[29]);
- btf_32_add_sub_avx2(&x1[31], &x1[30]);
- btf_32_avx2_type0_new(cospi_m04, cospi_p60, &x1[33], &x1[62], _r, cos_bit);
- btf_32_avx2_type0_new(cospi_m60, cospi_m04, &x1[34], &x1[61], _r, cos_bit);
- btf_32_avx2_type0_new(cospi_m36, cospi_p28, &x1[37], &x1[58], _r, cos_bit);
- btf_32_avx2_type0_new(cospi_m28, cospi_m36, &x1[38], &x1[57], _r, cos_bit);
- btf_32_avx2_type0_new(cospi_m20, cospi_p44, &x1[41], &x1[54], _r, cos_bit);
- btf_32_avx2_type0_new(cospi_m44, cospi_m20, &x1[42], &x1[53], _r, cos_bit);
- btf_32_avx2_type0_new(cospi_m52, cospi_p12, &x1[45], &x1[50], _r, cos_bit);
- btf_32_avx2_type0_new(cospi_m12, cospi_m52, &x1[46], &x1[49], _r, cos_bit);
-
- // stage 9: last steps for x1[16..31] (x1[16 + j] with x1[31 - j]); add_sub on adjacent pairs in x1[32..63]
- btf_32_avx2_type1_new(cospi_p62, cospi_p02, &x1[16], &x1[31], _r, cos_bit);
- btf_32_avx2_type1_new(cospi_p30, cospi_p34, &x1[17], &x1[30], _r, cos_bit);
- btf_32_avx2_type1_new(cospi_p46, cospi_p18, &x1[18], &x1[29], _r, cos_bit);
- btf_32_avx2_type1_new(cospi_p14, cospi_p50, &x1[19], &x1[28], _r, cos_bit);
- btf_32_avx2_type1_new(cospi_p54, cospi_p10, &x1[20], &x1[27], _r, cos_bit);
- btf_32_avx2_type1_new(cospi_p22, cospi_p42, &x1[21], &x1[26], _r, cos_bit);
- btf_32_avx2_type1_new(cospi_p38, cospi_p26, &x1[22], &x1[25], _r, cos_bit);
- btf_32_avx2_type1_new(cospi_p06, cospi_p58, &x1[23], &x1[24], _r, cos_bit);
- btf_32_add_sub_avx2(&x1[32], &x1[33]);
- btf_32_add_sub_avx2(&x1[35], &x1[34]);
- btf_32_add_sub_avx2(&x1[36], &x1[37]);
- btf_32_add_sub_avx2(&x1[39], &x1[38]);
- btf_32_add_sub_avx2(&x1[40], &x1[41]);
- btf_32_add_sub_avx2(&x1[43], &x1[42]);
- btf_32_add_sub_avx2(&x1[44], &x1[45]);
- btf_32_add_sub_avx2(&x1[47], &x1[46]);
- btf_32_add_sub_avx2(&x1[48], &x1[49]);
- btf_32_add_sub_avx2(&x1[51], &x1[50]);
- btf_32_add_sub_avx2(&x1[52], &x1[53]);
- btf_32_add_sub_avx2(&x1[55], &x1[54]);
- btf_32_add_sub_avx2(&x1[56], &x1[57]);
- btf_32_add_sub_avx2(&x1[59], &x1[58]);
- btf_32_add_sub_avx2(&x1[60], &x1[61]);
- btf_32_add_sub_avx2(&x1[63], &x1[62]);
-
- // stage 10: last steps for x1[32..63] (x1[32 + j] with x1[63 - j]), using the odd-indexed cospi entries 1..63
- btf_32_avx2_type1_new(cospi_p63, cospi_p01, &x1[32], &x1[63], _r, cos_bit);
- btf_32_avx2_type1_new(cospi_p31, cospi_p33, &x1[33], &x1[62], _r, cos_bit);
- btf_32_avx2_type1_new(cospi_p47, cospi_p17, &x1[34], &x1[61], _r, cos_bit);
- btf_32_avx2_type1_new(cospi_p15, cospi_p49, &x1[35], &x1[60], _r, cos_bit);
- btf_32_avx2_type1_new(cospi_p55, cospi_p09, &x1[36], &x1[59], _r, cos_bit);
- btf_32_avx2_type1_new(cospi_p23, cospi_p41, &x1[37], &x1[58], _r, cos_bit);
- btf_32_avx2_type1_new(cospi_p39, cospi_p25, &x1[38], &x1[57], _r, cos_bit);
- btf_32_avx2_type1_new(cospi_p07, cospi_p57, &x1[39], &x1[56], _r, cos_bit);
- btf_32_avx2_type1_new(cospi_p59, cospi_p05, &x1[40], &x1[55], _r, cos_bit);
- btf_32_avx2_type1_new(cospi_p27, cospi_p37, &x1[41], &x1[54], _r, cos_bit);
- btf_32_avx2_type1_new(cospi_p43, cospi_p21, &x1[42], &x1[53], _r, cos_bit);
- btf_32_avx2_type1_new(cospi_p11, cospi_p53, &x1[43], &x1[52], _r, cos_bit);
- btf_32_avx2_type1_new(cospi_p51, cospi_p13, &x1[44], &x1[51], _r, cos_bit);
- btf_32_avx2_type1_new(cospi_p19, cospi_p45, &x1[45], &x1[50], _r, cos_bit);
- btf_32_avx2_type1_new(cospi_p35, cospi_p29, &x1[46], &x1[49], _r, cos_bit);
- btf_32_avx2_type1_new(cospi_p03, cospi_p61, &x1[47], &x1[48], _r, cos_bit);
-
- // stage 11: copy x1[] to output[] in bit-reversed order: output[k] = x1[r], where r is k with its 6 index bits reversed
- output[0] = x1[0];
- output[1] = x1[32];
- output[2] = x1[16];
- output[3] = x1[48];
- output[4] = x1[8];
- output[5] = x1[40];
- output[6] = x1[24];
- output[7] = x1[56];
- output[8] = x1[4];
- output[9] = x1[36];
- output[10] = x1[20];
- output[11] = x1[52];
- output[12] = x1[12];
- output[13] = x1[44];
- output[14] = x1[28];
- output[15] = x1[60];
- output[16] = x1[2];
- output[17] = x1[34];
- output[18] = x1[18];
- output[19] = x1[50];
- output[20] = x1[10];
- output[21] = x1[42];
- output[22] = x1[26];
- output[23] = x1[58];
- output[24] = x1[6];
- output[25] = x1[38];
- output[26] = x1[22];
- output[27] = x1[54];
- output[28] = x1[14];
- output[29] = x1[46];
- output[30] = x1[30];
- output[31] = x1[62];
- output[32] = x1[1];
- output[33] = x1[33];
- output[34] = x1[17];
- output[35] = x1[49];
- output[36] = x1[9];
- output[37] = x1[41];
- output[38] = x1[25];
- output[39] = x1[57];
- output[40] = x1[5];
- output[41] = x1[37];
- output[42] = x1[21];
- output[43] = x1[53];
- output[44] = x1[13];
- output[45] = x1[45];
- output[46] = x1[29];
- output[47] = x1[61];
- output[48] = x1[3];
- output[49] = x1[35];
- output[50] = x1[19];
- output[51] = x1[51];
- output[52] = x1[11];
- output[53] = x1[43];
- output[54] = x1[27];
- output[55] = x1[59];
- output[56] = x1[7];
- output[57] = x1[39];
- output[58] = x1[23];
- output[59] = x1[55];
- output[60] = x1[15];
- output[61] = x1[47];
- output[62] = x1[31];
- output[63] = x1[63];
-}
-
-static INLINE void fadst16x16_new_avx2(const __m256i *input, __m256i *output,
- int8_t cos_bit) {
- const int32_t *cospi = cospi_arr(cos_bit);
- const __m256i __zero = _mm256_setzero_si256();
- const __m256i _r = _mm256_set1_epi32(1 << (cos_bit - 1));
-
- __m256i cospi_p32_p32 = pair_set_w16_epi16(cospi[32], cospi[32]);
- __m256i cospi_p32_m32 = pair_set_w16_epi16(cospi[32], -cospi[32]);
- __m256i cospi_p16_p48 = pair_set_w16_epi16(cospi[16], cospi[48]);
- __m256i cospi_p48_m16 = pair_set_w16_epi16(cospi[48], -cospi[16]);
- __m256i cospi_m48_p16 = pair_set_w16_epi16(-cospi[48], cospi[16]);
- __m256i cospi_p08_p56 = pair_set_w16_epi16(cospi[8], cospi[56]);
- __m256i cospi_p56_m08 = pair_set_w16_epi16(cospi[56], -cospi[8]);
- __m256i cospi_p40_p24 = pair_set_w16_epi16(cospi[40], cospi[24]);
- __m256i cospi_p24_m40 = pair_set_w16_epi16(cospi[24], -cospi[40]);
- __m256i cospi_m56_p08 = pair_set_w16_epi16(-cospi[56], cospi[8]);
- __m256i cospi_m24_p40 = pair_set_w16_epi16(-cospi[24], cospi[40]);
- __m256i cospi_p02_p62 = pair_set_w16_epi16(cospi[2], cospi[62]);
- __m256i cospi_p62_m02 = pair_set_w16_epi16(cospi[62], -cospi[2]);
- __m256i cospi_p10_p54 = pair_set_w16_epi16(cospi[10], cospi[54]);
- __m256i cospi_p54_m10 = pair_set_w16_epi16(cospi[54], -cospi[10]);
- __m256i cospi_p18_p46 = pair_set_w16_epi16(cospi[18], cospi[46]);
- __m256i cospi_p46_m18 = pair_set_w16_epi16(cospi[46], -cospi[18]);
- __m256i cospi_p26_p38 = pair_set_w16_epi16(cospi[26], cospi[38]);
- __m256i cospi_p38_m26 = pair_set_w16_epi16(cospi[38], -cospi[26]);
- __m256i cospi_p34_p30 = pair_set_w16_epi16(cospi[34], cospi[30]);
- __m256i cospi_p30_m34 = pair_set_w16_epi16(cospi[30], -cospi[34]);
- __m256i cospi_p42_p22 = pair_set_w16_epi16(cospi[42], cospi[22]);
- __m256i cospi_p22_m42 = pair_set_w16_epi16(cospi[22], -cospi[42]);
- __m256i cospi_p50_p14 = pair_set_w16_epi16(cospi[50], cospi[14]);
- __m256i cospi_p14_m50 = pair_set_w16_epi16(cospi[14], -cospi[50]);
- __m256i cospi_p58_p06 = pair_set_w16_epi16(cospi[58], cospi[6]);
- __m256i cospi_p06_m58 = pair_set_w16_epi16(cospi[6], -cospi[58]);
-
- // stage 1
- __m256i x1[16];
- x1[0] = input[0];
- x1[1] = _mm256_subs_epi16(__zero, input[15]);
- x1[2] = _mm256_subs_epi16(__zero, input[7]);
- x1[3] = input[8];
- x1[4] = _mm256_subs_epi16(__zero, input[3]);
- x1[5] = input[12];
- x1[6] = input[4];
- x1[7] = _mm256_subs_epi16(__zero, input[11]);
- x1[8] = _mm256_subs_epi16(__zero, input[1]);
- x1[9] = input[14];
- x1[10] = input[6];
- x1[11] = _mm256_subs_epi16(__zero, input[9]);
- x1[12] = input[2];
- x1[13] = _mm256_subs_epi16(__zero, input[13]);
- x1[14] = _mm256_subs_epi16(__zero, input[5]);
- x1[15] = input[10];
-
- // stage 2
- btf_16_w16_avx2(cospi_p32_p32, cospi_p32_m32, &x1[2], &x1[3], _r, cos_bit);
- btf_16_w16_avx2(cospi_p32_p32, cospi_p32_m32, &x1[6], &x1[7], _r, cos_bit);
- btf_16_w16_avx2(cospi_p32_p32, cospi_p32_m32, &x1[10], &x1[11], _r, cos_bit);
- btf_16_w16_avx2(cospi_p32_p32, cospi_p32_m32, &x1[14], &x1[15], _r, cos_bit);
-
- // stage 3
- btf_16_adds_subs_avx2(&x1[0], &x1[2]);
- btf_16_adds_subs_avx2(&x1[1], &x1[3]);
- btf_16_adds_subs_avx2(&x1[4], &x1[6]);
- btf_16_adds_subs_avx2(&x1[5], &x1[7]);
- btf_16_adds_subs_avx2(&x1[8], &x1[10]);
- btf_16_adds_subs_avx2(&x1[9], &x1[11]);
- btf_16_adds_subs_avx2(&x1[12], &x1[14]);
- btf_16_adds_subs_avx2(&x1[13], &x1[15]);
-
- // stage 4
- btf_16_w16_avx2(cospi_p16_p48, cospi_p48_m16, &x1[4], &x1[5], _r, cos_bit);
- btf_16_w16_avx2(cospi_m48_p16, cospi_p16_p48, &x1[6], &x1[7], _r, cos_bit);
- btf_16_w16_avx2(cospi_p16_p48, cospi_p48_m16, &x1[12], &x1[13], _r, cos_bit);
- btf_16_w16_avx2(cospi_m48_p16, cospi_p16_p48, &x1[14], &x1[15], _r, cos_bit);
-
- // stage 5
- btf_16_adds_subs_avx2(&x1[0], &x1[4]);
- btf_16_adds_subs_avx2(&x1[1], &x1[5]);
- btf_16_adds_subs_avx2(&x1[2], &x1[6]);
- btf_16_adds_subs_avx2(&x1[3], &x1[7]);
- btf_16_adds_subs_avx2(&x1[8], &x1[12]);
- btf_16_adds_subs_avx2(&x1[9], &x1[13]);
- btf_16_adds_subs_avx2(&x1[10], &x1[14]);
- btf_16_adds_subs_avx2(&x1[11], &x1[15]);
-
- // stage 6
- btf_16_w16_avx2(cospi_p08_p56, cospi_p56_m08, &x1[8], &x1[9], _r, cos_bit);
- btf_16_w16_avx2(cospi_p40_p24, cospi_p24_m40, &x1[10], &x1[11], _r, cos_bit);
- btf_16_w16_avx2(cospi_m56_p08, cospi_p08_p56, &x1[12], &x1[13], _r, cos_bit);
- btf_16_w16_avx2(cospi_m24_p40, cospi_p40_p24, &x1[14], &x1[15], _r, cos_bit);
-
- // stage 7
- btf_16_adds_subs_avx2(&x1[0], &x1[8]);
- btf_16_adds_subs_avx2(&x1[1], &x1[9]);
- btf_16_adds_subs_avx2(&x1[2], &x1[10]);
- btf_16_adds_subs_avx2(&x1[3], &x1[11]);
- btf_16_adds_subs_avx2(&x1[4], &x1[12]);
- btf_16_adds_subs_avx2(&x1[5], &x1[13]);
- btf_16_adds_subs_avx2(&x1[6], &x1[14]);
- btf_16_adds_subs_avx2(&x1[7], &x1[15]);
-
- // stage 8
- btf_16_w16_avx2(cospi_p02_p62, cospi_p62_m02, &x1[0], &x1[1], _r, cos_bit);
- btf_16_w16_avx2(cospi_p10_p54, cospi_p54_m10, &x1[2], &x1[3], _r, cos_bit);
- btf_16_w16_avx2(cospi_p18_p46, cospi_p46_m18, &x1[4], &x1[5], _r, cos_bit);
- btf_16_w16_avx2(cospi_p26_p38, cospi_p38_m26, &x1[6], &x1[7], _r, cos_bit);
- btf_16_w16_avx2(cospi_p34_p30, cospi_p30_m34, &x1[8], &x1[9], _r, cos_bit);
- btf_16_w16_avx2(cospi_p42_p22, cospi_p22_m42, &x1[10], &x1[11], _r, cos_bit);
- btf_16_w16_avx2(cospi_p50_p14, cospi_p14_m50, &x1[12], &x1[13], _r, cos_bit);
- btf_16_w16_avx2(cospi_p58_p06, cospi_p06_m58, &x1[14], &x1[15], _r, cos_bit);
-
- // stage 9
- output[0] = x1[1];
- output[1] = x1[14];
- output[2] = x1[3];
- output[3] = x1[12];
- output[4] = x1[5];
- output[5] = x1[10];
- output[6] = x1[7];
- output[7] = x1[8];
- output[8] = x1[9];
- output[9] = x1[6];
- output[10] = x1[11];
- output[11] = x1[4];
- output[12] = x1[13];
- output[13] = x1[2];
- output[14] = x1[15];
- output[15] = x1[0];
-}
-
-static INLINE __m256i scale_round_avx2(const __m256i a, const int scale) {
- const __m256i scale__r = pair_set_w16_epi16(scale, 1 << (NewSqrt2Bits - 1));
- const __m256i b = _mm256_madd_epi16(a, scale__r);
- return _mm256_srai_epi32(b, NewSqrt2Bits);
-}
-
-static INLINE void fidentity16x16_new_avx2(const __m256i *input,
- __m256i *output, int8_t cos_bit) {
- (void)cos_bit;
- const __m256i one = _mm256_set1_epi16(1);
-
- for (int i = 0; i < 16; ++i) {
- const __m256i a_lo = _mm256_unpacklo_epi16(input[i], one);
- const __m256i a_hi = _mm256_unpackhi_epi16(input[i], one);
- const __m256i b_lo = scale_round_avx2(a_lo, 2 * NewSqrt2);
- const __m256i b_hi = scale_round_avx2(a_hi, 2 * NewSqrt2);
- output[i] = _mm256_packs_epi32(b_lo, b_hi);
- }
-}
-
-static INLINE void fidentity16x32_new_avx2(const __m256i *input,
- __m256i *output, int8_t cos_bit) {
- (void)cos_bit;
- for (int i = 0; i < 32; ++i) {
- output[i] = _mm256_slli_epi16(input[i], 2);
- }
-}
-
-static INLINE void av1_round_shift_array_32_avx2(__m256i *input,
- __m256i *output,
- const int size,
- const int bit) {
- if (bit > 0) {
- int i;
- for (i = 0; i < size; i++) {
- output[i] = av1_round_shift_32_avx2(input[i], bit);
- }
- } else {
- int i;
- for (i = 0; i < size; i++) {
- output[i] = _mm256_slli_epi32(input[i], -bit);
- }
- }
-}
-
-static INLINE void av1_round_shift_rect_array_32_avx2(__m256i *input,
- __m256i *output,
- const int size,
- const int bit) {
- const __m256i sqrt2 = _mm256_set1_epi32(NewSqrt2);
- if (bit > 0) {
- int i;
- for (i = 0; i < size; i++) {
- const __m256i r0 = av1_round_shift_32_avx2(input[i], bit);
- const __m256i r1 = _mm256_mullo_epi32(sqrt2, r0);
- output[i] = av1_round_shift_32_avx2(r1, NewSqrt2Bits);
- }
- } else {
- int i;
- for (i = 0; i < size; i++) {
- const __m256i r0 = _mm256_slli_epi32(input[i], -bit);
- const __m256i r1 = _mm256_mullo_epi32(sqrt2, r0);
- output[i] = av1_round_shift_32_avx2(r1, NewSqrt2Bits);
- }
- }
-}
-
-static INLINE void transpose_32_8x8_avx2(int stride, const __m256i *inputA,
- __m256i *output) {
- __m256i temp0 = _mm256_unpacklo_epi32(inputA[0], inputA[2]);
- __m256i temp1 = _mm256_unpackhi_epi32(inputA[0], inputA[2]);
- __m256i temp2 = _mm256_unpacklo_epi32(inputA[1], inputA[3]);
- __m256i temp3 = _mm256_unpackhi_epi32(inputA[1], inputA[3]);
- __m256i temp4 = _mm256_unpacklo_epi32(inputA[4], inputA[6]);
- __m256i temp5 = _mm256_unpackhi_epi32(inputA[4], inputA[6]);
- __m256i temp6 = _mm256_unpacklo_epi32(inputA[5], inputA[7]);
- __m256i temp7 = _mm256_unpackhi_epi32(inputA[5], inputA[7]);
-
- __m256i t0 = _mm256_unpacklo_epi32(temp0, temp2);
- __m256i t1 = _mm256_unpackhi_epi32(temp0, temp2);
- __m256i t2 = _mm256_unpacklo_epi32(temp1, temp3);
- __m256i t3 = _mm256_unpackhi_epi32(temp1, temp3);
- __m256i t4 = _mm256_unpacklo_epi32(temp4, temp6);
- __m256i t5 = _mm256_unpackhi_epi32(temp4, temp6);
- __m256i t6 = _mm256_unpacklo_epi32(temp5, temp7);
- __m256i t7 = _mm256_unpackhi_epi32(temp5, temp7);
-
- output[0 * stride] = _mm256_permute2x128_si256(t0, t4, 0x20);
- output[1 * stride] = _mm256_permute2x128_si256(t1, t5, 0x20);
- output[2 * stride] = _mm256_permute2x128_si256(t2, t6, 0x20);
- output[3 * stride] = _mm256_permute2x128_si256(t3, t7, 0x20);
- output[4 * stride] = _mm256_permute2x128_si256(t0, t4, 0x31);
- output[5 * stride] = _mm256_permute2x128_si256(t1, t5, 0x31);
- output[6 * stride] = _mm256_permute2x128_si256(t2, t6, 0x31);
- output[7 * stride] = _mm256_permute2x128_si256(t3, t7, 0x31);
-}
-
-// Store 8 16 bit values. Sign extend the values.
-static INLINE void store_buffer_16bit_to_32bit_w16_avx2(const __m256i *const in,
- int32_t *out,
- const int stride,
- const int out_size) {
- for (int i = 0; i < out_size; ++i) {
- _mm256_store_si256((__m256i *)(out),
- _mm256_cvtepi16_epi32(_mm256_castsi256_si128(in[i])));
- _mm256_store_si256(
- (__m256i *)(out + 8),
- _mm256_cvtepi16_epi32(_mm256_extracti128_si256(in[i], 1)));
- out += stride;
- }
-}
-
-static INLINE void store_rect_16bit_to_32bit_avx2(const __m256i a,
- int32_t *const b) {
- const __m256i one = _mm256_set1_epi16(1);
- const __m256i a_reoder = _mm256_permute4x64_epi64(a, 0xd8);
- const __m256i a_lo = _mm256_unpacklo_epi16(a_reoder, one);
- const __m256i a_hi = _mm256_unpackhi_epi16(a_reoder, one);
- const __m256i b_lo = scale_round_avx2(a_lo, NewSqrt2);
- const __m256i b_hi = scale_round_avx2(a_hi, NewSqrt2);
- _mm256_store_si256((__m256i *)b, b_lo);
- _mm256_store_si256((__m256i *)(b + 8), b_hi);
-}
-
-static INLINE void store_rect_buffer_16bit_to_32bit_w16_avx2(
- const __m256i *const in, int32_t *const out, const int stride,
- const int out_size) {
- for (int i = 0; i < out_size; ++i) {
- store_rect_16bit_to_32bit_avx2(in[i], out + i * stride);
- }
-}
-
-static const transform_1d_avx2 col_txfm16x32_arr[TX_TYPES] = {
- fdct16x32_new_avx2, // DCT_DCT
- NULL, // ADST_DCT
- NULL, // DCT_ADST
- NULL, // ADST_ADST
- NULL, // FLIPADST_DCT
- NULL, // DCT_FLIPADST
- NULL, // FLIPADST_FLIPADST
- NULL, // ADST_FLIPADST
- NULL, // FLIPADST_ADST
- fidentity16x32_new_avx2, // IDTX
- fdct16x32_new_avx2, // V_DCT
- fidentity16x32_new_avx2, // H_DCT
- NULL, // V_ADST
- NULL, // H_ADST
- NULL, // V_FLIPADST
- NULL // H_FLIPADST
-};
-
-static const transform_1d_avx2 row_txfm16x32_arr[TX_TYPES] = {
- fdct16x32_new_avx2, // DCT_DCT
- NULL, // ADST_DCT
- NULL, // DCT_ADST
- NULL, // ADST_ADST
- NULL, // FLIPADST_DCT
- NULL, // DCT_FLIPADST
- NULL, // FLIPADST_FLIPADST
- NULL, // ADST_FLIPADST
- NULL, // FLIPADST_ADST
- fidentity16x32_new_avx2, // IDTX
- fidentity16x32_new_avx2, // V_DCT
- fdct16x32_new_avx2, // H_DCT
- NULL, // V_ADST
- NULL, // H_ADST
- NULL, // V_FLIPADST
- NULL // H_FLIPADST
-};
-
-static const transform_1d_avx2 col_txfm16x16_arr[TX_TYPES] = {
- fdct16x16_new_avx2, // DCT_DCT
- fadst16x16_new_avx2, // ADST_DCT
- fdct16x16_new_avx2, // DCT_ADST
- fadst16x16_new_avx2, // ADST_ADST
- fadst16x16_new_avx2, // FLIPADST_DCT
- fdct16x16_new_avx2, // DCT_FLIPADST
- fadst16x16_new_avx2, // FLIPADST_FLIPADST
- fadst16x16_new_avx2, // ADST_FLIPADST
- fadst16x16_new_avx2, // FLIPADST_ADST
- fidentity16x16_new_avx2, // IDTX
- fdct16x16_new_avx2, // V_DCT
- fidentity16x16_new_avx2, // H_DCT
- fadst16x16_new_avx2, // V_ADST
- fidentity16x16_new_avx2, // H_ADST
- fadst16x16_new_avx2, // V_FLIPADST
- fidentity16x16_new_avx2 // H_FLIPADST
-};
-
-static const transform_1d_avx2 row_txfm16x16_arr[TX_TYPES] = {
- fdct16x16_new_avx2, // DCT_DCT
- fdct16x16_new_avx2, // ADST_DCT
- fadst16x16_new_avx2, // DCT_ADST
- fadst16x16_new_avx2, // ADST_ADST
- fdct16x16_new_avx2, // FLIPADST_DCT
- fadst16x16_new_avx2, // DCT_FLIPADST
- fadst16x16_new_avx2, // FLIPADST_FLIPADST
- fadst16x16_new_avx2, // ADST_FLIPADST
- fadst16x16_new_avx2, // FLIPADST_ADST
- fidentity16x16_new_avx2, // IDTX
- fidentity16x16_new_avx2, // V_DCT
- fdct16x16_new_avx2, // H_DCT
- fidentity16x16_new_avx2, // V_ADST
- fadst16x16_new_avx2, // H_ADST
- fidentity16x16_new_avx2, // V_FLIPADST
- fadst16x16_new_avx2 // H_FLIPADST
-};
-
-static void lowbd_fwd_txfm2d_16x16_avx2(const int16_t *input, int32_t *output,
- int stride, TX_TYPE tx_type, int bd) {
- (void)bd;
- const TX_SIZE tx_size = TX_16X16;
- __m256i buf0[16], buf1[16];
- const int8_t *shift = fwd_txfm_shift_ls[tx_size];
- const int txw_idx = get_txw_idx(tx_size);
- const int txh_idx = get_txh_idx(tx_size);
- const int cos_bit_col = fwd_cos_bit_col[txw_idx][txh_idx];
- const int cos_bit_row = fwd_cos_bit_row[txw_idx][txh_idx];
- const int width = tx_size_wide[tx_size];
- const int height = tx_size_high[tx_size];
- const transform_1d_avx2 col_txfm = col_txfm16x16_arr[tx_type];
- const transform_1d_avx2 row_txfm = row_txfm16x16_arr[tx_type];
- int ud_flip, lr_flip;
-
- get_flip_cfg(tx_type, &ud_flip, &lr_flip);
- const int32_t i = 0;
- if (ud_flip) {
- load_buffer_16bit_to_16bit_flip_avx2(input + 16 * i, stride, buf0, height);
- } else {
- load_buffer_16bit_to_16bit_avx2(input + 16 * i, stride, buf0, height);
- }
- round_shift_16bit_w16_avx2(buf0, height, shift[0]);
- col_txfm(buf0, buf0, cos_bit_col);
- round_shift_16bit_w16_avx2(buf0, height, shift[1]);
- transpose_16bit_16x16_avx2(buf0, buf1 + 0 * width + 16 * i);
-
- __m256i *buf;
- if (lr_flip) {
- buf = buf0;
- flip_buf_avx2(buf1 + width * i, buf, width);
- } else {
- buf = buf1 + width * i;
- }
- row_txfm(buf, buf, cos_bit_row);
- round_shift_16bit_w16_avx2(buf, width, shift[2]);
- transpose_16bit_16x16_avx2(buf, buf);
- store_buffer_16bit_to_32bit_w16_avx2(buf, output + 16 * width * i, width, 16);
-}
-
-static void lowbd_fwd_txfm2d_32x32_avx2(const int16_t *input, int32_t *output,
- int stride, TX_TYPE tx_type, int bd) {
- (void)bd;
- const TX_SIZE tx_size = TX_32X32;
- __m256i buf0[32], buf1[128];
- const int8_t *shift = fwd_txfm_shift_ls[tx_size];
- const int txw_idx = get_txw_idx(tx_size);
- const int txh_idx = get_txh_idx(tx_size);
- const int cos_bit_col = fwd_cos_bit_col[txw_idx][txh_idx];
- const int cos_bit_row = fwd_cos_bit_row[txw_idx][txh_idx];
- const int width = tx_size_wide[tx_size];
- const int height = tx_size_high[tx_size];
- const transform_1d_avx2 col_txfm = col_txfm16x32_arr[tx_type];
- const transform_1d_avx2 row_txfm = row_txfm16x32_arr[tx_type];
-
- int ud_flip, lr_flip;
- get_flip_cfg(tx_type, &ud_flip, &lr_flip);
-
- for (int i = 0; i < 2; i++) {
- if (ud_flip) {
- load_buffer_16bit_to_16bit_flip_avx2(input + 16 * i, stride, buf0,
- height);
- } else {
- load_buffer_16bit_to_16bit_avx2(input + 16 * i, stride, buf0, height);
- }
- round_shift_16bit_w16_avx2(buf0, height, shift[0]);
- col_txfm(buf0, buf0, cos_bit_col);
- round_shift_16bit_w16_avx2(buf0, height, shift[1]);
- transpose_16bit_16x16_avx2(buf0 + 0 * 16, buf1 + 0 * width + 16 * i);
- transpose_16bit_16x16_avx2(buf0 + 1 * 16, buf1 + 1 * width + 16 * i);
- }
-
- for (int i = 0; i < 2; i++) {
- __m256i *buf;
- if (lr_flip) {
- buf = buf0;
- flip_buf_avx2(buf1 + width * i, buf, width);
- } else {
- buf = buf1 + width * i;
- }
- row_txfm(buf, buf, cos_bit_row);
- round_shift_16bit_w16_avx2(buf, width, shift[2]);
- transpose_16bit_16x16_avx2(buf, buf);
- store_buffer_16bit_to_32bit_w16_avx2(buf, output + 16 * width * i, width,
- 16);
- transpose_16bit_16x16_avx2(buf + 16, buf + 16);
- store_buffer_16bit_to_32bit_w16_avx2(buf + 16, output + 16 * width * i + 16,
- width, 16);
- }
-}
-
-static void lowbd_fwd_txfm2d_64x64_avx2(const int16_t *input, int32_t *output,
- int stride, TX_TYPE tx_type, int bd) {
- (void)bd;
- (void)tx_type;
- assert(tx_type == DCT_DCT);
- const TX_SIZE tx_size = TX_64X64;
- __m256i buf0[64], buf1[256];
- const int8_t *shift = fwd_txfm_shift_ls[tx_size];
- const int txw_idx = get_txw_idx(tx_size);
- const int txh_idx = get_txh_idx(tx_size);
- const int cos_bit_col = fwd_cos_bit_col[txw_idx][txh_idx];
- const int cos_bit_row = fwd_cos_bit_row[txw_idx][txh_idx];
- const int width = tx_size_wide[tx_size];
- const int height = tx_size_high[tx_size];
- const transform_1d_avx2 col_txfm = fdct16x64_new_avx2;
- const int width_div16 = (width >> 4);
- const int height_div16 = (height >> 4);
-
- for (int i = 0; i < width_div16; i++) {
- load_buffer_16bit_to_16bit_avx2(input + 16 * i, stride, buf0, height);
- round_shift_16bit_w16_avx2(buf0, height, shift[0]);
- col_txfm(buf0, buf0, cos_bit_col);
- round_shift_16bit_w16_avx2(buf0, height, shift[1]);
- for (int j = 0; j < AOMMIN(2, height_div16); ++j) {
- transpose_16bit_16x16_avx2(buf0 + j * 16, buf1 + j * width + 16 * i);
- }
- }
-
- for (int i = 0; i < AOMMIN(2, height_div16); i++) {
- __m256i bufA[64];
- __m256i bufB[64];
- __m128i *buf = (__m128i *)(buf1 + width * i);
- for (int j = 0; j < width; ++j) {
- bufA[j] = _mm256_cvtepi16_epi32(buf[j * 2]);
- bufB[j] = _mm256_cvtepi16_epi32(buf[j * 2 + 1]);
- }
- av1_fdct64_new_avx2(bufA, bufA, cos_bit_row);
- av1_fdct64_new_avx2(bufB, bufB, cos_bit_row);
- av1_round_shift_array_32_avx2(bufA, bufA, 32, -shift[2]);
- av1_round_shift_array_32_avx2(bufB, bufB, 32, -shift[2]);
-
- int32_t *output8 = output + 16 * 32 * i;
- for (int j = 0; j < 4; ++j) {
- __m256i *out = (__m256i *)(output8 + 8 * j);
- transpose_32_8x8_avx2(4, bufA + 8 * j, out);
- transpose_32_8x8_avx2(4, bufB + 8 * j, out + 8 * 4);
- }
- }
-}
-
-static void lowbd_fwd_txfm2d_16x32_avx2(const int16_t *input, int32_t *output,
- int stride, TX_TYPE tx_type, int bd) {
- (void)bd;
- const TX_SIZE tx_size = TX_16X32;
- __m256i buf0[32], buf1[32];
- const int8_t *shift = fwd_txfm_shift_ls[tx_size];
- const int txw_idx = get_txw_idx(tx_size);
- const int txh_idx = get_txh_idx(tx_size);
- const int cos_bit_col = fwd_cos_bit_col[txw_idx][txh_idx];
- const int cos_bit_row = fwd_cos_bit_row[txw_idx][txh_idx];
- const int width = tx_size_wide[tx_size];
- const int height = tx_size_high[tx_size];
- const transform_1d_avx2 col_txfm = col_txfm16x32_arr[tx_type];
- const transform_1d_avx2 row_txfm = row_txfm16x16_arr[tx_type];
-
- int ud_flip, lr_flip;
- get_flip_cfg(tx_type, &ud_flip, &lr_flip);
-
- if (ud_flip) {
- load_buffer_16bit_to_16bit_flip_avx2(input, stride, buf0, height);
- } else {
- load_buffer_16bit_to_16bit_avx2(input, stride, buf0, height);
- }
- round_shift_16bit_w16_avx2(buf0, height, shift[0]);
- col_txfm(buf0, buf0, cos_bit_col);
- round_shift_16bit_w16_avx2(buf0, height, shift[1]);
- transpose_16bit_16x16_avx2(buf0, buf1);
- transpose_16bit_16x16_avx2(buf0 + 16, buf1 + 16);
-
- for (int i = 0; i < 2; i++) {
- __m256i *buf;
- if (lr_flip) {
- buf = buf0;
- flip_buf_avx2(buf1 + width * i, buf, width);
- } else {
- buf = buf1 + width * i;
- }
- row_txfm(buf, buf, cos_bit_row);
- round_shift_16bit_w16_avx2(buf, width, shift[2]);
- transpose_16bit_16x16_avx2(buf, buf);
- store_rect_buffer_16bit_to_32bit_w16_avx2(buf, output + 16 * width * i,
- width, 16);
- }
-}
-
-static void lowbd_fwd_txfm2d_32x16_avx2(const int16_t *input, int32_t *output,
- int stride, TX_TYPE tx_type, int bd) {
- (void)bd;
- __m256i buf0[32], buf1[64];
- const int8_t *shift = fwd_txfm_shift_ls[TX_32X16];
- const int txw_idx = get_txw_idx(TX_32X16);
- const int txh_idx = get_txh_idx(TX_32X16);
- const int cos_bit_col = fwd_cos_bit_col[txw_idx][txh_idx];
- const int cos_bit_row = fwd_cos_bit_row[txw_idx][txh_idx];
- const int width = 32;
- const int height = 16;
- const transform_1d_avx2 col_txfm = col_txfm16x16_arr[tx_type];
- const transform_1d_avx2 row_txfm = row_txfm16x32_arr[tx_type];
-
- int ud_flip, lr_flip;
- get_flip_cfg(tx_type, &ud_flip, &lr_flip);
-
- for (int i = 0; i < 2; i++) {
- if (ud_flip) {
- load_buffer_16bit_to_16bit_flip_avx2(input + 16 * i, stride, buf0,
- height);
- } else {
- load_buffer_16bit_to_16bit_avx2(input + 16 * i, stride, buf0, height);
- }
- round_shift_16bit_w16_avx2(buf0, height, shift[0]);
- col_txfm(buf0, buf0, cos_bit_col);
- round_shift_16bit_w16_avx2(buf0, height, shift[1]);
- transpose_16bit_16x16_avx2(buf0, buf1 + 0 * width + 16 * i);
- }
-
- __m256i *buf;
- if (lr_flip) {
- buf = buf0;
- flip_buf_avx2(buf1, buf, width);
- } else {
- buf = buf1;
- }
- row_txfm(buf, buf, cos_bit_row);
- round_shift_16bit_w16_avx2(buf, width, shift[2]);
- transpose_16bit_16x16_avx2(buf, buf);
- store_rect_buffer_16bit_to_32bit_w16_avx2(buf, output, width, 16);
-
- transpose_16bit_16x16_avx2(buf + 16, buf + 16);
- store_rect_buffer_16bit_to_32bit_w16_avx2(buf + 16, output + 16, width, 16);
-}
-
-static void lowbd_fwd_txfm2d_64x32_avx2(const int16_t *input, int32_t *output,
- int stride, TX_TYPE tx_type, int bd) {
- (void)bd;
- const TX_SIZE tx_size = TX_64X32;
- __m256i buf0[64], buf1[256];
- const int8_t *shift = fwd_txfm_shift_ls[tx_size];
- const int txw_idx = get_txw_idx(tx_size);
- const int txh_idx = get_txh_idx(tx_size);
- const int cos_bit_col = fwd_cos_bit_col[txw_idx][txh_idx];
- const int cos_bit_row = fwd_cos_bit_row[txw_idx][txh_idx];
- const int width = tx_size_wide[tx_size];
- const int height = tx_size_high[tx_size];
- const transform_1d_avx2 col_txfm = col_txfm16x32_arr[tx_type];
- const int width_div16 = (width >> 4);
- const int height_div16 = (height >> 4);
-
- for (int i = 0; i < width_div16; i++) {
- load_buffer_16bit_to_16bit_avx2(input + 16 * i, stride, buf0, height);
- round_shift_16bit_w16_avx2(buf0, height, shift[0]);
- col_txfm(buf0, buf0, cos_bit_col);
- round_shift_16bit_w16_avx2(buf0, height, shift[1]);
- for (int j = 0; j < AOMMIN(4, height_div16); ++j) {
- transpose_16bit_16x16_avx2(buf0 + j * 16, buf1 + j * width + 16 * i);
- }
- }
- assert(tx_type == DCT_DCT);
- for (int i = 0; i < AOMMIN(2, height_div16); i++) {
- __m256i bufA[64];
- __m256i bufB[64];
- __m128i *buf = (__m128i *)(buf1 + width * i);
- for (int j = 0; j < width; ++j) {
- bufA[j] = _mm256_cvtepi16_epi32(buf[j * 2]);
- bufB[j] = _mm256_cvtepi16_epi32(buf[j * 2 + 1]);
- }
- av1_fdct64_new_avx2(bufA, bufA, cos_bit_row);
- av1_fdct64_new_avx2(bufB, bufB, cos_bit_row);
- av1_round_shift_rect_array_32_avx2(bufA, bufA, 32, -shift[2]);
- av1_round_shift_rect_array_32_avx2(bufB, bufB, 32, -shift[2]);
-
- int32_t *output8 = output + 16 * 32 * i;
- for (int j = 0; j < 4; ++j) {
- __m256i *out = (__m256i *)(output8 + 8 * j);
- transpose_32_8x8_avx2(4, bufA + 8 * j, out);
- transpose_32_8x8_avx2(4, bufB + 8 * j, out + 8 * 4);
- }
- }
-}
-
-static void lowbd_fwd_txfm2d_32x64_avx2(const int16_t *input, int32_t *output,
- int stride, TX_TYPE tx_type, int bd) {
- (void)bd;
- (void)tx_type;
- assert(tx_type == DCT_DCT);
- const TX_SIZE tx_size = TX_32X64;
- __m256i buf0[64], buf1[256];
- const int8_t *shift = fwd_txfm_shift_ls[tx_size];
- const int txw_idx = get_txw_idx(tx_size);
- const int txh_idx = get_txh_idx(tx_size);
- const int cos_bit_col = fwd_cos_bit_col[txw_idx][txh_idx];
- const int cos_bit_row = fwd_cos_bit_row[txw_idx][txh_idx];
- const int width = tx_size_wide[tx_size];
- const int height = tx_size_high[tx_size];
- const transform_1d_avx2 col_txfm = fdct16x64_new_avx2;
- const int width_div16 = (width >> 4);
- const int height_div16 = (height >> 4);
-
- for (int i = 0; i < width_div16; i++) {
- load_buffer_16bit_to_16bit_avx2(input + 16 * i, stride, buf0, height);
- round_shift_16bit_w16_avx2(buf0, height, shift[0]);
- col_txfm(buf0, buf0, cos_bit_col);
- round_shift_16bit_w16_avx2(buf0, height, shift[1]);
- for (int j = 0; j < AOMMIN(2, height_div16); ++j) {
- transpose_16bit_16x16_avx2(buf0 + j * 16, buf1 + j * width + 16 * i);
- }
- }
-
- for (int i = 0; i < AOMMIN(2, height_div16); i++) {
- __m256i bufA[32];
- __m256i bufB[32];
- __m128i *buf = (__m128i *)(buf1 + width * i);
- for (int j = 0; j < width; ++j) {
- bufA[j] = _mm256_cvtepi16_epi32(buf[j * 2]);
- bufB[j] = _mm256_cvtepi16_epi32(buf[j * 2 + 1]);
- }
- av1_fdct32_new_avx2(bufA, bufA, cos_bit_row);
- av1_fdct32_new_avx2(bufB, bufB, cos_bit_row);
- av1_round_shift_rect_array_32_avx2(bufA, bufA, 32, -shift[2]);
- av1_round_shift_rect_array_32_avx2(bufB, bufB, 32, -shift[2]);
-
- int32_t *output8 = output + 16 * 32 * i;
- for (int j = 0; j < 4; ++j) {
- __m256i *out = (__m256i *)(output8 + 8 * j);
- transpose_32_8x8_avx2(4, bufA + 8 * j, out);
- transpose_32_8x8_avx2(4, bufB + 8 * j, out + 8 * 4);
- }
- }
-}
-
-static void lowbd_fwd_txfm2d_16x64_avx2(const int16_t *input, int32_t *output,
- int stride, TX_TYPE tx_type, int bd) {
- (void)bd;
- (void)tx_type;
- assert(tx_type == DCT_DCT);
- const TX_SIZE tx_size = TX_16X64;
- __m256i buf0[64], buf1[64];
- const int8_t *shift = fwd_txfm_shift_ls[tx_size];
- const int txw_idx = get_txw_idx(tx_size);
- const int txh_idx = get_txh_idx(tx_size);
- const int cos_bit_col = fwd_cos_bit_col[txw_idx][txh_idx];
- const int cos_bit_row = fwd_cos_bit_row[txw_idx][txh_idx];
- const int width = tx_size_wide[tx_size];
- const int height = tx_size_high[tx_size];
- const transform_1d_avx2 col_txfm = fdct16x64_new_avx2;
- const transform_1d_avx2 row_txfm = fdct16x16_new_avx2;
- const int width_div16 = (width >> 4);
- const int height_div16 = (height >> 4);
-
- for (int i = 0; i < width_div16; i++) {
- load_buffer_16bit_to_16bit_avx2(input + 16 * i, stride, buf0, height);
- round_shift_16bit_w16_avx2(buf0, height, shift[0]);
- col_txfm(buf0, buf0, cos_bit_col);
- round_shift_16bit_w16_avx2(buf0, height, shift[1]);
- for (int j = 0; j < height_div16; ++j) {
- transpose_16bit_16x16_avx2(buf0 + j * 16, buf1 + j * width + 16 * i);
- }
- }
-
- for (int i = 0; i < AOMMIN(4, height_div16); i++) {
- __m256i *buf = buf1 + width * i;
- row_txfm(buf, buf, cos_bit_row);
- round_shift_16bit_w16_avx2(buf, width, shift[2]);
- int32_t *output16 = output + 16 * width * i;
- for (int j = 0; j < width_div16; ++j) {
- __m256i *buf16 = buf + 16 * j;
- transpose_16bit_16x16_avx2(buf16, buf16);
- store_buffer_16bit_to_32bit_w16_avx2(buf16, output16 + 16 * j, width, 16);
- }
- }
- // Zero out the bottom 16x32 area.
- memset(output + 16 * 32, 0, 16 * 32 * sizeof(*output));
-}
-
-static void lowbd_fwd_txfm2d_64x16_avx2(const int16_t *input, int32_t *output,
- int stride, TX_TYPE tx_type, int bd) {
- (void)bd;
- (void)tx_type;
- assert(tx_type == DCT_DCT);
- const TX_SIZE tx_size = TX_64X16;
- __m256i buf0[64], buf1[64];
- const int8_t *shift = fwd_txfm_shift_ls[tx_size];
- const int txw_idx = get_txw_idx(tx_size);
- const int txh_idx = get_txh_idx(tx_size);
- const int cos_bit_col = fwd_cos_bit_col[txw_idx][txh_idx];
- const int cos_bit_row = fwd_cos_bit_row[txw_idx][txh_idx];
- const int width = tx_size_wide[tx_size];
- const int height = tx_size_high[tx_size];
- const transform_1d_avx2 col_txfm = fdct16x16_new_avx2;
- const transform_1d_avx2 row_txfm = fdct16x64_new_avx2;
- const int width_div16 = (width >> 4);
- const int height_div16 = (height >> 4);
-
- for (int i = 0; i < width_div16; i++) {
- load_buffer_16bit_to_16bit_avx2(input + 16 * i, stride, buf0, height);
- round_shift_16bit_w16_avx2(buf0, height, shift[0]);
- col_txfm(buf0, buf0, cos_bit_col);
- round_shift_16bit_w16_avx2(buf0, height, shift[1]);
- for (int j = 0; j < height_div16; ++j) {
- transpose_16bit_16x16_avx2(buf0 + j * 16, buf1 + j * width + 16 * i);
- }
- }
-
- for (int i = 0; i < height_div16; i++) {
- __m256i *buf = buf1 + width * i;
- row_txfm(buf, buf, cos_bit_row);
- round_shift_16bit_w16_avx2(buf, width, shift[2]);
- int32_t *output16 = output + 16 * 32 * i;
- for (int j = 0; j < 2; ++j) {
- __m256i *buf16 = buf + 16 * j;
- transpose_16bit_16x16_avx2(buf16, buf16);
- store_buffer_16bit_to_32bit_w16_avx2(buf16, output16 + 16 * j, 32, 16);
- }
- }
-}
-
-static FwdTxfm2dFunc fwd_txfm2d_func_ls[TX_SIZES_ALL] = {
- av1_lowbd_fwd_txfm2d_4x4_sse2, // 4x4 transform
- av1_lowbd_fwd_txfm2d_8x8_sse2, // 8x8 transform
- lowbd_fwd_txfm2d_16x16_avx2, // 16x16 transform
- lowbd_fwd_txfm2d_32x32_avx2, // 32x32 transform
- lowbd_fwd_txfm2d_64x64_avx2, // 64x64 transform
- av1_lowbd_fwd_txfm2d_4x8_sse2, // 4x8 transform
- av1_lowbd_fwd_txfm2d_8x4_sse2, // 8x4 transform
- av1_lowbd_fwd_txfm2d_8x16_sse2, // 8x16 transform
- av1_lowbd_fwd_txfm2d_16x8_sse2, // 16x8 transform
- lowbd_fwd_txfm2d_16x32_avx2, // 16x32 transform
- lowbd_fwd_txfm2d_32x16_avx2, // 32x16 transform
- lowbd_fwd_txfm2d_32x64_avx2, // 32x64 transform
- lowbd_fwd_txfm2d_64x32_avx2, // 64x32 transform
- av1_lowbd_fwd_txfm2d_4x16_sse2, // 4x16 transform
- av1_lowbd_fwd_txfm2d_16x4_sse2, // 16x4 transform
- av1_lowbd_fwd_txfm2d_8x32_sse2, // 8x32 transform
- av1_lowbd_fwd_txfm2d_32x8_sse2, // 32x8 transform
- lowbd_fwd_txfm2d_16x64_avx2, // 16x64 transform
- lowbd_fwd_txfm2d_64x16_avx2, // 64x16 transform
-};
-
-void av1_lowbd_fwd_txfm_avx2(const int16_t *src_diff, tran_low_t *coeff,
- int diff_stride, TxfmParam *txfm_param) {
- FwdTxfm2dFunc fwd_txfm2d_func = fwd_txfm2d_func_ls[txfm_param->tx_size];
- if ((fwd_txfm2d_func == NULL) ||
- (txfm_param->lossless && txfm_param->tx_size == TX_4X4)) {
- av1_lowbd_fwd_txfm_c(src_diff, coeff, diff_stride, txfm_param);
- } else {
- fwd_txfm2d_func(src_diff, coeff, diff_stride, txfm_param->tx_type,
- txfm_param->bd);
- }
-}
diff --git a/third_party/aom/av1/encoder/x86/av1_fwd_txfm2d_sse4.c b/third_party/aom/av1/encoder/x86/av1_fwd_txfm2d_sse4.c
deleted file mode 100644
index 8ec0256eb..000000000
--- a/third_party/aom/av1/encoder/x86/av1_fwd_txfm2d_sse4.c
+++ /dev/null
@@ -1,365 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#include "config/av1_rtcd.h"
-
-#include "av1/common/enums.h"
-#include "av1/common/av1_txfm.h"
-#include "av1/common/x86/av1_txfm_sse2.h"
-#include "av1/common/x86/highbd_txfm_utility_sse4.h"
-#include "av1/encoder/av1_fwd_txfm1d_cfg.h"
-#include "av1/encoder/x86/av1_txfm1d_sse4.h"
-#include "av1/encoder/x86/av1_fwd_txfm_sse2.h"
-
-static INLINE void int16_array_with_stride_to_int32_array_without_stride(
- const int16_t *input, int stride, int32_t *output, int txfm1d_size) {
- int r, c;
- for (r = 0; r < txfm1d_size; r++) {
- for (c = 0; c < txfm1d_size; c++) {
- output[r * txfm1d_size + c] = (int32_t)input[r * stride + c];
- }
- }
-}
-
-typedef void (*TxfmFuncSSE2)(const __m128i *input, __m128i *output,
- const int8_t cos_bit, const int8_t *stage_range);
-
-static void fdct32_new_sse4_1(const __m128i *input, __m128i *output,
- const int8_t cos_bit, const int8_t *stage_range) {
- const int txfm_size = 32;
- const int num_per_128 = 4;
- __m128i buf0[32];
- __m128i buf1[32];
- int col_num = txfm_size / num_per_128;
- int col;
- (void)stage_range;
- for (col = 0; col < col_num; col++) {
- int j;
- for (j = 0; j < 32; ++j) {
- buf0[j] = input[j * col_num + col];
- }
- av1_fdct32_new_sse4_1(buf0, buf1, cos_bit);
- for (j = 0; j < 32; ++j) {
- output[j * col_num + col] = buf1[j];
- }
- }
-}
-
-static void fdct64_new_sse4_1(const __m128i *input, __m128i *output,
- const int8_t cos_bit, const int8_t *stage_range) {
- const int txfm_size = 64;
- const int num_per_128 = 4;
- int col_num = txfm_size / num_per_128;
- (void)stage_range;
- for (int col = 0; col < col_num; col++) {
- av1_fdct64_new_sse4_1((input + col), (output + col), cos_bit, col_num,
- col_num);
- }
-}
-
-static INLINE TxfmFuncSSE2 fwd_txfm_type_to_func(TXFM_TYPE txfm_type) {
- switch (txfm_type) {
- case TXFM_TYPE_DCT32: return fdct32_new_sse4_1; break;
- case TXFM_TYPE_DCT64: return fdct64_new_sse4_1; break;
- default: assert(0);
- }
- return NULL;
-}
-
-static INLINE void fwd_txfm2d_sse4_1(const int16_t *input, int32_t *output,
- const int stride,
- const TXFM_2D_FLIP_CFG *cfg,
- int32_t *txfm_buf) {
- // TODO(sarahparker) This does not currently support rectangular transforms
- // and will break without splitting txfm_size out into row and col size.
- // Rectangular transforms use c code only, so it should be ok for now.
- // It will be corrected when there are sse implementations for rectangular
- // transforms.
- assert(cfg->tx_size < TX_SIZES);
- const int txfm_size = tx_size_wide[cfg->tx_size];
- const int8_t *shift = cfg->shift;
- const int8_t *stage_range_col = cfg->stage_range_col;
- const int8_t *stage_range_row = cfg->stage_range_row;
- const int8_t cos_bit_col = cfg->cos_bit_col;
- const int8_t cos_bit_row = cfg->cos_bit_row;
- const TxfmFuncSSE2 txfm_func_col = fwd_txfm_type_to_func(cfg->txfm_type_col);
- const TxfmFuncSSE2 txfm_func_row = fwd_txfm_type_to_func(cfg->txfm_type_row);
-
- __m128i *buf_128 = (__m128i *)txfm_buf;
- __m128i *out_128 = (__m128i *)output;
- int num_per_128 = 4;
- int txfm2d_size_128 = txfm_size * txfm_size / num_per_128;
-
- int16_array_with_stride_to_int32_array_without_stride(input, stride, txfm_buf,
- txfm_size);
- av1_round_shift_array_32_sse4_1(buf_128, out_128, txfm2d_size_128, -shift[0]);
- txfm_func_col(out_128, buf_128, cos_bit_col, stage_range_col);
- av1_round_shift_array_32_sse4_1(buf_128, out_128, txfm2d_size_128, -shift[1]);
- transpose_32(txfm_size, out_128, buf_128);
- txfm_func_row(buf_128, out_128, cos_bit_row, stage_range_row);
- av1_round_shift_array_32_sse4_1(out_128, buf_128, txfm2d_size_128, -shift[2]);
- transpose_32(txfm_size, buf_128, out_128);
-}
-
-static INLINE void fwd_txfm2d_64x64_sse4_1(const int16_t *input,
- int32_t *output, const int stride,
- const TXFM_2D_FLIP_CFG *cfg,
- int32_t *txfm_buf) {
- assert(cfg->tx_size < TX_SIZES);
- const int txfm_size = tx_size_wide[cfg->tx_size];
- const int8_t *shift = cfg->shift;
- const int8_t *stage_range_col = cfg->stage_range_col;
- const int8_t cos_bit_col = cfg->cos_bit_col;
- const int8_t cos_bit_row = cfg->cos_bit_row;
- const TxfmFuncSSE2 txfm_func_col = fwd_txfm_type_to_func(cfg->txfm_type_col);
- __m128i *buf_128 = (__m128i *)txfm_buf;
- __m128i *out_128 = (__m128i *)output;
-
- const int num_per_128 = 4;
- int txfm2d_size_128 = txfm_size * txfm_size / num_per_128;
- int col_num = txfm_size / num_per_128;
-
- int16_array_with_stride_to_int32_array_without_stride(input, stride, output,
- txfm_size);
- /*col wise transform*/
- txfm_func_col(out_128, buf_128, cos_bit_col, stage_range_col);
- av1_round_shift_array_32_sse4_1(buf_128, out_128, txfm2d_size_128, -shift[1]);
- transpose_32(txfm_size, out_128, buf_128);
-
- /*row wise transform*/
- for (int col = 0; col < (col_num >> 1); col++) {
- av1_fdct64_new_sse4_1((buf_128 + col), (out_128 + col), cos_bit_row,
- col_num, (col_num >> 1));
- }
-
- txfm2d_size_128 = (col_num >> 1) * (txfm_size >> 1);
- av1_round_shift_array_32_sse4_1(out_128, buf_128, txfm2d_size_128, -shift[2]);
- transpose_32x32(buf_128, out_128);
-}
-
-void av1_fwd_txfm2d_32x32_sse4_1(const int16_t *input, int32_t *output,
- int stride, TX_TYPE tx_type, int bd) {
- DECLARE_ALIGNED(16, int32_t, txfm_buf[1024]);
- TXFM_2D_FLIP_CFG cfg;
- av1_get_fwd_txfm_cfg(tx_type, TX_32X32, &cfg);
- (void)bd;
- fwd_txfm2d_sse4_1(input, output, stride, &cfg, txfm_buf);
-}
-
-void av1_fwd_txfm2d_64x64_sse4_1(const int16_t *input, int32_t *output,
- int stride, TX_TYPE tx_type, int bd) {
- DECLARE_ALIGNED(16, int32_t, txfm_buf[4096]);
- TXFM_2D_FLIP_CFG cfg;
- av1_get_fwd_txfm_cfg(tx_type, TX_64X64, &cfg);
- (void)bd;
- fwd_txfm2d_64x64_sse4_1(input, output, stride, &cfg, txfm_buf);
-}
-
-static INLINE void transpose_32_4x4x2(int stride, const __m128i *inputA,
- const __m128i *inputB, __m128i *output) {
- __m128i temp0 = _mm_unpacklo_epi32(inputA[0], inputA[2]);
- __m128i temp1 = _mm_unpackhi_epi32(inputA[0], inputA[2]);
- __m128i temp2 = _mm_unpacklo_epi32(inputA[1], inputA[3]);
- __m128i temp3 = _mm_unpackhi_epi32(inputA[1], inputA[3]);
-
- output[0 * stride] = _mm_unpacklo_epi32(temp0, temp2);
- output[1 * stride] = _mm_unpackhi_epi32(temp0, temp2);
- output[2 * stride] = _mm_unpacklo_epi32(temp1, temp3);
- output[3 * stride] = _mm_unpackhi_epi32(temp1, temp3);
-
- temp0 = _mm_unpacklo_epi32(inputB[0], inputB[2]);
- temp1 = _mm_unpackhi_epi32(inputB[0], inputB[2]);
- temp2 = _mm_unpacklo_epi32(inputB[1], inputB[3]);
- temp3 = _mm_unpackhi_epi32(inputB[1], inputB[3]);
-
- output[4 * stride] = _mm_unpacklo_epi32(temp0, temp2);
- output[5 * stride] = _mm_unpackhi_epi32(temp0, temp2);
- output[6 * stride] = _mm_unpacklo_epi32(temp1, temp3);
- output[7 * stride] = _mm_unpackhi_epi32(temp1, temp3);
-}
-
-static void lowbd_fwd_txfm2d_64x64_sse4_1(const int16_t *input, int32_t *output,
- int stride, TX_TYPE tx_type, int bd) {
- (void)bd;
- (void)tx_type;
- assert(tx_type == DCT_DCT);
- const TX_SIZE tx_size = TX_64X64;
- __m128i buf0[64], buf1[512];
- const int8_t *shift = fwd_txfm_shift_ls[tx_size];
- const int txw_idx = get_txw_idx(tx_size);
- const int txh_idx = get_txh_idx(tx_size);
- const int cos_bit_col = fwd_cos_bit_col[txw_idx][txh_idx];
- const int cos_bit_row = fwd_cos_bit_row[txw_idx][txh_idx];
- const int width = tx_size_wide[tx_size];
- const int height = tx_size_high[tx_size];
- const transform_1d_sse2 col_txfm = fdct8x64_new_sse2;
- const int width_div8 = (width >> 3);
- const int height_div8 = (height >> 3);
-
- for (int i = 0; i < width_div8; i++) {
- load_buffer_16bit_to_16bit(input + 8 * i, stride, buf0, height);
- round_shift_16bit(buf0, height, shift[0]);
- col_txfm(buf0, buf0, cos_bit_col);
- round_shift_16bit(buf0, height, shift[1]);
- for (int j = 0; j < AOMMIN(4, height_div8); ++j) {
- transpose_16bit_8x8(buf0 + j * 8, buf1 + j * width + 8 * i);
- }
- }
- for (int i = 0; i < AOMMIN(4, height_div8); i++) {
- __m128i bufA[64];
- __m128i bufB[64];
- __m128i *buf = buf1 + width * i;
- for (int j = 0; j < width; ++j) {
- bufA[j] = _mm_cvtepi16_epi32(buf[j]);
- bufB[j] = _mm_cvtepi16_epi32(_mm_unpackhi_epi64(buf[j], buf[j]));
- }
- av1_fdct64_new_sse4_1(bufA, bufA, cos_bit_row, 1, 1);
- av1_fdct64_new_sse4_1(bufB, bufB, cos_bit_row, 1, 1);
- av1_round_shift_array_32_sse4_1(bufA, bufA, 32, -shift[2]);
- av1_round_shift_array_32_sse4_1(bufB, bufB, 32, -shift[2]);
-
- int32_t *output8 = output + 8 * 32 * i;
- for (int j = 0; j < width_div8; ++j) {
- __m128i *out = (__m128i *)(output8 + 4 * j);
- transpose_32_4x4x2(8, bufA + 4 * j, bufB + 4 * j, out);
- }
- }
-}
-
-static void lowbd_fwd_txfm2d_64x32_sse4_1(const int16_t *input, int32_t *output,
- int stride, TX_TYPE tx_type, int bd) {
- (void)bd;
- const TX_SIZE tx_size = TX_64X32;
- __m128i buf0[64], buf1[256];
- const int8_t *shift = fwd_txfm_shift_ls[tx_size];
- const int txw_idx = get_txw_idx(tx_size);
- const int txh_idx = get_txh_idx(tx_size);
- const int cos_bit_col = fwd_cos_bit_col[txw_idx][txh_idx];
- const int cos_bit_row = fwd_cos_bit_row[txw_idx][txh_idx];
- const int width = tx_size_wide[tx_size];
- const int height = tx_size_high[tx_size];
- const transform_1d_sse2 col_txfm = col_txfm8x32_arr[tx_type];
- const int width_div8 = (width >> 3);
- const int height_div8 = (height >> 3);
-
- for (int i = 0; i < width_div8; i++) {
- load_buffer_16bit_to_16bit(input + 8 * i, stride, buf0, height);
- round_shift_16bit(buf0, height, shift[0]);
- col_txfm(buf0, buf0, cos_bit_col);
- round_shift_16bit(buf0, height, shift[1]);
- for (int j = 0; j < AOMMIN(4, height_div8); ++j) {
- transpose_16bit_8x8(buf0 + j * 8, buf1 + j * width + 8 * i);
- }
- }
- assert(tx_type == DCT_DCT);
- for (int i = 0; i < AOMMIN(4, height_div8); i++) {
- __m128i bufA[64];
- __m128i bufB[64];
- __m128i *buf = buf1 + width * i;
- for (int j = 0; j < width; ++j) {
- bufA[j] = _mm_cvtepi16_epi32(buf[j]);
- bufB[j] = _mm_cvtepi16_epi32(_mm_unpackhi_epi64(buf[j], buf[j]));
- }
- av1_fdct64_new_sse4_1(bufA, bufA, cos_bit_row, 1, 1);
- av1_fdct64_new_sse4_1(bufB, bufB, cos_bit_row, 1, 1);
- av1_round_shift_rect_array_32_sse4_1(bufA, bufA, 32, -shift[2], NewSqrt2);
- av1_round_shift_rect_array_32_sse4_1(bufB, bufB, 32, -shift[2], NewSqrt2);
-
- int32_t *output8 = output + 8 * 32 * i;
- for (int j = 0; j < width_div8; ++j) {
- __m128i *out = (__m128i *)(output8 + 4 * j);
- transpose_32_4x4x2(8, bufA + 4 * j, bufB + 4 * j, out);
- }
- }
-}
-
-static void lowbd_fwd_txfm2d_32x64_sse4_1(const int16_t *input, int32_t *output,
- int stride, TX_TYPE tx_type, int bd) {
- (void)bd;
- (void)tx_type;
- assert(tx_type == DCT_DCT);
- const TX_SIZE tx_size = TX_32X64;
- __m128i buf0[64], buf1[256];
- const int8_t *shift = fwd_txfm_shift_ls[tx_size];
- const int txw_idx = get_txw_idx(tx_size);
- const int txh_idx = get_txh_idx(tx_size);
- const int cos_bit_col = fwd_cos_bit_col[txw_idx][txh_idx];
- const int cos_bit_row = fwd_cos_bit_row[txw_idx][txh_idx];
- const int width = tx_size_wide[tx_size];
- const int height = tx_size_high[tx_size];
- const transform_1d_sse2 col_txfm = fdct8x64_new_sse2;
- const int width_div8 = (width >> 3);
- const int height_div8 = (height >> 3);
-
- for (int i = 0; i < width_div8; i++) {
- load_buffer_16bit_to_16bit(input + 8 * i, stride, buf0, height);
- round_shift_16bit(buf0, height, shift[0]);
- col_txfm(buf0, buf0, cos_bit_col);
- round_shift_16bit(buf0, height, shift[1]);
- for (int j = 0; j < AOMMIN(4, height_div8); ++j) {
- transpose_16bit_8x8(buf0 + j * 8, buf1 + j * width + 8 * i);
- }
- }
-
- for (int i = 0; i < AOMMIN(4, height_div8); i++) {
- __m128i bufA[32];
- __m128i bufB[32];
- __m128i *buf = buf1 + width * i;
- for (int j = 0; j < width; ++j) {
- bufA[j] = _mm_cvtepi16_epi32(buf[j]);
- bufB[j] = _mm_cvtepi16_epi32(_mm_unpackhi_epi64(buf[j], buf[j]));
- }
- av1_fdct32_new_sse4_1(bufA, bufA, cos_bit_row);
- av1_fdct32_new_sse4_1(bufB, bufB, cos_bit_row);
- av1_round_shift_rect_array_32_sse4_1(bufA, bufA, 32, -shift[2], NewSqrt2);
- av1_round_shift_rect_array_32_sse4_1(bufB, bufB, 32, -shift[2], NewSqrt2);
-
- int32_t *output8 = output + 8 * 32 * i;
- for (int j = 0; j < (32 / 4); ++j) {
- __m128i *out = (__m128i *)(output8 + 4 * j);
- transpose_32_4x4x2(8, bufA + 4 * j, bufB + 4 * j, out);
- }
- }
-}
-
-static FwdTxfm2dFunc fwd_txfm2d_func_ls[TX_SIZES_ALL] = {
- av1_lowbd_fwd_txfm2d_4x4_sse2, // 4x4 transform
- av1_lowbd_fwd_txfm2d_8x8_sse2, // 8x8 transform
- av1_lowbd_fwd_txfm2d_16x16_sse2, // 16x16 transform
- av1_lowbd_fwd_txfm2d_32x32_sse2, // 32x32 transform
- lowbd_fwd_txfm2d_64x64_sse4_1, // 64x64 transform
- av1_lowbd_fwd_txfm2d_4x8_sse2, // 4x8 transform
- av1_lowbd_fwd_txfm2d_8x4_sse2, // 8x4 transform
- av1_lowbd_fwd_txfm2d_8x16_sse2, // 8x16 transform
- av1_lowbd_fwd_txfm2d_16x8_sse2, // 16x8 transform
- av1_lowbd_fwd_txfm2d_16x32_sse2, // 16x32 transform
- av1_lowbd_fwd_txfm2d_32x16_sse2, // 32x16 transform
- lowbd_fwd_txfm2d_32x64_sse4_1, // 32x64 transform
- lowbd_fwd_txfm2d_64x32_sse4_1, // 64x32 transform
- av1_lowbd_fwd_txfm2d_4x16_sse2, // 4x16 transform
- av1_lowbd_fwd_txfm2d_16x4_sse2, // 16x4 transform
- av1_lowbd_fwd_txfm2d_8x32_sse2, // 8x32 transform
- av1_lowbd_fwd_txfm2d_32x8_sse2, // 32x8 transform
- av1_lowbd_fwd_txfm2d_16x64_sse2, // 16x64 transform
- av1_lowbd_fwd_txfm2d_64x16_sse2, // 64x16 transform
-};
-
-void av1_lowbd_fwd_txfm_sse4_1(const int16_t *src_diff, tran_low_t *coeff,
- int diff_stride, TxfmParam *txfm_param) {
- FwdTxfm2dFunc fwd_txfm2d_func = fwd_txfm2d_func_ls[txfm_param->tx_size];
- if ((fwd_txfm2d_func == NULL) ||
- (txfm_param->lossless && txfm_param->tx_size == TX_4X4)) {
- av1_lowbd_fwd_txfm_c(src_diff, coeff, diff_stride, txfm_param);
- } else {
- fwd_txfm2d_func(src_diff, coeff, diff_stride, txfm_param->tx_type,
- txfm_param->bd);
- }
-}
diff --git a/third_party/aom/av1/encoder/x86/av1_fwd_txfm_avx2.h b/third_party/aom/av1/encoder/x86/av1_fwd_txfm_avx2.h
deleted file mode 100644
index 38707137c..000000000
--- a/third_party/aom/av1/encoder/x86/av1_fwd_txfm_avx2.h
+++ /dev/null
@@ -1,103 +0,0 @@
-/*
- * Copyright (c) 2018, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#ifndef AOM_AV1_ENCODER_X86_AV1_FWD_TXFM_AVX2_H_
-#define AOM_AV1_ENCODER_X86_AV1_FWD_TXFM_AVX2_H_
-#include <immintrin.h>
-
-static INLINE __m256i av1_round_shift_32_avx2(__m256i vec, int bit) {
- __m256i tmp, round;
- round = _mm256_set1_epi32(1 << (bit - 1));
- tmp = _mm256_add_epi32(vec, round);
- return _mm256_srai_epi32(tmp, bit);
-}
-
-// out0 = in0*w0 + in1*w1
-// out1 = -in1*w0 + in0*w1
-static INLINE void btf_32_avx2_type0(const int32_t w0, const int32_t w1,
- __m256i *in0, __m256i *in1,
- const __m256i _r, const int32_t cos_bit) {
- __m256i _in0 = *in0;
- __m256i _in1 = *in1;
- const __m256i ww0 = _mm256_set1_epi32(w0);
- const __m256i ww1 = _mm256_set1_epi32(w1);
- const __m256i in0_w0 = _mm256_mullo_epi32(_in0, ww0);
- const __m256i in1_w1 = _mm256_mullo_epi32(_in1, ww1);
- __m256i temp0 = _mm256_add_epi32(in0_w0, in1_w1);
- temp0 = _mm256_add_epi32(temp0, _r);
- *in0 = _mm256_srai_epi32(temp0, cos_bit);
- const __m256i in0_w1 = _mm256_mullo_epi32(_in0, ww1);
- const __m256i in1_w0 = _mm256_mullo_epi32(_in1, ww0);
- __m256i temp1 = _mm256_sub_epi32(in0_w1, in1_w0);
- temp1 = _mm256_add_epi32(temp1, _r);
- *in1 = _mm256_srai_epi32(temp1, cos_bit);
-}
-
-static INLINE void btf_32_avx2_type1(const int32_t w0, const int32_t w1,
- __m256i *in0, __m256i *in1,
- const __m256i _r, const int32_t cos_bit) {
- __m256i _in0 = *in0;
- __m256i _in1 = *in1;
- const __m256i ww0 = _mm256_set1_epi32(w0);
- const __m256i ww1 = _mm256_set1_epi32(w1);
- const __m256i in0_w0 = _mm256_mullo_epi32(_in0, ww0);
- const __m256i in1_w1 = _mm256_mullo_epi32(_in1, ww1);
- __m256i temp0 = _mm256_add_epi32(in0_w0, in1_w1);
- temp0 = _mm256_add_epi32(temp0, _r);
- *in0 = _mm256_srai_epi32(temp0, cos_bit);
- const __m256i in0_w1 = _mm256_mullo_epi32(_in0, ww1);
- const __m256i in1_w0 = _mm256_mullo_epi32(_in1, ww0);
- __m256i temp1 = _mm256_sub_epi32(in1_w0, in0_w1);
- temp1 = _mm256_add_epi32(temp1, _r);
- *in1 = _mm256_srai_epi32(temp1, cos_bit);
-}
-
-// out0 = in0*w0 + in1*w1
-// out1 = -in1*w0 + in0*w1
-static INLINE void btf_32_avx2_type0_new(const __m256i ww0, const __m256i ww1,
- __m256i *in0, __m256i *in1,
- const __m256i _r,
- const int32_t cos_bit) {
- __m256i _in0 = *in0;
- __m256i _in1 = *in1;
- const __m256i in0_w0 = _mm256_mullo_epi32(_in0, ww0);
- const __m256i in1_w1 = _mm256_mullo_epi32(_in1, ww1);
- __m256i temp0 = _mm256_add_epi32(in0_w0, in1_w1);
- temp0 = _mm256_add_epi32(temp0, _r);
- *in0 = _mm256_srai_epi32(temp0, cos_bit);
- const __m256i in0_w1 = _mm256_mullo_epi32(_in0, ww1);
- const __m256i in1_w0 = _mm256_mullo_epi32(_in1, ww0);
- __m256i temp1 = _mm256_sub_epi32(in0_w1, in1_w0);
- temp1 = _mm256_add_epi32(temp1, _r);
- *in1 = _mm256_srai_epi32(temp1, cos_bit);
-}
-
-// out0 = in0*w0 + in1*w1
-// out1 = in1*w0 - in0*w1
-static INLINE void btf_32_avx2_type1_new(const __m256i ww0, const __m256i ww1,
- __m256i *in0, __m256i *in1,
- const __m256i _r,
- const int32_t cos_bit) {
- __m256i _in0 = *in0;
- __m256i _in1 = *in1;
- const __m256i in0_w0 = _mm256_mullo_epi32(_in0, ww0);
- const __m256i in1_w1 = _mm256_mullo_epi32(_in1, ww1);
- __m256i temp0 = _mm256_add_epi32(in0_w0, in1_w1);
- temp0 = _mm256_add_epi32(temp0, _r);
- *in0 = _mm256_srai_epi32(temp0, cos_bit);
- const __m256i in0_w1 = _mm256_mullo_epi32(_in0, ww1);
- const __m256i in1_w0 = _mm256_mullo_epi32(_in1, ww0);
- __m256i temp1 = _mm256_sub_epi32(in1_w0, in0_w1);
- temp1 = _mm256_add_epi32(temp1, _r);
- *in1 = _mm256_srai_epi32(temp1, cos_bit);
-}
-
-#endif // AOM_AV1_ENCODER_X86_AV1_FWD_TXFM_AVX2_H_
diff --git a/third_party/aom/av1/encoder/x86/av1_fwd_txfm_sse2.c b/third_party/aom/av1/encoder/x86/av1_fwd_txfm_sse2.c
deleted file mode 100644
index 6aae7ce1e..000000000
--- a/third_party/aom/av1/encoder/x86/av1_fwd_txfm_sse2.c
+++ /dev/null
@@ -1,2889 +0,0 @@
-/*
- * Copyright (c) 2018, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#include "av1/common/x86/av1_txfm_sse2.h"
-#include "av1/encoder/av1_fwd_txfm1d_cfg.h"
-#include "av1/encoder/x86/av1_fwd_txfm_sse2.h"
-
-// TODO(linfengz): refine fdct4x8 and fadst4x8 optimization (if possible).
-
-static void fdct4x4_new_sse2(const __m128i *input, __m128i *output,
- int8_t cos_bit) {
- const int32_t *cospi = cospi_arr(cos_bit);
- const __m128i cospi_p32_p32 = pair_set_epi16(cospi[32], cospi[32]);
- const __m128i cospi_p32_m32 = pair_set_epi16(cospi[32], -cospi[32]);
- const __m128i cospi_p16_p48 = pair_set_epi16(cospi[16], cospi[48]);
- const __m128i cospi_p48_m16 = pair_set_epi16(cospi[48], -cospi[16]);
- const __m128i __rounding = _mm_set1_epi32(1 << (cos_bit - 1));
- __m128i u[4], v[4];
-
- u[0] = _mm_unpacklo_epi16(input[0], input[1]);
- u[1] = _mm_unpacklo_epi16(input[3], input[2]);
-
- v[0] = _mm_add_epi16(u[0], u[1]);
- v[1] = _mm_sub_epi16(u[0], u[1]);
-
- u[0] = _mm_madd_epi16(v[0], cospi_p32_p32); // 0
- u[1] = _mm_madd_epi16(v[0], cospi_p32_m32); // 2
- u[2] = _mm_madd_epi16(v[1], cospi_p16_p48); // 1
- u[3] = _mm_madd_epi16(v[1], cospi_p48_m16); // 3
-
- v[0] = _mm_add_epi32(u[0], __rounding);
- v[1] = _mm_add_epi32(u[1], __rounding);
- v[2] = _mm_add_epi32(u[2], __rounding);
- v[3] = _mm_add_epi32(u[3], __rounding);
- u[0] = _mm_srai_epi32(v[0], cos_bit);
- u[1] = _mm_srai_epi32(v[1], cos_bit);
- u[2] = _mm_srai_epi32(v[2], cos_bit);
- u[3] = _mm_srai_epi32(v[3], cos_bit);
-
- output[0] = _mm_packs_epi32(u[0], u[1]);
- output[1] = _mm_packs_epi32(u[2], u[3]);
- output[2] = _mm_srli_si128(output[0], 8);
- output[3] = _mm_srli_si128(output[1], 8);
-}
-
-static void fdct8x4_new_sse2(const __m128i *input, __m128i *output,
- int8_t cos_bit) {
- const int32_t *cospi = cospi_arr(cos_bit);
- const __m128i __rounding = _mm_set1_epi32(1 << (cos_bit - 1));
-
- __m128i cospi_p32_p32 = pair_set_epi16(cospi[32], cospi[32]);
- __m128i cospi_p32_m32 = pair_set_epi16(cospi[32], -cospi[32]);
- __m128i cospi_p48_p16 = pair_set_epi16(cospi[48], cospi[16]);
- __m128i cospi_m16_p48 = pair_set_epi16(-cospi[16], cospi[48]);
-
- // stage 1
- __m128i x1[4];
- x1[0] = _mm_adds_epi16(input[0], input[3]);
- x1[3] = _mm_subs_epi16(input[0], input[3]);
- x1[1] = _mm_adds_epi16(input[1], input[2]);
- x1[2] = _mm_subs_epi16(input[1], input[2]);
-
- // stage 2
- __m128i x2[4];
- btf_16_sse2(cospi_p32_p32, cospi_p32_m32, x1[0], x1[1], x2[0], x2[1]);
- btf_16_sse2(cospi_p48_p16, cospi_m16_p48, x1[2], x1[3], x2[2], x2[3]);
-
- // stage 3
- output[0] = x2[0];
- output[1] = x2[2];
- output[2] = x2[1];
- output[3] = x2[3];
-}
-
-static void fdct4x8_new_sse2(const __m128i *input, __m128i *output,
- int8_t cos_bit) {
- const int32_t *cospi = cospi_arr(cos_bit);
- const __m128i __rounding = _mm_set1_epi32(1 << (cos_bit - 1));
-
- __m128i cospi_m32_p32 = pair_set_epi16(-cospi[32], cospi[32]);
- __m128i cospi_p32_p32 = pair_set_epi16(cospi[32], cospi[32]);
- __m128i cospi_p32_m32 = pair_set_epi16(cospi[32], -cospi[32]);
- __m128i cospi_p48_p16 = pair_set_epi16(cospi[48], cospi[16]);
- __m128i cospi_m16_p48 = pair_set_epi16(-cospi[16], cospi[48]);
- __m128i cospi_p56_p08 = pair_set_epi16(cospi[56], cospi[8]);
- __m128i cospi_m08_p56 = pair_set_epi16(-cospi[8], cospi[56]);
- __m128i cospi_p24_p40 = pair_set_epi16(cospi[24], cospi[40]);
- __m128i cospi_m40_p24 = pair_set_epi16(-cospi[40], cospi[24]);
-
- // stage 1
- __m128i x1[8];
- x1[0] = _mm_adds_epi16(input[0], input[7]);
- x1[7] = _mm_subs_epi16(input[0], input[7]);
- x1[1] = _mm_adds_epi16(input[1], input[6]);
- x1[6] = _mm_subs_epi16(input[1], input[6]);
- x1[2] = _mm_adds_epi16(input[2], input[5]);
- x1[5] = _mm_subs_epi16(input[2], input[5]);
- x1[3] = _mm_adds_epi16(input[3], input[4]);
- x1[4] = _mm_subs_epi16(input[3], input[4]);
-
- // stage 2
- __m128i x2[8];
- x2[0] = _mm_adds_epi16(x1[0], x1[3]);
- x2[3] = _mm_subs_epi16(x1[0], x1[3]);
- x2[1] = _mm_adds_epi16(x1[1], x1[2]);
- x2[2] = _mm_subs_epi16(x1[1], x1[2]);
- x2[4] = x1[4];
- btf_16_w4_sse2(&cospi_m32_p32, &cospi_p32_p32, __rounding, cos_bit, &x1[5],
- &x1[6], &x2[5], &x2[6]);
- x2[7] = x1[7];
-
- // stage 3
- __m128i x3[8];
- btf_16_w4_sse2(&cospi_p32_p32, &cospi_p32_m32, __rounding, cos_bit, &x2[0],
- &x2[1], &x3[0], &x3[1]);
- btf_16_w4_sse2(&cospi_p48_p16, &cospi_m16_p48, __rounding, cos_bit, &x2[2],
- &x2[3], &x3[2], &x3[3]);
- x3[4] = _mm_adds_epi16(x2[4], x2[5]);
- x3[5] = _mm_subs_epi16(x2[4], x2[5]);
- x3[6] = _mm_subs_epi16(x2[7], x2[6]);
- x3[7] = _mm_adds_epi16(x2[7], x2[6]);
-
- // stage 4
- __m128i x4[8];
- x4[0] = x3[0];
- x4[1] = x3[1];
- x4[2] = x3[2];
- x4[3] = x3[3];
- btf_16_w4_sse2(&cospi_p56_p08, &cospi_m08_p56, __rounding, cos_bit, &x3[4],
- &x3[7], &x4[4], &x4[7]);
- btf_16_w4_sse2(&cospi_p24_p40, &cospi_m40_p24, __rounding, cos_bit, &x3[5],
- &x3[6], &x4[5], &x4[6]);
-
- // stage 5
- output[0] = x4[0];
- output[1] = x4[4];
- output[2] = x4[2];
- output[3] = x4[6];
- output[4] = x4[1];
- output[5] = x4[5];
- output[6] = x4[3];
- output[7] = x4[7];
-}
-
-static void fdct8x8_new_sse2(const __m128i *input, __m128i *output,
- int8_t cos_bit) {
- const int32_t *cospi = cospi_arr(cos_bit);
- const __m128i __rounding = _mm_set1_epi32(1 << (cos_bit - 1));
-
- __m128i cospi_m32_p32 = pair_set_epi16(-cospi[32], cospi[32]);
- __m128i cospi_p32_p32 = pair_set_epi16(cospi[32], cospi[32]);
- __m128i cospi_p32_m32 = pair_set_epi16(cospi[32], -cospi[32]);
- __m128i cospi_p48_p16 = pair_set_epi16(cospi[48], cospi[16]);
- __m128i cospi_m16_p48 = pair_set_epi16(-cospi[16], cospi[48]);
- __m128i cospi_p56_p08 = pair_set_epi16(cospi[56], cospi[8]);
- __m128i cospi_m08_p56 = pair_set_epi16(-cospi[8], cospi[56]);
- __m128i cospi_p24_p40 = pair_set_epi16(cospi[24], cospi[40]);
- __m128i cospi_m40_p24 = pair_set_epi16(-cospi[40], cospi[24]);
-
- // stage 1
- __m128i x1[8];
- x1[0] = _mm_adds_epi16(input[0], input[7]);
- x1[7] = _mm_subs_epi16(input[0], input[7]);
- x1[1] = _mm_adds_epi16(input[1], input[6]);
- x1[6] = _mm_subs_epi16(input[1], input[6]);
- x1[2] = _mm_adds_epi16(input[2], input[5]);
- x1[5] = _mm_subs_epi16(input[2], input[5]);
- x1[3] = _mm_adds_epi16(input[3], input[4]);
- x1[4] = _mm_subs_epi16(input[3], input[4]);
-
- // stage 2
- __m128i x2[8];
- x2[0] = _mm_adds_epi16(x1[0], x1[3]);
- x2[3] = _mm_subs_epi16(x1[0], x1[3]);
- x2[1] = _mm_adds_epi16(x1[1], x1[2]);
- x2[2] = _mm_subs_epi16(x1[1], x1[2]);
- x2[4] = x1[4];
- btf_16_sse2(cospi_m32_p32, cospi_p32_p32, x1[5], x1[6], x2[5], x2[6]);
- x2[7] = x1[7];
-
- // stage 3
- __m128i x3[8];
- btf_16_sse2(cospi_p32_p32, cospi_p32_m32, x2[0], x2[1], x3[0], x3[1]);
- btf_16_sse2(cospi_p48_p16, cospi_m16_p48, x2[2], x2[3], x3[2], x3[3]);
- x3[4] = _mm_adds_epi16(x2[4], x2[5]);
- x3[5] = _mm_subs_epi16(x2[4], x2[5]);
- x3[6] = _mm_subs_epi16(x2[7], x2[6]);
- x3[7] = _mm_adds_epi16(x2[7], x2[6]);
-
- // stage 4
- __m128i x4[8];
- x4[0] = x3[0];
- x4[1] = x3[1];
- x4[2] = x3[2];
- x4[3] = x3[3];
- btf_16_sse2(cospi_p56_p08, cospi_m08_p56, x3[4], x3[7], x4[4], x4[7]);
- btf_16_sse2(cospi_p24_p40, cospi_m40_p24, x3[5], x3[6], x4[5], x4[6]);
-
- // stage 5
- output[0] = x4[0];
- output[1] = x4[4];
- output[2] = x4[2];
- output[3] = x4[6];
- output[4] = x4[1];
- output[5] = x4[5];
- output[6] = x4[3];
- output[7] = x4[7];
-}
-
-static void fdct8x16_new_sse2(const __m128i *input, __m128i *output,
- int8_t cos_bit) {
- const int32_t *cospi = cospi_arr(cos_bit);
- const __m128i __rounding = _mm_set1_epi32(1 << (cos_bit - 1));
-
- __m128i cospi_m32_p32 = pair_set_epi16(-cospi[32], cospi[32]);
- __m128i cospi_p32_p32 = pair_set_epi16(cospi[32], cospi[32]);
- __m128i cospi_p32_m32 = pair_set_epi16(cospi[32], -cospi[32]);
- __m128i cospi_p48_p16 = pair_set_epi16(cospi[48], cospi[16]);
- __m128i cospi_m16_p48 = pair_set_epi16(-cospi[16], cospi[48]);
- __m128i cospi_m48_m16 = pair_set_epi16(-cospi[48], -cospi[16]);
- __m128i cospi_p56_p08 = pair_set_epi16(cospi[56], cospi[8]);
- __m128i cospi_m08_p56 = pair_set_epi16(-cospi[8], cospi[56]);
- __m128i cospi_p24_p40 = pair_set_epi16(cospi[24], cospi[40]);
- __m128i cospi_m40_p24 = pair_set_epi16(-cospi[40], cospi[24]);
- __m128i cospi_p60_p04 = pair_set_epi16(cospi[60], cospi[4]);
- __m128i cospi_m04_p60 = pair_set_epi16(-cospi[4], cospi[60]);
- __m128i cospi_p28_p36 = pair_set_epi16(cospi[28], cospi[36]);
- __m128i cospi_m36_p28 = pair_set_epi16(-cospi[36], cospi[28]);
- __m128i cospi_p44_p20 = pair_set_epi16(cospi[44], cospi[20]);
- __m128i cospi_m20_p44 = pair_set_epi16(-cospi[20], cospi[44]);
- __m128i cospi_p12_p52 = pair_set_epi16(cospi[12], cospi[52]);
- __m128i cospi_m52_p12 = pair_set_epi16(-cospi[52], cospi[12]);
-
- // stage 1
- __m128i x1[16];
- x1[0] = _mm_adds_epi16(input[0], input[15]);
- x1[15] = _mm_subs_epi16(input[0], input[15]);
- x1[1] = _mm_adds_epi16(input[1], input[14]);
- x1[14] = _mm_subs_epi16(input[1], input[14]);
- x1[2] = _mm_adds_epi16(input[2], input[13]);
- x1[13] = _mm_subs_epi16(input[2], input[13]);
- x1[3] = _mm_adds_epi16(input[3], input[12]);
- x1[12] = _mm_subs_epi16(input[3], input[12]);
- x1[4] = _mm_adds_epi16(input[4], input[11]);
- x1[11] = _mm_subs_epi16(input[4], input[11]);
- x1[5] = _mm_adds_epi16(input[5], input[10]);
- x1[10] = _mm_subs_epi16(input[5], input[10]);
- x1[6] = _mm_adds_epi16(input[6], input[9]);
- x1[9] = _mm_subs_epi16(input[6], input[9]);
- x1[7] = _mm_adds_epi16(input[7], input[8]);
- x1[8] = _mm_subs_epi16(input[7], input[8]);
-
- // stage 2
- __m128i x2[16];
- x2[0] = _mm_adds_epi16(x1[0], x1[7]);
- x2[7] = _mm_subs_epi16(x1[0], x1[7]);
- x2[1] = _mm_adds_epi16(x1[1], x1[6]);
- x2[6] = _mm_subs_epi16(x1[1], x1[6]);
- x2[2] = _mm_adds_epi16(x1[2], x1[5]);
- x2[5] = _mm_subs_epi16(x1[2], x1[5]);
- x2[3] = _mm_adds_epi16(x1[3], x1[4]);
- x2[4] = _mm_subs_epi16(x1[3], x1[4]);
- x2[8] = x1[8];
- x2[9] = x1[9];
- btf_16_sse2(cospi_m32_p32, cospi_p32_p32, x1[10], x1[13], x2[10], x2[13]);
- btf_16_sse2(cospi_m32_p32, cospi_p32_p32, x1[11], x1[12], x2[11], x2[12]);
- x2[14] = x1[14];
- x2[15] = x1[15];
-
- // stage 3
- __m128i x3[16];
- x3[0] = _mm_adds_epi16(x2[0], x2[3]);
- x3[3] = _mm_subs_epi16(x2[0], x2[3]);
- x3[1] = _mm_adds_epi16(x2[1], x2[2]);
- x3[2] = _mm_subs_epi16(x2[1], x2[2]);
- x3[4] = x2[4];
- btf_16_sse2(cospi_m32_p32, cospi_p32_p32, x2[5], x2[6], x3[5], x3[6]);
- x3[7] = x2[7];
- x3[8] = _mm_adds_epi16(x2[8], x2[11]);
- x3[11] = _mm_subs_epi16(x2[8], x2[11]);
- x3[9] = _mm_adds_epi16(x2[9], x2[10]);
- x3[10] = _mm_subs_epi16(x2[9], x2[10]);
- x3[12] = _mm_subs_epi16(x2[15], x2[12]);
- x3[15] = _mm_adds_epi16(x2[15], x2[12]);
- x3[13] = _mm_subs_epi16(x2[14], x2[13]);
- x3[14] = _mm_adds_epi16(x2[14], x2[13]);
-
- // stage 4
- __m128i x4[16];
- btf_16_sse2(cospi_p32_p32, cospi_p32_m32, x3[0], x3[1], x4[0], x4[1]);
- btf_16_sse2(cospi_p48_p16, cospi_m16_p48, x3[2], x3[3], x4[2], x4[3]);
- x4[4] = _mm_adds_epi16(x3[4], x3[5]);
- x4[5] = _mm_subs_epi16(x3[4], x3[5]);
- x4[6] = _mm_subs_epi16(x3[7], x3[6]);
- x4[7] = _mm_adds_epi16(x3[7], x3[6]);
- x4[8] = x3[8];
- btf_16_sse2(cospi_m16_p48, cospi_p48_p16, x3[9], x3[14], x4[9], x4[14]);
- btf_16_sse2(cospi_m48_m16, cospi_m16_p48, x3[10], x3[13], x4[10], x4[13]);
- x4[11] = x3[11];
- x4[12] = x3[12];
- x4[15] = x3[15];
-
- // stage 5
- __m128i x5[16];
- x5[0] = x4[0];
- x5[1] = x4[1];
- x5[2] = x4[2];
- x5[3] = x4[3];
- btf_16_sse2(cospi_p56_p08, cospi_m08_p56, x4[4], x4[7], x5[4], x5[7]);
- btf_16_sse2(cospi_p24_p40, cospi_m40_p24, x4[5], x4[6], x5[5], x5[6]);
- x5[8] = _mm_adds_epi16(x4[8], x4[9]);
- x5[9] = _mm_subs_epi16(x4[8], x4[9]);
- x5[10] = _mm_subs_epi16(x4[11], x4[10]);
- x5[11] = _mm_adds_epi16(x4[11], x4[10]);
- x5[12] = _mm_adds_epi16(x4[12], x4[13]);
- x5[13] = _mm_subs_epi16(x4[12], x4[13]);
- x5[14] = _mm_subs_epi16(x4[15], x4[14]);
- x5[15] = _mm_adds_epi16(x4[15], x4[14]);
-
- // stage 6
- __m128i x6[16];
- x6[0] = x5[0];
- x6[1] = x5[1];
- x6[2] = x5[2];
- x6[3] = x5[3];
- x6[4] = x5[4];
- x6[5] = x5[5];
- x6[6] = x5[6];
- x6[7] = x5[7];
- btf_16_sse2(cospi_p60_p04, cospi_m04_p60, x5[8], x5[15], x6[8], x6[15]);
- btf_16_sse2(cospi_p28_p36, cospi_m36_p28, x5[9], x5[14], x6[9], x6[14]);
- btf_16_sse2(cospi_p44_p20, cospi_m20_p44, x5[10], x5[13], x6[10], x6[13]);
- btf_16_sse2(cospi_p12_p52, cospi_m52_p12, x5[11], x5[12], x6[11], x6[12]);
-
- // stage 7
- output[0] = x6[0];
- output[1] = x6[8];
- output[2] = x6[4];
- output[3] = x6[12];
- output[4] = x6[2];
- output[5] = x6[10];
- output[6] = x6[6];
- output[7] = x6[14];
- output[8] = x6[1];
- output[9] = x6[9];
- output[10] = x6[5];
- output[11] = x6[13];
- output[12] = x6[3];
- output[13] = x6[11];
- output[14] = x6[7];
- output[15] = x6[15];
-}
-
-void fdct8x32_new_sse2(const __m128i *input, __m128i *output, int8_t cos_bit) {  // 9-stage network on 8x16-bit lanes: reads input[0..31], writes output[0..31]; name suggests a 32-point forward DCT (confirm)
- const int32_t *cospi = cospi_arr(cos_bit);  // table selected by cos_bit; only even indices 2..62 are read below
- const __m128i __rounding = _mm_set1_epi32(1 << (cos_bit - 1));  // not named again in this function; presumably consumed inside btf_16_sse2 (confirm against its definition)
- // Packed weight pairs: cospi_<sign><a>_<sign><b> holds pair_set_epi16(+/-cospi[a], +/-cospi[b]), p = plus, m = minus.
- __m128i cospi_m32_p32 = pair_set_epi16(-cospi[32], cospi[32]);
- __m128i cospi_p32_p32 = pair_set_epi16(cospi[32], cospi[32]);
- __m128i cospi_m16_p48 = pair_set_epi16(-cospi[16], cospi[48]);
- __m128i cospi_p48_p16 = pair_set_epi16(cospi[48], cospi[16]);
- __m128i cospi_m48_m16 = pair_set_epi16(-cospi[48], -cospi[16]);
- __m128i cospi_p32_m32 = pair_set_epi16(cospi[32], -cospi[32]);
- __m128i cospi_p56_p08 = pair_set_epi16(cospi[56], cospi[8]);
- __m128i cospi_m08_p56 = pair_set_epi16(-cospi[8], cospi[56]);
- __m128i cospi_p24_p40 = pair_set_epi16(cospi[24], cospi[40]);
- __m128i cospi_m40_p24 = pair_set_epi16(-cospi[40], cospi[24]);
- __m128i cospi_m56_m08 = pair_set_epi16(-cospi[56], -cospi[8]);
- __m128i cospi_m24_m40 = pair_set_epi16(-cospi[24], -cospi[40]);
- __m128i cospi_p60_p04 = pair_set_epi16(cospi[60], cospi[4]);
- __m128i cospi_m04_p60 = pair_set_epi16(-cospi[4], cospi[60]);
- __m128i cospi_p28_p36 = pair_set_epi16(cospi[28], cospi[36]);
- __m128i cospi_m36_p28 = pair_set_epi16(-cospi[36], cospi[28]);
- __m128i cospi_p44_p20 = pair_set_epi16(cospi[44], cospi[20]);
- __m128i cospi_m20_p44 = pair_set_epi16(-cospi[20], cospi[44]);
- __m128i cospi_p12_p52 = pair_set_epi16(cospi[12], cospi[52]);
- __m128i cospi_m52_p12 = pair_set_epi16(-cospi[52], cospi[12]);
- __m128i cospi_p62_p02 = pair_set_epi16(cospi[62], cospi[2]);
- __m128i cospi_m02_p62 = pair_set_epi16(-cospi[2], cospi[62]);
- __m128i cospi_p30_p34 = pair_set_epi16(cospi[30], cospi[34]);
- __m128i cospi_m34_p30 = pair_set_epi16(-cospi[34], cospi[30]);
- __m128i cospi_p46_p18 = pair_set_epi16(cospi[46], cospi[18]);
- __m128i cospi_m18_p46 = pair_set_epi16(-cospi[18], cospi[46]);
- __m128i cospi_p14_p50 = pair_set_epi16(cospi[14], cospi[50]);
- __m128i cospi_m50_p14 = pair_set_epi16(-cospi[50], cospi[14]);
- __m128i cospi_p54_p10 = pair_set_epi16(cospi[54], cospi[10]);
- __m128i cospi_m10_p54 = pair_set_epi16(-cospi[10], cospi[54]);
- __m128i cospi_p22_p42 = pair_set_epi16(cospi[22], cospi[42]);
- __m128i cospi_m42_p22 = pair_set_epi16(-cospi[42], cospi[22]);
- __m128i cospi_p38_p26 = pair_set_epi16(cospi[38], cospi[26]);
- __m128i cospi_m26_p38 = pair_set_epi16(-cospi[26], cospi[38]);
- __m128i cospi_p06_p58 = pair_set_epi16(cospi[6], cospi[58]);
- __m128i cospi_m58_p06 = pair_set_epi16(-cospi[58], cospi[6]);
-
- // stage 1: saturating 16-bit input[i] +/- input[31 - i] for i = 0..15; sums -> x1[i], differences -> x1[31 - i]
- __m128i x1[32];
- x1[0] = _mm_adds_epi16(input[0], input[31]);
- x1[31] = _mm_subs_epi16(input[0], input[31]);
- x1[1] = _mm_adds_epi16(input[1], input[30]);
- x1[30] = _mm_subs_epi16(input[1], input[30]);
- x1[2] = _mm_adds_epi16(input[2], input[29]);
- x1[29] = _mm_subs_epi16(input[2], input[29]);
- x1[3] = _mm_adds_epi16(input[3], input[28]);
- x1[28] = _mm_subs_epi16(input[3], input[28]);
- x1[4] = _mm_adds_epi16(input[4], input[27]);
- x1[27] = _mm_subs_epi16(input[4], input[27]);
- x1[5] = _mm_adds_epi16(input[5], input[26]);
- x1[26] = _mm_subs_epi16(input[5], input[26]);
- x1[6] = _mm_adds_epi16(input[6], input[25]);
- x1[25] = _mm_subs_epi16(input[6], input[25]);
- x1[7] = _mm_adds_epi16(input[7], input[24]);
- x1[24] = _mm_subs_epi16(input[7], input[24]);
- x1[8] = _mm_adds_epi16(input[8], input[23]);
- x1[23] = _mm_subs_epi16(input[8], input[23]);
- x1[9] = _mm_adds_epi16(input[9], input[22]);
- x1[22] = _mm_subs_epi16(input[9], input[22]);
- x1[10] = _mm_adds_epi16(input[10], input[21]);
- x1[21] = _mm_subs_epi16(input[10], input[21]);
- x1[11] = _mm_adds_epi16(input[11], input[20]);
- x1[20] = _mm_subs_epi16(input[11], input[20]);
- x1[12] = _mm_adds_epi16(input[12], input[19]);
- x1[19] = _mm_subs_epi16(input[12], input[19]);
- x1[13] = _mm_adds_epi16(input[13], input[18]);
- x1[18] = _mm_subs_epi16(input[13], input[18]);
- x1[14] = _mm_adds_epi16(input[14], input[17]);
- x1[17] = _mm_subs_epi16(input[14], input[17]);
- x1[15] = _mm_adds_epi16(input[15], input[16]);
- x1[16] = _mm_subs_epi16(input[15], input[16]);
-
- // stage 2: fold x1[0..15] the same way (i with 15 - i); btf_16_sse2 on pairs (20,27),(21,26),(22,25),(23,24); 16..19 and 28..31 copied
- __m128i x2[32];
- x2[0] = _mm_adds_epi16(x1[0], x1[15]);
- x2[15] = _mm_subs_epi16(x1[0], x1[15]);
- x2[1] = _mm_adds_epi16(x1[1], x1[14]);
- x2[14] = _mm_subs_epi16(x1[1], x1[14]);
- x2[2] = _mm_adds_epi16(x1[2], x1[13]);
- x2[13] = _mm_subs_epi16(x1[2], x1[13]);
- x2[3] = _mm_adds_epi16(x1[3], x1[12]);
- x2[12] = _mm_subs_epi16(x1[3], x1[12]);
- x2[4] = _mm_adds_epi16(x1[4], x1[11]);
- x2[11] = _mm_subs_epi16(x1[4], x1[11]);
- x2[5] = _mm_adds_epi16(x1[5], x1[10]);
- x2[10] = _mm_subs_epi16(x1[5], x1[10]);
- x2[6] = _mm_adds_epi16(x1[6], x1[9]);
- x2[9] = _mm_subs_epi16(x1[6], x1[9]);
- x2[7] = _mm_adds_epi16(x1[7], x1[8]);
- x2[8] = _mm_subs_epi16(x1[7], x1[8]);
- x2[16] = x1[16];
- x2[17] = x1[17];
- x2[18] = x1[18];
- x2[19] = x1[19];
- btf_16_sse2(cospi_m32_p32, cospi_p32_p32, x1[20], x1[27], x2[20], x2[27]);
- btf_16_sse2(cospi_m32_p32, cospi_p32_p32, x1[21], x1[26], x2[21], x2[26]);
- btf_16_sse2(cospi_m32_p32, cospi_p32_p32, x1[22], x1[25], x2[22], x2[25]);
- btf_16_sse2(cospi_m32_p32, cospi_p32_p32, x1[23], x1[24], x2[23], x2[24]);
- x2[28] = x1[28];
- x2[29] = x1[29];
- x2[30] = x1[30];
- x2[31] = x1[31];
-
- // stage 3: fold x2[0..7]; btf_16_sse2 on (10,13),(11,12); 8,9,14,15 copied; add/sub of mirrored pairs within 16..23 and within 24..31
- __m128i x3[32];
- x3[0] = _mm_adds_epi16(x2[0], x2[7]);
- x3[7] = _mm_subs_epi16(x2[0], x2[7]);
- x3[1] = _mm_adds_epi16(x2[1], x2[6]);
- x3[6] = _mm_subs_epi16(x2[1], x2[6]);
- x3[2] = _mm_adds_epi16(x2[2], x2[5]);
- x3[5] = _mm_subs_epi16(x2[2], x2[5]);
- x3[3] = _mm_adds_epi16(x2[3], x2[4]);
- x3[4] = _mm_subs_epi16(x2[3], x2[4]);
- x3[8] = x2[8];
- x3[9] = x2[9];
- btf_16_sse2(cospi_m32_p32, cospi_p32_p32, x2[10], x2[13], x3[10], x3[13]);
- btf_16_sse2(cospi_m32_p32, cospi_p32_p32, x2[11], x2[12], x3[11], x3[12]);
- x3[14] = x2[14];
- x3[15] = x2[15];
- x3[16] = _mm_adds_epi16(x2[16], x2[23]);
- x3[23] = _mm_subs_epi16(x2[16], x2[23]);
- x3[17] = _mm_adds_epi16(x2[17], x2[22]);
- x3[22] = _mm_subs_epi16(x2[17], x2[22]);
- x3[18] = _mm_adds_epi16(x2[18], x2[21]);
- x3[21] = _mm_subs_epi16(x2[18], x2[21]);
- x3[19] = _mm_adds_epi16(x2[19], x2[20]);
- x3[20] = _mm_subs_epi16(x2[19], x2[20]);
- x3[24] = _mm_subs_epi16(x2[31], x2[24]);
- x3[31] = _mm_adds_epi16(x2[31], x2[24]);
- x3[25] = _mm_subs_epi16(x2[30], x2[25]);
- x3[30] = _mm_adds_epi16(x2[30], x2[25]);
- x3[26] = _mm_subs_epi16(x2[29], x2[26]);
- x3[29] = _mm_adds_epi16(x2[29], x2[26]);
- x3[27] = _mm_subs_epi16(x2[28], x2[27]);
- x3[28] = _mm_adds_epi16(x2[28], x2[27]);
-
- // stage 4: fold x3[0..3]; btf_16_sse2 on (5,6),(18,29),(19,28),(20,27),(21,26); add/sub within 8..11 and 12..15; remaining entries copied
- __m128i x4[32];
- x4[0] = _mm_adds_epi16(x3[0], x3[3]);
- x4[3] = _mm_subs_epi16(x3[0], x3[3]);
- x4[1] = _mm_adds_epi16(x3[1], x3[2]);
- x4[2] = _mm_subs_epi16(x3[1], x3[2]);
- x4[4] = x3[4];
- btf_16_sse2(cospi_m32_p32, cospi_p32_p32, x3[5], x3[6], x4[5], x4[6]);
- x4[7] = x3[7];
- x4[8] = _mm_adds_epi16(x3[8], x3[11]);
- x4[11] = _mm_subs_epi16(x3[8], x3[11]);
- x4[9] = _mm_adds_epi16(x3[9], x3[10]);
- x4[10] = _mm_subs_epi16(x3[9], x3[10]);
- x4[12] = _mm_subs_epi16(x3[15], x3[12]);
- x4[15] = _mm_adds_epi16(x3[15], x3[12]);
- x4[13] = _mm_subs_epi16(x3[14], x3[13]);
- x4[14] = _mm_adds_epi16(x3[14], x3[13]);
- x4[16] = x3[16];
- x4[17] = x3[17];
- btf_16_sse2(cospi_m16_p48, cospi_p48_p16, x3[18], x3[29], x4[18], x4[29]);
- btf_16_sse2(cospi_m16_p48, cospi_p48_p16, x3[19], x3[28], x4[19], x4[28]);
- btf_16_sse2(cospi_m48_m16, cospi_m16_p48, x3[20], x3[27], x4[20], x4[27]);
- btf_16_sse2(cospi_m48_m16, cospi_m16_p48, x3[21], x3[26], x4[21], x4[26]);
- x4[22] = x3[22];
- x4[23] = x3[23];
- x4[24] = x3[24];
- x4[25] = x3[25];
- x4[30] = x3[30];
- x4[31] = x3[31];
-
- // stage 5: btf_16_sse2 on (0,1),(2,3),(9,14),(10,13); add/sub on (4,5),(7,6) and within 16..19, 20..23, 24..27, 28..31; 8,11,12,15 copied
- __m128i x5[32];
- btf_16_sse2(cospi_p32_p32, cospi_p32_m32, x4[0], x4[1], x5[0], x5[1]);
- btf_16_sse2(cospi_p48_p16, cospi_m16_p48, x4[2], x4[3], x5[2], x5[3]);
- x5[4] = _mm_adds_epi16(x4[4], x4[5]);
- x5[5] = _mm_subs_epi16(x4[4], x4[5]);
- x5[6] = _mm_subs_epi16(x4[7], x4[6]);
- x5[7] = _mm_adds_epi16(x4[7], x4[6]);
- x5[8] = x4[8];
- btf_16_sse2(cospi_m16_p48, cospi_p48_p16, x4[9], x4[14], x5[9], x5[14]);
- btf_16_sse2(cospi_m48_m16, cospi_m16_p48, x4[10], x4[13], x5[10], x5[13]);
- x5[11] = x4[11];
- x5[12] = x4[12];
- x5[15] = x4[15];
- x5[16] = _mm_adds_epi16(x4[16], x4[19]);
- x5[19] = _mm_subs_epi16(x4[16], x4[19]);
- x5[17] = _mm_adds_epi16(x4[17], x4[18]);
- x5[18] = _mm_subs_epi16(x4[17], x4[18]);
- x5[20] = _mm_subs_epi16(x4[23], x4[20]);
- x5[23] = _mm_adds_epi16(x4[23], x4[20]);
- x5[21] = _mm_subs_epi16(x4[22], x4[21]);
- x5[22] = _mm_adds_epi16(x4[22], x4[21]);
- x5[24] = _mm_adds_epi16(x4[24], x4[27]);
- x5[27] = _mm_subs_epi16(x4[24], x4[27]);
- x5[25] = _mm_adds_epi16(x4[25], x4[26]);
- x5[26] = _mm_subs_epi16(x4[25], x4[26]);
- x5[28] = _mm_subs_epi16(x4[31], x4[28]);
- x5[31] = _mm_adds_epi16(x4[31], x4[28]);
- x5[29] = _mm_subs_epi16(x4[30], x4[29]);
- x5[30] = _mm_adds_epi16(x4[30], x4[29]);
-
- // stage 6: btf_16_sse2 on (4,7),(5,6),(17,30),(18,29),(21,26),(22,25); add/sub on adjacent pairs in 8..15; remaining entries copied
- __m128i x6[32];
- x6[0] = x5[0];
- x6[1] = x5[1];
- x6[2] = x5[2];
- x6[3] = x5[3];
- btf_16_sse2(cospi_p56_p08, cospi_m08_p56, x5[4], x5[7], x6[4], x6[7]);
- btf_16_sse2(cospi_p24_p40, cospi_m40_p24, x5[5], x5[6], x6[5], x6[6]);
- x6[8] = _mm_adds_epi16(x5[8], x5[9]);
- x6[9] = _mm_subs_epi16(x5[8], x5[9]);
- x6[10] = _mm_subs_epi16(x5[11], x5[10]);
- x6[11] = _mm_adds_epi16(x5[11], x5[10]);
- x6[12] = _mm_adds_epi16(x5[12], x5[13]);
- x6[13] = _mm_subs_epi16(x5[12], x5[13]);
- x6[14] = _mm_subs_epi16(x5[15], x5[14]);
- x6[15] = _mm_adds_epi16(x5[15], x5[14]);
- x6[16] = x5[16];
- btf_16_sse2(cospi_m08_p56, cospi_p56_p08, x5[17], x5[30], x6[17], x6[30]);
- btf_16_sse2(cospi_m56_m08, cospi_m08_p56, x5[18], x5[29], x6[18], x6[29]);
- x6[19] = x5[19];
- x6[20] = x5[20];
- btf_16_sse2(cospi_m40_p24, cospi_p24_p40, x5[21], x5[26], x6[21], x6[26]);
- btf_16_sse2(cospi_m24_m40, cospi_m40_p24, x5[22], x5[25], x6[22], x6[25]);
- x6[23] = x5[23];
- x6[24] = x5[24];
- x6[27] = x5[27];
- x6[28] = x5[28];
- x6[31] = x5[31];
-
- // stage 7: x6[0..7] copied; btf_16_sse2 on (8,15),(9,14),(10,13),(11,12); add/sub on adjacent pairs in 16..31
- __m128i x7[32];
- x7[0] = x6[0];
- x7[1] = x6[1];
- x7[2] = x6[2];
- x7[3] = x6[3];
- x7[4] = x6[4];
- x7[5] = x6[5];
- x7[6] = x6[6];
- x7[7] = x6[7];
- btf_16_sse2(cospi_p60_p04, cospi_m04_p60, x6[8], x6[15], x7[8], x7[15]);
- btf_16_sse2(cospi_p28_p36, cospi_m36_p28, x6[9], x6[14], x7[9], x7[14]);
- btf_16_sse2(cospi_p44_p20, cospi_m20_p44, x6[10], x6[13], x7[10], x7[13]);
- btf_16_sse2(cospi_p12_p52, cospi_m52_p12, x6[11], x6[12], x7[11], x7[12]);
- x7[16] = _mm_adds_epi16(x6[16], x6[17]);
- x7[17] = _mm_subs_epi16(x6[16], x6[17]);
- x7[18] = _mm_subs_epi16(x6[19], x6[18]);
- x7[19] = _mm_adds_epi16(x6[19], x6[18]);
- x7[20] = _mm_adds_epi16(x6[20], x6[21]);
- x7[21] = _mm_subs_epi16(x6[20], x6[21]);
- x7[22] = _mm_subs_epi16(x6[23], x6[22]);
- x7[23] = _mm_adds_epi16(x6[23], x6[22]);
- x7[24] = _mm_adds_epi16(x6[24], x6[25]);
- x7[25] = _mm_subs_epi16(x6[24], x6[25]);
- x7[26] = _mm_subs_epi16(x6[27], x6[26]);
- x7[27] = _mm_adds_epi16(x6[27], x6[26]);
- x7[28] = _mm_adds_epi16(x6[28], x6[29]);
- x7[29] = _mm_subs_epi16(x6[28], x6[29]);
- x7[30] = _mm_subs_epi16(x6[31], x6[30]);
- x7[31] = _mm_adds_epi16(x6[31], x6[30]);
-
- // stage 8: x7[0..15] copied; btf_16_sse2 on mirrored pairs (16,31),(17,30),...,(23,24), each with its own weight pair
- __m128i x8[32];
- x8[0] = x7[0];
- x8[1] = x7[1];
- x8[2] = x7[2];
- x8[3] = x7[3];
- x8[4] = x7[4];
- x8[5] = x7[5];
- x8[6] = x7[6];
- x8[7] = x7[7];
- x8[8] = x7[8];
- x8[9] = x7[9];
- x8[10] = x7[10];
- x8[11] = x7[11];
- x8[12] = x7[12];
- x8[13] = x7[13];
- x8[14] = x7[14];
- x8[15] = x7[15];
- btf_16_sse2(cospi_p62_p02, cospi_m02_p62, x7[16], x7[31], x8[16], x8[31]);
- btf_16_sse2(cospi_p30_p34, cospi_m34_p30, x7[17], x7[30], x8[17], x8[30]);
- btf_16_sse2(cospi_p46_p18, cospi_m18_p46, x7[18], x7[29], x8[18], x8[29]);
- btf_16_sse2(cospi_p14_p50, cospi_m50_p14, x7[19], x7[28], x8[19], x8[28]);
- btf_16_sse2(cospi_p54_p10, cospi_m10_p54, x7[20], x7[27], x8[20], x8[27]);
- btf_16_sse2(cospi_p22_p42, cospi_m42_p22, x7[21], x7[26], x8[21], x8[26]);
- btf_16_sse2(cospi_p38_p26, cospi_m26_p38, x7[22], x7[25], x8[22], x8[25]);
- btf_16_sse2(cospi_p06_p58, cospi_m58_p06, x7[23], x7[24], x8[23], x8[24]);
-
- // stage 9: reorder only -- output[k] = x8[r], where r is k with its 5 index bits reversed (e.g. 1 -> 16, 3 -> 24, 19 -> 25)
- output[0] = x8[0];
- output[1] = x8[16];
- output[2] = x8[8];
- output[3] = x8[24];
- output[4] = x8[4];
- output[5] = x8[20];
- output[6] = x8[12];
- output[7] = x8[28];
- output[8] = x8[2];
- output[9] = x8[18];
- output[10] = x8[10];
- output[11] = x8[26];
- output[12] = x8[6];
- output[13] = x8[22];
- output[14] = x8[14];
- output[15] = x8[30];
- output[16] = x8[1];
- output[17] = x8[17];
- output[18] = x8[9];
- output[19] = x8[25];
- output[20] = x8[5];
- output[21] = x8[21];
- output[22] = x8[13];
- output[23] = x8[29];
- output[24] = x8[3];
- output[25] = x8[19];
- output[26] = x8[11];
- output[27] = x8[27];
- output[28] = x8[7];
- output[29] = x8[23];
- output[30] = x8[15];
- output[31] = x8[31];
-}
-
-void fdct8x64_new_sse2(const __m128i *input, __m128i *output, int8_t cos_bit) {
- const int32_t *cospi = cospi_arr(cos_bit);
- const __m128i __rounding = _mm_set1_epi32(1 << (cos_bit - 1));
-
- __m128i cospi_m32_p32 = pair_set_epi16(-cospi[32], cospi[32]);
- __m128i cospi_p32_p32 = pair_set_epi16(cospi[32], cospi[32]);
- __m128i cospi_m16_p48 = pair_set_epi16(-cospi[16], cospi[48]);
- __m128i cospi_p48_p16 = pair_set_epi16(cospi[48], cospi[16]);
- __m128i cospi_m48_m16 = pair_set_epi16(-cospi[48], -cospi[16]);
- __m128i cospi_p32_m32 = pair_set_epi16(cospi[32], -cospi[32]);
- __m128i cospi_m08_p56 = pair_set_epi16(-cospi[8], cospi[56]);
- __m128i cospi_p56_p08 = pair_set_epi16(cospi[56], cospi[8]);
- __m128i cospi_m56_m08 = pair_set_epi16(-cospi[56], -cospi[8]);
- __m128i cospi_m40_p24 = pair_set_epi16(-cospi[40], cospi[24]);
- __m128i cospi_p24_p40 = pair_set_epi16(cospi[24], cospi[40]);
- __m128i cospi_m24_m40 = pair_set_epi16(-cospi[24], -cospi[40]);
- __m128i cospi_p60_p04 = pair_set_epi16(cospi[60], cospi[4]);
- __m128i cospi_m04_p60 = pair_set_epi16(-cospi[4], cospi[60]);
- __m128i cospi_p28_p36 = pair_set_epi16(cospi[28], cospi[36]);
- __m128i cospi_m36_p28 = pair_set_epi16(-cospi[36], cospi[28]);
- __m128i cospi_p44_p20 = pair_set_epi16(cospi[44], cospi[20]);
- __m128i cospi_m20_p44 = pair_set_epi16(-cospi[20], cospi[44]);
- __m128i cospi_p12_p52 = pair_set_epi16(cospi[12], cospi[52]);
- __m128i cospi_m52_p12 = pair_set_epi16(-cospi[52], cospi[12]);
- __m128i cospi_m60_m04 = pair_set_epi16(-cospi[60], -cospi[4]);
- __m128i cospi_m28_m36 = pair_set_epi16(-cospi[28], -cospi[36]);
- __m128i cospi_m44_m20 = pair_set_epi16(-cospi[44], -cospi[20]);
- __m128i cospi_m12_m52 = pair_set_epi16(-cospi[12], -cospi[52]);
- __m128i cospi_p62_p02 = pair_set_epi16(cospi[62], cospi[2]);
- __m128i cospi_m02_p62 = pair_set_epi16(-cospi[2], cospi[62]);
- __m128i cospi_p30_p34 = pair_set_epi16(cospi[30], cospi[34]);
- __m128i cospi_m34_p30 = pair_set_epi16(-cospi[34], cospi[30]);
- __m128i cospi_p46_p18 = pair_set_epi16(cospi[46], cospi[18]);
- __m128i cospi_m18_p46 = pair_set_epi16(-cospi[18], cospi[46]);
- __m128i cospi_p14_p50 = pair_set_epi16(cospi[14], cospi[50]);
- __m128i cospi_m50_p14 = pair_set_epi16(-cospi[50], cospi[14]);
- __m128i cospi_p54_p10 = pair_set_epi16(cospi[54], cospi[10]);
- __m128i cospi_m10_p54 = pair_set_epi16(-cospi[10], cospi[54]);
- __m128i cospi_p22_p42 = pair_set_epi16(cospi[22], cospi[42]);
- __m128i cospi_m42_p22 = pair_set_epi16(-cospi[42], cospi[22]);
- __m128i cospi_p38_p26 = pair_set_epi16(cospi[38], cospi[26]);
- __m128i cospi_m26_p38 = pair_set_epi16(-cospi[26], cospi[38]);
- __m128i cospi_p06_p58 = pair_set_epi16(cospi[6], cospi[58]);
- __m128i cospi_m58_p06 = pair_set_epi16(-cospi[58], cospi[6]);
- __m128i cospi_p63_p01 = pair_set_epi16(cospi[63], cospi[1]);
- __m128i cospi_m01_p63 = pair_set_epi16(-cospi[1], cospi[63]);
- __m128i cospi_p31_p33 = pair_set_epi16(cospi[31], cospi[33]);
- __m128i cospi_m33_p31 = pair_set_epi16(-cospi[33], cospi[31]);
- __m128i cospi_p47_p17 = pair_set_epi16(cospi[47], cospi[17]);
- __m128i cospi_m17_p47 = pair_set_epi16(-cospi[17], cospi[47]);
- __m128i cospi_p15_p49 = pair_set_epi16(cospi[15], cospi[49]);
- __m128i cospi_m49_p15 = pair_set_epi16(-cospi[49], cospi[15]);
- __m128i cospi_p55_p09 = pair_set_epi16(cospi[55], cospi[9]);
- __m128i cospi_m09_p55 = pair_set_epi16(-cospi[9], cospi[55]);
- __m128i cospi_p23_p41 = pair_set_epi16(cospi[23], cospi[41]);
- __m128i cospi_m41_p23 = pair_set_epi16(-cospi[41], cospi[23]);
- __m128i cospi_p39_p25 = pair_set_epi16(cospi[39], cospi[25]);
- __m128i cospi_m25_p39 = pair_set_epi16(-cospi[25], cospi[39]);
- __m128i cospi_p07_p57 = pair_set_epi16(cospi[7], cospi[57]);
- __m128i cospi_m57_p07 = pair_set_epi16(-cospi[57], cospi[7]);
- __m128i cospi_p59_p05 = pair_set_epi16(cospi[59], cospi[5]);
- __m128i cospi_m05_p59 = pair_set_epi16(-cospi[5], cospi[59]);
- __m128i cospi_p27_p37 = pair_set_epi16(cospi[27], cospi[37]);
- __m128i cospi_m37_p27 = pair_set_epi16(-cospi[37], cospi[27]);
- __m128i cospi_p43_p21 = pair_set_epi16(cospi[43], cospi[21]);
- __m128i cospi_m21_p43 = pair_set_epi16(-cospi[21], cospi[43]);
- __m128i cospi_p11_p53 = pair_set_epi16(cospi[11], cospi[53]);
- __m128i cospi_m53_p11 = pair_set_epi16(-cospi[53], cospi[11]);
- __m128i cospi_p51_p13 = pair_set_epi16(cospi[51], cospi[13]);
- __m128i cospi_m13_p51 = pair_set_epi16(-cospi[13], cospi[51]);
- __m128i cospi_p19_p45 = pair_set_epi16(cospi[19], cospi[45]);
- __m128i cospi_m45_p19 = pair_set_epi16(-cospi[45], cospi[19]);
- __m128i cospi_p35_p29 = pair_set_epi16(cospi[35], cospi[29]);
- __m128i cospi_m29_p35 = pair_set_epi16(-cospi[29], cospi[35]);
- __m128i cospi_p03_p61 = pair_set_epi16(cospi[3], cospi[61]);
- __m128i cospi_m61_p03 = pair_set_epi16(-cospi[61], cospi[3]);
-
- // stage 1
- __m128i x1[64];
- x1[0] = _mm_adds_epi16(input[0], input[63]);
- x1[63] = _mm_subs_epi16(input[0], input[63]);
- x1[1] = _mm_adds_epi16(input[1], input[62]);
- x1[62] = _mm_subs_epi16(input[1], input[62]);
- x1[2] = _mm_adds_epi16(input[2], input[61]);
- x1[61] = _mm_subs_epi16(input[2], input[61]);
- x1[3] = _mm_adds_epi16(input[3], input[60]);
- x1[60] = _mm_subs_epi16(input[3], input[60]);
- x1[4] = _mm_adds_epi16(input[4], input[59]);
- x1[59] = _mm_subs_epi16(input[4], input[59]);
- x1[5] = _mm_adds_epi16(input[5], input[58]);
- x1[58] = _mm_subs_epi16(input[5], input[58]);
- x1[6] = _mm_adds_epi16(input[6], input[57]);
- x1[57] = _mm_subs_epi16(input[6], input[57]);
- x1[7] = _mm_adds_epi16(input[7], input[56]);
- x1[56] = _mm_subs_epi16(input[7], input[56]);
- x1[8] = _mm_adds_epi16(input[8], input[55]);
- x1[55] = _mm_subs_epi16(input[8], input[55]);
- x1[9] = _mm_adds_epi16(input[9], input[54]);
- x1[54] = _mm_subs_epi16(input[9], input[54]);
- x1[10] = _mm_adds_epi16(input[10], input[53]);
- x1[53] = _mm_subs_epi16(input[10], input[53]);
- x1[11] = _mm_adds_epi16(input[11], input[52]);
- x1[52] = _mm_subs_epi16(input[11], input[52]);
- x1[12] = _mm_adds_epi16(input[12], input[51]);
- x1[51] = _mm_subs_epi16(input[12], input[51]);
- x1[13] = _mm_adds_epi16(input[13], input[50]);
- x1[50] = _mm_subs_epi16(input[13], input[50]);
- x1[14] = _mm_adds_epi16(input[14], input[49]);
- x1[49] = _mm_subs_epi16(input[14], input[49]);
- x1[15] = _mm_adds_epi16(input[15], input[48]);
- x1[48] = _mm_subs_epi16(input[15], input[48]);
- x1[16] = _mm_adds_epi16(input[16], input[47]);
- x1[47] = _mm_subs_epi16(input[16], input[47]);
- x1[17] = _mm_adds_epi16(input[17], input[46]);
- x1[46] = _mm_subs_epi16(input[17], input[46]);
- x1[18] = _mm_adds_epi16(input[18], input[45]);
- x1[45] = _mm_subs_epi16(input[18], input[45]);
- x1[19] = _mm_adds_epi16(input[19], input[44]);
- x1[44] = _mm_subs_epi16(input[19], input[44]);
- x1[20] = _mm_adds_epi16(input[20], input[43]);
- x1[43] = _mm_subs_epi16(input[20], input[43]);
- x1[21] = _mm_adds_epi16(input[21], input[42]);
- x1[42] = _mm_subs_epi16(input[21], input[42]);
- x1[22] = _mm_adds_epi16(input[22], input[41]);
- x1[41] = _mm_subs_epi16(input[22], input[41]);
- x1[23] = _mm_adds_epi16(input[23], input[40]);
- x1[40] = _mm_subs_epi16(input[23], input[40]);
- x1[24] = _mm_adds_epi16(input[24], input[39]);
- x1[39] = _mm_subs_epi16(input[24], input[39]);
- x1[25] = _mm_adds_epi16(input[25], input[38]);
- x1[38] = _mm_subs_epi16(input[25], input[38]);
- x1[26] = _mm_adds_epi16(input[26], input[37]);
- x1[37] = _mm_subs_epi16(input[26], input[37]);
- x1[27] = _mm_adds_epi16(input[27], input[36]);
- x1[36] = _mm_subs_epi16(input[27], input[36]);
- x1[28] = _mm_adds_epi16(input[28], input[35]);
- x1[35] = _mm_subs_epi16(input[28], input[35]);
- x1[29] = _mm_adds_epi16(input[29], input[34]);
- x1[34] = _mm_subs_epi16(input[29], input[34]);
- x1[30] = _mm_adds_epi16(input[30], input[33]);
- x1[33] = _mm_subs_epi16(input[30], input[33]);
- x1[31] = _mm_adds_epi16(input[31], input[32]);
- x1[32] = _mm_subs_epi16(input[31], input[32]);
-
- // stage 2
- __m128i x2[64];
- x2[0] = _mm_adds_epi16(x1[0], x1[31]);
- x2[31] = _mm_subs_epi16(x1[0], x1[31]);
- x2[1] = _mm_adds_epi16(x1[1], x1[30]);
- x2[30] = _mm_subs_epi16(x1[1], x1[30]);
- x2[2] = _mm_adds_epi16(x1[2], x1[29]);
- x2[29] = _mm_subs_epi16(x1[2], x1[29]);
- x2[3] = _mm_adds_epi16(x1[3], x1[28]);
- x2[28] = _mm_subs_epi16(x1[3], x1[28]);
- x2[4] = _mm_adds_epi16(x1[4], x1[27]);
- x2[27] = _mm_subs_epi16(x1[4], x1[27]);
- x2[5] = _mm_adds_epi16(x1[5], x1[26]);
- x2[26] = _mm_subs_epi16(x1[5], x1[26]);
- x2[6] = _mm_adds_epi16(x1[6], x1[25]);
- x2[25] = _mm_subs_epi16(x1[6], x1[25]);
- x2[7] = _mm_adds_epi16(x1[7], x1[24]);
- x2[24] = _mm_subs_epi16(x1[7], x1[24]);
- x2[8] = _mm_adds_epi16(x1[8], x1[23]);
- x2[23] = _mm_subs_epi16(x1[8], x1[23]);
- x2[9] = _mm_adds_epi16(x1[9], x1[22]);
- x2[22] = _mm_subs_epi16(x1[9], x1[22]);
- x2[10] = _mm_adds_epi16(x1[10], x1[21]);
- x2[21] = _mm_subs_epi16(x1[10], x1[21]);
- x2[11] = _mm_adds_epi16(x1[11], x1[20]);
- x2[20] = _mm_subs_epi16(x1[11], x1[20]);
- x2[12] = _mm_adds_epi16(x1[12], x1[19]);
- x2[19] = _mm_subs_epi16(x1[12], x1[19]);
- x2[13] = _mm_adds_epi16(x1[13], x1[18]);
- x2[18] = _mm_subs_epi16(x1[13], x1[18]);
- x2[14] = _mm_adds_epi16(x1[14], x1[17]);
- x2[17] = _mm_subs_epi16(x1[14], x1[17]);
- x2[15] = _mm_adds_epi16(x1[15], x1[16]);
- x2[16] = _mm_subs_epi16(x1[15], x1[16]);
- x2[32] = x1[32];
- x2[33] = x1[33];
- x2[34] = x1[34];
- x2[35] = x1[35];
- x2[36] = x1[36];
- x2[37] = x1[37];
- x2[38] = x1[38];
- x2[39] = x1[39];
- btf_16_sse2(cospi_m32_p32, cospi_p32_p32, x1[40], x1[55], x2[40], x2[55]);
- btf_16_sse2(cospi_m32_p32, cospi_p32_p32, x1[41], x1[54], x2[41], x2[54]);
- btf_16_sse2(cospi_m32_p32, cospi_p32_p32, x1[42], x1[53], x2[42], x2[53]);
- btf_16_sse2(cospi_m32_p32, cospi_p32_p32, x1[43], x1[52], x2[43], x2[52]);
- btf_16_sse2(cospi_m32_p32, cospi_p32_p32, x1[44], x1[51], x2[44], x2[51]);
- btf_16_sse2(cospi_m32_p32, cospi_p32_p32, x1[45], x1[50], x2[45], x2[50]);
- btf_16_sse2(cospi_m32_p32, cospi_p32_p32, x1[46], x1[49], x2[46], x2[49]);
- btf_16_sse2(cospi_m32_p32, cospi_p32_p32, x1[47], x1[48], x2[47], x2[48]);
- x2[56] = x1[56];
- x2[57] = x1[57];
- x2[58] = x1[58];
- x2[59] = x1[59];
- x2[60] = x1[60];
- x2[61] = x1[61];
- x2[62] = x1[62];
- x2[63] = x1[63];
-
- // stage 3
- __m128i x3[64];
- x3[0] = _mm_adds_epi16(x2[0], x2[15]);
- x3[15] = _mm_subs_epi16(x2[0], x2[15]);
- x3[1] = _mm_adds_epi16(x2[1], x2[14]);
- x3[14] = _mm_subs_epi16(x2[1], x2[14]);
- x3[2] = _mm_adds_epi16(x2[2], x2[13]);
- x3[13] = _mm_subs_epi16(x2[2], x2[13]);
- x3[3] = _mm_adds_epi16(x2[3], x2[12]);
- x3[12] = _mm_subs_epi16(x2[3], x2[12]);
- x3[4] = _mm_adds_epi16(x2[4], x2[11]);
- x3[11] = _mm_subs_epi16(x2[4], x2[11]);
- x3[5] = _mm_adds_epi16(x2[5], x2[10]);
- x3[10] = _mm_subs_epi16(x2[5], x2[10]);
- x3[6] = _mm_adds_epi16(x2[6], x2[9]);
- x3[9] = _mm_subs_epi16(x2[6], x2[9]);
- x3[7] = _mm_adds_epi16(x2[7], x2[8]);
- x3[8] = _mm_subs_epi16(x2[7], x2[8]);
- x3[16] = x2[16];
- x3[17] = x2[17];
- x3[18] = x2[18];
- x3[19] = x2[19];
- btf_16_sse2(cospi_m32_p32, cospi_p32_p32, x2[20], x2[27], x3[20], x3[27]);
- btf_16_sse2(cospi_m32_p32, cospi_p32_p32, x2[21], x2[26], x3[21], x3[26]);
- btf_16_sse2(cospi_m32_p32, cospi_p32_p32, x2[22], x2[25], x3[22], x3[25]);
- btf_16_sse2(cospi_m32_p32, cospi_p32_p32, x2[23], x2[24], x3[23], x3[24]);
- x3[28] = x2[28];
- x3[29] = x2[29];
- x3[30] = x2[30];
- x3[31] = x2[31];
- x3[32] = _mm_adds_epi16(x2[32], x2[47]);
- x3[47] = _mm_subs_epi16(x2[32], x2[47]);
- x3[33] = _mm_adds_epi16(x2[33], x2[46]);
- x3[46] = _mm_subs_epi16(x2[33], x2[46]);
- x3[34] = _mm_adds_epi16(x2[34], x2[45]);
- x3[45] = _mm_subs_epi16(x2[34], x2[45]);
- x3[35] = _mm_adds_epi16(x2[35], x2[44]);
- x3[44] = _mm_subs_epi16(x2[35], x2[44]);
- x3[36] = _mm_adds_epi16(x2[36], x2[43]);
- x3[43] = _mm_subs_epi16(x2[36], x2[43]);
- x3[37] = _mm_adds_epi16(x2[37], x2[42]);
- x3[42] = _mm_subs_epi16(x2[37], x2[42]);
- x3[38] = _mm_adds_epi16(x2[38], x2[41]);
- x3[41] = _mm_subs_epi16(x2[38], x2[41]);
- x3[39] = _mm_adds_epi16(x2[39], x2[40]);
- x3[40] = _mm_subs_epi16(x2[39], x2[40]);
- x3[48] = _mm_subs_epi16(x2[63], x2[48]);
- x3[63] = _mm_adds_epi16(x2[63], x2[48]);
- x3[49] = _mm_subs_epi16(x2[62], x2[49]);
- x3[62] = _mm_adds_epi16(x2[62], x2[49]);
- x3[50] = _mm_subs_epi16(x2[61], x2[50]);
- x3[61] = _mm_adds_epi16(x2[61], x2[50]);
- x3[51] = _mm_subs_epi16(x2[60], x2[51]);
- x3[60] = _mm_adds_epi16(x2[60], x2[51]);
- x3[52] = _mm_subs_epi16(x2[59], x2[52]);
- x3[59] = _mm_adds_epi16(x2[59], x2[52]);
- x3[53] = _mm_subs_epi16(x2[58], x2[53]);
- x3[58] = _mm_adds_epi16(x2[58], x2[53]);
- x3[54] = _mm_subs_epi16(x2[57], x2[54]);
- x3[57] = _mm_adds_epi16(x2[57], x2[54]);
- x3[55] = _mm_subs_epi16(x2[56], x2[55]);
- x3[56] = _mm_adds_epi16(x2[56], x2[55]);
-
- // stage 4
- __m128i x4[64];
- x4[0] = _mm_adds_epi16(x3[0], x3[7]);
- x4[7] = _mm_subs_epi16(x3[0], x3[7]);
- x4[1] = _mm_adds_epi16(x3[1], x3[6]);
- x4[6] = _mm_subs_epi16(x3[1], x3[6]);
- x4[2] = _mm_adds_epi16(x3[2], x3[5]);
- x4[5] = _mm_subs_epi16(x3[2], x3[5]);
- x4[3] = _mm_adds_epi16(x3[3], x3[4]);
- x4[4] = _mm_subs_epi16(x3[3], x3[4]);
- x4[8] = x3[8];
- x4[9] = x3[9];
- btf_16_sse2(cospi_m32_p32, cospi_p32_p32, x3[10], x3[13], x4[10], x4[13]);
- btf_16_sse2(cospi_m32_p32, cospi_p32_p32, x3[11], x3[12], x4[11], x4[12]);
- x4[14] = x3[14];
- x4[15] = x3[15];
- x4[16] = _mm_adds_epi16(x3[16], x3[23]);
- x4[23] = _mm_subs_epi16(x3[16], x3[23]);
- x4[17] = _mm_adds_epi16(x3[17], x3[22]);
- x4[22] = _mm_subs_epi16(x3[17], x3[22]);
- x4[18] = _mm_adds_epi16(x3[18], x3[21]);
- x4[21] = _mm_subs_epi16(x3[18], x3[21]);
- x4[19] = _mm_adds_epi16(x3[19], x3[20]);
- x4[20] = _mm_subs_epi16(x3[19], x3[20]);
- x4[24] = _mm_subs_epi16(x3[31], x3[24]);
- x4[31] = _mm_adds_epi16(x3[31], x3[24]);
- x4[25] = _mm_subs_epi16(x3[30], x3[25]);
- x4[30] = _mm_adds_epi16(x3[30], x3[25]);
- x4[26] = _mm_subs_epi16(x3[29], x3[26]);
- x4[29] = _mm_adds_epi16(x3[29], x3[26]);
- x4[27] = _mm_subs_epi16(x3[28], x3[27]);
- x4[28] = _mm_adds_epi16(x3[28], x3[27]);
- x4[32] = x3[32];
- x4[33] = x3[33];
- x4[34] = x3[34];
- x4[35] = x3[35];
- btf_16_sse2(cospi_m16_p48, cospi_p48_p16, x3[36], x3[59], x4[36], x4[59]);
- btf_16_sse2(cospi_m16_p48, cospi_p48_p16, x3[37], x3[58], x4[37], x4[58]);
- btf_16_sse2(cospi_m16_p48, cospi_p48_p16, x3[38], x3[57], x4[38], x4[57]);
- btf_16_sse2(cospi_m16_p48, cospi_p48_p16, x3[39], x3[56], x4[39], x4[56]);
- btf_16_sse2(cospi_m48_m16, cospi_m16_p48, x3[40], x3[55], x4[40], x4[55]);
- btf_16_sse2(cospi_m48_m16, cospi_m16_p48, x3[41], x3[54], x4[41], x4[54]);
- btf_16_sse2(cospi_m48_m16, cospi_m16_p48, x3[42], x3[53], x4[42], x4[53]);
- btf_16_sse2(cospi_m48_m16, cospi_m16_p48, x3[43], x3[52], x4[43], x4[52]);
- x4[44] = x3[44];
- x4[45] = x3[45];
- x4[46] = x3[46];
- x4[47] = x3[47];
- x4[48] = x3[48];
- x4[49] = x3[49];
- x4[50] = x3[50];
- x4[51] = x3[51];
- x4[60] = x3[60];
- x4[61] = x3[61];
- x4[62] = x3[62];
- x4[63] = x3[63];
-
- // stage 5
- __m128i x5[64];
- x5[0] = _mm_adds_epi16(x4[0], x4[3]);
- x5[3] = _mm_subs_epi16(x4[0], x4[3]);
- x5[1] = _mm_adds_epi16(x4[1], x4[2]);
- x5[2] = _mm_subs_epi16(x4[1], x4[2]);
- x5[4] = x4[4];
- btf_16_sse2(cospi_m32_p32, cospi_p32_p32, x4[5], x4[6], x5[5], x5[6]);
- x5[7] = x4[7];
- x5[8] = _mm_adds_epi16(x4[8], x4[11]);
- x5[11] = _mm_subs_epi16(x4[8], x4[11]);
- x5[9] = _mm_adds_epi16(x4[9], x4[10]);
- x5[10] = _mm_subs_epi16(x4[9], x4[10]);
- x5[12] = _mm_subs_epi16(x4[15], x4[12]);
- x5[15] = _mm_adds_epi16(x4[15], x4[12]);
- x5[13] = _mm_subs_epi16(x4[14], x4[13]);
- x5[14] = _mm_adds_epi16(x4[14], x4[13]);
- x5[16] = x4[16];
- x5[17] = x4[17];
- btf_16_sse2(cospi_m16_p48, cospi_p48_p16, x4[18], x4[29], x5[18], x5[29]);
- btf_16_sse2(cospi_m16_p48, cospi_p48_p16, x4[19], x4[28], x5[19], x5[28]);
- btf_16_sse2(cospi_m48_m16, cospi_m16_p48, x4[20], x4[27], x5[20], x5[27]);
- btf_16_sse2(cospi_m48_m16, cospi_m16_p48, x4[21], x4[26], x5[21], x5[26]);
- x5[22] = x4[22];
- x5[23] = x4[23];
- x5[24] = x4[24];
- x5[25] = x4[25];
- x5[30] = x4[30];
- x5[31] = x4[31];
- x5[32] = _mm_adds_epi16(x4[32], x4[39]);
- x5[39] = _mm_subs_epi16(x4[32], x4[39]);
- x5[33] = _mm_adds_epi16(x4[33], x4[38]);
- x5[38] = _mm_subs_epi16(x4[33], x4[38]);
- x5[34] = _mm_adds_epi16(x4[34], x4[37]);
- x5[37] = _mm_subs_epi16(x4[34], x4[37]);
- x5[35] = _mm_adds_epi16(x4[35], x4[36]);
- x5[36] = _mm_subs_epi16(x4[35], x4[36]);
- x5[40] = _mm_subs_epi16(x4[47], x4[40]);
- x5[47] = _mm_adds_epi16(x4[47], x4[40]);
- x5[41] = _mm_subs_epi16(x4[46], x4[41]);
- x5[46] = _mm_adds_epi16(x4[46], x4[41]);
- x5[42] = _mm_subs_epi16(x4[45], x4[42]);
- x5[45] = _mm_adds_epi16(x4[45], x4[42]);
- x5[43] = _mm_subs_epi16(x4[44], x4[43]);
- x5[44] = _mm_adds_epi16(x4[44], x4[43]);
- x5[48] = _mm_adds_epi16(x4[48], x4[55]);
- x5[55] = _mm_subs_epi16(x4[48], x4[55]);
- x5[49] = _mm_adds_epi16(x4[49], x4[54]);
- x5[54] = _mm_subs_epi16(x4[49], x4[54]);
- x5[50] = _mm_adds_epi16(x4[50], x4[53]);
- x5[53] = _mm_subs_epi16(x4[50], x4[53]);
- x5[51] = _mm_adds_epi16(x4[51], x4[52]);
- x5[52] = _mm_subs_epi16(x4[51], x4[52]);
- x5[56] = _mm_subs_epi16(x4[63], x4[56]);
- x5[63] = _mm_adds_epi16(x4[63], x4[56]);
- x5[57] = _mm_subs_epi16(x4[62], x4[57]);
- x5[62] = _mm_adds_epi16(x4[62], x4[57]);
- x5[58] = _mm_subs_epi16(x4[61], x4[58]);
- x5[61] = _mm_adds_epi16(x4[61], x4[58]);
- x5[59] = _mm_subs_epi16(x4[60], x4[59]);
- x5[60] = _mm_adds_epi16(x4[60], x4[59]);
-
- // stage 6
- __m128i x6[64];
- btf_16_sse2(cospi_p32_p32, cospi_p32_m32, x5[0], x5[1], x6[0], x6[1]);
- btf_16_sse2(cospi_p48_p16, cospi_m16_p48, x5[2], x5[3], x6[2], x6[3]);
- x6[4] = _mm_adds_epi16(x5[4], x5[5]);
- x6[5] = _mm_subs_epi16(x5[4], x5[5]);
- x6[6] = _mm_subs_epi16(x5[7], x5[6]);
- x6[7] = _mm_adds_epi16(x5[7], x5[6]);
- x6[8] = x5[8];
- btf_16_sse2(cospi_m16_p48, cospi_p48_p16, x5[9], x5[14], x6[9], x6[14]);
- btf_16_sse2(cospi_m48_m16, cospi_m16_p48, x5[10], x5[13], x6[10], x6[13]);
- x6[11] = x5[11];
- x6[12] = x5[12];
- x6[15] = x5[15];
- x6[16] = _mm_adds_epi16(x5[16], x5[19]);
- x6[19] = _mm_subs_epi16(x5[16], x5[19]);
- x6[17] = _mm_adds_epi16(x5[17], x5[18]);
- x6[18] = _mm_subs_epi16(x5[17], x5[18]);
- x6[20] = _mm_subs_epi16(x5[23], x5[20]);
- x6[23] = _mm_adds_epi16(x5[23], x5[20]);
- x6[21] = _mm_subs_epi16(x5[22], x5[21]);
- x6[22] = _mm_adds_epi16(x5[22], x5[21]);
- x6[24] = _mm_adds_epi16(x5[24], x5[27]);
- x6[27] = _mm_subs_epi16(x5[24], x5[27]);
- x6[25] = _mm_adds_epi16(x5[25], x5[26]);
- x6[26] = _mm_subs_epi16(x5[25], x5[26]);
- x6[28] = _mm_subs_epi16(x5[31], x5[28]);
- x6[31] = _mm_adds_epi16(x5[31], x5[28]);
- x6[29] = _mm_subs_epi16(x5[30], x5[29]);
- x6[30] = _mm_adds_epi16(x5[30], x5[29]);
- x6[32] = x5[32];
- x6[33] = x5[33];
- btf_16_sse2(cospi_m08_p56, cospi_p56_p08, x5[34], x5[61], x6[34], x6[61]);
- btf_16_sse2(cospi_m08_p56, cospi_p56_p08, x5[35], x5[60], x6[35], x6[60]);
- btf_16_sse2(cospi_m56_m08, cospi_m08_p56, x5[36], x5[59], x6[36], x6[59]);
- btf_16_sse2(cospi_m56_m08, cospi_m08_p56, x5[37], x5[58], x6[37], x6[58]);
- x6[38] = x5[38];
- x6[39] = x5[39];
- x6[40] = x5[40];
- x6[41] = x5[41];
- btf_16_sse2(cospi_m40_p24, cospi_p24_p40, x5[42], x5[53], x6[42], x6[53]);
- btf_16_sse2(cospi_m40_p24, cospi_p24_p40, x5[43], x5[52], x6[43], x6[52]);
- btf_16_sse2(cospi_m24_m40, cospi_m40_p24, x5[44], x5[51], x6[44], x6[51]);
- btf_16_sse2(cospi_m24_m40, cospi_m40_p24, x5[45], x5[50], x6[45], x6[50]);
- x6[46] = x5[46];
- x6[47] = x5[47];
- x6[48] = x5[48];
- x6[49] = x5[49];
- x6[54] = x5[54];
- x6[55] = x5[55];
- x6[56] = x5[56];
- x6[57] = x5[57];
- x6[62] = x5[62];
- x6[63] = x5[63];
-
- // stage 7
- __m128i x7[64];
- x7[0] = x6[0];
- x7[1] = x6[1];
- x7[2] = x6[2];
- x7[3] = x6[3];
- btf_16_sse2(cospi_p56_p08, cospi_m08_p56, x6[4], x6[7], x7[4], x7[7]);
- btf_16_sse2(cospi_p24_p40, cospi_m40_p24, x6[5], x6[6], x7[5], x7[6]);
- x7[8] = _mm_adds_epi16(x6[8], x6[9]);
- x7[9] = _mm_subs_epi16(x6[8], x6[9]);
- x7[10] = _mm_subs_epi16(x6[11], x6[10]);
- x7[11] = _mm_adds_epi16(x6[11], x6[10]);
- x7[12] = _mm_adds_epi16(x6[12], x6[13]);
- x7[13] = _mm_subs_epi16(x6[12], x6[13]);
- x7[14] = _mm_subs_epi16(x6[15], x6[14]);
- x7[15] = _mm_adds_epi16(x6[15], x6[14]);
- x7[16] = x6[16];
- btf_16_sse2(cospi_m08_p56, cospi_p56_p08, x6[17], x6[30], x7[17], x7[30]);
- btf_16_sse2(cospi_m56_m08, cospi_m08_p56, x6[18], x6[29], x7[18], x7[29]);
- x7[19] = x6[19];
- x7[20] = x6[20];
- btf_16_sse2(cospi_m40_p24, cospi_p24_p40, x6[21], x6[26], x7[21], x7[26]);
- btf_16_sse2(cospi_m24_m40, cospi_m40_p24, x6[22], x6[25], x7[22], x7[25]);
- x7[23] = x6[23];
- x7[24] = x6[24];
- x7[27] = x6[27];
- x7[28] = x6[28];
- x7[31] = x6[31];
- x7[32] = _mm_adds_epi16(x6[32], x6[35]);
- x7[35] = _mm_subs_epi16(x6[32], x6[35]);
- x7[33] = _mm_adds_epi16(x6[33], x6[34]);
- x7[34] = _mm_subs_epi16(x6[33], x6[34]);
- x7[36] = _mm_subs_epi16(x6[39], x6[36]);
- x7[39] = _mm_adds_epi16(x6[39], x6[36]);
- x7[37] = _mm_subs_epi16(x6[38], x6[37]);
- x7[38] = _mm_adds_epi16(x6[38], x6[37]);
- x7[40] = _mm_adds_epi16(x6[40], x6[43]);
- x7[43] = _mm_subs_epi16(x6[40], x6[43]);
- x7[41] = _mm_adds_epi16(x6[41], x6[42]);
- x7[42] = _mm_subs_epi16(x6[41], x6[42]);
- x7[44] = _mm_subs_epi16(x6[47], x6[44]);
- x7[47] = _mm_adds_epi16(x6[47], x6[44]);
- x7[45] = _mm_subs_epi16(x6[46], x6[45]);
- x7[46] = _mm_adds_epi16(x6[46], x6[45]);
- x7[48] = _mm_adds_epi16(x6[48], x6[51]);
- x7[51] = _mm_subs_epi16(x6[48], x6[51]);
- x7[49] = _mm_adds_epi16(x6[49], x6[50]);
- x7[50] = _mm_subs_epi16(x6[49], x6[50]);
- x7[52] = _mm_subs_epi16(x6[55], x6[52]);
- x7[55] = _mm_adds_epi16(x6[55], x6[52]);
- x7[53] = _mm_subs_epi16(x6[54], x6[53]);
- x7[54] = _mm_adds_epi16(x6[54], x6[53]);
- x7[56] = _mm_adds_epi16(x6[56], x6[59]);
- x7[59] = _mm_subs_epi16(x6[56], x6[59]);
- x7[57] = _mm_adds_epi16(x6[57], x6[58]);
- x7[58] = _mm_subs_epi16(x6[57], x6[58]);
- x7[60] = _mm_subs_epi16(x6[63], x6[60]);
- x7[63] = _mm_adds_epi16(x6[63], x6[60]);
- x7[61] = _mm_subs_epi16(x6[62], x6[61]);
- x7[62] = _mm_adds_epi16(x6[62], x6[61]);
-
- // stage 8
- __m128i x8[64];
- x8[0] = x7[0];
- x8[1] = x7[1];
- x8[2] = x7[2];
- x8[3] = x7[3];
- x8[4] = x7[4];
- x8[5] = x7[5];
- x8[6] = x7[6];
- x8[7] = x7[7];
- btf_16_sse2(cospi_p60_p04, cospi_m04_p60, x7[8], x7[15], x8[8], x8[15]);
- btf_16_sse2(cospi_p28_p36, cospi_m36_p28, x7[9], x7[14], x8[9], x8[14]);
- btf_16_sse2(cospi_p44_p20, cospi_m20_p44, x7[10], x7[13], x8[10], x8[13]);
- btf_16_sse2(cospi_p12_p52, cospi_m52_p12, x7[11], x7[12], x8[11], x8[12]);
- x8[16] = _mm_adds_epi16(x7[16], x7[17]);
- x8[17] = _mm_subs_epi16(x7[16], x7[17]);
- x8[18] = _mm_subs_epi16(x7[19], x7[18]);
- x8[19] = _mm_adds_epi16(x7[19], x7[18]);
- x8[20] = _mm_adds_epi16(x7[20], x7[21]);
- x8[21] = _mm_subs_epi16(x7[20], x7[21]);
- x8[22] = _mm_subs_epi16(x7[23], x7[22]);
- x8[23] = _mm_adds_epi16(x7[23], x7[22]);
- x8[24] = _mm_adds_epi16(x7[24], x7[25]);
- x8[25] = _mm_subs_epi16(x7[24], x7[25]);
- x8[26] = _mm_subs_epi16(x7[27], x7[26]);
- x8[27] = _mm_adds_epi16(x7[27], x7[26]);
- x8[28] = _mm_adds_epi16(x7[28], x7[29]);
- x8[29] = _mm_subs_epi16(x7[28], x7[29]);
- x8[30] = _mm_subs_epi16(x7[31], x7[30]);
- x8[31] = _mm_adds_epi16(x7[31], x7[30]);
- x8[32] = x7[32];
- btf_16_sse2(cospi_m04_p60, cospi_p60_p04, x7[33], x7[62], x8[33], x8[62]);
- btf_16_sse2(cospi_m60_m04, cospi_m04_p60, x7[34], x7[61], x8[34], x8[61]);
- x8[35] = x7[35];
- x8[36] = x7[36];
- btf_16_sse2(cospi_m36_p28, cospi_p28_p36, x7[37], x7[58], x8[37], x8[58]);
- btf_16_sse2(cospi_m28_m36, cospi_m36_p28, x7[38], x7[57], x8[38], x8[57]);
- x8[39] = x7[39];
- x8[40] = x7[40];
- btf_16_sse2(cospi_m20_p44, cospi_p44_p20, x7[41], x7[54], x8[41], x8[54]);
- btf_16_sse2(cospi_m44_m20, cospi_m20_p44, x7[42], x7[53], x8[42], x8[53]);
- x8[43] = x7[43];
- x8[44] = x7[44];
- btf_16_sse2(cospi_m52_p12, cospi_p12_p52, x7[45], x7[50], x8[45], x8[50]);
- btf_16_sse2(cospi_m12_m52, cospi_m52_p12, x7[46], x7[49], x8[46], x8[49]);
- x8[47] = x7[47];
- x8[48] = x7[48];
- x8[51] = x7[51];
- x8[52] = x7[52];
- x8[55] = x7[55];
- x8[56] = x7[56];
- x8[59] = x7[59];
- x8[60] = x7[60];
- x8[63] = x7[63];
-
- // stage 9
- __m128i x9[64];
- x9[0] = x8[0];
- x9[1] = x8[1];
- x9[2] = x8[2];
- x9[3] = x8[3];
- x9[4] = x8[4];
- x9[5] = x8[5];
- x9[6] = x8[6];
- x9[7] = x8[7];
- x9[8] = x8[8];
- x9[9] = x8[9];
- x9[10] = x8[10];
- x9[11] = x8[11];
- x9[12] = x8[12];
- x9[13] = x8[13];
- x9[14] = x8[14];
- x9[15] = x8[15];
- btf_16_sse2(cospi_p62_p02, cospi_m02_p62, x8[16], x8[31], x9[16], x9[31]);
- btf_16_sse2(cospi_p30_p34, cospi_m34_p30, x8[17], x8[30], x9[17], x9[30]);
- btf_16_sse2(cospi_p46_p18, cospi_m18_p46, x8[18], x8[29], x9[18], x9[29]);
- btf_16_sse2(cospi_p14_p50, cospi_m50_p14, x8[19], x8[28], x9[19], x9[28]);
- btf_16_sse2(cospi_p54_p10, cospi_m10_p54, x8[20], x8[27], x9[20], x9[27]);
- btf_16_sse2(cospi_p22_p42, cospi_m42_p22, x8[21], x8[26], x9[21], x9[26]);
- btf_16_sse2(cospi_p38_p26, cospi_m26_p38, x8[22], x8[25], x9[22], x9[25]);
- btf_16_sse2(cospi_p06_p58, cospi_m58_p06, x8[23], x8[24], x9[23], x9[24]);
- x9[32] = _mm_adds_epi16(x8[32], x8[33]);
- x9[33] = _mm_subs_epi16(x8[32], x8[33]);
- x9[34] = _mm_subs_epi16(x8[35], x8[34]);
- x9[35] = _mm_adds_epi16(x8[35], x8[34]);
- x9[36] = _mm_adds_epi16(x8[36], x8[37]);
- x9[37] = _mm_subs_epi16(x8[36], x8[37]);
- x9[38] = _mm_subs_epi16(x8[39], x8[38]);
- x9[39] = _mm_adds_epi16(x8[39], x8[38]);
- x9[40] = _mm_adds_epi16(x8[40], x8[41]);
- x9[41] = _mm_subs_epi16(x8[40], x8[41]);
- x9[42] = _mm_subs_epi16(x8[43], x8[42]);
- x9[43] = _mm_adds_epi16(x8[43], x8[42]);
- x9[44] = _mm_adds_epi16(x8[44], x8[45]);
- x9[45] = _mm_subs_epi16(x8[44], x8[45]);
- x9[46] = _mm_subs_epi16(x8[47], x8[46]);
- x9[47] = _mm_adds_epi16(x8[47], x8[46]);
- x9[48] = _mm_adds_epi16(x8[48], x8[49]);
- x9[49] = _mm_subs_epi16(x8[48], x8[49]);
- x9[50] = _mm_subs_epi16(x8[51], x8[50]);
- x9[51] = _mm_adds_epi16(x8[51], x8[50]);
- x9[52] = _mm_adds_epi16(x8[52], x8[53]);
- x9[53] = _mm_subs_epi16(x8[52], x8[53]);
- x9[54] = _mm_subs_epi16(x8[55], x8[54]);
- x9[55] = _mm_adds_epi16(x8[55], x8[54]);
- x9[56] = _mm_adds_epi16(x8[56], x8[57]);
- x9[57] = _mm_subs_epi16(x8[56], x8[57]);
- x9[58] = _mm_subs_epi16(x8[59], x8[58]);
- x9[59] = _mm_adds_epi16(x8[59], x8[58]);
- x9[60] = _mm_adds_epi16(x8[60], x8[61]);
- x9[61] = _mm_subs_epi16(x8[60], x8[61]);
- x9[62] = _mm_subs_epi16(x8[63], x8[62]);
- x9[63] = _mm_adds_epi16(x8[63], x8[62]);
-
- // stage 10
- __m128i x10[64];
- x10[0] = x9[0];
- x10[1] = x9[1];
- x10[2] = x9[2];
- x10[3] = x9[3];
- x10[4] = x9[4];
- x10[5] = x9[5];
- x10[6] = x9[6];
- x10[7] = x9[7];
- x10[8] = x9[8];
- x10[9] = x9[9];
- x10[10] = x9[10];
- x10[11] = x9[11];
- x10[12] = x9[12];
- x10[13] = x9[13];
- x10[14] = x9[14];
- x10[15] = x9[15];
- x10[16] = x9[16];
- x10[17] = x9[17];
- x10[18] = x9[18];
- x10[19] = x9[19];
- x10[20] = x9[20];
- x10[21] = x9[21];
- x10[22] = x9[22];
- x10[23] = x9[23];
- x10[24] = x9[24];
- x10[25] = x9[25];
- x10[26] = x9[26];
- x10[27] = x9[27];
- x10[28] = x9[28];
- x10[29] = x9[29];
- x10[30] = x9[30];
- x10[31] = x9[31];
- btf_16_sse2(cospi_p63_p01, cospi_m01_p63, x9[32], x9[63], x10[32], x10[63]);
- btf_16_sse2(cospi_p31_p33, cospi_m33_p31, x9[33], x9[62], x10[33], x10[62]);
- btf_16_sse2(cospi_p47_p17, cospi_m17_p47, x9[34], x9[61], x10[34], x10[61]);
- btf_16_sse2(cospi_p15_p49, cospi_m49_p15, x9[35], x9[60], x10[35], x10[60]);
- btf_16_sse2(cospi_p55_p09, cospi_m09_p55, x9[36], x9[59], x10[36], x10[59]);
- btf_16_sse2(cospi_p23_p41, cospi_m41_p23, x9[37], x9[58], x10[37], x10[58]);
- btf_16_sse2(cospi_p39_p25, cospi_m25_p39, x9[38], x9[57], x10[38], x10[57]);
- btf_16_sse2(cospi_p07_p57, cospi_m57_p07, x9[39], x9[56], x10[39], x10[56]);
- btf_16_sse2(cospi_p59_p05, cospi_m05_p59, x9[40], x9[55], x10[40], x10[55]);
- btf_16_sse2(cospi_p27_p37, cospi_m37_p27, x9[41], x9[54], x10[41], x10[54]);
- btf_16_sse2(cospi_p43_p21, cospi_m21_p43, x9[42], x9[53], x10[42], x10[53]);
- btf_16_sse2(cospi_p11_p53, cospi_m53_p11, x9[43], x9[52], x10[43], x10[52]);
- btf_16_sse2(cospi_p51_p13, cospi_m13_p51, x9[44], x9[51], x10[44], x10[51]);
- btf_16_sse2(cospi_p19_p45, cospi_m45_p19, x9[45], x9[50], x10[45], x10[50]);
- btf_16_sse2(cospi_p35_p29, cospi_m29_p35, x9[46], x9[49], x10[46], x10[49]);
- btf_16_sse2(cospi_p03_p61, cospi_m61_p03, x9[47], x9[48], x10[47], x10[48]);
-
- // stage 11
- output[0] = x10[0];
- output[1] = x10[32];
- output[2] = x10[16];
- output[3] = x10[48];
- output[4] = x10[8];
- output[5] = x10[40];
- output[6] = x10[24];
- output[7] = x10[56];
- output[8] = x10[4];
- output[9] = x10[36];
- output[10] = x10[20];
- output[11] = x10[52];
- output[12] = x10[12];
- output[13] = x10[44];
- output[14] = x10[28];
- output[15] = x10[60];
- output[16] = x10[2];
- output[17] = x10[34];
- output[18] = x10[18];
- output[19] = x10[50];
- output[20] = x10[10];
- output[21] = x10[42];
- output[22] = x10[26];
- output[23] = x10[58];
- output[24] = x10[6];
- output[25] = x10[38];
- output[26] = x10[22];
- output[27] = x10[54];
- output[28] = x10[14];
- output[29] = x10[46];
- output[30] = x10[30];
- output[31] = x10[62];
- output[32] = x10[1];
- output[33] = x10[33];
- output[34] = x10[17];
- output[35] = x10[49];
- output[36] = x10[9];
- output[37] = x10[41];
- output[38] = x10[25];
- output[39] = x10[57];
- output[40] = x10[5];
- output[41] = x10[37];
- output[42] = x10[21];
- output[43] = x10[53];
- output[44] = x10[13];
- output[45] = x10[45];
- output[46] = x10[29];
- output[47] = x10[61];
- output[48] = x10[3];
- output[49] = x10[35];
- output[50] = x10[19];
- output[51] = x10[51];
- output[52] = x10[11];
- output[53] = x10[43];
- output[54] = x10[27];
- output[55] = x10[59];
- output[56] = x10[7];
- output[57] = x10[39];
- output[58] = x10[23];
- output[59] = x10[55];
- output[60] = x10[15];
- output[61] = x10[47];
- output[62] = x10[31];
- output[63] = x10[63];
-}
-
-// Forward 4-point ADST on the low four 16-bit lanes of input[0..3] (only
-// _mm_unpacklo_epi16 is applied to the inputs, so their upper lanes are
-// never read). Weighted sums are formed in 32 bits with _mm_madd_epi16,
-// rounded by adding 1 << (cos_bit - 1), shifted right arithmetically by
-// cos_bit and packed to 16 bits with signed saturation.
-// Result row k lands in the low 64 bits of output[k]; output[0] and
-// output[1] also carry rows 2 and 3 in their upper 64 bits.
-static void fadst4x4_new_sse2(const __m128i *input, __m128i *output,
- int8_t cos_bit) {
- const int32_t *const sin_tab = sinpi_arr(cos_bit);
- const __m128i w_s1_s2 = pair_set_epi16(sin_tab[1], sin_tab[2]);
- const __m128i w_s4_ns1 = pair_set_epi16(sin_tab[4], -sin_tab[1]);
- const __m128i w_s3_s4 = pair_set_epi16(sin_tab[3], sin_tab[4]);
- const __m128i w_ns3_s2 = pair_set_epi16(-sin_tab[3], sin_tab[2]);
- const __m128i w_s3_s3 = _mm_set1_epi16((int16_t)sin_tab[3]);
- const __m128i zero = _mm_set1_epi16(0);
- const __m128i half = _mm_set1_epi32(1 << (cos_bit - 1));
-
- // Every input is read before anything is stored to output.
- const __m128i in0 = input[0];
- const __m128i in1 = input[1];
- const __m128i in2 = input[2];
- const __m128i in3 = input[3];
- const __m128i in01 = _mm_add_epi16(in0, in1);  // wrapping, not saturating
-
- // Interleave into 16-bit pairs so that one _mm_madd_epi16 produces a
- // 32-bit two-term weighted sum per lane; pairing with zero leaves a
- // single-term product.
- const __m128i pair01 = _mm_unpacklo_epi16(in0, in1);
- const __m128i pair23 = _mm_unpacklo_epi16(in2, in3);
- const __m128i wide01 = _mm_unpacklo_epi16(in01, zero);
- const __m128i wide2 = _mm_unpacklo_epi16(in2, zero);
- const __m128i wide3 = _mm_unpacklo_epi16(in3, zero);
-
- const __m128i acc0 = _mm_add_epi32(_mm_madd_epi16(pair01, w_s1_s2),
-                                    _mm_madd_epi16(pair23, w_s3_s4));
- const __m128i acc1 = _mm_sub_epi32(_mm_madd_epi16(wide01, w_s3_s3),
-                                    _mm_madd_epi16(wide3, w_s3_s3));
- const __m128i acc2 = _mm_add_epi32(_mm_madd_epi16(pair01, w_s4_ns1),
-                                    _mm_madd_epi16(pair23, w_ns3_s2));
- // Three times the input[2] product, built as (x << 2) - x.
- const __m128i in2_s3 = _mm_madd_epi16(wide2, w_s3_s3);
- const __m128i in2_s3_x3 = _mm_sub_epi32(_mm_slli_epi32(in2_s3, 2), in2_s3);
- const __m128i acc3 = _mm_add_epi32(_mm_sub_epi32(acc2, acc0), in2_s3_x3);
-
- // Round and scale back down by cos_bit.
- const __m128i row0 = _mm_srai_epi32(_mm_add_epi32(acc0, half), cos_bit);
- const __m128i row1 = _mm_srai_epi32(_mm_add_epi32(acc1, half), cos_bit);
- const __m128i row2 = _mm_srai_epi32(_mm_add_epi32(acc2, half), cos_bit);
- const __m128i row3 = _mm_srai_epi32(_mm_add_epi32(acc3, half), cos_bit);
-
- // Pack rows (0, 2) and (1, 3) together, then shift the upper halves down
- // by 8 bytes to expose rows 2 and 3 on their own.
- const __m128i rows02 = _mm_packs_epi32(row0, row2);
- const __m128i rows13 = _mm_packs_epi32(row1, row3);
- output[0] = rows02;
- output[1] = rows13;
- output[2] = _mm_srli_si128(rows02, 8);
- output[3] = _mm_srli_si128(rows13, 8);
-}
-
-// 8-point forward ADST flow graph over input[0..7], written to output[0..7].
-// Odd stages are input/output permutations or saturating 16-bit add/sub
-// butterflies; even stages are rotations done by btf_16_w4_sse2 (defined
-// elsewhere), which receives the rounding constant and cos_bit explicitly.
-// All inputs are consumed in stage 1, before any store to output.
-static void fadst4x8_new_sse2(const __m128i *input, __m128i *output,
- int8_t cos_bit) {
- const int32_t *cospi = cospi_arr(cos_bit);
- const __m128i zero = _mm_setzero_si128();
- const __m128i rounding = _mm_set1_epi32(1 << (cos_bit - 1));
-
- // Rotation weights; kept non-const because they are passed by address.
- __m128i cospi_p32_p32 = pair_set_epi16(cospi[32], cospi[32]);
- __m128i cospi_p32_m32 = pair_set_epi16(cospi[32], -cospi[32]);
- __m128i cospi_p16_p48 = pair_set_epi16(cospi[16], cospi[48]);
- __m128i cospi_p48_m16 = pair_set_epi16(cospi[48], -cospi[16]);
- __m128i cospi_m48_p16 = pair_set_epi16(-cospi[48], cospi[16]);
- __m128i cospi_p04_p60 = pair_set_epi16(cospi[4], cospi[60]);
- __m128i cospi_p60_m04 = pair_set_epi16(cospi[60], -cospi[4]);
- __m128i cospi_p20_p44 = pair_set_epi16(cospi[20], cospi[44]);
- __m128i cospi_p44_m20 = pair_set_epi16(cospi[44], -cospi[20]);
- __m128i cospi_p36_p28 = pair_set_epi16(cospi[36], cospi[28]);
- __m128i cospi_p28_m36 = pair_set_epi16(cospi[28], -cospi[36]);
- __m128i cospi_p52_p12 = pair_set_epi16(cospi[52], cospi[12]);
- __m128i cospi_p12_m52 = pair_set_epi16(cospi[12], -cospi[52]);
-
- __m128i x1[8], x2[8], x3[8], x4[8], x5[8], x6[8];
- int i;
-
- // stage 1: input permutation; four of the inputs are negated (saturating).
- x1[0] = input[0];
- x1[3] = input[4];
- x1[5] = input[6];
- x1[6] = input[2];
- x1[1] = _mm_subs_epi16(zero, input[7]);
- x1[2] = _mm_subs_epi16(zero, input[3]);
- x1[4] = _mm_subs_epi16(zero, input[1]);
- x1[7] = _mm_subs_epi16(zero, input[5]);
-
- // stage 2: in each group of four, pass the first pair through and rotate
- // the second pair.
- for (i = 0; i < 8; i += 4) {
-   x2[i] = x1[i];
-   x2[i + 1] = x1[i + 1];
-   btf_16_w4_sse2(&cospi_p32_p32, &cospi_p32_m32, rounding, cos_bit,
-                  &x1[i + 2], &x1[i + 3], &x2[i + 2], &x2[i + 3]);
- }
-
- // stage 3: add/sub butterflies at distance 2 inside each group of four.
- for (i = 0; i < 8; i += 4) {
-   x3[i] = _mm_adds_epi16(x2[i], x2[i + 2]);
-   x3[i + 2] = _mm_subs_epi16(x2[i], x2[i + 2]);
-   x3[i + 1] = _mm_adds_epi16(x2[i + 1], x2[i + 3]);
-   x3[i + 3] = _mm_subs_epi16(x2[i + 1], x2[i + 3]);
- }
-
- // stage 4: lower half passes through, upper half is rotated pairwise.
- for (i = 0; i < 4; ++i) {
-   x4[i] = x3[i];
- }
- btf_16_w4_sse2(&cospi_p16_p48, &cospi_p48_m16, rounding, cos_bit, &x3[4],
-                &x3[5], &x4[4], &x4[5]);
- btf_16_w4_sse2(&cospi_m48_p16, &cospi_p16_p48, rounding, cos_bit, &x3[6],
-                &x3[7], &x4[6], &x4[7]);
-
- // stage 5: add/sub butterflies at distance 4.
- for (i = 0; i < 4; ++i) {
-   x5[i] = _mm_adds_epi16(x4[i], x4[i + 4]);
-   x5[i + 4] = _mm_subs_epi16(x4[i], x4[i + 4]);
- }
-
- // stage 6: final rotation of each adjacent pair.
- btf_16_w4_sse2(&cospi_p04_p60, &cospi_p60_m04, rounding, cos_bit, &x5[0],
-                &x5[1], &x6[0], &x6[1]);
- btf_16_w4_sse2(&cospi_p20_p44, &cospi_p44_m20, rounding, cos_bit, &x5[2],
-                &x5[3], &x6[2], &x6[3]);
- btf_16_w4_sse2(&cospi_p36_p28, &cospi_p28_m36, rounding, cos_bit, &x5[4],
-                &x5[5], &x6[4], &x6[5]);
- btf_16_w4_sse2(&cospi_p52_p12, &cospi_p12_m52, rounding, cos_bit, &x5[6],
-                &x5[7], &x6[6], &x6[7]);
-
- // stage 7: output permutation 1, 6, 3, 4, 5, 2, 7, 0.
- for (i = 0; i < 4; ++i) {
-   output[2 * i] = x6[2 * i + 1];
-   output[2 * i + 1] = x6[6 - 2 * i];
- }
-}
-
-// Forward 4-point ADST applied to all eight 16-bit lanes of input[0..3].
-// The lanes are handled as two halves (low four via _mm_unpacklo_epi16, high
-// four via _mm_unpackhi_epi16) with identical arithmetic: 32-bit weighted
-// sums from _mm_madd_epi16, rounding by 1 << (cos_bit - 1), arithmetic right
-// shift by cos_bit. The halves are then packed back together with signed
-// saturation, so output[k] holds result row k for all eight lanes.
-static void fadst8x4_new_sse2(const __m128i *input, __m128i *output,
- int8_t cos_bit) {
- const int32_t *const sin_tab = sinpi_arr(cos_bit);
- const __m128i w_s1_s2 = pair_set_epi16(sin_tab[1], sin_tab[2]);
- const __m128i w_s4_ns1 = pair_set_epi16(sin_tab[4], -sin_tab[1]);
- const __m128i w_s3_s4 = pair_set_epi16(sin_tab[3], sin_tab[4]);
- const __m128i w_ns3_s2 = pair_set_epi16(-sin_tab[3], sin_tab[2]);
- const __m128i w_s3_s3 = _mm_set1_epi16((int16_t)sin_tab[3]);
- const __m128i zero = _mm_set1_epi16(0);
- const __m128i half = _mm_set1_epi32(1 << (cos_bit - 1));
-
- // Every input is read before anything is stored to output.
- const __m128i in0 = input[0];
- const __m128i in1 = input[1];
- const __m128i in2 = input[2];
- const __m128i in3 = input[3];
- const __m128i in01 = _mm_add_epi16(in0, in1);  // wrapping, not saturating
-
- // Index 0 = low four lanes, index 1 = high four lanes.
- __m128i pair01[2], pair23[2], wide01[2], wide2[2], wide3[2];
- __m128i row[4][2];
- int h;
-
- pair01[0] = _mm_unpacklo_epi16(in0, in1);
- pair01[1] = _mm_unpackhi_epi16(in0, in1);
- pair23[0] = _mm_unpacklo_epi16(in2, in3);
- pair23[1] = _mm_unpackhi_epi16(in2, in3);
- wide01[0] = _mm_unpacklo_epi16(in01, zero);
- wide01[1] = _mm_unpackhi_epi16(in01, zero);
- wide2[0] = _mm_unpacklo_epi16(in2, zero);
- wide2[1] = _mm_unpackhi_epi16(in2, zero);
- wide3[0] = _mm_unpacklo_epi16(in3, zero);
- wide3[1] = _mm_unpackhi_epi16(in3, zero);
-
- for (h = 0; h < 2; ++h) {
-   const __m128i acc0 = _mm_add_epi32(_mm_madd_epi16(pair01[h], w_s1_s2),
-                                      _mm_madd_epi16(pair23[h], w_s3_s4));
-   const __m128i acc1 = _mm_sub_epi32(_mm_madd_epi16(wide01[h], w_s3_s3),
-                                      _mm_madd_epi16(wide3[h], w_s3_s3));
-   const __m128i acc2 = _mm_add_epi32(_mm_madd_epi16(pair01[h], w_s4_ns1),
-                                      _mm_madd_epi16(pair23[h], w_ns3_s2));
-   // Three times the input[2] product, built as (x << 2) - x.
-   const __m128i in2_s3 = _mm_madd_epi16(wide2[h], w_s3_s3);
-   const __m128i in2_s3_x3 =
-       _mm_sub_epi32(_mm_slli_epi32(in2_s3, 2), in2_s3);
-   const __m128i acc3 =
-       _mm_add_epi32(_mm_sub_epi32(acc2, acc0), in2_s3_x3);
-
-   row[0][h] = _mm_srai_epi32(_mm_add_epi32(acc0, half), cos_bit);
-   row[1][h] = _mm_srai_epi32(_mm_add_epi32(acc1, half), cos_bit);
-   row[2][h] = _mm_srai_epi32(_mm_add_epi32(acc2, half), cos_bit);
-   row[3][h] = _mm_srai_epi32(_mm_add_epi32(acc3, half), cos_bit);
- }
-
- // Rejoin the two halves of each row.
- output[0] = _mm_packs_epi32(row[0][0], row[0][1]);
- output[1] = _mm_packs_epi32(row[1][0], row[1][1]);
- output[2] = _mm_packs_epi32(row[2][0], row[2][1]);
- output[3] = _mm_packs_epi32(row[3][0], row[3][1]);
-}
-
-// 8-point forward ADST flow graph over input[0..7], written to output[0..7].
-// Odd stages are permutations or saturating 16-bit add/sub butterflies; even
-// stages are rotations done with btf_16_sse2 (defined elsewhere).
-// NOTE(review): __rounding is never named explicitly below; presumably
-// btf_16_sse2 is a macro that picks it (and cos_bit) up from the enclosing
-// scope, so those names must not be changed without checking it.
-// All inputs are consumed in stage 1, before any store to output.
-static void fadst8x8_new_sse2(const __m128i *input, __m128i *output,
- int8_t cos_bit) {
- const int32_t *cospi = cospi_arr(cos_bit);
- const __m128i __zero = _mm_setzero_si128();
- const __m128i __rounding = _mm_set1_epi32(1 << (cos_bit - 1));
-
- // Rotation weights.
- __m128i cospi_p32_p32 = pair_set_epi16(cospi[32], cospi[32]);
- __m128i cospi_p32_m32 = pair_set_epi16(cospi[32], -cospi[32]);
- __m128i cospi_p16_p48 = pair_set_epi16(cospi[16], cospi[48]);
- __m128i cospi_p48_m16 = pair_set_epi16(cospi[48], -cospi[16]);
- __m128i cospi_m48_p16 = pair_set_epi16(-cospi[48], cospi[16]);
- __m128i cospi_p04_p60 = pair_set_epi16(cospi[4], cospi[60]);
- __m128i cospi_p60_m04 = pair_set_epi16(cospi[60], -cospi[4]);
- __m128i cospi_p20_p44 = pair_set_epi16(cospi[20], cospi[44]);
- __m128i cospi_p44_m20 = pair_set_epi16(cospi[44], -cospi[20]);
- __m128i cospi_p36_p28 = pair_set_epi16(cospi[36], cospi[28]);
- __m128i cospi_p28_m36 = pair_set_epi16(cospi[28], -cospi[36]);
- __m128i cospi_p52_p12 = pair_set_epi16(cospi[52], cospi[12]);
- __m128i cospi_p12_m52 = pair_set_epi16(cospi[12], -cospi[52]);
-
- __m128i x1[8], x2[8], x3[8], x4[8], x5[8], x6[8];
- int i;
-
- // stage 1: input permutation; four of the inputs are negated (saturating).
- x1[0] = input[0];
- x1[3] = input[4];
- x1[5] = input[6];
- x1[6] = input[2];
- x1[1] = _mm_subs_epi16(__zero, input[7]);
- x1[2] = _mm_subs_epi16(__zero, input[3]);
- x1[4] = _mm_subs_epi16(__zero, input[1]);
- x1[7] = _mm_subs_epi16(__zero, input[5]);
-
- // stage 2: first pair of each group of four passes through, second pair
- // is rotated.
- for (i = 0; i < 8; i += 4) {
-   x2[i] = x1[i];
-   x2[i + 1] = x1[i + 1];
- }
- btf_16_sse2(cospi_p32_p32, cospi_p32_m32, x1[2], x1[3], x2[2], x2[3]);
- btf_16_sse2(cospi_p32_p32, cospi_p32_m32, x1[6], x1[7], x2[6], x2[7]);
-
- // stage 3: add/sub butterflies at distance 2 inside each group of four.
- for (i = 0; i < 8; i += 4) {
-   x3[i] = _mm_adds_epi16(x2[i], x2[i + 2]);
-   x3[i + 2] = _mm_subs_epi16(x2[i], x2[i + 2]);
-   x3[i + 1] = _mm_adds_epi16(x2[i + 1], x2[i + 3]);
-   x3[i + 3] = _mm_subs_epi16(x2[i + 1], x2[i + 3]);
- }
-
- // stage 4: lower half passes through, upper half is rotated pairwise.
- for (i = 0; i < 4; ++i) {
-   x4[i] = x3[i];
- }
- btf_16_sse2(cospi_p16_p48, cospi_p48_m16, x3[4], x3[5], x4[4], x4[5]);
- btf_16_sse2(cospi_m48_p16, cospi_p16_p48, x3[6], x3[7], x4[6], x4[7]);
-
- // stage 5: add/sub butterflies at distance 4.
- for (i = 0; i < 4; ++i) {
-   x5[i] = _mm_adds_epi16(x4[i], x4[i + 4]);
-   x5[i + 4] = _mm_subs_epi16(x4[i], x4[i + 4]);
- }
-
- // stage 6: final rotation of each adjacent pair.
- btf_16_sse2(cospi_p04_p60, cospi_p60_m04, x5[0], x5[1], x6[0], x6[1]);
- btf_16_sse2(cospi_p20_p44, cospi_p44_m20, x5[2], x5[3], x6[2], x6[3]);
- btf_16_sse2(cospi_p36_p28, cospi_p28_m36, x5[4], x5[5], x6[4], x6[5]);
- btf_16_sse2(cospi_p52_p12, cospi_p12_m52, x5[6], x5[7], x6[6], x6[7]);
-
- // stage 7: output permutation 1, 6, 3, 4, 5, 2, 7, 0.
- for (i = 0; i < 4; ++i) {
-   output[2 * i] = x6[2 * i + 1];
-   output[2 * i + 1] = x6[6 - 2 * i];
- }
-}
-
-// 16-point forward ADST flow graph over input[0..15], written to
-// output[0..15]. Odd stages are permutations or saturating 16-bit add/sub
-// butterflies whose span doubles each time (2, 4, 8); even stages are
-// rotations done with btf_16_sse2 (defined elsewhere).
-// NOTE(review): __rounding is never named explicitly below; presumably
-// btf_16_sse2 is a macro that picks it (and cos_bit) up from the enclosing
-// scope, so those names must not be changed without checking it.
-// All inputs are consumed in stage 1, before any store to output.
-static void fadst8x16_new_sse2(const __m128i *input, __m128i *output,
- int8_t cos_bit) {
- const int32_t *cospi = cospi_arr(cos_bit);
- const __m128i __zero = _mm_setzero_si128();
- const __m128i __rounding = _mm_set1_epi32(1 << (cos_bit - 1));
-
- // Rotation weights for stages 2, 4 and 6.
- __m128i cospi_p32_p32 = pair_set_epi16(cospi[32], cospi[32]);
- __m128i cospi_p32_m32 = pair_set_epi16(cospi[32], -cospi[32]);
- __m128i cospi_p16_p48 = pair_set_epi16(cospi[16], cospi[48]);
- __m128i cospi_p48_m16 = pair_set_epi16(cospi[48], -cospi[16]);
- __m128i cospi_m48_p16 = pair_set_epi16(-cospi[48], cospi[16]);
- __m128i cospi_p08_p56 = pair_set_epi16(cospi[8], cospi[56]);
- __m128i cospi_p56_m08 = pair_set_epi16(cospi[56], -cospi[8]);
- __m128i cospi_p40_p24 = pair_set_epi16(cospi[40], cospi[24]);
- __m128i cospi_p24_m40 = pair_set_epi16(cospi[24], -cospi[40]);
- __m128i cospi_m56_p08 = pair_set_epi16(-cospi[56], cospi[8]);
- __m128i cospi_m24_p40 = pair_set_epi16(-cospi[24], cospi[40]);
- // Rotation weights for stage 8.
- __m128i cospi_p02_p62 = pair_set_epi16(cospi[2], cospi[62]);
- __m128i cospi_p62_m02 = pair_set_epi16(cospi[62], -cospi[2]);
- __m128i cospi_p10_p54 = pair_set_epi16(cospi[10], cospi[54]);
- __m128i cospi_p54_m10 = pair_set_epi16(cospi[54], -cospi[10]);
- __m128i cospi_p18_p46 = pair_set_epi16(cospi[18], cospi[46]);
- __m128i cospi_p46_m18 = pair_set_epi16(cospi[46], -cospi[18]);
- __m128i cospi_p26_p38 = pair_set_epi16(cospi[26], cospi[38]);
- __m128i cospi_p38_m26 = pair_set_epi16(cospi[38], -cospi[26]);
- __m128i cospi_p34_p30 = pair_set_epi16(cospi[34], cospi[30]);
- __m128i cospi_p30_m34 = pair_set_epi16(cospi[30], -cospi[34]);
- __m128i cospi_p42_p22 = pair_set_epi16(cospi[42], cospi[22]);
- __m128i cospi_p22_m42 = pair_set_epi16(cospi[22], -cospi[42]);
- __m128i cospi_p50_p14 = pair_set_epi16(cospi[50], cospi[14]);
- __m128i cospi_p14_m50 = pair_set_epi16(cospi[14], -cospi[50]);
- __m128i cospi_p58_p06 = pair_set_epi16(cospi[58], cospi[6]);
- __m128i cospi_p06_m58 = pair_set_epi16(cospi[6], -cospi[58]);
-
- __m128i x1[16], x2[16], x3[16], x4[16], x5[16], x6[16], x7[16], x8[16];
- int g, i;
-
- // stage 1: input permutation; eight of the inputs are negated
- // (saturating).
- x1[0] = input[0];
- x1[3] = input[8];
- x1[5] = input[12];
- x1[6] = input[4];
- x1[9] = input[14];
- x1[10] = input[6];
- x1[12] = input[2];
- x1[15] = input[10];
- x1[1] = _mm_subs_epi16(__zero, input[15]);
- x1[2] = _mm_subs_epi16(__zero, input[7]);
- x1[4] = _mm_subs_epi16(__zero, input[3]);
- x1[7] = _mm_subs_epi16(__zero, input[11]);
- x1[8] = _mm_subs_epi16(__zero, input[1]);
- x1[11] = _mm_subs_epi16(__zero, input[9]);
- x1[13] = _mm_subs_epi16(__zero, input[13]);
- x1[14] = _mm_subs_epi16(__zero, input[5]);
-
- // stage 2: first pair of each group of four passes through, second pair
- // is rotated.
- for (g = 0; g < 16; g += 4) {
-   x2[g] = x1[g];
-   x2[g + 1] = x1[g + 1];
- }
- btf_16_sse2(cospi_p32_p32, cospi_p32_m32, x1[2], x1[3], x2[2], x2[3]);
- btf_16_sse2(cospi_p32_p32, cospi_p32_m32, x1[6], x1[7], x2[6], x2[7]);
- btf_16_sse2(cospi_p32_p32, cospi_p32_m32, x1[10], x1[11], x2[10], x2[11]);
- btf_16_sse2(cospi_p32_p32, cospi_p32_m32, x1[14], x1[15], x2[14], x2[15]);
-
- // stage 3: add/sub butterflies at distance 2 inside each group of four.
- for (g = 0; g < 16; g += 4) {
-   for (i = 0; i < 2; ++i) {
-     x3[g + i] = _mm_adds_epi16(x2[g + i], x2[g + i + 2]);
-     x3[g + i + 2] = _mm_subs_epi16(x2[g + i], x2[g + i + 2]);
-   }
- }
-
- // stage 4: first four of each group of eight pass through, the rest are
- // rotated pairwise.
- for (g = 0; g < 16; g += 8) {
-   for (i = 0; i < 4; ++i) {
-     x4[g + i] = x3[g + i];
-   }
- }
- btf_16_sse2(cospi_p16_p48, cospi_p48_m16, x3[4], x3[5], x4[4], x4[5]);
- btf_16_sse2(cospi_m48_p16, cospi_p16_p48, x3[6], x3[7], x4[6], x4[7]);
- btf_16_sse2(cospi_p16_p48, cospi_p48_m16, x3[12], x3[13], x4[12], x4[13]);
- btf_16_sse2(cospi_m48_p16, cospi_p16_p48, x3[14], x3[15], x4[14], x4[15]);
-
- // stage 5: add/sub butterflies at distance 4 inside each group of eight.
- for (g = 0; g < 16; g += 8) {
-   for (i = 0; i < 4; ++i) {
-     x5[g + i] = _mm_adds_epi16(x4[g + i], x4[g + i + 4]);
-     x5[g + i + 4] = _mm_subs_epi16(x4[g + i], x4[g + i + 4]);
-   }
- }
-
- // stage 6: lower eight pass through, upper eight are rotated pairwise.
- for (i = 0; i < 8; ++i) {
-   x6[i] = x5[i];
- }
- btf_16_sse2(cospi_p08_p56, cospi_p56_m08, x5[8], x5[9], x6[8], x6[9]);
- btf_16_sse2(cospi_p40_p24, cospi_p24_m40, x5[10], x5[11], x6[10], x6[11]);
- btf_16_sse2(cospi_m56_p08, cospi_p08_p56, x5[12], x5[13], x6[12], x6[13]);
- btf_16_sse2(cospi_m24_p40, cospi_p40_p24, x5[14], x5[15], x6[14], x6[15]);
-
- // stage 7: add/sub butterflies at distance 8.
- for (i = 0; i < 8; ++i) {
-   x7[i] = _mm_adds_epi16(x6[i], x6[i + 8]);
-   x7[i + 8] = _mm_subs_epi16(x6[i], x6[i + 8]);
- }
-
- // stage 8: final rotation of each adjacent pair.
- btf_16_sse2(cospi_p02_p62, cospi_p62_m02, x7[0], x7[1], x8[0], x8[1]);
- btf_16_sse2(cospi_p10_p54, cospi_p54_m10, x7[2], x7[3], x8[2], x8[3]);
- btf_16_sse2(cospi_p18_p46, cospi_p46_m18, x7[4], x7[5], x8[4], x8[5]);
- btf_16_sse2(cospi_p26_p38, cospi_p38_m26, x7[6], x7[7], x8[6], x8[7]);
- btf_16_sse2(cospi_p34_p30, cospi_p30_m34, x7[8], x7[9], x8[8], x8[9]);
- btf_16_sse2(cospi_p42_p22, cospi_p22_m42, x7[10], x7[11], x8[10], x8[11]);
- btf_16_sse2(cospi_p50_p14, cospi_p14_m50, x7[12], x7[13], x8[12], x8[13]);
- btf_16_sse2(cospi_p58_p06, cospi_p06_m58, x7[14], x7[15], x8[14], x8[15]);
-
- // stage 9: output permutation; even outputs take x8[1], x8[3], ...,
- // x8[15] and odd outputs take x8[14], x8[12], ..., x8[0].
- for (i = 0; i < 8; ++i) {
-   output[2 * i] = x8[2 * i + 1];
-   output[2 * i + 1] = x8[14 - 2 * i];
- }
-}
-
-static const transform_1d_sse2 col_txfm4x4_arr[TX_TYPES] = {  /*
-   One 1-D kernel per transform type, listed in the order given by the
-   per-entry comments (presumably the transform-type enum order; confirm
-   against its definition). Judging by the name this is the column pass
-   for 4x4 blocks. The kernel follows the first component of each type
-   name (ADST_DCT selects the ADST kernel), the V_ types select a real
-   transform, and IDTX and the H_ types select the identity kernel.
-   FLIPADST entries use the same kernel as ADST; the table itself applies
-   no flip. */
- fdct4x4_new_sse2, // DCT_DCT
- fadst4x4_new_sse2, // ADST_DCT
- fdct4x4_new_sse2, // DCT_ADST
- fadst4x4_new_sse2, // ADST_ADST
- fadst4x4_new_sse2, // FLIPADST_DCT
- fdct4x4_new_sse2, // DCT_FLIPADST
- fadst4x4_new_sse2, // FLIPADST_FLIPADST
- fadst4x4_new_sse2, // ADST_FLIPADST
- fadst4x4_new_sse2, // FLIPADST_ADST
- fidentity4x4_new_sse2, // IDTX
- fdct4x4_new_sse2, // V_DCT
- fidentity4x4_new_sse2, // H_DCT
- fadst4x4_new_sse2, // V_ADST
- fidentity4x4_new_sse2, // H_ADST
- fadst4x4_new_sse2, // V_FLIPADST
- fidentity4x4_new_sse2 // H_FLIPADST
-};
-
-static const transform_1d_sse2 row_txfm4x4_arr[TX_TYPES] = {  /*
-   One 1-D kernel per transform type, listed in the order given by the
-   per-entry comments (presumably the transform-type enum order; confirm
-   against its definition). Judging by the name this is the row pass for
-   4x4 blocks. The kernel follows the second component of each type name
-   (ADST_DCT selects the DCT kernel), the H_ types select a real
-   transform, and IDTX and the V_ types select the identity kernel.
-   FLIPADST entries use the same kernel as ADST; the table itself applies
-   no flip. */
- fdct4x4_new_sse2, // DCT_DCT
- fdct4x4_new_sse2, // ADST_DCT
- fadst4x4_new_sse2, // DCT_ADST
- fadst4x4_new_sse2, // ADST_ADST
- fdct4x4_new_sse2, // FLIPADST_DCT
- fadst4x4_new_sse2, // DCT_FLIPADST
- fadst4x4_new_sse2, // FLIPADST_FLIPADST
- fadst4x4_new_sse2, // ADST_FLIPADST
- fadst4x4_new_sse2, // FLIPADST_ADST
- fidentity4x4_new_sse2, // IDTX
- fidentity4x4_new_sse2, // V_DCT
- fdct4x4_new_sse2, // H_DCT
- fidentity4x4_new_sse2, // V_ADST
- fadst4x4_new_sse2, // H_ADST
- fidentity4x4_new_sse2, // V_FLIPADST
- fadst4x4_new_sse2 // H_FLIPADST
-};
-
-static const transform_1d_sse2 col_txfm4x8_arr[TX_TYPES] = {  /*
-   One 1-D kernel per transform type, listed in the order given by the
-   per-entry comments (presumably the transform-type enum order; confirm
-   against its definition). Judging by the name this is the column pass
-   built from the 4x8 kernels. The kernel follows the first component of
-   each type name, the V_ types select a real transform, and IDTX and the
-   H_ types select the identity kernel. Note that the identity entries
-   here are fidentity8x8_new_sse2; no 4x8-specific identity kernel is
-   referenced. FLIPADST entries use the same kernel as ADST. */
- fdct4x8_new_sse2, // DCT_DCT
- fadst4x8_new_sse2, // ADST_DCT
- fdct4x8_new_sse2, // DCT_ADST
- fadst4x8_new_sse2, // ADST_ADST
- fadst4x8_new_sse2, // FLIPADST_DCT
- fdct4x8_new_sse2, // DCT_FLIPADST
- fadst4x8_new_sse2, // FLIPADST_FLIPADST
- fadst4x8_new_sse2, // ADST_FLIPADST
- fadst4x8_new_sse2, // FLIPADST_ADST
- fidentity8x8_new_sse2, // IDTX
- fdct4x8_new_sse2, // V_DCT
- fidentity8x8_new_sse2, // H_DCT
- fadst4x8_new_sse2, // V_ADST
- fidentity8x8_new_sse2, // H_ADST
- fadst4x8_new_sse2, // V_FLIPADST
- fidentity8x8_new_sse2 // H_FLIPADST
-};
-
-static const transform_1d_sse2 row_txfm8x4_arr[TX_TYPES] = {  /*
-   One 1-D kernel per transform type, listed in the order given by the
-   per-entry comments (presumably the transform-type enum order; confirm
-   against its definition). Judging by the name this is the row pass
-   built from the 8x4 kernels. The kernel follows the second component of
-   each type name, the H_ types select a real transform, and IDTX and the
-   V_ types select the identity kernel. FLIPADST entries use the same
-   kernel as ADST; the table itself applies no flip. */
- fdct8x4_new_sse2, // DCT_DCT
- fdct8x4_new_sse2, // ADST_DCT
- fadst8x4_new_sse2, // DCT_ADST
- fadst8x4_new_sse2, // ADST_ADST
- fdct8x4_new_sse2, // FLIPADST_DCT
- fadst8x4_new_sse2, // DCT_FLIPADST
- fadst8x4_new_sse2, // FLIPADST_FLIPADST
- fadst8x4_new_sse2, // ADST_FLIPADST
- fadst8x4_new_sse2, // FLIPADST_ADST
- fidentity8x4_new_sse2, // IDTX
- fidentity8x4_new_sse2, // V_DCT
- fdct8x4_new_sse2, // H_DCT
- fidentity8x4_new_sse2, // V_ADST
- fadst8x4_new_sse2, // H_ADST
- fidentity8x4_new_sse2, // V_FLIPADST
- fadst8x4_new_sse2 // H_FLIPADST
-};
-
-static const transform_1d_sse2 col_txfm8x4_arr[TX_TYPES] = {
- fdct8x4_new_sse2, // DCT_DCT
- fadst8x4_new_sse2, // ADST_DCT
- fdct8x4_new_sse2, // DCT_ADST
- fadst8x4_new_sse2, // ADST_ADST
- fadst8x4_new_sse2, // FLIPADST_DCT
- fdct8x4_new_sse2, // DCT_FLIPADST
- fadst8x4_new_sse2, // FLIPADST_FLIPADST
- fadst8x4_new_sse2, // ADST_FLIPADST
- fadst8x4_new_sse2, // FLIPADST_ADST
- fidentity8x4_new_sse2, // IDTX
- fdct8x4_new_sse2, // V_DCT
- fidentity8x4_new_sse2, // H_DCT
- fadst8x4_new_sse2, // V_ADST
- fidentity8x4_new_sse2, // H_ADST
- fadst8x4_new_sse2, // V_FLIPADST
- fidentity8x4_new_sse2 // H_FLIPADST
-};
-
-static const transform_1d_sse2 row_txfm4x8_arr[TX_TYPES] = {
- fdct4x8_new_sse2, // DCT_DCT
- fdct4x8_new_sse2, // ADST_DCT
- fadst4x8_new_sse2, // DCT_ADST
- fadst4x8_new_sse2, // ADST_ADST
- fdct4x8_new_sse2, // FLIPADST_DCT
- fadst4x8_new_sse2, // DCT_FLIPADST
- fadst4x8_new_sse2, // FLIPADST_FLIPADST
- fadst4x8_new_sse2, // ADST_FLIPADST
- fadst4x8_new_sse2, // FLIPADST_ADST
- fidentity8x8_new_sse2, // IDTX
- fidentity8x8_new_sse2, // V_DCT
- fdct4x8_new_sse2, // H_DCT
- fidentity8x8_new_sse2, // V_ADST
- fadst4x8_new_sse2, // H_ADST
- fidentity8x8_new_sse2, // V_FLIPADST
- fadst4x8_new_sse2 // H_FLIPADST
-};
-
-static const transform_1d_sse2 col_txfm8x8_arr[TX_TYPES] = {
- fdct8x8_new_sse2, // DCT_DCT
- fadst8x8_new_sse2, // ADST_DCT
- fdct8x8_new_sse2, // DCT_ADST
- fadst8x8_new_sse2, // ADST_ADST
- fadst8x8_new_sse2, // FLIPADST_DCT
- fdct8x8_new_sse2, // DCT_FLIPADST
- fadst8x8_new_sse2, // FLIPADST_FLIPADST
- fadst8x8_new_sse2, // ADST_FLIPADST
- fadst8x8_new_sse2, // FLIPADST_ADST
- fidentity8x8_new_sse2, // IDTX
- fdct8x8_new_sse2, // V_DCT
- fidentity8x8_new_sse2, // H_DCT
- fadst8x8_new_sse2, // V_ADST
- fidentity8x8_new_sse2, // H_ADST
- fadst8x8_new_sse2, // V_FLIPADST
- fidentity8x8_new_sse2, // H_FLIPADST
-};
-
-static const transform_1d_sse2 row_txfm8x8_arr[TX_TYPES] = {
- fdct8x8_new_sse2, // DCT_DCT
- fdct8x8_new_sse2, // ADST_DCT
- fadst8x8_new_sse2, // DCT_ADST
- fadst8x8_new_sse2, // ADST_ADST
- fdct8x8_new_sse2, // FLIPADST_DCT
- fadst8x8_new_sse2, // DCT_FLIPADST
- fadst8x8_new_sse2, // FLIPADST_FLIPADST
- fadst8x8_new_sse2, // ADST_FLIPADST
- fadst8x8_new_sse2, // FLIPADST_ADST
- fidentity8x8_new_sse2, // IDTX
- fidentity8x8_new_sse2, // V_DCT
- fdct8x8_new_sse2, // H_DCT
- fidentity8x8_new_sse2, // V_ADST
- fadst8x8_new_sse2, // H_ADST
- fidentity8x8_new_sse2, // V_FLIPADST
- fadst8x8_new_sse2 // H_FLIPADST
-};
-
-static const transform_1d_sse2 col_txfm8x16_arr[TX_TYPES] = {
- fdct8x16_new_sse2, // DCT_DCT
- fadst8x16_new_sse2, // ADST_DCT
- fdct8x16_new_sse2, // DCT_ADST
- fadst8x16_new_sse2, // ADST_ADST
- fadst8x16_new_sse2, // FLIPADST_DCT
- fdct8x16_new_sse2, // DCT_FLIPADST
- fadst8x16_new_sse2, // FLIPADST_FLIPADST
- fadst8x16_new_sse2, // ADST_FLIPADST
- fadst8x16_new_sse2, // FLIPADST_ADST
- fidentity8x16_new_sse2, // IDTX
- fdct8x16_new_sse2, // V_DCT
- fidentity8x16_new_sse2, // H_DCT
- fadst8x16_new_sse2, // V_ADST
- fidentity8x16_new_sse2, // H_ADST
- fadst8x16_new_sse2, // V_FLIPADST
- fidentity8x16_new_sse2 // H_FLIPADST
-};
-
-static const transform_1d_sse2 row_txfm8x16_arr[TX_TYPES] = {
- fdct8x16_new_sse2, // DCT_DCT
- fdct8x16_new_sse2, // ADST_DCT
- fadst8x16_new_sse2, // DCT_ADST
- fadst8x16_new_sse2, // ADST_ADST
- fdct8x16_new_sse2, // FLIPADST_DCT
- fadst8x16_new_sse2, // DCT_FLIPADST
- fadst8x16_new_sse2, // FLIPADST_FLIPADST
- fadst8x16_new_sse2, // ADST_FLIPADST
- fadst8x16_new_sse2, // FLIPADST_ADST
- fidentity8x16_new_sse2, // IDTX
- fidentity8x16_new_sse2, // V_DCT
- fdct8x16_new_sse2, // H_DCT
- fidentity8x16_new_sse2, // V_ADST
- fadst8x16_new_sse2, // H_ADST
- fidentity8x16_new_sse2, // V_FLIPADST
- fadst8x16_new_sse2 // H_FLIPADST
-};
-
-static const transform_1d_sse2 row_txfm8x32_arr[TX_TYPES] = {
- fdct8x32_new_sse2, // DCT_DCT
- NULL, // ADST_DCT
- NULL, // DCT_ADST
- NULL, // ADST_ADST
- NULL, // FLIPADST_DCT
- NULL, // DCT_FLIPADST
- NULL, // FLIPADST_FLIPADST
- NULL, // ADST_FLIPADST
- NULL, // FLIPADST_ADST
- fidentity8x32_new_sse2, // IDTX
- fidentity8x32_new_sse2, // V_DCT
- fdct8x32_new_sse2, // H_DCT
- NULL, // V_ADST
- NULL, // H_ADST
- NULL, // V_FLIPADST
- NULL // H_FLIPADST
-};
-
-void av1_lowbd_fwd_txfm2d_4x4_sse2(const int16_t *input, int32_t *output,
- int stride, TX_TYPE tx_type, int bd) {
- (void)bd;
- __m128i buf0[4], buf1[4], *buf;
- const int8_t *shift = fwd_txfm_shift_ls[TX_4X4];
- const int txw_idx = get_txw_idx(TX_4X4);
- const int txh_idx = get_txh_idx(TX_4X4);
- const int cos_bit_col = fwd_cos_bit_col[txw_idx][txh_idx];
- const int cos_bit_row = fwd_cos_bit_row[txw_idx][txh_idx];
- const int width = 4;
- const int height = 4;
- const transform_1d_sse2 col_txfm = col_txfm4x4_arr[tx_type];
- const transform_1d_sse2 row_txfm = row_txfm4x4_arr[tx_type];
- int ud_flip, lr_flip;
-
- get_flip_cfg(tx_type, &ud_flip, &lr_flip);
- if (ud_flip) {
- load_buffer_16bit_to_16bit_w4_flip(input, stride, buf0, height);
- } else {
- load_buffer_16bit_to_16bit_w4(input, stride, buf0, height);
- }
- round_shift_16bit(buf0, height, shift[0]);
- col_txfm(buf0, buf0, cos_bit_col);
- round_shift_16bit(buf0, height, shift[1]);
- transpose_16bit_4x4(buf0, buf1);
-
- if (lr_flip) {
- buf = buf0;
- flip_buf_sse2(buf1, buf, width);
- } else {
- buf = buf1;
- }
- row_txfm(buf, buf, cos_bit_row);
- round_shift_16bit(buf, width, shift[2]);
- transpose_16bit_4x4(buf, buf);
- store_buffer_16bit_to_32bit_w4(buf, output, width, height);
-}
-
-void av1_lowbd_fwd_txfm2d_4x8_sse2(const int16_t *input, int32_t *output,
- int stride, TX_TYPE tx_type, int bd) {
- (void)stride;
- (void)bd;
- __m128i buf0[8], buf1[8], *buf;
- const int8_t *shift = fwd_txfm_shift_ls[TX_4X8];
- const int txw_idx = get_txw_idx(TX_4X8);
- const int txh_idx = get_txh_idx(TX_4X8);
- const int cos_bit_col = fwd_cos_bit_col[txw_idx][txh_idx];
- const int cos_bit_row = fwd_cos_bit_row[txw_idx][txh_idx];
- const int width = 4;
- const int height = 8;
- const transform_1d_sse2 col_txfm = col_txfm4x8_arr[tx_type];
- const transform_1d_sse2 row_txfm = row_txfm8x4_arr[tx_type];
- int ud_flip, lr_flip;
-
- get_flip_cfg(tx_type, &ud_flip, &lr_flip);
- if (ud_flip) {
- load_buffer_16bit_to_16bit_w4_flip(input, stride, buf0, height);
- } else {
- load_buffer_16bit_to_16bit_w4(input, stride, buf0, height);
- }
- round_shift_16bit(buf0, height, shift[0]);
- col_txfm(buf0, buf0, cos_bit_col);
- round_shift_16bit(buf0, height, shift[1]);
- transpose_16bit_4x8(buf0, buf1);
-
- if (lr_flip) {
- buf = buf0;
- flip_buf_sse2(buf1, buf, width);
- } else {
- buf = buf1;
- }
- row_txfm(buf, buf, cos_bit_row);
- round_shift_16bit(buf, width, shift[2]);
- transpose_16bit_8x4(buf, buf);
- store_rect_buffer_16bit_to_32bit_w4(buf, output, width, height);
-}
-
-void av1_lowbd_fwd_txfm2d_4x16_sse2(const int16_t *input, int32_t *output,
- int stride, TX_TYPE tx_type, int bd) {
- (void)bd;
- __m128i buf0[16], buf1[16];
- const int8_t *shift = fwd_txfm_shift_ls[TX_4X16];
- const int txw_idx = get_txw_idx(TX_4X16);
- const int txh_idx = get_txh_idx(TX_4X16);
- const int cos_bit_col = fwd_cos_bit_col[txw_idx][txh_idx];
- const int cos_bit_row = fwd_cos_bit_row[txw_idx][txh_idx];
- const int width = 4;
- const int height = 16;
- const transform_1d_sse2 col_txfm = col_txfm8x16_arr[tx_type];
- const transform_1d_sse2 row_txfm = row_txfm8x4_arr[tx_type];
- int ud_flip, lr_flip;
-
- get_flip_cfg(tx_type, &ud_flip, &lr_flip);
- if (ud_flip) {
- load_buffer_16bit_to_16bit_w4_flip(input, stride, buf0, height);
- } else {
- load_buffer_16bit_to_16bit_w4(input, stride, buf0, height);
- }
- round_shift_16bit(buf0, height, shift[0]);
- col_txfm(buf0, buf0, cos_bit_col);
- round_shift_16bit(buf0, height, shift[1]);
- transpose_16bit_4x8(buf0, buf1);
- transpose_16bit_4x8(buf0 + 8, buf1 + 8);
-
- for (int i = 0; i < 2; i++) {
- __m128i *buf;
- if (lr_flip) {
- buf = buf0;
- flip_buf_sse2(buf1 + 8 * i, buf, width);
- } else {
- buf = buf1 + 8 * i;
- }
- row_txfm(buf, buf, cos_bit_row);
- round_shift_16bit(buf, width, shift[2]);
- transpose_16bit_8x4(buf, buf);
- store_buffer_16bit_to_32bit_w4(buf, output + 8 * width * i, width, 8);
- }
-}
-
-void av1_lowbd_fwd_txfm2d_8x4_sse2(const int16_t *input, int32_t *output,
- int stride, TX_TYPE tx_type, int bd) {
- (void)bd;
- __m128i buf0[8], buf1[8], *buf;
- const int8_t *shift = fwd_txfm_shift_ls[TX_8X4];
- const int txw_idx = get_txw_idx(TX_8X4);
- const int txh_idx = get_txh_idx(TX_8X4);
- const int cos_bit_col = fwd_cos_bit_col[txw_idx][txh_idx];
- const int cos_bit_row = fwd_cos_bit_row[txw_idx][txh_idx];
- const int width = 8;
- const int height = 4;
- const transform_1d_sse2 col_txfm = col_txfm8x4_arr[tx_type];
- const transform_1d_sse2 row_txfm = row_txfm4x8_arr[tx_type];
- int ud_flip, lr_flip;
-
- get_flip_cfg(tx_type, &ud_flip, &lr_flip);
- if (ud_flip)
- load_buffer_16bit_to_16bit_flip(input, stride, buf0, height);
- else
- load_buffer_16bit_to_16bit(input, stride, buf0, height);
- round_shift_16bit(buf0, height, shift[0]);
- col_txfm(buf0, buf0, cos_bit_col);
- round_shift_16bit(buf0, height, shift[1]);
- transpose_16bit_8x8(buf0, buf1);
-
- if (lr_flip) {
- buf = buf0;
- flip_buf_sse2(buf1, buf, width);
- } else {
- buf = buf1;
- }
- row_txfm(buf, buf, cos_bit_row);
- round_shift_16bit(buf, width, shift[2]);
- transpose_16bit_8x8(buf, buf);
- store_rect_buffer_16bit_to_32bit_w8(buf, output, width, height);
-}
-
-void av1_lowbd_fwd_txfm2d_8x8_sse2(const int16_t *input, int32_t *output,
- int stride, TX_TYPE tx_type, int bd) {
- (void)bd;
- __m128i buf0[8], buf1[8], *buf;
- const int8_t *shift = fwd_txfm_shift_ls[TX_8X8];
- const int txw_idx = get_txw_idx(TX_8X8);
- const int txh_idx = get_txh_idx(TX_8X8);
- const int cos_bit_col = fwd_cos_bit_col[txw_idx][txh_idx];
- const int cos_bit_row = fwd_cos_bit_row[txw_idx][txh_idx];
- const int width = 8;
- const int height = 8;
- const transform_1d_sse2 col_txfm = col_txfm8x8_arr[tx_type];
- const transform_1d_sse2 row_txfm = row_txfm8x8_arr[tx_type];
- int ud_flip, lr_flip;
-
- get_flip_cfg(tx_type, &ud_flip, &lr_flip);
- if (ud_flip)
- load_buffer_16bit_to_16bit_flip(input, stride, buf0, height);
- else
- load_buffer_16bit_to_16bit(input, stride, buf0, height);
- round_shift_16bit(buf0, height, shift[0]);
- col_txfm(buf0, buf0, cos_bit_col);
- round_shift_16bit(buf0, height, shift[1]);
- transpose_16bit_8x8(buf0, buf1);
-
- if (lr_flip) {
- buf = buf0;
- flip_buf_sse2(buf1, buf, width);
- } else {
- buf = buf1;
- }
- row_txfm(buf, buf, cos_bit_row);
- round_shift_16bit(buf, width, shift[2]);
- transpose_16bit_8x8(buf, buf);
- store_buffer_16bit_to_32bit_w8(buf, output, width, height);
-}
-
-void av1_lowbd_fwd_txfm2d_8x16_sse2(const int16_t *input, int32_t *output,
- int stride, TX_TYPE tx_type, int bd) {
- (void)bd;
- __m128i buf0[16], buf1[16];
- const int8_t *shift = fwd_txfm_shift_ls[TX_8X16];
- const int txw_idx = get_txw_idx(TX_8X16);
- const int txh_idx = get_txh_idx(TX_8X16);
- const int cos_bit_col = fwd_cos_bit_col[txw_idx][txh_idx];
- const int cos_bit_row = fwd_cos_bit_row[txw_idx][txh_idx];
- const int width = 8;
- const int height = 16;
- const transform_1d_sse2 col_txfm = col_txfm8x16_arr[tx_type];
- const transform_1d_sse2 row_txfm = row_txfm8x8_arr[tx_type];
- int ud_flip, lr_flip;
-
- get_flip_cfg(tx_type, &ud_flip, &lr_flip);
- if (ud_flip) {
- load_buffer_16bit_to_16bit_flip(input, stride, buf0, height);
- } else {
- load_buffer_16bit_to_16bit(input, stride, buf0, height);
- }
- round_shift_16bit(buf0, height, shift[0]);
- col_txfm(buf0, buf0, cos_bit_col);
- round_shift_16bit(buf0, height, shift[1]);
- transpose_16bit_8x8(buf0, buf1);
- transpose_16bit_8x8(buf0 + 8, buf1 + 8);
-
- for (int i = 0; i < 2; i++) {
- __m128i *buf;
- if (lr_flip) {
- buf = buf0;
- flip_buf_sse2(buf1 + width * i, buf, width);
- } else {
- buf = buf1 + width * i;
- }
- row_txfm(buf, buf, cos_bit_row);
- round_shift_16bit(buf, width, shift[2]);
- transpose_16bit_8x8(buf, buf);
- store_rect_buffer_16bit_to_32bit_w8(buf, output + 8 * width * i, width, 8);
- }
-}
-
-void av1_lowbd_fwd_txfm2d_8x32_sse2(const int16_t *input, int32_t *output,
- int stride, TX_TYPE tx_type, int bd) {
- (void)bd;
- __m128i buf0[32], buf1[32];
- const int8_t *shift = fwd_txfm_shift_ls[TX_8X32];
- const int txw_idx = get_txw_idx(TX_8X32);
- const int txh_idx = get_txh_idx(TX_8X32);
- const int cos_bit_col = fwd_cos_bit_col[txw_idx][txh_idx];
- const int cos_bit_row = fwd_cos_bit_row[txw_idx][txh_idx];
- const int width = 8;
- const int height = 32;
- const transform_1d_sse2 col_txfm = col_txfm8x32_arr[tx_type];
- const transform_1d_sse2 row_txfm = row_txfm8x8_arr[tx_type];
- int ud_flip, lr_flip;
-
- get_flip_cfg(tx_type, &ud_flip, &lr_flip);
- if (ud_flip) {
- load_buffer_16bit_to_16bit_flip(input, stride, buf0, height);
- } else {
- load_buffer_16bit_to_16bit(input, stride, buf0, height);
- }
- round_shift_16bit(buf0, height, shift[0]);
- col_txfm(buf0, buf0, cos_bit_col);
- round_shift_16bit(buf0, height, shift[1]);
- transpose_16bit_8x8(buf0, buf1);
- transpose_16bit_8x8(buf0 + 8, buf1 + 8);
- transpose_16bit_8x8(buf0 + 16, buf1 + 16);
- transpose_16bit_8x8(buf0 + 24, buf1 + 24);
-
- for (int i = 0; i < 4; i++) {
- __m128i *buf;
- if (lr_flip) {
- buf = buf0;
- flip_buf_sse2(buf1 + width * i, buf, width);
- } else {
- buf = buf1 + width * i;
- }
- row_txfm(buf, buf, cos_bit_row);
- round_shift_16bit(buf, width, shift[2]);
- transpose_16bit_8x8(buf, buf);
- store_buffer_16bit_to_32bit_w8(buf, output + 8 * width * i, width, 8);
- }
-}
-
-void av1_lowbd_fwd_txfm2d_16x4_sse2(const int16_t *input, int32_t *output,
- int stride, TX_TYPE tx_type, int bd) {
- (void)bd;
- __m128i buf0[16], buf1[16];
- const int8_t *shift = fwd_txfm_shift_ls[TX_16X4];
- const int txw_idx = get_txw_idx(TX_16X4);
- const int txh_idx = get_txh_idx(TX_16X4);
- const int cos_bit_col = fwd_cos_bit_col[txw_idx][txh_idx];
- const int cos_bit_row = fwd_cos_bit_row[txw_idx][txh_idx];
- const int width = 16;
- const int height = 4;
- const transform_1d_sse2 col_txfm = col_txfm8x4_arr[tx_type];
- const transform_1d_sse2 row_txfm = row_txfm8x16_arr[tx_type];
- __m128i *buf;
- int ud_flip, lr_flip;
-
- get_flip_cfg(tx_type, &ud_flip, &lr_flip);
- for (int i = 0; i < 2; i++) {
- if (ud_flip) {
- load_buffer_16bit_to_16bit_flip(input + 8 * i, stride, buf0, height);
- } else {
- load_buffer_16bit_to_16bit(input + 8 * i, stride, buf0, height);
- }
- round_shift_16bit(buf0, height, shift[0]);
- col_txfm(buf0, buf0, cos_bit_col);
- round_shift_16bit(buf0, height, shift[1]);
- transpose_16bit_8x4(buf0, buf1 + 8 * i);
- }
-
- if (lr_flip) {
- buf = buf0;
- flip_buf_sse2(buf1, buf, width);
- } else {
- buf = buf1;
- }
- row_txfm(buf, buf, cos_bit_row);
- round_shift_16bit(buf, width, shift[2]);
- transpose_16bit_4x8(buf, buf);
- store_buffer_16bit_to_32bit_w8(buf, output, width, height);
- transpose_16bit_4x8(buf + 8, buf + 8);
- store_buffer_16bit_to_32bit_w8(buf + 8, output + 8, width, height);
-}
-
-void av1_lowbd_fwd_txfm2d_16x8_sse2(const int16_t *input, int32_t *output,
- int stride, TX_TYPE tx_type, int bd) {
- (void)bd;
- __m128i buf0[16], buf1[16];
- const int8_t *shift = fwd_txfm_shift_ls[TX_16X8];
- const int txw_idx = get_txw_idx(TX_16X8);
- const int txh_idx = get_txh_idx(TX_16X8);
- const int cos_bit_col = fwd_cos_bit_col[txw_idx][txh_idx];
- const int cos_bit_row = fwd_cos_bit_row[txw_idx][txh_idx];
- const int width = 16;
- const int height = 8;
- const transform_1d_sse2 col_txfm = col_txfm8x8_arr[tx_type];
- const transform_1d_sse2 row_txfm = row_txfm8x16_arr[tx_type];
- __m128i *buf;
- int ud_flip, lr_flip;
-
- get_flip_cfg(tx_type, &ud_flip, &lr_flip);
- for (int i = 0; i < 2; i++) {
- if (ud_flip) {
- load_buffer_16bit_to_16bit_flip(input + 8 * i, stride, buf0, height);
- } else {
- load_buffer_16bit_to_16bit(input + 8 * i, stride, buf0, height);
- }
- round_shift_16bit(buf0, height, shift[0]);
- col_txfm(buf0, buf0, cos_bit_col);
- round_shift_16bit(buf0, height, shift[1]);
- transpose_16bit_8x8(buf0, buf1 + 8 * i);
- }
-
- if (lr_flip) {
- buf = buf0;
- flip_buf_sse2(buf1, buf, width);
- } else {
- buf = buf1;
- }
- row_txfm(buf, buf, cos_bit_row);
- round_shift_16bit(buf, width, shift[2]);
- transpose_16bit_8x8(buf, buf);
- store_rect_buffer_16bit_to_32bit_w8(buf, output, width, height);
- transpose_16bit_8x8(buf + 8, buf + 8);
- store_rect_buffer_16bit_to_32bit_w8(buf + 8, output + 8, width, height);
-}
-
-void av1_lowbd_fwd_txfm2d_16x16_sse2(const int16_t *input, int32_t *output,
- int stride, TX_TYPE tx_type, int bd) {
- (void)bd;
- __m128i buf0[16], buf1[32];
- const int8_t *shift = fwd_txfm_shift_ls[TX_16X16];
- const int txw_idx = get_txw_idx(TX_16X16);
- const int txh_idx = get_txh_idx(TX_16X16);
- const int cos_bit_col = fwd_cos_bit_col[txw_idx][txh_idx];
- const int cos_bit_row = fwd_cos_bit_row[txw_idx][txh_idx];
- const int width = 16;
- const int height = 16;
- const transform_1d_sse2 col_txfm = col_txfm8x16_arr[tx_type];
- const transform_1d_sse2 row_txfm = row_txfm8x16_arr[tx_type];
- int ud_flip, lr_flip;
-
- get_flip_cfg(tx_type, &ud_flip, &lr_flip);
- for (int i = 0; i < 2; i++) {
- if (ud_flip) {
- load_buffer_16bit_to_16bit_flip(input + 8 * i, stride, buf0, height);
- } else {
- load_buffer_16bit_to_16bit(input + 8 * i, stride, buf0, height);
- }
- round_shift_16bit(buf0, height, shift[0]);
- col_txfm(buf0, buf0, cos_bit_col);
- round_shift_16bit(buf0, height, shift[1]);
- transpose_16bit_8x8(buf0, buf1 + 0 * width + 8 * i);
- transpose_16bit_8x8(buf0 + 8, buf1 + 1 * width + 8 * i);
- }
-
- for (int i = 0; i < 2; i++) {
- __m128i *buf;
- if (lr_flip) {
- buf = buf0;
- flip_buf_sse2(buf1 + width * i, buf, width);
- } else {
- buf = buf1 + width * i;
- }
- row_txfm(buf, buf, cos_bit_row);
- round_shift_16bit(buf, width, shift[2]);
- transpose_16bit_8x8(buf, buf);
- store_buffer_16bit_to_32bit_w8(buf, output + 8 * width * i, width, 8);
- transpose_16bit_8x8(buf + 8, buf + 8);
- store_buffer_16bit_to_32bit_w8(buf + 8, output + 8 * width * i + 8, width,
- 8);
- }
-}
-
-void av1_lowbd_fwd_txfm2d_16x32_sse2(const int16_t *input, int32_t *output,
- int stride, TX_TYPE tx_type, int bd) {
- (void)bd;
- __m128i buf0[32], buf1[64];
- const int8_t *shift = fwd_txfm_shift_ls[TX_16X32];
- const int txw_idx = get_txw_idx(TX_16X32);
- const int txh_idx = get_txh_idx(TX_16X32);
- const int cos_bit_col = fwd_cos_bit_col[txw_idx][txh_idx];
- const int cos_bit_row = fwd_cos_bit_row[txw_idx][txh_idx];
- const int width = 16;
- const int height = 32;
- const transform_1d_sse2 col_txfm = col_txfm8x32_arr[tx_type];
- const transform_1d_sse2 row_txfm = row_txfm8x16_arr[tx_type];
-
- if (col_txfm != NULL && row_txfm != NULL) {
- int ud_flip, lr_flip;
- get_flip_cfg(tx_type, &ud_flip, &lr_flip);
-
- for (int i = 0; i < 2; i++) {
- if (ud_flip) {
- load_buffer_16bit_to_16bit_flip(input + 8 * i, stride, buf0, height);
- } else {
- load_buffer_16bit_to_16bit(input + 8 * i, stride, buf0, height);
- }
- round_shift_16bit(buf0, height, shift[0]);
- col_txfm(buf0, buf0, cos_bit_col);
- round_shift_16bit(buf0, height, shift[1]);
- transpose_16bit_8x8(buf0 + 0 * 8, buf1 + 0 * width + 8 * i);
- transpose_16bit_8x8(buf0 + 1 * 8, buf1 + 1 * width + 8 * i);
- transpose_16bit_8x8(buf0 + 2 * 8, buf1 + 2 * width + 8 * i);
- transpose_16bit_8x8(buf0 + 3 * 8, buf1 + 3 * width + 8 * i);
- }
-
- for (int i = 0; i < 4; i++) {
- __m128i *buf;
- if (lr_flip) {
- buf = buf0;
- flip_buf_sse2(buf1 + width * i, buf, width);
- } else {
- buf = buf1 + width * i;
- }
- row_txfm(buf, buf, cos_bit_row);
- round_shift_16bit(buf, width, shift[2]);
- transpose_16bit_8x8(buf, buf);
- store_rect_buffer_16bit_to_32bit_w8(buf, output + 8 * width * i, width,
- 8);
- transpose_16bit_8x8(buf + 8, buf + 8);
- store_rect_buffer_16bit_to_32bit_w8(buf + 8, output + 8 * width * i + 8,
- width, 8);
- }
- } else {
- av1_fwd_txfm2d_16x32_c(input, output, stride, tx_type, bd);
- }
-}
-
-void av1_lowbd_fwd_txfm2d_32x8_sse2(const int16_t *input, int32_t *output,
- int stride, TX_TYPE tx_type, int bd) {
- (void)bd;
- __m128i buf0[32], buf1[32];
- const int8_t *shift = fwd_txfm_shift_ls[TX_32X8];
- const int txw_idx = get_txw_idx(TX_32X8);
- const int txh_idx = get_txh_idx(TX_32X8);
- const int cos_bit_col = fwd_cos_bit_col[txw_idx][txh_idx];
- const int cos_bit_row = fwd_cos_bit_row[txw_idx][txh_idx];
- const int width = 32;
- const int height = 8;
- const transform_1d_sse2 col_txfm = col_txfm8x8_arr[tx_type];
- const transform_1d_sse2 row_txfm = row_txfm8x32_arr[tx_type];
-
- if (col_txfm != NULL && row_txfm != NULL) {
- int ud_flip, lr_flip;
- get_flip_cfg(tx_type, &ud_flip, &lr_flip);
-
- for (int i = 0; i < 4; i++) {
- if (ud_flip) {
- load_buffer_16bit_to_16bit_flip(input + 8 * i, stride, buf0, height);
- } else {
- load_buffer_16bit_to_16bit(input + 8 * i, stride, buf0, height);
- }
- round_shift_16bit(buf0, height, shift[0]);
- col_txfm(buf0, buf0, cos_bit_col);
- round_shift_16bit(buf0, height, shift[1]);
- transpose_16bit_8x8(buf0, buf1 + 0 * width + 8 * i);
- }
-
- for (int i = 0; i < 1; i++) {
- __m128i *buf;
- if (lr_flip) {
- buf = buf0;
- flip_buf_sse2(buf1 + width * i, buf, width);
- } else {
- buf = buf1 + width * i;
- }
- row_txfm(buf, buf, cos_bit_row);
- round_shift_16bit(buf, width, shift[2]);
- transpose_16bit_8x8(buf, buf);
- store_buffer_16bit_to_32bit_w8(buf, output + 8 * width * i, width,
- height);
- transpose_16bit_8x8(buf + 8, buf + 8);
- store_buffer_16bit_to_32bit_w8(buf + 8, output + 8 * width * i + 8, width,
- height);
- transpose_16bit_8x8(buf + 16, buf + 16);
- store_buffer_16bit_to_32bit_w8(buf + 16, output + 8 * width * i + 16,
- width, height);
- transpose_16bit_8x8(buf + 24, buf + 24);
- store_buffer_16bit_to_32bit_w8(buf + 24, output + 8 * width * i + 24,
- width, height);
- }
- } else {
- av1_fwd_txfm2d_32x16_c(input, output, stride, tx_type, bd);
- }
-}
-
-void av1_lowbd_fwd_txfm2d_32x16_sse2(const int16_t *input, int32_t *output,
- int stride, TX_TYPE tx_type, int bd) {
- (void)bd;
- __m128i buf0[32], buf1[64];
- const int8_t *shift = fwd_txfm_shift_ls[TX_32X16];
- const int txw_idx = get_txw_idx(TX_32X16);
- const int txh_idx = get_txh_idx(TX_32X16);
- const int cos_bit_col = fwd_cos_bit_col[txw_idx][txh_idx];
- const int cos_bit_row = fwd_cos_bit_row[txw_idx][txh_idx];
- const int width = 32;
- const int height = 16;
- const transform_1d_sse2 col_txfm = col_txfm8x16_arr[tx_type];
- const transform_1d_sse2 row_txfm = row_txfm8x32_arr[tx_type];
-
- if (col_txfm != NULL && row_txfm != NULL) {
- int ud_flip, lr_flip;
- get_flip_cfg(tx_type, &ud_flip, &lr_flip);
-
- for (int i = 0; i < 4; i++) {
- if (ud_flip) {
- load_buffer_16bit_to_16bit_flip(input + 8 * i, stride, buf0, height);
- } else {
- load_buffer_16bit_to_16bit(input + 8 * i, stride, buf0, height);
- }
- round_shift_16bit(buf0, height, shift[0]);
- col_txfm(buf0, buf0, cos_bit_col);
- round_shift_16bit(buf0, height, shift[1]);
- transpose_16bit_8x8(buf0, buf1 + 0 * width + 8 * i);
- transpose_16bit_8x8(buf0 + 8, buf1 + 1 * width + 8 * i);
- }
-
- for (int i = 0; i < 2; i++) {
- __m128i *buf;
- if (lr_flip) {
- buf = buf0;
- flip_buf_sse2(buf1 + width * i, buf, width);
- } else {
- buf = buf1 + width * i;
- }
- row_txfm(buf, buf, cos_bit_row);
- round_shift_16bit(buf, width, shift[2]);
- transpose_16bit_8x8(buf, buf);
- store_rect_buffer_16bit_to_32bit_w8(buf, output + 8 * width * i, width,
- 8);
- transpose_16bit_8x8(buf + 8, buf + 8);
- store_rect_buffer_16bit_to_32bit_w8(buf + 8, output + 8 * width * i + 8,
- width, 8);
- transpose_16bit_8x8(buf + 16, buf + 16);
- store_rect_buffer_16bit_to_32bit_w8(buf + 16, output + 8 * width * i + 16,
- width, 8);
- transpose_16bit_8x8(buf + 24, buf + 24);
- store_rect_buffer_16bit_to_32bit_w8(buf + 24, output + 8 * width * i + 24,
- width, 8);
- }
- } else {
- av1_fwd_txfm2d_32x16_c(input, output, stride, tx_type, bd);
- }
-}
-
-void av1_lowbd_fwd_txfm2d_32x32_sse2(const int16_t *input, int32_t *output,
- int stride, TX_TYPE tx_type, int bd) {
- (void)bd;
- __m128i buf0[32], buf1[128];
- const int8_t *shift = fwd_txfm_shift_ls[TX_32X32];
- const int txw_idx = get_txw_idx(TX_32X32);
- const int txh_idx = get_txh_idx(TX_32X32);
- const int cos_bit_col = fwd_cos_bit_col[txw_idx][txh_idx];
- const int cos_bit_row = fwd_cos_bit_row[txw_idx][txh_idx];
- const int width = 32;
- const int height = 32;
- const transform_1d_sse2 col_txfm = col_txfm8x32_arr[tx_type];
- const transform_1d_sse2 row_txfm = row_txfm8x32_arr[tx_type];
-
- if (col_txfm != NULL && row_txfm != NULL) {
- int ud_flip, lr_flip;
- get_flip_cfg(tx_type, &ud_flip, &lr_flip);
-
- for (int i = 0; i < 4; i++) {
- if (ud_flip) {
- load_buffer_16bit_to_16bit_flip(input + 8 * i, stride, buf0, height);
- } else {
- load_buffer_16bit_to_16bit(input + 8 * i, stride, buf0, height);
- }
- round_shift_16bit(buf0, height, shift[0]);
- col_txfm(buf0, buf0, cos_bit_col);
- round_shift_16bit(buf0, height, shift[1]);
- transpose_16bit_8x8(buf0 + 0 * 8, buf1 + 0 * width + 8 * i);
- transpose_16bit_8x8(buf0 + 1 * 8, buf1 + 1 * width + 8 * i);
- transpose_16bit_8x8(buf0 + 2 * 8, buf1 + 2 * width + 8 * i);
- transpose_16bit_8x8(buf0 + 3 * 8, buf1 + 3 * width + 8 * i);
- }
-
- for (int i = 0; i < 4; i++) {
- __m128i *buf;
- if (lr_flip) {
- buf = buf0;
- flip_buf_sse2(buf1 + width * i, buf, width);
- } else {
- buf = buf1 + width * i;
- }
- row_txfm(buf, buf, cos_bit_row);
- round_shift_16bit(buf, width, shift[2]);
- transpose_16bit_8x8(buf, buf);
- store_buffer_16bit_to_32bit_w8(buf, output + 8 * width * i, width, 8);
- transpose_16bit_8x8(buf + 8, buf + 8);
- store_buffer_16bit_to_32bit_w8(buf + 8, output + 8 * width * i + 8, width,
- 8);
- transpose_16bit_8x8(buf + 16, buf + 16);
- store_buffer_16bit_to_32bit_w8(buf + 16, output + 8 * width * i + 16,
- width, 8);
- transpose_16bit_8x8(buf + 24, buf + 24);
- store_buffer_16bit_to_32bit_w8(buf + 24, output + 8 * width * i + 24,
- width, 8);
- }
- } else {
- av1_fwd_txfm2d_32x32_c(input, output, stride, tx_type, bd);
- }
-}
-
-void av1_lowbd_fwd_txfm2d_64x16_sse2(const int16_t *input, int32_t *output,
- int stride, TX_TYPE tx_type, int bd) {
- (void)bd;
- (void)tx_type;
- assert(tx_type == DCT_DCT);
- const TX_SIZE tx_size = TX_64X16;
- __m128i buf0[64], buf1[128];
- const int8_t *shift = fwd_txfm_shift_ls[tx_size];
- const int txw_idx = get_txw_idx(tx_size);
- const int txh_idx = get_txh_idx(tx_size);
- const int cos_bit_col = fwd_cos_bit_col[txw_idx][txh_idx];
- const int cos_bit_row = fwd_cos_bit_row[txw_idx][txh_idx];
- const int width = tx_size_wide[tx_size];
- const int height = tx_size_high[tx_size];
- const transform_1d_sse2 col_txfm = fdct8x16_new_sse2;
- const transform_1d_sse2 row_txfm = fdct8x64_new_sse2;
- const int width_div8 = (width >> 3);
- const int height_div8 = (height >> 3);
-
- for (int i = 0; i < width_div8; i++) {
- load_buffer_16bit_to_16bit(input + 8 * i, stride, buf0, height);
- round_shift_16bit(buf0, height, shift[0]);
- col_txfm(buf0, buf0, cos_bit_col);
- round_shift_16bit(buf0, height, shift[1]);
- for (int j = 0; j < height_div8; ++j) {
- transpose_16bit_8x8(buf0 + j * 8, buf1 + j * width + 8 * i);
- }
- }
-
- for (int i = 0; i < height_div8; i++) {
- __m128i *buf = buf1 + width * i;
- row_txfm(buf, buf, cos_bit_row);
- round_shift_16bit(buf, width, shift[2]);
- int32_t *output8 = output + 8 * 32 * i;
- for (int j = 0; j < 4; ++j) {
- __m128i *buf8 = buf + 8 * j;
- transpose_16bit_8x8(buf8, buf8);
- store_buffer_16bit_to_32bit_w8(buf8, output8 + 8 * j, 32, 8);
- }
- }
-}
-
-void av1_lowbd_fwd_txfm2d_16x64_sse2(const int16_t *input, int32_t *output,
- int stride, TX_TYPE tx_type, int bd) {
- (void)bd;
- (void)tx_type;
- assert(tx_type == DCT_DCT);
- const TX_SIZE tx_size = TX_16X64;
- __m128i buf0[64], buf1[128];
- const int8_t *shift = fwd_txfm_shift_ls[tx_size];
- const int txw_idx = get_txw_idx(tx_size);
- const int txh_idx = get_txh_idx(tx_size);
- const int cos_bit_col = fwd_cos_bit_col[txw_idx][txh_idx];
- const int cos_bit_row = fwd_cos_bit_row[txw_idx][txh_idx];
- const int width = tx_size_wide[tx_size];
- const int height = tx_size_high[tx_size];
- const transform_1d_sse2 col_txfm = fdct8x64_new_sse2;
- const transform_1d_sse2 row_txfm = fdct8x16_new_sse2;
- const int width_div8 = (width >> 3);
- const int height_div8 = (height >> 3);
-
- for (int i = 0; i < width_div8; i++) {
- load_buffer_16bit_to_16bit(input + 8 * i, stride, buf0, height);
- round_shift_16bit(buf0, height, shift[0]);
- col_txfm(buf0, buf0, cos_bit_col);
- round_shift_16bit(buf0, height, shift[1]);
- for (int j = 0; j < height_div8; ++j) {
- transpose_16bit_8x8(buf0 + j * 8, buf1 + j * width + 8 * i);
- }
- }
-
- for (int i = 0; i < AOMMIN(4, height_div8); i++) {
- __m128i *buf = buf1 + width * i;
- row_txfm(buf, buf, cos_bit_row);
- round_shift_16bit(buf, width, shift[2]);
- int32_t *output8 = output + 8 * width * i;
- for (int j = 0; j < width_div8; ++j) {
- __m128i *buf8 = buf + 8 * j;
- transpose_16bit_8x8(buf8, buf8);
- store_buffer_16bit_to_32bit_w8(buf8, output8 + 8 * j, width, 8);
- }
- }
- // Zero out the bottom 16x32 area.
- memset(output + 16 * 32, 0, 16 * 32 * sizeof(*output));
-}
-
-static FwdTxfm2dFunc fwd_txfm2d_func_ls[TX_SIZES_ALL] = {
- av1_lowbd_fwd_txfm2d_4x4_sse2, // 4x4 transform
- av1_lowbd_fwd_txfm2d_8x8_sse2, // 8x8 transform
- av1_lowbd_fwd_txfm2d_16x16_sse2, // 16x16 transform
- av1_lowbd_fwd_txfm2d_32x32_sse2, // 32x32 transform
- NULL, // 64x64 transform
- av1_lowbd_fwd_txfm2d_4x8_sse2, // 4x8 transform
- av1_lowbd_fwd_txfm2d_8x4_sse2, // 8x4 transform
- av1_lowbd_fwd_txfm2d_8x16_sse2, // 8x16 transform
- av1_lowbd_fwd_txfm2d_16x8_sse2, // 16x8 transform
- av1_lowbd_fwd_txfm2d_16x32_sse2, // 16x32 transform
- av1_lowbd_fwd_txfm2d_32x16_sse2, // 32x16 transform
- NULL, // 32x64 transform
- NULL, // 64x32 transform
- av1_lowbd_fwd_txfm2d_4x16_sse2, // 4x16 transform
- av1_lowbd_fwd_txfm2d_16x4_sse2, // 16x4 transform
- av1_lowbd_fwd_txfm2d_8x32_sse2, // 8x32 transform
- av1_lowbd_fwd_txfm2d_32x8_sse2, // 32x8 transform
- av1_lowbd_fwd_txfm2d_16x64_sse2, // 16x64 transform
- av1_lowbd_fwd_txfm2d_64x16_sse2, // 64x16 transform
-};
-
-void av1_lowbd_fwd_txfm_sse2(const int16_t *src_diff, tran_low_t *coeff,
- int diff_stride, TxfmParam *txfm_param) {
- FwdTxfm2dFunc fwd_txfm2d_func = fwd_txfm2d_func_ls[txfm_param->tx_size];
-
- if ((fwd_txfm2d_func == NULL) ||
- (txfm_param->lossless && txfm_param->tx_size == TX_4X4))
- av1_lowbd_fwd_txfm_c(src_diff, coeff, diff_stride, txfm_param);
- else
- fwd_txfm2d_func(src_diff, coeff, diff_stride, txfm_param->tx_type,
- txfm_param->bd);
-}
diff --git a/third_party/aom/av1/encoder/x86/av1_fwd_txfm_sse2.h b/third_party/aom/av1/encoder/x86/av1_fwd_txfm_sse2.h
deleted file mode 100644
index 99a6b9082..000000000
--- a/third_party/aom/av1/encoder/x86/av1_fwd_txfm_sse2.h
+++ /dev/null
@@ -1,117 +0,0 @@
-/*
- * Copyright (c) 2018, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-#ifndef AOM_AV1_ENCODER_X86_AV1_FWD_TXFM_SSE2_H_
-#define AOM_AV1_ENCODER_X86_AV1_FWD_TXFM_SSE2_H_
-
-#include <immintrin.h>
-
-#include "config/aom_config.h"
-#include "config/av1_rtcd.h"
-
-#include "aom/aom_integer.h"
-#include "aom_dsp/x86/transpose_sse2.h"
-#include "aom_dsp/x86/txfm_common_sse2.h"
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-void fdct8x32_new_sse2(const __m128i *input, __m128i *output, int8_t cos_bit);
-void fdct8x64_new_sse2(const __m128i *input, __m128i *output, int8_t cos_bit);
-
-static INLINE void fidentity4x4_new_sse2(const __m128i *const input,
- __m128i *const output,
- const int8_t cos_bit) {
- (void)cos_bit;
- const __m128i one = _mm_set1_epi16(1);
-
- for (int i = 0; i < 4; ++i) {
- const __m128i a = _mm_unpacklo_epi16(input[i], one);
- const __m128i b = scale_round_sse2(a, NewSqrt2);
- output[i] = _mm_packs_epi32(b, b);
- }
-}
-
-static INLINE void fidentity8x4_new_sse2(const __m128i *const input,
- __m128i *const output,
- const int8_t cos_bit) {
- (void)cos_bit;
- const __m128i one = _mm_set1_epi16(1);
-
- for (int i = 0; i < 4; ++i) {
- const __m128i a_lo = _mm_unpacklo_epi16(input[i], one);
- const __m128i a_hi = _mm_unpackhi_epi16(input[i], one);
- const __m128i b_lo = scale_round_sse2(a_lo, NewSqrt2);
- const __m128i b_hi = scale_round_sse2(a_hi, NewSqrt2);
- output[i] = _mm_packs_epi32(b_lo, b_hi);
- }
-}
-
-static INLINE void fidentity8x8_new_sse2(const __m128i *input, __m128i *output,
- int8_t cos_bit) {
- (void)cos_bit;
-
- output[0] = _mm_adds_epi16(input[0], input[0]);
- output[1] = _mm_adds_epi16(input[1], input[1]);
- output[2] = _mm_adds_epi16(input[2], input[2]);
- output[3] = _mm_adds_epi16(input[3], input[3]);
- output[4] = _mm_adds_epi16(input[4], input[4]);
- output[5] = _mm_adds_epi16(input[5], input[5]);
- output[6] = _mm_adds_epi16(input[6], input[6]);
- output[7] = _mm_adds_epi16(input[7], input[7]);
-}
-
-static INLINE void fidentity8x16_new_sse2(const __m128i *input, __m128i *output,
- int8_t cos_bit) {
- (void)cos_bit;
- const __m128i one = _mm_set1_epi16(1);
-
- for (int i = 0; i < 16; ++i) {
- const __m128i a_lo = _mm_unpacklo_epi16(input[i], one);
- const __m128i a_hi = _mm_unpackhi_epi16(input[i], one);
- const __m128i b_lo = scale_round_sse2(a_lo, 2 * NewSqrt2);
- const __m128i b_hi = scale_round_sse2(a_hi, 2 * NewSqrt2);
- output[i] = _mm_packs_epi32(b_lo, b_hi);
- }
-}
-
-static INLINE void fidentity8x32_new_sse2(const __m128i *input, __m128i *output,
- int8_t cos_bit) {
- (void)cos_bit;
- for (int i = 0; i < 32; ++i) {
- output[i] = _mm_slli_epi16(input[i], 2);
- }
-}
-
-static const transform_1d_sse2 col_txfm8x32_arr[TX_TYPES] = {
- fdct8x32_new_sse2, // DCT_DCT
- NULL, // ADST_DCT
- NULL, // DCT_ADST
- NULL, // ADST_ADST
- NULL, // FLIPADST_DCT
- NULL, // DCT_FLIPADST
- NULL, // FLIPADST_FLIPADST
- NULL, // ADST_FLIPADST
- NULL, // FLIPADST_ADST
- fidentity8x32_new_sse2, // IDTX
- fdct8x32_new_sse2, // V_DCT
- fidentity8x32_new_sse2, // H_DCT
- NULL, // V_ADST
- NULL, // H_ADST
- NULL, // V_FLIPADST
- NULL // H_FLIPADST
-};
-
-#ifdef __cplusplus
-}
-#endif
-
-#endif // AOM_AV1_ENCODER_X86_AV1_FWD_TXFM_SSE2_H_
diff --git a/third_party/aom/av1/encoder/x86/av1_highbd_quantize_avx2.c b/third_party/aom/av1/encoder/x86/av1_highbd_quantize_avx2.c
deleted file mode 100644
index b58911fcb..000000000
--- a/third_party/aom/av1/encoder/x86/av1_highbd_quantize_avx2.c
+++ /dev/null
@@ -1,137 +0,0 @@
-/*
- * Copyright (c) 2017, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#include <immintrin.h>
-
-#include "config/av1_rtcd.h"
-
-#include "aom/aom_integer.h"
-#include "aom_dsp/aom_dsp_common.h"
-
-static INLINE void init_one_qp(const __m128i *p, __m256i *qp) {
- const __m128i zero = _mm_setzero_si128();
- const __m128i dc = _mm_unpacklo_epi16(*p, zero);
- const __m128i ac = _mm_unpackhi_epi16(*p, zero);
- *qp = _mm256_insertf128_si256(_mm256_castsi128_si256(dc), ac, 1);
-}
-
-static INLINE void update_qp(__m256i *qp) {
- qp[0] = _mm256_permute2x128_si256(qp[0], qp[0], 0x11);
- qp[1] = _mm256_permute2x128_si256(qp[1], qp[1], 0x11);
- qp[2] = _mm256_permute2x128_si256(qp[2], qp[2], 0x11);
-}
-
-static INLINE void init_qp(const int16_t *round_ptr, const int16_t *quant_ptr,
- const int16_t *dequant_ptr, int log_scale,
- __m256i *qp) {
- __m128i round = _mm_loadu_si128((const __m128i *)round_ptr);
- if (log_scale) {
- const __m128i round_scale = _mm_set1_epi16(1 << (15 - log_scale));
- round = _mm_mulhrs_epi16(round, round_scale);
- }
- const __m128i quant = _mm_loadu_si128((const __m128i *)quant_ptr);
- const __m128i dequant = _mm_loadu_si128((const __m128i *)dequant_ptr);
-
- init_one_qp(&round, &qp[0]);
- init_one_qp(&quant, &qp[1]);
- init_one_qp(&dequant, &qp[2]);
-}
-
-static INLINE void quantize(const __m256i *qp, __m256i *c,
- const int16_t *iscan_ptr, int log_scale,
- tran_low_t *qcoeff, tran_low_t *dqcoeff,
- __m256i *eob) {
- const __m256i abs_coeff = _mm256_abs_epi32(*c);
- __m256i q = _mm256_add_epi32(abs_coeff, qp[0]);
-
- __m256i q_lo = _mm256_mul_epi32(q, qp[1]);
- __m256i q_hi = _mm256_srli_epi64(q, 32);
- const __m256i qp_hi = _mm256_srli_epi64(qp[1], 32);
- q_hi = _mm256_mul_epi32(q_hi, qp_hi);
- q_lo = _mm256_srli_epi64(q_lo, 16 - log_scale);
- q_hi = _mm256_srli_epi64(q_hi, 16 - log_scale);
- q_hi = _mm256_slli_epi64(q_hi, 32);
- q = _mm256_or_si256(q_lo, q_hi);
- const __m256i abs_s = _mm256_slli_epi32(abs_coeff, 1 + log_scale);
- const __m256i mask = _mm256_cmpgt_epi32(qp[2], abs_s);
- q = _mm256_andnot_si256(mask, q);
-
- __m256i dq = _mm256_mullo_epi32(q, qp[2]);
- dq = _mm256_srai_epi32(dq, log_scale);
- q = _mm256_sign_epi32(q, *c);
- dq = _mm256_sign_epi32(dq, *c);
-
- _mm256_storeu_si256((__m256i *)qcoeff, q);
- _mm256_storeu_si256((__m256i *)dqcoeff, dq);
-
- const __m128i isc = _mm_loadu_si128((const __m128i *)iscan_ptr);
- const __m128i zr = _mm_setzero_si128();
- const __m128i lo = _mm_unpacklo_epi16(isc, zr);
- const __m128i hi = _mm_unpackhi_epi16(isc, zr);
- const __m256i iscan =
- _mm256_insertf128_si256(_mm256_castsi128_si256(lo), hi, 1);
-
- const __m256i zero = _mm256_setzero_si256();
- const __m256i zc = _mm256_cmpeq_epi32(dq, zero);
- const __m256i nz = _mm256_cmpeq_epi32(zc, zero);
- __m256i cur_eob = _mm256_sub_epi32(iscan, nz);
- cur_eob = _mm256_and_si256(cur_eob, nz);
- *eob = _mm256_max_epi32(cur_eob, *eob);
-}
-
-void av1_highbd_quantize_fp_avx2(
- const tran_low_t *coeff_ptr, intptr_t n_coeffs, const int16_t *zbin_ptr,
- const int16_t *round_ptr, const int16_t *quant_ptr,
- const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr,
- tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr,
- const int16_t *scan, const int16_t *iscan, int log_scale) {
- (void)scan;
- (void)zbin_ptr;
- (void)quant_shift_ptr;
- const unsigned int step = 8;
- __m256i qp[3], coeff;
-
- init_qp(round_ptr, quant_ptr, dequant_ptr, log_scale, qp);
- coeff = _mm256_loadu_si256((const __m256i *)coeff_ptr);
-
- __m256i eob = _mm256_setzero_si256();
- quantize(qp, &coeff, iscan, log_scale, qcoeff_ptr, dqcoeff_ptr, &eob);
-
- coeff_ptr += step;
- qcoeff_ptr += step;
- dqcoeff_ptr += step;
- iscan += step;
- n_coeffs -= step;
-
- update_qp(qp);
- while (n_coeffs > 0) {
- coeff = _mm256_loadu_si256((const __m256i *)coeff_ptr);
- quantize(qp, &coeff, iscan, log_scale, qcoeff_ptr, dqcoeff_ptr, &eob);
-
- coeff_ptr += step;
- qcoeff_ptr += step;
- dqcoeff_ptr += step;
- iscan += step;
- n_coeffs -= step;
- }
- {
- __m256i eob_s;
- eob_s = _mm256_shuffle_epi32(eob, 0xe);
- eob = _mm256_max_epi16(eob, eob_s);
- eob_s = _mm256_shufflelo_epi16(eob, 0xe);
- eob = _mm256_max_epi16(eob, eob_s);
- eob_s = _mm256_shufflelo_epi16(eob, 1);
- eob = _mm256_max_epi16(eob, eob_s);
- const __m128i final_eob = _mm_max_epi16(_mm256_castsi256_si128(eob),
- _mm256_extractf128_si256(eob, 1));
- *eob_ptr = _mm_extract_epi16(final_eob, 0);
- }
-}
diff --git a/third_party/aom/av1/encoder/x86/av1_highbd_quantize_sse4.c b/third_party/aom/av1/encoder/x86/av1_highbd_quantize_sse4.c
deleted file mode 100644
index 40b3b460b..000000000
--- a/third_party/aom/av1/encoder/x86/av1_highbd_quantize_sse4.c
+++ /dev/null
@@ -1,195 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#include <smmintrin.h>
-#include <stdint.h>
-
-#include "config/av1_rtcd.h"
-
-#include "aom_dsp/aom_dsp_common.h"
-#include "aom_dsp/x86/synonyms.h"
-
-// Coefficient quantization phase 1
-// param[0-2] : rounding/quan/dequan constants
-static INLINE void quantize_coeff_phase1(__m128i *coeff, const __m128i *param,
- const int shift, const int scale,
- __m128i *qcoeff, __m128i *dquan,
- __m128i *sign) {
- const __m128i zero = _mm_setzero_si128();
- const __m128i one = _mm_set1_epi32(1);
-
- *sign = _mm_cmplt_epi32(*coeff, zero);
- *sign = _mm_or_si128(*sign, one);
- *coeff = _mm_abs_epi32(*coeff);
-
- qcoeff[0] = _mm_add_epi32(*coeff, param[0]);
- qcoeff[1] = _mm_unpackhi_epi32(qcoeff[0], zero);
- qcoeff[0] = _mm_unpacklo_epi32(qcoeff[0], zero);
-
- qcoeff[0] = _mm_mul_epi32(qcoeff[0], param[1]);
- qcoeff[0] = _mm_srli_epi64(qcoeff[0], shift);
- dquan[0] = _mm_mul_epi32(qcoeff[0], param[2]);
- dquan[0] = _mm_srli_epi64(dquan[0], scale);
- const __m128i abs_s = _mm_slli_epi32(*coeff, 1 + scale);
- qcoeff[2] = _mm_cmplt_epi32(abs_s, param[3]);
-}
-
-// Coefficient quantization phase 2
-static INLINE void quantize_coeff_phase2(__m128i *qcoeff, __m128i *dquan,
- const __m128i *sign,
- const __m128i *param, const int shift,
- const int scale, tran_low_t *qAddr,
- tran_low_t *dqAddr) {
- __m128i mask0L = _mm_set_epi32(-1, -1, 0, 0);
- __m128i mask0H = _mm_set_epi32(0, 0, -1, -1);
-
- qcoeff[1] = _mm_mul_epi32(qcoeff[1], param[1]);
- qcoeff[1] = _mm_srli_epi64(qcoeff[1], shift);
- dquan[1] = _mm_mul_epi32(qcoeff[1], param[2]);
- dquan[1] = _mm_srli_epi64(dquan[1], scale);
-
- // combine L&H
- qcoeff[0] = _mm_shuffle_epi32(qcoeff[0], 0xd8);
- qcoeff[1] = _mm_shuffle_epi32(qcoeff[1], 0x8d);
-
- qcoeff[0] = _mm_and_si128(qcoeff[0], mask0H);
- qcoeff[1] = _mm_and_si128(qcoeff[1], mask0L);
-
- dquan[0] = _mm_shuffle_epi32(dquan[0], 0xd8);
- dquan[1] = _mm_shuffle_epi32(dquan[1], 0x8d);
-
- dquan[0] = _mm_and_si128(dquan[0], mask0H);
- dquan[1] = _mm_and_si128(dquan[1], mask0L);
-
- qcoeff[0] = _mm_or_si128(qcoeff[0], qcoeff[1]);
- dquan[0] = _mm_or_si128(dquan[0], dquan[1]);
-
- qcoeff[0] = _mm_sign_epi32(qcoeff[0], *sign);
- dquan[0] = _mm_sign_epi32(dquan[0], *sign);
- qcoeff[0] = _mm_andnot_si128(qcoeff[2], qcoeff[0]);
- dquan[0] = _mm_andnot_si128(qcoeff[2], dquan[0]);
- _mm_storeu_si128((__m128i *)qAddr, qcoeff[0]);
- _mm_storeu_si128((__m128i *)dqAddr, dquan[0]);
-}
-
-static INLINE void find_eob(tran_low_t *qcoeff_ptr, const int16_t *iscan,
- __m128i *eob) {
- const __m128i zero = _mm_setzero_si128();
- __m128i mask, iscanIdx;
- const __m128i q0 = _mm_loadu_si128((__m128i const *)qcoeff_ptr);
- const __m128i q1 = _mm_loadu_si128((__m128i const *)(qcoeff_ptr + 4));
- __m128i nz_flag0 = _mm_cmpeq_epi32(q0, zero);
- __m128i nz_flag1 = _mm_cmpeq_epi32(q1, zero);
-
- nz_flag0 = _mm_cmpeq_epi32(nz_flag0, zero);
- nz_flag1 = _mm_cmpeq_epi32(nz_flag1, zero);
-
- mask = _mm_packs_epi32(nz_flag0, nz_flag1);
- iscanIdx = _mm_loadu_si128((__m128i const *)iscan);
- iscanIdx = _mm_sub_epi16(iscanIdx, mask);
- iscanIdx = _mm_and_si128(iscanIdx, mask);
- *eob = _mm_max_epi16(*eob, iscanIdx);
-}
-
-static INLINE uint16_t get_accumulated_eob(__m128i *eob) {
- __m128i eob_shuffled;
- uint16_t eobValue;
- eob_shuffled = _mm_shuffle_epi32(*eob, 0xe);
- *eob = _mm_max_epi16(*eob, eob_shuffled);
- eob_shuffled = _mm_shufflelo_epi16(*eob, 0xe);
- *eob = _mm_max_epi16(*eob, eob_shuffled);
- eob_shuffled = _mm_shufflelo_epi16(*eob, 0x1);
- *eob = _mm_max_epi16(*eob, eob_shuffled);
- eobValue = _mm_extract_epi16(*eob, 0);
- return eobValue;
-}
-
-void av1_highbd_quantize_fp_sse4_1(
- const tran_low_t *coeff_ptr, intptr_t count, const int16_t *zbin_ptr,
- const int16_t *round_ptr, const int16_t *quant_ptr,
- const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr,
- tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr,
- const int16_t *scan, const int16_t *iscan, int log_scale) {
- __m128i coeff[2], qcoeff[3], dequant[2], qparam[4], coeff_sign;
- __m128i eob = _mm_setzero_si128();
- const tran_low_t *src = coeff_ptr;
- tran_low_t *quanAddr = qcoeff_ptr;
- tran_low_t *dquanAddr = dqcoeff_ptr;
- const int shift = 16 - log_scale;
- const int coeff_stride = 4;
- const int quan_stride = coeff_stride;
- (void)zbin_ptr;
- (void)quant_shift_ptr;
- (void)scan;
-
- memset(quanAddr, 0, count * sizeof(quanAddr[0]));
- memset(dquanAddr, 0, count * sizeof(dquanAddr[0]));
-
- coeff[0] = _mm_loadu_si128((__m128i const *)src);
- const int round1 = ROUND_POWER_OF_TWO(round_ptr[1], log_scale);
- const int round0 = ROUND_POWER_OF_TWO(round_ptr[0], log_scale);
-
- qparam[0] = _mm_set_epi32(round1, round1, round1, round0);
- qparam[1] = xx_set_64_from_32i(quant_ptr[1], quant_ptr[0]);
- qparam[2] = xx_set_64_from_32i(dequant_ptr[1], dequant_ptr[0]);
- qparam[3] = _mm_set_epi32(dequant_ptr[1], dequant_ptr[1], dequant_ptr[1],
- dequant_ptr[0]);
-
- // DC and first 3 AC
- quantize_coeff_phase1(&coeff[0], qparam, shift, log_scale, qcoeff, dequant,
- &coeff_sign);
-
- // update round/quan/dquan for AC
- qparam[0] = _mm_unpackhi_epi64(qparam[0], qparam[0]);
- qparam[1] = xx_set1_64_from_32i(quant_ptr[1]);
- qparam[2] = xx_set1_64_from_32i(dequant_ptr[1]);
- qparam[3] = _mm_set1_epi32(dequant_ptr[1]);
- quantize_coeff_phase2(qcoeff, dequant, &coeff_sign, qparam, shift, log_scale,
- quanAddr, dquanAddr);
-
- // next 4 AC
- coeff[1] = _mm_loadu_si128((__m128i const *)(src + coeff_stride));
- quantize_coeff_phase1(&coeff[1], qparam, shift, log_scale, qcoeff, dequant,
- &coeff_sign);
- quantize_coeff_phase2(qcoeff, dequant, &coeff_sign, qparam, shift, log_scale,
- quanAddr + quan_stride, dquanAddr + quan_stride);
-
- find_eob(quanAddr, iscan, &eob);
-
- count -= 8;
-
- // loop for the rest of AC
- while (count > 0) {
- src += coeff_stride << 1;
- quanAddr += quan_stride << 1;
- dquanAddr += quan_stride << 1;
- iscan += quan_stride << 1;
-
- coeff[0] = _mm_loadu_si128((__m128i const *)src);
- coeff[1] = _mm_loadu_si128((__m128i const *)(src + coeff_stride));
-
- quantize_coeff_phase1(&coeff[0], qparam, shift, log_scale, qcoeff, dequant,
- &coeff_sign);
- quantize_coeff_phase2(qcoeff, dequant, &coeff_sign, qparam, shift,
- log_scale, quanAddr, dquanAddr);
-
- quantize_coeff_phase1(&coeff[1], qparam, shift, log_scale, qcoeff, dequant,
- &coeff_sign);
- quantize_coeff_phase2(qcoeff, dequant, &coeff_sign, qparam, shift,
- log_scale, quanAddr + quan_stride,
- dquanAddr + quan_stride);
-
- find_eob(quanAddr, iscan, &eob);
-
- count -= 8;
- }
- *eob_ptr = get_accumulated_eob(&eob);
-}
diff --git a/third_party/aom/av1/encoder/x86/av1_quantize_avx2.c b/third_party/aom/av1/encoder/x86/av1_quantize_avx2.c
deleted file mode 100644
index df22aaba7..000000000
--- a/third_party/aom/av1/encoder/x86/av1_quantize_avx2.c
+++ /dev/null
@@ -1,330 +0,0 @@
-/*
- * Copyright (c) 2017, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#include <immintrin.h>
-
-#include "config/av1_rtcd.h"
-
-#include "aom/aom_integer.h"
-#include "aom_dsp/aom_dsp_common.h"
-
-static INLINE void read_coeff(const tran_low_t *coeff, __m256i *c) {
- if (sizeof(tran_low_t) == 4) {
- const __m256i x0 = _mm256_loadu_si256((const __m256i *)coeff);
- const __m256i x1 = _mm256_loadu_si256((const __m256i *)coeff + 1);
- *c = _mm256_packs_epi32(x0, x1);
- *c = _mm256_permute4x64_epi64(*c, 0xD8);
- } else {
- *c = _mm256_loadu_si256((const __m256i *)coeff);
- }
-}
-
-static INLINE void write_zero(tran_low_t *qcoeff) {
- const __m256i zero = _mm256_setzero_si256();
- if (sizeof(tran_low_t) == 4) {
- _mm256_storeu_si256((__m256i *)qcoeff, zero);
- _mm256_storeu_si256((__m256i *)qcoeff + 1, zero);
- } else {
- _mm256_storeu_si256((__m256i *)qcoeff, zero);
- }
-}
-
-static INLINE void init_one_qp(const __m128i *p, __m256i *qp) {
- const __m128i ac = _mm_unpackhi_epi64(*p, *p);
- *qp = _mm256_insertf128_si256(_mm256_castsi128_si256(*p), ac, 1);
-}
-
-static INLINE void init_qp(const int16_t *round_ptr, const int16_t *quant_ptr,
- const int16_t *dequant_ptr, int log_scale,
- __m256i *thr, __m256i *qp) {
- __m128i round = _mm_loadu_si128((const __m128i *)round_ptr);
- const __m128i quant = _mm_loadu_si128((const __m128i *)quant_ptr);
- const __m128i dequant = _mm_loadu_si128((const __m128i *)dequant_ptr);
-
- if (log_scale > 0) {
- const __m128i rnd = _mm_set1_epi16((int16_t)1 << (log_scale - 1));
- round = _mm_add_epi16(round, rnd);
- round = _mm_srai_epi16(round, log_scale);
- }
-
- init_one_qp(&round, &qp[0]);
- init_one_qp(&quant, &qp[1]);
-
- if (log_scale == 1) {
- qp[1] = _mm256_slli_epi16(qp[1], log_scale);
- }
-
- init_one_qp(&dequant, &qp[2]);
- *thr = _mm256_srai_epi16(qp[2], 1 + log_scale);
-}
-
-static INLINE void update_qp(int log_scale, __m256i *thr, __m256i *qp) {
- qp[0] = _mm256_permute2x128_si256(qp[0], qp[0], 0x11);
- qp[1] = _mm256_permute2x128_si256(qp[1], qp[1], 0x11);
- qp[2] = _mm256_permute2x128_si256(qp[2], qp[2], 0x11);
- *thr = _mm256_srai_epi16(qp[2], 1 + log_scale);
-}
-
-#define store_quan(q, addr) \
- do { \
- __m256i sign_bits = _mm256_srai_epi16(q, 15); \
- __m256i y0 = _mm256_unpacklo_epi16(q, sign_bits); \
- __m256i y1 = _mm256_unpackhi_epi16(q, sign_bits); \
- __m256i x0 = _mm256_permute2x128_si256(y0, y1, 0x20); \
- __m256i x1 = _mm256_permute2x128_si256(y0, y1, 0x31); \
- _mm256_storeu_si256((__m256i *)addr, x0); \
- _mm256_storeu_si256((__m256i *)addr + 1, x1); \
- } while (0)
-
-#define store_two_quan(q, addr1, dq, addr2) \
- do { \
- if (sizeof(tran_low_t) == 4) { \
- store_quan(q, addr1); \
- store_quan(dq, addr2); \
- } else { \
- _mm256_storeu_si256((__m256i *)addr1, q); \
- _mm256_storeu_si256((__m256i *)addr2, dq); \
- } \
- } while (0)
-
-static INLINE uint16_t quant_gather_eob(__m256i eob) {
- const __m128i eob_lo = _mm256_castsi256_si128(eob);
- const __m128i eob_hi = _mm256_extractf128_si256(eob, 1);
- __m128i eob_s = _mm_max_epi16(eob_lo, eob_hi);
- eob_s = _mm_subs_epu16(_mm_set1_epi16(INT16_MAX), eob_s);
- eob_s = _mm_minpos_epu16(eob_s);
- return INT16_MAX - _mm_extract_epi16(eob_s, 0);
-}
-
-static INLINE void quantize(const __m256i *thr, const __m256i *qp, __m256i *c,
- const int16_t *iscan_ptr, tran_low_t *qcoeff,
- tran_low_t *dqcoeff, __m256i *eob) {
- const __m256i abs_coeff = _mm256_abs_epi16(*c);
- __m256i mask = _mm256_cmpgt_epi16(abs_coeff, *thr);
- mask = _mm256_or_si256(mask, _mm256_cmpeq_epi16(abs_coeff, *thr));
- const int nzflag = _mm256_movemask_epi8(mask);
-
- if (nzflag) {
- __m256i q = _mm256_adds_epi16(abs_coeff, qp[0]);
- q = _mm256_mulhi_epi16(q, qp[1]);
- q = _mm256_sign_epi16(q, *c);
- const __m256i dq = _mm256_mullo_epi16(q, qp[2]);
-
- store_two_quan(q, qcoeff, dq, dqcoeff);
- const __m256i zero = _mm256_setzero_si256();
- const __m256i iscan = _mm256_loadu_si256((const __m256i *)iscan_ptr);
- const __m256i zero_coeff = _mm256_cmpeq_epi16(dq, zero);
- const __m256i nzero_coeff = _mm256_cmpeq_epi16(zero_coeff, zero);
- __m256i cur_eob = _mm256_sub_epi16(iscan, nzero_coeff);
- cur_eob = _mm256_and_si256(cur_eob, nzero_coeff);
- *eob = _mm256_max_epi16(*eob, cur_eob);
- } else {
- write_zero(qcoeff);
- write_zero(dqcoeff);
- }
-}
-
-void av1_quantize_fp_avx2(const tran_low_t *coeff_ptr, intptr_t n_coeffs,
- const int16_t *zbin_ptr, const int16_t *round_ptr,
- const int16_t *quant_ptr,
- const int16_t *quant_shift_ptr,
- tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr,
- const int16_t *dequant_ptr, uint16_t *eob_ptr,
- const int16_t *scan_ptr, const int16_t *iscan_ptr) {
- (void)scan_ptr;
- (void)zbin_ptr;
- (void)quant_shift_ptr;
- const unsigned int step = 16;
-
- __m256i qp[3];
- __m256i coeff, thr;
- const int log_scale = 0;
-
- init_qp(round_ptr, quant_ptr, dequant_ptr, log_scale, &thr, qp);
- read_coeff(coeff_ptr, &coeff);
-
- __m256i eob = _mm256_setzero_si256();
- quantize(&thr, qp, &coeff, iscan_ptr, qcoeff_ptr, dqcoeff_ptr, &eob);
-
- coeff_ptr += step;
- qcoeff_ptr += step;
- dqcoeff_ptr += step;
- iscan_ptr += step;
- n_coeffs -= step;
-
- update_qp(log_scale, &thr, qp);
-
- while (n_coeffs > 0) {
- read_coeff(coeff_ptr, &coeff);
- quantize(&thr, qp, &coeff, iscan_ptr, qcoeff_ptr, dqcoeff_ptr, &eob);
-
- coeff_ptr += step;
- qcoeff_ptr += step;
- dqcoeff_ptr += step;
- iscan_ptr += step;
- n_coeffs -= step;
- }
- *eob_ptr = quant_gather_eob(eob);
-}
-
-static INLINE void quantize_32x32(const __m256i *thr, const __m256i *qp,
- __m256i *c, const int16_t *iscan_ptr,
- tran_low_t *qcoeff, tran_low_t *dqcoeff,
- __m256i *eob) {
- const __m256i abs_coeff = _mm256_abs_epi16(*c);
- __m256i mask = _mm256_cmpgt_epi16(abs_coeff, *thr);
- mask = _mm256_or_si256(mask, _mm256_cmpeq_epi16(abs_coeff, *thr));
- const int nzflag = _mm256_movemask_epi8(mask);
-
- if (nzflag) {
- __m256i q = _mm256_adds_epi16(abs_coeff, qp[0]);
- q = _mm256_mulhi_epu16(q, qp[1]);
-
- __m256i dq = _mm256_mullo_epi16(q, qp[2]);
- dq = _mm256_srli_epi16(dq, 1);
-
- q = _mm256_sign_epi16(q, *c);
- dq = _mm256_sign_epi16(dq, *c);
-
- store_two_quan(q, qcoeff, dq, dqcoeff);
- const __m256i zero = _mm256_setzero_si256();
- const __m256i iscan = _mm256_loadu_si256((const __m256i *)iscan_ptr);
- const __m256i zero_coeff = _mm256_cmpeq_epi16(dq, zero);
- const __m256i nzero_coeff = _mm256_cmpeq_epi16(zero_coeff, zero);
- __m256i cur_eob = _mm256_sub_epi16(iscan, nzero_coeff);
- cur_eob = _mm256_and_si256(cur_eob, nzero_coeff);
- *eob = _mm256_max_epi16(*eob, cur_eob);
- } else {
- write_zero(qcoeff);
- write_zero(dqcoeff);
- }
-}
-
-void av1_quantize_fp_32x32_avx2(
- const tran_low_t *coeff_ptr, intptr_t n_coeffs, const int16_t *zbin_ptr,
- const int16_t *round_ptr, const int16_t *quant_ptr,
- const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr,
- tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr,
- const int16_t *scan_ptr, const int16_t *iscan_ptr) {
- (void)scan_ptr;
- (void)zbin_ptr;
- (void)quant_shift_ptr;
- const unsigned int step = 16;
-
- __m256i qp[3];
- __m256i coeff, thr;
- const int log_scale = 1;
-
- init_qp(round_ptr, quant_ptr, dequant_ptr, log_scale, &thr, qp);
- read_coeff(coeff_ptr, &coeff);
-
- __m256i eob = _mm256_setzero_si256();
- quantize_32x32(&thr, qp, &coeff, iscan_ptr, qcoeff_ptr, dqcoeff_ptr, &eob);
-
- coeff_ptr += step;
- qcoeff_ptr += step;
- dqcoeff_ptr += step;
- iscan_ptr += step;
- n_coeffs -= step;
-
- update_qp(log_scale, &thr, qp);
-
- while (n_coeffs > 0) {
- read_coeff(coeff_ptr, &coeff);
- quantize_32x32(&thr, qp, &coeff, iscan_ptr, qcoeff_ptr, dqcoeff_ptr, &eob);
-
- coeff_ptr += step;
- qcoeff_ptr += step;
- dqcoeff_ptr += step;
- iscan_ptr += step;
- n_coeffs -= step;
- }
- *eob_ptr = quant_gather_eob(eob);
-}
-
-static INLINE void quantize_64x64(const __m256i *thr, const __m256i *qp,
- __m256i *c, const int16_t *iscan_ptr,
- tran_low_t *qcoeff, tran_low_t *dqcoeff,
- __m256i *eob) {
- const __m256i abs_coeff = _mm256_abs_epi16(*c);
- __m256i mask = _mm256_cmpgt_epi16(abs_coeff, *thr);
- mask = _mm256_or_si256(mask, _mm256_cmpeq_epi16(abs_coeff, *thr));
- const int nzflag = _mm256_movemask_epi8(mask);
-
- if (nzflag) {
- __m256i q = _mm256_adds_epi16(abs_coeff, qp[0]);
- __m256i qh = _mm256_mulhi_epi16(q, qp[1]);
- __m256i ql = _mm256_mullo_epi16(q, qp[1]);
- qh = _mm256_slli_epi16(qh, 2);
- ql = _mm256_srli_epi16(ql, 14);
- q = _mm256_or_si256(qh, ql);
- const __m256i dqh = _mm256_slli_epi16(_mm256_mulhi_epi16(q, qp[2]), 14);
- const __m256i dql = _mm256_srli_epi16(_mm256_mullo_epi16(q, qp[2]), 2);
- __m256i dq = _mm256_or_si256(dqh, dql);
-
- q = _mm256_sign_epi16(q, *c);
- dq = _mm256_sign_epi16(dq, *c);
-
- store_two_quan(q, qcoeff, dq, dqcoeff);
- const __m256i zero = _mm256_setzero_si256();
- const __m256i iscan = _mm256_loadu_si256((const __m256i *)iscan_ptr);
- const __m256i zero_coeff = _mm256_cmpeq_epi16(dq, zero);
- const __m256i nzero_coeff = _mm256_cmpeq_epi16(zero_coeff, zero);
- __m256i cur_eob = _mm256_sub_epi16(iscan, nzero_coeff);
- cur_eob = _mm256_and_si256(cur_eob, nzero_coeff);
- *eob = _mm256_max_epi16(*eob, cur_eob);
- } else {
- write_zero(qcoeff);
- write_zero(dqcoeff);
- }
-}
-
-void av1_quantize_fp_64x64_avx2(
- const tran_low_t *coeff_ptr, intptr_t n_coeffs, const int16_t *zbin_ptr,
- const int16_t *round_ptr, const int16_t *quant_ptr,
- const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr,
- tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr,
- const int16_t *scan_ptr, const int16_t *iscan_ptr) {
- (void)scan_ptr;
- (void)zbin_ptr;
- (void)quant_shift_ptr;
- const unsigned int step = 16;
-
- __m256i qp[3];
- __m256i coeff, thr;
- const int log_scale = 2;
-
- init_qp(round_ptr, quant_ptr, dequant_ptr, log_scale, &thr, qp);
- read_coeff(coeff_ptr, &coeff);
-
- __m256i eob = _mm256_setzero_si256();
- quantize_64x64(&thr, qp, &coeff, iscan_ptr, qcoeff_ptr, dqcoeff_ptr, &eob);
-
- coeff_ptr += step;
- qcoeff_ptr += step;
- dqcoeff_ptr += step;
- iscan_ptr += step;
- n_coeffs -= step;
-
- update_qp(log_scale, &thr, qp);
-
- while (n_coeffs > 0) {
- read_coeff(coeff_ptr, &coeff);
- quantize_64x64(&thr, qp, &coeff, iscan_ptr, qcoeff_ptr, dqcoeff_ptr, &eob);
-
- coeff_ptr += step;
- qcoeff_ptr += step;
- dqcoeff_ptr += step;
- iscan_ptr += step;
- n_coeffs -= step;
- }
- *eob_ptr = quant_gather_eob(eob);
-}
diff --git a/third_party/aom/av1/encoder/x86/av1_quantize_sse2.c b/third_party/aom/av1/encoder/x86/av1_quantize_sse2.c
deleted file mode 100644
index b07e7717f..000000000
--- a/third_party/aom/av1/encoder/x86/av1_quantize_sse2.c
+++ /dev/null
@@ -1,189 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#include <emmintrin.h>
-#include <xmmintrin.h>
-
-#include "config/av1_rtcd.h"
-
-#include "aom/aom_integer.h"
-
-static INLINE void read_coeff(const tran_low_t *coeff, intptr_t offset,
- __m128i *c0, __m128i *c1) {
- const tran_low_t *addr = coeff + offset;
- if (sizeof(tran_low_t) == 4) {
- const __m128i x0 = _mm_load_si128((const __m128i *)addr);
- const __m128i x1 = _mm_load_si128((const __m128i *)addr + 1);
- const __m128i x2 = _mm_load_si128((const __m128i *)addr + 2);
- const __m128i x3 = _mm_load_si128((const __m128i *)addr + 3);
- *c0 = _mm_packs_epi32(x0, x1);
- *c1 = _mm_packs_epi32(x2, x3);
- } else {
- *c0 = _mm_load_si128((const __m128i *)addr);
- *c1 = _mm_load_si128((const __m128i *)addr + 1);
- }
-}
-
-static INLINE void write_qcoeff(const __m128i *qc0, const __m128i *qc1,
- tran_low_t *qcoeff, intptr_t offset) {
- tran_low_t *addr = qcoeff + offset;
- if (sizeof(tran_low_t) == 4) {
- const __m128i zero = _mm_setzero_si128();
- __m128i sign_bits = _mm_cmplt_epi16(*qc0, zero);
- __m128i y0 = _mm_unpacklo_epi16(*qc0, sign_bits);
- __m128i y1 = _mm_unpackhi_epi16(*qc0, sign_bits);
- _mm_store_si128((__m128i *)addr, y0);
- _mm_store_si128((__m128i *)addr + 1, y1);
-
- sign_bits = _mm_cmplt_epi16(*qc1, zero);
- y0 = _mm_unpacklo_epi16(*qc1, sign_bits);
- y1 = _mm_unpackhi_epi16(*qc1, sign_bits);
- _mm_store_si128((__m128i *)addr + 2, y0);
- _mm_store_si128((__m128i *)addr + 3, y1);
- } else {
- _mm_store_si128((__m128i *)addr, *qc0);
- _mm_store_si128((__m128i *)addr + 1, *qc1);
- }
-}
-
-static INLINE void write_zero(tran_low_t *qcoeff, intptr_t offset) {
- const __m128i zero = _mm_setzero_si128();
- tran_low_t *addr = qcoeff + offset;
- if (sizeof(tran_low_t) == 4) {
- _mm_store_si128((__m128i *)addr, zero);
- _mm_store_si128((__m128i *)addr + 1, zero);
- _mm_store_si128((__m128i *)addr + 2, zero);
- _mm_store_si128((__m128i *)addr + 3, zero);
- } else {
- _mm_store_si128((__m128i *)addr, zero);
- _mm_store_si128((__m128i *)addr + 1, zero);
- }
-}
-
-static INLINE void quantize(const int16_t *iscan_ptr,
- const tran_low_t *coeff_ptr, intptr_t n_coeffs,
- tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr,
- const __m128i *round0, const __m128i *round1,
- const __m128i *quant0, const __m128i *quant1,
- const __m128i *dequant0, const __m128i *dequant1,
- const __m128i *thr0, const __m128i *thr1,
- __m128i *eob) {
- __m128i coeff0, coeff1;
- // Do DC and first 15 AC
- read_coeff(coeff_ptr, n_coeffs, &coeff0, &coeff1);
-
- // Poor man's sign extract
- const __m128i coeff0_sign = _mm_srai_epi16(coeff0, 15);
- const __m128i coeff1_sign = _mm_srai_epi16(coeff1, 15);
- __m128i qcoeff0 = _mm_xor_si128(coeff0, coeff0_sign);
- __m128i qcoeff1 = _mm_xor_si128(coeff1, coeff1_sign);
- qcoeff0 = _mm_sub_epi16(qcoeff0, coeff0_sign);
- qcoeff1 = _mm_sub_epi16(qcoeff1, coeff1_sign);
- const __m128i mask0 = _mm_or_si128(_mm_cmpgt_epi16(qcoeff0, *thr0),
- _mm_cmpeq_epi16(qcoeff0, *thr0));
- const __m128i mask1 = _mm_or_si128(_mm_cmpgt_epi16(qcoeff1, *thr1),
- _mm_cmpeq_epi16(qcoeff1, *thr1));
- const int16_t nzflag = _mm_movemask_epi8(mask0) | _mm_movemask_epi8(mask1);
-
- if (nzflag) {
- qcoeff0 = _mm_adds_epi16(qcoeff0, *round0);
- qcoeff1 = _mm_adds_epi16(qcoeff1, *round1);
- const __m128i qtmp0 = _mm_mulhi_epi16(qcoeff0, *quant0);
- const __m128i qtmp1 = _mm_mulhi_epi16(qcoeff1, *quant1);
-
- // Reinsert signs
- qcoeff0 = _mm_xor_si128(qtmp0, coeff0_sign);
- qcoeff1 = _mm_xor_si128(qtmp1, coeff1_sign);
- qcoeff0 = _mm_sub_epi16(qcoeff0, coeff0_sign);
- qcoeff1 = _mm_sub_epi16(qcoeff1, coeff1_sign);
-
- write_qcoeff(&qcoeff0, &qcoeff1, qcoeff_ptr, n_coeffs);
-
- coeff0 = _mm_mullo_epi16(qcoeff0, *dequant0);
- coeff1 = _mm_mullo_epi16(qcoeff1, *dequant1);
-
- write_qcoeff(&coeff0, &coeff1, dqcoeff_ptr, n_coeffs);
-
- const __m128i zero = _mm_setzero_si128();
- // Scan for eob
- const __m128i zero_coeff0 = _mm_cmpeq_epi16(coeff0, zero);
- const __m128i zero_coeff1 = _mm_cmpeq_epi16(coeff1, zero);
- const __m128i nzero_coeff0 = _mm_cmpeq_epi16(zero_coeff0, zero);
- const __m128i nzero_coeff1 = _mm_cmpeq_epi16(zero_coeff1, zero);
- const __m128i iscan0 =
- _mm_load_si128((const __m128i *)(iscan_ptr + n_coeffs));
- const __m128i iscan1 =
- _mm_load_si128((const __m128i *)(iscan_ptr + n_coeffs) + 1);
- // Add one to convert from indices to counts
- const __m128i iscan0_nz = _mm_sub_epi16(iscan0, nzero_coeff0);
- const __m128i iscan1_nz = _mm_sub_epi16(iscan1, nzero_coeff1);
- const __m128i eob0 = _mm_and_si128(iscan0_nz, nzero_coeff0);
- const __m128i eob1 = _mm_and_si128(iscan1_nz, nzero_coeff1);
- const __m128i eob2 = _mm_max_epi16(eob0, eob1);
- *eob = _mm_max_epi16(*eob, eob2);
- } else {
- write_zero(qcoeff_ptr, n_coeffs);
- write_zero(dqcoeff_ptr, n_coeffs);
- }
-}
-
-void av1_quantize_fp_sse2(const tran_low_t *coeff_ptr, intptr_t n_coeffs,
- const int16_t *zbin_ptr, const int16_t *round_ptr,
- const int16_t *quant_ptr,
- const int16_t *quant_shift_ptr,
- tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr,
- const int16_t *dequant_ptr, uint16_t *eob_ptr,
- const int16_t *scan_ptr, const int16_t *iscan_ptr) {
- (void)scan_ptr;
- (void)zbin_ptr;
- (void)quant_shift_ptr;
-
- coeff_ptr += n_coeffs;
- iscan_ptr += n_coeffs;
- qcoeff_ptr += n_coeffs;
- dqcoeff_ptr += n_coeffs;
- n_coeffs = -n_coeffs;
-
- const __m128i round0 = _mm_load_si128((const __m128i *)round_ptr);
- const __m128i round1 = _mm_unpackhi_epi64(round0, round0);
- const __m128i quant0 = _mm_load_si128((const __m128i *)quant_ptr);
- const __m128i quant1 = _mm_unpackhi_epi64(quant0, quant0);
- const __m128i dequant0 = _mm_load_si128((const __m128i *)dequant_ptr);
- const __m128i dequant1 = _mm_unpackhi_epi64(dequant0, dequant0);
- const __m128i thr0 = _mm_srai_epi16(dequant0, 1);
- const __m128i thr1 = _mm_srai_epi16(dequant1, 1);
- __m128i eob = _mm_setzero_si128();
-
- quantize(iscan_ptr, coeff_ptr, n_coeffs, qcoeff_ptr, dqcoeff_ptr, &round0,
- &round1, &quant0, &quant1, &dequant0, &dequant1, &thr0, &thr1, &eob);
-
- n_coeffs += 8 * 2;
-
- // AC only loop
- while (n_coeffs < 0) {
- quantize(iscan_ptr, coeff_ptr, n_coeffs, qcoeff_ptr, dqcoeff_ptr, &round1,
- &round1, &quant1, &quant1, &dequant1, &dequant1, &thr1, &thr1,
- &eob);
- n_coeffs += 8 * 2;
- }
-
- // Accumulate EOB
- {
- __m128i eob_shuffled;
- eob_shuffled = _mm_shuffle_epi32(eob, 0xe);
- eob = _mm_max_epi16(eob, eob_shuffled);
- eob_shuffled = _mm_shufflelo_epi16(eob, 0xe);
- eob = _mm_max_epi16(eob, eob_shuffled);
- eob_shuffled = _mm_shufflelo_epi16(eob, 0x1);
- eob = _mm_max_epi16(eob, eob_shuffled);
- *eob_ptr = _mm_extract_epi16(eob, 1);
- }
-}
diff --git a/third_party/aom/av1/encoder/x86/av1_quantize_ssse3_x86_64.asm b/third_party/aom/av1/encoder/x86/av1_quantize_ssse3_x86_64.asm
deleted file mode 100644
index ad4ae274e..000000000
--- a/third_party/aom/av1/encoder/x86/av1_quantize_ssse3_x86_64.asm
+++ /dev/null
@@ -1,204 +0,0 @@
-;
-; Copyright (c) 2016, Alliance for Open Media. All rights reserved
-;
-; This source code is subject to the terms of the BSD 2 Clause License and
-; the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
-; was not distributed with this source code in the LICENSE file, you can
-; obtain it at www.aomedia.org/license/software. If the Alliance for Open
-; Media Patent License 1.0 was not distributed with this source code in the
-; PATENTS file, you can obtain it at www.aomedia.org/license/patent.
-;
-
-;
-
-%define private_prefix av1
-
-%include "third_party/x86inc/x86inc.asm"
-
-SECTION_RODATA
-pw_1: times 8 dw 1
-
-SECTION .text
-
-; QUANTIZE_FP name, num_gprs
-;
-; Emits the quantizer kernel quantize_<name> through x86inc's cglobal.
-;   %1  kernel name suffix (fp or fp_32x32)
-;   %2  number of general-purpose registers requested from cglobal
-;
-; Arguments, in order: coeff, ncoeff, skip, zbin, round, quant, shift,
-; qcoeff, dqcoeff, dequant, eob, scan, iscan.  zbin is loaded but never used
-; afterwards; shift and scan are never read.
-;
-; Behaviour:
-;   * skip != 0: qcoeff and dqcoeff are zero-filled and *eob is set to 0.
-;   * skip == 0: 16 coefficients (16 bits each) are handled per iteration:
-;       q  = sign(c) * (((|c| + round) * quant) >> 16)   (paddsw / pmulhw)
-;       dq = q * dequant                                 (pmullw, low 16 bits)
-;     For fp_32x32, round becomes (round + 1) >> 1, quant is doubled and
-;     dq becomes sign(c) * ((|q| * dequant) >> 1).
-;     The first iteration uses the low halves of round/quant/dequant for its
-;     first 8 lanes; afterwards the high 64 bits are broadcast and used for
-;     every remaining lane.  Later iterations are skipped (zeros written)
-;     when no |c| exceeds m0 = dequant >> 1 (>> 2 for fp_32x32).
-;     *eob receives the maximum of iscan[i] + 1 over lanes whose dqcoeff is
-;     non-zero, written as a 16-bit value.
-;
-; All vector loads/stores use mova and ncoeff advances by 16 per iteration,
-; so this assumes aligned buffers and ncoeff being a multiple of 16
-; (NOTE(review): confirm against the callers).
-%macro QUANTIZE_FP 2
-cglobal quantize_%1, 0, %2, 15, coeff, ncoeff, skip, zbin, round, quant, \
- shift, qcoeff, dqcoeff, dequant, \
- eob, scan, iscan
- cmp dword skipm, 0
- jne .blank
-
- ; actual quantize loop - setup pointers, rounders, etc.
- movifnidn coeffq, coeffmp
- movifnidn ncoeffq, ncoeffmp
- mov r2, dequantmp
- movifnidn zbinq, zbinmp
- movifnidn roundq, roundmp
- movifnidn quantq, quantmp
- mova m1, [roundq] ; m1 = round
- mova m2, [quantq] ; m2 = quant
-%ifidn %1, fp_32x32
- pcmpeqw m5, m5
- psrlw m5, 15
- paddw m1, m5
- psrlw m1, 1 ; m1 = (m1 + 1) / 2
-%endif
- mova m3, [r2q] ; m3 = dequant
- mov r3, qcoeffmp
- mov r4, dqcoeffmp
- mov r5, iscanmp
-%ifidn %1, fp_32x32
- psllw m2, 1
-%endif
- pxor m5, m5 ; m5 = dedicated zero
-
- ; point every buffer at its end and count ncoeff up from -n to 0
- lea coeffq, [ coeffq+ncoeffq*2]
- lea r5q, [ r5q+ncoeffq*2]
- lea r3q, [ r3q+ncoeffq*2]
- lea r4q, [r4q+ncoeffq*2]
- neg ncoeffq
-
- ; get DC and first 15 AC coeffs
- mova m9, [ coeffq+ncoeffq*2+ 0] ; m9 = c[i]
- mova m10, [ coeffq+ncoeffq*2+16] ; m10 = c[i]
- pabsw m6, m9 ; m6 = abs(m9)
- pabsw m11, m10 ; m11 = abs(m10)
- pcmpeqw m7, m7
-
- paddsw m6, m1 ; m6 += round
- punpckhqdq m1, m1
- paddsw m11, m1 ; m11 += round
- pmulhw m8, m6, m2 ; m8 = m6*q>>16
- punpckhqdq m2, m2
- pmulhw m13, m11, m2 ; m13 = m11*q>>16
- psignw m8, m9 ; m8 = reinsert sign
- psignw m13, m10 ; m13 = reinsert sign
- mova [r3q+ncoeffq*2+ 0], m8
- mova [r3q+ncoeffq*2+16], m13
-%ifidn %1, fp_32x32
- pabsw m8, m8
- pabsw m13, m13
-%endif
- pmullw m8, m3 ; r4[i] = r3[i] * q
- punpckhqdq m3, m3
- pmullw m13, m3 ; r4[i] = r3[i] * q
-%ifidn %1, fp_32x32
- psrlw m8, 1
- psrlw m13, 1
- psignw m8, m9
- psignw m13, m10
- psrlw m0, m3, 2
-%else
- psrlw m0, m3, 1
-%endif
- mova [r4q+ncoeffq*2+ 0], m8
- mova [r4q+ncoeffq*2+16], m13
- pcmpeqw m8, m5 ; m8 = c[i] == 0
- pcmpeqw m13, m5 ; m13 = c[i] == 0
- mova m6, [ r5q+ncoeffq*2+ 0] ; m6 = scan[i]
- mova m11, [ r5q+ncoeffq*2+16] ; m11 = scan[i]
- psubw m6, m7 ; m6 = scan[i] + 1
- psubw m11, m7 ; m11 = scan[i] + 1
- pandn m8, m6 ; m8 = max(eob)
- pandn m13, m11 ; m13 = max(eob)
- pmaxsw m8, m13
- add ncoeffq, mmsize
- jz .accumulate_eob
-
-.ac_only_loop:
- mova m9, [ coeffq+ncoeffq*2+ 0] ; m9 = c[i]
- mova m10, [ coeffq+ncoeffq*2+16] ; m10 = c[i]
- pabsw m6, m9 ; m6 = abs(m9)
- pabsw m11, m10 ; m11 = abs(m10)
-
- ; skip the whole iteration when no |c[i]| exceeds the threshold in m0
- pcmpgtw m7, m6, m0
- pcmpgtw m12, m11, m0
- pmovmskb r6d, m7
- pmovmskb r2d, m12
-
- or r6, r2
- jz .skip_iter
-
- pcmpeqw m7, m7
-
- paddsw m6, m1 ; m6 += round
- paddsw m11, m1 ; m11 += round
- pmulhw m14, m6, m2 ; m14 = m6*q>>16
- pmulhw m13, m11, m2 ; m13 = m11*q>>16
- psignw m14, m9 ; m14 = reinsert sign
- psignw m13, m10 ; m13 = reinsert sign
- mova [r3q+ncoeffq*2+ 0], m14
- mova [r3q+ncoeffq*2+16], m13
-%ifidn %1, fp_32x32
- pabsw m14, m14
- pabsw m13, m13
-%endif
- pmullw m14, m3 ; r4[i] = r3[i] * q
- pmullw m13, m3 ; r4[i] = r3[i] * q
-%ifidn %1, fp_32x32
- psrlw m14, 1
- psrlw m13, 1
- psignw m14, m9
- psignw m13, m10
-%endif
- mova [r4q+ncoeffq*2+ 0], m14
- mova [r4q+ncoeffq*2+16], m13
- pcmpeqw m14, m5 ; m14 = c[i] == 0
- pcmpeqw m13, m5 ; m13 = c[i] == 0
- mova m6, [ r5q+ncoeffq*2+ 0] ; m6 = scan[i]
- mova m11, [ r5q+ncoeffq*2+16] ; m11 = scan[i]
- psubw m6, m7 ; m6 = scan[i] + 1
- psubw m11, m7 ; m11 = scan[i] + 1
- pandn m14, m6 ; m14 = max(eob)
- pandn m13, m11 ; m13 = max(eob)
- pmaxsw m8, m14
- pmaxsw m8, m13
- add ncoeffq, mmsize
- jl .ac_only_loop
-
- jmp .accumulate_eob
-.skip_iter:
- mova [r3q+ncoeffq*2+ 0], m5
- mova [r3q+ncoeffq*2+16], m5
- mova [r4q+ncoeffq*2+ 0], m5
- mova [r4q+ncoeffq*2+16], m5
- add ncoeffq, mmsize
- jl .ac_only_loop
-
-.accumulate_eob:
- ; horizontally accumulate/max eobs and write into [eob] memory pointer
- mov r2, eobmp
- pshufd m7, m8, 0xe
- pmaxsw m8, m7
- pshuflw m7, m8, 0xe
- pmaxsw m8, m7
- pshuflw m7, m8, 0x1
- pmaxsw m8, m7
- pextrw r6, m8, 0
- ; eob is a 16-bit value (the .blank path below also stores a word), so
- ; store only the low word instead of a full register past the target.
- mov [r2], r6w
- RET
-
- ; skip-block, i.e. just write all zeroes
-.blank:
- mov r0, dqcoeffmp
- movifnidn ncoeffq, ncoeffmp
- mov r2, qcoeffmp
- mov r3, eobmp
-
- lea r0q, [r0q+ncoeffq*2]
- lea r2q, [r2q+ncoeffq*2]
- neg ncoeffq
- pxor m7, m7
-.blank_loop:
- mova [r0q+ncoeffq*2+ 0], m7
- mova [r0q+ncoeffq*2+16], m7
- mova [r2q+ncoeffq*2+ 0], m7
- mova [r2q+ncoeffq*2+16], m7
- add ncoeffq, mmsize
- jl .blank_loop
- mov word [r3q], 0
- RET
-%endmacro
-
-; Instantiate both kernels for SSSE3, each requesting 7 general-purpose
-; registers.  With private_prefix av1 these presumably assemble to
-; av1_quantize_fp_ssse3 and av1_quantize_fp_32x32_ssse3 (x86inc naming;
-; confirm against the RTCD declarations).
-INIT_XMM ssse3
-QUANTIZE_FP fp, 7
-QUANTIZE_FP fp_32x32, 7
diff --git a/third_party/aom/av1/encoder/x86/av1_ssim_opt_x86_64.asm b/third_party/aom/av1/encoder/x86/av1_ssim_opt_x86_64.asm
deleted file mode 100644
index faa2a232a..000000000
--- a/third_party/aom/av1/encoder/x86/av1_ssim_opt_x86_64.asm
+++ /dev/null
@@ -1,222 +0,0 @@
-;
-; Copyright (c) 2016, Alliance for Open Media. All rights reserved
-;
-; This source code is subject to the terms of the BSD 2 Clause License and
-; the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
-; was not distributed with this source code in the LICENSE file, you can
-; obtain it at www.aomedia.org/license/software. If the Alliance for Open
-; Media Patent License 1.0 was not distributed with this source code in the
-; PATENTS file, you can obtain it at www.aomedia.org/license/patent.
-;
-
-;
-
-%include "aom_ports/x86_abi_support.asm"
-
-; tabulate_ssim - sums sum_s,sum_r,sum_sq_s,sum_sq_r, sum_sxr
-;
-; In:      xmm3 = eight source samples (s) as 16-bit words
-;          xmm4 = eight reference samples (r) as 16-bit words
-; Updates: xmm15 += s      (word lanes, saturating add)
-;          xmm14 += r      (word lanes, saturating add)
-;          xmm13 += s*s    (dword lanes, pairwise via pmaddwd)
-;          xmm12 += r*r    (dword lanes, pairwise via pmaddwd)
-;          xmm11 += s*r    (dword lanes, pairwise via pmaddwd)
-; Clobbers: xmm1, xmm2, xmm3
-%macro TABULATE_SSIM 0
- paddusw xmm15, xmm3 ; sum_s
- paddusw xmm14, xmm4 ; sum_r
- movdqa xmm1, xmm3
- pmaddwd xmm1, xmm1
- paddd xmm13, xmm1 ; sum_sq_s
- movdqa xmm2, xmm4
- pmaddwd xmm2, xmm2
- paddd xmm12, xmm2 ; sum_sq_r
- pmaddwd xmm3, xmm4
- paddd xmm11, xmm3 ; sum_sxr
-%endmacro
-
-; Sum across the register %1: its four 32-bit lanes are zero-extended to
-; 64 bits and added together; the total ends up in the low 64 bits of %1
-; (the high 64 bits become zero).
-; Requires xmm0 == 0 (it is used as the zero source for the unpacks).
-; Clobbers xmm2.
-%macro SUM_ACROSS_Q 1
- movdqa xmm2,%1
- punpckldq %1,xmm0
- punpckhdq xmm2,xmm0
- paddq %1,xmm2
- movdqa xmm2,%1
- punpcklqdq %1,xmm0
- punpckhqdq xmm2,xmm0
- paddq %1,xmm2
-%endmacro
-
-; Sum across the register %1 starting with 16-bit words: the eight words are
-; zero-extended to 32 bits and added pairwise, then SUM_ACROSS_Q folds the
-; four 32-bit partial sums into the low 64 bits of %1.
-; Requires xmm0 == 0.  Clobbers xmm1 and (through SUM_ACROSS_Q) xmm2.
-%macro SUM_ACROSS_W 1
- movdqa xmm1, %1
- punpcklwd %1,xmm0
- punpckhwd xmm1,xmm0
- paddd %1, xmm1
- SUM_ACROSS_Q %1
-%endmacro
-
-SECTION .text
-
-;void av1_ssim_parms_16x16_sse2(
-; unsigned char *s,
-; int sp,
-; unsigned char *r,
-; int rp,
-; unsigned long *sum_s,
-; unsigned long *sum_r,
-; unsigned long *sum_sq_s,
-; unsigned long *sum_sq_r,
-; unsigned long *sum_sxr);
-;
-; Accumulates, over a 16x16 block, the sum of s, the sum of r, the sum of
-; s*s, the sum of r*r and the sum of s*r, and stores each total through the
-; matching output pointer.  sp and rp are the byte strides of s and r.
-;
-; NOTE(review): each total is stored with movd, so only 32 bits are written
-; through every sum_* pointer even though the prototype above says
-; unsigned long; confirm against the C declaration.
-;
-; TODO: Use parm passing through structure, probably don't need the pxors
-; ( calling app will initialize to 0 ) could easily fit everything in sse2
-; without too much hassle, and can probably do better estimates with psadbw
-; or pavgb At this point this is just meant to be first pass for calculating
-; all the parms needed for 16x16 ssim so we can play with dssim as distortion
-; in mode selection code.
-global sym(av1_ssim_parms_16x16_sse2) PRIVATE
-sym(av1_ssim_parms_16x16_sse2):
- push rbp
- mov rbp, rsp
- SHADOW_ARGS_TO_STACK 9
- SAVE_XMM 15
- push rsi
- push rdi
- ; end prolog
-
- mov rsi, arg(0) ;s
- mov rcx, arg(1) ;sp
- mov rdi, arg(2) ;r
- mov rax, arg(3) ;rp
-
- ; xmm0 stays zero: it is the unpack source for widening bytes to words
- ; and is required by the SUM_ACROSS_* macros.
- pxor xmm0, xmm0
- pxor xmm15,xmm15 ;sum_s
- pxor xmm14,xmm14 ;sum_r
- pxor xmm13,xmm13 ;sum_sq_s
- pxor xmm12,xmm12 ;sum_sq_r
- pxor xmm11,xmm11 ;sum_sxr
-
- mov rdx, 16 ;row counter
-.NextRow:
-
- ;grab source and reference pixels
- movdqu xmm5, [rsi]
- movdqu xmm6, [rdi]
- movdqa xmm3, xmm5
- movdqa xmm4, xmm6
- punpckhbw xmm3, xmm0 ; high_s
- punpckhbw xmm4, xmm0 ; high_r
-
- TABULATE_SSIM
-
- ; TABULATE_SSIM clobbers xmm3, so reload both rows from the saved copies
- movdqa xmm3, xmm5
- movdqa xmm4, xmm6
- punpcklbw xmm3, xmm0 ; low_s
- punpcklbw xmm4, xmm0 ; low_r
-
- TABULATE_SSIM
-
- add rsi, rcx ; next s row
- add rdi, rax ; next r row
-
- dec rdx ; counter
- jnz .NextRow
-
- ; reduce each accumulator to a single total in its low lane
- SUM_ACROSS_W xmm15
- SUM_ACROSS_W xmm14
- SUM_ACROSS_Q xmm13
- SUM_ACROSS_Q xmm12
- SUM_ACROSS_Q xmm11
-
- ; store the low 32 bits of each total through the output pointers
- mov rdi,arg(4)
- movd [rdi], xmm15;
- mov rdi,arg(5)
- movd [rdi], xmm14;
- mov rdi,arg(6)
- movd [rdi], xmm13;
- mov rdi,arg(7)
- movd [rdi], xmm12;
- mov rdi,arg(8)
- movd [rdi], xmm11;
-
- ; begin epilog
- pop rdi
- pop rsi
- RESTORE_XMM
- UNSHADOW_ARGS
- pop rbp
- ret
-
-;void av1_ssim_parms_8x8_sse2(
-; unsigned char *s,
-; int sp,
-; unsigned char *r,
-; int rp,
-; unsigned long *sum_s,
-; unsigned long *sum_r,
-; unsigned long *sum_sq_s,
-; unsigned long *sum_sq_r,
-; unsigned long *sum_sxr);
-;
-; Accumulates, over an 8x8 block, the sum of s, the sum of r, the sum of
-; s*s, the sum of r*r and the sum of s*r, and stores each total through the
-; matching output pointer.  sp and rp are the byte strides of s and r.
-;
-; NOTE(review): each total is stored with movd, so only 32 bits are written
-; through every sum_* pointer even though the prototype above says
-; unsigned long; confirm against the C declaration.
-;
-; TODO: Use parm passing through structure, probably don't need the pxors
-; ( calling app will initialize to 0 ) could easily fit everything in sse2
-; without too much hassle, and can probably do better estimates with psadbw
-; or pavgb At this point this is just meant to be first pass for calculating
-; all the parms needed for 8x8 ssim so we can play with dssim as distortion
-; in mode selection code.
-global sym(av1_ssim_parms_8x8_sse2) PRIVATE
-sym(av1_ssim_parms_8x8_sse2):
- push rbp
- mov rbp, rsp
- SHADOW_ARGS_TO_STACK 9
- SAVE_XMM 15
- push rsi
- push rdi
- ; end prolog
-
- mov rsi, arg(0) ;s
- mov rcx, arg(1) ;sp
- mov rdi, arg(2) ;r
- mov rax, arg(3) ;rp
-
- ; xmm0 stays zero: it is the unpack source for widening bytes to words
- ; and is required by the SUM_ACROSS_* macros.
- pxor xmm0, xmm0
- pxor xmm15,xmm15 ;sum_s
- pxor xmm14,xmm14 ;sum_r
- pxor xmm13,xmm13 ;sum_sq_s
- pxor xmm12,xmm12 ;sum_sq_r
- pxor xmm11,xmm11 ;sum_sxr
-
- mov rdx, 8 ;row counter
-.NextRow:
-
- ;grab source and reference pixels
- ; (movq loads exactly the 8 pixels of this row)
- movq xmm3, [rsi]
- movq xmm4, [rdi]
- punpcklbw xmm3, xmm0 ; low_s
- punpcklbw xmm4, xmm0 ; low_r
-
- TABULATE_SSIM
-
- add rsi, rcx ; next s row
- add rdi, rax ; next r row
-
- dec rdx ; counter
- jnz .NextRow
-
- ; reduce each accumulator to a single total in its low lane
- SUM_ACROSS_W xmm15
- SUM_ACROSS_W xmm14
- SUM_ACROSS_Q xmm13
- SUM_ACROSS_Q xmm12
- SUM_ACROSS_Q xmm11
-
- ; store the low 32 bits of each total through the output pointers
- mov rdi,arg(4)
- movd [rdi], xmm15;
- mov rdi,arg(5)
- movd [rdi], xmm14;
- mov rdi,arg(6)
- movd [rdi], xmm13;
- mov rdi,arg(7)
- movd [rdi], xmm12;
- mov rdi,arg(8)
- movd [rdi], xmm11;
-
- ; begin epilog
- pop rdi
- pop rsi
- RESTORE_XMM
- UNSHADOW_ARGS
- pop rbp
- ret
diff --git a/third_party/aom/av1/encoder/x86/av1_txfm1d_sse4.h b/third_party/aom/av1/encoder/x86/av1_txfm1d_sse4.h
deleted file mode 100644
index 6df2a8bdb..000000000
--- a/third_party/aom/av1/encoder/x86/av1_txfm1d_sse4.h
+++ /dev/null
@@ -1,142 +0,0 @@
-/*
- * Copyright (c) 2018, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#ifndef AOM_AV1_ENCODER_X86_AV1_TXFM1D_SSE4_H_
-#define AOM_AV1_ENCODER_X86_AV1_TXFM1D_SSE4_H_
-
-#include <smmintrin.h>
-#include "av1/common/av1_txfm.h"
-#include "av1/common/x86/av1_txfm_sse4.h"
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-void av1_fdct4_new_sse4_1(const __m128i *input, __m128i *output,
- const int8_t cos_bit, const int8_t *stage_range);
-void av1_fdct8_new_sse4_1(const __m128i *input, __m128i *output,
- const int8_t cos_bit, const int8_t *stage_range);
-void av1_fdct16_new_sse4_1(const __m128i *input, __m128i *output,
- const int8_t cos_bit, const int8_t *stage_range);
-void av1_fdct32_new_sse4_1(const __m128i *input, __m128i *output,
- int8_t cos_bit);
-void av1_fdct64_new_sse4_1(const __m128i *input, __m128i *output,
- int8_t cos_bit, const int instride,
- const int outstride);
-
-void av1_fadst4_new_sse4_1(const __m128i *input, __m128i *output,
- const int8_t cos_bit, const int8_t *stage_range);
-void av1_fadst8_new_sse4_1(const __m128i *input, __m128i *output,
- const int8_t cos_bit, const int8_t *stage_range);
-void av1_fadst16_new_sse4_1(const __m128i *input, __m128i *output,
- const int8_t cos_bit, const int8_t *stage_range);
-
-void av1_idct4_new_sse4_1(const __m128i *input, __m128i *output,
- const int8_t cos_bit, const int8_t *stage_range);
-void av1_idct8_new_sse4_1(const __m128i *input, __m128i *output,
- const int8_t cos_bit, const int8_t *stage_range);
-void av1_idct16_new_sse4_1(const __m128i *input, __m128i *output,
- const int8_t cos_bit, const int8_t *stage_range);
-void av1_idct32_new_sse4_1(const __m128i *input, __m128i *output,
- const int8_t cos_bit, const int8_t *stage_range);
-void av1_idct64_new_sse4_1(const __m128i *input, __m128i *output,
- const int8_t cos_bit, const int8_t *stage_range);
-
-void av1_iadst4_new_sse4_1(const __m128i *input, __m128i *output,
- const int8_t cos_bit, const int8_t *stage_range);
-void av1_iadst8_new_sse4_1(const __m128i *input, __m128i *output,
- const int8_t cos_bit, const int8_t *stage_range);
-void av1_iadst16_new_sse4_1(const __m128i *input, __m128i *output,
- const int8_t cos_bit, const int8_t *stage_range);
-// Transposes one 4x4 tile of 32-bit values.  Row k of the tile is read from
-// input[k * stride] and row k of the result is written to
-// output[k * stride].  All four input rows are combined before the first
-// store.
-static INLINE void transpose_32_4x4(int stride, const __m128i *input,
-                                    __m128i *output) {
-  const __m128i *const row0 = input;
-  const __m128i *const row1 = input + 1 * stride;
-  const __m128i *const row2 = input + 2 * stride;
-  const __m128i *const row3 = input + 3 * stride;
-
-  // Interleave rows 0/2 and rows 1/3 ...
-  const __m128i even_lo = _mm_unpacklo_epi32(*row0, *row2);
-  const __m128i even_hi = _mm_unpackhi_epi32(*row0, *row2);
-  const __m128i odd_lo = _mm_unpacklo_epi32(*row1, *row3);
-  const __m128i odd_hi = _mm_unpackhi_epi32(*row1, *row3);
-
-  // ... then interleave the two halves to obtain the columns.
-  output[0 * stride] = _mm_unpacklo_epi32(even_lo, odd_lo);
-  output[1 * stride] = _mm_unpackhi_epi32(even_lo, odd_lo);
-  output[2 * stride] = _mm_unpacklo_epi32(even_hi, odd_hi);
-  output[3 * stride] = _mm_unpackhi_epi32(even_hi, odd_hi);
-}
-
-// Transposes a txfm_size x txfm_size block of 32-bit values.
-// The block is viewed as a grid of 4x4 tiles, each tile being four vertically
-// adjacent __m128i.  Every tile is transposed internally by
-// transpose_32_4x4() and written to the mirrored grid position, which
-// transposes the grid as well.
-static INLINE void transpose_32(int txfm_size, const __m128i *input,
-                                __m128i *output) {
-  const int lanes_per_vec = 4;
-  const int vecs_per_row = txfm_size / lanes_per_vec;
-  int row = 0;
-
-  while (row < txfm_size) {
-    int tile;
-    for (tile = 0; tile < vecs_per_row; ++tile) {
-      const __m128i *const src = &input[row * vecs_per_row + tile];
-      __m128i *const dst = &output[tile * 4 * vecs_per_row + row / 4];
-      transpose_32_4x4(vecs_per_row, src, dst);
-    }
-    row += 4;
-  }
-}
-
-// Butterfly on 32-bit lanes with scalar weights.
-// out0 = in0*w0 + in1*w1
-// out1 = -in1*w0 + in0*w1
-// w0 and w1 are broadcast with _mm_set1_epi32; products use
-// _mm_mullo_epi32 (low 32 bits only) and both results go through
-// av1_round_shift_32_sse4_1(x, bit).  in0 and in1 are each evaluated twice.
-#define btf_32_sse4_1_type0(w0, w1, in0, in1, out0, out1, bit) \
- do { \
- const __m128i ww0 = _mm_set1_epi32(w0); \
- const __m128i ww1 = _mm_set1_epi32(w1); \
- const __m128i in0_w0 = _mm_mullo_epi32(in0, ww0); \
- const __m128i in1_w1 = _mm_mullo_epi32(in1, ww1); \
- out0 = _mm_add_epi32(in0_w0, in1_w1); \
- out0 = av1_round_shift_32_sse4_1(out0, bit); \
- const __m128i in0_w1 = _mm_mullo_epi32(in0, ww1); \
- const __m128i in1_w0 = _mm_mullo_epi32(in1, ww0); \
- out1 = _mm_sub_epi32(in0_w1, in1_w0); \
- out1 = av1_round_shift_32_sse4_1(out1, bit); \
- } while (0)
-
-// Same butterfly with the sign of out1 flipped; implemented by calling
-// btf_32_sse4_1_type0 with the weights and the inputs swapped.
-// out0 = in0*w0 + in1*w1
-// out1 = in1*w0 - in0*w1
-#define btf_32_sse4_1_type1(w0, w1, in0, in1, out0, out1, bit) \
- do { \
- btf_32_sse4_1_type0(w1, w0, in1, in0, out0, out1, bit); \
- } while (0)
-
-// Butterfly on 32-bit lanes with pre-broadcast vector weights.
-// out0 = in0*w0 + in1*w1
-// out1 = -in1*w0 + in0*w1
-// ww0 and ww1 are already __m128i weights.  The vector r is added to each
-// result before the arithmetic right shift by bit (r is presumably the
-// rounding offset 1 << (bit - 1); confirm at the call sites).
-#define btf_32_type0_sse4_1_new(ww0, ww1, in0, in1, out0, out1, r, bit) \
- do { \
- const __m128i in0_w0 = _mm_mullo_epi32(in0, ww0); \
- const __m128i in1_w1 = _mm_mullo_epi32(in1, ww1); \
- out0 = _mm_add_epi32(in0_w0, in1_w1); \
- out0 = _mm_add_epi32(out0, r); \
- out0 = _mm_srai_epi32(out0, bit); \
- const __m128i in0_w1 = _mm_mullo_epi32(in0, ww1); \
- const __m128i in1_w0 = _mm_mullo_epi32(in1, ww0); \
- out1 = _mm_sub_epi32(in0_w1, in1_w0); \
- out1 = _mm_add_epi32(out1, r); \
- out1 = _mm_srai_epi32(out1, bit); \
- } while (0)
-
-// Same butterfly with the sign of out1 flipped; implemented by calling
-// btf_32_type0_sse4_1_new with the weights and the inputs swapped.
-// out0 = in0*w0 + in1*w1
-// out1 = in1*w0 - in0*w1
-#define btf_32_type1_sse4_1_new(ww0, ww1, in0, in1, out0, out1, r, bit) \
- do { \
- btf_32_type0_sse4_1_new(ww1, ww0, in1, in0, out0, out1, r, bit); \
- } while (0)
-
-#ifdef __cplusplus
-}
-#endif
-
-#endif // AOM_AV1_ENCODER_X86_AV1_TXFM1D_SSE4_H_
diff --git a/third_party/aom/av1/encoder/x86/corner_match_sse4.c b/third_party/aom/av1/encoder/x86/corner_match_sse4.c
deleted file mode 100644
index 93f37b71d..000000000
--- a/third_party/aom/av1/encoder/x86/corner_match_sse4.c
+++ /dev/null
@@ -1,103 +0,0 @@
-/*
- * Copyright (c) 2018, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#include <stdlib.h>
-#include <memory.h>
-#include <math.h>
-#include <assert.h>
-
-#include <smmintrin.h>
-
-#include "config/av1_rtcd.h"
-
-#include "aom_ports/mem.h"
-#include "av1/encoder/corner_match.h"
-
-// Keeps the first 13 bytes of a 16-byte load and clears the last 3, so that
-// only MATCH_SZ pixels of each row contribute to the sums below.
-DECLARE_ALIGNED(16, static const uint8_t, byte_mask[16]) = {
- 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 0, 0, 0
-};
-#if MATCH_SZ != 13
-#error "Need to change byte_mask in corner_match_sse4.c if MATCH_SZ != 13"
-#endif
-
-/* Compute corr(im1, im2) * MATCH_SZ * stddev(im1), where the
- correlation/standard deviation are taken over MATCH_SZ by MATCH_SZ windows
- of each image, centered at (x1, y1) and (x2, y2) respectively.
-
- Each row is fetched with an unaligned 16-byte load and then masked down to
- MATCH_SZ bytes, so up to 3 bytes past the end of every window row are read
- (and ignored).
-
- The result is cov / sqrt(var2); when the im2 window is constant, var2 is 0
- and the division yields an infinity or NaN.
-*/
-double compute_cross_correlation_sse4_1(unsigned char *im1, int stride1, int x1,
- int y1, unsigned char *im2, int stride2,
- int x2, int y2) {
- int i;
- // 2 16-bit partial sums in lanes 0, 4 (== 2 32-bit partial sums in lanes 0,
- // 2)
- __m128i sum1_vec = _mm_setzero_si128();
- __m128i sum2_vec = _mm_setzero_si128();
- // 4 32-bit partial sums of squares
- __m128i sumsq2_vec = _mm_setzero_si128();
- __m128i cross_vec = _mm_setzero_si128();
-
- const __m128i mask = _mm_load_si128((__m128i *)byte_mask);
- const __m128i zero = _mm_setzero_si128();
-
- // Move both pointers from the window centre to the window's top-left pixel.
- im1 += (y1 - MATCH_SZ_BY2) * stride1 + (x1 - MATCH_SZ_BY2);
- im2 += (y2 - MATCH_SZ_BY2) * stride2 + (x2 - MATCH_SZ_BY2);
-
- for (i = 0; i < MATCH_SZ; ++i) {
- const __m128i v1 =
- _mm_and_si128(_mm_loadu_si128((__m128i *)&im1[i * stride1]), mask);
- const __m128i v2 =
- _mm_and_si128(_mm_loadu_si128((__m128i *)&im2[i * stride2]), mask);
-
- // Using the 'sad' intrinsic here is a bit faster than adding
- // v1_l + v1_r and v2_l + v2_r, plus it avoids the need for a 16->32 bit
- // conversion step later, for a net speedup of ~10%
- sum1_vec = _mm_add_epi16(sum1_vec, _mm_sad_epu8(v1, zero));
- sum2_vec = _mm_add_epi16(sum2_vec, _mm_sad_epu8(v2, zero));
-
- // Widen the low and high 8 bytes of each row to 16-bit lanes.
- const __m128i v1_l = _mm_cvtepu8_epi16(v1);
- const __m128i v1_r = _mm_cvtepu8_epi16(_mm_srli_si128(v1, 8));
- const __m128i v2_l = _mm_cvtepu8_epi16(v2);
- const __m128i v2_r = _mm_cvtepu8_epi16(_mm_srli_si128(v2, 8));
-
- sumsq2_vec = _mm_add_epi32(
- sumsq2_vec,
- _mm_add_epi32(_mm_madd_epi16(v2_l, v2_l), _mm_madd_epi16(v2_r, v2_r)));
- cross_vec = _mm_add_epi32(
- cross_vec,
- _mm_add_epi32(_mm_madd_epi16(v1_l, v2_l), _mm_madd_epi16(v1_r, v2_r)));
- }
-
- // Now we can treat the four registers (sum1_vec, sum2_vec, sumsq2_vec,
- // cross_vec)
- // as holding 4 32-bit elements each, which we want to sum horizontally.
- // We do this by transposing and then summing vertically.
- __m128i tmp_0 = _mm_unpacklo_epi32(sum1_vec, sum2_vec);
- __m128i tmp_1 = _mm_unpackhi_epi32(sum1_vec, sum2_vec);
- __m128i tmp_2 = _mm_unpacklo_epi32(sumsq2_vec, cross_vec);
- __m128i tmp_3 = _mm_unpackhi_epi32(sumsq2_vec, cross_vec);
-
- __m128i tmp_4 = _mm_unpacklo_epi64(tmp_0, tmp_2);
- __m128i tmp_5 = _mm_unpackhi_epi64(tmp_0, tmp_2);
- __m128i tmp_6 = _mm_unpacklo_epi64(tmp_1, tmp_3);
- __m128i tmp_7 = _mm_unpackhi_epi64(tmp_1, tmp_3);
-
- // After the vertical add, lane k of res is the total of the k-th register:
- // sum1, sum2, sumsq2, cross.
- __m128i res =
- _mm_add_epi32(_mm_add_epi32(tmp_4, tmp_5), _mm_add_epi32(tmp_6, tmp_7));
-
- int sum1 = _mm_extract_epi32(res, 0);
- int sum2 = _mm_extract_epi32(res, 1);
- int sumsq2 = _mm_extract_epi32(res, 2);
- int cross = _mm_extract_epi32(res, 3);
-
- int var2 = sumsq2 * MATCH_SZ_SQ - sum2 * sum2;
- int cov = cross * MATCH_SZ_SQ - sum1 * sum2;
- return cov / sqrt((double)var2);
-}
diff --git a/third_party/aom/av1/encoder/x86/dct_sse2.asm b/third_party/aom/av1/encoder/x86/dct_sse2.asm
deleted file mode 100644
index b18554818..000000000
--- a/third_party/aom/av1/encoder/x86/dct_sse2.asm
+++ /dev/null
@@ -1,82 +0,0 @@
-;
-; Copyright (c) 2016, Alliance for Open Media. All rights reserved
-;
-; This source code is subject to the terms of the BSD 2 Clause License and
-; the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
-; was not distributed with this source code in the LICENSE file, you can
-; obtain it at www.aomedia.org/license/software. If the Alliance for Open
-; Media Patent License 1.0 was not distributed with this source code in the
-; PATENTS file, you can obtain it at www.aomedia.org/license/patent.
-;
-
-%define private_prefix av1
-
-%include "third_party/x86inc/x86inc.asm"
-
-SECTION .text
-
-; One pass of the 4-point transform used by fwht4x4, applied lane-wise to
-; 16-bit words.
-; In:  m0 = a1, m1 = b1, m2 = c1, m3 = d1
-; Computes:
-;   a1 += b1
-;   d1 -= c1
-;   e1  = (a1 - d1) >> 1      (arithmetic shift)
-;   b1  = e1 - b1
-;   c1  = e1 - c1
-;   a1 -= c1
-;   d1 += b1
-; Out (after the SWAPs): m0 = a, m1 = c, m2 = d, m3 = b
-; Uses m4 and m5 as temporaries; they are copied with movq, so only the low
-; 64 bits (four words) carry valid data through this macro.
-%macro TRANSFORM_COLS 0
- paddw m0, m1
- movq m4, m0
- psubw m3, m2
- psubw m4, m3
- psraw m4, 1
- movq m5, m4
- psubw m5, m1 ;b1
- psubw m4, m2 ;c1
- psubw m0, m4
- paddw m3, m5
- ; m0 a0
- SWAP 1, 4 ; m1 c1
- SWAP 2, 3 ; m2 d1
- SWAP 3, 5 ; m3 b1
-%endmacro
-
-; Transposes a 4x4 matrix of 16-bit words.
-; In:  rows 0..3 in the low 64 bits of m0..m3
-; Out: m0 = columns 0 and 1 (8 words), m1 = columns 2 and 3 (8 words)
-; m2 is overwritten with an intermediate interleave.
-%macro TRANSPOSE_4X4 0
- ; 00 01 02 03
- ; 10 11 12 13
- ; 20 21 22 23
- ; 30 31 32 33
- punpcklwd m0, m1 ; 00 10 01 11 02 12 03 13
- punpcklwd m2, m3 ; 20 30 21 31 22 32 23 33
- mova m1, m0
- punpckldq m0, m2 ; 00 10 20 30 01 11 21 31
- punpckhdq m1, m2 ; 02 12 22 32 03 13 23 33
-%endmacro
-
-; fwht4x4(input, output, stride) - 4x4 forward Walsh-Hadamard transform, SSE2.
-;   input  : four rows of four 16-bit values; consecutive rows are
-;            stride*2 bytes apart
-;   output : sixteen 32-bit values (64 bytes), written with aligned stores
-; The data goes through TRANSFORM_COLS + TRANSPOSE_4X4 twice, is scaled by 4
-; (psllw 2) and sign-extended from 16 to 32 bits before being stored.
-INIT_XMM sse2
-cglobal fwht4x4, 3, 4, 8, input, output, stride
- lea r3q, [inputq + strideq*4] ; r3 = address of row 2
- movq m0, [inputq] ;a1
- movq m1, [inputq + strideq*2] ;b1
- movq m2, [r3q] ;c1
- movq m3, [r3q + strideq*2] ;d1
-
- TRANSFORM_COLS
- TRANSPOSE_4X4
- ; the transpose leaves two lines per register; split them back into four
- ; registers (one line in each low half) for the second pass
- SWAP 1, 2
- psrldq m1, m0, 8
- psrldq m3, m2, 8
- TRANSFORM_COLS
- TRANSPOSE_4X4
-
- psllw m0, 2
- psllw m1, 2
-
- ; sign extension
- ; (duplicate each word into a dword, then arithmetic-shift right by 16)
- mova m2, m0
- mova m3, m1
- punpcklwd m0, m0
- punpcklwd m1, m1
- punpckhwd m2, m2
- punpckhwd m3, m3
- psrad m0, 16
- psrad m1, 16
- psrad m2, 16
- psrad m3, 16
- mova [outputq], m0
- mova [outputq + 16], m2
- mova [outputq + 32], m1
- mova [outputq + 48], m3
-
- RET
diff --git a/third_party/aom/av1/encoder/x86/encodetxb_avx2.c b/third_party/aom/av1/encoder/x86/encodetxb_avx2.c
deleted file mode 100644
index 7642f57d1..000000000
--- a/third_party/aom/av1/encoder/x86/encodetxb_avx2.c
+++ /dev/null
@@ -1,130 +0,0 @@
-/*
- * Copyright (c) 2018, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#include <assert.h>
-#include <emmintrin.h> // SSE2
-#include <smmintrin.h> /* SSE4.1 */
-#include <immintrin.h> /* AVX2 */
-
-#include "aom/aom_integer.h"
-#include "aom_dsp/x86/mem_sse2.h"
-#include "av1/common/onyxc_int.h"
-#include "av1/common/txb_common.h"
-#include "aom_dsp/x86/synonyms.h"
-#include "aom_dsp/x86/synonyms_avx2.h"
-
-// Fills the padded `levels` buffer from a width x height coefficient block.
-//
-// coeff  : width * height coefficients, read contiguously row by row.
-// levels : points at the first real row of a buffer whose row pitch is
-//          width + TX_PAD_HOR bytes.  The TX_PAD_TOP rows before it and the
-//          TX_PAD_BOTTOM rows after the last row are zeroed here.
-//
-// Each coefficient is narrowed to 16 bits with a saturating pack, its
-// absolute value is taken, and the result is narrowed to 8 bits with a
-// second saturating pack.  For widths 8, 16 and the final branch, the 4
-// bytes following every row are cleared as well.
-//
-// NOTE(review): the padding loops and the width == 4 branch store whole
-// 32-byte vectors, so they rely on the buffer layout chosen by the caller
-// (e.g. the width == 4 branch writes 4 rows as 32 contiguous bytes, which
-// presumably requires TX_PAD_HOR == 4) - confirm against txb_common.h.
-void av1_txb_init_levels_avx2(const tran_low_t *const coeff, const int width,
- const int height, uint8_t *const levels) {
- const int stride = width + TX_PAD_HOR;
- const __m256i y_zeros = _mm256_setzero_si256();
-
- // Zero the top padding, 32 bytes per store, until pre_buf_end is reached.
- const int32_t pre_len = sizeof(*levels) * TX_PAD_TOP * stride;
- uint8_t *pre_buf = levels - TX_PAD_TOP * stride;
- uint8_t *pre_buf_end = pre_buf + pre_len;
- do {
- yy_storeu_256(pre_buf, y_zeros);
- pre_buf += 32;
- } while (pre_buf < pre_buf_end);
-
- // Zero the bottom padding.  The start is moved back so that a whole number
- // of 32-byte stores ends exactly at bottom_buf_end.
- const int32_t bottom_len = sizeof(*levels) * (TX_PAD_BOTTOM * stride);
- uint8_t *bottom_buf_end = levels + (height + TX_PAD_BOTTOM) * stride;
- uint8_t *bottom_buf = bottom_buf_end - ((bottom_len + 31) & (~31));
-
- do {
- yy_storeu_256(bottom_buf, y_zeros);
- bottom_buf += 32;
- } while (bottom_buf < bottom_buf_end);
-
- int i = 0;
- uint8_t *ls = levels;
- const tran_low_t *cf = coeff;
- if (width == 4) {
- // 4 rows (16 coefficients) per iteration; packing against zeros and the
- // two shuffles place 4 level bytes followed by 4 zero bytes per row.
- do {
- const __m256i c0 = yy_loadu_256(cf);
- const __m256i c1 = yy_loadu_256(cf + 8);
- const __m256i abs01 = _mm256_abs_epi16(_mm256_packs_epi32(c0, c1));
- const __m256i abs01_8 = _mm256_packs_epi16(abs01, y_zeros);
- const __m256i res_ = _mm256_shuffle_epi32(abs01_8, 0xd8);
- const __m256i res = _mm256_permute4x64_epi64(res_, 0xd8);
- yy_storeu_256(ls, res);
- ls += 32;
- cf += 16;
- i += 4;
- } while (i < height);
- } else if (width == 8) {
- // 4 rows (32 coefficients) per iteration; each row gets 8 level bytes
- // followed by 4 zero bytes.
- do {
- const __m256i coeffA = yy_loadu_256(cf);
- const __m256i coeffB = yy_loadu_256(cf + 8);
- const __m256i coeffC = yy_loadu_256(cf + 16);
- const __m256i coeffD = yy_loadu_256(cf + 24);
- const __m256i coeffAB = _mm256_packs_epi32(coeffA, coeffB);
- const __m256i coeffCD = _mm256_packs_epi32(coeffC, coeffD);
- const __m256i absAB = _mm256_abs_epi16(coeffAB);
- const __m256i absCD = _mm256_abs_epi16(coeffCD);
- const __m256i absABCD = _mm256_packs_epi16(absAB, absCD);
- const __m256i res_ = _mm256_permute4x64_epi64(absABCD, 0xd8);
- const __m256i res = _mm256_shuffle_epi32(res_, 0xd8);
- const __m128i res0 = _mm256_castsi256_si128(res);
- const __m128i res1 = _mm256_extracti128_si256(res, 1);
- xx_storel_64(ls, res0);
- *(int32_t *)(ls + width) = 0;
- xx_storel_64(ls + stride, _mm_srli_si128(res0, 8));
- *(int32_t *)(ls + width + stride) = 0;
- xx_storel_64(ls + stride * 2, res1);
- *(int32_t *)(ls + width + stride * 2) = 0;
- xx_storel_64(ls + stride * 3, _mm_srli_si128(res1, 8));
- *(int32_t *)(ls + width + stride * 3) = 0;
- cf += 32;
- ls += stride << 2;
- i += 4;
- } while (i < height);
- } else if (width == 16) {
- // 2 rows (32 coefficients) per iteration; each row gets 16 level bytes
- // followed by 4 zero bytes.
- do {
- const __m256i coeffA = yy_loadu_256(cf);
- const __m256i coeffB = yy_loadu_256(cf + 8);
- const __m256i coeffC = yy_loadu_256(cf + 16);
- const __m256i coeffD = yy_loadu_256(cf + 24);
- const __m256i coeffAB = _mm256_packs_epi32(coeffA, coeffB);
- const __m256i coeffCD = _mm256_packs_epi32(coeffC, coeffD);
- const __m256i absAB = _mm256_abs_epi16(coeffAB);
- const __m256i absCD = _mm256_abs_epi16(coeffCD);
- const __m256i absABCD = _mm256_packs_epi16(absAB, absCD);
- const __m256i res_ = _mm256_permute4x64_epi64(absABCD, 0xd8);
- const __m256i res = _mm256_shuffle_epi32(res_, 0xd8);
- xx_storeu_128(ls, _mm256_castsi256_si128(res));
- xx_storeu_128(ls + stride, _mm256_extracti128_si256(res, 1));
- cf += 32;
- *(int32_t *)(ls + width) = 0;
- *(int32_t *)(ls + stride + width) = 0;
- ls += stride << 1;
- i += 2;
- } while (i < height);
- } else {
- // One row of 32 coefficients per iteration (presumably width == 32 -
- // confirm against the callers); 32 level bytes followed by 4 zero bytes.
- do {
- const __m256i coeffA = yy_loadu_256(cf);
- const __m256i coeffB = yy_loadu_256(cf + 8);
- const __m256i coeffC = yy_loadu_256(cf + 16);
- const __m256i coeffD = yy_loadu_256(cf + 24);
- const __m256i coeffAB = _mm256_packs_epi32(coeffA, coeffB);
- const __m256i coeffCD = _mm256_packs_epi32(coeffC, coeffD);
- const __m256i absAB = _mm256_abs_epi16(coeffAB);
- const __m256i absCD = _mm256_abs_epi16(coeffCD);
- const __m256i absABCD = _mm256_packs_epi16(absAB, absCD);
- const __m256i res_ = _mm256_permute4x64_epi64(absABCD, 0xd8);
- const __m256i res = _mm256_shuffle_epi32(res_, 0xd8);
- yy_storeu_256(ls, res);
- cf += 32;
- *(int32_t *)(ls + width) = 0;
- ls += stride;
- i += 1;
- } while (i < height);
- }
-}
diff --git a/third_party/aom/av1/encoder/x86/encodetxb_sse2.c b/third_party/aom/av1/encoder/x86/encodetxb_sse2.c
deleted file mode 100644
index dedb4d02f..000000000
--- a/third_party/aom/av1/encoder/x86/encodetxb_sse2.c
+++ /dev/null
@@ -1,505 +0,0 @@
-/*
- * Copyright (c) 2017, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#include <assert.h>
-#include <emmintrin.h> // SSE2
-
-#include "aom/aom_integer.h"
-#include "aom_dsp/x86/mem_sse2.h"
-#include "av1/common/onyxc_int.h"
-#include "av1/common/txb_common.h"
-
-static INLINE void load_levels_4x4x5_sse2(const uint8_t *const src,
- const int stride,
- const ptrdiff_t *const offsets,
- __m128i *const level) {
- level[0] = load_8bit_4x4_to_1_reg_sse2(src + 1, stride);
- level[1] = load_8bit_4x4_to_1_reg_sse2(src + stride, stride);
- level[2] = load_8bit_4x4_to_1_reg_sse2(src + offsets[0], stride);
- level[3] = load_8bit_4x4_to_1_reg_sse2(src + offsets[1], stride);
- level[4] = load_8bit_4x4_to_1_reg_sse2(src + offsets[2], stride);
-}
-
-static INLINE void load_levels_8x2x5_sse2(const uint8_t *const src,
- const int stride,
- const ptrdiff_t *const offsets,
- __m128i *const level) {
- level[0] = load_8bit_8x2_to_1_reg_sse2(src + 1, stride);
- level[1] = load_8bit_8x2_to_1_reg_sse2(src + stride, stride);
- level[2] = load_8bit_8x2_to_1_reg_sse2(src + offsets[0], stride);
- level[3] = load_8bit_8x2_to_1_reg_sse2(src + offsets[1], stride);
- level[4] = load_8bit_8x2_to_1_reg_sse2(src + offsets[2], stride);
-}
-
-static INLINE void load_levels_16x1x5_sse2(const uint8_t *const src,
- const int stride,
- const ptrdiff_t *const offsets,
- __m128i *const level) {
- level[0] = _mm_loadu_si128((__m128i *)(src + 1));
- level[1] = _mm_loadu_si128((__m128i *)(src + stride));
- level[2] = _mm_loadu_si128((__m128i *)(src + offsets[0]));
- level[3] = _mm_loadu_si128((__m128i *)(src + offsets[1]));
- level[4] = _mm_loadu_si128((__m128i *)(src + offsets[2]));
-}
-
-static INLINE __m128i get_coeff_contexts_kernel_sse2(__m128i *const level) {
- const __m128i const_3 = _mm_set1_epi8(3);
- const __m128i const_4 = _mm_set1_epi8(4);
- __m128i count;
-
- count = _mm_min_epu8(level[0], const_3);
- level[1] = _mm_min_epu8(level[1], const_3);
- level[2] = _mm_min_epu8(level[2], const_3);
- level[3] = _mm_min_epu8(level[3], const_3);
- level[4] = _mm_min_epu8(level[4], const_3);
- count = _mm_add_epi8(count, level[1]);
- count = _mm_add_epi8(count, level[2]);
- count = _mm_add_epi8(count, level[3]);
- count = _mm_add_epi8(count, level[4]);
- count = _mm_avg_epu8(count, _mm_setzero_si128());
- count = _mm_min_epu8(count, const_4);
- return count;
-}
-
-static INLINE void get_4_nz_map_contexts_2d(const uint8_t *levels,
- const int height,
- const ptrdiff_t *const offsets,
- int8_t *const coeff_contexts) {
- const int stride = 4 + TX_PAD_HOR;
- const __m128i pos_to_offset_large = _mm_set1_epi8(21);
- __m128i pos_to_offset =
- (height == 4)
- ? _mm_setr_epi8(0, 1, 6, 6, 1, 6, 6, 21, 6, 6, 21, 21, 6, 21, 21, 21)
- : _mm_setr_epi8(0, 11, 11, 11, 11, 11, 11, 11, 6, 6, 21, 21, 6, 21,
- 21, 21);
- __m128i count;
- __m128i level[5];
- int8_t *cc = coeff_contexts;
- int row = height;
-
- assert(!(height % 4));
-
- do {
- load_levels_4x4x5_sse2(levels, stride, offsets, level);
- count = get_coeff_contexts_kernel_sse2(level);
- count = _mm_add_epi8(count, pos_to_offset);
- _mm_store_si128((__m128i *)cc, count);
- pos_to_offset = pos_to_offset_large;
- levels += 4 * stride;
- cc += 16;
- row -= 4;
- } while (row);
-
- coeff_contexts[0] = 0;
-}
-
-static INLINE void get_4_nz_map_contexts_hor(const uint8_t *levels,
- const int height,
- const ptrdiff_t *const offsets,
- int8_t *coeff_contexts) {
- const int stride = 4 + TX_PAD_HOR;
- const __m128i pos_to_offset =
- _mm_setr_epi8(SIG_COEF_CONTEXTS_2D + 0, SIG_COEF_CONTEXTS_2D + 5,
- SIG_COEF_CONTEXTS_2D + 10, SIG_COEF_CONTEXTS_2D + 10,
- SIG_COEF_CONTEXTS_2D + 0, SIG_COEF_CONTEXTS_2D + 5,
- SIG_COEF_CONTEXTS_2D + 10, SIG_COEF_CONTEXTS_2D + 10,
- SIG_COEF_CONTEXTS_2D + 0, SIG_COEF_CONTEXTS_2D + 5,
- SIG_COEF_CONTEXTS_2D + 10, SIG_COEF_CONTEXTS_2D + 10,
- SIG_COEF_CONTEXTS_2D + 0, SIG_COEF_CONTEXTS_2D + 5,
- SIG_COEF_CONTEXTS_2D + 10, SIG_COEF_CONTEXTS_2D + 10);
- __m128i count;
- __m128i level[5];
- int row = height;
-
- assert(!(height % 4));
-
- do {
- load_levels_4x4x5_sse2(levels, stride, offsets, level);
- count = get_coeff_contexts_kernel_sse2(level);
- count = _mm_add_epi8(count, pos_to_offset);
- _mm_store_si128((__m128i *)coeff_contexts, count);
- levels += 4 * stride;
- coeff_contexts += 16;
- row -= 4;
- } while (row);
-}
-
-static INLINE void get_4_nz_map_contexts_ver(const uint8_t *levels,
- const int height,
- const ptrdiff_t *const offsets,
- int8_t *coeff_contexts) {
- const int stride = 4 + TX_PAD_HOR;
- const __m128i pos_to_offset_large = _mm_set1_epi8(SIG_COEF_CONTEXTS_2D + 10);
- __m128i pos_to_offset =
- _mm_setr_epi8(SIG_COEF_CONTEXTS_2D + 0, SIG_COEF_CONTEXTS_2D + 0,
- SIG_COEF_CONTEXTS_2D + 0, SIG_COEF_CONTEXTS_2D + 0,
- SIG_COEF_CONTEXTS_2D + 5, SIG_COEF_CONTEXTS_2D + 5,
- SIG_COEF_CONTEXTS_2D + 5, SIG_COEF_CONTEXTS_2D + 5,
- SIG_COEF_CONTEXTS_2D + 10, SIG_COEF_CONTEXTS_2D + 10,
- SIG_COEF_CONTEXTS_2D + 10, SIG_COEF_CONTEXTS_2D + 10,
- SIG_COEF_CONTEXTS_2D + 10, SIG_COEF_CONTEXTS_2D + 10,
- SIG_COEF_CONTEXTS_2D + 10, SIG_COEF_CONTEXTS_2D + 10);
- __m128i count;
- __m128i level[5];
- int row = height;
-
- assert(!(height % 4));
-
- do {
- load_levels_4x4x5_sse2(levels, stride, offsets, level);
- count = get_coeff_contexts_kernel_sse2(level);
- count = _mm_add_epi8(count, pos_to_offset);
- _mm_store_si128((__m128i *)coeff_contexts, count);
- pos_to_offset = pos_to_offset_large;
- levels += 4 * stride;
- coeff_contexts += 16;
- row -= 4;
- } while (row);
-}
-
-static INLINE void get_8_coeff_contexts_2d(const uint8_t *levels,
- const int height,
- const ptrdiff_t *const offsets,
- int8_t *coeff_contexts) {
- const int stride = 8 + TX_PAD_HOR;
- int8_t *cc = coeff_contexts;
- int row = height;
- __m128i count;
- __m128i level[5];
- __m128i pos_to_offset[3];
-
- assert(!(height % 2));
-
- if (height == 8) {
- pos_to_offset[0] =
- _mm_setr_epi8(0, 1, 6, 6, 21, 21, 21, 21, 1, 6, 6, 21, 21, 21, 21, 21);
- pos_to_offset[1] = _mm_setr_epi8(6, 6, 21, 21, 21, 21, 21, 21, 6, 21, 21,
- 21, 21, 21, 21, 21);
- } else if (height < 8) {
- pos_to_offset[0] = _mm_setr_epi8(0, 16, 6, 6, 21, 21, 21, 21, 16, 16, 6, 21,
- 21, 21, 21, 21);
- pos_to_offset[1] = _mm_setr_epi8(16, 16, 21, 21, 21, 21, 21, 21, 16, 16, 21,
- 21, 21, 21, 21, 21);
- } else {
- pos_to_offset[0] = _mm_setr_epi8(0, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11,
- 11, 11, 11, 11, 11);
- pos_to_offset[1] = _mm_setr_epi8(6, 6, 21, 21, 21, 21, 21, 21, 6, 21, 21,
- 21, 21, 21, 21, 21);
- }
- pos_to_offset[2] = _mm_set1_epi8(21);
-
- do {
- load_levels_8x2x5_sse2(levels, stride, offsets, level);
- count = get_coeff_contexts_kernel_sse2(level);
- count = _mm_add_epi8(count, pos_to_offset[0]);
- _mm_store_si128((__m128i *)cc, count);
- pos_to_offset[0] = pos_to_offset[1];
- pos_to_offset[1] = pos_to_offset[2];
- levels += 2 * stride;
- cc += 16;
- row -= 2;
- } while (row);
-
- coeff_contexts[0] = 0;
-}
-
-static INLINE void get_8_coeff_contexts_hor(const uint8_t *levels,
- const int height,
- const ptrdiff_t *const offsets,
- int8_t *coeff_contexts) {
- const int stride = 8 + TX_PAD_HOR;
- const __m128i pos_to_offset =
- _mm_setr_epi8(SIG_COEF_CONTEXTS_2D + 0, SIG_COEF_CONTEXTS_2D + 5,
- SIG_COEF_CONTEXTS_2D + 10, SIG_COEF_CONTEXTS_2D + 10,
- SIG_COEF_CONTEXTS_2D + 10, SIG_COEF_CONTEXTS_2D + 10,
- SIG_COEF_CONTEXTS_2D + 10, SIG_COEF_CONTEXTS_2D + 10,
- SIG_COEF_CONTEXTS_2D + 0, SIG_COEF_CONTEXTS_2D + 5,
- SIG_COEF_CONTEXTS_2D + 10, SIG_COEF_CONTEXTS_2D + 10,
- SIG_COEF_CONTEXTS_2D + 10, SIG_COEF_CONTEXTS_2D + 10,
- SIG_COEF_CONTEXTS_2D + 10, SIG_COEF_CONTEXTS_2D + 10);
- int row = height;
- __m128i count;
- __m128i level[5];
-
- assert(!(height % 2));
-
- do {
- load_levels_8x2x5_sse2(levels, stride, offsets, level);
- count = get_coeff_contexts_kernel_sse2(level);
- count = _mm_add_epi8(count, pos_to_offset);
- _mm_store_si128((__m128i *)coeff_contexts, count);
- levels += 2 * stride;
- coeff_contexts += 16;
- row -= 2;
- } while (row);
-}
-
-static INLINE void get_8_coeff_contexts_ver(const uint8_t *levels,
- const int height,
- const ptrdiff_t *const offsets,
- int8_t *coeff_contexts) {
- const int stride = 8 + TX_PAD_HOR;
- const __m128i pos_to_offset_large = _mm_set1_epi8(SIG_COEF_CONTEXTS_2D + 10);
- __m128i pos_to_offset =
- _mm_setr_epi8(SIG_COEF_CONTEXTS_2D + 0, SIG_COEF_CONTEXTS_2D + 0,
- SIG_COEF_CONTEXTS_2D + 0, SIG_COEF_CONTEXTS_2D + 0,
- SIG_COEF_CONTEXTS_2D + 0, SIG_COEF_CONTEXTS_2D + 0,
- SIG_COEF_CONTEXTS_2D + 0, SIG_COEF_CONTEXTS_2D + 0,
- SIG_COEF_CONTEXTS_2D + 5, SIG_COEF_CONTEXTS_2D + 5,
- SIG_COEF_CONTEXTS_2D + 5, SIG_COEF_CONTEXTS_2D + 5,
- SIG_COEF_CONTEXTS_2D + 5, SIG_COEF_CONTEXTS_2D + 5,
- SIG_COEF_CONTEXTS_2D + 5, SIG_COEF_CONTEXTS_2D + 5);
- int row = height;
- __m128i count;
- __m128i level[5];
-
- assert(!(height % 2));
-
- do {
- load_levels_8x2x5_sse2(levels, stride, offsets, level);
- count = get_coeff_contexts_kernel_sse2(level);
- count = _mm_add_epi8(count, pos_to_offset);
- _mm_store_si128((__m128i *)coeff_contexts, count);
- pos_to_offset = pos_to_offset_large;
- levels += 2 * stride;
- coeff_contexts += 16;
- row -= 2;
- } while (row);
-}
-
-static INLINE void get_16n_coeff_contexts_2d(const uint8_t *levels,
- const int real_width,
- const int real_height,
- const int width, const int height,
- const ptrdiff_t *const offsets,
- int8_t *coeff_contexts) {
- const int stride = width + TX_PAD_HOR;
- int8_t *cc = coeff_contexts;
- int row = height;
- __m128i pos_to_offset[5];
- __m128i pos_to_offset_large[3];
- __m128i count;
- __m128i level[5];
-
- assert(!(width % 16));
-
- pos_to_offset_large[2] = _mm_set1_epi8(21);
- if (real_width == real_height) {
- pos_to_offset[0] = _mm_setr_epi8(0, 1, 6, 6, 21, 21, 21, 21, 21, 21, 21, 21,
- 21, 21, 21, 21);
- pos_to_offset[1] = _mm_setr_epi8(1, 6, 6, 21, 21, 21, 21, 21, 21, 21, 21,
- 21, 21, 21, 21, 21);
- pos_to_offset[2] = _mm_setr_epi8(6, 6, 21, 21, 21, 21, 21, 21, 21, 21, 21,
- 21, 21, 21, 21, 21);
- pos_to_offset[3] = _mm_setr_epi8(6, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
- 21, 21, 21, 21, 21);
- pos_to_offset[4] = pos_to_offset_large[0] = pos_to_offset_large[1] =
- pos_to_offset_large[2];
- } else if (real_width > real_height) {
- pos_to_offset[0] = _mm_setr_epi8(0, 16, 6, 6, 21, 21, 21, 21, 21, 21, 21,
- 21, 21, 21, 21, 21);
- pos_to_offset[1] = _mm_setr_epi8(16, 16, 6, 21, 21, 21, 21, 21, 21, 21, 21,
- 21, 21, 21, 21, 21);
- pos_to_offset[2] = pos_to_offset[3] = pos_to_offset[4] = _mm_setr_epi8(
- 16, 16, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21);
- pos_to_offset_large[0] = pos_to_offset_large[1] = pos_to_offset_large[2];
- } else { // real_width < real_height
- pos_to_offset[0] = pos_to_offset[1] = _mm_setr_epi8(
- 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11);
- pos_to_offset[2] = _mm_setr_epi8(6, 6, 21, 21, 21, 21, 21, 21, 21, 21, 21,
- 21, 21, 21, 21, 21);
- pos_to_offset[3] = _mm_setr_epi8(6, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
- 21, 21, 21, 21, 21);
- pos_to_offset[4] = pos_to_offset_large[2];
- pos_to_offset_large[0] = pos_to_offset_large[1] = _mm_set1_epi8(11);
- }
-
- do {
- int w = width;
-
- do {
- load_levels_16x1x5_sse2(levels, stride, offsets, level);
- count = get_coeff_contexts_kernel_sse2(level);
- count = _mm_add_epi8(count, pos_to_offset[0]);
- _mm_store_si128((__m128i *)cc, count);
- levels += 16;
- cc += 16;
- w -= 16;
- pos_to_offset[0] = pos_to_offset_large[0];
- } while (w);
-
- pos_to_offset[0] = pos_to_offset[1];
- pos_to_offset[1] = pos_to_offset[2];
- pos_to_offset[2] = pos_to_offset[3];
- pos_to_offset[3] = pos_to_offset[4];
- pos_to_offset_large[0] = pos_to_offset_large[1];
- pos_to_offset_large[1] = pos_to_offset_large[2];
- levels += TX_PAD_HOR;
- } while (--row);
-
- coeff_contexts[0] = 0;
-}
-
-static INLINE void get_16n_coeff_contexts_hor(const uint8_t *levels,
- const int width, const int height,
- const ptrdiff_t *const offsets,
- int8_t *coeff_contexts) {
- const int stride = width + TX_PAD_HOR;
- const __m128i pos_to_offset_large =
- _mm_setr_epi8(SIG_COEF_CONTEXTS_2D + 10, SIG_COEF_CONTEXTS_2D + 10,
- SIG_COEF_CONTEXTS_2D + 10, SIG_COEF_CONTEXTS_2D + 10,
- SIG_COEF_CONTEXTS_2D + 10, SIG_COEF_CONTEXTS_2D + 10,
- SIG_COEF_CONTEXTS_2D + 10, SIG_COEF_CONTEXTS_2D + 10,
- SIG_COEF_CONTEXTS_2D + 10, SIG_COEF_CONTEXTS_2D + 10,
- SIG_COEF_CONTEXTS_2D + 10, SIG_COEF_CONTEXTS_2D + 10,
- SIG_COEF_CONTEXTS_2D + 10, SIG_COEF_CONTEXTS_2D + 10,
- SIG_COEF_CONTEXTS_2D + 10, SIG_COEF_CONTEXTS_2D + 10);
- __m128i count;
- __m128i level[5];
- int row = height;
-
- assert(!(width % 16));
-
- do {
- __m128i pos_to_offset =
- _mm_setr_epi8(SIG_COEF_CONTEXTS_2D + 0, SIG_COEF_CONTEXTS_2D + 5,
- SIG_COEF_CONTEXTS_2D + 10, SIG_COEF_CONTEXTS_2D + 10,
- SIG_COEF_CONTEXTS_2D + 10, SIG_COEF_CONTEXTS_2D + 10,
- SIG_COEF_CONTEXTS_2D + 10, SIG_COEF_CONTEXTS_2D + 10,
- SIG_COEF_CONTEXTS_2D + 10, SIG_COEF_CONTEXTS_2D + 10,
- SIG_COEF_CONTEXTS_2D + 10, SIG_COEF_CONTEXTS_2D + 10,
- SIG_COEF_CONTEXTS_2D + 10, SIG_COEF_CONTEXTS_2D + 10,
- SIG_COEF_CONTEXTS_2D + 10, SIG_COEF_CONTEXTS_2D + 10);
- int w = width;
-
- do {
- load_levels_16x1x5_sse2(levels, stride, offsets, level);
- count = get_coeff_contexts_kernel_sse2(level);
- count = _mm_add_epi8(count, pos_to_offset);
- _mm_store_si128((__m128i *)coeff_contexts, count);
- pos_to_offset = pos_to_offset_large;
- levels += 16;
- coeff_contexts += 16;
- w -= 16;
- } while (w);
-
- levels += TX_PAD_HOR;
- } while (--row);
-}
-
-static INLINE void get_16n_coeff_contexts_ver(const uint8_t *levels,
- const int width, const int height,
- const ptrdiff_t *const offsets,
- int8_t *coeff_contexts) {
- const int stride = width + TX_PAD_HOR;
- __m128i pos_to_offset[3];
- __m128i count;
- __m128i level[5];
- int row = height;
-
- assert(!(width % 16));
-
- pos_to_offset[0] = _mm_set1_epi8(SIG_COEF_CONTEXTS_2D + 0);
- pos_to_offset[1] = _mm_set1_epi8(SIG_COEF_CONTEXTS_2D + 5);
- pos_to_offset[2] = _mm_set1_epi8(SIG_COEF_CONTEXTS_2D + 10);
-
- do {
- int w = width;
-
- do {
- load_levels_16x1x5_sse2(levels, stride, offsets, level);
- count = get_coeff_contexts_kernel_sse2(level);
- count = _mm_add_epi8(count, pos_to_offset[0]);
- _mm_store_si128((__m128i *)coeff_contexts, count);
- levels += 16;
- coeff_contexts += 16;
- w -= 16;
- } while (w);
-
- pos_to_offset[0] = pos_to_offset[1];
- pos_to_offset[1] = pos_to_offset[2];
- levels += TX_PAD_HOR;
- } while (--row);
-}
-
-// Note: levels[] must be in the range [0, 127], inclusive.
-void av1_get_nz_map_contexts_sse2(const uint8_t *const levels,
- const int16_t *const scan, const uint16_t eob,
- const TX_SIZE tx_size,
- const TX_CLASS tx_class,
- int8_t *const coeff_contexts) {
- const int last_idx = eob - 1;
- if (!last_idx) {
- coeff_contexts[0] = 0;
- return;
- }
-
- const int real_width = tx_size_wide[tx_size];
- const int real_height = tx_size_high[tx_size];
- const int width = get_txb_wide(tx_size);
- const int height = get_txb_high(tx_size);
- const int stride = width + TX_PAD_HOR;
- ptrdiff_t offsets[3];
-
- /* coeff_contexts must be 16 byte aligned. */
- assert(!((intptr_t)coeff_contexts & 0xf));
-
- if (tx_class == TX_CLASS_2D) {
- offsets[0] = 0 * stride + 2;
- offsets[1] = 1 * stride + 1;
- offsets[2] = 2 * stride + 0;
-
- if (width == 4) {
- get_4_nz_map_contexts_2d(levels, height, offsets, coeff_contexts);
- } else if (width == 8) {
- get_8_coeff_contexts_2d(levels, height, offsets, coeff_contexts);
- } else if (width == 16) {
- get_16n_coeff_contexts_2d(levels, real_width, real_height, width, height,
- offsets, coeff_contexts);
- } else {
- get_16n_coeff_contexts_2d(levels, real_width, real_height, width, height,
- offsets, coeff_contexts);
- }
- } else if (tx_class == TX_CLASS_HORIZ) {
- offsets[0] = 2;
- offsets[1] = 3;
- offsets[2] = 4;
- if (width == 4) {
- get_4_nz_map_contexts_hor(levels, height, offsets, coeff_contexts);
- } else if (width == 8) {
- get_8_coeff_contexts_hor(levels, height, offsets, coeff_contexts);
- } else {
- get_16n_coeff_contexts_hor(levels, width, height, offsets,
- coeff_contexts);
- }
- } else { // TX_CLASS_VERT
- offsets[0] = 2 * stride;
- offsets[1] = 3 * stride;
- offsets[2] = 4 * stride;
- if (width == 4) {
- get_4_nz_map_contexts_ver(levels, height, offsets, coeff_contexts);
- } else if (width == 8) {
- get_8_coeff_contexts_ver(levels, height, offsets, coeff_contexts);
- } else {
- get_16n_coeff_contexts_ver(levels, width, height, offsets,
- coeff_contexts);
- }
- }
-
- const int bwl = get_txb_bwl(tx_size);
- const int pos = scan[last_idx];
- if (last_idx <= (height << bwl) / 8)
- coeff_contexts[pos] = 1;
- else if (last_idx <= (height << bwl) / 4)
- coeff_contexts[pos] = 2;
- else
- coeff_contexts[pos] = 3;
-}
diff --git a/third_party/aom/av1/encoder/x86/encodetxb_sse4.c b/third_party/aom/av1/encoder/x86/encodetxb_sse4.c
deleted file mode 100644
index 5e0687cd3..000000000
--- a/third_party/aom/av1/encoder/x86/encodetxb_sse4.c
+++ /dev/null
@@ -1,92 +0,0 @@
-/*
- * Copyright (c) 2017, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#include <assert.h>
-#include <emmintrin.h> // SSE2
-#include <smmintrin.h> /* SSE4.1 */
-
-#include "aom/aom_integer.h"
-#include "av1/common/onyxc_int.h"
-#include "av1/common/txb_common.h"
-#include "aom_dsp/x86/synonyms.h"
-
-void av1_txb_init_levels_sse4_1(const tran_low_t *const coeff, const int width,
- const int height, uint8_t *const levels) {
- const int stride = width + TX_PAD_HOR;
- const __m128i zeros = _mm_setzero_si128();
-
- const int32_t pre_len = sizeof(*levels) * TX_PAD_TOP * stride;
- uint8_t *pre_buf = levels - TX_PAD_TOP * stride;
- uint8_t *pre_buf_end = pre_buf + pre_len;
- do {
- _mm_storeu_si128((__m128i *)(pre_buf), zeros);
- pre_buf += 16;
- } while (pre_buf < pre_buf_end);
-
- const int32_t bottom_len = sizeof(*levels) * (TX_PAD_BOTTOM * stride);
- uint8_t *bottom_buf = levels + stride * height;
- uint8_t *bottom_buf_end = bottom_buf + bottom_len;
- do {
- _mm_storeu_si128((__m128i *)(bottom_buf), zeros);
- bottom_buf += 16;
- } while (bottom_buf < bottom_buf_end);
-
- int i = 0;
- uint8_t *ls = levels;
- const tran_low_t *cf = coeff;
- if (width == 4) {
- do {
- const __m128i coeffA = xx_loadu_128(cf);
- const __m128i coeffB = xx_loadu_128(cf + 4);
- const __m128i coeffAB = _mm_packs_epi32(coeffA, coeffB);
- const __m128i absAB = _mm_abs_epi16(coeffAB);
- const __m128i absAB8 = _mm_packs_epi16(absAB, zeros);
- const __m128i lsAB = _mm_unpacklo_epi32(absAB8, zeros);
- xx_storeu_128(ls, lsAB);
- ls += (stride << 1);
- cf += (width << 1);
- i += 2;
- } while (i < height);
- } else if (width == 8) {
- do {
- const __m128i coeffA = xx_loadu_128(cf);
- const __m128i coeffB = xx_loadu_128(cf + 4);
- const __m128i coeffAB = _mm_packs_epi32(coeffA, coeffB);
- const __m128i absAB = _mm_abs_epi16(coeffAB);
- const __m128i absAB8 = _mm_packs_epi16(absAB, zeros);
- xx_storeu_128(ls, absAB8);
- ls += stride;
- cf += width;
- i += 1;
- } while (i < height);
- } else {
- do {
- int j = 0;
- do {
- const __m128i coeffA = xx_loadu_128(cf);
- const __m128i coeffB = xx_loadu_128(cf + 4);
- const __m128i coeffC = xx_loadu_128(cf + 8);
- const __m128i coeffD = xx_loadu_128(cf + 12);
- const __m128i coeffAB = _mm_packs_epi32(coeffA, coeffB);
- const __m128i coeffCD = _mm_packs_epi32(coeffC, coeffD);
- const __m128i absAB = _mm_abs_epi16(coeffAB);
- const __m128i absCD = _mm_abs_epi16(coeffCD);
- const __m128i absABCD = _mm_packs_epi16(absAB, absCD);
- xx_storeu_128(ls + j, absABCD);
- j += 16;
- cf += 16;
- } while (j < width);
- *(int32_t *)(ls + width) = 0;
- ls += stride;
- i += 1;
- } while (i < height);
- }
-}
diff --git a/third_party/aom/av1/encoder/x86/error_intrin_avx2.c b/third_party/aom/av1/encoder/x86/error_intrin_avx2.c
deleted file mode 100644
index 7d4f69585..000000000
--- a/third_party/aom/av1/encoder/x86/error_intrin_avx2.c
+++ /dev/null
@@ -1,88 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#include <immintrin.h> // AVX2
-
-#include "config/av1_rtcd.h"
-
-#include "aom/aom_integer.h"
-
-static INLINE void read_coeff(const tran_low_t *coeff, intptr_t offset,
- __m256i *c) {
- const tran_low_t *addr = coeff + offset;
-
- if (sizeof(tran_low_t) == 4) {
- const __m256i x0 = _mm256_loadu_si256((const __m256i *)addr);
- const __m256i x1 = _mm256_loadu_si256((const __m256i *)addr + 1);
- const __m256i y = _mm256_packs_epi32(x0, x1);
- *c = _mm256_permute4x64_epi64(y, 0xD8);
- } else {
- *c = _mm256_loadu_si256((const __m256i *)addr);
- }
-}
-
-int64_t av1_block_error_avx2(const tran_low_t *coeff, const tran_low_t *dqcoeff,
- intptr_t block_size, int64_t *ssz) {
- __m256i sse_reg, ssz_reg, coeff_reg, dqcoeff_reg;
- __m256i exp_dqcoeff_lo, exp_dqcoeff_hi, exp_coeff_lo, exp_coeff_hi;
- __m256i sse_reg_64hi, ssz_reg_64hi;
- __m128i sse_reg128, ssz_reg128;
- int64_t sse;
- int i;
- const __m256i zero_reg = _mm256_setzero_si256();
-
- // init sse and ssz registerd to zero
- sse_reg = _mm256_setzero_si256();
- ssz_reg = _mm256_setzero_si256();
-
- for (i = 0; i < block_size; i += 16) {
- // load 32 bytes from coeff and dqcoeff
- read_coeff(coeff, i, &coeff_reg);
- read_coeff(dqcoeff, i, &dqcoeff_reg);
- // dqcoeff - coeff
- dqcoeff_reg = _mm256_sub_epi16(dqcoeff_reg, coeff_reg);
- // madd (dqcoeff - coeff)
- dqcoeff_reg = _mm256_madd_epi16(dqcoeff_reg, dqcoeff_reg);
- // madd coeff
- coeff_reg = _mm256_madd_epi16(coeff_reg, coeff_reg);
- // expand each double word of madd (dqcoeff - coeff) to quad word
- exp_dqcoeff_lo = _mm256_unpacklo_epi32(dqcoeff_reg, zero_reg);
- exp_dqcoeff_hi = _mm256_unpackhi_epi32(dqcoeff_reg, zero_reg);
- // expand each double word of madd (coeff) to quad word
- exp_coeff_lo = _mm256_unpacklo_epi32(coeff_reg, zero_reg);
- exp_coeff_hi = _mm256_unpackhi_epi32(coeff_reg, zero_reg);
- // add each quad word of madd (dqcoeff - coeff) and madd (coeff)
- sse_reg = _mm256_add_epi64(sse_reg, exp_dqcoeff_lo);
- ssz_reg = _mm256_add_epi64(ssz_reg, exp_coeff_lo);
- sse_reg = _mm256_add_epi64(sse_reg, exp_dqcoeff_hi);
- ssz_reg = _mm256_add_epi64(ssz_reg, exp_coeff_hi);
- }
- // save the higher 64 bit of each 128 bit lane
- sse_reg_64hi = _mm256_srli_si256(sse_reg, 8);
- ssz_reg_64hi = _mm256_srli_si256(ssz_reg, 8);
- // add the higher 64 bit to the low 64 bit
- sse_reg = _mm256_add_epi64(sse_reg, sse_reg_64hi);
- ssz_reg = _mm256_add_epi64(ssz_reg, ssz_reg_64hi);
-
- // add each 64 bit from each of the 128 bit lane of the 256 bit
- sse_reg128 = _mm_add_epi64(_mm256_castsi256_si128(sse_reg),
- _mm256_extractf128_si256(sse_reg, 1));
-
- ssz_reg128 = _mm_add_epi64(_mm256_castsi256_si128(ssz_reg),
- _mm256_extractf128_si256(ssz_reg, 1));
-
- // store the results
- _mm_storel_epi64((__m128i *)(&sse), sse_reg128);
-
- _mm_storel_epi64((__m128i *)(ssz), ssz_reg128);
- _mm256_zeroupper();
- return sse;
-}
diff --git a/third_party/aom/av1/encoder/x86/error_sse2.asm b/third_party/aom/av1/encoder/x86/error_sse2.asm
deleted file mode 100644
index 72e9e22b1..000000000
--- a/third_party/aom/av1/encoder/x86/error_sse2.asm
+++ /dev/null
@@ -1,79 +0,0 @@
-;
-; Copyright (c) 2016, Alliance for Open Media. All rights reserved
-;
-; This source code is subject to the terms of the BSD 2 Clause License and
-; the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
-; was not distributed with this source code in the LICENSE file, you can
-; obtain it at www.aomedia.org/license/software. If the Alliance for Open
-; Media Patent License 1.0 was not distributed with this source code in the
-; PATENTS file, you can obtain it at www.aomedia.org/license/patent.
-;
-
-;
-
-%define private_prefix av1
-
-%include "third_party/x86inc/x86inc.asm"
-
-SECTION .text
-
-; int64_t av1_block_error(int16_t *coeff, int16_t *dqcoeff, intptr_t block_size,
-; int64_t *ssz)
-
-INIT_XMM sse2
-cglobal block_error, 3, 3, 8, uqc, dqc, size, ssz
- pxor m4, m4 ; sse accumulator
- pxor m6, m6 ; ssz accumulator
- pxor m5, m5 ; dedicated zero register
- lea uqcq, [uqcq+sizeq*2]
- lea dqcq, [dqcq+sizeq*2]
- neg sizeq
-.loop:
- mova m2, [uqcq+sizeq*2]
- mova m0, [dqcq+sizeq*2]
- mova m3, [uqcq+sizeq*2+mmsize]
- mova m1, [dqcq+sizeq*2+mmsize]
- psubw m0, m2
- psubw m1, m3
- ; individual errors are max. 15bit+sign, so squares are 30bit, and
- ; thus the sum of 2 should fit in a 31bit integer (+ unused sign bit)
- pmaddwd m0, m0
- pmaddwd m1, m1
- pmaddwd m2, m2
- pmaddwd m3, m3
- ; accumulate in 64bit
- punpckldq m7, m0, m5
- punpckhdq m0, m5
- paddq m4, m7
- punpckldq m7, m1, m5
- paddq m4, m0
- punpckhdq m1, m5
- paddq m4, m7
- punpckldq m7, m2, m5
- paddq m4, m1
- punpckhdq m2, m5
- paddq m6, m7
- punpckldq m7, m3, m5
- paddq m6, m2
- punpckhdq m3, m5
- paddq m6, m7
- paddq m6, m3
- add sizeq, mmsize
- jl .loop
-
- ; accumulate horizontally and store in return value
- movhlps m5, m4
- movhlps m7, m6
- paddq m4, m5
- paddq m6, m7
-%if ARCH_X86_64
- movq rax, m4
- movq [sszq], m6
-%else
- mov eax, sszm
- pshufd m5, m4, 0x1
- movq [eax], m6
- movd eax, m4
- movd edx, m5
-%endif
- RET
diff --git a/third_party/aom/av1/encoder/x86/hash_sse42.c b/third_party/aom/av1/encoder/x86/hash_sse42.c
deleted file mode 100644
index 65fa46311..000000000
--- a/third_party/aom/av1/encoder/x86/hash_sse42.c
+++ /dev/null
@@ -1,51 +0,0 @@
-/*
- * Copyright (c) 2018, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#include <stdint.h>
-#include <smmintrin.h>
-
-// Byte-boundary alignment issues
-#define ALIGN_SIZE 8
-#define ALIGN_MASK (ALIGN_SIZE - 1)
-
-#define CALC_CRC(op, crc, type, buf, len) \
- while ((len) >= sizeof(type)) { \
- (crc) = op((crc), *(type *)(buf)); \
- (len) -= sizeof(type); \
- buf += sizeof(type); \
- }
-
-/**
- * Calculates 32-bit CRC for the input buffer
- * polynomial is 0x11EDC6F41
- * @return A 32-bit unsigned integer representing the CRC
- */
-uint32_t av1_get_crc32c_value_sse4_2(void *crc_calculator, uint8_t *p,
- size_t len) {
- (void)crc_calculator;
- const uint8_t *buf = p;
- uint32_t crc = 0xFFFFFFFF;
-
- // Align the input to the word boundary
- for (; (len > 0) && ((intptr_t)buf & ALIGN_MASK); len--, buf++) {
- crc = _mm_crc32_u8(crc, *buf);
- }
-
-#ifdef __x86_64__
- uint64_t crc64 = crc;
- CALC_CRC(_mm_crc32_u64, crc64, uint64_t, buf, len);
- crc = (uint32_t)crc64;
-#endif
- CALC_CRC(_mm_crc32_u32, crc, uint32_t, buf, len);
- CALC_CRC(_mm_crc32_u16, crc, uint16_t, buf, len);
- CALC_CRC(_mm_crc32_u8, crc, uint8_t, buf, len);
- return (crc ^= 0xFFFFFFFF);
-}
diff --git a/third_party/aom/av1/encoder/x86/highbd_block_error_intrin_sse2.c b/third_party/aom/av1/encoder/x86/highbd_block_error_intrin_sse2.c
deleted file mode 100644
index 777304ace..000000000
--- a/third_party/aom/av1/encoder/x86/highbd_block_error_intrin_sse2.c
+++ /dev/null
@@ -1,72 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#include <emmintrin.h>
-#include <stdio.h>
-
-#include "av1/common/common.h"
-
-int64_t av1_highbd_block_error_sse2(tran_low_t *coeff, tran_low_t *dqcoeff,
- intptr_t block_size, int64_t *ssz,
- int bps) {
- int i, j, test;
- uint32_t temp[4];
- __m128i max, min, cmp0, cmp1, cmp2, cmp3;
- int64_t error = 0, sqcoeff = 0;
- const int shift = 2 * (bps - 8);
- const int rounding = shift > 0 ? 1 << (shift - 1) : 0;
-
- for (i = 0; i < block_size; i += 8) {
- // Load the data into xmm registers
- __m128i mm_coeff = _mm_load_si128((__m128i *)(coeff + i));
- __m128i mm_coeff2 = _mm_load_si128((__m128i *)(coeff + i + 4));
- __m128i mm_dqcoeff = _mm_load_si128((__m128i *)(dqcoeff + i));
- __m128i mm_dqcoeff2 = _mm_load_si128((__m128i *)(dqcoeff + i + 4));
- // Check if any values require more than 15 bit
- max = _mm_set1_epi32(0x3fff);
- min = _mm_set1_epi32(0xffffc000);
- cmp0 = _mm_xor_si128(_mm_cmpgt_epi32(mm_coeff, max),
- _mm_cmplt_epi32(mm_coeff, min));
- cmp1 = _mm_xor_si128(_mm_cmpgt_epi32(mm_coeff2, max),
- _mm_cmplt_epi32(mm_coeff2, min));
- cmp2 = _mm_xor_si128(_mm_cmpgt_epi32(mm_dqcoeff, max),
- _mm_cmplt_epi32(mm_dqcoeff, min));
- cmp3 = _mm_xor_si128(_mm_cmpgt_epi32(mm_dqcoeff2, max),
- _mm_cmplt_epi32(mm_dqcoeff2, min));
- test = _mm_movemask_epi8(
- _mm_or_si128(_mm_or_si128(cmp0, cmp1), _mm_or_si128(cmp2, cmp3)));
-
- if (!test) {
- __m128i mm_diff, error_sse2, sqcoeff_sse2;
- mm_coeff = _mm_packs_epi32(mm_coeff, mm_coeff2);
- mm_dqcoeff = _mm_packs_epi32(mm_dqcoeff, mm_dqcoeff2);
- mm_diff = _mm_sub_epi16(mm_coeff, mm_dqcoeff);
- error_sse2 = _mm_madd_epi16(mm_diff, mm_diff);
- sqcoeff_sse2 = _mm_madd_epi16(mm_coeff, mm_coeff);
- _mm_storeu_si128((__m128i *)temp, error_sse2);
- error = error + temp[0] + temp[1] + temp[2] + temp[3];
- _mm_storeu_si128((__m128i *)temp, sqcoeff_sse2);
- sqcoeff += temp[0] + temp[1] + temp[2] + temp[3];
- } else {
- for (j = 0; j < 8; j++) {
- const int64_t diff = coeff[i + j] - dqcoeff[i + j];
- error += diff * diff;
- sqcoeff += (int64_t)coeff[i + j] * (int64_t)coeff[i + j];
- }
- }
- }
- assert(error >= 0 && sqcoeff >= 0);
- error = (error + rounding) >> shift;
- sqcoeff = (sqcoeff + rounding) >> shift;
-
- *ssz = sqcoeff;
- return error;
-}
diff --git a/third_party/aom/av1/encoder/x86/highbd_fwd_txfm_sse4.c b/third_party/aom/av1/encoder/x86/highbd_fwd_txfm_sse4.c
deleted file mode 100644
index 535485ae8..000000000
--- a/third_party/aom/av1/encoder/x86/highbd_fwd_txfm_sse4.c
+++ /dev/null
@@ -1,1783 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-#include <assert.h>
-#include <smmintrin.h> /* SSE4.1 */
-
-#include "config/aom_config.h"
-#include "config/av1_rtcd.h"
-
-#include "av1/common/av1_txfm.h"
-#include "av1/common/x86/highbd_txfm_utility_sse4.h"
-#include "av1/encoder/av1_fwd_txfm1d_cfg.h"
-#include "av1/encoder/x86/av1_txfm1d_sse4.h"
-#include "aom_dsp/txfm_common.h"
-#include "aom_dsp/x86/txfm_common_sse2.h"
-#include "aom_ports/mem.h"
-
-static INLINE void load_buffer_4x4(const int16_t *input, __m128i *in,
- int stride, int flipud, int fliplr,
- int shift) {
- if (!flipud) {
- in[0] = _mm_loadl_epi64((const __m128i *)(input + 0 * stride));
- in[1] = _mm_loadl_epi64((const __m128i *)(input + 1 * stride));
- in[2] = _mm_loadl_epi64((const __m128i *)(input + 2 * stride));
- in[3] = _mm_loadl_epi64((const __m128i *)(input + 3 * stride));
- } else {
- in[0] = _mm_loadl_epi64((const __m128i *)(input + 3 * stride));
- in[1] = _mm_loadl_epi64((const __m128i *)(input + 2 * stride));
- in[2] = _mm_loadl_epi64((const __m128i *)(input + 1 * stride));
- in[3] = _mm_loadl_epi64((const __m128i *)(input + 0 * stride));
- }
-
- if (fliplr) {
- in[0] = _mm_shufflelo_epi16(in[0], 0x1b);
- in[1] = _mm_shufflelo_epi16(in[1], 0x1b);
- in[2] = _mm_shufflelo_epi16(in[2], 0x1b);
- in[3] = _mm_shufflelo_epi16(in[3], 0x1b);
- }
-
- in[0] = _mm_cvtepi16_epi32(in[0]);
- in[1] = _mm_cvtepi16_epi32(in[1]);
- in[2] = _mm_cvtepi16_epi32(in[2]);
- in[3] = _mm_cvtepi16_epi32(in[3]);
-
- in[0] = _mm_slli_epi32(in[0], shift);
- in[1] = _mm_slli_epi32(in[1], shift);
- in[2] = _mm_slli_epi32(in[2], shift);
- in[3] = _mm_slli_epi32(in[3], shift);
-}
-
-// We only use stage-2 bit;
-// shift[0] is used in load_buffer_4x4()
-// shift[1] is used in txfm_func_col()
-// shift[2] is used in txfm_func_row()
-static void fdct4x4_sse4_1(__m128i *in, int bit) {
- const int32_t *cospi = cospi_arr(bit);
- const __m128i cospi32 = _mm_set1_epi32(cospi[32]);
- const __m128i cospi48 = _mm_set1_epi32(cospi[48]);
- const __m128i cospi16 = _mm_set1_epi32(cospi[16]);
- const __m128i rnding = _mm_set1_epi32(1 << (bit - 1));
- __m128i s0, s1, s2, s3;
- __m128i u0, u1, u2, u3;
- __m128i v0, v1, v2, v3;
-
- s0 = _mm_add_epi32(in[0], in[3]);
- s1 = _mm_add_epi32(in[1], in[2]);
- s2 = _mm_sub_epi32(in[1], in[2]);
- s3 = _mm_sub_epi32(in[0], in[3]);
-
- // btf_32_sse4_1_type0(cospi32, cospi32, s[01], u[02], bit);
- u0 = _mm_mullo_epi32(s0, cospi32);
- u1 = _mm_mullo_epi32(s1, cospi32);
- u2 = _mm_add_epi32(u0, u1);
- v0 = _mm_sub_epi32(u0, u1);
-
- u3 = _mm_add_epi32(u2, rnding);
- v1 = _mm_add_epi32(v0, rnding);
-
- u0 = _mm_srai_epi32(u3, bit);
- u2 = _mm_srai_epi32(v1, bit);
-
- // btf_32_sse4_1_type1(cospi48, cospi16, s[23], u[13], bit);
- v0 = _mm_mullo_epi32(s2, cospi48);
- v1 = _mm_mullo_epi32(s3, cospi16);
- v2 = _mm_add_epi32(v0, v1);
-
- v3 = _mm_add_epi32(v2, rnding);
- u1 = _mm_srai_epi32(v3, bit);
-
- v0 = _mm_mullo_epi32(s2, cospi16);
- v1 = _mm_mullo_epi32(s3, cospi48);
- v2 = _mm_sub_epi32(v1, v0);
-
- v3 = _mm_add_epi32(v2, rnding);
- u3 = _mm_srai_epi32(v3, bit);
-
- // Note: shift[1] and shift[2] are zeros
-
- // Transpose 4x4 32-bit
- v0 = _mm_unpacklo_epi32(u0, u1);
- v1 = _mm_unpackhi_epi32(u0, u1);
- v2 = _mm_unpacklo_epi32(u2, u3);
- v3 = _mm_unpackhi_epi32(u2, u3);
-
- in[0] = _mm_unpacklo_epi64(v0, v2);
- in[1] = _mm_unpackhi_epi64(v0, v2);
- in[2] = _mm_unpacklo_epi64(v1, v3);
- in[3] = _mm_unpackhi_epi64(v1, v3);
-}
-
-static INLINE void write_buffer_4x4(__m128i *res, int32_t *output) {
- _mm_store_si128((__m128i *)(output + 0 * 4), res[0]);
- _mm_store_si128((__m128i *)(output + 1 * 4), res[1]);
- _mm_store_si128((__m128i *)(output + 2 * 4), res[2]);
- _mm_store_si128((__m128i *)(output + 3 * 4), res[3]);
-}
-
-static void fadst4x4_sse4_1(__m128i *in, int bit) {
- const int32_t *sinpi = sinpi_arr(bit);
- const __m128i rnding = _mm_set1_epi32(1 << (bit - 1));
- const __m128i sinpi1 = _mm_set1_epi32((int)sinpi[1]);
- const __m128i sinpi2 = _mm_set1_epi32((int)sinpi[2]);
- const __m128i sinpi3 = _mm_set1_epi32((int)sinpi[3]);
- const __m128i sinpi4 = _mm_set1_epi32((int)sinpi[4]);
- __m128i t;
- __m128i s0, s1, s2, s3, s4, s5, s6, s7;
- __m128i x0, x1, x2, x3;
- __m128i u0, u1, u2, u3;
- __m128i v0, v1, v2, v3;
-
- s0 = _mm_mullo_epi32(in[0], sinpi1);
- s1 = _mm_mullo_epi32(in[0], sinpi4);
- s2 = _mm_mullo_epi32(in[1], sinpi2);
- s3 = _mm_mullo_epi32(in[1], sinpi1);
- s4 = _mm_mullo_epi32(in[2], sinpi3);
- s5 = _mm_mullo_epi32(in[3], sinpi4);
- s6 = _mm_mullo_epi32(in[3], sinpi2);
- t = _mm_add_epi32(in[0], in[1]);
- s7 = _mm_sub_epi32(t, in[3]);
-
- t = _mm_add_epi32(s0, s2);
- x0 = _mm_add_epi32(t, s5);
- x1 = _mm_mullo_epi32(s7, sinpi3);
- t = _mm_sub_epi32(s1, s3);
- x2 = _mm_add_epi32(t, s6);
- x3 = s4;
-
- s0 = _mm_add_epi32(x0, x3);
- s1 = x1;
- s2 = _mm_sub_epi32(x2, x3);
- t = _mm_sub_epi32(x2, x0);
- s3 = _mm_add_epi32(t, x3);
-
- u0 = _mm_add_epi32(s0, rnding);
- u0 = _mm_srai_epi32(u0, bit);
-
- u1 = _mm_add_epi32(s1, rnding);
- u1 = _mm_srai_epi32(u1, bit);
-
- u2 = _mm_add_epi32(s2, rnding);
- u2 = _mm_srai_epi32(u2, bit);
-
- u3 = _mm_add_epi32(s3, rnding);
- u3 = _mm_srai_epi32(u3, bit);
-
- v0 = _mm_unpacklo_epi32(u0, u1);
- v1 = _mm_unpackhi_epi32(u0, u1);
- v2 = _mm_unpacklo_epi32(u2, u3);
- v3 = _mm_unpackhi_epi32(u2, u3);
-
- in[0] = _mm_unpacklo_epi64(v0, v2);
- in[1] = _mm_unpackhi_epi64(v0, v2);
- in[2] = _mm_unpacklo_epi64(v1, v3);
- in[3] = _mm_unpackhi_epi64(v1, v3);
-}
-
-void av1_fwd_txfm2d_4x4_sse4_1(const int16_t *input, int32_t *coeff,
- int input_stride, TX_TYPE tx_type, int bd) {
- __m128i in[4];
- const int8_t *shift = fwd_txfm_shift_ls[TX_4X4];
- const int txw_idx = get_txw_idx(TX_4X4);
- const int txh_idx = get_txh_idx(TX_4X4);
-
- switch (tx_type) {
- case DCT_DCT:
- load_buffer_4x4(input, in, input_stride, 0, 0, shift[0]);
- fdct4x4_sse4_1(in, fwd_cos_bit_col[txw_idx][txh_idx]);
- fdct4x4_sse4_1(in, fwd_cos_bit_row[txw_idx][txh_idx]);
- write_buffer_4x4(in, coeff);
- break;
- case ADST_DCT:
- load_buffer_4x4(input, in, input_stride, 0, 0, shift[0]);
- fadst4x4_sse4_1(in, fwd_cos_bit_col[txw_idx][txh_idx]);
- fdct4x4_sse4_1(in, fwd_cos_bit_row[txw_idx][txh_idx]);
- write_buffer_4x4(in, coeff);
- break;
- case DCT_ADST:
- load_buffer_4x4(input, in, input_stride, 0, 0, shift[0]);
- fdct4x4_sse4_1(in, fwd_cos_bit_col[txw_idx][txh_idx]);
- fadst4x4_sse4_1(in, fwd_cos_bit_row[txw_idx][txh_idx]);
- write_buffer_4x4(in, coeff);
- break;
- case ADST_ADST:
- load_buffer_4x4(input, in, input_stride, 0, 0, shift[0]);
- fadst4x4_sse4_1(in, fwd_cos_bit_col[txw_idx][txh_idx]);
- fadst4x4_sse4_1(in, fwd_cos_bit_row[txw_idx][txh_idx]);
- write_buffer_4x4(in, coeff);
- break;
- case FLIPADST_DCT:
- load_buffer_4x4(input, in, input_stride, 1, 0, shift[0]);
- fadst4x4_sse4_1(in, fwd_cos_bit_col[txw_idx][txh_idx]);
- fdct4x4_sse4_1(in, fwd_cos_bit_row[txw_idx][txh_idx]);
- write_buffer_4x4(in, coeff);
- break;
- case DCT_FLIPADST:
- load_buffer_4x4(input, in, input_stride, 0, 1, shift[0]);
- fdct4x4_sse4_1(in, fwd_cos_bit_col[txw_idx][txh_idx]);
- fadst4x4_sse4_1(in, fwd_cos_bit_row[txw_idx][txh_idx]);
- write_buffer_4x4(in, coeff);
- break;
- case FLIPADST_FLIPADST:
- load_buffer_4x4(input, in, input_stride, 1, 1, shift[0]);
- fadst4x4_sse4_1(in, fwd_cos_bit_col[txw_idx][txh_idx]);
- fadst4x4_sse4_1(in, fwd_cos_bit_row[txw_idx][txh_idx]);
- write_buffer_4x4(in, coeff);
- break;
- case ADST_FLIPADST:
- load_buffer_4x4(input, in, input_stride, 0, 1, shift[0]);
- fadst4x4_sse4_1(in, fwd_cos_bit_col[txw_idx][txh_idx]);
- fadst4x4_sse4_1(in, fwd_cos_bit_row[txw_idx][txh_idx]);
- write_buffer_4x4(in, coeff);
- break;
- case FLIPADST_ADST:
- load_buffer_4x4(input, in, input_stride, 1, 0, shift[0]);
- fadst4x4_sse4_1(in, fwd_cos_bit_col[txw_idx][txh_idx]);
- fadst4x4_sse4_1(in, fwd_cos_bit_row[txw_idx][txh_idx]);
- write_buffer_4x4(in, coeff);
- break;
- default: assert(0);
- }
- (void)bd;
-}
-
-static INLINE void load_buffer_8x8(const int16_t *input, __m128i *in,
- int stride, int flipud, int fliplr,
- int shift) {
- __m128i u;
- if (!flipud) {
- in[0] = _mm_load_si128((const __m128i *)(input + 0 * stride));
- in[1] = _mm_load_si128((const __m128i *)(input + 1 * stride));
- in[2] = _mm_load_si128((const __m128i *)(input + 2 * stride));
- in[3] = _mm_load_si128((const __m128i *)(input + 3 * stride));
- in[4] = _mm_load_si128((const __m128i *)(input + 4 * stride));
- in[5] = _mm_load_si128((const __m128i *)(input + 5 * stride));
- in[6] = _mm_load_si128((const __m128i *)(input + 6 * stride));
- in[7] = _mm_load_si128((const __m128i *)(input + 7 * stride));
- } else {
- in[0] = _mm_load_si128((const __m128i *)(input + 7 * stride));
- in[1] = _mm_load_si128((const __m128i *)(input + 6 * stride));
- in[2] = _mm_load_si128((const __m128i *)(input + 5 * stride));
- in[3] = _mm_load_si128((const __m128i *)(input + 4 * stride));
- in[4] = _mm_load_si128((const __m128i *)(input + 3 * stride));
- in[5] = _mm_load_si128((const __m128i *)(input + 2 * stride));
- in[6] = _mm_load_si128((const __m128i *)(input + 1 * stride));
- in[7] = _mm_load_si128((const __m128i *)(input + 0 * stride));
- }
-
- if (fliplr) {
- in[0] = mm_reverse_epi16(in[0]);
- in[1] = mm_reverse_epi16(in[1]);
- in[2] = mm_reverse_epi16(in[2]);
- in[3] = mm_reverse_epi16(in[3]);
- in[4] = mm_reverse_epi16(in[4]);
- in[5] = mm_reverse_epi16(in[5]);
- in[6] = mm_reverse_epi16(in[6]);
- in[7] = mm_reverse_epi16(in[7]);
- }
-
- u = _mm_unpackhi_epi64(in[4], in[4]);
- in[8] = _mm_cvtepi16_epi32(in[4]);
- in[9] = _mm_cvtepi16_epi32(u);
-
- u = _mm_unpackhi_epi64(in[5], in[5]);
- in[10] = _mm_cvtepi16_epi32(in[5]);
- in[11] = _mm_cvtepi16_epi32(u);
-
- u = _mm_unpackhi_epi64(in[6], in[6]);
- in[12] = _mm_cvtepi16_epi32(in[6]);
- in[13] = _mm_cvtepi16_epi32(u);
-
- u = _mm_unpackhi_epi64(in[7], in[7]);
- in[14] = _mm_cvtepi16_epi32(in[7]);
- in[15] = _mm_cvtepi16_epi32(u);
-
- u = _mm_unpackhi_epi64(in[3], in[3]);
- in[6] = _mm_cvtepi16_epi32(in[3]);
- in[7] = _mm_cvtepi16_epi32(u);
-
- u = _mm_unpackhi_epi64(in[2], in[2]);
- in[4] = _mm_cvtepi16_epi32(in[2]);
- in[5] = _mm_cvtepi16_epi32(u);
-
- u = _mm_unpackhi_epi64(in[1], in[1]);
- in[2] = _mm_cvtepi16_epi32(in[1]);
- in[3] = _mm_cvtepi16_epi32(u);
-
- u = _mm_unpackhi_epi64(in[0], in[0]);
- in[0] = _mm_cvtepi16_epi32(in[0]);
- in[1] = _mm_cvtepi16_epi32(u);
-
- in[0] = _mm_slli_epi32(in[0], shift);
- in[1] = _mm_slli_epi32(in[1], shift);
- in[2] = _mm_slli_epi32(in[2], shift);
- in[3] = _mm_slli_epi32(in[3], shift);
- in[4] = _mm_slli_epi32(in[4], shift);
- in[5] = _mm_slli_epi32(in[5], shift);
- in[6] = _mm_slli_epi32(in[6], shift);
- in[7] = _mm_slli_epi32(in[7], shift);
-
- in[8] = _mm_slli_epi32(in[8], shift);
- in[9] = _mm_slli_epi32(in[9], shift);
- in[10] = _mm_slli_epi32(in[10], shift);
- in[11] = _mm_slli_epi32(in[11], shift);
- in[12] = _mm_slli_epi32(in[12], shift);
- in[13] = _mm_slli_epi32(in[13], shift);
- in[14] = _mm_slli_epi32(in[14], shift);
- in[15] = _mm_slli_epi32(in[15], shift);
-}
-
-static INLINE void col_txfm_8x8_rounding(__m128i *in, int shift) {
- const __m128i rounding = _mm_set1_epi32(1 << (shift - 1));
-
- in[0] = _mm_add_epi32(in[0], rounding);
- in[1] = _mm_add_epi32(in[1], rounding);
- in[2] = _mm_add_epi32(in[2], rounding);
- in[3] = _mm_add_epi32(in[3], rounding);
- in[4] = _mm_add_epi32(in[4], rounding);
- in[5] = _mm_add_epi32(in[5], rounding);
- in[6] = _mm_add_epi32(in[6], rounding);
- in[7] = _mm_add_epi32(in[7], rounding);
- in[8] = _mm_add_epi32(in[8], rounding);
- in[9] = _mm_add_epi32(in[9], rounding);
- in[10] = _mm_add_epi32(in[10], rounding);
- in[11] = _mm_add_epi32(in[11], rounding);
- in[12] = _mm_add_epi32(in[12], rounding);
- in[13] = _mm_add_epi32(in[13], rounding);
- in[14] = _mm_add_epi32(in[14], rounding);
- in[15] = _mm_add_epi32(in[15], rounding);
-
- in[0] = _mm_srai_epi32(in[0], shift);
- in[1] = _mm_srai_epi32(in[1], shift);
- in[2] = _mm_srai_epi32(in[2], shift);
- in[3] = _mm_srai_epi32(in[3], shift);
- in[4] = _mm_srai_epi32(in[4], shift);
- in[5] = _mm_srai_epi32(in[5], shift);
- in[6] = _mm_srai_epi32(in[6], shift);
- in[7] = _mm_srai_epi32(in[7], shift);
- in[8] = _mm_srai_epi32(in[8], shift);
- in[9] = _mm_srai_epi32(in[9], shift);
- in[10] = _mm_srai_epi32(in[10], shift);
- in[11] = _mm_srai_epi32(in[11], shift);
- in[12] = _mm_srai_epi32(in[12], shift);
- in[13] = _mm_srai_epi32(in[13], shift);
- in[14] = _mm_srai_epi32(in[14], shift);
- in[15] = _mm_srai_epi32(in[15], shift);
-}
-
-static INLINE void write_buffer_8x8(const __m128i *res, int32_t *output) {
- _mm_store_si128((__m128i *)(output + 0 * 4), res[0]);
- _mm_store_si128((__m128i *)(output + 1 * 4), res[1]);
- _mm_store_si128((__m128i *)(output + 2 * 4), res[2]);
- _mm_store_si128((__m128i *)(output + 3 * 4), res[3]);
-
- _mm_store_si128((__m128i *)(output + 4 * 4), res[4]);
- _mm_store_si128((__m128i *)(output + 5 * 4), res[5]);
- _mm_store_si128((__m128i *)(output + 6 * 4), res[6]);
- _mm_store_si128((__m128i *)(output + 7 * 4), res[7]);
-
- _mm_store_si128((__m128i *)(output + 8 * 4), res[8]);
- _mm_store_si128((__m128i *)(output + 9 * 4), res[9]);
- _mm_store_si128((__m128i *)(output + 10 * 4), res[10]);
- _mm_store_si128((__m128i *)(output + 11 * 4), res[11]);
-
- _mm_store_si128((__m128i *)(output + 12 * 4), res[12]);
- _mm_store_si128((__m128i *)(output + 13 * 4), res[13]);
- _mm_store_si128((__m128i *)(output + 14 * 4), res[14]);
- _mm_store_si128((__m128i *)(output + 15 * 4), res[15]);
-}
-
-static INLINE void write_buffer_16x8(const __m128i *res, int32_t *output,
- const int stride) {
- _mm_storeu_si128((__m128i *)(output), res[0]);
- _mm_storeu_si128((__m128i *)(output + 4), res[1]);
- _mm_storeu_si128((__m128i *)(output + stride), res[2]);
- _mm_storeu_si128((__m128i *)(output + stride + 4), res[3]);
-
- _mm_storeu_si128((__m128i *)(output + (stride * 2)), res[4]);
- _mm_storeu_si128((__m128i *)(output + (stride * 2) + 4), res[5]);
- _mm_storeu_si128((__m128i *)(output + (stride * 3)), res[6]);
- _mm_storeu_si128((__m128i *)(output + (stride * 3) + 4), res[7]);
-
- _mm_storeu_si128((__m128i *)(output + (stride * 4)), res[8]);
- _mm_storeu_si128((__m128i *)(output + (stride * 4) + 4), res[9]);
- _mm_storeu_si128((__m128i *)(output + (stride * 5)), res[10]);
- _mm_storeu_si128((__m128i *)(output + (stride * 5) + 4), res[11]);
-
- _mm_storeu_si128((__m128i *)(output + (stride * 6)), res[12]);
- _mm_storeu_si128((__m128i *)(output + (stride * 6) + 4), res[13]);
- _mm_storeu_si128((__m128i *)(output + (stride * 7)), res[14]);
- _mm_storeu_si128((__m128i *)(output + (stride * 7) + 4), res[15]);
-}
-
-static void fdct8x8_sse4_1(__m128i *in, __m128i *out, int bit,
- const int col_num) {
- (void)(col_num);
- const int32_t *cospi = cospi_arr(bit);
- const __m128i cospi32 = _mm_set1_epi32(cospi[32]);
- const __m128i cospim32 = _mm_set1_epi32(-cospi[32]);
- const __m128i cospi48 = _mm_set1_epi32(cospi[48]);
- const __m128i cospi16 = _mm_set1_epi32(cospi[16]);
- const __m128i cospi56 = _mm_set1_epi32(cospi[56]);
- const __m128i cospi8 = _mm_set1_epi32(cospi[8]);
- const __m128i cospi24 = _mm_set1_epi32(cospi[24]);
- const __m128i cospi40 = _mm_set1_epi32(cospi[40]);
- const __m128i rnding = _mm_set1_epi32(1 << (bit - 1));
- __m128i u[8], v[8];
-
- // Even 8 points 0, 2, ..., 14
- // stage 0
- // stage 1
- u[0] = _mm_add_epi32(in[0], in[14]);
- v[7] = _mm_sub_epi32(in[0], in[14]); // v[7]
- u[1] = _mm_add_epi32(in[2], in[12]);
- u[6] = _mm_sub_epi32(in[2], in[12]);
- u[2] = _mm_add_epi32(in[4], in[10]);
- u[5] = _mm_sub_epi32(in[4], in[10]);
- u[3] = _mm_add_epi32(in[6], in[8]);
- v[4] = _mm_sub_epi32(in[6], in[8]); // v[4]
-
- // stage 2
- v[0] = _mm_add_epi32(u[0], u[3]);
- v[3] = _mm_sub_epi32(u[0], u[3]);
- v[1] = _mm_add_epi32(u[1], u[2]);
- v[2] = _mm_sub_epi32(u[1], u[2]);
-
- v[5] = _mm_mullo_epi32(u[5], cospim32);
- v[6] = _mm_mullo_epi32(u[6], cospi32);
- v[5] = _mm_add_epi32(v[5], v[6]);
- v[5] = _mm_add_epi32(v[5], rnding);
- v[5] = _mm_srai_epi32(v[5], bit);
-
- u[0] = _mm_mullo_epi32(u[5], cospi32);
- v[6] = _mm_mullo_epi32(u[6], cospim32);
- v[6] = _mm_sub_epi32(u[0], v[6]);
- v[6] = _mm_add_epi32(v[6], rnding);
- v[6] = _mm_srai_epi32(v[6], bit);
-
- // stage 3
- // type 0
- v[0] = _mm_mullo_epi32(v[0], cospi32);
- v[1] = _mm_mullo_epi32(v[1], cospi32);
- u[0] = _mm_add_epi32(v[0], v[1]);
- u[0] = _mm_add_epi32(u[0], rnding);
- u[0] = _mm_srai_epi32(u[0], bit);
-
- u[1] = _mm_sub_epi32(v[0], v[1]);
- u[1] = _mm_add_epi32(u[1], rnding);
- u[1] = _mm_srai_epi32(u[1], bit);
-
- // type 1
- v[0] = _mm_mullo_epi32(v[2], cospi48);
- v[1] = _mm_mullo_epi32(v[3], cospi16);
- u[2] = _mm_add_epi32(v[0], v[1]);
- u[2] = _mm_add_epi32(u[2], rnding);
- u[2] = _mm_srai_epi32(u[2], bit);
-
- v[0] = _mm_mullo_epi32(v[2], cospi16);
- v[1] = _mm_mullo_epi32(v[3], cospi48);
- u[3] = _mm_sub_epi32(v[1], v[0]);
- u[3] = _mm_add_epi32(u[3], rnding);
- u[3] = _mm_srai_epi32(u[3], bit);
-
- u[4] = _mm_add_epi32(v[4], v[5]);
- u[5] = _mm_sub_epi32(v[4], v[5]);
- u[6] = _mm_sub_epi32(v[7], v[6]);
- u[7] = _mm_add_epi32(v[7], v[6]);
-
- // stage 4
- // stage 5
- v[0] = _mm_mullo_epi32(u[4], cospi56);
- v[1] = _mm_mullo_epi32(u[7], cospi8);
- v[0] = _mm_add_epi32(v[0], v[1]);
- v[0] = _mm_add_epi32(v[0], rnding);
- out[2] = _mm_srai_epi32(v[0], bit); // buf0[4]
-
- v[0] = _mm_mullo_epi32(u[4], cospi8);
- v[1] = _mm_mullo_epi32(u[7], cospi56);
- v[0] = _mm_sub_epi32(v[1], v[0]);
- v[0] = _mm_add_epi32(v[0], rnding);
- out[14] = _mm_srai_epi32(v[0], bit); // buf0[7]
-
- v[0] = _mm_mullo_epi32(u[5], cospi24);
- v[1] = _mm_mullo_epi32(u[6], cospi40);
- v[0] = _mm_add_epi32(v[0], v[1]);
- v[0] = _mm_add_epi32(v[0], rnding);
- out[10] = _mm_srai_epi32(v[0], bit); // buf0[5]
-
- v[0] = _mm_mullo_epi32(u[5], cospi40);
- v[1] = _mm_mullo_epi32(u[6], cospi24);
- v[0] = _mm_sub_epi32(v[1], v[0]);
- v[0] = _mm_add_epi32(v[0], rnding);
- out[6] = _mm_srai_epi32(v[0], bit); // buf0[6]
-
- out[0] = u[0]; // buf0[0]
- out[8] = u[1]; // buf0[1]
- out[4] = u[2]; // buf0[2]
- out[12] = u[3]; // buf0[3]
-
- // Odd 8 points: 1, 3, ..., 15
- // stage 0
- // stage 1
- u[0] = _mm_add_epi32(in[1], in[15]);
- v[7] = _mm_sub_epi32(in[1], in[15]); // v[7]
- u[1] = _mm_add_epi32(in[3], in[13]);
- u[6] = _mm_sub_epi32(in[3], in[13]);
- u[2] = _mm_add_epi32(in[5], in[11]);
- u[5] = _mm_sub_epi32(in[5], in[11]);
- u[3] = _mm_add_epi32(in[7], in[9]);
- v[4] = _mm_sub_epi32(in[7], in[9]); // v[4]
-
- // stage 2
- v[0] = _mm_add_epi32(u[0], u[3]);
- v[3] = _mm_sub_epi32(u[0], u[3]);
- v[1] = _mm_add_epi32(u[1], u[2]);
- v[2] = _mm_sub_epi32(u[1], u[2]);
-
- v[5] = _mm_mullo_epi32(u[5], cospim32);
- v[6] = _mm_mullo_epi32(u[6], cospi32);
- v[5] = _mm_add_epi32(v[5], v[6]);
- v[5] = _mm_add_epi32(v[5], rnding);
- v[5] = _mm_srai_epi32(v[5], bit);
-
- u[0] = _mm_mullo_epi32(u[5], cospi32);
- v[6] = _mm_mullo_epi32(u[6], cospim32);
- v[6] = _mm_sub_epi32(u[0], v[6]);
- v[6] = _mm_add_epi32(v[6], rnding);
- v[6] = _mm_srai_epi32(v[6], bit);
-
- // stage 3
- // type 0
- v[0] = _mm_mullo_epi32(v[0], cospi32);
- v[1] = _mm_mullo_epi32(v[1], cospi32);
- u[0] = _mm_add_epi32(v[0], v[1]);
- u[0] = _mm_add_epi32(u[0], rnding);
- u[0] = _mm_srai_epi32(u[0], bit);
-
- u[1] = _mm_sub_epi32(v[0], v[1]);
- u[1] = _mm_add_epi32(u[1], rnding);
- u[1] = _mm_srai_epi32(u[1], bit);
-
- // type 1
- v[0] = _mm_mullo_epi32(v[2], cospi48);
- v[1] = _mm_mullo_epi32(v[3], cospi16);
- u[2] = _mm_add_epi32(v[0], v[1]);
- u[2] = _mm_add_epi32(u[2], rnding);
- u[2] = _mm_srai_epi32(u[2], bit);
-
- v[0] = _mm_mullo_epi32(v[2], cospi16);
- v[1] = _mm_mullo_epi32(v[3], cospi48);
- u[3] = _mm_sub_epi32(v[1], v[0]);
- u[3] = _mm_add_epi32(u[3], rnding);
- u[3] = _mm_srai_epi32(u[3], bit);
-
- u[4] = _mm_add_epi32(v[4], v[5]);
- u[5] = _mm_sub_epi32(v[4], v[5]);
- u[6] = _mm_sub_epi32(v[7], v[6]);
- u[7] = _mm_add_epi32(v[7], v[6]);
-
- // stage 4
- // stage 5
- v[0] = _mm_mullo_epi32(u[4], cospi56);
- v[1] = _mm_mullo_epi32(u[7], cospi8);
- v[0] = _mm_add_epi32(v[0], v[1]);
- v[0] = _mm_add_epi32(v[0], rnding);
- out[3] = _mm_srai_epi32(v[0], bit); // buf0[4]
-
- v[0] = _mm_mullo_epi32(u[4], cospi8);
- v[1] = _mm_mullo_epi32(u[7], cospi56);
- v[0] = _mm_sub_epi32(v[1], v[0]);
- v[0] = _mm_add_epi32(v[0], rnding);
- out[15] = _mm_srai_epi32(v[0], bit); // buf0[7]
-
- v[0] = _mm_mullo_epi32(u[5], cospi24);
- v[1] = _mm_mullo_epi32(u[6], cospi40);
- v[0] = _mm_add_epi32(v[0], v[1]);
- v[0] = _mm_add_epi32(v[0], rnding);
- out[11] = _mm_srai_epi32(v[0], bit); // buf0[5]
-
- v[0] = _mm_mullo_epi32(u[5], cospi40);
- v[1] = _mm_mullo_epi32(u[6], cospi24);
- v[0] = _mm_sub_epi32(v[1], v[0]);
- v[0] = _mm_add_epi32(v[0], rnding);
- out[7] = _mm_srai_epi32(v[0], bit); // buf0[6]
-
- out[1] = u[0]; // buf0[0]
- out[9] = u[1]; // buf0[1]
- out[5] = u[2]; // buf0[2]
- out[13] = u[3]; // buf0[3]
-}
-
-static void fadst8x8_sse4_1(__m128i *in, __m128i *out, int bit,
- const int col_num) {
- (void)(col_num);
- const int32_t *cospi = cospi_arr(bit);
- const __m128i cospi32 = _mm_set1_epi32(cospi[32]);
- const __m128i cospi16 = _mm_set1_epi32(cospi[16]);
- const __m128i cospim16 = _mm_set1_epi32(-cospi[16]);
- const __m128i cospi48 = _mm_set1_epi32(cospi[48]);
- const __m128i cospim48 = _mm_set1_epi32(-cospi[48]);
- const __m128i cospi4 = _mm_set1_epi32(cospi[4]);
- const __m128i cospim4 = _mm_set1_epi32(-cospi[4]);
- const __m128i cospi60 = _mm_set1_epi32(cospi[60]);
- const __m128i cospi20 = _mm_set1_epi32(cospi[20]);
- const __m128i cospim20 = _mm_set1_epi32(-cospi[20]);
- const __m128i cospi44 = _mm_set1_epi32(cospi[44]);
- const __m128i cospi28 = _mm_set1_epi32(cospi[28]);
- const __m128i cospi36 = _mm_set1_epi32(cospi[36]);
- const __m128i cospim36 = _mm_set1_epi32(-cospi[36]);
- const __m128i cospi52 = _mm_set1_epi32(cospi[52]);
- const __m128i cospim52 = _mm_set1_epi32(-cospi[52]);
- const __m128i cospi12 = _mm_set1_epi32(cospi[12]);
- const __m128i rnding = _mm_set1_epi32(1 << (bit - 1));
- const __m128i zero = _mm_setzero_si128();
- __m128i u0, u1, u2, u3, u4, u5, u6, u7;
- __m128i v0, v1, v2, v3, v4, v5, v6, v7;
- __m128i x, y;
- int col;
-
- // Note:
- // Even column: 0, 2, ..., 14
- // Odd column: 1, 3, ..., 15
- // one even column plus one odd column constructs one row (8 coeffs)
- // total we have 8 rows (8x8).
- for (col = 0; col < 2; ++col) {
- // stage 0
- // stage 1
- u0 = in[2 * 0 + col];
- u1 = _mm_sub_epi32(zero, in[2 * 7 + col]);
- u2 = _mm_sub_epi32(zero, in[2 * 3 + col]);
- u3 = in[2 * 4 + col];
- u4 = _mm_sub_epi32(zero, in[2 * 1 + col]);
- u5 = in[2 * 6 + col];
- u6 = in[2 * 2 + col];
- u7 = _mm_sub_epi32(zero, in[2 * 5 + col]);
-
- // stage 2
- v0 = u0;
- v1 = u1;
-
- x = _mm_mullo_epi32(u2, cospi32);
- y = _mm_mullo_epi32(u3, cospi32);
- v2 = _mm_add_epi32(x, y);
- v2 = _mm_add_epi32(v2, rnding);
- v2 = _mm_srai_epi32(v2, bit);
-
- v3 = _mm_sub_epi32(x, y);
- v3 = _mm_add_epi32(v3, rnding);
- v3 = _mm_srai_epi32(v3, bit);
-
- v4 = u4;
- v5 = u5;
-
- x = _mm_mullo_epi32(u6, cospi32);
- y = _mm_mullo_epi32(u7, cospi32);
- v6 = _mm_add_epi32(x, y);
- v6 = _mm_add_epi32(v6, rnding);
- v6 = _mm_srai_epi32(v6, bit);
-
- v7 = _mm_sub_epi32(x, y);
- v7 = _mm_add_epi32(v7, rnding);
- v7 = _mm_srai_epi32(v7, bit);
-
- // stage 3
- u0 = _mm_add_epi32(v0, v2);
- u1 = _mm_add_epi32(v1, v3);
- u2 = _mm_sub_epi32(v0, v2);
- u3 = _mm_sub_epi32(v1, v3);
- u4 = _mm_add_epi32(v4, v6);
- u5 = _mm_add_epi32(v5, v7);
- u6 = _mm_sub_epi32(v4, v6);
- u7 = _mm_sub_epi32(v5, v7);
-
- // stage 4
- v0 = u0;
- v1 = u1;
- v2 = u2;
- v3 = u3;
-
- x = _mm_mullo_epi32(u4, cospi16);
- y = _mm_mullo_epi32(u5, cospi48);
- v4 = _mm_add_epi32(x, y);
- v4 = _mm_add_epi32(v4, rnding);
- v4 = _mm_srai_epi32(v4, bit);
-
- x = _mm_mullo_epi32(u4, cospi48);
- y = _mm_mullo_epi32(u5, cospim16);
- v5 = _mm_add_epi32(x, y);
- v5 = _mm_add_epi32(v5, rnding);
- v5 = _mm_srai_epi32(v5, bit);
-
- x = _mm_mullo_epi32(u6, cospim48);
- y = _mm_mullo_epi32(u7, cospi16);
- v6 = _mm_add_epi32(x, y);
- v6 = _mm_add_epi32(v6, rnding);
- v6 = _mm_srai_epi32(v6, bit);
-
- x = _mm_mullo_epi32(u6, cospi16);
- y = _mm_mullo_epi32(u7, cospi48);
- v7 = _mm_add_epi32(x, y);
- v7 = _mm_add_epi32(v7, rnding);
- v7 = _mm_srai_epi32(v7, bit);
-
- // stage 5
- u0 = _mm_add_epi32(v0, v4);
- u1 = _mm_add_epi32(v1, v5);
- u2 = _mm_add_epi32(v2, v6);
- u3 = _mm_add_epi32(v3, v7);
- u4 = _mm_sub_epi32(v0, v4);
- u5 = _mm_sub_epi32(v1, v5);
- u6 = _mm_sub_epi32(v2, v6);
- u7 = _mm_sub_epi32(v3, v7);
-
- // stage 6
- x = _mm_mullo_epi32(u0, cospi4);
- y = _mm_mullo_epi32(u1, cospi60);
- v0 = _mm_add_epi32(x, y);
- v0 = _mm_add_epi32(v0, rnding);
- v0 = _mm_srai_epi32(v0, bit);
-
- x = _mm_mullo_epi32(u0, cospi60);
- y = _mm_mullo_epi32(u1, cospim4);
- v1 = _mm_add_epi32(x, y);
- v1 = _mm_add_epi32(v1, rnding);
- v1 = _mm_srai_epi32(v1, bit);
-
- x = _mm_mullo_epi32(u2, cospi20);
- y = _mm_mullo_epi32(u3, cospi44);
- v2 = _mm_add_epi32(x, y);
- v2 = _mm_add_epi32(v2, rnding);
- v2 = _mm_srai_epi32(v2, bit);
-
- x = _mm_mullo_epi32(u2, cospi44);
- y = _mm_mullo_epi32(u3, cospim20);
- v3 = _mm_add_epi32(x, y);
- v3 = _mm_add_epi32(v3, rnding);
- v3 = _mm_srai_epi32(v3, bit);
-
- x = _mm_mullo_epi32(u4, cospi36);
- y = _mm_mullo_epi32(u5, cospi28);
- v4 = _mm_add_epi32(x, y);
- v4 = _mm_add_epi32(v4, rnding);
- v4 = _mm_srai_epi32(v4, bit);
-
- x = _mm_mullo_epi32(u4, cospi28);
- y = _mm_mullo_epi32(u5, cospim36);
- v5 = _mm_add_epi32(x, y);
- v5 = _mm_add_epi32(v5, rnding);
- v5 = _mm_srai_epi32(v5, bit);
-
- x = _mm_mullo_epi32(u6, cospi52);
- y = _mm_mullo_epi32(u7, cospi12);
- v6 = _mm_add_epi32(x, y);
- v6 = _mm_add_epi32(v6, rnding);
- v6 = _mm_srai_epi32(v6, bit);
-
- x = _mm_mullo_epi32(u6, cospi12);
- y = _mm_mullo_epi32(u7, cospim52);
- v7 = _mm_add_epi32(x, y);
- v7 = _mm_add_epi32(v7, rnding);
- v7 = _mm_srai_epi32(v7, bit);
-
- // stage 7
- out[2 * 0 + col] = v1;
- out[2 * 1 + col] = v6;
- out[2 * 2 + col] = v3;
- out[2 * 3 + col] = v4;
- out[2 * 4 + col] = v5;
- out[2 * 5 + col] = v2;
- out[2 * 6 + col] = v7;
- out[2 * 7 + col] = v0;
- }
-}
-
-void av1_fwd_txfm2d_8x8_sse4_1(const int16_t *input, int32_t *coeff, int stride,
- TX_TYPE tx_type, int bd) {
- __m128i in[16], out[16];
- const int8_t *shift = fwd_txfm_shift_ls[TX_8X8];
- const int txw_idx = get_txw_idx(TX_8X8);
- const int txh_idx = get_txh_idx(TX_8X8);
-
- switch (tx_type) {
- case DCT_DCT:
- load_buffer_8x8(input, in, stride, 0, 0, shift[0]);
- fdct8x8_sse4_1(in, out, fwd_cos_bit_col[txw_idx][txh_idx], 0);
- col_txfm_8x8_rounding(out, -shift[1]);
- transpose_8x8(out, in);
- fdct8x8_sse4_1(in, out, fwd_cos_bit_row[txw_idx][txh_idx], 0);
- transpose_8x8(out, in);
- write_buffer_8x8(in, coeff);
- break;
- case ADST_DCT:
- load_buffer_8x8(input, in, stride, 0, 0, shift[0]);
- fadst8x8_sse4_1(in, out, fwd_cos_bit_col[txw_idx][txh_idx], 0);
- col_txfm_8x8_rounding(out, -shift[1]);
- transpose_8x8(out, in);
- fdct8x8_sse4_1(in, out, fwd_cos_bit_row[txw_idx][txh_idx], 0);
- transpose_8x8(out, in);
- write_buffer_8x8(in, coeff);
- break;
- case DCT_ADST:
- load_buffer_8x8(input, in, stride, 0, 0, shift[0]);
- fdct8x8_sse4_1(in, out, fwd_cos_bit_col[txw_idx][txh_idx], 0);
- col_txfm_8x8_rounding(out, -shift[1]);
- transpose_8x8(out, in);
- fadst8x8_sse4_1(in, out, fwd_cos_bit_row[txw_idx][txh_idx], 0);
- transpose_8x8(out, in);
- write_buffer_8x8(in, coeff);
- break;
- case ADST_ADST:
- load_buffer_8x8(input, in, stride, 0, 0, shift[0]);
- fadst8x8_sse4_1(in, out, fwd_cos_bit_col[txw_idx][txh_idx], 0);
- col_txfm_8x8_rounding(out, -shift[1]);
- transpose_8x8(out, in);
- fadst8x8_sse4_1(in, out, fwd_cos_bit_row[txw_idx][txh_idx], 0);
- transpose_8x8(out, in);
- write_buffer_8x8(in, coeff);
- break;
- case FLIPADST_DCT:
- load_buffer_8x8(input, in, stride, 1, 0, shift[0]);
- fadst8x8_sse4_1(in, out, fwd_cos_bit_col[txw_idx][txh_idx], 0);
- col_txfm_8x8_rounding(out, -shift[1]);
- transpose_8x8(out, in);
- fdct8x8_sse4_1(in, out, fwd_cos_bit_row[txw_idx][txh_idx], 0);
- transpose_8x8(out, in);
- write_buffer_8x8(in, coeff);
- break;
- case DCT_FLIPADST:
- load_buffer_8x8(input, in, stride, 0, 1, shift[0]);
- fdct8x8_sse4_1(in, out, fwd_cos_bit_col[txw_idx][txh_idx], 0);
- col_txfm_8x8_rounding(out, -shift[1]);
- transpose_8x8(out, in);
- fadst8x8_sse4_1(in, out, fwd_cos_bit_row[txw_idx][txh_idx], 0);
- transpose_8x8(out, in);
- write_buffer_8x8(in, coeff);
- break;
- case FLIPADST_FLIPADST:
- load_buffer_8x8(input, in, stride, 1, 1, shift[0]);
- fadst8x8_sse4_1(in, out, fwd_cos_bit_col[txw_idx][txh_idx], 0);
- col_txfm_8x8_rounding(out, -shift[1]);
- transpose_8x8(out, in);
- fadst8x8_sse4_1(in, out, fwd_cos_bit_row[txw_idx][txh_idx], 0);
- transpose_8x8(out, in);
- write_buffer_8x8(in, coeff);
- break;
- case ADST_FLIPADST:
- load_buffer_8x8(input, in, stride, 0, 1, shift[0]);
- fadst8x8_sse4_1(in, out, fwd_cos_bit_col[txw_idx][txh_idx], 0);
- col_txfm_8x8_rounding(out, -shift[1]);
- transpose_8x8(out, in);
- fadst8x8_sse4_1(in, out, fwd_cos_bit_row[txw_idx][txh_idx], 0);
- transpose_8x8(out, in);
- write_buffer_8x8(in, coeff);
- break;
- case FLIPADST_ADST:
- load_buffer_8x8(input, in, stride, 1, 0, shift[0]);
- fadst8x8_sse4_1(in, out, fwd_cos_bit_col[txw_idx][txh_idx], 0);
- col_txfm_8x8_rounding(out, -shift[1]);
- transpose_8x8(out, in);
- fadst8x8_sse4_1(in, out, fwd_cos_bit_row[txw_idx][txh_idx], 0);
- transpose_8x8(out, in);
- write_buffer_8x8(in, coeff);
- break;
- default: assert(0);
- }
- (void)bd;
-}
-
-// Hybrid Transform 16x16
-
-static INLINE void convert_8x8_to_16x16(const __m128i *in, __m128i *out) {
- int row_index = 0;
- int dst_index = 0;
- int src_index = 0;
-
- // row 0, 1, .., 7
- do {
- out[dst_index] = in[src_index];
- out[dst_index + 1] = in[src_index + 1];
- out[dst_index + 2] = in[src_index + 16];
- out[dst_index + 3] = in[src_index + 17];
- dst_index += 4;
- src_index += 2;
- row_index += 1;
- } while (row_index < 8);
-
- // row 8, 9, ..., 15
- src_index += 16;
- do {
- out[dst_index] = in[src_index];
- out[dst_index + 1] = in[src_index + 1];
- out[dst_index + 2] = in[src_index + 16];
- out[dst_index + 3] = in[src_index + 17];
- dst_index += 4;
- src_index += 2;
- row_index += 1;
- } while (row_index < 16);
-}
-
-static INLINE void load_buffer_16x16(const int16_t *input, __m128i *out,
- int stride, int flipud, int fliplr,
- int shift) {
- __m128i in[64];
- // Load 4 8x8 blocks
- const int16_t *topL = input;
- const int16_t *topR = input + 8;
- const int16_t *botL = input + 8 * stride;
- const int16_t *botR = input + 8 * stride + 8;
-
- const int16_t *tmp;
-
- if (flipud) {
- // Swap left columns
- tmp = topL;
- topL = botL;
- botL = tmp;
- // Swap right columns
- tmp = topR;
- topR = botR;
- botR = tmp;
- }
-
- if (fliplr) {
- // Swap top rows
- tmp = topL;
- topL = topR;
- topR = tmp;
- // Swap bottom rows
- tmp = botL;
- botL = botR;
- botR = tmp;
- }
-
- // load first 8 columns
- load_buffer_8x8(topL, &in[0], stride, flipud, fliplr, shift);
- load_buffer_8x8(botL, &in[32], stride, flipud, fliplr, shift);
-
- // load second 8 columns
- load_buffer_8x8(topR, &in[16], stride, flipud, fliplr, shift);
- load_buffer_8x8(botR, &in[48], stride, flipud, fliplr, shift);
-
- convert_8x8_to_16x16(in, out);
-}
-
-static INLINE void load_buffer_8x16(const int16_t *input, __m128i *out,
- int stride, int flipud, int fliplr,
- int shift) {
- const int16_t *topL = input;
- const int16_t *botL = input + 8 * stride;
-
- const int16_t *tmp;
-
- if (flipud) {
- tmp = topL;
- topL = botL;
- botL = tmp;
- }
-
- load_buffer_8x8(topL, out, stride, flipud, fliplr, shift);
- load_buffer_8x8(botL, out + 16, stride, flipud, fliplr, shift);
-}
-
-static void fdct16x16_sse4_1(__m128i *in, __m128i *out, int bit,
- const int col_num) {
- const int32_t *cospi = cospi_arr(bit);
- const __m128i cospi32 = _mm_set1_epi32(cospi[32]);
- const __m128i cospim32 = _mm_set1_epi32(-cospi[32]);
- const __m128i cospi48 = _mm_set1_epi32(cospi[48]);
- const __m128i cospi16 = _mm_set1_epi32(cospi[16]);
- const __m128i cospim48 = _mm_set1_epi32(-cospi[48]);
- const __m128i cospim16 = _mm_set1_epi32(-cospi[16]);
- const __m128i cospi56 = _mm_set1_epi32(cospi[56]);
- const __m128i cospi8 = _mm_set1_epi32(cospi[8]);
- const __m128i cospi24 = _mm_set1_epi32(cospi[24]);
- const __m128i cospi40 = _mm_set1_epi32(cospi[40]);
- const __m128i cospi60 = _mm_set1_epi32(cospi[60]);
- const __m128i cospi4 = _mm_set1_epi32(cospi[4]);
- const __m128i cospi28 = _mm_set1_epi32(cospi[28]);
- const __m128i cospi36 = _mm_set1_epi32(cospi[36]);
- const __m128i cospi44 = _mm_set1_epi32(cospi[44]);
- const __m128i cospi20 = _mm_set1_epi32(cospi[20]);
- const __m128i cospi12 = _mm_set1_epi32(cospi[12]);
- const __m128i cospi52 = _mm_set1_epi32(cospi[52]);
- const __m128i rnding = _mm_set1_epi32(1 << (bit - 1));
- __m128i u[16], v[16], x;
- int col;
-
- // Calculate the column 0, 1, 2, 3
- for (col = 0; col < col_num; ++col) {
- // stage 0
- // stage 1
- u[0] = _mm_add_epi32(in[0 * col_num + col], in[15 * col_num + col]);
- u[15] = _mm_sub_epi32(in[0 * col_num + col], in[15 * col_num + col]);
- u[1] = _mm_add_epi32(in[1 * col_num + col], in[14 * col_num + col]);
- u[14] = _mm_sub_epi32(in[1 * col_num + col], in[14 * col_num + col]);
- u[2] = _mm_add_epi32(in[2 * col_num + col], in[13 * col_num + col]);
- u[13] = _mm_sub_epi32(in[2 * col_num + col], in[13 * col_num + col]);
- u[3] = _mm_add_epi32(in[3 * col_num + col], in[12 * col_num + col]);
- u[12] = _mm_sub_epi32(in[3 * col_num + col], in[12 * col_num + col]);
- u[4] = _mm_add_epi32(in[4 * col_num + col], in[11 * col_num + col]);
- u[11] = _mm_sub_epi32(in[4 * col_num + col], in[11 * col_num + col]);
- u[5] = _mm_add_epi32(in[5 * col_num + col], in[10 * col_num + col]);
- u[10] = _mm_sub_epi32(in[5 * col_num + col], in[10 * col_num + col]);
- u[6] = _mm_add_epi32(in[6 * col_num + col], in[9 * col_num + col]);
- u[9] = _mm_sub_epi32(in[6 * col_num + col], in[9 * col_num + col]);
- u[7] = _mm_add_epi32(in[7 * col_num + col], in[8 * col_num + col]);
- u[8] = _mm_sub_epi32(in[7 * col_num + col], in[8 * col_num + col]);
-
- // stage 2
- v[0] = _mm_add_epi32(u[0], u[7]);
- v[7] = _mm_sub_epi32(u[0], u[7]);
- v[1] = _mm_add_epi32(u[1], u[6]);
- v[6] = _mm_sub_epi32(u[1], u[6]);
- v[2] = _mm_add_epi32(u[2], u[5]);
- v[5] = _mm_sub_epi32(u[2], u[5]);
- v[3] = _mm_add_epi32(u[3], u[4]);
- v[4] = _mm_sub_epi32(u[3], u[4]);
- v[8] = u[8];
- v[9] = u[9];
-
- v[10] = _mm_mullo_epi32(u[10], cospim32);
- x = _mm_mullo_epi32(u[13], cospi32);
- v[10] = _mm_add_epi32(v[10], x);
- v[10] = _mm_add_epi32(v[10], rnding);
- v[10] = _mm_srai_epi32(v[10], bit);
-
- v[13] = _mm_mullo_epi32(u[10], cospi32);
- x = _mm_mullo_epi32(u[13], cospim32);
- v[13] = _mm_sub_epi32(v[13], x);
- v[13] = _mm_add_epi32(v[13], rnding);
- v[13] = _mm_srai_epi32(v[13], bit);
-
- v[11] = _mm_mullo_epi32(u[11], cospim32);
- x = _mm_mullo_epi32(u[12], cospi32);
- v[11] = _mm_add_epi32(v[11], x);
- v[11] = _mm_add_epi32(v[11], rnding);
- v[11] = _mm_srai_epi32(v[11], bit);
-
- v[12] = _mm_mullo_epi32(u[11], cospi32);
- x = _mm_mullo_epi32(u[12], cospim32);
- v[12] = _mm_sub_epi32(v[12], x);
- v[12] = _mm_add_epi32(v[12], rnding);
- v[12] = _mm_srai_epi32(v[12], bit);
- v[14] = u[14];
- v[15] = u[15];
-
- // stage 3
- u[0] = _mm_add_epi32(v[0], v[3]);
- u[3] = _mm_sub_epi32(v[0], v[3]);
- u[1] = _mm_add_epi32(v[1], v[2]);
- u[2] = _mm_sub_epi32(v[1], v[2]);
- u[4] = v[4];
-
- u[5] = _mm_mullo_epi32(v[5], cospim32);
- x = _mm_mullo_epi32(v[6], cospi32);
- u[5] = _mm_add_epi32(u[5], x);
- u[5] = _mm_add_epi32(u[5], rnding);
- u[5] = _mm_srai_epi32(u[5], bit);
-
- u[6] = _mm_mullo_epi32(v[5], cospi32);
- x = _mm_mullo_epi32(v[6], cospim32);
- u[6] = _mm_sub_epi32(u[6], x);
- u[6] = _mm_add_epi32(u[6], rnding);
- u[6] = _mm_srai_epi32(u[6], bit);
-
- u[7] = v[7];
- u[8] = _mm_add_epi32(v[8], v[11]);
- u[11] = _mm_sub_epi32(v[8], v[11]);
- u[9] = _mm_add_epi32(v[9], v[10]);
- u[10] = _mm_sub_epi32(v[9], v[10]);
- u[12] = _mm_sub_epi32(v[15], v[12]);
- u[15] = _mm_add_epi32(v[15], v[12]);
- u[13] = _mm_sub_epi32(v[14], v[13]);
- u[14] = _mm_add_epi32(v[14], v[13]);
-
- // stage 4
- u[0] = _mm_mullo_epi32(u[0], cospi32);
- u[1] = _mm_mullo_epi32(u[1], cospi32);
- v[0] = _mm_add_epi32(u[0], u[1]);
- v[0] = _mm_add_epi32(v[0], rnding);
- v[0] = _mm_srai_epi32(v[0], bit);
-
- v[1] = _mm_sub_epi32(u[0], u[1]);
- v[1] = _mm_add_epi32(v[1], rnding);
- v[1] = _mm_srai_epi32(v[1], bit);
-
- v[2] = _mm_mullo_epi32(u[2], cospi48);
- x = _mm_mullo_epi32(u[3], cospi16);
- v[2] = _mm_add_epi32(v[2], x);
- v[2] = _mm_add_epi32(v[2], rnding);
- v[2] = _mm_srai_epi32(v[2], bit);
-
- v[3] = _mm_mullo_epi32(u[2], cospi16);
- x = _mm_mullo_epi32(u[3], cospi48);
- v[3] = _mm_sub_epi32(x, v[3]);
- v[3] = _mm_add_epi32(v[3], rnding);
- v[3] = _mm_srai_epi32(v[3], bit);
-
- v[4] = _mm_add_epi32(u[4], u[5]);
- v[5] = _mm_sub_epi32(u[4], u[5]);
- v[6] = _mm_sub_epi32(u[7], u[6]);
- v[7] = _mm_add_epi32(u[7], u[6]);
- v[8] = u[8];
-
- v[9] = _mm_mullo_epi32(u[9], cospim16);
- x = _mm_mullo_epi32(u[14], cospi48);
- v[9] = _mm_add_epi32(v[9], x);
- v[9] = _mm_add_epi32(v[9], rnding);
- v[9] = _mm_srai_epi32(v[9], bit);
-
- v[14] = _mm_mullo_epi32(u[9], cospi48);
- x = _mm_mullo_epi32(u[14], cospim16);
- v[14] = _mm_sub_epi32(v[14], x);
- v[14] = _mm_add_epi32(v[14], rnding);
- v[14] = _mm_srai_epi32(v[14], bit);
-
- v[10] = _mm_mullo_epi32(u[10], cospim48);
- x = _mm_mullo_epi32(u[13], cospim16);
- v[10] = _mm_add_epi32(v[10], x);
- v[10] = _mm_add_epi32(v[10], rnding);
- v[10] = _mm_srai_epi32(v[10], bit);
-
- v[13] = _mm_mullo_epi32(u[10], cospim16);
- x = _mm_mullo_epi32(u[13], cospim48);
- v[13] = _mm_sub_epi32(v[13], x);
- v[13] = _mm_add_epi32(v[13], rnding);
- v[13] = _mm_srai_epi32(v[13], bit);
-
- v[11] = u[11];
- v[12] = u[12];
- v[15] = u[15];
-
- // stage 5
- u[0] = v[0];
- u[1] = v[1];
- u[2] = v[2];
- u[3] = v[3];
-
- u[4] = _mm_mullo_epi32(v[4], cospi56);
- x = _mm_mullo_epi32(v[7], cospi8);
- u[4] = _mm_add_epi32(u[4], x);
- u[4] = _mm_add_epi32(u[4], rnding);
- u[4] = _mm_srai_epi32(u[4], bit);
-
- u[7] = _mm_mullo_epi32(v[4], cospi8);
- x = _mm_mullo_epi32(v[7], cospi56);
- u[7] = _mm_sub_epi32(x, u[7]);
- u[7] = _mm_add_epi32(u[7], rnding);
- u[7] = _mm_srai_epi32(u[7], bit);
-
- u[5] = _mm_mullo_epi32(v[5], cospi24);
- x = _mm_mullo_epi32(v[6], cospi40);
- u[5] = _mm_add_epi32(u[5], x);
- u[5] = _mm_add_epi32(u[5], rnding);
- u[5] = _mm_srai_epi32(u[5], bit);
-
- u[6] = _mm_mullo_epi32(v[5], cospi40);
- x = _mm_mullo_epi32(v[6], cospi24);
- u[6] = _mm_sub_epi32(x, u[6]);
- u[6] = _mm_add_epi32(u[6], rnding);
- u[6] = _mm_srai_epi32(u[6], bit);
-
- u[8] = _mm_add_epi32(v[8], v[9]);
- u[9] = _mm_sub_epi32(v[8], v[9]);
- u[10] = _mm_sub_epi32(v[11], v[10]);
- u[11] = _mm_add_epi32(v[11], v[10]);
- u[12] = _mm_add_epi32(v[12], v[13]);
- u[13] = _mm_sub_epi32(v[12], v[13]);
- u[14] = _mm_sub_epi32(v[15], v[14]);
- u[15] = _mm_add_epi32(v[15], v[14]);
-
- // stage 6
- v[0] = u[0];
- v[1] = u[1];
- v[2] = u[2];
- v[3] = u[3];
- v[4] = u[4];
- v[5] = u[5];
- v[6] = u[6];
- v[7] = u[7];
-
- v[8] = _mm_mullo_epi32(u[8], cospi60);
- x = _mm_mullo_epi32(u[15], cospi4);
- v[8] = _mm_add_epi32(v[8], x);
- v[8] = _mm_add_epi32(v[8], rnding);
- v[8] = _mm_srai_epi32(v[8], bit);
-
- v[15] = _mm_mullo_epi32(u[8], cospi4);
- x = _mm_mullo_epi32(u[15], cospi60);
- v[15] = _mm_sub_epi32(x, v[15]);
- v[15] = _mm_add_epi32(v[15], rnding);
- v[15] = _mm_srai_epi32(v[15], bit);
-
- v[9] = _mm_mullo_epi32(u[9], cospi28);
- x = _mm_mullo_epi32(u[14], cospi36);
- v[9] = _mm_add_epi32(v[9], x);
- v[9] = _mm_add_epi32(v[9], rnding);
- v[9] = _mm_srai_epi32(v[9], bit);
-
- v[14] = _mm_mullo_epi32(u[9], cospi36);
- x = _mm_mullo_epi32(u[14], cospi28);
- v[14] = _mm_sub_epi32(x, v[14]);
- v[14] = _mm_add_epi32(v[14], rnding);
- v[14] = _mm_srai_epi32(v[14], bit);
-
- v[10] = _mm_mullo_epi32(u[10], cospi44);
- x = _mm_mullo_epi32(u[13], cospi20);
- v[10] = _mm_add_epi32(v[10], x);
- v[10] = _mm_add_epi32(v[10], rnding);
- v[10] = _mm_srai_epi32(v[10], bit);
-
- v[13] = _mm_mullo_epi32(u[10], cospi20);
- x = _mm_mullo_epi32(u[13], cospi44);
- v[13] = _mm_sub_epi32(x, v[13]);
- v[13] = _mm_add_epi32(v[13], rnding);
- v[13] = _mm_srai_epi32(v[13], bit);
-
- v[11] = _mm_mullo_epi32(u[11], cospi12);
- x = _mm_mullo_epi32(u[12], cospi52);
- v[11] = _mm_add_epi32(v[11], x);
- v[11] = _mm_add_epi32(v[11], rnding);
- v[11] = _mm_srai_epi32(v[11], bit);
-
- v[12] = _mm_mullo_epi32(u[11], cospi52);
- x = _mm_mullo_epi32(u[12], cospi12);
- v[12] = _mm_sub_epi32(x, v[12]);
- v[12] = _mm_add_epi32(v[12], rnding);
- v[12] = _mm_srai_epi32(v[12], bit);
-
- out[0 * col_num + col] = v[0];
- out[1 * col_num + col] = v[8];
- out[2 * col_num + col] = v[4];
- out[3 * col_num + col] = v[12];
- out[4 * col_num + col] = v[2];
- out[5 * col_num + col] = v[10];
- out[6 * col_num + col] = v[6];
- out[7 * col_num + col] = v[14];
- out[8 * col_num + col] = v[1];
- out[9 * col_num + col] = v[9];
- out[10 * col_num + col] = v[5];
- out[11 * col_num + col] = v[13];
- out[12 * col_num + col] = v[3];
- out[13 * col_num + col] = v[11];
- out[14 * col_num + col] = v[7];
- out[15 * col_num + col] = v[15];
- }
-}
-
-static void fadst16x16_sse4_1(__m128i *in, __m128i *out, int bit,
- const int num_cols) {
- const int32_t *cospi = cospi_arr(bit);
- const __m128i cospi32 = _mm_set1_epi32(cospi[32]);
- const __m128i cospi48 = _mm_set1_epi32(cospi[48]);
- const __m128i cospi16 = _mm_set1_epi32(cospi[16]);
- const __m128i cospim16 = _mm_set1_epi32(-cospi[16]);
- const __m128i cospim48 = _mm_set1_epi32(-cospi[48]);
- const __m128i cospi8 = _mm_set1_epi32(cospi[8]);
- const __m128i cospi56 = _mm_set1_epi32(cospi[56]);
- const __m128i cospim56 = _mm_set1_epi32(-cospi[56]);
- const __m128i cospim8 = _mm_set1_epi32(-cospi[8]);
- const __m128i cospi24 = _mm_set1_epi32(cospi[24]);
- const __m128i cospim24 = _mm_set1_epi32(-cospi[24]);
- const __m128i cospim40 = _mm_set1_epi32(-cospi[40]);
- const __m128i cospi40 = _mm_set1_epi32(cospi[40]);
- const __m128i cospi2 = _mm_set1_epi32(cospi[2]);
- const __m128i cospi62 = _mm_set1_epi32(cospi[62]);
- const __m128i cospim2 = _mm_set1_epi32(-cospi[2]);
- const __m128i cospi10 = _mm_set1_epi32(cospi[10]);
- const __m128i cospi54 = _mm_set1_epi32(cospi[54]);
- const __m128i cospim10 = _mm_set1_epi32(-cospi[10]);
- const __m128i cospi18 = _mm_set1_epi32(cospi[18]);
- const __m128i cospi46 = _mm_set1_epi32(cospi[46]);
- const __m128i cospim18 = _mm_set1_epi32(-cospi[18]);
- const __m128i cospi26 = _mm_set1_epi32(cospi[26]);
- const __m128i cospi38 = _mm_set1_epi32(cospi[38]);
- const __m128i cospim26 = _mm_set1_epi32(-cospi[26]);
- const __m128i cospi34 = _mm_set1_epi32(cospi[34]);
- const __m128i cospi30 = _mm_set1_epi32(cospi[30]);
- const __m128i cospim34 = _mm_set1_epi32(-cospi[34]);
- const __m128i cospi42 = _mm_set1_epi32(cospi[42]);
- const __m128i cospi22 = _mm_set1_epi32(cospi[22]);
- const __m128i cospim42 = _mm_set1_epi32(-cospi[42]);
- const __m128i cospi50 = _mm_set1_epi32(cospi[50]);
- const __m128i cospi14 = _mm_set1_epi32(cospi[14]);
- const __m128i cospim50 = _mm_set1_epi32(-cospi[50]);
- const __m128i cospi58 = _mm_set1_epi32(cospi[58]);
- const __m128i cospi6 = _mm_set1_epi32(cospi[6]);
- const __m128i cospim58 = _mm_set1_epi32(-cospi[58]);
- const __m128i rnding = _mm_set1_epi32(1 << (bit - 1));
- const __m128i zero = _mm_setzero_si128();
-
- __m128i u[16], v[16], x, y;
- int col;
-
- for (col = 0; col < num_cols; ++col) {
- // stage 0
- // stage 1
- u[0] = in[0 * num_cols + col];
- u[1] = _mm_sub_epi32(zero, in[15 * num_cols + col]);
- u[2] = _mm_sub_epi32(zero, in[7 * num_cols + col]);
- u[3] = in[8 * num_cols + col];
- u[4] = _mm_sub_epi32(zero, in[3 * num_cols + col]);
- u[5] = in[12 * num_cols + col];
- u[6] = in[4 * num_cols + col];
- u[7] = _mm_sub_epi32(zero, in[11 * num_cols + col]);
- u[8] = _mm_sub_epi32(zero, in[1 * num_cols + col]);
- u[9] = in[14 * num_cols + col];
- u[10] = in[6 * num_cols + col];
- u[11] = _mm_sub_epi32(zero, in[9 * num_cols + col]);
- u[12] = in[2 * num_cols + col];
- u[13] = _mm_sub_epi32(zero, in[13 * num_cols + col]);
- u[14] = _mm_sub_epi32(zero, in[5 * num_cols + col]);
- u[15] = in[10 * num_cols + col];
-
- // stage 2
- v[0] = u[0];
- v[1] = u[1];
-
- x = _mm_mullo_epi32(u[2], cospi32);
- y = _mm_mullo_epi32(u[3], cospi32);
- v[2] = _mm_add_epi32(x, y);
- v[2] = _mm_add_epi32(v[2], rnding);
- v[2] = _mm_srai_epi32(v[2], bit);
-
- v[3] = _mm_sub_epi32(x, y);
- v[3] = _mm_add_epi32(v[3], rnding);
- v[3] = _mm_srai_epi32(v[3], bit);
-
- v[4] = u[4];
- v[5] = u[5];
-
- x = _mm_mullo_epi32(u[6], cospi32);
- y = _mm_mullo_epi32(u[7], cospi32);
- v[6] = _mm_add_epi32(x, y);
- v[6] = _mm_add_epi32(v[6], rnding);
- v[6] = _mm_srai_epi32(v[6], bit);
-
- v[7] = _mm_sub_epi32(x, y);
- v[7] = _mm_add_epi32(v[7], rnding);
- v[7] = _mm_srai_epi32(v[7], bit);
-
- v[8] = u[8];
- v[9] = u[9];
-
- x = _mm_mullo_epi32(u[10], cospi32);
- y = _mm_mullo_epi32(u[11], cospi32);
- v[10] = _mm_add_epi32(x, y);
- v[10] = _mm_add_epi32(v[10], rnding);
- v[10] = _mm_srai_epi32(v[10], bit);
-
- v[11] = _mm_sub_epi32(x, y);
- v[11] = _mm_add_epi32(v[11], rnding);
- v[11] = _mm_srai_epi32(v[11], bit);
-
- v[12] = u[12];
- v[13] = u[13];
-
- x = _mm_mullo_epi32(u[14], cospi32);
- y = _mm_mullo_epi32(u[15], cospi32);
- v[14] = _mm_add_epi32(x, y);
- v[14] = _mm_add_epi32(v[14], rnding);
- v[14] = _mm_srai_epi32(v[14], bit);
-
- v[15] = _mm_sub_epi32(x, y);
- v[15] = _mm_add_epi32(v[15], rnding);
- v[15] = _mm_srai_epi32(v[15], bit);
-
- // stage 3
- u[0] = _mm_add_epi32(v[0], v[2]);
- u[1] = _mm_add_epi32(v[1], v[3]);
- u[2] = _mm_sub_epi32(v[0], v[2]);
- u[3] = _mm_sub_epi32(v[1], v[3]);
- u[4] = _mm_add_epi32(v[4], v[6]);
- u[5] = _mm_add_epi32(v[5], v[7]);
- u[6] = _mm_sub_epi32(v[4], v[6]);
- u[7] = _mm_sub_epi32(v[5], v[7]);
- u[8] = _mm_add_epi32(v[8], v[10]);
- u[9] = _mm_add_epi32(v[9], v[11]);
- u[10] = _mm_sub_epi32(v[8], v[10]);
- u[11] = _mm_sub_epi32(v[9], v[11]);
- u[12] = _mm_add_epi32(v[12], v[14]);
- u[13] = _mm_add_epi32(v[13], v[15]);
- u[14] = _mm_sub_epi32(v[12], v[14]);
- u[15] = _mm_sub_epi32(v[13], v[15]);
-
- // stage 4
- v[0] = u[0];
- v[1] = u[1];
- v[2] = u[2];
- v[3] = u[3];
- v[4] = half_btf_sse4_1(&cospi16, &u[4], &cospi48, &u[5], &rnding, bit);
- v[5] = half_btf_sse4_1(&cospi48, &u[4], &cospim16, &u[5], &rnding, bit);
- v[6] = half_btf_sse4_1(&cospim48, &u[6], &cospi16, &u[7], &rnding, bit);
- v[7] = half_btf_sse4_1(&cospi16, &u[6], &cospi48, &u[7], &rnding, bit);
- v[8] = u[8];
- v[9] = u[9];
- v[10] = u[10];
- v[11] = u[11];
- v[12] = half_btf_sse4_1(&cospi16, &u[12], &cospi48, &u[13], &rnding, bit);
- v[13] = half_btf_sse4_1(&cospi48, &u[12], &cospim16, &u[13], &rnding, bit);
- v[14] = half_btf_sse4_1(&cospim48, &u[14], &cospi16, &u[15], &rnding, bit);
- v[15] = half_btf_sse4_1(&cospi16, &u[14], &cospi48, &u[15], &rnding, bit);
-
- // stage 5
- u[0] = _mm_add_epi32(v[0], v[4]);
- u[1] = _mm_add_epi32(v[1], v[5]);
- u[2] = _mm_add_epi32(v[2], v[6]);
- u[3] = _mm_add_epi32(v[3], v[7]);
- u[4] = _mm_sub_epi32(v[0], v[4]);
- u[5] = _mm_sub_epi32(v[1], v[5]);
- u[6] = _mm_sub_epi32(v[2], v[6]);
- u[7] = _mm_sub_epi32(v[3], v[7]);
- u[8] = _mm_add_epi32(v[8], v[12]);
- u[9] = _mm_add_epi32(v[9], v[13]);
- u[10] = _mm_add_epi32(v[10], v[14]);
- u[11] = _mm_add_epi32(v[11], v[15]);
- u[12] = _mm_sub_epi32(v[8], v[12]);
- u[13] = _mm_sub_epi32(v[9], v[13]);
- u[14] = _mm_sub_epi32(v[10], v[14]);
- u[15] = _mm_sub_epi32(v[11], v[15]);
-
- // stage 6
- v[0] = u[0];
- v[1] = u[1];
- v[2] = u[2];
- v[3] = u[3];
- v[4] = u[4];
- v[5] = u[5];
- v[6] = u[6];
- v[7] = u[7];
- v[8] = half_btf_sse4_1(&cospi8, &u[8], &cospi56, &u[9], &rnding, bit);
- v[9] = half_btf_sse4_1(&cospi56, &u[8], &cospim8, &u[9], &rnding, bit);
- v[10] = half_btf_sse4_1(&cospi40, &u[10], &cospi24, &u[11], &rnding, bit);
- v[11] = half_btf_sse4_1(&cospi24, &u[10], &cospim40, &u[11], &rnding, bit);
- v[12] = half_btf_sse4_1(&cospim56, &u[12], &cospi8, &u[13], &rnding, bit);
- v[13] = half_btf_sse4_1(&cospi8, &u[12], &cospi56, &u[13], &rnding, bit);
- v[14] = half_btf_sse4_1(&cospim24, &u[14], &cospi40, &u[15], &rnding, bit);
- v[15] = half_btf_sse4_1(&cospi40, &u[14], &cospi24, &u[15], &rnding, bit);
-
- // stage 7
- u[0] = _mm_add_epi32(v[0], v[8]);
- u[1] = _mm_add_epi32(v[1], v[9]);
- u[2] = _mm_add_epi32(v[2], v[10]);
- u[3] = _mm_add_epi32(v[3], v[11]);
- u[4] = _mm_add_epi32(v[4], v[12]);
- u[5] = _mm_add_epi32(v[5], v[13]);
- u[6] = _mm_add_epi32(v[6], v[14]);
- u[7] = _mm_add_epi32(v[7], v[15]);
- u[8] = _mm_sub_epi32(v[0], v[8]);
- u[9] = _mm_sub_epi32(v[1], v[9]);
- u[10] = _mm_sub_epi32(v[2], v[10]);
- u[11] = _mm_sub_epi32(v[3], v[11]);
- u[12] = _mm_sub_epi32(v[4], v[12]);
- u[13] = _mm_sub_epi32(v[5], v[13]);
- u[14] = _mm_sub_epi32(v[6], v[14]);
- u[15] = _mm_sub_epi32(v[7], v[15]);
-
- // stage 8
- v[0] = half_btf_sse4_1(&cospi2, &u[0], &cospi62, &u[1], &rnding, bit);
- v[1] = half_btf_sse4_1(&cospi62, &u[0], &cospim2, &u[1], &rnding, bit);
- v[2] = half_btf_sse4_1(&cospi10, &u[2], &cospi54, &u[3], &rnding, bit);
- v[3] = half_btf_sse4_1(&cospi54, &u[2], &cospim10, &u[3], &rnding, bit);
- v[4] = half_btf_sse4_1(&cospi18, &u[4], &cospi46, &u[5], &rnding, bit);
- v[5] = half_btf_sse4_1(&cospi46, &u[4], &cospim18, &u[5], &rnding, bit);
- v[6] = half_btf_sse4_1(&cospi26, &u[6], &cospi38, &u[7], &rnding, bit);
- v[7] = half_btf_sse4_1(&cospi38, &u[6], &cospim26, &u[7], &rnding, bit);
- v[8] = half_btf_sse4_1(&cospi34, &u[8], &cospi30, &u[9], &rnding, bit);
- v[9] = half_btf_sse4_1(&cospi30, &u[8], &cospim34, &u[9], &rnding, bit);
- v[10] = half_btf_sse4_1(&cospi42, &u[10], &cospi22, &u[11], &rnding, bit);
- v[11] = half_btf_sse4_1(&cospi22, &u[10], &cospim42, &u[11], &rnding, bit);
- v[12] = half_btf_sse4_1(&cospi50, &u[12], &cospi14, &u[13], &rnding, bit);
- v[13] = half_btf_sse4_1(&cospi14, &u[12], &cospim50, &u[13], &rnding, bit);
- v[14] = half_btf_sse4_1(&cospi58, &u[14], &cospi6, &u[15], &rnding, bit);
- v[15] = half_btf_sse4_1(&cospi6, &u[14], &cospim58, &u[15], &rnding, bit);
-
- // stage 9
- out[0 * num_cols + col] = v[1];
- out[1 * num_cols + col] = v[14];
- out[2 * num_cols + col] = v[3];
- out[3 * num_cols + col] = v[12];
- out[4 * num_cols + col] = v[5];
- out[5 * num_cols + col] = v[10];
- out[6 * num_cols + col] = v[7];
- out[7 * num_cols + col] = v[8];
- out[8 * num_cols + col] = v[9];
- out[9 * num_cols + col] = v[6];
- out[10 * num_cols + col] = v[11];
- out[11 * num_cols + col] = v[4];
- out[12 * num_cols + col] = v[13];
- out[13 * num_cols + col] = v[2];
- out[14 * num_cols + col] = v[15];
- out[15 * num_cols + col] = v[0];
- }
-}
-
-static void col_txfm_16x16_rounding(__m128i *in, int shift) {
- // Note:
- // We split 16x16 rounding into 4 sections of 8x8 rounding,
- // instead of 4 columns
- col_txfm_8x8_rounding(&in[0], shift);
- col_txfm_8x8_rounding(&in[16], shift);
- col_txfm_8x8_rounding(&in[32], shift);
- col_txfm_8x8_rounding(&in[48], shift);
-}
-
-static void col_txfm_8x16_rounding(__m128i *in, int shift) {
- col_txfm_8x8_rounding(&in[0], shift);
- col_txfm_8x8_rounding(&in[16], shift);
-}
-
-static void write_buffer_16x16(const __m128i *in, int32_t *output) {
- const int size_8x8 = 16 * 4;
- write_buffer_8x8(&in[0], output);
- output += size_8x8;
- write_buffer_8x8(&in[16], output);
- output += size_8x8;
- write_buffer_8x8(&in[32], output);
- output += size_8x8;
- write_buffer_8x8(&in[48], output);
-}
-
-void av1_fwd_txfm2d_16x16_sse4_1(const int16_t *input, int32_t *coeff,
- int stride, TX_TYPE tx_type, int bd) {
- __m128i in[64], out[64];
- const int8_t *shift = fwd_txfm_shift_ls[TX_16X16];
- const int txw_idx = get_txw_idx(TX_16X16);
- const int txh_idx = get_txh_idx(TX_16X16);
- const int col_num = 4;
- switch (tx_type) {
- case DCT_DCT:
- load_buffer_16x16(input, in, stride, 0, 0, shift[0]);
- fdct16x16_sse4_1(in, out, fwd_cos_bit_col[txw_idx][txh_idx], col_num);
- col_txfm_16x16_rounding(out, -shift[1]);
- transpose_16x16(out, in);
- fdct16x16_sse4_1(in, out, fwd_cos_bit_row[txw_idx][txh_idx], col_num);
- transpose_16x16(out, in);
- write_buffer_16x16(in, coeff);
- break;
- case ADST_DCT:
- load_buffer_16x16(input, in, stride, 0, 0, shift[0]);
- fadst16x16_sse4_1(in, out, fwd_cos_bit_col[txw_idx][txh_idx], col_num);
- col_txfm_16x16_rounding(out, -shift[1]);
- transpose_16x16(out, in);
- fdct16x16_sse4_1(in, out, fwd_cos_bit_row[txw_idx][txh_idx], col_num);
- transpose_16x16(out, in);
- write_buffer_16x16(in, coeff);
- break;
- case DCT_ADST:
- load_buffer_16x16(input, in, stride, 0, 0, shift[0]);
- fdct16x16_sse4_1(in, out, fwd_cos_bit_col[txw_idx][txh_idx], col_num);
- col_txfm_16x16_rounding(out, -shift[1]);
- transpose_16x16(out, in);
- fadst16x16_sse4_1(in, out, fwd_cos_bit_row[txw_idx][txh_idx], col_num);
- transpose_16x16(out, in);
- write_buffer_16x16(in, coeff);
- break;
- case ADST_ADST:
- load_buffer_16x16(input, in, stride, 0, 0, shift[0]);
- fadst16x16_sse4_1(in, out, fwd_cos_bit_col[txw_idx][txh_idx], col_num);
- col_txfm_16x16_rounding(out, -shift[1]);
- transpose_16x16(out, in);
- fadst16x16_sse4_1(in, out, fwd_cos_bit_row[txw_idx][txh_idx], col_num);
- transpose_16x16(out, in);
- write_buffer_16x16(in, coeff);
- break;
- case FLIPADST_DCT:
- load_buffer_16x16(input, in, stride, 1, 0, shift[0]);
- fadst16x16_sse4_1(in, out, fwd_cos_bit_col[txw_idx][txh_idx], col_num);
- col_txfm_16x16_rounding(out, -shift[1]);
- transpose_16x16(out, in);
- fdct16x16_sse4_1(in, out, fwd_cos_bit_row[txw_idx][txh_idx], col_num);
- transpose_16x16(out, in);
- write_buffer_16x16(in, coeff);
- break;
- case DCT_FLIPADST:
- load_buffer_16x16(input, in, stride, 0, 1, shift[0]);
- fdct16x16_sse4_1(in, out, fwd_cos_bit_col[txw_idx][txh_idx], col_num);
- col_txfm_16x16_rounding(out, -shift[1]);
- transpose_16x16(out, in);
- fadst16x16_sse4_1(in, out, fwd_cos_bit_row[txw_idx][txh_idx], col_num);
- transpose_16x16(out, in);
- write_buffer_16x16(in, coeff);
- break;
- case FLIPADST_FLIPADST:
- load_buffer_16x16(input, in, stride, 1, 1, shift[0]);
- fadst16x16_sse4_1(in, out, fwd_cos_bit_col[txw_idx][txh_idx], col_num);
- col_txfm_16x16_rounding(out, -shift[1]);
- transpose_16x16(out, in);
- fadst16x16_sse4_1(in, out, fwd_cos_bit_row[txw_idx][txh_idx], col_num);
- transpose_16x16(out, in);
- write_buffer_16x16(in, coeff);
- break;
- case ADST_FLIPADST:
- load_buffer_16x16(input, in, stride, 0, 1, shift[0]);
- fadst16x16_sse4_1(in, out, fwd_cos_bit_col[txw_idx][txh_idx], col_num);
- col_txfm_16x16_rounding(out, -shift[1]);
- transpose_16x16(out, in);
- fadst16x16_sse4_1(in, out, fwd_cos_bit_row[txw_idx][txh_idx], col_num);
- transpose_16x16(out, in);
- write_buffer_16x16(in, coeff);
- break;
- case FLIPADST_ADST:
- load_buffer_16x16(input, in, stride, 1, 0, shift[0]);
- fadst16x16_sse4_1(in, out, fwd_cos_bit_col[txw_idx][txh_idx], col_num);
- col_txfm_16x16_rounding(out, -shift[1]);
- transpose_16x16(out, in);
- fadst16x16_sse4_1(in, out, fwd_cos_bit_row[txw_idx][txh_idx], col_num);
- transpose_16x16(out, in);
- write_buffer_16x16(in, coeff);
- break;
- default: assert(0);
- }
- (void)bd;
-}
-
-static INLINE void flip_buf_sse4_1(__m128i *in, __m128i *out, int size) {
- for (int i = 0; i < size; i += 2) in[30 - i] = out[i];
- for (int i = 1; i < size; i += 2) in[size - i] = out[i];
-}
-
-static const fwd_transform_1d_sse4_1 col_highbd_txfm8x8_arr[TX_TYPES] = {
- fdct8x8_sse4_1, // DCT_DCT
- fadst8x8_sse4_1, // ADST_DCT
- fdct8x8_sse4_1, // DCT_ADST
- fadst8x8_sse4_1, // ADST_ADST
- fadst8x8_sse4_1, // FLIPADST_DCT
- fdct8x8_sse4_1, // DCT_FLIPADST
- fadst8x8_sse4_1, // FLIPADST_FLIPADST
- fadst8x8_sse4_1, // ADST_FLIPADST
- fadst8x8_sse4_1, // FLIPADST_ADST
- NULL, // IDTX
- NULL, // V_DCT
- NULL, // H_DCT
- NULL, // V_ADST
- NULL, // H_ADST
- NULL, // V_FLIPADST
- NULL // H_FLIPADST
-};
-
-static const fwd_transform_1d_sse4_1 row_highbd_txfm8x16_arr[TX_TYPES] = {
- fdct16x16_sse4_1, // DCT_DCT
- fdct16x16_sse4_1, // ADST_DCT
- fadst16x16_sse4_1, // DCT_ADST
- fadst16x16_sse4_1, // ADST_ADST
- fdct16x16_sse4_1, // FLIPADST_DCT
- fadst16x16_sse4_1, // DCT_FLIPADST
- fadst16x16_sse4_1, // FLIPADST_FLIPADST
- fadst16x16_sse4_1, // ADST_FLIPADST
- fadst16x16_sse4_1, // FLIPADST_ADST
- NULL, // IDTX
- NULL, // V_DCT
- NULL, // H_DCT
- NULL, // V_ADST
- NULL, // H_ADST
- NULL, // V_FLIPADST
- NULL // H_FLIPADST
-};
-
-static const fwd_transform_1d_sse4_1 col_highbd_txfm8x16_arr[TX_TYPES] = {
- fdct16x16_sse4_1, // DCT_DCT
- fadst16x16_sse4_1, // ADST_DCT
- fdct16x16_sse4_1, // DCT_ADST
- fadst16x16_sse4_1, // ADST_ADST
- fadst16x16_sse4_1, // FLIPADST_DCT
- fdct16x16_sse4_1, // DCT_FLIPADST
- fadst16x16_sse4_1, // FLIPADST_FLIPADST
- fadst16x16_sse4_1, // ADST_FLIPADST
- fadst16x16_sse4_1, // FLIPADST_ADST
- NULL, // IDTX
- NULL, // V_DCT
- NULL, // H_DCT
- NULL, // V_ADST
- NULL, // H_ADST
- NULL, // V_FLIPADST
- NULL // H_FLIPADST
-};
-static const fwd_transform_1d_sse4_1 row_highbd_txfm8x8_arr[TX_TYPES] = {
- fdct8x8_sse4_1, // DCT_DCT
- fdct8x8_sse4_1, // ADST_DCT
- fadst8x8_sse4_1, // DCT_ADST
- fadst8x8_sse4_1, // ADST_ADST
- fdct8x8_sse4_1, // FLIPADST_DCT
- fadst8x8_sse4_1, // DCT_FLIPADST
- fadst8x8_sse4_1, // FLIPADST_FLIPADST
- fadst8x8_sse4_1, // ADST_FLIPADST
- fadst8x8_sse4_1, // FLIPADST_ADST
- NULL, // IDTX
- NULL, // V_DCT
- NULL, // H_DCT
- NULL, // V_ADST
- NULL, // H_ADST
- NULL, // V_FLIPADST
- NULL // H_FLIPADST
-};
-
-void av1_fwd_txfm2d_16x8_sse4_1(const int16_t *input, int32_t *coeff,
- int stride, TX_TYPE tx_type, int bd) {
- __m128i in[32], out[32];
- const int8_t *shift = fwd_txfm_shift_ls[TX_16X8];
- const int txw_idx = get_txw_idx(TX_16X8);
- const int txh_idx = get_txh_idx(TX_16X8);
- const fwd_transform_1d_sse4_1 col_txfm = col_highbd_txfm8x8_arr[tx_type];
- const fwd_transform_1d_sse4_1 row_txfm = row_highbd_txfm8x16_arr[tx_type];
- int bit = fwd_cos_bit_col[txw_idx][txh_idx];
- int ud_flip, lr_flip;
- get_flip_cfg(tx_type, &ud_flip, &lr_flip);
-
- for (int i = 0; i < 2; i++) {
- load_buffer_8x8(input + i * 8, in, stride, ud_flip, 0, shift[0]);
- col_txfm(in, in, bit, 0);
- col_txfm_8x8_rounding(in, -shift[1]);
- transpose_8x8(in, out + i * 16);
- }
-
- if (lr_flip) {
- flip_buf_sse4_1(in, out, 32);
- row_txfm(in, out, bit, 2);
- } else {
- row_txfm(out, out, bit, 2);
- }
-
- for (int i = 0; i < 2; i++) {
- transpose_8x8(out + i * 16, in);
- av1_round_shift_rect_array_32_sse4_1(in, in, 16, -shift[2], NewSqrt2);
- write_buffer_16x8(in, coeff + i * 8, 16);
- }
-
- (void)bd;
-}
-
-void av1_fwd_txfm2d_8x16_sse4_1(const int16_t *input, int32_t *coeff,
- int stride, TX_TYPE tx_type, int bd) {
- __m128i in[32], out[32];
- const int8_t *shift = fwd_txfm_shift_ls[TX_8X16];
- const int txw_idx = get_txw_idx(TX_8X16);
- const int txh_idx = get_txh_idx(TX_8X16);
- const fwd_transform_1d_sse4_1 col_txfm = col_highbd_txfm8x16_arr[tx_type];
- const fwd_transform_1d_sse4_1 row_txfm = row_highbd_txfm8x8_arr[tx_type];
- int bit = fwd_cos_bit_col[txw_idx][txh_idx];
- int ud_flip, lr_flip;
- get_flip_cfg(tx_type, &ud_flip, &lr_flip);
-
- load_buffer_8x16(input, in, stride, ud_flip, lr_flip, shift[0]);
- col_txfm(in, in, bit, 2);
- col_txfm_8x16_rounding(in, -shift[1]);
- transpose_8x8(in, out);
- transpose_8x8(in + 16, out + 16);
-
- for (int i = 0; i < 2; i++) {
- row_txfm(out + i * 16, out, bit, 0);
- transpose_8x8(out, in);
- av1_round_shift_rect_array_32_sse4_1(in, in, 16, -shift[2], NewSqrt2);
- write_buffer_8x8(in, coeff + i * 64);
- }
-
- (void)bd;
-}
diff --git a/third_party/aom/av1/encoder/x86/pickrst_avx2.c b/third_party/aom/av1/encoder/x86/pickrst_avx2.c
deleted file mode 100644
index 06aaaa7ee..000000000
--- a/third_party/aom/av1/encoder/x86/pickrst_avx2.c
+++ /dev/null
@@ -1,403 +0,0 @@
-/*
- * Copyright (c) 2018, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#include <immintrin.h> // AVX2
-#include "aom_dsp/x86/synonyms.h"
-#include "aom_dsp/x86/synonyms_avx2.h"
-#include "aom_dsp/x86/transpose_sse2.h"
-
-#include "config/av1_rtcd.h"
-#include "av1/common/restoration.h"
-#include "av1/encoder/pickrst.h"
-
-static INLINE void acc_stat_avx2(int32_t *dst, const uint8_t *src,
- const __m128i *shuffle, const __m256i *kl) {
- const __m128i s = _mm_shuffle_epi8(xx_loadu_128(src), *shuffle);
- const __m256i d0 = _mm256_madd_epi16(*kl, _mm256_cvtepu8_epi16(s));
- const __m256i dst0 = yy_loadu_256(dst);
- const __m256i r0 = _mm256_add_epi32(dst0, d0);
- yy_storeu_256(dst, r0);
-}
-
-static INLINE void acc_stat_win7_one_line_avx2(
- const uint8_t *dgd, const uint8_t *src, int h_start, int h_end,
- int dgd_stride, const __m128i *shuffle, int32_t *sumX,
- int32_t sumY[WIENER_WIN][WIENER_WIN], int32_t M_int[WIENER_WIN][WIENER_WIN],
- int32_t H_int[WIENER_WIN2][WIENER_WIN * 8]) {
- int j, k, l;
- const int wiener_win = WIENER_WIN;
- for (j = h_start; j < h_end; j += 2) {
- const uint8_t X1 = src[j];
- const uint8_t X2 = src[j + 1];
- *sumX += X1 + X2;
- const uint8_t *dgd_ij = dgd + j;
- for (k = 0; k < wiener_win; k++) {
- const uint8_t *dgd_ijk = dgd_ij + k * dgd_stride;
- for (l = 0; l < wiener_win; l++) {
- int32_t *H_ = &H_int[(l * wiener_win + k)][0];
- const uint8_t D1 = dgd_ijk[l];
- const uint8_t D2 = dgd_ijk[l + 1];
- sumY[k][l] += D1 + D2;
- M_int[k][l] += D1 * X1 + D2 * X2;
-
- const __m256i kl =
- _mm256_cvtepu8_epi16(_mm_set1_epi16(*((uint16_t *)(dgd_ijk + l))));
- acc_stat_avx2(H_ + 0 * 8, dgd_ij + 0 * dgd_stride, shuffle, &kl);
- acc_stat_avx2(H_ + 1 * 8, dgd_ij + 1 * dgd_stride, shuffle, &kl);
- acc_stat_avx2(H_ + 2 * 8, dgd_ij + 2 * dgd_stride, shuffle, &kl);
- acc_stat_avx2(H_ + 3 * 8, dgd_ij + 3 * dgd_stride, shuffle, &kl);
- acc_stat_avx2(H_ + 4 * 8, dgd_ij + 4 * dgd_stride, shuffle, &kl);
- acc_stat_avx2(H_ + 5 * 8, dgd_ij + 5 * dgd_stride, shuffle, &kl);
- acc_stat_avx2(H_ + 6 * 8, dgd_ij + 6 * dgd_stride, shuffle, &kl);
- }
- }
- }
-}
-
-static INLINE void compute_stats_win7_opt_avx2(
- const uint8_t *dgd, const uint8_t *src, int h_start, int h_end, int v_start,
- int v_end, int dgd_stride, int src_stride, double *M, double *H) {
- int i, j, k, l, m, n;
- const int wiener_win = WIENER_WIN;
- const int pixel_count = (h_end - h_start) * (v_end - v_start);
- const int wiener_win2 = wiener_win * wiener_win;
- const int wiener_halfwin = (wiener_win >> 1);
- const double avg =
- find_average(dgd, h_start, h_end, v_start, v_end, dgd_stride);
-
- int32_t M_int32[WIENER_WIN][WIENER_WIN] = { { 0 } };
- int64_t M_int64[WIENER_WIN][WIENER_WIN] = { { 0 } };
- int32_t H_int32[WIENER_WIN2][WIENER_WIN * 8] = { { 0 } };
- int64_t H_int64[WIENER_WIN2][WIENER_WIN * 8] = { { 0 } };
- int32_t sumY[WIENER_WIN][WIENER_WIN] = { { 0 } };
- int32_t sumX = 0;
- const uint8_t *dgd_win = dgd - wiener_halfwin * dgd_stride - wiener_halfwin;
-
- const __m128i shuffle = xx_loadu_128(g_shuffle_stats_data);
- for (j = v_start; j < v_end; j += 64) {
- const int vert_end = AOMMIN(64, v_end - j) + j;
- for (i = j; i < vert_end; i++) {
- acc_stat_win7_one_line_avx2(
- dgd_win + i * dgd_stride, src + i * src_stride, h_start, h_end,
- dgd_stride, &shuffle, &sumX, sumY, M_int32, H_int32);
- }
- for (k = 0; k < wiener_win; ++k) {
- for (l = 0; l < wiener_win; ++l) {
- M_int64[k][l] += M_int32[k][l];
- M_int32[k][l] = 0;
- }
- }
- for (k = 0; k < WIENER_WIN2; ++k) {
- for (l = 0; l < WIENER_WIN * 8; ++l) {
- H_int64[k][l] += H_int32[k][l];
- H_int32[k][l] = 0;
- }
- }
- }
-
- const double avg_square_sum = avg * avg * pixel_count;
- for (k = 0; k < wiener_win; k++) {
- for (l = 0; l < wiener_win; l++) {
- const int32_t idx0 = l * wiener_win + k;
- M[idx0] = M_int64[k][l] + avg_square_sum - avg * (sumX + sumY[k][l]);
- double *H_ = H + idx0 * wiener_win2;
- int64_t *H_int_ = &H_int64[idx0][0];
- for (m = 0; m < wiener_win; m++) {
- for (n = 0; n < wiener_win; n++) {
- H_[m * wiener_win + n] = H_int_[n * 8 + m] + avg_square_sum -
- avg * (sumY[k][l] + sumY[n][m]);
- }
- }
- }
- }
-}
-
-static INLINE void acc_stat_win5_one_line_avx2(
- const uint8_t *dgd, const uint8_t *src, int h_start, int h_end,
- int dgd_stride, const __m128i *shuffle, int32_t *sumX,
- int32_t sumY[WIENER_WIN_CHROMA][WIENER_WIN_CHROMA],
- int32_t M_int[WIENER_WIN_CHROMA][WIENER_WIN_CHROMA],
- int32_t H_int[WIENER_WIN2_CHROMA][WIENER_WIN_CHROMA * 8]) {
- int j, k, l;
- const int wiener_win = WIENER_WIN_CHROMA;
- for (j = h_start; j < h_end; j += 2) {
- const uint8_t X1 = src[j];
- const uint8_t X2 = src[j + 1];
- *sumX += X1 + X2;
- const uint8_t *dgd_ij = dgd + j;
- for (k = 0; k < wiener_win; k++) {
- const uint8_t *dgd_ijk = dgd_ij + k * dgd_stride;
- for (l = 0; l < wiener_win; l++) {
- int32_t *H_ = &H_int[(l * wiener_win + k)][0];
- const uint8_t D1 = dgd_ijk[l];
- const uint8_t D2 = dgd_ijk[l + 1];
- sumY[k][l] += D1 + D2;
- M_int[k][l] += D1 * X1 + D2 * X2;
-
- const __m256i kl =
- _mm256_cvtepu8_epi16(_mm_set1_epi16(*((uint16_t *)(dgd_ijk + l))));
- acc_stat_avx2(H_ + 0 * 8, dgd_ij + 0 * dgd_stride, shuffle, &kl);
- acc_stat_avx2(H_ + 1 * 8, dgd_ij + 1 * dgd_stride, shuffle, &kl);
- acc_stat_avx2(H_ + 2 * 8, dgd_ij + 2 * dgd_stride, shuffle, &kl);
- acc_stat_avx2(H_ + 3 * 8, dgd_ij + 3 * dgd_stride, shuffle, &kl);
- acc_stat_avx2(H_ + 4 * 8, dgd_ij + 4 * dgd_stride, shuffle, &kl);
- }
- }
- }
-}
-
-static INLINE void compute_stats_win5_opt_avx2(
- const uint8_t *dgd, const uint8_t *src, int h_start, int h_end, int v_start,
- int v_end, int dgd_stride, int src_stride, double *M, double *H) {
- int i, j, k, l, m, n;
- const int wiener_win = WIENER_WIN_CHROMA;
- const int pixel_count = (h_end - h_start) * (v_end - v_start);
- const int wiener_win2 = wiener_win * wiener_win;
- const int wiener_halfwin = (wiener_win >> 1);
- const double avg =
- find_average(dgd, h_start, h_end, v_start, v_end, dgd_stride);
-
- int32_t M_int32[WIENER_WIN_CHROMA][WIENER_WIN_CHROMA] = { { 0 } };
- int64_t M_int64[WIENER_WIN_CHROMA][WIENER_WIN_CHROMA] = { { 0 } };
- int32_t H_int32[WIENER_WIN2_CHROMA][WIENER_WIN_CHROMA * 8] = { { 0 } };
- int64_t H_int64[WIENER_WIN2_CHROMA][WIENER_WIN_CHROMA * 8] = { { 0 } };
- int32_t sumY[WIENER_WIN_CHROMA][WIENER_WIN_CHROMA] = { { 0 } };
- int32_t sumX = 0;
- const uint8_t *dgd_win = dgd - wiener_halfwin * dgd_stride - wiener_halfwin;
-
- const __m128i shuffle = xx_loadu_128(g_shuffle_stats_data);
- for (j = v_start; j < v_end; j += 64) {
- const int vert_end = AOMMIN(64, v_end - j) + j;
- for (i = j; i < vert_end; i++) {
- acc_stat_win5_one_line_avx2(
- dgd_win + i * dgd_stride, src + i * src_stride, h_start, h_end,
- dgd_stride, &shuffle, &sumX, sumY, M_int32, H_int32);
- }
- for (k = 0; k < wiener_win; ++k) {
- for (l = 0; l < wiener_win; ++l) {
- M_int64[k][l] += M_int32[k][l];
- M_int32[k][l] = 0;
- }
- }
- for (k = 0; k < WIENER_WIN2_CHROMA; ++k) {
- for (l = 0; l < WIENER_WIN_CHROMA * 8; ++l) {
- H_int64[k][l] += H_int32[k][l];
- H_int32[k][l] = 0;
- }
- }
- }
-
- const double avg_square_sum = avg * avg * pixel_count;
- for (k = 0; k < wiener_win; k++) {
- for (l = 0; l < wiener_win; l++) {
- const int32_t idx0 = l * wiener_win + k;
- M[idx0] = M_int64[k][l] + avg_square_sum - avg * (sumX + sumY[k][l]);
- double *H_ = H + idx0 * wiener_win2;
- int64_t *H_int_ = &H_int64[idx0][0];
- for (m = 0; m < wiener_win; m++) {
- for (n = 0; n < wiener_win; n++) {
- H_[m * wiener_win + n] = H_int_[n * 8 + m] + avg_square_sum -
- avg * (sumY[k][l] + sumY[n][m]);
- }
- }
- }
- }
-}
-
-void av1_compute_stats_avx2(int wiener_win, const uint8_t *dgd,
- const uint8_t *src, int h_start, int h_end,
- int v_start, int v_end, int dgd_stride,
- int src_stride, double *M, double *H) {
- if (wiener_win == WIENER_WIN) {
- compute_stats_win7_opt_avx2(dgd, src, h_start, h_end, v_start, v_end,
- dgd_stride, src_stride, M, H);
- } else if (wiener_win == WIENER_WIN_CHROMA) {
- compute_stats_win5_opt_avx2(dgd, src, h_start, h_end, v_start, v_end,
- dgd_stride, src_stride, M, H);
- } else {
- av1_compute_stats_c(wiener_win, dgd, src, h_start, h_end, v_start, v_end,
- dgd_stride, src_stride, M, H);
- }
-}
-
-static INLINE __m256i pair_set_epi16(uint16_t a, uint16_t b) {
- return _mm256_set1_epi32(
- (int32_t)(((uint16_t)(a)) | (((uint32_t)(b)) << 16)));
-}
-
-int64_t av1_lowbd_pixel_proj_error_avx2(
- const uint8_t *src8, int width, int height, int src_stride,
- const uint8_t *dat8, int dat_stride, int32_t *flt0, int flt0_stride,
- int32_t *flt1, int flt1_stride, int xq[2], const sgr_params_type *params) {
- int i, j, k;
- const int32_t shift = SGRPROJ_RST_BITS + SGRPROJ_PRJ_BITS;
- const __m256i rounding = _mm256_set1_epi32(1 << (shift - 1));
- __m256i sum64 = _mm256_setzero_si256();
- const uint8_t *src = src8;
- const uint8_t *dat = dat8;
- int64_t err = 0;
- if (params->r[0] > 0 && params->r[1] > 0) {
- __m256i xq_coeff = pair_set_epi16(xq[0], xq[1]);
- for (i = 0; i < height; ++i) {
- __m256i sum32 = _mm256_setzero_si256();
- for (j = 0; j <= width - 16; j += 16) {
- const __m256i d0 = _mm256_cvtepu8_epi16(xx_loadu_128(dat + j));
- const __m256i s0 = _mm256_cvtepu8_epi16(xx_loadu_128(src + j));
- const __m256i flt0_16b = _mm256_permute4x64_epi64(
- _mm256_packs_epi32(yy_loadu_256(flt0 + j),
- yy_loadu_256(flt0 + j + 8)),
- 0xd8);
- const __m256i flt1_16b = _mm256_permute4x64_epi64(
- _mm256_packs_epi32(yy_loadu_256(flt1 + j),
- yy_loadu_256(flt1 + j + 8)),
- 0xd8);
- const __m256i u0 = _mm256_slli_epi16(d0, SGRPROJ_RST_BITS);
- const __m256i flt0_0_sub_u = _mm256_sub_epi16(flt0_16b, u0);
- const __m256i flt1_0_sub_u = _mm256_sub_epi16(flt1_16b, u0);
- const __m256i v0 = _mm256_madd_epi16(
- xq_coeff, _mm256_unpacklo_epi16(flt0_0_sub_u, flt1_0_sub_u));
- const __m256i v1 = _mm256_madd_epi16(
- xq_coeff, _mm256_unpackhi_epi16(flt0_0_sub_u, flt1_0_sub_u));
- const __m256i vr0 =
- _mm256_srai_epi32(_mm256_add_epi32(v0, rounding), shift);
- const __m256i vr1 =
- _mm256_srai_epi32(_mm256_add_epi32(v1, rounding), shift);
- const __m256i e0 = _mm256_sub_epi16(
- _mm256_add_epi16(_mm256_packs_epi32(vr0, vr1), d0), s0);
- const __m256i err0 = _mm256_madd_epi16(e0, e0);
- sum32 = _mm256_add_epi32(sum32, err0);
- }
- for (k = j; k < width; ++k) {
- const int32_t u = (int32_t)(dat[k] << SGRPROJ_RST_BITS);
- int32_t v = xq[0] * (flt0[k] - u) + xq[1] * (flt1[k] - u);
- const int32_t e = ROUND_POWER_OF_TWO(v, shift) + dat[k] - src[k];
- err += e * e;
- }
- dat += dat_stride;
- src += src_stride;
- flt0 += flt0_stride;
- flt1 += flt1_stride;
- const __m256i sum64_0 =
- _mm256_cvtepi32_epi64(_mm256_castsi256_si128(sum32));
- const __m256i sum64_1 =
- _mm256_cvtepi32_epi64(_mm256_extracti128_si256(sum32, 1));
- sum64 = _mm256_add_epi64(sum64, sum64_0);
- sum64 = _mm256_add_epi64(sum64, sum64_1);
- }
- } else if (params->r[0] > 0) {
- __m256i xq_coeff =
- pair_set_epi16(xq[0], (-xq[0] * (1 << SGRPROJ_RST_BITS)));
- for (i = 0; i < height; ++i) {
- __m256i sum32 = _mm256_setzero_si256();
- for (j = 0; j <= width - 16; j += 16) {
- const __m256i d0 = _mm256_cvtepu8_epi16(xx_loadu_128(dat + j));
- const __m256i s0 = _mm256_cvtepu8_epi16(xx_loadu_128(src + j));
- const __m256i flt0_16b = _mm256_permute4x64_epi64(
- _mm256_packs_epi32(yy_loadu_256(flt0 + j),
- yy_loadu_256(flt0 + j + 8)),
- 0xd8);
- const __m256i v0 =
- _mm256_madd_epi16(xq_coeff, _mm256_unpacklo_epi16(flt0_16b, d0));
- const __m256i v1 =
- _mm256_madd_epi16(xq_coeff, _mm256_unpackhi_epi16(flt0_16b, d0));
- const __m256i vr0 =
- _mm256_srai_epi32(_mm256_add_epi32(v0, rounding), shift);
- const __m256i vr1 =
- _mm256_srai_epi32(_mm256_add_epi32(v1, rounding), shift);
- const __m256i e0 = _mm256_sub_epi16(
- _mm256_add_epi16(_mm256_packs_epi32(vr0, vr1), d0), s0);
- const __m256i err0 = _mm256_madd_epi16(e0, e0);
- sum32 = _mm256_add_epi32(sum32, err0);
- }
- for (k = j; k < width; ++k) {
- const int32_t u = (int32_t)(dat[k] << SGRPROJ_RST_BITS);
- int32_t v = xq[0] * (flt0[k] - u);
- const int32_t e = ROUND_POWER_OF_TWO(v, shift) + dat[k] - src[k];
- err += e * e;
- }
- dat += dat_stride;
- src += src_stride;
- flt0 += flt0_stride;
- const __m256i sum64_0 =
- _mm256_cvtepi32_epi64(_mm256_castsi256_si128(sum32));
- const __m256i sum64_1 =
- _mm256_cvtepi32_epi64(_mm256_extracti128_si256(sum32, 1));
- sum64 = _mm256_add_epi64(sum64, sum64_0);
- sum64 = _mm256_add_epi64(sum64, sum64_1);
- }
- } else if (params->r[1] > 0) {
- __m256i xq_coeff = pair_set_epi16(xq[1], -(xq[1] << SGRPROJ_RST_BITS));
- for (i = 0; i < height; ++i) {
- __m256i sum32 = _mm256_setzero_si256();
- for (j = 0; j <= width - 16; j += 16) {
- const __m256i d0 = _mm256_cvtepu8_epi16(xx_loadu_128(dat + j));
- const __m256i s0 = _mm256_cvtepu8_epi16(xx_loadu_128(src + j));
- const __m256i flt1_16b = _mm256_permute4x64_epi64(
- _mm256_packs_epi32(yy_loadu_256(flt1 + j),
- yy_loadu_256(flt1 + j + 8)),
- 0xd8);
- const __m256i v0 =
- _mm256_madd_epi16(xq_coeff, _mm256_unpacklo_epi16(flt1_16b, d0));
- const __m256i v1 =
- _mm256_madd_epi16(xq_coeff, _mm256_unpackhi_epi16(flt1_16b, d0));
- const __m256i vr0 =
- _mm256_srai_epi32(_mm256_add_epi32(v0, rounding), shift);
- const __m256i vr1 =
- _mm256_srai_epi32(_mm256_add_epi32(v1, rounding), shift);
- const __m256i e0 = _mm256_sub_epi16(
- _mm256_add_epi16(_mm256_packs_epi32(vr0, vr1), d0), s0);
- const __m256i err0 = _mm256_madd_epi16(e0, e0);
- sum32 = _mm256_add_epi32(sum32, err0);
- }
- for (k = j; k < width; ++k) {
- const int32_t u = (int32_t)(dat[k] << SGRPROJ_RST_BITS);
- int32_t v = xq[1] * (flt1[k] - u);
- const int32_t e = ROUND_POWER_OF_TWO(v, shift) + dat[k] - src[k];
- err += e * e;
- }
- dat += dat_stride;
- src += src_stride;
- flt1 += flt1_stride;
- const __m256i sum64_0 =
- _mm256_cvtepi32_epi64(_mm256_castsi256_si128(sum32));
- const __m256i sum64_1 =
- _mm256_cvtepi32_epi64(_mm256_extracti128_si256(sum32, 1));
- sum64 = _mm256_add_epi64(sum64, sum64_0);
- sum64 = _mm256_add_epi64(sum64, sum64_1);
- }
- } else {
- __m256i sum32 = _mm256_setzero_si256();
- for (i = 0; i < height; ++i) {
- for (j = 0; j <= width - 16; j += 16) {
- const __m256i d0 = _mm256_cvtepu8_epi16(xx_loadu_128(dat + j));
- const __m256i s0 = _mm256_cvtepu8_epi16(xx_loadu_128(src + j));
- const __m256i diff0 = _mm256_sub_epi16(d0, s0);
- const __m256i err0 = _mm256_madd_epi16(diff0, diff0);
- sum32 = _mm256_add_epi32(sum32, err0);
- }
- for (k = j; k < width; ++k) {
- const int32_t e = (int32_t)(dat[k]) - src[k];
- err += e * e;
- }
- dat += dat_stride;
- src += src_stride;
- }
- const __m256i sum64_0 =
- _mm256_cvtepi32_epi64(_mm256_castsi256_si128(sum32));
- const __m256i sum64_1 =
- _mm256_cvtepi32_epi64(_mm256_extracti128_si256(sum32, 1));
- sum64 = _mm256_add_epi64(sum64_0, sum64_1);
- }
- int64_t sum[4];
- yy_storeu_256(sum, sum64);
- err += sum[0] + sum[1] + sum[2] + sum[3];
- return err;
-}
diff --git a/third_party/aom/av1/encoder/x86/pickrst_sse4.c b/third_party/aom/av1/encoder/x86/pickrst_sse4.c
deleted file mode 100644
index 04e4d1afc..000000000
--- a/third_party/aom/av1/encoder/x86/pickrst_sse4.c
+++ /dev/null
@@ -1,389 +0,0 @@
-/*
- * Copyright (c) 2018, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#include <assert.h>
-#include <emmintrin.h>
-#include "aom_dsp/x86/synonyms.h"
-
-#include "config/av1_rtcd.h"
-#include "av1/common/restoration.h"
-#include "av1/encoder/pickrst.h"
-
-static INLINE void acc_stat_sse41(int32_t *dst, const uint8_t *src,
- const __m128i *shuffle, const __m128i *kl) {
- const __m128i s = _mm_shuffle_epi8(xx_loadu_128(src), *shuffle);
- const __m128i d0 = _mm_madd_epi16(*kl, _mm_cvtepu8_epi16(s));
- const __m128i d1 =
- _mm_madd_epi16(*kl, _mm_cvtepu8_epi16(_mm_srli_si128(s, 8)));
- const __m128i dst0 = xx_loadu_128(dst);
- const __m128i dst1 = xx_loadu_128(dst + 4);
- const __m128i r0 = _mm_add_epi32(dst0, d0);
- const __m128i r1 = _mm_add_epi32(dst1, d1);
- xx_storeu_128(dst, r0);
- xx_storeu_128(dst + 4, r1);
-}
-
-static INLINE void acc_stat_win7_one_line_sse4_1(
- const uint8_t *dgd, const uint8_t *src, int h_start, int h_end,
- int dgd_stride, const __m128i *shuffle, int32_t *sumX,
- int32_t sumY[WIENER_WIN][WIENER_WIN], int32_t M_int[WIENER_WIN][WIENER_WIN],
- int32_t H_int[WIENER_WIN2][WIENER_WIN * 8]) {
- const int wiener_win = 7;
- int j, k, l;
- for (j = h_start; j < h_end; j += 2) {
- const uint8_t *dgd_ij = dgd + j;
- const uint8_t X1 = src[j];
- const uint8_t X2 = src[j + 1];
- *sumX += X1 + X2;
- for (k = 0; k < wiener_win; k++) {
- const uint8_t *dgd_ijk = dgd_ij + k * dgd_stride;
- for (l = 0; l < wiener_win; l++) {
- int32_t *H_ = &H_int[(l * wiener_win + k)][0];
- const uint8_t D1 = dgd_ijk[l];
- const uint8_t D2 = dgd_ijk[l + 1];
- sumY[k][l] += D1 + D2;
- M_int[k][l] += D1 * X1 + D2 * X2;
-
- const __m128i kl =
- _mm_cvtepu8_epi16(_mm_set1_epi16(*((uint16_t *)(dgd_ijk + l))));
- acc_stat_sse41(H_ + 0 * 8, dgd_ij + 0 * dgd_stride, shuffle, &kl);
- acc_stat_sse41(H_ + 1 * 8, dgd_ij + 1 * dgd_stride, shuffle, &kl);
- acc_stat_sse41(H_ + 2 * 8, dgd_ij + 2 * dgd_stride, shuffle, &kl);
- acc_stat_sse41(H_ + 3 * 8, dgd_ij + 3 * dgd_stride, shuffle, &kl);
- acc_stat_sse41(H_ + 4 * 8, dgd_ij + 4 * dgd_stride, shuffle, &kl);
- acc_stat_sse41(H_ + 5 * 8, dgd_ij + 5 * dgd_stride, shuffle, &kl);
- acc_stat_sse41(H_ + 6 * 8, dgd_ij + 6 * dgd_stride, shuffle, &kl);
- }
- }
- }
-}
-
-static INLINE void compute_stats_win7_opt_sse4_1(
- const uint8_t *dgd, const uint8_t *src, int h_start, int h_end, int v_start,
- int v_end, int dgd_stride, int src_stride, double *M, double *H) {
- int i, j, k, l, m, n;
- const int wiener_win = WIENER_WIN;
- const int pixel_count = (h_end - h_start) * (v_end - v_start);
- const int wiener_win2 = wiener_win * wiener_win;
- const int wiener_halfwin = (wiener_win >> 1);
- const double avg =
- find_average(dgd, h_start, h_end, v_start, v_end, dgd_stride);
-
- int32_t M_int32[WIENER_WIN][WIENER_WIN] = { { 0 } };
- int64_t M_int64[WIENER_WIN][WIENER_WIN] = { { 0 } };
- int32_t H_int32[WIENER_WIN2][WIENER_WIN * 8] = { { 0 } };
- int64_t H_int64[WIENER_WIN2][WIENER_WIN * 8] = { { 0 } };
- int32_t sumY[WIENER_WIN][WIENER_WIN] = { { 0 } };
- int32_t sumX = 0;
- const uint8_t *dgd_win = dgd - wiener_halfwin * dgd_stride - wiener_halfwin;
-
- const __m128i shuffle = xx_loadu_128(g_shuffle_stats_data);
- for (j = v_start; j < v_end; j += 64) {
- const int vert_end = AOMMIN(64, v_end - j) + j;
- for (i = j; i < vert_end; i++) {
- acc_stat_win7_one_line_sse4_1(
- dgd_win + i * dgd_stride, src + i * src_stride, h_start, h_end,
- dgd_stride, &shuffle, &sumX, sumY, M_int32, H_int32);
- }
- for (k = 0; k < wiener_win; ++k) {
- for (l = 0; l < wiener_win; ++l) {
- M_int64[k][l] += M_int32[k][l];
- M_int32[k][l] = 0;
- }
- }
- for (k = 0; k < WIENER_WIN2; ++k) {
- for (l = 0; l < WIENER_WIN * 8; ++l) {
- H_int64[k][l] += H_int32[k][l];
- H_int32[k][l] = 0;
- }
- }
- }
-
- const double avg_square_sum = avg * avg * pixel_count;
- for (k = 0; k < wiener_win; k++) {
- for (l = 0; l < wiener_win; l++) {
- const int32_t idx0 = l * wiener_win + k;
- M[idx0] = M_int64[k][l] + avg_square_sum - avg * (sumX + sumY[k][l]);
- double *H_ = H + idx0 * wiener_win2;
- int64_t *H_int_ = &H_int64[idx0][0];
- for (m = 0; m < wiener_win; m++) {
- for (n = 0; n < wiener_win; n++) {
- H_[m * wiener_win + n] = H_int_[n * 8 + m] + avg_square_sum -
- avg * (sumY[k][l] + sumY[n][m]);
- }
- }
- }
- }
-}
-
-static INLINE void acc_stat_win5_one_line_sse4_1(
- const uint8_t *dgd, const uint8_t *src, int h_start, int h_end,
- int dgd_stride, const __m128i *shuffle, int32_t *sumX,
- int32_t sumY[WIENER_WIN_CHROMA][WIENER_WIN_CHROMA],
- int32_t M_int[WIENER_WIN_CHROMA][WIENER_WIN_CHROMA],
- int32_t H_int[WIENER_WIN2_CHROMA][WIENER_WIN_CHROMA * 8]) {
- const int wiener_win = WIENER_WIN_CHROMA;
- int j, k, l;
- for (j = h_start; j < h_end; j += 2) {
- const uint8_t *dgd_ij = dgd + j;
- const uint8_t X1 = src[j];
- const uint8_t X2 = src[j + 1];
- *sumX += X1 + X2;
- for (k = 0; k < wiener_win; k++) {
- const uint8_t *dgd_ijk = dgd_ij + k * dgd_stride;
- for (l = 0; l < wiener_win; l++) {
- int32_t *H_ = &H_int[(l * wiener_win + k)][0];
- const uint8_t D1 = dgd_ijk[l];
- const uint8_t D2 = dgd_ijk[l + 1];
- sumY[k][l] += D1 + D2;
- M_int[k][l] += D1 * X1 + D2 * X2;
-
- const __m128i kl =
- _mm_cvtepu8_epi16(_mm_set1_epi16(*((uint16_t *)(dgd_ijk + l))));
- acc_stat_sse41(H_ + 0 * 8, dgd_ij + 0 * dgd_stride, shuffle, &kl);
- acc_stat_sse41(H_ + 1 * 8, dgd_ij + 1 * dgd_stride, shuffle, &kl);
- acc_stat_sse41(H_ + 2 * 8, dgd_ij + 2 * dgd_stride, shuffle, &kl);
- acc_stat_sse41(H_ + 3 * 8, dgd_ij + 3 * dgd_stride, shuffle, &kl);
- acc_stat_sse41(H_ + 4 * 8, dgd_ij + 4 * dgd_stride, shuffle, &kl);
- }
- }
- }
-}
-
-static INLINE void compute_stats_win5_opt_sse4_1(
- const uint8_t *dgd, const uint8_t *src, int h_start, int h_end, int v_start,
- int v_end, int dgd_stride, int src_stride, double *M, double *H) {
- int i, j, k, l, m, n;
- const int wiener_win = WIENER_WIN_CHROMA;
- const int pixel_count = (h_end - h_start) * (v_end - v_start);
- const int wiener_win2 = wiener_win * wiener_win;
- const int wiener_halfwin = (wiener_win >> 1);
- const double avg =
- find_average(dgd, h_start, h_end, v_start, v_end, dgd_stride);
-
- int32_t M_int32[WIENER_WIN_CHROMA][WIENER_WIN_CHROMA] = { { 0 } };
- int64_t M_int64[WIENER_WIN_CHROMA][WIENER_WIN_CHROMA] = { { 0 } };
- int32_t H_int32[WIENER_WIN2_CHROMA][WIENER_WIN_CHROMA * 8] = { { 0 } };
- int64_t H_int64[WIENER_WIN2_CHROMA][WIENER_WIN_CHROMA * 8] = { { 0 } };
- int32_t sumY[WIENER_WIN_CHROMA][WIENER_WIN_CHROMA] = { { 0 } };
- int32_t sumX = 0;
- const uint8_t *dgd_win = dgd - wiener_halfwin * dgd_stride - wiener_halfwin;
-
- const __m128i shuffle = xx_loadu_128(g_shuffle_stats_data);
- for (j = v_start; j < v_end; j += 64) {
- const int vert_end = AOMMIN(64, v_end - j) + j;
- for (i = j; i < vert_end; i++) {
- acc_stat_win5_one_line_sse4_1(
- dgd_win + i * dgd_stride, src + i * src_stride, h_start, h_end,
- dgd_stride, &shuffle, &sumX, sumY, M_int32, H_int32);
- }
- for (k = 0; k < wiener_win; ++k) {
- for (l = 0; l < wiener_win; ++l) {
- M_int64[k][l] += M_int32[k][l];
- M_int32[k][l] = 0;
- }
- }
- for (k = 0; k < WIENER_WIN_CHROMA * WIENER_WIN_CHROMA; ++k) {
- for (l = 0; l < WIENER_WIN_CHROMA * 8; ++l) {
- H_int64[k][l] += H_int32[k][l];
- H_int32[k][l] = 0;
- }
- }
- }
-
- const double avg_square_sum = avg * avg * pixel_count;
- for (k = 0; k < wiener_win; k++) {
- for (l = 0; l < wiener_win; l++) {
- const int32_t idx0 = l * wiener_win + k;
- M[idx0] = M_int64[k][l] + avg_square_sum - avg * (sumX + sumY[k][l]);
- double *H_ = H + idx0 * wiener_win2;
- int64_t *H_int_ = &H_int64[idx0][0];
- for (m = 0; m < wiener_win; m++) {
- for (n = 0; n < wiener_win; n++) {
- H_[m * wiener_win + n] = H_int_[n * 8 + m] + avg_square_sum -
- avg * (sumY[k][l] + sumY[n][m]);
- }
- }
- }
- }
-}
-void av1_compute_stats_sse4_1(int wiener_win, const uint8_t *dgd,
- const uint8_t *src, int h_start, int h_end,
- int v_start, int v_end, int dgd_stride,
- int src_stride, double *M, double *H) {
- if (wiener_win == WIENER_WIN) {
- compute_stats_win7_opt_sse4_1(dgd, src, h_start, h_end, v_start, v_end,
- dgd_stride, src_stride, M, H);
- } else if (wiener_win == WIENER_WIN_CHROMA) {
- compute_stats_win5_opt_sse4_1(dgd, src, h_start, h_end, v_start, v_end,
- dgd_stride, src_stride, M, H);
- } else {
- av1_compute_stats_c(wiener_win, dgd, src, h_start, h_end, v_start, v_end,
- dgd_stride, src_stride, M, H);
- }
-}
-
-static INLINE __m128i pair_set_epi16(uint16_t a, uint16_t b) {
- return _mm_set1_epi32((int32_t)(((uint16_t)(a)) | (((uint32_t)(b)) << 16)));
-}
-
-int64_t av1_lowbd_pixel_proj_error_sse4_1(
- const uint8_t *src8, int width, int height, int src_stride,
- const uint8_t *dat8, int dat_stride, int32_t *flt0, int flt0_stride,
- int32_t *flt1, int flt1_stride, int xq[2], const sgr_params_type *params) {
- int i, j, k;
- const int32_t shift = SGRPROJ_RST_BITS + SGRPROJ_PRJ_BITS;
- const __m128i rounding = _mm_set1_epi32(1 << (shift - 1));
- __m128i sum64 = _mm_setzero_si128();
- const uint8_t *src = src8;
- const uint8_t *dat = dat8;
- int64_t err = 0;
- if (params->r[0] > 0 && params->r[1] > 0) {
- __m128i xq_coeff = pair_set_epi16(xq[0], xq[1]);
- for (i = 0; i < height; ++i) {
- __m128i sum32 = _mm_setzero_si128();
- for (j = 0; j < width - 8; j += 8) {
- const __m128i d0 = _mm_cvtepu8_epi16(xx_loadl_64(dat + j));
- const __m128i s0 = _mm_cvtepu8_epi16(xx_loadl_64(src + j));
- const __m128i flt0_16b =
- _mm_packs_epi32(xx_loadu_128(flt0 + j), xx_loadu_128(flt0 + j + 4));
- const __m128i flt1_16b =
- _mm_packs_epi32(xx_loadu_128(flt1 + j), xx_loadu_128(flt1 + j + 4));
- const __m128i u0 = _mm_slli_epi16(d0, SGRPROJ_RST_BITS);
- const __m128i flt0_0_sub_u = _mm_sub_epi16(flt0_16b, u0);
- const __m128i flt1_0_sub_u = _mm_sub_epi16(flt1_16b, u0);
- const __m128i v0 = _mm_madd_epi16(
- xq_coeff, _mm_unpacklo_epi16(flt0_0_sub_u, flt1_0_sub_u));
- const __m128i v1 = _mm_madd_epi16(
- xq_coeff, _mm_unpackhi_epi16(flt0_0_sub_u, flt1_0_sub_u));
- const __m128i vr0 = _mm_srai_epi32(_mm_add_epi32(v0, rounding), shift);
- const __m128i vr1 = _mm_srai_epi32(_mm_add_epi32(v1, rounding), shift);
- const __m128i e0 =
- _mm_sub_epi16(_mm_add_epi16(_mm_packs_epi32(vr0, vr1), d0), s0);
- const __m128i err0 = _mm_madd_epi16(e0, e0);
- sum32 = _mm_add_epi32(sum32, err0);
- }
- for (k = j; k < width; ++k) {
- const int32_t u = (int32_t)(dat[k] << SGRPROJ_RST_BITS);
- int32_t v = xq[0] * (flt0[k] - u) + xq[1] * (flt1[k] - u);
- const int32_t e = ROUND_POWER_OF_TWO(v, shift) + dat[k] - src[k];
- err += e * e;
- }
- dat += dat_stride;
- src += src_stride;
- flt0 += flt0_stride;
- flt1 += flt1_stride;
- const __m128i sum64_0 = _mm_cvtepi32_epi64(sum32);
- const __m128i sum64_1 = _mm_cvtepi32_epi64(_mm_srli_si128(sum32, 8));
- sum64 = _mm_add_epi64(sum64, sum64_0);
- sum64 = _mm_add_epi64(sum64, sum64_1);
- }
- } else if (params->r[0] > 0) {
- __m128i xq_coeff = pair_set_epi16(xq[0], -(xq[0] << SGRPROJ_RST_BITS));
- for (i = 0; i < height; ++i) {
- __m128i sum32 = _mm_setzero_si128();
- for (j = 0; j < width - 8; j += 8) {
- const __m128i d0 = _mm_cvtepu8_epi16(xx_loadl_64(dat + j));
- const __m128i s0 = _mm_cvtepu8_epi16(xx_loadl_64(src + j));
- const __m128i flt0_16b =
- _mm_packs_epi32(xx_loadu_128(flt0 + j), xx_loadu_128(flt0 + j + 4));
- const __m128i v0 =
- _mm_madd_epi16(xq_coeff, _mm_unpacklo_epi16(flt0_16b, d0));
- const __m128i v1 =
- _mm_madd_epi16(xq_coeff, _mm_unpackhi_epi16(flt0_16b, d0));
- const __m128i vr0 = _mm_srai_epi32(_mm_add_epi32(v0, rounding), shift);
- const __m128i vr1 = _mm_srai_epi32(_mm_add_epi32(v1, rounding), shift);
- const __m128i e0 =
- _mm_sub_epi16(_mm_add_epi16(_mm_packs_epi32(vr0, vr1), d0), s0);
- const __m128i err0 = _mm_madd_epi16(e0, e0);
- sum32 = _mm_add_epi32(sum32, err0);
- }
- for (k = j; k < width; ++k) {
- const int32_t u = (int32_t)(dat[k] << SGRPROJ_RST_BITS);
- int32_t v = xq[0] * (flt0[k] - u);
- const int32_t e = ROUND_POWER_OF_TWO(v, shift) + dat[k] - src[k];
- err += e * e;
- }
- dat += dat_stride;
- src += src_stride;
- flt0 += flt0_stride;
- const __m128i sum64_0 = _mm_cvtepi32_epi64(sum32);
- const __m128i sum64_1 = _mm_cvtepi32_epi64(_mm_srli_si128(sum32, 8));
- sum64 = _mm_add_epi64(sum64, sum64_0);
- sum64 = _mm_add_epi64(sum64, sum64_1);
- }
- } else if (params->r[1] > 0) {
- __m128i xq_coeff = pair_set_epi16(xq[1], -(xq[1] << SGRPROJ_RST_BITS));
- for (i = 0; i < height; ++i) {
- __m128i sum32 = _mm_setzero_si128();
- for (j = 0; j < width - 8; j += 8) {
- const __m128i d0 = _mm_cvtepu8_epi16(xx_loadl_64(dat + j));
- const __m128i s0 = _mm_cvtepu8_epi16(xx_loadl_64(src + j));
- const __m128i flt1_16b =
- _mm_packs_epi32(xx_loadu_128(flt1 + j), xx_loadu_128(flt1 + j + 4));
- const __m128i v0 =
- _mm_madd_epi16(xq_coeff, _mm_unpacklo_epi16(flt1_16b, d0));
- const __m128i v1 =
- _mm_madd_epi16(xq_coeff, _mm_unpackhi_epi16(flt1_16b, d0));
- const __m128i vr0 = _mm_srai_epi32(_mm_add_epi32(v0, rounding), shift);
- const __m128i vr1 = _mm_srai_epi32(_mm_add_epi32(v1, rounding), shift);
- const __m128i e0 =
- _mm_sub_epi16(_mm_add_epi16(_mm_packs_epi32(vr0, vr1), d0), s0);
- const __m128i err0 = _mm_madd_epi16(e0, e0);
- sum32 = _mm_add_epi32(sum32, err0);
- }
- for (k = j; k < width; ++k) {
- const int32_t u = (int32_t)(dat[k] << SGRPROJ_RST_BITS);
- int32_t v = xq[1] * (flt1[k] - u);
- const int32_t e = ROUND_POWER_OF_TWO(v, shift) + dat[k] - src[k];
- err += e * e;
- }
- dat += dat_stride;
- src += src_stride;
- flt1 += flt1_stride;
- const __m128i sum64_0 = _mm_cvtepi32_epi64(sum32);
- const __m128i sum64_1 = _mm_cvtepi32_epi64(_mm_srli_si128(sum32, 8));
- sum64 = _mm_add_epi64(sum64, sum64_0);
- sum64 = _mm_add_epi64(sum64, sum64_1);
- }
- } else {
- __m128i sum32 = _mm_setzero_si128();
- for (i = 0; i < height; ++i) {
- for (j = 0; j < width - 16; j += 16) {
- const __m128i d = xx_loadu_128(dat + j);
- const __m128i s = xx_loadu_128(src + j);
- const __m128i d0 = _mm_cvtepu8_epi16(d);
- const __m128i d1 = _mm_cvtepu8_epi16(_mm_srli_si128(d, 8));
- const __m128i s0 = _mm_cvtepu8_epi16(s);
- const __m128i s1 = _mm_cvtepu8_epi16(_mm_srli_si128(s, 8));
- const __m128i diff0 = _mm_sub_epi16(d0, s0);
- const __m128i diff1 = _mm_sub_epi16(d1, s1);
- const __m128i err0 = _mm_madd_epi16(diff0, diff0);
- const __m128i err1 = _mm_madd_epi16(diff1, diff1);
- sum32 = _mm_add_epi32(sum32, err0);
- sum32 = _mm_add_epi32(sum32, err1);
- }
- for (k = j; k < width; ++k) {
- const int32_t e = (int32_t)(dat[k]) - src[k];
- err += e * e;
- }
- dat += dat_stride;
- src += src_stride;
- }
- const __m128i sum64_0 = _mm_cvtepi32_epi64(sum32);
- const __m128i sum64_1 = _mm_cvtepi32_epi64(_mm_srli_si128(sum32, 8));
- sum64 = _mm_add_epi64(sum64_0, sum64_1);
- }
- int64_t sum[2];
- xx_storeu_128(sum, sum64);
- err += sum[0] + sum[1];
- return err;
-}
diff --git a/third_party/aom/av1/encoder/x86/temporal_filter_apply_sse2.asm b/third_party/aom/av1/encoder/x86/temporal_filter_apply_sse2.asm
deleted file mode 100644
index 30983d1c1..000000000
--- a/third_party/aom/av1/encoder/x86/temporal_filter_apply_sse2.asm
+++ /dev/null
@@ -1,217 +0,0 @@
-;
-; Copyright (c) 2016, Alliance for Open Media. All rights reserved
-;
-; This source code is subject to the terms of the BSD 2 Clause License and
-; the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
-; was not distributed with this source code in the LICENSE file, you can
-; obtain it at www.aomedia.org/license/software. If the Alliance for Open
-; Media Patent License 1.0 was not distributed with this source code in the
-; PATENTS file, you can obtain it at www.aomedia.org/license/patent.
-;
-
-;
-
-
-%include "aom_ports/x86_abi_support.asm"
-
-SECTION .text
-
-; void av1_temporal_filter_apply_sse2 | arg
-; (unsigned char *frame1, | 0
-; unsigned int stride, | 1
-; unsigned char *frame2, | 2
-; unsigned int block_width, | 3
-; unsigned int block_height, | 4
-; int strength, | 5
-; int filter_weight, | 6
-; unsigned int *accumulator, | 7
-; unsigned short *count) | 8
-global sym(av1_temporal_filter_apply_sse2) PRIVATE
-sym(av1_temporal_filter_apply_sse2):
-
- push rbp
- mov rbp, rsp
- SHADOW_ARGS_TO_STACK 9
- SAVE_XMM 7
- GET_GOT rbx
- push rsi
- push rdi
- ALIGN_STACK 16, rax
- %define block_width 0
- %define block_height 16
- %define strength 32
- %define filter_weight 48
- %define rounding_bit 64
- %define rbp_backup 80
- %define stack_size 96
- sub rsp, stack_size
- mov [rsp + rbp_backup], rbp
- ; end prolog
-
- mov edx, arg(3)
- mov [rsp + block_width], rdx
- mov edx, arg(4)
- mov [rsp + block_height], rdx
- movd xmm6, arg(5)
- movdqa [rsp + strength], xmm6 ; where strength is used, all 16 bytes are read
-
- ; calculate the rounding bit outside the loop
- ; 0x8000 >> (16 - strength)
- mov rdx, 16
- sub rdx, arg(5) ; 16 - strength
- movq xmm4, rdx ; can't use rdx w/ shift
- movdqa xmm5, [GLOBAL(_const_top_bit)]
- psrlw xmm5, xmm4
- movdqa [rsp + rounding_bit], xmm5
-
- mov rsi, arg(0) ; src/frame1
- mov rdx, arg(2) ; predictor frame
- mov rdi, arg(7) ; accumulator
- mov rax, arg(8) ; count
-
- ; dup the filter weight and store for later
- movd xmm0, arg(6) ; filter_weight
- pshuflw xmm0, xmm0, 0
- punpcklwd xmm0, xmm0
- movdqa [rsp + filter_weight], xmm0
-
- mov rbp, arg(1) ; stride
- pxor xmm7, xmm7 ; zero for extraction
-
- mov rcx, [rsp + block_width]
- imul rcx, [rsp + block_height]
- add rcx, rdx
- cmp dword ptr [rsp + block_width], 8
- jne .temporal_filter_apply_load_16
-
-.temporal_filter_apply_load_8:
- movq xmm0, [rsi] ; first row
- lea rsi, [rsi + rbp] ; += stride
- punpcklbw xmm0, xmm7 ; src[ 0- 7]
- movq xmm1, [rsi] ; second row
- lea rsi, [rsi + rbp] ; += stride
- punpcklbw xmm1, xmm7 ; src[ 8-15]
- jmp .temporal_filter_apply_load_finished
-
-.temporal_filter_apply_load_16:
- movdqa xmm0, [rsi] ; src (frame1)
- lea rsi, [rsi + rbp] ; += stride
- movdqa xmm1, xmm0
- punpcklbw xmm0, xmm7 ; src[ 0- 7]
- punpckhbw xmm1, xmm7 ; src[ 8-15]
-
-.temporal_filter_apply_load_finished:
- movdqa xmm2, [rdx] ; predictor (frame2)
- movdqa xmm3, xmm2
- punpcklbw xmm2, xmm7 ; pred[ 0- 7]
- punpckhbw xmm3, xmm7 ; pred[ 8-15]
-
- ; modifier = src_byte - pixel_value
- psubw xmm0, xmm2 ; src - pred[ 0- 7]
- psubw xmm1, xmm3 ; src - pred[ 8-15]
-
- ; modifier *= modifier
- pmullw xmm0, xmm0 ; modifer[ 0- 7]^2
- pmullw xmm1, xmm1 ; modifer[ 8-15]^2
-
- ; modifier *= 3
- pmullw xmm0, [GLOBAL(_const_3w)]
- pmullw xmm1, [GLOBAL(_const_3w)]
-
- ; modifer += 0x8000 >> (16 - strength)
- paddw xmm0, [rsp + rounding_bit]
- paddw xmm1, [rsp + rounding_bit]
-
- ; modifier >>= strength
- psrlw xmm0, [rsp + strength]
- psrlw xmm1, [rsp + strength]
-
- ; modifier = 16 - modifier
- ; saturation takes care of modifier > 16
- movdqa xmm3, [GLOBAL(_const_16w)]
- movdqa xmm2, [GLOBAL(_const_16w)]
- psubusw xmm3, xmm1
- psubusw xmm2, xmm0
-
- ; modifier *= filter_weight
- pmullw xmm2, [rsp + filter_weight]
- pmullw xmm3, [rsp + filter_weight]
-
- ; count
- movdqa xmm4, [rax]
- movdqa xmm5, [rax+16]
- ; += modifier
- paddw xmm4, xmm2
- paddw xmm5, xmm3
- ; write back
- movdqa [rax], xmm4
- movdqa [rax+16], xmm5
- lea rax, [rax + 16*2] ; count += 16*(sizeof(short))
-
- ; load and extract the predictor up to shorts
- pxor xmm7, xmm7
- movdqa xmm0, [rdx]
- lea rdx, [rdx + 16*1] ; pred += 16*(sizeof(char))
- movdqa xmm1, xmm0
- punpcklbw xmm0, xmm7 ; pred[ 0- 7]
- punpckhbw xmm1, xmm7 ; pred[ 8-15]
-
- ; modifier *= pixel_value
- pmullw xmm0, xmm2
- pmullw xmm1, xmm3
-
- ; expand to double words
- movdqa xmm2, xmm0
- punpcklwd xmm0, xmm7 ; [ 0- 3]
- punpckhwd xmm2, xmm7 ; [ 4- 7]
- movdqa xmm3, xmm1
- punpcklwd xmm1, xmm7 ; [ 8-11]
- punpckhwd xmm3, xmm7 ; [12-15]
-
- ; accumulator
- movdqa xmm4, [rdi]
- movdqa xmm5, [rdi+16]
- movdqa xmm6, [rdi+32]
- movdqa xmm7, [rdi+48]
- ; += modifier
- paddd xmm4, xmm0
- paddd xmm5, xmm2
- paddd xmm6, xmm1
- paddd xmm7, xmm3
- ; write back
- movdqa [rdi], xmm4
- movdqa [rdi+16], xmm5
- movdqa [rdi+32], xmm6
- movdqa [rdi+48], xmm7
- lea rdi, [rdi + 16*4] ; accumulator += 16*(sizeof(int))
-
- cmp rdx, rcx
- je .temporal_filter_apply_epilog
- pxor xmm7, xmm7 ; zero for extraction
- cmp dword ptr [rsp + block_width], 16
- je .temporal_filter_apply_load_16
- jmp .temporal_filter_apply_load_8
-
-.temporal_filter_apply_epilog:
- ; begin epilog
- mov rbp, [rsp + rbp_backup]
- add rsp, stack_size
- pop rsp
- pop rdi
- pop rsi
- RESTORE_GOT
- RESTORE_XMM
- UNSHADOW_ARGS
- pop rbp
- ret
-
-SECTION_RODATA
-align 16
-_const_3w:
- times 8 dw 3
-align 16
-_const_top_bit:
- times 8 dw 1<<15
-align 16
-_const_16w:
- times 8 dw 16
diff --git a/third_party/aom/av1/encoder/x86/wedge_utils_avx2.c b/third_party/aom/av1/encoder/x86/wedge_utils_avx2.c
deleted file mode 100644
index 2a792f14e..000000000
--- a/third_party/aom/av1/encoder/x86/wedge_utils_avx2.c
+++ /dev/null
@@ -1,215 +0,0 @@
-/*
- * Copyright (c) 2018, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#include <assert.h>
-#include <immintrin.h>
-#include <smmintrin.h>
-
-#include "aom_dsp/x86/synonyms.h"
-#include "aom_dsp/x86/synonyms_avx2.h"
-#include "aom/aom_integer.h"
-
-#include "av1/common/reconinter.h"
-
-#define MAX_MASK_VALUE (1 << WEDGE_WEIGHT_BITS)
-
-/**
- * See av1_wedge_sse_from_residuals_c
- */
-uint64_t av1_wedge_sse_from_residuals_avx2(const int16_t *r1, const int16_t *d,
- const uint8_t *m, int N) {
- int n = -N;
-
- uint64_t csse;
-
- const __m256i v_mask_max_w = _mm256_set1_epi16(MAX_MASK_VALUE);
- const __m256i v_zext_q = yy_set1_64_from_32i(0xffffffff);
-
- __m256i v_acc0_q = _mm256_setzero_si256();
-
- assert(N % 64 == 0);
-
- r1 += N;
- d += N;
- m += N;
-
- do {
- const __m256i v_r0_w = _mm256_lddqu_si256((__m256i *)(r1 + n));
- const __m256i v_d0_w = _mm256_lddqu_si256((__m256i *)(d + n));
- const __m128i v_m01_b = _mm_lddqu_si128((__m128i *)(m + n));
-
- const __m256i v_rd0l_w = _mm256_unpacklo_epi16(v_d0_w, v_r0_w);
- const __m256i v_rd0h_w = _mm256_unpackhi_epi16(v_d0_w, v_r0_w);
- const __m256i v_m0_w = _mm256_cvtepu8_epi16(v_m01_b);
-
- const __m256i v_m0l_w = _mm256_unpacklo_epi16(v_m0_w, v_mask_max_w);
- const __m256i v_m0h_w = _mm256_unpackhi_epi16(v_m0_w, v_mask_max_w);
-
- const __m256i v_t0l_d = _mm256_madd_epi16(v_rd0l_w, v_m0l_w);
- const __m256i v_t0h_d = _mm256_madd_epi16(v_rd0h_w, v_m0h_w);
-
- const __m256i v_t0_w = _mm256_packs_epi32(v_t0l_d, v_t0h_d);
-
- const __m256i v_sq0_d = _mm256_madd_epi16(v_t0_w, v_t0_w);
-
- const __m256i v_sum0_q = _mm256_add_epi64(
- _mm256_and_si256(v_sq0_d, v_zext_q), _mm256_srli_epi64(v_sq0_d, 32));
-
- v_acc0_q = _mm256_add_epi64(v_acc0_q, v_sum0_q);
-
- n += 16;
- } while (n);
-
- v_acc0_q = _mm256_add_epi64(v_acc0_q, _mm256_srli_si256(v_acc0_q, 8));
- __m128i v_acc_q_0 = _mm256_castsi256_si128(v_acc0_q);
- __m128i v_acc_q_1 = _mm256_extracti128_si256(v_acc0_q, 1);
- v_acc_q_0 = _mm_add_epi64(v_acc_q_0, v_acc_q_1);
-#if ARCH_X86_64
- csse = (uint64_t)_mm_extract_epi64(v_acc_q_0, 0);
-#else
- xx_storel_64(&csse, v_acc_q_0);
-#endif
-
- return ROUND_POWER_OF_TWO(csse, 2 * WEDGE_WEIGHT_BITS);
-}
-
-/**
- * See av1_wedge_sign_from_residuals_c
- */
-int av1_wedge_sign_from_residuals_avx2(const int16_t *ds, const uint8_t *m,
- int N, int64_t limit) {
- int64_t acc;
- __m256i v_acc0_d = _mm256_setzero_si256();
-
- // Input size limited to 8192 by the use of 32 bit accumulators and m
- // being between [0, 64]. Overflow might happen at larger sizes,
- // though it is practically impossible on real video input.
- assert(N < 8192);
- assert(N % 64 == 0);
-
- do {
- const __m256i v_m01_b = _mm256_lddqu_si256((__m256i *)(m));
- const __m256i v_m23_b = _mm256_lddqu_si256((__m256i *)(m + 32));
-
- const __m256i v_d0_w = _mm256_lddqu_si256((__m256i *)(ds));
- const __m256i v_d1_w = _mm256_lddqu_si256((__m256i *)(ds + 16));
- const __m256i v_d2_w = _mm256_lddqu_si256((__m256i *)(ds + 32));
- const __m256i v_d3_w = _mm256_lddqu_si256((__m256i *)(ds + 48));
-
- const __m256i v_m0_w =
- _mm256_cvtepu8_epi16(_mm256_castsi256_si128(v_m01_b));
- const __m256i v_m1_w =
- _mm256_cvtepu8_epi16(_mm256_extracti128_si256(v_m01_b, 1));
- const __m256i v_m2_w =
- _mm256_cvtepu8_epi16(_mm256_castsi256_si128(v_m23_b));
- const __m256i v_m3_w =
- _mm256_cvtepu8_epi16(_mm256_extracti128_si256(v_m23_b, 1));
-
- const __m256i v_p0_d = _mm256_madd_epi16(v_d0_w, v_m0_w);
- const __m256i v_p1_d = _mm256_madd_epi16(v_d1_w, v_m1_w);
- const __m256i v_p2_d = _mm256_madd_epi16(v_d2_w, v_m2_w);
- const __m256i v_p3_d = _mm256_madd_epi16(v_d3_w, v_m3_w);
-
- const __m256i v_p01_d = _mm256_add_epi32(v_p0_d, v_p1_d);
- const __m256i v_p23_d = _mm256_add_epi32(v_p2_d, v_p3_d);
-
- const __m256i v_p0123_d = _mm256_add_epi32(v_p01_d, v_p23_d);
-
- v_acc0_d = _mm256_add_epi32(v_acc0_d, v_p0123_d);
-
- ds += 64;
- m += 64;
-
- N -= 64;
- } while (N);
-
- __m256i v_sign_d = _mm256_srai_epi32(v_acc0_d, 31);
- v_acc0_d = _mm256_add_epi64(_mm256_unpacklo_epi32(v_acc0_d, v_sign_d),
- _mm256_unpackhi_epi32(v_acc0_d, v_sign_d));
-
- __m256i v_acc_q = _mm256_add_epi64(v_acc0_d, _mm256_srli_si256(v_acc0_d, 8));
-
- __m128i v_acc_q_0 = _mm256_castsi256_si128(v_acc_q);
- __m128i v_acc_q_1 = _mm256_extracti128_si256(v_acc_q, 1);
- v_acc_q_0 = _mm_add_epi64(v_acc_q_0, v_acc_q_1);
-
-#if ARCH_X86_64
- acc = (uint64_t)_mm_extract_epi64(v_acc_q_0, 0);
-#else
- xx_storel_64(&acc, v_acc_q_0);
-#endif
-
- return acc > limit;
-}
-
-/**
- * av1_wedge_compute_delta_squares_c
- */
-void av1_wedge_compute_delta_squares_avx2(int16_t *d, const int16_t *a,
- const int16_t *b, int N) {
- const __m256i v_neg_w = _mm256_set1_epi32(0xffff0001);
-
- assert(N % 64 == 0);
-
- do {
- const __m256i v_a0_w = _mm256_lddqu_si256((__m256i *)(a));
- const __m256i v_b0_w = _mm256_lddqu_si256((__m256i *)(b));
- const __m256i v_a1_w = _mm256_lddqu_si256((__m256i *)(a + 16));
- const __m256i v_b1_w = _mm256_lddqu_si256((__m256i *)(b + 16));
- const __m256i v_a2_w = _mm256_lddqu_si256((__m256i *)(a + 32));
- const __m256i v_b2_w = _mm256_lddqu_si256((__m256i *)(b + 32));
- const __m256i v_a3_w = _mm256_lddqu_si256((__m256i *)(a + 48));
- const __m256i v_b3_w = _mm256_lddqu_si256((__m256i *)(b + 48));
-
- const __m256i v_ab0l_w = _mm256_unpacklo_epi16(v_a0_w, v_b0_w);
- const __m256i v_ab0h_w = _mm256_unpackhi_epi16(v_a0_w, v_b0_w);
- const __m256i v_ab1l_w = _mm256_unpacklo_epi16(v_a1_w, v_b1_w);
- const __m256i v_ab1h_w = _mm256_unpackhi_epi16(v_a1_w, v_b1_w);
- const __m256i v_ab2l_w = _mm256_unpacklo_epi16(v_a2_w, v_b2_w);
- const __m256i v_ab2h_w = _mm256_unpackhi_epi16(v_a2_w, v_b2_w);
- const __m256i v_ab3l_w = _mm256_unpacklo_epi16(v_a3_w, v_b3_w);
- const __m256i v_ab3h_w = _mm256_unpackhi_epi16(v_a3_w, v_b3_w);
-
- // Negate top word of pairs
- const __m256i v_abl0n_w = _mm256_sign_epi16(v_ab0l_w, v_neg_w);
- const __m256i v_abh0n_w = _mm256_sign_epi16(v_ab0h_w, v_neg_w);
- const __m256i v_abl1n_w = _mm256_sign_epi16(v_ab1l_w, v_neg_w);
- const __m256i v_abh1n_w = _mm256_sign_epi16(v_ab1h_w, v_neg_w);
- const __m256i v_abl2n_w = _mm256_sign_epi16(v_ab2l_w, v_neg_w);
- const __m256i v_abh2n_w = _mm256_sign_epi16(v_ab2h_w, v_neg_w);
- const __m256i v_abl3n_w = _mm256_sign_epi16(v_ab3l_w, v_neg_w);
- const __m256i v_abh3n_w = _mm256_sign_epi16(v_ab3h_w, v_neg_w);
-
- const __m256i v_r0l_w = _mm256_madd_epi16(v_ab0l_w, v_abl0n_w);
- const __m256i v_r0h_w = _mm256_madd_epi16(v_ab0h_w, v_abh0n_w);
- const __m256i v_r1l_w = _mm256_madd_epi16(v_ab1l_w, v_abl1n_w);
- const __m256i v_r1h_w = _mm256_madd_epi16(v_ab1h_w, v_abh1n_w);
- const __m256i v_r2l_w = _mm256_madd_epi16(v_ab2l_w, v_abl2n_w);
- const __m256i v_r2h_w = _mm256_madd_epi16(v_ab2h_w, v_abh2n_w);
- const __m256i v_r3l_w = _mm256_madd_epi16(v_ab3l_w, v_abl3n_w);
- const __m256i v_r3h_w = _mm256_madd_epi16(v_ab3h_w, v_abh3n_w);
-
- const __m256i v_r0_w = _mm256_packs_epi32(v_r0l_w, v_r0h_w);
- const __m256i v_r1_w = _mm256_packs_epi32(v_r1l_w, v_r1h_w);
- const __m256i v_r2_w = _mm256_packs_epi32(v_r2l_w, v_r2h_w);
- const __m256i v_r3_w = _mm256_packs_epi32(v_r3l_w, v_r3h_w);
-
- _mm256_store_si256((__m256i *)(d), v_r0_w);
- _mm256_store_si256((__m256i *)(d + 16), v_r1_w);
- _mm256_store_si256((__m256i *)(d + 32), v_r2_w);
- _mm256_store_si256((__m256i *)(d + 48), v_r3_w);
-
- a += 64;
- b += 64;
- d += 64;
- N -= 64;
- } while (N);
-}
diff --git a/third_party/aom/av1/encoder/x86/wedge_utils_sse2.c b/third_party/aom/av1/encoder/x86/wedge_utils_sse2.c
deleted file mode 100644
index 4d2e99f25..000000000
--- a/third_party/aom/av1/encoder/x86/wedge_utils_sse2.c
+++ /dev/null
@@ -1,254 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#include <assert.h>
-#include <immintrin.h>
-
-#include "aom_dsp/x86/synonyms.h"
-
-#include "aom/aom_integer.h"
-
-#include "av1/common/reconinter.h"
-
-#define MAX_MASK_VALUE (1 << WEDGE_WEIGHT_BITS)
-
-/**
- * See av1_wedge_sse_from_residuals_c
- */
-uint64_t av1_wedge_sse_from_residuals_sse2(const int16_t *r1, const int16_t *d,
- const uint8_t *m, int N) {
- int n = -N;
- int n8 = n + 8;
-
- uint64_t csse;
-
- const __m128i v_mask_max_w = _mm_set1_epi16(MAX_MASK_VALUE);
- const __m128i v_zext_q = xx_set1_64_from_32i(0xffffffff);
-
- __m128i v_acc0_q = _mm_setzero_si128();
-
- assert(N % 64 == 0);
-
- r1 += N;
- d += N;
- m += N;
-
- do {
- const __m128i v_r0_w = xx_load_128(r1 + n);
- const __m128i v_r1_w = xx_load_128(r1 + n8);
- const __m128i v_d0_w = xx_load_128(d + n);
- const __m128i v_d1_w = xx_load_128(d + n8);
- const __m128i v_m01_b = xx_load_128(m + n);
-
- const __m128i v_rd0l_w = _mm_unpacklo_epi16(v_d0_w, v_r0_w);
- const __m128i v_rd0h_w = _mm_unpackhi_epi16(v_d0_w, v_r0_w);
- const __m128i v_rd1l_w = _mm_unpacklo_epi16(v_d1_w, v_r1_w);
- const __m128i v_rd1h_w = _mm_unpackhi_epi16(v_d1_w, v_r1_w);
- const __m128i v_m0_w = _mm_unpacklo_epi8(v_m01_b, _mm_setzero_si128());
- const __m128i v_m1_w = _mm_unpackhi_epi8(v_m01_b, _mm_setzero_si128());
-
- const __m128i v_m0l_w = _mm_unpacklo_epi16(v_m0_w, v_mask_max_w);
- const __m128i v_m0h_w = _mm_unpackhi_epi16(v_m0_w, v_mask_max_w);
- const __m128i v_m1l_w = _mm_unpacklo_epi16(v_m1_w, v_mask_max_w);
- const __m128i v_m1h_w = _mm_unpackhi_epi16(v_m1_w, v_mask_max_w);
-
- const __m128i v_t0l_d = _mm_madd_epi16(v_rd0l_w, v_m0l_w);
- const __m128i v_t0h_d = _mm_madd_epi16(v_rd0h_w, v_m0h_w);
- const __m128i v_t1l_d = _mm_madd_epi16(v_rd1l_w, v_m1l_w);
- const __m128i v_t1h_d = _mm_madd_epi16(v_rd1h_w, v_m1h_w);
-
- const __m128i v_t0_w = _mm_packs_epi32(v_t0l_d, v_t0h_d);
- const __m128i v_t1_w = _mm_packs_epi32(v_t1l_d, v_t1h_d);
-
- const __m128i v_sq0_d = _mm_madd_epi16(v_t0_w, v_t0_w);
- const __m128i v_sq1_d = _mm_madd_epi16(v_t1_w, v_t1_w);
-
- const __m128i v_sum0_q = _mm_add_epi64(_mm_and_si128(v_sq0_d, v_zext_q),
- _mm_srli_epi64(v_sq0_d, 32));
- const __m128i v_sum1_q = _mm_add_epi64(_mm_and_si128(v_sq1_d, v_zext_q),
- _mm_srli_epi64(v_sq1_d, 32));
-
- v_acc0_q = _mm_add_epi64(v_acc0_q, v_sum0_q);
- v_acc0_q = _mm_add_epi64(v_acc0_q, v_sum1_q);
-
- n8 += 16;
- n += 16;
- } while (n);
-
- v_acc0_q = _mm_add_epi64(v_acc0_q, _mm_srli_si128(v_acc0_q, 8));
-
-#if ARCH_X86_64
- csse = (uint64_t)_mm_cvtsi128_si64(v_acc0_q);
-#else
- xx_storel_64(&csse, v_acc0_q);
-#endif
-
- return ROUND_POWER_OF_TWO(csse, 2 * WEDGE_WEIGHT_BITS);
-}
-
-/**
- * See av1_wedge_sign_from_residuals_c
- */
-int av1_wedge_sign_from_residuals_sse2(const int16_t *ds, const uint8_t *m,
- int N, int64_t limit) {
- int64_t acc;
-
- __m128i v_sign_d;
- __m128i v_acc0_d = _mm_setzero_si128();
- __m128i v_acc1_d = _mm_setzero_si128();
- __m128i v_acc_q;
-
- // Input size limited to 8192 by the use of 32 bit accumulators and m
- // being between [0, 64]. Overflow might happen at larger sizes,
- // though it is practically impossible on real video input.
- assert(N < 8192);
- assert(N % 64 == 0);
-
- do {
- const __m128i v_m01_b = xx_load_128(m);
- const __m128i v_m23_b = xx_load_128(m + 16);
- const __m128i v_m45_b = xx_load_128(m + 32);
- const __m128i v_m67_b = xx_load_128(m + 48);
-
- const __m128i v_d0_w = xx_load_128(ds);
- const __m128i v_d1_w = xx_load_128(ds + 8);
- const __m128i v_d2_w = xx_load_128(ds + 16);
- const __m128i v_d3_w = xx_load_128(ds + 24);
- const __m128i v_d4_w = xx_load_128(ds + 32);
- const __m128i v_d5_w = xx_load_128(ds + 40);
- const __m128i v_d6_w = xx_load_128(ds + 48);
- const __m128i v_d7_w = xx_load_128(ds + 56);
-
- const __m128i v_m0_w = _mm_unpacklo_epi8(v_m01_b, _mm_setzero_si128());
- const __m128i v_m1_w = _mm_unpackhi_epi8(v_m01_b, _mm_setzero_si128());
- const __m128i v_m2_w = _mm_unpacklo_epi8(v_m23_b, _mm_setzero_si128());
- const __m128i v_m3_w = _mm_unpackhi_epi8(v_m23_b, _mm_setzero_si128());
- const __m128i v_m4_w = _mm_unpacklo_epi8(v_m45_b, _mm_setzero_si128());
- const __m128i v_m5_w = _mm_unpackhi_epi8(v_m45_b, _mm_setzero_si128());
- const __m128i v_m6_w = _mm_unpacklo_epi8(v_m67_b, _mm_setzero_si128());
- const __m128i v_m7_w = _mm_unpackhi_epi8(v_m67_b, _mm_setzero_si128());
-
- const __m128i v_p0_d = _mm_madd_epi16(v_d0_w, v_m0_w);
- const __m128i v_p1_d = _mm_madd_epi16(v_d1_w, v_m1_w);
- const __m128i v_p2_d = _mm_madd_epi16(v_d2_w, v_m2_w);
- const __m128i v_p3_d = _mm_madd_epi16(v_d3_w, v_m3_w);
- const __m128i v_p4_d = _mm_madd_epi16(v_d4_w, v_m4_w);
- const __m128i v_p5_d = _mm_madd_epi16(v_d5_w, v_m5_w);
- const __m128i v_p6_d = _mm_madd_epi16(v_d6_w, v_m6_w);
- const __m128i v_p7_d = _mm_madd_epi16(v_d7_w, v_m7_w);
-
- const __m128i v_p01_d = _mm_add_epi32(v_p0_d, v_p1_d);
- const __m128i v_p23_d = _mm_add_epi32(v_p2_d, v_p3_d);
- const __m128i v_p45_d = _mm_add_epi32(v_p4_d, v_p5_d);
- const __m128i v_p67_d = _mm_add_epi32(v_p6_d, v_p7_d);
-
- const __m128i v_p0123_d = _mm_add_epi32(v_p01_d, v_p23_d);
- const __m128i v_p4567_d = _mm_add_epi32(v_p45_d, v_p67_d);
-
- v_acc0_d = _mm_add_epi32(v_acc0_d, v_p0123_d);
- v_acc1_d = _mm_add_epi32(v_acc1_d, v_p4567_d);
-
- ds += 64;
- m += 64;
-
- N -= 64;
- } while (N);
-
- v_sign_d = _mm_cmplt_epi32(v_acc0_d, _mm_setzero_si128());
- v_acc0_d = _mm_add_epi64(_mm_unpacklo_epi32(v_acc0_d, v_sign_d),
- _mm_unpackhi_epi32(v_acc0_d, v_sign_d));
-
- v_sign_d = _mm_cmplt_epi32(v_acc1_d, _mm_setzero_si128());
- v_acc1_d = _mm_add_epi64(_mm_unpacklo_epi32(v_acc1_d, v_sign_d),
- _mm_unpackhi_epi32(v_acc1_d, v_sign_d));
-
- v_acc_q = _mm_add_epi64(v_acc0_d, v_acc1_d);
-
- v_acc_q = _mm_add_epi64(v_acc_q, _mm_srli_si128(v_acc_q, 8));
-
-#if ARCH_X86_64
- acc = (uint64_t)_mm_cvtsi128_si64(v_acc_q);
-#else
- xx_storel_64(&acc, v_acc_q);
-#endif
-
- return acc > limit;
-}
-
-// Negate under mask
-static INLINE __m128i negm_epi16(__m128i v_v_w, __m128i v_mask_w) {
- return _mm_sub_epi16(_mm_xor_si128(v_v_w, v_mask_w), v_mask_w);
-}
-
-/**
- * av1_wedge_compute_delta_squares_c
- */
-void av1_wedge_compute_delta_squares_sse2(int16_t *d, const int16_t *a,
- const int16_t *b, int N) {
- const __m128i v_neg_w =
- _mm_set_epi16(0xffff, 0, 0xffff, 0, 0xffff, 0, 0xffff, 0);
-
- assert(N % 64 == 0);
-
- do {
- const __m128i v_a0_w = xx_load_128(a);
- const __m128i v_b0_w = xx_load_128(b);
- const __m128i v_a1_w = xx_load_128(a + 8);
- const __m128i v_b1_w = xx_load_128(b + 8);
- const __m128i v_a2_w = xx_load_128(a + 16);
- const __m128i v_b2_w = xx_load_128(b + 16);
- const __m128i v_a3_w = xx_load_128(a + 24);
- const __m128i v_b3_w = xx_load_128(b + 24);
-
- const __m128i v_ab0l_w = _mm_unpacklo_epi16(v_a0_w, v_b0_w);
- const __m128i v_ab0h_w = _mm_unpackhi_epi16(v_a0_w, v_b0_w);
- const __m128i v_ab1l_w = _mm_unpacklo_epi16(v_a1_w, v_b1_w);
- const __m128i v_ab1h_w = _mm_unpackhi_epi16(v_a1_w, v_b1_w);
- const __m128i v_ab2l_w = _mm_unpacklo_epi16(v_a2_w, v_b2_w);
- const __m128i v_ab2h_w = _mm_unpackhi_epi16(v_a2_w, v_b2_w);
- const __m128i v_ab3l_w = _mm_unpacklo_epi16(v_a3_w, v_b3_w);
- const __m128i v_ab3h_w = _mm_unpackhi_epi16(v_a3_w, v_b3_w);
-
- // Negate top word of pairs
- const __m128i v_abl0n_w = negm_epi16(v_ab0l_w, v_neg_w);
- const __m128i v_abh0n_w = negm_epi16(v_ab0h_w, v_neg_w);
- const __m128i v_abl1n_w = negm_epi16(v_ab1l_w, v_neg_w);
- const __m128i v_abh1n_w = negm_epi16(v_ab1h_w, v_neg_w);
- const __m128i v_abl2n_w = negm_epi16(v_ab2l_w, v_neg_w);
- const __m128i v_abh2n_w = negm_epi16(v_ab2h_w, v_neg_w);
- const __m128i v_abl3n_w = negm_epi16(v_ab3l_w, v_neg_w);
- const __m128i v_abh3n_w = negm_epi16(v_ab3h_w, v_neg_w);
-
- const __m128i v_r0l_w = _mm_madd_epi16(v_ab0l_w, v_abl0n_w);
- const __m128i v_r0h_w = _mm_madd_epi16(v_ab0h_w, v_abh0n_w);
- const __m128i v_r1l_w = _mm_madd_epi16(v_ab1l_w, v_abl1n_w);
- const __m128i v_r1h_w = _mm_madd_epi16(v_ab1h_w, v_abh1n_w);
- const __m128i v_r2l_w = _mm_madd_epi16(v_ab2l_w, v_abl2n_w);
- const __m128i v_r2h_w = _mm_madd_epi16(v_ab2h_w, v_abh2n_w);
- const __m128i v_r3l_w = _mm_madd_epi16(v_ab3l_w, v_abl3n_w);
- const __m128i v_r3h_w = _mm_madd_epi16(v_ab3h_w, v_abh3n_w);
-
- const __m128i v_r0_w = _mm_packs_epi32(v_r0l_w, v_r0h_w);
- const __m128i v_r1_w = _mm_packs_epi32(v_r1l_w, v_r1h_w);
- const __m128i v_r2_w = _mm_packs_epi32(v_r2l_w, v_r2h_w);
- const __m128i v_r3_w = _mm_packs_epi32(v_r3l_w, v_r3h_w);
-
- xx_store_128(d, v_r0_w);
- xx_store_128(d + 8, v_r1_w);
- xx_store_128(d + 16, v_r2_w);
- xx_store_128(d + 24, v_r3_w);
-
- a += 32;
- b += 32;
- d += 32;
- N -= 32;
- } while (N);
-}
diff --git a/third_party/aom/av1/exports_com b/third_party/aom/av1/exports_com
deleted file mode 100644
index 5c8e0e09d..000000000
--- a/third_party/aom/av1/exports_com
+++ /dev/null
@@ -1,2 +0,0 @@
-text aom_read_obu_header_and_size
-text av1_resize_frame420
diff --git a/third_party/aom/av1/exports_dec b/third_party/aom/av1/exports_dec
deleted file mode 100644
index daabf6766..000000000
--- a/third_party/aom/av1/exports_dec
+++ /dev/null
@@ -1,3 +0,0 @@
-data aom_codec_av1_dx_algo
-text aom_codec_av1_dx
-text av1_add_film_grain
diff --git a/third_party/aom/av1/exports_enc b/third_party/aom/av1/exports_enc
deleted file mode 100644
index dc4a9eae7..000000000
--- a/third_party/aom/av1/exports_enc
+++ /dev/null
@@ -1,2 +0,0 @@
-data aom_codec_av1_cx_algo
-text aom_codec_av1_cx
diff --git a/third_party/aom/av1/exports_test b/third_party/aom/av1/exports_test
deleted file mode 100644
index dab377575..000000000
--- a/third_party/aom/av1/exports_test
+++ /dev/null
@@ -1,2 +0,0 @@
-text av1_get_fwd_txfm_cfg
-text av1_rtcd
diff --git a/third_party/aom/build/cmake/aom_config.c.template b/third_party/aom/build/cmake/aom_config.c.template
deleted file mode 100644
index 62f0a10ab..000000000
--- a/third_party/aom/build/cmake/aom_config.c.template
+++ /dev/null
@@ -1,13 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-#include "aom/aom_codec.h"
-static const char* const cfg = "${AOM_CMAKE_CONFIG}";
-const char *aom_codec_build_config(void) {return cfg;}
diff --git a/third_party/aom/build/cmake/aom_config_defaults.cmake b/third_party/aom/build/cmake/aom_config_defaults.cmake
deleted file mode 100644
index a07438cfe..000000000
--- a/third_party/aom/build/cmake/aom_config_defaults.cmake
+++ /dev/null
@@ -1,196 +0,0 @@
-#
-# Copyright (c) 2016, Alliance for Open Media. All rights reserved
-#
-# This source code is subject to the terms of the BSD 2 Clause License and the
-# Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License was
-# not distributed with this source code in the LICENSE file, you can obtain it
-# at www.aomedia.org/license/software. If the Alliance for Open Media Patent
-# License 1.0 was not distributed with this source code in the PATENTS file, you
-# can obtain it at www.aomedia.org/license/patent.
-
-include("${AOM_ROOT}/build/cmake/util.cmake")
-
-# This file sets default values for libaom configuration variables. All libaom
-# config variables are added to the CMake variable cache via the macros provided
-# in util.cmake.
-
-#
-# The variables in this section of the file are detected at configuration time,
-# but can be overridden via the use of CONFIG_* and ENABLE_* values also defined
-# in this file.
-#
-
-set_aom_detect_var(INLINE "" STRING "Sets INLINE value for current target.")
-
-# CPUs.
-set_aom_detect_var(ARCH_ARM 0 NUMBER "Enables ARM architecture.")
-set_aom_detect_var(ARCH_MIPS 0 NUMBER "Enables MIPS architecture.")
-set_aom_detect_var(ARCH_PPC 0 NUMBER "Enables PPC architecture.")
-set_aom_detect_var(ARCH_X86 0 NUMBER "Enables X86 architecture.")
-set_aom_detect_var(ARCH_X86_64 0 NUMBER "Enables X86_64 architecture.")
-
-# ARM feature flags.
-set_aom_detect_var(HAVE_NEON 0 NUMBER "Enables NEON intrinsics optimizations.")
-
-# MIPS feature flags.
-set_aom_detect_var(HAVE_DSPR2 0 NUMBER "Enables DSPR2 optimizations.")
-set_aom_detect_var(HAVE_MIPS32 0 NUMBER "Enables MIPS32 optimizations.")
-set_aom_detect_var(HAVE_MIPS64 0 NUMBER "Enables MIPS64 optimizations. ")
-set_aom_detect_var(HAVE_MSA 0 NUMBER "Enables MSA optimizations.")
-
-# PPC feature flags.
-set_aom_detect_var(HAVE_VSX 0 NUMBER "Enables VSX optimizations.")
-
-# x86/x86_64 feature flags.
-set_aom_detect_var(HAVE_AVX 0 NUMBER "Enables AVX optimizations.")
-set_aom_detect_var(HAVE_AVX2 0 NUMBER "Enables AVX2 optimizations.")
-set_aom_detect_var(HAVE_MMX 0 NUMBER "Enables MMX optimizations. ")
-set_aom_detect_var(HAVE_SSE 0 NUMBER "Enables SSE optimizations.")
-set_aom_detect_var(HAVE_SSE2 0 NUMBER "Enables SSE2 optimizations.")
-set_aom_detect_var(HAVE_SSE3 0 NUMBER "Enables SSE3 optimizations.")
-set_aom_detect_var(HAVE_SSE4_1 0 NUMBER "Enables SSE 4.1 optimizations.")
-set_aom_detect_var(HAVE_SSE4_2 0 NUMBER "Enables SSE 4.2 optimizations.")
-set_aom_detect_var(HAVE_SSSE3 0 NUMBER "Enables SSSE3 optimizations.")
-
-# Flags describing the build environment.
-set_aom_detect_var(HAVE_FEXCEPT 0 NUMBER
- "Internal flag, GNU fenv.h present for target.")
-set_aom_detect_var(HAVE_PTHREAD_H 0 NUMBER
- "Internal flag, target pthread support.")
-set_aom_detect_var(HAVE_UNISTD_H 0 NUMBER
- "Internal flag, unistd.h present for target.")
-set_aom_detect_var(HAVE_WXWIDGETS 0 NUMBER "WxWidgets present.")
-
-#
-# Variables in this section can be set from the CMake command line or from
-# within the CMake GUI. The variables control libaom features.
-#
-
-# Build configuration flags.
-set_aom_config_var(AOM_RTCD_FLAGS "" STRING
- "Arguments to pass to rtcd.pl. Separate with ';'")
-set_aom_config_var(CONFIG_AV1_DECODER 1 NUMBER "Enable AV1 decoder.")
-set_aom_config_var(CONFIG_AV1_ENCODER 1 NUMBER "Enable AV1 encoder.")
-set_aom_config_var(CONFIG_BIG_ENDIAN 0 NUMBER "Internal flag.")
-set_aom_config_var(CONFIG_GCC 0 NUMBER "Building with GCC (detect).")
-set_aom_config_var(CONFIG_GCOV 0 NUMBER "Enable gcov support.")
-set_aom_config_var(CONFIG_GPROF 0 NUMBER "Enable gprof support.")
-set_aom_config_var(CONFIG_LIBYUV 1 NUMBER
- "Enables libyuv scaling/conversion support.")
-
-set_aom_config_var(CONFIG_MULTITHREAD 1 NUMBER "Multithread support.")
-set_aom_config_var(CONFIG_OS_SUPPORT 0 NUMBER "Internal flag.")
-set_aom_config_var(CONFIG_PIC 0 NUMBER "Build with PIC enabled.")
-set_aom_config_var(CONFIG_RUNTIME_CPU_DETECT 1 NUMBER
- "Runtime CPU detection support.")
-set_aom_config_var(CONFIG_SHARED 0 NUMBER "Build shared libs.")
-set_aom_config_var(CONFIG_STATIC 1 NUMBER "Build static libs.")
-set_aom_config_var(CONFIG_WEBM_IO 1 NUMBER "Enables WebM support.")
-
-# Debugging flags.
-set_aom_config_var(CONFIG_BITSTREAM_DEBUG 0 NUMBER "Bitstream debugging flag.")
-set_aom_config_var(CONFIG_DEBUG 0 NUMBER "Debug build flag.")
-set_aom_config_var(CONFIG_MISMATCH_DEBUG 0 NUMBER "Mismatch debugging flag.")
-
-# AV1 feature flags.
-set_aom_config_var(CONFIG_ACCOUNTING 0 NUMBER "Enables bit accounting.")
-set_aom_config_var(CONFIG_ANALYZER 0 NUMBER "Enables bit stream analyzer.")
-set_aom_config_var(CONFIG_COEFFICIENT_RANGE_CHECKING 0 NUMBER
- "Coefficient range check.")
-set_aom_config_var(CONFIG_DENOISE 1 NUMBER
- "Denoise/noise modeling support in encoder.")
-set_aom_config_var(CONFIG_FILEOPTIONS 1 NUMBER
- "Enables encoder config file support.")
-set_aom_config_var(CONFIG_FIX_GF_LENGTH 1 NUMBER
- "Fix the GF length if possible")
-set_aom_config_var(CONFIG_INSPECTION 0 NUMBER "Enables bitstream inspection.")
-set_aom_config_var(CONFIG_INTERNAL_STATS 0 NUMBER
- "Enables internal encoder stats.")
-set_aom_config_var(CONFIG_LOWBITDEPTH 0 NUMBER
- "Enables 8-bit optimized pipeline.")
-set_aom_config_var(CONFIG_MAX_DECODE_PROFILE 2 NUMBER
- "Max profile to support decoding.")
-set_aom_config_var(CONFIG_NORMAL_TILE_MODE 0 NUMBER
- "Only enables normal tile mode.")
-set_aom_config_var(
- CONFIG_REDUCED_ENCODER_BORDER 0 NUMBER
- "Enable reduced border extention for encoder. \
- Disables superres and resize support."
- )
-set_aom_config_var(CONFIG_SIZE_LIMIT 0 NUMBER "Limit max decode width/height.")
-set_aom_config_var(CONFIG_SPATIAL_RESAMPLING 1 NUMBER "Spatial resampling.")
-set_aom_config_var(DECODE_HEIGHT_LIMIT 0 NUMBER "Set limit for decode height.")
-set_aom_config_var(DECODE_WIDTH_LIMIT 0 NUMBER "Set limit for decode width.")
-set_aom_config_var(CONFIG_GLOBAL_MOTION_SEARCH 1 NUMBER
- "Global motion search flag.")
-
-# AV1 experiment flags.
-set_aom_config_var(CONFIG_COLLECT_INTER_MODE_RD_STATS 1 NUMBER
- "AV1 experiment flag.")
-set_aom_config_var(CONFIG_COLLECT_RD_STATS 0 NUMBER "AV1 experiment flag.")
-set_aom_config_var(CONFIG_DIST_8X8 0 NUMBER "AV1 experiment flag.")
-set_aom_config_var(CONFIG_ENTROPY_STATS 0 NUMBER "AV1 experiment flag.")
-set_aom_config_var(CONFIG_FP_MB_STATS 0 NUMBER "AV1 experiment flag.")
-set_aom_config_var(CONFIG_INTER_STATS_ONLY 0 NUMBER "AV1 experiment flag.")
-set_aom_config_var(CONFIG_RD_DEBUG 0 NUMBER "AV1 experiment flag.")
-set_aom_config_var(CONFIG_2PASS_PARTITION_SEARCH_LVL 1 NUMBER
- "AV1 experiment flag.")
-set_aom_config_var(CONFIG_SHARP_SETTINGS 0 NUMBER
- "Use sharper encoding settings")
-
-#
-# Variables in this section control optional features of the build system.
-#
-set_aom_option_var(ENABLE_CCACHE "Enable ccache support." OFF)
-set_aom_option_var(ENABLE_DECODE_PERF_TESTS "Enables decoder performance tests"
- OFF)
-set_aom_option_var(ENABLE_DISTCC "Enable distcc support." OFF)
-set_aom_option_var(ENABLE_DOCS
- "Enable documentation generation (doxygen required)." ON)
-set_aom_option_var(ENABLE_ENCODE_PERF_TESTS "Enables encoder performance tests"
- OFF)
-set_aom_option_var(ENABLE_EXAMPLES "Enables build of example code." ON)
-set_aom_option_var(ENABLE_GOMA "Enable goma support." OFF)
-set_aom_option_var(
- ENABLE_IDE_TEST_HOSTING
- "Enables running tests within IDEs like Visual Studio and Xcode." OFF)
-set_aom_option_var(ENABLE_NASM "Use nasm instead of yasm for x86 assembly." OFF)
-set_aom_option_var(ENABLE_TESTDATA "Enables unit test data download targets."
- ON)
-set_aom_option_var(ENABLE_TESTS "Enables unit tests." ON)
-set_aom_option_var(ENABLE_TOOLS "Enable applications in tools sub directory."
- ON)
-set_aom_option_var(ENABLE_WERROR "Converts warnings to errors at compile time."
- OFF)
-
-# ARM assembly/intrinsics flags.
-set_aom_option_var(ENABLE_NEON "Enables NEON optimizations on ARM targets." ON)
-
-# MIPS assembly/intrinsics flags.
-set_aom_option_var(ENABLE_DSPR2 "Enables DSPR2 optimizations on MIPS targets."
- OFF)
-set_aom_option_var(ENABLE_MSA "Enables MSA optimizations on MIPS targets." OFF)
-
-# VSX intrinsics flags.
-set_aom_option_var(ENABLE_VSX "Enables VSX optimizations on PowerPC targets."
- ON)
-
-# x86/x86_64 assembly/intrinsics flags.
-set_aom_option_var(ENABLE_MMX
- "Enables MMX optimizations on x86/x86_64 targets." ON)
-set_aom_option_var(ENABLE_SSE
- "Enables SSE optimizations on x86/x86_64 targets." ON)
-set_aom_option_var(ENABLE_SSE2
- "Enables SSE2 optimizations on x86/x86_64 targets." ON)
-set_aom_option_var(ENABLE_SSE3
- "Enables SSE3 optimizations on x86/x86_64 targets." ON)
-set_aom_option_var(ENABLE_SSSE3
- "Enables SSSE3 optimizations on x86/x86_64 targets." ON)
-set_aom_option_var(ENABLE_SSE4_1
- "Enables SSE4_1 optimizations on x86/x86_64 targets." ON)
-set_aom_option_var(ENABLE_SSE4_2
- "Enables SSE4_2 optimizations on x86/x86_64 targets." ON)
-set_aom_option_var(ENABLE_AVX
- "Enables AVX optimizations on x86/x86_64 targets." ON)
-set_aom_option_var(ENABLE_AVX2
- "Enables AVX2 optimizations on x86/x86_64 targets." ON)
diff --git a/third_party/aom/build/cmake/aom_configure.cmake b/third_party/aom/build/cmake/aom_configure.cmake
deleted file mode 100644
index c0c7381e8..000000000
--- a/third_party/aom/build/cmake/aom_configure.cmake
+++ /dev/null
@@ -1,377 +0,0 @@
-#
-# Copyright (c) 2016, Alliance for Open Media. All rights reserved
-#
-# This source code is subject to the terms of the BSD 2 Clause License and the
-# Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License was
-# not distributed with this source code in the LICENSE file, you can obtain it
-# at www.aomedia.org/license/software. If the Alliance for Open Media Patent
-# License 1.0 was not distributed with this source code in the PATENTS file, you
-# can obtain it at www.aomedia.org/license/patent.
-#
-if(AOM_BUILD_CMAKE_AOM_CONFIGURE_CMAKE_)
- return()
-endif() # AOM_BUILD_CMAKE_AOM_CONFIGURE_CMAKE_
-set(AOM_BUILD_CMAKE_AOM_CONFIGURE_CMAKE_ 1)
-
-include(FindGit)
-include(FindPerl)
-include(FindThreads)
-
-set(AOM_SUPPORTED_CPU_TARGETS
- "arm64 armv7 armv7s generic mips32 mips64 ppc x86 x86_64")
-
-include("${AOM_ROOT}/build/cmake/aom_config_defaults.cmake")
-include("${AOM_ROOT}/build/cmake/aom_experiment_deps.cmake")
-include("${AOM_ROOT}/build/cmake/aom_optimization.cmake")
-include("${AOM_ROOT}/build/cmake/compiler_flags.cmake")
-include("${AOM_ROOT}/build/cmake/compiler_tests.cmake")
-include("${AOM_ROOT}/build/cmake/util.cmake")
-
-# Generate the user config settings.
-list(APPEND aom_build_vars ${AOM_CONFIG_VARS} ${AOM_OPTION_VARS})
-foreach(cache_var ${aom_build_vars})
- get_property(cache_var_helpstring CACHE ${cache_var} PROPERTY HELPSTRING)
- if("${cache_var_helpstring}" STREQUAL "${cmake_cmdline_helpstring}")
- set(AOM_CMAKE_CONFIG "${AOM_CMAKE_CONFIG} -D${cache_var}=${${cache_var}}")
- endif()
-endforeach()
-string(STRIP "${AOM_CMAKE_CONFIG}" AOM_CMAKE_CONFIG)
-
-# Detect target CPU.
-#
-# Runs only when AOM_TARGET_CPU has not already been set (for example on the
-# cmake command line). The value is derived from CMAKE_SYSTEM_PROCESSOR.
-if(NOT AOM_TARGET_CPU)
- if("${CMAKE_SYSTEM_PROCESSOR}" STREQUAL "AMD64" OR
- "${CMAKE_SYSTEM_PROCESSOR}" STREQUAL "x86_64")
- # A 64-bit processor name does not imply a 64-bit build; the pointer size
- # decides between the x86 and x86_64 targets.
- if(${CMAKE_SIZEOF_VOID_P} EQUAL 4)
- set(AOM_TARGET_CPU "x86")
- elseif(${CMAKE_SIZEOF_VOID_P} EQUAL 8)
- set(AOM_TARGET_CPU "x86_64")
- else()
- message(FATAL_ERROR
- "--- Unexpected pointer size (${CMAKE_SIZEOF_VOID_P}) for\n"
- " CMAKE_SYSTEM_NAME=${CMAKE_SYSTEM_NAME}\n"
- " CMAKE_SYSTEM_PROCESSOR=${CMAKE_SYSTEM_PROCESSOR}\n"
- " CMAKE_GENERATOR=${CMAKE_GENERATOR}\n")
- endif()
- elseif("${CMAKE_SYSTEM_PROCESSOR}" STREQUAL "i386" OR
- "${CMAKE_SYSTEM_PROCESSOR}" STREQUAL "x86")
- set(AOM_TARGET_CPU "x86")
- elseif("${CMAKE_SYSTEM_PROCESSOR}" MATCHES "^arm" OR
- "${CMAKE_SYSTEM_PROCESSOR}" MATCHES "^mips")
- # arm* and mips* processor names are used verbatim as the target name.
- set(AOM_TARGET_CPU "${CMAKE_SYSTEM_PROCESSOR}")
- elseif("${CMAKE_SYSTEM_PROCESSOR}" MATCHES "aarch64")
- set(AOM_TARGET_CPU "arm64")
- elseif("${CMAKE_SYSTEM_PROCESSOR}" MATCHES "^ppc")
- set(AOM_TARGET_CPU "ppc")
- else()
- # Unknown processors are not an error: warn and build the generic target.
- message(WARNING "The architecture ${CMAKE_SYSTEM_PROCESSOR} is not "
- "supported, falling back to the generic target")
- set(AOM_TARGET_CPU "generic")
- endif()
-endif()
-
-if(CMAKE_TOOLCHAIN_FILE) # Add toolchain file to config string.
- file(RELATIVE_PATH toolchain_path "${AOM_CONFIG_DIR}"
- "${CMAKE_TOOLCHAIN_FILE}")
- set(toolchain_string "-DCMAKE_TOOLCHAIN_FILE=\\\"${toolchain_path}\\\"")
- set(AOM_CMAKE_CONFIG "${toolchain_string} ${AOM_CMAKE_CONFIG}")
-else()
-
- # Add detected CPU to the config string.
- set(AOM_CMAKE_CONFIG "-DAOM_TARGET_CPU=${AOM_TARGET_CPU} ${AOM_CMAKE_CONFIG}")
-endif()
-set(AOM_CMAKE_CONFIG "-G \\\"${CMAKE_GENERATOR}\\\" ${AOM_CMAKE_CONFIG}")
-file(RELATIVE_PATH source_path "${AOM_CONFIG_DIR}" "${AOM_ROOT}")
-set(AOM_CMAKE_CONFIG "cmake ${source_path} ${AOM_CMAKE_CONFIG}")
-string(STRIP "${AOM_CMAKE_CONFIG}" AOM_CMAKE_CONFIG)
-
-message("--- aom_configure: Detected CPU: ${AOM_TARGET_CPU}")
-set(AOM_TARGET_SYSTEM ${CMAKE_SYSTEM_NAME})
-
-if("${CMAKE_BUILD_TYPE}" MATCHES "Deb")
- set(CONFIG_DEBUG 1)
-endif()
-
-if(BUILD_SHARED_LIBS)
- set(CONFIG_PIC 1)
- set(CONFIG_SHARED 1)
- set(CONFIG_STATIC 0)
-endif()
-
-if(NOT MSVC)
- if(CONFIG_PIC)
-
- # TODO(tomfinegan): clang needs -pie in CMAKE_EXE_LINKER_FLAGS for this to
- # work.
- set(CMAKE_POSITION_INDEPENDENT_CODE ON)
- if("${AOM_TARGET_SYSTEM}" STREQUAL "Linux" AND "${AOM_TARGET_CPU}" MATCHES
- "^armv7")
- set(AOM_AS_FLAGS ${AOM_AS_FLAGS} --defsym PIC=1)
- else()
- set(AOM_AS_FLAGS ${AOM_AS_FLAGS} -DPIC)
- endif()
- endif()
-endif()
-
-if(NOT "${AOM_SUPPORTED_CPU_TARGETS}" MATCHES "${AOM_TARGET_CPU}")
- message(FATAL_ERROR
- "No RTCD support for ${AOM_TARGET_CPU}. Create it, or "
- "add -DAOM_TARGET_CPU=generic to your cmake command line for a "
- "generic build of libaom and tools.")
-endif()
-
-if("${AOM_TARGET_CPU}" STREQUAL "x86" OR "${AOM_TARGET_CPU}" STREQUAL "x86_64")
- find_program(AS_EXECUTABLE yasm $ENV{YASM_PATH})
- if(NOT AS_EXECUTABLE OR ENABLE_NASM)
- unset(AS_EXECUTABLE CACHE)
- find_program(AS_EXECUTABLE nasm $ENV{NASM_PATH})
- if(AS_EXECUTABLE)
- test_nasm()
- endif()
- endif()
-
- if(NOT AS_EXECUTABLE)
- message(FATAL_ERROR
- "Unable to find assembler. Install 'yasm' or 'nasm.' "
- "To build without optimizations, add -DAOM_TARGET_CPU=generic to "
- "your cmake command line.")
- endif()
- get_asm_obj_format("objformat")
- set(AOM_AS_FLAGS -f ${objformat} ${AOM_AS_FLAGS})
- string(STRIP "${AOM_AS_FLAGS}" AOM_AS_FLAGS)
-elseif("${AOM_TARGET_CPU}" MATCHES "arm")
- if("${AOM_TARGET_SYSTEM}" STREQUAL "Darwin")
- set(AS_EXECUTABLE as)
- set(AOM_AS_FLAGS -arch ${AOM_TARGET_CPU} -isysroot ${CMAKE_OSX_SYSROOT})
- elseif("${AOM_TARGET_SYSTEM}" STREQUAL "Linux")
- if(NOT AS_EXECUTABLE)
- set(AS_EXECUTABLE as)
- endif()
- elseif("${AOM_TARGET_SYSTEM}" STREQUAL "Windows")
- if(NOT AS_EXECUTABLE)
- set(AS_EXECUTABLE ${CMAKE_C_COMPILER} -c -mimplicit-it=always)
- endif()
- endif()
- if(NOT AS_EXECUTABLE)
- message(FATAL_ERROR
- "Unknown assembler for: ${AOM_TARGET_CPU}-${AOM_TARGET_SYSTEM}")
- endif()
-
- string(STRIP "${AOM_AS_FLAGS}" AOM_AS_FLAGS)
-endif()
-
-if(CONFIG_ANALYZER)
- include(FindwxWidgets)
- find_package(wxWidgets REQUIRED adv base core)
- include(${wxWidgets_USE_FILE})
-endif()
-
-if(NOT MSVC AND CMAKE_C_COMPILER_ID MATCHES "GNU\|Clang")
- set(CONFIG_GCC 1)
-endif()
-
-if(CONFIG_GCOV)
- message("--- Testing for CONFIG_GCOV support.")
- require_linker_flag("-fprofile-arcs -ftest-coverage")
- require_compiler_flag("-fprofile-arcs -ftest-coverage" YES)
-endif()
-
-if(CONFIG_GPROF)
- message("--- Testing for CONFIG_GPROF support.")
- require_compiler_flag("-pg" YES)
-endif()
-
-if("${AOM_TARGET_SYSTEM}" MATCHES "Darwin\|Linux\|Windows")
- set(CONFIG_OS_SUPPORT 1)
-endif()
-
-#
-# Fix CONFIG_* dependencies. This must be done before including cpu.cmake to
-# ensure RTCD_CONFIG_* are properly set.
-fix_experiment_configs()
-
-# Test compiler support.
-aom_get_inline("INLINE")
-
-# Don't just check for pthread.h, but use the result of the full pthreads
-# including a linking check in FindThreads above.
-set(HAVE_PTHREAD_H ${CMAKE_USE_PTHREADS_INIT})
-aom_check_source_compiles("unistd_check" "#include <unistd.h>" HAVE_UNISTD_H)
-
-if(NOT MSVC)
- aom_push_var(CMAKE_REQUIRED_LIBRARIES "m")
- aom_check_c_compiles(
- "fenv_check"
- "#define _GNU_SOURCE
- #include <fenv.h>
- void unused(void) {
- (void)unused;
- (void)feenableexcept(FE_DIVBYZERO | FE_INVALID);
- }"
- HAVE_FEXCEPT)
- aom_pop_var(CMAKE_REQUIRED_LIBRARIES)
-endif()
-
-include("${AOM_ROOT}/build/cmake/cpu.cmake")
-
-if(ENABLE_CCACHE)
- set_compiler_launcher(ENABLE_CCACHE ccache)
-endif()
-
-if(ENABLE_DISTCC)
- set_compiler_launcher(ENABLE_DISTCC distcc)
-endif()
-
-if(ENABLE_GOMA)
- set_compiler_launcher(ENABLE_GOMA gomacc)
-endif()
-
-if(NOT CONFIG_AV1_DECODER AND NOT CONFIG_AV1_ENCODER)
- message(FATAL_ERROR "Decoder and encoder disabled, nothing to build.")
-endif()
-
-if(DECODE_HEIGHT_LIMIT OR DECODE_WIDTH_LIMIT)
- change_config_and_warn(CONFIG_SIZE_LIMIT 1
- "DECODE_HEIGHT_LIMIT and DECODE_WIDTH_LIMIT")
-endif()
-
-if(CONFIG_SIZE_LIMIT)
- if(NOT DECODE_HEIGHT_LIMIT OR NOT DECODE_WIDTH_LIMIT)
- message(FATAL_ERROR "When setting CONFIG_SIZE_LIMIT, DECODE_HEIGHT_LIMIT "
- "and DECODE_WIDTH_LIMIT must be set.")
- endif()
-endif()
-
-# Test compiler flags.
-if(MSVC)
- add_compiler_flag_if_supported("/W3")
-
- # Disable MSVC warnings that suggest making code non-portable.
- add_compiler_flag_if_supported("/wd4996")
- if(ENABLE_WERROR)
- add_compiler_flag_if_supported("/WX")
- endif()
-else()
- require_c_flag("-std=c99" YES)
- add_compiler_flag_if_supported("-Wall")
- add_compiler_flag_if_supported("-Wdisabled-optimization")
- add_compiler_flag_if_supported("-Wextra")
- add_compiler_flag_if_supported("-Wfloat-conversion")
- add_compiler_flag_if_supported("-Wimplicit-function-declaration")
- add_compiler_flag_if_supported("-Wlogical-op")
- add_compiler_flag_if_supported("-Wpointer-arith")
- add_compiler_flag_if_supported("-Wsign-compare")
- add_compiler_flag_if_supported("-Wstring-conversion")
- add_compiler_flag_if_supported("-Wtype-limits")
- add_compiler_flag_if_supported("-Wuninitialized")
- add_compiler_flag_if_supported("-Wunused")
- add_compiler_flag_if_supported("-Wvla")
-
- add_c_flag_if_supported("-Wstack-usage=100000")
- add_cxx_flag_if_supported("-Wstack-usage=360000")
-
- # TODO(jzern): this could be added as a cxx flags for test/*.cc only, avoiding
- # third_party.
- add_c_flag_if_supported("-Wshorten-64-to-32")
-
- # Add -Wshadow only for C files to avoid massive gtest warning spam.
- add_c_flag_if_supported("-Wshadow")
-
- # Add -Wundef only for C files to avoid massive gtest warning spam.
- add_c_flag_if_supported("-Wundef")
-
- # Quiet gcc 6 vs 7 abi warnings:
- # https://gcc.gnu.org/bugzilla/show_bug.cgi?id=77728
- if("${AOM_TARGET_CPU}" MATCHES "arm")
- add_cxx_flag_if_supported("-Wno-psabi")
- endif()
-
- if(ENABLE_WERROR)
- add_compiler_flag_if_supported("-Werror")
- endif()
-
- if("${CMAKE_BUILD_TYPE}" MATCHES "Rel")
- add_compiler_flag_if_supported("-U_FORTIFY_SOURCE -D_FORTIFY_SOURCE=0")
- endif()
- add_compiler_flag_if_supported("-D_LARGEFILE_SOURCE")
- add_compiler_flag_if_supported("-D_FILE_OFFSET_BITS=64")
-endif()
-
-set(AOM_LIB_LINK_TYPE PUBLIC)
-if(EMSCRIPTEN)
-
- # Avoid CMake generation time errors resulting from collisions with the form
- # of target_link_libraries() used by Emscripten.cmake.
- unset(AOM_LIB_LINK_TYPE)
-endif()
-
-# Generate aom_config templates.
-set(aom_config_asm_template "${AOM_CONFIG_DIR}/config/aom_config.asm.cmake")
-set(aom_config_h_template "${AOM_CONFIG_DIR}/config/aom_config.h.cmake")
-execute_process(COMMAND
- ${CMAKE_COMMAND} -DAOM_CONFIG_DIR=${AOM_CONFIG_DIR}
- -DAOM_ROOT=${AOM_ROOT} -P
- "${AOM_ROOT}/build/cmake/generate_aom_config_templates.cmake")
-
-# Generate aom_config.{asm,h}.
-configure_file("${aom_config_asm_template}"
- "${AOM_CONFIG_DIR}/config/aom_config.asm")
-configure_file("${aom_config_h_template}"
- "${AOM_CONFIG_DIR}/config/aom_config.h")
-
-# Read the current git hash.
-find_package(Git)
-if(NOT GIT_FOUND)
- message("--- Git missing, version will be read from CHANGELOG.")
-endif()
-
-configure_file("${AOM_ROOT}/build/cmake/aom_config.c.template"
- "${AOM_CONFIG_DIR}/config/aom_config.c")
-
-# Find Perl and generate the RTCD sources.
-find_package(Perl)
-if(NOT PERL_FOUND)
- message(FATAL_ERROR "Perl is required to build libaom.")
-endif()
-
-set(AOM_RTCD_CONFIG_FILE_LIST "${AOM_ROOT}/aom_dsp/aom_dsp_rtcd_defs.pl"
- "${AOM_ROOT}/aom_scale/aom_scale_rtcd.pl"
- "${AOM_ROOT}/av1/common/av1_rtcd_defs.pl")
-set(AOM_RTCD_HEADER_FILE_LIST "${AOM_CONFIG_DIR}/config/aom_dsp_rtcd.h"
- "${AOM_CONFIG_DIR}/config/aom_scale_rtcd.h"
- "${AOM_CONFIG_DIR}/config/av1_rtcd.h")
-set(AOM_RTCD_SOURCE_FILE_LIST "${AOM_ROOT}/aom_dsp/aom_dsp_rtcd.c"
- "${AOM_ROOT}/aom_scale/aom_scale_rtcd.c"
- "${AOM_ROOT}/av1/common/av1_rtcd.c")
-set(AOM_RTCD_SYMBOL_LIST aom_dsp_rtcd aom_scale_rtcd av1_rtcd)
-list(LENGTH AOM_RTCD_SYMBOL_LIST AOM_RTCD_CUSTOM_COMMAND_COUNT)
-math(EXPR AOM_RTCD_CUSTOM_COMMAND_COUNT "${AOM_RTCD_CUSTOM_COMMAND_COUNT} - 1")
-
-foreach(NUM RANGE ${AOM_RTCD_CUSTOM_COMMAND_COUNT})
- list(GET AOM_RTCD_CONFIG_FILE_LIST ${NUM} AOM_RTCD_CONFIG_FILE)
- list(GET AOM_RTCD_HEADER_FILE_LIST ${NUM} AOM_RTCD_HEADER_FILE)
- list(GET AOM_RTCD_SOURCE_FILE_LIST ${NUM} AOM_RTCD_SOURCE_FILE)
- list(GET AOM_RTCD_SYMBOL_LIST ${NUM} AOM_RTCD_SYMBOL)
- execute_process(COMMAND ${PERL_EXECUTABLE} "${AOM_ROOT}/build/cmake/rtcd.pl"
- --arch=${AOM_TARGET_CPU}
- --sym=${AOM_RTCD_SYMBOL} ${AOM_RTCD_FLAGS}
- --config=${AOM_CONFIG_DIR}/config/aom_config.h
- ${AOM_RTCD_CONFIG_FILE}
- OUTPUT_FILE ${AOM_RTCD_HEADER_FILE})
-endforeach()
-
-# Generate aom_version.h.
-execute_process(COMMAND ${CMAKE_COMMAND} -DAOM_CONFIG_DIR=${AOM_CONFIG_DIR}
- -DAOM_ROOT=${AOM_ROOT}
- -DGIT_EXECUTABLE=${GIT_EXECUTABLE}
- -DPERL_EXECUTABLE=${PERL_EXECUTABLE} -P
- "${AOM_ROOT}/build/cmake/version.cmake")
-
-if(NOT MSVC) # Generate aom.pc (pkg-config file).
- execute_process(COMMAND ${CMAKE_COMMAND} -DAOM_CONFIG_DIR=${AOM_CONFIG_DIR}
- -DAOM_ROOT=${AOM_ROOT}
- -DCMAKE_INSTALL_PREFIX=${CMAKE_INSTALL_PREFIX}
- -DCMAKE_PROJECT_NAME=${CMAKE_PROJECT_NAME}
- -DCONFIG_MULTITHREAD=${CONFIG_MULTITHREAD}
- -DHAVE_PTHREAD_H=${HAVE_PTHREAD_H} -P
- "${AOM_ROOT}/build/cmake/pkg_config.cmake")
-endif()
diff --git a/third_party/aom/build/cmake/aom_experiment_deps.cmake b/third_party/aom/build/cmake/aom_experiment_deps.cmake
deleted file mode 100644
index 0688704e5..000000000
--- a/third_party/aom/build/cmake/aom_experiment_deps.cmake
+++ /dev/null
@@ -1,32 +0,0 @@
-#
-# Copyright (c) 2017, Alliance for Open Media. All rights reserved
-#
-# This source code is subject to the terms of the BSD 2 Clause License and the
-# Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License was
-# not distributed with this source code in the LICENSE file, you can obtain it
-# at www.aomedia.org/license/software. If the Alliance for Open Media Patent
-# License 1.0 was not distributed with this source code in the PATENTS file, you
-# can obtain it at www.aomedia.org/license/patent.
-#
-if(AOM_BUILD_CMAKE_AOM_EXPERIMENT_DEPS_CMAKE_)
- return()
-endif() # AOM_BUILD_CMAKE_AOM_EXPERIMENT_DEPS_CMAKE_
-set(AOM_BUILD_CMAKE_AOM_EXPERIMENT_DEPS_CMAKE_ 1)
-
-# Adjusts CONFIG_* CMake variables to address conflicts between active AV1
-# experiments.
-#
-# This is a macro, so it executes in the caller's variable scope. Each
-# adjustment is delegated to change_config_and_warn(), which is defined outside
-# this file; presumably it sets the variable named by its first argument to the
-# given value and warns, citing the third argument as the reason -- TODO
-# confirm against its definition.
-macro(fix_experiment_configs)
-
- # The analyzer is passed as the reason for forcing CONFIG_INSPECTION on.
- if(CONFIG_ANALYZER)
- change_config_and_warn(CONFIG_INSPECTION 1 CONFIG_ANALYZER)
- endif()
-
- # NOTE(review): CONFIG_RD_DEBUG is forced to 0 whenever it is enabled. The
- # condition does not test CONFIG_JNT_COMP; that name is only passed as the
- # third argument. Confirm this is intentional.
- if(CONFIG_RD_DEBUG)
- change_config_and_warn(CONFIG_RD_DEBUG 0 CONFIG_JNT_COMP)
- endif()
-
- # CONFIG_DIST_8X8 is switched off when multithreading is enabled.
- if(CONFIG_DIST_8X8 AND CONFIG_MULTITHREAD)
- change_config_and_warn(CONFIG_DIST_8X8 0 CONFIG_MULTITHREAD)
- endif()
-
-endmacro()
diff --git a/third_party/aom/build/cmake/aom_optimization.cmake b/third_party/aom/build/cmake/aom_optimization.cmake
deleted file mode 100644
index be32a3212..000000000
--- a/third_party/aom/build/cmake/aom_optimization.cmake
+++ /dev/null
@@ -1,212 +0,0 @@
-#
-# Copyright (c) 2017, Alliance for Open Media. All rights reserved
-#
-# This source code is subject to the terms of the BSD 2 Clause License and the
-# Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License was
-# not distributed with this source code in the LICENSE file, you can obtain it
-# at www.aomedia.org/license/software. If the Alliance for Open Media Patent
-# License 1.0 was not distributed with this source code in the PATENTS file, you
-# can obtain it at www.aomedia.org/license/patent.
-#
-if(AOM_BUILD_CMAKE_AOM_OPTIMIZATION_CMAKE_)
- return()
-endif() # AOM_BUILD_CMAKE_AOM_OPTIMIZATION_CMAKE_
-set(AOM_BUILD_CMAKE_AOM_OPTIMIZATION_CMAKE_ 1)
-
-include("${AOM_ROOT}/build/cmake/util.cmake")
-
-# Maps the GCC-style intrinsics flag in $flag to its MSVC equivalent. The
-# result is stored in the caller's variable named by $translated_flag. When
-# MSVC needs no flag for the requested instruction set, that variable is unset
-# instead.
-function(get_msvc_intrinsic_flag flag translated_flag)
-  # Only AVX and AVX2 have an MSVC /arch: counterpart; everything else maps to
-  # "no flag".
-  set(msvc_arch_option "")
-  if("${flag}" STREQUAL "-mavx")
-    set(msvc_arch_option "/arch:AVX")
-  elseif("${flag}" STREQUAL "-mavx2")
-    set(msvc_arch_option "/arch:AVX2")
-  endif()
-
-  if("${msvc_arch_option}" STREQUAL "")
-    unset(${translated_flag} PARENT_SCOPE)
-  else()
-    set(${translated_flag} "${msvc_arch_option}" PARENT_SCOPE)
-  endif()
-endfunction()
-
-# Adds an OBJECT library target named
-# ${target_to_update}_${opt_name}_intrinsics for the sources in the list
-# variable named by $sources, and adds its objects to $dependent_target as
-# private sources. Does nothing when the source list is empty.
-#
-# $flag is the intrinsics flag required by the current compiler. It is added as
-# a PUBLIC compile option of the new target. Under MSVC it is first translated
-# by get_msvc_intrinsic_flag(), which may leave it unset so that no option is
-# added. No compiler support test for $flag is performed here.
-#
-# $opt_name and $target_to_update are used only to build the new target's
-# name. The new target is also appended to AOM_LIB_TARGETS in the caller's
-# scope.
-#
-# Note: the libaom target is always updated because OBJECT libraries have rules
-# that disallow the direct addition of .o files to them as dependencies. Static
-# libraries do not have this limitation.
-function(add_intrinsics_object_library flag opt_name target_to_update sources
- dependent_target)
- if("${${sources}}" STREQUAL "")
- return()
- endif()
- set(target_name ${target_to_update}_${opt_name}_intrinsics)
- add_library(${target_name} OBJECT ${${sources}})
-
- # The output variable is this function's own "flag", so the translated value
- # (or its absence) replaces the GCC-style flag from here on.
- if(MSVC)
- get_msvc_intrinsic_flag(${flag} "flag")
- endif()
-
- if(flag)
- # Split a space-separated flag string into a list so each option is passed
- # to the compiler as a separate argument.
- separate_arguments(flag)
- target_compile_options(${target_name} PUBLIC ${flag})
- endif()
-
- target_sources(${dependent_target} PRIVATE $<TARGET_OBJECTS:${target_name}>)
-
- # Add the new lib target to the global list of aom library targets.
- list(APPEND AOM_LIB_TARGETS ${target_name})
- set(AOM_LIB_TARGETS ${AOM_LIB_TARGETS} PARENT_SCOPE)
-endfunction()
-
-# Adds sources in list named by $sources to $target and adds $flag to the
-# compile flags for each source file.
-#
-# Under MSVC, $flag is first translated by get_msvc_intrinsic_flag(); when MSVC
-# needs no flag the sources are added without any extra compile flags.
-function(add_intrinsics_source_to_target flag target sources)
-  target_sources(${target} PRIVATE ${${sources}})
-  if(MSVC)
-    get_msvc_intrinsic_flag(${flag} "flag")
-  endif()
-  if(flag)
-    foreach(source ${${sources}})
-      # COMPILE_FLAGS is a single string, not a list. APPEND would join an
-      # existing value and $flag with ';', so use APPEND_STRING with an
-      # explicit space separator.
-      set_property(SOURCE ${source}
-                   APPEND_STRING
-                   PROPERTY COMPILE_FLAGS " ${flag}")
-    endforeach()
-  endif()
-endfunction()
-
-# Determines the assembler object format for the current AOM_TARGET_CPU /
-# AOM_TARGET_SYSTEM pair and stores it in the caller's variable named by
-# $out_format. Generation is aborted when either the CPU or the system has no
-# known object format.
-function(get_asm_obj_format out_format)
-  # The CPU selects the word size suffix of the format name.
-  if("${AOM_TARGET_CPU}" STREQUAL "x86_64")
-    set(word_size 64)
-  elseif("${AOM_TARGET_CPU}" STREQUAL "x86")
-    set(word_size 32)
-  else()
-    message(FATAL_ERROR
-              "Unknown obj format: ${AOM_TARGET_CPU}-${AOM_TARGET_SYSTEM}")
-  endif()
-
-  # The system selects the container family.
-  if("${AOM_TARGET_SYSTEM}" STREQUAL "Darwin")
-    set(format_name "macho${word_size}")
-  elseif("${AOM_TARGET_SYSTEM}" STREQUAL "Linux")
-    set(format_name "elf${word_size}")
-  elseif("${AOM_TARGET_SYSTEM}" STREQUAL "MSYS" OR "${AOM_TARGET_SYSTEM}"
-         STREQUAL "Windows")
-    set(format_name "win${word_size}")
-  else()
-    message(FATAL_ERROR "Unknown obj format: ${AOM_TARGET_SYSTEM}")
-  endif()
-
-  set(${out_format} ${format_name} PARENT_SCOPE)
-endfunction()
-
-# Adds library target named $lib_name for ASM files in variable named by
-# $asm_sources. Builds an output directory path from $lib_name. Links $lib_name
-# into $dependent_target. Generates a dummy C file with a dummy function to
-# ensure that all cmake generators can determine the linker language, and that
-# build tools don't complain that an object exposes no symbols.
-#
-# Does nothing when the source list is empty. Each source is assembled by a
-# custom command that runs ${AS_EXECUTABLE} with ${AOM_AS_FLAGS}, and $lib_name
-# is appended to AOM_LIB_TARGETS in the caller's scope.
-#
-# NOTE(review): the assembled objects are added to the hard-coded "aom" target
-# and $dependent_target is not referenced in this body -- confirm whether the
-# description above or the code reflects the intent.
-function(add_asm_library lib_name asm_sources dependent_target)
- if("${${asm_sources}}" STREQUAL "")
- return()
- endif()
- # Objects are written below the build tree, one directory per library.
- set(asm_lib_obj_dir "${AOM_CONFIG_DIR}/asm_objects/${lib_name}")
- if(NOT EXISTS "${asm_lib_obj_dir}")
- file(MAKE_DIRECTORY "${asm_lib_obj_dir}")
- endif()
-
- # TODO(tomfinegan): If cmake ever allows addition of .o files to OBJECT lib
- # targets, make this OBJECT instead of STATIC to hide the target from
- # consumers of the AOM cmake build.
- add_library(${lib_name} STATIC ${${asm_sources}})
-
- foreach(asm_source ${${asm_sources}})
- # The object is named after the source file name alone (directory
- # stripped) with ".o" appended.
- get_filename_component(asm_source_name "${asm_source}" NAME)
- set(asm_object "${asm_lib_obj_dir}/${asm_source_name}.o")
- add_custom_command(OUTPUT "${asm_object}"
- COMMAND ${AS_EXECUTABLE} ARGS ${AOM_AS_FLAGS}
- -I${AOM_ROOT}/ -I${AOM_CONFIG_DIR}/ -o
- "${asm_object}" "${asm_source}"
- DEPENDS "${asm_source}"
- COMMENT "Building ASM object ${asm_object}"
- WORKING_DIRECTORY "${AOM_CONFIG_DIR}" VERBATIM)
- target_sources(aom PRIVATE "${asm_object}")
- endforeach()
-
- # The above created a target containing only ASM sources. Cmake needs help
- # here to determine the linker language. Add a dummy C file to force the
- # linker language to C. We don't bother with setting the LINKER_LANGUAGE
- # property on the library target because not all generators obey it (looking
- # at you, xcode generator).
- add_dummy_source_file_to_target("${lib_name}" "c")
-
- # Add the new lib target to the global list of aom library targets.
- list(APPEND AOM_LIB_TARGETS ${lib_name})
- set(AOM_LIB_TARGETS ${AOM_LIB_TARGETS} PARENT_SCOPE)
-endfunction()
-
-# Aborts generation when the assembler in ${AS_EXECUTABLE} lacks a required
-# feature. Two things are looked for in the output of `${AS_EXECUTABLE} -hf`:
-# the -Ox argument (multipass optimization), and, on Darwin and Linux only, the
-# object format matching the target CPU. Other systems get no format check.
-function(test_nasm)
-  execute_process(COMMAND ${AS_EXECUTABLE} -hf OUTPUT_VARIABLE nasm_helptext)
-
-  if(NOT "${nasm_helptext}" MATCHES "-Ox")
-    message(FATAL_ERROR
-              "Unsupported nasm: multipass optimization not supported.")
-  endif()
-
-  # x86 needs the 32-bit format; every other target reaching this function is
-  # checked against the 64-bit one.
-  if("${AOM_TARGET_CPU}" STREQUAL "x86")
-    set(word_size 32)
-  else()
-    set(word_size 64)
-  endif()
-
-  if("${AOM_TARGET_SYSTEM}" STREQUAL "Darwin")
-    set(required_format "macho${word_size}")
-  elseif("${AOM_TARGET_SYSTEM}" STREQUAL "Linux")
-    set(required_format "elf${word_size}")
-  else()
-    return()
-  endif()
-
-  if(NOT "${nasm_helptext}" MATCHES "${required_format}")
-    message(
-      FATAL_ERROR
-        "Unsupported nasm: ${required_format} object format not supported.")
-  endif()
-endfunction()
-
-# Adds build command for generation of rtcd C source files using
-# build/cmake/rtcd.pl. $config is the input perl file, $output is the output C
-# include file, $source is the C source file, and $symbol is used for the symbol
-# argument passed to rtcd.pl.
-#
-# The command runs in ${AOM_CONFIG_DIR} and is re-run when $config changes.
-# $source is given an OBJECT_DEPENDS on $output so that it is rebuilt when the
-# generated header changes.
-function(add_rtcd_build_step config output source symbol)
-  add_custom_command(OUTPUT ${output}
-                     COMMAND ${PERL_EXECUTABLE} ARGS
-                             "${AOM_ROOT}/build/cmake/rtcd.pl"
-                             --arch=${AOM_TARGET_CPU}
-                             --sym=${symbol} ${AOM_RTCD_FLAGS}
-                             --config=${AOM_CONFIG_DIR}/config/aom_config.h
-                             ${config} > ${output}
-                     DEPENDS ${config}
-                     COMMENT "Generating ${output}"
-                     WORKING_DIRECTORY ${AOM_CONFIG_DIR} VERBATIM)
-  set_property(SOURCE ${source} PROPERTY OBJECT_DEPENDS ${output})
-  # set_property() with no value removes the property, so the value must be
-  # given explicitly to mark the header as generated.
-  set_property(SOURCE ${output} PROPERTY GENERATED TRUE)
-endfunction()
diff --git a/third_party/aom/build/cmake/compiler_flags.cmake b/third_party/aom/build/cmake/compiler_flags.cmake
deleted file mode 100644
index 79192c1fa..000000000
--- a/third_party/aom/build/cmake/compiler_flags.cmake
+++ /dev/null
@@ -1,373 +0,0 @@
-#
-# Copyright (c) 2016, Alliance for Open Media. All rights reserved
-#
-# This source code is subject to the terms of the BSD 2 Clause License and the
-# Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License was
-# not distributed with this source code in the LICENSE file, you can obtain it
-# at www.aomedia.org/license/software. If the Alliance for Open Media Patent
-# License 1.0 was not distributed with this source code in the PATENTS file, you
-# can obtain it at www.aomedia.org/license/patent.
-#
-if(AOM_BUILD_CMAKE_COMPILER_FLAGS_CMAKE_)
- return()
-endif() # AOM_BUILD_CMAKE_COMPILER_FLAGS_CMAKE_
-set(AOM_BUILD_CMAKE_COMPILER_FLAGS_CMAKE_ 1)
-
-include(CheckCCompilerFlag)
-include(CheckCXXCompilerFlag)
-include("${AOM_ROOT}/build/cmake/compiler_tests.cmake")
-
-# Strings used to cache flags.
-set(AOM_C_FLAGS)
-set(AOM_CXX_FLAGS)
-set(AOM_EXE_LINKER_FLAGS)
-set(AOM_FAILED_C_FLAGS)
-set(AOM_FAILED_CXX_FLAGS)
-
-# Reports whether $flag occurs in the string variable named by $flag_cache.
-# The answer, YES or NO, is written to the variable named by $out_is_present in
-# the caller's scope. The test is a plain substring search.
-function(is_flag_present flag_cache flag out_is_present)
-  string(FIND "${${flag_cache}}" "${flag}" match_index)
-  set(presence YES)
-  if(${match_index} EQUAL -1)
-    set(presence NO)
-  endif()
-  set(${out_is_present} ${presence} PARENT_SCOPE)
-endfunction()
-
-# Appends $flag, preceded by a space, to the string variable named by $flags
-# unless $flag already occurs in it as a substring. The variable is written as
-# a forced cache entry, which makes the update visible regardless of the
-# caller's scope.
-function(append_flag flags flag)
-  string(FIND "${${flags}}" "${flag}" existing_index)
-  if(NOT ${existing_index} EQUAL -1)
-    return()
-  endif()
-  set(${flags} "${${flags}} ${flag}" CACHE STRING "" FORCE)
-endfunction()
-
-# Checks C compiler for support of $c_flag. Adds $c_flag to all
-# $CMAKE_C_FLAGS_<CONFIG>s stored in AOM_C_CONFIGS when the compile test passes.
-# Caches $c_flag in $AOM_C_FLAGS or $AOM_FAILED_C_FLAGS depending on test
-# outcome.
-#
-# Returns immediately when DEBUG_CMAKE_DISABLE_COMPILER_TESTS is set, or when
-# $c_flag is already recorded in either cache, so each flag is tested at most
-# once.
-function(add_c_flag_if_supported c_flag)
-  if(DEBUG_CMAKE_DISABLE_COMPILER_TESTS)
-    return()
-  endif()
-
-  is_flag_present(AOM_C_FLAGS "${c_flag}" flag_ok)
-  is_flag_present(AOM_FAILED_C_FLAGS "${c_flag}" flag_failed)
-  if(${flag_ok} OR ${flag_failed})
-    return()
-  endif()
-
-  # Drop any cached result so check_c_compiler_flag() really runs the test.
-  unset(C_FLAG_SUPPORTED CACHE)
-  message("Checking C compiler flag support for: " ${c_flag})
-  check_c_compiler_flag("${c_flag}" C_FLAG_SUPPORTED)
-
-  if(${C_FLAG_SUPPORTED})
-    append_flag(AOM_C_FLAGS "${c_flag}")
-    foreach(config ${AOM_C_CONFIGS})
-      append_flag("${config}" "${c_flag}")
-    endforeach()
-  else()
-    append_flag(AOM_FAILED_C_FLAGS "${c_flag}")
-  endif()
-endfunction()
-
-# Checks C++ compiler for support of $cxx_flag. Adds $cxx_flag to all
-# $CMAKE_CXX_FLAGS_<CONFIG>s stored in AOM_CXX_CONFIGS when the compile test
-# passes. Caches $cxx_flag in $AOM_CXX_FLAGS or $AOM_FAILED_CXX_FLAGS depending
-# on test outcome.
-#
-# Returns immediately when DEBUG_CMAKE_DISABLE_COMPILER_TESTS is set, or when
-# $cxx_flag is already recorded in either cache, so each flag is tested at most
-# once.
-function(add_cxx_flag_if_supported cxx_flag)
-  if(DEBUG_CMAKE_DISABLE_COMPILER_TESTS)
-    return()
-  endif()
-
-  is_flag_present(AOM_CXX_FLAGS "${cxx_flag}" flag_ok)
-  is_flag_present(AOM_FAILED_CXX_FLAGS "${cxx_flag}" flag_failed)
-  if(${flag_ok} OR ${flag_failed})
-    return()
-  endif()
-
-  # Drop any cached result so check_cxx_compiler_flag() really runs the test.
-  unset(CXX_FLAG_SUPPORTED CACHE)
-  message("Checking C++ compiler flag support for: " ${cxx_flag})
-  check_cxx_compiler_flag("${cxx_flag}" CXX_FLAG_SUPPORTED)
-
-  if(${CXX_FLAG_SUPPORTED})
-    append_flag(AOM_CXX_FLAGS "${cxx_flag}")
-    foreach(config ${AOM_CXX_CONFIGS})
-      append_flag("${config}" "${cxx_flag}")
-    endforeach()
-  else()
-    append_flag(AOM_FAILED_CXX_FLAGS "${cxx_flag}")
-  endif()
-endfunction()
-
-# Convenience method for adding a flag to both the C and C++ compiler command
-# lines.
-#
-# The two compilers are tested independently by add_c_flag_if_supported() and
-# add_cxx_flag_if_supported(), so $flag may end up enabled for only one of them.
-function(add_compiler_flag_if_supported flag)
- add_c_flag_if_supported(${flag})
- add_cxx_flag_if_supported(${flag})
-endfunction()
-
-# Checks C compiler for support of $c_flag and terminates generation when
-# support is not present.
-#
-# On success $c_flag is recorded in AOM_C_FLAGS. When $update_c_flags is true
-# it is also appended to every cached flag variable named in AOM_C_CONFIGS.
-# Nothing happens when DEBUG_CMAKE_DISABLE_COMPILER_TESTS is set or when
-# $c_flag is already recorded in AOM_C_FLAGS.
-function(require_c_flag c_flag update_c_flags)
- if(DEBUG_CMAKE_DISABLE_COMPILER_TESTS)
- return()
- endif()
-
- is_flag_present(AOM_C_FLAGS "${c_flag}" flag_ok)
- if(${flag_ok})
- return()
- endif()
-
- # Run the compile test with the project's extra linker flags in effect.
- # aom_push_var()/aom_pop_var() are defined outside this file; presumably they
- # save and restore the named variable -- TODO confirm.
- if(NOT "${AOM_EXE_LINKER_FLAGS}" STREQUAL "")
- aom_push_var(CMAKE_EXE_LINKER_FLAGS "${AOM_EXE_LINKER_FLAGS}")
- endif()
-
- # Drop any cached result so the check is actually performed.
- unset(HAVE_C_FLAG CACHE)
- message("Checking C compiler flag support for: " ${c_flag})
- check_c_compiler_flag("${c_flag}" HAVE_C_FLAG)
- if(NOT HAVE_C_FLAG)
- message(FATAL_ERROR
- "${PROJECT_NAME} requires support for C flag: ${c_flag}.")
- endif()
-
- if(NOT "${AOM_EXE_LINKER_FLAGS}" STREQUAL "")
- aom_pop_var(CMAKE_EXE_LINKER_FLAGS)
- endif()
-
- append_flag(AOM_C_FLAGS "${c_flag}")
- if(update_c_flags)
- foreach(config ${AOM_C_CONFIGS})
- set(${config} "${${config}} ${c_flag}" CACHE STRING "" FORCE)
- endforeach()
- endif()
-endfunction()
-
-# Checks CXX compiler for support of $cxx_flag and terminates generation when
-# support is not present.
-#
-# On success $cxx_flag is recorded in AOM_CXX_FLAGS. When $update_cxx_flags is
-# true it is also appended to every cached flag variable named in
-# AOM_CXX_CONFIGS. Nothing happens when DEBUG_CMAKE_DISABLE_COMPILER_TESTS is
-# set or when $cxx_flag is already recorded in AOM_CXX_FLAGS.
-function(require_cxx_flag cxx_flag update_cxx_flags)
-  if(DEBUG_CMAKE_DISABLE_COMPILER_TESTS)
-    return()
-  endif()
-
-  is_flag_present(AOM_CXX_FLAGS "${cxx_flag}" flag_ok)
-  if(${flag_ok})
-    return()
-  endif()
-
-  # Run the compile test with the project's extra linker flags in effect.
-  if(NOT "${AOM_EXE_LINKER_FLAGS}" STREQUAL "")
-    aom_push_var(CMAKE_EXE_LINKER_FLAGS "${AOM_EXE_LINKER_FLAGS}")
-  endif()
-
-  # Drop any cached result so the check is actually performed.
-  unset(HAVE_CXX_FLAG CACHE)
-  # The messages name the C++ compiler; they previously said "C", which made
-  # C and C++ failures indistinguishable in the configure log.
-  message("Checking C++ compiler flag support for: " ${cxx_flag})
-  check_cxx_compiler_flag("${cxx_flag}" HAVE_CXX_FLAG)
-  if(NOT HAVE_CXX_FLAG)
-    message(FATAL_ERROR
-              "${PROJECT_NAME} requires support for C++ flag: ${cxx_flag}.")
-  endif()
-
-  if(NOT "${AOM_EXE_LINKER_FLAGS}" STREQUAL "")
-    aom_pop_var(CMAKE_EXE_LINKER_FLAGS)
-  endif()
-
-  append_flag(AOM_CXX_FLAGS "${cxx_flag}")
-  if(update_cxx_flags)
-    foreach(config ${AOM_CXX_CONFIGS})
-      set(${config} "${${config}} ${cxx_flag}" CACHE STRING "" FORCE)
-    endforeach()
-  endif()
-endfunction()
-
-# Checks for support of $flag by both the C and CXX compilers. Terminates
-# generation when support is not present in both compilers.
-#
-# $update_cmake_flags is forwarded unchanged to require_c_flag() and
-# require_cxx_flag(). The C check runs first, so a flag unsupported by the C
-# compiler stops generation before the CXX compiler is tested.
-function(require_compiler_flag flag update_cmake_flags)
- require_c_flag(${flag} ${update_cmake_flags})
- require_cxx_flag(${flag} ${update_cmake_flags})
-endfunction()
-
-# MSVC-aware wrapper around require_c_flag(): does nothing under MSVC, and
-# otherwise requires C compiler support for $c_flag, terminating generation
-# when it is missing.
-function(require_c_flag_nomsvc c_flag update_c_flags)
-  if(MSVC)
-    return()
-  endif()
-  require_c_flag(${c_flag} ${update_c_flags})
-endfunction()
-
-# MSVC-aware wrapper around require_cxx_flag(): does nothing under MSVC, and
-# otherwise requires CXX compiler support for $cxx_flag, terminating generation
-# when it is missing.
-function(require_cxx_flag_nomsvc cxx_flag update_cxx_flags)
-  if(MSVC)
-    return()
-  endif()
-  require_cxx_flag(${cxx_flag} ${update_cxx_flags})
-endfunction()
-
-# Checks only non-MSVC targets for support of $flag by both the C and CXX
-# compilers. Terminates generation when support is not present in both
-# compilers.
-#
-# The MSVC test itself lives in the two *_nomsvc helpers called below; this
-# function only forwards its arguments to them.
-function(require_compiler_flag_nomsvc flag update_cmake_flags)
- require_c_flag_nomsvc(${flag} ${update_cmake_flags})
- require_cxx_flag_nomsvc(${flag} ${update_cmake_flags})
-endfunction()
-
-# Adds $preproc_def to C compiler command line (as -D$preproc_def) if not
-# already present.
-#
-# The definition is appended to every cached flag variable named in
-# AOM_C_CONFIGS.
-function(add_c_preproc_definition preproc_def)
-  set(preproc_def "-D${preproc_def}")
-  is_flag_present(AOM_C_FLAGS "${preproc_def}" flag_cached)
-  if(${flag_cached})
-    return()
-  endif()
-
-  foreach(config ${AOM_C_CONFIGS})
-    # append_flag() skips variables that already contain the definition. The
-    # AOM_C_FLAGS check above cannot do that on its own, because definitions
-    # are never recorded there; without this guard every call (and every
-    # re-configure) would add another copy to the cached flags.
-    append_flag(${config} "${preproc_def}")
-  endforeach()
-endfunction()
-
-# Adds $preproc_def to CXX compiler command line (as -D$preproc_def) if not
-# already present.
-#
-# The definition is appended to every cached flag variable named in
-# AOM_CXX_CONFIGS.
-function(add_cxx_preproc_definition preproc_def)
-  set(preproc_def "-D${preproc_def}")
-  is_flag_present(AOM_CXX_FLAGS "${preproc_def}" flag_cached)
-  if(${flag_cached})
-    return()
-  endif()
-
-  foreach(config ${AOM_CXX_CONFIGS})
-    # append_flag() skips variables that already contain the definition. The
-    # AOM_CXX_FLAGS check above cannot do that on its own, because definitions
-    # are never recorded there; without this guard every call (and every
-    # re-configure) would add another copy to the cached flags.
-    append_flag(${config} "${preproc_def}")
-  endforeach()
-endfunction()
-
-# Adds $preproc_def to C and CXX compiler command line (as -D$preproc_def) if
-# not already present.
-#
-# Pass the bare macro name (optionally with =value); the -D prefix is added by
-# the per-language helpers called below.
-function(add_preproc_definition preproc_def)
- add_c_preproc_definition(${preproc_def})
- add_cxx_preproc_definition(${preproc_def})
-endfunction()
-
-# Records $flag in AOM_AS_FLAGS, the assembler flag string, unless it is
-# already part of it.
-function(append_as_flag flag)
-  is_flag_present(AOM_AS_FLAGS "${flag}" already_listed)
-  if(NOT ${already_listed})
-    append_flag(AOM_AS_FLAGS "${flag}")
-  endif()
-endfunction()
-
-# Appends $flag to each C flag variable named in AOM_C_CONFIGS. Nothing is
-# done when $flag is already recorded in AOM_C_FLAGS.
-function(append_c_flag flag)
-  is_flag_present(AOM_C_FLAGS "${flag}" already_listed)
-  if(NOT ${already_listed})
-    foreach(flags_var ${AOM_C_CONFIGS})
-      append_flag(${flags_var} "${flag}")
-    endforeach()
-  endif()
-endfunction()
-
-# Appends $flag to each CXX flag variable named in AOM_CXX_CONFIGS. Nothing is
-# done when $flag is already recorded in AOM_CXX_FLAGS.
-function(append_cxx_flag flag)
-  is_flag_present(AOM_CXX_FLAGS "${flag}" already_listed)
-  if(NOT ${already_listed})
-    foreach(flags_var ${AOM_CXX_CONFIGS})
-      append_flag(${flags_var} "${flag}")
-    endforeach()
-  endif()
-endfunction()
-
-# Adds $flag to the C and CXX compiler command lines.
-#
-# No compiler support test is made in this function or in the two helpers it
-# calls; use add_compiler_flag_if_supported() or require_compiler_flag() when
-# the flag must be checked first.
-function(append_compiler_flag flag)
- append_c_flag(${flag})
- append_cxx_flag(${flag})
-endfunction()
-
-# Records $flag in AOM_EXE_LINKER_FLAGS and appends it to each linker flag
-# variable named in AOM_EXE_LINKER_CONFIGS. Nothing is done when $flag is
-# already recorded in AOM_EXE_LINKER_FLAGS.
-function(append_exe_linker_flag flag)
-  is_flag_present(AOM_EXE_LINKER_FLAGS "${flag}" already_listed)
-  if(NOT ${already_listed})
-    append_flag(AOM_EXE_LINKER_FLAGS "${flag}")
-    foreach(flags_var ${AOM_EXE_LINKER_CONFIGS})
-      append_flag(${flags_var} "${flag}")
-    endforeach()
-  endif()
-endfunction()
-
-# Adds $flag to the link flags for $target.
-#
-# The target's LINK_FLAGS property is read; $flag is appended after a space
-# unless it already occurs in the existing value, or becomes the whole value
-# when the property is not set yet.
-function(append_link_flag_to_target target flag)
-  unset(target_link_flags)
-  get_target_property(target_link_flags ${target} LINK_FLAGS)
-
-  # get_target_property() yields a *-NOTFOUND value for an unset property,
-  # which evaluates as false here.
-  if(target_link_flags)
-    is_flag_present(target_link_flags "${flag}" flag_found)
-    if(${flag_found})
-      return()
-    endif()
-    set(target_link_flags "${target_link_flags} ${flag}")
-  else()
-    set(target_link_flags "${flag}")
-  endif()
-
-  # Quoted so the value always reaches set_target_properties() as exactly one
-  # argument, even if it contains a ';'.
-  set_target_properties(${target}
-                        PROPERTIES LINK_FLAGS "${target_link_flags}")
-endfunction()
-
-# Adds $flag to executable linker flags, and makes sure C/CXX builds still work.
-#
-# Does nothing when DEBUG_CMAKE_DISABLE_COMPILER_TESTS is set. Generation is
-# terminated when either the C or the CXX check reports failure. $flag is
-# appended before the checks run and is not removed again on failure.
-function(require_linker_flag flag)
- if(DEBUG_CMAKE_DISABLE_COMPILER_TESTS)
- return()
- endif()
-
- append_exe_linker_flag(${flag})
-
- # aom_check_c_compiles()/aom_check_cxx_compiles() are defined outside this
- # file. They are called with a test name, an empty source string and a result
- # variable; presumably they build a trivial program with the current flags --
- # TODO confirm against compiler_tests.cmake.
- unset(c_passed)
- aom_check_c_compiles("LINKER_FLAG_C_TEST(${flag})" "" c_passed)
- unset(cxx_passed)
- aom_check_cxx_compiles("LINKER_FLAG_CXX_TEST(${flag})" "" cxx_passed)
-
- if(NOT c_passed OR NOT cxx_passed)
- message(FATAL_ERROR "Linker flag test for ${flag} failed.")
- endif()
-endfunction()
-
-# Appends flags in $AOM_EXTRA_<TYPE>_FLAGS variables to the flags used at build
-# time.
-#
-# Handles AOM_EXTRA_EXE_LINKER_FLAGS, AOM_EXTRA_AS_FLAGS, AOM_EXTRA_C_FLAGS and
-# AOM_EXTRA_CXX_FLAGS. Each variable's whole value is treated as one flag
-# string and is skipped when it is already present in the matching AOM_*_FLAGS
-# string. Linker, C and CXX flags go through the require_* helpers, so an
-# unsupported value terminates generation; assembler flags are appended
-# untested.
-function(set_user_flags)
-
- # Linker flags are handled first because some C/CXX flags require that a
- # linker flag is present at link time.
- if(AOM_EXTRA_EXE_LINKER_FLAGS)
- is_flag_present(AOM_EXE_LINKER_FLAGS "${AOM_EXTRA_EXE_LINKER_FLAGS}"
- extra_present)
- if(NOT ${extra_present})
- require_linker_flag("${AOM_EXTRA_EXE_LINKER_FLAGS}")
- endif()
- endif()
- if(AOM_EXTRA_AS_FLAGS)
-
- # TODO(tomfinegan): assembler flag testing would be a good thing to have.
- is_flag_present(AOM_AS_FLAGS "${AOM_EXTRA_AS_FLAGS}" extra_present)
- if(NOT ${extra_present})
- append_flag(AOM_AS_FLAGS "${AOM_EXTRA_AS_FLAGS}")
- endif()
- endif()
- # YES asks require_c_flag()/require_cxx_flag() to also add the flags to the
- # per-configuration flag variables.
- if(AOM_EXTRA_C_FLAGS)
- is_flag_present(AOM_C_FLAGS "${AOM_EXTRA_C_FLAGS}" extra_present)
- if(NOT ${extra_present})
- require_c_flag("${AOM_EXTRA_C_FLAGS}" YES)
- endif()
- endif()
- if(AOM_EXTRA_CXX_FLAGS)
- is_flag_present(AOM_CXX_FLAGS "${AOM_EXTRA_CXX_FLAGS}" extra_present)
- if(NOT ${extra_present})
- require_cxx_flag("${AOM_EXTRA_CXX_FLAGS}" YES)
- endif()
- endif()
-endfunction()
diff --git a/third_party/aom/build/cmake/compiler_tests.cmake b/third_party/aom/build/cmake/compiler_tests.cmake
deleted file mode 100644
index f115610ba..000000000
--- a/third_party/aom/build/cmake/compiler_tests.cmake
+++ /dev/null
@@ -1,175 +0,0 @@
-#
-# Copyright (c) 2016, Alliance for Open Media. All rights reserved
-#
-# This source code is subject to the terms of the BSD 2 Clause License and the
-# Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License was
-# not distributed with this source code in the LICENSE file, you can obtain it
-# at www.aomedia.org/license/software. If the Alliance for Open Media Patent
-# License 1.0 was not distributed with this source code in the PATENTS file, you
-# can obtain it at www.aomedia.org/license/patent.
-#
-if(AOM_BUILD_CMAKE_COMPILER_TESTS_CMAKE_)
- return()
-endif() # AOM_BUILD_CMAKE_COMPILER_TESTS_CMAKE_
-set(AOM_BUILD_CMAKE_COMPILER_TESTS_CMAKE_ 1)
-
-include(CheckCSourceCompiles)
-include(CheckCXXSourceCompiles)
-
-# CMake passes command line flags like this:
-#
-# * $compiler $lang_flags $lang_flags_config ...
-#
-# To ensure the flags tested here and elsewhere are obeyed a list of active
-# build configuration types is built, and flags are applied to the flag strings
-# for each configuration currently active for C and CXX builds as determined by
-# reading $CMAKE_CONFIGURATION_TYPES and $CMAKE_BUILD_TYPE. When
-# $CMAKE_CONFIGURATION_TYPES is non-empty a multi-configuration generator is in
-# use: currently this includes MSVC and Xcode. For other generators
-# $CMAKE_BUILD_TYPE is used. For both cases AOM_<LANG>_CONFIGS is populated with
-# CMake string variable names that contain flags for the currently available
-# configuration(s). AOM_EXE_LINKER_CONFIGS is filled the same way with the
-# matching CMAKE_EXE_LINKER_FLAGS_<CONFIG> variable names.
-unset(AOM_C_CONFIGS)
-unset(AOM_CXX_CONFIGS)
-list(LENGTH CMAKE_CONFIGURATION_TYPES num_configs)
-if(${num_configs} GREATER 0)
- # Multi-configuration generator: one entry per available configuration.
- foreach(config ${CMAKE_CONFIGURATION_TYPES})
- string(TOUPPER ${config} config)
- list(APPEND AOM_C_CONFIGS "CMAKE_C_FLAGS_${config}")
- list(APPEND AOM_CXX_CONFIGS "CMAKE_CXX_FLAGS_${config}")
- list(APPEND AOM_EXE_LINKER_CONFIGS "CMAKE_EXE_LINKER_FLAGS_${config}")
- endforeach()
-else()
- # Single-configuration generator: exactly one entry, from $CMAKE_BUILD_TYPE.
- # NOTE(review): ${CMAKE_BUILD_TYPE} is expanded unquoted here; presumably the
- # including project guarantees it is non-empty before this point -- confirm.
- string(TOUPPER ${CMAKE_BUILD_TYPE} config)
- set(AOM_C_CONFIGS "CMAKE_C_FLAGS_${config}")
- set(AOM_CXX_CONFIGS "CMAKE_CXX_FLAGS_${config}")
- set(AOM_EXE_LINKER_CONFIGS "CMAKE_EXE_LINKER_FLAGS_${config}")
-endif()
-
-# The basic main() function used in all compile tests.
-set(AOM_C_MAIN "\nint main(void) { return 0; }")
-set(AOM_CXX_MAIN "\nint main() { return 0; }")
-
-# Strings containing the names of passed and failed tests.
-set(AOM_C_PASSED_TESTS)
-set(AOM_C_FAILED_TESTS)
-set(AOM_CXX_PASSED_TESTS)
-set(AOM_CXX_FAILED_TESTS)
-
-# Stores the current value of the variable named by $var in SAVED_<name>, then
-# appends $new_value to that variable, separated by a single space. Both writes
-# are made in the caller's scope.
-function(aom_push_var var new_value)
- set(SAVED_${var} ${${var}} PARENT_SCOPE)
- set(${var} "${${var}} ${new_value}" PARENT_SCOPE)
-endfunction()
-
-# Restores the variable named by $var in the caller's scope from the
-# SAVED_<name> copy written by aom_push_var(), then discards that copy.
-function(aom_pop_var var)
-  # Assign to the variable that $var names. Assigning to the literal name "var"
-  # would leave the pushed value in place in the caller's scope.
-  set(${var} ${SAVED_${var}} PARENT_SCOPE)
-  unset(SAVED_${var} PARENT_SCOPE)
-endfunction()
-
-# Confirms $test_source compiles and stores $test_name in one of
-# $AOM_C_PASSED_TESTS or $AOM_C_FAILED_TESTS depending on outcome. When the
-# test passes $result_var is set to 1. When it fails $result_var is unset. The
-# test is not run if the test name is found in either of the passed or failed
-# test variables. When DEBUG_CMAKE_DISABLE_COMPILER_TESTS is true the function
-# returns immediately and $result_var is left untouched.
-function(aom_check_c_compiles test_name test_source result_var)
- if(DEBUG_CMAKE_DISABLE_COMPILER_TESTS)
- return()
- endif()
-
- unset(C_TEST_PASSED CACHE)
- unset(C_TEST_FAILED CACHE)
- # string(FIND) stores -1 when $test_name does not occur in the searched string.
- # NOTE(review): this is a substring search, so a name that is contained in an
- # already recorded name is treated as recorded -- confirm test names cannot
- # overlap like that.
- string(FIND "${AOM_C_PASSED_TESTS}" "${test_name}" C_TEST_PASSED)
- string(FIND "${AOM_C_FAILED_TESTS}" "${test_name}" C_TEST_FAILED)
- if(${C_TEST_PASSED} EQUAL -1 AND ${C_TEST_FAILED} EQUAL -1)
- # Not recorded yet: run the compile test. The cached result of a previous
- # test is removed first so it cannot be mistaken for this test's result.
- unset(C_TEST_COMPILED CACHE)
- message("Running C compiler test: ${test_name}")
- check_c_source_compiles("${test_source} ${AOM_C_MAIN}" C_TEST_COMPILED)
- set(${result_var} ${C_TEST_COMPILED} PARENT_SCOPE)
-
- # Record the outcome in the cache so the test is skipped on later calls.
- if(C_TEST_COMPILED)
- set(AOM_C_PASSED_TESTS "${AOM_C_PASSED_TESTS} ${test_name}"
- CACHE STRING "" FORCE)
- else()
- set(AOM_C_FAILED_TESTS "${AOM_C_FAILED_TESTS} ${test_name}"
- CACHE STRING "" FORCE)
- message("C Compiler test ${test_name} failed.")
- endif()
- elseif(NOT ${C_TEST_PASSED} EQUAL -1)
- # Recorded as passed earlier: report success without recompiling.
- set(${result_var} 1 PARENT_SCOPE)
- else() # ${C_TEST_FAILED} NOT EQUAL -1
- unset(${result_var} PARENT_SCOPE)
- endif()
-endfunction()
-
-# Confirms $test_source compiles and stores $test_name in one of
-# $AOM_CXX_PASSED_TESTS or $AOM_CXX_FAILED_TESTS depending on outcome. When the
-# test passes $result_var is set to 1. When it fails $result_var is unset. The
-# test is not run if the test name is found in either of the passed or failed
-# test variables. When DEBUG_CMAKE_DISABLE_COMPILER_TESTS is true the function
-# returns immediately and $result_var is left untouched.
-function(aom_check_cxx_compiles test_name test_source result_var)
- if(DEBUG_CMAKE_DISABLE_COMPILER_TESTS)
- return()
- endif()
-
- unset(CXX_TEST_PASSED CACHE)
- unset(CXX_TEST_FAILED CACHE)
- # string(FIND) stores -1 when $test_name does not occur in the searched string.
- # NOTE(review): this is a substring search, so a name that is contained in an
- # already recorded name is treated as recorded -- confirm test names cannot
- # overlap like that.
- string(FIND "${AOM_CXX_PASSED_TESTS}" "${test_name}" CXX_TEST_PASSED)
- string(FIND "${AOM_CXX_FAILED_TESTS}" "${test_name}" CXX_TEST_FAILED)
- if(${CXX_TEST_PASSED} EQUAL -1 AND ${CXX_TEST_FAILED} EQUAL -1)
- # Not recorded yet: run the compile test. The cached result of a previous
- # test is removed first so it cannot be mistaken for this test's result.
- unset(CXX_TEST_COMPILED CACHE)
- message("Running CXX compiler test: ${test_name}")
- check_cxx_source_compiles("${test_source} ${AOM_CXX_MAIN}"
- CXX_TEST_COMPILED)
- set(${result_var} ${CXX_TEST_COMPILED} PARENT_SCOPE)
-
- # Record the outcome in the cache so the test is skipped on later calls.
- if(CXX_TEST_COMPILED)
- set(AOM_CXX_PASSED_TESTS "${AOM_CXX_PASSED_TESTS} ${test_name}"
- CACHE STRING "" FORCE)
- else()
- set(AOM_CXX_FAILED_TESTS "${AOM_CXX_FAILED_TESTS} ${test_name}"
- CACHE STRING "" FORCE)
- message("CXX Compiler test ${test_name} failed.")
- endif()
- elseif(NOT ${CXX_TEST_PASSED} EQUAL -1)
- # Recorded as passed earlier: report success without recompiling.
- set(${result_var} 1 PARENT_SCOPE)
- else() # ${CXX_TEST_FAILED} NOT EQUAL -1
- unset(${result_var} PARENT_SCOPE)
- endif()
-endfunction()
-
-# Convenience function that confirms $test_source compiles as C and C++.
-# $result_var is set to 1 when both tests are successful, and 0 when one or both
-# tests fail. Note: This function is intended to be used to write to result
-# variables that are expanded via configure_file(). $result_var is set to 1 or 0
-# to allow direct usage of the value in generated source files.
-function(aom_check_source_compiles test_name test_source result_var)
-  unset(C_PASSED)
-  unset(CXX_PASSED)
-
-  # Forward the name and the source as quoted arguments. Unquoted, a source
-  # containing ';' is split into several arguments and an empty source
-  # disappears entirely; either way the callee's parameters would be shifted.
-  aom_check_c_compiles("${test_name}" "${test_source}" C_PASSED)
-  aom_check_cxx_compiles("${test_name}" "${test_source}" CXX_PASSED)
-
-  if(C_PASSED AND CXX_PASSED)
-    set(${result_var} 1 PARENT_SCOPE)
-  else()
-    set(${result_var} 0 PARENT_SCOPE)
-  endif()
-endfunction()
-
-# Writes the inlining keyword supported by the current compiler to $result in
-# the caller's scope. "inline" is tried first and "__inline" second; $result is
-# left untouched when neither compiles.
-function(aom_get_inline result)
-  aom_check_source_compiles("inline_check_1"
-                            "static inline void function(void) {}"
-                            HAVE_INLINE_1)
-  if(NOT HAVE_INLINE_1 EQUAL 1)
-    # Plain inline was rejected: fall back to __inline.
-    aom_check_source_compiles("inline_check_2"
-                              "static __inline void function(void) {}"
-                              HAVE_INLINE_2)
-    if(HAVE_INLINE_2 EQUAL 1)
-      set(${result} "__inline" PARENT_SCOPE)
-    endif()
-  else()
-    set(${result} "inline" PARENT_SCOPE)
-  endif()
-endfunction()
diff --git a/third_party/aom/build/cmake/cpu.cmake b/third_party/aom/build/cmake/cpu.cmake
deleted file mode 100644
index 6e8089e63..000000000
--- a/third_party/aom/build/cmake/cpu.cmake
+++ /dev/null
@@ -1,93 +0,0 @@
-#
-# Copyright (c) 2017, Alliance for Open Media. All rights reserved
-#
-# This source code is subject to the terms of the BSD 2 Clause License and the
-# Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License was
-# not distributed with this source code in the LICENSE file, you can obtain it
-# at www.aomedia.org/license/software. If the Alliance for Open Media Patent
-# License 1.0 was not distributed with this source code in the PATENTS file, you
-# can obtain it at www.aomedia.org/license/patent.
-#
-
-# Translates $AOM_TARGET_CPU and the ENABLE_<feature> options into the
-# ARCH_<cpu>, HAVE_<feature> and RTCD_<...> variables. Every feature that ends
-# up disabled also adds a --disable-<feature> entry to AOM_RTCD_FLAGS.
-if("${AOM_TARGET_CPU}" STREQUAL "arm64")
- set(ARCH_ARM 1)
- set(RTCD_ARCH_ARM "yes")
-
- if(ENABLE_NEON)
- set(HAVE_NEON 1)
- set(RTCD_HAVE_NEON "yes")
- else()
- set(HAVE_NEON 0)
- set(AOM_RTCD_FLAGS ${AOM_RTCD_FLAGS} --disable-neon)
- endif()
-elseif("${AOM_TARGET_CPU}" MATCHES "^armv7")
- # Same handling as arm64: NEON is the only optional feature.
- set(ARCH_ARM 1)
- set(RTCD_ARCH_ARM "yes")
-
- if(ENABLE_NEON)
- set(HAVE_NEON 1)
- set(RTCD_HAVE_NEON "yes")
- else()
- set(HAVE_NEON 0)
- set(AOM_RTCD_FLAGS ${AOM_RTCD_FLAGS} --disable-neon)
- endif()
-elseif("${AOM_TARGET_CPU}" MATCHES "^mips")
- set(ARCH_MIPS 1)
- set(RTCD_ARCH_MIPS "yes")
-
- if("${AOM_TARGET_CPU}" STREQUAL "mips32")
- set(HAVE_MIPS32 1)
- set(RTCD_HAVE_MIPS32 "yes")
- elseif("${AOM_TARGET_CPU}" STREQUAL "mips64")
- set(HAVE_MIPS64 1)
- set(RTCD_HAVE_MIPS64 "yes")
- endif()
-
- # HAVE_DSPR2 is set by mips toolchain files.
- if(ENABLE_DSPR2 AND HAVE_DSPR2)
- set(RTCD_HAVE_DSPR2 "yes")
- else()
- set(HAVE_DSPR2 0)
- set(AOM_RTCD_FLAGS ${AOM_RTCD_FLAGS} --disable-dspr2)
- endif()
-
- # HAVE_MSA is set by mips toolchain files.
- if(ENABLE_MSA AND HAVE_MSA)
- set(RTCD_HAVE_MSA "yes")
- else()
- set(HAVE_MSA 0)
- set(AOM_RTCD_FLAGS ${AOM_RTCD_FLAGS} --disable-msa)
- endif()
-elseif("${AOM_TARGET_CPU}" MATCHES "ppc")
- # NOTE(review): unlike the other MATCHES tests this pattern is not anchored
- # with ^, so any CPU name containing "ppc" is accepted -- confirm intended.
- set(ARCH_PPC 1)
- set(RTCD_ARCH_PPC "yes")
-
- if(ENABLE_VSX)
- set(HAVE_VSX 1)
- set(RTCD_HAVE_VSX "yes")
- else()
- set(HAVE_VSX 0)
- set(AOM_RTCD_FLAGS ${AOM_RTCD_FLAGS} --disable-vsx)
- endif()
-elseif("${AOM_TARGET_CPU}" MATCHES "^x86")
- if("${AOM_TARGET_CPU}" STREQUAL "x86")
- set(ARCH_X86 1)
- set(RTCD_ARCH_X86 "yes")
- elseif("${AOM_TARGET_CPU}" STREQUAL "x86_64")
- set(ARCH_X86_64 1)
- set(RTCD_ARCH_X86_64 "yes")
- endif()
-
- # Flavors are processed in list order. The first flavor that is not enabled
- # sets disable_remaining_flavors, which disables every flavor after it too.
- set(X86_FLAVORS "MMX;SSE;SSE2;SSE3;SSSE3;SSE4_1;SSE4_2;AVX;AVX2")
- foreach(flavor ${X86_FLAVORS})
- if(ENABLE_${flavor} AND NOT disable_remaining_flavors)
- set(HAVE_${flavor} 1)
- set(RTCD_HAVE_${flavor} "yes")
- else()
- set(disable_remaining_flavors 1)
- set(HAVE_${flavor} 0)
- # The --disable-<flavor> entry uses the lower case flavor name.
- string(TOLOWER ${flavor} flavor)
- set(AOM_RTCD_FLAGS ${AOM_RTCD_FLAGS} --disable-${flavor})
- endif()
- endforeach()
-endif()
diff --git a/third_party/aom/build/cmake/dist.cmake b/third_party/aom/build/cmake/dist.cmake
deleted file mode 100644
index 6f81736f0..000000000
--- a/third_party/aom/build/cmake/dist.cmake
+++ /dev/null
@@ -1,64 +0,0 @@
-#
-# Copyright (c) 2017, Alliance for Open Media. All rights reserved
-#
-# This source code is subject to the terms of the BSD 2 Clause License and the
-# Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License was
-# not distributed with this source code in the LICENSE file, you can obtain it
-# at www.aomedia.org/license/software. If the Alliance for Open Media Patent
-# License 1.0 was not distributed with this source code in the PATENTS file, you
-# can obtain it at www.aomedia.org/license/patent.
-#
-cmake_minimum_required(VERSION 3.5)
-
-# Converts spaces in $in_string to semicolons and writes the output to
-# $out_string. In CMake's eyes this converts the input string to a list.
-# An empty $in_string yields an empty list, and semicolons already present in
-# $in_string are kept as list separators.
-function(listify_string in_string out_string)
-  # The input is quoted so that it always reaches string(REPLACE) as exactly
-  # one argument: unquoted, an empty value would vanish from the argument list
-  # and a value with ';' would be split into pieces that are then joined back
-  # together without any separator.
-  string(REPLACE " " ";" ${out_string} "${in_string}")
-  set(${out_string} "${${out_string}}" PARENT_SCOPE)
-endfunction()
-
-# Variables that must be non-empty for this script to run.
-set(REQUIRED_ARGS "AOM_ROOT" "AOM_CONFIG_DIR" "AOM_DIST_DIR"
- "AOM_DIST_INCLUDES" "AOM_DIST_LIBS" "ENABLE_DOCS")
-
-foreach(arg ${REQUIRED_ARGS})
- if("${${arg}}" STREQUAL "")
- message(FATAL_ERROR "${arg} must not be empty.")
- endif()
-endforeach()
-
-# Documentation is copied from the configuration directory when enabled.
-if(ENABLE_DOCS)
- file(INSTALL "${AOM_CONFIG_DIR}/docs" DESTINATION "${AOM_DIST_DIR}")
-endif()
-
-# Examples go to bin/examples; entries matching aomdec or aomenc are skipped
-# in this loop.
-if(AOM_DIST_EXAMPLES)
- listify_string("${AOM_DIST_EXAMPLES}" "AOM_DIST_EXAMPLES")
- foreach(example ${AOM_DIST_EXAMPLES})
- if(NOT "${example}" MATCHES "aomdec\|aomenc")
- file(INSTALL "${example}" DESTINATION "${AOM_DIST_DIR}/bin/examples")
- endif()
- endforeach()
-endif()
-
-# Tools go to bin/tools.
-if(AOM_DIST_TOOLS)
- listify_string("${AOM_DIST_TOOLS}" "AOM_DIST_TOOLS")
- foreach(tool ${AOM_DIST_TOOLS})
- file(INSTALL "${tool}" DESTINATION "${AOM_DIST_DIR}/bin/tools")
- endforeach()
-endif()
-
-# Applications go directly to bin.
-if(AOM_DIST_APPS)
- listify_string("${AOM_DIST_APPS}" "AOM_DIST_APPS")
- foreach(app ${AOM_DIST_APPS})
- file(INSTALL "${app}" DESTINATION "${AOM_DIST_DIR}/bin")
- endforeach()
-endif()
-
-# Headers and libraries are required arguments, so they are installed
-# unconditionally.
-listify_string("${AOM_DIST_INCLUDES}" "AOM_DIST_INCLUDES")
-foreach(inc ${AOM_DIST_INCLUDES})
- file(INSTALL "${inc}" DESTINATION "${AOM_DIST_DIR}/include/aom")
-endforeach()
-
-listify_string("${AOM_DIST_LIBS}" "AOM_DIST_LIBS")
-foreach(lib ${AOM_DIST_LIBS})
- file(INSTALL "${lib}" DESTINATION "${AOM_DIST_DIR}/lib")
-endforeach()
diff --git a/third_party/aom/build/cmake/exports.cmake b/third_party/aom/build/cmake/exports.cmake
deleted file mode 100644
index e0813dc0f..000000000
--- a/third_party/aom/build/cmake/exports.cmake
+++ /dev/null
@@ -1,65 +0,0 @@
-#
-# Copyright (c) 2017, Alliance for Open Media. All rights reserved
-#
-# This source code is subject to the terms of the BSD 2 Clause License and the
-# Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License was
-# not distributed with this source code in the LICENSE file, you can obtain it
-# at www.aomedia.org/license/software. If the Alliance for Open Media Patent
-# License 1.0 was not distributed with this source code in the PATENTS file, you
-# can obtain it at www.aomedia.org/license/patent.
-#
-if(AOM_BUILD_CMAKE_EXPORTS_CMAKE_)
- return()
-endif() # AOM_BUILD_CMAKE_EXPORTS_CMAKE_
-set(AOM_BUILD_CMAKE_EXPORTS_CMAKE_ 1)
-
-include("${AOM_ROOT}/build/cmake/exports_sources.cmake")
-
-# Creates the custom target which handles generation of the symbol export lists.
-# The generate_exports target runs generate_exports.cmake in script mode to
-# write libaom.<ext> into $AOM_CONFIG_DIR, where <ext> is "syms" for Darwin,
-# "def" for MSVC on Windows/MSYS and "ver" otherwise. The aom target is made to
-# depend on it and receives the link flag that consumes the generated file.
-function(setup_exports_target)
-  if("${AOM_TARGET_SYSTEM}" STREQUAL "Darwin")
-    set(symbol_file_ext "syms")
-  elseif("${AOM_TARGET_SYSTEM}" MATCHES "Windows\|MSYS" AND MSVC)
-    set(symbol_file_ext "def")
-  else()
-    set(symbol_file_ext "ver")
-  endif()
-
-  set(aom_sym_file "${AOM_CONFIG_DIR}/libaom.${symbol_file_ext}")
-
-  add_custom_target(generate_exports
-                    COMMAND ${CMAKE_COMMAND} -DAOM_ROOT="${AOM_ROOT}"
-                            -DAOM_CONFIG_DIR="${AOM_CONFIG_DIR}"
-                            -DAOM_TARGET_SYSTEM=${AOM_TARGET_SYSTEM}
-                            -DAOM_SYM_FILE="${aom_sym_file}" -DAOM_MSVC=${MSVC}
-                            -DAOM_XCODE=${XCODE} -DCONFIG_NAME=$<CONFIG>
-                            -DCONFIG_AV1_DECODER=${CONFIG_AV1_DECODER}
-                            -DCONFIG_AV1_ENCODER=${CONFIG_AV1_ENCODER}
-                            -DENABLE_TESTS=${ENABLE_TESTS} -P
-                            "${AOM_ROOT}/build/cmake/generate_exports.cmake"
-                    SOURCES ${AOM_EXPORTS_SOURCES}
-                    DEPENDS ${AOM_EXPORTS_SOURCES})
-
-  # Make libaom depend on the exports file, and set flags to pick it up when
-  # creating the dylib.
-  add_dependencies(aom generate_exports)
-
-  # APPEND_STRING concatenates without inserting a separator, so every flag
-  # starts with a space to keep it apart from flags already in LINK_FLAGS.
-  if(APPLE)
-    set_property(TARGET aom APPEND_STRING
-                 PROPERTY LINK_FLAGS " -exported_symbols_list ${aom_sym_file}")
-  elseif(WIN32)
-    if(NOT MSVC)
-      # Same comma separated form as the generic branch below, so the script
-      # path is handed to the linker together with --version-script.
-      set_property(TARGET aom APPEND_STRING
-                   PROPERTY LINK_FLAGS
-                            " -Wl,--version-script,${aom_sym_file}")
-    else()
-      set_property(TARGET aom APPEND_STRING
-                   PROPERTY LINK_FLAGS " /DEF:${aom_sym_file}")
-    endif()
-
-    # TODO(tomfinegan): Sort out the import lib situation and flags for MSVC.
-
-  else()
-    set_property(TARGET aom APPEND_STRING
-                 PROPERTY LINK_FLAGS " -Wl,--version-script,${aom_sym_file}")
-  endif()
-endfunction()
diff --git a/third_party/aom/build/cmake/exports_sources.cmake b/third_party/aom/build/cmake/exports_sources.cmake
deleted file mode 100644
index 576920e36..000000000
--- a/third_party/aom/build/cmake/exports_sources.cmake
+++ /dev/null
@@ -1,32 +0,0 @@
-#
-# Copyright (c) 2017, Alliance for Open Media. All rights reserved
-#
-# This source code is subject to the terms of the BSD 2 Clause License and the
-# Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License was
-# not distributed with this source code in the LICENSE file, you can obtain it
-# at www.aomedia.org/license/software. If the Alliance for Open Media Patent
-# License 1.0 was not distributed with this source code in the PATENTS file, you
-# can obtain it at www.aomedia.org/license/patent.
-#
-if(AOM_BUILD_CMAKE_EXPORTS_SOURCES_CMAKE_)
- return()
-endif() # AOM_BUILD_CMAKE_EXPORTS_SOURCES_CMAKE_
-set(AOM_BUILD_CMAKE_EXPORTS_SOURCES_CMAKE_ 1)
-
-# Builds AOM_EXPORTS_SOURCES, the list of export list files. The common files
-# are always included; decoder, encoder and test files are added when the
-# matching option is enabled.
-list(APPEND AOM_EXPORTS_SOURCES "${AOM_ROOT}/aom/exports_com"
- "${AOM_ROOT}/av1/exports_com")
-
-if(CONFIG_AV1_DECODER)
- list(APPEND AOM_EXPORTS_SOURCES "${AOM_ROOT}/aom/exports_dec"
- "${AOM_ROOT}/av1/exports_dec")
-endif()
-
-if(CONFIG_AV1_ENCODER)
- list(APPEND AOM_EXPORTS_SOURCES "${AOM_ROOT}/aom/exports_enc"
- "${AOM_ROOT}/av1/exports_enc")
-endif()
-
-if(ENABLE_TESTS)
- list(APPEND AOM_EXPORTS_SOURCES "${AOM_ROOT}/aom/exports_test"
- "${AOM_ROOT}/av1/exports_test")
-endif()
diff --git a/third_party/aom/build/cmake/generate_aom_config_templates.cmake b/third_party/aom/build/cmake/generate_aom_config_templates.cmake
deleted file mode 100644
index b91c036de..000000000
--- a/third_party/aom/build/cmake/generate_aom_config_templates.cmake
+++ /dev/null
@@ -1,101 +0,0 @@
-#
-# Copyright (c) 2017, Alliance for Open Media. All rights reserved
-#
-# This source code is subject to the terms of the BSD 2 Clause License and the
-# Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License was
-# not distributed with this source code in the LICENSE file, you can obtain it
-# at www.aomedia.org/license/software. If the Alliance for Open Media Patent
-# License 1.0 was not distributed with this source code in the PATENTS file, you
-# can obtain it at www.aomedia.org/license/patent.
-#
-cmake_minimum_required(VERSION 3.5)
-
-string(TIMESTAMP year "%Y")
-set(
- asm_file_header_block
- "\;
-\; Copyright (c) ${year}, Alliance for Open Media. All rights reserved
-\;
-\; This source code is subject to the terms of the BSD 2 Clause License and
-\; the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
-\; was not distributed with this source code in the LICENSE file, you can
-\; obtain it at www.aomedia.org/license/software. If the Alliance for Open
-\; Media Patent License 1.0 was not distributed with this source code in the
-\; PATENTS file, you can obtain it at www.aomedia.org/license/patent.
-\;
-"
- )
-set(
- h_file_header_block
- "/*
- * Copyright (c) ${year}, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-\#ifndef AOM_CONFIG_H_
-\#define AOM_CONFIG_H_
-"
- )
-set(
- cmake_file_header_block
- "##
-## Copyright (c) ${year}, Alliance for Open Media. All rights reserved
-##
-## This source code is subject to the terms of the BSD 2 Clause License and
-## the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
-## was not distributed with this source code in the LICENSE file, you can
-## obtain it at www.aomedia.org/license/software. If the Alliance for Open
-## Media Patent License 1.0 was not distributed with this source code in the
-## PATENTS file, you can obtain it at www.aomedia.org/license/patent.
-##
-"
- )
-
-# Terminates cmake execution when the variable named by $var_name is undefined
-# or empty, or when the path it holds does not exist.
-function(check_directory_var var_name)
-  # Test the value of the named variable. $var_name itself holds the name and
-  # is never empty for the callers in this file, so testing it detects nothing.
-  if("${${var_name}}" STREQUAL "")
-    message(FATAL_ERROR "The CMake variable ${var_name} must be defined.")
-  endif()
-
-  if(NOT EXISTS "${${var_name}}")
-    message(FATAL_ERROR "${${var_name}} (${var_name}) missing.")
-  endif()
-endfunction()
-
-# Both directories must be usable before any template is written.
-check_directory_var(AOM_CONFIG_DIR)
-check_directory_var(AOM_ROOT)
-
-set(AOM_DEFAULTS "${AOM_ROOT}/build/cmake/aom_config_defaults.cmake")
-if(NOT EXISTS "${AOM_DEFAULTS}")
- message(FATAL_ERROR
- "Configuration default values file (${AOM_DEFAULTS}) missing.")
-endif()
-
-# The defaults file is expected to provide AOM_DETECT_VARS and AOM_CONFIG_VARS;
-# their sorted union drives both templates.
-include("${AOM_ROOT}/build/cmake/aom_config_defaults.cmake")
-list(APPEND aom_build_vars ${AOM_DETECT_VARS} ${AOM_CONFIG_VARS})
-list(SORT aom_build_vars)
-
-# C header template: one "#define NAME ${NAME}" line per variable, with
-# AOM_RTCD_FLAGS left out. The ${NAME} reference is written literally.
-set(aom_config_h_template "${AOM_CONFIG_DIR}/config/aom_config.h.cmake")
-file(WRITE "${aom_config_h_template}" ${h_file_header_block})
-foreach(aom_var ${aom_build_vars})
- if(NOT "${aom_var}" STREQUAL "AOM_RTCD_FLAGS")
- file(APPEND "${aom_config_h_template}"
- "\#define ${aom_var} \${${aom_var}}\n")
- endif()
-endforeach()
-file(APPEND "${aom_config_h_template}" "\#endif // AOM_CONFIG_H_")
-
-# Assembly template: one "NAME equ ${NAME}" line per variable, with INLINE and
-# AOM_RTCD_FLAGS left out.
-# NOTE(review): ${asm_file_header_block} is expanded unquoted; presumably this
-# relies on the \; escapes in that string to turn into plain ';' -- confirm
-# before quoting it.
-set(aom_asm_config_template "${AOM_CONFIG_DIR}/config/aom_config.asm.cmake")
-file(WRITE "${aom_asm_config_template}" ${asm_file_header_block})
-foreach(aom_var ${aom_build_vars})
- if(NOT "${aom_var}" STREQUAL "INLINE" AND NOT "${aom_var}" STREQUAL
- "AOM_RTCD_FLAGS")
- file(APPEND "${aom_asm_config_template}" "${aom_var} equ \${${aom_var}}\n")
- endif()
-endforeach()
diff --git a/third_party/aom/build/cmake/generate_exports.cmake b/third_party/aom/build/cmake/generate_exports.cmake
deleted file mode 100644
index 7ab5aaef8..000000000
--- a/third_party/aom/build/cmake/generate_exports.cmake
+++ /dev/null
@@ -1,66 +0,0 @@
-#
-# Copyright (c) 2017, Alliance for Open Media. All rights reserved
-#
-# This source code is subject to the terms of the BSD 2 Clause License and the
-# Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License was
-# not distributed with this source code in the LICENSE file, you can obtain it
-# at www.aomedia.org/license/software. If the Alliance for Open Media Patent
-# License 1.0 was not distributed with this source code in the PATENTS file, you
-# can obtain it at www.aomedia.org/license/patent.
-#
-cmake_minimum_required(VERSION 3.5)
-
-# Variables that must be non-empty for this script to run.
-set(REQUIRED_ARGS "AOM_ROOT" "AOM_CONFIG_DIR" "AOM_TARGET_SYSTEM"
- "AOM_SYM_FILE" "CONFIG_AV1_DECODER" "CONFIG_AV1_ENCODER")
-
-foreach(arg ${REQUIRED_ARGS})
- if("${${arg}}" STREQUAL "")
- message(FATAL_ERROR "${arg} must not be empty.")
- endif()
-endforeach()
-
-include("${AOM_ROOT}/build/cmake/exports_sources.cmake")
-
-# Per-platform symbol decoration: Darwin symbols get a "_" prefix and symbols
-# in the default (version script) format get a ";" suffix. For MSVC the suffix
-# is chosen per symbol in the loop further down.
-if("${AOM_TARGET_SYSTEM}" STREQUAL "Darwin")
- set(symbol_prefix "_")
-elseif("${AOM_TARGET_SYSTEM}" MATCHES "Windows\|MSYS" AND AOM_MSVC)
- file(WRITE "${AOM_SYM_FILE}" "LIBRARY aom\n" "EXPORTS\n")
-else()
- set(symbol_suffix ";")
-endif()
-
-set(aom_sym_file "${AOM_SYM_FILE}")
-
-# Start the output file from scratch with the header the format needs.
-# NOTE(review): the MSVC branch repeats the file(WRITE) done a few lines up;
-# the second write looks redundant -- confirm.
-if("${AOM_TARGET_SYSTEM}" STREQUAL "Darwin")
- file(REMOVE "${aom_sym_file}")
-elseif("${AOM_TARGET_SYSTEM}" MATCHES "Windows\|MSYS" AND AOM_MSVC)
- file(WRITE "${aom_sym_file}" "LIBRARY aom\n" "EXPORTS\n")
-else()
- file(WRITE "${aom_sym_file}" "{\nglobal:\n")
-endif()
-
-# Collect the lines of every export list file into a single list.
-foreach(export_file ${AOM_EXPORTS_SOURCES})
- file(STRINGS "${export_file}" exported_file_data)
- set(exported_symbols "${exported_symbols} ${exported_file_data};")
- string(STRIP "${exported_symbols}" exported_symbols)
-endforeach()
-
-# Write one decorated line per symbol. The leading "text " or "data " marker of
-# each entry is removed; for MSVC a "data" entry gets the " DATA" suffix.
-foreach(exported_symbol ${exported_symbols})
- string(STRIP "${exported_symbol}" exported_symbol)
- if("${AOM_TARGET_SYSTEM}" MATCHES "Windows\|MSYS" AND AOM_MSVC)
- string(SUBSTRING ${exported_symbol} 0 4 export_type)
- string(COMPARE EQUAL "${export_type}" "data" is_data)
- if(is_data)
- set(symbol_suffix " DATA")
- else()
- set(symbol_suffix "")
- endif()
- endif()
- string(REGEX REPLACE "text \|data " "" "exported_symbol" "${exported_symbol}")
- set(exported_symbol " ${symbol_prefix}${exported_symbol}${symbol_suffix}")
- file(APPEND "${aom_sym_file}" "${exported_symbol}\n")
-endforeach()
-
-# Files whose name ends in "ver" are closed with a local: section that hides
-# every symbol not listed above.
-if("${aom_sym_file}" MATCHES "ver$")
- file(APPEND "${aom_sym_file}" " \nlocal:\n *;\n};")
-endif()
diff --git a/third_party/aom/build/cmake/ios-Info.plist b/third_party/aom/build/cmake/ios-Info.plist
deleted file mode 100644
index 300e3e310..000000000
--- a/third_party/aom/build/cmake/ios-Info.plist
+++ /dev/null
@@ -1,37 +0,0 @@
-<?xml version="1.0" encoding="UTF-8"?>
-<!DOCTYPE plist PUBLIC "-//Apple//DTD PLIST 1.0//EN" "http://www.apple.com/DTDs/PropertyList-1.0.dtd">
-<plist version="1.0">
-<dict>
- <key>CFBundleDevelopmentRegion</key>
- <string>en</string>
- <key>CFBundleExecutable</key>
- <string>AOM</string>
- <key>CFBundleIdentifier</key>
- <string>org.webmproject.AOM</string>
- <key>CFBundleInfoDictionaryVersion</key>
- <string>6.0</string>
- <key>CFBundleName</key>
- <string>AOM</string>
- <key>CFBundlePackageType</key>
- <string>FMWK</string>
- <key>CFBundleShortVersionString</key>
- <string>${VERSION}</string>
- <key>CFBundleSignature</key>
- <string>????</string>
- <key>CFBundleSupportedPlatforms</key>
- <array>
- <string>iPhoneOS</string>
- </array>
- <key>CFBundleVersion</key>
- <string>${VERSION}</string>
- <key>MinimumOSVersion</key>
- <string>${IOS_VERSION_MIN}</string>
- <key>UIDeviceFamily</key>
- <array>
- <integer>1</integer>
- <integer>2</integer>
- </array>
- <key>AOMFullVersion</key>
- <string>${FULLVERSION}</string>
-</dict>
-</plist>
diff --git a/third_party/aom/build/cmake/iosbuild.sh b/third_party/aom/build/cmake/iosbuild.sh
deleted file mode 100755
index 167ece200..000000000
--- a/third_party/aom/build/cmake/iosbuild.sh
+++ /dev/null
@@ -1,384 +0,0 @@
-#!/bin/sh
-## Copyright (c) 2016, Alliance for Open Media. All rights reserved
-##
-## This source code is subject to the terms of the BSD 2 Clause License and
-## the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
-## was not distributed with this source code in the LICENSE file, you can
-## obtain it at www.aomedia.org/license/software. If the Alliance for Open
-## Media Patent License 1.0 was not distributed with this source code in the
-## PATENTS file, you can obtain it at www.aomedia.org/license/patent.
-##
-## This script generates 'AOM.framework'. An iOS app can encode and decode AVx
-## video by including 'AOM.framework'.
-##
-## Run iosbuild.sh to create 'AOM.framework' in the current directory.
-##
-set -e
-devnull='> /dev/null 2>&1'
-
-BUILD_ROOT="_iosbuild"
-CONFIGURE_ARGS="--disable-docs
- --disable-examples
- --disable-libyuv
- --disable-unit-tests"
-DIST_DIR="_dist"
-FRAMEWORK_DIR="AOM.framework"
-FRAMEWORK_LIB="AOM.framework/AOM"
-HEADER_DIR="${FRAMEWORK_DIR}/Headers/aom"
-SCRIPT_DIR=$(dirname "$0")
-LIBAOM_SOURCE_DIR=$(cd ${SCRIPT_DIR}/../..; pwd)
-LIPO=$(xcrun -sdk iphoneos${SDK} -find lipo)
-ORIG_PWD="$(pwd)"
-ARM_TARGETS="arm64-darwin-gcc
- armv7-darwin-gcc
- armv7s-darwin-gcc"
-SIM_TARGETS="x86-iphonesimulator-gcc
- x86_64-iphonesimulator-gcc"
-OSX_TARGETS="x86-darwin16-gcc
- x86_64-darwin16-gcc"
-TARGETS="${ARM_TARGETS} ${SIM_TARGETS}"
-
-# Configures for the target specified by $1, and invokes make with the dist
-# target using $DIST_DIR as the distribution output directory. The build runs
-# in a new directory named after the target, created in the current directory;
-# the previous working directory is restored afterwards.
-build_target() {
- local target="$1"
- local old_pwd="$(pwd)"
- local target_specific_flags=""
-
- vlog "***Building target: ${target}***"
-
- case "${target}" in
- x86-*)
- target_specific_flags="--enable-pic"
- vlog "Enabled PIC for ${target}"
- ;;
- esac
-
- mkdir "${target}"
- cd "${target}"
- # TODO(tomfinegan@google.com): switch to cmake.
- # eval is used so that the redirection text stored in ${devnull} takes effect.
- eval "${LIBAOM_SOURCE_DIR}/configure" --target="${target}" \
- ${CONFIGURE_ARGS} ${EXTRA_CONFIGURE_ARGS} ${target_specific_flags} \
- ${devnull}
- export DIST_DIR
- eval make dist ${devnull}
- cd "${old_pwd}"
-
- vlog "***Done building target: ${target}***"
-}
-
-# Prints the preprocessor symbol that identifies the CPU of the target given in
-# $1. For an unrecognized target an #error directive is printed instead and the
-# function returns 1.
-target_to_preproc_symbol() {
-  target="$1"
-  case "${target}" in
-    arm64-*)  echo "__aarch64__" ;;
-    armv7-*)  echo "__ARM_ARCH_7A__" ;;
-    armv7s-*) echo "__ARM_ARCH_7S__" ;;
-    x86-*)    echo "__i386__" ;;
-    x86_64-*) echo "__x86_64__" ;;
-    *)
-      echo "#error ${target} unknown/unsupported"
-      return 1
-      ;;
-  esac
-}
-
-# Create a aom_config.h shim that, based on preprocessor settings for the
-# current target CPU, includes the real aom_config.h for the current target.
-# $1 is the list of targets. The per-target aom_config.h files are copied from
-# ${BUILD_ROOT}/<target> into ${HEADER_DIR}/<target>.
-create_aom_framework_config_shim() {
- local targets="$1"
- local config_file="${HEADER_DIR}/aom_config.h"
- local preproc_symbol=""
- local target=""
- local include_guard="AOM_FRAMEWORK_HEADERS_AOM_AOM_CONFIG_H_"
-
- local file_header="/*
- * Copyright (c) $(date +%Y), Alliance for Open Media. All rights reserved.
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-/* GENERATED FILE: DO NOT EDIT! */
-
-#ifndef ${include_guard}
-#define ${include_guard}
-
-#if defined"
-
- # The header ends with an open "#if defined"; each iteration completes the
- # pending condition with its symbol and opens the next "#elif defined".
- printf "%s" "${file_header}" > "${config_file}"
- for target in ${targets}; do
- preproc_symbol=$(target_to_preproc_symbol "${target}")
- printf " ${preproc_symbol}\n" >> "${config_file}"
- printf "#define AOM_FRAMEWORK_TARGET \"${target}\"\n" >> "${config_file}"
- printf "#include \"AOM/aom/${target}/aom_config.h\"\n" >> "${config_file}"
- printf "#elif defined" >> "${config_file}"
- mkdir "${HEADER_DIR}/${target}"
- cp -p "${BUILD_ROOT}/${target}/aom_config.h" "${HEADER_DIR}/${target}"
- done
-
- # Consume the last line of output from the loop: We don't want it.
- sed -i '' -e '$d' "${config_file}"
-
- printf "#endif\n\n" >> "${config_file}"
- printf "#endif // ${include_guard}" >> "${config_file}"
-}
-
-# Verifies that $FRAMEWORK_LIB fat library contains requested builds. The
-# requested targets are the function's arguments. Returns 1 after logging the
-# difference when the sorted CPU lists do not match.
-verify_framework_targets() {
- local requested_cpus=""
- local cpu=""
-
- # Extract CPU from full target name.
- for target; do
- # The CPU is everything before the first "-" of the target name.
- cpu="${target%%-*}"
- if [ "${cpu}" = "x86" ]; then
- # lipo -info outputs i386 for libaom x86 targets.
- cpu="i386"
- fi
- requested_cpus="${requested_cpus}${cpu} "
- done
-
- # Get target CPUs present in framework library.
- local targets_built=$(${LIPO} -info ${FRAMEWORK_LIB})
-
- # $LIPO -info outputs a string like the following:
- # Architectures in the fat file: $FRAMEWORK_LIB <architectures>
- # Capture only the architecture strings.
- targets_built=${targets_built##*: }
-
- # Sort CPU strings to make the next step a simple string compare.
- local actual=$(echo ${targets_built} | tr " " "\n" | sort | tr "\n" " ")
- local requested=$(echo ${requested_cpus} | tr " " "\n" | sort | tr "\n" " ")
-
- vlog "Requested ${FRAMEWORK_LIB} CPUs: ${requested}"
- vlog "Actual ${FRAMEWORK_LIB} CPUs: ${actual}"
-
- if [ "${requested}" != "${actual}" ]; then
- elog "Actual ${FRAMEWORK_LIB} targets do not match requested target list."
- elog " Requested target CPUs: ${requested}"
- elog " Actual target CPUs: ${actual}"
- return 1
- fi
-}
-
-# Configures and builds each target specified by $1, and then builds
-# AOM.framework: the per-target libraries are merged into one fat library with
-# lipo, headers are copied into ${HEADER_DIR}, and the result is verified.
-build_framework() {
- local lib_list=""
- local targets="$1"
- local target=""
- local target_dist_dir=""
-
- # Clean up from previous build(s).
- rm -rf "${BUILD_ROOT}" "${FRAMEWORK_DIR}"
-
- # Create output dirs.
- mkdir -p "${BUILD_ROOT}"
- mkdir -p "${HEADER_DIR}"
-
- cd "${BUILD_ROOT}"
-
- # Build every target and remember where its library was placed. The recorded
- # paths are relative to ${ORIG_PWD}, which is re-entered after the loop.
- for target in ${targets}; do
- build_target "${target}"
- target_dist_dir="${BUILD_ROOT}/${target}/${DIST_DIR}"
- if [ "${ENABLE_SHARED}" = "yes" ]; then
- local suffix="dylib"
- else
- local suffix="a"
- fi
- lib_list="${lib_list} ${target_dist_dir}/lib/libaom.${suffix}"
- done
-
- cd "${ORIG_PWD}"
-
- # The basic libaom API includes are all the same; just grab the most recent
- # set.
- cp -p "${target_dist_dir}"/include/aom/* "${HEADER_DIR}"
-
- # Build the fat library.
- ${LIPO} -create ${lib_list} -output ${FRAMEWORK_DIR}/AOM
-
- # Create the aom_config.h shim that allows usage of aom_config.h from
- # within AOM.framework.
- create_aom_framework_config_shim "${targets}"
-
- # Copy in aom_version.h.
- # ${target} still holds the last target built by the loop above.
- cp -p "${BUILD_ROOT}/${target}/aom_version.h" "${HEADER_DIR}"
-
- if [ "${ENABLE_SHARED}" = "yes" ]; then
- # Adjust the dylib's name so dynamic linking in apps works as expected.
- install_name_tool -id '@rpath/AOM.framework/AOM' ${FRAMEWORK_DIR}/AOM
-
- # Copy in Info.plist.
- # The ${FULLVERSION}, ${VERSION} and ${IOS_VERSION_MIN} placeholders of the
- # template are replaced with the values of the same-named shell variables.
- cat "${SCRIPT_DIR}/ios-Info.plist" \
- | sed "s/\${FULLVERSION}/${FULLVERSION}/g" \
- | sed "s/\${VERSION}/${VERSION}/g" \
- | sed "s/\${IOS_VERSION_MIN}/${IOS_VERSION_MIN}/g" \
- > "${FRAMEWORK_DIR}/Info.plist"
- fi
-
- # Confirm AOM.framework/AOM contains the targets requested.
- verify_framework_targets ${targets}
-
- vlog "Created fat library ${FRAMEWORK_LIB} containing:"
- for lib in ${lib_list}; do
- vlog " $(echo ${lib} | awk -F / '{print $2, $NF}')"
- done
-}
-
-# Trap function. Cleans up the subtree used to build all targets contained in
-# $TARGETS. Logs the exit status when it is non-zero, and keeps the build tree
-# when PRESERVE_BUILD_OUTPUT is "yes".
-cleanup() {
- # Must be the first statement: any other command would overwrite $?.
- local res=$?
- cd "${ORIG_PWD}"
-
- if [ $res -ne 0 ]; then
- elog "build exited with error ($res)"
- fi
-
- if [ "${PRESERVE_BUILD_OUTPUT}" != "yes" ]; then
- rm -rf "${BUILD_ROOT}"
- fi
-}
-
-# Prints every remaining argument on its own line, each prefixed with the
-# indent string given in $1.
-print_list() {
- local indent="$1"
- shift
- local list="$@"
- for entry in ${list}; do
- echo "${indent}${entry}"
- done
-}
-
-# Prints the command line help, including the current default target list.
-iosbuild_usage() {
-cat << EOF
- Usage: ${0##*/} [arguments]
- --help: Display this message and exit.
- --enable-shared: Build a dynamic framework for use on iOS 8 or later.
- --extra-configure-args <args>: Extra args to pass when configuring libaom.
- --macosx: Uses darwin16 targets instead of iphonesimulator targets for x86
- and x86_64. Allows linking to framework when builds target MacOSX
- instead of iOS.
- --preserve-build-output: Do not delete the build directory.
- --show-build-output: Show output from each library build.
- --targets <targets>: Override default target list. Defaults:
-$(print_list " " ${TARGETS})
- --test-link: Confirms all targets can be linked. Functionally identical to
- passing --enable-examples via --extra-configure-args.
- --verbose: Output information about the environment and each stage of the
- build.
-EOF
-}
-
-# Writes an error message made of the script name, the text "failed because:"
-# and all arguments to stderr.
-elog() {
-  echo "${0##*/} failed because: $@" >&2
-}
-
-# Echoes its arguments, but only when VERBOSE is "yes".
-vlog() {
-  if [ "${VERBOSE}" != "yes" ]; then
-    return 0
-  fi
-  echo "$@"
-}
-
-# Run cleanup() whenever the script exits, on success and on failure.
-trap cleanup EXIT
-
-# Parse the command line.
-# Options that take a value consume it with an extra shift inside their branch;
-# the shift at the bottom of the loop consumes the option itself.
-while [ -n "$1" ]; do
- case "$1" in
- --extra-configure-args)
- EXTRA_CONFIGURE_ARGS="$2"
- shift
- ;;
- --help)
- iosbuild_usage
- exit
- ;;
- --enable-shared)
- ENABLE_SHARED=yes
- ;;
- --preserve-build-output)
- PRESERVE_BUILD_OUTPUT=yes
- ;;
- --show-build-output)
- # Clearing devnull removes the output redirection used by build_target().
- devnull=
- ;;
- --test-link)
- EXTRA_CONFIGURE_ARGS="${EXTRA_CONFIGURE_ARGS} --enable-examples"
- ;;
- --targets)
- TARGETS="$2"
- shift
- ;;
- --macosx)
- TARGETS="${ARM_TARGETS} ${OSX_TARGETS}"
- ;;
- --verbose)
- VERBOSE=yes
- ;;
- *)
- # Unknown argument: show the help text and fail.
- iosbuild_usage
- exit 1
- ;;
- esac
- shift
-done
-
-# A shared build is requested from configure with --enable-shared.
-if [ "${ENABLE_SHARED}" = "yes" ]; then
- CONFIGURE_ARGS="--enable-shared ${CONFIGURE_ARGS}"
-fi
-
-# FULLVERSION is whatever version.sh reports; VERSION keeps only the leading
-# "<major>.<minor>.<patch>" part of a string of the form "v<major>.<minor>...".
-FULLVERSION=$("${SCRIPT_DIR}"/version.sh --bare "${LIBAOM_SOURCE_DIR}")
-VERSION=$(echo "${FULLVERSION}" | sed -E 's/^v([0-9]+\.[0-9]+\.[0-9]+).*$/\1/')
-
-# The minimum iOS version written to Info.plist depends on the library type.
-# NOTE(review): IOS_VERSION_OPTIONS is assigned here but not referenced in the
-# visible part of this script -- confirm whether it is still needed.
-if [ "$ENABLE_SHARED" = "yes" ]; then
- IOS_VERSION_OPTIONS="--enable-shared"
- IOS_VERSION_MIN="8.0"
-else
- IOS_VERSION_OPTIONS=""
- IOS_VERSION_MIN="6.0"
-fi
-
-# In verbose mode, dump the effective settings before building.
-if [ "${VERBOSE}" = "yes" ]; then
-cat << EOF
- BUILD_ROOT=${BUILD_ROOT}
- DIST_DIR=${DIST_DIR}
- CONFIGURE_ARGS=${CONFIGURE_ARGS}
- EXTRA_CONFIGURE_ARGS=${EXTRA_CONFIGURE_ARGS}
- FRAMEWORK_DIR=${FRAMEWORK_DIR}
- FRAMEWORK_LIB=${FRAMEWORK_LIB}
- HEADER_DIR=${HEADER_DIR}
- LIBAOM_SOURCE_DIR=${LIBAOM_SOURCE_DIR}
- LIPO=${LIPO}
- MAKEFLAGS=${MAKEFLAGS}
- ORIG_PWD=${ORIG_PWD}
- PRESERVE_BUILD_OUTPUT=${PRESERVE_BUILD_OUTPUT}
- TARGETS="$(print_list "" ${TARGETS})"
- ENABLE_SHARED=${ENABLE_SHARED}
- OSX_TARGETS="${OSX_TARGETS}"
- SIM_TARGETS="${SIM_TARGETS}"
- SCRIPT_DIR="${SCRIPT_DIR}"
- FULLVERSION="${FULLVERSION}"
- VERSION="${VERSION}"
- IOS_VERSION_MIN="${IOS_VERSION_MIN}"
-EOF
-fi
-
-# Build everything, then report the targets the framework was built for.
-build_framework "${TARGETS}"
-echo "Successfully built '${FRAMEWORK_DIR}' for:"
-print_list "" ${TARGETS}
diff --git a/third_party/aom/build/cmake/msvc_runtime.cmake b/third_party/aom/build/cmake/msvc_runtime.cmake
deleted file mode 100644
index 9e4cbea43..000000000
--- a/third_party/aom/build/cmake/msvc_runtime.cmake
+++ /dev/null
@@ -1,37 +0,0 @@
-#
-# Copyright (c) 2017, Alliance for Open Media. All rights reserved
-#
-# This source code is subject to the terms of the BSD 2 Clause License and the
-# Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License was
-# not distributed with this source code in the LICENSE file, you can obtain it
-# at www.aomedia.org/license/software. If the Alliance for Open Media Patent
-# License 1.0 was not distributed with this source code in the PATENTS file, you
-# can obtain it at www.aomedia.org/license/patent.
-#
-if(AOM_BUILD_CMAKE_MSVC_RUNTIME_CMAKE_)
- return()
-endif() # AOM_BUILD_CMAKE_MSVC_RUNTIME_CMAKE_
-set(AOM_BUILD_CMAKE_MSVC_RUNTIME_CMAKE_ 1)
-
-if(MSVC)
-
- # CMake defaults to producing code linked to the DLL MSVC runtime. That will
- # not work with googletest, and isn't what we want anyway.
- if(NOT "${MSVC_RUNTIME}" STREQUAL "dll")
- foreach(flag_var
- CMAKE_C_FLAGS
- CMAKE_C_FLAGS_DEBUG
- CMAKE_C_FLAGS_RELEASE
- CMAKE_C_FLAGS_MINSIZEREL
- CMAKE_C_FLAGS_RELWITHDEBINFO
- CMAKE_CXX_FLAGS
- CMAKE_CXX_FLAGS_DEBUG
- CMAKE_CXX_FLAGS_RELEASE
- CMAKE_CXX_FLAGS_MINSIZEREL
- CMAKE_CXX_FLAGS_RELWITHDEBINFO)
- if(${flag_var} MATCHES "/MD")
- string(REGEX REPLACE "/MD" "/MT" ${flag_var} "${${flag_var}}")
- endif(${flag_var} MATCHES "/MD")
- endforeach(flag_var)
- endif()
-endif()
diff --git a/third_party/aom/build/cmake/pkg_config.cmake b/third_party/aom/build/cmake/pkg_config.cmake
deleted file mode 100644
index 64e20214e..000000000
--- a/third_party/aom/build/cmake/pkg_config.cmake
+++ /dev/null
@@ -1,58 +0,0 @@
-#
-# Copyright (c) 2017, Alliance for Open Media. All rights reserved
-#
-# This source code is subject to the terms of the BSD 2 Clause License and the
-# Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License was
-# not distributed with this source code in the LICENSE file, you can obtain it
-# at www.aomedia.org/license/software. If the Alliance for Open Media Patent
-# License 1.0 was not distributed with this source code in the PATENTS file, you
-# can obtain it at www.aomedia.org/license/patent.
-#
-cmake_minimum_required(VERSION 3.5)
-
-set(REQUIRED_ARGS "AOM_ROOT" "AOM_CONFIG_DIR" "CMAKE_INSTALL_PREFIX"
- "CMAKE_PROJECT_NAME" "CONFIG_MULTITHREAD" "HAVE_PTHREAD_H")
-
-foreach(arg ${REQUIRED_ARGS})
- if("${${arg}}" STREQUAL "")
- message(FATAL_ERROR "${arg} must not be empty.")
- endif()
-endforeach()
-
-include("${AOM_ROOT}/build/cmake/util.cmake")
-
-extract_version_string("${AOM_CONFIG_DIR}/config/aom_version.h" aom_version)
-
-# Create a version string suitable for comparison using the RPM version compare
-# algorithm: strip out everything after the number.
-string(FIND "${aom_version}" "-" dash_pos)
-if(${dash_pos} EQUAL -1)
- set(package_version "${aom_version}")
-else()
- string(SUBSTRING "${aom_version}" 0 ${dash_pos} package_version)
-endif()
-
-# Write pkg-config info.
-set(prefix "${CMAKE_INSTALL_PREFIX}")
-set(pkgconfig_file "${AOM_CONFIG_DIR}/aom.pc")
-string(TOLOWER ${CMAKE_PROJECT_NAME} pkg_name)
-file(WRITE "${pkgconfig_file}" "# libaom pkg-config.\n")
-file(APPEND "${pkgconfig_file}" "prefix=${prefix}\n")
-file(APPEND "${pkgconfig_file}" "exec_prefix=\${prefix}/bin\n")
-file(APPEND "${pkgconfig_file}" "libdir=\${prefix}/lib\n")
-file(APPEND "${pkgconfig_file}" "includedir=\${prefix}/include\n\n")
-file(APPEND "${pkgconfig_file}" "Name: ${pkg_name}\n")
-file(APPEND "${pkgconfig_file}"
- "Description: AV1 codec library v${aom_version}.\n")
-file(APPEND "${pkgconfig_file}" "Version: ${package_version}\n")
-file(APPEND "${pkgconfig_file}" "Requires:\n")
-file(APPEND "${pkgconfig_file}" "Conflicts:\n")
-if(CONFIG_MULTITHREAD AND HAVE_PTHREAD_H)
- file(APPEND "${pkgconfig_file}"
- "Libs: -L\${prefix}/lib -l${pkg_name} -lm -lpthread\n")
- file(APPEND "${pkgconfig_file}" "Libs.private: -lm -lpthread\n")
-else()
- file(APPEND "${pkgconfig_file}" "Libs: -L\${prefix}/lib -l${pkg_name} -lm\n")
- file(APPEND "${pkgconfig_file}" "Libs.private: -lm\n")
-endif()
-file(APPEND "${pkgconfig_file}" "Cflags: -I\${prefix}/include\n")
diff --git a/third_party/aom/build/cmake/rtcd.pl b/third_party/aom/build/cmake/rtcd.pl
deleted file mode 100755
index 46e06907c..000000000
--- a/third_party/aom/build/cmake/rtcd.pl
+++ /dev/null
@@ -1,467 +0,0 @@
-#!/usr/bin/env perl
-##
-## Copyright (c) 2017, Alliance for Open Media. All rights reserved
-##
-## This source code is subject to the terms of the BSD 2 Clause License and
-## the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
-## was not distributed with this source code in the LICENSE file, you can
-## obtain it at www.aomedia.org/license/software. If the Alliance for Open
-## Media Patent License 1.0 was not distributed with this source code in the
-## PATENTS file, you can obtain it at www.aomedia.org/license/patent.
-##
-no strict 'refs';
-use warnings;
-use Getopt::Long;
-Getopt::Long::Configure("auto_help") if $Getopt::Long::VERSION > 2.32;
-
-my %ALL_FUNCS = ();
-my @ALL_ARCHS;
-my @ALL_FORWARD_DECLS;
-my @REQUIRES;
-
-my %opts = ();
-my %disabled = ();
-my %required = ();
-
-my @argv;
-foreach (@ARGV) {
- $disabled{$1} = 1, next if /--disable-(.*)/;
- $required{$1} = 1, next if /--require-(.*)/;
- push @argv, $_;
-}
-
-# NB: use GetOptions() instead of GetOptionsFromArray() for compatibility.
-@ARGV = @argv;
-GetOptions(
- \%opts,
- 'arch=s',
- 'sym=s',
- 'config=s',
-);
-
-foreach my $opt (qw/arch config/) {
- if (!defined($opts{$opt})) {
- warn "--$opt is required!\n";
- Getopt::Long::HelpMessage('-exit' => 1);
- }
-}
-
-foreach my $defs_file (@ARGV) {
- if (!-f $defs_file) {
- warn "$defs_file: $!\n";
- Getopt::Long::HelpMessage('-exit' => 1);
- }
-}
-
-open CONFIG_FILE, $opts{config} or
- die "Error opening config file '$opts{config}': $!\n";
-
-my %config = ();
-while (<CONFIG_FILE>) {
- next if !/^#define\s+(?:CONFIG_|HAVE_)/;
- chomp;
- my @line_components = split /\s/;
- scalar @line_components > 2 or
- die "Invalid input passed to rtcd.pl via $opts{config}.";
- # $line_components[0] = #define
- # $line_components[1] = flag name (CONFIG_SOMETHING or HAVE_SOMETHING)
- # $line_components[2] = flag value (0 or 1)
- $config{$line_components[1]} = "$line_components[2]" eq "1" ? "yes" : "";
-}
-close CONFIG_FILE;
-
-#
-# Routines for the RTCD DSL to call
-#
-sub aom_config($) {
- return (defined $config{$_[0]}) ? $config{$_[0]} : "";
-}
-
-sub specialize {
- if (@_ <= 1) {
- die "'specialize' must be called with a function name and at least one ",
- "architecture ('C' is implied): \n@_\n";
- }
- my $fn=$_[0];
- shift;
- foreach my $opt (@_) {
- eval "\$${fn}_${opt}=${fn}_${opt}";
- }
-}
-
-sub add_proto {
- my $fn = splice(@_, -2, 1);
- $ALL_FUNCS{$fn} = \@_;
- specialize $fn, "c";
-}
-
-sub require {
- foreach my $fn (keys %ALL_FUNCS) {
- foreach my $opt (@_) {
- my $ofn = eval "\$${fn}_${opt}";
- next if !$ofn;
-
- # if we already have a default, then we can disable it, as we know
- # we can do better.
- my $best = eval "\$${fn}_default";
- if ($best) {
- my $best_ofn = eval "\$${best}";
- if ($best_ofn && "$best_ofn" ne "$ofn") {
- eval "\$${best}_link = 'false'";
- }
- }
- eval "\$${fn}_default=${fn}_${opt}";
- eval "\$${fn}_${opt}_link='true'";
- }
- }
-}
-
-sub forward_decls {
- push @ALL_FORWARD_DECLS, @_;
-}
-
-#
-# Include the user's directives
-#
-foreach my $f (@ARGV) {
- open FILE, "<", $f or die "cannot open $f: $!\n";
- my $contents = join('', <FILE>);
- close FILE;
- eval $contents or warn "eval failed: $@\n";
-}
-
-#
-# Process the directives according to the command line
-#
-sub process_forward_decls() {
- foreach (@ALL_FORWARD_DECLS) {
- $_->();
- }
-}
-
-sub determine_indirection {
- aom_config("CONFIG_RUNTIME_CPU_DETECT") eq "yes" or &require(@ALL_ARCHS);
- foreach my $fn (keys %ALL_FUNCS) {
- my $n = "";
- my @val = @{$ALL_FUNCS{$fn}};
- my $args = pop @val;
- my $rtyp = "@val";
- my $dfn = eval "\$${fn}_default";
- $dfn = eval "\$${dfn}";
- foreach my $opt (@_) {
- my $ofn = eval "\$${fn}_${opt}";
- next if !$ofn;
- my $link = eval "\$${fn}_${opt}_link";
- next if $link && $link eq "false";
- $n .= "x";
- }
- if ($n eq "x") {
- eval "\$${fn}_indirect = 'false'";
- } else {
- eval "\$${fn}_indirect = 'true'";
- }
- }
-}
-
-sub declare_function_pointers {
- foreach my $fn (sort keys %ALL_FUNCS) {
- my @val = @{$ALL_FUNCS{$fn}};
- my $args = pop @val;
- my $rtyp = "@val";
- my $dfn = eval "\$${fn}_default";
- $dfn = eval "\$${dfn}";
- foreach my $opt (@_) {
- my $ofn = eval "\$${fn}_${opt}";
- next if !$ofn;
- print "$rtyp ${ofn}($args);\n";
- }
- if (eval "\$${fn}_indirect" eq "false") {
- print "#define ${fn} ${dfn}\n";
- } else {
- print "RTCD_EXTERN $rtyp (*${fn})($args);\n";
- }
- print "\n";
- }
-}
-
-sub set_function_pointers {
- foreach my $fn (sort keys %ALL_FUNCS) {
- my @val = @{$ALL_FUNCS{$fn}};
- my $args = pop @val;
- my $rtyp = "@val";
- my $dfn = eval "\$${fn}_default";
- $dfn = eval "\$${dfn}";
- if (eval "\$${fn}_indirect" eq "true") {
- print " $fn = $dfn;\n";
- foreach my $opt (@_) {
- my $ofn = eval "\$${fn}_${opt}";
- next if !$ofn;
- next if "$ofn" eq "$dfn";
- my $link = eval "\$${fn}_${opt}_link";
- next if $link && $link eq "false";
- my $cond = eval "\$have_${opt}";
- print " if (${cond}) $fn = $ofn;\n"
- }
- }
- }
-}
-
-sub filter {
- my @filtered;
- foreach (@_) { push @filtered, $_ unless $disabled{$_}; }
- return @filtered;
-}
-
-#
-# Helper functions for generating the arch specific RTCD files
-#
-sub common_top() {
- my $include_guard = uc($opts{sym})."_H_";
- print <<EOF;
-// This file is generated. Do not edit.
-#ifndef ${include_guard}
-#define ${include_guard}
-
-#ifdef RTCD_C
-#define RTCD_EXTERN
-#else
-#define RTCD_EXTERN extern
-#endif
-
-EOF
-
-process_forward_decls();
-print <<EOF;
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-EOF
-declare_function_pointers("c", @ALL_ARCHS);
-
-print <<EOF;
-void $opts{sym}(void);
-
-EOF
-}
-
-sub common_bottom() {
- print <<EOF;
-
-#ifdef __cplusplus
-} // extern "C"
-#endif
-
-#endif
-EOF
-}
-
-sub x86() {
- determine_indirection("c", @ALL_ARCHS);
-
- # Assign the helper variable for each enabled extension
- foreach my $opt (@ALL_ARCHS) {
- my $opt_uc = uc $opt;
- eval "\$have_${opt}=\"flags & HAS_${opt_uc}\"";
- }
-
- common_top;
- print <<EOF;
-#ifdef RTCD_C
-#include "aom_ports/x86.h"
-static void setup_rtcd_internal(void)
-{
- int flags = x86_simd_caps();
-
- (void)flags;
-
-EOF
-
- set_function_pointers("c", @ALL_ARCHS);
-
- print <<EOF;
-}
-#endif
-EOF
- common_bottom;
-}
-
-sub arm() {
- determine_indirection("c", @ALL_ARCHS);
-
- # Assign the helper variable for each enabled extension
- foreach my $opt (@ALL_ARCHS) {
- my $opt_uc = uc $opt;
- eval "\$have_${opt}=\"flags & HAS_${opt_uc}\"";
- }
-
- common_top;
- print <<EOF;
-#include "config/aom_config.h"
-
-#ifdef RTCD_C
-#include "aom_ports/arm.h"
-static void setup_rtcd_internal(void)
-{
- int flags = aom_arm_cpu_caps();
-
- (void)flags;
-
-EOF
-
- set_function_pointers("c", @ALL_ARCHS);
-
- print <<EOF;
-}
-#endif
-EOF
- common_bottom;
-}
-
-sub mips() {
- determine_indirection("c", @ALL_ARCHS);
-
- # Assign the helper variable for each enabled extension
- foreach my $opt (@ALL_ARCHS) {
- my $opt_uc = uc $opt;
- eval "\$have_${opt}=\"flags & HAS_${opt_uc}\"";
- }
-
- common_top;
-
- print <<EOF;
-#include "config/aom_config.h"
-
-#ifdef RTCD_C
-static void setup_rtcd_internal(void)
-{
-EOF
-
- set_function_pointers("c", @ALL_ARCHS);
-
- print <<EOF;
-#if HAVE_DSPR2
-void aom_dsputil_static_init();
-aom_dsputil_static_init();
-#endif
-}
-#endif
-EOF
- common_bottom;
-}
-
-sub ppc() {
- determine_indirection("c", @ALL_ARCHS);
-
- # Assign the helper variable for each enabled extension
- foreach my $opt (@ALL_ARCHS) {
- my $opt_uc = uc $opt;
- eval "\$have_${opt}=\"flags & HAS_${opt_uc}\"";
- }
-
- common_top;
-
- print <<EOF;
-#include "config/aom_config.h"
-
-#ifdef RTCD_C
-#include "aom_ports/ppc.h"
-static void setup_rtcd_internal(void)
-{
- int flags = ppc_simd_caps();
-
- (void)flags;
-
-EOF
-
- set_function_pointers("c", @ALL_ARCHS);
-
- print <<EOF;
-}
-#endif
-EOF
- common_bottom;
-}
-
-sub unoptimized() {
- determine_indirection "c";
- common_top;
- print <<EOF;
-#include "config/aom_config.h"
-
-#ifdef RTCD_C
-static void setup_rtcd_internal(void)
-{
-EOF
-
- set_function_pointers "c";
-
- print <<EOF;
-}
-#endif
-EOF
- common_bottom;
-}
-
-#
-# Main Driver
-#
-
-&require("c");
-&require(keys %required);
-if ($opts{arch} eq 'x86') {
- @ALL_ARCHS = filter(qw/mmx sse sse2 sse3 ssse3 sse4_1 sse4_2 avx avx2/);
- x86;
-} elsif ($opts{arch} eq 'x86_64') {
- @ALL_ARCHS = filter(qw/mmx sse sse2 sse3 ssse3 sse4_1 sse4_2 avx avx2/);
- @REQUIRES = filter(qw/mmx sse sse2/);
- &require(@REQUIRES);
- x86;
-} elsif ($opts{arch} eq 'mips32' || $opts{arch} eq 'mips64') {
- @ALL_ARCHS = filter("$opts{arch}");
- if (aom_config("HAVE_DSPR2") eq "yes") {
- @ALL_ARCHS = filter("$opts{arch}", qw/dspr2/);
- } elsif (aom_config("HAVE_MSA") eq "yes") {
- @ALL_ARCHS = filter("$opts{arch}", qw/msa/);
- }
- mips;
-} elsif ($opts{arch} =~ /armv7\w?/) {
- @ALL_ARCHS = filter(qw/neon/);
- arm;
-} elsif ($opts{arch} eq 'armv8' || $opts{arch} eq 'arm64' ) {
- @ALL_ARCHS = filter(qw/neon/);
- &require("neon");
- arm;
-} elsif ($opts{arch} eq 'ppc') {
- @ALL_ARCHS = filter(qw/vsx/);
- ppc;
-} else {
- unoptimized;
-}
-
-__END__
-
-=head1 NAME
-
-rtcd -
-
-=head1 SYNOPSIS
-
-Usage: rtcd.pl [options] FILE
-
-See 'perldoc rtcd.pl' for more details.
-
-=head1 DESCRIPTION
-
-Reads the Run Time CPU Detections definitions from FILE and generates a
-C header file on stdout.
-
-=head1 OPTIONS
-
-Options:
- --arch=ARCH Architecture to generate defs for (required)
- --disable-EXT Disable support for EXT extensions
- --require-EXT Require support for EXT extensions
- --sym=SYMBOL Unique symbol to use for RTCD initialization function
- --config=FILE Path to file containing C preprocessor directives to parse
diff --git a/third_party/aom/build/cmake/sanitizers.cmake b/third_party/aom/build/cmake/sanitizers.cmake
deleted file mode 100644
index 77708e101..000000000
--- a/third_party/aom/build/cmake/sanitizers.cmake
+++ /dev/null
@@ -1,38 +0,0 @@
-#
-# Copyright (c) 2017, Alliance for Open Media. All rights reserved
-#
-# This source code is subject to the terms of the BSD 2 Clause License and the
-# Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License was
-# not distributed with this source code in the LICENSE file, you can obtain it
-# at www.aomedia.org/license/software. If the Alliance for Open Media Patent
-# License 1.0 was not distributed with this source code in the PATENTS file, you
-# can obtain it at www.aomedia.org/license/patent.
-#
-if(AOM_BUILD_CMAKE_SANITIZERS_CMAKE_)
- return()
-endif() # AOM_BUILD_CMAKE_SANITIZERS_CMAKE_
-set(AOM_BUILD_CMAKE_SANITIZERS_CMAKE_ 1)
-
-if(MSVC OR NOT SANITIZE)
- return()
-endif()
-
-include("${AOM_ROOT}/build/cmake/compiler_flags.cmake")
-
-string(TOLOWER ${SANITIZE} SANITIZE)
-
-# Require the sanitizer requested.
-require_linker_flag("-fsanitize=${SANITIZE}")
-require_compiler_flag("-fsanitize=${SANITIZE}" YES)
-
-# Make callstacks accurate.
-require_compiler_flag("-fno-omit-frame-pointer -fno-optimize-sibling-calls" YES)
-
-# Fix link errors due to missing rt compiler lib in 32-bit builds.
-# http://llvm.org/bugs/show_bug.cgi?id=17693
-if(CMAKE_C_COMPILER_ID MATCHES "Clang")
- if(${CMAKE_SIZEOF_VOID_P} EQUAL 4 AND "${SANITIZE}" MATCHES
- "integer|undefined")
- require_linker_flag("--rtlib=compiler-rt -lgcc_s")
- endif()
-endif()
diff --git a/third_party/aom/build/cmake/toolchains/arm-ios-common.cmake b/third_party/aom/build/cmake/toolchains/arm-ios-common.cmake
deleted file mode 100644
index 8f4095145..000000000
--- a/third_party/aom/build/cmake/toolchains/arm-ios-common.cmake
+++ /dev/null
@@ -1,26 +0,0 @@
-#
-# Copyright (c) 2017, Alliance for Open Media. All rights reserved
-#
-# This source code is subject to the terms of the BSD 2 Clause License and the
-# Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License was
-# not distributed with this source code in the LICENSE file, you can obtain it
-# at www.aomedia.org/license/software. If the Alliance for Open Media Patent
-# License 1.0 was not distributed with this source code in the PATENTS file, you
-# can obtain it at www.aomedia.org/license/patent.
-#
-if(AOM_BUILD_CMAKE_TOOLCHAINS_ARM_IOS_COMMON_CMAKE_)
- return()
-endif() # AOM_BUILD_CMAKE_TOOLCHAINS_ARM_IOS_COMMON_CMAKE_
-set(AOM_BUILD_CMAKE_ARM_IOS_COMMON_CMAKE_ 1)
-
-set(CMAKE_SYSTEM_NAME "Darwin")
-set(CMAKE_OSX_SYSROOT iphoneos)
-set(CMAKE_C_COMPILER clang)
-set(CMAKE_C_COMPILER_ARG1 "-arch ${CMAKE_SYSTEM_PROCESSOR}")
-set(CMAKE_CXX_COMPILER clang++)
-set(CMAKE_CXX_COMPILER_ARG1 "-arch ${CMAKE_SYSTEM_PROCESSOR}")
-
-# No runtime cpu detect for arm*-ios targets.
-set(CONFIG_RUNTIME_CPU_DETECT 0 CACHE NUMBER "")
-
-# TODO(tomfinegan): Handle bit code embedding.
diff --git a/third_party/aom/build/cmake/toolchains/arm64-ios.cmake b/third_party/aom/build/cmake/toolchains/arm64-ios.cmake
deleted file mode 100644
index 6feb1090f..000000000
--- a/third_party/aom/build/cmake/toolchains/arm64-ios.cmake
+++ /dev/null
@@ -1,23 +0,0 @@
-#
-# Copyright (c) 2017, Alliance for Open Media. All rights reserved
-#
-# This source code is subject to the terms of the BSD 2 Clause License and the
-# Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License was
-# not distributed with this source code in the LICENSE file, you can obtain it
-# at www.aomedia.org/license/software. If the Alliance for Open Media Patent
-# License 1.0 was not distributed with this source code in the PATENTS file, you
-# can obtain it at www.aomedia.org/license/patent.
-#
-if(AOM_BUILD_CMAKE_TOOLCHAINS_ARM64_IOS_CMAKE_)
- return()
-endif() # AOM_BUILD_CMAKE_TOOLCHAINS_ARM64_IOS_CMAKE_
-set(AOM_BUILD_CMAKE_TOOLCHAINS_ARM64_IOS_CMAKE_ 1)
-
-if(XCODE) # TODO(tomfinegan): Handle arm builds in Xcode.
- message(FATAL_ERROR "This toolchain does not support Xcode.")
-endif()
-
-set(CMAKE_SYSTEM_PROCESSOR "arm64")
-set(CMAKE_OSX_ARCHITECTURES "arm64")
-
-include("${CMAKE_CURRENT_LIST_DIR}/arm-ios-common.cmake")
diff --git a/third_party/aom/build/cmake/toolchains/arm64-linux-gcc.cmake b/third_party/aom/build/cmake/toolchains/arm64-linux-gcc.cmake
deleted file mode 100644
index 590a97a8e..000000000
--- a/third_party/aom/build/cmake/toolchains/arm64-linux-gcc.cmake
+++ /dev/null
@@ -1,36 +0,0 @@
-#
-# Copyright (c) 2017, Alliance for Open Media. All rights reserved
-#
-# This source code is subject to the terms of the BSD 2 Clause License and the
-# Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License was
-# not distributed with this source code in the LICENSE file, you can obtain it
-# at www.aomedia.org/license/software. If the Alliance for Open Media Patent
-# License 1.0 was not distributed with this source code in the PATENTS file, you
-# can obtain it at www.aomedia.org/license/patent.
-#
-if(AOM_BUILD_CMAKE_TOOLCHAINS_ARM64_LINUX_GCC_CMAKE_)
- return()
-endif() # AOM_BUILD_CMAKE_TOOLCHAINS_ARM64_LINUX_GCC_CMAKE_
-set(AOM_BUILD_CMAKE_TOOLCHAINS_ARM64_LINUX_GCC_CMAKE_ 1)
-
-set(CMAKE_SYSTEM_NAME "Linux")
-
-if("${CROSS}" STREQUAL "")
-
- # Default the cross compiler prefix to something known to work.
- set(CROSS aarch64-linux-gnu-)
-endif()
-
-set(CMAKE_C_COMPILER ${CROSS}gcc)
-set(CMAKE_CXX_COMPILER ${CROSS}g++)
-set(AS_EXECUTABLE ${CROSS}as)
-set(CMAKE_C_COMPILER_ARG1 "-march=armv8-a")
-set(CMAKE_CXX_COMPILER_ARG1 "-march=armv8-a")
-set(AOM_AS_FLAGS "-march=armv8-a")
-set(CMAKE_SYSTEM_PROCESSOR "arm64")
-
-# No intrinsics flag required for arm64-linux-gcc.
-set(AOM_NEON_INTRIN_FLAG "")
-
-# No runtime cpu detect for arm64-linux-gcc.
-set(CONFIG_RUNTIME_CPU_DETECT 0 CACHE NUMBER "")
diff --git a/third_party/aom/build/cmake/toolchains/arm64-mingw-gcc.cmake b/third_party/aom/build/cmake/toolchains/arm64-mingw-gcc.cmake
deleted file mode 100644
index b5b2ff1cd..000000000
--- a/third_party/aom/build/cmake/toolchains/arm64-mingw-gcc.cmake
+++ /dev/null
@@ -1,32 +0,0 @@
-#
-# Copyright (c) 2018, Alliance for Open Media. All rights reserved
-#
-# This source code is subject to the terms of the BSD 2 Clause License and the
-# Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License was
-# not distributed with this source code in the LICENSE file, you can obtain it
-# at www.aomedia.org/license/software. If the Alliance for Open Media Patent
-# License 1.0 was not distributed with this source code in the PATENTS file, you
-# can obtain it at www.aomedia.org/license/patent.
-#
-if(AOM_BUILD_CMAKE_TOOLCHAINS_ARM64_MINGW_GCC_CMAKE_)
- return()
-endif() # AOM_BUILD_CMAKE_TOOLCHAINS_ARM64_MINGW_GCC_CMAKE_
-set(AOM_BUILD_CMAKE_TOOLCHAINS_ARM64_MINGW_GCC_CMAKE_ 1)
-
-set(CMAKE_SYSTEM_PROCESSOR "arm64")
-set(CMAKE_SYSTEM_NAME "Windows")
-
-if("${CROSS}" STREQUAL "")
- set(CROSS aarch64-w64-mingw32-)
-endif()
-
-set(CMAKE_C_COMPILER ${CROSS}gcc)
-set(CMAKE_CXX_COMPILER ${CROSS}g++)
-set(CMAKE_AR ${CROSS}ar CACHE FILEPATH Archiver)
-set(CMAKE_RANLIB ${CROSS}ranlib CACHE FILEPATH Indexer)
-
-# No runtime cpu detect for arm64-mingw-gcc.
-set(CONFIG_RUNTIME_CPU_DETECT 0 CACHE NUMBER "")
-
-# Disable the use of the gtest's CMake support.
-set(AOM_DISABLE_GTEST_CMAKE 1)
diff --git a/third_party/aom/build/cmake/toolchains/armv7-ios.cmake b/third_party/aom/build/cmake/toolchains/armv7-ios.cmake
deleted file mode 100644
index 32a1b534a..000000000
--- a/third_party/aom/build/cmake/toolchains/armv7-ios.cmake
+++ /dev/null
@@ -1,31 +0,0 @@
-#
-# Copyright (c) 2017, Alliance for Open Media. All rights reserved
-#
-# This source code is subject to the terms of the BSD 2 Clause License and the
-# Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License was
-# not distributed with this source code in the LICENSE file, you can obtain it
-# at www.aomedia.org/license/software. If the Alliance for Open Media Patent
-# License 1.0 was not distributed with this source code in the PATENTS file, you
-# can obtain it at www.aomedia.org/license/patent.
-#
-if(AOM_BUILD_CMAKE_TOOLCHAINS_ARMV7_IOS_CMAKE_)
- return()
-endif() # AOM_BUILD_CMAKE_TOOLCHAINS_ARMV7_IOS_CMAKE_
-set(AOM_BUILD_CMAKE_TOOLCHAINS_ARMV7_IOS_CMAKE_ 1)
-
-if(XCODE)
-
- # TODO(tomfinegan): Handle arm builds in Xcode.
- message(FATAL_ERROR "This toolchain does not support Xcode.")
-endif()
-
-set(CMAKE_SYSTEM_PROCESSOR "armv7")
-set(CMAKE_OSX_ARCHITECTURES "armv7")
-
-include("${CMAKE_CURRENT_LIST_DIR}/arm-ios-common.cmake")
-
-# No intrinsics flag required for armv7s-ios.
-set(AOM_NEON_INTRIN_FLAG "")
-
-# No runtime cpu detect for armv7s-ios.
-set(CONFIG_RUNTIME_CPU_DETECT 0 CACHE NUMBER "")
diff --git a/third_party/aom/build/cmake/toolchains/armv7-linux-gcc.cmake b/third_party/aom/build/cmake/toolchains/armv7-linux-gcc.cmake
deleted file mode 100644
index 7d3d63085..000000000
--- a/third_party/aom/build/cmake/toolchains/armv7-linux-gcc.cmake
+++ /dev/null
@@ -1,43 +0,0 @@
-#
-# Copyright (c) 2017, Alliance for Open Media. All rights reserved
-#
-# This source code is subject to the terms of the BSD 2 Clause License and the
-# Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License was
-# not distributed with this source code in the LICENSE file, you can obtain it
-# at www.aomedia.org/license/software. If the Alliance for Open Media Patent
-# License 1.0 was not distributed with this source code in the PATENTS file, you
-# can obtain it at www.aomedia.org/license/patent.
-#
-if(AOM_BUILD_CMAKE_TOOLCHAINS_ARMV7_LINUX_GCC_CMAKE_)
- return()
-endif() # AOM_BUILD_CMAKE_TOOLCHAINS_ARMV7_LINUX_GCC_CMAKE_
-set(AOM_BUILD_CMAKE_TOOLCHAINS_ARMV7_LINUX_GCC_CMAKE_ 1)
-
-set(CMAKE_SYSTEM_NAME "Linux")
-
-if("${CROSS}" STREQUAL "")
-
- # Default the cross compiler prefix to something known to work.
- set(CROSS arm-linux-gnueabihf-)
-endif()
-
-if(NOT ${CROSS} MATCHES hf-$)
- set(AOM_EXTRA_TOOLCHAIN_FLAGS "-mfloat-abi=softfp")
-endif()
-
-set(CMAKE_C_COMPILER ${CROSS}gcc)
-set(CMAKE_CXX_COMPILER ${CROSS}g++)
-set(AS_EXECUTABLE ${CROSS}as)
-set(CMAKE_C_COMPILER_ARG1
- "-march=armv7-a -mfpu=neon ${AOM_EXTRA_TOOLCHAIN_FLAGS}")
-set(CMAKE_CXX_COMPILER_ARG1
- "-march=armv7-a -mfpu=neon ${AOM_EXTRA_TOOLCHAIN_FLAGS}")
-set(AOM_AS_FLAGS --defsym ARCHITECTURE=7 -march=armv7-a -mfpu=neon
- ${AOM_EXTRA_TOOLCHAIN_FLAGS})
-set(CMAKE_SYSTEM_PROCESSOR "armv7")
-
-# No intrinsics flag required for armv7-linux-gcc.
-set(AOM_NEON_INTRIN_FLAG "")
-
-# No runtime cpu detect for armv7-linux-gcc.
-set(CONFIG_RUNTIME_CPU_DETECT 0 CACHE NUMBER "")
diff --git a/third_party/aom/build/cmake/toolchains/armv7-mingw-gcc.cmake b/third_party/aom/build/cmake/toolchains/armv7-mingw-gcc.cmake
deleted file mode 100644
index cf06a11b3..000000000
--- a/third_party/aom/build/cmake/toolchains/armv7-mingw-gcc.cmake
+++ /dev/null
@@ -1,32 +0,0 @@
-#
-# Copyright (c) 2018, Alliance for Open Media. All rights reserved
-#
-# This source code is subject to the terms of the BSD 2 Clause License and the
-# Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License was
-# not distributed with this source code in the LICENSE file, you can obtain it
-# at www.aomedia.org/license/software. If the Alliance for Open Media Patent
-# License 1.0 was not distributed with this source code in the PATENTS file, you
-# can obtain it at www.aomedia.org/license/patent.
-#
-if(AOM_BUILD_CMAKE_TOOLCHAINS_ARMV7_MINGW_GCC_CMAKE_)
- return()
-endif() # AOM_BUILD_CMAKE_TOOLCHAINS_ARMV7_MINGW_GCC_CMAKE_
-set(AOM_BUILD_CMAKE_TOOLCHAINS_ARMV7_MINGW_GCC_CMAKE_ 1)
-
-set(CMAKE_SYSTEM_PROCESSOR "armv7")
-set(CMAKE_SYSTEM_NAME "Windows")
-
-if("${CROSS}" STREQUAL "")
- set(CROSS armv7-w64-mingw32-)
-endif()
-
-set(CMAKE_C_COMPILER ${CROSS}gcc)
-set(CMAKE_CXX_COMPILER ${CROSS}g++)
-set(CMAKE_AR ${CROSS}ar CACHE FILEPATH Archiver)
-set(CMAKE_RANLIB ${CROSS}ranlib CACHE FILEPATH Indexer)
-
-# No runtime cpu detect for armv7-mingw-gcc.
-set(CONFIG_RUNTIME_CPU_DETECT 0 CACHE NUMBER "")
-
-# Disable the use of the gtest's CMake support.
-set(AOM_DISABLE_GTEST_CMAKE 1)
diff --git a/third_party/aom/build/cmake/toolchains/armv7s-ios.cmake b/third_party/aom/build/cmake/toolchains/armv7s-ios.cmake
deleted file mode 100644
index 0940a6ee8..000000000
--- a/third_party/aom/build/cmake/toolchains/armv7s-ios.cmake
+++ /dev/null
@@ -1,31 +0,0 @@
-#
-# Copyright (c) 2017, Alliance for Open Media. All rights reserved
-#
-# This source code is subject to the terms of the BSD 2 Clause License and the
-# Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License was
-# not distributed with this source code in the LICENSE file, you can obtain it
-# at www.aomedia.org/license/software. If the Alliance for Open Media Patent
-# License 1.0 was not distributed with this source code in the PATENTS file, you
-# can obtain it at www.aomedia.org/license/patent.
-#
-if(AOM_BUILD_CMAKE_TOOLCHAINS_ARMV7S_IOS_CMAKE_)
- return()
-endif() # AOM_BUILD_CMAKE_TOOLCHAINS_ARMV7S_IOS_CMAKE_
-set(AOM_BUILD_CMAKE_TOOLCHAINS_ARMV7S_IOS_CMAKE_ 1)
-
-if(XCODE)
-
- # TODO(tomfinegan): Handle arm builds in Xcode.
- message(FATAL_ERROR "This toolchain does not support Xcode.")
-endif()
-
-set(CMAKE_SYSTEM_PROCESSOR "armv7s")
-set(CMAKE_OSX_ARCHITECTURES "armv7s")
-
-include("${CMAKE_CURRENT_LIST_DIR}/arm-ios-common.cmake")
-
-# No intrinsics flag required for armv7s-ios.
-set(AOM_NEON_INTRIN_FLAG "")
-
-# No runtime cpu detect for armv7s-ios.
-set(CONFIG_RUNTIME_CPU_DETECT 0 CACHE NUMBER "")
diff --git a/third_party/aom/build/cmake/toolchains/ios-simulator-common.cmake b/third_party/aom/build/cmake/toolchains/ios-simulator-common.cmake
deleted file mode 100644
index 76e0bd140..000000000
--- a/third_party/aom/build/cmake/toolchains/ios-simulator-common.cmake
+++ /dev/null
@@ -1,23 +0,0 @@
-#
-# Copyright (c) 2017, Alliance for Open Media. All rights reserved
-#
-# This source code is subject to the terms of the BSD 2 Clause License and the
-# Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License was
-# not distributed with this source code in the LICENSE file, you can obtain it
-# at www.aomedia.org/license/software. If the Alliance for Open Media Patent
-# License 1.0 was not distributed with this source code in the PATENTS file, you
-# can obtain it at www.aomedia.org/license/patent.
-#
-if(AOM_BUILD_CMAKE_TOOLCHAINS_IOS_SIMULATOR_COMMON_CMAKE_)
- return()
-endif() # AOM_BUILD_CMAKE_TOOLCHAINS_IOS_SIMULATOR_COMMON_CMAKE_
-set(AOM_BUILD_CMAKE_IOS_SIMULATOR_COMMON_CMAKE_ 1)
-
-set(CMAKE_SYSTEM_NAME "Darwin")
-set(CMAKE_OSX_SYSROOT iphonesimulator)
-set(CMAKE_C_COMPILER clang)
-set(CMAKE_C_COMPILER_ARG1 "-arch ${CMAKE_SYSTEM_PROCESSOR}")
-set(CMAKE_CXX_COMPILER clang++)
-set(CMAKE_CXX_COMPILER_ARG1 "-arch ${CMAKE_SYSTEM_PROCESSOR}")
-
-# TODO(tomfinegan): Handle bit code embedding.
diff --git a/third_party/aom/build/cmake/toolchains/mips32-linux-gcc.cmake b/third_party/aom/build/cmake/toolchains/mips32-linux-gcc.cmake
deleted file mode 100644
index 0f93490b1..000000000
--- a/third_party/aom/build/cmake/toolchains/mips32-linux-gcc.cmake
+++ /dev/null
@@ -1,77 +0,0 @@
-#
-# Copyright (c) 2017, Alliance for Open Media. All rights reserved
-#
-# This source code is subject to the terms of the BSD 2 Clause License and the
-# Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License was
-# not distributed with this source code in the LICENSE file, you can obtain it
-# at www.aomedia.org/license/software. If the Alliance for Open Media Patent
-# License 1.0 was not distributed with this source code in the PATENTS file, you
-# can obtain it at www.aomedia.org/license/patent.
-#
-if(AOM_BUILD_CMAKE_TOOLCHAINS_MIPS32_LINUX_GCC_CMAKE_)
- return()
-endif() # AOM_BUILD_CMAKE_TOOLCHAINS_MIPS32_LINUX_GCC_CMAKE_
-set(AOM_BUILD_CMAKE_TOOLCHAINS_MIPS32_LINUX_GCC_CMAKE_ 1)
-
-set(CMAKE_SYSTEM_NAME "Linux")
-
-if(ENABLE_DSPR2 AND ENABLE_MSA)
- message(FATAL_ERROR "ENABLE_DSPR2 and ENABLE_MSA cannot be combined.")
-endif()
-
-if(ENABLE_DSPR2)
- set(HAVE_DSPR2 1 CACHE BOOL "" FORCE)
-
- if("${CROSS}" STREQUAL "")
-
- # Default the cross compiler prefix to something known to work.
- set(CROSS mips-linux-gnu-)
- endif()
-
- set(MIPS_CFLAGS "-mdspr2")
- set(MIPS_CXXFLAGS "-mdspr2")
-elseif(ENABLE_MSA)
- set(HAVE_MSA 1 CACHE BOOL "" FORCE)
-
- if("${CROSS}" STREQUAL "")
-
- # Default the cross compiler prefix to something known to work.
- set(CROSS mips-mti-linux-gnu-)
- endif()
-
- set(MIPS_CFLAGS "-mmsa")
- set(MIPS_CXXFLAGS "-mmsa")
-endif()
-
-if("${CROSS}" STREQUAL "")
-
- # TODO(tomfinegan): Make it possible to turn this off. The $CROSS prefix won't
- # be desired on a mips host. Default cross compiler prefix to something that
- # might work for an unoptimized build.
- set(CROSS mips-linux-gnu-)
-endif()
-
-if("${MIPS_CPU}" STREQUAL "")
- set(MIPS_CFLAGS "${MIPS_CFLAGS} -mips32r2")
- set(MIPS_CXXFLAGS "${MIPS_CXXFLAGS} -mips32r2")
-elseif("${MIPS_CPU}" STREQUAL "p5600")
- set(P56_FLAGS
- "-mips32r5 -mload-store-pairs -msched-weight -mhard-float -mfp64")
- set(MIPS_CFLAGS "${MIPS_CFLAGS} ${P56_FLAGS}")
- set(MIPS_CXXFLAGS "${MIPS_CXXFLAGS} ${P56_FLAGS}")
- set(CMAKE_EXE_LINKER_FLAGS "-mfp64 ${CMAKE_EXE_LINKER_FLAGS}")
-endif()
-
-set(CMAKE_C_COMPILER ${CROSS}gcc)
-set(CMAKE_CXX_COMPILER ${CROSS}g++)
-set(AS_EXECUTABLE ${CROSS}as)
-set(CMAKE_C_COMPILER_ARG1 "-EL ${MIPS_CFLAGS}")
-set(CMAKE_CXX_COMPILER_ARG1 "-EL ${MIPS_CXXFLAGS}")
-set(CMAKE_SYSTEM_PROCESSOR "mips32")
-
-# No runtime cpu detect for mips32-linux-gcc.
-if(CONFIG_RUNTIME_CPU_DETECT)
- message("--- CONFIG_RUNTIME_CPU_DETECT not supported for mips32 targets.")
-endif()
-
-set(CONFIG_RUNTIME_CPU_DETECT 0 CACHE NUMBER "" FORCE)
diff --git a/third_party/aom/build/cmake/toolchains/mips64-linux-gcc.cmake b/third_party/aom/build/cmake/toolchains/mips64-linux-gcc.cmake
deleted file mode 100644
index ad9aab09d..000000000
--- a/third_party/aom/build/cmake/toolchains/mips64-linux-gcc.cmake
+++ /dev/null
@@ -1,54 +0,0 @@
-#
-# Copyright (c) 2017, Alliance for Open Media. All rights reserved
-#
-# This source code is subject to the terms of the BSD 2 Clause License and the
-# Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License was
-# not distributed with this source code in the LICENSE file, you can obtain it
-# at www.aomedia.org/license/software. If the Alliance for Open Media Patent
-# License 1.0 was not distributed with this source code in the PATENTS file, you
-# can obtain it at www.aomedia.org/license/patent.
-#
-if(AOM_BUILD_CMAKE_TOOLCHAINS_MIPS64_LINUX_GCC_CMAKE_)
- return()
-endif() # AOM_BUILD_CMAKE_TOOLCHAINS_MIPS64_LINUX_GCC_CMAKE_
-set(AOM_BUILD_CMAKE_TOOLCHAINS_MIPS64_LINUX_GCC_CMAKE_ 1)
-
-set(CMAKE_SYSTEM_NAME "Linux")
-
-if("${CROSS}" STREQUAL "")
-
- # TODO(tomfinegan): Make it possible to turn this off. The $CROSS prefix won't
- # be desired on a mips host.
- #
- # Default the cross compiler prefix to something known to work.
- set(CROSS mips-img-linux-gnu-)
-endif()
-
-if(ENABLE_MSA)
- set(HAVE_MSA 1 CACHE BOOL "" FORCE)
- set(MIPS_CFLAGS "-mmsa")
- set(MIPS_CXXFLAGS "-mmsa")
-endif()
-
-if("${MIPS_CPU}" STREQUAL "i6400" OR "${MIPS_CPU}" STREQUAL "p6600")
- set(MIPS_CPU_FLAGS "-mips64r6 -mabi=64 -mload-store-pairs -msched-weight")
- set(MIPS_CPU_FLAGS "${MIPS_CPU_FLAGS} -mhard-float -mfp64")
- set(MIPS_CFLAGS "${MIPS_CFLAGS} ${MIPS_CPU_FLAGS}")
- set(MIPS_CXXFLAGS "${MIPS_CXXFLAGS} ${MIPS_CPU_FLAGS}")
- set(CMAKE_EXE_LINKER_FLAGS
- "-mips64r6 -mabi64 -mfp64 ${CMAKE_EXE_LINKER_FLAGS}")
-endif()
-
-set(CMAKE_C_COMPILER ${CROSS}gcc)
-set(CMAKE_CXX_COMPILER ${CROSS}g++)
-set(AS_EXECUTABLE ${CROSS}as)
-set(CMAKE_C_COMPILER_ARG1 "-EL ${MIPS_CFLAGS}")
-set(CMAKE_CXX_COMPILER_ARG1 "-EL ${MIPS_CXXFLAGS}")
-set(CMAKE_SYSTEM_PROCESSOR "mips64")
-
-# No runtime cpu detect for mips64-linux-gcc.
-if(CONFIG_RUNTIME_CPU_DETECT)
- message("--- CONFIG_RUNTIME_CPU_DETECT not supported for mips64 targets.")
-endif()
-
-set(CONFIG_RUNTIME_CPU_DETECT 0 CACHE NUMBER "" FORCE)
diff --git a/third_party/aom/build/cmake/toolchains/ppc-linux-gcc.cmake b/third_party/aom/build/cmake/toolchains/ppc-linux-gcc.cmake
deleted file mode 100644
index c86cc27e3..000000000
--- a/third_party/aom/build/cmake/toolchains/ppc-linux-gcc.cmake
+++ /dev/null
@@ -1,29 +0,0 @@
-#
-# Copyright (c) 2018, Alliance for Open Media. All rights reserved
-#
-# This source code is subject to the terms of the BSD 2 Clause License and the
-# Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License was
-# not distributed with this source code in the LICENSE file, you can obtain it
-# at www.aomedia.org/license/software. If the Alliance for Open Media Patent
-# License 1.0 was not distributed with this source code in the PATENTS file, you
-# can obtain it at www.aomedia.org/license/patent.
-#
-if(AOM_BUILD_CMAKE_TOOLCHAINS_PPC_LINUX_GCC_CMAKE_)
- return()
-endif() # AOM_BUILD_CMAKE_TOOLCHAINS_PPC_LINUX_GCC_CMAKE_
-set(AOM_BUILD_CMAKE_TOOLCHAINS_PPC_LINUX_GCC_CMAKE_ 1)
-
-set(CMAKE_SYSTEM_NAME "Linux")
-
-if("${CROSS}" STREQUAL "")
-
- # Default the cross compiler prefix to something known to work.
- set(CROSS powerpc64le-unknown-linux-gnu-)
-endif()
-
-set(CMAKE_C_COMPILER ${CROSS}gcc)
-set(CMAKE_CXX_COMPILER ${CROSS}g++)
-set(AS_EXECUTABLE ${CROSS}as)
-set(CMAKE_SYSTEM_PROCESSOR "ppc")
-
-set(CONFIG_RUNTIME_CPU_DETECT 0 CACHE NUMBER "")
diff --git a/third_party/aom/build/cmake/toolchains/x86-ios-simulator.cmake b/third_party/aom/build/cmake/toolchains/x86-ios-simulator.cmake
deleted file mode 100644
index 6b6f52cac..000000000
--- a/third_party/aom/build/cmake/toolchains/x86-ios-simulator.cmake
+++ /dev/null
@@ -1,28 +0,0 @@
-#
-# Copyright (c) 2017, Alliance for Open Media. All rights reserved
-#
-# This source code is subject to the terms of the BSD 2 Clause License and the
-# Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License was
-# not distributed with this source code in the LICENSE file, you can obtain it
-# at www.aomedia.org/license/software. If the Alliance for Open Media Patent
-# License 1.0 was not distributed with this source code in the PATENTS file, you
-# can obtain it at www.aomedia.org/license/patent.
-#
-if(AOM_BUILD_CMAKE_TOOLCHAINS_X86_IOS_SIMULATOR_CMAKE_)
- return()
-endif() # AOM_BUILD_CMAKE_TOOLCHAINS_X86_IOS_SIMULATOR_CMAKE_
-set(AOM_BUILD_CMAKE_TOOLCHAINS_X86_IOS_SIMULATOR_CMAKE_ 1)
-
-if(XCODE)
-
- # TODO(tomfinegan): Handle ios sim builds in Xcode.
- message(FATAL_ERROR "This toolchain does not support Xcode.")
-endif()
-
-set(CMAKE_SYSTEM_PROCESSOR "i386")
-set(CMAKE_OSX_ARCHITECTURES "i386")
-
-# Avoid noisy PIC/PIE warnings.
-set(CONFIG_PIC 1 CACHE NUMBER "")
-
-include("${CMAKE_CURRENT_LIST_DIR}/ios-simulator-common.cmake")
diff --git a/third_party/aom/build/cmake/toolchains/x86-linux.cmake b/third_party/aom/build/cmake/toolchains/x86-linux.cmake
deleted file mode 100644
index c2a700bfe..000000000
--- a/third_party/aom/build/cmake/toolchains/x86-linux.cmake
+++ /dev/null
@@ -1,19 +0,0 @@
-#
-# Copyright (c) 2017, Alliance for Open Media. All rights reserved
-#
-# This source code is subject to the terms of the BSD 2 Clause License and the
-# Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License was
-# not distributed with this source code in the LICENSE file, you can obtain it
-# at www.aomedia.org/license/software. If the Alliance for Open Media Patent
-# License 1.0 was not distributed with this source code in the PATENTS file, you
-# can obtain it at www.aomedia.org/license/patent.
-#
-if(AOM_BUILD_CMAKE_TOOLCHAINS_X86_LINUX_CMAKE_)
- return()
-endif() # AOM_BUILD_CMAKE_TOOLCHAINS_X86_LINUX_CMAKE_
-set(AOM_BUILD_CMAKE_TOOLCHAINS_X86_LINUX_CMAKE_ 1)
-
-set(CMAKE_SYSTEM_PROCESSOR "x86")
-set(CMAKE_SYSTEM_NAME "Linux")
-set(CMAKE_C_COMPILER_ARG1 "-m32")
-set(CMAKE_CXX_COMPILER_ARG1 "-m32")
diff --git a/third_party/aom/build/cmake/toolchains/x86-macos.cmake b/third_party/aom/build/cmake/toolchains/x86-macos.cmake
deleted file mode 100644
index 7a46e06a9..000000000
--- a/third_party/aom/build/cmake/toolchains/x86-macos.cmake
+++ /dev/null
@@ -1,18 +0,0 @@
-#
-# Copyright (c) 2017, Alliance for Open Media. All rights reserved
-#
-# This source code is subject to the terms of the BSD 2 Clause License and the
-# Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License was
-# not distributed with this source code in the LICENSE file, you can obtain it
-# at www.aomedia.org/license/software. If the Alliance for Open Media Patent
-# License 1.0 was not distributed with this source code in the PATENTS file, you
-# can obtain it at www.aomedia.org/license/patent.
-#
-set(CMAKE_SYSTEM_PROCESSOR "x86")
-set(CMAKE_SYSTEM_NAME "Darwin")
-set(CMAKE_OSX_ARCHITECTURES "i386")
-set(CMAKE_C_COMPILER_ARG1 "-arch i386")
-set(CMAKE_CXX_COMPILER_ARG1 "-arch i386")
-
-# Apple tools always complain in 32 bit mode without PIC.
-set(CONFIG_PIC 1 CACHE NUMBER "")
diff --git a/third_party/aom/build/cmake/toolchains/x86-mingw-gcc.cmake b/third_party/aom/build/cmake/toolchains/x86-mingw-gcc.cmake
deleted file mode 100644
index c986c4ee3..000000000
--- a/third_party/aom/build/cmake/toolchains/x86-mingw-gcc.cmake
+++ /dev/null
@@ -1,31 +0,0 @@
-#
-# Copyright (c) 2017, Alliance for Open Media. All rights reserved
-#
-# This source code is subject to the terms of the BSD 2 Clause License and the
-# Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License was
-# not distributed with this source code in the LICENSE file, you can obtain it
-# at www.aomedia.org/license/software. If the Alliance for Open Media Patent
-# License 1.0 was not distributed with this source code in the PATENTS file, you
-# can obtain it at www.aomedia.org/license/patent.
-#
-if(AOM_BUILD_CMAKE_TOOLCHAINS_X86_MINGW_GCC_CMAKE_)
- return()
-endif() # AOM_BUILD_CMAKE_TOOLCHAINS_X86_MINGW_GCC_CMAKE_
-set(AOM_BUILD_CMAKE_TOOLCHAINS_X86_MINGW_GCC_CMAKE_ 1)
-
-set(CMAKE_SYSTEM_PROCESSOR "x86")
-set(CMAKE_SYSTEM_NAME "Windows")
-set(CMAKE_C_COMPILER_ARG1 "-m32")
-set(CMAKE_CXX_COMPILER_ARG1 "-m32")
-
-if("${CROSS}" STREQUAL "")
- set(CROSS i686-w64-mingw32-)
-endif()
-
-set(CMAKE_C_COMPILER ${CROSS}gcc)
-set(CMAKE_CXX_COMPILER ${CROSS}g++)
-set(CMAKE_AR ${CROSS}ar CACHE FILEPATH Archiver)
-set(CMAKE_RANLIB ${CROSS}ranlib CACHE FILEPATH Indexer)
-
-# Disable the use of the gtest's CMake support.
-set(AOM_DISABLE_GTEST_CMAKE 1)
diff --git a/third_party/aom/build/cmake/toolchains/x86_64-ios-simulator.cmake b/third_party/aom/build/cmake/toolchains/x86_64-ios-simulator.cmake
deleted file mode 100644
index d4b40ed09..000000000
--- a/third_party/aom/build/cmake/toolchains/x86_64-ios-simulator.cmake
+++ /dev/null
@@ -1,25 +0,0 @@
-#
-# Copyright (c) 2017, Alliance for Open Media. All rights reserved
-#
-# This source code is subject to the terms of the BSD 2 Clause License and the
-# Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License was
-# not distributed with this source code in the LICENSE file, you can obtain it
-# at www.aomedia.org/license/software. If the Alliance for Open Media Patent
-# License 1.0 was not distributed with this source code in the PATENTS file, you
-# can obtain it at www.aomedia.org/license/patent.
-#
-if(AOM_BUILD_CMAKE_TOOLCHAINS_X86_64_IOS_SIMULATOR_CMAKE_)
- return()
-endif() # AOM_BUILD_CMAKE_TOOLCHAINS_X86_64_IOS_SIMULATOR_CMAKE_
-set(AOM_BUILD_CMAKE_TOOLCHAINS_X86_64_IOS_SIMULATOR_CMAKE_ 1)
-
-if(XCODE)
-
- # TODO(tomfinegan): Handle ios sim builds in Xcode.
- message(FATAL_ERROR "This toolchain does not support Xcode.")
-endif()
-
-set(CMAKE_SYSTEM_PROCESSOR "x86_64")
-set(CMAKE_OSX_ARCHITECTURES "x86_64")
-
-include("${CMAKE_CURRENT_LIST_DIR}/ios-simulator-common.cmake")
diff --git a/third_party/aom/build/cmake/toolchains/x86_64-mingw-gcc.cmake b/third_party/aom/build/cmake/toolchains/x86_64-mingw-gcc.cmake
deleted file mode 100644
index 00d94d5f1..000000000
--- a/third_party/aom/build/cmake/toolchains/x86_64-mingw-gcc.cmake
+++ /dev/null
@@ -1,29 +0,0 @@
-#
-# Copyright (c) 2017, Alliance for Open Media. All rights reserved
-#
-# This source code is subject to the terms of the BSD 2 Clause License and the
-# Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License was
-# not distributed with this source code in the LICENSE file, you can obtain it
-# at www.aomedia.org/license/software. If the Alliance for Open Media Patent
-# License 1.0 was not distributed with this source code in the PATENTS file, you
-# can obtain it at www.aomedia.org/license/patent.
-#
-if(AOM_BUILD_CMAKE_TOOLCHAINS_X86_64_MINGW_GCC_CMAKE_)
- return()
-endif() # AOM_BUILD_CMAKE_TOOLCHAINS_X86_64_MINGW_GCC_CMAKE_
-set(AOM_BUILD_CMAKE_TOOLCHAINS_X86_64_MINGW_GCC_CMAKE_ 1)
-
-set(CMAKE_SYSTEM_PROCESSOR "x86_64")
-set(CMAKE_SYSTEM_NAME "Windows")
-
-if("${CROSS}" STREQUAL "")
- set(CROSS x86_64-w64-mingw32-)
-endif()
-
-set(CMAKE_C_COMPILER ${CROSS}gcc)
-set(CMAKE_CXX_COMPILER ${CROSS}g++)
-set(CMAKE_AR ${CROSS}ar CACHE FILEPATH Archiver)
-set(CMAKE_RANLIB ${CROSS}ranlib CACHE FILEPATH Indexer)
-
-# Disable the use of the gtest's CMake support.
-set(AOM_DISABLE_GTEST_CMAKE 1)
diff --git a/third_party/aom/build/cmake/util.cmake b/third_party/aom/build/cmake/util.cmake
deleted file mode 100644
index b70ec4013..000000000
--- a/third_party/aom/build/cmake/util.cmake
+++ /dev/null
@@ -1,171 +0,0 @@
-#
-# Copyright (c) 2017, Alliance for Open Media. All rights reserved
-#
-# This source code is subject to the terms of the BSD 2 Clause License and the
-# Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License was
-# not distributed with this source code in the LICENSE file, you can obtain it
-# at www.aomedia.org/license/software. If the Alliance for Open Media Patent
-# License 1.0 was not distributed with this source code in the PATENTS file, you
-# can obtain it at www.aomedia.org/license/patent.
-#
-if(AOM_BUILD_CMAKE_UTIL_CMAKE_)
- return()
-endif() # AOM_BUILD_CMAKE_UTIL_CMAKE_
-set(AOM_BUILD_CMAKE_UTIL_CMAKE_ 1)
-
-# Directory where generated sources will be written.
-set(AOM_GEN_SRC_DIR "${AOM_CONFIG_DIR}/gen_src")
-
-# Creates dummy source file in $AOM_CONFIG_DIR named $basename.$extension and
-# returns the full path to the dummy source file via the $out_file_path
-# parameter.
-macro(create_dummy_source_file basename extension out_file_path)
- set(dummy_source_file "${AOM_GEN_SRC_DIR}/${basename}_dummy.${extension}")
- file(
- WRITE
- "${dummy_source_file}" "// Generated file. DO NOT EDIT!\n"
- "// ${target_name} needs a ${extension} file to force link language, \n"
- "// or to silence a harmless CMake warning: Ignore me.\n"
- "void ${target_name}_dummy_function(void) {}\n")
-endmacro()
-
-# Convenience function for adding a dummy source file to $target_name using
-# $extension as the file extension. Wraps create_dummy_source_file().
-function(add_dummy_source_file_to_target target_name extension)
- create_dummy_source_file("${target_name}" "${extension}" "dummy_source_file")
- target_sources(${target_name} PRIVATE ${dummy_source_file})
-endfunction()
-
-# Sets the value of the variable referenced by $feature to $value, and reports
-# the change to the user via call to message(WARNING ...). $cause is expected to
-# be a configuration variable that conflicts with $feature in some way. This
-# function is a noop if $feature is already set to $value.
-function(change_config_and_warn feature value cause)
- if(${feature} EQUAL ${value})
- return()
- endif()
- set(${feature} ${value} PARENT_SCOPE)
- if(${value} EQUAL 1)
- set(verb "Enabled")
- set(reason "required for")
- else()
- set(verb "Disabled")
- set(reason "incompatible with")
- endif()
- set(warning_message "${verb} ${feature}, ${reason} ${cause}.")
- message(WARNING "--- ${warning_message}")
-endfunction()
-
-# Extracts the version string from $version_file and returns it to the user via
-# $version_string_out_var. To achieve this VERSION_STRING_NOSP is located in
-# $version_file and then everything but the string literal assigned to the
-# variable is removed. Quotes and the leading 'v' are stripped from the returned
-# string.
-function(extract_version_string version_file version_string_out_var)
- file(STRINGS "${version_file}" aom_version REGEX "VERSION_STRING_NOSP")
- string(REPLACE "#define VERSION_STRING_NOSP " "" aom_version "${aom_version}")
- string(REPLACE "\"" "" aom_version "${aom_version}")
- string(REPLACE " " "" aom_version "${aom_version}")
- string(FIND "${aom_version}" "v" v_pos)
- if(${v_pos} EQUAL 0)
- string(SUBSTRING "${aom_version}" 1 -1 aom_version)
- endif()
- set("${version_string_out_var}" "${aom_version}" PARENT_SCOPE)
-endfunction()
-
-# Sets CMake compiler launcher to $launcher_name when $launcher_name is found in
-# $PATH. Warns user about ignoring build flag $launcher_flag when $launcher_name
-# is not found in $PATH.
-function(set_compiler_launcher launcher_flag launcher_name)
- find_program(launcher_path "${launcher_name}")
- if(launcher_path)
- set(CMAKE_C_COMPILER_LAUNCHER "${launcher_path}" PARENT_SCOPE)
- set(CMAKE_CXX_COMPILER_LAUNCHER "${launcher_path}" PARENT_SCOPE)
- message("--- Using ${launcher_name} as compiler launcher.")
- else()
- message(WARNING
- "--- Cannot find ${launcher_name}, ${launcher_flag} ignored.")
- endif()
-endfunction()
-
-# Sentinel value used to detect when a variable has been set via the -D argument
-# passed to CMake on the command line.
-set(cmake_cmdline_helpstring "No help, variable specified on the command line.")
-
-# Wrapper macro for set() that does some book keeping to help with storage of
-# build configuration information.
-#
-# Sets the default value for variable $name when the value of $name has not
-# already been set via the CMake command line.
-#
-# The names of variables defaulted through this macro are added to
-# $AOM_CONFIG_VARS to facilitate build logging and diagnostics.
-macro(set_aom_detect_var name value type helpstring)
- unset(list_index)
- list(FIND AOM_DETECT_VARS ${name} list_index)
- if(${list_index} EQUAL -1)
- list(APPEND AOM_DETECT_VARS ${name})
- endif()
-
- # Update the variable only when it does not carry the CMake assigned help
- # string for variables specified via the command line.
- unset(cache_helpstring)
- get_property(cache_helpstring CACHE ${name} PROPERTY HELPSTRING)
- if(NOT "${cache_helpstring}" STREQUAL "${cmake_cmdline_helpstring}")
- set(${name} ${value} CACHE ${type} "${helpstring}")
- mark_as_advanced(${name})
- else()
- message(
- WARNING
- "${name} has been set by CMake, but it may be overridden by the build "
- "system during environment detection")
- endif()
-endmacro()
-
-# Wrapper macro for set() that does some book keeping to help with storage of
-# build configuration information.
-#
-# Sets the default value for variable $name when the value of $name has not
-# already been set via the CMake command line.
-#
-# The names of variables defaulted through this macro are added to
-# $AOM_CONFIG_VARS to facilitate build logging and diagnostics.
-macro(set_aom_config_var name value type helpstring)
- unset(list_index)
- list(FIND AOM_CONFIG_VARS ${name} list_index)
- if(${list_index} EQUAL -1)
- list(APPEND AOM_CONFIG_VARS ${name})
- endif()
-
- # Update the variable only when it does not carry the CMake assigned help
- # string for variables specified via the command line.
- unset(cache_helpstring)
- get_property(cache_helpstring CACHE ${name} PROPERTY HELPSTRING)
- if(NOT "${cache_helpstring}" STREQUAL "${cmake_cmdline_helpstring}")
- set(${name} ${value} CACHE ${type} "${helpstring}")
- endif()
-endmacro()
-
-# Wrapper macro for option() that does some book keeping to help with storage of
-# build configuration information.
-#
-# Sets the default value for variable $name when the value of $name has not
-# already been set via the CMake command line.
-#
-# The names of variables defaulted through this macro are added to
-# $AOM_OPTION_VARS to facilitate build logging and diagnostics.
-macro(set_aom_option_var name helpstring value)
- unset(list_index)
- list(FIND AOM_OPTION_VARS ${name} list_index)
- if(${list_index} EQUAL -1)
- list(APPEND AOM_OPTION_VARS ${name})
- endif()
-
- # Update the variable only when it does not carry the CMake assigned help
- # string for variables specified via the command line.
- unset(cache_helpstring)
- get_property(cache_helpstring CACHE ${name} PROPERTY HELPSTRING)
- if(NOT "${cache_helpstring}" STREQUAL "${cmake_cmdline_helpstring}")
- option(${name} "${helpstring}" ${value})
- endif()
-endmacro()
diff --git a/third_party/aom/build/cmake/version.cmake b/third_party/aom/build/cmake/version.cmake
deleted file mode 100644
index d169b12ac..000000000
--- a/third_party/aom/build/cmake/version.cmake
+++ /dev/null
@@ -1,57 +0,0 @@
-#
-# Copyright (c) 2017, Alliance for Open Media. All rights reserved
-#
-# This source code is subject to the terms of the BSD 2 Clause License and the
-# Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License was
-# not distributed with this source code in the LICENSE file, you can obtain it
-# at www.aomedia.org/license/software. If the Alliance for Open Media Patent
-# License 1.0 was not distributed with this source code in the PATENTS file, you
-# can obtain it at www.aomedia.org/license/patent.
-#
-cmake_minimum_required(VERSION 3.5)
-
-set(REQUIRED_ARGS "AOM_ROOT" "AOM_CONFIG_DIR" "GIT_EXECUTABLE"
- "PERL_EXECUTABLE")
-
-foreach(arg ${REQUIRED_ARGS})
- if("${${arg}}" STREQUAL "")
- message(FATAL_ERROR "${arg} must not be empty.")
- endif()
-endforeach()
-
-include("${AOM_ROOT}/build/cmake/util.cmake")
-
-# Generate the version string for this run.
-unset(aom_version)
-if(EXISTS "${GIT_EXECUTABLE}")
- execute_process(COMMAND ${GIT_EXECUTABLE} --git-dir=${AOM_ROOT}/.git describe
- OUTPUT_VARIABLE aom_version ERROR_QUIET)
- string(STRIP "${aom_version}" aom_version)
-
- # Remove the leading 'v' from the version string.
- string(FIND "${aom_version}" "v" v_pos)
- if(${v_pos} EQUAL 0)
- string(SUBSTRING "${aom_version}" 1 -1 aom_version)
- endif()
-endif()
-
-if("${aom_version}" STREQUAL "")
- set(aom_version "${AOM_ROOT}/CHANGELOG")
-endif()
-
-unset(last_aom_version)
-if(EXISTS "${AOM_CONFIG_DIR}/config/aom_version.h")
- extract_version_string("${AOM_CONFIG_DIR}/config/aom_version.h"
- last_aom_version)
-endif()
-
-if(NOT "${aom_version}" STREQUAL "${last_aom_version}")
-
- # TODO(tomfinegan): Perl dependency is unnecessary. CMake can do everything
- # that is done by version.pl on its own (if a bit more verbose...).
- execute_process(COMMAND
- ${PERL_EXECUTABLE} "${AOM_ROOT}/build/cmake/version.pl"
- --version_data=${aom_version}
- --version_filename=${AOM_CONFIG_DIR}/config/aom_version.h
- VERBATIM)
-endif()
diff --git a/third_party/aom/build/cmake/version.pl b/third_party/aom/build/cmake/version.pl
deleted file mode 100755
index 7d23f2b27..000000000
--- a/third_party/aom/build/cmake/version.pl
+++ /dev/null
@@ -1,112 +0,0 @@
-#!/usr/bin/env perl
-##
-## Copyright (c) 2016, Alliance for Open Media. All rights reserved
-##
-## This source code is subject to the terms of the BSD 2 Clause License and
-## the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
-## was not distributed with this source code in the LICENSE file, you can
-## obtain it at www.aomedia.org/license/software. If the Alliance for Open
-## Media Patent License 1.0 was not distributed with this source code in the
-## PATENTS file, you can obtain it at www.aomedia.org/license/patent.
-##
-use strict;
-use warnings;
-use 5.010;
-use Getopt::Long;
-
-my $git_desc = '';
-my $version_data;
-my $version_filename;
-GetOptions('version_data=s' => \$version_data,
- 'version_filename=s' => \$version_filename) or
- die("Invalid arg(s): $!");
-
-if (!defined $version_data || length($version_data) == 0 ||
- !defined $version_filename || length($version_filename) == 0) {
- die("--version_data and --version_filename are required.");
-}
-
-# Determine if $version_data is a filename or a git tag/description.
-my $version_string;
-chomp($version_data);
-if (-r $version_data) {
- # $version_data is the path to the CHANGELOG. Parse the most recent version.
- my $changelog_filename = $version_data;
- open(my $changelog_file, '<', $changelog_filename) or
- die("Unable to open CHANGELOG @ $changelog_filename: $!.");
-
- while (my $line = <$changelog_file>) {
- my @split_line = split(" ", $line, 3);
- next if @split_line < 2;
- $version_string = $split_line[1];
- last if substr($version_string, 0, 1) eq "v";
- }
- close($changelog_file);
-} else {
- # $version_data is either a tag name or a full git description, one of:
- # tagName OR tagName-commitsSinceTag-shortCommitHash
- # In either case we want the first element of the array returned by split.
- $version_string = (split("-", $version_data))[0];
- $git_desc = $version_data;
-}
-
-if (substr($version_string, 0, 1) eq "v") {
- $version_string = substr($version_string, 1);
-}
-
-my @version_components = split('\.', $version_string, 4);
-my $version_major = $version_components[0];
-my $version_minor = $version_components[1];
-my $version_patch = $version_components[2];
-
-my $version_extra = "";
-if (length($git_desc) > 0) {
- my @git_desc_components = split('-', $git_desc, 2);
- $version_extra = $git_desc_components[1];
-}
-
-open(my $version_file, '>', $version_filename) or
- die("Cannot open $version_filename: $!");
-
-my $version_packed = "((VERSION_MAJOR << 16) | (VERSION_MINOR << 8) | (VERSION_PATCH))";
-my $year = (localtime)[5] + 1900;
-my $lic_block = << "EOF";
-/*
- * Copyright (c) $year, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-EOF
-
-select $version_file;
-if (length($git_desc)) {
- print << "EOF";
-$lic_block
-#define VERSION_MAJOR $version_major
-#define VERSION_MINOR $version_minor
-#define VERSION_PATCH $version_patch
-#define VERSION_EXTRA \"$version_extra\"
-#define VERSION_PACKED \\
- $version_packed
-#define VERSION_STRING_NOSP \"$git_desc\"
-#define VERSION_STRING \" $git_desc\"
-EOF
-} else {
- print << "EOF";
-$lic_block
-#define VERSION_MAJOR $version_major
-#define VERSION_MINOR $version_minor
-#define VERSION_PATCH $version_patch
-#define VERSION_EXTRA \"$version_extra\"
-#define VERSION_PACKED \\
- $version_packed
-#define VERSION_STRING_NOSP \"v$version_string\"
-#define VERSION_STRING \" v$version_string\"
-EOF
-}
-close($version_file);
diff --git a/third_party/aom/codereview.settings b/third_party/aom/codereview.settings
deleted file mode 100644
index 185e9344c..000000000
--- a/third_party/aom/codereview.settings
+++ /dev/null
@@ -1,4 +0,0 @@
-# This file is used by git cl to get repository specific information.
-GERRIT_HOST: True
-CODE_REVIEW_SERVER: aomedia-review.googlesource.com
-GERRIT_SQUASH_UPLOADS: False
diff --git a/third_party/aom/common/args.c b/third_party/aom/common/args.c
deleted file mode 100644
index 7131e24de..000000000
--- a/third_party/aom/common/args.c
+++ /dev/null
@@ -1,297 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#include "common/args.h"
-
-#include <stdlib.h>
-#include <string.h>
-#include <limits.h>
-
-#include "aom/aom_integer.h"
-#include "aom_ports/msvc.h"
-
-#if defined(__GNUC__) && __GNUC__
-extern void die(const char *fmt, ...) __attribute__((noreturn));
-#else
-extern void die(const char *fmt, ...);
-#endif
-
-struct arg arg_init(char **argv) {
- struct arg a;
-
- a.argv = argv;
- a.argv_step = 1;
- a.name = NULL;
- a.val = NULL;
- a.def = NULL;
- return a;
-}
-
-char *ignore_front_spaces(const char *str) {
- while (str[0] == ' ' || str[0] == '\t') ++str;
- return (char *)str;
-}
-
-void ignore_end_spaces(char *str) {
- char *end = str + strlen(str);
- while (end > str && (end[0] == ' ' || end[0] == '\t' || end[0] == '\n' ||
- end[0] == '\r' || end[0] == '\0'))
- --end;
- if (end >= str) end[1] = '\0';
-}
-
-int arg_cfg(int *argc, char ***argv, const char *file) {
- char **argv_local = (char **)*argv;
- char **argv_org = (char **)*argv;
- char line[1024 * 10];
- FILE *f = fopen(file, "r");
- if (!f) return 1;
-
- while (fgets(line, sizeof(line) - 1, f)) {
- char *actual_line = ignore_front_spaces(line);
- char *left, *right, *comment;
- size_t length = strlen(actual_line);
-
- if (length == 0 || actual_line[0] == '#') continue;
- right = strchr(actual_line, ':');
- if (right == NULL) continue;
- right[0] = '\0';
-
- left = ignore_front_spaces(actual_line);
- right = ignore_front_spaces(right + 1);
-
- comment = strchr(right, '#');
- if (comment != NULL) comment[0] = '\0';
-
- ignore_end_spaces(left);
- ignore_end_spaces(right);
-
- char **new_args = argv_dup(*argc, (const char **)argv_local);
- char *new_line = (char *)malloc(sizeof(*new_line) * 128);
-
- if (argv_local != argv_org) free(argv_local);
-
- if (!strcmp(right, "ON"))
- snprintf(new_line, sizeof(*new_line) * 128, "--%s", left);
- else
- snprintf(new_line, sizeof(*new_line) * 128, "--%s=%s", left, right);
-
- new_args[(*argc) - 1] = new_args[(*argc) - 2];
- new_args[(*argc) - 2] = new_line;
- argv_local = new_args;
- *argv = new_args;
- (*argc)++;
- }
- fclose(f);
- return 0;
-}
-
-int arg_match(struct arg *arg_, const struct arg_def *def, char **argv) {
- struct arg arg;
-
- if (!argv[0] || argv[0][0] != '-') return 0;
-
- arg = arg_init(argv);
-
- if (def->short_name && strlen(arg.argv[0]) == strlen(def->short_name) + 1 &&
- !strcmp(arg.argv[0] + 1, def->short_name)) {
- arg.name = arg.argv[0] + 1;
- arg.val = def->has_val ? arg.argv[1] : NULL;
- arg.argv_step = def->has_val ? 2 : 1;
- } else if (def->long_name) {
- const size_t name_len = strlen(def->long_name);
-
- if (strlen(arg.argv[0]) >= name_len + 2 && arg.argv[0][1] == '-' &&
- !strncmp(arg.argv[0] + 2, def->long_name, name_len) &&
- (arg.argv[0][name_len + 2] == '=' ||
- arg.argv[0][name_len + 2] == '\0')) {
- arg.name = arg.argv[0] + 2;
- arg.val = arg.name[name_len] == '=' ? arg.name + name_len + 1 : NULL;
- arg.argv_step = 1;
- }
- }
-
- if (arg.name && !arg.val && def->has_val)
- die("Error: option %s requires argument.\n", arg.name);
-
- if (arg.name && arg.val && !def->has_val)
- die("Error: option %s requires no argument.\n", arg.name);
-
- if (arg.name && (arg.val || !def->has_val)) {
- arg.def = def;
- *arg_ = arg;
- return 1;
- }
-
- return 0;
-}
-
-const char *arg_next(struct arg *arg) {
- if (arg->argv[0]) arg->argv += arg->argv_step;
-
- return *arg->argv;
-}
-
-char **argv_dup(int argc, const char **argv) {
- char **new_argv = malloc((argc + 1) * sizeof(*argv));
-
- memcpy(new_argv, argv, argc * sizeof(*argv));
- new_argv[argc] = NULL;
- return new_argv;
-}
-
-void arg_show_usage(FILE *fp, const struct arg_def *const *defs) {
- char option_text[40] = { 0 };
-
- for (; *defs; defs++) {
- const struct arg_def *def = *defs;
- char *short_val = def->has_val ? " <arg>" : "";
- char *long_val = def->has_val ? "=<arg>" : "";
-
- if (def->short_name && def->long_name) {
- char *comma = def->has_val ? "," : ", ";
-
- snprintf(option_text, 37, "-%s%s%s --%s%6s", def->short_name, short_val,
- comma, def->long_name, long_val);
- } else if (def->short_name)
- snprintf(option_text, 37, "-%s%s", def->short_name, short_val);
- else if (def->long_name)
- snprintf(option_text, 37, " --%s%s", def->long_name, long_val);
-
- fprintf(fp, " %-37s\t%s\n", option_text, def->desc);
-
- if (def->enums) {
- const struct arg_enum_list *listptr;
-
- fprintf(fp, " %-37s\t ", "");
-
- for (listptr = def->enums; listptr->name; listptr++)
- fprintf(fp, "%s%s", listptr->name, listptr[1].name ? ", " : "\n");
- }
- }
-}
-
-unsigned int arg_parse_uint(const struct arg *arg) {
- char *endptr;
- const unsigned long rawval = strtoul(arg->val, &endptr, 10); // NOLINT
-
- if (arg->val[0] != '\0' && endptr[0] == '\0') {
- if (rawval <= UINT_MAX) return (unsigned int)rawval;
-
- die("Option %s: Value %lu out of range for unsigned int\n", arg->name,
- rawval);
- }
-
- die("Option %s: Invalid character '%c'\n", arg->name, *endptr);
- return 0;
-}
-
-int arg_parse_int(const struct arg *arg) {
- char *endptr;
- const long rawval = strtol(arg->val, &endptr, 10); // NOLINT
-
- if (arg->val[0] != '\0' && endptr[0] == '\0') {
- if (rawval >= INT_MIN && rawval <= INT_MAX) return (int)rawval;
-
- die("Option %s: Value %ld out of range for signed int\n", arg->name,
- rawval);
- }
-
- die("Option %s: Invalid character '%c'\n", arg->name, *endptr);
- return 0;
-}
-
-struct aom_rational {
- int num; /**< fraction numerator */
- int den; /**< fraction denominator */
-};
-struct aom_rational arg_parse_rational(const struct arg *arg) {
- long int rawval;
- char *endptr;
- struct aom_rational rat;
-
- /* parse numerator */
- rawval = strtol(arg->val, &endptr, 10);
-
- if (arg->val[0] != '\0' && endptr[0] == '/') {
- if (rawval >= INT_MIN && rawval <= INT_MAX)
- rat.num = (int)rawval;
- else
- die("Option %s: Value %ld out of range for signed int\n", arg->name,
- rawval);
- } else
- die("Option %s: Expected / at '%c'\n", arg->name, *endptr);
-
- /* parse denominator */
- rawval = strtol(endptr + 1, &endptr, 10);
-
- if (arg->val[0] != '\0' && endptr[0] == '\0') {
- if (rawval >= INT_MIN && rawval <= INT_MAX)
- rat.den = (int)rawval;
- else
- die("Option %s: Value %ld out of range for signed int\n", arg->name,
- rawval);
- } else
- die("Option %s: Invalid character '%c'\n", arg->name, *endptr);
-
- return rat;
-}
-
-int arg_parse_enum(const struct arg *arg) {
- const struct arg_enum_list *listptr;
- long int rawval;
- char *endptr;
-
- /* First see if the value can be parsed as a raw value */
- rawval = strtol(arg->val, &endptr, 10);
- if (arg->val[0] != '\0' && endptr[0] == '\0') {
- /* Got a raw value, make sure it's valid */
- for (listptr = arg->def->enums; listptr->name; listptr++)
- if (listptr->val == rawval) return (int)rawval;
- }
-
- /* Next see if it can be parsed as a string */
- for (listptr = arg->def->enums; listptr->name; listptr++)
- if (!strcmp(arg->val, listptr->name)) return listptr->val;
-
- die("Option %s: Invalid value '%s'\n", arg->name, arg->val);
- return 0;
-}
-
-int arg_parse_enum_or_int(const struct arg *arg) {
- if (arg->def->enums) return arg_parse_enum(arg);
- return arg_parse_int(arg);
-}
-
-// parse a comma separated list of at most n integers
-// return the number of elements in the list
-int arg_parse_list(const struct arg *arg, int *list, int n) {
- const char *ptr = arg->val;
- char *endptr;
- int i = 0;
-
- while (ptr[0] != '\0') {
- int32_t rawval = (int32_t)strtol(ptr, &endptr, 10);
- if (rawval < INT_MIN || rawval > INT_MAX) {
- die("Option %s: Value %ld out of range for signed int\n", arg->name,
- rawval);
- } else if (i >= n) {
- die("Option %s: List has more than %d entries\n", arg->name, n);
- } else if (*endptr == ',') {
- endptr++;
- } else if (*endptr != '\0') {
- die("Option %s: Bad list separator '%c'\n", arg->name, *endptr);
- }
- list[i++] = (int)rawval;
- ptr = endptr;
- }
- return i;
-}
diff --git a/third_party/aom/common/args.h b/third_party/aom/common/args.h
deleted file mode 100644
index 6a2664269..000000000
--- a/third_party/aom/common/args.h
+++ /dev/null
@@ -1,68 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#ifndef AOM_COMMON_ARGS_H_
-#define AOM_COMMON_ARGS_H_
-#include <stdio.h>
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-struct arg {
- char **argv;
- const char *name;
- const char *val;
- unsigned int argv_step;
- const struct arg_def *def;
-};
-
-struct arg_enum_list {
- const char *name;
- int val;
-};
-#define ARG_ENUM_LIST_END \
- { 0 }
-
-typedef struct arg_def {
- const char *short_name;
- const char *long_name;
- int has_val;
- const char *desc;
- const struct arg_enum_list *enums;
-} arg_def_t;
-#define ARG_DEF(s, l, v, d) \
- { s, l, v, d, NULL }
-#define ARG_DEF_ENUM(s, l, v, d, e) \
- { s, l, v, d, e }
-#define ARG_DEF_LIST_END \
- { 0 }
-
-struct arg arg_init(char **argv);
-int arg_match(struct arg *arg_, const struct arg_def *def, char **argv);
-char *ignore_front_spaces(const char *str);
-void ignore_end_spaces(char *str);
-int arg_cfg(int *argc, char ***argv, const char *file);
-const char *arg_next(struct arg *arg);
-void arg_show_usage(FILE *fp, const struct arg_def *const *defs);
-char **argv_dup(int argc, const char **argv);
-
-unsigned int arg_parse_uint(const struct arg *arg);
-int arg_parse_int(const struct arg *arg);
-struct aom_rational arg_parse_rational(const struct arg *arg);
-int arg_parse_enum(const struct arg *arg);
-int arg_parse_enum_or_int(const struct arg *arg);
-int arg_parse_list(const struct arg *arg, int *list, int n);
-#ifdef __cplusplus
-} // extern "C"
-#endif
-
-#endif // AOM_COMMON_ARGS_H_
diff --git a/third_party/aom/common/av1_config.c b/third_party/aom/common/av1_config.c
deleted file mode 100644
index e8decf76f..000000000
--- a/third_party/aom/common/av1_config.c
+++ /dev/null
@@ -1,511 +0,0 @@
-/*
- * Copyright (c) 2018, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-#include <stdio.h>
-#include <string.h>
-
-#include "aom/aom_image.h"
-#include "aom/aom_integer.h"
-#include "aom_dsp/bitreader_buffer.h"
-#include "aom_dsp/bitwriter_buffer.h"
-#include "av1/common/obu_util.h"
-#include "common/av1_config.h"
-#include "config/aom_config.h"
-
-// Helper macros to reduce verbosity required to check for read errors.
-//
-// Note that when using these macros, even single line if statements should use
-// curly braces to avoid unexpected behavior because all but the
-// AV1C_POP_ERROR_HANDLER_DATA() macro consist of multiple statements.
-#define AV1C_READ_BIT_OR_RETURN_ERROR(field) \
- int field = 0; \
- do { \
- field = aom_rb_read_bit(reader); \
- if (result == -1) { \
- fprintf(stderr, \
- "av1c: Error reading bit for " #field ", value=%d result=%d.\n", \
- field, result); \
- return -1; \
- } \
- } while (0)
-
-#define AV1C_READ_BITS_OR_RETURN_ERROR(field, length) \
- int field = 0; \
- do { \
- field = aom_rb_read_literal(reader, (length)); \
- if (result == -1) { \
- fprintf(stderr, \
- "av1c: Could not read bits for " #field \
- ", value=%d result=%d.\n", \
- field, result); \
- return -1; \
- } \
- } while (0)
-
-// Helper macros for setting/restoring the error handler data in
-// aom_read_bit_buffer.
-#define AV1C_PUSH_ERROR_HANDLER_DATA(new_data) \
- void *original_error_handler_data = NULL; \
- do { \
- original_error_handler_data = reader->error_handler_data; \
- reader->error_handler_data = &new_data; \
- } while (0)
-
-#define AV1C_POP_ERROR_HANDLER_DATA() \
- do { \
- reader->error_handler_data = original_error_handler_data; \
- } while (0)
-
-static const size_t kAv1cSize = 4;
-
-static void bitreader_error_handler(void *data) {
- int *error_val = (int *)data;
- *error_val = -1;
-}
-
-// Parse the AV1 timing_info() structure:
-// timing_info( ) {
-// num_units_in_display_tick f(32)
-// time_scale f(32)
-// equal_picture_interval f(1)
-// if (equal_picture_interval)
-// num_ticks_per_picture_minus_1 uvlc()
-// }
-static int parse_timing_info(struct aom_read_bit_buffer *reader) {
- int result = 0;
- AV1C_PUSH_ERROR_HANDLER_DATA(result);
-
- AV1C_READ_BITS_OR_RETURN_ERROR(num_units_in_display_tick, 32);
- AV1C_READ_BITS_OR_RETURN_ERROR(time_scale, 32);
-
- AV1C_READ_BIT_OR_RETURN_ERROR(equal_picture_interval);
- if (equal_picture_interval) {
- uint32_t num_ticks_per_picture_minus_1 = aom_rb_read_uvlc(reader);
- if (result == -1) {
- fprintf(stderr,
- "av1c: Could not read bits for "
- "num_ticks_per_picture_minus_1, value=%u.\n",
- num_ticks_per_picture_minus_1);
- return result;
- }
- }
-
- AV1C_POP_ERROR_HANDLER_DATA();
- return result;
-}
-
-// Parse the AV1 decoder_model_info() structure:
-// decoder_model_info( ) {
-// buffer_delay_length_minus_1 f(5)
-// num_units_in_decoding_tick f(32)
-// buffer_removal_time_length_minus_1 f(5)
-// frame_presentation_time_length_minus_1 f(5)
-// }
-//
-// Returns -1 upon failure, or the value of buffer_delay_length_minus_1 + 1.
-static int parse_decoder_model_info(struct aom_read_bit_buffer *reader) {
- int result = 0;
- AV1C_PUSH_ERROR_HANDLER_DATA(result);
-
- AV1C_READ_BITS_OR_RETURN_ERROR(buffer_delay_length_minus_1, 5);
- AV1C_READ_BITS_OR_RETURN_ERROR(num_units_in_decoding_tick, 32);
- AV1C_READ_BITS_OR_RETURN_ERROR(buffer_removal_time_length_minus_1, 5);
- AV1C_READ_BITS_OR_RETURN_ERROR(frame_presentation_time_length_minus_1, 5);
-
- AV1C_POP_ERROR_HANDLER_DATA();
- return buffer_delay_length_minus_1 + 1;
-}
-
-// Parse the AV1 operating_parameters_info() structure:
-// operating_parameters_info( op ) {
-// n = buffer_delay_length_minus_1 + 1
-// decoder_buffer_delay[ op ] f(n)
-// encoder_buffer_delay[ op ] f(n)
-// low_delay_mode_flag[ op ] f(1)
-// }
-static int parse_operating_parameters_info(struct aom_read_bit_buffer *reader,
- int buffer_delay_length_minus_1) {
- int result = 0;
- AV1C_PUSH_ERROR_HANDLER_DATA(result);
-
- const int buffer_delay_length = buffer_delay_length_minus_1 + 1;
- AV1C_READ_BITS_OR_RETURN_ERROR(decoder_buffer_delay, buffer_delay_length);
- AV1C_READ_BITS_OR_RETURN_ERROR(encoder_buffer_delay, buffer_delay_length);
- AV1C_READ_BIT_OR_RETURN_ERROR(low_delay_mode_flag);
-
- AV1C_POP_ERROR_HANDLER_DATA();
- return result;
-}
-
-// Parse the AV1 color_config() structure..See:
-// https://aomediacodec.github.io/av1-spec/av1-spec.pdf#page=44
-static int parse_color_config(struct aom_read_bit_buffer *reader,
- Av1Config *config) {
- int result = 0;
- AV1C_PUSH_ERROR_HANDLER_DATA(result);
-
- AV1C_READ_BIT_OR_RETURN_ERROR(high_bitdepth);
- config->high_bitdepth = high_bitdepth;
-
- int bit_depth = 0;
- if (config->seq_profile == 2 && config->high_bitdepth) {
- AV1C_READ_BIT_OR_RETURN_ERROR(twelve_bit);
- config->twelve_bit = twelve_bit;
- bit_depth = config->twelve_bit ? 12 : 10;
- } else {
- bit_depth = config->high_bitdepth ? 10 : 8;
- }
-
- if (config->seq_profile != 1) {
- AV1C_READ_BIT_OR_RETURN_ERROR(mono_chrome);
- config->monochrome = mono_chrome;
- }
-
- int color_primaries = AOM_CICP_CP_UNSPECIFIED;
- int transfer_characteristics = AOM_CICP_TC_UNSPECIFIED;
- int matrix_coefficients = AOM_CICP_MC_UNSPECIFIED;
-
- AV1C_READ_BIT_OR_RETURN_ERROR(color_description_present_flag);
- if (color_description_present_flag) {
- AV1C_READ_BITS_OR_RETURN_ERROR(color_primaries_val, 8);
- color_primaries = color_primaries_val;
- AV1C_READ_BITS_OR_RETURN_ERROR(transfer_characteristics_val, 8);
- transfer_characteristics = transfer_characteristics_val;
- AV1C_READ_BITS_OR_RETURN_ERROR(matrix_coefficients_val, 8);
- matrix_coefficients = matrix_coefficients_val;
- }
-
- if (config->monochrome) {
- AV1C_READ_BIT_OR_RETURN_ERROR(color_range);
- config->chroma_subsampling_x = 1;
- config->chroma_subsampling_y = 1;
- } else if (color_primaries == AOM_CICP_CP_BT_709 &&
- transfer_characteristics == AOM_CICP_TC_SRGB &&
- matrix_coefficients == AOM_CICP_MC_IDENTITY) {
- config->chroma_subsampling_x = 0;
- config->chroma_subsampling_y = 0;
- } else {
- AV1C_READ_BIT_OR_RETURN_ERROR(color_range);
- if (config->seq_profile == 0) {
- config->chroma_subsampling_x = 1;
- config->chroma_subsampling_y = 1;
- } else if (config->seq_profile == 1) {
- config->chroma_subsampling_x = 0;
- config->chroma_subsampling_y = 0;
- } else {
- if (bit_depth == 12) {
- AV1C_READ_BIT_OR_RETURN_ERROR(subsampling_x);
- config->chroma_subsampling_x = subsampling_x;
- if (subsampling_x) {
- AV1C_READ_BIT_OR_RETURN_ERROR(subsampling_y);
- config->chroma_subsampling_y = subsampling_y;
- } else {
- config->chroma_subsampling_y = 0;
- }
- } else {
- config->chroma_subsampling_x = 1;
- config->chroma_subsampling_y = 0;
- }
- }
-
- if (config->chroma_subsampling_x && config->chroma_subsampling_y) {
- AV1C_READ_BITS_OR_RETURN_ERROR(chroma_sample_position, 2);
- config->chroma_sample_position = chroma_sample_position;
- }
- }
-
- if (!config->monochrome) {
- AV1C_READ_BIT_OR_RETURN_ERROR(separate_uv_delta_q);
- }
-
- AV1C_POP_ERROR_HANDLER_DATA();
- return result;
-}
-
-// Parse AV1 Sequence Header OBU. See:
-// https://aomediacodec.github.io/av1-spec/av1-spec.pdf#page=41
-static int parse_sequence_header(const uint8_t *const buffer, size_t length,
- Av1Config *config) {
- int result = 0;
- // The reader instance is local to this function, but a pointer to the
- // reader instance is used within this function and throughout this file to
- // allow use of the helper macros that reduce parse error checking verbosity.
- struct aom_read_bit_buffer reader_instance = {
- buffer, buffer + length, 0, &result, bitreader_error_handler
- };
- struct aom_read_bit_buffer *reader = &reader_instance;
-
- AV1C_READ_BITS_OR_RETURN_ERROR(seq_profile, 3);
- config->seq_profile = seq_profile;
- AV1C_READ_BIT_OR_RETURN_ERROR(still_picture);
- AV1C_READ_BIT_OR_RETURN_ERROR(reduced_still_picture_header);
- if (reduced_still_picture_header) {
- config->initial_presentation_delay_present = 0;
- AV1C_READ_BITS_OR_RETURN_ERROR(seq_level_idx_0, 5);
- config->seq_level_idx_0 = seq_level_idx_0;
- config->seq_tier_0 = 0;
- } else {
- int has_decoder_model = 0;
- int buffer_delay_length = 0;
-
- AV1C_READ_BIT_OR_RETURN_ERROR(timing_info_present_flag);
- if (timing_info_present_flag) {
- if (parse_timing_info(reader) != 0) return -1;
-
- AV1C_READ_BIT_OR_RETURN_ERROR(decoder_model_info_present_flag);
- if (decoder_model_info_present_flag &&
- (buffer_delay_length = parse_decoder_model_info(reader)) == -1) {
- return -1;
- }
- has_decoder_model = 1;
- }
-
- AV1C_READ_BIT_OR_RETURN_ERROR(initial_presentation_delay_present);
- config->initial_presentation_delay_present =
- initial_presentation_delay_present;
-
- AV1C_READ_BITS_OR_RETURN_ERROR(operating_points_cnt_minus_1, 5);
- const int num_operating_points = operating_points_cnt_minus_1 + 1;
-
- for (int op_index = 0; op_index < num_operating_points; ++op_index) {
- AV1C_READ_BITS_OR_RETURN_ERROR(operating_point_idc, 12);
- AV1C_READ_BITS_OR_RETURN_ERROR(seq_level_idx, 5);
-
- int seq_tier = 0;
- if (seq_level_idx > 7) {
- AV1C_READ_BIT_OR_RETURN_ERROR(seq_tier_this_op);
- seq_tier = seq_tier_this_op;
- }
-
- if (has_decoder_model) {
- AV1C_READ_BIT_OR_RETURN_ERROR(decoder_model_present_for_op);
- if (decoder_model_present_for_op) {
- if (parse_operating_parameters_info(reader, buffer_delay_length) ==
- -1) {
- return -1;
- }
- }
- }
-
- if (config->initial_presentation_delay_present) {
- // Skip the initial presentation delay bits if present since this
- // function has no access to the data required to properly set the
- // field.
- AV1C_READ_BIT_OR_RETURN_ERROR(
- initial_presentation_delay_present_for_this_op);
- if (initial_presentation_delay_present_for_this_op) {
- AV1C_READ_BITS_OR_RETURN_ERROR(initial_presentation_delay_minus_1, 4);
- }
- }
-
- if (op_index == 0) {
- // Av1Config needs only the values from the first operating point.
- config->seq_level_idx_0 = seq_level_idx;
- config->seq_tier_0 = seq_tier;
- config->initial_presentation_delay_present = 0;
- config->initial_presentation_delay_minus_one = 0;
- }
- }
- }
-
- AV1C_READ_BITS_OR_RETURN_ERROR(frame_width_bits_minus_1, 4);
- AV1C_READ_BITS_OR_RETURN_ERROR(frame_height_bits_minus_1, 4);
- AV1C_READ_BITS_OR_RETURN_ERROR(max_frame_width_minus_1,
- frame_width_bits_minus_1 + 1);
- AV1C_READ_BITS_OR_RETURN_ERROR(max_frame_height_minus_1,
- frame_height_bits_minus_1 + 1);
-
- int frame_id_numbers_present = 0;
- if (!reduced_still_picture_header) {
- AV1C_READ_BIT_OR_RETURN_ERROR(frame_id_numbers_present_flag);
- frame_id_numbers_present = frame_id_numbers_present_flag;
- }
-
- if (frame_id_numbers_present) {
- AV1C_READ_BITS_OR_RETURN_ERROR(delta_frame_id_length_minus_2, 4);
- AV1C_READ_BITS_OR_RETURN_ERROR(additional_frame_id_length_minus_1, 3);
- }
-
- AV1C_READ_BIT_OR_RETURN_ERROR(use_128x128_superblock);
- AV1C_READ_BIT_OR_RETURN_ERROR(enable_filter_intra);
- AV1C_READ_BIT_OR_RETURN_ERROR(enable_intra_edge_filter);
-
- if (!reduced_still_picture_header) {
- AV1C_READ_BIT_OR_RETURN_ERROR(enable_interintra_compound);
- AV1C_READ_BIT_OR_RETURN_ERROR(enable_masked_compound);
- AV1C_READ_BIT_OR_RETURN_ERROR(enable_warped_motion);
- AV1C_READ_BIT_OR_RETURN_ERROR(enable_dual_filter);
-
- AV1C_READ_BIT_OR_RETURN_ERROR(enable_order_hint);
- if (enable_order_hint) {
- AV1C_READ_BIT_OR_RETURN_ERROR(enable_jnt_comp);
- AV1C_READ_BIT_OR_RETURN_ERROR(enable_ref_frame_mvs);
- }
-
- const int SELECT_SCREEN_CONTENT_TOOLS = 2;
- int seq_force_screen_content_tools = SELECT_SCREEN_CONTENT_TOOLS;
- AV1C_READ_BIT_OR_RETURN_ERROR(seq_choose_screen_content_tools);
- if (!seq_choose_screen_content_tools) {
- AV1C_READ_BIT_OR_RETURN_ERROR(seq_force_screen_content_tools_val);
- seq_force_screen_content_tools = seq_force_screen_content_tools_val;
- }
-
- if (seq_force_screen_content_tools > 0) {
- AV1C_READ_BIT_OR_RETURN_ERROR(seq_choose_integer_mv);
-
- if (!seq_choose_integer_mv) {
- AV1C_READ_BIT_OR_RETURN_ERROR(seq_force_integer_mv);
- }
- }
-
- if (enable_order_hint) {
- AV1C_READ_BITS_OR_RETURN_ERROR(order_hint_bits_minus_1, 3);
- }
- }
-
- AV1C_READ_BIT_OR_RETURN_ERROR(enable_superres);
- AV1C_READ_BIT_OR_RETURN_ERROR(enable_cdef);
- AV1C_READ_BIT_OR_RETURN_ERROR(enable_restoration);
-
- if (parse_color_config(reader, config) != 0) {
- fprintf(stderr, "av1c: color_config() parse failed.\n");
- return -1;
- }
-
- AV1C_READ_BIT_OR_RETURN_ERROR(film_grain_params_present);
- return 0;
-}
-
-int get_av1config_from_obu(const uint8_t *buffer, size_t length, int is_annexb,
- Av1Config *config) {
- if (!buffer || length == 0 || !config) {
- return -1;
- }
-
- ObuHeader obu_header;
- memset(&obu_header, 0, sizeof(obu_header));
-
- size_t sequence_header_length = 0;
- size_t obu_header_length = 0;
- if (aom_read_obu_header_and_size(buffer, length, is_annexb, &obu_header,
- &sequence_header_length,
- &obu_header_length) != AOM_CODEC_OK ||
- obu_header.type != OBU_SEQUENCE_HEADER ||
- sequence_header_length + obu_header_length > length) {
- return -1;
- }
-
- memset(config, 0, sizeof(*config));
- config->marker = 1;
- config->version = 1;
- return parse_sequence_header(buffer + obu_header_length,
- sequence_header_length, config);
-}
-
-int read_av1config(const uint8_t *buffer, size_t buffer_length,
- size_t *bytes_read, Av1Config *config) {
- if (!buffer || buffer_length < kAv1cSize || !bytes_read || !config) return -1;
-
- *bytes_read = 0;
-
- int result = 0;
- struct aom_read_bit_buffer reader_instance = {
- buffer, buffer + buffer_length, 0, &result, bitreader_error_handler
- };
- struct aom_read_bit_buffer *reader = &reader_instance;
-
- memset(config, 0, sizeof(*config));
-
- AV1C_READ_BIT_OR_RETURN_ERROR(marker);
- config->marker = marker;
-
- AV1C_READ_BITS_OR_RETURN_ERROR(version, 7);
- config->version = version;
-
- AV1C_READ_BITS_OR_RETURN_ERROR(seq_profile, 3);
- config->seq_profile = seq_profile;
-
- AV1C_READ_BITS_OR_RETURN_ERROR(seq_level_idx_0, 5);
- config->seq_level_idx_0 = seq_level_idx_0;
-
- AV1C_READ_BIT_OR_RETURN_ERROR(seq_tier_0);
- config->seq_tier_0 = seq_tier_0;
-
- AV1C_READ_BIT_OR_RETURN_ERROR(high_bitdepth);
- config->high_bitdepth = high_bitdepth;
-
- AV1C_READ_BIT_OR_RETURN_ERROR(twelve_bit);
- config->twelve_bit = twelve_bit;
-
- AV1C_READ_BIT_OR_RETURN_ERROR(monochrome);
- config->monochrome = monochrome;
-
- AV1C_READ_BIT_OR_RETURN_ERROR(chroma_subsampling_x);
- config->chroma_subsampling_x = chroma_subsampling_x;
-
- AV1C_READ_BIT_OR_RETURN_ERROR(chroma_subsampling_y);
- config->chroma_subsampling_y = chroma_subsampling_y;
-
- AV1C_READ_BITS_OR_RETURN_ERROR(chroma_sample_position, 2);
- config->chroma_sample_position = chroma_sample_position;
-
- AV1C_READ_BITS_OR_RETURN_ERROR(reserved, 3);
-
- AV1C_READ_BIT_OR_RETURN_ERROR(initial_presentation_delay_present);
- config->initial_presentation_delay_present =
- initial_presentation_delay_present;
-
- AV1C_READ_BITS_OR_RETURN_ERROR(initial_presentation_delay_minus_one, 4);
- config->initial_presentation_delay_minus_one =
- initial_presentation_delay_minus_one;
-
- *bytes_read = aom_rb_bytes_read(reader);
-
- return 0;
-}
-
-int write_av1config(const Av1Config *config, size_t capacity,
- size_t *bytes_written, uint8_t *buffer) {
- if (!config || !buffer || capacity < kAv1cSize || !bytes_written) return -1;
-
- *bytes_written = 0;
- memset(buffer, 0, kAv1cSize);
-
- struct aom_write_bit_buffer writer = { buffer, 0 };
-
- aom_wb_write_bit(&writer, config->marker);
- aom_wb_write_literal(&writer, config->version, 7);
- aom_wb_write_literal(&writer, config->seq_profile, 3);
- aom_wb_write_literal(&writer, config->seq_level_idx_0, 5);
- aom_wb_write_bit(&writer, config->seq_tier_0);
- aom_wb_write_bit(&writer, config->high_bitdepth);
- aom_wb_write_bit(&writer, config->twelve_bit);
- aom_wb_write_bit(&writer, config->monochrome);
- aom_wb_write_bit(&writer, config->chroma_subsampling_x);
- aom_wb_write_bit(&writer, config->chroma_subsampling_y);
- aom_wb_write_literal(&writer, config->chroma_sample_position, 2);
- aom_wb_write_literal(&writer, 0, 3); // reserved
- aom_wb_write_bit(&writer, config->initial_presentation_delay_present);
-
- if (config->initial_presentation_delay_present) {
- aom_wb_write_literal(&writer, config->initial_presentation_delay_minus_one,
- 4);
- } else {
- aom_wb_write_literal(&writer, 0, 4); // reserved
- }
-
- *bytes_written = aom_wb_bytes_written(&writer);
- return 0;
-}
-
-#undef AV1C_READ_BIT_OR_RETURN_ERROR
-#undef AV1C_READ_BITS_OR_RETURN_ERROR
-#undef AV1C_PUSH_ERROR_HANDLER_DATA
-#undef AV1C_POP_ERROR_HANDLER_DATA
diff --git a/third_party/aom/common/av1_config.h b/third_party/aom/common/av1_config.h
deleted file mode 100644
index a15bedb30..000000000
--- a/third_party/aom/common/av1_config.h
+++ /dev/null
@@ -1,86 +0,0 @@
-/*
- * Copyright (c) 2018, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-#ifndef AOM_COMMON_AV1_CONFIG_H_
-#define AOM_COMMON_AV1_CONFIG_H_
-
-#include "aom/aom_integer.h"
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-// Struct representing ISOBMFF/Matroska AV1 config. See:
-// https://aomediacodec.github.io/av1-isobmff/#av1codecconfigurationbox-syntax
-//
-// The AV1 config has the following format:
-//
-// unsigned int (1) marker = 1;
-// unsigned int (7) version = 1;
-// unsigned int (3) seq_profile;
-// unsigned int (5) seq_level_idx_0;
-// unsigned int (1) seq_tier_0;
-// unsigned int (1) high_bitdepth;
-// unsigned int (1) twelve_bit;
-// unsigned int (1) monochrome;
-// unsigned int (1) chroma_subsampling_x;
-// unsigned int (1) chroma_subsampling_y;
-// unsigned int (2) chroma_sample_position;
-// unsigned int (3) reserved = 0;
-//
-// unsigned int (1) initial_presentation_delay_present;
-// if (initial_presentation_delay_present) {
-// unsigned int (4) initial_presentation_delay_minus_one;
-// } else {
-// unsigned int (4) reserved = 0;
-// }
-//
-// unsigned int (8)[] configOBUs;
-//
-// Note: get_av1config_from_obu() does not currently store 'configOBUs' data, so
-// the field is omitted.
-typedef struct _Av1Config {
- uint8_t marker;
- uint8_t version;
- uint8_t seq_profile;
- uint8_t seq_level_idx_0;
- uint8_t seq_tier_0;
- uint8_t high_bitdepth;
- uint8_t twelve_bit;
- uint8_t monochrome;
- uint8_t chroma_subsampling_x;
- uint8_t chroma_subsampling_y;
- uint8_t chroma_sample_position;
- uint8_t initial_presentation_delay_present;
- uint8_t initial_presentation_delay_minus_one;
-} Av1Config;
-
-// Attempts to parse a Sequence Header OBU and set the paramenters of 'config'.
-// Returns 0 upon success, and -1 upon failure. 'buffer' can contain multiple
-// OBUs, but the Sequence Header OBU must be the first OBU within the buffer.
-int get_av1config_from_obu(const uint8_t *buffer, size_t length, int is_annexb,
- Av1Config *config);
-
-// Attempts to parse an AV1 config from 'buffer'. Returns 0 upon success.
-// Returns -1 when 'buffer_length' is less than 4, when passed NULL pointers, or
-// when parsing of 'buffer' fails.
-int read_av1config(const uint8_t *buffer, size_t buffer_length,
- size_t *bytes_read, Av1Config *config);
-
-// Writes 'config' to 'buffer'. Returns 0 upon successful write to 'buffer'.
-// Returns -1 when passed NULL pointers or when 'capacity' insufficient.
-int write_av1config(const Av1Config *config, size_t capacity,
- size_t *bytes_written, uint8_t *buffer);
-
-#ifdef __cplusplus
-} /* extern "C" */
-#endif
-
-#endif // AOM_COMMON_AV1_CONFIG_H_
diff --git a/third_party/aom/common/ivfdec.c b/third_party/aom/common/ivfdec.c
deleted file mode 100644
index 80d73b04c..000000000
--- a/third_party/aom/common/ivfdec.c
+++ /dev/null
@@ -1,110 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#include "common/ivfdec.h"
-
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-
-#include "aom_ports/mem_ops.h"
-#include "aom_ports/sanitizer.h"
-
-static const char *IVF_SIGNATURE = "DKIF";
-
-static void fix_framerate(int *num, int *den) {
- if (*den <= 0 || *den >= 1000000000 || *num <= 0 || *num >= 1000) {
- // framerate seems to be invalid, just default to 30fps.
- *num = 30;
- *den = 1;
- }
-}
-
-int file_is_ivf(struct AvxInputContext *input_ctx) {
- char raw_hdr[32];
- int is_ivf = 0;
-
- if (fread(raw_hdr, 1, 32, input_ctx->file) == 32) {
- if (memcmp(IVF_SIGNATURE, raw_hdr, 4) == 0) {
- is_ivf = 1;
-
- if (mem_get_le16(raw_hdr + 4) != 0) {
- fprintf(stderr,
- "Error: Unrecognized IVF version! This file may not"
- " decode properly.");
- }
-
- input_ctx->fourcc = mem_get_le32(raw_hdr + 8);
- input_ctx->width = mem_get_le16(raw_hdr + 12);
- input_ctx->height = mem_get_le16(raw_hdr + 14);
- input_ctx->framerate.numerator = mem_get_le32(raw_hdr + 16);
- input_ctx->framerate.denominator = mem_get_le32(raw_hdr + 20);
- fix_framerate(&input_ctx->framerate.numerator,
- &input_ctx->framerate.denominator);
- }
- }
-
- if (!is_ivf) {
- rewind(input_ctx->file);
- input_ctx->detect.buf_read = 0;
- } else {
- input_ctx->detect.position = 4;
- }
- return is_ivf;
-}
-
-int ivf_read_frame(FILE *infile, uint8_t **buffer, size_t *bytes_read,
- size_t *buffer_size, aom_codec_pts_t *pts) {
- char raw_header[IVF_FRAME_HDR_SZ] = { 0 };
- size_t frame_size = 0;
-
- if (fread(raw_header, IVF_FRAME_HDR_SZ, 1, infile) != 1) {
- if (!feof(infile)) warn("Failed to read frame size");
- } else {
- frame_size = mem_get_le32(raw_header);
-
- if (frame_size > 256 * 1024 * 1024) {
- warn("Read invalid frame size (%u)", (unsigned int)frame_size);
- frame_size = 0;
- }
-
- if (frame_size > *buffer_size) {
- uint8_t *new_buffer = (uint8_t *)realloc(*buffer, 2 * frame_size);
-
- if (new_buffer) {
- *buffer = new_buffer;
- *buffer_size = 2 * frame_size;
- } else {
- warn("Failed to allocate compressed data buffer");
- frame_size = 0;
- }
- }
-
- if (pts) {
- *pts = mem_get_le32(&raw_header[4]);
- *pts += ((aom_codec_pts_t)mem_get_le32(&raw_header[8]) << 32);
- }
- }
-
- if (!feof(infile)) {
- ASAN_UNPOISON_MEMORY_REGION(*buffer, *buffer_size);
- if (fread(*buffer, 1, frame_size, infile) != frame_size) {
- warn("Failed to read full frame");
- return 1;
- }
-
- ASAN_POISON_MEMORY_REGION(*buffer + frame_size, *buffer_size - frame_size);
- *bytes_read = frame_size;
- return 0;
- }
-
- return 1;
-}
diff --git a/third_party/aom/common/ivfdec.h b/third_party/aom/common/ivfdec.h
deleted file mode 100644
index ea294faa1..000000000
--- a/third_party/aom/common/ivfdec.h
+++ /dev/null
@@ -1,30 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-#ifndef AOM_COMMON_IVFDEC_H_
-#define AOM_COMMON_IVFDEC_H_
-
-#include "common/tools_common.h"
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-int file_is_ivf(struct AvxInputContext *input);
-
-typedef int64_t aom_codec_pts_t;
-int ivf_read_frame(FILE *infile, uint8_t **buffer, size_t *bytes_read,
- size_t *buffer_size, aom_codec_pts_t *pts);
-
-#ifdef __cplusplus
-} /* extern "C" */
-#endif
-
-#endif // AOM_COMMON_IVFDEC_H_
diff --git a/third_party/aom/common/ivfenc.c b/third_party/aom/common/ivfenc.c
deleted file mode 100644
index 64715f4d7..000000000
--- a/third_party/aom/common/ivfenc.c
+++ /dev/null
@@ -1,52 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#include "common/ivfenc.h"
-
-#include "aom/aom_encoder.h"
-#include "aom_ports/mem_ops.h"
-
-void ivf_write_file_header(FILE *outfile, const struct aom_codec_enc_cfg *cfg,
- unsigned int fourcc, int frame_cnt) {
- char header[32];
-
- header[0] = 'D';
- header[1] = 'K';
- header[2] = 'I';
- header[3] = 'F';
- mem_put_le16(header + 4, 0); // version
- mem_put_le16(header + 6, 32); // header size
- mem_put_le32(header + 8, fourcc); // fourcc
- mem_put_le16(header + 12, cfg->g_w); // width
- mem_put_le16(header + 14, cfg->g_h); // height
- mem_put_le32(header + 16, cfg->g_timebase.den); // rate
- mem_put_le32(header + 20, cfg->g_timebase.num); // scale
- mem_put_le32(header + 24, frame_cnt); // length
- mem_put_le32(header + 28, 0); // unused
-
- fwrite(header, 1, 32, outfile);
-}
-
-void ivf_write_frame_header(FILE *outfile, int64_t pts, size_t frame_size) {
- char header[12];
-
- mem_put_le32(header, (int)frame_size);
- mem_put_le32(header + 4, (int)(pts & 0xFFFFFFFF));
- mem_put_le32(header + 8, (int)(pts >> 32));
- fwrite(header, 1, 12, outfile);
-}
-
-void ivf_write_frame_size(FILE *outfile, size_t frame_size) {
- char header[4];
-
- mem_put_le32(header, (int)frame_size);
- fwrite(header, 1, 4, outfile);
-}
diff --git a/third_party/aom/common/ivfenc.h b/third_party/aom/common/ivfenc.h
deleted file mode 100644
index 8f6d947d4..000000000
--- a/third_party/aom/common/ivfenc.h
+++ /dev/null
@@ -1,34 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-#ifndef AOM_COMMON_IVFENC_H_
-#define AOM_COMMON_IVFENC_H_
-
-#include "common/tools_common.h"
-
-struct aom_codec_enc_cfg;
-struct aom_codec_cx_pkt;
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-void ivf_write_file_header(FILE *outfile, const struct aom_codec_enc_cfg *cfg,
- uint32_t fourcc, int frame_cnt);
-
-void ivf_write_frame_header(FILE *outfile, int64_t pts, size_t frame_size);
-
-void ivf_write_frame_size(FILE *outfile, size_t frame_size);
-
-#ifdef __cplusplus
-} /* extern "C" */
-#endif
-
-#endif // AOM_COMMON_IVFENC_H_
diff --git a/third_party/aom/common/md5_utils.c b/third_party/aom/common/md5_utils.c
deleted file mode 100644
index b69e1cc72..000000000
--- a/third_party/aom/common/md5_utils.c
+++ /dev/null
@@ -1,249 +0,0 @@
-/*
- * This code implements the MD5 message-digest algorithm.
- * The algorithm is due to Ron Rivest. This code was
- * written by Colin Plumb in 1993, no copyright is claimed.
- * This code is in the public domain; do with it what you wish.
- *
- * Equivalent code is available from RSA Data Security, Inc.
- * This code has been tested against that, and is equivalent,
- * except that you don't need to include two pages of legalese
- * with every copy.
- *
- * To compute the message digest of a chunk of bytes, declare an
- * MD5Context structure, pass it to MD5Init, call MD5Update as
- * needed on buffers full of bytes, and then call MD5Final, which
- * will fill a supplied 16-byte array with the digest.
- *
- * Changed so as no longer to depend on Colin Plumb's `usual.h' header
- * definitions
- * - Ian Jackson <ian@chiark.greenend.org.uk>.
- * Still in the public domain.
- */
-
-#include <string.h> /* for memcpy() */
-
-#include "common/md5_utils.h"
-
-/*
- * Converts 'words' 32-bit values in 'buf' from little-endian byte order to
- * host order, in place. On little-endian hosts this is a no-op.
- *
- * A count of zero is accepted and does nothing; the previous do/while form
- * decremented first and would have wrapped around to UINT_MAX iterations.
- */
-static void byteSwap(UWORD32 *buf, unsigned words) {
-  /* Endianness probe: the first byte of the int 1 is 1 only on
-   * little-endian machines, where no swapping is needed. */
-  const int probe = 1;
-  md5byte *p = (md5byte *)buf;
-
-  if (*(const char *)&probe == 1) return;
-
-  while (words-- > 0) {
-    /* Reassemble the word from its bytes, least significant byte first. */
-    *buf++ = (UWORD32)((unsigned)p[3] << 8 | p[2]) << 16 |
-             ((unsigned)p[1] << 8 | p[0]);
-    p += 4;
-  }
-}
-
-/*
- * Start MD5 accumulation. Set bit count to 0 and buffer to mysterious
- * initialization constants.
- */
-void MD5Init(struct MD5Context *ctx) {
-  /* Initialization constants for the four 32-bit state words. */
-  static const UWORD32 kInitialState[4] = { 0x67452301, 0xefcdab89,
-                                            0x98badcfe, 0x10325476 };
-
-  memcpy(ctx->buf, kInitialState, sizeof(kInitialState));
-
-  /* No bytes hashed yet: clear both halves of the byte counter. */
-  ctx->bytes[0] = ctx->bytes[1] = 0;
-}
-
-/*
- * Update context to reflect the concatenation of another buffer full
- * of bytes.
- */
-void MD5Update(struct MD5Context *ctx, md5byte const *buf, unsigned len) {
-  const UWORD32 prev_low = ctx->bytes[0];
-  const UWORD32 used = prev_low & 0x3f; /* Bytes already pending in ctx->in */
-  UWORD32 room = 64 - used;             /* Free space in ctx->in (1..64) */
-  md5byte *const pending = (md5byte *)ctx->in;
-
-  /* Advance the 64-bit byte counter, carrying from the low word. */
-  ctx->bytes[0] = prev_low + len;
-  if (ctx->bytes[0] < prev_low) ctx->bytes[1]++;
-
-  /* Not enough input to complete the pending block: just stash it. */
-  if (len < room) {
-    memcpy(pending + used, buf, len);
-    return;
-  }
-
-  /* Top up the partially filled block and hash it. */
-  memcpy(pending + used, buf, room);
-  byteSwap(ctx->in, 16);
-  MD5Transform(ctx->buf, ctx->in);
-  buf += room;
-  len -= room;
-
-  /* Hash every remaining complete 64-byte block. */
-  for (; len >= 64; buf += 64, len -= 64) {
-    memcpy(ctx->in, buf, 64);
-    byteSwap(ctx->in, 16);
-    MD5Transform(ctx->buf, ctx->in);
-  }
-
-  /* Keep the leftover (fewer than 64 bytes) for the next call. */
-  memcpy(ctx->in, buf, len);
-}
-
-/*
- * Final wrapup - pad to 64-byte boundary with the bit pattern
- * 1 0* (64-bit count of bits processed, MSB-first)
- */
-void MD5Final(md5byte digest[16], struct MD5Context *ctx) {
-  md5byte *const block = (md5byte *)ctx->in;
-  const int used = (int)(ctx->bytes[0] & 0x3f); /* Bytes pending in ctx->in */
-  md5byte *tail = block + used;
-  int pad;
-
-  /* The padding always starts with a single 0x80 byte; since at most 63
-   * bytes are pending there is always room for it. */
-  *tail++ = 0x80;
-
-  /* Zero bytes needed to reach offset 56 of the block (-8..55). */
-  pad = 55 - used;
-
-  if (pad < 0) {
-    /* The length field no longer fits: zero-fill and hash this block, then
-     * start a fresh one made of 56 zero bytes. */
-    memset(tail, 0, pad + 8);
-    byteSwap(ctx->in, 16);
-    MD5Transform(ctx->buf, ctx->in);
-    tail = block;
-    pad = 56;
-  }
-
-  memset(tail, 0, pad);
-  byteSwap(ctx->in, 14);
-
-  /* Last two words hold the message length in bits (byte count * 8). */
-  ctx->in[14] = ctx->bytes[0] << 3;
-  ctx->in[15] = ctx->bytes[1] << 3 | ctx->bytes[0] >> 29;
-  MD5Transform(ctx->buf, ctx->in);
-
-  /* Emit the state as the digest, then wipe the context. */
-  byteSwap(ctx->buf, 4);
-  memcpy(digest, ctx->buf, 16);
-  memset(ctx, 0, sizeof(*ctx)); /* In case it's sensitive */
-}
-
-/* The C implementation below is compiled only when ASM_MD5 is not defined. */
-#ifndef ASM_MD5
-
-/* The four core functions - F1 is optimized somewhat */
-
-/* #define F1(x, y, z) (x & y | ~x & z) */
-#define F1(x, y, z) (z ^ (x & (y ^ z)))
-/* F2 is F1 with its arguments rotated. */
-#define F2(x, y, z) F1(z, x, y)
-#define F3(x, y, z) (x ^ y ^ z)
-#define F4(x, y, z) (y ^ (x | ~z))
-
-/* This is the central step in the MD5 algorithm. */
-/* Adds f(x, y, z) + in to w, rotates w left by s bits, then adds x.
- * Arguments are expanded unparenthesized and w is evaluated several times,
- * so callers pass plain variables for w, x, y and z. */
-#define MD5STEP(f, w, x, y, z, in, s) \
- (w += f(x, y, z) + in, w = (w << s | w >> (32 - s)) + x)
-
-/* Under clang (when the no_sanitize attribute is available) this expands to
- * an attribute that disables the unsigned-integer-overflow sanitizer check;
- * the step arithmetic wraps around on purpose. Elsewhere it expands to
- * nothing. */
-#if defined(__clang__) && defined(__has_attribute)
-#if __has_attribute(no_sanitize)
-#define AOM_NO_UNSIGNED_OVERFLOW_CHECK \
- __attribute__((no_sanitize("unsigned-integer-overflow")))
-#endif
-#endif
-
-#ifndef AOM_NO_UNSIGNED_OVERFLOW_CHECK
-#define AOM_NO_UNSIGNED_OVERFLOW_CHECK
-#endif
-
-/*
- * The core of the MD5 algorithm, this alters an existing MD5 hash to
- * reflect the addition of 16 longwords of new data. MD5Update blocks
- * the data and converts bytes into longwords for this routine.
- *
- * buf: the four state words; read on entry and incremented by the result.
- * in:  the 16-word block to mix in; not modified.
- */
-AOM_NO_UNSIGNED_OVERFLOW_CHECK void MD5Transform(UWORD32 buf[4],
- UWORD32 const in[16]) {
- register UWORD32 a, b, c, d;
-
- /* Work on a local copy of the state. */
- a = buf[0];
- b = buf[1];
- c = buf[2];
- d = buf[3];
-
- /* Round 1: F1, input words taken in order 0..15. */
- MD5STEP(F1, a, b, c, d, in[0] + 0xd76aa478, 7);
- MD5STEP(F1, d, a, b, c, in[1] + 0xe8c7b756, 12);
- MD5STEP(F1, c, d, a, b, in[2] + 0x242070db, 17);
- MD5STEP(F1, b, c, d, a, in[3] + 0xc1bdceee, 22);
- MD5STEP(F1, a, b, c, d, in[4] + 0xf57c0faf, 7);
- MD5STEP(F1, d, a, b, c, in[5] + 0x4787c62a, 12);
- MD5STEP(F1, c, d, a, b, in[6] + 0xa8304613, 17);
- MD5STEP(F1, b, c, d, a, in[7] + 0xfd469501, 22);
- MD5STEP(F1, a, b, c, d, in[8] + 0x698098d8, 7);
- MD5STEP(F1, d, a, b, c, in[9] + 0x8b44f7af, 12);
- MD5STEP(F1, c, d, a, b, in[10] + 0xffff5bb1, 17);
- MD5STEP(F1, b, c, d, a, in[11] + 0x895cd7be, 22);
- MD5STEP(F1, a, b, c, d, in[12] + 0x6b901122, 7);
- MD5STEP(F1, d, a, b, c, in[13] + 0xfd987193, 12);
- MD5STEP(F1, c, d, a, b, in[14] + 0xa679438e, 17);
- MD5STEP(F1, b, c, d, a, in[15] + 0x49b40821, 22);
-
- /* Round 2: F2, input index advances by 5 (mod 16) starting at 1. */
- MD5STEP(F2, a, b, c, d, in[1] + 0xf61e2562, 5);
- MD5STEP(F2, d, a, b, c, in[6] + 0xc040b340, 9);
- MD5STEP(F2, c, d, a, b, in[11] + 0x265e5a51, 14);
- MD5STEP(F2, b, c, d, a, in[0] + 0xe9b6c7aa, 20);
- MD5STEP(F2, a, b, c, d, in[5] + 0xd62f105d, 5);
- MD5STEP(F2, d, a, b, c, in[10] + 0x02441453, 9);
- MD5STEP(F2, c, d, a, b, in[15] + 0xd8a1e681, 14);
- MD5STEP(F2, b, c, d, a, in[4] + 0xe7d3fbc8, 20);
- MD5STEP(F2, a, b, c, d, in[9] + 0x21e1cde6, 5);
- MD5STEP(F2, d, a, b, c, in[14] + 0xc33707d6, 9);
- MD5STEP(F2, c, d, a, b, in[3] + 0xf4d50d87, 14);
- MD5STEP(F2, b, c, d, a, in[8] + 0x455a14ed, 20);
- MD5STEP(F2, a, b, c, d, in[13] + 0xa9e3e905, 5);
- MD5STEP(F2, d, a, b, c, in[2] + 0xfcefa3f8, 9);
- MD5STEP(F2, c, d, a, b, in[7] + 0x676f02d9, 14);
- MD5STEP(F2, b, c, d, a, in[12] + 0x8d2a4c8a, 20);
-
- /* Round 3: F3, input index advances by 3 (mod 16) starting at 5. */
- MD5STEP(F3, a, b, c, d, in[5] + 0xfffa3942, 4);
- MD5STEP(F3, d, a, b, c, in[8] + 0x8771f681, 11);
- MD5STEP(F3, c, d, a, b, in[11] + 0x6d9d6122, 16);
- MD5STEP(F3, b, c, d, a, in[14] + 0xfde5380c, 23);
- MD5STEP(F3, a, b, c, d, in[1] + 0xa4beea44, 4);
- MD5STEP(F3, d, a, b, c, in[4] + 0x4bdecfa9, 11);
- MD5STEP(F3, c, d, a, b, in[7] + 0xf6bb4b60, 16);
- MD5STEP(F3, b, c, d, a, in[10] + 0xbebfbc70, 23);
- MD5STEP(F3, a, b, c, d, in[13] + 0x289b7ec6, 4);
- MD5STEP(F3, d, a, b, c, in[0] + 0xeaa127fa, 11);
- MD5STEP(F3, c, d, a, b, in[3] + 0xd4ef3085, 16);
- MD5STEP(F3, b, c, d, a, in[6] + 0x04881d05, 23);
- MD5STEP(F3, a, b, c, d, in[9] + 0xd9d4d039, 4);
- MD5STEP(F3, d, a, b, c, in[12] + 0xe6db99e5, 11);
- MD5STEP(F3, c, d, a, b, in[15] + 0x1fa27cf8, 16);
- MD5STEP(F3, b, c, d, a, in[2] + 0xc4ac5665, 23);
-
- /* Round 4: F4, input index advances by 7 (mod 16) starting at 0. */
- MD5STEP(F4, a, b, c, d, in[0] + 0xf4292244, 6);
- MD5STEP(F4, d, a, b, c, in[7] + 0x432aff97, 10);
- MD5STEP(F4, c, d, a, b, in[14] + 0xab9423a7, 15);
- MD5STEP(F4, b, c, d, a, in[5] + 0xfc93a039, 21);
- MD5STEP(F4, a, b, c, d, in[12] + 0x655b59c3, 6);
- MD5STEP(F4, d, a, b, c, in[3] + 0x8f0ccc92, 10);
- MD5STEP(F4, c, d, a, b, in[10] + 0xffeff47d, 15);
- MD5STEP(F4, b, c, d, a, in[1] + 0x85845dd1, 21);
- MD5STEP(F4, a, b, c, d, in[8] + 0x6fa87e4f, 6);
- MD5STEP(F4, d, a, b, c, in[15] + 0xfe2ce6e0, 10);
- MD5STEP(F4, c, d, a, b, in[6] + 0xa3014314, 15);
- MD5STEP(F4, b, c, d, a, in[13] + 0x4e0811a1, 21);
- MD5STEP(F4, a, b, c, d, in[4] + 0xf7537e82, 6);
- MD5STEP(F4, d, a, b, c, in[11] + 0xbd3af235, 10);
- MD5STEP(F4, c, d, a, b, in[2] + 0x2ad7d2bb, 15);
- MD5STEP(F4, b, c, d, a, in[9] + 0xeb86d391, 21);
-
- /* Fold the mixed values back into the running state. */
- buf[0] += a;
- buf[1] += b;
- buf[2] += c;
- buf[3] += d;
-}
-
-#undef AOM_NO_UNSIGNED_OVERFLOW_CHECK
-
-#endif
diff --git a/third_party/aom/common/md5_utils.h b/third_party/aom/common/md5_utils.h
deleted file mode 100644
index 144fa3ad2..000000000
--- a/third_party/aom/common/md5_utils.h
+++ /dev/null
@@ -1,49 +0,0 @@
-/*
- * This is the header file for the MD5 message-digest algorithm.
- * The algorithm is due to Ron Rivest. This code was
- * written by Colin Plumb in 1993, no copyright is claimed.
- * This code is in the public domain; do with it what you wish.
- *
- * Equivalent code is available from RSA Data Security, Inc.
- * This code has been tested against that, and is equivalent,
- * except that you don't need to include two pages of legalese
- * with every copy.
- *
- * To compute the message digest of a chunk of bytes, declare an
- * MD5Context structure, pass it to MD5Init, call MD5Update as
- * needed on buffers full of bytes, and then call MD5Final, which
- * will fill a supplied 16-byte array with the digest.
- *
- * Changed so as no longer to depend on Colin Plumb's `usual.h'
- * header definitions
- * - Ian Jackson <ian@chiark.greenend.org.uk>.
- * Still in the public domain.
- */
-
-#ifndef AOM_COMMON_MD5_UTILS_H_
-#define AOM_COMMON_MD5_UTILS_H_
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-/* Basic types used by the MD5 code. */
-/* NOTE(review): UWORD32 is used as a 32-bit word by the round code, which
- * assumes unsigned int is 32 bits wide on the target - confirm. */
-#define md5byte unsigned char
-#define UWORD32 unsigned int
-
-typedef struct MD5Context MD5Context;
-/* State of one MD5 computation. */
-struct MD5Context {
- /* The four hash state words; set by MD5Init, updated by MD5Transform. */
- UWORD32 buf[4];
- /* Number of bytes hashed so far: [0] is the low word, [1] the high word. */
- UWORD32 bytes[2];
- /* Pending input block (64 bytes) that has not been transformed yet. */
- UWORD32 in[16];
-};
-
-/* Resets 'context' to the initial state. */
-void MD5Init(struct MD5Context *context);
-/* Feeds 'len' bytes from 'buf' into the hash. */
-void MD5Update(struct MD5Context *context, md5byte const *buf, unsigned len);
-/* Pads the message, stores the 16-byte result in 'digest' and zeroes
- * 'context'. */
-void MD5Final(unsigned char digest[16], struct MD5Context *context);
-/* Mixes one 16-word block 'in' into the state words 'buf'. */
-void MD5Transform(UWORD32 buf[4], UWORD32 const in[16]);
-
-#ifdef __cplusplus
-} // extern "C"
-#endif
-
-#endif // AOM_COMMON_MD5_UTILS_H_
diff --git a/third_party/aom/common/obudec.c b/third_party/aom/common/obudec.c
deleted file mode 100644
index acbd12e0c..000000000
--- a/third_party/aom/common/obudec.c
+++ /dev/null
@@ -1,448 +0,0 @@
-/*
- * Copyright (c) 2017, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-
-#include "common/obudec.h"
-
-#include "aom_ports/mem_ops.h"
-#include "av1/common/common.h"
-#include "av1/common/obu_util.h"
-
-#define OBU_BUFFER_SIZE (500 * 1024)
-
-#define OBU_HEADER_SIZE 1
-#define OBU_EXTENSION_SIZE 1
-#define OBU_MAX_LENGTH_FIELD_SIZE 8
-#define OBU_DETECTION_SIZE \
- (OBU_HEADER_SIZE + OBU_EXTENSION_SIZE + 3 * OBU_MAX_LENGTH_FIELD_SIZE)
-
-// Reads unsigned LEB128 integer and returns 0 upon successful read and decode.
-// Stores raw bytes in 'value_buffer', length of the number in 'value_length',
-// and decoded value in 'value'.
-static int obudec_read_leb128(FILE *f, uint8_t *value_buffer,
-                              size_t *value_length, uint64_t *value) {
-  if (!f || !value_buffer || !value_length || !value) return -1;
-
-  size_t count = 0;
-  int terminated = 0;
-
-  // Pull one byte at a time until a byte without the continuation bit (MSB)
-  // shows up or the maximum field width has been consumed.
-  while (!terminated && count < OBU_MAX_LENGTH_FIELD_SIZE) {
-    if (fread(value_buffer + count, 1, 1, f) != 1) {
-      // A clean end of file before the first byte is not an error; it is
-      // reported as a zero-length value.
-      if (count == 0 && feof(f)) {
-        *value_length = 0;
-        return 0;
-      }
-      // Ran out of data before completing read of value.
-      return -1;
-    }
-    terminated = (value_buffer[count] >> 7) == 0;
-    ++count;
-  }
-
-  // 'value_length' is only reported when a terminating byte was found.
-  if (terminated) *value_length = count;
-
-  return aom_uleb_decode(value_buffer, count, value, NULL);
-}
-
-// Reads OBU header from 'f'. The 'buffer_capacity' passed in must be large
-// enough to store an OBU header with extension (2 bytes). Raw OBU data is
-// written to 'obu_data', parsed OBU header values are written to 'obu_header',
-// and total bytes read from file are written to 'bytes_read'. Returns 0 for
-// success, and non-zero on failure. When end of file is reached, the return
-// value is 0 and the 'bytes_read' value is set to 0.
-static int obudec_read_obu_header(FILE *f, size_t buffer_capacity,
-                                  int is_annexb, uint8_t *obu_data,
-                                  ObuHeader *obu_header, size_t *bytes_read) {
-  if (!f || buffer_capacity < (OBU_HEADER_SIZE + OBU_EXTENSION_SIZE) ||
-      !obu_data || !obu_header || !bytes_read) {
-    return -1;
-  }
-
-  // First header byte. A clean end of file here is reported as success with
-  // '*bytes_read' == 0 so the caller can tell EOF from an error.
-  *bytes_read = fread(obu_data, 1, 1, f);
-  if (*bytes_read == 0 && feof(f)) return 0;
-  if (*bytes_read != 1) {
-    fprintf(stderr, "obudec: Failure reading OBU header.\n");
-    return -1;
-  }
-
-  // Bit 2 of the first byte signals that an extension byte follows.
-  const int has_extension = (obu_data[0] >> 2) & 0x1;
-  if (has_extension) {
-    if (fread(&obu_data[1], 1, 1, f) != 1) {
-      // Newline added: this was the only diagnostic here without one, which
-      // made the next message run on into the same line.
-      fprintf(stderr, "obudec: Failure reading OBU extension.\n");
-      return -1;
-    }
-    ++*bytes_read;
-  }
-
-  // Parse the raw bytes; the parser must consume exactly what was read.
-  size_t obu_bytes_parsed = 0;
-  const aom_codec_err_t parse_result = aom_read_obu_header(
-      obu_data, *bytes_read, &obu_bytes_parsed, obu_header, is_annexb);
-  if (parse_result != AOM_CODEC_OK || *bytes_read != obu_bytes_parsed) {
-    fprintf(stderr, "obudec: Error parsing OBU header.\n");
-    return -1;
-  }
-
-  return 0;
-}
-
-// Reads OBU payload from 'f' and returns 0 for success when all payload bytes
-// are read from the file. Payload data is written to 'obu_data', and actual
-// bytes read added to 'bytes_read'.
-static int obudec_read_obu_payload(FILE *f, size_t payload_length,
-                                   uint8_t *obu_data, size_t *bytes_read) {
-  const int args_ok = f && payload_length != 0 && obu_data && bytes_read;
-  if (!args_ok) return -1;
-
-  // The whole payload must be available; a short read is an error.
-  const size_t got = fread(obu_data, 1, payload_length, f);
-  if (got != payload_length) {
-    fprintf(stderr, "obudec: Failure reading OBU payload.\n");
-    return -1;
-  }
-
-  // Accumulate into the caller's running byte count.
-  *bytes_read += payload_length;
-  return 0;
-}
-
-// Reads one OBU header together with its size field into 'buffer'.
-// For Annex B input the size field precedes the header and covers header plus
-// payload; otherwise it follows the header and covers the payload only.
-// On success returns 0, stores the payload size in 'payload_length' and the
-// number of bytes consumed in 'bytes_read'. At end of file returns 0 with
-// 'payload_length' set to 0 and 'bytes_read' left untouched.
-static int obudec_read_obu_header_and_size(FILE *f, size_t buffer_capacity,
-                                           int is_annexb, uint8_t *buffer,
-                                           size_t *bytes_read,
-                                           size_t *payload_length,
-                                           ObuHeader *obu_header) {
-  // Room for a header with extension plus the longest possible size field.
-  const size_t min_capacity =
-      (OBU_HEADER_SIZE + OBU_EXTENSION_SIZE + OBU_MAX_LENGTH_FIELD_SIZE);
-  const int args_ok = f && buffer && bytes_read && payload_length && obu_header;
-  if (!args_ok || buffer_capacity < min_capacity) return -1;
-
-  size_t size_field_bytes = 0;
-  uint64_t annexb_obu_size = 0;
-  if (is_annexb) {
-    const int rc =
-        obudec_read_leb128(f, buffer, &size_field_bytes, &annexb_obu_size);
-    if (rc != 0) {
-      fprintf(stderr, "obudec: Failure reading OBU size length.\n");
-      return -1;
-    }
-    if (size_field_bytes == 0) {
-      // End of file before the size field.
-      *payload_length = 0;
-      return 0;
-    }
-    if (annexb_obu_size > UINT32_MAX) {
-      fprintf(stderr, "obudec: OBU payload length too large.\n");
-      return -1;
-    }
-  }
-
-  size_t header_bytes = 0;
-  if (obudec_read_obu_header(f, buffer_capacity - size_field_bytes, is_annexb,
-                             buffer + size_field_bytes, obu_header,
-                             &header_bytes) != 0) {
-    return -1;
-  }
-  if (header_bytes == 0) {
-    // End of file before the header.
-    *payload_length = 0;
-    return 0;
-  }
-
-  if (!is_annexb) {
-    // The payload size follows the header.
-    uint64_t coded_payload_size = 0;
-    if (obudec_read_leb128(f, buffer + header_bytes, &size_field_bytes,
-                           &coded_payload_size) != 0) {
-      fprintf(stderr, "obudec: Failure reading OBU payload length.\n");
-      return -1;
-    }
-    if (coded_payload_size > UINT32_MAX) {
-      fprintf(stderr, "obudec: OBU payload length too large.\n");
-      return -1;
-    }
-    *payload_length = (size_t)coded_payload_size;
-  } else {
-    // The Annex B size includes the header that was just consumed.
-    if (annexb_obu_size < header_bytes) {
-      fprintf(stderr, "obudec: OBU size is too small.\n");
-      return -1;
-    }
-    *payload_length = (size_t)annexb_obu_size - header_bytes;
-  }
-
-  *bytes_read = size_field_bytes + header_bytes;
-  return 0;
-}
-
-// Resizes '*buffer' to 'new_capacity' bytes and updates '*capacity'.
-// Returns 0 on success. On failure returns -1 and leaves the buffer and its
-// recorded capacity untouched.
-static int obudec_grow_buffer(uint8_t **buffer, size_t *capacity,
-                              size_t new_capacity) {
-#if defined AOM_MAX_ALLOCABLE_MEMORY
-  if (new_capacity > AOM_MAX_ALLOCABLE_MEMORY) {
-    fprintf(stderr, "obudec: OBU size exceeds max alloc size.\n");
-    return -1;
-  }
-#endif
-
-  uint8_t *grown = (uint8_t *)realloc(*buffer, new_capacity);
-  if (!grown) {
-    fprintf(stderr, "obudec: Failed to allocate compressed data buffer\n");
-    return -1;
-  }
-
-  *buffer = grown;
-  *capacity = new_capacity;
-  return 0;
-}
-
-// Reads one complete OBU (size field, header and payload) from 'f' and
-// appends it to '*obu_buffer' after the 'obu_bytes_buffered' bytes already
-// stored there, growing the buffer when needed. On success returns 0 and
-// stores the number of bytes appended in 'obu_length' (0 at end of file).
-// Returns a negative value on failure.
-static int obudec_read_one_obu(FILE *f, uint8_t **obu_buffer,
-                               size_t obu_bytes_buffered,
-                               size_t *obu_buffer_capacity, size_t *obu_length,
-                               ObuHeader *obu_header, int is_annexb) {
-  // Space needed to read a header with extension plus a full size field.
-  const size_t kMinimumHeaderCapacity =
-      (OBU_HEADER_SIZE + OBU_EXTENSION_SIZE + OBU_MAX_LENGTH_FIELD_SIZE);
-
-  if (!obu_buffer || !(*obu_buffer) || !obu_buffer_capacity || !obu_length) {
-    return -1;
-  }
-
-  // Guards the subtraction below against wrapping around.
-  if (obu_bytes_buffered > *obu_buffer_capacity) return -1;
-
-  size_t available_buffer_capacity = *obu_buffer_capacity - obu_bytes_buffered;
-
-  // The header reader rejects buffers smaller than its worst case, so make
-  // room first instead of failing when the buffer happens to be nearly full.
-  if (available_buffer_capacity < kMinimumHeaderCapacity) {
-    if (obu_bytes_buffered > SIZE_MAX - kMinimumHeaderCapacity) return -1;
-    if (obudec_grow_buffer(obu_buffer, obu_buffer_capacity,
-                           obu_bytes_buffered + kMinimumHeaderCapacity) != 0) {
-      return -1;
-    }
-    available_buffer_capacity = *obu_buffer_capacity - obu_bytes_buffered;
-  }
-
-  size_t bytes_read = 0;
-  size_t obu_payload_length = 0;
-  const int status = obudec_read_obu_header_and_size(
-      f, available_buffer_capacity, is_annexb, *obu_buffer + obu_bytes_buffered,
-      &bytes_read, &obu_payload_length, obu_header);
-  if (status < 0) return status;
-
-  if (obu_payload_length > SIZE_MAX - bytes_read) return -1;
-
-  if (obu_payload_length > 256 * 1024 * 1024) {
-    fprintf(stderr, "obudec: Read invalid OBU size (%u)\n",
-            (unsigned int)obu_payload_length);
-    *obu_length = bytes_read + obu_payload_length;
-    return -1;
-  }
-
-  if (bytes_read + obu_payload_length > available_buffer_capacity) {
-    // New capacity is buffered + header + 2 * payload, leaving headroom for
-    // a following OBU of similar size. Every addition is checked so the sum
-    // cannot wrap around and produce an undersized allocation.
-    const size_t obu_total = bytes_read + obu_payload_length;
-    if (obu_payload_length > SIZE_MAX - obu_total ||
-        obu_bytes_buffered > SIZE_MAX - obu_total - obu_payload_length) {
-      fprintf(stderr, "obudec: OBU size overflows buffer capacity.\n");
-      *obu_length = obu_total;
-      return -1;
-    }
-
-    const size_t new_capacity =
-        obu_bytes_buffered + obu_total + obu_payload_length;
-    if (obudec_grow_buffer(obu_buffer, obu_buffer_capacity, new_capacity) !=
-        0) {
-      *obu_length = obu_total;
-      return -1;
-    }
-  }
-
-  if (obu_payload_length > 0 &&
-      obudec_read_obu_payload(f, obu_payload_length,
-                              *obu_buffer + obu_bytes_buffered + bytes_read,
-                              &bytes_read) != 0) {
-    return -1;
-  }
-
-  *obu_length = bytes_read;
-  return 0;
-}
-
-// Probes the start of the input file for an OBU stream. Returns 1 when the
-// first OBU is a temporal delimiter or sequence header; in that case the
-// bytes consumed so far are kept in a newly allocated 'obu_ctx->buffer'.
-// Returns 0 otherwise. Every failure path rewinds the file and leaves no
-// buffer allocated by this function in 'obu_ctx'.
-int file_is_obu(struct ObuDecInputContext *obu_ctx) {
-  if (!obu_ctx || !obu_ctx->avx_ctx) return 0;
-
-  struct AvxInputContext *avx_ctx = obu_ctx->avx_ctx;
-  uint8_t detect_buf[OBU_DETECTION_SIZE] = { 0 };
-  const int is_annexb = obu_ctx->is_annexb;
-  FILE *f = avx_ctx->file;
-  size_t payload_length = 0;
-  ObuHeader obu_header;
-  memset(&obu_header, 0, sizeof(obu_header));
-  size_t length_of_unit_size = 0;
-  size_t annexb_header_length = 0;
-  uint64_t unit_size = 0;
-
-  if (!f) return 0;
-
-  if (is_annexb) {
-    // read the size of first temporal unit
-    if (obudec_read_leb128(f, &detect_buf[0], &length_of_unit_size,
-                           &unit_size) != 0) {
-      fprintf(stderr, "obudec: Failure reading temporal unit header\n");
-      rewind(f);
-      return 0;
-    }
-
-    // read the size of first frame unit
-    if (obudec_read_leb128(f, &detect_buf[length_of_unit_size],
-                           &annexb_header_length, &unit_size) != 0) {
-      fprintf(stderr, "obudec: Failure reading frame unit header\n");
-      rewind(f);
-      return 0;
-    }
-    annexb_header_length += length_of_unit_size;
-  }
-
-  size_t bytes_read = 0;
-  if (obudec_read_obu_header_and_size(
-          f, OBU_DETECTION_SIZE - annexb_header_length, is_annexb,
-          &detect_buf[annexb_header_length], &bytes_read, &payload_length,
-          &obu_header) != 0) {
-    fprintf(stderr, "obudec: Failure reading first OBU.\n");
-    rewind(f);
-    return 0;
-  }
-
-  if (is_annexb) {
-    bytes_read += annexb_header_length;
-  }
-
-  if (obu_header.type != OBU_TEMPORAL_DELIMITER &&
-      obu_header.type != OBU_SEQUENCE_HEADER) {
-    // Not an OBU stream. Rewind so other format probes start from the
-    // beginning of the file rather than from wherever this probe stopped.
-    rewind(f);
-    return 0;
-  }
-
-  if (obu_header.has_size_field) {
-    if (obu_header.type == OBU_TEMPORAL_DELIMITER && payload_length != 0) {
-      fprintf(
-          stderr,
-          "obudec: Invalid OBU_TEMPORAL_DELIMITER payload length (non-zero).");
-      rewind(f);
-      return 0;
-    }
-  } else if (!is_annexb) {
-    fprintf(stderr, "obudec: OBU size fields required, cannot decode input.\n");
-    rewind(f);
-    return 0;
-  }
-
-  // Appears that input is valid Section 5 AV1 stream.
-  obu_ctx->buffer = (uint8_t *)malloc(OBU_BUFFER_SIZE);
-  if (!obu_ctx->buffer) {
-    fprintf(stderr, "Out of memory.\n");
-    rewind(f);
-    return 0;
-  }
-  obu_ctx->buffer_capacity = OBU_BUFFER_SIZE;
-
-  memcpy(obu_ctx->buffer, &detect_buf[0], bytes_read);
-  obu_ctx->bytes_buffered = bytes_read;
-  // If the first OBU is a SEQUENCE_HEADER, then it will have a payload.
-  // We need to read this in so that our buffer only contains complete OBUs.
-  if (payload_length > 0) {
-    int payload_ok = 1;
-    size_t payload_bytes = 0;
-
-    if (payload_length > (obu_ctx->buffer_capacity - bytes_read)) {
-      fprintf(stderr, "obudec: First OBU's payload is too large\n");
-      payload_ok = 0;
-    } else if (obudec_read_obu_payload(f, payload_length,
-                                       &obu_ctx->buffer[bytes_read],
-                                       &payload_bytes) < 0) {
-      payload_ok = 0;
-    }
-
-    if (!payload_ok) {
-      // Release the buffer allocated above so a failed probe neither leaks
-      // it nor leaves a half-filled buffer behind in the context.
-      free(obu_ctx->buffer);
-      obu_ctx->buffer = NULL;
-      obu_ctx->buffer_capacity = 0;
-      obu_ctx->bytes_buffered = 0;
-      rewind(f);
-      return 0;
-    }
-    obu_ctx->bytes_buffered += payload_bytes;
-  }
-  return 1;
-}
-
-// Reads one temporal unit into '*buffer', which is resized with realloc() to
-// the size of the unit. Returns 0 on success, 1 at end of file and -1 on
-// error. If the resize fails, the caller's buffer is freed and '*buffer' is
-// set to NULL.
-int obudec_read_temporal_unit(struct ObuDecInputContext *obu_ctx,
-                              uint8_t **buffer, size_t *bytes_read,
-                              size_t *buffer_size) {
-  if (!obu_ctx || !obu_ctx->avx_ctx || !buffer || !bytes_read ||
-      !buffer_size) {
-    return -1;
-  }
-
-  FILE *f = obu_ctx->avx_ctx->file;
-  if (!f) return -1;
-
-  *buffer_size = 0;
-  *bytes_read = 0;
-
-  if (feof(f)) {
-    return 1;
-  }
-
-  size_t tu_size = 0;
-  size_t obu_size = 0;
-  size_t length_of_temporal_unit_size = 0;
-  uint8_t tuheader[OBU_MAX_LENGTH_FIELD_SIZE] = { 0 };
-
-  if (obu_ctx->is_annexb) {
-    uint64_t size = 0;
-
-    if (obu_ctx->bytes_buffered == 0) {
-      if (obudec_read_leb128(f, &tuheader[0], &length_of_temporal_unit_size,
-                             &size) != 0) {
-        fprintf(stderr, "obudec: Failure reading temporal unit header\n");
-        return -1;
-      }
-      if (size == 0 && feof(f)) {
-        return 1;
-      }
-    } else {
-      // temporal unit size was already stored in buffer
-      if (aom_uleb_decode(obu_ctx->buffer, obu_ctx->bytes_buffered, &size,
-                          &length_of_temporal_unit_size) != 0) {
-        fprintf(stderr, "obudec: Failure reading temporal unit header\n");
-        return -1;
-      }
-    }
-
-    if (size > UINT32_MAX || size + length_of_temporal_unit_size > UINT32_MAX) {
-      fprintf(stderr, "obudec: TU too large.\n");
-      return -1;
-    }
-
-    size += length_of_temporal_unit_size;
-    tu_size = (size_t)size;
-  } else {
-    // Accumulate OBUs until the next temporal delimiter (or end of file).
-    // The delimiter itself stays at the end of the internal buffer and
-    // becomes the start of the next temporal unit.
-    while (1) {
-      ObuHeader obu_header;
-      memset(&obu_header, 0, sizeof(obu_header));
-
-      if (obudec_read_one_obu(f, &obu_ctx->buffer, obu_ctx->bytes_buffered,
-                              &obu_ctx->buffer_capacity, &obu_size, &obu_header,
-                              0) != 0) {
-        fprintf(stderr, "obudec: read_one_obu failed in TU loop\n");
-        return -1;
-      }
-
-      if (obu_header.type == OBU_TEMPORAL_DELIMITER || obu_size == 0) {
-        tu_size = obu_ctx->bytes_buffered;
-        break;
-      } else {
-        obu_ctx->bytes_buffered += obu_size;
-      }
-    }
-  }
-
-#if defined AOM_MAX_ALLOCABLE_MEMORY
-  if (tu_size > AOM_MAX_ALLOCABLE_MEMORY) {
-    fprintf(stderr, "obudec: Temporal Unit size exceeds max alloc size.\n");
-    return -1;
-  }
-#endif
-
-  // realloc() with a size of 0 may free the block and return NULL; treating
-  // that as an allocation failure would free the caller's buffer a second
-  // time. Only resize when there is something to store.
-  if (tu_size > 0) {
-    uint8_t *new_buffer = (uint8_t *)realloc(*buffer, tu_size);
-    if (!new_buffer) {
-      free(*buffer);
-      // Do not leave the caller holding a freed pointer.
-      *buffer = NULL;
-      fprintf(stderr, "obudec: Out of memory.\n");
-      return -1;
-    }
-    *buffer = new_buffer;
-  }
-  *bytes_read = tu_size;
-  *buffer_size = tu_size;
-
-  if (!obu_ctx->is_annexb) {
-    if (tu_size > 0) memcpy(*buffer, obu_ctx->buffer, tu_size);
-
-    // At this point, (obu_ctx->buffer + obu_ctx->bytes_buffered + obu_size)
-    // points to the end of the buffer.
-    memmove(obu_ctx->buffer, obu_ctx->buffer + obu_ctx->bytes_buffered,
-            obu_size);
-    obu_ctx->bytes_buffered = obu_size;
-  } else {
-    if (!feof(f)) {
-      size_t data_size;
-      size_t offset;
-      if (!obu_ctx->bytes_buffered) {
-        data_size = tu_size - length_of_temporal_unit_size;
-        memcpy(*buffer, &tuheader[0], length_of_temporal_unit_size);
-        offset = length_of_temporal_unit_size;
-      } else {
-        memcpy(*buffer, obu_ctx->buffer, obu_ctx->bytes_buffered);
-        offset = obu_ctx->bytes_buffered;
-        data_size = tu_size - obu_ctx->bytes_buffered;
-        obu_ctx->bytes_buffered = 0;
-      }
-
-      if (fread(*buffer + offset, 1, data_size, f) != data_size) {
-        fprintf(stderr, "obudec: Failed to read full temporal unit\n");
-        return -1;
-      }
-    }
-  }
-  return 0;
-}
-
-// Releases the buffer owned by 'obu_ctx'. Accepts NULL, and resets the
-// buffer fields so that calling it twice does not free the same block again.
-void obudec_free(struct ObuDecInputContext *obu_ctx) {
-  if (!obu_ctx) return;
-
-  free(obu_ctx->buffer);
-  obu_ctx->buffer = NULL;
-  obu_ctx->buffer_capacity = 0;
-  obu_ctx->bytes_buffered = 0;
-}
diff --git a/third_party/aom/common/obudec.h b/third_party/aom/common/obudec.h
deleted file mode 100644
index b2adb1e3d..000000000
--- a/third_party/aom/common/obudec.h
+++ /dev/null
@@ -1,48 +0,0 @@
-/*
- * Copyright (c) 2017, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-#ifndef AOM_COMMON_OBUDEC_H_
-#define AOM_COMMON_OBUDEC_H_
-
-#include "common/tools_common.h"
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-// Reader state for an OBU input file.
-struct ObuDecInputContext {
- // Generic input context; its 'file' member is the stream that is read.
- struct AvxInputContext *avx_ctx;
- // Heap buffer holding OBU data that was read but not yet handed out.
- // Allocated by file_is_obu() and released by obudec_free().
- uint8_t *buffer;
- // Allocated size of 'buffer' in bytes.
- size_t buffer_capacity;
- // Number of valid bytes currently stored at the start of 'buffer'.
- size_t bytes_buffered;
- // Non-zero when the input uses Annex B framing (size fields precede the
- // temporal unit, frame unit and OBU).
- int is_annexb;
-};
-
-// Returns 1 when file data starts (if Annex B stream, after reading the
-// size of the OBU) with what appears to be a Temporal Delimiter
-// OBU as defined by Section 5 of the AV1 bitstream specification.
-int file_is_obu(struct ObuDecInputContext *obu_ctx);
-
-// Reads one Temporal Unit from the input file. Returns 0 when a TU is
-// successfully read, 1 when end of file is reached, and less than 0 when an
-// error occurs. Stores TU data in 'buffer'. Reallocs buffer to match TU size,
-// returns buffer capacity via 'buffer_size', and returns size of buffered data
-// via 'bytes_read'.
-int obudec_read_temporal_unit(struct ObuDecInputContext *obu_ctx,
- uint8_t **buffer, size_t *bytes_read,
- size_t *buffer_size);
-
-void obudec_free(struct ObuDecInputContext *obu_ctx);
-
-#ifdef __cplusplus
-} /* extern "C" */
-#endif
-
-#endif // AOM_COMMON_OBUDEC_H_
diff --git a/third_party/aom/common/rawenc.c b/third_party/aom/common/rawenc.c
deleted file mode 100644
index 5a2731d3a..000000000
--- a/third_party/aom/common/rawenc.c
+++ /dev/null
@@ -1,44 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#include "common/rawenc.h"
-
-// Writes the selected planes of 'img' to 'file', one row at a time, so that
-// any stride padding at the end of each row is left out.
-void raw_write_image_file(const aom_image_t *img, const int *planes,
-                          const int num_planes, FILE *file) {
-  // High bit depth images store each sample in two bytes.
-  const int sample_bytes = (img->fmt & AOM_IMG_FMT_HIGHBITDEPTH) ? 2 : 1;
-  int idx;
-
-  for (idx = 0; idx < num_planes; ++idx) {
-    const int plane_id = planes[idx];
-    const unsigned char *row = img->planes[plane_id];
-    const int row_stride = img->stride[plane_id];
-    const int width = aom_img_plane_width(img, plane_id);
-    int rows_left = aom_img_plane_height(img, plane_id);
-
-    for (; rows_left > 0; --rows_left, row += row_stride) {
-      fwrite(row, sample_bytes, width, file);
-    }
-  }
-}
-
-// Feeds the selected planes of 'img' into 'md5', one row at a time, so that
-// any stride padding at the end of each row is not hashed.
-void raw_update_image_md5(const aom_image_t *img, const int *planes,
-                          const int num_planes, MD5Context *md5) {
-  int idx;
-
-  for (idx = 0; idx < num_planes; ++idx) {
-    const int plane_id = planes[idx];
-    const unsigned char *row = img->planes[plane_id];
-    const int row_stride = img->stride[plane_id];
-    // Row length in bytes: two bytes per sample for high bit depth images.
-    const int sample_bytes = (img->fmt & AOM_IMG_FMT_HIGHBITDEPTH) ? 2 : 1;
-    const int row_bytes = aom_img_plane_width(img, plane_id) * sample_bytes;
-    int rows_left = aom_img_plane_height(img, plane_id);
-
-    for (; rows_left > 0; --rows_left, row += row_stride) {
-      MD5Update(md5, row, row_bytes);
-    }
-  }
-}
diff --git a/third_party/aom/common/rawenc.h b/third_party/aom/common/rawenc.h
deleted file mode 100644
index cf5e00e6f..000000000
--- a/third_party/aom/common/rawenc.h
+++ /dev/null
@@ -1,32 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#ifndef AOM_COMMON_RAWENC_H_
-#define AOM_COMMON_RAWENC_H_
-
-#include "aom/aom_decoder.h"
-#include "common/md5_utils.h"
-#include "common/tools_common.h"
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-// Writes the 'num_planes' planes of 'img' listed in 'planes' to 'file', row
-// by row, without the stride padding.
-void raw_write_image_file(const aom_image_t *img, const int *planes,
- const int num_planes, FILE *file);
-// Feeds the same row data (the 'num_planes' planes listed in 'planes') into
-// the MD5 context 'md5' instead of writing it out.
-void raw_update_image_md5(const aom_image_t *img, const int *planes,
- const int num_planes, MD5Context *md5);
-
-#ifdef __cplusplus
-} // extern "C"
-#endif
-
-#endif // AOM_COMMON_RAWENC_H_
diff --git a/third_party/aom/common/tools_common.c b/third_party/aom/common/tools_common.c
deleted file mode 100644
index 21cd80026..000000000
--- a/third_party/aom/common/tools_common.c
+++ /dev/null
@@ -1,425 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#include "common/tools_common.h"
-
-#include <math.h>
-#include <stdarg.h>
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-
-#if CONFIG_AV1_ENCODER
-#include "aom/aomcx.h"
-#endif
-
-#if CONFIG_AV1_DECODER
-#include "aom/aomdx.h"
-#endif
-
-#if defined(_WIN32) || defined(__OS2__)
-#include <io.h>
-#include <fcntl.h>
-
-#ifdef __OS2__
-#define _setmode setmode
-#define _fileno fileno
-#define _O_BINARY O_BINARY
-#endif
-#endif
-
-#define LOG_ERROR(label) \
- do { \
- const char *l = label; \
- va_list ap; \
- va_start(ap, fmt); \
- if (l) fprintf(stderr, "%s: ", l); \
- vfprintf(stderr, fmt, ap); \
- fprintf(stderr, "\n"); \
- va_end(ap); \
- } while (0)
-
-FILE *set_binary_mode(FILE *stream) {
- (void)stream;
-#if defined(_WIN32) || defined(__OS2__)
- _setmode(_fileno(stream), _O_BINARY);
-#endif
- return stream;
-}
-
-void die(const char *fmt, ...) {
- LOG_ERROR(NULL);
- usage_exit();
-}
-
-void fatal(const char *fmt, ...) {
- LOG_ERROR("Fatal");
- exit(EXIT_FAILURE);
-}
-
-void warn(const char *fmt, ...) { LOG_ERROR("Warning"); }
-
-void die_codec(aom_codec_ctx_t *ctx, const char *s) {
- const char *detail = aom_codec_error_detail(ctx);
-
- printf("%s: %s\n", s, aom_codec_error(ctx));
- if (detail) printf(" %s\n", detail);
- exit(EXIT_FAILURE);
-}
-
-int read_yuv_frame(struct AvxInputContext *input_ctx, aom_image_t *yuv_frame) {
- FILE *f = input_ctx->file;
- struct FileTypeDetectionBuffer *detect = &input_ctx->detect;
- int plane = 0;
- int shortread = 0;
- const int bytespp = (yuv_frame->fmt & AOM_IMG_FMT_HIGHBITDEPTH) ? 2 : 1;
-
- for (plane = 0; plane < 3; ++plane) {
- uint8_t *ptr;
- const int w = aom_img_plane_width(yuv_frame, plane);
- const int h = aom_img_plane_height(yuv_frame, plane);
- int r;
-
- /* Determine the correct plane based on the image format. The for-loop
- * always counts in Y,U,V order, but this may not match the order of
- * the data on disk.
- */
- switch (plane) {
- case 1:
- ptr =
- yuv_frame->planes[yuv_frame->fmt == AOM_IMG_FMT_YV12 ? AOM_PLANE_V
- : AOM_PLANE_U];
- break;
- case 2:
- ptr =
- yuv_frame->planes[yuv_frame->fmt == AOM_IMG_FMT_YV12 ? AOM_PLANE_U
- : AOM_PLANE_V];
- break;
- default: ptr = yuv_frame->planes[plane];
- }
-
- for (r = 0; r < h; ++r) {
- size_t needed = w * bytespp;
- size_t buf_position = 0;
- const size_t left = detect->buf_read - detect->position;
- if (left > 0) {
- const size_t more = (left < needed) ? left : needed;
- memcpy(ptr, detect->buf + detect->position, more);
- buf_position = more;
- needed -= more;
- detect->position += more;
- }
- if (needed > 0) {
- shortread |= (fread(ptr + buf_position, 1, needed, f) < needed);
- }
-
- ptr += yuv_frame->stride[plane];
- }
- }
-
- return shortread;
-}
-
-#if CONFIG_AV1_ENCODER
-static const AvxInterface aom_encoders[] = {
- { "av1", AV1_FOURCC, &aom_codec_av1_cx },
-};
-
-int get_aom_encoder_count(void) {
- return sizeof(aom_encoders) / sizeof(aom_encoders[0]);
-}
-
-const AvxInterface *get_aom_encoder_by_index(int i) { return &aom_encoders[i]; }
-
-const AvxInterface *get_aom_encoder_by_name(const char *name) {
- int i;
-
- for (i = 0; i < get_aom_encoder_count(); ++i) {
- const AvxInterface *encoder = get_aom_encoder_by_index(i);
- if (strcmp(encoder->name, name) == 0) return encoder;
- }
-
- return NULL;
-}
-#endif // CONFIG_AV1_ENCODER
-
-#if CONFIG_AV1_DECODER
-static const AvxInterface aom_decoders[] = {
- { "av1", AV1_FOURCC, &aom_codec_av1_dx },
-};
-
-int get_aom_decoder_count(void) {
- return sizeof(aom_decoders) / sizeof(aom_decoders[0]);
-}
-
-const AvxInterface *get_aom_decoder_by_index(int i) { return &aom_decoders[i]; }
-
-const AvxInterface *get_aom_decoder_by_name(const char *name) {
- int i;
-
- for (i = 0; i < get_aom_decoder_count(); ++i) {
- const AvxInterface *const decoder = get_aom_decoder_by_index(i);
- if (strcmp(decoder->name, name) == 0) return decoder;
- }
-
- return NULL;
-}
-
-const AvxInterface *get_aom_decoder_by_fourcc(uint32_t fourcc) {
- int i;
-
- for (i = 0; i < get_aom_decoder_count(); ++i) {
- const AvxInterface *const decoder = get_aom_decoder_by_index(i);
- if (decoder->fourcc == fourcc) return decoder;
- }
-
- return NULL;
-}
-#endif // CONFIG_AV1_DECODER
-
-void aom_img_write(const aom_image_t *img, FILE *file) {
- int plane;
-
- for (plane = 0; plane < 3; ++plane) {
- const unsigned char *buf = img->planes[plane];
- const int stride = img->stride[plane];
- const int w = aom_img_plane_width(img, plane) *
- ((img->fmt & AOM_IMG_FMT_HIGHBITDEPTH) ? 2 : 1);
- const int h = aom_img_plane_height(img, plane);
- int y;
-
- for (y = 0; y < h; ++y) {
- fwrite(buf, 1, w, file);
- buf += stride;
- }
- }
-}
-
-int aom_img_read(aom_image_t *img, FILE *file) {
- int plane;
-
- for (plane = 0; plane < 3; ++plane) {
- unsigned char *buf = img->planes[plane];
- const int stride = img->stride[plane];
- const int w = aom_img_plane_width(img, plane) *
- ((img->fmt & AOM_IMG_FMT_HIGHBITDEPTH) ? 2 : 1);
- const int h = aom_img_plane_height(img, plane);
- int y;
-
- for (y = 0; y < h; ++y) {
- if (fread(buf, 1, w, file) != (size_t)w) return 0;
- buf += stride;
- }
- }
-
- return 1;
-}
-
-// TODO(dkovalev) change sse_to_psnr signature: double -> int64_t
-double sse_to_psnr(double samples, double peak, double sse) {
- static const double kMaxPSNR = 100.0;
-
- if (sse > 0.0) {
- const double psnr = 10.0 * log10(samples * peak * peak / sse);
- return psnr > kMaxPSNR ? kMaxPSNR : psnr;
- } else {
- return kMaxPSNR;
- }
-}
-
-// TODO(debargha): Consolidate the functions below into a separate file.
-static void highbd_img_upshift(aom_image_t *dst, const aom_image_t *src,
- int input_shift) {
- // Note the offset is 1 less than half.
- const int offset = input_shift > 0 ? (1 << (input_shift - 1)) - 1 : 0;
- int plane;
- if (dst->d_w != src->d_w || dst->d_h != src->d_h ||
- dst->x_chroma_shift != src->x_chroma_shift ||
- dst->y_chroma_shift != src->y_chroma_shift || dst->fmt != src->fmt ||
- input_shift < 0) {
- fatal("Unsupported image conversion");
- }
- switch (src->fmt) {
- case AOM_IMG_FMT_I42016:
- case AOM_IMG_FMT_I42216:
- case AOM_IMG_FMT_I44416: break;
- default: fatal("Unsupported image conversion"); break;
- }
- for (plane = 0; plane < 3; plane++) {
- int w = src->d_w;
- int h = src->d_h;
- int x, y;
- if (plane) {
- w = (w + src->x_chroma_shift) >> src->x_chroma_shift;
- h = (h + src->y_chroma_shift) >> src->y_chroma_shift;
- }
- for (y = 0; y < h; y++) {
- const uint16_t *p_src =
- (const uint16_t *)(src->planes[plane] + y * src->stride[plane]);
- uint16_t *p_dst =
- (uint16_t *)(dst->planes[plane] + y * dst->stride[plane]);
- for (x = 0; x < w; x++) *p_dst++ = (*p_src++ << input_shift) + offset;
- }
- }
-}
-
-static void lowbd_img_upshift(aom_image_t *dst, const aom_image_t *src,
- int input_shift) {
- // Note the offset is 1 less than half.
- const int offset = input_shift > 0 ? (1 << (input_shift - 1)) - 1 : 0;
- int plane;
- if (dst->d_w != src->d_w || dst->d_h != src->d_h ||
- dst->x_chroma_shift != src->x_chroma_shift ||
- dst->y_chroma_shift != src->y_chroma_shift ||
- dst->fmt != src->fmt + AOM_IMG_FMT_HIGHBITDEPTH || input_shift < 0) {
- fatal("Unsupported image conversion");
- }
- switch (src->fmt) {
- case AOM_IMG_FMT_I420:
- case AOM_IMG_FMT_I422:
- case AOM_IMG_FMT_I444: break;
- default: fatal("Unsupported image conversion"); break;
- }
- for (plane = 0; plane < 3; plane++) {
- int w = src->d_w;
- int h = src->d_h;
- int x, y;
- if (plane) {
- w = (w + src->x_chroma_shift) >> src->x_chroma_shift;
- h = (h + src->y_chroma_shift) >> src->y_chroma_shift;
- }
- for (y = 0; y < h; y++) {
- const uint8_t *p_src = src->planes[plane] + y * src->stride[plane];
- uint16_t *p_dst =
- (uint16_t *)(dst->planes[plane] + y * dst->stride[plane]);
- for (x = 0; x < w; x++) {
- *p_dst++ = (*p_src++ << input_shift) + offset;
- }
- }
- }
-}
-
-void aom_img_upshift(aom_image_t *dst, const aom_image_t *src,
- int input_shift) {
- if (src->fmt & AOM_IMG_FMT_HIGHBITDEPTH) {
- highbd_img_upshift(dst, src, input_shift);
- } else {
- lowbd_img_upshift(dst, src, input_shift);
- }
-}
-
-void aom_img_truncate_16_to_8(aom_image_t *dst, const aom_image_t *src) {
- int plane;
- if (dst->fmt + AOM_IMG_FMT_HIGHBITDEPTH != src->fmt || dst->d_w != src->d_w ||
- dst->d_h != src->d_h || dst->x_chroma_shift != src->x_chroma_shift ||
- dst->y_chroma_shift != src->y_chroma_shift) {
- fatal("Unsupported image conversion");
- }
- switch (dst->fmt) {
- case AOM_IMG_FMT_I420:
- case AOM_IMG_FMT_I422:
- case AOM_IMG_FMT_I444: break;
- default: fatal("Unsupported image conversion"); break;
- }
- for (plane = 0; plane < 3; plane++) {
- int w = src->d_w;
- int h = src->d_h;
- int x, y;
- if (plane) {
- w = (w + src->x_chroma_shift) >> src->x_chroma_shift;
- h = (h + src->y_chroma_shift) >> src->y_chroma_shift;
- }
- for (y = 0; y < h; y++) {
- const uint16_t *p_src =
- (const uint16_t *)(src->planes[plane] + y * src->stride[plane]);
- uint8_t *p_dst = dst->planes[plane] + y * dst->stride[plane];
- for (x = 0; x < w; x++) {
- *p_dst++ = (uint8_t)(*p_src++);
- }
- }
- }
-}
-
-static void highbd_img_downshift(aom_image_t *dst, const aom_image_t *src,
- int down_shift) {
- int plane;
- if (dst->d_w != src->d_w || dst->d_h != src->d_h ||
- dst->x_chroma_shift != src->x_chroma_shift ||
- dst->y_chroma_shift != src->y_chroma_shift || dst->fmt != src->fmt ||
- down_shift < 0) {
- fatal("Unsupported image conversion");
- }
- switch (src->fmt) {
- case AOM_IMG_FMT_I42016:
- case AOM_IMG_FMT_I42216:
- case AOM_IMG_FMT_I44416: break;
- default: fatal("Unsupported image conversion"); break;
- }
- for (plane = 0; plane < 3; plane++) {
- int w = src->d_w;
- int h = src->d_h;
- int x, y;
- if (plane) {
- w = (w + src->x_chroma_shift) >> src->x_chroma_shift;
- h = (h + src->y_chroma_shift) >> src->y_chroma_shift;
- }
- for (y = 0; y < h; y++) {
- const uint16_t *p_src =
- (const uint16_t *)(src->planes[plane] + y * src->stride[plane]);
- uint16_t *p_dst =
- (uint16_t *)(dst->planes[plane] + y * dst->stride[plane]);
- for (x = 0; x < w; x++) *p_dst++ = *p_src++ >> down_shift;
- }
- }
-}
-
-static void lowbd_img_downshift(aom_image_t *dst, const aom_image_t *src,
- int down_shift) {
- int plane;
- if (dst->d_w != src->d_w || dst->d_h != src->d_h ||
- dst->x_chroma_shift != src->x_chroma_shift ||
- dst->y_chroma_shift != src->y_chroma_shift ||
- src->fmt != dst->fmt + AOM_IMG_FMT_HIGHBITDEPTH || down_shift < 0) {
- fatal("Unsupported image conversion");
- }
- switch (dst->fmt) {
- case AOM_IMG_FMT_I420:
- case AOM_IMG_FMT_I422:
- case AOM_IMG_FMT_I444: break;
- default: fatal("Unsupported image conversion"); break;
- }
- for (plane = 0; plane < 3; plane++) {
- int w = src->d_w;
- int h = src->d_h;
- int x, y;
- if (plane) {
- w = (w + src->x_chroma_shift) >> src->x_chroma_shift;
- h = (h + src->y_chroma_shift) >> src->y_chroma_shift;
- }
- for (y = 0; y < h; y++) {
- const uint16_t *p_src =
- (const uint16_t *)(src->planes[plane] + y * src->stride[plane]);
- uint8_t *p_dst = dst->planes[plane] + y * dst->stride[plane];
- for (x = 0; x < w; x++) {
- *p_dst++ = *p_src++ >> down_shift;
- }
- }
- }
-}
-
-void aom_img_downshift(aom_image_t *dst, const aom_image_t *src,
- int down_shift) {
- if (dst->fmt & AOM_IMG_FMT_HIGHBITDEPTH) {
- highbd_img_downshift(dst, src, down_shift);
- } else {
- lowbd_img_downshift(dst, src, down_shift);
- }
-}
diff --git a/third_party/aom/common/tools_common.h b/third_party/aom/common/tools_common.h
deleted file mode 100644
index 4e1d12f4a..000000000
--- a/third_party/aom/common/tools_common.h
+++ /dev/null
@@ -1,164 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-#ifndef AOM_COMMON_TOOLS_COMMON_H_
-#define AOM_COMMON_TOOLS_COMMON_H_
-
-#include <stdio.h>
-
-#include "config/aom_config.h"
-
-#include "aom/aom_codec.h"
-#include "aom/aom_image.h"
-#include "aom/aom_integer.h"
-#include "aom_ports/msvc.h"
-
-#if CONFIG_AV1_ENCODER
-#include "common/y4minput.h"
-#endif
-
-#if defined(_MSC_VER)
-/* MSVS uses _f{seek,tell}i64. */
-#define fseeko _fseeki64
-#define ftello _ftelli64
-typedef int64_t FileOffset;
-#elif defined(_WIN32)
-#include <sys/types.h> /* NOLINT*/
-/* MinGW uses f{seek,tell}o64 for large files. */
-#define fseeko fseeko64
-#define ftello ftello64
-typedef off64_t FileOffset;
-#elif CONFIG_OS_SUPPORT
-#include <sys/types.h> /* NOLINT*/
-typedef off_t FileOffset;
-/* Use 32-bit file operations in WebM file format when building ARM
- * executables (.axf) with RVCT. */
-#else
-#define fseeko fseek
-#define ftello ftell
-typedef long FileOffset; /* NOLINT */
-#endif /* CONFIG_OS_SUPPORT */
-
-#if CONFIG_OS_SUPPORT
-#if defined(_MSC_VER)
-#include <io.h> /* NOLINT */
-#define isatty _isatty
-#define fileno _fileno
-#else
-#include <unistd.h> /* NOLINT */
-#endif /* _MSC_VER */
-#endif /* CONFIG_OS_SUPPORT */
-
-#define LITERALU64(hi, lo) ((((uint64_t)hi) << 32) | lo)
-
-#ifndef PATH_MAX
-#define PATH_MAX 512
-#endif
-
-#define IVF_FRAME_HDR_SZ (4 + 8) /* 4 byte size + 8 byte timestamp */
-#define IVF_FILE_HDR_SZ 32
-
-#define RAW_FRAME_HDR_SZ sizeof(uint32_t)
-
-#define AV1_FOURCC 0x31305641
-
-enum VideoFileType {
- FILE_TYPE_OBU,
- FILE_TYPE_RAW,
- FILE_TYPE_IVF,
- FILE_TYPE_Y4M,
- FILE_TYPE_WEBM
-};
-
-struct FileTypeDetectionBuffer {
- char buf[4];
- size_t buf_read;
- size_t position;
-};
-
-struct AvxRational {
- int numerator;
- int denominator;
-};
-
-struct AvxInputContext {
- const char *filename;
- FILE *file;
- int64_t length;
- struct FileTypeDetectionBuffer detect;
- enum VideoFileType file_type;
- uint32_t width;
- uint32_t height;
- struct AvxRational pixel_aspect_ratio;
- aom_img_fmt_t fmt;
- aom_bit_depth_t bit_depth;
- int only_i420;
- uint32_t fourcc;
- struct AvxRational framerate;
-#if CONFIG_AV1_ENCODER
- y4m_input y4m;
-#endif
-};
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-#if defined(__GNUC__)
-#define AOM_NO_RETURN __attribute__((noreturn))
-#else
-#define AOM_NO_RETURN
-#endif
-
-/* Sets a stdio stream into binary mode */
-FILE *set_binary_mode(FILE *stream);
-
-void die(const char *fmt, ...) AOM_NO_RETURN;
-void fatal(const char *fmt, ...) AOM_NO_RETURN;
-void warn(const char *fmt, ...);
-
-void die_codec(aom_codec_ctx_t *ctx, const char *s) AOM_NO_RETURN;
-
-/* The tool including this file must define usage_exit() */
-void usage_exit(void) AOM_NO_RETURN;
-
-#undef AOM_NO_RETURN
-
-int read_yuv_frame(struct AvxInputContext *input_ctx, aom_image_t *yuv_frame);
-
-typedef struct AvxInterface {
- const char *const name;
- const uint32_t fourcc;
- aom_codec_iface_t *(*const codec_interface)();
-} AvxInterface;
-
-int get_aom_encoder_count(void);
-const AvxInterface *get_aom_encoder_by_index(int i);
-const AvxInterface *get_aom_encoder_by_name(const char *name);
-
-int get_aom_decoder_count(void);
-const AvxInterface *get_aom_decoder_by_index(int i);
-const AvxInterface *get_aom_decoder_by_name(const char *name);
-const AvxInterface *get_aom_decoder_by_fourcc(uint32_t fourcc);
-
-void aom_img_write(const aom_image_t *img, FILE *file);
-int aom_img_read(aom_image_t *img, FILE *file);
-
-double sse_to_psnr(double samples, double peak, double mse);
-void aom_img_upshift(aom_image_t *dst, const aom_image_t *src, int input_shift);
-void aom_img_downshift(aom_image_t *dst, const aom_image_t *src,
- int down_shift);
-void aom_img_truncate_16_to_8(aom_image_t *dst, const aom_image_t *src);
-
-#ifdef __cplusplus
-} /* extern "C" */
-#endif
-
-#endif // AOM_COMMON_TOOLS_COMMON_H_
diff --git a/third_party/aom/common/video_common.h b/third_party/aom/common/video_common.h
deleted file mode 100644
index bf95031be..000000000
--- a/third_party/aom/common/video_common.h
+++ /dev/null
@@ -1,25 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#ifndef AOM_COMMON_VIDEO_COMMON_H_
-#define AOM_COMMON_VIDEO_COMMON_H_
-
-#include "common/tools_common.h"
-
-typedef struct {
- uint32_t codec_fourcc;
- int frame_width;
- int frame_height;
- struct AvxRational time_base;
- unsigned int is_annexb;
-} AvxVideoInfo;
-
-#endif // AOM_COMMON_VIDEO_COMMON_H_
diff --git a/third_party/aom/common/video_reader.c b/third_party/aom/common/video_reader.c
deleted file mode 100644
index 47ad6e189..000000000
--- a/third_party/aom/common/video_reader.c
+++ /dev/null
@@ -1,123 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-#include <stdlib.h>
-#include <string.h>
-#include <assert.h>
-
-#include "aom_ports/mem_ops.h"
-#include "common/ivfdec.h"
-#include "common/obudec.h"
-#include "common/tools_common.h"
-#include "common/video_reader.h"
-#include "common/webmdec.h"
-
-struct AvxVideoReaderStruct {
- AvxVideoInfo info;
- struct AvxInputContext input_ctx;
- struct ObuDecInputContext obu_ctx;
- struct WebmInputContext webm_ctx;
- uint8_t *buffer;
- size_t buffer_size;
- size_t frame_size;
- aom_codec_pts_t pts;
-};
-
-AvxVideoReader *aom_video_reader_open(const char *filename) {
- AvxVideoReader *reader = NULL;
- FILE *const file = fopen(filename, "rb");
- if (!file) return NULL; // Can't open file
-
- reader = (AvxVideoReader *)calloc(1, sizeof(*reader));
- if (!reader) {
- fclose(file);
- return NULL; // Can't allocate AvxVideoReader
- }
-
- reader->input_ctx.filename = filename;
- reader->input_ctx.file = file;
- reader->obu_ctx.avx_ctx = &reader->input_ctx;
- reader->obu_ctx.is_annexb = 1;
-
- if (file_is_ivf(&reader->input_ctx)) {
- reader->input_ctx.file_type = FILE_TYPE_IVF;
- reader->info.codec_fourcc = reader->input_ctx.fourcc;
- reader->info.frame_width = reader->input_ctx.width;
- reader->info.frame_height = reader->input_ctx.height;
-#if CONFIG_WEBM_IO
- } else if (file_is_webm(&reader->webm_ctx, &reader->input_ctx)) {
- reader->input_ctx.file_type = FILE_TYPE_WEBM;
- reader->info.codec_fourcc = reader->input_ctx.fourcc;
- reader->info.frame_width = reader->input_ctx.width;
- reader->info.frame_height = reader->input_ctx.height;
-#endif
- } else if (file_is_obu(&reader->obu_ctx)) {
- reader->input_ctx.file_type = FILE_TYPE_OBU;
- // assume AV1
- reader->info.codec_fourcc = AV1_FOURCC;
- reader->info.is_annexb = reader->obu_ctx.is_annexb;
- } else {
- fclose(file);
- free(reader);
- return NULL; // Unknown file type
- }
-
- return reader;
-}
-
-void aom_video_reader_close(AvxVideoReader *reader) {
- if (reader) {
- fclose(reader->input_ctx.file);
- if (reader->input_ctx.file_type == FILE_TYPE_OBU) {
- obudec_free(&reader->obu_ctx);
- }
- free(reader->buffer);
- free(reader);
- }
-}
-
-int aom_video_reader_read_frame(AvxVideoReader *reader) {
- if (reader->input_ctx.file_type == FILE_TYPE_IVF) {
- return !ivf_read_frame(reader->input_ctx.file, &reader->buffer,
- &reader->frame_size, &reader->buffer_size,
- &reader->pts);
- } else if (reader->input_ctx.file_type == FILE_TYPE_OBU) {
- return !obudec_read_temporal_unit(&reader->obu_ctx, &reader->buffer,
- &reader->frame_size,
- &reader->buffer_size);
-#if CONFIG_WEBM_IO
- } else if (reader->input_ctx.file_type == FILE_TYPE_WEBM) {
- return !webm_read_frame(&reader->webm_ctx, &reader->buffer,
- &reader->frame_size, &reader->buffer_size);
-#endif
- } else {
- assert(0);
- return 0;
- }
-}
-
-const uint8_t *aom_video_reader_get_frame(AvxVideoReader *reader,
- size_t *size) {
- if (size) *size = reader->frame_size;
-
- return reader->buffer;
-}
-
-int64_t aom_video_reader_get_frame_pts(AvxVideoReader *reader) {
- return (int64_t)reader->pts;
-}
-
-FILE *aom_video_reader_get_file(AvxVideoReader *reader) {
- return reader->input_ctx.file;
-}
-
-const AvxVideoInfo *aom_video_reader_get_info(AvxVideoReader *reader) {
- return &reader->info;
-}
diff --git a/third_party/aom/common/video_reader.h b/third_party/aom/common/video_reader.h
deleted file mode 100644
index 903deae84..000000000
--- a/third_party/aom/common/video_reader.h
+++ /dev/null
@@ -1,57 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#ifndef AOM_COMMON_VIDEO_READER_H_
-#define AOM_COMMON_VIDEO_READER_H_
-
-#include "common/video_common.h"
-
-// The following code is work in progress. It is going to support transparent
-// reading of input files. Right now only IVF format is supported for
-// simplicity. The main goal the API is to be simple and easy to use in example
-// code and in aomenc/aomdec later. All low-level details like memory
-// buffer management are hidden from API users.
-struct AvxVideoReaderStruct;
-typedef struct AvxVideoReaderStruct AvxVideoReader;
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-// Opens the input file for reading and inspects it to determine file type.
-// Returns an opaque AvxVideoReader* upon success, or NULL upon failure.
-// Right now only IVF format is supported.
-AvxVideoReader *aom_video_reader_open(const char *filename);
-
-// Frees all resources associated with AvxVideoReader* returned from
-// aom_video_reader_open() call.
-void aom_video_reader_close(AvxVideoReader *reader);
-
-// Reads frame from the file and stores it in internal buffer.
-int aom_video_reader_read_frame(AvxVideoReader *reader);
-
-// Returns the pointer to memory buffer with frame data read by last call to
-// aom_video_reader_read_frame().
-const uint8_t *aom_video_reader_get_frame(AvxVideoReader *reader, size_t *size);
-
-// Returns the pts of the frame.
-int64_t aom_video_reader_get_frame_pts(AvxVideoReader *reader);
-// Return the reader file.
-FILE *aom_video_reader_get_file(AvxVideoReader *reader);
-
-// Fills AvxVideoInfo with information from opened video file.
-const AvxVideoInfo *aom_video_reader_get_info(AvxVideoReader *reader);
-
-#ifdef __cplusplus
-} // extern "C"
-#endif
-
-#endif // AOM_COMMON_VIDEO_READER_H_
diff --git a/third_party/aom/common/video_writer.c b/third_party/aom/common/video_writer.c
deleted file mode 100644
index a7ec309fc..000000000
--- a/third_party/aom/common/video_writer.c
+++ /dev/null
@@ -1,77 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-#include "common/video_writer.h"
-
-#include <stdlib.h>
-
-#include "aom/aom_encoder.h"
-#include "common/ivfenc.h"
-
-struct AvxVideoWriterStruct {
- AvxVideoInfo info;
- FILE *file;
- int frame_count;
-};
-
-static void write_header(FILE *file, const AvxVideoInfo *info,
- int frame_count) {
- struct aom_codec_enc_cfg cfg;
- cfg.g_w = info->frame_width;
- cfg.g_h = info->frame_height;
- cfg.g_timebase.num = info->time_base.numerator;
- cfg.g_timebase.den = info->time_base.denominator;
-
- ivf_write_file_header(file, &cfg, info->codec_fourcc, frame_count);
-}
-
-AvxVideoWriter *aom_video_writer_open(const char *filename,
- AvxContainer container,
- const AvxVideoInfo *info) {
- if (container == kContainerIVF) {
- AvxVideoWriter *writer = NULL;
- FILE *const file = fopen(filename, "wb");
- if (!file) return NULL;
-
- writer = malloc(sizeof(*writer));
- if (!writer) return NULL;
-
- writer->frame_count = 0;
- writer->info = *info;
- writer->file = file;
-
- write_header(writer->file, info, 0);
-
- return writer;
- }
-
- return NULL;
-}
-
-void aom_video_writer_close(AvxVideoWriter *writer) {
- if (writer) {
- // Rewriting frame header with real frame count
- rewind(writer->file);
- write_header(writer->file, &writer->info, writer->frame_count);
-
- fclose(writer->file);
- free(writer);
- }
-}
-
-int aom_video_writer_write_frame(AvxVideoWriter *writer, const uint8_t *buffer,
- size_t size, int64_t pts) {
- ivf_write_frame_header(writer->file, pts, size);
- if (fwrite(buffer, 1, size, writer->file) != size) return 0;
-
- ++writer->frame_count;
-
- return 1;
-}
diff --git a/third_party/aom/common/video_writer.h b/third_party/aom/common/video_writer.h
deleted file mode 100644
index 3e2b6554b..000000000
--- a/third_party/aom/common/video_writer.h
+++ /dev/null
@@ -1,45 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#ifndef AOM_COMMON_VIDEO_WRITER_H_
-#define AOM_COMMON_VIDEO_WRITER_H_
-
-#include "common/video_common.h"
-
-typedef enum { kContainerIVF } AvxContainer;
-
-struct AvxVideoWriterStruct;
-typedef struct AvxVideoWriterStruct AvxVideoWriter;
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-// Finds and opens writer for specified container format.
-// Returns an opaque AvxVideoWriter* upon success, or NULL upon failure.
-// Right now only IVF format is supported.
-AvxVideoWriter *aom_video_writer_open(const char *filename,
- AvxContainer container,
- const AvxVideoInfo *info);
-
-// Frees all resources associated with AvxVideoWriter* returned from
-// aom_video_writer_open() call.
-void aom_video_writer_close(AvxVideoWriter *writer);
-
-// Writes frame bytes to the file.
-int aom_video_writer_write_frame(AvxVideoWriter *writer, const uint8_t *buffer,
- size_t size, int64_t pts);
-
-#ifdef __cplusplus
-} // extern "C"
-#endif
-
-#endif // AOM_COMMON_VIDEO_WRITER_H_
diff --git a/third_party/aom/common/warnings.c b/third_party/aom/common/warnings.c
deleted file mode 100644
index 2facee252..000000000
--- a/third_party/aom/common/warnings.c
+++ /dev/null
@@ -1,97 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#include "common/warnings.h"
-
-#include <assert.h>
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-
-#include "aom/aom_encoder.h"
-#include "apps/aomenc.h"
-#include "common/tools_common.h"
-
-static const char quantizer_warning_string[] =
- "Bad quantizer values. Quantizer values should not be equal, and should "
- "differ by at least 8.";
-
-struct WarningListNode {
- const char *warning_string;
- struct WarningListNode *next_warning;
-};
-
-struct WarningList {
- struct WarningListNode *warning_node;
-};
-
-static void add_warning(const char *warning_string,
- struct WarningList *warning_list) {
- struct WarningListNode **node = &warning_list->warning_node;
-
- struct WarningListNode *new_node = malloc(sizeof(*new_node));
- if (new_node == NULL) {
- fatal("Unable to allocate warning node.");
- }
-
- new_node->warning_string = warning_string;
- new_node->next_warning = NULL;
-
- while (*node != NULL) node = &(*node)->next_warning;
-
- *node = new_node;
-}
-
-static void free_warning_list(struct WarningList *warning_list) {
- while (warning_list->warning_node != NULL) {
- struct WarningListNode *const node = warning_list->warning_node;
- warning_list->warning_node = node->next_warning;
- free(node);
- }
-}
-
-static int continue_prompt(int num_warnings) {
- int c;
- fprintf(stderr,
- "%d encoder configuration warning(s). Continue? (y to continue) ",
- num_warnings);
- c = getchar();
- return c == 'y';
-}
-
-static void check_quantizer(int min_q, int max_q,
- struct WarningList *warning_list) {
- const int lossless = min_q == 0 && max_q == 0;
- if (!lossless && (min_q == max_q || abs(max_q - min_q) < 8))
- add_warning(quantizer_warning_string, warning_list);
-}
-
-void check_encoder_config(int disable_prompt,
- const struct AvxEncoderConfig *global_config,
- const struct aom_codec_enc_cfg *stream_config) {
- int num_warnings = 0;
- struct WarningListNode *warning = NULL;
- struct WarningList warning_list = { 0 };
- (void)global_config;
- check_quantizer(stream_config->rc_min_quantizer,
- stream_config->rc_max_quantizer, &warning_list);
- /* Count and print warnings. */
- for (warning = warning_list.warning_node; warning != NULL;
- warning = warning->next_warning, ++num_warnings) {
- warn(warning->warning_string);
- }
-
- free_warning_list(&warning_list);
-
- if (num_warnings) {
- if (!disable_prompt && !continue_prompt(num_warnings)) exit(EXIT_FAILURE);
- }
-}
diff --git a/third_party/aom/common/warnings.h b/third_party/aom/common/warnings.h
deleted file mode 100644
index 36f1fe070..000000000
--- a/third_party/aom/common/warnings.h
+++ /dev/null
@@ -1,34 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-#ifndef AOM_COMMON_WARNINGS_H_
-#define AOM_COMMON_WARNINGS_H_
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-struct aom_codec_enc_cfg;
-struct AvxEncoderConfig;
-
-/*
- * Checks config for improperly used settings. Warns user upon encountering
- * settings that will lead to poor output quality. Prompts user to continue
- * when warnings are issued.
- */
-void check_encoder_config(int disable_prompt,
- const struct AvxEncoderConfig *global_config,
- const struct aom_codec_enc_cfg *stream_config);
-
-#ifdef __cplusplus
-} // extern "C"
-#endif
-
-#endif // AOM_COMMON_WARNINGS_H_
diff --git a/third_party/aom/common/webmdec.cc b/third_party/aom/common/webmdec.cc
deleted file mode 100644
index 17ac53c93..000000000
--- a/third_party/aom/common/webmdec.cc
+++ /dev/null
@@ -1,229 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#include "common/webmdec.h"
-
-#include <cassert>
-#include <cstring>
-#include <cstdio>
-
-#include "third_party/libwebm/mkvparser/mkvparser.h"
-#include "third_party/libwebm/mkvparser/mkvreader.h"
-
-namespace {
-
-void reset(struct WebmInputContext *const webm_ctx) {
- if (webm_ctx->reader != NULL) {
- mkvparser::MkvReader *const reader =
- reinterpret_cast<mkvparser::MkvReader *>(webm_ctx->reader);
- delete reader;
- }
- if (webm_ctx->segment != NULL) {
- mkvparser::Segment *const segment =
- reinterpret_cast<mkvparser::Segment *>(webm_ctx->segment);
- delete segment;
- }
- if (webm_ctx->buffer != NULL) {
- delete[] webm_ctx->buffer;
- }
- webm_ctx->reader = NULL;
- webm_ctx->segment = NULL;
- webm_ctx->buffer = NULL;
- webm_ctx->cluster = NULL;
- webm_ctx->block_entry = NULL;
- webm_ctx->block = NULL;
- webm_ctx->block_frame_index = 0;
- webm_ctx->video_track_index = 0;
- webm_ctx->timestamp_ns = 0;
- webm_ctx->is_key_frame = false;
-}
-
-void get_first_cluster(struct WebmInputContext *const webm_ctx) {
- mkvparser::Segment *const segment =
- reinterpret_cast<mkvparser::Segment *>(webm_ctx->segment);
- const mkvparser::Cluster *const cluster = segment->GetFirst();
- webm_ctx->cluster = cluster;
-}
-
-void rewind_and_reset(struct WebmInputContext *const webm_ctx,
- struct AvxInputContext *const aom_ctx) {
- rewind(aom_ctx->file);
- reset(webm_ctx);
-}
-
-} // namespace
-
-int file_is_webm(struct WebmInputContext *webm_ctx,
- struct AvxInputContext *aom_ctx) {
- mkvparser::MkvReader *const reader = new mkvparser::MkvReader(aom_ctx->file);
- webm_ctx->reader = reader;
- webm_ctx->reached_eos = 0;
-
- mkvparser::EBMLHeader header;
- long long pos = 0;
- if (header.Parse(reader, pos) < 0) {
- rewind_and_reset(webm_ctx, aom_ctx);
- return 0;
- }
-
- mkvparser::Segment *segment;
- if (mkvparser::Segment::CreateInstance(reader, pos, segment)) {
- rewind_and_reset(webm_ctx, aom_ctx);
- return 0;
- }
- webm_ctx->segment = segment;
- if (segment->Load() < 0) {
- rewind_and_reset(webm_ctx, aom_ctx);
- return 0;
- }
-
- const mkvparser::Tracks *const tracks = segment->GetTracks();
- const mkvparser::VideoTrack *video_track = NULL;
- for (unsigned long i = 0; i < tracks->GetTracksCount(); ++i) {
- const mkvparser::Track *const track = tracks->GetTrackByIndex(i);
- if (track->GetType() == mkvparser::Track::kVideo) {
- video_track = static_cast<const mkvparser::VideoTrack *>(track);
- webm_ctx->video_track_index = static_cast<int>(track->GetNumber());
- break;
- }
- }
-
- if (video_track == NULL || video_track->GetCodecId() == NULL) {
- rewind_and_reset(webm_ctx, aom_ctx);
- return 0;
- }
-
- if (!strncmp(video_track->GetCodecId(), "V_AV1", 5)) {
- aom_ctx->fourcc = AV1_FOURCC;
- } else {
- rewind_and_reset(webm_ctx, aom_ctx);
- return 0;
- }
-
- aom_ctx->framerate.denominator = 0;
- aom_ctx->framerate.numerator = 0;
- aom_ctx->width = static_cast<uint32_t>(video_track->GetWidth());
- aom_ctx->height = static_cast<uint32_t>(video_track->GetHeight());
-
- get_first_cluster(webm_ctx);
-
- return 1;
-}
-
-int webm_read_frame(struct WebmInputContext *webm_ctx, uint8_t **buffer,
- size_t *bytes_read, size_t *buffer_size) {
- assert(webm_ctx->buffer == *buffer);
- // This check is needed for frame parallel decoding, in which case this
- // function could be called even after it has reached end of input stream.
- if (webm_ctx->reached_eos) {
- return 1;
- }
- mkvparser::Segment *const segment =
- reinterpret_cast<mkvparser::Segment *>(webm_ctx->segment);
- const mkvparser::Cluster *cluster =
- reinterpret_cast<const mkvparser::Cluster *>(webm_ctx->cluster);
- const mkvparser::Block *block =
- reinterpret_cast<const mkvparser::Block *>(webm_ctx->block);
- const mkvparser::BlockEntry *block_entry =
- reinterpret_cast<const mkvparser::BlockEntry *>(webm_ctx->block_entry);
- bool block_entry_eos = false;
- do {
- long status = 0;
- bool get_new_block = false;
- if (block_entry == NULL && !block_entry_eos) {
- status = cluster->GetFirst(block_entry);
- get_new_block = true;
- } else if (block_entry_eos || block_entry->EOS()) {
- cluster = segment->GetNext(cluster);
- if (cluster == NULL || cluster->EOS()) {
- *bytes_read = 0;
- webm_ctx->reached_eos = 1;
- return 1;
- }
- status = cluster->GetFirst(block_entry);
- block_entry_eos = false;
- get_new_block = true;
- } else if (block == NULL ||
- webm_ctx->block_frame_index == block->GetFrameCount() ||
- block->GetTrackNumber() != webm_ctx->video_track_index) {
- status = cluster->GetNext(block_entry, block_entry);
- if (block_entry == NULL || block_entry->EOS()) {
- block_entry_eos = true;
- continue;
- }
- get_new_block = true;
- }
- if (status || block_entry == NULL) {
- return -1;
- }
- if (get_new_block) {
- block = block_entry->GetBlock();
- if (block == NULL) return -1;
- webm_ctx->block_frame_index = 0;
- }
- } while (block_entry_eos ||
- block->GetTrackNumber() != webm_ctx->video_track_index);
-
- webm_ctx->cluster = cluster;
- webm_ctx->block_entry = block_entry;
- webm_ctx->block = block;
-
- const mkvparser::Block::Frame &frame =
- block->GetFrame(webm_ctx->block_frame_index);
- ++webm_ctx->block_frame_index;
- if (frame.len > static_cast<long>(*buffer_size)) {
- delete[] * buffer;
- *buffer = new uint8_t[frame.len];
- webm_ctx->buffer = *buffer;
- if (*buffer == NULL) {
- return -1;
- }
- *buffer_size = frame.len;
- }
- *bytes_read = frame.len;
- webm_ctx->timestamp_ns = block->GetTime(cluster);
- webm_ctx->is_key_frame = block->IsKey();
-
- mkvparser::MkvReader *const reader =
- reinterpret_cast<mkvparser::MkvReader *>(webm_ctx->reader);
- return frame.Read(reader, *buffer) ? -1 : 0;
-}
-
-int webm_guess_framerate(struct WebmInputContext *webm_ctx,
- struct AvxInputContext *aom_ctx) {
- uint32_t i = 0;
- uint8_t *buffer = NULL;
- size_t buffer_size = 0;
- size_t bytes_read = 0;
- assert(webm_ctx->buffer == NULL);
- while (webm_ctx->timestamp_ns < 1000000000 && i < 50) {
- if (webm_read_frame(webm_ctx, &buffer, &bytes_read, &buffer_size)) {
- break;
- }
- ++i;
- }
- aom_ctx->framerate.numerator = (i - 1) * 1000000;
- aom_ctx->framerate.denominator =
- static_cast<int>(webm_ctx->timestamp_ns / 1000);
- delete[] buffer;
- webm_ctx->buffer = NULL;
-
- get_first_cluster(webm_ctx);
- webm_ctx->block = NULL;
- webm_ctx->block_entry = NULL;
- webm_ctx->block_frame_index = 0;
- webm_ctx->timestamp_ns = 0;
- webm_ctx->reached_eos = 0;
-
- return 0;
-}
-
-void webm_free(struct WebmInputContext *webm_ctx) { reset(webm_ctx); }
diff --git a/third_party/aom/common/webmdec.h b/third_party/aom/common/webmdec.h
deleted file mode 100644
index 5ac75cb30..000000000
--- a/third_party/aom/common/webmdec.h
+++ /dev/null
@@ -1,71 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-#ifndef AOM_COMMON_WEBMDEC_H_
-#define AOM_COMMON_WEBMDEC_H_
-
-#include "common/tools_common.h"
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-struct AvxInputContext;
-
-struct WebmInputContext {
- void *reader;
- void *segment;
- uint8_t *buffer;
- const void *cluster;
- const void *block_entry;
- const void *block;
- int block_frame_index;
- int video_track_index;
- uint64_t timestamp_ns;
- int is_key_frame;
- int reached_eos;
-};
-
-// Checks if the input is a WebM file. If so, initializes WebMInputContext so
-// that webm_read_frame can be called to retrieve a video frame.
-// Returns 1 on success and 0 on failure or input is not WebM file.
-// TODO(vigneshv): Refactor this function into two smaller functions specific
-// to their task.
-int file_is_webm(struct WebmInputContext *webm_ctx,
- struct AvxInputContext *aom_ctx);
-
-// Reads a WebM Video Frame. Memory for the buffer is created, owned and managed
-// by this function. For the first call, |buffer| should be NULL and
-// |*buffer_size| should be 0. Once all the frames are read and used,
-// webm_free() should be called, otherwise there will be a leak.
-// Parameters:
-// webm_ctx - WebmInputContext object
-// buffer - pointer where the frame data will be filled.
-// bytes_read - pointer to bytes read.
-// buffer_size - pointer to buffer size.
-// Return values:
-// 0 - Success
-// 1 - End of Stream
-// -1 - Error
-int webm_read_frame(struct WebmInputContext *webm_ctx, uint8_t **buffer,
- size_t *bytes_read, size_t *buffer_size);
-
-// Guesses the frame rate of the input file based on the container timestamps.
-int webm_guess_framerate(struct WebmInputContext *webm_ctx,
- struct AvxInputContext *aom_ctx);
-
-// Resets the WebMInputContext.
-void webm_free(struct WebmInputContext *webm_ctx);
-
-#ifdef __cplusplus
-} // extern "C"
-#endif
-
-#endif // AOM_COMMON_WEBMDEC_H_
diff --git a/third_party/aom/common/webmenc.cc b/third_party/aom/common/webmenc.cc
deleted file mode 100644
index 58ab33670..000000000
--- a/third_party/aom/common/webmenc.cc
+++ /dev/null
@@ -1,96 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#include "common/webmenc.h"
-
-#include <string>
-
-#include "third_party/libwebm/mkvmuxer/mkvmuxer.h"
-#include "third_party/libwebm/mkvmuxer/mkvmuxerutil.h"
-#include "third_party/libwebm/mkvmuxer/mkvwriter.h"
-
-namespace {
-const uint64_t kDebugTrackUid = 0xDEADBEEF;
-const int kVideoTrackNumber = 1;
-} // namespace
-
-void write_webm_file_header(struct WebmOutputContext *webm_ctx,
- const aom_codec_enc_cfg_t *cfg,
- stereo_format_t stereo_fmt, unsigned int fourcc,
- const struct AvxRational *par) {
- mkvmuxer::MkvWriter *const writer = new mkvmuxer::MkvWriter(webm_ctx->stream);
- mkvmuxer::Segment *const segment = new mkvmuxer::Segment();
- segment->Init(writer);
- segment->set_mode(mkvmuxer::Segment::kFile);
- segment->OutputCues(true);
-
- mkvmuxer::SegmentInfo *const info = segment->GetSegmentInfo();
- const uint64_t kTimecodeScale = 1000000;
- info->set_timecode_scale(kTimecodeScale);
- std::string version = "aomenc";
- if (!webm_ctx->debug) {
- version.append(std::string(" ") + aom_codec_version_str());
- }
- info->set_writing_app(version.c_str());
-
- const uint64_t video_track_id =
- segment->AddVideoTrack(static_cast<int>(cfg->g_w),
- static_cast<int>(cfg->g_h), kVideoTrackNumber);
- mkvmuxer::VideoTrack *const video_track = static_cast<mkvmuxer::VideoTrack *>(
- segment->GetTrackByNumber(video_track_id));
- video_track->SetStereoMode(stereo_fmt);
- const char *codec_id;
- switch (fourcc) {
- case AV1_FOURCC: codec_id = "V_AV1"; break;
- default: codec_id = "V_AV1"; break;
- }
- video_track->set_codec_id(codec_id);
- if (par->numerator > 1 || par->denominator > 1) {
- // TODO(fgalligan): Add support of DisplayUnit, Display Aspect Ratio type
- // to WebM format.
- const uint64_t display_width = static_cast<uint64_t>(
- ((cfg->g_w * par->numerator * 1.0) / par->denominator) + .5);
- video_track->set_display_width(display_width);
- video_track->set_display_height(cfg->g_h);
- }
- if (webm_ctx->debug) {
- video_track->set_uid(kDebugTrackUid);
- }
- webm_ctx->writer = writer;
- webm_ctx->segment = segment;
-}
-
-void write_webm_block(struct WebmOutputContext *webm_ctx,
- const aom_codec_enc_cfg_t *cfg,
- const aom_codec_cx_pkt_t *pkt) {
- mkvmuxer::Segment *const segment =
- reinterpret_cast<mkvmuxer::Segment *>(webm_ctx->segment);
- int64_t pts_ns = pkt->data.frame.pts * 1000000000ll * cfg->g_timebase.num /
- cfg->g_timebase.den;
- if (pts_ns <= webm_ctx->last_pts_ns) pts_ns = webm_ctx->last_pts_ns + 1000000;
- webm_ctx->last_pts_ns = pts_ns;
-
- segment->AddFrame(static_cast<uint8_t *>(pkt->data.frame.buf),
- pkt->data.frame.sz, kVideoTrackNumber, pts_ns,
- pkt->data.frame.flags & AOM_FRAME_IS_KEY);
-}
-
-void write_webm_file_footer(struct WebmOutputContext *webm_ctx) {
- mkvmuxer::MkvWriter *const writer =
- reinterpret_cast<mkvmuxer::MkvWriter *>(webm_ctx->writer);
- mkvmuxer::Segment *const segment =
- reinterpret_cast<mkvmuxer::Segment *>(webm_ctx->segment);
- segment->Finalize();
- delete segment;
- delete writer;
- webm_ctx->writer = NULL;
- webm_ctx->segment = NULL;
-}
diff --git a/third_party/aom/common/webmenc.h b/third_party/aom/common/webmenc.h
deleted file mode 100644
index aa9832fba..000000000
--- a/third_party/aom/common/webmenc.h
+++ /dev/null
@@ -1,56 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-#ifndef AOM_COMMON_WEBMENC_H_
-#define AOM_COMMON_WEBMENC_H_
-
-#include <stdio.h>
-#include <stdlib.h>
-
-#include "tools_common.h"
-#include "aom/aom_encoder.h"
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-struct WebmOutputContext {
- int debug;
- FILE *stream;
- int64_t last_pts_ns;
- void *writer;
- void *segment;
-};
-
-/* Stereo 3D packed frame format */
-typedef enum stereo_format {
- STEREO_FORMAT_MONO = 0,
- STEREO_FORMAT_LEFT_RIGHT = 1,
- STEREO_FORMAT_BOTTOM_TOP = 2,
- STEREO_FORMAT_TOP_BOTTOM = 3,
- STEREO_FORMAT_RIGHT_LEFT = 11
-} stereo_format_t;
-
-void write_webm_file_header(struct WebmOutputContext *webm_ctx,
- const aom_codec_enc_cfg_t *cfg,
- stereo_format_t stereo_fmt, unsigned int fourcc,
- const struct AvxRational *par);
-
-void write_webm_block(struct WebmOutputContext *webm_ctx,
- const aom_codec_enc_cfg_t *cfg,
- const aom_codec_cx_pkt_t *pkt);
-
-void write_webm_file_footer(struct WebmOutputContext *webm_ctx);
-
-#ifdef __cplusplus
-} // extern "C"
-#endif
-
-#endif // AOM_COMMON_WEBMENC_H_
diff --git a/third_party/aom/common/y4menc.c b/third_party/aom/common/y4menc.c
deleted file mode 100644
index 585d22197..000000000
--- a/third_party/aom/common/y4menc.c
+++ /dev/null
@@ -1,103 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#include <assert.h>
-
-#include "common/rawenc.h"
-#include "common/y4menc.h"
-
-// Returns the Y4M name associated with the monochrome colorspace.
-const char *monochrome_colorspace(unsigned int bit_depth) {
- switch (bit_depth) {
- case 8: return "Cmono";
- case 9: return "Cmono9";
- case 10: return "Cmono10";
- case 12: return "Cmono12";
- case 16: return "Cmono16";
- default: assert(0); return NULL;
- }
-}
-
-// Return the Y4M name of the 8-bit colorspace, given the chroma position and
-// image format.
-const char *colorspace8(aom_chroma_sample_position_t csp, aom_img_fmt_t fmt) {
- switch (fmt) {
- case AOM_IMG_FMT_444A: return "C444alpha";
- case AOM_IMG_FMT_I444: return "C444";
- case AOM_IMG_FMT_I422: return "C422";
- default:
- if (csp == AOM_CSP_VERTICAL) {
- return "C420mpeg2 XYSCSS=420MPEG2";
- } else {
- return "C420jpeg";
- }
- }
-}
-
-// Return the Y4M name of the colorspace, given the bit depth and image format.
-const char *colorspace(unsigned int bit_depth, aom_chroma_sample_position_t csp,
- aom_img_fmt_t fmt) {
- switch (bit_depth) {
- case 8: return colorspace8(csp, fmt);
- case 9:
- return fmt == AOM_IMG_FMT_I44416
- ? "C444p9 XYSCSS=444P9"
- : fmt == AOM_IMG_FMT_I42216 ? "C422p9 XYSCSS=422P9"
- : "C420p9 XYSCSS=420P9";
- case 10:
- return fmt == AOM_IMG_FMT_I44416
- ? "C444p10 XYSCSS=444P10"
- : fmt == AOM_IMG_FMT_I42216 ? "C422p10 XYSCSS=422P10"
- : "C420p10 XYSCSS=420P10";
- case 12:
- return fmt == AOM_IMG_FMT_I44416
- ? "C444p12 XYSCSS=444P12"
- : fmt == AOM_IMG_FMT_I42216 ? "C422p12 XYSCSS=422P12"
- : "C420p12 XYSCSS=420P12";
- case 14:
- return fmt == AOM_IMG_FMT_I44416
- ? "C444p14 XYSCSS=444P14"
- : fmt == AOM_IMG_FMT_I42216 ? "C422p14 XYSCSS=422P14"
- : "C420p14 XYSCSS=420P14";
- case 16:
- return fmt == AOM_IMG_FMT_I44416
- ? "C444p16 XYSCSS=444P16"
- : fmt == AOM_IMG_FMT_I42216 ? "C422p16 XYSCSS=422P16"
- : "C420p16 XYSCSS=420P16";
- default: assert(0); return NULL;
- }
-}
-
-int y4m_write_file_header(char *buf, size_t len, int width, int height,
- const struct AvxRational *framerate, int monochrome,
- aom_chroma_sample_position_t csp, aom_img_fmt_t fmt,
- unsigned int bit_depth) {
- const char *color = monochrome ? monochrome_colorspace(bit_depth)
- : colorspace(bit_depth, csp, fmt);
- return snprintf(buf, len, "YUV4MPEG2 W%u H%u F%u:%u I%c %s\n", width, height,
- framerate->numerator, framerate->denominator, 'p', color);
-}
-
-int y4m_write_frame_header(char *buf, size_t len) {
- return snprintf(buf, len, "FRAME\n");
-}
-
-void y4m_write_image_file(const aom_image_t *img, const int *planes,
- FILE *file) {
- int num_planes = img->monochrome ? 1 : 3;
- raw_write_image_file(img, planes, num_planes, file);
-}
-
-void y4m_update_image_md5(const aom_image_t *img, const int *planes,
- MD5Context *md5) {
- int num_planes = img->monochrome ? 1 : 3;
- raw_update_image_md5(img, planes, num_planes, md5);
-}
diff --git a/third_party/aom/common/y4menc.h b/third_party/aom/common/y4menc.h
deleted file mode 100644
index f6d5fd86b..000000000
--- a/third_party/aom/common/y4menc.h
+++ /dev/null
@@ -1,39 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#ifndef AOM_COMMON_Y4MENC_H_
-#define AOM_COMMON_Y4MENC_H_
-
-#include "aom/aom_decoder.h"
-#include "common/md5_utils.h"
-#include "common/tools_common.h"
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-#define Y4M_BUFFER_SIZE 128
-
-int y4m_write_file_header(char *buf, size_t len, int width, int height,
- const struct AvxRational *framerate, int monochrome,
- aom_chroma_sample_position_t csp, aom_img_fmt_t fmt,
- unsigned int bit_depth);
-int y4m_write_frame_header(char *buf, size_t len);
-void y4m_write_image_file(const aom_image_t *img, const int *planes,
- FILE *file);
-void y4m_update_image_md5(const aom_image_t *img, const int *planes,
- MD5Context *md5);
-
-#ifdef __cplusplus
-} // extern "C"
-#endif
-
-#endif // AOM_COMMON_Y4MENC_H_
diff --git a/third_party/aom/common/y4minput.c b/third_party/aom/common/y4minput.c
deleted file mode 100644
index eca8b1bba..000000000
--- a/third_party/aom/common/y4minput.c
+++ /dev/null
@@ -1,1142 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- *
- * Based on code from the OggTheora software codec source code,
- * Copyright (C) 2002-2010 The Xiph.Org Foundation and contributors.
- */
-#include <errno.h>
-#include <stdlib.h>
-#include <string.h>
-
-#include "aom/aom_integer.h"
-#include "y4minput.h"
-
-// Reads 'size' bytes from 'file' into 'buf' with some fault tolerance.
-// Returns true on success.
-static int file_read(void *buf, size_t size, FILE *file) {
- const int kMaxRetries = 5;
- int retry_count = 0;
- int file_error;
- size_t len = 0;
- do {
- const size_t n = fread((uint8_t *)buf + len, 1, size - len, file);
- len += n;
- file_error = ferror(file);
- if (file_error) {
- if (errno == EINTR || errno == EAGAIN) {
- clearerr(file);
- continue;
- } else {
- fprintf(stderr, "Error reading file: %u of %u bytes read, %d: %s\n",
- (uint32_t)len, (uint32_t)size, errno, strerror(errno));
- return 0;
- }
- }
- } while (!feof(file) && len < size && ++retry_count < kMaxRetries);
-
- if (!feof(file) && len != size) {
- fprintf(stderr,
- "Error reading file: %u of %u bytes read,"
- " error: %d, retries: %d, %d: %s\n",
- (uint32_t)len, (uint32_t)size, file_error, retry_count, errno,
- strerror(errno));
- }
- return len == size;
-}
-
-static int y4m_parse_tags(y4m_input *_y4m, char *_tags) {
- int got_w;
- int got_h;
- int got_fps;
- int got_interlace;
- int got_par;
- int got_chroma;
- char *p;
- char *q;
- got_w = got_h = got_fps = got_interlace = got_par = got_chroma = 0;
- for (p = _tags;; p = q) {
- /*Skip any leading spaces.*/
- while (*p == ' ') p++;
- /*If that's all we have, stop.*/
- if (p[0] == '\0') break;
- /*Find the end of this tag.*/
- for (q = p + 1; *q != '\0' && *q != ' '; q++) {
- }
- /*Process the tag.*/
- switch (p[0]) {
- case 'W': {
- if (sscanf(p + 1, "%d", &_y4m->pic_w) != 1) return -1;
- got_w = 1;
- } break;
- case 'H': {
- if (sscanf(p + 1, "%d", &_y4m->pic_h) != 1) return -1;
- got_h = 1;
- } break;
- case 'F': {
- if (sscanf(p + 1, "%d:%d", &_y4m->fps_n, &_y4m->fps_d) != 2) {
- return -1;
- }
- got_fps = 1;
- } break;
- case 'I': {
- _y4m->interlace = p[1];
- got_interlace = 1;
- } break;
- case 'A': {
- if (sscanf(p + 1, "%d:%d", &_y4m->par_n, &_y4m->par_d) != 2) {
- return -1;
- }
- got_par = 1;
- } break;
- case 'C': {
- if (q - p > 16) return -1;
- memcpy(_y4m->chroma_type, p + 1, q - p - 1);
- _y4m->chroma_type[q - p - 1] = '\0';
- got_chroma = 1;
- } break;
- /*Ignore unknown tags.*/
- }
- }
- if (!got_w || !got_h || !got_fps) return -1;
- if (!got_interlace) _y4m->interlace = '?';
- if (!got_par) _y4m->par_n = _y4m->par_d = 0;
- /*Chroma-type is not specified in older files, e.g., those generated by
- mplayer.*/
- if (!got_chroma) strcpy(_y4m->chroma_type, "420");
- return 0;
-}
-
-/*All anti-aliasing filters in the following conversion functions are based on
- one of two window functions:
- The 6-tap Lanczos window (for down-sampling and shifts):
- sinc(\pi*t)*sinc(\pi*t/3), |t|<3 (sinc(t)==sin(t)/t)
- 0, |t|>=3
- The 4-tap Mitchell window (for up-sampling):
- 7|t|^3-12|t|^2+16/3, |t|<1
- -(7/3)|x|^3+12|x|^2-20|x|+32/3, |t|<2
- 0, |t|>=2
- The number of taps is intentionally kept small to reduce computational
- overhead and limit ringing.
-
- The taps from these filters are scaled so that their sum is 1, and the
- result is scaled by 128 and rounded to integers to create a filter whose
- intermediate values fit inside 16 bits.
- Coefficients are rounded in such a way as to ensure their sum is still 128,
- which is usually equivalent to normal rounding.
-
- Conversions which require both horizontal and vertical filtering could
- have these steps pipelined, for less memory consumption and better cache
- performance, but we do them separately for simplicity.*/
-#define OC_MINI(_a, _b) ((_a) > (_b) ? (_b) : (_a))
-#define OC_MAXI(_a, _b) ((_a) < (_b) ? (_b) : (_a))
-#define OC_CLAMPI(_a, _b, _c) (OC_MAXI(_a, OC_MINI(_b, _c)))
-
-/*420jpeg chroma samples are sited like:
- Y-------Y-------Y-------Y-------
- | | | |
- | BR | | BR |
- | | | |
- Y-------Y-------Y-------Y-------
- | | | |
- | | | |
- | | | |
- Y-------Y-------Y-------Y-------
- | | | |
- | BR | | BR |
- | | | |
- Y-------Y-------Y-------Y-------
- | | | |
- | | | |
- | | | |
-
- 420mpeg2 chroma samples are sited like:
- Y-------Y-------Y-------Y-------
- | | | |
- BR | BR |
- | | | |
- Y-------Y-------Y-------Y-------
- | | | |
- | | | |
- | | | |
- Y-------Y-------Y-------Y-------
- | | | |
- BR | BR |
- | | | |
- Y-------Y-------Y-------Y-------
- | | | |
- | | | |
- | | | |
-
- We use a resampling filter to shift the site locations one quarter pixel (at
- the chroma plane's resolution) to the right.
- The 4:2:2 modes look exactly the same, except there are twice as many chroma
- lines, and they are vertically co-sited with the luma samples in both the
- mpeg2 and jpeg cases (thus requiring no vertical resampling).*/
-static void y4m_42xmpeg2_42xjpeg_helper(unsigned char *_dst,
- const unsigned char *_src, int _c_w,
- int _c_h) {
- int y;
- int x;
- for (y = 0; y < _c_h; y++) {
- /*Filter: [4 -17 114 35 -9 1]/128, derived from a 6-tap Lanczos
- window.*/
- for (x = 0; x < OC_MINI(_c_w, 2); x++) {
- _dst[x] = (unsigned char)OC_CLAMPI(
- 0,
- (4 * _src[0] - 17 * _src[OC_MAXI(x - 1, 0)] + 114 * _src[x] +
- 35 * _src[OC_MINI(x + 1, _c_w - 1)] -
- 9 * _src[OC_MINI(x + 2, _c_w - 1)] + _src[OC_MINI(x + 3, _c_w - 1)] +
- 64) >>
- 7,
- 255);
- }
- for (; x < _c_w - 3; x++) {
- _dst[x] = (unsigned char)OC_CLAMPI(
- 0,
- (4 * _src[x - 2] - 17 * _src[x - 1] + 114 * _src[x] +
- 35 * _src[x + 1] - 9 * _src[x + 2] + _src[x + 3] + 64) >>
- 7,
- 255);
- }
- for (; x < _c_w; x++) {
- _dst[x] = (unsigned char)OC_CLAMPI(
- 0,
- (4 * _src[x - 2] - 17 * _src[x - 1] + 114 * _src[x] +
- 35 * _src[OC_MINI(x + 1, _c_w - 1)] -
- 9 * _src[OC_MINI(x + 2, _c_w - 1)] + _src[_c_w - 1] + 64) >>
- 7,
- 255);
- }
- _dst += _c_w;
- _src += _c_w;
- }
-}
-
-/*Handles both 422 and 420mpeg2 to 422jpeg and 420jpeg, respectively.*/
-static void y4m_convert_42xmpeg2_42xjpeg(y4m_input *_y4m, unsigned char *_dst,
- unsigned char *_aux) {
- int c_w;
- int c_h;
- int c_sz;
- int pli;
- /*Skip past the luma data.*/
- _dst += _y4m->pic_w * _y4m->pic_h;
- /*Compute the size of each chroma plane.*/
- c_w = (_y4m->pic_w + _y4m->dst_c_dec_h - 1) / _y4m->dst_c_dec_h;
- c_h = (_y4m->pic_h + _y4m->dst_c_dec_v - 1) / _y4m->dst_c_dec_v;
- c_sz = c_w * c_h;
- for (pli = 1; pli < 3; pli++) {
- y4m_42xmpeg2_42xjpeg_helper(_dst, _aux, c_w, c_h);
- _dst += c_sz;
- _aux += c_sz;
- }
-}
-
-/*This format is only used for interlaced content, but is included for
- completeness.
-
- 420jpeg chroma samples are sited like:
- Y-------Y-------Y-------Y-------
- | | | |
- | BR | | BR |
- | | | |
- Y-------Y-------Y-------Y-------
- | | | |
- | | | |
- | | | |
- Y-------Y-------Y-------Y-------
- | | | |
- | BR | | BR |
- | | | |
- Y-------Y-------Y-------Y-------
- | | | |
- | | | |
- | | | |
-
- 420paldv chroma samples are sited like:
- YR------Y-------YR------Y-------
- | | | |
- | | | |
- | | | |
- YB------Y-------YB------Y-------
- | | | |
- | | | |
- | | | |
- YR------Y-------YR------Y-------
- | | | |
- | | | |
- | | | |
- YB------Y-------YB------Y-------
- | | | |
- | | | |
- | | | |
-
- We use a resampling filter to shift the site locations one quarter pixel (at
- the chroma plane's resolution) to the right.
- Then we use another filter to move the C_r location down one quarter pixel,
- and the C_b location up one quarter pixel.*/
-static void y4m_convert_42xpaldv_42xjpeg(y4m_input *_y4m, unsigned char *_dst,
- unsigned char *_aux) {
- unsigned char *tmp;
- int c_w;
- int c_h;
- int c_sz;
- int pli;
- int y;
- int x;
- /*Skip past the luma data.*/
- _dst += _y4m->pic_w * _y4m->pic_h;
- /*Compute the size of each chroma plane.*/
- c_w = (_y4m->pic_w + 1) / 2;
- c_h = (_y4m->pic_h + _y4m->dst_c_dec_h - 1) / _y4m->dst_c_dec_h;
- c_sz = c_w * c_h;
- tmp = _aux + 2 * c_sz;
- for (pli = 1; pli < 3; pli++) {
- /*First do the horizontal re-sampling.
- This is the same as the mpeg2 case, except that after the horizontal
- case, we need to apply a second vertical filter.*/
- y4m_42xmpeg2_42xjpeg_helper(tmp, _aux, c_w, c_h);
- _aux += c_sz;
- switch (pli) {
- case 1: {
- /*Slide C_b up a quarter-pel.
- This is the same filter used above, but in the other order.*/
- for (x = 0; x < c_w; x++) {
- for (y = 0; y < OC_MINI(c_h, 3); y++) {
- _dst[y * c_w] = (unsigned char)OC_CLAMPI(
- 0,
- (tmp[0] - 9 * tmp[OC_MAXI(y - 2, 0) * c_w] +
- 35 * tmp[OC_MAXI(y - 1, 0) * c_w] + 114 * tmp[y * c_w] -
- 17 * tmp[OC_MINI(y + 1, c_h - 1) * c_w] +
- 4 * tmp[OC_MINI(y + 2, c_h - 1) * c_w] + 64) >>
- 7,
- 255);
- }
- for (; y < c_h - 2; y++) {
- _dst[y * c_w] = (unsigned char)OC_CLAMPI(
- 0,
- (tmp[(y - 3) * c_w] - 9 * tmp[(y - 2) * c_w] +
- 35 * tmp[(y - 1) * c_w] + 114 * tmp[y * c_w] -
- 17 * tmp[(y + 1) * c_w] + 4 * tmp[(y + 2) * c_w] + 64) >>
- 7,
- 255);
- }
- for (; y < c_h; y++) {
- _dst[y * c_w] = (unsigned char)OC_CLAMPI(
- 0,
- (tmp[(y - 3) * c_w] - 9 * tmp[(y - 2) * c_w] +
- 35 * tmp[(y - 1) * c_w] + 114 * tmp[y * c_w] -
- 17 * tmp[OC_MINI(y + 1, c_h - 1) * c_w] +
- 4 * tmp[(c_h - 1) * c_w] + 64) >>
- 7,
- 255);
- }
- _dst++;
- tmp++;
- }
- _dst += c_sz - c_w;
- tmp -= c_w;
- } break;
- case 2: {
- /*Slide C_r down a quarter-pel.
- This is the same as the horizontal filter.*/
- for (x = 0; x < c_w; x++) {
- for (y = 0; y < OC_MINI(c_h, 2); y++) {
- _dst[y * c_w] = (unsigned char)OC_CLAMPI(
- 0,
- (4 * tmp[0] - 17 * tmp[OC_MAXI(y - 1, 0) * c_w] +
- 114 * tmp[y * c_w] + 35 * tmp[OC_MINI(y + 1, c_h - 1) * c_w] -
- 9 * tmp[OC_MINI(y + 2, c_h - 1) * c_w] +
- tmp[OC_MINI(y + 3, c_h - 1) * c_w] + 64) >>
- 7,
- 255);
- }
- for (; y < c_h - 3; y++) {
- _dst[y * c_w] = (unsigned char)OC_CLAMPI(
- 0,
- (4 * tmp[(y - 2) * c_w] - 17 * tmp[(y - 1) * c_w] +
- 114 * tmp[y * c_w] + 35 * tmp[(y + 1) * c_w] -
- 9 * tmp[(y + 2) * c_w] + tmp[(y + 3) * c_w] + 64) >>
- 7,
- 255);
- }
- for (; y < c_h; y++) {
- _dst[y * c_w] = (unsigned char)OC_CLAMPI(
- 0,
- (4 * tmp[(y - 2) * c_w] - 17 * tmp[(y - 1) * c_w] +
- 114 * tmp[y * c_w] + 35 * tmp[OC_MINI(y + 1, c_h - 1) * c_w] -
- 9 * tmp[OC_MINI(y + 2, c_h - 1) * c_w] + tmp[(c_h - 1) * c_w] +
- 64) >>
- 7,
- 255);
- }
- _dst++;
- tmp++;
- }
- } break;
- }
- /*For actual interlaced material, this would have to be done separately on
- each field, and the shift amounts would be different.
- C_r moves down 1/8, C_b up 3/8 in the top field, and C_r moves down 3/8,
- C_b up 1/8 in the bottom field.
- The corresponding filters would be:
- Down 1/8 (reverse order for up): [3 -11 125 15 -4 0]/128
- Down 3/8 (reverse order for up): [4 -19 98 56 -13 2]/128*/
- }
-}
-
-/*Perform vertical filtering to reduce a single plane from 4:2:2 to 4:2:0.
- This is used as a helper by several conversion routines.*/
-static void y4m_422jpeg_420jpeg_helper(unsigned char *_dst,
- const unsigned char *_src, int _c_w,
- int _c_h) {
- int y;
- int x;
- /*Filter: [3 -17 78 78 -17 3]/128, derived from a 6-tap Lanczos window.*/
- for (x = 0; x < _c_w; x++) {
- for (y = 0; y < OC_MINI(_c_h, 2); y += 2) {
- _dst[(y >> 1) * _c_w] =
- OC_CLAMPI(0,
- (64 * _src[0] + 78 * _src[OC_MINI(1, _c_h - 1) * _c_w] -
- 17 * _src[OC_MINI(2, _c_h - 1) * _c_w] +
- 3 * _src[OC_MINI(3, _c_h - 1) * _c_w] + 64) >>
- 7,
- 255);
- }
- for (; y < _c_h - 3; y += 2) {
- _dst[(y >> 1) * _c_w] =
- OC_CLAMPI(0,
- (3 * (_src[(y - 2) * _c_w] + _src[(y + 3) * _c_w]) -
- 17 * (_src[(y - 1) * _c_w] + _src[(y + 2) * _c_w]) +
- 78 * (_src[y * _c_w] + _src[(y + 1) * _c_w]) + 64) >>
- 7,
- 255);
- }
- for (; y < _c_h; y += 2) {
- _dst[(y >> 1) * _c_w] = OC_CLAMPI(
- 0,
- (3 * (_src[(y - 2) * _c_w] + _src[(_c_h - 1) * _c_w]) -
- 17 * (_src[(y - 1) * _c_w] + _src[OC_MINI(y + 2, _c_h - 1) * _c_w]) +
- 78 * (_src[y * _c_w] + _src[OC_MINI(y + 1, _c_h - 1) * _c_w]) +
- 64) >>
- 7,
- 255);
- }
- _src++;
- _dst++;
- }
-}
-
-/*420jpeg chroma samples are sited like:
- Y-------Y-------Y-------Y-------
- | | | |
- | BR | | BR |
- | | | |
- Y-------Y-------Y-------Y-------
- | | | |
- | | | |
- | | | |
- Y-------Y-------Y-------Y-------
- | | | |
- | BR | | BR |
- | | | |
- Y-------Y-------Y-------Y-------
- | | | |
- | | | |
- | | | |
-
- 422jpeg chroma samples are sited like:
- Y---BR--Y-------Y---BR--Y-------
- | | | |
- | | | |
- | | | |
- Y---BR--Y-------Y---BR--Y-------
- | | | |
- | | | |
- | | | |
- Y---BR--Y-------Y---BR--Y-------
- | | | |
- | | | |
- | | | |
- Y---BR--Y-------Y---BR--Y-------
- | | | |
- | | | |
- | | | |
-
- We use a resampling filter to decimate the chroma planes by two in the
- vertical direction.*/
-static void y4m_convert_422jpeg_420jpeg(y4m_input *_y4m, unsigned char *_dst,
- unsigned char *_aux) {
- int c_w;
- int c_h;
- int c_sz;
- int dst_c_w;
- int dst_c_h;
- int dst_c_sz;
- int pli;
- /*Skip past the luma data.*/
- _dst += _y4m->pic_w * _y4m->pic_h;
- /*Compute the size of each chroma plane.*/
- c_w = (_y4m->pic_w + _y4m->src_c_dec_h - 1) / _y4m->src_c_dec_h;
- c_h = _y4m->pic_h;
- dst_c_w = (_y4m->pic_w + _y4m->dst_c_dec_h - 1) / _y4m->dst_c_dec_h;
- dst_c_h = (_y4m->pic_h + _y4m->dst_c_dec_v - 1) / _y4m->dst_c_dec_v;
- c_sz = c_w * c_h;
- dst_c_sz = dst_c_w * dst_c_h;
- for (pli = 1; pli < 3; pli++) {
- y4m_422jpeg_420jpeg_helper(_dst, _aux, c_w, c_h);
- _aux += c_sz;
- _dst += dst_c_sz;
- }
-}
-
-/*420jpeg chroma samples are sited like:
- Y-------Y-------Y-------Y-------
- | | | |
- | BR | | BR |
- | | | |
- Y-------Y-------Y-------Y-------
- | | | |
- | | | |
- | | | |
- Y-------Y-------Y-------Y-------
- | | | |
- | BR | | BR |
- | | | |
- Y-------Y-------Y-------Y-------
- | | | |
- | | | |
- | | | |
-
- 422 chroma samples are sited like:
- YBR-----Y-------YBR-----Y-------
- | | | |
- | | | |
- | | | |
- YBR-----Y-------YBR-----Y-------
- | | | |
- | | | |
- | | | |
- YBR-----Y-------YBR-----Y-------
- | | | |
- | | | |
- | | | |
- YBR-----Y-------YBR-----Y-------
- | | | |
- | | | |
- | | | |
-
- We use a resampling filter to shift the original site locations one quarter
- pixel (at the original chroma resolution) to the right.
- Then we use a second resampling filter to decimate the chroma planes by two
- in the vertical direction.*/
-static void y4m_convert_422_420jpeg(y4m_input *_y4m, unsigned char *_dst,
- unsigned char *_aux) {
- unsigned char *tmp;
- int c_w;
- int c_h;
- int c_sz;
- int dst_c_h;
- int dst_c_sz;
- int pli;
- /*Skip past the luma data.*/
- _dst += _y4m->pic_w * _y4m->pic_h;
- /*Compute the size of each chroma plane.*/
- c_w = (_y4m->pic_w + _y4m->src_c_dec_h - 1) / _y4m->src_c_dec_h;
- c_h = _y4m->pic_h;
- dst_c_h = (_y4m->pic_h + _y4m->dst_c_dec_v - 1) / _y4m->dst_c_dec_v;
- c_sz = c_w * c_h;
- dst_c_sz = c_w * dst_c_h;
- tmp = _aux + 2 * c_sz;
- for (pli = 1; pli < 3; pli++) {
- /*In reality, the horizontal and vertical steps could be pipelined, for
- less memory consumption and better cache performance, but we do them
- separately for simplicity.*/
- /*First do horizontal filtering (convert to 422jpeg)*/
- y4m_42xmpeg2_42xjpeg_helper(tmp, _aux, c_w, c_h);
- /*Now do the vertical filtering.*/
- y4m_422jpeg_420jpeg_helper(_dst, tmp, c_w, c_h);
- _aux += c_sz;
- _dst += dst_c_sz;
- }
-}
-
-/*420jpeg chroma samples are sited like:
- Y-------Y-------Y-------Y-------
- | | | |
- | BR | | BR |
- | | | |
- Y-------Y-------Y-------Y-------
- | | | |
- | | | |
- | | | |
- Y-------Y-------Y-------Y-------
- | | | |
- | BR | | BR |
- | | | |
- Y-------Y-------Y-------Y-------
- | | | |
- | | | |
- | | | |
-
- 411 chroma samples are sited like:
- YBR-----Y-------Y-------Y-------
- | | | |
- | | | |
- | | | |
- YBR-----Y-------Y-------Y-------
- | | | |
- | | | |
- | | | |
- YBR-----Y-------Y-------Y-------
- | | | |
- | | | |
- | | | |
- YBR-----Y-------Y-------Y-------
- | | | |
- | | | |
- | | | |
-
- We use a filter to resample at site locations one eighth pixel (at the source
- chroma plane's horizontal resolution) and five eighths of a pixel to the
- right.
- Then we use another filter to decimate the planes by 2 in the vertical
- direction.*/
-static void y4m_convert_411_420jpeg(y4m_input *_y4m, unsigned char *_dst,
- unsigned char *_aux) {
- unsigned char *tmp;
- int c_w;
- int c_h;
- int c_sz;
- int dst_c_w;
- int dst_c_h;
- int dst_c_sz;
- int tmp_sz;
- int pli;
- int y;
- int x;
- /*Skip past the luma data.*/
- _dst += _y4m->pic_w * _y4m->pic_h;
- /*Compute the size of each chroma plane.*/
- c_w = (_y4m->pic_w + _y4m->src_c_dec_h - 1) / _y4m->src_c_dec_h;
- c_h = _y4m->pic_h;
- dst_c_w = (_y4m->pic_w + _y4m->dst_c_dec_h - 1) / _y4m->dst_c_dec_h;
- dst_c_h = (_y4m->pic_h + _y4m->dst_c_dec_v - 1) / _y4m->dst_c_dec_v;
- c_sz = c_w * c_h;
- dst_c_sz = dst_c_w * dst_c_h;
- tmp_sz = dst_c_w * c_h;
- tmp = _aux + 2 * c_sz;
- for (pli = 1; pli < 3; pli++) {
- /*In reality, the horizontal and vertical steps could be pipelined, for
- less memory consumption and better cache performance, but we do them
- separately for simplicity.*/
- /*First do horizontal filtering (convert to 422jpeg)*/
- for (y = 0; y < c_h; y++) {
- /*Filters: [1 110 18 -1]/128 and [-3 50 86 -5]/128, both derived from a
- 4-tap Mitchell window.*/
- for (x = 0; x < OC_MINI(c_w, 1); x++) {
- tmp[x << 1] = (unsigned char)OC_CLAMPI(
- 0,
- (111 * _aux[0] + 18 * _aux[OC_MINI(1, c_w - 1)] -
- _aux[OC_MINI(2, c_w - 1)] + 64) >>
- 7,
- 255);
- tmp[x << 1 | 1] = (unsigned char)OC_CLAMPI(
- 0,
- (47 * _aux[0] + 86 * _aux[OC_MINI(1, c_w - 1)] -
- 5 * _aux[OC_MINI(2, c_w - 1)] + 64) >>
- 7,
- 255);
- }
- for (; x < c_w - 2; x++) {
- tmp[x << 1] =
- (unsigned char)OC_CLAMPI(0,
- (_aux[x - 1] + 110 * _aux[x] +
- 18 * _aux[x + 1] - _aux[x + 2] + 64) >>
- 7,
- 255);
- tmp[x << 1 | 1] = (unsigned char)OC_CLAMPI(
- 0,
- (-3 * _aux[x - 1] + 50 * _aux[x] + 86 * _aux[x + 1] -
- 5 * _aux[x + 2] + 64) >>
- 7,
- 255);
- }
- for (; x < c_w; x++) {
- tmp[x << 1] = (unsigned char)OC_CLAMPI(
- 0,
- (_aux[x - 1] + 110 * _aux[x] + 18 * _aux[OC_MINI(x + 1, c_w - 1)] -
- _aux[c_w - 1] + 64) >>
- 7,
- 255);
- if ((x << 1 | 1) < dst_c_w) {
- tmp[x << 1 | 1] = (unsigned char)OC_CLAMPI(
- 0,
- (-3 * _aux[x - 1] + 50 * _aux[x] +
- 86 * _aux[OC_MINI(x + 1, c_w - 1)] - 5 * _aux[c_w - 1] + 64) >>
- 7,
- 255);
- }
- }
- tmp += dst_c_w;
- _aux += c_w;
- }
- tmp -= tmp_sz;
- /*Now do the vertical filtering.*/
- y4m_422jpeg_420jpeg_helper(_dst, tmp, dst_c_w, c_h);
- _dst += dst_c_sz;
- }
-}
-
-/*Convert 444 to 420jpeg.*/
-static void y4m_convert_444_420jpeg(y4m_input *_y4m, unsigned char *_dst,
- unsigned char *_aux) {
- unsigned char *tmp;
- int c_w;
- int c_h;
- int c_sz;
- int dst_c_w;
- int dst_c_h;
- int dst_c_sz;
- int tmp_sz;
- int pli;
- int y;
- int x;
- /*Skip past the luma data.*/
- _dst += _y4m->pic_w * _y4m->pic_h;
- /*Compute the size of each chroma plane.*/
- c_w = (_y4m->pic_w + _y4m->src_c_dec_h - 1) / _y4m->src_c_dec_h;
- c_h = _y4m->pic_h;
- dst_c_w = (_y4m->pic_w + _y4m->dst_c_dec_h - 1) / _y4m->dst_c_dec_h;
- dst_c_h = (_y4m->pic_h + _y4m->dst_c_dec_v - 1) / _y4m->dst_c_dec_v;
- c_sz = c_w * c_h;
- dst_c_sz = dst_c_w * dst_c_h;
- tmp_sz = dst_c_w * c_h;
- tmp = _aux + 2 * c_sz;
- for (pli = 1; pli < 3; pli++) {
- /*Filter: [3 -17 78 78 -17 3]/128, derived from a 6-tap Lanczos window.*/
- for (y = 0; y < c_h; y++) {
- for (x = 0; x < OC_MINI(c_w, 2); x += 2) {
- tmp[x >> 1] = OC_CLAMPI(0,
- (64 * _aux[0] + 78 * _aux[OC_MINI(1, c_w - 1)] -
- 17 * _aux[OC_MINI(2, c_w - 1)] +
- 3 * _aux[OC_MINI(3, c_w - 1)] + 64) >>
- 7,
- 255);
- }
- for (; x < c_w - 3; x += 2) {
- tmp[x >> 1] = OC_CLAMPI(0,
- (3 * (_aux[x - 2] + _aux[x + 3]) -
- 17 * (_aux[x - 1] + _aux[x + 2]) +
- 78 * (_aux[x] + _aux[x + 1]) + 64) >>
- 7,
- 255);
- }
- for (; x < c_w; x += 2) {
- tmp[x >> 1] =
- OC_CLAMPI(0,
- (3 * (_aux[x - 2] + _aux[c_w - 1]) -
- 17 * (_aux[x - 1] + _aux[OC_MINI(x + 2, c_w - 1)]) +
- 78 * (_aux[x] + _aux[OC_MINI(x + 1, c_w - 1)]) + 64) >>
- 7,
- 255);
- }
- tmp += dst_c_w;
- _aux += c_w;
- }
- tmp -= tmp_sz;
- /*Now do the vertical filtering.*/
- y4m_422jpeg_420jpeg_helper(_dst, tmp, dst_c_w, c_h);
- _dst += dst_c_sz;
- }
-}
-
-/*The image is padded with empty chroma components at 4:2:0.*/
-static void y4m_convert_mono_420jpeg(y4m_input *_y4m, unsigned char *_dst,
- unsigned char *_aux) {
- int c_sz;
- (void)_aux;
- _dst += _y4m->pic_w * _y4m->pic_h;
- c_sz = ((_y4m->pic_w + _y4m->dst_c_dec_h - 1) / _y4m->dst_c_dec_h) *
- ((_y4m->pic_h + _y4m->dst_c_dec_v - 1) / _y4m->dst_c_dec_v);
- memset(_dst, 128, c_sz * 2);
-}
-
-/*No conversion function needed.*/
-static void y4m_convert_null(y4m_input *_y4m, unsigned char *_dst,
- unsigned char *_aux) {
- (void)_y4m;
- (void)_dst;
- (void)_aux;
-}
-
-int y4m_input_open(y4m_input *_y4m, FILE *_fin, char *_skip, int _nskip,
- int only_420) {
- char buffer[80] = { 0 };
- int ret;
- int i;
- /*Read until newline, or 80 cols, whichever happens first.*/
- for (i = 0; i < 79; i++) {
- if (_nskip > 0) {
- buffer[i] = *_skip++;
- _nskip--;
- } else {
- if (!file_read(buffer + i, 1, _fin)) return -1;
- }
- if (buffer[i] == '\n') break;
- }
- /*We skipped too much header data.*/
- if (_nskip > 0) return -1;
- if (i == 79) {
- fprintf(stderr, "Error parsing header; not a YUV2MPEG2 file?\n");
- return -1;
- }
- buffer[i] = '\0';
- if (memcmp(buffer, "YUV4MPEG", 8)) {
- fprintf(stderr, "Incomplete magic for YUV4MPEG file.\n");
- return -1;
- }
- if (buffer[8] != '2') {
- fprintf(stderr, "Incorrect YUV input file version; YUV4MPEG2 required.\n");
- }
- ret = y4m_parse_tags(_y4m, buffer + 5);
- if (ret < 0) {
- fprintf(stderr, "Error parsing YUV4MPEG2 header.\n");
- return ret;
- }
- if (_y4m->interlace == '?') {
- fprintf(stderr,
- "Warning: Input video interlacing format unknown; "
- "assuming progressive scan.\n");
- } else if (_y4m->interlace != 'p') {
- fprintf(stderr,
- "Input video is interlaced; "
- "Only progressive scan handled.\n");
- return -1;
- }
- _y4m->aom_fmt = AOM_IMG_FMT_I420;
- _y4m->bps = 12;
- _y4m->bit_depth = 8;
- if (strcmp(_y4m->chroma_type, "420") == 0 ||
- strcmp(_y4m->chroma_type, "420jpeg") == 0) {
- _y4m->src_c_dec_h = _y4m->dst_c_dec_h = _y4m->src_c_dec_v =
- _y4m->dst_c_dec_v = 2;
- _y4m->dst_buf_read_sz =
- _y4m->pic_w * _y4m->pic_h +
- 2 * ((_y4m->pic_w + 1) / 2) * ((_y4m->pic_h + 1) / 2);
- /* Natively supported: no conversion required. */
- _y4m->aux_buf_sz = _y4m->aux_buf_read_sz = 0;
- _y4m->convert = y4m_convert_null;
- } else if (strcmp(_y4m->chroma_type, "420p10") == 0) {
- _y4m->src_c_dec_h = 2;
- _y4m->dst_c_dec_h = 2;
- _y4m->src_c_dec_v = 2;
- _y4m->dst_c_dec_v = 2;
- _y4m->dst_buf_read_sz =
- 2 * (_y4m->pic_w * _y4m->pic_h +
- 2 * ((_y4m->pic_w + 1) / 2) * ((_y4m->pic_h + 1) / 2));
- /* Natively supported: no conversion required. */
- _y4m->aux_buf_sz = _y4m->aux_buf_read_sz = 0;
- _y4m->convert = y4m_convert_null;
- _y4m->bit_depth = 10;
- _y4m->bps = 15;
- _y4m->aom_fmt = AOM_IMG_FMT_I42016;
- if (only_420) {
- fprintf(stderr, "Unsupported conversion from 420p10 to 420jpeg\n");
- return -1;
- }
- } else if (strcmp(_y4m->chroma_type, "420p12") == 0) {
- _y4m->src_c_dec_h = 2;
- _y4m->dst_c_dec_h = 2;
- _y4m->src_c_dec_v = 2;
- _y4m->dst_c_dec_v = 2;
- _y4m->dst_buf_read_sz =
- 2 * (_y4m->pic_w * _y4m->pic_h +
- 2 * ((_y4m->pic_w + 1) / 2) * ((_y4m->pic_h + 1) / 2));
- /* Natively supported: no conversion required. */
- _y4m->aux_buf_sz = _y4m->aux_buf_read_sz = 0;
- _y4m->convert = y4m_convert_null;
- _y4m->bit_depth = 12;
- _y4m->bps = 18;
- _y4m->aom_fmt = AOM_IMG_FMT_I42016;
- if (only_420) {
- fprintf(stderr, "Unsupported conversion from 420p12 to 420jpeg\n");
- return -1;
- }
- } else if (strcmp(_y4m->chroma_type, "420mpeg2") == 0) {
- _y4m->src_c_dec_h = _y4m->dst_c_dec_h = _y4m->src_c_dec_v =
- _y4m->dst_c_dec_v = 2;
- _y4m->dst_buf_read_sz = _y4m->pic_w * _y4m->pic_h;
- /*Chroma filter required: read into the aux buf first.*/
- _y4m->aux_buf_sz = _y4m->aux_buf_read_sz =
- 2 * ((_y4m->pic_w + 1) / 2) * ((_y4m->pic_h + 1) / 2);
- _y4m->convert = y4m_convert_42xmpeg2_42xjpeg;
- } else if (strcmp(_y4m->chroma_type, "420paldv") == 0) {
- _y4m->src_c_dec_h = _y4m->dst_c_dec_h = _y4m->src_c_dec_v =
- _y4m->dst_c_dec_v = 2;
- _y4m->dst_buf_read_sz = _y4m->pic_w * _y4m->pic_h;
- /*Chroma filter required: read into the aux buf first.
- We need to make two filter passes, so we need some extra space in the
- aux buffer.*/
- _y4m->aux_buf_sz = 3 * ((_y4m->pic_w + 1) / 2) * ((_y4m->pic_h + 1) / 2);
- _y4m->aux_buf_read_sz =
- 2 * ((_y4m->pic_w + 1) / 2) * ((_y4m->pic_h + 1) / 2);
- _y4m->convert = y4m_convert_42xpaldv_42xjpeg;
- } else if (strcmp(_y4m->chroma_type, "422jpeg") == 0) {
- _y4m->src_c_dec_h = _y4m->dst_c_dec_h = 2;
- _y4m->src_c_dec_v = 1;
- _y4m->dst_c_dec_v = 2;
- _y4m->dst_buf_read_sz = _y4m->pic_w * _y4m->pic_h;
- /*Chroma filter required: read into the aux buf first.*/
- _y4m->aux_buf_sz = _y4m->aux_buf_read_sz =
- 2 * ((_y4m->pic_w + 1) / 2) * _y4m->pic_h;
- _y4m->convert = y4m_convert_422jpeg_420jpeg;
- } else if (strcmp(_y4m->chroma_type, "422") == 0) {
- _y4m->src_c_dec_h = 2;
- _y4m->src_c_dec_v = 1;
- if (only_420) {
- _y4m->dst_c_dec_h = 2;
- _y4m->dst_c_dec_v = 2;
- _y4m->dst_buf_read_sz = _y4m->pic_w * _y4m->pic_h;
- /*Chroma filter required: read into the aux buf first.
- We need to make two filter passes, so we need some extra space in the
- aux buffer.*/
- _y4m->aux_buf_read_sz = 2 * ((_y4m->pic_w + 1) / 2) * _y4m->pic_h;
- _y4m->aux_buf_sz =
- _y4m->aux_buf_read_sz + ((_y4m->pic_w + 1) / 2) * _y4m->pic_h;
- _y4m->convert = y4m_convert_422_420jpeg;
- } else {
- _y4m->aom_fmt = AOM_IMG_FMT_I422;
- _y4m->bps = 16;
- _y4m->dst_c_dec_h = _y4m->src_c_dec_h;
- _y4m->dst_c_dec_v = _y4m->src_c_dec_v;
- _y4m->dst_buf_read_sz =
- _y4m->pic_w * _y4m->pic_h + 2 * ((_y4m->pic_w + 1) / 2) * _y4m->pic_h;
- /*Natively supported: no conversion required.*/
- _y4m->aux_buf_sz = _y4m->aux_buf_read_sz = 0;
- _y4m->convert = y4m_convert_null;
- }
- } else if (strcmp(_y4m->chroma_type, "422p10") == 0) {
- _y4m->src_c_dec_h = 2;
- _y4m->src_c_dec_v = 1;
- _y4m->aom_fmt = AOM_IMG_FMT_I42216;
- _y4m->bps = 20;
- _y4m->bit_depth = 10;
- _y4m->dst_c_dec_h = _y4m->src_c_dec_h;
- _y4m->dst_c_dec_v = _y4m->src_c_dec_v;
- _y4m->dst_buf_read_sz = 2 * (_y4m->pic_w * _y4m->pic_h +
- 2 * ((_y4m->pic_w + 1) / 2) * _y4m->pic_h);
- _y4m->aux_buf_sz = _y4m->aux_buf_read_sz = 0;
- _y4m->convert = y4m_convert_null;
- if (only_420) {
- fprintf(stderr, "Unsupported conversion from 422p10 to 420jpeg\n");
- return -1;
- }
- } else if (strcmp(_y4m->chroma_type, "422p12") == 0) {
- _y4m->src_c_dec_h = 2;
- _y4m->src_c_dec_v = 1;
- _y4m->aom_fmt = AOM_IMG_FMT_I42216;
- _y4m->bps = 24;
- _y4m->bit_depth = 12;
- _y4m->dst_c_dec_h = _y4m->src_c_dec_h;
- _y4m->dst_c_dec_v = _y4m->src_c_dec_v;
- _y4m->dst_buf_read_sz = 2 * (_y4m->pic_w * _y4m->pic_h +
- 2 * ((_y4m->pic_w + 1) / 2) * _y4m->pic_h);
- _y4m->aux_buf_sz = _y4m->aux_buf_read_sz = 0;
- _y4m->convert = y4m_convert_null;
- if (only_420) {
- fprintf(stderr, "Unsupported conversion from 422p12 to 420jpeg\n");
- return -1;
- }
- } else if (strcmp(_y4m->chroma_type, "411") == 0) {
- _y4m->src_c_dec_h = 4;
- _y4m->dst_c_dec_h = 2;
- _y4m->src_c_dec_v = 1;
- _y4m->dst_c_dec_v = 2;
- _y4m->dst_buf_read_sz = _y4m->pic_w * _y4m->pic_h;
- /*Chroma filter required: read into the aux buf first.
- We need to make two filter passes, so we need some extra space in the
- aux buffer.*/
- _y4m->aux_buf_read_sz = 2 * ((_y4m->pic_w + 3) / 4) * _y4m->pic_h;
- _y4m->aux_buf_sz =
- _y4m->aux_buf_read_sz + ((_y4m->pic_w + 1) / 2) * _y4m->pic_h;
- _y4m->convert = y4m_convert_411_420jpeg;
- } else if (strcmp(_y4m->chroma_type, "444") == 0) {
- _y4m->src_c_dec_h = 1;
- _y4m->src_c_dec_v = 1;
- if (only_420) {
- _y4m->dst_c_dec_h = 2;
- _y4m->dst_c_dec_v = 2;
- _y4m->dst_buf_read_sz = _y4m->pic_w * _y4m->pic_h;
- /*Chroma filter required: read into the aux buf first.
- We need to make two filter passes, so we need some extra space in the
- aux buffer.*/
- _y4m->aux_buf_read_sz = 2 * _y4m->pic_w * _y4m->pic_h;
- _y4m->aux_buf_sz =
- _y4m->aux_buf_read_sz + ((_y4m->pic_w + 1) / 2) * _y4m->pic_h;
- _y4m->convert = y4m_convert_444_420jpeg;
- } else {
- _y4m->aom_fmt = AOM_IMG_FMT_I444;
- _y4m->bps = 24;
- _y4m->dst_c_dec_h = _y4m->src_c_dec_h;
- _y4m->dst_c_dec_v = _y4m->src_c_dec_v;
- _y4m->dst_buf_read_sz = 3 * _y4m->pic_w * _y4m->pic_h;
- /*Natively supported: no conversion required.*/
- _y4m->aux_buf_sz = _y4m->aux_buf_read_sz = 0;
- _y4m->convert = y4m_convert_null;
- }
- } else if (strcmp(_y4m->chroma_type, "444p10") == 0) {
- _y4m->src_c_dec_h = 1;
- _y4m->src_c_dec_v = 1;
- _y4m->aom_fmt = AOM_IMG_FMT_I44416;
- _y4m->bps = 30;
- _y4m->bit_depth = 10;
- _y4m->dst_c_dec_h = _y4m->src_c_dec_h;
- _y4m->dst_c_dec_v = _y4m->src_c_dec_v;
- _y4m->dst_buf_read_sz = 2 * 3 * _y4m->pic_w * _y4m->pic_h;
- _y4m->aux_buf_sz = _y4m->aux_buf_read_sz = 0;
- _y4m->convert = y4m_convert_null;
- if (only_420) {
- fprintf(stderr, "Unsupported conversion from 444p10 to 420jpeg\n");
- return -1;
- }
- } else if (strcmp(_y4m->chroma_type, "444p12") == 0) {
- _y4m->src_c_dec_h = 1;
- _y4m->src_c_dec_v = 1;
- _y4m->aom_fmt = AOM_IMG_FMT_I44416;
- _y4m->bps = 36;
- _y4m->bit_depth = 12;
- _y4m->dst_c_dec_h = _y4m->src_c_dec_h;
- _y4m->dst_c_dec_v = _y4m->src_c_dec_v;
- _y4m->dst_buf_read_sz = 2 * 3 * _y4m->pic_w * _y4m->pic_h;
- _y4m->aux_buf_sz = _y4m->aux_buf_read_sz = 0;
- _y4m->convert = y4m_convert_null;
- if (only_420) {
- fprintf(stderr, "Unsupported conversion from 444p12 to 420jpeg\n");
- return -1;
- }
- } else if (strcmp(_y4m->chroma_type, "444alpha") == 0) {
- _y4m->src_c_dec_h = 1;
- _y4m->src_c_dec_v = 1;
- if (only_420) {
- _y4m->dst_c_dec_h = 2;
- _y4m->dst_c_dec_v = 2;
- _y4m->dst_buf_read_sz = _y4m->pic_w * _y4m->pic_h;
- /*Chroma filter required: read into the aux buf first.
- We need to make two filter passes, so we need some extra space in the
- aux buffer.
- The extra plane also gets read into the aux buf.
- It will be discarded.*/
- _y4m->aux_buf_sz = _y4m->aux_buf_read_sz = 3 * _y4m->pic_w * _y4m->pic_h;
- _y4m->convert = y4m_convert_444_420jpeg;
- } else {
- _y4m->aom_fmt = AOM_IMG_FMT_444A;
- _y4m->bps = 32;
- _y4m->dst_c_dec_h = _y4m->src_c_dec_h;
- _y4m->dst_c_dec_v = _y4m->src_c_dec_v;
- _y4m->dst_buf_read_sz = 4 * _y4m->pic_w * _y4m->pic_h;
- /*Natively supported: no conversion required.*/
- _y4m->aux_buf_sz = _y4m->aux_buf_read_sz = 0;
- _y4m->convert = y4m_convert_null;
- }
- } else if (strcmp(_y4m->chroma_type, "mono") == 0) {
- _y4m->src_c_dec_h = _y4m->src_c_dec_v = 0;
- _y4m->dst_c_dec_h = _y4m->dst_c_dec_v = 2;
- _y4m->dst_buf_read_sz = _y4m->pic_w * _y4m->pic_h;
- /*No extra space required, but we need to clear the chroma planes.*/
- _y4m->aux_buf_sz = _y4m->aux_buf_read_sz = 0;
- _y4m->convert = y4m_convert_mono_420jpeg;
- } else {
- fprintf(stderr, "Unknown chroma sampling type: %s\n", _y4m->chroma_type);
- return -1;
- }
- /*The size of the final frame buffers is always computed from the
- destination chroma decimation type.*/
- _y4m->dst_buf_sz =
- _y4m->pic_w * _y4m->pic_h +
- 2 * ((_y4m->pic_w + _y4m->dst_c_dec_h - 1) / _y4m->dst_c_dec_h) *
- ((_y4m->pic_h + _y4m->dst_c_dec_v - 1) / _y4m->dst_c_dec_v);
- if (_y4m->bit_depth == 8)
- _y4m->dst_buf = (unsigned char *)malloc(_y4m->dst_buf_sz);
- else
- _y4m->dst_buf = (unsigned char *)malloc(2 * _y4m->dst_buf_sz);
-
- if (_y4m->aux_buf_sz > 0)
- _y4m->aux_buf = (unsigned char *)malloc(_y4m->aux_buf_sz);
- return 0;
-}
-
-void y4m_input_close(y4m_input *_y4m) {
- free(_y4m->dst_buf);
- free(_y4m->aux_buf);
-}
-
-int y4m_input_fetch_frame(y4m_input *_y4m, FILE *_fin, aom_image_t *_img) {
- char frame[6];
- int pic_sz;
- int c_w;
- int c_h;
- int c_sz;
- int bytes_per_sample = _y4m->bit_depth > 8 ? 2 : 1;
- /*Read and skip the frame header.*/
- if (!file_read(frame, 6, _fin)) return 0;
- if (memcmp(frame, "FRAME", 5)) {
- fprintf(stderr, "Loss of framing in Y4M input data\n");
- return -1;
- }
- if (frame[5] != '\n') {
- char c;
- int j;
- for (j = 0; j < 79 && file_read(&c, 1, _fin) && c != '\n'; j++) {
- }
- if (j == 79) {
- fprintf(stderr, "Error parsing Y4M frame header\n");
- return -1;
- }
- }
- /*Read the frame data that needs no conversion.*/
- if (!file_read(_y4m->dst_buf, _y4m->dst_buf_read_sz, _fin)) {
- fprintf(stderr, "Error reading Y4M frame data.\n");
- return -1;
- }
- /*Read the frame data that does need conversion.*/
- if (!file_read(_y4m->aux_buf, _y4m->aux_buf_read_sz, _fin)) {
- fprintf(stderr, "Error reading Y4M frame data.\n");
- return -1;
- }
- /*Now convert the just read frame.*/
- (*_y4m->convert)(_y4m, _y4m->dst_buf, _y4m->aux_buf);
- /*Fill in the frame buffer pointers.
- We don't use aom_img_wrap() because it forces padding for odd picture
- sizes, which would require a separate fread call for every row.*/
- memset(_img, 0, sizeof(*_img));
- /*Y4M has the planes in Y'CbCr order, which libaom calls Y, U, and V.*/
- _img->fmt = _y4m->aom_fmt;
- _img->w = _img->d_w = _y4m->pic_w;
- _img->h = _img->d_h = _y4m->pic_h;
- _img->x_chroma_shift = _y4m->dst_c_dec_h >> 1;
- _img->y_chroma_shift = _y4m->dst_c_dec_v >> 1;
- _img->bps = _y4m->bps;
-
- /*Set up the buffer pointers.*/
- pic_sz = _y4m->pic_w * _y4m->pic_h * bytes_per_sample;
- c_w = (_y4m->pic_w + _y4m->dst_c_dec_h - 1) / _y4m->dst_c_dec_h;
- c_w *= bytes_per_sample;
- c_h = (_y4m->pic_h + _y4m->dst_c_dec_v - 1) / _y4m->dst_c_dec_v;
- c_sz = c_w * c_h;
- _img->stride[AOM_PLANE_Y] = _img->stride[AOM_PLANE_ALPHA] =
- _y4m->pic_w * bytes_per_sample;
- _img->stride[AOM_PLANE_U] = _img->stride[AOM_PLANE_V] = c_w;
- _img->planes[AOM_PLANE_Y] = _y4m->dst_buf;
- _img->planes[AOM_PLANE_U] = _y4m->dst_buf + pic_sz;
- _img->planes[AOM_PLANE_V] = _y4m->dst_buf + pic_sz + c_sz;
- _img->planes[AOM_PLANE_ALPHA] = _y4m->dst_buf + pic_sz + 2 * c_sz;
- return 1;
-}
diff --git a/third_party/aom/common/y4minput.h b/third_party/aom/common/y4minput.h
deleted file mode 100644
index 01b9ce972..000000000
--- a/third_party/aom/common/y4minput.h
+++ /dev/null
@@ -1,69 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- *
- * Based on code from the OggTheora software codec source code,
- * Copyright (C) 2002-2010 The Xiph.Org Foundation and contributors.
- */
-
-#ifndef AOM_COMMON_Y4MINPUT_H_
-#define AOM_COMMON_Y4MINPUT_H_
-
-#include <stdio.h>
-#include "aom/aom_image.h"
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-typedef struct y4m_input y4m_input;
-
-/*The function used to perform chroma conversion.*/
-typedef void (*y4m_convert_func)(y4m_input *_y4m, unsigned char *_dst,
- unsigned char *_src);
-
-struct y4m_input {
- int pic_w;
- int pic_h;
- int fps_n;
- int fps_d;
- int par_n;
- int par_d;
- char interlace;
- int src_c_dec_h;
- int src_c_dec_v;
- int dst_c_dec_h;
- int dst_c_dec_v;
- char chroma_type[16];
- /*The size of each converted frame buffer.*/
- size_t dst_buf_sz;
- /*The amount to read directly into the converted frame buffer.*/
- size_t dst_buf_read_sz;
- /*The size of the auxilliary buffer.*/
- size_t aux_buf_sz;
- /*The amount to read into the auxilliary buffer.*/
- size_t aux_buf_read_sz;
- y4m_convert_func convert;
- unsigned char *dst_buf;
- unsigned char *aux_buf;
- enum aom_img_fmt aom_fmt;
- int bps;
- unsigned int bit_depth;
-};
-
-int y4m_input_open(y4m_input *_y4m, FILE *_fin, char *_skip, int _nskip,
- int only_420);
-void y4m_input_close(y4m_input *_y4m);
-int y4m_input_fetch_frame(y4m_input *_y4m, FILE *_fin, aom_image_t *img);
-
-#ifdef __cplusplus
-} // extern "C"
-#endif
-
-#endif // AOM_COMMON_Y4MINPUT_H_
diff --git a/third_party/aom/docs.cmake b/third_party/aom/docs.cmake
deleted file mode 100644
index b5bfa9b56..000000000
--- a/third_party/aom/docs.cmake
+++ /dev/null
@@ -1,251 +0,0 @@
-#
-# Copyright (c) 2017, Alliance for Open Media. All rights reserved
-#
-# This source code is subject to the terms of the BSD 2 Clause License and the
-# Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License was
-# not distributed with this source code in the LICENSE file, you can obtain it
-# at www.aomedia.org/license/software. If the Alliance for Open Media Patent
-# License 1.0 was not distributed with this source code in the PATENTS file, you
-# can obtain it at www.aomedia.org/license/patent.
-#
-if(AOM_DOCS_CMAKE_)
- return()
-endif() # AOM_DOCS_CMAKE_
-set(AOM_DOCS_CMAKE_ 1)
-
-cmake_minimum_required(VERSION 3.5)
-
-set(AOM_DOXYFILE "${AOM_CONFIG_DIR}/doxyfile")
-set(AOM_DOXYGEN_CONFIG_TEMPLATE "libs.doxy_template")
-set(AOM_DOXYGEN_OUTPUT_DIR "${AOM_CONFIG_DIR}/dox")
-set(AOM_DOXYGEN_SECTIONS "av1")
-
-set(AOM_DOXYGEN_SOURCES
- "${AOM_ROOT}/aom/aom.h"
- "${AOM_ROOT}/aom/aom_codec.h"
- "${AOM_ROOT}/aom/aom_decoder.h"
- "${AOM_ROOT}/aom/aom_encoder.h"
- "${AOM_ROOT}/aom/aom_frame_buffer.h"
- "${AOM_ROOT}/aom/aom_image.h"
- "${AOM_ROOT}/aom/aom_integer.h"
- "${AOM_ROOT}/keywords.dox"
- "${AOM_ROOT}/mainpage.dox"
- "${AOM_ROOT}/usage.dox")
-
-if(CONFIG_AV1_DECODER)
- set(AOM_DOXYGEN_EXAMPLE_SOURCES ${AOM_DOXYGEN_EXAMPLE_SOURCES}
- "${AOM_ROOT}/apps/aomdec.c" "${AOM_ROOT}/examples/decode_to_md5.c"
- "${AOM_ROOT}/examples/decode_with_drops.c"
- "${AOM_ROOT}/examples/simple_decoder.c")
-
- set(AOM_DOXYGEN_EXAMPLE_DESCRIPTIONS ${AOM_DOXYGEN_EXAMPLE_DESCRIPTIONS}
- "Full featured decoder." "Frame by frame MD5 checksum."
- "Drops frames while decoding." "Simplified decoder loop.")
-
- set(AOM_DOXYGEN_SECTIONS ${AOM_DOXYGEN_SECTIONS} "av1_decoder decoder")
-
- set(AOM_DOXYGEN_SOURCES ${AOM_DOXYGEN_SOURCES} "${AOM_ROOT}/aom/aomdx.h"
- "${AOM_ROOT}/usage_dx.dox")
-
- if(CONFIG_ANALYZER)
- set(AOM_DOXYGEN_EXAMPLE_SOURCES ${AOM_DOXYGEN_EXAMPLE_SOURCES}
- "${AOM_ROOT}/examples/analyzer.cc")
-
- set(AOM_DOXYGEN_EXAMPLE_DESCRIPTIONS ${AOM_DOXYGEN_EXAMPLE_DESCRIPTIONS}
- "Bitstream analyzer.")
- endif()
-
- if(CONFIG_INSPECTION)
- set(AOM_DOXYGEN_EXAMPLE_SOURCES ${AOM_DOXYGEN_EXAMPLE_SOURCES}
- "${AOM_ROOT}/examples/inspect.c")
-
- set(AOM_DOXYGEN_EXAMPLE_DESCRIPTIONS ${AOM_DOXYGEN_EXAMPLE_DESCRIPTIONS}
- "Bitstream inspector.")
- endif()
-endif()
-
-if(CONFIG_AV1_ENCODER)
- set(AOM_DOXYGEN_EXAMPLE_SOURCES ${AOM_DOXYGEN_EXAMPLE_SOURCES}
- "${AOM_ROOT}/apps/aomenc.c" "${AOM_ROOT}/examples/lossless_encoder.c"
- "${AOM_ROOT}/examples/set_maps.c" "${AOM_ROOT}/examples/simple_encoder.c"
- "${AOM_ROOT}/examples/twopass_encoder.c")
-
- set(AOM_DOXYGEN_EXAMPLE_DESCRIPTIONS ${AOM_DOXYGEN_EXAMPLE_DESCRIPTIONS}
- "Full featured encoder." "Simplified lossless encoder."
- "Set active and ROI maps." "Simplified encoder loop."
- "Two-pass encoder loop.")
-
- set(AOM_DOXYGEN_EXAMPLE_SOURCES ${AOM_DOXYGEN_EXAMPLE_SOURCES}
- "${AOM_ROOT}/examples/scalable_encoder.c")
-
- set(AOM_DOXYGEN_EXAMPLE_DESCRIPTIONS ${AOM_DOXYGEN_EXAMPLE_DESCRIPTIONS}
- "Scalable encoder loop.")
-
- set(AOM_DOXYGEN_SECTIONS ${AOM_DOXYGEN_SECTIONS} "av1_encoder encoder")
-
- set(AOM_DOXYGEN_SOURCES ${AOM_DOXYGEN_SOURCES} "${AOM_ROOT}/aom/aomcx.h"
- "${AOM_ROOT}/usage_cx.dox")
-endif()
-
-if(CONFIG_AV1_DECODER AND CONFIG_AV1_ENCODER)
- set(AOM_DOXYGEN_EXAMPLE_SOURCES ${AOM_DOXYGEN_EXAMPLE_SOURCES}
- "${AOM_ROOT}/examples/aom_cx_set_ref.c")
-
- set(AOM_DOXYGEN_EXAMPLE_DESCRIPTIONS ${AOM_DOXYGEN_EXAMPLE_DESCRIPTIONS}
- "Set encoder reference frame.")
-endif()
-
-if(CONFIG_AV1_ENCODER)
- set(AOM_DOXYGEN_EXAMPLE_SOURCES ${AOM_DOXYGEN_EXAMPLE_SOURCES}
- "${AOM_ROOT}/examples/lightfield_encoder.c")
-
- set(AOM_DOXYGEN_EXAMPLE_DESCRIPTIONS ${AOM_DOXYGEN_EXAMPLE_DESCRIPTIONS}
- "Lightfield encoder example.")
-endif()
-
-if(CONFIG_AV1_DECODER)
- set(AOM_DOXYGEN_EXAMPLE_SOURCES ${AOM_DOXYGEN_EXAMPLE_SOURCES}
- "${AOM_ROOT}/examples/lightfield_tile_list_decoder.c")
-
- set(AOM_DOXYGEN_EXAMPLE_DESCRIPTIONS ${AOM_DOXYGEN_EXAMPLE_DESCRIPTIONS}
- "Lightfield tile list decoder example.")
-endif()
-
-if(CONFIG_AV1_DECODER)
- set(AOM_DOXYGEN_EXAMPLE_SOURCES ${AOM_DOXYGEN_EXAMPLE_SOURCES}
- "${AOM_ROOT}/examples/lightfield_decoder.c")
-
- set(AOM_DOXYGEN_EXAMPLE_DESCRIPTIONS ${AOM_DOXYGEN_EXAMPLE_DESCRIPTIONS}
- "Lightfield decoder example.")
-endif()
-
-if(CONFIG_AV1_DECODER AND CONFIG_AV1_ENCODER)
- set(AOM_DOXYGEN_EXAMPLE_SOURCES ${AOM_DOXYGEN_EXAMPLE_SOURCES}
- "${AOM_ROOT}/examples/lightfield_bitstream_parsing.c")
-
- set(AOM_DOXYGEN_EXAMPLE_DESCRIPTIONS ${AOM_DOXYGEN_EXAMPLE_DESCRIPTIONS}
- "Lightfield bitstream parsing example.")
-endif()
-
-# Iterates over list named by $list_name and appends each item to $AOM_DOXYFILE
-# as values assigned to $var_name with no line breaks between list items.
-# Appends a new line after the entire config variable is expanded.
-function(write_cmake_list_to_doxygen_config_var var_name list_name)
- unset(output_string)
- foreach(list_item ${${list_name}})
- set(output_string "${output_string} ${list_item} ")
- endforeach()
- string(STRIP "${output_string}" output_string)
- file(APPEND "${AOM_DOXYFILE}" "${var_name} += ${output_string}\n")
-endfunction()
-
-function(get_name file_path name_var)
- get_filename_component(file_basename ${file_path} NAME)
- get_filename_component(${name_var} ${file_basename} NAME_WE)
- set(${name_var} ${${name_var}} PARENT_SCOPE)
-endfunction()
-
-function(setup_documentation_targets)
-
- # Sanity check: the lengths of these lists must match.
- list(LENGTH AOM_DOXYGEN_EXAMPLE_SOURCES num_sources)
- list(LENGTH AOM_DOXYGEN_EXAMPLE_DESCRIPTIONS num_descs)
- if(NOT ${num_sources} EQUAL ${num_descs})
- message(FATAL_ERROR "Unqeual example and description totals.")
- endif()
-
- # Take the list of examples and produce example_basename.dox for each file in
- # the list.
- file(MAKE_DIRECTORY "${AOM_DOXYGEN_OUTPUT_DIR}")
- foreach(example_file ${AOM_DOXYGEN_EXAMPLE_SOURCES})
- unset(example_basename)
- get_name("${example_file}" "example_name")
- set(example_dox "${AOM_DOXYGEN_OUTPUT_DIR}/${example_name}.dox")
- set(dox_string "/*!\\page example_${example_name} ${example_name}\n")
- set(dox_string "${dox_string} \\includelineno ${example_file}\n*/\n")
- file(WRITE "${example_dox}" ${dox_string})
- set(AOM_DOXYGEN_SOURCES ${AOM_DOXYGEN_SOURCES} "${example_dox}")
- endforeach()
-
- # Generate samples.dox, an index page that refers to the example_basename.dox
- # files that were just created.
- set(
- samples_header
- "
-/*!\\page samples Sample Code
-This SDK includes a number of sample applications. Each sample documents a
-feature of the SDK in both prose and the associated C code. The following
-samples are included:
-"
- )
-
- set(
- utils_desc
- "
-In addition, the SDK contains a number of utilities. Since these utilities are
-built upon the concepts described in the sample code listed above, they are not
-documented in pieces like the samples are. Their source is included here for
-reference. The following utilities are included:
-"
- )
-
- # Write the description for the samples section.
- set(samples_dox "${AOM_CONFIG_DIR}/samples.dox")
- file(WRITE "${samples_dox}" "${samples_header}\n")
-
- # Iterate over $AOM_DOXYGEN_EXAMPLE_SOURCES and
- # $AOM_DOXYGEN_EXAMPLE_DESCRIPTIONS and massage example names as required by
- # AV1's doxygen setup.
- math(EXPR max_example_index "${num_sources} - 1")
- foreach(NUM RANGE ${max_example_index})
- list(GET AOM_DOXYGEN_EXAMPLE_SOURCES ${NUM} ex_name)
- get_name("${ex_name}" "ex_name")
-
- # AV1's doxygen lists aomdec and aomenc as utils apart from the examples.
- # Save the indexes for another pass.
- if("${ex_name}" MATCHES "aomdec\|aomenc")
- set(util_indexes "${util_indexes}" "${NUM}")
- continue()
- endif()
- list(GET AOM_DOXYGEN_EXAMPLE_DESCRIPTIONS ${NUM} ex_desc)
- file(APPEND "${samples_dox}" " - \\subpage example_${ex_name} ${ex_desc}\n")
- endforeach()
-
- # Write the description and index for the utils.
- file(APPEND "${samples_dox}" "${utils_desc}\n")
- foreach(util_index ${util_indexes})
- list(GET AOM_DOXYGEN_EXAMPLE_SOURCES ${util_index} ex_name)
- get_name("${ex_name}" "ex_name")
- list(GET AOM_DOXYGEN_EXAMPLE_DESCRIPTIONS ${util_index} ex_desc)
- file(APPEND "${samples_dox}" " - \\subpage example_${ex_name} ${ex_desc}\n")
- endforeach()
- file(APPEND "${samples_dox}" "*/")
-
- # Add $samples_dox to the doxygen inputs.
- get_filename_component(samples_dox ${samples_dox} NAME)
- set(AOM_DOXYGEN_SOURCES ${AOM_DOXYGEN_SOURCES} ${samples_dox})
-
- # Generate libaom's doxyfile.
- file(WRITE "${AOM_DOXYFILE}" "##\n## GENERATED FILE. DO NOT EDIT\n##\n")
- file(READ "${AOM_ROOT}/${AOM_DOXYGEN_CONFIG_TEMPLATE}" doxygen_template_data)
- file(APPEND "${AOM_DOXYFILE}" ${doxygen_template_data})
- file(APPEND "${AOM_DOXYFILE}"
- "EXAMPLE_PATH += ${AOM_ROOT} ${AOM_ROOT}/examples\n")
- file(APPEND "${AOM_DOXYFILE}"
- "INCLUDE_PATH += ${AOM_CONFIG_DIR} ${AOM_ROOT}\n")
- file(APPEND "${AOM_DOXYFILE}"
- "STRIP_FROM_PATH += ${AOM_ROOT} ${AOM_CONFIG_DIR}\n")
- write_cmake_list_to_doxygen_config_var("INPUT" "AOM_DOXYGEN_SOURCES")
- write_cmake_list_to_doxygen_config_var("ENABLED_SECTIONS"
- "AOM_DOXYGEN_SECTIONS")
-
- # Add the doxygen generation rule.
- add_custom_target(docs ALL
- COMMAND "${DOXYGEN_EXECUTABLE}" "${AOM_DOXYFILE}"
- DEPENDS "${AOM_DOXYFILE}" ${AOM_DOXYGEN_SOURCES}
- ${AOM_DOXYGEN_EXAMPLE_SOURCES}
- "${AOM_DOXYGEN_CONFIG_TEMPLATE}"
- SOURCES "${AOM_DOXYFILE}" ${AOM_DOXYGEN_SOURCES}
- ${AOM_DOXYGEN_EXAMPLE_SOURCES}
- "${AOM_DOXYGEN_CONFIG_TEMPLATE}")
-endfunction()
diff --git a/third_party/aom/examples/analyzer.cc b/third_party/aom/examples/analyzer.cc
deleted file mode 100644
index 6a42eca24..000000000
--- a/third_party/aom/examples/analyzer.cc
+++ /dev/null
@@ -1,723 +0,0 @@
-/*
- * Copyright (c) 2017, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-#include <wx/wx.h>
-#include <wx/aboutdlg.h>
-#include <wx/cmdline.h>
-#include <wx/dcbuffer.h>
-
-#include "aom/aom_decoder.h"
-#include "aom/aomdx.h"
-#include "av1/common/onyxc_int.h"
-#include "av1/decoder/accounting.h"
-#include "av1/decoder/inspection.h"
-#include "common/tools_common.h"
-#include "common/video_reader.h"
-
-#define OD_SIGNMASK(a) (-((a) < 0))
-#define OD_FLIPSIGNI(a, b) (((a) + OD_SIGNMASK(b)) ^ OD_SIGNMASK(b))
-#define OD_DIV_ROUND(x, y) (((x) + OD_FLIPSIGNI((y) >> 1, x)) / (y))
-
-enum {
- OD_LUMA_MASK = 1 << 0,
- OD_CB_MASK = 1 << 1,
- OD_CR_MASK = 1 << 2,
- OD_ALL_MASK = OD_LUMA_MASK | OD_CB_MASK | OD_CR_MASK
-};
-
-class AV1Decoder {
- private:
- FILE *input;
- wxString path;
-
- AvxVideoReader *reader;
- const AvxVideoInfo *info;
- const AvxInterface *decoder;
-
- insp_frame_data frame_data;
-
- aom_codec_ctx_t codec;
- bool show_padding;
-
- public:
- aom_image_t *image;
- int frame;
-
- int plane_mask;
-
- AV1Decoder();
- ~AV1Decoder();
-
- bool open(const wxString &path);
- void close();
- bool step();
-
- int getWidthPadding() const;
- int getHeightPadding() const;
- void togglePadding();
- int getWidth() const;
- int getHeight() const;
-
- bool getAccountingStruct(Accounting **acct);
- bool setInspectionCallback();
-
- static void inspect(void *decoder, void *data);
-};
-
-AV1Decoder::AV1Decoder()
- : reader(NULL), info(NULL), decoder(NULL), show_padding(false), image(NULL),
- frame(0) {}
-
-AV1Decoder::~AV1Decoder() {}
-
-void AV1Decoder::togglePadding() { show_padding = !show_padding; }
-
-bool AV1Decoder::open(const wxString &path) {
- reader = aom_video_reader_open(path.mb_str());
- if (!reader) {
- fprintf(stderr, "Failed to open %s for reading.", path.mb_str().data());
- return false;
- }
- this->path = path;
- info = aom_video_reader_get_info(reader);
- decoder = get_aom_decoder_by_fourcc(info->codec_fourcc);
- if (!decoder) {
- fprintf(stderr, "Unknown input codec.");
- return false;
- }
- printf("Using %s\n", aom_codec_iface_name(decoder->codec_interface()));
- if (aom_codec_dec_init(&codec, decoder->codec_interface(), NULL, 0)) {
- fprintf(stderr, "Failed to initialize decoder.");
- return false;
- }
- ifd_init(&frame_data, info->frame_width, info->frame_height);
- setInspectionCallback();
- return true;
-}
-
-void AV1Decoder::close() {}
-
-bool AV1Decoder::step() {
- if (aom_video_reader_read_frame(reader)) {
- size_t frame_size;
- const unsigned char *frame_data;
- frame_data = aom_video_reader_get_frame(reader, &frame_size);
- if (aom_codec_decode(&codec, frame_data, frame_size, NULL)) {
- fprintf(stderr, "Failed to decode frame.");
- return false;
- } else {
- aom_codec_iter_t iter = NULL;
- image = aom_codec_get_frame(&codec, &iter);
- if (image != NULL) {
- frame++;
- return true;
- }
- return false;
- }
- }
- return false;
-}
-
-int AV1Decoder::getWidth() const {
- return info->frame_width + 2 * getWidthPadding();
-}
-
-int AV1Decoder::getWidthPadding() const {
- return show_padding ? AOMMAX(info->frame_width + 16,
- ALIGN_POWER_OF_TWO(info->frame_width, 6)) -
- info->frame_width
- : 0;
-}
-
-int AV1Decoder::getHeight() const {
- return info->frame_height + 2 * getHeightPadding();
-}
-
-int AV1Decoder::getHeightPadding() const {
- return show_padding ? AOMMAX(info->frame_height + 16,
- ALIGN_POWER_OF_TWO(info->frame_height, 6)) -
- info->frame_height
- : 0;
-}
-
-bool AV1Decoder::getAccountingStruct(Accounting **accounting) {
- return aom_codec_control(&codec, AV1_GET_ACCOUNTING, accounting) ==
- AOM_CODEC_OK;
-}
-
-bool AV1Decoder::setInspectionCallback() {
- aom_inspect_init ii;
- ii.inspect_cb = AV1Decoder::inspect;
- ii.inspect_ctx = (void *)this;
- return aom_codec_control(&codec, AV1_SET_INSPECTION_CALLBACK, &ii) ==
- AOM_CODEC_OK;
-}
-
-void AV1Decoder::inspect(void *pbi, void *data) {
- AV1Decoder *decoder = (AV1Decoder *)data;
- ifd_inspect(&decoder->frame_data, pbi);
-}
-
-#define MIN_ZOOM (1)
-#define MAX_ZOOM (4)
-
-class AnalyzerPanel : public wxPanel {
- DECLARE_EVENT_TABLE()
-
- private:
- AV1Decoder decoder;
- const wxString path;
-
- int zoom;
- unsigned char *pixels;
-
- const bool bit_accounting;
- double *bpp_q3;
-
- int plane_mask;
-
- // The display size is the decode size, scaled by the zoom.
- int getDisplayWidth() const;
- int getDisplayHeight() const;
-
- bool updateDisplaySize();
-
- void computeBitsPerPixel();
-
- public:
- AnalyzerPanel(wxWindow *parent, const wxString &path,
- const bool bit_accounting);
- ~AnalyzerPanel();
-
- bool open(const wxString &path);
- void close();
- void render();
- void togglePadding();
- bool nextFrame();
- void refresh();
-
- int getZoom() const;
- bool setZoom(int zoom);
-
- void setShowPlane(bool show_plane, int mask);
-
- void onPaint(wxPaintEvent &event); // NOLINT
-};
-
-BEGIN_EVENT_TABLE(AnalyzerPanel, wxPanel)
-EVT_PAINT(AnalyzerPanel::onPaint)
-END_EVENT_TABLE()
-
-AnalyzerPanel::AnalyzerPanel(wxWindow *parent, const wxString &path,
- const bool bit_accounting)
- : wxPanel(parent), path(path), zoom(0), pixels(NULL),
- bit_accounting(bit_accounting), bpp_q3(NULL), plane_mask(OD_ALL_MASK) {}
-
-AnalyzerPanel::~AnalyzerPanel() { close(); }
-
-void AnalyzerPanel::setShowPlane(bool show_plane, int mask) {
- if (show_plane) {
- plane_mask |= mask;
- } else {
- plane_mask &= ~mask;
- }
-}
-
-void AnalyzerPanel::render() {
- aom_image_t *img = decoder.image;
- const int hbd = !!(img->fmt & AOM_IMG_FMT_HIGHBITDEPTH);
- int y_stride = img->stride[0] >> hbd;
- int cb_stride = img->stride[1] >> hbd;
- int cr_stride = img->stride[2] >> hbd;
- int p_stride = 3 * getDisplayWidth();
- unsigned char *y_row = img->planes[0];
- unsigned char *cb_row = img->planes[1];
- unsigned char *cr_row = img->planes[2];
- uint16_t *y_row16 = reinterpret_cast<uint16_t *>(y_row);
- uint16_t *cb_row16 = reinterpret_cast<uint16_t *>(cb_row);
- uint16_t *cr_row16 = reinterpret_cast<uint16_t *>(cr_row);
- unsigned char *p_row = pixels;
- int y_width_padding = decoder.getWidthPadding();
- int cb_width_padding = y_width_padding >> 1;
- int cr_width_padding = y_width_padding >> 1;
- int y_height_padding = decoder.getHeightPadding();
- int cb_height_padding = y_height_padding >> 1;
- int cr_height_padding = y_height_padding >> 1;
- for (int j = 0; j < decoder.getHeight(); j++) {
- unsigned char *y = y_row - y_stride * y_height_padding;
- unsigned char *cb = cb_row - cb_stride * cb_height_padding;
- unsigned char *cr = cr_row - cr_stride * cr_height_padding;
- uint16_t *y16 = y_row16 - y_stride * y_height_padding;
- uint16_t *cb16 = cb_row16 - cb_stride * cb_height_padding;
- uint16_t *cr16 = cr_row16 - cr_stride * cr_height_padding;
- unsigned char *p = p_row;
- for (int i = 0; i < decoder.getWidth(); i++) {
- int64_t yval;
- int64_t cbval;
- int64_t crval;
- int pmask;
- unsigned rval;
- unsigned gval;
- unsigned bval;
- if (hbd) {
- yval = *(y16 - y_width_padding);
- cbval = *(cb16 - cb_width_padding);
- crval = *(cr16 - cr_width_padding);
- } else {
- yval = *(y - y_width_padding);
- cbval = *(cb - cb_width_padding);
- crval = *(cr - cr_width_padding);
- }
- pmask = plane_mask;
- if (pmask & OD_LUMA_MASK) {
- yval -= 16;
- } else {
- yval = 128;
- }
- cbval = ((pmask & OD_CB_MASK) >> 1) * (cbval - 128);
- crval = ((pmask & OD_CR_MASK) >> 2) * (crval - 128);
- /*This is intentionally slow and very accurate.*/
- rval = OD_CLAMPI(
- 0,
- (int32_t)OD_DIV_ROUND(
- 2916394880000LL * yval + 4490222169144LL * crval, 9745792000LL),
- 65535);
- gval = OD_CLAMPI(0,
- (int32_t)OD_DIV_ROUND(2916394880000LL * yval -
- 534117096223LL * cbval -
- 1334761232047LL * crval,
- 9745792000LL),
- 65535);
- bval = OD_CLAMPI(
- 0,
- (int32_t)OD_DIV_ROUND(
- 2916394880000LL * yval + 5290866304968LL * cbval, 9745792000LL),
- 65535);
- unsigned char *px_row = p;
- for (int v = 0; v < zoom; v++) {
- unsigned char *px = px_row;
- for (int u = 0; u < zoom; u++) {
- *(px + 0) = (unsigned char)(rval >> 8);
- *(px + 1) = (unsigned char)(gval >> 8);
- *(px + 2) = (unsigned char)(bval >> 8);
- px += 3;
- }
- px_row += p_stride;
- }
- if (hbd) {
- int dc = ((y16 - y_row16) & 1) | (1 - img->x_chroma_shift);
- y16++;
- cb16 += dc;
- cr16 += dc;
- } else {
- int dc = ((y - y_row) & 1) | (1 - img->x_chroma_shift);
- y++;
- cb += dc;
- cr += dc;
- }
- p += zoom * 3;
- }
- int dc = -((j & 1) | (1 - img->y_chroma_shift));
- if (hbd) {
- y_row16 += y_stride;
- cb_row16 += dc & cb_stride;
- cr_row16 += dc & cr_stride;
- } else {
- y_row += y_stride;
- cb_row += dc & cb_stride;
- cr_row += dc & cr_stride;
- }
- p_row += zoom * p_stride;
- }
-}
-
-void AnalyzerPanel::computeBitsPerPixel() {
- Accounting *acct;
- double bpp_total;
- int totals_q3[MAX_SYMBOL_TYPES] = { 0 };
- int sym_count[MAX_SYMBOL_TYPES] = { 0 };
- decoder.getAccountingStruct(&acct);
- for (int j = 0; j < decoder.getHeight(); j++) {
- for (int i = 0; i < decoder.getWidth(); i++) {
- bpp_q3[j * decoder.getWidth() + i] = 0.0;
- }
- }
- bpp_total = 0;
- for (int i = 0; i < acct->syms.num_syms; i++) {
- AccountingSymbol *s;
- s = &acct->syms.syms[i];
- totals_q3[s->id] += s->bits;
- sym_count[s->id] += s->samples;
- }
- printf("=== Frame: %-3i ===\n", decoder.frame - 1);
- for (int i = 0; i < acct->syms.dictionary.num_strs; i++) {
- if (totals_q3[i]) {
- printf("%30s = %10.3f (%f bit/symbol)\n", acct->syms.dictionary.strs[i],
- (float)totals_q3[i] / 8, (float)totals_q3[i] / 8 / sym_count[i]);
- }
- }
- printf("\n");
-}
-
-void AnalyzerPanel::togglePadding() {
- decoder.togglePadding();
- updateDisplaySize();
-}
-
-bool AnalyzerPanel::nextFrame() {
- if (decoder.step()) {
- refresh();
- return true;
- }
- return false;
-}
-
-void AnalyzerPanel::refresh() {
- if (bit_accounting) {
- computeBitsPerPixel();
- }
- render();
-}
-
-int AnalyzerPanel::getDisplayWidth() const { return zoom * decoder.getWidth(); }
-
-int AnalyzerPanel::getDisplayHeight() const {
- return zoom * decoder.getHeight();
-}
-
-bool AnalyzerPanel::updateDisplaySize() {
- unsigned char *p = (unsigned char *)malloc(
- sizeof(*p) * 3 * getDisplayWidth() * getDisplayHeight());
- if (p == NULL) {
- return false;
- }
- free(pixels);
- pixels = p;
- SetSize(getDisplayWidth(), getDisplayHeight());
- return true;
-}
-
-bool AnalyzerPanel::open(const wxString &path) {
- if (!decoder.open(path)) {
- return false;
- }
- if (!setZoom(MIN_ZOOM)) {
- return false;
- }
- if (bit_accounting) {
- bpp_q3 = (double *)malloc(sizeof(*bpp_q3) * decoder.getWidth() *
- decoder.getHeight());
- if (bpp_q3 == NULL) {
- fprintf(stderr, "Could not allocate memory for bit accounting\n");
- close();
- return false;
- }
- }
- if (!nextFrame()) {
- close();
- return false;
- }
- SetFocus();
- return true;
-}
-
-void AnalyzerPanel::close() {
- decoder.close();
- free(pixels);
- pixels = NULL;
- free(bpp_q3);
- bpp_q3 = NULL;
-}
-
-int AnalyzerPanel::getZoom() const { return zoom; }
-
-bool AnalyzerPanel::setZoom(int z) {
- if (z <= MAX_ZOOM && z >= MIN_ZOOM && zoom != z) {
- int old_zoom = zoom;
- zoom = z;
- if (!updateDisplaySize()) {
- zoom = old_zoom;
- return false;
- }
- return true;
- }
- return false;
-}
-
-void AnalyzerPanel::onPaint(wxPaintEvent &) {
- wxBitmap bmp(wxImage(getDisplayWidth(), getDisplayHeight(), pixels, true));
- wxBufferedPaintDC dc(this, bmp);
-}
-
-class AnalyzerFrame : public wxFrame {
- DECLARE_EVENT_TABLE()
-
- private:
- AnalyzerPanel *panel;
- const bool bit_accounting;
-
- wxMenu *fileMenu;
- wxMenu *viewMenu;
- wxMenu *playbackMenu;
-
- public:
- AnalyzerFrame(const bool bit_accounting); // NOLINT
-
- void onOpen(wxCommandEvent &event); // NOLINT
- void onClose(wxCommandEvent &event); // NOLINT
- void onQuit(wxCommandEvent &event); // NOLINT
-
- void onTogglePadding(wxCommandEvent &event); // NOLINT
- void onZoomIn(wxCommandEvent &event); // NOLINT
- void onZoomOut(wxCommandEvent &event); // NOLINT
- void onActualSize(wxCommandEvent &event); // NOLINT
-
- void onToggleViewMenuCheckBox(wxCommandEvent &event); // NOLINT
- void onResetAndToggleViewMenuCheckBox(wxCommandEvent &event); // NOLINT
-
- void onNextFrame(wxCommandEvent &event); // NOLINT
- void onGotoFrame(wxCommandEvent &event); // NOLINT
- void onRestart(wxCommandEvent &event); // NOLINT
-
- void onAbout(wxCommandEvent &event); // NOLINT
-
- bool open(const wxString &path);
- bool setZoom(int zoom);
- void updateViewMenu();
-};
-
-enum {
- wxID_NEXT_FRAME = 6000,
- wxID_SHOW_Y,
- wxID_SHOW_U,
- wxID_SHOW_V,
- wxID_GOTO_FRAME,
- wxID_RESTART,
- wxID_ACTUAL_SIZE,
- wxID_PADDING
-};
-
-BEGIN_EVENT_TABLE(AnalyzerFrame, wxFrame)
-EVT_MENU(wxID_OPEN, AnalyzerFrame::onOpen)
-EVT_MENU(wxID_CLOSE, AnalyzerFrame::onClose)
-EVT_MENU(wxID_EXIT, AnalyzerFrame::onQuit)
-EVT_MENU(wxID_PADDING, AnalyzerFrame::onTogglePadding)
-EVT_MENU(wxID_ZOOM_IN, AnalyzerFrame::onZoomIn)
-EVT_MENU(wxID_ZOOM_OUT, AnalyzerFrame::onZoomOut)
-EVT_MENU(wxID_ACTUAL_SIZE, AnalyzerFrame::onActualSize)
-EVT_MENU(wxID_SHOW_Y, AnalyzerFrame::onResetAndToggleViewMenuCheckBox)
-EVT_MENU(wxID_SHOW_U, AnalyzerFrame::onResetAndToggleViewMenuCheckBox)
-EVT_MENU(wxID_SHOW_V, AnalyzerFrame::onResetAndToggleViewMenuCheckBox)
-EVT_MENU(wxID_NEXT_FRAME, AnalyzerFrame::onNextFrame)
-EVT_MENU(wxID_GOTO_FRAME, AnalyzerFrame::onGotoFrame)
-EVT_MENU(wxID_RESTART, AnalyzerFrame::onRestart)
-EVT_MENU(wxID_ABOUT, AnalyzerFrame::onAbout)
-END_EVENT_TABLE()
-
-AnalyzerFrame::AnalyzerFrame(const bool bit_accounting)
- : wxFrame(NULL, wxID_ANY, _("AV1 Stream Analyzer"), wxDefaultPosition,
- wxDefaultSize, wxDEFAULT_FRAME_STYLE),
- panel(NULL), bit_accounting(bit_accounting) {
- wxMenuBar *mb = new wxMenuBar();
-
- fileMenu = new wxMenu();
- fileMenu->Append(wxID_OPEN, _("&Open...\tCtrl-O"), _("Open daala file"));
- fileMenu->Append(wxID_CLOSE, _("&Close\tCtrl-W"), _("Close daala file"));
- fileMenu->Enable(wxID_CLOSE, false);
- fileMenu->Append(wxID_EXIT, _("E&xit\tCtrl-Q"), _("Quit this program"));
- mb->Append(fileMenu, _("&File"));
-
- wxAcceleratorEntry entries[2];
- entries[0].Set(wxACCEL_CTRL, (int)'=', wxID_ZOOM_IN);
- entries[1].Set(wxACCEL_CTRL | wxACCEL_SHIFT, (int)'-', wxID_ZOOM_OUT);
- wxAcceleratorTable accel(2, entries);
- this->SetAcceleratorTable(accel);
-
- viewMenu = new wxMenu();
- +viewMenu->Append(wxID_PADDING, _("Toggle padding\tCtrl-p"),
- _("Show padding"));
- viewMenu->Append(wxID_ZOOM_IN, _("Zoom-In\tCtrl-+"), _("Double image size"));
- viewMenu->Append(wxID_ZOOM_OUT, _("Zoom-Out\tCtrl--"), _("Half image size"));
- viewMenu->Append(wxID_ACTUAL_SIZE, _("Actual size\tCtrl-0"),
- _("Actual size of the frame"));
- viewMenu->AppendSeparator();
- viewMenu->AppendCheckItem(wxID_SHOW_Y, _("&Y plane\tCtrl-Y"),
- _("Show Y plane"));
- viewMenu->AppendCheckItem(wxID_SHOW_U, _("&U plane\tCtrl-U"),
- _("Show U plane"));
- viewMenu->AppendCheckItem(wxID_SHOW_V, _("&V plane\tCtrl-V"),
- _("Show V plane"));
- mb->Append(viewMenu, _("&View"));
-
- playbackMenu = new wxMenu();
- playbackMenu->Append(wxID_NEXT_FRAME, _("Next frame\tCtrl-."),
- _("Go to next frame"));
- /*playbackMenu->Append(wxID_RESTART, _("&Restart\tCtrl-R"),
- _("Set video to frame 0"));
- playbackMenu->Append(wxID_GOTO_FRAME, _("Jump to Frame\tCtrl-J"),
- _("Go to frame number"));*/
- mb->Append(playbackMenu, _("&Playback"));
-
- wxMenu *helpMenu = new wxMenu();
- helpMenu->Append(wxID_ABOUT, _("&About...\tF1"), _("Show about dialog"));
- mb->Append(helpMenu, _("&Help"));
-
- SetMenuBar(mb);
-
- CreateStatusBar(1);
-}
-
-void AnalyzerFrame::onOpen(wxCommandEvent &WXUNUSED(event)) {
- wxFileDialog openFileDialog(this, _("Open file"), wxEmptyString,
- wxEmptyString, _("AV1 files (*.ivf)|*.ivf"),
- wxFD_OPEN | wxFD_FILE_MUST_EXIST);
- if (openFileDialog.ShowModal() != wxID_CANCEL) {
- open(openFileDialog.GetPath());
- }
-}
-
-void AnalyzerFrame::onClose(wxCommandEvent &WXUNUSED(event)) {}
-
-void AnalyzerFrame::onQuit(wxCommandEvent &WXUNUSED(event)) { Close(true); }
-
-void AnalyzerFrame::onTogglePadding(wxCommandEvent &WXUNUSED(event)) {
- panel->togglePadding();
- SetClientSize(panel->GetSize());
- panel->render();
- panel->Refresh();
-}
-
-void AnalyzerFrame::onZoomIn(wxCommandEvent &WXUNUSED(event)) {
- setZoom(panel->getZoom() + 1);
-}
-
-void AnalyzerFrame::onZoomOut(wxCommandEvent &WXUNUSED(event)) {
- setZoom(panel->getZoom() - 1);
-}
-
-void AnalyzerFrame::onActualSize(wxCommandEvent &WXUNUSED(event)) {
- setZoom(MIN_ZOOM);
-}
-
-void AnalyzerFrame::onToggleViewMenuCheckBox(wxCommandEvent &event) { // NOLINT
- GetMenuBar()->Check(event.GetId(), event.IsChecked());
- updateViewMenu();
-}
-
-void AnalyzerFrame::onResetAndToggleViewMenuCheckBox(
- wxCommandEvent &event) { // NOLINT
- int id = event.GetId();
- if (id != wxID_SHOW_Y && id != wxID_SHOW_U && id != wxID_SHOW_V) {
- GetMenuBar()->Check(wxID_SHOW_Y, true);
- GetMenuBar()->Check(wxID_SHOW_U, true);
- GetMenuBar()->Check(wxID_SHOW_V, true);
- }
- onToggleViewMenuCheckBox(event);
-}
-
-void AnalyzerFrame::onNextFrame(wxCommandEvent &WXUNUSED(event)) {
- panel->nextFrame();
- panel->Refresh(false);
-}
-
-void AnalyzerFrame::onGotoFrame(wxCommandEvent &WXUNUSED(event)) {}
-
-void AnalyzerFrame::onRestart(wxCommandEvent &WXUNUSED(event)) {}
-
-void AnalyzerFrame::onAbout(wxCommandEvent &WXUNUSED(event)) {
- wxAboutDialogInfo info;
- info.SetName(_("AV1 Bitstream Analyzer"));
- info.SetVersion(_("0.1-beta"));
- info.SetDescription(
- _("This program implements a bitstream analyzer for AV1"));
- info.SetCopyright(
- wxT("(C) 2017 Alliance for Open Media <negge@mozilla.com>"));
- wxAboutBox(info);
-}
-
-bool AnalyzerFrame::open(const wxString &path) {
- panel = new AnalyzerPanel(this, path, bit_accounting);
- if (panel->open(path)) {
- SetClientSize(panel->GetSize());
- return true;
- } else {
- delete panel;
- return false;
- }
-}
-
-bool AnalyzerFrame::setZoom(int zoom) {
- if (panel->setZoom(zoom)) {
- GetMenuBar()->Enable(wxID_ACTUAL_SIZE, zoom != MIN_ZOOM);
- GetMenuBar()->Enable(wxID_ZOOM_IN, zoom != MAX_ZOOM);
- GetMenuBar()->Enable(wxID_ZOOM_OUT, zoom != MIN_ZOOM);
- SetClientSize(panel->GetSize());
- panel->render();
- panel->Refresh();
- return true;
- }
- return false;
-}
-
-void AnalyzerFrame::updateViewMenu() {
- panel->setShowPlane(GetMenuBar()->IsChecked(wxID_SHOW_Y), OD_LUMA_MASK);
- panel->setShowPlane(GetMenuBar()->IsChecked(wxID_SHOW_U), OD_CB_MASK);
- panel->setShowPlane(GetMenuBar()->IsChecked(wxID_SHOW_V), OD_CR_MASK);
- SetClientSize(panel->GetSize());
- panel->render();
- panel->Refresh(false);
-}
-
-class Analyzer : public wxApp {
- private:
- AnalyzerFrame *frame;
-
- public:
- void OnInitCmdLine(wxCmdLineParser &parser); // NOLINT
- bool OnCmdLineParsed(wxCmdLineParser &parser); // NOLINT
-};
-
-static const wxCmdLineEntryDesc CMD_LINE_DESC[] = {
- { wxCMD_LINE_SWITCH, _("h"), _("help"), _("Display this help and exit."),
- wxCMD_LINE_VAL_NONE, wxCMD_LINE_OPTION_HELP },
- { wxCMD_LINE_SWITCH, _("a"), _("bit-accounting"), _("Enable bit accounting"),
- wxCMD_LINE_VAL_NONE, wxCMD_LINE_PARAM_OPTIONAL },
- { wxCMD_LINE_PARAM, NULL, NULL, _("input.ivf"), wxCMD_LINE_VAL_STRING,
- wxCMD_LINE_PARAM_OPTIONAL },
- { wxCMD_LINE_NONE }
-};
-
-void Analyzer::OnInitCmdLine(wxCmdLineParser &parser) { // NOLINT
- parser.SetDesc(CMD_LINE_DESC);
- parser.SetSwitchChars(_("-"));
-}
-
-bool Analyzer::OnCmdLineParsed(wxCmdLineParser &parser) { // NOLINT
- bool bit_accounting = parser.Found(_("a"));
- if (bit_accounting && !CONFIG_ACCOUNTING) {
- fprintf(stderr,
- "Bit accounting support not found. "
- "Recompile with:\n./cmake -DCONFIG_ACCOUNTING=1\n");
- return false;
- }
- frame = new AnalyzerFrame(parser.Found(_("a")));
- frame->Show();
- if (parser.GetParamCount() > 0) {
- return frame->open(parser.GetParam(0));
- }
- return true;
-}
-
-void usage_exit(void) {
- fprintf(stderr, "uhh\n");
- exit(EXIT_FAILURE);
-}
-
-IMPLEMENT_APP(Analyzer)
diff --git a/third_party/aom/examples/aom_cx_set_ref.c b/third_party/aom/examples/aom_cx_set_ref.c
deleted file mode 100644
index 8e3d216fe..000000000
--- a/third_party/aom/examples/aom_cx_set_ref.c
+++ /dev/null
@@ -1,385 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-// AV1 Set Reference Frame
-// ============================
-//
-// This is an example demonstrating how to overwrite the AV1 encoder's
-// internal reference frame. In the sample we set the last frame to the
-// current frame. This technique could be used to bounce between two cameras.
-//
-// The decoder would also have to set the reference frame to the same value
-// on the same frame, or the video will become corrupt. The 'test_decode'
-// variable is set to 1 in this example that tests if the encoder and decoder
-// results are matching.
-//
-// Usage
-// -----
-// This example encodes a raw video. And the last argument passed in specifies
-// the frame number to update the reference frame on. For example, run
-// examples/aom_cx_set_ref av1 352 288 in.yuv out.ivf 4 30
-// The parameter is parsed as follows:
-//
-//
-// Extra Variables
-// ---------------
-// This example maintains the frame number passed on the command line
-// in the `update_frame_num` variable.
-//
-//
-// Configuration
-// -------------
-//
-// The reference frame is updated on the frame specified on the command
-// line.
-//
-// Observing The Effects
-// ---------------------
-// The encoder and decoder results should be matching when the same reference
-// frame setting operation is done in both encoder and decoder. Otherwise,
-// the encoder/decoder mismatch would be seen.
-
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-
-#include "aom/aom_decoder.h"
-#include "aom/aom_encoder.h"
-#include "aom/aomcx.h"
-#include "aom_scale/yv12config.h"
-#include "common/tools_common.h"
-#include "common/video_writer.h"
-#include "examples/encoder_util.h"
-
-static const char *exec_name;
-
-void usage_exit() {
- fprintf(stderr,
- "Usage: %s <codec> <width> <height> <infile> <outfile> "
- "<frame> <limit(optional)>\n",
- exec_name);
- exit(EXIT_FAILURE);
-}
-
-static void testing_decode(aom_codec_ctx_t *encoder, aom_codec_ctx_t *decoder,
- unsigned int frame_out, int *mismatch_seen) {
- aom_image_t enc_img, dec_img;
-
- if (*mismatch_seen) return;
-
- /* Get the internal reference frame */
- if (aom_codec_control(encoder, AV1_GET_NEW_FRAME_IMAGE, &enc_img))
- die_codec(encoder, "Failed to get encoder reference frame");
- if (aom_codec_control(decoder, AV1_GET_NEW_FRAME_IMAGE, &dec_img))
- die_codec(decoder, "Failed to get decoder reference frame");
-
- if ((enc_img.fmt & AOM_IMG_FMT_HIGHBITDEPTH) !=
- (dec_img.fmt & AOM_IMG_FMT_HIGHBITDEPTH)) {
- if (enc_img.fmt & AOM_IMG_FMT_HIGHBITDEPTH) {
- aom_image_t enc_hbd_img;
- aom_img_alloc(&enc_hbd_img, enc_img.fmt - AOM_IMG_FMT_HIGHBITDEPTH,
- enc_img.d_w, enc_img.d_h, 16);
- aom_img_truncate_16_to_8(&enc_hbd_img, &enc_img);
- enc_img = enc_hbd_img;
- }
- if (dec_img.fmt & AOM_IMG_FMT_HIGHBITDEPTH) {
- aom_image_t dec_hbd_img;
- aom_img_alloc(&dec_hbd_img, dec_img.fmt - AOM_IMG_FMT_HIGHBITDEPTH,
- dec_img.d_w, dec_img.d_h, 16);
- aom_img_truncate_16_to_8(&dec_hbd_img, &dec_img);
- dec_img = dec_hbd_img;
- }
- }
-
- if (!aom_compare_img(&enc_img, &dec_img)) {
- int y[4], u[4], v[4];
- if (enc_img.fmt & AOM_IMG_FMT_HIGHBITDEPTH) {
- aom_find_mismatch_high(&enc_img, &dec_img, y, u, v);
- } else {
- aom_find_mismatch(&enc_img, &dec_img, y, u, v);
- }
-
- printf(
- "Encode/decode mismatch on frame %d at"
- " Y[%d, %d] {%d/%d},"
- " U[%d, %d] {%d/%d},"
- " V[%d, %d] {%d/%d}",
- frame_out, y[0], y[1], y[2], y[3], u[0], u[1], u[2], u[3], v[0], v[1],
- v[2], v[3]);
- *mismatch_seen = 1;
- }
-
- aom_img_free(&enc_img);
- aom_img_free(&dec_img);
-}
-
-static int encode_frame(aom_codec_ctx_t *ecodec, aom_image_t *img,
- unsigned int frame_in, AvxVideoWriter *writer,
- int test_decode, aom_codec_ctx_t *dcodec,
- unsigned int *frame_out, int *mismatch_seen,
- aom_image_t *ext_ref) {
- int got_pkts = 0;
- aom_codec_iter_t iter = NULL;
- const aom_codec_cx_pkt_t *pkt = NULL;
- int got_data;
- const aom_codec_err_t res = aom_codec_encode(ecodec, img, frame_in, 1, 0);
- if (res != AOM_CODEC_OK) die_codec(ecodec, "Failed to encode frame");
-
- got_data = 0;
-
- while ((pkt = aom_codec_get_cx_data(ecodec, &iter)) != NULL) {
- got_pkts = 1;
-
- if (pkt->kind == AOM_CODEC_CX_FRAME_PKT) {
- const int keyframe = (pkt->data.frame.flags & AOM_FRAME_IS_KEY) != 0;
-
- if (!(pkt->data.frame.flags & AOM_FRAME_IS_FRAGMENT)) {
- *frame_out += 1;
- }
-
- if (!aom_video_writer_write_frame(writer, pkt->data.frame.buf,
- pkt->data.frame.sz,
- pkt->data.frame.pts)) {
- die_codec(ecodec, "Failed to write compressed frame");
- }
- printf(keyframe ? "K" : ".");
- fflush(stdout);
- got_data = 1;
-
- // Decode 1 frame.
- if (test_decode) {
- if (aom_codec_decode(dcodec, pkt->data.frame.buf,
- (unsigned int)pkt->data.frame.sz, NULL))
- die_codec(dcodec, "Failed to decode frame.");
-
- // Copy out first decoded frame, and use it as reference later.
- if (*frame_out == 1 && ext_ref != NULL)
- if (aom_codec_control(dcodec, AV1_COPY_NEW_FRAME_IMAGE, ext_ref))
- die_codec(dcodec, "Failed to get decoder new frame");
- }
- }
- }
-
- // Mismatch checking
- if (got_data && test_decode) {
- testing_decode(ecodec, dcodec, *frame_out, mismatch_seen);
- }
-
- return got_pkts;
-}
-
-int main(int argc, char **argv) {
- FILE *infile = NULL;
- // Encoder
- aom_codec_ctx_t ecodec;
- aom_codec_enc_cfg_t cfg;
- unsigned int frame_in = 0;
- aom_image_t raw;
- aom_image_t raw_shift;
- aom_image_t ext_ref;
- aom_codec_err_t res;
- AvxVideoInfo info;
- AvxVideoWriter *writer = NULL;
- const AvxInterface *encoder = NULL;
- int flags = 0;
- int allocated_raw_shift = 0;
- aom_img_fmt_t raw_fmt = AOM_IMG_FMT_I420;
- aom_img_fmt_t ref_fmt = AOM_IMG_FMT_I420;
-
- // Test encoder/decoder mismatch.
- int test_decode = 1;
- // Decoder
- aom_codec_ctx_t dcodec;
- unsigned int frame_out = 0;
-
- // The frame number to set reference frame on
- unsigned int update_frame_num = 0;
- int mismatch_seen = 0;
-
- const int fps = 30;
- const int bitrate = 500;
-
- const char *codec_arg = NULL;
- const char *width_arg = NULL;
- const char *height_arg = NULL;
- const char *infile_arg = NULL;
- const char *outfile_arg = NULL;
- const char *update_frame_num_arg = NULL;
- unsigned int limit = 0;
- exec_name = argv[0];
-
- // Clear explicitly, as simply assigning "{ 0 }" generates
- // "missing-field-initializers" warning in some compilers.
- memset(&ecodec, 0, sizeof(ecodec));
- memset(&cfg, 0, sizeof(cfg));
- memset(&info, 0, sizeof(info));
-
- if (argc < 7) die("Invalid number of arguments");
-
- codec_arg = argv[1];
- width_arg = argv[2];
- height_arg = argv[3];
- infile_arg = argv[4];
- outfile_arg = argv[5];
- update_frame_num_arg = argv[6];
-
- encoder = get_aom_encoder_by_name(codec_arg);
- if (!encoder) die("Unsupported codec.");
-
- update_frame_num = (unsigned int)strtoul(update_frame_num_arg, NULL, 0);
- // In AV1, the reference buffers (cm->buffer_pool->frame_bufs[i].buf) are
- // allocated while calling aom_codec_encode(), thus, setting reference for
- // 1st frame isn't supported.
- if (update_frame_num <= 1) {
- die("Couldn't parse frame number '%s'\n", update_frame_num_arg);
- }
-
- if (argc > 7) {
- limit = (unsigned int)strtoul(argv[7], NULL, 0);
- if (update_frame_num > limit)
- die("Update frame number couldn't larger than limit\n");
- }
-
- info.codec_fourcc = encoder->fourcc;
- info.frame_width = (int)strtol(width_arg, NULL, 0);
- info.frame_height = (int)strtol(height_arg, NULL, 0);
- info.time_base.numerator = 1;
- info.time_base.denominator = fps;
-
- if (info.frame_width <= 0 || info.frame_height <= 0) {
- die("Invalid frame size: %dx%d", info.frame_width, info.frame_height);
- }
-
- // In this test, the bit depth of input video is 8-bit, and the input format
- // is AOM_IMG_FMT_I420.
- if (!aom_img_alloc(&raw, raw_fmt, info.frame_width, info.frame_height, 32)) {
- die("Failed to allocate image.");
- }
-
- if (!CONFIG_LOWBITDEPTH) ref_fmt |= AOM_IMG_FMT_HIGHBITDEPTH;
- // Allocate memory with the border so that it can be used as a reference.
- if (!aom_img_alloc_with_border(&ext_ref, ref_fmt, info.frame_width,
- info.frame_height, 32, 8,
- AOM_BORDER_IN_PIXELS)) {
- die("Failed to allocate image.");
- }
-
- printf("Using %s\n", aom_codec_iface_name(encoder->codec_interface()));
-
- res = aom_codec_enc_config_default(encoder->codec_interface(), &cfg, 0);
- if (res) die_codec(&ecodec, "Failed to get default codec config.");
-
- cfg.g_w = info.frame_width;
- cfg.g_h = info.frame_height;
- cfg.g_timebase.num = info.time_base.numerator;
- cfg.g_timebase.den = info.time_base.denominator;
- cfg.rc_target_bitrate = bitrate;
- cfg.g_lag_in_frames = 3;
- cfg.g_bit_depth = AOM_BITS_8;
-
- flags |= (cfg.g_bit_depth > AOM_BITS_8 || !CONFIG_LOWBITDEPTH)
- ? AOM_CODEC_USE_HIGHBITDEPTH
- : 0;
-
- writer = aom_video_writer_open(outfile_arg, kContainerIVF, &info);
- if (!writer) die("Failed to open %s for writing.", outfile_arg);
-
- if (!(infile = fopen(infile_arg, "rb")))
- die("Failed to open %s for reading.", infile_arg);
-
- if (aom_codec_enc_init(&ecodec, encoder->codec_interface(), &cfg, flags))
- die_codec(&ecodec, "Failed to initialize encoder");
-
- // Disable alt_ref.
- if (aom_codec_control(&ecodec, AOME_SET_ENABLEAUTOALTREF, 0))
- die_codec(&ecodec, "Failed to set enable auto alt ref");
-
- if (test_decode) {
- const AvxInterface *decoder = get_aom_decoder_by_name(codec_arg);
- if (aom_codec_dec_init(&dcodec, decoder->codec_interface(), NULL, 0))
- die_codec(&dcodec, "Failed to initialize decoder.");
- }
-
- // Encode frames.
- while (aom_img_read(&raw, infile)) {
- if (limit && frame_in >= limit) break;
- aom_image_t *frame_to_encode;
-
- if (!CONFIG_LOWBITDEPTH) {
- // Need to allocate larger buffer to use hbd internal.
- int input_shift = 0;
- if (!allocated_raw_shift) {
- aom_img_alloc(&raw_shift, raw_fmt | AOM_IMG_FMT_HIGHBITDEPTH,
- info.frame_width, info.frame_height, 32);
- allocated_raw_shift = 1;
- }
- aom_img_upshift(&raw_shift, &raw, input_shift);
- frame_to_encode = &raw_shift;
- } else {
- frame_to_encode = &raw;
- }
-
- if (update_frame_num > 1 && frame_out + 1 == update_frame_num) {
- av1_ref_frame_t ref;
- ref.idx = 0;
- ref.use_external_ref = 0;
- ref.img = ext_ref;
- // Set reference frame in encoder.
- if (aom_codec_control(&ecodec, AV1_SET_REFERENCE, &ref))
- die_codec(&ecodec, "Failed to set encoder reference frame");
- printf(" <SET_REF>");
-
- // If set_reference in decoder is commented out, the enc/dec mismatch
- // would be seen.
- if (test_decode) {
- ref.use_external_ref = 1;
- if (aom_codec_control(&dcodec, AV1_SET_REFERENCE, &ref))
- die_codec(&dcodec, "Failed to set decoder reference frame");
- }
- }
-
- encode_frame(&ecodec, frame_to_encode, frame_in, writer, test_decode,
- &dcodec, &frame_out, &mismatch_seen, &ext_ref);
- frame_in++;
- if (mismatch_seen) break;
- }
-
- // Flush encoder.
- if (!mismatch_seen)
- while (encode_frame(&ecodec, NULL, frame_in, writer, test_decode, &dcodec,
- &frame_out, &mismatch_seen, NULL)) {
- }
-
- printf("\n");
- fclose(infile);
- printf("Processed %d frames.\n", frame_out);
-
- if (test_decode) {
- if (!mismatch_seen)
- printf("Encoder/decoder results are matching.\n");
- else
- printf("Encoder/decoder results are NOT matching.\n");
- }
-
- if (test_decode)
- if (aom_codec_destroy(&dcodec))
- die_codec(&dcodec, "Failed to destroy decoder");
-
- if (allocated_raw_shift) aom_img_free(&raw_shift);
- aom_img_free(&ext_ref);
- aom_img_free(&raw);
- if (aom_codec_destroy(&ecodec))
- die_codec(&ecodec, "Failed to destroy encoder.");
-
- aom_video_writer_close(writer);
-
- return EXIT_SUCCESS;
-}
diff --git a/third_party/aom/examples/decode_to_md5.c b/third_party/aom/examples/decode_to_md5.c
deleted file mode 100644
index bc127b78d..000000000
--- a/third_party/aom/examples/decode_to_md5.c
+++ /dev/null
@@ -1,131 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-// Frame-by-frame MD5 Checksum
-// ===========================
-//
-// This example builds upon the simple decoder loop to show how checksums
-// of the decoded output can be generated. These are used for validating
-// decoder implementations against the reference implementation, for example.
-//
-// MD5 algorithm
-// -------------
-// The Message-Digest 5 (MD5) is a well known hash function. We have provided
-// an implementation derived from the RSA Data Security, Inc. MD5 Message-Digest
-// Algorithm for your use. Our implmentation only changes the interface of this
-// reference code. You must include the `md5_utils.h` header for access to these
-// functions.
-//
-// Processing The Decoded Data
-// ---------------------------
-// Each row of the image is passed to the MD5 accumulator. First the Y plane
-// is processed, then U, then V. It is important to honor the image's `stride`
-// values.
-
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-
-#include "aom/aom_decoder.h"
-#include "aom/aomdx.h"
-#include "common/md5_utils.h"
-#include "common/tools_common.h"
-#include "common/video_reader.h"
-
-static void get_image_md5(const aom_image_t *img, unsigned char digest[16]) {
- int plane, y;
- MD5Context md5;
-
- MD5Init(&md5);
-
- for (plane = 0; plane < 3; ++plane) {
- const unsigned char *buf = img->planes[plane];
- const int stride = img->stride[plane];
- const int w = plane ? (img->d_w + 1) >> 1 : img->d_w;
- const int h = plane ? (img->d_h + 1) >> 1 : img->d_h;
-
- for (y = 0; y < h; ++y) {
- MD5Update(&md5, buf, w);
- buf += stride;
- }
- }
-
- MD5Final(digest, &md5);
-}
-
-static void print_md5(FILE *stream, unsigned char digest[16]) {
- int i;
-
- for (i = 0; i < 16; ++i) fprintf(stream, "%02x", digest[i]);
-}
-
-static const char *exec_name;
-
-void usage_exit(void) {
- fprintf(stderr, "Usage: %s <infile> <outfile>\n", exec_name);
- exit(EXIT_FAILURE);
-}
-
-int main(int argc, char **argv) {
- int frame_cnt = 0;
- FILE *outfile = NULL;
- aom_codec_ctx_t codec;
- AvxVideoReader *reader = NULL;
- const AvxVideoInfo *info = NULL;
- const AvxInterface *decoder = NULL;
-
- exec_name = argv[0];
-
- if (argc != 3) die("Invalid number of arguments.");
-
- reader = aom_video_reader_open(argv[1]);
- if (!reader) die("Failed to open %s for reading.", argv[1]);
-
- if (!(outfile = fopen(argv[2], "wb")))
- die("Failed to open %s for writing.", argv[2]);
-
- info = aom_video_reader_get_info(reader);
-
- decoder = get_aom_decoder_by_fourcc(info->codec_fourcc);
- if (!decoder) die("Unknown input codec.");
-
- printf("Using %s\n", aom_codec_iface_name(decoder->codec_interface()));
-
- if (aom_codec_dec_init(&codec, decoder->codec_interface(), NULL, 0))
- die_codec(&codec, "Failed to initialize decoder");
-
- while (aom_video_reader_read_frame(reader)) {
- aom_codec_iter_t iter = NULL;
- aom_image_t *img = NULL;
- size_t frame_size = 0;
- const unsigned char *frame =
- aom_video_reader_get_frame(reader, &frame_size);
- if (aom_codec_decode(&codec, frame, frame_size, NULL))
- die_codec(&codec, "Failed to decode frame");
-
- while ((img = aom_codec_get_frame(&codec, &iter)) != NULL) {
- unsigned char digest[16];
-
- get_image_md5(img, digest);
- print_md5(outfile, digest);
- fprintf(outfile, " img-%dx%d-%04d.i420\n", img->d_w, img->d_h,
- ++frame_cnt);
- }
- }
-
- printf("Processed %d frames.\n", frame_cnt);
- if (aom_codec_destroy(&codec)) die_codec(&codec, "Failed to destroy codec.");
-
- aom_video_reader_close(reader);
-
- fclose(outfile);
- return EXIT_SUCCESS;
-}
diff --git a/third_party/aom/examples/decode_with_drops.c b/third_party/aom/examples/decode_with_drops.c
deleted file mode 100644
index 214401958..000000000
--- a/third_party/aom/examples/decode_with_drops.c
+++ /dev/null
@@ -1,146 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-// Decode With Drops Example
-// =========================
-//
-// This is an example utility which drops a series of frames, as specified
-// on the command line. This is useful for observing the error recovery
-// features of the codec.
-//
-// Usage
-// -----
-// This example adds a single argument to the `simple_decoder` example,
-// which specifies the range or pattern of frames to drop. The parameter is
-// parsed as follows:
-//
-// Dropping A Range Of Frames
-// --------------------------
-// To drop a range of frames, specify the starting frame and the ending
-// frame to drop, separated by a dash. The following command will drop
-// frames 5 through 10 (base 1).
-//
-// $ ./decode_with_drops in.ivf out.i420 5-10
-//
-//
-// Dropping A Pattern Of Frames
-// ----------------------------
-// To drop a pattern of frames, specify the number of frames to drop and
-// the number of frames after which to repeat the pattern, separated by
-// a forward-slash. The following command will drop 3 of 7 frames.
-// Specifically, it will decode 4 frames, then drop 3 frames, and then
-// repeat.
-//
-// $ ./decode_with_drops in.ivf out.i420 3/7
-//
-//
-// Extra Variables
-// ---------------
-// This example maintains the pattern passed on the command line in the
-// `n`, `m`, and `is_range` variables:
-//
-//
-// Making The Drop Decision
-// ------------------------
-// The example decides whether to drop the frame based on the current
-// frame number, immediately before decoding the frame.
-
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-
-#include "aom/aom_decoder.h"
-#include "aom/aomdx.h"
-#include "common/tools_common.h"
-#include "common/video_reader.h"
-
-static const char *exec_name;
-
-void usage_exit(void) {
- fprintf(stderr, "Usage: %s <infile> <outfile> <N-M|N/M>\n", exec_name);
- exit(EXIT_FAILURE);
-}
-
-int main(int argc, char **argv) {
- int frame_cnt = 0;
- FILE *outfile = NULL;
- aom_codec_ctx_t codec;
- const AvxInterface *decoder = NULL;
- AvxVideoReader *reader = NULL;
- const AvxVideoInfo *info = NULL;
- int n = 0;
- int m = 0;
- int is_range = 0;
- char *nptr = NULL;
-
- exec_name = argv[0];
-
- if (argc != 4) die("Invalid number of arguments.");
-
- reader = aom_video_reader_open(argv[1]);
- if (!reader) die("Failed to open %s for reading.", argv[1]);
-
- if (!(outfile = fopen(argv[2], "wb")))
- die("Failed to open %s for writing.", argv[2]);
-
- n = (int)strtol(argv[3], &nptr, 0);
- m = (int)strtol(nptr + 1, NULL, 0);
- is_range = (*nptr == '-');
- if (!n || !m || (*nptr != '-' && *nptr != '/'))
- die("Couldn't parse pattern %s.\n", argv[3]);
-
- info = aom_video_reader_get_info(reader);
-
- decoder = get_aom_decoder_by_fourcc(info->codec_fourcc);
- if (!decoder) die("Unknown input codec.");
-
- printf("Using %s\n", aom_codec_iface_name(decoder->codec_interface()));
-
- if (aom_codec_dec_init(&codec, decoder->codec_interface(), NULL, 0))
- die_codec(&codec, "Failed to initialize decoder.");
-
- while (aom_video_reader_read_frame(reader)) {
- aom_codec_iter_t iter = NULL;
- aom_image_t *img = NULL;
- size_t frame_size = 0;
- int skip;
- const unsigned char *frame =
- aom_video_reader_get_frame(reader, &frame_size);
- ++frame_cnt;
-
- skip = (is_range && frame_cnt >= n && frame_cnt <= m) ||
- (!is_range && m - (frame_cnt - 1) % m <= n);
-
- if (!skip) {
- putc('.', stdout);
- if (aom_codec_decode(&codec, frame, frame_size, NULL))
- die_codec(&codec, "Failed to decode frame.");
-
- while ((img = aom_codec_get_frame(&codec, &iter)) != NULL)
- aom_img_write(img, outfile);
- } else {
- putc('X', stdout);
- }
-
- fflush(stdout);
- }
-
- printf("Processed %d frames.\n", frame_cnt);
- if (aom_codec_destroy(&codec)) die_codec(&codec, "Failed to destroy codec.");
-
- printf("Play: ffplay -f rawvideo -pix_fmt yuv420p -s %dx%d %s\n",
- info->frame_width, info->frame_height, argv[2]);
-
- aom_video_reader_close(reader);
- fclose(outfile);
-
- return EXIT_SUCCESS;
-}
diff --git a/third_party/aom/examples/encoder_util.c b/third_party/aom/examples/encoder_util.c
deleted file mode 100644
index e43b37250..000000000
--- a/third_party/aom/examples/encoder_util.c
+++ /dev/null
@@ -1,136 +0,0 @@
-/*
- * Copyright (c) 2017, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-// Utility functions used by encoder binaries.
-
-#include "examples/encoder_util.h"
-
-#include <assert.h>
-#include <string.h>
-
-#include "aom/aom_integer.h"
-
-#define mmin(a, b) ((a) < (b) ? (a) : (b))
-
-static void find_mismatch_plane(const aom_image_t *const img1,
- const aom_image_t *const img2, int plane,
- int use_highbitdepth, int loc[4]) {
- const unsigned char *const p1 = img1->planes[plane];
- const int p1_stride = img1->stride[plane] >> use_highbitdepth;
- const unsigned char *const p2 = img2->planes[plane];
- const int p2_stride = img2->stride[plane] >> use_highbitdepth;
- const uint32_t bsize = 64;
- const int is_y_plane = (plane == AOM_PLANE_Y);
- const uint32_t bsizex = is_y_plane ? bsize : bsize >> img1->x_chroma_shift;
- const uint32_t bsizey = is_y_plane ? bsize : bsize >> img1->y_chroma_shift;
- const uint32_t c_w =
- is_y_plane ? img1->d_w
- : (img1->d_w + img1->x_chroma_shift) >> img1->x_chroma_shift;
- const uint32_t c_h =
- is_y_plane ? img1->d_h
- : (img1->d_h + img1->y_chroma_shift) >> img1->y_chroma_shift;
- assert(img1->d_w == img2->d_w && img1->d_h == img2->d_h);
- assert(img1->x_chroma_shift == img2->x_chroma_shift &&
- img1->y_chroma_shift == img2->y_chroma_shift);
- loc[0] = loc[1] = loc[2] = loc[3] = -1;
- if (img1->monochrome && img2->monochrome && plane) return;
- int match = 1;
- uint32_t i, j;
- for (i = 0; match && i < c_h; i += bsizey) {
- for (j = 0; match && j < c_w; j += bsizex) {
- const int si =
- is_y_plane ? mmin(i + bsizey, c_h) - i : mmin(i + bsizey, c_h - i);
- const int sj =
- is_y_plane ? mmin(j + bsizex, c_w) - j : mmin(j + bsizex, c_w - j);
- int k, l;
- for (k = 0; match && k < si; ++k) {
- for (l = 0; match && l < sj; ++l) {
- const int row = i + k;
- const int col = j + l;
- const int offset1 = row * p1_stride + col;
- const int offset2 = row * p2_stride + col;
- const int val1 = use_highbitdepth
- ? p1[2 * offset1] | (p1[2 * offset1 + 1] << 8)
- : p1[offset1];
- const int val2 = use_highbitdepth
- ? p2[2 * offset2] | (p2[2 * offset2 + 1] << 8)
- : p2[offset2];
- if (val1 != val2) {
- loc[0] = row;
- loc[1] = col;
- loc[2] = val1;
- loc[3] = val2;
- match = 0;
- break;
- }
- }
- }
- }
- }
-}
-
-static void find_mismatch_helper(const aom_image_t *const img1,
- const aom_image_t *const img2,
- int use_highbitdepth, int yloc[4], int uloc[4],
- int vloc[4]) {
- find_mismatch_plane(img1, img2, AOM_PLANE_Y, use_highbitdepth, yloc);
- find_mismatch_plane(img1, img2, AOM_PLANE_U, use_highbitdepth, uloc);
- find_mismatch_plane(img1, img2, AOM_PLANE_V, use_highbitdepth, vloc);
-}
-
-void aom_find_mismatch_high(const aom_image_t *const img1,
- const aom_image_t *const img2, int yloc[4],
- int uloc[4], int vloc[4]) {
- find_mismatch_helper(img1, img2, 1, yloc, uloc, vloc);
-}
-
-void aom_find_mismatch(const aom_image_t *const img1,
- const aom_image_t *const img2, int yloc[4], int uloc[4],
- int vloc[4]) {
- find_mismatch_helper(img1, img2, 0, yloc, uloc, vloc);
-}
-
-int aom_compare_img(const aom_image_t *const img1,
- const aom_image_t *const img2) {
- assert(img1->cp == img2->cp);
- assert(img1->tc == img2->tc);
- assert(img1->mc == img2->mc);
- assert(img1->monochrome == img2->monochrome);
-
- int num_planes = img1->monochrome ? 1 : 3;
-
- uint32_t l_w = img1->d_w;
- uint32_t c_w = (img1->d_w + img1->x_chroma_shift) >> img1->x_chroma_shift;
- const uint32_t c_h =
- (img1->d_h + img1->y_chroma_shift) >> img1->y_chroma_shift;
- int match = 1;
-
- match &= (img1->fmt == img2->fmt);
- match &= (img1->d_w == img2->d_w);
- match &= (img1->d_h == img2->d_h);
- if (img1->fmt & AOM_IMG_FMT_HIGHBITDEPTH) {
- l_w *= 2;
- c_w *= 2;
- }
-
- for (int plane = 0; plane < num_planes; ++plane) {
- uint32_t height = plane ? c_h : img1->d_h;
- uint32_t width = plane ? c_w : l_w;
-
- for (uint32_t i = 0; i < height; ++i) {
- match &=
- (memcmp(img1->planes[plane] + i * img1->stride[plane],
- img2->planes[plane] + i * img2->stride[plane], width) == 0);
- }
- }
-
- return match;
-}
diff --git a/third_party/aom/examples/encoder_util.h b/third_party/aom/examples/encoder_util.h
deleted file mode 100644
index a6bb3fb48..000000000
--- a/third_party/aom/examples/encoder_util.h
+++ /dev/null
@@ -1,33 +0,0 @@
-/*
- * Copyright (c) 2017, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-// Utility functions used by encoder binaries.
-
-#ifndef AOM_EXAMPLES_ENCODER_UTIL_H_
-#define AOM_EXAMPLES_ENCODER_UTIL_H_
-
-#include "aom/aom_image.h"
-
-// Returns mismatch location (?loc[0],?loc[1]) and the values at that location
-// in img1 (?loc[2]) and img2 (?loc[3]).
-void aom_find_mismatch_high(const aom_image_t *const img1,
- const aom_image_t *const img2, int yloc[4],
- int uloc[4], int vloc[4]);
-
-void aom_find_mismatch(const aom_image_t *const img1,
- const aom_image_t *const img2, int yloc[4], int uloc[4],
- int vloc[4]);
-
-// Returns 1 if the two images match.
-int aom_compare_img(const aom_image_t *const img1,
- const aom_image_t *const img2);
-
-#endif // AOM_EXAMPLES_ENCODER_UTIL_H_
diff --git a/third_party/aom/examples/inspect.c b/third_party/aom/examples/inspect.c
deleted file mode 100644
index 9d5f0dcfc..000000000
--- a/third_party/aom/examples/inspect.c
+++ /dev/null
@@ -1,763 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-// Inspect Decoder
-// ================
-//
-// This is a simple decoder loop that writes JSON stats to stdout. This tool
-// can also be compiled with Emscripten and used as a library.
-
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-
-#ifdef __EMSCRIPTEN__
-#include <emscripten.h>
-#else
-#define EMSCRIPTEN_KEEPALIVE
-#endif
-
-#include "config/aom_config.h"
-
-#include "aom/aom_decoder.h"
-#include "aom/aomdx.h"
-#include "av1/common/onyxc_int.h"
-
-#if CONFIG_ACCOUNTING
-#include "av1/decoder/accounting.h"
-#endif
-
-#include "av1/decoder/inspection.h"
-#include "common/args.h"
-#include "common/tools_common.h"
-#include "common/video_common.h"
-#include "common/video_reader.h"
-
-// Max JSON buffer size.
-const int MAX_BUFFER = 1024 * 1024 * 32;
-
-typedef enum {
- ACCOUNTING_LAYER = 1,
- BLOCK_SIZE_LAYER = 1 << 1,
- TRANSFORM_SIZE_LAYER = 1 << 2,
- TRANSFORM_TYPE_LAYER = 1 << 3,
- MODE_LAYER = 1 << 4,
- SKIP_LAYER = 1 << 5,
- FILTER_LAYER = 1 << 6,
- CDEF_LAYER = 1 << 7,
- REFERENCE_FRAME_LAYER = 1 << 8,
- MOTION_VECTORS_LAYER = 1 << 9,
- UV_MODE_LAYER = 1 << 10,
- CFL_LAYER = 1 << 11,
- DUAL_FILTER_LAYER = 1 << 12,
- Q_INDEX_LAYER = 1 << 13,
- SEGMENT_ID_LAYER = 1 << 14,
- ALL_LAYERS = (1 << 15) - 1
-} LayerType;
-
-static LayerType layers = 0;
-
-static int stop_after = 0;
-static int compress = 0;
-
-static const arg_def_t limit_arg =
- ARG_DEF(NULL, "limit", 1, "Stop decoding after n frames");
-static const arg_def_t dump_all_arg = ARG_DEF("A", "all", 0, "Dump All");
-static const arg_def_t compress_arg =
- ARG_DEF("x", "compress", 0, "Compress JSON using RLE");
-static const arg_def_t dump_accounting_arg =
- ARG_DEF("a", "accounting", 0, "Dump Accounting");
-static const arg_def_t dump_block_size_arg =
- ARG_DEF("bs", "blockSize", 0, "Dump Block Size");
-static const arg_def_t dump_motion_vectors_arg =
- ARG_DEF("mv", "motionVectors", 0, "Dump Motion Vectors");
-static const arg_def_t dump_transform_size_arg =
- ARG_DEF("ts", "transformSize", 0, "Dump Transform Size");
-static const arg_def_t dump_transform_type_arg =
- ARG_DEF("tt", "transformType", 0, "Dump Transform Type");
-static const arg_def_t dump_mode_arg = ARG_DEF("m", "mode", 0, "Dump Mode");
-static const arg_def_t dump_uv_mode_arg =
- ARG_DEF("uvm", "uv_mode", 0, "Dump UV Intra Prediction Modes");
-static const arg_def_t dump_skip_arg = ARG_DEF("s", "skip", 0, "Dump Skip");
-static const arg_def_t dump_filter_arg =
- ARG_DEF("f", "filter", 0, "Dump Filter");
-static const arg_def_t dump_cdef_arg = ARG_DEF("c", "cdef", 0, "Dump CDEF");
-static const arg_def_t dump_cfl_arg =
- ARG_DEF("cfl", "chroma_from_luma", 0, "Dump Chroma from Luma Alphas");
-static const arg_def_t dump_dual_filter_type_arg =
- ARG_DEF("df", "dualFilterType", 0, "Dump Dual Filter Type");
-static const arg_def_t dump_reference_frame_arg =
- ARG_DEF("r", "referenceFrame", 0, "Dump Reference Frame");
-static const arg_def_t dump_delta_q_arg =
- ARG_DEF("dq", "delta_q", 0, "Dump QIndex");
-static const arg_def_t dump_seg_id_arg =
- ARG_DEF("si", "seg_id", 0, "Dump Segment ID");
-static const arg_def_t usage_arg = ARG_DEF("h", "help", 0, "Help");
-
-static const arg_def_t *main_args[] = { &limit_arg,
- &dump_all_arg,
- &compress_arg,
-#if CONFIG_ACCOUNTING
- &dump_accounting_arg,
-#endif
- &dump_block_size_arg,
- &dump_transform_size_arg,
- &dump_transform_type_arg,
- &dump_mode_arg,
- &dump_uv_mode_arg,
- &dump_skip_arg,
- &dump_filter_arg,
- &dump_cdef_arg,
- &dump_dual_filter_type_arg,
- &dump_cfl_arg,
- &dump_reference_frame_arg,
- &dump_motion_vectors_arg,
- &dump_delta_q_arg,
- &dump_seg_id_arg,
- &usage_arg,
- NULL };
-#define ENUM(name) \
- { #name, name }
-#define LAST_ENUM \
- { NULL, 0 }
-typedef struct map_entry {
- const char *name;
- int value;
-} map_entry;
-
-const map_entry refs_map[] = {
- ENUM(INTRA_FRAME), ENUM(LAST_FRAME), ENUM(LAST2_FRAME),
- ENUM(LAST3_FRAME), ENUM(GOLDEN_FRAME), ENUM(BWDREF_FRAME),
- ENUM(ALTREF2_FRAME), ENUM(ALTREF_FRAME), LAST_ENUM
-};
-
-const map_entry block_size_map[] = {
- ENUM(BLOCK_4X4), ENUM(BLOCK_4X8), ENUM(BLOCK_8X4),
- ENUM(BLOCK_8X8), ENUM(BLOCK_8X16), ENUM(BLOCK_16X8),
- ENUM(BLOCK_16X16), ENUM(BLOCK_16X32), ENUM(BLOCK_32X16),
- ENUM(BLOCK_32X32), ENUM(BLOCK_32X64), ENUM(BLOCK_64X32),
- ENUM(BLOCK_64X64), ENUM(BLOCK_64X128), ENUM(BLOCK_128X64),
- ENUM(BLOCK_128X128), ENUM(BLOCK_4X16), ENUM(BLOCK_16X4),
- ENUM(BLOCK_8X32), ENUM(BLOCK_32X8), ENUM(BLOCK_16X64),
- ENUM(BLOCK_64X16), LAST_ENUM
-};
-
-const map_entry tx_size_map[] = {
- ENUM(TX_4X4), ENUM(TX_8X8), ENUM(TX_16X16), ENUM(TX_32X32),
- ENUM(TX_64X64), ENUM(TX_4X8), ENUM(TX_8X4), ENUM(TX_8X16),
- ENUM(TX_16X8), ENUM(TX_16X32), ENUM(TX_32X16), ENUM(TX_32X64),
- ENUM(TX_64X32), ENUM(TX_4X16), ENUM(TX_16X4), ENUM(TX_8X32),
- ENUM(TX_32X8), LAST_ENUM
-};
-
-const map_entry tx_type_map[] = { ENUM(DCT_DCT),
- ENUM(ADST_DCT),
- ENUM(DCT_ADST),
- ENUM(ADST_ADST),
- ENUM(FLIPADST_DCT),
- ENUM(DCT_FLIPADST),
- ENUM(FLIPADST_FLIPADST),
- ENUM(ADST_FLIPADST),
- ENUM(FLIPADST_ADST),
- ENUM(IDTX),
- ENUM(V_DCT),
- ENUM(H_DCT),
- ENUM(V_ADST),
- ENUM(H_ADST),
- ENUM(V_FLIPADST),
- ENUM(H_FLIPADST),
- LAST_ENUM };
-const map_entry dual_filter_map[] = { ENUM(REG_REG), ENUM(REG_SMOOTH),
- ENUM(REG_SHARP), ENUM(SMOOTH_REG),
- ENUM(SMOOTH_SMOOTH), ENUM(SMOOTH_SHARP),
- ENUM(SHARP_REG), ENUM(SHARP_SMOOTH),
- ENUM(SHARP_SHARP), LAST_ENUM };
-
-const map_entry prediction_mode_map[] = {
- ENUM(DC_PRED), ENUM(V_PRED), ENUM(H_PRED),
- ENUM(D45_PRED), ENUM(D135_PRED), ENUM(D113_PRED),
- ENUM(D157_PRED), ENUM(D203_PRED), ENUM(D67_PRED),
- ENUM(SMOOTH_PRED), ENUM(SMOOTH_V_PRED), ENUM(SMOOTH_H_PRED),
- ENUM(PAETH_PRED), ENUM(NEARESTMV), ENUM(NEARMV),
- ENUM(GLOBALMV), ENUM(NEWMV), ENUM(NEAREST_NEARESTMV),
- ENUM(NEAR_NEARMV), ENUM(NEAREST_NEWMV), ENUM(NEW_NEARESTMV),
- ENUM(NEAR_NEWMV), ENUM(NEW_NEARMV), ENUM(GLOBAL_GLOBALMV),
- ENUM(NEW_NEWMV), ENUM(INTRA_INVALID), LAST_ENUM
-};
-
-const map_entry uv_prediction_mode_map[] = {
- ENUM(UV_DC_PRED), ENUM(UV_V_PRED),
- ENUM(UV_H_PRED), ENUM(UV_D45_PRED),
- ENUM(UV_D135_PRED), ENUM(UV_D113_PRED),
- ENUM(UV_D157_PRED), ENUM(UV_D203_PRED),
- ENUM(UV_D67_PRED), ENUM(UV_SMOOTH_PRED),
- ENUM(UV_SMOOTH_V_PRED), ENUM(UV_SMOOTH_H_PRED),
- ENUM(UV_PAETH_PRED), ENUM(UV_CFL_PRED),
- ENUM(UV_MODE_INVALID), LAST_ENUM
-};
-#define NO_SKIP 0
-#define SKIP 1
-
-const map_entry skip_map[] = { ENUM(SKIP), ENUM(NO_SKIP), LAST_ENUM };
-
-const map_entry config_map[] = { ENUM(MI_SIZE), LAST_ENUM };
-
-static const char *exec_name;
-
-insp_frame_data frame_data;
-int frame_count = 0;
-int decoded_frame_count = 0;
-aom_codec_ctx_t codec;
-AvxVideoReader *reader = NULL;
-const AvxVideoInfo *info = NULL;
-aom_image_t *img = NULL;
-
-void on_frame_decoded_dump(char *json) {
-#ifdef __EMSCRIPTEN__
- EM_ASM_({ Module.on_frame_decoded_json($0); }, json);
-#else
- printf("%s", json);
-#endif
-}
-
-// Writing out the JSON buffer using snprintf is very slow, especially when
-// compiled with emscripten, these functions speed things up quite a bit.
-int put_str(char *buffer, const char *str) {
- int i;
- for (i = 0; str[i] != '\0'; i++) {
- buffer[i] = str[i];
- }
- return i;
-}
-
-int put_str_with_escape(char *buffer, const char *str) {
- int i;
- int j = 0;
- for (i = 0; str[i] != '\0'; i++) {
- if (str[i] < ' ') {
- continue;
- } else if (str[i] == '"' || str[i] == '\\') {
- buffer[j++] = '\\';
- }
- buffer[j++] = str[i];
- }
- return j;
-}
-
-int put_num(char *buffer, char prefix, int num, char suffix) {
- int i = 0;
- char *buf = buffer;
- int is_neg = 0;
- if (prefix) {
- buf[i++] = prefix;
- }
- if (num == 0) {
- buf[i++] = '0';
- } else {
- if (num < 0) {
- num = -num;
- is_neg = 1;
- }
- int s = i;
- while (num != 0) {
- buf[i++] = '0' + (num % 10);
- num = num / 10;
- }
- if (is_neg) {
- buf[i++] = '-';
- }
- int e = i - 1;
- while (s < e) {
- int t = buf[s];
- buf[s] = buf[e];
- buf[e] = t;
- s++;
- e--;
- }
- }
- if (suffix) {
- buf[i++] = suffix;
- }
- return i;
-}
-
-int put_map(char *buffer, const map_entry *map) {
- char *buf = buffer;
- const map_entry *entry = map;
- while (entry->name != NULL) {
- *(buf++) = '"';
- buf += put_str(buf, entry->name);
- *(buf++) = '"';
- buf += put_num(buf, ':', entry->value, 0);
- entry++;
- if (entry->name != NULL) {
- *(buf++) = ',';
- }
- }
- return (int)(buf - buffer);
-}
-
-int put_reference_frame(char *buffer) {
- const int mi_rows = frame_data.mi_rows;
- const int mi_cols = frame_data.mi_cols;
- char *buf = buffer;
- int r, c, t;
- buf += put_str(buf, " \"referenceFrameMap\": {");
- buf += put_map(buf, refs_map);
- buf += put_str(buf, "},\n");
- buf += put_str(buf, " \"referenceFrame\": [");
- for (r = 0; r < mi_rows; ++r) {
- *(buf++) = '[';
- for (c = 0; c < mi_cols; ++c) {
- insp_mi_data *mi = &frame_data.mi_grid[r * mi_cols + c];
- buf += put_num(buf, '[', mi->ref_frame[0], 0);
- buf += put_num(buf, ',', mi->ref_frame[1], ']');
- if (compress) { // RLE
- for (t = c + 1; t < mi_cols; ++t) {
- insp_mi_data *next_mi = &frame_data.mi_grid[r * mi_cols + t];
- if (mi->ref_frame[0] != next_mi->ref_frame[0] ||
- mi->ref_frame[1] != next_mi->ref_frame[1]) {
- break;
- }
- }
- if (t - c > 1) {
- *(buf++) = ',';
- buf += put_num(buf, '[', t - c - 1, ']');
- c = t - 1;
- }
- }
- if (c < mi_cols - 1) *(buf++) = ',';
- }
- *(buf++) = ']';
- if (r < mi_rows - 1) *(buf++) = ',';
- }
- buf += put_str(buf, "],\n");
- return (int)(buf - buffer);
-}
-
-int put_motion_vectors(char *buffer) {
- const int mi_rows = frame_data.mi_rows;
- const int mi_cols = frame_data.mi_cols;
- char *buf = buffer;
- int r, c, t;
- buf += put_str(buf, " \"motionVectors\": [");
- for (r = 0; r < mi_rows; ++r) {
- *(buf++) = '[';
- for (c = 0; c < mi_cols; ++c) {
- insp_mi_data *mi = &frame_data.mi_grid[r * mi_cols + c];
- buf += put_num(buf, '[', mi->mv[0].col, 0);
- buf += put_num(buf, ',', mi->mv[0].row, 0);
- buf += put_num(buf, ',', mi->mv[1].col, 0);
- buf += put_num(buf, ',', mi->mv[1].row, ']');
- if (compress) { // RLE
- for (t = c + 1; t < mi_cols; ++t) {
- insp_mi_data *next_mi = &frame_data.mi_grid[r * mi_cols + t];
- if (mi->mv[0].col != next_mi->mv[0].col ||
- mi->mv[0].row != next_mi->mv[0].row ||
- mi->mv[1].col != next_mi->mv[1].col ||
- mi->mv[1].row != next_mi->mv[1].row) {
- break;
- }
- }
- if (t - c > 1) {
- *(buf++) = ',';
- buf += put_num(buf, '[', t - c - 1, ']');
- c = t - 1;
- }
- }
- if (c < mi_cols - 1) *(buf++) = ',';
- }
- *(buf++) = ']';
- if (r < mi_rows - 1) *(buf++) = ',';
- }
- buf += put_str(buf, "],\n");
- return (int)(buf - buffer);
-}
-
-int put_block_info(char *buffer, const map_entry *map, const char *name,
- size_t offset, int len) {
- const int mi_rows = frame_data.mi_rows;
- const int mi_cols = frame_data.mi_cols;
- char *buf = buffer;
- int r, c, t, i;
- if (compress && len == 1) {
- die("Can't encode scalars as arrays when RLE compression is enabled.");
- return -1;
- }
- if (map) {
- buf += snprintf(buf, MAX_BUFFER, " \"%sMap\": {", name);
- buf += put_map(buf, map);
- buf += put_str(buf, "},\n");
- }
- buf += snprintf(buf, MAX_BUFFER, " \"%s\": [", name);
- for (r = 0; r < mi_rows; ++r) {
- *(buf++) = '[';
- for (c = 0; c < mi_cols; ++c) {
- insp_mi_data *mi = &frame_data.mi_grid[r * mi_cols + c];
- int16_t *v = (int16_t *)(((int8_t *)mi) + offset);
- if (len == 0) {
- buf += put_num(buf, 0, v[0], 0);
- } else {
- buf += put_str(buf, "[");
- for (i = 0; i < len; i++) {
- buf += put_num(buf, 0, v[i], 0);
- if (i < len - 1) {
- buf += put_str(buf, ",");
- }
- }
- buf += put_str(buf, "]");
- }
- if (compress) { // RLE
- for (t = c + 1; t < mi_cols; ++t) {
- insp_mi_data *next_mi = &frame_data.mi_grid[r * mi_cols + t];
- int16_t *nv = (int16_t *)(((int8_t *)next_mi) + offset);
- int same = 0;
- if (len == 0) {
- same = v[0] == nv[0];
- } else {
- for (i = 0; i < len; i++) {
- same = v[i] == nv[i];
- if (!same) {
- break;
- }
- }
- }
- if (!same) {
- break;
- }
- }
- if (t - c > 1) {
- *(buf++) = ',';
- buf += put_num(buf, '[', t - c - 1, ']');
- c = t - 1;
- }
- }
- if (c < mi_cols - 1) *(buf++) = ',';
- }
- *(buf++) = ']';
- if (r < mi_rows - 1) *(buf++) = ',';
- }
- buf += put_str(buf, "],\n");
- return (int)(buf - buffer);
-}
-
-#if CONFIG_ACCOUNTING
-int put_accounting(char *buffer) {
- char *buf = buffer;
- int i;
- const Accounting *accounting = frame_data.accounting;
- if (accounting == NULL) {
- printf("XXX\n");
- return 0;
- }
- const int num_syms = accounting->syms.num_syms;
- const int num_strs = accounting->syms.dictionary.num_strs;
- buf += put_str(buf, " \"symbolsMap\": [");
- for (i = 0; i < num_strs; i++) {
- buf += snprintf(buf, MAX_BUFFER, "\"%s\"",
- accounting->syms.dictionary.strs[i]);
- if (i < num_strs - 1) *(buf++) = ',';
- }
- buf += put_str(buf, "],\n");
- buf += put_str(buf, " \"symbols\": [\n ");
- AccountingSymbolContext context;
- context.x = -2;
- context.y = -2;
- AccountingSymbol *sym;
- for (i = 0; i < num_syms; i++) {
- sym = &accounting->syms.syms[i];
- if (memcmp(&context, &sym->context, sizeof(AccountingSymbolContext)) != 0) {
- buf += put_num(buf, '[', sym->context.x, 0);
- buf += put_num(buf, ',', sym->context.y, ']');
- } else {
- buf += put_num(buf, '[', sym->id, 0);
- buf += put_num(buf, ',', sym->bits, 0);
- buf += put_num(buf, ',', sym->samples, ']');
- }
- context = sym->context;
- if (i < num_syms - 1) *(buf++) = ',';
- }
- buf += put_str(buf, "],\n");
- return (int)(buf - buffer);
-}
-#endif
-
-void inspect(void *pbi, void *data) {
- /* Fetch frame data. */
- ifd_inspect(&frame_data, pbi);
- (void)data;
- // We allocate enough space and hope we don't write out of bounds. Totally
- // unsafe but this speeds things up, especially when compiled to Javascript.
- char *buffer = aom_malloc(MAX_BUFFER);
- char *buf = buffer;
- buf += put_str(buf, "{\n");
- if (layers & BLOCK_SIZE_LAYER) {
- buf += put_block_info(buf, block_size_map, "blockSize",
- offsetof(insp_mi_data, sb_type), 0);
- }
- if (layers & TRANSFORM_SIZE_LAYER) {
- buf += put_block_info(buf, tx_size_map, "transformSize",
- offsetof(insp_mi_data, tx_size), 0);
- }
- if (layers & TRANSFORM_TYPE_LAYER) {
- buf += put_block_info(buf, tx_type_map, "transformType",
- offsetof(insp_mi_data, tx_type), 0);
- }
- if (layers & DUAL_FILTER_LAYER) {
- buf += put_block_info(buf, dual_filter_map, "dualFilterType",
- offsetof(insp_mi_data, dual_filter_type), 0);
- }
- if (layers & MODE_LAYER) {
- buf += put_block_info(buf, prediction_mode_map, "mode",
- offsetof(insp_mi_data, mode), 0);
- }
- if (layers & UV_MODE_LAYER) {
- buf += put_block_info(buf, uv_prediction_mode_map, "uv_mode",
- offsetof(insp_mi_data, uv_mode), 0);
- }
- if (layers & SKIP_LAYER) {
- buf +=
- put_block_info(buf, skip_map, "skip", offsetof(insp_mi_data, skip), 0);
- }
- if (layers & FILTER_LAYER) {
- buf +=
- put_block_info(buf, NULL, "filter", offsetof(insp_mi_data, filter), 2);
- }
- if (layers & CDEF_LAYER) {
- buf += put_block_info(buf, NULL, "cdef_level",
- offsetof(insp_mi_data, cdef_level), 0);
- buf += put_block_info(buf, NULL, "cdef_strength",
- offsetof(insp_mi_data, cdef_strength), 0);
- }
- if (layers & CFL_LAYER) {
- buf += put_block_info(buf, NULL, "cfl_alpha_idx",
- offsetof(insp_mi_data, cfl_alpha_idx), 0);
- buf += put_block_info(buf, NULL, "cfl_alpha_sign",
- offsetof(insp_mi_data, cfl_alpha_sign), 0);
- }
- if (layers & Q_INDEX_LAYER) {
- buf += put_block_info(buf, NULL, "delta_q",
- offsetof(insp_mi_data, current_qindex), 0);
- }
- if (layers & SEGMENT_ID_LAYER) {
- buf += put_block_info(buf, NULL, "seg_id",
- offsetof(insp_mi_data, segment_id), 0);
- }
- if (layers & MOTION_VECTORS_LAYER) {
- buf += put_motion_vectors(buf);
- }
- if (layers & REFERENCE_FRAME_LAYER) {
- buf += put_block_info(buf, refs_map, "referenceFrame",
- offsetof(insp_mi_data, ref_frame), 2);
- }
-#if CONFIG_ACCOUNTING
- if (layers & ACCOUNTING_LAYER) {
- buf += put_accounting(buf);
- }
-#endif
- buf += snprintf(buf, MAX_BUFFER, " \"frame\": %d,\n", decoded_frame_count);
- buf += snprintf(buf, MAX_BUFFER, " \"showFrame\": %d,\n",
- frame_data.show_frame);
- buf += snprintf(buf, MAX_BUFFER, " \"frameType\": %d,\n",
- frame_data.frame_type);
- buf += snprintf(buf, MAX_BUFFER, " \"baseQIndex\": %d,\n",
- frame_data.base_qindex);
- buf += snprintf(buf, MAX_BUFFER, " \"tileCols\": %d,\n",
- frame_data.tile_mi_cols);
- buf += snprintf(buf, MAX_BUFFER, " \"tileRows\": %d,\n",
- frame_data.tile_mi_rows);
- buf += snprintf(buf, MAX_BUFFER, " \"deltaQPresentFlag\": %d,\n",
- frame_data.delta_q_present_flag);
- buf += snprintf(buf, MAX_BUFFER, " \"deltaQRes\": %d,\n",
- frame_data.delta_q_res);
- buf += put_str(buf, " \"config\": {");
- buf += put_map(buf, config_map);
- buf += put_str(buf, "},\n");
- buf += put_str(buf, " \"configString\": \"");
- buf += put_str_with_escape(buf, aom_codec_build_config());
- buf += put_str(buf, "\"\n");
- decoded_frame_count++;
- buf += put_str(buf, "},\n");
- *(buf++) = 0;
- on_frame_decoded_dump(buffer);
- aom_free(buffer);
-}
-
-void ifd_init_cb() {
- aom_inspect_init ii;
- ii.inspect_cb = inspect;
- ii.inspect_ctx = NULL;
- aom_codec_control(&codec, AV1_SET_INSPECTION_CALLBACK, &ii);
-}
-
-EMSCRIPTEN_KEEPALIVE
-int open_file(char *file) {
- if (file == NULL) {
- // The JS analyzer puts the .ivf file at this location.
- file = "/tmp/input.ivf";
- }
- reader = aom_video_reader_open(file);
- if (!reader) die("Failed to open %s for reading.", file);
- info = aom_video_reader_get_info(reader);
- const AvxInterface *decoder = get_aom_decoder_by_fourcc(info->codec_fourcc);
- if (!decoder) die("Unknown input codec.");
- fprintf(stderr, "Using %s\n",
- aom_codec_iface_name(decoder->codec_interface()));
- if (aom_codec_dec_init(&codec, decoder->codec_interface(), NULL, 0))
- die_codec(&codec, "Failed to initialize decoder.");
- ifd_init(&frame_data, info->frame_width, info->frame_height);
- ifd_init_cb();
- return EXIT_SUCCESS;
-}
-
-EMSCRIPTEN_KEEPALIVE
-int read_frame() {
- if (!aom_video_reader_read_frame(reader)) return EXIT_FAILURE;
- img = NULL;
- aom_codec_iter_t iter = NULL;
- size_t frame_size = 0;
- const unsigned char *frame = aom_video_reader_get_frame(reader, &frame_size);
- if (aom_codec_decode(&codec, frame, (unsigned int)frame_size, NULL) !=
- AOM_CODEC_OK) {
- die_codec(&codec, "Failed to decode frame.");
- }
- int got_any_frames = 0;
- aom_image_t *frame_img;
- while ((frame_img = aom_codec_get_frame(&codec, &iter))) {
- img = frame_img;
- ++frame_count;
- got_any_frames = 1;
- }
- if (!got_any_frames) {
- return EXIT_FAILURE;
- }
- return EXIT_SUCCESS;
-}
-
-EMSCRIPTEN_KEEPALIVE
-const char *get_aom_codec_build_config() { return aom_codec_build_config(); }
-
-EMSCRIPTEN_KEEPALIVE
-int get_bit_depth() { return img->bit_depth; }
-
-EMSCRIPTEN_KEEPALIVE
-int get_bits_per_sample() { return img->bps; }
-
-EMSCRIPTEN_KEEPALIVE
-int get_image_format() { return img->fmt; }
-
-EMSCRIPTEN_KEEPALIVE
-unsigned char *get_plane(int plane) { return img->planes[plane]; }
-
-EMSCRIPTEN_KEEPALIVE
-int get_plane_stride(int plane) { return img->stride[plane]; }
-
-EMSCRIPTEN_KEEPALIVE
-int get_plane_width(int plane) { return aom_img_plane_width(img, plane); }
-
-EMSCRIPTEN_KEEPALIVE
-int get_plane_height(int plane) { return aom_img_plane_height(img, plane); }
-
-EMSCRIPTEN_KEEPALIVE
-int get_frame_width() { return info->frame_width; }
-
-EMSCRIPTEN_KEEPALIVE
-int get_frame_height() { return info->frame_height; }
-
-static void parse_args(char **argv) {
- char **argi, **argj;
- struct arg arg;
- (void)dump_accounting_arg;
- (void)dump_cdef_arg;
- for (argi = argj = argv; (*argj = *argi); argi += arg.argv_step) {
- arg.argv_step = 1;
- if (arg_match(&arg, &dump_block_size_arg, argi)) layers |= BLOCK_SIZE_LAYER;
-#if CONFIG_ACCOUNTING
- else if (arg_match(&arg, &dump_accounting_arg, argi))
- layers |= ACCOUNTING_LAYER;
-#endif
- else if (arg_match(&arg, &dump_transform_size_arg, argi))
- layers |= TRANSFORM_SIZE_LAYER;
- else if (arg_match(&arg, &dump_transform_type_arg, argi))
- layers |= TRANSFORM_TYPE_LAYER;
- else if (arg_match(&arg, &dump_mode_arg, argi))
- layers |= MODE_LAYER;
- else if (arg_match(&arg, &dump_uv_mode_arg, argi))
- layers |= UV_MODE_LAYER;
- else if (arg_match(&arg, &dump_skip_arg, argi))
- layers |= SKIP_LAYER;
- else if (arg_match(&arg, &dump_filter_arg, argi))
- layers |= FILTER_LAYER;
- else if (arg_match(&arg, &dump_cdef_arg, argi))
- layers |= CDEF_LAYER;
- else if (arg_match(&arg, &dump_cfl_arg, argi))
- layers |= CFL_LAYER;
- else if (arg_match(&arg, &dump_reference_frame_arg, argi))
- layers |= REFERENCE_FRAME_LAYER;
- else if (arg_match(&arg, &dump_motion_vectors_arg, argi))
- layers |= MOTION_VECTORS_LAYER;
- else if (arg_match(&arg, &dump_dual_filter_type_arg, argi))
- layers |= DUAL_FILTER_LAYER;
- else if (arg_match(&arg, &dump_delta_q_arg, argi))
- layers |= Q_INDEX_LAYER;
- else if (arg_match(&arg, &dump_seg_id_arg, argi))
- layers |= SEGMENT_ID_LAYER;
- else if (arg_match(&arg, &dump_all_arg, argi))
- layers |= ALL_LAYERS;
- else if (arg_match(&arg, &compress_arg, argi))
- compress = 1;
- else if (arg_match(&arg, &usage_arg, argi))
- usage_exit();
- else if (arg_match(&arg, &limit_arg, argi))
- stop_after = arg_parse_uint(&arg);
- else
- argj++;
- }
-}
-
-static const char *exec_name;
-
-void usage_exit(void) {
- fprintf(stderr, "Usage: %s src_filename <options>\n", exec_name);
- fprintf(stderr, "\nOptions:\n");
- arg_show_usage(stderr, main_args);
- exit(EXIT_FAILURE);
-}
-
-EMSCRIPTEN_KEEPALIVE
-int main(int argc, char **argv) {
- exec_name = argv[0];
- parse_args(argv);
- if (argc >= 2) {
- open_file(argv[1]);
- printf("[\n");
- while (1) {
- if (stop_after && (decoded_frame_count >= stop_after)) break;
- if (read_frame()) break;
- }
- printf("null\n");
- printf("]");
- } else {
- usage_exit();
- }
-}
-
-EMSCRIPTEN_KEEPALIVE
-void quit() {
- if (aom_codec_destroy(&codec)) die_codec(&codec, "Failed to destroy codec");
- aom_video_reader_close(reader);
-}
-
-EMSCRIPTEN_KEEPALIVE
-void set_layers(LayerType v) { layers = v; }
-
-EMSCRIPTEN_KEEPALIVE
-void set_compress(int v) { compress = v; }
diff --git a/third_party/aom/examples/lightfield_bitstream_parsing.c b/third_party/aom/examples/lightfield_bitstream_parsing.c
deleted file mode 100644
index 159f1617a..000000000
--- a/third_party/aom/examples/lightfield_bitstream_parsing.c
+++ /dev/null
@@ -1,348 +0,0 @@
-/*
- * Copyright (c) 2018, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-// Lightfield Bitstream Parsing
-// ============================
-//
-// This is a lightfield bitstream parsing example. It takes an input file
-// containing the whole compressed lightfield bitstream(ivf file), and parses it
-// and constructs and outputs a new bitstream that can be decoded by an AV1
-// decoder. The output bitstream contains reference frames(i.e. anchor frames),
-// camera frame header, and tile list OBUs. num_references is the number of
-// anchor frames coded at the beginning of the light field file.
-// After running the lightfield encoder, run lightfield bitstream parsing:
-// examples/lightfield_bitstream_parsing vase10x10.ivf vase_tile_list.ivf 4
-
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-
-#include "aom/aom_decoder.h"
-#include "aom/aom_encoder.h"
-#include "aom/aom_integer.h"
-#include "aom/aomdx.h"
-#include "aom_dsp/bitwriter_buffer.h"
-#include "common/tools_common.h"
-#include "common/video_reader.h"
-#include "common/video_writer.h"
-
-#define MAX_TILES 512
-
-static const char *exec_name;
-
-void usage_exit(void) {
- fprintf(stderr, "Usage: %s <infile> <outfile> <num_references> \n",
- exec_name);
- exit(EXIT_FAILURE);
-}
-
-#define ALIGN_POWER_OF_TWO(value, n) \
- (((value) + ((1 << (n)) - 1)) & ~((1 << (n)) - 1))
-
-// SB size: 64x64
-const uint8_t output_frame_width_in_tiles_minus_1 = 512 / 64 - 1;
-const uint8_t output_frame_height_in_tiles_minus_1 = 512 / 64 - 1;
-
-// Spec:
-// typedef struct {
-// uint8_t anchor_frame_idx;
-// uint8_t tile_row;
-// uint8_t tile_col;
-// uint16_t coded_tile_data_size_minus_1;
-// uint8_t *coded_tile_data;
-// } TILE_LIST_ENTRY;
-
-// Tile list entry provided by the application
-typedef struct {
- int image_idx;
- int reference_idx;
- int tile_col;
- int tile_row;
-} TILE_LIST_INFO;
-
-// M references: 0 - M-1; N images(including references): 0 - N-1;
-// Note: order the image index incrementally, so that we only go through the
-// bitstream once to construct the tile list.
-const int num_tile_lists = 2;
-const uint16_t tile_count_minus_1 = 9 - 1;
-const TILE_LIST_INFO tile_list[2][9] = {
- { { 16, 0, 4, 5 },
- { 83, 3, 13, 2 },
- { 57, 2, 2, 6 },
- { 31, 1, 11, 5 },
- { 2, 0, 7, 4 },
- { 77, 3, 9, 9 },
- { 49, 1, 0, 1 },
- { 6, 0, 3, 10 },
- { 63, 2, 5, 8 } },
- { { 65, 2, 11, 1 },
- { 42, 1, 3, 7 },
- { 88, 3, 8, 4 },
- { 76, 3, 1, 15 },
- { 1, 0, 2, 2 },
- { 19, 0, 5, 6 },
- { 60, 2, 4, 0 },
- { 25, 1, 11, 15 },
- { 50, 2, 5, 4 } },
-};
-
-static int get_image_bps(aom_img_fmt_t fmt) {
- switch (fmt) {
- case AOM_IMG_FMT_I420: return 12;
- case AOM_IMG_FMT_I422: return 16;
- case AOM_IMG_FMT_I444: return 24;
- case AOM_IMG_FMT_I42016: return 24;
- case AOM_IMG_FMT_I42216: return 32;
- case AOM_IMG_FMT_I44416: return 48;
- default: die("Invalid image format");
- }
- return 0;
-}
-
-int main(int argc, char **argv) {
- aom_codec_ctx_t codec;
- AvxVideoReader *reader = NULL;
- AvxVideoWriter *writer = NULL;
- const AvxInterface *decoder = NULL;
- const AvxVideoInfo *info = NULL;
- int num_references;
- int n, i;
- aom_codec_pts_t pts;
-
- exec_name = argv[0];
- if (argc != 4) die("Invalid number of arguments.");
-
- reader = aom_video_reader_open(argv[1]);
- if (!reader) die("Failed to open %s for reading.", argv[1]);
-
- num_references = (int)strtol(argv[3], NULL, 0);
- info = aom_video_reader_get_info(reader);
-
- // The writer to write out ivf file in tile list OBU, which can be decoded by
- // AV1 decoder.
- writer = aom_video_writer_open(argv[2], kContainerIVF, info);
- if (!writer) die("Failed to open %s for writing", argv[2]);
-
- decoder = get_aom_decoder_by_fourcc(info->codec_fourcc);
- if (!decoder) die("Unknown input codec.");
- printf("Using %s\n", aom_codec_iface_name(decoder->codec_interface()));
-
- if (aom_codec_dec_init(&codec, decoder->codec_interface(), NULL, 0))
- die_codec(&codec, "Failed to initialize decoder.");
-
- // Decode anchor frames.
- aom_codec_control_(&codec, AV1_SET_TILE_MODE, 0);
-
- for (i = 0; i < num_references; ++i) {
- aom_video_reader_read_frame(reader);
-
- size_t frame_size = 0;
- const unsigned char *frame =
- aom_video_reader_get_frame(reader, &frame_size);
- pts = (aom_codec_pts_t)aom_video_reader_get_frame_pts(reader);
-
- // Copy references bitstream directly.
- if (!aom_video_writer_write_frame(writer, frame, frame_size, pts))
- die_codec(&codec, "Failed to copy compressed anchor frame.");
-
- if (aom_codec_decode(&codec, frame, frame_size, NULL))
- die_codec(&codec, "Failed to decode frame.");
- }
-
- // Decode camera frames.
- aom_codec_control_(&codec, AV1_SET_TILE_MODE, 1);
- aom_codec_control_(&codec, AV1D_EXT_TILE_DEBUG, 1);
-
- FILE *infile = aom_video_reader_get_file(reader);
- // Record the offset of the first camera image.
- const FileOffset camera_frame_pos = ftello(infile);
-
- // Read out the first camera frame.
- aom_video_reader_read_frame(reader);
-
- // Copy first camera frame for getting camera frame header. This is done
- // only once.
- {
- size_t frame_size = 0;
- const unsigned char *frame =
- aom_video_reader_get_frame(reader, &frame_size);
- pts = (aom_codec_pts_t)aom_video_reader_get_frame_pts(reader);
- aom_tile_data frame_header_info = { 0, NULL, 0 };
-
- // Need to decode frame header to get camera frame header info. So, here
- // decoding 1 tile is enough.
- aom_codec_control_(&codec, AV1_SET_DECODE_TILE_ROW, 0);
- aom_codec_control_(&codec, AV1_SET_DECODE_TILE_COL, 0);
-
- aom_codec_err_t aom_status =
- aom_codec_decode(&codec, frame, frame_size, NULL);
- if (aom_status) die_codec(&codec, "Failed to decode tile.");
-
- aom_codec_control_(&codec, AV1D_GET_FRAME_HEADER_INFO, &frame_header_info);
-
- size_t obu_size_offset =
- (uint8_t *)frame_header_info.coded_tile_data - frame;
- size_t length_field_size = frame_header_info.coded_tile_data_size;
- // Remove ext-tile tile info.
- uint32_t frame_header_size = (uint32_t)frame_header_info.extra_size - 1;
- size_t bytes_to_copy =
- obu_size_offset + length_field_size + frame_header_size;
-
- unsigned char *frame_hdr_buf = (unsigned char *)malloc(bytes_to_copy);
- if (frame_hdr_buf == NULL)
- die_codec(&codec, "Failed to allocate frame header buffer.");
-
- memcpy(frame_hdr_buf, frame, bytes_to_copy);
-
- // Update frame header OBU size.
- size_t bytes_written = 0;
- if (aom_uleb_encode_fixed_size(
- frame_header_size, length_field_size, length_field_size,
- frame_hdr_buf + obu_size_offset, &bytes_written))
- die_codec(&codec, "Failed to encode the tile list obu size.");
-
- // Copy camera frame header bitstream.
- if (!aom_video_writer_write_frame(writer, frame_hdr_buf, bytes_to_copy,
- pts))
- die_codec(&codec, "Failed to copy compressed camera frame header.");
- free(frame_hdr_buf);
- }
-
- // Read out the image format.
- aom_img_fmt_t ref_fmt = 0;
- if (aom_codec_control(&codec, AV1D_GET_IMG_FORMAT, &ref_fmt))
- die_codec(&codec, "Failed to get the image format");
- const int bps = get_image_bps(ref_fmt);
- if (!bps) die_codec(&codec, "Invalid image format.");
- // read out the tile size.
- unsigned int tile_size = 0;
- if (aom_codec_control(&codec, AV1D_GET_TILE_SIZE, &tile_size))
- die_codec(&codec, "Failed to get the tile size");
- const unsigned int tile_width = tile_size >> 16;
- const unsigned int tile_height = tile_size & 65535;
- // Allocate a buffer to store tile list bitstream.
- const size_t data_sz = MAX_TILES * ALIGN_POWER_OF_TWO(tile_width, 5) *
- ALIGN_POWER_OF_TWO(tile_height, 5) * bps / 8;
- unsigned char *tl_buf = (unsigned char *)malloc(data_sz);
- if (tl_buf == NULL) die_codec(&codec, "Failed to allocate tile list buffer.");
-
- aom_codec_pts_t tl_pts = pts;
-
- // Process 1 tile list.
- for (n = 0; n < num_tile_lists; n++) {
- unsigned char *tl = tl_buf;
- struct aom_write_bit_buffer wb = { tl, 0 };
- unsigned char *saved_obu_size_loc = NULL;
- uint32_t tile_list_obu_header_size = 0;
- uint32_t tile_list_obu_size = 0;
-
- // Write the tile list OBU header that is 1 byte long.
- aom_wb_write_literal(&wb, 0, 1); // forbidden bit.
- aom_wb_write_literal(&wb, 8, 4); // tile list OBU: "1000"
- aom_wb_write_literal(&wb, 0, 1); // obu_extension = 0
- aom_wb_write_literal(&wb, 1, 1); // obu_has_size_field
- aom_wb_write_literal(&wb, 0, 1); // reserved
- tl++;
- tile_list_obu_header_size++;
-
- // Write the OBU size using a fixed length_field_size of 4 bytes.
- saved_obu_size_loc = tl;
- // aom_wb_write_unsigned_literal(&wb, data, bits) requires that bits <= 32.
- aom_wb_write_unsigned_literal(&wb, 0, 32);
- tl += 4;
- tile_list_obu_header_size += 4;
-
- // write_tile_list_obu()
- aom_wb_write_literal(&wb, output_frame_width_in_tiles_minus_1, 8);
- aom_wb_write_literal(&wb, output_frame_height_in_tiles_minus_1, 8);
- aom_wb_write_literal(&wb, tile_count_minus_1, 16);
- tl += 4;
- tile_list_obu_size += 4;
-
- // Write each tile's data
- for (i = 0; i <= tile_count_minus_1; i++) {
- aom_tile_data tile_data = { 0, NULL, 0 };
-
- int image_idx = tile_list[n][i].image_idx;
- int ref_idx = tile_list[n][i].reference_idx;
- int tc = tile_list[n][i].tile_col;
- int tr = tile_list[n][i].tile_row;
- int frame_cnt = -1;
-
- // Reset bit writer to the right location.
- wb.bit_buffer = tl;
- wb.bit_offset = 0;
-
- // Seek to the first camera image.
- fseeko(infile, camera_frame_pos, SEEK_SET);
-
- // Read out the camera image
- while (frame_cnt != image_idx) {
- aom_video_reader_read_frame(reader);
- frame_cnt++;
- }
-
- size_t frame_size = 0;
- const unsigned char *frame =
- aom_video_reader_get_frame(reader, &frame_size);
-
- aom_codec_control_(&codec, AV1_SET_DECODE_TILE_ROW, tr);
- aom_codec_control_(&codec, AV1_SET_DECODE_TILE_COL, tc);
-
- aom_codec_err_t aom_status =
- aom_codec_decode(&codec, frame, frame_size, NULL);
- if (aom_status) die_codec(&codec, "Failed to decode tile.");
-
- aom_codec_control_(&codec, AV1D_GET_TILE_DATA, &tile_data);
-
- // Copy over tile info.
- // uint8_t anchor_frame_idx;
- // uint8_t tile_row;
- // uint8_t tile_col;
- // uint16_t coded_tile_data_size_minus_1;
- // uint8_t *coded_tile_data;
- uint32_t tile_info_bytes = 5;
- aom_wb_write_literal(&wb, ref_idx, 8);
- aom_wb_write_literal(&wb, tr, 8);
- aom_wb_write_literal(&wb, tc, 8);
- aom_wb_write_literal(&wb, (int)tile_data.coded_tile_data_size - 1, 16);
- tl += tile_info_bytes;
-
- memcpy(tl, (uint8_t *)tile_data.coded_tile_data,
- tile_data.coded_tile_data_size);
- tl += tile_data.coded_tile_data_size;
-
- tile_list_obu_size +=
- tile_info_bytes + (uint32_t)tile_data.coded_tile_data_size;
- }
-
- // Write tile list OBU size.
- size_t bytes_written = 0;
- if (aom_uleb_encode_fixed_size(tile_list_obu_size, 4, 4, saved_obu_size_loc,
- &bytes_written))
- die_codec(&codec, "Failed to encode the tile list obu size.");
-
- // Copy the tile list.
- if (!aom_video_writer_write_frame(
- writer, tl_buf, tile_list_obu_header_size + tile_list_obu_size,
- tl_pts))
- die_codec(&codec, "Failed to copy compressed tile list.");
-
- tl_pts++;
- }
-
- free(tl_buf);
- if (aom_codec_destroy(&codec)) die_codec(&codec, "Failed to destroy codec");
- aom_video_writer_close(writer);
- aom_video_reader_close(reader);
-
- return EXIT_SUCCESS;
-}
diff --git a/third_party/aom/examples/lightfield_decoder.c b/third_party/aom/examples/lightfield_decoder.c
deleted file mode 100644
index f5e54db7f..000000000
--- a/third_party/aom/examples/lightfield_decoder.c
+++ /dev/null
@@ -1,208 +0,0 @@
-/*
- * Copyright (c) 2017, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-// Lightfield Decoder
-// ==================
-//
-// This is an example of a simple lightfield decoder. It builds upon the
-// simple_decoder.c example. It takes an input file containing the compressed
-// data (in ivf format), treating it as a lightfield instead of a video.
-// After running the lightfield encoder, run lightfield decoder to decode a
-// batch of tiles:
-// examples/lightfield_decoder vase10x10.ivf vase_reference.yuv 4
-
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-
-#include "aom/aom_decoder.h"
-#include "aom/aomdx.h"
-#include "aom_scale/yv12config.h"
-#include "av1/common/enums.h"
-#include "common/tools_common.h"
-#include "common/video_reader.h"
-
-static const char *exec_name;
-
-void usage_exit(void) {
- fprintf(stderr, "Usage: %s <infile> <outfile> <num_references>\n", exec_name);
- exit(EXIT_FAILURE);
-}
-
-// Tile list entry provided by the application
-typedef struct {
- int image_idx;
- int reference_idx;
- int tile_col;
- int tile_row;
-} TILE_LIST_INFO;
-
-// M references: 0 - M-1; N images(including references): 0 - N-1;
-// Note: order the image index incrementally, so that we only go through the
-// bitstream once to construct the tile list.
-const int num_tile_lists = 2;
-const uint16_t tile_count_minus_1 = 9 - 1;
-const TILE_LIST_INFO tile_list[2][9] = {
- { { 16, 0, 4, 5 },
- { 83, 3, 13, 2 },
- { 57, 2, 2, 6 },
- { 31, 1, 11, 5 },
- { 2, 0, 7, 4 },
- { 77, 3, 9, 9 },
- { 49, 1, 0, 1 },
- { 6, 0, 3, 10 },
- { 63, 2, 5, 8 } },
- { { 65, 2, 11, 1 },
- { 42, 1, 3, 7 },
- { 88, 3, 8, 4 },
- { 76, 3, 1, 15 },
- { 1, 0, 2, 2 },
- { 19, 0, 5, 6 },
- { 60, 2, 4, 0 },
- { 25, 1, 11, 15 },
- { 50, 2, 5, 4 } },
-};
-
-int main(int argc, char **argv) {
- FILE *outfile = NULL;
- aom_codec_ctx_t codec;
- AvxVideoReader *reader = NULL;
- const AvxInterface *decoder = NULL;
- const AvxVideoInfo *info = NULL;
- int num_references;
- aom_image_t reference_images[MAX_EXTERNAL_REFERENCES];
- size_t frame_size = 0;
- const unsigned char *frame = NULL;
- int n, i, j;
- exec_name = argv[0];
-
- if (argc != 4) die("Invalid number of arguments.");
-
- reader = aom_video_reader_open(argv[1]);
- if (!reader) die("Failed to open %s for reading.", argv[1]);
-
- if (!(outfile = fopen(argv[2], "wb")))
- die("Failed to open %s for writing.", argv[2]);
-
- num_references = (int)strtol(argv[3], NULL, 0);
-
- info = aom_video_reader_get_info(reader);
-
- decoder = get_aom_decoder_by_fourcc(info->codec_fourcc);
- if (!decoder) die("Unknown input codec.");
- printf("Using %s\n", aom_codec_iface_name(decoder->codec_interface()));
-
- if (aom_codec_dec_init(&codec, decoder->codec_interface(), NULL, 0))
- die_codec(&codec, "Failed to initialize decoder.");
-
- if (aom_codec_control(&codec, AV1D_SET_IS_ANNEXB, info->is_annexb)) {
- die("Failed to set annex b status");
- }
-
- // Decode anchor frames.
- aom_codec_control_(&codec, AV1_SET_TILE_MODE, 0);
- for (i = 0; i < num_references; ++i) {
- aom_video_reader_read_frame(reader);
- frame = aom_video_reader_get_frame(reader, &frame_size);
- if (aom_codec_decode(&codec, frame, frame_size, NULL))
- die_codec(&codec, "Failed to decode frame.");
-
- if (i == 0) {
- aom_img_fmt_t ref_fmt = 0;
- if (aom_codec_control(&codec, AV1D_GET_IMG_FORMAT, &ref_fmt))
- die_codec(&codec, "Failed to get the image format");
-
- int frame_res[2];
- if (aom_codec_control(&codec, AV1D_GET_FRAME_SIZE, frame_res))
- die_codec(&codec, "Failed to get the image frame size");
-
- // Allocate memory to store decoded references. Allocate memory with the
- // border so that it can be used as a reference.
- for (j = 0; j < num_references; j++) {
- unsigned int border = AOM_BORDER_IN_PIXELS;
- if (!aom_img_alloc_with_border(&reference_images[j], ref_fmt,
- frame_res[0], frame_res[1], 32, 8,
- border)) {
- die("Failed to allocate references.");
- }
- }
- }
-
- if (aom_codec_control(&codec, AV1_COPY_NEW_FRAME_IMAGE,
- &reference_images[i]))
- die_codec(&codec, "Failed to copy decoded reference frame");
-
- aom_codec_iter_t iter = NULL;
- aom_image_t *img = NULL;
- while ((img = aom_codec_get_frame(&codec, &iter)) != NULL) {
- char name[1024];
- snprintf(name, sizeof(name), "ref_%d.yuv", i);
- printf("writing ref image to %s, %d, %d\n", name, img->d_w, img->d_h);
- FILE *ref_file = fopen(name, "wb");
- aom_img_write(img, ref_file);
- fclose(ref_file);
- }
- }
-
- FILE *infile = aom_video_reader_get_file(reader);
- // Record the offset of the first camera image.
- const FileOffset camera_frame_pos = ftello(infile);
-
- // Process 1 tile.
- for (n = 0; n < num_tile_lists; n++) {
- for (i = 0; i <= tile_count_minus_1; i++) {
- int image_idx = tile_list[n][i].image_idx;
- int ref_idx = tile_list[n][i].reference_idx;
- int tc = tile_list[n][i].tile_col;
- int tr = tile_list[n][i].tile_row;
- int frame_cnt = -1;
-
- // Seek to the first camera image.
- fseeko(infile, camera_frame_pos, SEEK_SET);
-
- // Read out the camera image
- while (frame_cnt != image_idx) {
- aom_video_reader_read_frame(reader);
- frame_cnt++;
- }
-
- frame = aom_video_reader_get_frame(reader, &frame_size);
-
- aom_codec_control_(&codec, AV1_SET_TILE_MODE, 1);
- aom_codec_control_(&codec, AV1D_EXT_TILE_DEBUG, 1);
- aom_codec_control_(&codec, AV1_SET_DECODE_TILE_ROW, tr);
- aom_codec_control_(&codec, AV1_SET_DECODE_TILE_COL, tc);
-
- av1_ref_frame_t ref;
- ref.idx = 0;
- ref.use_external_ref = 1;
- ref.img = reference_images[ref_idx];
- if (aom_codec_control(&codec, AV1_SET_REFERENCE, &ref)) {
- die_codec(&codec, "Failed to set reference frame.");
- }
-
- aom_codec_err_t aom_status =
- aom_codec_decode(&codec, frame, frame_size, NULL);
- if (aom_status) die_codec(&codec, "Failed to decode tile.");
-
- aom_codec_iter_t iter = NULL;
- aom_image_t *img = aom_codec_get_frame(&codec, &iter);
- aom_img_write(img, outfile);
- }
- }
-
- for (i = 0; i < num_references; i++) aom_img_free(&reference_images[i]);
- if (aom_codec_destroy(&codec)) die_codec(&codec, "Failed to destroy codec");
- aom_video_reader_close(reader);
- fclose(outfile);
-
- return EXIT_SUCCESS;
-}
diff --git a/third_party/aom/examples/lightfield_encoder.c b/third_party/aom/examples/lightfield_encoder.c
deleted file mode 100644
index e55cd5ce3..000000000
--- a/third_party/aom/examples/lightfield_encoder.c
+++ /dev/null
@@ -1,499 +0,0 @@
-/*
- * Copyright (c) 2017, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-// Lightfield Encoder
-// ==================
-//
-// This is an example of a simple lightfield encoder. It builds upon the
-// twopass_encoder.c example. It takes an input file in YV12 format,
-// treating it as a planar lightfield instead of a video. The img_width
-// and img_height arguments are the dimensions of the lightfield images,
-// while the lf_width and lf_height arguments are the number of
-// lightfield images in each dimension. The lf_blocksize determines the
-// number of reference images used for MCP. For example, 5 means that there
-// is a reference image for every 5x5 lightfield image block. All images
-// within a block will use the center image in that block as the reference
-// image for MCP.
-// Run "make test" to download lightfield test data: vase10x10.yuv.
-// Run lightfield encoder to encode whole lightfield:
-// examples/lightfield_encoder 1024 1024 vase10x10.yuv vase10x10.ivf 10 10 5
-
-// Note: In bitstream.c and encoder.c, define EXT_TILE_DEBUG as 1 will print
-// out the uncompressed header and the frame contexts, which can be used to
-// test the bit exactness of the headers and the frame contexts for large scale
-// tile coded frames.
-
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-
-#include "aom/aom_encoder.h"
-#include "aom/aomcx.h"
-#include "aom_scale/yv12config.h"
-#include "av1/common/enums.h"
-#include "common/tools_common.h"
-#include "common/video_writer.h"
-
-static const char *exec_name;
-
-void usage_exit(void) {
- fprintf(stderr,
- "Usage: %s <img_width> <img_height> <infile> <outfile> "
- "<lf_width> <lf_height> <lf_blocksize>\n",
- exec_name);
- exit(EXIT_FAILURE);
-}
-
-static int aom_img_size_bytes(aom_image_t *img) {
- int image_size_bytes = 0;
- int plane;
- for (plane = 0; plane < 3; ++plane) {
- const int w = aom_img_plane_width(img, plane) *
- ((img->fmt & AOM_IMG_FMT_HIGHBITDEPTH) ? 2 : 1);
- const int h = aom_img_plane_height(img, plane);
- image_size_bytes += w * h;
- }
- return image_size_bytes;
-}
-
-static int get_frame_stats(aom_codec_ctx_t *ctx, const aom_image_t *img,
- aom_codec_pts_t pts, unsigned int duration,
- aom_enc_frame_flags_t flags,
- aom_fixed_buf_t *stats) {
- int got_pkts = 0;
- aom_codec_iter_t iter = NULL;
- const aom_codec_cx_pkt_t *pkt = NULL;
- const aom_codec_err_t res = aom_codec_encode(ctx, img, pts, duration, flags);
- if (res != AOM_CODEC_OK) die_codec(ctx, "Failed to get frame stats.");
-
- while ((pkt = aom_codec_get_cx_data(ctx, &iter)) != NULL) {
- got_pkts = 1;
-
- if (pkt->kind == AOM_CODEC_STATS_PKT) {
- const uint8_t *const pkt_buf = pkt->data.twopass_stats.buf;
- const size_t pkt_size = pkt->data.twopass_stats.sz;
- stats->buf = realloc(stats->buf, stats->sz + pkt_size);
- memcpy((uint8_t *)stats->buf + stats->sz, pkt_buf, pkt_size);
- stats->sz += pkt_size;
- }
- }
-
- return got_pkts;
-}
-
-static int encode_frame(aom_codec_ctx_t *ctx, const aom_image_t *img,
- aom_codec_pts_t pts, unsigned int duration,
- aom_enc_frame_flags_t flags, AvxVideoWriter *writer) {
- int got_pkts = 0;
- aom_codec_iter_t iter = NULL;
- const aom_codec_cx_pkt_t *pkt = NULL;
- const aom_codec_err_t res = aom_codec_encode(ctx, img, pts, duration, flags);
- if (res != AOM_CODEC_OK) die_codec(ctx, "Failed to encode frame.");
-
- while ((pkt = aom_codec_get_cx_data(ctx, &iter)) != NULL) {
- got_pkts = 1;
- if (pkt->kind == AOM_CODEC_CX_FRAME_PKT) {
- const int keyframe = (pkt->data.frame.flags & AOM_FRAME_IS_KEY) != 0;
-
- if (!aom_video_writer_write_frame(writer, pkt->data.frame.buf,
- pkt->data.frame.sz,
- pkt->data.frame.pts))
- die_codec(ctx, "Failed to write compressed frame.");
- printf(keyframe ? "K" : ".");
- fflush(stdout);
- }
- }
-
- return got_pkts;
-}
-
-static void get_raw_image(aom_image_t **frame_to_encode, aom_image_t *raw,
- aom_image_t *raw_shift) {
- if (!CONFIG_LOWBITDEPTH) {
- // Need to allocate larger buffer to use hbd internal.
- int input_shift = 0;
- aom_img_upshift(raw_shift, raw, input_shift);
- *frame_to_encode = raw_shift;
- } else {
- *frame_to_encode = raw;
- }
-}
-
-static aom_fixed_buf_t pass0(aom_image_t *raw, FILE *infile,
- const AvxInterface *encoder,
- const aom_codec_enc_cfg_t *cfg, int lf_width,
- int lf_height, int lf_blocksize, int flags,
- aom_image_t *raw_shift) {
- aom_codec_ctx_t codec;
- int frame_count = 0;
- int image_size_bytes = aom_img_size_bytes(raw);
- int u_blocks, v_blocks;
- int bu, bv;
- aom_fixed_buf_t stats = { NULL, 0 };
- aom_image_t *frame_to_encode;
-
- if (aom_codec_enc_init(&codec, encoder->codec_interface(), cfg, flags))
- die_codec(&codec, "Failed to initialize encoder");
- if (aom_codec_control(&codec, AOME_SET_ENABLEAUTOALTREF, 0))
- die_codec(&codec, "Failed to turn off auto altref");
- if (aom_codec_control(&codec, AV1E_SET_FRAME_PARALLEL_DECODING, 0))
- die_codec(&codec, "Failed to set frame parallel decoding");
-
- // How many reference images we need to encode.
- u_blocks = (lf_width + lf_blocksize - 1) / lf_blocksize;
- v_blocks = (lf_height + lf_blocksize - 1) / lf_blocksize;
-
- printf("\n First pass: ");
-
- for (bv = 0; bv < v_blocks; ++bv) {
- for (bu = 0; bu < u_blocks; ++bu) {
- const int block_u_min = bu * lf_blocksize;
- const int block_v_min = bv * lf_blocksize;
- int block_u_end = (bu + 1) * lf_blocksize;
- int block_v_end = (bv + 1) * lf_blocksize;
- int u_block_size, v_block_size;
- int block_ref_u, block_ref_v;
-
- block_u_end = block_u_end < lf_width ? block_u_end : lf_width;
- block_v_end = block_v_end < lf_height ? block_v_end : lf_height;
- u_block_size = block_u_end - block_u_min;
- v_block_size = block_v_end - block_v_min;
- block_ref_u = block_u_min + u_block_size / 2;
- block_ref_v = block_v_min + v_block_size / 2;
-
- printf("A%d, ", (block_ref_u + block_ref_v * lf_width));
- fseek(infile, (block_ref_u + block_ref_v * lf_width) * image_size_bytes,
- SEEK_SET);
- aom_img_read(raw, infile);
- get_raw_image(&frame_to_encode, raw, raw_shift);
-
- // Reference frames can be encoded encoded without tiles.
- ++frame_count;
- get_frame_stats(&codec, frame_to_encode, frame_count, 1,
- AOM_EFLAG_NO_REF_LAST2 | AOM_EFLAG_NO_REF_LAST3 |
- AOM_EFLAG_NO_REF_GF | AOM_EFLAG_NO_REF_ARF |
- AOM_EFLAG_NO_REF_BWD | AOM_EFLAG_NO_REF_ARF2 |
- AOM_EFLAG_NO_UPD_LAST | AOM_EFLAG_NO_UPD_GF |
- AOM_EFLAG_NO_UPD_ARF,
- &stats);
- }
- }
-
- if (aom_codec_control(&codec, AV1E_SET_FRAME_PARALLEL_DECODING, 1))
- die_codec(&codec, "Failed to set frame parallel decoding");
-
- for (bv = 0; bv < v_blocks; ++bv) {
- for (bu = 0; bu < u_blocks; ++bu) {
- const int block_u_min = bu * lf_blocksize;
- const int block_v_min = bv * lf_blocksize;
- int block_u_end = (bu + 1) * lf_blocksize;
- int block_v_end = (bv + 1) * lf_blocksize;
- int u, v;
- block_u_end = block_u_end < lf_width ? block_u_end : lf_width;
- block_v_end = block_v_end < lf_height ? block_v_end : lf_height;
- for (v = block_v_min; v < block_v_end; ++v) {
- for (u = block_u_min; u < block_u_end; ++u) {
- printf("C%d, ", (u + v * lf_width));
- fseek(infile, (u + v * lf_width) * image_size_bytes, SEEK_SET);
- aom_img_read(raw, infile);
- get_raw_image(&frame_to_encode, raw, raw_shift);
-
- ++frame_count;
- get_frame_stats(&codec, frame_to_encode, frame_count, 1,
- AOM_EFLAG_NO_REF_LAST2 | AOM_EFLAG_NO_REF_LAST3 |
- AOM_EFLAG_NO_REF_GF | AOM_EFLAG_NO_REF_ARF |
- AOM_EFLAG_NO_REF_BWD | AOM_EFLAG_NO_REF_ARF2 |
- AOM_EFLAG_NO_UPD_LAST | AOM_EFLAG_NO_UPD_GF |
- AOM_EFLAG_NO_UPD_ARF | AOM_EFLAG_NO_UPD_ENTROPY,
- &stats);
- }
- }
- }
- }
- // Flush encoder.
- // No ARF, this should not be needed.
- while (get_frame_stats(&codec, NULL, frame_count, 1, 0, &stats)) {
- }
-
- if (aom_codec_destroy(&codec)) die_codec(&codec, "Failed to destroy codec.");
-
- printf("\nFirst pass complete. Processed %d frames.\n", frame_count);
-
- return stats;
-}
-
-static void pass1(aom_image_t *raw, FILE *infile, const char *outfile_name,
- const AvxInterface *encoder, aom_codec_enc_cfg_t *cfg,
- int lf_width, int lf_height, int lf_blocksize, int flags,
- aom_image_t *raw_shift) {
- AvxVideoInfo info = { encoder->fourcc,
- cfg->g_w,
- cfg->g_h,
- { cfg->g_timebase.num, cfg->g_timebase.den },
- 0 };
- AvxVideoWriter *writer = NULL;
- aom_codec_ctx_t codec;
- int frame_count = 0;
- int image_size_bytes = aom_img_size_bytes(raw);
- int bu, bv;
- int u_blocks, v_blocks;
- aom_image_t *frame_to_encode;
- aom_image_t reference_images[MAX_EXTERNAL_REFERENCES];
- int reference_image_num = 0;
- int i;
-
- writer = aom_video_writer_open(outfile_name, kContainerIVF, &info);
- if (!writer) die("Failed to open %s for writing", outfile_name);
-
- if (aom_codec_enc_init(&codec, encoder->codec_interface(), cfg, flags))
- die_codec(&codec, "Failed to initialize encoder");
- if (aom_codec_control(&codec, AOME_SET_ENABLEAUTOALTREF, 0))
- die_codec(&codec, "Failed to turn off auto altref");
- if (aom_codec_control(&codec, AV1E_SET_FRAME_PARALLEL_DECODING, 0))
- die_codec(&codec, "Failed to set frame parallel decoding");
- // Note: The superblock is a sequence parameter and has to be the same for 1
- // sequence. In lightfield application, must choose the superblock size(either
- // 64x64 or 128x128) before the encoding starts. Otherwise, the default is
- // AOM_SUPERBLOCK_SIZE_DYNAMIC, and the superblock size will be set to 64x64
- // internally.
- if (aom_codec_control(&codec, AV1E_SET_SUPERBLOCK_SIZE,
- AOM_SUPERBLOCK_SIZE_64X64))
- die_codec(&codec, "Failed to set SB size");
-
- u_blocks = (lf_width + lf_blocksize - 1) / lf_blocksize;
- v_blocks = (lf_height + lf_blocksize - 1) / lf_blocksize;
-
- reference_image_num = u_blocks * v_blocks;
- aom_img_fmt_t ref_fmt = AOM_IMG_FMT_I420;
- if (!CONFIG_LOWBITDEPTH) ref_fmt |= AOM_IMG_FMT_HIGHBITDEPTH;
- // Allocate memory with the border so that it can be used as a reference.
- for (i = 0; i < reference_image_num; i++) {
- if (!aom_img_alloc_with_border(&reference_images[i], ref_fmt, cfg->g_w,
- cfg->g_h, 32, 8, AOM_BORDER_IN_PIXELS)) {
- die("Failed to allocate image.");
- }
- }
-
- printf("\n Second pass: ");
-
- // Encode reference images first.
- printf("Encoding Reference Images\n");
- for (bv = 0; bv < v_blocks; ++bv) {
- for (bu = 0; bu < u_blocks; ++bu) {
- const int block_u_min = bu * lf_blocksize;
- const int block_v_min = bv * lf_blocksize;
- int block_u_end = (bu + 1) * lf_blocksize;
- int block_v_end = (bv + 1) * lf_blocksize;
- int u_block_size, v_block_size;
- int block_ref_u, block_ref_v;
-
- block_u_end = block_u_end < lf_width ? block_u_end : lf_width;
- block_v_end = block_v_end < lf_height ? block_v_end : lf_height;
- u_block_size = block_u_end - block_u_min;
- v_block_size = block_v_end - block_v_min;
- block_ref_u = block_u_min + u_block_size / 2;
- block_ref_v = block_v_min + v_block_size / 2;
-
- printf("A%d, ", (block_ref_u + block_ref_v * lf_width));
- fseek(infile, (block_ref_u + block_ref_v * lf_width) * image_size_bytes,
- SEEK_SET);
- aom_img_read(raw, infile);
-
- get_raw_image(&frame_to_encode, raw, raw_shift);
-
- // Reference frames may be encoded without tiles.
- ++frame_count;
- printf("Encoding reference image %d of %d\n", bv * u_blocks + bu,
- u_blocks * v_blocks);
- encode_frame(&codec, frame_to_encode, frame_count, 1,
- AOM_EFLAG_NO_REF_LAST2 | AOM_EFLAG_NO_REF_LAST3 |
- AOM_EFLAG_NO_REF_GF | AOM_EFLAG_NO_REF_ARF |
- AOM_EFLAG_NO_REF_BWD | AOM_EFLAG_NO_REF_ARF2 |
- AOM_EFLAG_NO_UPD_LAST | AOM_EFLAG_NO_UPD_GF |
- AOM_EFLAG_NO_UPD_ARF | AOM_EFLAG_NO_UPD_ENTROPY,
- writer);
-
- if (aom_codec_control(&codec, AV1_COPY_NEW_FRAME_IMAGE,
- &reference_images[frame_count - 1]))
- die_codec(&codec, "Failed to copy decoder reference frame");
- }
- }
-
- cfg->large_scale_tile = 1;
- // Fixed q encoding for camera frames.
- cfg->rc_end_usage = AOM_Q;
- if (aom_codec_enc_config_set(&codec, cfg))
- die_codec(&codec, "Failed to configure encoder");
-
- // The fixed q value used in encoding.
- if (aom_codec_control(&codec, AOME_SET_CQ_LEVEL, 36))
- die_codec(&codec, "Failed to set cq level");
- if (aom_codec_control(&codec, AV1E_SET_FRAME_PARALLEL_DECODING, 1))
- die_codec(&codec, "Failed to set frame parallel decoding");
- if (aom_codec_control(&codec, AV1E_SET_SINGLE_TILE_DECODING, 1))
- die_codec(&codec, "Failed to turn on single tile decoding");
- // Set tile_columns and tile_rows to MAX values, which guarantees the tile
- // size of 64 x 64 pixels(i.e. 1 SB) for <= 4k resolution.
- if (aom_codec_control(&codec, AV1E_SET_TILE_COLUMNS, 6))
- die_codec(&codec, "Failed to set tile width");
- if (aom_codec_control(&codec, AV1E_SET_TILE_ROWS, 6))
- die_codec(&codec, "Failed to set tile height");
-
- for (bv = 0; bv < v_blocks; ++bv) {
- for (bu = 0; bu < u_blocks; ++bu) {
- const int block_u_min = bu * lf_blocksize;
- const int block_v_min = bv * lf_blocksize;
- int block_u_end = (bu + 1) * lf_blocksize;
- int block_v_end = (bv + 1) * lf_blocksize;
- int u, v;
- block_u_end = block_u_end < lf_width ? block_u_end : lf_width;
- block_v_end = block_v_end < lf_height ? block_v_end : lf_height;
- for (v = block_v_min; v < block_v_end; ++v) {
- for (u = block_u_min; u < block_u_end; ++u) {
- av1_ref_frame_t ref;
- ref.idx = 0;
- ref.use_external_ref = 1;
- ref.img = reference_images[bv * u_blocks + bu];
- if (aom_codec_control(&codec, AV1_SET_REFERENCE, &ref))
- die_codec(&codec, "Failed to set reference frame");
-
- printf("C%d, ", (u + v * lf_width));
- fseek(infile, (u + v * lf_width) * image_size_bytes, SEEK_SET);
- aom_img_read(raw, infile);
- get_raw_image(&frame_to_encode, raw, raw_shift);
-
- ++frame_count;
- printf("Encoding image %d of %d\n",
- frame_count - (u_blocks * v_blocks), lf_width * lf_height);
- encode_frame(&codec, frame_to_encode, frame_count, 1,
- AOM_EFLAG_NO_REF_LAST2 | AOM_EFLAG_NO_REF_LAST3 |
- AOM_EFLAG_NO_REF_GF | AOM_EFLAG_NO_REF_ARF |
- AOM_EFLAG_NO_REF_BWD | AOM_EFLAG_NO_REF_ARF2 |
- AOM_EFLAG_NO_UPD_LAST | AOM_EFLAG_NO_UPD_GF |
- AOM_EFLAG_NO_UPD_ARF | AOM_EFLAG_NO_UPD_ENTROPY,
- writer);
- }
- }
- }
- }
-
- // Flush encoder.
- // No ARF, this should not be needed.
- while (encode_frame(&codec, NULL, -1, 1, 0, writer)) {
- }
-
- for (i = 0; i < reference_image_num; i++) aom_img_free(&reference_images[i]);
-
- if (aom_codec_destroy(&codec)) die_codec(&codec, "Failed to destroy codec.");
- aom_video_writer_close(writer);
-
- printf("\nSecond pass complete. Processed %d frames.\n", frame_count);
-}
-
-int main(int argc, char **argv) {
- FILE *infile = NULL;
- int w, h;
- // The number of lightfield images in the u and v dimensions.
- int lf_width, lf_height;
- // Defines how many images refer to the same reference image for MCP.
- // lf_blocksize X lf_blocksize images will all use the reference image
- // in the middle of the block of images.
- int lf_blocksize;
- aom_codec_ctx_t codec;
- aom_codec_enc_cfg_t cfg;
- aom_image_t raw;
- aom_image_t raw_shift;
- aom_codec_err_t res;
- aom_fixed_buf_t stats;
- int flags = 0;
-
- const AvxInterface *encoder = NULL;
- const int fps = 30;
- const int bitrate = 200; // kbit/s
- const char *const width_arg = argv[1];
- const char *const height_arg = argv[2];
- const char *const infile_arg = argv[3];
- const char *const outfile_arg = argv[4];
- const char *const lf_width_arg = argv[5];
- const char *const lf_height_arg = argv[6];
- const char *lf_blocksize_arg = argv[7];
- exec_name = argv[0];
-
- if (argc < 8) die("Invalid number of arguments");
-
- encoder = get_aom_encoder_by_name("av1");
- if (!encoder) die("Unsupported codec.");
-
- w = (int)strtol(width_arg, NULL, 0);
- h = (int)strtol(height_arg, NULL, 0);
- lf_width = (int)strtol(lf_width_arg, NULL, 0);
- lf_height = (int)strtol(lf_height_arg, NULL, 0);
- lf_blocksize = (int)strtol(lf_blocksize_arg, NULL, 0);
- lf_blocksize = lf_blocksize < lf_width ? lf_blocksize : lf_width;
- lf_blocksize = lf_blocksize < lf_height ? lf_blocksize : lf_height;
-
- if (w <= 0 || h <= 0 || (w % 2) != 0 || (h % 2) != 0)
- die("Invalid frame size: %dx%d", w, h);
- if (lf_width <= 0 || lf_height <= 0)
- die("Invalid lf_width and/or lf_height: %dx%d", lf_width, lf_height);
- if (lf_blocksize <= 0) die("Invalid lf_blocksize: %d", lf_blocksize);
-
- if (!aom_img_alloc(&raw, AOM_IMG_FMT_I420, w, h, 32)) {
- die("Failed to allocate image.");
- }
- if (!CONFIG_LOWBITDEPTH) {
- // Need to allocate larger buffer to use hbd internal.
- aom_img_alloc(&raw_shift, AOM_IMG_FMT_I420 | AOM_IMG_FMT_HIGHBITDEPTH, w, h,
- 32);
- }
-
- printf("Using %s\n", aom_codec_iface_name(encoder->codec_interface()));
-
- // Configuration
- res = aom_codec_enc_config_default(encoder->codec_interface(), &cfg, 0);
- if (res) die_codec(&codec, "Failed to get default codec config.");
-
- cfg.g_w = w;
- cfg.g_h = h;
- cfg.g_timebase.num = 1;
- cfg.g_timebase.den = fps;
- cfg.rc_target_bitrate = bitrate;
- cfg.g_error_resilient = 0; // This is required.
- cfg.g_lag_in_frames = 0; // need to set this since default is 19.
- cfg.kf_mode = AOM_KF_DISABLED;
- cfg.large_scale_tile = 0; // Only set it to 1 for camera frame encoding.
- cfg.g_bit_depth = AOM_BITS_8;
- flags |= (cfg.g_bit_depth > AOM_BITS_8 || !CONFIG_LOWBITDEPTH)
- ? AOM_CODEC_USE_HIGHBITDEPTH
- : 0;
-
- if (!(infile = fopen(infile_arg, "rb")))
- die("Failed to open %s for reading", infile_arg);
-
- // Pass 0
- cfg.g_pass = AOM_RC_FIRST_PASS;
- stats = pass0(&raw, infile, encoder, &cfg, lf_width, lf_height, lf_blocksize,
- flags, &raw_shift);
-
- // Pass 1
- rewind(infile);
- cfg.g_pass = AOM_RC_LAST_PASS;
- cfg.rc_twopass_stats_in = stats;
- pass1(&raw, infile, outfile_arg, encoder, &cfg, lf_width, lf_height,
- lf_blocksize, flags, &raw_shift);
- free(stats.buf);
-
- if (!CONFIG_LOWBITDEPTH) aom_img_free(&raw_shift);
- aom_img_free(&raw);
- fclose(infile);
-
- return EXIT_SUCCESS;
-}
diff --git a/third_party/aom/examples/lightfield_tile_list_decoder.c b/third_party/aom/examples/lightfield_tile_list_decoder.c
deleted file mode 100644
index 5556bf0e7..000000000
--- a/third_party/aom/examples/lightfield_tile_list_decoder.c
+++ /dev/null
@@ -1,161 +0,0 @@
-/*
- * Copyright (c) 2018, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-// Lightfield Tile List Decoder
-// ============================
-//
-// This is a lightfield tile list decoder example. It takes an input file that
-// contains the anchor frames that are references of the coded tiles, the camera
-// frame header, and tile list OBUs that include the tile information and the
-// compressed tile data. This input file is reconstructed from the encoded
-// lightfield ivf file, and is decodable by AV1 decoder. num_references is
-// the number of anchor frames coded at the beginning of the light field file.
-// num_tile_lists is the number of tile lists need to be decoded.
-// Run lightfield tile list decoder to decode an AV1 tile list file:
-// examples/lightfield_tile_list_decoder vase_tile_list.ivf vase_tile_list.yuv
-// 4 2
-
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-#include <assert.h>
-
-#include "aom/aom_decoder.h"
-#include "aom/aomdx.h"
-#include "aom_scale/yv12config.h"
-#include "av1/common/enums.h"
-#include "common/tools_common.h"
-#include "common/video_reader.h"
-
-static const char *exec_name;
-
-void usage_exit(void) {
- fprintf(stderr,
- "Usage: %s <infile> <outfile> <num_references> <num_tile_lists>\n",
- exec_name);
- exit(EXIT_FAILURE);
-}
-
-int main(int argc, char **argv) {
- FILE *outfile = NULL;
- aom_codec_ctx_t codec;
- AvxVideoReader *reader = NULL;
- const AvxInterface *decoder = NULL;
- const AvxVideoInfo *info = NULL;
- int num_references;
- int num_tile_lists;
- aom_image_t reference_images[MAX_EXTERNAL_REFERENCES];
- size_t frame_size = 0;
- const unsigned char *frame = NULL;
- int i, j, n;
-
- exec_name = argv[0];
-
- if (argc != 5) die("Invalid number of arguments.");
-
- reader = aom_video_reader_open(argv[1]);
- if (!reader) die("Failed to open %s for reading.", argv[1]);
-
- if (!(outfile = fopen(argv[2], "wb")))
- die("Failed to open %s for writing.", argv[2]);
-
- num_references = (int)strtol(argv[3], NULL, 0);
- num_tile_lists = (int)strtol(argv[4], NULL, 0);
-
- info = aom_video_reader_get_info(reader);
-
- decoder = get_aom_decoder_by_fourcc(info->codec_fourcc);
- if (!decoder) die("Unknown input codec.");
- printf("Using %s\n", aom_codec_iface_name(decoder->codec_interface()));
-
- if (aom_codec_dec_init(&codec, decoder->codec_interface(), NULL, 0))
- die_codec(&codec, "Failed to initialize decoder.");
-
- if (aom_codec_control(&codec, AV1D_SET_IS_ANNEXB, info->is_annexb)) {
- die("Failed to set annex b status");
- }
-
- // Decode anchor frames.
- aom_codec_control_(&codec, AV1_SET_TILE_MODE, 0);
- for (i = 0; i < num_references; ++i) {
- aom_video_reader_read_frame(reader);
- frame = aom_video_reader_get_frame(reader, &frame_size);
- if (aom_codec_decode(&codec, frame, frame_size, NULL))
- die_codec(&codec, "Failed to decode frame.");
-
- if (i == 0) {
- aom_img_fmt_t ref_fmt = 0;
- if (aom_codec_control(&codec, AV1D_GET_IMG_FORMAT, &ref_fmt))
- die_codec(&codec, "Failed to get the image format");
-
- int frame_res[2];
- if (aom_codec_control(&codec, AV1D_GET_FRAME_SIZE, frame_res))
- die_codec(&codec, "Failed to get the image frame size");
-
- // Allocate memory to store decoded references. Allocate memory with the
- // border so that it can be used as a reference.
- for (j = 0; j < num_references; j++) {
- unsigned int border = AOM_BORDER_IN_PIXELS;
- if (!aom_img_alloc_with_border(&reference_images[j], ref_fmt,
- frame_res[0], frame_res[1], 32, 8,
- border)) {
- die("Failed to allocate references.");
- }
- }
- }
-
- if (aom_codec_control(&codec, AV1_COPY_NEW_FRAME_IMAGE,
- &reference_images[i]))
- die_codec(&codec, "Failed to copy decoded reference frame");
-
- aom_codec_iter_t iter = NULL;
- aom_image_t *img = NULL;
- while ((img = aom_codec_get_frame(&codec, &iter)) != NULL) {
- char name[1024];
- snprintf(name, sizeof(name), "ref_%d.yuv", i);
- printf("writing ref image to %s, %d, %d\n", name, img->d_w, img->d_h);
- FILE *ref_file = fopen(name, "wb");
- aom_img_write(img, ref_file);
- fclose(ref_file);
- }
- }
-
- // Decode the lightfield.
- aom_codec_control_(&codec, AV1_SET_TILE_MODE, 1);
-
- // Set external references.
- av1_ext_ref_frame_t set_ext_ref = { &reference_images[0], num_references };
- aom_codec_control_(&codec, AV1D_SET_EXT_REF_PTR, &set_ext_ref);
- // Must decode the camera frame header first.
- aom_video_reader_read_frame(reader);
- frame = aom_video_reader_get_frame(reader, &frame_size);
- if (aom_codec_decode(&codec, frame, frame_size, NULL))
- die_codec(&codec, "Failed to decode the frame.");
- // Decode tile lists one by one.
- for (n = 0; n < num_tile_lists; n++) {
- aom_video_reader_read_frame(reader);
- frame = aom_video_reader_get_frame(reader, &frame_size);
-
- if (aom_codec_decode(&codec, frame, frame_size, NULL))
- die_codec(&codec, "Failed to decode the tile list.");
- aom_codec_iter_t iter = NULL;
- aom_image_t *img;
- while ((img = aom_codec_get_frame(&codec, &iter)))
- fwrite(img->img_data, 1, img->sz, outfile);
- }
-
- for (i = 0; i < num_references; i++) aom_img_free(&reference_images[i]);
- if (aom_codec_destroy(&codec)) die_codec(&codec, "Failed to destroy codec");
- aom_video_reader_close(reader);
- fclose(outfile);
-
- return EXIT_SUCCESS;
-}
diff --git a/third_party/aom/examples/lossless_encoder.c b/third_party/aom/examples/lossless_encoder.c
deleted file mode 100644
index 438ff21c6..000000000
--- a/third_party/aom/examples/lossless_encoder.c
+++ /dev/null
@@ -1,138 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-
-#include "aom/aom_encoder.h"
-#include "aom/aomcx.h"
-#include "common/tools_common.h"
-#include "common/video_writer.h"
-
-static const char *exec_name;
-
-void usage_exit(void) {
- fprintf(stderr,
- "lossless_encoder: Example demonstrating lossless "
- "encoding feature. Supports raw input only.\n");
- fprintf(stderr, "Usage: %s <width> <height> <infile> <outfile>\n", exec_name);
- exit(EXIT_FAILURE);
-}
-
-static int encode_frame(aom_codec_ctx_t *codec, aom_image_t *img,
- int frame_index, int flags, AvxVideoWriter *writer) {
- int got_pkts = 0;
- aom_codec_iter_t iter = NULL;
- const aom_codec_cx_pkt_t *pkt = NULL;
- const aom_codec_err_t res =
- aom_codec_encode(codec, img, frame_index, 1, flags);
- if (res != AOM_CODEC_OK) die_codec(codec, "Failed to encode frame");
-
- while ((pkt = aom_codec_get_cx_data(codec, &iter)) != NULL) {
- got_pkts = 1;
-
- if (pkt->kind == AOM_CODEC_CX_FRAME_PKT) {
- const int keyframe = (pkt->data.frame.flags & AOM_FRAME_IS_KEY) != 0;
- if (!aom_video_writer_write_frame(writer, pkt->data.frame.buf,
- pkt->data.frame.sz,
- pkt->data.frame.pts)) {
- die_codec(codec, "Failed to write compressed frame");
- }
- printf(keyframe ? "K" : ".");
- fflush(stdout);
- }
- }
-
- return got_pkts;
-}
-
-int main(int argc, char **argv) {
- FILE *infile = NULL;
- aom_codec_ctx_t codec;
- aom_codec_enc_cfg_t cfg;
- int frame_count = 0;
- aom_image_t raw;
- aom_codec_err_t res;
- AvxVideoInfo info;
- AvxVideoWriter *writer = NULL;
- const AvxInterface *encoder = NULL;
- const int fps = 30;
-
- exec_name = argv[0];
-
- // Clear explicitly, as simply assigning "{ 0 }" generates
- // "missing-field-initializers" warning in some compilers.
- memset(&info, 0, sizeof(info));
-
- if (argc < 5) die("Invalid number of arguments");
-
- encoder = get_aom_encoder_by_name("av1");
- if (!encoder) die("Unsupported codec.");
-
- info.codec_fourcc = encoder->fourcc;
- info.frame_width = (int)strtol(argv[1], NULL, 0);
- info.frame_height = (int)strtol(argv[2], NULL, 0);
- info.time_base.numerator = 1;
- info.time_base.denominator = fps;
-
- if (info.frame_width <= 0 || info.frame_height <= 0 ||
- (info.frame_width % 2) != 0 || (info.frame_height % 2) != 0) {
- die("Invalid frame size: %dx%d", info.frame_width, info.frame_height);
- }
-
- if (!aom_img_alloc(&raw, AOM_IMG_FMT_I420, info.frame_width,
- info.frame_height, 1)) {
- die("Failed to allocate image.");
- }
-
- printf("Using %s\n", aom_codec_iface_name(encoder->codec_interface()));
-
- res = aom_codec_enc_config_default(encoder->codec_interface(), &cfg, 0);
- if (res) die_codec(&codec, "Failed to get default codec config.");
-
- cfg.g_w = info.frame_width;
- cfg.g_h = info.frame_height;
- cfg.g_timebase.num = info.time_base.numerator;
- cfg.g_timebase.den = info.time_base.denominator;
-
- writer = aom_video_writer_open(argv[4], kContainerIVF, &info);
- if (!writer) die("Failed to open %s for writing.", argv[4]);
-
- if (!(infile = fopen(argv[3], "rb")))
- die("Failed to open %s for reading.", argv[3]);
-
- if (aom_codec_enc_init(&codec, encoder->codec_interface(), &cfg, 0))
- die_codec(&codec, "Failed to initialize encoder");
-
- if (aom_codec_control_(&codec, AV1E_SET_LOSSLESS, 1))
- die_codec(&codec, "Failed to use lossless mode");
-
- // Encode frames.
- while (aom_img_read(&raw, infile)) {
- encode_frame(&codec, &raw, frame_count++, 0, writer);
- }
-
- // Flush encoder.
- while (encode_frame(&codec, NULL, -1, 0, writer)) {
- }
-
- printf("\n");
- fclose(infile);
- printf("Processed %d frames.\n", frame_count);
-
- aom_img_free(&raw);
- if (aom_codec_destroy(&codec)) die_codec(&codec, "Failed to destroy codec.");
-
- aom_video_writer_close(writer);
-
- return EXIT_SUCCESS;
-}
diff --git a/third_party/aom/examples/noise_model.c b/third_party/aom/examples/noise_model.c
deleted file mode 100644
index 5cc6003b6..000000000
--- a/third_party/aom/examples/noise_model.c
+++ /dev/null
@@ -1,431 +0,0 @@
-/*
- * Copyright (c) 2018, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-/*!\file
- * \brief This is an sample binary to create noise params from input video.
- *
- * To allow for external denoising applications, this sample binary illustrates
- * how to create a film grain table (film grain params as a function of time)
- * from an input video and its corresponding denoised source.
- *
- * The --output-grain-table file can be passed as input to the encoder (in
- * aomenc this is done through the "--film-grain-table" parameter).
- *
- * As an example, where the input source is an 854x480 yuv420p 8-bit video
- * named "input.854_480.yuv" you would use steps similar to the following:
- *
- * # Run your denoiser (e.g, using hqdn3d filter):
- * ffmpeg -vcodec rawvideo -video_size 854x480 -i input.854_480.yuv \
- * -vf hqdn3d=5:5:5:5 -vcodec rawvideo -an -f rawvideo \
- * denoised.854_480.yuv
- *
- * # Model the noise between the denoised version and original source:
- * ./examples/noise_model --fps=25/1 --width=854 --height=480 --i420 \
- * --input-denoised=denoised.854_480.yuv --input=original.854_480.yuv \
- * --output-grain-table=film_grain.tbl
- *
- * # Encode with your favorite settings (including the grain table):
- * aomenc --limit=100 --cpu-used=4 --input-bit-depth=8 \
- * --i420 -w 854 -h 480 --end-usage=q --cq-level=25 --lag-in-frames=25 \
- * --auto-alt-ref=2 --bit-depth=8 --film-grain-table=film_grain.tbl \
- * -o denoised_with_grain_params.ivf denoised.854_480.yuv
- */
-#include <math.h>
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-
-#include "aom/aom_encoder.h"
-#include "aom_dsp/aom_dsp_common.h"
-
-#if CONFIG_AV1_DECODER
-#include "aom_dsp/grain_synthesis.h"
-#endif
-
-#include "aom_dsp/grain_table.h"
-#include "aom_dsp/noise_model.h"
-#include "aom_dsp/noise_util.h"
-#include "aom_mem/aom_mem.h"
-#include "common/args.h"
-#include "common/tools_common.h"
-#include "common/video_writer.h"
-
-static const char *exec_name;
-
-void usage_exit(void) {
- fprintf(stderr,
- "Usage: %s --input=<input> --input-denoised=<denoised> "
- "--output-grain-table=<outfile> "
- "See comments in noise_model.c for more information.\n",
- exec_name);
- exit(EXIT_FAILURE);
-}
-
-static const arg_def_t help =
- ARG_DEF(NULL, "help", 0, "Show usage options and exit");
-static const arg_def_t width_arg =
- ARG_DEF("w", "width", 1, "Input width (if rawvideo)");
-static const arg_def_t height_arg =
- ARG_DEF("h", "height", 1, "Input height (if rawvideo)");
-static const arg_def_t skip_frames_arg =
- ARG_DEF("s", "skip-frames", 1, "Number of frames to skip (default = 1)");
-static const arg_def_t fps_arg = ARG_DEF(NULL, "fps", 1, "Frame rate");
-static const arg_def_t input_arg = ARG_DEF("-i", "input", 1, "Input filename");
-static const arg_def_t output_grain_table_arg =
- ARG_DEF("n", "output-grain-table", 1, "Output noise file");
-static const arg_def_t input_denoised_arg =
- ARG_DEF("d", "input-denoised", 1, "Input denoised filename (YUV) only");
-static const arg_def_t flat_block_finder_arg =
- ARG_DEF("b", "flat-block-finder", 1, "Run the flat block finder");
-static const arg_def_t block_size_arg =
- ARG_DEF("b", "block-size", 1, "Block size");
-static const arg_def_t bit_depth_arg =
- ARG_DEF(NULL, "bit-depth", 1, "Bit depth of input");
-static const arg_def_t use_i420 =
- ARG_DEF(NULL, "i420", 0, "Input file (and denoised) is I420 (default)");
-static const arg_def_t use_i422 =
- ARG_DEF(NULL, "i422", 0, "Input file (and denoised) is I422");
-static const arg_def_t use_i444 =
- ARG_DEF(NULL, "i444", 0, "Input file (and denoised) is I444");
-static const arg_def_t debug_file_arg =
- ARG_DEF(NULL, "debug-file", 1, "File to output debug info");
-
-typedef struct {
- int width;
- int height;
- struct aom_rational fps;
- const char *input;
- const char *input_denoised;
- const char *output_grain_table;
- int img_fmt;
- int block_size;
- int bit_depth;
- int run_flat_block_finder;
- int force_flat_psd;
- int skip_frames;
- const char *debug_file;
-} noise_model_args_t;
-
-void parse_args(noise_model_args_t *noise_args, int *argc, char **argv) {
- struct arg arg;
- static const arg_def_t *main_args[] = { &help,
- &input_arg,
- &fps_arg,
- &width_arg,
- &height_arg,
- &block_size_arg,
- &output_grain_table_arg,
- &input_denoised_arg,
- &use_i420,
- &use_i422,
- &use_i444,
- &debug_file_arg,
- NULL };
- for (int argi = *argc + 1; *argv; argi++, argv++) {
- if (arg_match(&arg, &help, argv)) {
- fprintf(stdout, "\nOptions:\n");
- arg_show_usage(stdout, main_args);
- exit(0);
- } else if (arg_match(&arg, &width_arg, argv)) {
- noise_args->width = atoi(arg.val);
- } else if (arg_match(&arg, &height_arg, argv)) {
- noise_args->height = atoi(arg.val);
- } else if (arg_match(&arg, &input_arg, argv)) {
- noise_args->input = arg.val;
- } else if (arg_match(&arg, &input_denoised_arg, argv)) {
- noise_args->input_denoised = arg.val;
- } else if (arg_match(&arg, &output_grain_table_arg, argv)) {
- noise_args->output_grain_table = arg.val;
- } else if (arg_match(&arg, &block_size_arg, argv)) {
- noise_args->block_size = atoi(arg.val);
- } else if (arg_match(&arg, &bit_depth_arg, argv)) {
- noise_args->bit_depth = atoi(arg.val);
- } else if (arg_match(&arg, &flat_block_finder_arg, argv)) {
- noise_args->run_flat_block_finder = atoi(arg.val);
- } else if (arg_match(&arg, &fps_arg, argv)) {
- noise_args->fps = arg_parse_rational(&arg);
- } else if (arg_match(&arg, &use_i420, argv)) {
- noise_args->img_fmt = AOM_IMG_FMT_I420;
- } else if (arg_match(&arg, &use_i422, argv)) {
- noise_args->img_fmt = AOM_IMG_FMT_I422;
- } else if (arg_match(&arg, &use_i444, argv)) {
- noise_args->img_fmt = AOM_IMG_FMT_I444;
- } else if (arg_match(&arg, &skip_frames_arg, argv)) {
- noise_args->skip_frames = atoi(arg.val);
- } else if (arg_match(&arg, &debug_file_arg, argv)) {
- noise_args->debug_file = arg.val;
- } else {
- fprintf(stdout, "Unknown arg: %s\n\nUsage:\n", *argv);
- arg_show_usage(stdout, main_args);
- exit(0);
- }
- }
- if (noise_args->bit_depth > 8) {
- noise_args->img_fmt |= AOM_IMG_FMT_HIGHBITDEPTH;
- }
-}
-
-#if CONFIG_AV1_DECODER
-static void print_variance_y(FILE *debug_file, aom_image_t *raw,
- aom_image_t *denoised, const uint8_t *flat_blocks,
- int block_size, aom_film_grain_t *grain) {
- aom_image_t renoised;
- grain->apply_grain = 1;
- grain->random_seed = 7391;
- aom_img_alloc(&renoised, raw->fmt, raw->w, raw->h, 1);
-
- if (av1_add_film_grain(grain, denoised, &renoised)) {
- fprintf(stderr, "Internal failure in av1_add_film_grain().\n");
- aom_img_free(&renoised);
- return;
- }
-
- const int num_blocks_w = (raw->w + block_size - 1) / block_size;
- const int num_blocks_h = (raw->h + block_size - 1) / block_size;
- fprintf(debug_file, "x = [");
- for (int by = 0; by < num_blocks_h; by++) {
- for (int bx = 0; bx < num_blocks_w; bx++) {
- double block_mean = 0;
- double noise_std = 0, noise_mean = 0;
- double renoise_std = 0, renoise_mean = 0;
- for (int yi = 0; yi < block_size; ++yi) {
- const int y = by * block_size + yi;
- for (int xi = 0; xi < block_size; ++xi) {
- const int x = bx * block_size + xi;
- const double noise_v = (raw->planes[0][y * raw->stride[0] + x] -
- denoised->planes[0][y * raw->stride[0] + x]);
- noise_mean += noise_v;
- noise_std += noise_v * noise_v;
-
- block_mean += raw->planes[0][y * raw->stride[0] + x];
-
- const double renoise_v =
- (renoised.planes[0][y * raw->stride[0] + x] -
- denoised->planes[0][y * raw->stride[0] + x]);
- renoise_mean += renoise_v;
- renoise_std += renoise_v * renoise_v;
- }
- }
- int n = (block_size * block_size);
- block_mean /= n;
- noise_mean /= n;
- renoise_mean /= n;
- noise_std = sqrt(noise_std / n - noise_mean * noise_mean);
- renoise_std = sqrt(renoise_std / n - renoise_mean * renoise_mean);
- fprintf(debug_file, "%d %3.2lf %3.2lf %3.2lf ",
- flat_blocks[by * num_blocks_w + bx], block_mean, noise_std,
- renoise_std);
- }
- fprintf(debug_file, "\n");
- }
- fprintf(debug_file, "];\n");
-
- if (raw->fmt & AOM_IMG_FMT_HIGHBITDEPTH) {
- fprintf(stderr,
- "Detailed debug info not supported for high bit"
- "depth formats\n");
- } else {
- fprintf(debug_file, "figure(2); clf;\n");
- fprintf(debug_file,
- "scatter(x(:, 2:4:end), x(:, 3:4:end), 'r'); hold on;\n");
- fprintf(debug_file, "scatter(x(:, 2:4:end), x(:, 4:4:end), 'b');\n");
- fprintf(debug_file,
- "plot(linspace(0, 255, length(noise_strength_0)), "
- "noise_strength_0, 'b');\n");
- fprintf(debug_file,
- "title('Scatter plot of intensity vs noise strength');\n");
- fprintf(debug_file,
- "legend('Actual', 'Estimated', 'Estimated strength');\n");
- fprintf(debug_file, "figure(3); clf;\n");
- fprintf(debug_file, "scatter(x(:, 3:4:end), x(:, 4:4:end), 'k');\n");
- fprintf(debug_file, "title('Actual vs Estimated');\n");
- fprintf(debug_file, "pause(3);\n");
- }
- aom_img_free(&renoised);
-}
-#endif
-
-static void print_debug_info(FILE *debug_file, aom_image_t *raw,
- aom_image_t *denoised, uint8_t *flat_blocks,
- int block_size, aom_noise_model_t *noise_model) {
- (void)raw;
- (void)denoised;
- (void)flat_blocks;
- (void)block_size;
- fprintf(debug_file, "figure(3); clf;\n");
- fprintf(debug_file, "figure(2); clf;\n");
- fprintf(debug_file, "figure(1); clf;\n");
- for (int c = 0; c < 3; ++c) {
- fprintf(debug_file, "noise_strength_%d = [\n", c);
- const aom_equation_system_t *eqns =
- &noise_model->combined_state[c].strength_solver.eqns;
- for (int k = 0; k < eqns->n; ++k) {
- fprintf(debug_file, "%lf ", eqns->x[k]);
- }
- fprintf(debug_file, "];\n");
- fprintf(debug_file, "plot(noise_strength_%d); hold on;\n", c);
- }
- fprintf(debug_file, "legend('Y', 'cb', 'cr');\n");
- fprintf(debug_file, "title('Noise strength function');\n");
-
-#if CONFIG_AV1_DECODER
- aom_film_grain_t grain;
- aom_noise_model_get_grain_parameters(noise_model, &grain);
- print_variance_y(debug_file, raw, denoised, flat_blocks, block_size, &grain);
-#endif
- fflush(debug_file);
-}
-
-int main(int argc, char *argv[]) {
- noise_model_args_t args = { 0, 0, { 25, 1 }, 0, 0, 0, AOM_IMG_FMT_I420,
- 32, 8, 1, 0, 1, NULL };
- aom_image_t raw, denoised;
- FILE *infile = NULL;
- AvxVideoInfo info;
-
- memset(&info, 0, sizeof(info));
-
- exec_name = argv[0];
- parse_args(&args, &argc, argv + 1);
-
- info.frame_width = args.width;
- info.frame_height = args.height;
- info.time_base.numerator = args.fps.den;
- info.time_base.denominator = args.fps.num;
-
- if (info.frame_width <= 0 || info.frame_height <= 0 ||
- (info.frame_width % 2) != 0 || (info.frame_height % 2) != 0) {
- die("Invalid frame size: %dx%d", info.frame_width, info.frame_height);
- }
- if (!aom_img_alloc(&raw, args.img_fmt, info.frame_width, info.frame_height,
- 1)) {
- die("Failed to allocate image.");
- }
- if (!aom_img_alloc(&denoised, args.img_fmt, info.frame_width,
- info.frame_height, 1)) {
- die("Failed to allocate image.");
- }
- infile = fopen(args.input, "r");
- if (!infile) {
- die("Failed to open input file:", args.input);
- }
- fprintf(stderr, "Bit depth: %d stride:%d\n", args.bit_depth, raw.stride[0]);
-
- const int high_bd = args.bit_depth > 8;
- const int block_size = args.block_size;
- aom_flat_block_finder_t block_finder;
- aom_flat_block_finder_init(&block_finder, block_size, args.bit_depth,
- high_bd);
-
- const int num_blocks_w = (info.frame_width + block_size - 1) / block_size;
- const int num_blocks_h = (info.frame_height + block_size - 1) / block_size;
- uint8_t *flat_blocks = (uint8_t *)aom_malloc(num_blocks_w * num_blocks_h);
- // Sets the random seed on the first entry in the output table
- int16_t random_seed = 7391;
- aom_noise_model_t noise_model;
- aom_noise_model_params_t params = { AOM_NOISE_SHAPE_SQUARE, 3, args.bit_depth,
- high_bd };
- aom_noise_model_init(&noise_model, params);
-
- FILE *denoised_file = 0;
- if (args.input_denoised) {
- denoised_file = fopen(args.input_denoised, "rb");
- if (!denoised_file)
- die("Unable to open input_denoised: %s", args.input_denoised);
- } else {
- die("--input-denoised file must be specified");
- }
- FILE *debug_file = 0;
- if (args.debug_file) {
- debug_file = fopen(args.debug_file, "w");
- }
- aom_film_grain_table_t grain_table = { 0, 0 };
-
- int64_t prev_timestamp = 0;
- int frame_count = 0;
- while (aom_img_read(&raw, infile)) {
- if (args.input_denoised) {
- if (!aom_img_read(&denoised, denoised_file)) {
- die("Unable to read input denoised file");
- }
- }
- if (frame_count % args.skip_frames == 0) {
- int num_flat_blocks = num_blocks_w * num_blocks_h;
- memset(flat_blocks, 1, num_flat_blocks);
- if (args.run_flat_block_finder) {
- memset(flat_blocks, 0, num_flat_blocks);
- num_flat_blocks = aom_flat_block_finder_run(
- &block_finder, raw.planes[0], info.frame_width, info.frame_height,
- info.frame_width, flat_blocks);
- fprintf(stdout, "Num flat blocks %d\n", num_flat_blocks);
- }
-
- const uint8_t *planes[3] = { raw.planes[0], raw.planes[1],
- raw.planes[2] };
- uint8_t *denoised_planes[3] = { denoised.planes[0], denoised.planes[1],
- denoised.planes[2] };
- int strides[3] = { raw.stride[0] >> high_bd, raw.stride[1] >> high_bd,
- raw.stride[2] >> high_bd };
- int chroma_sub[3] = { raw.x_chroma_shift, raw.y_chroma_shift, 0 };
-
- fprintf(stdout, "Updating noise model...\n");
- aom_noise_status_t status = aom_noise_model_update(
- &noise_model, (const uint8_t *const *)planes,
- (const uint8_t *const *)denoised_planes, info.frame_width,
- info.frame_height, strides, chroma_sub, flat_blocks, block_size);
-
- int64_t cur_timestamp =
- frame_count * 10000000ULL * args.fps.den / args.fps.num;
- if (status == AOM_NOISE_STATUS_DIFFERENT_NOISE_TYPE) {
- fprintf(stdout,
- "Noise type is different, updating parameters for time "
- "[ %" PRId64 ", %" PRId64 ")\n",
- prev_timestamp, cur_timestamp);
- aom_film_grain_t grain;
- aom_noise_model_get_grain_parameters(&noise_model, &grain);
- grain.random_seed = random_seed;
- random_seed = 0;
- aom_film_grain_table_append(&grain_table, prev_timestamp, cur_timestamp,
- &grain);
- aom_noise_model_save_latest(&noise_model);
- prev_timestamp = cur_timestamp;
- }
- if (debug_file) {
- print_debug_info(debug_file, &raw, &denoised, flat_blocks, block_size,
- &noise_model);
- }
- fprintf(stdout, "Done noise model update, status = %d\n", status);
- }
- frame_count++;
- }
-
- aom_film_grain_t grain;
- aom_noise_model_get_grain_parameters(&noise_model, &grain);
- grain.random_seed = random_seed;
- aom_film_grain_table_append(&grain_table, prev_timestamp, INT64_MAX, &grain);
- if (args.output_grain_table) {
- struct aom_internal_error_info error_info;
- if (AOM_CODEC_OK != aom_film_grain_table_write(&grain_table,
- args.output_grain_table,
- &error_info)) {
- die("Unable to write output film grain table");
- }
- }
- aom_film_grain_table_free(&grain_table);
-
- if (infile) fclose(infile);
- if (denoised_file) fclose(denoised_file);
- if (debug_file) fclose(debug_file);
- aom_img_free(&raw);
- aom_img_free(&denoised);
-
- return EXIT_SUCCESS;
-}
diff --git a/third_party/aom/examples/resize_util.c b/third_party/aom/examples/resize_util.c
deleted file mode 100644
index 6a84d5740..000000000
--- a/third_party/aom/examples/resize_util.c
+++ /dev/null
@@ -1,124 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#include <assert.h>
-#include <limits.h>
-#include <math.h>
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-
-#include "av1/common/resize.h"
-#include "common/tools_common.h"
-
-static const char *exec_name = NULL;
-
-static void usage() {
- printf("Usage:\n");
- printf("%s <input_yuv> <width>x<height> <target_width>x<target_height> ",
- exec_name);
- printf("<output_yuv> [<frames>]\n");
-}
-
-void usage_exit(void) {
- usage();
- exit(EXIT_FAILURE);
-}
-
-static int parse_dim(char *v, int *width, int *height) {
- char *x = strchr(v, 'x');
- if (x == NULL) x = strchr(v, 'X');
- if (x == NULL) return 0;
- *width = atoi(v);
- *height = atoi(&x[1]);
- if (*width <= 0 || *height <= 0)
- return 0;
- else
- return 1;
-}
-
-int main(int argc, char *argv[]) {
- char *fin, *fout;
- FILE *fpin, *fpout;
- uint8_t *inbuf, *outbuf;
- uint8_t *inbuf_u, *outbuf_u;
- uint8_t *inbuf_v, *outbuf_v;
- int f, frames;
- int width, height, target_width, target_height;
-
- exec_name = argv[0];
-
- if (argc < 5) {
- printf("Incorrect parameters:\n");
- usage();
- return 1;
- }
-
- fin = argv[1];
- fout = argv[4];
- if (!parse_dim(argv[2], &width, &height)) {
- printf("Incorrect parameters: %s\n", argv[2]);
- usage();
- return 1;
- }
- if (!parse_dim(argv[3], &target_width, &target_height)) {
- printf("Incorrect parameters: %s\n", argv[3]);
- usage();
- return 1;
- }
-
- fpin = fopen(fin, "rb");
- if (fpin == NULL) {
- printf("Can't open file %s to read\n", fin);
- usage();
- return 1;
- }
- fpout = fopen(fout, "wb");
- if (fpout == NULL) {
- printf("Can't open file %s to write\n", fout);
- usage();
- return 1;
- }
- if (argc >= 6)
- frames = atoi(argv[5]);
- else
- frames = INT_MAX;
-
- printf("Input size: %dx%d\n", width, height);
- printf("Target size: %dx%d, Frames: ", target_width, target_height);
- if (frames == INT_MAX)
- printf("All\n");
- else
- printf("%d\n", frames);
-
- inbuf = (uint8_t *)malloc(width * height * 3 / 2);
- outbuf = (uint8_t *)malloc(target_width * target_height * 3 / 2);
- inbuf_u = inbuf + width * height;
- inbuf_v = inbuf_u + width * height / 4;
- outbuf_u = outbuf + target_width * target_height;
- outbuf_v = outbuf_u + target_width * target_height / 4;
- f = 0;
- while (f < frames) {
- if (fread(inbuf, width * height * 3 / 2, 1, fpin) != 1) break;
- av1_resize_frame420(inbuf, width, inbuf_u, inbuf_v, width / 2, height,
- width, outbuf, target_width, outbuf_u, outbuf_v,
- target_width / 2, target_height, target_width);
- fwrite(outbuf, target_width * target_height * 3 / 2, 1, fpout);
- f++;
- }
- printf("%d frames processed\n", f);
- fclose(fpin);
- fclose(fpout);
-
- free(inbuf);
- free(outbuf);
- return 0;
-}
diff --git a/third_party/aom/examples/scalable_decoder.c b/third_party/aom/examples/scalable_decoder.c
deleted file mode 100644
index c22924223..000000000
--- a/third_party/aom/examples/scalable_decoder.c
+++ /dev/null
@@ -1,185 +0,0 @@
-/*
- * Copyright (c) 2018, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-// Scalable Decoder
-// ==============
-//
-// This is an example of a scalable decoder loop. It takes a 2-spatial-layer
-// input file
-// containing the compressed data (in OBU format), passes it through the
-// decoder, and writes the decompressed frames to disk. The base layer and
-// enhancement layers are stored as separate files, out_lyr0.yuv and
-// out_lyr1.yuv, respectively.
-//
-// Standard Includes
-// -----------------
-// For decoders, you only have to include `aom_decoder.h` and then any
-// header files for the specific codecs you use. In this case, we're using
-// av1.
-//
-// Initializing The Codec
-// ----------------------
-// The libaom decoder is initialized by the call to aom_codec_dec_init().
-// Determining the codec interface to use is handled by AvxVideoReader and the
-// functions prefixed with aom_video_reader_. Discussion of those functions is
-// beyond the scope of this example, but the main gist is to open the input file
-// and parse just enough of it to determine if it's a AVx file and which AVx
-// codec is contained within the file.
-// Note the NULL pointer passed to aom_codec_dec_init(). We do that in this
-// example because we want the algorithm to determine the stream configuration
-// (width/height) and allocate memory automatically.
-//
-// Decoding A Frame
-// ----------------
-// Once the frame has been read into memory, it is decoded using the
-// `aom_codec_decode` function. The call takes a pointer to the data
-// (`frame`) and the length of the data (`frame_size`). No application data
-// is associated with the frame in this example, so the `user_priv`
-// parameter is NULL. The `deadline` parameter is left at zero for this
-// example. This parameter is generally only used when doing adaptive post
-// processing.
-//
-// Codecs may produce a variable number of output frames for every call to
-// `aom_codec_decode`. These frames are retrieved by the
-// `aom_codec_get_frame` iterator function. The iterator variable `iter` is
-// initialized to NULL each time `aom_codec_decode` is called.
-// `aom_codec_get_frame` is called in a loop, returning a pointer to a
-// decoded image or NULL to indicate the end of list.
-//
-// Processing The Decoded Data
-// ---------------------------
-// In this example, we simply write the encoded data to disk. It is
-// important to honor the image's `stride` values.
-//
-// Cleanup
-// -------
-// The `aom_codec_destroy` call frees any memory allocated by the codec.
-//
-// Error Handling
-// --------------
-// This example does not special case any error return codes. If there was
-// an error, a descriptive message is printed and the program exits. With
-// few exceptions, aom_codec functions return an enumerated error status,
-// with the value `0` indicating success.
-
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-
-#include "aom/aom_decoder.h"
-#include "aom/aomdx.h"
-#include "common/obudec.h"
-#include "common/tools_common.h"
-#include "common/video_reader.h"
-
-static const char *exec_name;
-
-#define MAX_LAYERS 5
-
-void usage_exit(void) {
- fprintf(stderr, "Usage: %s <infile>\n", exec_name);
- exit(EXIT_FAILURE);
-}
-
-int main(int argc, char **argv) {
- int frame_cnt = 0;
- FILE *outfile[MAX_LAYERS];
- char filename[80];
- aom_codec_ctx_t codec;
- const AvxInterface *decoder = NULL;
- FILE *inputfile = NULL;
- uint8_t *buf = NULL;
- size_t bytes_in_buffer = 0;
- size_t buffer_size = 0;
- struct AvxInputContext aom_input_ctx;
- struct ObuDecInputContext obu_ctx = { &aom_input_ctx, NULL, 0, 0, 0 };
- aom_codec_stream_info_t si;
- uint8_t tmpbuf[32];
- unsigned int i;
-
- exec_name = argv[0];
-
- if (argc != 2) die("Invalid number of arguments.");
-
- if (!(inputfile = fopen(argv[1], "rb")))
- die("Failed to open %s for read.", argv[1]);
- obu_ctx.avx_ctx->file = inputfile;
- obu_ctx.avx_ctx->filename = argv[1];
-
- decoder = get_aom_decoder_by_index(0);
- printf("Using %s\n", aom_codec_iface_name(decoder->codec_interface()));
-
- if (aom_codec_dec_init(&codec, decoder->codec_interface(), NULL, 0))
- die_codec(&codec, "Failed to initialize decoder.");
-
- if (aom_codec_control(&codec, AV1D_SET_OUTPUT_ALL_LAYERS, 1)) {
- die_codec(&codec, "Failed to set output_all_layers control.");
- }
-
- // peak sequence header OBU to get number of spatial layers
- const size_t ret = fread(tmpbuf, 1, 32, inputfile);
- if (ret != 32) die_codec(&codec, "Input is not a valid obu file");
- si.is_annexb = 0;
- if (aom_codec_peek_stream_info(decoder->codec_interface(), tmpbuf, 32, &si)) {
- die_codec(&codec, "Input is not a valid obu file");
- }
- fseek(inputfile, -32, SEEK_CUR);
-
- if (!file_is_obu(&obu_ctx))
- die_codec(&codec, "Input is not a valid obu file");
-
- // open base layer output yuv file
- snprintf(filename, sizeof(filename), "out_lyr%d.yuv", 0);
- if (!(outfile[0] = fopen(filename, "wb")))
- die("Failed top open output for writing.");
-
- // open any enhancement layer output yuv files
- for (i = 1; i < si.number_spatial_layers; i++) {
- snprintf(filename, sizeof(filename), "out_lyr%d.yuv", i);
- if (!(outfile[i] = fopen(filename, "wb")))
- die("Failed to open output for writing.");
- }
-
- while (!obudec_read_temporal_unit(&obu_ctx, &buf, &bytes_in_buffer,
- &buffer_size)) {
- aom_codec_iter_t iter = NULL;
- aom_image_t *img = NULL;
- if (aom_codec_decode(&codec, buf, bytes_in_buffer, NULL))
- die_codec(&codec, "Failed to decode frame.");
-
- while ((img = aom_codec_get_frame(&codec, &iter)) != NULL) {
- aom_image_t *img_shifted =
- aom_img_alloc(NULL, AOM_IMG_FMT_I420, img->d_w, img->d_h, 16);
- img_shifted->bit_depth = 8;
- aom_img_downshift(img_shifted, img,
- img->bit_depth - img_shifted->bit_depth);
- if (img->spatial_id == 0) {
- printf("Writing base layer 0 %d\n", frame_cnt);
- aom_img_write(img_shifted, outfile[0]);
- } else if (img->spatial_id <= (int)(si.number_spatial_layers - 1)) {
- printf("Writing enhancement layer %d %d\n", img->spatial_id, frame_cnt);
- aom_img_write(img_shifted, outfile[img->spatial_id]);
- } else {
- die_codec(&codec, "Invalid bitstream. Layer id exceeds layer count");
- }
- if (img->spatial_id == (int)(si.number_spatial_layers - 1)) ++frame_cnt;
- }
- }
-
- printf("Processed %d frames.\n", frame_cnt);
- if (aom_codec_destroy(&codec)) die_codec(&codec, "Failed to destroy codec");
-
- for (i = 0; i < si.number_spatial_layers; i++) fclose(outfile[i]);
-
- fclose(inputfile);
-
- return EXIT_SUCCESS;
-}
diff --git a/third_party/aom/examples/scalable_encoder.c b/third_party/aom/examples/scalable_encoder.c
deleted file mode 100644
index 7af03e29f..000000000
--- a/third_party/aom/examples/scalable_encoder.c
+++ /dev/null
@@ -1,289 +0,0 @@
-/*
- * Copyright (c) 2018, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-// Scalable Encoder
-// ==============
-//
-// This is an example of a scalable encoder loop. It takes two input files in
-// YV12 format, passes it through the encoder, and writes the compressed
-// frames to disk in OBU format.
-//
-// Getting The Default Configuration
-// ---------------------------------
-// Encoders have the notion of "usage profiles." For example, an encoder
-// may want to publish default configurations for both a video
-// conferencing application and a best quality offline encoder. These
-// obviously have very different default settings. Consult the
-// documentation for your codec to see if it provides any default
-// configurations. All codecs provide a default configuration, number 0,
-// which is valid for material in the vacinity of QCIF/QVGA.
-//
-// Updating The Configuration
-// ---------------------------------
-// Almost all applications will want to update the default configuration
-// with settings specific to their usage. Here we set the width and height
-// of the video file to that specified on the command line. We also scale
-// the default bitrate based on the ratio between the default resolution
-// and the resolution specified on the command line.
-//
-// Encoding A Frame
-// ----------------
-// The frame is read as a continuous block (size = width * height * 3 / 2)
-// from the input file. If a frame was read (the input file has not hit
-// EOF) then the frame is passed to the encoder. Otherwise, a NULL
-// is passed, indicating the End-Of-Stream condition to the encoder. The
-// `frame_cnt` is reused as the presentation time stamp (PTS) and each
-// frame is shown for one frame-time in duration. The flags parameter is
-// unused in this example.
-
-// Forced Keyframes
-// ----------------
-// Keyframes can be forced by setting the AOM_EFLAG_FORCE_KF bit of the
-// flags passed to `aom_codec_control()`. In this example, we force a
-// keyframe every <keyframe-interval> frames. Note, the output stream can
-// contain additional keyframes beyond those that have been forced using the
-// AOM_EFLAG_FORCE_KF flag because of automatic keyframe placement by the
-// encoder.
-//
-// Processing The Encoded Data
-// ---------------------------
-// Each packet of type `AOM_CODEC_CX_FRAME_PKT` contains the encoded data
-// for this frame. We write a IVF frame header, followed by the raw data.
-//
-// Cleanup
-// -------
-// The `aom_codec_destroy` call frees any memory allocated by the codec.
-//
-// Error Handling
-// --------------
-// This example does not special case any error return codes. If there was
-// an error, a descriptive message is printed and the program exits. With
-// few exeptions, aom_codec functions return an enumerated error status,
-// with the value `0` indicating success.
-
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-
-#include "aom/aom_encoder.h"
-#include "aom/aomcx.h"
-#include "av1/common/enums.h"
-#include "common/tools_common.h"
-#include "common/video_writer.h"
-
-static const char *exec_name;
-
-void usage_exit(void) {
- fprintf(stderr,
- "Usage: %s <codec> <width> <height> <infile0> <infile1> "
- "<outfile> <frames to encode>\n"
- "See comments in scalable_encoder.c for more information.\n",
- exec_name);
- exit(EXIT_FAILURE);
-}
-
-static int encode_frame(aom_codec_ctx_t *codec, aom_image_t *img,
- int frame_index, int flags, FILE *outfile) {
- int got_pkts = 0;
- aom_codec_iter_t iter = NULL;
- const aom_codec_cx_pkt_t *pkt = NULL;
- const aom_codec_err_t res =
- aom_codec_encode(codec, img, frame_index, 1, flags);
- if (res != AOM_CODEC_OK) die_codec(codec, "Failed to encode frame");
-
- while ((pkt = aom_codec_get_cx_data(codec, &iter)) != NULL) {
- got_pkts = 1;
-
- if (pkt->kind == AOM_CODEC_CX_FRAME_PKT) {
- const int keyframe = (pkt->data.frame.flags & AOM_FRAME_IS_KEY) != 0;
- if (fwrite(pkt->data.frame.buf, 1, pkt->data.frame.sz, outfile) !=
- pkt->data.frame.sz) {
- die_codec(codec, "Failed to write compressed frame");
- }
- printf(keyframe ? "K" : ".");
- printf(" %6d\n", (int)pkt->data.frame.sz);
- fflush(stdout);
- }
- }
-
- return got_pkts;
-}
-
-// Scalable encoder example entry point.
-//
-// Expects exactly seven arguments:
-//   <codec> <width> <height> <infile0> <infile1> <outfile> <frames to encode>
-// Each frame read from <infile0> is encoded as spatial layer 0 and the frame
-// read alongside it from <infile1> as spatial layer 1. A <frames to encode>
-// value of 0 means "encode until an input runs out".
-// Returns EXIT_SUCCESS; every failure path exits through die()/die_codec().
-int main(int argc, char **argv) {
-  FILE *infile0 = NULL;
-  FILE *infile1 = NULL;
-  aom_codec_ctx_t codec;
-  aom_codec_enc_cfg_t cfg;
-  int frame_count = 0;
-  aom_image_t raw0, raw1;
-  aom_codec_err_t res;
-  AvxVideoInfo info;
-  const AvxInterface *encoder = NULL;
-  const int fps = 30;
-  const int bitrate = 200;
-  // Fixed keyframe cadence; this example takes no keyframe-interval argument.
-  const int keyframe_interval = 100;
-  int max_frames = 0;
-  int frames_encoded = 0;
-  const char *codec_arg = NULL;
-  const char *width_arg = NULL;
-  const char *height_arg = NULL;
-  const char *infile0_arg = NULL;
-  const char *infile1_arg = NULL;
-  const char *outfile_arg = NULL;
-  FILE *outfile = NULL;
-
-  exec_name = argv[0];
-
-  // Clear explicitly, as simply assigning "{ 0 }" generates
-  // "missing-field-initializers" warning in some compilers.
-  memset(&info, 0, sizeof(info));
-
-  if (argc != 8) die("Invalid number of arguments");
-
-  codec_arg = argv[1];
-  width_arg = argv[2];
-  height_arg = argv[3];
-  infile0_arg = argv[4];
-  infile1_arg = argv[5];
-  outfile_arg = argv[6];
-  max_frames = (int)strtol(argv[7], NULL, 0);
-
-  encoder = get_aom_encoder_by_name(codec_arg);
-  if (!encoder) die("Unsupported codec.");
-
-  info.codec_fourcc = encoder->fourcc;
-  info.frame_width = (int)strtol(width_arg, NULL, 0);
-  info.frame_height = (int)strtol(height_arg, NULL, 0);
-  info.time_base.numerator = 1;
-  info.time_base.denominator = fps;
-
-  if (info.frame_width <= 0 || info.frame_height <= 0 ||
-      (info.frame_width % 2) != 0 || (info.frame_height % 2) != 0) {
-    die("Invalid frame size: %dx%d", info.frame_width, info.frame_height);
-  }
-
-  if (!aom_img_alloc(&raw0, AOM_IMG_FMT_I420, info.frame_width,
-                     info.frame_height, 1)) {
-    die("Failed to allocate image for layer 0.");
-  }
-  if (!aom_img_alloc(&raw1, AOM_IMG_FMT_I420, info.frame_width,
-                     info.frame_height, 1)) {
-    die("Failed to allocate image for layer 1.");
-  }
-
-  printf("Using %s\n", aom_codec_iface_name(encoder->codec_interface()));
-
-  res = aom_codec_enc_config_default(encoder->codec_interface(), &cfg, 0);
-  // |codec| has not been initialized yet, so it must not be handed to
-  // die_codec() here; report the failure with plain die() instead.
-  if (res) die("Failed to get default codec config.");
-
-  cfg.g_w = info.frame_width;
-  cfg.g_h = info.frame_height;
-  cfg.g_timebase.num = info.time_base.numerator;
-  cfg.g_timebase.den = info.time_base.denominator;
-  cfg.rc_target_bitrate = bitrate;
-  cfg.g_error_resilient = 0;
-  cfg.g_lag_in_frames = 0;
-  cfg.rc_end_usage = AOM_Q;
-  cfg.save_as_annexb = 0;
-
-  outfile = fopen(outfile_arg, "wb");
-  if (!outfile) die("Failed to open %s for writing.", outfile_arg);
-
-  if (!(infile0 = fopen(infile0_arg, "rb")))
-    die("Failed to open %s for reading.", infile0_arg);
-  if (!(infile1 = fopen(infile1_arg, "rb")))
-    die("Failed to open %s for reading.", infile1_arg);
-
-  if (aom_codec_enc_init(&codec, encoder->codec_interface(), &cfg, 0))
-    die_codec(&codec, "Failed to initialize encoder");
-  if (aom_codec_control(&codec, AOME_SET_CPUUSED, 8))
-    die_codec(&codec, "Failed to set cpu to 8");
-
-  if (aom_codec_control(&codec, AV1E_SET_TILE_COLUMNS, 2))
-    die_codec(&codec, "Failed to set tile columns to 2");
-  if (aom_codec_control(&codec, AV1E_SET_NUM_TG, 3))
-    die_codec(&codec, "Failed to set num of tile groups to 3");
-
-  if (aom_codec_control(&codec, AOME_SET_NUMBER_SPATIAL_LAYERS, 2))
-    die_codec(&codec, "Failed to set number of spatial layers to 2");
-
-  // Encode frames. Both layers are read up front so that a base-layer frame
-  // is never encoded without a freshly read enhancement-layer frame to pair
-  // it with (a short <infile1> would otherwise re-encode stale data).
-  while (aom_img_read(&raw0, infile0) && aom_img_read(&raw1, infile1)) {
-    int flags = 0;
-
-    // configure and encode base layer
-
-    if (keyframe_interval > 0 && frames_encoded % keyframe_interval == 0)
-      flags |= AOM_EFLAG_FORCE_KF;
-    else
-      // use previous base layer (LAST) as sole reference
-      // save this frame as LAST to be used as reference by enhancement layer
-      // and next base layer
-      flags |= AOM_EFLAG_NO_REF_LAST2 | AOM_EFLAG_NO_REF_LAST3 |
-               AOM_EFLAG_NO_REF_GF | AOM_EFLAG_NO_REF_ARF |
-               AOM_EFLAG_NO_REF_BWD | AOM_EFLAG_NO_REF_ARF2 |
-               AOM_EFLAG_NO_UPD_GF | AOM_EFLAG_NO_UPD_ARF |
-               AOM_EFLAG_NO_UPD_ENTROPY;
-    cfg.g_w = info.frame_width;
-    cfg.g_h = info.frame_height;
-    if (aom_codec_enc_config_set(&codec, &cfg))
-      die_codec(&codec, "Failed to set enc cfg for layer 0");
-    if (aom_codec_control(&codec, AOME_SET_SPATIAL_LAYER_ID, 0))
-      die_codec(&codec, "Failed to set layer id to 0");
-    if (aom_codec_control(&codec, AOME_SET_CQ_LEVEL, 62))
-      die_codec(&codec, "Failed to set cq level");
-    encode_frame(&codec, &raw0, frame_count++, flags, outfile);
-
-    // configure and encode enhancement layer
-
-    // use LAST (base layer) as sole reference
-    flags = AOM_EFLAG_NO_REF_LAST2 | AOM_EFLAG_NO_REF_LAST3 |
-            AOM_EFLAG_NO_REF_GF | AOM_EFLAG_NO_REF_ARF | AOM_EFLAG_NO_REF_BWD |
-            AOM_EFLAG_NO_REF_ARF2 | AOM_EFLAG_NO_UPD_LAST |
-            AOM_EFLAG_NO_UPD_GF | AOM_EFLAG_NO_UPD_ARF |
-            AOM_EFLAG_NO_UPD_ENTROPY;
-    cfg.g_w = info.frame_width;
-    cfg.g_h = info.frame_height;
-    if (aom_codec_enc_config_set(&codec, &cfg))
-      die_codec(&codec, "Failed to set enc cfg for layer 1");
-    if (aom_codec_control(&codec, AOME_SET_SPATIAL_LAYER_ID, 1))
-      die_codec(&codec, "Failed to set layer id to 1");
-    if (aom_codec_control(&codec, AOME_SET_CQ_LEVEL, 10))
-      die_codec(&codec, "Failed to set cq level");
-    encode_frame(&codec, &raw1, frame_count++, flags, outfile);
-
-    frames_encoded++;
-
-    if (max_frames > 0 && frames_encoded >= max_frames) break;
-  }
-
-  // Flush encoder.
-  while (encode_frame(&codec, NULL, -1, 0, outfile)) continue;
-
-  printf("\n");
-  fclose(infile0);
-  fclose(infile1);
-  // frame_count advances once per layer, so halve it to report input frames.
-  printf("Processed %d frames.\n", frame_count / 2);
-
-  aom_img_free(&raw0);
-  aom_img_free(&raw1);
-  if (aom_codec_destroy(&codec)) die_codec(&codec, "Failed to destroy codec.");
-
-  fclose(outfile);
-
-  return EXIT_SUCCESS;
-}
diff --git a/third_party/aom/examples/set_maps.c b/third_party/aom/examples/set_maps.c
deleted file mode 100644
index 9aeb96e43..000000000
--- a/third_party/aom/examples/set_maps.c
+++ /dev/null
@@ -1,208 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-// AOM Set Active and ROI Maps
-// ===========================
-//
-// This is an example demonstrating how to control the AOM encoder's
-// ROI and Active maps.
-//
-// ROI (Region of Interest) maps are a way for the application to assign
-// each macroblock in the image to a region, and then set quantizer and
-// filtering parameters on that image.
-//
-// Active maps are a way for the application to specify on a
-// macroblock-by-macroblock basis whether there is any activity in that
-// macroblock.
-//
-//
-// Configuration
-// -------------
-// An ROI map is set on frame 22. If the width of the image in macroblocks
-// is evenly divisible by 4, then the output will appear to have distinct
-// columns, where the quantizer, loopfilter, and static threshold differ
-// from column to column.
-//
-// An active map is set on frame 33. If the width of the image in macroblocks
-// is evenly divisible by 4, then the output will appear to have distinct
-// columns, where one column will have motion and the next will not.
-//
-// The active map is cleared on frame 44.
-//
-// Observing The Effects
-// ---------------------
-// Use the `simple_decoder` example to decode this sample, and observe
-// the change in the image at frames 22, 33, and 44.
-
-#include <assert.h>
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-
-#include "aom/aom_encoder.h"
-#include "aom/aomcx.h"
-#include "common/tools_common.h"
-#include "common/video_writer.h"
-
-static const char *exec_name;
-
-// Writes the expected command line to stderr and terminates the process
-// with EXIT_FAILURE. Does not return.
-void usage_exit(void) {
-  static const char kUsageFormat[] =
-      "Usage: %s <codec> <width> <height> <infile> <outfile>\n";
-
-  fprintf(stderr, kUsageFormat, exec_name);
-  exit(EXIT_FAILURE);
-}
-
-// Installs an alternating active map on |codec|.
-//
-// The map has one byte per 16x16 block of the configured frame size
-// (dimensions rounded up), and entry i is set to i % 2. The temporary buffer
-// is released before returning. Exits through die()/die_codec() if the buffer
-// cannot be allocated or the encoder rejects the map.
-static void set_active_map(const aom_codec_enc_cfg_t *cfg,
-                           aom_codec_ctx_t *codec) {
-  unsigned int i;
-  aom_active_map_t map = { 0, 0, 0 };
-
-  map.rows = (cfg->g_h + 15) / 16;
-  map.cols = (cfg->g_w + 15) / 16;
-
-  map.active_map = (uint8_t *)malloc(map.rows * map.cols);
-  // The fill loop below writes through this pointer, so an allocation failure
-  // must be caught before it rather than crashing on a NULL dereference.
-  if (map.active_map == NULL) die("Failed to allocate active map");
-  for (i = 0; i < map.rows * map.cols; ++i) map.active_map[i] = i % 2;
-
-  if (aom_codec_control(codec, AOME_SET_ACTIVEMAP, &map))
-    die_codec(codec, "Failed to set active map");
-
-  free(map.active_map);
-}
-
-// Clears the active map on |codec| by submitting a map descriptor that has
-// the frame's 16x16-block dimensions (rounded up) but a NULL data pointer.
-// Exits through die_codec() if the control call fails.
-static void unset_active_map(const aom_codec_enc_cfg_t *cfg,
-                             aom_codec_ctx_t *codec) {
-  aom_active_map_t cleared = { 0, 0, 0 };
-  aom_codec_err_t status;
-
-  cleared.active_map = NULL;
-  cleared.cols = (cfg->g_w + 15) / 16;
-  cleared.rows = (cfg->g_h + 15) / 16;
-
-  status = aom_codec_control(codec, AOME_SET_ACTIVEMAP, &cleared);
-  if (status) die_codec(codec, "Failed to set active map");
-}
-
-// Hands |img| (or NULL to flush) to the encoder with a duration of one
-// timebase unit and no frame flags, then drains the resulting packets.
-// Compressed frame packets are written through |writer|; a "K" (keyframe) or
-// "." marker is printed to stdout for each.
-// Returns 1 when at least one packet was produced by this call, 0 otherwise.
-static int encode_frame(aom_codec_ctx_t *codec, aom_image_t *img,
-                        int frame_index, AvxVideoWriter *writer) {
-  aom_codec_iter_t pkt_iter = NULL;
-  const aom_codec_cx_pkt_t *packet;
-  int produced = 0;
-
-  if (aom_codec_encode(codec, img, frame_index, 1, 0) != AOM_CODEC_OK)
-    die_codec(codec, "Failed to encode frame");
-
-  for (packet = aom_codec_get_cx_data(codec, &pkt_iter); packet != NULL;
-       packet = aom_codec_get_cx_data(codec, &pkt_iter)) {
-    produced = 1;
-
-    if (packet->kind == AOM_CODEC_CX_FRAME_PKT) {
-      const int is_key = (packet->data.frame.flags & AOM_FRAME_IS_KEY) != 0;
-      const int stored = aom_video_writer_write_frame(
-          writer, packet->data.frame.buf, packet->data.frame.sz,
-          packet->data.frame.pts);
-
-      if (!stored) die_codec(codec, "Failed to write compressed frame");
-
-      fputs(is_key ? "K" : ".", stdout);
-      fflush(stdout);
-    }
-  }
-
-  return produced;
-}
-
-// Active-map example entry point.
-//
-// Expects exactly five arguments:
-//   <codec> <width> <height> <infile> <outfile>
-// Encodes at most |limit| frames from <infile> into an IVF file, installing an
-// alternating active map before frame 5 and clearing it before frame 11.
-// Returns EXIT_SUCCESS; every failure path exits through die()/die_codec().
-int main(int argc, char **argv) {
-  FILE *infile = NULL;
-  aom_codec_ctx_t codec;
-  aom_codec_enc_cfg_t cfg;
-  int frame_count = 0;
-  const int limit = 15;
-  aom_image_t raw;
-  aom_codec_err_t res;
-  AvxVideoInfo info;
-  AvxVideoWriter *writer = NULL;
-  const AvxInterface *encoder = NULL;
-  const int fps = 2;  // TODO(dkovalev) add command line argument
-  const double bits_per_pixel_per_frame = 0.067;
-
-  exec_name = argv[0];
-  if (argc != 6) die("Invalid number of arguments");
-
-  memset(&info, 0, sizeof(info));
-
-  encoder = get_aom_encoder_by_name(argv[1]);
-  if (encoder == NULL) {
-    die("Unsupported codec.");
-  }
-  assert(encoder != NULL);
-  info.codec_fourcc = encoder->fourcc;
-  info.frame_width = (int)strtol(argv[2], NULL, 0);
-  info.frame_height = (int)strtol(argv[3], NULL, 0);
-  info.time_base.numerator = 1;
-  info.time_base.denominator = fps;
-
-  if (info.frame_width <= 0 || info.frame_height <= 0 ||
-      (info.frame_width % 2) != 0 || (info.frame_height % 2) != 0) {
-    die("Invalid frame size: %dx%d", info.frame_width, info.frame_height);
-  }
-
-  if (!aom_img_alloc(&raw, AOM_IMG_FMT_I420, info.frame_width,
-                     info.frame_height, 1)) {
-    die("Failed to allocate image.");
-  }
-
-  printf("Using %s\n", aom_codec_iface_name(encoder->codec_interface()));
-
-  res = aom_codec_enc_config_default(encoder->codec_interface(), &cfg, 0);
-  // |codec| has not been initialized yet, so it must not be handed to
-  // die_codec() here; report the failure with plain die() instead.
-  if (res) die("Failed to get default codec config.");
-
-  cfg.g_w = info.frame_width;
-  cfg.g_h = info.frame_height;
-  cfg.g_timebase.num = info.time_base.numerator;
-  cfg.g_timebase.den = info.time_base.denominator;
-  cfg.rc_target_bitrate =
-      (unsigned int)(bits_per_pixel_per_frame * cfg.g_w * cfg.g_h * fps / 1000);
-  cfg.g_lag_in_frames = 0;
-
-  writer = aom_video_writer_open(argv[5], kContainerIVF, &info);
-  if (!writer) die("Failed to open %s for writing.", argv[5]);
-
-  if (!(infile = fopen(argv[4], "rb")))
-    die("Failed to open %s for reading.", argv[4]);
-
-  if (aom_codec_enc_init(&codec, encoder->codec_interface(), &cfg, 0))
-    die_codec(&codec, "Failed to initialize encoder");
-
-  // Encode frames.
-  while (aom_img_read(&raw, infile) && frame_count < limit) {
-    ++frame_count;
-
-    if (frame_count == 5) {
-      set_active_map(&cfg, &codec);
-    } else if (frame_count == 11) {
-      unset_active_map(&cfg, &codec);
-    }
-
-    encode_frame(&codec, &raw, frame_count, writer);
-  }
-
-  // Flush encoder.
-  while (encode_frame(&codec, NULL, -1, writer)) {
-  }
-
-  printf("\n");
-  fclose(infile);
-  printf("Processed %d frames.\n", frame_count);
-
-  aom_img_free(&raw);
-  if (aom_codec_destroy(&codec)) die_codec(&codec, "Failed to destroy codec.");
-
-  aom_video_writer_close(writer);
-
-  return EXIT_SUCCESS;
-}
diff --git a/third_party/aom/examples/simple_decoder.c b/third_party/aom/examples/simple_decoder.c
deleted file mode 100644
index d098d1e0b..000000000
--- a/third_party/aom/examples/simple_decoder.c
+++ /dev/null
@@ -1,146 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-// Simple Decoder
-// ==============
-//
-// This is an example of a simple decoder loop. It takes an input file
-// containing the compressed data (in IVF format), passes it through the
-// decoder, and writes the decompressed frames to disk. Other decoder
-// examples build upon this one.
-//
-// The details of the IVF format have been elided from this example for
-// simplicity of presentation, as IVF files will not generally be used by
-// your application. In general, an IVF file consists of a file header,
-// followed by a variable number of frames. Each frame consists of a frame
-// header followed by a variable length payload. The length of the payload
-// is specified in the first four bytes of the frame header. The payload is
-// the raw compressed data.
-//
-// Standard Includes
-// -----------------
-// For decoders, you only have to include `aom_decoder.h` and then any
-// header files for the specific codecs you use. In this case, we're using
-// aom.
-//
-// Initializing The Codec
-// ----------------------
-// The libaom decoder is initialized by the call to aom_codec_dec_init().
-// Determining the codec interface to use is handled by AvxVideoReader and the
-// functions prefixed with aom_video_reader_. Discussion of those functions is
-// beyond the scope of this example, but the main gist is to open the input file
-// and parse just enough of it to determine if it's a AVx file and which AVx
-// codec is contained within the file.
-// Note the NULL pointer passed to aom_codec_dec_init(). We do that in this
-// example because we want the algorithm to determine the stream configuration
-// (width/height) and allocate memory automatically.
-//
-// Decoding A Frame
-// ----------------
-// Once the frame has been read into memory, it is decoded using the
-// `aom_codec_decode` function. The call takes a pointer to the data
-// (`frame`) and the length of the data (`frame_size`). No application data
-// is associated with the frame in this example, so the `user_priv`
-// parameter is NULL.
-//
-// Codecs may produce a variable number of output frames for every call to
-// `aom_codec_decode`. These frames are retrieved by the
-// `aom_codec_get_frame` iterator function. The iterator variable `iter` is
-// initialized to NULL each time `aom_codec_decode` is called.
-// `aom_codec_get_frame` is called in a loop, returning a pointer to a
-// decoded image or NULL to indicate the end of list.
-//
-// Processing The Decoded Data
-// ---------------------------
-// In this example, we simply write the decoded data to disk. It is
-// important to honor the image's `stride` values.
-//
-// Cleanup
-// -------
-// The `aom_codec_destroy` call frees any memory allocated by the codec.
-//
-// Error Handling
-// --------------
-// This example does not special case any error return codes. If there was
-// an error, a descriptive message is printed and the program exits. With
-// few exceptions, aom_codec functions return an enumerated error status,
-// with the value `0` indicating success.
-
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-
-#include "aom/aom_decoder.h"
-#include "common/tools_common.h"
-#include "common/video_reader.h"
-
-static const char *exec_name;
-
-// Writes the expected command line to stderr and terminates the process
-// with EXIT_FAILURE. Does not return.
-void usage_exit(void) {
-  static const char kUsageFormat[] = "Usage: %s <infile> <outfile>\n";
-
-  fprintf(stderr, kUsageFormat, exec_name);
-  exit(EXIT_FAILURE);
-}
-
-// Simple decoder example entry point.
-//
-// Expects exactly two arguments: <infile> <outfile>. Every frame read from
-// <infile> is decoded and each resulting image is written to <outfile>; a
-// suggested ffplay command line for the output is printed at the end.
-// Returns EXIT_SUCCESS; every failure path exits through die()/die_codec().
-int main(int argc, char **argv) {
-  aom_codec_ctx_t codec;
-  AvxVideoReader *reader;
-  const AvxVideoInfo *info;
-  const AvxInterface *decoder;
-  FILE *outfile;
-  int decoded_frames = 0;
-
-  exec_name = argv[0];
-
-  if (argc != 3) die("Invalid number of arguments.");
-
-  reader = aom_video_reader_open(argv[1]);
-  if (reader == NULL) die("Failed to open %s for reading.", argv[1]);
-
-  outfile = fopen(argv[2], "wb");
-  if (outfile == NULL) die("Failed to open %s for writing.", argv[2]);
-
-  info = aom_video_reader_get_info(reader);
-
-  decoder = get_aom_decoder_by_fourcc(info->codec_fourcc);
-  if (decoder == NULL) die("Unknown input codec.");
-
-  printf("Using %s\n", aom_codec_iface_name(decoder->codec_interface()));
-
-  if (aom_codec_dec_init(&codec, decoder->codec_interface(), NULL, 0))
-    die_codec(&codec, "Failed to initialize decoder.");
-
-  while (aom_video_reader_read_frame(reader)) {
-    size_t payload_size = 0;
-    const unsigned char *const payload =
-        aom_video_reader_get_frame(reader, &payload_size);
-    // The output iterator starts from NULL after every decode call.
-    aom_codec_iter_t img_iter = NULL;
-    aom_image_t *picture;
-
-    if (aom_codec_decode(&codec, payload, payload_size, NULL))
-      die_codec(&codec, "Failed to decode frame.");
-
-    // One decode call may yield several images; drain them all.
-    for (picture = aom_codec_get_frame(&codec, &img_iter); picture != NULL;
-         picture = aom_codec_get_frame(&codec, &img_iter)) {
-      aom_img_write(picture, outfile);
-      ++decoded_frames;
-    }
-  }
-
-  printf("Processed %d frames.\n", decoded_frames);
-  if (aom_codec_destroy(&codec)) die_codec(&codec, "Failed to destroy codec");
-
-  // |info| is read here, so the reader is closed only afterwards.
-  printf("Play: ffplay -f rawvideo -pix_fmt yuv420p -s %dx%d %s\n",
-         info->frame_width, info->frame_height, argv[2]);
-
-  aom_video_reader_close(reader);
-
-  fclose(outfile);
-
-  return EXIT_SUCCESS;
-}
diff --git a/third_party/aom/examples/simple_encoder.c b/third_party/aom/examples/simple_encoder.c
deleted file mode 100644
index 01a37cf0c..000000000
--- a/third_party/aom/examples/simple_encoder.c
+++ /dev/null
@@ -1,249 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-// Simple Encoder
-// ==============
-//
-// This is an example of a simple encoder loop. It takes an input file in
-// YV12 format, passes it through the encoder, and writes the compressed
-// frames to disk in IVF format. Other encoder examples build upon this
-// one.
-//
-// The details of the IVF format have been elided from this example for
-// simplicity of presentation, as IVF files will not generally be used by
-// your application. In general, an IVF file consists of a file header,
-// followed by a variable number of frames. Each frame consists of a frame
-// header followed by a variable length payload. The length of the payload
-// is specified in the first four bytes of the frame header. The payload is
-// the raw compressed data.
-//
-// Standard Includes
-// -----------------
-// For encoders, you only have to include `aom_encoder.h` and then any
-// header files for the specific codecs you use. In this case, we're using
-// aom.
-//
-// Getting The Default Configuration
-// ---------------------------------
-// Encoders have the notion of "usage profiles." For example, an encoder
-// may want to publish default configurations for both a video
-// conferencing application and a best quality offline encoder. These
-// obviously have very different default settings. Consult the
-// documentation for your codec to see if it provides any default
-// configurations. All codecs provide a default configuration, number 0,
-// which is valid for material in the vicinity of QCIF/QVGA.
-//
-// Updating The Configuration
-// ---------------------------------
-// Almost all applications will want to update the default configuration
-// with settings specific to their usage. Here we set the width and height
-// of the video file to that specified on the command line. We also scale
-// the default bitrate based on the ratio between the default resolution
-// and the resolution specified on the command line.
-//
-// Initializing The Codec
-// ----------------------
-// The encoder is initialized by the following code.
-//
-// Encoding A Frame
-// ----------------
-// The frame is read as a continuous block (size width * height * 3 / 2)
-// from the input file. If a frame was read (the input file has not hit
-// EOF) then the frame is passed to the encoder. Otherwise, a NULL
-// is passed, indicating the End-Of-Stream condition to the encoder. The
-// `frame_cnt` is reused as the presentation time stamp (PTS) and each
-// frame is shown for one frame-time in duration. The flags parameter is
-// unused in this example.
-
-// Forced Keyframes
-// ----------------
-// Keyframes can be forced by setting the AOM_EFLAG_FORCE_KF bit of the
-// flags passed to `aom_codec_encode()`. In this example, we force a
-// keyframe every <keyframe-interval> frames. Note, the output stream can
-// contain additional keyframes beyond those that have been forced using the
-// AOM_EFLAG_FORCE_KF flag because of automatic keyframe placement by the
-// encoder.
-//
-// Processing The Encoded Data
-// ---------------------------
-// Each packet of type `AOM_CODEC_CX_FRAME_PKT` contains the encoded data
-// for this frame. We write a IVF frame header, followed by the raw data.
-//
-// Cleanup
-// -------
-// The `aom_codec_destroy` call frees any memory allocated by the codec.
-//
-// Error Handling
-// --------------
-// This example does not special case any error return codes. If there was
-// an error, a descriptive message is printed and the program exits. With
-// few exceptions, aom_codec functions return an enumerated error status,
-// with the value `0` indicating success.
-//
-// Error Resiliency Features
-// -------------------------
-// Error resiliency is controlled by the g_error_resilient member of the
-// configuration structure. Use the `decode_with_drops` example to decode with
-// frames 5-10 dropped. Compare the output for a file encoded with this example
-// versus one encoded with the `simple_encoder` example.
-
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-
-#include "aom/aom_encoder.h"
-#include "common/tools_common.h"
-#include "common/video_writer.h"
-
-static const char *exec_name;
-
-// Writes the expected command line to stderr and terminates the process
-// with EXIT_FAILURE. Does not return.
-void usage_exit(void) {
-  static const char kUsageFormat[] =
-      "Usage: %s <codec> <width> <height> <infile> <outfile> "
-      "<keyframe-interval> <error-resilient> <frames to encode>\n"
-      "See comments in simple_encoder.c for more information.\n";
-
-  fprintf(stderr, kUsageFormat, exec_name);
-  exit(EXIT_FAILURE);
-}
-
-// Hands |img| (or NULL to flush) to the encoder with a duration of one
-// timebase unit and the given |flags|, then drains the resulting packets.
-// Compressed frame packets are written through |writer|; a "K" (keyframe) or
-// "." marker is printed to stdout for each.
-// Returns 1 when at least one packet was produced by this call, 0 otherwise.
-static int encode_frame(aom_codec_ctx_t *codec, aom_image_t *img,
-                        int frame_index, int flags, AvxVideoWriter *writer) {
-  aom_codec_iter_t pkt_iter = NULL;
-  const aom_codec_cx_pkt_t *packet;
-  int produced = 0;
-
-  if (aom_codec_encode(codec, img, frame_index, 1, flags) != AOM_CODEC_OK)
-    die_codec(codec, "Failed to encode frame");
-
-  for (packet = aom_codec_get_cx_data(codec, &pkt_iter); packet != NULL;
-       packet = aom_codec_get_cx_data(codec, &pkt_iter)) {
-    produced = 1;
-
-    if (packet->kind == AOM_CODEC_CX_FRAME_PKT) {
-      const int is_key = (packet->data.frame.flags & AOM_FRAME_IS_KEY) != 0;
-      const int stored = aom_video_writer_write_frame(
-          writer, packet->data.frame.buf, packet->data.frame.sz,
-          packet->data.frame.pts);
-
-      if (!stored) die_codec(codec, "Failed to write compressed frame");
-
-      fputs(is_key ? "K" : ".", stdout);
-      fflush(stdout);
-    }
-  }
-
-  return produced;
-}
-
-// TODO(tomfinegan): Improve command line parsing and add args for bitrate/fps.
-// Simple encoder example entry point.
-//
-// Expects exactly eight arguments:
-//   <codec> <width> <height> <infile> <outfile> <keyframe-interval>
-//   <error-resilient> <frames to encode>
-// Frames read from <infile> are encoded into an IVF file. A keyframe is forced
-// every <keyframe-interval> frames when that value is positive, and a
-// <frames to encode> value of 0 means "encode until the input runs out".
-// Returns EXIT_SUCCESS; every failure path exits through die()/die_codec().
-int main(int argc, char **argv) {
-  FILE *infile = NULL;
-  aom_codec_ctx_t codec;
-  aom_codec_enc_cfg_t cfg;
-  int frame_count = 0;
-  aom_image_t raw;
-  aom_codec_err_t res;
-  AvxVideoInfo info;
-  AvxVideoWriter *writer = NULL;
-  const AvxInterface *encoder = NULL;
-  const int fps = 30;
-  const int bitrate = 200;
-  int keyframe_interval = 0;
-  int max_frames = 0;
-  int frames_encoded = 0;
-  const char *codec_arg = NULL;
-  const char *width_arg = NULL;
-  const char *height_arg = NULL;
-  const char *infile_arg = NULL;
-  const char *outfile_arg = NULL;
-  const char *keyframe_interval_arg = NULL;
-
-  exec_name = argv[0];
-
-  // Clear explicitly, as simply assigning "{ 0 }" generates
-  // "missing-field-initializers" warning in some compilers.
-  memset(&info, 0, sizeof(info));
-
-  if (argc != 9) die("Invalid number of arguments");
-
-  codec_arg = argv[1];
-  width_arg = argv[2];
-  height_arg = argv[3];
-  infile_arg = argv[4];
-  outfile_arg = argv[5];
-  keyframe_interval_arg = argv[6];
-  max_frames = (int)strtol(argv[8], NULL, 0);
-
-  encoder = get_aom_encoder_by_name(codec_arg);
-  if (!encoder) die("Unsupported codec.");
-
-  info.codec_fourcc = encoder->fourcc;
-  info.frame_width = (int)strtol(width_arg, NULL, 0);
-  info.frame_height = (int)strtol(height_arg, NULL, 0);
-  info.time_base.numerator = 1;
-  info.time_base.denominator = fps;
-
-  if (info.frame_width <= 0 || info.frame_height <= 0 ||
-      (info.frame_width % 2) != 0 || (info.frame_height % 2) != 0) {
-    die("Invalid frame size: %dx%d", info.frame_width, info.frame_height);
-  }
-
-  if (!aom_img_alloc(&raw, AOM_IMG_FMT_I420, info.frame_width,
-                     info.frame_height, 1)) {
-    die("Failed to allocate image.");
-  }
-
-  keyframe_interval = (int)strtol(keyframe_interval_arg, NULL, 0);
-  if (keyframe_interval < 0) die("Invalid keyframe interval value.");
-
-  printf("Using %s\n", aom_codec_iface_name(encoder->codec_interface()));
-
-  res = aom_codec_enc_config_default(encoder->codec_interface(), &cfg, 0);
-  // |codec| has not been initialized yet, so it must not be handed to
-  // die_codec() here; report the failure with plain die() instead.
-  if (res) die("Failed to get default codec config.");
-
-  cfg.g_w = info.frame_width;
-  cfg.g_h = info.frame_height;
-  cfg.g_timebase.num = info.time_base.numerator;
-  cfg.g_timebase.den = info.time_base.denominator;
-  cfg.rc_target_bitrate = bitrate;
-  cfg.g_error_resilient = (aom_codec_er_flags_t)strtoul(argv[7], NULL, 0);
-
-  writer = aom_video_writer_open(outfile_arg, kContainerIVF, &info);
-  if (!writer) die("Failed to open %s for writing.", outfile_arg);
-
-  if (!(infile = fopen(infile_arg, "rb")))
-    die("Failed to open %s for reading.", infile_arg);
-
-  if (aom_codec_enc_init(&codec, encoder->codec_interface(), &cfg, 0))
-    die_codec(&codec, "Failed to initialize encoder");
-
-  // Encode frames.
-  while (aom_img_read(&raw, infile)) {
-    int flags = 0;
-    if (keyframe_interval > 0 && frame_count % keyframe_interval == 0)
-      flags |= AOM_EFLAG_FORCE_KF;
-    encode_frame(&codec, &raw, frame_count++, flags, writer);
-    frames_encoded++;
-    if (max_frames > 0 && frames_encoded >= max_frames) break;
-  }
-
-  // Flush encoder.
-  while (encode_frame(&codec, NULL, -1, 0, writer)) continue;
-
-  printf("\n");
-  fclose(infile);
-  printf("Processed %d frames.\n", frame_count);
-
-  aom_img_free(&raw);
-  if (aom_codec_destroy(&codec)) die_codec(&codec, "Failed to destroy codec.");
-
-  aom_video_writer_close(writer);
-
-  return EXIT_SUCCESS;
-}
diff --git a/third_party/aom/examples/twopass_encoder.c b/third_party/aom/examples/twopass_encoder.c
deleted file mode 100644
index a03bc6cc2..000000000
--- a/third_party/aom/examples/twopass_encoder.c
+++ /dev/null
@@ -1,250 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-// Two Pass Encoder
-// ================
-//
-// This is an example of a two pass encoder loop. It takes an input file in
-// YV12 format, passes it through the encoder twice, and writes the compressed
-// frames to disk in IVF format. It builds upon the simple_encoder example.
-//
-// Twopass Variables
-// -----------------
-// Twopass mode needs to track the current pass number and the buffer of
-// statistics packets.
-//
-// Updating The Configuration
-// ---------------------------------
-// In two pass mode, the configuration has to be updated on each pass. The
-// statistics buffer is passed on the last pass.
-//
-// Encoding A Frame
-// ----------------
-// Encoding a frame in two pass mode is identical to the simple encoder
-// example.
-//
-// Processing Statistics Packets
-// -----------------------------
-// Each packet of type `AOM_CODEC_CX_FRAME_PKT` contains the encoded data
-// for this frame. We write a IVF frame header, followed by the raw data.
-//
-//
-// Pass Progress Reporting
-// -----------------------------
-// It's sometimes helpful to see when each pass completes.
-//
-//
-// Clean-up
-// -----------------------------
-// Destruction of the encoder instance must be done on each pass. The
-// raw image should be destroyed at the end as usual.
-
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-
-#include "aom/aom_encoder.h"
-#include "common/tools_common.h"
-#include "common/video_writer.h"
-
-static const char *exec_name;
-
-// Writes the expected command line to stderr and terminates the process
-// with EXIT_FAILURE. Does not return.
-void usage_exit(void) {
-  static const char kUsageFormat[] =
-      "Usage: %s <codec> <width> <height> <infile> <outfile> "
-      "<limit(optional)>\n";
-
-  fprintf(stderr, kUsageFormat, exec_name);
-  exit(EXIT_FAILURE);
-}
-
-// Runs one first-pass encode call on |img| (or NULL to flush) and appends the
-// payload of every statistics packet it yields to the growable buffer
-// |stats| (stats->buf / stats->sz are updated in place).
-// Returns 1 when at least one packet of any kind was produced, 0 otherwise.
-// Exits through die_codec() on encode failure and die() if the buffer cannot
-// be grown.
-static int get_frame_stats(aom_codec_ctx_t *ctx, const aom_image_t *img,
-                           aom_codec_pts_t pts, unsigned int duration,
-                           aom_enc_frame_flags_t flags,
-                           aom_fixed_buf_t *stats) {
-  int got_pkts = 0;
-  aom_codec_iter_t iter = NULL;
-  const aom_codec_cx_pkt_t *pkt = NULL;
-  const aom_codec_err_t res = aom_codec_encode(ctx, img, pts, duration, flags);
-  if (res != AOM_CODEC_OK) die_codec(ctx, "Failed to get frame stats.");
-
-  while ((pkt = aom_codec_get_cx_data(ctx, &iter)) != NULL) {
-    got_pkts = 1;
-
-    if (pkt->kind == AOM_CODEC_STATS_PKT) {
-      const uint8_t *const pkt_buf = pkt->data.twopass_stats.buf;
-      const size_t pkt_size = pkt->data.twopass_stats.sz;
-      void *grown;
-
-      // Nothing to append; also avoids a zero-sized realloc, whose NULL
-      // result would be indistinguishable from an allocation failure.
-      if (pkt_size == 0) continue;
-
-      // Keep the old pointer until realloc succeeds: assigning its result
-      // straight to stats->buf would lose the buffer and then dereference
-      // NULL in memcpy when the allocation fails.
-      grown = realloc(stats->buf, stats->sz + pkt_size);
-      if (grown == NULL) die("Failed to grow the first pass stats buffer.");
-      stats->buf = grown;
-
-      memcpy((uint8_t *)stats->buf + stats->sz, pkt_buf, pkt_size);
-      stats->sz += pkt_size;
-    }
-  }
-
-  return got_pkts;
-}
-
-// Hands |img| (or NULL to flush) to the encoder with the given |pts|,
-// |duration| and |flags|, then drains the resulting packets. Compressed frame
-// packets are written through |writer|; a "K" (keyframe) or "." marker is
-// printed to stdout for each.
-// Returns 1 when at least one packet was produced by this call, 0 otherwise.
-static int encode_frame(aom_codec_ctx_t *ctx, const aom_image_t *img,
-                        aom_codec_pts_t pts, unsigned int duration,
-                        aom_enc_frame_flags_t flags, AvxVideoWriter *writer) {
-  aom_codec_iter_t pkt_iter = NULL;
-  const aom_codec_cx_pkt_t *packet;
-  int produced = 0;
-
-  if (aom_codec_encode(ctx, img, pts, duration, flags) != AOM_CODEC_OK)
-    die_codec(ctx, "Failed to encode frame.");
-
-  for (packet = aom_codec_get_cx_data(ctx, &pkt_iter); packet != NULL;
-       packet = aom_codec_get_cx_data(ctx, &pkt_iter)) {
-    produced = 1;
-
-    if (packet->kind == AOM_CODEC_CX_FRAME_PKT) {
-      const int is_key = (packet->data.frame.flags & AOM_FRAME_IS_KEY) != 0;
-      const int stored = aom_video_writer_write_frame(
-          writer, packet->data.frame.buf, packet->data.frame.sz,
-          packet->data.frame.pts);
-
-      if (!stored) die_codec(ctx, "Failed to write compressed frame.");
-
-      fputs(is_key ? "K" : ".", stdout);
-      fflush(stdout);
-    }
-  }
-
-  return produced;
-}
-
-// First pass: creates an encoder from |cfg|, feeds it frames read from
-// |infile| into |raw| until the input ends or |limit| frames have been
-// submitted, flushes it, and destroys it.
-// Returns the accumulated statistics buffer filled in by get_frame_stats().
-static aom_fixed_buf_t pass0(aom_image_t *raw, FILE *infile,
-                             const AvxInterface *encoder,
-                             const aom_codec_enc_cfg_t *cfg, int limit) {
-  aom_fixed_buf_t collected = { NULL, 0 };
-  aom_codec_ctx_t codec;
-  int submitted = 0;
-  int more;
-
-  if (aom_codec_enc_init(&codec, encoder->codec_interface(), cfg, 0))
-    die_codec(&codec, "Failed to initialize encoder");
-
-  // Calculate frame statistics. The frame is read before the limit is
-  // tested, matching the evaluation order of the read-then-compare loop.
-  for (;;) {
-    if (!aom_img_read(raw, infile)) break;
-    if (submitted >= limit) break;
-    ++submitted;
-    get_frame_stats(&codec, raw, submitted, 1, 0, &collected);
-  }
-
-  // Flush encoder: keep calling with no image until no packets come back.
-  do {
-    more = get_frame_stats(&codec, NULL, submitted, 1, 0, &collected);
-  } while (more);
-
-  printf("Pass 0 complete. Processed %d frames.\n", submitted);
-  if (aom_codec_destroy(&codec)) die_codec(&codec, "Failed to destroy codec.");
-
-  return collected;
-}
-
-// Final pass: opens |outfile_name| as an IVF file, creates an encoder from
-// |cfg|, encodes frames read from |infile| into |raw| until the input ends or
-// |limit| frames have been submitted, flushes the encoder, and releases both
-// the encoder and the writer.
-static void pass1(aom_image_t *raw, FILE *infile, const char *outfile_name,
-                  const AvxInterface *encoder, const aom_codec_enc_cfg_t *cfg,
-                  int limit) {
-  AvxVideoInfo info = { encoder->fourcc,
-                        cfg->g_w,
-                        cfg->g_h,
-                        { cfg->g_timebase.num, cfg->g_timebase.den },
-                        0 };
-  aom_codec_ctx_t codec;
-  AvxVideoWriter *writer;
-  int submitted = 0;
-  int more;
-
-  writer = aom_video_writer_open(outfile_name, kContainerIVF, &info);
-  if (writer == NULL) die("Failed to open %s for writing", outfile_name);
-
-  if (aom_codec_enc_init(&codec, encoder->codec_interface(), cfg, 0))
-    die_codec(&codec, "Failed to initialize encoder");
-
-  // Encode frames. The frame is read before the limit is tested, matching
-  // the evaluation order of the read-then-compare loop.
-  for (;;) {
-    if (!aom_img_read(raw, infile)) break;
-    if (submitted >= limit) break;
-    ++submitted;
-    encode_frame(&codec, raw, submitted, 1, 0, writer);
-  }
-
-  // Flush encoder: keep calling with no image until no packets come back.
-  do {
-    more = encode_frame(&codec, NULL, -1, 1, 0, writer);
-  } while (more);
-
-  printf("\n");
-
-  if (aom_codec_destroy(&codec)) die_codec(&codec, "Failed to destroy codec.");
-
-  aom_video_writer_close(writer);
-
-  printf("Pass 1 complete. Processed %d frames.\n", submitted);
-}
-
-// Two-pass encoder example entry point.
-//
-// Expects five arguments plus an optional sixth:
-//   <codec> <width> <height> <infile> <outfile> [<limit>]
-// Runs a statistics-gathering first pass over <infile>, rewinds it, and then
-// runs the final pass that writes <outfile>. A missing or zero <limit>
-// defaults to 100 frames.
-// Returns EXIT_SUCCESS; every failure path exits through die()/die_codec().
-int main(int argc, char **argv) {
-  FILE *infile = NULL;
-  int w, h;
-  aom_codec_enc_cfg_t cfg;
-  aom_image_t raw;
-  aom_codec_err_t res;
-  aom_fixed_buf_t stats;
-
-  const AvxInterface *encoder = NULL;
-  const int fps = 30;       // TODO(dkovalev) add command line argument
-  const int bitrate = 200;  // kbit/s TODO(dkovalev) add command line argument
-  const char *codec_arg = NULL;
-  const char *width_arg = NULL;
-  const char *height_arg = NULL;
-  const char *infile_arg = NULL;
-  const char *outfile_arg = NULL;
-  int limit = 0;
-  exec_name = argv[0];
-
-  // Validate the argument count before touching argv[1..5]; reading them
-  // first would index past the end of argv when too few are supplied.
-  if (argc < 6) die("Invalid number of arguments");
-
-  codec_arg = argv[1];
-  width_arg = argv[2];
-  height_arg = argv[3];
-  infile_arg = argv[4];
-  outfile_arg = argv[5];
-
-  if (argc > 6) limit = (int)strtol(argv[6], NULL, 0);
-
-  if (limit == 0) limit = 100;
-
-  encoder = get_aom_encoder_by_name(codec_arg);
-  if (!encoder) die("Unsupported codec.");
-
-  w = (int)strtol(width_arg, NULL, 0);
-  h = (int)strtol(height_arg, NULL, 0);
-
-  if (w <= 0 || h <= 0 || (w % 2) != 0 || (h % 2) != 0)
-    die("Invalid frame size: %dx%d", w, h);
-
-  if (!aom_img_alloc(&raw, AOM_IMG_FMT_I420, w, h, 1))
-    die("Failed to allocate image");
-
-  printf("Using %s\n", aom_codec_iface_name(encoder->codec_interface()));
-
-  // Configuration
-  res = aom_codec_enc_config_default(encoder->codec_interface(), &cfg, 0);
-  // No codec context exists in this function (each pass creates its own), so
-  // the failure is reported with plain die() rather than die_codec().
-  if (res) die("Failed to get default codec config.");
-
-  cfg.g_w = w;
-  cfg.g_h = h;
-  cfg.g_timebase.num = 1;
-  cfg.g_timebase.den = fps;
-  cfg.rc_target_bitrate = bitrate;
-
-  if (!(infile = fopen(infile_arg, "rb")))
-    die("Failed to open %s for reading", infile_arg);
-
-  // Pass 0
-  cfg.g_pass = AOM_RC_FIRST_PASS;
-  stats = pass0(&raw, infile, encoder, &cfg, limit);
-
-  // Pass 1
-  rewind(infile);
-  cfg.g_pass = AOM_RC_LAST_PASS;
-  cfg.rc_twopass_stats_in = stats;
-  pass1(&raw, infile, outfile_arg, encoder, &cfg, limit);
-  free(stats.buf);
-
-  aom_img_free(&raw);
-  fclose(infile);
-
-  return EXIT_SUCCESS;
-}
diff --git a/third_party/aom/keywords.dox b/third_party/aom/keywords.dox
deleted file mode 100644
index 56f536890..000000000
--- a/third_party/aom/keywords.dox
+++ /dev/null
@@ -1,51 +0,0 @@
-/*!\page rfc2119 RFC2119 Keywords
-
- The key words "MUST", "MUST NOT", "REQUIRED", "SHALL", "SHALL
- NOT", "SHOULD", "SHOULD NOT", "RECOMMENDED", "MAY", and
- "OPTIONAL" in this document are to be interpreted as described in
- <a href="http://www.ietf.org/rfc/rfc2119.txt">RFC 2119.</a>
-
-Specifically, the following definitions are used:
-
-\section MUST
-\anchor REQUIRED
-\anchor SHALL
- This word, or the terms "REQUIRED" or "SHALL", mean that the
- definition is an absolute requirement of the specification.
-
-\section MUSTNOT MUST NOT
-\anchor SHALLNOT
- This phrase, or the phrase "SHALL NOT", mean that the
- definition is an absolute prohibition of the specification.
-
-\section SHOULD
-\anchor RECOMMENDED
- This word, or the adjective "RECOMMENDED", mean that there
- may exist valid reasons in particular circumstances to ignore a
- particular item, but the full implications must be understood and
- carefully weighed before choosing a different course.
-
-\section SHOULDNOT SHOULD NOT
-\anchor NOTRECOMMENDED
- This phrase, or the phrase "NOT RECOMMENDED" mean that
- there may exist valid reasons in particular circumstances when the
- particular behavior is acceptable or even useful, but the full
- implications should be understood and the case carefully weighed
- before implementing any behavior described with this label.
-
-\section MAY
-\anchor OPTIONAL
- This word, or the adjective "OPTIONAL", mean that an item is
- truly optional. One vendor may choose to include the item because a
- particular marketplace requires it or because the vendor feels that
- it enhances the product while another vendor may omit the same item.
- An implementation which does not include a particular option \ref MUST be
- prepared to interoperate with another implementation which does
- include the option, though perhaps with reduced functionality. In the
- same vein an implementation which does include a particular option
- \ref MUST be prepared to interoperate with another implementation which
- does not include the option (except, of course, for the feature the
- option provides.)
-
-
-*/
diff --git a/third_party/aom/libs.doxy_template b/third_party/aom/libs.doxy_template
deleted file mode 100644
index c522e21d3..000000000
--- a/third_party/aom/libs.doxy_template
+++ /dev/null
@@ -1,1260 +0,0 @@
-## Copyright (c) 2016, Alliance for Open Media. All rights reserved
-##
-## This source code is subject to the terms of the BSD 2 Clause License and
-## the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
-## was not distributed with this source code in the LICENSE file, you can
-## obtain it at www.aomedia.org/license/software. If the Alliance for Open
-## Media Patent License 1.0 was not distributed with this source code in the
-## PATENTS file, you can obtain it at www.aomedia.org/license/patent.
-##
-
-# Doxyfile 1.5.4
-
-# This file describes the settings to be used by the documentation system
-# doxygen (www.doxygen.org) for a project
-#
-# All text after a hash (#) is considered a comment and will be ignored
-# The format is:
-# TAG = value [value, ...]
-# For lists items can also be appended using:
-# TAG += value [value, ...]
-# Values that contain spaces should be placed between quotes (" ")
-
-#---------------------------------------------------------------------------
-# Project related configuration options
-#---------------------------------------------------------------------------
-
-# This tag specifies the encoding used for all characters in the config file that
-# follow. The default is UTF-8 which is also the encoding used for all text before
-# the first occurrence of this tag. Doxygen uses libiconv (or the iconv built into
-# libc) for the transcoding. See http://www.gnu.org/software/libiconv for the list of
-# possible encodings.
-
-DOXYFILE_ENCODING = UTF-8
-
-# The PROJECT_NAME tag is a single word (or a sequence of words surrounded
-# by quotes) that should identify the project.
-
-PROJECT_NAME = "AOMedia Codec SDK"
-
-# The OUTPUT_DIRECTORY tag is used to specify the (relative or absolute)
-# base path where the generated documentation will be put.
-# If a relative path is entered, it will be relative to the location
-# where doxygen was started. If left blank the current directory will be used.
-
-OUTPUT_DIRECTORY = docs
-
-# If the CREATE_SUBDIRS tag is set to YES, then doxygen will create
-# 4096 sub-directories (in 2 levels) under the output directory of each output
-# format and will distribute the generated files over these directories.
-# Enabling this option can be useful when feeding doxygen a huge amount of
-# source files, where putting all generated files in the same directory would
-# otherwise cause performance problems for the file system.
-
-CREATE_SUBDIRS = NO
-
-# The OUTPUT_LANGUAGE tag is used to specify the language in which all
-# documentation generated by doxygen is written. Doxygen will use this
-# information to generate all constant output in the proper language.
-# The default language is English, other supported languages are:
-# Afrikaans, Arabic, Brazilian, Catalan, Chinese, Chinese-Traditional,
-# Croatian, Czech, Danish, Dutch, Finnish, French, German, Greek, Hungarian,
-# Italian, Japanese, Japanese-en (Japanese with English messages), Korean,
-# Korean-en, Lithuanian, Norwegian, Polish, Portuguese, Romanian, Russian,
-# Serbian, Slovak, Slovene, Spanish, Swedish, and Ukrainian.
-
-OUTPUT_LANGUAGE = English
-
-# If the BRIEF_MEMBER_DESC tag is set to YES (the default) Doxygen will
-# include brief member descriptions after the members that are listed in
-# the file and class documentation (similar to java_doc).
-# Set to NO to disable this.
-
-BRIEF_MEMBER_DESC = YES
-
-# If the REPEAT_BRIEF tag is set to YES (the default) Doxygen will prepend
-# the brief description of a member or function before the detailed description.
-# Note: if both HIDE_UNDOC_MEMBERS and BRIEF_MEMBER_DESC are set to NO, the
-# brief descriptions will be completely suppressed.
-
-REPEAT_BRIEF = YES
-
-# This tag implements a quasi-intelligent brief description abbreviator
-# that is used to form the text in various listings. Each string
-# in this list, if found as the leading text of the brief description, will be
-# stripped from the text and the result after processing the whole list, is
-# used as the annotated text. Otherwise, the brief description is used as-is.
-# If left blank, the following values are used ("$name" is automatically
-# replaced with the name of the entity): "The $name class" "The $name widget"
-# "The $name file" "is" "provides" "specifies" "contains"
-# "represents" "a" "an" "the"
-
-ABBREVIATE_BRIEF =
-
-# If the ALWAYS_DETAILED_SEC and REPEAT_BRIEF tags are both set to YES then
-# Doxygen will generate a detailed section even if there is only a brief
-# description.
-
-ALWAYS_DETAILED_SEC = NO
-
-# If the INLINE_INHERITED_MEMB tag is set to YES, doxygen will show all
-# inherited members of a class in the documentation of that class as if those
-# members were ordinary class members. Constructors, destructors and assignment
-# operators of the base classes will not be shown.
-
-INLINE_INHERITED_MEMB = NO
-
-# If the FULL_PATH_NAMES tag is set to YES then Doxygen will prepend the full
-# path before files name in the file list and in the header files. If set
-# to NO the shortest path that makes the file name unique will be used.
-
-FULL_PATH_NAMES = YES
-
-# If the FULL_PATH_NAMES tag is set to YES then the STRIP_FROM_PATH tag
-# can be used to strip a user-defined part of the path. Stripping is
-# only done if one of the specified strings matches the left-hand part of
-# the path. The tag can be used to show relative paths in the file list.
-# If left blank the directory from which doxygen is run is used as the
-# path to strip.
-
-STRIP_FROM_PATH =
-
-# The STRIP_FROM_INC_PATH tag can be used to strip a user-defined part of
-# the path mentioned in the documentation of a class, which tells
-# the reader which header file to include in order to use a class.
-# If left blank only the name of the header file containing the class
-# definition is used. Otherwise one should specify the include paths that
-# are normally passed to the compiler using the -I flag.
-
-STRIP_FROM_INC_PATH =
-
-# If the SHORT_NAMES tag is set to YES, doxygen will generate much shorter
-# (but less readable) file names. This can be useful if your file system
-# doesn't support long names like on DOS, Mac, or CD-ROM.
-
-SHORT_NAMES = NO
-
-# If the JAVADOC_AUTOBRIEF tag is set to YES then Doxygen
-# will interpret the first line (until the first dot) of a java_doc-style
-# comment as the brief description. If set to NO, the java_doc
-# comments will behave just like regular Qt-style comments
-# (thus requiring an explicit @brief command for a brief description.)
-
-JAVADOC_AUTOBRIEF = NO
-
-# If the QT_AUTOBRIEF tag is set to YES then Doxygen will
-# interpret the first line (until the first dot) of a Qt-style
-# comment as the brief description. If set to NO, the comments
-# will behave just like regular Qt-style comments (thus requiring
-# an explicit \brief command for a brief description.)
-
-QT_AUTOBRIEF = NO
-
-# The MULTILINE_CPP_IS_BRIEF tag can be set to YES to make Doxygen
-# treat a multi-line C++ special comment block (i.e. a block of //! or ///
-# comments) as a brief description. This used to be the default behaviour.
-# The new default is to treat a multi-line C++ comment block as a detailed
-# description. Set this tag to YES if you prefer the old behaviour instead.
-
-MULTILINE_CPP_IS_BRIEF = NO
-
-# If the INHERIT_DOCS tag is set to YES (the default) then an undocumented
-# member inherits the documentation from any documented member that it
-# re-implements.
-
-INHERIT_DOCS = YES
-
-# If the SEPARATE_MEMBER_PAGES tag is set to YES, then doxygen will produce
-# a new page for each member. If set to NO, the documentation of a member will
-# be part of the file/class/namespace that contains it.
-
-SEPARATE_MEMBER_PAGES = NO
-
-# The TAB_SIZE tag can be used to set the number of spaces in a tab.
-# Doxygen uses this value to replace tabs by spaces in code fragments.
-
-TAB_SIZE = 4
-
-# This tag can be used to specify a number of aliases that acts
-# as commands in the documentation. An alias has the form "name=value".
-# For example adding "sideeffect=\par Side Effects:\n" will allow you to
-# put the command \sideeffect (or @sideeffect) in the documentation, which
-# will result in a user-defined paragraph with heading "Side Effects:".
-# You can put \n's in the value part of an alias to insert newlines.
-
-ALIASES =
-
-# Set the OPTIMIZE_OUTPUT_FOR_C tag to YES if your project consists of C
-# sources only. Doxygen will then generate output that is more tailored for C.
-# For instance, some of the names that are used will be different. The list
-# of all members will be omitted, etc.
-
-OPTIMIZE_OUTPUT_FOR_C = YES
-
-# Set the OPTIMIZE_OUTPUT_JAVA tag to YES if your project consists of Java
-# sources only. Doxygen will then generate output that is more tailored for Java.
-# For instance, namespaces will be presented as packages, qualified scopes
-# will look different, etc.
-
-OPTIMIZE_OUTPUT_JAVA = NO
-
-# If you use STL classes (i.e. std::string, std::vector, etc.) but do not want to
-# include (a tag file for) the STL sources as input, then you should
-# set this tag to YES in order to let doxygen match functions declarations and
-# definitions whose arguments contain STL classes (e.g. func(std::string); v.s.
-# func(std::string) {}). This also makes the inheritance and collaboration
-# diagrams that involve STL classes more complete and accurate.
-
-BUILTIN_STL_SUPPORT = NO
-
-# If you use Microsoft's C++/CLI language, you should set this option to YES to
-# enable parsing support.
-
-CPP_CLI_SUPPORT = NO
-
-# Set the SIP_SUPPORT tag to YES if your project consists of sip sources only.
-# Doxygen will parse them like normal C++ but will assume all classes use public
-# instead of private inheritance when no explicit protection keyword is present.
-
-SIP_SUPPORT = NO
-
-# If member grouping is used in the documentation and the DISTRIBUTE_GROUP_DOC
-# tag is set to YES, then doxygen will reuse the documentation of the first
-# member in the group (if any) for the other members of the group. By default
-# all members of a group must be documented explicitly.
-
-DISTRIBUTE_GROUP_DOC = NO
-
-# Set the SUBGROUPING tag to YES (the default) to allow class member groups of
-# the same type (for instance a group of public functions) to be put as a
-# subgroup of that type (e.g. under the Public Functions section). Set it to
-# NO to prevent subgrouping. Alternatively, this can be done per class using
-# the \nosubgrouping command.
-
-SUBGROUPING = YES
-
-# When TYPEDEF_HIDES_STRUCT is enabled, a typedef of a struct (or union) is
-# documented as struct with the name of the typedef. So
-# typedef struct type_s {} type_t, will appear in the documentation as a struct
-# with name type_t. When disabled the typedef will appear as a member of a file,
-# namespace, or class. And the struct will be named type_s. This can typically
-# be useful for C code where the coding convention is that all structs are
-# typedef'ed and only the typedef is referenced never the struct's name.
-
-TYPEDEF_HIDES_STRUCT = NO
-
-#---------------------------------------------------------------------------
-# Build related configuration options
-#---------------------------------------------------------------------------
-
-# If the EXTRACT_ALL tag is set to YES doxygen will assume all entities in
-# documentation are documented, even if no documentation was available.
-# Private class members and static file members will be hidden unless
-# the EXTRACT_PRIVATE and EXTRACT_STATIC tags are set to YES
-
-EXTRACT_ALL = NO
-
-# If the EXTRACT_PRIVATE tag is set to YES all private members of a class
-# will be included in the documentation.
-
-EXTRACT_PRIVATE = NO
-
-# If the EXTRACT_STATIC tag is set to YES all static members of a file
-# will be included in the documentation.
-
-EXTRACT_STATIC = NO
-
-# If the EXTRACT_LOCAL_CLASSES tag is set to YES classes (and structs)
-# defined locally in source files will be included in the documentation.
-# If set to NO only classes defined in header files are included.
-
-EXTRACT_LOCAL_CLASSES = YES
-
-# This flag is only useful for Objective-C code. When set to YES local
-# methods, which are defined in the implementation section but not in
-# the interface are included in the documentation.
-# If set to NO (the default) only methods in the interface are included.
-
-EXTRACT_LOCAL_METHODS = NO
-
-# If this flag is set to YES, the members of anonymous namespaces will be extracted
-# and appear in the documentation as a namespace called 'anonymous_namespace{file}',
-# where file will be replaced with the base name of the file that contains the anonymous
-# namespace. By default anonymous namespaces are hidden.
-
-EXTRACT_ANON_NSPACES = NO
-
-# If the HIDE_UNDOC_MEMBERS tag is set to YES, Doxygen will hide all
-# undocumented members of documented classes, files or namespaces.
-# If set to NO (the default) these members will be included in the
-# various overviews, but no documentation section is generated.
-# This option has no effect if EXTRACT_ALL is enabled.
-
-HIDE_UNDOC_MEMBERS = NO
-
-# If the HIDE_UNDOC_CLASSES tag is set to YES, Doxygen will hide all
-# undocumented classes that are normally visible in the class hierarchy.
-# If set to NO (the default) these classes will be included in the various
-# overviews. This option has no effect if EXTRACT_ALL is enabled.
-
-HIDE_UNDOC_CLASSES = NO
-
-# If the HIDE_FRIEND_COMPOUNDS tag is set to YES, Doxygen will hide all
-# friend (class|struct|union) declarations.
-# If set to NO (the default) these declarations will be included in the
-# documentation.
-
-HIDE_FRIEND_COMPOUNDS = NO
-
-# If the HIDE_IN_BODY_DOCS tag is set to YES, Doxygen will hide any
-# documentation blocks found inside the body of a function.
-# If set to NO (the default) these blocks will be appended to the
-# function's detailed documentation block.
-
-HIDE_IN_BODY_DOCS = NO
-
-# The INTERNAL_DOCS tag determines if documentation
-# that is typed after a \internal command is included. If the tag is set
-# to NO (the default) then the documentation will be excluded.
-# Set it to YES to include the internal documentation.
-
-INTERNAL_DOCS = NO
-
-# If the CASE_SENSE_NAMES tag is set to NO then Doxygen will only generate
-# file names in lower-case letters. If set to YES upper-case letters are also
-# allowed. This is useful if you have classes or files whose names only differ
-# in case and if your file system supports case sensitive file names. Windows
-# and Mac users are advised to set this option to NO.
-
-CASE_SENSE_NAMES = YES
-
-# If the HIDE_SCOPE_NAMES tag is set to NO (the default) then Doxygen
-# will show members with their full class and namespace scopes in the
-# documentation. If set to YES the scope will be hidden.
-
-HIDE_SCOPE_NAMES = NO
-
-# If the SHOW_INCLUDE_FILES tag is set to YES (the default) then Doxygen
-# will put a list of the files that are included by a file in the documentation
-# of that file.
-
-SHOW_INCLUDE_FILES = YES
-
-# If the INLINE_INFO tag is set to YES (the default) then a tag [inline]
-# is inserted in the documentation for inline members.
-
-INLINE_INFO = YES
-
-# If the SORT_MEMBER_DOCS tag is set to YES (the default) then doxygen
-# will sort the (detailed) documentation of file and class members
-# alphabetically by member name. If set to NO the members will appear in
-# declaration order.
-
-SORT_MEMBER_DOCS = NO
-
-# If the SORT_BRIEF_DOCS tag is set to YES then doxygen will sort the
-# brief documentation of file, namespace and class members alphabetically
-# by member name. If set to NO (the default) the members will appear in
-# declaration order.
-
-SORT_BRIEF_DOCS = NO
-
-# If the SORT_BY_SCOPE_NAME tag is set to YES, the class list will be
-# sorted by fully-qualified names, including namespaces. If set to
-# NO (the default), the class list will be sorted only by class name,
-# not including the namespace part.
-# Note: This option is not very useful if HIDE_SCOPE_NAMES is set to YES.
-# Note: This option applies only to the class list, not to the
-# alphabetical list.
-
-SORT_BY_SCOPE_NAME = NO
-
-# The GENERATE_TODOLIST tag can be used to enable (YES) or
-# disable (NO) the todo list. This list is created by putting \todo
-# commands in the documentation.
-
-GENERATE_TODOLIST = YES
-
-# The GENERATE_TESTLIST tag can be used to enable (YES) or
-# disable (NO) the test list. This list is created by putting \test
-# commands in the documentation.
-
-GENERATE_TESTLIST = YES
-
-# The GENERATE_BUGLIST tag can be used to enable (YES) or
-# disable (NO) the bug list. This list is created by putting \bug
-# commands in the documentation.
-
-GENERATE_BUGLIST = YES
-
-# The GENERATE_DEPRECATEDLIST tag can be used to enable (YES) or
-# disable (NO) the deprecated list. This list is created by putting
-# \deprecated commands in the documentation.
-
-GENERATE_DEPRECATEDLIST= YES
-
-# The ENABLED_SECTIONS tag can be used to enable conditional
-# documentation sections, marked by \if sectionname ... \endif.
-
-ENABLED_SECTIONS =
-
-# The MAX_INITIALIZER_LINES tag determines the maximum number of lines
-# the initial value of a variable or define consists of for it to appear in
-# the documentation. If the initializer consists of more lines than specified
-# here it will be hidden. Use a value of 0 to hide initializers completely.
-# The appearance of the initializer of individual variables and defines in the
-# documentation can be controlled using \showinitializer or \hideinitializer
-# command in the documentation regardless of this setting.
-
-MAX_INITIALIZER_LINES = 30
-
-# Set the SHOW_USED_FILES tag to NO to disable the list of files generated
-# at the bottom of the documentation of classes and structs. If set to YES the
-# list will mention the files that were used to generate the documentation.
-
-SHOW_USED_FILES = YES
-
-# The FILE_VERSION_FILTER tag can be used to specify a program or script that
-# doxygen should invoke to get the current version for each file (typically from the
-# version control system). Doxygen will invoke the program by executing (via
-# popen()) the command <command> <input-file>, where <command> is the value of
-# the FILE_VERSION_FILTER tag, and <input-file> is the name of an input file
-# provided by doxygen. Whatever the program writes to standard output
-# is used as the file version. See the manual for examples.
-
-FILE_VERSION_FILTER =
-
-#---------------------------------------------------------------------------
-# configuration options related to warning and progress messages
-#---------------------------------------------------------------------------
-
-# The QUIET tag can be used to turn on/off the messages that are generated
-# by doxygen. Possible values are YES and NO. If left blank NO is used.
-
-QUIET = YES
-
-# The WARNINGS tag can be used to turn on/off the warning messages that are
-# generated by doxygen. Possible values are YES and NO. If left blank
-# NO is used.
-
-WARNINGS = YES
-
-# If WARN_IF_UNDOCUMENTED is set to YES, then doxygen will generate warnings
-# for undocumented members. If EXTRACT_ALL is set to YES then this flag will
-# automatically be disabled.
-
-WARN_IF_UNDOCUMENTED = YES
-
-# If WARN_IF_DOC_ERROR is set to YES, doxygen will generate warnings for
-# potential errors in the documentation, such as not documenting some
-# parameters in a documented function, or documenting parameters that
-# don't exist or using markup commands wrongly.
-
-WARN_IF_DOC_ERROR = YES
-
-# This WARN_NO_PARAMDOC option can be enabled to get warnings for
-# functions that are documented, but have no documentation for their parameters
-# or return value. If set to NO (the default) doxygen will only warn about
-# wrong or incomplete parameter documentation, but not about the absence of
-# documentation.
-
-WARN_NO_PARAMDOC = NO
-
-# The WARN_FORMAT tag determines the format of the warning messages that
-# doxygen can produce. The string should contain the $file, $line, and $text
-# tags, which will be replaced by the file and line number from which the
-# warning originated and the warning text. Optionally the format may contain
-# $version, which will be replaced by the version of the file (if it could
-# be obtained via FILE_VERSION_FILTER)
-
-WARN_FORMAT = "$file:$line: $text"
-
-# The WARN_LOGFILE tag can be used to specify a file to which warning
-# and error messages should be written. If left blank the output is written
-# to stderr.
-
-WARN_LOGFILE =
-
-#---------------------------------------------------------------------------
-# configuration options related to the input files
-#---------------------------------------------------------------------------
-
-# The INPUT tag can be used to specify the files and/or directories that contain
-# documented source files. You may enter file names like "myfile.cpp" or
-# directories like "/usr/src/myproject". Separate the files or directories
-# with spaces.
-
-INPUT =
-
-# This tag can be used to specify the character encoding of the source files that
-# doxygen parses. Internally doxygen uses the UTF-8 encoding, which is also the default
-# input encoding. Doxygen uses libiconv (or the iconv built into libc) for the transcoding.
-# See http://www.gnu.org/software/libiconv for the list of possible encodings.
-
-INPUT_ENCODING = UTF-8
-
-# If the value of the INPUT tag contains directories, you can use the
-# FILE_PATTERNS tag to specify one or more wildcard pattern (like *.cpp
-# and *.h) to filter out the source-files in the directories. If left
-# blank the following patterns are tested:
-# *.c *.cc *.cxx *.cpp *.c++ *.java *.ii *.ixx *.ipp *.i++ *.inl *.h *.hh *.hxx
-# *.hpp *.h++ *.idl *.odl *.cs *.php *.php3 *.inc *.m *.mm *.py *.f90
-
-FILE_PATTERNS =
-
-# The RECURSIVE tag can be used to specify whether or not subdirectories
-# should be searched for input files as well. Possible values are YES and NO.
-# If left blank NO is used.
-
-RECURSIVE = NO
-
-# The EXCLUDE tag can be used to specify files and/or directories that should
-# be excluded from the INPUT source files. This way you can easily exclude a
-# subdirectory from a directory tree whose root is specified with the INPUT tag.
-
-EXCLUDE =
-
-# The EXCLUDE_SYMLINKS tag can be used to select whether or not files or
-# directories that are symbolic links (a Unix filesystem feature) are excluded
-# from the input.
-
-EXCLUDE_SYMLINKS = NO
-
-# If the value of the INPUT tag contains directories, you can use the
-# EXCLUDE_PATTERNS tag to specify one or more wildcard patterns to exclude
-# certain files from those directories. Note that the wildcards are matched
-# against the file with absolute path, so to exclude all test directories
-# for example use the pattern */test/*
-
-EXCLUDE_PATTERNS =
-
-# The EXCLUDE_SYMBOLS tag can be used to specify one or more symbol names
-# (namespaces, classes, functions, etc.) that should be excluded from the output.
-# The symbol name can be a fully qualified name, a word, or if the wildcard * is used,
-# a substring. Examples: ANamespace, AClass, AClass::ANamespace, ANamespace::*Test
-
-EXCLUDE_SYMBOLS =
-
-# The EXAMPLE_PATH tag can be used to specify one or more files or
-# directories that contain example code fragments that are included (see
-# the \include command).
-
-EXAMPLE_PATH =
-
-# If the value of the EXAMPLE_PATH tag contains directories, you can use the
-# EXAMPLE_PATTERNS tag to specify one or more wildcard pattern (like *.cpp
-# and *.h) to filter out the source-files in the directories. If left
-# blank all files are included.
-
-EXAMPLE_PATTERNS =
-
-# If the EXAMPLE_RECURSIVE tag is set to YES then subdirectories will be
-# searched for input files to be used with the \include or \dontinclude
-# commands irrespective of the value of the RECURSIVE tag.
-# Possible values are YES and NO. If left blank NO is used.
-
-EXAMPLE_RECURSIVE = NO
-
-# The IMAGE_PATH tag can be used to specify one or more files or
-# directories that contain images that are included in the documentation (see
-# the \image command).
-
-IMAGE_PATH =
-
-# The INPUT_FILTER tag can be used to specify a program that doxygen should
-# invoke to filter for each input file. Doxygen will invoke the filter program
-# by executing (via popen()) the command <filter> <input-file>, where <filter>
-# is the value of the INPUT_FILTER tag, and <input-file> is the name of an
-# input file. Doxygen will then use the output that the filter program writes
-# to standard output. If FILTER_PATTERNS is specified, this tag will be
-# ignored.
-
-INPUT_FILTER =
-
-# The FILTER_PATTERNS tag can be used to specify filters on a per file pattern
-# basis. Doxygen will compare the file name with each pattern and apply the
-# filter if there is a match. The filters are a list of the form:
-# pattern=filter (like *.cpp=my_cpp_filter). See INPUT_FILTER for further
-# info on how filters are used. If FILTER_PATTERNS is empty, INPUT_FILTER
-# is applied to all files.
-
-FILTER_PATTERNS =
-
-# If the FILTER_SOURCE_FILES tag is set to YES, the input filter (if set using
-# INPUT_FILTER) will be used to filter the input files when producing source
-# files to browse (i.e. when SOURCE_BROWSER is set to YES).
-
-FILTER_SOURCE_FILES = NO
-
-#---------------------------------------------------------------------------
-# configuration options related to source browsing
-#---------------------------------------------------------------------------
-
-# If the SOURCE_BROWSER tag is set to YES then a list of source files will
-# be generated. Documented entities will be cross-referenced with these sources.
-# Note: To get rid of all source code in the generated output, make sure also
-# VERBATIM_HEADERS is set to NO. If you have enabled CALL_GRAPH or CALLER_GRAPH
-# then you must also enable this option. If you don't then doxygen will produce
-# a warning and turn it on anyway
-
-SOURCE_BROWSER = NO
-
-# Setting the INLINE_SOURCES tag to YES will include the body
-# of functions and classes directly in the documentation.
-
-INLINE_SOURCES = NO
-
-# Setting the STRIP_CODE_COMMENTS tag to YES (the default) will instruct
-# doxygen to hide any special comment blocks from generated source code
-# fragments. Normal C and C++ comments will always remain visible.
-
-STRIP_CODE_COMMENTS = YES
-
-# If the REFERENCED_BY_RELATION tag is set to YES (the default)
-# then for each documented function all documented
-# functions referencing it will be listed.
-
-REFERENCED_BY_RELATION = YES
-
-# If the REFERENCES_RELATION tag is set to YES (the default)
-# then for each documented function all documented entities
-# called/used by that function will be listed.
-
-REFERENCES_RELATION = YES
-
-# If the REFERENCES_LINK_SOURCE tag is set to YES (the default)
-# and SOURCE_BROWSER tag is set to YES, then the hyperlinks from
-# functions in REFERENCES_RELATION and REFERENCED_BY_RELATION lists will
-# link to the source code. Otherwise they will link to the documentation.
-
-REFERENCES_LINK_SOURCE = YES
-
-# If the USE_HTAGS tag is set to YES then the references to source code
-# will point to the HTML generated by the htags(1) tool instead of doxygen
-# built-in source browser. The htags tool is part of GNU's global source
-# tagging system (see http://www.gnu.org/software/global/global.html). You
-# will need version 4.8.6 or higher.
-
-USE_HTAGS = NO
-
-# If the VERBATIM_HEADERS tag is set to YES (the default) then Doxygen
-# will generate a verbatim copy of the header file for each class for
-# which an include is specified. Set to NO to disable this.
-
-VERBATIM_HEADERS = YES
-
-#---------------------------------------------------------------------------
-# configuration options related to the alphabetical class index
-#---------------------------------------------------------------------------
-
-# If the ALPHABETICAL_INDEX tag is set to YES, an alphabetical index
-# of all compounds will be generated. Enable this if the project
-# contains a lot of classes, structs, unions or interfaces.
-
-ALPHABETICAL_INDEX = NO
-
-# If the alphabetical index is enabled (see ALPHABETICAL_INDEX) then
-# the COLS_IN_ALPHA_INDEX tag can be used to specify the number of columns
-# in which this list will be split (can be a number in the range [1..20])
-
-COLS_IN_ALPHA_INDEX = 5
-
-# In case all classes in a project start with a common prefix, all
-# classes will be put under the same header in the alphabetical index.
-# The IGNORE_PREFIX tag can be used to specify one or more prefixes that
-# should be ignored while generating the index headers.
-
-IGNORE_PREFIX =
-
-#---------------------------------------------------------------------------
-# configuration options related to the HTML output
-#---------------------------------------------------------------------------
-
-# If the GENERATE_HTML tag is set to YES (the default) Doxygen will
-# generate HTML output.
-
-GENERATE_HTML = YES
-
-# The HTML_OUTPUT tag is used to specify where the HTML docs will be put.
-# If a relative path is entered the value of OUTPUT_DIRECTORY will be
-# put in front of it. If left blank `html' will be used as the default path.
-
-HTML_OUTPUT = html
-
-# The HTML_FILE_EXTENSION tag can be used to specify the file extension for
-# each generated HTML page (for example: .htm,.php,.asp). If it is left blank
-# doxygen will generate files with .html extension.
-
-HTML_FILE_EXTENSION = .html
-
-# The HTML_HEADER tag can be used to specify a personal HTML header for
-# each generated HTML page. If it is left blank doxygen will generate a
-# standard header.
-
-HTML_HEADER =
-
-# The HTML_FOOTER tag can be used to specify a personal HTML footer for
-# each generated HTML page. If it is left blank doxygen will generate a
-# standard footer.
-
-HTML_FOOTER =
-
-# The HTML_STYLESHEET tag can be used to specify a user-defined cascading
-# style sheet that is used by each HTML page. It can be used to
-# fine-tune the look of the HTML output. If the tag is left blank doxygen
-# will generate a default style sheet. Note that doxygen will try to copy
-# the style sheet file to the HTML output directory, so don't put your own
-# stylesheet in the HTML output directory as well, or it will be erased!
-
-HTML_STYLESHEET =
-
-# If the GENERATE_HTMLHELP tag is set to YES, additional index files
-# will be generated that can be used as input for tools like the
-# Microsoft HTML help workshop to generate a compressed HTML help file (.chm)
-# of the generated HTML documentation.
-
-GENERATE_HTMLHELP = NO
-
-# If the HTML_DYNAMIC_SECTIONS tag is set to YES then the generated HTML
-# documentation will contain sections that can be hidden and shown after the
-# page has loaded. For this to work a browser that supports
-# JavaScript and DHTML is required (for instance Mozilla 1.0+, Firefox,
-# Netscape 6.0+, Internet Explorer 5.0+, Konqueror, or Safari).
-
-HTML_DYNAMIC_SECTIONS = NO
-
-# If the GENERATE_HTMLHELP tag is set to YES, the CHM_FILE tag can
-# be used to specify the file name of the resulting .chm file. You
-# can add a path in front of the file if the result should not be
-# written to the html output directory.
-
-CHM_FILE =
-
-# If the GENERATE_HTMLHELP tag is set to YES, the HHC_LOCATION tag can
-# be used to specify the location (absolute path including file name) of
-# the HTML help compiler (hhc.exe). If non-empty doxygen will try to run
-# the HTML help compiler on the generated index.hhp.
-
-HHC_LOCATION =
-
-# If the GENERATE_HTMLHELP tag is set to YES, the GENERATE_CHI flag
-# controls if a separate .chi index file is generated (YES) or that
-# it should be included in the master .chm file (NO).
-
-GENERATE_CHI = NO
-
-# If the GENERATE_HTMLHELP tag is set to YES, the BINARY_TOC flag
-# controls whether a binary table of contents is generated (YES) or a
-# normal table of contents (NO) in the .chm file.
-
-BINARY_TOC = NO
-
-# The TOC_EXPAND flag can be set to YES to add extra items for group members
-# to the contents of the HTML help documentation and to the tree view.
-
-TOC_EXPAND = NO
-
-# The DISABLE_INDEX tag can be used to turn on/off the condensed index at
-# top of each HTML page. The value NO (the default) enables the index and
-# the value YES disables it.
-
-DISABLE_INDEX = NO
-
-# This tag can be used to set the number of enum values (range [1..20])
-# that doxygen will group on one line in the generated HTML documentation.
-
-ENUM_VALUES_PER_LINE = 4
-
-# If the GENERATE_TREEVIEW tag is set to YES, a side panel will be
-# generated containing a tree-like index structure (just like the one that
-# is generated for HTML Help). For this to work a browser that supports
-# JavaScript, DHTML, CSS and frames is required (for instance Mozilla 1.0+,
-# Netscape 6.0+, Internet explorer 5.0+, or Konqueror). Windows users are
-# probably better off using the HTML help feature.
-
-GENERATE_TREEVIEW = NO
-
-# If the treeview is enabled (see GENERATE_TREEVIEW) then this tag can be
-# used to set the initial width (in pixels) of the frame in which the tree
-# is shown.
-
-TREEVIEW_WIDTH = 250
-
-#---------------------------------------------------------------------------
-# configuration options related to the LaTeX output
-#---------------------------------------------------------------------------
-
-# If the GENERATE_LATEX tag is set to YES (the default) Doxygen will
-# generate Latex output.
-
-GENERATE_LATEX = YES
-
-# The LATEX_OUTPUT tag is used to specify where the LaTeX docs will be put.
-# If a relative path is entered the value of OUTPUT_DIRECTORY will be
-# put in front of it. If left blank `latex' will be used as the default path.
-
-LATEX_OUTPUT = latex
-
-# The LATEX_CMD_NAME tag can be used to specify the LaTeX command name to be
-# invoked. If left blank `latex' will be used as the default command name.
-
-LATEX_CMD_NAME = latex
-
-# The MAKEINDEX_CMD_NAME tag can be used to specify the command name to
-# generate index for LaTeX. If left blank `makeindex' will be used as the
-# default command name.
-
-MAKEINDEX_CMD_NAME = makeindex
-
-# If the COMPACT_LATEX tag is set to YES Doxygen generates more compact
-# LaTeX documents. This may be useful for small projects and may help to
-# save some trees in general.
-
-COMPACT_LATEX = YES
-
-# The PAPER_TYPE tag can be used to set the paper type that is used
-# by the printer. Possible values are: a4, a4wide, letter, legal and
-# executive. If left blank a4wide will be used.
-
-PAPER_TYPE = letter
-
-# The EXTRA_PACKAGES tag can be used to specify one or more names of LaTeX
-# packages that should be included in the LaTeX output.
-
-EXTRA_PACKAGES =
-
-# The LATEX_HEADER tag can be used to specify a personal LaTeX header for
-# the generated latex document. The header should contain everything until
-# the first chapter. If it is left blank doxygen will generate a
-# standard header. Notice: only use this tag if you know what you are doing!
-
-LATEX_HEADER =
-
-# If the PDF_HYPERLINKS tag is set to YES, the LaTeX that is generated
-# is prepared for conversion to pdf (using ps2pdf). The pdf file will
-# contain links (just like the HTML output) instead of page references
-# This makes the output suitable for online browsing using a pdf viewer.
-
-PDF_HYPERLINKS = YES
-
-# If the USE_PDFLATEX tag is set to YES, pdflatex will be used instead of
-# plain latex in the generated Makefile. Set this option to YES to get a
-# higher quality PDF documentation.
-
-USE_PDFLATEX = YES
-
-# If the LATEX_BATCHMODE tag is set to YES, doxygen will add the \\batchmode.
-# command to the generated LaTeX files. This will instruct LaTeX to keep
-# running if errors occur, instead of asking the user for help.
-# This option is also used when generating formulas in HTML.
-
-LATEX_BATCHMODE = NO
-
-# If LATEX_HIDE_INDICES is set to YES then doxygen will not
-# include the index chapters (such as File Index, Compound Index, etc.)
-# in the output.
-
-LATEX_HIDE_INDICES = NO
-
-#---------------------------------------------------------------------------
-# configuration options related to the RTF output
-#---------------------------------------------------------------------------
-
-# If the GENERATE_RTF tag is set to YES Doxygen will generate RTF output
-# The RTF output is optimized for Word 97 and may not look very pretty with
-# other RTF readers or editors.
-
-GENERATE_RTF = NO
-
-# The RTF_OUTPUT tag is used to specify where the RTF docs will be put.
-# If a relative path is entered the value of OUTPUT_DIRECTORY will be
-# put in front of it. If left blank `rtf' will be used as the default path.
-
-RTF_OUTPUT = rtf
-
-# If the COMPACT_RTF tag is set to YES Doxygen generates more compact
-# RTF documents. This may be useful for small projects and may help to
-# save some trees in general.
-
-COMPACT_RTF = NO
-
-# If the RTF_HYPERLINKS tag is set to YES, the RTF that is generated
-# will contain hyperlink fields. The RTF file will
-# contain links (just like the HTML output) instead of page references.
-# This makes the output suitable for online browsing using WORD or other
-# programs which support those fields.
-# Note: wordpad (write) and others do not support links.
-
-RTF_HYPERLINKS = NO
-
-# Load stylesheet definitions from file. Syntax is similar to doxygen's
-# config file, i.e. a series of assignments. You only have to provide
-# replacements, missing definitions are set to their default value.
-
-RTF_STYLESHEET_FILE =
-
-# Set optional variables used in the generation of an rtf document.
-# Syntax is similar to doxygen's config file.
-
-RTF_EXTENSIONS_FILE =
-
-#---------------------------------------------------------------------------
-# configuration options related to the man page output
-#---------------------------------------------------------------------------
-
-# If the GENERATE_MAN tag is set to YES (the default) Doxygen will
-# generate man pages
-
-GENERATE_MAN = NO
-
-# The MAN_OUTPUT tag is used to specify where the man pages will be put.
-# If a relative path is entered the value of OUTPUT_DIRECTORY will be
-# put in front of it. If left blank `man' will be used as the default path.
-
-MAN_OUTPUT = man
-
-# The MAN_EXTENSION tag determines the extension that is added to
-# the generated man pages (default is the subroutine's section .3)
-
-MAN_EXTENSION = .3
-
-# If the MAN_LINKS tag is set to YES and Doxygen generates man output,
-# then it will generate one additional man file for each entity
-# documented in the real man page(s). These additional files
-# only source the real man page, but without them the man command
-# would be unable to find the correct page. The default is NO.
-
-MAN_LINKS = YES
-
-#---------------------------------------------------------------------------
-# configuration options for the AutoGen Definitions output
-#---------------------------------------------------------------------------
-
-# If the GENERATE_AUTOGEN_DEF tag is set to YES Doxygen will
-# generate an AutoGen Definitions (see autogen.sf.net) file
-# that captures the structure of the code including all
-# documentation. Note that this feature is still experimental
-# and incomplete at the moment.
-
-GENERATE_AUTOGEN_DEF = NO
-
-#---------------------------------------------------------------------------
-# configuration options related to the Perl module output
-#---------------------------------------------------------------------------
-
-# If the GENERATE_PERLMOD tag is set to YES Doxygen will
-# generate a Perl module file that captures the structure of
-# the code including all documentation. Note that this
-# feature is still experimental and incomplete at the
-# moment.
-
-GENERATE_PERLMOD = NO
-
-# If the PERLMOD_LATEX tag is set to YES Doxygen will generate
-# the necessary Makefile rules, Perl scripts and LaTeX code to be able
-# to generate PDF and DVI output from the Perl module output.
-
-PERLMOD_LATEX = NO
-
-# If the PERLMOD_PRETTY tag is set to YES the Perl module output will be
-# nicely formatted so it can be parsed by a human reader. This is useful
-# if you want to understand what is going on. On the other hand, if this
-# tag is set to NO the size of the Perl module output will be much smaller
-# and Perl will parse it just the same.
-
-PERLMOD_PRETTY = YES
-
-# The names of the make variables in the generated doxyrules.make file
-# are prefixed with the string contained in PERLMOD_MAKEVAR_PREFIX.
-# This is useful so different doxyrules.make files included by the same
-# Makefile don't overwrite each other's variables.
-
-PERLMOD_MAKEVAR_PREFIX =
-
-#---------------------------------------------------------------------------
-# Configuration options related to the preprocessor
-#---------------------------------------------------------------------------
-
-# If the ENABLE_PREPROCESSING tag is set to YES (the default) Doxygen will
-# evaluate all C-preprocessor directives found in the sources and include
-# files.
-
-ENABLE_PREPROCESSING = YES
-
-# If the MACRO_EXPANSION tag is set to YES Doxygen will expand all macro
-# names in the source code. If set to NO (the default) only conditional
-# compilation will be performed. Macro expansion can be done in a controlled
-# way by setting EXPAND_ONLY_PREDEF to YES.
-
-MACRO_EXPANSION = YES
-
-# If the EXPAND_ONLY_PREDEF and MACRO_EXPANSION tags are both set to YES
-# then the macro expansion is limited to the macros specified with the
-# PREDEFINED and EXPAND_AS_DEFINED tags.
-
-EXPAND_ONLY_PREDEF = NO
-
-# If the SEARCH_INCLUDES tag is set to YES (the default) the include files
-# in the INCLUDE_PATH (see below) will be searched if a #include is found.
-
-SEARCH_INCLUDES = YES
-
-# The INCLUDE_PATH tag can be used to specify one or more directories that
-# contain include files that are not input files but should be processed by
-# the preprocessor.
-
-INCLUDE_PATH =
-
-# You can use the INCLUDE_FILE_PATTERNS tag to specify one or more wildcard
-# patterns (like *.h and *.hpp) to filter out the header-files in the
-# directories. If left blank, the patterns specified with FILE_PATTERNS will
-# be used.
-
-INCLUDE_FILE_PATTERNS = *.h
-
-# The PREDEFINED tag can be used to specify one or more macro names that
-# are defined before the preprocessor is started (similar to the -D option of
-# gcc). The argument of the tag is a list of macros of the form: name
-# or name=definition (no spaces). If the definition and the = are
-# omitted =1 is assumed. To prevent a macro definition from being
-# undefined via #undef or recursively expanded use the := operator
-# instead of the = operator.
-
-PREDEFINED =
-
-# If the MACRO_EXPANSION and EXPAND_ONLY_PREDEF tags are set to YES then
-# this tag can be used to specify a list of macro names that should be expanded.
-# The macro definition that is found in the sources will be used.
-# Use the PREDEFINED tag if you want to use a different macro definition.
-
-EXPAND_AS_DEFINED =
-
-# If the SKIP_FUNCTION_MACROS tag is set to YES (the default) then
-# doxygen's preprocessor will remove all function-like macros that are alone
-# on a line, have an all uppercase name, and do not end with a semicolon. Such
-# function macros are typically used for boiler-plate code, and will confuse
-# the parser if not removed.
-
-SKIP_FUNCTION_MACROS = YES
-
-#---------------------------------------------------------------------------
-# Configuration::additions related to external references
-#---------------------------------------------------------------------------
-
-# The TAGFILES option can be used to specify one or more tagfiles.
-# Optionally an initial location of the external documentation
-# can be added for each tagfile. The format of a tag file without
-# this location is as follows:
-# TAGFILES = file1 file2 ...
-# Adding location for the tag files is done as follows:
-# TAGFILES = file1=loc1 "file2 = loc2" ...
-# where "loc1" and "loc2" can be relative or absolute paths or
-# URLs. If a location is present for each tag, the installdox tool
-# does not have to be run to correct the links.
-# Note that each tag file must have a unique name
-# (where the name does NOT include the path)
-# If a tag file is not located in the directory in which doxygen
-# is run, you must also specify the path to the tagfile here.
-
-TAGFILES =
-
-# When a file name is specified after GENERATE_TAGFILE, doxygen will create
-# a tag file that is based on the input files it reads.
-
-GENERATE_TAGFILE =
-
-# If the ALLEXTERNALS tag is set to YES all external classes will be listed
-# in the class index. If set to NO only the inherited external classes
-# will be listed.
-
-ALLEXTERNALS = NO
-
-# If the EXTERNAL_GROUPS tag is set to YES all external groups will be listed
-# in the modules index. If set to NO, only the current project's groups will
-# be listed.
-
-EXTERNAL_GROUPS = YES
-
-# The PERL_PATH should be the absolute path and name of the perl script
-# interpreter (i.e. the result of `which perl').
-
-PERL_PATH = /usr/bin/perl
-
-#---------------------------------------------------------------------------
-# Configuration options related to the dot tool
-#---------------------------------------------------------------------------
-
-# If the CLASS_DIAGRAMS tag is set to YES (the default) Doxygen will
-# generate an inheritance diagram (in HTML, RTF and LaTeX) for classes with base
-# or super classes. Setting the tag to NO turns the diagrams off. Note that
-# this option is superseded by the HAVE_DOT option below. This is only a
-# fallback. It is recommended to install and use dot, since it yields more
-# powerful graphs.
-
-CLASS_DIAGRAMS = YES
-
-# You can define message sequence charts within doxygen comments using the \msc
-# command. Doxygen will then run the mscgen tool (see http://www.mcternan.me.uk/mscgen/) to
-# produce the chart and insert it in the documentation. The MSCGEN_PATH tag allows you to
-# specify the directory where the mscgen tool resides. If left empty the tool is assumed to
-# be found in the default search path.
-
-MSCGEN_PATH =
-
-# If set to YES, the inheritance and collaboration graphs will hide
-# inheritance and usage relations if the target is undocumented
-# or is not a class.
-
-HIDE_UNDOC_RELATIONS = YES
-
-# If you set the HAVE_DOT tag to YES then doxygen will assume the dot tool is
-# available from the path. This tool is part of Graphviz, a graph visualization
-# toolkit from AT&T and Lucent Bell Labs. The other options in this section
-# have no effect if this option is set to NO (the default)
-
-HAVE_DOT = NO
-
-# If the CLASS_GRAPH and HAVE_DOT tags are set to YES then doxygen
-# will generate a graph for each documented class showing the direct and
-# indirect inheritance relations. Setting this tag to YES will force
-# the CLASS_DIAGRAMS tag to NO.
-
-CLASS_GRAPH = YES
-
-# If the COLLABORATION_GRAPH and HAVE_DOT tags are set to YES then doxygen
-# will generate a graph for each documented class showing the direct and
-# indirect implementation dependencies (inheritance, containment, and
-# class references variables) of the class with other documented classes.
-
-COLLABORATION_GRAPH = YES
-
-# If the GROUP_GRAPHS and HAVE_DOT tags are set to YES then doxygen
-# will generate a graph for groups, showing the direct groups dependencies
-
-GROUP_GRAPHS = YES
-
-# If the UML_LOOK tag is set to YES doxygen will generate inheritance and
-# collaboration diagrams in a style similar to the OMG's Unified Modeling
-# Language.
-
-UML_LOOK = NO
-
-# If set to YES, the inheritance and collaboration graphs will show the
-# relations between templates and their instances.
-
-TEMPLATE_RELATIONS = NO
-
-# If the ENABLE_PREPROCESSING, SEARCH_INCLUDES, INCLUDE_GRAPH, and HAVE_DOT
-# tags are set to YES then doxygen will generate a graph for each documented
-# file showing the direct and indirect include dependencies of the file with
-# other documented files.
-
-INCLUDE_GRAPH = YES
-
-# If the ENABLE_PREPROCESSING, SEARCH_INCLUDES, INCLUDED_BY_GRAPH, and
-# HAVE_DOT tags are set to YES then doxygen will generate a graph for each
-# documented header file showing the documented files that directly or
-# indirectly include this file.
-
-INCLUDED_BY_GRAPH = YES
-
-# If the CALL_GRAPH, SOURCE_BROWSER and HAVE_DOT tags are set to YES then doxygen will
-# generate a call dependency graph for every global function or class method.
-# Note that enabling this option will significantly increase the time of a run.
-# So in most cases it will be better to enable call graphs for selected
-# functions only using the \callgraph command.
-
-CALL_GRAPH = NO
-
-# If the CALLER_GRAPH, SOURCE_BROWSER and HAVE_DOT tags are set to YES then doxygen will
-# generate a caller dependency graph for every global function or class method.
-# Note that enabling this option will significantly increase the time of a run.
-# So in most cases it will be better to enable caller graphs for selected
-# functions only using the \callergraph command.
-
-CALLER_GRAPH = NO
-
-# If the GRAPHICAL_HIERARCHY and HAVE_DOT tags are set to YES then doxygen
-# will generate a graphical hierarchy of all classes instead of a textual one.
-
-GRAPHICAL_HIERARCHY = YES
-
-# If the DIRECTORY_GRAPH, SHOW_DIRECTORIES and HAVE_DOT tags are set to YES
-# then doxygen will show the dependencies a directory has on other directories
-# in a graphical way. The dependency relations are determined by the #include
-# relations between the files in the directories.
-
-DIRECTORY_GRAPH = YES
-
-# The DOT_IMAGE_FORMAT tag can be used to set the image format of the images
-# generated by dot. Possible values are png, jpg, or gif
-# If left blank png will be used.
-
-DOT_IMAGE_FORMAT = png
-
-# The tag DOT_PATH can be used to specify the path where the dot tool can be
-# found. If left blank, it is assumed the dot tool can be found in the path.
-
-DOT_PATH =
-
-# The DOTFILE_DIRS tag can be used to specify one or more directories that
-# contain dot files that are included in the documentation (see the
-# \dotfile command).
-
-DOTFILE_DIRS =
-
-# The DOT_GRAPH_MAX_NODES tag can be used to set the maximum number of
-# nodes that will be shown in the graph. If the number of nodes in a graph
-# becomes larger than this value, doxygen will truncate the graph, which is
-# visualized by representing a node as a red box. Note that if the number
-# of direct children of the root node in a graph is already larger than
-# DOT_GRAPH_MAX_NODES then the graph will not be shown at all. Also note
-# that the size of a graph can be further restricted by MAX_DOT_GRAPH_DEPTH.
-
-DOT_GRAPH_MAX_NODES = 50
-
-# The MAX_DOT_GRAPH_DEPTH tag can be used to set the maximum depth of the
-# graphs generated by dot. A depth value of 3 means that only nodes reachable
-# from the root by following a path via at most 3 edges will be shown. Nodes
-# that lay further from the root node will be omitted. Note that setting this
-# option to 1 or 2 may greatly reduce the computation time needed for large
-# code bases. Also note that the size of a graph can be further restricted by
-# DOT_GRAPH_MAX_NODES. Using a depth of 0 means no depth restriction.
-
-MAX_DOT_GRAPH_DEPTH = 0
-
-# Set the DOT_TRANSPARENT tag to YES to generate images with a transparent
-# background. This is disabled by default, which results in a white background.
-# Warning: Depending on the platform used, enabling this option may lead to
-# badly anti-aliased labels on the edges of a graph (i.e. they become hard to
-# read).
-
-DOT_TRANSPARENT = YES
-
-# Set the DOT_MULTI_TARGETS tag to YES allow dot to generate multiple output
-# files in one run (i.e. multiple -o and -T options on the command line). This
-# makes dot run faster, but since only newer versions of dot (>1.8.10)
-# support this, this feature is disabled by default.
-
-DOT_MULTI_TARGETS = NO
-
-# If the GENERATE_LEGEND tag is set to YES (the default) Doxygen will
-# generate a legend page explaining the meaning of the various boxes and
-# arrows in the dot generated graphs.
-
-GENERATE_LEGEND = YES
-
-# If the DOT_CLEANUP tag is set to YES (the default) Doxygen will
-# remove the intermediate dot files that are used to generate
-# the various graphs.
-
-DOT_CLEANUP = YES
-
-#---------------------------------------------------------------------------
-# Configuration::additions related to the search engine
-#---------------------------------------------------------------------------
-
-# The SEARCHENGINE tag specifies whether or not a search engine should be
-# used. If set to NO the values of all tags below this one will be ignored.
-
-SEARCHENGINE = NO
diff --git a/third_party/aom/mainpage.dox b/third_party/aom/mainpage.dox
deleted file mode 100644
index 03a299ae1..000000000
--- a/third_party/aom/mainpage.dox
+++ /dev/null
@@ -1,52 +0,0 @@
-/*!\mainpage AOMedia Codec SDK
-
- \section main_contents Page Contents
- - \ref main_intro
- - \ref main_startpoints
- - \ref main_support
-
- \section main_intro Introduction
- Welcome to the AOMedia Codec SDK. This SDK allows you to integrate your
- applications with the AOM and AV1 video codecs.
-
- This distribution of the AOMedia Codec SDK includes the following support:
-
- \if aom_encoder
- - \ref aom_encoder
- \endif
- \if aom_decoder
- - \ref aom_decoder
- \endif
-
-
- \section main_startpoints Starting Points
- - Consult the \ref changelog for a complete list of improvements in this
- release.
- - \ref readme contains instructions on compiling the sample applications.
- - Read the \ref usage "usage" for a narrative on codec usage.
- - Read the \ref samples "sample code" for examples of how to interact with the
- codec.
- - \ref codec reference
- \if encoder
- - \ref encoder reference
- \endif
- \if decoder
- - \ref decoder reference
- \endif
-
- \section main_support Support Options & FAQ
- The AOMedia project is an open source project supported by its community. For
- questions about this SDK, please mail the apps-devel@webmproject.org list.
- To contribute, see http://www.webmproject.org/code/contribute and mail
- codec-devel@webmproject.org.
-*/
-
-/*!\page changelog CHANGELOG
- \verbinclude CHANGELOG
-*/
-
-/*!\page readme README.md
- \include README.md
-*/
-
-/*!\defgroup codecs Supported Codecs */
diff --git a/third_party/aom/stats/aomstats.c b/third_party/aom/stats/aomstats.c
deleted file mode 100644
index 4a15adf02..000000000
--- a/third_party/aom/stats/aomstats.c
+++ /dev/null
@@ -1,106 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#include "stats/aomstats.h"
-
-#include <math.h>
-#include <stdlib.h>
-#include <string.h>
-
-#include "common/tools_common.h"
-
-/* Opens the first-pass stats file `fpf` for encoding pass `pass`.
- *
- * Pass 0 opens the file for binary writing and leaves the in-memory buffer
- * empty (sz = 0, buf = NULL). Any other pass opens the file for binary
- * reading and loads its entire contents into a freshly malloc'd buffer
- * described by stats->buf.
- *
- * Returns 1 on success and 0 on failure (the file could not be created in
- * pass 0, or fewer bytes than expected were read in a later pass). A missing
- * or unseekable input file and allocation failure are reported via fatal().
- * NOTE(review): the code relies on fatal() not returning -- confirm against
- * common/tools_common.h.
- */
-int stats_open_file(stats_io_t *stats, const char *fpf, int pass) {
-  int res;
-  stats->pass = pass;
-
-  if (pass == 0) {
-    stats->file = fopen(fpf, "wb");
-    stats->buf.sz = 0;
-    stats->buf.buf = NULL;
-    res = (stats->file != NULL);
-  } else {
-    size_t nbytes;
-    long file_sz;
-
-    stats->file = fopen(fpf, "rb");
-
-    if (stats->file == NULL) fatal("First-pass stats file does not exist!");
-
-    if (fseek(stats->file, 0, SEEK_END))
-      fatal("First-pass stats file must be seekable!");
-
-    /* ftell() returns -1L on error; converting that straight to size_t would
-     * request an enormous allocation, so check it before use. */
-    file_sz = ftell(stats->file);
-    if (file_sz < 0) fatal("Failed to determine first-pass stats file size!");
-
-    stats->buf.sz = stats->buf_alloc_sz = (size_t)file_sz;
-    rewind(stats->file);
-
-    stats->buf.buf = malloc(stats->buf_alloc_sz);
-
-    /* The argument type must match the %lu conversion (unsigned long). */
-    if (!stats->buf.buf)
-      fatal("Failed to allocate first-pass stats buffer (%lu bytes)",
-            (unsigned long)stats->buf_alloc_sz);
-
-    nbytes = fread(stats->buf.buf, 1, stats->buf.sz, stats->file);
-    res = (nbytes == stats->buf.sz);
-  }
-
-  return res;
-}
-
-/* Prepares `stats` for in-memory stats handling in encoding pass `pass`.
- *
- * In pass 0 a new 64 KiB buffer is allocated and marked empty. In any other
- * pass the buffer already referenced by stats->buf is left untouched. In
- * both cases the cursor (buf_ptr) is reset to the start of the buffer.
- *
- * Returns 1 if stats->buf.buf is non-NULL afterwards, 0 otherwise.
- */
-int stats_open_mem(stats_io_t *stats, int pass) {
-  stats->pass = pass;
-
-  if (pass == 0) {
-    stats->buf_alloc_sz = 64 * 1024;
-    stats->buf.buf = malloc(stats->buf_alloc_sz);
-    stats->buf.sz = 0;
-  }
-
-  stats->buf_ptr = stats->buf.buf;
-  return stats->buf.buf != NULL;
-}
-
-/* Releases the resources held by `stats`.
- *
- * The stats buffer is freed only when this object's pass equals `last_pass`.
- * If a file is attached it is closed and the handle cleared.
- */
-void stats_close(stats_io_t *stats, int last_pass) {
-  /* The buffer is released on the last pass whether or not a file is open. */
-  if (stats->pass == last_pass) free(stats->buf.buf);
-
-  if (stats->file != NULL) {
-    fclose(stats->file);
-    stats->file = NULL;
-  }
-}
-
-/* Appends `len` bytes from `pkt` to the stats sink.
- *
- * When a file is attached the bytes are written to it (the fwrite result is
- * deliberately ignored). Otherwise they are appended to the in-memory
- * buffer, which is grown in 64 KiB steps until the new data fits.
- * Reallocation failure is reported via fatal().
- */
-void stats_write(stats_io_t *stats, const void *pkt, size_t len) {
-  if (stats->file) {
-    (void)fwrite(pkt, 1, len, stats->file);
-  } else {
-    const size_t needed = stats->buf.sz + len;
-
-    if (needed > stats->buf_alloc_sz) {
-      size_t new_sz = stats->buf_alloc_sz;
-      char *new_ptr;
-
-      /* Keep adding 64 KiB until the packet fits: a single step is not
-       * enough when len exceeds the slack left after one increment, and the
-       * memcpy below would then write past the end of the allocation. */
-      do {
-        new_sz += 64 * 1024;
-      } while (new_sz < needed);
-
-      new_ptr = realloc(stats->buf.buf, new_sz);
-
-      if (new_ptr) {
-        /* Re-base the write cursor onto the (possibly moved) buffer. */
-        stats->buf_ptr = new_ptr + (stats->buf_ptr - (char *)stats->buf.buf);
-        stats->buf.buf = new_ptr;
-        stats->buf_alloc_sz = new_sz;
-      } else {
-        fatal("Failed to realloc firstpass stats buffer.");
-      }
-    }
-
-    memcpy(stats->buf_ptr, pkt, len);
-    stats->buf.sz += len;
-    stats->buf_ptr += len;
-  }
-}
-
-/* Returns, by value, the buffer descriptor currently held by `stats`. */
-aom_fixed_buf_t stats_get(stats_io_t *stats) {
-  return stats->buf;
-}
diff --git a/third_party/aom/stats/aomstats.h b/third_party/aom/stats/aomstats.h
deleted file mode 100644
index b9c71871a..000000000
--- a/third_party/aom/stats/aomstats.h
+++ /dev/null
@@ -1,44 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#ifndef AOM_STATS_AOMSTATS_H_
-#define AOM_STATS_AOMSTATS_H_
-
-#include <stdio.h>
-
-#include "aom/aom_encoder.h"
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-/* This structure is used to abstract the different ways of handling
- * first pass statistics
- */
-typedef struct {
- /* Stats data: .buf points at the bytes, .sz is the number of valid bytes. */
- aom_fixed_buf_t buf;
- /* Encoding pass this object was opened for (set by the stats_open_* calls). */
- int pass;
- /* Backing file when opened with stats_open_file(); stats_write() and
-  * stats_close() test it to choose between file and in-memory handling. */
- FILE *file;
- /* Write cursor into buf.buf, used by the in-memory path of stats_write(). */
- char *buf_ptr;
- /* Number of bytes currently allocated for buf.buf. */
- size_t buf_alloc_sz;
-} stats_io_t;
-
-/* Opens stats file `fpf`: for writing in pass 0, otherwise reads it fully
- * into memory. Returns non-zero on success. */
-int stats_open_file(stats_io_t *stats, const char *fpf, int pass);
-/* Sets up in-memory stats; pass 0 allocates a new buffer. Returns non-zero
- * when a buffer is available. */
-int stats_open_mem(stats_io_t *stats, int pass);
-/* Closes any attached file and frees the buffer if this is `last_pass`. */
-void stats_close(stats_io_t *stats, int last_pass);
-/* Appends `len` bytes from `pkt` to the file or to the in-memory buffer. */
-void stats_write(stats_io_t *stats, const void *pkt, size_t len);
-/* Returns the current stats buffer descriptor by value. */
-aom_fixed_buf_t stats_get(stats_io_t *stats);
-
-#ifdef __cplusplus
-} // extern "C"
-#endif
-
-#endif // AOM_STATS_AOMSTATS_H_
diff --git a/third_party/aom/stats/rate_hist.c b/third_party/aom/stats/rate_hist.c
deleted file mode 100644
index 71eb78b72..000000000
--- a/third_party/aom/stats/rate_hist.c
+++ /dev/null
@@ -1,271 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#include "stats/rate_hist.h"
-
-#include <assert.h>
-#include <stdlib.h>
-#include <limits.h>
-#include <stdio.h>
-#include <math.h>
-
-#define RATE_BINS 100
-#define HIST_BAR_MAX 40
-
-struct hist_bucket {
- int low;
- int high;
- int count;
-};
-
-struct rate_hist {
- int64_t *pts;
- int *sz;
- int samples;
- int frames;
- struct hist_bucket bucket[RATE_BINS];
- int total;
-};
-
-struct rate_hist *init_rate_histogram(const aom_codec_enc_cfg_t *cfg,
- const aom_rational_t *fps) {
- int i;
- struct rate_hist *hist = malloc(sizeof(*hist));
-
- // Determine the number of samples in the buffer. Use the file's framerate
- // to determine the number of frames in rc_buf_sz milliseconds, with an
- // adjustment (5/4) to account for alt-refs
- hist->samples = cfg->rc_buf_sz * 5 / 4 * fps->num / fps->den / 1000;
-
- // prevent division by zero
- if (hist->samples == 0) hist->samples = 1;
-
- hist->frames = 0;
- hist->total = 0;
-
- hist->pts = calloc(hist->samples, sizeof(*hist->pts));
- hist->sz = calloc(hist->samples, sizeof(*hist->sz));
- for (i = 0; i < RATE_BINS; i++) {
- hist->bucket[i].low = INT_MAX;
- hist->bucket[i].high = 0;
- hist->bucket[i].count = 0;
- }
-
- return hist;
-}
-
-void destroy_rate_histogram(struct rate_hist *hist) {
- if (hist) {
- free(hist->pts);
- free(hist->sz);
- free(hist);
- }
-}
-
-void update_rate_histogram(struct rate_hist *hist,
- const aom_codec_enc_cfg_t *cfg,
- const aom_codec_cx_pkt_t *pkt) {
- int i;
- int64_t then = 0;
- int64_t avg_bitrate = 0;
- int64_t sum_sz = 0;
- const int64_t now = pkt->data.frame.pts * 1000 *
- (uint64_t)cfg->g_timebase.num /
- (uint64_t)cfg->g_timebase.den;
-
- int idx = hist->frames++ % hist->samples;
- hist->pts[idx] = now;
- hist->sz[idx] = (int)pkt->data.frame.sz;
-
- if (now < cfg->rc_buf_initial_sz) return;
-
- if (!cfg->rc_target_bitrate) return;
-
- then = now;
-
- /* Sum the size over the past rc_buf_sz ms */
- for (i = hist->frames; i > 0 && hist->frames - i < hist->samples; i--) {
- const int i_idx = (i - 1) % hist->samples;
-
- then = hist->pts[i_idx];
- if (now - then > cfg->rc_buf_sz) break;
- sum_sz += hist->sz[i_idx];
- }
-
- if (now == then) return;
-
- avg_bitrate = sum_sz * 8 * 1000 / (now - then);
- idx = (int)(avg_bitrate * (RATE_BINS / 2) / (cfg->rc_target_bitrate * 1000));
- if (idx < 0) idx = 0;
- if (idx > RATE_BINS - 1) idx = RATE_BINS - 1;
- if (hist->bucket[idx].low > avg_bitrate)
- hist->bucket[idx].low = (int)avg_bitrate;
- if (hist->bucket[idx].high < avg_bitrate)
- hist->bucket[idx].high = (int)avg_bitrate;
- hist->bucket[idx].count++;
- hist->total++;
-}
-
-static int merge_hist_buckets(struct hist_bucket *bucket, int max_buckets,
- int *num_buckets) {
- int small_bucket = 0, merge_bucket = INT_MAX, big_bucket = 0;
- int buckets = *num_buckets;
- int i;
-
- /* Find the extrema for this list of buckets */
- big_bucket = small_bucket = 0;
- for (i = 0; i < buckets; i++) {
- if (bucket[i].count < bucket[small_bucket].count) small_bucket = i;
- if (bucket[i].count > bucket[big_bucket].count) big_bucket = i;
- }
-
- /* If we have too many buckets, merge the smallest with an adjacent
- * bucket.
- */
- while (buckets > max_buckets) {
- int last_bucket = buckets - 1;
-
- /* merge the small bucket with an adjacent one. */
- if (small_bucket == 0)
- merge_bucket = 1;
- else if (small_bucket == last_bucket)
- merge_bucket = last_bucket - 1;
- else if (bucket[small_bucket - 1].count < bucket[small_bucket + 1].count)
- merge_bucket = small_bucket - 1;
- else
- merge_bucket = small_bucket + 1;
-
- assert(abs(merge_bucket - small_bucket) <= 1);
- assert(small_bucket < buckets);
- assert(big_bucket < buckets);
- assert(merge_bucket < buckets);
-
- if (merge_bucket < small_bucket) {
- bucket[merge_bucket].high = bucket[small_bucket].high;
- bucket[merge_bucket].count += bucket[small_bucket].count;
- } else {
- bucket[small_bucket].high = bucket[merge_bucket].high;
- bucket[small_bucket].count += bucket[merge_bucket].count;
- merge_bucket = small_bucket;
- }
-
- assert(bucket[merge_bucket].low != bucket[merge_bucket].high);
-
- buckets--;
-
- /* Remove the merge_bucket from the list, and find the new small
- * and big buckets while we're at it
- */
- big_bucket = small_bucket = 0;
- for (i = 0; i < buckets; i++) {
- if (i > merge_bucket) bucket[i] = bucket[i + 1];
-
- if (bucket[i].count < bucket[small_bucket].count) small_bucket = i;
- if (bucket[i].count > bucket[big_bucket].count) big_bucket = i;
- }
- }
-
- *num_buckets = buckets;
- return bucket[big_bucket].count;
-}
-
-static void show_histogram(const struct hist_bucket *bucket, int buckets,
- int total, int scale) {
- const char *pat1, *pat2;
- int i;
-
- switch ((int)(log(bucket[buckets - 1].high) / log(10)) + 1) {
- case 1:
- case 2:
- pat1 = "%4d %2s: ";
- pat2 = "%4d-%2d: ";
- break;
- case 3:
- pat1 = "%5d %3s: ";
- pat2 = "%5d-%3d: ";
- break;
- case 4:
- pat1 = "%6d %4s: ";
- pat2 = "%6d-%4d: ";
- break;
- case 5:
- pat1 = "%7d %5s: ";
- pat2 = "%7d-%5d: ";
- break;
- case 6:
- pat1 = "%8d %6s: ";
- pat2 = "%8d-%6d: ";
- break;
- case 7:
- pat1 = "%9d %7s: ";
- pat2 = "%9d-%7d: ";
- break;
- default:
- pat1 = "%12d %10s: ";
- pat2 = "%12d-%10d: ";
- break;
- }
-
- for (i = 0; i < buckets; i++) {
- int len;
- int j;
- float pct;
-
- pct = (float)(100.0 * bucket[i].count / total);
- len = HIST_BAR_MAX * bucket[i].count / scale;
- if (len < 1) len = 1;
- assert(len <= HIST_BAR_MAX);
-
- if (bucket[i].low == bucket[i].high)
- fprintf(stderr, pat1, bucket[i].low, "");
- else
- fprintf(stderr, pat2, bucket[i].low, bucket[i].high);
-
- for (j = 0; j < HIST_BAR_MAX; j++) fprintf(stderr, j < len ? "=" : " ");
- fprintf(stderr, "\t%5d (%6.2f%%)\n", bucket[i].count, pct);
- }
-}
-
-void show_q_histogram(const int counts[64], int max_buckets) {
- struct hist_bucket bucket[64];
- int buckets = 0;
- int total = 0;
- int scale;
- int i;
-
- for (i = 0; i < 64; i++) {
- if (counts[i]) {
- bucket[buckets].low = bucket[buckets].high = i;
- bucket[buckets].count = counts[i];
- buckets++;
- total += counts[i];
- }
- }
-
- fprintf(stderr, "\nQuantizer Selection:\n");
- scale = merge_hist_buckets(bucket, max_buckets, &buckets);
- show_histogram(bucket, buckets, total, scale);
-}
-
-void show_rate_histogram(struct rate_hist *hist, const aom_codec_enc_cfg_t *cfg,
- int max_buckets) {
- int i, scale;
- int buckets = 0;
-
- for (i = 0; i < RATE_BINS; i++) {
- if (hist->bucket[i].low == INT_MAX) continue;
- hist->bucket[buckets++] = hist->bucket[i];
- }
-
- fprintf(stderr, "\nRate (over %dms window):\n", cfg->rc_buf_sz);
- scale = merge_hist_buckets(hist->bucket, max_buckets, &buckets);
- show_histogram(hist->bucket, buckets, hist->total, scale);
-}
diff --git a/third_party/aom/stats/rate_hist.h b/third_party/aom/stats/rate_hist.h
deleted file mode 100644
index 55b8c5d43..000000000
--- a/third_party/aom/stats/rate_hist.h
+++ /dev/null
@@ -1,41 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#ifndef AOM_STATS_RATE_HIST_H_
-#define AOM_STATS_RATE_HIST_H_
-
-#include "aom/aom_encoder.h"
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-struct rate_hist;
-
-struct rate_hist *init_rate_histogram(const aom_codec_enc_cfg_t *cfg,
- const aom_rational_t *fps);
-
-void destroy_rate_histogram(struct rate_hist *hist);
-
-void update_rate_histogram(struct rate_hist *hist,
- const aom_codec_enc_cfg_t *cfg,
- const aom_codec_cx_pkt_t *pkt);
-
-void show_q_histogram(const int counts[64], int max_buckets);
-
-void show_rate_histogram(struct rate_hist *hist, const aom_codec_enc_cfg_t *cfg,
- int max_buckets);
-
-#ifdef __cplusplus
-} // extern "C"
-#endif
-
-#endif // AOM_STATS_RATE_HIST_H_
diff --git a/third_party/aom/test/accounting_test.cc b/third_party/aom/test/accounting_test.cc
deleted file mode 100644
index 8b5c8af13..000000000
--- a/third_party/aom/test/accounting_test.cc
+++ /dev/null
@@ -1,75 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#include <math.h>
-#include <stdlib.h>
-#include <string.h>
-
-#include "third_party/googletest/src/googletest/include/gtest/gtest.h"
-
-#include "test/acm_random.h"
-#include "aom/aom_integer.h"
-#include "aom_dsp/bitreader.h"
-#include "aom_dsp/bitwriter.h"
-
-using libaom_test::ACMRandom;
-
-TEST(AV1, TestAccounting) {
- const int kBufferSize = 10000;
- const int kSymbols = 1024;
- aom_writer bw;
- uint8_t bw_buffer[kBufferSize];
- aom_start_encode(&bw, bw_buffer);
- for (int i = 0; i < kSymbols; i++) {
- aom_write(&bw, 0, 32);
- aom_write(&bw, 0, 32);
- aom_write(&bw, 0, 32);
- }
- aom_stop_encode(&bw);
- aom_reader br;
- aom_reader_init(&br, bw_buffer, bw.pos);
-
- Accounting accounting;
- aom_accounting_init(&accounting);
- br.accounting = &accounting;
- for (int i = 0; i < kSymbols; i++) {
- aom_read(&br, 32, "A");
- }
- // Consecutive symbols that are the same are coalesced.
- GTEST_ASSERT_EQ(accounting.syms.num_syms, 1);
- GTEST_ASSERT_EQ(accounting.syms.syms[0].samples, (unsigned int)kSymbols);
-
- aom_accounting_reset(&accounting);
- GTEST_ASSERT_EQ(accounting.syms.num_syms, 0);
-
- // Should record 2 * kSymbols accounting symbols.
- aom_reader_init(&br, bw_buffer, bw.pos);
- br.accounting = &accounting;
- for (int i = 0; i < kSymbols; i++) {
- aom_read(&br, 32, "A");
- aom_read(&br, 32, "B");
- aom_read(&br, 32, "B");
- }
- GTEST_ASSERT_EQ(accounting.syms.num_syms, kSymbols * 2);
- uint32_t tell_frac = aom_reader_tell_frac(&br);
- for (int i = 0; i < accounting.syms.num_syms; i++) {
- tell_frac -= accounting.syms.syms[i].bits;
- }
- GTEST_ASSERT_EQ(tell_frac, 0U);
-
- GTEST_ASSERT_EQ(aom_accounting_dictionary_lookup(&accounting, "A"),
- aom_accounting_dictionary_lookup(&accounting, "A"));
-
- // Check for collisions. The current aom_accounting_hash function returns
- // the same hash code for AB and BA.
- GTEST_ASSERT_NE(aom_accounting_dictionary_lookup(&accounting, "AB"),
- aom_accounting_dictionary_lookup(&accounting, "BA"));
-}
diff --git a/third_party/aom/test/acm_random.h b/third_party/aom/test/acm_random.h
deleted file mode 100644
index 0a8317fd5..000000000
--- a/third_party/aom/test/acm_random.h
+++ /dev/null
@@ -1,84 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#ifndef AOM_TEST_ACM_RANDOM_H_
-#define AOM_TEST_ACM_RANDOM_H_
-
-#include "third_party/googletest/src/googletest/include/gtest/gtest.h"
-
-#include "aom/aom_integer.h"
-
-namespace libaom_test {
-
-class ACMRandom {
- public:
- ACMRandom() : random_(DeterministicSeed()) {}
-
- explicit ACMRandom(int seed) : random_(seed) {}
-
- void Reset(int seed) { random_.Reseed(seed); }
-
- uint32_t Rand31(void) {
- return random_.Generate(testing::internal::Random::kMaxRange);
- }
-
- uint16_t Rand16(void) {
- const uint32_t value =
- random_.Generate(testing::internal::Random::kMaxRange);
- return (value >> 15) & 0xffff;
- }
-
- int16_t Rand15Signed(void) {
- const uint32_t value =
- random_.Generate(testing::internal::Random::kMaxRange);
- return (value >> 17) & 0xffff;
- }
-
- uint16_t Rand12(void) {
- const uint32_t value =
- random_.Generate(testing::internal::Random::kMaxRange);
- // There's a bit more entropy in the upper bits of this implementation.
- return (value >> 19) & 0xfff;
- }
-
- int16_t Rand9Signed(void) {
- // Use 9 bits: values between 255 (0x0FF) and -256 (0x100).
- const uint32_t value = random_.Generate(512);
- return static_cast<int16_t>(value) - 256;
- }
-
- uint8_t Rand8(void) {
- const uint32_t value =
- random_.Generate(testing::internal::Random::kMaxRange);
- // There's a bit more entropy in the upper bits of this implementation.
- return (value >> 23) & 0xff;
- }
-
- uint8_t Rand8Extremes(void) {
- // Returns a random value near 0 or near 255, to better exercise
- // saturation behavior.
- const uint8_t r = Rand8();
- return r < 128 ? r << 4 : r >> 4;
- }
-
- int PseudoUniform(int range) { return random_.Generate(range); }
-
- int operator()(int n) { return PseudoUniform(n); }
-
- static int DeterministicSeed(void) { return 0xbaba; }
-
- private:
- testing::internal::Random random_;
-};
-
-} // namespace libaom_test
-
-#endif // AOM_TEST_ACM_RANDOM_H_
diff --git a/third_party/aom/test/active_map_test.cc b/third_party/aom/test/active_map_test.cc
deleted file mode 100644
index a2b0546ed..000000000
--- a/third_party/aom/test/active_map_test.cc
+++ /dev/null
@@ -1,103 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#include <climits>
-#include <vector>
-#include "third_party/googletest/src/googletest/include/gtest/gtest.h"
-#include "test/codec_factory.h"
-#include "test/encode_test_driver.h"
-#include "test/i420_video_source.h"
-#include "test/util.h"
-
-namespace {
-
-class ActiveMapTest
- : public ::libaom_test::CodecTestWith2Params<libaom_test::TestMode, int>,
- public ::libaom_test::EncoderTest {
- protected:
- static const int kWidth = 208;
- static const int kHeight = 144;
-
- ActiveMapTest() : EncoderTest(GET_PARAM(0)) {}
- virtual ~ActiveMapTest() {}
-
- virtual void SetUp() {
- InitializeConfig();
- SetMode(GET_PARAM(1));
- cpu_used_ = GET_PARAM(2);
- }
-
- virtual void PreEncodeFrameHook(::libaom_test::VideoSource *video,
- ::libaom_test::Encoder *encoder) {
- if (video->frame() == 1) {
- encoder->Control(AOME_SET_CPUUSED, cpu_used_);
- } else if (video->frame() == 3) {
- aom_active_map_t map = aom_active_map_t();
- /* clang-format off */
- uint8_t active_map[9 * 13] = {
- 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 0, 0, 0,
- 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 0, 0, 0,
- 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 0, 0, 0,
- 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 0, 0, 0,
- 0, 0, 0, 0, 1, 1, 0, 0, 0, 1, 0, 1, 1,
- 0, 0, 0, 0, 1, 1, 0, 0, 1, 0, 1, 0, 1,
- 0, 0, 0, 0, 0, 0, 1, 1, 1, 0, 1, 0, 1,
- 0, 0, 0, 0, 0, 0, 1, 1, 0, 1, 0, 1, 1,
- 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1, 0,
- };
- /* clang-format on */
- map.cols = (kWidth + 15) / 16;
- map.rows = (kHeight + 15) / 16;
- ASSERT_EQ(map.cols, 13u);
- ASSERT_EQ(map.rows, 9u);
- map.active_map = active_map;
- encoder->Control(AOME_SET_ACTIVEMAP, &map);
- } else if (video->frame() == 15) {
- aom_active_map_t map = aom_active_map_t();
- map.cols = (kWidth + 15) / 16;
- map.rows = (kHeight + 15) / 16;
- map.active_map = NULL;
- encoder->Control(AOME_SET_ACTIVEMAP, &map);
- }
- }
-
- void DoTest() {
- // Validate that this non multiple of 64 wide clip encodes
- cfg_.g_lag_in_frames = 0;
- cfg_.rc_target_bitrate = 400;
- cfg_.rc_resize_mode = 0;
- cfg_.g_pass = AOM_RC_ONE_PASS;
- cfg_.rc_end_usage = AOM_CBR;
- cfg_.kf_max_dist = 90000;
- ::libaom_test::I420VideoSource video("hantro_odd.yuv", kWidth, kHeight, 30,
- 1, 0, 20);
-
- ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
- }
-
- int cpu_used_;
-};
-
-TEST_P(ActiveMapTest, Test) { DoTest(); }
-
-class ActiveMapTestLarge : public ActiveMapTest {};
-
-TEST_P(ActiveMapTestLarge, Test) { DoTest(); }
-
-AV1_INSTANTIATE_TEST_CASE(ActiveMapTestLarge,
- ::testing::Values(::libaom_test::kRealTime),
- ::testing::Range(0, 5));
-
-AV1_INSTANTIATE_TEST_CASE(ActiveMapTest,
- ::testing::Values(::libaom_test::kRealTime),
- ::testing::Range(5, 9));
-
-} // namespace
diff --git a/third_party/aom/test/altref_test.cc b/third_party/aom/test/altref_test.cc
deleted file mode 100644
index dabb1475a..000000000
--- a/third_party/aom/test/altref_test.cc
+++ /dev/null
@@ -1,97 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#include "third_party/googletest/src/googletest/include/gtest/gtest.h"
-#include "test/codec_factory.h"
-#include "test/encode_test_driver.h"
-#include "test/i420_video_source.h"
-#include "test/util.h"
-namespace {
-
-class AltRefForcedKeyTestLarge
- : public ::libaom_test::CodecTestWith2Params<libaom_test::TestMode, int>,
- public ::libaom_test::EncoderTest {
- protected:
- AltRefForcedKeyTestLarge()
- : EncoderTest(GET_PARAM(0)), encoding_mode_(GET_PARAM(1)),
- cpu_used_(GET_PARAM(2)), forced_kf_frame_num_(1), frame_num_(0) {}
- virtual ~AltRefForcedKeyTestLarge() {}
-
- virtual void SetUp() {
- InitializeConfig();
- SetMode(encoding_mode_);
- cfg_.rc_end_usage = AOM_VBR;
- cfg_.g_threads = 0;
- }
-
- virtual void PreEncodeFrameHook(::libaom_test::VideoSource *video,
- ::libaom_test::Encoder *encoder) {
- if (video->frame() == 0) {
- encoder->Control(AOME_SET_CPUUSED, cpu_used_);
- encoder->Control(AOME_SET_ENABLEAUTOALTREF, 1);
-#if CONFIG_AV1_ENCODER
- // override test default for tile columns if necessary.
- if (GET_PARAM(0) == &libaom_test::kAV1) {
- encoder->Control(AV1E_SET_TILE_COLUMNS, 6);
- }
-#endif
- }
- frame_flags_ =
- (video->frame() == forced_kf_frame_num_) ? AOM_EFLAG_FORCE_KF : 0;
- }
-
- virtual void FramePktHook(const aom_codec_cx_pkt_t *pkt) {
- if (frame_num_ == forced_kf_frame_num_) {
- ASSERT_TRUE(!!(pkt->data.frame.flags & AOM_FRAME_IS_KEY))
- << "Frame #" << frame_num_ << " isn't a keyframe!";
- }
- ++frame_num_;
- }
-
- ::libaom_test::TestMode encoding_mode_;
- int cpu_used_;
- unsigned int forced_kf_frame_num_;
- unsigned int frame_num_;
-};
-
-TEST_P(AltRefForcedKeyTestLarge, Frame1IsKey) {
- const aom_rational timebase = { 1, 30 };
- const int lag_values[] = { 3, 15, 25, -1 };
-
- forced_kf_frame_num_ = 1;
- for (int i = 0; lag_values[i] != -1; ++i) {
- frame_num_ = 0;
- cfg_.g_lag_in_frames = lag_values[i];
- libaom_test::I420VideoSource video("hantro_collage_w352h288.yuv", 352, 288,
- timebase.den, timebase.num, 0, 30);
- ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
- }
-}
-
-TEST_P(AltRefForcedKeyTestLarge, ForcedFrameIsKey) {
- const aom_rational timebase = { 1, 30 };
- const int lag_values[] = { 3, 15, 25, -1 };
-
- for (int i = 0; lag_values[i] != -1; ++i) {
- frame_num_ = 0;
- forced_kf_frame_num_ = lag_values[i] - 1;
- cfg_.g_lag_in_frames = lag_values[i];
- libaom_test::I420VideoSource video("hantro_collage_w352h288.yuv", 352, 288,
- timebase.den, timebase.num, 0, 30);
- ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
- }
-}
-
-AV1_INSTANTIATE_TEST_CASE(AltRefForcedKeyTestLarge,
- ::testing::Values(::libaom_test::kOnePassGood),
- ::testing::Values(2, 5));
-
-} // namespace
diff --git a/third_party/aom/test/aom_integer_test.cc b/third_party/aom/test/aom_integer_test.cc
deleted file mode 100644
index fe88a54e9..000000000
--- a/third_party/aom/test/aom_integer_test.cc
+++ /dev/null
@@ -1,177 +0,0 @@
-/*
- * Copyright (c) 2018, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#include "aom/aom_integer.h"
-#include "third_party/googletest/src/googletest/include/gtest/gtest.h"
-
-namespace {
-const uint64_t kMaximumLeb128CodedSize = 8;
-const uint8_t kLeb128PadByte = 0x80; // Binary: 10000000
-const uint64_t kMaximumLeb128Value = UINT32_MAX;
-const uint32_t kSizeTestNumValues = 6;
-const uint32_t kSizeTestExpectedSizes[kSizeTestNumValues] = {
- 1, 1, 2, 3, 4, 5
-};
-const uint64_t kSizeTestInputs[kSizeTestNumValues] = {
- 0, 0x7f, 0x3fff, 0x1fffff, 0xffffff, 0x10000000
-};
-
-const uint8_t kOutOfRangeLeb128Value[5] = { 0x80, 0x80, 0x80, 0x80,
- 0x10 }; // UINT32_MAX + 1
-} // namespace
-
-TEST(AomLeb128, DecodeTest) {
- const size_t num_leb128_bytes = 3;
- const uint8_t leb128_bytes[num_leb128_bytes] = { 0xE5, 0x8E, 0x26 };
- const uint64_t expected_value = 0x98765; // 624485
- const size_t expected_length = 3;
- uint64_t value = ~0ULL; // make sure value is cleared by the function
- size_t length;
- ASSERT_EQ(
- aom_uleb_decode(&leb128_bytes[0], num_leb128_bytes, &value, &length), 0);
- ASSERT_EQ(expected_value, value);
- ASSERT_EQ(expected_length, length);
-
- // Make sure the decoder stops on the last marked LEB128 byte.
- aom_uleb_decode(&leb128_bytes[0], num_leb128_bytes + 1, &value, &length);
- ASSERT_EQ(expected_value, value);
- ASSERT_EQ(expected_length, length);
-}
-
-TEST(AomLeb128, EncodeTest) {
- const uint32_t test_value = 0x98765; // 624485
- const uint8_t expected_bytes[3] = { 0xE5, 0x8E, 0x26 };
- const size_t kWriteBufferSize = 4;
- uint8_t write_buffer[kWriteBufferSize] = { 0 };
- size_t bytes_written = 0;
- ASSERT_EQ(aom_uleb_encode(test_value, kWriteBufferSize, &write_buffer[0],
- &bytes_written),
- 0);
- ASSERT_EQ(bytes_written, 3u);
- for (size_t i = 0; i < bytes_written; ++i) {
- ASSERT_EQ(write_buffer[i], expected_bytes[i]);
- }
-}
-
-TEST(AomLeb128, EncodeDecodeTest) {
- const uint32_t value = 0x98765; // 624485
- const size_t kWriteBufferSize = 4;
- uint8_t write_buffer[kWriteBufferSize] = { 0 };
- size_t bytes_written = 0;
- ASSERT_EQ(aom_uleb_encode(value, kWriteBufferSize, &write_buffer[0],
- &bytes_written),
- 0);
- ASSERT_EQ(bytes_written, 3u);
- uint64_t decoded_value;
- size_t decoded_length;
- aom_uleb_decode(&write_buffer[0], bytes_written, &decoded_value,
- &decoded_length);
- ASSERT_EQ(value, decoded_value);
- ASSERT_EQ(bytes_written, decoded_length);
-}
-
-TEST(AomLeb128, FixedSizeEncodeTest) {
- const uint32_t test_value = 0x123;
- const uint8_t expected_bytes[4] = { 0xa3, 0x82, 0x80, 0x00 };
- const size_t kWriteBufferSize = 4;
- uint8_t write_buffer[kWriteBufferSize] = { 0 };
- size_t bytes_written = 0;
- ASSERT_EQ(0, aom_uleb_encode_fixed_size(test_value, kWriteBufferSize,
- kWriteBufferSize, &write_buffer[0],
- &bytes_written));
- ASSERT_EQ(kWriteBufferSize, bytes_written);
- for (size_t i = 0; i < bytes_written; ++i) {
- ASSERT_EQ(write_buffer[i], expected_bytes[i]);
- }
-}
-
-TEST(AomLeb128, FixedSizeEncodeDecodeTest) {
- const uint32_t value = 0x1;
- const size_t kWriteBufferSize = 4;
- uint8_t write_buffer[kWriteBufferSize] = { 0 };
- size_t bytes_written = 0;
- ASSERT_EQ(
- aom_uleb_encode_fixed_size(value, kWriteBufferSize, kWriteBufferSize,
- &write_buffer[0], &bytes_written),
- 0);
- ASSERT_EQ(bytes_written, 4u);
- uint64_t decoded_value;
- size_t decoded_length;
- aom_uleb_decode(&write_buffer[0], bytes_written, &decoded_value,
- &decoded_length);
- ASSERT_EQ(value, decoded_value);
- ASSERT_EQ(bytes_written, decoded_length);
-}
-
-TEST(AomLeb128, SizeTest) {
- for (size_t i = 0; i < kSizeTestNumValues; ++i) {
- ASSERT_EQ(kSizeTestExpectedSizes[i],
- aom_uleb_size_in_bytes(kSizeTestInputs[i]));
- }
-}
-
-TEST(AomLeb128, DecodeFailTest) {
- // Input buffer containing what would be a valid 9 byte LEB128 encoded
- // unsigned integer.
- const uint8_t kAllPadBytesBuffer[kMaximumLeb128CodedSize + 1] = {
- kLeb128PadByte, kLeb128PadByte, kLeb128PadByte,
- kLeb128PadByte, kLeb128PadByte, kLeb128PadByte,
- kLeb128PadByte, kLeb128PadByte, 0
- };
- uint64_t decoded_value;
-
- // Test that decode fails when result would be valid 9 byte integer.
- ASSERT_EQ(aom_uleb_decode(&kAllPadBytesBuffer[0], kMaximumLeb128CodedSize + 1,
- &decoded_value, NULL),
- -1);
-
- // Test that encoded value missing terminator byte within available buffer
- // range causes decode error.
- ASSERT_EQ(aom_uleb_decode(&kAllPadBytesBuffer[0], kMaximumLeb128CodedSize,
- &decoded_value, NULL),
- -1);
-
- // Test that LEB128 input that decodes to a value larger than 32-bits fails.
- size_t value_size = 0;
- ASSERT_EQ(aom_uleb_decode(&kOutOfRangeLeb128Value[0],
- sizeof(kOutOfRangeLeb128Value), &decoded_value,
- &value_size),
- -1);
-}
-
-TEST(AomLeb128, EncodeFailTest) {
- const size_t kWriteBufferSize = 4;
- const uint32_t kValidTestValue = 1;
- uint8_t write_buffer[kWriteBufferSize] = { 0 };
- size_t coded_size = 0;
- ASSERT_EQ(
- aom_uleb_encode(kValidTestValue, kWriteBufferSize, NULL, &coded_size),
- -1);
- ASSERT_EQ(aom_uleb_encode(kValidTestValue, kWriteBufferSize, &write_buffer[0],
- NULL),
- -1);
-
- const uint32_t kValueOutOfRangeForBuffer = 0xFFFFFFFF;
- ASSERT_EQ(aom_uleb_encode(kValueOutOfRangeForBuffer, kWriteBufferSize,
- &write_buffer[0], &coded_size),
- -1);
-
- const uint64_t kValueOutOfRange = kMaximumLeb128Value + 1;
- ASSERT_EQ(aom_uleb_encode(kValueOutOfRange, kWriteBufferSize,
- &write_buffer[0], &coded_size),
- -1);
-
- const size_t kPadSizeOutOfRange = 5;
- ASSERT_EQ(aom_uleb_encode_fixed_size(kValidTestValue, kWriteBufferSize,
- kPadSizeOutOfRange, &write_buffer[0],
- &coded_size),
- -1);
-}
diff --git a/third_party/aom/test/aomcx_set_ref.sh b/third_party/aom/test/aomcx_set_ref.sh
deleted file mode 100755
index f51b73c58..000000000
--- a/third_party/aom/test/aomcx_set_ref.sh
+++ /dev/null
@@ -1,58 +0,0 @@
-#!/bin/sh
-## Copyright (c) 2016, Alliance for Open Media. All rights reserved
-##
-## This source code is subject to the terms of the BSD 2 Clause License and
-## the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
-## was not distributed with this source code in the LICENSE file, you can
-## obtain it at www.aomedia.org/license/software. If the Alliance for Open
-## Media Patent License 1.0 was not distributed with this source code in the
-## PATENTS file, you can obtain it at www.aomedia.org/license/patent.
-##
-## This file tests the libaom aom_cx_set_ref example. To add new tests to this
-## file, do the following:
-## 1. Write a shell function (this is your test).
-## 2. Add the function to aom_cx_set_ref_tests (on a new line).
-##
-. $(dirname $0)/tools_common.sh
-
-# Environment check: $YUV_RAW_INPUT is required.
-aom_cx_set_ref_verify_environment() {
- if [ ! -e "${YUV_RAW_INPUT}" ]; then
- echo "Libaom test data must exist in LIBAOM_TEST_DATA_PATH."
- return 1
- fi
-}
-
-# Runs aom_cx_set_ref and updates the reference frame before encoding frame 90.
-# $1 is the codec name, which aom_cx_set_ref does not support at present: It's
-# currently used only to name the output file.
-# TODO(tomfinegan): Pass the codec param once the example is updated to support
-# AV1.
-aom_set_ref() {
- local encoder="${LIBAOM_BIN_PATH}/aom_cx_set_ref${AOM_TEST_EXE_SUFFIX}"
- local codec="$1"
- local output_file="${AOM_TEST_OUTPUT_DIR}/aom_cx_set_ref_${codec}.ivf"
- local ref_frame_num=4
- local limit=10
- if [ ! -x "${encoder}" ]; then
- elog "${encoder} does not exist or is not executable."
- return 1
- fi
-
- eval "${AOM_TEST_PREFIX}" "${encoder}" "${codec}" "${YUV_RAW_INPUT_WIDTH}" \
- "${YUV_RAW_INPUT_HEIGHT}" "${YUV_RAW_INPUT}" "${output_file}" \
- "${ref_frame_num}" "${limit}" ${devnull}
-
- [ -e "${output_file}" ] || return 1
-}
-
-aom_cx_set_ref_av1() {
- if [ "$(av1_encode_available)" = "yes" ]; then
- aom_set_ref av1 || return 1
- fi
-}
-
-aom_cx_set_ref_tests="aom_cx_set_ref_av1"
-
-run_tests aom_cx_set_ref_verify_environment "${aom_cx_set_ref_tests}"
-
diff --git a/third_party/aom/test/aomdec.sh b/third_party/aom/test/aomdec.sh
deleted file mode 100755
index 927142287..000000000
--- a/third_party/aom/test/aomdec.sh
+++ /dev/null
@@ -1,147 +0,0 @@
-#!/bin/sh
-## Copyright (c) 2016, Alliance for Open Media. All rights reserved
-##
-## This source code is subject to the terms of the BSD 2 Clause License and
-## the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
-## was not distributed with this source code in the LICENSE file, you can
-## obtain it at www.aomedia.org/license/software. If the Alliance for Open
-## Media Patent License 1.0 was not distributed with this source code in the
-## PATENTS file, you can obtain it at www.aomedia.org/license/patent.
-##
-## This file tests aomdec. To add new tests to this file, do the following:
-## 1. Write a shell function (this is your test).
-## 2. Add the function to aomdec_tests (on a new line).
-##
-. $(dirname $0)/tools_common.sh
-
-# Environment check: Make sure input is available.
-aomdec_verify_environment() {
- if [ "$(av1_encode_available)" != "yes" ] ; then
- if [ ! -e "${AV1_IVF_FILE}" ] || \
- [ ! -e "${AV1_OBU_ANNEXB_FILE}" ] || \
- [ ! -e "${AV1_OBU_SEC5_FILE}" ] || \
- [ ! -e "${AV1_WEBM_FILE}" ]; then
- elog "Libaom test data must exist before running this test script when " \
- " encoding is disabled. "
- return 1
- fi
- fi
- if [ -z "$(aom_tool_path aomdec)" ]; then
- elog "aomdec not found. It must exist in LIBAOM_BIN_PATH or its parent."
- return 1
- fi
-}
-
-# Wrapper function for running aomdec with pipe input. Requires that
-# LIBAOM_BIN_PATH points to the directory containing aomdec. $1 is used as the
-# input file path and shifted away. All remaining parameters are passed through
-# to aomdec.
-aomdec_pipe() {
- local input="$1"
- shift
- if [ ! -e "${input}" ]; then
- elog "Input file ($input) missing in aomdec_pipe()"
- return 1
- fi
- cat "${file}" | aomdec - "$@" ${devnull}
-}
-
-
-# Wrapper function for running aomdec. Requires that LIBAOM_BIN_PATH points to
-# the directory containing aomdec. $1 one is used as the input file path and
-# shifted away. All remaining parameters are passed through to aomdec.
-aomdec() {
- local decoder="$(aom_tool_path aomdec)"
- local input="$1"
- shift
- eval "${AOM_TEST_PREFIX}" "${decoder}" "$input" "$@" ${devnull}
-}
-
-aomdec_can_decode_av1() {
- if [ "$(av1_decode_available)" = "yes" ]; then
- echo yes
- fi
-}
-
-aomdec_av1_ivf() {
- if [ "$(aomdec_can_decode_av1)" = "yes" ]; then
- local file="${AV1_IVF_FILE}"
- if [ ! -e "${file}" ]; then
- encode_yuv_raw_input_av1 "${file}" --ivf
- fi
- aomdec "${AV1_IVF_FILE}" --summary --noblit
- fi
-}
-
-aomdec_av1_ivf_error_resilient() {
- if [ "$(aomdec_can_decode_av1)" = "yes" ]; then
- local file="av1.error-resilient.ivf"
- if [ ! -e "${file}" ]; then
- encode_yuv_raw_input_av1 "${file}" --ivf --error-resilient=1
- fi
- aomdec "${file}" --summary --noblit
- fi
-}
-
-aomdec_av1_ivf_multithread() {
- if [ "$(aomdec_can_decode_av1)" = "yes" ]; then
- local file="${AV1_IVF_FILE}"
- if [ ! -e "${file}" ]; then
- encode_yuv_raw_input_av1 "${file}" --ivf
- fi
- for threads in 2 3 4 5 6 7 8; do
- aomdec "${file}" --summary --noblit --threads=$threads
- done
- fi
-}
-
-aomdec_aom_ivf_pipe_input() {
- if [ "$(aomdec_can_decode_av1)" = "yes" ]; then
- local file="${AV1_IVF_FILE}"
- if [ ! -e "${file}" ]; then
- encode_yuv_raw_input_av1 "${file}" --ivf
- fi
- aomdec_pipe "${AV1_IVF_FILE}" --summary --noblit
- fi
-}
-
-aomdec_av1_obu_annexb() {
- if [ "$(aomdec_can_decode_av1)" = "yes" ]; then
- local file="${AV1_OBU_ANNEXB_FILE}"
- if [ ! -e "${file}" ]; then
- encode_yuv_raw_input_av1 "${file}" --obu --annexb=1
- fi
- aomdec "${file}" --summary --noblit --annexb
- fi
-}
-
-aomdec_av1_obu_section5() {
- if [ "$(aomdec_can_decode_av1)" = "yes" ]; then
- local file="${AV1_OBU_SEC5_FILE}"
- if [ ! -e "${file}" ]; then
- encode_yuv_raw_input_av1 "${file}" --obu
- fi
- aomdec "${file}" --summary --noblit
- fi
-}
-
-aomdec_av1_webm() {
- if [ "$(aomdec_can_decode_av1)" = "yes" ] && \
- [ "$(webm_io_available)" = "yes" ]; then
- local file="${AV1_WEBM_FILE}"
- if [ ! -e "${file}" ]; then
- encode_yuv_raw_input_av1 "${file}"
- fi
- aomdec "${AV1_WEBM_FILE}" --summary --noblit
- fi
-}
-
-aomdec_tests="aomdec_av1_ivf
- aomdec_av1_ivf_error_resilient
- aomdec_av1_ivf_multithread
- aomdec_aom_ivf_pipe_input
- aomdec_av1_obu_annexb
- aomdec_av1_obu_section5
- aomdec_av1_webm"
-
-run_tests aomdec_verify_environment "${aomdec_tests}"
diff --git a/third_party/aom/test/aomenc.sh b/third_party/aom/test/aomenc.sh
deleted file mode 100755
index b030397a3..000000000
--- a/third_party/aom/test/aomenc.sh
+++ /dev/null
@@ -1,269 +0,0 @@
-#!/bin/sh
-## Copyright (c) 2016, Alliance for Open Media. All rights reserved
-##
-## This source code is subject to the terms of the BSD 2 Clause License and
-## the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
-## was not distributed with this source code in the LICENSE file, you can
-## obtain it at www.aomedia.org/license/software. If the Alliance for Open
-## Media Patent License 1.0 was not distributed with this source code in the
-## PATENTS file, you can obtain it at www.aomedia.org/license/patent.
-##
-## This file tests aomenc using hantro_collage_w352h288.yuv as input. To add
-## new tests to this file, do the following:
-## 1. Write a shell function (this is your test).
-## 2. Add the function to aomenc_tests (on a new line).
-##
-. $(dirname $0)/tools_common.sh
-
-# Environment check: Make sure input is available.
-aomenc_verify_environment() {
- if [ ! -e "${YUV_RAW_INPUT}" ]; then
- elog "The file ${YUV_RAW_INPUT##*/} must exist in LIBAOM_TEST_DATA_PATH."
- return 1
- fi
- if [ "$(aomenc_can_encode_av1)" = "yes" ]; then
- if [ ! -e "${Y4M_NOSQ_PAR_INPUT}" ]; then
- elog "The file ${Y4M_NOSQ_PAR_INPUT##*/} must exist in"
- elog "LIBAOM_TEST_DATA_PATH."
- return 1
- fi
- fi
- if [ -z "$(aom_tool_path aomenc)" ]; then
- elog "aomenc not found. It must exist in LIBAOM_BIN_PATH or its parent."
- return 1
- fi
-}
-
-aomenc_can_encode_av1() {
- if [ "$(av1_encode_available)" = "yes" ]; then
- echo yes
- fi
-}
-
-aomenc_can_encode_av1() {
- if [ "$(av1_encode_available)" = "yes" ]; then
- echo yes
- fi
-}
-
-# Utilities that echo aomenc input file parameters.
-y4m_input_non_square_par() {
- echo ""${Y4M_NOSQ_PAR_INPUT}""
-}
-
-y4m_input_720p() {
- echo ""${Y4M_720P_INPUT}""
-}
-
-# Wrapper function for running aomenc with pipe input. Requires that
-# LIBAOM_BIN_PATH points to the directory containing aomenc. $1 is used as the
-# input file path and shifted away. All remaining parameters are passed through
-# to aomenc.
-aomenc_pipe() {
- local encoder="$(aom_tool_path aomenc)"
- local input="$1"
- shift
- cat "${input}" | eval "${AOM_TEST_PREFIX}" "${encoder}" - \
- --test-decode=fatal \
- "$@" ${devnull}
-}
-
-# Wrapper function for running aomenc. Requires that LIBAOM_BIN_PATH points to
-# the directory containing aomenc. $1 one is used as the input file path and
-# shifted away. All remaining parameters are passed through to aomenc.
-aomenc() {
- local encoder="$(aom_tool_path aomenc)"
- local input="$1"
- shift
- eval "${AOM_TEST_PREFIX}" "${encoder}" "${input}" \
- --test-decode=fatal \
- "$@" ${devnull}
-}
-
-aomenc_av1_ivf() {
- if [ "$(aomenc_can_encode_av1)" = "yes" ]; then
- local output="${AV1_IVF_FILE}"
- if [ -e "${AV1_IVF_FILE}" ]; then
- output="${AOM_TEST_OUTPUT_DIR}/av1_test.ivf"
- fi
- aomenc $(yuv_raw_input) \
- $(aomenc_encode_test_fast_params) \
- --ivf \
- --output="${output}"
-
- if [ ! -e "${output}" ]; then
- elog "Output file does not exist."
- return 1
- fi
- fi
-}
-
-aomenc_av1_obu_annexb() {
- if [ "$(aomenc_can_encode_av1)" = "yes" ]; then
- local output="${AV1_OBU_ANNEXB_FILE}"
- if [ -e "${AV1_OBU_ANNEXB_FILE}" ]; then
- output="${AOM_TEST_OUTPUT_DIR}/av1_test.annexb.obu"
- fi
- aomenc $(yuv_raw_input) \
- $(aomenc_encode_test_fast_params) \
- --obu \
- --annexb=1 \
- --output="${output}"
-
- if [ ! -e "${output}" ]; then
- elog "Output file does not exist."
- return 1
- fi
- fi
-}
-
-aomenc_av1_obu_section5() {
- if [ "$(aomenc_can_encode_av1)" = "yes" ]; then
- local output="${AV1_OBU_SEC5_FILE}"
- if [ -e "${AV1_OBU_SEC5_FILE}" ]; then
- output="${AOM_TEST_OUTPUT_DIR}/av1_test.section5.obu"
- fi
- aomenc $(yuv_raw_input) \
- $(aomenc_encode_test_fast_params) \
- --obu \
- --output="${output}"
-
- if [ ! -e "${output}" ]; then
- elog "Output file does not exist."
- return 1
- fi
- fi
-}
-
-aomenc_av1_webm() {
- if [ "$(aomenc_can_encode_av1)" = "yes" ] && \
- [ "$(webm_io_available)" = "yes" ]; then
- local output="${AV1_WEBM_FILE}"
- if [ -e "${AV1_WEBM_FILE}" ]; then
- output="${AOM_TEST_OUTPUT_DIR}/av1_test.webm"
- fi
- aomenc $(yuv_raw_input) \
- $(aomenc_encode_test_fast_params) \
- --output="${output}"
-
- if [ ! -e "${output}" ]; then
- elog "Output file does not exist."
- return 1
- fi
- fi
-}
-
-aomenc_av1_webm_1pass() {
- if [ "$(aomenc_can_encode_av1)" = "yes" ] && \
- [ "$(webm_io_available)" = "yes" ]; then
- local output="${AOM_TEST_OUTPUT_DIR}/av1_test.webm"
- aomenc $(yuv_raw_input) \
- $(aomenc_encode_test_fast_params) \
- --passes=1 \
- --output="${output}"
-
- if [ ! -e "${output}" ]; then
- elog "Output file does not exist."
- return 1
- fi
- fi
-}
-
-aomenc_av1_ivf_lossless() {
- if [ "$(aomenc_can_encode_av1)" = "yes" ]; then
- local output="${AOM_TEST_OUTPUT_DIR}/av1_lossless.ivf"
- aomenc $(yuv_raw_input) \
- $(aomenc_encode_test_fast_params) \
- --ivf \
- --output="${output}" \
- --lossless=1
-
- if [ ! -e "${output}" ]; then
- elog "Output file does not exist."
- return 1
- fi
- fi
-}
-
-aomenc_av1_ivf_minq0_maxq0() {
- if [ "$(aomenc_can_encode_av1)" = "yes" ]; then
- local output="${AOM_TEST_OUTPUT_DIR}/av1_lossless_minq0_maxq0.ivf"
- aomenc $(yuv_raw_input) \
- $(aomenc_encode_test_fast_params) \
- --ivf \
- --output="${output}" \
- --min-q=0 \
- --max-q=0
-
- if [ ! -e "${output}" ]; then
- elog "Output file does not exist."
- return 1
- fi
- fi
-}
-
-aomenc_av1_webm_lag5_frames10() {
- if [ "$(aomenc_can_encode_av1)" = "yes" ] && \
- [ "$(webm_io_available)" = "yes" ]; then
- local lag_total_frames=10
- local lag_frames=5
- local output="${AOM_TEST_OUTPUT_DIR}/av1_lag5_frames10.webm"
- aomenc $(yuv_raw_input) \
- $(aomenc_encode_test_fast_params) \
- --limit=${lag_total_frames} \
- --lag-in-frames=${lag_frames} \
- --output="${output}"
-
- if [ ! -e "${output}" ]; then
- elog "Output file does not exist."
- return 1
- fi
- fi
-}
-
-# TODO(fgalligan): Test that DisplayWidth is different than video width.
-aomenc_av1_webm_non_square_par() {
- if [ "$(aomenc_can_encode_av1)" = "yes" ] && \
- [ "$(webm_io_available)" = "yes" ]; then
- local output="${AOM_TEST_OUTPUT_DIR}/av1_non_square_par.webm"
- aomenc $(y4m_input_non_square_par) \
- $(aomenc_encode_test_fast_params) \
- --output="${output}"
-
- if [ ! -e "${output}" ]; then
- elog "Output file does not exist."
- return 1
- fi
- fi
-}
-
-aomenc_av1_webm_cdf_update_mode() {
- if [ "$(aomenc_can_encode_av1)" = "yes" ] && \
- [ "$(webm_io_available)" = "yes" ]; then
- for mode in 0 1 2; do
- local output="${AOM_TEST_OUTPUT_DIR}/cdf_mode_${mode}.webm"
- aomenc $(yuv_raw_input) \
- $(aomenc_encode_test_fast_params) \
- --cdf-update-mode=${mode} \
- --output="${output}"
-
- if [ ! -e "${output}" ]; then
- elog "Output file does not exist."
- return 1
- fi
- done
- fi
-}
-
-aomenc_tests="aomenc_av1_ivf
- aomenc_av1_obu_annexb
- aomenc_av1_obu_section5
- aomenc_av1_webm
- aomenc_av1_webm_1pass
- aomenc_av1_ivf_lossless
- aomenc_av1_ivf_minq0_maxq0
- aomenc_av1_webm_lag5_frames10
- aomenc_av1_webm_non_square_par
- aomenc_av1_webm_cdf_update_mode"
-
-run_tests aomenc_verify_environment "${aomenc_tests}"
diff --git a/third_party/aom/test/aq_segment_test.cc b/third_party/aom/test/aq_segment_test.cc
deleted file mode 100644
index bbb5027d4..000000000
--- a/third_party/aom/test/aq_segment_test.cc
+++ /dev/null
@@ -1,108 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#include "config/aom_config.h"
-
-#include "third_party/googletest/src/googletest/include/gtest/gtest.h"
-#include "test/codec_factory.h"
-#include "test/encode_test_driver.h"
-#include "test/i420_video_source.h"
-#include "test/util.h"
-
-namespace {
-
-class AqSegmentTest
- : public ::libaom_test::CodecTestWith2Params<libaom_test::TestMode, int>,
- public ::libaom_test::EncoderTest {
- protected:
- AqSegmentTest() : EncoderTest(GET_PARAM(0)) {}
- virtual ~AqSegmentTest() {}
-
- virtual void SetUp() {
- InitializeConfig();
- SetMode(GET_PARAM(1));
- set_cpu_used_ = GET_PARAM(2);
- aq_mode_ = 0;
- }
-
- virtual void PreEncodeFrameHook(::libaom_test::VideoSource *video,
- ::libaom_test::Encoder *encoder) {
- if (video->frame() == 1) {
- encoder->Control(AOME_SET_CPUUSED, set_cpu_used_);
- encoder->Control(AV1E_SET_AQ_MODE, aq_mode_);
- encoder->Control(AV1E_SET_DELTAQ_MODE, deltaq_mode_);
- encoder->Control(AOME_SET_MAX_INTRA_BITRATE_PCT, 100);
- }
- }
-
- void DoTest(int aq_mode) {
- aq_mode_ = aq_mode;
- deltaq_mode_ = 0;
- cfg_.kf_max_dist = 12;
- cfg_.rc_min_quantizer = 8;
- cfg_.rc_max_quantizer = 56;
- cfg_.rc_end_usage = AOM_CBR;
- cfg_.g_lag_in_frames = 6;
- cfg_.rc_buf_initial_sz = 500;
- cfg_.rc_buf_optimal_sz = 500;
- cfg_.rc_buf_sz = 1000;
- cfg_.rc_target_bitrate = 300;
- ::libaom_test::I420VideoSource video("hantro_collage_w352h288.yuv", 352,
- 288, 30, 1, 0, 15);
- ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
- }
-
- int set_cpu_used_;
- int aq_mode_;
- int deltaq_mode_;
-};
-
-// Validate that this AQ segmentation mode (AQ=1, variance_ap)
-// encodes and decodes without a mismatch.
-TEST_P(AqSegmentTest, TestNoMisMatchAQ1) { DoTest(1); }
-
-// Validate that this AQ segmentation mode (AQ=2, complexity_aq)
-// encodes and decodes without a mismatch.
-TEST_P(AqSegmentTest, TestNoMisMatchAQ2) { DoTest(2); }
-
-// Validate that this AQ segmentation mode (AQ=3, cyclic_refresh_aq)
-// encodes and decodes without a mismatch.
-TEST_P(AqSegmentTest, TestNoMisMatchAQ3) { DoTest(3); }
-
-class AqSegmentTestLarge : public AqSegmentTest {};
-
-TEST_P(AqSegmentTestLarge, TestNoMisMatchAQ1) { DoTest(1); }
-
-TEST_P(AqSegmentTestLarge, TestNoMisMatchAQ2) { DoTest(2); }
-
-TEST_P(AqSegmentTestLarge, TestNoMisMatchAQ3) { DoTest(3); }
-
-// Validate that this delta q mode
-// encodes and decodes without a mismatch.
-TEST_P(AqSegmentTest, TestNoMisMatchExtDeltaQ) {
- cfg_.rc_end_usage = AOM_CQ;
- aq_mode_ = 0;
- deltaq_mode_ = 2;
- ::libaom_test::I420VideoSource video("hantro_collage_w352h288.yuv", 352, 288,
- 30, 1, 0, 15);
-
- ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
-}
-
-AV1_INSTANTIATE_TEST_CASE(AqSegmentTest,
- ::testing::Values(::libaom_test::kRealTime,
- ::libaom_test::kOnePassGood),
- ::testing::Range(5, 9));
-AV1_INSTANTIATE_TEST_CASE(AqSegmentTestLarge,
- ::testing::Values(::libaom_test::kRealTime,
- ::libaom_test::kOnePassGood),
- ::testing::Range(3, 5));
-} // namespace
diff --git a/third_party/aom/test/arf_freq_test.cc b/third_party/aom/test/arf_freq_test.cc
deleted file mode 100644
index 083f4022f..000000000
--- a/third_party/aom/test/arf_freq_test.cc
+++ /dev/null
@@ -1,223 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#include "third_party/googletest/src/googletest/include/gtest/gtest.h"
-
-#include "test/codec_factory.h"
-#include "test/encode_test_driver.h"
-#include "test/util.h"
-#include "test/y4m_video_source.h"
-#include "test/yuv_video_source.h"
-#include "av1/encoder/ratectrl.h"
-
-namespace {
-
-const unsigned int kFrames = 100;
-const int kBitrate = 500;
-
-#define ARF_NOT_SEEN 1000001
-#define ARF_SEEN_ONCE 1000000
-
-typedef struct {
- const char *filename;
- unsigned int width;
- unsigned int height;
- unsigned int framerate_num;
- unsigned int framerate_den;
- unsigned int input_bit_depth;
- aom_img_fmt fmt;
- aom_bit_depth_t bit_depth;
- unsigned int profile;
-} TestVideoParam;
-
-typedef struct {
- libaom_test::TestMode mode;
- int cpu_used;
-} TestEncodeParam;
-
-const TestVideoParam kTestVectors[] = {
- // artificially increase framerate to trigger default check
- { "hantro_collage_w352h288.yuv", 352, 288, 5000, 1, 8, AOM_IMG_FMT_I420,
- AOM_BITS_8, 0 },
- { "hantro_collage_w352h288.yuv", 352, 288, 30, 1, 8, AOM_IMG_FMT_I420,
- AOM_BITS_8, 0 },
- { "rush_hour_444.y4m", 352, 288, 30, 1, 8, AOM_IMG_FMT_I444, AOM_BITS_8, 1 },
- // Add list of profile 2/3 test videos here ...
-};
-
-const TestEncodeParam kEncodeVectors[] = {
- { ::libaom_test::kOnePassGood, 2 }, { ::libaom_test::kOnePassGood, 5 },
- { ::libaom_test::kTwoPassGood, 1 }, { ::libaom_test::kTwoPassGood, 2 },
- { ::libaom_test::kTwoPassGood, 5 }, { ::libaom_test::kRealTime, 5 },
-};
-
-const int kMinArfVectors[] = {
- // NOTE: 0 refers to the default built-in logic in:
- // av1_rc_get_default_min_gf_interval(...)
- 0, 4, 8, 12, 15
-};
-
-int is_extension_y4m(const char *filename) {
- const char *dot = strrchr(filename, '.');
- if (!dot || dot == filename)
- return 0;
- else
- return !strcmp(dot, ".y4m");
-}
-
-class ArfFreqTestLarge
- : public ::libaom_test::CodecTestWith3Params<TestVideoParam,
- TestEncodeParam, int>,
- public ::libaom_test::EncoderTest {
- protected:
- ArfFreqTestLarge()
- : EncoderTest(GET_PARAM(0)), test_video_param_(GET_PARAM(1)),
- test_encode_param_(GET_PARAM(2)), min_arf_requested_(GET_PARAM(3)) {}
-
- virtual ~ArfFreqTestLarge() {}
-
- virtual void SetUp() {
- InitializeConfig();
- SetMode(test_encode_param_.mode);
- if (test_encode_param_.mode != ::libaom_test::kRealTime) {
- cfg_.g_lag_in_frames = 25;
- cfg_.rc_end_usage = AOM_VBR;
- } else {
- cfg_.g_lag_in_frames = 0;
- cfg_.rc_end_usage = AOM_CBR;
- cfg_.rc_buf_sz = 1000;
- cfg_.rc_buf_initial_sz = 500;
- cfg_.rc_buf_optimal_sz = 600;
- }
- }
-
- virtual void BeginPassHook(unsigned int) {
- min_run_ = ARF_NOT_SEEN;
- run_of_visible_frames_ = 0;
- }
-
- int GetNumFramesInPkt(const aom_codec_cx_pkt_t *pkt) {
- const uint8_t *buffer = reinterpret_cast<uint8_t *>(pkt->data.frame.buf);
- const uint8_t marker = buffer[pkt->data.frame.sz - 1];
- const int mag = ((marker >> 3) & 3) + 1;
- int frames = (marker & 0x7) + 1;
- const unsigned int index_sz = 2 + mag * frames;
- // Check for superframe or not.
- // Assume superframe has only one visible frame, the rest being
- // invisible. If superframe index is not found, then there is only
- // one frame.
- if (!((marker & 0xe0) == 0xc0 && pkt->data.frame.sz >= index_sz &&
- buffer[pkt->data.frame.sz - index_sz] == marker)) {
- frames = 1;
- }
- return frames;
- }
-
- virtual void FramePktHook(const aom_codec_cx_pkt_t *pkt) {
- if (pkt->kind != AOM_CODEC_CX_FRAME_PKT) return;
- const int frames = GetNumFramesInPkt(pkt);
- if (frames == 1) {
- run_of_visible_frames_++;
- } else if (frames == 2) {
- if (min_run_ == ARF_NOT_SEEN) {
- min_run_ = ARF_SEEN_ONCE;
- } else if (min_run_ == ARF_SEEN_ONCE ||
- run_of_visible_frames_ < min_run_) {
- min_run_ = run_of_visible_frames_;
- }
- run_of_visible_frames_ = 1;
- } else {
- min_run_ = 0;
- run_of_visible_frames_ = 1;
- }
- }
-
- virtual void PreEncodeFrameHook(::libaom_test::VideoSource *video,
- ::libaom_test::Encoder *encoder) {
- if (video->frame() == 0) {
- encoder->Control(AV1E_SET_FRAME_PARALLEL_DECODING, 1);
- encoder->Control(AV1E_SET_TILE_COLUMNS, 4);
- encoder->Control(AOME_SET_CPUUSED, test_encode_param_.cpu_used);
- encoder->Control(AV1E_SET_MIN_GF_INTERVAL, min_arf_requested_);
- if (test_encode_param_.mode != ::libaom_test::kRealTime) {
- encoder->Control(AOME_SET_ENABLEAUTOALTREF, 1);
- encoder->Control(AOME_SET_ARNR_MAXFRAMES, 7);
- encoder->Control(AOME_SET_ARNR_STRENGTH, 5);
- }
- }
- }
-
- int GetMinVisibleRun() const { return min_run_; }
-
- int GetMinArfDistanceRequested() const {
- if (min_arf_requested_)
- return min_arf_requested_;
- else
- return av1_rc_get_default_min_gf_interval(
- test_video_param_.width, test_video_param_.height,
- (double)test_video_param_.framerate_num /
- test_video_param_.framerate_den);
- }
-
- TestVideoParam test_video_param_;
- TestEncodeParam test_encode_param_;
-
- private:
- int min_arf_requested_;
- int min_run_;
- int run_of_visible_frames_;
-};
-
-TEST_P(ArfFreqTestLarge, MinArfFreqTest) {
- cfg_.rc_target_bitrate = kBitrate;
- cfg_.g_error_resilient = 0;
- cfg_.g_profile = test_video_param_.profile;
- cfg_.g_input_bit_depth = test_video_param_.input_bit_depth;
- cfg_.g_bit_depth = test_video_param_.bit_depth;
- init_flags_ = AOM_CODEC_USE_PSNR;
- if (cfg_.g_bit_depth > 8) init_flags_ |= AOM_CODEC_USE_HIGHBITDEPTH;
-
- testing::internal::scoped_ptr<libaom_test::VideoSource> video;
- if (is_extension_y4m(test_video_param_.filename)) {
- video.reset(new libaom_test::Y4mVideoSource(test_video_param_.filename, 0,
- kFrames));
- } else {
- video.reset(new libaom_test::YUVVideoSource(
- test_video_param_.filename, test_video_param_.fmt,
- test_video_param_.width, test_video_param_.height,
- test_video_param_.framerate_num, test_video_param_.framerate_den, 0,
- kFrames));
- }
-
- ASSERT_NO_FATAL_FAILURE(RunLoop(video.get()));
- const int min_run = GetMinVisibleRun();
- const int min_arf_dist_requested = GetMinArfDistanceRequested();
- if (min_run != ARF_NOT_SEEN && min_run != ARF_SEEN_ONCE) {
- const int min_arf_dist = min_run + 1;
- EXPECT_GE(min_arf_dist, min_arf_dist_requested);
- }
-}
-
-#if CONFIG_AV1_ENCODER
-// TODO(angiebird): 25-29 fail in high bitdepth mode.
-// TODO(zoeliu): This ArfFreqTest does not work with BWDREF_FRAME, as
-// BWDREF_FRAME is also a non-show frame, and the minimum run between two
-// consecutive BWDREF_FRAME's may vary between 1 and any arbitrary positive
-// number as long as it does not exceed the gf_group interval.
-INSTANTIATE_TEST_CASE_P(
- DISABLED_AV1, ArfFreqTestLarge,
- ::testing::Combine(
- ::testing::Values(
- static_cast<const libaom_test::CodecFactory *>(&libaom_test::kAV1)),
- ::testing::ValuesIn(kTestVectors), ::testing::ValuesIn(kEncodeVectors),
- ::testing::ValuesIn(kMinArfVectors)));
-#endif // CONFIG_AV1_ENCODER
-} // namespace
diff --git a/third_party/aom/test/av1_config_test.cc b/third_party/aom/test/av1_config_test.cc
deleted file mode 100644
index e2f2c5390..000000000
--- a/third_party/aom/test/av1_config_test.cc
+++ /dev/null
@@ -1,164 +0,0 @@
-/*
- * Copyright (c) 2018, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-#include <string.h>
-
-#include "common/av1_config.h"
-#include "test/util.h"
-#include "third_party/googletest/src/googletest/include/gtest/gtest.h"
-
-namespace {
-
-//
-// Input buffers containing exactly one Sequence Header OBU.
-//
-// Each buffer is named according to the OBU storage format (Annex-B vs Low
-// Overhead Bitstream Format) and the type of Sequence Header OBU ("Full"
-// Sequence Header OBUs vs Sequence Header OBUs with the
-// reduced_still_image_flag set).
-//
-const uint8_t kAnnexBFullSequenceHeaderObu[] = {
- 0x0c, 0x08, 0x00, 0x00, 0x00, 0x04, 0x45, 0x7e, 0x3e, 0xff, 0xfc, 0xc0, 0x20
-};
-const uint8_t kAnnexBReducedStillImageSequenceHeaderObu[] = {
- 0x08, 0x08, 0x18, 0x22, 0x2b, 0xf1, 0xfe, 0xc0, 0x20
-};
-
-const uint8_t kLobfFullSequenceHeaderObu[] = {
- 0x0a, 0x0b, 0x00, 0x00, 0x00, 0x04, 0x45, 0x7e, 0x3e, 0xff, 0xfc, 0xc0, 0x20
-};
-
-const uint8_t kLobfReducedStillImageSequenceHeaderObu[] = {
- 0x0a, 0x07, 0x18, 0x22, 0x2b, 0xf1, 0xfe, 0xc0, 0x20
-};
-
-const uint8_t kAv1cAllZero[] = { 0, 0, 0, 0 };
-
-// The size of AV1 config when no configOBUs are present at the end of the
-// configuration structure.
-const size_t kAv1cNoConfigObusSize = 4;
-
-bool VerifyAv1c(const uint8_t *const obu_buffer, size_t obu_buffer_length,
- bool is_annexb) {
- Av1Config av1_config;
- memset(&av1_config, 0, sizeof(av1_config));
- bool parse_ok = get_av1config_from_obu(obu_buffer, obu_buffer_length,
- is_annexb, &av1_config) == 0;
- if (parse_ok) {
- EXPECT_EQ(1, av1_config.marker);
- EXPECT_EQ(1, av1_config.version);
- EXPECT_EQ(0, av1_config.seq_profile);
- EXPECT_EQ(0, av1_config.seq_level_idx_0);
- EXPECT_EQ(0, av1_config.seq_tier_0);
- EXPECT_EQ(0, av1_config.high_bitdepth);
- EXPECT_EQ(0, av1_config.twelve_bit);
- EXPECT_EQ(0, av1_config.monochrome);
- EXPECT_EQ(1, av1_config.chroma_subsampling_x);
- EXPECT_EQ(1, av1_config.chroma_subsampling_y);
- EXPECT_EQ(0, av1_config.chroma_sample_position);
- EXPECT_EQ(0, av1_config.initial_presentation_delay_present);
- EXPECT_EQ(0, av1_config.initial_presentation_delay_minus_one);
- }
- return parse_ok && ::testing::Test::HasFailure() == false;
-}
-
-TEST(Av1Config, ObuInvalidInputs) {
- Av1Config av1_config;
- memset(&av1_config, 0, sizeof(av1_config));
- ASSERT_EQ(-1, get_av1config_from_obu(NULL, 0, 0, NULL));
- ASSERT_EQ(-1,
- get_av1config_from_obu(&kLobfFullSequenceHeaderObu[0], 0, 0, NULL));
- ASSERT_EQ(
- -1, get_av1config_from_obu(&kLobfFullSequenceHeaderObu[0],
- sizeof(kLobfFullSequenceHeaderObu), 0, NULL));
- ASSERT_EQ(-1, get_av1config_from_obu(NULL, sizeof(kLobfFullSequenceHeaderObu),
- 0, NULL));
- ASSERT_EQ(-1, get_av1config_from_obu(&kLobfFullSequenceHeaderObu[0], 0, 0,
- &av1_config));
-}
-
-TEST(Av1Config, ReadInvalidInputs) {
- Av1Config av1_config;
- memset(&av1_config, 0, sizeof(av1_config));
- size_t bytes_read = 0;
- ASSERT_EQ(-1, read_av1config(NULL, 0, NULL, NULL));
- ASSERT_EQ(-1, read_av1config(NULL, 4, NULL, NULL));
- ASSERT_EQ(-1, read_av1config(&kAv1cAllZero[0], 0, NULL, NULL));
- ASSERT_EQ(-1, read_av1config(&kAv1cAllZero[0], 4, &bytes_read, NULL));
- ASSERT_EQ(-1, read_av1config(NULL, 4, &bytes_read, &av1_config));
-}
-
-TEST(Av1Config, WriteInvalidInputs) {
- Av1Config av1_config;
- memset(&av1_config, 0, sizeof(av1_config));
- size_t bytes_written = 0;
- uint8_t av1c_buffer[4] = { 0 };
- ASSERT_EQ(-1, write_av1config(NULL, 0, NULL, NULL));
- ASSERT_EQ(-1, write_av1config(&av1_config, 0, NULL, NULL));
- ASSERT_EQ(-1, write_av1config(&av1_config, 0, &bytes_written, NULL));
-
- ASSERT_EQ(-1,
- write_av1config(&av1_config, 0, &bytes_written, &av1c_buffer[0]));
- ASSERT_EQ(-1, write_av1config(&av1_config, 4, &bytes_written, NULL));
-}
-
-TEST(Av1Config, GetAv1ConfigFromLobfObu) {
- // Test parsing of a Sequence Header OBU with the reduced_still_picture_header
- // unset-- aka a full Sequence Header OBU.
- ASSERT_TRUE(VerifyAv1c(kLobfFullSequenceHeaderObu,
- sizeof(kLobfFullSequenceHeaderObu), false));
-
- // Test parsing of a reduced still image Sequence Header OBU.
- ASSERT_TRUE(VerifyAv1c(kLobfReducedStillImageSequenceHeaderObu,
- sizeof(kLobfReducedStillImageSequenceHeaderObu),
- false));
-}
-
-TEST(Av1Config, GetAv1ConfigFromAnnexBObu) {
- // Test parsing of a Sequence Header OBU with the reduced_still_picture_header
- // unset-- aka a full Sequence Header OBU.
- ASSERT_TRUE(VerifyAv1c(kAnnexBFullSequenceHeaderObu,
- sizeof(kAnnexBFullSequenceHeaderObu), true));
-
- // Test parsing of a reduced still image Sequence Header OBU.
- ASSERT_TRUE(VerifyAv1c(kAnnexBReducedStillImageSequenceHeaderObu,
- sizeof(kAnnexBReducedStillImageSequenceHeaderObu),
- true));
-}
-
-TEST(Av1Config, ReadWriteConfig) {
- Av1Config av1_config;
- memset(&av1_config, 0, sizeof(av1_config));
-
- // Test writing out the AV1 config.
- size_t bytes_written = 0;
- uint8_t av1c_buffer[4] = { 0 };
- ASSERT_EQ(0, write_av1config(&av1_config, sizeof(av1c_buffer), &bytes_written,
- &av1c_buffer[0]));
- ASSERT_EQ(kAv1cNoConfigObusSize, bytes_written);
- for (size_t i = 0; i < kAv1cNoConfigObusSize; ++i) {
- ASSERT_EQ(kAv1cAllZero[i], av1c_buffer[i])
- << "Mismatch in output Av1Config at offset=" << i;
- }
-
- // Test reading the AV1 config.
- size_t bytes_read = 0;
- ASSERT_EQ(0, read_av1config(&kAv1cAllZero[0], sizeof(kAv1cAllZero),
- &bytes_read, &av1_config));
- ASSERT_EQ(kAv1cNoConfigObusSize, bytes_read);
- ASSERT_EQ(0, write_av1config(&av1_config, sizeof(av1c_buffer), &bytes_written,
- &av1c_buffer[0]));
- for (size_t i = 0; i < kAv1cNoConfigObusSize; ++i) {
- ASSERT_EQ(kAv1cAllZero[i], av1c_buffer[i])
- << "Mismatch in output Av1Config at offset=" << i;
- }
-}
-
-} // namespace
diff --git a/third_party/aom/test/av1_convolve_2d_test.cc b/third_party/aom/test/av1_convolve_2d_test.cc
deleted file mode 100644
index 03286260e..000000000
--- a/third_party/aom/test/av1_convolve_2d_test.cc
+++ /dev/null
@@ -1,249 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#include "third_party/googletest/src/googletest/include/gtest/gtest.h"
-#include "test/av1_convolve_2d_test_util.h"
-
-using ::testing::make_tuple;
-using ::testing::tuple;
-using libaom_test::ACMRandom;
-using libaom_test::AV1Convolve2D::AV1Convolve2DSrTest;
-using libaom_test::AV1Convolve2D::AV1JntConvolve2DTest;
-using libaom_test::AV1HighbdConvolve2D::AV1HighbdConvolve2DSrTest;
-using libaom_test::AV1HighbdConvolve2D::AV1HighbdJntConvolve2DTest;
-namespace {
-
-TEST_P(AV1Convolve2DSrTest, DISABLED_Speed) { RunSpeedTest(GET_PARAM(0)); }
-
-TEST_P(AV1Convolve2DSrTest, CheckOutput) { RunCheckOutput(GET_PARAM(0)); }
-
-INSTANTIATE_TEST_CASE_P(
- C_COPY, AV1Convolve2DSrTest,
- libaom_test::AV1Convolve2D::BuildParams(av1_convolve_2d_copy_sr_c, 0, 0));
-INSTANTIATE_TEST_CASE_P(
- C_X, AV1Convolve2DSrTest,
- libaom_test::AV1Convolve2D::BuildParams(av1_convolve_x_sr_c, 1, 0));
-INSTANTIATE_TEST_CASE_P(
- C_Y, AV1Convolve2DSrTest,
- libaom_test::AV1Convolve2D::BuildParams(av1_convolve_y_sr_c, 0, 1));
-INSTANTIATE_TEST_CASE_P(
- C, AV1Convolve2DSrTest,
- libaom_test::AV1Convolve2D::BuildParams(av1_convolve_2d_sr_c, 1, 1));
-#if HAVE_SSE2
-INSTANTIATE_TEST_CASE_P(SSE2_COPY, AV1Convolve2DSrTest,
- libaom_test::AV1Convolve2D::BuildParams(
- av1_convolve_2d_copy_sr_sse2, 0, 0));
-INSTANTIATE_TEST_CASE_P(
- SSE2_X, AV1Convolve2DSrTest,
- libaom_test::AV1Convolve2D::BuildParams(av1_convolve_x_sr_sse2, 1, 0));
-INSTANTIATE_TEST_CASE_P(
- SSE2_Y, AV1Convolve2DSrTest,
- libaom_test::AV1Convolve2D::BuildParams(av1_convolve_y_sr_sse2, 0, 1));
-INSTANTIATE_TEST_CASE_P(
- SSE2, AV1Convolve2DSrTest,
- libaom_test::AV1Convolve2D::BuildParams(av1_convolve_2d_sr_sse2, 1, 1));
-#if HAVE_AVX2
-INSTANTIATE_TEST_CASE_P(AVX2_COPY, AV1Convolve2DSrTest,
- libaom_test::AV1Convolve2D::BuildParams(
- av1_convolve_2d_copy_sr_avx2, 0, 0));
-INSTANTIATE_TEST_CASE_P(
- AVX2_X, AV1Convolve2DSrTest,
- libaom_test::AV1Convolve2D::BuildParams(av1_convolve_x_sr_avx2, 1, 0));
-
-INSTANTIATE_TEST_CASE_P(
- AVX2_Y, AV1Convolve2DSrTest,
- libaom_test::AV1Convolve2D::BuildParams(av1_convolve_y_sr_avx2, 0, 1));
-
-INSTANTIATE_TEST_CASE_P(
- AVX2, AV1Convolve2DSrTest,
- libaom_test::AV1Convolve2D::BuildParams(av1_convolve_2d_sr_avx2, 1, 1));
-#endif // HAVE_AVX2
-#endif // HAVE_SSE2
-
-#if HAVE_NEON
-INSTANTIATE_TEST_CASE_P(
- NEON_X, AV1Convolve2DSrTest,
- libaom_test::AV1Convolve2D::BuildParams(av1_convolve_x_sr_neon, 1, 0));
-
-INSTANTIATE_TEST_CASE_P(
- NEON_Y, AV1Convolve2DSrTest,
- libaom_test::AV1Convolve2D::BuildParams(av1_convolve_y_sr_neon, 0, 1));
-
-INSTANTIATE_TEST_CASE_P(
- NEON, AV1Convolve2DSrTest,
- libaom_test::AV1Convolve2D::BuildParams(av1_convolve_2d_sr_neon, 1, 1));
-
-INSTANTIATE_TEST_CASE_P(NEON_COPY, AV1Convolve2DSrTest,
- libaom_test::AV1Convolve2D::BuildParams(
- av1_convolve_2d_copy_sr_neon, 0, 0));
-#endif // HAVE_NEON
-
-TEST_P(AV1JntConvolve2DTest, CheckOutput) { RunCheckOutput(GET_PARAM(0)); }
-TEST_P(AV1JntConvolve2DTest, DISABLED_Speed) { RunSpeedTest(GET_PARAM(0)); }
-
-INSTANTIATE_TEST_CASE_P(
- C_COPY, AV1JntConvolve2DTest,
- libaom_test::AV1Convolve2D::BuildParams(av1_jnt_convolve_2d_copy_c, 0, 0));
-
-INSTANTIATE_TEST_CASE_P(
- C_X, AV1JntConvolve2DTest,
- libaom_test::AV1Convolve2D::BuildParams(av1_jnt_convolve_x_c, 1, 0));
-
-INSTANTIATE_TEST_CASE_P(
- C_Y, AV1JntConvolve2DTest,
- libaom_test::AV1Convolve2D::BuildParams(av1_jnt_convolve_y_c, 0, 1));
-
-#if HAVE_SSE2
-INSTANTIATE_TEST_CASE_P(SSE2_COPY, AV1JntConvolve2DTest,
- libaom_test::AV1Convolve2D::BuildParams(
- av1_jnt_convolve_2d_copy_sse2, 0, 0));
-#if HAVE_SSE4_1
-INSTANTIATE_TEST_CASE_P(
- SSE2_X, AV1JntConvolve2DTest,
- libaom_test::AV1Convolve2D::BuildParams(av1_jnt_convolve_x_sse2, 1, 0));
-
-INSTANTIATE_TEST_CASE_P(
- SSE2_Y, AV1JntConvolve2DTest,
- libaom_test::AV1Convolve2D::BuildParams(av1_jnt_convolve_y_sse2, 0, 1));
-
-INSTANTIATE_TEST_CASE_P(
- SSSE3, AV1JntConvolve2DTest,
- libaom_test::AV1Convolve2D::BuildParams(av1_jnt_convolve_2d_ssse3, 1, 1));
-
-#if HAVE_AVX2
-INSTANTIATE_TEST_CASE_P(AVX2_COPY, AV1JntConvolve2DTest,
- libaom_test::AV1Convolve2D::BuildParams(
- av1_jnt_convolve_2d_copy_avx2, 0, 0));
-INSTANTIATE_TEST_CASE_P(
- AVX2_X, AV1JntConvolve2DTest,
- libaom_test::AV1Convolve2D::BuildParams(av1_jnt_convolve_x_avx2, 1, 0));
-
-INSTANTIATE_TEST_CASE_P(
- AVX2_Y, AV1JntConvolve2DTest,
- libaom_test::AV1Convolve2D::BuildParams(av1_jnt_convolve_y_avx2, 0, 1));
-
-INSTANTIATE_TEST_CASE_P(
- AVX2, AV1JntConvolve2DTest,
- libaom_test::AV1Convolve2D::BuildParams(av1_jnt_convolve_2d_avx2, 1, 1));
-#endif // HAVE_AVX2
-#endif // HAVE_SSE4_1
-#endif // HAVE_SSE2
-#if HAVE_NEON
-INSTANTIATE_TEST_CASE_P(NEON_COPY, AV1JntConvolve2DTest,
- libaom_test::AV1Convolve2D::BuildParams(
- av1_jnt_convolve_2d_copy_neon, 0, 0));
-
-INSTANTIATE_TEST_CASE_P(
- NEON, AV1JntConvolve2DTest,
- libaom_test::AV1Convolve2D::BuildParams(av1_jnt_convolve_2d_neon, 1, 1));
-INSTANTIATE_TEST_CASE_P(
- NEON_X, AV1JntConvolve2DTest,
- libaom_test::AV1Convolve2D::BuildParams(av1_jnt_convolve_x_neon, 1, 0));
-
-INSTANTIATE_TEST_CASE_P(
- NEON_Y, AV1JntConvolve2DTest,
- libaom_test::AV1Convolve2D::BuildParams(av1_jnt_convolve_y_neon, 0, 1));
-#endif // HAVE_NEON
-
-TEST_P(AV1HighbdConvolve2DSrTest, CheckOutput) { RunCheckOutput(GET_PARAM(1)); }
-TEST_P(AV1HighbdConvolve2DSrTest, DISABLED_Speed) {
- RunSpeedTest(GET_PARAM(1));
-}
-
-INSTANTIATE_TEST_CASE_P(C_X, AV1HighbdConvolve2DSrTest,
- libaom_test::AV1HighbdConvolve2D::BuildParams(
- av1_highbd_convolve_x_sr_c, 1, 0));
-
-INSTANTIATE_TEST_CASE_P(C_Y, AV1HighbdConvolve2DSrTest,
- libaom_test::AV1HighbdConvolve2D::BuildParams(
- av1_highbd_convolve_y_sr_c, 0, 1));
-
-INSTANTIATE_TEST_CASE_P(C_COPY, AV1HighbdConvolve2DSrTest,
- libaom_test::AV1HighbdConvolve2D::BuildParams(
- av1_highbd_convolve_2d_copy_sr_c, 0, 0));
-#if HAVE_SSE2
-INSTANTIATE_TEST_CASE_P(SSE2_COPY, AV1HighbdConvolve2DSrTest,
- libaom_test::AV1HighbdConvolve2D::BuildParams(
- av1_highbd_convolve_2d_copy_sr_sse2, 0, 0));
-#if HAVE_SSSE3
-INSTANTIATE_TEST_CASE_P(SSSE3, AV1HighbdConvolve2DSrTest,
- libaom_test::AV1HighbdConvolve2D::BuildParams(
- av1_highbd_convolve_2d_sr_ssse3, 1, 1));
-INSTANTIATE_TEST_CASE_P(SSSE3_X, AV1HighbdConvolve2DSrTest,
- libaom_test::AV1HighbdConvolve2D::BuildParams(
- av1_highbd_convolve_x_sr_ssse3, 1, 0));
-INSTANTIATE_TEST_CASE_P(SSSE3_Y, AV1HighbdConvolve2DSrTest,
- libaom_test::AV1HighbdConvolve2D::BuildParams(
- av1_highbd_convolve_y_sr_ssse3, 0, 1));
-#if HAVE_AVX2
-INSTANTIATE_TEST_CASE_P(AVX2, AV1HighbdConvolve2DSrTest,
- libaom_test::AV1HighbdConvolve2D::BuildParams(
- av1_highbd_convolve_2d_sr_avx2, 1, 1));
-INSTANTIATE_TEST_CASE_P(AVX2_X, AV1HighbdConvolve2DSrTest,
- libaom_test::AV1HighbdConvolve2D::BuildParams(
- av1_highbd_convolve_x_sr_avx2, 1, 0));
-INSTANTIATE_TEST_CASE_P(AVX2_Y, AV1HighbdConvolve2DSrTest,
- libaom_test::AV1HighbdConvolve2D::BuildParams(
- av1_highbd_convolve_y_sr_avx2, 0, 1));
-INSTANTIATE_TEST_CASE_P(AVX2_COPY, AV1HighbdConvolve2DSrTest,
- libaom_test::AV1HighbdConvolve2D::BuildParams(
- av1_highbd_convolve_2d_copy_sr_avx2, 0, 0));
-#endif // HAVE_AVX2
-#endif // HAVE_SSSE3
-#endif // HAVE_SSE2
-TEST_P(AV1HighbdJntConvolve2DTest, CheckOutput) {
- RunCheckOutput(GET_PARAM(1));
-}
-
-TEST_P(AV1HighbdJntConvolve2DTest, DISABLED_Speed) {
- RunSpeedTest(GET_PARAM(1));
-}
-
-INSTANTIATE_TEST_CASE_P(C_X, AV1HighbdJntConvolve2DTest,
- libaom_test::AV1HighbdConvolve2D::BuildParams(
- av1_highbd_jnt_convolve_x_c, 1, 0));
-
-INSTANTIATE_TEST_CASE_P(C_Y, AV1HighbdJntConvolve2DTest,
- libaom_test::AV1HighbdConvolve2D::BuildParams(
- av1_highbd_jnt_convolve_y_c, 0, 1));
-
-INSTANTIATE_TEST_CASE_P(C_COPY, AV1HighbdJntConvolve2DTest,
- libaom_test::AV1HighbdConvolve2D::BuildParams(
- av1_highbd_jnt_convolve_2d_copy_c, 0, 0));
-#if HAVE_SSE4_1
-INSTANTIATE_TEST_CASE_P(SSE4_1_COPY, AV1HighbdJntConvolve2DTest,
- libaom_test::AV1HighbdConvolve2D::BuildParams(
- av1_highbd_jnt_convolve_2d_copy_sse4_1, 0, 0));
-INSTANTIATE_TEST_CASE_P(SSE4_1, AV1HighbdJntConvolve2DTest,
- libaom_test::AV1HighbdConvolve2D::BuildParams(
- av1_highbd_jnt_convolve_2d_sse4_1, 1, 1));
-INSTANTIATE_TEST_CASE_P(SSE4_1_X, AV1HighbdJntConvolve2DTest,
- libaom_test::AV1HighbdConvolve2D::BuildParams(
- av1_highbd_jnt_convolve_x_sse4_1, 1, 0));
-INSTANTIATE_TEST_CASE_P(SSE4_1_Y, AV1HighbdJntConvolve2DTest,
- libaom_test::AV1HighbdConvolve2D::BuildParams(
- av1_highbd_jnt_convolve_y_sse4_1, 0, 1));
-#if HAVE_AVX2
-INSTANTIATE_TEST_CASE_P(AVX2_COPY, AV1HighbdJntConvolve2DTest,
- libaom_test::AV1HighbdConvolve2D::BuildParams(
- av1_highbd_jnt_convolve_2d_copy_avx2, 0, 0));
-INSTANTIATE_TEST_CASE_P(AVX2, AV1HighbdJntConvolve2DTest,
- libaom_test::AV1HighbdConvolve2D::BuildParams(
- av1_highbd_jnt_convolve_2d_avx2, 1, 1));
-INSTANTIATE_TEST_CASE_P(AVX2_X, AV1HighbdJntConvolve2DTest,
- libaom_test::AV1HighbdConvolve2D::BuildParams(
- av1_highbd_jnt_convolve_x_avx2, 1, 0));
-INSTANTIATE_TEST_CASE_P(AVX2_Y, AV1HighbdJntConvolve2DTest,
- libaom_test::AV1HighbdConvolve2D::BuildParams(
- av1_highbd_jnt_convolve_y_avx2, 0, 1));
-#endif // HAVE_AVX2
-#endif // HAVE_SSE4_1
-} // namespace
diff --git a/third_party/aom/test/av1_convolve_2d_test_util.cc b/third_party/aom/test/av1_convolve_2d_test_util.cc
deleted file mode 100644
index 409fd23e1..000000000
--- a/third_party/aom/test/av1_convolve_2d_test_util.cc
+++ /dev/null
@@ -1,705 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#include "test/av1_convolve_2d_test_util.h"
-
-#include "aom_ports/aom_timer.h"
-#include "av1/common/common_data.h"
-#include "av1/common/convolve.h"
-
-using ::testing::make_tuple;
-using ::testing::tuple;
-
-namespace libaom_test {
-
-const int kMaxSize = 128 + 32; // padding
-namespace AV1Convolve2D {
-
-::testing::internal::ParamGenerator<Convolve2DParam> BuildParams(
- convolve_2d_func filter, int has_subx, int has_suby) {
- return ::testing::Combine(::testing::Values(filter),
- ::testing::Values(has_subx),
- ::testing::Values(has_suby),
- ::testing::Range(BLOCK_4X4, BLOCK_SIZES_ALL));
-}
-
-AV1Convolve2DSrTest::~AV1Convolve2DSrTest() {}
-void AV1Convolve2DSrTest::SetUp() {
- rnd_.Reset(ACMRandom::DeterministicSeed());
-}
-
-void AV1Convolve2DSrTest::TearDown() { libaom_test::ClearSystemState(); }
-
-void AV1Convolve2DSrTest::RunCheckOutput(convolve_2d_func test_impl) {
- const int w = kMaxSize, h = kMaxSize;
- const int has_subx = GET_PARAM(1);
- const int has_suby = GET_PARAM(2);
- const int block_idx = GET_PARAM(3);
- int hfilter, vfilter, subx, suby;
- uint8_t input[kMaxSize * kMaxSize];
- DECLARE_ALIGNED(32, uint8_t, output[MAX_SB_SQUARE]);
- DECLARE_ALIGNED(32, uint8_t, output2[MAX_SB_SQUARE]);
-
- for (int i = 0; i < h; ++i)
- for (int j = 0; j < w; ++j) input[i * w + j] = rnd_.Rand8();
- for (int i = 0; i < MAX_SB_SQUARE; ++i)
- output[i] = output2[i] = rnd_.Rand31();
-
- // Make sure that sizes 2xN and Nx2 are also tested for chroma.
- const int num_sizes =
- (block_size_wide[block_idx] == 4 || block_size_high[block_idx] == 4) ? 2
- : 1;
- for (int shift = 0; shift < num_sizes; ++shift) { // luma and chroma
- const int out_w = block_size_wide[block_idx] >> shift;
- const int out_h = block_size_high[block_idx] >> shift;
- for (hfilter = EIGHTTAP_REGULAR; hfilter < INTERP_FILTERS_ALL; ++hfilter) {
- for (vfilter = EIGHTTAP_REGULAR; vfilter < INTERP_FILTERS_ALL;
- ++vfilter) {
- const InterpFilterParams *filter_params_x =
- av1_get_interp_filter_params_with_block_size((InterpFilter)hfilter,
- out_w);
- const InterpFilterParams *filter_params_y =
- av1_get_interp_filter_params_with_block_size((InterpFilter)vfilter,
- out_h);
- for (int do_average = 0; do_average < 1; ++do_average) {
- ConvolveParams conv_params1 =
- get_conv_params_no_round(do_average, 0, NULL, 0, 0, 8);
- ConvolveParams conv_params2 =
- get_conv_params_no_round(do_average, 0, NULL, 0, 0, 8);
-
- const int subx_range = has_subx ? 16 : 1;
- const int suby_range = has_suby ? 16 : 1;
- for (subx = 0; subx < subx_range; ++subx) {
- for (suby = 0; suby < suby_range; ++suby) {
- // Choose random locations within the source block
- const int offset_r = 3 + rnd_.PseudoUniform(h - out_h - 7);
- const int offset_c = 3 + rnd_.PseudoUniform(w - out_w - 7);
- av1_convolve_2d_sr_c(input + offset_r * w + offset_c, w, output,
- MAX_SB_SIZE, out_w, out_h, filter_params_x,
- filter_params_y, subx, suby, &conv_params1);
- test_impl(input + offset_r * w + offset_c, w, output2,
- MAX_SB_SIZE, out_w, out_h, filter_params_x,
- filter_params_y, subx, suby, &conv_params2);
-
- if (memcmp(output, output2, sizeof(output))) {
- for (int i = 0; i < MAX_SB_SIZE; ++i) {
- for (int j = 0; j < MAX_SB_SIZE; ++j) {
- int idx = i * MAX_SB_SIZE + j;
- ASSERT_EQ(output[idx], output2[idx])
- << out_w << "x" << out_h << " Pixel mismatch at index "
- << idx << " = (" << i << ", " << j
- << "), sub pixel offset = (" << suby << ", " << subx
- << ")";
- }
- }
- }
- }
- }
- }
- }
- }
- }
-}
-
-void AV1Convolve2DSrTest::RunSpeedTest(convolve_2d_func test_impl) {
- const int w = kMaxSize, h = kMaxSize;
- const int has_subx = GET_PARAM(1);
- const int has_suby = GET_PARAM(2);
- const int block_idx = GET_PARAM(3);
-
- uint8_t input[kMaxSize * kMaxSize];
- DECLARE_ALIGNED(32, uint8_t, output[MAX_SB_SQUARE]);
-
- for (int i = 0; i < h; ++i)
- for (int j = 0; j < w; ++j) input[i * w + j] = rnd_.Rand8();
-
- int hfilter = EIGHTTAP_REGULAR, vfilter = EIGHTTAP_REGULAR;
- int subx = 0, suby = 0;
-
- const int do_average = 0;
- ConvolveParams conv_params2 =
- get_conv_params_no_round(do_average, 0, NULL, 0, 0, 8);
-
- // Make sure that sizes 2xN and Nx2 are also tested for chroma.
- const int num_sizes =
- (block_size_wide[block_idx] == 4 || block_size_high[block_idx] == 4) ? 2
- : 1;
- for (int shift = 0; shift < num_sizes; ++shift) { // luma and chroma
- const int out_w = block_size_wide[block_idx] >> shift;
- const int out_h = block_size_high[block_idx] >> shift;
- const int num_loops = 1000000000 / (out_w + out_h);
-
- const InterpFilterParams *filter_params_x =
- av1_get_interp_filter_params_with_block_size((InterpFilter)hfilter,
- out_w);
- const InterpFilterParams *filter_params_y =
- av1_get_interp_filter_params_with_block_size((InterpFilter)vfilter,
- out_h);
-
- aom_usec_timer timer;
- aom_usec_timer_start(&timer);
-
- for (int i = 0; i < num_loops; ++i)
- test_impl(input, w, output, MAX_SB_SIZE, out_w, out_h, filter_params_x,
- filter_params_y, subx, suby, &conv_params2);
-
- aom_usec_timer_mark(&timer);
- const int elapsed_time = static_cast<int>(aom_usec_timer_elapsed(&timer));
- printf("%d,%d convolve %3dx%-3d: %7.2f us\n", has_subx, has_suby, out_w,
- out_h, 1000.0 * elapsed_time / num_loops);
- }
-}
-
-AV1JntConvolve2DTest::~AV1JntConvolve2DTest() {}
-void AV1JntConvolve2DTest::SetUp() {
- rnd_.Reset(ACMRandom::DeterministicSeed());
-}
-
-void AV1JntConvolve2DTest::TearDown() { libaom_test::ClearSystemState(); }
-
-void AV1JntConvolve2DTest::RunCheckOutput(convolve_2d_func test_impl) {
- const int w = kMaxSize, h = kMaxSize;
- const int has_subx = GET_PARAM(1);
- const int has_suby = GET_PARAM(2);
- const int block_idx = GET_PARAM(3);
- int hfilter, vfilter, subx, suby;
- uint8_t input[kMaxSize * kMaxSize];
- DECLARE_ALIGNED(32, CONV_BUF_TYPE, output1[MAX_SB_SQUARE]);
- DECLARE_ALIGNED(32, CONV_BUF_TYPE, output2[MAX_SB_SQUARE]);
- DECLARE_ALIGNED(16, uint8_t, output8_1[MAX_SB_SQUARE]);
- DECLARE_ALIGNED(16, uint8_t, output8_2[MAX_SB_SQUARE]);
-
- for (int i = 0; i < h; ++i)
- for (int j = 0; j < w; ++j) input[i * w + j] = rnd_.Rand8();
- for (int i = 0; i < MAX_SB_SQUARE; ++i) {
- output1[i] = output2[i] = rnd_.Rand16();
- output8_1[i] = output8_2[i] = rnd_.Rand8();
- }
-
- const int out_w = block_size_wide[block_idx];
- const int out_h = block_size_high[block_idx];
- for (hfilter = EIGHTTAP_REGULAR; hfilter < INTERP_FILTERS_ALL; ++hfilter) {
- for (vfilter = EIGHTTAP_REGULAR; vfilter < INTERP_FILTERS_ALL; ++vfilter) {
- const InterpFilterParams *filter_params_x =
- av1_get_interp_filter_params_with_block_size((InterpFilter)hfilter,
- out_w);
- const InterpFilterParams *filter_params_y =
- av1_get_interp_filter_params_with_block_size((InterpFilter)vfilter,
- out_h);
- for (int do_average = 0; do_average <= 1; ++do_average) {
- ConvolveParams conv_params1 =
- get_conv_params_no_round(do_average, 0, output1, MAX_SB_SIZE, 1, 8);
- ConvolveParams conv_params2 =
- get_conv_params_no_round(do_average, 0, output2, MAX_SB_SIZE, 1, 8);
-
- // Test special case where jnt_comp_avg is not used
- conv_params1.use_jnt_comp_avg = 0;
- conv_params2.use_jnt_comp_avg = 0;
-
- const int subx_range = has_subx ? 16 : 1;
- const int suby_range = has_suby ? 16 : 1;
- for (subx = 0; subx < subx_range; ++subx) {
- for (suby = 0; suby < suby_range; ++suby) {
- // Choose random locations within the source block
- const int offset_r = 3 + rnd_.PseudoUniform(h - out_h - 7);
- const int offset_c = 3 + rnd_.PseudoUniform(w - out_w - 7);
- av1_jnt_convolve_2d_c(input + offset_r * w + offset_c, w, output8_1,
- MAX_SB_SIZE, out_w, out_h, filter_params_x,
- filter_params_y, subx, suby, &conv_params1);
- test_impl(input + offset_r * w + offset_c, w, output8_2,
- MAX_SB_SIZE, out_w, out_h, filter_params_x,
- filter_params_y, subx, suby, &conv_params2);
-
- for (int i = 0; i < out_h; ++i) {
- for (int j = 0; j < out_w; ++j) {
- int idx = i * MAX_SB_SIZE + j;
- ASSERT_EQ(output1[idx], output2[idx])
- << "Mismatch at unit tests for av1_jnt_convolve_2d\n"
- << out_w << "x" << out_h << " Pixel mismatch at index "
- << idx << " = (" << i << ", " << j
- << "), sub pixel offset = (" << suby << ", " << subx << ")";
- }
- }
-
- if (memcmp(output8_1, output8_2, sizeof(output8_1))) {
- for (int i = 0; i < MAX_SB_SIZE; ++i) {
- for (int j = 0; j < MAX_SB_SIZE; ++j) {
- int idx = i * MAX_SB_SIZE + j;
- ASSERT_EQ(output8_1[idx], output8_2[idx])
- << out_w << "x" << out_h << " Pixel mismatch at index "
- << idx << " = (" << i << ", " << j
- << "), sub pixel offset = (" << suby << ", " << subx
- << ")";
- }
- }
- }
- }
- }
-
- // Test different combination of fwd and bck offset weights
- for (int k = 0; k < 2; ++k) {
- for (int l = 0; l < 4; ++l) {
- conv_params1.use_jnt_comp_avg = 1;
- conv_params2.use_jnt_comp_avg = 1;
- conv_params1.fwd_offset = quant_dist_lookup_table[k][l][0];
- conv_params1.bck_offset = quant_dist_lookup_table[k][l][1];
- conv_params2.fwd_offset = quant_dist_lookup_table[k][l][0];
- conv_params2.bck_offset = quant_dist_lookup_table[k][l][1];
-
- for (subx = 0; subx < subx_range; ++subx) {
- for (suby = 0; suby < suby_range; ++suby) {
- // Choose random locations within the source block
- const int offset_r = 3 + rnd_.PseudoUniform(h - out_h - 7);
- const int offset_c = 3 + rnd_.PseudoUniform(w - out_w - 7);
- av1_jnt_convolve_2d_c(input + offset_r * w + offset_c, w,
- output8_1, MAX_SB_SIZE, out_w, out_h,
- filter_params_x, filter_params_y, subx,
- suby, &conv_params1);
- test_impl(input + offset_r * w + offset_c, w, output8_2,
- MAX_SB_SIZE, out_w, out_h, filter_params_x,
- filter_params_y, subx, suby, &conv_params2);
-
- for (int i = 0; i < out_h; ++i) {
- for (int j = 0; j < out_w; ++j) {
- int idx = i * MAX_SB_SIZE + j;
- ASSERT_EQ(output1[idx], output2[idx])
- << "Mismatch at unit tests for "
- "av1_jnt_convolve_2d\n"
- << out_w << "x" << out_h << " Pixel mismatch at index "
- << idx << " = (" << i << ", " << j
- << "), sub pixel offset = (" << suby << ", " << subx
- << ")";
- }
- }
- if (memcmp(output8_1, output8_2, sizeof(output8_1))) {
- for (int i = 0; i < MAX_SB_SIZE; ++i) {
- for (int j = 0; j < MAX_SB_SIZE; ++j) {
- int idx = i * MAX_SB_SIZE + j;
- ASSERT_EQ(output8_1[idx], output8_2[idx])
- << out_w << "x" << out_h
- << " Pixel mismatch at index " << idx << " = (" << i
- << ", " << j << "), sub pixel offset = (" << suby
- << ", " << subx << ")";
- }
- }
- }
- }
- }
- }
- }
- }
- }
- }
-}
-
-void AV1JntConvolve2DTest::RunSpeedTest(convolve_2d_func test_impl) {
- const int w = kMaxSize, h = kMaxSize;
- const int has_subx = GET_PARAM(1);
- const int has_suby = GET_PARAM(2);
- const int block_idx = GET_PARAM(3);
-
- int subx = 0, suby = 0;
- uint8_t input[kMaxSize * kMaxSize];
- DECLARE_ALIGNED(32, CONV_BUF_TYPE, output[MAX_SB_SQUARE]);
- DECLARE_ALIGNED(16, uint8_t, output8[MAX_SB_SQUARE]);
- int hfilter = EIGHTTAP_REGULAR, vfilter = EIGHTTAP_REGULAR;
- for (int i = 0; i < h; ++i)
- for (int j = 0; j < w; ++j) input[i * w + j] = rnd_.Rand8();
- for (int i = 0; i < MAX_SB_SQUARE; ++i) {
- output[i] = rnd_.Rand16();
- output8[i] = rnd_.Rand8();
- }
-
- const int out_w = block_size_wide[block_idx];
- const int out_h = block_size_high[block_idx];
- const int num_loops = 1000000000 / (out_w + out_h);
- const int do_average = 0;
-
- const InterpFilterParams *filter_params_x =
- av1_get_interp_filter_params_with_block_size((InterpFilter)hfilter,
- out_w);
- const InterpFilterParams *filter_params_y =
- av1_get_interp_filter_params_with_block_size((InterpFilter)vfilter,
- out_h);
-
- ConvolveParams conv_params =
- get_conv_params_no_round(do_average, 0, output, MAX_SB_SIZE, 1, 8);
-
- conv_params.use_jnt_comp_avg = 0;
-
- // Choose random locations within the source block
- const int offset_r = 3 + rnd_.PseudoUniform(h - out_h - 7);
- const int offset_c = 3 + rnd_.PseudoUniform(w - out_w - 7);
-
- aom_usec_timer timer;
- aom_usec_timer_start(&timer);
-
- for (int i = 0; i < num_loops; ++i)
- test_impl(input + offset_r * w + offset_c, w, output8, MAX_SB_SIZE, out_w,
- out_h, filter_params_x, filter_params_y, subx, suby,
- &conv_params);
-
- aom_usec_timer_mark(&timer);
- const int elapsed_time = static_cast<int>(aom_usec_timer_elapsed(&timer));
- printf("%d,%d convolve %3dx%-3d: %7.2f us\n", has_subx, has_suby, out_w,
- out_h, 1000.0 * elapsed_time / num_loops);
-}
-} // namespace AV1Convolve2D
-
-namespace AV1HighbdConvolve2D {
-::testing::internal::ParamGenerator<HighbdConvolve2DParam> BuildParams(
- highbd_convolve_2d_func filter, int has_subx, int has_suby) {
- return ::testing::Combine(
- ::testing::Range(8, 13, 2), ::testing::Values(filter),
- ::testing::Values(has_subx), ::testing::Values(has_suby),
- ::testing::Range(BLOCK_4X4, BLOCK_SIZES_ALL));
-}
-
-AV1HighbdConvolve2DSrTest::~AV1HighbdConvolve2DSrTest() {}
-void AV1HighbdConvolve2DSrTest::SetUp() {
- rnd_.Reset(ACMRandom::DeterministicSeed());
-}
-
-void AV1HighbdConvolve2DSrTest::TearDown() { libaom_test::ClearSystemState(); }
-
-void AV1HighbdConvolve2DSrTest::RunSpeedTest(
- highbd_convolve_2d_func test_impl) {
- const int w = kMaxSize, h = kMaxSize;
- const int bd = GET_PARAM(0);
- const int has_subx = GET_PARAM(2);
- const int has_suby = GET_PARAM(3);
- const int block_idx = GET_PARAM(4);
- int hfilter, vfilter, subx, suby;
- uint16_t input[kMaxSize * kMaxSize];
- DECLARE_ALIGNED(32, uint16_t, output[MAX_SB_SQUARE]);
-
- for (int i = 0; i < h; ++i)
- for (int j = 0; j < w; ++j)
- input[i * w + j] = rnd_.Rand16() & ((1 << bd) - 1);
-
- hfilter = EIGHTTAP_REGULAR;
- vfilter = EIGHTTAP_REGULAR;
- int do_average = 0;
-
- const int offset_r = 3;
- const int offset_c = 3;
- subx = 0;
- suby = 0;
-
- ConvolveParams conv_params =
- get_conv_params_no_round(do_average, 0, NULL, 0, 0, bd);
-
- // Make sure that sizes 2xN and Nx2 are also tested for chroma.
- const int num_sizes =
- (block_size_wide[block_idx] == 4 || block_size_high[block_idx] == 4) ? 2
- : 1;
-
- for (int shift = 0; shift < num_sizes; ++shift) { // luma and chroma
- const int out_w = block_size_wide[block_idx] >> shift;
- const int out_h = block_size_high[block_idx] >> shift;
- const int num_loops = 1000000000 / (out_w + out_h);
-
- const InterpFilterParams *filter_params_x =
- av1_get_interp_filter_params_with_block_size((InterpFilter)hfilter,
- out_w);
- const InterpFilterParams *filter_params_y =
- av1_get_interp_filter_params_with_block_size((InterpFilter)vfilter,
- out_h);
-
- aom_usec_timer timer;
- aom_usec_timer_start(&timer);
- for (int i = 0; i < num_loops; ++i)
- test_impl(input + offset_r * w + offset_c, w, output, MAX_SB_SIZE, out_w,
- out_h, filter_params_x, filter_params_y, subx, suby,
- &conv_params, bd);
-
- aom_usec_timer_mark(&timer);
- const int elapsed_time = static_cast<int>(aom_usec_timer_elapsed(&timer));
- printf("%d,%d convolve %3dx%-3d: %7.2f us\n", has_subx, has_suby, out_w,
- out_h, 1000.0 * elapsed_time / num_loops);
- }
-}
-
-void AV1HighbdConvolve2DSrTest::RunCheckOutput(
- highbd_convolve_2d_func test_impl) {
- const int w = kMaxSize, h = kMaxSize;
- const int bd = GET_PARAM(0);
- const int has_subx = GET_PARAM(2);
- const int has_suby = GET_PARAM(3);
- const int block_idx = GET_PARAM(4);
- int hfilter, vfilter, subx, suby;
- uint16_t input[kMaxSize * kMaxSize];
- DECLARE_ALIGNED(32, uint16_t, output[MAX_SB_SQUARE]);
- DECLARE_ALIGNED(32, uint16_t, output2[MAX_SB_SQUARE]);
-
- for (int i = 0; i < h; ++i)
- for (int j = 0; j < w; ++j)
- input[i * w + j] = rnd_.Rand16() & ((1 << bd) - 1);
- for (int i = 0; i < MAX_SB_SQUARE; ++i)
- output[i] = output2[i] = rnd_.Rand31();
-
- // Make sure that sizes 2xN and Nx2 are also tested for chroma.
- const int num_sizes =
- (block_size_wide[block_idx] == 4 || block_size_high[block_idx] == 4) ? 2
- : 1;
- for (int shift = 0; shift < num_sizes; ++shift) { // luma and chroma
- const int out_w = block_size_wide[block_idx] >> shift;
- const int out_h = block_size_high[block_idx] >> shift;
- for (hfilter = EIGHTTAP_REGULAR; hfilter < INTERP_FILTERS_ALL; ++hfilter) {
- for (vfilter = EIGHTTAP_REGULAR; vfilter < INTERP_FILTERS_ALL;
- ++vfilter) {
- const InterpFilterParams *filter_params_x =
- av1_get_interp_filter_params_with_block_size((InterpFilter)hfilter,
- out_w);
- const InterpFilterParams *filter_params_y =
- av1_get_interp_filter_params_with_block_size((InterpFilter)vfilter,
- out_h);
- for (int do_average = 0; do_average < 1; ++do_average) {
- ConvolveParams conv_params1 =
- get_conv_params_no_round(do_average, 0, NULL, 0, 0, bd);
- ConvolveParams conv_params2 =
- get_conv_params_no_round(do_average, 0, NULL, 0, 0, bd);
-
- const int subx_range = has_subx ? 16 : 1;
- const int suby_range = has_suby ? 16 : 1;
- for (subx = 0; subx < subx_range; ++subx) {
- for (suby = 0; suby < suby_range; ++suby) {
- // Choose random locations within the source block
- const int offset_r = 3 + rnd_.PseudoUniform(h - out_h - 7);
- const int offset_c = 3 + rnd_.PseudoUniform(w - out_w - 7);
- av1_highbd_convolve_2d_sr_c(input + offset_r * w + offset_c, w,
- output, MAX_SB_SIZE, out_w, out_h,
- filter_params_x, filter_params_y,
- subx, suby, &conv_params1, bd);
- test_impl(input + offset_r * w + offset_c, w, output2,
- MAX_SB_SIZE, out_w, out_h, filter_params_x,
- filter_params_y, subx, suby, &conv_params2, bd);
-
- if (memcmp(output, output2, sizeof(output))) {
- for (int i = 0; i < MAX_SB_SIZE; ++i) {
- for (int j = 0; j < MAX_SB_SIZE; ++j) {
- int idx = i * MAX_SB_SIZE + j;
- ASSERT_EQ(output[idx], output2[idx])
- << out_w << "x" << out_h << " Pixel mismatch at index "
- << idx << " = (" << i << ", " << j
- << "), sub pixel offset = (" << suby << ", " << subx
- << ")";
- }
- }
- }
- }
- }
- }
- }
- }
- }
-}
-
-AV1HighbdJntConvolve2DTest::~AV1HighbdJntConvolve2DTest() {}
-void AV1HighbdJntConvolve2DTest::SetUp() {
- rnd_.Reset(ACMRandom::DeterministicSeed());
-}
-
-void AV1HighbdJntConvolve2DTest::TearDown() { libaom_test::ClearSystemState(); }
-
-void AV1HighbdJntConvolve2DTest::RunSpeedTest(
- highbd_convolve_2d_func test_impl) {
- const int w = kMaxSize, h = kMaxSize;
- const int bd = GET_PARAM(0);
- const int block_idx = GET_PARAM(4);
- int hfilter, vfilter, subx, suby;
- uint16_t input[kMaxSize * kMaxSize];
- DECLARE_ALIGNED(32, CONV_BUF_TYPE, output[MAX_SB_SQUARE]);
- DECLARE_ALIGNED(32, uint16_t, output16[MAX_SB_SQUARE]);
-
- for (int i = 0; i < h; ++i)
- for (int j = 0; j < w; ++j)
- input[i * w + j] = rnd_.Rand16() & ((1 << bd) - 1);
- for (int i = 0; i < MAX_SB_SQUARE; ++i) output[i] = rnd_.Rand16();
- hfilter = EIGHTTAP_REGULAR;
- vfilter = EIGHTTAP_REGULAR;
- int do_average = 0;
- const int out_w = block_size_wide[block_idx];
- const int out_h = block_size_high[block_idx];
-
- const InterpFilterParams *filter_params_x =
- av1_get_interp_filter_params_with_block_size((InterpFilter)hfilter,
- out_w);
- const InterpFilterParams *filter_params_y =
- av1_get_interp_filter_params_with_block_size((InterpFilter)vfilter,
- out_h);
-
- ConvolveParams conv_params =
- get_conv_params_no_round(do_average, 0, output, MAX_SB_SIZE, 1, bd);
-
- // Test special case where jnt_comp_avg is not used
- conv_params.use_jnt_comp_avg = 0;
-
- subx = 0;
- suby = 0;
- // Choose random locations within the source block
- const int offset_r = 3;
- const int offset_c = 3;
-
- const int num_loops = 1000000000 / (out_w + out_h);
- aom_usec_timer timer;
- aom_usec_timer_start(&timer);
- for (int i = 0; i < num_loops; ++i)
- test_impl(input + offset_r * w + offset_c, w, output16, MAX_SB_SIZE, out_w,
- out_h, filter_params_x, filter_params_y, subx, suby, &conv_params,
- bd);
-
- aom_usec_timer_mark(&timer);
- const int elapsed_time = static_cast<int>(aom_usec_timer_elapsed(&timer));
- printf("convolve %3dx%-3d: %7.2f us\n", out_w, out_h,
- 1000.0 * elapsed_time / num_loops);
-}
-
-void AV1HighbdJntConvolve2DTest::RunCheckOutput(
- highbd_convolve_2d_func test_impl) {
- const int w = kMaxSize, h = kMaxSize;
- const int bd = GET_PARAM(0);
- const int has_subx = GET_PARAM(2);
- const int has_suby = GET_PARAM(3);
- const int block_idx = GET_PARAM(4);
- int hfilter, vfilter, subx, suby;
- uint16_t input[kMaxSize * kMaxSize];
- DECLARE_ALIGNED(32, CONV_BUF_TYPE, output1[MAX_SB_SQUARE]);
- DECLARE_ALIGNED(32, CONV_BUF_TYPE, output2[MAX_SB_SQUARE]);
- DECLARE_ALIGNED(32, uint16_t, output16_1[MAX_SB_SQUARE]);
- DECLARE_ALIGNED(32, uint16_t, output16_2[MAX_SB_SQUARE]);
-
- for (int i = 0; i < h; ++i)
- for (int j = 0; j < w; ++j)
- input[i * w + j] = rnd_.Rand16() & ((1 << bd) - 1);
- for (int i = 0; i < MAX_SB_SQUARE; ++i) {
- output1[i] = output2[i] = rnd_.Rand16();
- output16_1[i] = output16_2[i] = rnd_.Rand16();
- }
-
- const int out_w = block_size_wide[block_idx];
- const int out_h = block_size_high[block_idx];
- for (hfilter = EIGHTTAP_REGULAR; hfilter < INTERP_FILTERS_ALL; ++hfilter) {
- for (vfilter = EIGHTTAP_REGULAR; vfilter < INTERP_FILTERS_ALL; ++vfilter) {
- const InterpFilterParams *filter_params_x =
- av1_get_interp_filter_params_with_block_size((InterpFilter)hfilter,
- out_w);
- const InterpFilterParams *filter_params_y =
- av1_get_interp_filter_params_with_block_size((InterpFilter)vfilter,
- out_h);
- for (int do_average = 0; do_average <= 1; ++do_average) {
- ConvolveParams conv_params1 = get_conv_params_no_round(
- do_average, 0, output1, MAX_SB_SIZE, 1, bd);
- ConvolveParams conv_params2 = get_conv_params_no_round(
- do_average, 0, output2, MAX_SB_SIZE, 1, bd);
-
- // Test special case where jnt_comp_avg is not used
- conv_params1.use_jnt_comp_avg = 0;
- conv_params2.use_jnt_comp_avg = 0;
-
- const int subx_range = has_subx ? 16 : 1;
- const int suby_range = has_suby ? 16 : 1;
- for (subx = 0; subx < subx_range; ++subx) {
- for (suby = 0; suby < suby_range; ++suby) {
- // Choose random locations within the source block
- const int offset_r = 3 + rnd_.PseudoUniform(h - out_h - 7);
- const int offset_c = 3 + rnd_.PseudoUniform(w - out_w - 7);
- av1_highbd_jnt_convolve_2d_c(input + offset_r * w + offset_c, w,
- output16_1, MAX_SB_SIZE, out_w, out_h,
- filter_params_x, filter_params_y, subx,
- suby, &conv_params1, bd);
- test_impl(input + offset_r * w + offset_c, w, output16_2,
- MAX_SB_SIZE, out_w, out_h, filter_params_x,
- filter_params_y, subx, suby, &conv_params2, bd);
-
- for (int i = 0; i < out_h; ++i) {
- for (int j = 0; j < out_w; ++j) {
- int idx = i * MAX_SB_SIZE + j;
- ASSERT_EQ(output1[idx], output2[idx])
- << out_w << "x" << out_h << " Pixel mismatch at index "
- << idx << " = (" << i << ", " << j
- << "), sub pixel offset = (" << suby << ", " << subx << ")";
- }
- }
-
- if (memcmp(output16_1, output16_2, sizeof(output16_1))) {
- for (int i = 0; i < MAX_SB_SIZE; ++i) {
- for (int j = 0; j < MAX_SB_SIZE; ++j) {
- int idx = i * MAX_SB_SIZE + j;
- ASSERT_EQ(output16_1[idx], output16_2[idx])
- << out_w << "x" << out_h << " Pixel mismatch at index "
- << idx << " = (" << i << ", " << j
- << "), sub pixel offset = (" << suby << ", " << subx
- << ")";
- }
- }
- }
- }
- }
-
- // Test different combination of fwd and bck offset weights
- for (int k = 0; k < 2; ++k) {
- for (int l = 0; l < 4; ++l) {
- conv_params1.use_jnt_comp_avg = 1;
- conv_params2.use_jnt_comp_avg = 1;
- conv_params1.fwd_offset = quant_dist_lookup_table[k][l][0];
- conv_params1.bck_offset = quant_dist_lookup_table[k][l][1];
- conv_params2.fwd_offset = quant_dist_lookup_table[k][l][0];
- conv_params2.bck_offset = quant_dist_lookup_table[k][l][1];
-
- const int subx_range = has_subx ? 16 : 1;
- const int suby_range = has_suby ? 16 : 1;
- for (subx = 0; subx < subx_range; ++subx) {
- for (suby = 0; suby < suby_range; ++suby) {
- // Choose random locations within the source block
- const int offset_r = 3 + rnd_.PseudoUniform(h - out_h - 7);
- const int offset_c = 3 + rnd_.PseudoUniform(w - out_w - 7);
- av1_highbd_jnt_convolve_2d_c(
- input + offset_r * w + offset_c, w, output16_1, MAX_SB_SIZE,
- out_w, out_h, filter_params_x, filter_params_y, subx, suby,
- &conv_params1, bd);
- test_impl(input + offset_r * w + offset_c, w, output16_2,
- MAX_SB_SIZE, out_w, out_h, filter_params_x,
- filter_params_y, subx, suby, &conv_params2, bd);
-
- for (int i = 0; i < out_h; ++i) {
- for (int j = 0; j < out_w; ++j) {
- int idx = i * MAX_SB_SIZE + j;
- ASSERT_EQ(output1[idx], output2[idx])
- << out_w << "x" << out_h << " Pixel mismatch at index "
- << idx << " = (" << i << ", " << j
- << "), sub pixel offset = (" << suby << ", " << subx
- << ")";
- }
- }
-
- if (memcmp(output16_1, output16_2, sizeof(output16_1))) {
- for (int i = 0; i < MAX_SB_SIZE; ++i) {
- for (int j = 0; j < MAX_SB_SIZE; ++j) {
- int idx = i * MAX_SB_SIZE + j;
- ASSERT_EQ(output16_1[idx], output16_2[idx])
- << out_w << "x" << out_h
- << " Pixel mismatch at index " << idx << " = (" << i
- << ", " << j << "), sub pixel offset = (" << suby
- << ", " << subx << ")";
- }
- }
- }
- }
- }
- }
- }
- }
- }
- }
-}
-} // namespace AV1HighbdConvolve2D
-} // namespace libaom_test
diff --git a/third_party/aom/test/av1_convolve_2d_test_util.h b/third_party/aom/test/av1_convolve_2d_test_util.h
deleted file mode 100644
index e0eb58410..000000000
--- a/third_party/aom/test/av1_convolve_2d_test_util.h
+++ /dev/null
@@ -1,117 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#ifndef AOM_TEST_AV1_CONVOLVE_2D_TEST_UTIL_H_
-#define AOM_TEST_AV1_CONVOLVE_2D_TEST_UTIL_H_
-
-#include "config/av1_rtcd.h"
-#include "config/aom_dsp_rtcd.h"
-
-#include "third_party/googletest/src/googletest/include/gtest/gtest.h"
-#include "test/acm_random.h"
-#include "test/util.h"
-
-#include "test/clear_system_state.h"
-#include "test/register_state_check.h"
-
-namespace libaom_test {
-
-namespace AV1Convolve2D {
-
-typedef void (*convolve_2d_func)(const uint8_t *src, int src_stride,
- uint8_t *dst, int dst_stride, int w, int h,
- const InterpFilterParams *filter_params_x,
- const InterpFilterParams *filter_params_y,
- const int subpel_x_q4, const int subpel_y_q4,
- ConvolveParams *conv_params);
-
-typedef ::testing::tuple<convolve_2d_func, int, int, BLOCK_SIZE>
- Convolve2DParam;
-
-::testing::internal::ParamGenerator<Convolve2DParam> BuildParams(
- convolve_2d_func filter, int subx_exist, int suby_exist);
-
-class AV1Convolve2DSrTest : public ::testing::TestWithParam<Convolve2DParam> {
- public:
- virtual ~AV1Convolve2DSrTest();
- virtual void SetUp();
-
- virtual void TearDown();
-
- protected:
- void RunCheckOutput(convolve_2d_func test_impl);
- void RunSpeedTest(convolve_2d_func test_impl);
-
- libaom_test::ACMRandom rnd_;
-};
-
-class AV1JntConvolve2DTest : public ::testing::TestWithParam<Convolve2DParam> {
- public:
- virtual ~AV1JntConvolve2DTest();
- virtual void SetUp();
-
- virtual void TearDown();
-
- protected:
- void RunCheckOutput(convolve_2d_func test_impl);
- void RunSpeedTest(convolve_2d_func test_impl);
-
- libaom_test::ACMRandom rnd_;
-};
-} // namespace AV1Convolve2D
-
-namespace AV1HighbdConvolve2D {
-typedef void (*highbd_convolve_2d_func)(
- const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w,
- int h, const InterpFilterParams *filter_params_x,
- const InterpFilterParams *filter_params_y, const int subpel_x_q4,
- const int subpel_y_q4, ConvolveParams *conv_params, int bd);
-
-typedef ::testing::tuple<int, highbd_convolve_2d_func, int, int, BLOCK_SIZE>
- HighbdConvolve2DParam;
-
-::testing::internal::ParamGenerator<HighbdConvolve2DParam> BuildParams(
- highbd_convolve_2d_func filter, int subx_exist, int suby_exist);
-
-class AV1HighbdConvolve2DSrTest
- : public ::testing::TestWithParam<HighbdConvolve2DParam> {
- public:
- virtual ~AV1HighbdConvolve2DSrTest();
- virtual void SetUp();
-
- virtual void TearDown();
-
- protected:
- void RunCheckOutput(highbd_convolve_2d_func test_impl);
- void RunSpeedTest(highbd_convolve_2d_func test_impl);
-
- libaom_test::ACMRandom rnd_;
-};
-
-class AV1HighbdJntConvolve2DTest
- : public ::testing::TestWithParam<HighbdConvolve2DParam> {
- public:
- virtual ~AV1HighbdJntConvolve2DTest();
- virtual void SetUp();
-
- virtual void TearDown();
-
- protected:
- void RunCheckOutput(highbd_convolve_2d_func test_impl);
- void RunSpeedTest(highbd_convolve_2d_func test_impl);
-
- libaom_test::ACMRandom rnd_;
-};
-} // namespace AV1HighbdConvolve2D
-
-} // namespace libaom_test
-
-#endif // AOM_TEST_AV1_CONVOLVE_2D_TEST_UTIL_H_
diff --git a/third_party/aom/test/av1_convolve_scale_test.cc b/third_party/aom/test/av1_convolve_scale_test.cc
deleted file mode 100644
index 3b1698eeb..000000000
--- a/third_party/aom/test/av1_convolve_scale_test.cc
+++ /dev/null
@@ -1,529 +0,0 @@
-/*
- * Copyright (c) 2017, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#include <vector>
-
-#include "third_party/googletest/src/googletest/include/gtest/gtest.h"
-
-#include "config/av1_rtcd.h"
-
-#include "aom_ports/aom_timer.h"
-#include "test/acm_random.h"
-#include "test/clear_system_state.h"
-#include "test/register_state_check.h"
-#include "test/util.h"
-
-#include "av1/common/common_data.h"
-
-namespace {
-const int kTestIters = 10;
-const int kPerfIters = 1000;
-
-const int kVPad = 32;
-const int kHPad = 32;
-const int kXStepQn = 16;
-const int kYStepQn = 20;
-
-using ::testing::make_tuple;
-using ::testing::tuple;
-using libaom_test::ACMRandom;
-
-enum NTaps { EIGHT_TAP, TEN_TAP, TWELVE_TAP };
-int NTapsToInt(NTaps ntaps) { return 8 + static_cast<int>(ntaps) * 2; }
-
-// A 16-bit filter with a configurable number of taps.
-class TestFilter {
- public:
- void set(NTaps ntaps, bool backwards);
-
- InterpFilterParams params_;
-
- private:
- std::vector<int16_t> coeffs_;
-};
-
-void TestFilter::set(NTaps ntaps, bool backwards) {
- const int n = NTapsToInt(ntaps);
- assert(n >= 8 && n <= 12);
-
- // The filter has n * SUBPEL_SHIFTS proper elements and an extra 8 bogus
- // elements at the end so that convolutions can read off the end safely.
- coeffs_.resize(n * SUBPEL_SHIFTS + 8);
-
- // The coefficients are pretty much arbitrary, but convolutions shouldn't
- // over or underflow. For the first filter (subpels = 0), we use an
- // increasing or decreasing ramp (depending on the backwards parameter). We
- // don't want any zero coefficients, so we make it have an x-intercept at -1
- // or n. To ensure absence of under/overflow, we normalise the area under the
- // ramp to be I = 1 << FILTER_BITS (so that convolving a constant function
- // gives the identity).
- //
- // When increasing, the function has the form:
- //
- // f(x) = A * (x + 1)
- //
- // Summing and rearranging for A gives A = 2 * I / (n * (n + 1)). If the
- // filter is reversed, we have the same A but with formula
- //
- // g(x) = A * (n - x)
- const int I = 1 << FILTER_BITS;
- const float A = 2.f * I / (n * (n + 1.f));
- for (int i = 0; i < n; ++i) {
- coeffs_[i] = static_cast<int16_t>(A * (backwards ? (n - i) : (i + 1)));
- }
-
- // For the other filters, make them slightly different by swapping two
- // columns. Filter k will have the columns (k % n) and (7 * k) % n swapped.
- const size_t filter_size = sizeof(coeffs_[0] * n);
- int16_t *const filter0 = &coeffs_[0];
- for (int k = 1; k < SUBPEL_SHIFTS; ++k) {
- int16_t *filterk = &coeffs_[k * n];
- memcpy(filterk, filter0, filter_size);
-
- const int idx0 = k % n;
- const int idx1 = (7 * k) % n;
-
- const int16_t tmp = filterk[idx0];
- filterk[idx0] = filterk[idx1];
- filterk[idx1] = tmp;
- }
-
- // Finally, write some rubbish at the end to make sure we don't use it.
- for (int i = 0; i < 8; ++i) coeffs_[n * SUBPEL_SHIFTS + i] = 123 + i;
-
- // Fill in params
- params_.filter_ptr = &coeffs_[0];
- params_.taps = n;
- // These are ignored by the functions being tested. Set them to whatever.
- params_.subpel_shifts = SUBPEL_SHIFTS;
- params_.interp_filter = EIGHTTAP_REGULAR;
-}
-
-template <typename SrcPixel>
-class TestImage {
- public:
- TestImage(int w, int h, int bd) : w_(w), h_(h), bd_(bd) {
- assert(bd < 16);
- assert(bd <= 8 * static_cast<int>(sizeof(SrcPixel)));
-
- // Pad width by 2*kHPad and then round up to the next multiple of 16
- // to get src_stride_. Add another 16 for dst_stride_ (to make sure
- // something goes wrong if we use the wrong one)
- src_stride_ = (w_ + 2 * kHPad + 15) & ~15;
- dst_stride_ = src_stride_ + 16;
-
- // Allocate image data
- src_data_.resize(2 * src_block_size());
- dst_data_.resize(2 * dst_block_size());
- dst_16_data_.resize(2 * dst_block_size());
- }
-
- void Initialize(ACMRandom *rnd);
- void Check() const;
-
- int src_stride() const { return src_stride_; }
- int dst_stride() const { return dst_stride_; }
-
- int src_block_size() const { return (h_ + 2 * kVPad) * src_stride(); }
- int dst_block_size() const { return (h_ + 2 * kVPad) * dst_stride(); }
-
- const SrcPixel *GetSrcData(bool ref, bool borders) const {
- const SrcPixel *block = &src_data_[ref ? 0 : src_block_size()];
- return borders ? block : block + kHPad + src_stride_ * kVPad;
- }
-
- SrcPixel *GetDstData(bool ref, bool borders) {
- SrcPixel *block = &dst_data_[ref ? 0 : dst_block_size()];
- return borders ? block : block + kHPad + dst_stride_ * kVPad;
- }
-
- CONV_BUF_TYPE *GetDst16Data(bool ref, bool borders) {
- CONV_BUF_TYPE *block = &dst_16_data_[ref ? 0 : dst_block_size()];
- return borders ? block : block + kHPad + dst_stride_ * kVPad;
- }
-
- private:
- int w_, h_, bd_;
- int src_stride_, dst_stride_;
-
- std::vector<SrcPixel> src_data_;
- std::vector<SrcPixel> dst_data_;
- std::vector<CONV_BUF_TYPE> dst_16_data_;
-};
-
-template <typename Pixel>
-void FillEdge(ACMRandom *rnd, int num_pixels, int bd, bool trash, Pixel *data) {
- if (!trash) {
- memset(data, 0, sizeof(*data) * num_pixels);
- return;
- }
- const Pixel mask = (1 << bd) - 1;
- for (int i = 0; i < num_pixels; ++i) data[i] = rnd->Rand16() & mask;
-}
-
-template <typename Pixel>
-void PrepBuffers(ACMRandom *rnd, int w, int h, int stride, int bd,
- bool trash_edges, Pixel *data) {
- assert(rnd);
- const Pixel mask = (1 << bd) - 1;
-
- // Fill in the first buffer with random data
- // Top border
- FillEdge(rnd, stride * kVPad, bd, trash_edges, data);
- for (int r = 0; r < h; ++r) {
- Pixel *row_data = data + (kVPad + r) * stride;
- // Left border, contents, right border
- FillEdge(rnd, kHPad, bd, trash_edges, row_data);
- for (int c = 0; c < w; ++c) row_data[kHPad + c] = rnd->Rand16() & mask;
- FillEdge(rnd, kHPad, bd, trash_edges, row_data + kHPad + w);
- }
- // Bottom border
- FillEdge(rnd, stride * kVPad, bd, trash_edges, data + stride * (kVPad + h));
-
- const int bpp = sizeof(*data);
- const int block_elts = stride * (h + 2 * kVPad);
- const int block_size = bpp * block_elts;
-
- // Now copy that to the second buffer
- memcpy(data + block_elts, data, block_size);
-}
-
-template <typename SrcPixel>
-void TestImage<SrcPixel>::Initialize(ACMRandom *rnd) {
- PrepBuffers(rnd, w_, h_, src_stride_, bd_, false, &src_data_[0]);
- PrepBuffers(rnd, w_, h_, dst_stride_, bd_, true, &dst_data_[0]);
- PrepBuffers(rnd, w_, h_, dst_stride_, bd_, true, &dst_16_data_[0]);
-}
-
-template <typename SrcPixel>
-void TestImage<SrcPixel>::Check() const {
- // If memcmp returns 0, there's nothing to do.
- const int num_pixels = dst_block_size();
- const SrcPixel *ref_dst = &dst_data_[0];
- const SrcPixel *tst_dst = &dst_data_[num_pixels];
-
- const CONV_BUF_TYPE *ref_16_dst = &dst_16_data_[0];
- const CONV_BUF_TYPE *tst_16_dst = &dst_16_data_[num_pixels];
-
- if (0 == memcmp(ref_dst, tst_dst, sizeof(*ref_dst) * num_pixels)) {
- if (0 == memcmp(ref_16_dst, tst_16_dst, sizeof(*ref_16_dst) * num_pixels))
- return;
- }
- // Otherwise, iterate through the buffer looking for differences (including
- // the edges)
- const int stride = dst_stride_;
- for (int r = 0; r < h_ + 2 * kVPad; ++r) {
- for (int c = 0; c < w_ + 2 * kHPad; ++c) {
- const int32_t ref_value = ref_dst[r * stride + c];
- const int32_t tst_value = tst_dst[r * stride + c];
-
- EXPECT_EQ(tst_value, ref_value)
- << "Error at row: " << (r - kVPad) << ", col: " << (c - kHPad);
- }
- }
-
- for (int r = 0; r < h_ + 2 * kVPad; ++r) {
- for (int c = 0; c < w_ + 2 * kHPad; ++c) {
- const int32_t ref_value = ref_16_dst[r * stride + c];
- const int32_t tst_value = tst_16_dst[r * stride + c];
-
- EXPECT_EQ(tst_value, ref_value)
- << "Error in 16 bit buffer "
- << "Error at row: " << (r - kVPad) << ", col: " << (c - kHPad);
- }
- }
-}
-
-typedef tuple<int, int> BlockDimension;
-
-struct BaseParams {
- BaseParams(BlockDimension dims, NTaps ntaps_x, NTaps ntaps_y, bool avg)
- : dims(dims), ntaps_x(ntaps_x), ntaps_y(ntaps_y), avg(avg) {}
-
- BlockDimension dims;
- NTaps ntaps_x, ntaps_y;
- bool avg;
-};
-
-template <typename SrcPixel>
-class ConvolveScaleTestBase : public ::testing::Test {
- public:
- ConvolveScaleTestBase() : image_(NULL) {}
- virtual ~ConvolveScaleTestBase() { delete image_; }
- virtual void TearDown() { libaom_test::ClearSystemState(); }
-
- // Implemented by subclasses (SetUp depends on the parameters passed
- // in and RunOne depends on the function to be tested. These can't
- // be templated for low/high bit depths because they have different
- // numbers of parameters)
- virtual void SetUp() = 0;
- virtual void RunOne(bool ref) = 0;
-
- protected:
- void SetParams(const BaseParams &params, int bd) {
- width_ = ::testing::get<0>(params.dims);
- height_ = ::testing::get<1>(params.dims);
- ntaps_x_ = params.ntaps_x;
- ntaps_y_ = params.ntaps_y;
- bd_ = bd;
- avg_ = params.avg;
-
- filter_x_.set(ntaps_x_, false);
- filter_y_.set(ntaps_y_, true);
- convolve_params_ =
- get_conv_params_no_round(avg_ != false, 0, NULL, 0, 1, bd);
-
- delete image_;
- image_ = new TestImage<SrcPixel>(width_, height_, bd_);
- }
-
- void SetConvParamOffset(int i, int j, int is_compound, int do_average,
- int use_jnt_comp_avg) {
- if (i == -1 && j == -1) {
- convolve_params_.use_jnt_comp_avg = use_jnt_comp_avg;
- convolve_params_.is_compound = is_compound;
- convolve_params_.do_average = do_average;
- } else {
- convolve_params_.use_jnt_comp_avg = use_jnt_comp_avg;
- convolve_params_.fwd_offset = quant_dist_lookup_table[i][j][0];
- convolve_params_.bck_offset = quant_dist_lookup_table[i][j][1];
- convolve_params_.is_compound = is_compound;
- convolve_params_.do_average = do_average;
- }
- }
-
- void Run() {
- ACMRandom rnd(ACMRandom::DeterministicSeed());
- for (int i = 0; i < kTestIters; ++i) {
- int is_compound = 0;
- SetConvParamOffset(-1, -1, is_compound, 0, 0);
- Prep(&rnd);
- RunOne(true);
- RunOne(false);
- image_->Check();
-
- is_compound = 1;
- for (int do_average = 0; do_average < 2; do_average++) {
- for (int use_jnt_comp_avg = 0; use_jnt_comp_avg < 2;
- use_jnt_comp_avg++) {
- for (int j = 0; j < 2; ++j) {
- for (int k = 0; k < 4; ++k) {
- SetConvParamOffset(j, k, is_compound, do_average,
- use_jnt_comp_avg);
- Prep(&rnd);
- RunOne(true);
- RunOne(false);
- image_->Check();
- }
- }
- }
- }
- }
- }
-
- void SpeedTest() {
- ACMRandom rnd(ACMRandom::DeterministicSeed());
- Prep(&rnd);
-
- aom_usec_timer ref_timer;
- aom_usec_timer_start(&ref_timer);
- for (int i = 0; i < kPerfIters; ++i) RunOne(true);
- aom_usec_timer_mark(&ref_timer);
- const int64_t ref_time = aom_usec_timer_elapsed(&ref_timer);
-
- aom_usec_timer tst_timer;
- aom_usec_timer_start(&tst_timer);
- for (int i = 0; i < kPerfIters; ++i) RunOne(false);
- aom_usec_timer_mark(&tst_timer);
- const int64_t tst_time = aom_usec_timer_elapsed(&tst_timer);
-
- std::cout << "[ ] C time = " << ref_time / 1000
- << " ms, SIMD time = " << tst_time / 1000 << " ms\n";
-
- EXPECT_GT(ref_time, tst_time)
- << "Error: CDEFSpeedTest, SIMD slower than C.\n"
- << "C time: " << ref_time << " us\n"
- << "SIMD time: " << tst_time << " us\n";
- }
-
- static int RandomSubpel(ACMRandom *rnd) {
- const uint8_t subpel_mode = rnd->Rand8();
- if ((subpel_mode & 7) == 0) {
- return 0;
- } else if ((subpel_mode & 7) == 1) {
- return SCALE_SUBPEL_SHIFTS - 1;
- } else {
- return 1 + rnd->PseudoUniform(SCALE_SUBPEL_SHIFTS - 2);
- }
- }
-
- void Prep(ACMRandom *rnd) {
- assert(rnd);
-
- // Choose subpel_x_ and subpel_y_. They should be less than
- // SCALE_SUBPEL_SHIFTS; we also want to add extra weight to "interesting"
- // values: 0 and SCALE_SUBPEL_SHIFTS - 1
- subpel_x_ = RandomSubpel(rnd);
- subpel_y_ = RandomSubpel(rnd);
-
- image_->Initialize(rnd);
- }
-
- int width_, height_, bd_;
- NTaps ntaps_x_, ntaps_y_;
- bool avg_;
- int subpel_x_, subpel_y_;
- TestFilter filter_x_, filter_y_;
- TestImage<SrcPixel> *image_;
- ConvolveParams convolve_params_;
-};
-
-typedef tuple<int, int> BlockDimension;
-
-typedef void (*LowbdConvolveFunc)(const uint8_t *src, int src_stride,
- uint8_t *dst, int dst_stride, int w, int h,
- const InterpFilterParams *filter_params_x,
- const InterpFilterParams *filter_params_y,
- const int subpel_x_qn, const int x_step_qn,
- const int subpel_y_qn, const int y_step_qn,
- ConvolveParams *conv_params);
-
-// Test parameter list:
-// <tst_fun, dims, ntaps_x, ntaps_y, avg>
-typedef tuple<LowbdConvolveFunc, BlockDimension, NTaps, NTaps, bool>
- LowBDParams;
-
-class LowBDConvolveScaleTest
- : public ConvolveScaleTestBase<uint8_t>,
- public ::testing::WithParamInterface<LowBDParams> {
- public:
- virtual ~LowBDConvolveScaleTest() {}
-
- void SetUp() {
- tst_fun_ = GET_PARAM(0);
-
- const BlockDimension &block = GET_PARAM(1);
- const NTaps ntaps_x = GET_PARAM(2);
- const NTaps ntaps_y = GET_PARAM(3);
- const int bd = 8;
- const bool avg = GET_PARAM(4);
-
- SetParams(BaseParams(block, ntaps_x, ntaps_y, avg), bd);
- }
-
- void RunOne(bool ref) {
- const uint8_t *src = image_->GetSrcData(ref, false);
- uint8_t *dst = image_->GetDstData(ref, false);
- convolve_params_.dst = image_->GetDst16Data(ref, false);
- const int src_stride = image_->src_stride();
- const int dst_stride = image_->dst_stride();
- if (ref) {
- av1_convolve_2d_scale_c(src, src_stride, dst, dst_stride, width_, height_,
- &filter_x_.params_, &filter_y_.params_, subpel_x_,
- kXStepQn, subpel_y_, kYStepQn, &convolve_params_);
- } else {
- tst_fun_(src, src_stride, dst, dst_stride, width_, height_,
- &filter_x_.params_, &filter_y_.params_, subpel_x_, kXStepQn,
- subpel_y_, kYStepQn, &convolve_params_);
- }
- }
-
- private:
- LowbdConvolveFunc tst_fun_;
-};
-
-const BlockDimension kBlockDim[] = {
- make_tuple(2, 2), make_tuple(2, 4), make_tuple(4, 4),
- make_tuple(4, 8), make_tuple(8, 4), make_tuple(8, 8),
- make_tuple(8, 16), make_tuple(16, 8), make_tuple(16, 16),
- make_tuple(16, 32), make_tuple(32, 16), make_tuple(32, 32),
- make_tuple(32, 64), make_tuple(64, 32), make_tuple(64, 64),
- make_tuple(64, 128), make_tuple(128, 64), make_tuple(128, 128),
-};
-
-const NTaps kNTaps[] = { EIGHT_TAP };
-
-TEST_P(LowBDConvolveScaleTest, Check) { Run(); }
-TEST_P(LowBDConvolveScaleTest, DISABLED_Speed) { SpeedTest(); }
-
-INSTANTIATE_TEST_CASE_P(
- SSE4_1, LowBDConvolveScaleTest,
- ::testing::Combine(::testing::Values(av1_convolve_2d_scale_sse4_1),
- ::testing::ValuesIn(kBlockDim),
- ::testing::ValuesIn(kNTaps), ::testing::ValuesIn(kNTaps),
- ::testing::Bool()));
-
-typedef void (*HighbdConvolveFunc)(const uint16_t *src, int src_stride,
- uint16_t *dst, int dst_stride, int w, int h,
- const InterpFilterParams *filter_params_x,
- const InterpFilterParams *filter_params_y,
- const int subpel_x_qn, const int x_step_qn,
- const int subpel_y_qn, const int y_step_qn,
- ConvolveParams *conv_params, int bd);
-
-// Test parameter list:
-// <tst_fun, dims, ntaps_x, ntaps_y, avg, bd>
-typedef tuple<HighbdConvolveFunc, BlockDimension, NTaps, NTaps, bool, int>
- HighBDParams;
-
-class HighBDConvolveScaleTest
- : public ConvolveScaleTestBase<uint16_t>,
- public ::testing::WithParamInterface<HighBDParams> {
- public:
- virtual ~HighBDConvolveScaleTest() {}
-
- void SetUp() {
- tst_fun_ = GET_PARAM(0);
-
- const BlockDimension &block = GET_PARAM(1);
- const NTaps ntaps_x = GET_PARAM(2);
- const NTaps ntaps_y = GET_PARAM(3);
- const bool avg = GET_PARAM(4);
- const int bd = GET_PARAM(5);
-
- SetParams(BaseParams(block, ntaps_x, ntaps_y, avg), bd);
- }
-
- void RunOne(bool ref) {
- const uint16_t *src = image_->GetSrcData(ref, false);
- uint16_t *dst = image_->GetDstData(ref, false);
- convolve_params_.dst = image_->GetDst16Data(ref, false);
- const int src_stride = image_->src_stride();
- const int dst_stride = image_->dst_stride();
-
- if (ref) {
- av1_highbd_convolve_2d_scale_c(
- src, src_stride, dst, dst_stride, width_, height_, &filter_x_.params_,
- &filter_y_.params_, subpel_x_, kXStepQn, subpel_y_, kYStepQn,
- &convolve_params_, bd_);
- } else {
- tst_fun_(src, src_stride, dst, dst_stride, width_, height_,
- &filter_x_.params_, &filter_y_.params_, subpel_x_, kXStepQn,
- subpel_y_, kYStepQn, &convolve_params_, bd_);
- }
- }
-
- private:
- HighbdConvolveFunc tst_fun_;
-};
-
-const int kBDs[] = { 8, 10, 12 };
-
-TEST_P(HighBDConvolveScaleTest, Check) { Run(); }
-TEST_P(HighBDConvolveScaleTest, DISABLED_Speed) { SpeedTest(); }
-
-INSTANTIATE_TEST_CASE_P(
- SSE4_1, HighBDConvolveScaleTest,
- ::testing::Combine(::testing::Values(av1_highbd_convolve_2d_scale_sse4_1),
- ::testing::ValuesIn(kBlockDim),
- ::testing::ValuesIn(kNTaps), ::testing::ValuesIn(kNTaps),
- ::testing::Bool(), ::testing::ValuesIn(kBDs)));
-} // namespace
diff --git a/third_party/aom/test/av1_encoder_parms_get_to_decoder.cc b/third_party/aom/test/av1_encoder_parms_get_to_decoder.cc
deleted file mode 100644
index e8470e5d5..000000000
--- a/third_party/aom/test/av1_encoder_parms_get_to_decoder.cc
+++ /dev/null
@@ -1,158 +0,0 @@
-/*
- * Copyright (c) 2018, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#include "third_party/googletest/src/googletest/include/gtest/gtest.h"
-
-#include "test/codec_factory.h"
-#include "test/encode_test_driver.h"
-#include "test/util.h"
-#include "test/y4m_video_source.h"
-
-#include "aom/aom_decoder.h"
-#include "av1/decoder/decoder.h"
-
-namespace {
-
-const int kMaxPsnr = 100;
-
-struct ParamPassingTestVideo {
- const char *name;
- uint32_t width;
- uint32_t height;
- uint32_t bitrate;
- int frames;
-};
-
-const ParamPassingTestVideo kAV1ParamPassingTestVector = {
- "niklas_1280_720_30.y4m", 1280, 720, 600, 3
-};
-
-struct EncodeParameters {
- int32_t lossless;
- aom_color_primaries_t color_primaries;
- aom_transfer_characteristics_t transfer_characteristics;
- aom_matrix_coefficients_t matrix_coefficients;
- aom_color_range_t color_range;
- aom_chroma_sample_position_t chroma_sample_position;
- int32_t render_size[2];
-};
-
-const EncodeParameters kAV1EncodeParameterSet[] = {
- { 1,
- AOM_CICP_CP_BT_709,
- AOM_CICP_TC_BT_709,
- AOM_CICP_MC_BT_709,
- AOM_CR_STUDIO_RANGE,
- AOM_CSP_UNKNOWN,
- { 0, 0 } },
- { 0,
- AOM_CICP_CP_BT_470_M,
- AOM_CICP_TC_BT_470_M,
- AOM_CICP_MC_BT_470_B_G,
- AOM_CR_FULL_RANGE,
- AOM_CSP_VERTICAL,
- { 0, 0 } },
- { 1,
- AOM_CICP_CP_BT_601,
- AOM_CICP_TC_BT_601,
- AOM_CICP_MC_BT_601,
- AOM_CR_STUDIO_RANGE,
- AOM_CSP_COLOCATED,
- { 0, 0 } },
- { 0,
- AOM_CICP_CP_BT_2020,
- AOM_CICP_TC_BT_2020_10_BIT,
- AOM_CICP_MC_BT_2020_NCL,
- AOM_CR_FULL_RANGE,
- AOM_CSP_RESERVED,
- { 640, 480 } },
-};
-
-class AVxEncoderParmsGetToDecoder
- : public ::libaom_test::EncoderTest,
- public ::libaom_test::CodecTestWithParam<EncodeParameters> {
- protected:
- AVxEncoderParmsGetToDecoder()
- : EncoderTest(GET_PARAM(0)), encode_parms(GET_PARAM(1)) {}
-
- virtual ~AVxEncoderParmsGetToDecoder() {}
-
- virtual void SetUp() {
- InitializeConfig();
- SetMode(::libaom_test::kTwoPassGood);
- cfg_.g_lag_in_frames = 25;
- test_video_ = kAV1ParamPassingTestVector;
- cfg_.rc_target_bitrate = test_video_.bitrate;
- }
-
- virtual void PreEncodeFrameHook(::libaom_test::VideoSource *video,
- ::libaom_test::Encoder *encoder) {
- if (video->frame() == 1) {
- encoder->Control(AV1E_SET_COLOR_PRIMARIES, encode_parms.color_primaries);
- encoder->Control(AV1E_SET_TRANSFER_CHARACTERISTICS,
- encode_parms.transfer_characteristics);
- encoder->Control(AV1E_SET_MATRIX_COEFFICIENTS,
- encode_parms.matrix_coefficients);
- encoder->Control(AV1E_SET_COLOR_RANGE, encode_parms.color_range);
- encoder->Control(AV1E_SET_CHROMA_SAMPLE_POSITION,
- encode_parms.chroma_sample_position);
- encoder->Control(AV1E_SET_LOSSLESS, encode_parms.lossless);
- if (encode_parms.render_size[0] > 0 && encode_parms.render_size[1] > 0) {
- encoder->Control(AV1E_SET_RENDER_SIZE, encode_parms.render_size);
- }
- }
- }
-
- virtual void DecompressedFrameHook(const aom_image_t &img,
- aom_codec_pts_t pts) {
- (void)pts;
- if (encode_parms.render_size[0] > 0 && encode_parms.render_size[1] > 0) {
- EXPECT_EQ(encode_parms.render_size[0], (int)img.r_w);
- EXPECT_EQ(encode_parms.render_size[1], (int)img.r_h);
- }
- EXPECT_EQ(encode_parms.color_primaries, img.cp);
- EXPECT_EQ(encode_parms.transfer_characteristics, img.tc);
- EXPECT_EQ(encode_parms.matrix_coefficients, img.mc);
- EXPECT_EQ(encode_parms.color_range, img.range);
- EXPECT_EQ(encode_parms.chroma_sample_position, img.csp);
- }
-
- virtual void PSNRPktHook(const aom_codec_cx_pkt_t *pkt) {
- if (encode_parms.lossless) {
- EXPECT_EQ(kMaxPsnr, pkt->data.psnr.psnr[0]);
- }
- }
-
- virtual bool HandleDecodeResult(const aom_codec_err_t res_dec,
- libaom_test::Decoder *decoder) {
- EXPECT_EQ(AOM_CODEC_OK, res_dec) << decoder->DecodeError();
- return AOM_CODEC_OK == res_dec;
- }
-
- ParamPassingTestVideo test_video_;
-
- private:
- EncodeParameters encode_parms;
-};
-
-TEST_P(AVxEncoderParmsGetToDecoder, BitstreamParms) {
- init_flags_ = AOM_CODEC_USE_PSNR;
-
- testing::internal::scoped_ptr<libaom_test::VideoSource> video(
- new libaom_test::Y4mVideoSource(test_video_.name, 0, test_video_.frames));
- ASSERT_TRUE(video.get() != NULL);
-
- ASSERT_NO_FATAL_FAILURE(RunLoop(video.get()));
-}
-
-AV1_INSTANTIATE_TEST_CASE(AVxEncoderParmsGetToDecoder,
- ::testing::ValuesIn(kAV1EncodeParameterSet));
-} // namespace
diff --git a/third_party/aom/test/av1_ext_tile_test.cc b/third_party/aom/test/av1_ext_tile_test.cc
deleted file mode 100644
index 424d2f065..000000000
--- a/third_party/aom/test/av1_ext_tile_test.cc
+++ /dev/null
@@ -1,215 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#include <assert.h>
-#include <string>
-#include <vector>
-#include "third_party/googletest/src/googletest/include/gtest/gtest.h"
-#include "test/codec_factory.h"
-#include "test/encode_test_driver.h"
-#include "test/i420_video_source.h"
-#include "test/md5_helper.h"
-#include "test/util.h"
-
-namespace {
-// The number of frames to be encoded/decoded
-const int kLimit = 8;
-// Skip 1 frame to check the frame decoding independency.
-const int kSkip = 5;
-const int kTileSize = 1;
-const int kTIleSizeInPixels = (kTileSize << 6);
-// Fake width and height so that they can be multiples of the tile size.
-const int kImgWidth = 704;
-const int kImgHeight = 576;
-
-// This test tests large scale tile coding case. Non-large-scale tile coding
-// is tested by the tile_independence test.
-class AV1ExtTileTest
- : public ::libaom_test::CodecTestWith2Params<libaom_test::TestMode, int>,
- public ::libaom_test::EncoderTest {
- protected:
- AV1ExtTileTest()
- : EncoderTest(GET_PARAM(0)), encoding_mode_(GET_PARAM(1)),
- set_cpu_used_(GET_PARAM(2)) {
- init_flags_ = AOM_CODEC_USE_PSNR;
- aom_codec_dec_cfg_t cfg = aom_codec_dec_cfg_t();
- cfg.w = kImgWidth;
- cfg.h = kImgHeight;
- cfg.allow_lowbitdepth = 1;
-
- decoder_ = codec_->CreateDecoder(cfg, 0);
- decoder_->Control(AV1_SET_TILE_MODE, 1);
- decoder_->Control(AV1D_EXT_TILE_DEBUG, 1);
- decoder_->Control(AV1_SET_DECODE_TILE_ROW, -1);
- decoder_->Control(AV1_SET_DECODE_TILE_COL, -1);
-
- // Allocate buffer to store tile image.
- aom_img_alloc(&tile_img_, AOM_IMG_FMT_I420, kImgWidth, kImgHeight, 32);
-
- md5_.clear();
- tile_md5_.clear();
- }
-
- virtual ~AV1ExtTileTest() {
- aom_img_free(&tile_img_);
- delete decoder_;
- }
-
- virtual void SetUp() {
- InitializeConfig();
- SetMode(encoding_mode_);
-
- cfg_.g_lag_in_frames = 0;
- cfg_.rc_end_usage = AOM_VBR;
- cfg_.g_error_resilient = 1;
-
- cfg_.rc_max_quantizer = 56;
- cfg_.rc_min_quantizer = 0;
- }
-
- virtual void PreEncodeFrameHook(::libaom_test::VideoSource *video,
- ::libaom_test::Encoder *encoder) {
- if (video->frame() == 0) {
- // Encode setting
- encoder->Control(AOME_SET_CPUUSED, set_cpu_used_);
- encoder->Control(AOME_SET_ENABLEAUTOALTREF, 0);
- encoder->Control(AV1E_SET_FRAME_PARALLEL_DECODING, 1);
-
- // TODO(yunqingwang): test single_tile_decoding = 0.
- encoder->Control(AV1E_SET_SINGLE_TILE_DECODING, 1);
- // Always use 64x64 max partition.
- encoder->Control(AV1E_SET_SUPERBLOCK_SIZE, AOM_SUPERBLOCK_SIZE_64X64);
- // Set tile_columns and tile_rows to MAX values, which guarantees the tile
- // size of 64 x 64 pixels(i.e. 1 SB) for <= 4k resolution.
- encoder->Control(AV1E_SET_TILE_COLUMNS, 6);
- encoder->Control(AV1E_SET_TILE_ROWS, 6);
- }
-
- if (video->frame() == 1) {
- frame_flags_ =
- AOM_EFLAG_NO_UPD_LAST | AOM_EFLAG_NO_UPD_GF | AOM_EFLAG_NO_UPD_ARF;
- }
- }
-
- virtual void DecompressedFrameHook(const aom_image_t &img,
- aom_codec_pts_t pts) {
- // Skip 1 already decoded frame to be consistent with the decoder in this
- // test.
- if (pts == (aom_codec_pts_t)kSkip) return;
-
- // Calculate MD5 as the reference.
- ::libaom_test::MD5 md5_res;
- md5_res.Add(&img);
- md5_.push_back(md5_res.Get());
- }
-
- virtual void FramePktHook(const aom_codec_cx_pkt_t *pkt) {
- // Skip decoding 1 frame.
- if (pkt->data.frame.pts == (aom_codec_pts_t)kSkip) return;
-
- bool IsLastFrame = (pkt->data.frame.pts == (aom_codec_pts_t)(kLimit - 1));
-
- // Decode the first (kLimit - 1) frames as whole frame, and decode the last
- // frame in single tiles.
- for (int r = 0; r < kImgHeight / kTIleSizeInPixels; ++r) {
- for (int c = 0; c < kImgWidth / kTIleSizeInPixels; ++c) {
- if (!IsLastFrame) {
- decoder_->Control(AV1_SET_DECODE_TILE_ROW, -1);
- decoder_->Control(AV1_SET_DECODE_TILE_COL, -1);
- } else {
- decoder_->Control(AV1_SET_DECODE_TILE_ROW, r);
- decoder_->Control(AV1_SET_DECODE_TILE_COL, c);
- }
-
- const aom_codec_err_t res = decoder_->DecodeFrame(
- reinterpret_cast<uint8_t *>(pkt->data.frame.buf),
- pkt->data.frame.sz);
- if (res != AOM_CODEC_OK) {
- abort_ = true;
- ASSERT_EQ(AOM_CODEC_OK, res);
- }
- const aom_image_t *img = decoder_->GetDxData().Next();
-
- if (!IsLastFrame) {
- if (img) {
- ::libaom_test::MD5 md5_res;
- md5_res.Add(img);
- tile_md5_.push_back(md5_res.Get());
- }
- break;
- }
-
- const int kMaxMBPlane = 3;
- for (int plane = 0; plane < kMaxMBPlane; ++plane) {
- const int shift = (plane == 0) ? 0 : 1;
- int tile_height = kTIleSizeInPixels >> shift;
- int tile_width = kTIleSizeInPixels >> shift;
-
- for (int tr = 0; tr < tile_height; ++tr) {
- memcpy(tile_img_.planes[plane] +
- tile_img_.stride[plane] * (r * tile_height + tr) +
- c * tile_width,
- img->planes[plane] + img->stride[plane] * tr, tile_width);
- }
- }
- }
-
- if (!IsLastFrame) break;
- }
-
- if (IsLastFrame) {
- ::libaom_test::MD5 md5_res;
- md5_res.Add(&tile_img_);
- tile_md5_.push_back(md5_res.Get());
- }
- }
-
- void TestRoundTrip() {
- ::libaom_test::I420VideoSource video(
- "hantro_collage_w352h288.yuv", kImgWidth, kImgHeight, 30, 1, 0, kLimit);
- cfg_.rc_target_bitrate = 500;
- cfg_.g_error_resilient = AOM_ERROR_RESILIENT_DEFAULT;
- cfg_.large_scale_tile = 1;
- cfg_.g_lag_in_frames = 0;
- cfg_.g_threads = 1;
-
- // Tile encoding
- init_flags_ = AOM_CODEC_USE_PSNR;
- ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
-
- // Compare to check if two vectors are equal.
- ASSERT_EQ(md5_, tile_md5_);
- }
-
- ::libaom_test::TestMode encoding_mode_;
- int set_cpu_used_;
- ::libaom_test::Decoder *decoder_;
- aom_image_t tile_img_;
- std::vector<std::string> md5_;
- std::vector<std::string> tile_md5_;
-};
-
-TEST_P(AV1ExtTileTest, DecoderResultTest) { TestRoundTrip(); }
-
-AV1_INSTANTIATE_TEST_CASE(
- // Now only test 2-pass mode.
- AV1ExtTileTest, ::testing::Values(::libaom_test::kTwoPassGood),
- ::testing::Range(1, 4));
-
-class AV1ExtTileTestLarge : public AV1ExtTileTest {};
-
-TEST_P(AV1ExtTileTestLarge, DecoderResultTest) { TestRoundTrip(); }
-
-AV1_INSTANTIATE_TEST_CASE(
- // Now only test 2-pass mode.
- AV1ExtTileTestLarge, ::testing::Values(::libaom_test::kTwoPassGood),
- ::testing::Range(0, 1));
-} // namespace
diff --git a/third_party/aom/test/av1_fwd_txfm1d_test.cc b/third_party/aom/test/av1_fwd_txfm1d_test.cc
deleted file mode 100644
index 49a666879..000000000
--- a/third_party/aom/test/av1_fwd_txfm1d_test.cc
+++ /dev/null
@@ -1,105 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#include "av1/encoder/av1_fwd_txfm1d.h"
-#include "test/av1_txfm_test.h"
-
-using libaom_test::ACMRandom;
-using libaom_test::TYPE_ADST;
-using libaom_test::TYPE_DCT;
-using libaom_test::TYPE_IDTX;
-using libaom_test::TYPE_TXFM;
-using libaom_test::input_base;
-using libaom_test::reference_hybrid_1d;
-
-namespace {
-const int txfm_type_num = 3;
-const TYPE_TXFM txfm_type_ls[txfm_type_num] = { TYPE_DCT, TYPE_ADST,
- TYPE_IDTX };
-
-const int txfm_size_num = 5;
-
-const int txfm_size_ls[] = { 4, 8, 16, 32, 64 };
-
-const TxfmFunc fwd_txfm_func_ls[][txfm_type_num] = {
- { av1_fdct4_new, av1_fadst4_new, av1_fidentity4_c },
- { av1_fdct8_new, av1_fadst8_new, av1_fidentity8_c },
- { av1_fdct16_new, av1_fadst16_new, av1_fidentity16_c },
- { av1_fdct32_new, NULL, av1_fidentity32_c },
- { av1_fdct64_new, NULL, NULL },
-};
-
-// the maximum stage number of fwd/inv 1d dct/adst txfm is 12
-const int8_t cos_bit = 14;
-const int8_t range_bit[12] = { 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20 };
-
-TEST(av1_fwd_txfm1d, round_shift) {
- EXPECT_EQ(round_shift(7, 1), 4);
- EXPECT_EQ(round_shift(-7, 1), -3);
-
- EXPECT_EQ(round_shift(7, 2), 2);
- EXPECT_EQ(round_shift(-7, 2), -2);
-
- EXPECT_EQ(round_shift(8, 2), 2);
- EXPECT_EQ(round_shift(-8, 2), -2);
-}
-
-TEST(av1_fwd_txfm1d, av1_cospi_arr_data) {
- for (int i = 0; i < 7; i++) {
- for (int j = 0; j < 64; j++) {
- EXPECT_EQ(av1_cospi_arr_data[i][j],
- (int32_t)round(cos(M_PI * j / 128) * (1 << (cos_bit_min + i))));
- }
- }
-}
-
-TEST(av1_fwd_txfm1d, accuracy) {
- ACMRandom rnd(ACMRandom::DeterministicSeed());
- for (int si = 0; si < txfm_size_num; ++si) {
- int txfm_size = txfm_size_ls[si];
- int32_t *input = new int32_t[txfm_size];
- int32_t *output = new int32_t[txfm_size];
- double *ref_input = new double[txfm_size];
- double *ref_output = new double[txfm_size];
-
- for (int ti = 0; ti < txfm_type_num; ++ti) {
- TYPE_TXFM txfm_type = txfm_type_ls[ti];
- TxfmFunc fwd_txfm_func = fwd_txfm_func_ls[si][ti];
- int max_error = 7;
-
- const int count_test_block = 5000;
- if (fwd_txfm_func != NULL) {
- for (int ti = 0; ti < count_test_block; ++ti) {
- for (int ni = 0; ni < txfm_size; ++ni) {
- input[ni] = rnd.Rand16() % input_base - rnd.Rand16() % input_base;
- ref_input[ni] = static_cast<double>(input[ni]);
- }
-
- fwd_txfm_func(input, output, cos_bit, range_bit);
- reference_hybrid_1d(ref_input, ref_output, txfm_size, txfm_type);
-
- for (int ni = 0; ni < txfm_size; ++ni) {
- ASSERT_LE(
- abs(output[ni] - static_cast<int32_t>(round(ref_output[ni]))),
- max_error)
- << "tx size = " << txfm_size << ", tx type = " << txfm_type;
- }
- }
- }
- }
-
- delete[] input;
- delete[] output;
- delete[] ref_input;
- delete[] ref_output;
- }
-}
-} // namespace
diff --git a/third_party/aom/test/av1_fwd_txfm2d_test.cc b/third_party/aom/test/av1_fwd_txfm2d_test.cc
deleted file mode 100644
index 75f20536b..000000000
--- a/third_party/aom/test/av1_fwd_txfm2d_test.cc
+++ /dev/null
@@ -1,511 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#include <math.h>
-#include <stdio.h>
-#include <stdlib.h>
-#include <vector>
-
-#include "config/av1_rtcd.h"
-
-#include "test/acm_random.h"
-#include "test/util.h"
-#include "test/av1_txfm_test.h"
-#include "av1/common/av1_txfm.h"
-#include "av1/encoder/hybrid_fwd_txfm.h"
-
-using libaom_test::ACMRandom;
-using libaom_test::TYPE_TXFM;
-using libaom_test::bd;
-using libaom_test::compute_avg_abs_error;
-using libaom_test::input_base;
-
-using std::vector;
-
-namespace {
-// tx_type_, tx_size_, max_error_, max_avg_error_
-typedef ::testing::tuple<TX_TYPE, TX_SIZE, double, double> AV1FwdTxfm2dParam;
-
-class AV1FwdTxfm2d : public ::testing::TestWithParam<AV1FwdTxfm2dParam> {
- public:
- virtual void SetUp() {
- tx_type_ = GET_PARAM(0);
- tx_size_ = GET_PARAM(1);
- max_error_ = GET_PARAM(2);
- max_avg_error_ = GET_PARAM(3);
- count_ = 500;
- TXFM_2D_FLIP_CFG fwd_txfm_flip_cfg;
- av1_get_fwd_txfm_cfg(tx_type_, tx_size_, &fwd_txfm_flip_cfg);
- amplify_factor_ = libaom_test::get_amplification_factor(tx_type_, tx_size_);
- tx_width_ = tx_size_wide[fwd_txfm_flip_cfg.tx_size];
- tx_height_ = tx_size_high[fwd_txfm_flip_cfg.tx_size];
- ud_flip_ = fwd_txfm_flip_cfg.ud_flip;
- lr_flip_ = fwd_txfm_flip_cfg.lr_flip;
-
- fwd_txfm_ = libaom_test::fwd_txfm_func_ls[tx_size_];
- txfm2d_size_ = tx_width_ * tx_height_;
- input_ = reinterpret_cast<int16_t *>(
- aom_memalign(16, sizeof(input_[0]) * txfm2d_size_));
- output_ = reinterpret_cast<int32_t *>(
- aom_memalign(16, sizeof(output_[0]) * txfm2d_size_));
- ref_input_ = reinterpret_cast<double *>(
- aom_memalign(16, sizeof(ref_input_[0]) * txfm2d_size_));
- ref_output_ = reinterpret_cast<double *>(
- aom_memalign(16, sizeof(ref_output_[0]) * txfm2d_size_));
- }
-
- void RunFwdAccuracyCheck() {
- ACMRandom rnd(ACMRandom::DeterministicSeed());
- double avg_abs_error = 0;
- for (int ci = 0; ci < count_; ci++) {
- for (int ni = 0; ni < txfm2d_size_; ++ni) {
- input_[ni] = rnd.Rand16() % input_base;
- ref_input_[ni] = static_cast<double>(input_[ni]);
- output_[ni] = 0;
- ref_output_[ni] = 0;
- }
-
- fwd_txfm_(input_, output_, tx_width_, tx_type_, bd);
-
- if (lr_flip_ && ud_flip_) {
- libaom_test::fliplrud(ref_input_, tx_width_, tx_height_, tx_width_);
- } else if (lr_flip_) {
- libaom_test::fliplr(ref_input_, tx_width_, tx_height_, tx_width_);
- } else if (ud_flip_) {
- libaom_test::flipud(ref_input_, tx_width_, tx_height_, tx_width_);
- }
-
- libaom_test::reference_hybrid_2d(ref_input_, ref_output_, tx_type_,
- tx_size_);
-
- double actual_max_error = 0;
- for (int ni = 0; ni < txfm2d_size_; ++ni) {
- ref_output_[ni] = round(ref_output_[ni]);
- const double this_error =
- fabs(output_[ni] - ref_output_[ni]) / amplify_factor_;
- actual_max_error = AOMMAX(actual_max_error, this_error);
- }
- EXPECT_GE(max_error_, actual_max_error)
- << "tx_size = " << tx_size_ << ", tx_type = " << tx_type_;
- if (actual_max_error > max_error_) { // exit early.
- break;
- }
-
- avg_abs_error += compute_avg_abs_error<int32_t, double>(
- output_, ref_output_, txfm2d_size_);
- }
-
- avg_abs_error /= amplify_factor_;
- avg_abs_error /= count_;
- EXPECT_GE(max_avg_error_, avg_abs_error)
- << "tx_size = " << tx_size_ << ", tx_type = " << tx_type_;
- }
-
- virtual void TearDown() {
- aom_free(input_);
- aom_free(output_);
- aom_free(ref_input_);
- aom_free(ref_output_);
- }
-
- private:
- double max_error_;
- double max_avg_error_;
- int count_;
- double amplify_factor_;
- TX_TYPE tx_type_;
- TX_SIZE tx_size_;
- int tx_width_;
- int tx_height_;
- int txfm2d_size_;
- FwdTxfm2dFunc fwd_txfm_;
- int16_t *input_;
- int32_t *output_;
- double *ref_input_;
- double *ref_output_;
- int ud_flip_; // flip upside down
- int lr_flip_; // flip left to right
-};
-
-static double avg_error_ls[TX_SIZES_ALL] = {
- 0.5, // 4x4 transform
- 0.5, // 8x8 transform
- 1.2, // 16x16 transform
- 6.1, // 32x32 transform
- 3.4, // 64x64 transform
- 0.57, // 4x8 transform
- 0.68, // 8x4 transform
- 0.92, // 8x16 transform
- 1.1, // 16x8 transform
- 4.1, // 16x32 transform
- 6, // 32x16 transform
- 3.5, // 32x64 transform
- 5.7, // 64x32 transform
- 0.6, // 4x16 transform
- 0.9, // 16x4 transform
- 1.2, // 8x32 transform
- 1.7, // 32x8 transform
- 2.0, // 16x64 transform
- 4.7, // 64x16 transform
-};
-
-static double max_error_ls[TX_SIZES_ALL] = {
- 3, // 4x4 transform
- 5, // 8x8 transform
- 11, // 16x16 transform
- 70, // 32x32 transform
- 64, // 64x64 transform
- 3.9, // 4x8 transform
- 4.3, // 8x4 transform
- 12, // 8x16 transform
- 12, // 16x8 transform
- 32, // 16x32 transform
- 46, // 32x16 transform
- 136, // 32x64 transform
- 136, // 64x32 transform
- 5, // 4x16 transform
- 6, // 16x4 transform
- 21, // 8x32 transform
- 13, // 32x8 transform
- 30, // 16x64 transform
- 36, // 64x16 transform
-};
-
-vector<AV1FwdTxfm2dParam> GetTxfm2dParamList() {
- vector<AV1FwdTxfm2dParam> param_list;
- for (int s = 0; s < TX_SIZES; ++s) {
- const double max_error = max_error_ls[s];
- const double avg_error = avg_error_ls[s];
- for (int t = 0; t < TX_TYPES; ++t) {
- const TX_TYPE tx_type = static_cast<TX_TYPE>(t);
- const TX_SIZE tx_size = static_cast<TX_SIZE>(s);
- if (libaom_test::IsTxSizeTypeValid(tx_size, tx_type)) {
- param_list.push_back(
- AV1FwdTxfm2dParam(tx_type, tx_size, max_error, avg_error));
- }
- }
- }
- return param_list;
-}
-
-INSTANTIATE_TEST_CASE_P(C, AV1FwdTxfm2d,
- ::testing::ValuesIn(GetTxfm2dParamList()));
-
-TEST_P(AV1FwdTxfm2d, RunFwdAccuracyCheck) { RunFwdAccuracyCheck(); }
-
-TEST(AV1FwdTxfm2d, CfgTest) {
- for (int bd_idx = 0; bd_idx < BD_NUM; ++bd_idx) {
- int bd = libaom_test::bd_arr[bd_idx];
- int8_t low_range = libaom_test::low_range_arr[bd_idx];
- int8_t high_range = libaom_test::high_range_arr[bd_idx];
- for (int tx_size = 0; tx_size < TX_SIZES_ALL; ++tx_size) {
- for (int tx_type = 0; tx_type < TX_TYPES; ++tx_type) {
- if (libaom_test::IsTxSizeTypeValid(static_cast<TX_SIZE>(tx_size),
- static_cast<TX_TYPE>(tx_type)) ==
- false) {
- continue;
- }
- TXFM_2D_FLIP_CFG cfg;
- av1_get_fwd_txfm_cfg(static_cast<TX_TYPE>(tx_type),
- static_cast<TX_SIZE>(tx_size), &cfg);
- int8_t stage_range_col[MAX_TXFM_STAGE_NUM];
- int8_t stage_range_row[MAX_TXFM_STAGE_NUM];
- av1_gen_fwd_stage_range(stage_range_col, stage_range_row, &cfg, bd);
- libaom_test::txfm_stage_range_check(stage_range_col, cfg.stage_num_col,
- cfg.cos_bit_col, low_range,
- high_range);
- libaom_test::txfm_stage_range_check(stage_range_row, cfg.stage_num_row,
- cfg.cos_bit_row, low_range,
- high_range);
- }
- }
- }
-}
-
-typedef void (*lowbd_fwd_txfm_func)(const int16_t *src_diff, tran_low_t *coeff,
- int diff_stride, TxfmParam *txfm_param);
-
-void AV1FwdTxfm2dMatchTest(TX_SIZE tx_size, lowbd_fwd_txfm_func target_func) {
- const int bd = 8;
- TxfmParam param;
- memset(&param, 0, sizeof(param));
- const int rows = tx_size_high[tx_size];
- const int cols = tx_size_wide[tx_size];
- // printf("%d x %d\n", cols, rows);
- for (int tx_type = 0; tx_type < TX_TYPES; ++tx_type) {
- if (libaom_test::IsTxSizeTypeValid(
- tx_size, static_cast<TX_TYPE>(tx_type)) == false) {
- continue;
- }
-
- FwdTxfm2dFunc ref_func = libaom_test::fwd_txfm_func_ls[tx_size];
- if (ref_func != NULL) {
- DECLARE_ALIGNED(32, int16_t, input[64 * 64]) = { 0 };
- DECLARE_ALIGNED(32, int32_t, output[64 * 64]);
- DECLARE_ALIGNED(32, int32_t, ref_output[64 * 64]);
- int input_stride = 64;
- ACMRandom rnd(ACMRandom::DeterministicSeed());
- for (int cnt = 0; cnt < 500; ++cnt) {
- if (cnt == 0) {
- for (int r = 0; r < rows; ++r) {
- for (int c = 0; c < cols; ++c) {
- input[r * input_stride + c] = (1 << bd) - 1;
- }
- }
- } else {
- for (int r = 0; r < rows; ++r) {
- for (int c = 0; c < cols; ++c) {
- input[r * input_stride + c] = rnd.Rand16() % (1 << bd);
- }
- }
- }
- param.tx_type = (TX_TYPE)tx_type;
- param.tx_size = (TX_SIZE)tx_size;
- param.tx_set_type = EXT_TX_SET_ALL16;
- param.bd = bd;
- ref_func(input, ref_output, input_stride, (TX_TYPE)tx_type, bd);
- target_func(input, output, input_stride, &param);
- const int check_rows = AOMMIN(32, rows);
- const int check_cols = AOMMIN(32, rows * cols / check_rows);
- for (int r = 0; r < check_rows; ++r) {
- for (int c = 0; c < check_cols; ++c) {
- ASSERT_EQ(ref_output[r * check_cols + c],
- output[r * check_cols + c])
- << "[" << r << "," << c << "] cnt:" << cnt
- << " tx_size: " << tx_size << " tx_type: " << tx_type;
- }
- }
- }
- }
- }
-}
-
-typedef ::testing::tuple<TX_SIZE, lowbd_fwd_txfm_func> LbdFwdTxfm2dParam;
-
-class AV1FwdTxfm2dTest : public ::testing::TestWithParam<LbdFwdTxfm2dParam> {};
-
-TEST_P(AV1FwdTxfm2dTest, match) {
- AV1FwdTxfm2dMatchTest(GET_PARAM(0), GET_PARAM(1));
-}
-
-using ::testing::Combine;
-using ::testing::Values;
-using ::testing::ValuesIn;
-
-#if HAVE_SSE2
-static TX_SIZE fwd_txfm_for_sse2[] = {
- TX_4X4,
- TX_8X8,
- TX_16X16,
- TX_32X32,
- // TX_64X64,
- TX_4X8,
- TX_8X4,
- TX_8X16,
- TX_16X8,
- TX_16X32,
- TX_32X16,
- // TX_32X64,
- // TX_64X32,
- TX_4X16,
- TX_16X4,
- TX_8X32,
- TX_32X8,
- TX_16X64,
- TX_64X16,
-};
-
-INSTANTIATE_TEST_CASE_P(SSE2, AV1FwdTxfm2dTest,
- Combine(ValuesIn(fwd_txfm_for_sse2),
- Values(av1_lowbd_fwd_txfm_sse2)));
-#endif // HAVE_SSE2
-
-#if HAVE_SSE4_1
-static TX_SIZE fwd_txfm_for_sse41[] = {
- TX_4X4,
- TX_64X64,
- TX_32X64,
- TX_64X32,
-};
-
-INSTANTIATE_TEST_CASE_P(SSE4_1, AV1FwdTxfm2dTest,
- Combine(ValuesIn(fwd_txfm_for_sse41),
- Values(av1_lowbd_fwd_txfm_sse4_1)));
-#endif // HAVE_SSE4_1
-
-#if HAVE_AVX2
-static TX_SIZE fwd_txfm_for_avx2[] = {
- TX_4X4, TX_8X8, TX_16X16, TX_32X32, TX_64X64, TX_4X8, TX_8X4,
- TX_8X16, TX_16X8, TX_16X32, TX_32X16, TX_32X64, TX_64X32, TX_4X16,
- TX_16X4, TX_8X32, TX_32X8, TX_16X64, TX_64X16,
-};
-
-INSTANTIATE_TEST_CASE_P(AVX2, AV1FwdTxfm2dTest,
- Combine(ValuesIn(fwd_txfm_for_avx2),
- Values(av1_lowbd_fwd_txfm_avx2)));
-#endif // HAVE_AVX2
-
-typedef void (*Highbd_fwd_txfm_func)(const int16_t *src_diff, tran_low_t *coeff,
- int diff_stride, TxfmParam *txfm_param);
-
-void AV1HighbdFwdTxfm2dMatchTest(TX_SIZE tx_size,
- Highbd_fwd_txfm_func target_func) {
- const int bd_ar[2] = { 10, 12 };
- TxfmParam param;
- memset(&param, 0, sizeof(param));
- const int rows = tx_size_high[tx_size];
- const int cols = tx_size_wide[tx_size];
- for (int i = 0; i < 2; ++i) {
- const int bd = bd_ar[i];
- for (int tx_type = 0; tx_type < TX_TYPES; ++tx_type) {
- if (libaom_test::IsTxSizeTypeValid(
- tx_size, static_cast<TX_TYPE>(tx_type)) == false) {
- continue;
- }
-
- FwdTxfm2dFunc ref_func = libaom_test::fwd_txfm_func_ls[tx_size];
- if (ref_func != NULL) {
- DECLARE_ALIGNED(32, int16_t, input[64 * 64]) = { 0 };
- DECLARE_ALIGNED(32, int32_t, output[64 * 64]);
- DECLARE_ALIGNED(32, int32_t, ref_output[64 * 64]);
- int input_stride = 64;
- ACMRandom rnd(ACMRandom::DeterministicSeed());
- for (int cnt = 0; cnt < 500; ++cnt) {
- if (cnt == 0) {
- for (int r = 0; r < rows; ++r) {
- for (int c = 0; c < cols; ++c) {
- input[r * input_stride + c] = (1 << bd) - 1;
- }
- }
- } else {
- for (int r = 0; r < rows; ++r) {
- for (int c = 0; c < cols; ++c) {
- input[r * input_stride + c] = rnd.Rand16() % (1 << bd);
- }
- }
- }
- param.tx_type = (TX_TYPE)tx_type;
- param.tx_size = (TX_SIZE)tx_size;
- param.tx_set_type = EXT_TX_SET_ALL16;
- param.bd = bd;
-
- ref_func(input, ref_output, input_stride, (TX_TYPE)tx_type, bd);
- target_func(input, output, input_stride, &param);
- const int check_rows = AOMMIN(32, rows);
- const int check_cols = AOMMIN(32, rows * cols / check_rows);
- for (int r = 0; r < check_rows; ++r) {
- for (int c = 0; c < check_cols; ++c) {
- ASSERT_EQ(ref_output[r * check_cols + c],
- output[r * check_cols + c])
- << "[" << r << "," << c << "] cnt:" << cnt
- << " tx_size: " << tx_size << " tx_type: " << tx_type;
- }
- }
- }
- }
- }
- }
-}
-
-void AV1HighbdFwdTxfm2dSpeedTest(TX_SIZE tx_size,
- Highbd_fwd_txfm_func target_func) {
- const int bd_ar[2] = { 10, 12 };
- TxfmParam param;
- memset(&param, 0, sizeof(param));
- const int rows = tx_size_high[tx_size];
- const int cols = tx_size_wide[tx_size];
- const int num_loops = 1000000 / (rows * cols);
-
- for (int i = 0; i < 2; ++i) {
- const int bd = bd_ar[i];
- for (int tx_type = 0; tx_type < TX_TYPES; ++tx_type) {
- if (libaom_test::IsTxSizeTypeValid(
- tx_size, static_cast<TX_TYPE>(tx_type)) == false) {
- continue;
- }
-
- FwdTxfm2dFunc ref_func = libaom_test::fwd_txfm_func_ls[tx_size];
- if (ref_func != NULL) {
- DECLARE_ALIGNED(32, int16_t, input[64 * 64]) = { 0 };
- DECLARE_ALIGNED(32, int32_t, output[64 * 64]);
- DECLARE_ALIGNED(32, int32_t, ref_output[64 * 64]);
- int input_stride = 64;
- ACMRandom rnd(ACMRandom::DeterministicSeed());
-
- for (int r = 0; r < rows; ++r) {
- for (int c = 0; c < cols; ++c) {
- input[r * input_stride + c] = rnd.Rand16() % (1 << bd);
- }
- }
-
- param.tx_type = (TX_TYPE)tx_type;
- param.tx_size = (TX_SIZE)tx_size;
- param.tx_set_type = EXT_TX_SET_ALL16;
- param.bd = bd;
-
- aom_usec_timer ref_timer, test_timer;
-
- aom_usec_timer_start(&ref_timer);
- for (int i = 0; i < num_loops; ++i) {
- ref_func(input, ref_output, input_stride, (TX_TYPE)tx_type, bd);
- }
- aom_usec_timer_mark(&ref_timer);
- const int elapsed_time_c =
- static_cast<int>(aom_usec_timer_elapsed(&ref_timer));
-
- aom_usec_timer_start(&test_timer);
- for (int i = 0; i < num_loops; ++i) {
- target_func(input, output, input_stride, &param);
- }
- aom_usec_timer_mark(&test_timer);
- const int elapsed_time_simd =
- static_cast<int>(aom_usec_timer_elapsed(&test_timer));
-
- printf(
- "txfm_size[%d] \t txfm_type[%d] \t c_time=%d \t simd_time=%d \t "
- "gain=%d \n",
- tx_size, tx_type, elapsed_time_c, elapsed_time_simd,
- (elapsed_time_c / elapsed_time_simd));
- }
- }
- }
-}
-
-typedef ::testing::tuple<TX_SIZE, Highbd_fwd_txfm_func> HighbdFwdTxfm2dParam;
-
-class AV1HighbdFwdTxfm2dTest
- : public ::testing::TestWithParam<HighbdFwdTxfm2dParam> {};
-
-TEST_P(AV1HighbdFwdTxfm2dTest, match) {
- AV1HighbdFwdTxfm2dMatchTest(GET_PARAM(0), GET_PARAM(1));
-}
-
-TEST_P(AV1HighbdFwdTxfm2dTest, DISABLED_Speed) {
- AV1HighbdFwdTxfm2dSpeedTest(GET_PARAM(0), GET_PARAM(1));
-}
-
-using ::testing::Combine;
-using ::testing::Values;
-using ::testing::ValuesIn;
-
-#if HAVE_SSE4_1
-static TX_SIZE Highbd_fwd_txfm_for_sse4_1[] = {
- TX_4X4, TX_8X8, TX_16X16, TX_32X32, TX_64X64, TX_4X8, TX_8X4,
- TX_8X16, TX_16X8, TX_16X32, TX_32X16, TX_32X64, TX_64X32, TX_4X16,
- TX_16X4, TX_8X32, TX_32X8, TX_16X64, TX_64X16,
-};
-
-INSTANTIATE_TEST_CASE_P(SSE4_1, AV1HighbdFwdTxfm2dTest,
- Combine(ValuesIn(Highbd_fwd_txfm_for_sse4_1),
- Values(av1_highbd_fwd_txfm)));
-#endif // HAVE_SSE4_1
-
-} // namespace
diff --git a/third_party/aom/test/av1_highbd_iht_test.cc b/third_party/aom/test/av1_highbd_iht_test.cc
deleted file mode 100644
index 2d6490c2a..000000000
--- a/third_party/aom/test/av1_highbd_iht_test.cc
+++ /dev/null
@@ -1,315 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#include "third_party/googletest/src/googletest/include/gtest/gtest.h"
-
-#include "config/av1_rtcd.h"
-
-#include "test/acm_random.h"
-#include "test/av1_txfm_test.h"
-#include "test/clear_system_state.h"
-#include "test/register_state_check.h"
-#include "test/util.h"
-#include "av1/common/enums.h"
-#include "av1/common/scan.h"
-#include "aom_dsp/aom_dsp_common.h"
-#include "aom_ports/mem.h"
-
-namespace {
-
-using ::testing::tuple;
-using libaom_test::ACMRandom;
-
-typedef void (*HbdHtFunc)(const int16_t *input, int32_t *output, int stride,
- TX_TYPE tx_type, int bd);
-
-typedef void (*IHbdHtFunc)(const int32_t *coeff, uint16_t *output, int stride,
- TX_TYPE tx_type, int bd);
-
-// Test parameter argument list:
-// <transform reference function,
-// optimized inverse transform function,
-// inverse transform reference function,
-// num_coeffs,
-// tx_type,
-// bit_depth>
-typedef tuple<HbdHtFunc, IHbdHtFunc, IHbdHtFunc, int, TX_TYPE, int> IHbdHtParam;
-
-class AV1HighbdInvHTNxN : public ::testing::TestWithParam<IHbdHtParam> {
- public:
- virtual ~AV1HighbdInvHTNxN() {}
-
- virtual void SetUp() {
- txfm_ref_ = GET_PARAM(0);
- inv_txfm_ = GET_PARAM(1);
- inv_txfm_ref_ = GET_PARAM(2);
- num_coeffs_ = GET_PARAM(3);
- tx_type_ = GET_PARAM(4);
- bit_depth_ = GET_PARAM(5);
-
- input_ = reinterpret_cast<int16_t *>(
- aom_memalign(16, sizeof(input_[0]) * num_coeffs_));
-
- // Note:
- // Inverse transform input buffer is 32-byte aligned
- // Refer to <root>/av1/encoder/context_tree.c, function,
- // void alloc_mode_context().
- coeffs_ = reinterpret_cast<int32_t *>(
- aom_memalign(32, sizeof(coeffs_[0]) * num_coeffs_));
- output_ = reinterpret_cast<uint16_t *>(
- aom_memalign(32, sizeof(output_[0]) * num_coeffs_));
- output_ref_ = reinterpret_cast<uint16_t *>(
- aom_memalign(32, sizeof(output_ref_[0]) * num_coeffs_));
- }
-
- virtual void TearDown() {
- aom_free(input_);
- aom_free(coeffs_);
- aom_free(output_);
- aom_free(output_ref_);
- libaom_test::ClearSystemState();
- }
-
- protected:
- void RunBitexactCheck();
-
- private:
- int GetStride() const {
- if (16 == num_coeffs_) {
- return 4;
- } else if (64 == num_coeffs_) {
- return 8;
- } else if (256 == num_coeffs_) {
- return 16;
- } else if (1024 == num_coeffs_) {
- return 32;
- } else if (4096 == num_coeffs_) {
- return 64;
- } else {
- return 0;
- }
- }
-
- HbdHtFunc txfm_ref_;
- IHbdHtFunc inv_txfm_;
- IHbdHtFunc inv_txfm_ref_;
- int num_coeffs_;
- TX_TYPE tx_type_;
- int bit_depth_;
-
- int16_t *input_;
- int32_t *coeffs_;
- uint16_t *output_;
- uint16_t *output_ref_;
-};
-
-void AV1HighbdInvHTNxN::RunBitexactCheck() {
- ACMRandom rnd(ACMRandom::DeterministicSeed());
- const int stride = GetStride();
- const int num_tests = 20000;
- const uint16_t mask = (1 << bit_depth_) - 1;
-
- for (int i = 0; i < num_tests; ++i) {
- for (int j = 0; j < num_coeffs_; ++j) {
- input_[j] = (rnd.Rand16() & mask) - (rnd.Rand16() & mask);
- output_ref_[j] = rnd.Rand16() & mask;
- output_[j] = output_ref_[j];
- }
-
- txfm_ref_(input_, coeffs_, stride, tx_type_, bit_depth_);
- inv_txfm_ref_(coeffs_, output_ref_, stride, tx_type_, bit_depth_);
- ASM_REGISTER_STATE_CHECK(
- inv_txfm_(coeffs_, output_, stride, tx_type_, bit_depth_));
-
- for (int j = 0; j < num_coeffs_; ++j) {
- EXPECT_EQ(output_ref_[j], output_[j])
- << "Not bit-exact result at index: " << j << " At test block: " << i;
- }
- }
-}
-
-TEST_P(AV1HighbdInvHTNxN, InvTransResultCheck) { RunBitexactCheck(); }
-
-using ::testing::make_tuple;
-
-#if HAVE_SSE4_1
-#define PARAM_LIST_4X4 \
- &av1_fwd_txfm2d_4x4_c, &av1_inv_txfm2d_add_4x4_sse4_1, \
- &av1_inv_txfm2d_add_4x4_c, 16
-
-const IHbdHtParam kArrayIhtParam[] = {
- // 4x4
- make_tuple(PARAM_LIST_4X4, DCT_DCT, 10),
- make_tuple(PARAM_LIST_4X4, DCT_DCT, 12),
- make_tuple(PARAM_LIST_4X4, ADST_DCT, 10),
- make_tuple(PARAM_LIST_4X4, ADST_DCT, 12),
- make_tuple(PARAM_LIST_4X4, DCT_ADST, 10),
- make_tuple(PARAM_LIST_4X4, DCT_ADST, 12),
- make_tuple(PARAM_LIST_4X4, ADST_ADST, 10),
- make_tuple(PARAM_LIST_4X4, ADST_ADST, 12),
- make_tuple(PARAM_LIST_4X4, FLIPADST_DCT, 10),
- make_tuple(PARAM_LIST_4X4, FLIPADST_DCT, 12),
- make_tuple(PARAM_LIST_4X4, DCT_FLIPADST, 10),
- make_tuple(PARAM_LIST_4X4, DCT_FLIPADST, 12),
- make_tuple(PARAM_LIST_4X4, FLIPADST_FLIPADST, 10),
- make_tuple(PARAM_LIST_4X4, FLIPADST_FLIPADST, 12),
- make_tuple(PARAM_LIST_4X4, ADST_FLIPADST, 10),
- make_tuple(PARAM_LIST_4X4, ADST_FLIPADST, 12),
- make_tuple(PARAM_LIST_4X4, FLIPADST_ADST, 10),
- make_tuple(PARAM_LIST_4X4, FLIPADST_ADST, 12),
-};
-
-INSTANTIATE_TEST_CASE_P(SSE4_1, AV1HighbdInvHTNxN,
- ::testing::ValuesIn(kArrayIhtParam));
-#endif // HAVE_SSE4_1
-
-typedef void (*HighbdInvTxfm2dFunc)(const int32_t *input, uint8_t *output,
- int stride, const TxfmParam *txfm_param);
-
-typedef ::testing::tuple<const HighbdInvTxfm2dFunc> AV1HighbdInvTxfm2dParam;
-class AV1HighbdInvTxfm2d
- : public ::testing::TestWithParam<AV1HighbdInvTxfm2dParam> {
- public:
- virtual void SetUp() { target_func_ = GET_PARAM(0); }
- void RunAV1InvTxfm2dTest(TX_TYPE tx_type, TX_SIZE tx_size, int run_times,
- int bit_depth);
-
- private:
- HighbdInvTxfm2dFunc target_func_;
-};
-
-void AV1HighbdInvTxfm2d::RunAV1InvTxfm2dTest(TX_TYPE tx_type_, TX_SIZE tx_size_,
- int run_times, int bit_depth_) {
- FwdTxfm2dFunc fwd_func_ = libaom_test::fwd_txfm_func_ls[tx_size_];
- TxfmParam txfm_param;
- const int BLK_WIDTH = 64;
- const int BLK_SIZE = BLK_WIDTH * BLK_WIDTH;
- DECLARE_ALIGNED(16, int16_t, input[BLK_SIZE]) = { 0 };
- DECLARE_ALIGNED(32, int32_t, inv_input[BLK_SIZE]) = { 0 };
- DECLARE_ALIGNED(32, uint16_t, output[BLK_SIZE]) = { 0 };
- DECLARE_ALIGNED(32, uint16_t, ref_output[BLK_SIZE]) = { 0 };
- int stride = BLK_WIDTH;
- int rows = tx_size_high[tx_size_];
- int cols = tx_size_wide[tx_size_];
- const int rows_nonezero = AOMMIN(32, rows);
- const int cols_nonezero = AOMMIN(32, cols);
- const uint16_t mask = (1 << bit_depth_) - 1;
- run_times /= (rows * cols);
- run_times = AOMMAX(1, run_times);
- const SCAN_ORDER *scan_order = get_default_scan(tx_size_, tx_type_);
- const int16_t *scan = scan_order->scan;
- const int16_t eobmax = rows_nonezero * cols_nonezero;
- ACMRandom rnd(ACMRandom::DeterministicSeed());
- int randTimes = run_times == 1 ? (eobmax) : 1;
-
- txfm_param.tx_type = tx_type_;
- txfm_param.tx_size = tx_size_;
- txfm_param.lossless = 0;
- txfm_param.bd = bit_depth_;
- txfm_param.is_hbd = 1;
- txfm_param.tx_set_type = EXT_TX_SET_ALL16;
-
- for (int cnt = 0; cnt < randTimes; ++cnt) {
- for (int r = 0; r < BLK_WIDTH; ++r) {
- for (int c = 0; c < BLK_WIDTH; ++c) {
- input[r * cols + c] = (rnd.Rand16() & mask) - (rnd.Rand16() & mask);
- output[r * stride + c] = rnd.Rand16() & mask;
-
- ref_output[r * stride + c] = output[r * stride + c];
- }
- }
- fwd_func_(input, inv_input, stride, tx_type_, bit_depth_);
-
- // produce eob input by setting high freq coeffs to zero
- const int eob = AOMMIN(cnt + 1, eobmax);
- for (int i = eob; i < eobmax; i++) {
- inv_input[scan[i]] = 0;
- }
- txfm_param.eob = eob;
- aom_usec_timer ref_timer, test_timer;
-
- aom_usec_timer_start(&ref_timer);
- for (int i = 0; i < run_times; ++i) {
- av1_highbd_inv_txfm_add_c(inv_input, CONVERT_TO_BYTEPTR(ref_output),
- stride, &txfm_param);
- }
- aom_usec_timer_mark(&ref_timer);
- const int elapsed_time_c =
- static_cast<int>(aom_usec_timer_elapsed(&ref_timer));
-
- aom_usec_timer_start(&test_timer);
- for (int i = 0; i < run_times; ++i) {
- target_func_(inv_input, CONVERT_TO_BYTEPTR(output), stride, &txfm_param);
- }
- aom_usec_timer_mark(&test_timer);
- const int elapsed_time_simd =
- static_cast<int>(aom_usec_timer_elapsed(&test_timer));
- if (run_times > 10) {
- printf(
- "txfm_size[%d] \t txfm_type[%d] \t c_time=%d \t simd_time=%d \t "
- "gain=%d \n",
- tx_size_, tx_type_, elapsed_time_c, elapsed_time_simd,
- (elapsed_time_c / elapsed_time_simd));
- } else {
- for (int r = 0; r < rows; ++r) {
- for (int c = 0; c < cols; ++c) {
- ASSERT_EQ(ref_output[r * stride + c], output[r * stride + c])
- << "[" << r << "," << c << "] " << cnt
- << " tx_size: " << static_cast<int>(tx_size_)
- << " tx_type: " << tx_type_ << " eob " << eob;
- }
- }
- }
- }
-}
-
-TEST_P(AV1HighbdInvTxfm2d, match) {
- int bitdepth_ar[2] = { 10, 12 };
- for (int k = 0; k < 2; ++k) {
- int bd = bitdepth_ar[k];
- for (int j = 0; j < (int)(TX_SIZES_ALL); ++j) {
- for (int i = 0; i < (int)TX_TYPES; ++i) {
- if (libaom_test::IsTxSizeTypeValid(static_cast<TX_SIZE>(j),
- static_cast<TX_TYPE>(i))) {
- RunAV1InvTxfm2dTest(static_cast<TX_TYPE>(i), static_cast<TX_SIZE>(j),
- 1, bd);
- }
- }
- }
- }
-}
-
-TEST_P(AV1HighbdInvTxfm2d, DISABLED_Speed) {
- int bitdepth_ar[2] = { 10, 12 };
- for (int k = 0; k < 2; ++k) {
- int bd = bitdepth_ar[k];
- for (int j = 0; j < (int)(TX_SIZES_ALL); ++j) {
- for (int i = 0; i < (int)TX_TYPES; ++i) {
- if (libaom_test::IsTxSizeTypeValid(static_cast<TX_SIZE>(j),
- static_cast<TX_TYPE>(i))) {
- RunAV1InvTxfm2dTest(static_cast<TX_TYPE>(i), static_cast<TX_SIZE>(j),
- 1000000, bd);
- }
- }
- }
- }
-}
-
-#if HAVE_SSE4_1
-INSTANTIATE_TEST_CASE_P(SSE4_1, AV1HighbdInvTxfm2d,
- ::testing::Values(av1_highbd_inv_txfm_add_sse4_1));
-#endif
-
-#if HAVE_AVX2
-INSTANTIATE_TEST_CASE_P(AVX2, AV1HighbdInvTxfm2d,
- ::testing::Values(av1_highbd_inv_txfm_add_avx2));
-#endif
-} // namespace
diff --git a/third_party/aom/test/av1_horz_only_frame_superres_test.cc b/third_party/aom/test/av1_horz_only_frame_superres_test.cc
deleted file mode 100644
index fd77ef35d..000000000
--- a/third_party/aom/test/av1_horz_only_frame_superres_test.cc
+++ /dev/null
@@ -1,362 +0,0 @@
-/*
- * Copyright (c) 2018, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#include <vector>
-
-#include "third_party/googletest/src/googletest/include/gtest/gtest.h"
-
-#include "config/av1_rtcd.h"
-
-#include "aom_ports/aom_timer.h"
-#include "av1/common/convolve.h"
-#include "av1/common/resize.h"
-#include "test/acm_random.h"
-#include "test/clear_system_state.h"
-#include "test/register_state_check.h"
-#include "test/util.h"
-
-namespace {
-const int kTestIters = 10;
-const int kPerfIters = 1000;
-
-const int kVPad = 32;
-const int kHPad = 32;
-
-using ::testing::make_tuple;
-using ::testing::tuple;
-using libaom_test::ACMRandom;
-
-template <typename Pixel>
-class TestImage {
- public:
- TestImage(int w_src, int h, int superres_denom, int x0, int bd)
- : w_src_(w_src), h_(h), superres_denom_(superres_denom), x0_(x0),
- bd_(bd) {
- assert(bd < 16);
- assert(bd <= 8 * static_cast<int>(sizeof(Pixel)));
- assert(9 <= superres_denom && superres_denom <= 16);
- assert(SCALE_NUMERATOR == 8);
- assert(0 <= x0_ && x0_ <= RS_SCALE_SUBPEL_MASK);
-
- w_dst_ = w_src_;
- av1_calculate_unscaled_superres_size(&w_dst_, NULL, superres_denom);
-
- src_stride_ = ALIGN_POWER_OF_TWO(w_src_ + 2 * kHPad, 4);
- dst_stride_ = ALIGN_POWER_OF_TWO(w_dst_ + 2 * kHPad, 4);
-
- // Allocate image data
- src_data_.resize(2 * src_block_size());
- dst_data_.resize(2 * dst_block_size());
- }
-
- void Initialize(ACMRandom *rnd);
- void Check() const;
-
- int src_stride() const { return src_stride_; }
- int dst_stride() const { return dst_stride_; }
-
- int src_block_size() const { return (h_ + 2 * kVPad) * src_stride(); }
- int dst_block_size() const { return (h_ + 2 * kVPad) * dst_stride(); }
-
- int src_width() const { return w_src_; }
- int dst_width() const { return w_dst_; }
- int height() const { return h_; }
- int x0() const { return x0_; }
-
- const Pixel *GetSrcData(bool ref, bool borders) const {
- const Pixel *block = &src_data_[ref ? 0 : src_block_size()];
- return borders ? block : block + kHPad + src_stride_ * kVPad;
- }
-
- Pixel *GetDstData(bool ref, bool borders) {
- Pixel *block = &dst_data_[ref ? 0 : dst_block_size()];
- return borders ? block : block + kHPad + dst_stride_ * kVPad;
- }
-
- private:
- int w_src_, w_dst_, h_, superres_denom_, x0_, bd_;
- int src_stride_, dst_stride_;
-
- std::vector<Pixel> src_data_;
- std::vector<Pixel> dst_data_;
-};
-
-template <typename Pixel>
-void FillEdge(ACMRandom *rnd, int num_pixels, int bd, bool trash, Pixel *data) {
- if (!trash) {
- memset(data, 0, sizeof(*data) * num_pixels);
- return;
- }
- const Pixel mask = (1 << bd) - 1;
- for (int i = 0; i < num_pixels; ++i) data[i] = rnd->Rand16() & mask;
-}
-
-template <typename Pixel>
-void PrepBuffers(ACMRandom *rnd, int w, int h, int stride, int bd,
- bool trash_edges, Pixel *data) {
- assert(rnd);
- const Pixel mask = (1 << bd) - 1;
-
- // Fill in the first buffer with random data
- // Top border
- FillEdge(rnd, stride * kVPad, bd, trash_edges, data);
- for (int r = 0; r < h; ++r) {
- Pixel *row_data = data + (kVPad + r) * stride;
- // Left border, contents, right border
- FillEdge(rnd, kHPad, bd, trash_edges, row_data);
- for (int c = 0; c < w; ++c) row_data[kHPad + c] = rnd->Rand16() & mask;
- FillEdge(rnd, kHPad, bd, trash_edges, row_data + kHPad + w);
- }
- // Bottom border
- FillEdge(rnd, stride * kVPad, bd, trash_edges, data + stride * (kVPad + h));
-
- const int bpp = sizeof(*data);
- const int block_elts = stride * (h + 2 * kVPad);
- const int block_size = bpp * block_elts;
-
- // Now copy that to the second buffer
- memcpy(data + block_elts, data, block_size);
-}
-
-template <typename Pixel>
-void TestImage<Pixel>::Initialize(ACMRandom *rnd) {
- PrepBuffers(rnd, w_src_, h_, src_stride_, bd_, false, &src_data_[0]);
- PrepBuffers(rnd, w_dst_, h_, dst_stride_, bd_, true, &dst_data_[0]);
-}
-
-template <typename Pixel>
-void TestImage<Pixel>::Check() const {
- const int num_pixels = dst_block_size();
- const Pixel *ref_dst = &dst_data_[0];
- const Pixel *tst_dst = &dst_data_[num_pixels];
-
- // If memcmp returns 0, there's nothing to do.
- if (0 == memcmp(ref_dst, tst_dst, sizeof(*ref_dst) * num_pixels)) return;
-
- // Otherwise, iterate through the buffer looking for differences, *ignoring
- // the edges*
- const int stride = dst_stride_;
- for (int r = kVPad; r < h_ + kVPad; ++r) {
- for (int c = kVPad; c < w_dst_ + kHPad; ++c) {
- const int32_t ref_value = ref_dst[r * stride + c];
- const int32_t tst_value = tst_dst[r * stride + c];
-
- EXPECT_EQ(tst_value, ref_value)
- << "Error at row: " << (r - kVPad) << ", col: " << (c - kHPad)
- << ", superres_denom: " << superres_denom_ << ", height: " << h_
- << ", src_width: " << w_src_ << ", dst_width: " << w_dst_
- << ", x0: " << x0_;
- }
- }
-}
-
-template <typename Pixel>
-class ConvolveHorizRSTestBase : public ::testing::Test {
- public:
- ConvolveHorizRSTestBase() : image_(NULL) {}
- virtual ~ConvolveHorizRSTestBase() {}
- virtual void TearDown() { libaom_test::ClearSystemState(); }
-
- // Implemented by subclasses (SetUp depends on the parameters passed
- // in and RunOne depends on the function to be tested. These can't
- // be templated for low/high bit depths because they have different
- // numbers of parameters)
- virtual void SetUp() = 0;
- virtual void RunOne(bool ref) = 0;
-
- protected:
- void SetBitDepth(int bd) { bd_ = bd; }
-
- void CorrectnessTest() {
- ACMRandom rnd(ACMRandom::DeterministicSeed());
- for (int i = 0; i < kTestIters; ++i) {
- for (int superres_denom = 9; superres_denom <= 16; superres_denom++) {
- // Get a random height between 512 and 767
- int height = rnd.Rand8() + 512;
-
- // Get a random src width between 128 and 383
- int width_src = rnd.Rand8() + 128;
-
- // x0 is normally calculated by get_upscale_convolve_x0 in
- // av1/common/resize.c. However, this test should work for
- // any value of x0 between 0 and RS_SCALE_SUBPEL_MASK
- // (inclusive), so we choose one at random.
- int x0 = rnd.Rand16() % (RS_SCALE_SUBPEL_MASK + 1);
-
- image_ =
- new TestImage<Pixel>(width_src, height, superres_denom, x0, bd_);
-
- Prep(&rnd);
- RunOne(true);
- RunOne(false);
- image_->Check();
-
- delete image_;
- }
- }
- }
-
- void SpeedTest() {
- // Pick some specific parameters to test
- int height = 767;
- int width_src = 129;
- int superres_denom = 13;
- int x0 = RS_SCALE_SUBPEL_MASK >> 1;
-
- image_ = new TestImage<Pixel>(width_src, height, superres_denom, x0, bd_);
-
- ACMRandom rnd(ACMRandom::DeterministicSeed());
- Prep(&rnd);
-
- aom_usec_timer ref_timer;
- aom_usec_timer_start(&ref_timer);
- for (int i = 0; i < kPerfIters; ++i) RunOne(true);
- aom_usec_timer_mark(&ref_timer);
- const int64_t ref_time = aom_usec_timer_elapsed(&ref_timer);
-
- aom_usec_timer tst_timer;
- aom_usec_timer_start(&tst_timer);
- for (int i = 0; i < kPerfIters; ++i) RunOne(false);
- aom_usec_timer_mark(&tst_timer);
- const int64_t tst_time = aom_usec_timer_elapsed(&tst_timer);
-
- std::cout << "[ ] C time = " << ref_time / 1000
- << " ms, SIMD time = " << tst_time / 1000 << " ms\n";
-
- EXPECT_GT(ref_time, tst_time)
- << "Error: ConvolveHorizRSTest (Speed Test), SIMD slower than C.\n"
- << "C time: " << ref_time << " us\n"
- << "SIMD time: " << tst_time << " us\n";
- }
-
- void Prep(ACMRandom *rnd) {
- assert(rnd);
- image_->Initialize(rnd);
- }
-
- int bd_;
- TestImage<Pixel> *image_;
-};
-
-typedef void (*LowBDConvolveHorizRsFunc)(const uint8_t *src, int src_stride,
- uint8_t *dst, int dst_stride, int w,
- int h, const int16_t *x_filters,
- const int x0_qn, const int x_step_qn);
-
-// Test parameter list:
-// <tst_fun_>
-typedef tuple<LowBDConvolveHorizRsFunc> LowBDParams;
-
-class LowBDConvolveHorizRSTest
- : public ConvolveHorizRSTestBase<uint8_t>,
- public ::testing::WithParamInterface<LowBDParams> {
- public:
- virtual ~LowBDConvolveHorizRSTest() {}
-
- void SetUp() {
- tst_fun_ = GET_PARAM(0);
- const int bd = 8;
- SetBitDepth(bd);
- }
-
- void RunOne(bool ref) {
- const uint8_t *src = image_->GetSrcData(ref, false);
- uint8_t *dst = image_->GetDstData(ref, false);
- const int src_stride = image_->src_stride();
- const int dst_stride = image_->dst_stride();
- const int width_src = image_->src_width();
- const int width_dst = image_->dst_width();
- const int height = image_->height();
- const int x0_qn = image_->x0();
-
- const int32_t x_step_qn =
- av1_get_upscale_convolve_step(width_src, width_dst);
-
- if (ref) {
- av1_convolve_horiz_rs_c(src, src_stride, dst, dst_stride, width_dst,
- height, &av1_resize_filter_normative[0][0], x0_qn,
- x_step_qn);
- } else {
- tst_fun_(src, src_stride, dst, dst_stride, width_dst, height,
- &av1_resize_filter_normative[0][0], x0_qn, x_step_qn);
- }
- }
-
- private:
- LowBDConvolveHorizRsFunc tst_fun_;
-};
-
-TEST_P(LowBDConvolveHorizRSTest, Correctness) { CorrectnessTest(); }
-TEST_P(LowBDConvolveHorizRSTest, DISABLED_Speed) { SpeedTest(); }
-
-INSTANTIATE_TEST_CASE_P(SSE4_1, LowBDConvolveHorizRSTest,
- ::testing::Values(av1_convolve_horiz_rs_sse4_1));
-
-typedef void (*HighBDConvolveHorizRsFunc)(const uint16_t *src, int src_stride,
- uint16_t *dst, int dst_stride, int w,
- int h, const int16_t *x_filters,
- const int x0_qn, const int x_step_qn,
- int bd);
-
-// Test parameter list:
-// <tst_fun_, bd_>
-typedef tuple<HighBDConvolveHorizRsFunc, int> HighBDParams;
-
-class HighBDConvolveHorizRSTest
- : public ConvolveHorizRSTestBase<uint16_t>,
- public ::testing::WithParamInterface<HighBDParams> {
- public:
- virtual ~HighBDConvolveHorizRSTest() {}
-
- void SetUp() {
- tst_fun_ = GET_PARAM(0);
- const int bd = GET_PARAM(1);
- SetBitDepth(bd);
- }
-
- void RunOne(bool ref) {
- const uint16_t *src = image_->GetSrcData(ref, false);
- uint16_t *dst = image_->GetDstData(ref, false);
- const int src_stride = image_->src_stride();
- const int dst_stride = image_->dst_stride();
- const int width_src = image_->src_width();
- const int width_dst = image_->dst_width();
- const int height = image_->height();
- const int x0_qn = image_->x0();
-
- const int32_t x_step_qn =
- av1_get_upscale_convolve_step(width_src, width_dst);
-
- if (ref) {
- av1_highbd_convolve_horiz_rs_c(
- src, src_stride, dst, dst_stride, width_dst, height,
- &av1_resize_filter_normative[0][0], x0_qn, x_step_qn, bd_);
- } else {
- tst_fun_(src, src_stride, dst, dst_stride, width_dst, height,
- &av1_resize_filter_normative[0][0], x0_qn, x_step_qn, bd_);
- }
- }
-
- private:
- HighBDConvolveHorizRsFunc tst_fun_;
-};
-
-const int kBDs[] = { 8, 10, 12 };
-
-TEST_P(HighBDConvolveHorizRSTest, Correctness) { CorrectnessTest(); }
-TEST_P(HighBDConvolveHorizRSTest, DISABLED_Speed) { SpeedTest(); }
-
-INSTANTIATE_TEST_CASE_P(
- SSE4_1, HighBDConvolveHorizRSTest,
- ::testing::Combine(::testing::Values(av1_highbd_convolve_horiz_rs_sse4_1),
- ::testing::ValuesIn(kBDs)));
-
-} // namespace
diff --git a/third_party/aom/test/av1_inv_txfm1d_test.cc b/third_party/aom/test/av1_inv_txfm1d_test.cc
deleted file mode 100644
index bf3a44ed1..000000000
--- a/third_party/aom/test/av1_inv_txfm1d_test.cc
+++ /dev/null
@@ -1,157 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#include <math.h>
-
-#include "test/av1_txfm_test.h"
-#include "test/util.h"
-#include "av1/common/av1_inv_txfm1d.h"
-#include "av1/encoder/av1_fwd_txfm1d.h"
-
-using libaom_test::ACMRandom;
-using libaom_test::input_base;
-
-namespace {
-const int txfm_type_num = 2;
-const int txfm_size_ls[] = { 4, 8, 16, 32, 64 };
-
-const TxfmFunc fwd_txfm_func_ls[][txfm_type_num] = {
- { av1_fdct4_new, av1_fadst4_new },
- { av1_fdct8_new, av1_fadst8_new },
- { av1_fdct16_new, av1_fadst16_new },
- { av1_fdct32_new, NULL },
- { av1_fdct64_new, NULL },
-};
-
-const TxfmFunc inv_txfm_func_ls[][txfm_type_num] = {
- { av1_idct4_new, av1_iadst4_new },
- { av1_idct8_new, av1_iadst8_new },
- { av1_idct16_new, av1_iadst16_new },
- { av1_idct32_new, NULL },
- { av1_idct64_new, NULL },
-};
-
-// the maximum stage number of fwd/inv 1d dct/adst txfm is 12
-const int8_t cos_bit = 13;
-const int8_t range_bit[12] = { 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20 };
-
-void reference_idct_1d_int(const int32_t *in, int32_t *out, int size) {
- double input[64];
- for (int i = 0; i < size; ++i) input[i] = in[i];
-
- double output[64];
- libaom_test::reference_idct_1d(input, output, size);
-
- for (int i = 0; i < size; ++i) {
- ASSERT_GE(output[i], INT32_MIN);
- ASSERT_LE(output[i], INT32_MAX);
- out[i] = static_cast<int32_t>(round(output[i]));
- }
-}
-
-void random_matrix(int32_t *dst, int len, ACMRandom *rnd) {
- const int bits = 16;
- const int maxVal = (1 << (bits - 1)) - 1;
- const int minVal = -(1 << (bits - 1));
- for (int i = 0; i < len; ++i) {
- if (rnd->Rand8() % 10)
- dst[i] = minVal + rnd->Rand16() % (1 << bits);
- else
- dst[i] = rnd->Rand8() % 2 ? minVal : maxVal;
- }
-}
-
-TEST(av1_inv_txfm1d, InvAccuracyCheck) {
- ACMRandom rnd(ACMRandom::DeterministicSeed());
- const int count_test_block = 20000;
- const int max_error[] = { 6, 10, 19, 31, 40 };
- ASSERT_EQ(NELEMENTS(max_error), TX_SIZES);
- ASSERT_EQ(NELEMENTS(inv_txfm_func_ls), TX_SIZES);
- for (int k = 0; k < count_test_block; ++k) {
- // choose a random transform to test
- const TX_SIZE tx_size = static_cast<TX_SIZE>(rnd.Rand8() % TX_SIZES);
- const int tx_size_pix = txfm_size_ls[tx_size];
- const TxfmFunc inv_txfm_func = inv_txfm_func_ls[tx_size][0];
-
- int32_t input[64];
- random_matrix(input, tx_size_pix, &rnd);
-
- // 64x64 transform assumes last 32 values are zero.
- memset(input + 32, 0, 32 * sizeof(input[0]));
-
- int32_t ref_output[64];
- reference_idct_1d_int(input, ref_output, tx_size_pix);
-
- int32_t output[64];
- inv_txfm_func(input, output, cos_bit, range_bit);
-
- for (int i = 0; i < tx_size_pix; ++i) {
- EXPECT_LE(abs(output[i] - ref_output[i]), max_error[tx_size])
- << "tx_size = " << tx_size << ", i = " << i
- << ", output[i] = " << output[i]
- << ", ref_output[i] = " << ref_output[i];
- }
- }
-}
-
-static INLINE int get_max_bit(int x) {
- int max_bit = -1;
- while (x) {
- x = x >> 1;
- max_bit++;
- }
- return max_bit;
-}
-
-TEST(av1_inv_txfm1d, get_max_bit) {
- int max_bit = get_max_bit(8);
- EXPECT_EQ(max_bit, 3);
-}
-
-TEST(av1_inv_txfm1d, round_trip) {
- ACMRandom rnd(ACMRandom::DeterministicSeed());
- for (int si = 0; si < NELEMENTS(fwd_txfm_func_ls); ++si) {
- int txfm_size = txfm_size_ls[si];
-
- for (int ti = 0; ti < txfm_type_num; ++ti) {
- TxfmFunc fwd_txfm_func = fwd_txfm_func_ls[si][ti];
- TxfmFunc inv_txfm_func = inv_txfm_func_ls[si][ti];
- int max_error = 2;
-
- if (!fwd_txfm_func) continue;
-
- const int count_test_block = 5000;
- for (int ci = 0; ci < count_test_block; ++ci) {
- int32_t input[64];
- int32_t output[64];
- int32_t round_trip_output[64];
-
- ASSERT_LE(txfm_size, NELEMENTS(input));
-
- for (int ni = 0; ni < txfm_size; ++ni) {
- input[ni] = rnd.Rand16() % input_base - rnd.Rand16() % input_base;
- }
-
- fwd_txfm_func(input, output, cos_bit, range_bit);
- inv_txfm_func(output, round_trip_output, cos_bit, range_bit);
-
- for (int ni = 0; ni < txfm_size; ++ni) {
- int node_err =
- abs(input[ni] - round_shift(round_trip_output[ni],
- get_max_bit(txfm_size) - 1));
- EXPECT_LE(node_err, max_error);
- }
- }
- }
- }
-}
-
-} // namespace
diff --git a/third_party/aom/test/av1_inv_txfm2d_test.cc b/third_party/aom/test/av1_inv_txfm2d_test.cc
deleted file mode 100644
index 11e231ba6..000000000
--- a/third_party/aom/test/av1_inv_txfm2d_test.cc
+++ /dev/null
@@ -1,378 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#include <math.h>
-#include <stdio.h>
-#include <stdlib.h>
-#include <vector>
-
-#include "config/av1_rtcd.h"
-
-#include "aom_ports/aom_timer.h"
-#include "av1/common/av1_inv_txfm1d_cfg.h"
-#include "av1/common/scan.h"
-#include "test/acm_random.h"
-#include "test/av1_txfm_test.h"
-#include "test/util.h"
-
-using libaom_test::ACMRandom;
-using libaom_test::InvTxfm2dFunc;
-using libaom_test::LbdInvTxfm2dFunc;
-using libaom_test::bd;
-using libaom_test::compute_avg_abs_error;
-using libaom_test::input_base;
-
-using ::testing::Combine;
-using ::testing::Range;
-using ::testing::Values;
-
-using std::vector;
-
-namespace {
-
-// AV1InvTxfm2dParam argument list:
-// tx_type_, tx_size_, max_error_, max_avg_error_
-typedef ::testing::tuple<TX_TYPE, TX_SIZE, int, double> AV1InvTxfm2dParam;
-
-class AV1InvTxfm2d : public ::testing::TestWithParam<AV1InvTxfm2dParam> {
- public:
- virtual void SetUp() {
- tx_type_ = GET_PARAM(0);
- tx_size_ = GET_PARAM(1);
- max_error_ = GET_PARAM(2);
- max_avg_error_ = GET_PARAM(3);
- }
-
- void RunRoundtripCheck() {
- int tx_w = tx_size_wide[tx_size_];
- int tx_h = tx_size_high[tx_size_];
- int txfm2d_size = tx_w * tx_h;
- const FwdTxfm2dFunc fwd_txfm_func = libaom_test::fwd_txfm_func_ls[tx_size_];
- const InvTxfm2dFunc inv_txfm_func = libaom_test::inv_txfm_func_ls[tx_size_];
- double avg_abs_error = 0;
- ACMRandom rnd(ACMRandom::DeterministicSeed());
-
- const int count = 500;
-
- for (int ci = 0; ci < count; ci++) {
- DECLARE_ALIGNED(16, int16_t, input[64 * 64]) = { 0 };
- ASSERT_LE(txfm2d_size, NELEMENTS(input));
-
- for (int ni = 0; ni < txfm2d_size; ++ni) {
- if (ci == 0) {
- int extreme_input = input_base - 1;
- input[ni] = extreme_input; // extreme case
- } else {
- input[ni] = rnd.Rand16() % input_base;
- }
- }
-
- DECLARE_ALIGNED(16, uint16_t, expected[64 * 64]) = { 0 };
- ASSERT_LE(txfm2d_size, NELEMENTS(expected));
- if (TxfmUsesApproximation()) {
- // Compare reference forward HT + inverse HT vs forward HT + inverse HT.
- double ref_input[64 * 64];
- ASSERT_LE(txfm2d_size, NELEMENTS(ref_input));
- for (int ni = 0; ni < txfm2d_size; ++ni) {
- ref_input[ni] = input[ni];
- }
- double ref_coeffs[64 * 64] = { 0 };
- ASSERT_LE(txfm2d_size, NELEMENTS(ref_coeffs));
- ASSERT_EQ(tx_type_, DCT_DCT);
- libaom_test::reference_hybrid_2d(ref_input, ref_coeffs, tx_type_,
- tx_size_);
- DECLARE_ALIGNED(16, int32_t, ref_coeffs_int[64 * 64]) = { 0 };
- ASSERT_LE(txfm2d_size, NELEMENTS(ref_coeffs_int));
- for (int ni = 0; ni < txfm2d_size; ++ni) {
- ref_coeffs_int[ni] = (int32_t)round(ref_coeffs[ni]);
- }
- inv_txfm_func(ref_coeffs_int, expected, tx_w, tx_type_, bd);
- } else {
- // Compare original input vs forward HT + inverse HT.
- for (int ni = 0; ni < txfm2d_size; ++ni) {
- expected[ni] = input[ni];
- }
- }
-
- DECLARE_ALIGNED(16, int32_t, coeffs[64 * 64]) = { 0 };
- ASSERT_LE(txfm2d_size, NELEMENTS(coeffs));
- fwd_txfm_func(input, coeffs, tx_w, tx_type_, bd);
-
- DECLARE_ALIGNED(16, uint16_t, actual[64 * 64]) = { 0 };
- ASSERT_LE(txfm2d_size, NELEMENTS(actual));
- inv_txfm_func(coeffs, actual, tx_w, tx_type_, bd);
-
- double actual_max_error = 0;
- for (int ni = 0; ni < txfm2d_size; ++ni) {
- const double this_error = abs(expected[ni] - actual[ni]);
- actual_max_error = AOMMAX(actual_max_error, this_error);
- }
- EXPECT_GE(max_error_, actual_max_error)
- << " tx_w: " << tx_w << " tx_h " << tx_h << " tx_type: " << tx_type_;
- if (actual_max_error > max_error_) { // exit early.
- break;
- }
- avg_abs_error += compute_avg_abs_error<uint16_t, uint16_t>(
- expected, actual, txfm2d_size);
- }
-
- avg_abs_error /= count;
- EXPECT_GE(max_avg_error_, avg_abs_error)
- << " tx_w: " << tx_w << " tx_h " << tx_h << " tx_type: " << tx_type_;
- }
-
- private:
- bool TxfmUsesApproximation() {
- if (tx_size_wide[tx_size_] == 64 || tx_size_high[tx_size_] == 64) {
- return true;
- }
- return false;
- }
-
- int max_error_;
- double max_avg_error_;
- TX_TYPE tx_type_;
- TX_SIZE tx_size_;
-};
-
-static int max_error_ls[TX_SIZES_ALL] = {
- 2, // 4x4 transform
- 2, // 8x8 transform
- 2, // 16x16 transform
- 4, // 32x32 transform
- 3, // 64x64 transform
- 2, // 4x8 transform
- 2, // 8x4 transform
- 2, // 8x16 transform
- 2, // 16x8 transform
- 3, // 16x32 transform
- 3, // 32x16 transform
- 5, // 32x64 transform
- 5, // 64x32 transform
- 2, // 4x16 transform
- 2, // 16x4 transform
- 2, // 8x32 transform
- 2, // 32x8 transform
- 3, // 16x64 transform
- 3, // 64x16 transform
-};
-
-static double avg_error_ls[TX_SIZES_ALL] = {
- 0.002, // 4x4 transform
- 0.05, // 8x8 transform
- 0.07, // 16x16 transform
- 0.4, // 32x32 transform
- 0.3, // 64x64 transform
- 0.02, // 4x8 transform
- 0.02, // 8x4 transform
- 0.04, // 8x16 transform
- 0.07, // 16x8 transform
- 0.4, // 16x32 transform
- 0.5, // 32x16 transform
- 0.38, // 32x64 transform
- 0.39, // 64x32 transform
- 0.2, // 4x16 transform
- 0.2, // 16x4 transform
- 0.2, // 8x32 transform
- 0.2, // 32x8 transform
- 0.38, // 16x64 transform
- 0.38, // 64x16 transform
-};
-
-vector<AV1InvTxfm2dParam> GetInvTxfm2dParamList() {
- vector<AV1InvTxfm2dParam> param_list;
- for (int s = 0; s < TX_SIZES; ++s) {
- const int max_error = max_error_ls[s];
- const double avg_error = avg_error_ls[s];
- for (int t = 0; t < TX_TYPES; ++t) {
- const TX_TYPE tx_type = static_cast<TX_TYPE>(t);
- const TX_SIZE tx_size = static_cast<TX_SIZE>(s);
- if (libaom_test::IsTxSizeTypeValid(tx_size, tx_type)) {
- param_list.push_back(
- AV1InvTxfm2dParam(tx_type, tx_size, max_error, avg_error));
- }
- }
- }
- return param_list;
-}
-
-INSTANTIATE_TEST_CASE_P(C, AV1InvTxfm2d,
- ::testing::ValuesIn(GetInvTxfm2dParamList()));
-
-TEST_P(AV1InvTxfm2d, RunRoundtripCheck) { RunRoundtripCheck(); }
-
-TEST(AV1InvTxfm2d, CfgTest) {
- for (int bd_idx = 0; bd_idx < BD_NUM; ++bd_idx) {
- int bd = libaom_test::bd_arr[bd_idx];
- int8_t low_range = libaom_test::low_range_arr[bd_idx];
- int8_t high_range = libaom_test::high_range_arr[bd_idx];
- for (int tx_size = 0; tx_size < TX_SIZES_ALL; ++tx_size) {
- for (int tx_type = 0; tx_type < TX_TYPES; ++tx_type) {
- if (libaom_test::IsTxSizeTypeValid(static_cast<TX_SIZE>(tx_size),
- static_cast<TX_TYPE>(tx_type)) ==
- false) {
- continue;
- }
- TXFM_2D_FLIP_CFG cfg;
- av1_get_inv_txfm_cfg(static_cast<TX_TYPE>(tx_type),
- static_cast<TX_SIZE>(tx_size), &cfg);
- int8_t stage_range_col[MAX_TXFM_STAGE_NUM];
- int8_t stage_range_row[MAX_TXFM_STAGE_NUM];
- av1_gen_inv_stage_range(stage_range_col, stage_range_row, &cfg,
- (TX_SIZE)tx_size, bd);
- libaom_test::txfm_stage_range_check(stage_range_col, cfg.stage_num_col,
- cfg.cos_bit_col, low_range,
- high_range);
- libaom_test::txfm_stage_range_check(stage_range_row, cfg.stage_num_row,
- cfg.cos_bit_row, low_range,
- high_range);
- }
- }
- }
-}
-
-typedef ::testing::tuple<const LbdInvTxfm2dFunc> AV1LbdInvTxfm2dParam;
-class AV1LbdInvTxfm2d : public ::testing::TestWithParam<AV1LbdInvTxfm2dParam> {
- public:
- virtual void SetUp() { target_func_ = GET_PARAM(0); }
- void RunAV1InvTxfm2dTest(TX_TYPE tx_type, TX_SIZE tx_size, int run_times);
-
- private:
- LbdInvTxfm2dFunc target_func_;
-};
-
-void AV1LbdInvTxfm2d::RunAV1InvTxfm2dTest(TX_TYPE tx_type, TX_SIZE tx_size,
- int run_times) {
- FwdTxfm2dFunc fwd_func_ = libaom_test::fwd_txfm_func_ls[tx_size];
- InvTxfm2dFunc ref_func_ = libaom_test::inv_txfm_func_ls[tx_size];
- if (fwd_func_ == NULL || ref_func_ == NULL || target_func_ == NULL) {
- return;
- }
- const int bd = 8;
- const int BLK_WIDTH = 64;
- const int BLK_SIZE = BLK_WIDTH * BLK_WIDTH;
- DECLARE_ALIGNED(16, int16_t, input[BLK_SIZE]) = { 0 };
- DECLARE_ALIGNED(32, int32_t, inv_input[BLK_SIZE]) = { 0 };
- DECLARE_ALIGNED(16, uint8_t, output[BLK_SIZE]) = { 0 };
- DECLARE_ALIGNED(16, uint16_t, ref_output[BLK_SIZE]) = { 0 };
- int stride = BLK_WIDTH;
- int rows = tx_size_high[tx_size];
- int cols = tx_size_wide[tx_size];
- const int rows_nonezero = AOMMIN(32, rows);
- const int cols_nonezero = AOMMIN(32, cols);
- run_times /= (rows * cols);
- run_times = AOMMAX(1, run_times);
- const SCAN_ORDER *scan_order = get_default_scan(tx_size, tx_type);
- const int16_t *scan = scan_order->scan;
- const int16_t eobmax = rows_nonezero * cols_nonezero;
- ACMRandom rnd(ACMRandom::DeterministicSeed());
- int randTimes = run_times == 1 ? (eobmax + 500) : 1;
- for (int cnt = 0; cnt < randTimes; ++cnt) {
- const int16_t max_in = (1 << (bd)) - 1;
- for (int r = 0; r < BLK_WIDTH; ++r) {
- for (int c = 0; c < BLK_WIDTH; ++c) {
- input[r * cols + c] = (cnt == 0) ? max_in : rnd.Rand8Extremes();
- output[r * stride + c] = (cnt == 0) ? 128 : rnd.Rand8();
- ref_output[r * stride + c] = output[r * stride + c];
- }
- }
- fwd_func_(input, inv_input, stride, tx_type, bd);
-
- // produce eob input by setting high freq coeffs to zero
- const int eob = AOMMIN(cnt + 1, eobmax);
- for (int i = eob; i < eobmax; i++) {
- inv_input[scan[i]] = 0;
- }
-
- aom_usec_timer timer;
- aom_usec_timer_start(&timer);
- for (int i = 0; i < run_times; ++i) {
- ref_func_(inv_input, ref_output, stride, tx_type, bd);
- }
- aom_usec_timer_mark(&timer);
- const double time1 = static_cast<double>(aom_usec_timer_elapsed(&timer));
- aom_usec_timer_start(&timer);
- for (int i = 0; i < run_times; ++i) {
- target_func_(inv_input, output, stride, tx_type, tx_size, eob);
- }
- aom_usec_timer_mark(&timer);
- const double time2 = static_cast<double>(aom_usec_timer_elapsed(&timer));
- if (run_times > 10) {
- printf("txfm[%d] %3dx%-3d:%7.2f/%7.2fns", tx_type, cols, rows, time1,
- time2);
- printf("(%3.2f)\n", time1 / time2);
- }
- for (int r = 0; r < rows; ++r) {
- for (int c = 0; c < cols; ++c) {
- uint8_t ref_value = static_cast<uint8_t>(ref_output[r * stride + c]);
- ASSERT_EQ(ref_value, output[r * stride + c])
- << "[" << r << "," << c << "] " << cnt
- << " tx_size: " << static_cast<int>(tx_size)
- << " tx_type: " << tx_type << " eob " << eob;
- }
- }
- }
-}
-
-TEST_P(AV1LbdInvTxfm2d, match) {
- for (int j = 0; j < (int)(TX_SIZES_ALL); ++j) {
- for (int i = 0; i < (int)TX_TYPES; ++i) {
- if (libaom_test::IsTxSizeTypeValid(static_cast<TX_SIZE>(j),
- static_cast<TX_TYPE>(i))) {
- RunAV1InvTxfm2dTest(static_cast<TX_TYPE>(i), static_cast<TX_SIZE>(j),
- 1);
- }
- }
- }
-}
-
-TEST_P(AV1LbdInvTxfm2d, DISABLED_Speed) {
- for (int j = 0; j < (int)(TX_SIZES_ALL); ++j) {
- for (int i = 0; i < (int)TX_TYPES; ++i) {
- if (libaom_test::IsTxSizeTypeValid(static_cast<TX_SIZE>(j),
- static_cast<TX_TYPE>(i))) {
- RunAV1InvTxfm2dTest(static_cast<TX_TYPE>(i), static_cast<TX_SIZE>(j),
- 10000000);
- }
- }
- }
-}
-
-#if HAVE_SSSE3
-#if defined(_MSC_VER) || defined(__SSSE3__)
-#include "av1/common/x86/av1_inv_txfm_ssse3.h"
-INSTANTIATE_TEST_CASE_P(SSSE3, AV1LbdInvTxfm2d,
- ::testing::Values(av1_lowbd_inv_txfm2d_add_ssse3));
-#endif // _MSC_VER || __SSSE3__
-#endif // HAVE_SSSE3
-
-#if HAVE_AVX2
-extern "C" void av1_lowbd_inv_txfm2d_add_avx2(const int32_t *input,
- uint8_t *output, int stride,
- TX_TYPE tx_type, TX_SIZE tx_size,
- int eob);
-
-INSTANTIATE_TEST_CASE_P(AVX2, AV1LbdInvTxfm2d,
- ::testing::Values(av1_lowbd_inv_txfm2d_add_avx2));
-#endif // HAVE_AVX2
-
-#if HAVE_NEON
-
-extern "C" void av1_lowbd_inv_txfm2d_add_neon(const int32_t *input,
- uint8_t *output, int stride,
- TX_TYPE tx_type, TX_SIZE tx_size,
- int eob);
-
-INSTANTIATE_TEST_CASE_P(NEON, AV1LbdInvTxfm2d,
- ::testing::Values(av1_lowbd_inv_txfm2d_add_neon));
-#endif // HAVE_NEON
-
-} // namespace
diff --git a/third_party/aom/test/av1_quantize_test.cc b/third_party/aom/test/av1_quantize_test.cc
deleted file mode 100644
index aaf093918..000000000
--- a/third_party/aom/test/av1_quantize_test.cc
+++ /dev/null
@@ -1,239 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-#include <stdlib.h>
-
-#include "third_party/googletest/src/googletest/include/gtest/gtest.h"
-
-#include "config/aom_config.h"
-#include "config/av1_rtcd.h"
-
-#include "test/acm_random.h"
-#include "test/clear_system_state.h"
-#include "test/register_state_check.h"
-#include "av1/common/scan.h"
-
-namespace {
-
-typedef void (*QuantizeFpFunc)(
- const tran_low_t *coeff_ptr, intptr_t count, const int16_t *zbin_ptr,
- const int16_t *round_ptr, const int16_t *quant_ptr,
- const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr,
- tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr,
- const int16_t *scan, const int16_t *iscan, int log_scale);
-
-struct QuantizeFuncParams {
- QuantizeFuncParams(QuantizeFpFunc qF = NULL, QuantizeFpFunc qRefF = NULL,
- int count = 16)
- : qFunc(qF), qFuncRef(qRefF), coeffCount(count) {}
- QuantizeFpFunc qFunc;
- QuantizeFpFunc qFuncRef;
- int coeffCount;
-};
-
-using libaom_test::ACMRandom;
-
-const int numTests = 1000;
-const int maxSize = 1024;
-const int roundFactorRange = 127;
-const int dequantRange = 32768;
-const int coeffRange = (1 << 20) - 1;
-
-class AV1QuantizeTest : public ::testing::TestWithParam<QuantizeFuncParams> {
- public:
- void RunQuantizeTest() {
- ACMRandom rnd(ACMRandom::DeterministicSeed());
- DECLARE_ALIGNED(16, tran_low_t, coeff_ptr[maxSize]);
- DECLARE_ALIGNED(16, int16_t, zbin_ptr[8]);
- DECLARE_ALIGNED(16, int16_t, round_ptr[8]);
- DECLARE_ALIGNED(16, int16_t, quant_ptr[8]);
- DECLARE_ALIGNED(16, int16_t, quant_shift_ptr[8]);
- DECLARE_ALIGNED(16, tran_low_t, qcoeff_ptr[maxSize]);
- DECLARE_ALIGNED(16, tran_low_t, dqcoeff_ptr[maxSize]);
- DECLARE_ALIGNED(16, tran_low_t, ref_qcoeff_ptr[maxSize]);
- DECLARE_ALIGNED(16, tran_low_t, ref_dqcoeff_ptr[maxSize]);
- DECLARE_ALIGNED(16, int16_t, dequant_ptr[8]);
- uint16_t eob;
- uint16_t ref_eob;
- int err_count_total = 0;
- int first_failure = -1;
- int count = params_.coeffCount;
- const TX_SIZE txSize = getTxSize(count);
- int log_scale = (txSize == TX_32X32);
- QuantizeFpFunc quanFunc = params_.qFunc;
- QuantizeFpFunc quanFuncRef = params_.qFuncRef;
-
- const SCAN_ORDER scanOrder = av1_default_scan_orders[txSize];
- for (int i = 0; i < numTests; i++) {
- int err_count = 0;
- ref_eob = eob = -1;
- for (int j = 0; j < count; j++) {
- coeff_ptr[j] = rnd(coeffRange);
- }
-
- for (int j = 0; j < 2; j++) {
- zbin_ptr[j] = rnd.Rand16();
- quant_shift_ptr[j] = rnd.Rand16();
- // int16_t positive
- dequant_ptr[j] = abs(rnd(dequantRange));
- quant_ptr[j] = (1 << 16) / dequant_ptr[j];
- round_ptr[j] = (abs(rnd(roundFactorRange)) * dequant_ptr[j]) >> 7;
- }
- for (int j = 2; j < 8; ++j) {
- zbin_ptr[j] = zbin_ptr[1];
- quant_shift_ptr[j] = quant_shift_ptr[1];
- dequant_ptr[j] = dequant_ptr[1];
- quant_ptr[j] = quant_ptr[1];
- round_ptr[j] = round_ptr[1];
- }
- quanFuncRef(coeff_ptr, count, zbin_ptr, round_ptr, quant_ptr,
- quant_shift_ptr, ref_qcoeff_ptr, ref_dqcoeff_ptr, dequant_ptr,
- &ref_eob, scanOrder.scan, scanOrder.iscan, log_scale);
-
- ASM_REGISTER_STATE_CHECK(
- quanFunc(coeff_ptr, count, zbin_ptr, round_ptr, quant_ptr,
- quant_shift_ptr, qcoeff_ptr, dqcoeff_ptr, dequant_ptr, &eob,
- scanOrder.scan, scanOrder.iscan, log_scale));
-
- for (int j = 0; j < count; ++j) {
- err_count += (ref_qcoeff_ptr[j] != qcoeff_ptr[j]) |
- (ref_dqcoeff_ptr[j] != dqcoeff_ptr[j]);
- ASSERT_EQ(ref_qcoeff_ptr[j], qcoeff_ptr[j])
- << "qcoeff error: i = " << i << " j = " << j << "\n";
- EXPECT_EQ(ref_dqcoeff_ptr[j], dqcoeff_ptr[j])
- << "dqcoeff error: i = " << i << " j = " << j << "\n";
- }
- EXPECT_EQ(ref_eob, eob) << "eob error: "
- << "i = " << i << "\n";
- err_count += (ref_eob != eob);
- if (err_count && !err_count_total) {
- first_failure = i;
- }
- err_count_total += err_count;
- }
- EXPECT_EQ(0, err_count_total)
- << "Error: Quantization Test, C output doesn't match SSE2 output. "
- << "First failed at test case " << first_failure;
- }
-
- void RunEobTest() {
- ACMRandom rnd(ACMRandom::DeterministicSeed());
- DECLARE_ALIGNED(16, tran_low_t, coeff_ptr[maxSize]);
- DECLARE_ALIGNED(16, int16_t, zbin_ptr[8]);
- DECLARE_ALIGNED(16, int16_t, round_ptr[8]);
- DECLARE_ALIGNED(16, int16_t, quant_ptr[8]);
- DECLARE_ALIGNED(16, int16_t, quant_shift_ptr[8]);
- DECLARE_ALIGNED(16, tran_low_t, qcoeff_ptr[maxSize]);
- DECLARE_ALIGNED(16, tran_low_t, dqcoeff_ptr[maxSize]);
- DECLARE_ALIGNED(16, tran_low_t, ref_qcoeff_ptr[maxSize]);
- DECLARE_ALIGNED(16, tran_low_t, ref_dqcoeff_ptr[maxSize]);
- DECLARE_ALIGNED(16, int16_t, dequant_ptr[8]);
- uint16_t eob;
- uint16_t ref_eob;
- int count = params_.coeffCount;
- const TX_SIZE txSize = getTxSize(count);
- int log_scale = (txSize == TX_32X32);
- QuantizeFpFunc quanFunc = params_.qFunc;
- QuantizeFpFunc quanFuncRef = params_.qFuncRef;
- const SCAN_ORDER scanOrder = av1_default_scan_orders[txSize];
-
- for (int i = 0; i < numTests; i++) {
- ref_eob = eob = -1;
- for (int j = 0; j < count; j++) {
- coeff_ptr[j] = 0;
- }
-
- coeff_ptr[rnd(count)] = rnd(coeffRange);
- coeff_ptr[rnd(count)] = rnd(coeffRange);
- coeff_ptr[rnd(count)] = rnd(coeffRange);
-
- for (int j = 0; j < 2; j++) {
- zbin_ptr[j] = rnd.Rand16();
- quant_shift_ptr[j] = rnd.Rand16();
- // int16_t positive
- dequant_ptr[j] = abs(rnd(dequantRange));
- quant_ptr[j] = (1 << 16) / dequant_ptr[j];
- round_ptr[j] = (abs(rnd(roundFactorRange)) * dequant_ptr[j]) >> 7;
- }
- for (int j = 2; j < 8; ++j) {
- zbin_ptr[j] = zbin_ptr[1];
- quant_shift_ptr[j] = quant_shift_ptr[1];
- dequant_ptr[j] = dequant_ptr[1];
- quant_ptr[j] = quant_ptr[1];
- round_ptr[j] = round_ptr[1];
- }
-
- quanFuncRef(coeff_ptr, count, zbin_ptr, round_ptr, quant_ptr,
- quant_shift_ptr, ref_qcoeff_ptr, ref_dqcoeff_ptr, dequant_ptr,
- &ref_eob, scanOrder.scan, scanOrder.iscan, log_scale);
-
- ASM_REGISTER_STATE_CHECK(
- quanFunc(coeff_ptr, count, zbin_ptr, round_ptr, quant_ptr,
- quant_shift_ptr, qcoeff_ptr, dqcoeff_ptr, dequant_ptr, &eob,
- scanOrder.scan, scanOrder.iscan, log_scale));
- EXPECT_EQ(ref_eob, eob) << "eob error: "
- << "i = " << i << "\n";
- }
- }
-
- virtual void SetUp() { params_ = GetParam(); }
-
- virtual void TearDown() { libaom_test::ClearSystemState(); }
-
- virtual ~AV1QuantizeTest() {}
-
- private:
- TX_SIZE getTxSize(int count) {
- switch (count) {
- case 16: return TX_4X4;
- case 64: return TX_8X8;
- case 256: return TX_16X16;
- case 1024: return TX_32X32;
- default: return TX_4X4;
- }
- }
-
- QuantizeFuncParams params_;
-};
-
-TEST_P(AV1QuantizeTest, BitExactCheck) { RunQuantizeTest(); }
-TEST_P(AV1QuantizeTest, EobVerify) { RunEobTest(); }
-
-#if HAVE_SSE4_1
-const QuantizeFuncParams qfps[4] = {
- QuantizeFuncParams(&av1_highbd_quantize_fp_sse4_1, &av1_highbd_quantize_fp_c,
- 16),
- QuantizeFuncParams(&av1_highbd_quantize_fp_sse4_1, &av1_highbd_quantize_fp_c,
- 64),
- QuantizeFuncParams(&av1_highbd_quantize_fp_sse4_1, &av1_highbd_quantize_fp_c,
- 256),
- QuantizeFuncParams(&av1_highbd_quantize_fp_sse4_1, &av1_highbd_quantize_fp_c,
- 1024),
-};
-
-INSTANTIATE_TEST_CASE_P(SSE4_1, AV1QuantizeTest, ::testing::ValuesIn(qfps));
-#endif // HAVE_SSE4_1
-
-#if HAVE_AVX2
-const QuantizeFuncParams qfps_avx2[4] = {
- QuantizeFuncParams(&av1_highbd_quantize_fp_avx2, &av1_highbd_quantize_fp_c,
- 16),
- QuantizeFuncParams(&av1_highbd_quantize_fp_avx2, &av1_highbd_quantize_fp_c,
- 64),
- QuantizeFuncParams(&av1_highbd_quantize_fp_avx2, &av1_highbd_quantize_fp_c,
- 256),
- QuantizeFuncParams(&av1_highbd_quantize_fp_avx2, &av1_highbd_quantize_fp_c,
- 1024),
-};
-
-INSTANTIATE_TEST_CASE_P(AVX2, AV1QuantizeTest, ::testing::ValuesIn(qfps_avx2));
-#endif // HAVE_AVX2
-
-} // namespace
diff --git a/third_party/aom/test/av1_round_shift_array_test.cc b/third_party/aom/test/av1_round_shift_array_test.cc
deleted file mode 100644
index 181a39460..000000000
--- a/third_party/aom/test/av1_round_shift_array_test.cc
+++ /dev/null
@@ -1,129 +0,0 @@
-/*
- * Copyright (c) 2018, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#include <math.h>
-#include <stdio.h>
-#include <stdlib.h>
-
-#include "config/aom_dsp_rtcd.h"
-
-#include "aom_mem/aom_mem.h"
-#include "aom_ports/aom_timer.h"
-#include "aom_ports/mem.h"
-#include "test/acm_random.h"
-#include "test/clear_system_state.h"
-#include "test/util.h"
-#include "third_party/googletest/src/googletest/include/gtest/gtest.h"
-
-namespace AV1CompRoundShift {
-
-typedef void (*comp_round_shift_array_func)(int32_t *arr, int size, int bit);
-
-#if HAVE_SSE4_1 || HAVE_NEON
-const int kValidBitCheck[] = {
- -4, -3, -2, -1, 0, 1, 2, 3, 4,
-};
-#endif // HAVE_SSE4_1 || HAVE_NEON
-
-typedef ::testing::tuple<comp_round_shift_array_func, BLOCK_SIZE, int>
- CompRoundShiftParam;
-
-class AV1CompRoundShiftTest
- : public ::testing::TestWithParam<CompRoundShiftParam> {
- public:
- ~AV1CompRoundShiftTest();
-
- void SetUp() { rnd_.Reset(libaom_test::ACMRandom::DeterministicSeed()); }
- void TearDown() { libaom_test::ClearSystemState(); }
-
- protected:
- void RunCheckOutput(comp_round_shift_array_func test_impl, BLOCK_SIZE bsize,
- int bit);
- void RunSpeedTest(comp_round_shift_array_func test_impl, BLOCK_SIZE bsize,
- int bit);
-
- libaom_test::ACMRandom rnd_;
-};
-
-AV1CompRoundShiftTest::~AV1CompRoundShiftTest() { ; }
-
-void AV1CompRoundShiftTest::RunCheckOutput(
- comp_round_shift_array_func test_impl, BLOCK_SIZE bsize, int bit) {
- const int w = block_size_wide[bsize];
- const int h = block_size_high[bsize];
- const int blk_wd = 64;
- DECLARE_ALIGNED(32, int32_t, pred_[blk_wd]);
- DECLARE_ALIGNED(32, int32_t, ref_buffer_[blk_wd]);
- for (int i = 0; i < (blk_wd); ++i) {
- ref_buffer_[i] = pred_[i] = rnd_.Rand31() / 16;
- }
- av1_round_shift_array_c(ref_buffer_, w, bit);
- test_impl(pred_, w, bit);
- for (int x = 0; x < w; ++x) {
- ASSERT_EQ(ref_buffer_[x], pred_[x]) << w << "x" << h << "mismatch @"
- << "(" << x << ")";
- }
-}
-
-void AV1CompRoundShiftTest::RunSpeedTest(comp_round_shift_array_func test_impl,
- BLOCK_SIZE bsize, int bit) {
- const int w = block_size_wide[bsize];
- const int h = block_size_high[bsize];
- const int blk_wd = 64;
- DECLARE_ALIGNED(32, int32_t, ref_buffer_[blk_wd]);
- for (int i = 0; i < (blk_wd); ++i) {
- ref_buffer_[i] = rnd_.Rand31();
- }
-
- const int num_loops = 1000000000 / (w + h);
- comp_round_shift_array_func funcs[2] = { av1_round_shift_array_c, test_impl };
- double elapsed_time[2] = { 0 };
- for (int i = 0; i < 2; ++i) {
- aom_usec_timer timer;
- aom_usec_timer_start(&timer);
- comp_round_shift_array_func func = funcs[i];
- for (int j = 0; j < num_loops; ++j) {
- func(ref_buffer_, w, bit);
- }
- aom_usec_timer_mark(&timer);
- double time = static_cast<double>(aom_usec_timer_elapsed(&timer));
- elapsed_time[i] = 1000.0 * time / num_loops;
- }
- printf("av1_round_shift_array %3dx%-3d: bit : %d %7.2f/%7.2fns", w, h, bit,
- elapsed_time[0], elapsed_time[1]);
- printf("(%3.2f)\n", elapsed_time[0] / elapsed_time[1]);
-}
-
-TEST_P(AV1CompRoundShiftTest, CheckOutput) {
- RunCheckOutput(GET_PARAM(0), GET_PARAM(1), GET_PARAM(2));
-}
-
-TEST_P(AV1CompRoundShiftTest, DISABLED_Speed) {
- RunSpeedTest(GET_PARAM(0), GET_PARAM(1), GET_PARAM(2));
-}
-
-#if HAVE_SSE4_1
-INSTANTIATE_TEST_CASE_P(
- SSE4_1, AV1CompRoundShiftTest,
- ::testing::Combine(::testing::Values(&av1_round_shift_array_sse4_1),
- ::testing::ValuesIn(txsize_to_bsize),
- ::testing::ValuesIn(kValidBitCheck)));
-#endif
-
-#if HAVE_NEON
-INSTANTIATE_TEST_CASE_P(
- NEON, AV1CompRoundShiftTest,
- ::testing::Combine(::testing::Values(&av1_round_shift_array_neon),
- ::testing::ValuesIn(txsize_to_bsize),
- ::testing::ValuesIn(kValidBitCheck)));
-#endif
-
-}; // namespace AV1CompRoundShift
diff --git a/third_party/aom/test/av1_txfm_test.cc b/third_party/aom/test/av1_txfm_test.cc
deleted file mode 100644
index d5b0ce325..000000000
--- a/third_party/aom/test/av1_txfm_test.cc
+++ /dev/null
@@ -1,371 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#include <stdio.h>
-#include "test/av1_txfm_test.h"
-
-namespace libaom_test {
-
-int get_txfm1d_size(TX_SIZE tx_size) { return tx_size_wide[tx_size]; }
-
-void get_txfm1d_type(TX_TYPE txfm2d_type, TYPE_TXFM *type0, TYPE_TXFM *type1) {
- switch (txfm2d_type) {
- case DCT_DCT:
- *type0 = TYPE_DCT;
- *type1 = TYPE_DCT;
- break;
- case ADST_DCT:
- *type0 = TYPE_ADST;
- *type1 = TYPE_DCT;
- break;
- case DCT_ADST:
- *type0 = TYPE_DCT;
- *type1 = TYPE_ADST;
- break;
- case ADST_ADST:
- *type0 = TYPE_ADST;
- *type1 = TYPE_ADST;
- break;
- case FLIPADST_DCT:
- *type0 = TYPE_ADST;
- *type1 = TYPE_DCT;
- break;
- case DCT_FLIPADST:
- *type0 = TYPE_DCT;
- *type1 = TYPE_ADST;
- break;
- case FLIPADST_FLIPADST:
- *type0 = TYPE_ADST;
- *type1 = TYPE_ADST;
- break;
- case ADST_FLIPADST:
- *type0 = TYPE_ADST;
- *type1 = TYPE_ADST;
- break;
- case FLIPADST_ADST:
- *type0 = TYPE_ADST;
- *type1 = TYPE_ADST;
- break;
- case IDTX:
- *type0 = TYPE_IDTX;
- *type1 = TYPE_IDTX;
- break;
- case H_DCT:
- *type0 = TYPE_IDTX;
- *type1 = TYPE_DCT;
- break;
- case V_DCT:
- *type0 = TYPE_DCT;
- *type1 = TYPE_IDTX;
- break;
- case H_ADST:
- *type0 = TYPE_IDTX;
- *type1 = TYPE_ADST;
- break;
- case V_ADST:
- *type0 = TYPE_ADST;
- *type1 = TYPE_IDTX;
- break;
- case H_FLIPADST:
- *type0 = TYPE_IDTX;
- *type1 = TYPE_ADST;
- break;
- case V_FLIPADST:
- *type0 = TYPE_ADST;
- *type1 = TYPE_IDTX;
- break;
- default:
- *type0 = TYPE_DCT;
- *type1 = TYPE_DCT;
- assert(0);
- break;
- }
-}
-
-double Sqrt2 = pow(2, 0.5);
-double invSqrt2 = 1 / pow(2, 0.5);
-
-double dct_matrix(double n, double k, int size) {
- return cos(M_PI * (2 * n + 1) * k / (2 * size));
-}
-
-void reference_dct_1d(const double *in, double *out, int size) {
- for (int k = 0; k < size; ++k) {
- out[k] = 0;
- for (int n = 0; n < size; ++n) {
- out[k] += in[n] * dct_matrix(n, k, size);
- }
- if (k == 0) out[k] = out[k] * invSqrt2;
- }
-}
-
-void reference_idct_1d(const double *in, double *out, int size) {
- for (int k = 0; k < size; ++k) {
- out[k] = 0;
- for (int n = 0; n < size; ++n) {
- if (n == 0)
- out[k] += invSqrt2 * in[n] * dct_matrix(k, n, size);
- else
- out[k] += in[n] * dct_matrix(k, n, size);
- }
- }
-}
-
-// TODO(any): Copied from the old 'fadst4' (same as the new 'av1_fadst4_new'
-// function). Should be replaced by a proper reference function that takes
-// 'double' input & output.
-static void fadst4_new(const tran_low_t *input, tran_low_t *output) {
- tran_high_t x0, x1, x2, x3;
- tran_high_t s0, s1, s2, s3, s4, s5, s6, s7;
-
- x0 = input[0];
- x1 = input[1];
- x2 = input[2];
- x3 = input[3];
-
- if (!(x0 | x1 | x2 | x3)) {
- output[0] = output[1] = output[2] = output[3] = 0;
- return;
- }
-
- s0 = sinpi_1_9 * x0;
- s1 = sinpi_4_9 * x0;
- s2 = sinpi_2_9 * x1;
- s3 = sinpi_1_9 * x1;
- s4 = sinpi_3_9 * x2;
- s5 = sinpi_4_9 * x3;
- s6 = sinpi_2_9 * x3;
- s7 = x0 + x1 - x3;
-
- x0 = s0 + s2 + s5;
- x1 = sinpi_3_9 * s7;
- x2 = s1 - s3 + s6;
- x3 = s4;
-
- s0 = x0 + x3;
- s1 = x1;
- s2 = x2 - x3;
- s3 = x2 - x0 + x3;
-
- // 1-D transform scaling factor is sqrt(2).
- output[0] = (tran_low_t)fdct_round_shift(s0);
- output[1] = (tran_low_t)fdct_round_shift(s1);
- output[2] = (tran_low_t)fdct_round_shift(s2);
- output[3] = (tran_low_t)fdct_round_shift(s3);
-}
-
-void reference_adst_1d(const double *in, double *out, int size) {
- if (size == 4) { // Special case.
- tran_low_t int_input[4];
- for (int i = 0; i < 4; ++i) {
- int_input[i] = static_cast<tran_low_t>(round(in[i]));
- }
- tran_low_t int_output[4];
- fadst4_new(int_input, int_output);
- for (int i = 0; i < 4; ++i) {
- out[i] = int_output[i];
- }
- return;
- }
-
- for (int k = 0; k < size; ++k) {
- out[k] = 0;
- for (int n = 0; n < size; ++n) {
- out[k] += in[n] * sin(M_PI * (2 * n + 1) * (2 * k + 1) / (4 * size));
- }
- }
-}
-
-void reference_idtx_1d(const double *in, double *out, int size) {
- double scale = 0;
- if (size == 4)
- scale = Sqrt2;
- else if (size == 8)
- scale = 2;
- else if (size == 16)
- scale = 2 * Sqrt2;
- else if (size == 32)
- scale = 4;
- else if (size == 64)
- scale = 4 * Sqrt2;
- for (int k = 0; k < size; ++k) {
- out[k] = in[k] * scale;
- }
-}
-
-void reference_hybrid_1d(double *in, double *out, int size, int type) {
- if (type == TYPE_DCT)
- reference_dct_1d(in, out, size);
- else if (type == TYPE_ADST)
- reference_adst_1d(in, out, size);
- else
- reference_idtx_1d(in, out, size);
-}
-
-double get_amplification_factor(TX_TYPE tx_type, TX_SIZE tx_size) {
- TXFM_2D_FLIP_CFG fwd_txfm_flip_cfg;
- av1_get_fwd_txfm_cfg(tx_type, tx_size, &fwd_txfm_flip_cfg);
- const int tx_width = tx_size_wide[fwd_txfm_flip_cfg.tx_size];
- const int tx_height = tx_size_high[fwd_txfm_flip_cfg.tx_size];
- const int8_t *shift = fwd_txfm_flip_cfg.shift;
- const int amplify_bit = shift[0] + shift[1] + shift[2];
- double amplify_factor =
- amplify_bit >= 0 ? (1 << amplify_bit) : (1.0 / (1 << -amplify_bit));
-
- // For rectangular transforms, we need to multiply by an extra factor.
- const int rect_type = get_rect_tx_log_ratio(tx_width, tx_height);
- if (abs(rect_type) == 1) {
- amplify_factor *= pow(2, 0.5);
- }
- return amplify_factor;
-}
-
-void reference_hybrid_2d(double *in, double *out, TX_TYPE tx_type,
- TX_SIZE tx_size) {
- // Get transform type and size of each dimension.
- TYPE_TXFM type0;
- TYPE_TXFM type1;
- get_txfm1d_type(tx_type, &type0, &type1);
- const int tx_width = tx_size_wide[tx_size];
- const int tx_height = tx_size_high[tx_size];
-
- double *const temp_in = new double[AOMMAX(tx_width, tx_height)];
- double *const temp_out = new double[AOMMAX(tx_width, tx_height)];
- double *const out_interm = new double[tx_width * tx_height];
- const int stride = tx_width;
-
- // Transform columns.
- for (int c = 0; c < tx_width; ++c) {
- for (int r = 0; r < tx_height; ++r) {
- temp_in[r] = in[r * stride + c];
- }
- reference_hybrid_1d(temp_in, temp_out, tx_height, type0);
- for (int r = 0; r < tx_height; ++r) {
- out_interm[r * stride + c] = temp_out[r];
- }
- }
-
- // Transform rows.
- for (int r = 0; r < tx_height; ++r) {
- reference_hybrid_1d(out_interm + r * stride, out + r * stride, tx_width,
- type1);
- }
-
- delete[] temp_in;
- delete[] temp_out;
- delete[] out_interm;
-
- // These transforms use an approximate 2D DCT transform, by only keeping the
- // top-left quarter of the coefficients, and repacking them in the first
- // quarter indices.
- // TODO(urvang): Refactor this code.
- if (tx_width == 64 && tx_height == 64) { // tx_size == TX_64X64
- // Zero out top-right 32x32 area.
- for (int row = 0; row < 32; ++row) {
- memset(out + row * 64 + 32, 0, 32 * sizeof(*out));
- }
- // Zero out the bottom 64x32 area.
- memset(out + 32 * 64, 0, 32 * 64 * sizeof(*out));
- // Re-pack non-zero coeffs in the first 32x32 indices.
- for (int row = 1; row < 32; ++row) {
- memcpy(out + row * 32, out + row * 64, 32 * sizeof(*out));
- }
- } else if (tx_width == 32 && tx_height == 64) { // tx_size == TX_32X64
- // Zero out the bottom 32x32 area.
- memset(out + 32 * 32, 0, 32 * 32 * sizeof(*out));
- // Note: no repacking needed here.
- } else if (tx_width == 64 && tx_height == 32) { // tx_size == TX_64X32
- // Zero out right 32x32 area.
- for (int row = 0; row < 32; ++row) {
- memset(out + row * 64 + 32, 0, 32 * sizeof(*out));
- }
- // Re-pack non-zero coeffs in the first 32x32 indices.
- for (int row = 1; row < 32; ++row) {
- memcpy(out + row * 32, out + row * 64, 32 * sizeof(*out));
- }
- } else if (tx_width == 16 && tx_height == 64) { // tx_size == TX_16X64
- // Zero out the bottom 16x32 area.
- memset(out + 16 * 32, 0, 16 * 32 * sizeof(*out));
- // Note: no repacking needed here.
- } else if (tx_width == 64 && tx_height == 16) { // tx_size == TX_64X16
- // Zero out right 32x16 area.
- for (int row = 0; row < 16; ++row) {
- memset(out + row * 64 + 32, 0, 32 * sizeof(*out));
- }
- // Re-pack non-zero coeffs in the first 32x16 indices.
- for (int row = 1; row < 16; ++row) {
- memcpy(out + row * 32, out + row * 64, 32 * sizeof(*out));
- }
- }
-
- // Apply appropriate scale.
- const double amplify_factor = get_amplification_factor(tx_type, tx_size);
- for (int c = 0; c < tx_width; ++c) {
- for (int r = 0; r < tx_height; ++r) {
- out[r * stride + c] *= amplify_factor;
- }
- }
-}
-
-template <typename Type>
-void fliplr(Type *dest, int width, int height, int stride) {
- for (int r = 0; r < height; ++r) {
- for (int c = 0; c < width / 2; ++c) {
- const Type tmp = dest[r * stride + c];
- dest[r * stride + c] = dest[r * stride + width - 1 - c];
- dest[r * stride + width - 1 - c] = tmp;
- }
- }
-}
-
-template <typename Type>
-void flipud(Type *dest, int width, int height, int stride) {
- for (int c = 0; c < width; ++c) {
- for (int r = 0; r < height / 2; ++r) {
- const Type tmp = dest[r * stride + c];
- dest[r * stride + c] = dest[(height - 1 - r) * stride + c];
- dest[(height - 1 - r) * stride + c] = tmp;
- }
- }
-}
-
-template <typename Type>
-void fliplrud(Type *dest, int width, int height, int stride) {
- for (int r = 0; r < height / 2; ++r) {
- for (int c = 0; c < width; ++c) {
- const Type tmp = dest[r * stride + c];
- dest[r * stride + c] = dest[(height - 1 - r) * stride + width - 1 - c];
- dest[(height - 1 - r) * stride + width - 1 - c] = tmp;
- }
- }
-}
-
-template void fliplr<double>(double *dest, int width, int height, int stride);
-template void flipud<double>(double *dest, int width, int height, int stride);
-template void fliplrud<double>(double *dest, int width, int height, int stride);
-
-int bd_arr[BD_NUM] = { 8, 10, 12 };
-
-int8_t low_range_arr[BD_NUM] = { 18, 32, 32 };
-int8_t high_range_arr[BD_NUM] = { 32, 32, 32 };
-
-void txfm_stage_range_check(const int8_t *stage_range, int stage_num,
- int8_t cos_bit, int low_range, int high_range) {
- for (int i = 0; i < stage_num; ++i) {
- EXPECT_LE(stage_range[i], low_range);
- ASSERT_LE(stage_range[i] + cos_bit, high_range) << "stage = " << i;
- }
- for (int i = 0; i < stage_num - 1; ++i) {
- // make sure there is no overflow while doing half_btf()
- ASSERT_LE(stage_range[i + 1] + cos_bit, high_range) << "stage = " << i;
- }
-}
-} // namespace libaom_test
diff --git a/third_party/aom/test/av1_txfm_test.h b/third_party/aom/test/av1_txfm_test.h
deleted file mode 100644
index a18164741..000000000
--- a/third_party/aom/test/av1_txfm_test.h
+++ /dev/null
@@ -1,135 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#ifndef AOM_TEST_AV1_TXFM_TEST_H_
-#define AOM_TEST_AV1_TXFM_TEST_H_
-
-#include <stdio.h>
-#include <stdlib.h>
-#ifdef _MSC_VER
-#define _USE_MATH_DEFINES
-#endif
-#include <math.h>
-
-#include "config/av1_rtcd.h"
-
-#include "third_party/googletest/src/googletest/include/gtest/gtest.h"
-
-#include "test/acm_random.h"
-#include "av1/common/av1_txfm.h"
-#include "av1/common/blockd.h"
-#include "av1/common/enums.h"
-
-namespace libaom_test {
-typedef enum {
- TYPE_DCT = 0,
- TYPE_ADST,
- TYPE_IDTX,
- TYPE_IDCT,
- TYPE_IADST,
- TYPE_LAST
-} TYPE_TXFM;
-
-int get_txfm1d_size(TX_SIZE tx_size);
-
-void get_txfm1d_type(TX_TYPE txfm2d_type, TYPE_TXFM *type0, TYPE_TXFM *type1);
-
-void reference_dct_1d(const double *in, double *out, int size);
-void reference_idct_1d(const double *in, double *out, int size);
-
-void reference_adst_1d(const double *in, double *out, int size);
-
-void reference_hybrid_1d(double *in, double *out, int size, int type);
-
-double get_amplification_factor(TX_TYPE tx_type, TX_SIZE tx_size);
-
-void reference_hybrid_2d(double *in, double *out, TX_TYPE tx_type,
- TX_SIZE tx_size);
-template <typename Type1, typename Type2>
-static double compute_avg_abs_error(const Type1 *a, const Type2 *b,
- const int size) {
- double error = 0;
- for (int i = 0; i < size; i++) {
- error += fabs(static_cast<double>(a[i]) - static_cast<double>(b[i]));
- }
- error = error / size;
- return error;
-}
-
-template <typename Type>
-void fliplr(Type *dest, int width, int height, int stride);
-
-template <typename Type>
-void flipud(Type *dest, int width, int height, int stride);
-
-template <typename Type>
-void fliplrud(Type *dest, int width, int height, int stride);
-
-typedef void (*TxfmFunc)(const int32_t *in, int32_t *out, const int8_t cos_bit,
- const int8_t *range_bit);
-
-typedef void (*InvTxfm2dFunc)(const int32_t *, uint16_t *, int, TX_TYPE, int);
-typedef void (*LbdInvTxfm2dFunc)(const int32_t *, uint8_t *, int, TX_TYPE,
- TX_SIZE, int);
-
-static const int bd = 10;
-static const int input_base = (1 << bd);
-
-static INLINE bool IsTxSizeTypeValid(TX_SIZE tx_size, TX_TYPE tx_type) {
- const TX_SIZE tx_size_sqr_up = txsize_sqr_up_map[tx_size];
- TxSetType tx_set_type;
- if (tx_size_sqr_up > TX_32X32) {
- tx_set_type = EXT_TX_SET_DCTONLY;
- } else if (tx_size_sqr_up == TX_32X32) {
- tx_set_type = EXT_TX_SET_DCT_IDTX;
- } else {
- tx_set_type = EXT_TX_SET_ALL16;
- }
- return av1_ext_tx_used[tx_set_type][tx_type] != 0;
-}
-
-#if CONFIG_AV1_ENCODER
-
-static const FwdTxfm2dFunc fwd_txfm_func_ls[TX_SIZES_ALL] = {
- av1_fwd_txfm2d_4x4_c, av1_fwd_txfm2d_8x8_c, av1_fwd_txfm2d_16x16_c,
- av1_fwd_txfm2d_32x32_c, av1_fwd_txfm2d_64x64_c, av1_fwd_txfm2d_4x8_c,
- av1_fwd_txfm2d_8x4_c, av1_fwd_txfm2d_8x16_c, av1_fwd_txfm2d_16x8_c,
- av1_fwd_txfm2d_16x32_c, av1_fwd_txfm2d_32x16_c, av1_fwd_txfm2d_32x64_c,
- av1_fwd_txfm2d_64x32_c, av1_fwd_txfm2d_4x16_c, av1_fwd_txfm2d_16x4_c,
- av1_fwd_txfm2d_8x32_c, av1_fwd_txfm2d_32x8_c, av1_fwd_txfm2d_16x64_c,
- av1_fwd_txfm2d_64x16_c,
-};
-#endif
-
-static const InvTxfm2dFunc inv_txfm_func_ls[TX_SIZES_ALL] = {
- av1_inv_txfm2d_add_4x4_c, av1_inv_txfm2d_add_8x8_c,
- av1_inv_txfm2d_add_16x16_c, av1_inv_txfm2d_add_32x32_c,
- av1_inv_txfm2d_add_64x64_c, av1_inv_txfm2d_add_4x8_c,
- av1_inv_txfm2d_add_8x4_c, av1_inv_txfm2d_add_8x16_c,
- av1_inv_txfm2d_add_16x8_c, av1_inv_txfm2d_add_16x32_c,
- av1_inv_txfm2d_add_32x16_c, av1_inv_txfm2d_add_32x64_c,
- av1_inv_txfm2d_add_64x32_c, av1_inv_txfm2d_add_4x16_c,
- av1_inv_txfm2d_add_16x4_c, av1_inv_txfm2d_add_8x32_c,
- av1_inv_txfm2d_add_32x8_c, av1_inv_txfm2d_add_16x64_c,
- av1_inv_txfm2d_add_64x16_c,
-};
-
-#define BD_NUM 3
-
-extern int bd_arr[];
-extern int8_t low_range_arr[];
-extern int8_t high_range_arr[];
-
-void txfm_stage_range_check(const int8_t *stage_range, int stage_num,
- const int8_t cos_bit, int low_range,
- int high_range);
-} // namespace libaom_test
-#endif // AOM_TEST_AV1_TXFM_TEST_H_
diff --git a/third_party/aom/test/av1_wedge_utils_test.cc b/third_party/aom/test/av1_wedge_utils_test.cc
deleted file mode 100644
index e8fbe69a4..000000000
--- a/third_party/aom/test/av1_wedge_utils_test.cc
+++ /dev/null
@@ -1,390 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#include "third_party/googletest/src/googletest/include/gtest/gtest.h"
-
-#include "config/aom_config.h"
-#include "config/aom_dsp_rtcd.h"
-#include "config/av1_rtcd.h"
-
-#include "aom_dsp/aom_dsp_common.h"
-
-#include "av1/common/enums.h"
-
-#include "test/acm_random.h"
-#include "test/function_equivalence_test.h"
-#include "test/register_state_check.h"
-
-#define WEDGE_WEIGHT_BITS 6
-#define MAX_MASK_VALUE (1 << (WEDGE_WEIGHT_BITS))
-
-using libaom_test::ACMRandom;
-using libaom_test::FunctionEquivalenceTest;
-
-namespace {
-
-static const int16_t kInt13Max = (1 << 12) - 1;
-
-//////////////////////////////////////////////////////////////////////////////
-// av1_wedge_sse_from_residuals - functionality
-//////////////////////////////////////////////////////////////////////////////
-
-class WedgeUtilsSSEFuncTest : public testing::Test {
- protected:
- WedgeUtilsSSEFuncTest() : rng_(ACMRandom::DeterministicSeed()) {}
-
- static const int kIterations = 1000;
-
- ACMRandom rng_;
-};
-
-static void equiv_blend_residuals(int16_t *r, const int16_t *r0,
- const int16_t *r1, const uint8_t *m, int N) {
- for (int i = 0; i < N; i++) {
- const int32_t m0 = m[i];
- const int32_t m1 = MAX_MASK_VALUE - m0;
- const int16_t R = m0 * r0[i] + m1 * r1[i];
- // Note that this rounding is designed to match the result
- // you would get when actually blending the 2 predictors and computing
- // the residuals.
- r[i] = ROUND_POWER_OF_TWO(R - 1, WEDGE_WEIGHT_BITS);
- }
-}
-
-static uint64_t equiv_sse_from_residuals(const int16_t *r0, const int16_t *r1,
- const uint8_t *m, int N) {
- uint64_t acc = 0;
- for (int i = 0; i < N; i++) {
- const int32_t m0 = m[i];
- const int32_t m1 = MAX_MASK_VALUE - m0;
- const int16_t R = m0 * r0[i] + m1 * r1[i];
- const int32_t r = ROUND_POWER_OF_TWO(R - 1, WEDGE_WEIGHT_BITS);
- acc += r * r;
- }
- return acc;
-}
-
-TEST_F(WedgeUtilsSSEFuncTest, ResidualBlendingEquiv) {
- DECLARE_ALIGNED(32, uint8_t, s[MAX_SB_SQUARE]);
- DECLARE_ALIGNED(32, uint8_t, p0[MAX_SB_SQUARE]);
- DECLARE_ALIGNED(32, uint8_t, p1[MAX_SB_SQUARE]);
- DECLARE_ALIGNED(32, uint8_t, p[MAX_SB_SQUARE]);
-
- DECLARE_ALIGNED(32, int16_t, r0[MAX_SB_SQUARE]);
- DECLARE_ALIGNED(32, int16_t, r1[MAX_SB_SQUARE]);
- DECLARE_ALIGNED(32, int16_t, r_ref[MAX_SB_SQUARE]);
- DECLARE_ALIGNED(32, int16_t, r_tst[MAX_SB_SQUARE]);
- DECLARE_ALIGNED(32, uint8_t, m[MAX_SB_SQUARE]);
-
- for (int iter = 0; iter < kIterations && !HasFatalFailure(); ++iter) {
- for (int i = 0; i < MAX_SB_SQUARE; ++i) {
- s[i] = rng_.Rand8();
- m[i] = rng_(MAX_MASK_VALUE + 1);
- }
-
- const int w = 1 << (rng_(MAX_SB_SIZE_LOG2 + 1 - 3) + 3);
- const int h = 1 << (rng_(MAX_SB_SIZE_LOG2 + 1 - 3) + 3);
- const int N = w * h;
-
- for (int j = 0; j < N; j++) {
- p0[j] = clamp(s[j] + rng_(33) - 16, 0, UINT8_MAX);
- p1[j] = clamp(s[j] + rng_(33) - 16, 0, UINT8_MAX);
- }
-
- aom_blend_a64_mask(p, w, p0, w, p1, w, m, w, w, h, 0, 0);
-
- aom_subtract_block(h, w, r0, w, s, w, p0, w);
- aom_subtract_block(h, w, r1, w, s, w, p1, w);
-
- aom_subtract_block(h, w, r_ref, w, s, w, p, w);
- equiv_blend_residuals(r_tst, r0, r1, m, N);
-
- for (int i = 0; i < N; ++i) ASSERT_EQ(r_ref[i], r_tst[i]);
-
- uint64_t ref_sse = aom_sum_squares_i16(r_ref, N);
- uint64_t tst_sse = equiv_sse_from_residuals(r0, r1, m, N);
-
- ASSERT_EQ(ref_sse, tst_sse);
- }
-}
-
-static uint64_t sse_from_residuals(const int16_t *r0, const int16_t *r1,
- const uint8_t *m, int N) {
- uint64_t acc = 0;
- for (int i = 0; i < N; i++) {
- const int32_t m0 = m[i];
- const int32_t m1 = MAX_MASK_VALUE - m0;
- const int32_t r = m0 * r0[i] + m1 * r1[i];
- acc += r * r;
- }
- return ROUND_POWER_OF_TWO(acc, 2 * WEDGE_WEIGHT_BITS);
-}
-
-TEST_F(WedgeUtilsSSEFuncTest, ResidualBlendingMethod) {
- DECLARE_ALIGNED(32, int16_t, r0[MAX_SB_SQUARE]);
- DECLARE_ALIGNED(32, int16_t, r1[MAX_SB_SQUARE]);
- DECLARE_ALIGNED(32, int16_t, d[MAX_SB_SQUARE]);
- DECLARE_ALIGNED(32, uint8_t, m[MAX_SB_SQUARE]);
-
- for (int iter = 0; iter < kIterations && !HasFatalFailure(); ++iter) {
- for (int i = 0; i < MAX_SB_SQUARE; ++i) {
- r1[i] = rng_(2 * INT8_MAX - 2 * INT8_MIN + 1) + 2 * INT8_MIN;
- d[i] = rng_(2 * INT8_MAX - 2 * INT8_MIN + 1) + 2 * INT8_MIN;
- m[i] = rng_(MAX_MASK_VALUE + 1);
- }
-
- const int N = 64 * (rng_(MAX_SB_SQUARE / 64) + 1);
-
- for (int i = 0; i < N; i++) r0[i] = r1[i] + d[i];
-
- const uint64_t ref_res = sse_from_residuals(r0, r1, m, N);
- const uint64_t tst_res = av1_wedge_sse_from_residuals(r1, d, m, N);
-
- ASSERT_EQ(ref_res, tst_res);
- }
-}
-
-//////////////////////////////////////////////////////////////////////////////
-// av1_wedge_sse_from_residuals - optimizations
-//////////////////////////////////////////////////////////////////////////////
-
-typedef uint64_t (*FSSE)(const int16_t *r1, const int16_t *d, const uint8_t *m,
- int N);
-typedef libaom_test::FuncParam<FSSE> TestFuncsFSSE;
-
-class WedgeUtilsSSEOptTest : public FunctionEquivalenceTest<FSSE> {
- protected:
- static const int kIterations = 10000;
-};
-
-TEST_P(WedgeUtilsSSEOptTest, RandomValues) {
- DECLARE_ALIGNED(32, int16_t, r1[MAX_SB_SQUARE]);
- DECLARE_ALIGNED(32, int16_t, d[MAX_SB_SQUARE]);
- DECLARE_ALIGNED(32, uint8_t, m[MAX_SB_SQUARE]);
-
- for (int iter = 0; iter < kIterations && !HasFatalFailure(); ++iter) {
- for (int i = 0; i < MAX_SB_SQUARE; ++i) {
- r1[i] = rng_(2 * kInt13Max + 1) - kInt13Max;
- d[i] = rng_(2 * kInt13Max + 1) - kInt13Max;
- m[i] = rng_(MAX_MASK_VALUE + 1);
- }
-
- const int N = 64 * (rng_(MAX_SB_SQUARE / 64) + 1);
-
- const uint64_t ref_res = params_.ref_func(r1, d, m, N);
- uint64_t tst_res;
- ASM_REGISTER_STATE_CHECK(tst_res = params_.tst_func(r1, d, m, N));
-
- ASSERT_EQ(ref_res, tst_res);
- }
-}
-
-TEST_P(WedgeUtilsSSEOptTest, ExtremeValues) {
- DECLARE_ALIGNED(32, int16_t, r1[MAX_SB_SQUARE]);
- DECLARE_ALIGNED(32, int16_t, d[MAX_SB_SQUARE]);
- DECLARE_ALIGNED(32, uint8_t, m[MAX_SB_SQUARE]);
-
- for (int iter = 0; iter < kIterations && !HasFatalFailure(); ++iter) {
- if (rng_(2)) {
- for (int i = 0; i < MAX_SB_SQUARE; ++i) r1[i] = kInt13Max;
- } else {
- for (int i = 0; i < MAX_SB_SQUARE; ++i) r1[i] = -kInt13Max;
- }
-
- if (rng_(2)) {
- for (int i = 0; i < MAX_SB_SQUARE; ++i) d[i] = kInt13Max;
- } else {
- for (int i = 0; i < MAX_SB_SQUARE; ++i) d[i] = -kInt13Max;
- }
-
- for (int i = 0; i < MAX_SB_SQUARE; ++i) m[i] = MAX_MASK_VALUE;
-
- const int N = 64 * (rng_(MAX_SB_SQUARE / 64) + 1);
-
- const uint64_t ref_res = params_.ref_func(r1, d, m, N);
- uint64_t tst_res;
- ASM_REGISTER_STATE_CHECK(tst_res = params_.tst_func(r1, d, m, N));
-
- ASSERT_EQ(ref_res, tst_res);
- }
-}
-
-//////////////////////////////////////////////////////////////////////////////
-// av1_wedge_sign_from_residuals
-//////////////////////////////////////////////////////////////////////////////
-
-typedef int (*FSign)(const int16_t *ds, const uint8_t *m, int N, int64_t limit);
-typedef libaom_test::FuncParam<FSign> TestFuncsFSign;
-
-class WedgeUtilsSignOptTest : public FunctionEquivalenceTest<FSign> {
- protected:
- static const int kIterations = 10000;
- static const int kMaxSize = 8196; // Size limited by SIMD implementation.
-};
-
-TEST_P(WedgeUtilsSignOptTest, RandomValues) {
- DECLARE_ALIGNED(32, int16_t, r0[MAX_SB_SQUARE]);
- DECLARE_ALIGNED(32, int16_t, r1[MAX_SB_SQUARE]);
- DECLARE_ALIGNED(32, int16_t, ds[MAX_SB_SQUARE]);
- DECLARE_ALIGNED(32, uint8_t, m[MAX_SB_SQUARE]);
-
- for (int iter = 0; iter < kIterations && !HasFatalFailure(); ++iter) {
- for (int i = 0; i < MAX_SB_SQUARE; ++i) {
- r0[i] = rng_(2 * kInt13Max + 1) - kInt13Max;
- r1[i] = rng_(2 * kInt13Max + 1) - kInt13Max;
- m[i] = rng_(MAX_MASK_VALUE + 1);
- }
-
- const int maxN = AOMMIN(kMaxSize, MAX_SB_SQUARE);
- const int N = 64 * (rng_(maxN / 64 - 1) + 1);
-
- int64_t limit;
- limit = (int64_t)aom_sum_squares_i16(r0, N);
- limit -= (int64_t)aom_sum_squares_i16(r1, N);
- limit *= (1 << WEDGE_WEIGHT_BITS) / 2;
-
- for (int i = 0; i < N; i++)
- ds[i] = clamp(r0[i] * r0[i] - r1[i] * r1[i], INT16_MIN, INT16_MAX);
-
- const int ref_res = params_.ref_func(ds, m, N, limit);
- int tst_res;
- ASM_REGISTER_STATE_CHECK(tst_res = params_.tst_func(ds, m, N, limit));
-
- ASSERT_EQ(ref_res, tst_res);
- }
-}
-
-TEST_P(WedgeUtilsSignOptTest, ExtremeValues) {
- DECLARE_ALIGNED(32, int16_t, r0[MAX_SB_SQUARE]);
- DECLARE_ALIGNED(32, int16_t, r1[MAX_SB_SQUARE]);
- DECLARE_ALIGNED(32, int16_t, ds[MAX_SB_SQUARE]);
- DECLARE_ALIGNED(32, uint8_t, m[MAX_SB_SQUARE]);
-
- for (int iter = 0; iter < kIterations && !HasFatalFailure(); ++iter) {
- switch (rng_(4)) {
- case 0:
- for (int i = 0; i < MAX_SB_SQUARE; ++i) {
- r0[i] = 0;
- r1[i] = kInt13Max;
- }
- break;
- case 1:
- for (int i = 0; i < MAX_SB_SQUARE; ++i) {
- r0[i] = kInt13Max;
- r1[i] = 0;
- }
- break;
- case 2:
- for (int i = 0; i < MAX_SB_SQUARE; ++i) {
- r0[i] = 0;
- r1[i] = -kInt13Max;
- }
- break;
- default:
- for (int i = 0; i < MAX_SB_SQUARE; ++i) {
- r0[i] = -kInt13Max;
- r1[i] = 0;
- }
- break;
- }
-
- for (int i = 0; i < MAX_SB_SQUARE; ++i) m[i] = MAX_MASK_VALUE;
-
- const int maxN = AOMMIN(kMaxSize, MAX_SB_SQUARE);
- const int N = 64 * (rng_(maxN / 64 - 1) + 1);
-
- int64_t limit;
- limit = (int64_t)aom_sum_squares_i16(r0, N);
- limit -= (int64_t)aom_sum_squares_i16(r1, N);
- limit *= (1 << WEDGE_WEIGHT_BITS) / 2;
-
- for (int i = 0; i < N; i++)
- ds[i] = clamp(r0[i] * r0[i] - r1[i] * r1[i], INT16_MIN, INT16_MAX);
-
- const int ref_res = params_.ref_func(ds, m, N, limit);
- int tst_res;
- ASM_REGISTER_STATE_CHECK(tst_res = params_.tst_func(ds, m, N, limit));
-
- ASSERT_EQ(ref_res, tst_res);
- }
-}
-
-//////////////////////////////////////////////////////////////////////////////
-// av1_wedge_compute_delta_squares
-//////////////////////////////////////////////////////////////////////////////
-
-typedef void (*FDS)(int16_t *d, const int16_t *a, const int16_t *b, int N);
-typedef libaom_test::FuncParam<FDS> TestFuncsFDS;
-
-class WedgeUtilsDeltaSquaresOptTest : public FunctionEquivalenceTest<FDS> {
- protected:
- static const int kIterations = 10000;
-};
-
-TEST_P(WedgeUtilsDeltaSquaresOptTest, RandomValues) {
- DECLARE_ALIGNED(32, int16_t, a[MAX_SB_SQUARE]);
- DECLARE_ALIGNED(32, int16_t, b[MAX_SB_SQUARE]);
- DECLARE_ALIGNED(32, int16_t, d_ref[MAX_SB_SQUARE]);
- DECLARE_ALIGNED(32, int16_t, d_tst[MAX_SB_SQUARE]);
-
- for (int iter = 0; iter < kIterations && !HasFatalFailure(); ++iter) {
- for (int i = 0; i < MAX_SB_SQUARE; ++i) {
- a[i] = rng_.Rand16();
- b[i] = rng_(2 * INT16_MAX + 1) - INT16_MAX;
- }
-
- const int N = 64 * (rng_(MAX_SB_SQUARE / 64) + 1);
-
- memset(&d_ref, INT16_MAX, sizeof(d_ref));
- memset(&d_tst, INT16_MAX, sizeof(d_tst));
-
- params_.ref_func(d_ref, a, b, N);
- ASM_REGISTER_STATE_CHECK(params_.tst_func(d_tst, a, b, N));
-
- for (int i = 0; i < MAX_SB_SQUARE; ++i) ASSERT_EQ(d_ref[i], d_tst[i]);
- }
-}
-
-#if HAVE_SSE2
-INSTANTIATE_TEST_CASE_P(
- SSE2, WedgeUtilsSSEOptTest,
- ::testing::Values(TestFuncsFSSE(av1_wedge_sse_from_residuals_c,
- av1_wedge_sse_from_residuals_sse2)));
-
-INSTANTIATE_TEST_CASE_P(
- SSE2, WedgeUtilsSignOptTest,
- ::testing::Values(TestFuncsFSign(av1_wedge_sign_from_residuals_c,
- av1_wedge_sign_from_residuals_sse2)));
-
-INSTANTIATE_TEST_CASE_P(
- SSE2, WedgeUtilsDeltaSquaresOptTest,
- ::testing::Values(TestFuncsFDS(av1_wedge_compute_delta_squares_c,
- av1_wedge_compute_delta_squares_sse2)));
-#endif // HAVE_SSE2
-
-#if HAVE_AVX2
-INSTANTIATE_TEST_CASE_P(
- AVX2, WedgeUtilsSSEOptTest,
- ::testing::Values(TestFuncsFSSE(av1_wedge_sse_from_residuals_sse2,
- av1_wedge_sse_from_residuals_avx2)));
-
-INSTANTIATE_TEST_CASE_P(
- AVX2, WedgeUtilsSignOptTest,
- ::testing::Values(TestFuncsFSign(av1_wedge_sign_from_residuals_sse2,
- av1_wedge_sign_from_residuals_avx2)));
-
-INSTANTIATE_TEST_CASE_P(
- AVX2, WedgeUtilsDeltaSquaresOptTest,
- ::testing::Values(TestFuncsFDS(av1_wedge_compute_delta_squares_sse2,
- av1_wedge_compute_delta_squares_avx2)));
-#endif // HAVE_AVX2
-
-} // namespace
diff --git a/third_party/aom/test/best_encode.sh b/third_party/aom/test/best_encode.sh
deleted file mode 100755
index fe31a01cb..000000000
--- a/third_party/aom/test/best_encode.sh
+++ /dev/null
@@ -1,103 +0,0 @@
-#!/bin/bash
-#
-# Copyright (c) 2016, Alliance for Open Media. All rights reserved
-#
-# This source code is subject to the terms of the BSD 2 Clause License and
-# the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
-# was not distributed with this source code in the LICENSE file, you can
-# obtain it at www.aomedia.org/license/software. If the Alliance for Open
-# Media Patent License 1.0 was not distributed with this source code in the
-# PATENTS file, you can obtain it at www.aomedia.org/license/patent.
-#
-# Author: jimbankoski@google.com (Jim Bankoski)
-
-if [[ $# -ne 2 ]]; then
- echo "Encodes a file using best known settings (slow!)"
- echo " Usage: be [FILE] [BITRATE]"
- echo " Example: be akiyo_cif.y4m 200"
- exit
-fi
-
-f=$1 # file is first parameter
-b=$2 # bitrate is second parameter
-
-if [[ -e $f.fpf ]]; then
- # First-pass file found, do second pass only
- aomenc \
- $f \
- -o $f-$b.av1.webm \
- -p 2 \
- --pass=2 \
- --fpf=$f.fpf \
- --best \
- --cpu-used=0 \
- --target-bitrate=$b \
- --auto-alt-ref=1 \
- -v \
- --minsection-pct=0 \
- --maxsection-pct=800 \
- --lag-in-frames=25 \
- --kf-min-dist=0 \
- --kf-max-dist=99999 \
- --static-thresh=0 \
- --min-q=0 \
- --max-q=63 \
- --drop-frame=0 \
- --bias-pct=50 \
- --minsection-pct=0 \
- --maxsection-pct=800 \
- --psnr \
- --arnr-maxframes=7 \
- --arnr-strength=3 \
- --arnr-type=3
-else
- # No first-pass file found, do 2-pass encode
- aomenc \
- $f \
- -o $f-$b.av1.webm \
- -p 2 \
- --pass=1 \
- --fpf=$f.fpf \
- --best \
- --cpu-used=0 \
- --target-bitrate=$b \
- --auto-alt-ref=1 \
- -v \
- --minsection-pct=0 \
- --maxsection-pct=800 \
- --lag-in-frames=25 \
- --kf-min-dist=0 \
- --kf-max-dist=99999 \
- --static-thresh=0 \
- --min-q=0 \
- --max-q=63 \
- --drop-frame=0
-
- aomenc \
- $f \
- -o $f-$b.av1.webm \
- -p 2 \
- --pass=2 \
- --fpf=$f.fpf \
- --best \
- --cpu-used=0 \
- --target-bitrate=$b \
- --auto-alt-ref=1 \
- -v \
- --minsection-pct=0 \
- --maxsection-pct=800 \
- --lag-in-frames=25 \
- --kf-min-dist=0 \
- --kf-max-dist=99999 \
- --static-thresh=0 \
- --min-q=0 \
- --max-q=63 \
- --drop-frame=0 \
- --bias-pct=50 \
- --minsection-pct=0 \
- --maxsection-pct=800 \
- --psnr \
- --arnr-maxframes=7 \
- --arnr-strength=3 \
- --arnr-type=3
-fi
diff --git a/third_party/aom/test/binary_codes_test.cc b/third_party/aom/test/binary_codes_test.cc
deleted file mode 100644
index 45660cf85..000000000
--- a/third_party/aom/test/binary_codes_test.cc
+++ /dev/null
@@ -1,83 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#include <math.h>
-#include <stdlib.h>
-#include <string.h>
-
-#include "third_party/googletest/src/googletest/include/gtest/gtest.h"
-
-#include "config/aom_config.h"
-
-#include "test/acm_random.h"
-#include "aom/aom_integer.h"
-#include "aom_dsp/bitreader.h"
-#include "aom_dsp/bitwriter.h"
-#include "aom_dsp/binary_codes_reader.h"
-#include "aom_dsp/binary_codes_writer.h"
-
-#define ACCT_STR __func__
-
-using libaom_test::ACMRandom;
-
-namespace {
-
-// Test for Finite subexponential code with reference
-TEST(AV1, TestPrimitiveRefsubexpfin) {
- ACMRandom rnd(ACMRandom::DeterministicSeed());
- const int kBufferSize = 65536;
- aom_writer bw;
- uint8_t bw_buffer[kBufferSize];
- const uint16_t kRanges = 8;
- const uint16_t kSubexpParams = 6;
- const uint16_t kReferences = 8;
- const uint16_t kValues = 16;
- uint16_t enc_values[kRanges][kSubexpParams][kReferences][kValues][4];
- const uint16_t range_vals[kRanges] = { 1, 13, 64, 120, 230, 420, 1100, 8000 };
- aom_start_encode(&bw, bw_buffer);
- for (int n = 0; n < kRanges; ++n) {
- const uint16_t range = range_vals[n];
- for (int k = 0; k < kSubexpParams; ++k) {
- for (int r = 0; r < kReferences; ++r) {
- const uint16_t ref = rnd(range);
- for (int v = 0; v < kValues; ++v) {
- const uint16_t value = rnd(range);
- enc_values[n][k][r][v][0] = range;
- enc_values[n][k][r][v][1] = k;
- enc_values[n][k][r][v][2] = ref;
- enc_values[n][k][r][v][3] = value;
- aom_write_primitive_refsubexpfin(&bw, range, k, ref, value);
- }
- }
- }
- }
- aom_stop_encode(&bw);
- aom_reader br;
- aom_reader_init(&br, bw_buffer, bw.pos);
- GTEST_ASSERT_GE(aom_reader_tell(&br), 0u);
- GTEST_ASSERT_LE(aom_reader_tell(&br), 1u);
- for (int n = 0; n < kRanges; ++n) {
- for (int k = 0; k < kSubexpParams; ++k) {
- for (int r = 0; r < kReferences; ++r) {
- for (int v = 0; v < kValues; ++v) {
- const uint16_t range = enc_values[n][k][r][v][0];
- assert(k == enc_values[n][k][r][v][1]);
- const uint16_t ref = enc_values[n][k][r][v][2];
- const uint16_t value =
- aom_read_primitive_refsubexpfin(&br, range, k, ref, ACCT_STR);
- GTEST_ASSERT_EQ(value, enc_values[n][k][r][v][3]);
- }
- }
- }
- }
-}
-// TODO(debargha): Adds tests for other primitives
-} // namespace
diff --git a/third_party/aom/test/blend_a64_mask_1d_test.cc b/third_party/aom/test/blend_a64_mask_1d_test.cc
deleted file mode 100644
index f8844eef8..000000000
--- a/third_party/aom/test/blend_a64_mask_1d_test.cc
+++ /dev/null
@@ -1,339 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#include <math.h>
-#include <stdlib.h>
-#include <string.h>
-
-#include "third_party/googletest/src/googletest/include/gtest/gtest.h"
-#include "test/register_state_check.h"
-#include "test/function_equivalence_test.h"
-
-#include "config/aom_config.h"
-#include "config/aom_dsp_rtcd.h"
-#include "config/av1_rtcd.h"
-
-#include "aom/aom_integer.h"
-
-#include "av1/common/enums.h"
-
-#include "aom_dsp/blend.h"
-
-using libaom_test::FunctionEquivalenceTest;
-
-namespace {
-
-template <typename F, typename T>
-class BlendA64Mask1DTest : public FunctionEquivalenceTest<F> {
- public:
- static const int kIterations = 10000;
- static const int kMaxWidth = MAX_SB_SIZE * 5; // * 5 to cover longer strides
- static const int kMaxHeight = MAX_SB_SIZE;
- static const int kBufSize = kMaxWidth * kMaxHeight;
- static const int kMaxMaskWidth = 2 * MAX_SB_SIZE;
- static const int kMaxMaskSize = kMaxMaskWidth;
-
- virtual ~BlendA64Mask1DTest() {}
-
- virtual void Execute(const T *p_src0, const T *p_src1) = 0;
-
- void Common() {
- w_ = 2 << this->rng_(MAX_SB_SIZE_LOG2);
- h_ = 2 << this->rng_(MAX_SB_SIZE_LOG2);
-
- dst_offset_ = this->rng_(33);
- dst_stride_ = this->rng_(kMaxWidth + 1 - w_) + w_;
-
- src0_offset_ = this->rng_(33);
- src0_stride_ = this->rng_(kMaxWidth + 1 - w_) + w_;
-
- src1_offset_ = this->rng_(33);
- src1_stride_ = this->rng_(kMaxWidth + 1 - w_) + w_;
-
- T *p_src0;
- T *p_src1;
-
- switch (this->rng_(3)) {
- case 0: // Separate sources
- p_src0 = src0_;
- p_src1 = src1_;
- break;
- case 1: // src0 == dst
- p_src0 = dst_tst_;
- src0_stride_ = dst_stride_;
- src0_offset_ = dst_offset_;
- p_src1 = src1_;
- break;
- case 2: // src1 == dst
- p_src0 = src0_;
- p_src1 = dst_tst_;
- src1_stride_ = dst_stride_;
- src1_offset_ = dst_offset_;
- break;
- default: FAIL();
- }
-
- Execute(p_src0, p_src1);
-
- for (int r = 0; r < h_; ++r) {
- for (int c = 0; c < w_; ++c) {
- ASSERT_EQ(dst_ref_[dst_offset_ + r * dst_stride_ + c],
- dst_tst_[dst_offset_ + r * dst_stride_ + c]);
- }
- }
- }
-
- T dst_ref_[kBufSize];
- T dst_tst_[kBufSize];
- uint32_t dst_stride_;
- uint32_t dst_offset_;
-
- T src0_[kBufSize];
- uint32_t src0_stride_;
- uint32_t src0_offset_;
-
- T src1_[kBufSize];
- uint32_t src1_stride_;
- uint32_t src1_offset_;
-
- uint8_t mask_[kMaxMaskSize];
-
- int w_;
- int h_;
-};
-
-//////////////////////////////////////////////////////////////////////////////
-// 8 bit version
-//////////////////////////////////////////////////////////////////////////////
-
-typedef void (*F8B)(uint8_t *dst, uint32_t dst_stride, const uint8_t *src0,
- uint32_t src0_stride, const uint8_t *src1,
- uint32_t src1_stride, const uint8_t *mask, int w, int h);
-typedef libaom_test::FuncParam<F8B> TestFuncs;
-
-class BlendA64Mask1DTest8B : public BlendA64Mask1DTest<F8B, uint8_t> {
- protected:
- void Execute(const uint8_t *p_src0, const uint8_t *p_src1) {
- params_.ref_func(dst_ref_ + dst_offset_, dst_stride_, p_src0 + src0_offset_,
- src0_stride_, p_src1 + src1_offset_, src1_stride_, mask_,
- w_, h_);
- ASM_REGISTER_STATE_CHECK(params_.tst_func(
- dst_tst_ + dst_offset_, dst_stride_, p_src0 + src0_offset_,
- src0_stride_, p_src1 + src1_offset_, src1_stride_, mask_, w_, h_));
- }
-};
-
-TEST_P(BlendA64Mask1DTest8B, RandomValues) {
- for (int iter = 0; iter < kIterations && !HasFatalFailure(); ++iter) {
- for (int i = 0; i < kBufSize; ++i) {
- dst_ref_[i] = rng_.Rand8();
- dst_tst_[i] = rng_.Rand8();
-
- src0_[i] = rng_.Rand8();
- src1_[i] = rng_.Rand8();
- }
-
- for (int i = 0; i < kMaxMaskSize; ++i)
- mask_[i] = rng_(AOM_BLEND_A64_MAX_ALPHA + 1);
-
- Common();
- }
-}
-
-TEST_P(BlendA64Mask1DTest8B, ExtremeValues) {
- for (int iter = 0; iter < kIterations && !HasFatalFailure(); ++iter) {
- for (int i = 0; i < kBufSize; ++i) {
- dst_ref_[i] = rng_(2) + 254;
- dst_tst_[i] = rng_(2) + 254;
- src0_[i] = rng_(2) + 254;
- src1_[i] = rng_(2) + 254;
- }
-
- for (int i = 0; i < kMaxMaskSize; ++i)
- mask_[i] = rng_(2) + AOM_BLEND_A64_MAX_ALPHA - 1;
-
- Common();
- }
-}
-
-static void blend_a64_hmask_ref(uint8_t *dst, uint32_t dst_stride,
- const uint8_t *src0, uint32_t src0_stride,
- const uint8_t *src1, uint32_t src1_stride,
- const uint8_t *mask, int w, int h) {
- uint8_t mask2d[BlendA64Mask1DTest8B::kMaxMaskSize]
- [BlendA64Mask1DTest8B::kMaxMaskSize];
-
- for (int row = 0; row < h; ++row)
- for (int col = 0; col < w; ++col) mask2d[row][col] = mask[col];
-
- aom_blend_a64_mask_c(dst, dst_stride, src0, src0_stride, src1, src1_stride,
- &mask2d[0][0], BlendA64Mask1DTest8B::kMaxMaskSize, w, h,
- 0, 0);
-}
-
-static void blend_a64_vmask_ref(uint8_t *dst, uint32_t dst_stride,
- const uint8_t *src0, uint32_t src0_stride,
- const uint8_t *src1, uint32_t src1_stride,
- const uint8_t *mask, int w, int h) {
- uint8_t mask2d[BlendA64Mask1DTest8B::kMaxMaskSize]
- [BlendA64Mask1DTest8B::kMaxMaskSize];
-
- for (int row = 0; row < h; ++row)
- for (int col = 0; col < w; ++col) mask2d[row][col] = mask[row];
-
- aom_blend_a64_mask_c(dst, dst_stride, src0, src0_stride, src1, src1_stride,
- &mask2d[0][0], BlendA64Mask1DTest8B::kMaxMaskSize, w, h,
- 0, 0);
-}
-
-INSTANTIATE_TEST_CASE_P(
- C, BlendA64Mask1DTest8B,
- ::testing::Values(TestFuncs(blend_a64_hmask_ref, aom_blend_a64_hmask_c),
- TestFuncs(blend_a64_vmask_ref, aom_blend_a64_vmask_c)));
-
-#if HAVE_SSE4_1
-INSTANTIATE_TEST_CASE_P(
- SSE4_1, BlendA64Mask1DTest8B,
- ::testing::Values(
- TestFuncs(blend_a64_hmask_ref, aom_blend_a64_hmask_sse4_1),
- TestFuncs(blend_a64_vmask_ref, aom_blend_a64_vmask_sse4_1)));
-#endif // HAVE_SSE4_1
-
-#if HAVE_NEON
-INSTANTIATE_TEST_CASE_P(NEON, BlendA64Mask1DTest8B,
- ::testing::Values(TestFuncs(blend_a64_hmask_ref,
- aom_blend_a64_hmask_neon),
- TestFuncs(blend_a64_vmask_ref,
- aom_blend_a64_vmask_neon)));
-#endif // HAVE_NEON
-
-//////////////////////////////////////////////////////////////////////////////
-// High bit-depth version
-//////////////////////////////////////////////////////////////////////////////
-
-typedef void (*FHBD)(uint8_t *dst, uint32_t dst_stride, const uint8_t *src0,
- uint32_t src0_stride, const uint8_t *src1,
- uint32_t src1_stride, const uint8_t *mask, int w, int h,
- int bd);
-typedef libaom_test::FuncParam<FHBD> TestFuncsHBD;
-
-class BlendA64Mask1DTestHBD : public BlendA64Mask1DTest<FHBD, uint16_t> {
- protected:
- void Execute(const uint16_t *p_src0, const uint16_t *p_src1) {
- params_.ref_func(CONVERT_TO_BYTEPTR(dst_ref_ + dst_offset_), dst_stride_,
- CONVERT_TO_BYTEPTR(p_src0 + src0_offset_), src0_stride_,
- CONVERT_TO_BYTEPTR(p_src1 + src1_offset_), src1_stride_,
- mask_, w_, h_, bit_depth_);
- ASM_REGISTER_STATE_CHECK(params_.tst_func(
- CONVERT_TO_BYTEPTR(dst_tst_ + dst_offset_), dst_stride_,
- CONVERT_TO_BYTEPTR(p_src0 + src0_offset_), src0_stride_,
- CONVERT_TO_BYTEPTR(p_src1 + src1_offset_), src1_stride_, mask_, w_, h_,
- bit_depth_));
- }
-
- int bit_depth_;
-};
-
-TEST_P(BlendA64Mask1DTestHBD, RandomValues) {
- for (int iter = 0; iter < kIterations && !HasFatalFailure(); ++iter) {
- switch (rng_(3)) {
- case 0: bit_depth_ = 8; break;
- case 1: bit_depth_ = 10; break;
- default: bit_depth_ = 12; break;
- }
-
- const int hi = 1 << bit_depth_;
-
- for (int i = 0; i < kBufSize; ++i) {
- dst_ref_[i] = rng_(hi);
- dst_tst_[i] = rng_(hi);
- src0_[i] = rng_(hi);
- src1_[i] = rng_(hi);
- }
-
- for (int i = 0; i < kMaxMaskSize; ++i)
- mask_[i] = rng_(AOM_BLEND_A64_MAX_ALPHA + 1);
-
- Common();
- }
-}
-
-TEST_P(BlendA64Mask1DTestHBD, ExtremeValues) {
- for (int iter = 0; iter < 1000 && !HasFatalFailure(); ++iter) {
- switch (rng_(3)) {
- case 0: bit_depth_ = 8; break;
- case 1: bit_depth_ = 10; break;
- default: bit_depth_ = 12; break;
- }
-
- const int hi = 1 << bit_depth_;
- const int lo = hi - 2;
-
- for (int i = 0; i < kBufSize; ++i) {
- dst_ref_[i] = rng_(hi - lo) + lo;
- dst_tst_[i] = rng_(hi - lo) + lo;
- src0_[i] = rng_(hi - lo) + lo;
- src1_[i] = rng_(hi - lo) + lo;
- }
-
- for (int i = 0; i < kMaxMaskSize; ++i)
- mask_[i] = rng_(2) + AOM_BLEND_A64_MAX_ALPHA - 1;
-
- Common();
- }
-}
-
-static void highbd_blend_a64_hmask_ref(
- uint8_t *dst, uint32_t dst_stride, const uint8_t *src0,
- uint32_t src0_stride, const uint8_t *src1, uint32_t src1_stride,
- const uint8_t *mask, int w, int h, int bd) {
- uint8_t mask2d[BlendA64Mask1DTestHBD::kMaxMaskSize]
- [BlendA64Mask1DTestHBD::kMaxMaskSize];
-
- for (int row = 0; row < h; ++row)
- for (int col = 0; col < w; ++col) mask2d[row][col] = mask[col];
-
- aom_highbd_blend_a64_mask_c(
- dst, dst_stride, src0, src0_stride, src1, src1_stride, &mask2d[0][0],
- BlendA64Mask1DTestHBD::kMaxMaskSize, w, h, 0, 0, bd);
-}
-
-static void highbd_blend_a64_vmask_ref(
- uint8_t *dst, uint32_t dst_stride, const uint8_t *src0,
- uint32_t src0_stride, const uint8_t *src1, uint32_t src1_stride,
- const uint8_t *mask, int w, int h, int bd) {
- uint8_t mask2d[BlendA64Mask1DTestHBD::kMaxMaskSize]
- [BlendA64Mask1DTestHBD::kMaxMaskSize];
-
- for (int row = 0; row < h; ++row)
- for (int col = 0; col < w; ++col) mask2d[row][col] = mask[row];
-
- aom_highbd_blend_a64_mask_c(
- dst, dst_stride, src0, src0_stride, src1, src1_stride, &mask2d[0][0],
- BlendA64Mask1DTestHBD::kMaxMaskSize, w, h, 0, 0, bd);
-}
-
-INSTANTIATE_TEST_CASE_P(
- C, BlendA64Mask1DTestHBD,
- ::testing::Values(TestFuncsHBD(highbd_blend_a64_hmask_ref,
- aom_highbd_blend_a64_hmask_c),
- TestFuncsHBD(highbd_blend_a64_vmask_ref,
- aom_highbd_blend_a64_vmask_c)));
-
-#if HAVE_SSE4_1
-INSTANTIATE_TEST_CASE_P(
- SSE4_1, BlendA64Mask1DTestHBD,
- ::testing::Values(TestFuncsHBD(highbd_blend_a64_hmask_ref,
- aom_highbd_blend_a64_hmask_sse4_1),
- TestFuncsHBD(highbd_blend_a64_vmask_ref,
- aom_highbd_blend_a64_vmask_sse4_1)));
-#endif // HAVE_SSE4_1
-} // namespace
diff --git a/third_party/aom/test/blend_a64_mask_test.cc b/third_party/aom/test/blend_a64_mask_test.cc
deleted file mode 100644
index 66ca6fc5f..000000000
--- a/third_party/aom/test/blend_a64_mask_test.cc
+++ /dev/null
@@ -1,583 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#include <math.h>
-#include <stdlib.h>
-#include <string.h>
-
-#include "third_party/googletest/src/googletest/include/gtest/gtest.h"
-#include "test/register_state_check.h"
-#include "test/function_equivalence_test.h"
-
-#include "config/aom_config.h"
-#include "config/aom_dsp_rtcd.h"
-#include "config/av1_rtcd.h"
-
-#include "aom/aom_integer.h"
-
-#include "av1/common/enums.h"
-
-#include "aom_dsp/blend.h"
-
-using libaom_test::FunctionEquivalenceTest;
-
-namespace {
-
-// Shared fixture for the 2-D blend_a64_mask tests. It owns the destination,
-// source and mask buffers, draws random strides and offsets for each run,
-// calls the subclass-provided Execute() and then compares the reference
-// output buffer with the tested output buffer pixel by pixel.
-//
-// BlendA64Func is the kernel signature; SrcPixel / DstPixel are the element
-// types of the source and destination buffers.
-template <typename BlendA64Func, typename SrcPixel, typename DstPixel>
-class BlendA64MaskTest : public FunctionEquivalenceTest<BlendA64Func> {
- protected:
- static const int kIterations = 10000;
- static const int kMaxWidth = MAX_SB_SIZE * 5; // * 5 to cover longer strides
- static const int kMaxHeight = MAX_SB_SIZE;
- static const int kBufSize = kMaxWidth * kMaxHeight;
- static const int kMaxMaskWidth = 2 * MAX_SB_SIZE;
- static const int kMaxMaskSize = kMaxMaskWidth * kMaxMaskWidth;
-
- virtual ~BlendA64MaskTest() {}
-
- // Runs the reference and tested kernels run_times times each on the given
- // sources; implemented per pixel format by the subclasses.
- virtual void Execute(const SrcPixel *p_src0, const SrcPixel *p_src1,
- int run_times) = 0;
-
- // Selects the two source pointers when source and destination share a
- // pixel type. For a single run, one of three layouts is drawn at random:
- // separate sources, src0 aliasing the tested destination, or src1 aliasing
- // it. When aliasing, the source stride/offset are set to the destination's.
- // For timed runs (run_times > 1) the sources are always separate.
- template <typename Pixel>
- void GetSources(Pixel **src0, Pixel **src1, Pixel * /*dst*/, int run_times) {
- if (run_times > 1) {
- *src0 = src0_;
- *src1 = src1_;
- return;
- }
- switch (this->rng_(3)) {
- case 0: // Separate sources
- *src0 = src0_;
- *src1 = src1_;
- break;
- case 1: // src0 == dst
- *src0 = dst_tst_;
- src0_stride_ = dst_stride_;
- src0_offset_ = dst_offset_;
- *src1 = src1_;
- break;
- case 2: // src1 == dst
- *src0 = src0_;
- *src1 = dst_tst_;
- src1_stride_ = dst_stride_;
- src1_offset_ = dst_offset_;
- break;
- default: FAIL();
- }
- }
-
- // Overload for 16-bit sources with an 8-bit destination: the buffers have
- // different element types, so the sources are always the separate arrays.
- void GetSources(uint16_t **src0, uint16_t **src1, uint8_t * /*dst*/,
- int /*run_times*/) {
- *src0 = src0_;
- *src1 = src1_;
- }
-
- // Returns a random 0 or 1.
- uint8_t Rand1() { return this->rng_.Rand8() & 1; }
-
- // Runs one comparison for the given block size and mask subsampling flags.
- // run_times == 1 is a correctness run; larger values are divided by the
- // block width and used as the repeat count for timing.
- void RunOneTest(int block_size, int subx, int suby, int run_times) {
- w_ = block_size_wide[block_size];
- h_ = block_size_high[block_size];
- run_times = run_times > 1 ? run_times / w_ : 1;
- subx_ = subx;
- suby_ = suby;
-
- // Offsets are drawn from [0, 33); strides from [w_, kMaxWidth].
- dst_offset_ = this->rng_(33);
- dst_stride_ = this->rng_(kMaxWidth + 1 - w_) + w_;
-
- src0_offset_ = this->rng_(33);
- src0_stride_ = this->rng_(kMaxWidth + 1 - w_) + w_;
-
- src1_offset_ = this->rng_(33);
- src1_stride_ = this->rng_(kMaxWidth + 1 - w_) + w_;
-
- // NOTE(review): mask_stride_ is drawn here, but the Execute()
- // implementations in this file pass kMaxMaskWidth as the mask stride, so
- // this value only advances the random sequence.
- mask_stride_ =
- this->rng_(kMaxWidth + 1 - w_ * (subx_ ? 2 : 1)) + w_ * (subx_ ? 2 : 1);
-
- SrcPixel *p_src0;
- SrcPixel *p_src1;
-
- p_src0 = src0_;
- p_src1 = src1_;
-
- GetSources(&p_src0, &p_src1, &dst_ref_[0], run_times);
-
- Execute(p_src0, p_src1, run_times);
-
- // Only the w_ x h_ block at the chosen offset is compared.
- for (int r = 0; r < h_; ++r) {
- for (int c = 0; c < w_; ++c) {
- ASSERT_EQ(dst_ref_[dst_offset_ + r * dst_stride_ + c],
- dst_tst_[dst_offset_ + r * dst_stride_ + c])
- << w_ << "x" << h_ << " subx " << subx_ << " suby " << suby_
- << " r: " << r << " c: " << c;
- }
- }
- }
-
- // Runs one correctness/timing comparison with random subsampling flags.
- void RunTest(int block_size, int run_times) {
- subx_ = Rand1();
- suby_ = Rand1();
- RunOneTest(block_size, subx_, suby_, run_times);
- }
-
- // Output of the reference kernel and of the kernel under test.
- DstPixel dst_ref_[kBufSize];
- DstPixel dst_tst_[kBufSize];
- uint32_t dst_stride_;
- uint32_t dst_offset_;
-
- SrcPixel src0_[kBufSize];
- uint32_t src0_stride_;
- uint32_t src0_offset_;
-
- SrcPixel src1_[kBufSize];
- uint32_t src1_stride_;
- uint32_t src1_offset_;
-
- uint8_t mask_[kMaxMaskSize];
- size_t mask_stride_;
-
- // Block dimensions of the current run.
- int w_;
- int h_;
-
- // Mask subsampling flags passed to the kernels.
- int suby_;
- int subx_;
-};
-
-//////////////////////////////////////////////////////////////////////////////
-// 8 bit version
-//////////////////////////////////////////////////////////////////////////////
-
-// Signature of the 8-bit blend kernels: 8-bit sources and destination, an
-// 8-bit mask with its own stride, block size and mask subsampling flags.
-typedef void (*F8B)(uint8_t *dst, uint32_t dst_stride, const uint8_t *src0,
- uint32_t src0_stride, const uint8_t *src1,
- uint32_t src1_stride, const uint8_t *mask,
- uint32_t mask_stride, int w, int h, int subx, int suby);
-// (reference function, tested function) pair used as the test parameter.
-typedef libaom_test::FuncParam<F8B> TestFuncs;
-
-// 8-bit fixture: 8-bit sources, 8-bit destination.
-class BlendA64MaskTest8B : public BlendA64MaskTest<F8B, uint8_t, uint8_t> {
- protected:
-  // Calls the reference kernel and then the tested kernel run_times times
-  // each on identical inputs, writing to dst_ref_ and dst_tst_ respectively.
-  // Both loops are timed; the totals and their ratio are printed only for
-  // timed runs (run_times > 1).
-  void Execute(const uint8_t *p_src0, const uint8_t *p_src1, int run_times) {
-    aom_usec_timer timer;
-    aom_usec_timer_start(&timer);
-    for (int i = 0; i < run_times; ++i) {
-      params_.ref_func(dst_ref_ + dst_offset_, dst_stride_,
-                       p_src0 + src0_offset_, src0_stride_,
-                       p_src1 + src1_offset_, src1_stride_, mask_,
-                       kMaxMaskWidth, w_, h_, subx_, suby_);
-    }
-    aom_usec_timer_mark(&timer);
-    const double time1 = static_cast<double>(aom_usec_timer_elapsed(&timer));
-    aom_usec_timer_start(&timer);
-    for (int i = 0; i < run_times; ++i) {
-      params_.tst_func(dst_tst_ + dst_offset_, dst_stride_,
-                       p_src0 + src0_offset_, src0_stride_,
-                       p_src1 + src1_offset_, src1_stride_, mask_,
-                       kMaxMaskWidth, w_, h_, subx_, suby_);
-    }
-    aom_usec_timer_mark(&timer);
-    const double time2 = static_cast<double>(aom_usec_timer_elapsed(&timer));
-    if (run_times > 1) {
-      // The values come from aom_usec_timer_elapsed(), i.e. they are total
-      // microseconds for the whole loop, so they are labelled "us".
-      printf("%3dx%-3d subx %d suby %d :%7.2f/%7.2fus", w_, h_, subx_, suby_,
-             time1, time2);
-      printf("(%3.2f)\n", time1 / time2);
-    }
-  }
-};
-
-// Compares the kernels on uniformly random pixels and mask weights for a
-// randomly chosen block size, stopping at the first fatal failure.
-TEST_P(BlendA64MaskTest8B, RandomValues) {
-  int pass = 0;
-  while (pass < kIterations && !HasFatalFailure()) {
-    const int bsize = rng_.Rand8() % BLOCK_SIZES_ALL;
-
-    for (int idx = 0; idx < kBufSize; ++idx) {
-      dst_ref_[idx] = rng_.Rand8();
-      dst_tst_[idx] = rng_.Rand8();
-
-      src0_[idx] = rng_.Rand8();
-      src1_[idx] = rng_.Rand8();
-    }
-
-    // Mask weights cover the full range 0..AOM_BLEND_A64_MAX_ALPHA.
-    for (int idx = 0; idx < kMaxMaskSize; ++idx) {
-      mask_[idx] = rng_(AOM_BLEND_A64_MAX_ALPHA + 1);
-    }
-
-    RunTest(bsize, 1);
-    ++pass;
-  }
-}
-
-// Compares the kernels with every pixel at 254 or 255 and every mask weight
-// at AOM_BLEND_A64_MAX_ALPHA - 1 or AOM_BLEND_A64_MAX_ALPHA.
-TEST_P(BlendA64MaskTest8B, ExtremeValues) {
-  int pass = 0;
-  while (pass < kIterations && !HasFatalFailure()) {
-    const int bsize = rng_.Rand8() % BLOCK_SIZES_ALL;
-
-    for (int idx = 0; idx < kBufSize; ++idx) {
-      dst_ref_[idx] = rng_(2) + 254;
-      dst_tst_[idx] = rng_(2) + 254;
-      src0_[idx] = rng_(2) + 254;
-      src1_[idx] = rng_(2) + 254;
-    }
-
-    for (int idx = 0; idx < kMaxMaskSize; ++idx) {
-      mask_[idx] = rng_(2) + AOM_BLEND_A64_MAX_ALPHA - 1;
-    }
-
-    RunTest(bsize, 1);
-    ++pass;
-  }
-}
-// Timing run (disabled by default): for every block size, times all four
-// mask subsampling combinations on random data.
-TEST_P(BlendA64MaskTest8B, DISABLED_Speed) {
-  const int kRunTimes = 10000000;
-  for (int bsize = 0; bsize < BLOCK_SIZES_ALL; ++bsize) {
-    for (int idx = 0; idx < kBufSize; ++idx) {
-      dst_ref_[idx] = rng_.Rand8();
-      dst_tst_[idx] = rng_.Rand8();
-
-      src0_[idx] = rng_.Rand8();
-      src1_[idx] = rng_.Rand8();
-    }
-
-    for (int idx = 0; idx < kMaxMaskSize; ++idx) {
-      mask_[idx] = rng_(AOM_BLEND_A64_MAX_ALPHA + 1);
-    }
-
-    // Visits (subx, suby) as (1,1), (1,0), (0,1), (0,0).
-    for (int subx = 1; subx >= 0; --subx) {
-      for (int suby = 1; suby >= 0; --suby) {
-        RunOneTest(bsize, subx, suby, kRunTimes);
-      }
-    }
-  }
-}
-#if HAVE_SSE4_1
-INSTANTIATE_TEST_CASE_P(SSE4_1, BlendA64MaskTest8B,
- ::testing::Values(TestFuncs(
- aom_blend_a64_mask_c, aom_blend_a64_mask_sse4_1)));
-#endif // HAVE_SSE4_1
-
-#if HAVE_AVX2
-// Note: the AVX2 kernel is checked against the SSE4.1 kernel here, not
-// against the C implementation.
-INSTANTIATE_TEST_CASE_P(AVX2, BlendA64MaskTest8B,
- ::testing::Values(TestFuncs(aom_blend_a64_mask_sse4_1,
- aom_blend_a64_mask_avx2)));
-#endif // HAVE_AVX2
-
-//////////////////////////////////////////////////////////////////////////////
-// 8 bit _d16 version
-//////////////////////////////////////////////////////////////////////////////
-
-// Signature of the 8-bit "_d16" blend kernels: 16-bit sources, 8-bit
-// destination, plus the convolve parameters that carry the rounding shifts.
-typedef void (*F8B_D16)(uint8_t *dst, uint32_t dst_stride, const uint16_t *src0,
- uint32_t src0_stride, const uint16_t *src1,
- uint32_t src1_stride, const uint8_t *mask,
- uint32_t mask_stride, int w, int h, int subx, int suby,
- ConvolveParams *conv_params);
-// (reference function, tested function) pair used as the test parameter.
-typedef libaom_test::FuncParam<F8B_D16> TestFuncs_d16;
-
-// 8-bit "_d16" fixture: 16-bit sources, 8-bit destination.
-class BlendA64MaskTest8B_d16
-    : public BlendA64MaskTest<F8B_D16, uint16_t, uint8_t> {
- protected:
-  // max number of bits used by the source
-  static const int kSrcMaxBitsMask = 0x3fff;
-
-  // Calls the reference kernel and then the tested kernel run_times times
-  // each on identical inputs, with round_0 / round_1 set to ROUND0_BITS and
-  // COMPOUND_ROUND1_BITS. Both loops are timed; the totals and their ratio
-  // are printed only for timed runs (run_times > 1).
-  void Execute(const uint16_t *p_src0, const uint16_t *p_src1, int run_times) {
-    ConvolveParams conv_params;
-    conv_params.round_0 = ROUND0_BITS;
-    conv_params.round_1 = COMPOUND_ROUND1_BITS;
-    aom_usec_timer timer;
-    aom_usec_timer_start(&timer);
-    for (int i = 0; i < run_times; ++i) {
-      params_.ref_func(dst_ref_ + dst_offset_, dst_stride_,
-                       p_src0 + src0_offset_, src0_stride_,
-                       p_src1 + src1_offset_, src1_stride_, mask_,
-                       kMaxMaskWidth, w_, h_, subx_, suby_, &conv_params);
-    }
-    aom_usec_timer_mark(&timer);
-    const double time1 = static_cast<double>(aom_usec_timer_elapsed(&timer));
-    aom_usec_timer_start(&timer);
-    for (int i = 0; i < run_times; ++i) {
-      params_.tst_func(dst_tst_ + dst_offset_, dst_stride_,
-                       p_src0 + src0_offset_, src0_stride_,
-                       p_src1 + src1_offset_, src1_stride_, mask_,
-                       kMaxMaskWidth, w_, h_, subx_, suby_, &conv_params);
-    }
-    aom_usec_timer_mark(&timer);
-    const double time2 = static_cast<double>(aom_usec_timer_elapsed(&timer));
-    if (run_times > 1) {
-      // The values come from aom_usec_timer_elapsed(), i.e. they are total
-      // microseconds for the whole loop, so they are labelled "us".
-      printf("%3dx%-3d subx %d suby %d :%7.2f/%7.2fus", w_, h_, subx_, suby_,
-             time1, time2);
-      printf("(%3.2f)\n", time1 / time2);
-    }
-  }
-};
-
-// Compares the kernels on random data: 8-bit random destinations, 16-bit
-// random sources masked with kSrcMaxBitsMask, and random mask weights.
-TEST_P(BlendA64MaskTest8B_d16, RandomValues) {
-  int pass = 0;
-  while (pass < kIterations && !HasFatalFailure()) {
-    const int bsize = rng_.Rand8() % BLOCK_SIZES_ALL;
-
-    for (int idx = 0; idx < kBufSize; ++idx) {
-      dst_ref_[idx] = rng_.Rand8();
-      dst_tst_[idx] = rng_.Rand8();
-
-      src0_[idx] = rng_.Rand16() & kSrcMaxBitsMask;
-      src1_[idx] = rng_.Rand16() & kSrcMaxBitsMask;
-    }
-
-    for (int idx = 0; idx < kMaxMaskSize; ++idx) {
-      mask_[idx] = rng_(AOM_BLEND_A64_MAX_ALPHA + 1);
-    }
-
-    RunTest(bsize, 1);
-    ++pass;
-  }
-}
-
-// Compares the kernels with constant inputs: destinations at 255, sources at
-// kSrcMaxBitsMask and every mask weight at AOM_BLEND_A64_MAX_ALPHA - 1. Only
-// the block size, strides and offsets vary between iterations.
-TEST_P(BlendA64MaskTest8B_d16, ExtremeValues) {
-  int pass = 0;
-  while (pass < kIterations && !HasFatalFailure()) {
-    const int bsize = rng_.Rand8() % BLOCK_SIZES_ALL;
-
-    for (int idx = 0; idx < kBufSize; ++idx) {
-      dst_ref_[idx] = 255;
-      dst_tst_[idx] = 255;
-
-      src0_[idx] = kSrcMaxBitsMask;
-      src1_[idx] = kSrcMaxBitsMask;
-    }
-
-    for (int idx = 0; idx < kMaxMaskSize; ++idx) {
-      mask_[idx] = AOM_BLEND_A64_MAX_ALPHA - 1;
-    }
-
-    RunTest(bsize, 1);
-    ++pass;
-  }
-}
-
-// Each SIMD variant of the low bit-depth d16 kernel is checked against the C
-// implementation, guarded by the matching HAVE_* configuration flag.
-#if HAVE_SSE4_1
-INSTANTIATE_TEST_CASE_P(
- SSE4_1, BlendA64MaskTest8B_d16,
- ::testing::Values(TestFuncs_d16(aom_lowbd_blend_a64_d16_mask_c,
- aom_lowbd_blend_a64_d16_mask_sse4_1)));
-#endif // HAVE_SSE4_1
-
-#if HAVE_AVX2
-INSTANTIATE_TEST_CASE_P(
- AVX2, BlendA64MaskTest8B_d16,
- ::testing::Values(TestFuncs_d16(aom_lowbd_blend_a64_d16_mask_c,
- aom_lowbd_blend_a64_d16_mask_avx2)));
-#endif // HAVE_AVX2
-
-#if HAVE_NEON
-INSTANTIATE_TEST_CASE_P(
- NEON, BlendA64MaskTest8B_d16,
- ::testing::Values(TestFuncs_d16(aom_lowbd_blend_a64_d16_mask_c,
- aom_lowbd_blend_a64_d16_mask_neon)));
-#endif // HAVE_NEON
-
-//////////////////////////////////////////////////////////////////////////////
-// High bit-depth version
-//////////////////////////////////////////////////////////////////////////////
-
-// Signature of the high bit-depth blend kernels. Pixel pointers are passed
-// as uint8_t * (the tests wrap their uint16_t buffers with
-// CONVERT_TO_BYTEPTR), and the bit depth is the trailing argument.
-typedef void (*FHBD)(uint8_t *dst, uint32_t dst_stride, const uint8_t *src0,
- uint32_t src0_stride, const uint8_t *src1,
- uint32_t src1_stride, const uint8_t *mask,
- uint32_t mask_stride, int w, int h, int subx, int suby,
- int bd);
-// (reference function, tested function) pair used as the test parameter.
-typedef libaom_test::FuncParam<FHBD> TestFuncsHBD;
-
-// High bit-depth fixture: 16-bit sources and destination.
-class BlendA64MaskTestHBD : public BlendA64MaskTest<FHBD, uint16_t, uint16_t> {
- protected:
-  // Calls the reference kernel and then the tested kernel run_times times
-  // each on identical inputs at bit_depth_, passing the 16-bit buffers via
-  // CONVERT_TO_BYTEPTR. Both loops are timed; the totals and their ratio are
-  // printed only for timed runs (run_times > 1).
-  void Execute(const uint16_t *p_src0, const uint16_t *p_src1, int run_times) {
-    aom_usec_timer timer;
-    aom_usec_timer_start(&timer);
-    for (int i = 0; i < run_times; ++i) {
-      params_.ref_func(CONVERT_TO_BYTEPTR(dst_ref_ + dst_offset_), dst_stride_,
-                       CONVERT_TO_BYTEPTR(p_src0 + src0_offset_), src0_stride_,
-                       CONVERT_TO_BYTEPTR(p_src1 + src1_offset_), src1_stride_,
-                       mask_, kMaxMaskWidth, w_, h_, subx_, suby_, bit_depth_);
-    }
-    aom_usec_timer_mark(&timer);
-    const double time1 = static_cast<double>(aom_usec_timer_elapsed(&timer));
-    aom_usec_timer_start(&timer);
-    for (int i = 0; i < run_times; ++i) {
-      params_.tst_func(CONVERT_TO_BYTEPTR(dst_tst_ + dst_offset_), dst_stride_,
-                       CONVERT_TO_BYTEPTR(p_src0 + src0_offset_), src0_stride_,
-                       CONVERT_TO_BYTEPTR(p_src1 + src1_offset_), src1_stride_,
-                       mask_, kMaxMaskWidth, w_, h_, subx_, suby_, bit_depth_);
-    }
-    aom_usec_timer_mark(&timer);
-    const double time2 = static_cast<double>(aom_usec_timer_elapsed(&timer));
-    if (run_times > 1) {
-      // The values come from aom_usec_timer_elapsed(), i.e. they are total
-      // microseconds for the whole loop, so they are labelled "us".
-      printf("%3dx%-3d subx %d suby %d :%7.2f/%7.2fus", w_, h_, subx_, suby_,
-             time1, time2);
-      printf("(%3.2f)\n", time1 / time2);
-    }
-  }
-
-  // Bit depth passed to the kernels; set by each test before RunTest().
-  int bit_depth_;
-};
-
-// Compares the kernels on random pixels in [0, 1 << bit_depth_) for a random
-// block size and a random bit depth of 8, 10 or 12.
-TEST_P(BlendA64MaskTestHBD, RandomValues) {
-  int pass = 0;
-  while (pass < kIterations && !HasFatalFailure()) {
-    const int bsize = rng_.Rand8() % BLOCK_SIZES_ALL;
-
-    const int depth_pick = rng_(3);
-    if (depth_pick == 0) {
-      bit_depth_ = 8;
-    } else if (depth_pick == 1) {
-      bit_depth_ = 10;
-    } else {
-      bit_depth_ = 12;
-    }
-
-    const int limit = 1 << bit_depth_;
-
-    for (int idx = 0; idx < kBufSize; ++idx) {
-      dst_ref_[idx] = rng_(limit);
-      dst_tst_[idx] = rng_(limit);
-      src0_[idx] = rng_(limit);
-      src1_[idx] = rng_(limit);
-    }
-
-    for (int idx = 0; idx < kMaxMaskSize; ++idx) {
-      mask_[idx] = rng_(AOM_BLEND_A64_MAX_ALPHA + 1);
-    }
-
-    RunTest(bsize, 1);
-    ++pass;
-  }
-}
-
-// Compares the kernels with every pixel at one of the two largest values for
-// the chosen bit depth and every mask weight at one of the two largest
-// weights. Runs 1000 iterations rather than kIterations.
-TEST_P(BlendA64MaskTestHBD, ExtremeValues) {
-  int pass = 0;
-  while (pass < 1000 && !HasFatalFailure()) {
-    const int bsize = rng_.Rand8() % BLOCK_SIZES_ALL;
-
-    const int depth_pick = rng_(3);
-    if (depth_pick == 0) {
-      bit_depth_ = 8;
-    } else if (depth_pick == 1) {
-      bit_depth_ = 10;
-    } else {
-      bit_depth_ = 12;
-    }
-
-    const int upper = 1 << bit_depth_;
-    const int lower = upper - 2;
-    const int span = upper - lower;
-
-    for (int idx = 0; idx < kBufSize; ++idx) {
-      dst_ref_[idx] = rng_(span) + lower;
-      dst_tst_[idx] = rng_(span) + lower;
-      src0_[idx] = rng_(span) + lower;
-      src1_[idx] = rng_(span) + lower;
-    }
-
-    for (int idx = 0; idx < kMaxMaskSize; ++idx) {
-      mask_[idx] = rng_(2) + AOM_BLEND_A64_MAX_ALPHA - 1;
-    }
-
-    RunTest(bsize, 1);
-    ++pass;
-  }
-}
-
-#if HAVE_SSE4_1
-// High bit-depth SSE4.1 kernel, checked against the C implementation.
-INSTANTIATE_TEST_CASE_P(
- SSE4_1, BlendA64MaskTestHBD,
- ::testing::Values(TestFuncsHBD(aom_highbd_blend_a64_mask_c,
- aom_highbd_blend_a64_mask_sse4_1)));
-#endif // HAVE_SSE4_1
-
-//////////////////////////////////////////////////////////////////////////////
-// HBD _d16 version
-//////////////////////////////////////////////////////////////////////////////
-
-// Signature of the high bit-depth "_d16" blend kernels: CONV_BUF_TYPE
-// sources, a byte-pointer destination, the convolve parameters that carry
-// the rounding shifts, and the bit depth.
-typedef void (*FHBD_D16)(uint8_t *dst, uint32_t dst_stride,
- const CONV_BUF_TYPE *src0, uint32_t src0_stride,
- const CONV_BUF_TYPE *src1, uint32_t src1_stride,
- const uint8_t *mask, uint32_t mask_stride, int w,
- int h, int subx, int suby, ConvolveParams *conv_params,
- const int bd);
-// (reference function, tested function) pair used as the test parameter.
-typedef libaom_test::FuncParam<FHBD_D16> TestFuncsHBD_d16;
-
-// High bit-depth "_d16" fixture: 16-bit sources and destination.
-class BlendA64MaskTestHBD_d16
-    : public BlendA64MaskTest<FHBD_D16, uint16_t, uint16_t> {
- protected:
-  // max number of bits used by the source
-  static const int kSrcMaxBitsMask = (1 << 14) - 1;
-  static const int kSrcMaxBitsMaskHBD = (1 << 16) - 1;
-
-  // Calls the reference kernel run_times times; if a tested kernel is
-  // present (tst_func may be NULL), calls it the same number of times on
-  // identical inputs and, for timed runs (run_times > 1), prints both totals
-  // and their ratio. round_0 is ROUND0_BITS + 2 at 12-bit depth and
-  // ROUND0_BITS otherwise.
-  void Execute(const uint16_t *p_src0, const uint16_t *p_src1, int run_times) {
-    ConvolveParams conv_params;
-    conv_params.round_0 = (bit_depth_ == 12) ? ROUND0_BITS + 2 : ROUND0_BITS;
-    conv_params.round_1 = COMPOUND_ROUND1_BITS;
-    aom_usec_timer timer;
-    aom_usec_timer_start(&timer);
-    for (int i = 0; i < run_times; ++i) {
-      params_.ref_func(CONVERT_TO_BYTEPTR(dst_ref_ + dst_offset_), dst_stride_,
-                       p_src0 + src0_offset_, src0_stride_,
-                       p_src1 + src1_offset_, src1_stride_, mask_,
-                       kMaxMaskWidth, w_, h_, subx_, suby_, &conv_params,
-                       bit_depth_);
-    }
-    if (params_.tst_func) {
-      aom_usec_timer_mark(&timer);
-      const double time1 = static_cast<double>(aom_usec_timer_elapsed(&timer));
-      aom_usec_timer_start(&timer);
-      for (int i = 0; i < run_times; ++i) {
-        params_.tst_func(CONVERT_TO_BYTEPTR(dst_tst_ + dst_offset_),
-                         dst_stride_, p_src0 + src0_offset_, src0_stride_,
-                         p_src1 + src1_offset_, src1_stride_, mask_,
-                         kMaxMaskWidth, w_, h_, subx_, suby_, &conv_params,
-                         bit_depth_);
-      }
-      aom_usec_timer_mark(&timer);
-      const double time2 = static_cast<double>(aom_usec_timer_elapsed(&timer));
-      if (run_times > 1) {
-        // The values come from aom_usec_timer_elapsed(), i.e. they are total
-        // microseconds for the whole loop, so they are labelled "us".
-        printf("%3dx%-3d subx %d suby %d :%7.2f/%7.2fus", w_, h_, subx_, suby_,
-               time1, time2);
-        printf("(%3.2f)\n", time1 / time2);
-      }
-    }
-  }
-
-  // Bit depth passed to the kernels; set by each test before RunTest().
-  int bit_depth_;
-  // Mask applied to the random 16-bit source values for the current depth.
-  int src_max_bits_mask_;
-};
-
-// Compares the kernels on random data for a random block size and bit depth.
-// Does nothing when the parameter carries no tested function.
-TEST_P(BlendA64MaskTestHBD_d16, RandomValues) {
-  if (params_.tst_func == NULL) return;
-
-  int pass = 0;
-  while (pass < kIterations && !HasFatalFailure()) {
-    const int bsize = rng_.Rand8() % BLOCK_SIZES_ALL;
-
-    const int depth_pick = rng_(3);
-    if (depth_pick == 0) {
-      bit_depth_ = 8;
-    } else if (depth_pick == 1) {
-      bit_depth_ = 10;
-    } else {
-      bit_depth_ = 12;
-    }
-
-    // 8-bit content uses the 14-bit source mask, deeper content the 16-bit one.
-    if (bit_depth_ == 8) {
-      src_max_bits_mask_ = kSrcMaxBitsMask;
-    } else {
-      src_max_bits_mask_ = kSrcMaxBitsMaskHBD;
-    }
-
-    for (int idx = 0; idx < kBufSize; ++idx) {
-      dst_ref_[idx] = rng_.Rand8();
-      dst_tst_[idx] = rng_.Rand8();
-
-      src0_[idx] = rng_.Rand16() & src_max_bits_mask_;
-      src1_[idx] = rng_.Rand16() & src_max_bits_mask_;
-    }
-
-    for (int idx = 0; idx < kMaxMaskSize; ++idx) {
-      mask_[idx] = rng_(AOM_BLEND_A64_MAX_ALPHA + 1);
-    }
-
-    RunTest(bsize, 1);
-    ++pass;
-  }
-}
-// TODO (Scott LaVarnway), fix this test
-// Disabled: for every block size and bit depth (8, 10, 12), runs the kernels
-// with saturated sources and the maximum mask weight. The two destination
-// buffers start out different (0 vs. the maximum pixel value), so they can
-// only match if both kernels overwrite the whole block.
-TEST_P(BlendA64MaskTestHBD_d16, DISABLED_SaturatedValues) {
-  // Without a tested function Execute() never writes dst_tst_, and the
-  // deliberately different destination buffers would always mismatch. Skip,
-  // as RandomValues does.
-  if (params_.tst_func == NULL) return;
-
-  for (int bsize = 0; bsize < BLOCK_SIZES_ALL; ++bsize) {
-    for (bit_depth_ = 8; bit_depth_ <= 12; bit_depth_ += 2) {
-      src_max_bits_mask_ =
-          (bit_depth_ == 8) ? kSrcMaxBitsMask : kSrcMaxBitsMaskHBD;
-
-      for (int i = 0; i < kBufSize; ++i) {
-        dst_ref_[i] = 0;
-        dst_tst_[i] = (1 << bit_depth_) - 1;
-
-        src0_[i] = src_max_bits_mask_;
-        src1_[i] = src_max_bits_mask_;
-      }
-
-      for (int i = 0; i < kMaxMaskSize; ++i) mask_[i] = AOM_BLEND_A64_MAX_ALPHA;
-
-      RunTest(bsize, 1);
-    }
-  }
-}
-
-// Only the C kernel exists for the HBD d16 blend, so no tested function is
-// supplied (NULL) and the tests skip the comparison.
-INSTANTIATE_TEST_CASE_P(
-    C, BlendA64MaskTestHBD_d16,
-    ::testing::Values(TestFuncsHBD_d16(aom_highbd_blend_a64_d16_mask_c, NULL)));
-
-// TODO(slavarnway): Enable the following in the avx2 commit. (56501)
-#if 0
-#if HAVE_AVX2
-// Uses the AVX2 prefix: reusing SSE4_1 would clash with the existing SSE4_1
-// instantiation of BlendA64MaskTestHBD once this block is enabled.
-INSTANTIATE_TEST_CASE_P(
-    AVX2, BlendA64MaskTestHBD,
-    ::testing::Values(TestFuncsHBD(aom_highbd_blend_a64_mask_c,
-                                   aom_highbd_blend_a64_mask_avx2)));
-#endif  // HAVE_AVX2
-#endif
-} // namespace
diff --git a/third_party/aom/test/blockd_test.cc b/third_party/aom/test/blockd_test.cc
deleted file mode 100644
index ab624007c..000000000
--- a/third_party/aom/test/blockd_test.cc
+++ /dev/null
@@ -1,122 +0,0 @@
-/*
- * Copyright (c) 2018, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#include "av1/common/blockd.h"
-#include "third_party/googletest/src/googletest/include/gtest/gtest.h"
-
-// Verify the optimized implementation of get_partition_subsize() produces the
-// same results as the Partition_Subsize lookup table in the spec.
-// Checks get_partition_subsize() against a literal copy of the lookup table:
-// one row per partition type (10 rows), one column per block size.
-TEST(BlockdTest, GetPartitionSubsize) {
- // The Partition_Subsize table in the spec (Section 9.3. Conversion tables).
- /* clang-format off */
- static const BLOCK_SIZE kPartitionSubsize[10][BLOCK_SIZES_ALL] = {
- {
- BLOCK_4X4,
- BLOCK_INVALID, BLOCK_INVALID, BLOCK_8X8,
- BLOCK_INVALID, BLOCK_INVALID, BLOCK_16X16,
- BLOCK_INVALID, BLOCK_INVALID, BLOCK_32X32,
- BLOCK_INVALID, BLOCK_INVALID, BLOCK_64X64,
- BLOCK_INVALID, BLOCK_INVALID, BLOCK_128X128,
- BLOCK_INVALID, BLOCK_INVALID, BLOCK_INVALID,
- BLOCK_INVALID, BLOCK_INVALID, BLOCK_INVALID
- }, {
- BLOCK_INVALID,
- BLOCK_INVALID, BLOCK_INVALID, BLOCK_8X4,
- BLOCK_INVALID, BLOCK_INVALID, BLOCK_16X8,
- BLOCK_INVALID, BLOCK_INVALID, BLOCK_32X16,
- BLOCK_INVALID, BLOCK_INVALID, BLOCK_64X32,
- BLOCK_INVALID, BLOCK_INVALID, BLOCK_128X64,
- BLOCK_INVALID, BLOCK_INVALID, BLOCK_INVALID,
- BLOCK_INVALID, BLOCK_INVALID, BLOCK_INVALID
- }, {
- BLOCK_INVALID,
- BLOCK_INVALID, BLOCK_INVALID, BLOCK_4X8,
- BLOCK_INVALID, BLOCK_INVALID, BLOCK_8X16,
- BLOCK_INVALID, BLOCK_INVALID, BLOCK_16X32,
- BLOCK_INVALID, BLOCK_INVALID, BLOCK_32X64,
- BLOCK_INVALID, BLOCK_INVALID, BLOCK_64X128,
- BLOCK_INVALID, BLOCK_INVALID, BLOCK_INVALID,
- BLOCK_INVALID, BLOCK_INVALID, BLOCK_INVALID
- }, {
- BLOCK_INVALID,
- BLOCK_INVALID, BLOCK_INVALID, BLOCK_4X4,
- BLOCK_INVALID, BLOCK_INVALID, BLOCK_8X8,
- BLOCK_INVALID, BLOCK_INVALID, BLOCK_16X16,
- BLOCK_INVALID, BLOCK_INVALID, BLOCK_32X32,
- BLOCK_INVALID, BLOCK_INVALID, BLOCK_64X64,
- BLOCK_INVALID, BLOCK_INVALID, BLOCK_INVALID,
- BLOCK_INVALID, BLOCK_INVALID, BLOCK_INVALID
- }, {
- BLOCK_INVALID,
- BLOCK_INVALID, BLOCK_INVALID, BLOCK_8X4,
- BLOCK_INVALID, BLOCK_INVALID, BLOCK_16X8,
- BLOCK_INVALID, BLOCK_INVALID, BLOCK_32X16,
- BLOCK_INVALID, BLOCK_INVALID, BLOCK_64X32,
- BLOCK_INVALID, BLOCK_INVALID, BLOCK_128X64,
- BLOCK_INVALID, BLOCK_INVALID, BLOCK_INVALID,
- BLOCK_INVALID, BLOCK_INVALID, BLOCK_INVALID
- }, {
- BLOCK_INVALID,
- BLOCK_INVALID, BLOCK_INVALID, BLOCK_8X4,
- BLOCK_INVALID, BLOCK_INVALID, BLOCK_16X8,
- BLOCK_INVALID, BLOCK_INVALID, BLOCK_32X16,
- BLOCK_INVALID, BLOCK_INVALID, BLOCK_64X32,
- BLOCK_INVALID, BLOCK_INVALID, BLOCK_128X64,
- BLOCK_INVALID, BLOCK_INVALID, BLOCK_INVALID,
- BLOCK_INVALID, BLOCK_INVALID, BLOCK_INVALID
- }, {
- BLOCK_INVALID,
- BLOCK_INVALID, BLOCK_INVALID, BLOCK_4X8,
- BLOCK_INVALID, BLOCK_INVALID, BLOCK_8X16,
- BLOCK_INVALID, BLOCK_INVALID, BLOCK_16X32,
- BLOCK_INVALID, BLOCK_INVALID, BLOCK_32X64,
- BLOCK_INVALID, BLOCK_INVALID, BLOCK_64X128,
- BLOCK_INVALID, BLOCK_INVALID, BLOCK_INVALID,
- BLOCK_INVALID, BLOCK_INVALID, BLOCK_INVALID
- }, {
- BLOCK_INVALID,
- BLOCK_INVALID, BLOCK_INVALID, BLOCK_4X8,
- BLOCK_INVALID, BLOCK_INVALID, BLOCK_8X16,
- BLOCK_INVALID, BLOCK_INVALID, BLOCK_16X32,
- BLOCK_INVALID, BLOCK_INVALID, BLOCK_32X64,
- BLOCK_INVALID, BLOCK_INVALID, BLOCK_64X128,
- BLOCK_INVALID, BLOCK_INVALID, BLOCK_INVALID,
- BLOCK_INVALID, BLOCK_INVALID, BLOCK_INVALID
- }, {
- BLOCK_INVALID,
- BLOCK_INVALID, BLOCK_INVALID, BLOCK_INVALID,
- BLOCK_INVALID, BLOCK_INVALID, BLOCK_16X4,
- BLOCK_INVALID, BLOCK_INVALID, BLOCK_32X8,
- BLOCK_INVALID, BLOCK_INVALID, BLOCK_64X16,
- BLOCK_INVALID, BLOCK_INVALID, BLOCK_INVALID,
- BLOCK_INVALID, BLOCK_INVALID, BLOCK_INVALID,
- BLOCK_INVALID, BLOCK_INVALID, BLOCK_INVALID
- }, {
- BLOCK_INVALID,
- BLOCK_INVALID, BLOCK_INVALID, BLOCK_INVALID,
- BLOCK_INVALID, BLOCK_INVALID, BLOCK_4X16,
- BLOCK_INVALID, BLOCK_INVALID, BLOCK_8X32,
- BLOCK_INVALID, BLOCK_INVALID, BLOCK_16X64,
- BLOCK_INVALID, BLOCK_INVALID, BLOCK_INVALID,
- BLOCK_INVALID, BLOCK_INVALID, BLOCK_INVALID,
- BLOCK_INVALID, BLOCK_INVALID, BLOCK_INVALID
- }
- };
- /* clang-format on */
-
- // Exhaustively compare every (partition, block size) pair. EXPECT_EQ is
- // non-fatal, so all mismatches are reported in a single run.
- for (int partition = 0; partition < 10; partition++) {
- for (int bsize = BLOCK_4X4; bsize < BLOCK_SIZES_ALL; bsize++) {
- EXPECT_EQ(kPartitionSubsize[partition][bsize],
- get_partition_subsize(static_cast<BLOCK_SIZE>(bsize),
- static_cast<PARTITION_TYPE>(partition)));
- }
- }
-}
diff --git a/third_party/aom/test/boolcoder_test.cc b/third_party/aom/test/boolcoder_test.cc
deleted file mode 100644
index 680ec1877..000000000
--- a/third_party/aom/test/boolcoder_test.cc
+++ /dev/null
@@ -1,173 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#include <math.h>
-#include <stdlib.h>
-#include <string.h>
-
-#include "third_party/googletest/src/googletest/include/gtest/gtest.h"
-
-#include "test/acm_random.h"
-#include "aom/aom_integer.h"
-#include "aom_dsp/bitreader.h"
-#include "aom_dsp/bitwriter.h"
-
-using libaom_test::ACMRandom;
-
-namespace {
-// Number of times TestBitIO repeats its full sweep over probability and bit
-// patterns.
-const int num_tests = 10;
-} // namespace
-
-// Round-trip test for the bit writer/reader: encodes kBitsToTest bits under
-// eight probability patterns and four bit patterns, then decodes them and
-// checks that every bit comes back unchanged.
-TEST(AV1, TestBitIO) {
- ACMRandom rnd(ACMRandom::DeterministicSeed());
- for (int n = 0; n < num_tests; ++n) {
- for (int method = 0; method <= 7; ++method) { // we generate various proba
- const int kBitsToTest = 1000;
- uint8_t probas[kBitsToTest];
-
- // Probability patterns: 0 = all 0, 1 = all 255, 2 = all 128, 3 = random,
- // 4 = alternating 0/255, 5-7 = alternating low/high within a range that
- // narrows from 128 to 64 to 32.
- for (int i = 0; i < kBitsToTest; ++i) {
- const int parity = i & 1;
- /* clang-format off */
- probas[i] =
- (method == 0) ? 0 : (method == 1) ? 255 :
- (method == 2) ? 128 :
- (method == 3) ? rnd.Rand8() :
- (method == 4) ? (parity ? 0 : 255) :
- // alternate between low and high proba:
- (method == 5) ? (parity ? rnd(128) : 255 - rnd(128)) :
- (method == 6) ?
- (parity ? rnd(64) : 255 - rnd(64)) :
- (parity ? rnd(32) : 255 - rnd(32));
- /* clang-format on */
- }
- // Bit patterns: 0 = all zeros, 1 = all ones, 2 = alternating, 3 = random.
- for (int bit_method = 0; bit_method <= 3; ++bit_method) {
- const int random_seed = 6432;
- const int kBufferSize = 10000;
- ACMRandom bit_rnd(random_seed);
- aom_writer bw;
- uint8_t bw_buffer[kBufferSize];
- aom_start_encode(&bw, bw_buffer);
-
- int bit = (bit_method == 0) ? 0 : (bit_method == 1) ? 1 : 0;
- for (int i = 0; i < kBitsToTest; ++i) {
- if (bit_method == 2) {
- bit = (i & 1);
- } else if (bit_method == 3) {
- bit = bit_rnd(2);
- }
- aom_write(&bw, bit, static_cast<int>(probas[i]));
- }
-
- aom_stop_encode(&bw);
-
- aom_reader br;
- aom_reader_init(&br, bw_buffer, bw.pos);
- // Re-seed so the decoder side regenerates the same random bit sequence.
- bit_rnd.Reset(random_seed);
- for (int i = 0; i < kBitsToTest; ++i) {
- if (bit_method == 2) {
- bit = (i & 1);
- } else if (bit_method == 3) {
- bit = bit_rnd(2);
- }
- GTEST_ASSERT_EQ(aom_read(&br, probas[i], NULL), bit)
- << "pos: " << i << " / " << kBitsToTest
- << " bit_method: " << bit_method << " method: " << method;
- }
- }
- }
- }
-}
-
-#define FRAC_DIFF_TOTAL_ERROR 0.18
-
-// Checks the reader's position reporting: for each probability p, encodes
-// kSymbols zero bits, then reads them back while verifying that tell and
-// tell_frac never decrease, that tell equals tell_frac rounded up to whole
-// bits, and that the totals stay close to the -log2(p / 256) estimate.
-TEST(AV1, TestTell) {
- const int kBufferSize = 10000;
- aom_writer bw;
- uint8_t bw_buffer[kBufferSize];
- const int kSymbols = 1024;
- // Coders are noisier at low probabilities, so we start at p = 4.
- for (int p = 4; p < 256; p++) {
- double probability = p / 256.;
- aom_start_encode(&bw, bw_buffer);
- for (int i = 0; i < kSymbols; i++) {
- aom_write(&bw, 0, p);
- }
- aom_stop_encode(&bw);
- aom_reader br;
- aom_reader_init(&br, bw_buffer, bw.pos);
- uint32_t last_tell = aom_reader_tell(&br);
- uint32_t last_tell_frac = aom_reader_tell_frac(&br);
- double frac_diff_total = 0;
- // Before any symbol is read the reported position must be 0 or 1.
- GTEST_ASSERT_GE(aom_reader_tell(&br), 0u);
- GTEST_ASSERT_LE(aom_reader_tell(&br), 1u);
- ASSERT_FALSE(aom_reader_has_overflowed(&br));
- for (int i = 0; i < kSymbols; i++) {
- aom_read(&br, p, NULL);
- uint32_t tell = aom_reader_tell(&br);
- uint32_t tell_frac = aom_reader_tell_frac(&br);
- GTEST_ASSERT_GE(tell, last_tell)
- << "tell: " << tell << ", last_tell: " << last_tell;
- GTEST_ASSERT_GE(tell_frac, last_tell_frac)
- << "tell_frac: " << tell_frac
- << ", last_tell_frac: " << last_tell_frac;
- // Frac tell should round up to tell.
- GTEST_ASSERT_EQ(tell, (tell_frac + 7) >> 3);
- last_tell = tell;
- // tell_frac is in 1/8-bit units; accumulate the deviation of this
- // symbol's cost from the -log2(probability) estimate.
- frac_diff_total +=
- fabs(((tell_frac - last_tell_frac) / 8.0) + log2(probability));
- last_tell_frac = tell_frac;
- }
- const uint32_t expected = (uint32_t)(-kSymbols * log2(probability));
- // Last tell should be close to the expected value.
- GTEST_ASSERT_LE(last_tell, expected + 20) << " last_tell: " << last_tell;
- // The average frac_diff error should be pretty small.
- GTEST_ASSERT_LE(frac_diff_total / kSymbols, FRAC_DIFF_TOTAL_ERROR)
- << " frac_diff_total: " << frac_diff_total;
- ASSERT_FALSE(aom_reader_has_overflowed(&br));
- }
-}
-
-// Checks aom_reader_has_overflowed(): it must stay false while the kSymbols
-// encoded bits are read back, and become true after enough extra reads past
-// the end of the encoded data.
-TEST(AV1, TestHasOverflowed) {
- const int kBufferSize = 10000;
- aom_writer bw;
- uint8_t bw_buffer[kBufferSize];
- const int kSymbols = 1024;
- // Coders are noisier at low probabilities, so we start at p = 4.
- for (int p = 4; p < 256; p++) {
- aom_start_encode(&bw, bw_buffer);
- for (int i = 0; i < kSymbols; i++) {
- aom_write(&bw, 1, p);
- }
- aom_stop_encode(&bw);
- aom_reader br;
- aom_reader_init(&br, bw_buffer, bw.pos);
- ASSERT_FALSE(aom_reader_has_overflowed(&br));
- for (int i = 0; i < kSymbols; i++) {
- GTEST_ASSERT_EQ(aom_read(&br, p, NULL), 1);
- ASSERT_FALSE(aom_reader_has_overflowed(&br));
- }
- // In the worst case, the encoder uses just a tiny fraction of the last
- // byte in the buffer. So to guarantee that aom_reader_has_overflowed()
- // returns true, we have to consume very nearly 8 additional bits of data.
- // In the worst case, one of the bits in that byte will be 1, and the rest
- // will be zero. Once we are past that 1 bit, when the probability of
- // reading a zero symbol from aom_read() is high, each additional symbol
- // read will consume very little additional data (in the case that
- // p == 255, approximately -log_2(255/256) ~= 0.0056 bits). In that case it
- // would take around 178 calls to consume more than 8 bits. That is only an
- // upper bound. In practice we are not guaranteed to hit the worst case and
- // can get away with 174 calls.
- for (int i = 0; i < 174; i++) {
- aom_read(&br, p, NULL);
- }
- ASSERT_TRUE(aom_reader_has_overflowed(&br));
- }
-}
diff --git a/third_party/aom/test/borders_test.cc b/third_party/aom/test/borders_test.cc
deleted file mode 100644
index 893237ef3..000000000
--- a/third_party/aom/test/borders_test.cc
+++ /dev/null
@@ -1,85 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#include <climits>
-#include <vector>
-#include "third_party/googletest/src/googletest/include/gtest/gtest.h"
-#include "test/codec_factory.h"
-#include "test/encode_test_driver.h"
-#include "test/i420_video_source.h"
-#include "test/util.h"
-
-namespace {
-
-// Encoder test fixture parameterized on (codec factory, test mode). It
-// configures the encoder once on frame 1 and otherwise relies on the
-// EncoderTest run loop.
-class BordersTestLarge
-    : public ::libaom_test::CodecTestWithParam<libaom_test::TestMode>,
-      public ::libaom_test::EncoderTest {
- protected:
-  BordersTestLarge() : EncoderTest(GET_PARAM(0)) {}
-  virtual ~BordersTestLarge() {}
-
-  virtual void SetUp() {
-    InitializeConfig();
-    SetMode(GET_PARAM(1));
-  }
-
-  // Applies the encoder controls exactly once, when frame 1 is about to be
-  // encoded: CPU-used 1, automatic alt-ref enabled, ARNR max frames 7 and
-  // ARNR strength 5.
-  virtual void PreEncodeFrameHook(::libaom_test::VideoSource *video,
-                                  ::libaom_test::Encoder *encoder) {
-    if (video->frame() == 1) {
-      encoder->Control(AOME_SET_CPUUSED, 1);
-      encoder->Control(AOME_SET_ENABLEAUTOALTREF, 1);
-      encoder->Control(AOME_SET_ARNR_MAXFRAMES, 7);
-      encoder->Control(AOME_SET_ARNR_STRENGTH, 5);
-    }
-  }
-
-  // Intentionally a no-op. The previous body only tested the key-frame flag
-  // in an empty `if`, which had no effect.
-  virtual void FramePktHook(const aom_codec_cx_pkt_t * /*pkt*/) {}
-};
-
-TEST_P(BordersTestLarge, TestEncodeHighBitrate) {
- // Validate that this non multiple of 64 wide clip encodes and decodes
- // without a mismatch when passing in a very low max q. This pushes
- // the encoder to producing lots of big partitions which will likely
- // extend into the border and test the border condition.
- cfg_.g_lag_in_frames = 25;
- cfg_.rc_2pass_vbr_minsection_pct = 5;
- cfg_.rc_2pass_vbr_maxsection_pct = 2000;
- cfg_.rc_target_bitrate = 2000;
- cfg_.rc_max_quantizer = 10;
-
- // 208x144 source; the trailing arguments (30, 1, 0, 10) are passed through
- // to I420VideoSource unchanged.
- ::libaom_test::I420VideoSource video("hantro_odd.yuv", 208, 144, 30, 1, 0,
- 10);
-
- ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
-}
-TEST_P(BordersTestLarge, TestLowBitrate) {
- // Validate that this clip encodes and decodes without a mismatch
- // when passing in a very high min q. This pushes the encoder to producing
- // lots of small partitions, which should exercise the other condition.
-
- cfg_.g_lag_in_frames = 25;
- cfg_.rc_2pass_vbr_minsection_pct = 5;
- cfg_.rc_2pass_vbr_maxsection_pct = 2000;
- cfg_.rc_target_bitrate = 200;
- cfg_.rc_min_quantizer = 40;
-
- ::libaom_test::I420VideoSource video("hantro_odd.yuv", 208, 144, 30, 1, 0,
- 10);
-
- ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
-}
-
-// The border tests run in two-pass "good" mode only.
-AV1_INSTANTIATE_TEST_CASE(BordersTestLarge,
- ::testing::Values(::libaom_test::kTwoPassGood));
-} // namespace
diff --git a/third_party/aom/test/cdef_test.cc b/third_party/aom/test/cdef_test.cc
deleted file mode 100644
index becc07291..000000000
--- a/third_party/aom/test/cdef_test.cc
+++ /dev/null
@@ -1,425 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#include <cstdlib>
-#include <string>
-
-#include "third_party/googletest/src/googletest/include/gtest/gtest.h"
-
-#include "config/aom_config.h"
-#include "config/av1_rtcd.h"
-
-#include "aom_ports/aom_timer.h"
-#include "av1/common/cdef_block.h"
-#include "test/acm_random.h"
-#include "test/clear_system_state.h"
-#include "test/register_state_check.h"
-#include "test/util.h"
-
-using libaom_test::ACMRandom;
-
-namespace {
-
-typedef ::testing::tuple<cdef_filter_block_func, cdef_filter_block_func,
- BLOCK_SIZE, int, int>
- cdef_dir_param_t;
-
-class CDEFBlockTest : public ::testing::TestWithParam<cdef_dir_param_t> {
- public:
- virtual ~CDEFBlockTest() {}
- virtual void SetUp() {
- cdef = GET_PARAM(0);
- ref_cdef = GET_PARAM(1);
- bsize = GET_PARAM(2);
- boundary = GET_PARAM(3);
- depth = GET_PARAM(4);
- }
-
- virtual void TearDown() { libaom_test::ClearSystemState(); }
-
- protected:
- int bsize;
- int boundary;
- int depth;
- cdef_filter_block_func cdef;
- cdef_filter_block_func ref_cdef;
-};
-
-typedef CDEFBlockTest CDEFSpeedTest;
-
-void test_cdef(int bsize, int iterations, cdef_filter_block_func cdef,
- cdef_filter_block_func ref_cdef, int boundary, int depth) {
- const int size = 8;
- const int ysize = size + 2 * CDEF_VBORDER;
- ACMRandom rnd(ACMRandom::DeterministicSeed());
- DECLARE_ALIGNED(16, uint16_t, s[ysize * CDEF_BSTRIDE]);
- DECLARE_ALIGNED(16, static uint16_t, d[size * size]);
- DECLARE_ALIGNED(16, static uint16_t, ref_d[size * size]);
- memset(ref_d, 0, sizeof(ref_d));
- memset(d, 0, sizeof(d));
-
- int error = 0, pristrength = 0, secstrength, dir;
- int pridamping, secdamping, bits, level, count,
- errdepth = 0, errpristrength = 0, errsecstrength = 0, errboundary = 0,
- errpridamping = 0, errsecdamping = 0;
- unsigned int pos = 0;
-
- const unsigned int max_pos = size * size >> static_cast<int>(depth == 8);
- for (pridamping = 3 + depth - 8; pridamping < 7 - 3 * !!boundary + depth - 8;
- pridamping++) {
- for (secdamping = 3 + depth - 8;
- secdamping < 7 - 3 * !!boundary + depth - 8; secdamping++) {
- for (count = 0; count < iterations; count++) {
- for (level = 0; level < (1 << depth) && !error;
- level += (2 + 6 * !!boundary) << (depth - 8)) {
- for (bits = 1; bits <= depth && !error; bits += 1 + 3 * !!boundary) {
- for (unsigned int i = 0; i < sizeof(s) / sizeof(*s); i++)
- s[i] = clamp((rnd.Rand16() & ((1 << bits) - 1)) + level, 0,
- (1 << depth) - 1);
- if (boundary) {
- if (boundary & 1) { // Left
- for (int i = 0; i < ysize; i++)
- for (int j = 0; j < CDEF_HBORDER; j++)
- s[i * CDEF_BSTRIDE + j] = CDEF_VERY_LARGE;
- }
- if (boundary & 2) { // Right
- for (int i = 0; i < ysize; i++)
- for (int j = CDEF_HBORDER + size; j < CDEF_BSTRIDE; j++)
- s[i * CDEF_BSTRIDE + j] = CDEF_VERY_LARGE;
- }
- if (boundary & 4) { // Above
- for (int i = 0; i < CDEF_VBORDER; i++)
- for (int j = 0; j < CDEF_BSTRIDE; j++)
- s[i * CDEF_BSTRIDE + j] = CDEF_VERY_LARGE;
- }
- if (boundary & 8) { // Below
- for (int i = CDEF_VBORDER + size; i < ysize; i++)
- for (int j = 0; j < CDEF_BSTRIDE; j++)
- s[i * CDEF_BSTRIDE + j] = CDEF_VERY_LARGE;
- }
- }
- for (dir = 0; dir < 8; dir++) {
- for (pristrength = 0; pristrength <= 19 << (depth - 8) && !error;
- pristrength += (1 + 4 * !!boundary) << (depth - 8)) {
- if (pristrength == 16) pristrength = 19;
- for (secstrength = 0; secstrength <= 4 << (depth - 8) && !error;
- secstrength += 1 << (depth - 8)) {
- if (secstrength == 3 << (depth - 8)) continue;
- ref_cdef(depth == 8 ? (uint8_t *)ref_d : 0, ref_d, size,
- s + CDEF_HBORDER + CDEF_VBORDER * CDEF_BSTRIDE,
- pristrength, secstrength, dir, pridamping,
- secdamping, bsize, (1 << depth) - 1, depth - 8);
- // If cdef and ref_cdef are the same, we're just testing
- // speed
- if (cdef != ref_cdef)
- ASM_REGISTER_STATE_CHECK(
- cdef(depth == 8 ? (uint8_t *)d : 0, d, size,
- s + CDEF_HBORDER + CDEF_VBORDER * CDEF_BSTRIDE,
- pristrength, secstrength, dir, pridamping,
- secdamping, bsize, (1 << depth) - 1, depth - 8));
- if (ref_cdef != cdef) {
- for (pos = 0; pos < max_pos && !error; pos++) {
- error = ref_d[pos] != d[pos];
- errdepth = depth;
- errpristrength = pristrength;
- errsecstrength = secstrength;
- errboundary = boundary;
- errpridamping = pridamping;
- errsecdamping = secdamping;
- }
- }
- }
- }
- }
- }
- }
- }
- }
- }
-
- pos--;
- EXPECT_EQ(0, error) << "Error: CDEFBlockTest, SIMD and C mismatch."
- << std::endl
- << "First error at " << pos % size << "," << pos / size
- << " (" << (int16_t)ref_d[pos] << " : " << (int16_t)d[pos]
- << ") " << std::endl
- << "pristrength: " << errpristrength << std::endl
- << "pridamping: " << errpridamping << std::endl
- << "secstrength: " << errsecstrength << std::endl
- << "secdamping: " << errsecdamping << std::endl
- << "depth: " << errdepth << std::endl
- << "size: " << bsize << std::endl
- << "boundary: " << errboundary << std::endl
- << std::endl;
-}
-
-void test_cdef_speed(int bsize, int iterations, cdef_filter_block_func cdef,
- cdef_filter_block_func ref_cdef, int boundary, int depth) {
- aom_usec_timer ref_timer;
- aom_usec_timer timer;
-
- aom_usec_timer_start(&ref_timer);
- test_cdef(bsize, iterations, ref_cdef, ref_cdef, boundary, depth);
- aom_usec_timer_mark(&ref_timer);
- int ref_elapsed_time = (int)aom_usec_timer_elapsed(&ref_timer);
-
- aom_usec_timer_start(&timer);
- test_cdef(bsize, iterations, cdef, cdef, boundary, depth);
- aom_usec_timer_mark(&timer);
- int elapsed_time = (int)aom_usec_timer_elapsed(&timer);
-
- EXPECT_GT(ref_elapsed_time, elapsed_time)
- << "Error: CDEFSpeedTest, SIMD slower than C." << std::endl
- << "C time: " << ref_elapsed_time << " us" << std::endl
- << "SIMD time: " << elapsed_time << " us" << std::endl;
-}
-
-typedef int (*find_dir_t)(const uint16_t *img, int stride, int32_t *var,
- int coeff_shift);
-
-typedef ::testing::tuple<find_dir_t, find_dir_t> find_dir_param_t;
-
-class CDEFFindDirTest : public ::testing::TestWithParam<find_dir_param_t> {
- public:
- virtual ~CDEFFindDirTest() {}
- virtual void SetUp() {
- finddir = GET_PARAM(0);
- ref_finddir = GET_PARAM(1);
- }
-
- virtual void TearDown() { libaom_test::ClearSystemState(); }
-
- protected:
- find_dir_t finddir;
- find_dir_t ref_finddir;
-};
-
-typedef CDEFFindDirTest CDEFFindDirSpeedTest;
-
-void test_finddir(int (*finddir)(const uint16_t *img, int stride, int32_t *var,
- int coeff_shift),
- int (*ref_finddir)(const uint16_t *img, int stride,
- int32_t *var, int coeff_shift)) {
- const int size = 8;
- ACMRandom rnd(ACMRandom::DeterministicSeed());
- DECLARE_ALIGNED(16, uint16_t, s[size * size]);
-
- int error = 0;
- int depth, bits, level, count, errdepth = 0;
- int ref_res = 0, res = 0;
- int32_t ref_var = 0, var = 0;
-
- for (depth = 8; depth <= 12 && !error; depth += 2) {
- for (count = 0; count < 512 && !error; count++) {
- for (level = 0; level < (1 << depth) && !error;
- level += 1 << (depth - 8)) {
- for (bits = 1; bits <= depth && !error; bits++) {
- for (unsigned int i = 0; i < sizeof(s) / sizeof(*s); i++)
- s[i] = clamp((rnd.Rand16() & ((1 << bits) - 1)) + level, 0,
- (1 << depth) - 1);
- for (int c = 0; c < 1 + 9 * (finddir == ref_finddir); c++)
- ref_res = ref_finddir(s, size, &ref_var, depth - 8);
- if (finddir != ref_finddir)
- ASM_REGISTER_STATE_CHECK(res = finddir(s, size, &var, depth - 8));
- if (ref_finddir != finddir) {
- if (res != ref_res || var != ref_var) error = 1;
- errdepth = depth;
- }
- }
- }
- }
- }
-
- EXPECT_EQ(0, error) << "Error: CDEFFindDirTest, SIMD and C mismatch."
- << std::endl
- << "return: " << res << " : " << ref_res << std::endl
- << "var: " << var << " : " << ref_var << std::endl
- << "depth: " << errdepth << std::endl
- << std::endl;
-}
-
-void test_finddir_speed(int (*finddir)(const uint16_t *img, int stride,
- int32_t *var, int coeff_shift),
- int (*ref_finddir)(const uint16_t *img, int stride,
- int32_t *var, int coeff_shift)) {
- aom_usec_timer ref_timer;
- aom_usec_timer timer;
-
- aom_usec_timer_start(&ref_timer);
- test_finddir(ref_finddir, ref_finddir);
- aom_usec_timer_mark(&ref_timer);
- int ref_elapsed_time = (int)aom_usec_timer_elapsed(&ref_timer);
-
- aom_usec_timer_start(&timer);
- test_finddir(finddir, finddir);
- aom_usec_timer_mark(&timer);
- int elapsed_time = (int)aom_usec_timer_elapsed(&timer);
-
- EXPECT_GT(ref_elapsed_time, elapsed_time)
- << "Error: CDEFFindDirSpeedTest, SIMD slower than C." << std::endl
- << "C time: " << ref_elapsed_time << " us" << std::endl
- << "SIMD time: " << elapsed_time << " us" << std::endl;
-}
-
-TEST_P(CDEFBlockTest, TestSIMDNoMismatch) {
- test_cdef(bsize, 1, cdef, ref_cdef, boundary, depth);
-}
-
-TEST_P(CDEFSpeedTest, DISABLED_TestSpeed) {
- test_cdef_speed(bsize, 4, cdef, ref_cdef, boundary, depth);
-}
-
-TEST_P(CDEFFindDirTest, TestSIMDNoMismatch) {
- test_finddir(finddir, ref_finddir);
-}
-
-TEST_P(CDEFFindDirSpeedTest, DISABLED_TestSpeed) {
- test_finddir_speed(finddir, ref_finddir);
-}
-
-using ::testing::make_tuple;
-
-// VS compiling for 32 bit targets does not support vector types in
-// structs as arguments, which makes the v256 type of the intrinsics
-// hard to support, so optimizations for this target are disabled.
-#if defined(_WIN64) || !defined(_MSC_VER) || defined(__clang__)
-#if HAVE_SSE2
-INSTANTIATE_TEST_CASE_P(
- SSE2, CDEFBlockTest,
- ::testing::Combine(::testing::Values(&cdef_filter_block_sse2),
- ::testing::Values(&cdef_filter_block_c),
- ::testing::Values(BLOCK_4X4, BLOCK_4X8, BLOCK_8X4,
- BLOCK_8X8),
- ::testing::Range(0, 16), ::testing::Range(8, 13, 2)));
-INSTANTIATE_TEST_CASE_P(SSE2, CDEFFindDirTest,
- ::testing::Values(make_tuple(&cdef_find_dir_sse2,
- &cdef_find_dir_c)));
-#endif
-#if HAVE_SSSE3
-INSTANTIATE_TEST_CASE_P(
- SSSE3, CDEFBlockTest,
- ::testing::Combine(::testing::Values(&cdef_filter_block_ssse3),
- ::testing::Values(&cdef_filter_block_c),
- ::testing::Values(BLOCK_4X4, BLOCK_4X8, BLOCK_8X4,
- BLOCK_8X8),
- ::testing::Range(0, 16), ::testing::Range(8, 13, 2)));
-INSTANTIATE_TEST_CASE_P(SSSE3, CDEFFindDirTest,
- ::testing::Values(make_tuple(&cdef_find_dir_ssse3,
- &cdef_find_dir_c)));
-#endif
-
-#if HAVE_SSE4_1
-INSTANTIATE_TEST_CASE_P(
- SSE4_1, CDEFBlockTest,
- ::testing::Combine(::testing::Values(&cdef_filter_block_sse4_1),
- ::testing::Values(&cdef_filter_block_c),
- ::testing::Values(BLOCK_4X4, BLOCK_4X8, BLOCK_8X4,
- BLOCK_8X8),
- ::testing::Range(0, 16), ::testing::Range(8, 13, 2)));
-INSTANTIATE_TEST_CASE_P(SSE4_1, CDEFFindDirTest,
- ::testing::Values(make_tuple(&cdef_find_dir_sse4_1,
- &cdef_find_dir_c)));
-#endif
-
-#if HAVE_AVX2
-INSTANTIATE_TEST_CASE_P(
- AVX2, CDEFBlockTest,
- ::testing::Combine(::testing::Values(&cdef_filter_block_avx2),
- ::testing::Values(&cdef_filter_block_c),
- ::testing::Values(BLOCK_4X4, BLOCK_4X8, BLOCK_8X4,
- BLOCK_8X8),
- ::testing::Range(0, 16), ::testing::Range(8, 13, 2)));
-INSTANTIATE_TEST_CASE_P(AVX2, CDEFFindDirTest,
- ::testing::Values(make_tuple(&cdef_find_dir_avx2,
- &cdef_find_dir_c)));
-#endif
-
-#if HAVE_NEON
-INSTANTIATE_TEST_CASE_P(
- NEON, CDEFBlockTest,
- ::testing::Combine(::testing::Values(&cdef_filter_block_neon),
- ::testing::Values(&cdef_filter_block_c),
- ::testing::Values(BLOCK_4X4, BLOCK_4X8, BLOCK_8X4,
- BLOCK_8X8),
- ::testing::Range(0, 16), ::testing::Range(8, 13, 2)));
-INSTANTIATE_TEST_CASE_P(NEON, CDEFFindDirTest,
- ::testing::Values(make_tuple(&cdef_find_dir_neon,
- &cdef_find_dir_c)));
-#endif
-
-// Test speed for all supported architectures
-#if HAVE_SSE2
-INSTANTIATE_TEST_CASE_P(
- SSE2, CDEFSpeedTest,
- ::testing::Combine(::testing::Values(&cdef_filter_block_sse2),
- ::testing::Values(&cdef_filter_block_c),
- ::testing::Values(BLOCK_4X4, BLOCK_4X8, BLOCK_8X4,
- BLOCK_8X8),
- ::testing::Range(0, 16), ::testing::Range(8, 13, 2)));
-INSTANTIATE_TEST_CASE_P(SSE2, CDEFFindDirSpeedTest,
- ::testing::Values(make_tuple(&cdef_find_dir_sse2,
- &cdef_find_dir_c)));
-#endif
-
-#if HAVE_SSSE3
-INSTANTIATE_TEST_CASE_P(
- SSSE3, CDEFSpeedTest,
- ::testing::Combine(::testing::Values(&cdef_filter_block_ssse3),
- ::testing::Values(&cdef_filter_block_c),
- ::testing::Values(BLOCK_4X4, BLOCK_4X8, BLOCK_8X4,
- BLOCK_8X8),
- ::testing::Range(0, 16), ::testing::Range(8, 13, 2)));
-INSTANTIATE_TEST_CASE_P(SSSE3, CDEFFindDirSpeedTest,
- ::testing::Values(make_tuple(&cdef_find_dir_ssse3,
- &cdef_find_dir_c)));
-#endif
-
-#if HAVE_SSE4_1
-INSTANTIATE_TEST_CASE_P(
- SSE4_1, CDEFSpeedTest,
- ::testing::Combine(::testing::Values(&cdef_filter_block_sse4_1),
- ::testing::Values(&cdef_filter_block_c),
- ::testing::Values(BLOCK_4X4, BLOCK_4X8, BLOCK_8X4,
- BLOCK_8X8),
- ::testing::Range(0, 16), ::testing::Range(8, 13, 2)));
-INSTANTIATE_TEST_CASE_P(SSE4_1, CDEFFindDirSpeedTest,
- ::testing::Values(make_tuple(&cdef_find_dir_sse4_1,
- &cdef_find_dir_c)));
-#endif
-
-#if HAVE_AVX2
-INSTANTIATE_TEST_CASE_P(
- AVX2, CDEFSpeedTest,
- ::testing::Combine(::testing::Values(&cdef_filter_block_avx2),
- ::testing::Values(&cdef_filter_block_c),
- ::testing::Values(BLOCK_4X4, BLOCK_4X8, BLOCK_8X4,
- BLOCK_8X8),
- ::testing::Range(0, 16), ::testing::Range(8, 13, 2)));
-INSTANTIATE_TEST_CASE_P(AVX2, CDEFFindDirSpeedTest,
- ::testing::Values(make_tuple(&cdef_find_dir_avx2,
- &cdef_find_dir_c)));
-#endif
-
-#if HAVE_NEON
-INSTANTIATE_TEST_CASE_P(
- NEON, CDEFSpeedTest,
- ::testing::Combine(::testing::Values(&cdef_filter_block_neon),
- ::testing::Values(&cdef_filter_block_c),
- ::testing::Values(BLOCK_4X4, BLOCK_4X8, BLOCK_8X4,
- BLOCK_8X8),
- ::testing::Range(0, 16), ::testing::Range(8, 13, 2)));
-INSTANTIATE_TEST_CASE_P(NEON, CDEFFindDirSpeedTest,
- ::testing::Values(make_tuple(&cdef_find_dir_neon,
- &cdef_find_dir_c)));
-#endif
-
-#endif // defined(_WIN64) || !defined(_MSC_VER)
-} // namespace
diff --git a/third_party/aom/test/cfl_test.cc b/third_party/aom/test/cfl_test.cc
deleted file mode 100644
index e4d438d6a..000000000
--- a/third_party/aom/test/cfl_test.cc
+++ /dev/null
@@ -1,567 +0,0 @@
-/*
- * Copyright (c) 2017, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#include "third_party/googletest/src/googletest/include/gtest/gtest.h"
-
-#include "config/av1_rtcd.h"
-
-#include "aom_ports/aom_timer.h"
-#include "test/util.h"
-#include "test/acm_random.h"
-
-using ::testing::make_tuple;
-
-using libaom_test::ACMRandom;
-
-#define NUM_ITERATIONS (100)
-#define NUM_ITERATIONS_SPEED (INT16_MAX)
-
-#define ALL_CFL_TX_SIZES(function) \
- make_tuple(TX_4X4, &function), make_tuple(TX_4X8, &function), \
- make_tuple(TX_4X16, &function), make_tuple(TX_8X4, &function), \
- make_tuple(TX_8X8, &function), make_tuple(TX_8X16, &function), \
- make_tuple(TX_8X32, &function), make_tuple(TX_16X4, &function), \
- make_tuple(TX_16X8, &function), make_tuple(TX_16X16, &function), \
- make_tuple(TX_16X32, &function), make_tuple(TX_32X8, &function), \
- make_tuple(TX_32X16, &function), make_tuple(TX_32X32, &function)
-
-#define ALL_CFL_TX_SIZES_SUBSAMPLE(fun420, fun422, fun444) \
- make_tuple(TX_4X4, &fun420, &fun422, &fun444), \
- make_tuple(TX_4X8, &fun420, &fun422, &fun444), \
- make_tuple(TX_4X16, &fun420, &fun422, &fun444), \
- make_tuple(TX_8X4, &fun420, &fun422, &fun444), \
- make_tuple(TX_8X8, &fun420, &fun422, &fun444), \
- make_tuple(TX_8X16, &fun420, &fun422, &fun444), \
- make_tuple(TX_8X32, &fun420, &fun422, &fun444), \
- make_tuple(TX_16X4, &fun420, &fun422, &fun444), \
- make_tuple(TX_16X8, &fun420, &fun422, &fun444), \
- make_tuple(TX_16X16, &fun420, &fun422, &fun444), \
- make_tuple(TX_16X32, &fun420, &fun422, &fun444), \
- make_tuple(TX_32X8, &fun420, &fun422, &fun444), \
- make_tuple(TX_32X16, &fun420, &fun422, &fun444), \
- make_tuple(TX_32X32, &fun420, &fun422, &fun444)
-
-namespace {
-
-template <typename A>
-static void assert_eq(const A *a, const A *b, int width, int height) {
- for (int j = 0; j < height; j++) {
- for (int i = 0; i < width; i++) {
- ASSERT_EQ(a[j * CFL_BUF_LINE + i], b[j * CFL_BUF_LINE + i]);
- }
- }
-}
-
-static void assertFaster(int ref_elapsed_time, int elapsed_time) {
- EXPECT_GT(ref_elapsed_time, elapsed_time)
- << "Error: CFLSubtractSpeedTest, SIMD slower than C." << std::endl
- << "C time: " << ref_elapsed_time << " us" << std::endl
- << "SIMD time: " << elapsed_time << " us" << std::endl;
-}
-
-static void printSpeed(int ref_elapsed_time, int elapsed_time, int width,
- int height) {
- std::cout.precision(2);
- std::cout << "[ ] " << width << "x" << height
- << ": C time = " << ref_elapsed_time
- << " us, SIMD time = " << elapsed_time << " us"
- << " (~" << ref_elapsed_time / (double)elapsed_time << "x) "
- << std::endl;
-}
-
-class CFLTest {
- public:
- virtual ~CFLTest() {}
- void init(TX_SIZE tx) {
- tx_size = tx;
- width = tx_size_wide[tx_size];
- height = tx_size_high[tx_size];
- rnd(ACMRandom::DeterministicSeed());
- }
-
- protected:
- TX_SIZE tx_size;
- int width;
- int height;
- ACMRandom rnd;
-};
-
-template <typename I>
-class CFLTestWithData : public CFLTest {
- public:
- virtual ~CFLTestWithData() {}
-
- protected:
- I data[CFL_BUF_SQUARE];
- I data_ref[CFL_BUF_SQUARE];
- void randData(I (ACMRandom::*random)()) {
- for (int j = 0; j < this->height; j++) {
- for (int i = 0; i < this->width; i++) {
- const I d = (this->rnd.*random)();
- data[j * CFL_BUF_LINE + i] = d;
- data_ref[j * CFL_BUF_LINE + i] = d;
- }
- }
- }
-};
-
-template <typename I>
-class CFLTestWithAlignedData : public CFLTest {
- public:
- CFLTestWithAlignedData() {
- chroma_pels_ref =
- reinterpret_cast<I *>(aom_memalign(32, sizeof(I) * CFL_BUF_SQUARE));
- chroma_pels =
- reinterpret_cast<I *>(aom_memalign(32, sizeof(I) * CFL_BUF_SQUARE));
- sub_luma_pels_ref = reinterpret_cast<int16_t *>(
- aom_memalign(32, sizeof(int16_t) * CFL_BUF_SQUARE));
- sub_luma_pels = reinterpret_cast<int16_t *>(
- aom_memalign(32, sizeof(int16_t) * CFL_BUF_SQUARE));
- memset(chroma_pels_ref, 0, sizeof(I) * CFL_BUF_SQUARE);
- memset(chroma_pels, 0, sizeof(I) * CFL_BUF_SQUARE);
- memset(sub_luma_pels_ref, 0, sizeof(int16_t) * CFL_BUF_SQUARE);
- memset(sub_luma_pels, 0, sizeof(int16_t) * CFL_BUF_SQUARE);
- }
- ~CFLTestWithAlignedData() {
- aom_free(chroma_pels_ref);
- aom_free(sub_luma_pels_ref);
- aom_free(chroma_pels);
- aom_free(sub_luma_pels);
- }
-
- protected:
- I *chroma_pels_ref;
- I *chroma_pels;
- int16_t *sub_luma_pels_ref;
- int16_t *sub_luma_pels;
- int alpha_q3;
- I dc;
- void randData(int bd) {
- alpha_q3 = this->rnd(33) - 16;
- dc = this->rnd(1 << bd);
- for (int j = 0; j < this->height; j++) {
- for (int i = 0; i < this->width; i++) {
- chroma_pels[j * CFL_BUF_LINE + i] = dc;
- chroma_pels_ref[j * CFL_BUF_LINE + i] = dc;
- sub_luma_pels_ref[j * CFL_BUF_LINE + i] =
- sub_luma_pels[j * CFL_BUF_LINE + i] = this->rnd(1 << (bd + 3));
- }
- }
- }
-};
-
-typedef cfl_subtract_average_fn (*sub_avg_fn)(TX_SIZE tx_size);
-typedef ::testing::tuple<TX_SIZE, sub_avg_fn> sub_avg_param;
-class CFLSubAvgTest : public ::testing::TestWithParam<sub_avg_param>,
- public CFLTestWithData<int16_t> {
- public:
- virtual void SetUp() {
- CFLTest::init(::testing::get<0>(this->GetParam()));
- sub_avg = ::testing::get<1>(this->GetParam())(tx_size);
- sub_avg_ref = get_subtract_average_fn_c(tx_size);
- }
- virtual ~CFLSubAvgTest() {}
-
- protected:
- cfl_subtract_average_fn sub_avg;
- cfl_subtract_average_fn sub_avg_ref;
-};
-
-TEST_P(CFLSubAvgTest, SubAvgTest) {
- for (int it = 0; it < NUM_ITERATIONS; it++) {
- randData(&ACMRandom::Rand15Signed);
- sub_avg((uint16_t *)data, data);
- sub_avg_ref((uint16_t *)data_ref, data_ref);
- assert_eq<int16_t>(data, data_ref, width, height);
- }
-}
-
-TEST_P(CFLSubAvgTest, DISABLED_SubAvgSpeedTest) {
- aom_usec_timer ref_timer;
- aom_usec_timer timer;
- randData(&ACMRandom::Rand15Signed);
- aom_usec_timer_start(&ref_timer);
- for (int k = 0; k < NUM_ITERATIONS_SPEED; k++) {
- sub_avg_ref((uint16_t *)data_ref, data_ref);
- }
- aom_usec_timer_mark(&ref_timer);
- int ref_elapsed_time = (int)aom_usec_timer_elapsed(&ref_timer);
- aom_usec_timer_start(&timer);
- for (int k = 0; k < NUM_ITERATIONS_SPEED; k++) {
- sub_avg((uint16_t *)data, data);
- }
- aom_usec_timer_mark(&timer);
- int elapsed_time = (int)aom_usec_timer_elapsed(&timer);
- printSpeed(ref_elapsed_time, elapsed_time, width, height);
- assertFaster(ref_elapsed_time, elapsed_time);
-}
-
-template <typename S, typename T, typename I>
-class CFLSubsampleTest : public ::testing::TestWithParam<S>,
- public CFLTestWithData<I> {
- public:
- virtual void SetUp() {
- CFLTest::init(::testing::get<0>(this->GetParam()));
- fun_420 = ::testing::get<1>(this->GetParam())(this->tx_size);
- fun_422 = ::testing::get<2>(this->GetParam())(this->tx_size);
- fun_444 = ::testing::get<3>(this->GetParam())(this->tx_size);
- }
-
- protected:
- T fun_420;
- T fun_422;
- T fun_444;
- T fun_420_ref;
- T fun_422_ref;
- T fun_444_ref;
-
- void subsampleTest(T fun, T fun_ref, int sub_width, int sub_height,
- I (ACMRandom::*random)()) {
- uint16_t sub_luma_pels[CFL_BUF_SQUARE];
- uint16_t sub_luma_pels_ref[CFL_BUF_SQUARE];
-
- for (int it = 0; it < NUM_ITERATIONS; it++) {
- CFLTestWithData<I>::randData(random);
- fun(this->data, CFL_BUF_LINE, sub_luma_pels);
- fun_ref(this->data_ref, CFL_BUF_LINE, sub_luma_pels_ref);
- assert_eq<uint16_t>(sub_luma_pels, sub_luma_pels_ref, sub_width,
- sub_height);
- }
- }
-
- void subsampleSpeedTest(T fun, T fun_ref, I (ACMRandom::*random)()) {
- uint16_t sub_luma_pels[CFL_BUF_SQUARE];
- uint16_t sub_luma_pels_ref[CFL_BUF_SQUARE];
- aom_usec_timer ref_timer;
- aom_usec_timer timer;
-
- CFLTestWithData<I>::randData(random);
- aom_usec_timer_start(&ref_timer);
- for (int k = 0; k < NUM_ITERATIONS_SPEED; k++) {
- fun_ref(this->data_ref, CFL_BUF_LINE, sub_luma_pels);
- }
- aom_usec_timer_mark(&ref_timer);
- int ref_elapsed_time = (int)aom_usec_timer_elapsed(&ref_timer);
- aom_usec_timer_start(&timer);
- for (int k = 0; k < NUM_ITERATIONS_SPEED; k++) {
- fun(this->data, CFL_BUF_LINE, sub_luma_pels_ref);
- }
- aom_usec_timer_mark(&timer);
- int elapsed_time = (int)aom_usec_timer_elapsed(&timer);
- printSpeed(ref_elapsed_time, elapsed_time, this->width, this->height);
- assertFaster(ref_elapsed_time, elapsed_time);
- }
-};
-
-typedef cfl_subsample_lbd_fn (*get_subsample_lbd_fn)(TX_SIZE tx_size);
-typedef ::testing::tuple<TX_SIZE, get_subsample_lbd_fn, get_subsample_lbd_fn,
- get_subsample_lbd_fn>
- subsample_lbd_param;
-class CFLSubsampleLBDTest
- : public CFLSubsampleTest<subsample_lbd_param, cfl_subsample_lbd_fn,
- uint8_t> {
- public:
- virtual ~CFLSubsampleLBDTest() {}
- virtual void SetUp() {
- CFLSubsampleTest::SetUp();
- fun_420_ref = cfl_get_luma_subsampling_420_lbd_c(tx_size);
- fun_422_ref = cfl_get_luma_subsampling_422_lbd_c(tx_size);
- fun_444_ref = cfl_get_luma_subsampling_444_lbd_c(tx_size);
- }
-};
-
-TEST_P(CFLSubsampleLBDTest, SubsampleLBD420Test) {
- subsampleTest(fun_420, fun_420_ref, width >> 1, height >> 1,
- &ACMRandom::Rand8);
-}
-
-TEST_P(CFLSubsampleLBDTest, DISABLED_SubsampleLBD420SpeedTest) {
- subsampleSpeedTest(fun_420, fun_420_ref, &ACMRandom::Rand8);
-}
-
-TEST_P(CFLSubsampleLBDTest, SubsampleLBD422Test) {
- subsampleTest(fun_422, fun_422_ref, width >> 1, height, &ACMRandom::Rand8);
-}
-
-TEST_P(CFLSubsampleLBDTest, DISABLED_SubsampleLBD422SpeedTest) {
- subsampleSpeedTest(fun_422, fun_422_ref, &ACMRandom::Rand8);
-}
-
-TEST_P(CFLSubsampleLBDTest, SubsampleLBD444Test) {
- subsampleTest(fun_444, fun_444_ref, width, height, &ACMRandom::Rand8);
-}
-
-TEST_P(CFLSubsampleLBDTest, DISABLED_SubsampleLBD444SpeedTest) {
- subsampleSpeedTest(fun_444, fun_444_ref, &ACMRandom::Rand8);
-}
-
-typedef cfl_subsample_hbd_fn (*get_subsample_hbd_fn)(TX_SIZE tx_size);
-typedef ::testing::tuple<TX_SIZE, get_subsample_hbd_fn, get_subsample_hbd_fn,
- get_subsample_hbd_fn>
- subsample_hbd_param;
-class CFLSubsampleHBDTest
- : public CFLSubsampleTest<subsample_hbd_param, cfl_subsample_hbd_fn,
- uint16_t> {
- public:
- virtual ~CFLSubsampleHBDTest() {}
- virtual void SetUp() {
- CFLSubsampleTest::SetUp();
- fun_420_ref = cfl_get_luma_subsampling_420_hbd_c(tx_size);
- fun_422_ref = cfl_get_luma_subsampling_422_hbd_c(tx_size);
- fun_444_ref = cfl_get_luma_subsampling_444_hbd_c(tx_size);
- }
-};
-
-TEST_P(CFLSubsampleHBDTest, SubsampleHBD420Test) {
- subsampleTest(fun_420, fun_420_ref, width >> 1, height >> 1,
- &ACMRandom::Rand12);
-}
-
-TEST_P(CFLSubsampleHBDTest, DISABLED_SubsampleHBD420SpeedTest) {
- subsampleSpeedTest(fun_420, fun_420_ref, &ACMRandom::Rand12);
-}
-
-TEST_P(CFLSubsampleHBDTest, SubsampleHBD422Test) {
- subsampleTest(fun_422, fun_422_ref, width >> 1, height, &ACMRandom::Rand12);
-}
-
-TEST_P(CFLSubsampleHBDTest, DISABLED_SubsampleHBD422SpeedTest) {
- subsampleSpeedTest(fun_422, fun_422_ref, &ACMRandom::Rand12);
-}
-
-TEST_P(CFLSubsampleHBDTest, SubsampleHBD444Test) {
- subsampleTest(fun_444, fun_444_ref, width, height, &ACMRandom::Rand12);
-}
-
-TEST_P(CFLSubsampleHBDTest, DISABLED_SubsampleHBD444SpeedTest) {
- subsampleSpeedTest(fun_444, fun_444_ref, &ACMRandom::Rand12);
-}
-
-typedef cfl_predict_lbd_fn (*get_predict_fn)(TX_SIZE tx_size);
-typedef ::testing::tuple<TX_SIZE, get_predict_fn> predict_param;
-class CFLPredictTest : public ::testing::TestWithParam<predict_param>,
- public CFLTestWithAlignedData<uint8_t> {
- public:
- virtual void SetUp() {
- CFLTest::init(::testing::get<0>(this->GetParam()));
- predict = ::testing::get<1>(this->GetParam())(tx_size);
- predict_ref = get_predict_lbd_fn_c(tx_size);
- }
- virtual ~CFLPredictTest() {}
-
- protected:
- cfl_predict_lbd_fn predict;
- cfl_predict_lbd_fn predict_ref;
-};
-
-TEST_P(CFLPredictTest, PredictTest) {
- for (int it = 0; it < NUM_ITERATIONS; it++) {
- randData(8);
- predict(sub_luma_pels, chroma_pels, CFL_BUF_LINE, alpha_q3);
- predict_ref(sub_luma_pels_ref, chroma_pels_ref, CFL_BUF_LINE, alpha_q3);
- assert_eq<uint8_t>(chroma_pels, chroma_pels_ref, width, height);
- }
-}
-TEST_P(CFLPredictTest, DISABLED_PredictSpeedTest) {
- aom_usec_timer ref_timer;
- aom_usec_timer timer;
- randData(8);
- aom_usec_timer_start(&ref_timer);
- for (int k = 0; k < NUM_ITERATIONS_SPEED; k++) {
- predict_ref(sub_luma_pels_ref, chroma_pels_ref, CFL_BUF_LINE, alpha_q3);
- }
- aom_usec_timer_mark(&ref_timer);
- int ref_elapsed_time = (int)aom_usec_timer_elapsed(&ref_timer);
-
- aom_usec_timer_start(&timer);
- for (int k = 0; k < NUM_ITERATIONS_SPEED; k++) {
- predict(sub_luma_pels, chroma_pels, CFL_BUF_LINE, alpha_q3);
- }
- aom_usec_timer_mark(&timer);
- int elapsed_time = (int)aom_usec_timer_elapsed(&timer);
- printSpeed(ref_elapsed_time, elapsed_time, width, height);
- assertFaster(ref_elapsed_time, elapsed_time);
-}
-
-typedef cfl_predict_hbd_fn (*get_predict_fn_hbd)(TX_SIZE tx_size);
-typedef ::testing::tuple<TX_SIZE, get_predict_fn_hbd> predict_param_hbd;
-class CFLPredictHBDTest : public ::testing::TestWithParam<predict_param_hbd>,
- public CFLTestWithAlignedData<uint16_t> {
- public:
- virtual void SetUp() {
- CFLTest::init(::testing::get<0>(this->GetParam()));
- predict = ::testing::get<1>(this->GetParam())(tx_size);
- predict_ref = get_predict_hbd_fn_c(tx_size);
- }
- virtual ~CFLPredictHBDTest() {}
-
- protected:
- cfl_predict_hbd_fn predict;
- cfl_predict_hbd_fn predict_ref;
-};
-
-TEST_P(CFLPredictHBDTest, PredictHBDTest) {
- int bd = 12;
- for (int it = 0; it < NUM_ITERATIONS; it++) {
- randData(bd);
- predict(sub_luma_pels, chroma_pels, CFL_BUF_LINE, alpha_q3, bd);
- predict_ref(sub_luma_pels_ref, chroma_pels_ref, CFL_BUF_LINE, alpha_q3, bd);
- assert_eq<uint16_t>(chroma_pels, chroma_pels_ref, width, height);
- }
-}
-TEST_P(CFLPredictHBDTest, DISABLED_PredictHBDSpeedTest) {
- aom_usec_timer ref_timer;
- aom_usec_timer timer;
- const int bd = 12;
- randData(bd);
- aom_usec_timer_start(&ref_timer);
- for (int k = 0; k < NUM_ITERATIONS_SPEED; k++) {
- predict_ref(sub_luma_pels_ref, chroma_pels_ref, CFL_BUF_LINE, alpha_q3, bd);
- }
- aom_usec_timer_mark(&ref_timer);
- int ref_elapsed_time = (int)aom_usec_timer_elapsed(&ref_timer);
-
- aom_usec_timer_start(&timer);
- for (int k = 0; k < NUM_ITERATIONS_SPEED; k++) {
- predict(sub_luma_pels, chroma_pels, CFL_BUF_LINE, alpha_q3, bd);
- }
- aom_usec_timer_mark(&timer);
- int elapsed_time = (int)aom_usec_timer_elapsed(&timer);
- printSpeed(ref_elapsed_time, elapsed_time, width, height);
- assertFaster(ref_elapsed_time, elapsed_time);
-}
-
-#if HAVE_SSE2
-const sub_avg_param sub_avg_sizes_sse2[] = { ALL_CFL_TX_SIZES(
- get_subtract_average_fn_sse2) };
-
-INSTANTIATE_TEST_CASE_P(SSE2, CFLSubAvgTest,
- ::testing::ValuesIn(sub_avg_sizes_sse2));
-
-#endif
-
-#if HAVE_SSSE3
-const subsample_lbd_param subsample_lbd_sizes_ssse3[] = {
- ALL_CFL_TX_SIZES_SUBSAMPLE(cfl_get_luma_subsampling_420_lbd_ssse3,
- cfl_get_luma_subsampling_422_lbd_ssse3,
- cfl_get_luma_subsampling_444_lbd_ssse3)
-};
-
-const subsample_hbd_param subsample_hbd_sizes_ssse3[] = {
- ALL_CFL_TX_SIZES_SUBSAMPLE(cfl_get_luma_subsampling_420_hbd_ssse3,
- cfl_get_luma_subsampling_422_hbd_ssse3,
- cfl_get_luma_subsampling_444_hbd_ssse3)
-};
-
-const predict_param predict_sizes_ssse3[] = { ALL_CFL_TX_SIZES(
- get_predict_lbd_fn_ssse3) };
-
-const predict_param_hbd predict_sizes_hbd_ssse3[] = { ALL_CFL_TX_SIZES(
- get_predict_hbd_fn_ssse3) };
-
-INSTANTIATE_TEST_CASE_P(SSSE3, CFLSubsampleLBDTest,
- ::testing::ValuesIn(subsample_lbd_sizes_ssse3));
-
-INSTANTIATE_TEST_CASE_P(SSSE3, CFLSubsampleHBDTest,
- ::testing::ValuesIn(subsample_hbd_sizes_ssse3));
-
-INSTANTIATE_TEST_CASE_P(SSSE3, CFLPredictTest,
- ::testing::ValuesIn(predict_sizes_ssse3));
-
-INSTANTIATE_TEST_CASE_P(SSSE3, CFLPredictHBDTest,
- ::testing::ValuesIn(predict_sizes_hbd_ssse3));
-#endif
-
-#if HAVE_AVX2
-const sub_avg_param sub_avg_sizes_avx2[] = { ALL_CFL_TX_SIZES(
- get_subtract_average_fn_avx2) };
-
-const subsample_lbd_param subsample_lbd_sizes_avx2[] = {
- ALL_CFL_TX_SIZES_SUBSAMPLE(cfl_get_luma_subsampling_420_lbd_avx2,
- cfl_get_luma_subsampling_422_lbd_avx2,
- cfl_get_luma_subsampling_444_lbd_avx2)
-};
-
-const subsample_hbd_param subsample_hbd_sizes_avx2[] = {
- ALL_CFL_TX_SIZES_SUBSAMPLE(cfl_get_luma_subsampling_420_hbd_avx2,
- cfl_get_luma_subsampling_422_hbd_avx2,
- cfl_get_luma_subsampling_444_hbd_avx2)
-};
-
-const predict_param predict_sizes_avx2[] = { ALL_CFL_TX_SIZES(
- get_predict_lbd_fn_avx2) };
-
-const predict_param_hbd predict_sizes_hbd_avx2[] = { ALL_CFL_TX_SIZES(
- get_predict_hbd_fn_avx2) };
-
-INSTANTIATE_TEST_CASE_P(AVX2, CFLSubAvgTest,
- ::testing::ValuesIn(sub_avg_sizes_avx2));
-
-INSTANTIATE_TEST_CASE_P(AVX2, CFLSubsampleLBDTest,
- ::testing::ValuesIn(subsample_lbd_sizes_avx2));
-
-INSTANTIATE_TEST_CASE_P(AVX2, CFLSubsampleHBDTest,
- ::testing::ValuesIn(subsample_hbd_sizes_avx2));
-
-INSTANTIATE_TEST_CASE_P(AVX2, CFLPredictTest,
- ::testing::ValuesIn(predict_sizes_avx2));
-
-INSTANTIATE_TEST_CASE_P(AVX2, CFLPredictHBDTest,
- ::testing::ValuesIn(predict_sizes_hbd_avx2));
-#endif
-
-#if HAVE_NEON
-
-const sub_avg_param sub_avg_sizes_neon[] = { ALL_CFL_TX_SIZES(
- get_subtract_average_fn_neon) };
-
-const subsample_lbd_param subsample_lbd_sizes_neon[] = {
- ALL_CFL_TX_SIZES_SUBSAMPLE(cfl_get_luma_subsampling_420_lbd_neon,
- cfl_get_luma_subsampling_422_lbd_neon,
- cfl_get_luma_subsampling_444_lbd_neon)
-};
-
-const subsample_hbd_param subsample_hbd_sizes_neon[] = {
- ALL_CFL_TX_SIZES_SUBSAMPLE(cfl_get_luma_subsampling_420_hbd_neon,
- cfl_get_luma_subsampling_422_hbd_neon,
- cfl_get_luma_subsampling_444_hbd_neon)
-};
-
-const predict_param predict_sizes_neon[] = { ALL_CFL_TX_SIZES(
- get_predict_lbd_fn_neon) };
-
-const predict_param_hbd predict_sizes_hbd_neon[] = { ALL_CFL_TX_SIZES(
- get_predict_hbd_fn_neon) };
-
-INSTANTIATE_TEST_CASE_P(NEON, CFLSubAvgTest,
- ::testing::ValuesIn(sub_avg_sizes_neon));
-
-INSTANTIATE_TEST_CASE_P(NEON, CFLSubsampleLBDTest,
- ::testing::ValuesIn(subsample_lbd_sizes_neon));
-
-INSTANTIATE_TEST_CASE_P(NEON, CFLSubsampleHBDTest,
- ::testing::ValuesIn(subsample_hbd_sizes_neon));
-
-INSTANTIATE_TEST_CASE_P(NEON, CFLPredictTest,
- ::testing::ValuesIn(predict_sizes_neon));
-
-INSTANTIATE_TEST_CASE_P(NEON, CFLPredictHBDTest,
- ::testing::ValuesIn(predict_sizes_hbd_neon));
-#endif
-
-#if HAVE_VSX
-const sub_avg_param sub_avg_sizes_vsx[] = { ALL_CFL_TX_SIZES(
- get_subtract_average_fn_vsx) };
-
-INSTANTIATE_TEST_CASE_P(VSX, CFLSubAvgTest,
- ::testing::ValuesIn(sub_avg_sizes_vsx));
-#endif
-} // namespace
diff --git a/third_party/aom/test/clear_system_state.h b/third_party/aom/test/clear_system_state.h
deleted file mode 100644
index d38ff5dd5..000000000
--- a/third_party/aom/test/clear_system_state.h
+++ /dev/null
@@ -1,31 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-#ifndef AOM_TEST_CLEAR_SYSTEM_STATE_H_
-#define AOM_TEST_CLEAR_SYSTEM_STATE_H_
-
-#include "config/aom_config.h"
-
-#if ARCH_X86 || ARCH_X86_64
-#include "aom_ports/x86.h"
-#endif
-
-namespace libaom_test {
-
-// Reset system to a known state. This function should be used for all non-API
-// test cases.
-inline void ClearSystemState() {
-#if ARCH_X86 || ARCH_X86_64
- aom_reset_mmx_state();
-#endif
-}
-
-} // namespace libaom_test
-#endif // AOM_TEST_CLEAR_SYSTEM_STATE_H_
diff --git a/third_party/aom/test/codec_factory.h b/third_party/aom/test/codec_factory.h
deleted file mode 100644
index dd99110ee..000000000
--- a/third_party/aom/test/codec_factory.h
+++ /dev/null
@@ -1,170 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-#ifndef AOM_TEST_CODEC_FACTORY_H_
-#define AOM_TEST_CODEC_FACTORY_H_
-
-#include "config/aom_config.h"
-
-#include "aom/aom_decoder.h"
-#include "aom/aom_encoder.h"
-#if CONFIG_AV1_ENCODER
-#include "aom/aomcx.h"
-#endif
-#if CONFIG_AV1_DECODER
-#include "aom/aomdx.h"
-#endif
-
-#include "test/decode_test_driver.h"
-#include "test/encode_test_driver.h"
-namespace libaom_test {
-
-const int kCodecFactoryParam = 0;
-
-class CodecFactory {
- public:
- CodecFactory() {}
-
- virtual ~CodecFactory() {}
-
- virtual Decoder *CreateDecoder(aom_codec_dec_cfg_t cfg) const = 0;
-
- virtual Decoder *CreateDecoder(aom_codec_dec_cfg_t cfg,
- const aom_codec_flags_t flags) const = 0;
-
- virtual Encoder *CreateEncoder(aom_codec_enc_cfg_t cfg,
- const unsigned long init_flags,
- TwopassStatsStore *stats) const = 0;
-
- virtual aom_codec_err_t DefaultEncoderConfig(aom_codec_enc_cfg_t *cfg,
- int usage) const = 0;
-};
-
-/* Provide CodecTestWith<n>Params classes for a variable number of parameters
- * to avoid having to include a pointer to the CodecFactory in every test
- * definition.
- */
-template <class T1>
-class CodecTestWithParam
- : public ::testing::TestWithParam<
- ::testing::tuple<const libaom_test::CodecFactory *, T1> > {};
-
-template <class T1, class T2>
-class CodecTestWith2Params
- : public ::testing::TestWithParam<
- ::testing::tuple<const libaom_test::CodecFactory *, T1, T2> > {};
-
-template <class T1, class T2, class T3>
-class CodecTestWith3Params
- : public ::testing::TestWithParam<
- ::testing::tuple<const libaom_test::CodecFactory *, T1, T2, T3> > {};
-
-template <class T1, class T2, class T3, class T4>
-class CodecTestWith4Params
- : public ::testing::TestWithParam< ::testing::tuple<
- const libaom_test::CodecFactory *, T1, T2, T3, T4> > {};
-
-template <class T1, class T2, class T3, class T4, class T5>
-class CodecTestWith5Params
- : public ::testing::TestWithParam< ::testing::tuple<
- const libaom_test::CodecFactory *, T1, T2, T3, T4, T5> > {};
-
-/*
- * AV1 Codec Definitions
- */
-class AV1Decoder : public Decoder {
- public:
- explicit AV1Decoder(aom_codec_dec_cfg_t cfg) : Decoder(cfg) {}
-
- AV1Decoder(aom_codec_dec_cfg_t cfg, const aom_codec_flags_t flag)
- : Decoder(cfg, flag) {}
-
- protected:
- virtual aom_codec_iface_t *CodecInterface() const {
-#if CONFIG_AV1_DECODER
- return aom_codec_av1_dx();
-#else
- return NULL;
-#endif
- }
-};
-
-class AV1Encoder : public Encoder {
- public:
- AV1Encoder(aom_codec_enc_cfg_t cfg, const uint32_t init_flags,
- TwopassStatsStore *stats)
- : Encoder(cfg, init_flags, stats) {}
-
- protected:
- virtual aom_codec_iface_t *CodecInterface() const {
-#if CONFIG_AV1_ENCODER
- return aom_codec_av1_cx();
-#else
- return NULL;
-#endif
- }
-};
-
-class AV1CodecFactory : public CodecFactory {
- public:
- AV1CodecFactory() : CodecFactory() {}
-
- virtual Decoder *CreateDecoder(aom_codec_dec_cfg_t cfg) const {
- return CreateDecoder(cfg, 0);
- }
-
- virtual Decoder *CreateDecoder(aom_codec_dec_cfg_t cfg,
- const aom_codec_flags_t flags) const {
-#if CONFIG_AV1_DECODER
- return new AV1Decoder(cfg, flags);
-#else
- (void)cfg;
- (void)flags;
- return NULL;
-#endif
- }
-
- virtual Encoder *CreateEncoder(aom_codec_enc_cfg_t cfg,
- const unsigned long init_flags,
- TwopassStatsStore *stats) const {
-#if CONFIG_AV1_ENCODER
- return new AV1Encoder(cfg, init_flags, stats);
-#else
- (void)cfg;
- (void)init_flags;
- (void)stats;
- return NULL;
-#endif
- }
-
- virtual aom_codec_err_t DefaultEncoderConfig(aom_codec_enc_cfg_t *cfg,
- int usage) const {
-#if CONFIG_AV1_ENCODER
- return aom_codec_enc_config_default(aom_codec_av1_cx(), cfg, usage);
-#else
- (void)cfg;
- (void)usage;
- return AOM_CODEC_INCAPABLE;
-#endif
- }
-};
-
-const libaom_test::AV1CodecFactory kAV1;
-
-#define AV1_INSTANTIATE_TEST_CASE(test, ...) \
- INSTANTIATE_TEST_CASE_P( \
- AV1, test, \
- ::testing::Combine( \
- ::testing::Values(static_cast<const libaom_test::CodecFactory *>( \
- &libaom_test::kAV1)), \
- __VA_ARGS__))
-
-} // namespace libaom_test
-#endif // AOM_TEST_CODEC_FACTORY_H_
diff --git a/third_party/aom/test/coding_path_sync.cc b/third_party/aom/test/coding_path_sync.cc
deleted file mode 100644
index 6735236cc..000000000
--- a/third_party/aom/test/coding_path_sync.cc
+++ /dev/null
@@ -1,205 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#include <vector>
-#include "third_party/googletest/src/googletest/include/gtest/gtest.h"
-#include "test/acm_random.h"
-
-#include "config/aom_config.h"
-
-#include "aom_ports/mem.h" // ROUND_POWER_OF_TWO
-#include "aom/aomcx.h"
-#include "aom/aomdx.h"
-#include "aom/aom_encoder.h"
-#include "aom/aom_decoder.h"
-
-using libaom_test::ACMRandom;
-namespace {
-
-class CompressedSource {
- public:
- explicit CompressedSource(int seed) : rnd_(seed), frame_count_(0) {
- aom_codec_iface_t *algo = aom_codec_av1_cx();
-
- aom_codec_enc_cfg_t cfg;
- aom_codec_enc_config_default(algo, &cfg, 0);
-
- // force the quantizer, to reduce the sensitivity on encoding choices.
- // e.g, we don't want this test to break when the rate control is modified.
- {
- const int max_q = cfg.rc_max_quantizer;
- const int min_q = cfg.rc_min_quantizer;
- const int q = rnd_.PseudoUniform(max_q - min_q + 1) + min_q;
-
- cfg.rc_end_usage = AOM_Q;
- cfg.rc_max_quantizer = q;
- cfg.rc_min_quantizer = q;
- }
-
- // choose the picture size
- {
- width_ = rnd_.PseudoUniform(kWidth - 8) + 8;
- height_ = rnd_.PseudoUniform(kHeight - 8) + 8;
- }
-
- // choose the chroma subsampling
- {
- const aom_img_fmt_t fmts[] = {
- AOM_IMG_FMT_I420,
- AOM_IMG_FMT_I422,
- AOM_IMG_FMT_I444,
- };
-
- format_ = fmts[rnd_.PseudoUniform(NELEMENTS(fmts))];
- }
-
- cfg.g_w = width_;
- cfg.g_h = height_;
- cfg.g_lag_in_frames = 0;
- if (format_ == AOM_IMG_FMT_I420)
- cfg.g_profile = 0;
- else if (format_ == AOM_IMG_FMT_I444)
- cfg.g_profile = 1;
- else if (format_ == AOM_IMG_FMT_I422)
- cfg.g_profile = 2;
-
- aom_codec_enc_init(&enc_, algo, &cfg, 0);
- }
-
- ~CompressedSource() { aom_codec_destroy(&enc_); }
-
- const aom_codec_cx_pkt_t *ReadFrame() {
- uint8_t buf[kWidth * kHeight * 3] = { 0 };
-
- // render regular pattern
- const int period = rnd_.Rand8() % 32 + 1;
- const int phase = rnd_.Rand8() % period;
-
- const int val_a = rnd_.Rand8();
- const int val_b = rnd_.Rand8();
-
- for (int i = 0; i < (int)sizeof buf; ++i)
- buf[i] = (i + phase) % period < period / 2 ? val_a : val_b;
-
- aom_image_t img;
- aom_img_wrap(&img, format_, width_, height_, 0, buf);
- aom_codec_encode(&enc_, &img, frame_count_++, 1, 0);
-
- aom_codec_iter_t iter = NULL;
-
- const aom_codec_cx_pkt_t *pkt = NULL;
-
- do {
- pkt = aom_codec_get_cx_data(&enc_, &iter);
- } while (pkt && pkt->kind != AOM_CODEC_CX_FRAME_PKT);
-
- return pkt;
- }
-
- private:
- static const int kWidth = 128;
- static const int kHeight = 128;
-
- ACMRandom rnd_;
- aom_img_fmt_t format_;
- aom_codec_ctx_t enc_;
- int frame_count_;
- int width_, height_;
-};
-
-// lowers an aom_image_t to a easily comparable/printable form
-std::vector<int16_t> Serialize(const aom_image_t *img) {
- std::vector<int16_t> bytes;
- bytes.reserve(img->d_w * img->d_h * 3);
- for (int plane = 0; plane < 3; ++plane) {
- const int w = aom_img_plane_width(img, plane);
- const int h = aom_img_plane_height(img, plane);
-
- for (int r = 0; r < h; ++r) {
- for (int c = 0; c < w; ++c) {
- unsigned char *row = img->planes[plane] + r * img->stride[plane];
- if (img->fmt & AOM_IMG_FMT_HIGHBITDEPTH)
- bytes.push_back(row[c * 2]);
- else
- bytes.push_back(row[c]);
- }
- }
- }
-
- return bytes;
-}
-
-class Decoder {
- public:
- explicit Decoder(int allowLowbitdepth) {
- aom_codec_iface_t *algo = aom_codec_av1_dx();
-
- aom_codec_dec_cfg_t cfg = aom_codec_dec_cfg_t();
- cfg.allow_lowbitdepth = allowLowbitdepth;
-
- aom_codec_dec_init(&dec_, algo, &cfg, 0);
- }
-
- ~Decoder() { aom_codec_destroy(&dec_); }
-
- std::vector<int16_t> decode(const aom_codec_cx_pkt_t *pkt) {
- aom_codec_decode(&dec_, static_cast<uint8_t *>(pkt->data.frame.buf),
- pkt->data.frame.sz, NULL);
-
- aom_codec_iter_t iter = NULL;
- return Serialize(aom_codec_get_frame(&dec_, &iter));
- }
-
- private:
- aom_codec_ctx_t dec_;
-};
-
-// Try to reveal a mismatch between LBD and HBD coding paths.
-TEST(CodingPathSync, SearchForHbdLbdMismatch) {
- const int count_tests = 10;
- for (int i = 0; i < count_tests; ++i) {
- Decoder dec_hbd(0);
- Decoder dec_lbd(1);
-
- CompressedSource enc(i);
-
- for (int k = 0; k < 3; ++k) {
- const aom_codec_cx_pkt_t *frame = enc.ReadFrame();
-
- std::vector<int16_t> lbd_yuv = dec_lbd.decode(frame);
- std::vector<int16_t> hbd_yuv = dec_hbd.decode(frame);
-
- ASSERT_EQ(lbd_yuv, hbd_yuv);
- }
- }
-}
-
-TEST(CodingPathSyncLarge, SearchForHbdLbdMismatchLarge) {
- const int count_tests = 100;
- const int seed = 1234;
- for (int i = 0; i < count_tests; ++i) {
- Decoder dec_hbd(0);
- Decoder dec_lbd(1);
-
- CompressedSource enc(seed + i);
-
- for (int k = 0; k < 5; ++k) {
- const aom_codec_cx_pkt_t *frame = enc.ReadFrame();
-
- std::vector<int16_t> lbd_yuv = dec_lbd.decode(frame);
- std::vector<int16_t> hbd_yuv = dec_hbd.decode(frame);
-
- ASSERT_EQ(lbd_yuv, hbd_yuv);
- }
- }
-}
-
-} // namespace
diff --git a/third_party/aom/test/comp_avg_pred_test.cc b/third_party/aom/test/comp_avg_pred_test.cc
deleted file mode 100644
index 9ad8973f0..000000000
--- a/third_party/aom/test/comp_avg_pred_test.cc
+++ /dev/null
@@ -1,72 +0,0 @@
-/*
- * Copyright (c) 2018, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#include "test/comp_avg_pred_test.h"
-
-using ::testing::make_tuple;
-using ::testing::tuple;
-using libaom_test::ACMRandom;
-using libaom_test::AV1JNTCOMPAVG::AV1HighBDJNTCOMPAVGTest;
-using libaom_test::AV1JNTCOMPAVG::AV1HighBDJNTCOMPAVGUPSAMPLEDTest;
-using libaom_test::AV1JNTCOMPAVG::AV1JNTCOMPAVGTest;
-using libaom_test::AV1JNTCOMPAVG::AV1JNTCOMPAVGUPSAMPLEDTest;
-
-namespace {
-
-TEST_P(AV1JNTCOMPAVGTest, DISABLED_Speed) { RunSpeedTest(GET_PARAM(0)); }
-
-TEST_P(AV1JNTCOMPAVGTest, CheckOutput) { RunCheckOutput(GET_PARAM(0)); }
-
-#if HAVE_SSSE3
-INSTANTIATE_TEST_CASE_P(
- SSSE3, AV1JNTCOMPAVGTest,
- libaom_test::AV1JNTCOMPAVG::BuildParams(aom_jnt_comp_avg_pred_ssse3));
-#endif
-
-TEST_P(AV1JNTCOMPAVGUPSAMPLEDTest, DISABLED_Speed) {
- RunSpeedTest(GET_PARAM(0));
-}
-
-TEST_P(AV1JNTCOMPAVGUPSAMPLEDTest, CheckOutput) {
- RunCheckOutput(GET_PARAM(0));
-}
-
-#if HAVE_SSSE3
-INSTANTIATE_TEST_CASE_P(SSSE3, AV1JNTCOMPAVGUPSAMPLEDTest,
- libaom_test::AV1JNTCOMPAVG::BuildParams(
- aom_jnt_comp_avg_upsampled_pred_ssse3));
-#endif
-
-TEST_P(AV1HighBDJNTCOMPAVGTest, DISABLED_Speed) { RunSpeedTest(GET_PARAM(1)); }
-
-TEST_P(AV1HighBDJNTCOMPAVGTest, CheckOutput) { RunCheckOutput(GET_PARAM(1)); }
-
-#if HAVE_SSE2
-INSTANTIATE_TEST_CASE_P(SSE2, AV1HighBDJNTCOMPAVGTest,
- libaom_test::AV1JNTCOMPAVG::BuildParams(
- aom_highbd_jnt_comp_avg_pred_sse2, 1));
-#endif
-
-TEST_P(AV1HighBDJNTCOMPAVGUPSAMPLEDTest, DISABLED_Speed) {
- RunSpeedTest(GET_PARAM(1));
-}
-
-TEST_P(AV1HighBDJNTCOMPAVGUPSAMPLEDTest, CheckOutput) {
- RunCheckOutput(GET_PARAM(1));
-}
-
-#if HAVE_SSE2
-INSTANTIATE_TEST_CASE_P(SSE2, AV1HighBDJNTCOMPAVGUPSAMPLEDTest,
- libaom_test::AV1JNTCOMPAVG::BuildParams(
- aom_highbd_jnt_comp_avg_upsampled_pred_sse2));
-#endif
-
-} // namespace
diff --git a/third_party/aom/test/comp_avg_pred_test.h b/third_party/aom/test/comp_avg_pred_test.h
deleted file mode 100644
index 9661dd9f5..000000000
--- a/third_party/aom/test/comp_avg_pred_test.h
+++ /dev/null
@@ -1,555 +0,0 @@
-/*
- * Copyright (c) 2018, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#ifndef AOM_TEST_COMP_AVG_PRED_TEST_H_
-#define AOM_TEST_COMP_AVG_PRED_TEST_H_
-
-#include "config/aom_dsp_rtcd.h"
-
-#include "third_party/googletest/src/googletest/include/gtest/gtest.h"
-#include "test/acm_random.h"
-#include "test/util.h"
-#include "test/clear_system_state.h"
-#include "test/register_state_check.h"
-#include "av1/common/common_data.h"
-#include "aom_ports/aom_timer.h"
-
-namespace libaom_test {
-const int kMaxSize = 128 + 32; // padding
-
-namespace AV1JNTCOMPAVG {
-
-typedef void (*jntcompavg_func)(uint8_t *comp_pred, const uint8_t *pred,
- int width, int height, const uint8_t *ref,
- int ref_stride,
- const JNT_COMP_PARAMS *jcp_param);
-
-typedef void (*jntcompavgupsampled_func)(
- MACROBLOCKD *xd, const struct AV1Common *const cm, int mi_row, int mi_col,
- const MV *const mv, uint8_t *comp_pred, const uint8_t *pred, int width,
- int height, int subpel_x_q3, int subpel_y_q3, const uint8_t *ref,
- int ref_stride, const JNT_COMP_PARAMS *jcp_param, int subpel_search);
-
-typedef void (*highbdjntcompavgupsampled_func)(
- MACROBLOCKD *xd, const struct AV1Common *const cm, int mi_row, int mi_col,
- const MV *const mv, uint8_t *comp_pred8, const uint8_t *pred8, int width,
- int height, int subpel_x_q3, int subpel_y_q3, const uint8_t *ref8,
- int ref_stride, int bd, const JNT_COMP_PARAMS *jcp_param,
- int subpel_search);
-
-typedef ::testing::tuple<jntcompavg_func, BLOCK_SIZE> JNTCOMPAVGParam;
-
-typedef ::testing::tuple<jntcompavgupsampled_func, BLOCK_SIZE>
- JNTCOMPAVGUPSAMPLEDParam;
-
-typedef ::testing::tuple<int, jntcompavg_func, BLOCK_SIZE>
- HighbdJNTCOMPAVGParam;
-
-typedef ::testing::tuple<int, highbdjntcompavgupsampled_func, BLOCK_SIZE>
- HighbdJNTCOMPAVGUPSAMPLEDParam;
-
-::testing::internal::ParamGenerator<JNTCOMPAVGParam> BuildParams(
- jntcompavg_func filter) {
- return ::testing::Combine(::testing::Values(filter),
- ::testing::Range(BLOCK_4X4, BLOCK_SIZES_ALL));
-}
-
-::testing::internal::ParamGenerator<JNTCOMPAVGUPSAMPLEDParam> BuildParams(
- jntcompavgupsampled_func filter) {
- return ::testing::Combine(::testing::Values(filter),
- ::testing::Range(BLOCK_4X4, BLOCK_SIZES_ALL));
-}
-
-::testing::internal::ParamGenerator<HighbdJNTCOMPAVGParam> BuildParams(
- jntcompavg_func filter, int is_hbd) {
- (void)is_hbd;
- return ::testing::Combine(::testing::Range(8, 13, 2),
- ::testing::Values(filter),
- ::testing::Range(BLOCK_4X4, BLOCK_SIZES_ALL));
-}
-
-::testing::internal::ParamGenerator<HighbdJNTCOMPAVGUPSAMPLEDParam> BuildParams(
- highbdjntcompavgupsampled_func filter) {
- return ::testing::Combine(::testing::Range(8, 13, 2),
- ::testing::Values(filter),
- ::testing::Range(BLOCK_4X4, BLOCK_SIZES_ALL));
-}
-
-class AV1JNTCOMPAVGTest : public ::testing::TestWithParam<JNTCOMPAVGParam> {
- public:
- ~AV1JNTCOMPAVGTest() {}
- void SetUp() { rnd_.Reset(ACMRandom::DeterministicSeed()); }
- void TearDown() { libaom_test::ClearSystemState(); }
-
- protected:
- void RunCheckOutput(jntcompavg_func test_impl) {
- const int w = kMaxSize, h = kMaxSize;
- const int block_idx = GET_PARAM(1);
-
- uint8_t pred8[kMaxSize * kMaxSize];
- uint8_t ref8[kMaxSize * kMaxSize];
- uint8_t output[kMaxSize * kMaxSize];
- uint8_t output2[kMaxSize * kMaxSize];
-
- for (int i = 0; i < h; ++i)
- for (int j = 0; j < w; ++j) {
- pred8[i * w + j] = rnd_.Rand8();
- ref8[i * w + j] = rnd_.Rand8();
- }
- const int in_w = block_size_wide[block_idx];
- const int in_h = block_size_high[block_idx];
-
- JNT_COMP_PARAMS jnt_comp_params;
- jnt_comp_params.use_jnt_comp_avg = 1;
-
- for (int ii = 0; ii < 2; ii++) {
- for (int jj = 0; jj < 4; jj++) {
- jnt_comp_params.fwd_offset = quant_dist_lookup_table[ii][jj][0];
- jnt_comp_params.bck_offset = quant_dist_lookup_table[ii][jj][1];
-
- const int offset_r = 3 + rnd_.PseudoUniform(h - in_h - 7);
- const int offset_c = 3 + rnd_.PseudoUniform(w - in_w - 7);
- aom_jnt_comp_avg_pred_c(output, pred8 + offset_r * w + offset_c, in_w,
- in_h, ref8 + offset_r * w + offset_c, in_w,
- &jnt_comp_params);
- test_impl(output2, pred8 + offset_r * w + offset_c, in_w, in_h,
- ref8 + offset_r * w + offset_c, in_w, &jnt_comp_params);
-
- for (int i = 0; i < in_h; ++i) {
- for (int j = 0; j < in_w; ++j) {
- int idx = i * in_w + j;
- ASSERT_EQ(output[idx], output2[idx])
- << "Mismatch at unit tests for AV1JNTCOMPAVGTest\n"
- << in_w << "x" << in_h << " Pixel mismatch at index " << idx
- << " = (" << i << ", " << j << ")";
- }
- }
- }
- }
- }
- void RunSpeedTest(jntcompavg_func test_impl) {
- const int w = kMaxSize, h = kMaxSize;
- const int block_idx = GET_PARAM(1);
-
- uint8_t pred8[kMaxSize * kMaxSize];
- uint8_t ref8[kMaxSize * kMaxSize];
- uint8_t output[kMaxSize * kMaxSize];
- uint8_t output2[kMaxSize * kMaxSize];
-
- for (int i = 0; i < h; ++i)
- for (int j = 0; j < w; ++j) {
- pred8[i * w + j] = rnd_.Rand8();
- ref8[i * w + j] = rnd_.Rand8();
- }
- const int in_w = block_size_wide[block_idx];
- const int in_h = block_size_high[block_idx];
-
- JNT_COMP_PARAMS jnt_comp_params;
- jnt_comp_params.use_jnt_comp_avg = 1;
-
- jnt_comp_params.fwd_offset = quant_dist_lookup_table[0][0][0];
- jnt_comp_params.bck_offset = quant_dist_lookup_table[0][0][1];
-
- const int num_loops = 1000000000 / (in_w + in_h);
- aom_usec_timer timer;
- aom_usec_timer_start(&timer);
-
- for (int i = 0; i < num_loops; ++i)
- aom_jnt_comp_avg_pred_c(output, pred8, in_w, in_h, ref8, in_w,
- &jnt_comp_params);
-
- aom_usec_timer_mark(&timer);
- const int elapsed_time = static_cast<int>(aom_usec_timer_elapsed(&timer));
- printf("jntcompavg c_code %3dx%-3d: %7.2f us\n", in_w, in_h,
- 1000.0 * elapsed_time / num_loops);
-
- aom_usec_timer timer1;
- aom_usec_timer_start(&timer1);
-
- for (int i = 0; i < num_loops; ++i)
- test_impl(output2, pred8, in_w, in_h, ref8, in_w, &jnt_comp_params);
-
- aom_usec_timer_mark(&timer1);
- const int elapsed_time1 = static_cast<int>(aom_usec_timer_elapsed(&timer1));
- printf("jntcompavg test_code %3dx%-3d: %7.2f us\n", in_w, in_h,
- 1000.0 * elapsed_time1 / num_loops);
- }
-
- libaom_test::ACMRandom rnd_;
-}; // class AV1JNTCOMPAVGTest
-
-class AV1JNTCOMPAVGUPSAMPLEDTest
- : public ::testing::TestWithParam<JNTCOMPAVGUPSAMPLEDParam> {
- public:
- ~AV1JNTCOMPAVGUPSAMPLEDTest() {}
- void SetUp() { rnd_.Reset(ACMRandom::DeterministicSeed()); }
- void TearDown() { libaom_test::ClearSystemState(); }
-
- protected:
- void RunCheckOutput(jntcompavgupsampled_func test_impl) {
- const int w = kMaxSize, h = kMaxSize;
- const int block_idx = GET_PARAM(1);
-
- uint8_t pred8[kMaxSize * kMaxSize];
- uint8_t ref8[kMaxSize * kMaxSize];
- DECLARE_ALIGNED(16, uint8_t, output[MAX_SB_SQUARE]);
- DECLARE_ALIGNED(16, uint8_t, output2[MAX_SB_SQUARE]);
-
- for (int i = 0; i < h; ++i)
- for (int j = 0; j < w; ++j) {
- pred8[i * w + j] = rnd_.Rand8();
- ref8[i * w + j] = rnd_.Rand8();
- }
- const int in_w = block_size_wide[block_idx];
- const int in_h = block_size_high[block_idx];
-
- JNT_COMP_PARAMS jnt_comp_params;
- jnt_comp_params.use_jnt_comp_avg = 1;
- int sub_x_q3, sub_y_q3;
- int subpel_search;
- for (subpel_search = 1; subpel_search <= 2; ++subpel_search) {
- for (sub_x_q3 = 0; sub_x_q3 < 8; ++sub_x_q3) {
- for (sub_y_q3 = 0; sub_y_q3 < 8; ++sub_y_q3) {
- for (int ii = 0; ii < 2; ii++) {
- for (int jj = 0; jj < 4; jj++) {
- jnt_comp_params.fwd_offset = quant_dist_lookup_table[ii][jj][0];
- jnt_comp_params.bck_offset = quant_dist_lookup_table[ii][jj][1];
-
- const int offset_r = 3 + rnd_.PseudoUniform(h - in_h - 7);
- const int offset_c = 3 + rnd_.PseudoUniform(w - in_w - 7);
-
- aom_jnt_comp_avg_upsampled_pred_c(
- NULL, NULL, 0, 0, NULL, output,
- pred8 + offset_r * w + offset_c, in_w, in_h, sub_x_q3,
- sub_y_q3, ref8 + offset_r * w + offset_c, in_w,
- &jnt_comp_params, subpel_search);
- test_impl(NULL, NULL, 0, 0, NULL, output2,
- pred8 + offset_r * w + offset_c, in_w, in_h, sub_x_q3,
- sub_y_q3, ref8 + offset_r * w + offset_c, in_w,
- &jnt_comp_params, subpel_search);
-
- for (int i = 0; i < in_h; ++i) {
- for (int j = 0; j < in_w; ++j) {
- int idx = i * in_w + j;
- ASSERT_EQ(output[idx], output2[idx])
- << "Mismatch at unit tests for "
- "AV1JNTCOMPAVGUPSAMPLEDTest\n"
- << in_w << "x" << in_h << " Pixel mismatch at index "
- << idx << " = (" << i << ", " << j
- << "), sub pixel offset = (" << sub_y_q3 << ", "
- << sub_x_q3 << ")";
- }
- }
- }
- }
- }
- }
- }
- }
- void RunSpeedTest(jntcompavgupsampled_func test_impl) {
- const int w = kMaxSize, h = kMaxSize;
- const int block_idx = GET_PARAM(1);
-
- uint8_t pred8[kMaxSize * kMaxSize];
- uint8_t ref8[kMaxSize * kMaxSize];
- DECLARE_ALIGNED(16, uint8_t, output[MAX_SB_SQUARE]);
- DECLARE_ALIGNED(16, uint8_t, output2[MAX_SB_SQUARE]);
-
- for (int i = 0; i < h; ++i)
- for (int j = 0; j < w; ++j) {
- pred8[i * w + j] = rnd_.Rand8();
- ref8[i * w + j] = rnd_.Rand8();
- }
- const int in_w = block_size_wide[block_idx];
- const int in_h = block_size_high[block_idx];
-
- JNT_COMP_PARAMS jnt_comp_params;
- jnt_comp_params.use_jnt_comp_avg = 1;
-
- jnt_comp_params.fwd_offset = quant_dist_lookup_table[0][0][0];
- jnt_comp_params.bck_offset = quant_dist_lookup_table[0][0][1];
-
- int sub_x_q3 = 0;
- int sub_y_q3 = 0;
-
- const int num_loops = 1000000000 / (in_w + in_h);
- aom_usec_timer timer;
- aom_usec_timer_start(&timer);
- int subpel_search = 2; // set to 1 to test 4-tap filter.
-
- for (int i = 0; i < num_loops; ++i)
- aom_jnt_comp_avg_upsampled_pred_c(NULL, NULL, 0, 0, NULL, output, pred8,
- in_w, in_h, sub_x_q3, sub_y_q3, ref8,
- in_w, &jnt_comp_params, subpel_search);
-
- aom_usec_timer_mark(&timer);
- const int elapsed_time = static_cast<int>(aom_usec_timer_elapsed(&timer));
- printf("jntcompavgupsampled c_code %3dx%-3d: %7.2f us\n", in_w, in_h,
- 1000.0 * elapsed_time / num_loops);
-
- aom_usec_timer timer1;
- aom_usec_timer_start(&timer1);
-
- for (int i = 0; i < num_loops; ++i)
- test_impl(NULL, NULL, 0, 0, NULL, output2, pred8, in_w, in_h, sub_x_q3,
- sub_y_q3, ref8, in_w, &jnt_comp_params, subpel_search);
-
- aom_usec_timer_mark(&timer1);
- const int elapsed_time1 = static_cast<int>(aom_usec_timer_elapsed(&timer1));
- printf("jntcompavgupsampled test_code %3dx%-3d: %7.2f us\n", in_w, in_h,
- 1000.0 * elapsed_time1 / num_loops);
- }
-
- libaom_test::ACMRandom rnd_;
-}; // class AV1JNTCOMPAVGUPSAMPLEDTest
-
-class AV1HighBDJNTCOMPAVGTest
- : public ::testing::TestWithParam<HighbdJNTCOMPAVGParam> {
- public:
- ~AV1HighBDJNTCOMPAVGTest() {}
- void SetUp() { rnd_.Reset(ACMRandom::DeterministicSeed()); }
-
- void TearDown() { libaom_test::ClearSystemState(); }
-
- protected:
- void RunCheckOutput(jntcompavg_func test_impl) {
- const int w = kMaxSize, h = kMaxSize;
- const int block_idx = GET_PARAM(2);
- const int bd = GET_PARAM(0);
- uint16_t pred8[kMaxSize * kMaxSize];
- uint16_t ref8[kMaxSize * kMaxSize];
- uint16_t output[kMaxSize * kMaxSize];
- uint16_t output2[kMaxSize * kMaxSize];
-
- for (int i = 0; i < h; ++i)
- for (int j = 0; j < w; ++j) {
- pred8[i * w + j] = rnd_.Rand16() & ((1 << bd) - 1);
- ref8[i * w + j] = rnd_.Rand16() & ((1 << bd) - 1);
- }
- const int in_w = block_size_wide[block_idx];
- const int in_h = block_size_high[block_idx];
-
- JNT_COMP_PARAMS jnt_comp_params;
- jnt_comp_params.use_jnt_comp_avg = 1;
-
- for (int ii = 0; ii < 2; ii++) {
- for (int jj = 0; jj < 4; jj++) {
- jnt_comp_params.fwd_offset = quant_dist_lookup_table[ii][jj][0];
- jnt_comp_params.bck_offset = quant_dist_lookup_table[ii][jj][1];
-
- const int offset_r = 3 + rnd_.PseudoUniform(h - in_h - 7);
- const int offset_c = 3 + rnd_.PseudoUniform(w - in_w - 7);
- aom_highbd_jnt_comp_avg_pred_c(
- CONVERT_TO_BYTEPTR(output),
- CONVERT_TO_BYTEPTR(pred8) + offset_r * w + offset_c, in_w, in_h,
- CONVERT_TO_BYTEPTR(ref8) + offset_r * w + offset_c, in_w,
- &jnt_comp_params);
- test_impl(CONVERT_TO_BYTEPTR(output2),
- CONVERT_TO_BYTEPTR(pred8) + offset_r * w + offset_c, in_w,
- in_h, CONVERT_TO_BYTEPTR(ref8) + offset_r * w + offset_c,
- in_w, &jnt_comp_params);
-
- for (int i = 0; i < in_h; ++i) {
- for (int j = 0; j < in_w; ++j) {
- int idx = i * in_w + j;
- ASSERT_EQ(output[idx], output2[idx])
- << "Mismatch at unit tests for AV1HighBDJNTCOMPAVGTest\n"
- << in_w << "x" << in_h << " Pixel mismatch at index " << idx
- << " = (" << i << ", " << j << ")";
- }
- }
- }
- }
- }
- void RunSpeedTest(jntcompavg_func test_impl) {
- const int w = kMaxSize, h = kMaxSize;
- const int block_idx = GET_PARAM(2);
- const int bd = GET_PARAM(0);
- uint16_t pred8[kMaxSize * kMaxSize];
- uint16_t ref8[kMaxSize * kMaxSize];
- uint16_t output[kMaxSize * kMaxSize];
- uint16_t output2[kMaxSize * kMaxSize];
-
- for (int i = 0; i < h; ++i)
- for (int j = 0; j < w; ++j) {
- pred8[i * w + j] = rnd_.Rand16() & ((1 << bd) - 1);
- ref8[i * w + j] = rnd_.Rand16() & ((1 << bd) - 1);
- }
- const int in_w = block_size_wide[block_idx];
- const int in_h = block_size_high[block_idx];
-
- JNT_COMP_PARAMS jnt_comp_params;
- jnt_comp_params.use_jnt_comp_avg = 1;
-
- jnt_comp_params.fwd_offset = quant_dist_lookup_table[0][0][0];
- jnt_comp_params.bck_offset = quant_dist_lookup_table[0][0][1];
-
- const int num_loops = 1000000000 / (in_w + in_h);
- aom_usec_timer timer;
- aom_usec_timer_start(&timer);
-
- for (int i = 0; i < num_loops; ++i)
- aom_highbd_jnt_comp_avg_pred_c(
- CONVERT_TO_BYTEPTR(output), CONVERT_TO_BYTEPTR(pred8), in_w, in_h,
- CONVERT_TO_BYTEPTR(ref8), in_w, &jnt_comp_params);
-
- aom_usec_timer_mark(&timer);
- const int elapsed_time = static_cast<int>(aom_usec_timer_elapsed(&timer));
- printf("highbdjntcompavg c_code %3dx%-3d: %7.2f us\n", in_w, in_h,
- 1000.0 * elapsed_time / num_loops);
-
- aom_usec_timer timer1;
- aom_usec_timer_start(&timer1);
-
- for (int i = 0; i < num_loops; ++i)
- test_impl(CONVERT_TO_BYTEPTR(output2), CONVERT_TO_BYTEPTR(pred8), in_w,
- in_h, CONVERT_TO_BYTEPTR(ref8), in_w, &jnt_comp_params);
-
- aom_usec_timer_mark(&timer1);
- const int elapsed_time1 = static_cast<int>(aom_usec_timer_elapsed(&timer1));
- printf("highbdjntcompavg test_code %3dx%-3d: %7.2f us\n", in_w, in_h,
- 1000.0 * elapsed_time1 / num_loops);
- }
-
- libaom_test::ACMRandom rnd_;
-}; // class AV1HighBDJNTCOMPAVGTest
-
-class AV1HighBDJNTCOMPAVGUPSAMPLEDTest
- : public ::testing::TestWithParam<HighbdJNTCOMPAVGUPSAMPLEDParam> {
- public:
- ~AV1HighBDJNTCOMPAVGUPSAMPLEDTest() {}
- void SetUp() { rnd_.Reset(ACMRandom::DeterministicSeed()); }
- void TearDown() { libaom_test::ClearSystemState(); }
-
- protected:
- void RunCheckOutput(highbdjntcompavgupsampled_func test_impl) {
- const int w = kMaxSize, h = kMaxSize;
- const int block_idx = GET_PARAM(2);
- const int bd = GET_PARAM(0);
- uint16_t pred8[kMaxSize * kMaxSize];
- uint16_t ref8[kMaxSize * kMaxSize];
- uint16_t output[kMaxSize * kMaxSize];
- uint16_t output2[kMaxSize * kMaxSize];
-
- for (int i = 0; i < h; ++i)
- for (int j = 0; j < w; ++j) {
- pred8[i * w + j] = rnd_.Rand16() & ((1 << bd) - 1);
- ref8[i * w + j] = rnd_.Rand16() & ((1 << bd) - 1);
- }
- const int in_w = block_size_wide[block_idx];
- const int in_h = block_size_high[block_idx];
-
- JNT_COMP_PARAMS jnt_comp_params;
- jnt_comp_params.use_jnt_comp_avg = 1;
- int sub_x_q3, sub_y_q3;
- int subpel_search;
- for (subpel_search = 1; subpel_search <= 2; ++subpel_search) {
- for (sub_x_q3 = 0; sub_x_q3 < 8; ++sub_x_q3) {
- for (sub_y_q3 = 0; sub_y_q3 < 8; ++sub_y_q3) {
- for (int ii = 0; ii < 2; ii++) {
- for (int jj = 0; jj < 4; jj++) {
- jnt_comp_params.fwd_offset = quant_dist_lookup_table[ii][jj][0];
- jnt_comp_params.bck_offset = quant_dist_lookup_table[ii][jj][1];
-
- const int offset_r = 3 + rnd_.PseudoUniform(h - in_h - 7);
- const int offset_c = 3 + rnd_.PseudoUniform(w - in_w - 7);
-
- aom_highbd_jnt_comp_avg_upsampled_pred_c(
- NULL, NULL, 0, 0, NULL, CONVERT_TO_BYTEPTR(output),
- CONVERT_TO_BYTEPTR(pred8) + offset_r * w + offset_c, in_w,
- in_h, sub_x_q3, sub_y_q3,
- CONVERT_TO_BYTEPTR(ref8) + offset_r * w + offset_c, in_w, bd,
- &jnt_comp_params, subpel_search);
- test_impl(NULL, NULL, 0, 0, NULL, CONVERT_TO_BYTEPTR(output2),
- CONVERT_TO_BYTEPTR(pred8) + offset_r * w + offset_c,
- in_w, in_h, sub_x_q3, sub_y_q3,
- CONVERT_TO_BYTEPTR(ref8) + offset_r * w + offset_c,
- in_w, bd, &jnt_comp_params, subpel_search);
-
- for (int i = 0; i < in_h; ++i) {
- for (int j = 0; j < in_w; ++j) {
- int idx = i * in_w + j;
- ASSERT_EQ(output[idx], output2[idx])
- << "Mismatch at unit tests for "
- "AV1HighBDJNTCOMPAVGUPSAMPLEDTest\n"
- << in_w << "x" << in_h << " Pixel mismatch at index "
- << idx << " = (" << i << ", " << j
- << "), sub pixel offset = (" << sub_y_q3 << ", "
- << sub_x_q3 << ")";
- }
- }
- }
- }
- }
- }
- }
- }
- void RunSpeedTest(highbdjntcompavgupsampled_func test_impl) {
- const int w = kMaxSize, h = kMaxSize;
- const int block_idx = GET_PARAM(2);
- const int bd = GET_PARAM(0);
- uint16_t pred8[kMaxSize * kMaxSize];
- uint16_t ref8[kMaxSize * kMaxSize];
- uint16_t output[kMaxSize * kMaxSize];
- uint16_t output2[kMaxSize * kMaxSize];
-
- for (int i = 0; i < h; ++i)
- for (int j = 0; j < w; ++j) {
- pred8[i * w + j] = rnd_.Rand16() & ((1 << bd) - 1);
- ref8[i * w + j] = rnd_.Rand16() & ((1 << bd) - 1);
- }
- const int in_w = block_size_wide[block_idx];
- const int in_h = block_size_high[block_idx];
-
- JNT_COMP_PARAMS jnt_comp_params;
- jnt_comp_params.use_jnt_comp_avg = 1;
-
- jnt_comp_params.fwd_offset = quant_dist_lookup_table[0][0][0];
- jnt_comp_params.bck_offset = quant_dist_lookup_table[0][0][1];
- int sub_x_q3 = 0;
- int sub_y_q3 = 0;
- const int num_loops = 1000000000 / (in_w + in_h);
- aom_usec_timer timer;
- aom_usec_timer_start(&timer);
- int subpel_search = 2; // set to 1 to test 4-tap filter.
- for (int i = 0; i < num_loops; ++i)
- aom_highbd_jnt_comp_avg_upsampled_pred_c(
- NULL, NULL, 0, 0, NULL, CONVERT_TO_BYTEPTR(output),
- CONVERT_TO_BYTEPTR(pred8), in_w, in_h, sub_x_q3, sub_y_q3,
- CONVERT_TO_BYTEPTR(ref8), in_w, bd, &jnt_comp_params, subpel_search);
-
- aom_usec_timer_mark(&timer);
- const int elapsed_time = static_cast<int>(aom_usec_timer_elapsed(&timer));
- printf("highbdjntcompavgupsampled c_code %3dx%-3d: %7.2f us\n", in_w, in_h,
- 1000.0 * elapsed_time / num_loops);
-
- aom_usec_timer timer1;
- aom_usec_timer_start(&timer1);
-
- for (int i = 0; i < num_loops; ++i)
- test_impl(NULL, NULL, 0, 0, NULL, CONVERT_TO_BYTEPTR(output2),
- CONVERT_TO_BYTEPTR(pred8), in_w, in_h, sub_x_q3, sub_y_q3,
- CONVERT_TO_BYTEPTR(ref8), in_w, bd, &jnt_comp_params,
- subpel_search);
-
- aom_usec_timer_mark(&timer1);
- const int elapsed_time1 = static_cast<int>(aom_usec_timer_elapsed(&timer1));
- printf("highbdjntcompavgupsampled test_code %3dx%-3d: %7.2f us\n", in_w,
- in_h, 1000.0 * elapsed_time1 / num_loops);
- }
-
- libaom_test::ACMRandom rnd_;
-}; // class AV1HighBDJNTCOMPAVGUPSAMPLEDTest
-
-} // namespace AV1JNTCOMPAVG
-} // namespace libaom_test
-
-#endif // AOM_TEST_COMP_AVG_PRED_TEST_H_
diff --git a/third_party/aom/test/comp_mask_variance_test.cc b/third_party/aom/test/comp_mask_variance_test.cc
deleted file mode 100644
index 34be2aa6d..000000000
--- a/third_party/aom/test/comp_mask_variance_test.cc
+++ /dev/null
@@ -1,574 +0,0 @@
-/*
- * Copyright (c) 2018, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#include <cstdlib>
-#include <new>
-
-#include "config/aom_config.h"
-#include "config/aom_dsp_rtcd.h"
-
-#include "aom/aom_codec.h"
-#include "aom/aom_integer.h"
-#include "aom_dsp/variance.h"
-#include "aom_mem/aom_mem.h"
-#include "aom_ports/aom_timer.h"
-#include "aom_ports/mem.h"
-#include "av1/common/reconinter.h"
-#include "test/acm_random.h"
-#include "test/clear_system_state.h"
-#include "test/register_state_check.h"
-#include "test/util.h"
-#include "third_party/googletest/src/googletest/include/gtest/gtest.h"
-
-namespace AV1CompMaskVariance {
-typedef void (*comp_mask_pred_func)(uint8_t *comp_pred, const uint8_t *pred,
- int width, int height, const uint8_t *ref,
- int ref_stride, const uint8_t *mask,
- int mask_stride, int invert_mask);
-
-#if HAVE_SSSE3 || HAVE_AV2
-const BLOCK_SIZE kValidBlockSize[] = {
- BLOCK_8X8, BLOCK_8X16, BLOCK_8X32, BLOCK_16X8, BLOCK_16X16,
- BLOCK_16X32, BLOCK_32X8, BLOCK_32X16, BLOCK_32X32,
-};
-#endif
-typedef ::testing::tuple<comp_mask_pred_func, BLOCK_SIZE> CompMaskPredParam;
-
-class AV1CompMaskVarianceTest
- : public ::testing::TestWithParam<CompMaskPredParam> {
- public:
- ~AV1CompMaskVarianceTest();
- void SetUp();
-
- void TearDown();
-
- protected:
- void RunCheckOutput(comp_mask_pred_func test_impl, BLOCK_SIZE bsize, int inv);
- void RunSpeedTest(comp_mask_pred_func test_impl, BLOCK_SIZE bsize);
- bool CheckResult(int width, int height) {
- for (int y = 0; y < height; ++y) {
- for (int x = 0; x < width; ++x) {
- const int idx = y * width + x;
- if (comp_pred1_[idx] != comp_pred2_[idx]) {
- printf("%dx%d mismatch @%d(%d,%d) ", width, height, idx, y, x);
- printf("%d != %d ", comp_pred1_[idx], comp_pred2_[idx]);
- return false;
- }
- }
- }
- return true;
- }
-
- libaom_test::ACMRandom rnd_;
- uint8_t *comp_pred1_;
- uint8_t *comp_pred2_;
- uint8_t *pred_;
- uint8_t *ref_buffer_;
- uint8_t *ref_;
-};
-
-AV1CompMaskVarianceTest::~AV1CompMaskVarianceTest() { ; }
-
-void AV1CompMaskVarianceTest::SetUp() {
- rnd_.Reset(libaom_test::ACMRandom::DeterministicSeed());
- av1_init_wedge_masks();
- comp_pred1_ = (uint8_t *)aom_memalign(16, MAX_SB_SQUARE);
- comp_pred2_ = (uint8_t *)aom_memalign(16, MAX_SB_SQUARE);
- pred_ = (uint8_t *)aom_memalign(16, MAX_SB_SQUARE);
- ref_buffer_ = (uint8_t *)aom_memalign(16, MAX_SB_SQUARE + (8 * MAX_SB_SIZE));
- ref_ = ref_buffer_ + (8 * MAX_SB_SIZE);
- for (int i = 0; i < MAX_SB_SQUARE; ++i) {
- pred_[i] = rnd_.Rand8();
- }
- for (int i = 0; i < MAX_SB_SQUARE + (8 * MAX_SB_SIZE); ++i) {
- ref_buffer_[i] = rnd_.Rand8();
- }
-}
-
-void AV1CompMaskVarianceTest::TearDown() {
- aom_free(comp_pred1_);
- aom_free(comp_pred2_);
- aom_free(pred_);
- aom_free(ref_buffer_);
- libaom_test::ClearSystemState();
-}
-
-void AV1CompMaskVarianceTest::RunCheckOutput(comp_mask_pred_func test_impl,
- BLOCK_SIZE bsize, int inv) {
- const int w = block_size_wide[bsize];
- const int h = block_size_high[bsize];
-
- int wedge_types = (1 << get_wedge_bits_lookup(bsize));
- for (int wedge_index = 0; wedge_index < wedge_types; ++wedge_index) {
- const uint8_t *mask = av1_get_contiguous_soft_mask(wedge_index, 1, bsize);
-
- aom_comp_mask_pred_c(comp_pred1_, pred_, w, h, ref_, MAX_SB_SIZE, mask, w,
- inv);
- test_impl(comp_pred2_, pred_, w, h, ref_, MAX_SB_SIZE, mask, w, inv);
-
- ASSERT_EQ(CheckResult(w, h), true)
- << " wedge " << wedge_index << " inv " << inv;
- }
-}
-
-void AV1CompMaskVarianceTest::RunSpeedTest(comp_mask_pred_func test_impl,
- BLOCK_SIZE bsize) {
- const int w = block_size_wide[bsize];
- const int h = block_size_high[bsize];
-
- int wedge_types = (1 << get_wedge_bits_lookup(bsize));
- int wedge_index = wedge_types / 2;
- const uint8_t *mask = av1_get_contiguous_soft_mask(wedge_index, 1, bsize);
- const int num_loops = 1000000000 / (w + h);
-
- comp_mask_pred_func funcs[2] = { aom_comp_mask_pred_c, test_impl };
- double elapsed_time[2] = { 0 };
- for (int i = 0; i < 2; ++i) {
- aom_usec_timer timer;
- aom_usec_timer_start(&timer);
- comp_mask_pred_func func = funcs[i];
- for (int j = 0; j < num_loops; ++j) {
- func(comp_pred1_, pred_, w, h, ref_, MAX_SB_SIZE, mask, w, 0);
- }
- aom_usec_timer_mark(&timer);
- double time = static_cast<double>(aom_usec_timer_elapsed(&timer));
- elapsed_time[i] = 1000.0 * time / num_loops;
- }
- printf("compMask %3dx%-3d: %7.2f/%7.2fns", w, h, elapsed_time[0],
- elapsed_time[1]);
- printf("(%3.2f)\n", elapsed_time[0] / elapsed_time[1]);
-}
-
-TEST_P(AV1CompMaskVarianceTest, CheckOutput) {
- // inv = 0, 1
- RunCheckOutput(GET_PARAM(0), GET_PARAM(1), 0);
- RunCheckOutput(GET_PARAM(0), GET_PARAM(1), 1);
-}
-
-TEST_P(AV1CompMaskVarianceTest, DISABLED_Speed) {
- RunSpeedTest(GET_PARAM(0), GET_PARAM(1));
-}
-
-#if HAVE_SSSE3
-INSTANTIATE_TEST_CASE_P(
- SSSE3, AV1CompMaskVarianceTest,
- ::testing::Combine(::testing::Values(&aom_comp_mask_pred_ssse3),
- ::testing::ValuesIn(kValidBlockSize)));
-#endif
-
-#if HAVE_AVX2
-INSTANTIATE_TEST_CASE_P(
- AVX2, AV1CompMaskVarianceTest,
- ::testing::Combine(::testing::Values(&aom_comp_mask_pred_avx2),
- ::testing::ValuesIn(kValidBlockSize)));
-#endif
-
-#ifndef aom_comp_mask_pred
-// can't run this test if aom_comp_mask_pred is defined to aom_comp_mask_pred_c
-class AV1CompMaskUpVarianceTest : public AV1CompMaskVarianceTest {
- public:
- ~AV1CompMaskUpVarianceTest();
-
- protected:
- void RunCheckOutput(comp_mask_pred_func test_impl, BLOCK_SIZE bsize, int inv);
- void RunSpeedTest(comp_mask_pred_func test_impl, BLOCK_SIZE bsize,
- int havSub);
-};
-
-AV1CompMaskUpVarianceTest::~AV1CompMaskUpVarianceTest() { ; }
-
-void AV1CompMaskUpVarianceTest::RunCheckOutput(comp_mask_pred_func test_impl,
- BLOCK_SIZE bsize, int inv) {
- const int w = block_size_wide[bsize];
- const int h = block_size_high[bsize];
- int wedge_types = (1 << get_wedge_bits_lookup(bsize));
- int subpel_search;
- for (subpel_search = 1; subpel_search <= 2; ++subpel_search) {
- // loop through subx and suby
- for (int sub = 0; sub < 8 * 8; ++sub) {
- int subx = sub & 0x7;
- int suby = (sub >> 3);
- for (int wedge_index = 0; wedge_index < wedge_types; ++wedge_index) {
- const uint8_t *mask =
- av1_get_contiguous_soft_mask(wedge_index, 1, bsize);
-
- // ref
- aom_comp_mask_upsampled_pred_c(
- NULL, NULL, 0, 0, NULL, comp_pred1_, pred_, w, h, subx, suby, ref_,
- MAX_SB_SIZE, mask, w, inv, subpel_search);
-
- aom_comp_mask_pred = test_impl; // test
- aom_comp_mask_upsampled_pred(NULL, NULL, 0, 0, NULL, comp_pred2_, pred_,
- w, h, subx, suby, ref_, MAX_SB_SIZE, mask,
- w, inv, subpel_search);
- ASSERT_EQ(CheckResult(w, h), true)
- << " wedge " << wedge_index << " inv " << inv << "sub (" << subx
- << "," << suby << ")";
- }
- }
- }
-}
-
-void AV1CompMaskUpVarianceTest::RunSpeedTest(comp_mask_pred_func test_impl,
- BLOCK_SIZE bsize, int havSub) {
- const int w = block_size_wide[bsize];
- const int h = block_size_high[bsize];
- const int subx = havSub ? 3 : 0;
- const int suby = havSub ? 4 : 0;
-
- int wedge_types = (1 << get_wedge_bits_lookup(bsize));
- int wedge_index = wedge_types / 2;
- const uint8_t *mask = av1_get_contiguous_soft_mask(wedge_index, 1, bsize);
-
- const int num_loops = 1000000000 / (w + h);
- comp_mask_pred_func funcs[2] = { &aom_comp_mask_pred_c, test_impl };
- double elapsed_time[2] = { 0 };
- int subpel_search = 2; // set to 1 to test 4-tap filter.
- for (int i = 0; i < 2; ++i) {
- aom_usec_timer timer;
- aom_usec_timer_start(&timer);
- aom_comp_mask_pred = funcs[i];
- for (int j = 0; j < num_loops; ++j) {
- aom_comp_mask_upsampled_pred(NULL, NULL, 0, 0, NULL, comp_pred1_, pred_,
- w, h, subx, suby, ref_, MAX_SB_SIZE, mask, w,
- 0, subpel_search);
- }
- aom_usec_timer_mark(&timer);
- double time = static_cast<double>(aom_usec_timer_elapsed(&timer));
- elapsed_time[i] = 1000.0 * time / num_loops;
- }
- printf("CompMaskUp[%d] %3dx%-3d:%7.2f/%7.2fns", havSub, w, h, elapsed_time[0],
- elapsed_time[1]);
- printf("(%3.2f)\n", elapsed_time[0] / elapsed_time[1]);
-}
-
-TEST_P(AV1CompMaskUpVarianceTest, CheckOutput) {
- // inv mask = 0, 1
- RunCheckOutput(GET_PARAM(0), GET_PARAM(1), 0);
- RunCheckOutput(GET_PARAM(0), GET_PARAM(1), 1);
-}
-
-TEST_P(AV1CompMaskUpVarianceTest, DISABLED_Speed) {
- RunSpeedTest(GET_PARAM(0), GET_PARAM(1), 1);
-}
-
-#if HAVE_SSSE3
-INSTANTIATE_TEST_CASE_P(
- SSSE3, AV1CompMaskUpVarianceTest,
- ::testing::Combine(::testing::Values(&aom_comp_mask_pred_ssse3),
- ::testing::ValuesIn(kValidBlockSize)));
-#endif
-
-#if HAVE_AVX2
-INSTANTIATE_TEST_CASE_P(
- AVX2, AV1CompMaskUpVarianceTest,
- ::testing::Combine(::testing::Values(&aom_comp_mask_pred_avx2),
- ::testing::ValuesIn(kValidBlockSize)));
-#endif
-
-#endif // ifndef aom_comp_mask_pred
-
-typedef void (*highbd_comp_mask_pred_func)(uint8_t *comp_pred8,
- const uint8_t *pred8, int width,
- int height, const uint8_t *ref8,
- int ref_stride, const uint8_t *mask,
- int mask_stride, int invert_mask);
-
-typedef ::testing::tuple<highbd_comp_mask_pred_func, BLOCK_SIZE, int>
- HighbdCompMaskPredParam;
-
-class AV1HighbdCompMaskVarianceTest
- : public ::testing::TestWithParam<HighbdCompMaskPredParam> {
- public:
- ~AV1HighbdCompMaskVarianceTest();
- void SetUp();
-
- void TearDown();
-
- protected:
- void RunCheckOutput(highbd_comp_mask_pred_func test_impl, BLOCK_SIZE bsize,
- int inv);
- void RunSpeedTest(highbd_comp_mask_pred_func test_impl, BLOCK_SIZE bsize);
- bool CheckResult(int width, int height) {
- for (int y = 0; y < height; ++y) {
- for (int x = 0; x < width; ++x) {
- const int idx = y * width + x;
- if (comp_pred1_[idx] != comp_pred2_[idx]) {
- printf("%dx%d mismatch @%d(%d,%d) ", width, height, idx, y, x);
- printf("%d != %d ", comp_pred1_[idx], comp_pred2_[idx]);
- return false;
- }
- }
- }
- return true;
- }
-
- libaom_test::ACMRandom rnd_;
- uint16_t *comp_pred1_;
- uint16_t *comp_pred2_;
- uint16_t *pred_;
- uint16_t *ref_buffer_;
- uint16_t *ref_;
-};
-
-AV1HighbdCompMaskVarianceTest::~AV1HighbdCompMaskVarianceTest() { ; }
-
-void AV1HighbdCompMaskVarianceTest::SetUp() {
- rnd_.Reset(libaom_test::ACMRandom::DeterministicSeed());
- av1_init_wedge_masks();
-
- comp_pred1_ =
- (uint16_t *)aom_memalign(16, MAX_SB_SQUARE * sizeof(*comp_pred1_));
- comp_pred2_ =
- (uint16_t *)aom_memalign(16, MAX_SB_SQUARE * sizeof(*comp_pred2_));
- pred_ = (uint16_t *)aom_memalign(16, MAX_SB_SQUARE * sizeof(*pred_));
- ref_buffer_ = (uint16_t *)aom_memalign(
- 16, (MAX_SB_SQUARE + (8 * MAX_SB_SIZE)) * sizeof(*ref_buffer_));
- ref_ = ref_buffer_ + (8 * MAX_SB_SIZE);
-}
-
-void AV1HighbdCompMaskVarianceTest::TearDown() {
- aom_free(comp_pred1_);
- aom_free(comp_pred2_);
- aom_free(pred_);
- aom_free(ref_buffer_);
- libaom_test::ClearSystemState();
-}
-
-void AV1HighbdCompMaskVarianceTest::RunCheckOutput(
- highbd_comp_mask_pred_func test_impl, BLOCK_SIZE bsize, int inv) {
- int bd_ = GET_PARAM(2);
-
- const int w = block_size_wide[bsize];
- const int h = block_size_high[bsize];
-
- int wedge_types = (1 << get_wedge_bits_lookup(bsize));
-
- for (int i = 0; i < MAX_SB_SQUARE; ++i) {
- pred_[i] = rnd_.Rand16() & ((1 << bd_) - 1);
- }
- for (int i = 0; i < MAX_SB_SQUARE + (8 * MAX_SB_SIZE); ++i) {
- ref_buffer_[i] = rnd_.Rand16() & ((1 << bd_) - 1);
- }
-
- for (int wedge_index = 0; wedge_index < wedge_types; ++wedge_index) {
- const uint8_t *mask = av1_get_contiguous_soft_mask(wedge_index, 1, bsize);
-
- aom_highbd_comp_mask_pred_c(
- CONVERT_TO_BYTEPTR(comp_pred1_), CONVERT_TO_BYTEPTR(pred_), w, h,
- CONVERT_TO_BYTEPTR(ref_), MAX_SB_SIZE, mask, w, inv);
-
- test_impl(CONVERT_TO_BYTEPTR(comp_pred2_), CONVERT_TO_BYTEPTR(pred_), w, h,
- CONVERT_TO_BYTEPTR(ref_), MAX_SB_SIZE, mask, w, inv);
-
- ASSERT_EQ(CheckResult(w, h), true)
- << " wedge " << wedge_index << " inv " << inv;
- }
-}
-
-void AV1HighbdCompMaskVarianceTest::RunSpeedTest(
- highbd_comp_mask_pred_func test_impl, BLOCK_SIZE bsize) {
- int bd_ = GET_PARAM(2);
-
- const int w = block_size_wide[bsize];
- const int h = block_size_high[bsize];
-
- int wedge_types = (1 << get_wedge_bits_lookup(bsize));
- int wedge_index = wedge_types / 2;
-
- for (int i = 0; i < MAX_SB_SQUARE; ++i) {
- pred_[i] = rnd_.Rand16() & ((1 << bd_) - 1);
- }
- for (int i = 0; i < MAX_SB_SQUARE + (8 * MAX_SB_SIZE); ++i) {
- ref_buffer_[i] = rnd_.Rand16() & ((1 << bd_) - 1);
- }
-
- const uint8_t *mask = av1_get_contiguous_soft_mask(wedge_index, 1, bsize);
- const int num_loops = 1000000000 / (w + h);
-
- highbd_comp_mask_pred_func funcs[2] = { aom_highbd_comp_mask_pred_c,
- test_impl };
- double elapsed_time[2] = { 0 };
- for (int i = 0; i < 2; ++i) {
- aom_usec_timer timer;
- aom_usec_timer_start(&timer);
- highbd_comp_mask_pred_func func = funcs[i];
- for (int j = 0; j < num_loops; ++j) {
- func(CONVERT_TO_BYTEPTR(comp_pred1_), CONVERT_TO_BYTEPTR(pred_), w, h,
- CONVERT_TO_BYTEPTR(ref_), MAX_SB_SIZE, mask, w, 0);
- }
- aom_usec_timer_mark(&timer);
- double time = static_cast<double>(aom_usec_timer_elapsed(&timer));
- elapsed_time[i] = 1000.0 * time / num_loops;
- }
- printf("compMask %3dx%-3d: %7.2f/%7.2fns", w, h, elapsed_time[0],
- elapsed_time[1]);
- printf("(%3.2f)\n", elapsed_time[0] / elapsed_time[1]);
-}
-
-TEST_P(AV1HighbdCompMaskVarianceTest, CheckOutput) {
- // inv = 0, 1
- RunCheckOutput(GET_PARAM(0), GET_PARAM(1), 0);
- RunCheckOutput(GET_PARAM(0), GET_PARAM(1), 1);
-}
-
-TEST_P(AV1HighbdCompMaskVarianceTest, DISABLED_Speed) {
- RunSpeedTest(GET_PARAM(0), GET_PARAM(1));
-}
-
-#if HAVE_AVX2
-INSTANTIATE_TEST_CASE_P(
- AVX2, AV1HighbdCompMaskVarianceTest,
- ::testing::Combine(::testing::Values(&aom_highbd_comp_mask_pred_avx2),
- ::testing::ValuesIn(kValidBlockSize),
- ::testing::Range(8, 13, 2)));
-#endif
-
-#if HAVE_SSE2
-INSTANTIATE_TEST_CASE_P(
- SSE2, AV1HighbdCompMaskVarianceTest,
- ::testing::Combine(::testing::Values(&aom_highbd_comp_mask_pred_sse2),
- ::testing::ValuesIn(kValidBlockSize),
- ::testing::Range(8, 13, 2)));
-#endif
-
-#ifndef aom_highbd_comp_mask_pred
-// can't run this test if aom_highbd_comp_mask_pred is defined to
-// aom_highbd_comp_mask_pred_c
-class AV1HighbdCompMaskUpVarianceTest : public AV1HighbdCompMaskVarianceTest {
- public:
- ~AV1HighbdCompMaskUpVarianceTest();
-
- protected:
- void RunCheckOutput(highbd_comp_mask_pred_func test_impl, BLOCK_SIZE bsize,
- int inv);
- void RunSpeedTest(highbd_comp_mask_pred_func test_impl, BLOCK_SIZE bsize,
- int havSub);
-};
-
-AV1HighbdCompMaskUpVarianceTest::~AV1HighbdCompMaskUpVarianceTest() { ; }
-
-void AV1HighbdCompMaskUpVarianceTest::RunCheckOutput(
- highbd_comp_mask_pred_func test_impl, BLOCK_SIZE bsize, int inv) {
- int bd_ = GET_PARAM(2);
- const int w = block_size_wide[bsize];
- const int h = block_size_high[bsize];
- int wedge_types = (1 << get_wedge_bits_lookup(bsize));
-
- for (int i = 0; i < MAX_SB_SQUARE; ++i) {
- pred_[i] = rnd_.Rand16() & ((1 << bd_) - 1);
- }
- for (int i = 0; i < MAX_SB_SQUARE + (8 * MAX_SB_SIZE); ++i) {
- ref_buffer_[i] = rnd_.Rand16() & ((1 << bd_) - 1);
- }
-
- int subpel_search;
- for (subpel_search = 1; subpel_search <= 2; ++subpel_search) {
- // loop through subx and suby
- for (int sub = 0; sub < 8 * 8; ++sub) {
- int subx = sub & 0x7;
- int suby = (sub >> 3);
- for (int wedge_index = 0; wedge_index < wedge_types; ++wedge_index) {
- const uint8_t *mask =
- av1_get_contiguous_soft_mask(wedge_index, 1, bsize);
-
- aom_highbd_comp_mask_pred = aom_highbd_comp_mask_pred_c; // ref
- aom_highbd_comp_mask_upsampled_pred(
- NULL, NULL, 0, 0, NULL, CONVERT_TO_BYTEPTR(comp_pred1_),
- CONVERT_TO_BYTEPTR(pred_), w, h, subx, suby,
- CONVERT_TO_BYTEPTR(ref_), MAX_SB_SIZE, mask, w, inv, bd_,
- subpel_search);
-
- aom_highbd_comp_mask_pred = test_impl; // test
- aom_highbd_comp_mask_upsampled_pred(
- NULL, NULL, 0, 0, NULL, CONVERT_TO_BYTEPTR(comp_pred2_),
- CONVERT_TO_BYTEPTR(pred_), w, h, subx, suby,
- CONVERT_TO_BYTEPTR(ref_), MAX_SB_SIZE, mask, w, inv, bd_,
- subpel_search);
- ASSERT_EQ(CheckResult(w, h), true)
- << " wedge " << wedge_index << " inv " << inv << "sub (" << subx
- << "," << suby << ")";
- }
- }
- }
-}
-
-void AV1HighbdCompMaskUpVarianceTest::RunSpeedTest(
- highbd_comp_mask_pred_func test_impl, BLOCK_SIZE bsize, int havSub) {
- int bd_ = GET_PARAM(2);
- const int w = block_size_wide[bsize];
- const int h = block_size_high[bsize];
- const int subx = havSub ? 3 : 0;
- const int suby = havSub ? 4 : 0;
-
- int wedge_types = (1 << get_wedge_bits_lookup(bsize));
- int wedge_index = wedge_types / 2;
- const uint8_t *mask = av1_get_contiguous_soft_mask(wedge_index, 1, bsize);
-
- for (int i = 0; i < MAX_SB_SQUARE; ++i) {
- pred_[i] = rnd_.Rand16() & ((1 << bd_) - 1);
- }
- for (int i = 0; i < MAX_SB_SQUARE + (8 * MAX_SB_SIZE); ++i) {
- ref_buffer_[i] = rnd_.Rand16() & ((1 << bd_) - 1);
- }
-
- const int num_loops = 1000000000 / (w + h);
- highbd_comp_mask_pred_func funcs[2] = { &aom_highbd_comp_mask_pred_c,
- test_impl };
- double elapsed_time[2] = { 0 };
- for (int i = 0; i < 2; ++i) {
- aom_usec_timer timer;
- aom_usec_timer_start(&timer);
- aom_highbd_comp_mask_pred = funcs[i];
- int subpel_search = 2; // set to 1 to test 4-tap filter.
- for (int j = 0; j < num_loops; ++j) {
- aom_highbd_comp_mask_upsampled_pred(
- NULL, NULL, 0, 0, NULL, CONVERT_TO_BYTEPTR(comp_pred1_),
- CONVERT_TO_BYTEPTR(pred_), w, h, subx, suby, CONVERT_TO_BYTEPTR(ref_),
- MAX_SB_SIZE, mask, w, 0, bd_, subpel_search);
- }
- aom_usec_timer_mark(&timer);
- double time = static_cast<double>(aom_usec_timer_elapsed(&timer));
- elapsed_time[i] = 1000.0 * time / num_loops;
- }
- printf("CompMaskUp[%d] %3dx%-3d:%7.2f/%7.2fns", havSub, w, h, elapsed_time[0],
- elapsed_time[1]);
- printf("(%3.2f)\n", elapsed_time[0] / elapsed_time[1]);
-}
-
-TEST_P(AV1HighbdCompMaskUpVarianceTest, CheckOutput) {
- // inv mask = 0, 1
- RunCheckOutput(GET_PARAM(0), GET_PARAM(1), 0);
- RunCheckOutput(GET_PARAM(0), GET_PARAM(1), 1);
-}
-
-TEST_P(AV1HighbdCompMaskUpVarianceTest, DISABLED_Speed) {
- RunSpeedTest(GET_PARAM(0), GET_PARAM(1), 1);
-}
-
-#if HAVE_AVX2
-INSTANTIATE_TEST_CASE_P(
- AVX2, AV1HighbdCompMaskUpVarianceTest,
- ::testing::Combine(::testing::Values(&aom_highbd_comp_mask_pred_avx2),
- ::testing::ValuesIn(kValidBlockSize),
- ::testing::Range(8, 13, 2)));
-#endif
-
-#if HAVE_SSE2
-INSTANTIATE_TEST_CASE_P(
- SSE2, AV1HighbdCompMaskUpVarianceTest,
- ::testing::Combine(::testing::Values(&aom_highbd_comp_mask_pred_sse2),
- ::testing::ValuesIn(kValidBlockSize),
- ::testing::Range(8, 13, 2)));
-#endif
-
-#endif // ifndef aom_highbd_comp_mask_pred
-} // namespace AV1CompMaskVariance
diff --git a/third_party/aom/test/convolve_round_test.cc b/third_party/aom/test/convolve_round_test.cc
deleted file mode 100644
index 2f801e7d4..000000000
--- a/third_party/aom/test/convolve_round_test.cc
+++ /dev/null
@@ -1,183 +0,0 @@
-/*
- * Copyright (c) 2017, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#include <assert.h>
-
-#include "config/av1_rtcd.h"
-
-#include "aom/aom_integer.h"
-#include "aom_ports/aom_timer.h"
-#include "test/acm_random.h"
-#include "test/clear_system_state.h"
-#include "test/register_state_check.h"
-#include "test/util.h"
-#include "third_party/googletest/src/googletest/include/gtest/gtest.h"
-
-using libaom_test::ACMRandom;
-
-namespace {
-#define CONVOLVE_ROUNDING_PARAM \
- const int32_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, \
- int h, int bits
-
-typedef void (*ConvolveRoundFunc)(CONVOLVE_ROUNDING_PARAM);
-
-typedef void (*ConvolveRoundFuncHbd)(CONVOLVE_ROUNDING_PARAM, int bd);
-
-template <ConvolveRoundFuncHbd fn>
-void highbd_convolve_rounding_8(CONVOLVE_ROUNDING_PARAM) {
- const int bd = 8;
- fn(src, src_stride, dst, dst_stride, w, h, bits, bd);
-}
-
-template <ConvolveRoundFuncHbd fn>
-void highbd_convolve_rounding_10(CONVOLVE_ROUNDING_PARAM) {
- const int bd = 10;
- fn(src, src_stride, dst, dst_stride, w, h, bits, bd);
-}
-
-template <ConvolveRoundFuncHbd fn>
-void highbd_convolve_rounding_12(CONVOLVE_ROUNDING_PARAM) {
- const int bd = 12;
- fn(src, src_stride, dst, dst_stride, w, h, bits, bd);
-}
-
-typedef enum { LOWBITDEPTH_TEST, HIGHBITDEPTH_TEST } DataPathType;
-
-using ::testing::tuple;
-
-typedef tuple<ConvolveRoundFunc, ConvolveRoundFunc, DataPathType>
- ConvolveRoundParam;
-
-const int kTestNum = 5000;
-
-class ConvolveRoundTest : public ::testing::TestWithParam<ConvolveRoundParam> {
- protected:
- ConvolveRoundTest()
- : func_ref_(GET_PARAM(0)), func_(GET_PARAM(1)), data_path_(GET_PARAM(2)) {
- }
- virtual ~ConvolveRoundTest() {}
-
- virtual void SetUp() {
- const size_t block_size = 128 * 128;
- src_ = reinterpret_cast<int32_t *>(
- aom_memalign(16, block_size * sizeof(*src_)));
- dst_ref_ = reinterpret_cast<uint16_t *>(
- aom_memalign(16, block_size * sizeof(*dst_ref_)));
- dst_ = reinterpret_cast<uint16_t *>(
- aom_memalign(16, block_size * sizeof(*dst_)));
- }
-
- virtual void TearDown() {
- aom_free(src_);
- aom_free(dst_ref_);
- aom_free(dst_);
- }
-
- void ConvolveRoundingRun() {
- int test_num = 0;
- const int src_stride = 128;
- const int dst_stride = 128;
- int bits = 13;
- uint8_t *dst = 0;
- uint8_t *dst_ref = 0;
-
- if (data_path_ == LOWBITDEPTH_TEST) {
- dst = reinterpret_cast<uint8_t *>(dst_);
- dst_ref = reinterpret_cast<uint8_t *>(dst_ref_);
- } else if (data_path_ == HIGHBITDEPTH_TEST) {
- dst = CONVERT_TO_BYTEPTR(dst_);
- dst_ref = CONVERT_TO_BYTEPTR(dst_ref_);
- } else {
- assert(0);
- }
-
- while (test_num < kTestNum) {
- int block_size = test_num % BLOCK_SIZES_ALL;
- int w = block_size_wide[block_size];
- int h = block_size_high[block_size];
-
- if (test_num % 2 == 0)
- bits -= 1;
- else
- bits += 1;
-
- GenerateBufferWithRandom(src_, src_stride, bits, w, h);
-
- func_ref_(src_, src_stride, dst_ref, dst_stride, w, h, bits);
- ASM_REGISTER_STATE_CHECK(
- func_(src_, src_stride, dst, dst_stride, w, h, bits));
-
- if (data_path_ == LOWBITDEPTH_TEST) {
- for (int r = 0; r < h; ++r) {
- for (int c = 0; c < w; ++c) {
- ASSERT_EQ(dst_ref[r * dst_stride + c], dst[r * dst_stride + c])
- << "Mismatch at r: " << r << " c: " << c << " w: " << w
- << " h: " << h << " test: " << test_num;
- }
- }
- } else {
- for (int r = 0; r < h; ++r) {
- for (int c = 0; c < w; ++c) {
- ASSERT_EQ(dst_ref_[r * dst_stride + c], dst_[r * dst_stride + c])
- << "Mismatch at r: " << r << " c: " << c << " w: " << w
- << " h: " << h << " test: " << test_num;
- }
- }
- }
-
- test_num++;
- }
- }
-
- void GenerateBufferWithRandom(int32_t *src, int src_stride, int bits, int w,
- int h) {
- int32_t number;
- for (int r = 0; r < h; ++r) {
- for (int c = 0; c < w; ++c) {
- number = static_cast<int32_t>(rand_.Rand31());
- number %= 1 << (bits + 9);
- src[r * src_stride + c] = number;
- }
- }
- }
-
- ACMRandom rand_;
- int32_t *src_;
- uint16_t *dst_ref_;
- uint16_t *dst_;
-
- ConvolveRoundFunc func_ref_;
- ConvolveRoundFunc func_;
- DataPathType data_path_;
-};
-
-TEST_P(ConvolveRoundTest, BitExactCheck) { ConvolveRoundingRun(); }
-
-using ::testing::make_tuple;
-#if HAVE_AVX2
-const ConvolveRoundParam kConvRndParamArray[] = {
- make_tuple(&av1_convolve_rounding_c, &av1_convolve_rounding_avx2,
- LOWBITDEPTH_TEST),
- make_tuple(&highbd_convolve_rounding_8<av1_highbd_convolve_rounding_c>,
- &highbd_convolve_rounding_8<av1_highbd_convolve_rounding_avx2>,
- HIGHBITDEPTH_TEST),
- make_tuple(&highbd_convolve_rounding_10<av1_highbd_convolve_rounding_c>,
- &highbd_convolve_rounding_10<av1_highbd_convolve_rounding_avx2>,
- HIGHBITDEPTH_TEST),
- make_tuple(&highbd_convolve_rounding_12<av1_highbd_convolve_rounding_c>,
- &highbd_convolve_rounding_12<av1_highbd_convolve_rounding_avx2>,
- HIGHBITDEPTH_TEST)
-};
-INSTANTIATE_TEST_CASE_P(AVX2, ConvolveRoundTest,
- ::testing::ValuesIn(kConvRndParamArray));
-#endif // HAVE_AVX2
-} // namespace
diff --git a/third_party/aom/test/convolve_test.cc b/third_party/aom/test/convolve_test.cc
deleted file mode 100644
index de3f47628..000000000
--- a/third_party/aom/test/convolve_test.cc
+++ /dev/null
@@ -1,856 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#include <string.h>
-
-#include "third_party/googletest/src/googletest/include/gtest/gtest.h"
-
-#include "config/aom_config.h"
-#include "config/aom_dsp_rtcd.h"
-
-#include "aom_dsp/aom_dsp_common.h"
-#include "aom_dsp/aom_filter.h"
-#include "aom_mem/aom_mem.h"
-#include "aom_ports/aom_timer.h"
-#include "aom_ports/mem.h"
-#include "av1/common/filter.h"
-#include "test/acm_random.h"
-#include "test/clear_system_state.h"
-#include "test/register_state_check.h"
-#include "test/util.h"
-
-namespace {
-
-static const unsigned int kMaxDimension = MAX_SB_SIZE;
-
-typedef void (*ConvolveFunc)(const uint8_t *src, ptrdiff_t src_stride,
- uint8_t *dst, ptrdiff_t dst_stride,
- const int16_t *filter_x, int filter_x_stride,
- const int16_t *filter_y, int filter_y_stride,
- int w, int h);
-
-struct ConvolveFunctions {
- ConvolveFunctions(ConvolveFunc copy, ConvolveFunc h8, ConvolveFunc v8, int bd)
- : copy_(copy), h8_(h8), v8_(v8), use_highbd_(bd) {}
-
- ConvolveFunc copy_;
- ConvolveFunc h8_;
- ConvolveFunc v8_;
- int use_highbd_; // 0 if high bitdepth not used, else the actual bit depth.
-};
-
-typedef ::testing::tuple<int, int, const ConvolveFunctions *> ConvolveParam;
-
-#define ALL_SIZES_64(convolve_fn) \
- make_tuple(4, 4, &convolve_fn), make_tuple(8, 4, &convolve_fn), \
- make_tuple(4, 8, &convolve_fn), make_tuple(8, 8, &convolve_fn), \
- make_tuple(16, 8, &convolve_fn), make_tuple(8, 16, &convolve_fn), \
- make_tuple(16, 16, &convolve_fn), make_tuple(32, 16, &convolve_fn), \
- make_tuple(16, 32, &convolve_fn), make_tuple(32, 32, &convolve_fn), \
- make_tuple(64, 32, &convolve_fn), make_tuple(32, 64, &convolve_fn), \
- make_tuple(64, 64, &convolve_fn)
-
-#define ALL_SIZES(convolve_fn) \
- make_tuple(128, 64, &convolve_fn), make_tuple(64, 128, &convolve_fn), \
- make_tuple(128, 128, &convolve_fn), ALL_SIZES_64(convolve_fn)
-
-// Reference 8-tap subpixel filter, slightly modified to fit into this test.
-#define AV1_FILTER_WEIGHT 128
-#define AV1_FILTER_SHIFT 7
-uint8_t clip_pixel(int x) { return x < 0 ? 0 : x > 255 ? 255 : x; }
-
-void filter_block2d_8_c(const uint8_t *src_ptr, unsigned int src_stride,
- const int16_t *HFilter, const int16_t *VFilter,
- uint8_t *dst_ptr, unsigned int dst_stride,
- unsigned int output_width, unsigned int output_height) {
- // Between passes, we use an intermediate buffer whose height is extended to
- // have enough horizontally filtered values as input for the vertical pass.
- // This buffer is allocated to be big enough for the largest block type we
- // support.
- const int kInterp_Extend = 4;
- const unsigned int intermediate_height =
- (kInterp_Extend - 1) + output_height + kInterp_Extend;
- unsigned int i, j;
-
- assert(intermediate_height > 7);
-
- // Size of intermediate_buffer is max_intermediate_height * filter_max_width,
- // where max_intermediate_height = (kInterp_Extend - 1) + filter_max_height
- // + kInterp_Extend
- // = 3 + 16 + 4
- // = 23
- // and filter_max_width = 16
- //
- uint8_t intermediate_buffer[(kMaxDimension + 8) * kMaxDimension];
- const int intermediate_next_stride =
- 1 - static_cast<int>(intermediate_height * output_width);
-
- // Horizontal pass (src -> transposed intermediate).
- uint8_t *output_ptr = intermediate_buffer;
- const int src_next_row_stride = src_stride - output_width;
- src_ptr -= (kInterp_Extend - 1) * src_stride + (kInterp_Extend - 1);
- for (i = 0; i < intermediate_height; ++i) {
- for (j = 0; j < output_width; ++j) {
- // Apply filter...
- const int temp = (src_ptr[0] * HFilter[0]) + (src_ptr[1] * HFilter[1]) +
- (src_ptr[2] * HFilter[2]) + (src_ptr[3] * HFilter[3]) +
- (src_ptr[4] * HFilter[4]) + (src_ptr[5] * HFilter[5]) +
- (src_ptr[6] * HFilter[6]) + (src_ptr[7] * HFilter[7]) +
- (AV1_FILTER_WEIGHT >> 1); // Rounding
-
- // Normalize back to 0-255...
- *output_ptr = clip_pixel(temp >> AV1_FILTER_SHIFT);
- ++src_ptr;
- output_ptr += intermediate_height;
- }
- src_ptr += src_next_row_stride;
- output_ptr += intermediate_next_stride;
- }
-
- // Vertical pass (transposed intermediate -> dst).
- src_ptr = intermediate_buffer;
- const int dst_next_row_stride = dst_stride - output_width;
- for (i = 0; i < output_height; ++i) {
- for (j = 0; j < output_width; ++j) {
- // Apply filter...
- const int temp = (src_ptr[0] * VFilter[0]) + (src_ptr[1] * VFilter[1]) +
- (src_ptr[2] * VFilter[2]) + (src_ptr[3] * VFilter[3]) +
- (src_ptr[4] * VFilter[4]) + (src_ptr[5] * VFilter[5]) +
- (src_ptr[6] * VFilter[6]) + (src_ptr[7] * VFilter[7]) +
- (AV1_FILTER_WEIGHT >> 1); // Rounding
-
- // Normalize back to 0-255...
- *dst_ptr++ = clip_pixel(temp >> AV1_FILTER_SHIFT);
- src_ptr += intermediate_height;
- }
- src_ptr += intermediate_next_stride;
- dst_ptr += dst_next_row_stride;
- }
-}
-
-void block2d_average_c(uint8_t *src, unsigned int src_stride,
- uint8_t *output_ptr, unsigned int output_stride,
- unsigned int output_width, unsigned int output_height) {
- unsigned int i, j;
- for (i = 0; i < output_height; ++i) {
- for (j = 0; j < output_width; ++j) {
- output_ptr[j] = (output_ptr[j] + src[i * src_stride + j] + 1) >> 1;
- }
- output_ptr += output_stride;
- }
-}
-
-void filter_average_block2d_8_c(const uint8_t *src_ptr,
- const unsigned int src_stride,
- const int16_t *HFilter, const int16_t *VFilter,
- uint8_t *dst_ptr, unsigned int dst_stride,
- unsigned int output_width,
- unsigned int output_height) {
- uint8_t tmp[kMaxDimension * kMaxDimension];
-
- assert(output_width <= kMaxDimension);
- assert(output_height <= kMaxDimension);
- filter_block2d_8_c(src_ptr, src_stride, HFilter, VFilter, tmp, kMaxDimension,
- output_width, output_height);
- block2d_average_c(tmp, kMaxDimension, dst_ptr, dst_stride, output_width,
- output_height);
-}
-
-void highbd_filter_block2d_8_c(const uint16_t *src_ptr,
- const unsigned int src_stride,
- const int16_t *HFilter, const int16_t *VFilter,
- uint16_t *dst_ptr, unsigned int dst_stride,
- unsigned int output_width,
- unsigned int output_height, int bd) {
- // Between passes, we use an intermediate buffer whose height is extended to
- // have enough horizontally filtered values as input for the vertical pass.
- // This buffer is allocated to be big enough for the largest block type we
- // support.
- const int kInterp_Extend = 4;
- const unsigned int intermediate_height =
- (kInterp_Extend - 1) + output_height + kInterp_Extend;
-
- /* Size of intermediate_buffer is max_intermediate_height * filter_max_width,
- * where max_intermediate_height = (kInterp_Extend - 1) + filter_max_height
- * + kInterp_Extend
- * = 3 + 16 + 4
- * = 23
- * and filter_max_width = 16
- */
- uint16_t intermediate_buffer[(kMaxDimension + 8) * kMaxDimension] = { 0 };
- const int intermediate_next_stride =
- 1 - static_cast<int>(intermediate_height * output_width);
-
- // Horizontal pass (src -> transposed intermediate).
- {
- uint16_t *output_ptr = intermediate_buffer;
- const int src_next_row_stride = src_stride - output_width;
- unsigned int i, j;
- src_ptr -= (kInterp_Extend - 1) * src_stride + (kInterp_Extend - 1);
- for (i = 0; i < intermediate_height; ++i) {
- for (j = 0; j < output_width; ++j) {
- // Apply filter...
- const int temp = (src_ptr[0] * HFilter[0]) + (src_ptr[1] * HFilter[1]) +
- (src_ptr[2] * HFilter[2]) + (src_ptr[3] * HFilter[3]) +
- (src_ptr[4] * HFilter[4]) + (src_ptr[5] * HFilter[5]) +
- (src_ptr[6] * HFilter[6]) + (src_ptr[7] * HFilter[7]) +
- (AV1_FILTER_WEIGHT >> 1); // Rounding
-
- // Normalize back to 0-255...
- *output_ptr = clip_pixel_highbd(temp >> AV1_FILTER_SHIFT, bd);
- ++src_ptr;
- output_ptr += intermediate_height;
- }
- src_ptr += src_next_row_stride;
- output_ptr += intermediate_next_stride;
- }
- }
-
- // Vertical pass (transposed intermediate -> dst).
- {
- const uint16_t *interm_ptr = intermediate_buffer;
- const int dst_next_row_stride = dst_stride - output_width;
- unsigned int i, j;
- for (i = 0; i < output_height; ++i) {
- for (j = 0; j < output_width; ++j) {
- // Apply filter...
- const int temp =
- (interm_ptr[0] * VFilter[0]) + (interm_ptr[1] * VFilter[1]) +
- (interm_ptr[2] * VFilter[2]) + (interm_ptr[3] * VFilter[3]) +
- (interm_ptr[4] * VFilter[4]) + (interm_ptr[5] * VFilter[5]) +
- (interm_ptr[6] * VFilter[6]) + (interm_ptr[7] * VFilter[7]) +
- (AV1_FILTER_WEIGHT >> 1); // Rounding
-
- // Normalize back to 0-255...
- *dst_ptr++ = clip_pixel_highbd(temp >> AV1_FILTER_SHIFT, bd);
- interm_ptr += intermediate_height;
- }
- interm_ptr += intermediate_next_stride;
- dst_ptr += dst_next_row_stride;
- }
- }
-}
-
-void highbd_block2d_average_c(uint16_t *src, unsigned int src_stride,
- uint16_t *output_ptr, unsigned int output_stride,
- unsigned int output_width,
- unsigned int output_height) {
- unsigned int i, j;
- for (i = 0; i < output_height; ++i) {
- for (j = 0; j < output_width; ++j) {
- output_ptr[j] = (output_ptr[j] + src[i * src_stride + j] + 1) >> 1;
- }
- output_ptr += output_stride;
- }
-}
-
-void highbd_filter_average_block2d_8_c(
- const uint16_t *src_ptr, unsigned int src_stride, const int16_t *HFilter,
- const int16_t *VFilter, uint16_t *dst_ptr, unsigned int dst_stride,
- unsigned int output_width, unsigned int output_height, int bd) {
- uint16_t tmp[kMaxDimension * kMaxDimension];
-
- assert(output_width <= kMaxDimension);
- assert(output_height <= kMaxDimension);
- highbd_filter_block2d_8_c(src_ptr, src_stride, HFilter, VFilter, tmp,
- kMaxDimension, output_width, output_height, bd);
- highbd_block2d_average_c(tmp, kMaxDimension, dst_ptr, dst_stride,
- output_width, output_height);
-}
-
-class ConvolveTest : public ::testing::TestWithParam<ConvolveParam> {
- public:
- static void SetUpTestCase() {
- // Force input_ to be unaligned, output to be 16 byte aligned.
- input_ = reinterpret_cast<uint8_t *>(
- aom_memalign(kDataAlignment, kInputBufferSize + 1)) +
- 1;
- output_ = reinterpret_cast<uint8_t *>(
- aom_memalign(kDataAlignment, kOutputBufferSize));
- output_ref_ = reinterpret_cast<uint8_t *>(
- aom_memalign(kDataAlignment, kOutputBufferSize));
- input16_ = reinterpret_cast<uint16_t *>(aom_memalign(
- kDataAlignment, (kInputBufferSize + 1) * sizeof(uint16_t))) +
- 1;
- output16_ = reinterpret_cast<uint16_t *>(
- aom_memalign(kDataAlignment, (kOutputBufferSize) * sizeof(uint16_t)));
- output16_ref_ = reinterpret_cast<uint16_t *>(
- aom_memalign(kDataAlignment, (kOutputBufferSize) * sizeof(uint16_t)));
- }
-
- virtual void TearDown() { libaom_test::ClearSystemState(); }
-
- static void TearDownTestCase() {
- aom_free(input_ - 1);
- input_ = NULL;
- aom_free(output_);
- output_ = NULL;
- aom_free(output_ref_);
- output_ref_ = NULL;
- aom_free(input16_ - 1);
- input16_ = NULL;
- aom_free(output16_);
- output16_ = NULL;
- aom_free(output16_ref_);
- output16_ref_ = NULL;
- }
-
- protected:
- static const int kDataAlignment = 16;
- static const int kOuterBlockSize = 4 * kMaxDimension;
- static const int kInputStride = kOuterBlockSize;
- static const int kOutputStride = kOuterBlockSize;
- static const int kInputBufferSize = kOuterBlockSize * kOuterBlockSize;
- static const int kOutputBufferSize = kOuterBlockSize * kOuterBlockSize;
-
- int Width() const { return GET_PARAM(0); }
- int Height() const { return GET_PARAM(1); }
- int BorderLeft() const {
- const int center = (kOuterBlockSize - Width()) / 2;
- return (center + (kDataAlignment - 1)) & ~(kDataAlignment - 1);
- }
- int BorderTop() const { return (kOuterBlockSize - Height()) / 2; }
-
- bool IsIndexInBorder(int i) {
- return (i < BorderTop() * kOuterBlockSize ||
- i >= (BorderTop() + Height()) * kOuterBlockSize ||
- i % kOuterBlockSize < BorderLeft() ||
- i % kOuterBlockSize >= (BorderLeft() + Width()));
- }
-
- virtual void SetUp() {
- UUT_ = GET_PARAM(2);
- if (UUT_->use_highbd_ != 0)
- mask_ = (1 << UUT_->use_highbd_) - 1;
- else
- mask_ = 255;
- /* Set up guard blocks for an inner block centered in the outer block */
- for (int i = 0; i < kOutputBufferSize; ++i) {
- if (IsIndexInBorder(i)) {
- output_[i] = 255;
- output16_[i] = mask_;
- } else {
- output_[i] = 0;
- output16_[i] = 0;
- }
- }
-
- ::libaom_test::ACMRandom prng;
- for (int i = 0; i < kInputBufferSize; ++i) {
- if (i & 1) {
- input_[i] = 255;
- input16_[i] = mask_;
- } else {
- input_[i] = prng.Rand8Extremes();
- input16_[i] = prng.Rand16() & mask_;
- }
- }
- }
-
- void SetConstantInput(int value) {
- memset(input_, value, kInputBufferSize);
- aom_memset16(input16_, value, kInputBufferSize);
- }
-
- void CopyOutputToRef() {
- memcpy(output_ref_, output_, kOutputBufferSize);
- // Copy 16-bit pixels values. The effective number of bytes is double.
- memcpy(output16_ref_, output16_, sizeof(output16_[0]) * kOutputBufferSize);
- }
-
- void CheckGuardBlocks() {
- for (int i = 0; i < kOutputBufferSize; ++i) {
- if (IsIndexInBorder(i)) {
- EXPECT_EQ(255, output_[i]);
- }
- }
- }
-
- uint8_t *input() const {
- const int offset = BorderTop() * kOuterBlockSize + BorderLeft();
- if (UUT_->use_highbd_ == 0) {
- return input_ + offset;
- } else {
- return CONVERT_TO_BYTEPTR(input16_) + offset;
- }
- }
-
- uint8_t *output() const {
- const int offset = BorderTop() * kOuterBlockSize + BorderLeft();
- if (UUT_->use_highbd_ == 0) {
- return output_ + offset;
- } else {
- return CONVERT_TO_BYTEPTR(output16_) + offset;
- }
- }
-
- uint8_t *output_ref() const {
- const int offset = BorderTop() * kOuterBlockSize + BorderLeft();
- if (UUT_->use_highbd_ == 0) {
- return output_ref_ + offset;
- } else {
- return CONVERT_TO_BYTEPTR(output16_ref_) + offset;
- }
- }
-
- uint16_t lookup(uint8_t *list, int index) const {
- if (UUT_->use_highbd_ == 0) {
- return list[index];
- } else {
- return CONVERT_TO_SHORTPTR(list)[index];
- }
- }
-
- void assign_val(uint8_t *list, int index, uint16_t val) const {
- if (UUT_->use_highbd_ == 0) {
- list[index] = (uint8_t)val;
- } else {
- CONVERT_TO_SHORTPTR(list)[index] = val;
- }
- }
-
- void wrapper_filter_average_block2d_8_c(
- const uint8_t *src_ptr, unsigned int src_stride, const int16_t *HFilter,
- const int16_t *VFilter, uint8_t *dst_ptr, unsigned int dst_stride,
- unsigned int output_width, unsigned int output_height) {
- if (UUT_->use_highbd_ == 0) {
- filter_average_block2d_8_c(src_ptr, src_stride, HFilter, VFilter, dst_ptr,
- dst_stride, output_width, output_height);
- } else {
- highbd_filter_average_block2d_8_c(
- CONVERT_TO_SHORTPTR(src_ptr), src_stride, HFilter, VFilter,
- CONVERT_TO_SHORTPTR(dst_ptr), dst_stride, output_width, output_height,
- UUT_->use_highbd_);
- }
- }
-
- void wrapper_filter_block2d_8_c(
- const uint8_t *src_ptr, unsigned int src_stride, const int16_t *HFilter,
- const int16_t *VFilter, uint8_t *dst_ptr, unsigned int dst_stride,
- unsigned int output_width, unsigned int output_height) {
- if (UUT_->use_highbd_ == 0) {
- filter_block2d_8_c(src_ptr, src_stride, HFilter, VFilter, dst_ptr,
- dst_stride, output_width, output_height);
- } else {
- highbd_filter_block2d_8_c(CONVERT_TO_SHORTPTR(src_ptr), src_stride,
- HFilter, VFilter, CONVERT_TO_SHORTPTR(dst_ptr),
- dst_stride, output_width, output_height,
- UUT_->use_highbd_);
- }
- }
-
- const ConvolveFunctions *UUT_;
- static uint8_t *input_;
- static uint8_t *output_;
- static uint8_t *output_ref_;
- static uint16_t *input16_;
- static uint16_t *output16_;
- static uint16_t *output16_ref_;
- int mask_;
-};
-
-uint8_t *ConvolveTest::input_ = NULL;
-uint8_t *ConvolveTest::output_ = NULL;
-uint8_t *ConvolveTest::output_ref_ = NULL;
-uint16_t *ConvolveTest::input16_ = NULL;
-uint16_t *ConvolveTest::output16_ = NULL;
-uint16_t *ConvolveTest::output16_ref_ = NULL;
-
-TEST_P(ConvolveTest, GuardBlocks) { CheckGuardBlocks(); }
-
-TEST_P(ConvolveTest, Copy) {
- uint8_t *const in = input();
- uint8_t *const out = output();
-
- ASM_REGISTER_STATE_CHECK(UUT_->copy_(in, kInputStride, out, kOutputStride,
- NULL, 0, NULL, 0, Width(), Height()));
-
- CheckGuardBlocks();
-
- for (int y = 0; y < Height(); ++y)
- for (int x = 0; x < Width(); ++x)
- ASSERT_EQ(lookup(out, y * kOutputStride + x),
- lookup(in, y * kInputStride + x))
- << "(" << x << "," << y << ")";
-}
-
-const int kNumFilterBanks = SWITCHABLE_FILTERS;
-const int kNumFilters = 16;
-
-TEST(ConvolveTest, FiltersWontSaturateWhenAddedPairwise) {
- for (int filter_bank = 0; filter_bank < kNumFilterBanks; ++filter_bank) {
- const InterpFilter filter = (InterpFilter)filter_bank;
- const InterpKernel *filters =
- (const InterpKernel *)av1_get_interp_filter_kernel(filter);
- const InterpFilterParams *filter_params =
- av1_get_interp_filter_params_with_block_size(filter, 8);
- if (filter_params->taps != SUBPEL_TAPS) continue;
- for (int i = 0; i < kNumFilters; i++) {
- const int p0 = filters[i][0] + filters[i][1];
- const int p1 = filters[i][2] + filters[i][3];
- const int p2 = filters[i][4] + filters[i][5];
- const int p3 = filters[i][6] + filters[i][7];
- EXPECT_LE(p0, 128);
- EXPECT_LE(p1, 128);
- EXPECT_LE(p2, 128);
- EXPECT_LE(p3, 128);
- EXPECT_LE(p0 + p3, 128);
- EXPECT_LE(p0 + p3 + p1, 128);
- EXPECT_LE(p0 + p3 + p1 + p2, 128);
- EXPECT_EQ(p0 + p1 + p2 + p3, 128);
- }
- }
-}
-
-const int16_t kInvalidFilter[8] = { 0 };
-
-TEST_P(ConvolveTest, MatchesReferenceSubpixelFilter) {
- uint8_t *const in = input();
- uint8_t *const out = output();
- uint8_t ref8[kOutputStride * kMaxDimension];
- uint16_t ref16[kOutputStride * kMaxDimension];
- uint8_t *ref;
- if (UUT_->use_highbd_ == 0) {
- ref = ref8;
- } else {
- ref = CONVERT_TO_BYTEPTR(ref16);
- }
-
- for (int filter_bank = 0; filter_bank < kNumFilterBanks; ++filter_bank) {
- const InterpFilter filter = (InterpFilter)filter_bank;
- const InterpKernel *filters =
- (const InterpKernel *)av1_get_interp_filter_kernel(filter);
- const InterpFilterParams *filter_params =
- av1_get_interp_filter_params_with_block_size(filter, 8);
- if (filter_params->taps != SUBPEL_TAPS) continue;
-
- for (int filter_x = 0; filter_x < kNumFilters; ++filter_x) {
- for (int filter_y = 0; filter_y < kNumFilters; ++filter_y) {
- wrapper_filter_block2d_8_c(in, kInputStride, filters[filter_x],
- filters[filter_y], ref, kOutputStride,
- Width(), Height());
-
- if (filter_x && filter_y)
- continue;
- else if (filter_y)
- ASM_REGISTER_STATE_CHECK(
- UUT_->v8_(in, kInputStride, out, kOutputStride, kInvalidFilter,
- 16, filters[filter_y], 16, Width(), Height()));
- else if (filter_x)
- ASM_REGISTER_STATE_CHECK(
- UUT_->h8_(in, kInputStride, out, kOutputStride, filters[filter_x],
- 16, kInvalidFilter, 16, Width(), Height()));
- else
- ASM_REGISTER_STATE_CHECK(
- UUT_->copy_(in, kInputStride, out, kOutputStride, kInvalidFilter,
- 0, kInvalidFilter, 0, Width(), Height()));
-
- CheckGuardBlocks();
-
- for (int y = 0; y < Height(); ++y)
- for (int x = 0; x < Width(); ++x)
- ASSERT_EQ(lookup(ref, y * kOutputStride + x),
- lookup(out, y * kOutputStride + x))
- << "mismatch at (" << x << "," << y << "), "
- << "filters (" << filter_bank << "," << filter_x << ","
- << filter_y << ")";
- }
- }
- }
-}
-
-TEST_P(ConvolveTest, FilterExtremes) {
- uint8_t *const in = input();
- uint8_t *const out = output();
- uint8_t ref8[kOutputStride * kMaxDimension];
- uint16_t ref16[kOutputStride * kMaxDimension];
- uint8_t *ref;
- if (UUT_->use_highbd_ == 0) {
- ref = ref8;
- } else {
- ref = CONVERT_TO_BYTEPTR(ref16);
- }
-
- // Populate ref and out with some random data
- ::libaom_test::ACMRandom prng;
- for (int y = 0; y < Height(); ++y) {
- for (int x = 0; x < Width(); ++x) {
- uint16_t r;
- if (UUT_->use_highbd_ == 0 || UUT_->use_highbd_ == 8) {
- r = prng.Rand8Extremes();
- } else {
- r = prng.Rand16() & mask_;
- }
- assign_val(out, y * kOutputStride + x, r);
- assign_val(ref, y * kOutputStride + x, r);
- }
- }
-
- for (int axis = 0; axis < 2; axis++) {
- int seed_val = 0;
- while (seed_val < 256) {
- for (int y = 0; y < 8; ++y) {
- for (int x = 0; x < 8; ++x) {
- assign_val(in, y * kOutputStride + x - SUBPEL_TAPS / 2 + 1,
- ((seed_val >> (axis ? y : x)) & 1) * mask_);
- if (axis) seed_val++;
- }
- if (axis)
- seed_val -= 8;
- else
- seed_val++;
- }
- if (axis) seed_val += 8;
-
- for (int filter_bank = 0; filter_bank < kNumFilterBanks; ++filter_bank) {
- const InterpFilter filter = (InterpFilter)filter_bank;
- const InterpKernel *filters =
- (const InterpKernel *)av1_get_interp_filter_kernel(filter);
- const InterpFilterParams *filter_params =
- av1_get_interp_filter_params_with_block_size(filter, 8);
- if (filter_params->taps != SUBPEL_TAPS) continue;
- for (int filter_x = 0; filter_x < kNumFilters; ++filter_x) {
- for (int filter_y = 0; filter_y < kNumFilters; ++filter_y) {
- wrapper_filter_block2d_8_c(in, kInputStride, filters[filter_x],
- filters[filter_y], ref, kOutputStride,
- Width(), Height());
- if (filter_x && filter_y)
- continue;
- else if (filter_y)
- ASM_REGISTER_STATE_CHECK(UUT_->v8_(
- in, kInputStride, out, kOutputStride, kInvalidFilter, 16,
- filters[filter_y], 16, Width(), Height()));
- else if (filter_x)
- ASM_REGISTER_STATE_CHECK(UUT_->h8_(
- in, kInputStride, out, kOutputStride, filters[filter_x], 16,
- kInvalidFilter, 16, Width(), Height()));
- else
- ASM_REGISTER_STATE_CHECK(UUT_->copy_(
- in, kInputStride, out, kOutputStride, kInvalidFilter, 0,
- kInvalidFilter, 0, Width(), Height()));
-
- for (int y = 0; y < Height(); ++y)
- for (int x = 0; x < Width(); ++x)
- ASSERT_EQ(lookup(ref, y * kOutputStride + x),
- lookup(out, y * kOutputStride + x))
- << "mismatch at (" << x << "," << y << "), "
- << "filters (" << filter_bank << "," << filter_x << ","
- << filter_y << ")";
- }
- }
- }
- }
- }
-}
-
-TEST_P(ConvolveTest, DISABLED_Copy_Speed) {
- const uint8_t *const in = input();
- uint8_t *const out = output();
- const int kNumTests = 5000000;
- const int width = Width();
- const int height = Height();
- aom_usec_timer timer;
-
- aom_usec_timer_start(&timer);
- for (int n = 0; n < kNumTests; ++n) {
- UUT_->copy_(in, kInputStride, out, kOutputStride, NULL, 0, NULL, 0, width,
- height);
- }
- aom_usec_timer_mark(&timer);
-
- const int elapsed_time = static_cast<int>(aom_usec_timer_elapsed(&timer));
- printf("convolve_copy_%dx%d_%d: %d us\n", width, height,
- UUT_->use_highbd_ ? UUT_->use_highbd_ : 8, elapsed_time);
-}
-
-TEST_P(ConvolveTest, DISABLED_Speed) {
- uint8_t *const in = input();
- uint8_t *const out = output();
- uint8_t ref8[kOutputStride * kMaxDimension];
- uint16_t ref16[kOutputStride * kMaxDimension];
- uint8_t *ref;
- if (UUT_->use_highbd_ == 0) {
- ref = ref8;
- } else {
- ref = CONVERT_TO_BYTEPTR(ref16);
- }
-
- // Populate ref and out with some random data
- ::libaom_test::ACMRandom prng;
- for (int y = 0; y < Height(); ++y) {
- for (int x = 0; x < Width(); ++x) {
- uint16_t r;
- if (UUT_->use_highbd_ == 0 || UUT_->use_highbd_ == 8) {
- r = prng.Rand8Extremes();
- } else {
- r = prng.Rand16() & mask_;
- }
- assign_val(out, y * kOutputStride + x, r);
- assign_val(ref, y * kOutputStride + x, r);
- }
- }
-
- const InterpFilter filter = (InterpFilter)1;
- const InterpKernel *filters =
- (const InterpKernel *)av1_get_interp_filter_kernel(filter);
- wrapper_filter_average_block2d_8_c(in, kInputStride, filters[1], filters[1],
- out, kOutputStride, Width(), Height());
-
- aom_usec_timer timer;
- int tests_num = 1000;
-
- aom_usec_timer_start(&timer);
- while (tests_num > 0) {
- for (int filter_bank = 0; filter_bank < kNumFilterBanks; ++filter_bank) {
- const InterpFilter filter = (InterpFilter)filter_bank;
- const InterpKernel *filters =
- (const InterpKernel *)av1_get_interp_filter_kernel(filter);
- const InterpFilterParams *filter_params =
- av1_get_interp_filter_params_with_block_size(filter, 8);
- if (filter_params->taps != SUBPEL_TAPS) continue;
-
- for (int filter_x = 0; filter_x < kNumFilters; ++filter_x) {
- for (int filter_y = 0; filter_y < kNumFilters; ++filter_y) {
- if (filter_x && filter_y) continue;
- if (filter_y)
- ASM_REGISTER_STATE_CHECK(
- UUT_->v8_(in, kInputStride, out, kOutputStride, kInvalidFilter,
- 16, filters[filter_y], 16, Width(), Height()));
- else if (filter_x)
- ASM_REGISTER_STATE_CHECK(UUT_->h8_(
- in, kInputStride, out, kOutputStride, filters[filter_x], 16,
- kInvalidFilter, 16, Width(), Height()));
- }
- }
- }
- tests_num--;
- }
- aom_usec_timer_mark(&timer);
-
- const int elapsed_time =
- static_cast<int>(aom_usec_timer_elapsed(&timer) / 1000);
- printf("%dx%d (bitdepth %d) time: %5d ms\n", Width(), Height(),
- UUT_->use_highbd_, elapsed_time);
-}
-
-using ::testing::make_tuple;
-
-#define WRAP(func, bd) \
- static void wrap_##func##_##bd( \
- const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, \
- ptrdiff_t dst_stride, const int16_t *filter_x, int filter_x_stride, \
- const int16_t *filter_y, int filter_y_stride, int w, int h) { \
- aom_highbd_##func(src, src_stride, dst, dst_stride, filter_x, \
- filter_x_stride, filter_y, filter_y_stride, w, h, bd); \
- }
-#if HAVE_SSE2 && ARCH_X86_64
-WRAP(convolve_copy_sse2, 8)
-WRAP(convolve_copy_sse2, 10)
-WRAP(convolve_copy_sse2, 12)
-WRAP(convolve8_horiz_sse2, 8)
-WRAP(convolve8_vert_sse2, 8)
-WRAP(convolve8_horiz_sse2, 10)
-WRAP(convolve8_vert_sse2, 10)
-WRAP(convolve8_horiz_sse2, 12)
-WRAP(convolve8_vert_sse2, 12)
-#endif // HAVE_SSE2 && ARCH_X86_64
-
-WRAP(convolve_copy_c, 8)
-WRAP(convolve8_horiz_c, 8)
-WRAP(convolve8_vert_c, 8)
-WRAP(convolve_copy_c, 10)
-WRAP(convolve8_horiz_c, 10)
-WRAP(convolve8_vert_c, 10)
-WRAP(convolve_copy_c, 12)
-WRAP(convolve8_horiz_c, 12)
-WRAP(convolve8_vert_c, 12)
-
-#if HAVE_AVX2
-WRAP(convolve_copy_avx2, 8)
-WRAP(convolve8_horiz_avx2, 8)
-WRAP(convolve8_vert_avx2, 8)
-
-WRAP(convolve_copy_avx2, 10)
-WRAP(convolve8_horiz_avx2, 10)
-WRAP(convolve8_vert_avx2, 10)
-
-WRAP(convolve_copy_avx2, 12)
-WRAP(convolve8_horiz_avx2, 12)
-WRAP(convolve8_vert_avx2, 12)
-#endif // HAVE_AVX2
-
-#undef WRAP
-
-const ConvolveFunctions convolve8_c(wrap_convolve_copy_c_8,
- wrap_convolve8_horiz_c_8,
- wrap_convolve8_vert_c_8, 8);
-const ConvolveFunctions convolve10_c(wrap_convolve_copy_c_10,
- wrap_convolve8_horiz_c_10,
- wrap_convolve8_vert_c_10, 10);
-const ConvolveFunctions convolve12_c(wrap_convolve_copy_c_12,
- wrap_convolve8_horiz_c_12,
- wrap_convolve8_vert_c_12, 12);
-const ConvolveParam kArrayConvolve_c[] = {
- ALL_SIZES(convolve8_c), ALL_SIZES(convolve10_c), ALL_SIZES(convolve12_c)
-};
-
-INSTANTIATE_TEST_CASE_P(C, ConvolveTest, ::testing::ValuesIn(kArrayConvolve_c));
-
-#if HAVE_SSE2 && ARCH_X86_64
-const ConvolveFunctions convolve8_sse2(wrap_convolve_copy_sse2_8,
- wrap_convolve8_horiz_sse2_8,
- wrap_convolve8_vert_sse2_8, 8);
-const ConvolveFunctions convolve10_sse2(wrap_convolve_copy_sse2_10,
- wrap_convolve8_horiz_sse2_10,
- wrap_convolve8_vert_sse2_10, 10);
-const ConvolveFunctions convolve12_sse2(wrap_convolve_copy_sse2_12,
- wrap_convolve8_horiz_sse2_12,
- wrap_convolve8_vert_sse2_12, 12);
-const ConvolveParam kArrayConvolve_sse2[] = { ALL_SIZES(convolve8_sse2),
- ALL_SIZES(convolve10_sse2),
- ALL_SIZES(convolve12_sse2) };
-INSTANTIATE_TEST_CASE_P(SSE2, ConvolveTest,
- ::testing::ValuesIn(kArrayConvolve_sse2));
-#endif
-
-#if HAVE_SSSE3
-const ConvolveFunctions convolve8_ssse3(aom_convolve_copy_c,
- aom_convolve8_horiz_ssse3,
- aom_convolve8_vert_ssse3, 0);
-
-const ConvolveParam kArrayConvolve8_ssse3[] = { ALL_SIZES(convolve8_ssse3) };
-INSTANTIATE_TEST_CASE_P(SSSE3, ConvolveTest,
- ::testing::ValuesIn(kArrayConvolve8_ssse3));
-#endif
-
-#if HAVE_AVX2
-const ConvolveFunctions convolve8_avx2(aom_convolve_copy_c,
- aom_convolve8_horiz_avx2,
- aom_convolve8_vert_avx2, 0);
-
-const ConvolveFunctions wrap_convolve8_avx2(wrap_convolve_copy_avx2_8,
- wrap_convolve8_horiz_avx2_8,
- wrap_convolve8_vert_avx2_8, 8);
-const ConvolveFunctions wrap_convolve10_avx2(wrap_convolve_copy_avx2_10,
- wrap_convolve8_horiz_avx2_10,
- wrap_convolve8_vert_avx2_10, 10);
-const ConvolveFunctions wrap_convolve12_avx2(wrap_convolve_copy_avx2_12,
- wrap_convolve8_horiz_avx2_12,
- wrap_convolve8_vert_avx2_12, 12);
-const ConvolveParam kArray_Convolve8_avx2[] = {
- ALL_SIZES_64(wrap_convolve8_avx2), ALL_SIZES_64(wrap_convolve10_avx2),
- ALL_SIZES_64(wrap_convolve12_avx2), ALL_SIZES(convolve8_avx2)
-};
-INSTANTIATE_TEST_CASE_P(AVX2, ConvolveTest,
- ::testing::ValuesIn(kArray_Convolve8_avx2));
-#endif // HAVE_AVX2
-
-} // namespace
diff --git a/third_party/aom/test/corner_match_test.cc b/third_party/aom/test/corner_match_test.cc
deleted file mode 100644
index 58e3139c5..000000000
--- a/third_party/aom/test/corner_match_test.cc
+++ /dev/null
@@ -1,100 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-#include "config/av1_rtcd.h"
-
-#include "third_party/googletest/src/googletest/include/gtest/gtest.h"
-#include "test/acm_random.h"
-#include "test/util.h"
-#include "test/clear_system_state.h"
-#include "test/register_state_check.h"
-
-#include "av1/encoder/corner_match.h"
-
-namespace test_libaom {
-
-namespace AV1CornerMatch {
-
-using libaom_test::ACMRandom;
-
-using ::testing::make_tuple;
-using ::testing::tuple;
-typedef tuple<int> CornerMatchParam;
-
-class AV1CornerMatchTest : public ::testing::TestWithParam<CornerMatchParam> {
- public:
- virtual ~AV1CornerMatchTest();
- virtual void SetUp();
-
- virtual void TearDown();
-
- protected:
- void RunCheckOutput();
-
- libaom_test::ACMRandom rnd_;
-};
-
-AV1CornerMatchTest::~AV1CornerMatchTest() {}
-void AV1CornerMatchTest::SetUp() { rnd_.Reset(ACMRandom::DeterministicSeed()); }
-void AV1CornerMatchTest::TearDown() { libaom_test::ClearSystemState(); }
-
-void AV1CornerMatchTest::RunCheckOutput() {
- const int w = 128, h = 128;
- const int num_iters = 10000;
- int i, j;
-
- uint8_t *input1 = new uint8_t[w * h];
- uint8_t *input2 = new uint8_t[w * h];
-
- // Test the two extreme cases:
- // i) Random data, should have correlation close to 0
- // ii) Linearly related data + noise, should have correlation close to 1
- int mode = GET_PARAM(0);
- if (mode == 0) {
- for (i = 0; i < h; ++i)
- for (j = 0; j < w; ++j) {
- input1[i * w + j] = rnd_.Rand8();
- input2[i * w + j] = rnd_.Rand8();
- }
- } else if (mode == 1) {
- for (i = 0; i < h; ++i)
- for (j = 0; j < w; ++j) {
- int v = rnd_.Rand8();
- input1[i * w + j] = v;
- input2[i * w + j] = (v / 2) + (rnd_.Rand8() & 15);
- }
- }
-
- for (i = 0; i < num_iters; ++i) {
- int x1 = MATCH_SZ_BY2 + rnd_.PseudoUniform(w - 2 * MATCH_SZ_BY2);
- int y1 = MATCH_SZ_BY2 + rnd_.PseudoUniform(h - 2 * MATCH_SZ_BY2);
- int x2 = MATCH_SZ_BY2 + rnd_.PseudoUniform(w - 2 * MATCH_SZ_BY2);
- int y2 = MATCH_SZ_BY2 + rnd_.PseudoUniform(h - 2 * MATCH_SZ_BY2);
-
- double res_c =
- compute_cross_correlation_c(input1, w, x1, y1, input2, w, x2, y2);
- double res_sse4 =
- compute_cross_correlation_sse4_1(input1, w, x1, y1, input2, w, x2, y2);
-
- ASSERT_EQ(res_sse4, res_c);
- }
-
- delete[] input1;
- delete[] input2;
-}
-
-TEST_P(AV1CornerMatchTest, CheckOutput) { RunCheckOutput(); }
-
-INSTANTIATE_TEST_CASE_P(SSE4_1, AV1CornerMatchTest,
- ::testing::Values(make_tuple(0), make_tuple(1)));
-
-} // namespace AV1CornerMatch
-
-} // namespace test_libaom
diff --git a/third_party/aom/test/cpu_speed_test.cc b/third_party/aom/test/cpu_speed_test.cc
deleted file mode 100644
index 8ea3e6965..000000000
--- a/third_party/aom/test/cpu_speed_test.cc
+++ /dev/null
@@ -1,180 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#include "third_party/googletest/src/googletest/include/gtest/gtest.h"
-#include "test/codec_factory.h"
-#include "test/encode_test_driver.h"
-#include "test/i420_video_source.h"
-#include "test/util.h"
-#include "test/y4m_video_source.h"
-
-namespace {
-
-const int kMaxPSNR = 100;
-
-class CpuSpeedTest
- : public ::libaom_test::CodecTestWith2Params<libaom_test::TestMode, int>,
- public ::libaom_test::EncoderTest {
- protected:
- CpuSpeedTest()
- : EncoderTest(GET_PARAM(0)), encoding_mode_(GET_PARAM(1)),
- set_cpu_used_(GET_PARAM(2)), min_psnr_(kMaxPSNR),
- tune_content_(AOM_CONTENT_DEFAULT) {}
- virtual ~CpuSpeedTest() {}
-
- virtual void SetUp() {
- InitializeConfig();
- SetMode(encoding_mode_);
- if (encoding_mode_ != ::libaom_test::kRealTime) {
- cfg_.g_lag_in_frames = 25;
- cfg_.rc_end_usage = AOM_VBR;
- } else {
- cfg_.g_lag_in_frames = 0;
- cfg_.rc_end_usage = AOM_CBR;
- }
- }
-
- virtual void BeginPassHook(unsigned int /*pass*/) { min_psnr_ = kMaxPSNR; }
-
- virtual void PreEncodeFrameHook(::libaom_test::VideoSource *video,
- ::libaom_test::Encoder *encoder) {
- if (video->frame() == 1) {
- encoder->Control(AOME_SET_CPUUSED, set_cpu_used_);
- encoder->Control(AV1E_SET_TUNE_CONTENT, tune_content_);
- if (encoding_mode_ != ::libaom_test::kRealTime) {
- encoder->Control(AOME_SET_ENABLEAUTOALTREF, 1);
- encoder->Control(AOME_SET_ARNR_MAXFRAMES, 7);
- encoder->Control(AOME_SET_ARNR_STRENGTH, 5);
- }
- }
- }
-
- virtual void PSNRPktHook(const aom_codec_cx_pkt_t *pkt) {
- if (pkt->data.psnr.psnr[0] < min_psnr_) min_psnr_ = pkt->data.psnr.psnr[0];
- }
-
- void TestQ0();
- void TestScreencastQ0();
- void TestTuneScreen();
- void TestEncodeHighBitrate();
- void TestLowBitrate();
-
- ::libaom_test::TestMode encoding_mode_;
- int set_cpu_used_;
- double min_psnr_;
- int tune_content_;
-};
-
-void CpuSpeedTest::TestQ0() {
- // Validate that this non multiple of 64 wide clip encodes and decodes
- // without a mismatch when passing in a very low max q. This pushes
- // the encoder to producing lots of big partitions which will likely
- // extend into the border and test the border condition.
- cfg_.rc_2pass_vbr_minsection_pct = 5;
- cfg_.rc_2pass_vbr_maxsection_pct = 2000;
- cfg_.rc_target_bitrate = 400;
- cfg_.rc_max_quantizer = 0;
- cfg_.rc_min_quantizer = 0;
-
- ::libaom_test::I420VideoSource video("hantro_odd.yuv", 208, 144, 30, 1, 0,
- 10);
-
- init_flags_ = AOM_CODEC_USE_PSNR;
-
- ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
- EXPECT_GE(min_psnr_, kMaxPSNR);
-}
-
-void CpuSpeedTest::TestScreencastQ0() {
- ::libaom_test::Y4mVideoSource video("screendata.y4m", 0, 3);
- cfg_.g_timebase = video.timebase();
- cfg_.rc_2pass_vbr_minsection_pct = 5;
- cfg_.rc_2pass_vbr_maxsection_pct = 2000;
- cfg_.rc_target_bitrate = 400;
- cfg_.rc_max_quantizer = 0;
- cfg_.rc_min_quantizer = 0;
-
- init_flags_ = AOM_CODEC_USE_PSNR;
-
- ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
- EXPECT_GE(min_psnr_, kMaxPSNR);
-}
-
-void CpuSpeedTest::TestTuneScreen() {
- ::libaom_test::Y4mVideoSource video("screendata.y4m", 0, 3);
- cfg_.g_timebase = video.timebase();
- cfg_.rc_2pass_vbr_minsection_pct = 5;
- cfg_.rc_2pass_vbr_minsection_pct = 2000;
- cfg_.rc_target_bitrate = 2000;
- cfg_.rc_max_quantizer = 63;
- cfg_.rc_min_quantizer = 0;
- tune_content_ = AOM_CONTENT_SCREEN;
-
- init_flags_ = AOM_CODEC_USE_PSNR;
-
- ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
-}
-
-void CpuSpeedTest::TestEncodeHighBitrate() {
- // Validate that this non multiple of 64 wide clip encodes and decodes
- // without a mismatch when passing in a very low max q. This pushes
- // the encoder to producing lots of big partitions which will likely
- // extend into the border and test the border condition.
- cfg_.rc_2pass_vbr_minsection_pct = 5;
- cfg_.rc_2pass_vbr_maxsection_pct = 2000;
- cfg_.rc_target_bitrate = 12000;
- cfg_.rc_max_quantizer = 10;
- cfg_.rc_min_quantizer = 0;
-
- ::libaom_test::I420VideoSource video("hantro_odd.yuv", 208, 144, 30, 1, 0,
- 10);
-
- ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
-}
-
-void CpuSpeedTest::TestLowBitrate() {
- // Validate that this clip encodes and decodes without a mismatch
- // when passing in a very high min q. This pushes the encoder to producing
- // lots of small partitions which might will test the other condition.
- cfg_.rc_2pass_vbr_minsection_pct = 5;
- cfg_.rc_2pass_vbr_maxsection_pct = 2000;
- cfg_.rc_target_bitrate = 200;
- cfg_.rc_min_quantizer = 40;
-
- ::libaom_test::I420VideoSource video("hantro_odd.yuv", 208, 144, 30, 1, 0,
- 10);
-
- ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
-}
-
-TEST_P(CpuSpeedTest, TestQ0) { TestQ0(); }
-TEST_P(CpuSpeedTest, TestScreencastQ0) { TestScreencastQ0(); }
-TEST_P(CpuSpeedTest, TestTuneScreen) { TestTuneScreen(); }
-TEST_P(CpuSpeedTest, TestEncodeHighBitrate) { TestEncodeHighBitrate(); }
-TEST_P(CpuSpeedTest, TestLowBitrate) { TestLowBitrate(); }
-
-class CpuSpeedTestLarge : public CpuSpeedTest {};
-
-TEST_P(CpuSpeedTestLarge, TestQ0) { TestQ0(); }
-TEST_P(CpuSpeedTestLarge, TestScreencastQ0) { TestScreencastQ0(); }
-TEST_P(CpuSpeedTestLarge, TestTuneScreen) { TestTuneScreen(); }
-TEST_P(CpuSpeedTestLarge, TestEncodeHighBitrate) { TestEncodeHighBitrate(); }
-TEST_P(CpuSpeedTestLarge, TestLowBitrate) { TestLowBitrate(); }
-
-AV1_INSTANTIATE_TEST_CASE(CpuSpeedTest,
- ::testing::Values(::libaom_test::kTwoPassGood,
- ::libaom_test::kOnePassGood),
- ::testing::Range(1, 3));
-AV1_INSTANTIATE_TEST_CASE(CpuSpeedTestLarge,
- ::testing::Values(::libaom_test::kTwoPassGood,
- ::libaom_test::kOnePassGood),
- ::testing::Range(0, 1));
-} // namespace
diff --git a/third_party/aom/test/datarate_test.cc b/third_party/aom/test/datarate_test.cc
deleted file mode 100644
index 1588d3cc1..000000000
--- a/third_party/aom/test/datarate_test.cc
+++ /dev/null
@@ -1,255 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#include "config/aom_config.h"
-
-#include "third_party/googletest/src/googletest/include/gtest/gtest.h"
-#include "test/codec_factory.h"
-#include "test/encode_test_driver.h"
-#include "test/i420_video_source.h"
-#include "test/util.h"
-#include "test/y4m_video_source.h"
-#include "aom/aom_codec.h"
-
-namespace {
-
-class DatarateTestLarge
- : public ::libaom_test::CodecTestWith2Params<libaom_test::TestMode, int>,
- public ::libaom_test::EncoderTest {
- public:
- DatarateTestLarge() : EncoderTest(GET_PARAM(0)) {}
-
- protected:
- virtual ~DatarateTestLarge() {}
-
- virtual void SetUp() {
- InitializeConfig();
- SetMode(GET_PARAM(1));
- set_cpu_used_ = GET_PARAM(2);
- ResetModel();
- }
-
- virtual void ResetModel() {
- last_pts_ = 0;
- bits_in_buffer_model_ = cfg_.rc_target_bitrate * cfg_.rc_buf_initial_sz;
- frame_number_ = 0;
- tot_frame_number_ = 0;
- first_drop_ = 0;
- num_drops_ = 0;
- // Denoiser is off by default.
- denoiser_on_ = 0;
- bits_total_ = 0;
- denoiser_offon_test_ = 0;
- denoiser_offon_period_ = -1;
- }
-
- virtual void PreEncodeFrameHook(::libaom_test::VideoSource *video,
- ::libaom_test::Encoder *encoder) {
- if (video->frame() == 0) encoder->Control(AOME_SET_CPUUSED, set_cpu_used_);
-
- if (denoiser_offon_test_) {
- ASSERT_GT(denoiser_offon_period_, 0)
- << "denoiser_offon_period_ is not positive.";
- if ((video->frame() + 1) % denoiser_offon_period_ == 0) {
- // Flip denoiser_on_ periodically
- denoiser_on_ ^= 1;
- }
- }
-
- encoder->Control(AV1E_SET_NOISE_SENSITIVITY, denoiser_on_);
-
- const aom_rational_t tb = video->timebase();
- timebase_ = static_cast<double>(tb.num) / tb.den;
- duration_ = 0;
- }
-
- virtual void FramePktHook(const aom_codec_cx_pkt_t *pkt) {
- // Time since last timestamp = duration.
- aom_codec_pts_t duration = pkt->data.frame.pts - last_pts_;
-
- if (duration > 1) {
- // If first drop not set and we have a drop set it to this time.
- if (!first_drop_) first_drop_ = last_pts_ + 1;
- // Update the number of frame drops.
- num_drops_ += static_cast<int>(duration - 1);
- // Update counter for total number of frames (#frames input to encoder).
- // Needed for setting the proper layer_id below.
- tot_frame_number_ += static_cast<int>(duration - 1);
- }
-
- // Add to the buffer the bits we'd expect from a constant bitrate server.
- bits_in_buffer_model_ += static_cast<int64_t>(
- duration * timebase_ * cfg_.rc_target_bitrate * 1000);
-
- // Buffer should not go negative.
- ASSERT_GE(bits_in_buffer_model_, 0)
- << "Buffer Underrun at frame " << pkt->data.frame.pts;
-
- const size_t frame_size_in_bits = pkt->data.frame.sz * 8;
-
- // Update the total encoded bits.
- bits_total_ += frame_size_in_bits;
-
- // Update the most recent pts.
- last_pts_ = pkt->data.frame.pts;
- ++frame_number_;
- ++tot_frame_number_;
- }
-
- virtual void EndPassHook(void) {
- duration_ = (last_pts_ + 1) * timebase_;
- // Effective file datarate:
- effective_datarate_ = (bits_total_ / 1000.0) / duration_;
- }
-
- aom_codec_pts_t last_pts_;
- double timebase_;
- int frame_number_; // Counter for number of non-dropped/encoded frames.
- int tot_frame_number_; // Counter for total number of input frames.
- int64_t bits_total_;
- double duration_;
- double effective_datarate_;
- int set_cpu_used_;
- int64_t bits_in_buffer_model_;
- aom_codec_pts_t first_drop_;
- int num_drops_;
- int denoiser_on_;
- int denoiser_offon_test_;
- int denoiser_offon_period_;
-};
-
-// Check basic rate targeting for VBR mode.
-TEST_P(DatarateTestLarge, BasicRateTargetingVBR) {
- cfg_.rc_min_quantizer = 0;
- cfg_.rc_max_quantizer = 63;
- cfg_.g_error_resilient = 0;
- cfg_.rc_end_usage = AOM_VBR;
- cfg_.g_lag_in_frames = 0;
-
- ::libaom_test::I420VideoSource video("hantro_collage_w352h288.yuv", 352, 288,
- 30, 1, 0, 140);
- for (int i = 400; i <= 800; i += 400) {
- cfg_.rc_target_bitrate = i;
- ResetModel();
- ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
- ASSERT_GE(effective_datarate_, cfg_.rc_target_bitrate * 0.75)
- << " The datarate for the file is lower than target by too much!";
- ASSERT_LE(effective_datarate_, cfg_.rc_target_bitrate * 1.25)
- << " The datarate for the file is greater than target by too much!";
- }
-}
-
-// Check basic rate targeting for CBR,
-TEST_P(DatarateTestLarge, BasicRateTargeting) {
- cfg_.rc_buf_initial_sz = 500;
- cfg_.rc_buf_optimal_sz = 500;
- cfg_.rc_buf_sz = 1000;
- cfg_.rc_dropframe_thresh = 1;
- cfg_.rc_min_quantizer = 0;
- cfg_.rc_max_quantizer = 63;
- cfg_.rc_end_usage = AOM_CBR;
- cfg_.g_lag_in_frames = 0;
-
- ::libaom_test::I420VideoSource video("hantro_collage_w352h288.yuv", 352, 288,
- 30, 1, 0, 140);
- for (int i = 150; i < 800; i += 400) {
- cfg_.rc_target_bitrate = i;
- ResetModel();
- ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
- ASSERT_GE(effective_datarate_, cfg_.rc_target_bitrate * 0.85)
- << " The datarate for the file is lower than target by too much!";
- ASSERT_LE(effective_datarate_, cfg_.rc_target_bitrate * 1.15)
- << " The datarate for the file is greater than target by too much!";
- }
-}
-
-// Check basic rate targeting for CBR.
-TEST_P(DatarateTestLarge, BasicRateTargeting444) {
- ::libaom_test::Y4mVideoSource video("rush_hour_444.y4m", 0, 140);
-
- cfg_.g_profile = 1;
- cfg_.g_timebase = video.timebase();
-
- cfg_.rc_buf_initial_sz = 500;
- cfg_.rc_buf_optimal_sz = 500;
- cfg_.rc_buf_sz = 1000;
- cfg_.rc_dropframe_thresh = 1;
- cfg_.rc_min_quantizer = 0;
- cfg_.rc_max_quantizer = 63;
- cfg_.rc_end_usage = AOM_CBR;
-
- for (int i = 250; i < 900; i += 400) {
- cfg_.rc_target_bitrate = i;
- ResetModel();
- ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
- ASSERT_GE(static_cast<double>(cfg_.rc_target_bitrate),
- effective_datarate_ * 0.85)
- << " The datarate for the file exceeds the target by too much!";
- ASSERT_LE(static_cast<double>(cfg_.rc_target_bitrate),
- effective_datarate_ * 1.15)
- << " The datarate for the file missed the target!"
- << cfg_.rc_target_bitrate << " " << effective_datarate_;
- }
-}
-
-// Check that (1) the first dropped frame gets earlier and earlier
-// as the drop frame threshold is increased, and (2) that the total number of
-// frame drops does not decrease as we increase frame drop threshold.
-// Use a lower qp-max to force some frame drops.
-TEST_P(DatarateTestLarge, ChangingDropFrameThresh) {
- cfg_.rc_buf_initial_sz = 500;
- cfg_.rc_buf_optimal_sz = 500;
- cfg_.rc_buf_sz = 1000;
- cfg_.rc_undershoot_pct = 20;
- cfg_.rc_undershoot_pct = 20;
- cfg_.rc_dropframe_thresh = 10;
- cfg_.rc_min_quantizer = 0;
- cfg_.rc_max_quantizer = 50;
- cfg_.rc_end_usage = AOM_CBR;
- cfg_.rc_target_bitrate = 200;
- cfg_.g_lag_in_frames = 0;
- cfg_.g_error_resilient = 1;
- // TODO(marpan): Investigate datarate target failures with a smaller keyframe
- // interval (128).
- cfg_.kf_max_dist = 9999;
-
- ::libaom_test::I420VideoSource video("hantro_collage_w352h288.yuv", 352, 288,
- 30, 1, 0, 100);
-
- const int kDropFrameThreshTestStep = 30;
- aom_codec_pts_t last_drop = 140;
- int last_num_drops = 0;
- for (int i = 40; i < 100; i += kDropFrameThreshTestStep) {
- cfg_.rc_dropframe_thresh = i;
- ResetModel();
- ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
- ASSERT_GE(effective_datarate_, cfg_.rc_target_bitrate * 0.85)
- << " The datarate for the file is lower than target by too much!";
- ASSERT_LE(effective_datarate_, cfg_.rc_target_bitrate * 1.15)
- << " The datarate for the file is greater than target by too much!";
- ASSERT_LE(first_drop_, last_drop)
- << " The first dropped frame for drop_thresh " << i
- << " > first dropped frame for drop_thresh "
- << i - kDropFrameThreshTestStep;
- ASSERT_GE(num_drops_, last_num_drops * 0.85)
- << " The number of dropped frames for drop_thresh " << i
- << " < number of dropped frames for drop_thresh "
- << i - kDropFrameThreshTestStep;
- last_drop = first_drop_;
- last_num_drops = num_drops_;
- }
-}
-
-AV1_INSTANTIATE_TEST_CASE(DatarateTestLarge,
- ::testing::Values(::libaom_test::kOnePassGood,
- ::libaom_test::kRealTime),
- ::testing::Values(2, 5));
-} // namespace
diff --git a/third_party/aom/test/decode_api_test.cc b/third_party/aom/test/decode_api_test.cc
deleted file mode 100644
index c1beacee1..000000000
--- a/third_party/aom/test/decode_api_test.cc
+++ /dev/null
@@ -1,55 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#include "third_party/googletest/src/googletest/include/gtest/gtest.h"
-
-#include "config/aom_config.h"
-
-#include "test/util.h"
-#include "aom/aomdx.h"
-#include "aom/aom_decoder.h"
-
-namespace {
-
-TEST(DecodeAPI, InvalidParams) {
- static const aom_codec_iface_t *kCodecs[] = {
-#if CONFIG_AV1_DECODER
- aom_codec_av1_dx(),
-#endif
- };
- uint8_t buf[1] = { 0 };
- aom_codec_ctx_t dec;
-
- EXPECT_EQ(AOM_CODEC_INVALID_PARAM, aom_codec_dec_init(NULL, NULL, NULL, 0));
- EXPECT_EQ(AOM_CODEC_INVALID_PARAM, aom_codec_dec_init(&dec, NULL, NULL, 0));
- EXPECT_EQ(AOM_CODEC_INVALID_PARAM, aom_codec_decode(NULL, NULL, 0, NULL));
- EXPECT_EQ(AOM_CODEC_INVALID_PARAM, aom_codec_decode(NULL, buf, 0, NULL));
- EXPECT_EQ(AOM_CODEC_INVALID_PARAM,
- aom_codec_decode(NULL, buf, NELEMENTS(buf), NULL));
- EXPECT_EQ(AOM_CODEC_INVALID_PARAM,
- aom_codec_decode(NULL, NULL, NELEMENTS(buf), NULL));
- EXPECT_EQ(AOM_CODEC_INVALID_PARAM, aom_codec_destroy(NULL));
- EXPECT_TRUE(aom_codec_error(NULL) != NULL);
-
- for (int i = 0; i < NELEMENTS(kCodecs); ++i) {
- EXPECT_EQ(AOM_CODEC_INVALID_PARAM,
- aom_codec_dec_init(NULL, kCodecs[i], NULL, 0));
-
- EXPECT_EQ(AOM_CODEC_OK, aom_codec_dec_init(&dec, kCodecs[i], NULL, 0));
- EXPECT_EQ(AOM_CODEC_INVALID_PARAM,
- aom_codec_decode(&dec, NULL, NELEMENTS(buf), NULL));
- EXPECT_EQ(AOM_CODEC_INVALID_PARAM, aom_codec_decode(&dec, buf, 0, NULL));
-
- EXPECT_EQ(AOM_CODEC_OK, aom_codec_destroy(&dec));
- }
-}
-
-} // namespace
diff --git a/third_party/aom/test/decode_multithreaded_test.cc b/third_party/aom/test/decode_multithreaded_test.cc
deleted file mode 100644
index cea1d144f..000000000
--- a/third_party/aom/test/decode_multithreaded_test.cc
+++ /dev/null
@@ -1,185 +0,0 @@
-/*
- * Copyright (c) 2018, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#include <cstdio>
-#include <cstdlib>
-#include <string>
-
-#include "aom_mem/aom_mem.h"
-#include "test/codec_factory.h"
-#include "test/encode_test_driver.h"
-#include "test/i420_video_source.h"
-#include "test/md5_helper.h"
-#include "test/util.h"
-#include "third_party/googletest/src/googletest/include/gtest/gtest.h"
-
-namespace {
-
-static const int kNumMultiThreadDecoders = 3;
-
-class AV1DecodeMultiThreadedTest
- : public ::libaom_test::CodecTestWith5Params<int, int, int, int, int>,
- public ::libaom_test::EncoderTest {
- protected:
- AV1DecodeMultiThreadedTest()
- : EncoderTest(GET_PARAM(0)), md5_single_thread_(), md5_multi_thread_(),
- n_tile_cols_(GET_PARAM(1)), n_tile_rows_(GET_PARAM(2)),
- n_tile_groups_(GET_PARAM(3)), set_cpu_used_(GET_PARAM(4)),
- row_mt_(GET_PARAM(5)) {
- init_flags_ = AOM_CODEC_USE_PSNR;
- aom_codec_dec_cfg_t cfg = aom_codec_dec_cfg_t();
- cfg.w = 704;
- cfg.h = 576;
- cfg.threads = 1;
- cfg.allow_lowbitdepth = 1;
- single_thread_dec_ = codec_->CreateDecoder(cfg, 0);
-
- // Test cfg.threads == powers of 2.
- for (int i = 0; i < kNumMultiThreadDecoders; ++i) {
- cfg.threads <<= 1;
- multi_thread_dec_[i] = codec_->CreateDecoder(cfg, 0);
- multi_thread_dec_[i]->Control(AV1D_SET_ROW_MT, row_mt_);
- }
-
- if (single_thread_dec_->IsAV1()) {
- single_thread_dec_->Control(AV1D_EXT_TILE_DEBUG, 1);
- single_thread_dec_->Control(AV1_SET_DECODE_TILE_ROW, -1);
- single_thread_dec_->Control(AV1_SET_DECODE_TILE_COL, -1);
- }
- for (int i = 0; i < kNumMultiThreadDecoders; ++i) {
- if (multi_thread_dec_[i]->IsAV1()) {
- multi_thread_dec_[i]->Control(AV1D_EXT_TILE_DEBUG, 1);
- multi_thread_dec_[i]->Control(AV1_SET_DECODE_TILE_ROW, -1);
- multi_thread_dec_[i]->Control(AV1_SET_DECODE_TILE_COL, -1);
- }
- }
- }
-
- virtual ~AV1DecodeMultiThreadedTest() {
- delete single_thread_dec_;
- for (int i = 0; i < kNumMultiThreadDecoders; ++i)
- delete multi_thread_dec_[i];
- }
-
- virtual void SetUp() {
- InitializeConfig();
- SetMode(libaom_test::kTwoPassGood);
- }
-
- virtual void PreEncodeFrameHook(libaom_test::VideoSource *video,
- libaom_test::Encoder *encoder) {
- if (video->frame() == 1) {
- encoder->Control(AV1E_SET_TILE_COLUMNS, n_tile_cols_);
- encoder->Control(AV1E_SET_TILE_ROWS, n_tile_rows_);
- encoder->Control(AV1E_SET_NUM_TG, n_tile_groups_);
- encoder->Control(AOME_SET_CPUUSED, set_cpu_used_);
- }
- }
-
- void UpdateMD5(::libaom_test::Decoder *dec, const aom_codec_cx_pkt_t *pkt,
- ::libaom_test::MD5 *md5) {
- const aom_codec_err_t res = dec->DecodeFrame(
- reinterpret_cast<uint8_t *>(pkt->data.frame.buf), pkt->data.frame.sz);
- if (res != AOM_CODEC_OK) {
- abort_ = true;
- ASSERT_EQ(AOM_CODEC_OK, res);
- }
- const aom_image_t *img = dec->GetDxData().Next();
- md5->Add(img);
- }
-
- virtual void FramePktHook(const aom_codec_cx_pkt_t *pkt) {
- UpdateMD5(single_thread_dec_, pkt, &md5_single_thread_);
-
- for (int i = 0; i < kNumMultiThreadDecoders; ++i)
- UpdateMD5(multi_thread_dec_[i], pkt, &md5_multi_thread_[i]);
- }
-
- void DoTest() {
- const aom_rational timebase = { 33333333, 1000000000 };
- cfg_.g_timebase = timebase;
- cfg_.rc_target_bitrate = 500;
- cfg_.g_lag_in_frames = 12;
- cfg_.rc_end_usage = AOM_VBR;
-
- libaom_test::I420VideoSource video("hantro_collage_w352h288.yuv", 704, 576,
- timebase.den, timebase.num, 0, 5);
- ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
-
- const char *md5_single_thread_str = md5_single_thread_.Get();
-
- for (int i = 0; i < kNumMultiThreadDecoders; ++i) {
- const char *md5_multi_thread_str = md5_multi_thread_[i].Get();
- ASSERT_STREQ(md5_single_thread_str, md5_multi_thread_str);
- }
- }
-
- ::libaom_test::MD5 md5_single_thread_;
- ::libaom_test::MD5 md5_multi_thread_[kNumMultiThreadDecoders];
- ::libaom_test::Decoder *single_thread_dec_;
- ::libaom_test::Decoder *multi_thread_dec_[kNumMultiThreadDecoders];
-
- private:
- int n_tile_cols_;
- int n_tile_rows_;
- int n_tile_groups_;
- int set_cpu_used_;
- int row_mt_;
-};
-
-// run an encode and do the decode both in single thread
-// and multi thread. Ensure that the MD5 of the output in both cases
-// is identical. If so, the test passes.
-TEST_P(AV1DecodeMultiThreadedTest, MD5Match) {
- cfg_.large_scale_tile = 0;
- single_thread_dec_->Control(AV1_SET_TILE_MODE, 0);
- for (int i = 0; i < kNumMultiThreadDecoders; ++i)
- multi_thread_dec_[i]->Control(AV1_SET_TILE_MODE, 0);
- DoTest();
-}
-
-class AV1DecodeMultiThreadedTestLarge : public AV1DecodeMultiThreadedTest {};
-
-TEST_P(AV1DecodeMultiThreadedTestLarge, MD5Match) {
- cfg_.large_scale_tile = 0;
- single_thread_dec_->Control(AV1_SET_TILE_MODE, 0);
- for (int i = 0; i < kNumMultiThreadDecoders; ++i)
- multi_thread_dec_[i]->Control(AV1_SET_TILE_MODE, 0);
- DoTest();
-}
-
-// TODO(ranjit): More tests have to be added using pre-generated MD5.
-AV1_INSTANTIATE_TEST_CASE(AV1DecodeMultiThreadedTest, ::testing::Values(1, 2),
- ::testing::Values(1, 2), ::testing::Values(1),
- ::testing::Values(3), ::testing::Values(0, 1));
-AV1_INSTANTIATE_TEST_CASE(AV1DecodeMultiThreadedTestLarge,
- ::testing::Values(0, 1, 2, 6),
- ::testing::Values(0, 1, 2, 6),
- ::testing::Values(1, 4), ::testing::Values(0),
- ::testing::Values(0, 1));
-
-class AV1DecodeMultiThreadedLSTestLarge
- : public AV1DecodeMultiThreadedTestLarge {};
-
-TEST_P(AV1DecodeMultiThreadedLSTestLarge, MD5Match) {
- cfg_.large_scale_tile = 1;
- single_thread_dec_->Control(AV1_SET_TILE_MODE, 1);
- for (int i = 0; i < kNumMultiThreadDecoders; ++i)
- multi_thread_dec_[i]->Control(AV1_SET_TILE_MODE, 1);
- DoTest();
-}
-
-AV1_INSTANTIATE_TEST_CASE(AV1DecodeMultiThreadedLSTestLarge,
- ::testing::Values(6), ::testing::Values(6),
- ::testing::Values(1), ::testing::Values(0, 3),
- ::testing::Values(0, 1));
-
-} // namespace
diff --git a/third_party/aom/test/decode_perf_test.cc b/third_party/aom/test/decode_perf_test.cc
deleted file mode 100644
index bb7b00032..000000000
--- a/third_party/aom/test/decode_perf_test.cc
+++ /dev/null
@@ -1,246 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#include <string>
-
-#include "config/aom_version.h"
-
-#include "aom_ports/aom_timer.h"
-#include "common/ivfenc.h"
-#include "test/codec_factory.h"
-#include "test/decode_test_driver.h"
-#include "test/encode_test_driver.h"
-#include "test/i420_video_source.h"
-#include "test/ivf_video_source.h"
-#include "test/md5_helper.h"
-#include "test/util.h"
-#include "test/webm_video_source.h"
-
-using ::testing::make_tuple;
-
-namespace {
-
-#define VIDEO_NAME 0
-#define THREADS 1
-
-const double kUsecsInSec = 1000000.0;
-const char kNewEncodeOutputFile[] = "new_encode.ivf";
-
-/*
- DecodePerfTest takes a tuple of filename + number of threads to decode with
- */
-typedef ::testing::tuple<const char *, unsigned> DecodePerfParam;
-
-// TODO(jimbankoski): Add actual test vectors here when available.
-// const DecodePerfParam kAV1DecodePerfVectors[] = {};
-
-/*
- In order to reflect real world performance as much as possible, Perf tests
- *DO NOT* do any correctness checks. Please run them alongside correctness
- tests to ensure proper codec integrity. Furthermore, in this test we
- deliberately limit the amount of system calls we make to avoid OS
- preemption.
-
- TODO(joshualitt) create a more detailed perf measurement test to collect
- power/temp/min max frame decode times/etc
- */
-
-class DecodePerfTest : public ::testing::TestWithParam<DecodePerfParam> {};
-
-TEST_P(DecodePerfTest, PerfTest) {
- const char *const video_name = GET_PARAM(VIDEO_NAME);
- const unsigned threads = GET_PARAM(THREADS);
-
- libaom_test::WebMVideoSource video(video_name);
- video.Init();
-
- aom_codec_dec_cfg_t cfg = aom_codec_dec_cfg_t();
- cfg.threads = threads;
- cfg.allow_lowbitdepth = 1;
- libaom_test::AV1Decoder decoder(cfg, 0);
-
- aom_usec_timer t;
- aom_usec_timer_start(&t);
-
- for (video.Begin(); video.cxdata() != NULL; video.Next()) {
- decoder.DecodeFrame(video.cxdata(), video.frame_size());
- }
-
- aom_usec_timer_mark(&t);
- const double elapsed_secs = double(aom_usec_timer_elapsed(&t)) / kUsecsInSec;
- const unsigned frames = video.frame_number();
- const double fps = double(frames) / elapsed_secs;
-
- printf("{\n");
- printf("\t\"type\" : \"decode_perf_test\",\n");
- printf("\t\"version\" : \"%s\",\n", VERSION_STRING_NOSP);
- printf("\t\"videoName\" : \"%s\",\n", video_name);
- printf("\t\"threadCount\" : %u,\n", threads);
- printf("\t\"decodeTimeSecs\" : %f,\n", elapsed_secs);
- printf("\t\"totalFrames\" : %u,\n", frames);
- printf("\t\"framesPerSecond\" : %f\n", fps);
- printf("}\n");
-}
-
-// TODO(jimbankoski): Enabled when we have actual AV1 Decode vectors.
-// INSTANTIATE_TEST_CASE_P(AV1, DecodePerfTest,
-// ::testing::ValuesIn(kAV1DecodePerfVectors));
-
-class AV1NewEncodeDecodePerfTest
- : public ::libaom_test::CodecTestWithParam<libaom_test::TestMode>,
- public ::libaom_test::EncoderTest {
- protected:
- AV1NewEncodeDecodePerfTest()
- : EncoderTest(GET_PARAM(0)), encoding_mode_(GET_PARAM(1)), speed_(0),
- outfile_(0), out_frames_(0) {}
-
- virtual ~AV1NewEncodeDecodePerfTest() {}
-
- virtual void SetUp() {
- InitializeConfig();
- SetMode(encoding_mode_);
-
- cfg_.g_lag_in_frames = 25;
- cfg_.rc_min_quantizer = 2;
- cfg_.rc_max_quantizer = 56;
- cfg_.rc_dropframe_thresh = 0;
- cfg_.rc_undershoot_pct = 50;
- cfg_.rc_overshoot_pct = 50;
- cfg_.rc_buf_sz = 1000;
- cfg_.rc_buf_initial_sz = 500;
- cfg_.rc_buf_optimal_sz = 600;
- cfg_.rc_end_usage = AOM_VBR;
- }
-
- virtual void PreEncodeFrameHook(::libaom_test::VideoSource *video,
- ::libaom_test::Encoder *encoder) {
- if (video->frame() == 1) {
- encoder->Control(AOME_SET_CPUUSED, speed_);
- encoder->Control(AV1E_SET_FRAME_PARALLEL_DECODING, 1);
- encoder->Control(AV1E_SET_TILE_COLUMNS, 2);
- }
- }
-
- virtual void BeginPassHook(unsigned int /*pass*/) {
- const char *const env = getenv("LIBAOM_TEST_DATA_PATH");
- const std::string data_path(env ? env : ".");
- const std::string path_to_source = data_path + "/" + kNewEncodeOutputFile;
- outfile_ = fopen(path_to_source.c_str(), "wb");
- ASSERT_TRUE(outfile_ != NULL);
- }
-
- virtual void EndPassHook() {
- if (outfile_ != NULL) {
- if (!fseek(outfile_, 0, SEEK_SET))
- ivf_write_file_header(outfile_, &cfg_, AV1_FOURCC, out_frames_);
- fclose(outfile_);
- outfile_ = NULL;
- }
- }
-
- virtual void FramePktHook(const aom_codec_cx_pkt_t *pkt) {
- ++out_frames_;
-
- // Write initial file header if first frame.
- if (pkt->data.frame.pts == 0)
- ivf_write_file_header(outfile_, &cfg_, AV1_FOURCC, out_frames_);
-
- // Write frame header and data.
- ivf_write_frame_header(outfile_, out_frames_, pkt->data.frame.sz);
- ASSERT_EQ(fwrite(pkt->data.frame.buf, 1, pkt->data.frame.sz, outfile_),
- pkt->data.frame.sz);
- }
-
- virtual bool DoDecode() const { return false; }
-
- void set_speed(unsigned int speed) { speed_ = speed; }
-
- private:
- libaom_test::TestMode encoding_mode_;
- uint32_t speed_;
- FILE *outfile_;
- uint32_t out_frames_;
-};
-
-struct EncodePerfTestVideo {
- EncodePerfTestVideo(const char *name_, uint32_t width_, uint32_t height_,
- uint32_t bitrate_, int frames_)
- : name(name_), width(width_), height(height_), bitrate(bitrate_),
- frames(frames_) {}
- const char *name;
- uint32_t width;
- uint32_t height;
- uint32_t bitrate;
- int frames;
-};
-
-const EncodePerfTestVideo kAV1EncodePerfTestVectors[] = {
- EncodePerfTestVideo("niklas_1280_720_30.yuv", 1280, 720, 600, 470),
-};
-
-TEST_P(AV1NewEncodeDecodePerfTest, PerfTest) {
- SetUp();
-
- // TODO(JBB): Make this work by going through the set of given files.
- const int i = 0;
- const aom_rational timebase = { 33333333, 1000000000 };
- cfg_.g_timebase = timebase;
- cfg_.rc_target_bitrate = kAV1EncodePerfTestVectors[i].bitrate;
-
- init_flags_ = AOM_CODEC_USE_PSNR;
-
- const char *video_name = kAV1EncodePerfTestVectors[i].name;
- libaom_test::I420VideoSource video(
- video_name, kAV1EncodePerfTestVectors[i].width,
- kAV1EncodePerfTestVectors[i].height, timebase.den, timebase.num, 0,
- kAV1EncodePerfTestVectors[i].frames);
- set_speed(2);
-
- ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
-
- const uint32_t threads = 4;
-
- libaom_test::IVFVideoSource decode_video(kNewEncodeOutputFile);
- decode_video.Init();
-
- aom_codec_dec_cfg_t cfg = aom_codec_dec_cfg_t();
- cfg.threads = threads;
- cfg.allow_lowbitdepth = 1;
- libaom_test::AV1Decoder decoder(cfg, 0);
-
- aom_usec_timer t;
- aom_usec_timer_start(&t);
-
- for (decode_video.Begin(); decode_video.cxdata() != NULL;
- decode_video.Next()) {
- decoder.DecodeFrame(decode_video.cxdata(), decode_video.frame_size());
- }
-
- aom_usec_timer_mark(&t);
- const double elapsed_secs =
- static_cast<double>(aom_usec_timer_elapsed(&t)) / kUsecsInSec;
- const unsigned decode_frames = decode_video.frame_number();
- const double fps = static_cast<double>(decode_frames) / elapsed_secs;
-
- printf("{\n");
- printf("\t\"type\" : \"decode_perf_test\",\n");
- printf("\t\"version\" : \"%s\",\n", VERSION_STRING_NOSP);
- printf("\t\"videoName\" : \"%s\",\n", kNewEncodeOutputFile);
- printf("\t\"threadCount\" : %u,\n", threads);
- printf("\t\"decodeTimeSecs\" : %f,\n", elapsed_secs);
- printf("\t\"totalFrames\" : %u,\n", decode_frames);
- printf("\t\"framesPerSecond\" : %f\n", fps);
- printf("}\n");
-}
-
-AV1_INSTANTIATE_TEST_CASE(AV1NewEncodeDecodePerfTest,
- ::testing::Values(::libaom_test::kTwoPassGood));
-} // namespace
diff --git a/third_party/aom/test/decode_test_driver.cc b/third_party/aom/test/decode_test_driver.cc
deleted file mode 100644
index 70de0cff6..000000000
--- a/third_party/aom/test/decode_test_driver.cc
+++ /dev/null
@@ -1,114 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#include "third_party/googletest/src/googletest/include/gtest/gtest.h"
-
-#include "test/codec_factory.h"
-#include "test/decode_test_driver.h"
-#include "test/register_state_check.h"
-#include "test/video_source.h"
-
-namespace libaom_test {
-
-const char kAV1Name[] = "AOMedia Project AV1 Decoder";
-
-aom_codec_err_t Decoder::PeekStream(const uint8_t *cxdata, size_t size,
- aom_codec_stream_info_t *stream_info) {
- return aom_codec_peek_stream_info(CodecInterface(), cxdata, size,
- stream_info);
-}
-
-aom_codec_err_t Decoder::DecodeFrame(const uint8_t *cxdata, size_t size) {
- return DecodeFrame(cxdata, size, NULL);
-}
-
-aom_codec_err_t Decoder::DecodeFrame(const uint8_t *cxdata, size_t size,
- void *user_priv) {
- aom_codec_err_t res_dec;
- InitOnce();
- API_REGISTER_STATE_CHECK(
- res_dec = aom_codec_decode(&decoder_, cxdata, size, user_priv));
- return res_dec;
-}
-
-bool Decoder::IsAV1() const {
- const char *codec_name = GetDecoderName();
- return strncmp(kAV1Name, codec_name, sizeof(kAV1Name) - 1) == 0;
-}
-
-void DecoderTest::HandlePeekResult(Decoder *const /*decoder*/,
- CompressedVideoSource * /*video*/,
- const aom_codec_err_t res_peek) {
- /* The Av1 implementation of PeekStream returns an error only if the
- * data passed to it isn't a valid Av1 chunk. */
- ASSERT_EQ(AOM_CODEC_OK, res_peek)
- << "Peek return failed: " << aom_codec_err_to_string(res_peek);
-}
-
-void DecoderTest::RunLoop(CompressedVideoSource *video,
- const aom_codec_dec_cfg_t &dec_cfg) {
- Decoder *const decoder = codec_->CreateDecoder(dec_cfg, flags_);
- ASSERT_TRUE(decoder != NULL);
- bool end_of_file = false;
- bool peeked_stream = false;
-
- // Decode frames.
- for (video->Begin(); !::testing::Test::HasFailure() && !end_of_file;
- video->Next()) {
- PreDecodeFrameHook(*video, decoder);
-
- aom_codec_stream_info_t stream_info;
- stream_info.is_annexb = 0;
-
- if (video->cxdata() != NULL) {
- if (!peeked_stream) {
- // TODO(yaowu): PeekStream returns error for non-sequence_header_obu,
- // therefore should only be tried once per sequence, this shall be fixed
- // once PeekStream is updated to properly operate on other obus.
- const aom_codec_err_t res_peek = decoder->PeekStream(
- video->cxdata(), video->frame_size(), &stream_info);
- HandlePeekResult(decoder, video, res_peek);
- ASSERT_FALSE(::testing::Test::HasFailure());
- peeked_stream = true;
- }
-
- aom_codec_err_t res_dec =
- decoder->DecodeFrame(video->cxdata(), video->frame_size());
- if (!HandleDecodeResult(res_dec, *video, decoder)) break;
- } else {
- // Signal end of the file to the decoder.
- const aom_codec_err_t res_dec = decoder->DecodeFrame(NULL, 0);
- ASSERT_EQ(AOM_CODEC_OK, res_dec) << decoder->DecodeError();
- end_of_file = true;
- }
-
- DxDataIterator dec_iter = decoder->GetDxData();
- const aom_image_t *img = NULL;
-
- // Get decompressed data
- while (!::testing::Test::HasFailure() && (img = dec_iter.Next()))
- DecompressedFrameHook(*img, video->frame_number());
- }
- delete decoder;
-}
-
-void DecoderTest::RunLoop(CompressedVideoSource *video) {
- aom_codec_dec_cfg_t dec_cfg = aom_codec_dec_cfg_t();
- RunLoop(video, dec_cfg);
-}
-
-void DecoderTest::set_cfg(const aom_codec_dec_cfg_t &dec_cfg) {
- memcpy(&cfg_, &dec_cfg, sizeof(cfg_));
-}
-
-void DecoderTest::set_flags(const aom_codec_flags_t flags) { flags_ = flags; }
-
-} // namespace libaom_test
diff --git a/third_party/aom/test/decode_test_driver.h b/third_party/aom/test/decode_test_driver.h
deleted file mode 100644
index d13e13ea1..000000000
--- a/third_party/aom/test/decode_test_driver.h
+++ /dev/null
@@ -1,165 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#ifndef AOM_TEST_DECODE_TEST_DRIVER_H_
-#define AOM_TEST_DECODE_TEST_DRIVER_H_
-#include <cstring>
-#include "third_party/googletest/src/googletest/include/gtest/gtest.h"
-
-#include "config/aom_config.h"
-
-#include "aom/aom_decoder.h"
-
-namespace libaom_test {
-
-class CodecFactory;
-class CompressedVideoSource;
-
-// Provides an object to handle decoding output
-class DxDataIterator {
- public:
- explicit DxDataIterator(aom_codec_ctx_t *decoder)
- : decoder_(decoder), iter_(NULL) {}
-
- const aom_image_t *Next() { return aom_codec_get_frame(decoder_, &iter_); }
-
- private:
- aom_codec_ctx_t *decoder_;
- aom_codec_iter_t iter_;
-};
-
-// Provides a simplified interface to manage one video decoding.
-// Similar to Encoder class, the exact services should be added
-// as more tests are added.
-class Decoder {
- public:
- explicit Decoder(aom_codec_dec_cfg_t cfg)
- : cfg_(cfg), flags_(0), init_done_(false) {
- memset(&decoder_, 0, sizeof(decoder_));
- }
-
- Decoder(aom_codec_dec_cfg_t cfg, const aom_codec_flags_t flag)
- : cfg_(cfg), flags_(flag), init_done_(false) {
- memset(&decoder_, 0, sizeof(decoder_));
- }
-
- virtual ~Decoder() { aom_codec_destroy(&decoder_); }
-
- aom_codec_err_t PeekStream(const uint8_t *cxdata, size_t size,
- aom_codec_stream_info_t *stream_info);
-
- aom_codec_err_t DecodeFrame(const uint8_t *cxdata, size_t size);
-
- aom_codec_err_t DecodeFrame(const uint8_t *cxdata, size_t size,
- void *user_priv);
-
- DxDataIterator GetDxData() { return DxDataIterator(&decoder_); }
-
- void Control(int ctrl_id, int arg) { Control(ctrl_id, arg, AOM_CODEC_OK); }
-
- void Control(int ctrl_id, const void *arg) {
- InitOnce();
- const aom_codec_err_t res = aom_codec_control_(&decoder_, ctrl_id, arg);
- ASSERT_EQ(AOM_CODEC_OK, res) << DecodeError();
- }
-
- void Control(int ctrl_id, int arg, aom_codec_err_t expected_value) {
- InitOnce();
- const aom_codec_err_t res = aom_codec_control_(&decoder_, ctrl_id, arg);
- ASSERT_EQ(expected_value, res) << DecodeError();
- }
-
- const char *DecodeError() {
- const char *detail = aom_codec_error_detail(&decoder_);
- return detail ? detail : aom_codec_error(&decoder_);
- }
-
- // Passes the external frame buffer information to libaom.
- aom_codec_err_t SetFrameBufferFunctions(
- aom_get_frame_buffer_cb_fn_t cb_get,
- aom_release_frame_buffer_cb_fn_t cb_release, void *user_priv) {
- InitOnce();
- return aom_codec_set_frame_buffer_functions(&decoder_, cb_get, cb_release,
- user_priv);
- }
-
- const char *GetDecoderName() const {
- return aom_codec_iface_name(CodecInterface());
- }
-
- bool IsAV1() const;
-
- aom_codec_ctx_t *GetDecoder() { return &decoder_; }
-
- protected:
- virtual aom_codec_iface_t *CodecInterface() const = 0;
-
- void InitOnce() {
- if (!init_done_) {
- const aom_codec_err_t res =
- aom_codec_dec_init(&decoder_, CodecInterface(), &cfg_, flags_);
- ASSERT_EQ(AOM_CODEC_OK, res) << DecodeError();
- init_done_ = true;
- }
- }
-
- aom_codec_ctx_t decoder_;
- aom_codec_dec_cfg_t cfg_;
- aom_codec_flags_t flags_;
- bool init_done_;
-};
-
-// Common test functionality for all Decoder tests.
-class DecoderTest {
- public:
- // Main decoding loop
- virtual void RunLoop(CompressedVideoSource *video);
- virtual void RunLoop(CompressedVideoSource *video,
- const aom_codec_dec_cfg_t &dec_cfg);
-
- virtual void set_cfg(const aom_codec_dec_cfg_t &dec_cfg);
- virtual void set_flags(const aom_codec_flags_t flags);
-
- // Hook to be called before decompressing every frame.
- virtual void PreDecodeFrameHook(const CompressedVideoSource & /*video*/,
- Decoder * /*decoder*/) {}
-
- // Hook to be called to handle decode result. Return true to continue.
- virtual bool HandleDecodeResult(const aom_codec_err_t res_dec,
- const CompressedVideoSource & /*video*/,
- Decoder *decoder) {
- EXPECT_EQ(AOM_CODEC_OK, res_dec) << decoder->DecodeError();
- return AOM_CODEC_OK == res_dec;
- }
-
- // Hook to be called on every decompressed frame.
- virtual void DecompressedFrameHook(const aom_image_t & /*img*/,
- const unsigned int /*frame_number*/) {}
-
- // Hook to be called on peek result
- virtual void HandlePeekResult(Decoder *const decoder,
- CompressedVideoSource *video,
- const aom_codec_err_t res_peek);
-
- protected:
- explicit DecoderTest(const CodecFactory *codec)
- : codec_(codec), cfg_(), flags_(0) {}
-
- virtual ~DecoderTest() {}
-
- const CodecFactory *codec_;
- aom_codec_dec_cfg_t cfg_;
- aom_codec_flags_t flags_;
-};
-
-} // namespace libaom_test
-
-#endif // AOM_TEST_DECODE_TEST_DRIVER_H_
diff --git a/third_party/aom/test/decode_to_md5.sh b/third_party/aom/test/decode_to_md5.sh
deleted file mode 100755
index 2edd1cb52..000000000
--- a/third_party/aom/test/decode_to_md5.sh
+++ /dev/null
@@ -1,77 +0,0 @@
-#!/bin/sh
-## Copyright (c) 2016, Alliance for Open Media. All rights reserved
-##
-## This source code is subject to the terms of the BSD 2 Clause License and
-## the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
-## was not distributed with this source code in the LICENSE file, you can
-## obtain it at www.aomedia.org/license/software. If the Alliance for Open
-## Media Patent License 1.0 was not distributed with this source code in the
-## PATENTS file, you can obtain it at www.aomedia.org/license/patent.
-##
-## This file tests the libaom decode_to_md5 example. To add new tests to this
-## file, do the following:
-## 1. Write a shell function (this is your test).
-## 2. Add the function to decode_to_md5_tests (on a new line).
-##
-. $(dirname $0)/tools_common.sh
-
-# Environment check: Make sure input is available:
-# $AV1_IVF_FILE is required.
-decode_to_md5_verify_environment() {
- if [ "$(av1_encode_available)" != "yes" ] && [ ! -e "${AV1_IVF_FILE}" ]; then
- return 1
- fi
-}
-
-# Runs decode_to_md5 on $1 and captures the md5 sum for the final frame. $2 is
-# interpreted as codec name and used solely to name the output file. $3 is the
-# expected md5 sum: It must match that of the final frame.
-decode_to_md5() {
- local decoder="$(aom_tool_path decode_to_md5)"
- local input_file="$1"
- local codec="$2"
- local expected_md5="$3"
- local output_file="${AOM_TEST_OUTPUT_DIR}/decode_to_md5_${codec}"
-
- if [ ! -x "${decoder}" ]; then
- elog "${decoder} does not exist or is not executable."
- return 1
- fi
-
- eval "${AOM_TEST_PREFIX}" "${decoder}" "${input_file}" "${output_file}" \
- ${devnull}
-
- [ -e "${output_file}" ] || return 1
-
- local md5_last_frame="$(tail -n1 "${output_file}" | awk '{print $1}')"
- local actual_md5="$(echo "${md5_last_frame}" | awk '{print $1}')"
- if [ "${actual_md5}" = "${expected_md5}" ]; then
- return 0
- else
- elog "MD5 mismatch:"
- elog "Expected: ${expected_md5}"
- elog "Actual: ${actual_md5}"
- return 1
- fi
-}
-
-DISABLED_decode_to_md5_av1() {
- # expected MD5 sum for the last frame.
- local expected_md5="567dd6d4b7a7170edddbf58bbcc3aff1"
- local file="${AV1_IVF_FILE}"
-
- # TODO(urvang): Check in the encoded file (like libvpx does) to avoid
- # encoding every time.
- if [ "$(av1_decode_available)" = "yes" ]; then
- if [ ! -e "${AV1_IVF_FILE}" ]; then
- file="${AOM_TEST_OUTPUT_DIR}/test_encode.ivf"
- encode_yuv_raw_input_av1 "${file}" --ivf
- fi
- decode_to_md5 "${file}" "av1" "${expected_md5}"
- fi
-}
-
-# TODO(tomfinegan): Enable when the bitstream stabilizes.
-decode_to_md5_tests="DISABLED_decode_to_md5_av1"
-
-run_tests decode_to_md5_verify_environment "${decode_to_md5_tests}"
diff --git a/third_party/aom/test/decode_with_drops.sh b/third_party/aom/test/decode_with_drops.sh
deleted file mode 100755
index 155ee9207..000000000
--- a/third_party/aom/test/decode_with_drops.sh
+++ /dev/null
@@ -1,68 +0,0 @@
-#!/bin/sh
-## Copyright (c) 2016, Alliance for Open Media. All rights reserved
-##
-## This source code is subject to the terms of the BSD 2 Clause License and
-## the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
-## was not distributed with this source code in the LICENSE file, you can
-## obtain it at www.aomedia.org/license/software. If the Alliance for Open
-## Media Patent License 1.0 was not distributed with this source code in the
-## PATENTS file, you can obtain it at www.aomedia.org/license/patent.
-##
-## This file tests the libaom decode_with_drops example. To add new tests to
-## this file, do the following:
-## 1. Write a shell function (this is your test).
-## 2. Add the function to decode_with_drops_tests (on a new line).
-##
-. $(dirname $0)/tools_common.sh
-
-# Environment check: Make sure input is available:
-# $AV1_IVF_FILE is required.
-decode_with_drops_verify_environment() {
- if [ "$(av1_encode_available)" != "yes" ] && [ ! -e "${AV1_IVF_FILE}" ]; then
- return 1
- fi
-}
-
-# Runs decode_with_drops on $1, $2 is interpreted as codec name and used solely
-# to name the output file. $3 is the drop mode, and is passed directly to
-# decode_with_drops.
-decode_with_drops() {
- local decoder="$(aom_tool_path decode_with_drops)"
- local input_file="$1"
- local codec="$2"
- local output_file="${AOM_TEST_OUTPUT_DIR}/decode_with_drops_${codec}"
- local drop_mode="$3"
-
- if [ ! -x "${decoder}" ]; then
- elog "${decoder} does not exist or is not executable."
- return 1
- fi
-
- eval "${AOM_TEST_PREFIX}" "${decoder}" "${input_file}" "${output_file}" \
- "${drop_mode}" ${devnull}
-
- [ -e "${output_file}" ] || return 1
-}
-
-
-# Decodes $AV1_IVF_FILE while dropping frames, twice: once in sequence mode,
-# and once in pattern mode.
-DISABLED_decode_with_drops_av1() {
- if [ "$(av1_decode_available)" = "yes" ]; then
- local file="${AV1_IVF_FILE}"
- if [ ! -e "${AV1_IVF_FILE}" ]; then
- file="${AOM_TEST_OUTPUT_DIR}/test_encode.ivf"
- encode_yuv_raw_input_av1 "${file}" --ivf
- fi
- # Drop frames 3 and 4.
- decode_with_drops "${file}" "av1" "3-4"
-
- # Test pattern mode: Drop 3 of every 4 frames.
- decode_with_drops "${file}" "av1" "3/4"
- fi
-}
-
-# TODO(yaowu): Disable this test as trailing_bit check is expected to fail
-decode_with_drops_tests="DISABLED_decode_with_drops_av1"
-
-run_tests decode_with_drops_verify_environment "${decode_with_drops_tests}"
diff --git a/third_party/aom/test/divu_small_test.cc b/third_party/aom/test/divu_small_test.cc
deleted file mode 100644
index 064f8ee45..000000000
--- a/third_party/aom/test/divu_small_test.cc
+++ /dev/null
@@ -1,41 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#include <stdlib.h>
-
-#include "third_party/googletest/src/googletest/include/gtest/gtest.h"
-
-#include "test/acm_random.h"
-#include "av1/common/odintrin.h"
-
-using libaom_test::ACMRandom;
-
-TEST(Daala, TestDIVUuptoMAX) {
- for (int d = 1; d <= OD_DIVU_DMAX; d++) {
- for (uint32_t x = 1; x <= 1000000; x++) {
- GTEST_ASSERT_EQ(x / d, OD_DIVU_SMALL(x, d))
- << "x=" << x << " d=" << d << " x/d=" << (x / d)
- << " != " << OD_DIVU_SMALL(x, d);
- }
- }
-}
-
-TEST(Daala, TestDIVUrandI31) {
- ACMRandom rnd(ACMRandom::DeterministicSeed());
- for (int d = 1; d < OD_DIVU_DMAX; d++) {
- for (int i = 0; i < 1000000; i++) {
- uint32_t x = rnd.Rand31();
- GTEST_ASSERT_EQ(x / d, OD_DIVU_SMALL(x, d))
- << "x=" << x << " d=" << d << " x/d=" << (x / d)
- << " != " << OD_DIVU_SMALL(x, d);
- }
- }
-}
diff --git a/third_party/aom/test/dr_prediction_test.cc b/third_party/aom/test/dr_prediction_test.cc
deleted file mode 100644
index ff2c1de4e..000000000
--- a/third_party/aom/test/dr_prediction_test.cc
+++ /dev/null
@@ -1,369 +0,0 @@
-/*
- * Copyright (c) 2018, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-#include "third_party/googletest/src/googletest/include/gtest/gtest.h"
-
-#include "config/aom_config.h"
-#include "config/aom_dsp_rtcd.h"
-
-#include "aom_mem/aom_mem.h"
-#include "aom_ports/aom_timer.h"
-#include "av1/common/blockd.h"
-#include "av1/common/pred_common.h"
-#include "av1/common/reconintra.h"
-#include "test/acm_random.h"
-#include "test/clear_system_state.h"
-#include "test/register_state_check.h"
-#include "test/util.h"
-
-namespace {
-
-const int kZ1Start = 0;
-const int kZ2Start = 90;
-const int kZ3Start = 180;
-
-const TX_SIZE kTxSize[] = { TX_4X4, TX_8X8, TX_16X16, TX_32X32, TX_64X64,
- TX_4X8, TX_8X4, TX_8X16, TX_16X8, TX_16X32,
- TX_32X16, TX_32X64, TX_64X32, TX_4X16, TX_16X4,
- TX_8X32, TX_32X8, TX_16X64, TX_64X16 };
-
-const char *const kTxSizeStrings[] = {
- "TX_4X4", "TX_8X8", "TX_16X16", "TX_32X32", "TX_64X64",
- "TX_4X8", "TX_8X4", "TX_8X16", "TX_16X8", "TX_16X32",
- "TX_32X16", "TX_32X64", "TX_64X32", "TX_4X16", "TX_16X4",
- "TX_8X32", "TX_32X8", "TX_16X64", "TX_64X16"
-};
-
-using libaom_test::ACMRandom;
-
-typedef void (*DrPred_Hbd)(uint16_t *dst, ptrdiff_t stride, int bw, int bh,
- const uint16_t *above, const uint16_t *left,
- int upsample_above, int upsample_left, int dx,
- int dy, int bd);
-
-typedef void (*DrPred)(uint8_t *dst, ptrdiff_t stride, int bw, int bh,
- const uint8_t *above, const uint8_t *left,
- int upsample_above, int upsample_left, int dx, int dy,
- int bd);
-
-typedef void (*Z1_Lbd)(uint8_t *dst, ptrdiff_t stride, int bw, int bh,
- const uint8_t *above, const uint8_t *left,
- int upsample_above, int dx, int dy);
-template <Z1_Lbd fn>
-void z1_wrapper(uint8_t *dst, ptrdiff_t stride, int bw, int bh,
- const uint8_t *above, const uint8_t *left, int upsample_above,
- int /*upsample_left*/, int dx, int dy, int /*bd*/) {
- fn(dst, stride, bw, bh, above, left, upsample_above, dx, dy);
-}
-
-typedef void (*Z2_Lbd)(uint8_t *dst, ptrdiff_t stride, int bw, int bh,
- const uint8_t *above, const uint8_t *left,
- int upsample_above, int upsample_left, int dx, int dy);
-template <Z2_Lbd fn>
-void z2_wrapper(uint8_t *dst, ptrdiff_t stride, int bw, int bh,
- const uint8_t *above, const uint8_t *left, int upsample_above,
- int upsample_left, int dx, int dy, int /*bd*/) {
- fn(dst, stride, bw, bh, above, left, upsample_above, upsample_left, dx, dy);
-}
-
-typedef void (*Z3_Lbd)(uint8_t *dst, ptrdiff_t stride, int bw, int bh,
- const uint8_t *above, const uint8_t *left,
- int upsample_left, int dx, int dy);
-template <Z3_Lbd fn>
-void z3_wrapper(uint8_t *dst, ptrdiff_t stride, int bw, int bh,
- const uint8_t *above, const uint8_t *left,
- int /*upsample_above*/, int upsample_left, int dx, int dy,
- int /*bd*/) {
- fn(dst, stride, bw, bh, above, left, upsample_left, dx, dy);
-}
-
-typedef void (*Z1_Hbd)(uint16_t *dst, ptrdiff_t stride, int bw, int bh,
- const uint16_t *above, const uint16_t *left,
- int upsample_above, int dx, int dy, int bd);
-template <Z1_Hbd fn>
-void z1_wrapper_hbd(uint16_t *dst, ptrdiff_t stride, int bw, int bh,
- const uint16_t *above, const uint16_t *left,
- int upsample_above, int /*upsample_left*/, int dx, int dy,
- int bd) {
- fn(dst, stride, bw, bh, above, left, upsample_above, dx, dy, bd);
-}
-
-typedef void (*Z2_Hbd)(uint16_t *dst, ptrdiff_t stride, int bw, int bh,
- const uint16_t *above, const uint16_t *left,
- int upsample_above, int upsample_left, int dx, int dy,
- int bd);
-template <Z2_Hbd fn>
-void z2_wrapper_hbd(uint16_t *dst, ptrdiff_t stride, int bw, int bh,
- const uint16_t *above, const uint16_t *left,
- int upsample_above, int upsample_left, int dx, int dy,
- int bd) {
- fn(dst, stride, bw, bh, above, left, upsample_above, upsample_left, dx, dy,
- bd);
-}
-
-typedef void (*Z3_Hbd)(uint16_t *dst, ptrdiff_t stride, int bw, int bh,
- const uint16_t *above, const uint16_t *left,
- int upsample_left, int dx, int dy, int bd);
-template <Z3_Hbd fn>
-void z3_wrapper_hbd(uint16_t *dst, ptrdiff_t stride, int bw, int bh,
- const uint16_t *above, const uint16_t *left,
- int /*upsample_above*/, int upsample_left, int dx, int dy,
- int bd) {
- fn(dst, stride, bw, bh, above, left, upsample_left, dx, dy, bd);
-}
-
-template <typename FuncType>
-struct DrPredFunc {
- DrPredFunc(FuncType pred = NULL, FuncType tst = NULL, int bit_depth_value = 0,
- int start_angle_value = 0)
- : ref_fn(pred), tst_fn(tst), bit_depth(bit_depth_value),
- start_angle(start_angle_value) {}
-
- FuncType ref_fn;
- FuncType tst_fn;
- int bit_depth;
- int start_angle;
-};
-
-template <typename Pixel, typename FuncType>
-class DrPredTest : public ::testing::TestWithParam<DrPredFunc<FuncType> > {
- protected:
- static const int kMaxNumTests = 100000;
- static const int kIterations = 10;
- static const int kDstStride = 64;
- static const int kDstSize = kDstStride * kDstStride;
- static const int kOffset = 16;
- static const int kBufSize = ((2 * MAX_TX_SIZE) << 1) + 16;
-
- DrPredTest()
- : enable_upsample_(0), upsample_above_(0), upsample_left_(0), bw_(0),
- bh_(0), dx_(1), dy_(1), bd_(8), txsize_(TX_4X4) {
- params_ = this->GetParam();
- start_angle_ = params_.start_angle;
- stop_angle_ = start_angle_ + 90;
-
- dst_ref_ = &dst_ref_data_[0];
- dst_tst_ = &dst_tst_data_[0];
- dst_stride_ = kDstStride;
- above_ = &above_data_[kOffset];
- left_ = &left_data_[kOffset];
-
- for (int i = 0; i < kBufSize; ++i) {
- above_data_[i] = rng_.Rand8();
- left_data_[i] = rng_.Rand8();
- }
-
- for (int i = 0; i < kDstSize; ++i) {
- dst_ref_[i] = 0;
- }
- }
-
- virtual ~DrPredTest() {}
-
- void Predict(bool speedtest, int tx) {
- const int kNumTests = speedtest ? kMaxNumTests : 1;
- aom_usec_timer timer;
-
- aom_usec_timer_start(&timer);
- for (int k = 0; k < kNumTests; ++k) {
- params_.ref_fn(dst_ref_, dst_stride_, bw_, bh_, above_, left_,
- upsample_above_, upsample_left_, dx_, dy_, bd_);
- }
- aom_usec_timer_mark(&timer);
- const int ref_time = static_cast<int>(aom_usec_timer_elapsed(&timer));
-
- aom_usec_timer_start(&timer);
- if (params_.tst_fn) {
- for (int k = 0; k < kNumTests; ++k) {
- ASM_REGISTER_STATE_CHECK(params_.tst_fn(dst_tst_, dst_stride_, bw_, bh_,
- above_, left_, upsample_above_,
- upsample_left_, dx_, dy_, bd_));
- }
- }
- aom_usec_timer_mark(&timer);
- const int tst_time = static_cast<int>(aom_usec_timer_elapsed(&timer));
-
- OutputTimes(kNumTests, ref_time, tst_time, tx);
- }
-
- void RunTest(bool speedtest, int p_angle) {
- for (int i = 0; i < kBufSize; ++i) {
- above_data_[i] = left_data_[i] = (1 << bd_) - 1;
- }
-
- for (int tx = 0; tx < TX_SIZES_ALL; ++tx) {
- if (params_.tst_fn == NULL) {
- for (int i = 0; i < kDstSize; ++i) {
- dst_tst_[i] = (1 << bd_) - 1;
- }
- } else {
- for (int i = 0; i < kDstSize; ++i) {
- dst_tst_[i] = 0;
- }
- }
-
- bw_ = tx_size_wide[kTxSize[tx]];
- bh_ = tx_size_high[kTxSize[tx]];
-
- if (enable_upsample_) {
- upsample_above_ =
- av1_use_intra_edge_upsample(bw_, bh_, p_angle - 90, 0);
- upsample_left_ =
- av1_use_intra_edge_upsample(bw_, bh_, p_angle - 180, 0);
- } else {
- upsample_above_ = upsample_left_ = 0;
- }
-
- Predict(speedtest, tx);
-
- for (int r = 0; r < bh_; ++r) {
- for (int c = 0; c < bw_; ++c) {
- ASSERT_EQ(dst_ref_[r * dst_stride_ + c],
- dst_tst_[r * dst_stride_ + c])
- << bw_ << "x" << bh_ << " r: " << r << " c: " << c
- << " dx: " << dx_ << " dy: " << dy_
- << " upsample_above: " << upsample_above_
- << " upsample_left: " << upsample_left_;
- }
- }
- }
- }
-
- void OutputTimes(int num_tests, int ref_time, int tst_time, int tx) {
- if (num_tests > 1) {
- if (params_.tst_fn) {
- const float x = static_cast<float>(ref_time) / tst_time;
- printf("\t[%8s] :: ref time %6d, tst time %6d %3.2f\n",
- kTxSizeStrings[tx], ref_time, tst_time, x);
- } else {
- printf("\t[%8s] :: ref time %6d\n", kTxSizeStrings[tx], ref_time);
- }
- }
- }
-
- Pixel dst_ref_data_[kDstSize];
- Pixel dst_tst_data_[kDstSize];
-
- Pixel left_data_[kBufSize];
- Pixel dummy_data_[kBufSize];
- Pixel above_data_[kBufSize];
-
- Pixel *dst_ref_;
- Pixel *dst_tst_;
- Pixel *above_;
- Pixel *left_;
- int dst_stride_;
-
- int enable_upsample_;
- int upsample_above_;
- int upsample_left_;
- int bw_;
- int bh_;
- int dx_;
- int dy_;
- int bd_;
- TX_SIZE txsize_;
-
- int start_angle_;
- int stop_angle_;
-
- ACMRandom rng_;
-
- DrPredFunc<FuncType> params_;
-};
-
-class LowbdDrPredTest : public DrPredTest<uint8_t, DrPred> {};
-
-TEST_P(LowbdDrPredTest, SaturatedValues) {
- for (int iter = 0; iter < kIterations && !HasFatalFailure(); ++iter) {
- enable_upsample_ = iter & 1;
- for (int angle = start_angle_; angle < stop_angle_; ++angle) {
- dx_ = av1_get_dx(angle);
- dy_ = av1_get_dy(angle);
- if (dx_ && dy_) RunTest(false, angle);
- }
- }
-}
-
-TEST_P(LowbdDrPredTest, DISABLED_Speed) {
- const int angles[] = { 3, 45, 87 };
- for (enable_upsample_ = 0; enable_upsample_ < 2; ++enable_upsample_) {
- for (int i = 0; i < 3; ++i) {
- const int angle = angles[i] + start_angle_;
- dx_ = av1_get_dx(angle);
- dy_ = av1_get_dy(angle);
- printf("enable_upsample: %d angle: %d ~~~~~~~~~~~~~~~\n",
- enable_upsample_, angle);
- if (dx_ && dy_) RunTest(true, angle);
- }
- }
-}
-
-using ::testing::make_tuple;
-
-INSTANTIATE_TEST_CASE_P(
- C, LowbdDrPredTest,
- ::testing::Values(DrPredFunc<DrPred>(&z1_wrapper<av1_dr_prediction_z1_c>,
- NULL, AOM_BITS_8, kZ1Start),
- DrPredFunc<DrPred>(&z2_wrapper<av1_dr_prediction_z2_c>,
- NULL, AOM_BITS_8, kZ2Start),
- DrPredFunc<DrPred>(&z3_wrapper<av1_dr_prediction_z3_c>,
- NULL, AOM_BITS_8, kZ3Start)));
-
-class HighbdDrPredTest : public DrPredTest<uint16_t, DrPred_Hbd> {};
-
-TEST_P(HighbdDrPredTest, SaturatedValues) {
- for (int iter = 0; iter < kIterations && !HasFatalFailure(); ++iter) {
- enable_upsample_ = iter & 1;
- for (int angle = start_angle_; angle < stop_angle_; ++angle) {
- dx_ = av1_get_dx(angle);
- dy_ = av1_get_dy(angle);
- if (dx_ && dy_) RunTest(false, angle);
- }
- }
-}
-
-TEST_P(HighbdDrPredTest, DISABLED_Speed) {
- const int angles[] = { 3, 45, 87 };
- for (enable_upsample_ = 0; enable_upsample_ < 2; ++enable_upsample_) {
- for (int i = 0; i < 3; ++i) {
- const int angle = angles[i] + start_angle_;
- dx_ = av1_get_dx(angle);
- dy_ = av1_get_dy(angle);
- printf("enable_upsample: %d angle: %d ~~~~~~~~~~~~~~~\n",
- enable_upsample_, angle);
- if (dx_ && dy_) RunTest(true, angle);
- }
- }
-}
-
-INSTANTIATE_TEST_CASE_P(
- C, HighbdDrPredTest,
- ::testing::Values(
- DrPredFunc<DrPred_Hbd>(&z1_wrapper_hbd<av1_highbd_dr_prediction_z1_c>,
- NULL, AOM_BITS_8, kZ1Start),
- DrPredFunc<DrPred_Hbd>(&z1_wrapper_hbd<av1_highbd_dr_prediction_z1_c>,
- NULL, AOM_BITS_10, kZ1Start),
- DrPredFunc<DrPred_Hbd>(&z1_wrapper_hbd<av1_highbd_dr_prediction_z1_c>,
- NULL, AOM_BITS_12, kZ1Start),
- DrPredFunc<DrPred_Hbd>(&z2_wrapper_hbd<av1_highbd_dr_prediction_z2_c>,
- NULL, AOM_BITS_8, kZ2Start),
- DrPredFunc<DrPred_Hbd>(&z2_wrapper_hbd<av1_highbd_dr_prediction_z2_c>,
- NULL, AOM_BITS_10, kZ2Start),
- DrPredFunc<DrPred_Hbd>(&z2_wrapper_hbd<av1_highbd_dr_prediction_z2_c>,
- NULL, AOM_BITS_12, kZ2Start),
- DrPredFunc<DrPred_Hbd>(&z3_wrapper_hbd<av1_highbd_dr_prediction_z3_c>,
- NULL, AOM_BITS_8, kZ3Start),
- DrPredFunc<DrPred_Hbd>(&z3_wrapper_hbd<av1_highbd_dr_prediction_z3_c>,
- NULL, AOM_BITS_10, kZ3Start),
- DrPredFunc<DrPred_Hbd>(&z3_wrapper_hbd<av1_highbd_dr_prediction_z3_c>,
- NULL, AOM_BITS_12, kZ3Start)));
-
-} // namespace
diff --git a/third_party/aom/test/dump_obu.sh b/third_party/aom/test/dump_obu.sh
deleted file mode 100755
index da44dd7e6..000000000
--- a/third_party/aom/test/dump_obu.sh
+++ /dev/null
@@ -1,70 +0,0 @@
-#!/bin/sh
-## Copyright (c) 2018, Alliance for Open Media. All rights reserved
-##
-## This source code is subject to the terms of the BSD 2 Clause License and
-## the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
-## was not distributed with this source code in the LICENSE file, you can
-## obtain it at www.aomedia.org/license/software. If the Alliance for Open
-## Media Patent License 1.0 was not distributed with this source code in the
-## PATENTS file, you can obtain it at www.aomedia.org/license/patent.
-##
-## This file tests the libaom dump_obu tool. To add new tests to this
-## file, do the following:
-## 1. Write a shell function (this is your test).
-## 2. Add the function to dump_obu_tests (on a new line).
-##
-. $(dirname $0)/tools_common.sh
-
-readonly dump_obu_test_file="${AOM_TEST_OUTPUT_DIR}/av1_obu_test.ivf"
-
-dump_obu_verify_environment() {
- if [ ! -e "${YUV_RAW_INPUT}" ]; then
- elog "The file ${YUV_RAW_INPUT##*/} must exist in LIBAOM_TEST_DATA_PATH."
- return 1
- fi
- if [ "$(dump_obu_available)" = "yes" ]; then
- if [ -z "$(aom_tool_path dump_obu)" ]; then
- elog "dump_obu not found in LIBAOM_BIN_PATH, its parent, or child tools/."
- fi
- fi
-}
-
-dump_obu_available() {
- if [ "$(av1_decode_available)" = "yes" ] && \
- [ "$(av1_encode_available)" = "yes" ]; then
- echo yes
- fi
-}
-
-aomenc_available() {
- if [ -x "$(aom_tool_path aomenc)" ]; then
- echo yes
- fi
-}
-
-encode_test_file() {
- if [ "$(aomenc_available)" = "yes" ]; then
- local encoder="$(aom_tool_path aomenc)"
-
- eval "${encoder}" \
- $(aomenc_encode_test_fast_params) \
- $(yuv_raw_input) \
- --ivf \
- --output=${dump_obu_test_file} \
- ${devnull}
-
- if [ ! -e "${dump_obu_test_file}" ]; then
- elog "dump_obu test input encode failed."
- return 1
- fi
- fi
-}
-
-dump_obu() {
- encode_test_file
- eval $(aom_tool_path dump_obu) "${dump_obu_test_file}" ${devnull}
-}
-
-dump_obu_tests="dump_obu"
-
-run_tests dump_obu_verify_environment "${dump_obu_tests}"
diff --git a/third_party/aom/test/ec_test.cc b/third_party/aom/test/ec_test.cc
deleted file mode 100644
index e6a5ea63b..000000000
--- a/third_party/aom/test/ec_test.cc
+++ /dev/null
@@ -1,159 +0,0 @@
-/*
- * Copyright (c) 2017, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#include "third_party/googletest/src/googletest/include/gtest/gtest.h"
-
-#include <cstdlib>
-
-#include "aom_dsp/entenc.h"
-#include "aom_dsp/entdec.h"
-
-TEST(EC_TEST, random_ec_test) {
- od_ec_enc enc;
- od_ec_dec dec;
- int sz;
- int i;
- int ret;
- unsigned int sym;
- unsigned int seed;
- unsigned char *ptr;
- uint32_t ptr_sz;
- char *seed_str;
- ret = 0;
- seed_str = getenv("EC_TEST_SEED");
- if (seed_str) {
- seed = atoi(seed_str);
- } else {
- seed = 0xdaa1a;
- }
- srand(seed);
- od_ec_enc_init(&enc, 1);
- /*Test compatibility between multiple different encode/decode routines.*/
- for (i = 0; i < 409600; i++) {
- unsigned *fz;
- unsigned *fts;
- unsigned *data;
- unsigned *tell;
- unsigned *enc_method;
- int j;
- sz = rand() / ((RAND_MAX >> (rand() % 9U)) + 1U);
- fz = (unsigned *)malloc(sz * sizeof(*fz));
- fts = (unsigned *)malloc(sz * sizeof(*fts));
- data = (unsigned *)malloc(sz * sizeof(*data));
- tell = (unsigned *)malloc((sz + 1) * sizeof(*tell));
- enc_method = (unsigned *)malloc(sz * sizeof(*enc_method));
- od_ec_enc_reset(&enc);
- tell[0] = od_ec_enc_tell_frac(&enc);
- for (j = 0; j < sz; j++) {
- data[j] = rand() / ((RAND_MAX >> 1) + 1);
-
- fts[j] = CDF_PROB_BITS;
- fz[j] = (rand() % (CDF_PROB_TOP - 2)) >> (CDF_PROB_BITS - fts[j]);
- fz[j] = OD_MAXI(fz[j], 1);
- enc_method[j] = 3 + (rand() & 1);
- switch (enc_method[j]) {
- case 3: {
- od_ec_encode_bool_q15(&enc, data[j],
- OD_ICDF(fz[j] << (CDF_PROB_BITS - fts[j])));
- break;
- }
- case 4: {
- uint16_t cdf[2];
- cdf[0] = OD_ICDF(fz[j]);
- cdf[1] = OD_ICDF(1U << fts[j]);
- od_ec_encode_cdf_q15(&enc, data[j], cdf, 2);
- break;
- }
- }
-
- tell[j + 1] = od_ec_enc_tell_frac(&enc);
- }
- ptr = od_ec_enc_done(&enc, &ptr_sz);
- EXPECT_GE(((od_ec_enc_tell(&enc) + 7U) >> 3), ptr_sz)
- << "od_ec_enc_tell() lied: "
- "there's "
- << ptr_sz << " bytes instead of " << ((od_ec_enc_tell(&enc) + 7) >> 3)
- << " (Random seed: " << seed << ")\n";
- od_ec_dec_init(&dec, ptr, ptr_sz);
- EXPECT_EQ(od_ec_dec_tell_frac(&dec), tell[0])
- << "od_ec_dec_tell() mismatch between encoder and decoder "
- "at symbol 0: "
- << (unsigned)od_ec_dec_tell_frac(&dec) << " instead of " << tell[0]
- << " (Random seed: " << seed << ").\n";
- for (j = 0; j < sz; j++) {
- int dec_method;
- if (CDF_SHIFT == 0) {
- dec_method = 3 + (rand() & 1);
- } else {
- dec_method = enc_method[j];
- }
- switch (dec_method) {
- case 3: {
- sym = od_ec_decode_bool_q15(
- &dec, OD_ICDF(fz[j] << (CDF_PROB_BITS - fts[j])));
- break;
- }
- case 4: {
- uint16_t cdf[2];
- cdf[0] = OD_ICDF(fz[j]);
- cdf[1] = OD_ICDF(1U << fts[j]);
- sym = od_ec_decode_cdf_q15(&dec, cdf, 2);
- break;
- }
- }
-
- EXPECT_EQ(sym, data[j])
- << "Decoded " << sym << " instead of " << data[j]
- << " with fz=" << fz[j] << " and ftb=" << fts[j] << "at position "
- << j << " of " << sz << " (Random seed: " << seed << ").\n"
- << "Encoding method: " << enc_method[j]
- << " decoding method: " << dec_method << "\n";
- EXPECT_EQ(od_ec_dec_tell_frac(&dec), tell[j + 1])
- << "od_ec_dec_tell() mismatch between encoder and "
- "decoder at symbol "
- << j + 1 << ": " << (unsigned)od_ec_dec_tell_frac(&dec)
- << " instead of " << tell[j + 1] << " (Random seed: " << seed
- << ").\n";
- }
- free(enc_method);
- free(tell);
- free(data);
- free(fts);
- free(fz);
- }
- od_ec_enc_reset(&enc);
- if (CDF_SHIFT == 0) {
- od_ec_encode_bool_q15(&enc, 0, OD_ICDF(16384));
- od_ec_encode_bool_q15(&enc, 0, OD_ICDF(16384));
- od_ec_encode_bool_q15(&enc, 0, OD_ICDF(16384));
- od_ec_encode_bool_q15(&enc, 0, OD_ICDF(16384));
- od_ec_encode_bool_q15(&enc, 0, OD_ICDF(24576));
- od_ec_enc_patch_initial_bits(&enc, 3, 2);
- EXPECT_FALSE(enc.error) << "od_ec_enc_patch_initial_bits() failed.\n";
- od_ec_enc_patch_initial_bits(&enc, 0, 5);
- EXPECT_TRUE(enc.error)
- << "od_ec_enc_patch_initial_bits() didn't fail when it should have.\n";
- od_ec_enc_reset(&enc);
- od_ec_encode_bool_q15(&enc, 0, OD_ICDF(16384));
- od_ec_encode_bool_q15(&enc, 0, OD_ICDF(16384));
- od_ec_encode_bool_q15(&enc, 1, OD_ICDF(32256));
- od_ec_encode_bool_q15(&enc, 0, OD_ICDF(24576));
- od_ec_enc_patch_initial_bits(&enc, 0, 2);
- EXPECT_FALSE(enc.error) << "od_ec_enc_patch_initial_bits() failed.\n";
- ptr = od_ec_enc_done(&enc, &ptr_sz);
- EXPECT_EQ(ptr_sz, 2u);
- EXPECT_EQ(ptr[0], 63)
- << "Got " << ptr[0]
- << " when expecting 63 for od_ec_enc_patch_initial_bits().\n";
- }
- od_ec_enc_clear(&enc);
- EXPECT_EQ(ret, 0);
-}
diff --git a/third_party/aom/test/encode_api_test.cc b/third_party/aom/test/encode_api_test.cc
deleted file mode 100644
index c26f5720f..000000000
--- a/third_party/aom/test/encode_api_test.cc
+++ /dev/null
@@ -1,73 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#include "third_party/googletest/src/googletest/include/gtest/gtest.h"
-
-#include "config/aom_config.h"
-
-#include "test/util.h"
-#include "aom/aomcx.h"
-#include "aom/aom_encoder.h"
-
-namespace {
-
-TEST(EncodeAPI, InvalidParams) {
- static const aom_codec_iface_t *kCodecs[] = {
-#if CONFIG_AV1_ENCODER
- aom_codec_av1_cx(),
-#endif
- };
- uint8_t buf[1] = { 0 };
- aom_image_t img;
- aom_codec_ctx_t enc;
- aom_codec_enc_cfg_t cfg;
-
- EXPECT_EQ(&img, aom_img_wrap(&img, AOM_IMG_FMT_I420, 1, 1, 1, buf));
-
- EXPECT_EQ(AOM_CODEC_INVALID_PARAM, aom_codec_enc_init(NULL, NULL, NULL, 0));
- EXPECT_EQ(AOM_CODEC_INVALID_PARAM, aom_codec_enc_init(&enc, NULL, NULL, 0));
- EXPECT_EQ(AOM_CODEC_INVALID_PARAM, aom_codec_encode(NULL, NULL, 0, 0, 0));
- EXPECT_EQ(AOM_CODEC_INVALID_PARAM, aom_codec_encode(NULL, &img, 0, 0, 0));
- EXPECT_EQ(AOM_CODEC_INVALID_PARAM, aom_codec_destroy(NULL));
- EXPECT_EQ(AOM_CODEC_INVALID_PARAM,
- aom_codec_enc_config_default(NULL, NULL, 0));
- EXPECT_EQ(AOM_CODEC_INVALID_PARAM,
- aom_codec_enc_config_default(NULL, &cfg, 0));
- EXPECT_TRUE(aom_codec_error(NULL) != NULL);
-
- for (int i = 0; i < NELEMENTS(kCodecs); ++i) {
- SCOPED_TRACE(aom_codec_iface_name(kCodecs[i]));
- EXPECT_EQ(AOM_CODEC_INVALID_PARAM,
- aom_codec_enc_init(NULL, kCodecs[i], NULL, 0));
- EXPECT_EQ(AOM_CODEC_INVALID_PARAM,
- aom_codec_enc_init(&enc, kCodecs[i], NULL, 0));
- EXPECT_EQ(AOM_CODEC_INVALID_PARAM,
- aom_codec_enc_config_default(kCodecs[i], &cfg, 1));
-
- EXPECT_EQ(AOM_CODEC_OK, aom_codec_enc_config_default(kCodecs[i], &cfg, 0));
- EXPECT_EQ(AOM_CODEC_OK, aom_codec_enc_init(&enc, kCodecs[i], &cfg, 0));
-
- EXPECT_EQ(NULL, aom_codec_get_global_headers(NULL));
-
- aom_fixed_buf_t *glob_headers = aom_codec_get_global_headers(&enc);
- EXPECT_TRUE(glob_headers->buf != NULL);
- if (glob_headers) {
- free(glob_headers->buf);
- free(glob_headers);
- }
-
- EXPECT_EQ(AOM_CODEC_OK, aom_codec_encode(&enc, NULL, 0, 0, 0));
-
- EXPECT_EQ(AOM_CODEC_OK, aom_codec_destroy(&enc));
- }
-}
-
-} // namespace
diff --git a/third_party/aom/test/encode_perf_test.cc b/third_party/aom/test/encode_perf_test.cc
deleted file mode 100644
index fe649b153..000000000
--- a/third_party/aom/test/encode_perf_test.cc
+++ /dev/null
@@ -1,188 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#include <string>
-#include "third_party/googletest/src/googletest/include/gtest/gtest.h"
-
-#include "config/aom_config.h"
-#include "config/aom_version.h"
-
-#include "test/codec_factory.h"
-#include "test/encode_test_driver.h"
-#include "test/i420_video_source.h"
-#include "test/util.h"
-#include "test/y4m_video_source.h"
-#include "aom_ports/aom_timer.h"
-
-namespace {
-
-const int kMaxPsnr = 100;
-const double kUsecsInSec = 1000000.0;
-
-struct EncodePerfTestVideo {
- EncodePerfTestVideo(const char *name_, uint32_t width_, uint32_t height_,
- uint32_t bitrate_, int frames_)
- : name(name_), width(width_), height(height_), bitrate(bitrate_),
- frames(frames_) {}
- const char *name;
- uint32_t width;
- uint32_t height;
- uint32_t bitrate;
- int frames;
-};
-
-const EncodePerfTestVideo kAV1EncodePerfTestVectors[] = {
- EncodePerfTestVideo("desktop_640_360_30.yuv", 640, 360, 200, 2484),
- EncodePerfTestVideo("kirland_640_480_30.yuv", 640, 480, 200, 300),
- EncodePerfTestVideo("macmarcomoving_640_480_30.yuv", 640, 480, 200, 987),
- EncodePerfTestVideo("macmarcostationary_640_480_30.yuv", 640, 480, 200, 718),
- EncodePerfTestVideo("niklas_640_480_30.yuv", 640, 480, 200, 471),
- EncodePerfTestVideo("tacomanarrows_640_480_30.yuv", 640, 480, 200, 300),
- EncodePerfTestVideo("tacomasmallcameramovement_640_480_30.yuv", 640, 480, 200,
- 300),
- EncodePerfTestVideo("thaloundeskmtg_640_480_30.yuv", 640, 480, 200, 300),
- EncodePerfTestVideo("niklas_1280_720_30.yuv", 1280, 720, 600, 470),
-};
-
-const int kEncodePerfTestSpeeds[] = { 5, 6, 7, 8 };
-const int kEncodePerfTestThreads[] = { 1, 2, 4 };
-
-class AV1EncodePerfTest
- : public ::libaom_test::CodecTestWithParam<libaom_test::TestMode>,
- public ::libaom_test::EncoderTest {
- protected:
- AV1EncodePerfTest()
- : EncoderTest(GET_PARAM(0)), min_psnr_(kMaxPsnr), nframes_(0),
- encoding_mode_(GET_PARAM(1)), speed_(0), threads_(1) {}
-
- virtual ~AV1EncodePerfTest() {}
-
- virtual void SetUp() {
- InitializeConfig();
- SetMode(encoding_mode_);
-
- cfg_.g_lag_in_frames = 0;
- cfg_.rc_min_quantizer = 2;
- cfg_.rc_max_quantizer = 56;
- cfg_.rc_dropframe_thresh = 0;
- cfg_.rc_undershoot_pct = 50;
- cfg_.rc_overshoot_pct = 50;
- cfg_.rc_buf_sz = 1000;
- cfg_.rc_buf_initial_sz = 500;
- cfg_.rc_buf_optimal_sz = 600;
- cfg_.rc_end_usage = AOM_CBR;
- cfg_.g_error_resilient = 1;
- cfg_.g_threads = threads_;
- }
-
- virtual void PreEncodeFrameHook(::libaom_test::VideoSource *video,
- ::libaom_test::Encoder *encoder) {
- if (video->frame() == 0) {
- const int log2_tile_columns = 3;
- encoder->Control(AOME_SET_CPUUSED, speed_);
- encoder->Control(AV1E_SET_TILE_COLUMNS, log2_tile_columns);
- encoder->Control(AV1E_SET_FRAME_PARALLEL_DECODING, 1);
- encoder->Control(AOME_SET_ENABLEAUTOALTREF, 0);
- }
- }
-
- virtual void BeginPassHook(unsigned int /*pass*/) {
- min_psnr_ = kMaxPsnr;
- nframes_ = 0;
- }
-
- virtual void PSNRPktHook(const aom_codec_cx_pkt_t *pkt) {
- if (pkt->data.psnr.psnr[0] < min_psnr_) {
- min_psnr_ = pkt->data.psnr.psnr[0];
- }
- }
-
- // for performance reasons don't decode
- virtual bool DoDecode() { return 0; }
-
- double min_psnr() const { return min_psnr_; }
-
- void set_speed(unsigned int speed) { speed_ = speed; }
-
- void set_threads(unsigned int threads) { threads_ = threads; }
-
- private:
- double min_psnr_;
- unsigned int nframes_;
- libaom_test::TestMode encoding_mode_;
- unsigned speed_;
- unsigned int threads_;
-};
-
-TEST_P(AV1EncodePerfTest, PerfTest) {
- for (size_t i = 0; i < NELEMENTS(kAV1EncodePerfTestVectors); ++i) {
- for (size_t j = 0; j < NELEMENTS(kEncodePerfTestSpeeds); ++j) {
- for (size_t k = 0; k < NELEMENTS(kEncodePerfTestThreads); ++k) {
- if (kAV1EncodePerfTestVectors[i].width < 512 &&
- kEncodePerfTestThreads[k] > 1)
- continue;
- else if (kAV1EncodePerfTestVectors[i].width < 1024 &&
- kEncodePerfTestThreads[k] > 2)
- continue;
-
- set_threads(kEncodePerfTestThreads[k]);
- SetUp();
-
- const aom_rational timebase = { 33333333, 1000000000 };
- cfg_.g_timebase = timebase;
- cfg_.rc_target_bitrate = kAV1EncodePerfTestVectors[i].bitrate;
-
- init_flags_ = AOM_CODEC_USE_PSNR;
-
- const unsigned frames = kAV1EncodePerfTestVectors[i].frames;
- const char *video_name = kAV1EncodePerfTestVectors[i].name;
- libaom_test::I420VideoSource video(
- video_name, kAV1EncodePerfTestVectors[i].width,
- kAV1EncodePerfTestVectors[i].height, timebase.den, timebase.num, 0,
- kAV1EncodePerfTestVectors[i].frames);
- set_speed(kEncodePerfTestSpeeds[j]);
-
- aom_usec_timer t;
- aom_usec_timer_start(&t);
-
- ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
-
- aom_usec_timer_mark(&t);
- const double elapsed_secs = aom_usec_timer_elapsed(&t) / kUsecsInSec;
- const double fps = frames / elapsed_secs;
- const double minimum_psnr = min_psnr();
- std::string display_name(video_name);
- if (kEncodePerfTestThreads[k] > 1) {
- char thread_count[32];
- snprintf(thread_count, sizeof(thread_count), "_t-%d",
- kEncodePerfTestThreads[k]);
- display_name += thread_count;
- }
-
- printf("{\n");
- printf("\t\"type\" : \"encode_perf_test\",\n");
- printf("\t\"version\" : \"%s\",\n", VERSION_STRING_NOSP);
- printf("\t\"videoName\" : \"%s\",\n", display_name.c_str());
- printf("\t\"encodeTimeSecs\" : %f,\n", elapsed_secs);
- printf("\t\"totalFrames\" : %u,\n", frames);
- printf("\t\"framesPerSecond\" : %f,\n", fps);
- printf("\t\"minPsnr\" : %f,\n", minimum_psnr);
- printf("\t\"speed\" : %d,\n", kEncodePerfTestSpeeds[j]);
- printf("\t\"threads\" : %d\n", kEncodePerfTestThreads[k]);
- printf("}\n");
- }
- }
- }
-}
-
-AV1_INSTANTIATE_TEST_CASE(AV1EncodePerfTest,
- ::testing::Values(::libaom_test::kRealTime));
-} // namespace
diff --git a/third_party/aom/test/encode_test_driver.cc b/third_party/aom/test/encode_test_driver.cc
deleted file mode 100644
index f3d61dc36..000000000
--- a/third_party/aom/test/encode_test_driver.cc
+++ /dev/null
@@ -1,288 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#include <string>
-
-#include "third_party/googletest/src/googletest/include/gtest/gtest.h"
-
-#include "config/aom_config.h"
-
-#include "aom_ports/mem.h"
-#include "test/codec_factory.h"
-#include "test/decode_test_driver.h"
-#include "test/encode_test_driver.h"
-#include "test/register_state_check.h"
-#include "test/video_source.h"
-
-namespace libaom_test {
-void Encoder::InitEncoder(VideoSource *video) {
- aom_codec_err_t res;
- const aom_image_t *img = video->img();
-
- if (video->img() && !encoder_.priv) {
- cfg_.g_w = img->d_w;
- cfg_.g_h = img->d_h;
- cfg_.g_timebase = video->timebase();
- cfg_.rc_twopass_stats_in = stats_->buf();
-
- res = aom_codec_enc_init(&encoder_, CodecInterface(), &cfg_, init_flags_);
- ASSERT_EQ(AOM_CODEC_OK, res) << EncoderError();
- }
-}
-
-void Encoder::EncodeFrame(VideoSource *video, const unsigned long frame_flags) {
- if (video->img())
- EncodeFrameInternal(*video, frame_flags);
- else
- Flush();
-
- // Handle twopass stats
- CxDataIterator iter = GetCxData();
-
- while (const aom_codec_cx_pkt_t *pkt = iter.Next()) {
- if (pkt->kind != AOM_CODEC_STATS_PKT) continue;
-
- stats_->Append(*pkt);
- }
-}
-
-void Encoder::EncodeFrameInternal(const VideoSource &video,
- const unsigned long frame_flags) {
- aom_codec_err_t res;
- const aom_image_t *img = video.img();
-
- // Handle frame resizing
- if (cfg_.g_w != img->d_w || cfg_.g_h != img->d_h) {
- cfg_.g_w = img->d_w;
- cfg_.g_h = img->d_h;
- res = aom_codec_enc_config_set(&encoder_, &cfg_);
- ASSERT_EQ(AOM_CODEC_OK, res) << EncoderError();
- }
-
- // Encode the frame
- API_REGISTER_STATE_CHECK(res =
- aom_codec_encode(&encoder_, img, video.pts(),
- video.duration(), frame_flags));
- ASSERT_EQ(AOM_CODEC_OK, res) << EncoderError();
-}
-
-void Encoder::Flush() {
- const aom_codec_err_t res = aom_codec_encode(&encoder_, NULL, 0, 0, 0);
- if (!encoder_.priv)
- ASSERT_EQ(AOM_CODEC_ERROR, res) << EncoderError();
- else
- ASSERT_EQ(AOM_CODEC_OK, res) << EncoderError();
-}
-
-void EncoderTest::InitializeConfig() {
- const aom_codec_err_t res = codec_->DefaultEncoderConfig(&cfg_, 0);
- ASSERT_EQ(AOM_CODEC_OK, res);
-}
-
-void EncoderTest::SetMode(TestMode mode) {
- switch (mode) {
- case kOnePassGood:
- case kTwoPassGood: break;
- case kRealTime: cfg_.g_lag_in_frames = 0; break;
- default: ASSERT_TRUE(false) << "Unexpected mode " << mode;
- }
- mode_ = mode;
- if (mode == kTwoPassGood)
- passes_ = 2;
- else
- passes_ = 1;
-}
-
-static bool compare_plane(const uint8_t *const buf1, int stride1,
- const uint8_t *const buf2, int stride2, int w, int h,
- int *const mismatch_row, int *const mismatch_col,
- int *const mismatch_pix1, int *const mismatch_pix2) {
- int r, c;
-
- for (r = 0; r < h; ++r) {
- for (c = 0; c < w; ++c) {
- const int pix1 = buf1[r * stride1 + c];
- const int pix2 = buf2[r * stride2 + c];
-
- if (pix1 != pix2) {
- if (mismatch_row != NULL) *mismatch_row = r;
- if (mismatch_col != NULL) *mismatch_col = c;
- if (mismatch_pix1 != NULL) *mismatch_pix1 = pix1;
- if (mismatch_pix2 != NULL) *mismatch_pix2 = pix2;
- return false;
- }
- }
- }
-
- return true;
-}
-
-// The function should return "true" most of the time, therefore no early
-// break-out is implemented within the match checking process.
-static bool compare_img(const aom_image_t *img1, const aom_image_t *img2,
- int *const mismatch_row, int *const mismatch_col,
- int *const mismatch_plane, int *const mismatch_pix1,
- int *const mismatch_pix2) {
- if (img1->fmt != img2->fmt || img1->cp != img2->cp || img1->tc != img2->tc ||
- img1->mc != img2->mc || img1->d_w != img2->d_w ||
- img1->d_h != img2->d_h || img1->monochrome != img2->monochrome) {
- if (mismatch_row != NULL) *mismatch_row = -1;
- if (mismatch_col != NULL) *mismatch_col = -1;
- return false;
- }
-
- const int num_planes = img1->monochrome ? 1 : 3;
- for (int plane = 0; plane < num_planes; plane++) {
- if (!compare_plane(img1->planes[plane], img1->stride[plane],
- img2->planes[plane], img2->stride[plane],
- aom_img_plane_width(img1, plane),
- aom_img_plane_height(img1, plane), mismatch_row,
- mismatch_col, mismatch_pix1, mismatch_pix2)) {
- if (mismatch_plane != NULL) *mismatch_plane = plane;
- return false;
- }
- }
-
- return true;
-}
-
-void EncoderTest::MismatchHook(const aom_image_t *img_enc,
- const aom_image_t *img_dec) {
- int mismatch_row = 0;
- int mismatch_col = 0;
- int mismatch_plane = 0;
- int mismatch_pix_enc = 0;
- int mismatch_pix_dec = 0;
-
- ASSERT_FALSE(compare_img(img_enc, img_dec, &mismatch_row, &mismatch_col,
- &mismatch_plane, &mismatch_pix_enc,
- &mismatch_pix_dec));
-
- GTEST_FAIL() << "Encode/Decode mismatch found:" << std::endl
- << " pixel value enc/dec: " << mismatch_pix_enc << "/"
- << mismatch_pix_dec << std::endl
- << " plane: " << mismatch_plane << std::endl
- << " row/col: " << mismatch_row << "/"
- << mismatch_col << std::endl;
-}
-
-void EncoderTest::RunLoop(VideoSource *video) {
- aom_codec_dec_cfg_t dec_cfg = aom_codec_dec_cfg_t();
- dec_cfg.allow_lowbitdepth = 1;
-
- stats_.Reset();
-
- ASSERT_TRUE(passes_ == 1 || passes_ == 2);
- for (unsigned int pass = 0; pass < passes_; pass++) {
- last_pts_ = 0;
-
- if (passes_ == 1)
- cfg_.g_pass = AOM_RC_ONE_PASS;
- else if (pass == 0)
- cfg_.g_pass = AOM_RC_FIRST_PASS;
- else
- cfg_.g_pass = AOM_RC_LAST_PASS;
-
- BeginPassHook(pass);
- testing::internal::scoped_ptr<Encoder> encoder(
- codec_->CreateEncoder(cfg_, init_flags_, &stats_));
- ASSERT_TRUE(encoder.get() != NULL);
-
- ASSERT_NO_FATAL_FAILURE(video->Begin());
- encoder->InitEncoder(video);
-
- if (mode_ == kRealTime) {
- encoder->Control(AOME_SET_ENABLEAUTOALTREF, 0);
- }
-
- ASSERT_FALSE(::testing::Test::HasFatalFailure());
-
- testing::internal::scoped_ptr<Decoder> decoder(
- codec_->CreateDecoder(dec_cfg, 0 /* flags */));
-#if CONFIG_AV1_DECODER
- if (decoder->IsAV1()) {
- // Set dec_cfg.tile_row = -1 and dec_cfg.tile_col = -1 so that the whole
- // frame is decoded.
- decoder->Control(AV1_SET_TILE_MODE, cfg_.large_scale_tile);
- decoder->Control(AV1D_EXT_TILE_DEBUG, 1);
- decoder->Control(AV1_SET_DECODE_TILE_ROW, -1);
- decoder->Control(AV1_SET_DECODE_TILE_COL, -1);
- }
-#endif
-
- bool again;
- for (again = true; again; video->Next()) {
- again = (video->img() != NULL);
-
- PreEncodeFrameHook(video);
- PreEncodeFrameHook(video, encoder.get());
- encoder->EncodeFrame(video, frame_flags_);
-
- CxDataIterator iter = encoder->GetCxData();
-
- bool has_cxdata = false;
- bool has_dxdata = false;
- while (const aom_codec_cx_pkt_t *pkt = iter.Next()) {
- pkt = MutateEncoderOutputHook(pkt);
- again = true;
- switch (pkt->kind) {
- case AOM_CODEC_CX_FRAME_PKT:
- has_cxdata = true;
- if (decoder.get() != NULL && DoDecode()) {
- aom_codec_err_t res_dec;
- if (DoDecodeInvisible()) {
- res_dec = decoder->DecodeFrame(
- (const uint8_t *)pkt->data.frame.buf, pkt->data.frame.sz);
- } else {
- res_dec = decoder->DecodeFrame(
- (const uint8_t *)pkt->data.frame.buf +
- (pkt->data.frame.sz - pkt->data.frame.vis_frame_size),
- pkt->data.frame.vis_frame_size);
- }
-
- if (!HandleDecodeResult(res_dec, decoder.get())) break;
-
- has_dxdata = true;
- }
- ASSERT_GE(pkt->data.frame.pts, last_pts_);
- last_pts_ = pkt->data.frame.pts;
- FramePktHook(pkt);
- break;
-
- case AOM_CODEC_PSNR_PKT: PSNRPktHook(pkt); break;
-
- default: break;
- }
- }
-
- if (has_dxdata && has_cxdata) {
- const aom_image_t *img_enc = encoder->GetPreviewFrame();
- DxDataIterator dec_iter = decoder->GetDxData();
- const aom_image_t *img_dec = dec_iter.Next();
- if (img_enc && img_dec) {
- const bool res =
- compare_img(img_enc, img_dec, NULL, NULL, NULL, NULL, NULL);
- if (!res) { // Mismatch
- MismatchHook(img_enc, img_dec);
- }
- }
- if (img_dec) DecompressedFrameHook(*img_dec, video->pts());
- }
- if (!Continue()) break;
- }
-
- EndPassHook();
-
- if (!Continue()) break;
- }
-}
-
-} // namespace libaom_test
diff --git a/third_party/aom/test/encode_test_driver.h b/third_party/aom/test/encode_test_driver.h
deleted file mode 100644
index 4f3f855cf..000000000
--- a/third_party/aom/test/encode_test_driver.h
+++ /dev/null
@@ -1,249 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-#ifndef AOM_TEST_ENCODE_TEST_DRIVER_H_
-#define AOM_TEST_ENCODE_TEST_DRIVER_H_
-
-#include <string>
-#include <vector>
-
-#include "third_party/googletest/src/googletest/include/gtest/gtest.h"
-
-#include "config/aom_config.h"
-
-#if CONFIG_AV1_ENCODER
-#include "aom/aomcx.h"
-#endif
-#include "aom/aom_encoder.h"
-
-namespace libaom_test {
-
-class CodecFactory;
-class VideoSource;
-
-enum TestMode { kRealTime, kOnePassGood, kTwoPassGood };
-#define ALL_TEST_MODES \
- ::testing::Values(::libaom_test::kRealTime, ::libaom_test::kOnePassGood, \
- ::libaom_test::kTwoPassGood)
-
-#define ONE_PASS_TEST_MODES \
- ::testing::Values(::libaom_test::kRealTime, ::libaom_test::kOnePassGood)
-
-#define TWO_PASS_TEST_MODES ::testing::Values(::libaom_test::kTwoPassGood)
-
-#define NONREALTIME_TEST_MODES \
- ::testing::Values(::libaom_test::kOnePassGood, ::libaom_test::kTwoPassGood)
-
-// Provides an object to handle the libaom get_cx_data() iteration pattern
-class CxDataIterator {
- public:
- explicit CxDataIterator(aom_codec_ctx_t *encoder)
- : encoder_(encoder), iter_(NULL) {}
-
- const aom_codec_cx_pkt_t *Next() {
- return aom_codec_get_cx_data(encoder_, &iter_);
- }
-
- private:
- aom_codec_ctx_t *encoder_;
- aom_codec_iter_t iter_;
-};
-
-// Implements an in-memory store for libaom twopass statistics
-class TwopassStatsStore {
- public:
- void Append(const aom_codec_cx_pkt_t &pkt) {
- buffer_.append(reinterpret_cast<char *>(pkt.data.twopass_stats.buf),
- pkt.data.twopass_stats.sz);
- }
-
- aom_fixed_buf_t buf() {
- const aom_fixed_buf_t buf = { &buffer_[0], buffer_.size() };
- return buf;
- }
-
- void Reset() { buffer_.clear(); }
-
- protected:
- std::string buffer_;
-};
-
-// Provides a simplified interface to manage one video encoding pass, given
-// a configuration and video source.
-//
-// TODO(jkoleszar): The exact services it provides and the appropriate
-// level of abstraction will be fleshed out as more tests are written.
-class Encoder {
- public:
- Encoder(aom_codec_enc_cfg_t cfg, const uint32_t init_flags,
- TwopassStatsStore *stats)
- : cfg_(cfg), init_flags_(init_flags), stats_(stats) {
- memset(&encoder_, 0, sizeof(encoder_));
- }
-
- virtual ~Encoder() { aom_codec_destroy(&encoder_); }
-
- CxDataIterator GetCxData() { return CxDataIterator(&encoder_); }
-
- void InitEncoder(VideoSource *video);
-
- const aom_image_t *GetPreviewFrame() {
- return aom_codec_get_preview_frame(&encoder_);
- }
- // This is a thin wrapper around aom_codec_encode(), so refer to
- // aom_encoder.h for its semantics.
- void EncodeFrame(VideoSource *video, const unsigned long frame_flags);
-
- // Convenience wrapper for EncodeFrame()
- void EncodeFrame(VideoSource *video) { EncodeFrame(video, 0); }
-
- void Control(int ctrl_id, int arg) {
- const aom_codec_err_t res = aom_codec_control_(&encoder_, ctrl_id, arg);
- ASSERT_EQ(AOM_CODEC_OK, res) << EncoderError();
- }
-
- void Control(int ctrl_id, int *arg) {
- const aom_codec_err_t res = aom_codec_control_(&encoder_, ctrl_id, arg);
- ASSERT_EQ(AOM_CODEC_OK, res) << EncoderError();
- }
-
- void Control(int ctrl_id, struct aom_scaling_mode *arg) {
- const aom_codec_err_t res = aom_codec_control_(&encoder_, ctrl_id, arg);
- ASSERT_EQ(AOM_CODEC_OK, res) << EncoderError();
- }
-
-#if CONFIG_AV1_ENCODER
- void Control(int ctrl_id, aom_active_map_t *arg) {
- const aom_codec_err_t res = aom_codec_control_(&encoder_, ctrl_id, arg);
- ASSERT_EQ(AOM_CODEC_OK, res) << EncoderError();
- }
-#endif
-
- void Config(const aom_codec_enc_cfg_t *cfg) {
- const aom_codec_err_t res = aom_codec_enc_config_set(&encoder_, cfg);
- ASSERT_EQ(AOM_CODEC_OK, res) << EncoderError();
- cfg_ = *cfg;
- }
-
- protected:
- virtual aom_codec_iface_t *CodecInterface() const = 0;
-
- const char *EncoderError() {
- const char *detail = aom_codec_error_detail(&encoder_);
- return detail ? detail : aom_codec_error(&encoder_);
- }
-
- // Encode an image
- void EncodeFrameInternal(const VideoSource &video,
- const unsigned long frame_flags);
-
- // Flush the encoder on EOS
- void Flush();
-
- aom_codec_ctx_t encoder_;
- aom_codec_enc_cfg_t cfg_;
- unsigned long init_flags_;
- TwopassStatsStore *stats_;
-};
-
-// Common test functionality for all Encoder tests.
-//
-// This class is a mixin which provides the main loop common to all
-// encoder tests. It provides hooks which can be overridden by subclasses
-// to implement each test's specific behavior, while centralizing the bulk
-// of the boilerplate. Note that it doesn't inherit the gtest testing
-// classes directly, so that tests can be parameterized differently.
-class EncoderTest {
- protected:
- explicit EncoderTest(const CodecFactory *codec)
- : codec_(codec), abort_(false), init_flags_(0), frame_flags_(0),
- last_pts_(0), mode_(kRealTime) {
- // Default to 1 thread.
- cfg_.g_threads = 1;
- }
-
- virtual ~EncoderTest() {}
-
- // Initialize the cfg_ member with the default configuration.
- void InitializeConfig();
-
- // Map the TestMode enum to the passes_ variables.
- void SetMode(TestMode mode);
-
- // Set encoder flag.
- void set_init_flags(unsigned long flag) { // NOLINT(runtime/int)
- init_flags_ = flag;
- }
-
- // Main loop
- virtual void RunLoop(VideoSource *video);
-
- // Hook to be called at the beginning of a pass.
- virtual void BeginPassHook(unsigned int /*pass*/) {}
-
- // Hook to be called at the end of a pass.
- virtual void EndPassHook() {}
-
- // Hook to be called before encoding a frame.
- virtual void PreEncodeFrameHook(VideoSource * /*video*/) {}
- virtual void PreEncodeFrameHook(VideoSource * /*video*/,
- Encoder * /*encoder*/) {}
-
- // Hook to be called on every compressed data packet.
- virtual void FramePktHook(const aom_codec_cx_pkt_t * /*pkt*/) {}
-
- // Hook to be called on every PSNR packet.
- virtual void PSNRPktHook(const aom_codec_cx_pkt_t * /*pkt*/) {}
-
- // Hook to determine whether the encode loop should continue.
- virtual bool Continue() const {
- return !(::testing::Test::HasFatalFailure() || abort_);
- }
-
- // Hook to determine whether to decode frame after encoding
- virtual bool DoDecode() const { return true; }
-
- // Hook to determine whether to decode invisible frames after encoding
- virtual bool DoDecodeInvisible() const { return true; }
-
- // Hook to handle encode/decode mismatch
- virtual void MismatchHook(const aom_image_t *img1, const aom_image_t *img2);
-
- // Hook to be called on every decompressed frame.
- virtual void DecompressedFrameHook(const aom_image_t & /*img*/,
- aom_codec_pts_t /*pts*/) {}
-
- // Hook to be called to handle decode result. Return true to continue.
- virtual bool HandleDecodeResult(const aom_codec_err_t res_dec,
- Decoder *decoder) {
- EXPECT_EQ(AOM_CODEC_OK, res_dec) << decoder->DecodeError();
- return AOM_CODEC_OK == res_dec;
- }
-
- // Hook that can modify the encoder's output data
- virtual const aom_codec_cx_pkt_t *MutateEncoderOutputHook(
- const aom_codec_cx_pkt_t *pkt) {
- return pkt;
- }
-
- const CodecFactory *codec_;
- bool abort_;
- aom_codec_enc_cfg_t cfg_;
- unsigned int passes_;
- TwopassStatsStore stats_;
- unsigned long init_flags_;
- unsigned long frame_flags_;
- aom_codec_pts_t last_pts_;
- TestMode mode_;
-};
-
-} // namespace libaom_test
-
-#endif // AOM_TEST_ENCODE_TEST_DRIVER_H_
diff --git a/third_party/aom/test/encodetxb_test.cc b/third_party/aom/test/encodetxb_test.cc
deleted file mode 100644
index 11cc07368..000000000
--- a/third_party/aom/test/encodetxb_test.cc
+++ /dev/null
@@ -1,262 +0,0 @@
-/*
- * Copyright (c) 2017, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#include <stdint.h>
-#include <stdio.h>
-#include <string.h>
-
-#include "third_party/googletest/src/googletest/include/gtest/gtest.h"
-
-#include "config/aom_config.h"
-#include "config/av1_rtcd.h"
-
-#include "aom_ports/aom_timer.h"
-#include "aom_ports/mem.h"
-#include "av1/common/idct.h"
-#include "av1/common/onyxc_int.h"
-#include "av1/common/scan.h"
-#include "av1/common/txb_common.h"
-#include "test/acm_random.h"
-#include "test/clear_system_state.h"
-#include "test/register_state_check.h"
-#include "test/util.h"
-
-namespace {
-using libaom_test::ACMRandom;
-
-typedef void (*GetNzMapContextsFunc)(const uint8_t *const levels,
- const int16_t *const scan,
- const uint16_t eob, const TX_SIZE tx_size,
- const TX_CLASS tx_class,
- int8_t *const coeff_contexts);
-
-class EncodeTxbTest : public ::testing::TestWithParam<GetNzMapContextsFunc> {
- public:
- EncodeTxbTest() : get_nz_map_contexts_func_(GetParam()) {}
-
- virtual ~EncodeTxbTest() {}
-
- virtual void SetUp() {
- coeff_contexts_ref_ = reinterpret_cast<int8_t *>(
- aom_memalign(16, sizeof(*coeff_contexts_ref_) * MAX_TX_SQUARE));
- ASSERT_TRUE(coeff_contexts_ref_ != NULL);
- coeff_contexts_ = reinterpret_cast<int8_t *>(
- aom_memalign(16, sizeof(*coeff_contexts_) * MAX_TX_SQUARE));
- ASSERT_TRUE(coeff_contexts_ != NULL);
- }
-
- virtual void TearDown() {
- aom_free(coeff_contexts_ref_);
- aom_free(coeff_contexts_);
- libaom_test::ClearSystemState();
- }
-
- void GetNzMapContextsRun() {
- const int kNumTests = 10;
- int result = 0;
-
- for (int is_inter = 0; is_inter < 2; ++is_inter) {
- for (int tx_type = DCT_DCT; tx_type < TX_TYPES; ++tx_type) {
- const TX_CLASS tx_class = tx_type_to_class[tx_type];
- for (int tx_size = TX_4X4; tx_size < TX_SIZES_ALL; ++tx_size) {
- const int bwl = get_txb_bwl((TX_SIZE)tx_size);
- const int width = get_txb_wide((TX_SIZE)tx_size);
- const int height = get_txb_high((TX_SIZE)tx_size);
- const int real_width = tx_size_wide[tx_size];
- const int real_height = tx_size_high[tx_size];
- const int16_t *const scan = av1_scan_orders[tx_size][tx_type].scan;
-
- levels_ = set_levels(levels_buf_, width);
- for (int i = 0; i < kNumTests && !result; ++i) {
- for (int eob = 1; eob <= width * height && !result; ++eob) {
- InitDataWithEob(scan, bwl, eob);
-
- av1_get_nz_map_contexts_c(levels_, scan, eob, (TX_SIZE)tx_size,
- tx_class, coeff_contexts_ref_);
- get_nz_map_contexts_func_(levels_, scan, eob, (TX_SIZE)tx_size,
- tx_class, coeff_contexts_);
-
- result = Compare(scan, eob);
-
- EXPECT_EQ(result, 0)
- << " tx_class " << tx_class << " width " << real_width
- << " height " << real_height << " eob " << eob;
- }
- }
- }
- }
- }
- }
-
- void SpeedTestGetNzMapContextsRun() {
- const int kNumTests = 2000000000;
- aom_usec_timer timer;
-
- printf("Note: Only test the largest possible eob case!\n");
- for (int tx_size = TX_4X4; tx_size < TX_SIZES_ALL; ++tx_size) {
- const int bwl = get_txb_bwl((TX_SIZE)tx_size);
- const int width = get_txb_wide((TX_SIZE)tx_size);
- const int height = get_txb_high((TX_SIZE)tx_size);
- const int real_width = tx_size_wide[tx_size];
- const int real_height = tx_size_high[tx_size];
- const TX_TYPE tx_type = DCT_DCT;
- const TX_CLASS tx_class = tx_type_to_class[tx_type];
- const int16_t *const scan = av1_scan_orders[tx_size][tx_type].scan;
- const int eob = width * height;
- const int numTests = kNumTests / (width * height);
-
- levels_ = set_levels(levels_buf_, width);
- InitDataWithEob(scan, bwl, eob);
-
- aom_usec_timer_start(&timer);
- for (int i = 0; i < numTests; ++i) {
- get_nz_map_contexts_func_(levels_, scan, eob, (TX_SIZE)tx_size,
- tx_class, coeff_contexts_);
- }
- aom_usec_timer_mark(&timer);
-
- const int elapsed_time = static_cast<int>(aom_usec_timer_elapsed(&timer));
- printf("get_nz_map_contexts_%2dx%2d: %7.1f ms\n", real_width, real_height,
- elapsed_time / 1000.0);
- }
- }
-
- private:
- void InitDataWithEob(const int16_t *const scan, const int bwl,
- const int eob) {
- memset(levels_buf_, 0, sizeof(levels_buf_));
- memset(coeff_contexts_, 0, sizeof(*coeff_contexts_) * MAX_TX_SQUARE);
-
- for (int c = 0; c < eob; ++c) {
- levels_[get_padded_idx(scan[c], bwl)] =
- static_cast<uint8_t>(clamp(rnd_.Rand8(), 0, INT8_MAX));
- coeff_contexts_[scan[c]] = rnd_.Rand16() >> 1;
- }
-
- memcpy(coeff_contexts_ref_, coeff_contexts_,
- sizeof(*coeff_contexts_) * MAX_TX_SQUARE);
- }
-
- bool Compare(const int16_t *const scan, const int eob) const {
- bool result = false;
- if (memcmp(coeff_contexts_, coeff_contexts_ref_,
- sizeof(*coeff_contexts_ref_) * MAX_TX_SQUARE)) {
- for (int i = 0; i < eob; i++) {
- const int pos = scan[i];
- if (coeff_contexts_ref_[pos] != coeff_contexts_[pos]) {
- printf("coeff_contexts_[%d] diff:%6d (ref),%6d (opt)\n", pos,
- coeff_contexts_ref_[pos], coeff_contexts_[pos]);
- result = true;
- break;
- }
- }
- }
- return result;
- }
-
- GetNzMapContextsFunc get_nz_map_contexts_func_;
- ACMRandom rnd_;
- uint8_t levels_buf_[TX_PAD_2D];
- uint8_t *levels_;
- int8_t *coeff_contexts_ref_;
- int8_t *coeff_contexts_;
-};
-
-TEST_P(EncodeTxbTest, GetNzMapContexts) { GetNzMapContextsRun(); }
-
-TEST_P(EncodeTxbTest, DISABLED_SpeedTestGetNzMapContexts) {
- SpeedTestGetNzMapContextsRun();
-}
-
-#if HAVE_SSE2
-INSTANTIATE_TEST_CASE_P(SSE2, EncodeTxbTest,
- ::testing::Values(av1_get_nz_map_contexts_sse2));
-#endif
-
-typedef void (*av1_txb_init_levels_func)(const tran_low_t *const coeff,
- const int width, const int height,
- uint8_t *const levels);
-
-typedef ::testing::tuple<av1_txb_init_levels_func, int> TxbInitLevelParam;
-
-class EncodeTxbInitLevelTest
- : public ::testing::TestWithParam<TxbInitLevelParam> {
- public:
- virtual ~EncodeTxbInitLevelTest() {}
- virtual void TearDown() { libaom_test::ClearSystemState(); }
- void RunTest(av1_txb_init_levels_func test_func, int tx_size, int is_speed);
-};
-
-void EncodeTxbInitLevelTest::RunTest(av1_txb_init_levels_func test_func,
- int tx_size, int is_speed) {
- const int width = get_txb_wide((TX_SIZE)tx_size);
- const int height = get_txb_high((TX_SIZE)tx_size);
- tran_low_t coeff[MAX_TX_SQUARE];
-
- uint8_t levels_buf[2][TX_PAD_2D];
- uint8_t *const levels0 = set_levels(levels_buf[0], width);
- uint8_t *const levels1 = set_levels(levels_buf[1], width);
-
- ACMRandom rnd(ACMRandom::DeterministicSeed());
- for (int i = 0; i < width * height; i++) {
- coeff[i] = rnd.Rand15Signed() + rnd.Rand15Signed();
- }
- for (int i = 0; i < TX_PAD_2D; i++) {
- levels_buf[0][i] = rnd.Rand8();
- levels_buf[1][i] = rnd.Rand8();
- }
- const int run_times = is_speed ? (width * height) * 10000 : 1;
- aom_usec_timer timer;
- aom_usec_timer_start(&timer);
- for (int i = 0; i < run_times; ++i) {
- av1_txb_init_levels_c(coeff, width, height, levels0);
- }
- const double t1 = get_time_mark(&timer);
- aom_usec_timer_start(&timer);
- for (int i = 0; i < run_times; ++i) {
- test_func(coeff, width, height, levels1);
- }
- const double t2 = get_time_mark(&timer);
- if (is_speed) {
- printf("init %3dx%-3d:%7.2f/%7.2fns", width, height, t1, t2);
- printf("(%3.2f)\n", t1 / t2);
- }
- const int stride = width + TX_PAD_HOR;
- for (int r = 0; r < height + TX_PAD_VER; ++r) {
- for (int c = 0; c < stride; ++c) {
- ASSERT_EQ(levels_buf[0][c + r * stride], levels_buf[1][c + r * stride])
- << "[" << r << "," << c << "] " << run_times << width << "x"
- << height;
- }
- }
-}
-
-TEST_P(EncodeTxbInitLevelTest, match) {
- RunTest(GET_PARAM(0), GET_PARAM(1), 0);
-}
-
-TEST_P(EncodeTxbInitLevelTest, DISABLED_Speed) {
- RunTest(GET_PARAM(0), GET_PARAM(1), 1);
-}
-
-#if HAVE_SSE4_1
-INSTANTIATE_TEST_CASE_P(
- SSE4_1, EncodeTxbInitLevelTest,
- ::testing::Combine(::testing::Values(&av1_txb_init_levels_sse4_1),
- ::testing::Range(0, static_cast<int>(TX_SIZES_ALL), 1)));
-#endif
-#if HAVE_AVX2
-INSTANTIATE_TEST_CASE_P(
- AVX2, EncodeTxbInitLevelTest,
- ::testing::Combine(::testing::Values(&av1_txb_init_levels_avx2),
- ::testing::Range(0, static_cast<int>(TX_SIZES_ALL), 1)));
-#endif
-} // namespace
diff --git a/third_party/aom/test/end_to_end_test.cc b/third_party/aom/test/end_to_end_test.cc
deleted file mode 100644
index 1ac0ae931..000000000
--- a/third_party/aom/test/end_to_end_test.cc
+++ /dev/null
@@ -1,199 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#include "third_party/googletest/src/googletest/include/gtest/gtest.h"
-
-#include "test/codec_factory.h"
-#include "test/encode_test_driver.h"
-#include "test/util.h"
-#include "test/y4m_video_source.h"
-#include "test/yuv_video_source.h"
-
-namespace {
-
-const unsigned int kWidth = 160;
-const unsigned int kHeight = 90;
-const unsigned int kFramerate = 50;
-const unsigned int kFrames = 10;
-const int kBitrate = 500;
-// List of psnr thresholds for speed settings 0-7 and 5 encoding modes
-const double kPsnrThreshold[][5] = {
-// Note:
-// AV1 HBD average PSNR is slightly lower than AV1.
-// We make two cases here to enable the testing and
-// guard picture quality.
-#if CONFIG_AV1_ENCODER
- { 36.0, 37.0, 37.0, 37.0, 37.0 }, { 31.0, 36.0, 36.0, 36.0, 36.0 },
- { 31.0, 35.0, 35.0, 35.0, 35.0 }, { 31.0, 34.0, 34.0, 34.0, 34.0 },
- { 31.0, 33.0, 33.0, 33.0, 33.0 }, { 31.0, 32.0, 32.0, 32.0, 32.0 },
- { 30.0, 31.0, 31.0, 31.0, 31.0 }, { 29.0, 30.0, 30.0, 30.0, 30.0 },
-#else
- { 36.0, 37.0, 37.0, 37.0, 37.0 }, { 35.0, 36.0, 36.0, 36.0, 36.0 },
- { 34.0, 35.0, 35.0, 35.0, 35.0 }, { 33.0, 34.0, 34.0, 34.0, 34.0 },
- { 32.0, 33.0, 33.0, 33.0, 33.0 }, { 31.0, 32.0, 32.0, 32.0, 32.0 },
- { 30.0, 31.0, 31.0, 31.0, 31.0 }, { 29.0, 30.0, 30.0, 30.0, 30.0 },
-#endif // CONFIG_AV1_ENCODER
-};
-
-typedef struct {
- const char *filename;
- unsigned int input_bit_depth;
- aom_img_fmt fmt;
- aom_bit_depth_t bit_depth;
- unsigned int profile;
-} TestVideoParam;
-
-const TestVideoParam kTestVectors[] = {
- { "park_joy_90p_8_420.y4m", 8, AOM_IMG_FMT_I420, AOM_BITS_8, 0 },
- { "park_joy_90p_8_422.y4m", 8, AOM_IMG_FMT_I422, AOM_BITS_8, 2 },
- { "park_joy_90p_8_444.y4m", 8, AOM_IMG_FMT_I444, AOM_BITS_8, 1 },
- { "park_joy_90p_10_420.y4m", 10, AOM_IMG_FMT_I42016, AOM_BITS_10, 0 },
- { "park_joy_90p_10_422.y4m", 10, AOM_IMG_FMT_I42216, AOM_BITS_10, 2 },
- { "park_joy_90p_10_444.y4m", 10, AOM_IMG_FMT_I44416, AOM_BITS_10, 1 },
- { "park_joy_90p_12_420.y4m", 12, AOM_IMG_FMT_I42016, AOM_BITS_12, 2 },
- { "park_joy_90p_12_422.y4m", 12, AOM_IMG_FMT_I42216, AOM_BITS_12, 2 },
- { "park_joy_90p_12_444.y4m", 12, AOM_IMG_FMT_I44416, AOM_BITS_12, 2 },
-};
-
-// Encoding modes tested
-const libaom_test::TestMode kEncodingModeVectors[] = {
- ::libaom_test::kTwoPassGood,
- ::libaom_test::kOnePassGood,
- ::libaom_test::kRealTime,
-};
-
-// Speed settings tested
-const int kCpuUsedVectors[] = { 1, 2, 3, 5, 6 };
-
-int is_extension_y4m(const char *filename) {
- const char *dot = strrchr(filename, '.');
- if (!dot || dot == filename)
- return 0;
- else
- return !strcmp(dot, ".y4m");
-}
-
-class EndToEndTest
- : public ::libaom_test::CodecTestWith3Params<libaom_test::TestMode,
- TestVideoParam, int>,
- public ::libaom_test::EncoderTest {
- protected:
- EndToEndTest()
- : EncoderTest(GET_PARAM(0)), test_video_param_(GET_PARAM(2)),
- cpu_used_(GET_PARAM(3)), psnr_(0.0), nframes_(0),
- encoding_mode_(GET_PARAM(1)) {}
-
- virtual ~EndToEndTest() {}
-
- virtual void SetUp() {
- InitializeConfig();
- SetMode(encoding_mode_);
- if (encoding_mode_ != ::libaom_test::kRealTime) {
- cfg_.g_lag_in_frames = 5;
- cfg_.rc_end_usage = AOM_VBR;
- } else {
- cfg_.g_lag_in_frames = 0;
- cfg_.rc_end_usage = AOM_CBR;
- cfg_.rc_buf_sz = 1000;
- cfg_.rc_buf_initial_sz = 500;
- cfg_.rc_buf_optimal_sz = 600;
- }
- }
-
- virtual void BeginPassHook(unsigned int) {
- psnr_ = 0.0;
- nframes_ = 0;
- }
-
- virtual void PSNRPktHook(const aom_codec_cx_pkt_t *pkt) {
- psnr_ += pkt->data.psnr.psnr[0];
- nframes_++;
- }
-
- virtual void PreEncodeFrameHook(::libaom_test::VideoSource *video,
- ::libaom_test::Encoder *encoder) {
- if (video->frame() == 1) {
- encoder->Control(AV1E_SET_FRAME_PARALLEL_DECODING, 1);
- encoder->Control(AV1E_SET_TILE_COLUMNS, 4);
- encoder->Control(AOME_SET_CPUUSED, cpu_used_);
- // Test screen coding tools at cpu_used = 1 && encoding mode is two-pass.
- if (cpu_used_ == 1 && encoding_mode_ == ::libaom_test::kTwoPassGood)
- encoder->Control(AV1E_SET_TUNE_CONTENT, AOM_CONTENT_SCREEN);
- else
- encoder->Control(AV1E_SET_TUNE_CONTENT, AOM_CONTENT_DEFAULT);
- if (encoding_mode_ != ::libaom_test::kRealTime) {
- encoder->Control(AOME_SET_ENABLEAUTOALTREF, 1);
- encoder->Control(AOME_SET_ARNR_MAXFRAMES, 7);
- encoder->Control(AOME_SET_ARNR_STRENGTH, 5);
- }
- }
- }
-
- double GetAveragePsnr() const {
- if (nframes_) return psnr_ / nframes_;
- return 0.0;
- }
-
- double GetPsnrThreshold() {
- return kPsnrThreshold[cpu_used_][encoding_mode_];
- }
-
- void DoTest() {
- cfg_.rc_target_bitrate = kBitrate;
- cfg_.g_error_resilient = 0;
- cfg_.g_profile = test_video_param_.profile;
- cfg_.g_input_bit_depth = test_video_param_.input_bit_depth;
- cfg_.g_bit_depth = test_video_param_.bit_depth;
- init_flags_ = AOM_CODEC_USE_PSNR;
- if (cfg_.g_bit_depth > 8) init_flags_ |= AOM_CODEC_USE_HIGHBITDEPTH;
-
- testing::internal::scoped_ptr<libaom_test::VideoSource> video;
- if (is_extension_y4m(test_video_param_.filename)) {
- video.reset(new libaom_test::Y4mVideoSource(test_video_param_.filename, 0,
- kFrames));
- } else {
- video.reset(new libaom_test::YUVVideoSource(
- test_video_param_.filename, test_video_param_.fmt, kWidth, kHeight,
- kFramerate, 1, 0, kFrames));
- }
- ASSERT_TRUE(video.get() != NULL);
-
- ASSERT_NO_FATAL_FAILURE(RunLoop(video.get()));
- const double psnr = GetAveragePsnr();
- EXPECT_GT(psnr, GetPsnrThreshold())
- << "cpu used = " << cpu_used_ << ", encoding mode = " << encoding_mode_;
- }
-
- TestVideoParam test_video_param_;
- int cpu_used_;
-
- private:
- double psnr_;
- unsigned int nframes_;
- libaom_test::TestMode encoding_mode_;
-};
-
-class EndToEndTestLarge : public EndToEndTest {};
-
-TEST_P(EndToEndTestLarge, EndtoEndPSNRTest) { DoTest(); }
-
-TEST_P(EndToEndTest, EndtoEndPSNRTest) { DoTest(); }
-
-AV1_INSTANTIATE_TEST_CASE(EndToEndTestLarge,
- ::testing::ValuesIn(kEncodingModeVectors),
- ::testing::ValuesIn(kTestVectors),
- ::testing::ValuesIn(kCpuUsedVectors));
-
-AV1_INSTANTIATE_TEST_CASE(EndToEndTest,
- ::testing::Values(kEncodingModeVectors[0]),
- ::testing::Values(kTestVectors[2]), // 444
- ::testing::Values(kCpuUsedVectors[2]));
-} // namespace
diff --git a/third_party/aom/test/error_block_test.cc b/third_party/aom/test/error_block_test.cc
deleted file mode 100644
index 353947c3d..000000000
--- a/third_party/aom/test/error_block_test.cc
+++ /dev/null
@@ -1,171 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#include <cmath>
-#include <cstdlib>
-#include <string>
-
-#include "third_party/googletest/src/googletest/include/gtest/gtest.h"
-
-#include "config/aom_config.h"
-#include "config/av1_rtcd.h"
-
-#include "test/acm_random.h"
-#include "test/clear_system_state.h"
-#include "test/register_state_check.h"
-#include "test/util.h"
-#include "av1/common/entropy.h"
-#include "aom/aom_codec.h"
-#include "aom/aom_integer.h"
-
-using libaom_test::ACMRandom;
-
-namespace {
-const int kNumIterations = 1000;
-
-typedef int64_t (*ErrorBlockFunc)(const tran_low_t *coeff,
- const tran_low_t *dqcoeff,
- intptr_t block_size, int64_t *ssz, int bps);
-
-typedef ::testing::tuple<ErrorBlockFunc, ErrorBlockFunc, aom_bit_depth_t>
- ErrorBlockParam;
-
-class ErrorBlockTest : public ::testing::TestWithParam<ErrorBlockParam> {
- public:
- virtual ~ErrorBlockTest() {}
- virtual void SetUp() {
- error_block_op_ = GET_PARAM(0);
- ref_error_block_op_ = GET_PARAM(1);
- bit_depth_ = GET_PARAM(2);
- }
-
- virtual void TearDown() { libaom_test::ClearSystemState(); }
-
- protected:
- aom_bit_depth_t bit_depth_;
- ErrorBlockFunc error_block_op_;
- ErrorBlockFunc ref_error_block_op_;
-};
-
-TEST_P(ErrorBlockTest, OperationCheck) {
- ACMRandom rnd(ACMRandom::DeterministicSeed());
- DECLARE_ALIGNED(16, tran_low_t, coeff[4096]);
- DECLARE_ALIGNED(16, tran_low_t, dqcoeff[4096]);
- int err_count_total = 0;
- int first_failure = -1;
- intptr_t block_size;
- int64_t ssz;
- int64_t ret;
- int64_t ref_ssz;
- int64_t ref_ret;
- const int msb = bit_depth_ + 8 - 1;
- for (int i = 0; i < kNumIterations; ++i) {
- int err_count = 0;
- block_size = 16 << (i % 9); // All block sizes from 4x4, 8x4 ..64x64
- for (int j = 0; j < block_size; j++) {
- // coeff and dqcoeff will always have at least the same sign, and this
- // can be used for optimization, so generate test input precisely.
- if (rnd(2)) {
- // Positive number
- coeff[j] = rnd(1 << msb);
- dqcoeff[j] = rnd(1 << msb);
- } else {
- // Negative number
- coeff[j] = -rnd(1 << msb);
- dqcoeff[j] = -rnd(1 << msb);
- }
- }
- ref_ret =
- ref_error_block_op_(coeff, dqcoeff, block_size, &ref_ssz, bit_depth_);
- ASM_REGISTER_STATE_CHECK(
- ret = error_block_op_(coeff, dqcoeff, block_size, &ssz, bit_depth_));
- err_count += (ref_ret != ret) | (ref_ssz != ssz);
- if (err_count && !err_count_total) {
- first_failure = i;
- }
- err_count_total += err_count;
- }
- EXPECT_EQ(0, err_count_total)
- << "Error: Error Block Test, C output doesn't match optimized output. "
- << "First failed at test case " << first_failure;
-}
-
-TEST_P(ErrorBlockTest, ExtremeValues) {
- ACMRandom rnd(ACMRandom::DeterministicSeed());
- DECLARE_ALIGNED(16, tran_low_t, coeff[4096]);
- DECLARE_ALIGNED(16, tran_low_t, dqcoeff[4096]);
- int err_count_total = 0;
- int first_failure = -1;
- intptr_t block_size;
- int64_t ssz;
- int64_t ret;
- int64_t ref_ssz;
- int64_t ref_ret;
- const int msb = bit_depth_ + 8 - 1;
- int max_val = ((1 << msb) - 1);
- for (int i = 0; i < kNumIterations; ++i) {
- int err_count = 0;
- int k = (i / 9) % 9;
-
- // Change the maximum coeff value, to test different bit boundaries
- if (k == 8 && (i % 9) == 0) {
- max_val >>= 1;
- }
- block_size = 16 << (i % 9); // All block sizes from 4x4, 8x4 ..64x64
- for (int j = 0; j < block_size; j++) {
- if (k < 4) {
- // Test at positive maximum values
- coeff[j] = k % 2 ? max_val : 0;
- dqcoeff[j] = (k >> 1) % 2 ? max_val : 0;
- } else if (k < 8) {
- // Test at negative maximum values
- coeff[j] = k % 2 ? -max_val : 0;
- dqcoeff[j] = (k >> 1) % 2 ? -max_val : 0;
- } else {
- if (rnd(2)) {
- // Positive number
- coeff[j] = rnd(1 << 14);
- dqcoeff[j] = rnd(1 << 14);
- } else {
- // Negative number
- coeff[j] = -rnd(1 << 14);
- dqcoeff[j] = -rnd(1 << 14);
- }
- }
- }
- ref_ret =
- ref_error_block_op_(coeff, dqcoeff, block_size, &ref_ssz, bit_depth_);
- ASM_REGISTER_STATE_CHECK(
- ret = error_block_op_(coeff, dqcoeff, block_size, &ssz, bit_depth_));
- err_count += (ref_ret != ret) | (ref_ssz != ssz);
- if (err_count && !err_count_total) {
- first_failure = i;
- }
- err_count_total += err_count;
- }
- EXPECT_EQ(0, err_count_total)
- << "Error: Error Block Test, C output doesn't match optimized output. "
- << "First failed at test case " << first_failure;
-}
-
-#if (HAVE_SSE2 || HAVE_AVX)
-using ::testing::make_tuple;
-
-INSTANTIATE_TEST_CASE_P(
- SSE2, ErrorBlockTest,
- ::testing::Values(make_tuple(&av1_highbd_block_error_sse2,
- &av1_highbd_block_error_c, AOM_BITS_10),
- make_tuple(&av1_highbd_block_error_sse2,
- &av1_highbd_block_error_c, AOM_BITS_12),
- make_tuple(&av1_highbd_block_error_sse2,
- &av1_highbd_block_error_c, AOM_BITS_8)));
-#endif // HAVE_SSE2
-} // namespace
diff --git a/third_party/aom/test/error_resilience_test.cc b/third_party/aom/test/error_resilience_test.cc
deleted file mode 100644
index 13ac0bf93..000000000
--- a/third_party/aom/test/error_resilience_test.cc
+++ /dev/null
@@ -1,438 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#include "third_party/googletest/src/googletest/include/gtest/gtest.h"
-#include "test/codec_factory.h"
-#include "test/encode_test_driver.h"
-#include "test/i420_video_source.h"
-#include "test/util.h"
-
-namespace {
-
-const int kMaxErrorFrames = 12;
-const int kMaxInvisibleErrorFrames = 12;
-const int kMaxDroppableFrames = 12;
-const int kMaxErrorResilientFrames = 12;
-const int kMaxNoMFMVFrames = 12;
-const int kMaxPrimRefNoneFrames = 12;
-const int kMaxSFrames = 12;
-const int kCpuUsed = 1;
-
-class ErrorResilienceTestLarge
- : public ::libaom_test::CodecTestWithParam<libaom_test::TestMode>,
- public ::libaom_test::EncoderTest {
- protected:
- ErrorResilienceTestLarge()
- : EncoderTest(GET_PARAM(0)), psnr_(0.0), nframes_(0), mismatch_psnr_(0.0),
- mismatch_nframes_(0), encoding_mode_(GET_PARAM(1)), allow_mismatch_(0) {
- Reset();
- }
-
- virtual ~ErrorResilienceTestLarge() {}
-
- void Reset() {
- error_nframes_ = 0;
- invisible_error_nframes_ = 0;
- droppable_nframes_ = 0;
- error_resilient_nframes_ = 0;
- nomfmv_nframes_ = 0;
- prim_ref_none_nframes_ = 0;
- s_nframes_ = 0;
- }
-
- void SetupEncoder(int bitrate, int lag) {
- const aom_rational timebase = { 33333333, 1000000000 };
- cfg_.g_timebase = timebase;
- cfg_.rc_target_bitrate = bitrate;
- cfg_.kf_mode = AOM_KF_DISABLED;
- cfg_.g_lag_in_frames = lag;
- init_flags_ = AOM_CODEC_USE_PSNR;
- }
-
- virtual void SetUp() {
- InitializeConfig();
- SetMode(encoding_mode_);
- }
-
- virtual void BeginPassHook(unsigned int /*pass*/) {
- psnr_ = 0.0;
- nframes_ = 0;
- decoded_nframes_ = 0;
- mismatch_psnr_ = 0.0;
- mismatch_nframes_ = 0;
- }
-
- virtual void PSNRPktHook(const aom_codec_cx_pkt_t *pkt) {
- psnr_ += pkt->data.psnr.psnr[0];
- nframes_++;
- }
-
- virtual void PreEncodeFrameHook(libaom_test::VideoSource *video,
- libaom_test::Encoder *encoder) {
- if (video->frame() == 0) encoder->Control(AOME_SET_CPUUSED, kCpuUsed);
- frame_flags_ &=
- ~(AOM_EFLAG_NO_UPD_LAST | AOM_EFLAG_NO_UPD_GF | AOM_EFLAG_NO_UPD_ARF |
- AOM_EFLAG_NO_REF_FRAME_MVS | AOM_EFLAG_ERROR_RESILIENT |
- AOM_EFLAG_SET_S_FRAME | AOM_EFLAG_SET_PRIMARY_REF_NONE);
- if (droppable_nframes_ > 0 &&
- (cfg_.g_pass == AOM_RC_LAST_PASS || cfg_.g_pass == AOM_RC_ONE_PASS)) {
- for (unsigned int i = 0; i < droppable_nframes_; ++i) {
- if (droppable_frames_[i] == video->frame()) {
- std::cout << " Encoding droppable frame: "
- << droppable_frames_[i] << "\n";
- frame_flags_ |= (AOM_EFLAG_NO_UPD_LAST | AOM_EFLAG_NO_UPD_GF |
- AOM_EFLAG_NO_UPD_ARF);
- break;
- }
- }
- }
-
- if (error_resilient_nframes_ > 0 &&
- (cfg_.g_pass == AOM_RC_LAST_PASS || cfg_.g_pass == AOM_RC_ONE_PASS)) {
- for (unsigned int i = 0; i < error_resilient_nframes_; ++i) {
- if (error_resilient_frames_[i] == video->frame()) {
- std::cout << " Encoding error_resilient frame: "
- << error_resilient_frames_[i] << "\n";
- frame_flags_ |= AOM_EFLAG_ERROR_RESILIENT;
- break;
- }
- }
- }
-
- if (nomfmv_nframes_ > 0 &&
- (cfg_.g_pass == AOM_RC_LAST_PASS || cfg_.g_pass == AOM_RC_ONE_PASS)) {
- for (unsigned int i = 0; i < nomfmv_nframes_; ++i) {
- if (nomfmv_frames_[i] == video->frame()) {
- std::cout << " Encoding no mfmv frame: "
- << nomfmv_frames_[i] << "\n";
- frame_flags_ |= AOM_EFLAG_NO_REF_FRAME_MVS;
- break;
- }
- }
- }
-
- if (prim_ref_none_nframes_ > 0 &&
- (cfg_.g_pass == AOM_RC_LAST_PASS || cfg_.g_pass == AOM_RC_ONE_PASS)) {
- for (unsigned int i = 0; i < prim_ref_none_nframes_; ++i) {
- if (prim_ref_none_frames_[i] == video->frame()) {
- std::cout << " Encoding no PRIMARY_REF_NONE frame: "
- << prim_ref_none_frames_[i] << "\n";
- frame_flags_ |= AOM_EFLAG_SET_PRIMARY_REF_NONE;
- break;
- }
- }
- }
-
- encoder->Control(AV1E_SET_S_FRAME_MODE, 0);
- if (s_nframes_ > 0 &&
- (cfg_.g_pass == AOM_RC_LAST_PASS || cfg_.g_pass == AOM_RC_ONE_PASS)) {
- for (unsigned int i = 0; i < s_nframes_; ++i) {
- if (s_frames_[i] == video->frame()) {
- std::cout << " Encoding S frame: " << s_frames_[i]
- << "\n";
- frame_flags_ |= AOM_EFLAG_SET_S_FRAME;
- break;
- }
- }
- }
- }
-
- double GetAveragePsnr() const {
- if (nframes_) return psnr_ / nframes_;
- return 0.0;
- }
-
- double GetAverageMismatchPsnr() const {
- if (mismatch_nframes_) return mismatch_psnr_ / mismatch_nframes_;
- return 0.0;
- }
-
- virtual bool DoDecode() const {
- if (error_nframes_ > 0 &&
- (cfg_.g_pass == AOM_RC_LAST_PASS || cfg_.g_pass == AOM_RC_ONE_PASS)) {
- for (unsigned int i = 0; i < error_nframes_; ++i) {
- if (error_frames_[i] == nframes_ - 1) {
- std::cout << " Skipping decoding frame: "
- << error_frames_[i] << "\n";
- return 0;
- }
- }
- }
- return 1;
- }
-
- virtual bool DoDecodeInvisible() const {
- if (invisible_error_nframes_ > 0 &&
- (cfg_.g_pass == AOM_RC_LAST_PASS || cfg_.g_pass == AOM_RC_ONE_PASS)) {
- for (unsigned int i = 0; i < invisible_error_nframes_; ++i) {
- if (invisible_error_frames_[i] == nframes_ - 1) {
- std::cout << " Skipping decoding all invisible frames in "
- "frame pkt: "
- << invisible_error_frames_[i] << "\n";
- return 0;
- }
- }
- }
- return 1;
- }
-
- virtual void MismatchHook(const aom_image_t *img1, const aom_image_t *img2) {
- if (allow_mismatch_) {
- double mismatch_psnr = compute_psnr(img1, img2);
- mismatch_psnr_ += mismatch_psnr;
- ++mismatch_nframes_;
- // std::cout << "Mismatch frame psnr: " << mismatch_psnr << "\n";
- } else {
- ::libaom_test::EncoderTest::MismatchHook(img1, img2);
- }
- }
-
- virtual void DecompressedFrameHook(const aom_image_t &img,
- aom_codec_pts_t pts) {
- (void)img;
- (void)pts;
- ++decoded_nframes_;
- }
-
- void SetErrorFrames(int num, unsigned int *list) {
- if (num > kMaxErrorFrames)
- num = kMaxErrorFrames;
- else if (num < 0)
- num = 0;
- error_nframes_ = num;
- for (unsigned int i = 0; i < error_nframes_; ++i)
- error_frames_[i] = list[i];
- }
-
- void SetInvisibleErrorFrames(int num, unsigned int *list) {
- if (num > kMaxInvisibleErrorFrames)
- num = kMaxInvisibleErrorFrames;
- else if (num < 0)
- num = 0;
- invisible_error_nframes_ = num;
- for (unsigned int i = 0; i < invisible_error_nframes_; ++i)
- invisible_error_frames_[i] = list[i];
- }
-
- void SetDroppableFrames(int num, unsigned int *list) {
- if (num > kMaxDroppableFrames)
- num = kMaxDroppableFrames;
- else if (num < 0)
- num = 0;
- droppable_nframes_ = num;
- for (unsigned int i = 0; i < droppable_nframes_; ++i)
- droppable_frames_[i] = list[i];
- }
-
- void SetErrorResilientFrames(int num, unsigned int *list) {
- if (num > kMaxErrorResilientFrames)
- num = kMaxErrorResilientFrames;
- else if (num < 0)
- num = 0;
- error_resilient_nframes_ = num;
- for (unsigned int i = 0; i < error_resilient_nframes_; ++i)
- error_resilient_frames_[i] = list[i];
- }
-
- void SetNoMFMVFrames(int num, unsigned int *list) {
- if (num > kMaxNoMFMVFrames)
- num = kMaxNoMFMVFrames;
- else if (num < 0)
- num = 0;
- nomfmv_nframes_ = num;
- for (unsigned int i = 0; i < nomfmv_nframes_; ++i)
- nomfmv_frames_[i] = list[i];
- }
-
- void SetPrimaryRefNoneFrames(int num, unsigned int *list) {
- if (num > kMaxPrimRefNoneFrames)
- num = kMaxPrimRefNoneFrames;
- else if (num < 0)
- num = 0;
- prim_ref_none_nframes_ = num;
- for (unsigned int i = 0; i < prim_ref_none_nframes_; ++i)
- prim_ref_none_frames_[i] = list[i];
- }
-
- void SetSFrames(int num, unsigned int *list) {
- if (num > kMaxSFrames)
- num = kMaxSFrames;
- else if (num < 0)
- num = 0;
- s_nframes_ = num;
- for (unsigned int i = 0; i < s_nframes_; ++i) s_frames_[i] = list[i];
- }
-
- unsigned int GetMismatchFrames() { return mismatch_nframes_; }
- unsigned int GetEncodedFrames() { return nframes_; }
- unsigned int GetDecodedFrames() { return decoded_nframes_; }
-
- void SetAllowMismatch(int allow) { allow_mismatch_ = allow; }
-
- private:
- double psnr_;
- unsigned int nframes_;
- unsigned int decoded_nframes_;
- unsigned int error_nframes_;
- unsigned int invisible_error_nframes_;
- unsigned int droppable_nframes_;
- unsigned int error_resilient_nframes_;
- unsigned int nomfmv_nframes_;
- unsigned int prim_ref_none_nframes_;
- unsigned int s_nframes_;
- double mismatch_psnr_;
- unsigned int mismatch_nframes_;
- unsigned int error_frames_[kMaxErrorFrames];
- unsigned int invisible_error_frames_[kMaxInvisibleErrorFrames];
- unsigned int droppable_frames_[kMaxDroppableFrames];
- unsigned int error_resilient_frames_[kMaxErrorResilientFrames];
- unsigned int nomfmv_frames_[kMaxNoMFMVFrames];
- unsigned int prim_ref_none_frames_[kMaxPrimRefNoneFrames];
- unsigned int s_frames_[kMaxSFrames];
- libaom_test::TestMode encoding_mode_;
- int allow_mismatch_;
-};
-
-TEST_P(ErrorResilienceTestLarge, OnVersusOff) {
- SetupEncoder(2000, 10);
- libaom_test::I420VideoSource video("hantro_collage_w352h288.yuv", 352, 288,
- cfg_.g_timebase.den, cfg_.g_timebase.num,
- 0, 12);
-
- // Global error resilient mode OFF.
- cfg_.g_error_resilient = 0;
- ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
- const double psnr_resilience_off = GetAveragePsnr();
- EXPECT_GT(psnr_resilience_off, 25.0);
-
- Reset();
- // Error resilient mode ON for certain frames
- unsigned int num_error_resilient_frames = 5;
- unsigned int error_resilient_frame_list[] = { 3, 5, 6, 9, 11 };
- SetErrorResilientFrames(num_error_resilient_frames,
- error_resilient_frame_list);
- ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
- const double psnr_resilience_on = GetAveragePsnr();
- EXPECT_GT(psnr_resilience_on, 25.0);
-
- // Test that turning on error resilient mode hurts by 10% at most.
- if (psnr_resilience_off > 0.0) {
- const double psnr_ratio = psnr_resilience_on / psnr_resilience_off;
- EXPECT_GE(psnr_ratio, 0.9);
- EXPECT_LE(psnr_ratio, 1.1);
- }
-}
-
-// Check for successful decoding and no encoder/decoder mismatch
-// if we lose (i.e., drop before decoding) a set of droppable
-// frames (i.e., frames that don't update any reference buffers).
-TEST_P(ErrorResilienceTestLarge, DropFramesWithoutRecovery) {
- SetupEncoder(500, 10);
- libaom_test::I420VideoSource video("hantro_collage_w352h288.yuv", 352, 288,
- cfg_.g_timebase.den, cfg_.g_timebase.num,
- 0, 20);
-
- // Set an arbitrary set of error frames same as droppable frames.
- unsigned int num_droppable_frames = 3;
- unsigned int droppable_frame_list[] = { 5, 10, 13 };
- SetDroppableFrames(num_droppable_frames, droppable_frame_list);
- SetErrorFrames(num_droppable_frames, droppable_frame_list);
- ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
- // Test that no mismatches have been found
- std::cout << " Encoded frames: " << GetEncodedFrames() << "\n";
- std::cout << " Decoded frames: " << GetDecodedFrames() << "\n";
- std::cout << " Mismatch frames: " << GetMismatchFrames() << "\n";
- EXPECT_EQ(GetEncodedFrames() - GetDecodedFrames(), num_droppable_frames);
-}
-
-// Check for ParseAbility property of an error-resilient frame.
-// Encode a frame in error-resilient mode (E-frame), and disallow all
-// subsequent frames from using MFMV. If frames are dropped before the
-// E frame, all frames starting from the E frame should be parse-able.
-TEST_P(ErrorResilienceTestLarge, ParseAbilityTest) {
- SetupEncoder(500, 10);
-
- libaom_test::I420VideoSource video("hantro_collage_w352h288.yuv", 352, 288,
- cfg_.g_timebase.den, cfg_.g_timebase.num,
- 0, 15);
-
- SetAllowMismatch(1);
-
- // Note that an E-frame cannot be forced on a frame that is a
- // show_existing_frame, or a frame that comes directly after an invisible
- // frame. Currently, this will cause an assertion failure.
- // Set an arbitrary error resilient (E) frame
- unsigned int num_error_resilient_frames = 1;
- unsigned int error_resilient_frame_list[] = { 8 };
- SetErrorResilientFrames(num_error_resilient_frames,
- error_resilient_frame_list);
- // Ensure that any invisible frames before the E frame are dropped
- SetInvisibleErrorFrames(num_error_resilient_frames,
- error_resilient_frame_list);
- // Set all frames after the error resilient frame to not allow MFMV
- unsigned int num_post_error_resilient_frames = 6;
- unsigned int post_error_resilient_frame_list[] = { 9, 10, 11, 12, 13, 14 };
- SetNoMFMVFrames(num_post_error_resilient_frames,
- post_error_resilient_frame_list);
-
- // Set a few frames before the E frame that are lost (not decoded)
- unsigned int num_error_frames = 5;
- unsigned int error_frame_list[] = { 3, 4, 5, 6, 7 };
- SetErrorFrames(num_error_frames, error_frame_list);
-
- ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
- std::cout << " Encoded frames: " << GetEncodedFrames() << "\n";
- std::cout << " Decoded frames: " << GetDecodedFrames() << "\n";
- std::cout << " Mismatch frames: " << GetMismatchFrames() << "\n";
- EXPECT_EQ(GetEncodedFrames() - GetDecodedFrames(), num_error_frames);
- // All frames following the E-frame and the E-frame are expected to have
- // mismatches, but still be parse-able.
- EXPECT_LE(GetMismatchFrames(), num_post_error_resilient_frames + 1);
-}
-
-// Check for ParseAbility property of an S frame.
-// Encode an S-frame. If frames are dropped before the S-frame, all frames
-// starting from the S frame should be parse-able.
-TEST_P(ErrorResilienceTestLarge, SFrameTest) {
- SetupEncoder(500, 10);
-
- libaom_test::I420VideoSource video("hantro_collage_w352h288.yuv", 352, 288,
- cfg_.g_timebase.den, cfg_.g_timebase.num,
- 0, 15);
-
- SetAllowMismatch(1);
-
- // Note that an S-frame cannot be forced on a frame that is a
- // show_existing_frame. This issue still needs to be addressed.
- // Set an arbitrary S-frame
- unsigned int num_s_frames = 1;
- unsigned int s_frame_list[] = { 6 };
- SetSFrames(num_s_frames, s_frame_list);
- // Ensure that any invisible frames before the S frame are dropped
- SetInvisibleErrorFrames(num_s_frames, s_frame_list);
-
- // Set a few frames before the S frame that are lost (not decoded)
- unsigned int num_error_frames = 4;
- unsigned int error_frame_list[] = { 2, 3, 4, 5 };
- SetErrorFrames(num_error_frames, error_frame_list);
-
- ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
- std::cout << " Encoded frames: " << GetEncodedFrames() << "\n";
- std::cout << " Decoded frames: " << GetDecodedFrames() << "\n";
- std::cout << " Mismatch frames: " << GetMismatchFrames() << "\n";
- EXPECT_EQ(GetEncodedFrames() - GetDecodedFrames(), num_error_frames);
- // All frames following the S-frame and the S-frame are expected to have
- // mismatches, but still be parse-able.
- EXPECT_LE(GetMismatchFrames(), GetEncodedFrames() - s_frame_list[0]);
-}
-
-AV1_INSTANTIATE_TEST_CASE(ErrorResilienceTestLarge, NONREALTIME_TEST_MODES);
-} // namespace
diff --git a/third_party/aom/test/ethread_test.cc b/third_party/aom/test/ethread_test.cc
deleted file mode 100644
index d9ac78282..000000000
--- a/third_party/aom/test/ethread_test.cc
+++ /dev/null
@@ -1,273 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#include <string>
-#include <vector>
-#include "third_party/googletest/src/googletest/include/gtest/gtest.h"
-#include "test/codec_factory.h"
-#include "test/encode_test_driver.h"
-#include "test/md5_helper.h"
-#include "test/util.h"
-#include "test/yuv_video_source.h"
-
-namespace {
-class AVxEncoderThreadTest
- : public ::libaom_test::CodecTestWith4Params<libaom_test::TestMode, int,
- int, int>,
- public ::libaom_test::EncoderTest {
- protected:
- AVxEncoderThreadTest()
- : EncoderTest(GET_PARAM(0)), encoder_initialized_(false),
- encoding_mode_(GET_PARAM(1)), set_cpu_used_(GET_PARAM(2)),
- tile_cols_(GET_PARAM(3)), tile_rows_(GET_PARAM(4)) {
- init_flags_ = AOM_CODEC_USE_PSNR;
- aom_codec_dec_cfg_t cfg = aom_codec_dec_cfg_t();
- cfg.w = 1280;
- cfg.h = 720;
- cfg.allow_lowbitdepth = 1;
- decoder_ = codec_->CreateDecoder(cfg, 0);
- if (decoder_->IsAV1()) {
- decoder_->Control(AV1_SET_DECODE_TILE_ROW, -1);
- decoder_->Control(AV1_SET_DECODE_TILE_COL, -1);
- }
-
- size_enc_.clear();
- md5_dec_.clear();
- md5_enc_.clear();
- }
- virtual ~AVxEncoderThreadTest() { delete decoder_; }
-
- virtual void SetUp() {
- InitializeConfig();
- SetMode(encoding_mode_);
-
- if (encoding_mode_ != ::libaom_test::kRealTime) {
- cfg_.g_lag_in_frames = 5;
- cfg_.rc_end_usage = AOM_VBR;
- cfg_.rc_2pass_vbr_minsection_pct = 5;
- cfg_.rc_2pass_vbr_maxsection_pct = 2000;
- } else {
- cfg_.g_lag_in_frames = 0;
- cfg_.rc_end_usage = AOM_CBR;
- cfg_.g_error_resilient = 1;
- }
- cfg_.rc_max_quantizer = 56;
- cfg_.rc_min_quantizer = 0;
- }
-
- virtual void BeginPassHook(unsigned int /*pass*/) {
- encoder_initialized_ = false;
- }
-
- virtual void PreEncodeFrameHook(::libaom_test::VideoSource * /*video*/,
- ::libaom_test::Encoder *encoder) {
- if (!encoder_initialized_) {
- SetTileSize(encoder);
- encoder->Control(AOME_SET_CPUUSED, set_cpu_used_);
- encoder->Control(AV1E_SET_ROW_MT, row_mt_);
- if (encoding_mode_ != ::libaom_test::kRealTime) {
- encoder->Control(AOME_SET_ENABLEAUTOALTREF, 1);
- encoder->Control(AOME_SET_ARNR_MAXFRAMES, 7);
- encoder->Control(AOME_SET_ARNR_STRENGTH, 5);
- encoder->Control(AV1E_SET_FRAME_PARALLEL_DECODING, 0);
- } else {
- encoder->Control(AOME_SET_ENABLEAUTOALTREF, 0);
- encoder->Control(AV1E_SET_AQ_MODE, 3);
- }
- encoder_initialized_ = true;
- }
- }
-
- virtual void SetTileSize(libaom_test::Encoder *encoder) {
- encoder->Control(AV1E_SET_TILE_COLUMNS, tile_cols_);
- encoder->Control(AV1E_SET_TILE_ROWS, tile_rows_);
- }
-
- virtual void FramePktHook(const aom_codec_cx_pkt_t *pkt) {
- size_enc_.push_back(pkt->data.frame.sz);
-
- ::libaom_test::MD5 md5_enc;
- md5_enc.Add(reinterpret_cast<uint8_t *>(pkt->data.frame.buf),
- pkt->data.frame.sz);
- md5_enc_.push_back(md5_enc.Get());
-
- const aom_codec_err_t res = decoder_->DecodeFrame(
- reinterpret_cast<uint8_t *>(pkt->data.frame.buf), pkt->data.frame.sz);
- if (res != AOM_CODEC_OK) {
- abort_ = true;
- ASSERT_EQ(AOM_CODEC_OK, res);
- }
- const aom_image_t *img = decoder_->GetDxData().Next();
-
- if (img) {
- ::libaom_test::MD5 md5_res;
- md5_res.Add(img);
- md5_dec_.push_back(md5_res.Get());
- }
- }
-
- void DoTest() {
- ::libaom_test::YUVVideoSource video(
- "niklas_640_480_30.yuv", AOM_IMG_FMT_I420, 640, 480, 30, 1, 15, 21);
- cfg_.rc_target_bitrate = 1000;
-
- // Encode using single thread.
- row_mt_ = 0;
- cfg_.g_threads = 1;
- init_flags_ = AOM_CODEC_USE_PSNR;
- ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
- std::vector<size_t> single_thr_size_enc;
- std::vector<std::string> single_thr_md5_enc;
- std::vector<std::string> single_thr_md5_dec;
- single_thr_size_enc = size_enc_;
- single_thr_md5_enc = md5_enc_;
- single_thr_md5_dec = md5_dec_;
- size_enc_.clear();
- md5_enc_.clear();
- md5_dec_.clear();
-
- // Encode using multiple threads.
- cfg_.g_threads = 4;
- ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
- std::vector<size_t> multi_thr_size_enc;
- std::vector<std::string> multi_thr_md5_enc;
- std::vector<std::string> multi_thr_md5_dec;
- multi_thr_size_enc = size_enc_;
- multi_thr_md5_enc = md5_enc_;
- multi_thr_md5_dec = md5_dec_;
- size_enc_.clear();
- md5_enc_.clear();
- md5_dec_.clear();
-
- // Check that the vectors are equal.
- ASSERT_EQ(single_thr_size_enc, multi_thr_size_enc);
- ASSERT_EQ(single_thr_md5_enc, multi_thr_md5_enc);
- ASSERT_EQ(single_thr_md5_dec, multi_thr_md5_dec);
-
- // Encode using multiple threads row-mt enabled.
- row_mt_ = 1;
- cfg_.g_threads = 2;
- ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
- std::vector<size_t> multi_thr2_row_mt_size_enc;
- std::vector<std::string> multi_thr2_row_mt_md5_enc;
- std::vector<std::string> multi_thr2_row_mt_md5_dec;
- multi_thr2_row_mt_size_enc = size_enc_;
- multi_thr2_row_mt_md5_enc = md5_enc_;
- multi_thr2_row_mt_md5_dec = md5_dec_;
- size_enc_.clear();
- md5_enc_.clear();
- md5_dec_.clear();
-
- // Disable threads=3 test for now to reduce the time so that the nightly
- // test would not time out.
- // cfg_.g_threads = 3;
- // ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
- // std::vector<size_t> multi_thr3_row_mt_size_enc;
- // std::vector<std::string> multi_thr3_row_mt_md5_enc;
- // std::vector<std::string> multi_thr3_row_mt_md5_dec;
- // multi_thr3_row_mt_size_enc = size_enc_;
- // multi_thr3_row_mt_md5_enc = md5_enc_;
- // multi_thr3_row_mt_md5_dec = md5_dec_;
- // size_enc_.clear();
- // md5_enc_.clear();
- // md5_dec_.clear();
- // Check that the vectors are equal.
- // ASSERT_EQ(multi_thr3_row_mt_size_enc, multi_thr2_row_mt_size_enc);
- // ASSERT_EQ(multi_thr3_row_mt_md5_enc, multi_thr2_row_mt_md5_enc);
- // ASSERT_EQ(multi_thr3_row_mt_md5_dec, multi_thr2_row_mt_md5_dec);
-
- cfg_.g_threads = 4;
- ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
- std::vector<size_t> multi_thr4_row_mt_size_enc;
- std::vector<std::string> multi_thr4_row_mt_md5_enc;
- std::vector<std::string> multi_thr4_row_mt_md5_dec;
- multi_thr4_row_mt_size_enc = size_enc_;
- multi_thr4_row_mt_md5_enc = md5_enc_;
- multi_thr4_row_mt_md5_dec = md5_dec_;
- size_enc_.clear();
- md5_enc_.clear();
- md5_dec_.clear();
-
- // Check that the vectors are equal.
- ASSERT_EQ(multi_thr4_row_mt_size_enc, multi_thr2_row_mt_size_enc);
- ASSERT_EQ(multi_thr4_row_mt_md5_enc, multi_thr2_row_mt_md5_enc);
- ASSERT_EQ(multi_thr4_row_mt_md5_dec, multi_thr2_row_mt_md5_dec);
- }
-
- bool encoder_initialized_;
- ::libaom_test::TestMode encoding_mode_;
- int set_cpu_used_;
- int tile_cols_;
- int tile_rows_;
- int row_mt_;
- ::libaom_test::Decoder *decoder_;
- std::vector<size_t> size_enc_;
- std::vector<std::string> md5_enc_;
- std::vector<std::string> md5_dec_;
-};
-
-TEST_P(AVxEncoderThreadTest, EncoderResultTest) {
- cfg_.large_scale_tile = 0;
- decoder_->Control(AV1_SET_TILE_MODE, 0);
- DoTest();
-}
-
-class AVxEncoderThreadTestLarge : public AVxEncoderThreadTest {};
-
-TEST_P(AVxEncoderThreadTestLarge, EncoderResultTest) {
- cfg_.large_scale_tile = 0;
- decoder_->Control(AV1_SET_TILE_MODE, 0);
- DoTest();
-}
-
-// For AV1, only test speed 0 to 3.
-// Here test cpu_used 2 and 3
-AV1_INSTANTIATE_TEST_CASE(AVxEncoderThreadTest,
- ::testing::Values(::libaom_test::kTwoPassGood),
- ::testing::Range(2, 4), ::testing::Values(0, 2),
- ::testing::Values(0, 1));
-
-// Test cpu_used 0 and 1.
-AV1_INSTANTIATE_TEST_CASE(AVxEncoderThreadTestLarge,
- ::testing::Values(::libaom_test::kTwoPassGood,
- ::libaom_test::kOnePassGood),
- ::testing::Range(0, 2), ::testing::Values(0, 1, 2, 6),
- ::testing::Values(0, 1, 2, 6));
-
-class AVxEncoderThreadLSTest : public AVxEncoderThreadTest {
- virtual void SetTileSize(libaom_test::Encoder *encoder) {
- encoder->Control(AV1E_SET_TILE_COLUMNS, tile_cols_);
- encoder->Control(AV1E_SET_TILE_ROWS, tile_rows_);
- }
-};
-
-TEST_P(AVxEncoderThreadLSTest, EncoderResultTest) {
- cfg_.large_scale_tile = 1;
- decoder_->Control(AV1_SET_TILE_MODE, 1);
- decoder_->Control(AV1D_EXT_TILE_DEBUG, 1);
- DoTest();
-}
-
-class AVxEncoderThreadLSTestLarge : public AVxEncoderThreadLSTest {};
-
-TEST_P(AVxEncoderThreadLSTestLarge, EncoderResultTest) {
- cfg_.large_scale_tile = 1;
- decoder_->Control(AV1_SET_TILE_MODE, 1);
- decoder_->Control(AV1D_EXT_TILE_DEBUG, 1);
- DoTest();
-}
-
-AV1_INSTANTIATE_TEST_CASE(AVxEncoderThreadLSTestLarge,
- ::testing::Values(::libaom_test::kTwoPassGood,
- ::libaom_test::kOnePassGood),
- ::testing::Range(0, 4), ::testing::Values(0, 6),
- ::testing::Values(0, 6));
-} // namespace
diff --git a/third_party/aom/test/examples.sh b/third_party/aom/test/examples.sh
deleted file mode 100755
index 2cdb89dd0..000000000
--- a/third_party/aom/test/examples.sh
+++ /dev/null
@@ -1,29 +0,0 @@
-#!/bin/sh
-## Copyright (c) 2016, Alliance for Open Media. All rights reserved
-##
-## This source code is subject to the terms of the BSD 2 Clause License and
-## the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
-## was not distributed with this source code in the LICENSE file, you can
-## obtain it at www.aomedia.org/license/software. If the Alliance for Open
-## Media Patent License 1.0 was not distributed with this source code in the
-## PATENTS file, you can obtain it at www.aomedia.org/license/patent.
-##
-## This file runs all of the tests for the libaom examples.
-##
-. $(dirname $0)/tools_common.sh
-
-example_tests=$(ls -r $(dirname $0)/*.sh)
-
-# List of script names to exclude.
-exclude_list="best_encode examples run_encodes tools_common"
-
-# Filter out the scripts in $exclude_list.
-for word in ${exclude_list}; do
- example_tests=$(filter_strings "${example_tests}" "${word}" exclude)
-done
-
-for test in ${example_tests}; do
- # Source each test script so that exporting variables can be avoided.
- AOM_TEST_NAME="$(basename ${test%.*})"
- . "${test}"
-done
diff --git a/third_party/aom/test/external_frame_buffer_test.cc b/third_party/aom/test/external_frame_buffer_test.cc
deleted file mode 100644
index c2af059a4..000000000
--- a/third_party/aom/test/external_frame_buffer_test.cc
+++ /dev/null
@@ -1,512 +0,0 @@
-/*
- * Copyright (c) 2014 The WebM project authors. All Rights Reserved.
- *
- * Use of this source code is governed by a BSD-style license
- * that can be found in the LICENSE file in the root of the source
- * tree. An additional intellectual property rights grant can be found
- * in the file PATENTS. All contributing project authors may
- * be found in the AUTHORS file in the root of the source tree.
- */
-
-#include <string>
-
-#include "config/aom_config.h"
-#include "test/codec_factory.h"
-#include "test/decode_test_driver.h"
-#include "test/ivf_video_source.h"
-#include "test/md5_helper.h"
-#include "test/test_vectors.h"
-#include "test/util.h"
-#if CONFIG_WEBM_IO
-#include "test/webm_video_source.h"
-#endif
-
-namespace {
-
-const int kVideoNameParam = 1;
-
-struct ExternalFrameBuffer {
- uint8_t *data;
- size_t size;
- int in_use;
-};
-
-// Class to manipulate a list of external frame buffers.
-class ExternalFrameBufferList {
- public:
- ExternalFrameBufferList()
- : num_buffers_(0), num_used_buffers_(0), ext_fb_list_(NULL) {}
-
- virtual ~ExternalFrameBufferList() {
- for (int i = 0; i < num_buffers_; ++i) {
- delete[] ext_fb_list_[i].data;
- }
- delete[] ext_fb_list_;
- }
-
- // Creates the list to hold the external buffers. Returns true on success.
- bool CreateBufferList(int num_buffers) {
- if (num_buffers < 0) return false;
-
- num_buffers_ = num_buffers;
- ext_fb_list_ = new ExternalFrameBuffer[num_buffers_];
- EXPECT_TRUE(ext_fb_list_ != NULL);
- memset(ext_fb_list_, 0, sizeof(ext_fb_list_[0]) * num_buffers_);
- return true;
- }
-
- // Searches the frame buffer list for a free frame buffer. Makes sure
- // that the frame buffer is at least |min_size| in bytes. Marks that the
- // frame buffer is in use by libvpx. Finally sets |fb| to point to the
- // external frame buffer. Returns < 0 on an error.
- int GetFreeFrameBuffer(size_t min_size, aom_codec_frame_buffer_t *fb) {
- EXPECT_TRUE(fb != NULL);
- const int idx = FindFreeBufferIndex();
- if (idx == num_buffers_) return -1;
-
- if (ext_fb_list_[idx].size < min_size) {
- delete[] ext_fb_list_[idx].data;
- ext_fb_list_[idx].data = new uint8_t[min_size];
- memset(ext_fb_list_[idx].data, 0, min_size);
- ext_fb_list_[idx].size = min_size;
- }
-
- SetFrameBuffer(idx, fb);
-
- num_used_buffers_++;
- return 0;
- }
-
- // Test function that will not allocate any data for the frame buffer.
- // Returns < 0 on an error.
- int GetZeroFrameBuffer(size_t min_size, aom_codec_frame_buffer_t *fb) {
- EXPECT_TRUE(fb != NULL);
- const int idx = FindFreeBufferIndex();
- if (idx == num_buffers_) return -1;
-
- if (ext_fb_list_[idx].size < min_size) {
- delete[] ext_fb_list_[idx].data;
- ext_fb_list_[idx].data = NULL;
- ext_fb_list_[idx].size = min_size;
- }
-
- SetFrameBuffer(idx, fb);
- return 0;
- }
-
- // Marks the external frame buffer that |fb| is pointing to as free.
- // Returns < 0 on an error.
- int ReturnFrameBuffer(aom_codec_frame_buffer_t *fb) {
- if (fb == NULL) {
- EXPECT_TRUE(fb != NULL);
- return -1;
- }
- ExternalFrameBuffer *const ext_fb =
- reinterpret_cast<ExternalFrameBuffer *>(fb->priv);
- if (ext_fb == NULL) {
- EXPECT_TRUE(ext_fb != NULL);
- return -1;
- }
- EXPECT_EQ(1, ext_fb->in_use);
- ext_fb->in_use = 0;
- num_used_buffers_--;
- return 0;
- }
-
- // Checks that the ximage data is contained within the external frame buffer
- // private data passed back in the ximage.
- void CheckXImageFrameBuffer(const aom_image_t *img) {
- if (img->fb_priv != NULL) {
- const struct ExternalFrameBuffer *const ext_fb =
- reinterpret_cast<ExternalFrameBuffer *>(img->fb_priv);
-
- ASSERT_TRUE(img->planes[0] >= ext_fb->data &&
- img->planes[0] < (ext_fb->data + ext_fb->size));
- }
- }
-
- int num_used_buffers() const { return num_used_buffers_; }
-
- private:
- // Returns the index of the first free frame buffer. Returns |num_buffers_|
- // if there are no free frame buffers.
- int FindFreeBufferIndex() {
- int i;
- // Find a free frame buffer.
- for (i = 0; i < num_buffers_; ++i) {
- if (!ext_fb_list_[i].in_use) break;
- }
- return i;
- }
-
- // Sets |fb| to an external frame buffer. idx is the index into the frame
- // buffer list.
- void SetFrameBuffer(int idx, aom_codec_frame_buffer_t *fb) {
- ASSERT_TRUE(fb != NULL);
- fb->data = ext_fb_list_[idx].data;
- fb->size = ext_fb_list_[idx].size;
- ASSERT_EQ(0, ext_fb_list_[idx].in_use);
- ext_fb_list_[idx].in_use = 1;
- fb->priv = &ext_fb_list_[idx];
- }
-
- int num_buffers_;
- int num_used_buffers_;
- ExternalFrameBuffer *ext_fb_list_;
-};
-
-#if CONFIG_WEBM_IO
-
-// Callback used by libvpx to request the application to return a frame
-// buffer of at least |min_size| in bytes.
-int get_aom_frame_buffer(void *user_priv, size_t min_size,
- aom_codec_frame_buffer_t *fb) {
- ExternalFrameBufferList *const fb_list =
- reinterpret_cast<ExternalFrameBufferList *>(user_priv);
- return fb_list->GetFreeFrameBuffer(min_size, fb);
-}
-
-// Callback used by libvpx to tell the application that |fb| is not needed
-// anymore.
-int release_aom_frame_buffer(void *user_priv, aom_codec_frame_buffer_t *fb) {
- ExternalFrameBufferList *const fb_list =
- reinterpret_cast<ExternalFrameBufferList *>(user_priv);
- return fb_list->ReturnFrameBuffer(fb);
-}
-
-// Callback will not allocate data for frame buffer.
-int get_aom_zero_frame_buffer(void *user_priv, size_t min_size,
- aom_codec_frame_buffer_t *fb) {
- ExternalFrameBufferList *const fb_list =
- reinterpret_cast<ExternalFrameBufferList *>(user_priv);
- return fb_list->GetZeroFrameBuffer(min_size, fb);
-}
-
-// Callback will allocate one less byte than |min_size|.
-int get_aom_one_less_byte_frame_buffer(void *user_priv, size_t min_size,
- aom_codec_frame_buffer_t *fb) {
- ExternalFrameBufferList *const fb_list =
- reinterpret_cast<ExternalFrameBufferList *>(user_priv);
- return fb_list->GetFreeFrameBuffer(min_size - 1, fb);
-}
-
-// Callback will not release the external frame buffer.
-int do_not_release_aom_frame_buffer(void *user_priv,
- aom_codec_frame_buffer_t *fb) {
- (void)user_priv;
- (void)fb;
- return 0;
-}
-
-#endif // CONFIG_WEBM_IO
-
-// Class for testing passing in external frame buffers to libaom.
-class ExternalFrameBufferMD5Test
- : public ::libaom_test::DecoderTest,
- public ::libaom_test::CodecTestWithParam<const char *> {
- protected:
- ExternalFrameBufferMD5Test()
- : DecoderTest(GET_PARAM(::libaom_test::kCodecFactoryParam)),
- md5_file_(NULL), num_buffers_(0) {}
-
- virtual ~ExternalFrameBufferMD5Test() {
- if (md5_file_ != NULL) fclose(md5_file_);
- }
-
- virtual void PreDecodeFrameHook(
- const libaom_test::CompressedVideoSource &video,
- libaom_test::Decoder *decoder) {
- if (num_buffers_ > 0 && video.frame_number() == 0) {
- // Have libvpx use frame buffers we create.
- ASSERT_TRUE(fb_list_.CreateBufferList(num_buffers_));
- ASSERT_EQ(AOM_CODEC_OK,
- decoder->SetFrameBufferFunctions(GetAV1FrameBuffer,
- ReleaseAV1FrameBuffer, this));
- }
- }
-
- void OpenMD5File(const std::string &md5_file_name_) {
- md5_file_ = libaom_test::OpenTestDataFile(md5_file_name_);
- ASSERT_TRUE(md5_file_ != NULL)
- << "Md5 file open failed. Filename: " << md5_file_name_;
- }
-
- virtual void DecompressedFrameHook(const aom_image_t &img,
- const unsigned int frame_number) {
- ASSERT_TRUE(md5_file_ != NULL);
- char expected_md5[33];
- char junk[128];
-
- // Read correct md5 checksums.
- const int res = fscanf(md5_file_, "%s %s", expected_md5, junk);
- ASSERT_NE(EOF, res) << "Read md5 data failed";
- expected_md5[32] = '\0';
-
- ::libaom_test::MD5 md5_res;
- md5_res.Add(&img);
- const char *const actual_md5 = md5_res.Get();
-
- // Check md5 match.
- ASSERT_STREQ(expected_md5, actual_md5)
- << "Md5 checksums don't match: frame number = " << frame_number;
- }
-
- // Callback to get a free external frame buffer. Return value < 0 is an
- // error.
- static int GetAV1FrameBuffer(void *user_priv, size_t min_size,
- aom_codec_frame_buffer_t *fb) {
- ExternalFrameBufferMD5Test *const md5Test =
- reinterpret_cast<ExternalFrameBufferMD5Test *>(user_priv);
- return md5Test->fb_list_.GetFreeFrameBuffer(min_size, fb);
- }
-
- // Callback to release an external frame buffer. Return value < 0 is an
- // error.
- static int ReleaseAV1FrameBuffer(void *user_priv,
- aom_codec_frame_buffer_t *fb) {
- ExternalFrameBufferMD5Test *const md5Test =
- reinterpret_cast<ExternalFrameBufferMD5Test *>(user_priv);
- return md5Test->fb_list_.ReturnFrameBuffer(fb);
- }
-
- void set_num_buffers(int num_buffers) { num_buffers_ = num_buffers; }
- int num_buffers() const { return num_buffers_; }
-
- private:
- FILE *md5_file_;
- int num_buffers_;
- ExternalFrameBufferList fb_list_;
-};
-
-#if CONFIG_WEBM_IO
-const char kAV1TestFile[] = "av1-1-b8-01-size-226x226.ivf";
-const char kAV1NonRefTestFile[] = "av1-1-b8-01-size-226x226.ivf";
-
-// Class for testing passing in external frame buffers to libvpx.
-class ExternalFrameBufferTest : public ::testing::Test {
- protected:
- ExternalFrameBufferTest() : video_(NULL), decoder_(NULL), num_buffers_(0) {}
-
- virtual void SetUp() {
- video_ = new libaom_test::IVFVideoSource(kAV1TestFile);
- ASSERT_TRUE(video_ != NULL);
- video_->Init();
- video_->Begin();
-
- aom_codec_dec_cfg_t cfg = aom_codec_dec_cfg_t();
- decoder_ = new libaom_test::AV1Decoder(cfg, 0);
- ASSERT_TRUE(decoder_ != NULL);
- }
-
- virtual void TearDown() {
- delete decoder_;
- decoder_ = NULL;
- delete video_;
- video_ = NULL;
- }
-
- // Passes the external frame buffer information to libvpx.
- aom_codec_err_t SetFrameBufferFunctions(
- int num_buffers, aom_get_frame_buffer_cb_fn_t cb_get,
- aom_release_frame_buffer_cb_fn_t cb_release) {
- if (num_buffers > 0) {
- num_buffers_ = num_buffers;
- EXPECT_TRUE(fb_list_.CreateBufferList(num_buffers_));
- }
-
- return decoder_->SetFrameBufferFunctions(cb_get, cb_release, &fb_list_);
- }
-
- aom_codec_err_t DecodeOneFrame() {
- const aom_codec_err_t res =
- decoder_->DecodeFrame(video_->cxdata(), video_->frame_size());
- CheckDecodedFrames();
- if (res == AOM_CODEC_OK) video_->Next();
- return res;
- }
-
- aom_codec_err_t DecodeRemainingFrames() {
- for (; video_->cxdata() != NULL; video_->Next()) {
- const aom_codec_err_t res =
- decoder_->DecodeFrame(video_->cxdata(), video_->frame_size());
- if (res != AOM_CODEC_OK) return res;
- CheckDecodedFrames();
- }
- return AOM_CODEC_OK;
- }
-
- protected:
- void CheckDecodedFrames() {
- libaom_test::DxDataIterator dec_iter = decoder_->GetDxData();
- const aom_image_t *img = NULL;
-
- // Get decompressed data
- while ((img = dec_iter.Next()) != NULL) {
- fb_list_.CheckXImageFrameBuffer(img);
- }
- }
-
- libaom_test::IVFVideoSource *video_;
- libaom_test::AV1Decoder *decoder_;
- int num_buffers_;
- ExternalFrameBufferList fb_list_;
-};
-
-class ExternalFrameBufferNonRefTest : public ExternalFrameBufferTest {
- protected:
- virtual void SetUp() {
- video_ = new libaom_test::IVFVideoSource(kAV1NonRefTestFile);
- ASSERT_TRUE(video_ != NULL);
- video_->Init();
- video_->Begin();
-
- aom_codec_dec_cfg_t cfg = aom_codec_dec_cfg_t();
- decoder_ = new libaom_test::AV1Decoder(cfg, 0);
- ASSERT_TRUE(decoder_ != NULL);
- }
-
- virtual void CheckFrameBufferRelease() {
- TearDown();
- ASSERT_EQ(0, fb_list_.num_used_buffers());
- }
-};
-#endif // CONFIG_WEBM_IO
-
-// This test runs through the set of test vectors, and decodes them.
-// Libvpx will call into the application to allocate a frame buffer when
-// needed. The md5 checksums are computed for each frame in the video file.
-// If md5 checksums match the correct md5 data, then the test is passed.
-// Otherwise, the test failed.
-TEST_P(ExternalFrameBufferMD5Test, DISABLED_ExtFBMD5Match) {
- const std::string filename = GET_PARAM(kVideoNameParam);
-
- // Number of buffers equals #AOM_MAXIMUM_REF_BUFFERS +
- // #AOM_MAXIMUM_WORK_BUFFERS + four jitter buffers.
- const int jitter_buffers = 4;
- const int num_buffers =
- AOM_MAXIMUM_REF_BUFFERS + AOM_MAXIMUM_WORK_BUFFERS + jitter_buffers;
- set_num_buffers(num_buffers);
-
- // Open compressed video file.
- testing::internal::scoped_ptr<libaom_test::CompressedVideoSource> video;
- if (filename.substr(filename.length() - 3, 3) == "ivf") {
- video.reset(new libaom_test::IVFVideoSource(filename));
- } else {
-#if CONFIG_WEBM_IO
- video.reset(new libaom_test::WebMVideoSource(filename));
-#else
- fprintf(stderr, "WebM IO is disabled, skipping test vector %s\n",
- filename.c_str());
- return;
-#endif
- }
- ASSERT_TRUE(video.get() != NULL);
- video->Init();
-
- // Construct md5 file name.
- const std::string md5_filename = filename + ".md5";
- OpenMD5File(md5_filename);
-
- // Decode frame, and check the md5 matching.
- ASSERT_NO_FATAL_FAILURE(RunLoop(video.get()));
-}
-
-#if CONFIG_WEBM_IO
-TEST_F(ExternalFrameBufferTest, MinFrameBuffers) {
- // Minimum number of external frame buffers for AV1 is
- // #AOM_MAXIMUM_REF_BUFFERS + #AOM_MAXIMUM_WORK_BUFFERS.
- const int num_buffers = AOM_MAXIMUM_REF_BUFFERS + AOM_MAXIMUM_WORK_BUFFERS;
- ASSERT_EQ(AOM_CODEC_OK,
- SetFrameBufferFunctions(num_buffers, get_aom_frame_buffer,
- release_aom_frame_buffer));
- ASSERT_EQ(AOM_CODEC_OK, DecodeRemainingFrames());
-}
-
-TEST_F(ExternalFrameBufferTest, EightJitterBuffers) {
- // Number of buffers equals #AOM_MAXIMUM_REF_BUFFERS +
- // #AOM_MAXIMUM_WORK_BUFFERS + eight jitter buffers.
- const int jitter_buffers = 8;
- const int num_buffers =
- AOM_MAXIMUM_REF_BUFFERS + AOM_MAXIMUM_WORK_BUFFERS + jitter_buffers;
- ASSERT_EQ(AOM_CODEC_OK,
- SetFrameBufferFunctions(num_buffers, get_aom_frame_buffer,
- release_aom_frame_buffer));
- ASSERT_EQ(AOM_CODEC_OK, DecodeRemainingFrames());
-}
-
-TEST_F(ExternalFrameBufferTest, DISABLED_NotEnoughBuffers) {
- // Minimum number of external frame buffers for AV1 is
- // #AOM_MAXIMUM_REF_BUFFERS + #AOM_MAXIMUM_WORK_BUFFERS. Most files will
- // only use 5 frame buffers at one time.
- const int num_buffers = 2;
- ASSERT_EQ(AOM_CODEC_OK,
- SetFrameBufferFunctions(num_buffers, get_aom_frame_buffer,
- release_aom_frame_buffer));
- ASSERT_EQ(AOM_CODEC_OK, DecodeOneFrame());
- // Only run this on long clips. Decoding a very short clip will return
- // AOM_CODEC_OK even with only 2 buffers.
- ASSERT_EQ(AOM_CODEC_MEM_ERROR, DecodeRemainingFrames());
-}
-
-TEST_F(ExternalFrameBufferTest, DISABLED_NoRelease) {
- const int num_buffers = AOM_MAXIMUM_REF_BUFFERS + AOM_MAXIMUM_WORK_BUFFERS;
- ASSERT_EQ(AOM_CODEC_OK,
- SetFrameBufferFunctions(num_buffers, get_aom_frame_buffer,
- do_not_release_aom_frame_buffer));
- ASSERT_EQ(AOM_CODEC_OK, DecodeOneFrame());
- ASSERT_EQ(AOM_CODEC_MEM_ERROR, DecodeRemainingFrames());
-}
-
-TEST_F(ExternalFrameBufferTest, NullRealloc) {
- const int num_buffers = AOM_MAXIMUM_REF_BUFFERS + AOM_MAXIMUM_WORK_BUFFERS;
- ASSERT_EQ(AOM_CODEC_OK,
- SetFrameBufferFunctions(num_buffers, get_aom_zero_frame_buffer,
- release_aom_frame_buffer));
- ASSERT_EQ(AOM_CODEC_MEM_ERROR, DecodeOneFrame());
-}
-
-TEST_F(ExternalFrameBufferTest, ReallocOneLessByte) {
- const int num_buffers = AOM_MAXIMUM_REF_BUFFERS + AOM_MAXIMUM_WORK_BUFFERS;
- ASSERT_EQ(AOM_CODEC_OK, SetFrameBufferFunctions(
- num_buffers, get_aom_one_less_byte_frame_buffer,
- release_aom_frame_buffer));
- ASSERT_EQ(AOM_CODEC_MEM_ERROR, DecodeOneFrame());
-}
-
-TEST_F(ExternalFrameBufferTest, NullGetFunction) {
- const int num_buffers = AOM_MAXIMUM_REF_BUFFERS + AOM_MAXIMUM_WORK_BUFFERS;
- ASSERT_EQ(
- AOM_CODEC_INVALID_PARAM,
- SetFrameBufferFunctions(num_buffers, NULL, release_aom_frame_buffer));
-}
-
-TEST_F(ExternalFrameBufferTest, NullReleaseFunction) {
- const int num_buffers = AOM_MAXIMUM_REF_BUFFERS + AOM_MAXIMUM_WORK_BUFFERS;
- ASSERT_EQ(AOM_CODEC_INVALID_PARAM,
- SetFrameBufferFunctions(num_buffers, get_aom_frame_buffer, NULL));
-}
-
-TEST_F(ExternalFrameBufferTest, SetAfterDecode) {
- const int num_buffers = AOM_MAXIMUM_REF_BUFFERS + AOM_MAXIMUM_WORK_BUFFERS;
- ASSERT_EQ(AOM_CODEC_OK, DecodeOneFrame());
- ASSERT_EQ(AOM_CODEC_ERROR,
- SetFrameBufferFunctions(num_buffers, get_aom_frame_buffer,
- release_aom_frame_buffer));
-}
-
-TEST_F(ExternalFrameBufferNonRefTest, ReleaseNonRefFrameBuffer) {
- const int num_buffers = AOM_MAXIMUM_REF_BUFFERS + AOM_MAXIMUM_WORK_BUFFERS;
- ASSERT_EQ(AOM_CODEC_OK,
- SetFrameBufferFunctions(num_buffers, get_aom_frame_buffer,
- release_aom_frame_buffer));
- ASSERT_EQ(AOM_CODEC_OK, DecodeRemainingFrames());
- CheckFrameBufferRelease();
-}
-#endif // CONFIG_WEBM_IO
-
-AV1_INSTANTIATE_TEST_CASE(
- ExternalFrameBufferMD5Test,
- ::testing::ValuesIn(libaom_test::kAV1TestVectors,
- libaom_test::kAV1TestVectors +
- libaom_test::kNumAV1TestVectors));
-} // namespace
diff --git a/third_party/aom/test/fft_test.cc b/third_party/aom/test/fft_test.cc
deleted file mode 100644
index e24e451a3..000000000
--- a/third_party/aom/test/fft_test.cc
+++ /dev/null
@@ -1,256 +0,0 @@
-/*
- * Copyright (c) 2018, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#include <math.h>
-
-#include <algorithm>
-#include <complex>
-#include <vector>
-
-#include "aom_dsp/fft_common.h"
-#include "aom_mem/aom_mem.h"
-#include "av1/common/common.h"
-#include "config/aom_dsp_rtcd.h"
-#include "test/acm_random.h"
-#include "third_party/googletest/src/googletest/include/gtest/gtest.h"
-
-namespace {
-
-typedef void (*tform_fun_t)(const float *input, float *temp, float *output);
-
-// Simple 1D FFT implementation
-template <typename InputType>
-void fft(const InputType *data, std::complex<float> *result, int n) {
- if (n == 1) {
- result[0] = data[0];
- return;
- }
- std::vector<InputType> temp(n);
- for (int k = 0; k < n / 2; ++k) {
- temp[k] = data[2 * k];
- temp[n / 2 + k] = data[2 * k + 1];
- }
- fft(&temp[0], result, n / 2);
- fft(&temp[n / 2], result + n / 2, n / 2);
- for (int k = 0; k < n / 2; ++k) {
- std::complex<float> w = std::complex<float>((float)cos(2. * PI * k / n),
- (float)-sin(2. * PI * k / n));
- std::complex<float> a = result[k];
- std::complex<float> b = result[n / 2 + k];
- result[k] = a + w * b;
- result[n / 2 + k] = a - w * b;
- }
-}
-
-void transpose(std::vector<std::complex<float> > *data, int n) {
- for (int y = 0; y < n; ++y) {
- for (int x = y + 1; x < n; ++x) {
- std::swap((*data)[y * n + x], (*data)[x * n + y]);
- }
- }
-}
-
-// Simple 2D FFT implementation
-template <class InputType>
-std::vector<std::complex<float> > fft2d(const InputType *input, int n) {
- std::vector<std::complex<float> > rowfft(n * n);
- std::vector<std::complex<float> > result(n * n);
- for (int y = 0; y < n; ++y) {
- fft(input + y * n, &rowfft[y * n], n);
- }
- transpose(&rowfft, n);
- for (int y = 0; y < n; ++y) {
- fft(&rowfft[y * n], &result[y * n], n);
- }
- transpose(&result, n);
- return result;
-}
-
-struct FFTTestArg {
- int n;
- void (*fft)(const float *input, float *temp, float *output);
- FFTTestArg(int n_in, tform_fun_t fft_in) : n(n_in), fft(fft_in) {}
-};
-
-std::ostream &operator<<(std::ostream &os, const FFTTestArg &test_arg) {
- return os << "fft_arg { n:" << test_arg.n << " fft:" << test_arg.fft << " }";
-}
-
-class FFT2DTest : public ::testing::TestWithParam<FFTTestArg> {
- protected:
- void SetUp() {
- int n = GetParam().n;
- input_ = (float *)aom_memalign(32, sizeof(*input_) * n * n);
- temp_ = (float *)aom_memalign(32, sizeof(*temp_) * n * n);
- output_ = (float *)aom_memalign(32, sizeof(*output_) * n * n * 2);
- memset(input_, 0, sizeof(*input_) * n * n);
- memset(temp_, 0, sizeof(*temp_) * n * n);
- memset(output_, 0, sizeof(*output_) * n * n * 2);
- }
- void TearDown() {
- aom_free(input_);
- aom_free(temp_);
- aom_free(output_);
- }
- float *input_;
- float *temp_;
- float *output_;
-};
-
-TEST_P(FFT2DTest, Correct) {
- int n = GetParam().n;
- for (int i = 0; i < n * n; ++i) {
- input_[i] = 1;
- std::vector<std::complex<float> > expected = fft2d<float>(&input_[0], n);
- GetParam().fft(&input_[0], &temp_[0], &output_[0]);
- for (int y = 0; y < n; ++y) {
- for (int x = 0; x < (n / 2) + 1; ++x) {
- EXPECT_NEAR(expected[y * n + x].real(), output_[2 * (y * n + x)], 1e-5);
- EXPECT_NEAR(expected[y * n + x].imag(), output_[2 * (y * n + x) + 1],
- 1e-5);
- }
- }
- input_[i] = 0;
- }
-}
-
-TEST_P(FFT2DTest, Benchmark) {
- int n = GetParam().n;
- float sum = 0;
- for (int i = 0; i < 1000 * (64 - n); ++i) {
- input_[i % (n * n)] = 1;
- GetParam().fft(&input_[0], &temp_[0], &output_[0]);
- sum += output_[0];
- input_[i % (n * n)] = 0;
- }
-}
-
-INSTANTIATE_TEST_CASE_P(C, FFT2DTest,
- ::testing::Values(FFTTestArg(2, aom_fft2x2_float_c),
- FFTTestArg(4, aom_fft4x4_float_c),
- FFTTestArg(8, aom_fft8x8_float_c),
- FFTTestArg(16, aom_fft16x16_float_c),
- FFTTestArg(32,
- aom_fft32x32_float_c)));
-#if ARCH_X86 || ARCH_X86_64
-#if HAVE_SSE2
-INSTANTIATE_TEST_CASE_P(
- SSE2, FFT2DTest,
- ::testing::Values(FFTTestArg(4, aom_fft4x4_float_sse2),
- FFTTestArg(8, aom_fft8x8_float_sse2),
- FFTTestArg(16, aom_fft16x16_float_sse2),
- FFTTestArg(32, aom_fft32x32_float_sse2)));
-#endif // HAVE_SSE2
-#if HAVE_AVX2
-INSTANTIATE_TEST_CASE_P(
- AVX2, FFT2DTest,
- ::testing::Values(FFTTestArg(8, aom_fft8x8_float_avx2),
- FFTTestArg(16, aom_fft16x16_float_avx2),
- FFTTestArg(32, aom_fft32x32_float_avx2)));
-#endif // HAVE_AVX2
-#endif // ARCH_X86 || ARCH_X86_64
-
-struct IFFTTestArg {
- int n;
- tform_fun_t ifft;
- IFFTTestArg(int n_in, tform_fun_t ifft_in) : n(n_in), ifft(ifft_in) {}
-};
-
-std::ostream &operator<<(std::ostream &os, const IFFTTestArg &test_arg) {
- return os << "ifft_arg { n:" << test_arg.n << " fft:" << test_arg.ifft
- << " }";
-}
-
-class IFFT2DTest : public ::testing::TestWithParam<IFFTTestArg> {
- protected:
- void SetUp() {
- int n = GetParam().n;
- input_ = (float *)aom_memalign(32, sizeof(*input_) * n * n * 2);
- temp_ = (float *)aom_memalign(32, sizeof(*temp_) * n * n * 2);
- output_ = (float *)aom_memalign(32, sizeof(*output_) * n * n);
- memset(input_, 0, sizeof(*input_) * n * n * 2);
- memset(temp_, 0, sizeof(*temp_) * n * n * 2);
- memset(output_, 0, sizeof(*output_) * n * n);
- }
- void TearDown() {
- aom_free(input_);
- aom_free(temp_);
- aom_free(output_);
- }
- float *input_;
- float *temp_;
- float *output_;
-};
-
-TEST_P(IFFT2DTest, Correctness) {
- int n = GetParam().n;
- ASSERT_GE(n, 2);
- std::vector<float> expected(n * n);
- std::vector<float> actual(n * n);
- // Do forward transform then invert to make sure we get back expected
- for (int y = 0; y < n; ++y) {
- for (int x = 0; x < n; ++x) {
- expected[y * n + x] = 1;
- std::vector<std::complex<float> > input_c = fft2d(&expected[0], n);
- for (int i = 0; i < n * n; ++i) {
- input_[2 * i + 0] = input_c[i].real();
- input_[2 * i + 1] = input_c[i].imag();
- }
- GetParam().ifft(&input_[0], &temp_[0], &output_[0]);
-
- for (int yy = 0; yy < n; ++yy) {
- for (int xx = 0; xx < n; ++xx) {
- EXPECT_NEAR(expected[yy * n + xx], output_[yy * n + xx] / (n * n),
- 1e-5);
- }
- }
- expected[y * n + x] = 0;
- }
- }
-};
-
-TEST_P(IFFT2DTest, Benchmark) {
- int n = GetParam().n;
- float sum = 0;
- for (int i = 0; i < 1000 * (64 - n); ++i) {
- input_[i % (n * n)] = 1;
- GetParam().ifft(&input_[0], &temp_[0], &output_[0]);
- sum += output_[0];
- input_[i % (n * n)] = 0;
- }
-}
-INSTANTIATE_TEST_CASE_P(
- C, IFFT2DTest,
- ::testing::Values(IFFTTestArg(2, aom_ifft2x2_float_c),
- IFFTTestArg(4, aom_ifft4x4_float_c),
- IFFTTestArg(8, aom_ifft8x8_float_c),
- IFFTTestArg(16, aom_ifft16x16_float_c),
- IFFTTestArg(32, aom_ifft32x32_float_c)));
-#if ARCH_X86 || ARCH_X86_64
-#if HAVE_SSE2
-INSTANTIATE_TEST_CASE_P(
- SSE2, IFFT2DTest,
- ::testing::Values(IFFTTestArg(4, aom_ifft4x4_float_sse2),
- IFFTTestArg(8, aom_ifft8x8_float_sse2),
- IFFTTestArg(16, aom_ifft16x16_float_sse2),
- IFFTTestArg(32, aom_ifft32x32_float_sse2)));
-#endif // HAVE_SSE2
-
-#if HAVE_AVX2
-INSTANTIATE_TEST_CASE_P(
- AVX2, IFFT2DTest,
- ::testing::Values(IFFTTestArg(8, aom_ifft8x8_float_avx2),
- IFFTTestArg(16, aom_ifft16x16_float_avx2),
- IFFTTestArg(32, aom_ifft32x32_float_avx2)));
-#endif // HAVE_AVX2
-#endif // ARCH_X86 || ARCH_X86_64
-
-} // namespace
diff --git a/third_party/aom/test/film_grain_table_test.cc b/third_party/aom/test/film_grain_table_test.cc
deleted file mode 100644
index 524d67d7b..000000000
--- a/third_party/aom/test/film_grain_table_test.cc
+++ /dev/null
@@ -1,250 +0,0 @@
-/*
- * Copyright (c) 2018, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#include <string>
-#include "third_party/googletest/src/googletest/include/gtest/gtest.h"
-#include "aom_dsp/grain_table.h"
-#include "aom/internal/aom_codec_internal.h"
-#include "av1/encoder/grain_test_vectors.h"
-#include "test/video_source.h"
-
-void grain_equal(const aom_film_grain_t *expected,
- const aom_film_grain_t *actual) {
- EXPECT_EQ(expected->apply_grain, actual->apply_grain);
- EXPECT_EQ(expected->update_parameters, actual->update_parameters);
- if (!expected->update_parameters) return;
- EXPECT_EQ(expected->num_y_points, actual->num_y_points);
- EXPECT_EQ(expected->num_cb_points, actual->num_cb_points);
- EXPECT_EQ(expected->num_cr_points, actual->num_cr_points);
- EXPECT_EQ(0, memcmp(expected->scaling_points_y, actual->scaling_points_y,
- expected->num_y_points *
- sizeof(expected->scaling_points_y[0])));
- EXPECT_EQ(0, memcmp(expected->scaling_points_cb, actual->scaling_points_cb,
- expected->num_cb_points *
- sizeof(expected->scaling_points_cb[0])));
- EXPECT_EQ(0, memcmp(expected->scaling_points_cr, actual->scaling_points_cr,
- expected->num_cr_points *
- sizeof(expected->scaling_points_cr[0])));
- EXPECT_EQ(expected->scaling_shift, actual->scaling_shift);
- EXPECT_EQ(expected->ar_coeff_lag, actual->ar_coeff_lag);
- EXPECT_EQ(expected->ar_coeff_shift, actual->ar_coeff_shift);
-
- const int num_pos_luma =
- 2 * expected->ar_coeff_lag * (expected->ar_coeff_lag + 1);
- const int num_pos_chroma = num_pos_luma;
- EXPECT_EQ(0, memcmp(expected->ar_coeffs_y, actual->ar_coeffs_y,
- sizeof(expected->ar_coeffs_y[0]) * num_pos_luma));
- if (actual->num_cb_points || actual->chroma_scaling_from_luma) {
- EXPECT_EQ(0, memcmp(expected->ar_coeffs_cb, actual->ar_coeffs_cb,
- sizeof(expected->ar_coeffs_cb[0]) * num_pos_chroma));
- }
- if (actual->num_cr_points || actual->chroma_scaling_from_luma) {
- EXPECT_EQ(0, memcmp(expected->ar_coeffs_cr, actual->ar_coeffs_cr,
- sizeof(expected->ar_coeffs_cr[0]) * num_pos_chroma));
- }
- EXPECT_EQ(expected->overlap_flag, actual->overlap_flag);
- EXPECT_EQ(expected->chroma_scaling_from_luma,
- actual->chroma_scaling_from_luma);
- EXPECT_EQ(expected->grain_scale_shift, actual->grain_scale_shift);
- // EXPECT_EQ(expected->random_seed, actual->random_seed);
-
- // clip_to_restricted and bit_depth aren't written
- if (expected->num_cb_points) {
- EXPECT_EQ(expected->cb_mult, actual->cb_mult);
- EXPECT_EQ(expected->cb_luma_mult, actual->cb_luma_mult);
- EXPECT_EQ(expected->cb_offset, actual->cb_offset);
- }
- if (expected->num_cr_points) {
- EXPECT_EQ(expected->cr_mult, actual->cr_mult);
- EXPECT_EQ(expected->cr_luma_mult, actual->cr_luma_mult);
- EXPECT_EQ(expected->cr_offset, actual->cr_offset);
- }
-}
-
-TEST(FilmGrainTableTest, AddAndLookupSingleSegment) {
- aom_film_grain_table_t table;
- memset(&table, 0, sizeof(table));
-
- aom_film_grain_t grain;
- EXPECT_FALSE(aom_film_grain_table_lookup(&table, 0, 1000, false, &grain));
-
- aom_film_grain_table_append(&table, 1000, 2000, film_grain_test_vectors + 0);
- EXPECT_FALSE(aom_film_grain_table_lookup(&table, 0, 1000, false, &grain));
- EXPECT_FALSE(aom_film_grain_table_lookup(&table, 2000, 3000, false, &grain));
-
- EXPECT_TRUE(aom_film_grain_table_lookup(&table, 1000, 2000, false, &grain));
-
- grain.bit_depth = film_grain_test_vectors[0].bit_depth;
- EXPECT_EQ(0, memcmp(&grain, film_grain_test_vectors + 0, sizeof(table)));
-
- // Extend the existing segment
- aom_film_grain_table_append(&table, 2000, 3000, film_grain_test_vectors + 0);
- EXPECT_EQ(0, table.head->next);
-
- // Lookup and remove and check that the entry is no longer there
- EXPECT_TRUE(aom_film_grain_table_lookup(&table, 1000, 2000, true, &grain));
- EXPECT_FALSE(aom_film_grain_table_lookup(&table, 1000, 2000, false, &grain));
-
- EXPECT_TRUE(aom_film_grain_table_lookup(&table, 2000, 3000, true, &grain));
- EXPECT_FALSE(aom_film_grain_table_lookup(&table, 2000, 3000, false, &grain));
-
- EXPECT_EQ(0, table.head);
- EXPECT_EQ(0, table.tail);
- aom_film_grain_table_free(&table);
-}
-
-TEST(FilmGrainTableTest, SplitSingleSegment) {
- aom_film_grain_table_t table;
- aom_film_grain_t grain;
- memset(&table, 0, sizeof(table));
-
- aom_film_grain_table_append(&table, 0, 1000, film_grain_test_vectors + 0);
-
- // Test lookup and remove that adjusts start time
- EXPECT_TRUE(aom_film_grain_table_lookup(&table, 0, 100, true, &grain));
- EXPECT_EQ(NULL, table.head->next);
- EXPECT_EQ(100, table.head->start_time);
-
- // Test lookup and remove that adjusts end time
- EXPECT_TRUE(aom_film_grain_table_lookup(&table, 900, 1000, true, &grain));
- EXPECT_EQ(NULL, table.head->next);
- EXPECT_EQ(100, table.head->start_time);
- EXPECT_EQ(900, table.head->end_time);
-
- // Test lookup and remove that splits the first entry
- EXPECT_TRUE(aom_film_grain_table_lookup(&table, 400, 600, true, &grain));
- EXPECT_EQ(100, table.head->start_time);
- EXPECT_EQ(400, table.head->end_time);
-
- ASSERT_NE((void *)NULL, table.head->next);
- EXPECT_EQ(table.tail, table.head->next);
- EXPECT_EQ(600, table.head->next->start_time);
- EXPECT_EQ(900, table.head->next->end_time);
-
- aom_film_grain_table_free(&table);
-}
-
-TEST(FilmGrainTableTest, AddAndLookupMultipleSegments) {
- aom_film_grain_table_t table;
- memset(&table, 0, sizeof(table));
-
- aom_film_grain_t grain;
- const int kNumTestVectors =
- sizeof(film_grain_test_vectors) / sizeof(film_grain_test_vectors[0]);
- for (int i = 0; i < kNumTestVectors; ++i) {
- aom_film_grain_table_append(&table, i * 1000, (i + 1) * 1000,
- film_grain_test_vectors + i);
- }
-
- for (int i = kNumTestVectors - 1; i >= 0; --i) {
- EXPECT_TRUE(aom_film_grain_table_lookup(&table, i * 1000, (i + 1) * 1000,
- true, &grain));
- grain_equal(film_grain_test_vectors + i, &grain);
- EXPECT_FALSE(aom_film_grain_table_lookup(&table, i * 1000, (i + 1) * 1000,
- true, &grain));
- }
-
- // Verify that all the data has been removed
- for (int i = 0; i < kNumTestVectors; ++i) {
- EXPECT_FALSE(aom_film_grain_table_lookup(&table, i * 1000, (i + 1) * 1000,
- true, &grain));
- }
- aom_film_grain_table_free(&table);
-}
-
-class FilmGrainTableIOTest : public ::testing::Test {
- protected:
- void SetUp() { memset(&error_, 0, sizeof(error_)); }
- struct aom_internal_error_info error_;
-};
-
-TEST_F(FilmGrainTableIOTest, ReadMissingFile) {
- aom_film_grain_table_t table;
- memset(&table, 0, sizeof(table));
- ASSERT_EQ(AOM_CODEC_ERROR, aom_film_grain_table_read(
- &table, "/path/to/missing/file", &error_));
-}
-
-TEST_F(FilmGrainTableIOTest, ReadTruncatedFile) {
- aom_film_grain_table_t table;
- memset(&table, 0, sizeof(table));
-
- std::string grain_file;
- FILE *file = libaom_test::GetTempOutFile(&grain_file);
- fwrite("deadbeef", 8, 1, file);
- fclose(file);
- ASSERT_EQ(AOM_CODEC_ERROR,
- aom_film_grain_table_read(&table, grain_file.c_str(), &error_));
- EXPECT_EQ(0, remove(grain_file.c_str()));
-}
-
-TEST_F(FilmGrainTableIOTest, RoundTripReadWrite) {
- aom_film_grain_table_t table;
- memset(&table, 0, sizeof(table));
-
- aom_film_grain_t expected_grain[16];
- const int kNumTestVectors =
- sizeof(film_grain_test_vectors) / sizeof(film_grain_test_vectors[0]);
- for (int i = 0; i < kNumTestVectors; ++i) {
- expected_grain[i] = film_grain_test_vectors[i];
- expected_grain[i].random_seed = i;
- expected_grain[i].update_parameters = i % 2;
- expected_grain[i].apply_grain = (i + 1) % 2;
- expected_grain[i].bit_depth = 0;
- aom_film_grain_table_append(&table, i * 1000, (i + 1) * 1000,
- expected_grain + i);
- }
- std::string grain_file;
- fclose(libaom_test::GetTempOutFile(&grain_file));
- ASSERT_EQ(AOM_CODEC_OK,
- aom_film_grain_table_write(&table, grain_file.c_str(), &error_));
- aom_film_grain_table_free(&table);
-
- memset(&table, 0, sizeof(table));
- ASSERT_EQ(AOM_CODEC_OK,
- aom_film_grain_table_read(&table, grain_file.c_str(), &error_));
- for (int i = 0; i < kNumTestVectors; ++i) {
- aom_film_grain_t grain;
- EXPECT_TRUE(aom_film_grain_table_lookup(&table, i * 1000, (i + 1) * 1000,
- true, &grain));
- grain_equal(expected_grain + i, &grain);
- }
- aom_film_grain_table_free(&table);
- EXPECT_EQ(0, remove(grain_file.c_str()));
-}
-
-TEST_F(FilmGrainTableIOTest, RoundTripSplit) {
- std::string grain_file;
- fclose(libaom_test::GetTempOutFile(&grain_file));
-
- aom_film_grain_table_t table;
- memset(&table, 0, sizeof(table));
-
- aom_film_grain_t grain = film_grain_test_vectors[0];
- aom_film_grain_table_append(&table, 0, 3000, &grain);
- ASSERT_TRUE(aom_film_grain_table_lookup(&table, 1000, 2000, true, &grain));
- ASSERT_TRUE(aom_film_grain_table_lookup(&table, 0, 1000, false, &grain));
- EXPECT_FALSE(aom_film_grain_table_lookup(&table, 1000, 2000, false, &grain));
- ASSERT_TRUE(aom_film_grain_table_lookup(&table, 2000, 3000, false, &grain));
- ASSERT_EQ(AOM_CODEC_OK,
- aom_film_grain_table_write(&table, grain_file.c_str(), &error_));
- aom_film_grain_table_free(&table);
-
- memset(&table, 0, sizeof(table));
- ASSERT_EQ(AOM_CODEC_OK,
- aom_film_grain_table_read(&table, grain_file.c_str(), &error_));
- ASSERT_TRUE(aom_film_grain_table_lookup(&table, 0, 1000, false, &grain));
- ASSERT_FALSE(aom_film_grain_table_lookup(&table, 1000, 2000, false, &grain));
- ASSERT_TRUE(aom_film_grain_table_lookup(&table, 2000, 3000, false, &grain));
- aom_film_grain_table_free(&table);
-
- EXPECT_EQ(0, remove(grain_file.c_str()));
-}
diff --git a/third_party/aom/test/filterintra_test.cc b/third_party/aom/test/filterintra_test.cc
deleted file mode 100644
index 597134940..000000000
--- a/third_party/aom/test/filterintra_test.cc
+++ /dev/null
@@ -1,134 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#include "third_party/googletest/src/googletest/include/gtest/gtest.h"
-
-#include "config/av1_rtcd.h"
-
-#include "test/acm_random.h"
-#include "test/clear_system_state.h"
-#include "test/register_state_check.h"
-#include "test/util.h"
-#include "av1/common/enums.h"
-
-namespace {
-
-using ::testing::tuple;
-using libaom_test::ACMRandom;
-
-typedef void (*Predictor)(uint8_t *dst, ptrdiff_t stride, TX_SIZE tx_size,
- const uint8_t *above, const uint8_t *left, int mode);
-
-// Note:
-// Test parameter list:
-// Reference predictor, optimized predictor, prediction mode, tx size
-//
-typedef tuple<Predictor, Predictor, int> PredFuncMode;
-typedef tuple<PredFuncMode, TX_SIZE> PredParams;
-
-const int MaxTxSize = 32;
-
-const int MaxTestNum = 100;
-
-class AV1FilterIntraPredTest : public ::testing::TestWithParam<PredParams> {
- public:
- virtual ~AV1FilterIntraPredTest() {}
- virtual void SetUp() {
- PredFuncMode funcMode = GET_PARAM(0);
- predFuncRef_ = ::testing::get<0>(funcMode);
- predFunc_ = ::testing::get<1>(funcMode);
- mode_ = ::testing::get<2>(funcMode);
- txSize_ = GET_PARAM(1);
-
- alloc_ = new uint8_t[2 * MaxTxSize + 1];
- predRef_ = new uint8_t[MaxTxSize * MaxTxSize];
- pred_ = new uint8_t[MaxTxSize * MaxTxSize];
- }
-
- virtual void TearDown() {
- delete[] alloc_;
- delete[] predRef_;
- delete[] pred_;
- libaom_test::ClearSystemState();
- }
-
- protected:
- void RunTest() const {
- int tstIndex = 0;
- int stride = tx_size_wide[txSize_];
- uint8_t *left = alloc_;
- uint8_t *above = alloc_ + MaxTxSize;
- while (tstIndex < MaxTestNum) {
- PrepareBuffer();
- predFuncRef_(predRef_, stride, txSize_, &above[1], left, mode_);
- ASM_REGISTER_STATE_CHECK(
- predFunc_(pred_, stride, txSize_, &above[1], left, mode_));
- DiffPred(tstIndex);
- tstIndex += 1;
- }
- }
-
- private:
- void PrepareBuffer() const {
- ACMRandom rnd(ACMRandom::DeterministicSeed());
- int i = 0;
- while (i < (2 * MaxTxSize + 1)) {
- alloc_[i] = rnd.Rand8();
- i++;
- }
- }
-
- void DiffPred(int testNum) const {
- int i = 0;
- while (i < tx_size_wide[txSize_] * tx_size_high[txSize_]) {
- EXPECT_EQ(predRef_[i], pred_[i]) << "Error at position: " << i << " "
- << "Tx size: " << tx_size_wide[txSize_]
- << "x" << tx_size_high[txSize_] << " "
- << "Test number: " << testNum;
- i++;
- }
- }
-
- Predictor predFunc_;
- Predictor predFuncRef_;
- int mode_;
- TX_SIZE txSize_;
- uint8_t *alloc_;
- uint8_t *pred_;
- uint8_t *predRef_;
-};
-
-TEST_P(AV1FilterIntraPredTest, BitExactCheck) { RunTest(); }
-
-using ::testing::make_tuple;
-
-const PredFuncMode kPredFuncMdArray[] = {
- make_tuple(&av1_filter_intra_predictor_c, &av1_filter_intra_predictor_sse4_1,
- FILTER_DC_PRED),
- make_tuple(&av1_filter_intra_predictor_c, &av1_filter_intra_predictor_sse4_1,
- FILTER_V_PRED),
- make_tuple(&av1_filter_intra_predictor_c, &av1_filter_intra_predictor_sse4_1,
- FILTER_H_PRED),
- make_tuple(&av1_filter_intra_predictor_c, &av1_filter_intra_predictor_sse4_1,
- FILTER_D157_PRED),
- make_tuple(&av1_filter_intra_predictor_c, &av1_filter_intra_predictor_sse4_1,
- FILTER_PAETH_PRED),
-};
-
-const TX_SIZE kTxSize[] = { TX_4X4, TX_8X8, TX_16X16, TX_32X32, TX_4X8,
- TX_8X4, TX_8X16, TX_16X8, TX_16X32, TX_32X16,
- TX_4X16, TX_16X4, TX_8X32, TX_32X8 };
-
-INSTANTIATE_TEST_CASE_P(
- SSE4_1, AV1FilterIntraPredTest,
- ::testing::Combine(::testing::ValuesIn(kPredFuncMdArray),
- ::testing::ValuesIn(kTxSize)));
-} // namespace
diff --git a/third_party/aom/test/frame_size_tests.cc b/third_party/aom/test/frame_size_tests.cc
deleted file mode 100644
index eaf0b8370..000000000
--- a/third_party/aom/test/frame_size_tests.cc
+++ /dev/null
@@ -1,78 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#include "third_party/googletest/src/googletest/include/gtest/gtest.h"
-#include "test/codec_factory.h"
-#include "test/video_source.h"
-
-namespace {
-
-class AV1FrameSizeTests : public ::testing::Test,
- public ::libaom_test::EncoderTest {
- protected:
- AV1FrameSizeTests()
- : EncoderTest(&::libaom_test::kAV1), expected_res_(AOM_CODEC_OK) {}
- virtual ~AV1FrameSizeTests() {}
-
- virtual void SetUp() {
- InitializeConfig();
- SetMode(::libaom_test::kRealTime);
- }
-
- virtual bool HandleDecodeResult(const aom_codec_err_t res_dec,
- libaom_test::Decoder *decoder) {
- EXPECT_EQ(expected_res_, res_dec) << decoder->DecodeError();
- return !::testing::Test::HasFailure();
- }
-
- virtual void PreEncodeFrameHook(::libaom_test::VideoSource *video,
- ::libaom_test::Encoder *encoder) {
- if (video->frame() == 1) {
- encoder->Control(AOME_SET_CPUUSED, 7);
- encoder->Control(AOME_SET_ENABLEAUTOALTREF, 1);
- encoder->Control(AOME_SET_ARNR_MAXFRAMES, 7);
- encoder->Control(AOME_SET_ARNR_STRENGTH, 5);
- }
- }
-
- int expected_res_;
-};
-
-#if CONFIG_SIZE_LIMIT
-TEST_F(AV1FrameSizeTests, TestInvalidSizes) {
- ::libaom_test::RandomVideoSource video;
-
- video.SetSize(DECODE_WIDTH_LIMIT + 16, DECODE_HEIGHT_LIMIT + 16);
- video.set_limit(2);
- expected_res_ = AOM_CODEC_CORRUPT_FRAME;
- ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
-}
-
-TEST_F(AV1FrameSizeTests, LargeValidSizes) {
- ::libaom_test::RandomVideoSource video;
-
- video.SetSize(DECODE_WIDTH_LIMIT, DECODE_HEIGHT_LIMIT);
- video.set_limit(2);
- expected_res_ = AOM_CODEC_OK;
- ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
-}
-#endif
-
-TEST_F(AV1FrameSizeTests, OneByOneVideo) {
- ::libaom_test::RandomVideoSource video;
-
- video.SetSize(1, 1);
- video.set_limit(2);
- expected_res_ = AOM_CODEC_OK;
- ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
-}
-#undef ONE_BY_ONE_VIDEO_NAME
-} // namespace
diff --git a/third_party/aom/test/function_equivalence_test.h b/third_party/aom/test/function_equivalence_test.h
deleted file mode 100644
index f27068902..000000000
--- a/third_party/aom/test/function_equivalence_test.h
+++ /dev/null
@@ -1,69 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#ifndef AOM_TEST_FUNCTION_EQUIVALENCE_TEST_H_
-#define AOM_TEST_FUNCTION_EQUIVALENCE_TEST_H_
-
-#include "third_party/googletest/src/googletest/include/gtest/gtest.h"
-#include "test/acm_random.h"
-#include "test/clear_system_state.h"
-#include "test/util.h"
-
-using libaom_test::ACMRandom;
-
-namespace libaom_test {
-// Base class for tests that compare 2 implementations of the same function
-// for equivalence. The template parameter should be pointer to a function
-// that is being tested.
-//
-// The test takes a 3-parameters encapsulating struct 'FuncParam', containing:
-// - Pointer to reference function
-// - Pointer to tested function
-// - Integer bit depth (default to 0).
-//
-// These values are then accessible in the tests as member of params_:
-// params_.ref_func, params_.tst_func, and params_.bit_depth.
-//
-
-template <typename T>
-struct FuncParam {
- FuncParam(T ref = NULL, T tst = NULL, int bit_depth = 0)
- : ref_func(ref), tst_func(tst), bit_depth(bit_depth) {}
- T ref_func;
- T tst_func;
- int bit_depth;
-};
-
-template <typename T>
-std::ostream &operator<<(std::ostream &os, const FuncParam<T> &p) {
- return os << "bit_depth:" << p.bit_depth
- << " function:" << reinterpret_cast<const void *>(p.ref_func)
- << " function:" << reinterpret_cast<const void *>(p.tst_func);
-}
-
-template <typename T>
-class FunctionEquivalenceTest : public ::testing::TestWithParam<FuncParam<T> > {
- public:
- FunctionEquivalenceTest() : rng_(ACMRandom::DeterministicSeed()) {}
-
- virtual ~FunctionEquivalenceTest() {}
-
- virtual void SetUp() { params_ = this->GetParam(); }
-
- virtual void TearDown() { libaom_test::ClearSystemState(); }
-
- protected:
- ACMRandom rng_;
- FuncParam<T> params_;
-};
-
-} // namespace libaom_test
-#endif // AOM_TEST_FUNCTION_EQUIVALENCE_TEST_H_
diff --git a/third_party/aom/test/fwht4x4_test.cc b/third_party/aom/test/fwht4x4_test.cc
deleted file mode 100644
index c8d98c519..000000000
--- a/third_party/aom/test/fwht4x4_test.cc
+++ /dev/null
@@ -1,98 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#include <math.h>
-#include <stdlib.h>
-#include <string.h>
-
-#include "third_party/googletest/src/googletest/include/gtest/gtest.h"
-
-#include "config/av1_rtcd.h"
-#include "config/aom_dsp_rtcd.h"
-#include "test/acm_random.h"
-#include "test/clear_system_state.h"
-#include "test/register_state_check.h"
-#include "test/transform_test_base.h"
-#include "test/util.h"
-#include "av1/common/entropy.h"
-#include "aom/aom_codec.h"
-#include "aom/aom_integer.h"
-#include "aom_ports/mem.h"
-
-using libaom_test::ACMRandom;
-
-namespace {
-typedef void (*FdctFunc)(const int16_t *in, tran_low_t *out, int stride);
-typedef void (*IdctFunc)(const tran_low_t *in, uint8_t *out, int stride);
-
-using libaom_test::FhtFunc;
-
-typedef ::testing::tuple<FdctFunc, IdctFunc, TX_TYPE, aom_bit_depth_t, int>
- Dct4x4Param;
-
-void fwht4x4_ref(const int16_t *in, tran_low_t *out, int stride,
- TxfmParam * /*txfm_param*/) {
- av1_fwht4x4_c(in, out, stride);
-}
-
-void iwht4x4_10(const tran_low_t *in, uint8_t *out, int stride) {
- av1_highbd_iwht4x4_16_add_c(in, out, stride, 10);
-}
-
-void iwht4x4_12(const tran_low_t *in, uint8_t *out, int stride) {
- av1_highbd_iwht4x4_16_add_c(in, out, stride, 12);
-}
-
-class Trans4x4WHT : public libaom_test::TransformTestBase,
- public ::testing::TestWithParam<Dct4x4Param> {
- public:
- virtual ~Trans4x4WHT() {}
-
- virtual void SetUp() {
- fwd_txfm_ = GET_PARAM(0);
- inv_txfm_ = GET_PARAM(1);
- pitch_ = 4;
- height_ = 4;
- fwd_txfm_ref = fwht4x4_ref;
- bit_depth_ = GET_PARAM(3);
- mask_ = (1 << bit_depth_) - 1;
- num_coeffs_ = GET_PARAM(4);
- }
- virtual void TearDown() { libaom_test::ClearSystemState(); }
-
- protected:
- void RunFwdTxfm(const int16_t *in, tran_low_t *out, int stride) {
- fwd_txfm_(in, out, stride);
- }
- void RunInvTxfm(const tran_low_t *out, uint8_t *dst, int stride) {
- inv_txfm_(out, dst, stride);
- }
-
- FdctFunc fwd_txfm_;
- IdctFunc inv_txfm_;
-};
-
-TEST_P(Trans4x4WHT, AccuracyCheck) { RunAccuracyCheck(0, 0.00001); }
-
-TEST_P(Trans4x4WHT, CoeffCheck) { RunCoeffCheck(); }
-
-TEST_P(Trans4x4WHT, MemCheck) { RunMemCheck(); }
-
-TEST_P(Trans4x4WHT, InvAccuracyCheck) { RunInvAccuracyCheck(0); }
-using ::testing::make_tuple;
-
-INSTANTIATE_TEST_CASE_P(
- C, Trans4x4WHT,
- ::testing::Values(make_tuple(&av1_highbd_fwht4x4_c, &iwht4x4_10, DCT_DCT,
- AOM_BITS_10, 16),
- make_tuple(&av1_highbd_fwht4x4_c, &iwht4x4_12, DCT_DCT,
- AOM_BITS_12, 16)));
-} // namespace
diff --git a/third_party/aom/test/gviz_api.py b/third_party/aom/test/gviz_api.py
deleted file mode 100755
index d3a443dab..000000000
--- a/third_party/aom/test/gviz_api.py
+++ /dev/null
@@ -1,1087 +0,0 @@
-#!/usr/bin/python
-#
-# Copyright (c) 2016, Alliance for Open Media. All rights reserved
-#
-# This source code is subject to the terms of the BSD 2 Clause License and
-# the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
-# was not distributed with this source code in the LICENSE file, you can
-# obtain it at www.aomedia.org/license/software. If the Alliance for Open
-# Media Patent License 1.0 was not distributed with this source code in the
-# PATENTS file, you can obtain it at www.aomedia.org/license/patent.
-#
-
-"""Converts Python data into data for Google Visualization API clients.
-
-This library can be used to create a google.visualization.DataTable usable by
-visualizations built on the Google Visualization API. Output formats are raw
-JSON, JSON response, JavaScript, CSV, and HTML table.
-
-See http://code.google.com/apis/visualization/ for documentation on the
-Google Visualization API.
-"""
-
-__author__ = "Amit Weinstein, Misha Seltzer, Jacob Baskin"
-
-import cgi
-import cStringIO
-import csv
-import datetime
-try:
- import json
-except ImportError:
- import simplejson as json
-import types
-
-
-class DataTableException(Exception):
- """The general exception object thrown by DataTable."""
- pass
-
-
-class DataTableJSONEncoder(json.JSONEncoder):
- """JSON encoder that handles date/time/datetime objects correctly."""
-
- def __init__(self):
- json.JSONEncoder.__init__(self,
- separators=(",", ":"),
- ensure_ascii=False)
-
- def default(self, o):
- if isinstance(o, datetime.datetime):
- if o.microsecond == 0:
- # If the time doesn't have ms-resolution, leave it out to keep
- # things smaller.
- return "Date(%d,%d,%d,%d,%d,%d)" % (
- o.year, o.month - 1, o.day, o.hour, o.minute, o.second)
- else:
- return "Date(%d,%d,%d,%d,%d,%d,%d)" % (
- o.year, o.month - 1, o.day, o.hour, o.minute, o.second,
- o.microsecond / 1000)
- elif isinstance(o, datetime.date):
- return "Date(%d,%d,%d)" % (o.year, o.month - 1, o.day)
- elif isinstance(o, datetime.time):
- return [o.hour, o.minute, o.second]
- else:
- return super(DataTableJSONEncoder, self).default(o)
-
-
-class DataTable(object):
- """Wraps the data to convert to a Google Visualization API DataTable.
-
- Create this object, populate it with data, then call one of the ToJS...
- methods to return a string representation of the data in the format described.
-
- You can clear all data from the object to reuse it, but you cannot clear
- individual cells, rows, or columns. You also cannot modify the table schema
- specified in the class constructor.
-
- You can add new data one or more rows at a time. All data added to an
- instantiated DataTable must conform to the schema passed in to __init__().
-
- You can reorder the columns in the output table, and also specify row sorting
- order by column. The default column order is according to the original
- table_description parameter. Default row sort order is ascending, by column
- 1 values. For a dictionary, we sort the keys for order.
-
- The data and the table_description are closely tied, as described here:
-
- The table schema is defined in the class constructor's table_description
- parameter. The user defines each column using a tuple of
- (id[, type[, label[, custom_properties]]]). The default value for type is
- string, label is the same as ID if not specified, and custom properties is
- an empty dictionary if not specified.
-
- table_description is a dictionary or list, containing one or more column
- descriptor tuples, nested dictionaries, and lists. Each dictionary key, list
- element, or dictionary element must eventually be defined as
- a column description tuple. Here's an example of a dictionary where the key
- is a tuple, and the value is a list of two tuples:
- {('a', 'number'): [('b', 'number'), ('c', 'string')]}
-
- This flexibility in data entry enables you to build and manipulate your data
- in a Python structure that makes sense for your program.
-
- Add data to the table using the same nested design as the table's
- table_description, replacing column descriptor tuples with cell data, and
- each row is an element in the top level collection. This will be a bit
- clearer after you look at the following examples showing the
- table_description, matching data, and the resulting table:
-
- Columns as list of tuples [col1, col2, col3]
- table_description: [('a', 'number'), ('b', 'string')]
- AppendData( [[1, 'z'], [2, 'w'], [4, 'o'], [5, 'k']] )
- Table:
- a b <--- these are column ids/labels
- 1 z
- 2 w
- 4 o
- 5 k
-
- Dictionary of columns, where key is a column, and value is a list of
- columns {col1: [col2, col3]}
- table_description: {('a', 'number'): [('b', 'number'), ('c', 'string')]}
- AppendData( data: {1: [2, 'z'], 3: [4, 'w']}
- Table:
- a b c
- 1 2 z
- 3 4 w
-
- Dictionary where key is a column, and the value is itself a dictionary of
- columns {col1: {col2, col3}}
- table_description: {('a', 'number'): {'b': 'number', 'c': 'string'}}
- AppendData( data: {1: {'b': 2, 'c': 'z'}, 3: {'b': 4, 'c': 'w'}}
- Table:
- a b c
- 1 2 z
- 3 4 w
- """
-
- def __init__(self, table_description, data=None, custom_properties=None):
- """Initialize the data table from a table schema and (optionally) data.
-
- See the class documentation for more information on table schema and data
- values.
-
- Args:
- table_description: A table schema, following one of the formats described
- in TableDescriptionParser(). Schemas describe the
- column names, data types, and labels. See
- TableDescriptionParser() for acceptable formats.
- data: Optional. If given, fills the table with the given data. The data
- structure must be consistent with schema in table_description. See
- the class documentation for more information on acceptable data. You
- can add data later by calling AppendData().
- custom_properties: Optional. A dictionary from string to string that
- goes into the table's custom properties. This can be
- later changed by changing self.custom_properties.
-
- Raises:
- DataTableException: Raised if the data and the description did not match,
- or did not use the supported formats.
- """
- self.__columns = self.TableDescriptionParser(table_description)
- self.__data = []
- self.custom_properties = {}
- if custom_properties is not None:
- self.custom_properties = custom_properties
- if data:
- self.LoadData(data)
-
- @staticmethod
- def CoerceValue(value, value_type):
- """Coerces a single value into the type expected for its column.
-
- Internal helper method.
-
- Args:
- value: The value which should be converted
- value_type: One of "string", "number", "boolean", "date", "datetime" or
- "timeofday".
-
- Returns:
- An item of the Python type appropriate to the given value_type. Strings
- are also converted to Unicode using UTF-8 encoding if necessary.
- If a tuple is given, it should be in one of the following forms:
- - (value, formatted value)
- - (value, formatted value, custom properties)
- where the formatted value is a string, and custom properties is a
- dictionary of the custom properties for this cell.
- To specify custom properties without specifying formatted value, one can
- pass None as the formatted value.
- One can also have a null-valued cell with formatted value and/or custom
- properties by specifying None for the value.
- This method ignores the custom properties except for checking that it is a
- dictionary. The custom properties are handled in the ToJSon and ToJSCode
- methods.
- The real type of the given value is not strictly checked. For example,
- any type can be used for string - as we simply take its str( ) and for
- boolean value we just check "if value".
- Examples:
- CoerceValue(None, "string") returns None
- CoerceValue((5, "5$"), "number") returns (5, "5$")
- CoerceValue(100, "string") returns "100"
- CoerceValue(0, "boolean") returns False
-
- Raises:
- DataTableException: The value and type did not match in a not-recoverable
- way, for example given value 'abc' for type 'number'.
- """
- if isinstance(value, tuple):
- # In case of a tuple, we run the same function on the value itself and
- # add the formatted value.
- if (len(value) not in [2, 3] or
- (len(value) == 3 and not isinstance(value[2], dict))):
- raise DataTableException("Wrong format for value and formatting - %s." %
- str(value))
- if not isinstance(value[1], types.StringTypes + (types.NoneType,)):
- raise DataTableException("Formatted value is not string, given %s." %
- type(value[1]))
- js_value = DataTable.CoerceValue(value[0], value_type)
- return (js_value,) + value[1:]
-
- t_value = type(value)
- if value is None:
- return value
- if value_type == "boolean":
- return bool(value)
-
- elif value_type == "number":
- if isinstance(value, (int, long, float)):
- return value
- raise DataTableException("Wrong type %s when expected number" % t_value)
-
- elif value_type == "string":
- if isinstance(value, unicode):
- return value
- else:
- return str(value).decode("utf-8")
-
- elif value_type == "date":
- if isinstance(value, datetime.datetime):
- return datetime.date(value.year, value.month, value.day)
- elif isinstance(value, datetime.date):
- return value
- else:
- raise DataTableException("Wrong type %s when expected date" % t_value)
-
- elif value_type == "timeofday":
- if isinstance(value, datetime.datetime):
- return datetime.time(value.hour, value.minute, value.second)
- elif isinstance(value, datetime.time):
- return value
- else:
- raise DataTableException("Wrong type %s when expected time" % t_value)
-
- elif value_type == "datetime":
- if isinstance(value, datetime.datetime):
- return value
- else:
- raise DataTableException("Wrong type %s when expected datetime" %
- t_value)
- # If we got here, it means the given value_type was not one of the
- # supported types.
- raise DataTableException("Unsupported type %s" % value_type)
-
- @staticmethod
- def EscapeForJSCode(encoder, value):
- if value is None:
- return "null"
- elif isinstance(value, datetime.datetime):
- if value.microsecond == 0:
- # If it's not ms-resolution, leave that out to save space.
- return "new Date(%d,%d,%d,%d,%d,%d)" % (value.year,
- value.month - 1, # To match JS
- value.day,
- value.hour,
- value.minute,
- value.second)
- else:
- return "new Date(%d,%d,%d,%d,%d,%d,%d)" % (value.year,
- value.month - 1, # match JS
- value.day,
- value.hour,
- value.minute,
- value.second,
- value.microsecond / 1000)
- elif isinstance(value, datetime.date):
- return "new Date(%d,%d,%d)" % (value.year, value.month - 1, value.day)
- else:
- return encoder.encode(value)
-
- @staticmethod
- def ToString(value):
- if value is None:
- return "(empty)"
- elif isinstance(value, (datetime.datetime,
- datetime.date,
- datetime.time)):
- return str(value)
- elif isinstance(value, unicode):
- return value
- elif isinstance(value, bool):
- return str(value).lower()
- else:
- return str(value).decode("utf-8")
-
- @staticmethod
- def ColumnTypeParser(description):
- """Parses a single column description. Internal helper method.
-
- Args:
- description: a column description in the possible formats:
- 'id'
- ('id',)
- ('id', 'type')
- ('id', 'type', 'label')
- ('id', 'type', 'label', {'custom_prop1': 'custom_val1'})
- Returns:
- Dictionary with the following keys: id, label, type, and
- custom_properties where:
- - If label not given, it equals the id.
- - If type not given, string is used by default.
- - If custom properties are not given, an empty dictionary is used by
- default.
-
- Raises:
- DataTableException: The column description did not match the RE, or
- unsupported type was passed.
- """
- if not description:
- raise DataTableException("Description error: empty description given")
-
- if not isinstance(description, (types.StringTypes, tuple)):
- raise DataTableException("Description error: expected either string or "
- "tuple, got %s." % type(description))
-
- if isinstance(description, types.StringTypes):
- description = (description,)
-
- # According to the tuple's length, we fill the keys
- # We verify everything is of type string
- for elem in description[:3]:
- if not isinstance(elem, types.StringTypes):
- raise DataTableException("Description error: expected tuple of "
- "strings, current element of type %s." %
- type(elem))
- desc_dict = {"id": description[0],
- "label": description[0],
- "type": "string",
- "custom_properties": {}}
- if len(description) > 1:
- desc_dict["type"] = description[1].lower()
- if len(description) > 2:
- desc_dict["label"] = description[2]
- if len(description) > 3:
- if not isinstance(description[3], dict):
- raise DataTableException("Description error: expected custom "
- "properties of type dict, current element "
- "of type %s." % type(description[3]))
- desc_dict["custom_properties"] = description[3]
- if len(description) > 4:
- raise DataTableException("Description error: tuple of length > 4")
- if desc_dict["type"] not in ["string", "number", "boolean",
- "date", "datetime", "timeofday"]:
- raise DataTableException(
- "Description error: unsupported type '%s'" % desc_dict["type"])
- return desc_dict
-
- @staticmethod
- def TableDescriptionParser(table_description, depth=0):
- """Parses the table_description object for internal use.
-
- Parses the user-submitted table description into an internal format used
- by the Python DataTable class. Returns the flat list of parsed columns.
-
- Args:
- table_description: A description of the table which should comply
- with one of the formats described below.
- depth: Optional. The depth of the first level in the current description.
- Used by recursive calls to this function.
-
- Returns:
- List of columns, where each column represented by a dictionary with the
- keys: id, label, type, depth, container which means the following:
- - id: the id of the column
- - name: The name of the column
- - type: The datatype of the elements in this column. Allowed types are
- described in ColumnTypeParser().
- - depth: The depth of this column in the table description
- - container: 'dict', 'iter' or 'scalar' for parsing the format easily.
- - custom_properties: The custom properties for this column.
- The returned description is flattened regardless of how it was given.
-
- Raises:
- DataTableException: Error in a column description or in the description
- structure.
-
- Examples:
- A column description can be of the following forms:
- 'id'
- ('id',)
- ('id', 'type')
- ('id', 'type', 'label')
- ('id', 'type', 'label', {'custom_prop1': 'custom_val1'})
- or as a dictionary:
- 'id': 'type'
- 'id': ('type',)
- 'id': ('type', 'label')
- 'id': ('type', 'label', {'custom_prop1': 'custom_val1'})
- If the type is not specified, we treat it as string.
- If no specific label is given, the label is simply the id.
- If no custom properties are given, we use an empty dictionary.
-
- input: [('a', 'date'), ('b', 'timeofday', 'b', {'foo': 'bar'})]
- output: [{'id': 'a', 'label': 'a', 'type': 'date',
- 'depth': 0, 'container': 'iter', 'custom_properties': {}},
- {'id': 'b', 'label': 'b', 'type': 'timeofday',
- 'depth': 0, 'container': 'iter',
- 'custom_properties': {'foo': 'bar'}}]
-
- input: {'a': [('b', 'number'), ('c', 'string', 'column c')]}
- output: [{'id': 'a', 'label': 'a', 'type': 'string',
- 'depth': 0, 'container': 'dict', 'custom_properties': {}},
- {'id': 'b', 'label': 'b', 'type': 'number',
- 'depth': 1, 'container': 'iter', 'custom_properties': {}},
- {'id': 'c', 'label': 'column c', 'type': 'string',
- 'depth': 1, 'container': 'iter', 'custom_properties': {}}]
-
- input: {('a', 'number', 'column a'): { 'b': 'number', 'c': 'string'}}
- output: [{'id': 'a', 'label': 'column a', 'type': 'number',
- 'depth': 0, 'container': 'dict', 'custom_properties': {}},
- {'id': 'b', 'label': 'b', 'type': 'number',
- 'depth': 1, 'container': 'dict', 'custom_properties': {}},
- {'id': 'c', 'label': 'c', 'type': 'string',
- 'depth': 1, 'container': 'dict', 'custom_properties': {}}]
-
- input: { ('w', 'string', 'word'): ('c', 'number', 'count') }
- output: [{'id': 'w', 'label': 'word', 'type': 'string',
- 'depth': 0, 'container': 'dict', 'custom_properties': {}},
- {'id': 'c', 'label': 'count', 'type': 'number',
- 'depth': 1, 'container': 'scalar', 'custom_properties': {}}]
-
- input: {'a': ('number', 'column a'), 'b': ('string', 'column b')}
- output: [{'id': 'a', 'label': 'column a', 'type': 'number', 'depth': 0,
- 'container': 'dict', 'custom_properties': {}},
- {'id': 'b', 'label': 'column b', 'type': 'string', 'depth': 0,
- 'container': 'dict', 'custom_properties': {}}
-
- NOTE: there might be ambiguity in the case of a dictionary representation
- of a single column. For example, the following description can be parsed
- in 2 different ways: {'a': ('b', 'c')} can be thought of a single column
- with the id 'a', of type 'b' and the label 'c', or as 2 columns: one named
- 'a', and the other named 'b' of type 'c'. We choose the first option by
- default, and in case the second option is the right one, it is possible to
- make the key into a tuple (i.e. {('a',): ('b', 'c')}) or add more info
- into the tuple, thus making it look like this: {'a': ('b', 'c', 'b', {})}
- -- second 'b' is the label, and {} is the custom properties field.
- """
- # For the recursion step, we check for a scalar object (string or tuple)
- if isinstance(table_description, (types.StringTypes, tuple)):
- parsed_col = DataTable.ColumnTypeParser(table_description)
- parsed_col["depth"] = depth
- parsed_col["container"] = "scalar"
- return [parsed_col]
-
- # Since it is not scalar, table_description must be iterable.
- if not hasattr(table_description, "__iter__"):
- raise DataTableException("Expected an iterable object, got %s" %
- type(table_description))
- if not isinstance(table_description, dict):
- # We expects a non-dictionary iterable item.
- columns = []
- for desc in table_description:
- parsed_col = DataTable.ColumnTypeParser(desc)
- parsed_col["depth"] = depth
- parsed_col["container"] = "iter"
- columns.append(parsed_col)
- if not columns:
- raise DataTableException("Description iterable objects should not"
- " be empty.")
- return columns
- # The other case is a dictionary
- if not table_description:
- raise DataTableException("Empty dictionaries are not allowed inside"
- " description")
-
- # To differentiate between the two cases of more levels below or this is
- # the most inner dictionary, we consider the number of keys (more then one
- # key is indication for most inner dictionary) and the type of the key and
- # value in case of only 1 key (if the type of key is string and the type of
- # the value is a tuple of 0-3 items, we assume this is the most inner
- # dictionary).
- # NOTE: this way of differentiating might create ambiguity. See docs.
- if (len(table_description) != 1 or
- (isinstance(table_description.keys()[0], types.StringTypes) and
- isinstance(table_description.values()[0], tuple) and
- len(table_description.values()[0]) < 4)):
- # This is the most inner dictionary. Parsing types.
- columns = []
- # We sort the items, equivalent to sort the keys since they are unique
- for key, value in sorted(table_description.items()):
- # We parse the column type as (key, type) or (key, type, label) using
- # ColumnTypeParser.
- if isinstance(value, tuple):
- parsed_col = DataTable.ColumnTypeParser((key,) + value)
- else:
- parsed_col = DataTable.ColumnTypeParser((key, value))
- parsed_col["depth"] = depth
- parsed_col["container"] = "dict"
- columns.append(parsed_col)
- return columns
- # This is an outer dictionary, must have at most one key.
- parsed_col = DataTable.ColumnTypeParser(table_description.keys()[0])
- parsed_col["depth"] = depth
- parsed_col["container"] = "dict"
- return ([parsed_col] +
- DataTable.TableDescriptionParser(table_description.values()[0],
- depth=depth + 1))
-
- @property
- def columns(self):
- """Returns the parsed table description."""
- return self.__columns
-
- def NumberOfRows(self):
- """Returns the number of rows in the current data stored in the table."""
- return len(self.__data)
-
- def SetRowsCustomProperties(self, rows, custom_properties):
- """Sets the custom properties for given row(s).
-
- Can accept a single row or an iterable of rows.
- Sets the given custom properties for all specified rows.
-
- Args:
- rows: The row, or rows, to set the custom properties for.
- custom_properties: A string to string dictionary of custom properties to
- set for all rows.
- """
- if not hasattr(rows, "__iter__"):
- rows = [rows]
- for row in rows:
- self.__data[row] = (self.__data[row][0], custom_properties)
-
- def LoadData(self, data, custom_properties=None):
- """Loads new rows to the data table, clearing existing rows.
-
- May also set the custom_properties for the added rows. The given custom
- properties dictionary specifies the dictionary that will be used for *all*
- given rows.
-
- Args:
- data: The rows that the table will contain.
- custom_properties: A dictionary of string to string to set as the custom
- properties for all rows.
- """
- self.__data = []
- self.AppendData(data, custom_properties)
-
- def AppendData(self, data, custom_properties=None):
- """Appends new data to the table.
-
- Data is appended in rows. Data must comply with
- the table schema passed in to __init__(). See CoerceValue() for a list
- of acceptable data types. See the class documentation for more information
- and examples of schema and data values.
-
- Args:
- data: The row to add to the table. The data must conform to the table
- description format.
- custom_properties: A dictionary of string to string, representing the
- custom properties to add to all the rows.
-
- Raises:
- DataTableException: The data structure does not match the description.
- """
- # If the maximal depth is 0, we simply iterate over the data table
- # lines and insert them using _InnerAppendData. Otherwise, we simply
- # let the _InnerAppendData handle all the levels.
- if not self.__columns[-1]["depth"]:
- for row in data:
- self._InnerAppendData(({}, custom_properties), row, 0)
- else:
- self._InnerAppendData(({}, custom_properties), data, 0)
-
- def _InnerAppendData(self, prev_col_values, data, col_index):
- """Inner function to assist LoadData."""
- # We first check that col_index has not exceeded the columns size
- if col_index >= len(self.__columns):
- raise DataTableException("The data does not match description, too deep")
-
- # Dealing with the scalar case, the data is the last value.
- if self.__columns[col_index]["container"] == "scalar":
- prev_col_values[0][self.__columns[col_index]["id"]] = data
- self.__data.append(prev_col_values)
- return
-
- if self.__columns[col_index]["container"] == "iter":
- if not hasattr(data, "__iter__") or isinstance(data, dict):
- raise DataTableException("Expected iterable object, got %s" %
- type(data))
- # We only need to insert the rest of the columns
- # If there are less items than expected, we only add what there is.
- for value in data:
- if col_index >= len(self.__columns):
- raise DataTableException("Too many elements given in data")
- prev_col_values[0][self.__columns[col_index]["id"]] = value
- col_index += 1
- self.__data.append(prev_col_values)
- return
-
- # We know the current level is a dictionary, we verify the type.
- if not isinstance(data, dict):
- raise DataTableException("Expected dictionary at current level, got %s" %
- type(data))
- # We check if this is the last level
- if self.__columns[col_index]["depth"] == self.__columns[-1]["depth"]:
- # We need to add the keys in the dictionary as they are
- for col in self.__columns[col_index:]:
- if col["id"] in data:
- prev_col_values[0][col["id"]] = data[col["id"]]
- self.__data.append(prev_col_values)
- return
-
- # We have a dictionary in an inner depth level.
- if not data.keys():
- # In case this is an empty dictionary, we add a record with the columns
- # filled only until this point.
- self.__data.append(prev_col_values)
- else:
- for key in sorted(data):
- col_values = dict(prev_col_values[0])
- col_values[self.__columns[col_index]["id"]] = key
- self._InnerAppendData((col_values, prev_col_values[1]),
- data[key], col_index + 1)
-
- def _PreparedData(self, order_by=()):
- """Prepares the data for enumeration - sorting it by order_by.
-
- Args:
- order_by: Optional. Specifies the name of the column(s) to sort by, and
- (optionally) which direction to sort in. Default sort direction
- is asc. Following formats are accepted:
- "string_col_name" -- For a single key in default (asc) order.
- ("string_col_name", "asc|desc") -- For a single key.
- [("col_1","asc|desc"), ("col_2","asc|desc")] -- For more than
- one column, an array of tuples of (col_name, "asc|desc").
-
- Returns:
- The data sorted by the keys given.
-
- Raises:
- DataTableException: Sort direction not in 'asc' or 'desc'
- """
- if not order_by:
- return self.__data
-
- proper_sort_keys = []
- if isinstance(order_by, types.StringTypes) or (
- isinstance(order_by, tuple) and len(order_by) == 2 and
- order_by[1].lower() in ["asc", "desc"]):
- order_by = (order_by,)
- for key in order_by:
- if isinstance(key, types.StringTypes):
- proper_sort_keys.append((key, 1))
- elif (isinstance(key, (list, tuple)) and len(key) == 2 and
- key[1].lower() in ("asc", "desc")):
- proper_sort_keys.append((key[0], key[1].lower() == "asc" and 1 or -1))
- else:
- raise DataTableException("Expected tuple with second value: "
- "'asc' or 'desc'")
-
- def SortCmpFunc(row1, row2):
- """cmp function for sorted. Compares by keys and 'asc'/'desc' keywords."""
- for key, asc_mult in proper_sort_keys:
- cmp_result = asc_mult * cmp(row1[0].get(key), row2[0].get(key))
- if cmp_result:
- return cmp_result
- return 0
-
- return sorted(self.__data, cmp=SortCmpFunc)
-
- def ToJSCode(self, name, columns_order=None, order_by=()):
- """Writes the data table as a JS code string.
-
- This method writes a string of JS code that can be run to
- generate a DataTable with the specified data. Typically used for debugging
- only.
-
- Args:
- name: The name of the table. The name would be used as the DataTable's
- variable name in the created JS code.
- columns_order: Optional. Specifies the order of columns in the
- output table. Specify a list of all column IDs in the order
- in which you want the table created.
- Note that you must list all column IDs in this parameter,
- if you use it.
- order_by: Optional. Specifies the name of the column(s) to sort by.
- Passed as is to _PreparedData.
-
- Returns:
- A string of JS code that, when run, generates a DataTable with the given
- name and the data stored in the DataTable object.
- Example result:
- "var tab1 = new google.visualization.DataTable();
- tab1.addColumn("string", "a", "a");
- tab1.addColumn("number", "b", "b");
- tab1.addColumn("boolean", "c", "c");
- tab1.addRows(10);
- tab1.setCell(0, 0, "a");
- tab1.setCell(0, 1, 1, null, {"foo": "bar"});
- tab1.setCell(0, 2, true);
- ...
- tab1.setCell(9, 0, "c");
- tab1.setCell(9, 1, 3, "3$");
- tab1.setCell(9, 2, false);"
-
- Raises:
- DataTableException: The data does not match the type.
- """
-
- encoder = DataTableJSONEncoder()
-
- if columns_order is None:
- columns_order = [col["id"] for col in self.__columns]
- col_dict = dict([(col["id"], col) for col in self.__columns])
-
- # We first create the table with the given name
- jscode = "var %s = new google.visualization.DataTable();\n" % name
- if self.custom_properties:
- jscode += "%s.setTableProperties(%s);\n" % (
- name, encoder.encode(self.custom_properties))
-
- # We add the columns to the table
- for i, col in enumerate(columns_order):
- jscode += "%s.addColumn(%s, %s, %s);\n" % (
- name,
- encoder.encode(col_dict[col]["type"]),
- encoder.encode(col_dict[col]["label"]),
- encoder.encode(col_dict[col]["id"]))
- if col_dict[col]["custom_properties"]:
- jscode += "%s.setColumnProperties(%d, %s);\n" % (
- name, i, encoder.encode(col_dict[col]["custom_properties"]))
- jscode += "%s.addRows(%d);\n" % (name, len(self.__data))
-
- # We now go over the data and add each row
- for (i, (row, cp)) in enumerate(self._PreparedData(order_by)):
- # We add all the elements of this row by their order
- for (j, col) in enumerate(columns_order):
- if col not in row or row[col] is None:
- continue
- value = self.CoerceValue(row[col], col_dict[col]["type"])
- if isinstance(value, tuple):
- cell_cp = ""
- if len(value) == 3:
- cell_cp = ", %s" % encoder.encode(row[col][2])
- # We have a formatted value or custom property as well
- jscode += ("%s.setCell(%d, %d, %s, %s%s);\n" %
- (name, i, j,
- self.EscapeForJSCode(encoder, value[0]),
- self.EscapeForJSCode(encoder, value[1]), cell_cp))
- else:
- jscode += "%s.setCell(%d, %d, %s);\n" % (
- name, i, j, self.EscapeForJSCode(encoder, value))
- if cp:
- jscode += "%s.setRowProperties(%d, %s);\n" % (
- name, i, encoder.encode(cp))
- return jscode
-
- def ToHtml(self, columns_order=None, order_by=()):
- """Writes the data table as an HTML table code string.
-
- Args:
- columns_order: Optional. Specifies the order of columns in the
- output table. Specify a list of all column IDs in the order
- in which you want the table created.
- Note that you must list all column IDs in this parameter,
- if you use it.
- order_by: Optional. Specifies the name of the column(s) to sort by.
- Passed as is to _PreparedData.
-
- Returns:
- An HTML table code string.
- Example result (the result is without the newlines):
- <html><body><table border="1">
- <thead><tr><th>a</th><th>b</th><th>c</th></tr></thead>
- <tbody>
- <tr><td>1</td><td>"z"</td><td>2</td></tr>
- <tr><td>"3$"</td><td>"w"</td><td></td></tr>
- </tbody>
- </table></body></html>
-
- Raises:
- DataTableException: The data does not match the type.
- """
- table_template = "<html><body><table border=\"1\">%s</table></body></html>"
- columns_template = "<thead><tr>%s</tr></thead>"
- rows_template = "<tbody>%s</tbody>"
- row_template = "<tr>%s</tr>"
- header_cell_template = "<th>%s</th>"
- cell_template = "<td>%s</td>"
-
- if columns_order is None:
- columns_order = [col["id"] for col in self.__columns]
- col_dict = dict([(col["id"], col) for col in self.__columns])
-
- columns_list = []
- for col in columns_order:
- columns_list.append(header_cell_template %
- cgi.escape(col_dict[col]["label"]))
- columns_html = columns_template % "".join(columns_list)
-
- rows_list = []
- # We now go over the data and add each row
- for row, unused_cp in self._PreparedData(order_by):
- cells_list = []
- # We add all the elements of this row by their order
- for col in columns_order:
- # For empty string we want empty quotes ("").
- value = ""
- if col in row and row[col] is not None:
- value = self.CoerceValue(row[col], col_dict[col]["type"])
- if isinstance(value, tuple):
- # We have a formatted value and we're going to use it
- cells_list.append(cell_template % cgi.escape(self.ToString(value[1])))
- else:
- cells_list.append(cell_template % cgi.escape(self.ToString(value)))
- rows_list.append(row_template % "".join(cells_list))
- rows_html = rows_template % "".join(rows_list)
-
- return table_template % (columns_html + rows_html)
-
- def ToCsv(self, columns_order=None, order_by=(), separator=","):
- """Writes the data table as a CSV string.
-
- Output is encoded in UTF-8 because the Python "csv" module can't handle
- Unicode properly according to its documentation.
-
- Args:
- columns_order: Optional. Specifies the order of columns in the
- output table. Specify a list of all column IDs in the order
- in which you want the table created.
- Note that you must list all column IDs in this parameter,
- if you use it.
- order_by: Optional. Specifies the name of the column(s) to sort by.
- Passed as is to _PreparedData.
- separator: Optional. The separator to use between the values.
-
- Returns:
- A CSV string representing the table.
- Example result:
- 'a','b','c'
- 1,'z',2
- 3,'w',''
-
- Raises:
- DataTableException: The data does not match the type.
- """
-
- csv_buffer = cStringIO.StringIO()
- writer = csv.writer(csv_buffer, delimiter=separator)
-
- if columns_order is None:
- columns_order = [col["id"] for col in self.__columns]
- col_dict = dict([(col["id"], col) for col in self.__columns])
-
- writer.writerow([col_dict[col]["label"].encode("utf-8")
- for col in columns_order])
-
- # We now go over the data and add each row
- for row, unused_cp in self._PreparedData(order_by):
- cells_list = []
- # We add all the elements of this row by their order
- for col in columns_order:
- value = ""
- if col in row and row[col] is not None:
- value = self.CoerceValue(row[col], col_dict[col]["type"])
- if isinstance(value, tuple):
- # We have a formatted value. Using it only for date/time types.
- if col_dict[col]["type"] in ["date", "datetime", "timeofday"]:
- cells_list.append(self.ToString(value[1]).encode("utf-8"))
- else:
- cells_list.append(self.ToString(value[0]).encode("utf-8"))
- else:
- cells_list.append(self.ToString(value).encode("utf-8"))
- writer.writerow(cells_list)
- return csv_buffer.getvalue()
-
- def ToTsvExcel(self, columns_order=None, order_by=()):
- """Returns a file in tab-separated-format readable by MS Excel.
-
- Returns a file in UTF-16 little endian encoding, with tabs separating the
- values.
-
- Args:
- columns_order: Delegated to ToCsv.
- order_by: Delegated to ToCsv.
-
- Returns:
- A tab-separated little endian UTF16 file representing the table.
- """
- return (self.ToCsv(columns_order, order_by, separator="\t")
- .decode("utf-8").encode("UTF-16LE"))
-
- def _ToJSonObj(self, columns_order=None, order_by=()):
- """Returns an object suitable to be converted to JSON.
-
- Args:
- columns_order: Optional. A list of all column IDs in the order in which
- you want them created in the output table. If specified,
- all column IDs must be present.
- order_by: Optional. Specifies the name of the column(s) to sort by.
- Passed as is to _PreparedData().
-
- Returns:
- A dictionary object for use by ToJSon or ToJSonResponse.
- """
- if columns_order is None:
- columns_order = [col["id"] for col in self.__columns]
- col_dict = dict([(col["id"], col) for col in self.__columns])
-
- # Creating the column JSON objects
- col_objs = []
- for col_id in columns_order:
- col_obj = {"id": col_dict[col_id]["id"],
- "label": col_dict[col_id]["label"],
- "type": col_dict[col_id]["type"]}
- if col_dict[col_id]["custom_properties"]:
- col_obj["p"] = col_dict[col_id]["custom_properties"]
- col_objs.append(col_obj)
-
- # Creating the rows jsons
- row_objs = []
- for row, cp in self._PreparedData(order_by):
- cell_objs = []
- for col in columns_order:
- value = self.CoerceValue(row.get(col, None), col_dict[col]["type"])
- if value is None:
- cell_obj = None
- elif isinstance(value, tuple):
- cell_obj = {"v": value[0]}
- if len(value) > 1 and value[1] is not None:
- cell_obj["f"] = value[1]
- if len(value) == 3:
- cell_obj["p"] = value[2]
- else:
- cell_obj = {"v": value}
- cell_objs.append(cell_obj)
- row_obj = {"c": cell_objs}
- if cp:
- row_obj["p"] = cp
- row_objs.append(row_obj)
-
- json_obj = {"cols": col_objs, "rows": row_objs}
- if self.custom_properties:
- json_obj["p"] = self.custom_properties
-
- return json_obj
-
- def ToJSon(self, columns_order=None, order_by=()):
- """Returns a string that can be used in a JS DataTable constructor.
-
- This method writes a JSON string that can be passed directly into a Google
- Visualization API DataTable constructor. Use this output if you are
- hosting the visualization HTML on your site, and want to code the data
- table in Python. Pass this string into the
- google.visualization.DataTable constructor, e.g,:
- ... on my page that hosts my visualization ...
- google.setOnLoadCallback(drawTable);
- function drawTable() {
- var data = new google.visualization.DataTable(_my_JSon_string, 0.6);
- myTable.draw(data);
- }
-
- Args:
- columns_order: Optional. Specifies the order of columns in the
- output table. Specify a list of all column IDs in the order
- in which you want the table created.
- Note that you must list all column IDs in this parameter,
- if you use it.
- order_by: Optional. Specifies the name of the column(s) to sort by.
- Passed as is to _PreparedData().
-
- Returns:
- A JSon constructor string to generate a JS DataTable with the data
- stored in the DataTable object.
- Example result (the result is without the newlines):
- {cols: [{id:"a",label:"a",type:"number"},
- {id:"b",label:"b",type:"string"},
- {id:"c",label:"c",type:"number"}],
- rows: [{c:[{v:1},{v:"z"},{v:2}]}, c:{[{v:3,f:"3$"},{v:"w"},{v:null}]}],
- p: {'foo': 'bar'}}
-
- Raises:
- DataTableException: The data does not match the type.
- """
-
- encoder = DataTableJSONEncoder()
- return encoder.encode(
- self._ToJSonObj(columns_order, order_by)).encode("utf-8")
-
- def ToJSonResponse(self, columns_order=None, order_by=(), req_id=0,
- response_handler="google.visualization.Query.setResponse"):
- """Writes a table as a JSON response that can be returned as-is to a client.
-
- This method writes a JSON response to return to a client in response to a
- Google Visualization API query. This string can be processed by the calling
- page, and is used to deliver a data table to a visualization hosted on
- a different page.
-
- Args:
- columns_order: Optional. Passed straight to self.ToJSon().
- order_by: Optional. Passed straight to self.ToJSon().
- req_id: Optional. The response id, as retrieved by the request.
- response_handler: Optional. The response handler, as retrieved by the
- request.
-
- Returns:
- A JSON response string to be received by JS the visualization Query
- object. This response would be translated into a DataTable on the
- client side.
- Example result (newlines added for readability):
- google.visualization.Query.setResponse({
- 'version':'0.6', 'reqId':'0', 'status':'OK',
- 'table': {cols: [...], rows: [...]}});
-
- Note: The URL returning this string can be used as a data source by Google
- Visualization Gadgets or from JS code.
- """
-
- response_obj = {
- "version": "0.6",
- "reqId": str(req_id),
- "table": self._ToJSonObj(columns_order, order_by),
- "status": "ok"
- }
- encoder = DataTableJSONEncoder()
- return "%s(%s);" % (response_handler,
- encoder.encode(response_obj).encode("utf-8"))
-
- def ToResponse(self, columns_order=None, order_by=(), tqx=""):
- """Writes the right response according to the request string passed in tqx.
-
- This method parses the tqx request string (format of which is defined in
- the documentation for implementing a data source of Google Visualization),
- and returns the right response according to the request.
- It parses out the "out" parameter of tqx, calls the relevant response
- (ToJSonResponse() for "json", ToCsv() for "csv", ToHtml() for "html",
- ToTsvExcel() for "tsv-excel") and passes the response function the rest of
- the relevant request keys.
-
- Args:
- columns_order: Optional. Passed as is to the relevant response function.
- order_by: Optional. Passed as is to the relevant response function.
- tqx: Optional. The request string as received by HTTP GET. Should be in
- the format "key1:value1;key2:value2...". All keys have a default
- value, so an empty string will just do the default (which is calling
- ToJSonResponse() with no extra parameters).
-
- Returns:
- A response string, as returned by the relevant response function.
-
- Raises:
- DataTableException: One of the parameters passed in tqx is not supported.
- """
- tqx_dict = {}
- if tqx:
- tqx_dict = dict(opt.split(":") for opt in tqx.split(";"))
- if tqx_dict.get("version", "0.6") != "0.6":
- raise DataTableException(
- "Version (%s) passed by request is not supported."
- % tqx_dict["version"])
-
- if tqx_dict.get("out", "json") == "json":
- response_handler = tqx_dict.get("responseHandler",
- "google.visualization.Query.setResponse")
- return self.ToJSonResponse(columns_order, order_by,
- req_id=tqx_dict.get("reqId", 0),
- response_handler=response_handler)
- elif tqx_dict["out"] == "html":
- return self.ToHtml(columns_order, order_by)
- elif tqx_dict["out"] == "csv":
- return self.ToCsv(columns_order, order_by)
- elif tqx_dict["out"] == "tsv-excel":
- return self.ToTsvExcel(columns_order, order_by)
- else:
- raise DataTableException(
- "'out' parameter: '%s' is not supported" % tqx_dict["out"])
diff --git a/third_party/aom/test/hash_test.cc b/third_party/aom/test/hash_test.cc
deleted file mode 100644
index e9f7f63c9..000000000
--- a/third_party/aom/test/hash_test.cc
+++ /dev/null
@@ -1,133 +0,0 @@
-/*
- * Copyright (c) 2018, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#include <cstdlib>
-#include <new>
-
-#include "config/aom_config.h"
-#include "config/av1_rtcd.h"
-
-#include "aom_ports/aom_timer.h"
-#include "av1/encoder/hash.h"
-#include "test/acm_random.h"
-#include "test/util.h"
-#include "third_party/googletest/src/googletest/include/gtest/gtest.h"
-
-namespace {
-
-typedef uint32_t (*get_crc32c_value_func)(void *calculator, uint8_t *p,
- int length);
-
-typedef ::testing::tuple<get_crc32c_value_func, int> HashParam;
-
-class AV1Crc32cHashTest : public ::testing::TestWithParam<HashParam> {
- public:
- ~AV1Crc32cHashTest();
- void SetUp();
-
- void TearDown();
-
- protected:
- void RunCheckOutput(get_crc32c_value_func test_impl);
- void RunSpeedTest(get_crc32c_value_func test_impl);
-
- void RunZeroTest(get_crc32c_value_func test_impl);
-
- libaom_test::ACMRandom rnd_;
- CRC32C calc_;
- uint8_t *buffer_;
- int bsize_;
- int length_;
-};
-
-AV1Crc32cHashTest::~AV1Crc32cHashTest() { ; }
-
-void AV1Crc32cHashTest::SetUp() {
- rnd_.Reset(libaom_test::ACMRandom::DeterministicSeed());
- av1_crc32c_calculator_init(&calc_);
-
- bsize_ = GET_PARAM(1);
- length_ = bsize_ * bsize_ * sizeof(uint16_t);
- buffer_ = new uint8_t[length_];
- ASSERT_TRUE(buffer_ != NULL);
- for (int i = 0; i < length_; ++i) {
- buffer_[i] = rnd_.Rand8();
- }
-}
-
-void AV1Crc32cHashTest::TearDown() { delete[] buffer_; }
-
-void AV1Crc32cHashTest::RunCheckOutput(get_crc32c_value_func test_impl) {
- get_crc32c_value_func ref_impl = av1_get_crc32c_value_c;
- // for the same buffer crc should be the same
- uint32_t crc0 = test_impl(&calc_, buffer_, length_);
- uint32_t crc1 = test_impl(&calc_, buffer_, length_);
- uint32_t crc2 = ref_impl(&calc_, buffer_, length_);
- ASSERT_EQ(crc0, crc1);
- ASSERT_EQ(crc0, crc2); // should equal to software version
- // modify buffer
- buffer_[0] += 1;
- uint32_t crc3 = test_impl(&calc_, buffer_, length_);
- uint32_t crc4 = ref_impl(&calc_, buffer_, length_);
- ASSERT_NE(crc0, crc3); // crc shoud not equal to previous one
- ASSERT_EQ(crc3, crc4);
-}
-
-void AV1Crc32cHashTest::RunSpeedTest(get_crc32c_value_func test_impl) {
- get_crc32c_value_func impls[] = { av1_get_crc32c_value_c, test_impl };
- const int repeat = 10000000 / (bsize_ + bsize_);
-
- aom_usec_timer timer;
- double time[2];
- for (int i = 0; i < 2; ++i) {
- aom_usec_timer_start(&timer);
- for (int j = 0; j < repeat; ++j) {
- impls[i](&calc_, buffer_, length_);
- }
- aom_usec_timer_mark(&timer);
- time[i] = static_cast<double>(aom_usec_timer_elapsed(&timer));
- }
- printf("hash %3dx%-3d:%7.2f/%7.2fus", bsize_, bsize_, time[0], time[1]);
- printf("(%3.2f)\n", time[0] / time[1]);
-}
-
-void AV1Crc32cHashTest::RunZeroTest(get_crc32c_value_func test_impl) {
- uint8_t buffer0[1024] = { 0 };
- // for buffer with different size the crc should not be the same
- const uint32_t crc0 = test_impl(&calc_, buffer0, 32);
- const uint32_t crc1 = test_impl(&calc_, buffer0, 128);
- const uint32_t crc2 = test_impl(&calc_, buffer0, 1024);
- ASSERT_NE(crc0, crc1);
- ASSERT_NE(crc0, crc2);
- ASSERT_NE(crc1, crc2);
-}
-
-TEST_P(AV1Crc32cHashTest, CheckOutput) { RunCheckOutput(GET_PARAM(0)); }
-
-TEST_P(AV1Crc32cHashTest, CheckZero) { RunZeroTest(GET_PARAM(0)); }
-
-TEST_P(AV1Crc32cHashTest, DISABLED_Speed) { RunSpeedTest(GET_PARAM(0)); }
-
-const int kValidBlockSize[] = { 64, 32, 8, 4 };
-
-INSTANTIATE_TEST_CASE_P(
- C, AV1Crc32cHashTest,
- ::testing::Combine(::testing::Values(&av1_get_crc32c_value_c),
- ::testing::ValuesIn(kValidBlockSize)));
-
-#if HAVE_SSE4_2
-INSTANTIATE_TEST_CASE_P(
- SSE4_2, AV1Crc32cHashTest,
- ::testing::Combine(::testing::Values(&av1_get_crc32c_value_sse4_2),
- ::testing::ValuesIn(kValidBlockSize)));
-#endif
-
-} // namespace
diff --git a/third_party/aom/test/hbd_metrics_test.cc b/third_party/aom/test/hbd_metrics_test.cc
deleted file mode 100644
index 09df9bde4..000000000
--- a/third_party/aom/test/hbd_metrics_test.cc
+++ /dev/null
@@ -1,239 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#include <math.h>
-#include <stdlib.h>
-#include <new>
-
-#include "third_party/googletest/src/googletest/include/gtest/gtest.h"
-#include "test/acm_random.h"
-#include "test/util.h"
-
-#include "config/aom_config.h"
-
-#include "aom_dsp/psnr.h"
-#include "aom_dsp/ssim.h"
-#include "aom_ports/mem.h"
-#include "aom_ports/msvc.h"
-#include "aom_scale/yv12config.h"
-
-using libaom_test::ACMRandom;
-
-namespace {
-
-typedef double (*LBDMetricFunc)(const YV12_BUFFER_CONFIG *source,
- const YV12_BUFFER_CONFIG *dest);
-typedef double (*HBDMetricFunc)(const YV12_BUFFER_CONFIG *source,
- const YV12_BUFFER_CONFIG *dest, uint32_t in_bd,
- uint32_t bd);
-
-double compute_hbd_psnr(const YV12_BUFFER_CONFIG *source,
- const YV12_BUFFER_CONFIG *dest, uint32_t in_bd,
- uint32_t bd) {
- PSNR_STATS psnr;
- aom_calc_highbd_psnr(source, dest, &psnr, bd, in_bd);
- return psnr.psnr[0];
-}
-
-double compute_psnr(const YV12_BUFFER_CONFIG *source,
- const YV12_BUFFER_CONFIG *dest) {
- PSNR_STATS psnr;
- aom_calc_psnr(source, dest, &psnr);
- return psnr.psnr[0];
-}
-
-double compute_hbd_psnrhvs(const YV12_BUFFER_CONFIG *source,
- const YV12_BUFFER_CONFIG *dest, uint32_t in_bd,
- uint32_t bd) {
- double tempy, tempu, tempv;
- return aom_psnrhvs(source, dest, &tempy, &tempu, &tempv, bd, in_bd);
-}
-
-double compute_psnrhvs(const YV12_BUFFER_CONFIG *source,
- const YV12_BUFFER_CONFIG *dest) {
- double tempy, tempu, tempv;
- return aom_psnrhvs(source, dest, &tempy, &tempu, &tempv, 8, 8);
-}
-
-double compute_hbd_fastssim(const YV12_BUFFER_CONFIG *source,
- const YV12_BUFFER_CONFIG *dest, uint32_t in_bd,
- uint32_t bd) {
- double tempy, tempu, tempv;
- return aom_calc_fastssim(source, dest, &tempy, &tempu, &tempv, bd, in_bd);
-}
-
-double compute_fastssim(const YV12_BUFFER_CONFIG *source,
- const YV12_BUFFER_CONFIG *dest) {
- double tempy, tempu, tempv;
- return aom_calc_fastssim(source, dest, &tempy, &tempu, &tempv, 8, 8);
-}
-
-double compute_hbd_aomssim(const YV12_BUFFER_CONFIG *source,
- const YV12_BUFFER_CONFIG *dest, uint32_t in_bd,
- uint32_t bd) {
- double ssim, weight;
- ssim = aom_highbd_calc_ssim(source, dest, &weight, bd, in_bd);
- return 100 * pow(ssim / weight, 8.0);
-}
-
-double compute_aomssim(const YV12_BUFFER_CONFIG *source,
- const YV12_BUFFER_CONFIG *dest) {
- double ssim, weight;
- ssim = aom_calc_ssim(source, dest, &weight);
- return 100 * pow(ssim / weight, 8.0);
-}
-
-class HBDMetricsTestBase {
- public:
- virtual ~HBDMetricsTestBase() {}
-
- protected:
- void RunAccuracyCheck() {
- const int width = 1920;
- const int height = 1080;
- size_t i = 0;
- const uint8_t kPixFiller = 128;
- YV12_BUFFER_CONFIG lbd_src, lbd_dst;
- YV12_BUFFER_CONFIG hbd_src, hbd_dst;
- ACMRandom rnd(ACMRandom::DeterministicSeed());
- double lbd_db, hbd_db;
-
- memset(&lbd_src, 0, sizeof(lbd_src));
- memset(&lbd_dst, 0, sizeof(lbd_dst));
- memset(&hbd_src, 0, sizeof(hbd_src));
- memset(&hbd_dst, 0, sizeof(hbd_dst));
-
- aom_alloc_frame_buffer(&lbd_src, width, height, 1, 1, 0, 32, 16);
- aom_alloc_frame_buffer(&lbd_dst, width, height, 1, 1, 0, 32, 16);
- aom_alloc_frame_buffer(&hbd_src, width, height, 1, 1, 1, 32, 16);
- aom_alloc_frame_buffer(&hbd_dst, width, height, 1, 1, 1, 32, 16);
-
- memset(lbd_src.buffer_alloc, kPixFiller, lbd_src.buffer_alloc_sz);
- while (i < lbd_src.buffer_alloc_sz) {
- uint16_t spel, dpel;
- spel = lbd_src.buffer_alloc[i];
- // Create some distortion for dst buffer.
- dpel = rnd.Rand8();
- lbd_dst.buffer_alloc[i] = (uint8_t)dpel;
- ((uint16_t *)(hbd_src.buffer_alloc))[i] = spel << (bit_depth_ - 8);
- ((uint16_t *)(hbd_dst.buffer_alloc))[i] = dpel << (bit_depth_ - 8);
- i++;
- }
-
- lbd_db = lbd_metric_(&lbd_src, &lbd_dst);
- hbd_db = hbd_metric_(&hbd_src, &hbd_dst, input_bit_depth_, bit_depth_);
- EXPECT_LE(fabs(lbd_db - hbd_db), threshold_);
-
- i = 0;
- while (i < lbd_src.buffer_alloc_sz) {
- uint16_t dpel;
- // Create some small distortion for dst buffer.
- dpel = 120 + (rnd.Rand8() >> 4);
- lbd_dst.buffer_alloc[i] = (uint8_t)dpel;
- ((uint16_t *)(hbd_dst.buffer_alloc))[i] = dpel << (bit_depth_ - 8);
- i++;
- }
-
- lbd_db = lbd_metric_(&lbd_src, &lbd_dst);
- hbd_db = hbd_metric_(&hbd_src, &hbd_dst, input_bit_depth_, bit_depth_);
- EXPECT_LE(fabs(lbd_db - hbd_db), threshold_);
-
- i = 0;
- while (i < lbd_src.buffer_alloc_sz) {
- uint16_t dpel;
- // Create some small distortion for dst buffer.
- dpel = 126 + (rnd.Rand8() >> 6);
- lbd_dst.buffer_alloc[i] = (uint8_t)dpel;
- ((uint16_t *)(hbd_dst.buffer_alloc))[i] = dpel << (bit_depth_ - 8);
- i++;
- }
-
- lbd_db = lbd_metric_(&lbd_src, &lbd_dst);
- hbd_db = hbd_metric_(&hbd_src, &hbd_dst, input_bit_depth_, bit_depth_);
- EXPECT_LE(fabs(lbd_db - hbd_db), threshold_);
-
- aom_free_frame_buffer(&lbd_src);
- aom_free_frame_buffer(&lbd_dst);
- aom_free_frame_buffer(&hbd_src);
- aom_free_frame_buffer(&hbd_dst);
- }
-
- int input_bit_depth_;
- int bit_depth_;
- double threshold_;
- LBDMetricFunc lbd_metric_;
- HBDMetricFunc hbd_metric_;
-};
-
-typedef ::testing::tuple<LBDMetricFunc, HBDMetricFunc, int, int, double>
- MetricTestTParam;
-class HBDMetricsTest : public HBDMetricsTestBase,
- public ::testing::TestWithParam<MetricTestTParam> {
- public:
- virtual void SetUp() {
- lbd_metric_ = GET_PARAM(0);
- hbd_metric_ = GET_PARAM(1);
- input_bit_depth_ = GET_PARAM(2);
- bit_depth_ = GET_PARAM(3);
- threshold_ = GET_PARAM(4);
- }
- virtual void TearDown() {}
-};
-
-TEST_P(HBDMetricsTest, RunAccuracyCheck) { RunAccuracyCheck(); }
-
-// Allow small variation due to floating point operations.
-static const double kSsim_thresh = 0.001;
-// Allow some additional errors accumulated in floating point operations.
-static const double kFSsim_thresh = 0.03;
-// Allow some extra variation due to rounding error accumulated in dct.
-static const double kPhvs_thresh = 0.3;
-
-INSTANTIATE_TEST_CASE_P(
- AOMSSIM, HBDMetricsTest,
- ::testing::Values(MetricTestTParam(&compute_aomssim, &compute_hbd_aomssim,
- 8, 10, kSsim_thresh),
- MetricTestTParam(&compute_aomssim, &compute_hbd_aomssim,
- 10, 10, kPhvs_thresh),
- MetricTestTParam(&compute_aomssim, &compute_hbd_aomssim,
- 8, 12, kSsim_thresh),
- MetricTestTParam(&compute_aomssim, &compute_hbd_aomssim,
- 12, 12, kPhvs_thresh)));
-INSTANTIATE_TEST_CASE_P(
- FASTSSIM, HBDMetricsTest,
- ::testing::Values(MetricTestTParam(&compute_fastssim, &compute_hbd_fastssim,
- 8, 10, kFSsim_thresh),
- MetricTestTParam(&compute_fastssim, &compute_hbd_fastssim,
- 10, 10, kFSsim_thresh),
- MetricTestTParam(&compute_fastssim, &compute_hbd_fastssim,
- 8, 12, kFSsim_thresh),
- MetricTestTParam(&compute_fastssim, &compute_hbd_fastssim,
- 12, 12, kFSsim_thresh)));
-INSTANTIATE_TEST_CASE_P(
- PSNRHVS, HBDMetricsTest,
- ::testing::Values(MetricTestTParam(&compute_psnrhvs, &compute_hbd_psnrhvs,
- 8, 10, kPhvs_thresh),
- MetricTestTParam(&compute_psnrhvs, &compute_hbd_psnrhvs,
- 10, 10, kPhvs_thresh),
- MetricTestTParam(&compute_psnrhvs, &compute_hbd_psnrhvs,
- 8, 12, kPhvs_thresh),
- MetricTestTParam(&compute_psnrhvs, &compute_hbd_psnrhvs,
- 12, 12, kPhvs_thresh)));
-INSTANTIATE_TEST_CASE_P(
- PSNR, HBDMetricsTest,
- ::testing::Values(
- MetricTestTParam(&compute_psnr, &compute_hbd_psnr, 8, 10, kPhvs_thresh),
- MetricTestTParam(&compute_psnr, &compute_hbd_psnr, 10, 10,
- kPhvs_thresh),
- MetricTestTParam(&compute_psnr, &compute_hbd_psnr, 8, 12, kPhvs_thresh),
- MetricTestTParam(&compute_psnr, &compute_hbd_psnr, 12, 12,
- kPhvs_thresh)));
-} // namespace
diff --git a/third_party/aom/test/hiprec_convolve_test.cc b/third_party/aom/test/hiprec_convolve_test.cc
deleted file mode 100644
index f94a0730c..000000000
--- a/third_party/aom/test/hiprec_convolve_test.cc
+++ /dev/null
@@ -1,62 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#include "third_party/googletest/src/googletest/include/gtest/gtest.h"
-#include "test/hiprec_convolve_test_util.h"
-
-using ::testing::make_tuple;
-using ::testing::tuple;
-using libaom_test::ACMRandom;
-using libaom_test::AV1HighbdHiprecConvolve::AV1HighbdHiprecConvolveTest;
-using libaom_test::AV1HiprecConvolve::AV1HiprecConvolveTest;
-
-namespace {
-
-TEST_P(AV1HiprecConvolveTest, CheckOutput) { RunCheckOutput(GET_PARAM(3)); }
-TEST_P(AV1HiprecConvolveTest, DISABLED_SpeedTest) {
- RunSpeedTest(GET_PARAM(3));
-}
-#if HAVE_SSE2
-INSTANTIATE_TEST_CASE_P(SSE2, AV1HiprecConvolveTest,
- libaom_test::AV1HiprecConvolve::BuildParams(
- av1_wiener_convolve_add_src_sse2));
-#endif
-#if HAVE_AVX2
-INSTANTIATE_TEST_CASE_P(AVX2, AV1HiprecConvolveTest,
- libaom_test::AV1HiprecConvolve::BuildParams(
- av1_wiener_convolve_add_src_avx2));
-#endif
-#if HAVE_NEON
-INSTANTIATE_TEST_CASE_P(NEON, AV1HiprecConvolveTest,
- libaom_test::AV1HiprecConvolve::BuildParams(
- av1_wiener_convolve_add_src_neon));
-#endif
-
-#if HAVE_SSSE3 || HAVE_AVX2
-TEST_P(AV1HighbdHiprecConvolveTest, CheckOutput) {
- RunCheckOutput(GET_PARAM(4));
-}
-TEST_P(AV1HighbdHiprecConvolveTest, DISABLED_SpeedTest) {
- RunSpeedTest(GET_PARAM(4));
-}
-#if HAVE_SSSE3
-INSTANTIATE_TEST_CASE_P(SSSE3, AV1HighbdHiprecConvolveTest,
- libaom_test::AV1HighbdHiprecConvolve::BuildParams(
- av1_highbd_wiener_convolve_add_src_ssse3));
-#endif
-#if HAVE_AVX2
-INSTANTIATE_TEST_CASE_P(AVX2, AV1HighbdHiprecConvolveTest,
- libaom_test::AV1HighbdHiprecConvolve::BuildParams(
- av1_highbd_wiener_convolve_add_src_avx2));
-#endif
-#endif
-
-} // namespace
diff --git a/third_party/aom/test/hiprec_convolve_test_util.cc b/third_party/aom/test/hiprec_convolve_test_util.cc
deleted file mode 100644
index 2672bcec3..000000000
--- a/third_party/aom/test/hiprec_convolve_test_util.cc
+++ /dev/null
@@ -1,331 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#include "test/hiprec_convolve_test_util.h"
-
-#include "av1/common/restoration.h"
-
-using ::testing::make_tuple;
-using ::testing::tuple;
-
-namespace libaom_test {
-
-// Generate a random pair of filter kernels, using the ranges
-// of possible values from the loop-restoration experiment
-static void generate_kernels(ACMRandom *rnd, InterpKernel hkernel,
- InterpKernel vkernel) {
- hkernel[0] = hkernel[6] =
- WIENER_FILT_TAP0_MINV +
- rnd->PseudoUniform(WIENER_FILT_TAP0_MAXV + 1 - WIENER_FILT_TAP0_MINV);
- hkernel[1] = hkernel[5] =
- WIENER_FILT_TAP1_MINV +
- rnd->PseudoUniform(WIENER_FILT_TAP1_MAXV + 1 - WIENER_FILT_TAP1_MINV);
- hkernel[2] = hkernel[4] =
- WIENER_FILT_TAP2_MINV +
- rnd->PseudoUniform(WIENER_FILT_TAP2_MAXV + 1 - WIENER_FILT_TAP2_MINV);
- hkernel[3] = -(hkernel[0] + hkernel[1] + hkernel[2]);
- hkernel[7] = 0;
-
- vkernel[0] = vkernel[6] =
- WIENER_FILT_TAP0_MINV +
- rnd->PseudoUniform(WIENER_FILT_TAP0_MAXV + 1 - WIENER_FILT_TAP0_MINV);
- vkernel[1] = vkernel[5] =
- WIENER_FILT_TAP1_MINV +
- rnd->PseudoUniform(WIENER_FILT_TAP1_MAXV + 1 - WIENER_FILT_TAP1_MINV);
- vkernel[2] = vkernel[4] =
- WIENER_FILT_TAP2_MINV +
- rnd->PseudoUniform(WIENER_FILT_TAP2_MAXV + 1 - WIENER_FILT_TAP2_MINV);
- vkernel[3] = -(vkernel[0] + vkernel[1] + vkernel[2]);
- vkernel[7] = 0;
-}
-
-namespace AV1HiprecConvolve {
-
-::testing::internal::ParamGenerator<HiprecConvolveParam> BuildParams(
- hiprec_convolve_func filter) {
- const HiprecConvolveParam params[] = {
- make_tuple(8, 8, 50000, filter), make_tuple(8, 4, 50000, filter),
- make_tuple(64, 24, 1000, filter), make_tuple(64, 64, 1000, filter),
- make_tuple(64, 56, 1000, filter), make_tuple(32, 8, 10000, filter),
- make_tuple(32, 28, 10000, filter), make_tuple(32, 32, 10000, filter),
- make_tuple(16, 34, 10000, filter), make_tuple(32, 34, 10000, filter),
- make_tuple(64, 34, 1000, filter), make_tuple(8, 17, 10000, filter),
- make_tuple(16, 17, 10000, filter), make_tuple(32, 17, 10000, filter)
- };
- return ::testing::ValuesIn(params);
-}
-
-AV1HiprecConvolveTest::~AV1HiprecConvolveTest() {}
-void AV1HiprecConvolveTest::SetUp() {
- rnd_.Reset(ACMRandom::DeterministicSeed());
-}
-
-void AV1HiprecConvolveTest::TearDown() { libaom_test::ClearSystemState(); }
-
-void AV1HiprecConvolveTest::RunCheckOutput(hiprec_convolve_func test_impl) {
- const int w = 128, h = 128;
- const int out_w = GET_PARAM(0), out_h = GET_PARAM(1);
- const int num_iters = GET_PARAM(2);
- int i, j;
- const ConvolveParams conv_params = get_conv_params_wiener(8);
-
- uint8_t *input_ = new uint8_t[h * w];
- uint8_t *input = input_;
-
- // The AVX2 convolve functions always write rows with widths that are
- // multiples of 16. So to avoid a buffer overflow, we may need to pad
- // rows to a multiple of 16.
- int output_n = ALIGN_POWER_OF_TWO(out_w, 4) * out_h;
- uint8_t *output = new uint8_t[output_n];
- uint8_t *output2 = new uint8_t[output_n];
-
- // Generate random filter kernels
- DECLARE_ALIGNED(16, InterpKernel, hkernel);
- DECLARE_ALIGNED(16, InterpKernel, vkernel);
-
- generate_kernels(&rnd_, hkernel, vkernel);
-
- for (i = 0; i < h; ++i)
- for (j = 0; j < w; ++j) input[i * w + j] = rnd_.Rand8();
-
- for (i = 0; i < num_iters; ++i) {
- // Choose random locations within the source block
- int offset_r = 3 + rnd_.PseudoUniform(h - out_h - 7);
- int offset_c = 3 + rnd_.PseudoUniform(w - out_w - 7);
- av1_wiener_convolve_add_src_c(input + offset_r * w + offset_c, w, output,
- out_w, hkernel, 16, vkernel, 16, out_w, out_h,
- &conv_params);
- test_impl(input + offset_r * w + offset_c, w, output2, out_w, hkernel, 16,
- vkernel, 16, out_w, out_h, &conv_params);
-
- for (j = 0; j < out_w * out_h; ++j)
- ASSERT_EQ(output[j], output2[j])
- << "Pixel mismatch at index " << j << " = (" << (j % out_w) << ", "
- << (j / out_w) << ") on iteration " << i;
- }
- delete[] input_;
- delete[] output;
- delete[] output2;
-}
-
-void AV1HiprecConvolveTest::RunSpeedTest(hiprec_convolve_func test_impl) {
- const int w = 128, h = 128;
- const int out_w = GET_PARAM(0), out_h = GET_PARAM(1);
- const int num_iters = GET_PARAM(2) / 500;
- int i, j, k;
- const ConvolveParams conv_params = get_conv_params_wiener(8);
-
- uint8_t *input_ = new uint8_t[h * w];
- uint8_t *input = input_;
-
- // The AVX2 convolve functions always write rows with widths that are
- // multiples of 16. So to avoid a buffer overflow, we may need to pad
- // rows to a multiple of 16.
- int output_n = ALIGN_POWER_OF_TWO(out_w, 4) * out_h;
- uint8_t *output = new uint8_t[output_n];
- uint8_t *output2 = new uint8_t[output_n];
-
- // Generate random filter kernels
- DECLARE_ALIGNED(16, InterpKernel, hkernel);
- DECLARE_ALIGNED(16, InterpKernel, vkernel);
-
- generate_kernels(&rnd_, hkernel, vkernel);
-
- for (i = 0; i < h; ++i)
- for (j = 0; j < w; ++j) input[i * w + j] = rnd_.Rand8();
-
- aom_usec_timer ref_timer;
- aom_usec_timer_start(&ref_timer);
- for (i = 0; i < num_iters; ++i) {
- for (j = 3; j < h - out_h - 4; j++) {
- for (k = 3; k < w - out_w - 4; k++) {
- av1_wiener_convolve_add_src_c(input + j * w + k, w, output, out_w,
- hkernel, 16, vkernel, 16, out_w, out_h,
- &conv_params);
- }
- }
- }
- aom_usec_timer_mark(&ref_timer);
- const int64_t ref_time = aom_usec_timer_elapsed(&ref_timer);
-
- aom_usec_timer tst_timer;
- aom_usec_timer_start(&tst_timer);
- for (i = 0; i < num_iters; ++i) {
- for (j = 3; j < h - out_h - 4; j++) {
- for (k = 3; k < w - out_w - 4; k++) {
- test_impl(input + j * w + k, w, output2, out_w, hkernel, 16, vkernel,
- 16, out_w, out_h, &conv_params);
- }
- }
- }
- aom_usec_timer_mark(&tst_timer);
- const int64_t tst_time = aom_usec_timer_elapsed(&tst_timer);
-
- std::cout << "[ ] C time = " << ref_time / 1000
- << " ms, SIMD time = " << tst_time / 1000 << " ms\n";
-
- EXPECT_GT(ref_time, tst_time)
- << "Error: AV1HiprecConvolveTest.SpeedTest, SIMD slower than C.\n"
- << "C time: " << ref_time << " us\n"
- << "SIMD time: " << tst_time << " us\n";
-
- delete[] input_;
- delete[] output;
- delete[] output2;
-}
-} // namespace AV1HiprecConvolve
-
-namespace AV1HighbdHiprecConvolve {
-
-::testing::internal::ParamGenerator<HighbdHiprecConvolveParam> BuildParams(
- highbd_hiprec_convolve_func filter) {
- const HighbdHiprecConvolveParam params[] = {
- make_tuple(8, 8, 50000, 8, filter), make_tuple(64, 64, 1000, 8, filter),
- make_tuple(32, 8, 10000, 8, filter), make_tuple(8, 8, 50000, 10, filter),
- make_tuple(64, 64, 1000, 10, filter), make_tuple(32, 8, 10000, 10, filter),
- make_tuple(8, 8, 50000, 12, filter), make_tuple(64, 64, 1000, 12, filter),
- make_tuple(32, 8, 10000, 12, filter),
- };
- return ::testing::ValuesIn(params);
-}
-
-AV1HighbdHiprecConvolveTest::~AV1HighbdHiprecConvolveTest() {}
-void AV1HighbdHiprecConvolveTest::SetUp() {
- rnd_.Reset(ACMRandom::DeterministicSeed());
-}
-
-void AV1HighbdHiprecConvolveTest::TearDown() {
- libaom_test::ClearSystemState();
-}
-
-void AV1HighbdHiprecConvolveTest::RunCheckOutput(
- highbd_hiprec_convolve_func test_impl) {
- const int w = 128, h = 128;
- const int out_w = GET_PARAM(0), out_h = GET_PARAM(1);
- const int num_iters = GET_PARAM(2);
- const int bd = GET_PARAM(3);
- int i, j;
- const ConvolveParams conv_params = get_conv_params_wiener(bd);
-
- uint16_t *input = new uint16_t[h * w];
-
- // The AVX2 convolve functions always write rows with widths that are
- // multiples of 16. So to avoid a buffer overflow, we may need to pad
- // rows to a multiple of 16.
- int output_n = ALIGN_POWER_OF_TWO(out_w, 4) * out_h;
- uint16_t *output = new uint16_t[output_n];
- uint16_t *output2 = new uint16_t[output_n];
-
- // Generate random filter kernels
- DECLARE_ALIGNED(16, InterpKernel, hkernel);
- DECLARE_ALIGNED(16, InterpKernel, vkernel);
-
- generate_kernels(&rnd_, hkernel, vkernel);
-
- for (i = 0; i < h; ++i)
- for (j = 0; j < w; ++j) input[i * w + j] = rnd_.Rand16() & ((1 << bd) - 1);
-
- uint8_t *input_ptr = CONVERT_TO_BYTEPTR(input);
- uint8_t *output_ptr = CONVERT_TO_BYTEPTR(output);
- uint8_t *output2_ptr = CONVERT_TO_BYTEPTR(output2);
-
- for (i = 0; i < num_iters; ++i) {
- // Choose random locations within the source block
- int offset_r = 3 + rnd_.PseudoUniform(h - out_h - 7);
- int offset_c = 3 + rnd_.PseudoUniform(w - out_w - 7);
- av1_highbd_wiener_convolve_add_src_c(
- input_ptr + offset_r * w + offset_c, w, output_ptr, out_w, hkernel, 16,
- vkernel, 16, out_w, out_h, &conv_params, bd);
- test_impl(input_ptr + offset_r * w + offset_c, w, output2_ptr, out_w,
- hkernel, 16, vkernel, 16, out_w, out_h, &conv_params, bd);
-
- for (j = 0; j < out_w * out_h; ++j)
- ASSERT_EQ(output[j], output2[j])
- << "Pixel mismatch at index " << j << " = (" << (j % out_w) << ", "
- << (j / out_w) << ") on iteration " << i;
- }
- delete[] input;
- delete[] output;
- delete[] output2;
-}
-
-void AV1HighbdHiprecConvolveTest::RunSpeedTest(
- highbd_hiprec_convolve_func test_impl) {
- const int w = 128, h = 128;
- const int out_w = GET_PARAM(0), out_h = GET_PARAM(1);
- const int num_iters = GET_PARAM(2) / 500;
- const int bd = GET_PARAM(3);
- int i, j, k;
- const ConvolveParams conv_params = get_conv_params_wiener(bd);
-
- uint16_t *input = new uint16_t[h * w];
-
- // The AVX2 convolve functions always write rows with widths that are
- // multiples of 16. So to avoid a buffer overflow, we may need to pad
- // rows to a multiple of 16.
- int output_n = ALIGN_POWER_OF_TWO(out_w, 4) * out_h;
- uint16_t *output = new uint16_t[output_n];
- uint16_t *output2 = new uint16_t[output_n];
-
- // Generate random filter kernels
- DECLARE_ALIGNED(16, InterpKernel, hkernel);
- DECLARE_ALIGNED(16, InterpKernel, vkernel);
-
- generate_kernels(&rnd_, hkernel, vkernel);
-
- for (i = 0; i < h; ++i)
- for (j = 0; j < w; ++j) input[i * w + j] = rnd_.Rand16() & ((1 << bd) - 1);
-
- uint8_t *input_ptr = CONVERT_TO_BYTEPTR(input);
- uint8_t *output_ptr = CONVERT_TO_BYTEPTR(output);
- uint8_t *output2_ptr = CONVERT_TO_BYTEPTR(output2);
-
- aom_usec_timer ref_timer;
- aom_usec_timer_start(&ref_timer);
- for (i = 0; i < num_iters; ++i) {
- for (j = 3; j < h - out_h - 4; j++) {
- for (k = 3; k < w - out_w - 4; k++) {
- av1_highbd_wiener_convolve_add_src_c(
- input_ptr + j * w + k, w, output_ptr, out_w, hkernel, 16, vkernel,
- 16, out_w, out_h, &conv_params, bd);
- }
- }
- }
- aom_usec_timer_mark(&ref_timer);
- const int64_t ref_time = aom_usec_timer_elapsed(&ref_timer);
-
- aom_usec_timer tst_timer;
- aom_usec_timer_start(&tst_timer);
- for (i = 0; i < num_iters; ++i) {
- for (j = 3; j < h - out_h - 4; j++) {
- for (k = 3; k < w - out_w - 4; k++) {
- test_impl(input_ptr + j * w + k, w, output2_ptr, out_w, hkernel, 16,
- vkernel, 16, out_w, out_h, &conv_params, bd);
- }
- }
- }
- aom_usec_timer_mark(&tst_timer);
- const int64_t tst_time = aom_usec_timer_elapsed(&tst_timer);
-
- std::cout << "[ ] C time = " << ref_time / 1000
- << " ms, SIMD time = " << tst_time / 1000 << " ms\n";
-
- EXPECT_GT(ref_time, tst_time)
- << "Error: AV1HighbdHiprecConvolveTest.SpeedTest, SIMD slower than C.\n"
- << "C time: " << ref_time << " us\n"
- << "SIMD time: " << tst_time << " us\n";
-
- delete[] input;
- delete[] output;
- delete[] output2;
-}
-} // namespace AV1HighbdHiprecConvolve
-} // namespace libaom_test
diff --git a/third_party/aom/test/hiprec_convolve_test_util.h b/third_party/aom/test/hiprec_convolve_test_util.h
deleted file mode 100644
index 2abe24b57..000000000
--- a/third_party/aom/test/hiprec_convolve_test_util.h
+++ /dev/null
@@ -1,93 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#ifndef AOM_TEST_HIPREC_CONVOLVE_TEST_UTIL_H_
-#define AOM_TEST_HIPREC_CONVOLVE_TEST_UTIL_H_
-
-#include "config/av1_rtcd.h"
-
-#include "test/acm_random.h"
-#include "test/util.h"
-#include "test/clear_system_state.h"
-#include "test/register_state_check.h"
-#include "third_party/googletest/src/googletest/include/gtest/gtest.h"
-
-#include "aom_ports/aom_timer.h"
-#include "av1/common/convolve.h"
-#include "av1/common/mv.h"
-
-namespace libaom_test {
-
-namespace AV1HiprecConvolve {
-
-typedef void (*hiprec_convolve_func)(const uint8_t *src, ptrdiff_t src_stride,
- uint8_t *dst, ptrdiff_t dst_stride,
- const int16_t *filter_x, int x_step_q4,
- const int16_t *filter_y, int y_step_q4,
- int w, int h,
- const ConvolveParams *conv_params);
-
-typedef ::testing::tuple<int, int, int, hiprec_convolve_func>
- HiprecConvolveParam;
-
-::testing::internal::ParamGenerator<HiprecConvolveParam> BuildParams(
- hiprec_convolve_func filter);
-
-class AV1HiprecConvolveTest
- : public ::testing::TestWithParam<HiprecConvolveParam> {
- public:
- virtual ~AV1HiprecConvolveTest();
- virtual void SetUp();
-
- virtual void TearDown();
-
- protected:
- void RunCheckOutput(hiprec_convolve_func test_impl);
- void RunSpeedTest(hiprec_convolve_func test_impl);
-
- libaom_test::ACMRandom rnd_;
-};
-
-} // namespace AV1HiprecConvolve
-
-namespace AV1HighbdHiprecConvolve {
-typedef void (*highbd_hiprec_convolve_func)(
- const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst,
- ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4,
- const int16_t *filter_y, int y_step_q4, int w, int h,
- const ConvolveParams *conv_params, int bps);
-
-typedef ::testing::tuple<int, int, int, int, highbd_hiprec_convolve_func>
- HighbdHiprecConvolveParam;
-
-::testing::internal::ParamGenerator<HighbdHiprecConvolveParam> BuildParams(
- highbd_hiprec_convolve_func filter);
-
-class AV1HighbdHiprecConvolveTest
- : public ::testing::TestWithParam<HighbdHiprecConvolveParam> {
- public:
- virtual ~AV1HighbdHiprecConvolveTest();
- virtual void SetUp();
-
- virtual void TearDown();
-
- protected:
- void RunCheckOutput(highbd_hiprec_convolve_func test_impl);
- void RunSpeedTest(highbd_hiprec_convolve_func test_impl);
-
- libaom_test::ACMRandom rnd_;
-};
-
-} // namespace AV1HighbdHiprecConvolve
-
-} // namespace libaom_test
-
-#endif // AOM_TEST_HIPREC_CONVOLVE_TEST_UTIL_H_
diff --git a/third_party/aom/test/horz_superres_test.cc b/third_party/aom/test/horz_superres_test.cc
deleted file mode 100644
index 973f55b66..000000000
--- a/third_party/aom/test/horz_superres_test.cc
+++ /dev/null
@@ -1,322 +0,0 @@
-/*
- * Copyright (c) 2018, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#include "third_party/googletest/src/googletest/include/gtest/gtest.h"
-
-#include "av1/encoder/encoder.h"
-
-#include "test/codec_factory.h"
-#include "test/encode_test_driver.h"
-#include "test/util.h"
-#include "test/y4m_video_source.h"
-#include "test/yuv_video_source.h"
-
-namespace {
-
-using ::testing::make_tuple;
-using ::testing::tuple;
-
-/* TESTING PARAMETERS */
-
-#define NUM_TEST_VIDEOS 3
-
-const int kBitrate = 40;
-
-// PSNR thresholds found by experiment
-const double kPSNRThresholds[] = { 26.0, 28.0, 20.0 };
-
-typedef struct {
- const char *filename;
- aom_img_fmt fmt;
- aom_bit_depth_t bit_depth;
- unsigned int profile;
- unsigned int limit;
- unsigned int screen_content;
-} TestVideoParam;
-
-const TestVideoParam kTestVideoVectors[] = {
- { "park_joy_90p_8_420.y4m", AOM_IMG_FMT_I420, AOM_BITS_8, 0, 5, 0 },
- { "park_joy_90p_10_444.y4m", AOM_IMG_FMT_I44416, AOM_BITS_10, 1, 5, 0 },
- { "screendata.y4m", AOM_IMG_FMT_I420, AOM_BITS_8, 0, 4, 1 },
-};
-
-// Superres modes tested
-// SUPERRES_QTHRESH is not included, as it has its own test
-const SUPERRES_MODE kSuperresModesNotQThresh[] = { SUPERRES_FIXED,
- SUPERRES_RANDOM };
-
-// Superres denominators and superres kf denominators to be tested
-typedef tuple<int, int> SuperresDenominatorPair;
-const SuperresDenominatorPair kSuperresDenominators[] = {
- make_tuple(16, 9), make_tuple(13, 11), make_tuple(9, 9),
- make_tuple(13, 13), make_tuple(11, 16), make_tuple(8, 16),
- make_tuple(16, 8), make_tuple(8, 8), make_tuple(9, 14),
-};
-
-// Superres q thresholds and superres kf q thresholds to be tested
-typedef tuple<int, int> SuperresQThresholdPair;
-const SuperresQThresholdPair kSuperresQThresholds[] = {
- make_tuple(63, 63), make_tuple(63, 41), make_tuple(17, 63),
- make_tuple(41, 11), make_tuple(1, 37), make_tuple(11, 11),
- make_tuple(1, 1), make_tuple(17, 29), make_tuple(29, 11),
-};
-
-/* END (TESTING PARAMETERS) */
-
-// Test parameter list:
-// <[needed for EncoderTest], test_video_idx_, superres_mode_,
-// tuple(superres_denom_, superres_kf_denom_)>
-typedef tuple<const libaom_test::CodecFactory *, int, SUPERRES_MODE,
- SuperresDenominatorPair>
- HorzSuperresTestParam;
-
-class HorzSuperresEndToEndTest
- : public ::testing::TestWithParam<HorzSuperresTestParam>,
- public ::libaom_test::EncoderTest {
- protected:
- HorzSuperresEndToEndTest()
- : EncoderTest(GET_PARAM(0)), test_video_idx_(GET_PARAM(1)),
- superres_mode_(GET_PARAM(2)), psnr_(0.0), frame_count_(0) {
- test_video_param_ = kTestVideoVectors[test_video_idx_];
-
- SuperresDenominatorPair denoms = GET_PARAM(3);
- superres_denom_ = ::testing::get<0>(denoms);
- superres_kf_denom_ = ::testing::get<1>(denoms);
- }
-
- virtual ~HorzSuperresEndToEndTest() {}
-
- virtual void SetUp() {
- InitializeConfig();
- SetMode(::libaom_test::kTwoPassGood);
- cfg_.g_lag_in_frames = 5;
- cfg_.rc_end_usage = AOM_VBR;
- cfg_.rc_target_bitrate = kBitrate;
- cfg_.g_error_resilient = 0;
- cfg_.g_profile = test_video_param_.profile;
- cfg_.g_input_bit_depth = (unsigned int)test_video_param_.bit_depth;
- cfg_.g_bit_depth = test_video_param_.bit_depth;
- init_flags_ = AOM_CODEC_USE_PSNR;
- if (cfg_.g_bit_depth > 8) init_flags_ |= AOM_CODEC_USE_HIGHBITDEPTH;
-
- // Set superres parameters
- cfg_.rc_superres_mode = superres_mode_;
- cfg_.rc_superres_denominator = superres_denom_;
- cfg_.rc_superres_kf_denominator = superres_kf_denom_;
- }
-
- virtual void BeginPassHook(unsigned int) {
- psnr_ = 0.0;
- frame_count_ = 0;
- }
-
- virtual void PSNRPktHook(const aom_codec_cx_pkt_t *pkt) {
- psnr_ += pkt->data.psnr.psnr[0];
- frame_count_++;
- }
-
- virtual void PreEncodeFrameHook(::libaom_test::VideoSource *video,
- ::libaom_test::Encoder *encoder) {
- if (video->frame() == 1) {
- encoder->Control(AV1E_SET_FRAME_PARALLEL_DECODING, 1);
- encoder->Control(AV1E_SET_TILE_COLUMNS, 4);
-
- // Set cpu-used = 8 for speed
- encoder->Control(AOME_SET_CPUUSED, 8);
-
- // Test screen coding tools
- if (test_video_param_.screen_content)
- encoder->Control(AV1E_SET_TUNE_CONTENT, AOM_CONTENT_SCREEN);
- else
- encoder->Control(AV1E_SET_TUNE_CONTENT, AOM_CONTENT_DEFAULT);
-
- encoder->Control(AOME_SET_ENABLEAUTOALTREF, 1);
- encoder->Control(AOME_SET_ARNR_MAXFRAMES, 7);
- encoder->Control(AOME_SET_ARNR_STRENGTH, 5);
- }
- }
-
- double GetAveragePsnr() const {
- if (frame_count_) return psnr_ / frame_count_;
- return 0.0;
- }
-
- double GetPsnrThreshold() { return kPSNRThresholds[test_video_idx_]; }
-
- void DoTest() {
- testing::internal::scoped_ptr<libaom_test::VideoSource> video;
- video.reset(new libaom_test::Y4mVideoSource(test_video_param_.filename, 0,
- test_video_param_.limit));
- ASSERT_TRUE(video.get() != NULL);
-
- ASSERT_NO_FATAL_FAILURE(RunLoop(video.get()));
- const double psnr = GetAveragePsnr();
- EXPECT_GT(psnr, GetPsnrThreshold())
- << "superres_mode_ = " << superres_mode_
- << ", superres_denom_ = " << superres_denom_
- << ", superres_kf_denom_ = " << superres_kf_denom_;
-
- EXPECT_EQ(test_video_param_.limit, frame_count_)
- << "superres_mode_ = " << superres_mode_
- << ", superres_denom_ = " << superres_denom_
- << ", superres_kf_denom_ = " << superres_kf_denom_;
- }
-
- int test_video_idx_;
- TestVideoParam test_video_param_;
- SUPERRES_MODE superres_mode_;
- int superres_denom_;
- int superres_kf_denom_;
-
- private:
- double psnr_;
- unsigned int frame_count_;
-};
-
-TEST_P(HorzSuperresEndToEndTest, HorzSuperresEndToEndPSNRTest) { DoTest(); }
-
-AV1_INSTANTIATE_TEST_CASE(HorzSuperresEndToEndTest,
- ::testing::Range(0, NUM_TEST_VIDEOS),
- ::testing::ValuesIn(kSuperresModesNotQThresh),
- ::testing::ValuesIn(kSuperresDenominators));
-
-// Test parameter list:
-// <[needed for EncoderTest], test_video_idx_, tuple(superres_denom_,
-// superres_kf_denom_), tuple(superres_qthresh_,superres_kf_qthresh_)>
-typedef tuple<const libaom_test::CodecFactory *, int, SuperresDenominatorPair,
- SuperresQThresholdPair>
- HorzSuperresQThreshTestParam;
-
-class HorzSuperresQThreshEndToEndTest
- : public ::testing::TestWithParam<HorzSuperresQThreshTestParam>,
- public ::libaom_test::EncoderTest {
- protected:
- HorzSuperresQThreshEndToEndTest()
- : EncoderTest(GET_PARAM(0)), test_video_idx_(GET_PARAM(1)),
- superres_mode_(SUPERRES_QTHRESH), psnr_(0.0), frame_count_(0) {
- test_video_param_ = kTestVideoVectors[test_video_idx_];
-
- SuperresDenominatorPair denoms = GET_PARAM(2);
- superres_denom_ = ::testing::get<0>(denoms);
- superres_kf_denom_ = ::testing::get<1>(denoms);
-
- SuperresQThresholdPair qthresholds = GET_PARAM(3);
- superres_qthresh_ = ::testing::get<0>(qthresholds);
- superres_kf_qthresh_ = ::testing::get<1>(qthresholds);
- }
-
- virtual ~HorzSuperresQThreshEndToEndTest() {}
-
- virtual void SetUp() {
- InitializeConfig();
- SetMode(::libaom_test::kTwoPassGood);
- cfg_.g_lag_in_frames = 5;
- cfg_.rc_end_usage = AOM_VBR;
- cfg_.rc_target_bitrate = kBitrate;
- cfg_.g_error_resilient = 0;
- cfg_.g_profile = test_video_param_.profile;
- cfg_.g_input_bit_depth = (unsigned int)test_video_param_.bit_depth;
- cfg_.g_bit_depth = test_video_param_.bit_depth;
- init_flags_ = AOM_CODEC_USE_PSNR;
- if (cfg_.g_bit_depth > 8) init_flags_ |= AOM_CODEC_USE_HIGHBITDEPTH;
-
- // Set superres parameters
- cfg_.rc_superres_mode = superres_mode_;
- cfg_.rc_superres_denominator = superres_denom_;
- cfg_.rc_superres_kf_denominator = superres_kf_denom_;
- cfg_.rc_superres_qthresh = superres_qthresh_;
- cfg_.rc_superres_kf_qthresh = superres_kf_qthresh_;
- }
-
- virtual void BeginPassHook(unsigned int) {
- psnr_ = 0.0;
- frame_count_ = 0;
- }
-
- virtual void PSNRPktHook(const aom_codec_cx_pkt_t *pkt) {
- psnr_ += pkt->data.psnr.psnr[0];
- frame_count_++;
- }
-
- virtual void PreEncodeFrameHook(::libaom_test::VideoSource *video,
- ::libaom_test::Encoder *encoder) {
- if (video->frame() == 1) {
- encoder->Control(AV1E_SET_FRAME_PARALLEL_DECODING, 1);
- encoder->Control(AV1E_SET_TILE_COLUMNS, 0);
-
- // Set cpu-used = 8 for speed
- encoder->Control(AOME_SET_CPUUSED, 8);
-
- // Test screen coding tools
- if (test_video_param_.screen_content)
- encoder->Control(AV1E_SET_TUNE_CONTENT, AOM_CONTENT_SCREEN);
- else
- encoder->Control(AV1E_SET_TUNE_CONTENT, AOM_CONTENT_DEFAULT);
-
- encoder->Control(AOME_SET_ENABLEAUTOALTREF, 1);
- encoder->Control(AOME_SET_ARNR_MAXFRAMES, 7);
- encoder->Control(AOME_SET_ARNR_STRENGTH, 5);
- }
- }
-
- double GetAveragePsnr() const {
- if (frame_count_) return psnr_ / frame_count_;
- return 0.0;
- }
-
- double GetPsnrThreshold() { return kPSNRThresholds[test_video_idx_]; }
-
- void DoTest() {
- testing::internal::scoped_ptr<libaom_test::VideoSource> video;
- video.reset(new libaom_test::Y4mVideoSource(test_video_param_.filename, 0,
- test_video_param_.limit));
- ASSERT_TRUE(video.get() != NULL);
-
- ASSERT_NO_FATAL_FAILURE(RunLoop(video.get()));
- const double psnr = GetAveragePsnr();
- EXPECT_GT(psnr, GetPsnrThreshold())
- << "superres_mode_ = " << superres_mode_
- << ", superres_denom_ = " << superres_denom_
- << ", superres_kf_denom_ = " << superres_kf_denom_
- << ", superres_qthresh_ = " << superres_qthresh_
- << ", superres_kf_qthresh_ = " << superres_kf_qthresh_;
-
- EXPECT_EQ(test_video_param_.limit, frame_count_)
- << "superres_mode_ = " << superres_mode_
- << ", superres_denom_ = " << superres_denom_
- << ", superres_kf_denom_ = " << superres_kf_denom_
- << ", superres_qthresh_ = " << superres_qthresh_
- << ", superres_kf_qthresh_ = " << superres_kf_qthresh_;
- }
-
- int test_video_idx_;
- TestVideoParam test_video_param_;
- SUPERRES_MODE superres_mode_;
- int superres_denom_;
- int superres_kf_denom_;
- int superres_qthresh_;
- int superres_kf_qthresh_;
-
- private:
- double psnr_;
- unsigned int frame_count_;
-};
-
-TEST_P(HorzSuperresQThreshEndToEndTest, HorzSuperresQThreshEndToEndPSNRTest) {
- DoTest();
-}
-
-AV1_INSTANTIATE_TEST_CASE(HorzSuperresQThreshEndToEndTest,
- ::testing::Range(0, NUM_TEST_VIDEOS),
- ::testing::ValuesIn(kSuperresDenominators),
- ::testing::ValuesIn(kSuperresQThresholds));
-
-} // namespace
diff --git a/third_party/aom/test/i420_video_source.h b/third_party/aom/test/i420_video_source.h
deleted file mode 100644
index 233e7152b..000000000
--- a/third_party/aom/test/i420_video_source.h
+++ /dev/null
@@ -1,34 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-#ifndef AOM_TEST_I420_VIDEO_SOURCE_H_
-#define AOM_TEST_I420_VIDEO_SOURCE_H_
-#include <cstdio>
-#include <cstdlib>
-#include <string>
-
-#include "test/yuv_video_source.h"
-
-namespace libaom_test {
-
-// This class extends VideoSource to allow parsing of raw yv12
-// so that we can do actual file encodes.
-class I420VideoSource : public YUVVideoSource {
- public:
- I420VideoSource(const std::string &file_name, unsigned int width,
- unsigned int height, int rate_numerator, int rate_denominator,
- unsigned int start, int limit)
- : YUVVideoSource(file_name, AOM_IMG_FMT_I420, width, height,
- rate_numerator, rate_denominator, start, limit) {}
-};
-
-} // namespace libaom_test
-
-#endif // AOM_TEST_I420_VIDEO_SOURCE_H_
diff --git a/third_party/aom/test/intra_edge_test.cc b/third_party/aom/test/intra_edge_test.cc
deleted file mode 100644
index ce61402ac..000000000
--- a/third_party/aom/test/intra_edge_test.cc
+++ /dev/null
@@ -1,337 +0,0 @@
-/*
- * Copyright (c) 2017, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#include <math.h>
-#include <stdlib.h>
-#include <string.h>
-
-#include "third_party/googletest/src/googletest/include/gtest/gtest.h"
-#include "test/register_state_check.h"
-#include "test/function_equivalence_test.h"
-
-#include "config/aom_config.h"
-#include "config/aom_dsp_rtcd.h"
-#include "config/av1_rtcd.h"
-
-#include "aom/aom_integer.h"
-#include "av1/common/enums.h"
-
-using libaom_test::FunctionEquivalenceTest;
-
-namespace {
-
-template <typename F, typename T>
-class UpsampleTest : public FunctionEquivalenceTest<F> {
- protected:
- static const int kIterations = 1000000;
- static const int kMinEdge = 4;
- static const int kMaxEdge = 24;
- static const int kBufSize = 2 * 64 + 32;
- static const int kOffset = 16;
-
- virtual ~UpsampleTest() {}
-
- virtual void Execute(T *edge_tst) = 0;
-
- void Common() {
- edge_ref_ = &edge_ref_data_[kOffset];
- edge_tst_ = &edge_tst_data_[kOffset];
-
- Execute(edge_tst_);
-
- const int max_idx = (size_ - 1) * 2;
- for (int r = -2; r <= max_idx; ++r) {
- ASSERT_EQ(edge_ref_[r], edge_tst_[r]);
- }
- }
-
- T edge_ref_data_[kBufSize];
- T edge_tst_data_[kBufSize];
-
- T *edge_ref_;
- T *edge_tst_;
-
- int size_;
-};
-
-//////////////////////////////////////////////////////////////////////////////
-// 8 bit version
-//////////////////////////////////////////////////////////////////////////////
-
-typedef void (*UP8B)(uint8_t *p, int size);
-typedef libaom_test::FuncParam<UP8B> TestFuncs;
-
-class UpsampleTest8B : public UpsampleTest<UP8B, uint8_t> {
- protected:
- void Execute(uint8_t *edge_tst) {
- params_.ref_func(edge_ref_, size_);
- ASM_REGISTER_STATE_CHECK(params_.tst_func(edge_tst, size_));
- }
-};
-
-TEST_P(UpsampleTest8B, RandomValues) {
- for (int iter = 0; iter < kIterations && !HasFatalFailure(); ++iter) {
- size_ = 4 * (this->rng_(4) + 1);
-
- int i, pix = 0;
- for (i = 0; i < kOffset + size_; ++i) {
- pix = rng_.Rand8();
- edge_ref_data_[i] = pix;
- edge_tst_data_[i] = edge_ref_data_[i];
- }
-
- // Extend final sample
- while (i < kBufSize) {
- edge_ref_data_[i] = pix;
- edge_tst_data_[i] = pix;
- i++;
- }
-
- Common();
- }
-}
-
-#if HAVE_SSE4_1
-INSTANTIATE_TEST_CASE_P(
- SSE4_1, UpsampleTest8B,
- ::testing::Values(TestFuncs(av1_upsample_intra_edge_c,
- av1_upsample_intra_edge_sse4_1)));
-#endif // HAVE_SSE4_1
-
-//////////////////////////////////////////////////////////////////////////////
-// High bit-depth version
-//////////////////////////////////////////////////////////////////////////////
-
-typedef void (*UPHB)(uint16_t *p, int size, int bd);
-typedef libaom_test::FuncParam<UPHB> TestFuncsHBD;
-
-class UpsampleTestHB : public UpsampleTest<UPHB, uint16_t> {
- protected:
- void Execute(uint16_t *edge_tst) {
- params_.ref_func(edge_ref_, size_, bit_depth_);
- ASM_REGISTER_STATE_CHECK(params_.tst_func(edge_tst, size_, bit_depth_));
- }
- int bit_depth_;
-};
-
-TEST_P(UpsampleTestHB, RandomValues) {
- for (int iter = 0; iter < kIterations && !HasFatalFailure(); ++iter) {
- switch (rng_(3)) {
- case 0: bit_depth_ = 8; break;
- case 1: bit_depth_ = 10; break;
- default: bit_depth_ = 12; break;
- }
- const int hi = 1 << bit_depth_;
-
- size_ = 4 * (this->rng_(4) + 1);
-
- int i, pix = 0;
- for (i = 0; i < kOffset + size_; ++i) {
- pix = rng_(hi);
- edge_ref_data_[i] = pix;
- edge_tst_data_[i] = pix;
- }
-
- // Extend final sample
- while (i < kBufSize) {
- edge_ref_data_[i] = pix;
- edge_tst_data_[i] = pix;
- i++;
- }
-
- Common();
- }
-}
-
-#if HAVE_SSE4_1
-INSTANTIATE_TEST_CASE_P(
- SSE4_1, UpsampleTestHB,
- ::testing::Values(TestFuncsHBD(av1_upsample_intra_edge_high_c,
- av1_upsample_intra_edge_high_sse4_1)));
-#endif // HAVE_SSE4_1
-
-template <typename F, typename T>
-class FilterEdgeTest : public FunctionEquivalenceTest<F> {
- protected:
- static const int kIterations = 1000000;
- static const int kMaxEdge = 2 * 64;
- static const int kBufSize = kMaxEdge + 32;
- static const int kOffset = 15;
-
- virtual ~FilterEdgeTest() {}
-
- virtual void Execute(T *edge_tst) = 0;
-
- void Common() {
- edge_ref_ = &edge_ref_data_[kOffset];
- edge_tst_ = &edge_tst_data_[kOffset];
-
- Execute(edge_tst_);
-
- for (int r = 0; r < size_; ++r) {
- ASSERT_EQ(edge_ref_[r], edge_tst_[r]);
- }
- }
-
- T edge_ref_data_[kBufSize];
- T edge_tst_data_[kBufSize];
-
- T *edge_ref_;
- T *edge_tst_;
-
- int size_;
- int strength_;
-};
-
-//////////////////////////////////////////////////////////////////////////////
-// 8 bit version
-//////////////////////////////////////////////////////////////////////////////
-
-typedef void (*FE8B)(uint8_t *p, int size, int strength);
-typedef libaom_test::FuncParam<FE8B> FilterEdgeTestFuncs;
-
-class FilterEdgeTest8B : public FilterEdgeTest<FE8B, uint8_t> {
- protected:
- void Execute(uint8_t *edge_tst) {
- params_.ref_func(edge_ref_, size_, strength_);
- ASM_REGISTER_STATE_CHECK(params_.tst_func(edge_tst, size_, strength_));
- }
-};
-
-TEST_P(FilterEdgeTest8B, RandomValues) {
- for (int iter = 0; iter < kIterations && !HasFatalFailure(); ++iter) {
- strength_ = this->rng_(4);
- size_ = 4 * (this->rng_(128 / 4) + 1) + 1;
-
- int i, pix = 0;
- for (i = 0; i < kOffset + size_; ++i) {
- pix = rng_.Rand8();
- edge_ref_data_[i] = pix;
- edge_tst_data_[i] = pix;
- }
-
- Common();
- }
-}
-
-#if HAVE_SSE4_1
-INSTANTIATE_TEST_CASE_P(
- SSE4_1, FilterEdgeTest8B,
- ::testing::Values(FilterEdgeTestFuncs(av1_filter_intra_edge_c,
- av1_filter_intra_edge_sse4_1)));
-#endif // HAVE_SSE4_1
-
-//////////////////////////////////////////////////////////////////////////////
-// High bit-depth version
-//////////////////////////////////////////////////////////////////////////////
-
-typedef void (*FEHB)(uint16_t *p, int size, int strength);
-typedef libaom_test::FuncParam<FEHB> FilterEdgeTestFuncsHBD;
-
-class FilterEdgeTestHB : public FilterEdgeTest<FEHB, uint16_t> {
- protected:
- void Execute(uint16_t *edge_tst) {
- params_.ref_func(edge_ref_, size_, strength_);
- ASM_REGISTER_STATE_CHECK(params_.tst_func(edge_tst, size_, strength_));
- }
- int bit_depth_;
-};
-
-TEST_P(FilterEdgeTestHB, RandomValues) {
- for (int iter = 0; iter < kIterations && !HasFatalFailure(); ++iter) {
- switch (rng_(3)) {
- case 0: bit_depth_ = 8; break;
- case 1: bit_depth_ = 10; break;
- default: bit_depth_ = 12; break;
- }
- const int hi = 1 << bit_depth_;
- strength_ = this->rng_(4);
- size_ = 4 * (this->rng_(128 / 4) + 1) + 1;
-
- int i, pix = 0;
- for (i = 0; i < kOffset + size_; ++i) {
- pix = rng_(hi);
- edge_ref_data_[i] = pix;
- edge_tst_data_[i] = pix;
- }
-
- Common();
- }
-}
-
-#if HAVE_SSE4_1
-INSTANTIATE_TEST_CASE_P(SSE4_1, FilterEdgeTestHB,
- ::testing::Values(FilterEdgeTestFuncsHBD(
- av1_filter_intra_edge_high_c,
- av1_filter_intra_edge_high_sse4_1)));
-#endif // HAVE_SSE4_1
-
-// Speed tests
-
-TEST_P(UpsampleTest8B, DISABLED_Speed) {
- const int test_count = 10000000;
- size_ = kMaxEdge;
- for (int i = 0; i < kOffset + size_; ++i) {
- edge_tst_data_[i] = rng_.Rand8();
- }
- edge_tst_ = &edge_tst_data_[kOffset];
- for (int iter = 0; iter < test_count; ++iter) {
- ASM_REGISTER_STATE_CHECK(params_.tst_func(edge_tst_, size_));
- }
-}
-
-TEST_P(UpsampleTestHB, DISABLED_Speed) {
- const int test_count = 10000000;
- size_ = kMaxEdge;
- bit_depth_ = 12;
- const int hi = 1 << bit_depth_;
- for (int i = 0; i < kOffset + size_; ++i) {
- edge_tst_data_[i] = rng_(hi);
- }
- edge_tst_ = &edge_tst_data_[kOffset];
- for (int iter = 0; iter < test_count; ++iter) {
- ASM_REGISTER_STATE_CHECK(params_.tst_func(edge_tst_, size_, bit_depth_));
- }
-}
-
-TEST_P(FilterEdgeTest8B, DISABLED_Speed) {
- const int test_count = 10000000;
- size_ = kMaxEdge;
- strength_ = 1;
- for (int i = 0; i < kOffset + size_; ++i) {
- edge_tst_data_[i] = rng_.Rand8();
- }
- edge_tst_ = &edge_tst_data_[kOffset];
- for (int iter = 0; iter < test_count; ++iter) {
- ASM_REGISTER_STATE_CHECK(params_.tst_func(edge_tst_, size_, strength_));
- // iterate over filter strengths (1,2,3)
- strength_ = (strength_ == 3) ? 1 : strength_ + 1;
- }
-}
-
-TEST_P(FilterEdgeTestHB, DISABLED_Speed) {
- const int test_count = 10000000;
- size_ = kMaxEdge;
- strength_ = 1;
- bit_depth_ = 12;
- const int hi = 1 << bit_depth_;
- for (int i = 0; i < kOffset + size_; ++i) {
- edge_tst_data_[i] = rng_(hi);
- }
- edge_tst_ = &edge_tst_data_[kOffset];
- for (int iter = 0; iter < test_count; ++iter) {
- ASM_REGISTER_STATE_CHECK(params_.tst_func(edge_tst_, size_, strength_));
- // iterate over filter strengths (1,2,3)
- strength_ = (strength_ == 3) ? 1 : strength_ + 1;
- }
-}
-
-} // namespace
diff --git a/third_party/aom/test/intrabc_test.cc b/third_party/aom/test/intrabc_test.cc
deleted file mode 100644
index 3ea421708..000000000
--- a/third_party/aom/test/intrabc_test.cc
+++ /dev/null
@@ -1,168 +0,0 @@
-/*
- * Copyright (c) 2017, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#include "third_party/googletest/src/googletest/include/gtest/gtest.h"
-
-#include "config/aom_config.h"
-
-#include "av1/common/blockd.h"
-#include "av1/common/enums.h"
-#include "av1/common/mv.h"
-#include "av1/common/mvref_common.h"
-#include "av1/common/onyxc_int.h"
-#include "av1/common/tile_common.h"
-
-namespace {
-TEST(IntrabcTest, DvValidation) {
- struct DvTestCase {
- MV dv;
- int mi_row_offset;
- int mi_col_offset;
- BLOCK_SIZE bsize;
- bool valid;
- };
- const int kSubPelScale = 8;
- const int kTileMaxMibWidth = 8;
- const DvTestCase kDvCases[] = {
- { { 0, 0 }, 0, 0, BLOCK_128X128, false },
- { { 0, 0 }, 0, 0, BLOCK_64X64, false },
- { { 0, 0 }, 0, 0, BLOCK_32X32, false },
- { { 0, 0 }, 0, 0, BLOCK_16X16, false },
- { { 0, 0 }, 0, 0, BLOCK_8X8, false },
- { { 0, 0 }, 0, 0, BLOCK_4X4, false },
- { { -MAX_SB_SIZE * kSubPelScale, -MAX_SB_SIZE * kSubPelScale },
- MAX_SB_SIZE / MI_SIZE,
- MAX_SB_SIZE / MI_SIZE,
- BLOCK_16X16,
- true },
- { { 0, -MAX_SB_SIZE * kSubPelScale },
- MAX_SB_SIZE / MI_SIZE,
- MAX_SB_SIZE / MI_SIZE,
- BLOCK_16X16,
- false },
- { { -MAX_SB_SIZE * kSubPelScale, 0 },
- MAX_SB_SIZE / MI_SIZE,
- MAX_SB_SIZE / MI_SIZE,
- BLOCK_16X16,
- true },
- { { MAX_SB_SIZE * kSubPelScale, 0 },
- MAX_SB_SIZE / MI_SIZE,
- MAX_SB_SIZE / MI_SIZE,
- BLOCK_16X16,
- false },
- { { 0, MAX_SB_SIZE * kSubPelScale },
- MAX_SB_SIZE / MI_SIZE,
- MAX_SB_SIZE / MI_SIZE,
- BLOCK_16X16,
- false },
- { { -32 * kSubPelScale, -32 * kSubPelScale },
- MAX_SB_SIZE / MI_SIZE,
- MAX_SB_SIZE / MI_SIZE,
- BLOCK_32X32,
- true },
- { { -32 * kSubPelScale, -32 * kSubPelScale },
- 32 / MI_SIZE,
- 32 / MI_SIZE,
- BLOCK_32X32,
- false },
- { { -32 * kSubPelScale - kSubPelScale / 2, -32 * kSubPelScale },
- MAX_SB_SIZE / MI_SIZE,
- MAX_SB_SIZE / MI_SIZE,
- BLOCK_32X32,
- false },
- { { -33 * kSubPelScale, -32 * kSubPelScale },
- MAX_SB_SIZE / MI_SIZE,
- MAX_SB_SIZE / MI_SIZE,
- BLOCK_32X32,
- true },
- { { -32 * kSubPelScale, -32 * kSubPelScale - kSubPelScale / 2 },
- MAX_SB_SIZE / MI_SIZE,
- MAX_SB_SIZE / MI_SIZE,
- BLOCK_32X32,
- false },
- { { -32 * kSubPelScale, -33 * kSubPelScale },
- MAX_SB_SIZE / MI_SIZE,
- MAX_SB_SIZE / MI_SIZE,
- BLOCK_32X32,
- true },
- { { -MAX_SB_SIZE * kSubPelScale, -MAX_SB_SIZE * kSubPelScale },
- MAX_SB_SIZE / MI_SIZE,
- MAX_SB_SIZE / MI_SIZE,
- BLOCK_LARGEST,
- true },
- { { -(MAX_SB_SIZE + 1) * kSubPelScale, -MAX_SB_SIZE * kSubPelScale },
- MAX_SB_SIZE / MI_SIZE,
- MAX_SB_SIZE / MI_SIZE,
- BLOCK_LARGEST,
- false },
- { { -MAX_SB_SIZE * kSubPelScale, -(MAX_SB_SIZE + 1) * kSubPelScale },
- MAX_SB_SIZE / MI_SIZE,
- MAX_SB_SIZE / MI_SIZE,
- BLOCK_LARGEST,
- false },
- { { -(MAX_SB_SIZE - 1) * kSubPelScale, -MAX_SB_SIZE * kSubPelScale },
- MAX_SB_SIZE / MI_SIZE,
- MAX_SB_SIZE / MI_SIZE,
- BLOCK_LARGEST,
- false },
- { { -MAX_SB_SIZE * kSubPelScale, -(MAX_SB_SIZE - 1) * kSubPelScale },
- MAX_SB_SIZE / MI_SIZE,
- MAX_SB_SIZE / MI_SIZE,
- BLOCK_LARGEST,
- true },
- { { -(MAX_SB_SIZE - 1) * kSubPelScale, -(MAX_SB_SIZE - 1) * kSubPelScale },
- MAX_SB_SIZE / MI_SIZE,
- MAX_SB_SIZE / MI_SIZE,
- BLOCK_LARGEST,
- false },
- { { -MAX_SB_SIZE * kSubPelScale, MAX_SB_SIZE * kSubPelScale },
- MAX_SB_SIZE / MI_SIZE,
- MAX_SB_SIZE / MI_SIZE,
- BLOCK_LARGEST,
- false },
- { { -MAX_SB_SIZE * kSubPelScale,
- (kTileMaxMibWidth - 2) * MAX_SB_SIZE * kSubPelScale },
- MAX_SB_SIZE / MI_SIZE,
- MAX_SB_SIZE / MI_SIZE,
- BLOCK_LARGEST,
- false },
- { { -MAX_SB_SIZE * kSubPelScale,
- ((kTileMaxMibWidth - 2) * MAX_SB_SIZE + 1) * kSubPelScale },
- MAX_SB_SIZE / MI_SIZE,
- MAX_SB_SIZE / MI_SIZE,
- BLOCK_LARGEST,
- false },
- };
-
- MACROBLOCKD xd;
- memset(&xd, 0, sizeof(xd));
- xd.tile.mi_row_start = 8 * MAX_MIB_SIZE;
- xd.tile.mi_row_end = 16 * MAX_MIB_SIZE;
- xd.tile.mi_col_start = 24 * MAX_MIB_SIZE;
- xd.tile.mi_col_end = xd.tile.mi_col_start + kTileMaxMibWidth * MAX_MIB_SIZE;
- xd.plane[1].subsampling_x = 1;
- xd.plane[1].subsampling_y = 1;
- xd.plane[2].subsampling_x = 1;
- xd.plane[2].subsampling_y = 1;
-
- AV1_COMMON cm;
- memset(&cm, 0, sizeof(cm));
-
- for (int i = 0; i < static_cast<int>(GTEST_ARRAY_SIZE_(kDvCases)); ++i) {
- EXPECT_EQ(static_cast<int>(kDvCases[i].valid),
- av1_is_dv_valid(kDvCases[i].dv, &cm, &xd,
- xd.tile.mi_row_start + kDvCases[i].mi_row_offset,
- xd.tile.mi_col_start + kDvCases[i].mi_col_offset,
- kDvCases[i].bsize, MAX_MIB_SIZE_LOG2))
- << "DvCases[" << i << "]";
- }
-}
-} // namespace
diff --git a/third_party/aom/test/intrapred_test.cc b/third_party/aom/test/intrapred_test.cc
deleted file mode 100644
index 1a1c0fc42..000000000
--- a/third_party/aom/test/intrapred_test.cc
+++ /dev/null
@@ -1,266 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#include <string>
-
-#include "third_party/googletest/src/googletest/include/gtest/gtest.h"
-
-#include "config/aom_config.h"
-#include "config/aom_dsp_rtcd.h"
-
-#include "test/acm_random.h"
-#include "test/clear_system_state.h"
-#include "test/register_state_check.h"
-#include "test/util.h"
-#include "av1/common/blockd.h"
-#include "av1/common/common.h"
-#include "av1/common/pred_common.h"
-#include "aom_mem/aom_mem.h"
-
-namespace {
-
-using libaom_test::ACMRandom;
-
-const int count_test_block = 100000;
-
-typedef void (*HighbdIntraPred)(uint16_t *dst, ptrdiff_t stride,
- const uint16_t *above, const uint16_t *left,
- int bps);
-typedef void (*IntraPred)(uint8_t *dst, ptrdiff_t stride, const uint8_t *above,
- const uint8_t *left);
-
-} // namespace
-
-// NOTE: Under gcc version 7.3.0 (Debian 7.3.0-5), if this template is in the
-// anonymous namespace, then we get a strange compiler warning in
-// the begin() and end() methods of the ParamGenerator template class in
-// gtest/internal/gtest-param-util.h:
-// warning: ‘<anonymous>’ is used uninitialized in this function
-// As a workaround, put this template outside the anonymous namespace.
-// See bug aomedia:2003.
-template <typename FuncType>
-struct IntraPredFunc {
- IntraPredFunc(FuncType pred = NULL, FuncType ref = NULL,
- int block_width_value = 0, int block_height_value = 0,
- int bit_depth_value = 0)
- : pred_fn(pred), ref_fn(ref), block_width(block_width_value),
- block_height(block_height_value), bit_depth(bit_depth_value) {}
-
- FuncType pred_fn;
- FuncType ref_fn;
- int block_width;
- int block_height;
- int bit_depth;
-};
-
-namespace {
-
-template <typename FuncType, typename Pixel>
-class AV1IntraPredTest
- : public ::testing::TestWithParam<IntraPredFunc<FuncType> > {
- public:
- void RunTest(Pixel *left_col, Pixel *above_data, Pixel *dst, Pixel *ref_dst) {
- ACMRandom rnd(ACMRandom::DeterministicSeed());
- const int block_width = params_.block_width;
- const int block_height = params_.block_height;
- above_row_ = above_data + 16;
- left_col_ = left_col;
- dst_ = dst;
- ref_dst_ = ref_dst;
- int error_count = 0;
- for (int i = 0; i < count_test_block; ++i) {
- // Fill edges with random data, try first with saturated values.
- for (int x = -1; x <= block_width * 2; x++) {
- if (i == 0) {
- above_row_[x] = mask_;
- } else {
- above_row_[x] = rnd.Rand16() & mask_;
- }
- }
- for (int y = 0; y < block_height; y++) {
- if (i == 0) {
- left_col_[y] = mask_;
- } else {
- left_col_[y] = rnd.Rand16() & mask_;
- }
- }
- Predict();
- CheckPrediction(i, &error_count);
- }
- ASSERT_EQ(0, error_count);
- }
-
- protected:
- virtual void SetUp() {
- params_ = this->GetParam();
- stride_ = params_.block_width * 3;
- mask_ = (1 << params_.bit_depth) - 1;
- }
-
- virtual void Predict() = 0;
-
- void CheckPrediction(int test_case_number, int *error_count) const {
- // For each pixel ensure that the calculated value is the same as reference.
- const int block_width = params_.block_width;
- const int block_height = params_.block_height;
- for (int y = 0; y < block_height; y++) {
- for (int x = 0; x < block_width; x++) {
- *error_count += ref_dst_[x + y * stride_] != dst_[x + y * stride_];
- if (*error_count == 1) {
- ASSERT_EQ(ref_dst_[x + y * stride_], dst_[x + y * stride_])
- << " Failed on Test Case Number " << test_case_number
- << " location: x = " << x << " y = " << y;
- }
- }
- }
- }
-
- Pixel *above_row_;
- Pixel *left_col_;
- Pixel *dst_;
- Pixel *ref_dst_;
- ptrdiff_t stride_;
- int mask_;
-
- IntraPredFunc<FuncType> params_;
-};
-
-class HighbdIntraPredTest : public AV1IntraPredTest<HighbdIntraPred, uint16_t> {
- protected:
- void Predict() {
- const int bit_depth = params_.bit_depth;
- params_.ref_fn(ref_dst_, stride_, above_row_, left_col_, bit_depth);
- ASM_REGISTER_STATE_CHECK(
- params_.pred_fn(dst_, stride_, above_row_, left_col_, bit_depth));
- }
-};
-
-class LowbdIntraPredTest : public AV1IntraPredTest<IntraPred, uint8_t> {
- protected:
- void Predict() {
- params_.ref_fn(ref_dst_, stride_, above_row_, left_col_);
- ASM_REGISTER_STATE_CHECK(
- params_.pred_fn(dst_, stride_, above_row_, left_col_));
- }
-};
-
-// Suppress an unitialized warning. Once there are implementations to test then
-// this can be restored.
-TEST_P(HighbdIntraPredTest, Bitexact) {
- // max block size is 64
- DECLARE_ALIGNED(16, uint16_t, left_col[2 * 64]);
- DECLARE_ALIGNED(16, uint16_t, above_data[2 * 64 + 64]);
- DECLARE_ALIGNED(16, uint16_t, dst[3 * 64 * 64]);
- DECLARE_ALIGNED(16, uint16_t, ref_dst[3 * 64 * 64]);
- av1_zero(left_col);
- av1_zero(above_data);
- RunTest(left_col, above_data, dst, ref_dst);
-}
-
-// Same issue as above but for arm.
-#if !HAVE_NEON
-TEST_P(LowbdIntraPredTest, Bitexact) {
- // max block size is 32
- DECLARE_ALIGNED(16, uint8_t, left_col[2 * 32]);
- DECLARE_ALIGNED(16, uint8_t, above_data[2 * 32 + 32]);
- DECLARE_ALIGNED(16, uint8_t, dst[3 * 32 * 32]);
- DECLARE_ALIGNED(16, uint8_t, ref_dst[3 * 32 * 32]);
- av1_zero(left_col);
- av1_zero(above_data);
- RunTest(left_col, above_data, dst, ref_dst);
-}
-#endif // !HAVE_NEON
-
-// -----------------------------------------------------------------------------
-// High Bit Depth Tests
-#define highbd_entry(type, width, height, opt, bd) \
- IntraPredFunc<HighbdIntraPred>( \
- &aom_highbd_##type##_predictor_##width##x##height##_##opt, \
- &aom_highbd_##type##_predictor_##width##x##height##_c, width, height, \
- bd)
-
-#if 0
-#define highbd_intrapred(type, opt, bd) \
- highbd_entry(type, 4, 4, opt, bd), highbd_entry(type, 4, 8, opt, bd), \
- highbd_entry(type, 8, 4, opt, bd), highbd_entry(type, 8, 8, opt, bd), \
- highbd_entry(type, 8, 16, opt, bd), highbd_entry(type, 16, 8, opt, bd), \
- highbd_entry(type, 16, 16, opt, bd), \
- highbd_entry(type, 16, 32, opt, bd), \
- highbd_entry(type, 32, 16, opt, bd), highbd_entry(type, 32, 32, opt, bd)
-#endif
-
- // ---------------------------------------------------------------------------
- // Low Bit Depth Tests
-
-#define lowbd_entry(type, width, height, opt) \
- IntraPredFunc<IntraPred>(&aom_##type##_predictor_##width##x##height##_##opt, \
- &aom_##type##_predictor_##width##x##height##_c, \
- width, height, 8)
-
-#define lowbd_intrapred(type, opt) \
- lowbd_entry(type, 4, 4, opt), lowbd_entry(type, 4, 8, opt), \
- lowbd_entry(type, 8, 4, opt), lowbd_entry(type, 8, 8, opt), \
- lowbd_entry(type, 8, 16, opt), lowbd_entry(type, 16, 8, opt), \
- lowbd_entry(type, 16, 16, opt), lowbd_entry(type, 16, 32, opt), \
- lowbd_entry(type, 32, 16, opt), lowbd_entry(type, 32, 32, opt)
-
-#if HAVE_SSE2
-const IntraPredFunc<IntraPred> LowbdIntraPredTestVector[] = {
- lowbd_intrapred(dc, sse2), lowbd_intrapred(dc_top, sse2),
- lowbd_intrapred(dc_left, sse2), lowbd_intrapred(dc_128, sse2),
- lowbd_intrapred(v, sse2), lowbd_intrapred(h, sse2),
-};
-
-INSTANTIATE_TEST_CASE_P(SSE2, LowbdIntraPredTest,
- ::testing::ValuesIn(LowbdIntraPredTestVector));
-
-#endif // HAVE_SSE2
-
-#if HAVE_SSSE3
-const IntraPredFunc<IntraPred> LowbdIntraPredTestVectorSsse3[] = {
- lowbd_intrapred(paeth, ssse3),
- lowbd_intrapred(smooth, ssse3),
-};
-
-INSTANTIATE_TEST_CASE_P(SSSE3, LowbdIntraPredTest,
- ::testing::ValuesIn(LowbdIntraPredTestVectorSsse3));
-
-#endif // HAVE_SSSE3
-
-#if HAVE_AVX2
-const IntraPredFunc<IntraPred> LowbdIntraPredTestVectorAvx2[] = {
- lowbd_entry(dc, 32, 32, avx2), lowbd_entry(dc_top, 32, 32, avx2),
- lowbd_entry(dc_left, 32, 32, avx2), lowbd_entry(dc_128, 32, 32, avx2),
- lowbd_entry(v, 32, 32, avx2), lowbd_entry(h, 32, 32, avx2),
- lowbd_entry(dc, 32, 16, avx2), lowbd_entry(dc_top, 32, 16, avx2),
- lowbd_entry(dc_left, 32, 16, avx2), lowbd_entry(dc_128, 32, 16, avx2),
- lowbd_entry(v, 32, 16, avx2), lowbd_entry(paeth, 16, 8, avx2),
- lowbd_entry(paeth, 16, 16, avx2), lowbd_entry(paeth, 16, 32, avx2),
- lowbd_entry(paeth, 32, 16, avx2), lowbd_entry(paeth, 32, 32, avx2),
-};
-
-INSTANTIATE_TEST_CASE_P(AVX2, LowbdIntraPredTest,
- ::testing::ValuesIn(LowbdIntraPredTestVectorAvx2));
-
-#endif // HAVE_AVX2
-
-#if HAVE_NEON
-const IntraPredFunc<HighbdIntraPred> HighbdIntraPredTestVectorNeon[] = {
- highbd_entry(dc, 4, 4, neon, 8), highbd_entry(dc, 8, 8, neon, 8),
- highbd_entry(dc, 16, 16, neon, 8), highbd_entry(dc, 32, 32, neon, 8),
- highbd_entry(dc, 64, 64, neon, 8),
-};
-
-INSTANTIATE_TEST_CASE_P(NEON, HighbdIntraPredTest,
- ::testing::ValuesIn(HighbdIntraPredTestVectorNeon));
-
-#endif // HAVE_NEON
-} // namespace
diff --git a/third_party/aom/test/invalid_file_test.cc b/third_party/aom/test/invalid_file_test.cc
deleted file mode 100644
index 5b4f5a6c3..000000000
--- a/third_party/aom/test/invalid_file_test.cc
+++ /dev/null
@@ -1,122 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#include <cstdio>
-#include <string>
-
-#include "third_party/googletest/src/googletest/include/gtest/gtest.h"
-#include "test/codec_factory.h"
-#include "test/ivf_video_source.h"
-#include "test/util.h"
-#include "test/video_source.h"
-
-namespace {
-
-struct DecodeParam {
- int threads;
- const char *filename;
-};
-
-std::ostream &operator<<(std::ostream &os, const DecodeParam &dp) {
- return os << "threads: " << dp.threads << " file: " << dp.filename;
-}
-
-class InvalidFileTest : public ::libaom_test::DecoderTest,
- public ::libaom_test::CodecTestWithParam<DecodeParam> {
- protected:
- InvalidFileTest() : DecoderTest(GET_PARAM(0)), res_file_(NULL) {}
-
- virtual ~InvalidFileTest() {
- if (res_file_ != NULL) fclose(res_file_);
- }
-
- void OpenResFile(const std::string &res_file_name) {
- res_file_ = libaom_test::OpenTestDataFile(res_file_name);
- ASSERT_TRUE(res_file_ != NULL)
- << "Result file open failed. Filename: " << res_file_name;
- }
-
- virtual bool HandleDecodeResult(
- const aom_codec_err_t res_dec,
- const libaom_test::CompressedVideoSource &video,
- libaom_test::Decoder *decoder) {
- EXPECT_TRUE(res_file_ != NULL);
- int expected_res_dec = -1;
-
- // Read integer result.
- const int res = fscanf(res_file_, "%d", &expected_res_dec);
- EXPECT_NE(res, EOF) << "Read result data failed";
-
- if (expected_res_dec != -1) {
- // Check results match.
- const DecodeParam input = GET_PARAM(1);
- if (input.threads > 1) {
- // The serial decode check is too strict for tile-threaded decoding as
- // there is no guarantee on the decode order nor which specific error
- // will take precedence. Currently a tile-level error is not forwarded
- // so the frame will simply be marked corrupt.
- EXPECT_TRUE(res_dec == expected_res_dec ||
- res_dec == AOM_CODEC_CORRUPT_FRAME)
- << "Results don't match: frame number = " << video.frame_number()
- << ". (" << decoder->DecodeError()
- << "). Expected: " << expected_res_dec << " or "
- << AOM_CODEC_CORRUPT_FRAME;
- } else {
- EXPECT_EQ(expected_res_dec, res_dec)
- << "Results don't match: frame number = " << video.frame_number()
- << ". (" << decoder->DecodeError() << ")";
- }
- }
-
- return !HasFailure();
- }
-
- virtual void HandlePeekResult(libaom_test::Decoder *const /*decoder*/,
- libaom_test::CompressedVideoSource * /*video*/,
- const aom_codec_err_t /*res_peek*/) {}
-
- void RunTest() {
- const DecodeParam input = GET_PARAM(1);
- aom_codec_dec_cfg_t cfg = { 0, 0, 0, CONFIG_LOWBITDEPTH, { 1 } };
- cfg.threads = input.threads;
- const std::string filename = input.filename;
- libaom_test::IVFVideoSource decode_video(filename);
- decode_video.Init();
-
- // Construct result file name. The file holds a list of expected integer
- // results, one for each decoded frame. Any result that doesn't match
- // the files list will cause a test failure.
- const std::string res_filename = filename + ".res";
- OpenResFile(res_filename);
-
- ASSERT_NO_FATAL_FAILURE(RunLoop(&decode_video, cfg));
- }
-
- private:
- FILE *res_file_;
-};
-
-TEST_P(InvalidFileTest, ReturnCode) { RunTest(); }
-
-const DecodeParam kAV1InvalidFileTests[] = {
- { 1, "invalid-bug-1814.ivf" },
- { 4, "invalid-oss-fuzz-9463.ivf" },
- { 1, "invalid-oss-fuzz-9482.ivf" },
- { 1, "invalid-oss-fuzz-9720.ivf" },
- { 1, "invalid-oss-fuzz-10061.ivf" },
- { 1, "invalid-oss-fuzz-10117-mc-buf-use-highbd.ivf" },
- { 1, "invalid-oss-fuzz-10227.ivf" },
-};
-
-AV1_INSTANTIATE_TEST_CASE(InvalidFileTest,
- ::testing::ValuesIn(kAV1InvalidFileTests));
-
-} // namespace
diff --git a/third_party/aom/test/ivf_video_source.h b/third_party/aom/test/ivf_video_source.h
deleted file mode 100644
index ff2841445..000000000
--- a/third_party/aom/test/ivf_video_source.h
+++ /dev/null
@@ -1,114 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-#ifndef AOM_TEST_IVF_VIDEO_SOURCE_H_
-#define AOM_TEST_IVF_VIDEO_SOURCE_H_
-
-#include <cstdio>
-#include <cstdlib>
-#include <new>
-#include <string>
-
-#include "aom_ports/sanitizer.h"
-#include "test/video_source.h"
-
-namespace libaom_test {
-const unsigned int kCodeBufferSize = 256 * 1024 * 1024;
-const unsigned int kIvfFileHdrSize = 32;
-const unsigned int kIvfFrameHdrSize = 12;
-
-static unsigned int MemGetLe32(const uint8_t *mem) {
- return (mem[3] << 24) | (mem[2] << 16) | (mem[1] << 8) | (mem[0]);
-}
-
-// This class extends VideoSource to allow parsing of ivf files,
-// so that we can do actual file decodes.
-class IVFVideoSource : public CompressedVideoSource {
- public:
- explicit IVFVideoSource(const std::string &file_name)
- : file_name_(file_name), input_file_(NULL), compressed_frame_buf_(NULL),
- frame_sz_(0), frame_(0), end_of_file_(false) {}
-
- virtual ~IVFVideoSource() {
- delete[] compressed_frame_buf_;
-
- if (input_file_) fclose(input_file_);
- }
-
- virtual void Init() {
- // Allocate a buffer for read in the compressed video frame.
- compressed_frame_buf_ = new uint8_t[kCodeBufferSize];
- ASSERT_TRUE(compressed_frame_buf_ != NULL)
- << "Allocate frame buffer failed";
- ASAN_POISON_MEMORY_REGION(compressed_frame_buf_, kCodeBufferSize);
- }
-
- virtual void Begin() {
- input_file_ = OpenTestDataFile(file_name_);
- ASSERT_TRUE(input_file_ != NULL)
- << "Input file open failed. Filename: " << file_name_;
-
- // Read file header
- uint8_t file_hdr[kIvfFileHdrSize];
- ASSERT_EQ(kIvfFileHdrSize, fread(file_hdr, 1, kIvfFileHdrSize, input_file_))
- << "File header read failed.";
- // Check file header
- ASSERT_TRUE(file_hdr[0] == 'D' && file_hdr[1] == 'K' &&
- file_hdr[2] == 'I' && file_hdr[3] == 'F')
- << "Input is not an IVF file.";
-
- FillFrame();
- }
-
- virtual void Next() {
- ++frame_;
- FillFrame();
- }
-
- void FillFrame() {
- ASSERT_TRUE(input_file_ != NULL);
- uint8_t frame_hdr[kIvfFrameHdrSize];
- // Check frame header and read a frame from input_file.
- if (fread(frame_hdr, 1, kIvfFrameHdrSize, input_file_) !=
- kIvfFrameHdrSize) {
- end_of_file_ = true;
- } else {
- end_of_file_ = false;
-
- frame_sz_ = MemGetLe32(frame_hdr);
- ASSERT_LE(frame_sz_, kCodeBufferSize)
- << "Frame is too big for allocated code buffer";
- ASAN_UNPOISON_MEMORY_REGION(compressed_frame_buf_, kCodeBufferSize);
- ASSERT_EQ(frame_sz_,
- fread(compressed_frame_buf_, 1, frame_sz_, input_file_))
- << "Failed to read complete frame";
- ASAN_POISON_MEMORY_REGION(compressed_frame_buf_ + frame_sz_,
- kCodeBufferSize - frame_sz_);
- }
- }
-
- virtual const uint8_t *cxdata() const {
- return end_of_file_ ? NULL : compressed_frame_buf_;
- }
- virtual size_t frame_size() const { return frame_sz_; }
- virtual unsigned int frame_number() const { return frame_; }
-
- protected:
- std::string file_name_;
- FILE *input_file_;
- uint8_t *compressed_frame_buf_;
- size_t frame_sz_;
- unsigned int frame_;
- bool end_of_file_;
-};
-
-} // namespace libaom_test
-
-#endif // AOM_TEST_IVF_VIDEO_SOURCE_H_
diff --git a/third_party/aom/test/lightfield_test.sh b/third_party/aom/test/lightfield_test.sh
deleted file mode 100755
index b957a6b79..000000000
--- a/third_party/aom/test/lightfield_test.sh
+++ /dev/null
@@ -1,98 +0,0 @@
-#!/bin/sh
-## Copyright (c) 2018, Alliance for Open Media. All rights reserved
-##
-## This source code is subject to the terms of the BSD 2 Clause License and
-## the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
-## was not distributed with this source code in the LICENSE file, you can
-## obtain it at www.aomedia.org/license/software. If the Alliance for Open
-## Media Patent License 1.0 was not distributed with this source code in the
-## PATENTS file, you can obtain it at www.aomedia.org/license/patent.
-##
-## This file tests the lightfield example.
-##
-. $(dirname $0)/tools_common.sh
-
-# Environment check: $infile is required.
-lightfield_test_verify_environment() {
- local infile="${LIBAOM_TEST_DATA_PATH}/vase10x10.yuv"
- if [ ! -e "${infile}" ]; then
- echo "Libaom test data must exist in LIBAOM_TEST_DATA_PATH."
- return 1
- fi
-}
-
-# Run the lightfield example
-lightfield_test() {
- local img_width=1024
- local img_height=1024
- local lf_width=10
- local lf_height=10
- local lf_blocksize=5
- local num_references=4
- local num_tile_lists=2
-
- # Encode the lightfield.
- local encoder="${LIBAOM_BIN_PATH}/lightfield_encoder${AOM_TEST_EXE_SUFFIX}"
- local yuv_file="${LIBAOM_TEST_DATA_PATH}/vase10x10.yuv"
- local lf_file="${AOM_TEST_OUTPUT_DIR}/vase10x10.ivf"
- if [ ! -x "${encoder}" ]; then
- elog "${encoder} does not exist or is not executable."
- return 1
- fi
-
- eval "${AOM_TEST_PREFIX}" "${encoder}" "${img_width}" "${img_height}" \
- "${yuv_file}" "${lf_file}" "${lf_width}" \
- "${lf_height}" "${lf_blocksize}" ${devnull}
-
- [ -e "${lf_file}" ] || return 1
-
- # Parse lightfield bitstream to construct and output a new bitstream that can
- # be decoded by an AV1 decoder.
- local bs_decoder="${LIBAOM_BIN_PATH}/lightfield_bitstream_parsing${AOM_TEST_EXE_SUFFIX}"
- local tl_file="${AOM_TEST_OUTPUT_DIR}/vase_tile_list.ivf"
- if [ ! -x "${bs_decoder}" ]; then
- elog "${bs_decoder} does not exist or is not executable."
- return 1
- fi
-
- eval "${AOM_TEST_PREFIX}" "${bs_decoder}" "${lf_file}" "${tl_file}" \
- "${num_references}" ${devnull}
-
- [ -e "${tl_file}" ] || return 1
-
- # Run lightfield tile list decoder
- local tl_decoder="${LIBAOM_BIN_PATH}/lightfield_tile_list_decoder${AOM_TEST_EXE_SUFFIX}"
- local tl_outfile="${AOM_TEST_OUTPUT_DIR}/vase_tile_list.yuv"
- if [ ! -x "${tl_decoder}" ]; then
- elog "${tl_decoder} does not exist or is not executable."
- return 1
- fi
-
- eval "${AOM_TEST_PREFIX}" "${tl_decoder}" "${tl_file}" "${tl_outfile}" \
- "${num_references}" "${num_tile_lists}" ${devnull}
-
- [ -e "${tl_outfile}" ] || return 1
-
- # Run reference lightfield decoder
- local ref_decoder="${LIBAOM_BIN_PATH}/lightfield_decoder${AOM_TEST_EXE_SUFFIX}"
- local tl_reffile="${AOM_TEST_OUTPUT_DIR}/vase_reference.yuv"
- if [ ! -x "${ref_decoder}" ]; then
- elog "${ref_decoder} does not exist or is not executable."
- return 1
- fi
-
- eval "${AOM_TEST_PREFIX}" "${ref_decoder}" "${lf_file}" "${tl_reffile}" \
- "${num_references}" ${devnull}
-
- [ -e "${tl_reffile}" ] || return 1
-
- # Check if tl_outfile and tl_reffile are identical. If not identical, this test fails.
- diff ${tl_outfile} ${tl_reffile} > /dev/null
- if [ $? -eq 1 ]; then
- return 1
- fi
-}
-
-lightfield_test_tests="lightfield_test"
-
-run_tests lightfield_test_verify_environment "${lightfield_test_tests}"
diff --git a/third_party/aom/test/log2_test.cc b/third_party/aom/test/log2_test.cc
deleted file mode 100644
index d7840c68b..000000000
--- a/third_party/aom/test/log2_test.cc
+++ /dev/null
@@ -1,50 +0,0 @@
-/*
- * Copyright (c) 2018, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#include <math.h>
-
-#include "aom_ports/bitops.h"
-#include "av1/common/entropymode.h"
-#include "third_party/googletest/src/googletest/include/gtest/gtest.h"
-
-TEST(Log2Test, GetMsb) {
- // Test small numbers exhaustively.
- for (unsigned int n = 1; n < 10000; n++) {
- EXPECT_EQ(get_msb(n), static_cast<int>(floor(log2(n))));
- }
-
- // Test every power of 2 and the two adjacent numbers.
- for (int exponent = 2; exponent < 32; exponent++) {
- const unsigned int power_of_2 = 1U << exponent;
- EXPECT_EQ(get_msb(power_of_2 - 1), exponent - 1);
- EXPECT_EQ(get_msb(power_of_2), exponent);
- EXPECT_EQ(get_msb(power_of_2 + 1), exponent);
- }
-}
-
-TEST(Log2Test, Av1CeilLog2) {
- // Test small numbers exhaustively.
- EXPECT_EQ(av1_ceil_log2(0), 0);
- for (int n = 1; n < 10000; n++) {
- EXPECT_EQ(av1_ceil_log2(n), static_cast<int>(ceil(log2(n))));
- }
-
- // Test every power of 2 and the two adjacent numbers.
- for (int exponent = 2; exponent < 31; exponent++) {
- const int power_of_2 = 1 << exponent;
- EXPECT_EQ(av1_ceil_log2(power_of_2 - 1), exponent);
- EXPECT_EQ(av1_ceil_log2(power_of_2), exponent);
- // The current implementation of av1_ceil_log2 only works up to 2^30.
- if (exponent < 30) {
- EXPECT_EQ(av1_ceil_log2(power_of_2 + 1), exponent + 1);
- }
- }
-}
diff --git a/third_party/aom/test/lossless_test.cc b/third_party/aom/test/lossless_test.cc
deleted file mode 100644
index 3f8e89c81..000000000
--- a/third_party/aom/test/lossless_test.cc
+++ /dev/null
@@ -1,126 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#include "third_party/googletest/src/googletest/include/gtest/gtest.h"
-
-#include "config/aom_config.h"
-
-#include "test/codec_factory.h"
-#include "test/encode_test_driver.h"
-#include "test/i420_video_source.h"
-#include "test/util.h"
-#include "test/y4m_video_source.h"
-
-namespace {
-
-const int kMaxPsnr = 100;
-
-class LosslessTestLarge
- : public ::libaom_test::CodecTestWithParam<libaom_test::TestMode>,
- public ::libaom_test::EncoderTest {
- protected:
- LosslessTestLarge()
- : EncoderTest(GET_PARAM(0)), psnr_(kMaxPsnr), nframes_(0),
- encoding_mode_(GET_PARAM(1)) {}
-
- virtual ~LosslessTestLarge() {}
-
- virtual void SetUp() {
- InitializeConfig();
- SetMode(encoding_mode_);
- }
-
- virtual void PreEncodeFrameHook(::libaom_test::VideoSource *video,
- ::libaom_test::Encoder *encoder) {
- if (video->frame() == 1) {
- // Only call Control if quantizer > 0 to verify that using quantizer
- // alone will activate lossless
- if (cfg_.rc_max_quantizer > 0 || cfg_.rc_min_quantizer > 0) {
- encoder->Control(AV1E_SET_LOSSLESS, 1);
- }
- }
- }
-
- virtual void BeginPassHook(unsigned int /*pass*/) {
- psnr_ = kMaxPsnr;
- nframes_ = 0;
- }
-
- virtual void PSNRPktHook(const aom_codec_cx_pkt_t *pkt) {
- if (pkt->data.psnr.psnr[0] < psnr_) psnr_ = pkt->data.psnr.psnr[0];
- }
-
- double GetMinPsnr() const { return psnr_; }
-
- private:
- double psnr_;
- unsigned int nframes_;
- libaom_test::TestMode encoding_mode_;
-};
-
-TEST_P(LosslessTestLarge, TestLossLessEncoding) {
- const aom_rational timebase = { 33333333, 1000000000 };
- cfg_.g_timebase = timebase;
- cfg_.rc_target_bitrate = 2000;
- cfg_.g_lag_in_frames = 25;
- cfg_.rc_min_quantizer = 0;
- cfg_.rc_max_quantizer = 0;
-
- init_flags_ = AOM_CODEC_USE_PSNR;
-
- // intentionally changed the dimension for better testing coverage
- libaom_test::I420VideoSource video("hantro_collage_w352h288.yuv", 352, 288,
- timebase.den, timebase.num, 0, 5);
- ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
- const double psnr_lossless = GetMinPsnr();
- EXPECT_GE(psnr_lossless, kMaxPsnr);
-}
-
-TEST_P(LosslessTestLarge, TestLossLessEncoding444) {
- libaom_test::Y4mVideoSource video("rush_hour_444.y4m", 0, 5);
-
- cfg_.g_profile = 1;
- cfg_.g_timebase = video.timebase();
- cfg_.rc_target_bitrate = 2000;
- cfg_.g_lag_in_frames = 25;
- cfg_.rc_min_quantizer = 0;
- cfg_.rc_max_quantizer = 0;
-
- init_flags_ = AOM_CODEC_USE_PSNR;
-
- ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
- const double psnr_lossless = GetMinPsnr();
- EXPECT_GE(psnr_lossless, kMaxPsnr);
-}
-
-TEST_P(LosslessTestLarge, TestLossLessEncodingCtrl) {
- const aom_rational timebase = { 33333333, 1000000000 };
- cfg_.g_timebase = timebase;
- cfg_.rc_target_bitrate = 2000;
- cfg_.g_lag_in_frames = 25;
- // Intentionally set Q > 0, to make sure control can be used to activate
- // lossless
- cfg_.rc_min_quantizer = 10;
- cfg_.rc_max_quantizer = 20;
-
- init_flags_ = AOM_CODEC_USE_PSNR;
-
- libaom_test::I420VideoSource video("hantro_collage_w352h288.yuv", 352, 288,
- timebase.den, timebase.num, 0, 5);
- ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
- const double psnr_lossless = GetMinPsnr();
- EXPECT_GE(psnr_lossless, kMaxPsnr);
-}
-
-AV1_INSTANTIATE_TEST_CASE(LosslessTestLarge,
- ::testing::Values(::libaom_test::kOnePassGood,
- ::libaom_test::kTwoPassGood));
-} // namespace
diff --git a/third_party/aom/test/lpf_test.cc b/third_party/aom/test/lpf_test.cc
deleted file mode 100644
index 451bffd2a..000000000
--- a/third_party/aom/test/lpf_test.cc
+++ /dev/null
@@ -1,627 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#include <cmath>
-#include <cstdlib>
-#include <string>
-
-#include "third_party/googletest/src/googletest/include/gtest/gtest.h"
-
-#include "config/aom_config.h"
-#include "config/aom_dsp_rtcd.h"
-
-#include "test/acm_random.h"
-#include "test/clear_system_state.h"
-#include "test/register_state_check.h"
-#include "test/util.h"
-#include "av1/common/av1_loopfilter.h"
-#include "av1/common/entropy.h"
-#include "aom/aom_integer.h"
-
-using libaom_test::ACMRandom;
-
-namespace {
-// Horizontally and Vertically need 32x32: 8 Coeffs preceeding filtered section
-// 16 Coefs within filtered section
-// 8 Coeffs following filtered section
-const int kNumCoeffs = 1024;
-
-const int number_of_iterations = 10000;
-
-const int kSpeedTestNum = 500000;
-
-#define LOOP_PARAM \
- int p, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh
-#define DUAL_LOOP_PARAM \
- int p, const uint8_t *blimit0, const uint8_t *limit0, \
- const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, \
- const uint8_t *thresh1
-
-typedef void (*loop_op_t)(uint8_t *s, LOOP_PARAM);
-typedef void (*dual_loop_op_t)(uint8_t *s, DUAL_LOOP_PARAM);
-typedef void (*hbdloop_op_t)(uint16_t *s, LOOP_PARAM, int bd);
-typedef void (*hbddual_loop_op_t)(uint16_t *s, DUAL_LOOP_PARAM, int bd);
-
-typedef ::testing::tuple<hbdloop_op_t, hbdloop_op_t, int> hbdloop_param_t;
-typedef ::testing::tuple<hbddual_loop_op_t, hbddual_loop_op_t, int>
- hbddual_loop_param_t;
-typedef ::testing::tuple<loop_op_t, loop_op_t, int> loop_param_t;
-typedef ::testing::tuple<dual_loop_op_t, dual_loop_op_t, int> dual_loop_param_t;
-
-template <typename Pixel_t, int PIXEL_WIDTH_t>
-void InitInput(Pixel_t *s, Pixel_t *ref_s, ACMRandom *rnd, const uint8_t limit,
- const int mask, const int32_t p, const int i) {
- uint16_t tmp_s[kNumCoeffs];
-
- for (int j = 0; j < kNumCoeffs;) {
- const uint8_t val = rnd->Rand8();
- if (val & 0x80) { // 50% chance to choose a new value.
- tmp_s[j] = rnd->Rand16();
- j++;
- } else { // 50% chance to repeat previous value in row X times.
- int k = 0;
- while (k++ < ((val & 0x1f) + 1) && j < kNumCoeffs) {
- if (j < 1) {
- tmp_s[j] = rnd->Rand16();
- } else if (val & 0x20) { // Increment by a value within the limit.
- tmp_s[j] = tmp_s[j - 1] + (limit - 1);
- } else { // Decrement by a value within the limit.
- tmp_s[j] = tmp_s[j - 1] - (limit - 1);
- }
- j++;
- }
- }
- }
-
- for (int j = 0; j < kNumCoeffs;) {
- const uint8_t val = rnd->Rand8();
- if (val & 0x80) {
- j++;
- } else { // 50% chance to repeat previous value in column X times.
- int k = 0;
- while (k++ < ((val & 0x1f) + 1) && j < kNumCoeffs) {
- if (j < 1) {
- tmp_s[j] = rnd->Rand16();
- } else if (val & 0x20) { // Increment by a value within the limit.
- tmp_s[(j % 32) * 32 + j / 32] =
- tmp_s[((j - 1) % 32) * 32 + (j - 1) / 32] + (limit - 1);
- } else { // Decrement by a value within the limit.
- tmp_s[(j % 32) * 32 + j / 32] =
- tmp_s[((j - 1) % 32) * 32 + (j - 1) / 32] - (limit - 1);
- }
- j++;
- }
- }
- }
-
- for (int j = 0; j < kNumCoeffs; j++) {
- if (i % 2) {
- s[j] = tmp_s[j] & mask;
- } else {
- s[j] = tmp_s[p * (j % p) + j / p] & mask;
- }
- ref_s[j] = s[j];
- }
-}
-
-uint8_t GetOuterThresh(ACMRandom *rnd) {
- return static_cast<uint8_t>(rnd->PseudoUniform(3 * MAX_LOOP_FILTER + 5));
-}
-
-uint8_t GetInnerThresh(ACMRandom *rnd) {
- return static_cast<uint8_t>(rnd->PseudoUniform(MAX_LOOP_FILTER + 1));
-}
-
-uint8_t GetHevThresh(ACMRandom *rnd) {
- return static_cast<uint8_t>(rnd->PseudoUniform(MAX_LOOP_FILTER + 1) >> 4);
-}
-
-template <typename func_type_t, typename params_t>
-class LoopTestParam : public ::testing::TestWithParam<params_t> {
- public:
- virtual ~LoopTestParam() {}
- virtual void SetUp() {
- loopfilter_op_ = ::testing::get<0>(this->GetParam());
- ref_loopfilter_op_ = ::testing::get<1>(this->GetParam());
- bit_depth_ = ::testing::get<2>(this->GetParam());
- mask_ = (1 << bit_depth_) - 1;
- }
-
- virtual void TearDown() { libaom_test::ClearSystemState(); }
-
- protected:
- int bit_depth_;
- int mask_;
- func_type_t loopfilter_op_;
- func_type_t ref_loopfilter_op_;
-};
-
-void call_filter(uint16_t *s, LOOP_PARAM, int bd, hbdloop_op_t op) {
- op(s, p, blimit, limit, thresh, bd);
-}
-void call_filter(uint8_t *s, LOOP_PARAM, int bd, loop_op_t op) {
- (void)bd;
- op(s, p, blimit, limit, thresh);
-}
-void call_dualfilter(uint16_t *s, DUAL_LOOP_PARAM, int bd,
- hbddual_loop_op_t op) {
- op(s, p, blimit0, limit0, thresh0, blimit1, limit1, thresh1, bd);
-}
-void call_dualfilter(uint8_t *s, DUAL_LOOP_PARAM, int bd, dual_loop_op_t op) {
- (void)bd;
- op(s, p, blimit0, limit0, thresh0, blimit1, limit1, thresh1);
-};
-
-typedef LoopTestParam<hbdloop_op_t, hbdloop_param_t> Loop8Test6Param_hbd;
-typedef LoopTestParam<loop_op_t, loop_param_t> Loop8Test6Param_lbd;
-typedef LoopTestParam<hbddual_loop_op_t, hbddual_loop_param_t>
- Loop8Test9Param_hbd;
-typedef LoopTestParam<dual_loop_op_t, dual_loop_param_t> Loop8Test9Param_lbd;
-
-#define OPCHECK(a, b) \
- ACMRandom rnd(ACMRandom::DeterministicSeed()); \
- const int count_test_block = number_of_iterations; \
- const int32_t p = kNumCoeffs / 32; \
- DECLARE_ALIGNED(b, a, s[kNumCoeffs]); \
- DECLARE_ALIGNED(b, a, ref_s[kNumCoeffs]); \
- int err_count_total = 0; \
- int first_failure = -1; \
- for (int i = 0; i < count_test_block; ++i) { \
- int err_count = 0; \
- uint8_t tmp = GetOuterThresh(&rnd); \
- DECLARE_ALIGNED(16, const uint8_t, \
- blimit[16]) = { tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp, \
- tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp }; \
- tmp = GetInnerThresh(&rnd); \
- DECLARE_ALIGNED(16, const uint8_t, \
- limit[16]) = { tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp, \
- tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp }; \
- tmp = GetHevThresh(&rnd); \
- DECLARE_ALIGNED(16, const uint8_t, \
- thresh[16]) = { tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp, \
- tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp }; \
- InitInput<a, b>(s, ref_s, &rnd, *limit, mask_, p, i); \
- call_filter(ref_s + 8 + p * 8, p, blimit, limit, thresh, bit_depth_, \
- ref_loopfilter_op_); \
- ASM_REGISTER_STATE_CHECK(call_filter(s + 8 + p * 8, p, blimit, limit, \
- thresh, bit_depth_, loopfilter_op_)); \
- for (int j = 0; j < kNumCoeffs; ++j) { \
- err_count += ref_s[j] != s[j]; \
- } \
- if (err_count && !err_count_total) { \
- first_failure = i; \
- } \
- err_count_total += err_count; \
- } \
- EXPECT_EQ(0, err_count_total) \
- << "Error: Loop8Test6Param, C output doesn't match SIMD " \
- "loopfilter output. " \
- << "First failed at test case " << first_failure;
-
-TEST_P(Loop8Test6Param_hbd, OperationCheck) { OPCHECK(uint16_t, 16); }
-TEST_P(Loop8Test6Param_lbd, OperationCheck) { OPCHECK(uint8_t, 8); }
-
-#define VALCHECK(a, b) \
- ACMRandom rnd(ACMRandom::DeterministicSeed()); \
- const int count_test_block = number_of_iterations; \
- DECLARE_ALIGNED(b, a, s[kNumCoeffs]); \
- DECLARE_ALIGNED(b, a, ref_s[kNumCoeffs]); \
- int err_count_total = 0; \
- int first_failure = -1; \
- for (int i = 0; i < count_test_block; ++i) { \
- int err_count = 0; \
- uint8_t tmp = GetOuterThresh(&rnd); \
- DECLARE_ALIGNED(16, const uint8_t, \
- blimit[16]) = { tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp, \
- tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp }; \
- tmp = GetInnerThresh(&rnd); \
- DECLARE_ALIGNED(16, const uint8_t, \
- limit[16]) = { tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp, \
- tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp }; \
- tmp = GetHevThresh(&rnd); \
- DECLARE_ALIGNED(16, const uint8_t, \
- thresh[16]) = { tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp, \
- tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp }; \
- int32_t p = kNumCoeffs / 32; \
- for (int j = 0; j < kNumCoeffs; ++j) { \
- s[j] = rnd.Rand16() & mask_; \
- ref_s[j] = s[j]; \
- } \
- call_filter(ref_s + 8 + p * 8, p, blimit, limit, thresh, bit_depth_, \
- ref_loopfilter_op_); \
- ASM_REGISTER_STATE_CHECK(call_filter(s + 8 + p * 8, p, blimit, limit, \
- thresh, bit_depth_, loopfilter_op_)); \
- for (int j = 0; j < kNumCoeffs; ++j) { \
- err_count += ref_s[j] != s[j]; \
- } \
- if (err_count && !err_count_total) { \
- first_failure = i; \
- } \
- err_count_total += err_count; \
- } \
- EXPECT_EQ(0, err_count_total) \
- << "Error: Loop8Test6Param, C output doesn't match SIMD " \
- "loopfilter output. " \
- << "First failed at test case " << first_failure;
-
-TEST_P(Loop8Test6Param_hbd, ValueCheck) { VALCHECK(uint16_t, 16); }
-TEST_P(Loop8Test6Param_lbd, ValueCheck) { VALCHECK(uint8_t, 8); }
-
-#define SPEEDCHECK(a, b) \
- ACMRandom rnd(ACMRandom::DeterministicSeed()); \
- const int count_test_block = kSpeedTestNum; \
- const int32_t bd = bit_depth_; \
- DECLARE_ALIGNED(b, a, s[kNumCoeffs]); \
- uint8_t tmp = GetOuterThresh(&rnd); \
- DECLARE_ALIGNED(16, const uint8_t, \
- blimit[16]) = { tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp, \
- tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp }; \
- tmp = GetInnerThresh(&rnd); \
- DECLARE_ALIGNED(16, const uint8_t, \
- limit[16]) = { tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp, \
- tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp }; \
- tmp = GetHevThresh(&rnd); \
- DECLARE_ALIGNED(16, const uint8_t, \
- thresh[16]) = { tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp, \
- tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp }; \
- int32_t p = kNumCoeffs / 32; \
- for (int j = 0; j < kNumCoeffs; ++j) { \
- s[j] = rnd.Rand16() & mask_; \
- } \
- for (int i = 0; i < count_test_block; ++i) { \
- call_filter(s + 8 + p * 8, p, blimit, limit, thresh, bd, loopfilter_op_); \
- }
-
-TEST_P(Loop8Test6Param_hbd, DISABLED_Speed) { SPEEDCHECK(uint16_t, 16); }
-TEST_P(Loop8Test6Param_lbd, DISABLED_Speed) { SPEEDCHECK(uint8_t, 8); }
-
-#define OPCHECKd(a, b) \
- ACMRandom rnd(ACMRandom::DeterministicSeed()); \
- const int count_test_block = number_of_iterations; \
- DECLARE_ALIGNED(b, a, s[kNumCoeffs]); \
- DECLARE_ALIGNED(b, a, ref_s[kNumCoeffs]); \
- int err_count_total = 0; \
- int first_failure = -1; \
- for (int i = 0; i < count_test_block; ++i) { \
- int err_count = 0; \
- uint8_t tmp = GetOuterThresh(&rnd); \
- DECLARE_ALIGNED(16, const uint8_t, \
- blimit0[16]) = { tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp, \
- tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp }; \
- tmp = GetInnerThresh(&rnd); \
- DECLARE_ALIGNED(16, const uint8_t, \
- limit0[16]) = { tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp, \
- tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp }; \
- tmp = GetHevThresh(&rnd); \
- DECLARE_ALIGNED(16, const uint8_t, \
- thresh0[16]) = { tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp, \
- tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp }; \
- tmp = GetOuterThresh(&rnd); \
- DECLARE_ALIGNED(16, const uint8_t, \
- blimit1[16]) = { tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp, \
- tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp }; \
- tmp = GetInnerThresh(&rnd); \
- DECLARE_ALIGNED(16, const uint8_t, \
- limit1[16]) = { tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp, \
- tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp }; \
- tmp = GetHevThresh(&rnd); \
- DECLARE_ALIGNED(16, const uint8_t, \
- thresh1[16]) = { tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp, \
- tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp }; \
- int32_t p = kNumCoeffs / 32; \
- const uint8_t limit = *limit0 < *limit1 ? *limit0 : *limit1; \
- InitInput<a, b>(s, ref_s, &rnd, limit, mask_, p, i); \
- call_dualfilter(ref_s + 8 + p * 8, p, blimit0, limit0, thresh0, blimit1, \
- limit1, thresh1, bit_depth_, ref_loopfilter_op_); \
- ASM_REGISTER_STATE_CHECK( \
- call_dualfilter(s + 8 + p * 8, p, blimit0, limit0, thresh0, blimit1, \
- limit1, thresh1, bit_depth_, loopfilter_op_)); \
- for (int j = 0; j < kNumCoeffs; ++j) { \
- err_count += ref_s[j] != s[j]; \
- } \
- if (err_count && !err_count_total) { \
- first_failure = i; \
- } \
- err_count_total += err_count; \
- } \
- EXPECT_EQ(0, err_count_total) \
- << "Error: Loop8Test9Param, C output doesn't match SIMD " \
- "loopfilter output. " \
- << "First failed at test case " << first_failure;
-
-TEST_P(Loop8Test9Param_hbd, OperationCheck) { OPCHECKd(uint16_t, 16); }
-TEST_P(Loop8Test9Param_lbd, OperationCheck) { OPCHECKd(uint8_t, 8); }
-
-#define VALCHECKd(a, b) \
- ACMRandom rnd(ACMRandom::DeterministicSeed()); \
- const int count_test_block = number_of_iterations; \
- DECLARE_ALIGNED(b, a, s[kNumCoeffs]); \
- DECLARE_ALIGNED(b, a, ref_s[kNumCoeffs]); \
- int err_count_total = 0; \
- int first_failure = -1; \
- for (int i = 0; i < count_test_block; ++i) { \
- int err_count = 0; \
- uint8_t tmp = GetOuterThresh(&rnd); \
- DECLARE_ALIGNED(16, const uint8_t, \
- blimit0[16]) = { tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp, \
- tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp }; \
- tmp = GetInnerThresh(&rnd); \
- DECLARE_ALIGNED(16, const uint8_t, \
- limit0[16]) = { tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp, \
- tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp }; \
- tmp = GetHevThresh(&rnd); \
- DECLARE_ALIGNED(16, const uint8_t, \
- thresh0[16]) = { tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp, \
- tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp }; \
- tmp = GetOuterThresh(&rnd); \
- DECLARE_ALIGNED(16, const uint8_t, \
- blimit1[16]) = { tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp, \
- tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp }; \
- tmp = GetInnerThresh(&rnd); \
- DECLARE_ALIGNED(16, const uint8_t, \
- limit1[16]) = { tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp, \
- tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp }; \
- tmp = GetHevThresh(&rnd); \
- DECLARE_ALIGNED(16, const uint8_t, \
- thresh1[16]) = { tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp, \
- tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp }; \
- int32_t p = kNumCoeffs / 32; \
- for (int j = 0; j < kNumCoeffs; ++j) { \
- s[j] = rnd.Rand16() & mask_; \
- ref_s[j] = s[j]; \
- } \
- call_dualfilter(ref_s + 8 + p * 8, p, blimit0, limit0, thresh0, blimit1, \
- limit1, thresh1, bit_depth_, ref_loopfilter_op_); \
- ASM_REGISTER_STATE_CHECK( \
- call_dualfilter(s + 8 + p * 8, p, blimit0, limit0, thresh0, blimit1, \
- limit1, thresh1, bit_depth_, loopfilter_op_)); \
- for (int j = 0; j < kNumCoeffs; ++j) { \
- err_count += ref_s[j] != s[j]; \
- } \
- if (err_count && !err_count_total) { \
- first_failure = i; \
- } \
- err_count_total += err_count; \
- } \
- EXPECT_EQ(0, err_count_total) \
- << "Error: Loop8Test9Param, C output doesn't match SIMD " \
- "loopfilter output. " \
- << "First failed at test case " << first_failure;
-
-TEST_P(Loop8Test9Param_hbd, ValueCheck) { VALCHECKd(uint16_t, 16); }
-TEST_P(Loop8Test9Param_lbd, ValueCheck) { VALCHECKd(uint8_t, 8); }
-
-#define SPEEDCHECKd(a, b) \
- ACMRandom rnd(ACMRandom::DeterministicSeed()); \
- const int count_test_block = kSpeedTestNum; \
- DECLARE_ALIGNED(b, a, s[kNumCoeffs]); \
- uint8_t tmp = GetOuterThresh(&rnd); \
- DECLARE_ALIGNED(16, const uint8_t, \
- blimit0[16]) = { tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp, \
- tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp }; \
- tmp = GetInnerThresh(&rnd); \
- DECLARE_ALIGNED(16, const uint8_t, \
- limit0[16]) = { tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp, \
- tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp }; \
- tmp = GetHevThresh(&rnd); \
- DECLARE_ALIGNED(16, const uint8_t, \
- thresh0[16]) = { tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp, \
- tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp }; \
- tmp = GetOuterThresh(&rnd); \
- DECLARE_ALIGNED(16, const uint8_t, \
- blimit1[16]) = { tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp, \
- tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp }; \
- tmp = GetInnerThresh(&rnd); \
- DECLARE_ALIGNED(16, const uint8_t, \
- limit1[16]) = { tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp, \
- tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp }; \
- tmp = GetHevThresh(&rnd); \
- DECLARE_ALIGNED(16, const uint8_t, \
- thresh1[16]) = { tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp, \
- tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp }; \
- int32_t p = kNumCoeffs / 32; \
- for (int j = 0; j < kNumCoeffs; ++j) { \
- s[j] = rnd.Rand16() & mask_; \
- } \
- for (int i = 0; i < count_test_block; ++i) { \
- call_dualfilter(s + 8 + p * 8, p, blimit0, limit0, thresh0, blimit1, \
- limit1, thresh1, bit_depth_, loopfilter_op_); \
- }
-
-TEST_P(Loop8Test9Param_hbd, DISABLED_Speed) { SPEEDCHECKd(uint16_t, 16); }
-TEST_P(Loop8Test9Param_lbd, DISABLED_Speed) { SPEEDCHECKd(uint8_t, 8); }
-
-using ::testing::make_tuple;
-
-#if HAVE_SSE2
-
-const hbdloop_param_t kHbdLoop8Test6[] = {
- make_tuple(&aom_highbd_lpf_horizontal_4_sse2, &aom_highbd_lpf_horizontal_4_c,
- 8),
- make_tuple(&aom_highbd_lpf_vertical_4_sse2, &aom_highbd_lpf_vertical_4_c, 8),
- make_tuple(&aom_highbd_lpf_horizontal_6_sse2, &aom_highbd_lpf_horizontal_6_c,
- 8),
- make_tuple(&aom_highbd_lpf_horizontal_8_sse2, &aom_highbd_lpf_horizontal_8_c,
- 8),
- make_tuple(&aom_highbd_lpf_horizontal_14_sse2,
- &aom_highbd_lpf_horizontal_14_c, 8),
- make_tuple(&aom_highbd_lpf_vertical_6_sse2, &aom_highbd_lpf_vertical_6_c, 8),
- make_tuple(&aom_highbd_lpf_vertical_8_sse2, &aom_highbd_lpf_vertical_8_c, 8),
-
- make_tuple(&aom_highbd_lpf_vertical_14_sse2, &aom_highbd_lpf_vertical_14_c,
- 8),
- make_tuple(&aom_highbd_lpf_horizontal_4_sse2, &aom_highbd_lpf_horizontal_4_c,
- 10),
- make_tuple(&aom_highbd_lpf_vertical_4_sse2, &aom_highbd_lpf_vertical_4_c, 10),
- make_tuple(&aom_highbd_lpf_horizontal_6_sse2, &aom_highbd_lpf_horizontal_6_c,
- 10),
- make_tuple(&aom_highbd_lpf_horizontal_8_sse2, &aom_highbd_lpf_horizontal_8_c,
- 10),
- make_tuple(&aom_highbd_lpf_horizontal_14_sse2,
- &aom_highbd_lpf_horizontal_14_c, 10),
- make_tuple(&aom_highbd_lpf_vertical_6_sse2, &aom_highbd_lpf_vertical_6_c, 10),
- make_tuple(&aom_highbd_lpf_vertical_8_sse2, &aom_highbd_lpf_vertical_8_c, 10),
- make_tuple(&aom_highbd_lpf_vertical_14_sse2, &aom_highbd_lpf_vertical_14_c,
- 10),
- make_tuple(&aom_highbd_lpf_horizontal_4_sse2, &aom_highbd_lpf_horizontal_4_c,
- 12),
- make_tuple(&aom_highbd_lpf_vertical_4_sse2, &aom_highbd_lpf_vertical_4_c, 12),
- make_tuple(&aom_highbd_lpf_horizontal_6_sse2, &aom_highbd_lpf_horizontal_6_c,
- 12),
- make_tuple(&aom_highbd_lpf_horizontal_8_sse2, &aom_highbd_lpf_horizontal_8_c,
- 12),
- make_tuple(&aom_highbd_lpf_horizontal_14_sse2,
- &aom_highbd_lpf_horizontal_14_c, 12),
- make_tuple(&aom_highbd_lpf_vertical_14_sse2, &aom_highbd_lpf_vertical_14_c,
- 12),
- make_tuple(&aom_highbd_lpf_vertical_6_sse2, &aom_highbd_lpf_vertical_6_c, 12),
- make_tuple(&aom_highbd_lpf_vertical_8_sse2, &aom_highbd_lpf_vertical_8_c, 12)
-};
-
-INSTANTIATE_TEST_CASE_P(SSE2, Loop8Test6Param_hbd,
- ::testing::ValuesIn(kHbdLoop8Test6));
-
-const loop_param_t kLoop8Test6[] = {
- make_tuple(&aom_lpf_horizontal_4_sse2, &aom_lpf_horizontal_4_c, 8),
- make_tuple(&aom_lpf_horizontal_8_sse2, &aom_lpf_horizontal_8_c, 8),
- make_tuple(&aom_lpf_horizontal_6_sse2, &aom_lpf_horizontal_6_c, 8),
- make_tuple(&aom_lpf_vertical_6_sse2, &aom_lpf_vertical_6_c, 8),
- make_tuple(&aom_lpf_horizontal_14_sse2, &aom_lpf_horizontal_14_c, 8),
- make_tuple(&aom_lpf_vertical_4_sse2, &aom_lpf_vertical_4_c, 8),
- make_tuple(&aom_lpf_vertical_8_sse2, &aom_lpf_vertical_8_c, 8),
- make_tuple(&aom_lpf_vertical_14_sse2, &aom_lpf_vertical_14_c, 8),
-};
-
-INSTANTIATE_TEST_CASE_P(SSE2, Loop8Test6Param_lbd,
- ::testing::ValuesIn(kLoop8Test6));
-
-const dual_loop_param_t kLoop8Test9[] = {
- make_tuple(&aom_lpf_horizontal_4_dual_sse2, &aom_lpf_horizontal_4_dual_c, 8),
- make_tuple(&aom_lpf_vertical_4_dual_sse2, &aom_lpf_vertical_4_dual_c, 8),
- make_tuple(&aom_lpf_horizontal_6_dual_sse2, &aom_lpf_horizontal_6_dual_c, 8),
- make_tuple(&aom_lpf_vertical_6_dual_sse2, &aom_lpf_vertical_6_dual_c, 8),
- make_tuple(&aom_lpf_horizontal_8_dual_sse2, &aom_lpf_horizontal_8_dual_c, 8),
- make_tuple(&aom_lpf_vertical_8_dual_sse2, &aom_lpf_vertical_8_dual_c, 8),
- make_tuple(&aom_lpf_horizontal_14_dual_sse2, &aom_lpf_horizontal_14_dual_c,
- 8),
- make_tuple(&aom_lpf_vertical_14_dual_sse2, &aom_lpf_vertical_14_dual_c, 8)
-};
-
-INSTANTIATE_TEST_CASE_P(SSE2, Loop8Test9Param_lbd,
- ::testing::ValuesIn(kLoop8Test9));
-
-#endif // HAVE_SSE2
-
-#if HAVE_SSE2
-const hbddual_loop_param_t kHbdLoop8Test9[] = {
- make_tuple(&aom_highbd_lpf_horizontal_4_dual_sse2,
- &aom_highbd_lpf_horizontal_4_dual_c, 8),
- make_tuple(&aom_highbd_lpf_horizontal_6_dual_sse2,
- &aom_highbd_lpf_horizontal_6_dual_c, 8),
- make_tuple(&aom_highbd_lpf_horizontal_8_dual_sse2,
- &aom_highbd_lpf_horizontal_8_dual_c, 8),
- make_tuple(&aom_highbd_lpf_horizontal_14_dual_sse2,
- &aom_highbd_lpf_horizontal_14_dual_c, 8),
- make_tuple(&aom_highbd_lpf_vertical_4_dual_sse2,
- &aom_highbd_lpf_vertical_4_dual_c, 8),
- make_tuple(&aom_highbd_lpf_vertical_6_dual_sse2,
- &aom_highbd_lpf_vertical_6_dual_c, 8),
- make_tuple(&aom_highbd_lpf_vertical_8_dual_sse2,
- &aom_highbd_lpf_vertical_8_dual_c, 8),
- make_tuple(&aom_highbd_lpf_vertical_14_dual_sse2,
- &aom_highbd_lpf_vertical_14_dual_c, 8),
- make_tuple(&aom_highbd_lpf_horizontal_4_dual_sse2,
- &aom_highbd_lpf_horizontal_4_dual_c, 10),
- make_tuple(&aom_highbd_lpf_horizontal_6_dual_sse2,
- &aom_highbd_lpf_horizontal_6_dual_c, 10),
- make_tuple(&aom_highbd_lpf_horizontal_8_dual_sse2,
- &aom_highbd_lpf_horizontal_8_dual_c, 10),
- make_tuple(&aom_highbd_lpf_horizontal_14_dual_sse2,
- &aom_highbd_lpf_horizontal_14_dual_c, 10),
- make_tuple(&aom_highbd_lpf_vertical_4_dual_sse2,
- &aom_highbd_lpf_vertical_4_dual_c, 10),
- make_tuple(&aom_highbd_lpf_vertical_6_dual_sse2,
- &aom_highbd_lpf_vertical_6_dual_c, 10),
- make_tuple(&aom_highbd_lpf_vertical_8_dual_sse2,
- &aom_highbd_lpf_vertical_8_dual_c, 10),
- make_tuple(&aom_highbd_lpf_vertical_14_dual_sse2,
- &aom_highbd_lpf_vertical_14_dual_c, 10),
- make_tuple(&aom_highbd_lpf_horizontal_4_dual_sse2,
- &aom_highbd_lpf_horizontal_4_dual_c, 12),
- make_tuple(&aom_highbd_lpf_horizontal_6_dual_sse2,
- &aom_highbd_lpf_horizontal_6_dual_c, 12),
- make_tuple(&aom_highbd_lpf_horizontal_8_dual_sse2,
- &aom_highbd_lpf_horizontal_8_dual_c, 12),
- make_tuple(&aom_highbd_lpf_horizontal_14_dual_sse2,
- &aom_highbd_lpf_horizontal_14_dual_c, 12),
- make_tuple(&aom_highbd_lpf_vertical_4_dual_sse2,
- &aom_highbd_lpf_vertical_4_dual_c, 12),
- make_tuple(&aom_highbd_lpf_vertical_6_dual_sse2,
- &aom_highbd_lpf_vertical_6_dual_c, 12),
- make_tuple(&aom_highbd_lpf_vertical_8_dual_sse2,
- &aom_highbd_lpf_vertical_8_dual_c, 12),
- make_tuple(&aom_highbd_lpf_vertical_14_dual_sse2,
- &aom_highbd_lpf_vertical_14_dual_c, 12),
-};
-
-INSTANTIATE_TEST_CASE_P(SSE2, Loop8Test9Param_hbd,
- ::testing::ValuesIn(kHbdLoop8Test9));
-
-#endif // HAVE_SSE2
-
-#if HAVE_NEON
-const loop_param_t kLoop8Test6[] = {
- make_tuple(&aom_lpf_vertical_14_neon, &aom_lpf_vertical_14_c, 8),
- make_tuple(&aom_lpf_vertical_8_neon, &aom_lpf_vertical_8_c, 8),
- make_tuple(&aom_lpf_vertical_6_neon, &aom_lpf_vertical_6_c, 8),
- make_tuple(&aom_lpf_vertical_4_neon, &aom_lpf_vertical_4_c, 8),
- make_tuple(&aom_lpf_horizontal_14_neon, &aom_lpf_horizontal_14_c, 8),
- make_tuple(&aom_lpf_horizontal_8_neon, &aom_lpf_horizontal_8_c, 8),
- make_tuple(&aom_lpf_horizontal_6_neon, &aom_lpf_horizontal_6_c, 8),
- make_tuple(&aom_lpf_horizontal_4_neon, &aom_lpf_horizontal_4_c, 8)
-};
-
-INSTANTIATE_TEST_CASE_P(NEON, Loop8Test6Param_lbd,
- ::testing::ValuesIn(kLoop8Test6));
-#endif // HAVE_NEON
-
-#if HAVE_AVX2
-const hbddual_loop_param_t kHbdLoop8Test9Avx2[] = {
- make_tuple(&aom_highbd_lpf_horizontal_4_dual_avx2,
- &aom_highbd_lpf_horizontal_4_dual_c, 8),
- make_tuple(&aom_highbd_lpf_horizontal_4_dual_avx2,
- &aom_highbd_lpf_horizontal_4_dual_c, 10),
- make_tuple(&aom_highbd_lpf_horizontal_4_dual_avx2,
- &aom_highbd_lpf_horizontal_4_dual_c, 12),
- make_tuple(&aom_highbd_lpf_horizontal_8_dual_avx2,
- &aom_highbd_lpf_horizontal_8_dual_c, 8),
- make_tuple(&aom_highbd_lpf_horizontal_8_dual_avx2,
- &aom_highbd_lpf_horizontal_8_dual_c, 10),
- make_tuple(&aom_highbd_lpf_horizontal_8_dual_avx2,
- &aom_highbd_lpf_horizontal_8_dual_c, 12),
- make_tuple(&aom_highbd_lpf_vertical_4_dual_avx2,
- &aom_highbd_lpf_vertical_4_dual_c, 8),
- make_tuple(&aom_highbd_lpf_vertical_4_dual_avx2,
- &aom_highbd_lpf_vertical_4_dual_c, 10),
- make_tuple(&aom_highbd_lpf_vertical_4_dual_avx2,
- &aom_highbd_lpf_vertical_4_dual_c, 12),
- make_tuple(&aom_highbd_lpf_vertical_8_dual_avx2,
- &aom_highbd_lpf_vertical_8_dual_c, 8),
- make_tuple(&aom_highbd_lpf_vertical_8_dual_avx2,
- &aom_highbd_lpf_vertical_8_dual_c, 10),
- make_tuple(&aom_highbd_lpf_vertical_8_dual_avx2,
- &aom_highbd_lpf_vertical_8_dual_c, 12),
-};
-
-INSTANTIATE_TEST_CASE_P(AVX2, Loop8Test9Param_hbd,
- ::testing::ValuesIn(kHbdLoop8Test9Avx2));
-#endif
-} // namespace
diff --git a/third_party/aom/test/masked_sad_test.cc b/third_party/aom/test/masked_sad_test.cc
deleted file mode 100644
index 311f1877d..000000000
--- a/third_party/aom/test/masked_sad_test.cc
+++ /dev/null
@@ -1,342 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-#include <math.h>
-#include <stdlib.h>
-#include <string.h>
-
-#include "third_party/googletest/src/googletest/include/gtest/gtest.h"
-#include "test/acm_random.h"
-#include "test/clear_system_state.h"
-#include "test/register_state_check.h"
-#include "test/util.h"
-
-#include "config/aom_config.h"
-#include "config/aom_dsp_rtcd.h"
-
-#include "aom/aom_integer.h"
-
-using libaom_test::ACMRandom;
-
-namespace {
-const int number_of_iterations = 200;
-
-typedef unsigned int (*MaskedSADFunc)(const uint8_t *src, int src_stride,
- const uint8_t *ref, int ref_stride,
- const uint8_t *second_pred,
- const uint8_t *msk, int msk_stride,
- int invert_mask);
-typedef ::testing::tuple<MaskedSADFunc, MaskedSADFunc> MaskedSADParam;
-
-class MaskedSADTest : public ::testing::TestWithParam<MaskedSADParam> {
- public:
- virtual ~MaskedSADTest() {}
- virtual void SetUp() {
- maskedSAD_op_ = GET_PARAM(0);
- ref_maskedSAD_op_ = GET_PARAM(1);
- }
-
- virtual void TearDown() { libaom_test::ClearSystemState(); }
- void runMaskedSADTest(int run_times);
-
- protected:
- MaskedSADFunc maskedSAD_op_;
- MaskedSADFunc ref_maskedSAD_op_;
-};
-void MaskedSADTest::runMaskedSADTest(int run_times) {
- unsigned int ref_ret = 0, ret = 1;
- ACMRandom rnd(ACMRandom::DeterministicSeed());
- DECLARE_ALIGNED(16, uint8_t, src_ptr[MAX_SB_SIZE * MAX_SB_SIZE]);
- DECLARE_ALIGNED(16, uint8_t, ref_ptr[MAX_SB_SIZE * MAX_SB_SIZE]);
- DECLARE_ALIGNED(16, uint8_t, second_pred_ptr[MAX_SB_SIZE * MAX_SB_SIZE]);
- DECLARE_ALIGNED(16, uint8_t, msk_ptr[MAX_SB_SIZE * MAX_SB_SIZE]);
- int err_count = 0;
- int first_failure = -1;
- int src_stride = MAX_SB_SIZE;
- int ref_stride = MAX_SB_SIZE;
- int msk_stride = MAX_SB_SIZE;
- const int iters = run_times == 1 ? number_of_iterations : 1;
- for (int i = 0; i < iters; ++i) {
- for (int j = 0; j < MAX_SB_SIZE * MAX_SB_SIZE; j++) {
- src_ptr[j] = rnd.Rand8();
- ref_ptr[j] = rnd.Rand8();
- second_pred_ptr[j] = rnd.Rand8();
- msk_ptr[j] = ((rnd.Rand8() & 0x7f) > 64) ? rnd.Rand8() & 0x3f : 64;
- assert(msk_ptr[j] <= 64);
- }
-
- for (int invert_mask = 0; invert_mask < 2; ++invert_mask) {
- aom_usec_timer timer;
- aom_usec_timer_start(&timer);
- for (int repeat = 0; repeat < run_times; ++repeat) {
- ref_ret = ref_maskedSAD_op_(src_ptr, src_stride, ref_ptr, ref_stride,
- second_pred_ptr, msk_ptr, msk_stride,
- invert_mask);
- }
- aom_usec_timer_mark(&timer);
- const double time1 = static_cast<double>(aom_usec_timer_elapsed(&timer));
- aom_usec_timer_start(&timer);
- if (run_times == 1) {
- ASM_REGISTER_STATE_CHECK(ret = maskedSAD_op_(src_ptr, src_stride,
- ref_ptr, ref_stride,
- second_pred_ptr, msk_ptr,
- msk_stride, invert_mask));
- } else {
- for (int repeat = 0; repeat < run_times; ++repeat) {
- ret =
- maskedSAD_op_(src_ptr, src_stride, ref_ptr, ref_stride,
- second_pred_ptr, msk_ptr, msk_stride, invert_mask);
- }
- }
- aom_usec_timer_mark(&timer);
- const double time2 = static_cast<double>(aom_usec_timer_elapsed(&timer));
- if (run_times > 10) {
- printf("%7.2f/%7.2fns", time1, time2);
- printf("(%3.2f)\n", time1 / time2);
- }
- if (ret != ref_ret) {
- err_count++;
- if (first_failure == -1) first_failure = i;
- }
- }
- }
- EXPECT_EQ(0, err_count) << "Error: Masked SAD Test, output doesn't match. "
- << "First failed at test case " << first_failure;
-}
-
-TEST_P(MaskedSADTest, OperationCheck) { runMaskedSADTest(1); }
-
-TEST_P(MaskedSADTest, DISABLED_Speed) { runMaskedSADTest(2000000); }
-
-typedef unsigned int (*HighbdMaskedSADFunc)(const uint8_t *src, int src_stride,
- const uint8_t *ref, int ref_stride,
- const uint8_t *second_pred,
- const uint8_t *msk, int msk_stride,
- int invert_mask);
-typedef ::testing::tuple<HighbdMaskedSADFunc, HighbdMaskedSADFunc>
- HighbdMaskedSADParam;
-
-class HighbdMaskedSADTest
- : public ::testing::TestWithParam<HighbdMaskedSADParam> {
- public:
- virtual ~HighbdMaskedSADTest() {}
- virtual void SetUp() {
- maskedSAD_op_ = GET_PARAM(0);
- ref_maskedSAD_op_ = GET_PARAM(1);
- }
-
- virtual void TearDown() { libaom_test::ClearSystemState(); }
- void runHighbdMaskedSADTest(int run_times);
-
- protected:
- HighbdMaskedSADFunc maskedSAD_op_;
- HighbdMaskedSADFunc ref_maskedSAD_op_;
-};
-void HighbdMaskedSADTest::runHighbdMaskedSADTest(int run_times) {
- unsigned int ref_ret = 0, ret = 1;
- ACMRandom rnd(ACMRandom::DeterministicSeed());
- DECLARE_ALIGNED(16, uint16_t, src_ptr[MAX_SB_SIZE * MAX_SB_SIZE]);
- DECLARE_ALIGNED(16, uint16_t, ref_ptr[MAX_SB_SIZE * MAX_SB_SIZE]);
- DECLARE_ALIGNED(16, uint16_t, second_pred_ptr[MAX_SB_SIZE * MAX_SB_SIZE]);
- DECLARE_ALIGNED(16, uint8_t, msk_ptr[MAX_SB_SIZE * MAX_SB_SIZE]);
- uint8_t *src8_ptr = CONVERT_TO_BYTEPTR(src_ptr);
- uint8_t *ref8_ptr = CONVERT_TO_BYTEPTR(ref_ptr);
- uint8_t *second_pred8_ptr = CONVERT_TO_BYTEPTR(second_pred_ptr);
- int err_count = 0;
- int first_failure = -1;
- int src_stride = MAX_SB_SIZE;
- int ref_stride = MAX_SB_SIZE;
- int msk_stride = MAX_SB_SIZE;
- const int iters = run_times == 1 ? number_of_iterations : 1;
- for (int i = 0; i < iters; ++i) {
- for (int j = 0; j < MAX_SB_SIZE * MAX_SB_SIZE; j++) {
- src_ptr[j] = rnd.Rand16() & 0xfff;
- ref_ptr[j] = rnd.Rand16() & 0xfff;
- second_pred_ptr[j] = rnd.Rand16() & 0xfff;
- msk_ptr[j] = ((rnd.Rand8() & 0x7f) > 64) ? rnd.Rand8() & 0x3f : 64;
- }
-
- for (int invert_mask = 0; invert_mask < 2; ++invert_mask) {
- aom_usec_timer timer;
- aom_usec_timer_start(&timer);
- for (int repeat = 0; repeat < run_times; ++repeat) {
- ref_ret = ref_maskedSAD_op_(src8_ptr, src_stride, ref8_ptr, ref_stride,
- second_pred8_ptr, msk_ptr, msk_stride,
- invert_mask);
- }
- aom_usec_timer_mark(&timer);
- const double time1 = static_cast<double>(aom_usec_timer_elapsed(&timer));
- aom_usec_timer_start(&timer);
- if (run_times == 1) {
- ASM_REGISTER_STATE_CHECK(ret = maskedSAD_op_(src8_ptr, src_stride,
- ref8_ptr, ref_stride,
- second_pred8_ptr, msk_ptr,
- msk_stride, invert_mask));
- } else {
- for (int repeat = 0; repeat < run_times; ++repeat) {
- ret =
- maskedSAD_op_(src8_ptr, src_stride, ref8_ptr, ref_stride,
- second_pred8_ptr, msk_ptr, msk_stride, invert_mask);
- }
- }
- aom_usec_timer_mark(&timer);
- const double time2 = static_cast<double>(aom_usec_timer_elapsed(&timer));
- if (run_times > 10) {
- printf("%7.2f/%7.2fns", time1, time2);
- printf("(%3.2f)\n", time1 / time2);
- }
- if (ret != ref_ret) {
- err_count++;
- if (first_failure == -1) first_failure = i;
- }
- }
- }
- EXPECT_EQ(0, err_count)
- << "Error: High BD Masked SAD Test, output doesn't match. "
- << "First failed at test case " << first_failure;
-}
-
-TEST_P(HighbdMaskedSADTest, OperationCheck) { runHighbdMaskedSADTest(1); }
-
-TEST_P(HighbdMaskedSADTest, DISABLED_Speed) { runHighbdMaskedSADTest(1000000); }
-
-using ::testing::make_tuple;
-
-#if HAVE_SSSE3
-const MaskedSADParam msad_test[] = {
- make_tuple(&aom_masked_sad4x4_ssse3, &aom_masked_sad4x4_c),
- make_tuple(&aom_masked_sad4x8_ssse3, &aom_masked_sad4x8_c),
- make_tuple(&aom_masked_sad8x4_ssse3, &aom_masked_sad8x4_c),
- make_tuple(&aom_masked_sad8x8_ssse3, &aom_masked_sad8x8_c),
- make_tuple(&aom_masked_sad8x16_ssse3, &aom_masked_sad8x16_c),
- make_tuple(&aom_masked_sad16x8_ssse3, &aom_masked_sad16x8_c),
- make_tuple(&aom_masked_sad16x16_ssse3, &aom_masked_sad16x16_c),
- make_tuple(&aom_masked_sad16x32_ssse3, &aom_masked_sad16x32_c),
- make_tuple(&aom_masked_sad32x16_ssse3, &aom_masked_sad32x16_c),
- make_tuple(&aom_masked_sad32x32_ssse3, &aom_masked_sad32x32_c),
- make_tuple(&aom_masked_sad32x64_ssse3, &aom_masked_sad32x64_c),
- make_tuple(&aom_masked_sad64x32_ssse3, &aom_masked_sad64x32_c),
- make_tuple(&aom_masked_sad64x64_ssse3, &aom_masked_sad64x64_c),
- make_tuple(&aom_masked_sad64x128_ssse3, &aom_masked_sad64x128_c),
- make_tuple(&aom_masked_sad128x64_ssse3, &aom_masked_sad128x64_c),
- make_tuple(&aom_masked_sad128x128_ssse3, &aom_masked_sad128x128_c),
- make_tuple(&aom_masked_sad4x16_ssse3, &aom_masked_sad4x16_c),
- make_tuple(&aom_masked_sad16x4_ssse3, &aom_masked_sad16x4_c),
- make_tuple(&aom_masked_sad8x32_ssse3, &aom_masked_sad8x32_c),
- make_tuple(&aom_masked_sad32x8_ssse3, &aom_masked_sad32x8_c),
- make_tuple(&aom_masked_sad16x64_ssse3, &aom_masked_sad16x64_c),
- make_tuple(&aom_masked_sad64x16_ssse3, &aom_masked_sad64x16_c),
-};
-
-INSTANTIATE_TEST_CASE_P(SSSE3, MaskedSADTest, ::testing::ValuesIn(msad_test));
-
-const HighbdMaskedSADParam hbd_msad_test[] = {
- make_tuple(&aom_highbd_masked_sad4x4_ssse3, &aom_highbd_masked_sad4x4_c),
- make_tuple(&aom_highbd_masked_sad4x8_ssse3, &aom_highbd_masked_sad4x8_c),
- make_tuple(&aom_highbd_masked_sad8x4_ssse3, &aom_highbd_masked_sad8x4_c),
- make_tuple(&aom_highbd_masked_sad8x8_ssse3, &aom_highbd_masked_sad8x8_c),
- make_tuple(&aom_highbd_masked_sad8x16_ssse3, &aom_highbd_masked_sad8x16_c),
- make_tuple(&aom_highbd_masked_sad16x8_ssse3, &aom_highbd_masked_sad16x8_c),
- make_tuple(&aom_highbd_masked_sad16x16_ssse3, &aom_highbd_masked_sad16x16_c),
- make_tuple(&aom_highbd_masked_sad16x32_ssse3, &aom_highbd_masked_sad16x32_c),
- make_tuple(&aom_highbd_masked_sad32x16_ssse3, &aom_highbd_masked_sad32x16_c),
- make_tuple(&aom_highbd_masked_sad32x32_ssse3, &aom_highbd_masked_sad32x32_c),
- make_tuple(&aom_highbd_masked_sad32x64_ssse3, &aom_highbd_masked_sad32x64_c),
- make_tuple(&aom_highbd_masked_sad64x32_ssse3, &aom_highbd_masked_sad64x32_c),
- make_tuple(&aom_highbd_masked_sad64x64_ssse3, &aom_highbd_masked_sad64x64_c),
- make_tuple(&aom_highbd_masked_sad64x128_ssse3,
- &aom_highbd_masked_sad64x128_c),
- make_tuple(&aom_highbd_masked_sad128x64_ssse3,
- &aom_highbd_masked_sad128x64_c),
- make_tuple(&aom_highbd_masked_sad128x128_ssse3,
- &aom_highbd_masked_sad128x128_c),
- make_tuple(&aom_highbd_masked_sad4x16_ssse3, &aom_highbd_masked_sad4x16_c),
- make_tuple(&aom_highbd_masked_sad16x4_ssse3, &aom_highbd_masked_sad16x4_c),
- make_tuple(&aom_highbd_masked_sad8x32_ssse3, &aom_highbd_masked_sad8x32_c),
- make_tuple(&aom_highbd_masked_sad32x8_ssse3, &aom_highbd_masked_sad32x8_c),
- make_tuple(&aom_highbd_masked_sad16x64_ssse3, &aom_highbd_masked_sad16x64_c),
- make_tuple(&aom_highbd_masked_sad64x16_ssse3, &aom_highbd_masked_sad64x16_c),
-};
-
-INSTANTIATE_TEST_CASE_P(SSSE3, HighbdMaskedSADTest,
- ::testing::ValuesIn(hbd_msad_test));
-#endif // HAVE_SSSE3
-
-#if HAVE_AVX2
-const MaskedSADParam msad_avx2_test[] = {
- make_tuple(&aom_masked_sad4x4_avx2, &aom_masked_sad4x4_ssse3),
- make_tuple(&aom_masked_sad4x8_avx2, &aom_masked_sad4x8_ssse3),
- make_tuple(&aom_masked_sad8x4_avx2, &aom_masked_sad8x4_ssse3),
- make_tuple(&aom_masked_sad8x8_avx2, &aom_masked_sad8x8_ssse3),
- make_tuple(&aom_masked_sad8x16_avx2, &aom_masked_sad8x16_ssse3),
- make_tuple(&aom_masked_sad16x8_avx2, &aom_masked_sad16x8_ssse3),
- make_tuple(&aom_masked_sad16x16_avx2, &aom_masked_sad16x16_ssse3),
- make_tuple(&aom_masked_sad16x32_avx2, &aom_masked_sad16x32_ssse3),
- make_tuple(&aom_masked_sad32x16_avx2, &aom_masked_sad32x16_ssse3),
- make_tuple(&aom_masked_sad32x32_avx2, &aom_masked_sad32x32_ssse3),
- make_tuple(&aom_masked_sad32x64_avx2, &aom_masked_sad32x64_ssse3),
- make_tuple(&aom_masked_sad64x32_avx2, &aom_masked_sad64x32_ssse3),
- make_tuple(&aom_masked_sad64x64_avx2, &aom_masked_sad64x64_ssse3),
- make_tuple(&aom_masked_sad64x128_avx2, &aom_masked_sad64x128_ssse3),
- make_tuple(&aom_masked_sad128x64_avx2, &aom_masked_sad128x64_ssse3),
- make_tuple(&aom_masked_sad128x128_avx2, &aom_masked_sad128x128_ssse3),
- make_tuple(&aom_masked_sad4x16_avx2, &aom_masked_sad4x16_ssse3),
- make_tuple(&aom_masked_sad16x4_avx2, &aom_masked_sad16x4_ssse3),
- make_tuple(&aom_masked_sad8x32_avx2, &aom_masked_sad8x32_ssse3),
- make_tuple(&aom_masked_sad32x8_avx2, &aom_masked_sad32x8_ssse3),
- make_tuple(&aom_masked_sad16x64_avx2, &aom_masked_sad16x64_ssse3),
- make_tuple(&aom_masked_sad64x16_avx2, &aom_masked_sad64x16_ssse3)
-};
-
-INSTANTIATE_TEST_CASE_P(AVX2, MaskedSADTest,
- ::testing::ValuesIn(msad_avx2_test));
-
-const HighbdMaskedSADParam hbd_msad_avx2_test[] = {
- make_tuple(&aom_highbd_masked_sad4x4_avx2, &aom_highbd_masked_sad4x4_ssse3),
- make_tuple(&aom_highbd_masked_sad4x8_avx2, &aom_highbd_masked_sad4x8_ssse3),
- make_tuple(&aom_highbd_masked_sad8x4_avx2, &aom_highbd_masked_sad8x4_ssse3),
- make_tuple(&aom_highbd_masked_sad8x8_avx2, &aom_highbd_masked_sad8x8_ssse3),
- make_tuple(&aom_highbd_masked_sad8x16_avx2, &aom_highbd_masked_sad8x16_ssse3),
- make_tuple(&aom_highbd_masked_sad16x8_avx2, &aom_highbd_masked_sad16x8_ssse3),
- make_tuple(&aom_highbd_masked_sad16x16_avx2,
- &aom_highbd_masked_sad16x16_ssse3),
- make_tuple(&aom_highbd_masked_sad16x32_avx2,
- &aom_highbd_masked_sad16x32_ssse3),
- make_tuple(&aom_highbd_masked_sad32x16_avx2,
- &aom_highbd_masked_sad32x16_ssse3),
- make_tuple(&aom_highbd_masked_sad32x32_avx2,
- &aom_highbd_masked_sad32x32_ssse3),
- make_tuple(&aom_highbd_masked_sad32x64_avx2,
- &aom_highbd_masked_sad32x64_ssse3),
- make_tuple(&aom_highbd_masked_sad64x32_avx2,
- &aom_highbd_masked_sad64x32_ssse3),
- make_tuple(&aom_highbd_masked_sad64x64_avx2,
- &aom_highbd_masked_sad64x64_ssse3),
- make_tuple(&aom_highbd_masked_sad64x128_avx2,
- &aom_highbd_masked_sad64x128_ssse3),
- make_tuple(&aom_highbd_masked_sad128x64_avx2,
- &aom_highbd_masked_sad128x64_ssse3),
- make_tuple(&aom_highbd_masked_sad128x128_avx2,
- &aom_highbd_masked_sad128x128_ssse3),
- make_tuple(&aom_highbd_masked_sad4x16_avx2, &aom_highbd_masked_sad4x16_ssse3),
- make_tuple(&aom_highbd_masked_sad16x4_avx2, &aom_highbd_masked_sad16x4_ssse3),
- make_tuple(&aom_highbd_masked_sad8x32_avx2, &aom_highbd_masked_sad8x32_ssse3),
- make_tuple(&aom_highbd_masked_sad32x8_avx2, &aom_highbd_masked_sad32x8_ssse3),
- make_tuple(&aom_highbd_masked_sad16x64_avx2,
- &aom_highbd_masked_sad16x64_ssse3),
- make_tuple(&aom_highbd_masked_sad64x16_avx2,
- &aom_highbd_masked_sad64x16_ssse3)
-};
-
-INSTANTIATE_TEST_CASE_P(AVX2, HighbdMaskedSADTest,
- ::testing::ValuesIn(hbd_msad_avx2_test));
-#endif // HAVE_AVX2
-
-} // namespace
diff --git a/third_party/aom/test/masked_variance_test.cc b/third_party/aom/test/masked_variance_test.cc
deleted file mode 100644
index 275b9feb6..000000000
--- a/third_party/aom/test/masked_variance_test.cc
+++ /dev/null
@@ -1,459 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#include <math.h>
-#include <stdlib.h>
-#include <string.h>
-
-#include "third_party/googletest/src/googletest/include/gtest/gtest.h"
-#include "test/acm_random.h"
-#include "test/clear_system_state.h"
-#include "test/register_state_check.h"
-#include "test/util.h"
-
-#include "config/aom_config.h"
-#include "config/aom_dsp_rtcd.h"
-
-#include "aom/aom_codec.h"
-#include "aom/aom_integer.h"
-#include "aom_dsp/aom_filter.h"
-#include "aom_mem/aom_mem.h"
-
-using libaom_test::ACMRandom;
-
-namespace {
-const int number_of_iterations = 200;
-
-typedef unsigned int (*MaskedSubPixelVarianceFunc)(
- const uint8_t *src, int src_stride, int xoffset, int yoffset,
- const uint8_t *ref, int ref_stride, const uint8_t *second_pred,
- const uint8_t *msk, int msk_stride, int invert_mask, unsigned int *sse);
-
-typedef ::testing::tuple<MaskedSubPixelVarianceFunc, MaskedSubPixelVarianceFunc>
- MaskedSubPixelVarianceParam;
-
-class MaskedSubPixelVarianceTest
- : public ::testing::TestWithParam<MaskedSubPixelVarianceParam> {
- public:
- virtual ~MaskedSubPixelVarianceTest() {}
- virtual void SetUp() {
- opt_func_ = GET_PARAM(0);
- ref_func_ = GET_PARAM(1);
- }
-
- virtual void TearDown() { libaom_test::ClearSystemState(); }
-
- protected:
- MaskedSubPixelVarianceFunc opt_func_;
- MaskedSubPixelVarianceFunc ref_func_;
-};
-
-TEST_P(MaskedSubPixelVarianceTest, OperationCheck) {
- unsigned int ref_ret, opt_ret;
- unsigned int ref_sse, opt_sse;
- ACMRandom rnd(ACMRandom::DeterministicSeed());
- // Note: We pad out the input array to a multiple of 16 bytes wide, so that
- // consecutive rows keep the 16-byte alignment.
- DECLARE_ALIGNED(16, uint8_t, src_ptr[(MAX_SB_SIZE + 1) * (MAX_SB_SIZE + 16)]);
- DECLARE_ALIGNED(16, uint8_t, ref_ptr[(MAX_SB_SIZE + 1) * (MAX_SB_SIZE + 16)]);
- DECLARE_ALIGNED(16, uint8_t,
- second_pred_ptr[(MAX_SB_SIZE + 1) * (MAX_SB_SIZE + 16)]);
- DECLARE_ALIGNED(16, uint8_t, msk_ptr[(MAX_SB_SIZE + 1) * (MAX_SB_SIZE + 16)]);
- int err_count = 0;
- int first_failure = -1;
- int src_stride = (MAX_SB_SIZE + 16);
- int ref_stride = (MAX_SB_SIZE + 16);
- int msk_stride = (MAX_SB_SIZE + 16);
- int xoffset;
- int yoffset;
-
- for (int i = 0; i < number_of_iterations; ++i) {
- int xoffsets[] = { 0, 4, rnd(BIL_SUBPEL_SHIFTS) };
- int yoffsets[] = { 0, 4, rnd(BIL_SUBPEL_SHIFTS) };
- for (int j = 0; j < (MAX_SB_SIZE + 1) * (MAX_SB_SIZE + 16); j++) {
- src_ptr[j] = rnd.Rand8();
- ref_ptr[j] = rnd.Rand8();
- second_pred_ptr[j] = rnd.Rand8();
- msk_ptr[j] = rnd(65);
- }
- for (int k = 0; k < 3; k++) {
- for (int l = 0; l < 3; l++) {
- xoffset = xoffsets[k];
- yoffset = yoffsets[l];
- for (int invert_mask = 0; invert_mask < 2; ++invert_mask) {
- ref_ret = ref_func_(src_ptr, src_stride, xoffset, yoffset, ref_ptr,
- ref_stride, second_pred_ptr, msk_ptr, msk_stride,
- invert_mask, &ref_sse);
- ASM_REGISTER_STATE_CHECK(
- opt_ret = opt_func_(src_ptr, src_stride, xoffset, yoffset,
- ref_ptr, ref_stride, second_pred_ptr, msk_ptr,
- msk_stride, invert_mask, &opt_sse));
-
- if (opt_ret != ref_ret || opt_sse != ref_sse) {
- err_count++;
- if (first_failure == -1) first_failure = i;
- }
- }
- }
- }
- }
-
- EXPECT_EQ(0, err_count)
- << "Error: Masked Sub Pixel Variance Test OperationCheck,"
- << "C output doesn't match SSSE3 output. "
- << "First failed at test case " << first_failure;
-}
-
-TEST_P(MaskedSubPixelVarianceTest, ExtremeValues) {
- unsigned int ref_ret, opt_ret;
- unsigned int ref_sse, opt_sse;
- ACMRandom rnd(ACMRandom::DeterministicSeed());
- DECLARE_ALIGNED(16, uint8_t, src_ptr[(MAX_SB_SIZE + 1) * (MAX_SB_SIZE + 16)]);
- DECLARE_ALIGNED(16, uint8_t, ref_ptr[(MAX_SB_SIZE + 1) * (MAX_SB_SIZE + 16)]);
- DECLARE_ALIGNED(16, uint8_t,
- second_pred_ptr[(MAX_SB_SIZE + 1) * (MAX_SB_SIZE + 16)]);
- DECLARE_ALIGNED(16, uint8_t, msk_ptr[(MAX_SB_SIZE + 1) * (MAX_SB_SIZE + 16)]);
- int first_failure_x = -1;
- int first_failure_y = -1;
- int err_count = 0;
- int first_failure = -1;
- int src_stride = (MAX_SB_SIZE + 16);
- int ref_stride = (MAX_SB_SIZE + 16);
- int msk_stride = (MAX_SB_SIZE + 16);
-
- for (int xoffset = 0; xoffset < BIL_SUBPEL_SHIFTS; xoffset++) {
- for (int yoffset = 0; yoffset < BIL_SUBPEL_SHIFTS; yoffset++) {
- for (int i = 0; i < 16; ++i) {
- memset(src_ptr, (i & 0x1) ? 255 : 0,
- (MAX_SB_SIZE + 1) * (MAX_SB_SIZE + 16));
- memset(ref_ptr, (i & 0x2) ? 255 : 0,
- (MAX_SB_SIZE + 1) * (MAX_SB_SIZE + 16));
- memset(second_pred_ptr, (i & 0x4) ? 255 : 0,
- (MAX_SB_SIZE + 1) * (MAX_SB_SIZE + 16));
- memset(msk_ptr, (i & 0x8) ? 64 : 0,
- (MAX_SB_SIZE + 1) * (MAX_SB_SIZE + 16));
-
- for (int invert_mask = 0; invert_mask < 2; ++invert_mask) {
- ref_ret = ref_func_(src_ptr, src_stride, xoffset, yoffset, ref_ptr,
- ref_stride, second_pred_ptr, msk_ptr, msk_stride,
- invert_mask, &ref_sse);
- ASM_REGISTER_STATE_CHECK(
- opt_ret = opt_func_(src_ptr, src_stride, xoffset, yoffset,
- ref_ptr, ref_stride, second_pred_ptr, msk_ptr,
- msk_stride, invert_mask, &opt_sse));
-
- if (opt_ret != ref_ret || opt_sse != ref_sse) {
- err_count++;
- if (first_failure == -1) {
- first_failure = i;
- first_failure_x = xoffset;
- first_failure_y = yoffset;
- }
- }
- }
- }
- }
- }
-
- EXPECT_EQ(0, err_count) << "Error: Masked Variance Test ExtremeValues,"
- << "C output doesn't match SSSE3 output. "
- << "First failed at test case " << first_failure
- << " x_offset = " << first_failure_x
- << " y_offset = " << first_failure_y;
-}
-
-typedef ::testing::tuple<MaskedSubPixelVarianceFunc, MaskedSubPixelVarianceFunc,
- aom_bit_depth_t>
- HighbdMaskedSubPixelVarianceParam;
-
-class HighbdMaskedSubPixelVarianceTest
- : public ::testing::TestWithParam<HighbdMaskedSubPixelVarianceParam> {
- public:
- virtual ~HighbdMaskedSubPixelVarianceTest() {}
- virtual void SetUp() {
- opt_func_ = GET_PARAM(0);
- ref_func_ = GET_PARAM(1);
- bit_depth_ = GET_PARAM(2);
- }
-
- virtual void TearDown() { libaom_test::ClearSystemState(); }
-
- protected:
- MaskedSubPixelVarianceFunc opt_func_;
- MaskedSubPixelVarianceFunc ref_func_;
- aom_bit_depth_t bit_depth_;
-};
-
-TEST_P(HighbdMaskedSubPixelVarianceTest, OperationCheck) {
- unsigned int ref_ret, opt_ret;
- unsigned int ref_sse, opt_sse;
- ACMRandom rnd(ACMRandom::DeterministicSeed());
- DECLARE_ALIGNED(16, uint16_t, src_ptr[(MAX_SB_SIZE + 1) * (MAX_SB_SIZE + 8)]);
- DECLARE_ALIGNED(16, uint16_t, ref_ptr[(MAX_SB_SIZE + 1) * (MAX_SB_SIZE + 8)]);
- DECLARE_ALIGNED(16, uint16_t,
- second_pred_ptr[(MAX_SB_SIZE + 1) * (MAX_SB_SIZE + 8)]);
- DECLARE_ALIGNED(16, uint8_t, msk_ptr[(MAX_SB_SIZE + 1) * (MAX_SB_SIZE + 8)]);
- uint8_t *src8_ptr = CONVERT_TO_BYTEPTR(src_ptr);
- uint8_t *ref8_ptr = CONVERT_TO_BYTEPTR(ref_ptr);
- uint8_t *second_pred8_ptr = CONVERT_TO_BYTEPTR(second_pred_ptr);
- int err_count = 0;
- int first_failure = -1;
- int first_failure_x = -1;
- int first_failure_y = -1;
- int src_stride = (MAX_SB_SIZE + 8);
- int ref_stride = (MAX_SB_SIZE + 8);
- int msk_stride = (MAX_SB_SIZE + 8);
- int xoffset, yoffset;
-
- for (int i = 0; i < number_of_iterations; ++i) {
- for (int j = 0; j < (MAX_SB_SIZE + 1) * (MAX_SB_SIZE + 8); j++) {
- src_ptr[j] = rnd.Rand16() & ((1 << bit_depth_) - 1);
- ref_ptr[j] = rnd.Rand16() & ((1 << bit_depth_) - 1);
- second_pred_ptr[j] = rnd.Rand16() & ((1 << bit_depth_) - 1);
- msk_ptr[j] = rnd(65);
- }
- for (xoffset = 0; xoffset < BIL_SUBPEL_SHIFTS; xoffset++) {
- for (yoffset = 0; yoffset < BIL_SUBPEL_SHIFTS; yoffset++) {
- for (int invert_mask = 0; invert_mask < 2; ++invert_mask) {
- ref_ret = ref_func_(src8_ptr, src_stride, xoffset, yoffset, ref8_ptr,
- ref_stride, second_pred8_ptr, msk_ptr, msk_stride,
- invert_mask, &ref_sse);
- ASM_REGISTER_STATE_CHECK(
- opt_ret = opt_func_(src8_ptr, src_stride, xoffset, yoffset,
- ref8_ptr, ref_stride, second_pred8_ptr,
- msk_ptr, msk_stride, invert_mask, &opt_sse));
-
- if (opt_ret != ref_ret || opt_sse != ref_sse) {
- err_count++;
- if (first_failure == -1) {
- first_failure = i;
- first_failure_x = xoffset;
- first_failure_y = yoffset;
- }
- }
- }
- }
- }
- }
-
- EXPECT_EQ(0, err_count)
- << "Error: Masked Sub Pixel Variance Test OperationCheck,"
- << "C output doesn't match SSSE3 output. "
- << "First failed at test case " << first_failure
- << " x_offset = " << first_failure_x << " y_offset = " << first_failure_y;
-}
-
-TEST_P(HighbdMaskedSubPixelVarianceTest, ExtremeValues) {
- unsigned int ref_ret, opt_ret;
- unsigned int ref_sse, opt_sse;
- ACMRandom rnd(ACMRandom::DeterministicSeed());
- DECLARE_ALIGNED(16, uint16_t, src_ptr[(MAX_SB_SIZE + 1) * (MAX_SB_SIZE + 8)]);
- DECLARE_ALIGNED(16, uint16_t, ref_ptr[(MAX_SB_SIZE + 1) * (MAX_SB_SIZE + 8)]);
- DECLARE_ALIGNED(16, uint8_t, msk_ptr[(MAX_SB_SIZE + 1) * (MAX_SB_SIZE + 8)]);
- DECLARE_ALIGNED(16, uint16_t,
- second_pred_ptr[(MAX_SB_SIZE + 1) * (MAX_SB_SIZE + 8)]);
- uint8_t *src8_ptr = CONVERT_TO_BYTEPTR(src_ptr);
- uint8_t *ref8_ptr = CONVERT_TO_BYTEPTR(ref_ptr);
- uint8_t *second_pred8_ptr = CONVERT_TO_BYTEPTR(second_pred_ptr);
- int first_failure_x = -1;
- int first_failure_y = -1;
- int err_count = 0;
- int first_failure = -1;
- int src_stride = (MAX_SB_SIZE + 8);
- int ref_stride = (MAX_SB_SIZE + 8);
- int msk_stride = (MAX_SB_SIZE + 8);
-
- for (int xoffset = 0; xoffset < BIL_SUBPEL_SHIFTS; xoffset++) {
- for (int yoffset = 0; yoffset < BIL_SUBPEL_SHIFTS; yoffset++) {
- for (int i = 0; i < 16; ++i) {
- aom_memset16(src_ptr, (i & 0x1) ? ((1 << bit_depth_) - 1) : 0,
- (MAX_SB_SIZE + 1) * (MAX_SB_SIZE + 8));
- aom_memset16(ref_ptr, (i & 0x2) ? ((1 << bit_depth_) - 1) : 0,
- (MAX_SB_SIZE + 1) * (MAX_SB_SIZE + 8));
- aom_memset16(second_pred_ptr, (i & 0x4) ? ((1 << bit_depth_) - 1) : 0,
- (MAX_SB_SIZE + 1) * (MAX_SB_SIZE + 8));
- memset(msk_ptr, (i & 0x8) ? 64 : 0,
- (MAX_SB_SIZE + 1) * (MAX_SB_SIZE + 8));
-
- for (int invert_mask = 0; invert_mask < 2; ++invert_mask) {
- ref_ret = ref_func_(src8_ptr, src_stride, xoffset, yoffset, ref8_ptr,
- ref_stride, second_pred8_ptr, msk_ptr, msk_stride,
- invert_mask, &ref_sse);
- ASM_REGISTER_STATE_CHECK(
- opt_ret = opt_func_(src8_ptr, src_stride, xoffset, yoffset,
- ref8_ptr, ref_stride, second_pred8_ptr,
- msk_ptr, msk_stride, invert_mask, &opt_sse));
-
- if (opt_ret != ref_ret || opt_sse != ref_sse) {
- err_count++;
- if (first_failure == -1) {
- first_failure = i;
- first_failure_x = xoffset;
- first_failure_y = yoffset;
- }
- }
- }
- }
- }
- }
-
- EXPECT_EQ(0, err_count) << "Error: Masked Variance Test ExtremeValues,"
- << "C output doesn't match SSSE3 output. "
- << "First failed at test case " << first_failure
- << " x_offset = " << first_failure_x
- << " y_offset = " << first_failure_y;
-}
-
-using ::testing::make_tuple;
-
-#if HAVE_SSSE3
-
-const MaskedSubPixelVarianceParam sub_pel_var_test[] = {
- make_tuple(&aom_masked_sub_pixel_variance128x128_ssse3,
- &aom_masked_sub_pixel_variance128x128_c),
- make_tuple(&aom_masked_sub_pixel_variance128x64_ssse3,
- &aom_masked_sub_pixel_variance128x64_c),
- make_tuple(&aom_masked_sub_pixel_variance64x128_ssse3,
- &aom_masked_sub_pixel_variance64x128_c),
- make_tuple(&aom_masked_sub_pixel_variance64x64_ssse3,
- &aom_masked_sub_pixel_variance64x64_c),
- make_tuple(&aom_masked_sub_pixel_variance64x32_ssse3,
- &aom_masked_sub_pixel_variance64x32_c),
- make_tuple(&aom_masked_sub_pixel_variance32x64_ssse3,
- &aom_masked_sub_pixel_variance32x64_c),
- make_tuple(&aom_masked_sub_pixel_variance32x32_ssse3,
- &aom_masked_sub_pixel_variance32x32_c),
- make_tuple(&aom_masked_sub_pixel_variance32x16_ssse3,
- &aom_masked_sub_pixel_variance32x16_c),
- make_tuple(&aom_masked_sub_pixel_variance16x32_ssse3,
- &aom_masked_sub_pixel_variance16x32_c),
- make_tuple(&aom_masked_sub_pixel_variance16x16_ssse3,
- &aom_masked_sub_pixel_variance16x16_c),
- make_tuple(&aom_masked_sub_pixel_variance16x8_ssse3,
- &aom_masked_sub_pixel_variance16x8_c),
- make_tuple(&aom_masked_sub_pixel_variance8x16_ssse3,
- &aom_masked_sub_pixel_variance8x16_c),
- make_tuple(&aom_masked_sub_pixel_variance8x8_ssse3,
- &aom_masked_sub_pixel_variance8x8_c),
- make_tuple(&aom_masked_sub_pixel_variance8x4_ssse3,
- &aom_masked_sub_pixel_variance8x4_c),
- make_tuple(&aom_masked_sub_pixel_variance4x8_ssse3,
- &aom_masked_sub_pixel_variance4x8_c),
- make_tuple(&aom_masked_sub_pixel_variance4x4_ssse3,
- &aom_masked_sub_pixel_variance4x4_c)
-};
-
-INSTANTIATE_TEST_CASE_P(SSSE3_C_COMPARE, MaskedSubPixelVarianceTest,
- ::testing::ValuesIn(sub_pel_var_test));
-
-const HighbdMaskedSubPixelVarianceParam hbd_sub_pel_var_test[] = {
- make_tuple(&aom_highbd_8_masked_sub_pixel_variance128x128_ssse3,
- &aom_highbd_8_masked_sub_pixel_variance128x128_c, AOM_BITS_8),
- make_tuple(&aom_highbd_8_masked_sub_pixel_variance128x64_ssse3,
- &aom_highbd_8_masked_sub_pixel_variance128x64_c, AOM_BITS_8),
- make_tuple(&aom_highbd_8_masked_sub_pixel_variance64x128_ssse3,
- &aom_highbd_8_masked_sub_pixel_variance64x128_c, AOM_BITS_8),
- make_tuple(&aom_highbd_8_masked_sub_pixel_variance64x64_ssse3,
- &aom_highbd_8_masked_sub_pixel_variance64x64_c, AOM_BITS_8),
- make_tuple(&aom_highbd_8_masked_sub_pixel_variance64x32_ssse3,
- &aom_highbd_8_masked_sub_pixel_variance64x32_c, AOM_BITS_8),
- make_tuple(&aom_highbd_8_masked_sub_pixel_variance32x64_ssse3,
- &aom_highbd_8_masked_sub_pixel_variance32x64_c, AOM_BITS_8),
- make_tuple(&aom_highbd_8_masked_sub_pixel_variance32x32_ssse3,
- &aom_highbd_8_masked_sub_pixel_variance32x32_c, AOM_BITS_8),
- make_tuple(&aom_highbd_8_masked_sub_pixel_variance32x16_ssse3,
- &aom_highbd_8_masked_sub_pixel_variance32x16_c, AOM_BITS_8),
- make_tuple(&aom_highbd_8_masked_sub_pixel_variance16x32_ssse3,
- &aom_highbd_8_masked_sub_pixel_variance16x32_c, AOM_BITS_8),
- make_tuple(&aom_highbd_8_masked_sub_pixel_variance16x16_ssse3,
- &aom_highbd_8_masked_sub_pixel_variance16x16_c, AOM_BITS_8),
- make_tuple(&aom_highbd_8_masked_sub_pixel_variance16x8_ssse3,
- &aom_highbd_8_masked_sub_pixel_variance16x8_c, AOM_BITS_8),
- make_tuple(&aom_highbd_8_masked_sub_pixel_variance8x16_ssse3,
- &aom_highbd_8_masked_sub_pixel_variance8x16_c, AOM_BITS_8),
- make_tuple(&aom_highbd_8_masked_sub_pixel_variance8x8_ssse3,
- &aom_highbd_8_masked_sub_pixel_variance8x8_c, AOM_BITS_8),
- make_tuple(&aom_highbd_8_masked_sub_pixel_variance8x4_ssse3,
- &aom_highbd_8_masked_sub_pixel_variance8x4_c, AOM_BITS_8),
- make_tuple(&aom_highbd_8_masked_sub_pixel_variance4x8_ssse3,
- &aom_highbd_8_masked_sub_pixel_variance4x8_c, AOM_BITS_8),
- make_tuple(&aom_highbd_8_masked_sub_pixel_variance4x4_ssse3,
- &aom_highbd_8_masked_sub_pixel_variance4x4_c, AOM_BITS_8),
- make_tuple(&aom_highbd_10_masked_sub_pixel_variance128x128_ssse3,
- &aom_highbd_10_masked_sub_pixel_variance128x128_c, AOM_BITS_10),
- make_tuple(&aom_highbd_10_masked_sub_pixel_variance128x64_ssse3,
- &aom_highbd_10_masked_sub_pixel_variance128x64_c, AOM_BITS_10),
- make_tuple(&aom_highbd_10_masked_sub_pixel_variance64x128_ssse3,
- &aom_highbd_10_masked_sub_pixel_variance64x128_c, AOM_BITS_10),
- make_tuple(&aom_highbd_10_masked_sub_pixel_variance64x64_ssse3,
- &aom_highbd_10_masked_sub_pixel_variance64x64_c, AOM_BITS_10),
- make_tuple(&aom_highbd_10_masked_sub_pixel_variance64x32_ssse3,
- &aom_highbd_10_masked_sub_pixel_variance64x32_c, AOM_BITS_10),
- make_tuple(&aom_highbd_10_masked_sub_pixel_variance32x64_ssse3,
- &aom_highbd_10_masked_sub_pixel_variance32x64_c, AOM_BITS_10),
- make_tuple(&aom_highbd_10_masked_sub_pixel_variance32x32_ssse3,
- &aom_highbd_10_masked_sub_pixel_variance32x32_c, AOM_BITS_10),
- make_tuple(&aom_highbd_10_masked_sub_pixel_variance32x16_ssse3,
- &aom_highbd_10_masked_sub_pixel_variance32x16_c, AOM_BITS_10),
- make_tuple(&aom_highbd_10_masked_sub_pixel_variance16x32_ssse3,
- &aom_highbd_10_masked_sub_pixel_variance16x32_c, AOM_BITS_10),
- make_tuple(&aom_highbd_10_masked_sub_pixel_variance16x16_ssse3,
- &aom_highbd_10_masked_sub_pixel_variance16x16_c, AOM_BITS_10),
- make_tuple(&aom_highbd_10_masked_sub_pixel_variance16x8_ssse3,
- &aom_highbd_10_masked_sub_pixel_variance16x8_c, AOM_BITS_10),
- make_tuple(&aom_highbd_10_masked_sub_pixel_variance8x16_ssse3,
- &aom_highbd_10_masked_sub_pixel_variance8x16_c, AOM_BITS_10),
- make_tuple(&aom_highbd_10_masked_sub_pixel_variance8x8_ssse3,
- &aom_highbd_10_masked_sub_pixel_variance8x8_c, AOM_BITS_10),
- make_tuple(&aom_highbd_10_masked_sub_pixel_variance8x4_ssse3,
- &aom_highbd_10_masked_sub_pixel_variance8x4_c, AOM_BITS_10),
- make_tuple(&aom_highbd_10_masked_sub_pixel_variance4x8_ssse3,
- &aom_highbd_10_masked_sub_pixel_variance4x8_c, AOM_BITS_10),
- make_tuple(&aom_highbd_10_masked_sub_pixel_variance4x4_ssse3,
- &aom_highbd_10_masked_sub_pixel_variance4x4_c, AOM_BITS_10),
- make_tuple(&aom_highbd_12_masked_sub_pixel_variance128x128_ssse3,
- &aom_highbd_12_masked_sub_pixel_variance128x128_c, AOM_BITS_12),
- make_tuple(&aom_highbd_12_masked_sub_pixel_variance128x64_ssse3,
- &aom_highbd_12_masked_sub_pixel_variance128x64_c, AOM_BITS_12),
- make_tuple(&aom_highbd_12_masked_sub_pixel_variance64x128_ssse3,
- &aom_highbd_12_masked_sub_pixel_variance64x128_c, AOM_BITS_12),
- make_tuple(&aom_highbd_12_masked_sub_pixel_variance64x64_ssse3,
- &aom_highbd_12_masked_sub_pixel_variance64x64_c, AOM_BITS_12),
- make_tuple(&aom_highbd_12_masked_sub_pixel_variance64x32_ssse3,
- &aom_highbd_12_masked_sub_pixel_variance64x32_c, AOM_BITS_12),
- make_tuple(&aom_highbd_12_masked_sub_pixel_variance32x64_ssse3,
- &aom_highbd_12_masked_sub_pixel_variance32x64_c, AOM_BITS_12),
- make_tuple(&aom_highbd_12_masked_sub_pixel_variance32x32_ssse3,
- &aom_highbd_12_masked_sub_pixel_variance32x32_c, AOM_BITS_12),
- make_tuple(&aom_highbd_12_masked_sub_pixel_variance32x16_ssse3,
- &aom_highbd_12_masked_sub_pixel_variance32x16_c, AOM_BITS_12),
- make_tuple(&aom_highbd_12_masked_sub_pixel_variance16x32_ssse3,
- &aom_highbd_12_masked_sub_pixel_variance16x32_c, AOM_BITS_12),
- make_tuple(&aom_highbd_12_masked_sub_pixel_variance16x16_ssse3,
- &aom_highbd_12_masked_sub_pixel_variance16x16_c, AOM_BITS_12),
- make_tuple(&aom_highbd_12_masked_sub_pixel_variance16x8_ssse3,
- &aom_highbd_12_masked_sub_pixel_variance16x8_c, AOM_BITS_12),
- make_tuple(&aom_highbd_12_masked_sub_pixel_variance8x16_ssse3,
- &aom_highbd_12_masked_sub_pixel_variance8x16_c, AOM_BITS_12),
- make_tuple(&aom_highbd_12_masked_sub_pixel_variance8x8_ssse3,
- &aom_highbd_12_masked_sub_pixel_variance8x8_c, AOM_BITS_12),
- make_tuple(&aom_highbd_12_masked_sub_pixel_variance8x4_ssse3,
- &aom_highbd_12_masked_sub_pixel_variance8x4_c, AOM_BITS_12),
- make_tuple(&aom_highbd_12_masked_sub_pixel_variance4x8_ssse3,
- &aom_highbd_12_masked_sub_pixel_variance4x8_c, AOM_BITS_12),
- make_tuple(&aom_highbd_12_masked_sub_pixel_variance4x4_ssse3,
- &aom_highbd_12_masked_sub_pixel_variance4x4_c, AOM_BITS_12)
-};
-
-INSTANTIATE_TEST_CASE_P(SSSE3_C_COMPARE, HighbdMaskedSubPixelVarianceTest,
- ::testing::ValuesIn(hbd_sub_pel_var_test));
-#endif // HAVE_SSSE3
-} // namespace
diff --git a/third_party/aom/test/md5_helper.h b/third_party/aom/test/md5_helper.h
deleted file mode 100644
index 9443cb262..000000000
--- a/third_party/aom/test/md5_helper.h
+++ /dev/null
@@ -1,76 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#ifndef AOM_TEST_MD5_HELPER_H_
-#define AOM_TEST_MD5_HELPER_H_
-
-#include "aom/aom_decoder.h"
-#include "common/md5_utils.h"
-
-namespace libaom_test {
-class MD5 {
- public:
- MD5() { MD5Init(&md5_); }
-
- void Add(const aom_image_t *img) {
- for (int plane = 0; plane < 3; ++plane) {
- const uint8_t *buf = img->planes[plane];
- // Calculate the width and height to do the md5 check. For the chroma
- // plane, we never want to round down and thus skip a pixel so if
- // we are shifting by 1 (chroma_shift) we add 1 before doing the shift.
- // This works only for chroma_shift of 0 and 1.
- const int bytes_per_sample =
- (img->fmt & AOM_IMG_FMT_HIGHBITDEPTH) ? 2 : 1;
- const int h =
- plane ? (img->d_h + img->y_chroma_shift) >> img->y_chroma_shift
- : img->d_h;
- const int w =
- (plane ? (img->d_w + img->x_chroma_shift) >> img->x_chroma_shift
- : img->d_w) *
- bytes_per_sample;
-
- for (int y = 0; y < h; ++y) {
- MD5Update(&md5_, buf, w);
- buf += img->stride[plane];
- }
- }
- }
-
- void Add(const uint8_t *data, size_t size) {
- MD5Update(&md5_, data, static_cast<uint32_t>(size));
- }
-
- const char *Get(void) {
- static const char hex[16] = {
- '0', '1', '2', '3', '4', '5', '6', '7',
- '8', '9', 'a', 'b', 'c', 'd', 'e', 'f',
- };
- uint8_t tmp[16];
- MD5Context ctx_tmp = md5_;
-
- MD5Final(tmp, &ctx_tmp);
- for (int i = 0; i < 16; i++) {
- res_[i * 2 + 0] = hex[tmp[i] >> 4];
- res_[i * 2 + 1] = hex[tmp[i] & 0xf];
- }
- res_[32] = 0;
-
- return res_;
- }
-
- protected:
- char res_[33];
- MD5Context md5_;
-};
-
-} // namespace libaom_test
-
-#endif // AOM_TEST_MD5_HELPER_H_
diff --git a/third_party/aom/test/metrics_template.html b/third_party/aom/test/metrics_template.html
deleted file mode 100644
index b57c62314..000000000
--- a/third_party/aom/test/metrics_template.html
+++ /dev/null
@@ -1,422 +0,0 @@
-<!DOCTYPE html>
-<html lang="en">
-<head>
-<meta charset="utf-8">
-<title>Video Codec Test Results</title>
-<style type="text/css">
-<!-- Begin 960 reset -->
-a,abbr,acronym,address,applet,article,aside,audio,b,big,blockquote,body,canvas,caption,center,cite,c
-ode,dd,del,details,dfn,dialog,div,dl,dt,em,embed,fieldset,figcaption,figure,font,footer,form,h1,h2,h
-3,h4,h5,h6,header,hgroup,hr,html,i,iframe,img,ins,kbd,label,legend,li,mark,menu,meter,nav,object,ol,
-output,p,pre,progress,q,rp,rt,ruby,s,samp,section,small,span,strike,strong,sub,summary,sup,table,tbo
-dy,td,tfoot,th,thead,time,tr,tt,u,ul,var,video,xmp{border:0;margin:0;padding:0;font-size:100%}html,b
-ody{height:100%}article,aside,details,figcaption,figure,footer,header,hgroup,menu,nav,section{displa
-y:block}b,strong{font-weight:bold}img{color:transparent;font-size:0;vertical-align:middle;-ms-interp
-olation-mode:bicubic}ol,ul{list-style:none}li{display:list-item}table{border-collapse:collapse;borde
-r-spacing:0}th,td,caption{font-weight:normal;vertical-align:top;text-align:left}q{quotes:none}q:befo
-re,q:after{content:'';content:none}sub,sup,small{font-size:75%}sub,sup{line-height:0;position:relati
-ve;vertical-align:baseline}sub{bottom:-0.25em}sup{top:-0.5em}svg{overflow:hidden}
-<!-- End 960 reset -->
-<!-- Begin 960 text -->
-body{font:13px/1.5 'Helvetica Neue',Arial,'Liberation Sans',FreeSans,sans-serif}pre,code{font-family
-:'DejaVu Sans Mono',Menlo,Consolas,monospace}hr{border:0 #ccc solid;border-top-width:1px;clear:both;
-height:0}h1{font-size:25px}h2{font-size:23px}h3{font-size:21px}h4{font-size:19px}h5{font-size:17px}h
-6{font-size:15px}ol{list-style:decimal}ul{list-style:disc}li{margin-left:30px}p,dl,hr,h1,h2,h3,h4,h5
-,h6,ol,ul,pre,table,address,fieldset,figure{margin-bottom:20px}
-<!-- End 960 text -->
-<!-- Begin 960 grid (fluid variant)
- 12 columns, 1152px total width
- http://960.gs/ | http://grids.heroku.com/ -->
-.container_12{width:92%;margin-left:4%;margin-right:4%}.grid_1,.grid_2,.grid_3,.grid_4,.grid_5,.grid
-_6,.grid_7,.grid_8,.grid_9,.grid_10,.grid_11,.grid_12{display:inline;float:left;position:relative;ma
-rgin-left:1%;margin-right:1%}.alpha{margin-left:0}.omega{margin-right:0}.container_12 .grid_1{width:
-6.333%}.container_12 .grid_2{width:14.667%}.container_12 .grid_3{width:23.0%}.container_12 .grid_4{w
-idth:31.333%}.container_12 .grid_5{width:39.667%}.container_12 .grid_6{width:48.0%}.container_12 .gr
-id_7{width:56.333%}.container_12 .grid_8{width:64.667%}.container_12 .grid_9{width:73.0%}.container_
-12 .grid_10{width:81.333%}.container_12 .grid_11{width:89.667%}.container_12 .grid_12{width:98.0%}.c
-ontainer_12 .prefix_1{padding-left:8.333%}.container_12 .prefix_2{padding-left:16.667%}.container_12
- .prefix_3{padding-left:25.0%}.container_12 .prefix_4{padding-left:33.333%}.container_12 .prefix_5{p
-adding-left:41.667%}.container_12 .prefix_6{padding-left:50.0%}.container_12 .prefix_7{padding-left:
-58.333%}.container_12 .prefix_8{padding-left:66.667%}.container_12 .prefix_9{padding-left:75.0%}.con
-tainer_12 .prefix_10{padding-left:83.333%}.container_12 .prefix_11{padding-left:91.667%}.container_1
-2 .suffix_1{padding-right:8.333%}.container_12 .suffix_2{padding-right:16.667%}.container_12 .suffix
-_3{padding-right:25.0%}.container_12 .suffix_4{padding-right:33.333%}.container_12 .suffix_5{padding
--right:41.667%}.container_12 .suffix_6{padding-right:50.0%}.container_12 .suffix_7{padding-right:58.
-333%}.container_12 .suffix_8{padding-right:66.667%}.container_12 .suffix_9{padding-right:75.0%}.cont
-ainer_12 .suffix_10{padding-right:83.333%}.container_12 .suffix_11{padding-right:91.667%}.container_
-12 .push_1{left:8.333%}.container_12 .push_2{left:16.667%}.container_12 .push_3{left:25.0%}.containe
-r_12 .push_4{left:33.333%}.container_12 .push_5{left:41.667%}.container_12 .push_6{left:50.0%}.conta
-iner_12 .push_7{left:58.333%}.container_12 .push_8{left:66.667%}.container_12 .push_9{left:75.0%}.co
-ntainer_12 .push_10{left:83.333%}.container_12 .push_11{left:91.667%}.container_12 .pull_1{left:-8.3
-33%}.container_12 .pull_2{left:-16.667%}.container_12 .pull_3{left:-25.0%}.container_12 .pull_4{left
-:-33.333%}.container_12 .pull_5{left:-41.667%}.container_12 .pull_6{left:-50.0%}.container_12 .pull_
-7{left:-58.333%}.container_12 .pull_8{left:-66.667%}.container_12 .pull_9{left:-75.0%}.container_12
-.pull_10{left:-83.333%}.container_12 .pull_11{left:-91.667%}.clear{clear:both;display:block;overflow
-:hidden;visibility:hidden;width:0;height:0}.clearfix:after{clear:both;content:' ';display:block;font
--size:0;line-height:0;visibility:hidden;width:0;height:0}.clearfix{display:inline-block}* html .clea
-rfix{height:1%}.clearfix{display:block}
-<!-- End 960 grid -->
-
-div.metricgraph {
-
-}
-
-body {
-
-}
-
-div.header {
- font-family: Arial, sans-serif;
-}
-
-div.header h2 {
- margin: .5em auto;
-}
-
-div.radio {
- font-family: Arial, sans-serif;
- margin-bottom: 1em;
-}
-
-div.main {
-
-}
-
-div.cliplist {
- font-family: Arial, sans-serif;
- margin-top: 6px;
-}
-
-div.chartarea {
- font-family: Arial, sans-serif;
-}
-
-div.indicators {
- font-family: Arial, sans-serif;
- font-size: 13px;
- margin-top: 6px;
- min-height: 600px;
- background-color: #f7f7f7;
-}
-
-div.indicators div.content {
- margin: 1em;
-}
-
-div.indicators div.content h5 {
- font-size: 13px;
- text-align: center;
- margin: 0;
-}
-
-div.indicators div.content ul {
- margin-left: 0;
- padding-left: 0;
- margin-top: 0;
-}
-
-div.indicators div.content ul li {
- margin-left: 1.5em;
-}
-
-div.indicators div.content p:first-child {
- margin-bottom: .5em;
-}
-
-span.google-visualization-table-sortind {
- color: #000;
-}
-.header-style {
- font-weight: bold;
- border: 1px solid #fff;
- background-color: #ccc;
-}
-
-td.header-style+td {
-
-}
-
-.orange-background {
- background-color: orange;
-}
-
-.light-gray-background {
- background-color: #f0f0f0;
-}
-</style>
-<script type="text/javascript" src="https://www.google.com/jsapi"></script>
-<script type="text/javascript">
-var chart_left = 40;
-var chart_top = 6;
-var chart_height = document.documentElement.clientHeight-100;
-var chart_width = "100%";
-ftable='filestable_avg'
-var snrs = [];
-var filestable_dsnr = [];
-var filestable_drate = [];
-var filestable_avg = [];
-
-// Python template code replaces the following 2 lines.
-//%%metrics_js%%//
-//%%filestable_dpsnr%%//
-//%%filestable_avg%%//
-//%%filestable_drate%%//
-//%%snrs%%//
-
-var selected = 0
-var imagestr = '';
-var bettertable=0;
-var chart=0;
-var better=0;
-var metricdata=0;
-var metricView=0;
-var column=1;
-var formatter=0;
-
-function changeColumn(col) {
- column = col;
- console.log(col)
- draw_files();
-}
-
-function changeMetric(m) {
- ftable=m
- draw_files()
-}
-
-function setup_vis() {
- chart = new google.visualization.ScatterChart(
- document.getElementById("metricgraph"));
-
- bettertable = new google.visualization.Table(
- document.getElementById("bettertable"));
-
- draw_files();
- build_metrics_radio();
-}
-
-function build_metrics_radio() {
- for (metric=1; metric < metrics.length; metric++) {
- var rb = document.createElement('input');
- var l = document.createElement('label');
- rb.setAttribute('type','radio');
- rb.setAttribute('name','metric');
- rb.setAttribute('onClick', "changeColumn('"+metric.toString()+"')");
- l.innerHTML = metrics[metric];
- document.getElementById('metrics').appendChild(rb);
- document.getElementById('metrics').appendChild(l);
- }
-}
-
-function draw_files() {
- var options = {'allowHtml': true, 'width': "100%", 'height': "50%"};
- if (better != 0) delete better;
-
- col=eval(ftable+'[column]')
- better = new google.visualization.DataTable(col)
-
- // Python Template code replaces the following line with a list of
- // formatters.
- if (ftable == 'filestable_dsnr')
- formatter = new google.visualization.NumberFormat(
- {fractionDigits: 4, suffix:" db"});
- else
- formatter = new google.visualization.NumberFormat(
- {fractionDigits: 4, suffix:"%"});
-
- //%%formatters%%//
-
- bettertable.draw(better,options);
- google.visualization.events.addListener(bettertable, 'select',
- selectBetterHandler);
- query_file()
-}
-
-function query_file() {
- imagestr = better.getFormattedValue(selected, 0)
- var metricjson = eval('(' + snrs[column][selected] + ')');
- metricdata = new google.visualization.DataTable(metricjson, 0.6);
- if( metricView != 0 ) delete metricView;
- metricView = new google.visualization.DataView(metricdata);
-
- chart.draw(metricView, {curveType:'function',
- explorer: {},
- chartArea:{left:chart_left, top:chart_top, width:chart_width,
- height:chart_height-90},
- hAxis:{title:"Datarate in kbps"},
- vAxis:{title:"Quality in decibels", format: '##.0', textPosition: 'in'},
- legend:{position:"in"}, title:imagestr, pointSize:2, lineWidth:1,
- width:chart_width, height:chart_height-50 });
-
- google.visualization.events.addListener(chart, 'select', chartSelect);
- google.visualization.events.addListener(chart, 'onmouseover', chartMouseOver);
- google.visualization.events.addListener(chart, 'onmouseout', chartMouseOut);
-}
-
-function chartMouseOut(e) {
- statusbar = document.getElementById('status');
- statusbar.style.display = 'none';
-}
-
-function chartMouseOver(e) {
- pointDifference(e.row, e.column)
-}
-
-function pointDifference(row, col) {
- if(!row || !col)
- return;
-
- var cols = metricdata.getNumberOfColumns();
- var rows = metricdata.getNumberOfRows();
-
- var sel_bitrate = metricView.getValue(row, 0 );
- var sel_metric = metricView.getValue(row, col);
-
- var message = '<ul>' + metricView.getColumnLabel(col) +
- ' (' + sel_bitrate.toFixed(0) + ' kbps, ' + sel_metric.toFixed(2) + ')' + ' is ';
-
-
- // col 0 is datarate
- for( var i=1;i<cols;++i) {
-
- var metric_greatest_thats_less = 0;
- var rate_greatest_thats_less = 0;
- var metric_smallest_thats_greater = 999;
- var rate_smallest_thats_greater = 0;
-
- if(i==col)
- continue;
-
- // Find the lowest metric for the column that's greater than sel_metric and
- // the highest metric for this column that's less than the metric.
- for(var line_count = 0; line_count < rows; ++line_count) {
- this_metric = metricdata.getValue(line_count, i)
- this_rate = metricdata.getValue(line_count, 0)
- if(!this_metric)
- continue;
-
- if(this_metric > metric_greatest_thats_less &&
- this_metric <= sel_metric) {
- metric_greatest_thats_less = this_metric;
- rate_greatest_thats_less = this_rate;
- }
- if(this_metric < metric_smallest_thats_greater &&
- this_metric > sel_metric) {
- metric_smallest_thats_greater = this_metric;
- rate_smallest_thats_greater = this_rate;
- }
- }
-
- if(rate_smallest_thats_greater == 0 || rate_greatest_thats_less == 0) {
- message = message + " <li> Couldn't find a point on both sides.</li>"
- } else {
- metric_slope = ( rate_smallest_thats_greater - rate_greatest_thats_less) /
- ( metric_smallest_thats_greater - metric_greatest_thats_less);
-
- projected_rate = ( sel_metric - metric_greatest_thats_less) *
- metric_slope + rate_greatest_thats_less;
-
- difference = 100 * (projected_rate / sel_bitrate - 1);
-
-
- if (difference > 0)
- message = message + "<li> " + difference.toFixed(2) +
- "% smaller than <em>" +
- metricdata.getColumnLabel(i) + "</em></li> "
- else
- message = message + "<li> " + -difference.toFixed(2) +
- "% bigger than <em>" +
- metricdata.getColumnLabel(i) + "</em></li> "
- }
-
- }
- message = message + "</ul>"
- statusbar = document.getElementById('status');
- statusbar.innerHTML = "<p>" + message + "</p>";
- statusbar.style.display = 'block';
-}
-
-function chartSelect() {
- var selection = chart.getSelection();
- var message = '';
- var min = metricView.getFormattedValue(selection[0].row, 0);
- var max = metricView.getFormattedValue(selection[selection.length-1].row, 0);
- var val = metricView.getFormattedValue(selection[0].row,selection[0].column);
-
- pointDifference(selection[0].row, selection[0].column)
- min = min / 3
- max = max * 3
- metricView.setRows(metricdata.getFilteredRows(
- [{column: 0,minValue: min, maxValue:max}]));
-
- chart.draw(metricView, {curveType:'function',
- chartArea:{left:40, top:10, width:chart_width, height:chart_height - 110},
- hAxis:{title:"datarate in kbps"}, vAxis:{title:"quality in decibels"},
- legend:{position:"in"}, title:imagestr, pointSize:2, lineWidth:1,
- width:chart_width, height:chart_height - 50});
-}
-
-function selectBetterHandler() {
- var selection = bettertable.getSelection();
- for (var i = 0; i < selection.length; i++) {
- item = selection[i];
- }
- selected = item.row
- query_file()
-}
-
-
-google.load('visualization', '1', {'packages' : ['corechart','table']});
-google.setOnLoadCallback(setup_vis);
-</script>
-</head>
-
-<body>
-
- <div class="container_12">
-
- <div class="grid_12 header">
- <h2>Codec Comparison Results</h2>
- </div>
-
- <div class="grid_12 radio">
-
- <form name="myform">
- Method For Combining Points
- <input type="radio" checked name="column" value="1"
- onClick="changeMetric('filestable_avg')" />Average of bitrates difference
- <input type="radio" name="column" value="2"
- onClick="changeMetric('filestable_dsnr')" />BDSNR
- <input type="radio" name="column" value="3"
- onClick="changeMetric('filestable_drate')" />BDRATE
- </form>
-
- <form id="metrics" name="myform">
- </form>
-
- </div>
-
- <div class="grid_12 main">
-
- <div class="grid_5 alpha cliplist">
- <div id="bettertable"></div>
- </div>
-
- <div class="grid_5 chartarea">
- <div id="metricgraph"></div>
- </div>
-
- <div class="grid_2 omega indicators">
- <div class="content">
- <h5>Indicators</h5>
- <hr>
- <div id="status"></div>
- </div>
- </div>
-
- </div>
-
- </div>
-
-</body>
-</html>
diff --git a/third_party/aom/test/monochrome_test.cc b/third_party/aom/test/monochrome_test.cc
deleted file mode 100644
index ebccba584..000000000
--- a/third_party/aom/test/monochrome_test.cc
+++ /dev/null
@@ -1,130 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#include <climits>
-#include <vector>
-#include "third_party/googletest/src/googletest/include/gtest/gtest.h"
-#include "test/codec_factory.h"
-#include "test/encode_test_driver.h"
-#include "test/i420_video_source.h"
-#include "test/video_source.h"
-#include "test/util.h"
-
-namespace {
-
-class MonochromeTest
- : public ::libaom_test::CodecTestWithParam<libaom_test::TestMode>,
- public ::libaom_test::EncoderTest {
- protected:
- MonochromeTest() : EncoderTest(GET_PARAM(0)), frame0_psnr_y_(0.) {}
-
- virtual ~MonochromeTest() {}
-
- virtual void SetUp() {
- InitializeConfig();
- SetMode(GET_PARAM(1));
- }
-
- virtual void DecompressedFrameHook(const aom_image_t &img,
- aom_codec_pts_t pts) {
- (void)pts;
-
- // Get value of top-left corner pixel of U plane
- int chroma_value = img.planes[AOM_PLANE_U][0];
-
- bool is_chroma_constant =
- ComparePlaneToValue(img, AOM_PLANE_U, chroma_value) &&
- ComparePlaneToValue(img, AOM_PLANE_V, chroma_value);
-
- // Chroma planes should be constant
- EXPECT_TRUE(is_chroma_constant);
-
- // Monochrome flag on image should be set
- EXPECT_EQ(img.monochrome, 1);
-
- chroma_value_list_.push_back(chroma_value);
- }
-
- // Returns true if all pixels on the plane are equal to value, and returns
- // false otherwise.
- bool ComparePlaneToValue(const aom_image_t &img, const int plane,
- const int value) {
- const int w = aom_img_plane_width(&img, plane);
- const int h = aom_img_plane_height(&img, plane);
- const uint8_t *const buf = img.planes[plane];
- const int stride = img.stride[plane];
-
- for (int r = 0; r < h; ++r) {
- for (int c = 0; c < w; ++c) {
- if (buf[r * stride + c] != value) return false;
- }
- }
- return true;
- }
-
- virtual void PSNRPktHook(const aom_codec_cx_pkt_t *pkt) {
- // Check that the initial Y PSNR value is 'high enough', and check that
- // subsequent Y PSNR values are 'close' to this initial value.
- if (frame0_psnr_y_ == 0.) {
- frame0_psnr_y_ = pkt->data.psnr.psnr[1];
- EXPECT_GT(frame0_psnr_y_, 29.);
- }
- EXPECT_NEAR(pkt->data.psnr.psnr[1], frame0_psnr_y_, 2.5);
- }
-
- std::vector<int> chroma_value_list_;
- double frame0_psnr_y_;
-};
-
-TEST_P(MonochromeTest, TestMonochromeEncoding) {
- ::libaom_test::I420VideoSource video("hantro_collage_w352h288.yuv", 352, 288,
- 30, 1, 0, 5);
-
- init_flags_ = AOM_CODEC_USE_PSNR;
-
- cfg_.g_w = 352;
- cfg_.g_h = 288;
-
- cfg_.rc_buf_initial_sz = 500;
- cfg_.rc_buf_optimal_sz = 600;
- cfg_.rc_buf_sz = 1000;
- cfg_.rc_min_quantizer = 2;
- cfg_.rc_max_quantizer = 56;
- cfg_.rc_undershoot_pct = 50;
- cfg_.rc_overshoot_pct = 50;
- cfg_.rc_end_usage = AOM_CBR;
- cfg_.kf_mode = AOM_KF_AUTO;
- cfg_.g_lag_in_frames = 1;
- cfg_.kf_min_dist = cfg_.kf_max_dist = 3000;
- // Enable dropped frames.
- cfg_.rc_dropframe_thresh = 1;
- // Disable error_resilience mode.
- cfg_.g_error_resilient = 0;
- // Run at low bitrate.
- cfg_.rc_target_bitrate = 40;
- // Set monochrome encoding flag
- cfg_.monochrome = 1;
-
- ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
-
- // Check that the chroma planes are equal across all frames
- std::vector<int>::const_iterator iter = chroma_value_list_.begin();
- int initial_chroma_value = *iter;
- for (; iter != chroma_value_list_.end(); ++iter) {
- // Check that all decoded frames have the same constant chroma planes.
- EXPECT_EQ(*iter, initial_chroma_value);
- }
-}
-
-AV1_INSTANTIATE_TEST_CASE(MonochromeTest,
- ::testing::Values(::libaom_test::kTwoPassGood));
-
-} // namespace
diff --git a/third_party/aom/test/motion_vector_test.cc b/third_party/aom/test/motion_vector_test.cc
deleted file mode 100644
index 27eb93893..000000000
--- a/third_party/aom/test/motion_vector_test.cc
+++ /dev/null
@@ -1,105 +0,0 @@
-/*
- * Copyright (c) 2017, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#include "third_party/googletest/src/googletest/include/gtest/gtest.h"
-
-#include "test/codec_factory.h"
-#include "test/encode_test_driver.h"
-#include "test/util.h"
-#include "test/yuv_video_source.h"
-
-namespace {
-#define MAX_EXTREME_MV 1
-#define MIN_EXTREME_MV 2
-
-// Encoding modes
-const libaom_test::TestMode kEncodingModeVectors[] = {
- ::libaom_test::kTwoPassGood,
- ::libaom_test::kOnePassGood,
-};
-
-// Encoding speeds
-const int kCpuUsedVectors[] = { 1, 5 };
-
-// MV test modes: 1 - always use maximum MV; 2 - always use minimum MV.
-const int kMVTestModes[] = { MAX_EXTREME_MV, MIN_EXTREME_MV };
-
-class MotionVectorTestLarge
- : public ::libaom_test::CodecTestWith3Params<libaom_test::TestMode, int,
- int>,
- public ::libaom_test::EncoderTest {
- protected:
- MotionVectorTestLarge()
- : EncoderTest(GET_PARAM(0)), encoding_mode_(GET_PARAM(1)),
- cpu_used_(GET_PARAM(2)), mv_test_mode_(GET_PARAM(3)) {}
-
- virtual ~MotionVectorTestLarge() {}
-
- virtual void SetUp() {
- InitializeConfig();
- SetMode(encoding_mode_);
- if (encoding_mode_ != ::libaom_test::kRealTime) {
- cfg_.g_lag_in_frames = 3;
- cfg_.rc_end_usage = AOM_VBR;
- } else {
- cfg_.g_lag_in_frames = 0;
- cfg_.rc_end_usage = AOM_CBR;
- cfg_.rc_buf_sz = 1000;
- cfg_.rc_buf_initial_sz = 500;
- cfg_.rc_buf_optimal_sz = 600;
- }
- }
-
- virtual void PreEncodeFrameHook(::libaom_test::VideoSource *video,
- ::libaom_test::Encoder *encoder) {
- if (video->frame() == 1) {
- encoder->Control(AOME_SET_CPUUSED, cpu_used_);
- encoder->Control(AV1E_ENABLE_MOTION_VECTOR_UNIT_TEST, mv_test_mode_);
- if (encoding_mode_ != ::libaom_test::kRealTime) {
- encoder->Control(AOME_SET_ENABLEAUTOALTREF, 1);
- encoder->Control(AOME_SET_ARNR_MAXFRAMES, 7);
- encoder->Control(AOME_SET_ARNR_STRENGTH, 5);
- }
- }
- }
-
- libaom_test::TestMode encoding_mode_;
- int cpu_used_;
- int mv_test_mode_;
-};
-
-TEST_P(MotionVectorTestLarge, OverallTest) {
- int width = 3840;
- int height = 2160;
-
- // Reduce the test clip's resolution while testing on 32-bit system.
- if (sizeof(void *) == 4) {
- width = 2048;
- height = 360;
- }
-
- cfg_.rc_target_bitrate = 24000;
- cfg_.g_profile = 0;
- init_flags_ = AOM_CODEC_USE_PSNR;
-
- testing::internal::scoped_ptr<libaom_test::VideoSource> video;
- video.reset(new libaom_test::YUVVideoSource(
- "niklas_640_480_30.yuv", AOM_IMG_FMT_I420, width, height, 30, 1, 0, 3));
-
- ASSERT_TRUE(video.get() != NULL);
- ASSERT_NO_FATAL_FAILURE(RunLoop(video.get()));
-}
-
-AV1_INSTANTIATE_TEST_CASE(MotionVectorTestLarge,
- ::testing::ValuesIn(kEncodingModeVectors),
- ::testing::ValuesIn(kCpuUsedVectors),
- ::testing::ValuesIn(kMVTestModes));
-} // namespace
diff --git a/third_party/aom/test/noise_model_test.cc b/third_party/aom/test/noise_model_test.cc
deleted file mode 100644
index b5b387e31..000000000
--- a/third_party/aom/test/noise_model_test.cc
+++ /dev/null
@@ -1,1343 +0,0 @@
-/*
- * Copyright (c) 2018, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#include <math.h>
-#include <algorithm>
-#include <vector>
-
-#include "aom_dsp/noise_model.h"
-#include "aom_dsp/noise_util.h"
-#include "config/aom_dsp_rtcd.h"
-#include "test/acm_random.h"
-#include "third_party/googletest/src/googletest/include/gtest/gtest.h"
-
-namespace {
-
-// Return normally distrbuted values with standard deviation of sigma.
-double randn(libaom_test::ACMRandom *random, double sigma) {
- while (1) {
- const double u = 2.0 * ((double)random->Rand31() /
- testing::internal::Random::kMaxRange) -
- 1.0;
- const double v = 2.0 * ((double)random->Rand31() /
- testing::internal::Random::kMaxRange) -
- 1.0;
- const double s = u * u + v * v;
- if (s > 0 && s < 1) {
- return sigma * (u * sqrt(-2.0 * log(s) / s));
- }
- }
- return 0;
-}
-
-// Synthesizes noise using the auto-regressive filter of the given lag,
-// with the provided n coefficients sampled at the given coords.
-void noise_synth(libaom_test::ACMRandom *random, int lag, int n,
- const int (*coords)[2], const double *coeffs, double *data,
- int w, int h) {
- const int pad_size = 3 * lag;
- const int padded_w = w + pad_size;
- const int padded_h = h + pad_size;
- int x = 0, y = 0;
- std::vector<double> padded(padded_w * padded_h);
-
- for (y = 0; y < padded_h; ++y) {
- for (x = 0; x < padded_w; ++x) {
- padded[y * padded_w + x] = randn(random, 1.0);
- }
- }
- for (y = lag; y < padded_h; ++y) {
- for (x = lag; x < padded_w; ++x) {
- double sum = 0;
- int i = 0;
- for (i = 0; i < n; ++i) {
- const int dx = coords[i][0];
- const int dy = coords[i][1];
- sum += padded[(y + dy) * padded_w + (x + dx)] * coeffs[i];
- }
- padded[y * padded_w + x] += sum;
- }
- }
- // Copy over the padded rows to the output
- for (y = 0; y < h; ++y) {
- memcpy(data + y * w, &padded[0] + y * padded_w, sizeof(*data) * w);
- }
-}
-
-std::vector<float> get_noise_psd(double *noise, int width, int height,
- int block_size) {
- float *block =
- (float *)aom_memalign(32, block_size * block_size * sizeof(block));
- std::vector<float> psd(block_size * block_size);
- int num_blocks = 0;
- struct aom_noise_tx_t *tx = aom_noise_tx_malloc(block_size);
- for (int y = 0; y <= height - block_size; y += block_size / 2) {
- for (int x = 0; x <= width - block_size; x += block_size / 2) {
- for (int yy = 0; yy < block_size; ++yy) {
- for (int xx = 0; xx < block_size; ++xx) {
- block[yy * block_size + xx] = (float)noise[(y + yy) * width + x + xx];
- }
- }
- aom_noise_tx_forward(tx, &block[0]);
- aom_noise_tx_add_energy(tx, &psd[0]);
- num_blocks++;
- }
- }
- for (int yy = 0; yy < block_size; ++yy) {
- for (int xx = 0; xx <= block_size / 2; ++xx) {
- psd[yy * block_size + xx] /= num_blocks;
- }
- }
- // Fill in the data that is missing due to symmetries
- for (int xx = 1; xx < block_size / 2; ++xx) {
- psd[(block_size - xx)] = psd[xx];
- }
- for (int yy = 1; yy < block_size; ++yy) {
- for (int xx = 1; xx < block_size / 2; ++xx) {
- psd[(block_size - yy) * block_size + (block_size - xx)] =
- psd[yy * block_size + xx];
- }
- }
- aom_noise_tx_free(tx);
- aom_free(block);
- return psd;
-}
-
-} // namespace
-
-TEST(NoiseStrengthSolver, GetCentersTwoBins) {
- aom_noise_strength_solver_t solver;
- aom_noise_strength_solver_init(&solver, 2, 8);
- EXPECT_NEAR(0, aom_noise_strength_solver_get_center(&solver, 0), 1e-5);
- EXPECT_NEAR(255, aom_noise_strength_solver_get_center(&solver, 1), 1e-5);
- aom_noise_strength_solver_free(&solver);
-}
-
-TEST(NoiseStrengthSolver, GetCentersTwoBins10bit) {
- aom_noise_strength_solver_t solver;
- aom_noise_strength_solver_init(&solver, 2, 10);
- EXPECT_NEAR(0, aom_noise_strength_solver_get_center(&solver, 0), 1e-5);
- EXPECT_NEAR(1023, aom_noise_strength_solver_get_center(&solver, 1), 1e-5);
- aom_noise_strength_solver_free(&solver);
-}
-
-TEST(NoiseStrengthSolver, GetCenters256Bins) {
- const int num_bins = 256;
- aom_noise_strength_solver_t solver;
- aom_noise_strength_solver_init(&solver, num_bins, 8);
-
- for (int i = 0; i < 256; ++i) {
- EXPECT_NEAR(i, aom_noise_strength_solver_get_center(&solver, i), 1e-5);
- }
- aom_noise_strength_solver_free(&solver);
-}
-
-// Tests that the noise strength solver returns the identity transform when
-// given identity-like constraints.
-TEST(NoiseStrengthSolver, ObserveIdentity) {
- const int num_bins = 256;
- aom_noise_strength_solver_t solver;
- EXPECT_EQ(1, aom_noise_strength_solver_init(&solver, num_bins, 8));
-
- // We have to add a big more strength to constraints at the boundary to
- // overcome any regularization.
- for (int j = 0; j < 5; ++j) {
- aom_noise_strength_solver_add_measurement(&solver, 0, 0);
- aom_noise_strength_solver_add_measurement(&solver, 255, 255);
- }
- for (int i = 0; i < 256; ++i) {
- aom_noise_strength_solver_add_measurement(&solver, i, i);
- }
- EXPECT_EQ(1, aom_noise_strength_solver_solve(&solver));
- for (int i = 2; i < num_bins - 2; ++i) {
- EXPECT_NEAR(i, solver.eqns.x[i], 0.1);
- }
-
- aom_noise_strength_lut_t lut;
- EXPECT_EQ(1, aom_noise_strength_solver_fit_piecewise(&solver, 2, &lut));
-
- ASSERT_EQ(2, lut.num_points);
- EXPECT_NEAR(0.0, lut.points[0][0], 1e-5);
- EXPECT_NEAR(0.0, lut.points[0][1], 0.5);
- EXPECT_NEAR(255.0, lut.points[1][0], 1e-5);
- EXPECT_NEAR(255.0, lut.points[1][1], 0.5);
-
- aom_noise_strength_lut_free(&lut);
- aom_noise_strength_solver_free(&solver);
-}
-
-TEST(NoiseStrengthSolver, SimplifiesCurve) {
- const int num_bins = 256;
- aom_noise_strength_solver_t solver;
- EXPECT_EQ(1, aom_noise_strength_solver_init(&solver, num_bins, 8));
-
- // Create a parabolic input
- for (int i = 0; i < 256; ++i) {
- const double x = (i - 127.5) / 63.5;
- aom_noise_strength_solver_add_measurement(&solver, i, x * x);
- }
- EXPECT_EQ(1, aom_noise_strength_solver_solve(&solver));
-
- // First try to fit an unconstrained lut
- aom_noise_strength_lut_t lut;
- EXPECT_EQ(1, aom_noise_strength_solver_fit_piecewise(&solver, -1, &lut));
- ASSERT_LE(20, lut.num_points);
- aom_noise_strength_lut_free(&lut);
-
- // Now constrain the maximum number of points
- const int kMaxPoints = 9;
- EXPECT_EQ(1,
- aom_noise_strength_solver_fit_piecewise(&solver, kMaxPoints, &lut));
- ASSERT_EQ(kMaxPoints, lut.num_points);
-
- // Check that the input parabola is still well represented
- EXPECT_NEAR(0.0, lut.points[0][0], 1e-5);
- EXPECT_NEAR(4.0, lut.points[0][1], 0.1);
- for (int i = 1; i < lut.num_points - 1; ++i) {
- const double x = (lut.points[i][0] - 128.) / 64.;
- EXPECT_NEAR(x * x, lut.points[i][1], 0.1);
- }
- EXPECT_NEAR(255.0, lut.points[kMaxPoints - 1][0], 1e-5);
-
- EXPECT_NEAR(4.0, lut.points[kMaxPoints - 1][1], 0.1);
- aom_noise_strength_lut_free(&lut);
- aom_noise_strength_solver_free(&solver);
-}
-
-TEST(NoiseStrengthLut, LutEvalSinglePoint) {
- aom_noise_strength_lut_t lut;
- ASSERT_TRUE(aom_noise_strength_lut_init(&lut, 1));
- ASSERT_EQ(1, lut.num_points);
- lut.points[0][0] = 0;
- lut.points[0][1] = 1;
- EXPECT_EQ(1, aom_noise_strength_lut_eval(&lut, -1));
- EXPECT_EQ(1, aom_noise_strength_lut_eval(&lut, 0));
- EXPECT_EQ(1, aom_noise_strength_lut_eval(&lut, 1));
- aom_noise_strength_lut_free(&lut);
-}
-
-TEST(NoiseStrengthLut, LutEvalMultiPointInterp) {
- const double kEps = 1e-5;
- aom_noise_strength_lut_t lut;
- ASSERT_TRUE(aom_noise_strength_lut_init(&lut, 4));
- ASSERT_EQ(4, lut.num_points);
-
- lut.points[0][0] = 0;
- lut.points[0][1] = 0;
-
- lut.points[1][0] = 1;
- lut.points[1][1] = 1;
-
- lut.points[2][0] = 2;
- lut.points[2][1] = 1;
-
- lut.points[3][0] = 100;
- lut.points[3][1] = 1001;
-
- // Test lower boundary
- EXPECT_EQ(0, aom_noise_strength_lut_eval(&lut, -1));
- EXPECT_EQ(0, aom_noise_strength_lut_eval(&lut, 0));
-
- // Test first part that should be identity
- EXPECT_NEAR(0.25, aom_noise_strength_lut_eval(&lut, 0.25), kEps);
- EXPECT_NEAR(0.75, aom_noise_strength_lut_eval(&lut, 0.75), kEps);
-
- // This is a constant section (should evaluate to 1)
- EXPECT_NEAR(1.0, aom_noise_strength_lut_eval(&lut, 1.25), kEps);
- EXPECT_NEAR(1.0, aom_noise_strength_lut_eval(&lut, 1.75), kEps);
-
- // Test interpolation between to non-zero y coords.
- EXPECT_NEAR(1, aom_noise_strength_lut_eval(&lut, 2), kEps);
- EXPECT_NEAR(251, aom_noise_strength_lut_eval(&lut, 26.5), kEps);
- EXPECT_NEAR(751, aom_noise_strength_lut_eval(&lut, 75.5), kEps);
-
- // Test upper boundary
- EXPECT_EQ(1001, aom_noise_strength_lut_eval(&lut, 100));
- EXPECT_EQ(1001, aom_noise_strength_lut_eval(&lut, 101));
-
- aom_noise_strength_lut_free(&lut);
-}
-
-TEST(NoiseModel, InitSuccessWithValidSquareShape) {
- aom_noise_model_params_t params = { AOM_NOISE_SHAPE_SQUARE, 2, 8, 0 };
- aom_noise_model_t model;
-
- EXPECT_TRUE(aom_noise_model_init(&model, params));
-
- const int kNumCoords = 12;
- const int kCoords[][2] = { { -2, -2 }, { -1, -2 }, { 0, -2 }, { 1, -2 },
- { 2, -2 }, { -2, -1 }, { -1, -1 }, { 0, -1 },
- { 1, -1 }, { 2, -1 }, { -2, 0 }, { -1, 0 } };
- EXPECT_EQ(kNumCoords, model.n);
- for (int i = 0; i < kNumCoords; ++i) {
- const int *coord = kCoords[i];
- EXPECT_EQ(coord[0], model.coords[i][0]);
- EXPECT_EQ(coord[1], model.coords[i][1]);
- }
- aom_noise_model_free(&model);
-}
-
-TEST(NoiseModel, InitSuccessWithValidDiamondShape) {
- aom_noise_model_t model;
- aom_noise_model_params_t params = { AOM_NOISE_SHAPE_DIAMOND, 2, 8, 0 };
- EXPECT_TRUE(aom_noise_model_init(&model, params));
- EXPECT_EQ(6, model.n);
- const int kNumCoords = 6;
- const int kCoords[][2] = { { 0, -2 }, { -1, -1 }, { 0, -1 },
- { 1, -1 }, { -2, 0 }, { -1, 0 } };
- EXPECT_EQ(kNumCoords, model.n);
- for (int i = 0; i < kNumCoords; ++i) {
- const int *coord = kCoords[i];
- EXPECT_EQ(coord[0], model.coords[i][0]);
- EXPECT_EQ(coord[1], model.coords[i][1]);
- }
- aom_noise_model_free(&model);
-}
-
-TEST(NoiseModel, InitFailsWithTooLargeLag) {
- aom_noise_model_t model;
- aom_noise_model_params_t params = { AOM_NOISE_SHAPE_SQUARE, 10, 8, 0 };
- EXPECT_FALSE(aom_noise_model_init(&model, params));
- aom_noise_model_free(&model);
-}
-
-TEST(NoiseModel, InitFailsWithTooSmallLag) {
- aom_noise_model_t model;
- aom_noise_model_params_t params = { AOM_NOISE_SHAPE_SQUARE, 0, 8, 0 };
- EXPECT_FALSE(aom_noise_model_init(&model, params));
- aom_noise_model_free(&model);
-}
-
-TEST(NoiseModel, InitFailsWithInvalidShape) {
- aom_noise_model_t model;
- aom_noise_model_params_t params = { aom_noise_shape(100), 3, 8, 0 };
- EXPECT_FALSE(aom_noise_model_init(&model, params));
- aom_noise_model_free(&model);
-}
-
-// A container template class to hold a data type and extra arguments.
-// All of these args are bundled into one struct so that we can use
-// parameterized tests on combinations of supported data types
-// (uint8_t and uint16_t) and bit depths (8, 10, 12).
-template <typename T, int bit_depth, bool use_highbd>
-struct BitDepthParams {
- typedef T data_type_t;
- static const int kBitDepth = bit_depth;
- static const bool kUseHighBD = use_highbd;
-};
-
-template <typename T>
-class FlatBlockEstimatorTest : public ::testing::Test, public T {
- public:
- virtual void SetUp() { random_.Reset(171); }
- typedef std::vector<typename T::data_type_t> VecType;
- VecType data_;
- libaom_test::ACMRandom random_;
-};
-
-TYPED_TEST_CASE_P(FlatBlockEstimatorTest);
-
-TYPED_TEST_P(FlatBlockEstimatorTest, ExtractBlock) {
- const int kBlockSize = 16;
- aom_flat_block_finder_t flat_block_finder;
- ASSERT_EQ(1, aom_flat_block_finder_init(&flat_block_finder, kBlockSize,
- this->kBitDepth, this->kUseHighBD));
- const double normalization = flat_block_finder.normalization;
-
- // Test with an image of more than one block.
- const int h = 2 * kBlockSize;
- const int w = 2 * kBlockSize;
- const int stride = 2 * kBlockSize;
- this->data_.resize(h * stride, 128);
-
- // Set up the (0,0) block to be a plane and the (0,1) block to be a
- // checkerboard
- const int shift = this->kBitDepth - 8;
- for (int y = 0; y < kBlockSize; ++y) {
- for (int x = 0; x < kBlockSize; ++x) {
- this->data_[y * stride + x] = (-y + x + 128) << shift;
- this->data_[y * stride + x + kBlockSize] =
- ((x % 2 + y % 2) % 2 ? 128 - 20 : 128 + 20) << shift;
- }
- }
- std::vector<double> block(kBlockSize * kBlockSize, 1);
- std::vector<double> plane(kBlockSize * kBlockSize, 1);
-
- // The block data should be a constant (zero) and the rest of the plane
- // trend is covered in the plane data.
- aom_flat_block_finder_extract_block(&flat_block_finder,
- (uint8_t *)&this->data_[0], w, h, stride,
- 0, 0, &plane[0], &block[0]);
- for (int y = 0; y < kBlockSize; ++y) {
- for (int x = 0; x < kBlockSize; ++x) {
- EXPECT_NEAR(0, block[y * kBlockSize + x], 1e-5);
- EXPECT_NEAR((double)(this->data_[y * stride + x]) / normalization,
- plane[y * kBlockSize + x], 1e-5);
- }
- }
-
- // The plane trend is a constant, and the block is a zero mean checkerboard.
- aom_flat_block_finder_extract_block(&flat_block_finder,
- (uint8_t *)&this->data_[0], w, h, stride,
- kBlockSize, 0, &plane[0], &block[0]);
- const int mid = 128 << shift;
- for (int y = 0; y < kBlockSize; ++y) {
- for (int x = 0; x < kBlockSize; ++x) {
- EXPECT_NEAR(((double)this->data_[y * stride + x + kBlockSize] - mid) /
- normalization,
- block[y * kBlockSize + x], 1e-5);
- EXPECT_NEAR(mid / normalization, plane[y * kBlockSize + x], 1e-5);
- }
- }
- aom_flat_block_finder_free(&flat_block_finder);
-}
-
-TYPED_TEST_P(FlatBlockEstimatorTest, FindFlatBlocks) {
- const int kBlockSize = 32;
- aom_flat_block_finder_t flat_block_finder;
- ASSERT_EQ(1, aom_flat_block_finder_init(&flat_block_finder, kBlockSize,
- this->kBitDepth, this->kUseHighBD));
-
- const int num_blocks_w = 8;
- const int h = kBlockSize;
- const int w = kBlockSize * num_blocks_w;
- const int stride = w;
- this->data_.resize(h * stride, 128);
- std::vector<uint8_t> flat_blocks(num_blocks_w, 0);
-
- const int shift = this->kBitDepth - 8;
- for (int y = 0; y < kBlockSize; ++y) {
- for (int x = 0; x < kBlockSize; ++x) {
- // Block 0 (not flat): constant doesn't have enough variance to qualify
- this->data_[y * stride + x + 0 * kBlockSize] = 128 << shift;
-
- // Block 1 (not flat): too high of variance is hard to validate as flat
- this->data_[y * stride + x + 1 * kBlockSize] =
- ((uint8_t)(128 + randn(&this->random_, 5))) << shift;
-
- // Block 2 (flat): slight checkerboard added to constant
- const int check = (x % 2 + y % 2) % 2 ? -2 : 2;
- this->data_[y * stride + x + 2 * kBlockSize] = (128 + check) << shift;
-
- // Block 3 (flat): planar block with checkerboard pattern is also flat
- this->data_[y * stride + x + 3 * kBlockSize] =
- (y * 2 - x / 2 + 128 + check) << shift;
-
- // Block 4 (flat): gaussian random with standard deviation 1.
- this->data_[y * stride + x + 4 * kBlockSize] =
- ((uint8_t)(randn(&this->random_, 1) + x + 128.0)) << shift;
-
- // Block 5 (flat): gaussian random with standard deviation 2.
- this->data_[y * stride + x + 5 * kBlockSize] =
- ((uint8_t)(randn(&this->random_, 2) + y + 128.0)) << shift;
-
- // Block 6 (not flat): too high of directional gradient.
- const int strong_edge = x > kBlockSize / 2 ? 64 : 0;
- this->data_[y * stride + x + 6 * kBlockSize] =
- ((uint8_t)(randn(&this->random_, 1) + strong_edge + 128.0)) << shift;
-
- // Block 7 (not flat): too high gradient.
- const int big_check = ((x >> 2) % 2 + (y >> 2) % 2) % 2 ? -16 : 16;
- this->data_[y * stride + x + 7 * kBlockSize] =
- ((uint8_t)(randn(&this->random_, 1) + big_check + 128.0)) << shift;
- }
- }
-
- EXPECT_EQ(4, aom_flat_block_finder_run(&flat_block_finder,
- (uint8_t *)&this->data_[0], w, h,
- stride, &flat_blocks[0]));
-
- // First two blocks are not flat
- EXPECT_EQ(0, flat_blocks[0]);
- EXPECT_EQ(0, flat_blocks[1]);
-
- // Next 4 blocks are flat.
- EXPECT_EQ(255, flat_blocks[2]);
- EXPECT_EQ(255, flat_blocks[3]);
- EXPECT_EQ(255, flat_blocks[4]);
- EXPECT_EQ(255, flat_blocks[5]);
-
- // Last 2 are not flat by threshold
- EXPECT_EQ(0, flat_blocks[6]);
- EXPECT_EQ(0, flat_blocks[7]);
-
- // Add the noise from non-flat block 1 to every block.
- for (int y = 0; y < kBlockSize; ++y) {
- for (int x = 0; x < kBlockSize * num_blocks_w; ++x) {
- this->data_[y * stride + x] +=
- (this->data_[y * stride + x % kBlockSize + kBlockSize] -
- (128 << shift));
- }
- }
- // Now the scored selection will pick the one that is most likely flat (block
- // 0)
- EXPECT_EQ(1, aom_flat_block_finder_run(&flat_block_finder,
- (uint8_t *)&this->data_[0], w, h,
- stride, &flat_blocks[0]));
- EXPECT_EQ(1, flat_blocks[0]);
- EXPECT_EQ(0, flat_blocks[1]);
- EXPECT_EQ(0, flat_blocks[2]);
- EXPECT_EQ(0, flat_blocks[3]);
- EXPECT_EQ(0, flat_blocks[4]);
- EXPECT_EQ(0, flat_blocks[5]);
- EXPECT_EQ(0, flat_blocks[6]);
- EXPECT_EQ(0, flat_blocks[7]);
-
- aom_flat_block_finder_free(&flat_block_finder);
-}
-
-REGISTER_TYPED_TEST_CASE_P(FlatBlockEstimatorTest, ExtractBlock,
- FindFlatBlocks);
-
-typedef ::testing::Types<BitDepthParams<uint8_t, 8, false>, // lowbd
- BitDepthParams<uint16_t, 8, true>, // lowbd in 16-bit
- BitDepthParams<uint16_t, 10, true>, // highbd data
- BitDepthParams<uint16_t, 12, true> >
- AllBitDepthParams;
-INSTANTIATE_TYPED_TEST_CASE_P(FlatBlockInstatiation, FlatBlockEstimatorTest,
- AllBitDepthParams);
-
-template <typename T>
-class NoiseModelUpdateTest : public ::testing::Test, public T {
- public:
- static const int kWidth = 128;
- static const int kHeight = 128;
- static const int kBlockSize = 16;
- static const int kNumBlocksX = kWidth / kBlockSize;
- static const int kNumBlocksY = kHeight / kBlockSize;
-
- virtual void SetUp() {
- const aom_noise_model_params_t params = { AOM_NOISE_SHAPE_SQUARE, 3,
- T::kBitDepth, T::kUseHighBD };
- ASSERT_TRUE(aom_noise_model_init(&model_, params));
-
- random_.Reset(100171);
-
- data_.resize(kWidth * kHeight * 3);
- denoised_.resize(kWidth * kHeight * 3);
- noise_.resize(kWidth * kHeight * 3);
- renoise_.resize(kWidth * kHeight);
- flat_blocks_.resize(kNumBlocksX * kNumBlocksY);
-
- for (int c = 0, offset = 0; c < 3; ++c, offset += kWidth * kHeight) {
- data_ptr_[c] = &data_[offset];
- noise_ptr_[c] = &noise_[offset];
- denoised_ptr_[c] = &denoised_[offset];
- strides_[c] = kWidth;
-
- data_ptr_raw_[c] = (uint8_t *)&data_[offset];
- denoised_ptr_raw_[c] = (uint8_t *)&denoised_[offset];
- }
- chroma_sub_[0] = 0;
- chroma_sub_[1] = 0;
- }
-
- int NoiseModelUpdate(int block_size = kBlockSize) {
- return aom_noise_model_update(&model_, data_ptr_raw_, denoised_ptr_raw_,
- kWidth, kHeight, strides_, chroma_sub_,
- &flat_blocks_[0], block_size);
- }
-
- void TearDown() { aom_noise_model_free(&model_); }
-
- protected:
- aom_noise_model_t model_;
- std::vector<typename T::data_type_t> data_;
- std::vector<typename T::data_type_t> denoised_;
-
- std::vector<double> noise_;
- std::vector<double> renoise_;
- std::vector<uint8_t> flat_blocks_;
-
- typename T::data_type_t *data_ptr_[3];
- typename T::data_type_t *denoised_ptr_[3];
-
- double *noise_ptr_[3];
- int strides_[3];
- int chroma_sub_[2];
- libaom_test::ACMRandom random_;
-
- private:
- uint8_t *data_ptr_raw_[3];
- uint8_t *denoised_ptr_raw_[3];
-};
-
-TYPED_TEST_CASE_P(NoiseModelUpdateTest);
-
-TYPED_TEST_P(NoiseModelUpdateTest, UpdateFailsNoFlatBlocks) {
- EXPECT_EQ(AOM_NOISE_STATUS_INSUFFICIENT_FLAT_BLOCKS,
- this->NoiseModelUpdate());
-}
-
-TYPED_TEST_P(NoiseModelUpdateTest, UpdateSuccessForZeroNoiseAllFlat) {
- this->flat_blocks_.assign(this->flat_blocks_.size(), 1);
- this->denoised_.assign(this->denoised_.size(), 128);
- this->data_.assign(this->denoised_.size(), 128);
- EXPECT_EQ(AOM_NOISE_STATUS_INTERNAL_ERROR, this->NoiseModelUpdate());
-}
-
-TYPED_TEST_P(NoiseModelUpdateTest, UpdateFailsBlockSizeTooSmall) {
- this->flat_blocks_.assign(this->flat_blocks_.size(), 1);
- this->denoised_.assign(this->denoised_.size(), 128);
- this->data_.assign(this->denoised_.size(), 128);
- EXPECT_EQ(AOM_NOISE_STATUS_INVALID_ARGUMENT,
- this->NoiseModelUpdate(6 /* block_size=6 is too small*/));
-}
-
-TYPED_TEST_P(NoiseModelUpdateTest, UpdateSuccessForWhiteRandomNoise) {
- aom_noise_model_t &model = this->model_;
- const int kWidth = this->kWidth;
- const int kHeight = this->kHeight;
-
- const int shift = this->kBitDepth - 8;
- for (int y = 0; y < kHeight; ++y) {
- for (int x = 0; x < kWidth; ++x) {
- this->data_ptr_[0][y * kWidth + x] =
- int(64 + y + randn(&this->random_, 1)) << shift;
- this->denoised_ptr_[0][y * kWidth + x] = (64 + y) << shift;
- // Make the chroma planes completely correlated with the Y plane
- for (int c = 1; c < 3; ++c) {
- this->data_ptr_[c][y * kWidth + x] = this->data_ptr_[0][y * kWidth + x];
- this->denoised_ptr_[c][y * kWidth + x] =
- this->denoised_ptr_[0][y * kWidth + x];
- }
- }
- }
- this->flat_blocks_.assign(this->flat_blocks_.size(), 1);
- EXPECT_EQ(AOM_NOISE_STATUS_OK, this->NoiseModelUpdate());
-
- const double kCoeffEps = 0.075;
- const int n = model.n;
- for (int c = 0; c < 3; ++c) {
- for (int i = 0; i < n; ++i) {
- EXPECT_NEAR(0, model.latest_state[c].eqns.x[i], kCoeffEps);
- EXPECT_NEAR(0, model.combined_state[c].eqns.x[i], kCoeffEps);
- }
- // The second and third channels are highly correlated with the first.
- if (c > 0) {
- ASSERT_EQ(n + 1, model.latest_state[c].eqns.n);
- ASSERT_EQ(n + 1, model.combined_state[c].eqns.n);
-
- EXPECT_NEAR(1, model.latest_state[c].eqns.x[n], kCoeffEps);
- EXPECT_NEAR(1, model.combined_state[c].eqns.x[n], kCoeffEps);
- }
- }
-
- // The fitted noise strength should be close to the standard deviation
- // for all intensity bins.
- const double kStdEps = 0.1;
- const double normalize = 1 << shift;
-
- for (int i = 0; i < model.latest_state[0].strength_solver.eqns.n; ++i) {
- EXPECT_NEAR(1.0,
- model.latest_state[0].strength_solver.eqns.x[i] / normalize,
- kStdEps);
- EXPECT_NEAR(1.0,
- model.combined_state[0].strength_solver.eqns.x[i] / normalize,
- kStdEps);
- }
-
- aom_noise_strength_lut_t lut;
- aom_noise_strength_solver_fit_piecewise(
- &model.latest_state[0].strength_solver, -1, &lut);
- ASSERT_EQ(2, lut.num_points);
- EXPECT_NEAR(0.0, lut.points[0][0], 1e-5);
- EXPECT_NEAR(1.0, lut.points[0][1] / normalize, kStdEps);
- EXPECT_NEAR((1 << this->kBitDepth) - 1, lut.points[1][0], 1e-5);
- EXPECT_NEAR(1.0, lut.points[1][1] / normalize, kStdEps);
- aom_noise_strength_lut_free(&lut);
-}
-
-TYPED_TEST_P(NoiseModelUpdateTest, UpdateSuccessForScaledWhiteNoise) {
- aom_noise_model_t &model = this->model_;
- const int kWidth = this->kWidth;
- const int kHeight = this->kHeight;
-
- const double kCoeffEps = 0.055;
- const double kLowStd = 1;
- const double kHighStd = 4;
- const int shift = this->kBitDepth - 8;
- for (int y = 0; y < kHeight; ++y) {
- for (int x = 0; x < kWidth; ++x) {
- for (int c = 0; c < 3; ++c) {
- // The image data is bimodal:
- // Bottom half has low intensity and low noise strength
- // Top half has high intensity and high noise strength
- const int avg = (y < kHeight / 2) ? 4 : 245;
- const double std = (y < kHeight / 2) ? kLowStd : kHighStd;
- this->data_ptr_[c][y * kWidth + x] =
- ((uint8_t)std::min((int)255,
- (int)(2 + avg + randn(&this->random_, std))))
- << shift;
- this->denoised_ptr_[c][y * kWidth + x] = (2 + avg) << shift;
- }
- }
- }
- // Label all blocks as flat for the update
- this->flat_blocks_.assign(this->flat_blocks_.size(), 1);
- EXPECT_EQ(AOM_NOISE_STATUS_OK, this->NoiseModelUpdate());
-
- const int n = model.n;
- // The noise is uncorrelated spatially and with the y channel.
- // All coefficients should be reasonably close to zero.
- for (int c = 0; c < 3; ++c) {
- for (int i = 0; i < n; ++i) {
- EXPECT_NEAR(0, model.latest_state[c].eqns.x[i], kCoeffEps);
- EXPECT_NEAR(0, model.combined_state[c].eqns.x[i], kCoeffEps);
- }
- if (c > 0) {
- ASSERT_EQ(n + 1, model.latest_state[c].eqns.n);
- ASSERT_EQ(n + 1, model.combined_state[c].eqns.n);
-
- // The correlation to the y channel should be low (near zero)
- EXPECT_NEAR(0, model.latest_state[c].eqns.x[n], kCoeffEps);
- EXPECT_NEAR(0, model.combined_state[c].eqns.x[n], kCoeffEps);
- }
- }
-
- // Noise strength should vary between kLowStd and kHighStd.
- const double kStdEps = 0.15;
- // We have to normalize fitted standard deviation based on bit depth.
- const double normalize = (1 << shift);
-
- ASSERT_EQ(20, model.latest_state[0].strength_solver.eqns.n);
- for (int i = 0; i < model.latest_state[0].strength_solver.eqns.n; ++i) {
- const double a = i / 19.0;
- const double expected = (kLowStd * (1.0 - a) + kHighStd * a);
- EXPECT_NEAR(expected,
- model.latest_state[0].strength_solver.eqns.x[i] / normalize,
- kStdEps);
- EXPECT_NEAR(expected,
- model.combined_state[0].strength_solver.eqns.x[i] / normalize,
- kStdEps);
- }
-
- // If we fit a piecewise linear model, there should be two points:
- // one near kLowStd at 0, and the other near kHighStd and 255.
- aom_noise_strength_lut_t lut;
- aom_noise_strength_solver_fit_piecewise(
- &model.latest_state[0].strength_solver, 2, &lut);
- ASSERT_EQ(2, lut.num_points);
- EXPECT_NEAR(0, lut.points[0][0], 1e-4);
- EXPECT_NEAR(kLowStd, lut.points[0][1] / normalize, kStdEps);
- EXPECT_NEAR((1 << this->kBitDepth) - 1, lut.points[1][0], 1e-5);
- EXPECT_NEAR(kHighStd, lut.points[1][1] / normalize, kStdEps);
- aom_noise_strength_lut_free(&lut);
-}
-
-TYPED_TEST_P(NoiseModelUpdateTest, UpdateSuccessForCorrelatedNoise) {
- aom_noise_model_t &model = this->model_;
- const int kWidth = this->kWidth;
- const int kHeight = this->kHeight;
- const int kNumCoeffs = 24;
- const double kStd = 4;
- const double kStdEps = 0.3;
- const double kCoeffEps = 0.065;
- // Use different coefficients for each channel
- const double kCoeffs[3][24] = {
- { 0.02884, -0.03356, 0.00633, 0.01757, 0.02849, -0.04620,
- 0.02833, -0.07178, 0.07076, -0.11603, -0.10413, -0.16571,
- 0.05158, -0.07969, 0.02640, -0.07191, 0.02530, 0.41968,
- 0.21450, -0.00702, -0.01401, -0.03676, -0.08713, 0.44196 },
- { 0.00269, -0.01291, -0.01513, 0.07234, 0.03208, 0.00477,
- 0.00226, -0.00254, 0.03533, 0.12841, -0.25970, -0.06336,
- 0.05238, -0.00845, -0.03118, 0.09043, -0.36558, 0.48903,
- 0.00595, -0.11938, 0.02106, 0.095956, -0.350139, 0.59305 },
- { -0.00643, -0.01080, -0.01466, 0.06951, 0.03707, -0.00482,
- 0.00817, -0.00909, 0.02949, 0.12181, -0.25210, -0.07886,
- 0.06083, -0.01210, -0.03108, 0.08944, -0.35875, 0.49150,
- 0.00415, -0.12905, 0.02870, 0.09740, -0.34610, 0.58824 },
- };
-
- ASSERT_EQ(model.n, kNumCoeffs);
- this->chroma_sub_[0] = this->chroma_sub_[1] = 1;
-
- this->flat_blocks_.assign(this->flat_blocks_.size(), 1);
-
- // Add different noise onto each plane
- const int shift = this->kBitDepth - 8;
- for (int c = 0; c < 3; ++c) {
- noise_synth(&this->random_, model.params.lag, model.n, model.coords,
- kCoeffs[c], this->noise_ptr_[c], kWidth, kHeight);
- const int x_shift = c > 0 ? this->chroma_sub_[0] : 0;
- const int y_shift = c > 0 ? this->chroma_sub_[1] : 0;
- for (int y = 0; y < (kHeight >> y_shift); ++y) {
- for (int x = 0; x < (kWidth >> x_shift); ++x) {
- const uint8_t value = 64 + x / 2 + y / 4;
- this->data_ptr_[c][y * kWidth + x] =
- (uint8_t(value + this->noise_ptr_[c][y * kWidth + x] * kStd))
- << shift;
- this->denoised_ptr_[c][y * kWidth + x] = value << shift;
- }
- }
- }
- EXPECT_EQ(AOM_NOISE_STATUS_OK, this->NoiseModelUpdate());
-
- // For the Y plane, the solved coefficients should be close to the original
- const int n = model.n;
- for (int c = 0; c < 3; ++c) {
- for (int i = 0; i < n; ++i) {
- EXPECT_NEAR(kCoeffs[c][i], model.latest_state[c].eqns.x[i], kCoeffEps);
- EXPECT_NEAR(kCoeffs[c][i], model.combined_state[c].eqns.x[i], kCoeffEps);
- }
- // The chroma planes should be uncorrelated with the luma plane
- if (c > 0) {
- EXPECT_NEAR(0, model.latest_state[c].eqns.x[n], kCoeffEps);
- EXPECT_NEAR(0, model.combined_state[c].eqns.x[n], kCoeffEps);
- }
- // Correlation between the coefficient vector and the fitted coefficients
- // should be close to 1.
- EXPECT_LT(0.98, aom_normalized_cross_correlation(
- model.latest_state[c].eqns.x, kCoeffs[c], kNumCoeffs));
-
- noise_synth(&this->random_, model.params.lag, model.n, model.coords,
- model.latest_state[c].eqns.x, &this->renoise_[0], kWidth,
- kHeight);
-
- EXPECT_TRUE(aom_noise_data_validate(&this->renoise_[0], kWidth, kHeight));
- }
-
- // Check fitted noise strength
- const double normalize = 1 << shift;
- for (int c = 0; c < 3; ++c) {
- for (int i = 0; i < model.latest_state[c].strength_solver.eqns.n; ++i) {
- EXPECT_NEAR(kStd,
- model.latest_state[c].strength_solver.eqns.x[i] / normalize,
- kStdEps);
- }
- }
-}
-
-TYPED_TEST_P(NoiseModelUpdateTest,
- NoiseStrengthChangeSignalsDifferentNoiseType) {
- aom_noise_model_t &model = this->model_;
- const int kWidth = this->kWidth;
- const int kHeight = this->kHeight;
- const int kBlockSize = this->kBlockSize;
- // Create a gradient image with std = 2 uncorrelated noise
- const double kStd = 2;
- const int shift = this->kBitDepth - 8;
-
- for (int i = 0; i < kWidth * kHeight; ++i) {
- const uint8_t val = (i % kWidth) < kWidth / 2 ? 64 : 192;
- for (int c = 0; c < 3; ++c) {
- this->noise_ptr_[c][i] = randn(&this->random_, 1);
- this->data_ptr_[c][i] = ((uint8_t)(this->noise_ptr_[c][i] * kStd + val))
- << shift;
- this->denoised_ptr_[c][i] = val << shift;
- }
- }
- this->flat_blocks_.assign(this->flat_blocks_.size(), 1);
- EXPECT_EQ(AOM_NOISE_STATUS_OK, this->NoiseModelUpdate());
-
- const int kNumBlocks = kWidth * kHeight / kBlockSize / kBlockSize;
- EXPECT_EQ(kNumBlocks, model.latest_state[0].strength_solver.num_equations);
- EXPECT_EQ(kNumBlocks, model.latest_state[1].strength_solver.num_equations);
- EXPECT_EQ(kNumBlocks, model.latest_state[2].strength_solver.num_equations);
- EXPECT_EQ(kNumBlocks, model.combined_state[0].strength_solver.num_equations);
- EXPECT_EQ(kNumBlocks, model.combined_state[1].strength_solver.num_equations);
- EXPECT_EQ(kNumBlocks, model.combined_state[2].strength_solver.num_equations);
-
- // Bump up noise by an insignificant amount
- for (int i = 0; i < kWidth * kHeight; ++i) {
- const uint8_t val = (i % kWidth) < kWidth / 2 ? 64 : 192;
- this->data_ptr_[0][i] =
- ((uint8_t)(this->noise_ptr_[0][i] * (kStd + 0.085) + val)) << shift;
- }
- EXPECT_EQ(AOM_NOISE_STATUS_OK, this->NoiseModelUpdate());
-
- const double kARGainTolerance = 0.02;
- for (int c = 0; c < 3; ++c) {
- EXPECT_EQ(kNumBlocks, model.latest_state[c].strength_solver.num_equations);
- EXPECT_EQ(15250, model.latest_state[c].num_observations);
- EXPECT_NEAR(1, model.latest_state[c].ar_gain, kARGainTolerance);
-
- EXPECT_EQ(2 * kNumBlocks,
- model.combined_state[c].strength_solver.num_equations);
- EXPECT_EQ(2 * 15250, model.combined_state[c].num_observations);
- EXPECT_NEAR(1, model.combined_state[c].ar_gain, kARGainTolerance);
- }
-
- // Bump up the noise strength on half the image for one channel by a
- // significant amount.
- for (int i = 0; i < kWidth * kHeight; ++i) {
- const uint8_t val = (i % kWidth) < kWidth / 2 ? 64 : 128;
- if (i % kWidth < kWidth / 2) {
- this->data_ptr_[0][i] =
- ((uint8_t)(randn(&this->random_, kStd + 0.5) + val)) << shift;
- }
- }
- EXPECT_EQ(AOM_NOISE_STATUS_DIFFERENT_NOISE_TYPE, this->NoiseModelUpdate());
-
- // Since we didn't update the combined state, it should still be at 2 *
- // num_blocks
- EXPECT_EQ(kNumBlocks, model.latest_state[0].strength_solver.num_equations);
- EXPECT_EQ(2 * kNumBlocks,
- model.combined_state[0].strength_solver.num_equations);
-
- // In normal operation, the "latest" estimate can be saved to the "combined"
- // state for continued updates.
- aom_noise_model_save_latest(&model);
- for (int c = 0; c < 3; ++c) {
- EXPECT_EQ(kNumBlocks, model.latest_state[c].strength_solver.num_equations);
- EXPECT_EQ(15250, model.latest_state[c].num_observations);
- EXPECT_NEAR(1, model.latest_state[c].ar_gain, kARGainTolerance);
-
- EXPECT_EQ(kNumBlocks,
- model.combined_state[c].strength_solver.num_equations);
- EXPECT_EQ(15250, model.combined_state[c].num_observations);
- EXPECT_NEAR(1, model.combined_state[c].ar_gain, kARGainTolerance);
- }
-}
-
-TYPED_TEST_P(NoiseModelUpdateTest, NoiseCoeffsSignalsDifferentNoiseType) {
- aom_noise_model_t &model = this->model_;
- const int kWidth = this->kWidth;
- const int kHeight = this->kHeight;
- const double kCoeffs[2][24] = {
- { 0.02884, -0.03356, 0.00633, 0.01757, 0.02849, -0.04620,
- 0.02833, -0.07178, 0.07076, -0.11603, -0.10413, -0.16571,
- 0.05158, -0.07969, 0.02640, -0.07191, 0.02530, 0.41968,
- 0.21450, -0.00702, -0.01401, -0.03676, -0.08713, 0.44196 },
- { 0.00269, -0.01291, -0.01513, 0.07234, 0.03208, 0.00477,
- 0.00226, -0.00254, 0.03533, 0.12841, -0.25970, -0.06336,
- 0.05238, -0.00845, -0.03118, 0.09043, -0.36558, 0.48903,
- 0.00595, -0.11938, 0.02106, 0.095956, -0.350139, 0.59305 }
- };
-
- noise_synth(&this->random_, model.params.lag, model.n, model.coords,
- kCoeffs[0], this->noise_ptr_[0], kWidth, kHeight);
- for (int i = 0; i < kWidth * kHeight; ++i) {
- this->data_ptr_[0][i] = (uint8_t)(128 + this->noise_ptr_[0][i]);
- }
- this->flat_blocks_.assign(this->flat_blocks_.size(), 1);
- EXPECT_EQ(AOM_NOISE_STATUS_OK, this->NoiseModelUpdate());
-
- // Now try with the second set of AR coefficients
- noise_synth(&this->random_, model.params.lag, model.n, model.coords,
- kCoeffs[1], this->noise_ptr_[0], kWidth, kHeight);
- for (int i = 0; i < kWidth * kHeight; ++i) {
- this->data_ptr_[0][i] = (uint8_t)(128 + this->noise_ptr_[0][i]);
- }
- EXPECT_EQ(AOM_NOISE_STATUS_DIFFERENT_NOISE_TYPE, this->NoiseModelUpdate());
-}
-REGISTER_TYPED_TEST_CASE_P(NoiseModelUpdateTest, UpdateFailsNoFlatBlocks,
- UpdateSuccessForZeroNoiseAllFlat,
- UpdateFailsBlockSizeTooSmall,
- UpdateSuccessForWhiteRandomNoise,
- UpdateSuccessForScaledWhiteNoise,
- UpdateSuccessForCorrelatedNoise,
- NoiseStrengthChangeSignalsDifferentNoiseType,
- NoiseCoeffsSignalsDifferentNoiseType);
-
-INSTANTIATE_TYPED_TEST_CASE_P(NoiseModelUpdateTestInstatiation,
- NoiseModelUpdateTest, AllBitDepthParams);
-
-TEST(NoiseModelGetGrainParameters, TestLagSize) {
- aom_film_grain_t film_grain;
- for (int lag = 1; lag <= 3; ++lag) {
- aom_noise_model_params_t params = { AOM_NOISE_SHAPE_SQUARE, lag, 8, 0 };
- aom_noise_model_t model;
- EXPECT_TRUE(aom_noise_model_init(&model, params));
- EXPECT_TRUE(aom_noise_model_get_grain_parameters(&model, &film_grain));
- EXPECT_EQ(lag, film_grain.ar_coeff_lag);
- aom_noise_model_free(&model);
- }
-
- aom_noise_model_params_t params = { AOM_NOISE_SHAPE_SQUARE, 4, 8, 0 };
- aom_noise_model_t model;
- EXPECT_TRUE(aom_noise_model_init(&model, params));
- EXPECT_FALSE(aom_noise_model_get_grain_parameters(&model, &film_grain));
- aom_noise_model_free(&model);
-}
-
-TEST(NoiseModelGetGrainParameters, TestARCoeffShiftBounds) {
- struct TestCase {
- double max_input_value;
- int expected_ar_coeff_shift;
- int expected_value;
- };
- const int lag = 1;
- const int kNumTestCases = 19;
- const TestCase test_cases[] = {
- // Test cases for ar_coeff_shift = 9
- { 0, 9, 0 },
- { 0.125, 9, 64 },
- { -0.125, 9, -64 },
- { 0.2499, 9, 127 },
- { -0.25, 9, -128 },
- // Test cases for ar_coeff_shift = 8
- { 0.25, 8, 64 },
- { -0.2501, 8, -64 },
- { 0.499, 8, 127 },
- { -0.5, 8, -128 },
- // Test cases for ar_coeff_shift = 7
- { 0.5, 7, 64 },
- { -0.5001, 7, -64 },
- { 0.999, 7, 127 },
- { -1, 7, -128 },
- // Test cases for ar_coeff_shift = 6
- { 1.0, 6, 64 },
- { -1.0001, 6, -64 },
- { 2.0, 6, 127 },
- { -2.0, 6, -128 },
- { 4, 6, 127 },
- { -4, 6, -128 },
- };
- aom_noise_model_params_t params = { AOM_NOISE_SHAPE_SQUARE, lag, 8, 0 };
- aom_noise_model_t model;
- EXPECT_TRUE(aom_noise_model_init(&model, params));
-
- for (int i = 0; i < kNumTestCases; ++i) {
- const TestCase &test_case = test_cases[i];
- model.combined_state[0].eqns.x[0] = test_case.max_input_value;
-
- aom_film_grain_t film_grain;
- EXPECT_TRUE(aom_noise_model_get_grain_parameters(&model, &film_grain));
- EXPECT_EQ(1, film_grain.ar_coeff_lag);
- EXPECT_EQ(test_case.expected_ar_coeff_shift, film_grain.ar_coeff_shift);
- EXPECT_EQ(test_case.expected_value, film_grain.ar_coeffs_y[0]);
- }
- aom_noise_model_free(&model);
-}
-
-TEST(NoiseModelGetGrainParameters, TestNoiseStrengthShiftBounds) {
- struct TestCase {
- double max_input_value;
- int expected_scaling_shift;
- int expected_value;
- };
- const int kNumTestCases = 10;
- const TestCase test_cases[] = {
- { 0, 11, 0 }, { 1, 11, 64 }, { 2, 11, 128 }, { 3.99, 11, 255 },
- { 4, 10, 128 }, { 7.99, 10, 255 }, { 8, 9, 128 }, { 16, 8, 128 },
- { 31.99, 8, 255 }, { 64, 8, 255 }, // clipped
- };
- const int lag = 1;
- aom_noise_model_params_t params = { AOM_NOISE_SHAPE_SQUARE, lag, 8, 0 };
- aom_noise_model_t model;
- EXPECT_TRUE(aom_noise_model_init(&model, params));
-
- for (int i = 0; i < kNumTestCases; ++i) {
- const TestCase &test_case = test_cases[i];
- aom_equation_system_t &eqns = model.combined_state[0].strength_solver.eqns;
- // Set the fitted scale parameters to be a constant value.
- for (int j = 0; j < eqns.n; ++j) {
- eqns.x[j] = test_case.max_input_value;
- }
- aom_film_grain_t film_grain;
- EXPECT_TRUE(aom_noise_model_get_grain_parameters(&model, &film_grain));
- // We expect a single constant segemnt
- EXPECT_EQ(test_case.expected_scaling_shift, film_grain.scaling_shift);
- EXPECT_EQ(test_case.expected_value, film_grain.scaling_points_y[0][1]);
- EXPECT_EQ(test_case.expected_value, film_grain.scaling_points_y[1][1]);
- }
- aom_noise_model_free(&model);
-}
-
-// The AR coefficients are the same inputs used to generate "Test 2" in the test
-// vectors
-TEST(NoiseModelGetGrainParameters, GetGrainParametersReal) {
- const double kInputCoeffsY[] = { 0.0315, 0.0073, 0.0218, 0.00235, 0.00511,
- -0.0222, 0.0627, -0.022, 0.05575, -0.1816,
- 0.0107, -0.1966, 0.00065, -0.0809, 0.04934,
- -0.1349, -0.0352, 0.41772, 0.27973, 0.04207,
- -0.0429, -0.1372, 0.06193, 0.52032 };
- const double kInputCoeffsCB[] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0.5 };
- const double kInputCoeffsCR[] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -0.5 };
- const int kExpectedARCoeffsY[] = { 4, 1, 3, 0, 1, -3, 8, -3,
- 7, -23, 1, -25, 0, -10, 6, -17,
- -5, 53, 36, 5, -5, -18, 8, 67 };
- const int kExpectedARCoeffsCB[] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 84 };
- const int kExpectedARCoeffsCR[] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -126 };
- // Scaling function is initialized analytically with a sqrt function.
- const int kNumScalingPointsY = 12;
- const int kExpectedScalingPointsY[][2] = {
- { 0, 0 }, { 13, 44 }, { 27, 62 }, { 40, 76 },
- { 54, 88 }, { 67, 98 }, { 94, 117 }, { 121, 132 },
- { 148, 146 }, { 174, 159 }, { 201, 171 }, { 255, 192 },
- };
-
- const int lag = 3;
- aom_noise_model_params_t params = { AOM_NOISE_SHAPE_SQUARE, lag, 8, 0 };
- aom_noise_model_t model;
- EXPECT_TRUE(aom_noise_model_init(&model, params));
-
- // Setup the AR coeffs
- memcpy(model.combined_state[0].eqns.x, kInputCoeffsY, sizeof(kInputCoeffsY));
- memcpy(model.combined_state[1].eqns.x, kInputCoeffsCB,
- sizeof(kInputCoeffsCB));
- memcpy(model.combined_state[2].eqns.x, kInputCoeffsCR,
- sizeof(kInputCoeffsCR));
- for (int i = 0; i < model.combined_state[0].strength_solver.num_bins; ++i) {
- const double x =
- ((double)i) / (model.combined_state[0].strength_solver.num_bins - 1.0);
- model.combined_state[0].strength_solver.eqns.x[i] = 6 * sqrt(x);
- model.combined_state[1].strength_solver.eqns.x[i] = 3;
- model.combined_state[2].strength_solver.eqns.x[i] = 2;
-
- // Inject some observations into the strength solver, as during film grain
- // parameter extraction an estimate of the average strength will be used to
- // adjust correlation.
- const int n = model.combined_state[0].strength_solver.num_bins;
- for (int j = 0; j < model.combined_state[0].strength_solver.num_bins; ++j) {
- model.combined_state[0].strength_solver.eqns.A[i * n + j] = 1;
- model.combined_state[1].strength_solver.eqns.A[i * n + j] = 1;
- model.combined_state[2].strength_solver.eqns.A[i * n + j] = 1;
- }
- }
-
- aom_film_grain_t film_grain;
- EXPECT_TRUE(aom_noise_model_get_grain_parameters(&model, &film_grain));
- EXPECT_EQ(lag, film_grain.ar_coeff_lag);
- EXPECT_EQ(3, film_grain.ar_coeff_lag);
- EXPECT_EQ(7, film_grain.ar_coeff_shift);
- EXPECT_EQ(10, film_grain.scaling_shift);
- EXPECT_EQ(kNumScalingPointsY, film_grain.num_y_points);
- EXPECT_EQ(1, film_grain.update_parameters);
- EXPECT_EQ(1, film_grain.apply_grain);
-
- const int kNumARCoeffs = 24;
- for (int i = 0; i < kNumARCoeffs; ++i) {
- EXPECT_EQ(kExpectedARCoeffsY[i], film_grain.ar_coeffs_y[i]);
- }
- for (int i = 0; i < kNumARCoeffs + 1; ++i) {
- EXPECT_EQ(kExpectedARCoeffsCB[i], film_grain.ar_coeffs_cb[i]);
- }
- for (int i = 0; i < kNumARCoeffs + 1; ++i) {
- EXPECT_EQ(kExpectedARCoeffsCR[i], film_grain.ar_coeffs_cr[i]);
- }
- for (int i = 0; i < kNumScalingPointsY; ++i) {
- EXPECT_EQ(kExpectedScalingPointsY[i][0], film_grain.scaling_points_y[i][0]);
- EXPECT_EQ(kExpectedScalingPointsY[i][1], film_grain.scaling_points_y[i][1]);
- }
-
- // CB strength should just be a piecewise segment
- EXPECT_EQ(2, film_grain.num_cb_points);
- EXPECT_EQ(0, film_grain.scaling_points_cb[0][0]);
- EXPECT_EQ(255, film_grain.scaling_points_cb[1][0]);
- EXPECT_EQ(96, film_grain.scaling_points_cb[0][1]);
- EXPECT_EQ(96, film_grain.scaling_points_cb[1][1]);
-
- // CR strength should just be a piecewise segment
- EXPECT_EQ(2, film_grain.num_cr_points);
- EXPECT_EQ(0, film_grain.scaling_points_cr[0][0]);
- EXPECT_EQ(255, film_grain.scaling_points_cr[1][0]);
- EXPECT_EQ(64, film_grain.scaling_points_cr[0][1]);
- EXPECT_EQ(64, film_grain.scaling_points_cr[1][1]);
-
- EXPECT_EQ(128, film_grain.cb_mult);
- EXPECT_EQ(192, film_grain.cb_luma_mult);
- EXPECT_EQ(256, film_grain.cb_offset);
- EXPECT_EQ(128, film_grain.cr_mult);
- EXPECT_EQ(192, film_grain.cr_luma_mult);
- EXPECT_EQ(256, film_grain.cr_offset);
- EXPECT_EQ(0, film_grain.chroma_scaling_from_luma);
- EXPECT_EQ(0, film_grain.grain_scale_shift);
-
- aom_noise_model_free(&model);
-}
-
-template <typename T>
-class WienerDenoiseTest : public ::testing::Test, public T {
- public:
- static void SetUpTestCase() { aom_dsp_rtcd(); }
-
- protected:
- void SetUp() {
- static const float kNoiseLevel = 5.f;
- static const float kStd = 4.0;
- static const double kMaxValue = (1 << T::kBitDepth) - 1;
-
- chroma_sub_[0] = 1;
- chroma_sub_[1] = 1;
- stride_[0] = kWidth;
- stride_[1] = kWidth / 2;
- stride_[2] = kWidth / 2;
- for (int k = 0; k < 3; ++k) {
- data_[k].resize(kWidth * kHeight);
- denoised_[k].resize(kWidth * kHeight);
- noise_psd_[k].resize(kBlockSize * kBlockSize);
- }
-
- const double kCoeffsY[] = { 0.0406, -0.116, -0.078, -0.152, 0.0033, -0.093,
- 0.048, 0.404, 0.2353, -0.035, -0.093, 0.441 };
- const int kCoords[12][2] = {
- { -2, -2 }, { -1, -2 }, { 0, -2 }, { 1, -2 }, { 2, -2 }, { -2, -1 },
- { -1, -1 }, { 0, -1 }, { 1, -1 }, { 2, -1 }, { -2, 0 }, { -1, 0 }
- };
- const int kLag = 2;
- const int kLength = 12;
- libaom_test::ACMRandom random;
- std::vector<double> noise(kWidth * kHeight);
- noise_synth(&random, kLag, kLength, kCoords, kCoeffsY, &noise[0], kWidth,
- kHeight);
- noise_psd_[0] = get_noise_psd(&noise[0], kWidth, kHeight, kBlockSize);
- for (int i = 0; i < kBlockSize * kBlockSize; ++i) {
- noise_psd_[0][i] = (float)(noise_psd_[0][i] * kStd * kStd * kScaleNoise *
- kScaleNoise / (kMaxValue * kMaxValue));
- }
-
- float psd_value =
- aom_noise_psd_get_default_value(kBlockSizeChroma, kNoiseLevel);
- for (int i = 0; i < kBlockSizeChroma * kBlockSizeChroma; ++i) {
- noise_psd_[1][i] = psd_value;
- noise_psd_[2][i] = psd_value;
- }
- for (int y = 0; y < kHeight; ++y) {
- for (int x = 0; x < kWidth; ++x) {
- data_[0][y * stride_[0] + x] = (typename T::data_type_t)fclamp(
- (x + noise[y * stride_[0] + x] * kStd) * kScaleNoise, 0, kMaxValue);
- }
- }
-
- for (int c = 1; c < 3; ++c) {
- for (int y = 0; y < (kHeight >> 1); ++y) {
- for (int x = 0; x < (kWidth >> 1); ++x) {
- data_[c][y * stride_[c] + x] = (typename T::data_type_t)fclamp(
- (x + randn(&random, kStd)) * kScaleNoise, 0, kMaxValue);
- }
- }
- }
- for (int k = 0; k < 3; ++k) {
- noise_psd_ptrs_[k] = &noise_psd_[k][0];
- }
- }
- static const int kBlockSize = 32;
- static const int kBlockSizeChroma = 16;
- static const int kWidth = 256;
- static const int kHeight = 256;
- static const int kScaleNoise = 1 << (T::kBitDepth - 8);
-
- std::vector<typename T::data_type_t> data_[3];
- std::vector<typename T::data_type_t> denoised_[3];
- std::vector<float> noise_psd_[3];
- int chroma_sub_[2];
- float *noise_psd_ptrs_[3];
- int stride_[3];
-};
-
-TYPED_TEST_CASE_P(WienerDenoiseTest);
-
-TYPED_TEST_P(WienerDenoiseTest, InvalidBlockSize) {
- const uint8_t *const data_ptrs[3] = {
- reinterpret_cast<uint8_t *>(&this->data_[0][0]),
- reinterpret_cast<uint8_t *>(&this->data_[1][0]),
- reinterpret_cast<uint8_t *>(&this->data_[2][0]),
- };
- uint8_t *denoised_ptrs[3] = {
- reinterpret_cast<uint8_t *>(&this->denoised_[0][0]),
- reinterpret_cast<uint8_t *>(&this->denoised_[1][0]),
- reinterpret_cast<uint8_t *>(&this->denoised_[2][0]),
- };
- EXPECT_EQ(0, aom_wiener_denoise_2d(data_ptrs, denoised_ptrs, this->kWidth,
- this->kHeight, this->stride_,
- this->chroma_sub_, this->noise_psd_ptrs_,
- 18, this->kBitDepth, this->kUseHighBD));
- EXPECT_EQ(0, aom_wiener_denoise_2d(data_ptrs, denoised_ptrs, this->kWidth,
- this->kHeight, this->stride_,
- this->chroma_sub_, this->noise_psd_ptrs_,
- 48, this->kBitDepth, this->kUseHighBD));
- EXPECT_EQ(0, aom_wiener_denoise_2d(data_ptrs, denoised_ptrs, this->kWidth,
- this->kHeight, this->stride_,
- this->chroma_sub_, this->noise_psd_ptrs_,
- 64, this->kBitDepth, this->kUseHighBD));
-}
-
-TYPED_TEST_P(WienerDenoiseTest, InvalidChromaSubsampling) {
- const uint8_t *const data_ptrs[3] = {
- reinterpret_cast<uint8_t *>(&this->data_[0][0]),
- reinterpret_cast<uint8_t *>(&this->data_[1][0]),
- reinterpret_cast<uint8_t *>(&this->data_[2][0]),
- };
- uint8_t *denoised_ptrs[3] = {
- reinterpret_cast<uint8_t *>(&this->denoised_[0][0]),
- reinterpret_cast<uint8_t *>(&this->denoised_[1][0]),
- reinterpret_cast<uint8_t *>(&this->denoised_[2][0]),
- };
- int chroma_sub[2] = { 1, 0 };
- EXPECT_EQ(0, aom_wiener_denoise_2d(data_ptrs, denoised_ptrs, this->kWidth,
- this->kHeight, this->stride_, chroma_sub,
- this->noise_psd_ptrs_, 32, this->kBitDepth,
- this->kUseHighBD));
-
- chroma_sub[0] = 0;
- chroma_sub[1] = 1;
- EXPECT_EQ(0, aom_wiener_denoise_2d(data_ptrs, denoised_ptrs, this->kWidth,
- this->kHeight, this->stride_, chroma_sub,
- this->noise_psd_ptrs_, 32, this->kBitDepth,
- this->kUseHighBD));
-}
-
-TYPED_TEST_P(WienerDenoiseTest, GradientTest) {
- const int kWidth = this->kWidth;
- const int kHeight = this->kHeight;
- const int kBlockSize = this->kBlockSize;
- const uint8_t *const data_ptrs[3] = {
- reinterpret_cast<uint8_t *>(&this->data_[0][0]),
- reinterpret_cast<uint8_t *>(&this->data_[1][0]),
- reinterpret_cast<uint8_t *>(&this->data_[2][0]),
- };
- uint8_t *denoised_ptrs[3] = {
- reinterpret_cast<uint8_t *>(&this->denoised_[0][0]),
- reinterpret_cast<uint8_t *>(&this->denoised_[1][0]),
- reinterpret_cast<uint8_t *>(&this->denoised_[2][0]),
- };
- const int ret = aom_wiener_denoise_2d(
- data_ptrs, denoised_ptrs, kWidth, kHeight, this->stride_,
- this->chroma_sub_, this->noise_psd_ptrs_, this->kBlockSize,
- this->kBitDepth, this->kUseHighBD);
- EXPECT_EQ(1, ret);
-
- // Check the noise on the denoised image (from the analytical gradient)
- // and make sure that it is less than what we added.
- for (int c = 0; c < 3; ++c) {
- std::vector<double> measured_noise(kWidth * kHeight);
-
- double var = 0;
- const int shift = (c > 0);
- for (int x = 0; x < (kWidth >> shift); ++x) {
- for (int y = 0; y < (kHeight >> shift); ++y) {
- const double diff = this->denoised_[c][y * this->stride_[c] + x] -
- x * this->kScaleNoise;
- var += diff * diff;
- measured_noise[y * kWidth + x] = diff;
- }
- }
- var /= (kWidth * kHeight);
- const double std = sqrt(std::max(0.0, var));
- EXPECT_LE(std, 1.25f * this->kScaleNoise);
- if (c == 0) {
- std::vector<float> measured_psd =
- get_noise_psd(&measured_noise[0], kWidth, kHeight, kBlockSize);
- std::vector<double> measured_psd_d(kBlockSize * kBlockSize);
- std::vector<double> noise_psd_d(kBlockSize * kBlockSize);
- std::copy(measured_psd.begin(), measured_psd.end(),
- measured_psd_d.begin());
- std::copy(this->noise_psd_[0].begin(), this->noise_psd_[0].end(),
- noise_psd_d.begin());
- EXPECT_LT(
- aom_normalized_cross_correlation(&measured_psd_d[0], &noise_psd_d[0],
- (int)(noise_psd_d.size())),
- 0.35);
- }
- }
-}
-
-REGISTER_TYPED_TEST_CASE_P(WienerDenoiseTest, InvalidBlockSize,
- InvalidChromaSubsampling, GradientTest);
-
-INSTANTIATE_TYPED_TEST_CASE_P(WienerDenoiseTestInstatiation, WienerDenoiseTest,
- AllBitDepthParams);
diff --git a/third_party/aom/test/obmc_sad_test.cc b/third_party/aom/test/obmc_sad_test.cc
deleted file mode 100644
index 6cef86961..000000000
--- a/third_party/aom/test/obmc_sad_test.cc
+++ /dev/null
@@ -1,237 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#include "third_party/googletest/src/googletest/include/gtest/gtest.h"
-
-#include "test/function_equivalence_test.h"
-#include "test/register_state_check.h"
-
-#include "config/aom_config.h"
-#include "config/aom_dsp_rtcd.h"
-
-#include "aom/aom_integer.h"
-
-#define MAX_SB_SQUARE (MAX_SB_SIZE * MAX_SB_SIZE)
-
-using libaom_test::FunctionEquivalenceTest;
-
-namespace {
-
-static const int kIterations = 1000;
-static const int kMaskMax = 64;
-
-typedef unsigned int (*ObmcSadF)(const uint8_t *pre, int pre_stride,
- const int32_t *wsrc, const int32_t *mask);
-typedef libaom_test::FuncParam<ObmcSadF> TestFuncs;
-
-////////////////////////////////////////////////////////////////////////////////
-// 8 bit
-////////////////////////////////////////////////////////////////////////////////
-
-class ObmcSadTest : public FunctionEquivalenceTest<ObmcSadF> {};
-
-TEST_P(ObmcSadTest, RandomValues) {
- DECLARE_ALIGNED(32, uint8_t, pre[MAX_SB_SQUARE]);
- DECLARE_ALIGNED(32, int32_t, wsrc[MAX_SB_SQUARE]);
- DECLARE_ALIGNED(32, int32_t, mask[MAX_SB_SQUARE]);
-
- for (int iter = 0; iter < kIterations && !HasFatalFailure(); ++iter) {
- const int pre_stride = rng_(MAX_SB_SIZE + 1);
-
- for (int i = 0; i < MAX_SB_SQUARE; ++i) {
- pre[i] = rng_.Rand8();
- wsrc[i] = rng_.Rand8() * rng_(kMaskMax * kMaskMax + 1);
- mask[i] = rng_(kMaskMax * kMaskMax + 1);
- }
-
- const unsigned int ref_res = params_.ref_func(pre, pre_stride, wsrc, mask);
- unsigned int tst_res;
- ASM_REGISTER_STATE_CHECK(tst_res =
- params_.tst_func(pre, pre_stride, wsrc, mask));
-
- ASSERT_EQ(ref_res, tst_res);
- }
-}
-
-TEST_P(ObmcSadTest, ExtremeValues) {
- DECLARE_ALIGNED(32, uint8_t, pre[MAX_SB_SQUARE]);
- DECLARE_ALIGNED(32, int32_t, wsrc[MAX_SB_SQUARE]);
- DECLARE_ALIGNED(32, int32_t, mask[MAX_SB_SQUARE]);
-
- for (int iter = 0; iter < MAX_SB_SIZE && !HasFatalFailure(); ++iter) {
- const int pre_stride = iter;
-
- for (int i = 0; i < MAX_SB_SQUARE; ++i) {
- pre[i] = UINT8_MAX;
- wsrc[i] = UINT8_MAX * kMaskMax * kMaskMax;
- mask[i] = kMaskMax * kMaskMax;
- }
-
- const unsigned int ref_res = params_.ref_func(pre, pre_stride, wsrc, mask);
- unsigned int tst_res;
- ASM_REGISTER_STATE_CHECK(tst_res =
- params_.tst_func(pre, pre_stride, wsrc, mask));
-
- ASSERT_EQ(ref_res, tst_res);
- }
-}
-
-#if HAVE_SSE4_1
-const ObmcSadTest::ParamType sse4_functions[] = {
- TestFuncs(aom_obmc_sad128x128_c, aom_obmc_sad128x128_sse4_1),
- TestFuncs(aom_obmc_sad128x64_c, aom_obmc_sad128x64_sse4_1),
- TestFuncs(aom_obmc_sad64x128_c, aom_obmc_sad64x128_sse4_1),
- TestFuncs(aom_obmc_sad64x64_c, aom_obmc_sad64x64_sse4_1),
- TestFuncs(aom_obmc_sad64x32_c, aom_obmc_sad64x32_sse4_1),
- TestFuncs(aom_obmc_sad32x64_c, aom_obmc_sad32x64_sse4_1),
- TestFuncs(aom_obmc_sad32x32_c, aom_obmc_sad32x32_sse4_1),
- TestFuncs(aom_obmc_sad32x16_c, aom_obmc_sad32x16_sse4_1),
- TestFuncs(aom_obmc_sad16x32_c, aom_obmc_sad16x32_sse4_1),
- TestFuncs(aom_obmc_sad16x16_c, aom_obmc_sad16x16_sse4_1),
- TestFuncs(aom_obmc_sad16x8_c, aom_obmc_sad16x8_sse4_1),
- TestFuncs(aom_obmc_sad8x16_c, aom_obmc_sad8x16_sse4_1),
- TestFuncs(aom_obmc_sad8x8_c, aom_obmc_sad8x8_sse4_1),
- TestFuncs(aom_obmc_sad8x4_c, aom_obmc_sad8x4_sse4_1),
- TestFuncs(aom_obmc_sad4x8_c, aom_obmc_sad4x8_sse4_1),
- TestFuncs(aom_obmc_sad4x4_c, aom_obmc_sad4x4_sse4_1)
-};
-
-INSTANTIATE_TEST_CASE_P(SSE4_1, ObmcSadTest,
- ::testing::ValuesIn(sse4_functions));
-#endif // HAVE_SSE4_1
-
-#if HAVE_AVX2
-const ObmcSadTest::ParamType avx2_functions[] = {
- TestFuncs(aom_obmc_sad128x128_c, aom_obmc_sad128x128_avx2),
- TestFuncs(aom_obmc_sad128x64_c, aom_obmc_sad128x64_avx2),
- TestFuncs(aom_obmc_sad64x128_c, aom_obmc_sad64x128_avx2),
- TestFuncs(aom_obmc_sad64x64_c, aom_obmc_sad64x64_avx2),
- TestFuncs(aom_obmc_sad64x32_c, aom_obmc_sad64x32_avx2),
- TestFuncs(aom_obmc_sad32x64_c, aom_obmc_sad32x64_avx2),
- TestFuncs(aom_obmc_sad32x32_c, aom_obmc_sad32x32_avx2),
- TestFuncs(aom_obmc_sad32x16_c, aom_obmc_sad32x16_avx2),
- TestFuncs(aom_obmc_sad16x32_c, aom_obmc_sad16x32_avx2),
- TestFuncs(aom_obmc_sad16x16_c, aom_obmc_sad16x16_avx2),
- TestFuncs(aom_obmc_sad16x8_c, aom_obmc_sad16x8_avx2),
- TestFuncs(aom_obmc_sad8x16_c, aom_obmc_sad8x16_avx2),
- TestFuncs(aom_obmc_sad8x8_c, aom_obmc_sad8x8_avx2),
- TestFuncs(aom_obmc_sad8x4_c, aom_obmc_sad8x4_avx2),
- TestFuncs(aom_obmc_sad4x8_c, aom_obmc_sad4x8_avx2),
- TestFuncs(aom_obmc_sad4x4_c, aom_obmc_sad4x4_avx2)
-};
-
-INSTANTIATE_TEST_CASE_P(AVX2, ObmcSadTest, ::testing::ValuesIn(avx2_functions));
-#endif // HAVE_AVX2
-
-////////////////////////////////////////////////////////////////////////////////
-// High bit-depth
-////////////////////////////////////////////////////////////////////////////////
-
-class ObmcSadHBDTest : public FunctionEquivalenceTest<ObmcSadF> {};
-
-TEST_P(ObmcSadHBDTest, RandomValues) {
- DECLARE_ALIGNED(32, uint16_t, pre[MAX_SB_SQUARE]);
- DECLARE_ALIGNED(32, int32_t, wsrc[MAX_SB_SQUARE]);
- DECLARE_ALIGNED(32, int32_t, mask[MAX_SB_SQUARE]);
-
- for (int iter = 0; iter < kIterations && !HasFatalFailure(); ++iter) {
- const int pre_stride = rng_(MAX_SB_SIZE + 1);
-
- for (int i = 0; i < MAX_SB_SQUARE; ++i) {
- pre[i] = rng_(1 << 12);
- wsrc[i] = rng_(1 << 12) * rng_(kMaskMax * kMaskMax + 1);
- mask[i] = rng_(kMaskMax * kMaskMax + 1);
- }
-
- const unsigned int ref_res =
- params_.ref_func(CONVERT_TO_BYTEPTR(pre), pre_stride, wsrc, mask);
- unsigned int tst_res;
- ASM_REGISTER_STATE_CHECK(
- tst_res =
- params_.tst_func(CONVERT_TO_BYTEPTR(pre), pre_stride, wsrc, mask));
-
- ASSERT_EQ(ref_res, tst_res);
- }
-}
-
-TEST_P(ObmcSadHBDTest, ExtremeValues) {
- DECLARE_ALIGNED(32, uint16_t, pre[MAX_SB_SQUARE]);
- DECLARE_ALIGNED(32, int32_t, wsrc[MAX_SB_SQUARE]);
- DECLARE_ALIGNED(32, int32_t, mask[MAX_SB_SQUARE]);
-
- for (int iter = 0; iter < MAX_SB_SIZE && !HasFatalFailure(); ++iter) {
- const int pre_stride = iter;
-
- for (int i = 0; i < MAX_SB_SQUARE; ++i) {
- pre[i] = (1 << 12) - 1;
- wsrc[i] = ((1 << 12) - 1) * kMaskMax * kMaskMax;
- mask[i] = kMaskMax * kMaskMax;
- }
-
- const unsigned int ref_res =
- params_.ref_func(CONVERT_TO_BYTEPTR(pre), pre_stride, wsrc, mask);
- unsigned int tst_res;
- ASM_REGISTER_STATE_CHECK(
- tst_res =
- params_.tst_func(CONVERT_TO_BYTEPTR(pre), pre_stride, wsrc, mask));
-
- ASSERT_EQ(ref_res, tst_res);
- }
-}
-
-#if HAVE_SSE4_1
-ObmcSadHBDTest::ParamType sse4_functions_hbd[] = {
- TestFuncs(aom_highbd_obmc_sad128x128_c, aom_highbd_obmc_sad128x128_sse4_1),
- TestFuncs(aom_highbd_obmc_sad128x64_c, aom_highbd_obmc_sad128x64_sse4_1),
- TestFuncs(aom_highbd_obmc_sad64x128_c, aom_highbd_obmc_sad64x128_sse4_1),
- TestFuncs(aom_highbd_obmc_sad64x64_c, aom_highbd_obmc_sad64x64_sse4_1),
- TestFuncs(aom_highbd_obmc_sad64x32_c, aom_highbd_obmc_sad64x32_sse4_1),
- TestFuncs(aom_highbd_obmc_sad32x64_c, aom_highbd_obmc_sad32x64_sse4_1),
- TestFuncs(aom_highbd_obmc_sad32x32_c, aom_highbd_obmc_sad32x32_sse4_1),
- TestFuncs(aom_highbd_obmc_sad32x16_c, aom_highbd_obmc_sad32x16_sse4_1),
- TestFuncs(aom_highbd_obmc_sad16x32_c, aom_highbd_obmc_sad16x32_sse4_1),
- TestFuncs(aom_highbd_obmc_sad16x16_c, aom_highbd_obmc_sad16x16_sse4_1),
- TestFuncs(aom_highbd_obmc_sad16x8_c, aom_highbd_obmc_sad16x8_sse4_1),
- TestFuncs(aom_highbd_obmc_sad8x16_c, aom_highbd_obmc_sad8x16_sse4_1),
- TestFuncs(aom_highbd_obmc_sad8x8_c, aom_highbd_obmc_sad8x8_sse4_1),
- TestFuncs(aom_highbd_obmc_sad8x4_c, aom_highbd_obmc_sad8x4_sse4_1),
- TestFuncs(aom_highbd_obmc_sad4x8_c, aom_highbd_obmc_sad4x8_sse4_1),
- TestFuncs(aom_highbd_obmc_sad4x4_c, aom_highbd_obmc_sad4x4_sse4_1)
-};
-
-INSTANTIATE_TEST_CASE_P(SSE4_1, ObmcSadHBDTest,
- ::testing::ValuesIn(sse4_functions_hbd));
-#endif // HAVE_SSE4_1
-
-#if HAVE_AVX2
-ObmcSadHBDTest::ParamType avx2_functions_hbd[] = {
- TestFuncs(aom_highbd_obmc_sad128x128_c, aom_highbd_obmc_sad128x128_avx2),
- TestFuncs(aom_highbd_obmc_sad128x64_c, aom_highbd_obmc_sad128x64_avx2),
- TestFuncs(aom_highbd_obmc_sad64x128_c, aom_highbd_obmc_sad64x128_avx2),
- TestFuncs(aom_highbd_obmc_sad64x64_c, aom_highbd_obmc_sad64x64_avx2),
- TestFuncs(aom_highbd_obmc_sad64x32_c, aom_highbd_obmc_sad64x32_avx2),
- TestFuncs(aom_highbd_obmc_sad32x64_c, aom_highbd_obmc_sad32x64_avx2),
- TestFuncs(aom_highbd_obmc_sad32x32_c, aom_highbd_obmc_sad32x32_avx2),
- TestFuncs(aom_highbd_obmc_sad32x16_c, aom_highbd_obmc_sad32x16_avx2),
- TestFuncs(aom_highbd_obmc_sad16x32_c, aom_highbd_obmc_sad16x32_avx2),
- TestFuncs(aom_highbd_obmc_sad16x16_c, aom_highbd_obmc_sad16x16_avx2),
- TestFuncs(aom_highbd_obmc_sad16x8_c, aom_highbd_obmc_sad16x8_avx2),
- TestFuncs(aom_highbd_obmc_sad8x16_c, aom_highbd_obmc_sad8x16_avx2),
- TestFuncs(aom_highbd_obmc_sad8x8_c, aom_highbd_obmc_sad8x8_avx2),
- TestFuncs(aom_highbd_obmc_sad8x4_c, aom_highbd_obmc_sad8x4_avx2),
- TestFuncs(aom_highbd_obmc_sad4x8_c, aom_highbd_obmc_sad4x8_avx2),
- TestFuncs(aom_highbd_obmc_sad4x4_c, aom_highbd_obmc_sad4x4_avx2)
-};
-
-INSTANTIATE_TEST_CASE_P(AVX2, ObmcSadHBDTest,
- ::testing::ValuesIn(avx2_functions_hbd));
-#endif // HAVE_AVX2
-} // namespace
diff --git a/third_party/aom/test/obmc_variance_test.cc b/third_party/aom/test/obmc_variance_test.cc
deleted file mode 100644
index 4563b964a..000000000
--- a/third_party/aom/test/obmc_variance_test.cc
+++ /dev/null
@@ -1,345 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#include "third_party/googletest/src/googletest/include/gtest/gtest.h"
-#include "test/acm_random.h"
-
-#include "test/function_equivalence_test.h"
-#include "test/register_state_check.h"
-
-#include "config/aom_config.h"
-#include "config/aom_dsp_rtcd.h"
-
-#include "aom/aom_integer.h"
-
-#define MAX_SB_SQUARE (MAX_SB_SIZE * MAX_SB_SIZE)
-
-using libaom_test::ACMRandom;
-using libaom_test::FunctionEquivalenceTest;
-
-namespace {
-
-static const int kIterations = 1000;
-static const int kMaskMax = 64;
-
-typedef unsigned int (*ObmcVarF)(const uint8_t *pre, int pre_stride,
- const int32_t *wsrc, const int32_t *mask,
- unsigned int *sse);
-typedef libaom_test::FuncParam<ObmcVarF> TestFuncs;
-
-////////////////////////////////////////////////////////////////////////////////
-// 8 bit
-////////////////////////////////////////////////////////////////////////////////
-
-class ObmcVarianceTest : public FunctionEquivalenceTest<ObmcVarF> {};
-
-TEST_P(ObmcVarianceTest, RandomValues) {
- DECLARE_ALIGNED(32, uint8_t, pre[MAX_SB_SQUARE]);
- DECLARE_ALIGNED(32, int32_t, wsrc[MAX_SB_SQUARE]);
- DECLARE_ALIGNED(32, int32_t, mask[MAX_SB_SQUARE]);
-
- for (int iter = 0; iter < kIterations && !HasFatalFailure(); ++iter) {
- const int pre_stride = this->rng_(MAX_SB_SIZE + 1);
-
- for (int i = 0; i < MAX_SB_SQUARE; ++i) {
- pre[i] = this->rng_.Rand8();
- wsrc[i] = this->rng_.Rand8() * this->rng_(kMaskMax * kMaskMax + 1);
- mask[i] = this->rng_(kMaskMax * kMaskMax + 1);
- }
-
- unsigned int ref_sse, tst_sse;
- const unsigned int ref_res =
- params_.ref_func(pre, pre_stride, wsrc, mask, &ref_sse);
- unsigned int tst_res;
- ASM_REGISTER_STATE_CHECK(
- tst_res = params_.tst_func(pre, pre_stride, wsrc, mask, &tst_sse));
-
- ASSERT_EQ(ref_res, tst_res);
- ASSERT_EQ(ref_sse, tst_sse);
- }
-}
-
-TEST_P(ObmcVarianceTest, ExtremeValues) {
- DECLARE_ALIGNED(32, uint8_t, pre[MAX_SB_SQUARE]);
- DECLARE_ALIGNED(32, int32_t, wsrc[MAX_SB_SQUARE]);
- DECLARE_ALIGNED(32, int32_t, mask[MAX_SB_SQUARE]);
-
- for (int iter = 0; iter < MAX_SB_SIZE && !HasFatalFailure(); ++iter) {
- const int pre_stride = iter;
-
- for (int i = 0; i < MAX_SB_SQUARE; ++i) {
- pre[i] = UINT8_MAX;
- wsrc[i] = UINT8_MAX * kMaskMax * kMaskMax;
- mask[i] = kMaskMax * kMaskMax;
- }
-
- unsigned int ref_sse, tst_sse;
- const unsigned int ref_res =
- params_.ref_func(pre, pre_stride, wsrc, mask, &ref_sse);
- unsigned int tst_res;
- ASM_REGISTER_STATE_CHECK(
- tst_res = params_.tst_func(pre, pre_stride, wsrc, mask, &tst_sse));
-
- ASSERT_EQ(ref_res, tst_res);
- ASSERT_EQ(ref_sse, tst_sse);
- }
-}
-
-TEST_P(ObmcVarianceTest, DISABLED_Speed) {
- DECLARE_ALIGNED(32, uint8_t, pre[MAX_SB_SQUARE]);
- DECLARE_ALIGNED(32, int32_t, wsrc[MAX_SB_SQUARE]);
- DECLARE_ALIGNED(32, int32_t, mask[MAX_SB_SQUARE]);
-
- const int pre_stride = this->rng_(MAX_SB_SIZE + 1);
-
- for (int i = 0; i < MAX_SB_SQUARE; ++i) {
- pre[i] = this->rng_.Rand8();
- wsrc[i] = this->rng_.Rand8() * this->rng_(kMaskMax * kMaskMax + 1);
- mask[i] = this->rng_(kMaskMax * kMaskMax + 1);
- }
-
- const int num_loops = 1000000;
- unsigned int ref_sse, tst_sse;
- aom_usec_timer ref_timer, test_timer;
-
- aom_usec_timer_start(&ref_timer);
- for (int i = 0; i < num_loops; ++i) {
- params_.ref_func(pre, pre_stride, wsrc, mask, &ref_sse);
- }
- aom_usec_timer_mark(&ref_timer);
- const int elapsed_time_c =
- static_cast<int>(aom_usec_timer_elapsed(&ref_timer));
-
- aom_usec_timer_start(&test_timer);
- for (int i = 0; i < num_loops; ++i) {
- params_.tst_func(pre, pre_stride, wsrc, mask, &tst_sse);
- }
- aom_usec_timer_mark(&test_timer);
- const int elapsed_time_simd =
- static_cast<int>(aom_usec_timer_elapsed(&test_timer));
-
- printf("c_time=%d \t simd_time=%d \t gain=%d \n", elapsed_time_c,
- elapsed_time_simd, (elapsed_time_c / elapsed_time_simd));
-}
-
-#if HAVE_SSE4_1
-const ObmcVarianceTest::ParamType sse4_functions[] = {
- TestFuncs(aom_obmc_variance128x128_c, aom_obmc_variance128x128_sse4_1),
- TestFuncs(aom_obmc_variance128x64_c, aom_obmc_variance128x64_sse4_1),
- TestFuncs(aom_obmc_variance64x128_c, aom_obmc_variance64x128_sse4_1),
- TestFuncs(aom_obmc_variance64x64_c, aom_obmc_variance64x64_sse4_1),
- TestFuncs(aom_obmc_variance64x32_c, aom_obmc_variance64x32_sse4_1),
- TestFuncs(aom_obmc_variance32x64_c, aom_obmc_variance32x64_sse4_1),
- TestFuncs(aom_obmc_variance32x32_c, aom_obmc_variance32x32_sse4_1),
- TestFuncs(aom_obmc_variance32x16_c, aom_obmc_variance32x16_sse4_1),
- TestFuncs(aom_obmc_variance16x32_c, aom_obmc_variance16x32_sse4_1),
- TestFuncs(aom_obmc_variance16x16_c, aom_obmc_variance16x16_sse4_1),
- TestFuncs(aom_obmc_variance16x8_c, aom_obmc_variance16x8_sse4_1),
- TestFuncs(aom_obmc_variance8x16_c, aom_obmc_variance8x16_sse4_1),
- TestFuncs(aom_obmc_variance8x8_c, aom_obmc_variance8x8_sse4_1),
- TestFuncs(aom_obmc_variance8x4_c, aom_obmc_variance8x4_sse4_1),
- TestFuncs(aom_obmc_variance4x8_c, aom_obmc_variance4x8_sse4_1),
- TestFuncs(aom_obmc_variance4x4_c, aom_obmc_variance4x4_sse4_1)
-};
-
-INSTANTIATE_TEST_CASE_P(SSE4_1, ObmcVarianceTest,
- ::testing::ValuesIn(sse4_functions));
-#endif // HAVE_SSE4_1
-
-#if HAVE_AVX2
-const ObmcVarianceTest::ParamType avx2_functions[] = {
- TestFuncs(aom_obmc_variance128x128_c, aom_obmc_variance128x128_avx2),
- TestFuncs(aom_obmc_variance128x64_c, aom_obmc_variance128x64_avx2),
- TestFuncs(aom_obmc_variance64x128_c, aom_obmc_variance64x128_avx2),
- TestFuncs(aom_obmc_variance64x64_c, aom_obmc_variance64x64_avx2),
- TestFuncs(aom_obmc_variance64x32_c, aom_obmc_variance64x32_avx2),
- TestFuncs(aom_obmc_variance32x64_c, aom_obmc_variance32x64_avx2),
- TestFuncs(aom_obmc_variance32x32_c, aom_obmc_variance32x32_avx2),
- TestFuncs(aom_obmc_variance32x16_c, aom_obmc_variance32x16_avx2),
- TestFuncs(aom_obmc_variance16x32_c, aom_obmc_variance16x32_avx2),
- TestFuncs(aom_obmc_variance16x16_c, aom_obmc_variance16x16_avx2),
- TestFuncs(aom_obmc_variance16x8_c, aom_obmc_variance16x8_avx2),
- TestFuncs(aom_obmc_variance8x16_c, aom_obmc_variance8x16_avx2),
- TestFuncs(aom_obmc_variance8x8_c, aom_obmc_variance8x8_avx2),
- TestFuncs(aom_obmc_variance8x4_c, aom_obmc_variance8x4_avx2),
- TestFuncs(aom_obmc_variance4x8_c, aom_obmc_variance4x8_sse4_1),
- TestFuncs(aom_obmc_variance4x4_c, aom_obmc_variance4x4_sse4_1)
-};
-
-INSTANTIATE_TEST_CASE_P(AVX2, ObmcVarianceTest,
- ::testing::ValuesIn(avx2_functions));
-#endif // HAVE_AVX2
-
-////////////////////////////////////////////////////////////////////////////////
-// High bit-depth
-////////////////////////////////////////////////////////////////////////////////
-
-class ObmcVarianceHBDTest : public FunctionEquivalenceTest<ObmcVarF> {};
-
-TEST_P(ObmcVarianceHBDTest, RandomValues) {
- DECLARE_ALIGNED(32, uint16_t, pre[MAX_SB_SQUARE]);
- DECLARE_ALIGNED(32, int32_t, wsrc[MAX_SB_SQUARE]);
- DECLARE_ALIGNED(32, int32_t, mask[MAX_SB_SQUARE]);
-
- for (int iter = 0; iter < kIterations && !HasFatalFailure(); ++iter) {
- const int pre_stride = this->rng_(MAX_SB_SIZE + 1);
-
- for (int i = 0; i < MAX_SB_SQUARE; ++i) {
- pre[i] = this->rng_(1 << params_.bit_depth);
- wsrc[i] = this->rng_(1 << params_.bit_depth) *
- this->rng_(kMaskMax * kMaskMax + 1);
- mask[i] = this->rng_(kMaskMax * kMaskMax + 1);
- }
-
- unsigned int ref_sse, tst_sse;
- const unsigned int ref_res = params_.ref_func(
- CONVERT_TO_BYTEPTR(pre), pre_stride, wsrc, mask, &ref_sse);
- unsigned int tst_res;
- ASM_REGISTER_STATE_CHECK(tst_res = params_.tst_func(CONVERT_TO_BYTEPTR(pre),
- pre_stride, wsrc, mask,
- &tst_sse));
-
- ASSERT_EQ(ref_res, tst_res);
- ASSERT_EQ(ref_sse, tst_sse);
- }
-}
-
-TEST_P(ObmcVarianceHBDTest, ExtremeValues) {
- DECLARE_ALIGNED(32, uint16_t, pre[MAX_SB_SQUARE]);
- DECLARE_ALIGNED(32, int32_t, wsrc[MAX_SB_SQUARE]);
- DECLARE_ALIGNED(32, int32_t, mask[MAX_SB_SQUARE]);
-
- for (int iter = 0; iter < MAX_SB_SIZE && !HasFatalFailure(); ++iter) {
- const int pre_stride = iter;
-
- for (int i = 0; i < MAX_SB_SQUARE; ++i) {
- pre[i] = (1 << params_.bit_depth) - 1;
- wsrc[i] = ((1 << params_.bit_depth) - 1) * kMaskMax * kMaskMax;
- mask[i] = kMaskMax * kMaskMax;
- }
-
- unsigned int ref_sse, tst_sse;
- const unsigned int ref_res = params_.ref_func(
- CONVERT_TO_BYTEPTR(pre), pre_stride, wsrc, mask, &ref_sse);
- unsigned int tst_res;
- ASM_REGISTER_STATE_CHECK(tst_res = params_.tst_func(CONVERT_TO_BYTEPTR(pre),
- pre_stride, wsrc, mask,
- &tst_sse));
-
- ASSERT_EQ(ref_res, tst_res);
- ASSERT_EQ(ref_sse, tst_sse);
- }
-}
-
-#if HAVE_SSE4_1
-ObmcVarianceHBDTest::ParamType sse4_functions_hbd[] = {
- TestFuncs(aom_highbd_obmc_variance128x128_c,
- aom_highbd_obmc_variance128x128_sse4_1, 8),
- TestFuncs(aom_highbd_obmc_variance128x64_c,
- aom_highbd_obmc_variance128x64_sse4_1, 8),
- TestFuncs(aom_highbd_obmc_variance64x128_c,
- aom_highbd_obmc_variance64x128_sse4_1, 8),
- TestFuncs(aom_highbd_obmc_variance64x64_c,
- aom_highbd_obmc_variance64x64_sse4_1, 8),
- TestFuncs(aom_highbd_obmc_variance64x32_c,
- aom_highbd_obmc_variance64x32_sse4_1, 8),
- TestFuncs(aom_highbd_obmc_variance32x64_c,
- aom_highbd_obmc_variance32x64_sse4_1, 8),
- TestFuncs(aom_highbd_obmc_variance32x32_c,
- aom_highbd_obmc_variance32x32_sse4_1, 8),
- TestFuncs(aom_highbd_obmc_variance32x16_c,
- aom_highbd_obmc_variance32x16_sse4_1, 8),
- TestFuncs(aom_highbd_obmc_variance16x32_c,
- aom_highbd_obmc_variance16x32_sse4_1, 8),
- TestFuncs(aom_highbd_obmc_variance16x16_c,
- aom_highbd_obmc_variance16x16_sse4_1, 8),
- TestFuncs(aom_highbd_obmc_variance16x8_c, aom_highbd_obmc_variance16x8_sse4_1,
- 8),
- TestFuncs(aom_highbd_obmc_variance8x16_c, aom_highbd_obmc_variance8x16_sse4_1,
- 8),
- TestFuncs(aom_highbd_obmc_variance8x8_c, aom_highbd_obmc_variance8x8_sse4_1,
- 8),
- TestFuncs(aom_highbd_obmc_variance8x4_c, aom_highbd_obmc_variance8x4_sse4_1,
- 8),
- TestFuncs(aom_highbd_obmc_variance4x8_c, aom_highbd_obmc_variance4x8_sse4_1,
- 8),
- TestFuncs(aom_highbd_obmc_variance4x4_c, aom_highbd_obmc_variance4x4_sse4_1,
- 8),
- TestFuncs(aom_highbd_10_obmc_variance128x128_c,
- aom_highbd_10_obmc_variance128x128_sse4_1, 10),
- TestFuncs(aom_highbd_10_obmc_variance128x64_c,
- aom_highbd_10_obmc_variance128x64_sse4_1, 10),
- TestFuncs(aom_highbd_10_obmc_variance64x128_c,
- aom_highbd_10_obmc_variance64x128_sse4_1, 10),
- TestFuncs(aom_highbd_10_obmc_variance64x64_c,
- aom_highbd_10_obmc_variance64x64_sse4_1, 10),
- TestFuncs(aom_highbd_10_obmc_variance64x32_c,
- aom_highbd_10_obmc_variance64x32_sse4_1, 10),
- TestFuncs(aom_highbd_10_obmc_variance32x64_c,
- aom_highbd_10_obmc_variance32x64_sse4_1, 10),
- TestFuncs(aom_highbd_10_obmc_variance32x32_c,
- aom_highbd_10_obmc_variance32x32_sse4_1, 10),
- TestFuncs(aom_highbd_10_obmc_variance32x16_c,
- aom_highbd_10_obmc_variance32x16_sse4_1, 10),
- TestFuncs(aom_highbd_10_obmc_variance16x32_c,
- aom_highbd_10_obmc_variance16x32_sse4_1, 10),
- TestFuncs(aom_highbd_10_obmc_variance16x16_c,
- aom_highbd_10_obmc_variance16x16_sse4_1, 10),
- TestFuncs(aom_highbd_10_obmc_variance16x8_c,
- aom_highbd_10_obmc_variance16x8_sse4_1, 10),
- TestFuncs(aom_highbd_10_obmc_variance8x16_c,
- aom_highbd_10_obmc_variance8x16_sse4_1, 10),
- TestFuncs(aom_highbd_10_obmc_variance8x8_c,
- aom_highbd_10_obmc_variance8x8_sse4_1, 10),
- TestFuncs(aom_highbd_10_obmc_variance8x4_c,
- aom_highbd_10_obmc_variance8x4_sse4_1, 10),
- TestFuncs(aom_highbd_10_obmc_variance4x8_c,
- aom_highbd_10_obmc_variance4x8_sse4_1, 10),
- TestFuncs(aom_highbd_10_obmc_variance4x4_c,
- aom_highbd_10_obmc_variance4x4_sse4_1, 10),
- TestFuncs(aom_highbd_12_obmc_variance128x128_c,
- aom_highbd_12_obmc_variance128x128_sse4_1, 12),
- TestFuncs(aom_highbd_12_obmc_variance128x64_c,
- aom_highbd_12_obmc_variance128x64_sse4_1, 12),
- TestFuncs(aom_highbd_12_obmc_variance64x128_c,
- aom_highbd_12_obmc_variance64x128_sse4_1, 12),
- TestFuncs(aom_highbd_12_obmc_variance64x64_c,
- aom_highbd_12_obmc_variance64x64_sse4_1, 12),
- TestFuncs(aom_highbd_12_obmc_variance64x32_c,
- aom_highbd_12_obmc_variance64x32_sse4_1, 12),
- TestFuncs(aom_highbd_12_obmc_variance32x64_c,
- aom_highbd_12_obmc_variance32x64_sse4_1, 12),
- TestFuncs(aom_highbd_12_obmc_variance32x32_c,
- aom_highbd_12_obmc_variance32x32_sse4_1, 12),
- TestFuncs(aom_highbd_12_obmc_variance32x16_c,
- aom_highbd_12_obmc_variance32x16_sse4_1, 12),
- TestFuncs(aom_highbd_12_obmc_variance16x32_c,
- aom_highbd_12_obmc_variance16x32_sse4_1, 12),
- TestFuncs(aom_highbd_12_obmc_variance16x16_c,
- aom_highbd_12_obmc_variance16x16_sse4_1, 12),
- TestFuncs(aom_highbd_12_obmc_variance16x8_c,
- aom_highbd_12_obmc_variance16x8_sse4_1, 12),
- TestFuncs(aom_highbd_12_obmc_variance8x16_c,
- aom_highbd_12_obmc_variance8x16_sse4_1, 12),
- TestFuncs(aom_highbd_12_obmc_variance8x8_c,
- aom_highbd_12_obmc_variance8x8_sse4_1, 12),
- TestFuncs(aom_highbd_12_obmc_variance8x4_c,
- aom_highbd_12_obmc_variance8x4_sse4_1, 12),
- TestFuncs(aom_highbd_12_obmc_variance4x8_c,
- aom_highbd_12_obmc_variance4x8_sse4_1, 12),
- TestFuncs(aom_highbd_12_obmc_variance4x4_c,
- aom_highbd_12_obmc_variance4x4_sse4_1, 12)
-};
-
-INSTANTIATE_TEST_CASE_P(SSE4_1, ObmcVarianceHBDTest,
- ::testing::ValuesIn(sse4_functions_hbd));
-#endif // HAVE_SSE4_1
-} // namespace
diff --git a/third_party/aom/test/onyxc_int_test.cc b/third_party/aom/test/onyxc_int_test.cc
deleted file mode 100644
index 388959518..000000000
--- a/third_party/aom/test/onyxc_int_test.cc
+++ /dev/null
@@ -1,22 +0,0 @@
-/*
- * Copyright (c) 2017, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#include "third_party/googletest/src/googletest/include/gtest/gtest.h"
-
-#include "av1/common/onyxc_int.h"
-
-TEST(OnyxcInt, TestGetTxSize) {
- for (int t = TX_4X4; t < TX_SIZES_ALL; t++) {
- TX_SIZE t2 = get_tx_size(tx_size_wide[t], tx_size_high[t]);
- GTEST_ASSERT_EQ(tx_size_wide[t], tx_size_wide[t2]);
- GTEST_ASSERT_EQ(tx_size_high[t], tx_size_high[t2]);
- }
-}
diff --git a/third_party/aom/test/pickrst_test.cc b/third_party/aom/test/pickrst_test.cc
deleted file mode 100644
index 040e8e8b7..000000000
--- a/third_party/aom/test/pickrst_test.cc
+++ /dev/null
@@ -1,187 +0,0 @@
-/*
- * Copyright (c) 2018, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#include "third_party/googletest/src/googletest/include/gtest/gtest.h"
-
-#include "test/function_equivalence_test.h"
-#include "test/register_state_check.h"
-
-#include "config/aom_config.h"
-#include "config/aom_dsp_rtcd.h"
-
-#include "aom/aom_integer.h"
-#include "av1/encoder/pickrst.h"
-using libaom_test::FunctionEquivalenceTest;
-
-#define MAX_DATA_BLOCK 384
-
-namespace {
-static const int kIterations = 100;
-
-typedef int64_t (*lowbd_pixel_proj_error_func)(
- const uint8_t *src8, int width, int height, int src_stride,
- const uint8_t *dat8, int dat_stride, int32_t *flt0, int flt0_stride,
- int32_t *flt1, int flt1_stride, int xq[2], const sgr_params_type *params);
-
-typedef libaom_test::FuncParam<lowbd_pixel_proj_error_func> TestFuncs;
-
-////////////////////////////////////////////////////////////////////////////////
-// 8 bit
-////////////////////////////////////////////////////////////////////////////////
-
-typedef ::testing::tuple<const lowbd_pixel_proj_error_func>
- PixelProjErrorTestParam;
-
-class PixelProjErrorTest
- : public ::testing::TestWithParam<PixelProjErrorTestParam> {
- public:
- virtual void SetUp() {
- target_func_ = GET_PARAM(0);
- src_ = (uint8_t *)(aom_malloc(MAX_DATA_BLOCK * MAX_DATA_BLOCK *
- sizeof(uint8_t)));
- dgd_ = (uint8_t *)(aom_malloc(MAX_DATA_BLOCK * MAX_DATA_BLOCK *
- sizeof(uint8_t)));
- flt0_ = (int32_t *)(aom_malloc(MAX_DATA_BLOCK * MAX_DATA_BLOCK *
- sizeof(int32_t)));
- flt1_ = (int32_t *)(aom_malloc(MAX_DATA_BLOCK * MAX_DATA_BLOCK *
- sizeof(int32_t)));
- }
- virtual void TearDown() {
- aom_free(src_);
- aom_free(dgd_);
- aom_free(flt0_);
- aom_free(flt1_);
- }
- void runPixelProjErrorTest(int32_t run_times);
- void runPixelProjErrorTest_ExtremeValues();
-
- private:
- lowbd_pixel_proj_error_func target_func_;
- ACMRandom rng_;
- uint8_t *src_;
- uint8_t *dgd_;
- int32_t *flt0_;
- int32_t *flt1_;
-};
-
-void PixelProjErrorTest::runPixelProjErrorTest(int32_t run_times) {
- int h_end = run_times != 1 ? 128 : (rng_.Rand16() % MAX_DATA_BLOCK) + 1;
- int v_end = run_times != 1 ? 128 : (rng_.Rand16() % MAX_DATA_BLOCK) + 1;
- const int dgd_stride = MAX_DATA_BLOCK;
- const int src_stride = MAX_DATA_BLOCK;
- const int flt0_stride = MAX_DATA_BLOCK;
- const int flt1_stride = MAX_DATA_BLOCK;
- sgr_params_type params;
- int xq[2];
- const int iters = run_times == 1 ? kIterations : 4;
- for (int iter = 0; iter < iters && !HasFatalFailure(); ++iter) {
- int64_t err_ref = 0, err_test = 1;
- for (int i = 0; i < MAX_DATA_BLOCK * MAX_DATA_BLOCK; ++i) {
- dgd_[i] = rng_.Rand8();
- src_[i] = rng_.Rand8();
- flt0_[i] = rng_.Rand15Signed();
- flt1_[i] = rng_.Rand15Signed();
- }
- xq[0] = rng_.Rand8() % (1 << SGRPROJ_PRJ_BITS);
- xq[1] = rng_.Rand8() % (1 << SGRPROJ_PRJ_BITS);
- params.r[0] = run_times == 1 ? (rng_.Rand8() % MAX_RADIUS) : (iter % 2);
- params.r[1] = run_times == 1 ? (rng_.Rand8() % MAX_RADIUS) : (iter / 2);
- params.s[0] = run_times == 1 ? (rng_.Rand8() % MAX_RADIUS) : (iter % 2);
- params.s[1] = run_times == 1 ? (rng_.Rand8() % MAX_RADIUS) : (iter / 2);
- uint8_t *dgd = dgd_;
- uint8_t *src = src_;
-
- aom_usec_timer timer;
- aom_usec_timer_start(&timer);
- for (int i = 0; i < run_times; ++i) {
- err_ref = av1_lowbd_pixel_proj_error_c(src, h_end, v_end, src_stride, dgd,
- dgd_stride, flt0_, flt0_stride,
- flt1_, flt1_stride, xq, &params);
- }
- aom_usec_timer_mark(&timer);
- const double time1 = static_cast<double>(aom_usec_timer_elapsed(&timer));
- aom_usec_timer_start(&timer);
- for (int i = 0; i < run_times; ++i) {
- err_test =
- target_func_(src, h_end, v_end, src_stride, dgd, dgd_stride, flt0_,
- flt0_stride, flt1_, flt1_stride, xq, &params);
- }
- aom_usec_timer_mark(&timer);
- const double time2 = static_cast<double>(aom_usec_timer_elapsed(&timer));
- if (run_times > 10) {
- printf("r0 %d r1 %d %3dx%-3d:%7.2f/%7.2fns (%3.2f)\n", params.r[0],
- params.r[1], h_end, v_end, time1, time2, time1 / time2);
- }
- ASSERT_EQ(err_ref, err_test);
- }
-}
-
-void PixelProjErrorTest::runPixelProjErrorTest_ExtremeValues() {
- const int h_start = 0;
- int h_end = 192;
- const int v_start = 0;
- int v_end = 192;
- const int dgd_stride = MAX_DATA_BLOCK;
- const int src_stride = MAX_DATA_BLOCK;
- const int flt0_stride = MAX_DATA_BLOCK;
- const int flt1_stride = MAX_DATA_BLOCK;
- sgr_params_type params;
- int xq[2];
- const int iters = kIterations;
- for (int iter = 0; iter < iters && !HasFatalFailure(); ++iter) {
- int64_t err_ref = 0, err_test = 1;
- for (int i = 0; i < MAX_DATA_BLOCK * MAX_DATA_BLOCK; ++i) {
- dgd_[i] = 0;
- src_[i] = 255;
- flt0_[i] = rng_.Rand15Signed();
- flt1_[i] = rng_.Rand15Signed();
- }
- xq[0] = rng_.Rand8() % (1 << SGRPROJ_PRJ_BITS);
- xq[1] = rng_.Rand8() % (1 << SGRPROJ_PRJ_BITS);
- params.r[0] = rng_.Rand8() % MAX_RADIUS;
- params.r[1] = rng_.Rand8() % MAX_RADIUS;
- params.s[0] = rng_.Rand8() % MAX_RADIUS;
- params.s[1] = rng_.Rand8() % MAX_RADIUS;
- uint8_t *dgd = dgd_;
- uint8_t *src = src_;
-
- err_ref = av1_lowbd_pixel_proj_error_c(
- src, h_end - h_start, v_end - v_start, src_stride, dgd, dgd_stride,
- flt0_, flt0_stride, flt1_, flt1_stride, xq, &params);
-
- err_test = target_func_(src, h_end - h_start, v_end - v_start, src_stride,
- dgd, dgd_stride, flt0_, flt0_stride, flt1_,
- flt1_stride, xq, &params);
-
- ASSERT_EQ(err_ref, err_test);
- }
-}
-
-TEST_P(PixelProjErrorTest, RandomValues) { runPixelProjErrorTest(1); }
-
-TEST_P(PixelProjErrorTest, ExtremeValues) {
- runPixelProjErrorTest_ExtremeValues();
-}
-
-TEST_P(PixelProjErrorTest, DISABLED_Speed) { runPixelProjErrorTest(200000); }
-
-#if HAVE_SSE4_1
-INSTANTIATE_TEST_CASE_P(SSE4_1, PixelProjErrorTest,
- ::testing::Values(av1_lowbd_pixel_proj_error_sse4_1));
-#endif // HAVE_SSE4_1
-
-#if HAVE_AVX2
-
-INSTANTIATE_TEST_CASE_P(AVX2, PixelProjErrorTest,
- ::testing::Values(av1_lowbd_pixel_proj_error_avx2));
-#endif // HAVE_AVX2
-
-} // namespace
diff --git a/third_party/aom/test/qm_test.cc b/third_party/aom/test/qm_test.cc
deleted file mode 100644
index c87506b41..000000000
--- a/third_party/aom/test/qm_test.cc
+++ /dev/null
@@ -1,81 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-#include "config/aom_config.h"
-
-#include "third_party/googletest/src/googletest/include/gtest/gtest.h"
-#include "test/codec_factory.h"
-#include "test/encode_test_driver.h"
-#include "test/i420_video_source.h"
-#include "test/util.h"
-
-namespace {
-
-class QMTest
- : public ::libaom_test::CodecTestWith2Params<libaom_test::TestMode, int>,
- public ::libaom_test::EncoderTest {
- protected:
- QMTest() : EncoderTest(GET_PARAM(0)) {}
- virtual ~QMTest() {}
-
- virtual void SetUp() {
- InitializeConfig();
- SetMode(GET_PARAM(1));
- set_cpu_used_ = GET_PARAM(2);
- }
-
- virtual void PreEncodeFrameHook(::libaom_test::VideoSource *video,
- ::libaom_test::Encoder *encoder) {
- if (video->frame() == 1) {
- encoder->Control(AOME_SET_CPUUSED, set_cpu_used_);
- encoder->Control(AV1E_SET_ENABLE_QM, 1);
- encoder->Control(AV1E_SET_QM_MIN, qm_min_);
- encoder->Control(AV1E_SET_QM_MAX, qm_max_);
-
- encoder->Control(AOME_SET_MAX_INTRA_BITRATE_PCT, 100);
- }
- }
-
- void DoTest(int qm_min, int qm_max) {
- qm_min_ = qm_min;
- qm_max_ = qm_max;
- cfg_.kf_max_dist = 12;
- cfg_.rc_min_quantizer = 8;
- cfg_.rc_max_quantizer = 56;
- cfg_.rc_end_usage = AOM_CBR;
- cfg_.g_lag_in_frames = 6;
- cfg_.rc_buf_initial_sz = 500;
- cfg_.rc_buf_optimal_sz = 500;
- cfg_.rc_buf_sz = 1000;
- cfg_.rc_target_bitrate = 300;
- ::libaom_test::I420VideoSource video("hantro_collage_w352h288.yuv", 352,
- 288, 30, 1, 0, 15);
- ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
- }
-
- int set_cpu_used_;
- int qm_min_;
- int qm_max_;
-};
-
-// encodes and decodes without a mismatch.
-TEST_P(QMTest, TestNoMisMatchQM1) { DoTest(5, 9); }
-
-// encodes and decodes without a mismatch.
-TEST_P(QMTest, TestNoMisMatchQM2) { DoTest(0, 8); }
-
-// encodes and decodes without a mismatch.
-TEST_P(QMTest, TestNoMisMatchQM3) { DoTest(9, 15); }
-
-AV1_INSTANTIATE_TEST_CASE(QMTest,
- ::testing::Values(::libaom_test::kRealTime,
- ::libaom_test::kOnePassGood),
- ::testing::Range(5, 9));
-} // namespace
diff --git a/third_party/aom/test/quantize_func_test.cc b/third_party/aom/test/quantize_func_test.cc
deleted file mode 100644
index 554d0c721..000000000
--- a/third_party/aom/test/quantize_func_test.cc
+++ /dev/null
@@ -1,425 +0,0 @@
-/*
- * Copyright (c) 2017, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#include "third_party/googletest/src/googletest/include/gtest/gtest.h"
-
-#include "config/aom_config.h"
-#include "config/aom_dsp_rtcd.h"
-#include "config/av1_rtcd.h"
-
-#include "aom/aom_codec.h"
-#include "aom_ports/aom_timer.h"
-#include "av1/encoder/encoder.h"
-#include "av1/common/scan.h"
-#include "test/acm_random.h"
-#include "test/clear_system_state.h"
-#include "test/register_state_check.h"
-#include "test/util.h"
-
-namespace {
-using libaom_test::ACMRandom;
-
-#define QUAN_PARAM_LIST \
- const tran_low_t *coeff_ptr, intptr_t n_coeffs, const int16_t *zbin_ptr, \
- const int16_t *round_ptr, const int16_t *quant_ptr, \
- const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, \
- tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, \
- const int16_t *scan, const int16_t *iscan
-
-typedef void (*QuantizeFunc)(QUAN_PARAM_LIST);
-typedef void (*QuantizeFuncHbd)(QUAN_PARAM_LIST, int log_scale);
-
-#define HBD_QUAN_FUNC \
- fn(coeff_ptr, n_coeffs, zbin_ptr, round_ptr, quant_ptr, quant_shift_ptr, \
- qcoeff_ptr, dqcoeff_ptr, dequant_ptr, eob_ptr, scan, iscan, log_scale)
-
-#define LBD_QUAN_FUNC \
- fn(coeff_ptr, n_coeffs, zbin_ptr, round_ptr, quant_ptr, quant_shift_ptr, \
- qcoeff_ptr, dqcoeff_ptr, dequant_ptr, eob_ptr, scan, iscan)
-
-template <QuantizeFuncHbd fn>
-void highbd_quan16x16_wrapper(QUAN_PARAM_LIST) {
- const int log_scale = 0;
- HBD_QUAN_FUNC;
-}
-
-template <QuantizeFuncHbd fn>
-void highbd_quan32x32_wrapper(QUAN_PARAM_LIST) {
- const int log_scale = 1;
- HBD_QUAN_FUNC;
-}
-
-template <QuantizeFuncHbd fn>
-void highbd_quan64x64_wrapper(QUAN_PARAM_LIST) {
- const int log_scale = 2;
- HBD_QUAN_FUNC;
-}
-
-typedef enum { TYPE_B, TYPE_DC, TYPE_FP } QuantType;
-
-using ::testing::tuple;
-typedef tuple<QuantizeFunc, QuantizeFunc, TX_SIZE, QuantType, aom_bit_depth_t>
- QuantizeParam;
-
-typedef struct {
- QUANTS quant;
- Dequants dequant;
-} QuanTable;
-
-const int kTestNum = 1000;
-
-class QuantizeTest : public ::testing::TestWithParam<QuantizeParam> {
- protected:
- QuantizeTest()
- : quant_ref_(GET_PARAM(0)), quant_(GET_PARAM(1)), tx_size_(GET_PARAM(2)),
- type_(GET_PARAM(3)), bd_(GET_PARAM(4)) {}
-
- virtual ~QuantizeTest() {}
-
- virtual void SetUp() {
- qtab_ = reinterpret_cast<QuanTable *>(aom_memalign(32, sizeof(*qtab_)));
- const int n_coeffs = coeff_num();
- coeff_ = reinterpret_cast<tran_low_t *>(
- aom_memalign(32, 6 * n_coeffs * sizeof(tran_low_t)));
- InitQuantizer();
- }
-
- virtual void TearDown() {
- aom_free(qtab_);
- qtab_ = NULL;
- aom_free(coeff_);
- coeff_ = NULL;
- libaom_test::ClearSystemState();
- }
-
- void InitQuantizer() {
- av1_build_quantizer(bd_, 0, 0, 0, 0, 0, &qtab_->quant, &qtab_->dequant);
- }
-
- void QuantizeRun(bool is_loop, int q = 0, int test_num = 1) {
- tran_low_t *coeff_ptr = coeff_;
- const intptr_t n_coeffs = coeff_num();
-
- tran_low_t *qcoeff_ref = coeff_ptr + n_coeffs;
- tran_low_t *dqcoeff_ref = qcoeff_ref + n_coeffs;
-
- tran_low_t *qcoeff = dqcoeff_ref + n_coeffs;
- tran_low_t *dqcoeff = qcoeff + n_coeffs;
- uint16_t *eob = (uint16_t *)(dqcoeff + n_coeffs);
-
- // Testing uses 2-D DCT scan order table
- const SCAN_ORDER *const sc = get_default_scan(tx_size_, DCT_DCT);
-
- // Testing uses luminance quantization table
- const int16_t *zbin = qtab_->quant.y_zbin[q];
-
- const int16_t *round = 0;
- const int16_t *quant = 0;
- if (type_ == TYPE_B) {
- round = qtab_->quant.y_round[q];
- quant = qtab_->quant.y_quant[q];
- } else if (type_ == TYPE_FP) {
- round = qtab_->quant.y_round_fp[q];
- quant = qtab_->quant.y_quant_fp[q];
- }
-
- const int16_t *quant_shift = qtab_->quant.y_quant_shift[q];
- const int16_t *dequant = qtab_->dequant.y_dequant_QTX[q];
-
- for (int i = 0; i < test_num; ++i) {
- if (is_loop) FillCoeffRandom();
-
- memset(qcoeff_ref, 0, 5 * n_coeffs * sizeof(*qcoeff_ref));
-
- quant_ref_(coeff_ptr, n_coeffs, zbin, round, quant, quant_shift,
- qcoeff_ref, dqcoeff_ref, dequant, &eob[0], sc->scan,
- sc->iscan);
-
- ASM_REGISTER_STATE_CHECK(quant_(coeff_ptr, n_coeffs, zbin, round, quant,
- quant_shift, qcoeff, dqcoeff, dequant,
- &eob[1], sc->scan, sc->iscan));
-
- for (int j = 0; j < n_coeffs; ++j) {
- ASSERT_EQ(qcoeff_ref[j], qcoeff[j])
- << "Q mismatch on test: " << i << " at position: " << j
- << " Q: " << q << " coeff: " << coeff_ptr[j];
- }
-
- for (int j = 0; j < n_coeffs; ++j) {
- ASSERT_EQ(dqcoeff_ref[j], dqcoeff[j])
- << "Dq mismatch on test: " << i << " at position: " << j
- << " Q: " << q << " coeff: " << coeff_ptr[j];
- }
-
- ASSERT_EQ(eob[0], eob[1])
- << "eobs mismatch on test: " << i << " Q: " << q;
- }
- }
-
- void CompareResults(const tran_low_t *buf_ref, const tran_low_t *buf,
- int size, const char *text, int q, int number) {
- int i;
- for (i = 0; i < size; ++i) {
- ASSERT_EQ(buf_ref[i], buf[i]) << text << " mismatch on test: " << number
- << " at position: " << i << " Q: " << q;
- }
- }
-
- int coeff_num() const { return av1_get_max_eob(tx_size_); }
-
- void FillCoeff(tran_low_t c) {
- const int n_coeffs = coeff_num();
- for (int i = 0; i < n_coeffs; ++i) {
- coeff_[i] = c;
- }
- }
-
- void FillCoeffRandom() {
- const int n_coeffs = coeff_num();
- FillCoeffZero();
- int num = rnd_.Rand16() % n_coeffs;
- for (int i = 0; i < num; ++i) {
- coeff_[i] = GetRandomCoeff();
- }
- }
-
- void FillCoeffZero() { FillCoeff(0); }
-
- void FillCoeffConstant() {
- tran_low_t c = GetRandomCoeff();
- FillCoeff(c);
- }
-
- void FillDcOnly() {
- FillCoeffZero();
- coeff_[0] = GetRandomCoeff();
- }
-
- void FillDcLargeNegative() {
- FillCoeffZero();
- // Generate a qcoeff which contains 512/-512 (0x0100/0xFE00) to catch issues
- // like BUG=883 where the constant being compared was incorrectly
- // initialized.
- coeff_[0] = -8191;
- }
-
- tran_low_t GetRandomCoeff() {
- tran_low_t coeff;
- if (bd_ == AOM_BITS_8) {
- coeff =
- clamp(static_cast<int16_t>(rnd_.Rand16()), INT16_MIN + 1, INT16_MAX);
- } else {
- tran_low_t min = -(1 << (7 + bd_));
- tran_low_t max = -min - 1;
- coeff = clamp(static_cast<tran_low_t>(rnd_.Rand31()), min, max);
- }
- return coeff;
- }
-
- ACMRandom rnd_;
- QuanTable *qtab_;
- tran_low_t *coeff_;
- QuantizeFunc quant_ref_;
- QuantizeFunc quant_;
- TX_SIZE tx_size_;
- QuantType type_;
- aom_bit_depth_t bd_;
-};
-
-TEST_P(QuantizeTest, ZeroInput) {
- FillCoeffZero();
- QuantizeRun(false);
-}
-
-TEST_P(QuantizeTest, LargeNegativeInput) {
- FillDcLargeNegative();
- QuantizeRun(false, 0, 1);
-}
-
-TEST_P(QuantizeTest, DcOnlyInput) {
- FillDcOnly();
- QuantizeRun(false, 0, 1);
-}
-
-TEST_P(QuantizeTest, RandomInput) { QuantizeRun(true, 0, kTestNum); }
-
-TEST_P(QuantizeTest, MultipleQ) {
- for (int q = 0; q < QINDEX_RANGE; ++q) {
- QuantizeRun(true, q, kTestNum);
- }
-}
-
-// Force the coeff to be half the value of the dequant. This exposes a
-// mismatch found in av1_quantize_fp_sse2().
-TEST_P(QuantizeTest, CoeffHalfDequant) {
- FillCoeff(16);
- QuantizeRun(false, 25, 1);
-}
-
-TEST_P(QuantizeTest, DISABLED_Speed) {
- tran_low_t *coeff_ptr = coeff_;
- const intptr_t n_coeffs = coeff_num();
-
- tran_low_t *qcoeff_ref = coeff_ptr + n_coeffs;
- tran_low_t *dqcoeff_ref = qcoeff_ref + n_coeffs;
-
- tran_low_t *qcoeff = dqcoeff_ref + n_coeffs;
- tran_low_t *dqcoeff = qcoeff + n_coeffs;
- uint16_t *eob = (uint16_t *)(dqcoeff + n_coeffs);
-
- // Testing uses 2-D DCT scan order table
- const SCAN_ORDER *const sc = get_default_scan(tx_size_, DCT_DCT);
-
- // Testing uses luminance quantization table
- const int q = 22;
- const int16_t *zbin = qtab_->quant.y_zbin[q];
- const int16_t *round_fp = qtab_->quant.y_round_fp[q];
- const int16_t *quant_fp = qtab_->quant.y_quant_fp[q];
- const int16_t *quant_shift = qtab_->quant.y_quant_shift[q];
- const int16_t *dequant = qtab_->dequant.y_dequant_QTX[q];
- const int kNumTests = 5000000;
- aom_usec_timer timer;
-
- FillCoeffRandom();
-
- aom_usec_timer_start(&timer);
- for (int n = 0; n < kNumTests; ++n) {
- quant_(coeff_ptr, n_coeffs, zbin, round_fp, quant_fp, quant_shift, qcoeff,
- dqcoeff, dequant, eob, sc->scan, sc->iscan);
- }
- aom_usec_timer_mark(&timer);
-
- const int elapsed_time = static_cast<int>(aom_usec_timer_elapsed(&timer));
- printf("Elapsed time: %d us\n", elapsed_time);
-}
-
-using ::testing::make_tuple;
-
-#if HAVE_AVX2
-const QuantizeParam kQParamArrayAvx2[] = {
- make_tuple(&av1_quantize_fp_c, &av1_quantize_fp_avx2, TX_16X16, TYPE_FP,
- AOM_BITS_8),
- make_tuple(&av1_quantize_fp_c, &av1_quantize_fp_avx2, TX_4X16, TYPE_FP,
- AOM_BITS_8),
- make_tuple(&av1_quantize_fp_c, &av1_quantize_fp_avx2, TX_16X4, TYPE_FP,
- AOM_BITS_8),
- make_tuple(&av1_quantize_fp_c, &av1_quantize_fp_avx2, TX_32X8, TYPE_FP,
- AOM_BITS_8),
- make_tuple(&av1_quantize_fp_c, &av1_quantize_fp_avx2, TX_8X32, TYPE_FP,
- AOM_BITS_8),
- make_tuple(&av1_quantize_fp_32x32_c, &av1_quantize_fp_32x32_avx2, TX_32X32,
- TYPE_FP, AOM_BITS_8),
- make_tuple(&av1_quantize_fp_32x32_c, &av1_quantize_fp_32x32_avx2, TX_16X64,
- TYPE_FP, AOM_BITS_8),
- make_tuple(&av1_quantize_fp_32x32_c, &av1_quantize_fp_32x32_avx2, TX_64X16,
- TYPE_FP, AOM_BITS_8),
- make_tuple(&av1_quantize_fp_64x64_c, &av1_quantize_fp_64x64_avx2, TX_64X64,
- TYPE_FP, AOM_BITS_8),
- make_tuple(&highbd_quan16x16_wrapper<av1_highbd_quantize_fp_c>,
- &highbd_quan16x16_wrapper<av1_highbd_quantize_fp_avx2>, TX_16X16,
- TYPE_FP, AOM_BITS_8),
- make_tuple(&highbd_quan16x16_wrapper<av1_highbd_quantize_fp_c>,
- &highbd_quan16x16_wrapper<av1_highbd_quantize_fp_avx2>, TX_16X16,
- TYPE_FP, AOM_BITS_10),
- make_tuple(&highbd_quan16x16_wrapper<av1_highbd_quantize_fp_c>,
- &highbd_quan16x16_wrapper<av1_highbd_quantize_fp_avx2>, TX_16X16,
- TYPE_FP, AOM_BITS_12),
- make_tuple(&highbd_quan32x32_wrapper<av1_highbd_quantize_fp_c>,
- &highbd_quan32x32_wrapper<av1_highbd_quantize_fp_avx2>, TX_32X32,
- TYPE_FP, AOM_BITS_8),
- make_tuple(&highbd_quan32x32_wrapper<av1_highbd_quantize_fp_c>,
- &highbd_quan32x32_wrapper<av1_highbd_quantize_fp_avx2>, TX_32X32,
- TYPE_FP, AOM_BITS_10),
- make_tuple(&highbd_quan32x32_wrapper<av1_highbd_quantize_fp_c>,
- &highbd_quan32x32_wrapper<av1_highbd_quantize_fp_avx2>, TX_32X32,
- TYPE_FP, AOM_BITS_12),
- make_tuple(&highbd_quan64x64_wrapper<av1_highbd_quantize_fp_c>,
- &highbd_quan64x64_wrapper<av1_highbd_quantize_fp_avx2>, TX_64X64,
- TYPE_FP, AOM_BITS_8),
- make_tuple(&highbd_quan64x64_wrapper<av1_highbd_quantize_fp_c>,
- &highbd_quan64x64_wrapper<av1_highbd_quantize_fp_avx2>, TX_64X64,
- TYPE_FP, AOM_BITS_10),
- make_tuple(&highbd_quan64x64_wrapper<av1_highbd_quantize_fp_c>,
- &highbd_quan64x64_wrapper<av1_highbd_quantize_fp_avx2>, TX_64X64,
- TYPE_FP, AOM_BITS_12),
- make_tuple(&aom_highbd_quantize_b_c, &aom_highbd_quantize_b_avx2, TX_16X16,
- TYPE_B, AOM_BITS_8),
- make_tuple(&aom_highbd_quantize_b_c, &aom_highbd_quantize_b_avx2, TX_16X16,
- TYPE_B, AOM_BITS_10),
- make_tuple(&aom_highbd_quantize_b_c, &aom_highbd_quantize_b_avx2, TX_16X16,
- TYPE_B, AOM_BITS_12),
-};
-
-INSTANTIATE_TEST_CASE_P(AVX2, QuantizeTest,
- ::testing::ValuesIn(kQParamArrayAvx2));
-#endif // HAVE_AVX2
-
-#if HAVE_SSE2
-const QuantizeParam kQParamArraySSE2[] = {
- make_tuple(&av1_quantize_fp_c, &av1_quantize_fp_sse2, TX_16X16, TYPE_FP,
- AOM_BITS_8),
- make_tuple(&av1_quantize_fp_c, &av1_quantize_fp_sse2, TX_4X16, TYPE_FP,
- AOM_BITS_8),
- make_tuple(&av1_quantize_fp_c, &av1_quantize_fp_sse2, TX_16X4, TYPE_FP,
- AOM_BITS_8),
- make_tuple(&av1_quantize_fp_c, &av1_quantize_fp_sse2, TX_8X32, TYPE_FP,
- AOM_BITS_8),
- make_tuple(&av1_quantize_fp_c, &av1_quantize_fp_sse2, TX_32X8, TYPE_FP,
- AOM_BITS_8),
- make_tuple(&aom_quantize_b_c, &aom_quantize_b_sse2, TX_16X16, TYPE_B,
- AOM_BITS_8),
- make_tuple(&aom_highbd_quantize_b_c, &aom_highbd_quantize_b_sse2, TX_16X16,
- TYPE_B, AOM_BITS_8),
- make_tuple(&aom_highbd_quantize_b_c, &aom_highbd_quantize_b_sse2, TX_16X16,
- TYPE_B, AOM_BITS_10),
- make_tuple(&aom_highbd_quantize_b_c, &aom_highbd_quantize_b_sse2, TX_16X16,
- TYPE_B, AOM_BITS_12),
- make_tuple(&aom_highbd_quantize_b_32x32_c, &aom_highbd_quantize_b_32x32_sse2,
- TX_32X32, TYPE_B, AOM_BITS_8),
- make_tuple(&aom_highbd_quantize_b_32x32_c, &aom_highbd_quantize_b_32x32_sse2,
- TX_32X32, TYPE_B, AOM_BITS_10),
- make_tuple(&aom_highbd_quantize_b_32x32_c, &aom_highbd_quantize_b_32x32_sse2,
- TX_32X32, TYPE_B, AOM_BITS_12),
-};
-
-INSTANTIATE_TEST_CASE_P(SSE2, QuantizeTest,
- ::testing::ValuesIn(kQParamArraySSE2));
-#endif
-
-#if HAVE_SSSE3 && ARCH_X86_64
-INSTANTIATE_TEST_CASE_P(
- SSSE3, QuantizeTest,
- ::testing::Values(make_tuple(&aom_quantize_b_c, &aom_quantize_b_ssse3,
- TX_16X16, TYPE_B, AOM_BITS_8)));
-
-// Like libvpx, the ssse3 and avx quantize tests do not pass.
-// https://bugs.chromium.org/p/webm/issues/detail?id=1448
-INSTANTIATE_TEST_CASE_P(
- DISABLED_SSSE3_32x32, QuantizeTest,
- ::testing::Values(make_tuple(&aom_quantize_b_32x32_c,
- &aom_quantize_b_32x32_ssse3, TX_16X16, TYPE_B,
- AOM_BITS_8)));
-
-#endif // HAVE_SSSE3 && ARCH_X86_64
-
-#if HAVE_AVX && ARCH_X86_64
-INSTANTIATE_TEST_CASE_P(
- AVX, QuantizeTest,
- ::testing::Values(
- make_tuple(&aom_quantize_b_c, &aom_quantize_b_avx, TX_16X16, TYPE_B,
- AOM_BITS_8),
- // Although these tests will not pass against _c, test them against each
- // other so there is some minor checking.
- make_tuple(&aom_quantize_b_32x32_ssse3, &aom_quantize_b_32x32_avx,
- TX_32X32, TYPE_B, AOM_BITS_8)));
-
-#endif // HAVE_AVX && ARCH_X86_64
-} // namespace
diff --git a/third_party/aom/test/reconinter_test.cc b/third_party/aom/test/reconinter_test.cc
deleted file mode 100644
index a8536e517..000000000
--- a/third_party/aom/test/reconinter_test.cc
+++ /dev/null
@@ -1,258 +0,0 @@
-/*
- * Copyright (c) 2017, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#include <stdint.h>
-#include <stdio.h>
-#include <string.h>
-
-#include "config/aom_config.h"
-#include "config/av1_rtcd.h"
-
-#include "aom_ports/mem.h"
-#include "av1/common/scan.h"
-#include "av1/common/txb_common.h"
-#include "test/acm_random.h"
-#include "test/clear_system_state.h"
-#include "test/register_state_check.h"
-#include "test/util.h"
-#include "third_party/googletest/src/googletest/include/gtest/gtest.h"
-
-namespace {
-using libaom_test::ACMRandom;
-
-typedef void (*buildcompdiffwtdmaskd_func)(uint8_t *mask,
- DIFFWTD_MASK_TYPE mask_type,
- const uint8_t *src0, int src0_stride,
- const uint8_t *src1, int src1_stride,
- int h, int w);
-
-typedef ::testing::tuple<BLOCK_SIZE, buildcompdiffwtdmaskd_func>
- BuildCompDiffwtdMaskDParam;
-
-#if HAVE_SSE4_1
-::testing::internal::ParamGenerator<BuildCompDiffwtdMaskDParam> BuildParams(
- buildcompdiffwtdmaskd_func filter) {
- return ::testing::Combine(::testing::Range(BLOCK_4X4, BLOCK_SIZES_ALL),
- ::testing::Values(filter));
-}
-#endif
-
-class BuildCompDiffwtdMaskTest
- : public ::testing::TestWithParam<BuildCompDiffwtdMaskDParam> {
- public:
- virtual ~BuildCompDiffwtdMaskTest() {}
-
- virtual void TearDown() { libaom_test::ClearSystemState(); }
- void RunTest(buildcompdiffwtdmaskd_func test_impl, const int is_speed,
- const DIFFWTD_MASK_TYPE type);
-
- private:
- ACMRandom rnd_;
-};
-
-typedef void (*buildcompdiffwtdmaskd16_func)(
- uint8_t *mask, DIFFWTD_MASK_TYPE mask_type, const CONV_BUF_TYPE *src0,
- int src0_stride, const CONV_BUF_TYPE *src1, int src1_stride, int h, int w,
- ConvolveParams *conv_params, int bd);
-
-typedef ::testing::tuple<int, buildcompdiffwtdmaskd16_func, BLOCK_SIZE>
- BuildCompDiffwtdMaskD16Param;
-
-#if HAVE_SSE4_1 || HAVE_NEON
-::testing::internal::ParamGenerator<BuildCompDiffwtdMaskD16Param> BuildParams(
- buildcompdiffwtdmaskd16_func filter) {
- return ::testing::Combine(::testing::Range(8, 13, 2),
- ::testing::Values(filter),
- ::testing::Range(BLOCK_4X4, BLOCK_SIZES_ALL));
-}
-#endif
-class BuildCompDiffwtdMaskD16Test
- : public ::testing::TestWithParam<BuildCompDiffwtdMaskD16Param> {
- public:
- ~BuildCompDiffwtdMaskD16Test() {}
- virtual void TearDown() { libaom_test::ClearSystemState(); }
- void SetUp() { rnd_.Reset(ACMRandom::DeterministicSeed()); }
-
- protected:
- void RunCheckOutput(buildcompdiffwtdmaskd16_func test_impl);
- void RunSpeedTest(buildcompdiffwtdmaskd16_func test_impl,
- DIFFWTD_MASK_TYPE mask_type);
- libaom_test::ACMRandom rnd_;
-}; // class BuildCompDiffwtdMaskD16Test
-
-void BuildCompDiffwtdMaskD16Test::RunCheckOutput(
- buildcompdiffwtdmaskd16_func test_impl) {
- const int block_idx = GET_PARAM(2);
- const int bd = GET_PARAM(0);
- const int width = block_size_wide[block_idx];
- const int height = block_size_high[block_idx];
- DECLARE_ALIGNED(16, uint8_t, mask_ref[2 * MAX_SB_SQUARE]);
- DECLARE_ALIGNED(16, uint8_t, mask_test[2 * MAX_SB_SQUARE]);
- DECLARE_ALIGNED(32, uint16_t, src0[MAX_SB_SQUARE]);
- DECLARE_ALIGNED(32, uint16_t, src1[MAX_SB_SQUARE]);
-
- ConvolveParams conv_params = get_conv_params_no_round(0, 0, NULL, 0, 1, bd);
-
- int in_precision =
- bd + 2 * FILTER_BITS - conv_params.round_0 - conv_params.round_1 + 2;
-
- for (int i = 0; i < MAX_SB_SQUARE; i++) {
- src0[i] = rnd_.Rand16() & ((1 << in_precision) - 1);
- src1[i] = rnd_.Rand16() & ((1 << in_precision) - 1);
- }
-
- for (int mask_type = 0; mask_type < DIFFWTD_MASK_TYPES; mask_type++) {
- av1_build_compound_diffwtd_mask_d16_c(
- mask_ref, (DIFFWTD_MASK_TYPE)mask_type, src0, width, src1, width,
- height, width, &conv_params, bd);
-
- test_impl(mask_test, (DIFFWTD_MASK_TYPE)mask_type, src0, width, src1, width,
- height, width, &conv_params, bd);
-
- for (int r = 0; r < height; ++r) {
- for (int c = 0; c < width; ++c) {
- ASSERT_EQ(mask_ref[c + r * width], mask_test[c + r * width])
- << "Mismatch at unit tests for BuildCompDiffwtdMaskD16Test\n"
- << " Pixel mismatch at index "
- << "[" << r << "," << c << "] "
- << " @ " << width << "x" << height << " inv " << mask_type;
- }
- }
- }
-}
-
-void BuildCompDiffwtdMaskD16Test::RunSpeedTest(
- buildcompdiffwtdmaskd16_func test_impl, DIFFWTD_MASK_TYPE mask_type) {
- const int block_idx = GET_PARAM(2);
- const int bd = GET_PARAM(0);
- const int width = block_size_wide[block_idx];
- const int height = block_size_high[block_idx];
- DECLARE_ALIGNED(16, uint8_t, mask[MAX_SB_SQUARE]);
- DECLARE_ALIGNED(32, uint16_t, src0[MAX_SB_SQUARE]);
- DECLARE_ALIGNED(32, uint16_t, src1[MAX_SB_SQUARE]);
-
- ConvolveParams conv_params = get_conv_params_no_round(0, 0, NULL, 0, 1, bd);
-
- int in_precision =
- bd + 2 * FILTER_BITS - conv_params.round_0 - conv_params.round_1 + 2;
-
- for (int i = 0; i < MAX_SB_SQUARE; i++) {
- src0[i] = rnd_.Rand16() & ((1 << in_precision) - 1);
- src1[i] = rnd_.Rand16() & ((1 << in_precision) - 1);
- }
-
- const int num_loops = 10000000 / (width + height);
- aom_usec_timer timer;
- aom_usec_timer_start(&timer);
-
- for (int i = 0; i < num_loops; ++i)
- av1_build_compound_diffwtd_mask_d16_c(mask, mask_type, src0, width, src1,
- width, height, width, &conv_params,
- bd);
-
- aom_usec_timer_mark(&timer);
- const int elapsed_time = static_cast<int>(aom_usec_timer_elapsed(&timer));
-
- aom_usec_timer timer1;
- aom_usec_timer_start(&timer1);
-
- for (int i = 0; i < num_loops; ++i)
- test_impl(mask, mask_type, src0, width, src1, width, height, width,
- &conv_params, bd);
-
- aom_usec_timer_mark(&timer1);
- const int elapsed_time1 = static_cast<int>(aom_usec_timer_elapsed(&timer1));
- printf("av1_build_compound_diffwtd_mask_d16 %3dx%-3d: %7.2f \n", width,
- height, elapsed_time / double(elapsed_time1));
-}
-#if HAVE_SSE4_1
-void BuildCompDiffwtdMaskTest::RunTest(buildcompdiffwtdmaskd_func test_impl,
- const int is_speed,
- const DIFFWTD_MASK_TYPE type) {
- const int sb_type = GET_PARAM(0);
- const int width = block_size_wide[sb_type];
- const int height = block_size_high[sb_type];
- DECLARE_ALIGNED(16, uint8_t, mask_ref[MAX_SB_SQUARE]);
- DECLARE_ALIGNED(16, uint8_t, mask_test[MAX_SB_SQUARE]);
- DECLARE_ALIGNED(16, uint8_t, src0[MAX_SB_SQUARE]);
- DECLARE_ALIGNED(16, uint8_t, src1[MAX_SB_SQUARE]);
- ACMRandom rnd(ACMRandom::DeterministicSeed());
- for (int i = 0; i < width * height; i++) {
- src0[i] = rnd.Rand8();
- src1[i] = rnd.Rand8();
- }
- const int run_times = is_speed ? (10000000 / (width + height)) : 1;
- aom_usec_timer timer;
- aom_usec_timer_start(&timer);
- for (int i = 0; i < run_times; ++i) {
- av1_build_compound_diffwtd_mask_c(mask_ref, type, src0, width, src1, width,
- height, width);
- }
- const double t1 = get_time_mark(&timer);
- aom_usec_timer_start(&timer);
- for (int i = 0; i < run_times; ++i) {
- test_impl(mask_test, type, src0, width, src1, width, height, width);
- }
- const double t2 = get_time_mark(&timer);
- if (is_speed) {
- printf("mask %d %3dx%-3d:%7.2f/%7.2fns", type, width, height, t1, t2);
- printf("(%3.2f)\n", t1 / t2);
- }
- for (int r = 0; r < height; ++r) {
- for (int c = 0; c < width; ++c) {
- ASSERT_EQ(mask_ref[c + r * width], mask_test[c + r * width])
- << "[" << r << "," << c << "] " << run_times << " @ " << width << "x"
- << height << " inv " << type;
- }
- }
-}
-
-TEST_P(BuildCompDiffwtdMaskTest, match) {
- RunTest(GET_PARAM(1), 0, DIFFWTD_38);
- RunTest(GET_PARAM(1), 0, DIFFWTD_38_INV);
-}
-TEST_P(BuildCompDiffwtdMaskTest, DISABLED_Speed) {
- RunTest(GET_PARAM(1), 1, DIFFWTD_38);
- RunTest(GET_PARAM(1), 1, DIFFWTD_38_INV);
-}
-#endif
-TEST_P(BuildCompDiffwtdMaskD16Test, CheckOutput) {
- RunCheckOutput(GET_PARAM(1));
-}
-
-TEST_P(BuildCompDiffwtdMaskD16Test, DISABLED_Speed) {
- RunSpeedTest(GET_PARAM(1), DIFFWTD_38);
- RunSpeedTest(GET_PARAM(1), DIFFWTD_38_INV);
-}
-
-#if HAVE_SSE4_1
-INSTANTIATE_TEST_CASE_P(SSE4_1, BuildCompDiffwtdMaskTest,
- BuildParams(av1_build_compound_diffwtd_mask_sse4_1));
-
-INSTANTIATE_TEST_CASE_P(
- SSE4_1, BuildCompDiffwtdMaskD16Test,
- BuildParams(av1_build_compound_diffwtd_mask_d16_sse4_1));
-#endif
-
-#if HAVE_AVX2
-INSTANTIATE_TEST_CASE_P(AVX2, BuildCompDiffwtdMaskTest,
- BuildParams(av1_build_compound_diffwtd_mask_avx2));
-
-INSTANTIATE_TEST_CASE_P(AVX2, BuildCompDiffwtdMaskD16Test,
- BuildParams(av1_build_compound_diffwtd_mask_d16_avx2));
-#endif
-
-#if HAVE_NEON
-INSTANTIATE_TEST_CASE_P(NEON, BuildCompDiffwtdMaskD16Test,
- BuildParams(av1_build_compound_diffwtd_mask_d16_neon));
-#endif
-
-} // namespace
diff --git a/third_party/aom/test/register_state_check.h b/third_party/aom/test/register_state_check.h
deleted file mode 100644
index d404621dd..000000000
--- a/third_party/aom/test/register_state_check.h
+++ /dev/null
@@ -1,148 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#ifndef AOM_TEST_REGISTER_STATE_CHECK_H_
-#define AOM_TEST_REGISTER_STATE_CHECK_H_
-
-#include "third_party/googletest/src/googletest/include/gtest/gtest.h"
-
-#include "config/aom_config.h"
-
-#include "aom/aom_integer.h"
-
-// ASM_REGISTER_STATE_CHECK(asm_function)
-// Minimally validates the environment pre & post function execution. This
-// variant should be used with assembly functions which are not expected to
-// fully restore the system state. See platform implementations of
-// RegisterStateCheck for details.
-//
-// API_REGISTER_STATE_CHECK(api_function)
-// Performs all the checks done by ASM_REGISTER_STATE_CHECK() and any
-// additional checks to ensure the environment is in a consistent state pre &
-// post function execution. This variant should be used with API functions.
-// See platform implementations of RegisterStateCheckXXX for details.
-//
-
-#if defined(_WIN64) && ARCH_X86_64
-
-#undef NOMINMAX
-#define NOMINMAX
-#define WIN32_LEAN_AND_MEAN
-#include <windows.h>
-#include <winnt.h>
-
-inline bool operator==(const M128A &lhs, const M128A &rhs) {
- return (lhs.Low == rhs.Low && lhs.High == rhs.High);
-}
-
-namespace libaom_test {
-
-// Compares the state of xmm[6-15] at construction with their state at
-// destruction. These registers should be preserved by the callee on
-// Windows x64.
-class RegisterStateCheck {
- public:
- RegisterStateCheck() { initialized_ = StoreRegisters(&pre_context_); }
- ~RegisterStateCheck() { Check(); }
-
- private:
- static bool StoreRegisters(CONTEXT *const context) {
- const HANDLE this_thread = GetCurrentThread();
- EXPECT_TRUE(this_thread != NULL);
- context->ContextFlags = CONTEXT_FLOATING_POINT;
- const bool context_saved = GetThreadContext(this_thread, context) == TRUE;
- EXPECT_TRUE(context_saved) << "GetLastError: " << GetLastError();
- return context_saved;
- }
-
- // Compares the register state. Returns true if the states match.
- void Check() const {
- ASSERT_TRUE(initialized_);
- CONTEXT post_context;
- ASSERT_TRUE(StoreRegisters(&post_context));
-
- const M128A *xmm_pre = &pre_context_.Xmm6;
- const M128A *xmm_post = &post_context.Xmm6;
- for (int i = 6; i <= 15; ++i) {
- EXPECT_EQ(*xmm_pre, *xmm_post) << "xmm" << i << " has been modified!";
- ++xmm_pre;
- ++xmm_post;
- }
- }
-
- bool initialized_;
- CONTEXT pre_context_;
-};
-
-#define ASM_REGISTER_STATE_CHECK(statement) \
- do { \
- libaom_test::RegisterStateCheck reg_check; \
- statement; \
- } while (false)
-
-} // namespace libaom_test
-
-#else
-
-namespace libaom_test {
-
-class RegisterStateCheck {};
-#define ASM_REGISTER_STATE_CHECK(statement) statement
-
-} // namespace libaom_test
-
-#endif // _WIN64 && ARCH_X86_64
-
-#if ARCH_X86 || ARCH_X86_64
-#if defined(__GNUC__)
-
-namespace libaom_test {
-
-// Checks the FPU tag word pre/post execution to ensure emms has been called.
-class RegisterStateCheckMMX {
- public:
- RegisterStateCheckMMX() {
- __asm__ volatile("fstenv %0" : "=rm"(pre_fpu_env_));
- }
- ~RegisterStateCheckMMX() { Check(); }
-
- private:
- // Checks the FPU tag word pre/post execution, returning false if not cleared
- // to 0xffff.
- void Check() const {
- EXPECT_EQ(0xffff, pre_fpu_env_[4])
- << "FPU was in an inconsistent state prior to call";
-
- uint16_t post_fpu_env[14];
- __asm__ volatile("fstenv %0" : "=rm"(post_fpu_env));
- EXPECT_EQ(0xffff, post_fpu_env[4])
- << "FPU was left in an inconsistent state after call";
- }
-
- uint16_t pre_fpu_env_[14];
-};
-
-#define API_REGISTER_STATE_CHECK(statement) \
- do { \
- libaom_test::RegisterStateCheckMMX reg_check; \
- ASM_REGISTER_STATE_CHECK(statement); \
- } while (false)
-
-} // namespace libaom_test
-
-#endif // __GNUC__
-#endif // ARCH_X86 || ARCH_X86_64
-
-#ifndef API_REGISTER_STATE_CHECK
-#define API_REGISTER_STATE_CHECK ASM_REGISTER_STATE_CHECK
-#endif
-
-#endif // AOM_TEST_REGISTER_STATE_CHECK_H_
diff --git a/third_party/aom/test/resize_test.cc b/third_party/aom/test/resize_test.cc
deleted file mode 100644
index b270b8362..000000000
--- a/third_party/aom/test/resize_test.cc
+++ /dev/null
@@ -1,642 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#include <climits>
-#include <vector>
-#include "aom_dsp/aom_dsp_common.h"
-#include "third_party/googletest/src/googletest/include/gtest/gtest.h"
-#include "test/codec_factory.h"
-#include "test/encode_test_driver.h"
-#include "test/i420_video_source.h"
-#include "test/video_source.h"
-#include "test/util.h"
-
-// Enable(1) or Disable(0) writing of the compressed bitstream.
-#define WRITE_COMPRESSED_STREAM 0
-
-namespace {
-
-#if WRITE_COMPRESSED_STREAM
-static void mem_put_le16(char *const mem, unsigned int val) {
- mem[0] = val;
- mem[1] = val >> 8;
-}
-
-static void mem_put_le32(char *const mem, unsigned int val) {
- mem[0] = val;
- mem[1] = val >> 8;
- mem[2] = val >> 16;
- mem[3] = val >> 24;
-}
-
-static void write_ivf_file_header(const aom_codec_enc_cfg_t *const cfg,
- int frame_cnt, FILE *const outfile) {
- char header[32];
-
- header[0] = 'D';
- header[1] = 'K';
- header[2] = 'I';
- header[3] = 'F';
- mem_put_le16(header + 4, 0); /* version */
- mem_put_le16(header + 6, 32); /* headersize */
- mem_put_le32(header + 8, 0x30395056); /* fourcc (av1) */
- mem_put_le16(header + 12, cfg->g_w); /* width */
- mem_put_le16(header + 14, cfg->g_h); /* height */
- mem_put_le32(header + 16, cfg->g_timebase.den); /* rate */
- mem_put_le32(header + 20, cfg->g_timebase.num); /* scale */
- mem_put_le32(header + 24, frame_cnt); /* length */
- mem_put_le32(header + 28, 0); /* unused */
-
- (void)fwrite(header, 1, 32, outfile);
-}
-
-static void write_ivf_frame_size(FILE *const outfile, const size_t size) {
- char header[4];
- mem_put_le32(header, static_cast<unsigned int>(size));
- (void)fwrite(header, 1, 4, outfile);
-}
-
-static void write_ivf_frame_header(const aom_codec_cx_pkt_t *const pkt,
- FILE *const outfile) {
- char header[12];
- aom_codec_pts_t pts;
-
- if (pkt->kind != AOM_CODEC_CX_FRAME_PKT) return;
-
- pts = pkt->data.frame.pts;
- mem_put_le32(header, static_cast<unsigned int>(pkt->data.frame.sz));
- mem_put_le32(header + 4, pts & 0xFFFFFFFF);
- mem_put_le32(header + 8, pts >> 32);
-
- (void)fwrite(header, 1, 12, outfile);
-}
-#endif // WRITE_COMPRESSED_STREAM
-
-const unsigned int kInitialWidth = 320;
-const unsigned int kInitialHeight = 240;
-
-struct FrameInfo {
- FrameInfo(aom_codec_pts_t _pts, unsigned int _w, unsigned int _h)
- : pts(_pts), w(_w), h(_h) {}
-
- aom_codec_pts_t pts;
- unsigned int w;
- unsigned int h;
-};
-
-void ScaleForFrameNumber(unsigned int frame, unsigned int initial_w,
- unsigned int initial_h, unsigned int *w,
- unsigned int *h, int flag_codec) {
- if (frame < 10) {
- *w = initial_w;
- *h = initial_h;
- return;
- }
- if (frame < 20) {
- *w = initial_w * 3 / 4;
- *h = initial_h * 3 / 4;
- return;
- }
- if (frame < 30) {
- *w = initial_w / 2;
- *h = initial_h / 2;
- return;
- }
- if (frame < 40) {
- *w = initial_w;
- *h = initial_h;
- return;
- }
- if (frame < 50) {
- *w = initial_w * 3 / 4;
- *h = initial_h * 3 / 4;
- return;
- }
- if (frame < 60) {
- *w = initial_w / 2;
- *h = initial_h / 2;
- return;
- }
- if (frame < 70) {
- *w = initial_w;
- *h = initial_h;
- return;
- }
- if (frame < 80) {
- *w = initial_w * 3 / 4;
- *h = initial_h * 3 / 4;
- return;
- }
- if (frame < 90) {
- *w = initial_w / 2;
- *h = initial_h / 2;
- return;
- }
- if (frame < 100) {
- *w = initial_w * 3 / 4;
- *h = initial_h * 3 / 4;
- return;
- }
- if (frame < 110) {
- *w = initial_w;
- *h = initial_h;
- return;
- }
- // Go down very low
- if (frame < 120) {
- *w = initial_w / 4;
- *h = initial_h / 4;
- return;
- }
- if (flag_codec == 1) {
- // Cases that only works for AV1.
- // For AV1: Swap width and height of original.
- if (frame < 140) {
- *w = initial_h;
- *h = initial_w;
- return;
- }
- }
- *w = initial_w;
- *h = initial_h;
-}
-
-class ResizingVideoSource : public ::libaom_test::DummyVideoSource {
- public:
- ResizingVideoSource() {
- SetSize(kInitialWidth, kInitialHeight);
- limit_ = 150;
- }
- int flag_codec_;
- virtual ~ResizingVideoSource() {}
-
- protected:
- virtual void Next() {
- ++frame_;
- unsigned int width;
- unsigned int height;
- ScaleForFrameNumber(frame_, kInitialWidth, kInitialHeight, &width, &height,
- flag_codec_);
- SetSize(width, height);
- FillFrame();
- }
-};
-
-class ResizeTest
- : public ::libaom_test::CodecTestWithParam<libaom_test::TestMode>,
- public ::libaom_test::EncoderTest {
- protected:
- ResizeTest() : EncoderTest(GET_PARAM(0)) {}
-
- virtual ~ResizeTest() {}
-
- virtual void SetUp() {
- InitializeConfig();
- SetMode(GET_PARAM(1));
- }
-
- virtual void DecompressedFrameHook(const aom_image_t &img,
- aom_codec_pts_t pts) {
- frame_info_list_.push_back(FrameInfo(pts, img.d_w, img.d_h));
- }
-
- std::vector<FrameInfo> frame_info_list_;
-};
-
-TEST_P(ResizeTest, TestExternalResizeWorks) {
- ResizingVideoSource video;
- video.flag_codec_ = 0;
- cfg_.g_lag_in_frames = 0;
- // We use max(kInitialWidth, kInitialHeight) because during the test
- // the width and height of the frame are swapped
- cfg_.g_forced_max_frame_width = cfg_.g_forced_max_frame_height =
- AOMMAX(kInitialWidth, kInitialHeight);
- ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
-
- // Check we decoded the same number of frames as we attempted to encode
- ASSERT_EQ(frame_info_list_.size(), video.limit());
-
- for (std::vector<FrameInfo>::const_iterator info = frame_info_list_.begin();
- info != frame_info_list_.end(); ++info) {
- const unsigned int frame = static_cast<unsigned>(info->pts);
- unsigned int expected_w;
- unsigned int expected_h;
- ScaleForFrameNumber(frame, kInitialWidth, kInitialHeight, &expected_w,
- &expected_h, 0);
- EXPECT_EQ(expected_w, info->w)
- << "Frame " << frame << " had unexpected width";
- EXPECT_EQ(expected_h, info->h)
- << "Frame " << frame << " had unexpected height";
- }
-}
-
-const unsigned int kStepDownFrame = 3;
-const unsigned int kStepUpFrame = 6;
-
-class ResizeInternalTestLarge : public ResizeTest {
- protected:
-#if WRITE_COMPRESSED_STREAM
- ResizeInternalTestLarge()
- : ResizeTest(), frame0_psnr_(0.0), outfile_(NULL), out_frames_(0) {}
-#else
- ResizeInternalTestLarge() : ResizeTest(), frame0_psnr_(0.0) {}
-#endif
-
- virtual ~ResizeInternalTestLarge() {}
-
- virtual void BeginPassHook(unsigned int /*pass*/) {
-#if WRITE_COMPRESSED_STREAM
- outfile_ = fopen("av10-2-05-resize.ivf", "wb");
-#endif
- }
-
- virtual void EndPassHook() {
-#if WRITE_COMPRESSED_STREAM
- if (outfile_) {
- if (!fseek(outfile_, 0, SEEK_SET))
- write_ivf_file_header(&cfg_, out_frames_, outfile_);
- fclose(outfile_);
- outfile_ = NULL;
- }
-#endif
- }
-
- virtual void PreEncodeFrameHook(libaom_test::VideoSource *video,
- libaom_test::Encoder *encoder) {
- if (change_config_) {
- int new_q = 60;
- if (video->frame() == 0) {
- struct aom_scaling_mode mode = { AOME_ONETWO, AOME_ONETWO };
- encoder->Control(AOME_SET_SCALEMODE, &mode);
- }
- if (video->frame() == 1) {
- struct aom_scaling_mode mode = { AOME_NORMAL, AOME_NORMAL };
- encoder->Control(AOME_SET_SCALEMODE, &mode);
- cfg_.rc_min_quantizer = cfg_.rc_max_quantizer = new_q;
- encoder->Config(&cfg_);
- }
- } else {
- if (video->frame() >= kStepDownFrame && video->frame() < kStepUpFrame) {
- struct aom_scaling_mode mode = { AOME_FOURFIVE, AOME_THREEFIVE };
- encoder->Control(AOME_SET_SCALEMODE, &mode);
- }
- if (video->frame() >= kStepUpFrame) {
- struct aom_scaling_mode mode = { AOME_NORMAL, AOME_NORMAL };
- encoder->Control(AOME_SET_SCALEMODE, &mode);
- }
- }
- }
-
- virtual void PSNRPktHook(const aom_codec_cx_pkt_t *pkt) {
- if (frame0_psnr_ == 0.) frame0_psnr_ = pkt->data.psnr.psnr[0];
- EXPECT_NEAR(pkt->data.psnr.psnr[0], frame0_psnr_, 2.5);
- }
-
-#if WRITE_COMPRESSED_STREAM
- virtual void FramePktHook(const aom_codec_cx_pkt_t *pkt) {
- ++out_frames_;
-
- // Write initial file header if first frame.
- if (pkt->data.frame.pts == 0) write_ivf_file_header(&cfg_, 0, outfile_);
-
- // Write frame header and data.
- write_ivf_frame_header(pkt, outfile_);
- (void)fwrite(pkt->data.frame.buf, 1, pkt->data.frame.sz, outfile_);
- }
-#endif
-
- double frame0_psnr_;
- bool change_config_;
-#if WRITE_COMPRESSED_STREAM
- FILE *outfile_;
- unsigned int out_frames_;
-#endif
-};
-
-TEST_P(ResizeInternalTestLarge, TestInternalResizeWorks) {
- ::libaom_test::I420VideoSource video("hantro_collage_w352h288.yuv", 352, 288,
- 30, 1, 0, 10);
- init_flags_ = AOM_CODEC_USE_PSNR;
- change_config_ = false;
-
- // q picked such that initial keyframe on this clip is ~30dB PSNR
- cfg_.rc_min_quantizer = cfg_.rc_max_quantizer = 48;
-
- // If the number of frames being encoded is smaller than g_lag_in_frames
- // the encoded frame is unavailable using the current API. Comparing
- // frames to detect mismatch would then not be possible. Set
- // g_lag_in_frames = 0 to get around this.
- cfg_.g_lag_in_frames = 0;
- ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
-
- for (std::vector<FrameInfo>::const_iterator info = frame_info_list_.begin();
- info != frame_info_list_.end(); ++info) {
- }
- for (std::vector<FrameInfo>::const_iterator info = frame_info_list_.begin();
- info != frame_info_list_.end(); ++info) {
- const aom_codec_pts_t pts = info->pts;
- if (pts >= kStepDownFrame && pts < kStepUpFrame) {
- ASSERT_EQ(282U, info->w) << "Frame " << pts << " had unexpected width";
- ASSERT_EQ(173U, info->h) << "Frame " << pts << " had unexpected height";
- } else {
- EXPECT_EQ(352U, info->w) << "Frame " << pts << " had unexpected width";
- EXPECT_EQ(288U, info->h) << "Frame " << pts << " had unexpected height";
- }
- }
-}
-
-TEST_P(ResizeInternalTestLarge, TestInternalResizeChangeConfig) {
- ::libaom_test::I420VideoSource video("hantro_collage_w352h288.yuv", 352, 288,
- 30, 1, 0, 10);
- cfg_.g_w = 352;
- cfg_.g_h = 288;
- change_config_ = true;
- ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
-}
-
-class ResizeRealtimeTest
- : public ::libaom_test::CodecTestWith2Params<libaom_test::TestMode, int>,
- public ::libaom_test::EncoderTest {
- protected:
- ResizeRealtimeTest() : EncoderTest(GET_PARAM(0)) {}
- virtual ~ResizeRealtimeTest() {}
-
- virtual void PreEncodeFrameHook(libaom_test::VideoSource *video,
- libaom_test::Encoder *encoder) {
- if (video->frame() == 0) {
- encoder->Control(AV1E_SET_AQ_MODE, 3);
- encoder->Control(AOME_SET_CPUUSED, set_cpu_used_);
- }
-
- if (change_bitrate_ && video->frame() == 120) {
- change_bitrate_ = false;
- cfg_.rc_target_bitrate = 500;
- encoder->Config(&cfg_);
- }
- }
-
- virtual void SetUp() {
- InitializeConfig();
- SetMode(GET_PARAM(1));
- set_cpu_used_ = GET_PARAM(2);
- }
-
- virtual void DecompressedFrameHook(const aom_image_t &img,
- aom_codec_pts_t pts) {
- frame_info_list_.push_back(FrameInfo(pts, img.d_w, img.d_h));
- }
-
- virtual void MismatchHook(const aom_image_t *img1, const aom_image_t *img2) {
- double mismatch_psnr = compute_psnr(img1, img2);
- mismatch_psnr_ += mismatch_psnr;
- ++mismatch_nframes_;
- }
-
- unsigned int GetMismatchFrames() { return mismatch_nframes_; }
-
- void DefaultConfig() {
- cfg_.rc_buf_initial_sz = 500;
- cfg_.rc_buf_optimal_sz = 600;
- cfg_.rc_buf_sz = 1000;
- cfg_.rc_min_quantizer = 2;
- cfg_.rc_max_quantizer = 56;
- cfg_.rc_undershoot_pct = 50;
- cfg_.rc_overshoot_pct = 50;
- cfg_.rc_end_usage = AOM_CBR;
- cfg_.kf_mode = AOM_KF_AUTO;
- cfg_.g_lag_in_frames = 0;
- cfg_.kf_min_dist = cfg_.kf_max_dist = 3000;
- // Enable dropped frames.
- cfg_.rc_dropframe_thresh = 1;
- // Disable error_resilience mode.
- cfg_.g_error_resilient = 0;
- // Run at low bitrate.
- cfg_.rc_target_bitrate = 200;
- // We use max(kInitialWidth, kInitialHeight) because during the test
- // the width and height of the frame are swapped
- cfg_.g_forced_max_frame_width = cfg_.g_forced_max_frame_height =
- AOMMAX(kInitialWidth, kInitialHeight);
- }
-
- std::vector<FrameInfo> frame_info_list_;
- int set_cpu_used_;
- bool change_bitrate_;
- double mismatch_psnr_;
- int mismatch_nframes_;
-};
-
-TEST_P(ResizeRealtimeTest, TestExternalResizeWorks) {
- ResizingVideoSource video;
- video.flag_codec_ = 1;
- DefaultConfig();
- change_bitrate_ = false;
- mismatch_psnr_ = 0.0;
- mismatch_nframes_ = 0;
- ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
-
- // Check we decoded the same number of frames as we attempted to encode
- ASSERT_EQ(frame_info_list_.size(), video.limit());
-
- for (std::vector<FrameInfo>::const_iterator info = frame_info_list_.begin();
- info != frame_info_list_.end(); ++info) {
- const unsigned int frame = static_cast<unsigned>(info->pts);
- unsigned int expected_w;
- unsigned int expected_h;
- ScaleForFrameNumber(frame, kInitialWidth, kInitialHeight, &expected_w,
- &expected_h, 1);
- EXPECT_EQ(expected_w, info->w)
- << "Frame " << frame << " had unexpected width";
- EXPECT_EQ(expected_h, info->h)
- << "Frame " << frame << " had unexpected height";
- EXPECT_EQ(static_cast<unsigned int>(0), GetMismatchFrames());
- }
-}
-
-// Verify the dynamic resizer behavior for real time, 1 pass CBR mode.
-// Run at low bitrate, with resize_allowed = 1, and verify that we get
-// one resize down event.
-TEST_P(ResizeRealtimeTest, DISABLED_TestInternalResizeDown) {
- ::libaom_test::I420VideoSource video("hantro_collage_w352h288.yuv", 352, 288,
- 30, 1, 0, 299);
- DefaultConfig();
- cfg_.g_w = 352;
- cfg_.g_h = 288;
- change_bitrate_ = false;
- mismatch_psnr_ = 0.0;
- mismatch_nframes_ = 0;
- ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
-
- unsigned int last_w = cfg_.g_w;
- unsigned int last_h = cfg_.g_h;
- int resize_count = 0;
- for (std::vector<FrameInfo>::const_iterator info = frame_info_list_.begin();
- info != frame_info_list_.end(); ++info) {
- if (info->w != last_w || info->h != last_h) {
- // Verify that resize down occurs.
- ASSERT_LT(info->w, last_w);
- ASSERT_LT(info->h, last_h);
- last_w = info->w;
- last_h = info->h;
- resize_count++;
- }
- }
-
-#if CONFIG_AV1_DECODER
- // Verify that we get 1 resize down event in this test.
- ASSERT_EQ(1, resize_count) << "Resizing should occur.";
- EXPECT_EQ(static_cast<unsigned int>(0), GetMismatchFrames());
-#else
- printf("Warning: AV1 decoder unavailable, unable to check resize count!\n");
-#endif
-}
-
-// Verify the dynamic resizer behavior for real time, 1 pass CBR mode.
-// Start at low target bitrate, raise the bitrate in the middle of the clip,
-// scaling-up should occur after bitrate changed.
-TEST_P(ResizeRealtimeTest, DISABLED_TestInternalResizeDownUpChangeBitRate) {
- ::libaom_test::I420VideoSource video("hantro_collage_w352h288.yuv", 352, 288,
- 30, 1, 0, 359);
- DefaultConfig();
- cfg_.g_w = 352;
- cfg_.g_h = 288;
- change_bitrate_ = true;
- mismatch_psnr_ = 0.0;
- mismatch_nframes_ = 0;
- // Disable dropped frames.
- cfg_.rc_dropframe_thresh = 0;
- // Starting bitrate low.
- cfg_.rc_target_bitrate = 80;
- ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
-
- unsigned int last_w = cfg_.g_w;
- unsigned int last_h = cfg_.g_h;
- int resize_count = 0;
- for (std::vector<FrameInfo>::const_iterator info = frame_info_list_.begin();
- info != frame_info_list_.end(); ++info) {
- if (info->w != last_w || info->h != last_h) {
- resize_count++;
- if (resize_count == 1) {
- // Verify that resize down occurs.
- ASSERT_LT(info->w, last_w);
- ASSERT_LT(info->h, last_h);
- } else if (resize_count == 2) {
- // Verify that resize up occurs.
- ASSERT_GT(info->w, last_w);
- ASSERT_GT(info->h, last_h);
- }
- last_w = info->w;
- last_h = info->h;
- }
- }
-
-#if CONFIG_AV1_DECODER
- // Verify that we get 2 resize events in this test.
- ASSERT_EQ(resize_count, 2) << "Resizing should occur twice.";
- EXPECT_EQ(static_cast<unsigned int>(0), GetMismatchFrames());
-#else
- printf("Warning: AV1 decoder unavailable, unable to check resize count!\n");
-#endif
-}
-
-class ResizeCspTest : public ResizeTest {
- protected:
-#if WRITE_COMPRESSED_STREAM
- ResizeCspTest()
- : ResizeTest(), frame0_psnr_(0.0), outfile_(NULL), out_frames_(0) {}
-#else
- ResizeCspTest() : ResizeTest(), frame0_psnr_(0.0) {}
-#endif
-
- virtual ~ResizeCspTest() {}
-
- virtual void BeginPassHook(unsigned int /*pass*/) {
-#if WRITE_COMPRESSED_STREAM
- outfile_ = fopen("av11-2-05-cspchape.ivf", "wb");
-#endif
- }
-
- virtual void EndPassHook() {
-#if WRITE_COMPRESSED_STREAM
- if (outfile_) {
- if (!fseek(outfile_, 0, SEEK_SET))
- write_ivf_file_header(&cfg_, out_frames_, outfile_);
- fclose(outfile_);
- outfile_ = NULL;
- }
-#endif
- }
-
- virtual void PSNRPktHook(const aom_codec_cx_pkt_t *pkt) {
- if (frame0_psnr_ == 0.) frame0_psnr_ = pkt->data.psnr.psnr[0];
- EXPECT_NEAR(pkt->data.psnr.psnr[0], frame0_psnr_, 2.0);
- }
-
-#if WRITE_COMPRESSED_STREAM
- virtual void FramePktHook(const aom_codec_cx_pkt_t *pkt) {
- ++out_frames_;
-
- // Write initial file header if first frame.
- if (pkt->data.frame.pts == 0) write_ivf_file_header(&cfg_, 0, outfile_);
-
- // Write frame header and data.
- write_ivf_frame_header(pkt, outfile_);
- (void)fwrite(pkt->data.frame.buf, 1, pkt->data.frame.sz, outfile_);
- }
-#endif
-
- double frame0_psnr_;
-#if WRITE_COMPRESSED_STREAM
- FILE *outfile_;
- unsigned int out_frames_;
-#endif
-};
-
-class ResizingCspVideoSource : public ::libaom_test::DummyVideoSource {
- public:
- explicit ResizingCspVideoSource(aom_img_fmt_t image_format) {
- SetSize(kInitialWidth, kInitialHeight);
- SetImageFormat(image_format);
- limit_ = 30;
- }
-
- virtual ~ResizingCspVideoSource() {}
-};
-
-#if (defined(DISABLE_TRELLISQ_SEARCH) && DISABLE_TRELLISQ_SEARCH)
-TEST_P(ResizeCspTest, DISABLED_TestResizeCspWorks) {
-#else
-TEST_P(ResizeCspTest, TestResizeCspWorks) {
-#endif
- const aom_img_fmt_t image_formats[] = { AOM_IMG_FMT_I420, AOM_IMG_FMT_I444 };
- for (size_t i = 0; i < GTEST_ARRAY_SIZE_(image_formats); ++i) {
- ResizingCspVideoSource video(image_formats[i]);
- init_flags_ = AOM_CODEC_USE_PSNR;
- cfg_.rc_min_quantizer = cfg_.rc_max_quantizer = 48;
- cfg_.g_lag_in_frames = 0;
- cfg_.g_profile = (image_formats[i] == AOM_IMG_FMT_I420) ? 0 : 1;
- ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
-
- // Check we decoded the same number of frames as we attempted to encode
- ASSERT_EQ(frame_info_list_.size(), video.limit());
- frame_info_list_.clear();
- }
-}
-
-AV1_INSTANTIATE_TEST_CASE(ResizeTest,
- ::testing::Values(::libaom_test::kRealTime));
-AV1_INSTANTIATE_TEST_CASE(ResizeInternalTestLarge,
- ::testing::Values(::libaom_test::kOnePassGood));
-AV1_INSTANTIATE_TEST_CASE(ResizeRealtimeTest,
- ::testing::Values(::libaom_test::kRealTime),
- ::testing::Range(5, 9));
-AV1_INSTANTIATE_TEST_CASE(ResizeCspTest,
- ::testing::Values(::libaom_test::kRealTime));
-} // namespace
diff --git a/third_party/aom/test/run_encodes.sh b/third_party/aom/test/run_encodes.sh
deleted file mode 100755
index 2096d8b15..000000000
--- a/third_party/aom/test/run_encodes.sh
+++ /dev/null
@@ -1,39 +0,0 @@
-#!/bin/bash
-#
-# Copyright (c) 2016, Alliance for Open Media. All rights reserved.
-#
-# This source code is subject to the terms of the BSD 2 Clause License and
-# the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
-# was not distributed with this source code in the LICENSE file, you can
-# obtain it at www.aomedia.org/license/software. If the Alliance for Open
-# Media Patent License 1.0 was not distributed with this source code in the
-# PATENTS file, you can obtain it at www.aomedia.org/license/patent.
-#
-# Author: jimbankoski@google.com (Jim Bankoski)
-
-if [[ $# -ne 4 ]]; then
- echo Encodes all the y4m files in the directory at the bitrates specified by
- echo the first 3 parameters and stores the results in a subdirectory named by
- echo the 4th parameter:
- echo
- echo Usage: run_encodes.sh start-kbps end-kbps step-kbps output-directory
- echo Example: run_encodes.sh 200 500 50 baseline
- exit
-fi
-
-s=$1
-e=$2
-step=$3
-newdir=$4
-
-for i in ./*y4m; do
- for (( b=$s; b<= $e; b+= $step ))
- do
- best_encode.sh $i $b
- done
- mv opsnr.stt $i.stt
-done
-
-mkdir $newdir
-mv *.stt $newdir
-mv *.webm $newdir
diff --git a/third_party/aom/test/sad_test.cc b/third_party/aom/test/sad_test.cc
deleted file mode 100644
index 845fe79da..000000000
--- a/third_party/aom/test/sad_test.cc
+++ /dev/null
@@ -1,1528 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#include <string.h>
-#include <limits.h>
-#include <stdio.h>
-
-#include "third_party/googletest/src/googletest/include/gtest/gtest.h"
-
-#include "config/aom_config.h"
-#include "config/aom_dsp_rtcd.h"
-
-#include "test/acm_random.h"
-#include "test/clear_system_state.h"
-#include "test/register_state_check.h"
-#include "test/util.h"
-#include "aom/aom_codec.h"
-#include "aom_mem/aom_mem.h"
-#include "aom_ports/mem.h"
-
-typedef unsigned int (*SadMxNFunc)(const uint8_t *src_ptr, int src_stride,
- const uint8_t *ref_ptr, int ref_stride);
-typedef ::testing::tuple<int, int, SadMxNFunc, int> SadMxNParam;
-
-typedef uint32_t (*SadMxNAvgFunc)(const uint8_t *src_ptr, int src_stride,
- const uint8_t *ref_ptr, int ref_stride,
- const uint8_t *second_pred);
-typedef ::testing::tuple<int, int, SadMxNAvgFunc, int> SadMxNAvgParam;
-
-typedef void (*JntCompAvgFunc)(uint8_t *comp_pred, const uint8_t *pred,
- int width, int height, const uint8_t *ref,
- int ref_stride,
- const JNT_COMP_PARAMS *jcp_param);
-typedef ::testing::tuple<int, int, JntCompAvgFunc, int> JntCompAvgParam;
-
-typedef unsigned int (*JntSadMxhFunc)(const uint8_t *src_ptr, int src_stride,
- const uint8_t *ref_ptr, int ref_stride,
- int width, int height);
-typedef ::testing::tuple<int, int, JntSadMxhFunc, int> JntSadMxhParam;
-
-typedef uint32_t (*JntSadMxNAvgFunc)(const uint8_t *src_ptr, int src_stride,
- const uint8_t *ref_ptr, int ref_stride,
- const uint8_t *second_pred,
- const JNT_COMP_PARAMS *jcp_param);
-typedef ::testing::tuple<int, int, JntSadMxNAvgFunc, int> JntSadMxNAvgParam;
-
-typedef void (*SadMxNx4Func)(const uint8_t *src_ptr, int src_stride,
- const uint8_t *const ref_ptr[], int ref_stride,
- uint32_t *sad_array);
-typedef ::testing::tuple<int, int, SadMxNx4Func, int> SadMxNx4Param;
-
-using libaom_test::ACMRandom;
-
-namespace {
-class SADTestBase : public ::testing::Test {
- public:
- SADTestBase(int width, int height, int bit_depth)
- : width_(width), height_(height), bd_(bit_depth) {}
-
- static void SetUpTestCase() {
- source_data8_ = reinterpret_cast<uint8_t *>(
- aom_memalign(kDataAlignment, kDataBlockSize));
- reference_data8_ = reinterpret_cast<uint8_t *>(
- aom_memalign(kDataAlignment, kDataBufferSize));
- second_pred8_ =
- reinterpret_cast<uint8_t *>(aom_memalign(kDataAlignment, 128 * 128));
- comp_pred8_ =
- reinterpret_cast<uint8_t *>(aom_memalign(kDataAlignment, 128 * 128));
- comp_pred8_test_ =
- reinterpret_cast<uint8_t *>(aom_memalign(kDataAlignment, 128 * 128));
- source_data16_ = reinterpret_cast<uint16_t *>(
- aom_memalign(kDataAlignment, kDataBlockSize * sizeof(uint16_t)));
- reference_data16_ = reinterpret_cast<uint16_t *>(
- aom_memalign(kDataAlignment, kDataBufferSize * sizeof(uint16_t)));
- second_pred16_ = reinterpret_cast<uint16_t *>(
- aom_memalign(kDataAlignment, 128 * 128 * sizeof(uint16_t)));
- comp_pred16_ = reinterpret_cast<uint16_t *>(
- aom_memalign(kDataAlignment, 128 * 128 * sizeof(uint16_t)));
- comp_pred16_test_ = reinterpret_cast<uint16_t *>(
- aom_memalign(kDataAlignment, 128 * 128 * sizeof(uint16_t)));
- }
-
- static void TearDownTestCase() {
- aom_free(source_data8_);
- source_data8_ = NULL;
- aom_free(reference_data8_);
- reference_data8_ = NULL;
- aom_free(second_pred8_);
- second_pred8_ = NULL;
- aom_free(comp_pred8_);
- comp_pred8_ = NULL;
- aom_free(comp_pred8_test_);
- comp_pred8_test_ = NULL;
- aom_free(source_data16_);
- source_data16_ = NULL;
- aom_free(reference_data16_);
- reference_data16_ = NULL;
- aom_free(second_pred16_);
- second_pred16_ = NULL;
- aom_free(comp_pred16_);
- comp_pred16_ = NULL;
- aom_free(comp_pred16_test_);
- comp_pred16_test_ = NULL;
- }
-
- virtual void TearDown() { libaom_test::ClearSystemState(); }
-
- protected:
- // Handle up to 4 128x128 blocks, with stride up to 256
- static const int kDataAlignment = 16;
- static const int kDataBlockSize = 128 * 256;
- static const int kDataBufferSize = 4 * kDataBlockSize;
-
- virtual void SetUp() {
- if (bd_ == -1) {
- use_high_bit_depth_ = false;
- bit_depth_ = AOM_BITS_8;
- source_data_ = source_data8_;
- reference_data_ = reference_data8_;
- second_pred_ = second_pred8_;
- comp_pred_ = comp_pred8_;
- comp_pred_test_ = comp_pred8_test_;
- } else {
- use_high_bit_depth_ = true;
- bit_depth_ = static_cast<aom_bit_depth_t>(bd_);
- source_data_ = CONVERT_TO_BYTEPTR(source_data16_);
- reference_data_ = CONVERT_TO_BYTEPTR(reference_data16_);
- second_pred_ = CONVERT_TO_BYTEPTR(second_pred16_);
- comp_pred_ = CONVERT_TO_BYTEPTR(comp_pred16_);
- comp_pred_test_ = CONVERT_TO_BYTEPTR(comp_pred16_test_);
- }
- mask_ = (1 << bit_depth_) - 1;
- source_stride_ = (width_ + 31) & ~31;
- reference_stride_ = width_ * 2;
- rnd_.Reset(ACMRandom::DeterministicSeed());
- }
-
- virtual uint8_t *GetReference(int block_idx) {
- if (use_high_bit_depth_)
- return CONVERT_TO_BYTEPTR(CONVERT_TO_SHORTPTR(reference_data_) +
- block_idx * kDataBlockSize);
- return reference_data_ + block_idx * kDataBlockSize;
- }
-
- // Sum of Absolute Differences. Given two blocks, calculate the absolute
- // difference between two pixels in the same relative location; accumulate.
- unsigned int ReferenceSAD(int block_idx) {
- unsigned int sad = 0;
- const uint8_t *const reference8 = GetReference(block_idx);
- const uint8_t *const source8 = source_data_;
- const uint16_t *const reference16 =
- CONVERT_TO_SHORTPTR(GetReference(block_idx));
- const uint16_t *const source16 = CONVERT_TO_SHORTPTR(source_data_);
- for (int h = 0; h < height_; ++h) {
- for (int w = 0; w < width_; ++w) {
- if (!use_high_bit_depth_) {
- sad += abs(source8[h * source_stride_ + w] -
- reference8[h * reference_stride_ + w]);
- } else {
- sad += abs(source16[h * source_stride_ + w] -
- reference16[h * reference_stride_ + w]);
- }
- }
- }
- return sad;
- }
-
- // Sum of Absolute Differences Average. Given two blocks, and a prediction
- // calculate the absolute difference between one pixel and average of the
- // corresponding and predicted pixels; accumulate.
- unsigned int ReferenceSADavg(int block_idx) {
- unsigned int sad = 0;
- const uint8_t *const reference8 = GetReference(block_idx);
- const uint8_t *const source8 = source_data_;
- const uint8_t *const second_pred8 = second_pred_;
- const uint16_t *const reference16 =
- CONVERT_TO_SHORTPTR(GetReference(block_idx));
- const uint16_t *const source16 = CONVERT_TO_SHORTPTR(source_data_);
- const uint16_t *const second_pred16 = CONVERT_TO_SHORTPTR(second_pred_);
- for (int h = 0; h < height_; ++h) {
- for (int w = 0; w < width_; ++w) {
- if (!use_high_bit_depth_) {
- const int tmp = second_pred8[h * width_ + w] +
- reference8[h * reference_stride_ + w];
- const uint8_t comp_pred = ROUND_POWER_OF_TWO(tmp, 1);
- sad += abs(source8[h * source_stride_ + w] - comp_pred);
- } else {
- const int tmp = second_pred16[h * width_ + w] +
- reference16[h * reference_stride_ + w];
- const uint16_t comp_pred = ROUND_POWER_OF_TWO(tmp, 1);
- sad += abs(source16[h * source_stride_ + w] - comp_pred);
- }
- }
- }
- return sad;
- }
-
- void ReferenceJntCompAvg(int block_idx) {
- const uint8_t *const reference8 = GetReference(block_idx);
- const uint8_t *const second_pred8 = second_pred_;
- uint8_t *const comp_pred8 = comp_pred_;
- const uint16_t *const reference16 =
- CONVERT_TO_SHORTPTR(GetReference(block_idx));
- const uint16_t *const second_pred16 = CONVERT_TO_SHORTPTR(second_pred_);
- uint16_t *const comp_pred16 = CONVERT_TO_SHORTPTR(comp_pred_);
- for (int h = 0; h < height_; ++h) {
- for (int w = 0; w < width_; ++w) {
- if (!use_high_bit_depth_) {
- const int tmp =
- second_pred8[h * width_ + w] * jcp_param_.bck_offset +
- reference8[h * reference_stride_ + w] * jcp_param_.fwd_offset;
- comp_pred8[h * width_ + w] = ROUND_POWER_OF_TWO(tmp, 4);
- } else {
- const int tmp =
- second_pred16[h * width_ + w] * jcp_param_.bck_offset +
- reference16[h * reference_stride_ + w] * jcp_param_.fwd_offset;
- comp_pred16[h * width_ + w] = ROUND_POWER_OF_TWO(tmp, 4);
- }
- }
- }
- }
-
- unsigned int ReferenceJntSADavg(int block_idx) {
- unsigned int sad = 0;
- const uint8_t *const reference8 = GetReference(block_idx);
- const uint8_t *const source8 = source_data_;
- const uint8_t *const second_pred8 = second_pred_;
- const uint16_t *const reference16 =
- CONVERT_TO_SHORTPTR(GetReference(block_idx));
- const uint16_t *const source16 = CONVERT_TO_SHORTPTR(source_data_);
- const uint16_t *const second_pred16 = CONVERT_TO_SHORTPTR(second_pred_);
- for (int h = 0; h < height_; ++h) {
- for (int w = 0; w < width_; ++w) {
- if (!use_high_bit_depth_) {
- const int tmp =
- second_pred8[h * width_ + w] * jcp_param_.bck_offset +
- reference8[h * reference_stride_ + w] * jcp_param_.fwd_offset;
- const uint8_t comp_pred = ROUND_POWER_OF_TWO(tmp, 4);
- sad += abs(source8[h * source_stride_ + w] - comp_pred);
- } else {
- const int tmp =
- second_pred16[h * width_ + w] * jcp_param_.bck_offset +
- reference16[h * reference_stride_ + w] * jcp_param_.fwd_offset;
- const uint16_t comp_pred = ROUND_POWER_OF_TWO(tmp, 4);
- sad += abs(source16[h * source_stride_ + w] - comp_pred);
- }
- }
- }
- return sad;
- }
-
- void FillConstant(uint8_t *data, int stride, uint16_t fill_constant) {
- uint8_t *data8 = data;
- uint16_t *data16 = CONVERT_TO_SHORTPTR(data);
- for (int h = 0; h < height_; ++h) {
- for (int w = 0; w < width_; ++w) {
- if (!use_high_bit_depth_) {
- data8[h * stride + w] = static_cast<uint8_t>(fill_constant);
- } else {
- data16[h * stride + w] = fill_constant;
- }
- }
- }
- }
-
- void FillRandom(uint8_t *data, int stride) {
- uint8_t *data8 = data;
- uint16_t *data16 = CONVERT_TO_SHORTPTR(data);
- for (int h = 0; h < height_; ++h) {
- for (int w = 0; w < width_; ++w) {
- if (!use_high_bit_depth_) {
- data8[h * stride + w] = rnd_.Rand8();
- } else {
- data16[h * stride + w] = rnd_.Rand16() & mask_;
- }
- }
- }
- }
-
- int width_, height_, mask_, bd_;
- aom_bit_depth_t bit_depth_;
- static uint8_t *source_data_;
- static uint8_t *reference_data_;
- static uint8_t *second_pred_;
- int source_stride_;
- bool use_high_bit_depth_;
- static uint8_t *source_data8_;
- static uint8_t *reference_data8_;
- static uint8_t *second_pred8_;
- static uint16_t *source_data16_;
- static uint16_t *reference_data16_;
- static uint16_t *second_pred16_;
- int reference_stride_;
- static uint8_t *comp_pred_;
- static uint8_t *comp_pred8_;
- static uint16_t *comp_pred16_;
- static uint8_t *comp_pred_test_;
- static uint8_t *comp_pred8_test_;
- static uint16_t *comp_pred16_test_;
- JNT_COMP_PARAMS jcp_param_;
-
- ACMRandom rnd_;
-};
-
-class SADx4Test : public ::testing::WithParamInterface<SadMxNx4Param>,
- public SADTestBase {
- public:
- SADx4Test() : SADTestBase(GET_PARAM(0), GET_PARAM(1), GET_PARAM(3)) {}
-
- protected:
- void SADs(unsigned int *results) {
- const uint8_t *references[] = { GetReference(0), GetReference(1),
- GetReference(2), GetReference(3) };
-
- ASM_REGISTER_STATE_CHECK(GET_PARAM(2)(
- source_data_, source_stride_, references, reference_stride_, results));
- }
-
- void CheckSADs() {
- unsigned int reference_sad, exp_sad[4];
-
- SADs(exp_sad);
- for (int block = 0; block < 4; ++block) {
- reference_sad = ReferenceSAD(block);
-
- EXPECT_EQ(reference_sad, exp_sad[block]) << "block " << block;
- }
- }
-};
-
-class SADTest : public ::testing::WithParamInterface<SadMxNParam>,
- public SADTestBase {
- public:
- SADTest() : SADTestBase(GET_PARAM(0), GET_PARAM(1), GET_PARAM(3)) {}
-
- protected:
- unsigned int SAD(int block_idx) {
- unsigned int ret;
- const uint8_t *const reference = GetReference(block_idx);
-
- ASM_REGISTER_STATE_CHECK(ret = GET_PARAM(2)(source_data_, source_stride_,
- reference, reference_stride_));
- return ret;
- }
-
- void CheckSAD() {
- const unsigned int reference_sad = ReferenceSAD(0);
- const unsigned int exp_sad = SAD(0);
-
- ASSERT_EQ(reference_sad, exp_sad);
- }
-
- void SpeedSAD() {
- int test_count = 20000000;
- while (test_count > 0) {
- SAD(0);
- test_count -= 1;
- }
- }
-};
-
-class SADavgTest : public ::testing::WithParamInterface<SadMxNAvgParam>,
- public SADTestBase {
- public:
- SADavgTest() : SADTestBase(GET_PARAM(0), GET_PARAM(1), GET_PARAM(3)) {}
-
- protected:
- unsigned int SAD_avg(int block_idx) {
- unsigned int ret;
- const uint8_t *const reference = GetReference(block_idx);
-
- ASM_REGISTER_STATE_CHECK(ret = GET_PARAM(2)(source_data_, source_stride_,
- reference, reference_stride_,
- second_pred_));
- return ret;
- }
-
- void CheckSAD() {
- const unsigned int reference_sad = ReferenceSADavg(0);
- const unsigned int exp_sad = SAD_avg(0);
-
- ASSERT_EQ(reference_sad, exp_sad);
- }
-};
-
-class JntCompAvgTest : public ::testing::WithParamInterface<JntCompAvgParam>,
- public SADTestBase {
- public:
- JntCompAvgTest() : SADTestBase(GET_PARAM(0), GET_PARAM(1), GET_PARAM(3)) {}
-
- protected:
- void jnt_comp_avg(int block_idx) {
- const uint8_t *const reference = GetReference(block_idx);
-
- ASM_REGISTER_STATE_CHECK(GET_PARAM(2)(comp_pred_test_, second_pred_, width_,
- height_, reference, reference_stride_,
- &jcp_param_));
- }
-
- void CheckCompAvg() {
- for (int j = 0; j < 2; ++j) {
- for (int i = 0; i < 4; ++i) {
- jcp_param_.fwd_offset = quant_dist_lookup_table[j][i][0];
- jcp_param_.bck_offset = quant_dist_lookup_table[j][i][1];
-
- ReferenceJntCompAvg(0);
- jnt_comp_avg(0);
-
- for (int y = 0; y < height_; ++y)
- for (int x = 0; x < width_; ++x)
- ASSERT_EQ(comp_pred_[y * width_ + x],
- comp_pred_test_[y * width_ + x]);
- }
- }
- }
-};
-
-class JntSADTest : public ::testing::WithParamInterface<JntSadMxhParam>,
- public SADTestBase {
- public:
- JntSADTest() : SADTestBase(GET_PARAM(0), GET_PARAM(1), GET_PARAM(3)) {}
-
- protected:
- unsigned int SAD(int block_idx) {
- unsigned int ret;
- const uint8_t *const reference = GetReference(block_idx);
-
- ASM_REGISTER_STATE_CHECK(ret = GET_PARAM(2)(source_data_, source_stride_,
- reference, reference_stride_,
- GET_PARAM(0), GET_PARAM(1)));
- return ret;
- }
-
- void CheckSAD() {
- const unsigned int reference_sad = ReferenceSAD(0);
- const unsigned int exp_sad = SAD(0);
-
- ASSERT_EQ(reference_sad, exp_sad);
- }
-
- void SpeedSAD() {
- int test_count = 20000000;
- while (test_count > 0) {
- SAD(0);
- test_count -= 1;
- }
- }
-};
-
-class JntSADavgTest : public ::testing::WithParamInterface<JntSadMxNAvgParam>,
- public SADTestBase {
- public:
- JntSADavgTest() : SADTestBase(GET_PARAM(0), GET_PARAM(1), GET_PARAM(3)) {}
-
- protected:
- unsigned int jnt_SAD_avg(int block_idx) {
- unsigned int ret;
- const uint8_t *const reference = GetReference(block_idx);
-
- ASM_REGISTER_STATE_CHECK(ret = GET_PARAM(2)(source_data_, source_stride_,
- reference, reference_stride_,
- second_pred_, &jcp_param_));
- return ret;
- }
-
- void CheckSAD() {
- for (int j = 0; j < 2; ++j) {
- for (int i = 0; i < 4; ++i) {
- jcp_param_.fwd_offset = quant_dist_lookup_table[j][i][0];
- jcp_param_.bck_offset = quant_dist_lookup_table[j][i][1];
-
- const unsigned int reference_sad = ReferenceJntSADavg(0);
- const unsigned int exp_sad = jnt_SAD_avg(0);
-
- ASSERT_EQ(reference_sad, exp_sad);
- }
- }
- }
-};
-
-uint8_t *SADTestBase::source_data_ = NULL;
-uint8_t *SADTestBase::reference_data_ = NULL;
-uint8_t *SADTestBase::second_pred_ = NULL;
-uint8_t *SADTestBase::comp_pred_ = NULL;
-uint8_t *SADTestBase::comp_pred_test_ = NULL;
-uint8_t *SADTestBase::source_data8_ = NULL;
-uint8_t *SADTestBase::reference_data8_ = NULL;
-uint8_t *SADTestBase::second_pred8_ = NULL;
-uint8_t *SADTestBase::comp_pred8_ = NULL;
-uint8_t *SADTestBase::comp_pred8_test_ = NULL;
-uint16_t *SADTestBase::source_data16_ = NULL;
-uint16_t *SADTestBase::reference_data16_ = NULL;
-uint16_t *SADTestBase::second_pred16_ = NULL;
-uint16_t *SADTestBase::comp_pred16_ = NULL;
-uint16_t *SADTestBase::comp_pred16_test_ = NULL;
-
-TEST_P(SADTest, MaxRef) {
- FillConstant(source_data_, source_stride_, 0);
- FillConstant(reference_data_, reference_stride_, mask_);
- CheckSAD();
-}
-
-TEST_P(SADTest, MaxSrc) {
- FillConstant(source_data_, source_stride_, mask_);
- FillConstant(reference_data_, reference_stride_, 0);
- CheckSAD();
-}
-
-TEST_P(SADTest, ShortRef) {
- const int tmp_stride = reference_stride_;
- reference_stride_ >>= 1;
- FillRandom(source_data_, source_stride_);
- FillRandom(reference_data_, reference_stride_);
- CheckSAD();
- reference_stride_ = tmp_stride;
-}
-
-TEST_P(SADTest, UnalignedRef) {
- // The reference frame, but not the source frame, may be unaligned for
- // certain types of searches.
- const int tmp_stride = reference_stride_;
- reference_stride_ -= 1;
- FillRandom(source_data_, source_stride_);
- FillRandom(reference_data_, reference_stride_);
- CheckSAD();
- reference_stride_ = tmp_stride;
-}
-
-TEST_P(SADTest, ShortSrc) {
- const int tmp_stride = source_stride_;
- source_stride_ >>= 1;
- int test_count = 2000;
- while (test_count > 0) {
- FillRandom(source_data_, source_stride_);
- FillRandom(reference_data_, reference_stride_);
- CheckSAD();
- test_count -= 1;
- }
- source_stride_ = tmp_stride;
-}
-
-#define SPEED_TEST (0)
-#if SPEED_TEST
-TEST_P(SADTest, Speed) {
- const int tmp_stride = source_stride_;
- source_stride_ >>= 1;
- FillRandom(source_data_, source_stride_);
- FillRandom(reference_data_, reference_stride_);
- SpeedSAD();
- source_stride_ = tmp_stride;
-}
-#endif
-
-TEST_P(SADavgTest, MaxRef) {
- FillConstant(source_data_, source_stride_, 0);
- FillConstant(reference_data_, reference_stride_, mask_);
- FillConstant(second_pred_, width_, 0);
- CheckSAD();
-}
-TEST_P(SADavgTest, MaxSrc) {
- FillConstant(source_data_, source_stride_, mask_);
- FillConstant(reference_data_, reference_stride_, 0);
- FillConstant(second_pred_, width_, 0);
- CheckSAD();
-}
-
-TEST_P(SADavgTest, ShortRef) {
- const int tmp_stride = reference_stride_;
- reference_stride_ >>= 1;
- FillRandom(source_data_, source_stride_);
- FillRandom(reference_data_, reference_stride_);
- FillRandom(second_pred_, width_);
- CheckSAD();
- reference_stride_ = tmp_stride;
-}
-
-TEST_P(SADavgTest, UnalignedRef) {
- // The reference frame, but not the source frame, may be unaligned for
- // certain types of searches.
- const int tmp_stride = reference_stride_;
- reference_stride_ -= 1;
- FillRandom(source_data_, source_stride_);
- FillRandom(reference_data_, reference_stride_);
- FillRandom(second_pred_, width_);
- CheckSAD();
- reference_stride_ = tmp_stride;
-}
-
-TEST_P(SADavgTest, ShortSrc) {
- const int tmp_stride = source_stride_;
- source_stride_ >>= 1;
- int test_count = 2000;
- while (test_count > 0) {
- FillRandom(source_data_, source_stride_);
- FillRandom(reference_data_, reference_stride_);
- FillRandom(second_pred_, width_);
- CheckSAD();
- test_count -= 1;
- }
- source_stride_ = tmp_stride;
-}
-
-TEST_P(JntCompAvgTest, MaxRef) {
- FillConstant(reference_data_, reference_stride_, mask_);
- FillConstant(second_pred_, width_, 0);
- CheckCompAvg();
-}
-
-TEST_P(JntCompAvgTest, MaxSecondPred) {
- FillConstant(reference_data_, reference_stride_, 0);
- FillConstant(second_pred_, width_, mask_);
- CheckCompAvg();
-}
-
-TEST_P(JntCompAvgTest, ShortRef) {
- const int tmp_stride = reference_stride_;
- reference_stride_ >>= 1;
- FillRandom(reference_data_, reference_stride_);
- FillRandom(second_pred_, width_);
- CheckCompAvg();
- reference_stride_ = tmp_stride;
-}
-
-TEST_P(JntCompAvgTest, UnalignedRef) {
- // The reference frame, but not the source frame, may be unaligned for
- // certain types of searches.
- const int tmp_stride = reference_stride_;
- reference_stride_ -= 1;
- FillRandom(reference_data_, reference_stride_);
- FillRandom(second_pred_, width_);
- CheckCompAvg();
- reference_stride_ = tmp_stride;
-}
-
-TEST_P(JntSADTest, MaxRef) {
- FillConstant(source_data_, source_stride_, 0);
- FillConstant(reference_data_, reference_stride_, mask_);
- CheckSAD();
-}
-
-TEST_P(JntSADTest, MaxSrc) {
- FillConstant(source_data_, source_stride_, mask_);
- FillConstant(reference_data_, reference_stride_, 0);
- CheckSAD();
-}
-
-TEST_P(JntSADTest, ShortRef) {
- const int tmp_stride = reference_stride_;
- reference_stride_ >>= 1;
- FillRandom(source_data_, source_stride_);
- FillRandom(reference_data_, reference_stride_);
- CheckSAD();
- reference_stride_ = tmp_stride;
-}
-
-TEST_P(JntSADTest, UnalignedRef) {
- // The reference frame, but not the source frame, may be unaligned for
- // certain types of searches.
- const int tmp_stride = reference_stride_;
- reference_stride_ -= 1;
- FillRandom(source_data_, source_stride_);
- FillRandom(reference_data_, reference_stride_);
- CheckSAD();
- reference_stride_ = tmp_stride;
-}
-
-TEST_P(JntSADTest, ShortSrc) {
- const int tmp_stride = source_stride_;
- source_stride_ >>= 1;
- int test_count = 2000;
- while (test_count > 0) {
- FillRandom(source_data_, source_stride_);
- FillRandom(reference_data_, reference_stride_);
- CheckSAD();
- test_count -= 1;
- }
- source_stride_ = tmp_stride;
-}
-
-TEST_P(JntSADavgTest, MaxRef) {
- FillConstant(source_data_, source_stride_, 0);
- FillConstant(reference_data_, reference_stride_, mask_);
- FillConstant(second_pred_, width_, 0);
- CheckSAD();
-}
-TEST_P(JntSADavgTest, MaxSrc) {
- FillConstant(source_data_, source_stride_, mask_);
- FillConstant(reference_data_, reference_stride_, 0);
- FillConstant(second_pred_, width_, 0);
- CheckSAD();
-}
-
-TEST_P(JntSADavgTest, ShortRef) {
- const int tmp_stride = reference_stride_;
- reference_stride_ >>= 1;
- FillRandom(source_data_, source_stride_);
- FillRandom(reference_data_, reference_stride_);
- FillRandom(second_pred_, width_);
- CheckSAD();
- reference_stride_ = tmp_stride;
-}
-
-TEST_P(JntSADavgTest, UnalignedRef) {
- // The reference frame, but not the source frame, may be unaligned for
- // certain types of searches.
- const int tmp_stride = reference_stride_;
- reference_stride_ -= 1;
- FillRandom(source_data_, source_stride_);
- FillRandom(reference_data_, reference_stride_);
- FillRandom(second_pred_, width_);
- CheckSAD();
- reference_stride_ = tmp_stride;
-}
-
-TEST_P(JntSADavgTest, ShortSrc) {
- const int tmp_stride = source_stride_;
- source_stride_ >>= 1;
- int test_count = 2000;
- while (test_count > 0) {
- FillRandom(source_data_, source_stride_);
- FillRandom(reference_data_, reference_stride_);
- FillRandom(second_pred_, width_);
- CheckSAD();
- test_count -= 1;
- }
- source_stride_ = tmp_stride;
-}
-
-TEST_P(SADx4Test, MaxRef) {
- FillConstant(source_data_, source_stride_, 0);
- FillConstant(GetReference(0), reference_stride_, mask_);
- FillConstant(GetReference(1), reference_stride_, mask_);
- FillConstant(GetReference(2), reference_stride_, mask_);
- FillConstant(GetReference(3), reference_stride_, mask_);
- CheckSADs();
-}
-
-TEST_P(SADx4Test, MaxSrc) {
- FillConstant(source_data_, source_stride_, mask_);
- FillConstant(GetReference(0), reference_stride_, 0);
- FillConstant(GetReference(1), reference_stride_, 0);
- FillConstant(GetReference(2), reference_stride_, 0);
- FillConstant(GetReference(3), reference_stride_, 0);
- CheckSADs();
-}
-
-TEST_P(SADx4Test, ShortRef) {
- int tmp_stride = reference_stride_;
- reference_stride_ >>= 1;
- FillRandom(source_data_, source_stride_);
- FillRandom(GetReference(0), reference_stride_);
- FillRandom(GetReference(1), reference_stride_);
- FillRandom(GetReference(2), reference_stride_);
- FillRandom(GetReference(3), reference_stride_);
- CheckSADs();
- reference_stride_ = tmp_stride;
-}
-
-TEST_P(SADx4Test, UnalignedRef) {
- // The reference frame, but not the source frame, may be unaligned for
- // certain types of searches.
- int tmp_stride = reference_stride_;
- reference_stride_ -= 1;
- FillRandom(source_data_, source_stride_);
- FillRandom(GetReference(0), reference_stride_);
- FillRandom(GetReference(1), reference_stride_);
- FillRandom(GetReference(2), reference_stride_);
- FillRandom(GetReference(3), reference_stride_);
- CheckSADs();
- reference_stride_ = tmp_stride;
-}
-
-TEST_P(SADx4Test, ShortSrc) {
- int tmp_stride = source_stride_;
- source_stride_ >>= 1;
- int test_count = 1000;
- while (test_count > 0) {
- FillRandom(source_data_, source_stride_);
- FillRandom(GetReference(0), reference_stride_);
- FillRandom(GetReference(1), reference_stride_);
- FillRandom(GetReference(2), reference_stride_);
- FillRandom(GetReference(3), reference_stride_);
- CheckSADs();
- test_count -= 1;
- }
- source_stride_ = tmp_stride;
-}
-
-TEST_P(SADx4Test, SrcAlignedByWidth) {
- uint8_t *tmp_source_data = source_data_;
- source_data_ += width_;
- FillRandom(source_data_, source_stride_);
- FillRandom(GetReference(0), reference_stride_);
- FillRandom(GetReference(1), reference_stride_);
- FillRandom(GetReference(2), reference_stride_);
- FillRandom(GetReference(3), reference_stride_);
- CheckSADs();
- source_data_ = tmp_source_data;
-}
-
-using ::testing::make_tuple;
-
-//------------------------------------------------------------------------------
-// C functions
-const SadMxNParam c_tests[] = {
- make_tuple(128, 128, &aom_sad128x128_c, -1),
- make_tuple(128, 64, &aom_sad128x64_c, -1),
- make_tuple(64, 128, &aom_sad64x128_c, -1),
- make_tuple(64, 64, &aom_sad64x64_c, -1),
- make_tuple(64, 32, &aom_sad64x32_c, -1),
- make_tuple(32, 64, &aom_sad32x64_c, -1),
- make_tuple(32, 32, &aom_sad32x32_c, -1),
- make_tuple(32, 16, &aom_sad32x16_c, -1),
- make_tuple(16, 32, &aom_sad16x32_c, -1),
- make_tuple(16, 16, &aom_sad16x16_c, -1),
- make_tuple(16, 8, &aom_sad16x8_c, -1),
- make_tuple(8, 16, &aom_sad8x16_c, -1),
- make_tuple(8, 8, &aom_sad8x8_c, -1),
- make_tuple(8, 4, &aom_sad8x4_c, -1),
- make_tuple(4, 8, &aom_sad4x8_c, -1),
- make_tuple(4, 4, &aom_sad4x4_c, -1),
- make_tuple(128, 128, &aom_highbd_sad128x128_c, 8),
- make_tuple(128, 64, &aom_highbd_sad128x64_c, 8),
- make_tuple(64, 128, &aom_highbd_sad64x128_c, 8),
- make_tuple(64, 64, &aom_highbd_sad64x64_c, 8),
- make_tuple(64, 32, &aom_highbd_sad64x32_c, 8),
- make_tuple(32, 64, &aom_highbd_sad32x64_c, 8),
- make_tuple(32, 32, &aom_highbd_sad32x32_c, 8),
- make_tuple(32, 16, &aom_highbd_sad32x16_c, 8),
- make_tuple(16, 32, &aom_highbd_sad16x32_c, 8),
- make_tuple(16, 16, &aom_highbd_sad16x16_c, 8),
- make_tuple(16, 8, &aom_highbd_sad16x8_c, 8),
- make_tuple(8, 16, &aom_highbd_sad8x16_c, 8),
- make_tuple(8, 8, &aom_highbd_sad8x8_c, 8),
- make_tuple(8, 4, &aom_highbd_sad8x4_c, 8),
- make_tuple(4, 8, &aom_highbd_sad4x8_c, 8),
- make_tuple(4, 4, &aom_highbd_sad4x4_c, 8),
- make_tuple(128, 128, &aom_highbd_sad128x128_c, 10),
- make_tuple(128, 64, &aom_highbd_sad128x64_c, 10),
- make_tuple(64, 128, &aom_highbd_sad64x128_c, 10),
- make_tuple(64, 64, &aom_highbd_sad64x64_c, 10),
- make_tuple(64, 32, &aom_highbd_sad64x32_c, 10),
- make_tuple(32, 64, &aom_highbd_sad32x64_c, 10),
- make_tuple(32, 32, &aom_highbd_sad32x32_c, 10),
- make_tuple(32, 16, &aom_highbd_sad32x16_c, 10),
- make_tuple(16, 32, &aom_highbd_sad16x32_c, 10),
- make_tuple(16, 16, &aom_highbd_sad16x16_c, 10),
- make_tuple(16, 8, &aom_highbd_sad16x8_c, 10),
- make_tuple(8, 16, &aom_highbd_sad8x16_c, 10),
- make_tuple(8, 8, &aom_highbd_sad8x8_c, 10),
- make_tuple(8, 4, &aom_highbd_sad8x4_c, 10),
- make_tuple(4, 8, &aom_highbd_sad4x8_c, 10),
- make_tuple(4, 4, &aom_highbd_sad4x4_c, 10),
- make_tuple(128, 128, &aom_highbd_sad128x128_c, 12),
- make_tuple(128, 64, &aom_highbd_sad128x64_c, 12),
- make_tuple(64, 128, &aom_highbd_sad64x128_c, 12),
- make_tuple(64, 64, &aom_highbd_sad64x64_c, 12),
- make_tuple(64, 32, &aom_highbd_sad64x32_c, 12),
- make_tuple(32, 64, &aom_highbd_sad32x64_c, 12),
- make_tuple(32, 32, &aom_highbd_sad32x32_c, 12),
- make_tuple(32, 16, &aom_highbd_sad32x16_c, 12),
- make_tuple(16, 32, &aom_highbd_sad16x32_c, 12),
- make_tuple(16, 16, &aom_highbd_sad16x16_c, 12),
- make_tuple(16, 8, &aom_highbd_sad16x8_c, 12),
- make_tuple(8, 16, &aom_highbd_sad8x16_c, 12),
- make_tuple(8, 8, &aom_highbd_sad8x8_c, 12),
- make_tuple(8, 4, &aom_highbd_sad8x4_c, 12),
- make_tuple(4, 8, &aom_highbd_sad4x8_c, 12),
- make_tuple(4, 4, &aom_highbd_sad4x4_c, 12),
-};
-INSTANTIATE_TEST_CASE_P(C, SADTest, ::testing::ValuesIn(c_tests));
-
-const SadMxNAvgParam avg_c_tests[] = {
- make_tuple(128, 128, &aom_sad128x128_avg_c, -1),
- make_tuple(128, 64, &aom_sad128x64_avg_c, -1),
- make_tuple(64, 128, &aom_sad64x128_avg_c, -1),
- make_tuple(64, 64, &aom_sad64x64_avg_c, -1),
- make_tuple(64, 32, &aom_sad64x32_avg_c, -1),
- make_tuple(32, 64, &aom_sad32x64_avg_c, -1),
- make_tuple(32, 32, &aom_sad32x32_avg_c, -1),
- make_tuple(32, 16, &aom_sad32x16_avg_c, -1),
- make_tuple(16, 32, &aom_sad16x32_avg_c, -1),
- make_tuple(16, 16, &aom_sad16x16_avg_c, -1),
- make_tuple(16, 8, &aom_sad16x8_avg_c, -1),
- make_tuple(8, 16, &aom_sad8x16_avg_c, -1),
- make_tuple(8, 8, &aom_sad8x8_avg_c, -1),
- make_tuple(8, 4, &aom_sad8x4_avg_c, -1),
- make_tuple(4, 8, &aom_sad4x8_avg_c, -1),
- make_tuple(4, 4, &aom_sad4x4_avg_c, -1),
- make_tuple(128, 128, &aom_highbd_sad128x128_avg_c, 8),
- make_tuple(128, 64, &aom_highbd_sad128x64_avg_c, 8),
- make_tuple(64, 128, &aom_highbd_sad64x128_avg_c, 8),
- make_tuple(64, 64, &aom_highbd_sad64x64_avg_c, 8),
- make_tuple(64, 32, &aom_highbd_sad64x32_avg_c, 8),
- make_tuple(32, 64, &aom_highbd_sad32x64_avg_c, 8),
- make_tuple(32, 32, &aom_highbd_sad32x32_avg_c, 8),
- make_tuple(32, 16, &aom_highbd_sad32x16_avg_c, 8),
- make_tuple(16, 32, &aom_highbd_sad16x32_avg_c, 8),
- make_tuple(16, 16, &aom_highbd_sad16x16_avg_c, 8),
- make_tuple(16, 8, &aom_highbd_sad16x8_avg_c, 8),
- make_tuple(8, 16, &aom_highbd_sad8x16_avg_c, 8),
- make_tuple(8, 8, &aom_highbd_sad8x8_avg_c, 8),
- make_tuple(8, 4, &aom_highbd_sad8x4_avg_c, 8),
- make_tuple(4, 8, &aom_highbd_sad4x8_avg_c, 8),
- make_tuple(4, 4, &aom_highbd_sad4x4_avg_c, 8),
- make_tuple(128, 128, &aom_highbd_sad128x128_avg_c, 10),
- make_tuple(128, 64, &aom_highbd_sad128x64_avg_c, 10),
- make_tuple(64, 128, &aom_highbd_sad64x128_avg_c, 10),
- make_tuple(64, 64, &aom_highbd_sad64x64_avg_c, 10),
- make_tuple(64, 32, &aom_highbd_sad64x32_avg_c, 10),
- make_tuple(32, 64, &aom_highbd_sad32x64_avg_c, 10),
- make_tuple(32, 32, &aom_highbd_sad32x32_avg_c, 10),
- make_tuple(32, 16, &aom_highbd_sad32x16_avg_c, 10),
- make_tuple(16, 32, &aom_highbd_sad16x32_avg_c, 10),
- make_tuple(16, 16, &aom_highbd_sad16x16_avg_c, 10),
- make_tuple(16, 8, &aom_highbd_sad16x8_avg_c, 10),
- make_tuple(8, 16, &aom_highbd_sad8x16_avg_c, 10),
- make_tuple(8, 8, &aom_highbd_sad8x8_avg_c, 10),
- make_tuple(8, 4, &aom_highbd_sad8x4_avg_c, 10),
- make_tuple(4, 8, &aom_highbd_sad4x8_avg_c, 10),
- make_tuple(4, 4, &aom_highbd_sad4x4_avg_c, 10),
- make_tuple(128, 128, &aom_highbd_sad128x128_avg_c, 12),
- make_tuple(128, 64, &aom_highbd_sad128x64_avg_c, 12),
- make_tuple(64, 128, &aom_highbd_sad64x128_avg_c, 12),
- make_tuple(64, 64, &aom_highbd_sad64x64_avg_c, 12),
- make_tuple(64, 32, &aom_highbd_sad64x32_avg_c, 12),
- make_tuple(32, 64, &aom_highbd_sad32x64_avg_c, 12),
- make_tuple(32, 32, &aom_highbd_sad32x32_avg_c, 12),
- make_tuple(32, 16, &aom_highbd_sad32x16_avg_c, 12),
- make_tuple(16, 32, &aom_highbd_sad16x32_avg_c, 12),
- make_tuple(16, 16, &aom_highbd_sad16x16_avg_c, 12),
- make_tuple(16, 8, &aom_highbd_sad16x8_avg_c, 12),
- make_tuple(8, 16, &aom_highbd_sad8x16_avg_c, 12),
- make_tuple(8, 8, &aom_highbd_sad8x8_avg_c, 12),
- make_tuple(8, 4, &aom_highbd_sad8x4_avg_c, 12),
- make_tuple(4, 8, &aom_highbd_sad4x8_avg_c, 12),
- make_tuple(4, 4, &aom_highbd_sad4x4_avg_c, 12),
-};
-INSTANTIATE_TEST_CASE_P(C, SADavgTest, ::testing::ValuesIn(avg_c_tests));
-
-// TODO(chengchen): add highbd tests
-const JntCompAvgParam jnt_comp_avg_c_tests[] = {
- make_tuple(128, 128, &aom_jnt_comp_avg_pred_c, -1),
- make_tuple(128, 64, &aom_jnt_comp_avg_pred_c, -1),
- make_tuple(64, 128, &aom_jnt_comp_avg_pred_c, -1),
- make_tuple(64, 64, &aom_jnt_comp_avg_pred_c, -1),
- make_tuple(64, 32, &aom_jnt_comp_avg_pred_c, -1),
- make_tuple(32, 64, &aom_jnt_comp_avg_pred_c, -1),
- make_tuple(32, 32, &aom_jnt_comp_avg_pred_c, -1),
- make_tuple(32, 16, &aom_jnt_comp_avg_pred_c, -1),
- make_tuple(16, 32, &aom_jnt_comp_avg_pred_c, -1),
- make_tuple(16, 16, &aom_jnt_comp_avg_pred_c, -1),
- make_tuple(16, 8, &aom_jnt_comp_avg_pred_c, -1),
- make_tuple(8, 16, &aom_jnt_comp_avg_pred_c, -1),
- make_tuple(8, 8, &aom_jnt_comp_avg_pred_c, -1),
- make_tuple(8, 4, &aom_jnt_comp_avg_pred_c, -1),
- make_tuple(4, 8, &aom_jnt_comp_avg_pred_c, -1),
- make_tuple(4, 4, &aom_jnt_comp_avg_pred_c, -1),
-};
-
-INSTANTIATE_TEST_CASE_P(C, JntCompAvgTest,
- ::testing::ValuesIn(jnt_comp_avg_c_tests));
-
-const JntSadMxNAvgParam jnt_avg_c_tests[] = {
- make_tuple(128, 128, &aom_jnt_sad128x128_avg_c, -1),
- make_tuple(128, 64, &aom_jnt_sad128x64_avg_c, -1),
- make_tuple(64, 128, &aom_jnt_sad64x128_avg_c, -1),
- make_tuple(64, 64, &aom_jnt_sad64x64_avg_c, -1),
- make_tuple(64, 32, &aom_jnt_sad64x32_avg_c, -1),
- make_tuple(32, 64, &aom_jnt_sad32x64_avg_c, -1),
- make_tuple(32, 32, &aom_jnt_sad32x32_avg_c, -1),
- make_tuple(32, 16, &aom_jnt_sad32x16_avg_c, -1),
- make_tuple(16, 32, &aom_jnt_sad16x32_avg_c, -1),
- make_tuple(16, 16, &aom_jnt_sad16x16_avg_c, -1),
- make_tuple(16, 8, &aom_jnt_sad16x8_avg_c, -1),
- make_tuple(8, 16, &aom_jnt_sad8x16_avg_c, -1),
- make_tuple(8, 8, &aom_jnt_sad8x8_avg_c, -1),
- make_tuple(8, 4, &aom_jnt_sad8x4_avg_c, -1),
- make_tuple(4, 8, &aom_jnt_sad4x8_avg_c, -1),
- make_tuple(4, 4, &aom_jnt_sad4x4_avg_c, -1),
-};
-INSTANTIATE_TEST_CASE_P(C, JntSADavgTest, ::testing::ValuesIn(jnt_avg_c_tests));
-
-const SadMxNx4Param x4d_c_tests[] = {
- make_tuple(128, 128, &aom_sad128x128x4d_c, -1),
- make_tuple(128, 64, &aom_sad128x64x4d_c, -1),
- make_tuple(64, 128, &aom_sad64x128x4d_c, -1),
- make_tuple(64, 64, &aom_sad64x64x4d_c, -1),
- make_tuple(64, 32, &aom_sad64x32x4d_c, -1),
- make_tuple(32, 64, &aom_sad32x64x4d_c, -1),
- make_tuple(32, 32, &aom_sad32x32x4d_c, -1),
- make_tuple(32, 16, &aom_sad32x16x4d_c, -1),
- make_tuple(16, 32, &aom_sad16x32x4d_c, -1),
- make_tuple(16, 16, &aom_sad16x16x4d_c, -1),
- make_tuple(16, 8, &aom_sad16x8x4d_c, -1),
- make_tuple(8, 16, &aom_sad8x16x4d_c, -1),
- make_tuple(8, 8, &aom_sad8x8x4d_c, -1),
- make_tuple(8, 4, &aom_sad8x4x4d_c, -1),
- make_tuple(4, 8, &aom_sad4x8x4d_c, -1),
- make_tuple(4, 4, &aom_sad4x4x4d_c, -1),
- make_tuple(128, 128, &aom_highbd_sad128x128x4d_c, 8),
- make_tuple(128, 64, &aom_highbd_sad128x64x4d_c, 8),
- make_tuple(64, 128, &aom_highbd_sad64x128x4d_c, 8),
- make_tuple(64, 64, &aom_highbd_sad64x64x4d_c, 8),
- make_tuple(64, 32, &aom_highbd_sad64x32x4d_c, 8),
- make_tuple(32, 64, &aom_highbd_sad32x64x4d_c, 8),
- make_tuple(32, 32, &aom_highbd_sad32x32x4d_c, 8),
- make_tuple(32, 16, &aom_highbd_sad32x16x4d_c, 8),
- make_tuple(16, 32, &aom_highbd_sad16x32x4d_c, 8),
- make_tuple(16, 16, &aom_highbd_sad16x16x4d_c, 8),
- make_tuple(16, 8, &aom_highbd_sad16x8x4d_c, 8),
- make_tuple(8, 16, &aom_highbd_sad8x16x4d_c, 8),
- make_tuple(8, 8, &aom_highbd_sad8x8x4d_c, 8),
- make_tuple(8, 4, &aom_highbd_sad8x4x4d_c, 8),
- make_tuple(4, 8, &aom_highbd_sad4x8x4d_c, 8),
- make_tuple(4, 4, &aom_highbd_sad4x4x4d_c, 8),
- make_tuple(128, 128, &aom_highbd_sad128x128x4d_c, 10),
- make_tuple(128, 64, &aom_highbd_sad128x64x4d_c, 10),
- make_tuple(64, 128, &aom_highbd_sad64x128x4d_c, 10),
- make_tuple(64, 64, &aom_highbd_sad64x64x4d_c, 10),
- make_tuple(64, 32, &aom_highbd_sad64x32x4d_c, 10),
- make_tuple(32, 64, &aom_highbd_sad32x64x4d_c, 10),
- make_tuple(32, 32, &aom_highbd_sad32x32x4d_c, 10),
- make_tuple(32, 16, &aom_highbd_sad32x16x4d_c, 10),
- make_tuple(16, 32, &aom_highbd_sad16x32x4d_c, 10),
- make_tuple(16, 16, &aom_highbd_sad16x16x4d_c, 10),
- make_tuple(16, 8, &aom_highbd_sad16x8x4d_c, 10),
- make_tuple(8, 16, &aom_highbd_sad8x16x4d_c, 10),
- make_tuple(8, 8, &aom_highbd_sad8x8x4d_c, 10),
- make_tuple(8, 4, &aom_highbd_sad8x4x4d_c, 10),
- make_tuple(4, 8, &aom_highbd_sad4x8x4d_c, 10),
- make_tuple(4, 4, &aom_highbd_sad4x4x4d_c, 10),
- make_tuple(128, 128, &aom_highbd_sad128x128x4d_c, 12),
- make_tuple(128, 64, &aom_highbd_sad128x64x4d_c, 12),
- make_tuple(64, 128, &aom_highbd_sad64x128x4d_c, 12),
- make_tuple(64, 64, &aom_highbd_sad64x64x4d_c, 12),
- make_tuple(64, 32, &aom_highbd_sad64x32x4d_c, 12),
- make_tuple(32, 64, &aom_highbd_sad32x64x4d_c, 12),
- make_tuple(32, 32, &aom_highbd_sad32x32x4d_c, 12),
- make_tuple(32, 16, &aom_highbd_sad32x16x4d_c, 12),
- make_tuple(16, 32, &aom_highbd_sad16x32x4d_c, 12),
- make_tuple(16, 16, &aom_highbd_sad16x16x4d_c, 12),
- make_tuple(16, 8, &aom_highbd_sad16x8x4d_c, 12),
- make_tuple(8, 16, &aom_highbd_sad8x16x4d_c, 12),
- make_tuple(8, 8, &aom_highbd_sad8x8x4d_c, 12),
- make_tuple(8, 4, &aom_highbd_sad8x4x4d_c, 12),
- make_tuple(4, 8, &aom_highbd_sad4x8x4d_c, 12),
- make_tuple(4, 4, &aom_highbd_sad4x4x4d_c, 12),
-};
-INSTANTIATE_TEST_CASE_P(C, SADx4Test, ::testing::ValuesIn(x4d_c_tests));
-
-//------------------------------------------------------------------------------
-// ARM functions
-#if HAVE_NEON
-const SadMxNParam neon_tests[] = {
- make_tuple(64, 64, &aom_sad64x64_neon, -1),
- make_tuple(32, 32, &aom_sad32x32_neon, -1),
- make_tuple(16, 16, &aom_sad16x16_neon, -1),
- make_tuple(16, 8, &aom_sad16x8_neon, -1),
- make_tuple(8, 16, &aom_sad8x16_neon, -1),
- make_tuple(8, 8, &aom_sad8x8_neon, -1),
- make_tuple(4, 4, &aom_sad4x4_neon, -1),
-};
-INSTANTIATE_TEST_CASE_P(NEON, SADTest, ::testing::ValuesIn(neon_tests));
-
-const SadMxNx4Param x4d_neon_tests[] = {
- make_tuple(64, 64, &aom_sad64x64x4d_neon, -1),
- make_tuple(32, 32, &aom_sad32x32x4d_neon, -1),
- make_tuple(16, 16, &aom_sad16x16x4d_neon, -1),
-};
-INSTANTIATE_TEST_CASE_P(NEON, SADx4Test, ::testing::ValuesIn(x4d_neon_tests));
-#endif // HAVE_NEON
-
-//------------------------------------------------------------------------------
-// x86 functions
-#if HAVE_SSE2
-const SadMxNParam sse2_tests[] = {
- make_tuple(128, 128, &aom_sad128x128_sse2, -1),
- make_tuple(128, 64, &aom_sad128x64_sse2, -1),
- make_tuple(64, 128, &aom_sad64x128_sse2, -1),
- make_tuple(64, 64, &aom_sad64x64_sse2, -1),
- make_tuple(64, 32, &aom_sad64x32_sse2, -1),
- make_tuple(32, 64, &aom_sad32x64_sse2, -1),
- make_tuple(32, 32, &aom_sad32x32_sse2, -1),
- make_tuple(32, 16, &aom_sad32x16_sse2, -1),
- make_tuple(16, 32, &aom_sad16x32_sse2, -1),
- make_tuple(16, 16, &aom_sad16x16_sse2, -1),
- make_tuple(16, 8, &aom_sad16x8_sse2, -1),
- make_tuple(8, 16, &aom_sad8x16_sse2, -1),
- make_tuple(8, 8, &aom_sad8x8_sse2, -1),
- make_tuple(8, 4, &aom_sad8x4_sse2, -1),
- make_tuple(4, 8, &aom_sad4x8_sse2, -1),
- make_tuple(4, 4, &aom_sad4x4_sse2, -1),
- make_tuple(64, 64, &aom_highbd_sad64x64_sse2, 8),
- make_tuple(64, 32, &aom_highbd_sad64x32_sse2, 8),
- make_tuple(32, 64, &aom_highbd_sad32x64_sse2, 8),
- make_tuple(32, 32, &aom_highbd_sad32x32_sse2, 8),
- make_tuple(32, 16, &aom_highbd_sad32x16_sse2, 8),
- make_tuple(16, 32, &aom_highbd_sad16x32_sse2, 8),
- make_tuple(16, 16, &aom_highbd_sad16x16_sse2, 8),
- make_tuple(16, 8, &aom_highbd_sad16x8_sse2, 8),
- make_tuple(8, 16, &aom_highbd_sad8x16_sse2, 8),
- make_tuple(8, 8, &aom_highbd_sad8x8_sse2, 8),
- make_tuple(8, 4, &aom_highbd_sad8x4_sse2, 8),
- make_tuple(64, 64, &aom_highbd_sad64x64_sse2, 10),
- make_tuple(64, 32, &aom_highbd_sad64x32_sse2, 10),
- make_tuple(32, 64, &aom_highbd_sad32x64_sse2, 10),
- make_tuple(32, 32, &aom_highbd_sad32x32_sse2, 10),
- make_tuple(32, 16, &aom_highbd_sad32x16_sse2, 10),
- make_tuple(16, 32, &aom_highbd_sad16x32_sse2, 10),
- make_tuple(16, 16, &aom_highbd_sad16x16_sse2, 10),
- make_tuple(16, 8, &aom_highbd_sad16x8_sse2, 10),
- make_tuple(8, 16, &aom_highbd_sad8x16_sse2, 10),
- make_tuple(8, 8, &aom_highbd_sad8x8_sse2, 10),
- make_tuple(8, 4, &aom_highbd_sad8x4_sse2, 10),
- make_tuple(64, 64, &aom_highbd_sad64x64_sse2, 12),
- make_tuple(64, 32, &aom_highbd_sad64x32_sse2, 12),
- make_tuple(32, 64, &aom_highbd_sad32x64_sse2, 12),
- make_tuple(32, 32, &aom_highbd_sad32x32_sse2, 12),
- make_tuple(32, 16, &aom_highbd_sad32x16_sse2, 12),
- make_tuple(16, 32, &aom_highbd_sad16x32_sse2, 12),
- make_tuple(16, 16, &aom_highbd_sad16x16_sse2, 12),
- make_tuple(16, 8, &aom_highbd_sad16x8_sse2, 12),
- make_tuple(8, 16, &aom_highbd_sad8x16_sse2, 12),
- make_tuple(8, 8, &aom_highbd_sad8x8_sse2, 12),
- make_tuple(8, 4, &aom_highbd_sad8x4_sse2, 12),
-};
-INSTANTIATE_TEST_CASE_P(SSE2, SADTest, ::testing::ValuesIn(sse2_tests));
-
-const SadMxNAvgParam avg_sse2_tests[] = {
- make_tuple(128, 128, &aom_sad128x128_avg_sse2, -1),
- make_tuple(128, 64, &aom_sad128x64_avg_sse2, -1),
- make_tuple(64, 128, &aom_sad64x128_avg_sse2, -1),
- make_tuple(64, 64, &aom_sad64x64_avg_sse2, -1),
- make_tuple(64, 32, &aom_sad64x32_avg_sse2, -1),
- make_tuple(32, 64, &aom_sad32x64_avg_sse2, -1),
- make_tuple(32, 32, &aom_sad32x32_avg_sse2, -1),
- make_tuple(32, 16, &aom_sad32x16_avg_sse2, -1),
- make_tuple(16, 32, &aom_sad16x32_avg_sse2, -1),
- make_tuple(16, 16, &aom_sad16x16_avg_sse2, -1),
- make_tuple(16, 8, &aom_sad16x8_avg_sse2, -1),
- make_tuple(8, 16, &aom_sad8x16_avg_sse2, -1),
- make_tuple(8, 8, &aom_sad8x8_avg_sse2, -1),
- make_tuple(8, 4, &aom_sad8x4_avg_sse2, -1),
- make_tuple(4, 8, &aom_sad4x8_avg_sse2, -1),
- make_tuple(4, 4, &aom_sad4x4_avg_sse2, -1),
- make_tuple(64, 64, &aom_highbd_sad64x64_avg_sse2, 8),
- make_tuple(64, 32, &aom_highbd_sad64x32_avg_sse2, 8),
- make_tuple(32, 64, &aom_highbd_sad32x64_avg_sse2, 8),
- make_tuple(32, 32, &aom_highbd_sad32x32_avg_sse2, 8),
- make_tuple(32, 16, &aom_highbd_sad32x16_avg_sse2, 8),
- make_tuple(16, 32, &aom_highbd_sad16x32_avg_sse2, 8),
- make_tuple(16, 16, &aom_highbd_sad16x16_avg_sse2, 8),
- make_tuple(16, 8, &aom_highbd_sad16x8_avg_sse2, 8),
- make_tuple(8, 16, &aom_highbd_sad8x16_avg_sse2, 8),
- make_tuple(8, 8, &aom_highbd_sad8x8_avg_sse2, 8),
- make_tuple(8, 4, &aom_highbd_sad8x4_avg_sse2, 8),
- make_tuple(64, 64, &aom_highbd_sad64x64_avg_sse2, 10),
- make_tuple(64, 32, &aom_highbd_sad64x32_avg_sse2, 10),
- make_tuple(32, 64, &aom_highbd_sad32x64_avg_sse2, 10),
- make_tuple(32, 32, &aom_highbd_sad32x32_avg_sse2, 10),
- make_tuple(32, 16, &aom_highbd_sad32x16_avg_sse2, 10),
- make_tuple(16, 32, &aom_highbd_sad16x32_avg_sse2, 10),
- make_tuple(16, 16, &aom_highbd_sad16x16_avg_sse2, 10),
- make_tuple(16, 8, &aom_highbd_sad16x8_avg_sse2, 10),
- make_tuple(8, 16, &aom_highbd_sad8x16_avg_sse2, 10),
- make_tuple(8, 8, &aom_highbd_sad8x8_avg_sse2, 10),
- make_tuple(8, 4, &aom_highbd_sad8x4_avg_sse2, 10),
- make_tuple(64, 64, &aom_highbd_sad64x64_avg_sse2, 12),
- make_tuple(64, 32, &aom_highbd_sad64x32_avg_sse2, 12),
- make_tuple(32, 64, &aom_highbd_sad32x64_avg_sse2, 12),
- make_tuple(32, 32, &aom_highbd_sad32x32_avg_sse2, 12),
- make_tuple(32, 16, &aom_highbd_sad32x16_avg_sse2, 12),
- make_tuple(16, 32, &aom_highbd_sad16x32_avg_sse2, 12),
- make_tuple(16, 16, &aom_highbd_sad16x16_avg_sse2, 12),
- make_tuple(16, 8, &aom_highbd_sad16x8_avg_sse2, 12),
- make_tuple(8, 16, &aom_highbd_sad8x16_avg_sse2, 12),
- make_tuple(8, 8, &aom_highbd_sad8x8_avg_sse2, 12),
- make_tuple(8, 4, &aom_highbd_sad8x4_avg_sse2, 12),
-};
-INSTANTIATE_TEST_CASE_P(SSE2, SADavgTest, ::testing::ValuesIn(avg_sse2_tests));
-
-const SadMxNx4Param x4d_sse2_tests[] = {
- make_tuple(128, 128, &aom_sad128x128x4d_sse2, -1),
- make_tuple(128, 64, &aom_sad128x64x4d_sse2, -1),
- make_tuple(64, 128, &aom_sad64x128x4d_sse2, -1),
- make_tuple(64, 64, &aom_sad64x64x4d_sse2, -1),
- make_tuple(64, 32, &aom_sad64x32x4d_sse2, -1),
- make_tuple(32, 64, &aom_sad32x64x4d_sse2, -1),
- make_tuple(32, 32, &aom_sad32x32x4d_sse2, -1),
- make_tuple(32, 16, &aom_sad32x16x4d_sse2, -1),
- make_tuple(16, 32, &aom_sad16x32x4d_sse2, -1),
- make_tuple(16, 16, &aom_sad16x16x4d_sse2, -1),
- make_tuple(16, 8, &aom_sad16x8x4d_sse2, -1),
- make_tuple(8, 16, &aom_sad8x16x4d_sse2, -1),
- make_tuple(8, 8, &aom_sad8x8x4d_sse2, -1),
- make_tuple(8, 4, &aom_sad8x4x4d_sse2, -1),
- make_tuple(4, 8, &aom_sad4x8x4d_sse2, -1),
- make_tuple(4, 4, &aom_sad4x4x4d_sse2, -1),
- make_tuple(64, 64, &aom_highbd_sad64x64x4d_sse2, 8),
- make_tuple(64, 32, &aom_highbd_sad64x32x4d_sse2, 8),
- make_tuple(32, 64, &aom_highbd_sad32x64x4d_sse2, 8),
- make_tuple(32, 32, &aom_highbd_sad32x32x4d_sse2, 8),
- make_tuple(32, 16, &aom_highbd_sad32x16x4d_sse2, 8),
- make_tuple(16, 32, &aom_highbd_sad16x32x4d_sse2, 8),
- make_tuple(16, 16, &aom_highbd_sad16x16x4d_sse2, 8),
- make_tuple(16, 8, &aom_highbd_sad16x8x4d_sse2, 8),
- make_tuple(8, 16, &aom_highbd_sad8x16x4d_sse2, 8),
- make_tuple(8, 8, &aom_highbd_sad8x8x4d_sse2, 8),
- make_tuple(8, 4, &aom_highbd_sad8x4x4d_sse2, 8),
- make_tuple(4, 8, &aom_highbd_sad4x8x4d_sse2, 8),
- make_tuple(4, 4, &aom_highbd_sad4x4x4d_sse2, 8),
- make_tuple(64, 64, &aom_highbd_sad64x64x4d_sse2, 10),
- make_tuple(64, 32, &aom_highbd_sad64x32x4d_sse2, 10),
- make_tuple(32, 64, &aom_highbd_sad32x64x4d_sse2, 10),
- make_tuple(32, 32, &aom_highbd_sad32x32x4d_sse2, 10),
- make_tuple(32, 16, &aom_highbd_sad32x16x4d_sse2, 10),
- make_tuple(16, 32, &aom_highbd_sad16x32x4d_sse2, 10),
- make_tuple(16, 16, &aom_highbd_sad16x16x4d_sse2, 10),
- make_tuple(16, 8, &aom_highbd_sad16x8x4d_sse2, 10),
- make_tuple(8, 16, &aom_highbd_sad8x16x4d_sse2, 10),
- make_tuple(8, 8, &aom_highbd_sad8x8x4d_sse2, 10),
- make_tuple(8, 4, &aom_highbd_sad8x4x4d_sse2, 10),
- make_tuple(4, 8, &aom_highbd_sad4x8x4d_sse2, 10),
- make_tuple(4, 4, &aom_highbd_sad4x4x4d_sse2, 10),
- make_tuple(64, 64, &aom_highbd_sad64x64x4d_sse2, 12),
- make_tuple(64, 32, &aom_highbd_sad64x32x4d_sse2, 12),
- make_tuple(32, 64, &aom_highbd_sad32x64x4d_sse2, 12),
- make_tuple(32, 32, &aom_highbd_sad32x32x4d_sse2, 12),
- make_tuple(32, 16, &aom_highbd_sad32x16x4d_sse2, 12),
- make_tuple(16, 32, &aom_highbd_sad16x32x4d_sse2, 12),
- make_tuple(16, 16, &aom_highbd_sad16x16x4d_sse2, 12),
- make_tuple(16, 8, &aom_highbd_sad16x8x4d_sse2, 12),
- make_tuple(8, 16, &aom_highbd_sad8x16x4d_sse2, 12),
- make_tuple(8, 8, &aom_highbd_sad8x8x4d_sse2, 12),
- make_tuple(8, 4, &aom_highbd_sad8x4x4d_sse2, 12),
- make_tuple(4, 8, &aom_highbd_sad4x8x4d_sse2, 12),
- make_tuple(4, 4, &aom_highbd_sad4x4x4d_sse2, 12),
-};
-INSTANTIATE_TEST_CASE_P(SSE2, SADx4Test, ::testing::ValuesIn(x4d_sse2_tests));
-#endif // HAVE_SSE2
-
-#if HAVE_SSSE3
-// Note: These are named sse2, but part of ssse3 file and only built and linked
-// when ssse3 is enabled.
-const JntSadMxhParam jnt_sad_sse2_tests[] = {
- make_tuple(4, 4, &aom_sad4xh_sse2, -1),
- make_tuple(4, 8, &aom_sad4xh_sse2, -1),
- make_tuple(8, 4, &aom_sad8xh_sse2, -1),
- make_tuple(8, 8, &aom_sad8xh_sse2, -1),
- make_tuple(8, 16, &aom_sad8xh_sse2, -1),
- make_tuple(16, 8, &aom_sad16xh_sse2, -1),
- make_tuple(16, 16, &aom_sad16xh_sse2, -1),
- make_tuple(16, 32, &aom_sad16xh_sse2, -1),
- make_tuple(32, 16, &aom_sad32xh_sse2, -1),
- make_tuple(32, 32, &aom_sad32xh_sse2, -1),
- make_tuple(32, 64, &aom_sad32xh_sse2, -1),
- make_tuple(64, 32, &aom_sad64xh_sse2, -1),
- make_tuple(64, 64, &aom_sad64xh_sse2, -1),
- make_tuple(128, 128, &aom_sad128xh_sse2, -1),
- make_tuple(128, 64, &aom_sad128xh_sse2, -1),
- make_tuple(64, 128, &aom_sad64xh_sse2, -1),
- make_tuple(4, 16, &aom_sad4xh_sse2, -1),
- make_tuple(16, 4, &aom_sad16xh_sse2, -1),
- make_tuple(8, 32, &aom_sad8xh_sse2, -1),
- make_tuple(32, 8, &aom_sad32xh_sse2, -1),
- make_tuple(16, 64, &aom_sad16xh_sse2, -1),
- make_tuple(64, 16, &aom_sad64xh_sse2, -1),
-};
-INSTANTIATE_TEST_CASE_P(SSE2, JntSADTest,
- ::testing::ValuesIn(jnt_sad_sse2_tests));
-
-#endif // HAVE_SSSE3
-
-#if HAVE_SSE3
-// Only functions are x3, which do not have tests.
-#endif // HAVE_SSE3
-
-#if HAVE_SSSE3
-const JntCompAvgParam jnt_comp_avg_ssse3_tests[] = {
- make_tuple(128, 128, &aom_jnt_comp_avg_pred_ssse3, -1),
- make_tuple(128, 64, &aom_jnt_comp_avg_pred_ssse3, -1),
- make_tuple(64, 128, &aom_jnt_comp_avg_pred_ssse3, -1),
- make_tuple(64, 64, &aom_jnt_comp_avg_pred_ssse3, -1),
- make_tuple(64, 32, &aom_jnt_comp_avg_pred_ssse3, -1),
- make_tuple(32, 64, &aom_jnt_comp_avg_pred_ssse3, -1),
- make_tuple(32, 32, &aom_jnt_comp_avg_pred_ssse3, -1),
- make_tuple(32, 16, &aom_jnt_comp_avg_pred_ssse3, -1),
- make_tuple(16, 32, &aom_jnt_comp_avg_pred_ssse3, -1),
- make_tuple(16, 16, &aom_jnt_comp_avg_pred_ssse3, -1),
- make_tuple(16, 8, &aom_jnt_comp_avg_pred_ssse3, -1),
- make_tuple(8, 16, &aom_jnt_comp_avg_pred_ssse3, -1),
- make_tuple(8, 8, &aom_jnt_comp_avg_pred_ssse3, -1),
- make_tuple(8, 4, &aom_jnt_comp_avg_pred_ssse3, -1),
- make_tuple(4, 8, &aom_jnt_comp_avg_pred_ssse3, -1),
- make_tuple(4, 4, &aom_jnt_comp_avg_pred_ssse3, -1),
- make_tuple(16, 16, &aom_jnt_comp_avg_pred_ssse3, -1),
-};
-
-INSTANTIATE_TEST_CASE_P(SSSE3, JntCompAvgTest,
- ::testing::ValuesIn(jnt_comp_avg_ssse3_tests));
-
-const JntSadMxNAvgParam jnt_avg_ssse3_tests[] = {
- make_tuple(128, 128, &aom_jnt_sad128x128_avg_ssse3, -1),
- make_tuple(128, 64, &aom_jnt_sad128x64_avg_ssse3, -1),
- make_tuple(64, 128, &aom_jnt_sad64x128_avg_ssse3, -1),
- make_tuple(64, 64, &aom_jnt_sad64x64_avg_ssse3, -1),
- make_tuple(64, 32, &aom_jnt_sad64x32_avg_ssse3, -1),
- make_tuple(32, 64, &aom_jnt_sad32x64_avg_ssse3, -1),
- make_tuple(32, 32, &aom_jnt_sad32x32_avg_ssse3, -1),
- make_tuple(32, 16, &aom_jnt_sad32x16_avg_ssse3, -1),
- make_tuple(16, 32, &aom_jnt_sad16x32_avg_ssse3, -1),
- make_tuple(16, 16, &aom_jnt_sad16x16_avg_ssse3, -1),
- make_tuple(16, 8, &aom_jnt_sad16x8_avg_ssse3, -1),
- make_tuple(8, 16, &aom_jnt_sad8x16_avg_ssse3, -1),
- make_tuple(8, 8, &aom_jnt_sad8x8_avg_ssse3, -1),
- make_tuple(8, 4, &aom_jnt_sad8x4_avg_ssse3, -1),
- make_tuple(4, 8, &aom_jnt_sad4x8_avg_ssse3, -1),
- make_tuple(4, 4, &aom_jnt_sad4x4_avg_ssse3, -1),
-};
-INSTANTIATE_TEST_CASE_P(SSSE3, JntSADavgTest,
- ::testing::ValuesIn(jnt_avg_ssse3_tests));
-#endif // HAVE_SSSE3
-
-#if HAVE_SSE4_1
-// Only functions are x8, which do not have tests.
-#endif // HAVE_SSE4_1
-
-#if HAVE_AVX2
-const SadMxNParam avx2_tests[] = {
- make_tuple(64, 128, &aom_sad64x128_avx2, -1),
- make_tuple(128, 64, &aom_sad128x64_avx2, -1),
- make_tuple(128, 128, &aom_sad128x128_avx2, -1),
- make_tuple(64, 64, &aom_sad64x64_avx2, -1),
- make_tuple(64, 32, &aom_sad64x32_avx2, -1),
- make_tuple(32, 64, &aom_sad32x64_avx2, -1),
- make_tuple(32, 32, &aom_sad32x32_avx2, -1),
- make_tuple(32, 16, &aom_sad32x16_avx2, -1),
- make_tuple(128, 128, &aom_highbd_sad128x128_avx2, 8),
- make_tuple(128, 128, &aom_highbd_sad128x128_avx2, 10),
- make_tuple(128, 128, &aom_highbd_sad128x128_avx2, 12),
- make_tuple(128, 64, &aom_highbd_sad128x64_avx2, 8),
- make_tuple(128, 64, &aom_highbd_sad128x64_avx2, 10),
- make_tuple(128, 64, &aom_highbd_sad128x64_avx2, 12),
- make_tuple(64, 128, &aom_highbd_sad64x128_avx2, 8),
- make_tuple(64, 128, &aom_highbd_sad64x128_avx2, 10),
- make_tuple(64, 128, &aom_highbd_sad64x128_avx2, 12),
- make_tuple(64, 64, &aom_highbd_sad64x64_avx2, 8),
- make_tuple(64, 64, &aom_highbd_sad64x64_avx2, 10),
- make_tuple(64, 64, &aom_highbd_sad64x64_avx2, 12),
- make_tuple(64, 32, &aom_highbd_sad64x32_avx2, 8),
- make_tuple(64, 32, &aom_highbd_sad64x32_avx2, 10),
- make_tuple(64, 32, &aom_highbd_sad64x32_avx2, 12),
- make_tuple(32, 64, &aom_highbd_sad32x64_avx2, 8),
- make_tuple(32, 64, &aom_highbd_sad32x64_avx2, 10),
- make_tuple(32, 64, &aom_highbd_sad32x64_avx2, 12),
- make_tuple(32, 32, &aom_highbd_sad32x32_avx2, 8),
- make_tuple(32, 32, &aom_highbd_sad32x32_avx2, 10),
- make_tuple(32, 32, &aom_highbd_sad32x32_avx2, 12),
- make_tuple(32, 16, &aom_highbd_sad32x16_avx2, 8),
- make_tuple(32, 16, &aom_highbd_sad32x16_avx2, 10),
- make_tuple(32, 16, &aom_highbd_sad32x16_avx2, 12),
- make_tuple(16, 32, &aom_highbd_sad16x32_avx2, 8),
- make_tuple(16, 32, &aom_highbd_sad16x32_avx2, 10),
- make_tuple(16, 32, &aom_highbd_sad16x32_avx2, 12),
- make_tuple(16, 16, &aom_highbd_sad16x16_avx2, 8),
- make_tuple(16, 16, &aom_highbd_sad16x16_avx2, 10),
- make_tuple(16, 16, &aom_highbd_sad16x16_avx2, 12),
- make_tuple(16, 8, &aom_highbd_sad16x8_avx2, 8),
- make_tuple(16, 8, &aom_highbd_sad16x8_avx2, 10),
- make_tuple(16, 8, &aom_highbd_sad16x8_avx2, 12),
-};
-INSTANTIATE_TEST_CASE_P(AVX2, SADTest, ::testing::ValuesIn(avx2_tests));
-
-const SadMxNAvgParam avg_avx2_tests[] = {
- make_tuple(64, 128, &aom_sad64x128_avg_avx2, -1),
- make_tuple(128, 64, &aom_sad128x64_avg_avx2, -1),
- make_tuple(128, 128, &aom_sad128x128_avg_avx2, -1),
- make_tuple(64, 64, &aom_sad64x64_avg_avx2, -1),
- make_tuple(64, 32, &aom_sad64x32_avg_avx2, -1),
- make_tuple(32, 64, &aom_sad32x64_avg_avx2, -1),
- make_tuple(32, 32, &aom_sad32x32_avg_avx2, -1),
- make_tuple(32, 16, &aom_sad32x16_avg_avx2, -1),
- make_tuple(128, 128, &aom_highbd_sad128x128_avg_avx2, 8),
- make_tuple(128, 128, &aom_highbd_sad128x128_avg_avx2, 10),
- make_tuple(128, 128, &aom_highbd_sad128x128_avg_avx2, 12),
- make_tuple(128, 64, &aom_highbd_sad128x64_avg_avx2, 8),
- make_tuple(128, 64, &aom_highbd_sad128x64_avg_avx2, 10),
- make_tuple(128, 64, &aom_highbd_sad128x64_avg_avx2, 12),
- make_tuple(64, 128, &aom_highbd_sad64x128_avg_avx2, 8),
- make_tuple(64, 128, &aom_highbd_sad64x128_avg_avx2, 10),
- make_tuple(64, 128, &aom_highbd_sad64x128_avg_avx2, 12),
- make_tuple(64, 64, &aom_highbd_sad64x64_avg_avx2, 8),
- make_tuple(64, 64, &aom_highbd_sad64x64_avg_avx2, 10),
- make_tuple(64, 64, &aom_highbd_sad64x64_avg_avx2, 12),
- make_tuple(64, 32, &aom_highbd_sad64x32_avg_avx2, 8),
- make_tuple(64, 32, &aom_highbd_sad64x32_avg_avx2, 10),
- make_tuple(64, 32, &aom_highbd_sad64x32_avg_avx2, 12),
- make_tuple(32, 64, &aom_highbd_sad32x64_avg_avx2, 8),
- make_tuple(32, 64, &aom_highbd_sad32x64_avg_avx2, 10),
- make_tuple(32, 64, &aom_highbd_sad32x64_avg_avx2, 12),
- make_tuple(32, 32, &aom_highbd_sad32x32_avg_avx2, 8),
- make_tuple(32, 32, &aom_highbd_sad32x32_avg_avx2, 10),
- make_tuple(32, 32, &aom_highbd_sad32x32_avg_avx2, 12),
- make_tuple(32, 16, &aom_highbd_sad32x16_avg_avx2, 8),
- make_tuple(32, 16, &aom_highbd_sad32x16_avg_avx2, 10),
- make_tuple(32, 16, &aom_highbd_sad32x16_avg_avx2, 12),
- make_tuple(16, 32, &aom_highbd_sad16x32_avg_avx2, 8),
- make_tuple(16, 32, &aom_highbd_sad16x32_avg_avx2, 10),
- make_tuple(16, 32, &aom_highbd_sad16x32_avg_avx2, 12),
- make_tuple(16, 16, &aom_highbd_sad16x16_avg_avx2, 8),
- make_tuple(16, 16, &aom_highbd_sad16x16_avg_avx2, 10),
- make_tuple(16, 16, &aom_highbd_sad16x16_avg_avx2, 12),
- make_tuple(16, 8, &aom_highbd_sad16x8_avg_avx2, 8),
- make_tuple(16, 8, &aom_highbd_sad16x8_avg_avx2, 10),
- make_tuple(16, 8, &aom_highbd_sad16x8_avg_avx2, 12),
-};
-INSTANTIATE_TEST_CASE_P(AVX2, SADavgTest, ::testing::ValuesIn(avg_avx2_tests));
-
-const SadMxNx4Param x4d_avx2_tests[] = {
- make_tuple(64, 128, &aom_sad64x128x4d_avx2, -1),
- make_tuple(128, 64, &aom_sad128x64x4d_avx2, -1),
- make_tuple(128, 128, &aom_sad128x128x4d_avx2, -1),
- make_tuple(64, 64, &aom_sad64x64x4d_avx2, -1),
- make_tuple(32, 64, &aom_sad32x64x4d_avx2, -1),
- make_tuple(64, 32, &aom_sad64x32x4d_avx2, -1),
- make_tuple(32, 32, &aom_sad32x32x4d_avx2, -1),
- make_tuple(128, 128, &aom_highbd_sad128x128x4d_avx2, 8),
- make_tuple(128, 128, &aom_highbd_sad128x128x4d_avx2, 10),
- make_tuple(128, 128, &aom_highbd_sad128x128x4d_avx2, 12),
- make_tuple(128, 64, &aom_highbd_sad128x64x4d_avx2, 8),
- make_tuple(128, 64, &aom_highbd_sad128x64x4d_avx2, 10),
- make_tuple(128, 64, &aom_highbd_sad128x64x4d_avx2, 12),
- make_tuple(64, 128, &aom_highbd_sad64x128x4d_avx2, 8),
- make_tuple(64, 128, &aom_highbd_sad64x128x4d_avx2, 10),
- make_tuple(64, 128, &aom_highbd_sad64x128x4d_avx2, 12),
- make_tuple(64, 64, &aom_highbd_sad64x64x4d_avx2, 8),
- make_tuple(64, 64, &aom_highbd_sad64x64x4d_avx2, 10),
- make_tuple(64, 64, &aom_highbd_sad64x64x4d_avx2, 12),
- make_tuple(64, 32, &aom_highbd_sad64x32x4d_avx2, 8),
- make_tuple(64, 32, &aom_highbd_sad64x32x4d_avx2, 10),
- make_tuple(64, 32, &aom_highbd_sad64x32x4d_avx2, 12),
- make_tuple(32, 64, &aom_highbd_sad32x64x4d_avx2, 8),
- make_tuple(32, 64, &aom_highbd_sad32x64x4d_avx2, 10),
- make_tuple(32, 64, &aom_highbd_sad32x64x4d_avx2, 12),
- make_tuple(32, 32, &aom_highbd_sad32x32x4d_avx2, 8),
- make_tuple(32, 32, &aom_highbd_sad32x32x4d_avx2, 10),
- make_tuple(32, 32, &aom_highbd_sad32x32x4d_avx2, 12),
- make_tuple(32, 16, &aom_highbd_sad32x16x4d_avx2, 8),
- make_tuple(32, 16, &aom_highbd_sad32x16x4d_avx2, 10),
- make_tuple(32, 16, &aom_highbd_sad32x16x4d_avx2, 12),
- make_tuple(16, 32, &aom_highbd_sad16x32x4d_avx2, 8),
- make_tuple(16, 32, &aom_highbd_sad16x32x4d_avx2, 10),
- make_tuple(16, 32, &aom_highbd_sad16x32x4d_avx2, 12),
- make_tuple(16, 16, &aom_highbd_sad16x16x4d_avx2, 8),
- make_tuple(16, 16, &aom_highbd_sad16x16x4d_avx2, 10),
- make_tuple(16, 16, &aom_highbd_sad16x16x4d_avx2, 12),
- make_tuple(16, 8, &aom_highbd_sad16x8x4d_avx2, 8),
- make_tuple(16, 8, &aom_highbd_sad16x8x4d_avx2, 10),
- make_tuple(16, 8, &aom_highbd_sad16x8x4d_avx2, 12),
-};
-INSTANTIATE_TEST_CASE_P(AVX2, SADx4Test, ::testing::ValuesIn(x4d_avx2_tests));
-#endif // HAVE_AVX2
-
-//------------------------------------------------------------------------------
-// MIPS functions
-#if HAVE_MSA
-const SadMxNParam msa_tests[] = {
- make_tuple(64, 64, &aom_sad64x64_msa, -1),
- make_tuple(64, 32, &aom_sad64x32_msa, -1),
- make_tuple(32, 64, &aom_sad32x64_msa, -1),
- make_tuple(32, 32, &aom_sad32x32_msa, -1),
- make_tuple(32, 16, &aom_sad32x16_msa, -1),
- make_tuple(16, 32, &aom_sad16x32_msa, -1),
- make_tuple(16, 16, &aom_sad16x16_msa, -1),
- make_tuple(16, 8, &aom_sad16x8_msa, -1),
- make_tuple(8, 16, &aom_sad8x16_msa, -1),
- make_tuple(8, 8, &aom_sad8x8_msa, -1),
- make_tuple(8, 4, &aom_sad8x4_msa, -1),
- make_tuple(4, 8, &aom_sad4x8_msa, -1),
- make_tuple(4, 4, &aom_sad4x4_msa, -1),
-};
-INSTANTIATE_TEST_CASE_P(MSA, SADTest, ::testing::ValuesIn(msa_tests));
-
-const SadMxNAvgParam avg_msa_tests[] = {
- make_tuple(64, 64, &aom_sad64x64_avg_msa, -1),
- make_tuple(64, 32, &aom_sad64x32_avg_msa, -1),
- make_tuple(32, 64, &aom_sad32x64_avg_msa, -1),
- make_tuple(32, 32, &aom_sad32x32_avg_msa, -1),
- make_tuple(32, 16, &aom_sad32x16_avg_msa, -1),
- make_tuple(16, 32, &aom_sad16x32_avg_msa, -1),
- make_tuple(16, 16, &aom_sad16x16_avg_msa, -1),
- make_tuple(16, 8, &aom_sad16x8_avg_msa, -1),
- make_tuple(8, 16, &aom_sad8x16_avg_msa, -1),
- make_tuple(8, 8, &aom_sad8x8_avg_msa, -1),
- make_tuple(8, 4, &aom_sad8x4_avg_msa, -1),
- make_tuple(4, 8, &aom_sad4x8_avg_msa, -1),
- make_tuple(4, 4, &aom_sad4x4_avg_msa, -1),
-};
-INSTANTIATE_TEST_CASE_P(MSA, SADavgTest, ::testing::ValuesIn(avg_msa_tests));
-
-const SadMxNx4Param x4d_msa_tests[] = {
- make_tuple(64, 64, &aom_sad64x64x4d_msa, -1),
- make_tuple(64, 32, &aom_sad64x32x4d_msa, -1),
- make_tuple(32, 64, &aom_sad32x64x4d_msa, -1),
- make_tuple(32, 32, &aom_sad32x32x4d_msa, -1),
- make_tuple(32, 16, &aom_sad32x16x4d_msa, -1),
- make_tuple(16, 32, &aom_sad16x32x4d_msa, -1),
- make_tuple(16, 16, &aom_sad16x16x4d_msa, -1),
- make_tuple(16, 8, &aom_sad16x8x4d_msa, -1),
- make_tuple(8, 16, &aom_sad8x16x4d_msa, -1),
- make_tuple(8, 8, &aom_sad8x8x4d_msa, -1),
- make_tuple(8, 4, &aom_sad8x4x4d_msa, -1),
- make_tuple(4, 8, &aom_sad4x8x4d_msa, -1),
- make_tuple(4, 4, &aom_sad4x4x4d_msa, -1),
-};
-INSTANTIATE_TEST_CASE_P(MSA, SADx4Test, ::testing::ValuesIn(x4d_msa_tests));
-#endif // HAVE_MSA
-
-} // namespace
diff --git a/third_party/aom/test/scalability_test.cc b/third_party/aom/test/scalability_test.cc
deleted file mode 100644
index b39918861..000000000
--- a/third_party/aom/test/scalability_test.cc
+++ /dev/null
@@ -1,81 +0,0 @@
-/*
- * Copyright (c) 2018, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#include "third_party/googletest/src/googletest/include/gtest/gtest.h"
-#include "test/codec_factory.h"
-#include "test/encode_test_driver.h"
-#include "test/i420_video_source.h"
-#include "test/util.h"
-
-namespace {
-
-const int kCpuUsed = 8;
-const int kBaseLayerQp = 55;
-const int kEnhancementLayerQp = 20;
-
-class ScalabilityTest
- : public ::libaom_test::CodecTestWithParam<libaom_test::TestMode>,
- public ::libaom_test::EncoderTest {
- protected:
- ScalabilityTest() : EncoderTest(GET_PARAM(0)) {}
- virtual ~ScalabilityTest() {}
-
- virtual void SetUp() {
- InitializeConfig();
- SetMode(GET_PARAM(1));
- num_spatial_layers_ = 2;
- }
-
- virtual void PreEncodeFrameHook(::libaom_test::VideoSource *video,
- ::libaom_test::Encoder *encoder) {
- if (video->frame() == 0) {
- encoder->Control(AOME_SET_CPUUSED, kCpuUsed);
- encoder->Control(AOME_SET_NUMBER_SPATIAL_LAYERS, num_spatial_layers_);
- } else if (video->frame() % num_spatial_layers_) {
- frame_flags_ = AOM_EFLAG_NO_REF_LAST2 | AOM_EFLAG_NO_REF_LAST3 |
- AOM_EFLAG_NO_REF_GF | AOM_EFLAG_NO_REF_ARF |
- AOM_EFLAG_NO_REF_BWD | AOM_EFLAG_NO_REF_ARF2 |
- AOM_EFLAG_NO_UPD_LAST | AOM_EFLAG_NO_UPD_GF |
- AOM_EFLAG_NO_UPD_ARF | AOM_EFLAG_NO_UPD_ENTROPY;
- encoder->Control(AOME_SET_SPATIAL_LAYER_ID, 1);
- encoder->Control(AOME_SET_CQ_LEVEL, kEnhancementLayerQp);
- } else {
- frame_flags_ = AOM_EFLAG_NO_REF_LAST2 | AOM_EFLAG_NO_REF_LAST3 |
- AOM_EFLAG_NO_REF_GF | AOM_EFLAG_NO_REF_ARF |
- AOM_EFLAG_NO_REF_BWD | AOM_EFLAG_NO_REF_ARF2 |
- AOM_EFLAG_NO_UPD_GF | AOM_EFLAG_NO_UPD_ARF |
- AOM_EFLAG_NO_UPD_ENTROPY;
- encoder->Control(AOME_SET_SPATIAL_LAYER_ID, 0);
- encoder->Control(AOME_SET_CQ_LEVEL, kBaseLayerQp);
- }
- }
-
- void DoTest(int num_spatial_layers) {
- num_spatial_layers_ = num_spatial_layers;
- cfg_.rc_end_usage = AOM_Q;
- cfg_.g_lag_in_frames = 0;
-
- ::libaom_test::I420VideoSource video("hantro_collage_w352h288.yuv", 352,
- 288, 30, 1, 0, 18);
- ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
- }
-
- int num_spatial_layers_;
-};
-
-TEST_P(ScalabilityTest, TestNoMismatch2SpatialLayers) { DoTest(2); }
-
-TEST_P(ScalabilityTest, TestNoMismatch3SpatialLayers) { DoTest(3); }
-
-AV1_INSTANTIATE_TEST_CASE(ScalabilityTest,
- ::testing::Values(::libaom_test::kRealTime));
-
-} // namespace
diff --git a/third_party/aom/test/scan_test.cc b/third_party/aom/test/scan_test.cc
deleted file mode 100644
index dee2ab5a6..000000000
--- a/third_party/aom/test/scan_test.cc
+++ /dev/null
@@ -1,133 +0,0 @@
-/*
- * Copyright (c) 2018, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#include "third_party/googletest/src/googletest/include/gtest/gtest.h"
-#include "av1/common/scan.h"
-#include "av1/common/txb_common.h"
-#include "test/av1_txfm_test.h"
-
-static int scan_test(const int16_t *scan, const int16_t *iscan, int si, int r,
- int c, int w) {
- if (iscan[r * w + c] != si || scan[si] != r * w + c) {
- printf("r %d c %d ref_iscan %d iscan %d ref_scan %d scan %d\n", r, c, si,
- iscan[r * w + c], r * w + c, scan[si]);
- return 1;
- } else {
- return 0;
- }
-}
-
-int scan_order_test(const SCAN_ORDER *scan_order, int w, int h,
- SCAN_MODE mode) {
- const int16_t *scan = scan_order->scan;
- const int16_t *iscan = scan_order->iscan;
- int dim = w + h - 1;
- if (mode == SCAN_MODE_ZIG_ZAG) {
- int si = 0;
- for (int i = 0; i < dim; ++i) {
- if (i % 2 == 0) {
- for (int c = 0; c < w; ++c) {
- int r = i - c;
- if (r >= 0 && r < h) {
- if (scan_test(scan, iscan, si, r, c, w)) return 1;
- ++si;
- }
- }
- } else {
- for (int r = 0; r < h; ++r) {
- int c = i - r;
- if (c >= 0 && c < w) {
- if (scan_test(scan, iscan, si, r, c, w)) return 1;
- ++si;
- }
- }
- }
- }
- } else if (mode == SCAN_MODE_COL_DIAG) {
- int si = 0;
- for (int i = 0; i < dim; ++i) {
- for (int c = 0; c < w; ++c) {
- int r = i - c;
- if (r >= 0 && r < h) {
- if (scan_test(scan, iscan, si, r, c, w)) return 1;
- ++si;
- }
- }
- }
- } else if (mode == SCAN_MODE_ROW_DIAG) {
- int si = 0;
- for (int i = 0; i < dim; ++i) {
- for (int r = 0; r < h; ++r) {
- int c = i - r;
- if (c >= 0 && c < w) {
- if (scan_test(scan, iscan, si, r, c, w)) return 1;
- ++si;
- }
- }
- }
- } else if (mode == SCAN_MODE_ROW_1D) {
- int si = 0;
- for (int r = 0; r < h; ++r) {
- for (int c = 0; c < w; ++c) {
- if (scan_test(scan, iscan, si, r, c, w)) return 1;
- ++si;
- }
- }
- } else {
- assert(mode == SCAN_MODE_COL_1D);
- int si = 0;
- for (int c = 0; c < w; ++c) {
- for (int r = 0; r < h; ++r) {
- if (scan_test(scan, iscan, si, r, c, w)) return 1;
- ++si;
- }
- }
- }
- return 0;
-}
-
-TEST(Av1ScanTest, Dependency) {
- for (int tx_size = TX_4X4; tx_size < TX_SIZES_ALL; ++tx_size) {
- const int org_rows = tx_size_high[(TX_SIZE)tx_size];
- const int org_cols = tx_size_wide[(TX_SIZE)tx_size];
- const int rows = get_txb_high((TX_SIZE)tx_size);
- const int cols = get_txb_wide((TX_SIZE)tx_size);
- for (int tx_type = 0; tx_type < TX_TYPES; ++tx_type) {
- if (libaom_test::IsTxSizeTypeValid(static_cast<TX_SIZE>(tx_size),
- static_cast<TX_TYPE>(tx_type)) ==
- false) {
- continue;
- }
- SCAN_MODE scan_mode;
- TX_CLASS tx_class = tx_type_to_class[(TX_TYPE)tx_type];
- if (tx_class == TX_CLASS_2D) {
- if (rows == cols) {
- scan_mode = SCAN_MODE_ZIG_ZAG;
- } else if (rows > cols) {
- scan_mode = SCAN_MODE_ROW_DIAG;
- } else {
- scan_mode = SCAN_MODE_COL_DIAG;
- }
- } else if (tx_class == TX_CLASS_VERT) {
- scan_mode = SCAN_MODE_ROW_1D;
- } else {
- assert(tx_class == TX_CLASS_HORIZ);
- scan_mode = SCAN_MODE_COL_1D;
- }
- const SCAN_ORDER *scan_order =
- get_default_scan((TX_SIZE)tx_size, (TX_TYPE)tx_type);
- ASSERT_EQ(scan_order_test(scan_order, cols, rows, scan_mode), 0)
- << "scan mismatch tx_class " << tx_class << " tx_type " << tx_type
- << " tx_w " << org_cols << " tx_h " << org_rows << " scan_mode "
- << scan_mode << "\n";
- }
- }
-}
diff --git a/third_party/aom/test/segment_binarization_sync.cc b/third_party/aom/test/segment_binarization_sync.cc
deleted file mode 100644
index bd8cf1141..000000000
--- a/third_party/aom/test/segment_binarization_sync.cc
+++ /dev/null
@@ -1,61 +0,0 @@
-/*
- * Copyright (c) 2018, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#include "third_party/googletest/src/googletest/include/gtest/gtest.h"
-#include "test/acm_random.h"
-
-using libaom_test::ACMRandom;
-
-extern "C" {
-int av1_neg_interleave(int x, int ref, int max);
-int av1_neg_deinterleave(int diff, int ref, int max);
-}
-
-namespace {
-
-struct Segment {
- int id;
- int pred;
- int last_id;
-};
-
-Segment GenerateSegment(int seed) {
- static const int MAX_SEGMENTS = 8;
-
- ACMRandom rnd_(seed);
-
- Segment segment;
- const int last_segid = rnd_.PseudoUniform(MAX_SEGMENTS);
- segment.last_id = last_segid;
- segment.pred = rnd_.PseudoUniform(MAX_SEGMENTS);
- segment.id = rnd_.PseudoUniform(last_segid + 1);
-
- return segment;
-}
-
-// Try to reveal a mismatch between segment binarization and debinarization
-TEST(SegmentBinarizationSync, SearchForBinarizationMismatch) {
- const int count_tests = 1000;
- const int seed_init = 4321;
-
- for (int i = 0; i < count_tests; ++i) {
- const Segment seg = GenerateSegment(seed_init + i);
-
- const int max_segid = seg.last_id + 1;
- const int seg_diff = av1_neg_interleave(seg.id, seg.pred, max_segid);
- const int decoded_segid =
- av1_neg_deinterleave(seg_diff, seg.pred, max_segid);
-
- ASSERT_EQ(decoded_segid, seg.id);
- }
-}
-
-} // namespace
diff --git a/third_party/aom/test/selfguided_filter_test.cc b/third_party/aom/test/selfguided_filter_test.cc
deleted file mode 100644
index d2d5c6105..000000000
--- a/third_party/aom/test/selfguided_filter_test.cc
+++ /dev/null
@@ -1,410 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#include <ctime>
-
-#include "third_party/googletest/src/googletest/include/gtest/gtest.h"
-
-#include "config/av1_rtcd.h"
-
-#include "test/acm_random.h"
-#include "test/clear_system_state.h"
-#include "test/register_state_check.h"
-#include "test/util.h"
-
-#include "aom_ports/aom_timer.h"
-#include "av1/common/mv.h"
-#include "av1/common/restoration.h"
-
-namespace {
-
-using ::testing::make_tuple;
-using ::testing::tuple;
-using libaom_test::ACMRandom;
-
-typedef void (*SgrFunc)(const uint8_t *dat8, int width, int height, int stride,
- int eps, const int *xqd, uint8_t *dst8, int dst_stride,
- int32_t *tmpbuf, int bit_depth, int highbd);
-
-// Test parameter list:
-// <tst_fun_>
-typedef tuple<SgrFunc> FilterTestParam;
-
-class AV1SelfguidedFilterTest
- : public ::testing::TestWithParam<FilterTestParam> {
- public:
- virtual ~AV1SelfguidedFilterTest() {}
- virtual void SetUp() {}
-
- virtual void TearDown() { libaom_test::ClearSystemState(); }
-
- protected:
- void RunSpeedTest() {
- tst_fun_ = GET_PARAM(0);
- const int pu_width = RESTORATION_PROC_UNIT_SIZE;
- const int pu_height = RESTORATION_PROC_UNIT_SIZE;
- const int width = 256, height = 256, stride = 288, out_stride = 288;
- const int NUM_ITERS = 2000;
- int i, j, k;
-
- uint8_t *input_ =
- (uint8_t *)aom_memalign(32, stride * (height + 32) * sizeof(uint8_t));
- uint8_t *output_ = (uint8_t *)aom_memalign(
- 32, out_stride * (height + 32) * sizeof(uint8_t));
- int32_t *tmpbuf = (int32_t *)aom_memalign(32, RESTORATION_TMPBUF_SIZE);
- uint8_t *input = input_ + stride * 16 + 16;
- uint8_t *output = output_ + out_stride * 16 + 16;
-
- ACMRandom rnd(ACMRandom::DeterministicSeed());
-
- for (i = -16; i < height + 16; ++i)
- for (j = -16; j < width + 16; ++j)
- input[i * stride + j] = rnd.Rand16() & 0xFF;
-
- int xqd[2] = { SGRPROJ_PRJ_MIN0 + rnd.PseudoUniform(SGRPROJ_PRJ_MAX0 + 1 -
- SGRPROJ_PRJ_MIN0),
- SGRPROJ_PRJ_MIN1 + rnd.PseudoUniform(SGRPROJ_PRJ_MAX1 + 1 -
- SGRPROJ_PRJ_MIN1) };
- // Fix a parameter set, since the speed depends slightly on r.
- // Change this to test different combinations of values of r.
- int eps = 15;
-
- av1_loop_restoration_precal();
-
- aom_usec_timer ref_timer;
- aom_usec_timer_start(&ref_timer);
- for (i = 0; i < NUM_ITERS; ++i) {
- for (k = 0; k < height; k += pu_height)
- for (j = 0; j < width; j += pu_width) {
- int w = AOMMIN(pu_width, width - j);
- int h = AOMMIN(pu_height, height - k);
- uint8_t *input_p = input + k * stride + j;
- uint8_t *output_p = output + k * out_stride + j;
- apply_selfguided_restoration_c(input_p, w, h, stride, eps, xqd,
- output_p, out_stride, tmpbuf, 8, 0);
- }
- }
- aom_usec_timer_mark(&ref_timer);
- const int64_t ref_time = aom_usec_timer_elapsed(&ref_timer);
-
- aom_usec_timer tst_timer;
- aom_usec_timer_start(&tst_timer);
- for (i = 0; i < NUM_ITERS; ++i) {
- for (k = 0; k < height; k += pu_height)
- for (j = 0; j < width; j += pu_width) {
- int w = AOMMIN(pu_width, width - j);
- int h = AOMMIN(pu_height, height - k);
- uint8_t *input_p = input + k * stride + j;
- uint8_t *output_p = output + k * out_stride + j;
- tst_fun_(input_p, w, h, stride, eps, xqd, output_p, out_stride,
- tmpbuf, 8, 0);
- }
- }
- aom_usec_timer_mark(&tst_timer);
- const int64_t tst_time = aom_usec_timer_elapsed(&tst_timer);
-
- std::cout << "[ ] C time = " << ref_time / 1000
- << " ms, SIMD time = " << tst_time / 1000 << " ms\n";
-
- EXPECT_GT(ref_time, tst_time)
- << "Error: AV1SelfguidedFilterTest.SpeedTest, SIMD slower than C.\n"
- << "C time: " << ref_time << " us\n"
- << "SIMD time: " << tst_time << " us\n";
-
- aom_free(input_);
- aom_free(output_);
- aom_free(tmpbuf);
- }
-
- void RunCorrectnessTest() {
- tst_fun_ = GET_PARAM(0);
- const int pu_width = RESTORATION_PROC_UNIT_SIZE;
- const int pu_height = RESTORATION_PROC_UNIT_SIZE;
- // Set the maximum width/height to test here. We actually test a small
- // range of sizes *up to* this size, so that we can check, eg.,
- // the behaviour on tiles which are not a multiple of 4 wide.
- const int max_w = 260, max_h = 260, stride = 672, out_stride = 672;
- const int NUM_ITERS = 81;
- int i, j, k;
-
- uint8_t *input_ =
- (uint8_t *)aom_memalign(32, stride * (max_h + 32) * sizeof(uint8_t));
- uint8_t *output_ = (uint8_t *)aom_memalign(
- 32, out_stride * (max_h + 32) * sizeof(uint8_t));
- uint8_t *output2_ = (uint8_t *)aom_memalign(
- 32, out_stride * (max_h + 32) * sizeof(uint8_t));
- int32_t *tmpbuf = (int32_t *)aom_memalign(32, RESTORATION_TMPBUF_SIZE);
-
- uint8_t *input = input_ + stride * 16 + 16;
- uint8_t *output = output_ + out_stride * 16 + 16;
- uint8_t *output2 = output2_ + out_stride * 16 + 16;
-
- ACMRandom rnd(ACMRandom::DeterministicSeed());
-
- av1_loop_restoration_precal();
-
- for (i = 0; i < NUM_ITERS; ++i) {
- for (j = -16; j < max_h + 16; ++j)
- for (k = -16; k < max_w + 16; ++k)
- input[j * stride + k] = rnd.Rand16() & 0xFF;
-
- int xqd[2] = { SGRPROJ_PRJ_MIN0 + rnd.PseudoUniform(SGRPROJ_PRJ_MAX0 + 1 -
- SGRPROJ_PRJ_MIN0),
- SGRPROJ_PRJ_MIN1 + rnd.PseudoUniform(SGRPROJ_PRJ_MAX1 + 1 -
- SGRPROJ_PRJ_MIN1) };
- int eps = rnd.PseudoUniform(1 << SGRPROJ_PARAMS_BITS);
-
- // Test various tile sizes around 256x256
- int test_w = max_w - (i / 9);
- int test_h = max_h - (i % 9);
-
- for (k = 0; k < test_h; k += pu_height)
- for (j = 0; j < test_w; j += pu_width) {
- int w = AOMMIN(pu_width, test_w - j);
- int h = AOMMIN(pu_height, test_h - k);
- uint8_t *input_p = input + k * stride + j;
- uint8_t *output_p = output + k * out_stride + j;
- uint8_t *output2_p = output2 + k * out_stride + j;
- tst_fun_(input_p, w, h, stride, eps, xqd, output_p, out_stride,
- tmpbuf, 8, 0);
- apply_selfguided_restoration_c(input_p, w, h, stride, eps, xqd,
- output2_p, out_stride, tmpbuf, 8, 0);
- }
-
- for (j = 0; j < test_h; ++j)
- for (k = 0; k < test_w; ++k) {
- ASSERT_EQ(output[j * out_stride + k], output2[j * out_stride + k]);
- }
- }
-
- aom_free(input_);
- aom_free(output_);
- aom_free(output2_);
- aom_free(tmpbuf);
- }
-
- private:
- SgrFunc tst_fun_;
-};
-
-TEST_P(AV1SelfguidedFilterTest, DISABLED_SpeedTest) { RunSpeedTest(); }
-TEST_P(AV1SelfguidedFilterTest, CorrectnessTest) { RunCorrectnessTest(); }
-
-#if HAVE_SSE4_1
-INSTANTIATE_TEST_CASE_P(SSE4_1, AV1SelfguidedFilterTest,
- ::testing::Values(apply_selfguided_restoration_sse4_1));
-#endif
-
-#if HAVE_AVX2
-INSTANTIATE_TEST_CASE_P(AVX2, AV1SelfguidedFilterTest,
- ::testing::Values(apply_selfguided_restoration_avx2));
-#endif
-
-#if HAVE_NEON
-INSTANTIATE_TEST_CASE_P(NEON, AV1SelfguidedFilterTest,
- ::testing::Values(apply_selfguided_restoration_neon));
-#endif
-
-// Test parameter list:
-// <tst_fun_, bit_depth>
-typedef tuple<SgrFunc, int> HighbdFilterTestParam;
-
-class AV1HighbdSelfguidedFilterTest
- : public ::testing::TestWithParam<HighbdFilterTestParam> {
- public:
- virtual ~AV1HighbdSelfguidedFilterTest() {}
- virtual void SetUp() {}
-
- virtual void TearDown() { libaom_test::ClearSystemState(); }
-
- protected:
- void RunSpeedTest() {
- tst_fun_ = GET_PARAM(0);
- const int pu_width = RESTORATION_PROC_UNIT_SIZE;
- const int pu_height = RESTORATION_PROC_UNIT_SIZE;
- const int width = 256, height = 256, stride = 288, out_stride = 288;
- const int NUM_ITERS = 2000;
- int i, j, k;
- int bit_depth = GET_PARAM(1);
- int mask = (1 << bit_depth) - 1;
-
- uint16_t *input_ =
- (uint16_t *)aom_memalign(32, stride * (height + 32) * sizeof(uint16_t));
- uint16_t *output_ = (uint16_t *)aom_memalign(
- 32, out_stride * (height + 32) * sizeof(uint16_t));
- int32_t *tmpbuf = (int32_t *)aom_memalign(32, RESTORATION_TMPBUF_SIZE);
- uint16_t *input = input_ + stride * 16 + 16;
- uint16_t *output = output_ + out_stride * 16 + 16;
-
- ACMRandom rnd(ACMRandom::DeterministicSeed());
-
- for (i = -16; i < height + 16; ++i)
- for (j = -16; j < width + 16; ++j)
- input[i * stride + j] = rnd.Rand16() & mask;
-
- int xqd[2] = { SGRPROJ_PRJ_MIN0 + rnd.PseudoUniform(SGRPROJ_PRJ_MAX0 + 1 -
- SGRPROJ_PRJ_MIN0),
- SGRPROJ_PRJ_MIN1 + rnd.PseudoUniform(SGRPROJ_PRJ_MAX1 + 1 -
- SGRPROJ_PRJ_MIN1) };
- // Fix a parameter set, since the speed depends slightly on r.
- // Change this to test different combinations of values of r.
- int eps = 15;
-
- av1_loop_restoration_precal();
-
- aom_usec_timer ref_timer;
- aom_usec_timer_start(&ref_timer);
- for (i = 0; i < NUM_ITERS; ++i) {
- for (k = 0; k < height; k += pu_height)
- for (j = 0; j < width; j += pu_width) {
- int w = AOMMIN(pu_width, width - j);
- int h = AOMMIN(pu_height, height - k);
- uint16_t *input_p = input + k * stride + j;
- uint16_t *output_p = output + k * out_stride + j;
- apply_selfguided_restoration_c(
- CONVERT_TO_BYTEPTR(input_p), w, h, stride, eps, xqd,
- CONVERT_TO_BYTEPTR(output_p), out_stride, tmpbuf, bit_depth, 1);
- }
- }
- aom_usec_timer_mark(&ref_timer);
- const int64_t ref_time = aom_usec_timer_elapsed(&ref_timer);
-
- aom_usec_timer tst_timer;
- aom_usec_timer_start(&tst_timer);
- for (i = 0; i < NUM_ITERS; ++i) {
- for (k = 0; k < height; k += pu_height)
- for (j = 0; j < width; j += pu_width) {
- int w = AOMMIN(pu_width, width - j);
- int h = AOMMIN(pu_height, height - k);
- uint16_t *input_p = input + k * stride + j;
- uint16_t *output_p = output + k * out_stride + j;
- tst_fun_(CONVERT_TO_BYTEPTR(input_p), w, h, stride, eps, xqd,
- CONVERT_TO_BYTEPTR(output_p), out_stride, tmpbuf, bit_depth,
- 1);
- }
- }
- aom_usec_timer_mark(&tst_timer);
- const int64_t tst_time = aom_usec_timer_elapsed(&tst_timer);
-
- std::cout << "[ ] C time = " << ref_time / 1000
- << " ms, SIMD time = " << tst_time / 1000 << " ms\n";
-
- EXPECT_GT(ref_time, tst_time)
- << "Error: AV1HighbdSelfguidedFilterTest.SpeedTest, SIMD slower than "
- "C.\n"
- << "C time: " << ref_time << " us\n"
- << "SIMD time: " << tst_time << " us\n";
-
- aom_free(input_);
- aom_free(output_);
- aom_free(tmpbuf);
- }
-
- void RunCorrectnessTest() {
- tst_fun_ = GET_PARAM(0);
- const int pu_width = RESTORATION_PROC_UNIT_SIZE;
- const int pu_height = RESTORATION_PROC_UNIT_SIZE;
- // Set the maximum width/height to test here. We actually test a small
- // range of sizes *up to* this size, so that we can check, eg.,
- // the behaviour on tiles which are not a multiple of 4 wide.
- const int max_w = 260, max_h = 260, stride = 672, out_stride = 672;
- const int NUM_ITERS = 81;
- int i, j, k;
- int bit_depth = GET_PARAM(1);
- int mask = (1 << bit_depth) - 1;
-
- uint16_t *input_ =
- (uint16_t *)aom_memalign(32, stride * (max_h + 32) * sizeof(uint16_t));
- uint16_t *output_ = (uint16_t *)aom_memalign(
- 32, out_stride * (max_h + 32) * sizeof(uint16_t));
- uint16_t *output2_ = (uint16_t *)aom_memalign(
- 32, out_stride * (max_h + 32) * sizeof(uint16_t));
- int32_t *tmpbuf = (int32_t *)aom_memalign(32, RESTORATION_TMPBUF_SIZE);
-
- uint16_t *input = input_ + stride * 16 + 16;
- uint16_t *output = output_ + out_stride * 16 + 16;
- uint16_t *output2 = output2_ + out_stride * 16 + 16;
-
- ACMRandom rnd(ACMRandom::DeterministicSeed());
-
- av1_loop_restoration_precal();
-
- for (i = 0; i < NUM_ITERS; ++i) {
- for (j = -16; j < max_h + 16; ++j)
- for (k = -16; k < max_w + 16; ++k)
- input[j * stride + k] = rnd.Rand16() & mask;
-
- int xqd[2] = { SGRPROJ_PRJ_MIN0 + rnd.PseudoUniform(SGRPROJ_PRJ_MAX0 + 1 -
- SGRPROJ_PRJ_MIN0),
- SGRPROJ_PRJ_MIN1 + rnd.PseudoUniform(SGRPROJ_PRJ_MAX1 + 1 -
- SGRPROJ_PRJ_MIN1) };
- int eps = rnd.PseudoUniform(1 << SGRPROJ_PARAMS_BITS);
-
- // Test various tile sizes around 256x256
- int test_w = max_w - (i / 9);
- int test_h = max_h - (i % 9);
-
- for (k = 0; k < test_h; k += pu_height)
- for (j = 0; j < test_w; j += pu_width) {
- int w = AOMMIN(pu_width, test_w - j);
- int h = AOMMIN(pu_height, test_h - k);
- uint16_t *input_p = input + k * stride + j;
- uint16_t *output_p = output + k * out_stride + j;
- uint16_t *output2_p = output2 + k * out_stride + j;
- tst_fun_(CONVERT_TO_BYTEPTR(input_p), w, h, stride, eps, xqd,
- CONVERT_TO_BYTEPTR(output_p), out_stride, tmpbuf, bit_depth,
- 1);
- apply_selfguided_restoration_c(
- CONVERT_TO_BYTEPTR(input_p), w, h, stride, eps, xqd,
- CONVERT_TO_BYTEPTR(output2_p), out_stride, tmpbuf, bit_depth, 1);
- }
-
- for (j = 0; j < test_h; ++j)
- for (k = 0; k < test_w; ++k)
- ASSERT_EQ(output[j * out_stride + k], output2[j * out_stride + k]);
- }
-
- aom_free(input_);
- aom_free(output_);
- aom_free(output2_);
- aom_free(tmpbuf);
- }
-
- private:
- SgrFunc tst_fun_;
-};
-
-TEST_P(AV1HighbdSelfguidedFilterTest, DISABLED_SpeedTest) { RunSpeedTest(); }
-TEST_P(AV1HighbdSelfguidedFilterTest, CorrectnessTest) { RunCorrectnessTest(); }
-
-#if HAVE_SSE4_1
-const int highbd_params_sse4_1[] = { 8, 10, 12 };
-INSTANTIATE_TEST_CASE_P(
- SSE4_1, AV1HighbdSelfguidedFilterTest,
- ::testing::Combine(::testing::Values(apply_selfguided_restoration_sse4_1),
- ::testing::ValuesIn(highbd_params_sse4_1)));
-#endif
-
-#if HAVE_AVX2
-const int highbd_params_avx2[] = { 8, 10, 12 };
-INSTANTIATE_TEST_CASE_P(
- AVX2, AV1HighbdSelfguidedFilterTest,
- ::testing::Combine(::testing::Values(apply_selfguided_restoration_avx2),
- ::testing::ValuesIn(highbd_params_avx2)));
-#endif
-#if HAVE_NEON
-const int highbd_params_neon[] = { 8, 10, 12 };
-INSTANTIATE_TEST_CASE_P(
- NEON, AV1HighbdSelfguidedFilterTest,
- ::testing::Combine(::testing::Values(apply_selfguided_restoration_neon),
- ::testing::ValuesIn(highbd_params_neon)));
-#endif
-} // namespace
diff --git a/third_party/aom/test/set_maps.sh b/third_party/aom/test/set_maps.sh
deleted file mode 100755
index 4f59b06d6..000000000
--- a/third_party/aom/test/set_maps.sh
+++ /dev/null
@@ -1,52 +0,0 @@
-#!/bin/sh
-## Copyright (c) 2016, Alliance for Open Media. All rights reserved
-##
-## This source code is subject to the terms of the BSD 2 Clause License and
-## the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
-## was not distributed with this source code in the LICENSE file, you can
-## obtain it at www.aomedia.org/license/software. If the Alliance for Open
-## Media Patent License 1.0 was not distributed with this source code in the
-## PATENTS file, you can obtain it at www.aomedia.org/license/patent.
-##
-## This file tests the libaom set_maps example. To add new tests to this file,
-## do the following:
-## 1. Write a shell function (this is your test).
-## 2. Add the function to set_maps_tests (on a new line).
-##
-. $(dirname $0)/tools_common.sh
-
-# Environment check: $YUV_RAW_INPUT is required, and set_maps must exist in
-# $LIBAOM_BIN_PATH.
-set_maps_verify_environment() {
- if [ ! -e "${YUV_RAW_INPUT}" ]; then
- echo "Libaom test data must exist in LIBAOM_TEST_DATA_PATH."
- return 1
- fi
- if [ -z "$(aom_tool_path set_maps)" ]; then
- elog "set_maps not found. It must exist in LIBAOM_BIN_PATH or its parent."
- return 1
- fi
-}
-
-# Runs set_maps using the codec specified by $1.
-set_maps() {
- local encoder="$(aom_tool_path set_maps)"
- local codec="$1"
- local output_file="${AOM_TEST_OUTPUT_DIR}/set_maps_${codec}.ivf"
-
- eval "${AOM_TEST_PREFIX}" "${encoder}" "${codec}" "${YUV_RAW_INPUT_WIDTH}" \
- "${YUV_RAW_INPUT_HEIGHT}" "${YUV_RAW_INPUT}" "${output_file}" \
- ${devnull}
-
- [ -e "${output_file}" ] || return 1
-}
-
-set_maps_av1() {
- if [ "$(av1_encode_available)" = "yes" ]; then
- set_maps av1 || return 1
- fi
-}
-
-set_maps_tests="set_maps_av1"
-
-run_tests set_maps_verify_environment "${set_maps_tests}"
diff --git a/third_party/aom/test/simd_avx2_test.cc b/third_party/aom/test/simd_avx2_test.cc
deleted file mode 100644
index 8a012bff8..000000000
--- a/third_party/aom/test/simd_avx2_test.cc
+++ /dev/null
@@ -1,15 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#define ARCH AVX2
-#define ARCH_POSTFIX(name) name##_avx2
-#define SIMD_NAMESPACE simd_test_avx2
-#include "test/simd_impl.h"
diff --git a/third_party/aom/test/simd_cmp_avx2.cc b/third_party/aom/test/simd_cmp_avx2.cc
deleted file mode 100644
index cda632bcd..000000000
--- a/third_party/aom/test/simd_cmp_avx2.cc
+++ /dev/null
@@ -1,15 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#define ARCH AVX2
-#define ARCH_POSTFIX(name) name##_avx2
-#define SIMD_NAMESPACE simd_test_avx2
-#include "test/simd_cmp_impl.h"
diff --git a/third_party/aom/test/simd_cmp_impl.h b/third_party/aom/test/simd_cmp_impl.h
deleted file mode 100644
index b98af9aad..000000000
--- a/third_party/aom/test/simd_cmp_impl.h
+++ /dev/null
@@ -1,2171 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#include <assert.h>
-#include <string>
-
-#include "config/aom_dsp_rtcd.h"
-
-#include "test/acm_random.h"
-#include "aom_dsp/aom_simd.h"
-#undef SIMD_INLINE
-#define SIMD_INLINE static // Don't enforce inlining
-#include "aom_dsp/simd/v256_intrinsics_c.h"
-
-// Machine tuned code goes into this file. This file is included from
-// simd_cmp_sse2.cc, simd_cmp_ssse3.cc etc which define the macros
-// ARCH (=neon, sse2, ssse3, etc), SIMD_NAMESPACE and ARCH_POSTFIX().
-
-#ifdef _MSC_VER
-// Disable "value of intrinsic immediate argument 'value' is out of range
-// 'lowerbound - upperbound'" warning. Visual Studio emits this warning though
-// the parameters are conditionally checked in e.g., v256_shr_n_byte. Adding a
-// mask doesn't always appear to be sufficient.
-#pragma warning(disable : 4556)
-#endif
-
-using libaom_test::ACMRandom;
-
-namespace SIMD_NAMESPACE {
-
-// Wrap templates around intrinsics using immediate values
-template <int shift>
-v64 imm_v64_shl_n_byte(v64 a) {
- return v64_shl_n_byte(a, shift);
-}
-template <int shift>
-v64 imm_v64_shr_n_byte(v64 a) {
- return v64_shr_n_byte(a, shift);
-}
-template <int shift>
-v64 imm_v64_shl_n_8(v64 a) {
- return v64_shl_n_8(a, shift);
-}
-template <int shift>
-v64 imm_v64_shr_n_u8(v64 a) {
- return v64_shr_n_u8(a, shift);
-}
-template <int shift>
-v64 imm_v64_shr_n_s8(v64 a) {
- return v64_shr_n_s8(a, shift);
-}
-template <int shift>
-v64 imm_v64_shl_n_16(v64 a) {
- return v64_shl_n_16(a, shift);
-}
-template <int shift>
-v64 imm_v64_shr_n_u16(v64 a) {
- return v64_shr_n_u16(a, shift);
-}
-template <int shift>
-v64 imm_v64_shr_n_s16(v64 a) {
- return v64_shr_n_s16(a, shift);
-}
-template <int shift>
-v64 imm_v64_shl_n_32(v64 a) {
- return v64_shl_n_32(a, shift);
-}
-template <int shift>
-v64 imm_v64_shr_n_u32(v64 a) {
- return v64_shr_n_u32(a, shift);
-}
-template <int shift>
-v64 imm_v64_shr_n_s32(v64 a) {
- return v64_shr_n_s32(a, shift);
-}
-template <int shift>
-v64 imm_v64_align(v64 a, v64 b) {
- return v64_align(a, b, shift);
-}
-
-// Wrap templates around corresponding C implementations of the above
-template <int shift>
-c_v64 c_imm_v64_shl_n_byte(c_v64 a) {
- return c_v64_shl_n_byte(a, shift);
-}
-template <int shift>
-c_v64 c_imm_v64_shr_n_byte(c_v64 a) {
- return c_v64_shr_n_byte(a, shift);
-}
-template <int shift>
-c_v64 c_imm_v64_shl_n_8(c_v64 a) {
- return c_v64_shl_n_8(a, shift);
-}
-template <int shift>
-c_v64 c_imm_v64_shr_n_u8(c_v64 a) {
- return c_v64_shr_n_u8(a, shift);
-}
-template <int shift>
-c_v64 c_imm_v64_shr_n_s8(c_v64 a) {
- return c_v64_shr_n_s8(a, shift);
-}
-template <int shift>
-c_v64 c_imm_v64_shl_n_16(c_v64 a) {
- return c_v64_shl_n_16(a, shift);
-}
-template <int shift>
-c_v64 c_imm_v64_shr_n_u16(c_v64 a) {
- return c_v64_shr_n_u16(a, shift);
-}
-template <int shift>
-c_v64 c_imm_v64_shr_n_s16(c_v64 a) {
- return c_v64_shr_n_s16(a, shift);
-}
-template <int shift>
-c_v64 c_imm_v64_shl_n_32(c_v64 a) {
- return c_v64_shl_n_32(a, shift);
-}
-template <int shift>
-c_v64 c_imm_v64_shr_n_u32(c_v64 a) {
- return c_v64_shr_n_u32(a, shift);
-}
-template <int shift>
-c_v64 c_imm_v64_shr_n_s32(c_v64 a) {
- return c_v64_shr_n_s32(a, shift);
-}
-template <int shift>
-c_v64 c_imm_v64_align(c_v64 a, c_v64 b) {
- return c_v64_align(a, b, shift);
-}
-
-template <int shift>
-v128 imm_v128_shl_n_byte(v128 a) {
- return v128_shl_n_byte(a, shift);
-}
-template <int shift>
-v128 imm_v128_shr_n_byte(v128 a) {
- return v128_shr_n_byte(a, shift);
-}
-template <int shift>
-v128 imm_v128_shl_n_8(v128 a) {
- return v128_shl_n_8(a, shift);
-}
-template <int shift>
-v128 imm_v128_shr_n_u8(v128 a) {
- return v128_shr_n_u8(a, shift);
-}
-template <int shift>
-v128 imm_v128_shr_n_s8(v128 a) {
- return v128_shr_n_s8(a, shift);
-}
-template <int shift>
-v128 imm_v128_shl_n_16(v128 a) {
- return v128_shl_n_16(a, shift);
-}
-template <int shift>
-v128 imm_v128_shr_n_u16(v128 a) {
- return v128_shr_n_u16(a, shift);
-}
-template <int shift>
-v128 imm_v128_shr_n_s16(v128 a) {
- return v128_shr_n_s16(a, shift);
-}
-template <int shift>
-v128 imm_v128_shl_n_32(v128 a) {
- return v128_shl_n_32(a, shift);
-}
-template <int shift>
-v128 imm_v128_shr_n_u32(v128 a) {
- return v128_shr_n_u32(a, shift);
-}
-template <int shift>
-v128 imm_v128_shr_n_s32(v128 a) {
- return v128_shr_n_s32(a, shift);
-}
-template <int shift>
-v128 imm_v128_shl_n_64(v128 a) {
- return v128_shl_n_64(a, shift);
-}
-template <int shift>
-v128 imm_v128_shr_n_u64(v128 a) {
- return v128_shr_n_u64(a, shift);
-}
-template <int shift>
-v128 imm_v128_shr_n_s64(v128 a) {
- return v128_shr_n_s64(a, shift);
-}
-template <int shift>
-v128 imm_v128_align(v128 a, v128 b) {
- return v128_align(a, b, shift);
-}
-
-template <int shift>
-c_v128 c_imm_v128_shl_n_byte(c_v128 a) {
- return c_v128_shl_n_byte(a, shift);
-}
-template <int shift>
-c_v128 c_imm_v128_shr_n_byte(c_v128 a) {
- return c_v128_shr_n_byte(a, shift);
-}
-template <int shift>
-c_v128 c_imm_v128_shl_n_8(c_v128 a) {
- return c_v128_shl_n_8(a, shift);
-}
-template <int shift>
-c_v128 c_imm_v128_shr_n_u8(c_v128 a) {
- return c_v128_shr_n_u8(a, shift);
-}
-template <int shift>
-c_v128 c_imm_v128_shr_n_s8(c_v128 a) {
- return c_v128_shr_n_s8(a, shift);
-}
-template <int shift>
-c_v128 c_imm_v128_shl_n_16(c_v128 a) {
- return c_v128_shl_n_16(a, shift);
-}
-template <int shift>
-c_v128 c_imm_v128_shr_n_u16(c_v128 a) {
- return c_v128_shr_n_u16(a, shift);
-}
-template <int shift>
-c_v128 c_imm_v128_shr_n_s16(c_v128 a) {
- return c_v128_shr_n_s16(a, shift);
-}
-template <int shift>
-c_v128 c_imm_v128_shl_n_32(c_v128 a) {
- return c_v128_shl_n_32(a, shift);
-}
-template <int shift>
-c_v128 c_imm_v128_shr_n_u32(c_v128 a) {
- return c_v128_shr_n_u32(a, shift);
-}
-template <int shift>
-c_v128 c_imm_v128_shr_n_s32(c_v128 a) {
- return c_v128_shr_n_s32(a, shift);
-}
-template <int shift>
-c_v128 c_imm_v128_shl_n_64(c_v128 a) {
- return c_v128_shl_n_64(a, shift);
-}
-template <int shift>
-c_v128 c_imm_v128_shr_n_u64(c_v128 a) {
- return c_v128_shr_n_u64(a, shift);
-}
-template <int shift>
-c_v128 c_imm_v128_shr_n_s64(c_v128 a) {
- return c_v128_shr_n_s64(a, shift);
-}
-template <int shift>
-c_v128 c_imm_v128_align(c_v128 a, c_v128 b) {
- return c_v128_align(a, b, shift);
-}
-
-template <int shift>
-v256 imm_v256_shl_n_word(v256 a) {
- return v256_shl_n_word(a, shift);
-}
-template <int shift>
-v256 imm_v256_shr_n_word(v256 a) {
- return v256_shr_n_word(a, shift);
-}
-template <int shift>
-v256 imm_v256_shl_n_byte(v256 a) {
- return v256_shl_n_byte(a, shift);
-}
-template <int shift>
-v256 imm_v256_shr_n_byte(v256 a) {
- return v256_shr_n_byte(a, shift);
-}
-template <int shift>
-v256 imm_v256_shl_n_8(v256 a) {
- return v256_shl_n_8(a, shift);
-}
-template <int shift>
-v256 imm_v256_shr_n_u8(v256 a) {
- return v256_shr_n_u8(a, shift);
-}
-template <int shift>
-v256 imm_v256_shr_n_s8(v256 a) {
- return v256_shr_n_s8(a, shift);
-}
-template <int shift>
-v256 imm_v256_shl_n_16(v256 a) {
- return v256_shl_n_16(a, shift);
-}
-template <int shift>
-v256 imm_v256_shr_n_u16(v256 a) {
- return v256_shr_n_u16(a, shift);
-}
-template <int shift>
-v256 imm_v256_shr_n_s16(v256 a) {
- return v256_shr_n_s16(a, shift);
-}
-template <int shift>
-v256 imm_v256_shl_n_32(v256 a) {
- return v256_shl_n_32(a, shift);
-}
-template <int shift>
-v256 imm_v256_shr_n_u32(v256 a) {
- return v256_shr_n_u32(a, shift);
-}
-template <int shift>
-v256 imm_v256_shr_n_s32(v256 a) {
- return v256_shr_n_s32(a, shift);
-}
-template <int shift>
-v256 imm_v256_shl_n_64(v256 a) {
- return v256_shl_n_64(a, shift);
-}
-template <int shift>
-v256 imm_v256_shr_n_u64(v256 a) {
- return v256_shr_n_u64(a, shift);
-}
-template <int shift>
-v256 imm_v256_shr_n_s64(v256 a) {
- return v256_shr_n_s64(a, shift);
-}
-template <int shift>
-v256 imm_v256_align(v256 a, v256 b) {
- return v256_align(a, b, shift);
-}
-
-template <int shift>
-c_v256 c_imm_v256_shl_n_word(c_v256 a) {
- return c_v256_shl_n_word(a, shift);
-}
-template <int shift>
-c_v256 c_imm_v256_shr_n_word(c_v256 a) {
- return c_v256_shr_n_word(a, shift);
-}
-template <int shift>
-c_v256 c_imm_v256_shl_n_byte(c_v256 a) {
- return c_v256_shl_n_byte(a, shift);
-}
-template <int shift>
-c_v256 c_imm_v256_shr_n_byte(c_v256 a) {
- return c_v256_shr_n_byte(a, shift);
-}
-template <int shift>
-c_v256 c_imm_v256_shl_n_8(c_v256 a) {
- return c_v256_shl_n_8(a, shift);
-}
-template <int shift>
-c_v256 c_imm_v256_shr_n_u8(c_v256 a) {
- return c_v256_shr_n_u8(a, shift);
-}
-template <int shift>
-c_v256 c_imm_v256_shr_n_s8(c_v256 a) {
- return c_v256_shr_n_s8(a, shift);
-}
-template <int shift>
-c_v256 c_imm_v256_shl_n_16(c_v256 a) {
- return c_v256_shl_n_16(a, shift);
-}
-template <int shift>
-c_v256 c_imm_v256_shr_n_u16(c_v256 a) {
- return c_v256_shr_n_u16(a, shift);
-}
-template <int shift>
-c_v256 c_imm_v256_shr_n_s16(c_v256 a) {
- return c_v256_shr_n_s16(a, shift);
-}
-template <int shift>
-c_v256 c_imm_v256_shl_n_32(c_v256 a) {
- return c_v256_shl_n_32(a, shift);
-}
-template <int shift>
-c_v256 c_imm_v256_shr_n_u32(c_v256 a) {
- return c_v256_shr_n_u32(a, shift);
-}
-template <int shift>
-c_v256 c_imm_v256_shr_n_s32(c_v256 a) {
- return c_v256_shr_n_s32(a, shift);
-}
-template <int shift>
-c_v256 c_imm_v256_shl_n_64(c_v256 a) {
- return c_v256_shl_n_64(a, shift);
-}
-template <int shift>
-c_v256 c_imm_v256_shr_n_u64(c_v256 a) {
- return c_v256_shr_n_u64(a, shift);
-}
-template <int shift>
-c_v256 c_imm_v256_shr_n_s64(c_v256 a) {
- return c_v256_shr_n_s64(a, shift);
-}
-template <int shift>
-c_v256 c_imm_v256_align(c_v256 a, c_v256 b) {
- return c_v256_align(a, b, shift);
-}
-
-// Wrappers around the the SAD and SSD functions
-uint32_t v64_sad_u8(v64 a, v64 b) {
- return v64_sad_u8_sum(::v64_sad_u8(v64_sad_u8_init(), a, b));
-}
-uint32_t v64_ssd_u8(v64 a, v64 b) {
- return v64_ssd_u8_sum(::v64_ssd_u8(v64_ssd_u8_init(), a, b));
-}
-
-uint32_t c_v64_sad_u8(c_v64 a, c_v64 b) {
- return c_v64_sad_u8_sum(::c_v64_sad_u8(c_v64_sad_u8_init(), a, b));
-}
-uint32_t c_v64_ssd_u8(c_v64 a, c_v64 b) {
- return c_v64_ssd_u8_sum(::c_v64_ssd_u8(c_v64_ssd_u8_init(), a, b));
-}
-uint32_t v128_sad_u8(v128 a, v128 b) {
- return v128_sad_u8_sum(::v128_sad_u8(v128_sad_u8_init(), a, b));
-}
-uint32_t v128_ssd_u8(v128 a, v128 b) {
- return v128_ssd_u8_sum(::v128_ssd_u8(v128_ssd_u8_init(), a, b));
-}
-uint32_t c_v128_sad_u8(c_v128 a, c_v128 b) {
- return c_v128_sad_u8_sum(::c_v128_sad_u8(c_v128_sad_u8_init(), a, b));
-}
-uint32_t c_v128_ssd_u8(c_v128 a, c_v128 b) {
- return c_v128_ssd_u8_sum(::c_v128_ssd_u8(c_v128_ssd_u8_init(), a, b));
-}
-uint32_t v128_sad_u16(v128 a, v128 b) {
- return v128_sad_u16_sum(::v128_sad_u16(v128_sad_u16_init(), a, b));
-}
-uint64_t v128_ssd_s16(v128 a, v128 b) {
- return v128_ssd_s16_sum(::v128_ssd_s16(v128_ssd_s16_init(), a, b));
-}
-uint32_t c_v128_sad_u16(c_v128 a, c_v128 b) {
- return c_v128_sad_u16_sum(::c_v128_sad_u16(c_v128_sad_u16_init(), a, b));
-}
-uint64_t c_v128_ssd_s16(c_v128 a, c_v128 b) {
- return c_v128_ssd_s16_sum(::c_v128_ssd_s16(c_v128_ssd_s16_init(), a, b));
-}
-uint32_t v256_sad_u8(v256 a, v256 b) {
- return v256_sad_u8_sum(::v256_sad_u8(v256_sad_u8_init(), a, b));
-}
-uint32_t v256_ssd_u8(v256 a, v256 b) {
- return v256_ssd_u8_sum(::v256_ssd_u8(v256_ssd_u8_init(), a, b));
-}
-uint32_t c_v256_sad_u8(c_v256 a, c_v256 b) {
- return c_v256_sad_u8_sum(::c_v256_sad_u8(c_v256_sad_u8_init(), a, b));
-}
-uint32_t c_v256_ssd_u8(c_v256 a, c_v256 b) {
- return c_v256_ssd_u8_sum(::c_v256_ssd_u8(c_v256_ssd_u8_init(), a, b));
-}
-uint32_t v256_sad_u16(v256 a, v256 b) {
- return v256_sad_u16_sum(::v256_sad_u16(v256_sad_u16_init(), a, b));
-}
-uint64_t v256_ssd_s16(v256 a, v256 b) {
- return v256_ssd_s16_sum(::v256_ssd_s16(v256_ssd_s16_init(), a, b));
-}
-uint32_t c_v256_sad_u16(c_v256 a, c_v256 b) {
- return c_v256_sad_u16_sum(::c_v256_sad_u16(c_v256_sad_u16_init(), a, b));
-}
-uint64_t c_v256_ssd_s16(c_v256 a, c_v256 b) {
- return c_v256_ssd_s16_sum(::c_v256_ssd_s16(c_v256_ssd_s16_init(), a, b));
-}
-
-namespace {
-
-typedef void (*fptr)();
-
-typedef struct {
- const char *name;
- fptr ref;
- fptr simd;
-} mapping;
-
-#define MAP(name) \
- { \
- #name, reinterpret_cast < fptr > (c_##name), \
- reinterpret_cast < fptr > (name) \
- }
-
-const mapping m[] = { MAP(v64_sad_u8),
- MAP(v64_ssd_u8),
- MAP(v64_add_8),
- MAP(v64_add_16),
- MAP(v64_sadd_s8),
- MAP(v64_sadd_u8),
- MAP(v64_sadd_s16),
- MAP(v64_add_32),
- MAP(v64_sub_8),
- MAP(v64_ssub_u8),
- MAP(v64_ssub_s8),
- MAP(v64_sub_16),
- MAP(v64_ssub_s16),
- MAP(v64_ssub_u16),
- MAP(v64_sub_32),
- MAP(v64_ziplo_8),
- MAP(v64_ziphi_8),
- MAP(v64_ziplo_16),
- MAP(v64_ziphi_16),
- MAP(v64_ziplo_32),
- MAP(v64_ziphi_32),
- MAP(v64_pack_s32_u16),
- MAP(v64_pack_s32_s16),
- MAP(v64_pack_s16_u8),
- MAP(v64_pack_s16_s8),
- MAP(v64_unziphi_8),
- MAP(v64_unziplo_8),
- MAP(v64_unziphi_16),
- MAP(v64_unziplo_16),
- MAP(v64_or),
- MAP(v64_xor),
- MAP(v64_and),
- MAP(v64_andn),
- MAP(v64_mullo_s16),
- MAP(v64_mulhi_s16),
- MAP(v64_mullo_s32),
- MAP(v64_madd_s16),
- MAP(v64_madd_us8),
- MAP(v64_avg_u8),
- MAP(v64_rdavg_u8),
- MAP(v64_rdavg_u16),
- MAP(v64_avg_u16),
- MAP(v64_min_u8),
- MAP(v64_max_u8),
- MAP(v64_min_s8),
- MAP(v64_max_s8),
- MAP(v64_min_s16),
- MAP(v64_max_s16),
- MAP(v64_cmpgt_s8),
- MAP(v64_cmplt_s8),
- MAP(v64_cmpeq_8),
- MAP(v64_cmpgt_s16),
- MAP(v64_cmplt_s16),
- MAP(v64_cmpeq_16),
- MAP(v64_shuffle_8),
- MAP(imm_v64_align<1>),
- MAP(imm_v64_align<2>),
- MAP(imm_v64_align<3>),
- MAP(imm_v64_align<4>),
- MAP(imm_v64_align<5>),
- MAP(imm_v64_align<6>),
- MAP(imm_v64_align<7>),
- MAP(v64_abs_s8),
- MAP(v64_abs_s16),
- MAP(v64_unpacklo_u8_s16),
- MAP(v64_unpackhi_u8_s16),
- MAP(v64_unpacklo_s8_s16),
- MAP(v64_unpackhi_s8_s16),
- MAP(v64_unpacklo_u16_s32),
- MAP(v64_unpacklo_s16_s32),
- MAP(v64_unpackhi_u16_s32),
- MAP(v64_unpackhi_s16_s32),
- MAP(imm_v64_shr_n_byte<1>),
- MAP(imm_v64_shr_n_byte<2>),
- MAP(imm_v64_shr_n_byte<3>),
- MAP(imm_v64_shr_n_byte<4>),
- MAP(imm_v64_shr_n_byte<5>),
- MAP(imm_v64_shr_n_byte<6>),
- MAP(imm_v64_shr_n_byte<7>),
- MAP(imm_v64_shl_n_byte<1>),
- MAP(imm_v64_shl_n_byte<2>),
- MAP(imm_v64_shl_n_byte<3>),
- MAP(imm_v64_shl_n_byte<4>),
- MAP(imm_v64_shl_n_byte<5>),
- MAP(imm_v64_shl_n_byte<6>),
- MAP(imm_v64_shl_n_byte<7>),
- MAP(imm_v64_shl_n_8<1>),
- MAP(imm_v64_shl_n_8<2>),
- MAP(imm_v64_shl_n_8<3>),
- MAP(imm_v64_shl_n_8<4>),
- MAP(imm_v64_shl_n_8<5>),
- MAP(imm_v64_shl_n_8<6>),
- MAP(imm_v64_shl_n_8<7>),
- MAP(imm_v64_shr_n_u8<1>),
- MAP(imm_v64_shr_n_u8<2>),
- MAP(imm_v64_shr_n_u8<3>),
- MAP(imm_v64_shr_n_u8<4>),
- MAP(imm_v64_shr_n_u8<5>),
- MAP(imm_v64_shr_n_u8<6>),
- MAP(imm_v64_shr_n_u8<7>),
- MAP(imm_v64_shr_n_s8<1>),
- MAP(imm_v64_shr_n_s8<2>),
- MAP(imm_v64_shr_n_s8<3>),
- MAP(imm_v64_shr_n_s8<4>),
- MAP(imm_v64_shr_n_s8<5>),
- MAP(imm_v64_shr_n_s8<6>),
- MAP(imm_v64_shr_n_s8<7>),
- MAP(imm_v64_shl_n_16<1>),
- MAP(imm_v64_shl_n_16<2>),
- MAP(imm_v64_shl_n_16<4>),
- MAP(imm_v64_shl_n_16<6>),
- MAP(imm_v64_shl_n_16<8>),
- MAP(imm_v64_shl_n_16<10>),
- MAP(imm_v64_shl_n_16<12>),
- MAP(imm_v64_shl_n_16<14>),
- MAP(imm_v64_shr_n_u16<1>),
- MAP(imm_v64_shr_n_u16<2>),
- MAP(imm_v64_shr_n_u16<4>),
- MAP(imm_v64_shr_n_u16<6>),
- MAP(imm_v64_shr_n_u16<8>),
- MAP(imm_v64_shr_n_u16<10>),
- MAP(imm_v64_shr_n_u16<12>),
- MAP(imm_v64_shr_n_u16<14>),
- MAP(imm_v64_shr_n_s16<1>),
- MAP(imm_v64_shr_n_s16<2>),
- MAP(imm_v64_shr_n_s16<4>),
- MAP(imm_v64_shr_n_s16<6>),
- MAP(imm_v64_shr_n_s16<8>),
- MAP(imm_v64_shr_n_s16<10>),
- MAP(imm_v64_shr_n_s16<12>),
- MAP(imm_v64_shr_n_s16<14>),
- MAP(imm_v64_shl_n_32<1>),
- MAP(imm_v64_shl_n_32<4>),
- MAP(imm_v64_shl_n_32<8>),
- MAP(imm_v64_shl_n_32<12>),
- MAP(imm_v64_shl_n_32<16>),
- MAP(imm_v64_shl_n_32<20>),
- MAP(imm_v64_shl_n_32<24>),
- MAP(imm_v64_shl_n_32<28>),
- MAP(imm_v64_shr_n_u32<1>),
- MAP(imm_v64_shr_n_u32<4>),
- MAP(imm_v64_shr_n_u32<8>),
- MAP(imm_v64_shr_n_u32<12>),
- MAP(imm_v64_shr_n_u32<16>),
- MAP(imm_v64_shr_n_u32<20>),
- MAP(imm_v64_shr_n_u32<24>),
- MAP(imm_v64_shr_n_u32<28>),
- MAP(imm_v64_shr_n_s32<1>),
- MAP(imm_v64_shr_n_s32<4>),
- MAP(imm_v64_shr_n_s32<8>),
- MAP(imm_v64_shr_n_s32<12>),
- MAP(imm_v64_shr_n_s32<16>),
- MAP(imm_v64_shr_n_s32<20>),
- MAP(imm_v64_shr_n_s32<24>),
- MAP(imm_v64_shr_n_s32<28>),
- MAP(v64_shl_8),
- MAP(v64_shr_u8),
- MAP(v64_shr_s8),
- MAP(v64_shl_16),
- MAP(v64_shr_u16),
- MAP(v64_shr_s16),
- MAP(v64_shl_32),
- MAP(v64_shr_u32),
- MAP(v64_shr_s32),
- MAP(v64_hadd_u8),
- MAP(v64_hadd_s16),
- MAP(v64_dotp_s16),
- MAP(v64_dotp_su8),
- MAP(v64_u64),
- MAP(v64_low_u32),
- MAP(v64_high_u32),
- MAP(v64_low_s32),
- MAP(v64_high_s32),
- MAP(v64_dup_8),
- MAP(v64_dup_16),
- MAP(v64_dup_32),
- MAP(v64_from_32),
- MAP(v64_zero),
- MAP(v64_from_16),
- MAP(v128_sad_u8),
- MAP(v128_ssd_u8),
- MAP(v128_sad_u16),
- MAP(v128_ssd_s16),
- MAP(v128_add_8),
- MAP(v128_add_16),
- MAP(v128_sadd_s8),
- MAP(v128_sadd_u8),
- MAP(v128_sadd_s16),
- MAP(v128_add_32),
- MAP(v128_add_64),
- MAP(v128_sub_8),
- MAP(v128_ssub_u8),
- MAP(v128_ssub_s8),
- MAP(v128_sub_16),
- MAP(v128_ssub_s16),
- MAP(v128_ssub_u16),
- MAP(v128_sub_32),
- MAP(v128_sub_64),
- MAP(v128_ziplo_8),
- MAP(v128_ziphi_8),
- MAP(v128_ziplo_16),
- MAP(v128_ziphi_16),
- MAP(v128_ziplo_32),
- MAP(v128_ziphi_32),
- MAP(v128_ziplo_64),
- MAP(v128_ziphi_64),
- MAP(v128_unziphi_8),
- MAP(v128_unziplo_8),
- MAP(v128_unziphi_16),
- MAP(v128_unziplo_16),
- MAP(v128_unziphi_32),
- MAP(v128_unziplo_32),
- MAP(v128_pack_s32_u16),
- MAP(v128_pack_s32_s16),
- MAP(v128_pack_s16_u8),
- MAP(v128_pack_s16_s8),
- MAP(v128_or),
- MAP(v128_xor),
- MAP(v128_and),
- MAP(v128_andn),
- MAP(v128_mullo_s16),
- MAP(v128_mulhi_s16),
- MAP(v128_mullo_s32),
- MAP(v128_madd_s16),
- MAP(v128_madd_us8),
- MAP(v128_avg_u8),
- MAP(v128_rdavg_u8),
- MAP(v128_rdavg_u16),
- MAP(v128_avg_u16),
- MAP(v128_min_u8),
- MAP(v128_max_u8),
- MAP(v128_min_s8),
- MAP(v128_max_s8),
- MAP(v128_min_s16),
- MAP(v128_max_s16),
- MAP(v128_min_s32),
- MAP(v128_max_s32),
- MAP(v128_cmpgt_s8),
- MAP(v128_cmplt_s8),
- MAP(v128_cmpeq_8),
- MAP(v128_cmpgt_s16),
- MAP(v128_cmpeq_16),
- MAP(v128_cmplt_s16),
- MAP(v128_cmpgt_s32),
- MAP(v128_cmpeq_32),
- MAP(v128_cmplt_s32),
- MAP(v128_shuffle_8),
- MAP(imm_v128_align<1>),
- MAP(imm_v128_align<2>),
- MAP(imm_v128_align<3>),
- MAP(imm_v128_align<4>),
- MAP(imm_v128_align<5>),
- MAP(imm_v128_align<6>),
- MAP(imm_v128_align<7>),
- MAP(imm_v128_align<8>),
- MAP(imm_v128_align<9>),
- MAP(imm_v128_align<10>),
- MAP(imm_v128_align<11>),
- MAP(imm_v128_align<12>),
- MAP(imm_v128_align<13>),
- MAP(imm_v128_align<14>),
- MAP(imm_v128_align<15>),
- MAP(v128_abs_s8),
- MAP(v128_abs_s16),
- MAP(v128_padd_u8),
- MAP(v128_padd_s16),
- MAP(v128_unpacklo_u16_s32),
- MAP(v128_unpacklo_s16_s32),
- MAP(v128_unpackhi_u16_s32),
- MAP(v128_unpackhi_s16_s32),
- MAP(imm_v128_shr_n_byte<1>),
- MAP(imm_v128_shr_n_byte<2>),
- MAP(imm_v128_shr_n_byte<3>),
- MAP(imm_v128_shr_n_byte<4>),
- MAP(imm_v128_shr_n_byte<5>),
- MAP(imm_v128_shr_n_byte<6>),
- MAP(imm_v128_shr_n_byte<7>),
- MAP(imm_v128_shr_n_byte<8>),
- MAP(imm_v128_shr_n_byte<9>),
- MAP(imm_v128_shr_n_byte<10>),
- MAP(imm_v128_shr_n_byte<11>),
- MAP(imm_v128_shr_n_byte<12>),
- MAP(imm_v128_shr_n_byte<13>),
- MAP(imm_v128_shr_n_byte<14>),
- MAP(imm_v128_shr_n_byte<15>),
- MAP(imm_v128_shl_n_byte<1>),
- MAP(imm_v128_shl_n_byte<2>),
- MAP(imm_v128_shl_n_byte<3>),
- MAP(imm_v128_shl_n_byte<4>),
- MAP(imm_v128_shl_n_byte<5>),
- MAP(imm_v128_shl_n_byte<6>),
- MAP(imm_v128_shl_n_byte<7>),
- MAP(imm_v128_shl_n_byte<8>),
- MAP(imm_v128_shl_n_byte<9>),
- MAP(imm_v128_shl_n_byte<10>),
- MAP(imm_v128_shl_n_byte<11>),
- MAP(imm_v128_shl_n_byte<12>),
- MAP(imm_v128_shl_n_byte<13>),
- MAP(imm_v128_shl_n_byte<14>),
- MAP(imm_v128_shl_n_byte<15>),
- MAP(imm_v128_shl_n_8<1>),
- MAP(imm_v128_shl_n_8<2>),
- MAP(imm_v128_shl_n_8<3>),
- MAP(imm_v128_shl_n_8<4>),
- MAP(imm_v128_shl_n_8<5>),
- MAP(imm_v128_shl_n_8<6>),
- MAP(imm_v128_shl_n_8<7>),
- MAP(imm_v128_shr_n_u8<1>),
- MAP(imm_v128_shr_n_u8<2>),
- MAP(imm_v128_shr_n_u8<3>),
- MAP(imm_v128_shr_n_u8<4>),
- MAP(imm_v128_shr_n_u8<5>),
- MAP(imm_v128_shr_n_u8<6>),
- MAP(imm_v128_shr_n_u8<7>),
- MAP(imm_v128_shr_n_s8<1>),
- MAP(imm_v128_shr_n_s8<2>),
- MAP(imm_v128_shr_n_s8<3>),
- MAP(imm_v128_shr_n_s8<4>),
- MAP(imm_v128_shr_n_s8<5>),
- MAP(imm_v128_shr_n_s8<6>),
- MAP(imm_v128_shr_n_s8<7>),
- MAP(imm_v128_shl_n_16<1>),
- MAP(imm_v128_shl_n_16<2>),
- MAP(imm_v128_shl_n_16<4>),
- MAP(imm_v128_shl_n_16<6>),
- MAP(imm_v128_shl_n_16<8>),
- MAP(imm_v128_shl_n_16<10>),
- MAP(imm_v128_shl_n_16<12>),
- MAP(imm_v128_shl_n_16<14>),
- MAP(imm_v128_shr_n_u16<1>),
- MAP(imm_v128_shr_n_u16<2>),
- MAP(imm_v128_shr_n_u16<4>),
- MAP(imm_v128_shr_n_u16<6>),
- MAP(imm_v128_shr_n_u16<8>),
- MAP(imm_v128_shr_n_u16<10>),
- MAP(imm_v128_shr_n_u16<12>),
- MAP(imm_v128_shr_n_u16<14>),
- MAP(imm_v128_shr_n_s16<1>),
- MAP(imm_v128_shr_n_s16<2>),
- MAP(imm_v128_shr_n_s16<4>),
- MAP(imm_v128_shr_n_s16<6>),
- MAP(imm_v128_shr_n_s16<8>),
- MAP(imm_v128_shr_n_s16<10>),
- MAP(imm_v128_shr_n_s16<12>),
- MAP(imm_v128_shr_n_s16<14>),
- MAP(imm_v128_shl_n_32<1>),
- MAP(imm_v128_shl_n_32<4>),
- MAP(imm_v128_shl_n_32<8>),
- MAP(imm_v128_shl_n_32<12>),
- MAP(imm_v128_shl_n_32<16>),
- MAP(imm_v128_shl_n_32<20>),
- MAP(imm_v128_shl_n_32<24>),
- MAP(imm_v128_shl_n_32<28>),
- MAP(imm_v128_shr_n_u32<1>),
- MAP(imm_v128_shr_n_u32<4>),
- MAP(imm_v128_shr_n_u32<8>),
- MAP(imm_v128_shr_n_u32<12>),
- MAP(imm_v128_shr_n_u32<16>),
- MAP(imm_v128_shr_n_u32<20>),
- MAP(imm_v128_shr_n_u32<24>),
- MAP(imm_v128_shr_n_u32<28>),
- MAP(imm_v128_shr_n_s32<1>),
- MAP(imm_v128_shr_n_s32<4>),
- MAP(imm_v128_shr_n_s32<8>),
- MAP(imm_v128_shr_n_s32<12>),
- MAP(imm_v128_shr_n_s32<16>),
- MAP(imm_v128_shr_n_s32<20>),
- MAP(imm_v128_shr_n_s32<24>),
- MAP(imm_v128_shr_n_s32<28>),
- MAP(imm_v128_shl_n_64<1>),
- MAP(imm_v128_shl_n_64<4>),
- MAP(imm_v128_shl_n_64<8>),
- MAP(imm_v128_shl_n_64<12>),
- MAP(imm_v128_shl_n_64<16>),
- MAP(imm_v128_shl_n_64<20>),
- MAP(imm_v128_shl_n_64<24>),
- MAP(imm_v128_shl_n_64<28>),
- MAP(imm_v128_shl_n_64<32>),
- MAP(imm_v128_shl_n_64<36>),
- MAP(imm_v128_shl_n_64<40>),
- MAP(imm_v128_shl_n_64<44>),
- MAP(imm_v128_shl_n_64<48>),
- MAP(imm_v128_shl_n_64<52>),
- MAP(imm_v128_shl_n_64<56>),
- MAP(imm_v128_shl_n_64<60>),
- MAP(imm_v128_shr_n_u64<1>),
- MAP(imm_v128_shr_n_u64<4>),
- MAP(imm_v128_shr_n_u64<8>),
- MAP(imm_v128_shr_n_u64<12>),
- MAP(imm_v128_shr_n_u64<16>),
- MAP(imm_v128_shr_n_u64<20>),
- MAP(imm_v128_shr_n_u64<24>),
- MAP(imm_v128_shr_n_u64<28>),
- MAP(imm_v128_shr_n_u64<32>),
- MAP(imm_v128_shr_n_u64<36>),
- MAP(imm_v128_shr_n_u64<40>),
- MAP(imm_v128_shr_n_u64<44>),
- MAP(imm_v128_shr_n_u64<48>),
- MAP(imm_v128_shr_n_u64<52>),
- MAP(imm_v128_shr_n_u64<56>),
- MAP(imm_v128_shr_n_u64<60>),
- MAP(imm_v128_shr_n_s64<1>),
- MAP(imm_v128_shr_n_s64<4>),
- MAP(imm_v128_shr_n_s64<8>),
- MAP(imm_v128_shr_n_s64<12>),
- MAP(imm_v128_shr_n_s64<16>),
- MAP(imm_v128_shr_n_s64<20>),
- MAP(imm_v128_shr_n_s64<24>),
- MAP(imm_v128_shr_n_s64<28>),
- MAP(imm_v128_shr_n_s64<32>),
- MAP(imm_v128_shr_n_s64<36>),
- MAP(imm_v128_shr_n_s64<40>),
- MAP(imm_v128_shr_n_s64<44>),
- MAP(imm_v128_shr_n_s64<48>),
- MAP(imm_v128_shr_n_s64<52>),
- MAP(imm_v128_shr_n_s64<56>),
- MAP(imm_v128_shr_n_s64<60>),
- MAP(v128_from_v64),
- MAP(v128_zip_8),
- MAP(v128_zip_16),
- MAP(v128_zip_32),
- MAP(v128_mul_s16),
- MAP(v128_unpack_u8_s16),
- MAP(v128_unpack_s8_s16),
- MAP(v128_unpack_u16_s32),
- MAP(v128_unpack_s16_s32),
- MAP(v128_shl_8),
- MAP(v128_shr_u8),
- MAP(v128_shr_s8),
- MAP(v128_shl_16),
- MAP(v128_shr_u16),
- MAP(v128_shr_s16),
- MAP(v128_shl_32),
- MAP(v128_shr_u32),
- MAP(v128_shr_s32),
- MAP(v128_shl_64),
- MAP(v128_shr_u64),
- MAP(v128_shr_s64),
- MAP(v128_hadd_u8),
- MAP(v128_dotp_su8),
- MAP(v128_dotp_s16),
- MAP(v128_dotp_s32),
- MAP(v128_low_u32),
- MAP(v128_low_v64),
- MAP(v128_high_v64),
- MAP(v128_from_64),
- MAP(v128_from_32),
- MAP(v128_movemask_8),
- MAP(v128_zero),
- MAP(v128_dup_8),
- MAP(v128_dup_16),
- MAP(v128_dup_32),
- MAP(v128_dup_64),
- MAP(v128_unpacklo_u8_s16),
- MAP(v128_unpackhi_u8_s16),
- MAP(v128_unpacklo_s8_s16),
- MAP(v128_unpackhi_s8_s16),
- MAP(v128_blend_8),
- MAP(u32_load_unaligned),
- MAP(u32_store_unaligned),
- MAP(v64_load_unaligned),
- MAP(v64_store_unaligned),
- MAP(v128_load_unaligned),
- MAP(v128_store_unaligned),
- MAP(v256_sad_u8),
- MAP(v256_ssd_u8),
- MAP(v256_sad_u16),
- MAP(v256_ssd_s16),
- MAP(v256_hadd_u8),
- MAP(v256_low_u64),
- MAP(v256_dotp_su8),
- MAP(v256_dotp_s16),
- MAP(v256_dotp_s32),
- MAP(v256_add_8),
- MAP(v256_add_16),
- MAP(v256_sadd_s8),
- MAP(v256_sadd_u8),
- MAP(v256_sadd_s16),
- MAP(v256_add_32),
- MAP(v256_add_64),
- MAP(v256_sub_8),
- MAP(v256_ssub_u8),
- MAP(v256_ssub_s8),
- MAP(v256_sub_16),
- MAP(v256_ssub_u16),
- MAP(v256_ssub_s16),
- MAP(v256_sub_32),
- MAP(v256_sub_64),
- MAP(v256_ziplo_8),
- MAP(v256_ziphi_8),
- MAP(v256_ziplo_16),
- MAP(v256_ziphi_16),
- MAP(v256_ziplo_32),
- MAP(v256_ziphi_32),
- MAP(v256_ziplo_64),
- MAP(v256_ziphi_64),
- MAP(v256_unziphi_8),
- MAP(v256_unziplo_8),
- MAP(v256_unziphi_16),
- MAP(v256_unziplo_16),
- MAP(v256_unziphi_32),
- MAP(v256_unziplo_32),
- MAP(v256_unziphi_64),
- MAP(v256_unziplo_64),
- MAP(v256_pack_s32_u16),
- MAP(v256_pack_s32_s16),
- MAP(v256_pack_s16_u8),
- MAP(v256_pack_s16_s8),
- MAP(v256_or),
- MAP(v256_xor),
- MAP(v256_and),
- MAP(v256_andn),
- MAP(v256_mullo_s16),
- MAP(v256_mulhi_s16),
- MAP(v256_mullo_s32),
- MAP(v256_madd_s16),
- MAP(v256_madd_us8),
- MAP(v256_avg_u8),
- MAP(v256_rdavg_u8),
- MAP(v256_rdavg_u16),
- MAP(v256_avg_u16),
- MAP(v256_min_u8),
- MAP(v256_max_u8),
- MAP(v256_min_s8),
- MAP(v256_max_s8),
- MAP(v256_min_s16),
- MAP(v256_max_s16),
- MAP(v256_min_s32),
- MAP(v256_max_s32),
- MAP(v256_cmpgt_s8),
- MAP(v256_cmplt_s8),
- MAP(v256_cmpeq_8),
- MAP(v256_cmpgt_s16),
- MAP(v256_cmplt_s16),
- MAP(v256_cmpeq_16),
- MAP(v256_cmpgt_s32),
- MAP(v256_cmplt_s32),
- MAP(v256_cmpeq_32),
- MAP(v256_shuffle_8),
- MAP(v256_pshuffle_8),
- MAP(v256_wideshuffle_8),
- MAP(imm_v256_align<1>),
- MAP(imm_v256_align<2>),
- MAP(imm_v256_align<3>),
- MAP(imm_v256_align<4>),
- MAP(imm_v256_align<5>),
- MAP(imm_v256_align<6>),
- MAP(imm_v256_align<7>),
- MAP(imm_v256_align<8>),
- MAP(imm_v256_align<9>),
- MAP(imm_v256_align<10>),
- MAP(imm_v256_align<11>),
- MAP(imm_v256_align<12>),
- MAP(imm_v256_align<13>),
- MAP(imm_v256_align<14>),
- MAP(imm_v256_align<15>),
- MAP(imm_v256_align<16>),
- MAP(imm_v256_align<17>),
- MAP(imm_v256_align<18>),
- MAP(imm_v256_align<19>),
- MAP(imm_v256_align<20>),
- MAP(imm_v256_align<21>),
- MAP(imm_v256_align<22>),
- MAP(imm_v256_align<23>),
- MAP(imm_v256_align<24>),
- MAP(imm_v256_align<25>),
- MAP(imm_v256_align<26>),
- MAP(imm_v256_align<27>),
- MAP(imm_v256_align<28>),
- MAP(imm_v256_align<29>),
- MAP(imm_v256_align<30>),
- MAP(imm_v256_align<31>),
- MAP(v256_from_v128),
- MAP(v256_zip_8),
- MAP(v256_zip_16),
- MAP(v256_zip_32),
- MAP(v256_mul_s16),
- MAP(v256_unpack_u8_s16),
- MAP(v256_unpack_s8_s16),
- MAP(v256_unpack_u16_s32),
- MAP(v256_unpack_s16_s32),
- MAP(v256_shl_8),
- MAP(v256_shr_u8),
- MAP(v256_shr_s8),
- MAP(v256_shl_16),
- MAP(v256_shr_u16),
- MAP(v256_shr_s16),
- MAP(v256_shl_32),
- MAP(v256_shr_u32),
- MAP(v256_shr_s32),
- MAP(v256_shl_64),
- MAP(v256_shr_u64),
- MAP(v256_shr_s64),
- MAP(v256_abs_s8),
- MAP(v256_abs_s16),
- MAP(v256_padd_u8),
- MAP(v256_padd_s16),
- MAP(v256_unpacklo_u16_s32),
- MAP(v256_unpacklo_s16_s32),
- MAP(v256_unpackhi_u16_s32),
- MAP(v256_unpackhi_s16_s32),
- MAP(imm_v256_shr_n_word<1>),
- MAP(imm_v256_shr_n_word<2>),
- MAP(imm_v256_shr_n_word<3>),
- MAP(imm_v256_shr_n_word<4>),
- MAP(imm_v256_shr_n_word<5>),
- MAP(imm_v256_shr_n_word<6>),
- MAP(imm_v256_shr_n_word<7>),
- MAP(imm_v256_shr_n_word<8>),
- MAP(imm_v256_shr_n_word<9>),
- MAP(imm_v256_shr_n_word<10>),
- MAP(imm_v256_shr_n_word<11>),
- MAP(imm_v256_shr_n_word<12>),
- MAP(imm_v256_shr_n_word<13>),
- MAP(imm_v256_shr_n_word<14>),
- MAP(imm_v256_shr_n_word<15>),
- MAP(imm_v256_shl_n_word<1>),
- MAP(imm_v256_shl_n_word<2>),
- MAP(imm_v256_shl_n_word<3>),
- MAP(imm_v256_shl_n_word<4>),
- MAP(imm_v256_shl_n_word<5>),
- MAP(imm_v256_shl_n_word<6>),
- MAP(imm_v256_shl_n_word<7>),
- MAP(imm_v256_shl_n_word<8>),
- MAP(imm_v256_shl_n_word<9>),
- MAP(imm_v256_shl_n_word<10>),
- MAP(imm_v256_shl_n_word<11>),
- MAP(imm_v256_shl_n_word<12>),
- MAP(imm_v256_shl_n_word<13>),
- MAP(imm_v256_shl_n_word<14>),
- MAP(imm_v256_shl_n_word<15>),
- MAP(imm_v256_shr_n_byte<1>),
- MAP(imm_v256_shr_n_byte<2>),
- MAP(imm_v256_shr_n_byte<3>),
- MAP(imm_v256_shr_n_byte<4>),
- MAP(imm_v256_shr_n_byte<5>),
- MAP(imm_v256_shr_n_byte<6>),
- MAP(imm_v256_shr_n_byte<7>),
- MAP(imm_v256_shr_n_byte<8>),
- MAP(imm_v256_shr_n_byte<9>),
- MAP(imm_v256_shr_n_byte<10>),
- MAP(imm_v256_shr_n_byte<11>),
- MAP(imm_v256_shr_n_byte<12>),
- MAP(imm_v256_shr_n_byte<13>),
- MAP(imm_v256_shr_n_byte<14>),
- MAP(imm_v256_shr_n_byte<15>),
- MAP(imm_v256_shr_n_byte<16>),
- MAP(imm_v256_shr_n_byte<17>),
- MAP(imm_v256_shr_n_byte<18>),
- MAP(imm_v256_shr_n_byte<19>),
- MAP(imm_v256_shr_n_byte<20>),
- MAP(imm_v256_shr_n_byte<21>),
- MAP(imm_v256_shr_n_byte<22>),
- MAP(imm_v256_shr_n_byte<23>),
- MAP(imm_v256_shr_n_byte<24>),
- MAP(imm_v256_shr_n_byte<25>),
- MAP(imm_v256_shr_n_byte<26>),
- MAP(imm_v256_shr_n_byte<27>),
- MAP(imm_v256_shr_n_byte<28>),
- MAP(imm_v256_shr_n_byte<29>),
- MAP(imm_v256_shr_n_byte<30>),
- MAP(imm_v256_shr_n_byte<31>),
- MAP(imm_v256_shl_n_byte<1>),
- MAP(imm_v256_shl_n_byte<2>),
- MAP(imm_v256_shl_n_byte<3>),
- MAP(imm_v256_shl_n_byte<4>),
- MAP(imm_v256_shl_n_byte<5>),
- MAP(imm_v256_shl_n_byte<6>),
- MAP(imm_v256_shl_n_byte<7>),
- MAP(imm_v256_shl_n_byte<8>),
- MAP(imm_v256_shl_n_byte<9>),
- MAP(imm_v256_shl_n_byte<10>),
- MAP(imm_v256_shl_n_byte<11>),
- MAP(imm_v256_shl_n_byte<12>),
- MAP(imm_v256_shl_n_byte<13>),
- MAP(imm_v256_shl_n_byte<14>),
- MAP(imm_v256_shl_n_byte<15>),
- MAP(imm_v256_shl_n_byte<16>),
- MAP(imm_v256_shl_n_byte<17>),
- MAP(imm_v256_shl_n_byte<18>),
- MAP(imm_v256_shl_n_byte<19>),
- MAP(imm_v256_shl_n_byte<20>),
- MAP(imm_v256_shl_n_byte<21>),
- MAP(imm_v256_shl_n_byte<22>),
- MAP(imm_v256_shl_n_byte<23>),
- MAP(imm_v256_shl_n_byte<24>),
- MAP(imm_v256_shl_n_byte<25>),
- MAP(imm_v256_shl_n_byte<26>),
- MAP(imm_v256_shl_n_byte<27>),
- MAP(imm_v256_shl_n_byte<28>),
- MAP(imm_v256_shl_n_byte<29>),
- MAP(imm_v256_shl_n_byte<30>),
- MAP(imm_v256_shl_n_byte<31>),
- MAP(imm_v256_shl_n_8<1>),
- MAP(imm_v256_shl_n_8<2>),
- MAP(imm_v256_shl_n_8<3>),
- MAP(imm_v256_shl_n_8<4>),
- MAP(imm_v256_shl_n_8<5>),
- MAP(imm_v256_shl_n_8<6>),
- MAP(imm_v256_shl_n_8<7>),
- MAP(imm_v256_shr_n_u8<1>),
- MAP(imm_v256_shr_n_u8<2>),
- MAP(imm_v256_shr_n_u8<3>),
- MAP(imm_v256_shr_n_u8<4>),
- MAP(imm_v256_shr_n_u8<5>),
- MAP(imm_v256_shr_n_u8<6>),
- MAP(imm_v256_shr_n_u8<7>),
- MAP(imm_v256_shr_n_s8<1>),
- MAP(imm_v256_shr_n_s8<2>),
- MAP(imm_v256_shr_n_s8<3>),
- MAP(imm_v256_shr_n_s8<4>),
- MAP(imm_v256_shr_n_s8<5>),
- MAP(imm_v256_shr_n_s8<6>),
- MAP(imm_v256_shr_n_s8<7>),
- MAP(imm_v256_shl_n_16<1>),
- MAP(imm_v256_shl_n_16<2>),
- MAP(imm_v256_shl_n_16<4>),
- MAP(imm_v256_shl_n_16<6>),
- MAP(imm_v256_shl_n_16<8>),
- MAP(imm_v256_shl_n_16<10>),
- MAP(imm_v256_shl_n_16<12>),
- MAP(imm_v256_shl_n_16<14>),
- MAP(imm_v256_shr_n_u16<1>),
- MAP(imm_v256_shr_n_u16<2>),
- MAP(imm_v256_shr_n_u16<4>),
- MAP(imm_v256_shr_n_u16<6>),
- MAP(imm_v256_shr_n_u16<8>),
- MAP(imm_v256_shr_n_u16<10>),
- MAP(imm_v256_shr_n_u16<12>),
- MAP(imm_v256_shr_n_u16<14>),
- MAP(imm_v256_shr_n_s16<1>),
- MAP(imm_v256_shr_n_s16<2>),
- MAP(imm_v256_shr_n_s16<4>),
- MAP(imm_v256_shr_n_s16<6>),
- MAP(imm_v256_shr_n_s16<8>),
- MAP(imm_v256_shr_n_s16<10>),
- MAP(imm_v256_shr_n_s16<12>),
- MAP(imm_v256_shr_n_s16<14>),
- MAP(imm_v256_shl_n_32<1>),
- MAP(imm_v256_shl_n_32<4>),
- MAP(imm_v256_shl_n_32<8>),
- MAP(imm_v256_shl_n_32<12>),
- MAP(imm_v256_shl_n_32<16>),
- MAP(imm_v256_shl_n_32<20>),
- MAP(imm_v256_shl_n_32<24>),
- MAP(imm_v256_shl_n_32<28>),
- MAP(imm_v256_shr_n_u32<1>),
- MAP(imm_v256_shr_n_u32<4>),
- MAP(imm_v256_shr_n_u32<8>),
- MAP(imm_v256_shr_n_u32<12>),
- MAP(imm_v256_shr_n_u32<16>),
- MAP(imm_v256_shr_n_u32<20>),
- MAP(imm_v256_shr_n_u32<24>),
- MAP(imm_v256_shr_n_u32<28>),
- MAP(imm_v256_shr_n_s32<1>),
- MAP(imm_v256_shr_n_s32<4>),
- MAP(imm_v256_shr_n_s32<8>),
- MAP(imm_v256_shr_n_s32<12>),
- MAP(imm_v256_shr_n_s32<16>),
- MAP(imm_v256_shr_n_s32<20>),
- MAP(imm_v256_shr_n_s32<24>),
- MAP(imm_v256_shr_n_s32<28>),
- MAP(imm_v256_shl_n_64<1>),
- MAP(imm_v256_shl_n_64<4>),
- MAP(imm_v256_shl_n_64<8>),
- MAP(imm_v256_shl_n_64<12>),
- MAP(imm_v256_shl_n_64<16>),
- MAP(imm_v256_shl_n_64<20>),
- MAP(imm_v256_shl_n_64<24>),
- MAP(imm_v256_shl_n_64<28>),
- MAP(imm_v256_shl_n_64<32>),
- MAP(imm_v256_shl_n_64<36>),
- MAP(imm_v256_shl_n_64<40>),
- MAP(imm_v256_shl_n_64<44>),
- MAP(imm_v256_shl_n_64<48>),
- MAP(imm_v256_shl_n_64<52>),
- MAP(imm_v256_shl_n_64<56>),
- MAP(imm_v256_shl_n_64<60>),
- MAP(imm_v256_shr_n_u64<1>),
- MAP(imm_v256_shr_n_u64<4>),
- MAP(imm_v256_shr_n_u64<8>),
- MAP(imm_v256_shr_n_u64<12>),
- MAP(imm_v256_shr_n_u64<16>),
- MAP(imm_v256_shr_n_u64<20>),
- MAP(imm_v256_shr_n_u64<24>),
- MAP(imm_v256_shr_n_u64<28>),
- MAP(imm_v256_shr_n_u64<32>),
- MAP(imm_v256_shr_n_u64<36>),
- MAP(imm_v256_shr_n_u64<40>),
- MAP(imm_v256_shr_n_u64<44>),
- MAP(imm_v256_shr_n_u64<48>),
- MAP(imm_v256_shr_n_u64<52>),
- MAP(imm_v256_shr_n_u64<56>),
- MAP(imm_v256_shr_n_u64<60>),
- MAP(imm_v256_shr_n_s64<1>),
- MAP(imm_v256_shr_n_s64<4>),
- MAP(imm_v256_shr_n_s64<8>),
- MAP(imm_v256_shr_n_s64<12>),
- MAP(imm_v256_shr_n_s64<16>),
- MAP(imm_v256_shr_n_s64<20>),
- MAP(imm_v256_shr_n_s64<24>),
- MAP(imm_v256_shr_n_s64<28>),
- MAP(imm_v256_shr_n_s64<32>),
- MAP(imm_v256_shr_n_s64<36>),
- MAP(imm_v256_shr_n_s64<40>),
- MAP(imm_v256_shr_n_s64<44>),
- MAP(imm_v256_shr_n_s64<48>),
- MAP(imm_v256_shr_n_s64<52>),
- MAP(imm_v256_shr_n_s64<56>),
- MAP(imm_v256_shr_n_s64<60>),
- MAP(v256_movemask_8),
- MAP(v256_zero),
- MAP(v256_dup_8),
- MAP(v256_dup_16),
- MAP(v256_dup_32),
- MAP(v256_dup_64),
- MAP(v256_low_u32),
- MAP(v256_low_v64),
- MAP(v256_from_64),
- MAP(v256_from_v64),
- MAP(v256_ziplo_128),
- MAP(v256_ziphi_128),
- MAP(v256_unpacklo_u8_s16),
- MAP(v256_unpackhi_u8_s16),
- MAP(v256_unpacklo_s8_s16),
- MAP(v256_unpackhi_s8_s16),
- MAP(v256_blend_8),
- { NULL, NULL, NULL } };
-#undef MAP
-
-// Look up an intrinsic by name and hand back both of its implementations:
-// the C reference version and the machine tuned version. Test
-// instantiations that are not built against the machine tuned types cannot
-// name these functions directly, so they identify them by string and the
-// lookup is done here. For a name that is not in the table, both outputs
-// are taken from the all-NULL terminator entry.
-void Map(const char *name, fptr *ref, fptr *simd) {
-  unsigned int idx = 0;
-  // Advance until the name matches or the NULL-named terminator is reached.
-  while (m[idx].name != NULL && strcmp(name, m[idx].name) != 0) {
-    ++idx;
-  }
-
-  *ref = m[idx].ref;
-  *simd = m[idx].simd;
-}
-
-// Formats `size` bytes starting at `a` as one hexadecimal number prefixed
-// with "0x". When CONFIG_BIG_ENDIAN is zero the bytes are emitted from the
-// highest address down to the lowest, otherwise in memory order. Used for
-// the error messages of TestSimd1Arg, TestSimd2Args and TestSimd3Args.
-std::string Print(const uint8_t *a, int size) {
-  // Lower-case digits, i.e. the same output as snprintf(..., "%02x", byte).
-  static const char kHexDigits[] = "0123456789abcdef";
-  std::string text("0x");
-  for (int n = 0; n < size; ++n) {
-    const int pos = CONFIG_BIG_ENDIAN ? n : size - 1 - n;
-    const uint8_t byte = a[pos];
-    text += kHexDigits[byte >> 4];
-    text += kHexDigits[byte & 15];
-  }
-
-  return text;
-}
-
-// ANDs `mask` into every `maskwidth`-bit element of the `size`-byte buffer
-// `s`. TestSimd1Arg, TestSimd2Args and TestSimd3Args use it to restrict
-// argument ranges. A width of 0 leaves the buffer untouched; any width
-// other than 0, 8, 16, 32 or 64 raises a test failure. For widths above 8
-// the buffer must be aligned to the element size (checked with assert).
-void SetMask(uint8_t *s, int size, uint32_t mask, uint32_t maskwidth) {
-  if (maskwidth == 0) {
-    return;
-  }
-
-  if (maskwidth == 8) {
-    for (int k = 0; k < size; ++k) s[k] &= mask;
-  } else if (maskwidth == 16) {
-    assert((reinterpret_cast<uintptr_t>(s) & 1) == 0);
-    uint16_t *const lanes = reinterpret_cast<uint16_t *>(s);
-    for (int k = 0; k < size / 2; ++k) lanes[k] &= mask;
-  } else if (maskwidth == 32) {
-    assert((reinterpret_cast<uintptr_t>(s) & 3) == 0);
-    uint32_t *const lanes = reinterpret_cast<uint32_t *>(s);
-    for (int k = 0; k < size / 4; ++k) lanes[k] &= mask;
-  } else if (maskwidth == 64) {
-    assert((reinterpret_cast<uintptr_t>(s) & 7) == 0);
-    uint64_t *const lanes = reinterpret_cast<uint64_t *>(s);
-    // The 32 bit mask is widened to 64 bits before the AND.
-    for (int k = 0; k < size / 8; ++k) lanes[k] &= mask;
-  } else {
-    FAIL() << "Unsupported mask width";
-  }
-}
-
-// Extra store helpers for scalar results. The first three go through the
-// machine tuned primitives, the c_ prefixed three through the C reference
-// primitives. Signed values are stored as their unsigned bit pattern.
-void u64_store_aligned(void *p, uint64_t a) {
-  // A 64 bit scalar is stored by way of a v64.
-  v64_store_aligned(p, v64_from_64(a));
-}
-void s32_store_aligned(void *p, int32_t a) {
-  const uint32_t bits = static_cast<uint32_t>(a);
-  u32_store_aligned(p, bits);
-}
-void s64_store_aligned(void *p, int64_t a) {
-  u64_store_aligned(p, static_cast<uint64_t>(a));
-}
-
-void c_u64_store_aligned(void *p, uint64_t a) {
-  // A 64 bit scalar is stored by way of a c_v64.
-  c_v64_store_aligned(p, c_v64_from_64(a));
-}
-
-void c_s32_store_aligned(void *p, int32_t a) {
-  const uint32_t bits = static_cast<uint32_t>(a);
-  c_u32_store_aligned(p, bits);
-}
-
-void c_s64_store_aligned(void *p, int64_t a) {
-  c_u64_store_aligned(p, static_cast<uint64_t>(a));
-}
-
-// Extra load helpers for scalar arguments: the 64 bit variants load a v64
-// (or c_v64) and extract its 64 bit value, the narrower ones read the
-// scalar straight from memory.
-uint64_t u64_load_aligned(const void *p) {
-  const uint64_t value = v64_u64(v64_load_aligned(p));
-  return value;
-}
-uint16_t u16_load_aligned(const void *p) {
-  const uint16_t *const src = reinterpret_cast<const uint16_t *>(p);
-  return src[0];
-}
-uint8_t u8_load_aligned(const void *p) {
-  const uint8_t *const src = reinterpret_cast<const uint8_t *>(p);
-  return src[0];
-}
-
-uint64_t c_u64_load_aligned(const void *p) {
-  const uint64_t value = c_v64_u64(c_v64_load_aligned(p));
-  return value;
-}
-uint16_t c_u16_load_aligned(const void *p) {
-  const uint16_t *const src = reinterpret_cast<const uint16_t *>(p);
-  return src[0];
-}
-uint8_t c_u8_load_aligned(const void *p) {
-  const uint8_t *const src = reinterpret_cast<const uint8_t *>(p);
-  return src[0];
-}
-
-// CompareSimd1Arg, CompareSimd2Args and CompareSimd3Args run an intrinsic
-// taking 1, 2 or 3 arguments respectively next to its C reference and
-// compare the stored results. Ideally the loads and stores would have been
-// template parameters, but v64 and v128 may be typedef'ed to the same type
-// (as on x86), in which case both could not be instantiated. The function
-// return and argument types are used instead, together with the C
-// equivalents, which always differ. All functions are passed as untyped
-// pointers and cast back here, which avoids type matching errors in the
-// branches that the typeid tests in the caller eliminate.
-//
-// Returns the memcmp() result over sizeof(CRet) bytes of ref_d and d, so 0
-// means the two implementations agree.
-template <typename Ret, typename Arg, typename CRet, typename CArg>
-int CompareSimd1Arg(fptr store, fptr load, fptr simd, void *d, fptr c_store,
-                    fptr c_load, fptr c_simd, void *ref_d, const void *a) {
-  // Signatures of the machine tuned functions.
-  typedef void (*StoreFn)(void *, Ret);
-  typedef Arg (*LoadFn)(const void *);
-  typedef Ret (*SimdFn)(Arg);
-  // Signatures of the C reference functions.
-  typedef void (*CStoreFn)(void *, CRet);
-  typedef CArg (*CLoadFn)(const void *);
-  typedef CRet (*CSimdFn)(CArg);
-
-  const StoreFn do_store = (StoreFn)store;
-  const LoadFn do_load = (LoadFn)load;
-  const SimdFn do_simd = (SimdFn)simd;
-  const CStoreFn do_c_store = (CStoreFn)c_store;
-  const CLoadFn do_c_load = (CLoadFn)c_load;
-  const CSimdFn do_c_simd = (CSimdFn)c_simd;
-
-  // Reference first, then the intrinsic.
-  do_c_store(ref_d, do_c_simd(do_c_load(a)));
-  do_store(d, do_simd(do_load(a)));
-
-  return memcmp(ref_d, d, sizeof(CRet));
-}
-
-// Two argument variant: loads `a` and `b` with the given loaders, runs the
-// C reference and the intrinsic on them, stores both results and returns
-// the memcmp() result over sizeof(CRet) bytes of ref_d and d (0 on match).
-template <typename Ret, typename Arg1, typename Arg2, typename CRet,
-          typename CArg1, typename CArg2>
-int CompareSimd2Args(fptr store, fptr load1, fptr load2, fptr simd, void *d,
-                     fptr c_store, fptr c_load1, fptr c_load2, fptr c_simd,
-                     void *ref_d, const void *a, const void *b) {
-  // Signatures of the machine tuned functions.
-  typedef void (*StoreFn)(void *, Ret);
-  typedef Arg1 (*Load1Fn)(const void *);
-  typedef Arg2 (*Load2Fn)(const void *);
-  typedef Ret (*SimdFn)(Arg1, Arg2);
-  // Signatures of the C reference functions.
-  typedef void (*CStoreFn)(void *, CRet);
-  typedef CArg1 (*CLoad1Fn)(const void *);
-  typedef CArg2 (*CLoad2Fn)(const void *);
-  typedef CRet (*CSimdFn)(CArg1, CArg2);
-
-  const StoreFn do_store = (StoreFn)store;
-  const Load1Fn do_load1 = (Load1Fn)load1;
-  const Load2Fn do_load2 = (Load2Fn)load2;
-  const SimdFn do_simd = (SimdFn)simd;
-  const CStoreFn do_c_store = (CStoreFn)c_store;
-  const CLoad1Fn do_c_load1 = (CLoad1Fn)c_load1;
-  const CLoad2Fn do_c_load2 = (CLoad2Fn)c_load2;
-  const CSimdFn do_c_simd = (CSimdFn)c_simd;
-
-  // Reference first, then the intrinsic.
-  do_c_store(ref_d, do_c_simd(do_c_load1(a), do_c_load2(b)));
-  do_store(d, do_simd(do_load1(a), do_load2(b)));
-
-  return memcmp(ref_d, d, sizeof(CRet));
-}
-
-// Three argument variant: loads `a`, `b` and `c` with the given loaders,
-// runs the C reference and the intrinsic on them, stores both results and
-// returns the memcmp() result over sizeof(CRet) bytes of ref_d and d
-// (0 on match). All functions arrive as untyped pointers and are cast back
-// to the signatures implied by the template parameters.
-template <typename Ret, typename Arg1, typename Arg2, typename Arg3,
-          typename CRet, typename CArg1, typename CArg2, typename CArg3>
-int CompareSimd3Args(fptr store, fptr load1, fptr load2, fptr load3, fptr simd,
-                     void *d, fptr c_store, fptr c_load1, fptr c_load2,
-                     fptr c_load3, fptr c_simd, void *ref_d, const void *a,
-                     const void *b, const void *c) {
-  void (*const my_store)(void *, Ret) = (void (*const)(void *, Ret))store;
-  Arg1 (*const my_load1)(const void *) = (Arg1(*const)(const void *))load1;
-  Arg2 (*const my_load2)(const void *) = (Arg2(*const)(const void *))load2;
-  Arg3 (*const my_load3)(const void *) = (Arg3(*const)(const void *))load3;
-  Ret (*const my_simd)(Arg1, Arg2, Arg3) = (Ret(*const)(Arg1, Arg2, Arg3))simd;
-  void (*const my_c_store)(void *, CRet) = (void (*const)(void *, CRet))c_store;
-  CArg1 (*const my_c_load1)(const void *) =
-      (CArg1(*const)(const void *))c_load1;
-  CArg2 (*const my_c_load2)(const void *) =
-      (CArg2(*const)(const void *))c_load2;
-  // The third reference loader must yield CArg3 (it was declared with CArg2,
-  // which is only correct when the second and third argument types match).
-  CArg3 (*const my_c_load3)(const void *) =
-      (CArg3(*const)(const void *))c_load3;
-  CRet (*const my_c_simd)(CArg1, CArg2, CArg3) =
-      (CRet(*const)(CArg1, CArg2, CArg3))c_simd;
-
-  // Call reference and intrinsic
-  my_c_store(ref_d, my_c_simd(my_c_load1(a), my_c_load2(b), my_c_load3(c)));
-  my_store(d, my_simd(my_load1(a), my_load2(b), my_load3(c)));
-
-  // Compare results
-  return memcmp(ref_d, d, sizeof(CRet));
-}
-
-} // namespace
-
-// Compares the one argument intrinsic called `name` with its C reference on
-// up to `iterations` random inputs, stopping at the first mismatch or test
-// failure. CRet and CArg are the return and argument types of the C
-// reference; they select the matching machine tuned types and the
-// load/store helpers. A non-zero `maskwidth` restricts the random argument
-// by ANDing `mask` into each `maskwidth`-bit element (see SetMask). An
-// unknown name or an unsupported type combination is reported as a test
-// failure.
-template <typename CRet, typename CArg>
-void TestSimd1Arg(uint32_t iterations, uint32_t mask, uint32_t maskwidth,
-                  const char *name) {
-  ACMRandom rnd(ACMRandom::DeterministicSeed());
-  fptr ref_simd;
-  fptr simd;
-  int error = 0;
-  DECLARE_ALIGNED(32, uint8_t, s[32]);
-  DECLARE_ALIGNED(32, uint8_t, d[32]);
-  DECLARE_ALIGNED(32, uint8_t, ref_d[32]);
-  assert(sizeof(CArg) <= 32 && sizeof(CRet) <= 32);
-  // Only the first sizeof(CArg) bytes of s are randomized below, but the
-  // mismatch message prints the whole buffer, so clear it up front.
-  memset(s, 0, sizeof(s));
-  memset(ref_d, 0, sizeof(ref_d));
-  memset(d, 0, sizeof(d));
-
-  Map(name, &ref_simd, &simd);
-  if (simd == NULL || ref_simd == NULL) {
-    FAIL() << "Internal error: Unknown intrinsic function " << name;
-  }
-  for (unsigned int count = 0;
-       count < iterations && !error && !testing::Test::HasFailure(); count++) {
-    for (unsigned int c = 0; c < sizeof(CArg); c++) s[c] = rnd.Rand8();
-
-    if (maskwidth) {
-      SetMask(s, sizeof(CArg), mask, maskwidth);
-    }
-
-    // Dispatch on the reference signature; the comment in each branch names
-    // it as <return type>_<argument type>.
-    if (typeid(CRet) == typeid(c_v64) && typeid(CArg) == typeid(c_v64)) {
-      // V64_V64
-      error = CompareSimd1Arg<v64, v64, CRet, CArg>(
-          reinterpret_cast<fptr>(v64_store_aligned),
-          reinterpret_cast<fptr>(v64_load_aligned), simd, d,
-          reinterpret_cast<fptr>(c_v64_store_aligned),
-          reinterpret_cast<fptr>(c_v64_load_aligned), ref_simd, ref_d, s);
-    } else if (typeid(CRet) == typeid(c_v64) &&
-               typeid(CArg) == typeid(uint8_t)) {
-      // V64_U8
-      error = CompareSimd1Arg<v64, uint8_t, CRet, CArg>(
-          reinterpret_cast<fptr>(v64_store_aligned),
-          reinterpret_cast<fptr>(u8_load_aligned), simd, d,
-          reinterpret_cast<fptr>(c_v64_store_aligned),
-          reinterpret_cast<fptr>(c_u8_load_aligned), ref_simd, ref_d, s);
-    } else if (typeid(CRet) == typeid(c_v64) &&
-               typeid(CArg) == typeid(uint16_t)) {
-      // V64_U16
-      error = CompareSimd1Arg<v64, uint16_t, CRet, CArg>(
-          reinterpret_cast<fptr>(v64_store_aligned),
-          reinterpret_cast<fptr>(u16_load_aligned), simd, d,
-          reinterpret_cast<fptr>(c_v64_store_aligned),
-          reinterpret_cast<fptr>(c_u16_load_aligned), ref_simd, ref_d, s);
-    } else if (typeid(CRet) == typeid(c_v64) &&
-               typeid(CArg) == typeid(uint32_t)) {
-      // V64_U32
-      error = CompareSimd1Arg<v64, uint32_t, CRet, CArg>(
-          reinterpret_cast<fptr>(v64_store_aligned),
-          reinterpret_cast<fptr>(u32_load_aligned), simd, d,
-          reinterpret_cast<fptr>(c_v64_store_aligned),
-          reinterpret_cast<fptr>(c_u32_load_aligned), ref_simd, ref_d, s);
-    } else if (typeid(CRet) == typeid(uint64_t) &&
-               typeid(CArg) == typeid(c_v64)) {
-      // U64_V64
-      error = CompareSimd1Arg<uint64_t, v64, CRet, CArg>(
-          reinterpret_cast<fptr>(u64_store_aligned),
-          reinterpret_cast<fptr>(v64_load_aligned), simd, d,
-          reinterpret_cast<fptr>(c_u64_store_aligned),
-          reinterpret_cast<fptr>(c_v64_load_aligned), ref_simd, ref_d, s);
-    } else if (typeid(CRet) == typeid(int64_t) &&
-               typeid(CArg) == typeid(c_v64)) {
-      // S64_V64
-      error = CompareSimd1Arg<int64_t, v64, CRet, CArg>(
-          reinterpret_cast<fptr>(s64_store_aligned),
-          reinterpret_cast<fptr>(v64_load_aligned), simd, d,
-          reinterpret_cast<fptr>(c_s64_store_aligned),
-          reinterpret_cast<fptr>(c_v64_load_aligned), ref_simd, ref_d, s);
-    } else if (typeid(CRet) == typeid(uint32_t) &&
-               typeid(CArg) == typeid(c_v64)) {
-      // U32_V64
-      error = CompareSimd1Arg<uint32_t, v64, CRet, CArg>(
-          reinterpret_cast<fptr>(u32_store_aligned),
-          reinterpret_cast<fptr>(v64_load_aligned), simd, d,
-          reinterpret_cast<fptr>(c_u32_store_aligned),
-          reinterpret_cast<fptr>(c_v64_load_aligned), ref_simd, ref_d, s);
-    } else if (typeid(CRet) == typeid(int32_t) &&
-               typeid(CArg) == typeid(c_v64)) {
-      // S32_V64
-      error = CompareSimd1Arg<int32_t, v64, CRet, CArg>(
-          reinterpret_cast<fptr>(s32_store_aligned),
-          reinterpret_cast<fptr>(v64_load_aligned), simd, d,
-          reinterpret_cast<fptr>(c_s32_store_aligned),
-          reinterpret_cast<fptr>(c_v64_load_aligned), ref_simd, ref_d, s);
-    } else if (typeid(CRet) == typeid(uint32_t) &&
-               typeid(CArg) == typeid(c_v128)) {
-      // U32_V128
-      error = CompareSimd1Arg<uint32_t, v128, CRet, CArg>(
-          reinterpret_cast<fptr>(u32_store_aligned),
-          reinterpret_cast<fptr>(v128_load_aligned), simd, d,
-          reinterpret_cast<fptr>(c_u32_store_aligned),
-          reinterpret_cast<fptr>(c_v128_load_aligned), ref_simd, ref_d, s);
-    } else if (typeid(CRet) == typeid(uint64_t) &&
-               typeid(CArg) == typeid(c_v128)) {
-      // U64_V128
-      error = CompareSimd1Arg<uint64_t, v128, CRet, CArg>(
-          reinterpret_cast<fptr>(u64_store_aligned),
-          reinterpret_cast<fptr>(v128_load_aligned), simd, d,
-          reinterpret_cast<fptr>(c_u64_store_aligned),
-          reinterpret_cast<fptr>(c_v128_load_aligned), ref_simd, ref_d, s);
-    } else if (typeid(CRet) == typeid(uint64_t) &&
-               typeid(CArg) == typeid(c_v256)) {
-      // U64_V256
-      error = CompareSimd1Arg<uint64_t, v256, CRet, CArg>(
-          reinterpret_cast<fptr>(u64_store_aligned),
-          reinterpret_cast<fptr>(v256_load_aligned), simd, d,
-          reinterpret_cast<fptr>(c_u64_store_aligned),
-          reinterpret_cast<fptr>(c_v256_load_aligned), ref_simd, ref_d, s);
-    } else if (typeid(CRet) == typeid(c_v64) &&
-               typeid(CArg) == typeid(c_v128)) {
-      // V64_V128
-      error = CompareSimd1Arg<v64, v128, CRet, CArg>(
-          reinterpret_cast<fptr>(v64_store_aligned),
-          reinterpret_cast<fptr>(v128_load_aligned), simd, d,
-          reinterpret_cast<fptr>(c_v64_store_aligned),
-          reinterpret_cast<fptr>(c_v128_load_aligned), ref_simd, ref_d, s);
-    } else if (typeid(CRet) == typeid(c_v128) &&
-               typeid(CArg) == typeid(c_v128)) {
-      // V128_V128
-      error = CompareSimd1Arg<v128, v128, CRet, CArg>(
-          reinterpret_cast<fptr>(v128_store_aligned),
-          reinterpret_cast<fptr>(v128_load_aligned), simd, d,
-          reinterpret_cast<fptr>(c_v128_store_aligned),
-          reinterpret_cast<fptr>(c_v128_load_aligned), ref_simd, ref_d, s);
-    } else if (typeid(CRet) == typeid(c_v128) &&
-               typeid(CArg) == typeid(c_v64)) {
-      // V128_V64
-      error = CompareSimd1Arg<v128, v64, CRet, CArg>(
-          reinterpret_cast<fptr>(v128_store_aligned),
-          reinterpret_cast<fptr>(v64_load_aligned), simd, d,
-          reinterpret_cast<fptr>(c_v128_store_aligned),
-          reinterpret_cast<fptr>(c_v64_load_aligned), ref_simd, ref_d, s);
-    } else if (typeid(CRet) == typeid(c_v128) &&
-               typeid(CArg) == typeid(uint8_t)) {
-      // V128_U8
-      error = CompareSimd1Arg<v128, uint8_t, CRet, CArg>(
-          reinterpret_cast<fptr>(v128_store_aligned),
-          reinterpret_cast<fptr>(u8_load_aligned), simd, d,
-          reinterpret_cast<fptr>(c_v128_store_aligned),
-          reinterpret_cast<fptr>(c_u8_load_aligned), ref_simd, ref_d, s);
-    } else if (typeid(CRet) == typeid(c_v128) &&
-               typeid(CArg) == typeid(uint16_t)) {
-      // V128_U16
-      error = CompareSimd1Arg<v128, uint16_t, CRet, CArg>(
-          reinterpret_cast<fptr>(v128_store_aligned),
-          reinterpret_cast<fptr>(u16_load_aligned), simd, d,
-          reinterpret_cast<fptr>(c_v128_store_aligned),
-          reinterpret_cast<fptr>(c_u16_load_aligned), ref_simd, ref_d, s);
-    } else if (typeid(CRet) == typeid(c_v128) &&
-               typeid(CArg) == typeid(uint32_t)) {
-      // V128_U32
-      error = CompareSimd1Arg<v128, uint32_t, CRet, CArg>(
-          reinterpret_cast<fptr>(v128_store_aligned),
-          reinterpret_cast<fptr>(u32_load_aligned), simd, d,
-          reinterpret_cast<fptr>(c_v128_store_aligned),
-          reinterpret_cast<fptr>(c_u32_load_aligned), ref_simd, ref_d, s);
-    } else if (typeid(CRet) == typeid(c_v128) &&
-               typeid(CArg) == typeid(uint64_t)) {
-      // V128_U64
-      error = CompareSimd1Arg<v128, uint64_t, CRet, CArg>(
-          reinterpret_cast<fptr>(v128_store_aligned),
-          reinterpret_cast<fptr>(u64_load_aligned), simd, d,
-          reinterpret_cast<fptr>(c_v128_store_aligned),
-          reinterpret_cast<fptr>(c_u64_load_aligned), ref_simd, ref_d, s);
-    } else if (typeid(CRet) == typeid(c_v256) &&
-               typeid(CArg) == typeid(c_v256)) {
-      // V256_V256
-      error = CompareSimd1Arg<v256, v256, CRet, CArg>(
-          reinterpret_cast<fptr>(v256_store_aligned),
-          reinterpret_cast<fptr>(v256_load_aligned), simd, d,
-          reinterpret_cast<fptr>(c_v256_store_aligned),
-          reinterpret_cast<fptr>(c_v256_load_aligned), ref_simd, ref_d, s);
-    } else if (typeid(CRet) == typeid(c_v256) &&
-               typeid(CArg) == typeid(c_v128)) {
-      // V256_V128
-      error = CompareSimd1Arg<v256, v128, CRet, CArg>(
-          reinterpret_cast<fptr>(v256_store_aligned),
-          reinterpret_cast<fptr>(v128_load_aligned), simd, d,
-          reinterpret_cast<fptr>(c_v256_store_aligned),
-          reinterpret_cast<fptr>(c_v128_load_aligned), ref_simd, ref_d, s);
-    } else if (typeid(CRet) == typeid(c_v256) &&
-               typeid(CArg) == typeid(uint8_t)) {
-      // V256_U8
-      error = CompareSimd1Arg<v256, uint8_t, CRet, CArg>(
-          reinterpret_cast<fptr>(v256_store_aligned),
-          reinterpret_cast<fptr>(u8_load_aligned), simd, d,
-          reinterpret_cast<fptr>(c_v256_store_aligned),
-          reinterpret_cast<fptr>(c_u8_load_aligned), ref_simd, ref_d, s);
-    } else if (typeid(CRet) == typeid(c_v256) &&
-               typeid(CArg) == typeid(uint16_t)) {
-      // V256_U16
-      error = CompareSimd1Arg<v256, uint16_t, CRet, CArg>(
-          reinterpret_cast<fptr>(v256_store_aligned),
-          reinterpret_cast<fptr>(u16_load_aligned), simd, d,
-          reinterpret_cast<fptr>(c_v256_store_aligned),
-          reinterpret_cast<fptr>(c_u16_load_aligned), ref_simd, ref_d, s);
-    } else if (typeid(CRet) == typeid(c_v256) &&
-               typeid(CArg) == typeid(uint32_t)) {
-      // V256_U32
-      error = CompareSimd1Arg<v256, uint32_t, CRet, CArg>(
-          reinterpret_cast<fptr>(v256_store_aligned),
-          reinterpret_cast<fptr>(u32_load_aligned), simd, d,
-          reinterpret_cast<fptr>(c_v256_store_aligned),
-          reinterpret_cast<fptr>(c_u32_load_aligned), ref_simd, ref_d, s);
-    } else if (typeid(CRet) == typeid(c_v256) &&
-               typeid(CArg) == typeid(uint64_t)) {
-      // V256_U64
-      error = CompareSimd1Arg<v256, uint64_t, CRet, CArg>(
-          reinterpret_cast<fptr>(v256_store_aligned),
-          reinterpret_cast<fptr>(u64_load_aligned), simd, d,
-          reinterpret_cast<fptr>(c_v256_store_aligned),
-          reinterpret_cast<fptr>(c_u64_load_aligned), ref_simd, ref_d, s);
-    } else if (typeid(CRet) == typeid(uint32_t) &&
-               typeid(CArg) == typeid(c_v256)) {
-      // U32_V256
-      error = CompareSimd1Arg<uint32_t, v256, CRet, CArg>(
-          reinterpret_cast<fptr>(u32_store_aligned),
-          reinterpret_cast<fptr>(v256_load_aligned), simd, d,
-          reinterpret_cast<fptr>(c_u32_store_aligned),
-          reinterpret_cast<fptr>(c_v256_load_aligned), ref_simd, ref_d, s);
-    } else if (typeid(CRet) == typeid(c_v64) &&
-               typeid(CArg) == typeid(c_v256)) {
-      // V64_V256
-      error = CompareSimd1Arg<v64, v256, CRet, CArg>(
-          reinterpret_cast<fptr>(v64_store_aligned),
-          reinterpret_cast<fptr>(v256_load_aligned), simd, d,
-          reinterpret_cast<fptr>(c_v64_store_aligned),
-          reinterpret_cast<fptr>(c_v256_load_aligned), ref_simd, ref_d, s);
-    } else {
-      FAIL() << "Internal error: Unknown intrinsic function "
-             << typeid(CRet).name() << " " << name << "(" << typeid(CArg).name()
-             << ")";
-    }
-  }
-
-  EXPECT_EQ(0, error) << "Error: mismatch for " << name << "("
-                      << Print(s, sizeof(s)) << ") -> " << Print(d, sizeof(d))
-                      << " (simd), " << Print(ref_d, sizeof(ref_d)) << " (ref)";
-}
-
-// Compares the two argument intrinsic called `name` with its C reference on
-// up to `iterations` random input pairs, stopping at the first mismatch or
-// test failure. CRet, CArg1 and CArg2 are the return and argument types of
-// the C reference; they select the matching machine tuned types and the
-// load/store helpers. A non-zero `maskwidth` restricts the second argument
-// only, by ANDing `mask` into each `maskwidth`-bit element (see SetMask).
-// An unknown name or an unsupported type combination is reported as a test
-// failure.
-template <typename CRet, typename CArg1, typename CArg2>
-void TestSimd2Args(uint32_t iterations, uint32_t mask, uint32_t maskwidth,
-                   const char *name) {
-  ACMRandom rnd(ACMRandom::DeterministicSeed());
-  fptr ref_simd;
-  fptr simd;
-  int error = 0;
-  DECLARE_ALIGNED(32, uint8_t, s1[32]);
-  DECLARE_ALIGNED(32, uint8_t, s2[32]);
-  DECLARE_ALIGNED(32, uint8_t, d[32]);
-  DECLARE_ALIGNED(32, uint8_t, ref_d[32]);
-  assert(sizeof(CArg1) <= 32 && sizeof(CArg2) <= 32 && sizeof(CRet) <= 32);
-  // Only the first sizeof(CArg1) / sizeof(CArg2) bytes of the inputs are
-  // randomized below, but the mismatch message prints the whole buffers, so
-  // clear them up front.
-  memset(s1, 0, sizeof(s1));
-  memset(s2, 0, sizeof(s2));
-  memset(ref_d, 0, sizeof(ref_d));
-  memset(d, 0, sizeof(d));
-
-  Map(name, &ref_simd, &simd);
-  if (simd == NULL || ref_simd == NULL) {
-    FAIL() << "Internal error: Unknown intrinsic function " << name;
-  }
-
-  for (unsigned int count = 0;
-       count < iterations && !error && !testing::Test::HasFailure(); count++) {
-    for (unsigned int c = 0; c < sizeof(CArg1); c++) s1[c] = rnd.Rand8();
-
-    for (unsigned int c = 0; c < sizeof(CArg2); c++) s2[c] = rnd.Rand8();
-
-    if (maskwidth) SetMask(s2, sizeof(CArg2), mask, maskwidth);
-
-    // Dispatch on the reference signature; the comment in each branch names
-    // it as <return type>_<argument types>.
-    if (typeid(CRet) == typeid(c_v64) && typeid(CArg1) == typeid(c_v64) &&
-        typeid(CArg2) == typeid(c_v64)) {
-      // V64_V64V64
-      error = CompareSimd2Args<v64, v64, v64, CRet, CArg1, CArg2>(
-          reinterpret_cast<fptr>(v64_store_aligned),
-          reinterpret_cast<fptr>(v64_load_aligned),
-          reinterpret_cast<fptr>(v64_load_aligned), simd, d,
-          reinterpret_cast<fptr>(c_v64_store_aligned),
-          reinterpret_cast<fptr>(c_v64_load_aligned),
-          reinterpret_cast<fptr>(c_v64_load_aligned),
-          reinterpret_cast<fptr>(ref_simd), ref_d, s1, s2);
-    } else if (typeid(CRet) == typeid(c_v64) &&
-               typeid(CArg1) == typeid(uint32_t) &&
-               typeid(CArg2) == typeid(uint32_t)) {
-      // V64_U32U32
-      error = CompareSimd2Args<v64, uint32_t, uint32_t, CRet, CArg1, CArg2>(
-          reinterpret_cast<fptr>(v64_store_aligned),
-          reinterpret_cast<fptr>(u32_load_aligned),
-          reinterpret_cast<fptr>(u32_load_aligned), simd, d,
-          reinterpret_cast<fptr>(c_v64_store_aligned),
-          reinterpret_cast<fptr>(c_u32_load_aligned),
-          reinterpret_cast<fptr>(c_u32_load_aligned),
-          reinterpret_cast<fptr>(ref_simd), ref_d, s1, s2);
-    } else if (typeid(CRet) == typeid(uint32_t) &&
-               typeid(CArg1) == typeid(c_v64) &&
-               typeid(CArg2) == typeid(c_v64)) {
-      // U32_V64V64
-      error = CompareSimd2Args<uint32_t, v64, v64, CRet, CArg1, CArg2>(
-          reinterpret_cast<fptr>(u32_store_aligned),
-          reinterpret_cast<fptr>(v64_load_aligned),
-          reinterpret_cast<fptr>(v64_load_aligned), simd, d,
-          reinterpret_cast<fptr>(c_u32_store_aligned),
-          reinterpret_cast<fptr>(c_v64_load_aligned),
-          reinterpret_cast<fptr>(c_v64_load_aligned),
-          reinterpret_cast<fptr>(ref_simd), ref_d, s1, s2);
-    } else if (typeid(CRet) == typeid(int64_t) &&
-               typeid(CArg1) == typeid(c_v64) &&
-               typeid(CArg2) == typeid(c_v64)) {
-      // S64_V64V64
-      error = CompareSimd2Args<int64_t, v64, v64, CRet, CArg1, CArg2>(
-          reinterpret_cast<fptr>(s64_store_aligned),
-          reinterpret_cast<fptr>(v64_load_aligned),
-          reinterpret_cast<fptr>(v64_load_aligned), simd, d,
-          reinterpret_cast<fptr>(c_s64_store_aligned),
-          reinterpret_cast<fptr>(c_v64_load_aligned),
-          reinterpret_cast<fptr>(c_v64_load_aligned),
-          reinterpret_cast<fptr>(ref_simd), ref_d, s1, s2);
-    } else if (typeid(CRet) == typeid(c_v64) &&
-               typeid(CArg1) == typeid(c_v64) &&
-               typeid(CArg2) == typeid(uint32_t)) {
-      // V64_V64U32
-      error = CompareSimd2Args<v64, v64, uint32_t, CRet, CArg1, CArg2>(
-          reinterpret_cast<fptr>(v64_store_aligned),
-          reinterpret_cast<fptr>(v64_load_aligned),
-          reinterpret_cast<fptr>(u32_load_aligned), simd, d,
-          reinterpret_cast<fptr>(c_v64_store_aligned),
-          reinterpret_cast<fptr>(c_v64_load_aligned),
-          reinterpret_cast<fptr>(c_u32_load_aligned),
-          reinterpret_cast<fptr>(ref_simd), ref_d, s1, s2);
-    } else if (typeid(CRet) == typeid(c_v128) &&
-               typeid(CArg1) == typeid(c_v128) &&
-               typeid(CArg2) == typeid(c_v128)) {
-      // V128_V128V128
-      error = CompareSimd2Args<v128, v128, v128, CRet, CArg1, CArg2>(
-          reinterpret_cast<fptr>(v128_store_aligned),
-          reinterpret_cast<fptr>(v128_load_aligned),
-          reinterpret_cast<fptr>(v128_load_aligned), simd, d,
-          reinterpret_cast<fptr>(c_v128_store_aligned),
-          reinterpret_cast<fptr>(c_v128_load_aligned),
-          reinterpret_cast<fptr>(c_v128_load_aligned),
-          reinterpret_cast<fptr>(ref_simd), ref_d, s1, s2);
-    } else if (typeid(CRet) == typeid(uint32_t) &&
-               typeid(CArg1) == typeid(c_v128) &&
-               typeid(CArg2) == typeid(c_v128)) {
-      // U32_V128V128
-      error = CompareSimd2Args<uint32_t, v128, v128, CRet, CArg1, CArg2>(
-          reinterpret_cast<fptr>(u32_store_aligned),
-          reinterpret_cast<fptr>(v128_load_aligned),
-          reinterpret_cast<fptr>(v128_load_aligned), simd, d,
-          reinterpret_cast<fptr>(c_u32_store_aligned),
-          reinterpret_cast<fptr>(c_v128_load_aligned),
-          reinterpret_cast<fptr>(c_v128_load_aligned),
-          reinterpret_cast<fptr>(ref_simd), ref_d, s1, s2);
-    } else if (typeid(CRet) == typeid(uint64_t) &&
-               typeid(CArg1) == typeid(c_v128) &&
-               typeid(CArg2) == typeid(c_v128)) {
-      // U64_V128V128
-      error = CompareSimd2Args<uint64_t, v128, v128, CRet, CArg1, CArg2>(
-          reinterpret_cast<fptr>(u64_store_aligned),
-          reinterpret_cast<fptr>(v128_load_aligned),
-          reinterpret_cast<fptr>(v128_load_aligned), simd, d,
-          reinterpret_cast<fptr>(c_u64_store_aligned),
-          reinterpret_cast<fptr>(c_v128_load_aligned),
-          reinterpret_cast<fptr>(c_v128_load_aligned),
-          reinterpret_cast<fptr>(ref_simd), ref_d, s1, s2);
-    } else if (typeid(CRet) == typeid(int64_t) &&
-               typeid(CArg1) == typeid(c_v128) &&
-               typeid(CArg2) == typeid(c_v128)) {
-      // S64_V128V128
-      error = CompareSimd2Args<int64_t, v128, v128, CRet, CArg1, CArg2>(
-          reinterpret_cast<fptr>(s64_store_aligned),
-          reinterpret_cast<fptr>(v128_load_aligned),
-          reinterpret_cast<fptr>(v128_load_aligned), simd, d,
-          reinterpret_cast<fptr>(c_s64_store_aligned),
-          reinterpret_cast<fptr>(c_v128_load_aligned),
-          reinterpret_cast<fptr>(c_v128_load_aligned),
-          reinterpret_cast<fptr>(ref_simd), ref_d, s1, s2);
-    } else if (typeid(CRet) == typeid(c_v128) &&
-               typeid(CArg1) == typeid(uint64_t) &&
-               typeid(CArg2) == typeid(uint64_t)) {
-      // V128_U64U64
-      error = CompareSimd2Args<v128, uint64_t, uint64_t, CRet, CArg1, CArg2>(
-          reinterpret_cast<fptr>(v128_store_aligned),
-          reinterpret_cast<fptr>(u64_load_aligned),
-          reinterpret_cast<fptr>(u64_load_aligned), simd, d,
-          reinterpret_cast<fptr>(c_v128_store_aligned),
-          reinterpret_cast<fptr>(c_u64_load_aligned),
-          reinterpret_cast<fptr>(c_u64_load_aligned),
-          reinterpret_cast<fptr>(ref_simd), ref_d, s1, s2);
-    } else if (typeid(CRet) == typeid(c_v128) &&
-               typeid(CArg1) == typeid(c_v64) &&
-               typeid(CArg2) == typeid(c_v64)) {
-      // V128_V64V64
-      error = CompareSimd2Args<v128, v64, v64, CRet, CArg1, CArg2>(
-          reinterpret_cast<fptr>(v128_store_aligned),
-          reinterpret_cast<fptr>(v64_load_aligned),
-          reinterpret_cast<fptr>(v64_load_aligned), simd, d,
-          reinterpret_cast<fptr>(c_v128_store_aligned),
-          reinterpret_cast<fptr>(c_v64_load_aligned),
-          reinterpret_cast<fptr>(c_v64_load_aligned),
-          reinterpret_cast<fptr>(ref_simd), ref_d, s1, s2);
-    } else if (typeid(CRet) == typeid(c_v128) &&
-               typeid(CArg1) == typeid(c_v128) &&
-               typeid(CArg2) == typeid(uint32_t)) {
-      // V128_V128U32
-      error = CompareSimd2Args<v128, v128, uint32_t, CRet, CArg1, CArg2>(
-          reinterpret_cast<fptr>(v128_store_aligned),
-          reinterpret_cast<fptr>(v128_load_aligned),
-          reinterpret_cast<fptr>(u32_load_aligned), simd, d,
-          reinterpret_cast<fptr>(c_v128_store_aligned),
-          reinterpret_cast<fptr>(c_v128_load_aligned),
-          reinterpret_cast<fptr>(c_u32_load_aligned),
-          reinterpret_cast<fptr>(ref_simd), ref_d, s1, s2);
-    } else if (typeid(CRet) == typeid(c_v256) &&
-               typeid(CArg1) == typeid(c_v256) &&
-               typeid(CArg2) == typeid(c_v256)) {
-      // V256_V256V256
-      error = CompareSimd2Args<v256, v256, v256, CRet, CArg1, CArg2>(
-          reinterpret_cast<fptr>(v256_store_aligned),
-          reinterpret_cast<fptr>(v256_load_aligned),
-          reinterpret_cast<fptr>(v256_load_aligned), simd, d,
-          reinterpret_cast<fptr>(c_v256_store_aligned),
-          reinterpret_cast<fptr>(c_v256_load_aligned),
-          reinterpret_cast<fptr>(c_v256_load_aligned),
-          reinterpret_cast<fptr>(ref_simd), ref_d, s1, s2);
-    } else if (typeid(CRet) == typeid(uint64_t) &&
-               typeid(CArg1) == typeid(c_v256) &&
-               typeid(CArg2) == typeid(c_v256)) {
-      // U64_V256V256
-      error = CompareSimd2Args<uint64_t, v256, v256, CRet, CArg1, CArg2>(
-          reinterpret_cast<fptr>(u64_store_aligned),
-          reinterpret_cast<fptr>(v256_load_aligned),
-          reinterpret_cast<fptr>(v256_load_aligned), simd, d,
-          reinterpret_cast<fptr>(c_u64_store_aligned),
-          reinterpret_cast<fptr>(c_v256_load_aligned),
-          reinterpret_cast<fptr>(c_v256_load_aligned),
-          reinterpret_cast<fptr>(ref_simd), ref_d, s1, s2);
-    } else if (typeid(CRet) == typeid(int64_t) &&
-               typeid(CArg1) == typeid(c_v256) &&
-               typeid(CArg2) == typeid(c_v256)) {
-      // S64_V256V256
-      error = CompareSimd2Args<int64_t, v256, v256, CRet, CArg1, CArg2>(
-          reinterpret_cast<fptr>(s64_store_aligned),
-          reinterpret_cast<fptr>(v256_load_aligned),
-          reinterpret_cast<fptr>(v256_load_aligned), simd, d,
-          reinterpret_cast<fptr>(c_s64_store_aligned),
-          reinterpret_cast<fptr>(c_v256_load_aligned),
-          reinterpret_cast<fptr>(c_v256_load_aligned),
-          reinterpret_cast<fptr>(ref_simd), ref_d, s1, s2);
-    } else if (typeid(CRet) == typeid(uint32_t) &&
-               typeid(CArg1) == typeid(c_v256) &&
-               typeid(CArg2) == typeid(c_v256)) {
-      // U32_V256V256
-      error = CompareSimd2Args<uint32_t, v256, v256, CRet, CArg1, CArg2>(
-          reinterpret_cast<fptr>(u32_store_aligned),
-          reinterpret_cast<fptr>(v256_load_aligned),
-          reinterpret_cast<fptr>(v256_load_aligned), simd, d,
-          reinterpret_cast<fptr>(c_u32_store_aligned),
-          reinterpret_cast<fptr>(c_v256_load_aligned),
-          reinterpret_cast<fptr>(c_v256_load_aligned),
-          reinterpret_cast<fptr>(ref_simd), ref_d, s1, s2);
-    } else if (typeid(CRet) == typeid(c_v256) &&
-               typeid(CArg1) == typeid(c_v128) &&
-               typeid(CArg2) == typeid(c_v128)) {
-      // V256_V128V128
-      error = CompareSimd2Args<v256, v128, v128, CRet, CArg1, CArg2>(
-          reinterpret_cast<fptr>(v256_store_aligned),
-          reinterpret_cast<fptr>(v128_load_aligned),
-          reinterpret_cast<fptr>(v128_load_aligned), simd, d,
-          reinterpret_cast<fptr>(c_v256_store_aligned),
-          reinterpret_cast<fptr>(c_v128_load_aligned),
-          reinterpret_cast<fptr>(c_v128_load_aligned),
-          reinterpret_cast<fptr>(ref_simd), ref_d, s1, s2);
-    } else if (typeid(CRet) == typeid(c_v256) &&
-               typeid(CArg1) == typeid(c_v256) &&
-               typeid(CArg2) == typeid(uint32_t)) {
-      // V256_V256U32
-      error = CompareSimd2Args<v256, v256, uint32_t, CRet, CArg1, CArg2>(
-          reinterpret_cast<fptr>(v256_store_aligned),
-          reinterpret_cast<fptr>(v256_load_aligned),
-          reinterpret_cast<fptr>(u32_load_aligned), simd, d,
-          reinterpret_cast<fptr>(c_v256_store_aligned),
-          reinterpret_cast<fptr>(c_v256_load_aligned),
-          reinterpret_cast<fptr>(c_u32_load_aligned),
-          reinterpret_cast<fptr>(ref_simd), ref_d, s1, s2);
-    } else {
-      FAIL() << "Internal error: Unknown intrinsic function "
-             << typeid(CRet).name() << " " << name << "("
-             << typeid(CArg1).name() << ", " << typeid(CArg2).name() << ")";
-    }
-  }
-
-  EXPECT_EQ(0, error) << "Error: mismatch for " << name << "("
-                      << Print(s1, sizeof(s1)) << ", " << Print(s2, sizeof(s2))
-                      << ") -> " << Print(d, sizeof(d)) << " (simd), "
-                      << Print(ref_d, sizeof(ref_d)) << " (ref)";
-}
-
-template <typename CRet, typename CArg1, typename CArg2, typename CArg3>  // reference-C return/argument types
-void TestSimd3Args(uint32_t iterations, uint32_t mask, uint32_t maskwidth,  // runs a 3-argument intrinsic on random inputs
- const char *name) {  // name: key handed to Map() to find both implementations
- ACMRandom rnd(ACMRandom::DeterministicSeed());
- fptr ref_simd;  // set by Map() below
- fptr simd;  // set by Map() below
- int error = 0;  // result of CompareSimd3Args; a nonzero value ends the loop
- DECLARE_ALIGNED(32, uint8_t, s1[32]);  // raw bytes of argument 1
- DECLARE_ALIGNED(32, uint8_t, s2[32]);  // raw bytes of argument 2
- DECLARE_ALIGNED(32, uint8_t, s3[32]);  // raw bytes of argument 3
- DECLARE_ALIGNED(32, uint8_t, d[32]);  // output buffer passed alongside simd
- DECLARE_ALIGNED(32, uint8_t, ref_d[32]);  // output buffer passed alongside ref_simd
- assert(sizeof(CArg1) <= 32 && sizeof(CArg2) <= 32 && sizeof(CArg3) <= 32 &&  // everything must fit the 32-byte buffers
- sizeof(CRet) <= 32);
- memset(ref_d, 0, sizeof(ref_d));
- memset(d, 0, sizeof(d));
- // Look up the two implementations registered under name.
- Map(name, &ref_simd, &simd);
- if (simd == NULL || ref_simd == NULL) {
- FAIL() << "Internal error: Unknown intrinsic function " << name;
- }
-
- for (unsigned int count = 0;  // stops early on the first mismatch or recorded gtest failure
- count < iterations && !error && !testing::Test::HasFailure(); count++) {
- for (unsigned int c = 0; c < sizeof(CArg1); c++) s1[c] = rnd.Rand8();  // only the first sizeof(CArg1) bytes are written
-
- for (unsigned int c = 0; c < sizeof(CArg2); c++) s2[c] = rnd.Rand8();
-
- for (unsigned int c = 0; c < sizeof(CArg3); c++) s3[c] = rnd.Rand8();
- // A nonzero maskwidth applies SetMask() to the third argument only.
- if (maskwidth) SetMask(s3, sizeof(CArg3), mask, maskwidth);
- // Dispatch on the runtime type ids of the template arguments.
- if (typeid(CRet) == typeid(c_v128) && typeid(CArg1) == typeid(c_v128) &&
- typeid(CArg2) == typeid(c_v128) && typeid(CArg3) == typeid(c_v128)) {
- // V128_V128V128V128: v128 result from three v128 arguments
- error =
- CompareSimd3Args<v128, v128, v128, v128, CRet, CArg1, CArg2, CArg3>(
- reinterpret_cast<fptr>(v128_store_aligned),
- reinterpret_cast<fptr>(v128_load_aligned),
- reinterpret_cast<fptr>(v128_load_aligned),
- reinterpret_cast<fptr>(v128_load_aligned), simd, d,
- reinterpret_cast<fptr>(c_v128_store_aligned),
- reinterpret_cast<fptr>(c_v128_load_aligned),
- reinterpret_cast<fptr>(c_v128_load_aligned),
- reinterpret_cast<fptr>(c_v128_load_aligned),
- reinterpret_cast<fptr>(ref_simd), ref_d, s1, s2, s3);
- } else if (typeid(CRet) == typeid(c_v256) &&
- typeid(CArg1) == typeid(c_v256) &&
- typeid(CArg2) == typeid(c_v256) &&
- typeid(CArg3) == typeid(c_v256)) {
- // V256_V256V256V256: v256 result from three v256 arguments
- error =
- CompareSimd3Args<v256, v256, v256, v256, CRet, CArg1, CArg2, CArg3>(
- reinterpret_cast<fptr>(v256_store_aligned),
- reinterpret_cast<fptr>(v256_load_aligned),
- reinterpret_cast<fptr>(v256_load_aligned),
- reinterpret_cast<fptr>(v256_load_aligned), simd, d,
- reinterpret_cast<fptr>(c_v256_store_aligned),
- reinterpret_cast<fptr>(c_v256_load_aligned),
- reinterpret_cast<fptr>(c_v256_load_aligned),
- reinterpret_cast<fptr>(c_v256_load_aligned),
- reinterpret_cast<fptr>(ref_simd), ref_d, s1, s2, s3);
- } else {  // no dispatch case exists for this type combination
- FAIL() << "Internal error: Unknown intrinsic function "
- << typeid(CRet).name() << " " << name << "("
- << typeid(CArg1).name() << ", " << typeid(CArg2).name() << ", "
- << typeid(CArg3).name() << ")";
- }
- }
- // NOTE(review): Print() gets all 32 bytes of s1..s3 although only sizeof(CArgN) bytes were written above.
- EXPECT_EQ(0, error) << "Error: mismatch for " << name << "("
- << Print(s1, sizeof(s1)) << ", " << Print(s2, sizeof(s2))
- << ", " << Print(s3, sizeof(s3)) << ") -> "
- << Print(d, sizeof(d)) << " (simd), "
- << Print(ref_d, sizeof(ref_d)) << " (ref)";
-}
-
-// Explicit instantiations so the function templates can be called from other files
-template void TestSimd1Arg<c_v64, uint8_t>(uint32_t, uint32_t, uint32_t,  // template args: <return type, argument type>
- const char *);
-template void TestSimd1Arg<c_v64, uint16_t>(uint32_t, uint32_t, uint32_t,
- const char *);
-template void TestSimd1Arg<c_v64, uint32_t>(uint32_t, uint32_t, uint32_t,
- const char *);
-template void TestSimd1Arg<c_v64, c_v64>(uint32_t, uint32_t, uint32_t,
- const char *);
-template void TestSimd1Arg<uint32_t, c_v64>(uint32_t, uint32_t, uint32_t,
- const char *);
-template void TestSimd1Arg<int32_t, c_v64>(uint32_t, uint32_t, uint32_t,
- const char *);
-template void TestSimd1Arg<uint64_t, c_v64>(uint32_t, uint32_t, uint32_t,
- const char *);
-template void TestSimd1Arg<int64_t, c_v64>(uint32_t, uint32_t, uint32_t,
- const char *);
-template void TestSimd2Args<c_v64, uint32_t, uint32_t>(uint32_t, uint32_t,  // template args: <return, arg1, arg2>
- uint32_t, const char *);
-template void TestSimd2Args<c_v64, c_v64, c_v64>(uint32_t, uint32_t, uint32_t,
- const char *);
-template void TestSimd2Args<c_v64, c_v64, uint32_t>(uint32_t, uint32_t,
- uint32_t, const char *);
-template void TestSimd2Args<int64_t, c_v64, c_v64>(uint32_t, uint32_t, uint32_t,
- const char *);
-template void TestSimd2Args<uint32_t, c_v64, c_v64>(uint32_t, uint32_t,
- uint32_t, const char *);
-template void TestSimd1Arg<c_v128, c_v128>(uint32_t, uint32_t, uint32_t,  // signatures involving c_v128 start here
- const char *);
-template void TestSimd1Arg<c_v128, uint8_t>(uint32_t, uint32_t, uint32_t,
- const char *);
-template void TestSimd1Arg<c_v128, uint16_t>(uint32_t, uint32_t, uint32_t,
- const char *);
-template void TestSimd1Arg<c_v128, uint32_t>(uint32_t, uint32_t, uint32_t,
- const char *);
-template void TestSimd1Arg<c_v128, uint64_t>(uint32_t, uint32_t, uint32_t,
- const char *);
-template void TestSimd1Arg<c_v128, c_v64>(uint32_t, uint32_t, uint32_t,
- const char *);
-template void TestSimd1Arg<uint32_t, c_v128>(uint32_t, uint32_t, uint32_t,
- const char *);
-template void TestSimd1Arg<uint64_t, c_v128>(uint32_t, uint32_t, uint32_t,
- const char *);
-template void TestSimd1Arg<c_v64, c_v128>(uint32_t, uint32_t, uint32_t,
- const char *);
-template void TestSimd2Args<c_v128, c_v128, c_v128>(uint32_t, uint32_t,
- uint32_t, const char *);
-template void TestSimd2Args<c_v128, c_v128, uint32_t>(uint32_t, uint32_t,
- uint32_t, const char *);
-template void TestSimd2Args<c_v128, uint64_t, uint64_t>(uint32_t, uint32_t,
- uint32_t, const char *);
-template void TestSimd2Args<c_v128, c_v64, c_v64>(uint32_t, uint32_t, uint32_t,
- const char *);
-template void TestSimd2Args<uint64_t, c_v128, c_v128>(uint32_t, uint32_t,
- uint32_t, const char *);
-template void TestSimd2Args<int64_t, c_v128, c_v128>(uint32_t, uint32_t,
- uint32_t, const char *);
-template void TestSimd2Args<uint32_t, c_v128, c_v128>(uint32_t, uint32_t,
- uint32_t, const char *);
-template void TestSimd3Args<c_v128, c_v128, c_v128, c_v128>(uint32_t, uint32_t,  // template args: <return, arg1, arg2, arg3>
- uint32_t,
- const char *);
-template void TestSimd1Arg<c_v256, c_v128>(uint32_t, uint32_t, uint32_t,  // signatures involving c_v256 start here
- const char *);
-template void TestSimd1Arg<c_v256, c_v256>(uint32_t, uint32_t, uint32_t,
- const char *);
-template void TestSimd1Arg<uint64_t, c_v256>(uint32_t, uint32_t, uint32_t,
- const char *);
-template void TestSimd1Arg<c_v256, uint8_t>(uint32_t, uint32_t, uint32_t,
- const char *);
-template void TestSimd1Arg<c_v256, uint16_t>(uint32_t, uint32_t, uint32_t,
- const char *);
-template void TestSimd1Arg<c_v256, uint32_t>(uint32_t, uint32_t, uint32_t,
- const char *);
-template void TestSimd1Arg<c_v256, uint64_t>(uint32_t, uint32_t, uint32_t,
- const char *);
-template void TestSimd1Arg<uint32_t, c_v256>(uint32_t, uint32_t, uint32_t,
- const char *);
-template void TestSimd1Arg<c_v64, c_v256>(uint32_t, uint32_t, uint32_t,
- const char *);
-template void TestSimd2Args<c_v256, c_v128, c_v128>(uint32_t, uint32_t,
- uint32_t, const char *);
-template void TestSimd2Args<c_v256, c_v256, c_v256>(uint32_t, uint32_t,
- uint32_t, const char *);
-template void TestSimd2Args<c_v256, c_v256, uint32_t>(uint32_t, uint32_t,
- uint32_t, const char *);
-template void TestSimd2Args<uint64_t, c_v256, c_v256>(uint32_t, uint32_t,
- uint32_t, const char *);
-template void TestSimd2Args<int64_t, c_v256, c_v256>(uint32_t, uint32_t,
- uint32_t, const char *);
-template void TestSimd2Args<uint32_t, c_v256, c_v256>(uint32_t, uint32_t,
- uint32_t, const char *);
-template void TestSimd3Args<c_v256, c_v256, c_v256, c_v256>(uint32_t, uint32_t,
- uint32_t,
- const char *);
-
-} // namespace SIMD_NAMESPACE
diff --git a/third_party/aom/test/simd_cmp_neon.cc b/third_party/aom/test/simd_cmp_neon.cc
deleted file mode 100644
index 53c1e2a07..000000000
--- a/third_party/aom/test/simd_cmp_neon.cc
+++ /dev/null
@@ -1,17 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#if defined(__OPTIMIZE__) && __OPTIMIZE__  // whole file is empty unless __OPTIMIZE__ is defined and nonzero
-#define ARCH NEON  // used as the first argument of INSTANTIATE(...) in simd_impl.h
-#define ARCH_POSTFIX(name) name##_neon  // token-pastes _neon onto an identifier
-#define SIMD_NAMESPACE simd_test_neon  // namespace name (simd_impl.h opens `namespace SIMD_NAMESPACE`)
-#include "test/simd_cmp_impl.h"
-#endif  // defined(__OPTIMIZE__) && __OPTIMIZE__
diff --git a/third_party/aom/test/simd_cmp_sse2.cc b/third_party/aom/test/simd_cmp_sse2.cc
deleted file mode 100644
index f7827a7fa..000000000
--- a/third_party/aom/test/simd_cmp_sse2.cc
+++ /dev/null
@@ -1,18 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#if (defined(__OPTIMIZE__) && __OPTIMIZE__) || \
- (!defined(__GNUC__) && !defined(_DEBUG))  // second clause: neither __GNUC__ nor _DEBUG is defined
-#define ARCH SSE2  // used as the first argument of INSTANTIATE(...) in simd_impl.h
-#define ARCH_POSTFIX(name) name##_sse2  // token-pastes _sse2 onto an identifier
-#define SIMD_NAMESPACE simd_test_sse2  // namespace name (simd_impl.h opens `namespace SIMD_NAMESPACE`)
-#include "test/simd_cmp_impl.h"
-#endif  // optimised build, or neither __GNUC__ nor _DEBUG defined
diff --git a/third_party/aom/test/simd_cmp_sse4.cc b/third_party/aom/test/simd_cmp_sse4.cc
deleted file mode 100644
index 3566764b6..000000000
--- a/third_party/aom/test/simd_cmp_sse4.cc
+++ /dev/null
@@ -1,18 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#if (defined(__OPTIMIZE__) && __OPTIMIZE__) || \
- (!defined(__GNUC__) && !defined(_DEBUG))  // second clause: neither __GNUC__ nor _DEBUG is defined
-#define ARCH SSE4_1  // used as the first argument of INSTANTIATE(...) in simd_impl.h
-#define ARCH_POSTFIX(name) name##_sse4_1  // token-pastes _sse4_1 onto an identifier
-#define SIMD_NAMESPACE simd_test_sse4_1  // namespace name (simd_impl.h opens `namespace SIMD_NAMESPACE`)
-#include "test/simd_cmp_impl.h"
-#endif  // optimised build, or neither __GNUC__ nor _DEBUG defined
diff --git a/third_party/aom/test/simd_cmp_ssse3.cc b/third_party/aom/test/simd_cmp_ssse3.cc
deleted file mode 100644
index 57bf135dd..000000000
--- a/third_party/aom/test/simd_cmp_ssse3.cc
+++ /dev/null
@@ -1,18 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#if (defined(__OPTIMIZE__) && __OPTIMIZE__) || \
- (!defined(__GNUC__) && !defined(_DEBUG))  // second clause: neither __GNUC__ nor _DEBUG is defined
-#define ARCH SSSE3  // used as the first argument of INSTANTIATE(...) in simd_impl.h
-#define ARCH_POSTFIX(name) name##_ssse3  // token-pastes _ssse3 onto an identifier
-#define SIMD_NAMESPACE simd_test_ssse3  // namespace name (simd_impl.h opens `namespace SIMD_NAMESPACE`)
-#include "test/simd_cmp_impl.h"
-#endif  // optimised build, or neither __GNUC__ nor _DEBUG defined
diff --git a/third_party/aom/test/simd_impl.h b/third_party/aom/test/simd_impl.h
deleted file mode 100644
index fd06f67fd..000000000
--- a/third_party/aom/test/simd_impl.h
+++ /dev/null
@@ -1,1141 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#define SIMD_CHECK 1
-#include "third_party/googletest/src/googletest/include/gtest/gtest.h"
-#include "test/clear_system_state.h"
-#include "test/register_state_check.h"
-#include "aom_dsp/aom_simd_inline.h"
-#include "aom_dsp/simd/v256_intrinsics_c.h"
-
-namespace SIMD_NAMESPACE {
-
-template <typename param_signature>  // param_signature: the tuple type given to TestWithParam
-class TestIntrinsic : public ::testing::TestWithParam<param_signature> {  // fixture shared by every intrinsic test below
- public:
- virtual ~TestIntrinsic() {}
- virtual void SetUp() {  // unpacks the parameter tuple into (mask, maskwidth, name)
- mask = ::testing::get<0>(this->GetParam());  // this-> is needed: GetParam() comes from a dependent base class
- maskwidth = ::testing::get<1>(this->GetParam());
- name = ::testing::get<2>(this->GetParam());
- }
- // Calls libaom_test::ClearSystemState() after every test.
- virtual void TearDown() { libaom_test::ClearSystemState(); }
-
- protected:
- uint32_t mask, maskwidth;  // passed through unchanged to TestSimd1Arg/2Args/3Args
- const char *name;  // intrinsic name; SIMD_TUPLE produces it by stringifying its first argument
-};
-
-// Create one fixture typedef per function signature, named RET_ARGS (e.g. V64_U32U32) plus the ARCH_POSTFIX suffix
-#define TYPEDEF_SIMD(name) \
- typedef TestIntrinsic< ::testing::tuple<uint32_t, uint32_t, const char *> > \
- ARCH_POSTFIX(name)
-
-TYPEDEF_SIMD(V64_U8);  // tested below as TestSimd1Arg<c_v64, uint8_t>
-TYPEDEF_SIMD(V64_U16);
-TYPEDEF_SIMD(V64_U32);
-TYPEDEF_SIMD(V64_V64);
-TYPEDEF_SIMD(U32_V64);
-TYPEDEF_SIMD(S32_V64);  // S32 = int32_t return (TestSimd1Arg<int32_t, c_v64>)
-TYPEDEF_SIMD(U64_V64);
-TYPEDEF_SIMD(S64_V64);
-TYPEDEF_SIMD(V64_U32U32);  // two-argument signature: TestSimd2Args<c_v64, uint32_t, uint32_t>
-TYPEDEF_SIMD(V64_V64V64);
-TYPEDEF_SIMD(S64_V64V64);
-TYPEDEF_SIMD(V64_V64U32);
-TYPEDEF_SIMD(U32_V64V64);
-TYPEDEF_SIMD(V128_V64);
-TYPEDEF_SIMD(V128_V128);
-TYPEDEF_SIMD(U32_V128);
-TYPEDEF_SIMD(U64_V128);
-TYPEDEF_SIMD(V64_V128);
-TYPEDEF_SIMD(V128_U8);
-TYPEDEF_SIMD(V128_U16);
-TYPEDEF_SIMD(V128_U32);
-TYPEDEF_SIMD(V128_U64);
-TYPEDEF_SIMD(V128_U64U64);
-TYPEDEF_SIMD(V128_V64V64);
-TYPEDEF_SIMD(V128_V128V128);
-TYPEDEF_SIMD(V128_V128V128V128);  // three-argument signature: TestSimd3Args<c_v128, c_v128, c_v128, c_v128>
-TYPEDEF_SIMD(S64_V128V128);
-TYPEDEF_SIMD(V128_V128U32);
-TYPEDEF_SIMD(U32_V128V128);
-TYPEDEF_SIMD(U64_V128V128);
-TYPEDEF_SIMD(V256_V128);
-TYPEDEF_SIMD(V256_V256);
-TYPEDEF_SIMD(U64_V256);
-TYPEDEF_SIMD(V256_V128V128);
-TYPEDEF_SIMD(V256_V256V256);
-TYPEDEF_SIMD(V256_V256V256V256);
-TYPEDEF_SIMD(U64_V256V256);
-TYPEDEF_SIMD(S64_V256V256);
-TYPEDEF_SIMD(V256_V256U32);
-TYPEDEF_SIMD(U32_V256V256);
-TYPEDEF_SIMD(V256_U8);
-TYPEDEF_SIMD(V256_U16);
-TYPEDEF_SIMD(V256_U32);
-TYPEDEF_SIMD(V256_U64);
-TYPEDEF_SIMD(U32_V256);
-TYPEDEF_SIMD(V64_V256);
-// Google Test allows up to 50 tests per case, so the largest signature groups get extra _PartN aliases of the same fixture
-typedef ARCH_POSTFIX(V64_V64) ARCH_POSTFIX(V64_V64_Part2);
-typedef ARCH_POSTFIX(V64_V64V64) ARCH_POSTFIX(V64_V64V64_Part2);
-typedef ARCH_POSTFIX(V128_V128) ARCH_POSTFIX(V128_V128_Part2);
-typedef ARCH_POSTFIX(V128_V128) ARCH_POSTFIX(V128_V128_Part3);
-typedef ARCH_POSTFIX(V128_V128) ARCH_POSTFIX(V128_V128_Part4);
-typedef ARCH_POSTFIX(V128_V128V128) ARCH_POSTFIX(V128_V128V128_Part2);
-typedef ARCH_POSTFIX(V256_V256) ARCH_POSTFIX(V256_V256_Part2);
-typedef ARCH_POSTFIX(V256_V256) ARCH_POSTFIX(V256_V256_Part3);
-typedef ARCH_POSTFIX(V256_V256) ARCH_POSTFIX(V256_V256_Part4);
-typedef ARCH_POSTFIX(V256_V256) ARCH_POSTFIX(V256_V256_Part5);
-typedef ARCH_POSTFIX(V256_V256V256) ARCH_POSTFIX(V256_V256V256_Part2);
-
-// These function templates are machine tuned and defined elsewhere; only their declarations appear here
-template <typename c_ret, typename c_arg>  // c_ret/c_arg: return and argument types (callers pass e.g. c_v64, uint8_t)
-void TestSimd1Arg(uint32_t iterations, uint32_t mask, uint32_t maskwidth,
- const char *name);
-
-template <typename c_ret, typename c_arg1, typename c_arg2>
-void TestSimd2Args(uint32_t iterations, uint32_t mask, uint32_t maskwidth,
- const char *name);
-
-template <typename c_ret, typename c_arg1, typename c_arg2, typename c_arg3>
-void TestSimd3Args(uint32_t iterations, uint32_t mask, uint32_t maskwidth,
- const char *name);
-
-const int kIterations = 65536;  // iteration count passed to every TestSimd*Arg(s) call in this file
-
-// Add a macro layer: TEST_P will quote the name, so ARCH_POSTFIX(...) needs
-// to be expanded first so that the suffixed name is what gets quoted.
-#define MY_TEST_P(name, test) TEST_P(name, test)
-
-MY_TEST_P(ARCH_POSTFIX(V64_U8), TestIntrinsics) {  // each test forwards the fixture's mask/maskwidth/name to one instantiation
- TestSimd1Arg<c_v64, uint8_t>(kIterations, mask, maskwidth, name);
-}
-
-MY_TEST_P(ARCH_POSTFIX(V64_U16), TestIntrinsics) {
- TestSimd1Arg<c_v64, uint16_t>(kIterations, mask, maskwidth, name);
-}
-
-MY_TEST_P(ARCH_POSTFIX(V64_U32), TestIntrinsics) {
- TestSimd1Arg<c_v64, uint32_t>(kIterations, mask, maskwidth, name);
-}
-
-MY_TEST_P(ARCH_POSTFIX(V64_V64), TestIntrinsics) {
- TestSimd1Arg<c_v64, c_v64>(kIterations, mask, maskwidth, name);
-}
-
-MY_TEST_P(ARCH_POSTFIX(U64_V64), TestIntrinsics) {
- TestSimd1Arg<uint64_t, c_v64>(kIterations, mask, maskwidth, name);
-}
-
-MY_TEST_P(ARCH_POSTFIX(S64_V64), TestIntrinsics) {
- TestSimd1Arg<int64_t, c_v64>(kIterations, mask, maskwidth, name);
-}
-
-MY_TEST_P(ARCH_POSTFIX(U32_V64), TestIntrinsics) {
- TestSimd1Arg<uint32_t, c_v64>(kIterations, mask, maskwidth, name);
-}
-
-MY_TEST_P(ARCH_POSTFIX(S32_V64), TestIntrinsics) {
- TestSimd1Arg<int32_t, c_v64>(kIterations, mask, maskwidth, name);
-}
-
-MY_TEST_P(ARCH_POSTFIX(V64_U32U32), TestIntrinsics) {  // two-argument signatures use TestSimd2Args
- TestSimd2Args<c_v64, uint32_t, uint32_t>(kIterations, mask, maskwidth, name);
-}
-
-MY_TEST_P(ARCH_POSTFIX(V64_V64V64), TestIntrinsics) {
- TestSimd2Args<c_v64, c_v64, c_v64>(kIterations, mask, maskwidth, name);
-}
-
-MY_TEST_P(ARCH_POSTFIX(S64_V64V64), TestIntrinsics) {
- TestSimd2Args<int64_t, c_v64, c_v64>(kIterations, mask, maskwidth, name);
-}
-
-MY_TEST_P(ARCH_POSTFIX(U32_V64V64), TestIntrinsics) {
- TestSimd2Args<uint32_t, c_v64, c_v64>(kIterations, mask, maskwidth, name);
-}
-
-MY_TEST_P(ARCH_POSTFIX(V64_V64U32), TestIntrinsics) {
- TestSimd2Args<c_v64, c_v64, uint32_t>(kIterations, mask, maskwidth, name);
-}
-
-// Google Test allows up to 50 tests per case, so the largest groups are split; the _Part2 bodies match their base tests
-MY_TEST_P(ARCH_POSTFIX(V64_V64_Part2), TestIntrinsics) {
- TestSimd1Arg<c_v64, c_v64>(kIterations, mask, maskwidth, name);
-}
-
-MY_TEST_P(ARCH_POSTFIX(V64_V64V64_Part2), TestIntrinsics) {
- TestSimd2Args<c_v64, c_v64, c_v64>(kIterations, mask, maskwidth, name);
-}
-
-MY_TEST_P(ARCH_POSTFIX(U32_V128), TestIntrinsics) {  // tests whose signature involves c_v128 start here
- TestSimd1Arg<uint32_t, c_v128>(kIterations, mask, maskwidth, name);
-}
-
-MY_TEST_P(ARCH_POSTFIX(U64_V128), TestIntrinsics) {
- TestSimd1Arg<uint64_t, c_v128>(kIterations, mask, maskwidth, name);
-}
-
-MY_TEST_P(ARCH_POSTFIX(V64_V128), TestIntrinsics) {
- TestSimd1Arg<c_v64, c_v128>(kIterations, mask, maskwidth, name);
-}
-
-MY_TEST_P(ARCH_POSTFIX(V128_V128), TestIntrinsics) {
- TestSimd1Arg<c_v128, c_v128>(kIterations, mask, maskwidth, name);
-}
-
-MY_TEST_P(ARCH_POSTFIX(V128_U8), TestIntrinsics) {
- TestSimd1Arg<c_v128, uint8_t>(kIterations, mask, maskwidth, name);
-}
-
-MY_TEST_P(ARCH_POSTFIX(V128_U16), TestIntrinsics) {
- TestSimd1Arg<c_v128, uint16_t>(kIterations, mask, maskwidth, name);
-}
-
-MY_TEST_P(ARCH_POSTFIX(V128_U32), TestIntrinsics) {
- TestSimd1Arg<c_v128, uint32_t>(kIterations, mask, maskwidth, name);
-}
-
-MY_TEST_P(ARCH_POSTFIX(V128_U64), TestIntrinsics) {
- TestSimd1Arg<c_v128, uint64_t>(kIterations, mask, maskwidth, name);
-}
-
-MY_TEST_P(ARCH_POSTFIX(V128_V64), TestIntrinsics) {
- TestSimd1Arg<c_v128, c_v64>(kIterations, mask, maskwidth, name);
-}
-
-MY_TEST_P(ARCH_POSTFIX(V128_V128V128), TestIntrinsics) {
- TestSimd2Args<c_v128, c_v128, c_v128>(kIterations, mask, maskwidth, name);
-}
-
-MY_TEST_P(ARCH_POSTFIX(V128_V128V128V128), TestIntrinsics) {  // the only v128 test that uses TestSimd3Args
- TestSimd3Args<c_v128, c_v128, c_v128, c_v128>(kIterations, mask, maskwidth,
- name);
-}
-
-MY_TEST_P(ARCH_POSTFIX(U32_V128V128), TestIntrinsics) {
- TestSimd2Args<uint32_t, c_v128, c_v128>(kIterations, mask, maskwidth, name);
-}
-
-MY_TEST_P(ARCH_POSTFIX(U64_V128V128), TestIntrinsics) {
- TestSimd2Args<uint64_t, c_v128, c_v128>(kIterations, mask, maskwidth, name);
-}
-
-MY_TEST_P(ARCH_POSTFIX(S64_V128V128), TestIntrinsics) {
- TestSimd2Args<int64_t, c_v128, c_v128>(kIterations, mask, maskwidth, name);
-}
-
-MY_TEST_P(ARCH_POSTFIX(V128_U64U64), TestIntrinsics) {
- TestSimd2Args<c_v128, uint64_t, uint64_t>(kIterations, mask, maskwidth, name);
-}
-
-MY_TEST_P(ARCH_POSTFIX(V128_V64V64), TestIntrinsics) {
- TestSimd2Args<c_v128, c_v64, c_v64>(kIterations, mask, maskwidth, name);
-}
-
-MY_TEST_P(ARCH_POSTFIX(V128_V128U32), TestIntrinsics) {
- TestSimd2Args<c_v128, c_v128, uint32_t>(kIterations, mask, maskwidth, name);
-}
-
-MY_TEST_P(ARCH_POSTFIX(V128_V128V128_Part2), TestIntrinsics) {  // _PartN tests: same body as the base test, separate test case
- TestSimd2Args<c_v128, c_v128, c_v128>(kIterations, mask, maskwidth, name);
-}
-
-MY_TEST_P(ARCH_POSTFIX(V128_V128_Part2), TestIntrinsics) {
- TestSimd1Arg<c_v128, c_v128>(kIterations, mask, maskwidth, name);
-}
-
-MY_TEST_P(ARCH_POSTFIX(V128_V128_Part3), TestIntrinsics) {
- TestSimd1Arg<c_v128, c_v128>(kIterations, mask, maskwidth, name);
-}
-
-MY_TEST_P(ARCH_POSTFIX(V128_V128_Part4), TestIntrinsics) {
- TestSimd1Arg<c_v128, c_v128>(kIterations, mask, maskwidth, name);
-}
-
-MY_TEST_P(ARCH_POSTFIX(U64_V256), TestIntrinsics) {  // tests whose signature involves c_v256 start here
- TestSimd1Arg<uint64_t, c_v256>(kIterations, mask, maskwidth, name);
-}
-
-MY_TEST_P(ARCH_POSTFIX(V256_V256), TestIntrinsics) {
- TestSimd1Arg<c_v256, c_v256>(kIterations, mask, maskwidth, name);
-}
-
-MY_TEST_P(ARCH_POSTFIX(V256_V128), TestIntrinsics) {
- TestSimd1Arg<c_v256, c_v128>(kIterations, mask, maskwidth, name);
-}
-
-MY_TEST_P(ARCH_POSTFIX(V256_V256V256), TestIntrinsics) {
- TestSimd2Args<c_v256, c_v256, c_v256>(kIterations, mask, maskwidth, name);
-}
-
-MY_TEST_P(ARCH_POSTFIX(V256_V256V256V256), TestIntrinsics) {  // the only v256 test that uses TestSimd3Args
- TestSimd3Args<c_v256, c_v256, c_v256, c_v256>(kIterations, mask, maskwidth,
- name);
-}
-
-MY_TEST_P(ARCH_POSTFIX(V256_V128V128), TestIntrinsics) {
- TestSimd2Args<c_v256, c_v128, c_v128>(kIterations, mask, maskwidth, name);
-}
-
-MY_TEST_P(ARCH_POSTFIX(U32_V256V256), TestIntrinsics) {
- TestSimd2Args<uint32_t, c_v256, c_v256>(kIterations, mask, maskwidth, name);
-}
-
-MY_TEST_P(ARCH_POSTFIX(U64_V256V256), TestIntrinsics) {
- TestSimd2Args<uint64_t, c_v256, c_v256>(kIterations, mask, maskwidth, name);
-}
-
-MY_TEST_P(ARCH_POSTFIX(S64_V256V256), TestIntrinsics) {
- TestSimd2Args<int64_t, c_v256, c_v256>(kIterations, mask, maskwidth, name);
-}
-
-MY_TEST_P(ARCH_POSTFIX(V256_V256V256_Part2), TestIntrinsics) {  // _PartN tests: same body as the base test, separate test case
- TestSimd2Args<c_v256, c_v256, c_v256>(kIterations, mask, maskwidth, name);
-}
-
-MY_TEST_P(ARCH_POSTFIX(V256_V256U32), TestIntrinsics) {
- TestSimd2Args<c_v256, c_v256, uint32_t>(kIterations, mask, maskwidth, name);
-}
-
-MY_TEST_P(ARCH_POSTFIX(V256_V256_Part2), TestIntrinsics) {
- TestSimd1Arg<c_v256, c_v256>(kIterations, mask, maskwidth, name);
-}
-
-MY_TEST_P(ARCH_POSTFIX(V256_V256_Part3), TestIntrinsics) {
- TestSimd1Arg<c_v256, c_v256>(kIterations, mask, maskwidth, name);
-}
-
-MY_TEST_P(ARCH_POSTFIX(V256_V256_Part4), TestIntrinsics) {
- TestSimd1Arg<c_v256, c_v256>(kIterations, mask, maskwidth, name);
-}
-
-MY_TEST_P(ARCH_POSTFIX(V256_V256_Part5), TestIntrinsics) {
- TestSimd1Arg<c_v256, c_v256>(kIterations, mask, maskwidth, name);
-}
-
-MY_TEST_P(ARCH_POSTFIX(V256_U8), TestIntrinsics) {
- TestSimd1Arg<c_v256, uint8_t>(kIterations, mask, maskwidth, name);
-}
-
-MY_TEST_P(ARCH_POSTFIX(V256_U16), TestIntrinsics) {
- TestSimd1Arg<c_v256, uint16_t>(kIterations, mask, maskwidth, name);
-}
-
-MY_TEST_P(ARCH_POSTFIX(V256_U32), TestIntrinsics) {
- TestSimd1Arg<c_v256, uint32_t>(kIterations, mask, maskwidth, name);
-}
-
-MY_TEST_P(ARCH_POSTFIX(V256_U64), TestIntrinsics) {
- TestSimd1Arg<c_v256, uint64_t>(kIterations, mask, maskwidth, name);
-}
-
-MY_TEST_P(ARCH_POSTFIX(U32_V256), TestIntrinsics) {
- TestSimd1Arg<uint32_t, c_v256>(kIterations, mask, maskwidth, name);
-}
-
-MY_TEST_P(ARCH_POSTFIX(V64_V256), TestIntrinsics) {
- TestSimd1Arg<c_v64, c_v256>(kIterations, mask, maskwidth, name);
-}
-
-// Add a macro layer: INSTANTIATE_TEST_CASE_P will quote the name, so ARCH and
-// ARCH_POSTFIX(...) need to be expanded first; the variadic arguments become the ::testing::Values(...) list
-#define INSTANTIATE(name, type, ...) \
- INSTANTIATE_TEST_CASE_P(name, type, ::testing::Values(__VA_ARGS__))
-// SIMD_TUPLE builds one (mask, maskwidth, "name") parameter; #name stringifies the intrinsic's name.
-#define SIMD_TUPLE(name, mask, maskwidth) \
- ::testing::make_tuple(mask, maskwidth, static_cast<const char *>(#name))
-
-INSTANTIATE(ARCH, ARCH_POSTFIX(U32_V64V64),  // no extra parentheses: a parenthesised pair would collapse to its last tuple
- SIMD_TUPLE(v64_sad_u8, 0U, 0U), SIMD_TUPLE(v64_ssd_u8, 0U, 0U));
-
-INSTANTIATE(  // v64 intrinsics taking two v64 operands; every tuple here passes mask 0U and maskwidth 0U
- ARCH, ARCH_POSTFIX(V64_V64V64), SIMD_TUPLE(v64_add_8, 0U, 0U),
- SIMD_TUPLE(v64_add_16, 0U, 0U), SIMD_TUPLE(v64_sadd_s16, 0U, 0U),
- SIMD_TUPLE(v64_add_32, 0U, 0U), SIMD_TUPLE(v64_sub_8, 0U, 0U),
- SIMD_TUPLE(v64_ssub_u8, 0U, 0U), SIMD_TUPLE(v64_ssub_s8, 0U, 0U),
- SIMD_TUPLE(v64_sub_16, 0U, 0U), SIMD_TUPLE(v64_ssub_s16, 0U, 0U),
- SIMD_TUPLE(v64_ssub_u16, 0U, 0U), SIMD_TUPLE(v64_sub_32, 0U, 0U),
- SIMD_TUPLE(v64_ziplo_8, 0U, 0U), SIMD_TUPLE(v64_ziphi_8, 0U, 0U),
- SIMD_TUPLE(v64_ziplo_16, 0U, 0U), SIMD_TUPLE(v64_ziphi_16, 0U, 0U),
- SIMD_TUPLE(v64_ziplo_32, 0U, 0U), SIMD_TUPLE(v64_ziphi_32, 0U, 0U),
- SIMD_TUPLE(v64_pack_s32_s16, 0U, 0U), SIMD_TUPLE(v64_pack_s16_u8, 0U, 0U),
- SIMD_TUPLE(v64_pack_s16_s8, 0U, 0U), SIMD_TUPLE(v64_unziphi_8, 0U, 0U),
- SIMD_TUPLE(v64_unziplo_8, 0U, 0U), SIMD_TUPLE(v64_unziphi_16, 0U, 0U),
- SIMD_TUPLE(v64_unziplo_16, 0U, 0U), SIMD_TUPLE(v64_or, 0U, 0U),
- SIMD_TUPLE(v64_xor, 0U, 0U), SIMD_TUPLE(v64_and, 0U, 0U),
- SIMD_TUPLE(v64_andn, 0U, 0U), SIMD_TUPLE(v64_mullo_s16, 0U, 0U),
- SIMD_TUPLE(v64_mulhi_s16, 0U, 0U), SIMD_TUPLE(v64_mullo_s32, 0U, 0U),
- SIMD_TUPLE(v64_madd_s16, 0U, 0U), SIMD_TUPLE(v64_madd_us8, 0U, 0U),
- SIMD_TUPLE(v64_avg_u8, 0U, 0U), SIMD_TUPLE(v64_rdavg_u8, 0U, 0U),
- SIMD_TUPLE(v64_avg_u16, 0U, 0U), SIMD_TUPLE(v64_min_u8, 0U, 0U),
- SIMD_TUPLE(v64_max_u8, 0U, 0U), SIMD_TUPLE(v64_min_s8, 0U, 0U),
- SIMD_TUPLE(v64_max_s8, 0U, 0U), SIMD_TUPLE(v64_min_s16, 0U, 0U),
- SIMD_TUPLE(v64_max_s16, 0U, 0U), SIMD_TUPLE(v64_cmpgt_s8, 0U, 0U),
- SIMD_TUPLE(v64_cmplt_s8, 0U, 0U), SIMD_TUPLE(v64_cmpeq_8, 0U, 0U),
- SIMD_TUPLE(v64_cmpgt_s16, 0U, 0U), SIMD_TUPLE(v64_cmplt_s16, 0U, 0U),
- SIMD_TUPLE(v64_cmpeq_16, 0U, 0U));
-
-INSTANTIATE(  // overflow of the V64_V64V64 list; v64_shuffle_8 is the only entry with a nonzero mask/maskwidth (7U, 8U)
- ARCH, ARCH_POSTFIX(V64_V64V64_Part2), SIMD_TUPLE(v64_shuffle_8, 7U, 8U),
- SIMD_TUPLE(v64_pack_s32_u16, 0U, 0U), SIMD_TUPLE(v64_rdavg_u16, 0U, 0U),
- SIMD_TUPLE(v64_sadd_s8, 0U, 0U), SIMD_TUPLE(v64_sadd_u8, 0U, 0U),
- SIMD_TUPLE(imm_v64_align<1>, 0U, 0U), SIMD_TUPLE(imm_v64_align<2>, 0U, 0U),
- SIMD_TUPLE(imm_v64_align<3>, 0U, 0U), SIMD_TUPLE(imm_v64_align<4>, 0U, 0U),
- SIMD_TUPLE(imm_v64_align<5>, 0U, 0U), SIMD_TUPLE(imm_v64_align<6>, 0U, 0U),
- SIMD_TUPLE(imm_v64_align<7>, 0U, 0U));
-
-INSTANTIATE(ARCH, ARCH_POSTFIX(V64_V64), SIMD_TUPLE(v64_abs_s8, 0U, 0U),  // 50 tuples, one per line; the rest are in V64_V64_Part2
- SIMD_TUPLE(v64_abs_s16, 0U, 0U),
- SIMD_TUPLE(v64_unpacklo_u8_s16, 0U, 0U),
- SIMD_TUPLE(v64_unpackhi_u8_s16, 0U, 0U),
- SIMD_TUPLE(v64_unpacklo_s8_s16, 0U, 0U),
- SIMD_TUPLE(v64_unpackhi_s8_s16, 0U, 0U),
- SIMD_TUPLE(v64_unpacklo_u16_s32, 0U, 0U),
- SIMD_TUPLE(v64_unpacklo_s16_s32, 0U, 0U),
- SIMD_TUPLE(v64_unpackhi_u16_s32, 0U, 0U),
- SIMD_TUPLE(v64_unpackhi_s16_s32, 0U, 0U),
- SIMD_TUPLE(imm_v64_shr_n_byte<1>, 0U, 0U),  // imm_* entries: the template argument is part of the stringified name
- SIMD_TUPLE(imm_v64_shr_n_byte<2>, 0U, 0U),
- SIMD_TUPLE(imm_v64_shr_n_byte<3>, 0U, 0U),
- SIMD_TUPLE(imm_v64_shr_n_byte<4>, 0U, 0U),
- SIMD_TUPLE(imm_v64_shr_n_byte<5>, 0U, 0U),
- SIMD_TUPLE(imm_v64_shr_n_byte<6>, 0U, 0U),
- SIMD_TUPLE(imm_v64_shr_n_byte<7>, 0U, 0U),
- SIMD_TUPLE(imm_v64_shl_n_byte<1>, 0U, 0U),
- SIMD_TUPLE(imm_v64_shl_n_byte<2>, 0U, 0U),
- SIMD_TUPLE(imm_v64_shl_n_byte<3>, 0U, 0U),
- SIMD_TUPLE(imm_v64_shl_n_byte<4>, 0U, 0U),
- SIMD_TUPLE(imm_v64_shl_n_byte<5>, 0U, 0U),
- SIMD_TUPLE(imm_v64_shl_n_byte<6>, 0U, 0U),
- SIMD_TUPLE(imm_v64_shl_n_byte<7>, 0U, 0U),
- SIMD_TUPLE(imm_v64_shl_n_8<1>, 0U, 0U),
- SIMD_TUPLE(imm_v64_shl_n_8<2>, 0U, 0U),
- SIMD_TUPLE(imm_v64_shl_n_8<3>, 0U, 0U),
- SIMD_TUPLE(imm_v64_shl_n_8<4>, 0U, 0U),
- SIMD_TUPLE(imm_v64_shl_n_8<5>, 0U, 0U),
- SIMD_TUPLE(imm_v64_shl_n_8<6>, 0U, 0U),
- SIMD_TUPLE(imm_v64_shl_n_8<7>, 0U, 0U),
- SIMD_TUPLE(imm_v64_shr_n_u8<1>, 0U, 0U),
- SIMD_TUPLE(imm_v64_shr_n_u8<2>, 0U, 0U),
- SIMD_TUPLE(imm_v64_shr_n_u8<3>, 0U, 0U),
- SIMD_TUPLE(imm_v64_shr_n_u8<4>, 0U, 0U),
- SIMD_TUPLE(imm_v64_shr_n_u8<5>, 0U, 0U),
- SIMD_TUPLE(imm_v64_shr_n_u8<6>, 0U, 0U),
- SIMD_TUPLE(imm_v64_shr_n_u8<7>, 0U, 0U),
- SIMD_TUPLE(imm_v64_shr_n_s8<1>, 0U, 0U),
- SIMD_TUPLE(imm_v64_shr_n_s8<2>, 0U, 0U),
- SIMD_TUPLE(imm_v64_shr_n_s8<3>, 0U, 0U),
- SIMD_TUPLE(imm_v64_shr_n_s8<4>, 0U, 0U),
- SIMD_TUPLE(imm_v64_shr_n_s8<5>, 0U, 0U),
- SIMD_TUPLE(imm_v64_shr_n_s8<6>, 0U, 0U),
- SIMD_TUPLE(imm_v64_shr_n_s8<7>, 0U, 0U),
- SIMD_TUPLE(imm_v64_shl_n_16<1>, 0U, 0U),
- SIMD_TUPLE(imm_v64_shl_n_16<2>, 0U, 0U),
- SIMD_TUPLE(imm_v64_shl_n_16<4>, 0U, 0U),
- SIMD_TUPLE(imm_v64_shl_n_16<6>, 0U, 0U),
- SIMD_TUPLE(imm_v64_shl_n_16<8>, 0U, 0U));  // the imm_v64_shl_n_16 series continues in V64_V64_Part2
-
-INSTANTIATE(ARCH, ARCH_POSTFIX(V64_V64_Part2),  // continuation of the V64_V64 list (same fixture type via the _Part2 typedef)
- SIMD_TUPLE(imm_v64_shl_n_16<10>, 0U, 0U),
- SIMD_TUPLE(imm_v64_shl_n_16<12>, 0U, 0U),
- SIMD_TUPLE(imm_v64_shl_n_16<14>, 0U, 0U),
- SIMD_TUPLE(imm_v64_shr_n_u16<1>, 0U, 0U),
- SIMD_TUPLE(imm_v64_shr_n_u16<2>, 0U, 0U),
- SIMD_TUPLE(imm_v64_shr_n_u16<4>, 0U, 0U),
- SIMD_TUPLE(imm_v64_shr_n_u16<6>, 0U, 0U),
- SIMD_TUPLE(imm_v64_shr_n_u16<8>, 0U, 0U),
- SIMD_TUPLE(imm_v64_shr_n_u16<10>, 0U, 0U),
- SIMD_TUPLE(imm_v64_shr_n_u16<12>, 0U, 0U),
- SIMD_TUPLE(imm_v64_shr_n_u16<14>, 0U, 0U),
- SIMD_TUPLE(imm_v64_shr_n_s16<1>, 0U, 0U),
- SIMD_TUPLE(imm_v64_shr_n_s16<2>, 0U, 0U),
- SIMD_TUPLE(imm_v64_shr_n_s16<4>, 0U, 0U),
- SIMD_TUPLE(imm_v64_shr_n_s16<6>, 0U, 0U),
- SIMD_TUPLE(imm_v64_shr_n_s16<8>, 0U, 0U),
- SIMD_TUPLE(imm_v64_shr_n_s16<10>, 0U, 0U),
- SIMD_TUPLE(imm_v64_shr_n_s16<12>, 0U, 0U),
- SIMD_TUPLE(imm_v64_shr_n_s16<14>, 0U, 0U),
- SIMD_TUPLE(imm_v64_shl_n_32<1>, 0U, 0U),  // 32-bit-lane shifts: amounts 1, then multiples of 4 up to 28
- SIMD_TUPLE(imm_v64_shl_n_32<4>, 0U, 0U),
- SIMD_TUPLE(imm_v64_shl_n_32<8>, 0U, 0U),
- SIMD_TUPLE(imm_v64_shl_n_32<12>, 0U, 0U),
- SIMD_TUPLE(imm_v64_shl_n_32<16>, 0U, 0U),
- SIMD_TUPLE(imm_v64_shl_n_32<20>, 0U, 0U),
- SIMD_TUPLE(imm_v64_shl_n_32<24>, 0U, 0U),
- SIMD_TUPLE(imm_v64_shl_n_32<28>, 0U, 0U),
- SIMD_TUPLE(imm_v64_shr_n_u32<1>, 0U, 0U),
- SIMD_TUPLE(imm_v64_shr_n_u32<4>, 0U, 0U),
- SIMD_TUPLE(imm_v64_shr_n_u32<8>, 0U, 0U),
- SIMD_TUPLE(imm_v64_shr_n_u32<12>, 0U, 0U),
- SIMD_TUPLE(imm_v64_shr_n_u32<16>, 0U, 0U),
- SIMD_TUPLE(imm_v64_shr_n_u32<20>, 0U, 0U),
- SIMD_TUPLE(imm_v64_shr_n_u32<24>, 0U, 0U),
- SIMD_TUPLE(imm_v64_shr_n_u32<28>, 0U, 0U),
- SIMD_TUPLE(imm_v64_shr_n_s32<1>, 0U, 0U),
- SIMD_TUPLE(imm_v64_shr_n_s32<4>, 0U, 0U),
- SIMD_TUPLE(imm_v64_shr_n_s32<8>, 0U, 0U),
- SIMD_TUPLE(imm_v64_shr_n_s32<12>, 0U, 0U),
- SIMD_TUPLE(imm_v64_shr_n_s32<16>, 0U, 0U),
- SIMD_TUPLE(imm_v64_shr_n_s32<20>, 0U, 0U),
- SIMD_TUPLE(imm_v64_shr_n_s32<24>, 0U, 0U),
- SIMD_TUPLE(imm_v64_shr_n_s32<28>, 0U, 0U));
-
-INSTANTIATE(ARCH, ARCH_POSTFIX(V64_V64U32), SIMD_TUPLE(v64_shl_8, 7U, 32U),  // mask is lane width - 1 (7/15/31), maskwidth 32
- SIMD_TUPLE(v64_shr_u8, 7U, 32U), SIMD_TUPLE(v64_shr_s8, 7U, 32U),  // presumably bounds the uint32_t shift count; SetMask is not shown here - confirm
- SIMD_TUPLE(v64_shl_16, 15U, 32U), SIMD_TUPLE(v64_shr_u16, 15U, 32U),
- SIMD_TUPLE(v64_shr_s16, 15U, 32U), SIMD_TUPLE(v64_shl_32, 31U, 32U),
- SIMD_TUPLE(v64_shr_u32, 31U, 32U),
- SIMD_TUPLE(v64_shr_s32, 31U, 32U));
-
-INSTANTIATE(ARCH, ARCH_POSTFIX(U64_V64), SIMD_TUPLE(v64_hadd_u8, 0U, 0U),  // v64 intrinsics run by the uint64_t-returning test
- SIMD_TUPLE(v64_u64, 0U, 0U));
-
-INSTANTIATE(ARCH, ARCH_POSTFIX(S64_V64), SIMD_TUPLE(v64_hadd_s16, 0U, 0U));
-
-INSTANTIATE(ARCH, ARCH_POSTFIX(U32_V64), SIMD_TUPLE(v64_low_u32, 0U, 0U),
- SIMD_TUPLE(v64_high_u32, 0U, 0U));
-
-INSTANTIATE(ARCH, ARCH_POSTFIX(S32_V64), SIMD_TUPLE(v64_low_s32, 0U, 0U),
- SIMD_TUPLE(v64_high_s32, 0U, 0U));
-
-INSTANTIATE(ARCH, ARCH_POSTFIX(S64_V64V64), SIMD_TUPLE(v64_dotp_s16, 0U, 0U),
- SIMD_TUPLE(v64_dotp_su8, 0U, 0U));
-// v64 intrinsics that take scalar arguments (uint8_t / uint16_t / uint32_t test signatures).
-INSTANTIATE(ARCH, ARCH_POSTFIX(V64_U8), SIMD_TUPLE(v64_dup_8, 0U, 0U));
-
-INSTANTIATE(ARCH, ARCH_POSTFIX(V64_U16), SIMD_TUPLE(v64_dup_16, 0U, 0U));
-
-INSTANTIATE(ARCH, ARCH_POSTFIX(V64_U32), SIMD_TUPLE(v64_dup_32, 0U, 0U));
-
-INSTANTIATE(ARCH, ARCH_POSTFIX(V64_U32U32), SIMD_TUPLE(v64_from_32, 0U, 0U));
-// The v128 parameter lists start here.
-INSTANTIATE(ARCH, ARCH_POSTFIX(U32_V128V128), SIMD_TUPLE(v128_sad_u8, 0U, 0U),
- SIMD_TUPLE(v128_ssd_u8, 0U, 0U), SIMD_TUPLE(v128_sad_u16, 0U, 0U));
-INSTANTIATE(ARCH, ARCH_POSTFIX(U64_V128V128), SIMD_TUPLE(v128_ssd_s16, 0U, 0U));
-
-INSTANTIATE(  // v128 intrinsics taking two v128 operands; 50 tuples, all with mask 0U and maskwidth 0U
- ARCH, ARCH_POSTFIX(V128_V128V128), SIMD_TUPLE(v128_add_8, 0U, 0U),
- SIMD_TUPLE(v128_add_16, 0U, 0U), SIMD_TUPLE(v128_sadd_s16, 0U, 0U),
- SIMD_TUPLE(v128_add_32, 0U, 0U), SIMD_TUPLE(v128_sub_8, 0U, 0U),
- SIMD_TUPLE(v128_ssub_u8, 0U, 0U), SIMD_TUPLE(v128_ssub_s8, 0U, 0U),
- SIMD_TUPLE(v128_sub_16, 0U, 0U), SIMD_TUPLE(v128_ssub_s16, 0U, 0U),
- SIMD_TUPLE(v128_ssub_u16, 0U, 0U), SIMD_TUPLE(v128_sub_32, 0U, 0U),
- SIMD_TUPLE(v128_ziplo_8, 0U, 0U), SIMD_TUPLE(v128_ziphi_8, 0U, 0U),
- SIMD_TUPLE(v128_ziplo_16, 0U, 0U), SIMD_TUPLE(v128_ziphi_16, 0U, 0U),
- SIMD_TUPLE(v128_ziplo_32, 0U, 0U), SIMD_TUPLE(v128_ziphi_32, 0U, 0U),
- SIMD_TUPLE(v128_ziplo_64, 0U, 0U), SIMD_TUPLE(v128_ziphi_64, 0U, 0U),
- SIMD_TUPLE(v128_unziphi_8, 0U, 0U), SIMD_TUPLE(v128_unziplo_8, 0U, 0U),
- SIMD_TUPLE(v128_unziphi_16, 0U, 0U), SIMD_TUPLE(v128_unziplo_16, 0U, 0U),
- SIMD_TUPLE(v128_unziphi_32, 0U, 0U), SIMD_TUPLE(v128_unziplo_32, 0U, 0U),
- SIMD_TUPLE(v128_pack_s32_s16, 0U, 0U), SIMD_TUPLE(v128_pack_s16_u8, 0U, 0U),
- SIMD_TUPLE(v128_pack_s16_s8, 0U, 0U), SIMD_TUPLE(v128_or, 0U, 0U),
- SIMD_TUPLE(v128_xor, 0U, 0U), SIMD_TUPLE(v128_and, 0U, 0U),
- SIMD_TUPLE(v128_andn, 0U, 0U), SIMD_TUPLE(v128_mullo_s16, 0U, 0U),
- SIMD_TUPLE(v128_mulhi_s16, 0U, 0U), SIMD_TUPLE(v128_mullo_s32, 0U, 0U),
- SIMD_TUPLE(v128_madd_s16, 0U, 0U), SIMD_TUPLE(v128_madd_us8, 0U, 0U),
- SIMD_TUPLE(v128_avg_u8, 0U, 0U), SIMD_TUPLE(v128_rdavg_u8, 0U, 0U),
- SIMD_TUPLE(v128_avg_u16, 0U, 0U), SIMD_TUPLE(v128_min_u8, 0U, 0U),
- SIMD_TUPLE(v128_max_u8, 0U, 0U), SIMD_TUPLE(v128_min_s8, 0U, 0U),
- SIMD_TUPLE(v128_max_s8, 0U, 0U), SIMD_TUPLE(v128_min_s16, 0U, 0U),
- SIMD_TUPLE(v128_max_s16, 0U, 0U), SIMD_TUPLE(v128_cmpgt_s8, 0U, 0U),
- SIMD_TUPLE(v128_cmplt_s8, 0U, 0U), SIMD_TUPLE(v128_cmpeq_8, 0U, 0U),
- SIMD_TUPLE(v128_cmpgt_s16, 0U, 0U));  // the remaining two-operand v128 entries are in V128_V128V128_Part2
-
-INSTANTIATE(ARCH, ARCH_POSTFIX(V128_V128V128_Part2),
- SIMD_TUPLE(v128_pack_s32_u16, 0U, 0U),
- SIMD_TUPLE(v128_rdavg_u16, 0U, 0U), SIMD_TUPLE(v128_add_64, 0U, 0U),
- SIMD_TUPLE(v128_sub_64, 0U, 0U), SIMD_TUPLE(v128_sadd_s8, 0U, 0U),
- SIMD_TUPLE(v128_sadd_u8, 0U, 0U), SIMD_TUPLE(v128_cmpeq_16, 0U, 0U),
- SIMD_TUPLE(v128_cmplt_s16, 0U, 0U),
- SIMD_TUPLE(v128_cmplt_s32, 0U, 0U),
- SIMD_TUPLE(v128_cmpeq_32, 0U, 0U),
- SIMD_TUPLE(v128_cmpgt_s32, 0U, 0U),
- SIMD_TUPLE(v128_shuffle_8, 15U, 8U),
- SIMD_TUPLE(v128_min_s32, 0U, 0U), SIMD_TUPLE(v128_max_s32, 0U, 0U),
- SIMD_TUPLE(imm_v128_align<1>, 0U, 0U),
- SIMD_TUPLE(imm_v128_align<2>, 0U, 0U),
- SIMD_TUPLE(imm_v128_align<3>, 0U, 0U),
- SIMD_TUPLE(imm_v128_align<4>, 0U, 0U),
- SIMD_TUPLE(imm_v128_align<5>, 0U, 0U),
- SIMD_TUPLE(imm_v128_align<6>, 0U, 0U),
- SIMD_TUPLE(imm_v128_align<7>, 0U, 0U),
- SIMD_TUPLE(imm_v128_align<8>, 0U, 0U),
- SIMD_TUPLE(imm_v128_align<9>, 0U, 0U),
- SIMD_TUPLE(imm_v128_align<10>, 0U, 0U),
- SIMD_TUPLE(imm_v128_align<11>, 0U, 0U),
- SIMD_TUPLE(imm_v128_align<12>, 0U, 0U),
- SIMD_TUPLE(imm_v128_align<13>, 0U, 0U),
- SIMD_TUPLE(imm_v128_align<14>, 0U, 0U),
- SIMD_TUPLE(imm_v128_align<15>, 0U, 0U));
-
-INSTANTIATE(ARCH, ARCH_POSTFIX(V128_V128V128V128),
- SIMD_TUPLE(v128_blend_8, 0U, 0U));
-
-INSTANTIATE(ARCH, ARCH_POSTFIX(V128_V128), SIMD_TUPLE(v128_abs_s8, 0U, 0U),
- SIMD_TUPLE(v128_abs_s16, 0U, 0U), SIMD_TUPLE(v128_padd_s16, 0U, 0U),
- SIMD_TUPLE(v128_unpacklo_u8_s16, 0U, 0U),
- SIMD_TUPLE(v128_unpacklo_s8_s16, 0U, 0U),
- SIMD_TUPLE(v128_unpacklo_u16_s32, 0U, 0U),
- SIMD_TUPLE(v128_unpacklo_s16_s32, 0U, 0U),
- SIMD_TUPLE(v128_unpackhi_u8_s16, 0U, 0U),
- SIMD_TUPLE(v128_unpackhi_s8_s16, 0U, 0U),
- SIMD_TUPLE(v128_unpackhi_u16_s32, 0U, 0U),
- SIMD_TUPLE(v128_unpackhi_s16_s32, 0U, 0U),
- SIMD_TUPLE(imm_v128_shr_n_byte<1>, 0U, 0U),
- SIMD_TUPLE(imm_v128_shr_n_byte<2>, 0U, 0U),
- SIMD_TUPLE(imm_v128_shr_n_byte<3>, 0U, 0U),
- SIMD_TUPLE(imm_v128_shr_n_byte<4>, 0U, 0U),
- SIMD_TUPLE(imm_v128_shr_n_byte<5>, 0U, 0U),
- SIMD_TUPLE(imm_v128_shr_n_byte<6>, 0U, 0U),
- SIMD_TUPLE(imm_v128_shr_n_byte<7>, 0U, 0U),
- SIMD_TUPLE(imm_v128_shr_n_byte<8>, 0U, 0U),
- SIMD_TUPLE(imm_v128_shr_n_byte<9>, 0U, 0U),
- SIMD_TUPLE(imm_v128_shr_n_byte<10>, 0U, 0U),
- SIMD_TUPLE(imm_v128_shr_n_byte<11>, 0U, 0U),
- SIMD_TUPLE(imm_v128_shr_n_byte<12>, 0U, 0U),
- SIMD_TUPLE(imm_v128_shr_n_byte<13>, 0U, 0U),
- SIMD_TUPLE(imm_v128_shr_n_byte<14>, 0U, 0U),
- SIMD_TUPLE(imm_v128_shr_n_byte<15>, 0U, 0U),
- SIMD_TUPLE(imm_v128_shl_n_byte<1>, 0U, 0U),
- SIMD_TUPLE(imm_v128_shl_n_byte<2>, 0U, 0U),
- SIMD_TUPLE(imm_v128_shl_n_byte<3>, 0U, 0U),
- SIMD_TUPLE(imm_v128_shl_n_byte<4>, 0U, 0U),
- SIMD_TUPLE(imm_v128_shl_n_byte<5>, 0U, 0U),
- SIMD_TUPLE(imm_v128_shl_n_byte<6>, 0U, 0U),
- SIMD_TUPLE(imm_v128_shl_n_byte<7>, 0U, 0U),
- SIMD_TUPLE(imm_v128_shl_n_byte<8>, 0U, 0U),
- SIMD_TUPLE(imm_v128_shl_n_byte<9>, 0U, 0U),
- SIMD_TUPLE(imm_v128_shl_n_byte<10>, 0U, 0U),
- SIMD_TUPLE(imm_v128_shl_n_byte<11>, 0U, 0U),
- SIMD_TUPLE(imm_v128_shl_n_byte<12>, 0U, 0U),
- SIMD_TUPLE(imm_v128_shl_n_byte<13>, 0U, 0U),
- SIMD_TUPLE(imm_v128_shl_n_byte<14>, 0U, 0U),
- SIMD_TUPLE(imm_v128_shl_n_byte<15>, 0U, 0U),
- SIMD_TUPLE(imm_v128_shl_n_8<1>, 0U, 0U),
- SIMD_TUPLE(imm_v128_shl_n_8<2>, 0U, 0U),
- SIMD_TUPLE(imm_v128_shl_n_8<3>, 0U, 0U),
- SIMD_TUPLE(imm_v128_shl_n_8<4>, 0U, 0U),
- SIMD_TUPLE(imm_v128_shl_n_8<5>, 0U, 0U),
- SIMD_TUPLE(imm_v128_shl_n_8<6>, 0U, 0U),
- SIMD_TUPLE(imm_v128_shl_n_8<7>, 0U, 0U),
- SIMD_TUPLE(imm_v128_shr_n_u8<1>, 0U, 0U));
-
-INSTANTIATE(ARCH, ARCH_POSTFIX(V128_V128_Part2),
- SIMD_TUPLE(imm_v128_shr_n_u8<2>, 0U, 0U),
- SIMD_TUPLE(imm_v128_shr_n_u8<3>, 0U, 0U),
- SIMD_TUPLE(imm_v128_shr_n_u8<4>, 0U, 0U),
- SIMD_TUPLE(imm_v128_shr_n_u8<5>, 0U, 0U),
- SIMD_TUPLE(imm_v128_shr_n_u8<6>, 0U, 0U),
- SIMD_TUPLE(imm_v128_shr_n_u8<7>, 0U, 0U),
- SIMD_TUPLE(imm_v128_shr_n_s8<1>, 0U, 0U),
- SIMD_TUPLE(imm_v128_shr_n_s8<2>, 0U, 0U),
- SIMD_TUPLE(imm_v128_shr_n_s8<3>, 0U, 0U),
- SIMD_TUPLE(imm_v128_shr_n_s8<4>, 0U, 0U),
- SIMD_TUPLE(imm_v128_shr_n_s8<5>, 0U, 0U),
- SIMD_TUPLE(imm_v128_shr_n_s8<6>, 0U, 0U),
- SIMD_TUPLE(imm_v128_shr_n_s8<7>, 0U, 0U),
- SIMD_TUPLE(imm_v128_shl_n_16<1>, 0U, 0U),
- SIMD_TUPLE(imm_v128_shl_n_16<2>, 0U, 0U),
- SIMD_TUPLE(imm_v128_shl_n_16<4>, 0U, 0U),
- SIMD_TUPLE(imm_v128_shl_n_16<6>, 0U, 0U),
- SIMD_TUPLE(imm_v128_shl_n_16<8>, 0U, 0U),
- SIMD_TUPLE(imm_v128_shl_n_16<10>, 0U, 0U),
- SIMD_TUPLE(imm_v128_shl_n_16<12>, 0U, 0U),
- SIMD_TUPLE(imm_v128_shl_n_16<14>, 0U, 0U),
- SIMD_TUPLE(imm_v128_shr_n_u16<1>, 0U, 0U),
- SIMD_TUPLE(imm_v128_shr_n_u16<2>, 0U, 0U),
- SIMD_TUPLE(imm_v128_shr_n_u16<4>, 0U, 0U),
- SIMD_TUPLE(imm_v128_shr_n_u16<6>, 0U, 0U),
- SIMD_TUPLE(imm_v128_shr_n_u16<8>, 0U, 0U),
- SIMD_TUPLE(imm_v128_shr_n_u16<10>, 0U, 0U),
- SIMD_TUPLE(imm_v128_shr_n_u16<12>, 0U, 0U),
- SIMD_TUPLE(imm_v128_shr_n_u16<14>, 0U, 0U),
- SIMD_TUPLE(imm_v128_shr_n_s16<1>, 0U, 0U),
- SIMD_TUPLE(imm_v128_shr_n_s16<2>, 0U, 0U),
- SIMD_TUPLE(imm_v128_shr_n_s16<4>, 0U, 0U),
- SIMD_TUPLE(imm_v128_shr_n_s16<6>, 0U, 0U),
- SIMD_TUPLE(imm_v128_shr_n_s16<8>, 0U, 0U),
- SIMD_TUPLE(imm_v128_shr_n_s16<10>, 0U, 0U),
- SIMD_TUPLE(imm_v128_shr_n_s16<12>, 0U, 0U),
- SIMD_TUPLE(imm_v128_shr_n_s16<14>, 0U, 0U),
- SIMD_TUPLE(imm_v128_shl_n_32<1>, 0U, 0U),
- SIMD_TUPLE(imm_v128_shl_n_32<4>, 0U, 0U),
- SIMD_TUPLE(imm_v128_shl_n_32<8>, 0U, 0U),
- SIMD_TUPLE(imm_v128_shl_n_32<12>, 0U, 0U),
- SIMD_TUPLE(imm_v128_shl_n_32<16>, 0U, 0U),
- SIMD_TUPLE(imm_v128_shl_n_32<20>, 0U, 0U),
- SIMD_TUPLE(imm_v128_shl_n_32<24>, 0U, 0U),
- SIMD_TUPLE(imm_v128_shl_n_32<28>, 0U, 0U),
- SIMD_TUPLE(imm_v128_shr_n_u32<1>, 0U, 0U),
- SIMD_TUPLE(imm_v128_shr_n_u32<4>, 0U, 0U));
-
-INSTANTIATE(ARCH, ARCH_POSTFIX(V128_V128_Part3),
- SIMD_TUPLE(imm_v128_shr_n_u32<8>, 0U, 0U),
- SIMD_TUPLE(imm_v128_shr_n_u32<12>, 0U, 0U),
- SIMD_TUPLE(imm_v128_shr_n_u32<16>, 0U, 0U),
- SIMD_TUPLE(imm_v128_shr_n_u32<20>, 0U, 0U),
- SIMD_TUPLE(imm_v128_shr_n_u32<24>, 0U, 0U),
- SIMD_TUPLE(imm_v128_shr_n_u32<28>, 0U, 0U),
- SIMD_TUPLE(imm_v128_shr_n_s32<1>, 0U, 0U),
- SIMD_TUPLE(imm_v128_shr_n_s32<4>, 0U, 0U),
- SIMD_TUPLE(imm_v128_shr_n_s32<8>, 0U, 0U),
- SIMD_TUPLE(imm_v128_shr_n_s32<12>, 0U, 0U),
- SIMD_TUPLE(imm_v128_shr_n_s32<16>, 0U, 0U),
- SIMD_TUPLE(imm_v128_shr_n_s32<20>, 0U, 0U),
- SIMD_TUPLE(imm_v128_shr_n_s32<24>, 0U, 0U),
- SIMD_TUPLE(imm_v128_shr_n_s32<28>, 0U, 0U));
-
-INSTANTIATE(ARCH, ARCH_POSTFIX(V128_V128_Part4),
- SIMD_TUPLE(imm_v128_shl_n_64<1>, 0U, 0U),
- SIMD_TUPLE(imm_v128_shl_n_64<4>, 0U, 0U),
- SIMD_TUPLE(imm_v128_shl_n_64<8>, 0U, 0U),
- SIMD_TUPLE(imm_v128_shl_n_64<12>, 0U, 0U),
- SIMD_TUPLE(imm_v128_shl_n_64<16>, 0U, 0U),
- SIMD_TUPLE(imm_v128_shl_n_64<20>, 0U, 0U),
- SIMD_TUPLE(imm_v128_shl_n_64<24>, 0U, 0U),
- SIMD_TUPLE(imm_v128_shl_n_64<28>, 0U, 0U),
- SIMD_TUPLE(imm_v128_shl_n_64<32>, 0U, 0U),
- SIMD_TUPLE(imm_v128_shl_n_64<36>, 0U, 0U),
- SIMD_TUPLE(imm_v128_shl_n_64<40>, 0U, 0U),
- SIMD_TUPLE(imm_v128_shl_n_64<44>, 0U, 0U),
- SIMD_TUPLE(imm_v128_shl_n_64<48>, 0U, 0U),
- SIMD_TUPLE(imm_v128_shl_n_64<52>, 0U, 0U),
- SIMD_TUPLE(imm_v128_shl_n_64<56>, 0U, 0U),
- SIMD_TUPLE(imm_v128_shl_n_64<60>, 0U, 0U),
- SIMD_TUPLE(imm_v128_shr_n_u64<1>, 0U, 0U),
- SIMD_TUPLE(imm_v128_shr_n_u64<4>, 0U, 0U),
- SIMD_TUPLE(imm_v128_shr_n_u64<8>, 0U, 0U),
- SIMD_TUPLE(imm_v128_shr_n_u64<12>, 0U, 0U),
- SIMD_TUPLE(imm_v128_shr_n_u64<16>, 0U, 0U),
- SIMD_TUPLE(imm_v128_shr_n_u64<20>, 0U, 0U),
- SIMD_TUPLE(imm_v128_shr_n_u64<24>, 0U, 0U),
- SIMD_TUPLE(imm_v128_shr_n_u64<28>, 0U, 0U),
- SIMD_TUPLE(imm_v128_shr_n_u64<32>, 0U, 0U),
- SIMD_TUPLE(imm_v128_shr_n_u64<36>, 0U, 0U),
- SIMD_TUPLE(imm_v128_shr_n_u64<40>, 0U, 0U),
- SIMD_TUPLE(imm_v128_shr_n_u64<44>, 0U, 0U),
- SIMD_TUPLE(imm_v128_shr_n_u64<48>, 0U, 0U),
- SIMD_TUPLE(imm_v128_shr_n_u64<52>, 0U, 0U),
- SIMD_TUPLE(imm_v128_shr_n_u64<56>, 0U, 0U),
- SIMD_TUPLE(imm_v128_shr_n_u64<60>, 0U, 0U),
- SIMD_TUPLE(imm_v128_shr_n_s64<1>, 0U, 0U),
- SIMD_TUPLE(imm_v128_shr_n_s64<4>, 0U, 0U),
- SIMD_TUPLE(imm_v128_shr_n_s64<8>, 0U, 0U),
- SIMD_TUPLE(imm_v128_shr_n_s64<12>, 0U, 0U),
- SIMD_TUPLE(imm_v128_shr_n_s64<16>, 0U, 0U),
- SIMD_TUPLE(imm_v128_shr_n_s64<20>, 0U, 0U),
- SIMD_TUPLE(imm_v128_shr_n_s64<24>, 0U, 0U),
- SIMD_TUPLE(imm_v128_shr_n_s64<28>, 0U, 0U),
- SIMD_TUPLE(imm_v128_shr_n_s64<32>, 0U, 0U),
- SIMD_TUPLE(imm_v128_shr_n_s64<36>, 0U, 0U),
- SIMD_TUPLE(imm_v128_shr_n_s64<40>, 0U, 0U),
- SIMD_TUPLE(imm_v128_shr_n_s64<44>, 0U, 0U),
- SIMD_TUPLE(imm_v128_shr_n_s64<48>, 0U, 0U),
- SIMD_TUPLE(imm_v128_shr_n_s64<52>, 0U, 0U),
- SIMD_TUPLE(imm_v128_shr_n_s64<56>, 0U, 0U),
- SIMD_TUPLE(imm_v128_shr_n_s64<60>, 0U, 0U),
- SIMD_TUPLE(v128_padd_u8, 0U, 0U));
-
-INSTANTIATE(ARCH, ARCH_POSTFIX(V128_V64V64), SIMD_TUPLE(v128_from_v64, 0U, 0U),
- SIMD_TUPLE(v128_zip_8, 0U, 0U), SIMD_TUPLE(v128_zip_16, 0U, 0U),
- SIMD_TUPLE(v128_zip_32, 0U, 0U), SIMD_TUPLE(v128_mul_s16, 0U, 0U));
-
-INSTANTIATE(ARCH, ARCH_POSTFIX(V128_U64U64), SIMD_TUPLE(v128_from_64, 0U, 0U));
-
-INSTANTIATE(ARCH, ARCH_POSTFIX(V128_V64),
- SIMD_TUPLE(v128_unpack_u8_s16, 0U, 0U),
- SIMD_TUPLE(v128_unpack_s8_s16, 0U, 0U),
- SIMD_TUPLE(v128_unpack_u16_s32, 0U, 0U),
- SIMD_TUPLE(v128_unpack_s16_s32, 0U, 0U));
-
-INSTANTIATE(
- ARCH, ARCH_POSTFIX(V128_V128U32), SIMD_TUPLE(v128_shl_8, 7U, 32U),
- SIMD_TUPLE(v128_shr_u8, 7U, 32U), SIMD_TUPLE(v128_shr_s8, 7U, 32U),
- SIMD_TUPLE(v128_shl_16, 15U, 32U), SIMD_TUPLE(v128_shr_u16, 15U, 32U),
- SIMD_TUPLE(v128_shr_s16, 15U, 32U), SIMD_TUPLE(v128_shl_32, 31U, 32U),
- SIMD_TUPLE(v128_shr_u32, 31U, 32U), SIMD_TUPLE(v128_shr_s32, 31U, 32U),
- SIMD_TUPLE(v128_shl_64, 63U, 32U), SIMD_TUPLE(v128_shr_u64, 63U, 32U),
- SIMD_TUPLE(v128_shr_s64, 63U, 32U));
-
-INSTANTIATE(ARCH, ARCH_POSTFIX(U32_V128), SIMD_TUPLE(v128_low_u32, 0U, 0U),
- SIMD_TUPLE(v128_movemask_8, 0U, 0U));
-
-INSTANTIATE(ARCH, ARCH_POSTFIX(U64_V128), SIMD_TUPLE(v128_hadd_u8, 0U, 0U));
-
-INSTANTIATE(ARCH, ARCH_POSTFIX(V64_V128), SIMD_TUPLE(v128_low_v64, 0U, 0U),
- SIMD_TUPLE(v128_high_v64, 0U, 0U));
-
-INSTANTIATE(ARCH, ARCH_POSTFIX(V128_U8), SIMD_TUPLE(v128_dup_8, 0U, 0U));
-
-INSTANTIATE(ARCH, ARCH_POSTFIX(V128_U16), SIMD_TUPLE(v128_dup_16, 0U, 0U));
-
-INSTANTIATE(ARCH, ARCH_POSTFIX(V128_U32), SIMD_TUPLE(v128_dup_32, 0U, 0U));
-
-INSTANTIATE(ARCH, ARCH_POSTFIX(V128_U64), SIMD_TUPLE(v128_dup_64, 0U, 0U));
-
-INSTANTIATE(ARCH, ARCH_POSTFIX(S64_V128V128), SIMD_TUPLE(v128_dotp_s16, 0U, 0U),
- SIMD_TUPLE(v128_dotp_s32, 0U, 0U),
- SIMD_TUPLE(v128_dotp_su8, 0U, 0U));
-
-INSTANTIATE(ARCH, ARCH_POSTFIX(U32_V256V256), SIMD_TUPLE(v256_sad_u8, 0U, 0U),
- SIMD_TUPLE(v256_ssd_u8, 0U, 0U), SIMD_TUPLE(v256_sad_u16, 0U, 0U));
-
-INSTANTIATE(ARCH, ARCH_POSTFIX(U64_V256), SIMD_TUPLE(v256_hadd_u8, 0U, 0U),
- SIMD_TUPLE(v256_low_u64, 0U, 0U));
-
-INSTANTIATE(ARCH, ARCH_POSTFIX(S64_V256V256), SIMD_TUPLE(v256_dotp_s16, 0U, 0U),
- SIMD_TUPLE(v256_dotp_s32, 0U, 0U),
- SIMD_TUPLE(v256_dotp_su8, 0U, 0U));
-
-INSTANTIATE(ARCH, ARCH_POSTFIX(U64_V256V256), SIMD_TUPLE(v256_ssd_s16, 0U, 0U));
-
-INSTANTIATE(
- ARCH, ARCH_POSTFIX(V256_V256V256), SIMD_TUPLE(v256_add_8, 0U, 0U),
- SIMD_TUPLE(v256_add_16, 0U, 0U), SIMD_TUPLE(v256_sadd_s16, 0U, 0U),
- SIMD_TUPLE(v256_add_32, 0U, 0U), SIMD_TUPLE(v256_sub_8, 0U, 0U),
- SIMD_TUPLE(v256_ssub_u8, 0U, 0U), SIMD_TUPLE(v256_ssub_s8, 0U, 0U),
- SIMD_TUPLE(v256_sub_16, 0U, 0U), SIMD_TUPLE(v256_ssub_s16, 0U, 0U),
- SIMD_TUPLE(v256_ssub_u16, 0U, 0U), SIMD_TUPLE(v256_sub_32, 0U, 0U),
- SIMD_TUPLE(v256_ziplo_8, 0U, 0U), SIMD_TUPLE(v256_ziphi_8, 0U, 0U),
- SIMD_TUPLE(v256_ziplo_16, 0U, 0U), SIMD_TUPLE(v256_ziphi_16, 0U, 0U),
- SIMD_TUPLE(v256_ziplo_32, 0U, 0U), SIMD_TUPLE(v256_ziphi_32, 0U, 0U),
- SIMD_TUPLE(v256_ziplo_64, 0U, 0U), SIMD_TUPLE(v256_ziphi_64, 0U, 0U),
- SIMD_TUPLE(v256_ziplo_128, 0U, 0U), SIMD_TUPLE(v256_ziphi_128, 0U, 0U),
- SIMD_TUPLE(v256_unziphi_8, 0U, 0U), SIMD_TUPLE(v256_unziplo_8, 0U, 0U),
- SIMD_TUPLE(v256_unziphi_16, 0U, 0U), SIMD_TUPLE(v256_unziplo_16, 0U, 0U),
- SIMD_TUPLE(v256_unziphi_32, 0U, 0U), SIMD_TUPLE(v256_unziplo_32, 0U, 0U),
- SIMD_TUPLE(v256_pack_s32_s16, 0U, 0U), SIMD_TUPLE(v256_pack_s16_u8, 0U, 0U),
- SIMD_TUPLE(v256_pack_s16_s8, 0U, 0U), SIMD_TUPLE(v256_or, 0U, 0U),
- SIMD_TUPLE(v256_xor, 0U, 0U), SIMD_TUPLE(v256_and, 0U, 0U),
- SIMD_TUPLE(v256_andn, 0U, 0U), SIMD_TUPLE(v256_mullo_s16, 0U, 0U),
- SIMD_TUPLE(v256_mulhi_s16, 0U, 0U), SIMD_TUPLE(v256_mullo_s32, 0U, 0U),
- SIMD_TUPLE(v256_madd_s16, 0U, 0U), SIMD_TUPLE(v256_madd_us8, 0U, 0U),
- SIMD_TUPLE(v256_avg_u8, 0U, 0U), SIMD_TUPLE(v256_rdavg_u8, 0U, 0U),
- SIMD_TUPLE(v256_avg_u16, 0U, 0U), SIMD_TUPLE(v256_min_u8, 0U, 0U),
- SIMD_TUPLE(v256_max_u8, 0U, 0U), SIMD_TUPLE(v256_min_s8, 0U, 0U),
- SIMD_TUPLE(v256_max_s8, 0U, 0U), SIMD_TUPLE(v256_min_s16, 0U, 0U),
- SIMD_TUPLE(v256_max_s16, 0U, 0U), SIMD_TUPLE(v256_cmpgt_s8, 0U, 0U),
- SIMD_TUPLE(v256_cmplt_s8, 0U, 0U));
-
-INSTANTIATE(
- ARCH, ARCH_POSTFIX(V256_V256V256_Part2), SIMD_TUPLE(v256_cmpeq_8, 0U, 0U),
- SIMD_TUPLE(v256_min_s32, 0U, 0U), SIMD_TUPLE(v256_max_s32, 0U, 0U),
- SIMD_TUPLE(v256_add_64, 0U, 0U), SIMD_TUPLE(v256_sub_64, 0U, 0U),
- SIMD_TUPLE(v256_cmpgt_s16, 0U, 0U), SIMD_TUPLE(v256_cmplt_s16, 0U, 0U),
- SIMD_TUPLE(v256_cmpeq_16, 0U, 0U), SIMD_TUPLE(v256_cmpgt_s32, 0U, 0U),
- SIMD_TUPLE(v256_cmplt_s32, 0U, 0U), SIMD_TUPLE(v256_cmpeq_32, 0U, 0U),
- SIMD_TUPLE(v256_shuffle_8, 31U, 8U), SIMD_TUPLE(v256_pshuffle_8, 15U, 8U),
- SIMD_TUPLE(imm_v256_align<1>, 0U, 0U), SIMD_TUPLE(v256_sadd_s8, 0U, 0U),
- SIMD_TUPLE(v256_sadd_u8, 0U, 0U), SIMD_TUPLE(v256_pack_s32_u16, 0U, 0U),
- SIMD_TUPLE(v256_rdavg_u16, 0U, 0U), SIMD_TUPLE(imm_v256_align<2>, 0U, 0U),
- SIMD_TUPLE(v256_unziphi_64, 0U, 0U), SIMD_TUPLE(v256_unziplo_64, 0U, 0U),
- SIMD_TUPLE(imm_v256_align<3>, 0U, 0U),
- SIMD_TUPLE(imm_v256_align<4>, 0U, 0U),
- SIMD_TUPLE(imm_v256_align<5>, 0U, 0U),
- SIMD_TUPLE(imm_v256_align<6>, 0U, 0U),
- SIMD_TUPLE(imm_v256_align<7>, 0U, 0U),
- SIMD_TUPLE(imm_v256_align<8>, 0U, 0U),
- SIMD_TUPLE(imm_v256_align<9>, 0U, 0U),
- SIMD_TUPLE(imm_v256_align<10>, 0U, 0U),
- SIMD_TUPLE(imm_v256_align<11>, 0U, 0U),
- SIMD_TUPLE(imm_v256_align<12>, 0U, 0U),
- SIMD_TUPLE(imm_v256_align<13>, 0U, 0U),
- SIMD_TUPLE(imm_v256_align<14>, 0U, 0U),
- SIMD_TUPLE(imm_v256_align<15>, 0U, 0U),
- SIMD_TUPLE(imm_v256_align<16>, 0U, 0U),
- SIMD_TUPLE(imm_v256_align<17>, 0U, 0U),
- SIMD_TUPLE(imm_v256_align<18>, 0U, 0U),
- SIMD_TUPLE(imm_v256_align<19>, 0U, 0U),
- SIMD_TUPLE(imm_v256_align<20>, 0U, 0U),
- SIMD_TUPLE(imm_v256_align<21>, 0U, 0U),
- SIMD_TUPLE(imm_v256_align<22>, 0U, 0U),
- SIMD_TUPLE(imm_v256_align<23>, 0U, 0U),
- SIMD_TUPLE(imm_v256_align<24>, 0U, 0U),
- SIMD_TUPLE(imm_v256_align<25>, 0U, 0U),
- SIMD_TUPLE(imm_v256_align<26>, 0U, 0U),
- SIMD_TUPLE(imm_v256_align<27>, 0U, 0U),
- SIMD_TUPLE(imm_v256_align<28>, 0U, 0U),
- SIMD_TUPLE(imm_v256_align<29>, 0U, 0U),
- SIMD_TUPLE(imm_v256_align<30>, 0U, 0U),
- SIMD_TUPLE(imm_v256_align<31>, 0U, 0U));
-
-INSTANTIATE(ARCH, ARCH_POSTFIX(V256_V128V128),
- SIMD_TUPLE(v256_from_v128, 0U, 0U), SIMD_TUPLE(v256_zip_8, 0U, 0U),
- SIMD_TUPLE(v256_zip_16, 0U, 0U), SIMD_TUPLE(v256_zip_32, 0U, 0U),
- SIMD_TUPLE(v256_mul_s16, 0U, 0U));
-
-INSTANTIATE(ARCH, ARCH_POSTFIX(V256_V128),
- SIMD_TUPLE(v256_unpack_u8_s16, 0U, 0U),
- SIMD_TUPLE(v256_unpack_s8_s16, 0U, 0U),
- SIMD_TUPLE(v256_unpack_u16_s32, 0U, 0U),
- SIMD_TUPLE(v256_unpack_s16_s32, 0U, 0U));
-
-INSTANTIATE(
- ARCH, ARCH_POSTFIX(V256_V256U32), SIMD_TUPLE(v256_shl_8, 7U, 32U),
- SIMD_TUPLE(v256_shr_u8, 7U, 32U), SIMD_TUPLE(v256_shr_s8, 7U, 32U),
- SIMD_TUPLE(v256_shl_16, 15U, 32U), SIMD_TUPLE(v256_shr_u16, 15U, 32U),
- SIMD_TUPLE(v256_shr_s16, 15U, 32U), SIMD_TUPLE(v256_shl_32, 31U, 32U),
- SIMD_TUPLE(v256_shr_u32, 31U, 32U), SIMD_TUPLE(v256_shr_s32, 31U, 32U),
- SIMD_TUPLE(v256_shl_64, 63U, 32U), SIMD_TUPLE(v256_shr_u64, 63U, 32U),
- SIMD_TUPLE(v256_shr_s64, 63U, 32U));
-
-INSTANTIATE(ARCH, ARCH_POSTFIX(V256_V256), SIMD_TUPLE(v256_abs_s8, 0U, 0U),
- SIMD_TUPLE(v256_abs_s16, 0U, 0U), SIMD_TUPLE(v256_padd_s16, 0U, 0U),
- SIMD_TUPLE(v256_unpacklo_u8_s16, 0U, 0U),
- SIMD_TUPLE(v256_unpacklo_s8_s16, 0U, 0U),
- SIMD_TUPLE(v256_unpacklo_u16_s32, 0U, 0U),
- SIMD_TUPLE(v256_unpacklo_s16_s32, 0U, 0U),
- SIMD_TUPLE(v256_unpackhi_u8_s16, 0U, 0U),
- SIMD_TUPLE(v256_unpackhi_s8_s16, 0U, 0U),
- SIMD_TUPLE(v256_unpackhi_u16_s32, 0U, 0U),
- SIMD_TUPLE(v256_unpackhi_s16_s32, 0U, 0U),
- SIMD_TUPLE(imm_v256_shr_n_byte<1>, 0U, 0U),
- SIMD_TUPLE(imm_v256_shr_n_byte<2>, 0U, 0U),
- SIMD_TUPLE(imm_v256_shr_n_byte<3>, 0U, 0U),
- SIMD_TUPLE(imm_v256_shr_n_byte<4>, 0U, 0U),
- SIMD_TUPLE(imm_v256_shr_n_byte<5>, 0U, 0U),
- SIMD_TUPLE(imm_v256_shr_n_byte<6>, 0U, 0U),
- SIMD_TUPLE(imm_v256_shr_n_byte<7>, 0U, 0U),
- SIMD_TUPLE(imm_v256_shr_n_byte<8>, 0U, 0U),
- SIMD_TUPLE(imm_v256_shr_n_byte<9>, 0U, 0U),
- SIMD_TUPLE(imm_v256_shr_n_byte<10>, 0U, 0U),
- SIMD_TUPLE(imm_v256_shr_n_byte<11>, 0U, 0U),
- SIMD_TUPLE(imm_v256_shr_n_byte<12>, 0U, 0U),
- SIMD_TUPLE(imm_v256_shr_n_byte<13>, 0U, 0U),
- SIMD_TUPLE(imm_v256_shr_n_byte<14>, 0U, 0U),
- SIMD_TUPLE(imm_v256_shr_n_byte<15>, 0U, 0U),
- SIMD_TUPLE(imm_v256_shr_n_byte<16>, 0U, 0U),
- SIMD_TUPLE(imm_v256_shr_n_byte<17>, 0U, 0U),
- SIMD_TUPLE(imm_v256_shr_n_byte<18>, 0U, 0U),
- SIMD_TUPLE(imm_v256_shr_n_byte<19>, 0U, 0U),
- SIMD_TUPLE(imm_v256_shr_n_byte<20>, 0U, 0U),
- SIMD_TUPLE(imm_v256_shr_n_byte<21>, 0U, 0U),
- SIMD_TUPLE(imm_v256_shr_n_byte<22>, 0U, 0U),
- SIMD_TUPLE(imm_v256_shr_n_byte<23>, 0U, 0U),
- SIMD_TUPLE(imm_v256_shr_n_byte<24>, 0U, 0U),
- SIMD_TUPLE(imm_v256_shr_n_byte<25>, 0U, 0U),
- SIMD_TUPLE(imm_v256_shr_n_byte<26>, 0U, 0U),
- SIMD_TUPLE(imm_v256_shr_n_byte<27>, 0U, 0U),
- SIMD_TUPLE(imm_v256_shr_n_byte<28>, 0U, 0U),
- SIMD_TUPLE(imm_v256_shr_n_byte<29>, 0U, 0U),
- SIMD_TUPLE(imm_v256_shr_n_byte<30>, 0U, 0U),
- SIMD_TUPLE(imm_v256_shr_n_byte<31>, 0U, 0U),
- SIMD_TUPLE(imm_v256_shl_n_byte<1>, 0U, 0U),
- SIMD_TUPLE(imm_v256_shl_n_byte<2>, 0U, 0U),
- SIMD_TUPLE(imm_v256_shl_n_byte<3>, 0U, 0U),
- SIMD_TUPLE(imm_v256_shl_n_byte<4>, 0U, 0U),
- SIMD_TUPLE(imm_v256_shl_n_byte<5>, 0U, 0U),
- SIMD_TUPLE(imm_v256_shl_n_byte<6>, 0U, 0U),
- SIMD_TUPLE(imm_v256_shl_n_byte<7>, 0U, 0U),
- SIMD_TUPLE(imm_v256_shl_n_byte<8>, 0U, 0U));
-
-INSTANTIATE(ARCH, ARCH_POSTFIX(V256_V256_Part2),
- SIMD_TUPLE(imm_v256_shl_n_byte<9>, 0U, 0U),
- SIMD_TUPLE(imm_v256_shl_n_byte<10>, 0U, 0U),
- SIMD_TUPLE(imm_v256_shl_n_byte<11>, 0U, 0U),
- SIMD_TUPLE(imm_v256_shl_n_byte<12>, 0U, 0U),
- SIMD_TUPLE(imm_v256_shl_n_byte<13>, 0U, 0U),
- SIMD_TUPLE(imm_v256_shl_n_byte<14>, 0U, 0U),
- SIMD_TUPLE(imm_v256_shl_n_byte<15>, 0U, 0U),
- SIMD_TUPLE(imm_v256_shl_n_byte<16>, 0U, 0U),
- SIMD_TUPLE(imm_v256_shl_n_byte<17>, 0U, 0U),
- SIMD_TUPLE(imm_v256_shl_n_byte<18>, 0U, 0U),
- SIMD_TUPLE(imm_v256_shl_n_byte<19>, 0U, 0U),
- SIMD_TUPLE(imm_v256_shl_n_byte<20>, 0U, 0U),
- SIMD_TUPLE(imm_v256_shl_n_byte<21>, 0U, 0U),
- SIMD_TUPLE(imm_v256_shl_n_byte<22>, 0U, 0U),
- SIMD_TUPLE(imm_v256_shl_n_byte<23>, 0U, 0U),
- SIMD_TUPLE(imm_v256_shl_n_byte<24>, 0U, 0U),
- SIMD_TUPLE(imm_v256_shl_n_byte<25>, 0U, 0U),
- SIMD_TUPLE(imm_v256_shl_n_byte<26>, 0U, 0U),
- SIMD_TUPLE(imm_v256_shl_n_byte<27>, 0U, 0U),
- SIMD_TUPLE(imm_v256_shl_n_byte<28>, 0U, 0U),
- SIMD_TUPLE(imm_v256_shl_n_byte<29>, 0U, 0U),
- SIMD_TUPLE(imm_v256_shl_n_byte<30>, 0U, 0U),
- SIMD_TUPLE(imm_v256_shl_n_byte<31>, 0U, 0U),
- SIMD_TUPLE(imm_v256_shl_n_8<1>, 0U, 0U),
- SIMD_TUPLE(imm_v256_shl_n_8<2>, 0U, 0U),
- SIMD_TUPLE(imm_v256_shl_n_8<3>, 0U, 0U),
- SIMD_TUPLE(imm_v256_shl_n_8<4>, 0U, 0U),
- SIMD_TUPLE(imm_v256_shl_n_8<5>, 0U, 0U),
- SIMD_TUPLE(imm_v256_shl_n_8<6>, 0U, 0U),
- SIMD_TUPLE(imm_v256_shl_n_8<7>, 0U, 0U),
- SIMD_TUPLE(imm_v256_shr_n_u8<1>, 0U, 0U),
- SIMD_TUPLE(imm_v256_shr_n_u8<2>, 0U, 0U),
- SIMD_TUPLE(imm_v256_shr_n_u8<3>, 0U, 0U),
- SIMD_TUPLE(imm_v256_shr_n_u8<4>, 0U, 0U),
- SIMD_TUPLE(imm_v256_shr_n_u8<5>, 0U, 0U),
- SIMD_TUPLE(imm_v256_shr_n_u8<6>, 0U, 0U),
- SIMD_TUPLE(imm_v256_shr_n_u8<7>, 0U, 0U),
- SIMD_TUPLE(imm_v256_shr_n_s8<1>, 0U, 0U),
- SIMD_TUPLE(imm_v256_shr_n_s8<2>, 0U, 0U),
- SIMD_TUPLE(imm_v256_shr_n_s8<3>, 0U, 0U),
- SIMD_TUPLE(imm_v256_shr_n_s8<4>, 0U, 0U),
- SIMD_TUPLE(imm_v256_shr_n_s8<5>, 0U, 0U),
- SIMD_TUPLE(imm_v256_shr_n_s8<6>, 0U, 0U),
- SIMD_TUPLE(imm_v256_shr_n_s8<7>, 0U, 0U),
- SIMD_TUPLE(imm_v256_shl_n_16<1>, 0U, 0U),
- SIMD_TUPLE(imm_v256_shl_n_16<2>, 0U, 0U),
- SIMD_TUPLE(imm_v256_shl_n_16<4>, 0U, 0U),
- SIMD_TUPLE(imm_v256_shl_n_16<6>, 0U, 0U),
- SIMD_TUPLE(imm_v256_shl_n_16<8>, 0U, 0U),
- SIMD_TUPLE(imm_v256_shl_n_16<10>, 0U, 0U));
-
-INSTANTIATE(ARCH, ARCH_POSTFIX(V256_V256_Part3),
- SIMD_TUPLE(imm_v256_shl_n_16<12>, 0U, 0U),
- SIMD_TUPLE(imm_v256_shl_n_16<14>, 0U, 0U),
- SIMD_TUPLE(imm_v256_shr_n_u16<1>, 0U, 0U),
- SIMD_TUPLE(imm_v256_shr_n_u16<2>, 0U, 0U),
- SIMD_TUPLE(imm_v256_shr_n_u16<4>, 0U, 0U),
- SIMD_TUPLE(imm_v256_shr_n_u16<6>, 0U, 0U),
- SIMD_TUPLE(imm_v256_shr_n_u16<8>, 0U, 0U),
- SIMD_TUPLE(imm_v256_shr_n_u16<10>, 0U, 0U),
- SIMD_TUPLE(imm_v256_shr_n_u16<12>, 0U, 0U),
- SIMD_TUPLE(imm_v256_shr_n_u16<14>, 0U, 0U),
- SIMD_TUPLE(imm_v256_shr_n_s16<1>, 0U, 0U),
- SIMD_TUPLE(imm_v256_shr_n_s16<2>, 0U, 0U),
- SIMD_TUPLE(imm_v256_shr_n_s16<4>, 0U, 0U),
- SIMD_TUPLE(imm_v256_shr_n_s16<6>, 0U, 0U),
- SIMD_TUPLE(imm_v256_shr_n_s16<8>, 0U, 0U),
- SIMD_TUPLE(imm_v256_shr_n_s16<10>, 0U, 0U),
- SIMD_TUPLE(imm_v256_shr_n_s16<12>, 0U, 0U),
- SIMD_TUPLE(imm_v256_shr_n_s16<14>, 0U, 0U),
- SIMD_TUPLE(imm_v256_shl_n_32<1>, 0U, 0U),
- SIMD_TUPLE(imm_v256_shl_n_32<4>, 0U, 0U),
- SIMD_TUPLE(imm_v256_shl_n_32<8>, 0U, 0U),
- SIMD_TUPLE(imm_v256_shl_n_32<12>, 0U, 0U),
- SIMD_TUPLE(imm_v256_shl_n_32<16>, 0U, 0U),
- SIMD_TUPLE(imm_v256_shl_n_32<20>, 0U, 0U),
- SIMD_TUPLE(imm_v256_shl_n_32<24>, 0U, 0U),
- SIMD_TUPLE(imm_v256_shl_n_32<28>, 0U, 0U),
- SIMD_TUPLE(imm_v256_shr_n_u32<1>, 0U, 0U),
- SIMD_TUPLE(imm_v256_shr_n_u32<4>, 0U, 0U),
- SIMD_TUPLE(imm_v256_shr_n_u32<8>, 0U, 0U),
- SIMD_TUPLE(imm_v256_shr_n_u32<12>, 0U, 0U),
- SIMD_TUPLE(imm_v256_shr_n_u32<16>, 0U, 0U),
- SIMD_TUPLE(imm_v256_shr_n_u32<20>, 0U, 0U),
- SIMD_TUPLE(imm_v256_shr_n_u32<24>, 0U, 0U),
- SIMD_TUPLE(imm_v256_shr_n_u32<28>, 0U, 0U),
- SIMD_TUPLE(imm_v256_shr_n_s32<1>, 0U, 0U),
- SIMD_TUPLE(imm_v256_shr_n_s32<4>, 0U, 0U),
- SIMD_TUPLE(imm_v256_shr_n_s32<8>, 0U, 0U),
- SIMD_TUPLE(imm_v256_shr_n_s32<12>, 0U, 0U),
- SIMD_TUPLE(imm_v256_shr_n_s32<16>, 0U, 0U),
- SIMD_TUPLE(imm_v256_shr_n_s32<20>, 0U, 0U),
- SIMD_TUPLE(imm_v256_shr_n_s32<24>, 0U, 0U),
- SIMD_TUPLE(imm_v256_shr_n_s32<28>, 0U, 0U));
-
-INSTANTIATE(ARCH, ARCH_POSTFIX(V256_V256_Part4),
- SIMD_TUPLE(imm_v256_shl_n_64<1>, 0U, 0U),
- SIMD_TUPLE(imm_v256_shl_n_64<4>, 0U, 0U),
- SIMD_TUPLE(imm_v256_shl_n_64<8>, 0U, 0U),
- SIMD_TUPLE(imm_v256_shl_n_64<12>, 0U, 0U),
- SIMD_TUPLE(imm_v256_shl_n_64<16>, 0U, 0U),
- SIMD_TUPLE(imm_v256_shl_n_64<20>, 0U, 0U),
- SIMD_TUPLE(imm_v256_shl_n_64<24>, 0U, 0U),
- SIMD_TUPLE(imm_v256_shl_n_64<28>, 0U, 0U),
- SIMD_TUPLE(imm_v256_shl_n_64<32>, 0U, 0U),
- SIMD_TUPLE(imm_v256_shl_n_64<36>, 0U, 0U),
- SIMD_TUPLE(imm_v256_shl_n_64<40>, 0U, 0U),
- SIMD_TUPLE(imm_v256_shl_n_64<44>, 0U, 0U),
- SIMD_TUPLE(imm_v256_shl_n_64<48>, 0U, 0U),
- SIMD_TUPLE(imm_v256_shl_n_64<52>, 0U, 0U),
- SIMD_TUPLE(imm_v256_shl_n_64<56>, 0U, 0U),
- SIMD_TUPLE(imm_v256_shl_n_64<60>, 0U, 0U),
- SIMD_TUPLE(imm_v256_shr_n_u64<1>, 0U, 0U),
- SIMD_TUPLE(imm_v256_shr_n_u64<4>, 0U, 0U),
- SIMD_TUPLE(imm_v256_shr_n_u64<8>, 0U, 0U),
- SIMD_TUPLE(imm_v256_shr_n_u64<12>, 0U, 0U),
- SIMD_TUPLE(imm_v256_shr_n_u64<16>, 0U, 0U),
- SIMD_TUPLE(imm_v256_shr_n_u64<20>, 0U, 0U),
- SIMD_TUPLE(imm_v256_shr_n_u64<24>, 0U, 0U),
- SIMD_TUPLE(imm_v256_shr_n_u64<28>, 0U, 0U),
- SIMD_TUPLE(imm_v256_shr_n_u64<32>, 0U, 0U),
- SIMD_TUPLE(imm_v256_shr_n_u64<36>, 0U, 0U),
- SIMD_TUPLE(imm_v256_shr_n_u64<40>, 0U, 0U),
- SIMD_TUPLE(imm_v256_shr_n_u64<44>, 0U, 0U),
- SIMD_TUPLE(imm_v256_shr_n_u64<48>, 0U, 0U),
- SIMD_TUPLE(imm_v256_shr_n_u64<52>, 0U, 0U),
- SIMD_TUPLE(imm_v256_shr_n_u64<56>, 0U, 0U),
- SIMD_TUPLE(imm_v256_shr_n_u64<60>, 0U, 0U),
- SIMD_TUPLE(imm_v256_shr_n_s64<1>, 0U, 0U),
- SIMD_TUPLE(imm_v256_shr_n_s64<4>, 0U, 0U),
- SIMD_TUPLE(imm_v256_shr_n_s64<8>, 0U, 0U),
- SIMD_TUPLE(imm_v256_shr_n_s64<12>, 0U, 0U),
- SIMD_TUPLE(imm_v256_shr_n_s64<16>, 0U, 0U),
- SIMD_TUPLE(imm_v256_shr_n_s64<20>, 0U, 0U),
- SIMD_TUPLE(imm_v256_shr_n_s64<24>, 0U, 0U),
- SIMD_TUPLE(imm_v256_shr_n_s64<28>, 0U, 0U),
- SIMD_TUPLE(imm_v256_shr_n_s64<32>, 0U, 0U),
- SIMD_TUPLE(imm_v256_shr_n_s64<36>, 0U, 0U),
- SIMD_TUPLE(imm_v256_shr_n_s64<40>, 0U, 0U),
- SIMD_TUPLE(imm_v256_shr_n_s64<44>, 0U, 0U),
- SIMD_TUPLE(imm_v256_shr_n_s64<48>, 0U, 0U),
- SIMD_TUPLE(imm_v256_shr_n_s64<52>, 0U, 0U),
- SIMD_TUPLE(imm_v256_shr_n_s64<56>, 0U, 0U),
- SIMD_TUPLE(imm_v256_shr_n_s64<60>, 0U, 0U),
- SIMD_TUPLE(v256_padd_u8, 0U, 0U));
-
-INSTANTIATE(ARCH, ARCH_POSTFIX(V256_V256_Part5),
- SIMD_TUPLE(imm_v256_shr_n_word<1>, 0U, 0U),
- SIMD_TUPLE(imm_v256_shr_n_word<2>, 0U, 0U),
- SIMD_TUPLE(imm_v256_shr_n_word<3>, 0U, 0U),
- SIMD_TUPLE(imm_v256_shr_n_word<4>, 0U, 0U),
- SIMD_TUPLE(imm_v256_shr_n_word<5>, 0U, 0U),
- SIMD_TUPLE(imm_v256_shr_n_word<6>, 0U, 0U),
- SIMD_TUPLE(imm_v256_shr_n_word<7>, 0U, 0U),
- SIMD_TUPLE(imm_v256_shr_n_word<8>, 0U, 0U),
- SIMD_TUPLE(imm_v256_shr_n_word<9>, 0U, 0U),
- SIMD_TUPLE(imm_v256_shr_n_word<10>, 0U, 0U),
- SIMD_TUPLE(imm_v256_shr_n_word<11>, 0U, 0U),
- SIMD_TUPLE(imm_v256_shr_n_word<12>, 0U, 0U),
- SIMD_TUPLE(imm_v256_shr_n_word<13>, 0U, 0U),
- SIMD_TUPLE(imm_v256_shr_n_word<14>, 0U, 0U),
- SIMD_TUPLE(imm_v256_shr_n_word<15>, 0U, 0U),
- SIMD_TUPLE(imm_v256_shl_n_word<1>, 0U, 0U),
- SIMD_TUPLE(imm_v256_shl_n_word<2>, 0U, 0U),
- SIMD_TUPLE(imm_v256_shl_n_word<3>, 0U, 0U),
- SIMD_TUPLE(imm_v256_shl_n_word<4>, 0U, 0U),
- SIMD_TUPLE(imm_v256_shl_n_word<5>, 0U, 0U),
- SIMD_TUPLE(imm_v256_shl_n_word<6>, 0U, 0U),
- SIMD_TUPLE(imm_v256_shl_n_word<7>, 0U, 0U),
- SIMD_TUPLE(imm_v256_shl_n_word<8>, 0U, 0U),
- SIMD_TUPLE(imm_v256_shl_n_word<9>, 0U, 0U),
- SIMD_TUPLE(imm_v256_shl_n_word<10>, 0U, 0U),
- SIMD_TUPLE(imm_v256_shl_n_word<11>, 0U, 0U),
- SIMD_TUPLE(imm_v256_shl_n_word<12>, 0U, 0U),
- SIMD_TUPLE(imm_v256_shl_n_word<13>, 0U, 0U),
- SIMD_TUPLE(imm_v256_shl_n_word<14>, 0U, 0U),
- SIMD_TUPLE(imm_v256_shl_n_word<15>, 0U, 0U));
-
-INSTANTIATE(ARCH, ARCH_POSTFIX(V256_V256V256V256),
- SIMD_TUPLE(v256_blend_8, 0U, 0U),
- SIMD_TUPLE(v256_wideshuffle_8, 63U, 8U));
-
-INSTANTIATE(ARCH, ARCH_POSTFIX(V256_U8), SIMD_TUPLE(v256_dup_8, 0U, 0U));
-
-INSTANTIATE(ARCH, ARCH_POSTFIX(V256_U16), SIMD_TUPLE(v256_dup_16, 0U, 0U));
-
-INSTANTIATE(ARCH, ARCH_POSTFIX(V256_U32), SIMD_TUPLE(v256_dup_32, 0U, 0U));
-
-INSTANTIATE(ARCH, ARCH_POSTFIX(V256_U64), SIMD_TUPLE(v256_dup_64, 0U, 0U));
-
-INSTANTIATE(ARCH, ARCH_POSTFIX(U32_V256), SIMD_TUPLE(v256_low_u32, 0U, 0U),
- SIMD_TUPLE(v256_movemask_8, 0U, 0U));
-
-INSTANTIATE(ARCH, ARCH_POSTFIX(V64_V256), SIMD_TUPLE(v256_low_v64, 0U, 0U));
-
-} // namespace SIMD_NAMESPACE
diff --git a/third_party/aom/test/simd_neon_test.cc b/third_party/aom/test/simd_neon_test.cc
deleted file mode 100644
index b67b18895..000000000
--- a/third_party/aom/test/simd_neon_test.cc
+++ /dev/null
@@ -1,17 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#if defined(__OPTIMIZE__) && __OPTIMIZE__
-#define ARCH NEON
-#define ARCH_POSTFIX(name) name##_neon
-#define SIMD_NAMESPACE simd_test_neon
-#include "test/simd_impl.h"
-#endif
diff --git a/third_party/aom/test/simd_sse2_test.cc b/third_party/aom/test/simd_sse2_test.cc
deleted file mode 100644
index b37a931b3..000000000
--- a/third_party/aom/test/simd_sse2_test.cc
+++ /dev/null
@@ -1,18 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#if (defined(__OPTIMIZE__) && __OPTIMIZE__) || \
- (!defined(__GNUC__) && !defined(_DEBUG))
-#define ARCH SSE2
-#define ARCH_POSTFIX(name) name##_sse2
-#define SIMD_NAMESPACE simd_test_sse2
-#include "test/simd_impl.h"
-#endif
diff --git a/third_party/aom/test/simd_sse4_test.cc b/third_party/aom/test/simd_sse4_test.cc
deleted file mode 100644
index b1c9d5cd8..000000000
--- a/third_party/aom/test/simd_sse4_test.cc
+++ /dev/null
@@ -1,18 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#if (defined(__OPTIMIZE__) && __OPTIMIZE__) || \
- (!defined(__GNUC__) && !defined(_DEBUG))
-#define ARCH SSE4_1
-#define ARCH_POSTFIX(name) name##_sse4_1
-#define SIMD_NAMESPACE simd_test_sse4_1
-#include "test/simd_impl.h"
-#endif
diff --git a/third_party/aom/test/simd_ssse3_test.cc b/third_party/aom/test/simd_ssse3_test.cc
deleted file mode 100644
index d95c26fb5..000000000
--- a/third_party/aom/test/simd_ssse3_test.cc
+++ /dev/null
@@ -1,18 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#if (defined(__OPTIMIZE__) && __OPTIMIZE__) || \
- (!defined(__GNUC__) && !defined(_DEBUG))
-#define ARCH SSSE3
-#define ARCH_POSTFIX(name) name##_ssse3
-#define SIMD_NAMESPACE simd_test_ssse3
-#include "test/simd_impl.h"
-#endif
diff --git a/third_party/aom/test/simple_decoder.sh b/third_party/aom/test/simple_decoder.sh
deleted file mode 100755
index 5f39ad206..000000000
--- a/third_party/aom/test/simple_decoder.sh
+++ /dev/null
@@ -1,58 +0,0 @@
-#!/bin/sh
-## Copyright (c) 2016, Alliance for Open Media. All rights reserved
-##
-## This source code is subject to the terms of the BSD 2 Clause License and
-## the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
-## was not distributed with this source code in the LICENSE file, you can
-## obtain it at www.aomedia.org/license/software. If the Alliance for Open
-## Media Patent License 1.0 was not distributed with this source code in the
-## PATENTS file, you can obtain it at www.aomedia.org/license/patent.
-##
-## This file tests the libaom simple_decoder example code. To add new tests to
-## this file, do the following:
-## 1. Write a shell function (this is your test).
-## 2. Add the function to simple_decoder_tests (on a new line).
-##
-. $(dirname $0)/tools_common.sh
-
-# Environment check: Make sure input is available:
-simple_decoder_verify_environment() {
- if [ ! "$(av1_encode_available)" = "yes" ] && [ ! -e "${AV1_IVF_FILE}" ]; then
- return 1
- fi
-}
-
-# Runs simple_decoder using $1 as input file. $2 is the codec name, and is used
-# solely to name the output file.
-simple_decoder() {
- local decoder="$(aom_tool_path simple_decoder)"
- local input_file="$1"
- local codec="$2"
- local output_file="${AOM_TEST_OUTPUT_DIR}/simple_decoder_${codec}.raw"
-
- if [ ! -x "${decoder}" ]; then
- elog "${decoder} does not exist or is not executable."
- return 1
- fi
-
- eval "${AOM_TEST_PREFIX}" "${decoder}" "${input_file}" "${output_file}" \
- ${devnull}
-
- [ -e "${output_file}" ] || return 1
-}
-
-simple_decoder_av1() {
- if [ "$(av1_decode_available)" = "yes" ]; then
- if [ ! -e "${AV1_IVF_FILE}" ]; then
- local file="${AOM_TEST_OUTPUT_DIR}/test_encode.ivf"
- encode_yuv_raw_input_av1 "${file}" --ivf
- simple_decoder "${file}" av1 || return 1
- else
- simple_decoder "${AV1_IVF_FILE}" av1 || return 1
- fi
- fi
-}
-
-simple_decoder_tests="simple_decoder_av1"
-
-run_tests simple_decoder_verify_environment "${simple_decoder_tests}"
diff --git a/third_party/aom/test/simple_encoder.sh b/third_party/aom/test/simple_encoder.sh
deleted file mode 100755
index 5cd6b46a1..000000000
--- a/third_party/aom/test/simple_encoder.sh
+++ /dev/null
@@ -1,53 +0,0 @@
-#!/bin/sh
-## Copyright (c) 2016, Alliance for Open Media. All rights reserved
-##
-## This source code is subject to the terms of the BSD 2 Clause License and
-## the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
-## was not distributed with this source code in the LICENSE file, you can
-## obtain it at www.aomedia.org/license/software. If the Alliance for Open
-## Media Patent License 1.0 was not distributed with this source code in the
-## PATENTS file, you can obtain it at www.aomedia.org/license/patent.
-##
-## This file tests the libaom simple_encoder example. To add new tests to this
-## file, do the following:
-## 1. Write a shell function (this is your test).
-## 2. Add the function to simple_encoder_tests (on a new line).
-##
-. $(dirname $0)/tools_common.sh
-
-# Environment check: $YUV_RAW_INPUT is required.
-simple_encoder_verify_environment() {
- if [ ! -e "${YUV_RAW_INPUT}" ]; then
- echo "Libaom test data must exist in LIBAOM_TEST_DATA_PATH."
- return 1
- fi
-}
-
-# Runs simple_encoder using the codec specified by $1 with a frame limit of 100.
-simple_encoder() {
- local encoder="${LIBAOM_BIN_PATH}/simple_encoder${AOM_TEST_EXE_SUFFIX}"
- local codec="$1"
- local output_file="${AOM_TEST_OUTPUT_DIR}/simple_encoder_${codec}.ivf"
-
- if [ ! -x "${encoder}" ]; then
- elog "${encoder} does not exist or is not executable."
- return 1
- fi
-
- eval "${AOM_TEST_PREFIX}" "${encoder}" "${codec}" "${YUV_RAW_INPUT_WIDTH}" \
- "${YUV_RAW_INPUT_HEIGHT}" "${YUV_RAW_INPUT}" "${output_file}" 9999 0 5 \
- ${devnull}
-
- [ -e "${output_file}" ] || return 1
-}
-
-
-simple_encoder_av1() {
- if [ "$(av1_encode_available)" = "yes" ]; then
- simple_encoder av1 || return 1
- fi
-}
-
-simple_encoder_tests="simple_encoder_av1"
-
-run_tests simple_encoder_verify_environment "${simple_encoder_tests}"
diff --git a/third_party/aom/test/subtract_test.cc b/third_party/aom/test/subtract_test.cc
deleted file mode 100644
index 7dcedf56d..000000000
--- a/third_party/aom/test/subtract_test.cc
+++ /dev/null
@@ -1,249 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#include "third_party/googletest/src/googletest/include/gtest/gtest.h"
-
-#include "config/aom_config.h"
-#include "config/aom_dsp_rtcd.h"
-
-#include "test/acm_random.h"
-#include "test/clear_system_state.h"
-#include "test/register_state_check.h"
-#include "test/util.h"
-#include "av1/common/blockd.h"
-#include "aom_mem/aom_mem.h"
-#include "aom_ports/mem.h"
-
-typedef void (*SubtractFunc)(int rows, int cols, int16_t *diff_ptr,
- ptrdiff_t diff_stride, const uint8_t *src_ptr,
- ptrdiff_t src_stride, const uint8_t *pred_ptr,
- ptrdiff_t pred_stride);
-
-namespace {
-
-class AV1SubtractBlockTest : public ::testing::TestWithParam<SubtractFunc> {
- public:
- virtual void TearDown() { libaom_test::ClearSystemState(); }
-};
-
-using libaom_test::ACMRandom;
-
-TEST_P(AV1SubtractBlockTest, SimpleSubtract) {
- ACMRandom rnd(ACMRandom::DeterministicSeed());
-
- // FIXME(rbultje) split in its own file
- for (BLOCK_SIZE bsize = BLOCK_4X4; bsize < BLOCK_SIZES;
- bsize = static_cast<BLOCK_SIZE>(static_cast<int>(bsize) + 1)) {
- const int block_width = block_size_wide[bsize];
- const int block_height = block_size_high[bsize];
- int16_t *diff = reinterpret_cast<int16_t *>(
- aom_memalign(16, sizeof(*diff) * block_width * block_height * 2));
- uint8_t *pred = reinterpret_cast<uint8_t *>(
- aom_memalign(16, block_width * block_height * 2));
- uint8_t *src = reinterpret_cast<uint8_t *>(
- aom_memalign(16, block_width * block_height * 2));
-
- for (int n = 0; n < 100; n++) {
- for (int r = 0; r < block_height; ++r) {
- for (int c = 0; c < block_width * 2; ++c) {
- src[r * block_width * 2 + c] = rnd.Rand8();
- pred[r * block_width * 2 + c] = rnd.Rand8();
- }
- }
-
- GetParam()(block_height, block_width, diff, block_width, src, block_width,
- pred, block_width);
-
- for (int r = 0; r < block_height; ++r) {
- for (int c = 0; c < block_width; ++c) {
- EXPECT_EQ(diff[r * block_width + c],
- (src[r * block_width + c] - pred[r * block_width + c]))
- << "r = " << r << ", c = " << c << ", bs = " << bsize;
- }
- }
-
- GetParam()(block_height, block_width, diff, block_width * 2, src,
- block_width * 2, pred, block_width * 2);
-
- for (int r = 0; r < block_height; ++r) {
- for (int c = 0; c < block_width; ++c) {
- EXPECT_EQ(
- diff[r * block_width * 2 + c],
- (src[r * block_width * 2 + c] - pred[r * block_width * 2 + c]))
- << "r = " << r << ", c = " << c << ", bs = " << bsize;
- }
- }
- }
- aom_free(diff);
- aom_free(pred);
- aom_free(src);
- }
-}
-
-INSTANTIATE_TEST_CASE_P(C, AV1SubtractBlockTest,
- ::testing::Values(aom_subtract_block_c));
-
-#if HAVE_SSE2
-INSTANTIATE_TEST_CASE_P(SSE2, AV1SubtractBlockTest,
- ::testing::Values(aom_subtract_block_sse2));
-#endif
-#if HAVE_NEON
-INSTANTIATE_TEST_CASE_P(NEON, AV1SubtractBlockTest,
- ::testing::Values(aom_subtract_block_neon));
-#endif
-#if HAVE_MSA
-INSTANTIATE_TEST_CASE_P(MSA, AV1SubtractBlockTest,
- ::testing::Values(aom_subtract_block_msa));
-#endif
-
-typedef void (*HBDSubtractFunc)(int rows, int cols, int16_t *diff_ptr,
- ptrdiff_t diff_stride, const uint8_t *src_ptr,
- ptrdiff_t src_stride, const uint8_t *pred_ptr,
- ptrdiff_t pred_stride, int bd);
-
-using ::testing::get;
-using ::testing::make_tuple;
-using ::testing::tuple;
-
-// <width, height, bit_dpeth, subtract>
-typedef tuple<int, int, int, HBDSubtractFunc> Params;
-
-class AV1HBDSubtractBlockTest : public ::testing::TestWithParam<Params> {
- public:
- virtual void SetUp() {
- block_width_ = GET_PARAM(0);
- block_height_ = GET_PARAM(1);
- bit_depth_ = static_cast<aom_bit_depth_t>(GET_PARAM(2));
- func_ = GET_PARAM(3);
-
- rnd_.Reset(ACMRandom::DeterministicSeed());
-
- const size_t max_width = 128;
- const size_t max_block_size = max_width * max_width;
- src_ = CONVERT_TO_BYTEPTR(reinterpret_cast<uint16_t *>(
- aom_memalign(16, max_block_size * sizeof(uint16_t))));
- pred_ = CONVERT_TO_BYTEPTR(reinterpret_cast<uint16_t *>(
- aom_memalign(16, max_block_size * sizeof(uint16_t))));
- diff_ = reinterpret_cast<int16_t *>(
- aom_memalign(16, max_block_size * sizeof(int16_t)));
- }
-
- virtual void TearDown() {
- aom_free(CONVERT_TO_SHORTPTR(src_));
- aom_free(CONVERT_TO_SHORTPTR(pred_));
- aom_free(diff_);
- }
-
- protected:
- void CheckResult();
- void RunForSpeed();
-
- private:
- ACMRandom rnd_;
- int block_height_;
- int block_width_;
- aom_bit_depth_t bit_depth_;
- HBDSubtractFunc func_;
- uint8_t *src_;
- uint8_t *pred_;
- int16_t *diff_;
-};
-
-void AV1HBDSubtractBlockTest::CheckResult() {
- const int test_num = 100;
- const size_t max_width = 128;
- const int max_block_size = max_width * max_width;
- const int mask = (1 << bit_depth_) - 1;
- int i, j;
-
- for (i = 0; i < test_num; ++i) {
- for (j = 0; j < max_block_size; ++j) {
- CONVERT_TO_SHORTPTR(src_)[j] = rnd_.Rand16() & mask;
- CONVERT_TO_SHORTPTR(pred_)[j] = rnd_.Rand16() & mask;
- }
-
- func_(block_height_, block_width_, diff_, block_width_, src_, block_width_,
- pred_, block_width_, bit_depth_);
-
- for (int r = 0; r < block_height_; ++r) {
- for (int c = 0; c < block_width_; ++c) {
- EXPECT_EQ(diff_[r * block_width_ + c],
- (CONVERT_TO_SHORTPTR(src_)[r * block_width_ + c] -
- CONVERT_TO_SHORTPTR(pred_)[r * block_width_ + c]))
- << "r = " << r << ", c = " << c << ", test: " << i;
- }
- }
- }
-}
-
-TEST_P(AV1HBDSubtractBlockTest, CheckResult) { CheckResult(); }
-
-void AV1HBDSubtractBlockTest::RunForSpeed() {
- const int test_num = 200000;
- const size_t max_width = 128;
- const int max_block_size = max_width * max_width;
- const int mask = (1 << bit_depth_) - 1;
- int i, j;
-
- for (j = 0; j < max_block_size; ++j) {
- CONVERT_TO_SHORTPTR(src_)[j] = rnd_.Rand16() & mask;
- CONVERT_TO_SHORTPTR(pred_)[j] = rnd_.Rand16() & mask;
- }
-
- for (i = 0; i < test_num; ++i) {
- func_(block_height_, block_width_, diff_, block_width_, src_, block_width_,
- pred_, block_width_, bit_depth_);
- }
-}
-
-TEST_P(AV1HBDSubtractBlockTest, DISABLED_Speed) { RunForSpeed(); }
-
-#if HAVE_SSE2
-
-const Params kAV1HBDSubtractBlock_sse2[] = {
- make_tuple(4, 4, 12, &aom_highbd_subtract_block_sse2),
- make_tuple(4, 4, 12, &aom_highbd_subtract_block_c),
- make_tuple(4, 8, 12, &aom_highbd_subtract_block_sse2),
- make_tuple(4, 8, 12, &aom_highbd_subtract_block_c),
- make_tuple(8, 4, 12, &aom_highbd_subtract_block_sse2),
- make_tuple(8, 4, 12, &aom_highbd_subtract_block_c),
- make_tuple(8, 8, 12, &aom_highbd_subtract_block_sse2),
- make_tuple(8, 8, 12, &aom_highbd_subtract_block_c),
- make_tuple(8, 16, 12, &aom_highbd_subtract_block_sse2),
- make_tuple(8, 16, 12, &aom_highbd_subtract_block_c),
- make_tuple(16, 8, 12, &aom_highbd_subtract_block_sse2),
- make_tuple(16, 8, 12, &aom_highbd_subtract_block_c),
- make_tuple(16, 16, 12, &aom_highbd_subtract_block_sse2),
- make_tuple(16, 16, 12, &aom_highbd_subtract_block_c),
- make_tuple(16, 32, 12, &aom_highbd_subtract_block_sse2),
- make_tuple(16, 32, 12, &aom_highbd_subtract_block_c),
- make_tuple(32, 16, 12, &aom_highbd_subtract_block_sse2),
- make_tuple(32, 16, 12, &aom_highbd_subtract_block_c),
- make_tuple(32, 32, 12, &aom_highbd_subtract_block_sse2),
- make_tuple(32, 32, 12, &aom_highbd_subtract_block_c),
- make_tuple(32, 64, 12, &aom_highbd_subtract_block_sse2),
- make_tuple(32, 64, 12, &aom_highbd_subtract_block_c),
- make_tuple(64, 32, 12, &aom_highbd_subtract_block_sse2),
- make_tuple(64, 32, 12, &aom_highbd_subtract_block_c),
- make_tuple(64, 64, 12, &aom_highbd_subtract_block_sse2),
- make_tuple(64, 64, 12, &aom_highbd_subtract_block_c),
- make_tuple(64, 128, 12, &aom_highbd_subtract_block_sse2),
- make_tuple(64, 128, 12, &aom_highbd_subtract_block_c),
- make_tuple(128, 64, 12, &aom_highbd_subtract_block_sse2),
- make_tuple(128, 64, 12, &aom_highbd_subtract_block_c),
- make_tuple(128, 128, 12, &aom_highbd_subtract_block_sse2),
- make_tuple(128, 128, 12, &aom_highbd_subtract_block_c)
-};
-
-INSTANTIATE_TEST_CASE_P(SSE2, AV1HBDSubtractBlockTest,
- ::testing::ValuesIn(kAV1HBDSubtractBlock_sse2));
-#endif // HAVE_SSE2
-} // namespace
diff --git a/third_party/aom/test/sum_squares_test.cc b/third_party/aom/test/sum_squares_test.cc
deleted file mode 100644
index f10998498..000000000
--- a/third_party/aom/test/sum_squares_test.cc
+++ /dev/null
@@ -1,228 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#include <cmath>
-#include <cstdlib>
-#include <string>
-
-#include "third_party/googletest/src/googletest/include/gtest/gtest.h"
-
-#include "config/aom_config.h"
-#include "config/aom_dsp_rtcd.h"
-
-#include "aom_ports/mem.h"
-#include "test/acm_random.h"
-#include "test/clear_system_state.h"
-#include "test/register_state_check.h"
-#include "test/util.h"
-#include "test/function_equivalence_test.h"
-
-using libaom_test::ACMRandom;
-using libaom_test::FunctionEquivalenceTest;
-
-namespace {
-const int kNumIterations = 10000;
-
-static const int16_t kInt13Max = (1 << 12) - 1;
-
-typedef uint64_t (*SSI16Func)(const int16_t *src, int stride, int width,
- int height);
-typedef libaom_test::FuncParam<SSI16Func> TestFuncs;
-
-class SumSquaresTest : public ::testing::TestWithParam<TestFuncs> {
- public:
- virtual ~SumSquaresTest() {}
- virtual void SetUp() {
- params_ = this->GetParam();
- rnd_.Reset(ACMRandom::DeterministicSeed());
- src_ = reinterpret_cast<int16_t *>(aom_memalign(16, 256 * 256 * 2));
- ASSERT_TRUE(src_ != NULL);
- }
-
- virtual void TearDown() {
- libaom_test::ClearSystemState();
- aom_free(src_);
- }
- void RunTest(int isRandom);
- void RunSpeedTest();
-
- void GenRandomData(int width, int height, int stride) {
- const int msb = 11; // Up to 12 bit input
- const int limit = 1 << (msb + 1);
- for (int ii = 0; ii < height; ii++) {
- for (int jj = 0; jj < width; jj++) {
- src_[ii * stride + jj] = rnd_(2) ? rnd_(limit) : -rnd_(limit);
- }
- }
- }
-
- void GenExtremeData(int width, int height, int stride) {
- const int msb = 11; // Up to 12 bit input
- const int limit = 1 << (msb + 1);
- const int val = rnd_(2) ? limit - 1 : -(limit - 1);
- for (int ii = 0; ii < height; ii++) {
- for (int jj = 0; jj < width; jj++) {
- src_[ii * stride + jj] = val;
- }
- }
- }
-
- protected:
- TestFuncs params_;
- int16_t *src_;
- ACMRandom rnd_;
-};
-
-void SumSquaresTest::RunTest(int isRandom) {
- int failed = 0;
- for (int k = 0; k < kNumIterations; k++) {
- const int width = 4 * (rnd_(31) + 1); // Up to 128x128
- const int height = 4 * (rnd_(31) + 1); // Up to 128x128
- int stride = 4 << rnd_(7); // Up to 256 stride
- while (stride < width) { // Make sure it's valid
- stride = 4 << rnd_(7);
- }
- if (isRandom) {
- GenRandomData(width, height, stride);
- } else {
- GenExtremeData(width, height, stride);
- }
- const uint64_t res_ref = params_.ref_func(src_, stride, width, height);
- uint64_t res_tst;
- ASM_REGISTER_STATE_CHECK(res_tst =
- params_.tst_func(src_, stride, width, height));
-
- if (!failed) {
- failed = res_ref != res_tst;
- EXPECT_EQ(res_ref, res_tst)
- << "Error: Sum Squares Test [" << width << "x" << height
- << "] C output does not match optimized output.";
- }
- }
-}
-
-void SumSquaresTest::RunSpeedTest() {
- for (int block = BLOCK_4X4; block < BLOCK_SIZES_ALL; block++) {
- const int width = block_size_wide[block]; // Up to 128x128
- const int height = block_size_high[block]; // Up to 128x128
- int stride = 4 << rnd_(7); // Up to 256 stride
- while (stride < width) { // Make sure it's valid
- stride = 4 << rnd_(7);
- }
- GenExtremeData(width, height, stride);
- const int num_loops = 1000000000 / (width + height);
- aom_usec_timer timer;
- aom_usec_timer_start(&timer);
-
- for (int i = 0; i < num_loops; ++i)
- params_.ref_func(src_, stride, width, height);
-
- aom_usec_timer_mark(&timer);
- const int elapsed_time = static_cast<int>(aom_usec_timer_elapsed(&timer));
- printf("SumSquaresTest C %3dx%-3d: %7.2f ns\n", width, height,
- 1000.0 * elapsed_time / num_loops);
-
- aom_usec_timer timer1;
- aom_usec_timer_start(&timer1);
- for (int i = 0; i < num_loops; ++i)
- params_.tst_func(src_, stride, width, height);
- aom_usec_timer_mark(&timer1);
- const int elapsed_time1 = static_cast<int>(aom_usec_timer_elapsed(&timer1));
- printf("SumSquaresTest Test %3dx%-3d: %7.2f ns\n", width, height,
- 1000.0 * elapsed_time1 / num_loops);
- }
-}
-
-TEST_P(SumSquaresTest, OperationCheck) {
- RunTest(1); // GenRandomData
-}
-
-TEST_P(SumSquaresTest, ExtremeValues) {
- RunTest(0); // GenExtremeData
-}
-
-TEST_P(SumSquaresTest, DISABLED_Speed) { RunSpeedTest(); }
-
-#if HAVE_SSE2
-
-INSTANTIATE_TEST_CASE_P(
- SSE2, SumSquaresTest,
- ::testing::Values(TestFuncs(&aom_sum_squares_2d_i16_c,
- &aom_sum_squares_2d_i16_sse2)));
-
-#endif // HAVE_SSE2
-
-#if HAVE_AVX2
-INSTANTIATE_TEST_CASE_P(
- AVX2, SumSquaresTest,
- ::testing::Values(TestFuncs(&aom_sum_squares_2d_i16_c,
- &aom_sum_squares_2d_i16_avx2)));
-#endif // HAVE_AVX2
-
-//////////////////////////////////////////////////////////////////////////////
-// 1D version
-//////////////////////////////////////////////////////////////////////////////
-
-typedef uint64_t (*F1D)(const int16_t *src, uint32_t N);
-typedef libaom_test::FuncParam<F1D> TestFuncs1D;
-
-class SumSquares1DTest : public FunctionEquivalenceTest<F1D> {
- protected:
- static const int kIterations = 1000;
- static const int kMaxSize = 256;
-};
-
-TEST_P(SumSquares1DTest, RandomValues) {
- DECLARE_ALIGNED(16, int16_t, src[kMaxSize * kMaxSize]);
-
- for (int iter = 0; iter < kIterations && !HasFatalFailure(); ++iter) {
- for (int i = 0; i < kMaxSize * kMaxSize; ++i)
- src[i] = rng_(kInt13Max * 2 + 1) - kInt13Max;
-
- const int N = rng_(2) ? rng_(kMaxSize * kMaxSize + 1 - kMaxSize) + kMaxSize
- : rng_(kMaxSize) + 1;
-
- const uint64_t ref_res = params_.ref_func(src, N);
- uint64_t tst_res;
- ASM_REGISTER_STATE_CHECK(tst_res = params_.tst_func(src, N));
-
- ASSERT_EQ(ref_res, tst_res);
- }
-}
-
-TEST_P(SumSquares1DTest, ExtremeValues) {
- DECLARE_ALIGNED(16, int16_t, src[kMaxSize * kMaxSize]);
-
- for (int iter = 0; iter < kIterations && !HasFatalFailure(); ++iter) {
- if (rng_(2)) {
- for (int i = 0; i < kMaxSize * kMaxSize; ++i) src[i] = kInt13Max;
- } else {
- for (int i = 0; i < kMaxSize * kMaxSize; ++i) src[i] = -kInt13Max;
- }
-
- const int N = rng_(2) ? rng_(kMaxSize * kMaxSize + 1 - kMaxSize) + kMaxSize
- : rng_(kMaxSize) + 1;
-
- const uint64_t ref_res = params_.ref_func(src, N);
- uint64_t tst_res;
- ASM_REGISTER_STATE_CHECK(tst_res = params_.tst_func(src, N));
-
- ASSERT_EQ(ref_res, tst_res);
- }
-}
-
-#if HAVE_SSE2
-INSTANTIATE_TEST_CASE_P(SSE2, SumSquares1DTest,
- ::testing::Values(TestFuncs1D(
- aom_sum_squares_i16_c, aom_sum_squares_i16_sse2)));
-
-#endif // HAVE_SSE2
-} // namespace
diff --git a/third_party/aom/test/superframe_test.cc b/third_party/aom/test/superframe_test.cc
deleted file mode 100644
index 7be18f72a..000000000
--- a/third_party/aom/test/superframe_test.cc
+++ /dev/null
@@ -1,109 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#include <climits>
-#include <vector>
-#include "third_party/googletest/src/googletest/include/gtest/gtest.h"
-#include "test/codec_factory.h"
-#include "test/encode_test_driver.h"
-#include "test/i420_video_source.h"
-#include "test/util.h"
-
-namespace {
-
-const int kTestMode = 0;
-const int kTileCols = 1;
-const int kTileRows = 2;
-
-typedef ::testing::tuple<libaom_test::TestMode, int, int> SuperframeTestParam;
-
-class SuperframeTest
- : public ::libaom_test::CodecTestWithParam<SuperframeTestParam>,
- public ::libaom_test::EncoderTest {
- protected:
- SuperframeTest() : EncoderTest(GET_PARAM(0)), last_sf_pts_(0) {}
- virtual ~SuperframeTest() {}
-
- virtual void SetUp() {
- InitializeConfig();
- const SuperframeTestParam input = GET_PARAM(1);
- const libaom_test::TestMode mode = ::testing::get<kTestMode>(input);
- SetMode(mode);
- sf_count_ = 0;
- sf_count_max_ = INT_MAX;
- n_tile_cols_ = ::testing::get<kTileCols>(input);
- n_tile_rows_ = ::testing::get<kTileRows>(input);
- }
-
- virtual void PreEncodeFrameHook(libaom_test::VideoSource *video,
- libaom_test::Encoder *encoder) {
- if (video->frame() == 1) {
- encoder->Control(AOME_SET_ENABLEAUTOALTREF, 1);
- encoder->Control(AOME_SET_CPUUSED, 2);
- encoder->Control(AV1E_SET_TILE_COLUMNS, n_tile_cols_);
- encoder->Control(AV1E_SET_TILE_ROWS, n_tile_rows_);
- }
- }
-
- virtual const aom_codec_cx_pkt_t *MutateEncoderOutputHook(
- const aom_codec_cx_pkt_t *pkt) {
- if (pkt->kind != AOM_CODEC_CX_FRAME_PKT) return pkt;
-
- const uint8_t *buffer = reinterpret_cast<uint8_t *>(pkt->data.frame.buf);
- const uint8_t marker = buffer[0];
- const int frames = (marker & 0x7) + 1;
- const int mag = ((marker >> 3) & 3) + 1;
- const unsigned int index_sz = 2 + mag * (frames - 1);
- if ((marker & 0xe0) == 0xc0 && pkt->data.frame.sz >= index_sz &&
- buffer[index_sz - 1] == marker) {
- // frame is a superframe. strip off the index.
- modified_buf_.resize(pkt->data.frame.sz - index_sz);
- memcpy(&modified_buf_[0], (uint8_t *)pkt->data.frame.buf + index_sz,
- pkt->data.frame.sz - index_sz);
- modified_pkt_ = *pkt;
- modified_pkt_.data.frame.buf = &modified_buf_[0];
- modified_pkt_.data.frame.sz -= index_sz;
-
- sf_count_++;
- last_sf_pts_ = pkt->data.frame.pts;
- return &modified_pkt_;
- }
-
- // Make sure we do a few frames after the last SF
- abort_ |=
- sf_count_ > sf_count_max_ && pkt->data.frame.pts - last_sf_pts_ >= 5;
- return pkt;
- }
-
- int sf_count_;
- int sf_count_max_;
- aom_codec_cx_pkt_t modified_pkt_;
- std::vector<uint8_t> modified_buf_;
- aom_codec_pts_t last_sf_pts_;
-
- private:
- int n_tile_cols_;
- int n_tile_rows_;
-};
-
-TEST_P(SuperframeTest, TestSuperframeIndexIsOptional) {
- sf_count_max_ = 0; // early exit on successful test.
- cfg_.g_lag_in_frames = 25;
- cfg_.large_scale_tile = 1;
- ::libaom_test::I420VideoSource video("hantro_collage_w352h288.yuv", 352, 288,
- 30, 1, 0, 40);
- ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
- // NOTE: The use of BWDREF_FRAME will enable the coding of more non-show
- // frames besides ALTREF_FRAME.
- EXPECT_GE(sf_count_, 1);
-}
-
-} // namespace
diff --git a/third_party/aom/test/test-data.sha1 b/third_party/aom/test/test-data.sha1
deleted file mode 100644
index b6ee34701..000000000
--- a/third_party/aom/test/test-data.sha1
+++ /dev/null
@@ -1,507 +0,0 @@
-d5dfb0151c9051f8c85999255645d7a23916d3c0 *hantro_collage_w352h288.yuv
-b87815bf86020c592ccc7a846ba2e28ec8043902 *hantro_odd.yuv
-26b7f64399b84db4b4c9c915d743ec5c2619d4b9 *invalid-bug-1814.ivf
-d3964f9dad9f60363c81b688324d95b4ec7c8038 *invalid-bug-1814.ivf.res
-fa06784f23751d8c37be94160fb821e855199af4 *invalid-oss-fuzz-10061.ivf
-b055f06b9a95aaa5697fa26497b592a47843a7c8 *invalid-oss-fuzz-10061.ivf.res
-c9e06c4c7fb7d69fd635a1f606a5e478d60e99cf *invalid-oss-fuzz-10117-mc-buf-use-highbd.ivf
-88e18e61bd2b7457b4c71ebefbdff0029c41cc04 *invalid-oss-fuzz-10117-mc-buf-use-highbd.ivf.res
-91a5bedeb4832c1c2900736cc0f644bb63971bbc *invalid-oss-fuzz-10227.ivf
-b055f06b9a95aaa5697fa26497b592a47843a7c8 *invalid-oss-fuzz-10227.ivf.res
-c0960f032484579f967881cc025b71cfd7a79ee1 *invalid-oss-fuzz-9463.ivf
-d3964f9dad9f60363c81b688324d95b4ec7c8038 *invalid-oss-fuzz-9463.ivf.res
-f448caf378e250b7eea4fa2d1c3cd7ef4a3211ce *invalid-oss-fuzz-9482.ivf
-b055f06b9a95aaa5697fa26497b592a47843a7c8 *invalid-oss-fuzz-9482.ivf.res
-a686989de79af89136f631fd630df639c7861851 *invalid-oss-fuzz-9720.ivf
-d3964f9dad9f60363c81b688324d95b4ec7c8038 *invalid-oss-fuzz-9720.ivf.res
-a432f96ff0a787268e2f94a8092ab161a18d1b06 *park_joy_90p_10_420.y4m
-0b194cc312c3a2e84d156a221b0a5eb615dfddc5 *park_joy_90p_10_422.y4m
-ff0e0a21dc2adc95b8c1b37902713700655ced17 *park_joy_90p_10_444.y4m
-c934da6fb8cc54ee2a8c17c54cf6076dac37ead0 *park_joy_90p_10_440.yuv
-614c32ae1eca391e867c70d19974f0d62664dd99 *park_joy_90p_12_420.y4m
-c92825f1ea25c5c37855083a69faac6ac4641a9e *park_joy_90p_12_422.y4m
-b592189b885b6cc85db55cc98512a197d73d3b34 *park_joy_90p_12_444.y4m
-82c1bfcca368c2f22bad7d693d690d5499ecdd11 *park_joy_90p_12_440.yuv
-b9e1e90aece2be6e2c90d89e6ab2372d5f8c792d *park_joy_90p_8_420_a10-1.y4m
-4e0eb61e76f0684188d9bc9f3ce61f6b6b77bb2c *park_joy_90p_8_420.y4m
-7a193ff7dfeb96ba5f82b2afd7afa9e1fe83d947 *park_joy_90p_8_422.y4m
-bdb7856e6bc93599bdda05c2e773a9f22b6c6d03 *park_joy_90p_8_444.y4m
-81e1f3843748438b8f2e71db484eb22daf72e939 *park_joy_90p_8_440.yuv
-b1f1c3ec79114b9a0651af24ce634afb44a9a419 *rush_hour_444.y4m
-eb438c6540eb429f74404eedfa3228d409c57874 *desktop_640_360_30.yuv
-89e70ebd22c27d275fe14dc2f1a41841a6d8b9ab *kirland_640_480_30.yuv
-33c533192759e5bb4f07abfbac389dc259db4686 *macmarcomoving_640_480_30.yuv
-8bfaab121080821b8f03b23467911e59ec59b8fe *macmarcostationary_640_480_30.yuv
-70894878d916a599842d9ad0dcd24e10c13e5467 *niklas_640_480_30.yuv
-8784b6df2d8cc946195a90ac00540500d2e522e4 *tacomanarrows_640_480_30.yuv
-edd86a1f5e62fd9da9a9d46078247759c2638009 *tacomasmallcameramovement_640_480_30.yuv
-9a70e8b7d14fba9234d0e51dce876635413ce444 *thaloundeskmtg_640_480_30.yuv
-e7d315dbf4f3928779e0dc624311196d44491d32 *niklas_1280_720_30.yuv
-717da707afcaa1f692ff1946f291054eb75a4f06 *screendata.y4m
-9cfc855459e7549fd015c79e8eca512b2f2cb7e3 *niklas_1280_720_30.y4m
-5b5763b388b1b52a81bb82b39f7ec25c4bd3d0e1 *desktop_credits.y4m
-36ddab9b99eb7545aa0bf362d6f498212d596516 *vase10x10.yuv
-c2e1ec9936b95254187a359e94aa32a9f3dad1b7 *av1-1-b8-00-quantizer-00.ivf
-26cd2a0321d01d9db5f6dace8b43a40cd5b9d58d *av1-1-b8-00-quantizer-00.ivf.md5
-a56dd02c0258d4afea1ee358a22b54e99e39d5e1 *av1-1-b8-00-quantizer-01.ivf
-b3d24124d81f1fbb26f5eb0036accb54f3ec69b2 *av1-1-b8-00-quantizer-01.ivf.md5
-3466327cb842a91d69839b11ef930a74f086f4c6 *av1-1-b8-00-quantizer-02.ivf
-c111dce946100efeaad34203080eee1d55464df6 *av1-1-b8-00-quantizer-02.ivf.md5
-d3f1f32de5e2c0c19a58bb8ef096108388c6a820 *av1-1-b8-00-quantizer-03.ivf
-6265321b31130545b4454982ca93e412a56845b8 *av1-1-b8-00-quantizer-03.ivf.md5
-f37c393ebe73266a5ec8508a2ca33c586ff28e64 *av1-1-b8-00-quantizer-04.ivf
-c6e979da71aecc593c0abb40135dd304152b00dd *av1-1-b8-00-quantizer-04.ivf.md5
-ac9c5e93cb19942a9be259d0567ec96c54dcdc7c *av1-1-b8-00-quantizer-05.ivf
-49e35a7399568a0e4f015ce323d5a45ea780ca87 *av1-1-b8-00-quantizer-05.ivf.md5
-461142b1b50ae74c6b698d23f5ed3b764eadfb89 *av1-1-b8-00-quantizer-06.ivf
-6477ff260624e0f76c94ac872d1e7d5576af4177 *av1-1-b8-00-quantizer-06.ivf.md5
-7f8113cd13d8faaa06fdbaaa50dc328daf037e6d *av1-1-b8-00-quantizer-07.ivf
-b26795c6cb408487c20737977cd6b77311772bf7 *av1-1-b8-00-quantizer-07.ivf.md5
-4218f7945a172e1fe4f9e77ec35085a394eda9f4 *av1-1-b8-00-quantizer-08.ivf
-ea5d7d501e9a69d805251e4871515d28468d8676 *av1-1-b8-00-quantizer-08.ivf.md5
-837f3bcadfe56cf302db2ebaf9a990446fb35801 *av1-1-b8-00-quantizer-09.ivf
-eede995cdac5fd01a411da2e74e86e8394138be1 *av1-1-b8-00-quantizer-09.ivf.md5
-adc229b3780a4968c18ded1bcbe72e3f04643833 *av1-1-b8-00-quantizer-10.ivf
-0799b7e54e54ee97bf0e8aad2b75509ce59c7097 *av1-1-b8-00-quantizer-10.ivf.md5
-44bac8247160a8d9a0ab19f890fc89cc9298de1d *av1-1-b8-00-quantizer-11.ivf
-cc6b2bf167e114599b242aba574e8c6f1fa2f047 *av1-1-b8-00-quantizer-11.ivf.md5
-ebb3af7dfc15567188bcb617021cdc95ebc560e3 *av1-1-b8-00-quantizer-12.ivf
-b716ae29d56cd0c052dbfa1b5dcf850cd0fa8ca7 *av1-1-b8-00-quantizer-12.ivf.md5
-46159641f981a26fb9c374a5ca41e44f0ce0a9f0 *av1-1-b8-00-quantizer-13.ivf
-c6db1b8b4a74f83e4a0647e053cea0fc00f6abab *av1-1-b8-00-quantizer-13.ivf.md5
-fadc909d18eb640760fbb075f922fb050e715470 *av1-1-b8-00-quantizer-14.ivf
-e36bb6b23273633ba3ef7d28160a7258840a1476 *av1-1-b8-00-quantizer-14.ivf.md5
-8befbd9cc1601dcd36ec6911613855f68e6fd40e *av1-1-b8-00-quantizer-15.ivf
-cfc2334b76fb5e7aa9d8607e89d37cbc7716d62e *av1-1-b8-00-quantizer-15.ivf.md5
-ca42e00ae27c6b7f684fe3d2a787d50d2827cb3f *av1-1-b8-00-quantizer-16.ivf
-f11278218a7c3c73cfaab2332bab55f06cedcc81 *av1-1-b8-00-quantizer-16.ivf.md5
-05270d365bdc067f9446eda3029a6f41571a5229 *av1-1-b8-00-quantizer-17.ivf
-fb6482f35e7ad04bf231ea1806226760abcb3c26 *av1-1-b8-00-quantizer-17.ivf.md5
-617bc72037165efbff478d5a0d342b3c20ffcafd *av1-1-b8-00-quantizer-18.ivf
-1ff68d5424f91322123fe0d58f436b8e49cfa99d *av1-1-b8-00-quantizer-18.ivf.md5
-821c3b1ae6054c7a91b2f64428806e57f1157ca6 *av1-1-b8-00-quantizer-19.ivf
-f2fd118e786697553d6987f786660a2bb9f00680 *av1-1-b8-00-quantizer-19.ivf.md5
-48bcf17c27d9a4eb73632a68c09f42eff9f9af99 *av1-1-b8-00-quantizer-20.ivf
-64d55e4c858414bc2837c9c3e2d5fb6d2208c4b8 *av1-1-b8-00-quantizer-20.ivf.md5
-d61ecdd4f0950bc5c8bae1270b22e711bdd22763 *av1-1-b8-00-quantizer-21.ivf
-9d447938596096704fd5f4d41bcdf6fabf9cdfb9 *av1-1-b8-00-quantizer-21.ivf.md5
-59b4b65d8e56ccdd1bddff26a03e991a63409334 *av1-1-b8-00-quantizer-22.ivf
-aa1be0c7c7622d612af85f9bf96a212f6fe5ab56 *av1-1-b8-00-quantizer-22.ivf.md5
-95ed96988eb9916cad956db9b929718769de49f1 *av1-1-b8-00-quantizer-23.ivf
-596b8a3aea468996d609624367465c412751f52b *av1-1-b8-00-quantizer-23.ivf.md5
-e6c2dc4ce725003152797b3d7b34d7eb34da50c8 *av1-1-b8-00-quantizer-24.ivf
-1cd3d7e8b3813a9e5591b94eaeb72d471780e64a *av1-1-b8-00-quantizer-24.ivf.md5
-6734e353008824e523939d1a18daa3f2ab2d8ec6 *av1-1-b8-00-quantizer-25.ivf
-c45cf440a05802c1f9e29472175ed397d130d988 *av1-1-b8-00-quantizer-25.ivf.md5
-3372b1c69fb39811156adcea4f6dba802c0918c2 *av1-1-b8-00-quantizer-26.ivf
-b1751d55bb3fb788751fe28fb7434bee153bda68 *av1-1-b8-00-quantizer-26.ivf.md5
-e7ddb19a6e2a798d6a4e7dfdfc10b4df777b60e3 *av1-1-b8-00-quantizer-27.ivf
-0e19d6b79cd71de69d03e0455349568af979b170 *av1-1-b8-00-quantizer-27.ivf.md5
-7f1c90a35543d6b673e353b3702baf3aa1caeaa7 *av1-1-b8-00-quantizer-28.ivf
-d9a4f9cb88103249a05a7e6aa616bf0c16bf9c95 *av1-1-b8-00-quantizer-28.ivf.md5
-28d741b923011c7fcc50a7318256a638d3110a07 *av1-1-b8-00-quantizer-29.ivf
-c68cacf2b2ff2694945a99ad836dcf1ee3961c09 *av1-1-b8-00-quantizer-29.ivf.md5
-9a5d9ea4bc76dd40d04e92f33f45e9c2e120e85d *av1-1-b8-00-quantizer-30.ivf
-eb02bb8c16c4c0368ddff83e05e516e84ec9eaf3 *av1-1-b8-00-quantizer-30.ivf.md5
-20193c372f44f522e094c2c05fc7e4aaa0717fa8 *av1-1-b8-00-quantizer-31.ivf
-a4c1a4ac332f4911f0d5abbd826ebecfb8432d6c *av1-1-b8-00-quantizer-31.ivf.md5
-9617bbd691f093d259dbc8a642a57a153c1fc00c *av1-1-b8-00-quantizer-32.ivf
-73d60a348454b126ea6368ea604954bc23f210ae *av1-1-b8-00-quantizer-32.ivf.md5
-d9aea9d72a686c59b60584d827f60ca1ee8eee26 *av1-1-b8-00-quantizer-33.ivf
-fbf64de376a63d2d3051da83b0e4e56579b55c0a *av1-1-b8-00-quantizer-33.ivf.md5
-791aaf067f125e5cf4a247cf06a2e29ab071ec90 *av1-1-b8-00-quantizer-34.ivf
-8e2e6efe4c069e54844da19125c4280b95990c69 *av1-1-b8-00-quantizer-34.ivf.md5
-01ba67bba5cbf7c94c65da8f4c9bd6e7db24cf3a *av1-1-b8-00-quantizer-35.ivf
-0c5e60704a4a6bd27e67b6fd72ca7d2cf7fff50f *av1-1-b8-00-quantizer-35.ivf.md5
-3e255b4a320c9522dcec539fef770b6920b9a102 *av1-1-b8-00-quantizer-36.ivf
-1241aab865fd7b4bae73736cbeec1866ea9c90ec *av1-1-b8-00-quantizer-36.ivf.md5
-44fa6fca109747d8f43f6c6aa46d782e5d476d54 *av1-1-b8-00-quantizer-37.ivf
-947f0f887c5ac9149cf85e8114a709d6f410fc32 *av1-1-b8-00-quantizer-37.ivf.md5
-8319ac1ddd6ce3279da5780175dff7a3a5fa1054 *av1-1-b8-00-quantizer-38.ivf
-5f571b7f88678eab9e54f162cc9898f14e437770 *av1-1-b8-00-quantizer-38.ivf.md5
-5975e7056e17608593a8c40619b68e6576d373d9 *av1-1-b8-00-quantizer-39.ivf
-7c870192d6eb70ce5367147a3d2c6a52e11f7bec *av1-1-b8-00-quantizer-39.ivf.md5
-47da942f1e455f1422fc65f06dd57304541d16ac *av1-1-b8-00-quantizer-40.ivf
-6ea7116c9ce3a1641c7060bab2f5e06fd0910d61 *av1-1-b8-00-quantizer-40.ivf.md5
-ab35c15dfde21c2572b14e04dbfd5fac1adae449 *av1-1-b8-00-quantizer-41.ivf
-19596f9849653b913186b9d6b7072984ede96177 *av1-1-b8-00-quantizer-41.ivf.md5
-23a5fa6c3d0eaffaf13f6402465f5dd33d8ea7f1 *av1-1-b8-00-quantizer-42.ivf
-5a2726f0d1b1799d4f70883f1bfe5c9d976c6cf5 *av1-1-b8-00-quantizer-42.ivf.md5
-86cddfc463d2b186ec5a1aa25c4562c05201e3c3 *av1-1-b8-00-quantizer-43.ivf
-674c64ec8487ee774ad09350380fa6ac43815807 *av1-1-b8-00-quantizer-43.ivf.md5
-6894c154eb56c4f3fe44d54fc4f9af468b03d175 *av1-1-b8-00-quantizer-44.ivf
-eca679a2781eb894d18b3d578e3aaf4f48019a15 *av1-1-b8-00-quantizer-44.ivf.md5
-0960bf018ada4224b8344519cf091850d50a57bd *av1-1-b8-00-quantizer-45.ivf
-291bb43b9e1ab167040b51019daf1ccf94fd1e50 *av1-1-b8-00-quantizer-45.ivf.md5
-ea644a4732f1a2534332802c2fa5073344f3c356 *av1-1-b8-00-quantizer-46.ivf
-4c7915382b1d6d08709c95525b04ab8830f20ca1 *av1-1-b8-00-quantizer-46.ivf.md5
-d1f8832d33234e2c74a2280090850153ea24ea82 *av1-1-b8-00-quantizer-47.ivf
-90eb9959e612602934dcc512fe6f54abf0c88d9c *av1-1-b8-00-quantizer-47.ivf.md5
-69c93f760e8b666eb5b98f510e09d90f9230ac9b *av1-1-b8-00-quantizer-48.ivf
-931f869e14bd455de9dac2101b383c29e7d6f04c *av1-1-b8-00-quantizer-48.ivf.md5
-8b660c577d95c031d6711c1134b8d115097f8d7e *av1-1-b8-00-quantizer-49.ivf
-0e3fe8b49d497050dc1a0eac5f3ad60f5fe068fe *av1-1-b8-00-quantizer-49.ivf.md5
-d40bb21448a6da0fc9b88cbcf76d2f4226573acb *av1-1-b8-00-quantizer-50.ivf
-bcd2a9c9a021ba44fc5dc74ae02194fe49ca76a4 *av1-1-b8-00-quantizer-50.ivf.md5
-3b5a1d464aa89b0f1a6ad4f5a03602292b826172 *av1-1-b8-00-quantizer-51.ivf
-49bcde0c56cf8b7fbe429336981be22d39025b74 *av1-1-b8-00-quantizer-51.ivf.md5
-38970a02fb38ddb4954fe4240164cb75de5fc744 *av1-1-b8-00-quantizer-52.ivf
-fd02b034d79d4be150efb02bd4349edfd0e41311 *av1-1-b8-00-quantizer-52.ivf.md5
-2fde7a7cf3014d5196d011c47de4a144227ed122 *av1-1-b8-00-quantizer-53.ivf
-0cb66e6d8fbb29962a69ae1703e22da50db2c92b *av1-1-b8-00-quantizer-53.ivf.md5
-89a69e9b9a601e40cb491ac3a1d32491f2468ac8 *av1-1-b8-00-quantizer-54.ivf
-2f8af51acc73c99b5af81db2bdd1883b611ad311 *av1-1-b8-00-quantizer-54.ivf.md5
-31ee4f56fcb0043e95fff7af49e4ef82aafa5543 *av1-1-b8-00-quantizer-55.ivf
-04a7104e02bdd0fa38c118202dbbecdbd11ace02 *av1-1-b8-00-quantizer-55.ivf.md5
-f262f0b234006a2652fceb77b1a8711aa53abb54 *av1-1-b8-00-quantizer-56.ivf
-bdd54dc25bc5a147c76163af0bced45c56435d79 *av1-1-b8-00-quantizer-56.ivf.md5
-1ef00617091db4b2b839de623bd6b4fb0b2f5f83 *av1-1-b8-00-quantizer-57.ivf
-714c65363a87ed5e6e4ad75c79ddb6af57d41fd9 *av1-1-b8-00-quantizer-57.ivf.md5
-43c9b02feccbb3c709d96015f126b7e3d4c24c64 *av1-1-b8-00-quantizer-58.ivf
-bae22b8d6377862bff8219470c0d87205d186a68 *av1-1-b8-00-quantizer-58.ivf.md5
-ca5f780abe4c02e48cceb9c804f3625723c359bf *av1-1-b8-00-quantizer-59.ivf
-c60a20bbf60b0b0a442ef3f7b682979053909d6e *av1-1-b8-00-quantizer-59.ivf.md5
-1f6f047e9f0e1da22fb514370d92c3c7c66dcf89 *av1-1-b8-00-quantizer-60.ivf
-86dc7fa59d363cf1ae4b027a57b119bda893c1c1 *av1-1-b8-00-quantizer-60.ivf.md5
-bcf0c3353568c47a043f2dc34c9abd3fc04eebd4 *av1-1-b8-00-quantizer-61.ivf
-66fc4f729c5915aa19939d1b6e28e5b398e747bb *av1-1-b8-00-quantizer-61.ivf.md5
-ac8d3c54451b52cf557ef435d33e7638088d66df *av1-1-b8-00-quantizer-62.ivf
-b57f4e1276ead626a3662339a86111ae6fda49d2 *av1-1-b8-00-quantizer-62.ivf.md5
-2a8aa33513d8e01ae9410c4bf5fe1e471b775482 *av1-1-b8-00-quantizer-63.ivf
-9f646ec35a168f495e144c64ba7ce9aeb41cd0a2 *av1-1-b8-00-quantizer-63.ivf.md5
-838388fbda4a1d91be81ff62694c3bf13c460d38 *av1-1-b8-01-size-16x16.ivf
-4229c1caf8e25eb3073456fb90ceed206753901e *av1-1-b8-01-size-16x16.ivf.md5
-23f4253bf71e02b2e8ead66da4b3de875e879ef2 *av1-1-b8-01-size-18x16.ivf
-af125644436d4b6897dade68336cedad663b6610 *av1-1-b8-01-size-18x16.ivf.md5
-94e4a75bd93052f79998e9e08e6b5dd73dc27e50 *av1-1-b8-01-size-32x16.ivf
-e7b3fbc5e4b2469838e7ae36512bd3ce0a81040c *av1-1-b8-01-size-32x16.ivf.md5
-f297bde01c05ec5c07ff8118a0280bd36c52b246 *av1-1-b8-01-size-34x16.ivf
-f6bbd94d6063c689de3c7cf94afa2c68b969d12c *av1-1-b8-01-size-34x16.ivf.md5
-1e18bdf68bab7e7282aacc77e423bc7d93d04a8e *av1-1-b8-01-size-64x16.ivf
-de75732fccfb385294b23c17f0f1a57b455edcf7 *av1-1-b8-01-size-64x16.ivf.md5
-26b1f6ae80b161e971468085778cc1ece502b330 *av1-1-b8-01-size-66x16.ivf
-48bd99813557c314d398e6952da78da07c79d416 *av1-1-b8-01-size-66x16.ivf.md5
-ff213ecf31b982a3a7f009c9739f64e066e1ffe9 *av1-1-b8-01-size-16x18.ivf
-86b20a13b1939dc5f678e80491f190d376233d58 *av1-1-b8-01-size-16x18.ivf.md5
-c90bd878c59263a15c6a6f515d1c7e071f141559 *av1-1-b8-01-size-18x18.ivf
-6f659036ffcd3dd380cf970cf1a06f7755e0b2de *av1-1-b8-01-size-18x18.ivf.md5
-e16a1411381b34817a4c0d8e5eeaeb8cddcc9c46 *av1-1-b8-01-size-32x18.ivf
-fdb1c4ec56f5aa690eadbe897340fee86a06ae2f *av1-1-b8-01-size-32x18.ivf.md5
-fac7052b39bd2d0ae107e0e94050226712c770c2 *av1-1-b8-01-size-34x18.ivf
-adb0d5a99228027eaa3b016963df447c9818c447 *av1-1-b8-01-size-34x18.ivf.md5
-b8be5e55d9be42746c2b547d0e26e80b21c9802a *av1-1-b8-01-size-64x18.ivf
-8f8f6da34cdf78c5a6551c637e1afe279cc3884e *av1-1-b8-01-size-64x18.ivf.md5
-9e066bdcc2cd789cdf551bd4c9c85c178887b880 *av1-1-b8-01-size-66x18.ivf
-e8ec6effa936423ae2eec2b60a3160720d2de912 *av1-1-b8-01-size-66x18.ivf.md5
-6ebe45085cdeebc2acd6da5abd542a59312c0ff4 *av1-1-b8-01-size-16x32.ivf
-044695669103dbf158591dce9c649317a177d5f6 *av1-1-b8-01-size-16x32.ivf.md5
-9fabb4f60641b8c7995d1dc451419165d41258ff *av1-1-b8-01-size-18x32.ivf
-7263764680dfec864c3fad5df824ab1973489a14 *av1-1-b8-01-size-18x32.ivf.md5
-3f72841a24a13e601d79cf029aa1fdb02970ce0b *av1-1-b8-01-size-32x32.ivf
-bbe1ae2888d291ec6bc98cd0784937580c554103 *av1-1-b8-01-size-32x32.ivf.md5
-392131a7c7609acd0dba88fee14f1ed042d23ab1 *av1-1-b8-01-size-34x32.ivf
-eea68165ebe9acd28693374bf2266374b9c77786 *av1-1-b8-01-size-34x32.ivf.md5
-78afdd96265811ab9466e906347b57161e5c010d *av1-1-b8-01-size-64x32.ivf
-47b317af582700b67f6e77659db1dfaa26c8cde6 *av1-1-b8-01-size-64x32.ivf.md5
-2b4d01f2c9f23044c0d886482c7073bd4d5d37d1 *av1-1-b8-01-size-66x32.ivf
-3ad5a58a0ee5086af370b22ab2b5b7592a4f33e7 *av1-1-b8-01-size-66x32.ivf.md5
-78ddae04eb8277ae605bd7017ad7ad27bfc82d39 *av1-1-b8-01-size-16x34.ivf
-d0c18e679f1fc51e4f7409831321eed9c4858f6f *av1-1-b8-01-size-16x34.ivf.md5
-38d8ed885f46aead6ec1271d8a5d4aee79b8eb68 *av1-1-b8-01-size-18x34.ivf
-097ddbd69b8f54826a35efeb0b8b07ec198bba6b *av1-1-b8-01-size-18x34.ivf.md5
-91a42720bc2e7ba701f4d97b463a098b6707cdbd *av1-1-b8-01-size-32x34.ivf
-c590d43d37095bd2e8f8d12c9278477419b72d1a *av1-1-b8-01-size-32x34.ivf.md5
-4cc2a437dba56e8878113d9b390b980522542028 *av1-1-b8-01-size-34x34.ivf
-57eeb971f00e64abde25be69dbcb4e3ce5065a57 *av1-1-b8-01-size-34x34.ivf.md5
-b36fee1b6ad69d1206466615d69c05e0a4407939 *av1-1-b8-01-size-64x34.ivf
-a78aea0250d0b32657dc0eaf2d8394bc766c0e35 *av1-1-b8-01-size-64x34.ivf.md5
-10e441209262e082e31fef8c15b51579c9e81509 *av1-1-b8-01-size-66x34.ivf
-558b46f6ef1662c208012d0b66d1857eeff3244e *av1-1-b8-01-size-66x34.ivf.md5
-dd44aad500c7ca0fc97e3d8f0abed3c83b24c79c *av1-1-b8-01-size-16x64.ivf
-a5b64e8063abcf3e4872dc4baf1c32384dc5cf83 *av1-1-b8-01-size-16x64.ivf.md5
-aa849f0d09bcb2ead44719d63043536932d5c9f2 *av1-1-b8-01-size-18x64.ivf
-bcdf2dea3590c7031158ffe7b907d9ee35e2fe57 *av1-1-b8-01-size-18x64.ivf.md5
-36e856d30e160ba2fbb00510296202f61afaae49 *av1-1-b8-01-size-32x64.ivf
-99299f75b82c40c13f168adf2d124f57044a39a2 *av1-1-b8-01-size-32x64.ivf.md5
-e3e03ec5d38eb25e97e4ec3adc6ed40ecdebd278 *av1-1-b8-01-size-34x64.ivf
-84625abf8a200a7d20dd3dd3b277b50b3d62ce32 *av1-1-b8-01-size-34x64.ivf.md5
-7d017daebef2d39ed42a505a8e6103ab0c0988c1 *av1-1-b8-01-size-64x64.ivf
-1ff38d5ecba82fb2e6ac3b09c29c9fe74885ac29 *av1-1-b8-01-size-64x64.ivf.md5
-e1b58ba0b462508593399a2ed84db5f1c59ffcd2 *av1-1-b8-01-size-66x64.ivf
-a6b2c84c94fe79ab0373d157d1203f8d66de0706 *av1-1-b8-01-size-66x64.ivf.md5
-7b4faa7eb7b73392b62de6613282a98dddc13bb6 *av1-1-b8-01-size-16x66.ivf
-a2dacf2bae3c4ab352af66a9600946d29ab9a6ee *av1-1-b8-01-size-16x66.ivf.md5
-0f97805fa30497d4cf39665150f00dfdea52d862 *av1-1-b8-01-size-18x66.ivf
-33d8ea0765953250f998da3fe161f2a8cfca2353 *av1-1-b8-01-size-18x66.ivf.md5
-c8bb00256de973e3b3ee31b924f554336d310cdb *av1-1-b8-01-size-32x66.ivf
-6a6588e6edc68ff7739968a9e7cc6d9eaaeed356 *av1-1-b8-01-size-32x66.ivf.md5
-75ec54fec5c36eecde6d0a16e0389a5f7ad8ec22 *av1-1-b8-01-size-34x66.ivf
-36101dfa9495c18696c0d7d61f25e748f4de7425 *av1-1-b8-01-size-34x66.ivf.md5
-7e5491716e70f8199156b8843513c935667b281e *av1-1-b8-01-size-64x66.ivf
-da38755bb0c9ef56b81617835ddf1340242c6dce *av1-1-b8-01-size-64x66.ivf.md5
-68b47b386f61d67cb5b824a7e6bf87c8b9c2bf7b *av1-1-b8-01-size-66x66.ivf
-25974893956ebd92df474325946130c34f880ea7 *av1-1-b8-01-size-66x66.ivf.md5
-9f386d19c87dbfd6ac84a06d2393dd88863ac003 *av1-1-b8-01-size-196x196.ivf
-788f77f655f55de3db94dd69870316134c149116 *av1-1-b8-01-size-196x196.ivf.md5
-ed3bb2bb52a9d1786e233ef38142b15b85097875 *av1-1-b8-01-size-198x196.ivf
-3bb6b6721ad9b2838b2d07e47b29d6c0117526b1 *av1-1-b8-01-size-198x196.ivf.md5
-49461772caaaa7b824d48f4e9c77a906b0dc02d5 *av1-1-b8-01-size-200x196.ivf
-f1cba00c36909c56097c8785df476d42bc91f259 *av1-1-b8-01-size-200x196.ivf.md5
-44a656a22958e26ed169a69deb8f373117224f06 *av1-1-b8-01-size-202x196.ivf
-69be876b52fe42811bba52d36d0bcc88d6c25b3f *av1-1-b8-01-size-202x196.ivf.md5
-0a6fe9b478363faedbfd465a75790b4c2661b9ba *av1-1-b8-01-size-208x196.ivf
-fc8e95a6860a8a37ccdf1dfe49828502fcf96a08 *av1-1-b8-01-size-208x196.ivf.md5
-8e05b5a20ec95afd92bb615a7daa2e17a7ef55a8 *av1-1-b8-01-size-210x196.ivf
-0add512bffbda3300d8f684a53b13b996fe2e46d *av1-1-b8-01-size-210x196.ivf.md5
-a15f12652c6b4d0c30f13a439c941bfc4a431d1a *av1-1-b8-01-size-224x196.ivf
-b904b93252175f79e0e2b28896131ce93d5fc925 *av1-1-b8-01-size-224x196.ivf.md5
-1a57b913443b267f4a31a6925c39f5b58022f550 *av1-1-b8-01-size-226x196.ivf
-7cf3087de5804763a82d2a798243a66459664772 *av1-1-b8-01-size-226x196.ivf.md5
-2cc28541a2a72e8b45a368f71e70fc294e2de3ab *av1-1-b8-01-size-196x198.ivf
-bb736eedb4bd1e39bf9d60435b4b27a12842e112 *av1-1-b8-01-size-196x198.ivf.md5
-c4ebf93fbf3ae52108fd7b39ddef3afae48188ea *av1-1-b8-01-size-198x198.ivf
-fa4de6881511728bafa15b5f441a0cfdf683cc75 *av1-1-b8-01-size-198x198.ivf.md5
-55fce983186d454b0eb15527393bb2465ba41c6b *av1-1-b8-01-size-200x198.ivf
-1ac8fb1ee622cbc4aa1b83cb46b4731c85efae62 *av1-1-b8-01-size-200x198.ivf.md5
-67d276c67886f0a91a7ee06751a64f95eeb7bc1f *av1-1-b8-01-size-202x198.ivf
-1633b62d9e4ea41737c42f70cbde9a5671da0cef *av1-1-b8-01-size-202x198.ivf.md5
-081cb3f29d3956d4d858d9661fd3d62c94b68867 *av1-1-b8-01-size-208x198.ivf
-871d1c99167408dd32fa7603a7296c9b99ccda15 *av1-1-b8-01-size-208x198.ivf.md5
-b2d80b42468d5f296ae240cfb1fc0b3dd3d96bbc *av1-1-b8-01-size-210x198.ivf
-6a3382656cb17b532a97b1061697f9a878fc58d1 *av1-1-b8-01-size-210x198.ivf.md5
-84d7994fa20fcf6c1d8dbd4c2060c988a6fce831 *av1-1-b8-01-size-224x198.ivf
-42ea12e15de81f2e8617b6de7bae76de2da4d648 *av1-1-b8-01-size-224x198.ivf.md5
-c74a9281cf98c597121df6bff0ac5312b887f969 *av1-1-b8-01-size-226x198.ivf
-4133aae0001804e2bbc7928fc065517a6dd8b288 *av1-1-b8-01-size-226x198.ivf.md5
-27adbf148c63f807bd617cfd78aeaedb8b0f2304 *av1-1-b8-01-size-196x200.ivf
-9253e525e6207ef1ce0839b8f88ea781e9abe41e *av1-1-b8-01-size-196x200.ivf.md5
-21c9ea4d882e48353d3df66fcde0e4746168163f *av1-1-b8-01-size-198x200.ivf
-3d5ee59fde9194f0eaff736051cfd1d7b7daeff1 *av1-1-b8-01-size-198x200.ivf.md5
-c27b0b57667910847122a0309c703315e444110f *av1-1-b8-01-size-200x200.ivf
-7b2a15a17b421ef07e285ca4e8a224f0512c434d *av1-1-b8-01-size-200x200.ivf.md5
-780de549e4163a52590f7c0f488e027a8a4aa053 *av1-1-b8-01-size-202x200.ivf
-cb0ec0969522ca60d79a639e9b9509363468ffd0 *av1-1-b8-01-size-202x200.ivf.md5
-2c59821904863e264ae61401cbd494a79bc04f13 *av1-1-b8-01-size-208x200.ivf
-9963955966a52b65cdd13465c9fb2ba3b5356755 *av1-1-b8-01-size-208x200.ivf.md5
-ff63121611ea9c0628c7e5af13de5e7786611ca6 *av1-1-b8-01-size-210x200.ivf
-2a5993be234e3af2af6d185b2a6f3aaf1979b83a *av1-1-b8-01-size-210x200.ivf.md5
-b8485ada95440d78b51153227231b1aced1a8273 *av1-1-b8-01-size-224x200.ivf
-9c3cd32ea6c006a91eb37d69dbeccf878de5d214 *av1-1-b8-01-size-224x200.ivf.md5
-1aa0ce3e3a74f9b600a146e98b05547a0b454c48 *av1-1-b8-01-size-226x200.ivf
-e045be96c3af16a9ddc10a9933e8ddfb3319d716 *av1-1-b8-01-size-226x200.ivf.md5
-e92b76480f4339855d998b97182f36b28deadcfa *av1-1-b8-01-size-196x202.ivf
-480c707abcd2a650e2160ec397f8348cecb45770 *av1-1-b8-01-size-196x202.ivf.md5
-137b9c0d10a3bdbdf6f97b3e6331f3e8acaf8f91 *av1-1-b8-01-size-198x202.ivf
-7429642146d0da55161ab13024a261094ee2ce87 *av1-1-b8-01-size-198x202.ivf.md5
-9cea71c44ad015ac702d675bacca17876e65cb1a *av1-1-b8-01-size-200x202.ivf
-76b1ec6c42da55f47e389a561590d1a7c713e495 *av1-1-b8-01-size-200x202.ivf.md5
-26dffdcd0dac9becf68d12e31fcd91eddf1f7154 *av1-1-b8-01-size-202x202.ivf
-ddb75e99123fed4ef05d9b85200cefd8985bc84c *av1-1-b8-01-size-202x202.ivf.md5
-04007e83bb66ba547d09f8926ea5bfc7fd9e4b2a *av1-1-b8-01-size-208x202.ivf
-5b72eb58db22087ad416c499119f41e718395b52 *av1-1-b8-01-size-208x202.ivf.md5
-721ff7c0ae0e2ed896b5acac230113f1404e769c *av1-1-b8-01-size-210x202.ivf
-187d2ef939fc26e1a1c7de65abe8e058d8aae17a *av1-1-b8-01-size-210x202.ivf.md5
-dba41421cc938bcf0234254f96be0325ab66186e *av1-1-b8-01-size-224x202.ivf
-58856038c1eb13a7bf0353a30b1affe844cd31b1 *av1-1-b8-01-size-224x202.ivf.md5
-55eba14878d25dcc351ee5e92fa06e559035b409 *av1-1-b8-01-size-226x202.ivf
-e295b3d791d40d7c1fff2c40a260078dccaef24a *av1-1-b8-01-size-226x202.ivf.md5
-6c777223990ddfd92040a8526646ed0f39299b0d *av1-1-b8-01-size-196x208.ivf
-5210daff766cddaf3945610ee05ff242aef8175a *av1-1-b8-01-size-196x208.ivf.md5
-252831abfb9f4a9a8556c21cc3bf60adfe88210f *av1-1-b8-01-size-198x208.ivf
-35ed9601e608a829980cec81e41b7bd3e5f4c2ce *av1-1-b8-01-size-198x208.ivf.md5
-e800ed893a88704a4576d4984957f3664560daa9 *av1-1-b8-01-size-200x208.ivf
-82c038f9072a2fcf8d55fb4a474fdd791ba9a290 *av1-1-b8-01-size-200x208.ivf.md5
-9ce7bb932dd99f86da8ff2ab89fa4d3089a78da8 *av1-1-b8-01-size-202x208.ivf
-0611bf0179abe3c820a447a2bd3a04c3790f3a87 *av1-1-b8-01-size-202x208.ivf.md5
-e5900d9150c8bebc49776227afd3b0a21f5a6ac6 *av1-1-b8-01-size-208x208.ivf
-86d6b9a3840aa0a77938547c905bd6f45d069681 *av1-1-b8-01-size-208x208.ivf.md5
-2758ba5dad16f4a91334f2ed07a4a037201bb873 *av1-1-b8-01-size-210x208.ivf
-78453b1fda2ccc6f35e0d762567807757bcddb16 *av1-1-b8-01-size-210x208.ivf.md5
-fff88fb8e833f6b4ad64cb591b219c7cceb7f2d2 *av1-1-b8-01-size-224x208.ivf
-87266fc34aaed82cdb98cbc309b221ad52eccd81 *av1-1-b8-01-size-224x208.ivf.md5
-dec839fe64046461015b56cda191835284f42a52 *av1-1-b8-01-size-226x208.ivf
-d7a15264fc3fd55d3aec0ccfaa7c434c6d90969f *av1-1-b8-01-size-226x208.ivf.md5
-584782e93ed1cb7797a90fece44becdd1e23bf0d *av1-1-b8-01-size-196x210.ivf
-ed76ec841b18a457853e368576967c4768fc2730 *av1-1-b8-01-size-196x210.ivf.md5
-dab625599b9f01398b593e865d9a4a95a029d60f *av1-1-b8-01-size-198x210.ivf
-b90e8d96a1f5b329b088b467a11fed2d055d74ca *av1-1-b8-01-size-198x210.ivf.md5
-6774bee17b9e50d2d8630e2e1afc30ded67e662d *av1-1-b8-01-size-200x210.ivf
-343a86bd54eb3dd5e9902eb62a3d776dcff2f4f3 *av1-1-b8-01-size-200x210.ivf.md5
-0456c3b8e242eeee019ca97d155f81124de62c90 *av1-1-b8-01-size-202x210.ivf
-5a6a6428c9858a0d3561db42ceaf981c143fe479 *av1-1-b8-01-size-202x210.ivf.md5
-6a3a8f65bf806b1be7726b983427880f772c9986 *av1-1-b8-01-size-208x210.ivf
-5563ea6d8c65887553ff3000addc6418913f1650 *av1-1-b8-01-size-208x210.ivf.md5
-5a8b69489f8e9b917ea7718ad2645101cdbe5644 *av1-1-b8-01-size-210x210.ivf
-f4b01604036fa23000d44fbf42097ae1181bcd62 *av1-1-b8-01-size-210x210.ivf.md5
-fb6f5b08a048698cfe324557ee8cd840c4a3f6ce *av1-1-b8-01-size-224x210.ivf
-3ce5c404e3ca09c8e994b3043bad42cd555b00c0 *av1-1-b8-01-size-224x210.ivf.md5
-2e9fc8510d2131b2f3c9a93bececac985e4426d2 *av1-1-b8-01-size-226x210.ivf
-897c537e259331ca86cdd6e4d2bd343f8538402e *av1-1-b8-01-size-226x210.ivf.md5
-8300512106fce3424eb74b5d4bc0f4f19f7c9af8 *av1-1-b8-01-size-196x224.ivf
-43662ea025ea79afe4964fd4d12a77f4aa4e565e *av1-1-b8-01-size-196x224.ivf.md5
-640f8fda7ade8f2850e2275a9f5e233e33a0ba8d *av1-1-b8-01-size-198x224.ivf
-9ac690bdbbce47d7b169128b568f955e70076f8c *av1-1-b8-01-size-198x224.ivf.md5
-ce2e9379c72fc924e364d5727605394a1438a211 *av1-1-b8-01-size-200x224.ivf
-1ec35a53d88072b96b255202f678178bc7e5bb20 *av1-1-b8-01-size-200x224.ivf.md5
-5d3af7921623deccb578115c8ce207c019f97f50 *av1-1-b8-01-size-202x224.ivf
-14eafd55b0cda3a3476cae7ad500dbd5ee899dd5 *av1-1-b8-01-size-202x224.ivf.md5
-6b6d78e466cf94a5ef8dfe252caa0948dd2ec175 *av1-1-b8-01-size-208x224.ivf
-e178b0c272dfcfe614c6b49cb28dad11781af0b6 *av1-1-b8-01-size-208x224.ivf.md5
-dd2232b9e18971d7e19650a1e3218aef1010247f *av1-1-b8-01-size-210x224.ivf
-40a66198c47820f5fa2d2e389ec0c1191ea4ffcc *av1-1-b8-01-size-210x224.ivf.md5
-9ec028b81a5ea311683328d856f436e6d0b0e6a0 *av1-1-b8-01-size-224x224.ivf
-143b9530ce722385db2c2d883daa649ed42b8d40 *av1-1-b8-01-size-224x224.ivf.md5
-bf833947e62935c54e1e727ccb36157f7c1e9e5d *av1-1-b8-01-size-226x224.ivf
-ca4f3b44463106e4f0bb54e490c3bd457d7d780b *av1-1-b8-01-size-226x224.ivf.md5
-5525f7e312ec073f480ed5a2be5bdc4f0ce51a09 *av1-1-b8-01-size-196x226.ivf
-062d4b240741184458d2d2abd243ed7877631de8 *av1-1-b8-01-size-196x226.ivf.md5
-e6b911142394b94c23191eaa63c9eb41a00f80b0 *av1-1-b8-01-size-198x226.ivf
-3b580d903dddf47082f5e055bfb01a4f05c09b7d *av1-1-b8-01-size-198x226.ivf.md5
-70feb5efeb28df25f7d1a661c73bf013c5ada9b4 *av1-1-b8-01-size-200x226.ivf
-f0b894e7f787e62f1492be62f3dedeb065062160 *av1-1-b8-01-size-200x226.ivf.md5
-7f9a10831e2389b31497fad50080b4d5452d6e91 *av1-1-b8-01-size-202x226.ivf
-45b7194eba9367c8059403c23ca4ae49e988dfaf *av1-1-b8-01-size-202x226.ivf.md5
-967837a2cfbf9aa3131f73aec6a52dcdd82926c7 *av1-1-b8-01-size-208x226.ivf
-c8baedb48fd5d4c956aa8d73fd957370f718f047 *av1-1-b8-01-size-208x226.ivf.md5
-9c926226b9f6b015501d8ac1e3f95e8570283a05 *av1-1-b8-01-size-210x226.ivf
-57d4837667fd4c5a7aeb908626d701b632852c60 *av1-1-b8-01-size-210x226.ivf.md5
-25a4940922761239809d82c45c2be1c5e4f48785 *av1-1-b8-01-size-224x226.ivf
-87ae7e7558241bf3575a333f56fbad4dfdade8ff *av1-1-b8-01-size-224x226.ivf.md5
-40dd208eb525cd90d7c0674cf787097fb909afae *av1-1-b8-01-size-226x226.ivf
-34bdef682a4eae0e0a05e4486a968af1df8b220a *av1-1-b8-01-size-226x226.ivf.md5
-9bbe8499796aa588ff02e313fb0d4349940d2fea *av1-1-b10-00-quantizer-00.ivf
-36b402eedad2bacee8ac09acce44e2fc356dd80b *av1-1-b10-00-quantizer-00.ivf.md5
-1d5e1d2827624f328020bf123df213bb175577e0 *av1-1-b10-00-quantizer-01.ivf
-16c529be5502369e43ce9c6fe99a9709968e3daf *av1-1-b10-00-quantizer-01.ivf.md5
-39abc20739242a8f05efd4b35d7603c8ad7ff45d *av1-1-b10-00-quantizer-02.ivf
-81faa72c3d43b003966fe09ffaae51b07b1059be *av1-1-b10-00-quantizer-02.ivf.md5
-92ebf349b803333a43824a83d997b8cf76f656f9 *av1-1-b10-00-quantizer-03.ivf
-5e7556dc998cb8b506a43cc078e30802d7e600e6 *av1-1-b10-00-quantizer-03.ivf.md5
-1c496177c66e49f2e3556af87ec67afb5060170b *av1-1-b10-00-quantizer-04.ivf
-560fea4800a44fe19ed8d3e74f425bdbf1fb8abd *av1-1-b10-00-quantizer-04.ivf.md5
-7de864b8475ce0acd0ecb01827f2c9add815352b *av1-1-b10-00-quantizer-05.ivf
-1c1aea3db3f54a91866d89fd3b1a0d285ca10310 *av1-1-b10-00-quantizer-05.ivf.md5
-b6501c165619b036d0f7864fd4739973d2d18970 *av1-1-b10-00-quantizer-06.ivf
-d758c8eff275651006c41e7dd447cac13b489ad7 *av1-1-b10-00-quantizer-06.ivf.md5
-e4df6f588f156dffaafd9517b64f753cfc9ccf05 *av1-1-b10-00-quantizer-07.ivf
-3c577f67dade4537de642fd457ea2b367424f336 *av1-1-b10-00-quantizer-07.ivf.md5
-07e9c4c18abb36c8699c1c12bebcc727f090b525 *av1-1-b10-00-quantizer-08.ivf
-4981568ade3170f311cb114fa2689edc4bc35e67 *av1-1-b10-00-quantizer-08.ivf.md5
-2268ecd2899f1b41ae9898925b1d62cfefa30282 *av1-1-b10-00-quantizer-09.ivf
-029b03029b65b7c4c208961f0820467ad42fd3d6 *av1-1-b10-00-quantizer-09.ivf.md5
-3d2adaf6441cfa9585dcbf7d19d65bf6992a29a3 *av1-1-b10-00-quantizer-10.ivf
-017b7fb4c3ba0747c2d5688d493da33ef993d110 *av1-1-b10-00-quantizer-10.ivf.md5
-006535760bd7dc1cfc95e648b05215954a2e76c2 *av1-1-b10-00-quantizer-11.ivf
-c0ae083deb8e820aa49034af4d100944dd977018 *av1-1-b10-00-quantizer-11.ivf.md5
-840e0cbfe1acc8a7a45c823dc55ab44a0b6b553e *av1-1-b10-00-quantizer-12.ivf
-49232ea38bdef650c94808f53834f1137cd4bf39 *av1-1-b10-00-quantizer-12.ivf.md5
-04b0e5a7387e07474f51be4b2c3e05211b40f0d0 *av1-1-b10-00-quantizer-13.ivf
-a51b5ec4b890df3a64f9f0d866b8c41296c9e081 *av1-1-b10-00-quantizer-13.ivf.md5
-5dc47a140fbcbf08bf91481ee3585e9e067561ab *av1-1-b10-00-quantizer-14.ivf
-2625319eef69d6225e6ab6e5ce7790491406cb5d *av1-1-b10-00-quantizer-14.ivf.md5
-f866be86d8d8aa08ded30e42988b0936c1a16064 *av1-1-b10-00-quantizer-15.ivf
-03b7c1eefb54d99e30051c7123c0453f04a6579d *av1-1-b10-00-quantizer-15.ivf.md5
-548df2371dfb485419ed9baf28e3f495c64f364a *av1-1-b10-00-quantizer-16.ivf
-8a0d6bf1626b05b65c77331305414fe9be54e8c6 *av1-1-b10-00-quantizer-16.ivf.md5
-0077c82f96a2e095a3cb8de9bfa63715e3c9f438 *av1-1-b10-00-quantizer-17.ivf
-5d85f77f3087f4b206930722a945c60039262be4 *av1-1-b10-00-quantizer-17.ivf.md5
-1e0f1245ecb4c903b5dc7072d959fc43a7bba381 *av1-1-b10-00-quantizer-18.ivf
-06316ae2b45f2359a70cc3855ffd6ab81048b41a *av1-1-b10-00-quantizer-18.ivf.md5
-f197198f7ec058110185fda5297a1a43993654df *av1-1-b10-00-quantizer-19.ivf
-bac522c7f234d506c75b5495d74b3fa57c83a4df *av1-1-b10-00-quantizer-19.ivf.md5
-c2f57324d000b349323f37d5ebebde8c2b861f30 *av1-1-b10-00-quantizer-20.ivf
-999c6110786cbc25e67792234a5a02f2cb4553c7 *av1-1-b10-00-quantizer-20.ivf.md5
-2ffad9adfd19286fe2166ba877289d201c9a634f *av1-1-b10-00-quantizer-21.ivf
-d55713eaa791cfd7bf69b6c26d5032029d9a0f06 *av1-1-b10-00-quantizer-21.ivf.md5
-382528db53328c1a38976f5d9b579eef35d839f4 *av1-1-b10-00-quantizer-22.ivf
-cb5bd459e1a90126da9264cff4281515f95755b2 *av1-1-b10-00-quantizer-22.ivf.md5
-b52cc6160fc66f72ad66c198d275a1c73f925022 *av1-1-b10-00-quantizer-23.ivf
-c0f9d6659e1f283e9356fd7b4ac9f7cc5544cdc2 *av1-1-b10-00-quantizer-23.ivf.md5
-e11f15e3b63e7606b1122bb3670ee77c09c04840 *av1-1-b10-00-quantizer-24.ivf
-e9f141b924440e044270c81a68458fe498599a8e *av1-1-b10-00-quantizer-24.ivf.md5
-fb91793b69824c99b0218788dcea0a74ebd7e84e *av1-1-b10-00-quantizer-25.ivf
-434e33d609b2683c3cfbcc3a2cdfc26339590fb6 *av1-1-b10-00-quantizer-25.ivf.md5
-d82e38f31cdcf8b43479e6ddaa83373de38f70a2 *av1-1-b10-00-quantizer-26.ivf
-183943b851ba383a536f13c83b93f61ac8961ad5 *av1-1-b10-00-quantizer-26.ivf.md5
-6bf5e4e8e0aca699e493b9eb3672d2117494d74d *av1-1-b10-00-quantizer-27.ivf
-f0fb7e0a99180828b0e38b2cfe0622eecc2d26b8 *av1-1-b10-00-quantizer-27.ivf.md5
-d5adee2567544c3ae4223b3f3528a770377878d2 *av1-1-b10-00-quantizer-28.ivf
-14edf588efc67570e529b0ff8aeb8e7a0c69238b *av1-1-b10-00-quantizer-28.ivf.md5
-e6dcdc106847956035e3f00aabf4470f97e1887e *av1-1-b10-00-quantizer-29.ivf
-413c5cb778611c7c1a810b53861b9ab1fb391f17 *av1-1-b10-00-quantizer-29.ivf.md5
-b5e98b3f6b1db04d46bf43064c6ac64f797aff00 *av1-1-b10-00-quantizer-30.ivf
-d1a603661d76c28658c7cd2892b408e91d77893e *av1-1-b10-00-quantizer-30.ivf.md5
-80168371d1150e82e3f46bcbbcabba458b835b19 *av1-1-b10-00-quantizer-31.ivf
-904ecd033d4af5239c4d5b3f86e51ed5c3c2e3fb *av1-1-b10-00-quantizer-31.ivf.md5
-96291f6ace85980892d135a5b74188cd629c325f *av1-1-b10-00-quantizer-32.ivf
-a5ceace390d4a75d48281fe29060c21557e4f5ae *av1-1-b10-00-quantizer-32.ivf.md5
-0f80495de34eae07c4905b72573a315a879390ec *av1-1-b10-00-quantizer-33.ivf
-72b8f662973a660412946687dff878b276ae518e *av1-1-b10-00-quantizer-33.ivf.md5
-24905e3be7db320994b7fb8311dfd50a7c9e54da *av1-1-b10-00-quantizer-34.ivf
-cea514bb1b7b064c4d31914a2cb266611c278577 *av1-1-b10-00-quantizer-34.ivf.md5
-083012960dd7c17d3b00fa0e807759c98faded8f *av1-1-b10-00-quantizer-35.ivf
-de5fdb9e1e581484af1cc7d2dd3c3e84c90cebb2 *av1-1-b10-00-quantizer-35.ivf.md5
-f725f179aeee5b413620c0dd81b007b245c2a7ed *av1-1-b10-00-quantizer-36.ivf
-246b1931c04c02df1f168090e2650827cd5dbabd *av1-1-b10-00-quantizer-36.ivf.md5
-f6aa824156e9848f237481889a8103eb6130f31d *av1-1-b10-00-quantizer-37.ivf
-a8f78dd15fc2994369a08c2ddddcd0760c62ea5b *av1-1-b10-00-quantizer-37.ivf.md5
-a8dd662338c493aea266b99203e70af25982633f *av1-1-b10-00-quantizer-38.ivf
-09f36d998e85d0450060f540e50b075ae1432fc6 *av1-1-b10-00-quantizer-38.ivf.md5
-d97428871720ed658da6ed0e3f7c15da83387e4c *av1-1-b10-00-quantizer-39.ivf
-8c5230048909ee8f86f87c116f153cd910d0141f *av1-1-b10-00-quantizer-39.ivf.md5
-86e754e55e9b63c6e0a4fef01761414f8a6b61ca *av1-1-b10-00-quantizer-40.ivf
-99a71accf6457264e45ca80d3b1f082ee5acdecc *av1-1-b10-00-quantizer-40.ivf.md5
-9d18b7236506ab7e107c062620b64096ec0cf423 *av1-1-b10-00-quantizer-41.ivf
-5771159a9a7c7b66c9e13bb13ec3d53b37860208 *av1-1-b10-00-quantizer-41.ivf.md5
-54b72bc879a80e66613f421e67db62bba1c0041b *av1-1-b10-00-quantizer-42.ivf
-bf958236883ee7209ef4cb0b7503b430634a291e *av1-1-b10-00-quantizer-42.ivf.md5
-a06d5321a51d90404dd7085ae511d7df5d5e1e05 *av1-1-b10-00-quantizer-43.ivf
-ddb25723d976043d863634b9dc3b5fb84a245803 *av1-1-b10-00-quantizer-43.ivf.md5
-2ea0b64c170d7299dae1c14a8a49349aee8e0d08 *av1-1-b10-00-quantizer-44.ivf
-d18bde1b4893792173fa2014665e9364395ad5e9 *av1-1-b10-00-quantizer-44.ivf.md5
-73e506a32d3518e23424f231c7b5323d7a34a3d6 *av1-1-b10-00-quantizer-45.ivf
-be6224ebc77a3e5fb9c1645b876007e584a09d89 *av1-1-b10-00-quantizer-45.ivf.md5
-841223871374464194edc739c48dc7cefd1ff255 *av1-1-b10-00-quantizer-46.ivf
-4766d616f923496a8dc113c9b7f875f0c0735f9a *av1-1-b10-00-quantizer-46.ivf.md5
-8bbbbea130aaea453f7b826956a5520d10a0eccf *av1-1-b10-00-quantizer-47.ivf
-3ea21fac0c492b03d8ec25e4ee0971cd57e5f71a *av1-1-b10-00-quantizer-47.ivf.md5
-3ce83e0f1e1835b9a6c10fe502a16fd3650839e0 *av1-1-b10-00-quantizer-48.ivf
-b468de2c09fca5a6b2bb7a20bab4afd8d192c31d *av1-1-b10-00-quantizer-48.ivf.md5
-f3a757c678aa00f9a9c4c4658d37733fd935925a *av1-1-b10-00-quantizer-49.ivf
-f888dc88db576122695d4eb41c486aacd28a2d1d *av1-1-b10-00-quantizer-49.ivf.md5
-a9d78aaef105cc5a95b7ebb54783f37e75673123 *av1-1-b10-00-quantizer-50.ivf
-06d0c5e79cc794030c4be022089b1d12c1383f71 *av1-1-b10-00-quantizer-50.ivf.md5
-165c20ee372f83682d094541097e375227353239 *av1-1-b10-00-quantizer-51.ivf
-b3d90214b8c6e6f6d9357bb5784d10081325c356 *av1-1-b10-00-quantizer-51.ivf.md5
-5b3ea7a18654d943065f5c176974c3960b56664e *av1-1-b10-00-quantizer-52.ivf
-dc61a6e4e2549074130023b14b137fb4fe442ce3 *av1-1-b10-00-quantizer-52.ivf.md5
-74c3b5851b6a94d33b575a689eb8d34592e95d5f *av1-1-b10-00-quantizer-53.ivf
-a80e43a0fb2b852426bd941b8d4b8f56690e9bc9 *av1-1-b10-00-quantizer-53.ivf.md5
-d05b8dea2cddd4f0d9e792f42f71afbd29f7811c *av1-1-b10-00-quantizer-54.ivf
-432937893321f4bd25fa400b8988c5788cb06ecf *av1-1-b10-00-quantizer-54.ivf.md5
-4eaee0f1970426be0bbeb7d4fccdc7e804e9bea4 *av1-1-b10-00-quantizer-55.ivf
-710ab95ce1dcd2540db4477ff4ee6ab771fe0759 *av1-1-b10-00-quantizer-55.ivf.md5
-fe637930c9faa8744cba37effc4cb5510315d1c0 *av1-1-b10-00-quantizer-56.ivf
-2f9431b30523fb6a3e4122f22c6c3ff7b96a7987 *av1-1-b10-00-quantizer-56.ivf.md5
-ed54fc7fcec194eef1f50adbbe12a6a36ab6836b *av1-1-b10-00-quantizer-57.ivf
-43bccac7800b399210cf15520a83739c23a5d9c7 *av1-1-b10-00-quantizer-57.ivf.md5
-a7b8d628ba3e4c5f37aa6a3d7b82afda73ac89dc *av1-1-b10-00-quantizer-58.ivf
-b26638272b787df54f45a46629b852acbcb73e3d *av1-1-b10-00-quantizer-58.ivf.md5
-c077f22ff547fb5ffd020e8dac91d05942fb52df *av1-1-b10-00-quantizer-59.ivf
-4efd99cc0891bf345b8cd2ae8e21709d61be497b *av1-1-b10-00-quantizer-59.ivf.md5
-301ab53039d75e1ffa8cc6a0874d9ea94e4a6a0d *av1-1-b10-00-quantizer-60.ivf
-4729bd734a6edd2d8d0432a3f66b3d91d565050e *av1-1-b10-00-quantizer-60.ivf.md5
-c78640d3211034df9fcb273bdfc18625819652f2 *av1-1-b10-00-quantizer-61.ivf
-3d823eb2b33ccfea68db506626bcbecf49b0f167 *av1-1-b10-00-quantizer-61.ivf.md5
-bf241a449a28773b93e6e529a06dfc28109577e4 *av1-1-b10-00-quantizer-62.ivf
-75457d8476f1927f737d089dcf3d0f7f99f3c4fb *av1-1-b10-00-quantizer-62.ivf.md5
-8b6eb3fff2e0db7eac775b08c745250ca591e2d9 *av1-1-b10-00-quantizer-63.ivf
-63ea689d025593e5d91760785b8e446d04d4671e *av1-1-b10-00-quantizer-63.ivf.md5
-a9f7ea6312a533cc6426a6145edd190d45813c37 *av1-1-b8-02-allintra.ivf
-8fd8f789cfee1069d20f3e2c241f5cad7292239e *av1-1-b8-02-allintra.ivf.md5
-e69e41fee40b408b6eebcc79f266a95f2ee24f9e *av1-1-b8-03-sizedown.mkv
-8c528fb3ccda959a29721566e132f730935ca32b *av1-1-b8-03-sizedown.mkv.md5
-1889da5ee1708007e47bb887470ac477e1d7ba01 *av1-1-b8-03-sizeup.mkv
-8de81b170635d456602dc8923a8b39c534d01fa8 *av1-1-b8-03-sizeup.mkv.md5
-d3ed7de0aa8c155fe35e0f5f4203240710d31383 *park_joy_90p_8_420_monochrome.y4m
-5b3f0907407b809aa66b62cb080feda8c92454ca *park_joy_90p_8_420_vertical_csp.y4m
diff --git a/third_party/aom/test/test.cmake b/third_party/aom/test/test.cmake
deleted file mode 100644
index b16ae14c3..000000000
--- a/third_party/aom/test/test.cmake
+++ /dev/null
@@ -1,438 +0,0 @@
-#
-# Copyright (c) 2017, Alliance for Open Media. All rights reserved
-#
-# This source code is subject to the terms of the BSD 2 Clause License and the
-# Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License was
-# not distributed with this source code in the LICENSE file, you can obtain it
-# at www.aomedia.org/license/software. If the Alliance for Open Media Patent
-# License 1.0 was not distributed with this source code in the PATENTS file, you
-# can obtain it at www.aomedia.org/license/patent.
-#
-if(AOM_TEST_TEST_CMAKE_)
- return()
-endif() # AOM_TEST_TEST_CMAKE_
-set(AOM_TEST_TEST_CMAKE_ 1)
-
-include(FindPythonInterp)
-include(ProcessorCount)
-
-include("${AOM_ROOT}/test/test_data_util.cmake")
-
-set(AOM_UNIT_TEST_DATA_LIST_FILE "${AOM_ROOT}/test/test-data.sha1")
-
-list(APPEND AOM_UNIT_TEST_WRAPPER_SOURCES "${AOM_GEN_SRC_DIR}/usage_exit.c"
- "${AOM_ROOT}/test/test_libaom.cc")
-
-list(APPEND AOM_UNIT_TEST_COMMON_SOURCES
- "${AOM_ROOT}/test/acm_random.h"
- "${AOM_ROOT}/test/aom_integer_test.cc"
- "${AOM_ROOT}/test/av1_config_test.cc"
- "${AOM_ROOT}/test/blockd_test.cc"
- "${AOM_ROOT}/test/clear_system_state.h"
- "${AOM_ROOT}/test/codec_factory.h"
- "${AOM_ROOT}/test/decode_test_driver.cc"
- "${AOM_ROOT}/test/decode_test_driver.h"
- "${AOM_ROOT}/test/function_equivalence_test.h"
- "${AOM_ROOT}/test/log2_test.cc"
- "${AOM_ROOT}/test/md5_helper.h"
- "${AOM_ROOT}/test/register_state_check.h"
- "${AOM_ROOT}/test/test_vectors.cc"
- "${AOM_ROOT}/test/test_vectors.h"
- "${AOM_ROOT}/test/transform_test_base.h"
- "${AOM_ROOT}/test/util.h"
- "${AOM_ROOT}/test/video_source.h")
-
-if(CONFIG_INTERNAL_STATS)
- list(APPEND AOM_UNIT_TEST_COMMON_SOURCES
- "${AOM_ROOT}/test/hbd_metrics_test.cc")
-endif()
-
-list(APPEND AOM_UNIT_TEST_DECODER_SOURCES "${AOM_ROOT}/test/decode_api_test.cc"
- "${AOM_ROOT}/test/external_frame_buffer_test.cc"
- "${AOM_ROOT}/test/invalid_file_test.cc"
- "${AOM_ROOT}/test/test_vector_test.cc"
- "${AOM_ROOT}/test/ivf_video_source.h")
-
-list(APPEND AOM_UNIT_TEST_ENCODER_SOURCES
- "${AOM_ROOT}/test/active_map_test.cc"
- "${AOM_ROOT}/test/altref_test.cc"
- "${AOM_ROOT}/test/aq_segment_test.cc"
- "${AOM_ROOT}/test/borders_test.cc"
- "${AOM_ROOT}/test/cpu_speed_test.cc"
- "${AOM_ROOT}/test/datarate_test.cc"
- "${AOM_ROOT}/test/encode_api_test.cc"
- "${AOM_ROOT}/test/encode_test_driver.cc"
- "${AOM_ROOT}/test/encode_test_driver.h"
- "${AOM_ROOT}/test/end_to_end_test.cc"
- "${AOM_ROOT}/test/error_resilience_test.cc"
- "${AOM_ROOT}/test/frame_size_tests.cc"
- "${AOM_ROOT}/test/horz_superres_test.cc"
- "${AOM_ROOT}/test/i420_video_source.h"
- "${AOM_ROOT}/test/lossless_test.cc"
- "${AOM_ROOT}/test/monochrome_test.cc"
- "${AOM_ROOT}/test/qm_test.cc"
- "${AOM_ROOT}/test/resize_test.cc"
- "${AOM_ROOT}/test/scalability_test.cc"
- "${AOM_ROOT}/test/y4m_test.cc"
- "${AOM_ROOT}/test/y4m_video_source.h"
- "${AOM_ROOT}/test/yuv_video_source.h")
-
-list(APPEND AOM_DECODE_PERF_TEST_SOURCES "${AOM_ROOT}/test/decode_perf_test.cc")
-list(APPEND AOM_ENCODE_PERF_TEST_SOURCES "${AOM_ROOT}/test/encode_perf_test.cc")
-list(APPEND AOM_UNIT_TEST_WEBM_SOURCES "${AOM_ROOT}/test/webm_video_source.h")
-list(APPEND AOM_TEST_INTRA_PRED_SPEED_SOURCES "${AOM_GEN_SRC_DIR}/usage_exit.c"
- "${AOM_ROOT}/test/test_intra_pred_speed.cc")
-
-if(NOT BUILD_SHARED_LIBS)
- list(APPEND AOM_UNIT_TEST_COMMON_SOURCES
- "${AOM_ROOT}/test/cdef_test.cc"
- "${AOM_ROOT}/test/cfl_test.cc"
- "${AOM_ROOT}/test/convolve_test.cc"
- "${AOM_ROOT}/test/hiprec_convolve_test.cc"
- "${AOM_ROOT}/test/hiprec_convolve_test_util.cc"
- "${AOM_ROOT}/test/hiprec_convolve_test_util.h"
- "${AOM_ROOT}/test/intrabc_test.cc"
- "${AOM_ROOT}/test/intrapred_test.cc"
- "${AOM_ROOT}/test/lpf_test.cc"
- "${AOM_ROOT}/test/onyxc_int_test.cc"
- "${AOM_ROOT}/test/scan_test.cc"
- "${AOM_ROOT}/test/selfguided_filter_test.cc"
- "${AOM_ROOT}/test/simd_cmp_impl.h"
- "${AOM_ROOT}/test/simd_impl.h")
-
- if(CONFIG_ACCOUNTING)
- list(APPEND AOM_UNIT_TEST_COMMON_SOURCES
- "${AOM_ROOT}/test/accounting_test.cc")
- endif()
-
- if(CONFIG_AV1_DECODER AND CONFIG_AV1_ENCODER)
- list(APPEND AOM_UNIT_TEST_COMMON_SOURCES
- "${AOM_ROOT}/test/av1_encoder_parms_get_to_decoder.cc"
- "${AOM_ROOT}/test/av1_ext_tile_test.cc"
- "${AOM_ROOT}/test/binary_codes_test.cc"
- "${AOM_ROOT}/test/boolcoder_test.cc"
- "${AOM_ROOT}/test/coding_path_sync.cc"
- "${AOM_ROOT}/test/decode_multithreaded_test.cc"
- "${AOM_ROOT}/test/divu_small_test.cc"
- "${AOM_ROOT}/test/dr_prediction_test.cc"
- "${AOM_ROOT}/test/ec_test.cc"
- "${AOM_ROOT}/test/ethread_test.cc"
- "${AOM_ROOT}/test/film_grain_table_test.cc"
- "${AOM_ROOT}/test/segment_binarization_sync.cc"
- "${AOM_ROOT}/test/superframe_test.cc"
- "${AOM_ROOT}/test/tile_independence_test.cc")
- endif()
-
- list(APPEND AOM_UNIT_TEST_COMMON_INTRIN_NEON
- "${AOM_ROOT}/test/simd_cmp_neon.cc")
- if(HAVE_NEON)
- list(APPEND AOM_UNIT_TEST_COMMON_SOURCES
- "${AOM_ROOT}/test/simd_neon_test.cc")
- endif()
-
- list(APPEND AOM_UNIT_TEST_COMMON_INTRIN_SSE2
- "${AOM_ROOT}/test/simd_cmp_sse2.cc")
- if(HAVE_SSE2)
- list(APPEND AOM_UNIT_TEST_COMMON_SOURCES
- "${AOM_ROOT}/test/simd_sse2_test.cc")
- endif()
-
- list(APPEND AOM_UNIT_TEST_COMMON_INTRIN_SSSE3
- "${AOM_ROOT}/test/simd_cmp_ssse3.cc")
- if(HAVE_SSSE3)
- list(APPEND AOM_UNIT_TEST_COMMON_SOURCES
- "${AOM_ROOT}/test/simd_ssse3_test.cc")
- endif()
-
- if(HAVE_SSE4)
- list(APPEND AOM_UNIT_TEST_COMMON_SOURCES
- "${AOM_ROOT}/test/simd_sse4_test.cc")
- endif()
-
- if(HAVE_SSE4_1)
- list(APPEND AOM_UNIT_TEST_COMMON_SOURCES
- "${AOM_ROOT}/test/filterintra_test.cc")
- endif()
-
- list(APPEND AOM_UNIT_TEST_COMMON_INTRIN_AVX2
- "${AOM_ROOT}/test/simd_cmp_avx2.cc")
- if(HAVE_AVX2)
- list(APPEND AOM_UNIT_TEST_COMMON_SOURCES
- "${AOM_ROOT}/test/simd_avx2_test.cc")
- endif()
-
- list(APPEND AOM_UNIT_TEST_ENCODER_SOURCES
- "${AOM_ROOT}/test/arf_freq_test.cc"
- "${AOM_ROOT}/test/av1_convolve_2d_test.cc"
- "${AOM_ROOT}/test/av1_convolve_2d_test_util.cc"
- "${AOM_ROOT}/test/av1_convolve_2d_test_util.h"
- "${AOM_ROOT}/test/av1_fwd_txfm1d_test.cc"
- "${AOM_ROOT}/test/av1_fwd_txfm2d_test.cc"
- "${AOM_ROOT}/test/av1_inv_txfm1d_test.cc"
- "${AOM_ROOT}/test/av1_inv_txfm2d_test.cc"
- "${AOM_ROOT}/test/av1_round_shift_array_test.cc"
- "${AOM_ROOT}/test/av1_txfm_test.cc"
- "${AOM_ROOT}/test/av1_txfm_test.h"
- "${AOM_ROOT}/test/av1_wedge_utils_test.cc"
- "${AOM_ROOT}/test/blend_a64_mask_1d_test.cc"
- "${AOM_ROOT}/test/blend_a64_mask_test.cc"
- "${AOM_ROOT}/test/comp_avg_pred_test.cc"
- "${AOM_ROOT}/test/comp_avg_pred_test.h"
- "${AOM_ROOT}/test/comp_mask_variance_test.cc"
- "${AOM_ROOT}/test/encodetxb_test.cc"
- "${AOM_ROOT}/test/error_block_test.cc"
- "${AOM_ROOT}/test/fft_test.cc"
- "${AOM_ROOT}/test/fwht4x4_test.cc"
- "${AOM_ROOT}/test/masked_sad_test.cc"
- "${AOM_ROOT}/test/masked_variance_test.cc"
- "${AOM_ROOT}/test/motion_vector_test.cc"
- "${AOM_ROOT}/test/noise_model_test.cc"
- "${AOM_ROOT}/test/obmc_sad_test.cc"
- "${AOM_ROOT}/test/obmc_variance_test.cc"
- "${AOM_ROOT}/test/pickrst_test.cc"
- "${AOM_ROOT}/test/sad_test.cc"
- "${AOM_ROOT}/test/subtract_test.cc"
- "${AOM_ROOT}/test/reconinter_test.cc"
- "${AOM_ROOT}/test/sum_squares_test.cc"
- "${AOM_ROOT}/test/variance_test.cc"
- "${AOM_ROOT}/test/wiener_test.cc"
- "${AOM_ROOT}/test/warp_filter_test.cc"
- "${AOM_ROOT}/test/warp_filter_test_util.cc"
- "${AOM_ROOT}/test/warp_filter_test_util.h")
-
- list(APPEND AOM_UNIT_TEST_ENCODER_INTRIN_SSE4_1
- "${AOM_ROOT}/test/av1_highbd_iht_test.cc"
- "${AOM_ROOT}/test/av1_quantize_test.cc"
- "${AOM_ROOT}/test/corner_match_test.cc"
- "${AOM_ROOT}/test/quantize_func_test.cc"
- "${AOM_ROOT}/test/simd_cmp_sse4.cc")
-
- if(HAVE_SSE4_1)
- list(APPEND AOM_UNIT_TEST_ENCODER_SOURCES
- "${AOM_ROOT}/test/av1_convolve_scale_test.cc"
- "${AOM_ROOT}/test/av1_horz_only_frame_superres_test.cc"
- "${AOM_ROOT}/test/intra_edge_test.cc")
-
- endif()
-
- if(HAVE_SSE4_2)
- list(APPEND AOM_UNIT_TEST_ENCODER_SOURCES "${AOM_ROOT}/test/hash_test.cc")
- endif()
-
-endif()
-
-if(ENABLE_TESTS)
- find_package(PythonInterp)
- if(NOT PYTHONINTERP_FOUND)
- message(FATAL_ERROR
- "--- Unit tests require Python, rerun cmake with "
- "-DENABLE_TESTS=0 to avoid this error, or install Python and "
- "make sure it's in your PATH.")
- endif()
-
- if(MSVC) # Force static run time to avoid collisions with googletest.
- include("${AOM_ROOT}/build/cmake/msvc_runtime.cmake")
- if(BUILD_SHARED_LIBS)
- set(AOM_DISABLE_GTEST_CMAKE 1)
- endif()
- endif()
-
- if(BUILD_SHARED_LIBS AND APPLE) # Silence an RPATH warning.
- set(CMAKE_MACOSX_RPATH 1)
- endif()
-
- include_directories(
- "${AOM_ROOT}/third_party/googletest/src/googletest/include")
-
- if(AOM_DISABLE_GTEST_CMAKE)
- include_directories("${AOM_ROOT}/third_party/googletest/src/googletest")
- add_library(
- gtest
- STATIC
- "${AOM_ROOT}/third_party/googletest/src/googletest/src/gtest-all.cc")
- else()
- add_subdirectory("${AOM_ROOT}/third_party/googletest/src/googletest"
- EXCLUDE_FROM_ALL)
- endif()
-endif()
-
-# Setup testdata download targets, test build targets, and test run targets. The
-# libaom and app util targets must exist before this function is called.
-function(setup_aom_test_targets)
-
- # TODO(tomfinegan): Build speed optimization. $AOM_UNIT_TEST_COMMON_SOURCES
- # and $AOM_UNIT_TEST_ENCODER_SOURCES are very large. The build of test targets
- # could be sped up (on multicore build machines) by compiling sources in each
- # list into separate object library targets, and then linking them into
- # test_libaom.
- add_library(test_aom_common OBJECT ${AOM_UNIT_TEST_COMMON_SOURCES})
- add_dependencies(test_aom_common aom)
-
- if(CONFIG_AV1_DECODER)
- add_library(test_aom_decoder OBJECT ${AOM_UNIT_TEST_DECODER_SOURCES})
- add_dependencies(test_aom_decoder aom)
- endif()
-
- if(CONFIG_AV1_ENCODER)
- add_library(test_aom_encoder OBJECT ${AOM_UNIT_TEST_ENCODER_SOURCES})
- add_dependencies(test_aom_encoder aom)
- endif()
-
- add_executable(test_libaom ${AOM_UNIT_TEST_WRAPPER_SOURCES}
- $<TARGET_OBJECTS:aom_common_app_util>
- $<TARGET_OBJECTS:test_aom_common>)
- list(APPEND AOM_APP_TARGETS test_libaom)
-
- if(CONFIG_AV1_DECODER)
- target_sources(test_libaom PRIVATE $<TARGET_OBJECTS:aom_decoder_app_util>
- $<TARGET_OBJECTS:test_aom_decoder>)
-
- if(ENABLE_DECODE_PERF_TESTS AND CONFIG_WEBM_IO)
- target_sources(test_libaom PRIVATE ${AOM_DECODE_PERF_TEST_SOURCES})
- endif()
- endif()
-
- if(CONFIG_AV1_ENCODER)
- target_sources(test_libaom PRIVATE $<TARGET_OBJECTS:test_aom_encoder>
- $<TARGET_OBJECTS:aom_encoder_app_util>)
-
- if(ENABLE_ENCODE_PERF_TESTS)
- target_sources(test_libaom PRIVATE ${AOM_ENCODE_PERF_TEST_SOURCES})
- endif()
-
- if(NOT BUILD_SHARED_LIBS)
- add_executable(test_intra_pred_speed ${AOM_TEST_INTRA_PRED_SPEED_SOURCES}
- $<TARGET_OBJECTS:aom_common_app_util>)
- target_link_libraries(test_intra_pred_speed ${AOM_LIB_LINK_TYPE} aom
- gtest)
- list(APPEND AOM_APP_TARGETS test_intra_pred_speed)
- endif()
- endif()
-
- target_link_libraries(test_libaom ${AOM_LIB_LINK_TYPE} aom gtest)
-
- if(CONFIG_LIBYUV)
- target_sources(test_libaom PRIVATE $<TARGET_OBJECTS:yuv>)
- endif()
- if(CONFIG_WEBM_IO)
- target_sources(test_libaom PRIVATE $<TARGET_OBJECTS:webm>)
- endif()
- if(HAVE_SSE2)
- add_intrinsics_source_to_target("-msse2" "test_libaom"
- "AOM_UNIT_TEST_COMMON_INTRIN_SSE2")
- endif()
- if(HAVE_SSSE3)
- add_intrinsics_source_to_target("-mssse3" "test_libaom"
- "AOM_UNIT_TEST_COMMON_INTRIN_SSSE3")
- endif()
- if(HAVE_SSE4_1)
- add_intrinsics_source_to_target("-msse4.1" "test_libaom"
- "AOM_UNIT_TEST_COMMON_INTRIN_SSE4_1")
- if(CONFIG_AV1_ENCODER)
- if(AOM_UNIT_TEST_ENCODER_INTRIN_SSE4_1)
- add_intrinsics_source_to_target("-msse4.1" "test_libaom"
- "AOM_UNIT_TEST_ENCODER_INTRIN_SSE4_1")
- endif()
- endif()
- endif()
- if(HAVE_AVX2)
- add_intrinsics_source_to_target("-mavx2" "test_libaom"
- "AOM_UNIT_TEST_COMMON_INTRIN_AVX2")
- endif()
- if(HAVE_NEON)
- add_intrinsics_source_to_target("${AOM_NEON_INTRIN_FLAG}" "test_libaom"
- "AOM_UNIT_TEST_COMMON_INTRIN_NEON")
- endif()
-
- if(ENABLE_TESTDATA)
- make_test_data_lists("${AOM_UNIT_TEST_DATA_LIST_FILE}" test_files
- test_file_checksums)
- list(LENGTH test_files num_test_files)
- list(LENGTH test_file_checksums num_test_file_checksums)
-
- math(EXPR max_file_index "${num_test_files} - 1")
- foreach(test_index RANGE ${max_file_index})
- list(GET test_files ${test_index} test_file)
- list(GET test_file_checksums ${test_index} test_file_checksum)
- add_custom_target(testdata_${test_index}
- COMMAND
- ${CMAKE_COMMAND} -DAOM_CONFIG_DIR="${AOM_CONFIG_DIR}"
- -DAOM_ROOT="${AOM_ROOT}"
- -DAOM_TEST_FILE="${test_file}"
- -DAOM_TEST_CHECKSUM=${test_file_checksum} -P
- "${AOM_ROOT}/test/test_data_download_worker.cmake")
- list(APPEND testdata_targets testdata_${test_index})
- endforeach()
-
- # Create a custom build target for running each test data download target.
- add_custom_target(testdata)
- add_dependencies(testdata ${testdata_targets})
-
- # Skip creation of test run targets when generating for Visual Studio and
- # Xcode unless the user explicitly requests IDE test hosting. This is done
- # to make build cycles in the IDE tolerable when the IDE command for build
- # project is used to build AOM. Default behavior in IDEs is to build all
- # targets, and the test run takes hours.
- if(((NOT MSVC) AND (NOT XCODE)) OR ENABLE_IDE_TEST_HOSTING)
-
- # Pick a reasonable number of targets (this controls parallelization).
- processorcount(num_test_targets)
- if(num_test_targets EQUAL 0) # Just default to 10 targets when there's no
- # processor count available.
- set(num_test_targets 10)
- endif()
-
- math(EXPR max_shard_index "${num_test_targets} - 1")
- foreach(shard_index RANGE ${max_shard_index})
- set(test_name "test_${shard_index}")
- add_custom_target(${test_name}
- COMMAND ${CMAKE_COMMAND}
- -DGTEST_SHARD_INDEX=${shard_index}
- -DGTEST_TOTAL_SHARDS=${num_test_targets}
- -DTEST_LIBAOM=$<TARGET_FILE:test_libaom> -P
- "${AOM_ROOT}/test/test_runner.cmake"
- DEPENDS testdata test_libaom)
- list(APPEND test_targets ${test_name})
- endforeach()
- add_custom_target(runtests)
- add_dependencies(runtests ${test_targets})
- endif()
- endif()
-
- # Collect all variables containing libaom test source files.
- get_cmake_property(all_cmake_vars VARIABLES)
- foreach(var ${all_cmake_vars})
-
- # https://github.com/cheshirekow/cmake_format/issues/34
-# cmake-format: off
- if (("${var}" MATCHES "_TEST_" AND NOT
- "${var}" MATCHES
- "_DATA_\|_CMAKE_\|INTRA_PRED\|_COMPILED\|_HOSTING\|_PERF_\|CODER_")
- OR (CONFIG_AV1_ENCODER AND ENABLE_ENCODE_PERF_TESTS AND
- "${var}" MATCHES "_ENCODE_PERF_TEST_")
- OR (CONFIG_AV1_DECODER AND ENABLE_DECODE_PERF_TESTS AND
- "${var}" MATCHES "_DECODE_PERF_TEST_")
- OR (CONFIG_AV1_ENCODER AND "${var}" MATCHES "_TEST_ENCODER_")
- OR (CONFIG_AV1_DECODER AND "${var}" MATCHES "_TEST_DECODER_"))
- list(APPEND aom_test_source_vars ${var})
- endif()
- # cmake-format: on
- endforeach()
-
- # Libaom_test_srcs.txt generation.
- set(libaom_test_srcs_txt_file "${AOM_CONFIG_DIR}/libaom_test_srcs.txt")
- file(WRITE "${libaom_test_srcs_txt_file}"
- "# This file is generated. DO NOT EDIT.\n")
-
- # Static source file list first.
- foreach(aom_test_source_var ${aom_test_source_vars})
- foreach(file ${${aom_test_source_var}})
- if(NOT "${file}" MATCHES "${AOM_CONFIG_DIR}")
- string(REPLACE "${AOM_ROOT}/" "" file "${file}")
- file(APPEND "${libaom_test_srcs_txt_file}" "${file}\n")
- endif()
- endforeach()
- endforeach()
-
- set(AOM_APP_TARGETS ${AOM_APP_TARGETS} PARENT_SCOPE)
-endfunction()
diff --git a/third_party/aom/test/test_data_download_worker.cmake b/third_party/aom/test/test_data_download_worker.cmake
deleted file mode 100644
index dc803497d..000000000
--- a/third_party/aom/test/test_data_download_worker.cmake
+++ /dev/null
@@ -1,46 +0,0 @@
-#
-# Copyright (c) 2017, Alliance for Open Media. All rights reserved
-#
-# This source code is subject to the terms of the BSD 2 Clause License and the
-# Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License was
-# not distributed with this source code in the LICENSE file, you can obtain it
-# at www.aomedia.org/license/software. If the Alliance for Open Media Patent
-# License 1.0 was not distributed with this source code in the PATENTS file, you
-# can obtain it at www.aomedia.org/license/patent.
-#
-include("${AOM_ROOT}/test/test_data_util.cmake")
-
-# https://github.com/cheshirekow/cmake_format/issues/34
-# cmake-format: off
-if (NOT AOM_ROOT OR NOT AOM_CONFIG_DIR OR NOT AOM_TEST_FILE
- OR NOT AOM_TEST_CHECKSUM)
- message(FATAL_ERROR
- "AOM_ROOT, AOM_CONFIG_DIR, AOM_TEST_FILE and AOM_TEST_CHECKSUM must be
- defined.")
-endif ()
-# cmake-format: on
-
-set(AOM_TEST_DATA_URL "http://storage.googleapis.com/aom-test-data")
-
-if(NOT AOM_TEST_DATA_PATH)
- set(AOM_TEST_DATA_PATH "$ENV{LIBAOM_TEST_DATA_PATH}")
-endif()
-
-if("${AOM_TEST_DATA_PATH}" STREQUAL "")
- message(WARNING
- "Writing test data to ${AOM_CONFIG_DIR}, set "
- "$LIBAOM_TEST_DATA_PATH in your environment to avoid this warning.")
- set(AOM_TEST_DATA_PATH "${AOM_CONFIG_DIR}")
-endif()
-
-if(NOT EXISTS "${AOM_TEST_DATA_PATH}")
- file(MAKE_DIRECTORY "${AOM_TEST_DATA_PATH}")
-endif()
-
-expand_test_file_paths("AOM_TEST_FILE" "${AOM_TEST_DATA_PATH}" "filepath")
-expand_test_file_paths("AOM_TEST_FILE" "${AOM_TEST_DATA_URL}" "url")
-
-check_file("${filepath}" "${AOM_TEST_CHECKSUM}" "needs_download")
-if(needs_download)
- download_test_file("${url}" "${AOM_TEST_CHECKSUM}" "${filepath}")
-endif()
diff --git a/third_party/aom/test/test_data_util.cmake b/third_party/aom/test/test_data_util.cmake
deleted file mode 100644
index 45c951478..000000000
--- a/third_party/aom/test/test_data_util.cmake
+++ /dev/null
@@ -1,598 +0,0 @@
-#
-# Copyright (c) 2017, Alliance for Open Media. All rights reserved
-#
-# This source code is subject to the terms of the BSD 2 Clause License and the
-# Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License was
-# not distributed with this source code in the LICENSE file, you can obtain it
-# at www.aomedia.org/license/software. If the Alliance for Open Media Patent
-# License 1.0 was not distributed with this source code in the PATENTS file, you
-# can obtain it at www.aomedia.org/license/patent.
-#
-
-list(APPEND AOM_TEST_DATA_FILE_NAMES
- "hantro_collage_w352h288.yuv"
- "hantro_odd.yuv"
- "invalid-bug-1814.ivf"
- "invalid-bug-1814.ivf.res"
- "invalid-oss-fuzz-10061.ivf"
- "invalid-oss-fuzz-10061.ivf.res"
- "invalid-oss-fuzz-10117-mc-buf-use-highbd.ivf"
- "invalid-oss-fuzz-10117-mc-buf-use-highbd.ivf.res"
- "invalid-oss-fuzz-10227.ivf"
- "invalid-oss-fuzz-10227.ivf.res"
- "invalid-oss-fuzz-9463.ivf"
- "invalid-oss-fuzz-9463.ivf.res"
- "invalid-oss-fuzz-9482.ivf"
- "invalid-oss-fuzz-9482.ivf.res"
- "invalid-oss-fuzz-9720.ivf"
- "invalid-oss-fuzz-9720.ivf.res"
- "park_joy_90p_10_420.y4m"
- "park_joy_90p_10_422.y4m"
- "park_joy_90p_10_444.y4m"
- "park_joy_90p_12_420.y4m"
- "park_joy_90p_12_422.y4m"
- "park_joy_90p_12_444.y4m"
- "park_joy_90p_8_420_a10-1.y4m"
- "park_joy_90p_8_420.y4m"
- "park_joy_90p_8_420_monochrome.y4m"
- "park_joy_90p_8_420_vertical_csp.y4m"
- "park_joy_90p_8_422.y4m"
- "park_joy_90p_8_444.y4m"
- "desktop_credits.y4m"
- "niklas_1280_720_30.y4m"
- "rush_hour_444.y4m"
- "screendata.y4m"
- "niklas_640_480_30.yuv"
- "vase10x10.yuv")
-
-if(ENABLE_DECODE_PERF_TESTS AND CONFIG_AV1_ENCODER)
- list(APPEND AOM_TEST_DATA_FILE_NAMES "niklas_1280_720_30.yuv")
-endif()
-
-if(CONFIG_AV1_DECODER)
- list(APPEND AOM_TEST_DATA_FILE_NAMES
- "av1-1-b8-00-quantizer-00.ivf"
- "av1-1-b8-00-quantizer-00.ivf.md5"
- "av1-1-b8-00-quantizer-01.ivf"
- "av1-1-b8-00-quantizer-01.ivf.md5"
- "av1-1-b8-00-quantizer-02.ivf"
- "av1-1-b8-00-quantizer-02.ivf.md5"
- "av1-1-b8-00-quantizer-03.ivf"
- "av1-1-b8-00-quantizer-03.ivf.md5"
- "av1-1-b8-00-quantizer-04.ivf"
- "av1-1-b8-00-quantizer-04.ivf.md5"
- "av1-1-b8-00-quantizer-05.ivf"
- "av1-1-b8-00-quantizer-05.ivf.md5"
- "av1-1-b8-00-quantizer-06.ivf"
- "av1-1-b8-00-quantizer-06.ivf.md5"
- "av1-1-b8-00-quantizer-07.ivf"
- "av1-1-b8-00-quantizer-07.ivf.md5"
- "av1-1-b8-00-quantizer-08.ivf"
- "av1-1-b8-00-quantizer-08.ivf.md5"
- "av1-1-b8-00-quantizer-09.ivf"
- "av1-1-b8-00-quantizer-09.ivf.md5"
- "av1-1-b8-00-quantizer-10.ivf"
- "av1-1-b8-00-quantizer-10.ivf.md5"
- "av1-1-b8-00-quantizer-11.ivf"
- "av1-1-b8-00-quantizer-11.ivf.md5"
- "av1-1-b8-00-quantizer-12.ivf"
- "av1-1-b8-00-quantizer-12.ivf.md5"
- "av1-1-b8-00-quantizer-13.ivf"
- "av1-1-b8-00-quantizer-13.ivf.md5"
- "av1-1-b8-00-quantizer-14.ivf"
- "av1-1-b8-00-quantizer-14.ivf.md5"
- "av1-1-b8-00-quantizer-15.ivf"
- "av1-1-b8-00-quantizer-15.ivf.md5"
- "av1-1-b8-00-quantizer-16.ivf"
- "av1-1-b8-00-quantizer-16.ivf.md5"
- "av1-1-b8-00-quantizer-17.ivf"
- "av1-1-b8-00-quantizer-17.ivf.md5"
- "av1-1-b8-00-quantizer-18.ivf"
- "av1-1-b8-00-quantizer-18.ivf.md5"
- "av1-1-b8-00-quantizer-19.ivf"
- "av1-1-b8-00-quantizer-19.ivf.md5"
- "av1-1-b8-00-quantizer-20.ivf"
- "av1-1-b8-00-quantizer-20.ivf.md5"
- "av1-1-b8-00-quantizer-21.ivf"
- "av1-1-b8-00-quantizer-21.ivf.md5"
- "av1-1-b8-00-quantizer-22.ivf"
- "av1-1-b8-00-quantizer-22.ivf.md5"
- "av1-1-b8-00-quantizer-23.ivf"
- "av1-1-b8-00-quantizer-23.ivf.md5"
- "av1-1-b8-00-quantizer-24.ivf"
- "av1-1-b8-00-quantizer-24.ivf.md5"
- "av1-1-b8-00-quantizer-25.ivf"
- "av1-1-b8-00-quantizer-25.ivf.md5"
- "av1-1-b8-00-quantizer-26.ivf"
- "av1-1-b8-00-quantizer-26.ivf.md5"
- "av1-1-b8-00-quantizer-27.ivf"
- "av1-1-b8-00-quantizer-27.ivf.md5"
- "av1-1-b8-00-quantizer-28.ivf"
- "av1-1-b8-00-quantizer-28.ivf.md5"
- "av1-1-b8-00-quantizer-29.ivf"
- "av1-1-b8-00-quantizer-29.ivf.md5"
- "av1-1-b8-00-quantizer-30.ivf"
- "av1-1-b8-00-quantizer-30.ivf.md5"
- "av1-1-b8-00-quantizer-31.ivf"
- "av1-1-b8-00-quantizer-31.ivf.md5"
- "av1-1-b8-00-quantizer-32.ivf"
- "av1-1-b8-00-quantizer-32.ivf.md5"
- "av1-1-b8-00-quantizer-33.ivf"
- "av1-1-b8-00-quantizer-33.ivf.md5"
- "av1-1-b8-00-quantizer-34.ivf"
- "av1-1-b8-00-quantizer-34.ivf.md5"
- "av1-1-b8-00-quantizer-35.ivf"
- "av1-1-b8-00-quantizer-35.ivf.md5"
- "av1-1-b8-00-quantizer-36.ivf"
- "av1-1-b8-00-quantizer-36.ivf.md5"
- "av1-1-b8-00-quantizer-37.ivf"
- "av1-1-b8-00-quantizer-37.ivf.md5"
- "av1-1-b8-00-quantizer-38.ivf"
- "av1-1-b8-00-quantizer-38.ivf.md5"
- "av1-1-b8-00-quantizer-39.ivf"
- "av1-1-b8-00-quantizer-39.ivf.md5"
- "av1-1-b8-00-quantizer-40.ivf"
- "av1-1-b8-00-quantizer-40.ivf.md5"
- "av1-1-b8-00-quantizer-41.ivf"
- "av1-1-b8-00-quantizer-41.ivf.md5"
- "av1-1-b8-00-quantizer-42.ivf"
- "av1-1-b8-00-quantizer-42.ivf.md5"
- "av1-1-b8-00-quantizer-43.ivf"
- "av1-1-b8-00-quantizer-43.ivf.md5"
- "av1-1-b8-00-quantizer-44.ivf"
- "av1-1-b8-00-quantizer-44.ivf.md5"
- "av1-1-b8-00-quantizer-45.ivf"
- "av1-1-b8-00-quantizer-45.ivf.md5"
- "av1-1-b8-00-quantizer-46.ivf"
- "av1-1-b8-00-quantizer-46.ivf.md5"
- "av1-1-b8-00-quantizer-47.ivf"
- "av1-1-b8-00-quantizer-47.ivf.md5"
- "av1-1-b8-00-quantizer-48.ivf"
- "av1-1-b8-00-quantizer-48.ivf.md5"
- "av1-1-b8-00-quantizer-49.ivf"
- "av1-1-b8-00-quantizer-49.ivf.md5"
- "av1-1-b8-00-quantizer-50.ivf"
- "av1-1-b8-00-quantizer-50.ivf.md5"
- "av1-1-b8-00-quantizer-51.ivf"
- "av1-1-b8-00-quantizer-51.ivf.md5"
- "av1-1-b8-00-quantizer-52.ivf"
- "av1-1-b8-00-quantizer-52.ivf.md5"
- "av1-1-b8-00-quantizer-53.ivf"
- "av1-1-b8-00-quantizer-53.ivf.md5"
- "av1-1-b8-00-quantizer-54.ivf"
- "av1-1-b8-00-quantizer-54.ivf.md5"
- "av1-1-b8-00-quantizer-55.ivf"
- "av1-1-b8-00-quantizer-55.ivf.md5"
- "av1-1-b8-00-quantizer-56.ivf"
- "av1-1-b8-00-quantizer-56.ivf.md5"
- "av1-1-b8-00-quantizer-57.ivf"
- "av1-1-b8-00-quantizer-57.ivf.md5"
- "av1-1-b8-00-quantizer-58.ivf"
- "av1-1-b8-00-quantizer-58.ivf.md5"
- "av1-1-b8-00-quantizer-59.ivf"
- "av1-1-b8-00-quantizer-59.ivf.md5"
- "av1-1-b8-00-quantizer-60.ivf"
- "av1-1-b8-00-quantizer-60.ivf.md5"
- "av1-1-b8-00-quantizer-61.ivf"
- "av1-1-b8-00-quantizer-61.ivf.md5"
- "av1-1-b8-00-quantizer-62.ivf"
- "av1-1-b8-00-quantizer-62.ivf.md5"
- "av1-1-b8-00-quantizer-63.ivf"
- "av1-1-b8-00-quantizer-63.ivf.md5"
- "av1-1-b10-00-quantizer-00.ivf"
- "av1-1-b10-00-quantizer-00.ivf.md5"
- "av1-1-b10-00-quantizer-01.ivf"
- "av1-1-b10-00-quantizer-01.ivf.md5"
- "av1-1-b10-00-quantizer-02.ivf"
- "av1-1-b10-00-quantizer-02.ivf.md5"
- "av1-1-b10-00-quantizer-03.ivf"
- "av1-1-b10-00-quantizer-03.ivf.md5"
- "av1-1-b10-00-quantizer-04.ivf"
- "av1-1-b10-00-quantizer-04.ivf.md5"
- "av1-1-b10-00-quantizer-05.ivf"
- "av1-1-b10-00-quantizer-05.ivf.md5"
- "av1-1-b10-00-quantizer-06.ivf"
- "av1-1-b10-00-quantizer-06.ivf.md5"
- "av1-1-b10-00-quantizer-07.ivf"
- "av1-1-b10-00-quantizer-07.ivf.md5"
- "av1-1-b10-00-quantizer-08.ivf"
- "av1-1-b10-00-quantizer-08.ivf.md5"
- "av1-1-b10-00-quantizer-09.ivf"
- "av1-1-b10-00-quantizer-09.ivf.md5"
- "av1-1-b10-00-quantizer-10.ivf"
- "av1-1-b10-00-quantizer-10.ivf.md5"
- "av1-1-b10-00-quantizer-11.ivf"
- "av1-1-b10-00-quantizer-11.ivf.md5"
- "av1-1-b10-00-quantizer-12.ivf"
- "av1-1-b10-00-quantizer-12.ivf.md5"
- "av1-1-b10-00-quantizer-13.ivf"
- "av1-1-b10-00-quantizer-13.ivf.md5"
- "av1-1-b10-00-quantizer-14.ivf"
- "av1-1-b10-00-quantizer-14.ivf.md5"
- "av1-1-b10-00-quantizer-15.ivf"
- "av1-1-b10-00-quantizer-15.ivf.md5"
- "av1-1-b10-00-quantizer-16.ivf"
- "av1-1-b10-00-quantizer-16.ivf.md5"
- "av1-1-b10-00-quantizer-17.ivf"
- "av1-1-b10-00-quantizer-17.ivf.md5"
- "av1-1-b10-00-quantizer-18.ivf"
- "av1-1-b10-00-quantizer-18.ivf.md5"
- "av1-1-b10-00-quantizer-19.ivf"
- "av1-1-b10-00-quantizer-19.ivf.md5"
- "av1-1-b10-00-quantizer-20.ivf"
- "av1-1-b10-00-quantizer-20.ivf.md5"
- "av1-1-b10-00-quantizer-21.ivf"
- "av1-1-b10-00-quantizer-21.ivf.md5"
- "av1-1-b10-00-quantizer-22.ivf"
- "av1-1-b10-00-quantizer-22.ivf.md5"
- "av1-1-b10-00-quantizer-23.ivf"
- "av1-1-b10-00-quantizer-23.ivf.md5"
- "av1-1-b10-00-quantizer-24.ivf"
- "av1-1-b10-00-quantizer-24.ivf.md5"
- "av1-1-b10-00-quantizer-25.ivf"
- "av1-1-b10-00-quantizer-25.ivf.md5"
- "av1-1-b10-00-quantizer-26.ivf"
- "av1-1-b10-00-quantizer-26.ivf.md5"
- "av1-1-b10-00-quantizer-27.ivf"
- "av1-1-b10-00-quantizer-27.ivf.md5"
- "av1-1-b10-00-quantizer-28.ivf"
- "av1-1-b10-00-quantizer-28.ivf.md5"
- "av1-1-b10-00-quantizer-29.ivf"
- "av1-1-b10-00-quantizer-29.ivf.md5"
- "av1-1-b10-00-quantizer-30.ivf"
- "av1-1-b10-00-quantizer-30.ivf.md5"
- "av1-1-b10-00-quantizer-31.ivf"
- "av1-1-b10-00-quantizer-31.ivf.md5"
- "av1-1-b10-00-quantizer-32.ivf"
- "av1-1-b10-00-quantizer-32.ivf.md5"
- "av1-1-b10-00-quantizer-33.ivf"
- "av1-1-b10-00-quantizer-33.ivf.md5"
- "av1-1-b10-00-quantizer-34.ivf"
- "av1-1-b10-00-quantizer-34.ivf.md5"
- "av1-1-b10-00-quantizer-35.ivf"
- "av1-1-b10-00-quantizer-35.ivf.md5"
- "av1-1-b10-00-quantizer-36.ivf"
- "av1-1-b10-00-quantizer-36.ivf.md5"
- "av1-1-b10-00-quantizer-37.ivf"
- "av1-1-b10-00-quantizer-37.ivf.md5"
- "av1-1-b10-00-quantizer-38.ivf"
- "av1-1-b10-00-quantizer-38.ivf.md5"
- "av1-1-b10-00-quantizer-39.ivf"
- "av1-1-b10-00-quantizer-39.ivf.md5"
- "av1-1-b10-00-quantizer-40.ivf"
- "av1-1-b10-00-quantizer-40.ivf.md5"
- "av1-1-b10-00-quantizer-41.ivf"
- "av1-1-b10-00-quantizer-41.ivf.md5"
- "av1-1-b10-00-quantizer-42.ivf"
- "av1-1-b10-00-quantizer-42.ivf.md5"
- "av1-1-b10-00-quantizer-43.ivf"
- "av1-1-b10-00-quantizer-43.ivf.md5"
- "av1-1-b10-00-quantizer-44.ivf"
- "av1-1-b10-00-quantizer-44.ivf.md5"
- "av1-1-b10-00-quantizer-45.ivf"
- "av1-1-b10-00-quantizer-45.ivf.md5"
- "av1-1-b10-00-quantizer-46.ivf"
- "av1-1-b10-00-quantizer-46.ivf.md5"
- "av1-1-b10-00-quantizer-47.ivf"
- "av1-1-b10-00-quantizer-47.ivf.md5"
- "av1-1-b10-00-quantizer-48.ivf"
- "av1-1-b10-00-quantizer-48.ivf.md5"
- "av1-1-b10-00-quantizer-49.ivf"
- "av1-1-b10-00-quantizer-49.ivf.md5"
- "av1-1-b10-00-quantizer-50.ivf"
- "av1-1-b10-00-quantizer-50.ivf.md5"
- "av1-1-b10-00-quantizer-51.ivf"
- "av1-1-b10-00-quantizer-51.ivf.md5"
- "av1-1-b10-00-quantizer-52.ivf"
- "av1-1-b10-00-quantizer-52.ivf.md5"
- "av1-1-b10-00-quantizer-53.ivf"
- "av1-1-b10-00-quantizer-53.ivf.md5"
- "av1-1-b10-00-quantizer-54.ivf"
- "av1-1-b10-00-quantizer-54.ivf.md5"
- "av1-1-b10-00-quantizer-55.ivf"
- "av1-1-b10-00-quantizer-55.ivf.md5"
- "av1-1-b10-00-quantizer-56.ivf"
- "av1-1-b10-00-quantizer-56.ivf.md5"
- "av1-1-b10-00-quantizer-57.ivf"
- "av1-1-b10-00-quantizer-57.ivf.md5"
- "av1-1-b10-00-quantizer-58.ivf"
- "av1-1-b10-00-quantizer-58.ivf.md5"
- "av1-1-b10-00-quantizer-59.ivf"
- "av1-1-b10-00-quantizer-59.ivf.md5"
- "av1-1-b10-00-quantizer-60.ivf"
- "av1-1-b10-00-quantizer-60.ivf.md5"
- "av1-1-b10-00-quantizer-61.ivf"
- "av1-1-b10-00-quantizer-61.ivf.md5"
- "av1-1-b10-00-quantizer-62.ivf"
- "av1-1-b10-00-quantizer-62.ivf.md5"
- "av1-1-b10-00-quantizer-63.ivf"
- "av1-1-b10-00-quantizer-63.ivf.md5"
- "av1-1-b8-01-size-16x16.ivf"
- "av1-1-b8-01-size-16x16.ivf.md5"
- "av1-1-b8-01-size-16x18.ivf"
- "av1-1-b8-01-size-16x18.ivf.md5"
- "av1-1-b8-01-size-16x32.ivf"
- "av1-1-b8-01-size-16x32.ivf.md5"
- "av1-1-b8-01-size-16x34.ivf"
- "av1-1-b8-01-size-16x34.ivf.md5"
- "av1-1-b8-01-size-16x64.ivf"
- "av1-1-b8-01-size-16x64.ivf.md5"
- "av1-1-b8-01-size-16x66.ivf"
- "av1-1-b8-01-size-16x66.ivf.md5"
- "av1-1-b8-01-size-18x16.ivf"
- "av1-1-b8-01-size-18x16.ivf.md5"
- "av1-1-b8-01-size-18x18.ivf"
- "av1-1-b8-01-size-18x18.ivf.md5"
- "av1-1-b8-01-size-18x32.ivf"
- "av1-1-b8-01-size-18x32.ivf.md5"
- "av1-1-b8-01-size-18x34.ivf"
- "av1-1-b8-01-size-18x34.ivf.md5"
- "av1-1-b8-01-size-18x64.ivf"
- "av1-1-b8-01-size-18x64.ivf.md5"
- "av1-1-b8-01-size-18x66.ivf"
- "av1-1-b8-01-size-18x66.ivf.md5"
- "av1-1-b8-01-size-196x196.ivf"
- "av1-1-b8-01-size-196x196.ivf.md5"
- "av1-1-b8-01-size-196x198.ivf"
- "av1-1-b8-01-size-196x198.ivf.md5"
- "av1-1-b8-01-size-196x200.ivf"
- "av1-1-b8-01-size-196x200.ivf.md5"
- "av1-1-b8-01-size-196x202.ivf"
- "av1-1-b8-01-size-196x202.ivf.md5"
- "av1-1-b8-01-size-196x208.ivf"
- "av1-1-b8-01-size-196x208.ivf.md5"
- "av1-1-b8-01-size-196x210.ivf"
- "av1-1-b8-01-size-196x210.ivf.md5"
- "av1-1-b8-01-size-196x224.ivf"
- "av1-1-b8-01-size-196x224.ivf.md5"
- "av1-1-b8-01-size-196x226.ivf"
- "av1-1-b8-01-size-196x226.ivf.md5"
- "av1-1-b8-01-size-198x196.ivf"
- "av1-1-b8-01-size-198x196.ivf.md5"
- "av1-1-b8-01-size-198x198.ivf"
- "av1-1-b8-01-size-198x198.ivf.md5"
- "av1-1-b8-01-size-198x200.ivf"
- "av1-1-b8-01-size-198x200.ivf.md5"
- "av1-1-b8-01-size-198x202.ivf"
- "av1-1-b8-01-size-198x202.ivf.md5"
- "av1-1-b8-01-size-198x208.ivf"
- "av1-1-b8-01-size-198x208.ivf.md5"
- "av1-1-b8-01-size-198x210.ivf"
- "av1-1-b8-01-size-198x210.ivf.md5"
- "av1-1-b8-01-size-198x224.ivf"
- "av1-1-b8-01-size-198x224.ivf.md5"
- "av1-1-b8-01-size-198x226.ivf"
- "av1-1-b8-01-size-198x226.ivf.md5"
- "av1-1-b8-01-size-200x196.ivf"
- "av1-1-b8-01-size-200x196.ivf.md5"
- "av1-1-b8-01-size-200x198.ivf"
- "av1-1-b8-01-size-200x198.ivf.md5"
- "av1-1-b8-01-size-200x200.ivf"
- "av1-1-b8-01-size-200x200.ivf.md5"
- "av1-1-b8-01-size-200x202.ivf"
- "av1-1-b8-01-size-200x202.ivf.md5"
- "av1-1-b8-01-size-200x208.ivf"
- "av1-1-b8-01-size-200x208.ivf.md5"
- "av1-1-b8-01-size-200x210.ivf"
- "av1-1-b8-01-size-200x210.ivf.md5"
- "av1-1-b8-01-size-200x224.ivf"
- "av1-1-b8-01-size-200x224.ivf.md5"
- "av1-1-b8-01-size-200x226.ivf"
- "av1-1-b8-01-size-200x226.ivf.md5"
- "av1-1-b8-01-size-202x196.ivf"
- "av1-1-b8-01-size-202x196.ivf.md5"
- "av1-1-b8-01-size-202x198.ivf"
- "av1-1-b8-01-size-202x198.ivf.md5"
- "av1-1-b8-01-size-202x200.ivf"
- "av1-1-b8-01-size-202x200.ivf.md5"
- "av1-1-b8-01-size-202x202.ivf"
- "av1-1-b8-01-size-202x202.ivf.md5"
- "av1-1-b8-01-size-202x208.ivf"
- "av1-1-b8-01-size-202x208.ivf.md5"
- "av1-1-b8-01-size-202x210.ivf"
- "av1-1-b8-01-size-202x210.ivf.md5"
- "av1-1-b8-01-size-202x224.ivf"
- "av1-1-b8-01-size-202x224.ivf.md5"
- "av1-1-b8-01-size-202x226.ivf"
- "av1-1-b8-01-size-202x226.ivf.md5"
- "av1-1-b8-01-size-208x196.ivf"
- "av1-1-b8-01-size-208x196.ivf.md5"
- "av1-1-b8-01-size-208x198.ivf"
- "av1-1-b8-01-size-208x198.ivf.md5"
- "av1-1-b8-01-size-208x200.ivf"
- "av1-1-b8-01-size-208x200.ivf.md5"
- "av1-1-b8-01-size-208x202.ivf"
- "av1-1-b8-01-size-208x202.ivf.md5"
- "av1-1-b8-01-size-208x208.ivf"
- "av1-1-b8-01-size-208x208.ivf.md5"
- "av1-1-b8-01-size-208x210.ivf"
- "av1-1-b8-01-size-208x210.ivf.md5"
- "av1-1-b8-01-size-208x224.ivf"
- "av1-1-b8-01-size-208x224.ivf.md5"
- "av1-1-b8-01-size-208x226.ivf"
- "av1-1-b8-01-size-208x226.ivf.md5"
- "av1-1-b8-01-size-210x196.ivf"
- "av1-1-b8-01-size-210x196.ivf.md5"
- "av1-1-b8-01-size-210x198.ivf"
- "av1-1-b8-01-size-210x198.ivf.md5"
- "av1-1-b8-01-size-210x200.ivf"
- "av1-1-b8-01-size-210x200.ivf.md5"
- "av1-1-b8-01-size-210x202.ivf"
- "av1-1-b8-01-size-210x202.ivf.md5"
- "av1-1-b8-01-size-210x208.ivf"
- "av1-1-b8-01-size-210x208.ivf.md5"
- "av1-1-b8-01-size-210x210.ivf"
- "av1-1-b8-01-size-210x210.ivf.md5"
- "av1-1-b8-01-size-210x224.ivf"
- "av1-1-b8-01-size-210x224.ivf.md5"
- "av1-1-b8-01-size-210x226.ivf"
- "av1-1-b8-01-size-210x226.ivf.md5"
- "av1-1-b8-01-size-224x196.ivf"
- "av1-1-b8-01-size-224x196.ivf.md5"
- "av1-1-b8-01-size-224x198.ivf"
- "av1-1-b8-01-size-224x198.ivf.md5"
- "av1-1-b8-01-size-224x200.ivf"
- "av1-1-b8-01-size-224x200.ivf.md5"
- "av1-1-b8-01-size-224x202.ivf"
- "av1-1-b8-01-size-224x202.ivf.md5"
- "av1-1-b8-01-size-224x208.ivf"
- "av1-1-b8-01-size-224x208.ivf.md5"
- "av1-1-b8-01-size-224x210.ivf"
- "av1-1-b8-01-size-224x210.ivf.md5"
- "av1-1-b8-01-size-224x224.ivf"
- "av1-1-b8-01-size-224x224.ivf.md5"
- "av1-1-b8-01-size-224x226.ivf"
- "av1-1-b8-01-size-224x226.ivf.md5"
- "av1-1-b8-01-size-226x196.ivf"
- "av1-1-b8-01-size-226x196.ivf.md5"
- "av1-1-b8-01-size-226x198.ivf"
- "av1-1-b8-01-size-226x198.ivf.md5"
- "av1-1-b8-01-size-226x200.ivf"
- "av1-1-b8-01-size-226x200.ivf.md5"
- "av1-1-b8-01-size-226x202.ivf"
- "av1-1-b8-01-size-226x202.ivf.md5"
- "av1-1-b8-01-size-226x208.ivf"
- "av1-1-b8-01-size-226x208.ivf.md5"
- "av1-1-b8-01-size-226x210.ivf"
- "av1-1-b8-01-size-226x210.ivf.md5"
- "av1-1-b8-01-size-226x224.ivf"
- "av1-1-b8-01-size-226x224.ivf.md5"
- "av1-1-b8-01-size-226x226.ivf"
- "av1-1-b8-01-size-226x226.ivf.md5"
- "av1-1-b8-01-size-32x16.ivf"
- "av1-1-b8-01-size-32x16.ivf.md5"
- "av1-1-b8-01-size-32x18.ivf"
- "av1-1-b8-01-size-32x18.ivf.md5"
- "av1-1-b8-01-size-32x32.ivf"
- "av1-1-b8-01-size-32x32.ivf.md5"
- "av1-1-b8-01-size-32x34.ivf"
- "av1-1-b8-01-size-32x34.ivf.md5"
- "av1-1-b8-01-size-32x64.ivf"
- "av1-1-b8-01-size-32x64.ivf.md5"
- "av1-1-b8-01-size-32x66.ivf"
- "av1-1-b8-01-size-32x66.ivf.md5"
- "av1-1-b8-01-size-34x16.ivf"
- "av1-1-b8-01-size-34x16.ivf.md5"
- "av1-1-b8-01-size-34x18.ivf"
- "av1-1-b8-01-size-34x18.ivf.md5"
- "av1-1-b8-01-size-34x32.ivf"
- "av1-1-b8-01-size-34x32.ivf.md5"
- "av1-1-b8-01-size-34x34.ivf"
- "av1-1-b8-01-size-34x34.ivf.md5"
- "av1-1-b8-01-size-34x64.ivf"
- "av1-1-b8-01-size-34x64.ivf.md5"
- "av1-1-b8-01-size-34x66.ivf"
- "av1-1-b8-01-size-34x66.ivf.md5"
- "av1-1-b8-01-size-64x16.ivf"
- "av1-1-b8-01-size-64x16.ivf.md5"
- "av1-1-b8-01-size-64x18.ivf"
- "av1-1-b8-01-size-64x18.ivf.md5"
- "av1-1-b8-01-size-64x32.ivf"
- "av1-1-b8-01-size-64x32.ivf.md5"
- "av1-1-b8-01-size-64x34.ivf"
- "av1-1-b8-01-size-64x34.ivf.md5"
- "av1-1-b8-01-size-64x64.ivf"
- "av1-1-b8-01-size-64x64.ivf.md5"
- "av1-1-b8-01-size-64x66.ivf"
- "av1-1-b8-01-size-64x66.ivf.md5"
- "av1-1-b8-01-size-66x16.ivf"
- "av1-1-b8-01-size-66x16.ivf.md5"
- "av1-1-b8-01-size-66x18.ivf"
- "av1-1-b8-01-size-66x18.ivf.md5"
- "av1-1-b8-01-size-66x32.ivf"
- "av1-1-b8-01-size-66x32.ivf.md5"
- "av1-1-b8-01-size-66x34.ivf"
- "av1-1-b8-01-size-66x34.ivf.md5"
- "av1-1-b8-01-size-66x64.ivf"
- "av1-1-b8-01-size-66x64.ivf.md5"
- "av1-1-b8-01-size-66x66.ivf"
- "av1-1-b8-01-size-66x66.ivf.md5"
- "av1-1-b8-02-allintra.ivf"
- "av1-1-b8-02-allintra.ivf.md5"
- "av1-1-b8-03-sizeup.mkv"
- "av1-1-b8-03-sizeup.mkv.md5"
- "av1-1-b8-03-sizedown.mkv"
- "av1-1-b8-03-sizedown.mkv.md5")
-endif()
-
-if(ENABLE_ENCODE_PERF_TESTS AND CONFIG_AV1_ENCODER)
- list(APPEND AOM_TEST_DATA_FILE_NAMES "desktop_640_360_30.yuv"
- "kirland_640_480_30.yuv" "macmarcomoving_640_480_30.yuv"
- "macmarcostationary_640_480_30.yuv" "niklas_1280_720_30.yuv"
- "tacomanarrows_640_480_30.yuv"
- "tacomasmallcameramovement_640_480_30.yuv"
- "thaloundeskmtg_640_480_30.yuv")
-endif()
-
-# Parses test/test-data.sha1 and writes captured file names and checksums to
-# $out_files and $out_checksums as lists.
-function(make_test_data_lists test_data_file out_files out_checksums)
- if(NOT test_data_file OR NOT EXISTS "${test_data_file}")
- message(FATAL_ERROR "Test info file missing or empty (${test_data_file})")
- endif()
-
- # Read $test_data_file into $files_and_checksums. $files_and_checksums becomes
- # a list with an entry for each line from $test_data_file.
- file(STRINGS "${test_data_file}" files_and_checksums)
-
- # Iterate over the list of lines and split it into $checksums and $filenames.
- foreach(line ${files_and_checksums})
- string(FIND "${line}" " *" delim_pos)
-
- math(EXPR filename_pos "${delim_pos} + 2")
- string(SUBSTRING "${line}" 0 ${delim_pos} checksum)
- string(SUBSTRING "${line}" ${filename_pos} -1 filename)
-
- list(FIND AOM_TEST_DATA_FILE_NAMES ${filename} list_index)
- if(NOT ${list_index} EQUAL -1)
-
- # Include the name and checksum in output only when the file is needed.
- set(checksums ${checksums} ${checksum})
- set(filenames ${filenames} ${filename})
- endif()
- endforeach()
-
- list(LENGTH filenames num_files)
- list(LENGTH checksums num_checksums)
- if(NOT checksums OR NOT filenames OR NOT num_files EQUAL num_checksums)
- message(FATAL_ERROR "Parsing of ${test_data_file} failed.")
- endif()
-
- set(${out_checksums} ${checksums} PARENT_SCOPE)
- set(${out_files} ${filenames} PARENT_SCOPE)
-endfunction()
-
-# Prefixes each file name in the list named by $test_files with $test_dir and
-# stores the resulting paths in the caller's $out_path_list variable.
-function(expand_test_file_paths test_files test_dir out_path_list)
- foreach(filename ${${test_files}})
- set(path_list ${path_list} "${test_dir}/${filename}")
- endforeach()
- set(${out_path_list} ${path_list} PARENT_SCOPE)
-endfunction()
-
-function(check_file local_path expected_checksum out_needs_update)
- if(EXISTS "${local_path}")
- file(SHA1 "${local_path}" file_checksum)
- else()
- set(${out_needs_update} 1 PARENT_SCOPE)
- return()
- endif()
-
- if("${file_checksum}" STREQUAL "${expected_checksum}")
- unset(${out_needs_update} PARENT_SCOPE)
- else()
- set(${out_needs_update} 1 PARENT_SCOPE)
- return()
- endif()
- message("${local_path} up to date.")
-endfunction()
-
-# Downloads data from $file_url, confirms that $file_checksum matches, and
-# writes it to $local_path.
-function(download_test_file file_url file_checksum local_path)
- message("Downloading ${file_url} ...")
- file(DOWNLOAD "${file_url}" "${local_path}" SHOW_PROGRESS
- EXPECTED_HASH SHA1=${file_checksum})
- message("Download of ${file_url} complete.")
-endfunction()
diff --git a/third_party/aom/test/test_intra_pred_speed.cc b/third_party/aom/test/test_intra_pred_speed.cc
deleted file mode 100644
index b72ac1167..000000000
--- a/third_party/aom/test/test_intra_pred_speed.cc
+++ /dev/null
@@ -1,1464 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-// Test and time AOM intra-predictor functions
-
-#include <stdio.h>
-#include <string>
-
-#include "third_party/googletest/src/googletest/include/gtest/gtest.h"
-
-#include "config/aom_dsp_rtcd.h"
-
-#include "test/acm_random.h"
-#include "test/clear_system_state.h"
-#include "test/md5_helper.h"
-#include "aom/aom_integer.h"
-#include "aom_ports/mem.h"
-#include "aom_ports/aom_timer.h"
-#include "av1/common/common_data.h"
-
-// -----------------------------------------------------------------------------
-
-namespace {
-
-// Note:
-// APPLY_UNIT_TESTS
-// 1: Do unit tests
-// 0: Generate MD5 array as required
-#define APPLY_UNIT_TESTS 1
-
-typedef void (*AvxPredFunc)(uint8_t *dst, ptrdiff_t y_stride,
- const uint8_t *above, const uint8_t *left);
-
-const int kBPS = 64;
-const int kTotalPixels = kBPS * kBPS;
-// 4 DC variants, V, H, PAETH, SMOOTH, SMOOTH_V, SMOOTH_H
-const int kNumAv1IntraFuncs = 10;
-
-#if APPLY_UNIT_TESTS
-const char *kAv1IntraPredNames[kNumAv1IntraFuncs] = {
- "DC_PRED", "DC_LEFT_PRED", "DC_TOP_PRED", "DC_128_PRED", "V_PRED",
- "H_PRED", "PAETH_PRED", "SMOOTH_PRED", "SMOOTH_V_PRED", "SMOOTH_H_PRED",
-};
-#endif // APPLY_UNIT_TESTS
-
-template <typename Pixel>
-struct IntraPredTestMem {
- void Init(int block_width, int block_height, int bd) {
- ASSERT_LE(block_width, kBPS);
- ASSERT_LE(block_height, kBPS);
- // Note: for blocks having width <= 32 and height <= 32, we generate 32x32
- // random pixels as before to avoid having to recalculate all hashes again.
- const int block_size_upto_32 = (block_width <= 32) && (block_height <= 32);
- stride = block_size_upto_32 ? 32 : kBPS;
- num_pixels = stride * stride;
- libaom_test::ACMRandom rnd(libaom_test::ACMRandom::DeterministicSeed());
- above = above_mem + 16;
- const int mask = (1 << bd) - 1;
- for (int i = 0; i < num_pixels; ++i) ref_src[i] = rnd.Rand16() & mask;
- for (int i = 0; i < stride; ++i) left[i] = rnd.Rand16() & mask;
- for (int i = -1; i < stride; ++i) above[i] = rnd.Rand16() & mask;
-
- for (int i = stride; i < 2 * stride; ++i) {
- left[i] = rnd.Rand16() & mask;
- above[i] = rnd.Rand16() & mask;
- }
- }
-
- DECLARE_ALIGNED(16, Pixel, src[kTotalPixels]);
- DECLARE_ALIGNED(16, Pixel, ref_src[kTotalPixels]);
- DECLARE_ALIGNED(16, Pixel, left[2 * kBPS]);
- Pixel *above;
- int stride;
- int num_pixels;
-
- private:
- DECLARE_ALIGNED(16, Pixel, above_mem[2 * kBPS + 16]);
-};
-
-// -----------------------------------------------------------------------------
-// Low Bitdepth
-
-typedef IntraPredTestMem<uint8_t> Av1IntraPredTestMem;
-
-static const char *const kTxSizeStrings[TX_SIZES_ALL] = {
- "4X4", "8X8", "16X16", "32X32", "64X64", "4X8", "8X4",
- "8X16", "16X8", "16X32", "32X16", "32X64", "64X32", "4X16",
- "16X4", "8X32", "32X8", "16X64", "64X16",
-};
-
-void CheckMd5Signature(TX_SIZE tx_size, bool is_hbd,
- const char *const signatures[], const void *data,
- size_t data_size, int elapsed_time, int idx) {
- const std::string hbd_str = is_hbd ? "Hbd " : "";
- const std::string name_str = hbd_str + "Intra" + kTxSizeStrings[tx_size];
- libaom_test::MD5 md5;
- md5.Add(reinterpret_cast<const uint8_t *>(data), data_size);
-#if APPLY_UNIT_TESTS
- printf("Mode %s[%13s]: %5d ms MD5: %s\n", name_str.c_str(),
- kAv1IntraPredNames[idx], elapsed_time, md5.Get());
- EXPECT_STREQ(signatures[idx], md5.Get());
-#else
- (void)signatures;
- (void)elapsed_time;
- (void)idx;
- printf("\"%s\",\n", md5.Get());
-#endif
-}
-
-void TestIntraPred(TX_SIZE tx_size, AvxPredFunc const *pred_funcs,
- const char *const signatures[]) {
- const int block_width = tx_size_wide[tx_size];
- const int block_height = tx_size_high[tx_size];
- const int num_pixels_per_test =
- block_width * block_height * kNumAv1IntraFuncs;
- const int kNumTests = static_cast<int>(2.e10 / num_pixels_per_test);
- Av1IntraPredTestMem intra_pred_test_mem;
- intra_pred_test_mem.Init(block_width, block_height, 8);
-
- for (int k = 0; k < kNumAv1IntraFuncs; ++k) {
- if (pred_funcs[k] == NULL) continue;
- memcpy(intra_pred_test_mem.src, intra_pred_test_mem.ref_src,
- sizeof(intra_pred_test_mem.src));
- aom_usec_timer timer;
- aom_usec_timer_start(&timer);
- for (int num_tests = 0; num_tests < kNumTests; ++num_tests) {
- pred_funcs[k](intra_pred_test_mem.src, intra_pred_test_mem.stride,
- intra_pred_test_mem.above, intra_pred_test_mem.left);
- }
- libaom_test::ClearSystemState();
- aom_usec_timer_mark(&timer);
- const int elapsed_time =
- static_cast<int>(aom_usec_timer_elapsed(&timer) / 1000);
- CheckMd5Signature(
- tx_size, false, signatures, intra_pred_test_mem.src,
- intra_pred_test_mem.num_pixels * sizeof(*intra_pred_test_mem.src),
- elapsed_time, k);
- }
-}
-
-static const char *const kSignatures[TX_SIZES_ALL][kNumAv1IntraFuncs] = {
- {
- // 4X4
- "e7ed7353c3383fff942e500e9bfe82fe",
- "2a4a26fcc6ce005eadc08354d196c8a9",
- "269d92eff86f315d9c38fe7640d85b15",
- "ae2960eea9f71ee3dabe08b282ec1773",
- "6c1abcc44e90148998b51acd11144e9c",
- "f7bb3186e1ef8a2b326037ff898cad8e",
- "59fc0e923a08cfac0a493fb38988e2bb",
- "9ff8bb37d9c830e6ab8ecb0c435d3c91",
- "de6937fca02354f2874dbc5dbec5d5b3",
- "723cf948137f7d8c7860d814e55ae67d",
- },
- {
- // 8X8
- "d8bbae5d6547cfc17e4f5f44c8730e88",
- "373bab6d931868d41a601d9d88ce9ac3",
- "6fdd5ff4ff79656c14747598ca9e3706",
- "d9661c2811d6a73674f40ffb2b841847",
- "7c722d10b19ccff0b8c171868e747385",
- "f81dd986eb2b50f750d3a7da716b7e27",
- "064404361748dd111a890a1470d7f0ea",
- "dc29b7e1f78cc8e7525d5ea4c0ab9b78",
- "97111eb1bc26bade6272015df829f1ae",
- "d19a8a73cc46b807f2c5e817576cc1e1",
- },
- {
- // 16X16
- "50971c07ce26977d30298538fffec619",
- "527a6b9e0dc5b21b98cf276305432bef",
- "7eff2868f80ebc2c43a4f367281d80f7",
- "67cd60512b54964ef6aff1bd4816d922",
- "48371c87dc95c08a33b2048f89cf6468",
- "b0acf2872ee411d7530af6d2625a7084",
- "93d6b5352b571805ab16a55e1bbed86a",
- "03764e4c0aebbc180e4e2c68fb06df2b",
- "bb6c74c9076c9f266ab11fb57060d8e6",
- "0c5162bc28489756ddb847b5678e6f07",
- },
- {
- // 32X32
- "a0a618c900e65ae521ccc8af789729f2",
- "985aaa7c72b4a6c2fb431d32100cf13a",
- "10662d09febc3ca13ee4e700120daeb5",
- "b3b01379ba08916ef6b1b35f7d9ad51c",
- "9f4261755795af97e34679c333ec7004",
- "bc2c9da91ad97ef0d1610fb0a9041657",
- "ef1653982b69e1f64bee3759f3e1ec45",
- "1a51a675deba2c83282142eb48d3dc3d",
- "866c224746dc260cda861a7b1b383fb3",
- "cea23799fc3526e1b6a6ff02b42b82af",
- },
- {
- // 64X64
- "6e1094fa7b50bc813aa2ba29f5df8755",
- "afe020786b83b793c2bbd9468097ff6e",
- "be91585259bc37bf4dc1651936e90b3e",
- "a1650dbcd56e10288c3e269eca37967d",
- "9e5c34f3797e0cdd3cd9d4c05b0d8950",
- "bc87be7ac899cc6a28f399d7516c49fe",
- "9811fd0d2dd515f06122f5d1bd18b784",
- "3c140e466f2c2c0d9cb7d2157ab8dc27",
- "9543de76c925a8f6adc884cc7f98dc91",
- "df1df0376cc944afe7e74e94f53e575a",
- },
- {
- // 4X8
- "d9fbebdc85f71ab1e18461b2db4a2adc",
- "5ccb2a68284bc9714d94b8a06ccadbb2",
- "735d059abc2744f3ff3f9590f7191b37",
- "d9fbebdc85f71ab1e18461b2db4a2adc",
- "6819497c44cd0ace120add83672996ee",
- "7e3244f5a2d3edf81c7e962a842b97f9",
- "809350f164cd4d1650850bb0f59c3260",
- "1b60a394331eeab6927a6f8aaff57040",
- "5307de1bd7329ba6b281d2c1b0b457f9",
- "24c58a8138339846d95568efb91751db",
- },
- {
- // 8X4
- "23f9fc11344426c9bee2e06d57dfd628",
- "2d71a26d1bae1fb34734de7b42fc5eb7",
- "5af9c1b2fd9d5721fad67b67b3f7c816",
- "00d71b17be662753813d515f197d145e",
- "bef10ec984427e28f4390f43809d10af",
- "77773cdfb7ed6bc882ab202a64b0a470",
- "2cc48bd66d6b0121b5221d52ccd732af",
- "b302155e1c9eeeafe2ba2bf68e807a46",
- "561bc8d0e76d5041ebd5168fc6a115e1",
- "81d0113fb1d0a9a24ffd6f1987b77948",
- },
- {
- // 8X16
- "c849de88b24f773dfcdd1d48d1209796",
- "6cb807c1897b94866a0f3d3c56ed8695",
- "d56db05a8ac7981762f5b877f486c4ef",
- "b4bc01eb6e59a40922ad17715cafb04b",
- "09d178439534f4062ae687c351f66d64",
- "644501399cf73080ac606e5cef7ca09b",
- "278076495180e17c065a95ab7278539a",
- "9dd7f324816f242be408ffeb0c673732",
- "f520c4a20acfa0bea1d253c6f0f040fd",
- "85f38df809df2c2d7c8b4a157a65cd44",
- },
- {
- // 16X8
- "b4cbdbdf10ce13300b4063a3daf99e04",
- "3731e1e6202064a9d0604d7c293ecee4",
- "6c856188c4256a06452f0d5d70cac436",
- "1f2192b4c8c497589484ea7bf9c944e8",
- "84011bd4b7f565119d06787840e333a0",
- "0e48949f7a6aa36f0d76b5d01f91124a",
- "60eff8064634b6c73b10681356baeee9",
- "1559aeb081a9c0c71111d6093c2ff9fd",
- "c15479b739713773e5cabb748451987b",
- "72e33ec12c9b67aea26d8d005fb82de2",
- },
- {
- // 16X32
- "abe5233d189cdbf79424721571bbaa7b",
- "282759f81e3cfb2e2d396fe406b72a8b",
- "e2224926c264f6f174cbc3167a233168",
- "6814e85c2b33f8c9415d62e80394b47b",
- "99cbbb60459c08a3061d72c4e4f6276a",
- "1d1567d40b8e816f8c1f71e576fe0f87",
- "36fdd371b624a075814d497c4832ec85",
- "8ab8da61b727442b6ff692b40d0df018",
- "e35a10ad7fdf2327e821504a90f6a6eb",
- "1f7211e727dc1de7d6a55d082fbdd821",
- },
- {
- // 32X16
- "d1aeb8d5fdcfd3307922af01a798a4dc",
- "b0bcb514ebfbee065faea9d34c12ae75",
- "d6a18c63b4e909871c0137ca652fad23",
- "fd047f2fc1b8ffb95d0eeef3e8796a45",
- "645ab60779ea348fd93c81561c31bab9",
- "4409633c9db8dff41ade4292a3a56e7f",
- "5e36a11e069b31c2a739f3a9c7b37c24",
- "e83b9483d702cfae496991c3c7fa92c0",
- "12f6ddf98c7f30a277307f1ea935b030",
- "354321d6c32bbdb0739e4fa2acbf41e1",
- },
- {
- // 32X64
- "0ce332b343934b34cd4417725faa85cb",
- "4e2a2cfd8f56f15939bdfc753145b303",
- "0f46d124ba9f48cdd5d5290acf786d6d",
- "e1e8ed803236367821981500a3d9eebe",
- "1d2f8e48e3adb7c448be05d9f66f4954",
- "9fb2e176636a5689b26f73ca73fcc512",
- "e720ebccae7e25e36f23da53ae5b5d6a",
- "86fe4364734169aaa4520d799890d530",
- "b1870290764bb1b100d1974e2bd70f1d",
- "ce5b238e19d85ef69d85badfab4e63ae",
- },
- {
- // 64X32
- "a6c5aeb722615089efbca80b02951ceb",
- "538424b24bd0830f21788e7238ca762f",
- "80c15b303235f9bc2259027bb92dfdc4",
- "e48e1ac15e97191a8fda08d62fff343e",
- "12604b37875533665078405ef4582e35",
- "0048afa17bd3e1632d68b96048836530",
- "07a0cfcb56a5eed50c4bd6c26814336b",
- "529d8a070de5bc6531fa3ee8f450c233",
- "33c50a11c7d78f72434064f634305e95",
- "e0ef7f0559c1a50ec5a8c12011b962f7",
- },
- {
- // 4X16
- "750491056568eb8fe15387b86bdf06b8",
- "3a52dae9f599f08cfb3bd1b910dc0e11",
- "af79f71e3e03dbeca44e2e13561f70c7",
- "ca7dfd7624afc0c06fb5552f44398535",
- "b591af115444bf43140c29c269f68fb2",
- "483d942ae36e69e62f31eb215331416f",
- "f14b58525e81870bc5d95c7ac71a347f",
- "371208bb4027d9badb04095d1590bbc4",
- "c7049c21b2924d70c7c12784d6b6b796",
- "7d87233f4b5b0f12086045e5d7b2d4c2",
- },
- {
- // 16X4
- "7c6e325a65e77e732b3adbe237e045e4",
- "24478f93ffcec47852e004d0fe948464",
- "258d042c67d4ba3ecfa667f0adc9aebf",
- "b2cd21d06959f159a1f3c4d9768ee7fb",
- "b4e1f38157bf8410e7c3da02f687a343",
- "869e703729eb0fc0711c254944ff5d5a",
- "9638dd77105a640b146a8201ea7a0801",
- "919d932c6af8a1cc7486e8ce996dd487",
- "e1c9be493b6714c7ae48f30044c43140",
- "bf0fe3889d654b2f6eb98c8fc751f9e4",
- },
- {
- // 8X32
- "8dfac4319fe0bd40013ffb3102da8c72",
- "feb46b6dc4e2ca0a09533bfc51d4dcb0",
- "850837ec714c37262216527aaf4cbbe9",
- "4603c7800fb08361f163daca876e8bda",
- "1ff95e7d2debc27b05806fb25abfd624",
- "d81b9a51a062b23ca7823804cb7bec22",
- "f1d8978158766f46335203608cb807e7",
- "f3527096256258c0878d644a9d7d53ca",
- "cbde98ac8b009953eb112807ad2ea29e",
- "654fb1153415747feae599f538122af5",
- },
- {
- // 32X8
- "3d4ee16fab374357474f60b845327bc7",
- "bc17c5059473a476df4e85f56395ad55",
- "3d4ee16fab374357474f60b845327bc7",
- "c14b8db34dc2355b84e3735c9ba16c7f",
- "a71d25b5d47a92a8b9223c98f18458ee",
- "6c1cfe2b1893f4576a80675687cb6426",
- "92d11bbef8b85bb48d799bb055de3514",
- "bcf81d1db8ae5cc03360467f44f498ec",
- "79f8c564163555592e808e145eaf5c60",
- "46fff139cef2ef773938bcc8b0e5abb8",
- },
- {
- // 16X64
- "3b2a053ee8b05a8ac35ad23b0422a151",
- "12b0c69595328c465e0b25e0c9e3e9fc",
- "f77c544ac8035e01920deae40cee7b07",
- "727797ef15ccd8d325476fe8f12006a3",
- "f3be77c0fe67eb5d9d515e92bec21eb7",
- "f1ece6409e01e9dd98b800d49628247d",
- "efd2ec9bfbbd4fd1f6604ea369df1894",
- "ec703de918422b9e03197ba0ed60a199",
- "739418efb89c07f700895deaa5d0b3e3",
- "9943ae1bbeeebfe1d3a92dc39e049d63",
- },
- {
- // 64X16
- "821b76b1494d4f84d20817840f719a1a",
- "69e462c3338a9aaf993c3f7cfbc15649",
- "516d8f6eb054d74d150e7b444185b6b9",
- "de1b736e9d99129609d6ef3a491507a0",
- "fd9b4276e7affe1e0e4ce4f428058994",
- "cd82fd361a4767ac29a9f406b480b8f3",
- "2792c2f810157a4a6cb13c28529ff779",
- "1220442d90c4255ba0969d28b91e93a6",
- "c7253e10b45f7f67dfee3256c9b94825",
- "879792198071c7e0b50b9b5010d8c18f",
- },
-};
-
-} // namespace
-
-// Defines a test case for |arch| (e.g., C_1, SSE2_1, ...) passing the
-// predictors to TestIntraPred. The test name is
-// 'arch.DISABLED_TestIntraPred_tx_size', e.g., C_1.DISABLED_TestIntraPred_TX_4X4
-#define INTRA_PRED_TEST(arch, tx_size, dc, dc_left, dc_top, dc_128, v, h, \
- paeth, smooth, smooth_v, smooth_h) \
- TEST(arch, DISABLED_##TestIntraPred_##tx_size) { \
- static const AvxPredFunc aom_intra_pred[] = { \
- dc, dc_left, dc_top, dc_128, v, h, paeth, smooth, smooth_v, smooth_h \
- }; \
- TestIntraPred(tx_size, aom_intra_pred, kSignatures[tx_size]); \
- }
-
-// -----------------------------------------------------------------------------
-// 4x4, 4x8, 4x16
-
-INTRA_PRED_TEST(C_1, TX_4X4, aom_dc_predictor_4x4_c,
- aom_dc_left_predictor_4x4_c, aom_dc_top_predictor_4x4_c,
- aom_dc_128_predictor_4x4_c, aom_v_predictor_4x4_c,
- aom_h_predictor_4x4_c, aom_paeth_predictor_4x4_c,
- aom_smooth_predictor_4x4_c, aom_smooth_v_predictor_4x4_c,
- aom_smooth_h_predictor_4x4_c)
-
-INTRA_PRED_TEST(C_2, TX_4X8, aom_dc_predictor_4x8_c,
- aom_dc_left_predictor_4x8_c, aom_dc_top_predictor_4x8_c,
- aom_dc_128_predictor_4x8_c, aom_v_predictor_4x8_c,
- aom_h_predictor_4x8_c, aom_paeth_predictor_4x8_c,
- aom_smooth_predictor_4x8_c, aom_smooth_v_predictor_4x8_c,
- aom_smooth_h_predictor_4x8_c)
-
-INTRA_PRED_TEST(C_3, TX_4X16, aom_dc_predictor_4x16_c,
- aom_dc_left_predictor_4x16_c, aom_dc_top_predictor_4x16_c,
- aom_dc_128_predictor_4x16_c, aom_v_predictor_4x16_c,
- aom_h_predictor_4x16_c, aom_paeth_predictor_4x16_c,
- aom_smooth_predictor_4x16_c, aom_smooth_v_predictor_4x16_c,
- aom_smooth_h_predictor_4x16_c)
-
-#if HAVE_SSE2
-INTRA_PRED_TEST(SSE2_1, TX_4X4, aom_dc_predictor_4x4_sse2,
- aom_dc_left_predictor_4x4_sse2, aom_dc_top_predictor_4x4_sse2,
- aom_dc_128_predictor_4x4_sse2, aom_v_predictor_4x4_sse2,
- aom_h_predictor_4x4_sse2, NULL, NULL, NULL, NULL)
-INTRA_PRED_TEST(SSE2_2, TX_4X8, aom_dc_predictor_4x8_sse2,
- aom_dc_left_predictor_4x8_sse2, aom_dc_top_predictor_4x8_sse2,
- aom_dc_128_predictor_4x8_sse2, aom_v_predictor_4x8_sse2,
- aom_h_predictor_4x8_sse2, NULL, NULL, NULL, NULL)
-INTRA_PRED_TEST(SSE2_3, TX_4X16, aom_dc_predictor_4x16_sse2,
- aom_dc_left_predictor_4x16_sse2, aom_dc_top_predictor_4x16_sse2,
- aom_dc_128_predictor_4x16_sse2, aom_v_predictor_4x16_sse2,
- aom_h_predictor_4x16_sse2, NULL, NULL, NULL, NULL)
-#endif // HAVE_SSE2
-
-#if HAVE_SSSE3
-INTRA_PRED_TEST(SSSE3_1, TX_4X4, NULL, NULL, NULL, NULL, NULL, NULL,
- aom_paeth_predictor_4x4_ssse3, aom_smooth_predictor_4x4_ssse3,
- aom_smooth_v_predictor_4x4_ssse3,
- aom_smooth_h_predictor_4x4_ssse3)
-INTRA_PRED_TEST(SSSE3_2, TX_4X8, NULL, NULL, NULL, NULL, NULL, NULL,
- aom_paeth_predictor_4x8_ssse3, aom_smooth_predictor_4x8_ssse3,
- aom_smooth_v_predictor_4x8_ssse3,
- aom_smooth_h_predictor_4x8_ssse3)
-INTRA_PRED_TEST(SSSE3_3, TX_4X16, NULL, NULL, NULL, NULL, NULL, NULL,
- aom_paeth_predictor_4x16_ssse3, aom_smooth_predictor_4x16_ssse3,
- aom_smooth_v_predictor_4x16_ssse3,
- aom_smooth_h_predictor_4x16_ssse3)
-#endif // HAVE_SSSE3
-
-#if HAVE_DSPR2
-INTRA_PRED_TEST(DSPR2, TX_4X4, aom_dc_predictor_4x4_dspr2, NULL, NULL, NULL,
- NULL, aom_h_predictor_4x4_dspr2, NULL, NULL, NULL, NULL)
-#endif // HAVE_DSPR2
-
-#if HAVE_NEON
-INTRA_PRED_TEST(NEON, TX_4X4, aom_dc_predictor_4x4_neon,
- aom_dc_left_predictor_4x4_neon, aom_dc_top_predictor_4x4_neon,
- aom_dc_128_predictor_4x4_neon, aom_v_predictor_4x4_neon,
- aom_h_predictor_4x4_neon, NULL, NULL, NULL, NULL)
-#endif // HAVE_NEON
-
-#if HAVE_MSA
-INTRA_PRED_TEST(MSA, TX_4X4, aom_dc_predictor_4x4_msa,
- aom_dc_left_predictor_4x4_msa, aom_dc_top_predictor_4x4_msa,
- aom_dc_128_predictor_4x4_msa, aom_v_predictor_4x4_msa,
- aom_h_predictor_4x4_msa, NULL, NULL, NULL, NULL)
-#endif // HAVE_MSA
-
-// -----------------------------------------------------------------------------
-// 8x8, 8x4, 8x16, 8x32
-
-INTRA_PRED_TEST(C_1, TX_8X8, aom_dc_predictor_8x8_c,
- aom_dc_left_predictor_8x8_c, aom_dc_top_predictor_8x8_c,
- aom_dc_128_predictor_8x8_c, aom_v_predictor_8x8_c,
- aom_h_predictor_8x8_c, aom_paeth_predictor_8x8_c,
- aom_smooth_predictor_8x8_c, aom_smooth_v_predictor_8x8_c,
- aom_smooth_h_predictor_8x8_c)
-
-INTRA_PRED_TEST(C_2, TX_8X4, aom_dc_predictor_8x4_c,
- aom_dc_left_predictor_8x4_c, aom_dc_top_predictor_8x4_c,
- aom_dc_128_predictor_8x4_c, aom_v_predictor_8x4_c,
- aom_h_predictor_8x4_c, aom_paeth_predictor_8x4_c,
- aom_smooth_predictor_8x4_c, aom_smooth_v_predictor_8x4_c,
- aom_smooth_h_predictor_8x4_c)
-
-INTRA_PRED_TEST(C_3, TX_8X16, aom_dc_predictor_8x16_c,
- aom_dc_left_predictor_8x16_c, aom_dc_top_predictor_8x16_c,
- aom_dc_128_predictor_8x16_c, aom_v_predictor_8x16_c,
- aom_h_predictor_8x16_c, aom_paeth_predictor_8x16_c,
- aom_smooth_predictor_8x16_c, aom_smooth_v_predictor_8x16_c,
- aom_smooth_h_predictor_8x16_c)
-
-INTRA_PRED_TEST(C_4, TX_8X32, aom_dc_predictor_8x32_c,
- aom_dc_left_predictor_8x32_c, aom_dc_top_predictor_8x32_c,
- aom_dc_128_predictor_8x32_c, aom_v_predictor_8x32_c,
- aom_h_predictor_8x32_c, aom_paeth_predictor_8x32_c,
- aom_smooth_predictor_8x32_c, aom_smooth_v_predictor_8x32_c,
- aom_smooth_h_predictor_8x32_c)
-
-#if HAVE_SSE2 // SSE2 fills the dc, dc_left, dc_top, dc_128, v and h slots.
-INTRA_PRED_TEST(SSE2_1, TX_8X8, aom_dc_predictor_8x8_sse2,
- aom_dc_left_predictor_8x8_sse2, aom_dc_top_predictor_8x8_sse2,
- aom_dc_128_predictor_8x8_sse2, aom_v_predictor_8x8_sse2,
- aom_h_predictor_8x8_sse2, NULL, NULL, NULL, NULL)
-INTRA_PRED_TEST(SSE2_2, TX_8X4, aom_dc_predictor_8x4_sse2,
- aom_dc_left_predictor_8x4_sse2, aom_dc_top_predictor_8x4_sse2,
- aom_dc_128_predictor_8x4_sse2, aom_v_predictor_8x4_sse2,
- aom_h_predictor_8x4_sse2, NULL, NULL, NULL, NULL)
-INTRA_PRED_TEST(SSE2_3, TX_8X16, aom_dc_predictor_8x16_sse2,
- aom_dc_left_predictor_8x16_sse2, aom_dc_top_predictor_8x16_sse2,
- aom_dc_128_predictor_8x16_sse2, aom_v_predictor_8x16_sse2,
- aom_h_predictor_8x16_sse2, NULL, NULL, NULL, NULL)
-INTRA_PRED_TEST(SSE2_4, TX_8X32, aom_dc_predictor_8x32_sse2,
- aom_dc_left_predictor_8x32_sse2, aom_dc_top_predictor_8x32_sse2,
- aom_dc_128_predictor_8x32_sse2, aom_v_predictor_8x32_sse2,
- aom_h_predictor_8x32_sse2, NULL, NULL, NULL, NULL)
-#endif // HAVE_SSE2
-
-#if HAVE_SSSE3 // SSSE3 fills only the paeth, smooth, smooth_v, smooth_h slots.
-INTRA_PRED_TEST(SSSE3_1, TX_8X8, NULL, NULL, NULL, NULL, NULL, NULL,
- aom_paeth_predictor_8x8_ssse3, aom_smooth_predictor_8x8_ssse3,
- aom_smooth_v_predictor_8x8_ssse3,
- aom_smooth_h_predictor_8x8_ssse3)
-INTRA_PRED_TEST(SSSE3_2, TX_8X4, NULL, NULL, NULL, NULL, NULL, NULL,
- aom_paeth_predictor_8x4_ssse3, aom_smooth_predictor_8x4_ssse3,
- aom_smooth_v_predictor_8x4_ssse3,
- aom_smooth_h_predictor_8x4_ssse3)
-INTRA_PRED_TEST(SSSE3_3, TX_8X16, NULL, NULL, NULL, NULL, NULL, NULL,
- aom_paeth_predictor_8x16_ssse3, aom_smooth_predictor_8x16_ssse3,
- aom_smooth_v_predictor_8x16_ssse3,
- aom_smooth_h_predictor_8x16_ssse3)
-INTRA_PRED_TEST(SSSE3_4, TX_8X32, NULL, NULL, NULL, NULL, NULL, NULL,
- aom_paeth_predictor_8x32_ssse3, aom_smooth_predictor_8x32_ssse3,
- aom_smooth_v_predictor_8x32_ssse3,
- aom_smooth_h_predictor_8x32_ssse3)
-#endif // HAVE_SSSE3
-
-#if HAVE_DSPR2 // DSPR2 registers 8x8 only, with just the dc and h slots.
-INTRA_PRED_TEST(DSPR2, TX_8X8, aom_dc_predictor_8x8_dspr2, NULL, NULL, NULL,
- NULL, aom_h_predictor_8x8_dspr2, NULL, NULL, NULL, NULL)
-#endif // HAVE_DSPR2
-
-#if HAVE_NEON // NEON registers 8x8 only: dc, dc_left, dc_top, dc_128, v, h.
-INTRA_PRED_TEST(NEON, TX_8X8, aom_dc_predictor_8x8_neon,
- aom_dc_left_predictor_8x8_neon, aom_dc_top_predictor_8x8_neon,
- aom_dc_128_predictor_8x8_neon, aom_v_predictor_8x8_neon,
- aom_h_predictor_8x8_neon, NULL, NULL, NULL, NULL)
-#endif // HAVE_NEON
-
-#if HAVE_MSA // MSA registers 8x8 only: dc, dc_left, dc_top, dc_128, v, h.
-INTRA_PRED_TEST(MSA, TX_8X8, aom_dc_predictor_8x8_msa,
- aom_dc_left_predictor_8x8_msa, aom_dc_top_predictor_8x8_msa,
- aom_dc_128_predictor_8x8_msa, aom_v_predictor_8x8_msa,
- aom_h_predictor_8x8_msa, NULL, NULL, NULL, NULL)
-#endif // HAVE_MSA
-
-// -----------------------------------------------------------------------------
-// 16x16, 16x8, 16x32, 16x4, 16x64
-// Slot order: dc, dc_left, dc_top, dc_128, v, h, paeth, smooth, smooth_v, smooth_h
-INTRA_PRED_TEST(C_1, TX_16X16, aom_dc_predictor_16x16_c,
- aom_dc_left_predictor_16x16_c, aom_dc_top_predictor_16x16_c,
- aom_dc_128_predictor_16x16_c, aom_v_predictor_16x16_c,
- aom_h_predictor_16x16_c, aom_paeth_predictor_16x16_c,
- aom_smooth_predictor_16x16_c, aom_smooth_v_predictor_16x16_c,
- aom_smooth_h_predictor_16x16_c)
-
-INTRA_PRED_TEST(C_2, TX_16X8, aom_dc_predictor_16x8_c,
- aom_dc_left_predictor_16x8_c, aom_dc_top_predictor_16x8_c,
- aom_dc_128_predictor_16x8_c, aom_v_predictor_16x8_c,
- aom_h_predictor_16x8_c, aom_paeth_predictor_16x8_c,
- aom_smooth_predictor_16x8_c, aom_smooth_v_predictor_16x8_c,
- aom_smooth_h_predictor_16x8_c)
-
-INTRA_PRED_TEST(C_3, TX_16X32, aom_dc_predictor_16x32_c,
- aom_dc_left_predictor_16x32_c, aom_dc_top_predictor_16x32_c,
- aom_dc_128_predictor_16x32_c, aom_v_predictor_16x32_c,
- aom_h_predictor_16x32_c, aom_paeth_predictor_16x32_c,
- aom_smooth_predictor_16x32_c, aom_smooth_v_predictor_16x32_c,
- aom_smooth_h_predictor_16x32_c)
-
-INTRA_PRED_TEST(C_4, TX_16X4, aom_dc_predictor_16x4_c,
- aom_dc_left_predictor_16x4_c, aom_dc_top_predictor_16x4_c,
- aom_dc_128_predictor_16x4_c, aom_v_predictor_16x4_c,
- aom_h_predictor_16x4_c, aom_paeth_predictor_16x4_c,
- aom_smooth_predictor_16x4_c, aom_smooth_v_predictor_16x4_c,
- aom_smooth_h_predictor_16x4_c)
-
-INTRA_PRED_TEST(C_5, TX_16X64, aom_dc_predictor_16x64_c,
- aom_dc_left_predictor_16x64_c, aom_dc_top_predictor_16x64_c,
- aom_dc_128_predictor_16x64_c, aom_v_predictor_16x64_c,
- aom_h_predictor_16x64_c, aom_paeth_predictor_16x64_c,
- aom_smooth_predictor_16x64_c, aom_smooth_v_predictor_16x64_c,
- aom_smooth_h_predictor_16x64_c)
-
-#if HAVE_SSE2 // dc*/v/h slots; note SSE2_4 is 16x64 and SSE2_5 is 16x4 here.
-INTRA_PRED_TEST(SSE2_1, TX_16X16, aom_dc_predictor_16x16_sse2,
- aom_dc_left_predictor_16x16_sse2,
- aom_dc_top_predictor_16x16_sse2,
- aom_dc_128_predictor_16x16_sse2, aom_v_predictor_16x16_sse2,
- aom_h_predictor_16x16_sse2, NULL, NULL, NULL, NULL)
-INTRA_PRED_TEST(SSE2_2, TX_16X8, aom_dc_predictor_16x8_sse2,
- aom_dc_left_predictor_16x8_sse2, aom_dc_top_predictor_16x8_sse2,
- aom_dc_128_predictor_16x8_sse2, aom_v_predictor_16x8_sse2,
- aom_h_predictor_16x8_sse2, NULL, NULL, NULL, NULL)
-INTRA_PRED_TEST(SSE2_3, TX_16X32, aom_dc_predictor_16x32_sse2,
- aom_dc_left_predictor_16x32_sse2,
- aom_dc_top_predictor_16x32_sse2,
- aom_dc_128_predictor_16x32_sse2, aom_v_predictor_16x32_sse2,
- aom_h_predictor_16x32_sse2, NULL, NULL, NULL, NULL)
-INTRA_PRED_TEST(SSE2_4, TX_16X64, aom_dc_predictor_16x64_sse2,
- aom_dc_left_predictor_16x64_sse2,
- aom_dc_top_predictor_16x64_sse2,
- aom_dc_128_predictor_16x64_sse2, aom_v_predictor_16x64_sse2,
- aom_h_predictor_16x64_sse2, NULL, NULL, NULL, NULL)
-INTRA_PRED_TEST(SSE2_5, TX_16X4, aom_dc_predictor_16x4_sse2,
- aom_dc_left_predictor_16x4_sse2, aom_dc_top_predictor_16x4_sse2,
- aom_dc_128_predictor_16x4_sse2, aom_v_predictor_16x4_sse2,
- aom_h_predictor_16x4_sse2, NULL, NULL, NULL, NULL)
-#endif // HAVE_SSE2
-
-#if HAVE_SSSE3 // paeth and smooth* slots for all five 16xN sizes.
-INTRA_PRED_TEST(SSSE3_1, TX_16X16, NULL, NULL, NULL, NULL, NULL, NULL,
- aom_paeth_predictor_16x16_ssse3,
- aom_smooth_predictor_16x16_ssse3,
- aom_smooth_v_predictor_16x16_ssse3,
- aom_smooth_h_predictor_16x16_ssse3)
-INTRA_PRED_TEST(SSSE3_2, TX_16X8, NULL, NULL, NULL, NULL, NULL, NULL,
- aom_paeth_predictor_16x8_ssse3, aom_smooth_predictor_16x8_ssse3,
- aom_smooth_v_predictor_16x8_ssse3,
- aom_smooth_h_predictor_16x8_ssse3)
-INTRA_PRED_TEST(SSSE3_3, TX_16X32, NULL, NULL, NULL, NULL, NULL, NULL,
- aom_paeth_predictor_16x32_ssse3,
- aom_smooth_predictor_16x32_ssse3,
- aom_smooth_v_predictor_16x32_ssse3,
- aom_smooth_h_predictor_16x32_ssse3)
-INTRA_PRED_TEST(SSSE3_4, TX_16X64, NULL, NULL, NULL, NULL, NULL, NULL,
- aom_paeth_predictor_16x64_ssse3,
- aom_smooth_predictor_16x64_ssse3,
- aom_smooth_v_predictor_16x64_ssse3,
- aom_smooth_h_predictor_16x64_ssse3)
-INTRA_PRED_TEST(SSSE3_5, TX_16X4, NULL, NULL, NULL, NULL, NULL, NULL,
- aom_paeth_predictor_16x4_ssse3, aom_smooth_predictor_16x4_ssse3,
- aom_smooth_v_predictor_16x4_ssse3,
- aom_smooth_h_predictor_16x4_ssse3)
-#endif // HAVE_SSSE3
-
-#if HAVE_AVX2 // AVX2 fills only the paeth slot; there is no 16x4 entry.
-INTRA_PRED_TEST(AVX2_1, TX_16X16, NULL, NULL, NULL, NULL, NULL, NULL,
- aom_paeth_predictor_16x16_avx2, NULL, NULL, NULL)
-INTRA_PRED_TEST(AVX2_2, TX_16X8, NULL, NULL, NULL, NULL, NULL, NULL,
- aom_paeth_predictor_16x8_avx2, NULL, NULL, NULL)
-INTRA_PRED_TEST(AVX2_3, TX_16X32, NULL, NULL, NULL, NULL, NULL, NULL,
- aom_paeth_predictor_16x32_avx2, NULL, NULL, NULL)
-INTRA_PRED_TEST(AVX2_4, TX_16X64, NULL, NULL, NULL, NULL, NULL, NULL,
- aom_paeth_predictor_16x64_avx2, NULL, NULL, NULL)
-#endif // HAVE_AVX2
-
-#if HAVE_DSPR2 // DSPR2 registers 16x16 only, with just the dc and h slots.
-INTRA_PRED_TEST(DSPR2, TX_16X16, aom_dc_predictor_16x16_dspr2, NULL, NULL, NULL,
- NULL, aom_h_predictor_16x16_dspr2, NULL, NULL, NULL, NULL)
-#endif // HAVE_DSPR2
-
-#if HAVE_NEON // NEON registers 16x16 only: dc, dc_left, dc_top, dc_128, v, h.
-INTRA_PRED_TEST(NEON, TX_16X16, aom_dc_predictor_16x16_neon,
- aom_dc_left_predictor_16x16_neon,
- aom_dc_top_predictor_16x16_neon,
- aom_dc_128_predictor_16x16_neon, aom_v_predictor_16x16_neon,
- aom_h_predictor_16x16_neon, NULL, NULL, NULL, NULL)
-#endif // HAVE_NEON
-
-#if HAVE_MSA // MSA registers 16x16 only: dc, dc_left, dc_top, dc_128, v, h.
-INTRA_PRED_TEST(MSA, TX_16X16, aom_dc_predictor_16x16_msa,
- aom_dc_left_predictor_16x16_msa, aom_dc_top_predictor_16x16_msa,
- aom_dc_128_predictor_16x16_msa, aom_v_predictor_16x16_msa,
- aom_h_predictor_16x16_msa, NULL, NULL, NULL, NULL)
-#endif // HAVE_MSA
-
-// -----------------------------------------------------------------------------
-// 32x32, 32x16, 32x64, 32x8
-// C reference predictors come first and populate all ten slots for each size.
-INTRA_PRED_TEST(C_1, TX_32X32, aom_dc_predictor_32x32_c,
- aom_dc_left_predictor_32x32_c, aom_dc_top_predictor_32x32_c,
- aom_dc_128_predictor_32x32_c, aom_v_predictor_32x32_c,
- aom_h_predictor_32x32_c, aom_paeth_predictor_32x32_c,
- aom_smooth_predictor_32x32_c, aom_smooth_v_predictor_32x32_c,
- aom_smooth_h_predictor_32x32_c)
-
-INTRA_PRED_TEST(C_2, TX_32X16, aom_dc_predictor_32x16_c,
- aom_dc_left_predictor_32x16_c, aom_dc_top_predictor_32x16_c,
- aom_dc_128_predictor_32x16_c, aom_v_predictor_32x16_c,
- aom_h_predictor_32x16_c, aom_paeth_predictor_32x16_c,
- aom_smooth_predictor_32x16_c, aom_smooth_v_predictor_32x16_c,
- aom_smooth_h_predictor_32x16_c)
-
-INTRA_PRED_TEST(C_3, TX_32X64, aom_dc_predictor_32x64_c,
- aom_dc_left_predictor_32x64_c, aom_dc_top_predictor_32x64_c,
- aom_dc_128_predictor_32x64_c, aom_v_predictor_32x64_c,
- aom_h_predictor_32x64_c, aom_paeth_predictor_32x64_c,
- aom_smooth_predictor_32x64_c, aom_smooth_v_predictor_32x64_c,
- aom_smooth_h_predictor_32x64_c)
-
-INTRA_PRED_TEST(C_4, TX_32X8, aom_dc_predictor_32x8_c,
- aom_dc_left_predictor_32x8_c, aom_dc_top_predictor_32x8_c,
- aom_dc_128_predictor_32x8_c, aom_v_predictor_32x8_c,
- aom_h_predictor_32x8_c, aom_paeth_predictor_32x8_c,
- aom_smooth_predictor_32x8_c, aom_smooth_v_predictor_32x8_c,
- aom_smooth_h_predictor_32x8_c)
-
-#if HAVE_SSE2 // SSE2 fills dc, dc_left, dc_top, dc_128, v, h for all four sizes.
-INTRA_PRED_TEST(SSE2_1, TX_32X32, aom_dc_predictor_32x32_sse2,
- aom_dc_left_predictor_32x32_sse2,
- aom_dc_top_predictor_32x32_sse2,
- aom_dc_128_predictor_32x32_sse2, aom_v_predictor_32x32_sse2,
- aom_h_predictor_32x32_sse2, NULL, NULL, NULL, NULL)
-INTRA_PRED_TEST(SSE2_2, TX_32X16, aom_dc_predictor_32x16_sse2,
- aom_dc_left_predictor_32x16_sse2,
- aom_dc_top_predictor_32x16_sse2,
- aom_dc_128_predictor_32x16_sse2, aom_v_predictor_32x16_sse2,
- aom_h_predictor_32x16_sse2, NULL, NULL, NULL, NULL)
-INTRA_PRED_TEST(SSE2_3, TX_32X64, aom_dc_predictor_32x64_sse2,
- aom_dc_left_predictor_32x64_sse2,
- aom_dc_top_predictor_32x64_sse2,
- aom_dc_128_predictor_32x64_sse2, aom_v_predictor_32x64_sse2,
- aom_h_predictor_32x64_sse2, NULL, NULL, NULL, NULL)
-INTRA_PRED_TEST(SSE2_4, TX_32X8, aom_dc_predictor_32x8_sse2,
- aom_dc_left_predictor_32x8_sse2, aom_dc_top_predictor_32x8_sse2,
- aom_dc_128_predictor_32x8_sse2, aom_v_predictor_32x8_sse2,
- aom_h_predictor_32x8_sse2, NULL, NULL, NULL, NULL)
-#endif // HAVE_SSE2
-
-#if HAVE_SSSE3 // SSSE3 fills the paeth and smooth* slots for all four sizes.
-INTRA_PRED_TEST(SSSE3_1, TX_32X32, NULL, NULL, NULL, NULL, NULL, NULL,
- aom_paeth_predictor_32x32_ssse3,
- aom_smooth_predictor_32x32_ssse3,
- aom_smooth_v_predictor_32x32_ssse3,
- aom_smooth_h_predictor_32x32_ssse3)
-INTRA_PRED_TEST(SSSE3_2, TX_32X16, NULL, NULL, NULL, NULL, NULL, NULL,
- aom_paeth_predictor_32x16_ssse3,
- aom_smooth_predictor_32x16_ssse3,
- aom_smooth_v_predictor_32x16_ssse3,
- aom_smooth_h_predictor_32x16_ssse3)
-INTRA_PRED_TEST(SSSE3_3, TX_32X64, NULL, NULL, NULL, NULL, NULL, NULL,
- aom_paeth_predictor_32x64_ssse3,
- aom_smooth_predictor_32x64_ssse3,
- aom_smooth_v_predictor_32x64_ssse3,
- aom_smooth_h_predictor_32x64_ssse3)
-INTRA_PRED_TEST(SSSE3_4, TX_32X8, NULL, NULL, NULL, NULL, NULL, NULL,
- aom_paeth_predictor_32x8_ssse3, aom_smooth_predictor_32x8_ssse3,
- aom_smooth_v_predictor_32x8_ssse3,
- aom_smooth_h_predictor_32x8_ssse3)
-#endif // HAVE_SSSE3
-
-#if HAVE_AVX2 // dc*/v/paeth slots; h is set for 32x32 only; no 32x8 entry.
-INTRA_PRED_TEST(AVX2_1, TX_32X32, aom_dc_predictor_32x32_avx2,
- aom_dc_left_predictor_32x32_avx2,
- aom_dc_top_predictor_32x32_avx2,
- aom_dc_128_predictor_32x32_avx2, aom_v_predictor_32x32_avx2,
- aom_h_predictor_32x32_avx2, aom_paeth_predictor_32x32_avx2,
- NULL, NULL, NULL)
-INTRA_PRED_TEST(AVX2_2, TX_32X16, aom_dc_predictor_32x16_avx2,
- aom_dc_left_predictor_32x16_avx2,
- aom_dc_top_predictor_32x16_avx2,
- aom_dc_128_predictor_32x16_avx2, aom_v_predictor_32x16_avx2,
- NULL, aom_paeth_predictor_32x16_avx2, NULL, NULL, NULL)
-INTRA_PRED_TEST(AVX2_3, TX_32X64, aom_dc_predictor_32x64_avx2,
- aom_dc_left_predictor_32x64_avx2,
- aom_dc_top_predictor_32x64_avx2,
- aom_dc_128_predictor_32x64_avx2, aom_v_predictor_32x64_avx2,
- NULL, aom_paeth_predictor_32x64_avx2, NULL, NULL, NULL)
-#endif // HAVE_AVX2
-
-#if HAVE_NEON // NEON registers 32x32 only: dc, dc_left, dc_top, dc_128, v, h.
-INTRA_PRED_TEST(NEON, TX_32X32, aom_dc_predictor_32x32_neon,
- aom_dc_left_predictor_32x32_neon,
- aom_dc_top_predictor_32x32_neon,
- aom_dc_128_predictor_32x32_neon, aom_v_predictor_32x32_neon,
- aom_h_predictor_32x32_neon, NULL, NULL, NULL, NULL)
-#endif // HAVE_NEON
-
-#if HAVE_MSA // MSA registers 32x32 only: dc, dc_left, dc_top, dc_128, v, h.
-INTRA_PRED_TEST(MSA, TX_32X32, aom_dc_predictor_32x32_msa,
- aom_dc_left_predictor_32x32_msa, aom_dc_top_predictor_32x32_msa,
- aom_dc_128_predictor_32x32_msa, aom_v_predictor_32x32_msa,
- aom_h_predictor_32x32_msa, NULL, NULL, NULL, NULL)
-#endif // HAVE_MSA
-
-// -----------------------------------------------------------------------------
-// 64x64, 64x32, 64x16
-// C reference predictors come first and populate all ten slots for each size.
-INTRA_PRED_TEST(C_1, TX_64X64, aom_dc_predictor_64x64_c,
- aom_dc_left_predictor_64x64_c, aom_dc_top_predictor_64x64_c,
- aom_dc_128_predictor_64x64_c, aom_v_predictor_64x64_c,
- aom_h_predictor_64x64_c, aom_paeth_predictor_64x64_c,
- aom_smooth_predictor_64x64_c, aom_smooth_v_predictor_64x64_c,
- aom_smooth_h_predictor_64x64_c)
-
-INTRA_PRED_TEST(C_2, TX_64X32, aom_dc_predictor_64x32_c,
- aom_dc_left_predictor_64x32_c, aom_dc_top_predictor_64x32_c,
- aom_dc_128_predictor_64x32_c, aom_v_predictor_64x32_c,
- aom_h_predictor_64x32_c, aom_paeth_predictor_64x32_c,
- aom_smooth_predictor_64x32_c, aom_smooth_v_predictor_64x32_c,
- aom_smooth_h_predictor_64x32_c)
-
-INTRA_PRED_TEST(C_3, TX_64X16, aom_dc_predictor_64x16_c,
- aom_dc_left_predictor_64x16_c, aom_dc_top_predictor_64x16_c,
- aom_dc_128_predictor_64x16_c, aom_v_predictor_64x16_c,
- aom_h_predictor_64x16_c, aom_paeth_predictor_64x16_c,
- aom_smooth_predictor_64x16_c, aom_smooth_v_predictor_64x16_c,
- aom_smooth_h_predictor_64x16_c)
-
-#if HAVE_SSE2 // Suite suffixes run _4.._6 here rather than restarting at _1.
-INTRA_PRED_TEST(SSE2_4, TX_64X64, aom_dc_predictor_64x64_sse2,
- aom_dc_left_predictor_64x64_sse2,
- aom_dc_top_predictor_64x64_sse2,
- aom_dc_128_predictor_64x64_sse2, aom_v_predictor_64x64_sse2,
- aom_h_predictor_64x64_sse2, NULL, NULL, NULL, NULL)
-INTRA_PRED_TEST(SSE2_5, TX_64X32, aom_dc_predictor_64x32_sse2,
- aom_dc_left_predictor_64x32_sse2,
- aom_dc_top_predictor_64x32_sse2,
- aom_dc_128_predictor_64x32_sse2, aom_v_predictor_64x32_sse2,
- aom_h_predictor_64x32_sse2, NULL, NULL, NULL, NULL)
-INTRA_PRED_TEST(SSE2_6, TX_64X16, aom_dc_predictor_64x16_sse2,
- aom_dc_left_predictor_64x16_sse2,
- aom_dc_top_predictor_64x16_sse2,
- aom_dc_128_predictor_64x16_sse2, aom_v_predictor_64x16_sse2,
- aom_h_predictor_64x16_sse2, NULL, NULL, NULL, NULL)
-#endif // HAVE_SSE2
-
-#if HAVE_SSSE3 // SSSE3 fills the paeth and smooth* slots; suffixes _4.._6.
-INTRA_PRED_TEST(SSSE3_4, TX_64X64, NULL, NULL, NULL, NULL, NULL, NULL,
- aom_paeth_predictor_64x64_ssse3,
- aom_smooth_predictor_64x64_ssse3,
- aom_smooth_v_predictor_64x64_ssse3,
- aom_smooth_h_predictor_64x64_ssse3)
-INTRA_PRED_TEST(SSSE3_5, TX_64X32, NULL, NULL, NULL, NULL, NULL, NULL,
- aom_paeth_predictor_64x32_ssse3,
- aom_smooth_predictor_64x32_ssse3,
- aom_smooth_v_predictor_64x32_ssse3,
- aom_smooth_h_predictor_64x32_ssse3)
-INTRA_PRED_TEST(SSSE3_6, TX_64X16, NULL, NULL, NULL, NULL, NULL, NULL,
- aom_paeth_predictor_64x16_ssse3,
- aom_smooth_predictor_64x16_ssse3,
- aom_smooth_v_predictor_64x16_ssse3,
- aom_smooth_h_predictor_64x16_ssse3)
-#endif // HAVE_SSSE3
-
-#if HAVE_AVX2 // AVX2 fills dc*/v/paeth; the h slot is NULL for every 64xN size.
-INTRA_PRED_TEST(AVX2_4, TX_64X64, aom_dc_predictor_64x64_avx2,
- aom_dc_left_predictor_64x64_avx2,
- aom_dc_top_predictor_64x64_avx2,
- aom_dc_128_predictor_64x64_avx2, aom_v_predictor_64x64_avx2,
- NULL, aom_paeth_predictor_64x64_avx2, NULL, NULL, NULL)
-INTRA_PRED_TEST(AVX2_5, TX_64X32, aom_dc_predictor_64x32_avx2,
- aom_dc_left_predictor_64x32_avx2,
- aom_dc_top_predictor_64x32_avx2,
- aom_dc_128_predictor_64x32_avx2, aom_v_predictor_64x32_avx2,
- NULL, aom_paeth_predictor_64x32_avx2, NULL, NULL, NULL)
-INTRA_PRED_TEST(AVX2_6, TX_64X16, aom_dc_predictor_64x16_avx2,
- aom_dc_left_predictor_64x16_avx2,
- aom_dc_top_predictor_64x16_avx2,
- aom_dc_128_predictor_64x16_avx2, aom_v_predictor_64x16_avx2,
- NULL, aom_paeth_predictor_64x16_avx2, NULL, NULL, NULL)
-#endif // HAVE_AVX2
-// -----------------------------------------------------------------------------
-// High Bitdepth
-namespace {
-// Predictor signature used by this driver; bd is presumably the bit depth.
-typedef void (*AvxHighbdPredFunc)(uint16_t *dst, ptrdiff_t y_stride,
- const uint16_t *above, const uint16_t *left,
- int bd);
-// Test buffers (src, ref_src, above, left, ...) instantiated for uint16_t pixels.
-typedef IntraPredTestMem<uint16_t> Av1HighbdIntraPredTestMem;
-
-// Times and checks the high-bitdepth intra predictors for one transform size.
-//
-//   tx_size    - selects the block dimensions via tx_size_wide[] and
-//                tx_size_high[].
-//   pred_funcs - kNumAv1IntraFuncs predictor pointers; NULL entries are
-//                skipped.
-//   signatures - expected-signature strings, forwarded to CheckMd5Signature()
-//                together with the index of the predictor being checked.
-//
-// Every non-NULL predictor is invoked repeatedly with bd = 12 on a buffer that
-// was first reset from the reference copy. The elapsed timer value divided by
-// 1000 and the final buffer contents are then handed to CheckMd5Signature().
-void TestHighbdIntraPred(TX_SIZE tx_size, AvxHighbdPredFunc const *pred_funcs,
-                         const char *const signatures[]) {
-  const int kBitDepth = 12;
-  const int width = tx_size_wide[tx_size];
-  const int height = tx_size_high[tx_size];
-  // The repeat count shrinks with block area so that a fully populated
-  // predictor set processes about 2e10 pixels in total.
-  const int pixels_per_pass = width * height * kNumAv1IntraFuncs;
-  const int repeats = static_cast<int>(2.e10 / pixels_per_pass);
-
-  Av1HighbdIntraPredTestMem mem;
-  mem.Init(width, height, kBitDepth);
-
-  for (int slot = 0; slot < kNumAv1IntraFuncs; ++slot) {
-    const AvxHighbdPredFunc predict = pred_funcs[slot];
-    if (predict != NULL) {
-      // Start each predictor from the same reference contents.
-      memcpy(mem.src, mem.ref_src, sizeof(mem.src));
-
-      aom_usec_timer timer;
-      aom_usec_timer_start(&timer);
-      for (int left_to_run = repeats; left_to_run > 0; --left_to_run) {
-        predict(mem.src, mem.stride, mem.above, mem.left, kBitDepth);
-      }
-      libaom_test::ClearSystemState();
-      aom_usec_timer_mark(&timer);
-
-      const int elapsed_time =
-          static_cast<int>(aom_usec_timer_elapsed(&timer) / 1000);
-      CheckMd5Signature(tx_size, true, signatures, mem.src,
-                        mem.num_pixels * sizeof(*mem.src), elapsed_time, slot);
-    }
-  }
-}
-
-static const char *const kHighbdSignatures[TX_SIZES_ALL][kNumAv1IntraFuncs] = {
- { // Rows are indexed by tx_size (see HIGHBD_INTRA_PRED_TEST).
- // 4X4 -- one string per predictor slot, presumably in HIGHBD_INTRA_PRED_TEST order
- "11f74af6c5737df472f3275cbde062fa",
- "51bea056b6447c93f6eb8f6b7e8f6f71",
- "27e97f946766331795886f4de04c5594",
- "53ab15974b049111fb596c5168ec7e3f",
- "f0b640bb176fbe4584cf3d32a9b0320a",
- "729783ca909e03afd4b47111c80d967b",
- "6e30009c45474a22032678b1bd579c8f",
- "e57cba016d808aa8a35619df2a65f049",
- "55a6c37f39afcbbf5abca4a985b96459",
- "a623d45b37dafec1f8a75c4c5218913d",
- },
- {
- // 8X8
- "03da8829fe94663047fd108c5fcaa71d",
- "ecdb37b8120a2d3a4c706b016bd1bfd7",
- "1d4543ed8d2b9368cb96898095fe8a75",
- "f791c9a67b913cbd82d9da8ecede30e2",
- "065c70646f4dbaff913282f55a45a441",
- "51f87123616662ef7c35691497dfd0ba",
- "85c01ba03df68f9ece7bd3fa0f8980e6",
- "ad19b7dac092f56df6d054e1f67f21e7",
- "0edc415b5dd7299f7a34fb9f71d31d78",
- "2bc8ec19e9f4b77a64b8a0a1f6aec7e7",
- },
- {
- // 16X16
- "e33cb3f56a878e2fddb1b2fc51cdd275",
- "c7bff6f04b6052c8ab335d726dbbd52d",
- "d0b0b47b654a9bcc5c6008110a44589b",
- "78f5da7b10b2b9ab39f114a33b6254e9",
- "c78e31d23831abb40d6271a318fdd6f3",
- "90d1347f4ec9198a0320daecb6ff90b8",
- "e63ded54ab3d0e8728b6f24d4f01e53f",
- "35ce21fbe0ea114c089fc3489a78155d",
- "f277f6ef8e4d717f1f0dfe2706ac197d",
- "e8014d3f41256976c02e0f1e622ba2b9",
- },
- {
- // 32X32
- "a3e8056ba7e36628cce4917cd956fedd",
- "cc7d3024fe8748b512407edee045377e",
- "2aab0a0f330a1d3e19b8ecb8f06387a3",
- "a547bc3fb7b06910bf3973122a426661",
- "26f712514da95042f93d6e8dc8e431dc",
- "bb08c6e16177081daa3d936538dbc2e3",
- "84bf83f94a51b33654ca940c6f8bc057",
- "7168b03fc31bf29596a344d6a35d007c",
- "b073a70d3672f1282236994f5d12e94b",
- "c51607aebad5dcb3c1e3b58ef9e5b84e",
- },
- {
- // 64X64
- "a6baa0d4bfb2269a94c7a38f86a4bccf",
- "3f1ef5f473a49eba743f17a3324adf9d",
- "12ac11889ae5f55b7781454efd706a6a",
- "d9a906c0e692b22e1b4414e71a704b7e",
- "47d4cadd56f70c11ff8f3e5d8df81161",
- "de997744cf24c16c5ac2a36b02b351cc",
- "23781211ae178ddeb6c4bb97a6bd7d83",
- "a79d2e28340ca34b9e37daabbf030f63",
- "0372bd3ddfc258750a6ac106b70587f4",
- "228ef625d9460cbf6fa253a16a730976",
- },
- {
- // 4X8
- "22d519b796d59644043466320e4ccd14",
- "09513a738c49b3f9542d27f34abbe1d5",
- "807ae5e8813443ff01e71be6efacfb69",
- "cbfa18d0293430b6e9708b0be1fd2394",
- "346c354c34ec7fa780b576db355dab88",
- "f97dae85c35359632380b09ca98d611e",
- "698ae351d8896d89ed9e4e67b6e53eda",
- "dcc197034a9c45a3d8238bf085835f4e",
- "7a35e2c42ffdc2efc2d6d1d75a100fc7",
- "41ab6cebd4516c87a91b2a593e2c2506",
- },
- {
- // 8X4
- "d58cd4c4bf3b7bbaa5db5e1a5622ec78",
- "6e572c35aa782d00cafcb99e9ea047ea",
- "e8c22a3702b416dc9ab974505afbed09",
- "aaa4e4762a795aad7ad74de0c662c4e4",
- "a19f9101967383c3dcbd516dc317a291",
- "9ab8cb91f1a595b9ebe3fe8de58031aa",
- "2cf9021d5f1169268699807ee118b65f",
- "ee9605fcbd6fb871f1c5cd81a6989327",
- "b4871af8316089e3e23522175df7e93f",
- "d33301e1c2cb173be46792a22d19881a",
- },
- {
- // 8X16
- "4562de1d0336610880fdd5685498a9ec",
- "16310fa7076394f16fc85c4b149d89c9",
- "0e94af88e1dc573b6f0f499cddd1f530",
- "dfd245ee20d091c67809160340365aa9",
- "d3562504327f70c096c5be23fd8a3747",
- "601b853558502acbb5135eadd2da117a",
- "3c624345a723a1b2b1bea05a6a08bc99",
- "2a9c781de609e0184cc7ab442050f4e5",
- "0ddc5035c22252747126b61fc238c74d",
- "e43f5d83bab759af69c7b6773fc8f9b2",
- },
- {
- // 16X8
- "a57d6b5a9bfd30c29591d8717ace9c51",
- "f5907ba97ee6c53e339e953fc8d845ee",
- "ea3aa727913ce45af06f89dd1808db5f",
- "408af4f23e48d14b48ee35ae094fcd18",
- "85c41cbcb5d744f7961e8950026fbffe",
- "8a4e588a837638887ba671f8d4910485",
- "b792d8826b67a21757ea7097cff9e05b",
- "f94ce7101bb87fd3bb9312112527dbf4",
- "688c6660a6dc6fa61fa1aa38e708c209",
- "0cdf641b4f81d69509c92ae0b93ef5ff",
- },
- {
- // 16X32
- "aee4b3b0e3cc02d48e2c40d77f807927",
- "8baef2b2e789f79c8df9d90ad10f34a4",
- "038c38ee3c4f090bb8d736eab136aafc",
- "1a3de2aaeaffd68a9fd6c7f6557b83f3",
- "385c6e0ea29421dd81011a2934641e26",
- "6cf96c285d1a2d4787f955dad715b08c",
- "2d7f75dcd73b9528c8396279ff09ff3a",
- "5a63cd1841e4ed470e4ca5ef845f2281",
- "610d899ca945fbead33287d4335a8b32",
- "6bafaad81fce37be46730187e78d8b11",
- },
- {
- // 32X16
- "290b23c9f5a1de7905bfa71a942da29b",
- "701e7b82593c66da5052fc4b6afd79ce",
- "4da828c5455cd246735a663fbb204989",
- "e3fbeaf234efece8dbd752b77226200c",
- "4d1d8c969f05155a7e7e84cf7aad021b",
- "c22e4877c2c946d5bdc0d542e29e70cf",
- "8ac1ce815e7780500f842b0beb0bb980",
- "9fee2e2502b507f25bfad30a55b0b610",
- "4ced9c212ec6f9956e27f68a91b59fef",
- "4a7a0b93f138bb0863e4e465b01ec0b1",
- },
- {
- // 32X64
- "ad9cfc395a5c5644a21d958c7274ac14",
- "f29d6d03c143ddf96fef04c19f2c8333",
- "a8bdc852ef704dd4975c61893e8fbc3f",
- "7d0bd7dea26226741dbca9a97f27fa74",
- "45c27c5cca9a91b6ae8379feb0881c9f",
- "8a0b78df1e001b85c874d686eac4aa1b",
- "ce9fa75fac54a3f6c0cc3f2083b938f1",
- "c0dca10d88762c954af18dc9e3791a39",
- "61df229eddfccab913b8fda4bb02f9ac",
- "4f4df6bc8d50a5600b573f0e44d70e66",
- },
- {
- // 64X32
- "db9d82921fd88b24fdff6f849f2f9c87",
- "5ecc7fdc52d2f575ad4f2d0e9e6b1e11",
- "b4581311a0a73d95dfac7f8f44591032",
- "68bd283cfd1a125f6b2ee47cee874d36",
- "804179f05c032908a5e36077bb87c994",
- "fc5fd041a8ee779015394d0c066ee43c",
- "68f5579ccadfe9a1baafb158334a3db2",
- "fe237e45e215ab06d79046da9ad71e84",
- "9a8a938a6824551bf7d21b8fd1d70ea1",
- "eb7332f2017cd96882c76e7136aeaf53",
- },
- {
- // 4X16
- "7bafa307d507747b8132e7735b7f1c73",
- "e58bc2d8213a97d1fea9cfb73d7a9633",
- "435f8a8e8bbf14dbf2fe16b2be9e97aa",
- "1d0e767b68d84acbfb50b7a04e633836",
- "5f713bd7b324fe73bb7063e35ee14e5e",
- "0dac4e1fa3d59814202715468c01ed56",
- "47709d1db4a330c7a8900f450e6fddd1",
- "258e0b930bb27db28f05da9cf7d1ee7c",
- "36cf030fbae767912593efea045bfff5",
- "248d7aceabb7499febae663fae41a920",
- },
- {
- // 16X4
- "04dde98e632670e393704742c89f9067",
- "8c72543f1664651ae1fa08e2ac0adb9b",
- "2354a2cdc2773aa2df8ab4010db1be39",
- "6300ad3221c26da39b10e0e6d87ee3be",
- "8ea30b661c6ba60b28d3167f19e449b8",
- "fb6c1e4ff101a371cede63c2955cdb7e",
- "a517c06433d6d7927b16a72184a23e92",
- "393828be5d62ab6c48668bea5e2f801a",
- "b1e510c542013eb9d6fb188dea2ce90a",
- "569a8f2fe01679ca216535ecbcdccb62",
- },
- {
- // 8X32
- "9d541865c185ca7607852852613ac1fc",
- "b96be67f08c6b5fa5ebd3411299c2f7c",
- "75a2dcf50004b9d188849b048239767e",
- "429492ff415c9fd9b050d73b2ad500f8",
- "64b3606c1ccd036bd766bd5711392cf4",
- "cb59844a0f01660ac955bae3511f1100",
- "3e076155b7a70e8828618e3f33b51e3d",
- "ed2d1f597ab7c50beff690f737cf9726",
- "7909c6a26aaf20c59d996d3e5b5f9c29",
- "965798807240c98c6f7cc9b457ed0773",
- },
- {
- // 32X8
- "36f391aa31619eec1f4d9ee95ea454cc",
- "b82648f14eeba2527357cb50bc3223cb",
- "7a7b2adf429125e8bee9d1d00a66e13f",
- "4198e4d6ba503b7cc2d7e96bb845f661",
- "96c160d2ec1be9fe0cdea9682f14d257",
- "19a450bcebaa75afb4fc6bd1fd6434af",
- "2bd2e35967d43d0ec1c6587a36f204d5",
- "49799a99aa4ccfbd989bee92a99422f1",
- "955530e99813812a74659edeac3f5475",
- "f0316b84e378a19cd11b19a6e40b2914",
- },
- {
- // 16X64
- "8cba1b70a0bde29e8ef235cedc5faa7d",
- "96d00ddc7537bf7f196006591b733b4e",
- "cbf69d5d157c9f3355a4757b1d6e3414",
- "3ac1f642019493dec1b737d7a3a1b4e5",
- "35f9ee300d7fa3c97338e81a6f21dcd4",
- "aae335442e77c8ebc280f16ea50ba9c7",
- "a6140fdac2278644328be094d88731db",
- "2df93621b6ff100f7008432d509f4161",
- "c77bf5aee39e7ed4a3dd715f816f452a",
- "02109bd63557d90225c32a8f1338258e",
- },
- {
- // 64X16
- "a5e2f9fb685d5f4a048e9a96affd25a4",
- "1348f249690d9eefe09d9ad7ead2c801",
- "525da4b187acd81b1ff1116b60461141",
- "e99d072de858094c98b01bd4a6772634",
- "873bfa9dc24693f19721f7c8d527f7d3",
- "0acfc6507bd3468e9679efc127d6e4b9",
- "57d03f8d079c7264854e22ac1157cfae",
- "6c2c4036f70c7d957a9399b5436c0774",
- "42b8e4a97b7f8416c72a5148c031c0b1",
- "a38a2c5f79993dfae8530e9e25800893",
- },
-};
-
-} // namespace
-
-// Defines the gtest case TEST(arch, DISABLED_TestHighbdIntraPred_<tx_size>).
-// The ten predictor arguments are gathered, in the order given, into a static
-// table that is passed to TestHighbdIntraPred() along with the
-// kHighbdSignatures row for tx_size. A NULL argument leaves that slot
-// untested. The DISABLED_ name prefix keeps the test out of default gtest
-// runs.
-#define HIGHBD_INTRA_PRED_TEST(arch, tx_size, dc, dc_left, dc_top, dc_128, v, \
-                               h, paeth, smooth, smooth_v, smooth_h) \
-  TEST(arch, DISABLED_##TestHighbdIntraPred_##tx_size) { \
-    static const AvxHighbdPredFunc kPredictorTable[] = { \
-      dc, \
-      dc_left, \
-      dc_top, \
-      dc_128, \
-      v, \
-      h, \
-      paeth, \
-      smooth, \
-      smooth_v, \
-      smooth_h \
-    }; \
-    TestHighbdIntraPred(tx_size, kPredictorTable, kHighbdSignatures[tx_size]); \
-  }
-
-// -----------------------------------------------------------------------------
-// 4x4, 4x8, 4x16
-// C reference predictors: all ten slots are populated for each size below.
-HIGHBD_INTRA_PRED_TEST(
- C_1, TX_4X4, aom_highbd_dc_predictor_4x4_c,
- aom_highbd_dc_left_predictor_4x4_c, aom_highbd_dc_top_predictor_4x4_c,
- aom_highbd_dc_128_predictor_4x4_c, aom_highbd_v_predictor_4x4_c,
- aom_highbd_h_predictor_4x4_c, aom_highbd_paeth_predictor_4x4_c,
- aom_highbd_smooth_predictor_4x4_c, aom_highbd_smooth_v_predictor_4x4_c,
- aom_highbd_smooth_h_predictor_4x4_c)
-
-HIGHBD_INTRA_PRED_TEST(
- C_2, TX_4X8, aom_highbd_dc_predictor_4x8_c,
- aom_highbd_dc_left_predictor_4x8_c, aom_highbd_dc_top_predictor_4x8_c,
- aom_highbd_dc_128_predictor_4x8_c, aom_highbd_v_predictor_4x8_c,
- aom_highbd_h_predictor_4x8_c, aom_highbd_paeth_predictor_4x8_c,
- aom_highbd_smooth_predictor_4x8_c, aom_highbd_smooth_v_predictor_4x8_c,
- aom_highbd_smooth_h_predictor_4x8_c)
-
-HIGHBD_INTRA_PRED_TEST(
- C_3, TX_4X16, aom_highbd_dc_predictor_4x16_c,
- aom_highbd_dc_left_predictor_4x16_c, aom_highbd_dc_top_predictor_4x16_c,
- aom_highbd_dc_128_predictor_4x16_c, aom_highbd_v_predictor_4x16_c,
- aom_highbd_h_predictor_4x16_c, aom_highbd_paeth_predictor_4x16_c,
- aom_highbd_smooth_predictor_4x16_c, aom_highbd_smooth_v_predictor_4x16_c,
- aom_highbd_smooth_h_predictor_4x16_c)
-
-#if HAVE_SSE2 // SSE2 covers 4x4 and 4x8 (dc*/v/h slots); no 4x16 entry.
-HIGHBD_INTRA_PRED_TEST(SSE2_1, TX_4X4, aom_highbd_dc_predictor_4x4_sse2,
- aom_highbd_dc_left_predictor_4x4_sse2,
- aom_highbd_dc_top_predictor_4x4_sse2,
- aom_highbd_dc_128_predictor_4x4_sse2,
- aom_highbd_v_predictor_4x4_sse2,
- aom_highbd_h_predictor_4x4_sse2, NULL, NULL, NULL, NULL)
-
-HIGHBD_INTRA_PRED_TEST(SSE2_2, TX_4X8, aom_highbd_dc_predictor_4x8_sse2,
- aom_highbd_dc_left_predictor_4x8_sse2,
- aom_highbd_dc_top_predictor_4x8_sse2,
- aom_highbd_dc_128_predictor_4x8_sse2,
- aom_highbd_v_predictor_4x8_sse2,
- aom_highbd_h_predictor_4x8_sse2, NULL, NULL, NULL, NULL)
-#endif // HAVE_SSE2
-
-// -----------------------------------------------------------------------------
-// 8x8, 8x4, 8x16, 8x32
-// C reference predictors: all ten slots are populated for each size below.
-HIGHBD_INTRA_PRED_TEST(
- C_1, TX_8X8, aom_highbd_dc_predictor_8x8_c,
- aom_highbd_dc_left_predictor_8x8_c, aom_highbd_dc_top_predictor_8x8_c,
- aom_highbd_dc_128_predictor_8x8_c, aom_highbd_v_predictor_8x8_c,
- aom_highbd_h_predictor_8x8_c, aom_highbd_paeth_predictor_8x8_c,
- aom_highbd_smooth_predictor_8x8_c, aom_highbd_smooth_v_predictor_8x8_c,
- aom_highbd_smooth_h_predictor_8x8_c)
-
-HIGHBD_INTRA_PRED_TEST(
- C_2, TX_8X4, aom_highbd_dc_predictor_8x4_c,
- aom_highbd_dc_left_predictor_8x4_c, aom_highbd_dc_top_predictor_8x4_c,
- aom_highbd_dc_128_predictor_8x4_c, aom_highbd_v_predictor_8x4_c,
- aom_highbd_h_predictor_8x4_c, aom_highbd_paeth_predictor_8x4_c,
- aom_highbd_smooth_predictor_8x4_c, aom_highbd_smooth_v_predictor_8x4_c,
- aom_highbd_smooth_h_predictor_8x4_c)
-
-HIGHBD_INTRA_PRED_TEST(
- C_3, TX_8X16, aom_highbd_dc_predictor_8x16_c,
- aom_highbd_dc_left_predictor_8x16_c, aom_highbd_dc_top_predictor_8x16_c,
- aom_highbd_dc_128_predictor_8x16_c, aom_highbd_v_predictor_8x16_c,
- aom_highbd_h_predictor_8x16_c, aom_highbd_paeth_predictor_8x16_c,
- aom_highbd_smooth_predictor_8x16_c, aom_highbd_smooth_v_predictor_8x16_c,
- aom_highbd_smooth_h_predictor_8x16_c)
-
-HIGHBD_INTRA_PRED_TEST(
- C_4, TX_8X32, aom_highbd_dc_predictor_8x32_c,
- aom_highbd_dc_left_predictor_8x32_c, aom_highbd_dc_top_predictor_8x32_c,
- aom_highbd_dc_128_predictor_8x32_c, aom_highbd_v_predictor_8x32_c,
- aom_highbd_h_predictor_8x32_c, aom_highbd_paeth_predictor_8x32_c,
- aom_highbd_smooth_predictor_8x32_c, aom_highbd_smooth_v_predictor_8x32_c,
- aom_highbd_smooth_h_predictor_8x32_c)
-
-#if HAVE_SSE2 // SSE2 covers 8x8, 8x4, 8x16 (dc*/v/h slots); no 8x32 entry.
-HIGHBD_INTRA_PRED_TEST(SSE2_1, TX_8X8, aom_highbd_dc_predictor_8x8_sse2,
- aom_highbd_dc_left_predictor_8x8_sse2,
- aom_highbd_dc_top_predictor_8x8_sse2,
- aom_highbd_dc_128_predictor_8x8_sse2,
- aom_highbd_v_predictor_8x8_sse2,
- aom_highbd_h_predictor_8x8_sse2, NULL, NULL, NULL, NULL)
-HIGHBD_INTRA_PRED_TEST(SSE2_2, TX_8X4, aom_highbd_dc_predictor_8x4_sse2,
- aom_highbd_dc_left_predictor_8x4_sse2,
- aom_highbd_dc_top_predictor_8x4_sse2,
- aom_highbd_dc_128_predictor_8x4_sse2,
- aom_highbd_v_predictor_8x4_sse2,
- aom_highbd_h_predictor_8x4_sse2, NULL, NULL, NULL, NULL)
-HIGHBD_INTRA_PRED_TEST(SSE2_3, TX_8X16, aom_highbd_dc_predictor_8x16_sse2,
- aom_highbd_dc_left_predictor_8x16_sse2,
- aom_highbd_dc_top_predictor_8x16_sse2,
- aom_highbd_dc_128_predictor_8x16_sse2,
- aom_highbd_v_predictor_8x16_sse2,
- aom_highbd_h_predictor_8x16_sse2, NULL, NULL, NULL, NULL)
-#endif // HAVE_SSE2
-
-#if HAVE_SSSE3 // All slots NULL: TestHighbdIntraPred skips every entry.
-HIGHBD_INTRA_PRED_TEST(SSSE3, TX_8X8, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
- NULL, NULL, NULL)
-#endif // HAVE_SSSE3
-
-// -----------------------------------------------------------------------------
-// 16x16, 16x8, 16x32, 16x4, 16x64
-// C reference predictors: all ten slots are populated for each size below.
-HIGHBD_INTRA_PRED_TEST(
- C_1, TX_16X16, aom_highbd_dc_predictor_16x16_c,
- aom_highbd_dc_left_predictor_16x16_c, aom_highbd_dc_top_predictor_16x16_c,
- aom_highbd_dc_128_predictor_16x16_c, aom_highbd_v_predictor_16x16_c,
- aom_highbd_h_predictor_16x16_c, aom_highbd_paeth_predictor_16x16_c,
- aom_highbd_smooth_predictor_16x16_c, aom_highbd_smooth_v_predictor_16x16_c,
- aom_highbd_smooth_h_predictor_16x16_c)
-
-HIGHBD_INTRA_PRED_TEST(
- C_2, TX_16X8, aom_highbd_dc_predictor_16x8_c,
- aom_highbd_dc_left_predictor_16x8_c, aom_highbd_dc_top_predictor_16x8_c,
- aom_highbd_dc_128_predictor_16x8_c, aom_highbd_v_predictor_16x8_c,
- aom_highbd_h_predictor_16x8_c, aom_highbd_paeth_predictor_16x8_c,
- aom_highbd_smooth_predictor_16x8_c, aom_highbd_smooth_v_predictor_16x8_c,
- aom_highbd_smooth_h_predictor_16x8_c)
-
-HIGHBD_INTRA_PRED_TEST(
- C_3, TX_16X32, aom_highbd_dc_predictor_16x32_c,
- aom_highbd_dc_left_predictor_16x32_c, aom_highbd_dc_top_predictor_16x32_c,
- aom_highbd_dc_128_predictor_16x32_c, aom_highbd_v_predictor_16x32_c,
- aom_highbd_h_predictor_16x32_c, aom_highbd_paeth_predictor_16x32_c,
- aom_highbd_smooth_predictor_16x32_c, aom_highbd_smooth_v_predictor_16x32_c,
- aom_highbd_smooth_h_predictor_16x32_c)
-
-HIGHBD_INTRA_PRED_TEST(
- C_4, TX_16X4, aom_highbd_dc_predictor_16x4_c,
- aom_highbd_dc_left_predictor_16x4_c, aom_highbd_dc_top_predictor_16x4_c,
- aom_highbd_dc_128_predictor_16x4_c, aom_highbd_v_predictor_16x4_c,
- aom_highbd_h_predictor_16x4_c, aom_highbd_paeth_predictor_16x4_c,
- aom_highbd_smooth_predictor_16x4_c, aom_highbd_smooth_v_predictor_16x4_c,
- aom_highbd_smooth_h_predictor_16x4_c)
-
-HIGHBD_INTRA_PRED_TEST(
- C_5, TX_16X64, aom_highbd_dc_predictor_16x64_c,
- aom_highbd_dc_left_predictor_16x64_c, aom_highbd_dc_top_predictor_16x64_c,
- aom_highbd_dc_128_predictor_16x64_c, aom_highbd_v_predictor_16x64_c,
- aom_highbd_h_predictor_16x64_c, aom_highbd_paeth_predictor_16x64_c,
- aom_highbd_smooth_predictor_16x64_c, aom_highbd_smooth_v_predictor_16x64_c,
- aom_highbd_smooth_h_predictor_16x64_c)
-
-#if HAVE_SSE2 // SSE2 covers 16x16, 16x8, 16x32 (dc*/v/h slots) only.
-HIGHBD_INTRA_PRED_TEST(SSE2_1, TX_16X16, aom_highbd_dc_predictor_16x16_sse2,
- aom_highbd_dc_left_predictor_16x16_sse2,
- aom_highbd_dc_top_predictor_16x16_sse2,
- aom_highbd_dc_128_predictor_16x16_sse2,
- aom_highbd_v_predictor_16x16_sse2,
- aom_highbd_h_predictor_16x16_sse2, NULL, NULL, NULL,
- NULL)
-HIGHBD_INTRA_PRED_TEST(SSE2_2, TX_16X8, aom_highbd_dc_predictor_16x8_sse2,
- aom_highbd_dc_left_predictor_16x8_sse2,
- aom_highbd_dc_top_predictor_16x8_sse2,
- aom_highbd_dc_128_predictor_16x8_sse2,
- aom_highbd_v_predictor_16x8_sse2,
- aom_highbd_h_predictor_16x8_sse2, NULL, NULL, NULL, NULL)
-HIGHBD_INTRA_PRED_TEST(SSE2_3, TX_16X32, aom_highbd_dc_predictor_16x32_sse2,
- aom_highbd_dc_left_predictor_16x32_sse2,
- aom_highbd_dc_top_predictor_16x32_sse2,
- aom_highbd_dc_128_predictor_16x32_sse2,
- aom_highbd_v_predictor_16x32_sse2,
- aom_highbd_h_predictor_16x32_sse2, NULL, NULL, NULL,
- NULL)
-#endif // HAVE_SSE2
-
-#if HAVE_SSSE3 // All slots NULL: TestHighbdIntraPred skips every entry.
-HIGHBD_INTRA_PRED_TEST(SSSE3_1, TX_16X16, NULL, NULL, NULL, NULL, NULL, NULL,
- NULL, NULL, NULL, NULL)
-#endif // HAVE_SSSE3
-
-#if HAVE_AVX2 // All slots NULL in each entry: nothing is exercised for AVX2.
-HIGHBD_INTRA_PRED_TEST(AVX2_1, TX_16X16, NULL, NULL, NULL, NULL, NULL, NULL,
- NULL, NULL, NULL, NULL)
-
-HIGHBD_INTRA_PRED_TEST(AVX2_2, TX_16X8, NULL, NULL, NULL, NULL, NULL, NULL,
- NULL, NULL, NULL, NULL)
-
-HIGHBD_INTRA_PRED_TEST(AVX2_3, TX_16X32, NULL, NULL, NULL, NULL, NULL, NULL,
- NULL, NULL, NULL, NULL)
-#endif // HAVE_AVX2
-
-// -----------------------------------------------------------------------------
-// 32x32, 32x16, 32x64, 32x8
-
-HIGHBD_INTRA_PRED_TEST(
- C_1, TX_32X32, aom_highbd_dc_predictor_32x32_c,
- aom_highbd_dc_left_predictor_32x32_c, aom_highbd_dc_top_predictor_32x32_c,
- aom_highbd_dc_128_predictor_32x32_c, aom_highbd_v_predictor_32x32_c,
- aom_highbd_h_predictor_32x32_c, aom_highbd_paeth_predictor_32x32_c,
- aom_highbd_smooth_predictor_32x32_c, aom_highbd_smooth_v_predictor_32x32_c,
- aom_highbd_smooth_h_predictor_32x32_c)
-
-HIGHBD_INTRA_PRED_TEST(
- C_2, TX_32X16, aom_highbd_dc_predictor_32x16_c,
- aom_highbd_dc_left_predictor_32x16_c, aom_highbd_dc_top_predictor_32x16_c,
- aom_highbd_dc_128_predictor_32x16_c, aom_highbd_v_predictor_32x16_c,
- aom_highbd_h_predictor_32x16_c, aom_highbd_paeth_predictor_32x16_c,
- aom_highbd_smooth_predictor_32x16_c, aom_highbd_smooth_v_predictor_32x16_c,
- aom_highbd_smooth_h_predictor_32x16_c)
-
-HIGHBD_INTRA_PRED_TEST(
- C_3, TX_32X64, aom_highbd_dc_predictor_32x64_c,
- aom_highbd_dc_left_predictor_32x64_c, aom_highbd_dc_top_predictor_32x64_c,
- aom_highbd_dc_128_predictor_32x64_c, aom_highbd_v_predictor_32x64_c,
- aom_highbd_h_predictor_32x64_c, aom_highbd_paeth_predictor_32x64_c,
- aom_highbd_smooth_predictor_32x64_c, aom_highbd_smooth_v_predictor_32x64_c,
- aom_highbd_smooth_h_predictor_32x64_c)
-
-HIGHBD_INTRA_PRED_TEST(
- C_4, TX_32X8, aom_highbd_dc_predictor_32x8_c,
- aom_highbd_dc_left_predictor_32x8_c, aom_highbd_dc_top_predictor_32x8_c,
- aom_highbd_dc_128_predictor_32x8_c, aom_highbd_v_predictor_32x8_c,
- aom_highbd_h_predictor_32x8_c, aom_highbd_paeth_predictor_32x8_c,
- aom_highbd_smooth_predictor_32x8_c, aom_highbd_smooth_v_predictor_32x8_c,
- aom_highbd_smooth_h_predictor_32x8_c)
-
-#if HAVE_SSE2
-HIGHBD_INTRA_PRED_TEST(SSE2_1, TX_32X32, aom_highbd_dc_predictor_32x32_sse2,
- aom_highbd_dc_left_predictor_32x32_sse2,
- aom_highbd_dc_top_predictor_32x32_sse2,
- aom_highbd_dc_128_predictor_32x32_sse2,
- aom_highbd_v_predictor_32x32_sse2,
- aom_highbd_h_predictor_32x32_sse2, NULL, NULL, NULL,
- NULL)
-HIGHBD_INTRA_PRED_TEST(SSE2_2, TX_32X16, aom_highbd_dc_predictor_32x16_sse2,
- aom_highbd_dc_left_predictor_32x16_sse2,
- aom_highbd_dc_top_predictor_32x16_sse2,
- aom_highbd_dc_128_predictor_32x16_sse2,
- aom_highbd_v_predictor_32x16_sse2,
- aom_highbd_h_predictor_32x16_sse2, NULL, NULL, NULL,
- NULL)
-#endif
-
-#if HAVE_SSSE3
-HIGHBD_INTRA_PRED_TEST(SSSE3_1, TX_32X32, NULL, NULL, NULL, NULL, NULL, NULL,
- NULL, NULL, NULL, NULL)
-#endif
-
-#if HAVE_AVX2
-HIGHBD_INTRA_PRED_TEST(AVX2_1, TX_32X32, NULL, NULL, NULL, NULL, NULL, NULL,
- NULL, NULL, NULL, NULL)
-
-HIGHBD_INTRA_PRED_TEST(AVX2_2, TX_32X16, NULL, NULL, NULL, NULL, NULL, NULL,
- NULL, NULL, NULL, NULL)
-#endif
-
-// -----------------------------------------------------------------------------
-// 64x64, 64x32, 64x16
-
-HIGHBD_INTRA_PRED_TEST(
- C_1, TX_64X64, aom_highbd_dc_predictor_64x64_c,
- aom_highbd_dc_left_predictor_64x64_c, aom_highbd_dc_top_predictor_64x64_c,
- aom_highbd_dc_128_predictor_64x64_c, aom_highbd_v_predictor_64x64_c,
- aom_highbd_h_predictor_64x64_c, aom_highbd_paeth_predictor_64x64_c,
- aom_highbd_smooth_predictor_64x64_c, aom_highbd_smooth_v_predictor_64x64_c,
- aom_highbd_smooth_h_predictor_64x64_c)
-
-HIGHBD_INTRA_PRED_TEST(
- C_2, TX_64X32, aom_highbd_dc_predictor_64x32_c,
- aom_highbd_dc_left_predictor_64x32_c, aom_highbd_dc_top_predictor_64x32_c,
- aom_highbd_dc_128_predictor_64x32_c, aom_highbd_v_predictor_64x32_c,
- aom_highbd_h_predictor_64x32_c, aom_highbd_paeth_predictor_64x32_c,
- aom_highbd_smooth_predictor_64x32_c, aom_highbd_smooth_v_predictor_64x32_c,
- aom_highbd_smooth_h_predictor_64x32_c)
-
-HIGHBD_INTRA_PRED_TEST(
- C_3, TX_64X16, aom_highbd_dc_predictor_64x16_c,
- aom_highbd_dc_left_predictor_64x16_c, aom_highbd_dc_top_predictor_64x16_c,
- aom_highbd_dc_128_predictor_64x16_c, aom_highbd_v_predictor_64x16_c,
- aom_highbd_h_predictor_64x16_c, aom_highbd_paeth_predictor_64x16_c,
- aom_highbd_smooth_predictor_64x16_c, aom_highbd_smooth_v_predictor_64x16_c,
- aom_highbd_smooth_h_predictor_64x16_c)
-
-// -----------------------------------------------------------------------------
-
-#include "test/test_libaom.cc"
diff --git a/third_party/aom/test/test_libaom.cc b/third_party/aom/test/test_libaom.cc
deleted file mode 100644
index b55d76237..000000000
--- a/third_party/aom/test/test_libaom.cc
+++ /dev/null
@@ -1,74 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#include <string.h>
-
-#include <string>
-
-#include "third_party/googletest/src/googletest/include/gtest/gtest.h"
-
-#include "config/aom_config.h"
-
-#if ARCH_X86 || ARCH_X86_64
-#include "aom_ports/x86.h"
-#endif
-extern "C" {
-extern void av1_rtcd();
-extern void aom_dsp_rtcd();
-extern void aom_scale_rtcd();
-}
-
-#if ARCH_X86 || ARCH_X86_64
-static void append_negative_gtest_filter(const char *str) {
- std::string filter = ::testing::FLAGS_gtest_filter;
- // Negative patterns begin with one '-' followed by a ':' separated list.
- if (filter.find('-') == std::string::npos) filter += '-';
- // OPT.* matches TEST() functions
- // OPT/* matches TEST_P() functions
- // OPT_* matches tests which have been manually sharded.
- // We do not match OPT* because of SSE/SSE2 collisions.
- const char *search_terminators = "./_";
- for (size_t pos = 0; pos < strlen(search_terminators); ++pos) {
- filter += ":";
- filter += str;
- filter += search_terminators[pos];
- filter += "*";
- }
- ::testing::FLAGS_gtest_filter = filter;
-}
-#endif // ARCH_X86 || ARCH_X86_64
-
-int main(int argc, char **argv) {
- ::testing::InitGoogleTest(&argc, argv);
-
-#if ARCH_X86 || ARCH_X86_64
- const int simd_caps = x86_simd_caps();
- if (!(simd_caps & HAS_MMX)) append_negative_gtest_filter("MMX");
- if (!(simd_caps & HAS_SSE)) append_negative_gtest_filter("SSE");
- if (!(simd_caps & HAS_SSE2)) append_negative_gtest_filter("SSE2");
- if (!(simd_caps & HAS_SSE3)) append_negative_gtest_filter("SSE3");
- if (!(simd_caps & HAS_SSSE3)) append_negative_gtest_filter("SSSE3");
- if (!(simd_caps & HAS_SSE4_1)) append_negative_gtest_filter("SSE4_1");
- if (!(simd_caps & HAS_SSE4_2)) append_negative_gtest_filter("SSE4_2");
- if (!(simd_caps & HAS_AVX)) append_negative_gtest_filter("AVX");
- if (!(simd_caps & HAS_AVX2)) append_negative_gtest_filter("AVX2");
-#endif // ARCH_X86 || ARCH_X86_64
-
-// Shared library builds don't support whitebox tests that exercise internal
-// symbols.
-#if !CONFIG_SHARED
- av1_rtcd();
- aom_dsp_rtcd();
- aom_scale_rtcd();
-#endif // !CONFIG_SHARED
-
- return RUN_ALL_TESTS();
-}
diff --git a/third_party/aom/test/test_runner.cmake b/third_party/aom/test/test_runner.cmake
deleted file mode 100644
index d3747b1e3..000000000
--- a/third_party/aom/test/test_runner.cmake
+++ /dev/null
@@ -1,28 +0,0 @@
-#
-# Copyright (c) 2017, Alliance for Open Media. All rights reserved
-#
-# This source code is subject to the terms of the BSD 2 Clause License and the
-# Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License was
-# not distributed with this source code in the LICENSE file, you can obtain it
-# at www.aomedia.org/license/software. If the Alliance for Open Media Patent
-# License 1.0 was not distributed with this source code in the PATENTS file, you
-# can obtain it at www.aomedia.org/license/patent.
-#
-if(NOT GTEST_TOTAL_SHARDS OR "${GTEST_SHARD_INDEX}" STREQUAL "" OR NOT
- TEST_LIBAOM)
- message(
- FATAL_ERROR
- "The variables GTEST_SHARD_INDEX, GTEST_TOTAL_SHARDS and TEST_LIBAOM
- must be defined."
- )
-endif()
-
-set($ENV{GTEST_SHARD_INDEX} ${GTEST_SHARD_INDEX})
-set($ENV{GTEST_TOTAL_SHARDS} ${GTEST_TOTAL_SHARDS})
-execute_process(COMMAND ${TEST_LIBAOM} RESULT_VARIABLE test_result)
-set(test_message "Test shard ${GTEST_SHARD_INDEX}/${GTEST_TOTAL_SHARDS} result")
-message("${test_message}: ${test_result}")
-
-if(NOT "${test_result}" STREQUAL "0")
- message(FATAL_ERROR "${test_message}: FAILED, non-zero exit code.")
-endif()
diff --git a/third_party/aom/test/test_vector_test.cc b/third_party/aom/test/test_vector_test.cc
deleted file mode 100644
index 286988b17..000000000
--- a/third_party/aom/test/test_vector_test.cc
+++ /dev/null
@@ -1,172 +0,0 @@
-/*
- * Copyright (c) 2018, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#include <cstdio>
-#include <cstdlib>
-#include <set>
-#include <string>
-#include "third_party/googletest/src/googletest/include/gtest/gtest.h"
-#include "common/tools_common.h"
-#include "config/aom_config.h"
-#include "test/codec_factory.h"
-#include "test/decode_test_driver.h"
-#include "test/ivf_video_source.h"
-#include "test/md5_helper.h"
-#include "test/test_vectors.h"
-#include "test/util.h"
-#if CONFIG_WEBM_IO
-#include "test/webm_video_source.h"
-#endif
-
-namespace {
-
-const int kThreads = 0;
-const int kFileName = 1;
-const int kRowMT = 2;
-
-typedef ::testing::tuple<int, const char *, int> DecodeParam;
-
-class TestVectorTest : public ::libaom_test::DecoderTest,
- public ::libaom_test::CodecTestWithParam<DecodeParam> {
- protected:
- TestVectorTest() : DecoderTest(GET_PARAM(0)), md5_file_(NULL) {}
-
- virtual ~TestVectorTest() {
- if (md5_file_) fclose(md5_file_);
- }
-
- void OpenMD5File(const std::string &md5_file_name_) {
- md5_file_ = libaom_test::OpenTestDataFile(md5_file_name_);
- ASSERT_TRUE(md5_file_ != NULL)
- << "Md5 file open failed. Filename: " << md5_file_name_;
- }
-
- virtual void PreDecodeFrameHook(
- const libaom_test::CompressedVideoSource &video,
- libaom_test::Decoder *decoder) {
- if (video.frame_number() == 0) decoder->Control(AV1D_SET_ROW_MT, row_mt_);
- }
-
- virtual void DecompressedFrameHook(const aom_image_t &img,
- const unsigned int frame_number) {
- ASSERT_TRUE(md5_file_ != NULL);
- char expected_md5[33];
- char junk[128];
-
- // Read correct md5 checksums.
- const int res = fscanf(md5_file_, "%s %s", expected_md5, junk);
- ASSERT_NE(res, EOF) << "Read md5 data failed";
- expected_md5[32] = '\0';
-
- ::libaom_test::MD5 md5_res;
-#if !CONFIG_LOWBITDEPTH
- const aom_img_fmt_t shifted_fmt =
- (aom_img_fmt)(img.fmt & ~AOM_IMG_FMT_HIGHBITDEPTH);
- if (img.bit_depth == 8 && shifted_fmt != img.fmt) {
- aom_image_t *img_shifted =
- aom_img_alloc(NULL, shifted_fmt, img.d_w, img.d_h, 16);
- img_shifted->bit_depth = img.bit_depth;
- img_shifted->monochrome = img.monochrome;
- aom_img_downshift(img_shifted, &img, 0);
- md5_res.Add(img_shifted);
- aom_img_free(img_shifted);
- } else {
-#endif
- md5_res.Add(&img);
-#if !CONFIG_LOWBITDEPTH
- }
-#endif
-
- const char *actual_md5 = md5_res.Get();
- // Check md5 match.
- ASSERT_STREQ(expected_md5, actual_md5)
- << "Md5 checksums don't match: frame number = " << frame_number;
- }
-
- unsigned int row_mt_;
-
- private:
- FILE *md5_file_;
-};
-
-// This test runs through the whole set of test vectors, and decodes them.
-// The md5 checksums are computed for each frame in the video file. If md5
-// checksums match the correct md5 data, then the test is passed. Otherwise,
-// the test failed.
-TEST_P(TestVectorTest, MD5Match) {
- const DecodeParam input = GET_PARAM(1);
- const std::string filename = ::testing::get<kFileName>(input);
- aom_codec_flags_t flags = 0;
- aom_codec_dec_cfg_t cfg = aom_codec_dec_cfg_t();
- char str[256];
-
- cfg.threads = ::testing::get<kThreads>(input);
- row_mt_ = ::testing::get<kRowMT>(input);
-
- snprintf(str, sizeof(str) / sizeof(str[0]) - 1, "file: %s threads: %d",
- filename.c_str(), cfg.threads);
- SCOPED_TRACE(str);
-
- // Open compressed video file.
- testing::internal::scoped_ptr<libaom_test::CompressedVideoSource> video;
- if (filename.substr(filename.length() - 3, 3) == "ivf") {
- video.reset(new libaom_test::IVFVideoSource(filename));
- } else if (filename.substr(filename.length() - 4, 4) == "webm" ||
- filename.substr(filename.length() - 3, 3) == "mkv") {
-#if CONFIG_WEBM_IO
- video.reset(new libaom_test::WebMVideoSource(filename));
-#else
- fprintf(stderr, "WebM IO is disabled, skipping test vector %s\n",
- filename.c_str());
- return;
-#endif
- }
- ASSERT_TRUE(video.get() != NULL);
- video->Init();
-
- // Construct md5 file name.
- const std::string md5_filename = filename + ".md5";
- OpenMD5File(md5_filename);
-
- // Set decode config and flags.
- cfg.allow_lowbitdepth = CONFIG_LOWBITDEPTH;
- set_cfg(cfg);
- set_flags(flags);
-
- // Decode frame, and check the md5 matching.
- ASSERT_NO_FATAL_FAILURE(RunLoop(video.get(), cfg));
-}
-
-#if CONFIG_AV1_DECODER
-AV1_INSTANTIATE_TEST_CASE(
- TestVectorTest,
- ::testing::Combine(::testing::Values(1), // Single thread.
- ::testing::ValuesIn(libaom_test::kAV1TestVectors,
- libaom_test::kAV1TestVectors +
- libaom_test::kNumAV1TestVectors),
- ::testing::Values(0)));
-
-// Test AV1 decode in with different numbers of threads.
-INSTANTIATE_TEST_CASE_P(
- AV1MultiThreaded, TestVectorTest,
- ::testing::Combine(
- ::testing::Values(
- static_cast<const libaom_test::CodecFactory *>(&libaom_test::kAV1)),
- ::testing::Combine(
- ::testing::Range(2, 9), // With 2 ~ 8 threads.
- ::testing::ValuesIn(libaom_test::kAV1TestVectors,
- libaom_test::kAV1TestVectors +
- libaom_test::kNumAV1TestVectors),
- ::testing::Range(0, 2))));
-
-#endif // CONFIG_AV1_DECODER
-
-} // namespace
diff --git a/third_party/aom/test/test_vectors.cc b/third_party/aom/test/test_vectors.cc
deleted file mode 100644
index 71e431e18..000000000
--- a/third_party/aom/test/test_vectors.cc
+++ /dev/null
@@ -1,140 +0,0 @@
-/*
- * Copyright (c) 2018, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#include "test/test_vectors.h"
-
-namespace libaom_test {
-
-#define NELEMENTS(x) static_cast<int>(sizeof(x) / sizeof(x[0]))
-
-#if CONFIG_AV1_DECODER
-const char *const kAV1TestVectors[] = {
- "av1-1-b8-00-quantizer-00.ivf", "av1-1-b8-00-quantizer-01.ivf",
- "av1-1-b8-00-quantizer-02.ivf", "av1-1-b8-00-quantizer-03.ivf",
- "av1-1-b8-00-quantizer-04.ivf", "av1-1-b8-00-quantizer-05.ivf",
- "av1-1-b8-00-quantizer-06.ivf", "av1-1-b8-00-quantizer-07.ivf",
- "av1-1-b8-00-quantizer-08.ivf", "av1-1-b8-00-quantizer-09.ivf",
- "av1-1-b8-00-quantizer-10.ivf", "av1-1-b8-00-quantizer-11.ivf",
- "av1-1-b8-00-quantizer-12.ivf", "av1-1-b8-00-quantizer-13.ivf",
- "av1-1-b8-00-quantizer-14.ivf", "av1-1-b8-00-quantizer-15.ivf",
- "av1-1-b8-00-quantizer-16.ivf", "av1-1-b8-00-quantizer-17.ivf",
- "av1-1-b8-00-quantizer-18.ivf", "av1-1-b8-00-quantizer-19.ivf",
- "av1-1-b8-00-quantizer-20.ivf", "av1-1-b8-00-quantizer-21.ivf",
- "av1-1-b8-00-quantizer-22.ivf", "av1-1-b8-00-quantizer-23.ivf",
- "av1-1-b8-00-quantizer-24.ivf", "av1-1-b8-00-quantizer-25.ivf",
- "av1-1-b8-00-quantizer-26.ivf", "av1-1-b8-00-quantizer-27.ivf",
- "av1-1-b8-00-quantizer-28.ivf", "av1-1-b8-00-quantizer-29.ivf",
- "av1-1-b8-00-quantizer-30.ivf", "av1-1-b8-00-quantizer-31.ivf",
- "av1-1-b8-00-quantizer-32.ivf", "av1-1-b8-00-quantizer-33.ivf",
- "av1-1-b8-00-quantizer-34.ivf", "av1-1-b8-00-quantizer-35.ivf",
- "av1-1-b8-00-quantizer-36.ivf", "av1-1-b8-00-quantizer-37.ivf",
- "av1-1-b8-00-quantizer-38.ivf", "av1-1-b8-00-quantizer-39.ivf",
- "av1-1-b8-00-quantizer-40.ivf", "av1-1-b8-00-quantizer-41.ivf",
- "av1-1-b8-00-quantizer-42.ivf", "av1-1-b8-00-quantizer-43.ivf",
- "av1-1-b8-00-quantizer-44.ivf", "av1-1-b8-00-quantizer-45.ivf",
- "av1-1-b8-00-quantizer-46.ivf", "av1-1-b8-00-quantizer-47.ivf",
- "av1-1-b8-00-quantizer-48.ivf", "av1-1-b8-00-quantizer-49.ivf",
- "av1-1-b8-00-quantizer-50.ivf", "av1-1-b8-00-quantizer-51.ivf",
- "av1-1-b8-00-quantizer-52.ivf", "av1-1-b8-00-quantizer-53.ivf",
- "av1-1-b8-00-quantizer-54.ivf", "av1-1-b8-00-quantizer-55.ivf",
- "av1-1-b8-00-quantizer-56.ivf", "av1-1-b8-00-quantizer-57.ivf",
- "av1-1-b8-00-quantizer-58.ivf", "av1-1-b8-00-quantizer-59.ivf",
- "av1-1-b8-00-quantizer-60.ivf", "av1-1-b8-00-quantizer-61.ivf",
- "av1-1-b8-00-quantizer-62.ivf", "av1-1-b8-00-quantizer-63.ivf",
- "av1-1-b10-00-quantizer-00.ivf", "av1-1-b10-00-quantizer-01.ivf",
- "av1-1-b10-00-quantizer-02.ivf", "av1-1-b10-00-quantizer-03.ivf",
- "av1-1-b10-00-quantizer-04.ivf", "av1-1-b10-00-quantizer-05.ivf",
- "av1-1-b10-00-quantizer-06.ivf", "av1-1-b10-00-quantizer-07.ivf",
- "av1-1-b10-00-quantizer-08.ivf", "av1-1-b10-00-quantizer-09.ivf",
- "av1-1-b10-00-quantizer-10.ivf", "av1-1-b10-00-quantizer-11.ivf",
- "av1-1-b10-00-quantizer-12.ivf", "av1-1-b10-00-quantizer-13.ivf",
- "av1-1-b10-00-quantizer-14.ivf", "av1-1-b10-00-quantizer-15.ivf",
- "av1-1-b10-00-quantizer-16.ivf", "av1-1-b10-00-quantizer-17.ivf",
- "av1-1-b10-00-quantizer-18.ivf", "av1-1-b10-00-quantizer-19.ivf",
- "av1-1-b10-00-quantizer-20.ivf", "av1-1-b10-00-quantizer-21.ivf",
- "av1-1-b10-00-quantizer-22.ivf", "av1-1-b10-00-quantizer-23.ivf",
- "av1-1-b10-00-quantizer-24.ivf", "av1-1-b10-00-quantizer-25.ivf",
- "av1-1-b10-00-quantizer-26.ivf", "av1-1-b10-00-quantizer-27.ivf",
- "av1-1-b10-00-quantizer-28.ivf", "av1-1-b10-00-quantizer-29.ivf",
- "av1-1-b10-00-quantizer-30.ivf", "av1-1-b10-00-quantizer-31.ivf",
- "av1-1-b10-00-quantizer-32.ivf", "av1-1-b10-00-quantizer-33.ivf",
- "av1-1-b10-00-quantizer-34.ivf", "av1-1-b10-00-quantizer-35.ivf",
- "av1-1-b10-00-quantizer-36.ivf", "av1-1-b10-00-quantizer-37.ivf",
- "av1-1-b10-00-quantizer-38.ivf", "av1-1-b10-00-quantizer-39.ivf",
- "av1-1-b10-00-quantizer-40.ivf", "av1-1-b10-00-quantizer-41.ivf",
- "av1-1-b10-00-quantizer-42.ivf", "av1-1-b10-00-quantizer-43.ivf",
- "av1-1-b10-00-quantizer-44.ivf", "av1-1-b10-00-quantizer-45.ivf",
- "av1-1-b10-00-quantizer-46.ivf", "av1-1-b10-00-quantizer-47.ivf",
- "av1-1-b10-00-quantizer-48.ivf", "av1-1-b10-00-quantizer-49.ivf",
- "av1-1-b10-00-quantizer-50.ivf", "av1-1-b10-00-quantizer-51.ivf",
- "av1-1-b10-00-quantizer-52.ivf", "av1-1-b10-00-quantizer-53.ivf",
- "av1-1-b10-00-quantizer-54.ivf", "av1-1-b10-00-quantizer-55.ivf",
- "av1-1-b10-00-quantizer-56.ivf", "av1-1-b10-00-quantizer-57.ivf",
- "av1-1-b10-00-quantizer-58.ivf", "av1-1-b10-00-quantizer-59.ivf",
- "av1-1-b10-00-quantizer-60.ivf", "av1-1-b10-00-quantizer-61.ivf",
- "av1-1-b10-00-quantizer-62.ivf", "av1-1-b10-00-quantizer-63.ivf",
- "av1-1-b8-01-size-16x16.ivf", "av1-1-b8-01-size-16x18.ivf",
- "av1-1-b8-01-size-16x32.ivf", "av1-1-b8-01-size-16x34.ivf",
- "av1-1-b8-01-size-16x64.ivf", "av1-1-b8-01-size-16x66.ivf",
- "av1-1-b8-01-size-18x16.ivf", "av1-1-b8-01-size-18x18.ivf",
- "av1-1-b8-01-size-18x32.ivf", "av1-1-b8-01-size-18x34.ivf",
- "av1-1-b8-01-size-18x64.ivf", "av1-1-b8-01-size-18x66.ivf",
- "av1-1-b8-01-size-196x196.ivf", "av1-1-b8-01-size-196x198.ivf",
- "av1-1-b8-01-size-196x200.ivf", "av1-1-b8-01-size-196x202.ivf",
- "av1-1-b8-01-size-196x208.ivf", "av1-1-b8-01-size-196x210.ivf",
- "av1-1-b8-01-size-196x224.ivf", "av1-1-b8-01-size-196x226.ivf",
- "av1-1-b8-01-size-198x196.ivf", "av1-1-b8-01-size-198x198.ivf",
- "av1-1-b8-01-size-198x200.ivf", "av1-1-b8-01-size-198x202.ivf",
- "av1-1-b8-01-size-198x208.ivf", "av1-1-b8-01-size-198x210.ivf",
- "av1-1-b8-01-size-198x224.ivf", "av1-1-b8-01-size-198x226.ivf",
- "av1-1-b8-01-size-200x196.ivf", "av1-1-b8-01-size-200x198.ivf",
- "av1-1-b8-01-size-200x200.ivf", "av1-1-b8-01-size-200x202.ivf",
- "av1-1-b8-01-size-200x208.ivf", "av1-1-b8-01-size-200x210.ivf",
- "av1-1-b8-01-size-200x224.ivf", "av1-1-b8-01-size-200x226.ivf",
- "av1-1-b8-01-size-202x196.ivf", "av1-1-b8-01-size-202x198.ivf",
- "av1-1-b8-01-size-202x200.ivf", "av1-1-b8-01-size-202x202.ivf",
- "av1-1-b8-01-size-202x208.ivf", "av1-1-b8-01-size-202x210.ivf",
- "av1-1-b8-01-size-202x224.ivf", "av1-1-b8-01-size-202x226.ivf",
- "av1-1-b8-01-size-208x196.ivf", "av1-1-b8-01-size-208x198.ivf",
- "av1-1-b8-01-size-208x200.ivf", "av1-1-b8-01-size-208x202.ivf",
- "av1-1-b8-01-size-208x208.ivf", "av1-1-b8-01-size-208x210.ivf",
- "av1-1-b8-01-size-208x224.ivf", "av1-1-b8-01-size-208x226.ivf",
- "av1-1-b8-01-size-210x196.ivf", "av1-1-b8-01-size-210x198.ivf",
- "av1-1-b8-01-size-210x200.ivf", "av1-1-b8-01-size-210x202.ivf",
- "av1-1-b8-01-size-210x208.ivf", "av1-1-b8-01-size-210x210.ivf",
- "av1-1-b8-01-size-210x224.ivf", "av1-1-b8-01-size-210x226.ivf",
- "av1-1-b8-01-size-224x196.ivf", "av1-1-b8-01-size-224x198.ivf",
- "av1-1-b8-01-size-224x200.ivf", "av1-1-b8-01-size-224x202.ivf",
- "av1-1-b8-01-size-224x208.ivf", "av1-1-b8-01-size-224x210.ivf",
- "av1-1-b8-01-size-224x224.ivf", "av1-1-b8-01-size-224x226.ivf",
- "av1-1-b8-01-size-226x196.ivf", "av1-1-b8-01-size-226x198.ivf",
- "av1-1-b8-01-size-226x200.ivf", "av1-1-b8-01-size-226x202.ivf",
- "av1-1-b8-01-size-226x208.ivf", "av1-1-b8-01-size-226x210.ivf",
- "av1-1-b8-01-size-226x224.ivf", "av1-1-b8-01-size-226x226.ivf",
- "av1-1-b8-01-size-32x16.ivf", "av1-1-b8-01-size-32x18.ivf",
- "av1-1-b8-01-size-32x32.ivf", "av1-1-b8-01-size-32x34.ivf",
- "av1-1-b8-01-size-32x64.ivf", "av1-1-b8-01-size-32x66.ivf",
- "av1-1-b8-01-size-34x16.ivf", "av1-1-b8-01-size-34x18.ivf",
- "av1-1-b8-01-size-34x32.ivf", "av1-1-b8-01-size-34x34.ivf",
- "av1-1-b8-01-size-34x64.ivf", "av1-1-b8-01-size-34x66.ivf",
- "av1-1-b8-01-size-64x16.ivf", "av1-1-b8-01-size-64x18.ivf",
- "av1-1-b8-01-size-64x32.ivf", "av1-1-b8-01-size-64x34.ivf",
- "av1-1-b8-01-size-64x64.ivf", "av1-1-b8-01-size-64x66.ivf",
- "av1-1-b8-01-size-66x16.ivf", "av1-1-b8-01-size-66x18.ivf",
- "av1-1-b8-01-size-66x32.ivf", "av1-1-b8-01-size-66x34.ivf",
- "av1-1-b8-01-size-66x64.ivf", "av1-1-b8-01-size-66x66.ivf",
- "av1-1-b8-02-allintra.ivf", "av1-1-b8-03-sizedown.mkv",
- "av1-1-b8-03-sizeup.mkv"
-};
-const int kNumAV1TestVectors = NELEMENTS(kAV1TestVectors);
-#endif // CONFIG_AV1_DECODER
-
-} // namespace libaom_test
diff --git a/third_party/aom/test/test_vectors.h b/third_party/aom/test/test_vectors.h
deleted file mode 100644
index be37f6e37..000000000
--- a/third_party/aom/test/test_vectors.h
+++ /dev/null
@@ -1,26 +0,0 @@
-/*
- * Copyright (c) 2018, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#ifndef AOM_TEST_TEST_VECTORS_H_
-#define AOM_TEST_TEST_VECTORS_H_
-
-#include "config/aom_config.h"
-
-namespace libaom_test {
-
-#if CONFIG_AV1_DECODER
-extern const int kNumAV1TestVectors;
-extern const char *const kAV1TestVectors[];
-#endif
-
-} // namespace libaom_test
-
-#endif // AOM_TEST_TEST_VECTORS_H_
diff --git a/third_party/aom/test/tile_independence_test.cc b/third_party/aom/test/tile_independence_test.cc
deleted file mode 100644
index cf534c0c5..000000000
--- a/third_party/aom/test/tile_independence_test.cc
+++ /dev/null
@@ -1,173 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#include <cstdio>
-#include <cstdlib>
-#include <string>
-#include "third_party/googletest/src/googletest/include/gtest/gtest.h"
-#include "test/codec_factory.h"
-#include "test/encode_test_driver.h"
-#include "test/i420_video_source.h"
-#include "test/util.h"
-#include "test/md5_helper.h"
-#include "aom_mem/aom_mem.h"
-
-namespace {
-class TileIndependenceTest
- : public ::libaom_test::CodecTestWith3Params<int, int, int>,
- public ::libaom_test::EncoderTest {
- protected:
- TileIndependenceTest()
- : EncoderTest(GET_PARAM(0)), md5_fw_order_(), md5_inv_order_(),
- n_tile_cols_(GET_PARAM(1)), n_tile_rows_(GET_PARAM(2)),
- n_tile_groups_(GET_PARAM(3)) {
- init_flags_ = AOM_CODEC_USE_PSNR;
- aom_codec_dec_cfg_t cfg = aom_codec_dec_cfg_t();
- cfg.w = 704;
- cfg.h = 576;
- cfg.threads = 1;
- cfg.allow_lowbitdepth = 1;
- fw_dec_ = codec_->CreateDecoder(cfg, 0);
- inv_dec_ = codec_->CreateDecoder(cfg, 0);
- inv_dec_->Control(AV1_INVERT_TILE_DECODE_ORDER, 1);
-
- if (fw_dec_->IsAV1() && inv_dec_->IsAV1()) {
- fw_dec_->Control(AV1_SET_DECODE_TILE_ROW, -1);
- fw_dec_->Control(AV1_SET_DECODE_TILE_COL, -1);
- inv_dec_->Control(AV1_SET_DECODE_TILE_ROW, -1);
- inv_dec_->Control(AV1_SET_DECODE_TILE_COL, -1);
- }
- }
-
- virtual ~TileIndependenceTest() {
- delete fw_dec_;
- delete inv_dec_;
- }
-
- virtual void SetUp() {
- InitializeConfig();
- SetMode(libaom_test::kTwoPassGood);
- }
-
- virtual void PreEncodeFrameHook(libaom_test::VideoSource *video,
- libaom_test::Encoder *encoder) {
- if (video->frame() == 1) {
- encoder->Control(AV1E_SET_TILE_COLUMNS, n_tile_cols_);
- encoder->Control(AV1E_SET_TILE_ROWS, n_tile_rows_);
- SetCpuUsed(encoder);
- } else if (video->frame() == 3) {
- encoder->Control(AV1E_SET_NUM_TG, n_tile_groups_);
- }
- }
-
- virtual void SetCpuUsed(libaom_test::Encoder *encoder) {
- static const int kCpuUsed = 3;
- encoder->Control(AOME_SET_CPUUSED, kCpuUsed);
- }
-
- void UpdateMD5(::libaom_test::Decoder *dec, const aom_codec_cx_pkt_t *pkt,
- ::libaom_test::MD5 *md5) {
- const aom_codec_err_t res = dec->DecodeFrame(
- reinterpret_cast<uint8_t *>(pkt->data.frame.buf), pkt->data.frame.sz);
- if (res != AOM_CODEC_OK) {
- abort_ = true;
- ASSERT_EQ(AOM_CODEC_OK, res);
- }
- const aom_image_t *img = dec->GetDxData().Next();
- md5->Add(img);
- }
-
- virtual void FramePktHook(const aom_codec_cx_pkt_t *pkt) {
- UpdateMD5(fw_dec_, pkt, &md5_fw_order_);
- UpdateMD5(inv_dec_, pkt, &md5_inv_order_);
- }
-
- void DoTest() {
- const aom_rational timebase = { 33333333, 1000000000 };
- cfg_.g_timebase = timebase;
- cfg_.rc_target_bitrate = 500;
- cfg_.g_lag_in_frames = 12;
- cfg_.rc_end_usage = AOM_VBR;
-
- libaom_test::I420VideoSource video("hantro_collage_w352h288.yuv", 704, 576,
- timebase.den, timebase.num, 0, 5);
- ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
-
- const char *md5_fw_str = md5_fw_order_.Get();
- const char *md5_inv_str = md5_inv_order_.Get();
- ASSERT_STREQ(md5_fw_str, md5_inv_str);
- }
-
- ::libaom_test::MD5 md5_fw_order_, md5_inv_order_;
- ::libaom_test::Decoder *fw_dec_, *inv_dec_;
-
- private:
- int n_tile_cols_;
- int n_tile_rows_;
- int n_tile_groups_;
-};
-
-// run an encode with 2 or 4 tiles, and do the decode both in normal and
-// inverted tile ordering. Ensure that the MD5 of the output in both cases
-// is identical. If so, tiles are considered independent and the test passes.
-TEST_P(TileIndependenceTest, MD5Match) {
- cfg_.large_scale_tile = 0;
- fw_dec_->Control(AV1_SET_TILE_MODE, 0);
- inv_dec_->Control(AV1_SET_TILE_MODE, 0);
- DoTest();
-}
-
-class TileIndependenceTestLarge : public TileIndependenceTest {
- virtual void SetCpuUsed(libaom_test::Encoder *encoder) {
- static const int kCpuUsed = 0;
- encoder->Control(AOME_SET_CPUUSED, kCpuUsed);
- }
-};
-
-TEST_P(TileIndependenceTestLarge, MD5Match) {
- cfg_.large_scale_tile = 0;
- fw_dec_->Control(AV1_SET_TILE_MODE, 0);
- inv_dec_->Control(AV1_SET_TILE_MODE, 0);
- DoTest();
-}
-
-AV1_INSTANTIATE_TEST_CASE(TileIndependenceTest, ::testing::Values(0, 1),
- ::testing::Values(0, 1), ::testing::Values(1, 2, 4));
-AV1_INSTANTIATE_TEST_CASE(TileIndependenceTestLarge, ::testing::Values(0, 1),
- ::testing::Values(0, 1), ::testing::Values(1, 2, 4));
-
-class TileIndependenceLSTest : public TileIndependenceTest {};
-
-TEST_P(TileIndependenceLSTest, MD5Match) {
- cfg_.large_scale_tile = 1;
- fw_dec_->Control(AV1_SET_TILE_MODE, 1);
- fw_dec_->Control(AV1D_EXT_TILE_DEBUG, 1);
- inv_dec_->Control(AV1_SET_TILE_MODE, 1);
- inv_dec_->Control(AV1D_EXT_TILE_DEBUG, 1);
- DoTest();
-}
-
-class TileIndependenceLSTestLarge : public TileIndependenceTestLarge {};
-
-TEST_P(TileIndependenceLSTestLarge, MD5Match) {
- cfg_.large_scale_tile = 1;
- fw_dec_->Control(AV1_SET_TILE_MODE, 1);
- fw_dec_->Control(AV1D_EXT_TILE_DEBUG, 1);
- inv_dec_->Control(AV1_SET_TILE_MODE, 1);
- inv_dec_->Control(AV1D_EXT_TILE_DEBUG, 1);
- DoTest();
-}
-
-AV1_INSTANTIATE_TEST_CASE(TileIndependenceLSTest, ::testing::Values(6),
- ::testing::Values(6), ::testing::Values(1));
-AV1_INSTANTIATE_TEST_CASE(TileIndependenceLSTestLarge, ::testing::Values(6),
- ::testing::Values(6), ::testing::Values(1));
-} // namespace
diff --git a/third_party/aom/test/tools_common.sh b/third_party/aom/test/tools_common.sh
deleted file mode 100755
index c08710606..000000000
--- a/third_party/aom/test/tools_common.sh
+++ /dev/null
@@ -1,477 +0,0 @@
-#!/bin/sh
-## Copyright (c) 2016, Alliance for Open Media. All rights reserved
-##
-## This source code is subject to the terms of the BSD 2 Clause License and
-## the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
-## was not distributed with this source code in the LICENSE file, you can
-## obtain it at www.aomedia.org/license/software. If the Alliance for Open
-## Media Patent License 1.0 was not distributed with this source code in the
-## PATENTS file, you can obtain it at www.aomedia.org/license/patent.
-##
-## This file contains shell code shared by test scripts for libaom tools.
-
-# Use $AOM_TEST_TOOLS_COMMON_SH as a pseudo include guard.
-if [ -z "${AOM_TEST_TOOLS_COMMON_SH}" ]; then
-AOM_TEST_TOOLS_COMMON_SH=included
-
-set -e
-devnull='> /dev/null 2>&1'
-AOM_TEST_PREFIX=""
-
-elog() {
- echo "$@" 1>&2
-}
-
-vlog() {
- if [ "${AOM_TEST_VERBOSE_OUTPUT}" = "yes" ]; then
- echo "$@"
- fi
-}
-
-# Sets $AOM_TOOL_TEST to the name specified by positional parameter one.
-test_begin() {
- AOM_TOOL_TEST="${1}"
-}
-
-# Clears the AOM_TOOL_TEST variable after confirming that $AOM_TOOL_TEST matches
-# positional parameter one.
-test_end() {
- if [ "$1" != "${AOM_TOOL_TEST}" ]; then
- echo "FAIL completed test mismatch!."
- echo " completed test: ${1}"
- echo " active test: ${AOM_TOOL_TEST}."
- return 1
- fi
- AOM_TOOL_TEST='<unset>'
-}
-
-# Echoes the target configuration being tested.
-test_configuration_target() {
- aom_config_c="${LIBAOM_CONFIG_PATH}/config/aom_config.c"
- # Clean up the cfg pointer line from aom_config.c for easier re-use by
- # someone examining a failure in the example tests.
- # 1. Run grep on aom_config.c for cfg and limit the results to 1.
- # 2. Split the line using ' = ' as separator.
- # 3. Abuse sed to consume the leading " and trailing "; from the assignment
- # to the cfg pointer.
- cmake_config=$(awk -F ' = ' '/cfg/ { print $NF; exit }' "${aom_config_c}" \
- | sed -e s/\"// -e s/\"\;//)
- echo cmake generated via command: cmake path/to/aom ${cmake_config}
-}
-
-# Trap function used for failure reports and tool output directory removal.
-# When the contents of $AOM_TOOL_TEST do not match the string '<unset>', reports
-# failure of test stored in $AOM_TOOL_TEST.
-cleanup() {
- if [ -n "${AOM_TOOL_TEST}" ] && [ "${AOM_TOOL_TEST}" != '<unset>' ]; then
- echo "FAIL: $AOM_TOOL_TEST"
- fi
- if [ "${AOM_TEST_PRESERVE_OUTPUT}" = "yes" ]; then
- return
- fi
- if [ -n "${AOM_TEST_OUTPUT_DIR}" ] && [ -d "${AOM_TEST_OUTPUT_DIR}" ]; then
- rm -rf "${AOM_TEST_OUTPUT_DIR}"
- fi
-}
-
-# Echoes the version string assigned to the VERSION_STRING_NOSP variable defined
-# in $LIBAOM_CONFIG_PATH/config/aom_version.h to stdout.
-cmake_version() {
- aom_version_h="${LIBAOM_CONFIG_PATH}/config/aom_version.h"
-
- # Find VERSION_STRING_NOSP line, split it with '"' and print the next to last
- # field to output the version string to stdout.
- aom_version=$(awk -F \" '/VERSION_STRING_NOSP/ {print $(NF-1)}' \
- "${aom_version_h}")
- echo "v${aom_version}"
-}
-
-# Echoes current git version as reported by running 'git describe', or the
-# version used by the cmake build when git is unavailable.
-source_version() {
- if git --version > /dev/null 2>&1; then
- (cd "$(dirname "${0}")"
- git describe)
- else
- cmake_version
- fi
-}
-
-# Echoes warnings to stdout when source version and CMake build generated
-# version are out of sync.
-check_version_strings() {
- cmake_version=$(cmake_version)
- source_version=$(source_version)
-
- if [ "${cmake_version}" != "${source_version}" ]; then
- echo "Warning: version has changed since last cmake run."
- vlog " cmake version: ${cmake_version} version now: ${source_version}"
- fi
-}
-
-# $1 is the name of an environment variable containing a directory name to
-# test.
-test_env_var_dir() {
- local dir=$(eval echo "\${$1}")
- if [ ! -d "${dir}" ]; then
- elog "'${dir}': No such directory"
- elog "The $1 environment variable must be set to a valid directory."
- return 1
- fi
-}
-
-# This script requires that the LIBAOM_BIN_PATH, LIBAOM_CONFIG_PATH, and
-# LIBAOM_TEST_DATA_PATH variables are in the environment: Confirm that
-# the variables are set and that they all evaluate to directory paths.
-verify_aom_test_environment() {
- test_env_var_dir "LIBAOM_BIN_PATH" \
- && test_env_var_dir "LIBAOM_CONFIG_PATH" \
- && test_env_var_dir "LIBAOM_TEST_DATA_PATH"
-}
-
-# Greps aom_config.h in LIBAOM_CONFIG_PATH for positional parameter one, which
-# should be a LIBAOM preprocessor flag. Echoes yes to stdout when the feature
-# is available.
-aom_config_option_enabled() {
- aom_config_option="${1}"
- aom_config_file="${LIBAOM_CONFIG_PATH}/config/aom_config.h"
- config_line=$(grep "${aom_config_option}" "${aom_config_file}")
- if echo "${config_line}" | egrep -q '1$'; then
- echo yes
- fi
-}
-
-# Echoes yes when output of test_configuration_target() contains win32 or win64.
-is_windows_target() {
- if test_configuration_target \
- | grep -q -e win32 -e win64 > /dev/null 2>&1; then
- echo yes
- fi
-}
-
-# Echoes path to $1 when it's executable and exists in one of the directories
-# included in $tool_paths, or an empty string. Caller is responsible for testing
-# the string once the function returns.
-aom_tool_path() {
- local tool_name="$1"
- local root_path="${LIBAOM_BIN_PATH}"
- local suffix="${AOM_TEST_EXE_SUFFIX}"
- local tool_paths="\
- ${root_path}/${tool_name}${suffix} \
- ${root_path}/../${tool_name}${suffix} \
- ${root_path}/tools/${tool_name}${suffix} \
- ${root_path}/../tools/${tool_name}${suffix}"
-
- local toolpath=""
-
- for tool_path in ${tool_paths}; do
- if [ -x "${tool_path}" ] && [ -f "${tool_path}" ]; then
- echo "${tool_path}"
- return 0
- fi
- done
-
- return 1
-}
-
-# Echoes yes to stdout when the file named by positional parameter one exists
-# in LIBAOM_BIN_PATH, and is executable.
-aom_tool_available() {
- local tool_name="$1"
- local tool="${LIBAOM_BIN_PATH}/${tool_name}${AOM_TEST_EXE_SUFFIX}"
- [ -x "${tool}" ] && echo yes
-}
-
-# Echoes yes to stdout when aom_config_option_enabled() reports yes for
-# CONFIG_AV1_DECODER.
-av1_decode_available() {
- [ "$(aom_config_option_enabled CONFIG_AV1_DECODER)" = "yes" ] && echo yes
-}
-
-# Echoes yes to stdout when aom_config_option_enabled() reports yes for
-# CONFIG_AV1_ENCODER.
-av1_encode_available() {
- [ "$(aom_config_option_enabled CONFIG_AV1_ENCODER)" = "yes" ] && echo yes
-}
-
-# Echoes "fast" encode params for use with aomenc.
-aomenc_encode_test_fast_params() {
- echo "--cpu-used=1
- --limit=${AV1_ENCODE_TEST_FRAME_LIMIT}
- --lag-in-frames=0
- --test-decode=fatal"
-}
-
-# Echoes yes to stdout when aom_config_option_enabled() reports yes for
-# CONFIG_WEBM_IO.
-webm_io_available() {
- [ "$(aom_config_option_enabled CONFIG_WEBM_IO)" = "yes" ] && echo yes
-}
-
-# Filters strings from $1 using the filter specified by $2. Filter behavior
-# depends on the presence of $3. When $3 is present, strings that match the
-# filter are excluded. When $3 is omitted, strings matching the filter are
-# included.
-# The filtered result is echoed to stdout.
-filter_strings() {
- strings=${1}
- filter=${2}
- exclude=${3}
-
- if [ -n "${exclude}" ]; then
- # When positional parameter three exists the caller wants to remove strings.
- # Tell grep to invert matches using the -v argument.
- exclude='-v'
- else
- unset exclude
- fi
-
- if [ -n "${filter}" ]; then
- for s in ${strings}; do
- if echo "${s}" | egrep -q ${exclude} "${filter}" > /dev/null 2>&1; then
- filtered_strings="${filtered_strings} ${s}"
- fi
- done
- else
- filtered_strings="${strings}"
- fi
- echo "${filtered_strings}"
-}
-
-# Runs user test functions passed via positional parameters one and two.
-# Functions in positional parameter one are treated as environment verification
-# functions and are run unconditionally. Functions in positional parameter two
-# are run according to the rules specified in aom_test_usage().
-run_tests() {
- local env_tests="verify_aom_test_environment $1"
- local tests_to_filter="$2"
- local test_name="${AOM_TEST_NAME}"
-
- if [ -z "${test_name}" ]; then
- test_name="$(basename "${0%.*}")"
- fi
-
- if [ "${AOM_TEST_RUN_DISABLED_TESTS}" != "yes" ]; then
- # Filter out DISABLED tests.
- tests_to_filter=$(filter_strings "${tests_to_filter}" ^DISABLED exclude)
- fi
-
- if [ -n "${AOM_TEST_FILTER}" ]; then
- # Remove tests not matching the user's filter.
- tests_to_filter=$(filter_strings "${tests_to_filter}" ${AOM_TEST_FILTER})
- fi
-
- # User requested test listing: Dump test names and return.
- if [ "${AOM_TEST_LIST_TESTS}" = "yes" ]; then
- for test_name in $tests_to_filter; do
- echo ${test_name}
- done
- return
- fi
-
- # Don't bother with the environment tests if everything else was disabled.
- [ -z "${tests_to_filter}" ] && return
-
- # Combine environment and actual tests.
- local tests_to_run="${env_tests} ${tests_to_filter}"
-
- check_version_strings
-
- # Run tests.
- for test in ${tests_to_run}; do
- test_begin "${test}"
- vlog " RUN ${test}"
- "${test}"
- vlog " PASS ${test}"
- test_end "${test}"
- done
-
- local tested_config="$(test_configuration_target) @ $(source_version)"
- echo "${test_name}: Done, all tests pass for ${tested_config}."
-}
-
-aom_test_usage() {
-cat << EOF
- Usage: ${0##*/} [arguments]
- --bin-path <path to libaom binaries directory>
- --config-path <path to libaom config directory>
- --filter <filter>: User test filter. Only tests matching filter are run.
- --run-disabled-tests: Run disabled tests.
- --help: Display this message and exit.
- --test-data-path <path to libaom test data directory>
- --show-program-output: Shows output from all programs being tested.
- --prefix: Allows for a user specified prefix to be inserted before all test
- programs. Grants the ability, for example, to run test programs
- within valgrind.
- --list-tests: List all test names and exit without actually running tests.
- --verbose: Verbose output.
-
- When the --bin-path option is not specified the script attempts to use
- \$LIBAOM_BIN_PATH and then the current directory.
-
- When the --config-path option is not specified the script attempts to use
- \$LIBAOM_CONFIG_PATH and then the current directory.
-
- When the -test-data-path option is not specified the script attempts to use
- \$LIBAOM_TEST_DATA_PATH and then the current directory.
-EOF
-}
-
-# Returns non-zero (failure) when required environment variables are empty
-# strings.
-aom_test_check_environment() {
- if [ -z "${LIBAOM_BIN_PATH}" ] || \
- [ -z "${LIBAOM_CONFIG_PATH}" ] || \
- [ -z "${LIBAOM_TEST_DATA_PATH}" ]; then
- return 1
- fi
-}
-
-# Echo aomenc command line parameters allowing use of a raw yuv file as
-# input to aomenc.
-yuv_raw_input() {
- echo ""${YUV_RAW_INPUT}"
- --width="${YUV_RAW_INPUT_WIDTH}"
- --height="${YUV_RAW_INPUT_HEIGHT}""
-}
-
-# Do a small encode for testing decoders.
-encode_yuv_raw_input_av1() {
- if [ "$(av1_encode_available)" = "yes" ]; then
- local output="$1"
- local encoder="$(aom_tool_path aomenc)"
- shift
- eval "${encoder}" $(yuv_raw_input) \
- $(aomenc_encode_test_fast_params) \
- --output="${output}" \
- $@ \
- ${devnull}
-
- if [ ! -e "${output}" ]; then
- elog "Output file does not exist."
- return 1
- fi
- fi
-}
-
-# Parse the command line.
-while [ -n "$1" ]; do
- case "$1" in
- --bin-path)
- LIBAOM_BIN_PATH="$2"
- shift
- ;;
- --config-path)
- LIBAOM_CONFIG_PATH="$2"
- shift
- ;;
- --filter)
- AOM_TEST_FILTER="$2"
- shift
- ;;
- --run-disabled-tests)
- AOM_TEST_RUN_DISABLED_TESTS=yes
- ;;
- --help)
- aom_test_usage
- exit
- ;;
- --test-data-path)
- LIBAOM_TEST_DATA_PATH="$2"
- shift
- ;;
- --prefix)
- AOM_TEST_PREFIX="$2"
- shift
- ;;
- --verbose)
- AOM_TEST_VERBOSE_OUTPUT=yes
- ;;
- --show-program-output)
- devnull=
- ;;
- --list-tests)
- AOM_TEST_LIST_TESTS=yes
- ;;
- *)
- aom_test_usage
- exit 1
- ;;
- esac
- shift
-done
-
-# Handle running the tests from a build directory without arguments when running
-# the tests on *nix/macosx.
-LIBAOM_BIN_PATH="${LIBAOM_BIN_PATH:-.}"
-LIBAOM_CONFIG_PATH="${LIBAOM_CONFIG_PATH:-.}"
-LIBAOM_TEST_DATA_PATH="${LIBAOM_TEST_DATA_PATH:-.}"
-
-# Create a temporary directory for output files, and a trap to clean it up.
-if [ -n "${TMPDIR}" ]; then
- AOM_TEST_TEMP_ROOT="${TMPDIR}"
-elif [ -n "${TEMPDIR}" ]; then
- AOM_TEST_TEMP_ROOT="${TEMPDIR}"
-else
- AOM_TEST_TEMP_ROOT=/tmp
-fi
-
-AOM_TEST_OUTPUT_DIR="${AOM_TEST_OUTPUT_DIR:-${AOM_TEST_TEMP_ROOT}/aom_test_$$}"
-
-if ! mkdir -p "${AOM_TEST_OUTPUT_DIR}" || \
- [ ! -d "${AOM_TEST_OUTPUT_DIR}" ]; then
- echo "${0##*/}: Cannot create output directory, giving up."
- echo "${0##*/}: AOM_TEST_OUTPUT_DIR=${AOM_TEST_OUTPUT_DIR}"
- exit 1
-fi
-
-AOM_TEST_PRESERVE_OUTPUT=${AOM_TEST_PRESERVE_OUTPUT:-no}
-
-if [ "$(is_windows_target)" = "yes" ]; then
- AOM_TEST_EXE_SUFFIX=".exe"
-fi
-
-# Variables shared by tests.
-AV1_ENCODE_CPU_USED=${AV1_ENCODE_CPU_USED:-1}
-AV1_ENCODE_TEST_FRAME_LIMIT=${AV1_ENCODE_TEST_FRAME_LIMIT:-5}
-AV1_IVF_FILE="${AV1_IVF_FILE:-${AOM_TEST_OUTPUT_DIR}/av1.ivf}"
-AV1_OBU_ANNEXB_FILE="${AV1_OBU_ANNEXB_FILE:-${AOM_TEST_OUTPUT_DIR}/av1.annexb.obu}"
-AV1_OBU_SEC5_FILE="${AV1_OBU_SEC5_FILE:-${AOM_TEST_OUTPUT_DIR}/av1.section5.obu}"
-AV1_WEBM_FILE="${AV1_WEBM_FILE:-${AOM_TEST_OUTPUT_DIR}/av1.webm}"
-
-YUV_RAW_INPUT="${LIBAOM_TEST_DATA_PATH}/hantro_collage_w352h288.yuv"
-YUV_RAW_INPUT_WIDTH=352
-YUV_RAW_INPUT_HEIGHT=288
-
-Y4M_NOSQ_PAR_INPUT="${LIBAOM_TEST_DATA_PATH}/park_joy_90p_8_420_a10-1.y4m"
-Y4M_720P_INPUT="${LIBAOM_TEST_DATA_PATH}/niklas_1280_720_30.y4m"
-
-# Setup a trap function to clean up after tests complete.
-trap cleanup EXIT
-
-vlog "$(basename "${0%.*}") test configuration:
- LIBAOM_BIN_PATH=${LIBAOM_BIN_PATH}
- LIBAOM_CONFIG_PATH=${LIBAOM_CONFIG_PATH}
- LIBAOM_TEST_DATA_PATH=${LIBAOM_TEST_DATA_PATH}
- AOM_TEST_EXE_SUFFIX=${AOM_TEST_EXE_SUFFIX}
- AOM_TEST_FILTER=${AOM_TEST_FILTER}
- AOM_TEST_LIST_TESTS=${AOM_TEST_LIST_TESTS}
- AOM_TEST_OUTPUT_DIR=${AOM_TEST_OUTPUT_DIR}
- AOM_TEST_PREFIX=${AOM_TEST_PREFIX}
- AOM_TEST_PRESERVE_OUTPUT=${AOM_TEST_PRESERVE_OUTPUT}
- AOM_TEST_RUN_DISABLED_TESTS=${AOM_TEST_RUN_DISABLED_TESTS}
- AOM_TEST_SHOW_PROGRAM_OUTPUT=${AOM_TEST_SHOW_PROGRAM_OUTPUT}
- AOM_TEST_TEMP_ROOT=${AOM_TEST_TEMP_ROOT}
- AOM_TEST_VERBOSE_OUTPUT=${AOM_TEST_VERBOSE_OUTPUT}
- AV1_ENCODE_CPU_USED=${AV1_ENCODE_CPU_USED}
- AV1_ENCODE_TEST_FRAME_LIMIT=${AV1_ENCODE_TEST_FRAME_LIMIT}
- AV1_IVF_FILE=${AV1_IVF_FILE}
- AV1_OBU_ANNEXB_FILE=${AV1_OBU_ANNEXB_FILE}
- AV1_OBU_SEC5_FILE=${AV1_OBU_SEC5_FILE}
- AV1_WEBM_FILE=${AV1_WEBM_FILE}
- YUV_RAW_INPUT=${YUV_RAW_INPUT}
- YUV_RAW_INPUT_WIDTH=${YUV_RAW_INPUT_WIDTH}
- YUV_RAW_INPUT_HEIGHT=${YUV_RAW_INPUT_HEIGHT}
- Y4M_NOSQ_PAR_INPUT=${Y4M_NOSQ_PAR_INPUT}"
-
-fi # End $AOM_TEST_TOOLS_COMMON_SH pseudo include guard.
diff --git a/third_party/aom/test/transform_test_base.h b/third_party/aom/test/transform_test_base.h
deleted file mode 100644
index 8ebcf5ff7..000000000
--- a/third_party/aom/test/transform_test_base.h
+++ /dev/null
@@ -1,342 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#ifndef AOM_TEST_TRANSFORM_TEST_BASE_H_
-#define AOM_TEST_TRANSFORM_TEST_BASE_H_
-
-#include "config/aom_config.h"
-
-#include "aom_mem/aom_mem.h"
-#include "aom/aom_codec.h"
-#include "aom_dsp/txfm_common.h"
-
-namespace libaom_test {
-
-// Note:
-// Same constant are defined in av1/common/av1_entropy.h and
-// av1/common/entropy.h. Goal is to make this base class
-// to use for future codec transform testing. But including
-// either of them would lead to compiling error when we do
-// unit test for another codec. Suggest to move the definition
-// to a aom header file.
-const int kDctMaxValue = 16384;
-
-typedef void (*FhtFunc)(const int16_t *in, tran_low_t *out, int stride,
- TxfmParam *txfm_param);
-
-typedef void (*IhtFunc)(const tran_low_t *in, uint8_t *out, int stride,
- const TxfmParam *txfm_param);
-
-class TransformTestBase {
- public:
- virtual ~TransformTestBase() {}
-
- protected:
- virtual void RunFwdTxfm(const int16_t *in, tran_low_t *out, int stride) = 0;
-
- virtual void RunInvTxfm(const tran_low_t *out, uint8_t *dst, int stride) = 0;
-
- void RunAccuracyCheck(uint32_t ref_max_error, double ref_avg_error) {
- ACMRandom rnd(ACMRandom::DeterministicSeed());
- uint32_t max_error = 0;
- int64_t total_error = 0;
- const int count_test_block = 10000;
-
- int16_t *test_input_block = reinterpret_cast<int16_t *>(
- aom_memalign(16, sizeof(int16_t) * num_coeffs_));
- tran_low_t *test_temp_block = reinterpret_cast<tran_low_t *>(
- aom_memalign(16, sizeof(tran_low_t) * num_coeffs_));
- uint8_t *dst = reinterpret_cast<uint8_t *>(
- aom_memalign(16, sizeof(uint8_t) * num_coeffs_));
- uint8_t *src = reinterpret_cast<uint8_t *>(
- aom_memalign(16, sizeof(uint8_t) * num_coeffs_));
- uint16_t *dst16 = reinterpret_cast<uint16_t *>(
- aom_memalign(16, sizeof(uint16_t) * num_coeffs_));
- uint16_t *src16 = reinterpret_cast<uint16_t *>(
- aom_memalign(16, sizeof(uint16_t) * num_coeffs_));
-
- for (int i = 0; i < count_test_block; ++i) {
- // Initialize a test block with input range [-255, 255].
- for (int j = 0; j < num_coeffs_; ++j) {
- if (bit_depth_ == AOM_BITS_8) {
- src[j] = rnd.Rand8();
- dst[j] = rnd.Rand8();
- test_input_block[j] = src[j] - dst[j];
- } else {
- src16[j] = rnd.Rand16() & mask_;
- dst16[j] = rnd.Rand16() & mask_;
- test_input_block[j] = src16[j] - dst16[j];
- }
- }
-
- ASM_REGISTER_STATE_CHECK(
- RunFwdTxfm(test_input_block, test_temp_block, pitch_));
- if (bit_depth_ == AOM_BITS_8) {
- ASM_REGISTER_STATE_CHECK(RunInvTxfm(test_temp_block, dst, pitch_));
- } else {
- ASM_REGISTER_STATE_CHECK(
- RunInvTxfm(test_temp_block, CONVERT_TO_BYTEPTR(dst16), pitch_));
- }
-
- for (int j = 0; j < num_coeffs_; ++j) {
- const int diff =
- bit_depth_ == AOM_BITS_8 ? dst[j] - src[j] : dst16[j] - src16[j];
- const uint32_t error = diff * diff;
- if (max_error < error) max_error = error;
- total_error += error;
- }
- }
-
- double avg_error = total_error * 1. / count_test_block / num_coeffs_;
-
- EXPECT_GE(ref_max_error, max_error)
- << "Error: FHT/IHT has an individual round trip error > "
- << ref_max_error;
-
- EXPECT_GE(ref_avg_error, avg_error)
- << "Error: FHT/IHT has average round trip error > " << ref_avg_error
- << " per block";
-
- aom_free(test_input_block);
- aom_free(test_temp_block);
- aom_free(dst);
- aom_free(src);
- aom_free(dst16);
- aom_free(src16);
- }
-
- void RunCoeffCheck() {
- ACMRandom rnd(ACMRandom::DeterministicSeed());
- const int count_test_block = 5000;
-
- // Use a stride value which is not the width of any transform, to catch
- // cases where the transforms use the stride incorrectly.
- int stride = 96;
-
- int16_t *input_block = reinterpret_cast<int16_t *>(
- aom_memalign(16, sizeof(int16_t) * stride * height_));
- tran_low_t *output_ref_block = reinterpret_cast<tran_low_t *>(
- aom_memalign(16, sizeof(tran_low_t) * num_coeffs_));
- tran_low_t *output_block = reinterpret_cast<tran_low_t *>(
- aom_memalign(16, sizeof(tran_low_t) * num_coeffs_));
-
- for (int i = 0; i < count_test_block; ++i) {
- int j, k;
- for (j = 0; j < height_; ++j) {
- for (k = 0; k < pitch_; ++k) {
- int in_idx = j * stride + k;
- int out_idx = j * pitch_ + k;
- input_block[in_idx] = (rnd.Rand16() & mask_) - (rnd.Rand16() & mask_);
- if (bit_depth_ == AOM_BITS_8) {
- output_block[out_idx] = output_ref_block[out_idx] = rnd.Rand8();
- } else {
- output_block[out_idx] = output_ref_block[out_idx] =
- rnd.Rand16() & mask_;
- }
- }
- }
-
- fwd_txfm_ref(input_block, output_ref_block, stride, &txfm_param_);
- ASM_REGISTER_STATE_CHECK(RunFwdTxfm(input_block, output_block, stride));
-
- // The minimum quant value is 4.
- for (j = 0; j < height_; ++j) {
- for (k = 0; k < pitch_; ++k) {
- int out_idx = j * pitch_ + k;
- ASSERT_EQ(output_block[out_idx], output_ref_block[out_idx])
- << "Error: not bit-exact result at index: " << out_idx
- << " at test block: " << i;
- }
- }
- }
- aom_free(input_block);
- aom_free(output_ref_block);
- aom_free(output_block);
- }
-
- void RunInvCoeffCheck() {
- ACMRandom rnd(ACMRandom::DeterministicSeed());
- const int count_test_block = 5000;
-
- // Use a stride value which is not the width of any transform, to catch
- // cases where the transforms use the stride incorrectly.
- int stride = 96;
-
- int16_t *input_block = reinterpret_cast<int16_t *>(
- aom_memalign(16, sizeof(int16_t) * num_coeffs_));
- tran_low_t *trans_block = reinterpret_cast<tran_low_t *>(
- aom_memalign(16, sizeof(tran_low_t) * num_coeffs_));
- uint8_t *output_block = reinterpret_cast<uint8_t *>(
- aom_memalign(16, sizeof(uint8_t) * stride * height_));
- uint8_t *output_ref_block = reinterpret_cast<uint8_t *>(
- aom_memalign(16, sizeof(uint8_t) * stride * height_));
-
- for (int i = 0; i < count_test_block; ++i) {
- // Initialize a test block with input range [-mask_, mask_].
- int j, k;
- for (j = 0; j < height_; ++j) {
- for (k = 0; k < pitch_; ++k) {
- int in_idx = j * pitch_ + k;
- int out_idx = j * stride + k;
- input_block[in_idx] = (rnd.Rand16() & mask_) - (rnd.Rand16() & mask_);
- output_ref_block[out_idx] = rnd.Rand16() & mask_;
- output_block[out_idx] = output_ref_block[out_idx];
- }
- }
-
- fwd_txfm_ref(input_block, trans_block, pitch_, &txfm_param_);
-
- inv_txfm_ref(trans_block, output_ref_block, stride, &txfm_param_);
- ASM_REGISTER_STATE_CHECK(RunInvTxfm(trans_block, output_block, stride));
-
- for (j = 0; j < height_; ++j) {
- for (k = 0; k < pitch_; ++k) {
- int out_idx = j * stride + k;
- ASSERT_EQ(output_block[out_idx], output_ref_block[out_idx])
- << "Error: not bit-exact result at index: " << out_idx
- << " j = " << j << " k = " << k << " at test block: " << i;
- }
- }
- }
- aom_free(input_block);
- aom_free(trans_block);
- aom_free(output_ref_block);
- aom_free(output_block);
- }
-
- void RunMemCheck() {
- ACMRandom rnd(ACMRandom::DeterministicSeed());
- const int count_test_block = 5000;
-
- int16_t *input_extreme_block = reinterpret_cast<int16_t *>(
- aom_memalign(16, sizeof(int16_t) * num_coeffs_));
- tran_low_t *output_ref_block = reinterpret_cast<tran_low_t *>(
- aom_memalign(16, sizeof(tran_low_t) * num_coeffs_));
- tran_low_t *output_block = reinterpret_cast<tran_low_t *>(
- aom_memalign(16, sizeof(tran_low_t) * num_coeffs_));
-
- for (int i = 0; i < count_test_block; ++i) {
- // Initialize a test block with input range [-mask_, mask_].
- for (int j = 0; j < num_coeffs_; ++j) {
- input_extreme_block[j] = rnd.Rand8() % 2 ? mask_ : -mask_;
- }
- if (i == 0) {
- for (int j = 0; j < num_coeffs_; ++j) input_extreme_block[j] = mask_;
- } else if (i == 1) {
- for (int j = 0; j < num_coeffs_; ++j) input_extreme_block[j] = -mask_;
- }
-
- fwd_txfm_ref(input_extreme_block, output_ref_block, pitch_, &txfm_param_);
- ASM_REGISTER_STATE_CHECK(
- RunFwdTxfm(input_extreme_block, output_block, pitch_));
-
- int row_length = FindRowLength();
- // The minimum quant value is 4.
- for (int j = 0; j < num_coeffs_; ++j) {
- ASSERT_EQ(output_block[j], output_ref_block[j])
- << "Not bit-exact at test index: " << i << ", "
- << "j = " << j << std::endl;
- EXPECT_GE(row_length * kDctMaxValue << (bit_depth_ - 8),
- abs(output_block[j]))
- << "Error: NxN FDCT has coefficient larger than N*DCT_MAX_VALUE";
- }
- }
- aom_free(input_extreme_block);
- aom_free(output_ref_block);
- aom_free(output_block);
- }
-
- void RunInvAccuracyCheck(int limit) {
- ACMRandom rnd(ACMRandom::DeterministicSeed());
- const int count_test_block = 1000;
-
- int16_t *in = reinterpret_cast<int16_t *>(
- aom_memalign(16, sizeof(int16_t) * num_coeffs_));
- tran_low_t *coeff = reinterpret_cast<tran_low_t *>(
- aom_memalign(16, sizeof(tran_low_t) * num_coeffs_));
- uint8_t *dst = reinterpret_cast<uint8_t *>(
- aom_memalign(16, sizeof(uint8_t) * num_coeffs_));
- uint8_t *src = reinterpret_cast<uint8_t *>(
- aom_memalign(16, sizeof(uint8_t) * num_coeffs_));
-
- uint16_t *dst16 = reinterpret_cast<uint16_t *>(
- aom_memalign(16, sizeof(uint16_t) * num_coeffs_));
- uint16_t *src16 = reinterpret_cast<uint16_t *>(
- aom_memalign(16, sizeof(uint16_t) * num_coeffs_));
-
- for (int i = 0; i < count_test_block; ++i) {
- // Initialize a test block with input range [-mask_, mask_].
- for (int j = 0; j < num_coeffs_; ++j) {
- if (bit_depth_ == AOM_BITS_8) {
- src[j] = rnd.Rand8();
- dst[j] = rnd.Rand8();
- in[j] = src[j] - dst[j];
- } else {
- src16[j] = rnd.Rand16() & mask_;
- dst16[j] = rnd.Rand16() & mask_;
- in[j] = src16[j] - dst16[j];
- }
- }
-
- fwd_txfm_ref(in, coeff, pitch_, &txfm_param_);
-
- if (bit_depth_ == AOM_BITS_8) {
- ASM_REGISTER_STATE_CHECK(RunInvTxfm(coeff, dst, pitch_));
- } else {
- ASM_REGISTER_STATE_CHECK(
- RunInvTxfm(coeff, CONVERT_TO_BYTEPTR(dst16), pitch_));
- }
-
- for (int j = 0; j < num_coeffs_; ++j) {
- const int diff =
- bit_depth_ == AOM_BITS_8 ? dst[j] - src[j] : dst16[j] - src16[j];
- const uint32_t error = diff * diff;
- ASSERT_GE(static_cast<uint32_t>(limit), error)
- << "Error: 4x4 IDCT has error " << error << " at index " << j;
- }
- }
- aom_free(in);
- aom_free(coeff);
- aom_free(dst);
- aom_free(src);
- aom_free(src16);
- aom_free(dst16);
- }
-
- int pitch_;
- int height_;
- FhtFunc fwd_txfm_ref;
- IhtFunc inv_txfm_ref;
- aom_bit_depth_t bit_depth_;
- int mask_;
- int num_coeffs_;
- TxfmParam txfm_param_;
-
- private:
- // Assume transform size is 4x4, 8x8, 16x16,...
- int FindRowLength() const {
- int row = 4;
- if (16 == num_coeffs_) {
- row = 4;
- } else if (64 == num_coeffs_) {
- row = 8;
- } else if (256 == num_coeffs_) {
- row = 16;
- } else if (1024 == num_coeffs_) {
- row = 32;
- }
- return row;
- }
-};
-
-} // namespace libaom_test
-
-#endif // AOM_TEST_TRANSFORM_TEST_BASE_H_
diff --git a/third_party/aom/test/twopass_encoder.sh b/third_party/aom/test/twopass_encoder.sh
deleted file mode 100755
index cca44ced8..000000000
--- a/third_party/aom/test/twopass_encoder.sh
+++ /dev/null
@@ -1,54 +0,0 @@
-#!/bin/sh
-## Copyright (c) 2016, Alliance for Open Media. All rights reserved
-##
-## This source code is subject to the terms of the BSD 2 Clause License and
-## the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
-## was not distributed with this source code in the LICENSE file, you can
-## obtain it at www.aomedia.org/license/software. If the Alliance for Open
-## Media Patent License 1.0 was not distributed with this source code in the
-## PATENTS file, you can obtain it at www.aomedia.org/license/patent.
-##
-## This file tests the libaom twopass_encoder example. To add new tests to this
-## file, do the following:
-## 1. Write a shell function (this is your test).
-## 2. Add the function to twopass_encoder_tests (on a new line).
-##
-. $(dirname $0)/tools_common.sh
-
-# Environment check: $YUV_RAW_INPUT is required.
-twopass_encoder_verify_environment() {
- if [ ! -e "${YUV_RAW_INPUT}" ]; then
- echo "Libaom test data must exist in LIBAOM_TEST_DATA_PATH."
- return 1
- fi
-}
-
-# Runs twopass_encoder using the codec specified by $1 with a frame limit of
-# 100.
-twopass_encoder() {
- local encoder="$(aom_tool_path twopass_encoder)"
- local codec="$1"
- local output_file="${AOM_TEST_OUTPUT_DIR}/twopass_encoder_${codec}.ivf"
- local limit=7
-
- if [ ! -x "${encoder}" ]; then
- elog "${encoder} does not exist or is not executable."
- return 1
- fi
-
- eval "${AOM_TEST_PREFIX}" "${encoder}" "${codec}" "${YUV_RAW_INPUT_WIDTH}" \
- "${YUV_RAW_INPUT_HEIGHT}" "${YUV_RAW_INPUT}" "${output_file}" "${limit}" \
- ${devnull}
-
- [ -e "${output_file}" ] || return 1
-}
-
-twopass_encoder_av1() {
- if [ "$(av1_encode_available)" = "yes" ]; then
- twopass_encoder av1 || return 1
- fi
-}
-
-twopass_encoder_tests="twopass_encoder_av1"
-
-run_tests twopass_encoder_verify_environment "${twopass_encoder_tests}"
diff --git a/third_party/aom/test/util.h b/third_party/aom/test/util.h
deleted file mode 100644
index c3f4e4442..000000000
--- a/third_party/aom/test/util.h
+++ /dev/null
@@ -1,53 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#ifndef AOM_TEST_UTIL_H_
-#define AOM_TEST_UTIL_H_
-
-#include <stdio.h>
-#include <math.h>
-#include "third_party/googletest/src/googletest/include/gtest/gtest.h"
-#include "aom/aom_integer.h"
-#include "aom/aom_image.h"
-#include "aom_ports/aom_timer.h"
-
-// Macros
-#define GET_PARAM(k) ::testing::get<k>(GetParam())
-
-inline double compute_psnr(const aom_image_t *img1, const aom_image_t *img2) {
- assert((img1->fmt == img2->fmt) && (img1->d_w == img2->d_w) &&
- (img1->d_h == img2->d_h));
-
- const unsigned int width_y = img1->d_w;
- const unsigned int height_y = img1->d_h;
- unsigned int i, j;
-
- int64_t sqrerr = 0;
- for (i = 0; i < height_y; ++i)
- for (j = 0; j < width_y; ++j) {
- int64_t d = img1->planes[AOM_PLANE_Y][i * img1->stride[AOM_PLANE_Y] + j] -
- img2->planes[AOM_PLANE_Y][i * img2->stride[AOM_PLANE_Y] + j];
- sqrerr += d * d;
- }
- double mse = static_cast<double>(sqrerr) / (width_y * height_y);
- double psnr = 100.0;
- if (mse > 0.0) {
- psnr = 10 * log10(255.0 * 255.0 / mse);
- }
- return psnr;
-}
-
-static INLINE double get_time_mark(aom_usec_timer *t) {
- aom_usec_timer_mark(t);
- return static_cast<double>(aom_usec_timer_elapsed(t));
-}
-
-#endif // AOM_TEST_UTIL_H_
diff --git a/third_party/aom/test/variance_test.cc b/third_party/aom/test/variance_test.cc
deleted file mode 100644
index 0df314b0f..000000000
--- a/third_party/aom/test/variance_test.cc
+++ /dev/null
@@ -1,2064 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#include <cstdlib>
-#include <new>
-
-#include "third_party/googletest/src/googletest/include/gtest/gtest.h"
-
-#include "config/aom_config.h"
-#include "config/aom_dsp_rtcd.h"
-
-#include "test/acm_random.h"
-#include "test/clear_system_state.h"
-#include "test/register_state_check.h"
-#include "aom/aom_codec.h"
-#include "aom/aom_integer.h"
-#include "aom_mem/aom_mem.h"
-#include "aom_ports/aom_timer.h"
-#include "aom_ports/mem.h"
-
-namespace {
-
-typedef unsigned int (*VarianceMxNFunc)(const uint8_t *a, int a_stride,
- const uint8_t *b, int b_stride,
- unsigned int *sse);
-typedef unsigned int (*SubpixVarMxNFunc)(const uint8_t *a, int a_stride,
- int xoffset, int yoffset,
- const uint8_t *b, int b_stride,
- unsigned int *sse);
-typedef unsigned int (*SubpixAvgVarMxNFunc)(const uint8_t *a, int a_stride,
- int xoffset, int yoffset,
- const uint8_t *b, int b_stride,
- uint32_t *sse,
- const uint8_t *second_pred);
-typedef unsigned int (*Get4x4SseFunc)(const uint8_t *a, int a_stride,
- const uint8_t *b, int b_stride);
-typedef unsigned int (*SumOfSquaresFunction)(const int16_t *src);
-typedef unsigned int (*JntSubpixAvgVarMxNFunc)(
- const uint8_t *a, int a_stride, int xoffset, int yoffset, const uint8_t *b,
- int b_stride, uint32_t *sse, const uint8_t *second_pred,
- const JNT_COMP_PARAMS *jcp_param);
-typedef uint32_t (*ObmcSubpelVarFunc)(const uint8_t *pre, int pre_stride,
- int xoffset, int yoffset,
- const int32_t *wsrc, const int32_t *mask,
- unsigned int *sse);
-
-using libaom_test::ACMRandom;
-
-// Truncate high bit depth results by downshifting (with rounding) by:
-// 2 * (bit_depth - 8) for sse
-// (bit_depth - 8) for se
-static void RoundHighBitDepth(int bit_depth, int64_t *se, uint64_t *sse) {
-  // Rounded right shift of the accumulators: 8/4 bits (sse/se) for 12-bit
-  // input and 4/2 bits for 10-bit input.
-  if (bit_depth == AOM_BITS_12) {
-    *sse = (*sse + 128) >> 8;
-    *se = (*se + 8) >> 4;
-  } else if (bit_depth == AOM_BITS_10) {
-    *sse = (*sse + 8) >> 4;
-    *se = (*se + 2) >> 2;
-  }
-  // AOM_BITS_8 and any other value leave both accumulators untouched.
-}
-
-static unsigned int mb_ss_ref(const int16_t *src) {
-  // Reference sum of squares over exactly 256 consecutive 16-bit samples.
-  unsigned int total = 0;
-  const int16_t *const end = src + 256;
-  for (const int16_t *p = src; p != end; ++p) {
-    total += (*p) * (*p);
-  }
-  return total;
-}
-
-/* Note:
- * Our codebase calculates the "diff" value in the variance algorithm by
- * (src - ref).
- */
-static uint32_t variance_ref(const uint8_t *src, const uint8_t *ref, int l2w,
-                             int l2h, int src_stride, int ref_stride,
-                             uint32_t *sse_ptr, bool use_high_bit_depth_,
-                             aom_bit_depth_t bit_depth) {
-  // Reference variance of a (1 << l2w) x (1 << l2h) block, diff = src - ref.
-  // Writes the (bit-depth rounded) SSE to *sse_ptr and returns
-  // sse - se^2 / (w * h).
-  const int w = 1 << l2w;
-  const int h = 1 << l2h;
-  int64_t sum = 0;
-  uint64_t sum_sq = 0;
-
-  for (int row = 0; row < h; ++row) {
-    for (int col = 0; col < w; ++col) {
-      const int s_idx = row * src_stride + col;
-      const int r_idx = row * ref_stride + col;
-      int diff;
-      if (use_high_bit_depth_) {
-        diff = CONVERT_TO_SHORTPTR(src)[s_idx] - CONVERT_TO_SHORTPTR(ref)[r_idx];
-      } else {
-        diff = src[s_idx] - ref[r_idx];
-      }
-      sum += diff;
-      sum_sq += diff * diff;
-    }
-  }
-
-  RoundHighBitDepth(bit_depth, &sum, &sum_sq);
-  *sse_ptr = static_cast<uint32_t>(sum_sq);
-  return static_cast<uint32_t>(sum_sq - ((sum * sum) >> (l2w + l2h)));
-}
-
-/* The subpel reference functions differ from the codec version in one aspect:
- * they calculate the bilinear factors directly instead of using a lookup table
- * and therefore upshift xoff and yoff by 1. Only every other calculated value
- * is used so the codec version shrinks the table to save space and maintain
- * compatibility with vp8.
- */
-static uint32_t subpel_variance_ref(const uint8_t *ref, const uint8_t *src,
-                                    int l2w, int l2h, int xoff, int yoff,
-                                    uint32_t *sse_ptr, bool use_high_bit_depth_,
-                                    aom_bit_depth_t bit_depth) {
-  // Reference sub-pixel variance: `ref` is bilinearly interpolated at
-  // (xoff, yoff) and compared against `src`. `ref` is addressed with a stride
-  // of w + 1, `src` with a stride of w.
-  const int w = 1 << l2w;
-  const int h = 1 << l2h;
-  const int ref_stride = w + 1;
-  int64_t sum = 0;
-  uint64_t sum_sq = 0;
-
-  // Offsets are doubled so the filter below works in 16th-pel steps.
-  xoff <<= 1;
-  yoff <<= 1;
-
-  for (int y = 0; y < h; y++) {
-    for (int x = 0; x < w; x++) {
-      const int pos = ref_stride * y + x;
-      int a1, a2, b1, b2, target;
-      if (use_high_bit_depth_) {
-        const uint16_t *ref16 = CONVERT_TO_SHORTPTR(ref);
-        const uint16_t *src16 = CONVERT_TO_SHORTPTR(src);
-        a1 = ref16[pos];
-        a2 = ref16[pos + 1];
-        b1 = ref16[pos + ref_stride];
-        b2 = ref16[pos + ref_stride + 1];
-        target = src16[w * y + x];
-      } else {
-        a1 = ref[pos];
-        a2 = ref[pos + 1];
-        b1 = ref[pos + ref_stride];
-        b2 = ref[pos + ref_stride + 1];
-        target = src[w * y + x];
-      }
-      // Horizontal pass on both rows, then the vertical blend.
-      const int a = a1 + (((a2 - a1) * xoff + 8) >> 4);
-      const int b = b1 + (((b2 - b1) * xoff + 8) >> 4);
-      const int r = a + (((b - a) * yoff + 8) >> 4);
-      const int diff = r - target;
-      sum += diff;
-      sum_sq += diff * diff;
-    }
-  }
-
-  RoundHighBitDepth(bit_depth, &sum, &sum_sq);
-  *sse_ptr = static_cast<uint32_t>(sum_sq);
-  return static_cast<uint32_t>(sum_sq - ((sum * sum) >> (l2w + l2h)));
-}
-
-static uint32_t subpel_avg_variance_ref(const uint8_t *ref, const uint8_t *src,
-                                        const uint8_t *second_pred, int l2w,
-                                        int l2h, int xoff, int yoff,
-                                        uint32_t *sse_ptr,
-                                        bool use_high_bit_depth,
-                                        aom_bit_depth_t bit_depth) {
-  // Like the plain sub-pixel reference, but the interpolated value is first
-  // averaged (with rounding) with `second_pred` before being compared to
-  // `src`. `ref` uses a stride of w + 1; `src` and `second_pred` use w.
-  const int w = 1 << l2w;
-  const int h = 1 << l2h;
-  const int ref_stride = w + 1;
-  int64_t sum = 0;
-  uint64_t sum_sq = 0;
-
-  // Offsets are doubled so the filter below works in 16th-pel steps.
-  xoff <<= 1;
-  yoff <<= 1;
-
-  for (int y = 0; y < h; y++) {
-    for (int x = 0; x < w; x++) {
-      const int pos = ref_stride * y + x;
-      const int out = w * y + x;
-      int a1, a2, b1, b2, second, target;
-      if (use_high_bit_depth) {
-        const uint16_t *ref16 = CONVERT_TO_SHORTPTR(ref);
-        const uint16_t *src16 = CONVERT_TO_SHORTPTR(src);
-        const uint16_t *sec16 = CONVERT_TO_SHORTPTR(second_pred);
-        a1 = ref16[pos];
-        a2 = ref16[pos + 1];
-        b1 = ref16[pos + ref_stride];
-        b2 = ref16[pos + ref_stride + 1];
-        second = sec16[out];
-        target = src16[out];
-      } else {
-        a1 = ref[pos];
-        a2 = ref[pos + 1];
-        b1 = ref[pos + ref_stride];
-        b2 = ref[pos + ref_stride + 1];
-        second = second_pred[out];
-        target = src[out];
-      }
-      const int a = a1 + (((a2 - a1) * xoff + 8) >> 4);
-      const int b = b1 + (((b2 - b1) * xoff + 8) >> 4);
-      const int r = a + (((b - a) * yoff + 8) >> 4);
-      const int diff = ((r + second + 1) >> 1) - target;
-      sum += diff;
-      sum_sq += diff * diff;
-    }
-  }
-
-  RoundHighBitDepth(bit_depth, &sum, &sum_sq);
-  *sse_ptr = static_cast<uint32_t>(sum_sq);
-  return static_cast<uint32_t>(sum_sq - ((sum * sum) >> (l2w + l2h)));
-}
-
-static uint32_t jnt_subpel_avg_variance_ref(
-    const uint8_t *ref, const uint8_t *src, const uint8_t *second_pred, int l2w,
-    int l2h, int xoff, int yoff, uint32_t *sse_ptr, bool use_high_bit_depth,
-    aom_bit_depth_t bit_depth, JNT_COMP_PARAMS *jcp_param) {
-  // Weighted-average variant: the interpolated value and `second_pred` are
-  // combined with jcp_param's fwd/bck offsets and rounded down by
-  // DIST_PRECISION_BITS. Unlike the other sub-pixel references, `ref` is
-  // addressed with a stride of w here (not w + 1).
-  const int w = 1 << l2w;
-  const int h = 1 << l2h;
-  const int ref_stride = w + 0;
-  int64_t sum = 0;
-  uint64_t sum_sq = 0;
-
-  // Offsets are doubled so the filter below works in 16th-pel steps.
-  xoff <<= 1;
-  yoff <<= 1;
-
-  for (int y = 0; y < h; y++) {
-    for (int x = 0; x < w; x++) {
-      const int pos = ref_stride * y + x;
-      const int out = w * y + x;
-      int a1, a2, b1, b2, second, target;
-      if (use_high_bit_depth) {
-        const uint16_t *ref16 = CONVERT_TO_SHORTPTR(ref);
-        const uint16_t *src16 = CONVERT_TO_SHORTPTR(src);
-        const uint16_t *sec16 = CONVERT_TO_SHORTPTR(second_pred);
-        a1 = ref16[pos];
-        a2 = ref16[pos + 1];
-        b1 = ref16[pos + ref_stride];
-        b2 = ref16[pos + ref_stride + 1];
-        second = sec16[out];
-        target = src16[out];
-      } else {
-        a1 = ref[pos];
-        a2 = ref[pos + 1];
-        b1 = ref[pos + ref_stride];
-        b2 = ref[pos + ref_stride + 1];
-        second = second_pred[out];
-        target = src[out];
-      }
-      const int a = a1 + (((a2 - a1) * xoff + 8) >> 4);
-      const int b = b1 + (((b2 - b1) * xoff + 8) >> 4);
-      const int r = a + (((b - a) * yoff + 8) >> 4);
-      const int avg = ROUND_POWER_OF_TWO(
-          r * jcp_param->fwd_offset + second * jcp_param->bck_offset,
-          DIST_PRECISION_BITS);
-      const int diff = avg - target;
-
-      sum += diff;
-      sum_sq += diff * diff;
-    }
-  }
-
-  RoundHighBitDepth(bit_depth, &sum, &sum_sq);
-  *sse_ptr = static_cast<uint32_t>(sum_sq);
-  return static_cast<uint32_t>(sum_sq - ((sum * sum) >> (l2w + l2h)));
-}
-
-static uint32_t obmc_subpel_variance_ref(const uint8_t *pre, int l2w, int l2h,
-                                         int xoff, int yoff,
-                                         const int32_t *wsrc,
-                                         const int32_t *mask, uint32_t *sse_ptr,
-                                         bool use_high_bit_depth_,
-                                         aom_bit_depth_t bit_depth) {
-  // OBMC reference: the interpolated prediction is scaled by `mask`,
-  // subtracted from the weighted source `wsrc`, and the result is rounded
-  // (signed) down by 12 bits. `pre` is addressed with a stride of w + 1.
-  const int w = 1 << l2w;
-  const int h = 1 << l2h;
-  const int pre_stride = w + 1;
-  int64_t sum = 0;
-  uint64_t sum_sq = 0;
-
-  // Offsets are doubled so the filter below works in 16th-pel steps.
-  xoff <<= 1;
-  yoff <<= 1;
-
-  for (int y = 0; y < h; y++) {
-    for (int x = 0; x < w; x++) {
-      const int pos = pre_stride * y + x;
-      int a1, a2, b1, b2;
-      if (use_high_bit_depth_) {
-        const uint16_t *pre16 = CONVERT_TO_SHORTPTR(pre);
-        a1 = pre16[pos];
-        a2 = pre16[pos + 1];
-        b1 = pre16[pos + pre_stride];
-        b2 = pre16[pos + pre_stride + 1];
-      } else {
-        a1 = pre[pos];
-        a2 = pre[pos + 1];
-        b1 = pre[pos + pre_stride];
-        b2 = pre[pos + pre_stride + 1];
-      }
-      const int a = a1 + (((a2 - a1) * xoff + 8) >> 4);
-      const int b = b1 + (((b2 - b1) * xoff + 8) >> 4);
-      const int r = a + (((b - a) * yoff + 8) >> 4);
-      const int diff = ROUND_POWER_OF_TWO_SIGNED(
-          wsrc[w * y + x] - r * mask[w * y + x], 12);
-      sum += diff;
-      sum_sq += diff * diff;
-    }
-  }
-
-  RoundHighBitDepth(bit_depth, &sum, &sum_sq);
-  *sse_ptr = static_cast<uint32_t>(sum_sq);
-  return static_cast<uint32_t>(sum_sq - ((sum * sum) >> (l2w + l2h)));
-}
-
-////////////////////////////////////////////////////////////////////////////////
-
-// Fixture for sum-of-squares kernels; the function under test is the
-// test parameter itself.
-class SumOfSquaresTest : public ::testing::TestWithParam<SumOfSquaresFunction> {
- public:
-  SumOfSquaresTest() : func_(GetParam()) {}
-
-  virtual ~SumOfSquaresTest() { libaom_test::ClearSystemState(); }
-
- protected:
-  // Checks blocks filled with a single constant value.
-  void ConstTest();
-  // Checks random blocks against mb_ss_ref().
-  void RefTest();
-
-  SumOfSquaresFunction func_;  // kernel under test
-  ACMRandom rnd_;              // random source for RefTest()
-};
-
-void SumOfSquaresTest::ConstTest() {
-  // A 256-entry block holding the constant v must sum to 256 * v * v.
-  int16_t block[256];
-  for (int v = 0; v < 256; ++v) {
-    for (int16_t *p = block; p != block + 256; ++p) *p = v;
-
-    unsigned int actual;
-    ASM_REGISTER_STATE_CHECK(actual = func_(block));
-    const unsigned int expected = 256u * (v * v);
-    EXPECT_EQ(expected, actual);
-  }
-}
-
-void SumOfSquaresTest::RefTest() {
-  // 100 random blocks, each compared against the C reference.
-  int16_t block[256];
-  for (int iter = 0; iter < 100; ++iter) {
-    for (int k = 0; k < 256; ++k) {
-      block[k] = rnd_.Rand8() - rnd_.Rand8();
-    }
-
-    unsigned int actual;
-    const unsigned int expected = mb_ss_ref(block);
-    ASM_REGISTER_STATE_CHECK(actual = func_(block));
-    EXPECT_EQ(expected, actual);
-  }
-}
-
-////////////////////////////////////////////////////////////////////////////////
-// Encapsulating struct to store the function to test along with
-// some testing context.
-// Can be used for MSE, SSE, Variance, etc.
-
-template <typename Func>
-struct TestParams {
-  // bit_depth_value == 0 selects the 8-bit path; any positive value enables
-  // the high bit-depth path and is used as the bit depth.
-  // Members are initialised in declaration order, so the derived fields
-  // (block_size, mask) may read the ones declared before them.
-  TestParams(int log2w = 0, int log2h = 0, Func function = NULL,
-             int bit_depth_value = 0)
-      : log2width(log2w),
-        log2height(log2h),
-        width(1 << log2w),
-        height(1 << log2h),
-        block_size(width * height),
-        func(function),
-        bit_depth(bit_depth_value > 0
-                      ? static_cast<aom_bit_depth_t>(bit_depth_value)
-                      : AOM_BITS_8),
-        use_high_bit_depth(bit_depth_value > 0),
-        mask((1u << bit_depth) - 1) {}
-
-  int log2width, log2height;  // log2 of the block dimensions
-  int width, height;          // block dimensions in samples
-  int block_size;             // width * height
-  Func func;                  // function under test
-  aom_bit_depth_t bit_depth;
-  bool use_high_bit_depth;
-  uint32_t mask;              // (1 << bit_depth) - 1
-};
-
-template <typename Func>
-std::ostream &operator<<(std::ostream &os, const TestParams<Func> &p) {
-  // Printer for TestParams values: dimensions, function address, bit depth.
-  os << "width/height:" << p.width << "/" << p.height;
-  os << " function:" << reinterpret_cast<const void *>(p.func);
-  os << " bit-depth:" << p.bit_depth;
-  return os;
-}
-
-// Main class for testing a function type
-template <typename FunctionType>
-class MainTestClass
-    : public ::testing::TestWithParam<TestParams<FunctionType> > {
- public:
-  // Allocates one source and one reference block sized for the current
-  // parameters (two bytes per sample on the high bit-depth path).
-  virtual void SetUp() {
-    params_ = this->GetParam();
-    rnd_.Reset(ACMRandom::DeterministicSeed());
-
-    const size_t bytes_per_sample =
-        use_high_bit_depth() ? sizeof(uint16_t) : sizeof(uint8_t);
-    const size_t buffer_bytes = block_size() * bytes_per_sample;
-    src_ = reinterpret_cast<uint8_t *>(aom_memalign(16, buffer_bytes));
-    ref_ = new uint8_t[buffer_bytes];
-    ASSERT_TRUE(src_ != NULL);
-    ASSERT_TRUE(ref_ != NULL);
-    if (use_high_bit_depth()) {
-      // TODO(skal): remove!
-      // The kernels take high bit-depth buffers through the BYTEPTR encoding.
-      src_ = CONVERT_TO_BYTEPTR(src_);
-      ref_ = CONVERT_TO_BYTEPTR(ref_);
-    }
-  }
-
-  // Undoes the BYTEPTR encoding (if any) and releases both buffers with the
-  // deallocator matching their allocation in SetUp().
-  virtual void TearDown() {
-    if (use_high_bit_depth()) {
-      // TODO(skal): remove!
-      src_ = reinterpret_cast<uint8_t *>(CONVERT_TO_SHORTPTR(src_));
-      ref_ = reinterpret_cast<uint8_t *>(CONVERT_TO_SHORTPTR(ref_));
-    }
-
-    aom_free(src_);
-    delete[] ref_;
-    src_ = NULL;
-    ref_ = NULL;
-    libaom_test::ClearSystemState();
-  }
-
- protected:
-  // Variance and MSE/SSE checks live side by side in this one fixture.
-  // Dedicated sub-classes would need 'this->xxx' on every access to the
-  // fields of this template base, which was judged too cumbersome.
-
-  // Variance tests
-  void ZeroTest();
-  void RefTest();
-  void RefStrideTest();
-  void OneQuarterTest();
-  void SpeedTest();
-
-  // MSE/SSE tests
-  void RefTestMse();
-  void RefTestSse();
-  void MaxTestMse();
-  void MaxTestSse();
-
- protected:
-  ACMRandom rnd_;
-  uint8_t *src_;
-  uint8_t *ref_;
-  TestParams<FunctionType> params_;
-
-  // Shorthand accessors for the fields of params_.
-  bool use_high_bit_depth() const { return params_.use_high_bit_depth; }
-  int byte_shift() const { return params_.bit_depth - 8; }
-  int block_size() const { return params_.block_size; }
-  int width() const { return params_.width; }
-  int height() const { return params_.height; }
-  uint32_t mask() const { return params_.mask; }
-};
-
-////////////////////////////////////////////////////////////////////////////////
-// Tests related to variance.
-
-template <typename VarianceFunctionType>
-void MainTestClass<VarianceFunctionType>::ZeroTest() {
-  // Two flat blocks must report zero variance for every pair of levels.
-  const int count = block_size();
-  for (int src_level = 0; src_level <= 255; ++src_level) {
-    if (use_high_bit_depth()) {
-      uint16_t *const src16 = CONVERT_TO_SHORTPTR(src_);
-      for (int k = 0; k < count; ++k) src16[k] = src_level << byte_shift();
-    } else {
-      memset(src_, src_level, count);
-    }
-    for (int ref_level = 0; ref_level <= 255; ++ref_level) {
-      if (use_high_bit_depth()) {
-        uint16_t *const ref16 = CONVERT_TO_SHORTPTR(ref_);
-        for (int k = 0; k < count; ++k) ref16[k] = ref_level << byte_shift();
-      } else {
-        memset(ref_, ref_level, count);
-      }
-      unsigned int sse, var;
-      ASM_REGISTER_STATE_CHECK(
-          var = params_.func(src_, width(), ref_, width(), &sse));
-      EXPECT_EQ(0u, var) << "src values: " << src_level
-                         << " ref values: " << ref_level;
-    }
-  }
-}
-
-template <typename VarianceFunctionType>
-void MainTestClass<VarianceFunctionType>::RefTest() {
-  // Ten rounds of random data; the kernel must match variance_ref() on both
-  // the returned variance and the SSE it writes.
-  const int stride = width();
-  for (int round = 0; round < 10; ++round) {
-    for (int j = 0; j < block_size(); j++) {
-      if (use_high_bit_depth()) {
-        CONVERT_TO_SHORTPTR(src_)[j] = rnd_.Rand16() & mask();
-        CONVERT_TO_SHORTPTR(ref_)[j] = rnd_.Rand16() & mask();
-      } else {
-        src_[j] = rnd_.Rand8();
-        ref_[j] = rnd_.Rand8();
-      }
-    }
-    unsigned int sse1, sse2, var1;
-    ASM_REGISTER_STATE_CHECK(
-        var1 = params_.func(src_, stride, ref_, stride, &sse1));
-    const unsigned int var2 =
-        variance_ref(src_, ref_, params_.log2width, params_.log2height, stride,
-                     stride, &sse2, use_high_bit_depth(), params_.bit_depth);
-    EXPECT_EQ(sse1, sse2) << "Error at test index: " << round;
-    EXPECT_EQ(var1, var2) << "Error at test index: " << round;
-  }
-}
-
-template <typename VarianceFunctionType>
-void MainTestClass<VarianceFunctionType>::RefStrideTest() {
-  // Same comparison as RefTest(), but each stride is either 0 or width():
-  // bit 0 of the round index picks the ref stride, bit 1 the src stride.
-  for (int round = 0; round < 10; ++round) {
-    const int ref_stride = (round & 1) * width();
-    const int src_stride = ((round >> 1) & 1) * width();
-    for (int j = 0; j < block_size(); j++) {
-      const int row = j / width();
-      const int col = j % width();
-      const int ref_ind = row * ref_stride + col;
-      const int src_ind = row * src_stride + col;
-      if (use_high_bit_depth()) {
-        CONVERT_TO_SHORTPTR(src_)[src_ind] = rnd_.Rand16() & mask();
-        CONVERT_TO_SHORTPTR(ref_)[ref_ind] = rnd_.Rand16() & mask();
-      } else {
-        src_[src_ind] = rnd_.Rand8();
-        ref_[ref_ind] = rnd_.Rand8();
-      }
-    }
-
-    unsigned int sse1, sse2;
-    unsigned int var1;
-    ASM_REGISTER_STATE_CHECK(
-        var1 = params_.func(src_, src_stride, ref_, ref_stride, &sse1));
-    const unsigned int var2 = variance_ref(
-        src_, ref_, params_.log2width, params_.log2height, src_stride,
-        ref_stride, &sse2, use_high_bit_depth(), params_.bit_depth);
-    EXPECT_EQ(sse1, sse2) << "Error at test index: " << round;
-    EXPECT_EQ(var1, var2) << "Error at test index: " << round;
-  }
-}
-
-template <typename VarianceFunctionType>
-void MainTestClass<VarianceFunctionType>::OneQuarterTest() {
-  // src is flat at the level 255; ref matches it in its first half and is
-  // zero in its second half. Expected variance: block_size * 255^2 / 4.
-  const int half = block_size() / 2;
-  if (use_high_bit_depth()) {
-    uint16_t *const src16 = CONVERT_TO_SHORTPTR(src_);
-    uint16_t *const ref16 = CONVERT_TO_SHORTPTR(ref_);
-    aom_memset16(src16, 255 << byte_shift(), block_size());
-    aom_memset16(ref16, 255 << byte_shift(), half);
-    aom_memset16(ref16 + half, 0, half);
-  } else {
-    memset(src_, 255, block_size());
-    memset(ref_, 255, half);
-    memset(ref_ + half, 0, half);
-  }
-
-  unsigned int sse, var;
-  ASM_REGISTER_STATE_CHECK(
-      var = params_.func(src_, width(), ref_, width(), &sse));
-  const unsigned int expected = block_size() * 255 * 255 / 4;
-  EXPECT_EQ(expected, var);
-}
-
-template <typename VarianceFunctionType>
-void MainTestClass<VarianceFunctionType>::SpeedTest() {
-  // Times repeated calls of the kernel on one random block and prints the
-  // average cost of a single call.
-  for (int j = 0; j < block_size(); j++) {
-    if (!use_high_bit_depth()) {
-      src_[j] = rnd_.Rand8();
-      ref_[j] = rnd_.Rand8();
-    } else {
-      CONVERT_TO_SHORTPTR(src_)[j] = rnd_.Rand16() & mask();
-      CONVERT_TO_SHORTPTR(ref_)[j] = rnd_.Rand16() & mask();
-    }
-  }
-  unsigned int sse;
-  const int stride = width();
-  // Fewer iterations for larger blocks, so the total work stays comparable.
-  const int run_time = 1000000000 / block_size();
-  aom_usec_timer timer;
-  aom_usec_timer_start(&timer);
-  for (int i = 0; i < run_time; ++i) {
-    params_.func(src_, stride, ref_, stride, &sse);
-  }
-
-  aom_usec_timer_mark(&timer);
-  // The timer reading is the total for the whole loop and is treated as
-  // microseconds (as the usec timer name suggests). The message is labelled
-  // "ns", so convert to nanoseconds per call instead of printing the raw total.
-  const double elapsed_us =
-      static_cast<double>(aom_usec_timer_elapsed(&timer));
-  const double ns_per_call = elapsed_us * 1000.0 / run_time;
-  printf("Variance %dx%d : %7.2fns\n", width(), height(), ns_per_call);
-}
-
-////////////////////////////////////////////////////////////////////////////////
-// Tests related to MSE / SSE.
-
-template <typename FunctionType>
-void MainTestClass<FunctionType>::RefTestMse() {
-  // 8-bit only: the SSE written by the kernel must equal the SSE computed by
-  // variance_ref() on ten random blocks.
-  const int stride = width();
-  for (int round = 0; round < 10; ++round) {
-    for (int j = 0; j < block_size(); ++j) {
-      src_[j] = rnd_.Rand8();
-      ref_[j] = rnd_.Rand8();
-    }
-    unsigned int sse_test, sse_ref;
-    ASM_REGISTER_STATE_CHECK(
-        params_.func(src_, stride, ref_, stride, &sse_test));
-    variance_ref(src_, ref_, params_.log2width, params_.log2height, stride,
-                 stride, &sse_ref, false, AOM_BITS_8);
-    EXPECT_EQ(sse_test, sse_ref);
-  }
-}
-
-template <typename FunctionType>
-void MainTestClass<FunctionType>::RefTestSse() {
-  // 8-bit only: the kernel's return value must equal the SSE computed by
-  // variance_ref() on ten random blocks.
-  const int stride = width();
-  for (int round = 0; round < 10; ++round) {
-    for (int j = 0; j < block_size(); ++j) {
-      src_[j] = rnd_.Rand8();
-      ref_[j] = rnd_.Rand8();
-    }
-    unsigned int returned;
-    unsigned int sse_ref;
-    ASM_REGISTER_STATE_CHECK(returned =
-                                 params_.func(src_, stride, ref_, stride));
-    variance_ref(src_, ref_, params_.log2width, params_.log2height, stride,
-                 stride, &sse_ref, false, AOM_BITS_8);
-    EXPECT_EQ(returned, sse_ref);
-  }
-}
-
-template <typename FunctionType>
-void MainTestClass<FunctionType>::MaxTestMse() {
-  // Largest 8-bit difference everywhere: src all 255, ref all 0.
-  const int count = block_size();
-  memset(src_, 255, count);
-  memset(ref_, 0, count);
-
-  unsigned int sse;
-  ASM_REGISTER_STATE_CHECK(params_.func(src_, width(), ref_, width(), &sse));
-  const unsigned int expected = block_size() * 255 * 255;
-  EXPECT_EQ(expected, sse);
-}
-
-template <typename FunctionType>
-void MainTestClass<FunctionType>::MaxTestSse() {
-  // Largest 8-bit difference everywhere: src all 255, ref all 0. Here the
-  // result is the kernel's return value.
-  const int count = block_size();
-  memset(src_, 255, count);
-  memset(ref_, 0, count);
-
-  unsigned int returned;
-  ASM_REGISTER_STATE_CHECK(returned =
-                               params_.func(src_, width(), ref_, width()));
-  const unsigned int expected = block_size() * 255 * 255;
-  EXPECT_EQ(expected, returned);
-}
-
-////////////////////////////////////////////////////////////////////////////////
-
-using ::testing::get;
-using ::testing::make_tuple;
-using ::testing::tuple;
-
-template <typename FunctionType>
-class SubpelVarianceTest
-    : public ::testing::TestWithParam<TestParams<FunctionType> > {
- public:
-  // Allocates the source, second-prediction and reference buffers. The
-  // reference holds width() + height() + 1 samples more than a block,
-  // because the tests read it with a stride of up to width() + 1.
-  virtual void SetUp() {
-    params_ = this->GetParam();
-    rnd_.Reset(ACMRandom::DeterministicSeed());
-
-    const int ref_samples = block_size() + width() + height() + 1;
-    if (use_high_bit_depth()) {
-      src_ = CONVERT_TO_BYTEPTR(reinterpret_cast<uint16_t *>(
-          aom_memalign(32, block_size() * sizeof(uint16_t))));
-      sec_ = CONVERT_TO_BYTEPTR(reinterpret_cast<uint16_t *>(
-          aom_memalign(32, block_size() * sizeof(uint16_t))));
-      ref_ = CONVERT_TO_BYTEPTR(
-          aom_memalign(32, ref_samples * sizeof(uint16_t)));
-    } else {
-      src_ = reinterpret_cast<uint8_t *>(aom_memalign(32, block_size()));
-      sec_ = reinterpret_cast<uint8_t *>(aom_memalign(32, block_size()));
-      ref_ = reinterpret_cast<uint8_t *>(aom_memalign(32, ref_samples));
-    }
-    ASSERT_TRUE(src_ != NULL);
-    ASSERT_TRUE(sec_ != NULL);
-    ASSERT_TRUE(ref_ != NULL);
-  }
-
-  // Frees the three buffers, undoing the BYTEPTR encoding first when the
-  // high bit-depth path allocated them.
-  virtual void TearDown() {
-    if (use_high_bit_depth()) {
-      aom_free(CONVERT_TO_SHORTPTR(src_));
-      aom_free(CONVERT_TO_SHORTPTR(ref_));
-      aom_free(CONVERT_TO_SHORTPTR(sec_));
-    } else {
-      aom_free(src_);
-      aom_free(ref_);
-      aom_free(sec_);
-    }
-    libaom_test::ClearSystemState();
-  }
-
- protected:
-  void RefTest();
-  void ExtremeRefTest();
-
-  ACMRandom rnd_;
-  uint8_t *src_;
-  uint8_t *ref_;
-  uint8_t *sec_;
-  TestParams<FunctionType> params_;
-  JNT_COMP_PARAMS jcp_param_;
-
-  // Shorthand accessors for the fields of params_.
-  bool use_high_bit_depth() const { return params_.use_high_bit_depth; }
-  int byte_shift() const { return params_.bit_depth - 8; }
-  int block_size() const { return params_.block_size; }
-  int width() const { return params_.width; }
-  int height() const { return params_.height; }
-  uint32_t mask() const { return params_.mask; }
-};
-
-template <typename SubpelVarianceFunctionType>
-void SubpelVarianceTest<SubpelVarianceFunctionType>::RefTest() {
-  // For each of the 8 x 8 sub-pixel offsets, fill the buffers with fresh
-  // random data and compare the kernel with subpel_variance_ref().
-  const int src_len = block_size();
-  const int ref_len = block_size() + width() + height() + 1;
-  for (int x = 0; x < 8; ++x) {
-    for (int y = 0; y < 8; ++y) {
-      if (use_high_bit_depth()) {
-        for (int j = 0; j < src_len; j++) {
-          CONVERT_TO_SHORTPTR(src_)[j] = rnd_.Rand16() & mask();
-        }
-        for (int j = 0; j < ref_len; j++) {
-          CONVERT_TO_SHORTPTR(ref_)[j] = rnd_.Rand16() & mask();
-        }
-      } else {
-        for (int j = 0; j < src_len; j++) {
-          src_[j] = rnd_.Rand8();
-        }
-        for (int j = 0; j < ref_len; j++) {
-          ref_[j] = rnd_.Rand8();
-        }
-      }
-
-      unsigned int sse1, sse2;
-      unsigned int var1;
-      ASM_REGISTER_STATE_CHECK(
-          var1 = params_.func(ref_, width() + 1, x, y, src_, width(), &sse1));
-      const unsigned int var2 = subpel_variance_ref(
-          ref_, src_, params_.log2width, params_.log2height, x, y, &sse2,
-          use_high_bit_depth(), params_.bit_depth);
-      EXPECT_EQ(sse1, sse2) << "at position " << x << ", " << y;
-      EXPECT_EQ(var1, var2) << "at position " << x << ", " << y;
-    }
-  }
-}
-
-template <typename SubpelVarianceFunctionType>
-void SubpelVarianceTest<SubpelVarianceFunctionType>::ExtremeRefTest() {
-  // Compare against the reference using half-and-half extreme patterns.
-  // 8-bit:          src = [0 | 255],    ref = [255 | 0 ...padding].
-  // High bit-depth: src = [mask() | 0], ref = [0 | mask() ...padding].
-  // In both cases the second ref fill also covers the extra
-  // width() + height() + 1 samples.
-  const int half = block_size() / 2;
-  const int ref_tail = half + width() + height() + 1;
-  for (int x = 0; x < 8; ++x) {
-    for (int y = 0; y < 8; ++y) {
-      if (use_high_bit_depth()) {
-        aom_memset16(CONVERT_TO_SHORTPTR(src_), mask(), half);
-        aom_memset16(CONVERT_TO_SHORTPTR(src_) + half, 0, half);
-        aom_memset16(CONVERT_TO_SHORTPTR(ref_), 0, half);
-        aom_memset16(CONVERT_TO_SHORTPTR(ref_) + half, mask(), ref_tail);
-      } else {
-        memset(src_, 0, half);
-        memset(src_ + half, 255, half);
-        memset(ref_, 255, half);
-        memset(ref_ + half, 0, ref_tail);
-      }
-
-      unsigned int sse1, sse2;
-      unsigned int var1;
-      ASM_REGISTER_STATE_CHECK(
-          var1 = params_.func(ref_, width() + 1, x, y, src_, width(), &sse1));
-      const unsigned int var2 = subpel_variance_ref(
-          ref_, src_, params_.log2width, params_.log2height, x, y, &sse2,
-          use_high_bit_depth(), params_.bit_depth);
-      EXPECT_EQ(sse1, sse2) << "for xoffset " << x << " and yoffset " << y;
-      EXPECT_EQ(var1, var2) << "for xoffset " << x << " and yoffset " << y;
-    }
-  }
-}
-
-template <>
-void SubpelVarianceTest<SubpixAvgVarMxNFunc>::RefTest() {
-  // Averaging variant: the second prediction is randomised together with the
-  // source, and the kernel is compared with subpel_avg_variance_ref().
-  const int pred_len = block_size();
-  const int ref_len = block_size() + width() + height() + 1;
-  for (int x = 0; x < 8; ++x) {
-    for (int y = 0; y < 8; ++y) {
-      if (use_high_bit_depth()) {
-        for (int j = 0; j < pred_len; j++) {
-          CONVERT_TO_SHORTPTR(src_)[j] = rnd_.Rand16() & mask();
-          CONVERT_TO_SHORTPTR(sec_)[j] = rnd_.Rand16() & mask();
-        }
-        for (int j = 0; j < ref_len; j++) {
-          CONVERT_TO_SHORTPTR(ref_)[j] = rnd_.Rand16() & mask();
-        }
-      } else {
-        for (int j = 0; j < pred_len; j++) {
-          src_[j] = rnd_.Rand8();
-          sec_[j] = rnd_.Rand8();
-        }
-        for (int j = 0; j < ref_len; j++) {
-          ref_[j] = rnd_.Rand8();
-        }
-      }
-
-      uint32_t sse1, sse2;
-      uint32_t var1;
-      ASM_REGISTER_STATE_CHECK(var1 = params_.func(ref_, width() + 1, x, y,
-                                                   src_, width(), &sse1, sec_));
-      const uint32_t var2 = subpel_avg_variance_ref(
-          ref_, src_, sec_, params_.log2width, params_.log2height, x, y, &sse2,
-          use_high_bit_depth(), params_.bit_depth);
-      EXPECT_EQ(sse1, sse2) << "at position " << x << ", " << y;
-      EXPECT_EQ(var1, var2) << "at position " << x << ", " << y;
-    }
-  }
-}
-
-template <>
-void SubpelVarianceTest<JntSubpixAvgVarMxNFunc>::RefTest() {
-  // Weighted-average variant. For every sub-pixel offset the buffers are
-  // randomised once, then each of the 2 x 4 weight pairs read from
-  // quant_dist_lookup_table is checked against jnt_subpel_avg_variance_ref().
-  // This variant passes the reference with a stride of width().
-  const int pred_len = block_size();
-  const int ref_len = block_size() + width() + height() + 1;
-  for (int x = 0; x < 8; ++x) {
-    for (int y = 0; y < 8; ++y) {
-      if (use_high_bit_depth()) {
-        for (int j = 0; j < pred_len; j++) {
-          CONVERT_TO_SHORTPTR(src_)[j] = rnd_.Rand16() & mask();
-          CONVERT_TO_SHORTPTR(sec_)[j] = rnd_.Rand16() & mask();
-        }
-        for (int j = 0; j < ref_len; j++) {
-          CONVERT_TO_SHORTPTR(ref_)[j] = rnd_.Rand16() & mask();
-        }
-      } else {
-        for (int j = 0; j < pred_len; j++) {
-          src_[j] = rnd_.Rand8();
-          sec_[j] = rnd_.Rand8();
-        }
-        for (int j = 0; j < ref_len; j++) {
-          ref_[j] = rnd_.Rand8();
-        }
-      }
-
-      for (int x0 = 0; x0 < 2; ++x0) {
-        for (int y0 = 0; y0 < 4; ++y0) {
-          jcp_param_.fwd_offset = quant_dist_lookup_table[x0][y0][0];
-          jcp_param_.bck_offset = quant_dist_lookup_table[x0][y0][1];
-
-          uint32_t sse1, sse2;
-          uint32_t var1;
-          ASM_REGISTER_STATE_CHECK(var1 = params_.func(ref_, width() + 0, x, y,
-                                                       src_, width(), &sse1,
-                                                       sec_, &jcp_param_));
-          const uint32_t var2 = jnt_subpel_avg_variance_ref(
-              ref_, src_, sec_, params_.log2width, params_.log2height, x, y,
-              &sse2, use_high_bit_depth(), params_.bit_depth, &jcp_param_);
-          EXPECT_EQ(sse1, sse2) << "at position " << x << ", " << y;
-          EXPECT_EQ(var1, var2) << "at position " << x << ", " << y;
-        }
-      }
-    }
-  }
-}
-
-////////////////////////////////////////////////////////////////////////////////
-
-static const int kMaskMax = 64;
-
-typedef TestParams<ObmcSubpelVarFunc> ObmcSubpelVarianceParams;
-
-// Fixture for OBMC sub-pixel variance kernels. Owns a prediction buffer
-// (8-bit or 16-bit samples) plus 32-bit weighted-source and mask buffers.
-template <typename FunctionType>
-class ObmcVarianceTest
-    : public ::testing::TestWithParam<TestParams<FunctionType> > {
- public:
-  virtual void SetUp() {
-    params_ = this->GetParam();
-
-    rnd_.Reset(ACMRandom::DeterministicSeed());
-    // The tests read the prediction with a stride of width() + 1 over
-    // height() + 1 rows, hence the extra width() + height() + 1 samples.
-    const size_t pre_samples = block_size() + width() + height() + 1;
-    if (!use_high_bit_depth()) {
-      pre_ = reinterpret_cast<uint8_t *>(aom_memalign(32, pre_samples));
-    } else {
-      // Scale the WHOLE sample count by the sample size. The previous
-      // expression multiplied only the trailing "1" by sizeof(uint16_t), so
-      // the buffer was about half the size the tests write and read.
-      pre_ = CONVERT_TO_BYTEPTR(reinterpret_cast<uint16_t *>(
-          aom_memalign(32, pre_samples * sizeof(uint16_t))));
-    }
-    wsrc_ = reinterpret_cast<int32_t *>(
-        aom_memalign(32, block_size() * sizeof(uint32_t)));
-    mask_ = reinterpret_cast<int32_t *>(
-        aom_memalign(32, block_size() * sizeof(uint32_t)));
-    ASSERT_TRUE(pre_ != NULL);
-    ASSERT_TRUE(wsrc_ != NULL);
-    ASSERT_TRUE(mask_ != NULL);
-  }
-
-  // Frees the buffers, undoing the BYTEPTR encoding of pre_ first when the
-  // high bit-depth path allocated it.
-  virtual void TearDown() {
-    if (!use_high_bit_depth()) {
-      aom_free(pre_);
-    } else {
-      aom_free(CONVERT_TO_SHORTPTR(pre_));
-    }
-    aom_free(wsrc_);
-    aom_free(mask_);
-    libaom_test::ClearSystemState();
-  }
-
- protected:
-  void RefTest();
-  void ExtremeRefTest();
-  void SpeedTest();
-
-  ACMRandom rnd_;
-  uint8_t *pre_;   // prediction samples (BYTEPTR-encoded for high bit depth)
-  int32_t *wsrc_;  // weighted source, block_size() entries
-  int32_t *mask_;  // per-sample mask, block_size() entries
-  TestParams<FunctionType> params_;
-
-  // Shorthand accessors for the fields of params_.
-  bool use_high_bit_depth() const { return params_.use_high_bit_depth; }
-  int byte_shift() const { return params_.bit_depth - 8; }
-  int block_size() const { return params_.block_size; }
-  int width() const { return params_.width; }
-  int height() const { return params_.height; }
-  uint32_t bd_mask() const { return params_.mask; }
-};
-
-template <>
-void ObmcVarianceTest<ObmcSubpelVarFunc>::RefTest() {
-  // For each of the 8 x 8 sub-pixel offsets, randomise the prediction, the
-  // weighted source and the mask, then compare the kernel with
-  // obmc_subpel_variance_ref().
-  const int pre_len = block_size() + width() + height() + 1;
-  for (int x = 0; x < 8; ++x) {
-    for (int y = 0; y < 8; ++y) {
-      if (use_high_bit_depth()) {
-        for (int j = 0; j < pre_len; j++) {
-          CONVERT_TO_SHORTPTR(pre_)[j] = rnd_.Rand16() & bd_mask();
-        }
-      } else {
-        for (int j = 0; j < pre_len; j++) {
-          pre_[j] = rnd_.Rand8();
-        }
-      }
-      for (int j = 0; j < block_size(); j++) {
-        wsrc_[j] = (rnd_.Rand16() & bd_mask()) * rnd_(kMaskMax * kMaskMax + 1);
-        mask_[j] = rnd_(kMaskMax * kMaskMax + 1);
-      }
-
-      uint32_t sse1, sse2;
-      uint32_t var1;
-      ASM_REGISTER_STATE_CHECK(
-          var1 = params_.func(pre_, width() + 1, x, y, wsrc_, mask_, &sse1));
-      const uint32_t var2 = obmc_subpel_variance_ref(
-          pre_, params_.log2width, params_.log2height, x, y, wsrc_, mask_,
-          &sse2, use_high_bit_depth(), params_.bit_depth);
-      EXPECT_EQ(sse1, sse2) << "for xoffset " << x << " and yoffset " << y;
-      EXPECT_EQ(var1, var2) << "for xoffset " << x << " and yoffset " << y;
-    }
-  }
-}
-
-template <>
-void ObmcVarianceTest<ObmcSubpelVarFunc>::ExtremeRefTest() {
-  // Pre: first half of the block at the maximum, everything after it at 0.
-  // Mask: first half 0, second half kMaskMax * kMaskMax.
-  // WSrc: first half at the maximum weighted value, second half 0.
-  for (int x = 0; x < 8; ++x) {
-    for (int y = 0; y < 8; ++y) {
-      const int half = block_size() / 2;
-      // The kernel reads the prediction with a stride of width() + 1, so the
-      // zero fill must also cover the width() + height() + 1 samples that
-      // follow the block.
-      const int pre_tail = half + width() + height() + 1;
-      if (!use_high_bit_depth()) {
-        memset(pre_, 255, half);
-        memset(pre_ + half, 0, pre_tail);
-      } else {
-        aom_memset16(CONVERT_TO_SHORTPTR(pre_), bd_mask(), half);
-        // Previously only `half` samples were cleared here, leaving the
-        // trailing samples uninitialised although they are read below.
-        aom_memset16(CONVERT_TO_SHORTPTR(pre_) + half, 0, pre_tail);
-      }
-      for (int j = 0; j < half; j++) {
-        wsrc_[j] = bd_mask() * kMaskMax * kMaskMax;
-        mask_[j] = 0;
-      }
-      for (int j = half; j < block_size(); j++) {
-        wsrc_[j] = 0;
-        mask_[j] = kMaskMax * kMaskMax;
-      }
-
-      uint32_t sse1, sse2;
-      uint32_t var1, var2;
-      ASM_REGISTER_STATE_CHECK(
-          var1 = params_.func(pre_, width() + 1, x, y, wsrc_, mask_, &sse1));
-      var2 = obmc_subpel_variance_ref(
-          pre_, params_.log2width, params_.log2height, x, y, wsrc_, mask_,
-          &sse2, use_high_bit_depth(), params_.bit_depth);
-      EXPECT_EQ(sse1, sse2) << "for xoffset " << x << " and yoffset " << y;
-      EXPECT_EQ(var1, var2) << "for xoffset " << x << " and yoffset " << y;
-    }
-  }
-}
-
-template <>
-void ObmcVarianceTest<ObmcSubpelVarFunc>::SpeedTest() {
-  // Times repeated kernel calls at random sub-pixel offsets on one set of
-  // random buffers and prints the total elapsed time.
-  const int pre_len = block_size() + width() + height() + 1;
-  if (use_high_bit_depth()) {
-    for (int j = 0; j < pre_len; j++) {
-      CONVERT_TO_SHORTPTR(pre_)[j] = rnd_.Rand16() & bd_mask();
-    }
-  } else {
-    for (int j = 0; j < pre_len; j++) {
-      pre_[j] = rnd_.Rand8();
-    }
-  }
-  for (int j = 0; j < block_size(); j++) {
-    wsrc_[j] = (rnd_.Rand16() & bd_mask()) * rnd_(kMaskMax * kMaskMax + 1);
-    mask_[j] = rnd_(kMaskMax * kMaskMax + 1);
-  }
-
-  unsigned int sse1;
-  const int stride = width() + 1;
-  // Fewer iterations for larger blocks, so the total work stays comparable.
-  const int run_time = 1000000000 / block_size();
-  aom_usec_timer timer;
-
-  aom_usec_timer_start(&timer);
-  for (int i = 0; i < run_time; ++i) {
-    const int x = rnd_(8);
-    const int y = rnd_(8);
-    ASM_REGISTER_STATE_CHECK(
-        params_.func(pre_, stride, x, y, wsrc_, mask_, &sse1));
-  }
-  aom_usec_timer_mark(&timer);
-
-  const int elapsed_time = static_cast<int>(aom_usec_timer_elapsed(&timer));
-  printf("obmc_sub_pixel_variance_%dx%d_%d: %d us\n", width(), height(),
-         params_.bit_depth, elapsed_time);
-}
-
-typedef MainTestClass<Get4x4SseFunc> AvxSseTest;
-typedef MainTestClass<VarianceMxNFunc> AvxMseTest;
-typedef MainTestClass<VarianceMxNFunc> AvxVarianceTest;
-typedef SubpelVarianceTest<SubpixVarMxNFunc> AvxSubpelVarianceTest;
-typedef SubpelVarianceTest<SubpixAvgVarMxNFunc> AvxSubpelAvgVarianceTest;
-typedef SubpelVarianceTest<JntSubpixAvgVarMxNFunc> AvxJntSubpelAvgVarianceTest;
-typedef ObmcVarianceTest<ObmcSubpelVarFunc> AvxObmcSubpelVarianceTest;
-
-TEST_P(AvxSseTest, RefSse) { RefTestSse(); }
-TEST_P(AvxSseTest, MaxSse) { MaxTestSse(); }
-TEST_P(AvxMseTest, RefMse) { RefTestMse(); }
-TEST_P(AvxMseTest, MaxMse) { MaxTestMse(); }
-TEST_P(AvxVarianceTest, Zero) { ZeroTest(); }
-TEST_P(AvxVarianceTest, Ref) { RefTest(); }
-TEST_P(AvxVarianceTest, RefStride) { RefStrideTest(); }
-TEST_P(AvxVarianceTest, OneQuarter) { OneQuarterTest(); }
-TEST_P(AvxVarianceTest, DISABLED_Speed) { SpeedTest(); }
-TEST_P(SumOfSquaresTest, Const) { ConstTest(); }
-TEST_P(SumOfSquaresTest, Ref) { RefTest(); }
-TEST_P(AvxSubpelVarianceTest, Ref) { RefTest(); }
-TEST_P(AvxSubpelVarianceTest, ExtremeRef) { ExtremeRefTest(); }
-TEST_P(AvxSubpelAvgVarianceTest, Ref) { RefTest(); }
-TEST_P(AvxJntSubpelAvgVarianceTest, Ref) { RefTest(); }
-TEST_P(AvxObmcSubpelVarianceTest, Ref) { RefTest(); }
-TEST_P(AvxObmcSubpelVarianceTest, ExtremeRef) { ExtremeRefTest(); }
-TEST_P(AvxObmcSubpelVarianceTest, DISABLED_Speed) { SpeedTest(); }
-
-INSTANTIATE_TEST_CASE_P(C, SumOfSquaresTest,
- ::testing::Values(aom_get_mb_ss_c));
-
-typedef TestParams<Get4x4SseFunc> SseParams;
-INSTANTIATE_TEST_CASE_P(C, AvxSseTest,
- ::testing::Values(SseParams(2, 2,
- &aom_get4x4sse_cs_c)));
-
-typedef TestParams<VarianceMxNFunc> MseParams;
-INSTANTIATE_TEST_CASE_P(C, AvxMseTest,
- ::testing::Values(MseParams(4, 4, &aom_mse16x16_c),
- MseParams(4, 3, &aom_mse16x8_c),
- MseParams(3, 4, &aom_mse8x16_c),
- MseParams(3, 3, &aom_mse8x8_c)));
-
-typedef TestParams<VarianceMxNFunc> VarianceParams;
-INSTANTIATE_TEST_CASE_P(
- C, AvxVarianceTest,
- ::testing::Values(VarianceParams(7, 7, &aom_variance128x128_c),
- VarianceParams(7, 6, &aom_variance128x64_c),
- VarianceParams(6, 7, &aom_variance64x128_c),
- VarianceParams(6, 6, &aom_variance64x64_c),
- VarianceParams(6, 5, &aom_variance64x32_c),
- VarianceParams(5, 6, &aom_variance32x64_c),
- VarianceParams(5, 5, &aom_variance32x32_c),
- VarianceParams(5, 4, &aom_variance32x16_c),
- VarianceParams(4, 5, &aom_variance16x32_c),
- VarianceParams(4, 4, &aom_variance16x16_c),
- VarianceParams(4, 3, &aom_variance16x8_c),
- VarianceParams(3, 4, &aom_variance8x16_c),
- VarianceParams(3, 3, &aom_variance8x8_c),
- VarianceParams(3, 2, &aom_variance8x4_c),
- VarianceParams(2, 3, &aom_variance4x8_c),
- VarianceParams(2, 2, &aom_variance4x4_c)));
-
-typedef TestParams<SubpixVarMxNFunc> SubpelVarianceParams;
-INSTANTIATE_TEST_CASE_P(
- C, AvxSubpelVarianceTest,
- ::testing::Values(
- SubpelVarianceParams(7, 7, &aom_sub_pixel_variance128x128_c, 0),
- SubpelVarianceParams(7, 6, &aom_sub_pixel_variance128x64_c, 0),
- SubpelVarianceParams(6, 7, &aom_sub_pixel_variance64x128_c, 0),
- SubpelVarianceParams(6, 6, &aom_sub_pixel_variance64x64_c, 0),
- SubpelVarianceParams(6, 5, &aom_sub_pixel_variance64x32_c, 0),
- SubpelVarianceParams(5, 6, &aom_sub_pixel_variance32x64_c, 0),
- SubpelVarianceParams(5, 5, &aom_sub_pixel_variance32x32_c, 0),
- SubpelVarianceParams(5, 4, &aom_sub_pixel_variance32x16_c, 0),
- SubpelVarianceParams(4, 5, &aom_sub_pixel_variance16x32_c, 0),
- SubpelVarianceParams(4, 4, &aom_sub_pixel_variance16x16_c, 0),
- SubpelVarianceParams(4, 3, &aom_sub_pixel_variance16x8_c, 0),
- SubpelVarianceParams(3, 4, &aom_sub_pixel_variance8x16_c, 0),
- SubpelVarianceParams(3, 3, &aom_sub_pixel_variance8x8_c, 0),
- SubpelVarianceParams(3, 2, &aom_sub_pixel_variance8x4_c, 0),
- SubpelVarianceParams(2, 3, &aom_sub_pixel_variance4x8_c, 0),
- SubpelVarianceParams(2, 2, &aom_sub_pixel_variance4x4_c, 0)));
-
-typedef TestParams<SubpixAvgVarMxNFunc> SubpelAvgVarianceParams;
-INSTANTIATE_TEST_CASE_P(
- C, AvxSubpelAvgVarianceTest,
- ::testing::Values(
- SubpelAvgVarianceParams(7, 7, &aom_sub_pixel_avg_variance128x128_c, 0),
- SubpelAvgVarianceParams(7, 6, &aom_sub_pixel_avg_variance128x64_c, 0),
- SubpelAvgVarianceParams(6, 7, &aom_sub_pixel_avg_variance64x128_c, 0),
- SubpelAvgVarianceParams(6, 6, &aom_sub_pixel_avg_variance64x64_c, 0),
- SubpelAvgVarianceParams(6, 5, &aom_sub_pixel_avg_variance64x32_c, 0),
- SubpelAvgVarianceParams(5, 6, &aom_sub_pixel_avg_variance32x64_c, 0),
- SubpelAvgVarianceParams(5, 5, &aom_sub_pixel_avg_variance32x32_c, 0),
- SubpelAvgVarianceParams(5, 4, &aom_sub_pixel_avg_variance32x16_c, 0),
- SubpelAvgVarianceParams(4, 5, &aom_sub_pixel_avg_variance16x32_c, 0),
- SubpelAvgVarianceParams(4, 4, &aom_sub_pixel_avg_variance16x16_c, 0),
- SubpelAvgVarianceParams(4, 3, &aom_sub_pixel_avg_variance16x8_c, 0),
- SubpelAvgVarianceParams(3, 4, &aom_sub_pixel_avg_variance8x16_c, 0),
- SubpelAvgVarianceParams(3, 3, &aom_sub_pixel_avg_variance8x8_c, 0),
- SubpelAvgVarianceParams(3, 2, &aom_sub_pixel_avg_variance8x4_c, 0),
- SubpelAvgVarianceParams(2, 3, &aom_sub_pixel_avg_variance4x8_c, 0),
- SubpelAvgVarianceParams(2, 2, &aom_sub_pixel_avg_variance4x4_c, 0)));
-
-typedef TestParams<JntSubpixAvgVarMxNFunc> JntSubpelAvgVarianceParams;
-INSTANTIATE_TEST_CASE_P(
- C, AvxJntSubpelAvgVarianceTest,
- ::testing::Values(
- JntSubpelAvgVarianceParams(6, 6, &aom_jnt_sub_pixel_avg_variance64x64_c,
- 0),
- JntSubpelAvgVarianceParams(6, 5, &aom_jnt_sub_pixel_avg_variance64x32_c,
- 0),
- JntSubpelAvgVarianceParams(5, 6, &aom_jnt_sub_pixel_avg_variance32x64_c,
- 0),
- JntSubpelAvgVarianceParams(5, 5, &aom_jnt_sub_pixel_avg_variance32x32_c,
- 0),
- JntSubpelAvgVarianceParams(5, 4, &aom_jnt_sub_pixel_avg_variance32x16_c,
- 0),
- JntSubpelAvgVarianceParams(4, 5, &aom_jnt_sub_pixel_avg_variance16x32_c,
- 0),
- JntSubpelAvgVarianceParams(4, 4, &aom_jnt_sub_pixel_avg_variance16x16_c,
- 0),
- JntSubpelAvgVarianceParams(4, 3, &aom_jnt_sub_pixel_avg_variance16x8_c,
- 0),
- JntSubpelAvgVarianceParams(3, 4, &aom_jnt_sub_pixel_avg_variance8x16_c,
- 0),
- JntSubpelAvgVarianceParams(3, 3, &aom_jnt_sub_pixel_avg_variance8x8_c,
- 0),
- JntSubpelAvgVarianceParams(3, 2, &aom_jnt_sub_pixel_avg_variance8x4_c,
- 0),
- JntSubpelAvgVarianceParams(2, 3, &aom_jnt_sub_pixel_avg_variance4x8_c,
- 0),
- JntSubpelAvgVarianceParams(2, 2, &aom_jnt_sub_pixel_avg_variance4x4_c,
- 0)));
-
-INSTANTIATE_TEST_CASE_P(
- C, AvxObmcSubpelVarianceTest,
- ::testing::Values(
- ObmcSubpelVarianceParams(7, 7, &aom_obmc_sub_pixel_variance128x128_c,
- 0),
- ObmcSubpelVarianceParams(7, 6, &aom_obmc_sub_pixel_variance128x64_c, 0),
- ObmcSubpelVarianceParams(6, 7, &aom_obmc_sub_pixel_variance64x128_c, 0),
- ObmcSubpelVarianceParams(6, 6, &aom_obmc_sub_pixel_variance64x64_c, 0),
- ObmcSubpelVarianceParams(6, 5, &aom_obmc_sub_pixel_variance64x32_c, 0),
- ObmcSubpelVarianceParams(5, 6, &aom_obmc_sub_pixel_variance32x64_c, 0),
- ObmcSubpelVarianceParams(5, 5, &aom_obmc_sub_pixel_variance32x32_c, 0),
- ObmcSubpelVarianceParams(5, 4, &aom_obmc_sub_pixel_variance32x16_c, 0),
- ObmcSubpelVarianceParams(4, 5, &aom_obmc_sub_pixel_variance16x32_c, 0),
- ObmcSubpelVarianceParams(4, 4, &aom_obmc_sub_pixel_variance16x16_c, 0),
- ObmcSubpelVarianceParams(4, 3, &aom_obmc_sub_pixel_variance16x8_c, 0),
- ObmcSubpelVarianceParams(3, 4, &aom_obmc_sub_pixel_variance8x16_c, 0),
- ObmcSubpelVarianceParams(3, 3, &aom_obmc_sub_pixel_variance8x8_c, 0),
- ObmcSubpelVarianceParams(3, 2, &aom_obmc_sub_pixel_variance8x4_c, 0),
- ObmcSubpelVarianceParams(2, 3, &aom_obmc_sub_pixel_variance4x8_c, 0),
- ObmcSubpelVarianceParams(2, 2, &aom_obmc_sub_pixel_variance4x4_c, 0)));
-
-typedef MainTestClass<VarianceMxNFunc> AvxHBDMseTest;
-typedef MainTestClass<VarianceMxNFunc> AvxHBDVarianceTest;
-typedef SubpelVarianceTest<SubpixVarMxNFunc> AvxHBDSubpelVarianceTest;
-typedef SubpelVarianceTest<SubpixAvgVarMxNFunc> AvxHBDSubpelAvgVarianceTest;
-typedef ObmcVarianceTest<ObmcSubpelVarFunc> AvxHBDObmcSubpelVarianceTest;
-
-TEST_P(AvxHBDMseTest, RefMse) { RefTestMse(); }
-TEST_P(AvxHBDMseTest, MaxMse) { MaxTestMse(); }
-TEST_P(AvxHBDVarianceTest, Zero) { ZeroTest(); }
-TEST_P(AvxHBDVarianceTest, Ref) { RefTest(); }
-TEST_P(AvxHBDVarianceTest, RefStride) { RefStrideTest(); }
-TEST_P(AvxHBDVarianceTest, OneQuarter) { OneQuarterTest(); }
-TEST_P(AvxHBDVarianceTest, DISABLED_Speed) { SpeedTest(); }
-TEST_P(AvxHBDSubpelVarianceTest, Ref) { RefTest(); }
-TEST_P(AvxHBDSubpelVarianceTest, ExtremeRef) { ExtremeRefTest(); }
-TEST_P(AvxHBDSubpelAvgVarianceTest, Ref) { RefTest(); }
-
-/* TODO(debargha): This test does not support the highbd version
-INSTANTIATE_TEST_CASE_P(
- C, AvxHBDMseTest,
- ::testing::Values(make_tuple(4, 4, &aom_highbd_12_mse16x16_c),
- make_tuple(4, 4, &aom_highbd_12_mse16x8_c),
- make_tuple(4, 4, &aom_highbd_12_mse8x16_c),
- make_tuple(4, 4, &aom_highbd_12_mse8x8_c),
- make_tuple(4, 4, &aom_highbd_10_mse16x16_c),
- make_tuple(4, 4, &aom_highbd_10_mse16x8_c),
- make_tuple(4, 4, &aom_highbd_10_mse8x16_c),
- make_tuple(4, 4, &aom_highbd_10_mse8x8_c),
- make_tuple(4, 4, &aom_highbd_8_mse16x16_c),
- make_tuple(4, 4, &aom_highbd_8_mse16x8_c),
- make_tuple(4, 4, &aom_highbd_8_mse8x16_c),
- make_tuple(4, 4, &aom_highbd_8_mse8x8_c)));
-*/
-
-const VarianceParams kArrayHBDVariance_c[] = {
- VarianceParams(7, 7, &aom_highbd_12_variance128x128_c, 12),
- VarianceParams(7, 6, &aom_highbd_12_variance128x64_c, 12),
- VarianceParams(6, 7, &aom_highbd_12_variance64x128_c, 12),
- VarianceParams(6, 6, &aom_highbd_12_variance64x64_c, 12),
- VarianceParams(6, 5, &aom_highbd_12_variance64x32_c, 12),
- VarianceParams(5, 6, &aom_highbd_12_variance32x64_c, 12),
- VarianceParams(5, 5, &aom_highbd_12_variance32x32_c, 12),
- VarianceParams(5, 4, &aom_highbd_12_variance32x16_c, 12),
- VarianceParams(4, 5, &aom_highbd_12_variance16x32_c, 12),
- VarianceParams(4, 4, &aom_highbd_12_variance16x16_c, 12),
- VarianceParams(4, 3, &aom_highbd_12_variance16x8_c, 12),
- VarianceParams(3, 4, &aom_highbd_12_variance8x16_c, 12),
- VarianceParams(3, 3, &aom_highbd_12_variance8x8_c, 12),
- VarianceParams(3, 2, &aom_highbd_12_variance8x4_c, 12),
- VarianceParams(2, 3, &aom_highbd_12_variance4x8_c, 12),
- VarianceParams(2, 2, &aom_highbd_12_variance4x4_c, 12),
- VarianceParams(7, 7, &aom_highbd_10_variance128x128_c, 10),
- VarianceParams(7, 6, &aom_highbd_10_variance128x64_c, 10),
- VarianceParams(6, 7, &aom_highbd_10_variance64x128_c, 10),
- VarianceParams(6, 6, &aom_highbd_10_variance64x64_c, 10),
- VarianceParams(6, 5, &aom_highbd_10_variance64x32_c, 10),
- VarianceParams(5, 6, &aom_highbd_10_variance32x64_c, 10),
- VarianceParams(5, 5, &aom_highbd_10_variance32x32_c, 10),
- VarianceParams(5, 4, &aom_highbd_10_variance32x16_c, 10),
- VarianceParams(4, 5, &aom_highbd_10_variance16x32_c, 10),
- VarianceParams(4, 4, &aom_highbd_10_variance16x16_c, 10),
- VarianceParams(4, 3, &aom_highbd_10_variance16x8_c, 10),
- VarianceParams(3, 4, &aom_highbd_10_variance8x16_c, 10),
- VarianceParams(3, 3, &aom_highbd_10_variance8x8_c, 10),
- VarianceParams(3, 2, &aom_highbd_10_variance8x4_c, 10),
- VarianceParams(2, 3, &aom_highbd_10_variance4x8_c, 10),
- VarianceParams(2, 2, &aom_highbd_10_variance4x4_c, 10),
- VarianceParams(7, 7, &aom_highbd_8_variance128x128_c, 8),
- VarianceParams(7, 6, &aom_highbd_8_variance128x64_c, 8),
- VarianceParams(6, 7, &aom_highbd_8_variance64x128_c, 8),
- VarianceParams(6, 6, &aom_highbd_8_variance64x64_c, 8),
- VarianceParams(6, 5, &aom_highbd_8_variance64x32_c, 8),
- VarianceParams(5, 6, &aom_highbd_8_variance32x64_c, 8),
- VarianceParams(5, 5, &aom_highbd_8_variance32x32_c, 8),
- VarianceParams(5, 4, &aom_highbd_8_variance32x16_c, 8),
- VarianceParams(4, 5, &aom_highbd_8_variance16x32_c, 8),
- VarianceParams(4, 4, &aom_highbd_8_variance16x16_c, 8),
- VarianceParams(4, 3, &aom_highbd_8_variance16x8_c, 8),
- VarianceParams(3, 4, &aom_highbd_8_variance8x16_c, 8),
- VarianceParams(3, 3, &aom_highbd_8_variance8x8_c, 8),
- VarianceParams(3, 2, &aom_highbd_8_variance8x4_c, 8),
- VarianceParams(2, 3, &aom_highbd_8_variance4x8_c, 8),
- VarianceParams(2, 2, &aom_highbd_8_variance4x4_c, 8)
-};
-INSTANTIATE_TEST_CASE_P(C, AvxHBDVarianceTest,
- ::testing::ValuesIn(kArrayHBDVariance_c));
-
-#if HAVE_SSE4_1
-INSTANTIATE_TEST_CASE_P(
- SSE4_1, AvxHBDVarianceTest,
- ::testing::Values(
- VarianceParams(2, 2, &aom_highbd_8_variance4x4_sse4_1, 8),
- VarianceParams(2, 2, &aom_highbd_10_variance4x4_sse4_1, 10),
- VarianceParams(2, 2, &aom_highbd_12_variance4x4_sse4_1, 12)));
-#endif // HAVE_SSE4_1
-
-const SubpelVarianceParams kArrayHBDSubpelVariance_c[] = {
- SubpelVarianceParams(7, 7, &aom_highbd_8_sub_pixel_variance128x128_c, 8),
- SubpelVarianceParams(7, 6, &aom_highbd_8_sub_pixel_variance128x64_c, 8),
- SubpelVarianceParams(6, 7, &aom_highbd_8_sub_pixel_variance64x128_c, 8),
- SubpelVarianceParams(6, 6, &aom_highbd_8_sub_pixel_variance64x64_c, 8),
- SubpelVarianceParams(6, 5, &aom_highbd_8_sub_pixel_variance64x32_c, 8),
- SubpelVarianceParams(5, 6, &aom_highbd_8_sub_pixel_variance32x64_c, 8),
- SubpelVarianceParams(5, 5, &aom_highbd_8_sub_pixel_variance32x32_c, 8),
- SubpelVarianceParams(5, 4, &aom_highbd_8_sub_pixel_variance32x16_c, 8),
- SubpelVarianceParams(4, 5, &aom_highbd_8_sub_pixel_variance16x32_c, 8),
- SubpelVarianceParams(4, 4, &aom_highbd_8_sub_pixel_variance16x16_c, 8),
- SubpelVarianceParams(4, 3, &aom_highbd_8_sub_pixel_variance16x8_c, 8),
- SubpelVarianceParams(3, 4, &aom_highbd_8_sub_pixel_variance8x16_c, 8),
- SubpelVarianceParams(3, 3, &aom_highbd_8_sub_pixel_variance8x8_c, 8),
- SubpelVarianceParams(3, 2, &aom_highbd_8_sub_pixel_variance8x4_c, 8),
- SubpelVarianceParams(2, 3, &aom_highbd_8_sub_pixel_variance4x8_c, 8),
- SubpelVarianceParams(2, 2, &aom_highbd_8_sub_pixel_variance4x4_c, 8),
- SubpelVarianceParams(7, 7, &aom_highbd_10_sub_pixel_variance128x128_c, 10),
- SubpelVarianceParams(7, 6, &aom_highbd_10_sub_pixel_variance128x64_c, 10),
- SubpelVarianceParams(6, 7, &aom_highbd_10_sub_pixel_variance64x128_c, 10),
- SubpelVarianceParams(6, 6, &aom_highbd_10_sub_pixel_variance64x64_c, 10),
- SubpelVarianceParams(6, 5, &aom_highbd_10_sub_pixel_variance64x32_c, 10),
- SubpelVarianceParams(5, 6, &aom_highbd_10_sub_pixel_variance32x64_c, 10),
- SubpelVarianceParams(5, 5, &aom_highbd_10_sub_pixel_variance32x32_c, 10),
- SubpelVarianceParams(5, 4, &aom_highbd_10_sub_pixel_variance32x16_c, 10),
- SubpelVarianceParams(4, 5, &aom_highbd_10_sub_pixel_variance16x32_c, 10),
- SubpelVarianceParams(4, 4, &aom_highbd_10_sub_pixel_variance16x16_c, 10),
- SubpelVarianceParams(4, 3, &aom_highbd_10_sub_pixel_variance16x8_c, 10),
- SubpelVarianceParams(3, 4, &aom_highbd_10_sub_pixel_variance8x16_c, 10),
- SubpelVarianceParams(3, 3, &aom_highbd_10_sub_pixel_variance8x8_c, 10),
- SubpelVarianceParams(3, 2, &aom_highbd_10_sub_pixel_variance8x4_c, 10),
- SubpelVarianceParams(2, 3, &aom_highbd_10_sub_pixel_variance4x8_c, 10),
- SubpelVarianceParams(2, 2, &aom_highbd_10_sub_pixel_variance4x4_c, 10),
- SubpelVarianceParams(7, 7, &aom_highbd_12_sub_pixel_variance128x128_c, 12),
- SubpelVarianceParams(7, 6, &aom_highbd_12_sub_pixel_variance128x64_c, 12),
- SubpelVarianceParams(6, 7, &aom_highbd_12_sub_pixel_variance64x128_c, 12),
- SubpelVarianceParams(6, 6, &aom_highbd_12_sub_pixel_variance64x64_c, 12),
- SubpelVarianceParams(6, 5, &aom_highbd_12_sub_pixel_variance64x32_c, 12),
- SubpelVarianceParams(5, 6, &aom_highbd_12_sub_pixel_variance32x64_c, 12),
- SubpelVarianceParams(5, 5, &aom_highbd_12_sub_pixel_variance32x32_c, 12),
- SubpelVarianceParams(5, 4, &aom_highbd_12_sub_pixel_variance32x16_c, 12),
- SubpelVarianceParams(4, 5, &aom_highbd_12_sub_pixel_variance16x32_c, 12),
- SubpelVarianceParams(4, 4, &aom_highbd_12_sub_pixel_variance16x16_c, 12),
- SubpelVarianceParams(4, 3, &aom_highbd_12_sub_pixel_variance16x8_c, 12),
- SubpelVarianceParams(3, 4, &aom_highbd_12_sub_pixel_variance8x16_c, 12),
- SubpelVarianceParams(3, 3, &aom_highbd_12_sub_pixel_variance8x8_c, 12),
- SubpelVarianceParams(3, 2, &aom_highbd_12_sub_pixel_variance8x4_c, 12),
- SubpelVarianceParams(2, 3, &aom_highbd_12_sub_pixel_variance4x8_c, 12),
- SubpelVarianceParams(2, 2, &aom_highbd_12_sub_pixel_variance4x4_c, 12),
-};
-INSTANTIATE_TEST_CASE_P(C, AvxHBDSubpelVarianceTest,
- ::testing::ValuesIn(kArrayHBDSubpelVariance_c));
-
-const SubpelAvgVarianceParams kArrayHBDSubpelAvgVariance_c[] = {
- SubpelAvgVarianceParams(7, 7, &aom_highbd_8_sub_pixel_avg_variance128x128_c,
- 8),
- SubpelAvgVarianceParams(7, 6, &aom_highbd_8_sub_pixel_avg_variance128x64_c,
- 8),
- SubpelAvgVarianceParams(6, 7, &aom_highbd_8_sub_pixel_avg_variance64x128_c,
- 8),
- SubpelAvgVarianceParams(6, 6, &aom_highbd_8_sub_pixel_avg_variance64x64_c, 8),
- SubpelAvgVarianceParams(6, 5, &aom_highbd_8_sub_pixel_avg_variance64x32_c, 8),
- SubpelAvgVarianceParams(5, 6, &aom_highbd_8_sub_pixel_avg_variance32x64_c, 8),
- SubpelAvgVarianceParams(5, 5, &aom_highbd_8_sub_pixel_avg_variance32x32_c, 8),
- SubpelAvgVarianceParams(5, 4, &aom_highbd_8_sub_pixel_avg_variance32x16_c, 8),
- SubpelAvgVarianceParams(4, 5, &aom_highbd_8_sub_pixel_avg_variance16x32_c, 8),
- SubpelAvgVarianceParams(4, 4, &aom_highbd_8_sub_pixel_avg_variance16x16_c, 8),
- SubpelAvgVarianceParams(4, 3, &aom_highbd_8_sub_pixel_avg_variance16x8_c, 8),
- SubpelAvgVarianceParams(3, 4, &aom_highbd_8_sub_pixel_avg_variance8x16_c, 8),
- SubpelAvgVarianceParams(3, 3, &aom_highbd_8_sub_pixel_avg_variance8x8_c, 8),
- SubpelAvgVarianceParams(3, 2, &aom_highbd_8_sub_pixel_avg_variance8x4_c, 8),
- SubpelAvgVarianceParams(2, 3, &aom_highbd_8_sub_pixel_avg_variance4x8_c, 8),
- SubpelAvgVarianceParams(2, 2, &aom_highbd_8_sub_pixel_avg_variance4x4_c, 8),
- SubpelAvgVarianceParams(7, 7, &aom_highbd_10_sub_pixel_avg_variance128x128_c,
- 10),
- SubpelAvgVarianceParams(7, 6, &aom_highbd_10_sub_pixel_avg_variance128x64_c,
- 10),
- SubpelAvgVarianceParams(6, 7, &aom_highbd_10_sub_pixel_avg_variance64x128_c,
- 10),
- SubpelAvgVarianceParams(6, 6, &aom_highbd_10_sub_pixel_avg_variance64x64_c,
- 10),
- SubpelAvgVarianceParams(6, 5, &aom_highbd_10_sub_pixel_avg_variance64x32_c,
- 10),
- SubpelAvgVarianceParams(5, 6, &aom_highbd_10_sub_pixel_avg_variance32x64_c,
- 10),
- SubpelAvgVarianceParams(5, 5, &aom_highbd_10_sub_pixel_avg_variance32x32_c,
- 10),
- SubpelAvgVarianceParams(5, 4, &aom_highbd_10_sub_pixel_avg_variance32x16_c,
- 10),
- SubpelAvgVarianceParams(4, 5, &aom_highbd_10_sub_pixel_avg_variance16x32_c,
- 10),
- SubpelAvgVarianceParams(4, 4, &aom_highbd_10_sub_pixel_avg_variance16x16_c,
- 10),
- SubpelAvgVarianceParams(4, 3, &aom_highbd_10_sub_pixel_avg_variance16x8_c,
- 10),
- SubpelAvgVarianceParams(3, 4, &aom_highbd_10_sub_pixel_avg_variance8x16_c,
- 10),
- SubpelAvgVarianceParams(3, 3, &aom_highbd_10_sub_pixel_avg_variance8x8_c, 10),
- SubpelAvgVarianceParams(3, 2, &aom_highbd_10_sub_pixel_avg_variance8x4_c, 10),
- SubpelAvgVarianceParams(2, 3, &aom_highbd_10_sub_pixel_avg_variance4x8_c, 10),
- SubpelAvgVarianceParams(2, 2, &aom_highbd_10_sub_pixel_avg_variance4x4_c, 10),
- SubpelAvgVarianceParams(7, 7, &aom_highbd_12_sub_pixel_avg_variance128x128_c,
- 12),
- SubpelAvgVarianceParams(7, 6, &aom_highbd_12_sub_pixel_avg_variance128x64_c,
- 12),
- SubpelAvgVarianceParams(6, 7, &aom_highbd_12_sub_pixel_avg_variance64x128_c,
- 12),
- SubpelAvgVarianceParams(6, 6, &aom_highbd_12_sub_pixel_avg_variance64x64_c,
- 12),
- SubpelAvgVarianceParams(6, 5, &aom_highbd_12_sub_pixel_avg_variance64x32_c,
- 12),
- SubpelAvgVarianceParams(5, 6, &aom_highbd_12_sub_pixel_avg_variance32x64_c,
- 12),
- SubpelAvgVarianceParams(5, 5, &aom_highbd_12_sub_pixel_avg_variance32x32_c,
- 12),
- SubpelAvgVarianceParams(5, 4, &aom_highbd_12_sub_pixel_avg_variance32x16_c,
- 12),
- SubpelAvgVarianceParams(4, 5, &aom_highbd_12_sub_pixel_avg_variance16x32_c,
- 12),
- SubpelAvgVarianceParams(4, 4, &aom_highbd_12_sub_pixel_avg_variance16x16_c,
- 12),
- SubpelAvgVarianceParams(4, 3, &aom_highbd_12_sub_pixel_avg_variance16x8_c,
- 12),
- SubpelAvgVarianceParams(3, 4, &aom_highbd_12_sub_pixel_avg_variance8x16_c,
- 12),
- SubpelAvgVarianceParams(3, 3, &aom_highbd_12_sub_pixel_avg_variance8x8_c, 12),
- SubpelAvgVarianceParams(3, 2, &aom_highbd_12_sub_pixel_avg_variance8x4_c, 12),
- SubpelAvgVarianceParams(2, 3, &aom_highbd_12_sub_pixel_avg_variance4x8_c, 12),
- SubpelAvgVarianceParams(2, 2, &aom_highbd_12_sub_pixel_avg_variance4x4_c, 12)
-};
-INSTANTIATE_TEST_CASE_P(C, AvxHBDSubpelAvgVarianceTest,
- ::testing::ValuesIn(kArrayHBDSubpelAvgVariance_c));
-
-const ObmcSubpelVarianceParams kArrayHBDObmcSubpelVariance_c[] = {
- ObmcSubpelVarianceParams(7, 7, &aom_highbd_obmc_sub_pixel_variance128x128_c,
- 8),
- ObmcSubpelVarianceParams(7, 6, &aom_highbd_obmc_sub_pixel_variance128x64_c,
- 8),
- ObmcSubpelVarianceParams(6, 7, &aom_highbd_obmc_sub_pixel_variance64x128_c,
- 8),
- ObmcSubpelVarianceParams(6, 6, &aom_highbd_obmc_sub_pixel_variance64x64_c, 8),
- ObmcSubpelVarianceParams(6, 5, &aom_highbd_obmc_sub_pixel_variance64x32_c, 8),
- ObmcSubpelVarianceParams(5, 6, &aom_highbd_obmc_sub_pixel_variance32x64_c, 8),
- ObmcSubpelVarianceParams(5, 5, &aom_highbd_obmc_sub_pixel_variance32x32_c, 8),
- ObmcSubpelVarianceParams(5, 4, &aom_highbd_obmc_sub_pixel_variance32x16_c, 8),
- ObmcSubpelVarianceParams(4, 5, &aom_highbd_obmc_sub_pixel_variance16x32_c, 8),
- ObmcSubpelVarianceParams(4, 4, &aom_highbd_obmc_sub_pixel_variance16x16_c, 8),
- ObmcSubpelVarianceParams(4, 3, &aom_highbd_obmc_sub_pixel_variance16x8_c, 8),
- ObmcSubpelVarianceParams(3, 4, &aom_highbd_obmc_sub_pixel_variance8x16_c, 8),
- ObmcSubpelVarianceParams(3, 3, &aom_highbd_obmc_sub_pixel_variance8x8_c, 8),
- ObmcSubpelVarianceParams(3, 2, &aom_highbd_obmc_sub_pixel_variance8x4_c, 8),
- ObmcSubpelVarianceParams(2, 3, &aom_highbd_obmc_sub_pixel_variance4x8_c, 8),
- ObmcSubpelVarianceParams(2, 2, &aom_highbd_obmc_sub_pixel_variance4x4_c, 8),
- ObmcSubpelVarianceParams(7, 7,
- &aom_highbd_10_obmc_sub_pixel_variance128x128_c, 10),
- ObmcSubpelVarianceParams(7, 6, &aom_highbd_10_obmc_sub_pixel_variance128x64_c,
- 10),
- ObmcSubpelVarianceParams(6, 7, &aom_highbd_10_obmc_sub_pixel_variance64x128_c,
- 10),
- ObmcSubpelVarianceParams(6, 6, &aom_highbd_10_obmc_sub_pixel_variance64x64_c,
- 10),
- ObmcSubpelVarianceParams(6, 5, &aom_highbd_10_obmc_sub_pixel_variance64x32_c,
- 10),
- ObmcSubpelVarianceParams(5, 6, &aom_highbd_10_obmc_sub_pixel_variance32x64_c,
- 10),
- ObmcSubpelVarianceParams(5, 5, &aom_highbd_10_obmc_sub_pixel_variance32x32_c,
- 10),
- ObmcSubpelVarianceParams(5, 4, &aom_highbd_10_obmc_sub_pixel_variance32x16_c,
- 10),
- ObmcSubpelVarianceParams(4, 5, &aom_highbd_10_obmc_sub_pixel_variance16x32_c,
- 10),
- ObmcSubpelVarianceParams(4, 4, &aom_highbd_10_obmc_sub_pixel_variance16x16_c,
- 10),
- ObmcSubpelVarianceParams(4, 3, &aom_highbd_10_obmc_sub_pixel_variance16x8_c,
- 10),
- ObmcSubpelVarianceParams(3, 4, &aom_highbd_10_obmc_sub_pixel_variance8x16_c,
- 10),
- ObmcSubpelVarianceParams(3, 3, &aom_highbd_10_obmc_sub_pixel_variance8x8_c,
- 10),
- ObmcSubpelVarianceParams(3, 2, &aom_highbd_10_obmc_sub_pixel_variance8x4_c,
- 10),
- ObmcSubpelVarianceParams(2, 3, &aom_highbd_10_obmc_sub_pixel_variance4x8_c,
- 10),
- ObmcSubpelVarianceParams(2, 2, &aom_highbd_10_obmc_sub_pixel_variance4x4_c,
- 10),
- ObmcSubpelVarianceParams(7, 7,
- &aom_highbd_12_obmc_sub_pixel_variance128x128_c, 12),
- ObmcSubpelVarianceParams(7, 6, &aom_highbd_12_obmc_sub_pixel_variance128x64_c,
- 12),
- ObmcSubpelVarianceParams(6, 7, &aom_highbd_12_obmc_sub_pixel_variance64x128_c,
- 12),
- ObmcSubpelVarianceParams(6, 6, &aom_highbd_12_obmc_sub_pixel_variance64x64_c,
- 12),
- ObmcSubpelVarianceParams(6, 5, &aom_highbd_12_obmc_sub_pixel_variance64x32_c,
- 12),
- ObmcSubpelVarianceParams(5, 6, &aom_highbd_12_obmc_sub_pixel_variance32x64_c,
- 12),
- ObmcSubpelVarianceParams(5, 5, &aom_highbd_12_obmc_sub_pixel_variance32x32_c,
- 12),
- ObmcSubpelVarianceParams(5, 4, &aom_highbd_12_obmc_sub_pixel_variance32x16_c,
- 12),
- ObmcSubpelVarianceParams(4, 5, &aom_highbd_12_obmc_sub_pixel_variance16x32_c,
- 12),
- ObmcSubpelVarianceParams(4, 4, &aom_highbd_12_obmc_sub_pixel_variance16x16_c,
- 12),
- ObmcSubpelVarianceParams(4, 3, &aom_highbd_12_obmc_sub_pixel_variance16x8_c,
- 12),
- ObmcSubpelVarianceParams(3, 4, &aom_highbd_12_obmc_sub_pixel_variance8x16_c,
- 12),
- ObmcSubpelVarianceParams(3, 3, &aom_highbd_12_obmc_sub_pixel_variance8x8_c,
- 12),
- ObmcSubpelVarianceParams(3, 2, &aom_highbd_12_obmc_sub_pixel_variance8x4_c,
- 12),
- ObmcSubpelVarianceParams(2, 3, &aom_highbd_12_obmc_sub_pixel_variance4x8_c,
- 12),
- ObmcSubpelVarianceParams(2, 2, &aom_highbd_12_obmc_sub_pixel_variance4x4_c,
- 12)
-};
-INSTANTIATE_TEST_CASE_P(C, AvxHBDObmcSubpelVarianceTest,
- ::testing::ValuesIn(kArrayHBDObmcSubpelVariance_c));
-
-#if HAVE_SSE2
-INSTANTIATE_TEST_CASE_P(SSE2, SumOfSquaresTest,
- ::testing::Values(aom_get_mb_ss_sse2));
-
-INSTANTIATE_TEST_CASE_P(SSE2, AvxMseTest,
- ::testing::Values(MseParams(4, 4, &aom_mse16x16_sse2),
- MseParams(4, 3, &aom_mse16x8_sse2),
- MseParams(3, 4, &aom_mse8x16_sse2),
- MseParams(3, 3, &aom_mse8x8_sse2)));
-
-INSTANTIATE_TEST_CASE_P(
- SSE2, AvxVarianceTest,
- ::testing::Values(VarianceParams(7, 7, &aom_variance128x128_sse2),
- VarianceParams(7, 6, &aom_variance128x64_sse2),
- VarianceParams(6, 7, &aom_variance64x128_sse2),
- VarianceParams(6, 6, &aom_variance64x64_sse2),
- VarianceParams(6, 5, &aom_variance64x32_sse2),
- VarianceParams(6, 4, &aom_variance64x16_sse2),
- VarianceParams(5, 6, &aom_variance32x64_sse2),
- VarianceParams(5, 5, &aom_variance32x32_sse2),
- VarianceParams(5, 4, &aom_variance32x16_sse2),
- VarianceParams(5, 3, &aom_variance32x8_sse2),
- VarianceParams(4, 6, &aom_variance16x64_sse2),
- VarianceParams(4, 5, &aom_variance16x32_sse2),
- VarianceParams(4, 4, &aom_variance16x16_sse2),
- VarianceParams(4, 3, &aom_variance16x8_sse2),
- VarianceParams(4, 2, &aom_variance16x4_sse2),
- VarianceParams(3, 5, &aom_variance8x32_sse2),
- VarianceParams(3, 4, &aom_variance8x16_sse2),
- VarianceParams(3, 3, &aom_variance8x8_sse2),
- VarianceParams(3, 2, &aom_variance8x4_sse2),
- VarianceParams(2, 4, &aom_variance4x16_sse2),
- VarianceParams(2, 3, &aom_variance4x8_sse2),
- VarianceParams(2, 2, &aom_variance4x4_sse2)));
-
-INSTANTIATE_TEST_CASE_P(
- SSE2, AvxSubpelVarianceTest,
- ::testing::Values(
- SubpelVarianceParams(7, 7, &aom_sub_pixel_variance128x128_sse2, 0),
- SubpelVarianceParams(7, 6, &aom_sub_pixel_variance128x64_sse2, 0),
- SubpelVarianceParams(6, 7, &aom_sub_pixel_variance64x128_sse2, 0),
- SubpelVarianceParams(6, 6, &aom_sub_pixel_variance64x64_sse2, 0),
- SubpelVarianceParams(6, 5, &aom_sub_pixel_variance64x32_sse2, 0),
- SubpelVarianceParams(5, 6, &aom_sub_pixel_variance32x64_sse2, 0),
- SubpelVarianceParams(5, 5, &aom_sub_pixel_variance32x32_sse2, 0),
- SubpelVarianceParams(5, 4, &aom_sub_pixel_variance32x16_sse2, 0),
- SubpelVarianceParams(4, 5, &aom_sub_pixel_variance16x32_sse2, 0),
- SubpelVarianceParams(4, 4, &aom_sub_pixel_variance16x16_sse2, 0),
- SubpelVarianceParams(4, 3, &aom_sub_pixel_variance16x8_sse2, 0),
- SubpelVarianceParams(3, 4, &aom_sub_pixel_variance8x16_sse2, 0),
- SubpelVarianceParams(3, 3, &aom_sub_pixel_variance8x8_sse2, 0),
- SubpelVarianceParams(3, 2, &aom_sub_pixel_variance8x4_sse2, 0),
- SubpelVarianceParams(2, 3, &aom_sub_pixel_variance4x8_sse2, 0),
- SubpelVarianceParams(2, 2, &aom_sub_pixel_variance4x4_sse2, 0)));
-
-INSTANTIATE_TEST_CASE_P(
- SSE2, AvxSubpelAvgVarianceTest,
- ::testing::Values(
- SubpelAvgVarianceParams(7, 7, &aom_sub_pixel_avg_variance128x128_sse2,
- 0),
- SubpelAvgVarianceParams(7, 6, &aom_sub_pixel_avg_variance128x64_sse2,
- 0),
- SubpelAvgVarianceParams(6, 7, &aom_sub_pixel_avg_variance64x128_sse2,
- 0),
- SubpelAvgVarianceParams(6, 6, &aom_sub_pixel_avg_variance64x64_sse2, 0),
- SubpelAvgVarianceParams(6, 5, &aom_sub_pixel_avg_variance64x32_sse2, 0),
- SubpelAvgVarianceParams(5, 6, &aom_sub_pixel_avg_variance32x64_sse2, 0),
- SubpelAvgVarianceParams(5, 5, &aom_sub_pixel_avg_variance32x32_sse2, 0),
- SubpelAvgVarianceParams(5, 4, &aom_sub_pixel_avg_variance32x16_sse2, 0),
- SubpelAvgVarianceParams(4, 5, &aom_sub_pixel_avg_variance16x32_sse2, 0),
- SubpelAvgVarianceParams(4, 4, &aom_sub_pixel_avg_variance16x16_sse2, 0),
- SubpelAvgVarianceParams(4, 3, &aom_sub_pixel_avg_variance16x8_sse2, 0),
- SubpelAvgVarianceParams(3, 4, &aom_sub_pixel_avg_variance8x16_sse2, 0),
- SubpelAvgVarianceParams(3, 3, &aom_sub_pixel_avg_variance8x8_sse2, 0),
- SubpelAvgVarianceParams(3, 2, &aom_sub_pixel_avg_variance8x4_sse2, 0),
- SubpelAvgVarianceParams(2, 3, &aom_sub_pixel_avg_variance4x8_sse2, 0),
- SubpelAvgVarianceParams(2, 2, &aom_sub_pixel_avg_variance4x4_sse2, 0)));
-
-#if HAVE_SSE4_1
-INSTANTIATE_TEST_CASE_P(
- SSE4_1, AvxSubpelVarianceTest,
- ::testing::Values(
- SubpelVarianceParams(2, 2, &aom_highbd_8_sub_pixel_variance4x4_sse4_1,
- 8),
- SubpelVarianceParams(2, 2, &aom_highbd_10_sub_pixel_variance4x4_sse4_1,
- 10),
- SubpelVarianceParams(2, 2, &aom_highbd_12_sub_pixel_variance4x4_sse4_1,
- 12)));
-
-INSTANTIATE_TEST_CASE_P(
- SSE4_1, AvxSubpelAvgVarianceTest,
- ::testing::Values(
- SubpelAvgVarianceParams(2, 2,
- &aom_highbd_8_sub_pixel_avg_variance4x4_sse4_1,
- 8),
- SubpelAvgVarianceParams(2, 2,
- &aom_highbd_10_sub_pixel_avg_variance4x4_sse4_1,
- 10),
- SubpelAvgVarianceParams(2, 2,
- &aom_highbd_12_sub_pixel_avg_variance4x4_sse4_1,
- 12)));
-#endif // HAVE_SSE4_1
-
-/* TODO(debargha): This test does not support the highbd version
-INSTANTIATE_TEST_CASE_P(
- SSE2, AvxHBDMseTest,
- ::testing::Values(MseParams(4, 4, &aom_highbd_12_mse16x16_sse2),
- MseParams(4, 3, &aom_highbd_12_mse16x8_sse2),
- MseParams(3, 4, &aom_highbd_12_mse8x16_sse2),
- MseParams(3, 3, &aom_highbd_12_mse8x8_sse2),
- MseParams(4, 4, &aom_highbd_10_mse16x16_sse2),
- MseParams(4, 3, &aom_highbd_10_mse16x8_sse2),
- MseParams(3, 4, &aom_highbd_10_mse8x16_sse2),
- MseParams(3, 3, &aom_highbd_10_mse8x8_sse2),
- MseParams(4, 4, &aom_highbd_8_mse16x16_sse2),
- MseParams(4, 3, &aom_highbd_8_mse16x8_sse2),
- MseParams(3, 4, &aom_highbd_8_mse8x16_sse2),
- MseParams(3, 3, &aom_highbd_8_mse8x8_sse2)));
-*/
-
-const VarianceParams kArrayHBDVariance_sse2[] = {
- VarianceParams(7, 7, &aom_highbd_12_variance128x128_sse2, 12),
- VarianceParams(7, 6, &aom_highbd_12_variance128x64_sse2, 12),
- VarianceParams(6, 7, &aom_highbd_12_variance64x128_sse2, 12),
- VarianceParams(6, 6, &aom_highbd_12_variance64x64_sse2, 12),
- VarianceParams(6, 5, &aom_highbd_12_variance64x32_sse2, 12),
- VarianceParams(5, 6, &aom_highbd_12_variance32x64_sse2, 12),
- VarianceParams(5, 5, &aom_highbd_12_variance32x32_sse2, 12),
- VarianceParams(5, 4, &aom_highbd_12_variance32x16_sse2, 12),
- VarianceParams(4, 5, &aom_highbd_12_variance16x32_sse2, 12),
- VarianceParams(4, 4, &aom_highbd_12_variance16x16_sse2, 12),
- VarianceParams(4, 3, &aom_highbd_12_variance16x8_sse2, 12),
- VarianceParams(3, 4, &aom_highbd_12_variance8x16_sse2, 12),
- VarianceParams(3, 3, &aom_highbd_12_variance8x8_sse2, 12),
- VarianceParams(7, 7, &aom_highbd_10_variance128x128_sse2, 10),
- VarianceParams(7, 6, &aom_highbd_10_variance128x64_sse2, 10),
- VarianceParams(6, 7, &aom_highbd_10_variance64x128_sse2, 10),
- VarianceParams(6, 6, &aom_highbd_10_variance64x64_sse2, 10),
- VarianceParams(6, 5, &aom_highbd_10_variance64x32_sse2, 10),
- VarianceParams(5, 6, &aom_highbd_10_variance32x64_sse2, 10),
- VarianceParams(5, 5, &aom_highbd_10_variance32x32_sse2, 10),
- VarianceParams(5, 4, &aom_highbd_10_variance32x16_sse2, 10),
- VarianceParams(4, 5, &aom_highbd_10_variance16x32_sse2, 10),
- VarianceParams(4, 4, &aom_highbd_10_variance16x16_sse2, 10),
- VarianceParams(4, 3, &aom_highbd_10_variance16x8_sse2, 10),
- VarianceParams(3, 4, &aom_highbd_10_variance8x16_sse2, 10),
- VarianceParams(3, 3, &aom_highbd_10_variance8x8_sse2, 10),
- VarianceParams(7, 7, &aom_highbd_8_variance128x128_sse2, 8),
- VarianceParams(7, 6, &aom_highbd_8_variance128x64_sse2, 8),
- VarianceParams(6, 7, &aom_highbd_8_variance64x128_sse2, 8),
- VarianceParams(6, 6, &aom_highbd_8_variance64x64_sse2, 8),
- VarianceParams(6, 5, &aom_highbd_8_variance64x32_sse2, 8),
- VarianceParams(5, 6, &aom_highbd_8_variance32x64_sse2, 8),
- VarianceParams(5, 5, &aom_highbd_8_variance32x32_sse2, 8),
- VarianceParams(5, 4, &aom_highbd_8_variance32x16_sse2, 8),
- VarianceParams(4, 5, &aom_highbd_8_variance16x32_sse2, 8),
- VarianceParams(4, 4, &aom_highbd_8_variance16x16_sse2, 8),
- VarianceParams(4, 3, &aom_highbd_8_variance16x8_sse2, 8),
- VarianceParams(3, 4, &aom_highbd_8_variance8x16_sse2, 8),
- VarianceParams(3, 3, &aom_highbd_8_variance8x8_sse2, 8)
-};
-INSTANTIATE_TEST_CASE_P(SSE2, AvxHBDVarianceTest,
- ::testing::ValuesIn(kArrayHBDVariance_sse2));
-
-#if HAVE_AVX2
-
-const VarianceParams kArrayHBDVariance_avx2[] = {
- VarianceParams(7, 7, &aom_highbd_10_variance128x128_avx2, 10),
- VarianceParams(7, 6, &aom_highbd_10_variance128x64_avx2, 10),
- VarianceParams(6, 7, &aom_highbd_10_variance64x128_avx2, 10),
- VarianceParams(6, 6, &aom_highbd_10_variance64x64_avx2, 10),
- VarianceParams(6, 5, &aom_highbd_10_variance64x32_avx2, 10),
- VarianceParams(5, 6, &aom_highbd_10_variance32x64_avx2, 10),
- VarianceParams(5, 5, &aom_highbd_10_variance32x32_avx2, 10),
- VarianceParams(5, 4, &aom_highbd_10_variance32x16_avx2, 10),
- VarianceParams(4, 5, &aom_highbd_10_variance16x32_avx2, 10),
- VarianceParams(4, 4, &aom_highbd_10_variance16x16_avx2, 10),
- VarianceParams(4, 3, &aom_highbd_10_variance16x8_avx2, 10),
- VarianceParams(3, 4, &aom_highbd_10_variance8x16_avx2, 10),
- VarianceParams(3, 3, &aom_highbd_10_variance8x8_avx2, 10)
-};
-
-INSTANTIATE_TEST_CASE_P(AVX2, AvxHBDVarianceTest,
- ::testing::ValuesIn(kArrayHBDVariance_avx2));
-#endif // HAVE_AVX2
-
-const SubpelVarianceParams kArrayHBDSubpelVariance_sse2[] = {
- SubpelVarianceParams(6, 6, &aom_highbd_12_sub_pixel_variance64x64_sse2, 12),
- SubpelVarianceParams(6, 5, &aom_highbd_12_sub_pixel_variance64x32_sse2, 12),
- SubpelVarianceParams(5, 6, &aom_highbd_12_sub_pixel_variance32x64_sse2, 12),
- SubpelVarianceParams(5, 5, &aom_highbd_12_sub_pixel_variance32x32_sse2, 12),
- SubpelVarianceParams(5, 4, &aom_highbd_12_sub_pixel_variance32x16_sse2, 12),
- SubpelVarianceParams(4, 5, &aom_highbd_12_sub_pixel_variance16x32_sse2, 12),
- SubpelVarianceParams(4, 4, &aom_highbd_12_sub_pixel_variance16x16_sse2, 12),
- SubpelVarianceParams(4, 3, &aom_highbd_12_sub_pixel_variance16x8_sse2, 12),
- SubpelVarianceParams(3, 4, &aom_highbd_12_sub_pixel_variance8x16_sse2, 12),
- SubpelVarianceParams(3, 3, &aom_highbd_12_sub_pixel_variance8x8_sse2, 12),
- SubpelVarianceParams(3, 2, &aom_highbd_12_sub_pixel_variance8x4_sse2, 12),
- SubpelVarianceParams(6, 6, &aom_highbd_10_sub_pixel_variance64x64_sse2, 10),
- SubpelVarianceParams(6, 5, &aom_highbd_10_sub_pixel_variance64x32_sse2, 10),
- SubpelVarianceParams(5, 6, &aom_highbd_10_sub_pixel_variance32x64_sse2, 10),
- SubpelVarianceParams(5, 5, &aom_highbd_10_sub_pixel_variance32x32_sse2, 10),
- SubpelVarianceParams(5, 4, &aom_highbd_10_sub_pixel_variance32x16_sse2, 10),
- SubpelVarianceParams(4, 5, &aom_highbd_10_sub_pixel_variance16x32_sse2, 10),
- SubpelVarianceParams(4, 4, &aom_highbd_10_sub_pixel_variance16x16_sse2, 10),
- SubpelVarianceParams(4, 3, &aom_highbd_10_sub_pixel_variance16x8_sse2, 10),
- SubpelVarianceParams(3, 4, &aom_highbd_10_sub_pixel_variance8x16_sse2, 10),
- SubpelVarianceParams(3, 3, &aom_highbd_10_sub_pixel_variance8x8_sse2, 10),
- SubpelVarianceParams(3, 2, &aom_highbd_10_sub_pixel_variance8x4_sse2, 10),
- SubpelVarianceParams(6, 6, &aom_highbd_8_sub_pixel_variance64x64_sse2, 8),
- SubpelVarianceParams(6, 5, &aom_highbd_8_sub_pixel_variance64x32_sse2, 8),
- SubpelVarianceParams(5, 6, &aom_highbd_8_sub_pixel_variance32x64_sse2, 8),
- SubpelVarianceParams(5, 5, &aom_highbd_8_sub_pixel_variance32x32_sse2, 8),
- SubpelVarianceParams(5, 4, &aom_highbd_8_sub_pixel_variance32x16_sse2, 8),
- SubpelVarianceParams(4, 5, &aom_highbd_8_sub_pixel_variance16x32_sse2, 8),
- SubpelVarianceParams(4, 4, &aom_highbd_8_sub_pixel_variance16x16_sse2, 8),
- SubpelVarianceParams(4, 3, &aom_highbd_8_sub_pixel_variance16x8_sse2, 8),
- SubpelVarianceParams(3, 4, &aom_highbd_8_sub_pixel_variance8x16_sse2, 8),
- SubpelVarianceParams(3, 3, &aom_highbd_8_sub_pixel_variance8x8_sse2, 8),
- SubpelVarianceParams(3, 2, &aom_highbd_8_sub_pixel_variance8x4_sse2, 8)
-};
-
-INSTANTIATE_TEST_CASE_P(SSE2, AvxHBDSubpelVarianceTest,
- ::testing::ValuesIn(kArrayHBDSubpelVariance_sse2));
-
-const SubpelAvgVarianceParams kArrayHBDSubpelAvgVariance_sse2[] = {
- SubpelAvgVarianceParams(6, 6, &aom_highbd_12_sub_pixel_avg_variance64x64_sse2,
- 12),
- SubpelAvgVarianceParams(6, 5, &aom_highbd_12_sub_pixel_avg_variance64x32_sse2,
- 12),
- SubpelAvgVarianceParams(5, 6, &aom_highbd_12_sub_pixel_avg_variance32x64_sse2,
- 12),
- SubpelAvgVarianceParams(5, 5, &aom_highbd_12_sub_pixel_avg_variance32x32_sse2,
- 12),
- SubpelAvgVarianceParams(5, 4, &aom_highbd_12_sub_pixel_avg_variance32x16_sse2,
- 12),
- SubpelAvgVarianceParams(4, 5, &aom_highbd_12_sub_pixel_avg_variance16x32_sse2,
- 12),
- SubpelAvgVarianceParams(4, 4, &aom_highbd_12_sub_pixel_avg_variance16x16_sse2,
- 12),
- SubpelAvgVarianceParams(4, 3, &aom_highbd_12_sub_pixel_avg_variance16x8_sse2,
- 12),
- SubpelAvgVarianceParams(3, 4, &aom_highbd_12_sub_pixel_avg_variance8x16_sse2,
- 12),
- SubpelAvgVarianceParams(3, 3, &aom_highbd_12_sub_pixel_avg_variance8x8_sse2,
- 12),
- SubpelAvgVarianceParams(3, 2, &aom_highbd_12_sub_pixel_avg_variance8x4_sse2,
- 12),
- SubpelAvgVarianceParams(6, 6, &aom_highbd_10_sub_pixel_avg_variance64x64_sse2,
- 10),
- SubpelAvgVarianceParams(6, 5, &aom_highbd_10_sub_pixel_avg_variance64x32_sse2,
- 10),
- SubpelAvgVarianceParams(5, 6, &aom_highbd_10_sub_pixel_avg_variance32x64_sse2,
- 10),
- SubpelAvgVarianceParams(5, 5, &aom_highbd_10_sub_pixel_avg_variance32x32_sse2,
- 10),
- SubpelAvgVarianceParams(5, 4, &aom_highbd_10_sub_pixel_avg_variance32x16_sse2,
- 10),
- SubpelAvgVarianceParams(4, 5, &aom_highbd_10_sub_pixel_avg_variance16x32_sse2,
- 10),
- SubpelAvgVarianceParams(4, 4, &aom_highbd_10_sub_pixel_avg_variance16x16_sse2,
- 10),
- SubpelAvgVarianceParams(4, 3, &aom_highbd_10_sub_pixel_avg_variance16x8_sse2,
- 10),
- SubpelAvgVarianceParams(3, 4, &aom_highbd_10_sub_pixel_avg_variance8x16_sse2,
- 10),
- SubpelAvgVarianceParams(3, 3, &aom_highbd_10_sub_pixel_avg_variance8x8_sse2,
- 10),
- SubpelAvgVarianceParams(3, 2, &aom_highbd_10_sub_pixel_avg_variance8x4_sse2,
- 10),
- SubpelAvgVarianceParams(6, 6, &aom_highbd_8_sub_pixel_avg_variance64x64_sse2,
- 8),
- SubpelAvgVarianceParams(6, 5, &aom_highbd_8_sub_pixel_avg_variance64x32_sse2,
- 8),
- SubpelAvgVarianceParams(5, 6, &aom_highbd_8_sub_pixel_avg_variance32x64_sse2,
- 8),
- SubpelAvgVarianceParams(5, 5, &aom_highbd_8_sub_pixel_avg_variance32x32_sse2,
- 8),
- SubpelAvgVarianceParams(5, 4, &aom_highbd_8_sub_pixel_avg_variance32x16_sse2,
- 8),
- SubpelAvgVarianceParams(4, 5, &aom_highbd_8_sub_pixel_avg_variance16x32_sse2,
- 8),
- SubpelAvgVarianceParams(4, 4, &aom_highbd_8_sub_pixel_avg_variance16x16_sse2,
- 8),
- SubpelAvgVarianceParams(4, 3, &aom_highbd_8_sub_pixel_avg_variance16x8_sse2,
- 8),
- SubpelAvgVarianceParams(3, 4, &aom_highbd_8_sub_pixel_avg_variance8x16_sse2,
- 8),
- SubpelAvgVarianceParams(3, 3, &aom_highbd_8_sub_pixel_avg_variance8x8_sse2,
- 8),
- SubpelAvgVarianceParams(3, 2, &aom_highbd_8_sub_pixel_avg_variance8x4_sse2, 8)
-};
-
-INSTANTIATE_TEST_CASE_P(SSE2, AvxHBDSubpelAvgVarianceTest,
- ::testing::ValuesIn(kArrayHBDSubpelAvgVariance_sse2));
-#endif // HAVE_SSE2
-
-#if HAVE_SSSE3
-INSTANTIATE_TEST_CASE_P(
- SSSE3, AvxSubpelVarianceTest,
- ::testing::Values(
- SubpelVarianceParams(7, 7, &aom_sub_pixel_variance128x128_ssse3, 0),
- SubpelVarianceParams(7, 6, &aom_sub_pixel_variance128x64_ssse3, 0),
- SubpelVarianceParams(6, 7, &aom_sub_pixel_variance64x128_ssse3, 0),
- SubpelVarianceParams(6, 6, &aom_sub_pixel_variance64x64_ssse3, 0),
- SubpelVarianceParams(6, 5, &aom_sub_pixel_variance64x32_ssse3, 0),
- SubpelVarianceParams(5, 6, &aom_sub_pixel_variance32x64_ssse3, 0),
- SubpelVarianceParams(5, 5, &aom_sub_pixel_variance32x32_ssse3, 0),
- SubpelVarianceParams(5, 4, &aom_sub_pixel_variance32x16_ssse3, 0),
- SubpelVarianceParams(4, 5, &aom_sub_pixel_variance16x32_ssse3, 0),
- SubpelVarianceParams(4, 4, &aom_sub_pixel_variance16x16_ssse3, 0),
- SubpelVarianceParams(4, 3, &aom_sub_pixel_variance16x8_ssse3, 0),
- SubpelVarianceParams(3, 4, &aom_sub_pixel_variance8x16_ssse3, 0),
- SubpelVarianceParams(3, 3, &aom_sub_pixel_variance8x8_ssse3, 0),
- SubpelVarianceParams(3, 2, &aom_sub_pixel_variance8x4_ssse3, 0),
- SubpelVarianceParams(2, 3, &aom_sub_pixel_variance4x8_ssse3, 0),
- SubpelVarianceParams(2, 2, &aom_sub_pixel_variance4x4_ssse3, 0)));
-
-INSTANTIATE_TEST_CASE_P(
- SSSE3, AvxSubpelAvgVarianceTest,
- ::testing::Values(
- SubpelAvgVarianceParams(7, 7, &aom_sub_pixel_avg_variance128x128_ssse3,
- 0),
- SubpelAvgVarianceParams(7, 6, &aom_sub_pixel_avg_variance128x64_ssse3,
- 0),
- SubpelAvgVarianceParams(6, 7, &aom_sub_pixel_avg_variance64x128_ssse3,
- 0),
- SubpelAvgVarianceParams(6, 6, &aom_sub_pixel_avg_variance64x64_ssse3,
- 0),
- SubpelAvgVarianceParams(6, 5, &aom_sub_pixel_avg_variance64x32_ssse3,
- 0),
- SubpelAvgVarianceParams(5, 6, &aom_sub_pixel_avg_variance32x64_ssse3,
- 0),
- SubpelAvgVarianceParams(5, 5, &aom_sub_pixel_avg_variance32x32_ssse3,
- 0),
- SubpelAvgVarianceParams(5, 4, &aom_sub_pixel_avg_variance32x16_ssse3,
- 0),
- SubpelAvgVarianceParams(4, 5, &aom_sub_pixel_avg_variance16x32_ssse3,
- 0),
- SubpelAvgVarianceParams(4, 4, &aom_sub_pixel_avg_variance16x16_ssse3,
- 0),
- SubpelAvgVarianceParams(4, 3, &aom_sub_pixel_avg_variance16x8_ssse3, 0),
- SubpelAvgVarianceParams(3, 4, &aom_sub_pixel_avg_variance8x16_ssse3, 0),
- SubpelAvgVarianceParams(3, 3, &aom_sub_pixel_avg_variance8x8_ssse3, 0),
- SubpelAvgVarianceParams(3, 2, &aom_sub_pixel_avg_variance8x4_ssse3, 0),
- SubpelAvgVarianceParams(2, 3, &aom_sub_pixel_avg_variance4x8_ssse3, 0),
- SubpelAvgVarianceParams(2, 2, &aom_sub_pixel_avg_variance4x4_ssse3,
- 0)));
-
-INSTANTIATE_TEST_CASE_P(
- SSSE3, AvxJntSubpelAvgVarianceTest,
- ::testing::Values(
- JntSubpelAvgVarianceParams(6, 6,
- &aom_jnt_sub_pixel_avg_variance64x64_ssse3,
- 0),
- JntSubpelAvgVarianceParams(6, 5,
- &aom_jnt_sub_pixel_avg_variance64x32_ssse3,
- 0),
- JntSubpelAvgVarianceParams(5, 6,
- &aom_jnt_sub_pixel_avg_variance32x64_ssse3,
- 0),
- JntSubpelAvgVarianceParams(5, 5,
- &aom_jnt_sub_pixel_avg_variance32x32_ssse3,
- 0),
- JntSubpelAvgVarianceParams(5, 4,
- &aom_jnt_sub_pixel_avg_variance32x16_ssse3,
- 0),
- JntSubpelAvgVarianceParams(4, 5,
- &aom_jnt_sub_pixel_avg_variance16x32_ssse3,
- 0),
- JntSubpelAvgVarianceParams(4, 4,
- &aom_jnt_sub_pixel_avg_variance16x16_ssse3,
- 0),
- JntSubpelAvgVarianceParams(4, 3,
- &aom_jnt_sub_pixel_avg_variance16x8_ssse3,
- 0),
- JntSubpelAvgVarianceParams(3, 4,
- &aom_jnt_sub_pixel_avg_variance8x16_ssse3,
- 0),
- JntSubpelAvgVarianceParams(3, 3,
- &aom_jnt_sub_pixel_avg_variance8x8_ssse3, 0),
- JntSubpelAvgVarianceParams(3, 2,
- &aom_jnt_sub_pixel_avg_variance8x4_ssse3, 0),
- JntSubpelAvgVarianceParams(2, 3,
- &aom_jnt_sub_pixel_avg_variance4x8_ssse3, 0),
- JntSubpelAvgVarianceParams(2, 2,
- &aom_jnt_sub_pixel_avg_variance4x4_ssse3,
- 0)));
-#endif // HAVE_SSSE3
-
-#if HAVE_SSE4_1
-INSTANTIATE_TEST_CASE_P(
- SSE4_1, AvxObmcSubpelVarianceTest,
- ::testing::Values(
- ObmcSubpelVarianceParams(7, 7,
- &aom_obmc_sub_pixel_variance128x128_sse4_1, 0),
- ObmcSubpelVarianceParams(7, 6,
- &aom_obmc_sub_pixel_variance128x64_sse4_1, 0),
- ObmcSubpelVarianceParams(6, 7,
- &aom_obmc_sub_pixel_variance64x128_sse4_1, 0),
- ObmcSubpelVarianceParams(6, 6, &aom_obmc_sub_pixel_variance64x64_sse4_1,
- 0),
- ObmcSubpelVarianceParams(6, 5, &aom_obmc_sub_pixel_variance64x32_sse4_1,
- 0),
- ObmcSubpelVarianceParams(5, 6, &aom_obmc_sub_pixel_variance32x64_sse4_1,
- 0),
- ObmcSubpelVarianceParams(5, 5, &aom_obmc_sub_pixel_variance32x32_sse4_1,
- 0),
- ObmcSubpelVarianceParams(5, 4, &aom_obmc_sub_pixel_variance32x16_sse4_1,
- 0),
- ObmcSubpelVarianceParams(4, 5, &aom_obmc_sub_pixel_variance16x32_sse4_1,
- 0),
- ObmcSubpelVarianceParams(4, 4, &aom_obmc_sub_pixel_variance16x16_sse4_1,
- 0),
- ObmcSubpelVarianceParams(4, 3, &aom_obmc_sub_pixel_variance16x8_sse4_1,
- 0),
- ObmcSubpelVarianceParams(3, 4, &aom_obmc_sub_pixel_variance8x16_sse4_1,
- 0),
- ObmcSubpelVarianceParams(3, 3, &aom_obmc_sub_pixel_variance8x8_sse4_1,
- 0),
- ObmcSubpelVarianceParams(3, 2, &aom_obmc_sub_pixel_variance8x4_sse4_1,
- 0),
- ObmcSubpelVarianceParams(2, 3, &aom_obmc_sub_pixel_variance4x8_sse4_1,
- 0),
- ObmcSubpelVarianceParams(2, 2, &aom_obmc_sub_pixel_variance4x4_sse4_1,
- 0)));
-#endif // HAVE_SSE4_1
-
-#if HAVE_AVX2
-INSTANTIATE_TEST_CASE_P(AVX2, AvxMseTest,
- ::testing::Values(MseParams(4, 4, &aom_mse16x16_avx2)));
-
-INSTANTIATE_TEST_CASE_P(
- AVX2, AvxVarianceTest,
- ::testing::Values(VarianceParams(7, 7, &aom_variance128x128_avx2),
- VarianceParams(7, 6, &aom_variance128x64_avx2),
- VarianceParams(6, 7, &aom_variance64x128_avx2),
- VarianceParams(6, 6, &aom_variance64x64_avx2),
- VarianceParams(6, 5, &aom_variance64x32_avx2),
- VarianceParams(6, 4, &aom_variance64x16_avx2),
- VarianceParams(5, 6, &aom_variance32x64_avx2),
- VarianceParams(5, 5, &aom_variance32x32_avx2),
- VarianceParams(5, 4, &aom_variance32x16_avx2),
- VarianceParams(5, 3, &aom_variance32x8_avx2),
- VarianceParams(4, 6, &aom_variance16x64_avx2),
- VarianceParams(4, 5, &aom_variance16x32_avx2),
- VarianceParams(4, 4, &aom_variance16x16_avx2),
- VarianceParams(4, 3, &aom_variance16x8_avx2),
- VarianceParams(4, 2, &aom_variance16x4_avx2)));
-
-INSTANTIATE_TEST_CASE_P(
- AVX2, AvxSubpelVarianceTest,
- ::testing::Values(
- SubpelVarianceParams(7, 7, &aom_sub_pixel_variance128x128_avx2, 0),
- SubpelVarianceParams(7, 6, &aom_sub_pixel_variance128x64_avx2, 0),
- SubpelVarianceParams(6, 7, &aom_sub_pixel_variance64x128_avx2, 0),
- SubpelVarianceParams(6, 6, &aom_sub_pixel_variance64x64_avx2, 0),
- SubpelVarianceParams(6, 5, &aom_sub_pixel_variance64x32_avx2, 0),
- SubpelVarianceParams(5, 6, &aom_sub_pixel_variance32x64_avx2, 0),
- SubpelVarianceParams(5, 5, &aom_sub_pixel_variance32x32_avx2, 0),
- SubpelVarianceParams(5, 4, &aom_sub_pixel_variance32x16_avx2, 0)));
-
-INSTANTIATE_TEST_CASE_P(
- AVX2, AvxSubpelAvgVarianceTest,
- ::testing::Values(
- SubpelAvgVarianceParams(7, 7, &aom_sub_pixel_avg_variance128x128_avx2,
- 0),
- SubpelAvgVarianceParams(7, 6, &aom_sub_pixel_avg_variance128x64_avx2,
- 0),
- SubpelAvgVarianceParams(6, 7, &aom_sub_pixel_avg_variance64x128_avx2,
- 0),
- SubpelAvgVarianceParams(6, 6, &aom_sub_pixel_avg_variance64x64_avx2, 0),
- SubpelAvgVarianceParams(6, 5, &aom_sub_pixel_avg_variance64x32_avx2, 0),
- SubpelAvgVarianceParams(5, 6, &aom_sub_pixel_avg_variance32x64_avx2, 0),
- SubpelAvgVarianceParams(5, 5, &aom_sub_pixel_avg_variance32x32_avx2, 0),
- SubpelAvgVarianceParams(5, 4, &aom_sub_pixel_avg_variance32x16_avx2,
- 0)));
-#endif // HAVE_AVX2
-
-#if HAVE_NEON
-INSTANTIATE_TEST_CASE_P(NEON, AvxSseTest,
- ::testing::Values(SseParams(2, 2,
- &aom_get4x4sse_cs_neon)));
-
-INSTANTIATE_TEST_CASE_P(NEON, AvxMseTest,
- ::testing::Values(MseParams(4, 4, &aom_mse16x16_neon)));
-
-INSTANTIATE_TEST_CASE_P(
- NEON, AvxVarianceTest,
- ::testing::Values(VarianceParams(6, 6, &aom_variance64x64_neon),
- VarianceParams(6, 5, &aom_variance64x32_neon),
- VarianceParams(5, 6, &aom_variance32x64_neon),
- VarianceParams(5, 5, &aom_variance32x32_neon),
- VarianceParams(4, 4, &aom_variance16x16_neon),
- VarianceParams(4, 3, &aom_variance16x8_neon),
- VarianceParams(3, 4, &aom_variance8x16_neon),
- VarianceParams(3, 3, &aom_variance8x8_neon)));
-
-INSTANTIATE_TEST_CASE_P(
- NEON, AvxSubpelVarianceTest,
- ::testing::Values(
- SubpelVarianceParams(6, 6, &aom_sub_pixel_variance64x64_neon, 0),
- SubpelVarianceParams(5, 5, &aom_sub_pixel_variance32x32_neon, 0),
- SubpelVarianceParams(4, 4, &aom_sub_pixel_variance16x16_neon, 0),
- SubpelVarianceParams(3, 3, &aom_sub_pixel_variance8x8_neon, 0)));
-#endif // HAVE_NEON
-
-#if HAVE_MSA
-INSTANTIATE_TEST_CASE_P(MSA, SumOfSquaresTest,
- ::testing::Values(aom_get_mb_ss_msa));
-
-INSTANTIATE_TEST_CASE_P(MSA, AvxSseTest,
- ::testing::Values(SseParams(2, 2,
- &aom_get4x4sse_cs_msa)));
-
-INSTANTIATE_TEST_CASE_P(MSA, AvxMseTest,
- ::testing::Values(MseParams(4, 4, &aom_mse16x16_msa),
- MseParams(4, 3, &aom_mse16x8_msa),
- MseParams(3, 4, &aom_mse8x16_msa),
- MseParams(3, 3, &aom_mse8x8_msa)));
-
-INSTANTIATE_TEST_CASE_P(
- MSA, AvxVarianceTest,
- ::testing::Values(VarianceParams(6, 6, &aom_variance64x64_msa),
- VarianceParams(6, 5, &aom_variance64x32_msa),
- VarianceParams(5, 6, &aom_variance32x64_msa),
- VarianceParams(5, 5, &aom_variance32x32_msa),
- VarianceParams(5, 4, &aom_variance32x16_msa),
- VarianceParams(4, 5, &aom_variance16x32_msa),
- VarianceParams(4, 4, &aom_variance16x16_msa),
- VarianceParams(4, 3, &aom_variance16x8_msa),
- VarianceParams(3, 4, &aom_variance8x16_msa),
- VarianceParams(3, 3, &aom_variance8x8_msa),
- VarianceParams(3, 2, &aom_variance8x4_msa),
- VarianceParams(2, 3, &aom_variance4x8_msa),
- VarianceParams(2, 2, &aom_variance4x4_msa)));
-
-INSTANTIATE_TEST_CASE_P(
- MSA, AvxSubpelVarianceTest,
- ::testing::Values(
- SubpelVarianceParams(2, 2, &aom_sub_pixel_variance4x4_msa, 0),
- SubpelVarianceParams(2, 3, &aom_sub_pixel_variance4x8_msa, 0),
- SubpelVarianceParams(3, 2, &aom_sub_pixel_variance8x4_msa, 0),
- SubpelVarianceParams(3, 3, &aom_sub_pixel_variance8x8_msa, 0),
- SubpelVarianceParams(3, 4, &aom_sub_pixel_variance8x16_msa, 0),
- SubpelVarianceParams(4, 3, &aom_sub_pixel_variance16x8_msa, 0),
- SubpelVarianceParams(4, 4, &aom_sub_pixel_variance16x16_msa, 0),
- SubpelVarianceParams(4, 5, &aom_sub_pixel_variance16x32_msa, 0),
- SubpelVarianceParams(5, 4, &aom_sub_pixel_variance32x16_msa, 0),
- SubpelVarianceParams(5, 5, &aom_sub_pixel_variance32x32_msa, 0),
- SubpelVarianceParams(5, 6, &aom_sub_pixel_variance32x64_msa, 0),
- SubpelVarianceParams(6, 5, &aom_sub_pixel_variance64x32_msa, 0),
- SubpelVarianceParams(6, 6, &aom_sub_pixel_variance64x64_msa, 0)));
-
-INSTANTIATE_TEST_CASE_P(
- MSA, AvxSubpelAvgVarianceTest,
- ::testing::Values(
- SubpelAvgVarianceParams(6, 6, &aom_sub_pixel_avg_variance64x64_msa, 0),
- SubpelAvgVarianceParams(6, 5, &aom_sub_pixel_avg_variance64x32_msa, 0),
- SubpelAvgVarianceParams(5, 6, &aom_sub_pixel_avg_variance32x64_msa, 0),
- SubpelAvgVarianceParams(5, 5, &aom_sub_pixel_avg_variance32x32_msa, 0),
- SubpelAvgVarianceParams(5, 4, &aom_sub_pixel_avg_variance32x16_msa, 0),
- SubpelAvgVarianceParams(4, 5, &aom_sub_pixel_avg_variance16x32_msa, 0),
- SubpelAvgVarianceParams(4, 4, &aom_sub_pixel_avg_variance16x16_msa, 0),
- SubpelAvgVarianceParams(4, 3, &aom_sub_pixel_avg_variance16x8_msa, 0),
- SubpelAvgVarianceParams(3, 4, &aom_sub_pixel_avg_variance8x16_msa, 0),
- SubpelAvgVarianceParams(3, 3, &aom_sub_pixel_avg_variance8x8_msa, 0),
- SubpelAvgVarianceParams(3, 2, &aom_sub_pixel_avg_variance8x4_msa, 0),
- SubpelAvgVarianceParams(2, 3, &aom_sub_pixel_avg_variance4x8_msa, 0),
- SubpelAvgVarianceParams(2, 2, &aom_sub_pixel_avg_variance4x4_msa, 0)));
-#endif // HAVE_MSA
-} // namespace
diff --git a/third_party/aom/test/video_source.h b/third_party/aom/test/video_source.h
deleted file mode 100644
index 3c1c5e559..000000000
--- a/third_party/aom/test/video_source.h
+++ /dev/null
@@ -1,259 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-#ifndef AOM_TEST_VIDEO_SOURCE_H_
-#define AOM_TEST_VIDEO_SOURCE_H_
-
-#if defined(_WIN32)
-#undef NOMINMAX
-#define NOMINMAX
-#define WIN32_LEAN_AND_MEAN
-#include <windows.h>
-#endif
-#include <cstdio>
-#include <cstdlib>
-#include <string>
-#include "test/acm_random.h"
-#include "aom/aom_encoder.h"
-
-namespace libaom_test {
-
-// Helper macros to ensure LIBAOM_TEST_DATA_PATH is a quoted string.
-// These are undefined right below GetDataPath
-// NOTE: LIBAOM_TEST_DATA_PATH MUST NOT be a quoted string before
-// Stringification or the GetDataPath will fail at runtime
-#define TO_STRING(S) #S
-#define STRINGIFY(S) TO_STRING(S)
-
-// A simple function to encapsulate cross platform retrieval of test data path
-static std::string GetDataPath() {
- const char *const data_path = getenv("LIBAOM_TEST_DATA_PATH");
- if (data_path == NULL) {
-#ifdef LIBAOM_TEST_DATA_PATH
- // In some environments, we cannot set environment variables
- // Instead, we set the data path by using a preprocessor symbol
- // which can be set from make files
- return STRINGIFY(LIBAOM_TEST_DATA_PATH);
-#else
- return ".";
-#endif
- }
- return data_path;
-}
-
-// Undefining stringification macros because they are not used elsewhere
-#undef TO_STRING
-#undef STRINGIFY
-
-inline FILE *OpenTestDataFile(const std::string &file_name) {
- const std::string path_to_source = GetDataPath() + "/" + file_name;
- return fopen(path_to_source.c_str(), "rb");
-}
-
-static FILE *GetTempOutFile(std::string *file_name) {
- file_name->clear();
-#if defined(_WIN32)
- char fname[MAX_PATH];
- char tmppath[MAX_PATH];
- if (GetTempPathA(MAX_PATH, tmppath)) {
- // Assume for now that the filename generated is unique per process
- if (GetTempFileNameA(tmppath, "lvx", 0, fname)) {
- file_name->assign(fname);
- return fopen(fname, "wb+");
- }
- }
- return NULL;
-#else
- char name_template[] = "/tmp/libaomtest.XXXXXX";
- const int fd = mkstemp(name_template);
- *file_name = name_template;
- return fdopen(fd, "wb+");
-#endif
-}
-
-class TempOutFile {
- public:
- TempOutFile() { file_ = GetTempOutFile(&file_name_); }
- ~TempOutFile() {
- CloseFile();
- if (!file_name_.empty()) {
- EXPECT_EQ(0, remove(file_name_.c_str()));
- }
- }
- FILE *file() { return file_; }
- const std::string &file_name() { return file_name_; }
-
- protected:
- void CloseFile() {
- if (file_) {
- fclose(file_);
- file_ = NULL;
- }
- }
- FILE *file_;
- std::string file_name_;
-};
-
-// Abstract base class for test video sources, which provide a stream of
-// aom_image_t images with associated timestamps and duration.
-class VideoSource {
- public:
- virtual ~VideoSource() {}
-
- // Prepare the stream for reading, rewind/open as necessary.
- virtual void Begin() = 0;
-
- // Advance the cursor to the next frame
- virtual void Next() = 0;
-
- // Get the current video frame, or NULL on End-Of-Stream.
- virtual aom_image_t *img() const = 0;
-
- // Get the presentation timestamp of the current frame.
- virtual aom_codec_pts_t pts() const = 0;
-
- // Get the current frame's duration
- virtual unsigned long duration() const = 0;
-
- // Get the timebase for the stream
- virtual aom_rational_t timebase() const = 0;
-
- // Get the current frame counter, starting at 0.
- virtual unsigned int frame() const = 0;
-
- // Get the current file limit.
- virtual unsigned int limit() const = 0;
-};
-
-class DummyVideoSource : public VideoSource {
- public:
- DummyVideoSource()
- : img_(NULL), limit_(100), width_(80), height_(64),
- format_(AOM_IMG_FMT_I420) {
- ReallocImage();
- }
-
- virtual ~DummyVideoSource() { aom_img_free(img_); }
-
- virtual void Begin() {
- frame_ = 0;
- FillFrame();
- }
-
- virtual void Next() {
- ++frame_;
- FillFrame();
- }
-
- virtual aom_image_t *img() const { return (frame_ < limit_) ? img_ : NULL; }
-
- // Models a stream where Timebase = 1/FPS, so pts == frame.
- virtual aom_codec_pts_t pts() const { return frame_; }
-
- virtual unsigned long duration() const { return 1; }
-
- virtual aom_rational_t timebase() const {
- const aom_rational_t t = { 1, 30 };
- return t;
- }
-
- virtual unsigned int frame() const { return frame_; }
-
- virtual unsigned int limit() const { return limit_; }
-
- void set_limit(unsigned int limit) { limit_ = limit; }
-
- void SetSize(unsigned int width, unsigned int height) {
- if (width != width_ || height != height_) {
- width_ = width;
- height_ = height;
- ReallocImage();
- }
- }
-
- void SetImageFormat(aom_img_fmt_t format) {
- if (format_ != format) {
- format_ = format;
- ReallocImage();
- }
- }
-
- protected:
- virtual void FillFrame() {
- if (img_) memset(img_->img_data, 0, raw_sz_);
- }
-
- void ReallocImage() {
- aom_img_free(img_);
- img_ = aom_img_alloc(NULL, format_, width_, height_, 32);
- raw_sz_ = ((img_->w + 31) & ~31) * img_->h * img_->bps / 8;
- }
-
- aom_image_t *img_;
- size_t raw_sz_;
- unsigned int limit_;
- unsigned int frame_;
- unsigned int width_;
- unsigned int height_;
- aom_img_fmt_t format_;
-};
-
-class RandomVideoSource : public DummyVideoSource {
- public:
- RandomVideoSource(int seed = ACMRandom::DeterministicSeed())
- : rnd_(seed), seed_(seed) {}
-
- protected:
- // Reset the RNG to get a matching stream for the second pass
- virtual void Begin() {
- frame_ = 0;
- rnd_.Reset(seed_);
- FillFrame();
- }
-
- // 15 frames of noise, followed by 15 static frames. Reset to 0 rather
- // than holding previous frames to encourage keyframes to be thrown.
- virtual void FillFrame() {
- if (img_) {
- if (frame_ % 30 < 15)
- for (size_t i = 0; i < raw_sz_; ++i) img_->img_data[i] = rnd_.Rand8();
- else
- memset(img_->img_data, 0, raw_sz_);
- }
- }
-
- ACMRandom rnd_;
- int seed_;
-};
-
-// Abstract base class for test video sources, which provide a stream of
-// decompressed images to the decoder.
-class CompressedVideoSource {
- public:
- virtual ~CompressedVideoSource() {}
-
- virtual void Init() = 0;
-
- // Prepare the stream for reading, rewind/open as necessary.
- virtual void Begin() = 0;
-
- // Advance the cursor to the next frame
- virtual void Next() = 0;
-
- virtual const uint8_t *cxdata() const = 0;
-
- virtual size_t frame_size() const = 0;
-
- virtual unsigned int frame_number() const = 0;
-};
-
-} // namespace libaom_test
-
-#endif // AOM_TEST_VIDEO_SOURCE_H_
diff --git a/third_party/aom/test/visual_metrics.py b/third_party/aom/test/visual_metrics.py
deleted file mode 100755
index 9055feb33..000000000
--- a/third_party/aom/test/visual_metrics.py
+++ /dev/null
@@ -1,466 +0,0 @@
-#!/usr/bin/python
-#
-# Copyright (c) 2016, Alliance for Open Media. All rights reserved
-#
-# This source code is subject to the terms of the BSD 2 Clause License and
-# the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
-# was not distributed with this source code in the LICENSE file, you can
-# obtain it at www.aomedia.org/license/software. If the Alliance for Open
-# Media Patent License 1.0 was not distributed with this source code in the
-# PATENTS file, you can obtain it at www.aomedia.org/license/patent.
-#
-
-"""Converts video encoding result data from text files to visualization
-data source."""
-
-__author__ = "jzern@google.com (James Zern),"
-__author__ += "jimbankoski@google.com (Jim Bankoski)"
-
-import fnmatch
-import numpy as np
-import scipy as sp
-import scipy.interpolate
-import os
-import re
-import string
-import sys
-import math
-import warnings
-
-import gviz_api
-
-from os.path import basename
-from os.path import splitext
-
-warnings.simplefilter('ignore', np.RankWarning)
-warnings.simplefilter('ignore', RuntimeWarning)
-
-def bdsnr2(metric_set1, metric_set2):
- """
- BJONTEGAARD Bjontegaard metric calculation adapted
- Bjontegaard's snr metric allows to compute the average % saving in decibels
- between two rate-distortion curves [1]. This is an adaptation of that
- method that fixes inconsistencies when the curve fit operation goes awry
- by replacing the curve fit function with a Piecewise Cubic Hermite
- Interpolating Polynomial and then integrating that by evaluating that
- function at small intervals using the trapezoid method to calculate
- the integral.
-
- metric_set1 - list of tuples ( bitrate, metric ) for first graph
- metric_set2 - list of tuples ( bitrate, metric ) for second graph
- """
-
- if not metric_set1 or not metric_set2:
- return 0.0
-
- try:
-
- # pchip_interlopate requires keys sorted by x axis. x-axis will
- # be our metric not the bitrate so sort by metric.
- metric_set1.sort()
- metric_set2.sort()
-
- # Pull the log of the rate and clamped psnr from metric_sets.
- log_rate1 = [math.log(x[0]) for x in metric_set1]
- metric1 = [100.0 if x[1] == float('inf') else x[1] for x in metric_set1]
- log_rate2 = [math.log(x[0]) for x in metric_set2]
- metric2 = [100.0 if x[1] == float('inf') else x[1] for x in metric_set2]
-
- # Integration interval. This metric only works on the area that's
- # overlapping. Extrapolation of these things is sketchy so we avoid.
- min_int = max([min(log_rate1), min(log_rate2)])
- max_int = min([max(log_rate1), max(log_rate2)])
-
- # No overlap means no sensible metric possible.
- if max_int <= min_int:
- return 0.0
-
- # Use Piecewise Cubic Hermite Interpolating Polynomial interpolation to
- # create 100 new samples points separated by interval.
- lin = np.linspace(min_int, max_int, num=100, retstep=True)
- interval = lin[1]
- samples = lin[0]
- v1 = scipy.interpolate.pchip_interpolate(log_rate1, metric1, samples)
- v2 = scipy.interpolate.pchip_interpolate(log_rate2, metric2, samples)
-
- # Calculate the integral using the trapezoid method on the samples.
- int_v1 = np.trapz(v1, dx=interval)
- int_v2 = np.trapz(v2, dx=interval)
-
- # Calculate the average improvement.
- avg_exp_diff = (int_v2 - int_v1) / (max_int - min_int)
-
- except (TypeError, ZeroDivisionError, ValueError, np.RankWarning) as e:
- return 0
-
- return avg_exp_diff
-
-def bdrate2(metric_set1, metric_set2):
- """
- BJONTEGAARD Bjontegaard metric calculation adapted
- Bjontegaard's metric allows to compute the average % saving in bitrate
- between two rate-distortion curves [1]. This is an adaptation of that
- method that fixes inconsistencies when the curve fit operation goes awry
- by replacing the curve fit function with a Piecewise Cubic Hermite
- Interpolating Polynomial and then integrating that by evaluating that
- function at small intervals using the trapezoid method to calculate
- the integral.
-
- metric_set1 - list of tuples ( bitrate, metric ) for first graph
- metric_set2 - list of tuples ( bitrate, metric ) for second graph
- """
-
- if not metric_set1 or not metric_set2:
- return 0.0
-
- try:
-
- # pchip_interlopate requires keys sorted by x axis. x-axis will
- # be our metric not the bitrate so sort by metric.
- metric_set1.sort(key=lambda tup: tup[1])
- metric_set2.sort(key=lambda tup: tup[1])
-
- # Pull the log of the rate and clamped psnr from metric_sets.
- log_rate1 = [math.log(x[0]) for x in metric_set1]
- metric1 = [100.0 if x[1] == float('inf') else x[1] for x in metric_set1]
- log_rate2 = [math.log(x[0]) for x in metric_set2]
- metric2 = [100.0 if x[1] == float('inf') else x[1] for x in metric_set2]
-
- # Integration interval. This metric only works on the area that's
- # overlapping. Extrapolation of these things is sketchy so we avoid.
- min_int = max([min(metric1), min(metric2)])
- max_int = min([max(metric1), max(metric2)])
-
- # No overlap means no sensible metric possible.
- if max_int <= min_int:
- return 0.0
-
- # Use Piecewise Cubic Hermite Interpolating Polynomial interpolation to
- # create 100 new samples points separated by interval.
- lin = np.linspace(min_int, max_int, num=100, retstep=True)
- interval = lin[1]
- samples = lin[0]
- v1 = scipy.interpolate.pchip_interpolate(metric1, log_rate1, samples)
- v2 = scipy.interpolate.pchip_interpolate(metric2, log_rate2, samples)
-
- # Calculate the integral using the trapezoid method on the samples.
- int_v1 = np.trapz(v1, dx=interval)
- int_v2 = np.trapz(v2, dx=interval)
-
- # Calculate the average improvement.
- avg_exp_diff = (int_v2 - int_v1) / (max_int - min_int)
-
- except (TypeError, ZeroDivisionError, ValueError, np.RankWarning) as e:
- return 0
-
- # Convert to a percentage.
- avg_diff = (math.exp(avg_exp_diff) - 1) * 100
-
- return avg_diff
-
-
-
-def FillForm(string_for_substitution, dictionary_of_vars):
- """
- This function substitutes all matches of the command string //%% ... %%//
- with the variable represented by ... .
- """
- return_string = string_for_substitution
- for i in re.findall("//%%(.*)%%//", string_for_substitution):
- return_string = re.sub("//%%" + i + "%%//", dictionary_of_vars[i],
- return_string)
- return return_string
-
-
-def HasMetrics(line):
- """
- The metrics files produced by aomenc are started with a B for headers.
- """
- # If the first char of the first word on the line is a digit
- if len(line) == 0:
- return False
- if len(line.split()) == 0:
- return False
- if line.split()[0][0:1].isdigit():
- return True
- return False
-
-def GetMetrics(file_name):
- metric_file = open(file_name, "r")
- return metric_file.readline().split();
-
-def ParseMetricFile(file_name, metric_column):
- metric_set1 = set([])
- metric_file = open(file_name, "r")
- for line in metric_file:
- metrics = string.split(line)
- if HasMetrics(line):
- if metric_column < len(metrics):
- try:
- tuple = float(metrics[0]), float(metrics[metric_column])
- except:
- tuple = float(metrics[0]), 0
- else:
- tuple = float(metrics[0]), 0
- metric_set1.add(tuple)
- metric_set1_sorted = sorted(metric_set1)
- return metric_set1_sorted
-
-
-def FileBetter(file_name_1, file_name_2, metric_column, method):
- """
- Compares two data files and determines which is better and by how
- much. Also produces a histogram of how much better, by PSNR.
- metric_column is the metric.
- """
- # Store and parse our two files into lists of unique tuples.
-
- # Read the two files, parsing out lines starting with bitrate.
- metric_set1_sorted = ParseMetricFile(file_name_1, metric_column)
- metric_set2_sorted = ParseMetricFile(file_name_2, metric_column)
-
-
- def GraphBetter(metric_set1_sorted, metric_set2_sorted, base_is_set_2):
- """
- Search through the sorted metric file for metrics on either side of
- the metric from file 1. Since both lists are sorted we really
- should not have to search through the entire range, but these
- are small files."""
- total_bitrate_difference_ratio = 0.0
- count = 0
- for bitrate, metric in metric_set1_sorted:
- if bitrate == 0:
- continue
- for i in range(len(metric_set2_sorted) - 1):
- s2_bitrate_0, s2_metric_0 = metric_set2_sorted[i]
- s2_bitrate_1, s2_metric_1 = metric_set2_sorted[i + 1]
- # We have a point on either side of our metric range.
- if metric > s2_metric_0 and metric <= s2_metric_1:
-
- # Calculate a slope.
- if s2_metric_1 - s2_metric_0 != 0:
- metric_slope = ((s2_bitrate_1 - s2_bitrate_0) /
- (s2_metric_1 - s2_metric_0))
- else:
- metric_slope = 0
-
- estimated_s2_bitrate = (s2_bitrate_0 + (metric - s2_metric_0) *
- metric_slope)
-
- if estimated_s2_bitrate == 0:
- continue
- # Calculate percentage difference as given by base.
- if base_is_set_2 == 0:
- bitrate_difference_ratio = ((bitrate - estimated_s2_bitrate) /
- bitrate)
- else:
- bitrate_difference_ratio = ((bitrate - estimated_s2_bitrate) /
- estimated_s2_bitrate)
-
- total_bitrate_difference_ratio += bitrate_difference_ratio
- count += 1
- break
-
- # Calculate the average improvement between graphs.
- if count != 0:
- avg = total_bitrate_difference_ratio / count
-
- else:
- avg = 0.0
-
- return avg
-
- # Be fair to both graphs by testing all the points in each.
- if method == 'avg':
- avg_improvement = 50 * (
- GraphBetter(metric_set1_sorted, metric_set2_sorted, 1) -
- GraphBetter(metric_set2_sorted, metric_set1_sorted, 0))
- elif method == 'dsnr':
- avg_improvement = bdsnr2(metric_set1_sorted, metric_set2_sorted)
- else:
- avg_improvement = bdrate2(metric_set2_sorted, metric_set1_sorted)
-
- return avg_improvement
-
-
-def HandleFiles(variables):
- """
- This script creates html for displaying metric data produced from data
- in a video stats file, as created by the AOM project when enable_psnr
- is turned on:
-
- Usage: visual_metrics.py template.html pattern base_dir sub_dir [ sub_dir2 ..]
-
- The script parses each metrics file [see below] that matches the
- statfile_pattern in the baseline directory and looks for the file that
- matches that same file in each of the sub_dirs, and compares the resultant
- metrics bitrate, avg psnr, glb psnr, and ssim. "
-
- It provides a table in which each row is a file in the line directory,
- and a column for each subdir, with the cells representing how that clip
- compares to baseline for that subdir. A graph is given for each which
- compares filesize to that metric. If you click on a point in the graph it
- zooms in on that point.
-
- a SAMPLE metrics file:
-
- Bitrate AVGPsnr GLBPsnr AVPsnrP GLPsnrP VPXSSIM Time(us)
- 25.911 38.242 38.104 38.258 38.121 75.790 14103
- Bitrate AVGPsnr GLBPsnr AVPsnrP GLPsnrP VPXSSIM Time(us)
- 49.982 41.264 41.129 41.255 41.122 83.993 19817
- Bitrate AVGPsnr GLBPsnr AVPsnrP GLPsnrP VPXSSIM Time(us)
- 74.967 42.911 42.767 42.899 42.756 87.928 17332
- Bitrate AVGPsnr GLBPsnr AVPsnrP GLPsnrP VPXSSIM Time(us)
- 100.012 43.983 43.838 43.881 43.738 89.695 25389
- Bitrate AVGPsnr GLBPsnr AVPsnrP GLPsnrP VPXSSIM Time(us)
- 149.980 45.338 45.203 45.184 45.043 91.591 25438
- Bitrate AVGPsnr GLBPsnr AVPsnrP GLPsnrP VPXSSIM Time(us)
- 199.852 46.225 46.123 46.113 45.999 92.679 28302
- Bitrate AVGPsnr GLBPsnr AVPsnrP GLPsnrP VPXSSIM Time(us)
- 249.922 46.864 46.773 46.777 46.673 93.334 27244
- Bitrate AVGPsnr GLBPsnr AVPsnrP GLPsnrP VPXSSIM Time(us)
- 299.998 47.366 47.281 47.317 47.220 93.844 27137
- Bitrate AVGPsnr GLBPsnr AVPsnrP GLPsnrP VPXSSIM Time(us)
- 349.769 47.746 47.677 47.722 47.648 94.178 32226
- Bitrate AVGPsnr GLBPsnr AVPsnrP GLPsnrP VPXSSIM Time(us)
- 399.773 48.032 47.971 48.013 47.946 94.362 36203
-
- sample use:
- visual_metrics.py template.html "*stt" aom aom_b aom_c > metrics.html
- """
-
- # The template file is the html file into which we will write the
- # data from the stats file, formatted correctly for the gviz_api.
- template_file = open(variables[1], "r")
- page_template = template_file.read()
- template_file.close()
-
- # This is the path match pattern for finding stats files amongst
- # all the other files it could be. eg: *.stt
- file_pattern = variables[2]
-
- # This is the directory with files that we will use to do the comparison
- # against.
- baseline_dir = variables[3]
- snrs = ''
- filestable = {}
-
- filestable['dsnr'] = ''
- filestable['drate'] = ''
- filestable['avg'] = ''
-
- # Dirs is directories after the baseline to compare to the base.
- dirs = variables[4:len(variables)]
-
- # Find the metric files in the baseline directory.
- dir_list = sorted(fnmatch.filter(os.listdir(baseline_dir), file_pattern))
-
- metrics = GetMetrics(baseline_dir + "/" + dir_list[0])
-
- metrics_js = 'metrics = ["' + '", "'.join(metrics) + '"];'
-
- for column in range(1, len(metrics)):
-
- for metric in ['avg','dsnr','drate']:
- description = {"file": ("string", "File")}
-
- # Go through each directory and add a column header to our description.
- countoverall = {}
- sumoverall = {}
-
- for directory in dirs:
- description[directory] = ("number", directory)
- countoverall[directory] = 0
- sumoverall[directory] = 0
-
- # Data holds the data for the visualization, name given comes from
- # gviz_api sample code.
- data = []
- for filename in dir_list:
- row = {'file': splitext(basename(filename))[0] }
- baseline_file_name = baseline_dir + "/" + filename
-
- # Read the metric file from each of the directories in our list.
- for directory in dirs:
- metric_file_name = directory + "/" + filename
-
- # If there is a metric file in the current directory, open it
- # and calculate its overall difference between it and the baseline
- # directory's metric file.
- if os.path.isfile(metric_file_name):
- overall = FileBetter(baseline_file_name, metric_file_name,
- column, metric)
- row[directory] = overall
-
- sumoverall[directory] += overall
- countoverall[directory] += 1
-
- data.append(row)
-
- # Add the overall numbers.
- row = {"file": "OVERALL" }
- for directory in dirs:
- row[directory] = sumoverall[directory] / countoverall[directory]
- data.append(row)
-
- # write the tables out
- data_table = gviz_api.DataTable(description)
- data_table.LoadData(data)
-
- filestable[metric] = ( filestable[metric] + "filestable_" + metric +
- "[" + str(column) + "]=" +
- data_table.ToJSon(columns_order=["file"]+dirs) + "\n" )
-
- filestable_avg = filestable['avg']
- filestable_dpsnr = filestable['dsnr']
- filestable_drate = filestable['drate']
-
- # Now we collect all the data for all the graphs. First the column
- # headers which will be Datarate and then each directory.
- columns = ("datarate",baseline_dir)
- description = {"datarate":("number", "Datarate")}
- for directory in dirs:
- description[directory] = ("number", directory)
-
- description[baseline_dir] = ("number", baseline_dir)
-
- snrs = snrs + "snrs[" + str(column) + "] = ["
-
- # Now collect the data for the graphs, file by file.
- for filename in dir_list:
-
- data = []
-
- # Collect the file in each directory and store all of its metrics
- # in the associated gviz metrics table.
- all_dirs = dirs + [baseline_dir]
- for directory in all_dirs:
-
- metric_file_name = directory + "/" + filename
- if not os.path.isfile(metric_file_name):
- continue
-
- # Read and parse the metrics file storing it to the data we'll
- # use for the gviz_api.Datatable.
- metrics = ParseMetricFile(metric_file_name, column)
- for bitrate, metric in metrics:
- data.append({"datarate": bitrate, directory: metric})
-
- data_table = gviz_api.DataTable(description)
- data_table.LoadData(data)
- snrs = snrs + "'" + data_table.ToJSon(
- columns_order=tuple(["datarate",baseline_dir]+dirs)) + "',"
-
- snrs = snrs + "]\n"
-
- formatters = ""
- for i in range(len(dirs)):
- formatters = "%s formatter.format(better, %d);" % (formatters, i+1)
-
- print FillForm(page_template, vars())
- return
-
-if len(sys.argv) < 3:
- print HandleFiles.__doc__
-else:
- HandleFiles(sys.argv)
diff --git a/third_party/aom/test/warp_filter_test.cc b/third_party/aom/test/warp_filter_test.cc
deleted file mode 100644
index 19a4e8b6a..000000000
--- a/third_party/aom/test/warp_filter_test.cc
+++ /dev/null
@@ -1,56 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-#include "third_party/googletest/src/googletest/include/gtest/gtest.h"
-#include "test/warp_filter_test_util.h"
-using ::testing::make_tuple;
-using ::testing::tuple;
-using libaom_test::ACMRandom;
-using libaom_test::AV1HighbdWarpFilter::AV1HighbdWarpFilterTest;
-using libaom_test::AV1WarpFilter::AV1WarpFilterTest;
-
-namespace {
-
-TEST_P(AV1WarpFilterTest, CheckOutput) {
- RunCheckOutput(::testing::get<3>(GET_PARAM(0)));
-}
-TEST_P(AV1WarpFilterTest, DISABLED_Speed) {
- RunSpeedTest(::testing::get<3>(GET_PARAM(0)));
-}
-
-INSTANTIATE_TEST_CASE_P(
- C, AV1WarpFilterTest,
- libaom_test::AV1WarpFilter::BuildParams(av1_warp_affine_c));
-
-#if HAVE_SSE4_1
-INSTANTIATE_TEST_CASE_P(
- SSE4_1, AV1WarpFilterTest,
- libaom_test::AV1WarpFilter::BuildParams(av1_warp_affine_sse4_1));
-
-TEST_P(AV1HighbdWarpFilterTest, CheckOutput) {
- RunCheckOutput(::testing::get<4>(GET_PARAM(0)));
-}
-TEST_P(AV1HighbdWarpFilterTest, DISABLED_Speed) {
- RunSpeedTest(::testing::get<4>(GET_PARAM(0)));
-}
-
-INSTANTIATE_TEST_CASE_P(SSE4_1, AV1HighbdWarpFilterTest,
- libaom_test::AV1HighbdWarpFilter::BuildParams(
- av1_highbd_warp_affine_sse4_1));
-
-#endif // HAVE_SSE4_1
-
-#if HAVE_NEON
-INSTANTIATE_TEST_CASE_P(
- NEON, AV1WarpFilterTest,
- libaom_test::AV1WarpFilter::BuildParams(av1_warp_affine_neon));
-#endif // HAVE_NEON
-
-} // namespace
diff --git a/third_party/aom/test/warp_filter_test_util.cc b/third_party/aom/test/warp_filter_test_util.cc
deleted file mode 100644
index 69b2ed4af..000000000
--- a/third_party/aom/test/warp_filter_test_util.cc
+++ /dev/null
@@ -1,480 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-#include "aom_ports/aom_timer.h"
-#include "test/warp_filter_test_util.h"
-
-using ::testing::make_tuple;
-using ::testing::tuple;
-
-namespace libaom_test {
-
-int32_t random_warped_param(libaom_test::ACMRandom *rnd, int bits) {
- // 1 in 8 chance of generating zero (arbitrarily chosen)
- if (((rnd->Rand8()) & 7) == 0) return 0;
- // Otherwise, enerate uniform values in the range
- // [-(1 << bits), 1] U [1, 1<<bits]
- int32_t v = 1 + (rnd->Rand16() & ((1 << bits) - 1));
- if ((rnd->Rand8()) & 1) return -v;
- return v;
-}
-
-void generate_warped_model(libaom_test::ACMRandom *rnd, int32_t *mat,
- int16_t *alpha, int16_t *beta, int16_t *gamma,
- int16_t *delta, const int is_alpha_zero,
- const int is_beta_zero, const int is_gamma_zero,
- const int is_delta_zero) {
- while (1) {
- int rnd8 = rnd->Rand8() & 3;
- mat[0] = random_warped_param(rnd, WARPEDMODEL_PREC_BITS + 6);
- mat[1] = random_warped_param(rnd, WARPEDMODEL_PREC_BITS + 6);
- mat[2] = (random_warped_param(rnd, WARPEDMODEL_PREC_BITS - 3)) +
- (1 << WARPEDMODEL_PREC_BITS);
- mat[3] = random_warped_param(rnd, WARPEDMODEL_PREC_BITS - 3);
-
- if (rnd8 <= 1) {
- // AFFINE
- mat[4] = random_warped_param(rnd, WARPEDMODEL_PREC_BITS - 3);
- mat[5] = (random_warped_param(rnd, WARPEDMODEL_PREC_BITS - 3)) +
- (1 << WARPEDMODEL_PREC_BITS);
- } else if (rnd8 == 2) {
- mat[4] = -mat[3];
- mat[5] = mat[2];
- } else {
- mat[4] = random_warped_param(rnd, WARPEDMODEL_PREC_BITS - 3);
- mat[5] = (random_warped_param(rnd, WARPEDMODEL_PREC_BITS - 3)) +
- (1 << WARPEDMODEL_PREC_BITS);
- if (is_alpha_zero == 1) mat[2] = 1 << WARPEDMODEL_PREC_BITS;
- if (is_beta_zero == 1) mat[3] = 0;
- if (is_gamma_zero == 1) mat[4] = 0;
- if (is_delta_zero == 1)
- mat[5] = (((int64_t)mat[3] * mat[4] + (mat[2] / 2)) / mat[2]) +
- (1 << WARPEDMODEL_PREC_BITS);
- }
-
- // Calculate the derived parameters and check that they are suitable
- // for the warp filter.
- assert(mat[2] != 0);
-
- *alpha = clamp(mat[2] - (1 << WARPEDMODEL_PREC_BITS), INT16_MIN, INT16_MAX);
- *beta = clamp(mat[3], INT16_MIN, INT16_MAX);
- *gamma = clamp(((int64_t)mat[4] * (1 << WARPEDMODEL_PREC_BITS)) / mat[2],
- INT16_MIN, INT16_MAX);
- *delta =
- clamp(mat[5] - (((int64_t)mat[3] * mat[4] + (mat[2] / 2)) / mat[2]) -
- (1 << WARPEDMODEL_PREC_BITS),
- INT16_MIN, INT16_MAX);
-
- if ((4 * abs(*alpha) + 7 * abs(*beta) >= (1 << WARPEDMODEL_PREC_BITS)) ||
- (4 * abs(*gamma) + 4 * abs(*delta) >= (1 << WARPEDMODEL_PREC_BITS)))
- continue;
-
- *alpha = ROUND_POWER_OF_TWO_SIGNED(*alpha, WARP_PARAM_REDUCE_BITS) *
- (1 << WARP_PARAM_REDUCE_BITS);
- *beta = ROUND_POWER_OF_TWO_SIGNED(*beta, WARP_PARAM_REDUCE_BITS) *
- (1 << WARP_PARAM_REDUCE_BITS);
- *gamma = ROUND_POWER_OF_TWO_SIGNED(*gamma, WARP_PARAM_REDUCE_BITS) *
- (1 << WARP_PARAM_REDUCE_BITS);
- *delta = ROUND_POWER_OF_TWO_SIGNED(*delta, WARP_PARAM_REDUCE_BITS) *
- (1 << WARP_PARAM_REDUCE_BITS);
-
- // We have a valid model, so finish
- return;
- }
-}
-
-namespace AV1WarpFilter {
-::testing::internal::ParamGenerator<WarpTestParams> BuildParams(
- warp_affine_func filter) {
- WarpTestParam params[] = {
- make_tuple(4, 4, 50000, filter), make_tuple(8, 8, 50000, filter),
- make_tuple(64, 64, 1000, filter), make_tuple(4, 16, 20000, filter),
- make_tuple(32, 8, 10000, filter),
- };
- return ::testing::Combine(::testing::ValuesIn(params),
- ::testing::Values(0, 1), ::testing::Values(0, 1),
- ::testing::Values(0, 1), ::testing::Values(0, 1));
-}
-
-AV1WarpFilterTest::~AV1WarpFilterTest() {}
-void AV1WarpFilterTest::SetUp() { rnd_.Reset(ACMRandom::DeterministicSeed()); }
-
-void AV1WarpFilterTest::TearDown() { libaom_test::ClearSystemState(); }
-
-void AV1WarpFilterTest::RunSpeedTest(warp_affine_func test_impl) {
- const int w = 128, h = 128;
- const int border = 16;
- const int stride = w + 2 * border;
- WarpTestParam params = GET_PARAM(0);
- const int out_w = ::testing::get<0>(params),
- out_h = ::testing::get<1>(params);
- const int is_alpha_zero = GET_PARAM(1);
- const int is_beta_zero = GET_PARAM(2);
- const int is_gamma_zero = GET_PARAM(3);
- const int is_delta_zero = GET_PARAM(4);
- int sub_x, sub_y;
- const int bd = 8;
-
- uint8_t *input_ = new uint8_t[h * stride];
- uint8_t *input = input_ + border;
-
- // The warp functions always write rows with widths that are multiples of 8.
- // So to avoid a buffer overflow, we may need to pad rows to a multiple of 8.
- int output_n = ((out_w + 7) & ~7) * out_h;
- uint8_t *output = new uint8_t[output_n];
- int32_t mat[8];
- int16_t alpha, beta, gamma, delta;
- ConvolveParams conv_params = get_conv_params(0, 0, bd);
- CONV_BUF_TYPE *dsta = new CONV_BUF_TYPE[output_n];
- generate_warped_model(&rnd_, mat, &alpha, &beta, &gamma, &delta,
- is_alpha_zero, is_beta_zero, is_gamma_zero,
- is_delta_zero);
-
- for (int r = 0; r < h; ++r)
- for (int c = 0; c < w; ++c) input[r * stride + c] = rnd_.Rand8();
- for (int r = 0; r < h; ++r) {
- memset(input + r * stride - border, input[r * stride], border);
- memset(input + r * stride + w, input[r * stride + (w - 1)], border);
- }
-
- sub_x = 0;
- sub_y = 0;
- int do_average = 0;
-
- conv_params = get_conv_params_no_round(do_average, 0, dsta, out_w, 1, bd);
- conv_params.use_jnt_comp_avg = 0;
-
- const int num_loops = 1000000000 / (out_w + out_h);
- aom_usec_timer timer;
- aom_usec_timer_start(&timer);
- for (int i = 0; i < num_loops; ++i)
- test_impl(mat, input, w, h, stride, output, 32, 32, out_w, out_h, out_w,
- sub_x, sub_y, &conv_params, alpha, beta, gamma, delta);
-
- aom_usec_timer_mark(&timer);
- const int elapsed_time = static_cast<int>(aom_usec_timer_elapsed(&timer));
- printf("warp %3dx%-3d: %7.2f ns\n", out_w, out_h,
- 1000.0 * elapsed_time / num_loops);
-
- delete[] input_;
- delete[] output;
- delete[] dsta;
-}
-
-void AV1WarpFilterTest::RunCheckOutput(warp_affine_func test_impl) {
- const int w = 128, h = 128;
- const int border = 16;
- const int stride = w + 2 * border;
- WarpTestParam params = GET_PARAM(0);
- const int is_alpha_zero = GET_PARAM(1);
- const int is_beta_zero = GET_PARAM(2);
- const int is_gamma_zero = GET_PARAM(3);
- const int is_delta_zero = GET_PARAM(4);
- const int out_w = ::testing::get<0>(params),
- out_h = ::testing::get<1>(params);
- const int num_iters = ::testing::get<2>(params);
- int i, j, sub_x, sub_y;
- const int bd = 8;
-
- // The warp functions always write rows with widths that are multiples of 8.
- // So to avoid a buffer overflow, we may need to pad rows to a multiple of 8.
- int output_n = ((out_w + 7) & ~7) * out_h;
- uint8_t *input_ = new uint8_t[h * stride];
- uint8_t *input = input_ + border;
- uint8_t *output = new uint8_t[output_n];
- uint8_t *output2 = new uint8_t[output_n];
- int32_t mat[8];
- int16_t alpha, beta, gamma, delta;
- ConvolveParams conv_params = get_conv_params(0, 0, bd);
- CONV_BUF_TYPE *dsta = new CONV_BUF_TYPE[output_n];
- CONV_BUF_TYPE *dstb = new CONV_BUF_TYPE[output_n];
- for (int i = 0; i < output_n; ++i) output[i] = output2[i] = rnd_.Rand8();
-
- for (i = 0; i < num_iters; ++i) {
- // Generate an input block and extend its borders horizontally
- for (int r = 0; r < h; ++r)
- for (int c = 0; c < w; ++c) input[r * stride + c] = rnd_.Rand8();
- for (int r = 0; r < h; ++r) {
- memset(input + r * stride - border, input[r * stride], border);
- memset(input + r * stride + w, input[r * stride + (w - 1)], border);
- }
- const int use_no_round = rnd_.Rand8() & 1;
- for (sub_x = 0; sub_x < 2; ++sub_x)
- for (sub_y = 0; sub_y < 2; ++sub_y) {
- generate_warped_model(&rnd_, mat, &alpha, &beta, &gamma, &delta,
- is_alpha_zero, is_beta_zero, is_gamma_zero,
- is_delta_zero);
-
- for (int ii = 0; ii < 2; ++ii) {
- for (int jj = 0; jj < 5; ++jj) {
- for (int do_average = 0; do_average <= 1; ++do_average) {
- if (use_no_round) {
- conv_params =
- get_conv_params_no_round(do_average, 0, dsta, out_w, 1, bd);
- } else {
- conv_params = get_conv_params(0, 0, bd);
- }
- if (jj >= 4) {
- conv_params.use_jnt_comp_avg = 0;
- } else {
- conv_params.use_jnt_comp_avg = 1;
- conv_params.fwd_offset = quant_dist_lookup_table[ii][jj][0];
- conv_params.bck_offset = quant_dist_lookup_table[ii][jj][1];
- }
- av1_warp_affine_c(mat, input, w, h, stride, output, 32, 32, out_w,
- out_h, out_w, sub_x, sub_y, &conv_params, alpha,
- beta, gamma, delta);
- if (use_no_round) {
- conv_params =
- get_conv_params_no_round(do_average, 0, dstb, out_w, 1, bd);
- }
- if (jj >= 4) {
- conv_params.use_jnt_comp_avg = 0;
- } else {
- conv_params.use_jnt_comp_avg = 1;
- conv_params.fwd_offset = quant_dist_lookup_table[ii][jj][0];
- conv_params.bck_offset = quant_dist_lookup_table[ii][jj][1];
- }
- test_impl(mat, input, w, h, stride, output2, 32, 32, out_w, out_h,
- out_w, sub_x, sub_y, &conv_params, alpha, beta, gamma,
- delta);
- if (use_no_round) {
- for (j = 0; j < out_w * out_h; ++j)
- ASSERT_EQ(dsta[j], dstb[j])
- << "Pixel mismatch at index " << j << " = ("
- << (j % out_w) << ", " << (j / out_w) << ") on iteration "
- << i;
- for (j = 0; j < out_w * out_h; ++j)
- ASSERT_EQ(output[j], output2[j])
- << "Pixel mismatch at index " << j << " = ("
- << (j % out_w) << ", " << (j / out_w) << ") on iteration "
- << i;
- } else {
- for (j = 0; j < out_w * out_h; ++j)
- ASSERT_EQ(output[j], output2[j])
- << "Pixel mismatch at index " << j << " = ("
- << (j % out_w) << ", " << (j / out_w) << ") on iteration "
- << i;
- }
- }
- }
- }
- }
- }
- delete[] input_;
- delete[] output;
- delete[] output2;
- delete[] dsta;
- delete[] dstb;
-}
-} // namespace AV1WarpFilter
-
-namespace AV1HighbdWarpFilter {
-::testing::internal::ParamGenerator<HighbdWarpTestParams> BuildParams(
- highbd_warp_affine_func filter) {
- const HighbdWarpTestParam params[] = {
- make_tuple(4, 4, 100, 8, filter), make_tuple(8, 8, 100, 8, filter),
- make_tuple(64, 64, 100, 8, filter), make_tuple(4, 16, 100, 8, filter),
- make_tuple(32, 8, 100, 8, filter), make_tuple(4, 4, 100, 10, filter),
- make_tuple(8, 8, 100, 10, filter), make_tuple(64, 64, 100, 10, filter),
- make_tuple(4, 16, 100, 10, filter), make_tuple(32, 8, 100, 10, filter),
- make_tuple(4, 4, 100, 12, filter), make_tuple(8, 8, 100, 12, filter),
- make_tuple(64, 64, 100, 12, filter), make_tuple(4, 16, 100, 12, filter),
- make_tuple(32, 8, 100, 12, filter),
- };
- return ::testing::Combine(::testing::ValuesIn(params),
- ::testing::Values(0, 1), ::testing::Values(0, 1),
- ::testing::Values(0, 1), ::testing::Values(0, 1));
-}
-
-AV1HighbdWarpFilterTest::~AV1HighbdWarpFilterTest() {}
-void AV1HighbdWarpFilterTest::SetUp() {
- rnd_.Reset(ACMRandom::DeterministicSeed());
-}
-
-void AV1HighbdWarpFilterTest::TearDown() { libaom_test::ClearSystemState(); }
-
-void AV1HighbdWarpFilterTest::RunSpeedTest(highbd_warp_affine_func test_impl) {
- const int w = 128, h = 128;
- const int border = 16;
- const int stride = w + 2 * border;
- HighbdWarpTestParam param = GET_PARAM(0);
- const int is_alpha_zero = GET_PARAM(1);
- const int is_beta_zero = GET_PARAM(2);
- const int is_gamma_zero = GET_PARAM(3);
- const int is_delta_zero = GET_PARAM(4);
- const int out_w = ::testing::get<0>(param), out_h = ::testing::get<1>(param);
- const int bd = ::testing::get<3>(param);
- const int mask = (1 << bd) - 1;
- int sub_x, sub_y;
-
- // The warp functions always write rows with widths that are multiples of 8.
- // So to avoid a buffer overflow, we may need to pad rows to a multiple of 8.
- int output_n = ((out_w + 7) & ~7) * out_h;
- uint16_t *input_ = new uint16_t[h * stride];
- uint16_t *input = input_ + border;
- uint16_t *output = new uint16_t[output_n];
- int32_t mat[8];
- int16_t alpha, beta, gamma, delta;
- ConvolveParams conv_params = get_conv_params(0, 0, bd);
- CONV_BUF_TYPE *dsta = new CONV_BUF_TYPE[output_n];
-
- generate_warped_model(&rnd_, mat, &alpha, &beta, &gamma, &delta,
- is_alpha_zero, is_beta_zero, is_gamma_zero,
- is_delta_zero);
- // Generate an input block and extend its borders horizontally
- for (int r = 0; r < h; ++r)
- for (int c = 0; c < w; ++c) input[r * stride + c] = rnd_.Rand16() & mask;
- for (int r = 0; r < h; ++r) {
- for (int c = 0; c < border; ++c) {
- input[r * stride - border + c] = input[r * stride];
- input[r * stride + w + c] = input[r * stride + (w - 1)];
- }
- }
-
- sub_x = 0;
- sub_y = 0;
- int do_average = 0;
- conv_params.use_jnt_comp_avg = 0;
- conv_params = get_conv_params_no_round(do_average, 0, dsta, out_w, 1, bd);
-
- const int num_loops = 1000000000 / (out_w + out_h);
- aom_usec_timer timer;
- aom_usec_timer_start(&timer);
-
- for (int i = 0; i < num_loops; ++i)
- test_impl(mat, input, w, h, stride, output, 32, 32, out_w, out_h, out_w,
- sub_x, sub_y, bd, &conv_params, alpha, beta, gamma, delta);
-
- aom_usec_timer_mark(&timer);
- const int elapsed_time = static_cast<int>(aom_usec_timer_elapsed(&timer));
- printf("highbd warp %3dx%-3d: %7.2f ns\n", out_w, out_h,
- 1000.0 * elapsed_time / num_loops);
-
- delete[] input_;
- delete[] output;
- delete[] dsta;
-}
-
-void AV1HighbdWarpFilterTest::RunCheckOutput(
- highbd_warp_affine_func test_impl) {
- const int w = 128, h = 128;
- const int border = 16;
- const int stride = w + 2 * border;
- HighbdWarpTestParam param = GET_PARAM(0);
- const int is_alpha_zero = GET_PARAM(1);
- const int is_beta_zero = GET_PARAM(2);
- const int is_gamma_zero = GET_PARAM(3);
- const int is_delta_zero = GET_PARAM(4);
- const int out_w = ::testing::get<0>(param), out_h = ::testing::get<1>(param);
- const int bd = ::testing::get<3>(param);
- const int num_iters = ::testing::get<2>(param);
- const int mask = (1 << bd) - 1;
- int i, j, sub_x, sub_y;
-
- // The warp functions always write rows with widths that are multiples of 8.
- // So to avoid a buffer overflow, we may need to pad rows to a multiple of 8.
- int output_n = ((out_w + 7) & ~7) * out_h;
- uint16_t *input_ = new uint16_t[h * stride];
- uint16_t *input = input_ + border;
- uint16_t *output = new uint16_t[output_n];
- uint16_t *output2 = new uint16_t[output_n];
- int32_t mat[8];
- int16_t alpha, beta, gamma, delta;
- ConvolveParams conv_params = get_conv_params(0, 0, bd);
- CONV_BUF_TYPE *dsta = new CONV_BUF_TYPE[output_n];
- CONV_BUF_TYPE *dstb = new CONV_BUF_TYPE[output_n];
- for (int i = 0; i < output_n; ++i) output[i] = output2[i] = rnd_.Rand16();
-
- for (i = 0; i < num_iters; ++i) {
- // Generate an input block and extend its borders horizontally
- for (int r = 0; r < h; ++r)
- for (int c = 0; c < w; ++c) input[r * stride + c] = rnd_.Rand16() & mask;
- for (int r = 0; r < h; ++r) {
- for (int c = 0; c < border; ++c) {
- input[r * stride - border + c] = input[r * stride];
- input[r * stride + w + c] = input[r * stride + (w - 1)];
- }
- }
- const int use_no_round = rnd_.Rand8() & 1;
- for (sub_x = 0; sub_x < 2; ++sub_x)
- for (sub_y = 0; sub_y < 2; ++sub_y) {
- generate_warped_model(&rnd_, mat, &alpha, &beta, &gamma, &delta,
- is_alpha_zero, is_beta_zero, is_gamma_zero,
- is_delta_zero);
- for (int ii = 0; ii < 2; ++ii) {
- for (int jj = 0; jj < 5; ++jj) {
- for (int do_average = 0; do_average <= 1; ++do_average) {
- if (use_no_round) {
- conv_params =
- get_conv_params_no_round(do_average, 0, dsta, out_w, 1, bd);
- } else {
- conv_params = get_conv_params(0, 0, bd);
- }
- if (jj >= 4) {
- conv_params.use_jnt_comp_avg = 0;
- } else {
- conv_params.use_jnt_comp_avg = 1;
- conv_params.fwd_offset = quant_dist_lookup_table[ii][jj][0];
- conv_params.bck_offset = quant_dist_lookup_table[ii][jj][1];
- }
-
- av1_highbd_warp_affine_c(mat, input, w, h, stride, output, 32, 32,
- out_w, out_h, out_w, sub_x, sub_y, bd,
- &conv_params, alpha, beta, gamma, delta);
- if (use_no_round) {
- // TODO(angiebird): Change this to test_impl once we have SIMD
- // implementation
- conv_params =
- get_conv_params_no_round(do_average, 0, dstb, out_w, 1, bd);
- }
- if (jj >= 4) {
- conv_params.use_jnt_comp_avg = 0;
- } else {
- conv_params.use_jnt_comp_avg = 1;
- conv_params.fwd_offset = quant_dist_lookup_table[ii][jj][0];
- conv_params.bck_offset = quant_dist_lookup_table[ii][jj][1];
- }
- test_impl(mat, input, w, h, stride, output2, 32, 32, out_w, out_h,
- out_w, sub_x, sub_y, bd, &conv_params, alpha, beta,
- gamma, delta);
-
- if (use_no_round) {
- for (j = 0; j < out_w * out_h; ++j)
- ASSERT_EQ(dsta[j], dstb[j])
- << "Pixel mismatch at index " << j << " = ("
- << (j % out_w) << ", " << (j / out_w) << ") on iteration "
- << i;
- for (j = 0; j < out_w * out_h; ++j)
- ASSERT_EQ(output[j], output2[j])
- << "Pixel mismatch at index " << j << " = ("
- << (j % out_w) << ", " << (j / out_w) << ") on iteration "
- << i;
- } else {
- for (j = 0; j < out_w * out_h; ++j)
- ASSERT_EQ(output[j], output2[j])
- << "Pixel mismatch at index " << j << " = ("
- << (j % out_w) << ", " << (j / out_w) << ") on iteration "
- << i;
- }
- }
- }
- }
- }
- }
-
- delete[] input_;
- delete[] output;
- delete[] output2;
- delete[] dsta;
- delete[] dstb;
-}
-} // namespace AV1HighbdWarpFilter
-} // namespace libaom_test
diff --git a/third_party/aom/test/warp_filter_test_util.h b/third_party/aom/test/warp_filter_test_util.h
deleted file mode 100644
index b8998e5c8..000000000
--- a/third_party/aom/test/warp_filter_test_util.h
+++ /dev/null
@@ -1,103 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#ifndef AOM_TEST_WARP_FILTER_TEST_UTIL_H_
-#define AOM_TEST_WARP_FILTER_TEST_UTIL_H_
-
-#include "config/av1_rtcd.h"
-#include "config/aom_dsp_rtcd.h"
-
-#include "third_party/googletest/src/googletest/include/gtest/gtest.h"
-#include "test/acm_random.h"
-#include "test/util.h"
-#include "test/clear_system_state.h"
-#include "test/register_state_check.h"
-
-#include "av1/common/mv.h"
-#include "av1/common/common_data.h"
-
-namespace libaom_test {
-
-void generate_warped_model(libaom_test::ACMRandom *rnd, int32_t *mat,
- int16_t *alpha, int16_t *beta, int16_t *gamma,
- int16_t *delta, int is_alpha_zero, int is_beta_zero,
- int is_gamma_zero, int is_delta_zero);
-
-namespace AV1WarpFilter {
-
-typedef void (*warp_affine_func)(const int32_t *mat, const uint8_t *ref,
- int width, int height, int stride,
- uint8_t *pred, int p_col, int p_row,
- int p_width, int p_height, int p_stride,
- int subsampling_x, int subsampling_y,
- ConvolveParams *conv_params, int16_t alpha,
- int16_t beta, int16_t gamma, int16_t delta);
-
-typedef ::testing::tuple<int, int, int, warp_affine_func> WarpTestParam;
-typedef ::testing::tuple<WarpTestParam, int, int, int, int> WarpTestParams;
-
-::testing::internal::ParamGenerator<WarpTestParams> BuildParams(
- warp_affine_func filter);
-
-class AV1WarpFilterTest : public ::testing::TestWithParam<WarpTestParams> {
- public:
- virtual ~AV1WarpFilterTest();
- virtual void SetUp();
-
- virtual void TearDown();
-
- protected:
- void RunCheckOutput(warp_affine_func test_impl);
- void RunSpeedTest(warp_affine_func test_impl);
-
- libaom_test::ACMRandom rnd_;
-};
-
-} // namespace AV1WarpFilter
-
-namespace AV1HighbdWarpFilter {
-typedef void (*highbd_warp_affine_func)(const int32_t *mat, const uint16_t *ref,
- int width, int height, int stride,
- uint16_t *pred, int p_col, int p_row,
- int p_width, int p_height, int p_stride,
- int subsampling_x, int subsampling_y,
- int bd, ConvolveParams *conv_params,
- int16_t alpha, int16_t beta,
- int16_t gamma, int16_t delta);
-
-typedef ::testing::tuple<int, int, int, int, highbd_warp_affine_func>
- HighbdWarpTestParam;
-typedef ::testing::tuple<HighbdWarpTestParam, int, int, int, int>
- HighbdWarpTestParams;
-
-::testing::internal::ParamGenerator<HighbdWarpTestParams> BuildParams(
- highbd_warp_affine_func filter);
-
-class AV1HighbdWarpFilterTest
- : public ::testing::TestWithParam<HighbdWarpTestParams> {
- public:
- virtual ~AV1HighbdWarpFilterTest();
- virtual void SetUp();
-
- virtual void TearDown();
-
- protected:
- void RunCheckOutput(highbd_warp_affine_func test_impl);
- void RunSpeedTest(highbd_warp_affine_func test_impl);
-
- libaom_test::ACMRandom rnd_;
-};
-
-} // namespace AV1HighbdWarpFilter
-
-} // namespace libaom_test
-
-#endif // AOM_TEST_WARP_FILTER_TEST_UTIL_H_
diff --git a/third_party/aom/test/webm_video_source.h b/third_party/aom/test/webm_video_source.h
deleted file mode 100644
index bb3d11735..000000000
--- a/third_party/aom/test/webm_video_source.h
+++ /dev/null
@@ -1,96 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-#ifndef AOM_TEST_WEBM_VIDEO_SOURCE_H_
-#define AOM_TEST_WEBM_VIDEO_SOURCE_H_
-#include <cstdarg>
-#include <cstdio>
-#include <cstdlib>
-#include <new>
-#include <string>
-#include "common/tools_common.h"
-#include "common/webmdec.h"
-#include "test/video_source.h"
-
-namespace libaom_test {
-
-// This class extends VideoSource to allow parsing of WebM files,
-// so that we can do actual file decodes.
-class WebMVideoSource : public CompressedVideoSource {
- public:
- explicit WebMVideoSource(const std::string &file_name)
- : file_name_(file_name), aom_ctx_(new AvxInputContext()),
- webm_ctx_(new WebmInputContext()), buf_(NULL), buf_sz_(0), frame_sz_(0),
- frame_number_(0), end_of_file_(false) {}
-
- virtual ~WebMVideoSource() {
- if (aom_ctx_->file != NULL) fclose(aom_ctx_->file);
- webm_free(webm_ctx_);
- delete aom_ctx_;
- delete webm_ctx_;
- }
-
- virtual void Init() {}
-
- virtual void Begin() {
- aom_ctx_->file = OpenTestDataFile(file_name_);
- ASSERT_TRUE(aom_ctx_->file != NULL)
- << "Input file open failed. Filename: " << file_name_;
-
- ASSERT_EQ(file_is_webm(webm_ctx_, aom_ctx_), 1) << "file is not WebM";
-
- FillFrame();
- }
-
- virtual void Next() {
- ++frame_number_;
- FillFrame();
- }
-
- void FillFrame() {
- ASSERT_TRUE(aom_ctx_->file != NULL);
- const int status = webm_read_frame(webm_ctx_, &buf_, &frame_sz_, &buf_sz_);
- ASSERT_GE(status, 0) << "webm_read_frame failed";
- if (status == 1) {
- end_of_file_ = true;
- }
- }
-
- void SeekToNextKeyFrame() {
- ASSERT_TRUE(aom_ctx_->file != NULL);
- do {
- const int status =
- webm_read_frame(webm_ctx_, &buf_, &frame_sz_, &buf_sz_);
- ASSERT_GE(status, 0) << "webm_read_frame failed";
- ++frame_number_;
- if (status == 1) {
- end_of_file_ = true;
- }
- } while (!webm_ctx_->is_key_frame && !end_of_file_);
- }
-
- virtual const uint8_t *cxdata() const { return end_of_file_ ? NULL : buf_; }
- virtual size_t frame_size() const { return frame_sz_; }
- virtual unsigned int frame_number() const { return frame_number_; }
-
- protected:
- std::string file_name_;
- AvxInputContext *aom_ctx_;
- WebmInputContext *webm_ctx_;
- uint8_t *buf_; // Owned by webm_ctx_ and freed when webm_ctx_ is freed.
- size_t buf_sz_;
- size_t frame_sz_;
- unsigned int frame_number_;
- bool end_of_file_;
-};
-
-} // namespace libaom_test
-
-#endif // AOM_TEST_WEBM_VIDEO_SOURCE_H_
diff --git a/third_party/aom/test/wiener_test.cc b/third_party/aom/test/wiener_test.cc
deleted file mode 100644
index dfec09119..000000000
--- a/third_party/aom/test/wiener_test.cc
+++ /dev/null
@@ -1,280 +0,0 @@
-/*
- * Copyright (c) 2018, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#include <vector>
-
-#include "third_party/googletest/src/googletest/include/gtest/gtest.h"
-
-#include "test/function_equivalence_test.h"
-#include "test/register_state_check.h"
-
-#include "config/aom_config.h"
-#include "config/aom_dsp_rtcd.h"
-
-#include "aom/aom_integer.h"
-#include "av1/encoder/pickrst.h"
-
-#define MAX_WIENER_BLOCK 384
-#define MAX_DATA_BLOCK (MAX_WIENER_BLOCK + WIENER_WIN)
-using libaom_test::FunctionEquivalenceTest;
-
-namespace {
-
-static void compute_stats_win_opt_c(int wiener_win, const uint8_t *dgd,
- const uint8_t *src, int h_start, int h_end,
- int v_start, int v_end, int dgd_stride,
- int src_stride, double *M, double *H) {
- ASSERT_TRUE(wiener_win == WIENER_WIN || wiener_win == WIENER_WIN_CHROMA);
- int i, j, k, l, m, n;
- const int pixel_count = (h_end - h_start) * (v_end - v_start);
- const int wiener_win2 = wiener_win * wiener_win;
- const int wiener_halfwin = (wiener_win >> 1);
- const double avg =
- find_average(dgd, h_start, h_end, v_start, v_end, dgd_stride);
-
- std::vector<std::vector<int64_t> > M_int(wiener_win,
- std::vector<int64_t>(wiener_win, 0));
- std::vector<std::vector<int64_t> > H_int(
- wiener_win * wiener_win, std::vector<int64_t>(wiener_win * 8, 0));
- std::vector<std::vector<int32_t> > sumY(wiener_win,
- std::vector<int32_t>(wiener_win, 0));
- int32_t sumX = 0;
- const uint8_t *dgd_win = dgd - wiener_halfwin * dgd_stride - wiener_halfwin;
-
- for (i = v_start; i < v_end; i++) {
- for (j = h_start; j < h_end; j += 2) {
- const uint8_t X1 = src[i * src_stride + j];
- const uint8_t X2 = src[i * src_stride + j + 1];
- sumX += X1 + X2;
-
- const uint8_t *dgd_ij = dgd_win + i * dgd_stride + j;
- for (k = 0; k < wiener_win; k++) {
- for (l = 0; l < wiener_win; l++) {
- const uint8_t *dgd_ijkl = dgd_ij + k * dgd_stride + l;
- int64_t *H_int_temp = &H_int[(l * wiener_win + k)][0];
- const uint8_t D1 = dgd_ijkl[0];
- const uint8_t D2 = dgd_ijkl[1];
- sumY[k][l] += D1 + D2;
- M_int[l][k] += D1 * X1 + D2 * X2;
- for (m = 0; m < wiener_win; m++) {
- for (n = 0; n < wiener_win; n++) {
- H_int_temp[m * 8 + n] += D1 * dgd_ij[n + dgd_stride * m] +
- D2 * dgd_ij[n + dgd_stride * m + 1];
- }
- }
- }
- }
- }
- }
-
- const double avg_square_sum = avg * avg * pixel_count;
- for (k = 0; k < wiener_win; k++) {
- for (l = 0; l < wiener_win; l++) {
- M[l * wiener_win + k] =
- M_int[l][k] + avg_square_sum - avg * (sumX + sumY[k][l]);
- for (m = 0; m < wiener_win; m++) {
- for (n = 0; n < wiener_win; n++) {
- H[(l * wiener_win + k) * wiener_win2 + m * wiener_win + n] =
- H_int[(l * wiener_win + k)][n * 8 + m] + avg_square_sum -
- avg * (sumY[k][l] + sumY[n][m]);
- }
- }
- }
- }
-}
-
-void compute_stats_opt_c(int wiener_win, const uint8_t *dgd, const uint8_t *src,
- int h_start, int h_end, int v_start, int v_end,
- int dgd_stride, int src_stride, double *M, double *H) {
- if (wiener_win == WIENER_WIN || wiener_win == WIENER_WIN_CHROMA) {
- compute_stats_win_opt_c(wiener_win, dgd, src, h_start, h_end, v_start,
- v_end, dgd_stride, src_stride, M, H);
- } else {
- av1_compute_stats_c(wiener_win, dgd, src, h_start, h_end, v_start, v_end,
- dgd_stride, src_stride, M, H);
- }
-}
-
-static const int kIterations = 100;
-static const double min_error = (double)(0.01);
-typedef void (*compute_stats_Func)(int wiener_win, const uint8_t *dgd,
- const uint8_t *src, int h_start, int h_end,
- int v_start, int v_end, int dgd_stride,
- int src_stride, double *M, double *H);
-
-typedef libaom_test::FuncParam<compute_stats_Func> TestFuncs;
-
-////////////////////////////////////////////////////////////////////////////////
-// 8 bit
-////////////////////////////////////////////////////////////////////////////////
-
-typedef ::testing::tuple<const compute_stats_Func> WienerTestParam;
-
-class WienerTest : public ::testing::TestWithParam<WienerTestParam> {
- public:
- virtual void SetUp() { target_func_ = GET_PARAM(0); }
- void runWienerTest(const int32_t wiener_win, int32_t run_times);
- void runWienerTest_ExtremeValues(const int32_t wiener_win);
-
- private:
- compute_stats_Func target_func_;
- ACMRandom rng_;
-};
-
-void WienerTest::runWienerTest(const int32_t wiener_win, int32_t run_times) {
- const int32_t wiener_halfwin = wiener_win >> 1;
- const int32_t wiener_win2 = wiener_win * wiener_win;
- DECLARE_ALIGNED(32, uint8_t, dgd_buf[MAX_DATA_BLOCK * MAX_DATA_BLOCK]);
- DECLARE_ALIGNED(32, uint8_t, src_buf[MAX_DATA_BLOCK * MAX_DATA_BLOCK]);
- DECLARE_ALIGNED(32, double, M_ref[WIENER_WIN2]);
- DECLARE_ALIGNED(32, double, H_ref[WIENER_WIN2 * WIENER_WIN2]);
- DECLARE_ALIGNED(32, double, M_test[WIENER_WIN2]);
- DECLARE_ALIGNED(32, double, H_test[WIENER_WIN2 * WIENER_WIN2]);
- const int h_start = ((rng_.Rand16() % (MAX_WIENER_BLOCK / 2)) & (~7));
- int h_end =
- run_times != 1 ? 256 : ((rng_.Rand16() % MAX_WIENER_BLOCK) & (~7)) + 8;
- const int v_start = ((rng_.Rand16() % (MAX_WIENER_BLOCK / 2)) & (~7));
- int v_end =
- run_times != 1 ? 256 : ((rng_.Rand16() % MAX_WIENER_BLOCK) & (~7)) + 8;
- const int dgd_stride = h_end;
- const int src_stride = MAX_DATA_BLOCK;
- const int iters = run_times == 1 ? kIterations : 2;
- for (int iter = 0; iter < iters && !HasFatalFailure(); ++iter) {
- for (int i = 0; i < MAX_DATA_BLOCK * MAX_DATA_BLOCK; ++i) {
- dgd_buf[i] = rng_.Rand8();
- src_buf[i] = rng_.Rand8();
- }
- uint8_t *dgd = dgd_buf + wiener_halfwin * MAX_DATA_BLOCK + wiener_halfwin;
- uint8_t *src = src_buf;
-
- aom_usec_timer timer;
- aom_usec_timer_start(&timer);
- for (int i = 0; i < run_times; ++i) {
- av1_compute_stats_c(wiener_win, dgd, src, h_start, h_end, v_start, v_end,
- dgd_stride, src_stride, M_ref, H_ref);
- }
- aom_usec_timer_mark(&timer);
- const double time1 = static_cast<double>(aom_usec_timer_elapsed(&timer));
- aom_usec_timer_start(&timer);
- for (int i = 0; i < run_times; ++i) {
- target_func_(wiener_win, dgd, src, h_start, h_end, v_start, v_end,
- dgd_stride, src_stride, M_test, H_test);
- }
- aom_usec_timer_mark(&timer);
- const double time2 = static_cast<double>(aom_usec_timer_elapsed(&timer));
- if (run_times > 10) {
- printf("win %d %3dx%-3d:%7.2f/%7.2fns", wiener_win, h_end, v_end, time1,
- time2);
- printf("(%3.2f)\n", time1 / time2);
- }
- int failed = 0;
- for (int i = 0; i < wiener_win2; ++i) {
- if (fabs(M_ref[i] - M_test[i]) > min_error) {
- failed = 1;
- printf("win %d M iter %d [%4d] ref %6.0f test %6.0f \n", wiener_win,
- iter, i, M_ref[i], M_test[i]);
- break;
- }
- }
- // ASSERT_EQ(failed, 0);
- for (int i = 0; i < wiener_win2 * wiener_win2; ++i) {
- if (fabs(H_ref[i] - H_test[i]) > min_error) {
- failed = 1;
- printf("win %d H iter %d [%4d] ref %6.0f test %6.0f \n", wiener_win,
- iter, i, H_ref[i], H_test[i]);
- break;
- }
- }
- ASSERT_EQ(failed, 0);
- }
-}
-
-void WienerTest::runWienerTest_ExtremeValues(const int32_t wiener_win) {
- const int32_t wiener_halfwin = wiener_win >> 1;
- const int32_t wiener_win2 = wiener_win * wiener_win;
- DECLARE_ALIGNED(32, uint8_t, dgd_buf[MAX_DATA_BLOCK * MAX_DATA_BLOCK]);
- DECLARE_ALIGNED(32, uint8_t, src_buf[MAX_DATA_BLOCK * MAX_DATA_BLOCK]);
- DECLARE_ALIGNED(32, double, M_ref[WIENER_WIN2]);
- DECLARE_ALIGNED(32, double, H_ref[WIENER_WIN2 * WIENER_WIN2]);
- DECLARE_ALIGNED(32, double, M_test[WIENER_WIN2]);
- DECLARE_ALIGNED(32, double, H_test[WIENER_WIN2 * WIENER_WIN2]);
- const int h_start = 16;
- const int h_end = MAX_WIENER_BLOCK;
- const int v_start = 16;
- const int v_end = MAX_WIENER_BLOCK;
- const int dgd_stride = h_end;
- const int src_stride = MAX_DATA_BLOCK;
- const int iters = 1;
- for (int iter = 0; iter < iters && !HasFatalFailure(); ++iter) {
- for (int i = 0; i < MAX_DATA_BLOCK * MAX_DATA_BLOCK; ++i) {
- dgd_buf[i] = 255;
- src_buf[i] = 255;
- }
- uint8_t *dgd = dgd_buf + wiener_halfwin * MAX_DATA_BLOCK + wiener_halfwin;
- uint8_t *src = src_buf;
-
- av1_compute_stats_c(wiener_win, dgd, src, h_start, h_end, v_start, v_end,
- dgd_stride, src_stride, M_ref, H_ref);
-
- target_func_(wiener_win, dgd, src, h_start, h_end, v_start, v_end,
- dgd_stride, src_stride, M_test, H_test);
-
- int failed = 0;
- for (int i = 0; i < wiener_win2; ++i) {
- if (fabs(M_ref[i] - M_test[i]) > min_error) {
- failed = 1;
- printf("win %d M iter %d [%4d] ref %6.0f test %6.0f \n", wiener_win,
- iter, i, M_ref[i], M_test[i]);
- break;
- }
- }
- // ASSERT_EQ(failed, 0);
- for (int i = 0; i < wiener_win2 * wiener_win2; ++i) {
- if (fabs(H_ref[i] - H_test[i]) > min_error) {
- failed = 1;
- printf("win %d H iter %d [%4d] ref %6.0f test %6.0f \n", wiener_win,
- iter, i, H_ref[i], H_test[i]);
- break;
- }
- }
- ASSERT_EQ(failed, 0);
- }
-}
-
-TEST_P(WienerTest, RandomValues) {
- runWienerTest(WIENER_WIN, 1);
- runWienerTest(WIENER_WIN_CHROMA, 1);
-}
-
-TEST_P(WienerTest, ExtremeValues) {
- runWienerTest_ExtremeValues(WIENER_WIN);
- runWienerTest_ExtremeValues(WIENER_WIN_CHROMA);
-}
-
-TEST_P(WienerTest, DISABLED_Speed) {
- runWienerTest(WIENER_WIN, 200);
- runWienerTest(WIENER_WIN_CHROMA, 200);
-}
-
-INSTANTIATE_TEST_CASE_P(C, WienerTest, ::testing::Values(compute_stats_opt_c));
-
-#if HAVE_SSE4_1
-INSTANTIATE_TEST_CASE_P(SSE4_1, WienerTest,
- ::testing::Values(av1_compute_stats_sse4_1));
-#endif // HAVE_SSE4_1
-
-#if HAVE_AVX2
-
-INSTANTIATE_TEST_CASE_P(AVX2, WienerTest,
- ::testing::Values(av1_compute_stats_avx2));
-#endif // HAVE_AVX2
-
-} // namespace
diff --git a/third_party/aom/test/y4m_test.cc b/third_party/aom/test/y4m_test.cc
deleted file mode 100644
index 6cc75ef5b..000000000
--- a/third_party/aom/test/y4m_test.cc
+++ /dev/null
@@ -1,180 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#include <string>
-
-#include "config/aom_config.h"
-
-#include "common/y4menc.h"
-#include "test/md5_helper.h"
-#include "test/util.h"
-#include "test/y4m_video_source.h"
-#include "third_party/googletest/src/googletest/include/gtest/gtest.h"
-
-namespace {
-
-using std::string;
-
-static const unsigned int kWidth = 160;
-static const unsigned int kHeight = 90;
-static const unsigned int kFrames = 10;
-
-struct Y4mTestParam {
- const char *filename;
- unsigned int bit_depth;
- aom_img_fmt format;
- const char *md5raw;
-};
-
-const Y4mTestParam kY4mTestVectors[] = {
- { "park_joy_90p_8_420.y4m", 8, AOM_IMG_FMT_I420,
- "e5406275b9fc6bb3436c31d4a05c1cab" },
- { "park_joy_90p_8_420_monochrome.y4m", 8, AOM_IMG_FMT_I420,
- "95ef5bf6218580588be24a5271bb6a7f" },
- { "park_joy_90p_8_420_vertical_csp.y4m", 8, AOM_IMG_FMT_I420,
- "f53a40fec15254ac312527339d9c686b" },
- { "park_joy_90p_8_422.y4m", 8, AOM_IMG_FMT_I422,
- "284a47a47133b12884ec3a14e959a0b6" },
- { "park_joy_90p_8_444.y4m", 8, AOM_IMG_FMT_I444,
- "90517ff33843d85de712fd4fe60dbed0" },
- { "park_joy_90p_10_420.y4m", 10, AOM_IMG_FMT_I42016,
- "63f21f9f717d8b8631bd2288ee87137b" },
- { "park_joy_90p_10_422.y4m", 10, AOM_IMG_FMT_I42216,
- "48ab51fb540aed07f7ff5af130c9b605" },
- { "park_joy_90p_10_444.y4m", 10, AOM_IMG_FMT_I44416,
- "067bfd75aa85ff9bae91fa3e0edd1e3e" },
- { "park_joy_90p_12_420.y4m", 12, AOM_IMG_FMT_I42016,
- "9e6d8f6508c6e55625f6b697bc461cef" },
- { "park_joy_90p_12_422.y4m", 12, AOM_IMG_FMT_I42216,
- "b239c6b301c0b835485be349ca83a7e3" },
- { "park_joy_90p_12_444.y4m", 12, AOM_IMG_FMT_I44416,
- "5a6481a550821dab6d0192f5c63845e9" },
-};
-
-static const int PLANES_YUV[] = { AOM_PLANE_Y, AOM_PLANE_U, AOM_PLANE_V };
-
-class Y4mVideoSourceTest : public ::testing::TestWithParam<Y4mTestParam>,
- public ::libaom_test::Y4mVideoSource {
- protected:
- Y4mVideoSourceTest() : Y4mVideoSource("", 0, 0) {}
-
- virtual ~Y4mVideoSourceTest() { CloseSource(); }
-
- virtual void Init(const std::string &file_name, int limit) {
- file_name_ = file_name;
- start_ = 0;
- limit_ = limit;
- frame_ = 0;
- Begin();
- }
-
- // Checks y4m header information
- void HeaderChecks(unsigned int bit_depth, aom_img_fmt_t fmt) {
- ASSERT_TRUE(input_file_ != NULL);
- ASSERT_EQ(y4m_.pic_w, (int)kWidth);
- ASSERT_EQ(y4m_.pic_h, (int)kHeight);
- ASSERT_EQ(img()->d_w, kWidth);
- ASSERT_EQ(img()->d_h, kHeight);
- ASSERT_EQ(y4m_.bit_depth, bit_depth);
- ASSERT_EQ(y4m_.aom_fmt, fmt);
- if (fmt == AOM_IMG_FMT_I420 || fmt == AOM_IMG_FMT_I42016) {
- ASSERT_EQ(y4m_.bps, (int)y4m_.bit_depth * 3 / 2);
- ASSERT_EQ(img()->x_chroma_shift, 1U);
- ASSERT_EQ(img()->y_chroma_shift, 1U);
- }
- if (fmt == AOM_IMG_FMT_I422 || fmt == AOM_IMG_FMT_I42216) {
- ASSERT_EQ(y4m_.bps, (int)y4m_.bit_depth * 2);
- ASSERT_EQ(img()->x_chroma_shift, 1U);
- ASSERT_EQ(img()->y_chroma_shift, 0U);
- }
- if (fmt == AOM_IMG_FMT_I444 || fmt == AOM_IMG_FMT_I44416) {
- ASSERT_EQ(y4m_.bps, (int)y4m_.bit_depth * 3);
- ASSERT_EQ(img()->x_chroma_shift, 0U);
- ASSERT_EQ(img()->y_chroma_shift, 0U);
- }
- }
-
- // Checks MD5 of the raw frame data
- void Md5Check(const string &expected_md5) {
- ASSERT_TRUE(input_file_ != NULL);
- libaom_test::MD5 md5;
- for (unsigned int i = start_; i < limit_; i++) {
- md5.Add(img());
- Next();
- }
- ASSERT_EQ(string(md5.Get()), expected_md5);
- }
-};
-
-TEST_P(Y4mVideoSourceTest, SourceTest) {
- const Y4mTestParam t = GetParam();
- Init(t.filename, kFrames);
- HeaderChecks(t.bit_depth, t.format);
- Md5Check(t.md5raw);
-}
-
-INSTANTIATE_TEST_CASE_P(C, Y4mVideoSourceTest,
- ::testing::ValuesIn(kY4mTestVectors));
-
-class Y4mVideoWriteTest : public Y4mVideoSourceTest {
- protected:
- Y4mVideoWriteTest() : tmpfile_(NULL) {}
-
- virtual ~Y4mVideoWriteTest() {
- delete tmpfile_;
- input_file_ = NULL;
- }
-
- void ReplaceInputFile(FILE *input_file) {
- CloseSource();
- frame_ = 0;
- input_file_ = input_file;
- rewind(input_file_);
- ReadSourceToStart();
- }
-
- // Writes out a y4m file and then reads it back
- void WriteY4mAndReadBack() {
- ASSERT_TRUE(input_file_ != NULL);
- char buf[Y4M_BUFFER_SIZE] = { 0 };
- const struct AvxRational framerate = { y4m_.fps_n, y4m_.fps_d };
- tmpfile_ = new libaom_test::TempOutFile;
- ASSERT_TRUE(tmpfile_->file() != NULL);
- y4m_write_file_header(buf, sizeof(buf), kWidth, kHeight, &framerate,
- img()->monochrome, img()->csp, y4m_.aom_fmt,
- y4m_.bit_depth);
- fputs(buf, tmpfile_->file());
- for (unsigned int i = start_; i < limit_; i++) {
- y4m_write_frame_header(buf, sizeof(buf));
- fputs(buf, tmpfile_->file());
- y4m_write_image_file(img(), PLANES_YUV, tmpfile_->file());
- Next();
- }
- ReplaceInputFile(tmpfile_->file());
- }
-
- virtual void Init(const std::string &file_name, int limit) {
- Y4mVideoSourceTest::Init(file_name, limit);
- WriteY4mAndReadBack();
- }
- libaom_test::TempOutFile *tmpfile_;
-};
-
-TEST_P(Y4mVideoWriteTest, WriteTest) {
- const Y4mTestParam t = GetParam();
- Init(t.filename, kFrames);
- HeaderChecks(t.bit_depth, t.format);
- Md5Check(t.md5raw);
-}
-
-INSTANTIATE_TEST_CASE_P(C, Y4mVideoWriteTest,
- ::testing::ValuesIn(kY4mTestVectors));
-} // namespace
diff --git a/third_party/aom/test/y4m_video_source.h b/third_party/aom/test/y4m_video_source.h
deleted file mode 100644
index 3dea901e6..000000000
--- a/third_party/aom/test/y4m_video_source.h
+++ /dev/null
@@ -1,123 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-#ifndef AOM_TEST_Y4M_VIDEO_SOURCE_H_
-#define AOM_TEST_Y4M_VIDEO_SOURCE_H_
-#include <algorithm>
-#include <string>
-
-#include "common/y4minput.h"
-#include "test/video_source.h"
-
-namespace libaom_test {
-
-// This class extends VideoSource to allow parsing of raw yv12
-// so that we can do actual file encodes.
-class Y4mVideoSource : public VideoSource {
- public:
- Y4mVideoSource(const std::string &file_name, unsigned int start, int limit)
- : file_name_(file_name), input_file_(NULL), img_(new aom_image_t()),
- start_(start), limit_(limit), frame_(0), framerate_numerator_(0),
- framerate_denominator_(0), y4m_() {}
-
- virtual ~Y4mVideoSource() {
- aom_img_free(img_.get());
- CloseSource();
- }
-
- virtual void OpenSource() {
- CloseSource();
- input_file_ = OpenTestDataFile(file_name_);
- ASSERT_TRUE(input_file_ != NULL)
- << "Input file open failed. Filename: " << file_name_;
- }
-
- virtual void ReadSourceToStart() {
- ASSERT_TRUE(input_file_ != NULL);
- ASSERT_FALSE(y4m_input_open(&y4m_, input_file_, NULL, 0, 0));
- framerate_numerator_ = y4m_.fps_n;
- framerate_denominator_ = y4m_.fps_d;
- frame_ = 0;
- for (unsigned int i = 0; i < start_; i++) {
- Next();
- }
- FillFrame();
- }
-
- virtual void Begin() {
- OpenSource();
- ReadSourceToStart();
- }
-
- virtual void Next() {
- ++frame_;
- FillFrame();
- }
-
- virtual aom_image_t *img() const {
- return (frame_ < limit_) ? img_.get() : NULL;
- }
-
- // Models a stream where Timebase = 1/FPS, so pts == frame.
- virtual aom_codec_pts_t pts() const { return frame_; }
-
- virtual unsigned long duration() const { return 1; }
-
- virtual aom_rational_t timebase() const {
- const aom_rational_t t = { framerate_denominator_, framerate_numerator_ };
- return t;
- }
-
- virtual unsigned int frame() const { return frame_; }
-
- virtual unsigned int limit() const { return limit_; }
-
- virtual void FillFrame() {
- ASSERT_TRUE(input_file_ != NULL);
- // Read a frame from input_file.
- y4m_input_fetch_frame(&y4m_, input_file_, img_.get());
- }
-
- // Swap buffers with another y4m source. This allows reading a new frame
- // while keeping the old frame around. A whole Y4mSource is required and
- // not just a aom_image_t because of how the y4m reader manipulates
- // aom_image_t internals,
- void SwapBuffers(Y4mVideoSource *other) {
- std::swap(other->y4m_.dst_buf, y4m_.dst_buf);
- aom_image_t *tmp;
- tmp = other->img_.release();
- other->img_.reset(img_.release());
- img_.reset(tmp);
- }
-
- protected:
- void CloseSource() {
- y4m_input_close(&y4m_);
- y4m_ = y4m_input();
- if (input_file_ != NULL) {
- fclose(input_file_);
- input_file_ = NULL;
- }
- }
-
- std::string file_name_;
- FILE *input_file_;
- testing::internal::scoped_ptr<aom_image_t> img_;
- unsigned int start_;
- unsigned int limit_;
- unsigned int frame_;
- int framerate_numerator_;
- int framerate_denominator_;
- y4m_input y4m_;
-};
-
-} // namespace libaom_test
-
-#endif // AOM_TEST_Y4M_VIDEO_SOURCE_H_
diff --git a/third_party/aom/test/yuv_video_source.h b/third_party/aom/test/yuv_video_source.h
deleted file mode 100644
index 774ecc008..000000000
--- a/third_party/aom/test/yuv_video_source.h
+++ /dev/null
@@ -1,123 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-#ifndef AOM_TEST_YUV_VIDEO_SOURCE_H_
-#define AOM_TEST_YUV_VIDEO_SOURCE_H_
-
-#include <cstdio>
-#include <cstdlib>
-#include <string>
-
-#include "test/video_source.h"
-#include "aom/aom_image.h"
-
-namespace libaom_test {
-
-// This class extends VideoSource to allow parsing of raw YUV
-// formats of various color sampling and bit-depths so that we can
-// do actual file encodes.
-class YUVVideoSource : public VideoSource {
- public:
- YUVVideoSource(const std::string &file_name, aom_img_fmt format,
- unsigned int width, unsigned int height, int rate_numerator,
- int rate_denominator, unsigned int start, int limit)
- : file_name_(file_name), input_file_(NULL), img_(NULL), start_(start),
- limit_(limit), frame_(0), width_(0), height_(0),
- format_(AOM_IMG_FMT_NONE), framerate_numerator_(rate_numerator),
- framerate_denominator_(rate_denominator) {
- // This initializes format_, raw_size_, width_, height_ and allocates img.
- SetSize(width, height, format);
- }
-
- virtual ~YUVVideoSource() {
- aom_img_free(img_);
- if (input_file_) fclose(input_file_);
- }
-
- virtual void Begin() {
- if (input_file_) fclose(input_file_);
- input_file_ = OpenTestDataFile(file_name_);
- ASSERT_TRUE(input_file_ != NULL)
- << "Input file open failed. Filename: " << file_name_;
- if (start_)
- fseek(input_file_, static_cast<unsigned>(raw_size_) * start_, SEEK_SET);
-
- frame_ = start_;
- FillFrame();
- }
-
- virtual void Next() {
- ++frame_;
- FillFrame();
- }
-
- virtual aom_image_t *img() const { return (frame_ < limit_) ? img_ : NULL; }
-
- // Models a stream where Timebase = 1/FPS, so pts == frame.
- virtual aom_codec_pts_t pts() const { return frame_; }
-
- virtual unsigned long duration() const { return 1; }
-
- virtual aom_rational_t timebase() const {
- const aom_rational_t t = { framerate_denominator_, framerate_numerator_ };
- return t;
- }
-
- virtual unsigned int frame() const { return frame_; }
-
- virtual unsigned int limit() const { return limit_; }
-
- virtual void SetSize(unsigned int width, unsigned int height,
- aom_img_fmt format) {
- if (width != width_ || height != height_ || format != format_) {
- aom_img_free(img_);
- img_ = aom_img_alloc(NULL, format, width, height, 1);
- ASSERT_TRUE(img_ != NULL);
- width_ = width;
- height_ = height;
- format_ = format;
- switch (format) {
- case AOM_IMG_FMT_I420: raw_size_ = width * height * 3 / 2; break;
- case AOM_IMG_FMT_I422: raw_size_ = width * height * 2; break;
- case AOM_IMG_FMT_I444: raw_size_ = width * height * 3; break;
- case AOM_IMG_FMT_I42016: raw_size_ = width * height * 3; break;
- case AOM_IMG_FMT_I42216: raw_size_ = width * height * 4; break;
- case AOM_IMG_FMT_I44416: raw_size_ = width * height * 6; break;
- default: ASSERT_TRUE(0);
- }
- }
- }
-
- virtual void FillFrame() {
- ASSERT_TRUE(input_file_ != NULL);
- // Read a frame from input_file.
- if (fread(img_->img_data, raw_size_, 1, input_file_) == 0) {
- limit_ = frame_;
- }
- }
-
- protected:
- std::string file_name_;
- FILE *input_file_;
- aom_image_t *img_;
- size_t raw_size_;
- unsigned int start_;
- unsigned int limit_;
- unsigned int frame_;
- unsigned int width_;
- unsigned int height_;
- aom_img_fmt format_;
- int framerate_numerator_;
- int framerate_denominator_;
-};
-
-} // namespace libaom_test
-
-#endif // AOM_TEST_YUV_VIDEO_SOURCE_H_
diff --git a/third_party/aom/third_party/fastfeat/LICENSE b/third_party/aom/third_party/fastfeat/LICENSE
deleted file mode 100644
index f347008d6..000000000
--- a/third_party/aom/third_party/fastfeat/LICENSE
+++ /dev/null
@@ -1,30 +0,0 @@
-Copyright (c) 2006, 2008 Edward Rosten
-All rights reserved.
-
-Redistribution and use in source and binary forms, with or without
-modification, are permitted provided that the following conditions
-are met:
-
-
- *Redistributions of source code must retain the above copyright
- notice, this list of conditions and the following disclaimer.
-
- *Redistributions in binary form must reproduce the above copyright
- notice, this list of conditions and the following disclaimer in the
- documentation and/or other materials provided with the distribution.
-
- *Neither the name of the University of Cambridge nor the names of
- its contributors may be used to endorse or promote products derived
- from this software without specific prior written permission.
-
-THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
-LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
-A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
-CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
-EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
-PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
-PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
-LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
-NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
-SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
diff --git a/third_party/aom/third_party/fastfeat/README.libvpx b/third_party/aom/third_party/fastfeat/README.libvpx
deleted file mode 100644
index 1e58a303b..000000000
--- a/third_party/aom/third_party/fastfeat/README.libvpx
+++ /dev/null
@@ -1,39 +0,0 @@
-URL: https://github.com/edrosten/fast-C-src
-Version: 391d5e939eb1545d24c10533d7de424db8d9c191
-License: BSD
-License File: LICENSE
-
-Description:
-Library to compute FAST features with non-maximum suppression.
-
-The files are valid C and C++ code, and have no special requirements for
-compiling, and they do not depend on any libraries. Just compile them along with
-the rest of your project.
-
-To use the functions, #include "fast.h"
-
-The corner detectors have the following prototype (where X is 9, 10, 11 or 12):
-
-xy* fastX_detect_nonmax(const unsigned char * data, int xsize, int ysize, int stride, int threshold, int* numcorners)
-
-Where xy is the following simple struct typedef:
-
-typedef struct
-{
- int x, y;
-} xy;
-
-The image is passed in as a block of data and dimensions, and the list of
-corners is returned as an array of xy structs, and an integer (numcorners)
-with the number of corners returned. The data can be deallocated with free().
-Nonmaximal suppression is performed on the corners. Note that the stride
-is the number of bytes between rows. If your image has no padding, then this
-is the same as xsize.
-
-The detection, scoring and nonmaximal suppression are available as individual
-functions. To see how to use the individual functions, see fast.c
-
-Local Modifications:
-Add lines to turn off clang formatting for these files
-Remove Fast 10, 11 and 12
-Convert tabs to spaces
diff --git a/third_party/aom/third_party/fastfeat/fast.c b/third_party/aom/third_party/fastfeat/fast.c
deleted file mode 100644
index 0d7efc154..000000000
--- a/third_party/aom/third_party/fastfeat/fast.c
+++ /dev/null
@@ -1,22 +0,0 @@
-// clang-format off
-#include <stdlib.h>
-#include "fast.h"
-
-
-xy* fast9_detect_nonmax(const byte* im, int xsize, int ysize, int stride, int b, int* ret_num_corners)
-{
- xy* corners;
- int num_corners;
- int* scores;
- xy* nonmax;
-
- corners = fast9_detect(im, xsize, ysize, stride, b, &num_corners);
- scores = fast9_score(im, stride, corners, num_corners, b);
- nonmax = nonmax_suppression(corners, scores, num_corners, ret_num_corners);
-
- free(corners);
- free(scores);
-
- return nonmax;
-}
-// clang-format on
diff --git a/third_party/aom/third_party/fastfeat/fast.h b/third_party/aom/third_party/fastfeat/fast.h
deleted file mode 100644
index a00730e3d..000000000
--- a/third_party/aom/third_party/fastfeat/fast.h
+++ /dev/null
@@ -1,20 +0,0 @@
-// clang-format off
-#ifndef FAST_H
-#define FAST_H
-
-typedef struct { int x, y; } xy;
-typedef unsigned char byte;
-
-int fast9_corner_score(const byte* p, const int pixel[], int bstart);
-
-xy* fast9_detect(const byte* im, int xsize, int ysize, int stride, int b, int* ret_num_corners);
-
-int* fast9_score(const byte* i, int stride, xy* corners, int num_corners, int b);
-
-xy* fast9_detect_nonmax(const byte* im, int xsize, int ysize, int stride, int b, int* ret_num_corners);
-
-xy* nonmax_suppression(const xy* corners, const int* scores, int num_corners, int* ret_num_nonmax);
-
-
-#endif
-// clang-format on
diff --git a/third_party/aom/third_party/fastfeat/fast_9.c b/third_party/aom/third_party/fastfeat/fast_9.c
deleted file mode 100644
index ec167a953..000000000
--- a/third_party/aom/third_party/fastfeat/fast_9.c
+++ /dev/null
@@ -1,5911 +0,0 @@
-// clang-format off
-/*This is mechanically generated code*/
-#include <stdlib.h>
-
-typedef struct { int x, y; } xy;
-typedef unsigned char byte;
-
-int fast9_corner_score(const byte* p, const int pixel[], int bstart)
-{
- int bmin = bstart;
- int bmax = 255;
- int b = (bmax + bmin)/2;
-
- /*Compute the score using binary search*/
- for(;;)
- {
- int cb = *p + b;
- int c_b= *p - b;
-
-
- if( p[pixel[0]] > cb)
- if( p[pixel[1]] > cb)
- if( p[pixel[2]] > cb)
- if( p[pixel[3]] > cb)
- if( p[pixel[4]] > cb)
- if( p[pixel[5]] > cb)
- if( p[pixel[6]] > cb)
- if( p[pixel[7]] > cb)
- if( p[pixel[8]] > cb)
- goto is_a_corner;
- else
- if( p[pixel[15]] > cb)
- goto is_a_corner;
- else
- goto is_not_a_corner;
- else if( p[pixel[7]] < c_b)
- if( p[pixel[14]] > cb)
- if( p[pixel[15]] > cb)
- goto is_a_corner;
- else
- goto is_not_a_corner;
- else if( p[pixel[14]] < c_b)
- if( p[pixel[8]] < c_b)
- if( p[pixel[9]] < c_b)
- if( p[pixel[10]] < c_b)
- if( p[pixel[11]] < c_b)
- if( p[pixel[12]] < c_b)
- if( p[pixel[13]] < c_b)
- if( p[pixel[15]] < c_b)
- goto is_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
- if( p[pixel[14]] > cb)
- if( p[pixel[15]] > cb)
- goto is_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else if( p[pixel[6]] < c_b)
- if( p[pixel[15]] > cb)
- if( p[pixel[13]] > cb)
- if( p[pixel[14]] > cb)
- goto is_a_corner;
- else
- goto is_not_a_corner;
- else if( p[pixel[13]] < c_b)
- if( p[pixel[7]] < c_b)
- if( p[pixel[8]] < c_b)
- if( p[pixel[9]] < c_b)
- if( p[pixel[10]] < c_b)
- if( p[pixel[11]] < c_b)
- if( p[pixel[12]] < c_b)
- if( p[pixel[14]] < c_b)
- goto is_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
- if( p[pixel[7]] < c_b)
- if( p[pixel[8]] < c_b)
- if( p[pixel[9]] < c_b)
- if( p[pixel[10]] < c_b)
- if( p[pixel[11]] < c_b)
- if( p[pixel[12]] < c_b)
- if( p[pixel[13]] < c_b)
- if( p[pixel[14]] < c_b)
- goto is_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
- if( p[pixel[13]] > cb)
- if( p[pixel[14]] > cb)
- if( p[pixel[15]] > cb)
- goto is_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else if( p[pixel[13]] < c_b)
- if( p[pixel[7]] < c_b)
- if( p[pixel[8]] < c_b)
- if( p[pixel[9]] < c_b)
- if( p[pixel[10]] < c_b)
- if( p[pixel[11]] < c_b)
- if( p[pixel[12]] < c_b)
- if( p[pixel[14]] < c_b)
- if( p[pixel[15]] < c_b)
- goto is_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else if( p[pixel[5]] < c_b)
- if( p[pixel[14]] > cb)
- if( p[pixel[12]] > cb)
- if( p[pixel[13]] > cb)
- if( p[pixel[15]] > cb)
- goto is_a_corner;
- else
- if( p[pixel[6]] > cb)
- if( p[pixel[7]] > cb)
- if( p[pixel[8]] > cb)
- if( p[pixel[9]] > cb)
- if( p[pixel[10]] > cb)
- if( p[pixel[11]] > cb)
- goto is_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else if( p[pixel[12]] < c_b)
- if( p[pixel[6]] < c_b)
- if( p[pixel[7]] < c_b)
- if( p[pixel[8]] < c_b)
- if( p[pixel[9]] < c_b)
- if( p[pixel[10]] < c_b)
- if( p[pixel[11]] < c_b)
- if( p[pixel[13]] < c_b)
- goto is_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else if( p[pixel[14]] < c_b)
- if( p[pixel[7]] < c_b)
- if( p[pixel[8]] < c_b)
- if( p[pixel[9]] < c_b)
- if( p[pixel[10]] < c_b)
- if( p[pixel[11]] < c_b)
- if( p[pixel[12]] < c_b)
- if( p[pixel[13]] < c_b)
- if( p[pixel[6]] < c_b)
- goto is_a_corner;
- else
- if( p[pixel[15]] < c_b)
- goto is_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
- if( p[pixel[6]] < c_b)
- if( p[pixel[7]] < c_b)
- if( p[pixel[8]] < c_b)
- if( p[pixel[9]] < c_b)
- if( p[pixel[10]] < c_b)
- if( p[pixel[11]] < c_b)
- if( p[pixel[12]] < c_b)
- if( p[pixel[13]] < c_b)
- goto is_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
- if( p[pixel[12]] > cb)
- if( p[pixel[13]] > cb)
- if( p[pixel[14]] > cb)
- if( p[pixel[15]] > cb)
- goto is_a_corner;
- else
- if( p[pixel[6]] > cb)
- if( p[pixel[7]] > cb)
- if( p[pixel[8]] > cb)
- if( p[pixel[9]] > cb)
- if( p[pixel[10]] > cb)
- if( p[pixel[11]] > cb)
- goto is_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else if( p[pixel[12]] < c_b)
- if( p[pixel[7]] < c_b)
- if( p[pixel[8]] < c_b)
- if( p[pixel[9]] < c_b)
- if( p[pixel[10]] < c_b)
- if( p[pixel[11]] < c_b)
- if( p[pixel[13]] < c_b)
- if( p[pixel[14]] < c_b)
- if( p[pixel[6]] < c_b)
- goto is_a_corner;
- else
- if( p[pixel[15]] < c_b)
- goto is_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else if( p[pixel[4]] < c_b)
- if( p[pixel[13]] > cb)
- if( p[pixel[11]] > cb)
- if( p[pixel[12]] > cb)
- if( p[pixel[14]] > cb)
- if( p[pixel[15]] > cb)
- goto is_a_corner;
- else
- if( p[pixel[6]] > cb)
- if( p[pixel[7]] > cb)
- if( p[pixel[8]] > cb)
- if( p[pixel[9]] > cb)
- if( p[pixel[10]] > cb)
- goto is_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
- if( p[pixel[5]] > cb)
- if( p[pixel[6]] > cb)
- if( p[pixel[7]] > cb)
- if( p[pixel[8]] > cb)
- if( p[pixel[9]] > cb)
- if( p[pixel[10]] > cb)
- goto is_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else if( p[pixel[11]] < c_b)
- if( p[pixel[5]] < c_b)
- if( p[pixel[6]] < c_b)
- if( p[pixel[7]] < c_b)
- if( p[pixel[8]] < c_b)
- if( p[pixel[9]] < c_b)
- if( p[pixel[10]] < c_b)
- if( p[pixel[12]] < c_b)
- goto is_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else if( p[pixel[13]] < c_b)
- if( p[pixel[7]] < c_b)
- if( p[pixel[8]] < c_b)
- if( p[pixel[9]] < c_b)
- if( p[pixel[10]] < c_b)
- if( p[pixel[11]] < c_b)
- if( p[pixel[12]] < c_b)
- if( p[pixel[6]] < c_b)
- if( p[pixel[5]] < c_b)
- goto is_a_corner;
- else
- if( p[pixel[14]] < c_b)
- goto is_a_corner;
- else
- goto is_not_a_corner;
- else
- if( p[pixel[14]] < c_b)
- if( p[pixel[15]] < c_b)
- goto is_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
- if( p[pixel[5]] < c_b)
- if( p[pixel[6]] < c_b)
- if( p[pixel[7]] < c_b)
- if( p[pixel[8]] < c_b)
- if( p[pixel[9]] < c_b)
- if( p[pixel[10]] < c_b)
- if( p[pixel[11]] < c_b)
- if( p[pixel[12]] < c_b)
- goto is_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
- if( p[pixel[11]] > cb)
- if( p[pixel[12]] > cb)
- if( p[pixel[13]] > cb)
- if( p[pixel[14]] > cb)
- if( p[pixel[15]] > cb)
- goto is_a_corner;
- else
- if( p[pixel[6]] > cb)
- if( p[pixel[7]] > cb)
- if( p[pixel[8]] > cb)
- if( p[pixel[9]] > cb)
- if( p[pixel[10]] > cb)
- goto is_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
- if( p[pixel[5]] > cb)
- if( p[pixel[6]] > cb)
- if( p[pixel[7]] > cb)
- if( p[pixel[8]] > cb)
- if( p[pixel[9]] > cb)
- if( p[pixel[10]] > cb)
- goto is_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else if( p[pixel[11]] < c_b)
- if( p[pixel[7]] < c_b)
- if( p[pixel[8]] < c_b)
- if( p[pixel[9]] < c_b)
- if( p[pixel[10]] < c_b)
- if( p[pixel[12]] < c_b)
- if( p[pixel[13]] < c_b)
- if( p[pixel[6]] < c_b)
- if( p[pixel[5]] < c_b)
- goto is_a_corner;
- else
- if( p[pixel[14]] < c_b)
- goto is_a_corner;
- else
- goto is_not_a_corner;
- else
- if( p[pixel[14]] < c_b)
- if( p[pixel[15]] < c_b)
- goto is_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else if( p[pixel[3]] < c_b)
- if( p[pixel[10]] > cb)
- if( p[pixel[11]] > cb)
- if( p[pixel[12]] > cb)
- if( p[pixel[13]] > cb)
- if( p[pixel[14]] > cb)
- if( p[pixel[15]] > cb)
- goto is_a_corner;
- else
- if( p[pixel[6]] > cb)
- if( p[pixel[7]] > cb)
- if( p[pixel[8]] > cb)
- if( p[pixel[9]] > cb)
- goto is_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
- if( p[pixel[5]] > cb)
- if( p[pixel[6]] > cb)
- if( p[pixel[7]] > cb)
- if( p[pixel[8]] > cb)
- if( p[pixel[9]] > cb)
- goto is_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
- if( p[pixel[4]] > cb)
- if( p[pixel[5]] > cb)
- if( p[pixel[6]] > cb)
- if( p[pixel[7]] > cb)
- if( p[pixel[8]] > cb)
- if( p[pixel[9]] > cb)
- goto is_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else if( p[pixel[10]] < c_b)
- if( p[pixel[7]] < c_b)
- if( p[pixel[8]] < c_b)
- if( p[pixel[9]] < c_b)
- if( p[pixel[11]] < c_b)
- if( p[pixel[6]] < c_b)
- if( p[pixel[5]] < c_b)
- if( p[pixel[4]] < c_b)
- goto is_a_corner;
- else
- if( p[pixel[12]] < c_b)
- if( p[pixel[13]] < c_b)
- goto is_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
- if( p[pixel[12]] < c_b)
- if( p[pixel[13]] < c_b)
- if( p[pixel[14]] < c_b)
- goto is_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
- if( p[pixel[12]] < c_b)
- if( p[pixel[13]] < c_b)
- if( p[pixel[14]] < c_b)
- if( p[pixel[15]] < c_b)
- goto is_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
- if( p[pixel[10]] > cb)
- if( p[pixel[11]] > cb)
- if( p[pixel[12]] > cb)
- if( p[pixel[13]] > cb)
- if( p[pixel[14]] > cb)
- if( p[pixel[15]] > cb)
- goto is_a_corner;
- else
- if( p[pixel[6]] > cb)
- if( p[pixel[7]] > cb)
- if( p[pixel[8]] > cb)
- if( p[pixel[9]] > cb)
- goto is_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
- if( p[pixel[5]] > cb)
- if( p[pixel[6]] > cb)
- if( p[pixel[7]] > cb)
- if( p[pixel[8]] > cb)
- if( p[pixel[9]] > cb)
- goto is_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
- if( p[pixel[4]] > cb)
- if( p[pixel[5]] > cb)
- if( p[pixel[6]] > cb)
- if( p[pixel[7]] > cb)
- if( p[pixel[8]] > cb)
- if( p[pixel[9]] > cb)
- goto is_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else if( p[pixel[10]] < c_b)
- if( p[pixel[7]] < c_b)
- if( p[pixel[8]] < c_b)
- if( p[pixel[9]] < c_b)
- if( p[pixel[11]] < c_b)
- if( p[pixel[12]] < c_b)
- if( p[pixel[6]] < c_b)
- if( p[pixel[5]] < c_b)
- if( p[pixel[4]] < c_b)
- goto is_a_corner;
- else
- if( p[pixel[13]] < c_b)
- goto is_a_corner;
- else
- goto is_not_a_corner;
- else
- if( p[pixel[13]] < c_b)
- if( p[pixel[14]] < c_b)
- goto is_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
- if( p[pixel[13]] < c_b)
- if( p[pixel[14]] < c_b)
- if( p[pixel[15]] < c_b)
- goto is_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else if( p[pixel[2]] < c_b)
- if( p[pixel[9]] > cb)
- if( p[pixel[10]] > cb)
- if( p[pixel[11]] > cb)
- if( p[pixel[12]] > cb)
- if( p[pixel[13]] > cb)
- if( p[pixel[14]] > cb)
- if( p[pixel[15]] > cb)
- goto is_a_corner;
- else
- if( p[pixel[6]] > cb)
- if( p[pixel[7]] > cb)
- if( p[pixel[8]] > cb)
- goto is_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
- if( p[pixel[5]] > cb)
- if( p[pixel[6]] > cb)
- if( p[pixel[7]] > cb)
- if( p[pixel[8]] > cb)
- goto is_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
- if( p[pixel[4]] > cb)
- if( p[pixel[5]] > cb)
- if( p[pixel[6]] > cb)
- if( p[pixel[7]] > cb)
- if( p[pixel[8]] > cb)
- goto is_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
- if( p[pixel[3]] > cb)
- if( p[pixel[4]] > cb)
- if( p[pixel[5]] > cb)
- if( p[pixel[6]] > cb)
- if( p[pixel[7]] > cb)
- if( p[pixel[8]] > cb)
- goto is_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else if( p[pixel[9]] < c_b)
- if( p[pixel[7]] < c_b)
- if( p[pixel[8]] < c_b)
- if( p[pixel[10]] < c_b)
- if( p[pixel[6]] < c_b)
- if( p[pixel[5]] < c_b)
- if( p[pixel[4]] < c_b)
- if( p[pixel[3]] < c_b)
- goto is_a_corner;
- else
- if( p[pixel[11]] < c_b)
- if( p[pixel[12]] < c_b)
- goto is_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
- if( p[pixel[11]] < c_b)
- if( p[pixel[12]] < c_b)
- if( p[pixel[13]] < c_b)
- goto is_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
- if( p[pixel[11]] < c_b)
- if( p[pixel[12]] < c_b)
- if( p[pixel[13]] < c_b)
- if( p[pixel[14]] < c_b)
- goto is_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
- if( p[pixel[11]] < c_b)
- if( p[pixel[12]] < c_b)
- if( p[pixel[13]] < c_b)
- if( p[pixel[14]] < c_b)
- if( p[pixel[15]] < c_b)
- goto is_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
- if( p[pixel[9]] > cb)
- if( p[pixel[10]] > cb)
- if( p[pixel[11]] > cb)
- if( p[pixel[12]] > cb)
- if( p[pixel[13]] > cb)
- if( p[pixel[14]] > cb)
- if( p[pixel[15]] > cb)
- goto is_a_corner;
- else
- if( p[pixel[6]] > cb)
- if( p[pixel[7]] > cb)
- if( p[pixel[8]] > cb)
- goto is_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
- if( p[pixel[5]] > cb)
- if( p[pixel[6]] > cb)
- if( p[pixel[7]] > cb)
- if( p[pixel[8]] > cb)
- goto is_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
- if( p[pixel[4]] > cb)
- if( p[pixel[5]] > cb)
- if( p[pixel[6]] > cb)
- if( p[pixel[7]] > cb)
- if( p[pixel[8]] > cb)
- goto is_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
- if( p[pixel[3]] > cb)
- if( p[pixel[4]] > cb)
- if( p[pixel[5]] > cb)
- if( p[pixel[6]] > cb)
- if( p[pixel[7]] > cb)
- if( p[pixel[8]] > cb)
- goto is_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else if( p[pixel[9]] < c_b)
- if( p[pixel[7]] < c_b)
- if( p[pixel[8]] < c_b)
- if( p[pixel[10]] < c_b)
- if( p[pixel[11]] < c_b)
- if( p[pixel[6]] < c_b)
- if( p[pixel[5]] < c_b)
- if( p[pixel[4]] < c_b)
- if( p[pixel[3]] < c_b)
- goto is_a_corner;
- else
- if( p[pixel[12]] < c_b)
- goto is_a_corner;
- else
- goto is_not_a_corner;
- else
- if( p[pixel[12]] < c_b)
- if( p[pixel[13]] < c_b)
- goto is_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
- if( p[pixel[12]] < c_b)
- if( p[pixel[13]] < c_b)
- if( p[pixel[14]] < c_b)
- goto is_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
- if( p[pixel[12]] < c_b)
- if( p[pixel[13]] < c_b)
- if( p[pixel[14]] < c_b)
- if( p[pixel[15]] < c_b)
- goto is_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else if( p[pixel[1]] < c_b)
- if( p[pixel[8]] > cb)
- if( p[pixel[9]] > cb)
- if( p[pixel[10]] > cb)
- if( p[pixel[11]] > cb)
- if( p[pixel[12]] > cb)
- if( p[pixel[13]] > cb)
- if( p[pixel[14]] > cb)
- if( p[pixel[15]] > cb)
- goto is_a_corner;
- else
- if( p[pixel[6]] > cb)
- if( p[pixel[7]] > cb)
- goto is_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
- if( p[pixel[5]] > cb)
- if( p[pixel[6]] > cb)
- if( p[pixel[7]] > cb)
- goto is_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
- if( p[pixel[4]] > cb)
- if( p[pixel[5]] > cb)
- if( p[pixel[6]] > cb)
- if( p[pixel[7]] > cb)
- goto is_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
- if( p[pixel[3]] > cb)
- if( p[pixel[4]] > cb)
- if( p[pixel[5]] > cb)
- if( p[pixel[6]] > cb)
- if( p[pixel[7]] > cb)
- goto is_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
- if( p[pixel[2]] > cb)
- if( p[pixel[3]] > cb)
- if( p[pixel[4]] > cb)
- if( p[pixel[5]] > cb)
- if( p[pixel[6]] > cb)
- if( p[pixel[7]] > cb)
- goto is_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else if( p[pixel[8]] < c_b)
- if( p[pixel[7]] < c_b)
- if( p[pixel[9]] < c_b)
- if( p[pixel[6]] < c_b)
- if( p[pixel[5]] < c_b)
- if( p[pixel[4]] < c_b)
- if( p[pixel[3]] < c_b)
- if( p[pixel[2]] < c_b)
- goto is_a_corner;
- else
- if( p[pixel[10]] < c_b)
- if( p[pixel[11]] < c_b)
- goto is_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
- if( p[pixel[10]] < c_b)
- if( p[pixel[11]] < c_b)
- if( p[pixel[12]] < c_b)
- goto is_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
- if( p[pixel[10]] < c_b)
- if( p[pixel[11]] < c_b)
- if( p[pixel[12]] < c_b)
- if( p[pixel[13]] < c_b)
- goto is_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
- if( p[pixel[10]] < c_b)
- if( p[pixel[11]] < c_b)
- if( p[pixel[12]] < c_b)
- if( p[pixel[13]] < c_b)
- if( p[pixel[14]] < c_b)
- goto is_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
- if( p[pixel[10]] < c_b)
- if( p[pixel[11]] < c_b)
- if( p[pixel[12]] < c_b)
- if( p[pixel[13]] < c_b)
- if( p[pixel[14]] < c_b)
- if( p[pixel[15]] < c_b)
- goto is_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
- if( p[pixel[8]] > cb)
- if( p[pixel[9]] > cb)
- if( p[pixel[10]] > cb)
- if( p[pixel[11]] > cb)
- if( p[pixel[12]] > cb)
- if( p[pixel[13]] > cb)
- if( p[pixel[14]] > cb)
- if( p[pixel[15]] > cb)
- goto is_a_corner;
- else
- if( p[pixel[6]] > cb)
- if( p[pixel[7]] > cb)
- goto is_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
- if( p[pixel[5]] > cb)
- if( p[pixel[6]] > cb)
- if( p[pixel[7]] > cb)
- goto is_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
- if( p[pixel[4]] > cb)
- if( p[pixel[5]] > cb)
- if( p[pixel[6]] > cb)
- if( p[pixel[7]] > cb)
- goto is_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
- if( p[pixel[3]] > cb)
- if( p[pixel[4]] > cb)
- if( p[pixel[5]] > cb)
- if( p[pixel[6]] > cb)
- if( p[pixel[7]] > cb)
- goto is_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
- if( p[pixel[2]] > cb)
- if( p[pixel[3]] > cb)
- if( p[pixel[4]] > cb)
- if( p[pixel[5]] > cb)
- if( p[pixel[6]] > cb)
- if( p[pixel[7]] > cb)
- goto is_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else if( p[pixel[8]] < c_b)
- if( p[pixel[7]] < c_b)
- if( p[pixel[9]] < c_b)
- if( p[pixel[10]] < c_b)
- if( p[pixel[6]] < c_b)
- if( p[pixel[5]] < c_b)
- if( p[pixel[4]] < c_b)
- if( p[pixel[3]] < c_b)
- if( p[pixel[2]] < c_b)
- goto is_a_corner;
- else
- if( p[pixel[11]] < c_b)
- goto is_a_corner;
- else
- goto is_not_a_corner;
- else
- if( p[pixel[11]] < c_b)
- if( p[pixel[12]] < c_b)
- goto is_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
- if( p[pixel[11]] < c_b)
- if( p[pixel[12]] < c_b)
- if( p[pixel[13]] < c_b)
- goto is_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
- if( p[pixel[11]] < c_b)
- if( p[pixel[12]] < c_b)
- if( p[pixel[13]] < c_b)
- if( p[pixel[14]] < c_b)
- goto is_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
- if( p[pixel[11]] < c_b)
- if( p[pixel[12]] < c_b)
- if( p[pixel[13]] < c_b)
- if( p[pixel[14]] < c_b)
- if( p[pixel[15]] < c_b)
- goto is_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else if( p[pixel[0]] < c_b)
- if( p[pixel[1]] > cb)
- if( p[pixel[8]] > cb)
- if( p[pixel[7]] > cb)
- if( p[pixel[9]] > cb)
- if( p[pixel[6]] > cb)
- if( p[pixel[5]] > cb)
- if( p[pixel[4]] > cb)
- if( p[pixel[3]] > cb)
- if( p[pixel[2]] > cb)
- goto is_a_corner;
- else
- if( p[pixel[10]] > cb)
- if( p[pixel[11]] > cb)
- goto is_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
- if( p[pixel[10]] > cb)
- if( p[pixel[11]] > cb)
- if( p[pixel[12]] > cb)
- goto is_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
- if( p[pixel[10]] > cb)
- if( p[pixel[11]] > cb)
- if( p[pixel[12]] > cb)
- if( p[pixel[13]] > cb)
- goto is_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
- if( p[pixel[10]] > cb)
- if( p[pixel[11]] > cb)
- if( p[pixel[12]] > cb)
- if( p[pixel[13]] > cb)
- if( p[pixel[14]] > cb)
- goto is_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
- if( p[pixel[10]] > cb)
- if( p[pixel[11]] > cb)
- if( p[pixel[12]] > cb)
- if( p[pixel[13]] > cb)
- if( p[pixel[14]] > cb)
- if( p[pixel[15]] > cb)
- goto is_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else if( p[pixel[8]] < c_b)
- if( p[pixel[9]] < c_b)
- if( p[pixel[10]] < c_b)
- if( p[pixel[11]] < c_b)
- if( p[pixel[12]] < c_b)
- if( p[pixel[13]] < c_b)
- if( p[pixel[14]] < c_b)
- if( p[pixel[15]] < c_b)
- goto is_a_corner;
- else
- if( p[pixel[6]] < c_b)
- if( p[pixel[7]] < c_b)
- goto is_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
- if( p[pixel[5]] < c_b)
- if( p[pixel[6]] < c_b)
- if( p[pixel[7]] < c_b)
- goto is_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
- if( p[pixel[4]] < c_b)
- if( p[pixel[5]] < c_b)
- if( p[pixel[6]] < c_b)
- if( p[pixel[7]] < c_b)
- goto is_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
- if( p[pixel[3]] < c_b)
- if( p[pixel[4]] < c_b)
- if( p[pixel[5]] < c_b)
- if( p[pixel[6]] < c_b)
- if( p[pixel[7]] < c_b)
- goto is_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
- if( p[pixel[2]] < c_b)
- if( p[pixel[3]] < c_b)
- if( p[pixel[4]] < c_b)
- if( p[pixel[5]] < c_b)
- if( p[pixel[6]] < c_b)
- if( p[pixel[7]] < c_b)
- goto is_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else if( p[pixel[1]] < c_b)
- if( p[pixel[2]] > cb)
- if( p[pixel[9]] > cb)
- if( p[pixel[7]] > cb)
- if( p[pixel[8]] > cb)
- if( p[pixel[10]] > cb)
- if( p[pixel[6]] > cb)
- if( p[pixel[5]] > cb)
- if( p[pixel[4]] > cb)
- if( p[pixel[3]] > cb)
- goto is_a_corner;
- else
- if( p[pixel[11]] > cb)
- if( p[pixel[12]] > cb)
- goto is_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
- if( p[pixel[11]] > cb)
- if( p[pixel[12]] > cb)
- if( p[pixel[13]] > cb)
- goto is_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
- if( p[pixel[11]] > cb)
- if( p[pixel[12]] > cb)
- if( p[pixel[13]] > cb)
- if( p[pixel[14]] > cb)
- goto is_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
- if( p[pixel[11]] > cb)
- if( p[pixel[12]] > cb)
- if( p[pixel[13]] > cb)
- if( p[pixel[14]] > cb)
- if( p[pixel[15]] > cb)
- goto is_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else if( p[pixel[9]] < c_b)
- if( p[pixel[10]] < c_b)
- if( p[pixel[11]] < c_b)
- if( p[pixel[12]] < c_b)
- if( p[pixel[13]] < c_b)
- if( p[pixel[14]] < c_b)
- if( p[pixel[15]] < c_b)
- goto is_a_corner;
- else
- if( p[pixel[6]] < c_b)
- if( p[pixel[7]] < c_b)
- if( p[pixel[8]] < c_b)
- goto is_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
- if( p[pixel[5]] < c_b)
- if( p[pixel[6]] < c_b)
- if( p[pixel[7]] < c_b)
- if( p[pixel[8]] < c_b)
- goto is_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
- if( p[pixel[4]] < c_b)
- if( p[pixel[5]] < c_b)
- if( p[pixel[6]] < c_b)
- if( p[pixel[7]] < c_b)
- if( p[pixel[8]] < c_b)
- goto is_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
- if( p[pixel[3]] < c_b)
- if( p[pixel[4]] < c_b)
- if( p[pixel[5]] < c_b)
- if( p[pixel[6]] < c_b)
- if( p[pixel[7]] < c_b)
- if( p[pixel[8]] < c_b)
- goto is_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else if( p[pixel[2]] < c_b)
- if( p[pixel[3]] > cb)
- if( p[pixel[10]] > cb)
- if( p[pixel[7]] > cb)
- if( p[pixel[8]] > cb)
- if( p[pixel[9]] > cb)
- if( p[pixel[11]] > cb)
- if( p[pixel[6]] > cb)
- if( p[pixel[5]] > cb)
- if( p[pixel[4]] > cb)
- goto is_a_corner;
- else
- if( p[pixel[12]] > cb)
- if( p[pixel[13]] > cb)
- goto is_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
- if( p[pixel[12]] > cb)
- if( p[pixel[13]] > cb)
- if( p[pixel[14]] > cb)
- goto is_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
- if( p[pixel[12]] > cb)
- if( p[pixel[13]] > cb)
- if( p[pixel[14]] > cb)
- if( p[pixel[15]] > cb)
- goto is_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else if( p[pixel[10]] < c_b)
- if( p[pixel[11]] < c_b)
- if( p[pixel[12]] < c_b)
- if( p[pixel[13]] < c_b)
- if( p[pixel[14]] < c_b)
- if( p[pixel[15]] < c_b)
- goto is_a_corner;
- else
- if( p[pixel[6]] < c_b)
- if( p[pixel[7]] < c_b)
- if( p[pixel[8]] < c_b)
- if( p[pixel[9]] < c_b)
- goto is_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
- if( p[pixel[5]] < c_b)
- if( p[pixel[6]] < c_b)
- if( p[pixel[7]] < c_b)
- if( p[pixel[8]] < c_b)
- if( p[pixel[9]] < c_b)
- goto is_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
- if( p[pixel[4]] < c_b)
- if( p[pixel[5]] < c_b)
- if( p[pixel[6]] < c_b)
- if( p[pixel[7]] < c_b)
- if( p[pixel[8]] < c_b)
- if( p[pixel[9]] < c_b)
- goto is_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else if( p[pixel[3]] < c_b)
- if( p[pixel[4]] > cb)
- if( p[pixel[13]] > cb)
- if( p[pixel[7]] > cb)
- if( p[pixel[8]] > cb)
- if( p[pixel[9]] > cb)
- if( p[pixel[10]] > cb)
- if( p[pixel[11]] > cb)
- if( p[pixel[12]] > cb)
- if( p[pixel[6]] > cb)
- if( p[pixel[5]] > cb)
- goto is_a_corner;
- else
- if( p[pixel[14]] > cb)
- goto is_a_corner;
- else
- goto is_not_a_corner;
- else
- if( p[pixel[14]] > cb)
- if( p[pixel[15]] > cb)
- goto is_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else if( p[pixel[13]] < c_b)
- if( p[pixel[11]] > cb)
- if( p[pixel[5]] > cb)
- if( p[pixel[6]] > cb)
- if( p[pixel[7]] > cb)
- if( p[pixel[8]] > cb)
- if( p[pixel[9]] > cb)
- if( p[pixel[10]] > cb)
- if( p[pixel[12]] > cb)
- goto is_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else if( p[pixel[11]] < c_b)
- if( p[pixel[12]] < c_b)
- if( p[pixel[14]] < c_b)
- if( p[pixel[15]] < c_b)
- goto is_a_corner;
- else
- if( p[pixel[6]] < c_b)
- if( p[pixel[7]] < c_b)
- if( p[pixel[8]] < c_b)
- if( p[pixel[9]] < c_b)
- if( p[pixel[10]] < c_b)
- goto is_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
- if( p[pixel[5]] < c_b)
- if( p[pixel[6]] < c_b)
- if( p[pixel[7]] < c_b)
- if( p[pixel[8]] < c_b)
- if( p[pixel[9]] < c_b)
- if( p[pixel[10]] < c_b)
- goto is_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
- if( p[pixel[5]] > cb)
- if( p[pixel[6]] > cb)
- if( p[pixel[7]] > cb)
- if( p[pixel[8]] > cb)
- if( p[pixel[9]] > cb)
- if( p[pixel[10]] > cb)
- if( p[pixel[11]] > cb)
- if( p[pixel[12]] > cb)
- goto is_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else if( p[pixel[4]] < c_b)
- if( p[pixel[5]] > cb)
- if( p[pixel[14]] > cb)
- if( p[pixel[7]] > cb)
- if( p[pixel[8]] > cb)
- if( p[pixel[9]] > cb)
- if( p[pixel[10]] > cb)
- if( p[pixel[11]] > cb)
- if( p[pixel[12]] > cb)
- if( p[pixel[13]] > cb)
- if( p[pixel[6]] > cb)
- goto is_a_corner;
- else
- if( p[pixel[15]] > cb)
- goto is_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else if( p[pixel[14]] < c_b)
- if( p[pixel[12]] > cb)
- if( p[pixel[6]] > cb)
- if( p[pixel[7]] > cb)
- if( p[pixel[8]] > cb)
- if( p[pixel[9]] > cb)
- if( p[pixel[10]] > cb)
- if( p[pixel[11]] > cb)
- if( p[pixel[13]] > cb)
- goto is_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else if( p[pixel[12]] < c_b)
- if( p[pixel[13]] < c_b)
- if( p[pixel[15]] < c_b)
- goto is_a_corner;
- else
- if( p[pixel[6]] < c_b)
- if( p[pixel[7]] < c_b)
- if( p[pixel[8]] < c_b)
- if( p[pixel[9]] < c_b)
- if( p[pixel[10]] < c_b)
- if( p[pixel[11]] < c_b)
- goto is_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
- if( p[pixel[6]] > cb)
- if( p[pixel[7]] > cb)
- if( p[pixel[8]] > cb)
- if( p[pixel[9]] > cb)
- if( p[pixel[10]] > cb)
- if( p[pixel[11]] > cb)
- if( p[pixel[12]] > cb)
- if( p[pixel[13]] > cb)
- goto is_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else if( p[pixel[5]] < c_b)
- if( p[pixel[6]] > cb)
- if( p[pixel[15]] < c_b)
- if( p[pixel[13]] > cb)
- if( p[pixel[7]] > cb)
- if( p[pixel[8]] > cb)
- if( p[pixel[9]] > cb)
- if( p[pixel[10]] > cb)
- if( p[pixel[11]] > cb)
- if( p[pixel[12]] > cb)
- if( p[pixel[14]] > cb)
- goto is_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else if( p[pixel[13]] < c_b)
- if( p[pixel[14]] < c_b)
- goto is_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
- if( p[pixel[7]] > cb)
- if( p[pixel[8]] > cb)
- if( p[pixel[9]] > cb)
- if( p[pixel[10]] > cb)
- if( p[pixel[11]] > cb)
- if( p[pixel[12]] > cb)
- if( p[pixel[13]] > cb)
- if( p[pixel[14]] > cb)
- goto is_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else if( p[pixel[6]] < c_b)
- if( p[pixel[7]] > cb)
- if( p[pixel[14]] > cb)
- if( p[pixel[8]] > cb)
- if( p[pixel[9]] > cb)
- if( p[pixel[10]] > cb)
- if( p[pixel[11]] > cb)
- if( p[pixel[12]] > cb)
- if( p[pixel[13]] > cb)
- if( p[pixel[15]] > cb)
- goto is_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else if( p[pixel[14]] < c_b)
- if( p[pixel[15]] < c_b)
- goto is_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else if( p[pixel[7]] < c_b)
- if( p[pixel[8]] < c_b)
- goto is_a_corner;
- else
- if( p[pixel[15]] < c_b)
- goto is_a_corner;
- else
- goto is_not_a_corner;
- else
- if( p[pixel[14]] < c_b)
- if( p[pixel[15]] < c_b)
- goto is_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
- if( p[pixel[13]] > cb)
- if( p[pixel[7]] > cb)
- if( p[pixel[8]] > cb)
- if( p[pixel[9]] > cb)
- if( p[pixel[10]] > cb)
- if( p[pixel[11]] > cb)
- if( p[pixel[12]] > cb)
- if( p[pixel[14]] > cb)
- if( p[pixel[15]] > cb)
- goto is_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else if( p[pixel[13]] < c_b)
- if( p[pixel[14]] < c_b)
- if( p[pixel[15]] < c_b)
- goto is_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
- if( p[pixel[12]] > cb)
- if( p[pixel[7]] > cb)
- if( p[pixel[8]] > cb)
- if( p[pixel[9]] > cb)
- if( p[pixel[10]] > cb)
- if( p[pixel[11]] > cb)
- if( p[pixel[13]] > cb)
- if( p[pixel[14]] > cb)
- if( p[pixel[6]] > cb)
- goto is_a_corner;
- else
- if( p[pixel[15]] > cb)
- goto is_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else if( p[pixel[12]] < c_b)
- if( p[pixel[13]] < c_b)
- if( p[pixel[14]] < c_b)
- if( p[pixel[15]] < c_b)
- goto is_a_corner;
- else
- if( p[pixel[6]] < c_b)
- if( p[pixel[7]] < c_b)
- if( p[pixel[8]] < c_b)
- if( p[pixel[9]] < c_b)
- if( p[pixel[10]] < c_b)
- if( p[pixel[11]] < c_b)
- goto is_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
- if( p[pixel[11]] > cb)
- if( p[pixel[7]] > cb)
- if( p[pixel[8]] > cb)
- if( p[pixel[9]] > cb)
- if( p[pixel[10]] > cb)
- if( p[pixel[12]] > cb)
- if( p[pixel[13]] > cb)
- if( p[pixel[6]] > cb)
- if( p[pixel[5]] > cb)
- goto is_a_corner;
- else
- if( p[pixel[14]] > cb)
- goto is_a_corner;
- else
- goto is_not_a_corner;
- else
- if( p[pixel[14]] > cb)
- if( p[pixel[15]] > cb)
- goto is_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else if( p[pixel[11]] < c_b)
- if( p[pixel[12]] < c_b)
- if( p[pixel[13]] < c_b)
- if( p[pixel[14]] < c_b)
- if( p[pixel[15]] < c_b)
- goto is_a_corner;
- else
- if( p[pixel[6]] < c_b)
- if( p[pixel[7]] < c_b)
- if( p[pixel[8]] < c_b)
- if( p[pixel[9]] < c_b)
- if( p[pixel[10]] < c_b)
- goto is_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
- if( p[pixel[5]] < c_b)
- if( p[pixel[6]] < c_b)
- if( p[pixel[7]] < c_b)
- if( p[pixel[8]] < c_b)
- if( p[pixel[9]] < c_b)
- if( p[pixel[10]] < c_b)
- goto is_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
- if( p[pixel[10]] > cb)
- if( p[pixel[7]] > cb)
- if( p[pixel[8]] > cb)
- if( p[pixel[9]] > cb)
- if( p[pixel[11]] > cb)
- if( p[pixel[12]] > cb)
- if( p[pixel[6]] > cb)
- if( p[pixel[5]] > cb)
- if( p[pixel[4]] > cb)
- goto is_a_corner;
- else
- if( p[pixel[13]] > cb)
- goto is_a_corner;
- else
- goto is_not_a_corner;
- else
- if( p[pixel[13]] > cb)
- if( p[pixel[14]] > cb)
- goto is_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
- if( p[pixel[13]] > cb)
- if( p[pixel[14]] > cb)
- if( p[pixel[15]] > cb)
- goto is_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else if( p[pixel[10]] < c_b)
- if( p[pixel[11]] < c_b)
- if( p[pixel[12]] < c_b)
- if( p[pixel[13]] < c_b)
- if( p[pixel[14]] < c_b)
- if( p[pixel[15]] < c_b)
- goto is_a_corner;
- else
- if( p[pixel[6]] < c_b)
- if( p[pixel[7]] < c_b)
- if( p[pixel[8]] < c_b)
- if( p[pixel[9]] < c_b)
- goto is_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
- if( p[pixel[5]] < c_b)
- if( p[pixel[6]] < c_b)
- if( p[pixel[7]] < c_b)
- if( p[pixel[8]] < c_b)
- if( p[pixel[9]] < c_b)
- goto is_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
- if( p[pixel[4]] < c_b)
- if( p[pixel[5]] < c_b)
- if( p[pixel[6]] < c_b)
- if( p[pixel[7]] < c_b)
- if( p[pixel[8]] < c_b)
- if( p[pixel[9]] < c_b)
- goto is_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
- if( p[pixel[9]] > cb)
- if( p[pixel[7]] > cb)
- if( p[pixel[8]] > cb)
- if( p[pixel[10]] > cb)
- if( p[pixel[11]] > cb)
- if( p[pixel[6]] > cb)
- if( p[pixel[5]] > cb)
- if( p[pixel[4]] > cb)
- if( p[pixel[3]] > cb)
- goto is_a_corner;
- else
- if( p[pixel[12]] > cb)
- goto is_a_corner;
- else
- goto is_not_a_corner;
- else
- if( p[pixel[12]] > cb)
- if( p[pixel[13]] > cb)
- goto is_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
- if( p[pixel[12]] > cb)
- if( p[pixel[13]] > cb)
- if( p[pixel[14]] > cb)
- goto is_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
- if( p[pixel[12]] > cb)
- if( p[pixel[13]] > cb)
- if( p[pixel[14]] > cb)
- if( p[pixel[15]] > cb)
- goto is_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else if( p[pixel[9]] < c_b)
- if( p[pixel[10]] < c_b)
- if( p[pixel[11]] < c_b)
- if( p[pixel[12]] < c_b)
- if( p[pixel[13]] < c_b)
- if( p[pixel[14]] < c_b)
- if( p[pixel[15]] < c_b)
- goto is_a_corner;
- else
- if( p[pixel[6]] < c_b)
- if( p[pixel[7]] < c_b)
- if( p[pixel[8]] < c_b)
- goto is_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
- if( p[pixel[5]] < c_b)
- if( p[pixel[6]] < c_b)
- if( p[pixel[7]] < c_b)
- if( p[pixel[8]] < c_b)
- goto is_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
- if( p[pixel[4]] < c_b)
- if( p[pixel[5]] < c_b)
- if( p[pixel[6]] < c_b)
- if( p[pixel[7]] < c_b)
- if( p[pixel[8]] < c_b)
- goto is_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
- if( p[pixel[3]] < c_b)
- if( p[pixel[4]] < c_b)
- if( p[pixel[5]] < c_b)
- if( p[pixel[6]] < c_b)
- if( p[pixel[7]] < c_b)
- if( p[pixel[8]] < c_b)
- goto is_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
- if( p[pixel[8]] > cb)
- if( p[pixel[7]] > cb)
- if( p[pixel[9]] > cb)
- if( p[pixel[10]] > cb)
- if( p[pixel[6]] > cb)
- if( p[pixel[5]] > cb)
- if( p[pixel[4]] > cb)
- if( p[pixel[3]] > cb)
- if( p[pixel[2]] > cb)
- goto is_a_corner;
- else
- if( p[pixel[11]] > cb)
- goto is_a_corner;
- else
- goto is_not_a_corner;
- else
- if( p[pixel[11]] > cb)
- if( p[pixel[12]] > cb)
- goto is_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
- if( p[pixel[11]] > cb)
- if( p[pixel[12]] > cb)
- if( p[pixel[13]] > cb)
- goto is_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
- if( p[pixel[11]] > cb)
- if( p[pixel[12]] > cb)
- if( p[pixel[13]] > cb)
- if( p[pixel[14]] > cb)
- goto is_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
- if( p[pixel[11]] > cb)
- if( p[pixel[12]] > cb)
- if( p[pixel[13]] > cb)
- if( p[pixel[14]] > cb)
- if( p[pixel[15]] > cb)
- goto is_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else if( p[pixel[8]] < c_b)
- if( p[pixel[9]] < c_b)
- if( p[pixel[10]] < c_b)
- if( p[pixel[11]] < c_b)
- if( p[pixel[12]] < c_b)
- if( p[pixel[13]] < c_b)
- if( p[pixel[14]] < c_b)
- if( p[pixel[15]] < c_b)
- goto is_a_corner;
- else
- if( p[pixel[6]] < c_b)
- if( p[pixel[7]] < c_b)
- goto is_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
- if( p[pixel[5]] < c_b)
- if( p[pixel[6]] < c_b)
- if( p[pixel[7]] < c_b)
- goto is_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
- if( p[pixel[4]] < c_b)
- if( p[pixel[5]] < c_b)
- if( p[pixel[6]] < c_b)
- if( p[pixel[7]] < c_b)
- goto is_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
- if( p[pixel[3]] < c_b)
- if( p[pixel[4]] < c_b)
- if( p[pixel[5]] < c_b)
- if( p[pixel[6]] < c_b)
- if( p[pixel[7]] < c_b)
- goto is_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
- if( p[pixel[2]] < c_b)
- if( p[pixel[3]] < c_b)
- if( p[pixel[4]] < c_b)
- if( p[pixel[5]] < c_b)
- if( p[pixel[6]] < c_b)
- if( p[pixel[7]] < c_b)
- goto is_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
- if( p[pixel[7]] > cb)
- if( p[pixel[8]] > cb)
- if( p[pixel[9]] > cb)
- if( p[pixel[6]] > cb)
- if( p[pixel[5]] > cb)
- if( p[pixel[4]] > cb)
- if( p[pixel[3]] > cb)
- if( p[pixel[2]] > cb)
- if( p[pixel[1]] > cb)
- goto is_a_corner;
- else
- if( p[pixel[10]] > cb)
- goto is_a_corner;
- else
- goto is_not_a_corner;
- else
- if( p[pixel[10]] > cb)
- if( p[pixel[11]] > cb)
- goto is_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
- if( p[pixel[10]] > cb)
- if( p[pixel[11]] > cb)
- if( p[pixel[12]] > cb)
- goto is_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
- if( p[pixel[10]] > cb)
- if( p[pixel[11]] > cb)
- if( p[pixel[12]] > cb)
- if( p[pixel[13]] > cb)
- goto is_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
- if( p[pixel[10]] > cb)
- if( p[pixel[11]] > cb)
- if( p[pixel[12]] > cb)
- if( p[pixel[13]] > cb)
- if( p[pixel[14]] > cb)
- goto is_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
- if( p[pixel[10]] > cb)
- if( p[pixel[11]] > cb)
- if( p[pixel[12]] > cb)
- if( p[pixel[13]] > cb)
- if( p[pixel[14]] > cb)
- if( p[pixel[15]] > cb)
- goto is_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else if( p[pixel[7]] < c_b)
- if( p[pixel[8]] < c_b)
- if( p[pixel[9]] < c_b)
- if( p[pixel[6]] < c_b)
- if( p[pixel[5]] < c_b)
- if( p[pixel[4]] < c_b)
- if( p[pixel[3]] < c_b)
- if( p[pixel[2]] < c_b)
- if( p[pixel[1]] < c_b)
- goto is_a_corner;
- else
- if( p[pixel[10]] < c_b)
- goto is_a_corner;
- else
- goto is_not_a_corner;
- else
- if( p[pixel[10]] < c_b)
- if( p[pixel[11]] < c_b)
- goto is_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
- if( p[pixel[10]] < c_b)
- if( p[pixel[11]] < c_b)
- if( p[pixel[12]] < c_b)
- goto is_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
- if( p[pixel[10]] < c_b)
- if( p[pixel[11]] < c_b)
- if( p[pixel[12]] < c_b)
- if( p[pixel[13]] < c_b)
- goto is_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
- if( p[pixel[10]] < c_b)
- if( p[pixel[11]] < c_b)
- if( p[pixel[12]] < c_b)
- if( p[pixel[13]] < c_b)
- if( p[pixel[14]] < c_b)
- goto is_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
- if( p[pixel[10]] < c_b)
- if( p[pixel[11]] < c_b)
- if( p[pixel[12]] < c_b)
- if( p[pixel[13]] < c_b)
- if( p[pixel[14]] < c_b)
- if( p[pixel[15]] < c_b)
- goto is_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
-
-is_a_corner:
- bmin=b;
- goto end_if;
-
-is_not_a_corner:
- bmax=b;
- goto end_if;
-
-end_if:
-
- if(bmin == bmax - 1 || bmin == bmax)
- return bmin;
- b = (bmin + bmax) / 2;
- }
-}
-
-/*
- * Fill pixel[0..15] with the offsets of the 16 ring positions that the
- * corner tests in this file read as p[pixel[k]], where p points at the
- * centre pixel.
- *
- * pixel       output array; entries 0..15 are written
- * row_stride  multiplier applied to the row displacement, so entry k is
- *             ring_col[k] + row_stride * ring_row[k]
- *
- * The column and row displacements below all lie between -3 and 3.
- */
-static void make_offsets(int pixel[], int row_stride)
-{
-  /* Column displacement of each ring position. */
-  static const int ring_col[16] = {
-     0,  1,  2,  3,  3,  3,  2,  1,
-     0, -1, -2, -3, -3, -3, -2, -1
-  };
-  /* Row displacement of each ring position. */
-  static const int ring_row[16] = {
-     3,  3,  2,  1,  0, -1, -2, -3,
-    -3, -3, -2, -1,  0,  1,  2,  3
-  };
-  int k;
-
-  for(k = 0; k < 16; k++)
-    pixel[k] = ring_col[k] + row_stride * ring_row[k];
-}
-
-
-
-/*
- * Compute one score per entry of corners[] by calling fast9_corner_score().
- *
- * i            image data; corner n is addressed as
- *              i + corners[n].y*stride + corners[n].x
- * stride       multiplier applied to the y coordinate when addressing the
- *              image; also passed to make_offsets() to build the ring offsets
- * corners      positions to score (read only through .x and .y)
- * num_corners  number of entries in corners[]
- * b            forwarded unchanged to fast9_corner_score()
- *
- * Returns an array of num_corners ints obtained from malloc(). Nothing in
- * this function frees it, so releasing it is left to the caller.
- * Returns NULL if the allocation fails.
- */
-int* fast9_score(const byte* i, int stride, xy* corners, int num_corners, int b)
-{
-  int* scores = (int*)malloc(sizeof(int)* num_corners);
-  int n;
-
-  int pixel[16];
-
-  /* Without this check a failed allocation would be written through in the
-     loop below. */
-  if(scores == NULL)
-    return NULL;
-
-  make_offsets(pixel, stride);
-
-  for(n=0; n < num_corners; n++)
-    scores[n] = fast9_corner_score(i + corners[n].y*stride + corners[n].x, pixel, b);
-
-  return scores;
-}
-
-
-xy* fast9_detect(const byte* im, int xsize, int ysize, int stride, int b, int* ret_num_corners)
-{
- int num_corners=0;
- xy* ret_corners;
- int rsize=512;
- int pixel[16];
- int x, y;
-
- ret_corners = (xy*)malloc(sizeof(xy)*rsize);
- make_offsets(pixel, stride);
-
- for(y=3; y < ysize - 3; y++)
- for(x=3; x < xsize - 3; x++)
- {
- const byte* p = im + y*stride + x;
-
- int cb = *p + b;
- int c_b= *p - b;
- if(p[pixel[0]] > cb)
- if(p[pixel[1]] > cb)
- if(p[pixel[2]] > cb)
- if(p[pixel[3]] > cb)
- if(p[pixel[4]] > cb)
- if(p[pixel[5]] > cb)
- if(p[pixel[6]] > cb)
- if(p[pixel[7]] > cb)
- if(p[pixel[8]] > cb)
- {}
- else
- if(p[pixel[15]] > cb)
- {}
- else
- continue;
- else if(p[pixel[7]] < c_b)
- if(p[pixel[14]] > cb)
- if(p[pixel[15]] > cb)
- {}
- else
- continue;
- else if(p[pixel[14]] < c_b)
- if(p[pixel[8]] < c_b)
- if(p[pixel[9]] < c_b)
- if(p[pixel[10]] < c_b)
- if(p[pixel[11]] < c_b)
- if(p[pixel[12]] < c_b)
- if(p[pixel[13]] < c_b)
- if(p[pixel[15]] < c_b)
- {}
- else
- continue;
- else
- continue;
- else
- continue;
- else
- continue;
- else
- continue;
- else
- continue;
- else
- continue;
- else
- continue;
- else
- if(p[pixel[14]] > cb)
- if(p[pixel[15]] > cb)
- {}
- else
- continue;
- else
- continue;
- else if(p[pixel[6]] < c_b)
- if(p[pixel[15]] > cb)
- if(p[pixel[13]] > cb)
- if(p[pixel[14]] > cb)
- {}
- else
- continue;
- else if(p[pixel[13]] < c_b)
- if(p[pixel[7]] < c_b)
- if(p[pixel[8]] < c_b)
- if(p[pixel[9]] < c_b)
- if(p[pixel[10]] < c_b)
- if(p[pixel[11]] < c_b)
- if(p[pixel[12]] < c_b)
- if(p[pixel[14]] < c_b)
- {}
- else
- continue;
- else
- continue;
- else
- continue;
- else
- continue;
- else
- continue;
- else
- continue;
- else
- continue;
- else
- continue;
- else
- if(p[pixel[7]] < c_b)
- if(p[pixel[8]] < c_b)
- if(p[pixel[9]] < c_b)
- if(p[pixel[10]] < c_b)
- if(p[pixel[11]] < c_b)
- if(p[pixel[12]] < c_b)
- if(p[pixel[13]] < c_b)
- if(p[pixel[14]] < c_b)
- {}
- else
- continue;
- else
- continue;
- else
- continue;
- else
- continue;
- else
- continue;
- else
- continue;
- else
- continue;
- else
- continue;
- else
- if(p[pixel[13]] > cb)
- if(p[pixel[14]] > cb)
- if(p[pixel[15]] > cb)
- {}
- else
- continue;
- else
- continue;
- else if(p[pixel[13]] < c_b)
- if(p[pixel[7]] < c_b)
- if(p[pixel[8]] < c_b)
- if(p[pixel[9]] < c_b)
- if(p[pixel[10]] < c_b)
- if(p[pixel[11]] < c_b)
- if(p[pixel[12]] < c_b)
- if(p[pixel[14]] < c_b)
- if(p[pixel[15]] < c_b)
- {}
- else
- continue;
- else
- continue;
- else
- continue;
- else
- continue;
- else
- continue;
- else
- continue;
- else
- continue;
- else
- continue;
- else
- continue;
- else if(p[pixel[5]] < c_b)
- if(p[pixel[14]] > cb)
- if(p[pixel[12]] > cb)
- if(p[pixel[13]] > cb)
- if(p[pixel[15]] > cb)
- {}
- else
- if(p[pixel[6]] > cb)
- if(p[pixel[7]] > cb)
- if(p[pixel[8]] > cb)
- if(p[pixel[9]] > cb)
- if(p[pixel[10]] > cb)
- if(p[pixel[11]] > cb)
- {}
- else
- continue;
- else
- continue;
- else
- continue;
- else
- continue;
- else
- continue;
- else
- continue;
- else
- continue;
- else if(p[pixel[12]] < c_b)
- if(p[pixel[6]] < c_b)
- if(p[pixel[7]] < c_b)
- if(p[pixel[8]] < c_b)
- if(p[pixel[9]] < c_b)
- if(p[pixel[10]] < c_b)
- if(p[pixel[11]] < c_b)
- if(p[pixel[13]] < c_b)
- {}
- else
- continue;
- else
- continue;
- else
- continue;
- else
- continue;
- else
- continue;
- else
- continue;
- else
- continue;
- else
- continue;
- else if(p[pixel[14]] < c_b)
- if(p[pixel[7]] < c_b)
- if(p[pixel[8]] < c_b)
- if(p[pixel[9]] < c_b)
- if(p[pixel[10]] < c_b)
- if(p[pixel[11]] < c_b)
- if(p[pixel[12]] < c_b)
- if(p[pixel[13]] < c_b)
- if(p[pixel[6]] < c_b)
- {}
- else
- if(p[pixel[15]] < c_b)
- {}
- else
- continue;
- else
- continue;
- else
- continue;
- else
- continue;
- else
- continue;
- else
- continue;
- else
- continue;
- else
- continue;
- else
- if(p[pixel[6]] < c_b)
- if(p[pixel[7]] < c_b)
- if(p[pixel[8]] < c_b)
- if(p[pixel[9]] < c_b)
- if(p[pixel[10]] < c_b)
- if(p[pixel[11]] < c_b)
- if(p[pixel[12]] < c_b)
- if(p[pixel[13]] < c_b)
- {}
- else
- continue;
- else
- continue;
- else
- continue;
- else
- continue;
- else
- continue;
- else
- continue;
- else
- continue;
- else
- continue;
- else
- if(p[pixel[12]] > cb)
- if(p[pixel[13]] > cb)
- if(p[pixel[14]] > cb)
- if(p[pixel[15]] > cb)
- {}
- else
- if(p[pixel[6]] > cb)
- if(p[pixel[7]] > cb)
- if(p[pixel[8]] > cb)
- if(p[pixel[9]] > cb)
- if(p[pixel[10]] > cb)
- if(p[pixel[11]] > cb)
- {}
- else
- continue;
- else
- continue;
- else
- continue;
- else
- continue;
- else
- continue;
- else
- continue;
- else
- continue;
- else
- continue;
- else if(p[pixel[12]] < c_b)
- if(p[pixel[7]] < c_b)
- if(p[pixel[8]] < c_b)
- if(p[pixel[9]] < c_b)
- if(p[pixel[10]] < c_b)
- if(p[pixel[11]] < c_b)
- if(p[pixel[13]] < c_b)
- if(p[pixel[14]] < c_b)
- if(p[pixel[6]] < c_b)
- {}
- else
- if(p[pixel[15]] < c_b)
- {}
- else
- continue;
- else
- continue;
- else
- continue;
- else
- continue;
- else
- continue;
- else
- continue;
- else
- continue;
- else
- continue;
- else
- continue;
- else if(p[pixel[4]] < c_b)
- if(p[pixel[13]] > cb)
- if(p[pixel[11]] > cb)
- if(p[pixel[12]] > cb)
- if(p[pixel[14]] > cb)
- if(p[pixel[15]] > cb)
- {}
- else
- if(p[pixel[6]] > cb)
- if(p[pixel[7]] > cb)
- if(p[pixel[8]] > cb)
- if(p[pixel[9]] > cb)
- if(p[pixel[10]] > cb)
- {}
- else
- continue;
- else
- continue;
- else
- continue;
- else
- continue;
- else
- continue;
- else
- if(p[pixel[5]] > cb)
- if(p[pixel[6]] > cb)
- if(p[pixel[7]] > cb)
- if(p[pixel[8]] > cb)
- if(p[pixel[9]] > cb)
- if(p[pixel[10]] > cb)
- {}
- else
- continue;
- else
- continue;
- else
- continue;
- else
- continue;
- else
- continue;
- else
- continue;
- else
- continue;
- else if(p[pixel[11]] < c_b)
- if(p[pixel[5]] < c_b)
- if(p[pixel[6]] < c_b)
- if(p[pixel[7]] < c_b)
- if(p[pixel[8]] < c_b)
- if(p[pixel[9]] < c_b)
- if(p[pixel[10]] < c_b)
- if(p[pixel[12]] < c_b)
- {}
- else
- continue;
- else
- continue;
- else
- continue;
- else
- continue;
- else
- continue;
- else
- continue;
- else
- continue;
- else
- continue;
- else if(p[pixel[13]] < c_b)
- if(p[pixel[7]] < c_b)
- if(p[pixel[8]] < c_b)
- if(p[pixel[9]] < c_b)
- if(p[pixel[10]] < c_b)
- if(p[pixel[11]] < c_b)
- if(p[pixel[12]] < c_b)
- if(p[pixel[6]] < c_b)
- if(p[pixel[5]] < c_b)
- {}
- else
- if(p[pixel[14]] < c_b)
- {}
- else
- continue;
- else
- if(p[pixel[14]] < c_b)
- if(p[pixel[15]] < c_b)
- {}
- else
- continue;
- else
- continue;
- else
- continue;
- else
- continue;
- else
- continue;
- else
- continue;
- else
- continue;
- else
- continue;
- else
- if(p[pixel[5]] < c_b)
- if(p[pixel[6]] < c_b)
- if(p[pixel[7]] < c_b)
- if(p[pixel[8]] < c_b)
- if(p[pixel[9]] < c_b)
- if(p[pixel[10]] < c_b)
- if(p[pixel[11]] < c_b)
- if(p[pixel[12]] < c_b)
- {}
- else
- continue;
- else
- continue;
- else
- continue;
- else
- continue;
- else
- continue;
- else
- continue;
- else
- continue;
- else
- continue;
- else
- if(p[pixel[11]] > cb)
- if(p[pixel[12]] > cb)
- if(p[pixel[13]] > cb)
- if(p[pixel[14]] > cb)
- if(p[pixel[15]] > cb)
- {}
- else
- if(p[pixel[6]] > cb)
- if(p[pixel[7]] > cb)
- if(p[pixel[8]] > cb)
- if(p[pixel[9]] > cb)
- if(p[pixel[10]] > cb)
- {}
- else
- continue;
- else
- continue;
- else
- continue;
- else
- continue;
- else
- continue;
- else
- if(p[pixel[5]] > cb)
- if(p[pixel[6]] > cb)
- if(p[pixel[7]] > cb)
- if(p[pixel[8]] > cb)
- if(p[pixel[9]] > cb)
- if(p[pixel[10]] > cb)
- {}
- else
- continue;
- else
- continue;
- else
- continue;
- else
- continue;
- else
- continue;
- else
- continue;
- else
- continue;
- else
- continue;
- else if(p[pixel[11]] < c_b)
- if(p[pixel[7]] < c_b)
- if(p[pixel[8]] < c_b)
- if(p[pixel[9]] < c_b)
- if(p[pixel[10]] < c_b)
- if(p[pixel[12]] < c_b)
- if(p[pixel[13]] < c_b)
- if(p[pixel[6]] < c_b)
- if(p[pixel[5]] < c_b)
- {}
- else
- if(p[pixel[14]] < c_b)
- {}
- else
- continue;
- else
- if(p[pixel[14]] < c_b)
- if(p[pixel[15]] < c_b)
- {}
- else
- continue;
- else
- continue;
- else
- continue;
- else
- continue;
- else
- continue;
- else
- continue;
- else
- continue;
- else
- continue;
- else
- continue;
- else if(p[pixel[3]] < c_b)
- if(p[pixel[10]] > cb)
- if(p[pixel[11]] > cb)
- if(p[pixel[12]] > cb)
- if(p[pixel[13]] > cb)
- if(p[pixel[14]] > cb)
- if(p[pixel[15]] > cb)
- {}
- else
- if(p[pixel[6]] > cb)
- if(p[pixel[7]] > cb)
- if(p[pixel[8]] > cb)
- if(p[pixel[9]] > cb)
- {}
- else
- continue;
- else
- continue;
- else
- continue;
- else
- continue;
- else
- if(p[pixel[5]] > cb)
- if(p[pixel[6]] > cb)
- if(p[pixel[7]] > cb)
- if(p[pixel[8]] > cb)
- if(p[pixel[9]] > cb)
- {}
- else
- continue;
- else
- continue;
- else
- continue;
- else
- continue;
- else
- continue;
- else
- if(p[pixel[4]] > cb)
- if(p[pixel[5]] > cb)
- if(p[pixel[6]] > cb)
- if(p[pixel[7]] > cb)
- if(p[pixel[8]] > cb)
- if(p[pixel[9]] > cb)
- {}
- else
- continue;
- else
- continue;
- else
- continue;
- else
- continue;
- else
- continue;
- else
- continue;
- else
- continue;
- else
- continue;
- else if(p[pixel[10]] < c_b)
- if(p[pixel[7]] < c_b)
- if(p[pixel[8]] < c_b)
- if(p[pixel[9]] < c_b)
- if(p[pixel[11]] < c_b)
- if(p[pixel[6]] < c_b)
- if(p[pixel[5]] < c_b)
- if(p[pixel[4]] < c_b)
- {}
- else
- if(p[pixel[12]] < c_b)
- if(p[pixel[13]] < c_b)
- {}
- else
- continue;
- else
- continue;
- else
- if(p[pixel[12]] < c_b)
- if(p[pixel[13]] < c_b)
- if(p[pixel[14]] < c_b)
- {}
- else
- continue;
- else
- continue;
- else
- continue;
- else
- if(p[pixel[12]] < c_b)
- if(p[pixel[13]] < c_b)
- if(p[pixel[14]] < c_b)
- if(p[pixel[15]] < c_b)
- {}
- else
- continue;
- else
- continue;
- else
- continue;
- else
- continue;
- else
- continue;
- else
- continue;
- else
- continue;
- else
- continue;
- else
- continue;
- else
- if(p[pixel[10]] > cb)
- if(p[pixel[11]] > cb)
- if(p[pixel[12]] > cb)
- if(p[pixel[13]] > cb)
- if(p[pixel[14]] > cb)
- if(p[pixel[15]] > cb)
- {}
- else
- if(p[pixel[6]] > cb)
- if(p[pixel[7]] > cb)
- if(p[pixel[8]] > cb)
- if(p[pixel[9]] > cb)
- {}
- else
- continue;
- else
- continue;
- else
- continue;
- else
- continue;
- else
- if(p[pixel[5]] > cb)
- if(p[pixel[6]] > cb)
- if(p[pixel[7]] > cb)
- if(p[pixel[8]] > cb)
- if(p[pixel[9]] > cb)
- {}
- else
- continue;
- else
- continue;
- else
- continue;
- else
- continue;
- else
- continue;
- else
- if(p[pixel[4]] > cb)
- if(p[pixel[5]] > cb)
- if(p[pixel[6]] > cb)
- if(p[pixel[7]] > cb)
- if(p[pixel[8]] > cb)
- if(p[pixel[9]] > cb)
- {}
- else
- continue;
- else
- continue;
- else
- continue;
- else
- continue;
- else
- continue;
- else
- continue;
- else
- continue;
- else
- continue;
- else if(p[pixel[10]] < c_b)
- if(p[pixel[7]] < c_b)
- if(p[pixel[8]] < c_b)
- if(p[pixel[9]] < c_b)
- if(p[pixel[11]] < c_b)
- if(p[pixel[12]] < c_b)
- if(p[pixel[6]] < c_b)
- if(p[pixel[5]] < c_b)
- if(p[pixel[4]] < c_b)
- {}
- else
- if(p[pixel[13]] < c_b)
- {}
- else
- continue;
- else
- if(p[pixel[13]] < c_b)
- if(p[pixel[14]] < c_b)
- {}
- else
- continue;
- else
- continue;
- else
- if(p[pixel[13]] < c_b)
- if(p[pixel[14]] < c_b)
- if(p[pixel[15]] < c_b)
- {}
- else
- continue;
- else
- continue;
- else
- continue;
- else
- continue;
- else
- continue;
- else
- continue;
- else
- continue;
- else
- continue;
- else
- continue;
- else if(p[pixel[2]] < c_b)
- if(p[pixel[9]] > cb)
- if(p[pixel[10]] > cb)
- if(p[pixel[11]] > cb)
- if(p[pixel[12]] > cb)
- if(p[pixel[13]] > cb)
- if(p[pixel[14]] > cb)
- if(p[pixel[15]] > cb)
- {}
- else
- if(p[pixel[6]] > cb)
- if(p[pixel[7]] > cb)
- if(p[pixel[8]] > cb)
- {}
- else
- continue;
- else
- continue;
- else
- continue;
- else
- if(p[pixel[5]] > cb)
- if(p[pixel[6]] > cb)
- if(p[pixel[7]] > cb)
- if(p[pixel[8]] > cb)
- {}
- else
- continue;
- else
- continue;
- else
- continue;
- else
- continue;
- else
- if(p[pixel[4]] > cb)
- if(p[pixel[5]] > cb)
- if(p[pixel[6]] > cb)
- if(p[pixel[7]] > cb)
- if(p[pixel[8]] > cb)
- {}
- else
- continue;
- else
- continue;
- else
- continue;
- else
- continue;
- else
- continue;
- else
- if(p[pixel[3]] > cb)
- if(p[pixel[4]] > cb)
- if(p[pixel[5]] > cb)
- if(p[pixel[6]] > cb)
- if(p[pixel[7]] > cb)
- if(p[pixel[8]] > cb)
- {}
- else
- continue;
- else
- continue;
- else
- continue;
- else
- continue;
- else
- continue;
- else
- continue;
- else
- continue;
- else
- continue;
- else if(p[pixel[9]] < c_b)
- if(p[pixel[7]] < c_b)
- if(p[pixel[8]] < c_b)
- if(p[pixel[10]] < c_b)
- if(p[pixel[6]] < c_b)
- if(p[pixel[5]] < c_b)
- if(p[pixel[4]] < c_b)
- if(p[pixel[3]] < c_b)
- {}
- else
- if(p[pixel[11]] < c_b)
- if(p[pixel[12]] < c_b)
- {}
- else
- continue;
- else
- continue;
- else
- if(p[pixel[11]] < c_b)
- if(p[pixel[12]] < c_b)
- if(p[pixel[13]] < c_b)
- {}
- else
- continue;
- else
- continue;
- else
- continue;
- else
- if(p[pixel[11]] < c_b)
- if(p[pixel[12]] < c_b)
- if(p[pixel[13]] < c_b)
- if(p[pixel[14]] < c_b)
- {}
- else
- continue;
- else
- continue;
- else
- continue;
- else
- continue;
- else
- if(p[pixel[11]] < c_b)
- if(p[pixel[12]] < c_b)
- if(p[pixel[13]] < c_b)
- if(p[pixel[14]] < c_b)
- if(p[pixel[15]] < c_b)
- {}
- else
- continue;
- else
- continue;
- else
- continue;
- else
- continue;
- else
- continue;
- else
- continue;
- else
- continue;
- else
- continue;
- else
- continue;
- else
- if(p[pixel[9]] > cb)
- if(p[pixel[10]] > cb)
- if(p[pixel[11]] > cb)
- if(p[pixel[12]] > cb)
- if(p[pixel[13]] > cb)
- if(p[pixel[14]] > cb)
- if(p[pixel[15]] > cb)
- {}
- else
- if(p[pixel[6]] > cb)
- if(p[pixel[7]] > cb)
- if(p[pixel[8]] > cb)
- {}
- else
- continue;
- else
- continue;
- else
- continue;
- else
- if(p[pixel[5]] > cb)
- if(p[pixel[6]] > cb)
- if(p[pixel[7]] > cb)
- if(p[pixel[8]] > cb)
- {}
- else
- continue;
- else
- continue;
- else
- continue;
- else
- continue;
- else
- if(p[pixel[4]] > cb)
- if(p[pixel[5]] > cb)
- if(p[pixel[6]] > cb)
- if(p[pixel[7]] > cb)
- if(p[pixel[8]] > cb)
- {}
- else
- continue;
- else
- continue;
- else
- continue;
- else
- continue;
- else
- continue;
- else
- if(p[pixel[3]] > cb)
- if(p[pixel[4]] > cb)
- if(p[pixel[5]] > cb)
- if(p[pixel[6]] > cb)
- if(p[pixel[7]] > cb)
- if(p[pixel[8]] > cb)
- {}
- else
- continue;
- else
- continue;
- else
- continue;
- else
- continue;
- else
- continue;
- else
- continue;
- else
- continue;
- else
- continue;
- else if(p[pixel[9]] < c_b)
- if(p[pixel[7]] < c_b)
- if(p[pixel[8]] < c_b)
- if(p[pixel[10]] < c_b)
- if(p[pixel[11]] < c_b)
- if(p[pixel[6]] < c_b)
- if(p[pixel[5]] < c_b)
- if(p[pixel[4]] < c_b)
- if(p[pixel[3]] < c_b)
- {}
- else
- if(p[pixel[12]] < c_b)
- {}
- else
- continue;
- else
- if(p[pixel[12]] < c_b)
- if(p[pixel[13]] < c_b)
- {}
- else
- continue;
- else
- continue;
- else
- if(p[pixel[12]] < c_b)
- if(p[pixel[13]] < c_b)
- if(p[pixel[14]] < c_b)
- {}
- else
- continue;
- else
- continue;
- else
- continue;
- else
- if(p[pixel[12]] < c_b)
- if(p[pixel[13]] < c_b)
- if(p[pixel[14]] < c_b)
- if(p[pixel[15]] < c_b)
- {}
- else
- continue;
- else
- continue;
- else
- continue;
- else
- continue;
- else
- continue;
- else
- continue;
- else
- continue;
- else
- continue;
- else
- continue;
- else if(p[pixel[1]] < c_b)
- if(p[pixel[8]] > cb)
- if(p[pixel[9]] > cb)
- if(p[pixel[10]] > cb)
- if(p[pixel[11]] > cb)
- if(p[pixel[12]] > cb)
- if(p[pixel[13]] > cb)
- if(p[pixel[14]] > cb)
- if(p[pixel[15]] > cb)
- {}
- else
- if(p[pixel[6]] > cb)
- if(p[pixel[7]] > cb)
- {}
- else
- continue;
- else
- continue;
- else
- if(p[pixel[5]] > cb)
- if(p[pixel[6]] > cb)
- if(p[pixel[7]] > cb)
- {}
- else
- continue;
- else
- continue;
- else
- continue;
- else
- if(p[pixel[4]] > cb)
- if(p[pixel[5]] > cb)
- if(p[pixel[6]] > cb)
- if(p[pixel[7]] > cb)
- {}
- else
- continue;
- else
- continue;
- else
- continue;
- else
- continue;
- else
- if(p[pixel[3]] > cb)
- if(p[pixel[4]] > cb)
- if(p[pixel[5]] > cb)
- if(p[pixel[6]] > cb)
- if(p[pixel[7]] > cb)
- {}
- else
- continue;
- else
- continue;
- else
- continue;
- else
- continue;
- else
- continue;
- else
- if(p[pixel[2]] > cb)
- if(p[pixel[3]] > cb)
- if(p[pixel[4]] > cb)
- if(p[pixel[5]] > cb)
- if(p[pixel[6]] > cb)
- if(p[pixel[7]] > cb)
- {}
- else
- continue;
- else
- continue;
- else
- continue;
- else
- continue;
- else
- continue;
- else
- continue;
- else
- continue;
- else
- continue;
- else if(p[pixel[8]] < c_b)
- if(p[pixel[7]] < c_b)
- if(p[pixel[9]] < c_b)
- if(p[pixel[6]] < c_b)
- if(p[pixel[5]] < c_b)
- if(p[pixel[4]] < c_b)
- if(p[pixel[3]] < c_b)
- if(p[pixel[2]] < c_b)
- {}
- else
- if(p[pixel[10]] < c_b)
- if(p[pixel[11]] < c_b)
- {}
- else
- continue;
- else
- continue;
- else
- if(p[pixel[10]] < c_b)
- if(p[pixel[11]] < c_b)
- if(p[pixel[12]] < c_b)
- {}
- else
- continue;
- else
- continue;
- else
- continue;
- else
- if(p[pixel[10]] < c_b)
- if(p[pixel[11]] < c_b)
- if(p[pixel[12]] < c_b)
- if(p[pixel[13]] < c_b)
- {}
- else
- continue;
- else
- continue;
- else
- continue;
- else
- continue;
- else
- if(p[pixel[10]] < c_b)
- if(p[pixel[11]] < c_b)
- if(p[pixel[12]] < c_b)
- if(p[pixel[13]] < c_b)
- if(p[pixel[14]] < c_b)
- {}
- else
- continue;
- else
- continue;
- else
- continue;
- else
- continue;
- else
- continue;
- else
- if(p[pixel[10]] < c_b)
- if(p[pixel[11]] < c_b)
- if(p[pixel[12]] < c_b)
- if(p[pixel[13]] < c_b)
- if(p[pixel[14]] < c_b)
- if(p[pixel[15]] < c_b)
- {}
- else
- continue;
- else
- continue;
- else
- continue;
- else
- continue;
- else
- continue;
- else
- continue;
- else
- continue;
- else
- continue;
- else
- continue;
- else
- if(p[pixel[8]] > cb)
- if(p[pixel[9]] > cb)
- if(p[pixel[10]] > cb)
- if(p[pixel[11]] > cb)
- if(p[pixel[12]] > cb)
- if(p[pixel[13]] > cb)
- if(p[pixel[14]] > cb)
- if(p[pixel[15]] > cb)
- {}
- else
- if(p[pixel[6]] > cb)
- if(p[pixel[7]] > cb)
- {}
- else
- continue;
- else
- continue;
- else
- if(p[pixel[5]] > cb)
- if(p[pixel[6]] > cb)
- if(p[pixel[7]] > cb)
- {}
- else
- continue;
- else
- continue;
- else
- continue;
- else
- if(p[pixel[4]] > cb)
- if(p[pixel[5]] > cb)
- if(p[pixel[6]] > cb)
- if(p[pixel[7]] > cb)
- {}
- else
- continue;
- else
- continue;
- else
- continue;
- else
- continue;
- else
- if(p[pixel[3]] > cb)
- if(p[pixel[4]] > cb)
- if(p[pixel[5]] > cb)
- if(p[pixel[6]] > cb)
- if(p[pixel[7]] > cb)
- {}
- else
- continue;
- else
- continue;
- else
- continue;
- else
- continue;
- else
- continue;
- else
- if(p[pixel[2]] > cb)
- if(p[pixel[3]] > cb)
- if(p[pixel[4]] > cb)
- if(p[pixel[5]] > cb)
- if(p[pixel[6]] > cb)
- if(p[pixel[7]] > cb)
- {}
- else
- continue;
- else
- continue;
- else
- continue;
- else
- continue;
- else
- continue;
- else
- continue;
- else
- continue;
- else
- continue;
- else if(p[pixel[8]] < c_b)
- if(p[pixel[7]] < c_b)
- if(p[pixel[9]] < c_b)
- if(p[pixel[10]] < c_b)
- if(p[pixel[6]] < c_b)
- if(p[pixel[5]] < c_b)
- if(p[pixel[4]] < c_b)
- if(p[pixel[3]] < c_b)
- if(p[pixel[2]] < c_b)
- {}
- else
- if(p[pixel[11]] < c_b)
- {}
- else
- continue;
- else
- if(p[pixel[11]] < c_b)
- if(p[pixel[12]] < c_b)
- {}
- else
- continue;
- else
- continue;
- else
- if(p[pixel[11]] < c_b)
- if(p[pixel[12]] < c_b)
- if(p[pixel[13]] < c_b)
- {}
- else
- continue;
- else
- continue;
- else
- continue;
- else
- if(p[pixel[11]] < c_b)
- if(p[pixel[12]] < c_b)
- if(p[pixel[13]] < c_b)
- if(p[pixel[14]] < c_b)
- {}
- else
- continue;
- else
- continue;
- else
- continue;
- else
- continue;
- else
- if(p[pixel[11]] < c_b)
- if(p[pixel[12]] < c_b)
- if(p[pixel[13]] < c_b)
- if(p[pixel[14]] < c_b)
- if(p[pixel[15]] < c_b)
- {}
- else
- continue;
- else
- continue;
- else
- continue;
- else
- continue;
- else
- continue;
- else
- continue;
- else
- continue;
- else
- continue;
- else
- continue;
- else if(p[pixel[0]] < c_b)
- if(p[pixel[1]] > cb)
- if(p[pixel[8]] > cb)
- if(p[pixel[7]] > cb)
- if(p[pixel[9]] > cb)
- if(p[pixel[6]] > cb)
- if(p[pixel[5]] > cb)
- if(p[pixel[4]] > cb)
- if(p[pixel[3]] > cb)
- if(p[pixel[2]] > cb)
- {}
- else
- if(p[pixel[10]] > cb)
- if(p[pixel[11]] > cb)
- {}
- else
- continue;
- else
- continue;
- else
- if(p[pixel[10]] > cb)
- if(p[pixel[11]] > cb)
- if(p[pixel[12]] > cb)
- {}
- else
- continue;
- else
- continue;
- else
- continue;
- else
- if(p[pixel[10]] > cb)
- if(p[pixel[11]] > cb)
- if(p[pixel[12]] > cb)
- if(p[pixel[13]] > cb)
- {}
- else
- continue;
- else
- continue;
- else
- continue;
- else
- continue;
- else
- if(p[pixel[10]] > cb)
- if(p[pixel[11]] > cb)
- if(p[pixel[12]] > cb)
- if(p[pixel[13]] > cb)
- if(p[pixel[14]] > cb)
- {}
- else
- continue;
- else
- continue;
- else
- continue;
- else
- continue;
- else
- continue;
- else
- if(p[pixel[10]] > cb)
- if(p[pixel[11]] > cb)
- if(p[pixel[12]] > cb)
- if(p[pixel[13]] > cb)
- if(p[pixel[14]] > cb)
- if(p[pixel[15]] > cb)
- {}
- else
- continue;
- else
- continue;
- else
- continue;
- else
- continue;
- else
- continue;
- else
- continue;
- else
- continue;
- else
- continue;
- else if(p[pixel[8]] < c_b)
- if(p[pixel[9]] < c_b)
- if(p[pixel[10]] < c_b)
- if(p[pixel[11]] < c_b)
- if(p[pixel[12]] < c_b)
- if(p[pixel[13]] < c_b)
- if(p[pixel[14]] < c_b)
- if(p[pixel[15]] < c_b)
- {}
- else
- if(p[pixel[6]] < c_b)
- if(p[pixel[7]] < c_b)
- {}
- else
- continue;
- else
- continue;
- else
- if(p[pixel[5]] < c_b)
- if(p[pixel[6]] < c_b)
- if(p[pixel[7]] < c_b)
- {}
- else
- continue;
- else
- continue;
- else
- continue;
- else
- if(p[pixel[4]] < c_b)
- if(p[pixel[5]] < c_b)
- if(p[pixel[6]] < c_b)
- if(p[pixel[7]] < c_b)
- {}
- else
- continue;
- else
- continue;
- else
- continue;
- else
- continue;
- else
- if(p[pixel[3]] < c_b)
- if(p[pixel[4]] < c_b)
- if(p[pixel[5]] < c_b)
- if(p[pixel[6]] < c_b)
- if(p[pixel[7]] < c_b)
- {}
- else
- continue;
- else
- continue;
- else
- continue;
- else
- continue;
- else
- continue;
- else
- if(p[pixel[2]] < c_b)
- if(p[pixel[3]] < c_b)
- if(p[pixel[4]] < c_b)
- if(p[pixel[5]] < c_b)
- if(p[pixel[6]] < c_b)
- if(p[pixel[7]] < c_b)
- {}
- else
- continue;
- else
- continue;
- else
- continue;
- else
- continue;
- else
- continue;
- else
- continue;
- else
- continue;
- else
- continue;
- else
- continue;
- else if(p[pixel[1]] < c_b)
- if(p[pixel[2]] > cb)
- if(p[pixel[9]] > cb)
- if(p[pixel[7]] > cb)
- if(p[pixel[8]] > cb)
- if(p[pixel[10]] > cb)
- if(p[pixel[6]] > cb)
- if(p[pixel[5]] > cb)
- if(p[pixel[4]] > cb)
- if(p[pixel[3]] > cb)
- {}
- else
- if(p[pixel[11]] > cb)
- if(p[pixel[12]] > cb)
- {}
- else
- continue;
- else
- continue;
- else
- if(p[pixel[11]] > cb)
- if(p[pixel[12]] > cb)
- if(p[pixel[13]] > cb)
- {}
- else
- continue;
- else
- continue;
- else
- continue;
- else
- if(p[pixel[11]] > cb)
- if(p[pixel[12]] > cb)
- if(p[pixel[13]] > cb)
- if(p[pixel[14]] > cb)
- {}
- else
- continue;
- else
- continue;
- else
- continue;
- else
- continue;
- else
- if(p[pixel[11]] > cb)
- if(p[pixel[12]] > cb)
- if(p[pixel[13]] > cb)
- if(p[pixel[14]] > cb)
- if(p[pixel[15]] > cb)
- {}
- else
- continue;
- else
- continue;
- else
- continue;
- else
- continue;
- else
- continue;
- else
- continue;
- else
- continue;
- else
- continue;
- else if(p[pixel[9]] < c_b)
- if(p[pixel[10]] < c_b)
- if(p[pixel[11]] < c_b)
- if(p[pixel[12]] < c_b)
- if(p[pixel[13]] < c_b)
- if(p[pixel[14]] < c_b)
- if(p[pixel[15]] < c_b)
- {}
- else
- if(p[pixel[6]] < c_b)
- if(p[pixel[7]] < c_b)
- if(p[pixel[8]] < c_b)
- {}
- else
- continue;
- else
- continue;
- else
- continue;
- else
- if(p[pixel[5]] < c_b)
- if(p[pixel[6]] < c_b)
- if(p[pixel[7]] < c_b)
- if(p[pixel[8]] < c_b)
- {}
- else
- continue;
- else
- continue;
- else
- continue;
- else
- continue;
- else
- if(p[pixel[4]] < c_b)
- if(p[pixel[5]] < c_b)
- if(p[pixel[6]] < c_b)
- if(p[pixel[7]] < c_b)
- if(p[pixel[8]] < c_b)
- {}
- else
- continue;
- else
- continue;
- else
- continue;
- else
- continue;
- else
- continue;
- else
- if(p[pixel[3]] < c_b)
- if(p[pixel[4]] < c_b)
- if(p[pixel[5]] < c_b)
- if(p[pixel[6]] < c_b)
- if(p[pixel[7]] < c_b)
- if(p[pixel[8]] < c_b)
- {}
- else
- continue;
- else
- continue;
- else
- continue;
- else
- continue;
- else
- continue;
- else
- continue;
- else
- continue;
- else
- continue;
- else
- continue;
- else if(p[pixel[2]] < c_b)
- if(p[pixel[3]] > cb)
- if(p[pixel[10]] > cb)
- if(p[pixel[7]] > cb)
- if(p[pixel[8]] > cb)
- if(p[pixel[9]] > cb)
- if(p[pixel[11]] > cb)
- if(p[pixel[6]] > cb)
- if(p[pixel[5]] > cb)
- if(p[pixel[4]] > cb)
- {}
- else
- if(p[pixel[12]] > cb)
- if(p[pixel[13]] > cb)
- {}
- else
- continue;
- else
- continue;
- else
- if(p[pixel[12]] > cb)
- if(p[pixel[13]] > cb)
- if(p[pixel[14]] > cb)
- {}
- else
- continue;
- else
- continue;
- else
- continue;
- else
- if(p[pixel[12]] > cb)
- if(p[pixel[13]] > cb)
- if(p[pixel[14]] > cb)
- if(p[pixel[15]] > cb)
- {}
- else
- continue;
- else
- continue;
- else
- continue;
- else
- continue;
- else
- continue;
- else
- continue;
- else
- continue;
- else
- continue;
- else if(p[pixel[10]] < c_b)
- if(p[pixel[11]] < c_b)
- if(p[pixel[12]] < c_b)
- if(p[pixel[13]] < c_b)
- if(p[pixel[14]] < c_b)
- if(p[pixel[15]] < c_b)
- {}
- else
- if(p[pixel[6]] < c_b)
- if(p[pixel[7]] < c_b)
- if(p[pixel[8]] < c_b)
- if(p[pixel[9]] < c_b)
- {}
- else
- continue;
- else
- continue;
- else
- continue;
- else
- continue;
- else
- if(p[pixel[5]] < c_b)
- if(p[pixel[6]] < c_b)
- if(p[pixel[7]] < c_b)
- if(p[pixel[8]] < c_b)
- if(p[pixel[9]] < c_b)
- {}
- else
- continue;
- else
- continue;
- else
- continue;
- else
- continue;
- else
- continue;
- else
- if(p[pixel[4]] < c_b)
- if(p[pixel[5]] < c_b)
- if(p[pixel[6]] < c_b)
- if(p[pixel[7]] < c_b)
- if(p[pixel[8]] < c_b)
- if(p[pixel[9]] < c_b)
- {}
- else
- continue;
- else
- continue;
- else
- continue;
- else
- continue;
- else
- continue;
- else
- continue;
- else
- continue;
- else
- continue;
- else
- continue;
- else if(p[pixel[3]] < c_b)
- if(p[pixel[4]] > cb)
- if(p[pixel[13]] > cb)
- if(p[pixel[7]] > cb)
- if(p[pixel[8]] > cb)
- if(p[pixel[9]] > cb)
- if(p[pixel[10]] > cb)
- if(p[pixel[11]] > cb)
- if(p[pixel[12]] > cb)
- if(p[pixel[6]] > cb)
- if(p[pixel[5]] > cb)
- {}
- else
- if(p[pixel[14]] > cb)
- {}
- else
- continue;
- else
- if(p[pixel[14]] > cb)
- if(p[pixel[15]] > cb)
- {}
- else
- continue;
- else
- continue;
- else
- continue;
- else
- continue;
- else
- continue;
- else
- continue;
- else
- continue;
- else
- continue;
- else if(p[pixel[13]] < c_b)
- if(p[pixel[11]] > cb)
- if(p[pixel[5]] > cb)
- if(p[pixel[6]] > cb)
- if(p[pixel[7]] > cb)
- if(p[pixel[8]] > cb)
- if(p[pixel[9]] > cb)
- if(p[pixel[10]] > cb)
- if(p[pixel[12]] > cb)
- {}
- else
- continue;
- else
- continue;
- else
- continue;
- else
- continue;
- else
- continue;
- else
- continue;
- else
- continue;
- else if(p[pixel[11]] < c_b)
- if(p[pixel[12]] < c_b)
- if(p[pixel[14]] < c_b)
- if(p[pixel[15]] < c_b)
- {}
- else
- if(p[pixel[6]] < c_b)
- if(p[pixel[7]] < c_b)
- if(p[pixel[8]] < c_b)
- if(p[pixel[9]] < c_b)
- if(p[pixel[10]] < c_b)
- {}
- else
- continue;
- else
- continue;
- else
- continue;
- else
- continue;
- else
- continue;
- else
- if(p[pixel[5]] < c_b)
- if(p[pixel[6]] < c_b)
- if(p[pixel[7]] < c_b)
- if(p[pixel[8]] < c_b)
- if(p[pixel[9]] < c_b)
- if(p[pixel[10]] < c_b)
- {}
- else
- continue;
- else
- continue;
- else
- continue;
- else
- continue;
- else
- continue;
- else
- continue;
- else
- continue;
- else
- continue;
- else
- if(p[pixel[5]] > cb)
- if(p[pixel[6]] > cb)
- if(p[pixel[7]] > cb)
- if(p[pixel[8]] > cb)
- if(p[pixel[9]] > cb)
- if(p[pixel[10]] > cb)
- if(p[pixel[11]] > cb)
- if(p[pixel[12]] > cb)
- {}
- else
- continue;
- else
- continue;
- else
- continue;
- else
- continue;
- else
- continue;
- else
- continue;
- else
- continue;
- else
- continue;
- else if(p[pixel[4]] < c_b)
- if(p[pixel[5]] > cb)
- if(p[pixel[14]] > cb)
- if(p[pixel[7]] > cb)
- if(p[pixel[8]] > cb)
- if(p[pixel[9]] > cb)
- if(p[pixel[10]] > cb)
- if(p[pixel[11]] > cb)
- if(p[pixel[12]] > cb)
- if(p[pixel[13]] > cb)
- if(p[pixel[6]] > cb)
- {}
- else
- if(p[pixel[15]] > cb)
- {}
- else
- continue;
- else
- continue;
- else
- continue;
- else
- continue;
- else
- continue;
- else
- continue;
- else
- continue;
- else
- continue;
- else if(p[pixel[14]] < c_b)
- if(p[pixel[12]] > cb)
- if(p[pixel[6]] > cb)
- if(p[pixel[7]] > cb)
- if(p[pixel[8]] > cb)
- if(p[pixel[9]] > cb)
- if(p[pixel[10]] > cb)
- if(p[pixel[11]] > cb)
- if(p[pixel[13]] > cb)
- {}
- else
- continue;
- else
- continue;
- else
- continue;
- else
- continue;
- else
- continue;
- else
- continue;
- else
- continue;
- else if(p[pixel[12]] < c_b)
- if(p[pixel[13]] < c_b)
- if(p[pixel[15]] < c_b)
- {}
- else
- if(p[pixel[6]] < c_b)
- if(p[pixel[7]] < c_b)
- if(p[pixel[8]] < c_b)
- if(p[pixel[9]] < c_b)
- if(p[pixel[10]] < c_b)
- if(p[pixel[11]] < c_b)
- {}
- else
- continue;
- else
- continue;
- else
- continue;
- else
- continue;
- else
- continue;
- else
- continue;
- else
- continue;
- else
- continue;
- else
- if(p[pixel[6]] > cb)
- if(p[pixel[7]] > cb)
- if(p[pixel[8]] > cb)
- if(p[pixel[9]] > cb)
- if(p[pixel[10]] > cb)
- if(p[pixel[11]] > cb)
- if(p[pixel[12]] > cb)
- if(p[pixel[13]] > cb)
- {}
- else
- continue;
- else
- continue;
- else
- continue;
- else
- continue;
- else
- continue;
- else
- continue;
- else
- continue;
- else
- continue;
- else if(p[pixel[5]] < c_b)
- if(p[pixel[6]] > cb)
- if(p[pixel[15]] < c_b)
- if(p[pixel[13]] > cb)
- if(p[pixel[7]] > cb)
- if(p[pixel[8]] > cb)
- if(p[pixel[9]] > cb)
- if(p[pixel[10]] > cb)
- if(p[pixel[11]] > cb)
- if(p[pixel[12]] > cb)
- if(p[pixel[14]] > cb)
- {}
- else
- continue;
- else
- continue;
- else
- continue;
- else
- continue;
- else
- continue;
- else
- continue;
- else
- continue;
- else if(p[pixel[13]] < c_b)
- if(p[pixel[14]] < c_b)
- {}
- else
- continue;
- else
- continue;
- else
- if(p[pixel[7]] > cb)
- if(p[pixel[8]] > cb)
- if(p[pixel[9]] > cb)
- if(p[pixel[10]] > cb)
- if(p[pixel[11]] > cb)
- if(p[pixel[12]] > cb)
- if(p[pixel[13]] > cb)
- if(p[pixel[14]] > cb)
- {}
- else
- continue;
- else
- continue;
- else
- continue;
- else
- continue;
- else
- continue;
- else
- continue;
- else
- continue;
- else
- continue;
- else if(p[pixel[6]] < c_b)
- if(p[pixel[7]] > cb)
- if(p[pixel[14]] > cb)
- if(p[pixel[8]] > cb)
- if(p[pixel[9]] > cb)
- if(p[pixel[10]] > cb)
- if(p[pixel[11]] > cb)
- if(p[pixel[12]] > cb)
- if(p[pixel[13]] > cb)
- if(p[pixel[15]] > cb)
- {}
- else
- continue;
- else
- continue;
- else
- continue;
- else
- continue;
- else
- continue;
- else
- continue;
- else
- continue;
- else if(p[pixel[14]] < c_b)
- if(p[pixel[15]] < c_b)
- {}
- else
- continue;
- else
- continue;
- else if(p[pixel[7]] < c_b)
- if(p[pixel[8]] < c_b)
- {}
- else
- if(p[pixel[15]] < c_b)
- {}
- else
- continue;
- else
- if(p[pixel[14]] < c_b)
- if(p[pixel[15]] < c_b)
- {}
- else
- continue;
- else
- continue;
- else
- if(p[pixel[13]] > cb)
- if(p[pixel[7]] > cb)
- if(p[pixel[8]] > cb)
- if(p[pixel[9]] > cb)
- if(p[pixel[10]] > cb)
- if(p[pixel[11]] > cb)
- if(p[pixel[12]] > cb)
- if(p[pixel[14]] > cb)
- if(p[pixel[15]] > cb)
- {}
- else
- continue;
- else
- continue;
- else
- continue;
- else
- continue;
- else
- continue;
- else
- continue;
- else
- continue;
- else
- continue;
- else if(p[pixel[13]] < c_b)
- if(p[pixel[14]] < c_b)
- if(p[pixel[15]] < c_b)
- {}
- else
- continue;
- else
- continue;
- else
- continue;
- else
- if(p[pixel[12]] > cb)
- if(p[pixel[7]] > cb)
- if(p[pixel[8]] > cb)
- if(p[pixel[9]] > cb)
- if(p[pixel[10]] > cb)
- if(p[pixel[11]] > cb)
- if(p[pixel[13]] > cb)
- if(p[pixel[14]] > cb)
- if(p[pixel[6]] > cb)
- {}
- else
- if(p[pixel[15]] > cb)
- {}
- else
- continue;
- else
- continue;
- else
- continue;
- else
- continue;
- else
- continue;
- else
- continue;
- else
- continue;
- else
- continue;
- else if(p[pixel[12]] < c_b)
- if(p[pixel[13]] < c_b)
- if(p[pixel[14]] < c_b)
- if(p[pixel[15]] < c_b)
- {}
- else
- if(p[pixel[6]] < c_b)
- if(p[pixel[7]] < c_b)
- if(p[pixel[8]] < c_b)
- if(p[pixel[9]] < c_b)
- if(p[pixel[10]] < c_b)
- if(p[pixel[11]] < c_b)
- {}
- else
- continue;
- else
- continue;
- else
- continue;
- else
- continue;
- else
- continue;
- else
- continue;
- else
- continue;
- else
- continue;
- else
- continue;
- else
- if(p[pixel[11]] > cb)
- if(p[pixel[7]] > cb)
- if(p[pixel[8]] > cb)
- if(p[pixel[9]] > cb)
- if(p[pixel[10]] > cb)
- if(p[pixel[12]] > cb)
- if(p[pixel[13]] > cb)
- if(p[pixel[6]] > cb)
- if(p[pixel[5]] > cb)
- {}
- else
- if(p[pixel[14]] > cb)
- {}
- else
- continue;
- else
- if(p[pixel[14]] > cb)
- if(p[pixel[15]] > cb)
- {}
- else
- continue;
- else
- continue;
- else
- continue;
- else
- continue;
- else
- continue;
- else
- continue;
- else
- continue;
- else
- continue;
- else if(p[pixel[11]] < c_b)
- if(p[pixel[12]] < c_b)
- if(p[pixel[13]] < c_b)
- if(p[pixel[14]] < c_b)
- if(p[pixel[15]] < c_b)
- {}
- else
- if(p[pixel[6]] < c_b)
- if(p[pixel[7]] < c_b)
- if(p[pixel[8]] < c_b)
- if(p[pixel[9]] < c_b)
- if(p[pixel[10]] < c_b)
- {}
- else
- continue;
- else
- continue;
- else
- continue;
- else
- continue;
- else
- continue;
- else
- if(p[pixel[5]] < c_b)
- if(p[pixel[6]] < c_b)
- if(p[pixel[7]] < c_b)
- if(p[pixel[8]] < c_b)
- if(p[pixel[9]] < c_b)
- if(p[pixel[10]] < c_b)
- {}
- else
- continue;
- else
- continue;
- else
- continue;
- else
- continue;
- else
- continue;
- else
- continue;
- else
- continue;
- else
- continue;
- else
- continue;
- else
- if(p[pixel[10]] > cb)
- if(p[pixel[7]] > cb)
- if(p[pixel[8]] > cb)
- if(p[pixel[9]] > cb)
- if(p[pixel[11]] > cb)
- if(p[pixel[12]] > cb)
- if(p[pixel[6]] > cb)
- if(p[pixel[5]] > cb)
- if(p[pixel[4]] > cb)
- {}
- else
- if(p[pixel[13]] > cb)
- {}
- else
- continue;
- else
- if(p[pixel[13]] > cb)
- if(p[pixel[14]] > cb)
- {}
- else
- continue;
- else
- continue;
- else
- if(p[pixel[13]] > cb)
- if(p[pixel[14]] > cb)
- if(p[pixel[15]] > cb)
- {}
- else
- continue;
- else
- continue;
- else
- continue;
- else
- continue;
- else
- continue;
- else
- continue;
- else
- continue;
- else
- continue;
- else if(p[pixel[10]] < c_b)
- if(p[pixel[11]] < c_b)
- if(p[pixel[12]] < c_b)
- if(p[pixel[13]] < c_b)
- if(p[pixel[14]] < c_b)
- if(p[pixel[15]] < c_b)
- {}
- else
- if(p[pixel[6]] < c_b)
- if(p[pixel[7]] < c_b)
- if(p[pixel[8]] < c_b)
- if(p[pixel[9]] < c_b)
- {}
- else
- continue;
- else
- continue;
- else
- continue;
- else
- continue;
- else
- if(p[pixel[5]] < c_b)
- if(p[pixel[6]] < c_b)
- if(p[pixel[7]] < c_b)
- if(p[pixel[8]] < c_b)
- if(p[pixel[9]] < c_b)
- {}
- else
- continue;
- else
- continue;
- else
- continue;
- else
- continue;
- else
- continue;
- else
- if(p[pixel[4]] < c_b)
- if(p[pixel[5]] < c_b)
- if(p[pixel[6]] < c_b)
- if(p[pixel[7]] < c_b)
- if(p[pixel[8]] < c_b)
- if(p[pixel[9]] < c_b)
- {}
- else
- continue;
- else
- continue;
- else
- continue;
- else
- continue;
- else
- continue;
- else
- continue;
- else
- continue;
- else
- continue;
- else
- continue;
- else
- if(p[pixel[9]] > cb)
- if(p[pixel[7]] > cb)
- if(p[pixel[8]] > cb)
- if(p[pixel[10]] > cb)
- if(p[pixel[11]] > cb)
- if(p[pixel[6]] > cb)
- if(p[pixel[5]] > cb)
- if(p[pixel[4]] > cb)
- if(p[pixel[3]] > cb)
- {}
- else
- if(p[pixel[12]] > cb)
- {}
- else
- continue;
- else
- if(p[pixel[12]] > cb)
- if(p[pixel[13]] > cb)
- {}
- else
- continue;
- else
- continue;
- else
- if(p[pixel[12]] > cb)
- if(p[pixel[13]] > cb)
- if(p[pixel[14]] > cb)
- {}
- else
- continue;
- else
- continue;
- else
- continue;
- else
- if(p[pixel[12]] > cb)
- if(p[pixel[13]] > cb)
- if(p[pixel[14]] > cb)
- if(p[pixel[15]] > cb)
- {}
- else
- continue;
- else
- continue;
- else
- continue;
- else
- continue;
- else
- continue;
- else
- continue;
- else
- continue;
- else
- continue;
- else if(p[pixel[9]] < c_b)
- if(p[pixel[10]] < c_b)
- if(p[pixel[11]] < c_b)
- if(p[pixel[12]] < c_b)
- if(p[pixel[13]] < c_b)
- if(p[pixel[14]] < c_b)
- if(p[pixel[15]] < c_b)
- {}
- else
- if(p[pixel[6]] < c_b)
- if(p[pixel[7]] < c_b)
- if(p[pixel[8]] < c_b)
- {}
- else
- continue;
- else
- continue;
- else
- continue;
- else
- if(p[pixel[5]] < c_b)
- if(p[pixel[6]] < c_b)
- if(p[pixel[7]] < c_b)
- if(p[pixel[8]] < c_b)
- {}
- else
- continue;
- else
- continue;
- else
- continue;
- else
- continue;
- else
- if(p[pixel[4]] < c_b)
- if(p[pixel[5]] < c_b)
- if(p[pixel[6]] < c_b)
- if(p[pixel[7]] < c_b)
- if(p[pixel[8]] < c_b)
- {}
- else
- continue;
- else
- continue;
- else
- continue;
- else
- continue;
- else
- continue;
- else
- if(p[pixel[3]] < c_b)
- if(p[pixel[4]] < c_b)
- if(p[pixel[5]] < c_b)
- if(p[pixel[6]] < c_b)
- if(p[pixel[7]] < c_b)
- if(p[pixel[8]] < c_b)
- {}
- else
- continue;
- else
- continue;
- else
- continue;
- else
- continue;
- else
- continue;
- else
- continue;
- else
- continue;
- else
- continue;
- else
- continue;
- else
- if(p[pixel[8]] > cb)
- if(p[pixel[7]] > cb)
- if(p[pixel[9]] > cb)
- if(p[pixel[10]] > cb)
- if(p[pixel[6]] > cb)
- if(p[pixel[5]] > cb)
- if(p[pixel[4]] > cb)
- if(p[pixel[3]] > cb)
- if(p[pixel[2]] > cb)
- {}
- else
- if(p[pixel[11]] > cb)
- {}
- else
- continue;
- else
- if(p[pixel[11]] > cb)
- if(p[pixel[12]] > cb)
- {}
- else
- continue;
- else
- continue;
- else
- if(p[pixel[11]] > cb)
- if(p[pixel[12]] > cb)
- if(p[pixel[13]] > cb)
- {}
- else
- continue;
- else
- continue;
- else
- continue;
- else
- if(p[pixel[11]] > cb)
- if(p[pixel[12]] > cb)
- if(p[pixel[13]] > cb)
- if(p[pixel[14]] > cb)
- {}
- else
- continue;
- else
- continue;
- else
- continue;
- else
- continue;
- else
- if(p[pixel[11]] > cb)
- if(p[pixel[12]] > cb)
- if(p[pixel[13]] > cb)
- if(p[pixel[14]] > cb)
- if(p[pixel[15]] > cb)
- {}
- else
- continue;
- else
- continue;
- else
- continue;
- else
- continue;
- else
- continue;
- else
- continue;
- else
- continue;
- else
- continue;
- else if(p[pixel[8]] < c_b)
- if(p[pixel[9]] < c_b)
- if(p[pixel[10]] < c_b)
- if(p[pixel[11]] < c_b)
- if(p[pixel[12]] < c_b)
- if(p[pixel[13]] < c_b)
- if(p[pixel[14]] < c_b)
- if(p[pixel[15]] < c_b)
- {}
- else
- if(p[pixel[6]] < c_b)
- if(p[pixel[7]] < c_b)
- {}
- else
- continue;
- else
- continue;
- else
- if(p[pixel[5]] < c_b)
- if(p[pixel[6]] < c_b)
- if(p[pixel[7]] < c_b)
- {}
- else
- continue;
- else
- continue;
- else
- continue;
- else
- if(p[pixel[4]] < c_b)
- if(p[pixel[5]] < c_b)
- if(p[pixel[6]] < c_b)
- if(p[pixel[7]] < c_b)
- {}
- else
- continue;
- else
- continue;
- else
- continue;
- else
- continue;
- else
- if(p[pixel[3]] < c_b)
- if(p[pixel[4]] < c_b)
- if(p[pixel[5]] < c_b)
- if(p[pixel[6]] < c_b)
- if(p[pixel[7]] < c_b)
- {}
- else
- continue;
- else
- continue;
- else
- continue;
- else
- continue;
- else
- continue;
- else
- if(p[pixel[2]] < c_b)
- if(p[pixel[3]] < c_b)
- if(p[pixel[4]] < c_b)
- if(p[pixel[5]] < c_b)
- if(p[pixel[6]] < c_b)
- if(p[pixel[7]] < c_b)
- {}
- else
- continue;
- else
- continue;
- else
- continue;
- else
- continue;
- else
- continue;
- else
- continue;
- else
- continue;
- else
- continue;
- else
- continue;
- else
- if(p[pixel[7]] > cb)
- if(p[pixel[8]] > cb)
- if(p[pixel[9]] > cb)
- if(p[pixel[6]] > cb)
- if(p[pixel[5]] > cb)
- if(p[pixel[4]] > cb)
- if(p[pixel[3]] > cb)
- if(p[pixel[2]] > cb)
- if(p[pixel[1]] > cb)
- {}
- else
- if(p[pixel[10]] > cb)
- {}
- else
- continue;
- else
- if(p[pixel[10]] > cb)
- if(p[pixel[11]] > cb)
- {}
- else
- continue;
- else
- continue;
- else
- if(p[pixel[10]] > cb)
- if(p[pixel[11]] > cb)
- if(p[pixel[12]] > cb)
- {}
- else
- continue;
- else
- continue;
- else
- continue;
- else
- if(p[pixel[10]] > cb)
- if(p[pixel[11]] > cb)
- if(p[pixel[12]] > cb)
- if(p[pixel[13]] > cb)
- {}
- else
- continue;
- else
- continue;
- else
- continue;
- else
- continue;
- else
- if(p[pixel[10]] > cb)
- if(p[pixel[11]] > cb)
- if(p[pixel[12]] > cb)
- if(p[pixel[13]] > cb)
- if(p[pixel[14]] > cb)
- {}
- else
- continue;
- else
- continue;
- else
- continue;
- else
- continue;
- else
- continue;
- else
- if(p[pixel[10]] > cb)
- if(p[pixel[11]] > cb)
- if(p[pixel[12]] > cb)
- if(p[pixel[13]] > cb)
- if(p[pixel[14]] > cb)
- if(p[pixel[15]] > cb)
- {}
- else
- continue;
- else
- continue;
- else
- continue;
- else
- continue;
- else
- continue;
- else
- continue;
- else
- continue;
- else
- continue;
- else if(p[pixel[7]] < c_b)
- if(p[pixel[8]] < c_b)
- if(p[pixel[9]] < c_b)
- if(p[pixel[6]] < c_b)
- if(p[pixel[5]] < c_b)
- if(p[pixel[4]] < c_b)
- if(p[pixel[3]] < c_b)
- if(p[pixel[2]] < c_b)
- if(p[pixel[1]] < c_b)
- {}
- else
- if(p[pixel[10]] < c_b)
- {}
- else
- continue;
- else
- if(p[pixel[10]] < c_b)
- if(p[pixel[11]] < c_b)
- {}
- else
- continue;
- else
- continue;
- else
- if(p[pixel[10]] < c_b)
- if(p[pixel[11]] < c_b)
- if(p[pixel[12]] < c_b)
- {}
- else
- continue;
- else
- continue;
- else
- continue;
- else
- if(p[pixel[10]] < c_b)
- if(p[pixel[11]] < c_b)
- if(p[pixel[12]] < c_b)
- if(p[pixel[13]] < c_b)
- {}
- else
- continue;
- else
- continue;
- else
- continue;
- else
- continue;
- else
- if(p[pixel[10]] < c_b)
- if(p[pixel[11]] < c_b)
- if(p[pixel[12]] < c_b)
- if(p[pixel[13]] < c_b)
- if(p[pixel[14]] < c_b)
- {}
- else
- continue;
- else
- continue;
- else
- continue;
- else
- continue;
- else
- continue;
- else
- if(p[pixel[10]] < c_b)
- if(p[pixel[11]] < c_b)
- if(p[pixel[12]] < c_b)
- if(p[pixel[13]] < c_b)
- if(p[pixel[14]] < c_b)
- if(p[pixel[15]] < c_b)
- {}
- else
- continue;
- else
- continue;
- else
- continue;
- else
- continue;
- else
- continue;
- else
- continue;
- else
- continue;
- else
- continue;
- else
- continue;
- if(num_corners == rsize)
- {
- rsize*=2;
- ret_corners = (xy*)realloc(ret_corners, sizeof(xy)*rsize);
- }
- ret_corners[num_corners].x = x;
- ret_corners[num_corners].y = y;
- num_corners++;
-
- }
-
- *ret_num_corners = num_corners;
- return ret_corners;
-
-}
-
-// clang-format on
diff --git a/third_party/aom/third_party/fastfeat/nonmax.c b/third_party/aom/third_party/fastfeat/nonmax.c
deleted file mode 100644
index 0438c4dc1..000000000
--- a/third_party/aom/third_party/fastfeat/nonmax.c
+++ /dev/null
@@ -1,121 +0,0 @@
-// clang-format off
-#include <stdlib.h>
-#include "fast.h"
-
-
-#define Compare(X, Y) ((X)>=(Y))
-
-xy* nonmax_suppression(const xy* corners, const int* scores, int num_corners, int* ret_num_nonmax)
-{
- int num_nonmax=0;
- int last_row;
- int* row_start;
- int i, j;
- xy* ret_nonmax;
- const int sz = (int)num_corners;
-
- /*Point above points (roughly) to the pixel above the one of interest, if there
- is a feature there.*/
- int point_above = 0;
- int point_below = 0;
-
-
- if(num_corners < 1)
- {
- *ret_num_nonmax = 0;
- return 0;
- }
-
- ret_nonmax = (xy*)malloc(num_corners * sizeof(xy));
-
- /* Find where each row begins
- (the corners are output in raster scan order). A beginning of -1 signifies
- that there are no corners on that row. */
- last_row = corners[num_corners-1].y;
- row_start = (int*)malloc((last_row+1)*sizeof(int));
-
- for(i=0; i < last_row+1; i++)
- row_start[i] = -1;
-
- {
- int prev_row = -1;
- for(i=0; i< num_corners; i++)
- if(corners[i].y != prev_row)
- {
- row_start[corners[i].y] = i;
- prev_row = corners[i].y;
- }
- }
-
-
-
- for(i=0; i < sz; i++)
- {
- int score = scores[i];
- xy pos = corners[i];
-
- /*Check left */
- if(i > 0)
- if(corners[i-1].x == pos.x-1 && corners[i-1].y == pos.y && Compare(scores[i-1], score))
- continue;
-
- /*Check right*/
- if(i < (sz - 1))
- if(corners[i+1].x == pos.x+1 && corners[i+1].y == pos.y && Compare(scores[i+1], score))
- continue;
-
- /*Check above (if there is a valid row above)*/
- if(pos.y > 0)
- if (row_start[pos.y - 1] != -1)
- {
- /*Make sure that current point_above is one
- row above.*/
- if(corners[point_above].y < pos.y - 1)
- point_above = row_start[pos.y-1];
-
- /*Make point_above point to the first of the pixels above the current point,
- if it exists.*/
- for(; corners[point_above].y < pos.y && corners[point_above].x < pos.x - 1; point_above++)
- {}
-
-
- for(j=point_above; corners[j].y < pos.y && corners[j].x <= pos.x + 1; j++)
- {
- int x = corners[j].x;
- if( (x == pos.x - 1 || x ==pos.x || x == pos.x+1) && Compare(scores[j], score))
- goto cont;
- }
-
- }
-
- /*Check below (if there is anything below)*/
- if(pos.y >= 0)
- if (pos.y != last_row && row_start[pos.y + 1] != -1 && point_below < sz) /*Nothing below*/
- {
- if(corners[point_below].y < pos.y + 1)
- point_below = row_start[pos.y+1];
-
- /* Make point below point to one of the pixels belowthe current point, if it
- exists.*/
- for(; point_below < sz && corners[point_below].y == pos.y+1 && corners[point_below].x < pos.x - 1; point_below++)
- {}
-
- for(j=point_below; j < sz && corners[j].y == pos.y+1 && corners[j].x <= pos.x + 1; j++)
- {
- int x = corners[j].x;
- if( (x == pos.x - 1 || x ==pos.x || x == pos.x+1) && Compare(scores[j],score))
- goto cont;
- }
- }
-
- ret_nonmax[num_nonmax++] = corners[i];
-cont:
- ;
- }
-
- free(row_start);
- *ret_num_nonmax = num_nonmax;
- return ret_nonmax;
-}
-
-// clang-format on
diff --git a/third_party/aom/third_party/googletest/README.libaom b/third_party/aom/third_party/googletest/README.libaom
deleted file mode 100644
index 9784dd51b..000000000
--- a/third_party/aom/third_party/googletest/README.libaom
+++ /dev/null
@@ -1,26 +0,0 @@
-URL: https://github.com/google/googletest
-Version: 1.8.0
-License: BSD
-License File: LICENSE
-
-Description:
-Google's framework for writing C++ tests on a variety of platforms
-(Linux, Mac OS X, Windows, Windows CE, Symbian, etc). Based on the
-xUnit architecture. Supports automatic test discovery, a rich set of
-assertions, user-defined assertions, death tests, fatal and non-fatal
-failures, various options for running the tests, and XML test report
-generation.
-
-Local Modifications:
-- Remove everything but:
- googletest-release-1.8.0/googletest/
- cmake/
- include/
- src/
- CHANGES
- CMakelists.txt
- CONTRIBUTORS
- LICENSE
- README.md
-- Suppress unsigned overflow instrumentation in the LCG
- https://github.com/google/googletest/pull/1066
diff --git a/third_party/aom/third_party/googletest/gtest.mk b/third_party/aom/third_party/googletest/gtest.mk
deleted file mode 100644
index fc4dbdc24..000000000
--- a/third_party/aom/third_party/googletest/gtest.mk
+++ /dev/null
@@ -1 +0,0 @@
-GTEST_SRCS-yes += googletest/src/googletest/src/gtest-all.cc
diff --git a/third_party/aom/third_party/googletest/src/googletest/CHANGES b/third_party/aom/third_party/googletest/src/googletest/CHANGES
deleted file mode 100644
index 055213242..000000000
--- a/third_party/aom/third_party/googletest/src/googletest/CHANGES
+++ /dev/null
@@ -1,157 +0,0 @@
-Changes for 1.7.0:
-
-* New feature: death tests are supported on OpenBSD and in iOS
- simulator now.
-* New feature: Google Test now implements a protocol to allow
- a test runner to detect that a test program has exited
- prematurely and report it as a failure (before it would be
- falsely reported as a success if the exit code is 0).
-* New feature: Test::RecordProperty() can now be used outside of the
- lifespan of a test method, in which case it will be attributed to
- the current test case or the test program in the XML report.
-* New feature (potentially breaking): --gtest_list_tests now prints
- the type parameters and value parameters for each test.
-* Improvement: char pointers and char arrays are now escaped properly
- in failure messages.
-* Improvement: failure summary in XML reports now includes file and
- line information.
-* Improvement: the <testsuites> XML element now has a timestamp attribute.
-* Improvement: When --gtest_filter is specified, XML report now doesn't
- contain information about tests that are filtered out.
-* Fixed the bug where long --gtest_filter flag values are truncated in
- death tests.
-* Potentially breaking change: RUN_ALL_TESTS() is now implemented as a
- function instead of a macro in order to work better with Clang.
-* Compatibility fixes with C++ 11 and various platforms.
-* Bug/warning fixes.
-
-Changes for 1.6.0:
-
-* New feature: ADD_FAILURE_AT() for reporting a test failure at the
- given source location -- useful for writing testing utilities.
-* New feature: the universal value printer is moved from Google Mock
- to Google Test.
-* New feature: type parameters and value parameters are reported in
- the XML report now.
-* A gtest_disable_pthreads CMake option.
-* Colored output works in GNU Screen sessions now.
-* Parameters of value-parameterized tests are now printed in the
- textual output.
-* Failures from ad hoc test assertions run before RUN_ALL_TESTS() are
- now correctly reported.
-* Arguments of ASSERT_XY and EXPECT_XY no longer need to support << to
- ostream.
-* More complete handling of exceptions.
-* GTEST_ASSERT_XY can be used instead of ASSERT_XY in case the latter
- name is already used by another library.
-* --gtest_catch_exceptions is now true by default, allowing a test
- program to continue after an exception is thrown.
-* Value-parameterized test fixtures can now derive from Test and
- WithParamInterface<T> separately, easing conversion of legacy tests.
-* Death test messages are clearly marked to make them more
- distinguishable from other messages.
-* Compatibility fixes for Android, Google Native Client, MinGW, HP UX,
- PowerPC, Lucid autotools, libCStd, Sun C++, Borland C++ Builder (Code Gear),
- IBM XL C++ (Visual Age C++), and C++0x.
-* Bug fixes and implementation clean-ups.
-* Potentially incompatible changes: disables the harmful 'make install'
- command in autotools.
-
-Changes for 1.5.0:
-
- * New feature: assertions can be safely called in multiple threads
- where the pthreads library is available.
- * New feature: predicates used inside EXPECT_TRUE() and friends
- can now generate custom failure messages.
- * New feature: Google Test can now be compiled as a DLL.
- * New feature: fused source files are included.
- * New feature: prints help when encountering unrecognized Google Test flags.
- * Experimental feature: CMake build script (requires CMake 2.6.4+).
- * Experimental feature: the Pump script for meta programming.
- * double values streamed to an assertion are printed with enough precision
- to differentiate any two different values.
- * Google Test now works on Solaris and AIX.
- * Build and test script improvements.
- * Bug fixes and implementation clean-ups.
-
- Potentially breaking changes:
-
- * Stopped supporting VC++ 7.1 with exceptions disabled.
- * Dropped support for 'make install'.
-
-Changes for 1.4.0:
-
- * New feature: the event listener API
- * New feature: test shuffling
- * New feature: the XML report format is closer to junitreport and can
- be parsed by Hudson now.
- * New feature: when a test runs under Visual Studio, its failures are
- integrated in the IDE.
- * New feature: /MD(d) versions of VC++ projects.
- * New feature: elapsed time for the tests is printed by default.
- * New feature: comes with a TR1 tuple implementation such that Boost
- is no longer needed for Combine().
- * New feature: EXPECT_DEATH_IF_SUPPORTED macro and friends.
- * New feature: the Xcode project can now produce static gtest
- libraries in addition to a framework.
- * Compatibility fixes for Solaris, Cygwin, minGW, Windows Mobile,
- Symbian, gcc, and C++Builder.
- * Bug fixes and implementation clean-ups.
-
-Changes for 1.3.0:
-
- * New feature: death tests on Windows, Cygwin, and Mac.
- * New feature: ability to use Google Test assertions in other testing
- frameworks.
- * New feature: ability to run disabled test via
- --gtest_also_run_disabled_tests.
- * New feature: the --help flag for printing the usage.
- * New feature: access to Google Test flag values in user code.
- * New feature: a script that packs Google Test into one .h and one
- .cc file for easy deployment.
- * New feature: support for distributing test functions to multiple
- machines (requires support from the test runner).
- * Bug fixes and implementation clean-ups.
-
-Changes for 1.2.1:
-
- * Compatibility fixes for Linux IA-64 and IBM z/OS.
- * Added support for using Boost and other TR1 implementations.
- * Changes to the build scripts to support upcoming release of Google C++
- Mocking Framework.
- * Added Makefile to the distribution package.
- * Improved build instructions in README.
-
-Changes for 1.2.0:
-
- * New feature: value-parameterized tests.
- * New feature: the ASSERT/EXPECT_(NON)FATAL_FAILURE(_ON_ALL_THREADS)
- macros.
- * Changed the XML report format to match JUnit/Ant's.
- * Added tests to the Xcode project.
- * Added scons/SConscript for building with SCons.
- * Added src/gtest-all.cc for building Google Test from a single file.
- * Fixed compatibility with Solaris and z/OS.
- * Enabled running Python tests on systems with python 2.3 installed,
- e.g. Mac OS X 10.4.
- * Bug fixes.
-
-Changes for 1.1.0:
-
- * New feature: type-parameterized tests.
- * New feature: exception assertions.
- * New feature: printing elapsed time of tests.
- * Improved the robustness of death tests.
- * Added an Xcode project and samples.
- * Adjusted the output format on Windows to be understandable by Visual Studio.
- * Minor bug fixes.
-
-Changes for 1.0.1:
-
- * Added project files for Visual Studio 7.1.
- * Fixed issues with compiling on Mac OS X.
- * Fixed issues with compiling on Cygwin.
-
-Changes for 1.0.0:
-
- * Initial Open Source release of Google Test
diff --git a/third_party/aom/third_party/googletest/src/googletest/CMakeLists.txt b/third_party/aom/third_party/googletest/src/googletest/CMakeLists.txt
deleted file mode 100644
index 621d0f042..000000000
--- a/third_party/aom/third_party/googletest/src/googletest/CMakeLists.txt
+++ /dev/null
@@ -1,286 +0,0 @@
-########################################################################
-# CMake build script for Google Test.
-#
-# To run the tests for Google Test itself on Linux, use 'make test' or
-# ctest. You can select which tests to run using 'ctest -R regex'.
-# For more options, run 'ctest --help'.
-
-# BUILD_SHARED_LIBS is a standard CMake variable, but we declare it here to
-# make it prominent in the GUI.
-option(BUILD_SHARED_LIBS "Build shared libraries (DLLs)." OFF)
-
-# When other libraries are using a shared version of runtime libraries,
-# Google Test also has to use one.
-option(
- gtest_force_shared_crt
- "Use shared (DLL) run-time lib even when Google Test is built as static lib."
- OFF)
-
-option(gtest_build_tests "Build all of gtest's own tests." OFF)
-
-option(gtest_build_samples "Build gtest's sample programs." OFF)
-
-option(gtest_disable_pthreads "Disable uses of pthreads in gtest." OFF)
-
-option(
- gtest_hide_internal_symbols
- "Build gtest with internal symbols hidden in shared libraries."
- OFF)
-
-# Defines pre_project_set_up_hermetic_build() and set_up_hermetic_build().
-include(cmake/hermetic_build.cmake OPTIONAL)
-
-if (COMMAND pre_project_set_up_hermetic_build)
- pre_project_set_up_hermetic_build()
-endif()
-
-########################################################################
-#
-# Project-wide settings
-
-# Name of the project.
-#
-# CMake files in this project can refer to the root source directory
-# as ${gtest_SOURCE_DIR} and to the root binary directory as
-# ${gtest_BINARY_DIR}.
-# Language "C" is required for find_package(Threads).
-project(gtest CXX C)
-cmake_minimum_required(VERSION 2.6.2)
-
-if (COMMAND set_up_hermetic_build)
- set_up_hermetic_build()
-endif()
-
-if (gtest_hide_internal_symbols)
- set(CMAKE_CXX_VISIBILITY_PRESET hidden)
- set(CMAKE_VISIBILITY_INLINES_HIDDEN 1)
-endif()
-
-# Define helper functions and macros used by Google Test.
-include(cmake/internal_utils.cmake)
-
-config_compiler_and_linker() # Defined in internal_utils.cmake.
-
-# Where Google Test's .h files can be found.
-include_directories(
- ${gtest_SOURCE_DIR}/include
- ${gtest_SOURCE_DIR})
-
-# Where Google Test's libraries can be found.
-link_directories(${gtest_BINARY_DIR}/src)
-
-# Summary of tuple support for Microsoft Visual Studio:
-# Compiler version(MS) version(cmake) Support
-# ---------- ----------- -------------- -----------------------------
-# <= VS 2010 <= 10 <= 1600 Use Google Tests's own tuple.
-# VS 2012 11 1700 std::tr1::tuple + _VARIADIC_MAX=10
-# VS 2013 12 1800 std::tr1::tuple
-if (MSVC AND MSVC_VERSION EQUAL 1700)
- add_definitions(/D _VARIADIC_MAX=10)
-endif()
-
-########################################################################
-#
-# Defines the gtest & gtest_main libraries. User tests should link
-# with one of them.
-
-# Google Test libraries. We build them using more strict warnings than what
-# are used for other targets, to ensure that gtest can be compiled by a user
-# aggressive about warnings.
-cxx_library(gtest "${cxx_strict}" src/gtest-all.cc)
-cxx_library(gtest_main "${cxx_strict}" src/gtest_main.cc)
-target_link_libraries(gtest_main gtest)
-
-# If the CMake version supports it, attach header directory information
-# to the targets for when we are part of a parent build (ie being pulled
-# in via add_subdirectory() rather than being a standalone build).
-if (DEFINED CMAKE_VERSION AND NOT "${CMAKE_VERSION}" VERSION_LESS "2.8.11")
- target_include_directories(gtest INTERFACE "${gtest_SOURCE_DIR}/include")
- target_include_directories(gtest_main INTERFACE "${gtest_SOURCE_DIR}/include")
-endif()
-
-########################################################################
-#
-# Install rules
-install(TARGETS gtest gtest_main
- DESTINATION lib)
-install(DIRECTORY ${gtest_SOURCE_DIR}/include/gtest
- DESTINATION include)
-
-########################################################################
-#
-# Samples on how to link user tests with gtest or gtest_main.
-#
-# They are not built by default. To build them, set the
-# gtest_build_samples option to ON. You can do it by running ccmake
-# or specifying the -Dgtest_build_samples=ON flag when running cmake.
-
-if (gtest_build_samples)
- cxx_executable(sample1_unittest samples gtest_main samples/sample1.cc)
- cxx_executable(sample2_unittest samples gtest_main samples/sample2.cc)
- cxx_executable(sample3_unittest samples gtest_main)
- cxx_executable(sample4_unittest samples gtest_main samples/sample4.cc)
- cxx_executable(sample5_unittest samples gtest_main samples/sample1.cc)
- cxx_executable(sample6_unittest samples gtest_main)
- cxx_executable(sample7_unittest samples gtest_main)
- cxx_executable(sample8_unittest samples gtest_main)
- cxx_executable(sample9_unittest samples gtest)
- cxx_executable(sample10_unittest samples gtest)
-endif()
-
-########################################################################
-#
-# Google Test's own tests.
-#
-# You can skip this section if you aren't interested in testing
-# Google Test itself.
-#
-# The tests are not built by default. To build them, set the
-# gtest_build_tests option to ON. You can do it by running ccmake
-# or specifying the -Dgtest_build_tests=ON flag when running cmake.
-
-if (gtest_build_tests)
- # This must be set in the root directory for the tests to be run by
- # 'make test' or ctest.
- enable_testing()
-
- ############################################################
- # C++ tests built with standard compiler flags.
-
- cxx_test(gtest-death-test_test gtest_main)
- cxx_test(gtest_environment_test gtest)
- cxx_test(gtest-filepath_test gtest_main)
- cxx_test(gtest-linked_ptr_test gtest_main)
- cxx_test(gtest-listener_test gtest_main)
- cxx_test(gtest_main_unittest gtest_main)
- cxx_test(gtest-message_test gtest_main)
- cxx_test(gtest_no_test_unittest gtest)
- cxx_test(gtest-options_test gtest_main)
- cxx_test(gtest-param-test_test gtest
- test/gtest-param-test2_test.cc)
- cxx_test(gtest-port_test gtest_main)
- cxx_test(gtest_pred_impl_unittest gtest_main)
- cxx_test(gtest_premature_exit_test gtest
- test/gtest_premature_exit_test.cc)
- cxx_test(gtest-printers_test gtest_main)
- cxx_test(gtest_prod_test gtest_main
- test/production.cc)
- cxx_test(gtest_repeat_test gtest)
- cxx_test(gtest_sole_header_test gtest_main)
- cxx_test(gtest_stress_test gtest)
- cxx_test(gtest-test-part_test gtest_main)
- cxx_test(gtest_throw_on_failure_ex_test gtest)
- cxx_test(gtest-typed-test_test gtest_main
- test/gtest-typed-test2_test.cc)
- cxx_test(gtest_unittest gtest_main)
- cxx_test(gtest-unittest-api_test gtest)
-
- ############################################################
- # C++ tests built with non-standard compiler flags.
-
- # MSVC 7.1 does not support STL with exceptions disabled.
- if (NOT MSVC OR MSVC_VERSION GREATER 1310)
- cxx_library(gtest_no_exception "${cxx_no_exception}"
- src/gtest-all.cc)
- cxx_library(gtest_main_no_exception "${cxx_no_exception}"
- src/gtest-all.cc src/gtest_main.cc)
- endif()
- cxx_library(gtest_main_no_rtti "${cxx_no_rtti}"
- src/gtest-all.cc src/gtest_main.cc)
-
- cxx_test_with_flags(gtest-death-test_ex_nocatch_test
- "${cxx_exception} -DGTEST_ENABLE_CATCH_EXCEPTIONS_=0"
- gtest test/gtest-death-test_ex_test.cc)
- cxx_test_with_flags(gtest-death-test_ex_catch_test
- "${cxx_exception} -DGTEST_ENABLE_CATCH_EXCEPTIONS_=1"
- gtest test/gtest-death-test_ex_test.cc)
-
- cxx_test_with_flags(gtest_no_rtti_unittest "${cxx_no_rtti}"
- gtest_main_no_rtti test/gtest_unittest.cc)
-
- cxx_shared_library(gtest_dll "${cxx_default}"
- src/gtest-all.cc src/gtest_main.cc)
-
- cxx_executable_with_flags(gtest_dll_test_ "${cxx_default}"
- gtest_dll test/gtest_all_test.cc)
- set_target_properties(gtest_dll_test_
- PROPERTIES
- COMPILE_DEFINITIONS "GTEST_LINKED_AS_SHARED_LIBRARY=1")
-
- if (NOT MSVC OR MSVC_VERSION LESS 1600) # 1600 is Visual Studio 2010.
- # Visual Studio 2010, 2012, and 2013 define symbols in std::tr1 that
- # conflict with our own definitions. Therefore using our own tuple does not
- # work on those compilers.
- cxx_library(gtest_main_use_own_tuple "${cxx_use_own_tuple}"
- src/gtest-all.cc src/gtest_main.cc)
-
- cxx_test_with_flags(gtest-tuple_test "${cxx_use_own_tuple}"
- gtest_main_use_own_tuple test/gtest-tuple_test.cc)
-
- cxx_test_with_flags(gtest_use_own_tuple_test "${cxx_use_own_tuple}"
- gtest_main_use_own_tuple
- test/gtest-param-test_test.cc test/gtest-param-test2_test.cc)
- endif()
-
- ############################################################
- # Python tests.
-
- cxx_executable(gtest_break_on_failure_unittest_ test gtest)
- py_test(gtest_break_on_failure_unittest)
-
- # Visual Studio .NET 2003 does not support STL with exceptions disabled.
- if (NOT MSVC OR MSVC_VERSION GREATER 1310) # 1310 is Visual Studio .NET 2003
- cxx_executable_with_flags(
- gtest_catch_exceptions_no_ex_test_
- "${cxx_no_exception}"
- gtest_main_no_exception
- test/gtest_catch_exceptions_test_.cc)
- endif()
-
- cxx_executable_with_flags(
- gtest_catch_exceptions_ex_test_
- "${cxx_exception}"
- gtest_main
- test/gtest_catch_exceptions_test_.cc)
- py_test(gtest_catch_exceptions_test)
-
- cxx_executable(gtest_color_test_ test gtest)
- py_test(gtest_color_test)
-
- cxx_executable(gtest_env_var_test_ test gtest)
- py_test(gtest_env_var_test)
-
- cxx_executable(gtest_filter_unittest_ test gtest)
- py_test(gtest_filter_unittest)
-
- cxx_executable(gtest_help_test_ test gtest_main)
- py_test(gtest_help_test)
-
- cxx_executable(gtest_list_tests_unittest_ test gtest)
- py_test(gtest_list_tests_unittest)
-
- cxx_executable(gtest_output_test_ test gtest)
- py_test(gtest_output_test)
-
- cxx_executable(gtest_shuffle_test_ test gtest)
- py_test(gtest_shuffle_test)
-
- # MSVC 7.1 does not support STL with exceptions disabled.
- if (NOT MSVC OR MSVC_VERSION GREATER 1310)
- cxx_executable(gtest_throw_on_failure_test_ test gtest_no_exception)
- set_target_properties(gtest_throw_on_failure_test_
- PROPERTIES
- COMPILE_FLAGS "${cxx_no_exception}")
- py_test(gtest_throw_on_failure_test)
- endif()
-
- cxx_executable(gtest_uninitialized_test_ test gtest)
- py_test(gtest_uninitialized_test)
-
- cxx_executable(gtest_xml_outfile1_test_ test gtest_main)
- cxx_executable(gtest_xml_outfile2_test_ test gtest_main)
- py_test(gtest_xml_outfiles_test)
-
- cxx_executable(gtest_xml_output_unittest_ test gtest)
- py_test(gtest_xml_output_unittest)
-endif()
diff --git a/third_party/aom/third_party/googletest/src/googletest/CONTRIBUTORS b/third_party/aom/third_party/googletest/src/googletest/CONTRIBUTORS
deleted file mode 100644
index feae2fc04..000000000
--- a/third_party/aom/third_party/googletest/src/googletest/CONTRIBUTORS
+++ /dev/null
@@ -1,37 +0,0 @@
-# This file contains a list of people who've made non-trivial
-# contribution to the Google C++ Testing Framework project. People
-# who commit code to the project are encouraged to add their names
-# here. Please keep the list sorted by first names.
-
-Ajay Joshi <jaj@google.com>
-Balázs Dán <balazs.dan@gmail.com>
-Bharat Mediratta <bharat@menalto.com>
-Chandler Carruth <chandlerc@google.com>
-Chris Prince <cprince@google.com>
-Chris Taylor <taylorc@google.com>
-Dan Egnor <egnor@google.com>
-Eric Roman <eroman@chromium.org>
-Hady Zalek <hady.zalek@gmail.com>
-Jeffrey Yasskin <jyasskin@google.com>
-Jói Sigurðsson <joi@google.com>
-Keir Mierle <mierle@gmail.com>
-Keith Ray <keith.ray@gmail.com>
-Kenton Varda <kenton@google.com>
-Manuel Klimek <klimek@google.com>
-Markus Heule <markus.heule@gmail.com>
-Mika Raento <mikie@iki.fi>
-Miklós Fazekas <mfazekas@szemafor.com>
-Pasi Valminen <pasi.valminen@gmail.com>
-Patrick Hanna <phanna@google.com>
-Patrick Riley <pfr@google.com>
-Peter Kaminski <piotrk@google.com>
-Preston Jackson <preston.a.jackson@gmail.com>
-Rainer Klaffenboeck <rainer.klaffenboeck@dynatrace.com>
-Russ Cox <rsc@google.com>
-Russ Rufer <russ@pentad.com>
-Sean Mcafee <eefacm@gmail.com>
-Sigurður Ásgeirsson <siggi@google.com>
-Tracy Bialik <tracy@pentad.com>
-Vadim Berman <vadimb@google.com>
-Vlad Losev <vladl@google.com>
-Zhanyong Wan <wan@google.com>
diff --git a/third_party/aom/third_party/googletest/src/googletest/LICENSE b/third_party/aom/third_party/googletest/src/googletest/LICENSE
deleted file mode 100644
index 1941a11f8..000000000
--- a/third_party/aom/third_party/googletest/src/googletest/LICENSE
+++ /dev/null
@@ -1,28 +0,0 @@
-Copyright 2008, Google Inc.
-All rights reserved.
-
-Redistribution and use in source and binary forms, with or without
-modification, are permitted provided that the following conditions are
-met:
-
- * Redistributions of source code must retain the above copyright
-notice, this list of conditions and the following disclaimer.
- * Redistributions in binary form must reproduce the above
-copyright notice, this list of conditions and the following disclaimer
-in the documentation and/or other materials provided with the
-distribution.
- * Neither the name of Google Inc. nor the names of its
-contributors may be used to endorse or promote products derived from
-this software without specific prior written permission.
-
-THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
-LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
-A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
-OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
-SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
-LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
-DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
-THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
-OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
diff --git a/third_party/aom/third_party/googletest/src/googletest/README.md b/third_party/aom/third_party/googletest/src/googletest/README.md
deleted file mode 100644
index edd440805..000000000
--- a/third_party/aom/third_party/googletest/src/googletest/README.md
+++ /dev/null
@@ -1,280 +0,0 @@
-
-### Generic Build Instructions ###
-
-#### Setup ####
-
-To build Google Test and your tests that use it, you need to tell your
-build system where to find its headers and source files. The exact
-way to do it depends on which build system you use, and is usually
-straightforward.
-
-#### Build ####
-
-Suppose you put Google Test in directory `${GTEST_DIR}`. To build it,
-create a library build target (or a project as called by Visual Studio
-and Xcode) to compile
-
- ${GTEST_DIR}/src/gtest-all.cc
-
-with `${GTEST_DIR}/include` in the system header search path and `${GTEST_DIR}`
-in the normal header search path. Assuming a Linux-like system and gcc,
-something like the following will do:
-
- g++ -isystem ${GTEST_DIR}/include -I${GTEST_DIR} \
- -pthread -c ${GTEST_DIR}/src/gtest-all.cc
- ar -rv libgtest.a gtest-all.o
-
-(We need `-pthread` as Google Test uses threads.)
-
-Next, you should compile your test source file with
-`${GTEST_DIR}/include` in the system header search path, and link it
-with gtest and any other necessary libraries:
-
- g++ -isystem ${GTEST_DIR}/include -pthread path/to/your_test.cc libgtest.a \
- -o your_test
-
-As an example, the make/ directory contains a Makefile that you can
-use to build Google Test on systems where GNU make is available
-(e.g. Linux, Mac OS X, and Cygwin). It doesn't try to build Google
-Test's own tests. Instead, it just builds the Google Test library and
-a sample test. You can use it as a starting point for your own build
-script.
-
-If the default settings are correct for your environment, the
-following commands should succeed:
-
- cd ${GTEST_DIR}/make
- make
- ./sample1_unittest
-
-If you see errors, try to tweak the contents of `make/Makefile` to make
-them go away. There are instructions in `make/Makefile` on how to do
-it.
-
-### Using CMake ###
-
-Google Test comes with a CMake build script (
-[CMakeLists.txt](CMakeLists.txt)) that can be used on a wide range of platforms ("C" stands for
-cross-platform.). If you don't have CMake installed already, you can
-download it for free from <http://www.cmake.org/>.
-
-CMake works by generating native makefiles or build projects that can
-be used in the compiler environment of your choice. The typical
-workflow starts with:
-
- mkdir mybuild # Create a directory to hold the build output.
- cd mybuild
- cmake ${GTEST_DIR} # Generate native build scripts.
-
-If you want to build Google Test's samples, you should replace the
-last command with
-
- cmake -Dgtest_build_samples=ON ${GTEST_DIR}
-
-If you are on a \*nix system, you should now see a Makefile in the
-current directory. Just type 'make' to build gtest.
-
-If you use Windows and have Visual Studio installed, a `gtest.sln` file
-and several `.vcproj` files will be created. You can then build them
-using Visual Studio.
-
-On Mac OS X with Xcode installed, a `.xcodeproj` file will be generated.
-
-### Legacy Build Scripts ###
-
-Before settling on CMake, we have been providing hand-maintained build
-projects/scripts for Visual Studio, Xcode, and Autotools. While we
-continue to provide them for convenience, they are not actively
-maintained any more. We highly recommend that you follow the
-instructions in the previous two sections to integrate Google Test
-with your existing build system.
-
-If you still need to use the legacy build scripts, here's how:
-
-The msvc\ folder contains two solutions with Visual C++ projects.
-Open the `gtest.sln` or `gtest-md.sln` file using Visual Studio, and you
-are ready to build Google Test the same way you build any Visual
-Studio project. Files that have names ending with -md use DLL
-versions of Microsoft runtime libraries (the /MD or the /MDd compiler
-option). Files without that suffix use static versions of the runtime
-libraries (the /MT or the /MTd option). Please note that one must use
-the same option to compile both gtest and the test code. If you use
-Visual Studio 2005 or above, we recommend the -md version as /MD is
-the default for new projects in these versions of Visual Studio.
-
-On Mac OS X, open the `gtest.xcodeproj` in the `xcode/` folder using
-Xcode. Build the "gtest" target. The universal binary framework will
-end up in your selected build directory (selected in the Xcode
-"Preferences..." -> "Building" pane and defaults to xcode/build).
-Alternatively, at the command line, enter:
-
- xcodebuild
-
-This will build the "Release" configuration of gtest.framework in your
-default build location. See the "xcodebuild" man page for more
-information about building different configurations and building in
-different locations.
-
-If you wish to use the Google Test Xcode project with Xcode 4.x and
-above, you need to either:
-
- * update the SDK configuration options in xcode/Config/General.xconfig.
- Comment options `SDKROOT`, `MACOS_DEPLOYMENT_TARGET`, and `GCC_VERSION`. If
- you choose this route you lose the ability to target earlier versions
- of MacOS X.
- * Install an SDK for an earlier version. This doesn't appear to be
- supported by Apple, but has been reported to work
- (http://stackoverflow.com/questions/5378518).
-
-### Tweaking Google Test ###
-
-Google Test can be used in diverse environments. The default
-configuration may not work (or may not work well) out of the box in
-some environments. However, you can easily tweak Google Test by
-defining control macros on the compiler command line. Generally,
-these macros are named like `GTEST_XYZ` and you define them to either 1
-or 0 to enable or disable a certain feature.
-
-We list the most frequently used macros below. For a complete list,
-see file [include/gtest/internal/gtest-port.h](include/gtest/internal/gtest-port.h).
-
-### Choosing a TR1 Tuple Library ###
-
-Some Google Test features require the C++ Technical Report 1 (TR1)
-tuple library, which is not yet available with all compilers. The
-good news is that Google Test implements a subset of TR1 tuple that's
-enough for its own need, and will automatically use this when the
-compiler doesn't provide TR1 tuple.
-
-Usually you don't need to care about which tuple library Google Test
-uses. However, if your project already uses TR1 tuple, you need to
-tell Google Test to use the same TR1 tuple library the rest of your
-project uses, or the two tuple implementations will clash. To do
-that, add
-
- -DGTEST_USE_OWN_TR1_TUPLE=0
-
-to the compiler flags while compiling Google Test and your tests. If
-you want to force Google Test to use its own tuple library, just add
-
- -DGTEST_USE_OWN_TR1_TUPLE=1
-
-to the compiler flags instead.
-
-If you don't want Google Test to use tuple at all, add
-
- -DGTEST_HAS_TR1_TUPLE=0
-
-and all features using tuple will be disabled.
-
-### Multi-threaded Tests ###
-
-Google Test is thread-safe where the pthread library is available.
-After `#include "gtest/gtest.h"`, you can check the `GTEST_IS_THREADSAFE`
-macro to see whether this is the case (yes if the macro is `#defined` to
-1, no if it's undefined.).
-
-If Google Test doesn't correctly detect whether pthread is available
-in your environment, you can force it with
-
- -DGTEST_HAS_PTHREAD=1
-
-or
-
- -DGTEST_HAS_PTHREAD=0
-
-When Google Test uses pthread, you may need to add flags to your
-compiler and/or linker to select the pthread library, or you'll get
-link errors. If you use the CMake script or the deprecated Autotools
-script, this is taken care of for you. If you use your own build
-script, you'll need to read your compiler and linker's manual to
-figure out what flags to add.
-
-### As a Shared Library (DLL) ###
-
-Google Test is compact, so most users can build and link it as a
-static library for the simplicity. You can choose to use Google Test
-as a shared library (known as a DLL on Windows) if you prefer.
-
-To compile *gtest* as a shared library, add
-
- -DGTEST_CREATE_SHARED_LIBRARY=1
-
-to the compiler flags. You'll also need to tell the linker to produce
-a shared library instead - consult your linker's manual for how to do
-it.
-
-To compile your *tests* that use the gtest shared library, add
-
- -DGTEST_LINKED_AS_SHARED_LIBRARY=1
-
-to the compiler flags.
-
-Note: while the above steps aren't technically necessary today when
-using some compilers (e.g. GCC), they may become necessary in the
-future, if we decide to improve the speed of loading the library (see
-<http://gcc.gnu.org/wiki/Visibility> for details). Therefore you are
-recommended to always add the above flags when using Google Test as a
-shared library. Otherwise a future release of Google Test may break
-your build script.
-
-### Avoiding Macro Name Clashes ###
-
-In C++, macros don't obey namespaces. Therefore two libraries that
-both define a macro of the same name will clash if you `#include` both
-definitions. In case a Google Test macro clashes with another
-library, you can force Google Test to rename its macro to avoid the
-conflict.
-
-Specifically, if both Google Test and some other code define macro
-FOO, you can add
-
- -DGTEST_DONT_DEFINE_FOO=1
-
-to the compiler flags to tell Google Test to change the macro's name
-from `FOO` to `GTEST_FOO`. Currently `FOO` can be `FAIL`, `SUCCEED`,
-or `TEST`. For example, with `-DGTEST_DONT_DEFINE_TEST=1`, you'll
-need to write
-
- GTEST_TEST(SomeTest, DoesThis) { ... }
-
-instead of
-
- TEST(SomeTest, DoesThis) { ... }
-
-in order to define a test.
-
-## Developing Google Test ##
-
-This section discusses how to make your own changes to Google Test.
-
-### Testing Google Test Itself ###
-
-To make sure your changes work as intended and don't break existing
-functionality, you'll want to compile and run Google Test's own tests.
-For that you can use CMake:
-
- mkdir mybuild
- cd mybuild
- cmake -Dgtest_build_tests=ON ${GTEST_DIR}
-
-Make sure you have Python installed, as some of Google Test's tests
-are written in Python. If the cmake command complains about not being
-able to find Python (`Could NOT find PythonInterp (missing:
-PYTHON_EXECUTABLE)`), try telling it explicitly where your Python
-executable can be found:
-
- cmake -DPYTHON_EXECUTABLE=path/to/python -Dgtest_build_tests=ON ${GTEST_DIR}
-
-Next, you can build Google Test and all of its own tests. On \*nix,
-this is usually done by 'make'. To run the tests, do
-
- make test
-
-All tests should pass.
-
-Normally you don't need to worry about regenerating the source files,
-unless you need to modify them. In that case, you should modify the
-corresponding .pump files instead and run the pump.py Python script to
-regenerate them. You can find pump.py in the [scripts/](scripts/) directory.
-Read the [Pump manual](docs/PumpManual.md) for how to use it.
diff --git a/third_party/aom/third_party/googletest/src/googletest/cmake/internal_utils.cmake b/third_party/aom/third_party/googletest/src/googletest/cmake/internal_utils.cmake
deleted file mode 100644
index 777b91ed4..000000000
--- a/third_party/aom/third_party/googletest/src/googletest/cmake/internal_utils.cmake
+++ /dev/null
@@ -1,254 +0,0 @@
-# Defines functions and macros useful for building Google Test and
-# Google Mock.
-#
-# Note:
-#
-# - This file will be run twice when building Google Mock (once via
-# Google Test's CMakeLists.txt, and once via Google Mock's).
-# Therefore it shouldn't have any side effects other than defining
-# the functions and macros.
-#
-# - The functions/macros defined in this file may depend on Google
-# Test and Google Mock's option() definitions, and thus must be
-# called *after* the options have been defined.
-
-# Tweaks CMake's default compiler/linker settings to suit Google Test's needs.
-#
-# This must be a macro(), as inside a function string() can only
-# update variables in the function scope.
-macro(fix_default_compiler_settings_)
- if (MSVC)
- # For MSVC, CMake sets certain flags to defaults we want to override.
- # This replacement code is taken from sample in the CMake Wiki at
- # http://www.cmake.org/Wiki/CMake_FAQ#Dynamic_Replace.
- foreach (flag_var
- CMAKE_CXX_FLAGS CMAKE_CXX_FLAGS_DEBUG CMAKE_CXX_FLAGS_RELEASE
- CMAKE_CXX_FLAGS_MINSIZEREL CMAKE_CXX_FLAGS_RELWITHDEBINFO)
- if (NOT BUILD_SHARED_LIBS AND NOT gtest_force_shared_crt)
- # When Google Test is built as a shared library, it should also use
- # shared runtime libraries. Otherwise, it may end up with multiple
- # copies of runtime library data in different modules, resulting in
- # hard-to-find crashes. When it is built as a static library, it is
- # preferable to use CRT as static libraries, as we don't have to rely
- # on CRT DLLs being available. CMake always defaults to using shared
- # CRT libraries, so we override that default here.
- string(REPLACE "/MD" "-MT" ${flag_var} "${${flag_var}}")
- endif()
-
- # We prefer more strict warning checking for building Google Test.
- # Replaces /W3 with /W4 in defaults.
- string(REPLACE "/W3" "/W4" ${flag_var} "${${flag_var}}")
- endforeach()
- endif()
-endmacro()
-
-# Defines the compiler/linker flags used to build Google Test and
-# Google Mock. You can tweak these definitions to suit your need. A
-# variable's value is empty before it's explicitly assigned to.
-macro(config_compiler_and_linker)
- if (NOT gtest_disable_pthreads)
- # Defines CMAKE_USE_PTHREADS_INIT and CMAKE_THREAD_LIBS_INIT.
- find_package(Threads)
- endif()
-
- fix_default_compiler_settings_()
- if (MSVC)
- # Newlines inside flags variables break CMake's NMake generator.
- # TODO(vladl@google.com): Add -RTCs and -RTCu to debug builds.
- set(cxx_base_flags "-GS -W4 -WX -wd4251 -wd4275 -nologo -J -Zi")
- if (MSVC_VERSION LESS 1400) # 1400 is Visual Studio 2005
- # Suppress spurious warnings MSVC 7.1 sometimes issues.
- # Forcing value to bool.
- set(cxx_base_flags "${cxx_base_flags} -wd4800")
- # Copy constructor and assignment operator could not be generated.
- set(cxx_base_flags "${cxx_base_flags} -wd4511 -wd4512")
- # Compatibility warnings not applicable to Google Test.
- # Resolved overload was found by argument-dependent lookup.
- set(cxx_base_flags "${cxx_base_flags} -wd4675")
- endif()
- if (MSVC_VERSION LESS 1500) # 1500 is Visual Studio 2008
- # Conditional expression is constant.
- # When compiling with /W4, we get several instances of C4127
- # (Conditional expression is constant). In our code, we disable that
- # warning on a case-by-case basis. However, on Visual Studio 2005,
- # the warning fires on std::list. Therefore on that compiler and earlier,
- # we disable the warning project-wide.
- set(cxx_base_flags "${cxx_base_flags} -wd4127")
- endif()
- if (NOT (MSVC_VERSION LESS 1700)) # 1700 is Visual Studio 2012.
- # Suppress "unreachable code" warning on VS 2012 and later.
- # http://stackoverflow.com/questions/3232669 explains the issue.
- set(cxx_base_flags "${cxx_base_flags} -wd4702")
- endif()
- if (NOT (MSVC_VERSION GREATER 1900)) # 1900 is Visual Studio 2015
- # BigObj required for tests.
- set(cxx_base_flags "${cxx_base_flags} -bigobj")
- endif()
-
- set(cxx_base_flags "${cxx_base_flags} -D_UNICODE -DUNICODE -DWIN32 -D_WIN32")
- set(cxx_base_flags "${cxx_base_flags} -DSTRICT -DWIN32_LEAN_AND_MEAN")
- set(cxx_exception_flags "-EHsc -D_HAS_EXCEPTIONS=1")
- set(cxx_no_exception_flags "-D_HAS_EXCEPTIONS=0")
- set(cxx_no_rtti_flags "-GR-")
- elseif (CMAKE_COMPILER_IS_GNUCXX)
- set(cxx_base_flags "-Wall -Wshadow")
- set(cxx_exception_flags "-fexceptions")
- set(cxx_no_exception_flags "-fno-exceptions")
- # Until version 4.3.2, GCC doesn't define a macro to indicate
- # whether RTTI is enabled. Therefore we define GTEST_HAS_RTTI
- # explicitly.
- set(cxx_no_rtti_flags "-fno-rtti -DGTEST_HAS_RTTI=0")
- set(cxx_strict_flags
- "-Wextra -Wno-unused-parameter -Wno-missing-field-initializers")
- elseif (CMAKE_CXX_COMPILER_ID STREQUAL "SunPro")
- set(cxx_exception_flags "-features=except")
- # Sun Pro doesn't provide macros to indicate whether exceptions and
- # RTTI are enabled, so we define GTEST_HAS_* explicitly.
- set(cxx_no_exception_flags "-features=no%except -DGTEST_HAS_EXCEPTIONS=0")
- set(cxx_no_rtti_flags "-features=no%rtti -DGTEST_HAS_RTTI=0")
- elseif (CMAKE_CXX_COMPILER_ID STREQUAL "VisualAge" OR
- CMAKE_CXX_COMPILER_ID STREQUAL "XL")
- # CMake 2.8 changes Visual Age's compiler ID to "XL".
- set(cxx_exception_flags "-qeh")
- set(cxx_no_exception_flags "-qnoeh")
- # Until version 9.0, Visual Age doesn't define a macro to indicate
- # whether RTTI is enabled. Therefore we define GTEST_HAS_RTTI
- # explicitly.
- set(cxx_no_rtti_flags "-qnortti -DGTEST_HAS_RTTI=0")
- elseif (CMAKE_CXX_COMPILER_ID STREQUAL "HP")
- set(cxx_base_flags "-AA -mt")
- set(cxx_exception_flags "-DGTEST_HAS_EXCEPTIONS=1")
- set(cxx_no_exception_flags "+noeh -DGTEST_HAS_EXCEPTIONS=0")
- # RTTI can not be disabled in HP aCC compiler.
- set(cxx_no_rtti_flags "")
- endif()
-
- if (CMAKE_USE_PTHREADS_INIT) # The pthreads library is available and allowed.
- set(cxx_base_flags "${cxx_base_flags} -DGTEST_HAS_PTHREAD=1")
- else()
- set(cxx_base_flags "${cxx_base_flags} -DGTEST_HAS_PTHREAD=0")
- endif()
-
- # For building gtest's own tests and samples.
- set(cxx_exception "${CMAKE_CXX_FLAGS} ${cxx_base_flags} ${cxx_exception_flags}")
- set(cxx_no_exception
- "${CMAKE_CXX_FLAGS} ${cxx_base_flags} ${cxx_no_exception_flags}")
- set(cxx_default "${cxx_exception}")
- set(cxx_no_rtti "${cxx_default} ${cxx_no_rtti_flags}")
- set(cxx_use_own_tuple "${cxx_default} -DGTEST_USE_OWN_TR1_TUPLE=1")
-
- # For building the gtest libraries.
- set(cxx_strict "${cxx_default} ${cxx_strict_flags}")
-endmacro()
-
-# Defines the gtest & gtest_main libraries. User tests should link
-# with one of them.
-function(cxx_library_with_type name type cxx_flags)
- # type can be either STATIC or SHARED to denote a static or shared library.
- # ARGN refers to additional arguments after 'cxx_flags'.
- add_library(${name} ${type} ${ARGN})
- set_target_properties(${name}
- PROPERTIES
- COMPILE_FLAGS "${cxx_flags}")
- if (BUILD_SHARED_LIBS OR type STREQUAL "SHARED")
- set_target_properties(${name}
- PROPERTIES
- COMPILE_DEFINITIONS "GTEST_CREATE_SHARED_LIBRARY=1")
- endif()
- if (CMAKE_USE_PTHREADS_INIT)
- target_link_libraries(${name} ${CMAKE_THREAD_LIBS_INIT})
- endif()
-endfunction()
-
-########################################################################
-#
-# Helper functions for creating build targets.
-
-function(cxx_shared_library name cxx_flags)
- cxx_library_with_type(${name} SHARED "${cxx_flags}" ${ARGN})
-endfunction()
-
-function(cxx_library name cxx_flags)
- cxx_library_with_type(${name} "" "${cxx_flags}" ${ARGN})
-endfunction()
-
-# cxx_executable_with_flags(name cxx_flags libs srcs...)
-#
-# creates a named C++ executable that depends on the given libraries and
-# is built from the given source files with the given compiler flags.
-function(cxx_executable_with_flags name cxx_flags libs)
- add_executable(${name} ${ARGN})
- if (cxx_flags)
- set_target_properties(${name}
- PROPERTIES
- COMPILE_FLAGS "${cxx_flags}")
- endif()
- if (BUILD_SHARED_LIBS)
- set_target_properties(${name}
- PROPERTIES
- COMPILE_DEFINITIONS "GTEST_LINKED_AS_SHARED_LIBRARY=1")
- endif()
- # To support mixing linking in static and dynamic libraries, link each
- # library in with an extra call to target_link_libraries.
- foreach (lib "${libs}")
- target_link_libraries(${name} ${lib})
- endforeach()
-endfunction()
-
-# cxx_executable(name dir lib srcs...)
-#
-# creates a named target that depends on the given libs and is built
-# from the given source files. dir/name.cc is implicitly included in
-# the source file list.
-function(cxx_executable name dir libs)
- cxx_executable_with_flags(
- ${name} "${cxx_default}" "${libs}" "${dir}/${name}.cc" ${ARGN})
-endfunction()
-
-# Sets PYTHONINTERP_FOUND and PYTHON_EXECUTABLE.
-find_package(PythonInterp)
-
-# cxx_test_with_flags(name cxx_flags libs srcs...)
-#
-# creates a named C++ test that depends on the given libs and is built
-# from the given source files with the given compiler flags.
-function(cxx_test_with_flags name cxx_flags libs)
- cxx_executable_with_flags(${name} "${cxx_flags}" "${libs}" ${ARGN})
- add_test(${name} ${name})
-endfunction()
-
-# cxx_test(name libs srcs...)
-#
-# creates a named test target that depends on the given libs and is
-# built from the given source files. Unlike cxx_test_with_flags,
-# test/name.cc is already implicitly included in the source file list.
-function(cxx_test name libs)
- cxx_test_with_flags("${name}" "${cxx_default}" "${libs}"
- "test/${name}.cc" ${ARGN})
-endfunction()
-
-# py_test(name)
-#
-# creates a Python test with the given name whose main module is in
-# test/name.py. It does nothing if Python is not installed.
-function(py_test name)
- # We are not supporting Python tests on Linux yet as they consider
- # all Linux environments to be google3 and try to use google3 features.
- if (PYTHONINTERP_FOUND)
- # ${CMAKE_BINARY_DIR} is known at configuration time, so we can
- # directly bind it from cmake. ${CTEST_CONFIGURATION_TYPE} is known
- # only at ctest runtime (by calling ctest -c <Configuration>), so
- # we have to escape $ to delay variable substitution here.
- if (${CMAKE_MAJOR_VERSION}.${CMAKE_MINOR_VERSION} GREATER 3.1)
- add_test(
- NAME ${name}
- COMMAND ${PYTHON_EXECUTABLE} ${CMAKE_CURRENT_SOURCE_DIR}/test/${name}.py
- --build_dir=${CMAKE_CURRENT_BINARY_DIR}/$<CONFIGURATION>)
- else (${CMAKE_MAJOR_VERSION}.${CMAKE_MINOR_VERSION} GREATER 3.1)
- add_test(
- ${name}
- ${PYTHON_EXECUTABLE} ${CMAKE_CURRENT_SOURCE_DIR}/test/${name}.py
- --build_dir=${CMAKE_CURRENT_BINARY_DIR}/\${CTEST_CONFIGURATION_TYPE})
- endif (${CMAKE_MAJOR_VERSION}.${CMAKE_MINOR_VERSION} GREATER 3.1)
- endif()
-endfunction()
diff --git a/third_party/aom/third_party/googletest/src/googletest/include/gtest/gtest-death-test.h b/third_party/aom/third_party/googletest/src/googletest/include/gtest/gtest-death-test.h
deleted file mode 100644
index 957a69c6a..000000000
--- a/third_party/aom/third_party/googletest/src/googletest/include/gtest/gtest-death-test.h
+++ /dev/null
@@ -1,294 +0,0 @@
-// Copyright 2005, Google Inc.
-// All rights reserved.
-//
-// Redistribution and use in source and binary forms, with or without
-// modification, are permitted provided that the following conditions are
-// met:
-//
-// * Redistributions of source code must retain the above copyright
-// notice, this list of conditions and the following disclaimer.
-// * Redistributions in binary form must reproduce the above
-// copyright notice, this list of conditions and the following disclaimer
-// in the documentation and/or other materials provided with the
-// distribution.
-// * Neither the name of Google Inc. nor the names of its
-// contributors may be used to endorse or promote products derived from
-// this software without specific prior written permission.
-//
-// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
-// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
-// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
-// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
-// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
-// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
-// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
-// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
-// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-//
-// Author: wan@google.com (Zhanyong Wan)
-//
-// The Google C++ Testing Framework (Google Test)
-//
-// This header file defines the public API for death tests. It is
-// #included by gtest.h so a user doesn't need to include this
-// directly.
-
-#ifndef GTEST_INCLUDE_GTEST_GTEST_DEATH_TEST_H_
-#define GTEST_INCLUDE_GTEST_GTEST_DEATH_TEST_H_
-
-#include "gtest/internal/gtest-death-test-internal.h"
-
-namespace testing {
-
-// This flag controls the style of death tests. Valid values are "threadsafe",
-// meaning that the death test child process will re-execute the test binary
-// from the start, running only a single death test, or "fast",
-// meaning that the child process will execute the test logic immediately
-// after forking.
-GTEST_DECLARE_string_(death_test_style);
-
-#if GTEST_HAS_DEATH_TEST
-
-namespace internal {
-
-// Returns a Boolean value indicating whether the caller is currently
-// executing in the context of the death test child process. Tools such as
-// Valgrind heap checkers may need this to modify their behavior in death
-// tests. IMPORTANT: This is an internal utility. Using it may break the
-// implementation of death tests. User code MUST NOT use it.
-GTEST_API_ bool InDeathTestChild();
-
-} // namespace internal
-
-// The following macros are useful for writing death tests.
-
-// Here's what happens when an ASSERT_DEATH* or EXPECT_DEATH* is
-// executed:
-//
-// 1. It generates a warning if there is more than one active
-// thread. This is because it's safe to fork() or clone() only
-// when there is a single thread.
-//
-// 2. The parent process clone()s a sub-process and runs the death
-// test in it; the sub-process exits with code 0 at the end of the
-// death test, if it hasn't exited already.
-//
-// 3. The parent process waits for the sub-process to terminate.
-//
-// 4. The parent process checks the exit code and error message of
-// the sub-process.
-//
-// Examples:
-//
-// ASSERT_DEATH(server.SendMessage(56, "Hello"), "Invalid port number");
-// for (int i = 0; i < 5; i++) {
-// EXPECT_DEATH(server.ProcessRequest(i),
-// "Invalid request .* in ProcessRequest()")
-// << "Failed to die on request " << i;
-// }
-//
-// ASSERT_EXIT(server.ExitNow(), ::testing::ExitedWithCode(0), "Exiting");
-//
-// bool KilledBySIGHUP(int exit_code) {
-// return WIFSIGNALED(exit_code) && WTERMSIG(exit_code) == SIGHUP;
-// }
-//
-// ASSERT_EXIT(client.HangUpServer(), KilledBySIGHUP, "Hanging up!");
-//
-// On the regular expressions used in death tests:
-//
-// On POSIX-compliant systems (*nix), we use the <regex.h> library,
-// which uses the POSIX extended regex syntax.
-//
-// On other platforms (e.g. Windows), we only support a simple regex
-// syntax implemented as part of Google Test. This limited
-// implementation should be enough most of the time when writing
-// death tests; though it lacks many features you can find in PCRE
-// or POSIX extended regex syntax. For example, we don't support
-// union ("x|y"), grouping ("(xy)"), brackets ("[xy]"), and
-// repetition count ("x{5,7}"), among others.
-//
-// Below is the syntax that we do support. We chose it to be a
-// subset of both PCRE and POSIX extended regex, so it's easy to
-// learn wherever you come from. In the following: 'A' denotes a
-// literal character, period (.), or a single \\ escape sequence;
-// 'x' and 'y' denote regular expressions; 'm' and 'n' are for
-// natural numbers.
-//
-// c matches any literal character c
-// \\d matches any decimal digit
-// \\D matches any character that's not a decimal digit
-// \\f matches \f
-// \\n matches \n
-// \\r matches \r
-// \\s matches any ASCII whitespace, including \n
-// \\S matches any character that's not a whitespace
-// \\t matches \t
-// \\v matches \v
-// \\w matches any letter, _, or decimal digit
-// \\W matches any character that \\w doesn't match
-// \\c matches any literal character c, which must be a punctuation
-// . matches any single character except \n
-// A? matches 0 or 1 occurrences of A
-// A* matches 0 or many occurrences of A
-// A+ matches 1 or many occurrences of A
-// ^ matches the beginning of a string (not that of each line)
-// $ matches the end of a string (not that of each line)
-// xy matches x followed by y
-//
-// If you accidentally use PCRE or POSIX extended regex features
-// not implemented by us, you will get a run-time failure. In that
-// case, please try to rewrite your regular expression within the
-// above syntax.
-//
-// This implementation is *not* meant to be as highly tuned or robust
-// as a compiled regex library, but should perform well enough for a
-// death test, which already incurs significant overhead by launching
-// a child process.
-//
-// Known caveats:
-//
-// A "threadsafe" style death test obtains the path to the test
-// program from argv[0] and re-executes it in the sub-process. For
-// simplicity, the current implementation doesn't search the PATH
-// when launching the sub-process. This means that the user must
-// invoke the test program via a path that contains at least one
-// path separator (e.g. path/to/foo_test and
-// /absolute/path/to/bar_test are fine, but foo_test is not). This
-// is rarely a problem as people usually don't put the test binary
-// directory in PATH.
-//
-// TODO(wan@google.com): make thread-safe death tests search the PATH.
-
-// Asserts that a given statement causes the program to exit, with an
-// integer exit status that satisfies predicate, and emitting error output
-// that matches regex.
-# define ASSERT_EXIT(statement, predicate, regex) \
- GTEST_DEATH_TEST_(statement, predicate, regex, GTEST_FATAL_FAILURE_)
-
-// Like ASSERT_EXIT, but continues on to successive tests in the
-// test case, if any:
-# define EXPECT_EXIT(statement, predicate, regex) \
- GTEST_DEATH_TEST_(statement, predicate, regex, GTEST_NONFATAL_FAILURE_)
-
-// Asserts that a given statement causes the program to exit, either by
-// explicitly exiting with a nonzero exit code or being killed by a
-// signal, and emitting error output that matches regex.
-# define ASSERT_DEATH(statement, regex) \
- ASSERT_EXIT(statement, ::testing::internal::ExitedUnsuccessfully, regex)
-
-// Like ASSERT_DEATH, but continues on to successive tests in the
-// test case, if any:
-# define EXPECT_DEATH(statement, regex) \
- EXPECT_EXIT(statement, ::testing::internal::ExitedUnsuccessfully, regex)
-
-// Two predicate classes that can be used in {ASSERT,EXPECT}_EXIT*:
-
-// Tests that an exit code describes a normal exit with a given exit code.
-class GTEST_API_ ExitedWithCode {
- public:
- explicit ExitedWithCode(int exit_code);
- bool operator()(int exit_status) const;
- private:
- // No implementation - assignment is unsupported.
- void operator=(const ExitedWithCode& other);
-
- const int exit_code_;
-};
-
-# if !GTEST_OS_WINDOWS
-// Tests that an exit code describes an exit due to termination by a
-// given signal.
-class GTEST_API_ KilledBySignal {
- public:
- explicit KilledBySignal(int signum);
- bool operator()(int exit_status) const;
- private:
- const int signum_;
-};
-# endif // !GTEST_OS_WINDOWS
-
-// EXPECT_DEBUG_DEATH asserts that the given statements die in debug mode.
-// The death testing framework causes this to have interesting semantics,
-// since the sideeffects of the call are only visible in opt mode, and not
-// in debug mode.
-//
-// In practice, this can be used to test functions that utilize the
-// LOG(DFATAL) macro using the following style:
-//
-// int DieInDebugOr12(int* sideeffect) {
-// if (sideeffect) {
-// *sideeffect = 12;
-// }
-// LOG(DFATAL) << "death";
-// return 12;
-// }
-//
-// TEST(TestCase, TestDieOr12WorksInDgbAndOpt) {
-// int sideeffect = 0;
-// // Only asserts in dbg.
-// EXPECT_DEBUG_DEATH(DieInDebugOr12(&sideeffect), "death");
-//
-// #ifdef NDEBUG
-// // opt-mode has sideeffect visible.
-// EXPECT_EQ(12, sideeffect);
-// #else
-// // dbg-mode no visible sideeffect.
-// EXPECT_EQ(0, sideeffect);
-// #endif
-// }
-//
-// This will assert that DieInDebugReturn12InOpt() crashes in debug
-// mode, usually due to a DCHECK or LOG(DFATAL), but returns the
-// appropriate fallback value (12 in this case) in opt mode. If you
-// need to test that a function has appropriate side-effects in opt
-// mode, include assertions against the side-effects. A general
-// pattern for this is:
-//
-// EXPECT_DEBUG_DEATH({
-// // Side-effects here will have an effect after this statement in
-// // opt mode, but none in debug mode.
-// EXPECT_EQ(12, DieInDebugOr12(&sideeffect));
-// }, "death");
-//
-# ifdef NDEBUG
-
-# define EXPECT_DEBUG_DEATH(statement, regex) \
- GTEST_EXECUTE_STATEMENT_(statement, regex)
-
-# define ASSERT_DEBUG_DEATH(statement, regex) \
- GTEST_EXECUTE_STATEMENT_(statement, regex)
-
-# else
-
-# define EXPECT_DEBUG_DEATH(statement, regex) \
- EXPECT_DEATH(statement, regex)
-
-# define ASSERT_DEBUG_DEATH(statement, regex) \
- ASSERT_DEATH(statement, regex)
-
-# endif // NDEBUG for EXPECT_DEBUG_DEATH
-#endif // GTEST_HAS_DEATH_TEST
-
-// EXPECT_DEATH_IF_SUPPORTED(statement, regex) and
-// ASSERT_DEATH_IF_SUPPORTED(statement, regex) expand to real death tests if
-// death tests are supported; otherwise they just issue a warning. This is
-// useful when you are combining death test assertions with normal test
-// assertions in one test.
-#if GTEST_HAS_DEATH_TEST
-# define EXPECT_DEATH_IF_SUPPORTED(statement, regex) \
- EXPECT_DEATH(statement, regex)
-# define ASSERT_DEATH_IF_SUPPORTED(statement, regex) \
- ASSERT_DEATH(statement, regex)
-#else
-# define EXPECT_DEATH_IF_SUPPORTED(statement, regex) \
- GTEST_UNSUPPORTED_DEATH_TEST_(statement, regex, )
-# define ASSERT_DEATH_IF_SUPPORTED(statement, regex) \
- GTEST_UNSUPPORTED_DEATH_TEST_(statement, regex, return)
-#endif
-
-} // namespace testing
-
-#endif // GTEST_INCLUDE_GTEST_GTEST_DEATH_TEST_H_
diff --git a/third_party/aom/third_party/googletest/src/googletest/include/gtest/gtest-message.h b/third_party/aom/third_party/googletest/src/googletest/include/gtest/gtest-message.h
deleted file mode 100644
index fe879bca7..000000000
--- a/third_party/aom/third_party/googletest/src/googletest/include/gtest/gtest-message.h
+++ /dev/null
@@ -1,250 +0,0 @@
-// Copyright 2005, Google Inc.
-// All rights reserved.
-//
-// Redistribution and use in source and binary forms, with or without
-// modification, are permitted provided that the following conditions are
-// met:
-//
-// * Redistributions of source code must retain the above copyright
-// notice, this list of conditions and the following disclaimer.
-// * Redistributions in binary form must reproduce the above
-// copyright notice, this list of conditions and the following disclaimer
-// in the documentation and/or other materials provided with the
-// distribution.
-// * Neither the name of Google Inc. nor the names of its
-// contributors may be used to endorse or promote products derived from
-// this software without specific prior written permission.
-//
-// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
-// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
-// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
-// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
-// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
-// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
-// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
-// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
-// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-//
-// Author: wan@google.com (Zhanyong Wan)
-//
-// The Google C++ Testing Framework (Google Test)
-//
-// This header file defines the Message class.
-//
-// IMPORTANT NOTE: Due to limitation of the C++ language, we have to
-// leave some internal implementation details in this header file.
-// They are clearly marked by comments like this:
-//
-// // INTERNAL IMPLEMENTATION - DO NOT USE IN A USER PROGRAM.
-//
-// Such code is NOT meant to be used by a user directly, and is subject
-// to CHANGE WITHOUT NOTICE. Therefore DO NOT DEPEND ON IT in a user
-// program!
-
-#ifndef GTEST_INCLUDE_GTEST_GTEST_MESSAGE_H_
-#define GTEST_INCLUDE_GTEST_GTEST_MESSAGE_H_
-
-#include <limits>
-
-#include "gtest/internal/gtest-port.h"
-
-// Ensures that there is at least one operator<< in the global namespace.
-// See Message& operator<<(...) below for why.
-void operator<<(const testing::internal::Secret&, int);
-
-namespace testing {
-
-// The Message class works like an ostream repeater.
-//
-// Typical usage:
-//
-// 1. You stream a bunch of values to a Message object.
-// It will remember the text in a stringstream.
-// 2. Then you stream the Message object to an ostream.
-// This causes the text in the Message to be streamed
-// to the ostream.
-//
-// For example;
-//
-// testing::Message foo;
-// foo << 1 << " != " << 2;
-// std::cout << foo;
-//
-// will print "1 != 2".
-//
-// Message is not intended to be inherited from. In particular, its
-// destructor is not virtual.
-//
-// Note that stringstream behaves differently in gcc and in MSVC. You
-// can stream a NULL char pointer to it in the former, but not in the
-// latter (it causes an access violation if you do). The Message
-// class hides this difference by treating a NULL char pointer as
-// "(null)".
-class GTEST_API_ Message {
- private:
- // The type of basic IO manipulators (endl, ends, and flush) for
- // narrow streams.
- typedef std::ostream& (*BasicNarrowIoManip)(std::ostream&);
-
- public:
- // Constructs an empty Message.
- Message();
-
- // Copy constructor.
- Message(const Message& msg) : ss_(new ::std::stringstream) { // NOLINT
- *ss_ << msg.GetString();
- }
-
- // Constructs a Message from a C-string.
- explicit Message(const char* str) : ss_(new ::std::stringstream) {
- *ss_ << str;
- }
-
-#if GTEST_OS_SYMBIAN
- // Streams a value (either a pointer or not) to this object.
- template <typename T>
- inline Message& operator <<(const T& value) {
- StreamHelper(typename internal::is_pointer<T>::type(), value);
- return *this;
- }
-#else
- // Streams a non-pointer value to this object.
- template <typename T>
- inline Message& operator <<(const T& val) {
- // Some libraries overload << for STL containers. These
- // overloads are defined in the global namespace instead of ::std.
- //
- // C++'s symbol lookup rule (i.e. Koenig lookup) says that these
- // overloads are visible in either the std namespace or the global
- // namespace, but not other namespaces, including the testing
- // namespace which Google Test's Message class is in.
- //
- // To allow STL containers (and other types that has a << operator
- // defined in the global namespace) to be used in Google Test
- // assertions, testing::Message must access the custom << operator
- // from the global namespace. With this using declaration,
- // overloads of << defined in the global namespace and those
- // visible via Koenig lookup are both exposed in this function.
- using ::operator <<;
- *ss_ << val;
- return *this;
- }
-
- // Streams a pointer value to this object.
- //
- // This function is an overload of the previous one. When you
- // stream a pointer to a Message, this definition will be used as it
- // is more specialized. (The C++ Standard, section
- // [temp.func.order].) If you stream a non-pointer, then the
- // previous definition will be used.
- //
- // The reason for this overload is that streaming a NULL pointer to
- // ostream is undefined behavior. Depending on the compiler, you
- // may get "0", "(nil)", "(null)", or an access violation. To
- // ensure consistent result across compilers, we always treat NULL
- // as "(null)".
- template <typename T>
- inline Message& operator <<(T* const& pointer) { // NOLINT
- if (pointer == NULL) {
- *ss_ << "(null)";
- } else {
- *ss_ << pointer;
- }
- return *this;
- }
-#endif // GTEST_OS_SYMBIAN
-
- // Since the basic IO manipulators are overloaded for both narrow
- // and wide streams, we have to provide this specialized definition
- // of operator <<, even though its body is the same as the
- // templatized version above. Without this definition, streaming
- // endl or other basic IO manipulators to Message will confuse the
- // compiler.
- Message& operator <<(BasicNarrowIoManip val) {
- *ss_ << val;
- return *this;
- }
-
- // Instead of 1/0, we want to see true/false for bool values.
- Message& operator <<(bool b) {
- return *this << (b ? "true" : "false");
- }
-
- // These two overloads allow streaming a wide C string to a Message
- // using the UTF-8 encoding.
- Message& operator <<(const wchar_t* wide_c_str);
- Message& operator <<(wchar_t* wide_c_str);
-
-#if GTEST_HAS_STD_WSTRING
- // Converts the given wide string to a narrow string using the UTF-8
- // encoding, and streams the result to this Message object.
- Message& operator <<(const ::std::wstring& wstr);
-#endif // GTEST_HAS_STD_WSTRING
-
-#if GTEST_HAS_GLOBAL_WSTRING
- // Converts the given wide string to a narrow string using the UTF-8
- // encoding, and streams the result to this Message object.
- Message& operator <<(const ::wstring& wstr);
-#endif // GTEST_HAS_GLOBAL_WSTRING
-
- // Gets the text streamed to this object so far as an std::string.
- // Each '\0' character in the buffer is replaced with "\\0".
- //
- // INTERNAL IMPLEMENTATION - DO NOT USE IN A USER PROGRAM.
- std::string GetString() const;
-
- private:
-
-#if GTEST_OS_SYMBIAN
- // These are needed as the Nokia Symbian Compiler cannot decide between
- // const T& and const T* in a function template. The Nokia compiler _can_
- // decide between class template specializations for T and T*, so a
- // tr1::type_traits-like is_pointer works, and we can overload on that.
- template <typename T>
- inline void StreamHelper(internal::true_type /*is_pointer*/, T* pointer) {
- if (pointer == NULL) {
- *ss_ << "(null)";
- } else {
- *ss_ << pointer;
- }
- }
- template <typename T>
- inline void StreamHelper(internal::false_type /*is_pointer*/,
- const T& value) {
- // See the comments in Message& operator <<(const T&) above for why
- // we need this using statement.
- using ::operator <<;
- *ss_ << value;
- }
-#endif // GTEST_OS_SYMBIAN
-
- // We'll hold the text streamed to this object here.
- const internal::scoped_ptr< ::std::stringstream> ss_;
-
- // We declare (but don't implement) this to prevent the compiler
- // from implementing the assignment operator.
- void operator=(const Message&);
-};
-
-// Streams a Message to an ostream.
-inline std::ostream& operator <<(std::ostream& os, const Message& sb) {
- return os << sb.GetString();
-}
-
-namespace internal {
-
-// Converts a streamable value to an std::string. A NULL pointer is
-// converted to "(null)". When the input value is a ::string,
-// ::std::string, ::wstring, or ::std::wstring object, each NUL
-// character in it is replaced with "\\0".
-template <typename T>
-std::string StreamableToString(const T& streamable) {
- return (Message() << streamable).GetString();
-}
-
-} // namespace internal
-} // namespace testing
-
-#endif // GTEST_INCLUDE_GTEST_GTEST_MESSAGE_H_
diff --git a/third_party/aom/third_party/googletest/src/googletest/include/gtest/gtest-param-test.h b/third_party/aom/third_party/googletest/src/googletest/include/gtest/gtest-param-test.h
deleted file mode 100644
index 038f9ba79..000000000
--- a/third_party/aom/third_party/googletest/src/googletest/include/gtest/gtest-param-test.h
+++ /dev/null
@@ -1,1444 +0,0 @@
-// This file was GENERATED by command:
-// pump.py gtest-param-test.h.pump
-// DO NOT EDIT BY HAND!!!
-
-// Copyright 2008, Google Inc.
-// All rights reserved.
-//
-// Redistribution and use in source and binary forms, with or without
-// modification, are permitted provided that the following conditions are
-// met:
-//
-// * Redistributions of source code must retain the above copyright
-// notice, this list of conditions and the following disclaimer.
-// * Redistributions in binary form must reproduce the above
-// copyright notice, this list of conditions and the following disclaimer
-// in the documentation and/or other materials provided with the
-// distribution.
-// * Neither the name of Google Inc. nor the names of its
-// contributors may be used to endorse or promote products derived from
-// this software without specific prior written permission.
-//
-// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
-// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
-// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
-// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
-// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
-// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
-// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
-// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
-// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-//
-// Authors: vladl@google.com (Vlad Losev)
-//
-// Macros and functions for implementing parameterized tests
-// in Google C++ Testing Framework (Google Test)
-//
-// This file is generated by a SCRIPT. DO NOT EDIT BY HAND!
-//
-#ifndef GTEST_INCLUDE_GTEST_GTEST_PARAM_TEST_H_
-#define GTEST_INCLUDE_GTEST_GTEST_PARAM_TEST_H_
-
-
-// Value-parameterized tests allow you to test your code with different
-// parameters without writing multiple copies of the same test.
-//
-// Here is how you use value-parameterized tests:
-
-#if 0
-
-// To write value-parameterized tests, first you should define a fixture
-// class. It is usually derived from testing::TestWithParam<T> (see below for
-// another inheritance scheme that's sometimes useful in more complicated
-// class hierarchies), where the type of your parameter values.
-// TestWithParam<T> is itself derived from testing::Test. T can be any
-// copyable type. If it's a raw pointer, you are responsible for managing the
-// lifespan of the pointed values.
-
-class FooTest : public ::testing::TestWithParam<const char*> {
- // You can implement all the usual class fixture members here.
-};
-
-// Then, use the TEST_P macro to define as many parameterized tests
-// for this fixture as you want. The _P suffix is for "parameterized"
-// or "pattern", whichever you prefer to think.
-
-TEST_P(FooTest, DoesBlah) {
- // Inside a test, access the test parameter with the GetParam() method
- // of the TestWithParam<T> class:
- EXPECT_TRUE(foo.Blah(GetParam()));
- ...
-}
-
-TEST_P(FooTest, HasBlahBlah) {
- ...
-}
-
-// Finally, you can use INSTANTIATE_TEST_CASE_P to instantiate the test
-// case with any set of parameters you want. Google Test defines a number
-// of functions for generating test parameters. They return what we call
-// (surprise!) parameter generators. Here is a summary of them, which
-// are all in the testing namespace:
-//
-//
-// Range(begin, end [, step]) - Yields values {begin, begin+step,
-// begin+step+step, ...}. The values do not
-// include end. step defaults to 1.
-// Values(v1, v2, ..., vN) - Yields values {v1, v2, ..., vN}.
-// ValuesIn(container) - Yields values from a C-style array, an STL
-// ValuesIn(begin,end) container, or an iterator range [begin, end).
-// Bool() - Yields sequence {false, true}.
-// Combine(g1, g2, ..., gN) - Yields all combinations (the Cartesian product
-// for the math savvy) of the values generated
-// by the N generators.
-//
-// For more details, see comments at the definitions of these functions below
-// in this file.
-//
-// The following statement will instantiate tests from the FooTest test case
-// each with parameter values "meeny", "miny", and "moe".
-
-INSTANTIATE_TEST_CASE_P(InstantiationName,
- FooTest,
- Values("meeny", "miny", "moe"));
-
-// To distinguish different instances of the pattern, (yes, you
-// can instantiate it more then once) the first argument to the
-// INSTANTIATE_TEST_CASE_P macro is a prefix that will be added to the
-// actual test case name. Remember to pick unique prefixes for different
-// instantiations. The tests from the instantiation above will have
-// these names:
-//
-// * InstantiationName/FooTest.DoesBlah/0 for "meeny"
-// * InstantiationName/FooTest.DoesBlah/1 for "miny"
-// * InstantiationName/FooTest.DoesBlah/2 for "moe"
-// * InstantiationName/FooTest.HasBlahBlah/0 for "meeny"
-// * InstantiationName/FooTest.HasBlahBlah/1 for "miny"
-// * InstantiationName/FooTest.HasBlahBlah/2 for "moe"
-//
-// You can use these names in --gtest_filter.
-//
-// This statement will instantiate all tests from FooTest again, each
-// with parameter values "cat" and "dog":
-
-const char* pets[] = {"cat", "dog"};
-INSTANTIATE_TEST_CASE_P(AnotherInstantiationName, FooTest, ValuesIn(pets));
-
-// The tests from the instantiation above will have these names:
-//
-// * AnotherInstantiationName/FooTest.DoesBlah/0 for "cat"
-// * AnotherInstantiationName/FooTest.DoesBlah/1 for "dog"
-// * AnotherInstantiationName/FooTest.HasBlahBlah/0 for "cat"
-// * AnotherInstantiationName/FooTest.HasBlahBlah/1 for "dog"
-//
-// Please note that INSTANTIATE_TEST_CASE_P will instantiate all tests
-// in the given test case, whether their definitions come before or
-// AFTER the INSTANTIATE_TEST_CASE_P statement.
-//
-// Please also note that generator expressions (including parameters to the
-// generators) are evaluated in InitGoogleTest(), after main() has started.
-// This allows the user on one hand, to adjust generator parameters in order
-// to dynamically determine a set of tests to run and on the other hand,
-// give the user a chance to inspect the generated tests with Google Test
-// reflection API before RUN_ALL_TESTS() is executed.
-//
-// You can see samples/sample7_unittest.cc and samples/sample8_unittest.cc
-// for more examples.
-//
-// In the future, we plan to publish the API for defining new parameter
-// generators. But for now this interface remains part of the internal
-// implementation and is subject to change.
-//
-//
-// A parameterized test fixture must be derived from testing::Test and from
-// testing::WithParamInterface<T>, where T is the type of the parameter
-// values. Inheriting from TestWithParam<T> satisfies that requirement because
-// TestWithParam<T> inherits from both Test and WithParamInterface. In more
-// complicated hierarchies, however, it is occasionally useful to inherit
-// separately from Test and WithParamInterface. For example:
-
-class BaseTest : public ::testing::Test {
- // You can inherit all the usual members for a non-parameterized test
- // fixture here.
-};
-
-class DerivedTest : public BaseTest, public ::testing::WithParamInterface<int> {
- // The usual test fixture members go here too.
-};
-
-TEST_F(BaseTest, HasFoo) {
- // This is an ordinary non-parameterized test.
-}
-
-TEST_P(DerivedTest, DoesBlah) {
- // GetParam works just the same here as if you inherit from TestWithParam.
- EXPECT_TRUE(foo.Blah(GetParam()));
-}
-
-#endif // 0
-
-#include "gtest/internal/gtest-port.h"
-
-#if !GTEST_OS_SYMBIAN
-# include <utility>
-#endif
-
-// scripts/fuse_gtest.py depends on gtest's own header being #included
-// *unconditionally*. Therefore these #includes cannot be moved
-// inside #if GTEST_HAS_PARAM_TEST.
-#include "gtest/internal/gtest-internal.h"
-#include "gtest/internal/gtest-param-util.h"
-#include "gtest/internal/gtest-param-util-generated.h"
-
-#if GTEST_HAS_PARAM_TEST
-
-namespace testing {
-
-// Functions producing parameter generators.
-//
-// Google Test uses these generators to produce parameters for value-
-// parameterized tests. When a parameterized test case is instantiated
-// with a particular generator, Google Test creates and runs tests
-// for each element in the sequence produced by the generator.
-//
-// In the following sample, tests from test case FooTest are instantiated
-// each three times with parameter values 3, 5, and 8:
-//
-// class FooTest : public TestWithParam<int> { ... };
-//
-// TEST_P(FooTest, TestThis) {
-// }
-// TEST_P(FooTest, TestThat) {
-// }
-// INSTANTIATE_TEST_CASE_P(TestSequence, FooTest, Values(3, 5, 8));
-//
-
-// Range() returns generators providing sequences of values in a range.
-//
-// Synopsis:
-// Range(start, end)
-// - returns a generator producing a sequence of values {start, start+1,
-// start+2, ..., }.
-// Range(start, end, step)
-// - returns a generator producing a sequence of values {start, start+step,
-// start+step+step, ..., }.
-// Notes:
-// * The generated sequences never include end. For example, Range(1, 5)
-// returns a generator producing a sequence {1, 2, 3, 4}. Range(1, 9, 2)
-// returns a generator producing {1, 3, 5, 7}.
-// * start and end must have the same type. That type may be any integral or
-// floating-point type or a user defined type satisfying these conditions:
-// * It must be assignable (have operator=() defined).
-// * It must have operator+() (operator+(int-compatible type) for
-// two-operand version).
-// * It must have operator<() defined.
-// Elements in the resulting sequences will also have that type.
-// * Condition start < end must be satisfied in order for resulting sequences
-// to contain any elements.
-//
-template <typename T, typename IncrementT>
-internal::ParamGenerator<T> Range(T start, T end, IncrementT step) {
- return internal::ParamGenerator<T>(
- new internal::RangeGenerator<T, IncrementT>(start, end, step));
-}
-
-template <typename T>
-internal::ParamGenerator<T> Range(T start, T end) {
- return Range(start, end, 1);
-}
-
-// ValuesIn() function allows generation of tests with parameters coming from
-// a container.
-//
-// Synopsis:
-// ValuesIn(const T (&array)[N])
-// - returns a generator producing sequences with elements from
-// a C-style array.
-// ValuesIn(const Container& container)
-// - returns a generator producing sequences with elements from
-// an STL-style container.
-// ValuesIn(Iterator begin, Iterator end)
-// - returns a generator producing sequences with elements from
-// a range [begin, end) defined by a pair of STL-style iterators. These
-// iterators can also be plain C pointers.
-//
-// Please note that ValuesIn copies the values from the containers
-// passed in and keeps them to generate tests in RUN_ALL_TESTS().
-//
-// Examples:
-//
-// This instantiates tests from test case StringTest
-// each with C-string values of "foo", "bar", and "baz":
-//
-// const char* strings[] = {"foo", "bar", "baz"};
-// INSTANTIATE_TEST_CASE_P(StringSequence, SrtingTest, ValuesIn(strings));
-//
-// This instantiates tests from test case StlStringTest
-// each with STL strings with values "a" and "b":
-//
-// ::std::vector< ::std::string> GetParameterStrings() {
-// ::std::vector< ::std::string> v;
-// v.push_back("a");
-// v.push_back("b");
-// return v;
-// }
-//
-// INSTANTIATE_TEST_CASE_P(CharSequence,
-// StlStringTest,
-// ValuesIn(GetParameterStrings()));
-//
-//
-// This will also instantiate tests from CharTest
-// each with parameter values 'a' and 'b':
-//
-// ::std::list<char> GetParameterChars() {
-// ::std::list<char> list;
-// list.push_back('a');
-// list.push_back('b');
-// return list;
-// }
-// ::std::list<char> l = GetParameterChars();
-// INSTANTIATE_TEST_CASE_P(CharSequence2,
-// CharTest,
-// ValuesIn(l.begin(), l.end()));
-//
-template <typename ForwardIterator>
-internal::ParamGenerator<
- typename ::testing::internal::IteratorTraits<ForwardIterator>::value_type>
-ValuesIn(ForwardIterator begin, ForwardIterator end) {
- typedef typename ::testing::internal::IteratorTraits<ForwardIterator>
- ::value_type ParamType;
- return internal::ParamGenerator<ParamType>(
- new internal::ValuesInIteratorRangeGenerator<ParamType>(begin, end));
-}
-
-template <typename T, size_t N>
-internal::ParamGenerator<T> ValuesIn(const T (&array)[N]) {
- return ValuesIn(array, array + N);
-}
-
-template <class Container>
-internal::ParamGenerator<typename Container::value_type> ValuesIn(
- const Container& container) {
- return ValuesIn(container.begin(), container.end());
-}
-
-// Values() allows generating tests from explicitly specified list of
-// parameters.
-//
-// Synopsis:
-// Values(T v1, T v2, ..., T vN)
-// - returns a generator producing sequences with elements v1, v2, ..., vN.
-//
-// For example, this instantiates tests from test case BarTest each
-// with values "one", "two", and "three":
-//
-// INSTANTIATE_TEST_CASE_P(NumSequence, BarTest, Values("one", "two", "three"));
-//
-// This instantiates tests from test case BazTest each with values 1, 2, 3.5.
-// The exact type of values will depend on the type of parameter in BazTest.
-//
-// INSTANTIATE_TEST_CASE_P(FloatingNumbers, BazTest, Values(1, 2, 3.5));
-//
-// Currently, Values() supports from 1 to 50 parameters.
-//
-template <typename T1>
-internal::ValueArray1<T1> Values(T1 v1) {
- return internal::ValueArray1<T1>(v1);
-}
-
-template <typename T1, typename T2>
-internal::ValueArray2<T1, T2> Values(T1 v1, T2 v2) {
- return internal::ValueArray2<T1, T2>(v1, v2);
-}
-
-template <typename T1, typename T2, typename T3>
-internal::ValueArray3<T1, T2, T3> Values(T1 v1, T2 v2, T3 v3) {
- return internal::ValueArray3<T1, T2, T3>(v1, v2, v3);
-}
-
-template <typename T1, typename T2, typename T3, typename T4>
-internal::ValueArray4<T1, T2, T3, T4> Values(T1 v1, T2 v2, T3 v3, T4 v4) {
- return internal::ValueArray4<T1, T2, T3, T4>(v1, v2, v3, v4);
-}
-
-template <typename T1, typename T2, typename T3, typename T4, typename T5>
-internal::ValueArray5<T1, T2, T3, T4, T5> Values(T1 v1, T2 v2, T3 v3, T4 v4,
- T5 v5) {
- return internal::ValueArray5<T1, T2, T3, T4, T5>(v1, v2, v3, v4, v5);
-}
-
-template <typename T1, typename T2, typename T3, typename T4, typename T5,
- typename T6>
-internal::ValueArray6<T1, T2, T3, T4, T5, T6> Values(T1 v1, T2 v2, T3 v3,
- T4 v4, T5 v5, T6 v6) {
- return internal::ValueArray6<T1, T2, T3, T4, T5, T6>(v1, v2, v3, v4, v5, v6);
-}
-
-template <typename T1, typename T2, typename T3, typename T4, typename T5,
- typename T6, typename T7>
-internal::ValueArray7<T1, T2, T3, T4, T5, T6, T7> Values(T1 v1, T2 v2, T3 v3,
- T4 v4, T5 v5, T6 v6, T7 v7) {
- return internal::ValueArray7<T1, T2, T3, T4, T5, T6, T7>(v1, v2, v3, v4, v5,
- v6, v7);
-}
-
-template <typename T1, typename T2, typename T3, typename T4, typename T5,
- typename T6, typename T7, typename T8>
-internal::ValueArray8<T1, T2, T3, T4, T5, T6, T7, T8> Values(T1 v1, T2 v2,
- T3 v3, T4 v4, T5 v5, T6 v6, T7 v7, T8 v8) {
- return internal::ValueArray8<T1, T2, T3, T4, T5, T6, T7, T8>(v1, v2, v3, v4,
- v5, v6, v7, v8);
-}
-
-template <typename T1, typename T2, typename T3, typename T4, typename T5,
- typename T6, typename T7, typename T8, typename T9>
-internal::ValueArray9<T1, T2, T3, T4, T5, T6, T7, T8, T9> Values(T1 v1, T2 v2,
- T3 v3, T4 v4, T5 v5, T6 v6, T7 v7, T8 v8, T9 v9) {
- return internal::ValueArray9<T1, T2, T3, T4, T5, T6, T7, T8, T9>(v1, v2, v3,
- v4, v5, v6, v7, v8, v9);
-}
-
-template <typename T1, typename T2, typename T3, typename T4, typename T5,
- typename T6, typename T7, typename T8, typename T9, typename T10>
-internal::ValueArray10<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10> Values(T1 v1,
- T2 v2, T3 v3, T4 v4, T5 v5, T6 v6, T7 v7, T8 v8, T9 v9, T10 v10) {
- return internal::ValueArray10<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10>(v1,
- v2, v3, v4, v5, v6, v7, v8, v9, v10);
-}
-
-template <typename T1, typename T2, typename T3, typename T4, typename T5,
- typename T6, typename T7, typename T8, typename T9, typename T10,
- typename T11>
-internal::ValueArray11<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10,
- T11> Values(T1 v1, T2 v2, T3 v3, T4 v4, T5 v5, T6 v6, T7 v7, T8 v8, T9 v9,
- T10 v10, T11 v11) {
- return internal::ValueArray11<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10,
- T11>(v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11);
-}
-
-template <typename T1, typename T2, typename T3, typename T4, typename T5,
- typename T6, typename T7, typename T8, typename T9, typename T10,
- typename T11, typename T12>
-internal::ValueArray12<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11,
- T12> Values(T1 v1, T2 v2, T3 v3, T4 v4, T5 v5, T6 v6, T7 v7, T8 v8, T9 v9,
- T10 v10, T11 v11, T12 v12) {
- return internal::ValueArray12<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11,
- T12>(v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12);
-}
-
-template <typename T1, typename T2, typename T3, typename T4, typename T5,
- typename T6, typename T7, typename T8, typename T9, typename T10,
- typename T11, typename T12, typename T13>
-internal::ValueArray13<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12,
- T13> Values(T1 v1, T2 v2, T3 v3, T4 v4, T5 v5, T6 v6, T7 v7, T8 v8, T9 v9,
- T10 v10, T11 v11, T12 v12, T13 v13) {
- return internal::ValueArray13<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11,
- T12, T13>(v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13);
-}
-
-template <typename T1, typename T2, typename T3, typename T4, typename T5,
- typename T6, typename T7, typename T8, typename T9, typename T10,
- typename T11, typename T12, typename T13, typename T14>
-internal::ValueArray14<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13,
- T14> Values(T1 v1, T2 v2, T3 v3, T4 v4, T5 v5, T6 v6, T7 v7, T8 v8, T9 v9,
- T10 v10, T11 v11, T12 v12, T13 v13, T14 v14) {
- return internal::ValueArray14<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11,
- T12, T13, T14>(v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13,
- v14);
-}
-
-template <typename T1, typename T2, typename T3, typename T4, typename T5,
- typename T6, typename T7, typename T8, typename T9, typename T10,
- typename T11, typename T12, typename T13, typename T14, typename T15>
-internal::ValueArray15<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13,
- T14, T15> Values(T1 v1, T2 v2, T3 v3, T4 v4, T5 v5, T6 v6, T7 v7, T8 v8,
- T9 v9, T10 v10, T11 v11, T12 v12, T13 v13, T14 v14, T15 v15) {
- return internal::ValueArray15<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11,
- T12, T13, T14, T15>(v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12,
- v13, v14, v15);
-}
-
-template <typename T1, typename T2, typename T3, typename T4, typename T5,
- typename T6, typename T7, typename T8, typename T9, typename T10,
- typename T11, typename T12, typename T13, typename T14, typename T15,
- typename T16>
-internal::ValueArray16<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13,
- T14, T15, T16> Values(T1 v1, T2 v2, T3 v3, T4 v4, T5 v5, T6 v6, T7 v7,
- T8 v8, T9 v9, T10 v10, T11 v11, T12 v12, T13 v13, T14 v14, T15 v15,
- T16 v16) {
- return internal::ValueArray16<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11,
- T12, T13, T14, T15, T16>(v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11,
- v12, v13, v14, v15, v16);
-}
-
-template <typename T1, typename T2, typename T3, typename T4, typename T5,
- typename T6, typename T7, typename T8, typename T9, typename T10,
- typename T11, typename T12, typename T13, typename T14, typename T15,
- typename T16, typename T17>
-internal::ValueArray17<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13,
- T14, T15, T16, T17> Values(T1 v1, T2 v2, T3 v3, T4 v4, T5 v5, T6 v6, T7 v7,
- T8 v8, T9 v9, T10 v10, T11 v11, T12 v12, T13 v13, T14 v14, T15 v15,
- T16 v16, T17 v17) {
- return internal::ValueArray17<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11,
- T12, T13, T14, T15, T16, T17>(v1, v2, v3, v4, v5, v6, v7, v8, v9, v10,
- v11, v12, v13, v14, v15, v16, v17);
-}
-
-template <typename T1, typename T2, typename T3, typename T4, typename T5,
- typename T6, typename T7, typename T8, typename T9, typename T10,
- typename T11, typename T12, typename T13, typename T14, typename T15,
- typename T16, typename T17, typename T18>
-internal::ValueArray18<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13,
- T14, T15, T16, T17, T18> Values(T1 v1, T2 v2, T3 v3, T4 v4, T5 v5, T6 v6,
- T7 v7, T8 v8, T9 v9, T10 v10, T11 v11, T12 v12, T13 v13, T14 v14, T15 v15,
- T16 v16, T17 v17, T18 v18) {
- return internal::ValueArray18<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11,
- T12, T13, T14, T15, T16, T17, T18>(v1, v2, v3, v4, v5, v6, v7, v8, v9,
- v10, v11, v12, v13, v14, v15, v16, v17, v18);
-}
-
-template <typename T1, typename T2, typename T3, typename T4, typename T5,
- typename T6, typename T7, typename T8, typename T9, typename T10,
- typename T11, typename T12, typename T13, typename T14, typename T15,
- typename T16, typename T17, typename T18, typename T19>
-internal::ValueArray19<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13,
- T14, T15, T16, T17, T18, T19> Values(T1 v1, T2 v2, T3 v3, T4 v4, T5 v5,
- T6 v6, T7 v7, T8 v8, T9 v9, T10 v10, T11 v11, T12 v12, T13 v13, T14 v14,
- T15 v15, T16 v16, T17 v17, T18 v18, T19 v19) {
- return internal::ValueArray19<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11,
- T12, T13, T14, T15, T16, T17, T18, T19>(v1, v2, v3, v4, v5, v6, v7, v8,
- v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19);
-}
-
-template <typename T1, typename T2, typename T3, typename T4, typename T5,
- typename T6, typename T7, typename T8, typename T9, typename T10,
- typename T11, typename T12, typename T13, typename T14, typename T15,
- typename T16, typename T17, typename T18, typename T19, typename T20>
-internal::ValueArray20<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13,
- T14, T15, T16, T17, T18, T19, T20> Values(T1 v1, T2 v2, T3 v3, T4 v4,
- T5 v5, T6 v6, T7 v7, T8 v8, T9 v9, T10 v10, T11 v11, T12 v12, T13 v13,
- T14 v14, T15 v15, T16 v16, T17 v17, T18 v18, T19 v19, T20 v20) {
- return internal::ValueArray20<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11,
- T12, T13, T14, T15, T16, T17, T18, T19, T20>(v1, v2, v3, v4, v5, v6, v7,
- v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20);
-}
-
-template <typename T1, typename T2, typename T3, typename T4, typename T5,
- typename T6, typename T7, typename T8, typename T9, typename T10,
- typename T11, typename T12, typename T13, typename T14, typename T15,
- typename T16, typename T17, typename T18, typename T19, typename T20,
- typename T21>
-internal::ValueArray21<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13,
- T14, T15, T16, T17, T18, T19, T20, T21> Values(T1 v1, T2 v2, T3 v3, T4 v4,
- T5 v5, T6 v6, T7 v7, T8 v8, T9 v9, T10 v10, T11 v11, T12 v12, T13 v13,
- T14 v14, T15 v15, T16 v16, T17 v17, T18 v18, T19 v19, T20 v20, T21 v21) {
- return internal::ValueArray21<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11,
- T12, T13, T14, T15, T16, T17, T18, T19, T20, T21>(v1, v2, v3, v4, v5, v6,
- v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21);
-}
-
-template <typename T1, typename T2, typename T3, typename T4, typename T5,
- typename T6, typename T7, typename T8, typename T9, typename T10,
- typename T11, typename T12, typename T13, typename T14, typename T15,
- typename T16, typename T17, typename T18, typename T19, typename T20,
- typename T21, typename T22>
-internal::ValueArray22<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13,
- T14, T15, T16, T17, T18, T19, T20, T21, T22> Values(T1 v1, T2 v2, T3 v3,
- T4 v4, T5 v5, T6 v6, T7 v7, T8 v8, T9 v9, T10 v10, T11 v11, T12 v12,
- T13 v13, T14 v14, T15 v15, T16 v16, T17 v17, T18 v18, T19 v19, T20 v20,
- T21 v21, T22 v22) {
- return internal::ValueArray22<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11,
- T12, T13, T14, T15, T16, T17, T18, T19, T20, T21, T22>(v1, v2, v3, v4,
- v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19,
- v20, v21, v22);
-}
-
-template <typename T1, typename T2, typename T3, typename T4, typename T5,
- typename T6, typename T7, typename T8, typename T9, typename T10,
- typename T11, typename T12, typename T13, typename T14, typename T15,
- typename T16, typename T17, typename T18, typename T19, typename T20,
- typename T21, typename T22, typename T23>
-internal::ValueArray23<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13,
- T14, T15, T16, T17, T18, T19, T20, T21, T22, T23> Values(T1 v1, T2 v2,
- T3 v3, T4 v4, T5 v5, T6 v6, T7 v7, T8 v8, T9 v9, T10 v10, T11 v11, T12 v12,
- T13 v13, T14 v14, T15 v15, T16 v16, T17 v17, T18 v18, T19 v19, T20 v20,
- T21 v21, T22 v22, T23 v23) {
- return internal::ValueArray23<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11,
- T12, T13, T14, T15, T16, T17, T18, T19, T20, T21, T22, T23>(v1, v2, v3,
- v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19,
- v20, v21, v22, v23);
-}
-
-template <typename T1, typename T2, typename T3, typename T4, typename T5,
- typename T6, typename T7, typename T8, typename T9, typename T10,
- typename T11, typename T12, typename T13, typename T14, typename T15,
- typename T16, typename T17, typename T18, typename T19, typename T20,
- typename T21, typename T22, typename T23, typename T24>
-internal::ValueArray24<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13,
- T14, T15, T16, T17, T18, T19, T20, T21, T22, T23, T24> Values(T1 v1, T2 v2,
- T3 v3, T4 v4, T5 v5, T6 v6, T7 v7, T8 v8, T9 v9, T10 v10, T11 v11, T12 v12,
- T13 v13, T14 v14, T15 v15, T16 v16, T17 v17, T18 v18, T19 v19, T20 v20,
- T21 v21, T22 v22, T23 v23, T24 v24) {
- return internal::ValueArray24<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11,
- T12, T13, T14, T15, T16, T17, T18, T19, T20, T21, T22, T23, T24>(v1, v2,
- v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18,
- v19, v20, v21, v22, v23, v24);
-}
-
-template <typename T1, typename T2, typename T3, typename T4, typename T5,
- typename T6, typename T7, typename T8, typename T9, typename T10,
- typename T11, typename T12, typename T13, typename T14, typename T15,
- typename T16, typename T17, typename T18, typename T19, typename T20,
- typename T21, typename T22, typename T23, typename T24, typename T25>
-internal::ValueArray25<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13,
- T14, T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25> Values(T1 v1,
- T2 v2, T3 v3, T4 v4, T5 v5, T6 v6, T7 v7, T8 v8, T9 v9, T10 v10, T11 v11,
- T12 v12, T13 v13, T14 v14, T15 v15, T16 v16, T17 v17, T18 v18, T19 v19,
- T20 v20, T21 v21, T22 v22, T23 v23, T24 v24, T25 v25) {
- return internal::ValueArray25<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11,
- T12, T13, T14, T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25>(v1,
- v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17,
- v18, v19, v20, v21, v22, v23, v24, v25);
-}
-
-template <typename T1, typename T2, typename T3, typename T4, typename T5,
- typename T6, typename T7, typename T8, typename T9, typename T10,
- typename T11, typename T12, typename T13, typename T14, typename T15,
- typename T16, typename T17, typename T18, typename T19, typename T20,
- typename T21, typename T22, typename T23, typename T24, typename T25,
- typename T26>
-internal::ValueArray26<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13,
- T14, T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25,
- T26> Values(T1 v1, T2 v2, T3 v3, T4 v4, T5 v5, T6 v6, T7 v7, T8 v8, T9 v9,
- T10 v10, T11 v11, T12 v12, T13 v13, T14 v14, T15 v15, T16 v16, T17 v17,
- T18 v18, T19 v19, T20 v20, T21 v21, T22 v22, T23 v23, T24 v24, T25 v25,
- T26 v26) {
- return internal::ValueArray26<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11,
- T12, T13, T14, T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25,
- T26>(v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15,
- v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26);
-}
-
-template <typename T1, typename T2, typename T3, typename T4, typename T5,
- typename T6, typename T7, typename T8, typename T9, typename T10,
- typename T11, typename T12, typename T13, typename T14, typename T15,
- typename T16, typename T17, typename T18, typename T19, typename T20,
- typename T21, typename T22, typename T23, typename T24, typename T25,
- typename T26, typename T27>
-internal::ValueArray27<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13,
- T14, T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26,
- T27> Values(T1 v1, T2 v2, T3 v3, T4 v4, T5 v5, T6 v6, T7 v7, T8 v8, T9 v9,
- T10 v10, T11 v11, T12 v12, T13 v13, T14 v14, T15 v15, T16 v16, T17 v17,
- T18 v18, T19 v19, T20 v20, T21 v21, T22 v22, T23 v23, T24 v24, T25 v25,
- T26 v26, T27 v27) {
- return internal::ValueArray27<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11,
- T12, T13, T14, T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25,
- T26, T27>(v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14,
- v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27);
-}
-
-template <typename T1, typename T2, typename T3, typename T4, typename T5,
- typename T6, typename T7, typename T8, typename T9, typename T10,
- typename T11, typename T12, typename T13, typename T14, typename T15,
- typename T16, typename T17, typename T18, typename T19, typename T20,
- typename T21, typename T22, typename T23, typename T24, typename T25,
- typename T26, typename T27, typename T28>
-internal::ValueArray28<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13,
- T14, T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27,
- T28> Values(T1 v1, T2 v2, T3 v3, T4 v4, T5 v5, T6 v6, T7 v7, T8 v8, T9 v9,
- T10 v10, T11 v11, T12 v12, T13 v13, T14 v14, T15 v15, T16 v16, T17 v17,
- T18 v18, T19 v19, T20 v20, T21 v21, T22 v22, T23 v23, T24 v24, T25 v25,
- T26 v26, T27 v27, T28 v28) {
- return internal::ValueArray28<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11,
- T12, T13, T14, T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25,
- T26, T27, T28>(v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13,
- v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27,
- v28);
-}
-
-template <typename T1, typename T2, typename T3, typename T4, typename T5,
- typename T6, typename T7, typename T8, typename T9, typename T10,
- typename T11, typename T12, typename T13, typename T14, typename T15,
- typename T16, typename T17, typename T18, typename T19, typename T20,
- typename T21, typename T22, typename T23, typename T24, typename T25,
- typename T26, typename T27, typename T28, typename T29>
-internal::ValueArray29<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13,
- T14, T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27, T28,
- T29> Values(T1 v1, T2 v2, T3 v3, T4 v4, T5 v5, T6 v6, T7 v7, T8 v8, T9 v9,
- T10 v10, T11 v11, T12 v12, T13 v13, T14 v14, T15 v15, T16 v16, T17 v17,
- T18 v18, T19 v19, T20 v20, T21 v21, T22 v22, T23 v23, T24 v24, T25 v25,
- T26 v26, T27 v27, T28 v28, T29 v29) {
- return internal::ValueArray29<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11,
- T12, T13, T14, T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25,
- T26, T27, T28, T29>(v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12,
- v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26,
- v27, v28, v29);
-}
-
-template <typename T1, typename T2, typename T3, typename T4, typename T5,
- typename T6, typename T7, typename T8, typename T9, typename T10,
- typename T11, typename T12, typename T13, typename T14, typename T15,
- typename T16, typename T17, typename T18, typename T19, typename T20,
- typename T21, typename T22, typename T23, typename T24, typename T25,
- typename T26, typename T27, typename T28, typename T29, typename T30>
-internal::ValueArray30<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13,
- T14, T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27, T28,
- T29, T30> Values(T1 v1, T2 v2, T3 v3, T4 v4, T5 v5, T6 v6, T7 v7, T8 v8,
- T9 v9, T10 v10, T11 v11, T12 v12, T13 v13, T14 v14, T15 v15, T16 v16,
- T17 v17, T18 v18, T19 v19, T20 v20, T21 v21, T22 v22, T23 v23, T24 v24,
- T25 v25, T26 v26, T27 v27, T28 v28, T29 v29, T30 v30) {
- return internal::ValueArray30<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11,
- T12, T13, T14, T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25,
- T26, T27, T28, T29, T30>(v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11,
- v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25,
- v26, v27, v28, v29, v30);
-}
-
-template <typename T1, typename T2, typename T3, typename T4, typename T5,
- typename T6, typename T7, typename T8, typename T9, typename T10,
- typename T11, typename T12, typename T13, typename T14, typename T15,
- typename T16, typename T17, typename T18, typename T19, typename T20,
- typename T21, typename T22, typename T23, typename T24, typename T25,
- typename T26, typename T27, typename T28, typename T29, typename T30,
- typename T31>
-internal::ValueArray31<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13,
- T14, T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27, T28,
- T29, T30, T31> Values(T1 v1, T2 v2, T3 v3, T4 v4, T5 v5, T6 v6, T7 v7,
- T8 v8, T9 v9, T10 v10, T11 v11, T12 v12, T13 v13, T14 v14, T15 v15,
- T16 v16, T17 v17, T18 v18, T19 v19, T20 v20, T21 v21, T22 v22, T23 v23,
- T24 v24, T25 v25, T26 v26, T27 v27, T28 v28, T29 v29, T30 v30, T31 v31) {
- return internal::ValueArray31<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11,
- T12, T13, T14, T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25,
- T26, T27, T28, T29, T30, T31>(v1, v2, v3, v4, v5, v6, v7, v8, v9, v10,
- v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24,
- v25, v26, v27, v28, v29, v30, v31);
-}
-
-template <typename T1, typename T2, typename T3, typename T4, typename T5,
- typename T6, typename T7, typename T8, typename T9, typename T10,
- typename T11, typename T12, typename T13, typename T14, typename T15,
- typename T16, typename T17, typename T18, typename T19, typename T20,
- typename T21, typename T22, typename T23, typename T24, typename T25,
- typename T26, typename T27, typename T28, typename T29, typename T30,
- typename T31, typename T32>
-internal::ValueArray32<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13,
- T14, T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27, T28,
- T29, T30, T31, T32> Values(T1 v1, T2 v2, T3 v3, T4 v4, T5 v5, T6 v6, T7 v7,
- T8 v8, T9 v9, T10 v10, T11 v11, T12 v12, T13 v13, T14 v14, T15 v15,
- T16 v16, T17 v17, T18 v18, T19 v19, T20 v20, T21 v21, T22 v22, T23 v23,
- T24 v24, T25 v25, T26 v26, T27 v27, T28 v28, T29 v29, T30 v30, T31 v31,
- T32 v32) {
- return internal::ValueArray32<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11,
- T12, T13, T14, T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25,
- T26, T27, T28, T29, T30, T31, T32>(v1, v2, v3, v4, v5, v6, v7, v8, v9,
- v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23,
- v24, v25, v26, v27, v28, v29, v30, v31, v32);
-}
-
-template <typename T1, typename T2, typename T3, typename T4, typename T5,
- typename T6, typename T7, typename T8, typename T9, typename T10,
- typename T11, typename T12, typename T13, typename T14, typename T15,
- typename T16, typename T17, typename T18, typename T19, typename T20,
- typename T21, typename T22, typename T23, typename T24, typename T25,
- typename T26, typename T27, typename T28, typename T29, typename T30,
- typename T31, typename T32, typename T33>
-internal::ValueArray33<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13,
- T14, T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27, T28,
- T29, T30, T31, T32, T33> Values(T1 v1, T2 v2, T3 v3, T4 v4, T5 v5, T6 v6,
- T7 v7, T8 v8, T9 v9, T10 v10, T11 v11, T12 v12, T13 v13, T14 v14, T15 v15,
- T16 v16, T17 v17, T18 v18, T19 v19, T20 v20, T21 v21, T22 v22, T23 v23,
- T24 v24, T25 v25, T26 v26, T27 v27, T28 v28, T29 v29, T30 v30, T31 v31,
- T32 v32, T33 v33) {
- return internal::ValueArray33<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11,
- T12, T13, T14, T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25,
- T26, T27, T28, T29, T30, T31, T32, T33>(v1, v2, v3, v4, v5, v6, v7, v8,
- v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23,
- v24, v25, v26, v27, v28, v29, v30, v31, v32, v33);
-}
-
-template <typename T1, typename T2, typename T3, typename T4, typename T5,
- typename T6, typename T7, typename T8, typename T9, typename T10,
- typename T11, typename T12, typename T13, typename T14, typename T15,
- typename T16, typename T17, typename T18, typename T19, typename T20,
- typename T21, typename T22, typename T23, typename T24, typename T25,
- typename T26, typename T27, typename T28, typename T29, typename T30,
- typename T31, typename T32, typename T33, typename T34>
-internal::ValueArray34<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13,
- T14, T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27, T28,
- T29, T30, T31, T32, T33, T34> Values(T1 v1, T2 v2, T3 v3, T4 v4, T5 v5,
- T6 v6, T7 v7, T8 v8, T9 v9, T10 v10, T11 v11, T12 v12, T13 v13, T14 v14,
- T15 v15, T16 v16, T17 v17, T18 v18, T19 v19, T20 v20, T21 v21, T22 v22,
- T23 v23, T24 v24, T25 v25, T26 v26, T27 v27, T28 v28, T29 v29, T30 v30,
- T31 v31, T32 v32, T33 v33, T34 v34) {
- return internal::ValueArray34<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11,
- T12, T13, T14, T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25,
- T26, T27, T28, T29, T30, T31, T32, T33, T34>(v1, v2, v3, v4, v5, v6, v7,
- v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22,
- v23, v24, v25, v26, v27, v28, v29, v30, v31, v32, v33, v34);
-}
-
-template <typename T1, typename T2, typename T3, typename T4, typename T5,
- typename T6, typename T7, typename T8, typename T9, typename T10,
- typename T11, typename T12, typename T13, typename T14, typename T15,
- typename T16, typename T17, typename T18, typename T19, typename T20,
- typename T21, typename T22, typename T23, typename T24, typename T25,
- typename T26, typename T27, typename T28, typename T29, typename T30,
- typename T31, typename T32, typename T33, typename T34, typename T35>
-internal::ValueArray35<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13,
- T14, T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27, T28,
- T29, T30, T31, T32, T33, T34, T35> Values(T1 v1, T2 v2, T3 v3, T4 v4,
- T5 v5, T6 v6, T7 v7, T8 v8, T9 v9, T10 v10, T11 v11, T12 v12, T13 v13,
- T14 v14, T15 v15, T16 v16, T17 v17, T18 v18, T19 v19, T20 v20, T21 v21,
- T22 v22, T23 v23, T24 v24, T25 v25, T26 v26, T27 v27, T28 v28, T29 v29,
- T30 v30, T31 v31, T32 v32, T33 v33, T34 v34, T35 v35) {
- return internal::ValueArray35<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11,
- T12, T13, T14, T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25,
- T26, T27, T28, T29, T30, T31, T32, T33, T34, T35>(v1, v2, v3, v4, v5, v6,
- v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21,
- v22, v23, v24, v25, v26, v27, v28, v29, v30, v31, v32, v33, v34, v35);
-}
-
-template <typename T1, typename T2, typename T3, typename T4, typename T5,
- typename T6, typename T7, typename T8, typename T9, typename T10,
- typename T11, typename T12, typename T13, typename T14, typename T15,
- typename T16, typename T17, typename T18, typename T19, typename T20,
- typename T21, typename T22, typename T23, typename T24, typename T25,
- typename T26, typename T27, typename T28, typename T29, typename T30,
- typename T31, typename T32, typename T33, typename T34, typename T35,
- typename T36>
-internal::ValueArray36<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13,
- T14, T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27, T28,
- T29, T30, T31, T32, T33, T34, T35, T36> Values(T1 v1, T2 v2, T3 v3, T4 v4,
- T5 v5, T6 v6, T7 v7, T8 v8, T9 v9, T10 v10, T11 v11, T12 v12, T13 v13,
- T14 v14, T15 v15, T16 v16, T17 v17, T18 v18, T19 v19, T20 v20, T21 v21,
- T22 v22, T23 v23, T24 v24, T25 v25, T26 v26, T27 v27, T28 v28, T29 v29,
- T30 v30, T31 v31, T32 v32, T33 v33, T34 v34, T35 v35, T36 v36) {
- return internal::ValueArray36<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11,
- T12, T13, T14, T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25,
- T26, T27, T28, T29, T30, T31, T32, T33, T34, T35, T36>(v1, v2, v3, v4,
- v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19,
- v20, v21, v22, v23, v24, v25, v26, v27, v28, v29, v30, v31, v32, v33,
- v34, v35, v36);
-}
-
-template <typename T1, typename T2, typename T3, typename T4, typename T5,
- typename T6, typename T7, typename T8, typename T9, typename T10,
- typename T11, typename T12, typename T13, typename T14, typename T15,
- typename T16, typename T17, typename T18, typename T19, typename T20,
- typename T21, typename T22, typename T23, typename T24, typename T25,
- typename T26, typename T27, typename T28, typename T29, typename T30,
- typename T31, typename T32, typename T33, typename T34, typename T35,
- typename T36, typename T37>
-internal::ValueArray37<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13,
- T14, T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27, T28,
- T29, T30, T31, T32, T33, T34, T35, T36, T37> Values(T1 v1, T2 v2, T3 v3,
- T4 v4, T5 v5, T6 v6, T7 v7, T8 v8, T9 v9, T10 v10, T11 v11, T12 v12,
- T13 v13, T14 v14, T15 v15, T16 v16, T17 v17, T18 v18, T19 v19, T20 v20,
- T21 v21, T22 v22, T23 v23, T24 v24, T25 v25, T26 v26, T27 v27, T28 v28,
- T29 v29, T30 v30, T31 v31, T32 v32, T33 v33, T34 v34, T35 v35, T36 v36,
- T37 v37) {
- return internal::ValueArray37<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11,
- T12, T13, T14, T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25,
- T26, T27, T28, T29, T30, T31, T32, T33, T34, T35, T36, T37>(v1, v2, v3,
- v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19,
- v20, v21, v22, v23, v24, v25, v26, v27, v28, v29, v30, v31, v32, v33,
- v34, v35, v36, v37);
-}
-
-template <typename T1, typename T2, typename T3, typename T4, typename T5,
- typename T6, typename T7, typename T8, typename T9, typename T10,
- typename T11, typename T12, typename T13, typename T14, typename T15,
- typename T16, typename T17, typename T18, typename T19, typename T20,
- typename T21, typename T22, typename T23, typename T24, typename T25,
- typename T26, typename T27, typename T28, typename T29, typename T30,
- typename T31, typename T32, typename T33, typename T34, typename T35,
- typename T36, typename T37, typename T38>
-internal::ValueArray38<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13,
- T14, T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27, T28,
- T29, T30, T31, T32, T33, T34, T35, T36, T37, T38> Values(T1 v1, T2 v2,
- T3 v3, T4 v4, T5 v5, T6 v6, T7 v7, T8 v8, T9 v9, T10 v10, T11 v11, T12 v12,
- T13 v13, T14 v14, T15 v15, T16 v16, T17 v17, T18 v18, T19 v19, T20 v20,
- T21 v21, T22 v22, T23 v23, T24 v24, T25 v25, T26 v26, T27 v27, T28 v28,
- T29 v29, T30 v30, T31 v31, T32 v32, T33 v33, T34 v34, T35 v35, T36 v36,
- T37 v37, T38 v38) {
- return internal::ValueArray38<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11,
- T12, T13, T14, T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25,
- T26, T27, T28, T29, T30, T31, T32, T33, T34, T35, T36, T37, T38>(v1, v2,
- v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18,
- v19, v20, v21, v22, v23, v24, v25, v26, v27, v28, v29, v30, v31, v32,
- v33, v34, v35, v36, v37, v38);
-}
-
-template <typename T1, typename T2, typename T3, typename T4, typename T5,
- typename T6, typename T7, typename T8, typename T9, typename T10,
- typename T11, typename T12, typename T13, typename T14, typename T15,
- typename T16, typename T17, typename T18, typename T19, typename T20,
- typename T21, typename T22, typename T23, typename T24, typename T25,
- typename T26, typename T27, typename T28, typename T29, typename T30,
- typename T31, typename T32, typename T33, typename T34, typename T35,
- typename T36, typename T37, typename T38, typename T39>
-internal::ValueArray39<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13,
- T14, T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27, T28,
- T29, T30, T31, T32, T33, T34, T35, T36, T37, T38, T39> Values(T1 v1, T2 v2,
- T3 v3, T4 v4, T5 v5, T6 v6, T7 v7, T8 v8, T9 v9, T10 v10, T11 v11, T12 v12,
- T13 v13, T14 v14, T15 v15, T16 v16, T17 v17, T18 v18, T19 v19, T20 v20,
- T21 v21, T22 v22, T23 v23, T24 v24, T25 v25, T26 v26, T27 v27, T28 v28,
- T29 v29, T30 v30, T31 v31, T32 v32, T33 v33, T34 v34, T35 v35, T36 v36,
- T37 v37, T38 v38, T39 v39) {
- return internal::ValueArray39<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11,
- T12, T13, T14, T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25,
- T26, T27, T28, T29, T30, T31, T32, T33, T34, T35, T36, T37, T38, T39>(v1,
- v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17,
- v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, v28, v29, v30, v31,
- v32, v33, v34, v35, v36, v37, v38, v39);
-}
-
-template <typename T1, typename T2, typename T3, typename T4, typename T5,
- typename T6, typename T7, typename T8, typename T9, typename T10,
- typename T11, typename T12, typename T13, typename T14, typename T15,
- typename T16, typename T17, typename T18, typename T19, typename T20,
- typename T21, typename T22, typename T23, typename T24, typename T25,
- typename T26, typename T27, typename T28, typename T29, typename T30,
- typename T31, typename T32, typename T33, typename T34, typename T35,
- typename T36, typename T37, typename T38, typename T39, typename T40>
-internal::ValueArray40<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13,
- T14, T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27, T28,
- T29, T30, T31, T32, T33, T34, T35, T36, T37, T38, T39, T40> Values(T1 v1,
- T2 v2, T3 v3, T4 v4, T5 v5, T6 v6, T7 v7, T8 v8, T9 v9, T10 v10, T11 v11,
- T12 v12, T13 v13, T14 v14, T15 v15, T16 v16, T17 v17, T18 v18, T19 v19,
- T20 v20, T21 v21, T22 v22, T23 v23, T24 v24, T25 v25, T26 v26, T27 v27,
- T28 v28, T29 v29, T30 v30, T31 v31, T32 v32, T33 v33, T34 v34, T35 v35,
- T36 v36, T37 v37, T38 v38, T39 v39, T40 v40) {
- return internal::ValueArray40<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11,
- T12, T13, T14, T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25,
- T26, T27, T28, T29, T30, T31, T32, T33, T34, T35, T36, T37, T38, T39,
- T40>(v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15,
- v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, v28, v29,
- v30, v31, v32, v33, v34, v35, v36, v37, v38, v39, v40);
-}
-
-template <typename T1, typename T2, typename T3, typename T4, typename T5,
- typename T6, typename T7, typename T8, typename T9, typename T10,
- typename T11, typename T12, typename T13, typename T14, typename T15,
- typename T16, typename T17, typename T18, typename T19, typename T20,
- typename T21, typename T22, typename T23, typename T24, typename T25,
- typename T26, typename T27, typename T28, typename T29, typename T30,
- typename T31, typename T32, typename T33, typename T34, typename T35,
- typename T36, typename T37, typename T38, typename T39, typename T40,
- typename T41>
-internal::ValueArray41<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13,
- T14, T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27, T28,
- T29, T30, T31, T32, T33, T34, T35, T36, T37, T38, T39, T40,
- T41> Values(T1 v1, T2 v2, T3 v3, T4 v4, T5 v5, T6 v6, T7 v7, T8 v8, T9 v9,
- T10 v10, T11 v11, T12 v12, T13 v13, T14 v14, T15 v15, T16 v16, T17 v17,
- T18 v18, T19 v19, T20 v20, T21 v21, T22 v22, T23 v23, T24 v24, T25 v25,
- T26 v26, T27 v27, T28 v28, T29 v29, T30 v30, T31 v31, T32 v32, T33 v33,
- T34 v34, T35 v35, T36 v36, T37 v37, T38 v38, T39 v39, T40 v40, T41 v41) {
- return internal::ValueArray41<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11,
- T12, T13, T14, T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25,
- T26, T27, T28, T29, T30, T31, T32, T33, T34, T35, T36, T37, T38, T39,
- T40, T41>(v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14,
- v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, v28,
- v29, v30, v31, v32, v33, v34, v35, v36, v37, v38, v39, v40, v41);
-}
-
-template <typename T1, typename T2, typename T3, typename T4, typename T5,
- typename T6, typename T7, typename T8, typename T9, typename T10,
- typename T11, typename T12, typename T13, typename T14, typename T15,
- typename T16, typename T17, typename T18, typename T19, typename T20,
- typename T21, typename T22, typename T23, typename T24, typename T25,
- typename T26, typename T27, typename T28, typename T29, typename T30,
- typename T31, typename T32, typename T33, typename T34, typename T35,
- typename T36, typename T37, typename T38, typename T39, typename T40,
- typename T41, typename T42>
-internal::ValueArray42<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13,
- T14, T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27, T28,
- T29, T30, T31, T32, T33, T34, T35, T36, T37, T38, T39, T40, T41,
- T42> Values(T1 v1, T2 v2, T3 v3, T4 v4, T5 v5, T6 v6, T7 v7, T8 v8, T9 v9,
- T10 v10, T11 v11, T12 v12, T13 v13, T14 v14, T15 v15, T16 v16, T17 v17,
- T18 v18, T19 v19, T20 v20, T21 v21, T22 v22, T23 v23, T24 v24, T25 v25,
- T26 v26, T27 v27, T28 v28, T29 v29, T30 v30, T31 v31, T32 v32, T33 v33,
- T34 v34, T35 v35, T36 v36, T37 v37, T38 v38, T39 v39, T40 v40, T41 v41,
- T42 v42) {
- return internal::ValueArray42<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11,
- T12, T13, T14, T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25,
- T26, T27, T28, T29, T30, T31, T32, T33, T34, T35, T36, T37, T38, T39,
- T40, T41, T42>(v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13,
- v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27,
- v28, v29, v30, v31, v32, v33, v34, v35, v36, v37, v38, v39, v40, v41,
- v42);
-}
-
-template <typename T1, typename T2, typename T3, typename T4, typename T5,
- typename T6, typename T7, typename T8, typename T9, typename T10,
- typename T11, typename T12, typename T13, typename T14, typename T15,
- typename T16, typename T17, typename T18, typename T19, typename T20,
- typename T21, typename T22, typename T23, typename T24, typename T25,
- typename T26, typename T27, typename T28, typename T29, typename T30,
- typename T31, typename T32, typename T33, typename T34, typename T35,
- typename T36, typename T37, typename T38, typename T39, typename T40,
- typename T41, typename T42, typename T43>
-internal::ValueArray43<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13,
- T14, T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27, T28,
- T29, T30, T31, T32, T33, T34, T35, T36, T37, T38, T39, T40, T41, T42,
- T43> Values(T1 v1, T2 v2, T3 v3, T4 v4, T5 v5, T6 v6, T7 v7, T8 v8, T9 v9,
- T10 v10, T11 v11, T12 v12, T13 v13, T14 v14, T15 v15, T16 v16, T17 v17,
- T18 v18, T19 v19, T20 v20, T21 v21, T22 v22, T23 v23, T24 v24, T25 v25,
- T26 v26, T27 v27, T28 v28, T29 v29, T30 v30, T31 v31, T32 v32, T33 v33,
- T34 v34, T35 v35, T36 v36, T37 v37, T38 v38, T39 v39, T40 v40, T41 v41,
- T42 v42, T43 v43) {
- return internal::ValueArray43<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11,
- T12, T13, T14, T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25,
- T26, T27, T28, T29, T30, T31, T32, T33, T34, T35, T36, T37, T38, T39,
- T40, T41, T42, T43>(v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12,
- v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26,
- v27, v28, v29, v30, v31, v32, v33, v34, v35, v36, v37, v38, v39, v40,
- v41, v42, v43);
-}
-
-template <typename T1, typename T2, typename T3, typename T4, typename T5,
- typename T6, typename T7, typename T8, typename T9, typename T10,
- typename T11, typename T12, typename T13, typename T14, typename T15,
- typename T16, typename T17, typename T18, typename T19, typename T20,
- typename T21, typename T22, typename T23, typename T24, typename T25,
- typename T26, typename T27, typename T28, typename T29, typename T30,
- typename T31, typename T32, typename T33, typename T34, typename T35,
- typename T36, typename T37, typename T38, typename T39, typename T40,
- typename T41, typename T42, typename T43, typename T44>
-internal::ValueArray44<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13,
- T14, T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27, T28,
- T29, T30, T31, T32, T33, T34, T35, T36, T37, T38, T39, T40, T41, T42, T43,
- T44> Values(T1 v1, T2 v2, T3 v3, T4 v4, T5 v5, T6 v6, T7 v7, T8 v8, T9 v9,
- T10 v10, T11 v11, T12 v12, T13 v13, T14 v14, T15 v15, T16 v16, T17 v17,
- T18 v18, T19 v19, T20 v20, T21 v21, T22 v22, T23 v23, T24 v24, T25 v25,
- T26 v26, T27 v27, T28 v28, T29 v29, T30 v30, T31 v31, T32 v32, T33 v33,
- T34 v34, T35 v35, T36 v36, T37 v37, T38 v38, T39 v39, T40 v40, T41 v41,
- T42 v42, T43 v43, T44 v44) {
- return internal::ValueArray44<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11,
- T12, T13, T14, T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25,
- T26, T27, T28, T29, T30, T31, T32, T33, T34, T35, T36, T37, T38, T39,
- T40, T41, T42, T43, T44>(v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11,
- v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25,
- v26, v27, v28, v29, v30, v31, v32, v33, v34, v35, v36, v37, v38, v39,
- v40, v41, v42, v43, v44);
-}
-
-template <typename T1, typename T2, typename T3, typename T4, typename T5,
- typename T6, typename T7, typename T8, typename T9, typename T10,
- typename T11, typename T12, typename T13, typename T14, typename T15,
- typename T16, typename T17, typename T18, typename T19, typename T20,
- typename T21, typename T22, typename T23, typename T24, typename T25,
- typename T26, typename T27, typename T28, typename T29, typename T30,
- typename T31, typename T32, typename T33, typename T34, typename T35,
- typename T36, typename T37, typename T38, typename T39, typename T40,
- typename T41, typename T42, typename T43, typename T44, typename T45>
-internal::ValueArray45<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13,
- T14, T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27, T28,
- T29, T30, T31, T32, T33, T34, T35, T36, T37, T38, T39, T40, T41, T42, T43,
- T44, T45> Values(T1 v1, T2 v2, T3 v3, T4 v4, T5 v5, T6 v6, T7 v7, T8 v8,
- T9 v9, T10 v10, T11 v11, T12 v12, T13 v13, T14 v14, T15 v15, T16 v16,
- T17 v17, T18 v18, T19 v19, T20 v20, T21 v21, T22 v22, T23 v23, T24 v24,
- T25 v25, T26 v26, T27 v27, T28 v28, T29 v29, T30 v30, T31 v31, T32 v32,
- T33 v33, T34 v34, T35 v35, T36 v36, T37 v37, T38 v38, T39 v39, T40 v40,
- T41 v41, T42 v42, T43 v43, T44 v44, T45 v45) {
- return internal::ValueArray45<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11,
- T12, T13, T14, T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25,
- T26, T27, T28, T29, T30, T31, T32, T33, T34, T35, T36, T37, T38, T39,
- T40, T41, T42, T43, T44, T45>(v1, v2, v3, v4, v5, v6, v7, v8, v9, v10,
- v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24,
- v25, v26, v27, v28, v29, v30, v31, v32, v33, v34, v35, v36, v37, v38,
- v39, v40, v41, v42, v43, v44, v45);
-}
-
-template <typename T1, typename T2, typename T3, typename T4, typename T5,
- typename T6, typename T7, typename T8, typename T9, typename T10,
- typename T11, typename T12, typename T13, typename T14, typename T15,
- typename T16, typename T17, typename T18, typename T19, typename T20,
- typename T21, typename T22, typename T23, typename T24, typename T25,
- typename T26, typename T27, typename T28, typename T29, typename T30,
- typename T31, typename T32, typename T33, typename T34, typename T35,
- typename T36, typename T37, typename T38, typename T39, typename T40,
- typename T41, typename T42, typename T43, typename T44, typename T45,
- typename T46>
-internal::ValueArray46<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13,
- T14, T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27, T28,
- T29, T30, T31, T32, T33, T34, T35, T36, T37, T38, T39, T40, T41, T42, T43,
- T44, T45, T46> Values(T1 v1, T2 v2, T3 v3, T4 v4, T5 v5, T6 v6, T7 v7,
- T8 v8, T9 v9, T10 v10, T11 v11, T12 v12, T13 v13, T14 v14, T15 v15,
- T16 v16, T17 v17, T18 v18, T19 v19, T20 v20, T21 v21, T22 v22, T23 v23,
- T24 v24, T25 v25, T26 v26, T27 v27, T28 v28, T29 v29, T30 v30, T31 v31,
- T32 v32, T33 v33, T34 v34, T35 v35, T36 v36, T37 v37, T38 v38, T39 v39,
- T40 v40, T41 v41, T42 v42, T43 v43, T44 v44, T45 v45, T46 v46) {
- return internal::ValueArray46<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11,
- T12, T13, T14, T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25,
- T26, T27, T28, T29, T30, T31, T32, T33, T34, T35, T36, T37, T38, T39,
- T40, T41, T42, T43, T44, T45, T46>(v1, v2, v3, v4, v5, v6, v7, v8, v9,
- v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23,
- v24, v25, v26, v27, v28, v29, v30, v31, v32, v33, v34, v35, v36, v37,
- v38, v39, v40, v41, v42, v43, v44, v45, v46);
-}
-
-template <typename T1, typename T2, typename T3, typename T4, typename T5,
- typename T6, typename T7, typename T8, typename T9, typename T10,
- typename T11, typename T12, typename T13, typename T14, typename T15,
- typename T16, typename T17, typename T18, typename T19, typename T20,
- typename T21, typename T22, typename T23, typename T24, typename T25,
- typename T26, typename T27, typename T28, typename T29, typename T30,
- typename T31, typename T32, typename T33, typename T34, typename T35,
- typename T36, typename T37, typename T38, typename T39, typename T40,
- typename T41, typename T42, typename T43, typename T44, typename T45,
- typename T46, typename T47>
-internal::ValueArray47<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13,
- T14, T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27, T28,
- T29, T30, T31, T32, T33, T34, T35, T36, T37, T38, T39, T40, T41, T42, T43,
- T44, T45, T46, T47> Values(T1 v1, T2 v2, T3 v3, T4 v4, T5 v5, T6 v6, T7 v7,
- T8 v8, T9 v9, T10 v10, T11 v11, T12 v12, T13 v13, T14 v14, T15 v15,
- T16 v16, T17 v17, T18 v18, T19 v19, T20 v20, T21 v21, T22 v22, T23 v23,
- T24 v24, T25 v25, T26 v26, T27 v27, T28 v28, T29 v29, T30 v30, T31 v31,
- T32 v32, T33 v33, T34 v34, T35 v35, T36 v36, T37 v37, T38 v38, T39 v39,
- T40 v40, T41 v41, T42 v42, T43 v43, T44 v44, T45 v45, T46 v46, T47 v47) {
- return internal::ValueArray47<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11,
- T12, T13, T14, T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25,
- T26, T27, T28, T29, T30, T31, T32, T33, T34, T35, T36, T37, T38, T39,
- T40, T41, T42, T43, T44, T45, T46, T47>(v1, v2, v3, v4, v5, v6, v7, v8,
- v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23,
- v24, v25, v26, v27, v28, v29, v30, v31, v32, v33, v34, v35, v36, v37,
- v38, v39, v40, v41, v42, v43, v44, v45, v46, v47);
-}
-
-template <typename T1, typename T2, typename T3, typename T4, typename T5,
- typename T6, typename T7, typename T8, typename T9, typename T10,
- typename T11, typename T12, typename T13, typename T14, typename T15,
- typename T16, typename T17, typename T18, typename T19, typename T20,
- typename T21, typename T22, typename T23, typename T24, typename T25,
- typename T26, typename T27, typename T28, typename T29, typename T30,
- typename T31, typename T32, typename T33, typename T34, typename T35,
- typename T36, typename T37, typename T38, typename T39, typename T40,
- typename T41, typename T42, typename T43, typename T44, typename T45,
- typename T46, typename T47, typename T48>
-internal::ValueArray48<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13,
- T14, T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27, T28,
- T29, T30, T31, T32, T33, T34, T35, T36, T37, T38, T39, T40, T41, T42, T43,
- T44, T45, T46, T47, T48> Values(T1 v1, T2 v2, T3 v3, T4 v4, T5 v5, T6 v6,
- T7 v7, T8 v8, T9 v9, T10 v10, T11 v11, T12 v12, T13 v13, T14 v14, T15 v15,
- T16 v16, T17 v17, T18 v18, T19 v19, T20 v20, T21 v21, T22 v22, T23 v23,
- T24 v24, T25 v25, T26 v26, T27 v27, T28 v28, T29 v29, T30 v30, T31 v31,
- T32 v32, T33 v33, T34 v34, T35 v35, T36 v36, T37 v37, T38 v38, T39 v39,
- T40 v40, T41 v41, T42 v42, T43 v43, T44 v44, T45 v45, T46 v46, T47 v47,
- T48 v48) {
- return internal::ValueArray48<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11,
- T12, T13, T14, T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25,
- T26, T27, T28, T29, T30, T31, T32, T33, T34, T35, T36, T37, T38, T39,
- T40, T41, T42, T43, T44, T45, T46, T47, T48>(v1, v2, v3, v4, v5, v6, v7,
- v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22,
- v23, v24, v25, v26, v27, v28, v29, v30, v31, v32, v33, v34, v35, v36,
- v37, v38, v39, v40, v41, v42, v43, v44, v45, v46, v47, v48);
-}
-
-template <typename T1, typename T2, typename T3, typename T4, typename T5,
- typename T6, typename T7, typename T8, typename T9, typename T10,
- typename T11, typename T12, typename T13, typename T14, typename T15,
- typename T16, typename T17, typename T18, typename T19, typename T20,
- typename T21, typename T22, typename T23, typename T24, typename T25,
- typename T26, typename T27, typename T28, typename T29, typename T30,
- typename T31, typename T32, typename T33, typename T34, typename T35,
- typename T36, typename T37, typename T38, typename T39, typename T40,
- typename T41, typename T42, typename T43, typename T44, typename T45,
- typename T46, typename T47, typename T48, typename T49>
-internal::ValueArray49<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13,
- T14, T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27, T28,
- T29, T30, T31, T32, T33, T34, T35, T36, T37, T38, T39, T40, T41, T42, T43,
- T44, T45, T46, T47, T48, T49> Values(T1 v1, T2 v2, T3 v3, T4 v4, T5 v5,
- T6 v6, T7 v7, T8 v8, T9 v9, T10 v10, T11 v11, T12 v12, T13 v13, T14 v14,
- T15 v15, T16 v16, T17 v17, T18 v18, T19 v19, T20 v20, T21 v21, T22 v22,
- T23 v23, T24 v24, T25 v25, T26 v26, T27 v27, T28 v28, T29 v29, T30 v30,
- T31 v31, T32 v32, T33 v33, T34 v34, T35 v35, T36 v36, T37 v37, T38 v38,
- T39 v39, T40 v40, T41 v41, T42 v42, T43 v43, T44 v44, T45 v45, T46 v46,
- T47 v47, T48 v48, T49 v49) {
- return internal::ValueArray49<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11,
- T12, T13, T14, T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25,
- T26, T27, T28, T29, T30, T31, T32, T33, T34, T35, T36, T37, T38, T39,
- T40, T41, T42, T43, T44, T45, T46, T47, T48, T49>(v1, v2, v3, v4, v5, v6,
- v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21,
- v22, v23, v24, v25, v26, v27, v28, v29, v30, v31, v32, v33, v34, v35,
- v36, v37, v38, v39, v40, v41, v42, v43, v44, v45, v46, v47, v48, v49);
-}
-
-template <typename T1, typename T2, typename T3, typename T4, typename T5,
- typename T6, typename T7, typename T8, typename T9, typename T10,
- typename T11, typename T12, typename T13, typename T14, typename T15,
- typename T16, typename T17, typename T18, typename T19, typename T20,
- typename T21, typename T22, typename T23, typename T24, typename T25,
- typename T26, typename T27, typename T28, typename T29, typename T30,
- typename T31, typename T32, typename T33, typename T34, typename T35,
- typename T36, typename T37, typename T38, typename T39, typename T40,
- typename T41, typename T42, typename T43, typename T44, typename T45,
- typename T46, typename T47, typename T48, typename T49, typename T50>
-internal::ValueArray50<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13,
- T14, T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27, T28,
- T29, T30, T31, T32, T33, T34, T35, T36, T37, T38, T39, T40, T41, T42, T43,
- T44, T45, T46, T47, T48, T49, T50> Values(T1 v1, T2 v2, T3 v3, T4 v4,
- T5 v5, T6 v6, T7 v7, T8 v8, T9 v9, T10 v10, T11 v11, T12 v12, T13 v13,
- T14 v14, T15 v15, T16 v16, T17 v17, T18 v18, T19 v19, T20 v20, T21 v21,
- T22 v22, T23 v23, T24 v24, T25 v25, T26 v26, T27 v27, T28 v28, T29 v29,
- T30 v30, T31 v31, T32 v32, T33 v33, T34 v34, T35 v35, T36 v36, T37 v37,
- T38 v38, T39 v39, T40 v40, T41 v41, T42 v42, T43 v43, T44 v44, T45 v45,
- T46 v46, T47 v47, T48 v48, T49 v49, T50 v50) {
- return internal::ValueArray50<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11,
- T12, T13, T14, T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25,
- T26, T27, T28, T29, T30, T31, T32, T33, T34, T35, T36, T37, T38, T39,
- T40, T41, T42, T43, T44, T45, T46, T47, T48, T49, T50>(v1, v2, v3, v4,
- v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19,
- v20, v21, v22, v23, v24, v25, v26, v27, v28, v29, v30, v31, v32, v33,
- v34, v35, v36, v37, v38, v39, v40, v41, v42, v43, v44, v45, v46, v47,
- v48, v49, v50);
-}
-
-// Bool() allows generating tests with parameters in a set of (false, true).
-//
-// Synopsis:
-// Bool()
-// - returns a generator producing sequences with elements {false, true}.
-//
-// It is useful when testing code that depends on Boolean flags. Combinations
-// of multiple flags can be tested when several Bool()'s are combined using
-// Combine() function.
-//
-// In the following example all tests in the test case FlagDependentTest
-// will be instantiated twice with parameters false and true.
-//
-// class FlagDependentTest : public testing::TestWithParam<bool> {
-// virtual void SetUp() {
-// external_flag = GetParam();
-// }
-// }
-// INSTANTIATE_TEST_CASE_P(BoolSequence, FlagDependentTest, Bool());
-//
-inline internal::ParamGenerator<bool> Bool() {
- return Values(false, true);
-}
-
-# if GTEST_HAS_COMBINE
-// Combine() allows the user to combine two or more sequences to produce
-// values of a Cartesian product of those sequences' elements.
-//
-// Synopsis:
-// Combine(gen1, gen2, ..., genN)
-// - returns a generator producing sequences with elements coming from
-// the Cartesian product of elements from the sequences generated by
-// gen1, gen2, ..., genN. The sequence elements will have a type of
-// tuple<T1, T2, ..., TN> where T1, T2, ..., TN are the types
-// of elements from sequences produced by gen1, gen2, ..., genN.
-//
-// Combine can have up to 10 arguments. This number is currently limited
-// by the maximum number of elements in the tuple implementation used by Google
-// Test.
-//
-// Example:
-//
-// This will instantiate tests in test case AnimalTest each one with
-// the parameter values tuple("cat", BLACK), tuple("cat", WHITE),
-// tuple("dog", BLACK), and tuple("dog", WHITE):
-//
-// enum Color { BLACK, GRAY, WHITE };
-// class AnimalTest
-// : public testing::TestWithParam<tuple<const char*, Color> > {...};
-//
-// TEST_P(AnimalTest, AnimalLooksNice) {...}
-//
-// INSTANTIATE_TEST_CASE_P(AnimalVariations, AnimalTest,
-// Combine(Values("cat", "dog"),
-// Values(BLACK, WHITE)));
-//
-// This will instantiate tests in FlagDependentTest with all variations of two
-// Boolean flags:
-//
-// class FlagDependentTest
-// : public testing::TestWithParam<tuple<bool, bool> > {
-// virtual void SetUp() {
-// // Assigns external_flag_1 and external_flag_2 values from the tuple.
-// tie(external_flag_1, external_flag_2) = GetParam();
-// }
-// };
-//
-// TEST_P(FlagDependentTest, TestFeature1) {
-// // Test your code using external_flag_1 and external_flag_2 here.
-// }
-// INSTANTIATE_TEST_CASE_P(TwoBoolSequence, FlagDependentTest,
-// Combine(Bool(), Bool()));
-//
-template <typename Generator1, typename Generator2>
-internal::CartesianProductHolder2<Generator1, Generator2> Combine(
- const Generator1& g1, const Generator2& g2) {
- return internal::CartesianProductHolder2<Generator1, Generator2>(
- g1, g2);
-}
-
-template <typename Generator1, typename Generator2, typename Generator3>
-internal::CartesianProductHolder3<Generator1, Generator2, Generator3> Combine(
- const Generator1& g1, const Generator2& g2, const Generator3& g3) {
- return internal::CartesianProductHolder3<Generator1, Generator2, Generator3>(
- g1, g2, g3);
-}
-
-template <typename Generator1, typename Generator2, typename Generator3,
- typename Generator4>
-internal::CartesianProductHolder4<Generator1, Generator2, Generator3,
- Generator4> Combine(
- const Generator1& g1, const Generator2& g2, const Generator3& g3,
- const Generator4& g4) {
- return internal::CartesianProductHolder4<Generator1, Generator2, Generator3,
- Generator4>(
- g1, g2, g3, g4);
-}
-
-template <typename Generator1, typename Generator2, typename Generator3,
- typename Generator4, typename Generator5>
-internal::CartesianProductHolder5<Generator1, Generator2, Generator3,
- Generator4, Generator5> Combine(
- const Generator1& g1, const Generator2& g2, const Generator3& g3,
- const Generator4& g4, const Generator5& g5) {
- return internal::CartesianProductHolder5<Generator1, Generator2, Generator3,
- Generator4, Generator5>(
- g1, g2, g3, g4, g5);
-}
-
-template <typename Generator1, typename Generator2, typename Generator3,
- typename Generator4, typename Generator5, typename Generator6>
-internal::CartesianProductHolder6<Generator1, Generator2, Generator3,
- Generator4, Generator5, Generator6> Combine(
- const Generator1& g1, const Generator2& g2, const Generator3& g3,
- const Generator4& g4, const Generator5& g5, const Generator6& g6) {
- return internal::CartesianProductHolder6<Generator1, Generator2, Generator3,
- Generator4, Generator5, Generator6>(
- g1, g2, g3, g4, g5, g6);
-}
-
-template <typename Generator1, typename Generator2, typename Generator3,
- typename Generator4, typename Generator5, typename Generator6,
- typename Generator7>
-internal::CartesianProductHolder7<Generator1, Generator2, Generator3,
- Generator4, Generator5, Generator6, Generator7> Combine(
- const Generator1& g1, const Generator2& g2, const Generator3& g3,
- const Generator4& g4, const Generator5& g5, const Generator6& g6,
- const Generator7& g7) {
- return internal::CartesianProductHolder7<Generator1, Generator2, Generator3,
- Generator4, Generator5, Generator6, Generator7>(
- g1, g2, g3, g4, g5, g6, g7);
-}
-
-template <typename Generator1, typename Generator2, typename Generator3,
- typename Generator4, typename Generator5, typename Generator6,
- typename Generator7, typename Generator8>
-internal::CartesianProductHolder8<Generator1, Generator2, Generator3,
- Generator4, Generator5, Generator6, Generator7, Generator8> Combine(
- const Generator1& g1, const Generator2& g2, const Generator3& g3,
- const Generator4& g4, const Generator5& g5, const Generator6& g6,
- const Generator7& g7, const Generator8& g8) {
- return internal::CartesianProductHolder8<Generator1, Generator2, Generator3,
- Generator4, Generator5, Generator6, Generator7, Generator8>(
- g1, g2, g3, g4, g5, g6, g7, g8);
-}
-
-template <typename Generator1, typename Generator2, typename Generator3,
- typename Generator4, typename Generator5, typename Generator6,
- typename Generator7, typename Generator8, typename Generator9>
-internal::CartesianProductHolder9<Generator1, Generator2, Generator3,
- Generator4, Generator5, Generator6, Generator7, Generator8,
- Generator9> Combine(
- const Generator1& g1, const Generator2& g2, const Generator3& g3,
- const Generator4& g4, const Generator5& g5, const Generator6& g6,
- const Generator7& g7, const Generator8& g8, const Generator9& g9) {
- return internal::CartesianProductHolder9<Generator1, Generator2, Generator3,
- Generator4, Generator5, Generator6, Generator7, Generator8, Generator9>(
- g1, g2, g3, g4, g5, g6, g7, g8, g9);
-}
-
-template <typename Generator1, typename Generator2, typename Generator3,
- typename Generator4, typename Generator5, typename Generator6,
- typename Generator7, typename Generator8, typename Generator9,
- typename Generator10>
-internal::CartesianProductHolder10<Generator1, Generator2, Generator3,
- Generator4, Generator5, Generator6, Generator7, Generator8, Generator9,
- Generator10> Combine(
- const Generator1& g1, const Generator2& g2, const Generator3& g3,
- const Generator4& g4, const Generator5& g5, const Generator6& g6,
- const Generator7& g7, const Generator8& g8, const Generator9& g9,
- const Generator10& g10) {
- return internal::CartesianProductHolder10<Generator1, Generator2, Generator3,
- Generator4, Generator5, Generator6, Generator7, Generator8, Generator9,
- Generator10>(
- g1, g2, g3, g4, g5, g6, g7, g8, g9, g10);
-}
-# endif // GTEST_HAS_COMBINE
-
-
-
-# define TEST_P(test_case_name, test_name) \
- class GTEST_TEST_CLASS_NAME_(test_case_name, test_name) \
- : public test_case_name { \
- public: \
- GTEST_TEST_CLASS_NAME_(test_case_name, test_name)() {} \
- virtual void TestBody(); \
- private: \
- static int AddToRegistry() { \
- ::testing::UnitTest::GetInstance()->parameterized_test_registry(). \
- GetTestCasePatternHolder<test_case_name>(\
- #test_case_name, \
- ::testing::internal::CodeLocation(\
- __FILE__, __LINE__))->AddTestPattern(\
- #test_case_name, \
- #test_name, \
- new ::testing::internal::TestMetaFactory< \
- GTEST_TEST_CLASS_NAME_(\
- test_case_name, test_name)>()); \
- return 0; \
- } \
- static int gtest_registering_dummy_ GTEST_ATTRIBUTE_UNUSED_; \
- GTEST_DISALLOW_COPY_AND_ASSIGN_(\
- GTEST_TEST_CLASS_NAME_(test_case_name, test_name)); \
- }; \
- int GTEST_TEST_CLASS_NAME_(test_case_name, \
- test_name)::gtest_registering_dummy_ = \
- GTEST_TEST_CLASS_NAME_(test_case_name, test_name)::AddToRegistry(); \
- void GTEST_TEST_CLASS_NAME_(test_case_name, test_name)::TestBody()
-
-// The optional last argument to INSTANTIATE_TEST_CASE_P allows the user
-// to specify a function or functor that generates custom test name suffixes
-// based on the test parameters. The function should accept one argument of
-// type testing::TestParamInfo<class ParamType>, and return std::string.
-//
-// testing::PrintToStringParamName is a builtin test suffix generator that
-// returns the value of testing::PrintToString(GetParam()). It does not work
-// for std::string or C strings.
-//
-// Note: test names must be non-empty, unique, and may only contain ASCII
-// alphanumeric characters or underscore.
-
-# define INSTANTIATE_TEST_CASE_P(prefix, test_case_name, generator, ...) \
- ::testing::internal::ParamGenerator<test_case_name::ParamType> \
- gtest_##prefix##test_case_name##_EvalGenerator_() { return generator; } \
- ::std::string gtest_##prefix##test_case_name##_EvalGenerateName_( \
- const ::testing::TestParamInfo<test_case_name::ParamType>& info) { \
- return ::testing::internal::GetParamNameGen<test_case_name::ParamType> \
- (__VA_ARGS__)(info); \
- } \
- int gtest_##prefix##test_case_name##_dummy_ GTEST_ATTRIBUTE_UNUSED_ = \
- ::testing::UnitTest::GetInstance()->parameterized_test_registry(). \
- GetTestCasePatternHolder<test_case_name>(\
- #test_case_name, \
- ::testing::internal::CodeLocation(\
- __FILE__, __LINE__))->AddTestCaseInstantiation(\
- #prefix, \
- &gtest_##prefix##test_case_name##_EvalGenerator_, \
- &gtest_##prefix##test_case_name##_EvalGenerateName_, \
- __FILE__, __LINE__)
-
-} // namespace testing
-
-#endif // GTEST_HAS_PARAM_TEST
-
-#endif // GTEST_INCLUDE_GTEST_GTEST_PARAM_TEST_H_
diff --git a/third_party/aom/third_party/googletest/src/googletest/include/gtest/gtest-param-test.h.pump b/third_party/aom/third_party/googletest/src/googletest/include/gtest/gtest-param-test.h.pump
deleted file mode 100644
index 3078d6d2a..000000000
--- a/third_party/aom/third_party/googletest/src/googletest/include/gtest/gtest-param-test.h.pump
+++ /dev/null
@@ -1,510 +0,0 @@
-$$ -*- mode: c++; -*-
-$var n = 50 $$ Maximum length of Values arguments we want to support.
-$var maxtuple = 10 $$ Maximum number of Combine arguments we want to support.
-// Copyright 2008, Google Inc.
-// All rights reserved.
-//
-// Redistribution and use in source and binary forms, with or without
-// modification, are permitted provided that the following conditions are
-// met:
-//
-// * Redistributions of source code must retain the above copyright
-// notice, this list of conditions and the following disclaimer.
-// * Redistributions in binary form must reproduce the above
-// copyright notice, this list of conditions and the following disclaimer
-// in the documentation and/or other materials provided with the
-// distribution.
-// * Neither the name of Google Inc. nor the names of its
-// contributors may be used to endorse or promote products derived from
-// this software without specific prior written permission.
-//
-// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
-// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
-// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
-// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
-// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
-// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
-// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
-// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
-// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-//
-// Authors: vladl@google.com (Vlad Losev)
-//
-// Macros and functions for implementing parameterized tests
-// in Google C++ Testing Framework (Google Test)
-//
-// This file is generated by a SCRIPT. DO NOT EDIT BY HAND!
-//
-#ifndef GTEST_INCLUDE_GTEST_GTEST_PARAM_TEST_H_
-#define GTEST_INCLUDE_GTEST_GTEST_PARAM_TEST_H_
-
-
-// Value-parameterized tests allow you to test your code with different
-// parameters without writing multiple copies of the same test.
-//
-// Here is how you use value-parameterized tests:
-
-#if 0
-
-// To write value-parameterized tests, first you should define a fixture
-// class. It is usually derived from testing::TestWithParam<T> (see below for
-// another inheritance scheme that's sometimes useful in more complicated
-// class hierarchies), where T is the type of your parameter values.
-// TestWithParam<T> is itself derived from testing::Test. T can be any
-// copyable type. If it's a raw pointer, you are responsible for managing the
-// lifespan of the pointed values.
-
-class FooTest : public ::testing::TestWithParam<const char*> {
- // You can implement all the usual class fixture members here.
-};
-
-// Then, use the TEST_P macro to define as many parameterized tests
-// for this fixture as you want. The _P suffix is for "parameterized"
-// or "pattern", whichever you prefer to think.
-
-TEST_P(FooTest, DoesBlah) {
- // Inside a test, access the test parameter with the GetParam() method
- // of the TestWithParam<T> class:
- EXPECT_TRUE(foo.Blah(GetParam()));
- ...
-}
-
-TEST_P(FooTest, HasBlahBlah) {
- ...
-}
-
-// Finally, you can use INSTANTIATE_TEST_CASE_P to instantiate the test
-// case with any set of parameters you want. Google Test defines a number
-// of functions for generating test parameters. They return what we call
-// (surprise!) parameter generators. Here is a summary of them, which
-// are all in the testing namespace:
-//
-//
-// Range(begin, end [, step]) - Yields values {begin, begin+step,
-// begin+step+step, ...}. The values do not
-// include end. step defaults to 1.
-// Values(v1, v2, ..., vN) - Yields values {v1, v2, ..., vN}.
-// ValuesIn(container) - Yields values from a C-style array, an STL
-// ValuesIn(begin,end) container, or an iterator range [begin, end).
-// Bool() - Yields sequence {false, true}.
-// Combine(g1, g2, ..., gN) - Yields all combinations (the Cartesian product
-// for the math savvy) of the values generated
-// by the N generators.
-//
-// For more details, see comments at the definitions of these functions below
-// in this file.
-//
-// The following statement will instantiate tests from the FooTest test case
-// each with parameter values "meeny", "miny", and "moe".
-
-INSTANTIATE_TEST_CASE_P(InstantiationName,
- FooTest,
- Values("meeny", "miny", "moe"));
-
-// To distinguish different instances of the pattern, (yes, you
-// can instantiate it more than once) the first argument to the
-// INSTANTIATE_TEST_CASE_P macro is a prefix that will be added to the
-// actual test case name. Remember to pick unique prefixes for different
-// instantiations. The tests from the instantiation above will have
-// these names:
-//
-// * InstantiationName/FooTest.DoesBlah/0 for "meeny"
-// * InstantiationName/FooTest.DoesBlah/1 for "miny"
-// * InstantiationName/FooTest.DoesBlah/2 for "moe"
-// * InstantiationName/FooTest.HasBlahBlah/0 for "meeny"
-// * InstantiationName/FooTest.HasBlahBlah/1 for "miny"
-// * InstantiationName/FooTest.HasBlahBlah/2 for "moe"
-//
-// You can use these names in --gtest_filter.
-//
-// This statement will instantiate all tests from FooTest again, each
-// with parameter values "cat" and "dog":
-
-const char* pets[] = {"cat", "dog"};
-INSTANTIATE_TEST_CASE_P(AnotherInstantiationName, FooTest, ValuesIn(pets));
-
-// The tests from the instantiation above will have these names:
-//
-// * AnotherInstantiationName/FooTest.DoesBlah/0 for "cat"
-// * AnotherInstantiationName/FooTest.DoesBlah/1 for "dog"
-// * AnotherInstantiationName/FooTest.HasBlahBlah/0 for "cat"
-// * AnotherInstantiationName/FooTest.HasBlahBlah/1 for "dog"
-//
-// Please note that INSTANTIATE_TEST_CASE_P will instantiate all tests
-// in the given test case, whether their definitions come before or
-// AFTER the INSTANTIATE_TEST_CASE_P statement.
-//
-// Please also note that generator expressions (including parameters to the
-// generators) are evaluated in InitGoogleTest(), after main() has started.
-// This allows the user on one hand, to adjust generator parameters in order
-// to dynamically determine a set of tests to run and on the other hand,
-// give the user a chance to inspect the generated tests with Google Test
-// reflection API before RUN_ALL_TESTS() is executed.
-//
-// You can see samples/sample7_unittest.cc and samples/sample8_unittest.cc
-// for more examples.
-//
-// In the future, we plan to publish the API for defining new parameter
-// generators. But for now this interface remains part of the internal
-// implementation and is subject to change.
-//
-//
-// A parameterized test fixture must be derived from testing::Test and from
-// testing::WithParamInterface<T>, where T is the type of the parameter
-// values. Inheriting from TestWithParam<T> satisfies that requirement because
-// TestWithParam<T> inherits from both Test and WithParamInterface. In more
-// complicated hierarchies, however, it is occasionally useful to inherit
-// separately from Test and WithParamInterface. For example:
-
-class BaseTest : public ::testing::Test {
- // You can inherit all the usual members for a non-parameterized test
- // fixture here.
-};
-
-class DerivedTest : public BaseTest, public ::testing::WithParamInterface<int> {
- // The usual test fixture members go here too.
-};
-
-TEST_F(BaseTest, HasFoo) {
- // This is an ordinary non-parameterized test.
-}
-
-TEST_P(DerivedTest, DoesBlah) {
- // GetParam works just the same here as if you inherit from TestWithParam.
- EXPECT_TRUE(foo.Blah(GetParam()));
-}
-
-#endif // 0
-
-#include "gtest/internal/gtest-port.h"
-
-#if !GTEST_OS_SYMBIAN
-# include <utility>
-#endif
-
-// scripts/fuse_gtest.py depends on gtest's own header being #included
-// *unconditionally*. Therefore these #includes cannot be moved
-// inside #if GTEST_HAS_PARAM_TEST.
-#include "gtest/internal/gtest-internal.h"
-#include "gtest/internal/gtest-param-util.h"
-#include "gtest/internal/gtest-param-util-generated.h"
-
-#if GTEST_HAS_PARAM_TEST
-
-namespace testing {
-
-// Functions producing parameter generators.
-//
-// Google Test uses these generators to produce parameters for value-
-// parameterized tests. When a parameterized test case is instantiated
-// with a particular generator, Google Test creates and runs tests
-// for each element in the sequence produced by the generator.
-//
-// In the following sample, tests from test case FooTest are instantiated
-// each three times with parameter values 3, 5, and 8:
-//
-// class FooTest : public TestWithParam<int> { ... };
-//
-// TEST_P(FooTest, TestThis) {
-// }
-// TEST_P(FooTest, TestThat) {
-// }
-// INSTANTIATE_TEST_CASE_P(TestSequence, FooTest, Values(3, 5, 8));
-//
-
-// Range() returns generators providing sequences of values in a range.
-//
-// Synopsis:
-// Range(start, end)
-// - returns a generator producing a sequence of values {start, start+1,
-// start+2, ..., }.
-// Range(start, end, step)
-// - returns a generator producing a sequence of values {start, start+step,
-// start+step+step, ..., }.
-// Notes:
-// * The generated sequences never include end. For example, Range(1, 5)
-// returns a generator producing a sequence {1, 2, 3, 4}. Range(1, 9, 2)
-// returns a generator producing {1, 3, 5, 7}.
-// * start and end must have the same type. That type may be any integral or
-// floating-point type or a user defined type satisfying these conditions:
-// * It must be assignable (have operator=() defined).
-// * It must have operator+() (operator+(int-compatible type) for
-// two-operand version).
-// * It must have operator<() defined.
-// Elements in the resulting sequences will also have that type.
-// * Condition start < end must be satisfied in order for resulting sequences
-// to contain any elements.
-//
-template <typename T, typename IncrementT>
-internal::ParamGenerator<T> Range(T start, T end, IncrementT step) {
- return internal::ParamGenerator<T>(
- new internal::RangeGenerator<T, IncrementT>(start, end, step));
-}
-
-template <typename T>
-internal::ParamGenerator<T> Range(T start, T end) {
- return Range(start, end, 1);
-}
-
-// ValuesIn() function allows generation of tests with parameters coming from
-// a container.
-//
-// Synopsis:
-// ValuesIn(const T (&array)[N])
-// - returns a generator producing sequences with elements from
-// a C-style array.
-// ValuesIn(const Container& container)
-// - returns a generator producing sequences with elements from
-// an STL-style container.
-// ValuesIn(Iterator begin, Iterator end)
-// - returns a generator producing sequences with elements from
-// a range [begin, end) defined by a pair of STL-style iterators. These
-// iterators can also be plain C pointers.
-//
-// Please note that ValuesIn copies the values from the containers
-// passed in and keeps them to generate tests in RUN_ALL_TESTS().
-//
-// Examples:
-//
-// This instantiates tests from test case StringTest
-// each with C-string values of "foo", "bar", and "baz":
-//
-// const char* strings[] = {"foo", "bar", "baz"};
-// INSTANTIATE_TEST_CASE_P(StringSequence, SrtingTest, ValuesIn(strings));
-//
-// This instantiates tests from test case StlStringTest
-// each with STL strings with values "a" and "b":
-//
-// ::std::vector< ::std::string> GetParameterStrings() {
-// ::std::vector< ::std::string> v;
-// v.push_back("a");
-// v.push_back("b");
-// return v;
-// }
-//
-// INSTANTIATE_TEST_CASE_P(CharSequence,
-// StlStringTest,
-// ValuesIn(GetParameterStrings()));
-//
-//
-// This will also instantiate tests from CharTest
-// each with parameter values 'a' and 'b':
-//
-// ::std::list<char> GetParameterChars() {
-// ::std::list<char> list;
-// list.push_back('a');
-// list.push_back('b');
-// return list;
-// }
-// ::std::list<char> l = GetParameterChars();
-// INSTANTIATE_TEST_CASE_P(CharSequence2,
-// CharTest,
-// ValuesIn(l.begin(), l.end()));
-//
-template <typename ForwardIterator>
-internal::ParamGenerator<
- typename ::testing::internal::IteratorTraits<ForwardIterator>::value_type>
-ValuesIn(ForwardIterator begin, ForwardIterator end) {
- typedef typename ::testing::internal::IteratorTraits<ForwardIterator>
- ::value_type ParamType;
- return internal::ParamGenerator<ParamType>(
- new internal::ValuesInIteratorRangeGenerator<ParamType>(begin, end));
-}
-
-template <typename T, size_t N>
-internal::ParamGenerator<T> ValuesIn(const T (&array)[N]) {
- return ValuesIn(array, array + N);
-}
-
-template <class Container>
-internal::ParamGenerator<typename Container::value_type> ValuesIn(
- const Container& container) {
- return ValuesIn(container.begin(), container.end());
-}
-
-// Values() allows generating tests from explicitly specified list of
-// parameters.
-//
-// Synopsis:
-// Values(T v1, T v2, ..., T vN)
-// - returns a generator producing sequences with elements v1, v2, ..., vN.
-//
-// For example, this instantiates tests from test case BarTest each
-// with values "one", "two", and "three":
-//
-// INSTANTIATE_TEST_CASE_P(NumSequence, BarTest, Values("one", "two", "three"));
-//
-// This instantiates tests from test case BazTest each with values 1, 2, 3.5.
-// The exact type of values will depend on the type of parameter in BazTest.
-//
-// INSTANTIATE_TEST_CASE_P(FloatingNumbers, BazTest, Values(1, 2, 3.5));
-//
-// Currently, Values() supports from 1 to $n parameters.
-//
-$range i 1..n
-$for i [[
-$range j 1..i
-
-template <$for j, [[typename T$j]]>
-internal::ValueArray$i<$for j, [[T$j]]> Values($for j, [[T$j v$j]]) {
- return internal::ValueArray$i<$for j, [[T$j]]>($for j, [[v$j]]);
-}
-
-]]
-
-// Bool() allows generating tests with parameters in a set of (false, true).
-//
-// Synopsis:
-// Bool()
-// - returns a generator producing sequences with elements {false, true}.
-//
-// It is useful when testing code that depends on Boolean flags. Combinations
-// of multiple flags can be tested when several Bool()'s are combined using
-// Combine() function.
-//
-// In the following example all tests in the test case FlagDependentTest
-// will be instantiated twice with parameters false and true.
-//
-// class FlagDependentTest : public testing::TestWithParam<bool> {
-// virtual void SetUp() {
-// external_flag = GetParam();
-// }
-// }
-// INSTANTIATE_TEST_CASE_P(BoolSequence, FlagDependentTest, Bool());
-//
-inline internal::ParamGenerator<bool> Bool() {
- return Values(false, true);
-}
-
-# if GTEST_HAS_COMBINE
-// Combine() allows the user to combine two or more sequences to produce
-// values of a Cartesian product of those sequences' elements.
-//
-// Synopsis:
-// Combine(gen1, gen2, ..., genN)
-// - returns a generator producing sequences with elements coming from
-// the Cartesian product of elements from the sequences generated by
-// gen1, gen2, ..., genN. The sequence elements will have a type of
-// tuple<T1, T2, ..., TN> where T1, T2, ..., TN are the types
-// of elements from sequences produces by gen1, gen2, ..., genN.
-//
-// Combine can have up to $maxtuple arguments. This number is currently limited
-// by the maximum number of elements in the tuple implementation used by Google
-// Test.
-//
-// Example:
-//
-// This will instantiate tests in test case AnimalTest each one with
-// the parameter values tuple("cat", BLACK), tuple("cat", WHITE),
-// tuple("dog", BLACK), and tuple("dog", WHITE):
-//
-// enum Color { BLACK, GRAY, WHITE };
-// class AnimalTest
-// : public testing::TestWithParam<tuple<const char*, Color> > {...};
-//
-// TEST_P(AnimalTest, AnimalLooksNice) {...}
-//
-// INSTANTIATE_TEST_CASE_P(AnimalVariations, AnimalTest,
-// Combine(Values("cat", "dog"),
-// Values(BLACK, WHITE)));
-//
-// This will instantiate tests in FlagDependentTest with all variations of two
-// Boolean flags:
-//
-// class FlagDependentTest
-// : public testing::TestWithParam<tuple<bool, bool> > {
-// virtual void SetUp() {
-// // Assigns external_flag_1 and external_flag_2 values from the tuple.
-// tie(external_flag_1, external_flag_2) = GetParam();
-// }
-// };
-//
-// TEST_P(FlagDependentTest, TestFeature1) {
-// // Test your code using external_flag_1 and external_flag_2 here.
-// }
-// INSTANTIATE_TEST_CASE_P(TwoBoolSequence, FlagDependentTest,
-// Combine(Bool(), Bool()));
-//
-$range i 2..maxtuple
-$for i [[
-$range j 1..i
-
-template <$for j, [[typename Generator$j]]>
-internal::CartesianProductHolder$i<$for j, [[Generator$j]]> Combine(
- $for j, [[const Generator$j& g$j]]) {
- return internal::CartesianProductHolder$i<$for j, [[Generator$j]]>(
- $for j, [[g$j]]);
-}
-
-]]
-# endif // GTEST_HAS_COMBINE
-
-
-
-# define TEST_P(test_case_name, test_name) \
- class GTEST_TEST_CLASS_NAME_(test_case_name, test_name) \
- : public test_case_name { \
- public: \
- GTEST_TEST_CLASS_NAME_(test_case_name, test_name)() {} \
- virtual void TestBody(); \
- private: \
- static int AddToRegistry() { \
- ::testing::UnitTest::GetInstance()->parameterized_test_registry(). \
- GetTestCasePatternHolder<test_case_name>(\
- #test_case_name, \
- ::testing::internal::CodeLocation(\
- __FILE__, __LINE__))->AddTestPattern(\
- #test_case_name, \
- #test_name, \
- new ::testing::internal::TestMetaFactory< \
- GTEST_TEST_CLASS_NAME_(\
- test_case_name, test_name)>()); \
- return 0; \
- } \
- static int gtest_registering_dummy_ GTEST_ATTRIBUTE_UNUSED_; \
- GTEST_DISALLOW_COPY_AND_ASSIGN_(\
- GTEST_TEST_CLASS_NAME_(test_case_name, test_name)); \
- }; \
- int GTEST_TEST_CLASS_NAME_(test_case_name, \
- test_name)::gtest_registering_dummy_ = \
- GTEST_TEST_CLASS_NAME_(test_case_name, test_name)::AddToRegistry(); \
- void GTEST_TEST_CLASS_NAME_(test_case_name, test_name)::TestBody()
-
-// The optional last argument to INSTANTIATE_TEST_CASE_P allows the user
-// to specify a function or functor that generates custom test name suffixes
-// based on the test parameters. The function should accept one argument of
-// type testing::TestParamInfo<class ParamType>, and return std::string.
-//
-// testing::PrintToStringParamName is a builtin test suffix generator that
-// returns the value of testing::PrintToString(GetParam()).
-//
-// Note: test names must be non-empty, unique, and may only contain ASCII
-// alphanumeric characters or underscore. Because PrintToString adds quotes
-// to std::string and C strings, it won't work for these types.
-
-# define INSTANTIATE_TEST_CASE_P(prefix, test_case_name, generator, ...) \
- ::testing::internal::ParamGenerator<test_case_name::ParamType> \
- gtest_##prefix##test_case_name##_EvalGenerator_() { return generator; } \
- ::std::string gtest_##prefix##test_case_name##_EvalGenerateName_( \
- const ::testing::TestParamInfo<test_case_name::ParamType>& info) { \
- return ::testing::internal::GetParamNameGen<test_case_name::ParamType> \
- (__VA_ARGS__)(info); \
- } \
- int gtest_##prefix##test_case_name##_dummy_ GTEST_ATTRIBUTE_UNUSED_ = \
- ::testing::UnitTest::GetInstance()->parameterized_test_registry(). \
- GetTestCasePatternHolder<test_case_name>(\
- #test_case_name, \
- ::testing::internal::CodeLocation(\
- __FILE__, __LINE__))->AddTestCaseInstantiation(\
- #prefix, \
- &gtest_##prefix##test_case_name##_EvalGenerator_, \
- &gtest_##prefix##test_case_name##_EvalGenerateName_, \
- __FILE__, __LINE__)
-
-} // namespace testing
-
-#endif // GTEST_HAS_PARAM_TEST
-
-#endif // GTEST_INCLUDE_GTEST_GTEST_PARAM_TEST_H_
diff --git a/third_party/aom/third_party/googletest/src/googletest/include/gtest/gtest-printers.h b/third_party/aom/third_party/googletest/src/googletest/include/gtest/gtest-printers.h
deleted file mode 100644
index 8a33164cb..000000000
--- a/third_party/aom/third_party/googletest/src/googletest/include/gtest/gtest-printers.h
+++ /dev/null
@@ -1,993 +0,0 @@
-// Copyright 2007, Google Inc.
-// All rights reserved.
-//
-// Redistribution and use in source and binary forms, with or without
-// modification, are permitted provided that the following conditions are
-// met:
-//
-// * Redistributions of source code must retain the above copyright
-// notice, this list of conditions and the following disclaimer.
-// * Redistributions in binary form must reproduce the above
-// copyright notice, this list of conditions and the following disclaimer
-// in the documentation and/or other materials provided with the
-// distribution.
-// * Neither the name of Google Inc. nor the names of its
-// contributors may be used to endorse or promote products derived from
-// this software without specific prior written permission.
-//
-// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
-// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
-// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
-// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
-// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
-// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
-// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
-// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
-// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-//
-// Author: wan@google.com (Zhanyong Wan)
-
-// Google Test - The Google C++ Testing Framework
-//
-// This file implements a universal value printer that can print a
-// value of any type T:
-//
-// void ::testing::internal::UniversalPrinter<T>::Print(value, ostream_ptr);
-//
-// A user can teach this function how to print a class type T by
-// defining either operator<<() or PrintTo() in the namespace that
-// defines T. More specifically, the FIRST defined function in the
-// following list will be used (assuming T is defined in namespace
-// foo):
-//
-// 1. foo::PrintTo(const T&, ostream*)
-// 2. operator<<(ostream&, const T&) defined in either foo or the
-// global namespace.
-//
-// If none of the above is defined, it will print the debug string of
-// the value if it is a protocol buffer, or print the raw bytes in the
-// value otherwise.
-//
-// To aid debugging: when T is a reference type, the address of the
-// value is also printed; when T is a (const) char pointer, both the
-// pointer value and the NUL-terminated string it points to are
-// printed.
-//
-// We also provide some convenient wrappers:
-//
-// // Prints a value to a string. For a (const or not) char
-// // pointer, the NUL-terminated string (but not the pointer) is
-// // printed.
-// std::string ::testing::PrintToString(const T& value);
-//
-// // Prints a value tersely: for a reference type, the referenced
-// // value (but not the address) is printed; for a (const or not) char
-// // pointer, the NUL-terminated string (but not the pointer) is
-// // printed.
-// void ::testing::internal::UniversalTersePrint(const T& value, ostream*);
-//
-// // Prints value using the type inferred by the compiler. The difference
-// // from UniversalTersePrint() is that this function prints both the
-// // pointer and the NUL-terminated string for a (const or not) char pointer.
-// void ::testing::internal::UniversalPrint(const T& value, ostream*);
-//
-// // Prints the fields of a tuple tersely to a string vector, one
-// // element for each field. Tuple support must be enabled in
-// // gtest-port.h.
-// std::vector<string> UniversalTersePrintTupleFieldsToStrings(
-// const Tuple& value);
-//
-// Known limitation:
-//
-// The print primitives print the elements of an STL-style container
-// using the compiler-inferred type of *iter where iter is a
-// const_iterator of the container. When const_iterator is an input
-// iterator but not a forward iterator, this inferred type may not
-// match value_type, and the print output may be incorrect. In
-// practice, this is rarely a problem as for most containers
-// const_iterator is a forward iterator. We'll fix this if there's an
-// actual need for it. Note that this fix cannot rely on value_type
-// being defined as many user-defined container types don't have
-// value_type.
-
-#ifndef GTEST_INCLUDE_GTEST_GTEST_PRINTERS_H_
-#define GTEST_INCLUDE_GTEST_GTEST_PRINTERS_H_
-
-#include <ostream> // NOLINT
-#include <sstream>
-#include <string>
-#include <utility>
-#include <vector>
-#include "gtest/internal/gtest-port.h"
-#include "gtest/internal/gtest-internal.h"
-
-#if GTEST_HAS_STD_TUPLE_
-# include <tuple>
-#endif
-
-namespace testing {
-
-// Definitions in the 'internal' and 'internal2' name spaces are
-// subject to change without notice. DO NOT USE THEM IN USER CODE!
-namespace internal2 {
-
-// Prints the given number of bytes in the given object to the given
-// ostream.
-GTEST_API_ void PrintBytesInObjectTo(const unsigned char* obj_bytes,
- size_t count,
- ::std::ostream* os);
-
-// For selecting which printer to use when a given type has neither <<
-// nor PrintTo().
-enum TypeKind {
- kProtobuf, // a protobuf type
- kConvertibleToInteger, // a type implicitly convertible to BiggestInt
- // (e.g. a named or unnamed enum type)
- kOtherType // anything else
-};
-
-// TypeWithoutFormatter<T, kTypeKind>::PrintValue(value, os) is called
-// by the universal printer to print a value of type T when neither
-// operator<< nor PrintTo() is defined for T, where kTypeKind is the
-// "kind" of T as defined by enum TypeKind.
-template <typename T, TypeKind kTypeKind>
-class TypeWithoutFormatter {
- public:
- // This default version is called when kTypeKind is kOtherType.
- static void PrintValue(const T& value, ::std::ostream* os) {
- PrintBytesInObjectTo(reinterpret_cast<const unsigned char*>(&value),
- sizeof(value), os);
- }
-};
-
-// We print a protobuf using its ShortDebugString() when the string
-// doesn't exceed this many characters; otherwise we print it using
-// DebugString() for better readability.
-const size_t kProtobufOneLinerMaxLength = 50;
-
-template <typename T>
-class TypeWithoutFormatter<T, kProtobuf> {
- public:
- static void PrintValue(const T& value, ::std::ostream* os) {
- const ::testing::internal::string short_str = value.ShortDebugString();
- const ::testing::internal::string pretty_str =
- short_str.length() <= kProtobufOneLinerMaxLength ?
- short_str : ("\n" + value.DebugString());
- *os << ("<" + pretty_str + ">");
- }
-};
-
-template <typename T>
-class TypeWithoutFormatter<T, kConvertibleToInteger> {
- public:
- // Since T has no << operator or PrintTo() but can be implicitly
- // converted to BiggestInt, we print it as a BiggestInt.
- //
- // Most likely T is an enum type (either named or unnamed), in which
- // case printing it as an integer is the desired behavior. In case
- // T is not an enum, printing it as an integer is the best we can do
- // given that it has no user-defined printer.
- static void PrintValue(const T& value, ::std::ostream* os) {
- const internal::BiggestInt kBigInt = value;
- *os << kBigInt;
- }
-};
-
-// Prints the given value to the given ostream. If the value is a
-// protocol message, its debug string is printed; if it's an enum or
-// of a type implicitly convertible to BiggestInt, it's printed as an
-// integer; otherwise the bytes in the value are printed. This is
-// what UniversalPrinter<T>::Print() does when it knows nothing about
-// type T and T has neither << operator nor PrintTo().
-//
-// A user can override this behavior for a class type Foo by defining
-// a << operator in the namespace where Foo is defined.
-//
-// We put this operator in namespace 'internal2' instead of 'internal'
-// to simplify the implementation, as much code in 'internal' needs to
-// use << in STL, which would conflict with our own << were it defined
-// in 'internal'.
-//
-// Note that this operator<< takes a generic std::basic_ostream<Char,
-// CharTraits> type instead of the more restricted std::ostream. If
-// we define it to take an std::ostream instead, we'll get an
-// "ambiguous overloads" compiler error when trying to print a type
-// Foo that supports streaming to std::basic_ostream<Char,
-// CharTraits>, as the compiler cannot tell whether
-// operator<<(std::ostream&, const T&) or
-// operator<<(std::basic_stream<Char, CharTraits>, const Foo&) is more
-// specific.
-template <typename Char, typename CharTraits, typename T>
-::std::basic_ostream<Char, CharTraits>& operator<<(
- ::std::basic_ostream<Char, CharTraits>& os, const T& x) {
- TypeWithoutFormatter<T,
- (internal::IsAProtocolMessage<T>::value ? kProtobuf :
- internal::ImplicitlyConvertible<const T&, internal::BiggestInt>::value ?
- kConvertibleToInteger : kOtherType)>::PrintValue(x, &os);
- return os;
-}
-
-} // namespace internal2
-} // namespace testing
-
-// This namespace MUST NOT BE NESTED IN ::testing, or the name look-up
-// magic needed for implementing UniversalPrinter won't work.
-namespace testing_internal {
-
-// Used to print a value that is not an STL-style container when the
-// user doesn't define PrintTo() for it.
-template <typename T>
-void DefaultPrintNonContainerTo(const T& value, ::std::ostream* os) {
- // With the following statement, during unqualified name lookup,
- // testing::internal2::operator<< appears as if it was declared in
- // the nearest enclosing namespace that contains both
- // ::testing_internal and ::testing::internal2, i.e. the global
- // namespace. For more details, refer to the C++ Standard section
- // 7.3.4-1 [namespace.udir]. This allows us to fall back onto
- // testing::internal2::operator<< in case T doesn't come with a <<
- // operator.
- //
- // We cannot write 'using ::testing::internal2::operator<<;', which
- // gcc 3.3 fails to compile due to a compiler bug.
- using namespace ::testing::internal2; // NOLINT
-
- // Assuming T is defined in namespace foo, in the next statement,
- // the compiler will consider all of:
- //
- // 1. foo::operator<< (thanks to Koenig look-up),
- // 2. ::operator<< (as the current namespace is enclosed in ::),
- // 3. testing::internal2::operator<< (thanks to the using statement above).
- //
- // The operator<< whose type matches T best will be picked.
- //
- // We deliberately allow #2 to be a candidate, as sometimes it's
- // impossible to define #1 (e.g. when foo is ::std, defining
- // anything in it is undefined behavior unless you are a compiler
- // vendor.).
- *os << value;
-}
-
-} // namespace testing_internal
-
-namespace testing {
-namespace internal {
-
-// FormatForComparison<ToPrint, OtherOperand>::Format(value) formats a
-// value of type ToPrint that is an operand of a comparison assertion
-// (e.g. ASSERT_EQ). OtherOperand is the type of the other operand in
-// the comparison, and is used to help determine the best way to
-// format the value. In particular, when the value is a C string
-// (char pointer) and the other operand is an STL string object, we
-// want to format the C string as a string, since we know it is
-// compared by value with the string object. If the value is a char
-// pointer but the other operand is not an STL string object, we don't
-// know whether the pointer is supposed to point to a NUL-terminated
-// string, and thus want to print it as a pointer to be safe.
-//
-// INTERNAL IMPLEMENTATION - DO NOT USE IN A USER PROGRAM.
-
-// The default case.
-template <typename ToPrint, typename OtherOperand>
-class FormatForComparison {
- public:
- static ::std::string Format(const ToPrint& value) {
- return ::testing::PrintToString(value);
- }
-};
-
-// Array.
-template <typename ToPrint, size_t N, typename OtherOperand>
-class FormatForComparison<ToPrint[N], OtherOperand> {
- public:
- static ::std::string Format(const ToPrint* value) {
- return FormatForComparison<const ToPrint*, OtherOperand>::Format(value);
- }
-};
-
-// By default, print C string as pointers to be safe, as we don't know
-// whether they actually point to a NUL-terminated string.
-
-#define GTEST_IMPL_FORMAT_C_STRING_AS_POINTER_(CharType) \
- template <typename OtherOperand> \
- class FormatForComparison<CharType*, OtherOperand> { \
- public: \
- static ::std::string Format(CharType* value) { \
- return ::testing::PrintToString(static_cast<const void*>(value)); \
- } \
- }
-
-GTEST_IMPL_FORMAT_C_STRING_AS_POINTER_(char);
-GTEST_IMPL_FORMAT_C_STRING_AS_POINTER_(const char);
-GTEST_IMPL_FORMAT_C_STRING_AS_POINTER_(wchar_t);
-GTEST_IMPL_FORMAT_C_STRING_AS_POINTER_(const wchar_t);
-
-#undef GTEST_IMPL_FORMAT_C_STRING_AS_POINTER_
-
-// If a C string is compared with an STL string object, we know it's meant
-// to point to a NUL-terminated string, and thus can print it as a string.
-
-#define GTEST_IMPL_FORMAT_C_STRING_AS_STRING_(CharType, OtherStringType) \
- template <> \
- class FormatForComparison<CharType*, OtherStringType> { \
- public: \
- static ::std::string Format(CharType* value) { \
- return ::testing::PrintToString(value); \
- } \
- }
-
-GTEST_IMPL_FORMAT_C_STRING_AS_STRING_(char, ::std::string);
-GTEST_IMPL_FORMAT_C_STRING_AS_STRING_(const char, ::std::string);
-
-#if GTEST_HAS_GLOBAL_STRING
-GTEST_IMPL_FORMAT_C_STRING_AS_STRING_(char, ::string);
-GTEST_IMPL_FORMAT_C_STRING_AS_STRING_(const char, ::string);
-#endif
-
-#if GTEST_HAS_GLOBAL_WSTRING
-GTEST_IMPL_FORMAT_C_STRING_AS_STRING_(wchar_t, ::wstring);
-GTEST_IMPL_FORMAT_C_STRING_AS_STRING_(const wchar_t, ::wstring);
-#endif
-
-#if GTEST_HAS_STD_WSTRING
-GTEST_IMPL_FORMAT_C_STRING_AS_STRING_(wchar_t, ::std::wstring);
-GTEST_IMPL_FORMAT_C_STRING_AS_STRING_(const wchar_t, ::std::wstring);
-#endif
-
-#undef GTEST_IMPL_FORMAT_C_STRING_AS_STRING_
-
-// Formats a comparison assertion (e.g. ASSERT_EQ, EXPECT_LT, and etc)
-// operand to be used in a failure message. The type (but not value)
-// of the other operand may affect the format. This allows us to
-// print a char* as a raw pointer when it is compared against another
-// char* or void*, and print it as a C string when it is compared
-// against an std::string object, for example.
-//
-// INTERNAL IMPLEMENTATION - DO NOT USE IN A USER PROGRAM.
-template <typename T1, typename T2>
-std::string FormatForComparisonFailureMessage(
- const T1& value, const T2& /* other_operand */) {
- return FormatForComparison<T1, T2>::Format(value);
-}
-
-// UniversalPrinter<T>::Print(value, ostream_ptr) prints the given
-// value to the given ostream. The caller must ensure that
-// 'ostream_ptr' is not NULL, or the behavior is undefined.
-//
-// We define UniversalPrinter as a class template (as opposed to a
-// function template), as we need to partially specialize it for
-// reference types, which cannot be done with function templates.
-template <typename T>
-class UniversalPrinter;
-
-template <typename T>
-void UniversalPrint(const T& value, ::std::ostream* os);
-
-// Used to print an STL-style container when the user doesn't define
-// a PrintTo() for it.
-template <typename C>
-void DefaultPrintTo(IsContainer /* dummy */,
- false_type /* is not a pointer */,
- const C& container, ::std::ostream* os) {
- const size_t kMaxCount = 32; // The maximum number of elements to print.
- *os << '{';
- size_t count = 0;
- for (typename C::const_iterator it = container.begin();
- it != container.end(); ++it, ++count) {
- if (count > 0) {
- *os << ',';
- if (count == kMaxCount) { // Enough has been printed.
- *os << " ...";
- break;
- }
- }
- *os << ' ';
- // We cannot call PrintTo(*it, os) here as PrintTo() doesn't
- // handle *it being a native array.
- internal::UniversalPrint(*it, os);
- }
-
- if (count > 0) {
- *os << ' ';
- }
- *os << '}';
-}
-
-// Used to print a pointer that is neither a char pointer nor a member
-// pointer, when the user doesn't define PrintTo() for it. (A member
-// variable pointer or member function pointer doesn't really point to
-// a location in the address space. Their representation is
-// implementation-defined. Therefore they will be printed as raw
-// bytes.)
-template <typename T>
-void DefaultPrintTo(IsNotContainer /* dummy */,
- true_type /* is a pointer */,
- T* p, ::std::ostream* os) {
- if (p == NULL) {
- *os << "NULL";
- } else {
- // C++ doesn't allow casting from a function pointer to any object
- // pointer.
- //
- // IsTrue() silences warnings: "Condition is always true",
- // "unreachable code".
- if (IsTrue(ImplicitlyConvertible<T*, const void*>::value)) {
- // T is not a function type. We just call << to print p,
- // relying on ADL to pick up user-defined << for their pointer
- // types, if any.
- *os << p;
- } else {
- // T is a function type, so '*os << p' doesn't do what we want
- // (it just prints p as bool). We want to print p as a const
- // void*. However, we cannot cast it to const void* directly,
- // even using reinterpret_cast, as earlier versions of gcc
- // (e.g. 3.4.5) cannot compile the cast when p is a function
- // pointer. Casting to UInt64 first solves the problem.
- *os << reinterpret_cast<const void*>(
- reinterpret_cast<internal::UInt64>(p));
- }
- }
-}
-
-// Used to print a non-container, non-pointer value when the user
-// doesn't define PrintTo() for it.
-template <typename T>
-void DefaultPrintTo(IsNotContainer /* dummy */,
- false_type /* is not a pointer */,
- const T& value, ::std::ostream* os) {
- ::testing_internal::DefaultPrintNonContainerTo(value, os);
-}
-
-// Prints the given value using the << operator if it has one;
-// otherwise prints the bytes in it. This is what
-// UniversalPrinter<T>::Print() does when PrintTo() is not specialized
-// or overloaded for type T.
-//
-// A user can override this behavior for a class type Foo by defining
-// an overload of PrintTo() in the namespace where Foo is defined. We
-// give the user this option as sometimes defining a << operator for
-// Foo is not desirable (e.g. the coding style may prevent doing it,
-// or there is already a << operator but it doesn't do what the user
-// wants).
-template <typename T>
-void PrintTo(const T& value, ::std::ostream* os) {
- // DefaultPrintTo() is overloaded. The type of its first two
- // arguments determine which version will be picked. If T is an
- // STL-style container, the version for container will be called; if
- // T is a pointer, the pointer version will be called; otherwise the
- // generic version will be called.
- //
- // Note that we check for container types here, prior to we check
- // for protocol message types in our operator<<. The rationale is:
- //
- // For protocol messages, we want to give people a chance to
- // override Google Mock's format by defining a PrintTo() or
- // operator<<. For STL containers, other formats can be
- // incompatible with Google Mock's format for the container
- // elements; therefore we check for container types here to ensure
- // that our format is used.
- //
- // The second argument of DefaultPrintTo() is needed to bypass a bug
- // in Symbian's C++ compiler that prevents it from picking the right
- // overload between:
- //
- // PrintTo(const T& x, ...);
- // PrintTo(T* x, ...);
- DefaultPrintTo(IsContainerTest<T>(0), is_pointer<T>(), value, os);
-}
-
-// The following list of PrintTo() overloads tells
-// UniversalPrinter<T>::Print() how to print standard types (built-in
-// types, strings, plain arrays, and pointers).
-
-// Overloads for various char types.
-GTEST_API_ void PrintTo(unsigned char c, ::std::ostream* os);
-GTEST_API_ void PrintTo(signed char c, ::std::ostream* os);
-inline void PrintTo(char c, ::std::ostream* os) {
- // When printing a plain char, we always treat it as unsigned. This
- // way, the output won't be affected by whether the compiler thinks
- // char is signed or not.
- PrintTo(static_cast<unsigned char>(c), os);
-}
-
-// Overloads for other simple built-in types.
-inline void PrintTo(bool x, ::std::ostream* os) {
- *os << (x ? "true" : "false");
-}
-
-// Overload for wchar_t type.
-// Prints a wchar_t as a symbol if it is printable or as its internal
-// code otherwise and also as its decimal code (except for L'\0').
-// The L'\0' char is printed as "L'\\0'". The decimal code is printed
-// as signed integer when wchar_t is implemented by the compiler
-// as a signed type and is printed as an unsigned integer when wchar_t
-// is implemented as an unsigned type.
-GTEST_API_ void PrintTo(wchar_t wc, ::std::ostream* os);
-
-// Overloads for C strings.
-GTEST_API_ void PrintTo(const char* s, ::std::ostream* os);
-inline void PrintTo(char* s, ::std::ostream* os) {
- PrintTo(ImplicitCast_<const char*>(s), os);
-}
-
-// signed/unsigned char is often used for representing binary data, so
-// we print pointers to it as void* to be safe.
-inline void PrintTo(const signed char* s, ::std::ostream* os) {
- PrintTo(ImplicitCast_<const void*>(s), os);
-}
-inline void PrintTo(signed char* s, ::std::ostream* os) {
- PrintTo(ImplicitCast_<const void*>(s), os);
-}
-inline void PrintTo(const unsigned char* s, ::std::ostream* os) {
- PrintTo(ImplicitCast_<const void*>(s), os);
-}
-inline void PrintTo(unsigned char* s, ::std::ostream* os) {
- PrintTo(ImplicitCast_<const void*>(s), os);
-}
-
-// MSVC can be configured to define wchar_t as a typedef of unsigned
-// short. It defines _NATIVE_WCHAR_T_DEFINED when wchar_t is a native
-// type. When wchar_t is a typedef, defining an overload for const
-// wchar_t* would cause unsigned short* be printed as a wide string,
-// possibly causing invalid memory accesses.
-#if !defined(_MSC_VER) || defined(_NATIVE_WCHAR_T_DEFINED)
-// Overloads for wide C strings
-GTEST_API_ void PrintTo(const wchar_t* s, ::std::ostream* os);
-inline void PrintTo(wchar_t* s, ::std::ostream* os) {
- PrintTo(ImplicitCast_<const wchar_t*>(s), os);
-}
-#endif
-
-// Overload for C arrays. Multi-dimensional arrays are printed
-// properly.
-
-// Prints the given number of elements in an array, without printing
-// the curly braces.
-template <typename T>
-void PrintRawArrayTo(const T a[], size_t count, ::std::ostream* os) {
- UniversalPrint(a[0], os);
- for (size_t i = 1; i != count; i++) {
- *os << ", ";
- UniversalPrint(a[i], os);
- }
-}
-
-// Overloads for ::string and ::std::string.
-#if GTEST_HAS_GLOBAL_STRING
-GTEST_API_ void PrintStringTo(const ::string&s, ::std::ostream* os);
-inline void PrintTo(const ::string& s, ::std::ostream* os) {
- PrintStringTo(s, os);
-}
-#endif // GTEST_HAS_GLOBAL_STRING
-
-GTEST_API_ void PrintStringTo(const ::std::string&s, ::std::ostream* os);
-inline void PrintTo(const ::std::string& s, ::std::ostream* os) {
- PrintStringTo(s, os);
-}
-
-// Overloads for ::wstring and ::std::wstring.
-#if GTEST_HAS_GLOBAL_WSTRING
-GTEST_API_ void PrintWideStringTo(const ::wstring&s, ::std::ostream* os);
-inline void PrintTo(const ::wstring& s, ::std::ostream* os) {
- PrintWideStringTo(s, os);
-}
-#endif // GTEST_HAS_GLOBAL_WSTRING
-
-#if GTEST_HAS_STD_WSTRING
-GTEST_API_ void PrintWideStringTo(const ::std::wstring&s, ::std::ostream* os);
-inline void PrintTo(const ::std::wstring& s, ::std::ostream* os) {
- PrintWideStringTo(s, os);
-}
-#endif // GTEST_HAS_STD_WSTRING
-
-#if GTEST_HAS_TR1_TUPLE || GTEST_HAS_STD_TUPLE_
-// Helper function for printing a tuple. T must be instantiated with
-// a tuple type.
-template <typename T>
-void PrintTupleTo(const T& t, ::std::ostream* os);
-#endif // GTEST_HAS_TR1_TUPLE || GTEST_HAS_STD_TUPLE_
-
-#if GTEST_HAS_TR1_TUPLE
-// Overload for ::std::tr1::tuple. Needed for printing function arguments,
-// which are packed as tuples.
-
-// Overloaded PrintTo() for tuples of various arities. We support
-// tuples of up-to 10 fields. The following implementation works
-// regardless of whether tr1::tuple is implemented using the
-// non-standard variadic template feature or not.
-
-inline void PrintTo(const ::std::tr1::tuple<>& t, ::std::ostream* os) {
- PrintTupleTo(t, os);
-}
-
-template <typename T1>
-void PrintTo(const ::std::tr1::tuple<T1>& t, ::std::ostream* os) {
- PrintTupleTo(t, os);
-}
-
-template <typename T1, typename T2>
-void PrintTo(const ::std::tr1::tuple<T1, T2>& t, ::std::ostream* os) {
- PrintTupleTo(t, os);
-}
-
-template <typename T1, typename T2, typename T3>
-void PrintTo(const ::std::tr1::tuple<T1, T2, T3>& t, ::std::ostream* os) {
- PrintTupleTo(t, os);
-}
-
-template <typename T1, typename T2, typename T3, typename T4>
-void PrintTo(const ::std::tr1::tuple<T1, T2, T3, T4>& t, ::std::ostream* os) {
- PrintTupleTo(t, os);
-}
-
-template <typename T1, typename T2, typename T3, typename T4, typename T5>
-void PrintTo(const ::std::tr1::tuple<T1, T2, T3, T4, T5>& t,
- ::std::ostream* os) {
- PrintTupleTo(t, os);
-}
-
-template <typename T1, typename T2, typename T3, typename T4, typename T5,
- typename T6>
-void PrintTo(const ::std::tr1::tuple<T1, T2, T3, T4, T5, T6>& t,
- ::std::ostream* os) {
- PrintTupleTo(t, os);
-}
-
-template <typename T1, typename T2, typename T3, typename T4, typename T5,
- typename T6, typename T7>
-void PrintTo(const ::std::tr1::tuple<T1, T2, T3, T4, T5, T6, T7>& t,
- ::std::ostream* os) {
- PrintTupleTo(t, os);
-}
-
-template <typename T1, typename T2, typename T3, typename T4, typename T5,
- typename T6, typename T7, typename T8>
-void PrintTo(const ::std::tr1::tuple<T1, T2, T3, T4, T5, T6, T7, T8>& t,
- ::std::ostream* os) {
- PrintTupleTo(t, os);
-}
-
-template <typename T1, typename T2, typename T3, typename T4, typename T5,
- typename T6, typename T7, typename T8, typename T9>
-void PrintTo(const ::std::tr1::tuple<T1, T2, T3, T4, T5, T6, T7, T8, T9>& t,
- ::std::ostream* os) {
- PrintTupleTo(t, os);
-}
-
-template <typename T1, typename T2, typename T3, typename T4, typename T5,
- typename T6, typename T7, typename T8, typename T9, typename T10>
-void PrintTo(
- const ::std::tr1::tuple<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10>& t,
- ::std::ostream* os) {
- PrintTupleTo(t, os);
-}
-#endif // GTEST_HAS_TR1_TUPLE
-
-#if GTEST_HAS_STD_TUPLE_
-template <typename... Types>
-void PrintTo(const ::std::tuple<Types...>& t, ::std::ostream* os) {
- PrintTupleTo(t, os);
-}
-#endif // GTEST_HAS_STD_TUPLE_
-
-// Overload for std::pair.
-template <typename T1, typename T2>
-void PrintTo(const ::std::pair<T1, T2>& value, ::std::ostream* os) {
- *os << '(';
- // We cannot use UniversalPrint(value.first, os) here, as T1 may be
- // a reference type. The same for printing value.second.
- UniversalPrinter<T1>::Print(value.first, os);
- *os << ", ";
- UniversalPrinter<T2>::Print(value.second, os);
- *os << ')';
-}
-
-// Implements printing a non-reference type T by letting the compiler
-// pick the right overload of PrintTo() for T.
-template <typename T>
-class UniversalPrinter {
- public:
- // MSVC warns about adding const to a function type, so we want to
- // disable the warning.
- GTEST_DISABLE_MSC_WARNINGS_PUSH_(4180)
-
- // Note: we deliberately don't call this PrintTo(), as that name
- // conflicts with ::testing::internal::PrintTo in the body of the
- // function.
- static void Print(const T& value, ::std::ostream* os) {
- // By default, ::testing::internal::PrintTo() is used for printing
- // the value.
- //
- // Thanks to Koenig look-up, if T is a class and has its own
- // PrintTo() function defined in its namespace, that function will
- // be visible here. Since it is more specific than the generic ones
- // in ::testing::internal, it will be picked by the compiler in the
- // following statement - exactly what we want.
- PrintTo(value, os);
- }
-
- GTEST_DISABLE_MSC_WARNINGS_POP_()
-};
-
-// UniversalPrintArray(begin, len, os) prints an array of 'len'
-// elements, starting at address 'begin'.
-template <typename T>
-void UniversalPrintArray(const T* begin, size_t len, ::std::ostream* os) {
- if (len == 0) {
- *os << "{}";
- } else {
- *os << "{ ";
- const size_t kThreshold = 18;
- const size_t kChunkSize = 8;
- // If the array has more than kThreshold elements, we'll have to
- // omit some details by printing only the first and the last
- // kChunkSize elements.
- // TODO(wan@google.com): let the user control the threshold using a flag.
- if (len <= kThreshold) {
- PrintRawArrayTo(begin, len, os);
- } else {
- PrintRawArrayTo(begin, kChunkSize, os);
- *os << ", ..., ";
- PrintRawArrayTo(begin + len - kChunkSize, kChunkSize, os);
- }
- *os << " }";
- }
-}
-// This overload prints a (const) char array compactly.
-GTEST_API_ void UniversalPrintArray(
- const char* begin, size_t len, ::std::ostream* os);
-
-// This overload prints a (const) wchar_t array compactly.
-GTEST_API_ void UniversalPrintArray(
- const wchar_t* begin, size_t len, ::std::ostream* os);
-
-// Implements printing an array type T[N].
-template <typename T, size_t N>
-class UniversalPrinter<T[N]> {
- public:
- // Prints the given array, omitting some elements when there are too
- // many.
- static void Print(const T (&a)[N], ::std::ostream* os) {
- UniversalPrintArray(a, N, os);
- }
-};
-
-// Implements printing a reference type T&.
-template <typename T>
-class UniversalPrinter<T&> {
- public:
- // MSVC warns about adding const to a function type, so we want to
- // disable the warning.
- GTEST_DISABLE_MSC_WARNINGS_PUSH_(4180)
-
- static void Print(const T& value, ::std::ostream* os) {
- // Prints the address of the value. We use reinterpret_cast here
- // as static_cast doesn't compile when T is a function type.
- *os << "@" << reinterpret_cast<const void*>(&value) << " ";
-
- // Then prints the value itself.
- UniversalPrint(value, os);
- }
-
- GTEST_DISABLE_MSC_WARNINGS_POP_()
-};
-
-// Prints a value tersely: for a reference type, the referenced value
-// (but not the address) is printed; for a (const) char pointer, the
-// NUL-terminated string (but not the pointer) is printed.
-
-template <typename T>
-class UniversalTersePrinter {
- public:
- static void Print(const T& value, ::std::ostream* os) {
- UniversalPrint(value, os);
- }
-};
-template <typename T>
-class UniversalTersePrinter<T&> {
- public:
- static void Print(const T& value, ::std::ostream* os) {
- UniversalPrint(value, os);
- }
-};
-template <typename T, size_t N>
-class UniversalTersePrinter<T[N]> {
- public:
- static void Print(const T (&value)[N], ::std::ostream* os) {
- UniversalPrinter<T[N]>::Print(value, os);
- }
-};
-template <>
-class UniversalTersePrinter<const char*> {
- public:
- static void Print(const char* str, ::std::ostream* os) {
- if (str == NULL) {
- *os << "NULL";
- } else {
- UniversalPrint(string(str), os);
- }
- }
-};
-template <>
-class UniversalTersePrinter<char*> {
- public:
- static void Print(char* str, ::std::ostream* os) {
- UniversalTersePrinter<const char*>::Print(str, os);
- }
-};
-
-#if GTEST_HAS_STD_WSTRING
-template <>
-class UniversalTersePrinter<const wchar_t*> {
- public:
- static void Print(const wchar_t* str, ::std::ostream* os) {
- if (str == NULL) {
- *os << "NULL";
- } else {
- UniversalPrint(::std::wstring(str), os);
- }
- }
-};
-#endif
-
-template <>
-class UniversalTersePrinter<wchar_t*> {
- public:
- static void Print(wchar_t* str, ::std::ostream* os) {
- UniversalTersePrinter<const wchar_t*>::Print(str, os);
- }
-};
-
-template <typename T>
-void UniversalTersePrint(const T& value, ::std::ostream* os) {
- UniversalTersePrinter<T>::Print(value, os);
-}
-
-// Prints a value using the type inferred by the compiler. The
-// difference between this and UniversalTersePrint() is that for a
-// (const) char pointer, this prints both the pointer and the
-// NUL-terminated string.
-template <typename T>
-void UniversalPrint(const T& value, ::std::ostream* os) {
- // A workarond for the bug in VC++ 7.1 that prevents us from instantiating
- // UniversalPrinter with T directly.
- typedef T T1;
- UniversalPrinter<T1>::Print(value, os);
-}
-
-typedef ::std::vector<string> Strings;
-
-// TuplePolicy<TupleT> must provide:
-// - tuple_size
-// size of tuple TupleT.
-// - get<size_t I>(const TupleT& t)
-// static function extracting element I of tuple TupleT.
-// - tuple_element<size_t I>::type
-// type of element I of tuple TupleT.
-template <typename TupleT>
-struct TuplePolicy;
-
-#if GTEST_HAS_TR1_TUPLE
-template <typename TupleT>
-struct TuplePolicy {
- typedef TupleT Tuple;
- static const size_t tuple_size = ::std::tr1::tuple_size<Tuple>::value;
-
- template <size_t I>
- struct tuple_element : ::std::tr1::tuple_element<I, Tuple> {};
-
- template <size_t I>
- static typename AddReference<
- const typename ::std::tr1::tuple_element<I, Tuple>::type>::type get(
- const Tuple& tuple) {
- return ::std::tr1::get<I>(tuple);
- }
-};
-template <typename TupleT>
-const size_t TuplePolicy<TupleT>::tuple_size;
-#endif // GTEST_HAS_TR1_TUPLE
-
-#if GTEST_HAS_STD_TUPLE_
-template <typename... Types>
-struct TuplePolicy< ::std::tuple<Types...> > {
- typedef ::std::tuple<Types...> Tuple;
- static const size_t tuple_size = ::std::tuple_size<Tuple>::value;
-
- template <size_t I>
- struct tuple_element : ::std::tuple_element<I, Tuple> {};
-
- template <size_t I>
- static const typename ::std::tuple_element<I, Tuple>::type& get(
- const Tuple& tuple) {
- return ::std::get<I>(tuple);
- }
-};
-template <typename... Types>
-const size_t TuplePolicy< ::std::tuple<Types...> >::tuple_size;
-#endif // GTEST_HAS_STD_TUPLE_
-
-#if GTEST_HAS_TR1_TUPLE || GTEST_HAS_STD_TUPLE_
-// This helper template allows PrintTo() for tuples and
-// UniversalTersePrintTupleFieldsToStrings() to be defined by
-// induction on the number of tuple fields. The idea is that
-// TuplePrefixPrinter<N>::PrintPrefixTo(t, os) prints the first N
-// fields in tuple t, and can be defined in terms of
-// TuplePrefixPrinter<N - 1>.
-//
-// The inductive case.
-template <size_t N>
-struct TuplePrefixPrinter {
- // Prints the first N fields of a tuple.
- template <typename Tuple>
- static void PrintPrefixTo(const Tuple& t, ::std::ostream* os) {
- TuplePrefixPrinter<N - 1>::PrintPrefixTo(t, os);
- GTEST_INTENTIONAL_CONST_COND_PUSH_()
- if (N > 1) {
- GTEST_INTENTIONAL_CONST_COND_POP_()
- *os << ", ";
- }
- UniversalPrinter<
- typename TuplePolicy<Tuple>::template tuple_element<N - 1>::type>
- ::Print(TuplePolicy<Tuple>::template get<N - 1>(t), os);
- }
-
- // Tersely prints the first N fields of a tuple to a string vector,
- // one element for each field.
- template <typename Tuple>
- static void TersePrintPrefixToStrings(const Tuple& t, Strings* strings) {
- TuplePrefixPrinter<N - 1>::TersePrintPrefixToStrings(t, strings);
- ::std::stringstream ss;
- UniversalTersePrint(TuplePolicy<Tuple>::template get<N - 1>(t), &ss);
- strings->push_back(ss.str());
- }
-};
-
-// Base case.
-template <>
-struct TuplePrefixPrinter<0> {
- template <typename Tuple>
- static void PrintPrefixTo(const Tuple&, ::std::ostream*) {}
-
- template <typename Tuple>
- static void TersePrintPrefixToStrings(const Tuple&, Strings*) {}
-};
-
-// Helper function for printing a tuple.
-// Tuple must be either std::tr1::tuple or std::tuple type.
-template <typename Tuple>
-void PrintTupleTo(const Tuple& t, ::std::ostream* os) {
- *os << "(";
- TuplePrefixPrinter<TuplePolicy<Tuple>::tuple_size>::PrintPrefixTo(t, os);
- *os << ")";
-}
-
-// Prints the fields of a tuple tersely to a string vector, one
-// element for each field. See the comment before
-// UniversalTersePrint() for how we define "tersely".
-template <typename Tuple>
-Strings UniversalTersePrintTupleFieldsToStrings(const Tuple& value) {
- Strings result;
- TuplePrefixPrinter<TuplePolicy<Tuple>::tuple_size>::
- TersePrintPrefixToStrings(value, &result);
- return result;
-}
-#endif // GTEST_HAS_TR1_TUPLE || GTEST_HAS_STD_TUPLE_
-
-} // namespace internal
-
-template <typename T>
-::std::string PrintToString(const T& value) {
- ::std::stringstream ss;
- internal::UniversalTersePrinter<T>::Print(value, &ss);
- return ss.str();
-}
-
-} // namespace testing
-
-// Include any custom printer added by the local installation.
-// We must include this header at the end to make sure it can use the
-// declarations from this file.
-#include "gtest/internal/custom/gtest-printers.h"
-
-#endif // GTEST_INCLUDE_GTEST_GTEST_PRINTERS_H_
diff --git a/third_party/aom/third_party/googletest/src/googletest/include/gtest/gtest-spi.h b/third_party/aom/third_party/googletest/src/googletest/include/gtest/gtest-spi.h
deleted file mode 100644
index f63fa9a1b..000000000
--- a/third_party/aom/third_party/googletest/src/googletest/include/gtest/gtest-spi.h
+++ /dev/null
@@ -1,232 +0,0 @@
-// Copyright 2007, Google Inc.
-// All rights reserved.
-//
-// Redistribution and use in source and binary forms, with or without
-// modification, are permitted provided that the following conditions are
-// met:
-//
-// * Redistributions of source code must retain the above copyright
-// notice, this list of conditions and the following disclaimer.
-// * Redistributions in binary form must reproduce the above
-// copyright notice, this list of conditions and the following disclaimer
-// in the documentation and/or other materials provided with the
-// distribution.
-// * Neither the name of Google Inc. nor the names of its
-// contributors may be used to endorse or promote products derived from
-// this software without specific prior written permission.
-//
-// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
-// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
-// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
-// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
-// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
-// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
-// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
-// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
-// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-//
-// Author: wan@google.com (Zhanyong Wan)
-//
-// Utilities for testing Google Test itself and code that uses Google Test
-// (e.g. frameworks built on top of Google Test).
-
-#ifndef GTEST_INCLUDE_GTEST_GTEST_SPI_H_
-#define GTEST_INCLUDE_GTEST_GTEST_SPI_H_
-
-#include "gtest/gtest.h"
-
-namespace testing {
-
-// This helper class can be used to mock out Google Test failure reporting
-// so that we can test Google Test or code that builds on Google Test.
-//
-// An object of this class appends a TestPartResult object to the
-// TestPartResultArray object given in the constructor whenever a Google Test
-// failure is reported. It can either intercept only failures that are
-// generated in the same thread that created this object or it can intercept
-// all generated failures. The scope of this mock object can be controlled with
-// the second argument to the two arguments constructor.
-class GTEST_API_ ScopedFakeTestPartResultReporter
- : public TestPartResultReporterInterface {
- public:
- // The two possible mocking modes of this object.
- enum InterceptMode {
- INTERCEPT_ONLY_CURRENT_THREAD, // Intercepts only thread local failures.
- INTERCEPT_ALL_THREADS // Intercepts all failures.
- };
-
- // The c'tor sets this object as the test part result reporter used
- // by Google Test. The 'result' parameter specifies where to report the
- // results. This reporter will only catch failures generated in the current
- // thread. DEPRECATED
- explicit ScopedFakeTestPartResultReporter(TestPartResultArray* result);
-
- // Same as above, but you can choose the interception scope of this object.
- ScopedFakeTestPartResultReporter(InterceptMode intercept_mode,
- TestPartResultArray* result);
-
- // The d'tor restores the previous test part result reporter.
- virtual ~ScopedFakeTestPartResultReporter();
-
- // Appends the TestPartResult object to the TestPartResultArray
- // received in the constructor.
- //
- // This method is from the TestPartResultReporterInterface
- // interface.
- virtual void ReportTestPartResult(const TestPartResult& result);
- private:
- void Init();
-
- const InterceptMode intercept_mode_;
- TestPartResultReporterInterface* old_reporter_;
- TestPartResultArray* const result_;
-
- GTEST_DISALLOW_COPY_AND_ASSIGN_(ScopedFakeTestPartResultReporter);
-};
-
-namespace internal {
-
-// A helper class for implementing EXPECT_FATAL_FAILURE() and
-// EXPECT_NONFATAL_FAILURE(). Its destructor verifies that the given
-// TestPartResultArray contains exactly one failure that has the given
-// type and contains the given substring. If that's not the case, a
-// non-fatal failure will be generated.
-class GTEST_API_ SingleFailureChecker {
- public:
- // The constructor remembers the arguments.
- SingleFailureChecker(const TestPartResultArray* results,
- TestPartResult::Type type,
- const string& substr);
- ~SingleFailureChecker();
- private:
- const TestPartResultArray* const results_;
- const TestPartResult::Type type_;
- const string substr_;
-
- GTEST_DISALLOW_COPY_AND_ASSIGN_(SingleFailureChecker);
-};
-
-} // namespace internal
-
-} // namespace testing
-
-// A set of macros for testing Google Test assertions or code that's expected
-// to generate Google Test fatal failures. It verifies that the given
-// statement will cause exactly one fatal Google Test failure with 'substr'
-// being part of the failure message.
-//
-// There are two different versions of this macro. EXPECT_FATAL_FAILURE only
-// affects and considers failures generated in the current thread and
-// EXPECT_FATAL_FAILURE_ON_ALL_THREADS does the same but for all threads.
-//
-// The verification of the assertion is done correctly even when the statement
-// throws an exception or aborts the current function.
-//
-// Known restrictions:
-// - 'statement' cannot reference local non-static variables or
-// non-static members of the current object.
-// - 'statement' cannot return a value.
-// - You cannot stream a failure message to this macro.
-//
-// Note that even though the implementations of the following two
-// macros are much alike, we cannot refactor them to use a common
-// helper macro, due to some peculiarity in how the preprocessor
-// works. The AcceptsMacroThatExpandsToUnprotectedComma test in
-// gtest_unittest.cc will fail to compile if we do that.
-#define EXPECT_FATAL_FAILURE(statement, substr) \
- do { \
- class GTestExpectFatalFailureHelper {\
- public:\
- static void Execute() { statement; }\
- };\
- ::testing::TestPartResultArray gtest_failures;\
- ::testing::internal::SingleFailureChecker gtest_checker(\
- &gtest_failures, ::testing::TestPartResult::kFatalFailure, (substr));\
- {\
- ::testing::ScopedFakeTestPartResultReporter gtest_reporter(\
- ::testing::ScopedFakeTestPartResultReporter:: \
- INTERCEPT_ONLY_CURRENT_THREAD, &gtest_failures);\
- GTestExpectFatalFailureHelper::Execute();\
- }\
- } while (::testing::internal::AlwaysFalse())
-
-#define EXPECT_FATAL_FAILURE_ON_ALL_THREADS(statement, substr) \
- do { \
- class GTestExpectFatalFailureHelper {\
- public:\
- static void Execute() { statement; }\
- };\
- ::testing::TestPartResultArray gtest_failures;\
- ::testing::internal::SingleFailureChecker gtest_checker(\
- &gtest_failures, ::testing::TestPartResult::kFatalFailure, (substr));\
- {\
- ::testing::ScopedFakeTestPartResultReporter gtest_reporter(\
- ::testing::ScopedFakeTestPartResultReporter:: \
- INTERCEPT_ALL_THREADS, &gtest_failures);\
- GTestExpectFatalFailureHelper::Execute();\
- }\
- } while (::testing::internal::AlwaysFalse())
-
-// A macro for testing Google Test assertions or code that's expected to
-// generate Google Test non-fatal failures. It asserts that the given
-// statement will cause exactly one non-fatal Google Test failure with 'substr'
-// being part of the failure message.
-//
-// There are two different versions of this macro. EXPECT_NONFATAL_FAILURE only
-// affects and considers failures generated in the current thread and
-// EXPECT_NONFATAL_FAILURE_ON_ALL_THREADS does the same but for all threads.
-//
-// 'statement' is allowed to reference local variables and members of
-// the current object.
-//
-// The verification of the assertion is done correctly even when the statement
-// throws an exception or aborts the current function.
-//
-// Known restrictions:
-// - You cannot stream a failure message to this macro.
-//
-// Note that even though the implementations of the following two
-// macros are much alike, we cannot refactor them to use a common
-// helper macro, due to some peculiarity in how the preprocessor
-// works. If we do that, the code won't compile when the user gives
-// EXPECT_NONFATAL_FAILURE() a statement that contains a macro that
-// expands to code containing an unprotected comma. The
-// AcceptsMacroThatExpandsToUnprotectedComma test in gtest_unittest.cc
-// catches that.
-//
-// For the same reason, we have to write
-// if (::testing::internal::AlwaysTrue()) { statement; }
-// instead of
-// GTEST_SUPPRESS_UNREACHABLE_CODE_WARNING_BELOW_(statement)
-// to avoid an MSVC warning on unreachable code.
-#define EXPECT_NONFATAL_FAILURE(statement, substr) \
- do {\
- ::testing::TestPartResultArray gtest_failures;\
- ::testing::internal::SingleFailureChecker gtest_checker(\
- &gtest_failures, ::testing::TestPartResult::kNonFatalFailure, \
- (substr));\
- {\
- ::testing::ScopedFakeTestPartResultReporter gtest_reporter(\
- ::testing::ScopedFakeTestPartResultReporter:: \
- INTERCEPT_ONLY_CURRENT_THREAD, &gtest_failures);\
- if (::testing::internal::AlwaysTrue()) { statement; }\
- }\
- } while (::testing::internal::AlwaysFalse())
-
-#define EXPECT_NONFATAL_FAILURE_ON_ALL_THREADS(statement, substr) \
- do {\
- ::testing::TestPartResultArray gtest_failures;\
- ::testing::internal::SingleFailureChecker gtest_checker(\
- &gtest_failures, ::testing::TestPartResult::kNonFatalFailure, \
- (substr));\
- {\
- ::testing::ScopedFakeTestPartResultReporter gtest_reporter(\
- ::testing::ScopedFakeTestPartResultReporter::INTERCEPT_ALL_THREADS, \
- &gtest_failures);\
- if (::testing::internal::AlwaysTrue()) { statement; }\
- }\
- } while (::testing::internal::AlwaysFalse())
-
-#endif // GTEST_INCLUDE_GTEST_GTEST_SPI_H_
diff --git a/third_party/aom/third_party/googletest/src/googletest/include/gtest/gtest-test-part.h b/third_party/aom/third_party/googletest/src/googletest/include/gtest/gtest-test-part.h
deleted file mode 100644
index 77eb84483..000000000
--- a/third_party/aom/third_party/googletest/src/googletest/include/gtest/gtest-test-part.h
+++ /dev/null
@@ -1,179 +0,0 @@
-// Copyright 2008, Google Inc.
-// All rights reserved.
-//
-// Redistribution and use in source and binary forms, with or without
-// modification, are permitted provided that the following conditions are
-// met:
-//
-// * Redistributions of source code must retain the above copyright
-// notice, this list of conditions and the following disclaimer.
-// * Redistributions in binary form must reproduce the above
-// copyright notice, this list of conditions and the following disclaimer
-// in the documentation and/or other materials provided with the
-// distribution.
-// * Neither the name of Google Inc. nor the names of its
-// contributors may be used to endorse or promote products derived from
-// this software without specific prior written permission.
-//
-// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
-// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
-// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
-// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
-// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
-// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
-// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
-// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
-// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-//
-// Author: mheule@google.com (Markus Heule)
-//
-
-#ifndef GTEST_INCLUDE_GTEST_GTEST_TEST_PART_H_
-#define GTEST_INCLUDE_GTEST_GTEST_TEST_PART_H_
-
-#include <iosfwd>
-#include <vector>
-#include "gtest/internal/gtest-internal.h"
-#include "gtest/internal/gtest-string.h"
-
-namespace testing {
-
-// A copyable object representing the result of a test part (i.e. an
-// assertion or an explicit FAIL(), ADD_FAILURE(), or SUCCESS()).
-//
-// Don't inherit from TestPartResult as its destructor is not virtual.
-class GTEST_API_ TestPartResult {
- public:
- // The possible outcomes of a test part (i.e. an assertion or an
- // explicit SUCCEED(), FAIL(), or ADD_FAILURE()).
- enum Type {
- kSuccess, // Succeeded.
- kNonFatalFailure, // Failed but the test can continue.
- kFatalFailure // Failed and the test should be terminated.
- };
-
- // C'tor. TestPartResult does NOT have a default constructor.
- // Always use this constructor (with parameters) to create a
- // TestPartResult object.
- TestPartResult(Type a_type,
- const char* a_file_name,
- int a_line_number,
- const char* a_message)
- : type_(a_type),
- file_name_(a_file_name == NULL ? "" : a_file_name),
- line_number_(a_line_number),
- summary_(ExtractSummary(a_message)),
- message_(a_message) {
- }
-
- // Gets the outcome of the test part.
- Type type() const { return type_; }
-
- // Gets the name of the source file where the test part took place, or
- // NULL if it's unknown.
- const char* file_name() const {
- return file_name_.empty() ? NULL : file_name_.c_str();
- }
-
- // Gets the line in the source file where the test part took place,
- // or -1 if it's unknown.
- int line_number() const { return line_number_; }
-
- // Gets the summary of the failure message.
- const char* summary() const { return summary_.c_str(); }
-
- // Gets the message associated with the test part.
- const char* message() const { return message_.c_str(); }
-
- // Returns true iff the test part passed.
- bool passed() const { return type_ == kSuccess; }
-
- // Returns true iff the test part failed.
- bool failed() const { return type_ != kSuccess; }
-
- // Returns true iff the test part non-fatally failed.
- bool nonfatally_failed() const { return type_ == kNonFatalFailure; }
-
- // Returns true iff the test part fatally failed.
- bool fatally_failed() const { return type_ == kFatalFailure; }
-
- private:
- Type type_;
-
- // Gets the summary of the failure message by omitting the stack
- // trace in it.
- static std::string ExtractSummary(const char* message);
-
- // The name of the source file where the test part took place, or
- // "" if the source file is unknown.
- std::string file_name_;
- // The line in the source file where the test part took place, or -1
- // if the line number is unknown.
- int line_number_;
- std::string summary_; // The test failure summary.
- std::string message_; // The test failure message.
-};
-
-// Prints a TestPartResult object.
-std::ostream& operator<<(std::ostream& os, const TestPartResult& result);
-
-// An array of TestPartResult objects.
-//
-// Don't inherit from TestPartResultArray as its destructor is not
-// virtual.
-class GTEST_API_ TestPartResultArray {
- public:
- TestPartResultArray() {}
-
- // Appends the given TestPartResult to the array.
- void Append(const TestPartResult& result);
-
- // Returns the TestPartResult at the given index (0-based).
- const TestPartResult& GetTestPartResult(int index) const;
-
- // Returns the number of TestPartResult objects in the array.
- int size() const;
-
- private:
- std::vector<TestPartResult> array_;
-
- GTEST_DISALLOW_COPY_AND_ASSIGN_(TestPartResultArray);
-};
-
-// This interface knows how to report a test part result.
-class TestPartResultReporterInterface {
- public:
- virtual ~TestPartResultReporterInterface() {}
-
- virtual void ReportTestPartResult(const TestPartResult& result) = 0;
-};
-
-namespace internal {
-
-// This helper class is used by {ASSERT|EXPECT}_NO_FATAL_FAILURE to check if a
-// statement generates new fatal failures. To do so it registers itself as the
-// current test part result reporter. Besides checking if fatal failures were
-// reported, it only delegates the reporting to the former result reporter.
-// The original result reporter is restored in the destructor.
-// INTERNAL IMPLEMENTATION - DO NOT USE IN A USER PROGRAM.
-class GTEST_API_ HasNewFatalFailureHelper
- : public TestPartResultReporterInterface {
- public:
- HasNewFatalFailureHelper();
- virtual ~HasNewFatalFailureHelper();
- virtual void ReportTestPartResult(const TestPartResult& result);
- bool has_new_fatal_failure() const { return has_new_fatal_failure_; }
- private:
- bool has_new_fatal_failure_;
- TestPartResultReporterInterface* original_reporter_;
-
- GTEST_DISALLOW_COPY_AND_ASSIGN_(HasNewFatalFailureHelper);
-};
-
-} // namespace internal
-
-} // namespace testing
-
-#endif // GTEST_INCLUDE_GTEST_GTEST_TEST_PART_H_
diff --git a/third_party/aom/third_party/googletest/src/googletest/include/gtest/gtest-typed-test.h b/third_party/aom/third_party/googletest/src/googletest/include/gtest/gtest-typed-test.h
deleted file mode 100644
index 5f69d5678..000000000
--- a/third_party/aom/third_party/googletest/src/googletest/include/gtest/gtest-typed-test.h
+++ /dev/null
@@ -1,263 +0,0 @@
-// Copyright 2008 Google Inc.
-// All Rights Reserved.
-//
-// Redistribution and use in source and binary forms, with or without
-// modification, are permitted provided that the following conditions are
-// met:
-//
-// * Redistributions of source code must retain the above copyright
-// notice, this list of conditions and the following disclaimer.
-// * Redistributions in binary form must reproduce the above
-// copyright notice, this list of conditions and the following disclaimer
-// in the documentation and/or other materials provided with the
-// distribution.
-// * Neither the name of Google Inc. nor the names of its
-// contributors may be used to endorse or promote products derived from
-// this software without specific prior written permission.
-//
-// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
-// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
-// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
-// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
-// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
-// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
-// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
-// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
-// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-//
-// Author: wan@google.com (Zhanyong Wan)
-
-#ifndef GTEST_INCLUDE_GTEST_GTEST_TYPED_TEST_H_
-#define GTEST_INCLUDE_GTEST_GTEST_TYPED_TEST_H_
-
-// This header implements typed tests and type-parameterized tests.
-
-// Typed (aka type-driven) tests repeat the same test for types in a
-// list. You must know which types you want to test with when writing
-// typed tests. Here's how you do it:
-
-#if 0
-
-// First, define a fixture class template. It should be parameterized
-// by a type. Remember to derive it from testing::Test.
-template <typename T>
-class FooTest : public testing::Test {
- public:
- ...
- typedef std::list<T> List;
- static T shared_;
- T value_;
-};
-
-// Next, associate a list of types with the test case, which will be
-// repeated for each type in the list. The typedef is necessary for
-// the macro to parse correctly.
-typedef testing::Types<char, int, unsigned int> MyTypes;
-TYPED_TEST_CASE(FooTest, MyTypes);
-
-// If the type list contains only one type, you can write that type
-// directly without Types<...>:
-// TYPED_TEST_CASE(FooTest, int);
-
-// Then, use TYPED_TEST() instead of TEST_F() to define as many typed
-// tests for this test case as you want.
-TYPED_TEST(FooTest, DoesBlah) {
- // Inside a test, refer to TypeParam to get the type parameter.
- // Since we are inside a derived class template, C++ requires us to
- // visit the members of FooTest via 'this'.
- TypeParam n = this->value_;
-
- // To visit static members of the fixture, add the TestFixture::
- // prefix.
- n += TestFixture::shared_;
-
- // To refer to typedefs in the fixture, add the "typename
- // TestFixture::" prefix.
- typename TestFixture::List values;
- values.push_back(n);
- ...
-}
-
-TYPED_TEST(FooTest, HasPropertyA) { ... }
-
-#endif // 0
-
-// Type-parameterized tests are abstract test patterns parameterized
-// by a type. Compared with typed tests, type-parameterized tests
-// allow you to define the test pattern without knowing what the type
-// parameters are. The defined pattern can be instantiated with
-// different types any number of times, in any number of translation
-// units.
-//
-// If you are designing an interface or concept, you can define a
-// suite of type-parameterized tests to verify properties that any
-// valid implementation of the interface/concept should have. Then,
-// each implementation can easily instantiate the test suite to verify
-// that it conforms to the requirements, without having to write
-// similar tests repeatedly. Here's an example:
-
-#if 0
-
-// First, define a fixture class template. It should be parameterized
-// by a type. Remember to derive it from testing::Test.
-template <typename T>
-class FooTest : public testing::Test {
- ...
-};
-
-// Next, declare that you will define a type-parameterized test case
-// (the _P suffix is for "parameterized" or "pattern", whichever you
-// prefer):
-TYPED_TEST_CASE_P(FooTest);
-
-// Then, use TYPED_TEST_P() to define as many type-parameterized tests
-// for this type-parameterized test case as you want.
-TYPED_TEST_P(FooTest, DoesBlah) {
- // Inside a test, refer to TypeParam to get the type parameter.
- TypeParam n = 0;
- ...
-}
-
-TYPED_TEST_P(FooTest, HasPropertyA) { ... }
-
-// Now the tricky part: you need to register all test patterns before
-// you can instantiate them. The first argument of the macro is the
-// test case name; the rest are the names of the tests in this test
-// case.
-REGISTER_TYPED_TEST_CASE_P(FooTest,
- DoesBlah, HasPropertyA);
-
-// Finally, you are free to instantiate the pattern with the types you
-// want. If you put the above code in a header file, you can #include
-// it in multiple C++ source files and instantiate it multiple times.
-//
-// To distinguish different instances of the pattern, the first
-// argument to the INSTANTIATE_* macro is a prefix that will be added
-// to the actual test case name. Remember to pick unique prefixes for
-// different instances.
-typedef testing::Types<char, int, unsigned int> MyTypes;
-INSTANTIATE_TYPED_TEST_CASE_P(My, FooTest, MyTypes);
-
-// If the type list contains only one type, you can write that type
-// directly without Types<...>:
-// INSTANTIATE_TYPED_TEST_CASE_P(My, FooTest, int);
-
-#endif // 0
-
-#include "gtest/internal/gtest-port.h"
-#include "gtest/internal/gtest-type-util.h"
-
-// Implements typed tests.
-
-#if GTEST_HAS_TYPED_TEST
-
-// INTERNAL IMPLEMENTATION - DO NOT USE IN USER CODE.
-//
-// Expands to the name of the typedef for the type parameters of the
-// given test case.
-# define GTEST_TYPE_PARAMS_(TestCaseName) gtest_type_params_##TestCaseName##_
-
-// The 'Types' template argument below must have spaces around it
-// since some compilers may choke on '>>' when passing a template
-// instance (e.g. Types<int>)
-# define TYPED_TEST_CASE(CaseName, Types) \
- typedef ::testing::internal::TypeList< Types >::type \
- GTEST_TYPE_PARAMS_(CaseName)
-
-# define TYPED_TEST(CaseName, TestName) \
- template <typename gtest_TypeParam_> \
- class GTEST_TEST_CLASS_NAME_(CaseName, TestName) \
- : public CaseName<gtest_TypeParam_> { \
- private: \
- typedef CaseName<gtest_TypeParam_> TestFixture; \
- typedef gtest_TypeParam_ TypeParam; \
- virtual void TestBody(); \
- }; \
- bool gtest_##CaseName##_##TestName##_registered_ GTEST_ATTRIBUTE_UNUSED_ = \
- ::testing::internal::TypeParameterizedTest< \
- CaseName, \
- ::testing::internal::TemplateSel< \
- GTEST_TEST_CLASS_NAME_(CaseName, TestName)>, \
- GTEST_TYPE_PARAMS_(CaseName)>::Register(\
- "", ::testing::internal::CodeLocation(__FILE__, __LINE__), \
- #CaseName, #TestName, 0); \
- template <typename gtest_TypeParam_> \
- void GTEST_TEST_CLASS_NAME_(CaseName, TestName)<gtest_TypeParam_>::TestBody()
-
-#endif // GTEST_HAS_TYPED_TEST
-
-// Implements type-parameterized tests.
-
-#if GTEST_HAS_TYPED_TEST_P
-
-// INTERNAL IMPLEMENTATION - DO NOT USE IN USER CODE.
-//
-// Expands to the namespace name that the type-parameterized tests for
-// the given type-parameterized test case are defined in. The exact
-// name of the namespace is subject to change without notice.
-# define GTEST_CASE_NAMESPACE_(TestCaseName) \
- gtest_case_##TestCaseName##_
-
-// INTERNAL IMPLEMENTATION - DO NOT USE IN USER CODE.
-//
-// Expands to the name of the variable used to remember the names of
-// the defined tests in the given test case.
-# define GTEST_TYPED_TEST_CASE_P_STATE_(TestCaseName) \
- gtest_typed_test_case_p_state_##TestCaseName##_
-
-// INTERNAL IMPLEMENTATION - DO NOT USE IN USER CODE DIRECTLY.
-//
-// Expands to the name of the variable used to remember the names of
-// the registered tests in the given test case.
-# define GTEST_REGISTERED_TEST_NAMES_(TestCaseName) \
- gtest_registered_test_names_##TestCaseName##_
-
-// The variables defined in the type-parameterized test macros are
-// static as typically these macros are used in a .h file that can be
-// #included in multiple translation units linked together.
-# define TYPED_TEST_CASE_P(CaseName) \
- static ::testing::internal::TypedTestCasePState \
- GTEST_TYPED_TEST_CASE_P_STATE_(CaseName)
-
-# define TYPED_TEST_P(CaseName, TestName) \
- namespace GTEST_CASE_NAMESPACE_(CaseName) { \
- template <typename gtest_TypeParam_> \
- class TestName : public CaseName<gtest_TypeParam_> { \
- private: \
- typedef CaseName<gtest_TypeParam_> TestFixture; \
- typedef gtest_TypeParam_ TypeParam; \
- virtual void TestBody(); \
- }; \
- static bool gtest_##TestName##_defined_ GTEST_ATTRIBUTE_UNUSED_ = \
- GTEST_TYPED_TEST_CASE_P_STATE_(CaseName).AddTestName(\
- __FILE__, __LINE__, #CaseName, #TestName); \
- } \
- template <typename gtest_TypeParam_> \
- void GTEST_CASE_NAMESPACE_(CaseName)::TestName<gtest_TypeParam_>::TestBody()
-
-# define REGISTER_TYPED_TEST_CASE_P(CaseName, ...) \
- namespace GTEST_CASE_NAMESPACE_(CaseName) { \
- typedef ::testing::internal::Templates<__VA_ARGS__>::type gtest_AllTests_; \
- } \
- static const char* const GTEST_REGISTERED_TEST_NAMES_(CaseName) = \
- GTEST_TYPED_TEST_CASE_P_STATE_(CaseName).VerifyRegisteredTestNames(\
- __FILE__, __LINE__, #__VA_ARGS__)
-
-// The 'Types' template argument below must have spaces around it
-// since some compilers may choke on '>>' when passing a template
-// instance (e.g. Types<int>)
-# define INSTANTIATE_TYPED_TEST_CASE_P(Prefix, CaseName, Types) \
- bool gtest_##Prefix##_##CaseName GTEST_ATTRIBUTE_UNUSED_ = \
- ::testing::internal::TypeParameterizedTestCase<CaseName, \
- GTEST_CASE_NAMESPACE_(CaseName)::gtest_AllTests_, \
- ::testing::internal::TypeList< Types >::type>::Register(\
- #Prefix, \
- ::testing::internal::CodeLocation(__FILE__, __LINE__), \
- &GTEST_TYPED_TEST_CASE_P_STATE_(CaseName), \
- #CaseName, GTEST_REGISTERED_TEST_NAMES_(CaseName))
-
-#endif // GTEST_HAS_TYPED_TEST_P
-
-#endif // GTEST_INCLUDE_GTEST_GTEST_TYPED_TEST_H_
diff --git a/third_party/aom/third_party/googletest/src/googletest/include/gtest/gtest.h b/third_party/aom/third_party/googletest/src/googletest/include/gtest/gtest.h
deleted file mode 100644
index f846c5bd6..000000000
--- a/third_party/aom/third_party/googletest/src/googletest/include/gtest/gtest.h
+++ /dev/null
@@ -1,2236 +0,0 @@
-// Copyright 2005, Google Inc.
-// All rights reserved.
-//
-// Redistribution and use in source and binary forms, with or without
-// modification, are permitted provided that the following conditions are
-// met:
-//
-// * Redistributions of source code must retain the above copyright
-// notice, this list of conditions and the following disclaimer.
-// * Redistributions in binary form must reproduce the above
-// copyright notice, this list of conditions and the following disclaimer
-// in the documentation and/or other materials provided with the
-// distribution.
-// * Neither the name of Google Inc. nor the names of its
-// contributors may be used to endorse or promote products derived from
-// this software without specific prior written permission.
-//
-// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
-// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
-// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
-// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
-// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
-// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
-// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
-// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
-// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-//
-// Author: wan@google.com (Zhanyong Wan)
-//
-// The Google C++ Testing Framework (Google Test)
-//
-// This header file defines the public API for Google Test. It should be
-// included by any test program that uses Google Test.
-//
-// IMPORTANT NOTE: Due to limitations of the C++ language, we have to
-// leave some internal implementation details in this header file.
-// They are clearly marked by comments like this:
-//
-// // INTERNAL IMPLEMENTATION - DO NOT USE IN A USER PROGRAM.
-//
-// Such code is NOT meant to be used by a user directly, and is subject
-// to CHANGE WITHOUT NOTICE. Therefore DO NOT DEPEND ON IT in a user
-// program!
-//
-// Acknowledgment: Google Test borrowed the idea of automatic test
-// registration from Barthelemy Dagenais' (barthelemy@prologique.com)
-// easyUnit framework.
-
-#ifndef GTEST_INCLUDE_GTEST_GTEST_H_
-#define GTEST_INCLUDE_GTEST_GTEST_H_
-
-#include <limits>
-#include <ostream>
-#include <vector>
-
-#include "gtest/internal/gtest-internal.h"
-#include "gtest/internal/gtest-string.h"
-#include "gtest/gtest-death-test.h"
-#include "gtest/gtest-message.h"
-#include "gtest/gtest-param-test.h"
-#include "gtest/gtest-printers.h"
-#include "gtest/gtest_prod.h"
-#include "gtest/gtest-test-part.h"
-#include "gtest/gtest-typed-test.h"
-
-// Depending on the platform, different string classes are available.
-// On Linux, in addition to ::std::string, Google also makes use of
-// class ::string, which has the same interface as ::std::string, but
-// has a different implementation.
-//
-// You can define GTEST_HAS_GLOBAL_STRING to 1 to indicate that
-// ::string is available AND is a distinct type to ::std::string, or
-// define it to 0 to indicate otherwise.
-//
-// If ::std::string and ::string are the same class on your platform
-// due to aliasing, you should define GTEST_HAS_GLOBAL_STRING to 0.
-//
-// If you do not define GTEST_HAS_GLOBAL_STRING, it is defined
-// heuristically.
-
-namespace testing {
-
-// Declares the flags.
-
-// This flag temporarily enables the disabled tests.
-GTEST_DECLARE_bool_(also_run_disabled_tests);
-
-// This flag brings the debugger on an assertion failure.
-GTEST_DECLARE_bool_(break_on_failure);
-
-// This flag controls whether Google Test catches all test-thrown exceptions
-// and logs them as failures.
-GTEST_DECLARE_bool_(catch_exceptions);
-
-// This flag enables using colors in terminal output. Available values are
-// "yes" to enable colors, "no" (disable colors), or "auto" (the default)
-// to let Google Test decide.
-GTEST_DECLARE_string_(color);
-
-// This flag sets up the filter to select by name using a glob pattern
-// the tests to run. If the filter is not given all tests are executed.
-GTEST_DECLARE_string_(filter);
-
-// This flag causes Google Test to list tests. None of the tests listed
-// are actually run if the flag is provided.
-GTEST_DECLARE_bool_(list_tests);
-
-// This flag controls whether Google Test emits a detailed XML report to a file
-// in addition to its normal textual output.
-GTEST_DECLARE_string_(output);
-
-// This flag controls whether Google Test prints the elapsed time for each
-// test.
-GTEST_DECLARE_bool_(print_time);
-
-// This flag specifies the random number seed.
-GTEST_DECLARE_int32_(random_seed);
-
-// This flag sets how many times the tests are repeated. The default value
-// is 1. If the value is -1 the tests are repeated forever.
-GTEST_DECLARE_int32_(repeat);
-
-// This flag controls whether Google Test includes Google Test internal
-// stack frames in failure stack traces.
-GTEST_DECLARE_bool_(show_internal_stack_frames);
-
-// When this flag is specified, tests' order is randomized on every iteration.
-GTEST_DECLARE_bool_(shuffle);
-
-// This flag specifies the maximum number of stack frames to be
-// printed in a failure message.
-GTEST_DECLARE_int32_(stack_trace_depth);
-
-// When this flag is specified, a failed assertion will throw an
-// exception if exceptions are enabled, or exit the program with a
-// non-zero code otherwise.
-GTEST_DECLARE_bool_(throw_on_failure);
-
-// When this flag is set with a "host:port" string, on supported
-// platforms test results are streamed to the specified port on
-// the specified host machine.
-GTEST_DECLARE_string_(stream_result_to);
-
-// The upper limit for valid stack trace depths.
-const int kMaxStackTraceDepth = 100;
-
-namespace internal {
-
-class AssertHelper;
-class DefaultGlobalTestPartResultReporter;
-class ExecDeathTest;
-class NoExecDeathTest;
-class FinalSuccessChecker;
-class GTestFlagSaver;
-class StreamingListenerTest;
-class TestResultAccessor;
-class TestEventListenersAccessor;
-class TestEventRepeater;
-class UnitTestRecordPropertyTestHelper;
-class WindowsDeathTest;
-class UnitTestImpl* GetUnitTestImpl();
-void ReportFailureInUnknownLocation(TestPartResult::Type result_type,
- const std::string& message);
-
-} // namespace internal
-
-// The friend relationship of some of these classes is cyclic.
-// If we don't forward declare them the compiler might confuse the classes
-// in friendship clauses with same named classes on the scope.
-class Test;
-class TestCase;
-class TestInfo;
-class UnitTest;
-
-// A class for indicating whether an assertion was successful. When
-// the assertion wasn't successful, the AssertionResult object
-// remembers a non-empty message that describes how it failed.
-//
-// To create an instance of this class, use one of the factory functions
-// (AssertionSuccess() and AssertionFailure()).
-//
-// This class is useful for two purposes:
-// 1. Defining predicate functions to be used with Boolean test assertions
-// EXPECT_TRUE/EXPECT_FALSE and their ASSERT_ counterparts
-// 2. Defining predicate-format functions to be
-// used with predicate assertions (ASSERT_PRED_FORMAT*, etc).
-//
-// For example, if you define IsEven predicate:
-//
-// testing::AssertionResult IsEven(int n) {
-// if ((n % 2) == 0)
-// return testing::AssertionSuccess();
-// else
-// return testing::AssertionFailure() << n << " is odd";
-// }
-//
-// Then the failed expectation EXPECT_TRUE(IsEven(Fib(5)))
-// will print the message
-//
-// Value of: IsEven(Fib(5))
-// Actual: false (5 is odd)
-// Expected: true
-//
-// instead of a more opaque
-//
-// Value of: IsEven(Fib(5))
-// Actual: false
-// Expected: true
-//
-// in case IsEven is a simple Boolean predicate.
-//
-// If you expect your predicate to be reused and want to support informative
-// messages in EXPECT_FALSE and ASSERT_FALSE (negative assertions show up
-// about half as often as positive ones in our tests), supply messages for
-// both success and failure cases:
-//
-// testing::AssertionResult IsEven(int n) {
-// if ((n % 2) == 0)
-// return testing::AssertionSuccess() << n << " is even";
-// else
-// return testing::AssertionFailure() << n << " is odd";
-// }
-//
-// Then a statement EXPECT_FALSE(IsEven(Fib(6))) will print
-//
-// Value of: IsEven(Fib(6))
-// Actual: true (8 is even)
-// Expected: false
-//
-// NB: Predicates that support negative Boolean assertions have reduced
-// performance in positive ones so be careful not to use them in tests
-// that have lots (tens of thousands) of positive Boolean assertions.
-//
-// To use this class with EXPECT_PRED_FORMAT assertions such as:
-//
-// // Verifies that Foo() returns an even number.
-// EXPECT_PRED_FORMAT1(IsEven, Foo());
-//
-// you need to define:
-//
-// testing::AssertionResult IsEven(const char* expr, int n) {
-// if ((n % 2) == 0)
-// return testing::AssertionSuccess();
-// else
-// return testing::AssertionFailure()
-// << "Expected: " << expr << " is even\n Actual: it's " << n;
-// }
-//
-// If Foo() returns 5, you will see the following message:
-//
-// Expected: Foo() is even
-// Actual: it's 5
-//
-class GTEST_API_ AssertionResult {
- public:
- // Copy constructor.
- // Used in EXPECT_TRUE/FALSE(assertion_result).
- AssertionResult(const AssertionResult& other);
-
- GTEST_DISABLE_MSC_WARNINGS_PUSH_(4800 /* forcing value to bool */)
-
- // Used in the EXPECT_TRUE/FALSE(bool_expression).
- //
- // T must be contextually convertible to bool.
- //
- // The second parameter prevents this overload from being considered if
- // the argument is implicitly convertible to AssertionResult. In that case
- // we want AssertionResult's copy constructor to be used.
- template <typename T>
- explicit AssertionResult(
- const T& success,
- typename internal::EnableIf<
- !internal::ImplicitlyConvertible<T, AssertionResult>::value>::type*
- /*enabler*/ = NULL)
- : success_(success) {}
-
- GTEST_DISABLE_MSC_WARNINGS_POP_()
-
- // Assignment operator.
- AssertionResult& operator=(AssertionResult other) {
- swap(other);
- return *this;
- }
-
- // Returns true iff the assertion succeeded.
- operator bool() const { return success_; } // NOLINT
-
- // Returns the assertion's negation. Used with EXPECT/ASSERT_FALSE.
- AssertionResult operator!() const;
-
- // Returns the text streamed into this AssertionResult. Test assertions
- // use it when they fail (i.e., the predicate's outcome doesn't match the
- // assertion's expectation). When nothing has been streamed into the
- // object, returns an empty string.
- const char* message() const {
- return message_.get() != NULL ? message_->c_str() : "";
- }
- // TODO(vladl@google.com): Remove this after making sure no clients use it.
- // Deprecated; please use message() instead.
- const char* failure_message() const { return message(); }
-
- // Streams a custom failure message into this object.
- template <typename T> AssertionResult& operator<<(const T& value) {
- AppendMessage(Message() << value);
- return *this;
- }
-
- // Allows streaming basic output manipulators such as endl or flush into
- // this object.
- AssertionResult& operator<<(
- ::std::ostream& (*basic_manipulator)(::std::ostream& stream)) {
- AppendMessage(Message() << basic_manipulator);
- return *this;
- }
-
- private:
- // Appends the contents of message to message_.
- void AppendMessage(const Message& a_message) {
- if (message_.get() == NULL)
- message_.reset(new ::std::string);
- message_->append(a_message.GetString().c_str());
- }
-
- // Swap the contents of this AssertionResult with other.
- void swap(AssertionResult& other);
-
- // Stores result of the assertion predicate.
- bool success_;
- // Stores the message describing the condition in case the expectation
- // construct is not satisfied with the predicate's outcome.
- // Referenced via a pointer to avoid taking too much stack frame space
- // with test assertions.
- internal::scoped_ptr< ::std::string> message_;
-};
-
-// Makes a successful assertion result.
-GTEST_API_ AssertionResult AssertionSuccess();
-
-// Makes a failed assertion result.
-GTEST_API_ AssertionResult AssertionFailure();
-
-// Makes a failed assertion result with the given failure message.
-// Deprecated; use AssertionFailure() << msg.
-GTEST_API_ AssertionResult AssertionFailure(const Message& msg);
-
-// The abstract class that all tests inherit from.
-//
-// In Google Test, a unit test program contains one or many TestCases, and
-// each TestCase contains one or many Tests.
-//
-// When you define a test using the TEST macro, you don't need to
-// explicitly derive from Test - the TEST macro automatically does
-// this for you.
-//
-// The only time you derive from Test is when defining a test fixture
-// to be used in a TEST_F. For example:
-//
-// class FooTest : public testing::Test {
-// protected:
-// void SetUp() override { ... }
-// void TearDown() override { ... }
-// ...
-// };
-//
-// TEST_F(FooTest, Bar) { ... }
-// TEST_F(FooTest, Baz) { ... }
-//
-// Test is not copyable.
-class GTEST_API_ Test {
- public:
- friend class TestInfo;
-
- // Defines types for pointers to functions that set up and tear down
- // a test case.
- typedef internal::SetUpTestCaseFunc SetUpTestCaseFunc;
- typedef internal::TearDownTestCaseFunc TearDownTestCaseFunc;
-
- // The d'tor is virtual as we intend to inherit from Test.
- virtual ~Test();
-
- // Sets up the stuff shared by all tests in this test case.
- //
- // Google Test will call Foo::SetUpTestCase() before running the first
- // test in test case Foo. Hence a sub-class can define its own
- // SetUpTestCase() method to shadow the one defined in the super
- // class.
- static void SetUpTestCase() {}
-
- // Tears down the stuff shared by all tests in this test case.
- //
- // Google Test will call Foo::TearDownTestCase() after running the last
- // test in test case Foo. Hence a sub-class can define its own
- // TearDownTestCase() method to shadow the one defined in the super
- // class.
- static void TearDownTestCase() {}
-
- // Returns true iff the current test has a fatal failure.
- static bool HasFatalFailure();
-
- // Returns true iff the current test has a non-fatal failure.
- static bool HasNonfatalFailure();
-
- // Returns true iff the current test has a (either fatal or
- // non-fatal) failure.
- static bool HasFailure() { return HasFatalFailure() || HasNonfatalFailure(); }
-
- // Logs a property for the current test, test case, or for the entire
- // invocation of the test program when used outside of the context of a
- // test case. Only the last value for a given key is remembered. These
- // are public static so they can be called from utility functions that are
- // not members of the test fixture. Calls to RecordProperty made during
- // lifespan of the test (from the moment its constructor starts to the
- // moment its destructor finishes) will be output in XML as attributes of
- // the <testcase> element. Properties recorded from fixture's
- // SetUpTestCase or TearDownTestCase are logged as attributes of the
- // corresponding <testsuite> element. Calls to RecordProperty made in the
- // global context (before or after invocation of RUN_ALL_TESTS and from
- // SetUp/TearDown method of Environment objects registered with Google
- // Test) will be output as attributes of the <testsuites> element.
- static void RecordProperty(const std::string& key, const std::string& value);
- static void RecordProperty(const std::string& key, int value);
-
- protected:
- // Creates a Test object.
- Test();
-
- // Sets up the test fixture.
- virtual void SetUp();
-
- // Tears down the test fixture.
- virtual void TearDown();
-
- private:
- // Returns true iff the current test has the same fixture class as
- // the first test in the current test case.
- static bool HasSameFixtureClass();
-
- // Runs the test after the test fixture has been set up.
- //
- // A sub-class must implement this to define the test logic.
- //
- // DO NOT OVERRIDE THIS FUNCTION DIRECTLY IN A USER PROGRAM.
- // Instead, use the TEST or TEST_F macro.
- virtual void TestBody() = 0;
-
- // Sets up, executes, and tears down the test.
- void Run();
-
- // Deletes self. We deliberately pick an unusual name for this
- // internal method to avoid clashing with names used in user TESTs.
- void DeleteSelf_() { delete this; }
-
- const internal::scoped_ptr< GTEST_FLAG_SAVER_ > gtest_flag_saver_;
-
- // Often a user misspells SetUp() as Setup() and spends a long time
- // wondering why it is never called by Google Test. The declaration of
- // the following method is solely for catching such an error at
- // compile time:
- //
- // - The return type is deliberately chosen to be not void, so it
- // will be a conflict if void Setup() is declared in the user's
- // test fixture.
- //
- // - This method is private, so it will be another compiler error
- // if the method is called from the user's test fixture.
- //
- // DO NOT OVERRIDE THIS FUNCTION.
- //
- // If you see an error about overriding the following function or
- // about it being private, you have mis-spelled SetUp() as Setup().
- struct Setup_should_be_spelled_SetUp {};
- virtual Setup_should_be_spelled_SetUp* Setup() { return NULL; }
-
- // We disallow copying Tests.
- GTEST_DISALLOW_COPY_AND_ASSIGN_(Test);
-};
-
-typedef internal::TimeInMillis TimeInMillis;
-
-// A copyable object representing a user specified test property which can be
-// output as a key/value string pair.
-//
-// Don't inherit from TestProperty as its destructor is not virtual.
-class TestProperty {
- public:
- // C'tor. TestProperty does NOT have a default constructor.
- // Always use this constructor (with parameters) to create a
- // TestProperty object.
- TestProperty(const std::string& a_key, const std::string& a_value) :
- key_(a_key), value_(a_value) {
- }
-
- // Gets the user supplied key.
- const char* key() const {
- return key_.c_str();
- }
-
- // Gets the user supplied value.
- const char* value() const {
- return value_.c_str();
- }
-
- // Sets a new value, overriding the one supplied in the constructor.
- void SetValue(const std::string& new_value) {
- value_ = new_value;
- }
-
- private:
- // The key supplied by the user.
- std::string key_;
- // The value supplied by the user.
- std::string value_;
-};
-
-// The result of a single Test. This includes a list of
-// TestPartResults, a list of TestProperties, a count of how many
-// death tests there are in the Test, and how much time it took to run
-// the Test.
-//
-// TestResult is not copyable.
-class GTEST_API_ TestResult {
- public:
- // Creates an empty TestResult.
- TestResult();
-
- // D'tor. Do not inherit from TestResult.
- ~TestResult();
-
- // Gets the number of all test parts. This is the sum of the number
- // of successful test parts and the number of failed test parts.
- int total_part_count() const;
-
- // Returns the number of the test properties.
- int test_property_count() const;
-
- // Returns true iff the test passed (i.e. no test part failed).
- bool Passed() const { return !Failed(); }
-
- // Returns true iff the test failed.
- bool Failed() const;
-
- // Returns true iff the test fatally failed.
- bool HasFatalFailure() const;
-
- // Returns true iff the test has a non-fatal failure.
- bool HasNonfatalFailure() const;
-
- // Returns the elapsed time, in milliseconds.
- TimeInMillis elapsed_time() const { return elapsed_time_; }
-
- // Returns the i-th test part result among all the results. i can range
- // from 0 to total_part_count() - 1. If i is not in that range, aborts
- // the program.
- const TestPartResult& GetTestPartResult(int i) const;
-
- // Returns the i-th test property. i can range from 0 to
- // test_property_count() - 1. If i is not in that range, aborts the
- // program.
- const TestProperty& GetTestProperty(int i) const;
-
- private:
- friend class TestInfo;
- friend class TestCase;
- friend class UnitTest;
- friend class internal::DefaultGlobalTestPartResultReporter;
- friend class internal::ExecDeathTest;
- friend class internal::TestResultAccessor;
- friend class internal::UnitTestImpl;
- friend class internal::WindowsDeathTest;
-
- // Gets the vector of TestPartResults.
- const std::vector<TestPartResult>& test_part_results() const {
- return test_part_results_;
- }
-
- // Gets the vector of TestProperties.
- const std::vector<TestProperty>& test_properties() const {
- return test_properties_;
- }
-
- // Sets the elapsed time.
- void set_elapsed_time(TimeInMillis elapsed) { elapsed_time_ = elapsed; }
-
- // Adds a test property to the list. The property is validated and may add
- // a non-fatal failure if invalid (e.g., if it conflicts with reserved
- // key names). If a property is already recorded for the same key, the
- // value will be updated, rather than storing multiple values for the same
- // key. xml_element specifies the element for which the property is being
- // recorded and is used for validation.
- void RecordProperty(const std::string& xml_element,
- const TestProperty& test_property);
-
- // Adds a failure if the key is a reserved attribute of Google Test
- // testcase tags. Returns true if the property is valid.
- // TODO(russr): Validate attribute names are legal and human readable.
- static bool ValidateTestProperty(const std::string& xml_element,
- const TestProperty& test_property);
-
- // Adds a test part result to the list.
- void AddTestPartResult(const TestPartResult& test_part_result);
-
- // Returns the death test count.
- int death_test_count() const { return death_test_count_; }
-
- // Increments the death test count, returning the new count.
- int increment_death_test_count() { return ++death_test_count_; }
-
- // Clears the test part results.
- void ClearTestPartResults();
-
- // Clears the object.
- void Clear();
-
- // Protects mutable state of the property vector and of owned
- // properties, whose values may be updated.
- internal::Mutex test_properites_mutex_;
-
- // The vector of TestPartResults
- std::vector<TestPartResult> test_part_results_;
- // The vector of TestProperties
- std::vector<TestProperty> test_properties_;
- // Running count of death tests.
- int death_test_count_;
- // The elapsed time, in milliseconds.
- TimeInMillis elapsed_time_;
-
- // We disallow copying TestResult.
- GTEST_DISALLOW_COPY_AND_ASSIGN_(TestResult);
-}; // class TestResult
-
-// A TestInfo object stores the following information about a test:
-//
-// Test case name
-// Test name
-// Whether the test should be run
-// A function pointer that creates the test object when invoked
-// Test result
-//
-// The constructor of TestInfo registers itself with the UnitTest
-// singleton such that the RUN_ALL_TESTS() macro knows which tests to
-// run.
-class GTEST_API_ TestInfo {
- public:
- // Destructs a TestInfo object. This function is not virtual, so
- // don't inherit from TestInfo.
- ~TestInfo();
-
- // Returns the test case name.
- const char* test_case_name() const { return test_case_name_.c_str(); }
-
- // Returns the test name.
- const char* name() const { return name_.c_str(); }
-
- // Returns the name of the parameter type, or NULL if this is not a typed
- // or a type-parameterized test.
- const char* type_param() const {
- if (type_param_.get() != NULL)
- return type_param_->c_str();
- return NULL;
- }
-
- // Returns the text representation of the value parameter, or NULL if this
- // is not a value-parameterized test.
- const char* value_param() const {
- if (value_param_.get() != NULL)
- return value_param_->c_str();
- return NULL;
- }
-
- // Returns the file name where this test is defined.
- const char* file() const { return location_.file.c_str(); }
-
- // Returns the line where this test is defined.
- int line() const { return location_.line; }
-
- // Returns true if this test should run, that is if the test is not
- // disabled (or it is disabled but the also_run_disabled_tests flag has
- // been specified) and its full name matches the user-specified filter.
- //
- // Google Test allows the user to filter the tests by their full names.
- // The full name of a test Bar in test case Foo is defined as
- // "Foo.Bar". Only the tests that match the filter will run.
- //
- // A filter is a colon-separated list of glob (not regex) patterns,
- // optionally followed by a '-' and a colon-separated list of
- // negative patterns (tests to exclude). A test is run if it
- // matches one of the positive patterns and does not match any of
- // the negative patterns.
- //
- // For example, *A*:Foo.* is a filter that matches any string that
- // contains the character 'A' or starts with "Foo.".
- bool should_run() const { return should_run_; }
-
- // Returns true iff this test will appear in the XML report.
- bool is_reportable() const {
- // For now, the XML report includes all tests matching the filter.
- // In the future, we may trim tests that are excluded because of
- // sharding.
- return matches_filter_;
- }
-
- // Returns the result of the test.
- const TestResult* result() const { return &result_; }
-
- private:
-#if GTEST_HAS_DEATH_TEST
- friend class internal::DefaultDeathTestFactory;
-#endif // GTEST_HAS_DEATH_TEST
- friend class Test;
- friend class TestCase;
- friend class internal::UnitTestImpl;
- friend class internal::StreamingListenerTest;
- friend TestInfo* internal::MakeAndRegisterTestInfo(
- const char* test_case_name,
- const char* name,
- const char* type_param,
- const char* value_param,
- internal::CodeLocation code_location,
- internal::TypeId fixture_class_id,
- Test::SetUpTestCaseFunc set_up_tc,
- Test::TearDownTestCaseFunc tear_down_tc,
- internal::TestFactoryBase* factory);
-
- // Constructs a TestInfo object. The newly constructed instance assumes
- // ownership of the factory object.
- TestInfo(const std::string& test_case_name,
- const std::string& name,
- const char* a_type_param, // NULL if not a type-parameterized test
- const char* a_value_param, // NULL if not a value-parameterized test
- internal::CodeLocation a_code_location,
- internal::TypeId fixture_class_id,
- internal::TestFactoryBase* factory);
-
- // Increments the number of death tests encountered in this test so
- // far.
- int increment_death_test_count() {
- return result_.increment_death_test_count();
- }
-
- // Creates the test object, runs it, records its result, and then
- // deletes it.
- void Run();
-
- static void ClearTestResult(TestInfo* test_info) {
- test_info->result_.Clear();
- }
-
- // These fields are immutable properties of the test.
- const std::string test_case_name_; // Test case name
- const std::string name_; // Test name
- // Name of the parameter type, or NULL if this is not a typed or a
- // type-parameterized test.
- const internal::scoped_ptr<const ::std::string> type_param_;
- // Text representation of the value parameter, or NULL if this is not a
- // value-parameterized test.
- const internal::scoped_ptr<const ::std::string> value_param_;
- internal::CodeLocation location_;
- const internal::TypeId fixture_class_id_; // ID of the test fixture class
- bool should_run_; // True iff this test should run
- bool is_disabled_; // True iff this test is disabled
- bool matches_filter_; // True if this test matches the
- // user-specified filter.
- internal::TestFactoryBase* const factory_; // The factory that creates
- // the test object
-
- // This field is mutable and needs to be reset before running the
- // test for the second time.
- TestResult result_;
-
- GTEST_DISALLOW_COPY_AND_ASSIGN_(TestInfo);
-};
-
-// A test case, which consists of a vector of TestInfos.
-//
-// TestCase is not copyable.
-class GTEST_API_ TestCase {
- public:
- // Creates a TestCase with the given name.
- //
- // TestCase does NOT have a default constructor. Always use this
- // constructor to create a TestCase object.
- //
- // Arguments:
- //
- // name: name of the test case
- // a_type_param: the name of the test's type parameter, or NULL if
- // this is not a type-parameterized test.
- // set_up_tc: pointer to the function that sets up the test case
- // tear_down_tc: pointer to the function that tears down the test case
- TestCase(const char* name, const char* a_type_param,
- Test::SetUpTestCaseFunc set_up_tc,
- Test::TearDownTestCaseFunc tear_down_tc);
-
- // Destructor of TestCase.
- virtual ~TestCase();
-
- // Gets the name of the TestCase.
- const char* name() const { return name_.c_str(); }
-
- // Returns the name of the parameter type, or NULL if this is not a
- // type-parameterized test case.
- const char* type_param() const {
- if (type_param_.get() != NULL)
- return type_param_->c_str();
- return NULL;
- }
-
- // Returns true if any test in this test case should run.
- bool should_run() const { return should_run_; }
-
- // Gets the number of successful tests in this test case.
- int successful_test_count() const;
-
- // Gets the number of failed tests in this test case.
- int failed_test_count() const;
-
- // Gets the number of disabled tests that will be reported in the XML report.
- int reportable_disabled_test_count() const;
-
- // Gets the number of disabled tests in this test case.
- int disabled_test_count() const;
-
- // Gets the number of tests to be printed in the XML report.
- int reportable_test_count() const;
-
- // Get the number of tests in this test case that should run.
- int test_to_run_count() const;
-
- // Gets the number of all tests in this test case.
- int total_test_count() const;
-
- // Returns true iff the test case passed.
- bool Passed() const { return !Failed(); }
-
- // Returns true iff the test case failed.
- bool Failed() const { return failed_test_count() > 0; }
-
- // Returns the elapsed time, in milliseconds.
- TimeInMillis elapsed_time() const { return elapsed_time_; }
-
- // Returns the i-th test among all the tests. i can range from 0 to
- // total_test_count() - 1. If i is not in that range, returns NULL.
- const TestInfo* GetTestInfo(int i) const;
-
- // Returns the TestResult that holds test properties recorded during
- // execution of SetUpTestCase and TearDownTestCase.
- const TestResult& ad_hoc_test_result() const { return ad_hoc_test_result_; }
-
- private:
- friend class Test;
- friend class internal::UnitTestImpl;
-
- // Gets the (mutable) vector of TestInfos in this TestCase.
- std::vector<TestInfo*>& test_info_list() { return test_info_list_; }
-
- // Gets the (immutable) vector of TestInfos in this TestCase.
- const std::vector<TestInfo*>& test_info_list() const {
- return test_info_list_;
- }
-
- // Returns the i-th test among all the tests. i can range from 0 to
- // total_test_count() - 1. If i is not in that range, returns NULL.
- TestInfo* GetMutableTestInfo(int i);
-
- // Sets the should_run member.
- void set_should_run(bool should) { should_run_ = should; }
-
- // Adds a TestInfo to this test case. Will delete the TestInfo upon
- // destruction of the TestCase object.
- void AddTestInfo(TestInfo * test_info);
-
- // Clears the results of all tests in this test case.
- void ClearResult();
-
- // Clears the results of all tests in the given test case.
- static void ClearTestCaseResult(TestCase* test_case) {
- test_case->ClearResult();
- }
-
- // Runs every test in this TestCase.
- void Run();
-
- // Runs SetUpTestCase() for this TestCase. This wrapper is needed
- // for catching exceptions thrown from SetUpTestCase().
- void RunSetUpTestCase() { (*set_up_tc_)(); }
-
- // Runs TearDownTestCase() for this TestCase. This wrapper is
- // needed for catching exceptions thrown from TearDownTestCase().
- void RunTearDownTestCase() { (*tear_down_tc_)(); }
-
- // Returns true iff test passed.
- static bool TestPassed(const TestInfo* test_info) {
- return test_info->should_run() && test_info->result()->Passed();
- }
-
- // Returns true iff test failed.
- static bool TestFailed(const TestInfo* test_info) {
- return test_info->should_run() && test_info->result()->Failed();
- }
-
- // Returns true iff the test is disabled and will be reported in the XML
- // report.
- static bool TestReportableDisabled(const TestInfo* test_info) {
- return test_info->is_reportable() && test_info->is_disabled_;
- }
-
- // Returns true iff test is disabled.
- static bool TestDisabled(const TestInfo* test_info) {
- return test_info->is_disabled_;
- }
-
- // Returns true iff this test will appear in the XML report.
- static bool TestReportable(const TestInfo* test_info) {
- return test_info->is_reportable();
- }
-
- // Returns true if the given test should run.
- static bool ShouldRunTest(const TestInfo* test_info) {
- return test_info->should_run();
- }
-
- // Shuffles the tests in this test case.
- void ShuffleTests(internal::Random* random);
-
- // Restores the test order to before the first shuffle.
- void UnshuffleTests();
-
- // Name of the test case.
- std::string name_;
- // Name of the parameter type, or NULL if this is not a typed or a
- // type-parameterized test.
- const internal::scoped_ptr<const ::std::string> type_param_;
- // The vector of TestInfos in their original order. It owns the
- // elements in the vector.
- std::vector<TestInfo*> test_info_list_;
- // Provides a level of indirection for the test list to allow easy
- // shuffling and restoring the test order. The i-th element in this
- // vector is the index of the i-th test in the shuffled test list.
- std::vector<int> test_indices_;
- // Pointer to the function that sets up the test case.
- Test::SetUpTestCaseFunc set_up_tc_;
- // Pointer to the function that tears down the test case.
- Test::TearDownTestCaseFunc tear_down_tc_;
- // True iff any test in this test case should run.
- bool should_run_;
- // Elapsed time, in milliseconds.
- TimeInMillis elapsed_time_;
- // Holds test properties recorded during execution of SetUpTestCase and
- // TearDownTestCase.
- TestResult ad_hoc_test_result_;
-
- // We disallow copying TestCases.
- GTEST_DISALLOW_COPY_AND_ASSIGN_(TestCase);
-};
-
-// An Environment object is capable of setting up and tearing down an
-// environment. You should subclass this to define your own
-// environment(s).
-//
-// An Environment object does the set-up and tear-down in virtual
-// methods SetUp() and TearDown() instead of the constructor and the
-// destructor, as:
-//
-// 1. You cannot safely throw from a destructor. This is a problem
-// as in some cases Google Test is used where exceptions are enabled, and
-// we may want to implement ASSERT_* using exceptions where they are
-// available.
-// 2. You cannot use ASSERT_* directly in a constructor or
-// destructor.
-class Environment {
- public:
- // The d'tor is virtual as we need to subclass Environment.
- virtual ~Environment() {}
-
- // Override this to define how to set up the environment.
- virtual void SetUp() {}
-
- // Override this to define how to tear down the environment.
- virtual void TearDown() {}
- private:
- // If you see an error about overriding the following function or
- // about it being private, you have mis-spelled SetUp() as Setup().
- struct Setup_should_be_spelled_SetUp {};
- virtual Setup_should_be_spelled_SetUp* Setup() { return NULL; }
-};
-
-// The interface for tracing execution of tests. The methods are organized in
-// the order the corresponding events are fired.
-class TestEventListener {
- public:
- virtual ~TestEventListener() {}
-
- // Fired before any test activity starts.
- virtual void OnTestProgramStart(const UnitTest& unit_test) = 0;
-
- // Fired before each iteration of tests starts. There may be more than
- // one iteration if GTEST_FLAG(repeat) is set. iteration is the iteration
- // index, starting from 0.
- virtual void OnTestIterationStart(const UnitTest& unit_test,
- int iteration) = 0;
-
- // Fired before environment set-up for each iteration of tests starts.
- virtual void OnEnvironmentsSetUpStart(const UnitTest& unit_test) = 0;
-
- // Fired after environment set-up for each iteration of tests ends.
- virtual void OnEnvironmentsSetUpEnd(const UnitTest& unit_test) = 0;
-
- // Fired before the test case starts.
- virtual void OnTestCaseStart(const TestCase& test_case) = 0;
-
- // Fired before the test starts.
- virtual void OnTestStart(const TestInfo& test_info) = 0;
-
- // Fired after a failed assertion or a SUCCEED() invocation.
- virtual void OnTestPartResult(const TestPartResult& test_part_result) = 0;
-
- // Fired after the test ends.
- virtual void OnTestEnd(const TestInfo& test_info) = 0;
-
- // Fired after the test case ends.
- virtual void OnTestCaseEnd(const TestCase& test_case) = 0;
-
- // Fired before environment tear-down for each iteration of tests starts.
- virtual void OnEnvironmentsTearDownStart(const UnitTest& unit_test) = 0;
-
- // Fired after environment tear-down for each iteration of tests ends.
- virtual void OnEnvironmentsTearDownEnd(const UnitTest& unit_test) = 0;
-
- // Fired after each iteration of tests finishes.
- virtual void OnTestIterationEnd(const UnitTest& unit_test,
- int iteration) = 0;
-
- // Fired after all test activities have ended.
- virtual void OnTestProgramEnd(const UnitTest& unit_test) = 0;
-};
-
-// The convenience class for users who need to override just one or two
-// methods and are not concerned that a possible change to a signature of
-// the methods they override will not be caught during the build. For
-// comments about each method please see the definition of TestEventListener
-// above.
-class EmptyTestEventListener : public TestEventListener {
- public:
- virtual void OnTestProgramStart(const UnitTest& /*unit_test*/) {}
- virtual void OnTestIterationStart(const UnitTest& /*unit_test*/,
- int /*iteration*/) {}
- virtual void OnEnvironmentsSetUpStart(const UnitTest& /*unit_test*/) {}
- virtual void OnEnvironmentsSetUpEnd(const UnitTest& /*unit_test*/) {}
- virtual void OnTestCaseStart(const TestCase& /*test_case*/) {}
- virtual void OnTestStart(const TestInfo& /*test_info*/) {}
- virtual void OnTestPartResult(const TestPartResult& /*test_part_result*/) {}
- virtual void OnTestEnd(const TestInfo& /*test_info*/) {}
- virtual void OnTestCaseEnd(const TestCase& /*test_case*/) {}
- virtual void OnEnvironmentsTearDownStart(const UnitTest& /*unit_test*/) {}
- virtual void OnEnvironmentsTearDownEnd(const UnitTest& /*unit_test*/) {}
- virtual void OnTestIterationEnd(const UnitTest& /*unit_test*/,
- int /*iteration*/) {}
- virtual void OnTestProgramEnd(const UnitTest& /*unit_test*/) {}
-};
-
-// TestEventListeners lets users add listeners to track events in Google Test.
-class GTEST_API_ TestEventListeners {
- public:
- TestEventListeners();
- ~TestEventListeners();
-
- // Appends an event listener to the end of the list. Google Test assumes
- // the ownership of the listener (i.e. it will delete the listener when
- // the test program finishes).
- void Append(TestEventListener* listener);
-
- // Removes the given event listener from the list and returns it. It then
- // becomes the caller's responsibility to delete the listener. Returns
- // NULL if the listener is not found in the list.
- TestEventListener* Release(TestEventListener* listener);
-
- // Returns the standard listener responsible for the default console
- // output. Can be removed from the listeners list to shut down default
- // console output. Note that removing this object from the listener list
- // with Release transfers its ownership to the caller and makes this
- // function return NULL the next time.
- TestEventListener* default_result_printer() const {
- return default_result_printer_;
- }
-
- // Returns the standard listener responsible for the default XML output
- // controlled by the --gtest_output=xml flag. Can be removed from the
- // listeners list by users who want to shut down the default XML output
- // controlled by this flag and substitute it with custom one. Note that
- // removing this object from the listener list with Release transfers its
- // ownership to the caller and makes this function return NULL the next
- // time.
- TestEventListener* default_xml_generator() const {
- return default_xml_generator_;
- }
-
- private:
- friend class TestCase;
- friend class TestInfo;
- friend class internal::DefaultGlobalTestPartResultReporter;
- friend class internal::NoExecDeathTest;
- friend class internal::TestEventListenersAccessor;
- friend class internal::UnitTestImpl;
-
- // Returns repeater that broadcasts the TestEventListener events to all
- // subscribers.
- TestEventListener* repeater();
-
- // Sets the default_result_printer attribute to the provided listener.
- // The listener is also added to the listener list and previous
- // default_result_printer is removed from it and deleted. The listener can
- // also be NULL in which case it will not be added to the list. Does
- // nothing if the previous and the current listener objects are the same.
- void SetDefaultResultPrinter(TestEventListener* listener);
-
- // Sets the default_xml_generator attribute to the provided listener. The
- // listener is also added to the listener list and previous
- // default_xml_generator is removed from it and deleted. The listener can
- // also be NULL in which case it will not be added to the list. Does
- // nothing if the previous and the current listener objects are the same.
- void SetDefaultXmlGenerator(TestEventListener* listener);
-
- // Controls whether events will be forwarded by the repeater to the
- // listeners in the list.
- bool EventForwardingEnabled() const;
- void SuppressEventForwarding();
-
- // The actual list of listeners.
- internal::TestEventRepeater* repeater_;
- // Listener responsible for the standard result output.
- TestEventListener* default_result_printer_;
- // Listener responsible for the creation of the XML output file.
- TestEventListener* default_xml_generator_;
-
- // We disallow copying TestEventListeners.
- GTEST_DISALLOW_COPY_AND_ASSIGN_(TestEventListeners);
-};
-
-// A UnitTest consists of a vector of TestCases.
-//
-// This is a singleton class. The only instance of UnitTest is
-// created when UnitTest::GetInstance() is first called. This
-// instance is never deleted.
-//
-// UnitTest is not copyable.
-//
-// This class is thread-safe as long as the methods are called
-// according to their specification.
-class GTEST_API_ UnitTest {
- public:
- // Gets the singleton UnitTest object. The first time this method
- // is called, a UnitTest object is constructed and returned.
- // Consecutive calls will return the same object.
- static UnitTest* GetInstance();
-
- // Runs all tests in this UnitTest object and prints the result.
- // Returns 0 if successful, or 1 otherwise.
- //
- // This method can only be called from the main thread.
- //
- // INTERNAL IMPLEMENTATION - DO NOT USE IN A USER PROGRAM.
- int Run() GTEST_MUST_USE_RESULT_;
-
- // Returns the working directory when the first TEST() or TEST_F()
- // was executed. The UnitTest object owns the string.
- const char* original_working_dir() const;
-
- // Returns the TestCase object for the test that's currently running,
- // or NULL if no test is running.
- const TestCase* current_test_case() const
- GTEST_LOCK_EXCLUDED_(mutex_);
-
- // Returns the TestInfo object for the test that's currently running,
- // or NULL if no test is running.
- const TestInfo* current_test_info() const
- GTEST_LOCK_EXCLUDED_(mutex_);
-
- // Returns the random seed used at the start of the current test run.
- int random_seed() const;
-
-#if GTEST_HAS_PARAM_TEST
- // Returns the ParameterizedTestCaseRegistry object used to keep track of
- // value-parameterized tests and instantiate and register them.
- //
- // INTERNAL IMPLEMENTATION - DO NOT USE IN A USER PROGRAM.
- internal::ParameterizedTestCaseRegistry& parameterized_test_registry()
- GTEST_LOCK_EXCLUDED_(mutex_);
-#endif // GTEST_HAS_PARAM_TEST
-
- // Gets the number of successful test cases.
- int successful_test_case_count() const;
-
- // Gets the number of failed test cases.
- int failed_test_case_count() const;
-
- // Gets the number of all test cases.
- int total_test_case_count() const;
-
- // Gets the number of all test cases that contain at least one test
- // that should run.
- int test_case_to_run_count() const;
-
- // Gets the number of successful tests.
- int successful_test_count() const;
-
- // Gets the number of failed tests.
- int failed_test_count() const;
-
- // Gets the number of disabled tests that will be reported in the XML report.
- int reportable_disabled_test_count() const;
-
- // Gets the number of disabled tests.
- int disabled_test_count() const;
-
- // Gets the number of tests to be printed in the XML report.
- int reportable_test_count() const;
-
- // Gets the number of all tests.
- int total_test_count() const;
-
- // Gets the number of tests that should run.
- int test_to_run_count() const;
-
- // Gets the time of the test program start, in ms from the start of the
- // UNIX epoch.
- TimeInMillis start_timestamp() const;
-
- // Gets the elapsed time, in milliseconds.
- TimeInMillis elapsed_time() const;
-
- // Returns true iff the unit test passed (i.e. all test cases passed).
- bool Passed() const;
-
- // Returns true iff the unit test failed (i.e. some test case failed
- // or something outside of all tests failed).
- bool Failed() const;
-
- // Gets the i-th test case among all the test cases. i can range from 0 to
- // total_test_case_count() - 1. If i is not in that range, returns NULL.
- const TestCase* GetTestCase(int i) const;
-
- // Returns the TestResult containing information on test failures and
- // properties logged outside of individual test cases.
- const TestResult& ad_hoc_test_result() const;
-
- // Returns the list of event listeners that can be used to track events
- // inside Google Test.
- TestEventListeners& listeners();
-
- private:
- // Registers and returns a global test environment. When a test
- // program is run, all global test environments will be set-up in
- // the order they were registered. After all tests in the program
- // have finished, all global test environments will be torn-down in
- // the *reverse* order they were registered.
- //
- // The UnitTest object takes ownership of the given environment.
- //
- // This method can only be called from the main thread.
- Environment* AddEnvironment(Environment* env);
-
- // Adds a TestPartResult to the current TestResult object. All
- // Google Test assertion macros (e.g. ASSERT_TRUE, EXPECT_EQ, etc)
- // eventually call this to report their results. The user code
- // should use the assertion macros instead of calling this directly.
- void AddTestPartResult(TestPartResult::Type result_type,
- const char* file_name,
- int line_number,
- const std::string& message,
- const std::string& os_stack_trace)
- GTEST_LOCK_EXCLUDED_(mutex_);
-
- // Adds a TestProperty to the current TestResult object when invoked from
- // inside a test, to current TestCase's ad_hoc_test_result_ when invoked
- // from SetUpTestCase or TearDownTestCase, or to the global property set
- // when invoked elsewhere. If the result already contains a property with
- // the same key, the value will be updated.
- void RecordProperty(const std::string& key, const std::string& value);
-
- // Gets the i-th test case among all the test cases. i can range from 0 to
- // total_test_case_count() - 1. If i is not in that range, returns NULL.
- TestCase* GetMutableTestCase(int i);
-
- // Accessors for the implementation object.
- internal::UnitTestImpl* impl() { return impl_; }
- const internal::UnitTestImpl* impl() const { return impl_; }
-
- // These classes and funcions are friends as they need to access private
- // members of UnitTest.
- friend class Test;
- friend class internal::AssertHelper;
- friend class internal::ScopedTrace;
- friend class internal::StreamingListenerTest;
- friend class internal::UnitTestRecordPropertyTestHelper;
- friend Environment* AddGlobalTestEnvironment(Environment* env);
- friend internal::UnitTestImpl* internal::GetUnitTestImpl();
- friend void internal::ReportFailureInUnknownLocation(
- TestPartResult::Type result_type,
- const std::string& message);
-
- // Creates an empty UnitTest.
- UnitTest();
-
- // D'tor
- virtual ~UnitTest();
-
- // Pushes a trace defined by SCOPED_TRACE() on to the per-thread
- // Google Test trace stack.
- void PushGTestTrace(const internal::TraceInfo& trace)
- GTEST_LOCK_EXCLUDED_(mutex_);
-
- // Pops a trace from the per-thread Google Test trace stack.
- void PopGTestTrace()
- GTEST_LOCK_EXCLUDED_(mutex_);
-
- // Protects mutable state in *impl_. This is mutable as some const
- // methods need to lock it too.
- mutable internal::Mutex mutex_;
-
- // Opaque implementation object. This field is never changed once
- // the object is constructed. We don't mark it as const here, as
- // doing so will cause a warning in the constructor of UnitTest.
- // Mutable state in *impl_ is protected by mutex_.
- internal::UnitTestImpl* impl_;
-
- // We disallow copying UnitTest.
- GTEST_DISALLOW_COPY_AND_ASSIGN_(UnitTest);
-};
-
-// A convenient wrapper for adding an environment for the test
-// program.
-//
-// You should call this before RUN_ALL_TESTS() is called, probably in
-// main(). If you use gtest_main, you need to call this before main()
-// starts for it to take effect. For example, you can define a global
-// variable like this:
-//
-// testing::Environment* const foo_env =
-// testing::AddGlobalTestEnvironment(new FooEnvironment);
-//
-// However, we strongly recommend you to write your own main() and
-// call AddGlobalTestEnvironment() there, as relying on initialization
-// of global variables makes the code harder to read and may cause
-// problems when you register multiple environments from different
-// translation units and the environments have dependencies among them
-// (remember that the compiler doesn't guarantee the order in which
-// global variables from different translation units are initialized).
-inline Environment* AddGlobalTestEnvironment(Environment* env) {
- return UnitTest::GetInstance()->AddEnvironment(env);
-}
-
-// Initializes Google Test. This must be called before calling
-// RUN_ALL_TESTS(). In particular, it parses a command line for the
-// flags that Google Test recognizes. Whenever a Google Test flag is
-// seen, it is removed from argv, and *argc is decremented.
-//
-// No value is returned. Instead, the Google Test flag variables are
-// updated.
-//
-// Calling the function for the second time has no user-visible effect.
-GTEST_API_ void InitGoogleTest(int* argc, char** argv);
-
-// This overloaded version can be used in Windows programs compiled in
-// UNICODE mode.
-GTEST_API_ void InitGoogleTest(int* argc, wchar_t** argv);
-
-namespace internal {
-
-// Separate the error generating code from the code path to reduce the stack
-// frame size of CmpHelperEQ. This helps reduce the overhead of some sanitizers
-// when calling EXPECT_* in a tight loop.
-template <typename T1, typename T2>
-AssertionResult CmpHelperEQFailure(const char* lhs_expression,
- const char* rhs_expression,
- const T1& lhs, const T2& rhs) {
- return EqFailure(lhs_expression,
- rhs_expression,
- FormatForComparisonFailureMessage(lhs, rhs),
- FormatForComparisonFailureMessage(rhs, lhs),
- false);
-}
-
-// The helper function for {ASSERT|EXPECT}_EQ.
-template <typename T1, typename T2>
-AssertionResult CmpHelperEQ(const char* lhs_expression,
- const char* rhs_expression,
- const T1& lhs,
- const T2& rhs) {
-GTEST_DISABLE_MSC_WARNINGS_PUSH_(4389 /* signed/unsigned mismatch */)
- if (lhs == rhs) {
- return AssertionSuccess();
- }
-GTEST_DISABLE_MSC_WARNINGS_POP_()
-
- return CmpHelperEQFailure(lhs_expression, rhs_expression, lhs, rhs);
-}
-
-// With this overloaded version, we allow anonymous enums to be used
-// in {ASSERT|EXPECT}_EQ when compiled with gcc 4, as anonymous enums
-// can be implicitly cast to BiggestInt.
-GTEST_API_ AssertionResult CmpHelperEQ(const char* lhs_expression,
- const char* rhs_expression,
- BiggestInt lhs,
- BiggestInt rhs);
-
-// The helper class for {ASSERT|EXPECT}_EQ. The template argument
-// lhs_is_null_literal is true iff the first argument to ASSERT_EQ()
-// is a null pointer literal. The following default implementation is
-// for lhs_is_null_literal being false.
-template <bool lhs_is_null_literal>
-class EqHelper {
- public:
- // This templatized version is for the general case.
- template <typename T1, typename T2>
- static AssertionResult Compare(const char* lhs_expression,
- const char* rhs_expression,
- const T1& lhs,
- const T2& rhs) {
- return CmpHelperEQ(lhs_expression, rhs_expression, lhs, rhs);
- }
-
- // With this overloaded version, we allow anonymous enums to be used
- // in {ASSERT|EXPECT}_EQ when compiled with gcc 4, as anonymous
- // enums can be implicitly cast to BiggestInt.
- //
- // Even though its body looks the same as the above version, we
- // cannot merge the two, as it will make anonymous enums unhappy.
- static AssertionResult Compare(const char* lhs_expression,
- const char* rhs_expression,
- BiggestInt lhs,
- BiggestInt rhs) {
- return CmpHelperEQ(lhs_expression, rhs_expression, lhs, rhs);
- }
-};
-
-// This specialization is used when the first argument to ASSERT_EQ()
-// is a null pointer literal, like NULL, false, or 0.
-template <>
-class EqHelper<true> {
- public:
- // We define two overloaded versions of Compare(). The first
- // version will be picked when the second argument to ASSERT_EQ() is
- // NOT a pointer, e.g. ASSERT_EQ(0, AnIntFunction()) or
- // EXPECT_EQ(false, a_bool).
- template <typename T1, typename T2>
- static AssertionResult Compare(
- const char* lhs_expression,
- const char* rhs_expression,
- const T1& lhs,
- const T2& rhs,
- // The following line prevents this overload from being considered if T2
- // is not a pointer type. We need this because ASSERT_EQ(NULL, my_ptr)
- // expands to Compare("", "", NULL, my_ptr), which requires a conversion
- // to match the Secret* in the other overload, which would otherwise make
- // this template match better.
- typename EnableIf<!is_pointer<T2>::value>::type* = 0) {
- return CmpHelperEQ(lhs_expression, rhs_expression, lhs, rhs);
- }
-
- // This version will be picked when the second argument to ASSERT_EQ() is a
- // pointer, e.g. ASSERT_EQ(NULL, a_pointer).
- template <typename T>
- static AssertionResult Compare(
- const char* lhs_expression,
- const char* rhs_expression,
- // We used to have a second template parameter instead of Secret*. That
- // template parameter would deduce to 'long', making this a better match
- // than the first overload even without the first overload's EnableIf.
- // Unfortunately, gcc with -Wconversion-null warns when "passing NULL to
- // non-pointer argument" (even a deduced integral argument), so the old
- // implementation caused warnings in user code.
- Secret* /* lhs (NULL) */,
- T* rhs) {
- // We already know that 'lhs' is a null pointer.
- return CmpHelperEQ(lhs_expression, rhs_expression,
- static_cast<T*>(NULL), rhs);
- }
-};
-
-// Separate the error generating code from the code path to reduce the stack
-// frame size of CmpHelperOP. This helps reduce the overhead of some sanitizers
-// when calling EXPECT_OP in a tight loop.
-template <typename T1, typename T2>
-AssertionResult CmpHelperOpFailure(const char* expr1, const char* expr2,
- const T1& val1, const T2& val2,
- const char* op) {
- return AssertionFailure()
- << "Expected: (" << expr1 << ") " << op << " (" << expr2
- << "), actual: " << FormatForComparisonFailureMessage(val1, val2)
- << " vs " << FormatForComparisonFailureMessage(val2, val1);
-}
-
-// A macro for implementing the helper functions needed to implement
-// ASSERT_?? and EXPECT_??. It is here just to avoid copy-and-paste
-// of similar code.
-//
-// For each templatized helper function, we also define an overloaded
-// version for BiggestInt in order to reduce code bloat and allow
-// anonymous enums to be used with {ASSERT|EXPECT}_?? when compiled
-// with gcc 4.
-//
-// INTERNAL IMPLEMENTATION - DO NOT USE IN A USER PROGRAM.
-
-#define GTEST_IMPL_CMP_HELPER_(op_name, op)\
-template <typename T1, typename T2>\
-AssertionResult CmpHelper##op_name(const char* expr1, const char* expr2, \
- const T1& val1, const T2& val2) {\
- if (val1 op val2) {\
- return AssertionSuccess();\
- } else {\
- return CmpHelperOpFailure(expr1, expr2, val1, val2, #op);\
- }\
-}\
-GTEST_API_ AssertionResult CmpHelper##op_name(\
- const char* expr1, const char* expr2, BiggestInt val1, BiggestInt val2)
-
-// INTERNAL IMPLEMENTATION - DO NOT USE IN A USER PROGRAM.
-
-// Implements the helper function for {ASSERT|EXPECT}_NE
-GTEST_IMPL_CMP_HELPER_(NE, !=);
-// Implements the helper function for {ASSERT|EXPECT}_LE
-GTEST_IMPL_CMP_HELPER_(LE, <=);
-// Implements the helper function for {ASSERT|EXPECT}_LT
-GTEST_IMPL_CMP_HELPER_(LT, <);
-// Implements the helper function for {ASSERT|EXPECT}_GE
-GTEST_IMPL_CMP_HELPER_(GE, >=);
-// Implements the helper function for {ASSERT|EXPECT}_GT
-GTEST_IMPL_CMP_HELPER_(GT, >);
-
-#undef GTEST_IMPL_CMP_HELPER_
-
-// The helper function for {ASSERT|EXPECT}_STREQ.
-//
-// INTERNAL IMPLEMENTATION - DO NOT USE IN A USER PROGRAM.
-GTEST_API_ AssertionResult CmpHelperSTREQ(const char* s1_expression,
- const char* s2_expression,
- const char* s1,
- const char* s2);
-
-// The helper function for {ASSERT|EXPECT}_STRCASEEQ.
-//
-// INTERNAL IMPLEMENTATION - DO NOT USE IN A USER PROGRAM.
-GTEST_API_ AssertionResult CmpHelperSTRCASEEQ(const char* s1_expression,
- const char* s2_expression,
- const char* s1,
- const char* s2);
-
-// The helper function for {ASSERT|EXPECT}_STRNE.
-//
-// INTERNAL IMPLEMENTATION - DO NOT USE IN A USER PROGRAM.
-GTEST_API_ AssertionResult CmpHelperSTRNE(const char* s1_expression,
- const char* s2_expression,
- const char* s1,
- const char* s2);
-
-// The helper function for {ASSERT|EXPECT}_STRCASENE.
-//
-// INTERNAL IMPLEMENTATION - DO NOT USE IN A USER PROGRAM.
-GTEST_API_ AssertionResult CmpHelperSTRCASENE(const char* s1_expression,
- const char* s2_expression,
- const char* s1,
- const char* s2);
-
-
-// Helper function for *_STREQ on wide strings.
-//
-// INTERNAL IMPLEMENTATION - DO NOT USE IN A USER PROGRAM.
-GTEST_API_ AssertionResult CmpHelperSTREQ(const char* s1_expression,
- const char* s2_expression,
- const wchar_t* s1,
- const wchar_t* s2);
-
-// Helper function for *_STRNE on wide strings.
-//
-// INTERNAL IMPLEMENTATION - DO NOT USE IN A USER PROGRAM.
-GTEST_API_ AssertionResult CmpHelperSTRNE(const char* s1_expression,
- const char* s2_expression,
- const wchar_t* s1,
- const wchar_t* s2);
-
-} // namespace internal
-
-// IsSubstring() and IsNotSubstring() are intended to be used as the
-// first argument to {EXPECT,ASSERT}_PRED_FORMAT2(), not by
-// themselves. They check whether needle is a substring of haystack
-// (NULL is considered a substring of itself only), and return an
-// appropriate error message when they fail.
-//
-// The {needle,haystack}_expr arguments are the stringified
-// expressions that generated the two real arguments.
-GTEST_API_ AssertionResult IsSubstring(
- const char* needle_expr, const char* haystack_expr,
- const char* needle, const char* haystack);
-GTEST_API_ AssertionResult IsSubstring(
- const char* needle_expr, const char* haystack_expr,
- const wchar_t* needle, const wchar_t* haystack);
-GTEST_API_ AssertionResult IsNotSubstring(
- const char* needle_expr, const char* haystack_expr,
- const char* needle, const char* haystack);
-GTEST_API_ AssertionResult IsNotSubstring(
- const char* needle_expr, const char* haystack_expr,
- const wchar_t* needle, const wchar_t* haystack);
-GTEST_API_ AssertionResult IsSubstring(
- const char* needle_expr, const char* haystack_expr,
- const ::std::string& needle, const ::std::string& haystack);
-GTEST_API_ AssertionResult IsNotSubstring(
- const char* needle_expr, const char* haystack_expr,
- const ::std::string& needle, const ::std::string& haystack);
-
-#if GTEST_HAS_STD_WSTRING
-GTEST_API_ AssertionResult IsSubstring(
- const char* needle_expr, const char* haystack_expr,
- const ::std::wstring& needle, const ::std::wstring& haystack);
-GTEST_API_ AssertionResult IsNotSubstring(
- const char* needle_expr, const char* haystack_expr,
- const ::std::wstring& needle, const ::std::wstring& haystack);
-#endif // GTEST_HAS_STD_WSTRING
-
-namespace internal {
-
-// Helper template function for comparing floating-points.
-//
-// Template parameter:
-//
-// RawType: the raw floating-point type (either float or double)
-//
-// INTERNAL IMPLEMENTATION - DO NOT USE IN A USER PROGRAM.
-template <typename RawType>
-AssertionResult CmpHelperFloatingPointEQ(const char* lhs_expression,
- const char* rhs_expression,
- RawType lhs_value,
- RawType rhs_value) {
- const FloatingPoint<RawType> lhs(lhs_value), rhs(rhs_value);
-
- if (lhs.AlmostEquals(rhs)) {
- return AssertionSuccess();
- }
-
- ::std::stringstream lhs_ss;
- lhs_ss << std::setprecision(std::numeric_limits<RawType>::digits10 + 2)
- << lhs_value;
-
- ::std::stringstream rhs_ss;
- rhs_ss << std::setprecision(std::numeric_limits<RawType>::digits10 + 2)
- << rhs_value;
-
- return EqFailure(lhs_expression,
- rhs_expression,
- StringStreamToString(&lhs_ss),
- StringStreamToString(&rhs_ss),
- false);
-}
-
-// Helper function for implementing ASSERT_NEAR.
-//
-// INTERNAL IMPLEMENTATION - DO NOT USE IN A USER PROGRAM.
-GTEST_API_ AssertionResult DoubleNearPredFormat(const char* expr1,
- const char* expr2,
- const char* abs_error_expr,
- double val1,
- double val2,
- double abs_error);
-
-// INTERNAL IMPLEMENTATION - DO NOT USE IN USER CODE.
-// A class that enables one to stream messages to assertion macros
-class GTEST_API_ AssertHelper {
- public:
- // Constructor.
- AssertHelper(TestPartResult::Type type,
- const char* file,
- int line,
- const char* message);
- ~AssertHelper();
-
- // Message assignment is a semantic trick to enable assertion
- // streaming; see the GTEST_MESSAGE_ macro below.
- void operator=(const Message& message) const;
-
- private:
- // We put our data in a struct so that the size of the AssertHelper class can
- // be as small as possible. This is important because gcc is incapable of
- // re-using stack space even for temporary variables, so every EXPECT_EQ
- // reserves stack space for another AssertHelper.
- struct AssertHelperData {
- AssertHelperData(TestPartResult::Type t,
- const char* srcfile,
- int line_num,
- const char* msg)
- : type(t), file(srcfile), line(line_num), message(msg) { }
-
- TestPartResult::Type const type;
- const char* const file;
- int const line;
- std::string const message;
-
- private:
- GTEST_DISALLOW_COPY_AND_ASSIGN_(AssertHelperData);
- };
-
- AssertHelperData* const data_;
-
- GTEST_DISALLOW_COPY_AND_ASSIGN_(AssertHelper);
-};
-
-} // namespace internal
-
-#if GTEST_HAS_PARAM_TEST
-// The pure interface class that all value-parameterized tests inherit from.
-// A value-parameterized class must inherit from both ::testing::Test and
-// ::testing::WithParamInterface. In most cases that just means inheriting
-// from ::testing::TestWithParam, but more complicated test hierarchies
-// may need to inherit from Test and WithParamInterface at different levels.
-//
-// This interface has support for accessing the test parameter value via
-// the GetParam() method.
-//
-// Use it with one of the parameter generator defining functions, like Range(),
-// Values(), ValuesIn(), Bool(), and Combine().
-//
-// class FooTest : public ::testing::TestWithParam<int> {
-// protected:
-// FooTest() {
-// // Can use GetParam() here.
-// }
-// virtual ~FooTest() {
-// // Can use GetParam() here.
-// }
-// virtual void SetUp() {
-// // Can use GetParam() here.
-// }
-// virtual void TearDown {
-// // Can use GetParam() here.
-// }
-// };
-// TEST_P(FooTest, DoesBar) {
-// // Can use GetParam() method here.
-// Foo foo;
-// ASSERT_TRUE(foo.DoesBar(GetParam()));
-// }
-// INSTANTIATE_TEST_CASE_P(OneToTenRange, FooTest, ::testing::Range(1, 10));
-
-template <typename T>
-class WithParamInterface {
- public:
- typedef T ParamType;
- virtual ~WithParamInterface() {}
-
- // The current parameter value. Is also available in the test fixture's
- // constructor. This member function is non-static, even though it only
- // references static data, to reduce the opportunity for incorrect uses
- // like writing 'WithParamInterface<bool>::GetParam()' for a test that
- // uses a fixture whose parameter type is int.
- const ParamType& GetParam() const {
- GTEST_CHECK_(parameter_ != NULL)
- << "GetParam() can only be called inside a value-parameterized test "
- << "-- did you intend to write TEST_P instead of TEST_F?";
- return *parameter_;
- }
-
- private:
- // Sets parameter value. The caller is responsible for making sure the value
- // remains alive and unchanged throughout the current test.
- static void SetParam(const ParamType* parameter) {
- parameter_ = parameter;
- }
-
- // Static value used for accessing parameter during a test lifetime.
- static const ParamType* parameter_;
-
- // TestClass must be a subclass of WithParamInterface<T> and Test.
- template <class TestClass> friend class internal::ParameterizedTestFactory;
-};
-
-template <typename T>
-const T* WithParamInterface<T>::parameter_ = NULL;
-
-// Most value-parameterized classes can ignore the existence of
-// WithParamInterface, and can just inherit from ::testing::TestWithParam.
-
-template <typename T>
-class TestWithParam : public Test, public WithParamInterface<T> {
-};
-
-#endif // GTEST_HAS_PARAM_TEST
-
-// Macros for indicating success/failure in test code.
-
-// ADD_FAILURE unconditionally adds a failure to the current test.
-// SUCCEED generates a success - it doesn't automatically make the
-// current test successful, as a test is only successful when it has
-// no failure.
-//
-// EXPECT_* verifies that a certain condition is satisfied. If not,
-// it behaves like ADD_FAILURE. In particular:
-//
-// EXPECT_TRUE verifies that a Boolean condition is true.
-// EXPECT_FALSE verifies that a Boolean condition is false.
-//
-// FAIL and ASSERT_* are similar to ADD_FAILURE and EXPECT_*, except
-// that they will also abort the current function on failure. People
-// usually want the fail-fast behavior of FAIL and ASSERT_*, but those
-// writing data-driven tests often find themselves using ADD_FAILURE
-// and EXPECT_* more.
-
-// Generates a nonfatal failure with a generic message.
-#define ADD_FAILURE() GTEST_NONFATAL_FAILURE_("Failed")
-
-// Generates a nonfatal failure at the given source file location with
-// a generic message.
-#define ADD_FAILURE_AT(file, line) \
- GTEST_MESSAGE_AT_(file, line, "Failed", \
- ::testing::TestPartResult::kNonFatalFailure)
-
-// Generates a fatal failure with a generic message.
-#define GTEST_FAIL() GTEST_FATAL_FAILURE_("Failed")
-
-// Define this macro to 1 to omit the definition of FAIL(), which is a
-// generic name and clashes with some other libraries.
-#if !GTEST_DONT_DEFINE_FAIL
-# define FAIL() GTEST_FAIL()
-#endif
-
-// Generates a success with a generic message.
-#define GTEST_SUCCEED() GTEST_SUCCESS_("Succeeded")
-
-// Define this macro to 1 to omit the definition of SUCCEED(), which
-// is a generic name and clashes with some other libraries.
-#if !GTEST_DONT_DEFINE_SUCCEED
-# define SUCCEED() GTEST_SUCCEED()
-#endif
-
-// Macros for testing exceptions.
-//
-// * {ASSERT|EXPECT}_THROW(statement, expected_exception):
-// Tests that the statement throws the expected exception.
-// * {ASSERT|EXPECT}_NO_THROW(statement):
-// Tests that the statement doesn't throw any exception.
-// * {ASSERT|EXPECT}_ANY_THROW(statement):
-// Tests that the statement throws an exception.
-
-#define EXPECT_THROW(statement, expected_exception) \
- GTEST_TEST_THROW_(statement, expected_exception, GTEST_NONFATAL_FAILURE_)
-#define EXPECT_NO_THROW(statement) \
- GTEST_TEST_NO_THROW_(statement, GTEST_NONFATAL_FAILURE_)
-#define EXPECT_ANY_THROW(statement) \
- GTEST_TEST_ANY_THROW_(statement, GTEST_NONFATAL_FAILURE_)
-#define ASSERT_THROW(statement, expected_exception) \
- GTEST_TEST_THROW_(statement, expected_exception, GTEST_FATAL_FAILURE_)
-#define ASSERT_NO_THROW(statement) \
- GTEST_TEST_NO_THROW_(statement, GTEST_FATAL_FAILURE_)
-#define ASSERT_ANY_THROW(statement) \
- GTEST_TEST_ANY_THROW_(statement, GTEST_FATAL_FAILURE_)
-
-// Boolean assertions. Condition can be either a Boolean expression or an
-// AssertionResult. For more information on how to use AssertionResult with
-// these macros see comments on that class.
-#define EXPECT_TRUE(condition) \
- GTEST_TEST_BOOLEAN_((condition), #condition, false, true, \
- GTEST_NONFATAL_FAILURE_)
-#define EXPECT_FALSE(condition) \
- GTEST_TEST_BOOLEAN_(!(condition), #condition, true, false, \
- GTEST_NONFATAL_FAILURE_)
-#define ASSERT_TRUE(condition) \
- GTEST_TEST_BOOLEAN_((condition), #condition, false, true, \
- GTEST_FATAL_FAILURE_)
-#define ASSERT_FALSE(condition) \
- GTEST_TEST_BOOLEAN_(!(condition), #condition, true, false, \
- GTEST_FATAL_FAILURE_)
-
-// Includes the auto-generated header that implements a family of
-// generic predicate assertion macros.
-#include "gtest/gtest_pred_impl.h"
-
-// Macros for testing equalities and inequalities.
-//
-// * {ASSERT|EXPECT}_EQ(v1, v2): Tests that v1 == v2
-// * {ASSERT|EXPECT}_NE(v1, v2): Tests that v1 != v2
-// * {ASSERT|EXPECT}_LT(v1, v2): Tests that v1 < v2
-// * {ASSERT|EXPECT}_LE(v1, v2): Tests that v1 <= v2
-// * {ASSERT|EXPECT}_GT(v1, v2): Tests that v1 > v2
-// * {ASSERT|EXPECT}_GE(v1, v2): Tests that v1 >= v2
-//
-// When they are not, Google Test prints both the tested expressions and
-// their actual values. The values must be compatible built-in types,
-// or you will get a compiler error. By "compatible" we mean that the
-// values can be compared by the respective operator.
-//
-// Note:
-//
-// 1. It is possible to make a user-defined type work with
-// {ASSERT|EXPECT}_??(), but that requires overloading the
-// comparison operators and is thus discouraged by the Google C++
-// Usage Guide. Therefore, you are advised to use the
-// {ASSERT|EXPECT}_TRUE() macro to assert that two objects are
-// equal.
-//
-// 2. The {ASSERT|EXPECT}_??() macros do pointer comparisons on
-// pointers (in particular, C strings). Therefore, if you use it
-// with two C strings, you are testing how their locations in memory
-// are related, not how their content is related. To compare two C
-// strings by content, use {ASSERT|EXPECT}_STR*().
-//
-// 3. {ASSERT|EXPECT}_EQ(v1, v2) is preferred to
-// {ASSERT|EXPECT}_TRUE(v1 == v2), as the former tells you
-// what the actual value is when it fails, and similarly for the
-// other comparisons.
-//
-// 4. Do not depend on the order in which {ASSERT|EXPECT}_??()
-// evaluate their arguments, which is undefined.
-//
-// 5. These macros evaluate their arguments exactly once.
-//
-// Examples:
-//
-// EXPECT_NE(5, Foo());
-// EXPECT_EQ(NULL, a_pointer);
-// ASSERT_LT(i, array_size);
-// ASSERT_GT(records.size(), 0) << "There is no record left.";
-
-#define EXPECT_EQ(val1, val2) \
- EXPECT_PRED_FORMAT2(::testing::internal:: \
- EqHelper<GTEST_IS_NULL_LITERAL_(val1)>::Compare, \
- val1, val2)
-#define EXPECT_NE(val1, val2) \
- EXPECT_PRED_FORMAT2(::testing::internal::CmpHelperNE, val1, val2)
-#define EXPECT_LE(val1, val2) \
- EXPECT_PRED_FORMAT2(::testing::internal::CmpHelperLE, val1, val2)
-#define EXPECT_LT(val1, val2) \
- EXPECT_PRED_FORMAT2(::testing::internal::CmpHelperLT, val1, val2)
-#define EXPECT_GE(val1, val2) \
- EXPECT_PRED_FORMAT2(::testing::internal::CmpHelperGE, val1, val2)
-#define EXPECT_GT(val1, val2) \
- EXPECT_PRED_FORMAT2(::testing::internal::CmpHelperGT, val1, val2)
-
-#define GTEST_ASSERT_EQ(val1, val2) \
- ASSERT_PRED_FORMAT2(::testing::internal:: \
- EqHelper<GTEST_IS_NULL_LITERAL_(val1)>::Compare, \
- val1, val2)
-#define GTEST_ASSERT_NE(val1, val2) \
- ASSERT_PRED_FORMAT2(::testing::internal::CmpHelperNE, val1, val2)
-#define GTEST_ASSERT_LE(val1, val2) \
- ASSERT_PRED_FORMAT2(::testing::internal::CmpHelperLE, val1, val2)
-#define GTEST_ASSERT_LT(val1, val2) \
- ASSERT_PRED_FORMAT2(::testing::internal::CmpHelperLT, val1, val2)
-#define GTEST_ASSERT_GE(val1, val2) \
- ASSERT_PRED_FORMAT2(::testing::internal::CmpHelperGE, val1, val2)
-#define GTEST_ASSERT_GT(val1, val2) \
- ASSERT_PRED_FORMAT2(::testing::internal::CmpHelperGT, val1, val2)
-
-// Define macro GTEST_DONT_DEFINE_ASSERT_XY to 1 to omit the definition of
-// ASSERT_XY(), which clashes with some users' own code.
-
-#if !GTEST_DONT_DEFINE_ASSERT_EQ
-# define ASSERT_EQ(val1, val2) GTEST_ASSERT_EQ(val1, val2)
-#endif
-
-#if !GTEST_DONT_DEFINE_ASSERT_NE
-# define ASSERT_NE(val1, val2) GTEST_ASSERT_NE(val1, val2)
-#endif
-
-#if !GTEST_DONT_DEFINE_ASSERT_LE
-# define ASSERT_LE(val1, val2) GTEST_ASSERT_LE(val1, val2)
-#endif
-
-#if !GTEST_DONT_DEFINE_ASSERT_LT
-# define ASSERT_LT(val1, val2) GTEST_ASSERT_LT(val1, val2)
-#endif
-
-#if !GTEST_DONT_DEFINE_ASSERT_GE
-# define ASSERT_GE(val1, val2) GTEST_ASSERT_GE(val1, val2)
-#endif
-
-#if !GTEST_DONT_DEFINE_ASSERT_GT
-# define ASSERT_GT(val1, val2) GTEST_ASSERT_GT(val1, val2)
-#endif
-
-// C-string Comparisons. All tests treat NULL and any non-NULL string
-// as different. Two NULLs are equal.
-//
-// * {ASSERT|EXPECT}_STREQ(s1, s2): Tests that s1 == s2
-// * {ASSERT|EXPECT}_STRNE(s1, s2): Tests that s1 != s2
-// * {ASSERT|EXPECT}_STRCASEEQ(s1, s2): Tests that s1 == s2, ignoring case
-// * {ASSERT|EXPECT}_STRCASENE(s1, s2): Tests that s1 != s2, ignoring case
-//
-// For wide or narrow string objects, you can use the
-// {ASSERT|EXPECT}_??() macros.
-//
-// Don't depend on the order in which the arguments are evaluated,
-// which is undefined.
-//
-// These macros evaluate their arguments exactly once.
-
-#define EXPECT_STREQ(s1, s2) \
- EXPECT_PRED_FORMAT2(::testing::internal::CmpHelperSTREQ, s1, s2)
-#define EXPECT_STRNE(s1, s2) \
- EXPECT_PRED_FORMAT2(::testing::internal::CmpHelperSTRNE, s1, s2)
-#define EXPECT_STRCASEEQ(s1, s2) \
- EXPECT_PRED_FORMAT2(::testing::internal::CmpHelperSTRCASEEQ, s1, s2)
-#define EXPECT_STRCASENE(s1, s2)\
- EXPECT_PRED_FORMAT2(::testing::internal::CmpHelperSTRCASENE, s1, s2)
-
-#define ASSERT_STREQ(s1, s2) \
- ASSERT_PRED_FORMAT2(::testing::internal::CmpHelperSTREQ, s1, s2)
-#define ASSERT_STRNE(s1, s2) \
- ASSERT_PRED_FORMAT2(::testing::internal::CmpHelperSTRNE, s1, s2)
-#define ASSERT_STRCASEEQ(s1, s2) \
- ASSERT_PRED_FORMAT2(::testing::internal::CmpHelperSTRCASEEQ, s1, s2)
-#define ASSERT_STRCASENE(s1, s2)\
- ASSERT_PRED_FORMAT2(::testing::internal::CmpHelperSTRCASENE, s1, s2)
-
-// Macros for comparing floating-point numbers.
-//
-// * {ASSERT|EXPECT}_FLOAT_EQ(val1, val2):
-// Tests that two float values are almost equal.
-// * {ASSERT|EXPECT}_DOUBLE_EQ(val1, val2):
-// Tests that two double values are almost equal.
-// * {ASSERT|EXPECT}_NEAR(v1, v2, abs_error):
-// Tests that v1 and v2 are within the given distance to each other.
-//
-// Google Test uses ULP-based comparison to automatically pick a default
-// error bound that is appropriate for the operands. See the
-// FloatingPoint template class in gtest-internal.h if you are
-// interested in the implementation details.
-
-#define EXPECT_FLOAT_EQ(val1, val2)\
- EXPECT_PRED_FORMAT2(::testing::internal::CmpHelperFloatingPointEQ<float>, \
- val1, val2)
-
-#define EXPECT_DOUBLE_EQ(val1, val2)\
- EXPECT_PRED_FORMAT2(::testing::internal::CmpHelperFloatingPointEQ<double>, \
- val1, val2)
-
-#define ASSERT_FLOAT_EQ(val1, val2)\
- ASSERT_PRED_FORMAT2(::testing::internal::CmpHelperFloatingPointEQ<float>, \
- val1, val2)
-
-#define ASSERT_DOUBLE_EQ(val1, val2)\
- ASSERT_PRED_FORMAT2(::testing::internal::CmpHelperFloatingPointEQ<double>, \
- val1, val2)
-
-#define EXPECT_NEAR(val1, val2, abs_error)\
- EXPECT_PRED_FORMAT3(::testing::internal::DoubleNearPredFormat, \
- val1, val2, abs_error)
-
-#define ASSERT_NEAR(val1, val2, abs_error)\
- ASSERT_PRED_FORMAT3(::testing::internal::DoubleNearPredFormat, \
- val1, val2, abs_error)
-
-// These predicate format functions work on floating-point values, and
-// can be used in {ASSERT|EXPECT}_PRED_FORMAT2*(), e.g.
-//
-// EXPECT_PRED_FORMAT2(testing::DoubleLE, Foo(), 5.0);
-
-// Asserts that val1 is less than, or almost equal to, val2. Fails
-// otherwise. In particular, it fails if either val1 or val2 is NaN.
-GTEST_API_ AssertionResult FloatLE(const char* expr1, const char* expr2,
- float val1, float val2);
-GTEST_API_ AssertionResult DoubleLE(const char* expr1, const char* expr2,
- double val1, double val2);
-
-
-#if GTEST_OS_WINDOWS
-
-// Macros that test for HRESULT failure and success, these are only useful
-// on Windows, and rely on Windows SDK macros and APIs to compile.
-//
-// * {ASSERT|EXPECT}_HRESULT_{SUCCEEDED|FAILED}(expr)
-//
-// When expr unexpectedly fails or succeeds, Google Test prints the
-// expected result and the actual result with both a human-readable
-// string representation of the error, if available, as well as the
-// hex result code.
-# define EXPECT_HRESULT_SUCCEEDED(expr) \
- EXPECT_PRED_FORMAT1(::testing::internal::IsHRESULTSuccess, (expr))
-
-# define ASSERT_HRESULT_SUCCEEDED(expr) \
- ASSERT_PRED_FORMAT1(::testing::internal::IsHRESULTSuccess, (expr))
-
-# define EXPECT_HRESULT_FAILED(expr) \
- EXPECT_PRED_FORMAT1(::testing::internal::IsHRESULTFailure, (expr))
-
-# define ASSERT_HRESULT_FAILED(expr) \
- ASSERT_PRED_FORMAT1(::testing::internal::IsHRESULTFailure, (expr))
-
-#endif // GTEST_OS_WINDOWS
-
-// Macros that execute statement and check that it doesn't generate new fatal
-// failures in the current thread.
-//
-// * {ASSERT|EXPECT}_NO_FATAL_FAILURE(statement);
-//
-// Examples:
-//
-// EXPECT_NO_FATAL_FAILURE(Process());
-// ASSERT_NO_FATAL_FAILURE(Process()) << "Process() failed";
-//
-#define ASSERT_NO_FATAL_FAILURE(statement) \
- GTEST_TEST_NO_FATAL_FAILURE_(statement, GTEST_FATAL_FAILURE_)
-#define EXPECT_NO_FATAL_FAILURE(statement) \
- GTEST_TEST_NO_FATAL_FAILURE_(statement, GTEST_NONFATAL_FAILURE_)
-
-// Causes a trace (including the source file path, the current line
-// number, and the given message) to be included in every test failure
-// message generated by code in the current scope. The effect is
-// undone when the control leaves the current scope.
-//
-// The message argument can be anything streamable to std::ostream.
-//
-// In the implementation, we include the current line number as part
-// of the dummy variable name, thus allowing multiple SCOPED_TRACE()s
-// to appear in the same block - as long as they are on different
-// lines.
-#define SCOPED_TRACE(message) \
- ::testing::internal::ScopedTrace GTEST_CONCAT_TOKEN_(gtest_trace_, __LINE__)(\
- __FILE__, __LINE__, ::testing::Message() << (message))
-
-// Compile-time assertion for type equality.
-// StaticAssertTypeEq<type1, type2>() compiles iff type1 and type2 are
-// the same type. The value it returns is not interesting.
-//
-// Instead of making StaticAssertTypeEq a class template, we make it a
-// function template that invokes a helper class template. This
-// prevents a user from misusing StaticAssertTypeEq<T1, T2> by
-// defining objects of that type.
-//
-// CAVEAT:
-//
-// When used inside a method of a class template,
-// StaticAssertTypeEq<T1, T2>() is effective ONLY IF the method is
-// instantiated. For example, given:
-//
-// template <typename T> class Foo {
-// public:
-// void Bar() { testing::StaticAssertTypeEq<int, T>(); }
-// };
-//
-// the code:
-//
-// void Test1() { Foo<bool> foo; }
-//
-// will NOT generate a compiler error, as Foo<bool>::Bar() is never
-// actually instantiated. Instead, you need:
-//
-// void Test2() { Foo<bool> foo; foo.Bar(); }
-//
-// to cause a compiler error.
-template <typename T1, typename T2>
-bool StaticAssertTypeEq() {
- (void)internal::StaticAssertTypeEqHelper<T1, T2>();
- return true;
-}
-
-// Defines a test.
-//
-// The first parameter is the name of the test case, and the second
-// parameter is the name of the test within the test case.
-//
-// The convention is to end the test case name with "Test". For
-// example, a test case for the Foo class can be named FooTest.
-//
-// Test code should appear between braces after an invocation of
-// this macro. Example:
-//
-// TEST(FooTest, InitializesCorrectly) {
-// Foo foo;
-// EXPECT_TRUE(foo.StatusIsOK());
-// }
-
-// Note that we call GetTestTypeId() instead of GetTypeId<
-// ::testing::Test>() here to get the type ID of testing::Test. This
-// is to work around a suspected linker bug when using Google Test as
-// a framework on Mac OS X. The bug causes GetTypeId<
-// ::testing::Test>() to return different values depending on whether
-// the call is from the Google Test framework itself or from user test
-// code. GetTestTypeId() is guaranteed to always return the same
-// value, as it always calls GetTypeId<>() from the Google Test
-// framework.
-#define GTEST_TEST(test_case_name, test_name)\
- GTEST_TEST_(test_case_name, test_name, \
- ::testing::Test, ::testing::internal::GetTestTypeId())
-
-// Define this macro to 1 to omit the definition of TEST(), which
-// is a generic name and clashes with some other libraries.
-#if !GTEST_DONT_DEFINE_TEST
-# define TEST(test_case_name, test_name) GTEST_TEST(test_case_name, test_name)
-#endif
-
-// Defines a test that uses a test fixture.
-//
-// The first parameter is the name of the test fixture class, which
-// also doubles as the test case name. The second parameter is the
-// name of the test within the test case.
-//
-// A test fixture class must be declared earlier. The user should put
-// his test code between braces after using this macro. Example:
-//
-// class FooTest : public testing::Test {
-// protected:
-// virtual void SetUp() { b_.AddElement(3); }
-//
-// Foo a_;
-// Foo b_;
-// };
-//
-// TEST_F(FooTest, InitializesCorrectly) {
-// EXPECT_TRUE(a_.StatusIsOK());
-// }
-//
-// TEST_F(FooTest, ReturnsElementCountCorrectly) {
-// EXPECT_EQ(0, a_.size());
-// EXPECT_EQ(1, b_.size());
-// }
-
-#define TEST_F(test_fixture, test_name)\
- GTEST_TEST_(test_fixture, test_name, test_fixture, \
- ::testing::internal::GetTypeId<test_fixture>())
-
-} // namespace testing
-
-// Use this function in main() to run all tests. It returns 0 if all
-// tests are successful, or 1 otherwise.
-//
-// RUN_ALL_TESTS() should be invoked after the command line has been
-// parsed by InitGoogleTest().
-//
-// This function was formerly a macro; thus, it is in the global
-// namespace and has an all-caps name.
-int RUN_ALL_TESTS() GTEST_MUST_USE_RESULT_;
-
-inline int RUN_ALL_TESTS() {
- return ::testing::UnitTest::GetInstance()->Run();
-}
-
-#endif // GTEST_INCLUDE_GTEST_GTEST_H_
diff --git a/third_party/aom/third_party/googletest/src/googletest/include/gtest/gtest_pred_impl.h b/third_party/aom/third_party/googletest/src/googletest/include/gtest/gtest_pred_impl.h
deleted file mode 100644
index 30ae712f5..000000000
--- a/third_party/aom/third_party/googletest/src/googletest/include/gtest/gtest_pred_impl.h
+++ /dev/null
@@ -1,358 +0,0 @@
-// Copyright 2006, Google Inc.
-// All rights reserved.
-//
-// Redistribution and use in source and binary forms, with or without
-// modification, are permitted provided that the following conditions are
-// met:
-//
-// * Redistributions of source code must retain the above copyright
-// notice, this list of conditions and the following disclaimer.
-// * Redistributions in binary form must reproduce the above
-// copyright notice, this list of conditions and the following disclaimer
-// in the documentation and/or other materials provided with the
-// distribution.
-// * Neither the name of Google Inc. nor the names of its
-// contributors may be used to endorse or promote products derived from
-// this software without specific prior written permission.
-//
-// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
-// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
-// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
-// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
-// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
-// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
-// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
-// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
-// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-
-// This file is AUTOMATICALLY GENERATED on 10/31/2011 by command
-// 'gen_gtest_pred_impl.py 5'. DO NOT EDIT BY HAND!
-//
-// Implements a family of generic predicate assertion macros.
-
-#ifndef GTEST_INCLUDE_GTEST_GTEST_PRED_IMPL_H_
-#define GTEST_INCLUDE_GTEST_GTEST_PRED_IMPL_H_
-
-// Makes sure this header is not included before gtest.h.
-#ifndef GTEST_INCLUDE_GTEST_GTEST_H_
-# error Do not include gtest_pred_impl.h directly. Include gtest.h instead.
-#endif // GTEST_INCLUDE_GTEST_GTEST_H_
-
-// This header implements a family of generic predicate assertion
-// macros:
-//
-// ASSERT_PRED_FORMAT1(pred_format, v1)
-// ASSERT_PRED_FORMAT2(pred_format, v1, v2)
-// ...
-//
-// where pred_format is a function or functor that takes n (in the
-// case of ASSERT_PRED_FORMATn) values and their source expression
-// text, and returns a testing::AssertionResult. See the definition
-// of ASSERT_EQ in gtest.h for an example.
-//
-// If you don't care about formatting, you can use the more
-// restrictive version:
-//
-// ASSERT_PRED1(pred, v1)
-// ASSERT_PRED2(pred, v1, v2)
-// ...
-//
-// where pred is an n-ary function or functor that returns bool,
-// and the values v1, v2, ..., must support the << operator for
-// streaming to std::ostream.
-//
-// We also define the EXPECT_* variations.
-//
-// For now we only support predicates whose arity is at most 5.
-// Please email googletestframework@googlegroups.com if you need
-// support for higher arities.
-
-// GTEST_ASSERT_ is the basic statement to which all of the assertions
-// in this file reduce. Don't use this in your code.
-
-#define GTEST_ASSERT_(expression, on_failure) \
- GTEST_AMBIGUOUS_ELSE_BLOCKER_ \
- if (const ::testing::AssertionResult gtest_ar = (expression)) \
- ; \
- else \
- on_failure(gtest_ar.failure_message())
-
-
-// Helper function for implementing {EXPECT|ASSERT}_PRED1. Don't use
-// this in your code.
-template <typename Pred,
- typename T1>
-AssertionResult AssertPred1Helper(const char* pred_text,
- const char* e1,
- Pred pred,
- const T1& v1) {
- if (pred(v1)) return AssertionSuccess();
-
- return AssertionFailure() << pred_text << "("
- << e1 << ") evaluates to false, where"
- << "\n" << e1 << " evaluates to " << v1;
-}
-
-// Internal macro for implementing {EXPECT|ASSERT}_PRED_FORMAT1.
-// Don't use this in your code.
-#define GTEST_PRED_FORMAT1_(pred_format, v1, on_failure)\
- GTEST_ASSERT_(pred_format(#v1, v1), \
- on_failure)
-
-// Internal macro for implementing {EXPECT|ASSERT}_PRED1. Don't use
-// this in your code.
-#define GTEST_PRED1_(pred, v1, on_failure)\
- GTEST_ASSERT_(::testing::AssertPred1Helper(#pred, \
- #v1, \
- pred, \
- v1), on_failure)
-
-// Unary predicate assertion macros.
-#define EXPECT_PRED_FORMAT1(pred_format, v1) \
- GTEST_PRED_FORMAT1_(pred_format, v1, GTEST_NONFATAL_FAILURE_)
-#define EXPECT_PRED1(pred, v1) \
- GTEST_PRED1_(pred, v1, GTEST_NONFATAL_FAILURE_)
-#define ASSERT_PRED_FORMAT1(pred_format, v1) \
- GTEST_PRED_FORMAT1_(pred_format, v1, GTEST_FATAL_FAILURE_)
-#define ASSERT_PRED1(pred, v1) \
- GTEST_PRED1_(pred, v1, GTEST_FATAL_FAILURE_)
-
-
-
-// Helper function for implementing {EXPECT|ASSERT}_PRED2. Don't use
-// this in your code.
-template <typename Pred,
- typename T1,
- typename T2>
-AssertionResult AssertPred2Helper(const char* pred_text,
- const char* e1,
- const char* e2,
- Pred pred,
- const T1& v1,
- const T2& v2) {
- if (pred(v1, v2)) return AssertionSuccess();
-
- return AssertionFailure() << pred_text << "("
- << e1 << ", "
- << e2 << ") evaluates to false, where"
- << "\n" << e1 << " evaluates to " << v1
- << "\n" << e2 << " evaluates to " << v2;
-}
-
-// Internal macro for implementing {EXPECT|ASSERT}_PRED_FORMAT2.
-// Don't use this in your code.
-#define GTEST_PRED_FORMAT2_(pred_format, v1, v2, on_failure)\
- GTEST_ASSERT_(pred_format(#v1, #v2, v1, v2), \
- on_failure)
-
-// Internal macro for implementing {EXPECT|ASSERT}_PRED2. Don't use
-// this in your code.
-#define GTEST_PRED2_(pred, v1, v2, on_failure)\
- GTEST_ASSERT_(::testing::AssertPred2Helper(#pred, \
- #v1, \
- #v2, \
- pred, \
- v1, \
- v2), on_failure)
-
-// Binary predicate assertion macros.
-#define EXPECT_PRED_FORMAT2(pred_format, v1, v2) \
- GTEST_PRED_FORMAT2_(pred_format, v1, v2, GTEST_NONFATAL_FAILURE_)
-#define EXPECT_PRED2(pred, v1, v2) \
- GTEST_PRED2_(pred, v1, v2, GTEST_NONFATAL_FAILURE_)
-#define ASSERT_PRED_FORMAT2(pred_format, v1, v2) \
- GTEST_PRED_FORMAT2_(pred_format, v1, v2, GTEST_FATAL_FAILURE_)
-#define ASSERT_PRED2(pred, v1, v2) \
- GTEST_PRED2_(pred, v1, v2, GTEST_FATAL_FAILURE_)
-
-
-
-// Helper function for implementing {EXPECT|ASSERT}_PRED3. Don't use
-// this in your code.
-template <typename Pred,
- typename T1,
- typename T2,
- typename T3>
-AssertionResult AssertPred3Helper(const char* pred_text,
- const char* e1,
- const char* e2,
- const char* e3,
- Pred pred,
- const T1& v1,
- const T2& v2,
- const T3& v3) {
- if (pred(v1, v2, v3)) return AssertionSuccess();
-
- return AssertionFailure() << pred_text << "("
- << e1 << ", "
- << e2 << ", "
- << e3 << ") evaluates to false, where"
- << "\n" << e1 << " evaluates to " << v1
- << "\n" << e2 << " evaluates to " << v2
- << "\n" << e3 << " evaluates to " << v3;
-}
-
-// Internal macro for implementing {EXPECT|ASSERT}_PRED_FORMAT3.
-// Don't use this in your code.
-#define GTEST_PRED_FORMAT3_(pred_format, v1, v2, v3, on_failure)\
- GTEST_ASSERT_(pred_format(#v1, #v2, #v3, v1, v2, v3), \
- on_failure)
-
-// Internal macro for implementing {EXPECT|ASSERT}_PRED3. Don't use
-// this in your code.
-#define GTEST_PRED3_(pred, v1, v2, v3, on_failure)\
- GTEST_ASSERT_(::testing::AssertPred3Helper(#pred, \
- #v1, \
- #v2, \
- #v3, \
- pred, \
- v1, \
- v2, \
- v3), on_failure)
-
-// Ternary predicate assertion macros.
-#define EXPECT_PRED_FORMAT3(pred_format, v1, v2, v3) \
- GTEST_PRED_FORMAT3_(pred_format, v1, v2, v3, GTEST_NONFATAL_FAILURE_)
-#define EXPECT_PRED3(pred, v1, v2, v3) \
- GTEST_PRED3_(pred, v1, v2, v3, GTEST_NONFATAL_FAILURE_)
-#define ASSERT_PRED_FORMAT3(pred_format, v1, v2, v3) \
- GTEST_PRED_FORMAT3_(pred_format, v1, v2, v3, GTEST_FATAL_FAILURE_)
-#define ASSERT_PRED3(pred, v1, v2, v3) \
- GTEST_PRED3_(pred, v1, v2, v3, GTEST_FATAL_FAILURE_)
-
-
-
-// Helper function for implementing {EXPECT|ASSERT}_PRED4. Don't use
-// this in your code.
-template <typename Pred,
- typename T1,
- typename T2,
- typename T3,
- typename T4>
-AssertionResult AssertPred4Helper(const char* pred_text,
- const char* e1,
- const char* e2,
- const char* e3,
- const char* e4,
- Pred pred,
- const T1& v1,
- const T2& v2,
- const T3& v3,
- const T4& v4) {
- if (pred(v1, v2, v3, v4)) return AssertionSuccess();
-
- return AssertionFailure() << pred_text << "("
- << e1 << ", "
- << e2 << ", "
- << e3 << ", "
- << e4 << ") evaluates to false, where"
- << "\n" << e1 << " evaluates to " << v1
- << "\n" << e2 << " evaluates to " << v2
- << "\n" << e3 << " evaluates to " << v3
- << "\n" << e4 << " evaluates to " << v4;
-}
-
-// Internal macro for implementing {EXPECT|ASSERT}_PRED_FORMAT4.
-// Don't use this in your code.
-#define GTEST_PRED_FORMAT4_(pred_format, v1, v2, v3, v4, on_failure)\
- GTEST_ASSERT_(pred_format(#v1, #v2, #v3, #v4, v1, v2, v3, v4), \
- on_failure)
-
-// Internal macro for implementing {EXPECT|ASSERT}_PRED4. Don't use
-// this in your code.
-#define GTEST_PRED4_(pred, v1, v2, v3, v4, on_failure)\
- GTEST_ASSERT_(::testing::AssertPred4Helper(#pred, \
- #v1, \
- #v2, \
- #v3, \
- #v4, \
- pred, \
- v1, \
- v2, \
- v3, \
- v4), on_failure)
-
-// 4-ary predicate assertion macros.
-#define EXPECT_PRED_FORMAT4(pred_format, v1, v2, v3, v4) \
- GTEST_PRED_FORMAT4_(pred_format, v1, v2, v3, v4, GTEST_NONFATAL_FAILURE_)
-#define EXPECT_PRED4(pred, v1, v2, v3, v4) \
- GTEST_PRED4_(pred, v1, v2, v3, v4, GTEST_NONFATAL_FAILURE_)
-#define ASSERT_PRED_FORMAT4(pred_format, v1, v2, v3, v4) \
- GTEST_PRED_FORMAT4_(pred_format, v1, v2, v3, v4, GTEST_FATAL_FAILURE_)
-#define ASSERT_PRED4(pred, v1, v2, v3, v4) \
- GTEST_PRED4_(pred, v1, v2, v3, v4, GTEST_FATAL_FAILURE_)
-
-
-
-// Helper function for implementing {EXPECT|ASSERT}_PRED5. Don't use
-// this in your code.
-template <typename Pred,
- typename T1,
- typename T2,
- typename T3,
- typename T4,
- typename T5>
-AssertionResult AssertPred5Helper(const char* pred_text,
- const char* e1,
- const char* e2,
- const char* e3,
- const char* e4,
- const char* e5,
- Pred pred,
- const T1& v1,
- const T2& v2,
- const T3& v3,
- const T4& v4,
- const T5& v5) {
- if (pred(v1, v2, v3, v4, v5)) return AssertionSuccess();
-
- return AssertionFailure() << pred_text << "("
- << e1 << ", "
- << e2 << ", "
- << e3 << ", "
- << e4 << ", "
- << e5 << ") evaluates to false, where"
- << "\n" << e1 << " evaluates to " << v1
- << "\n" << e2 << " evaluates to " << v2
- << "\n" << e3 << " evaluates to " << v3
- << "\n" << e4 << " evaluates to " << v4
- << "\n" << e5 << " evaluates to " << v5;
-}
-
-// Internal macro for implementing {EXPECT|ASSERT}_PRED_FORMAT5.
-// Don't use this in your code.
-#define GTEST_PRED_FORMAT5_(pred_format, v1, v2, v3, v4, v5, on_failure)\
- GTEST_ASSERT_(pred_format(#v1, #v2, #v3, #v4, #v5, v1, v2, v3, v4, v5), \
- on_failure)
-
-// Internal macro for implementing {EXPECT|ASSERT}_PRED5. Don't use
-// this in your code.
-#define GTEST_PRED5_(pred, v1, v2, v3, v4, v5, on_failure)\
- GTEST_ASSERT_(::testing::AssertPred5Helper(#pred, \
- #v1, \
- #v2, \
- #v3, \
- #v4, \
- #v5, \
- pred, \
- v1, \
- v2, \
- v3, \
- v4, \
- v5), on_failure)
-
-// 5-ary predicate assertion macros.
-#define EXPECT_PRED_FORMAT5(pred_format, v1, v2, v3, v4, v5) \
- GTEST_PRED_FORMAT5_(pred_format, v1, v2, v3, v4, v5, GTEST_NONFATAL_FAILURE_)
-#define EXPECT_PRED5(pred, v1, v2, v3, v4, v5) \
- GTEST_PRED5_(pred, v1, v2, v3, v4, v5, GTEST_NONFATAL_FAILURE_)
-#define ASSERT_PRED_FORMAT5(pred_format, v1, v2, v3, v4, v5) \
- GTEST_PRED_FORMAT5_(pred_format, v1, v2, v3, v4, v5, GTEST_FATAL_FAILURE_)
-#define ASSERT_PRED5(pred, v1, v2, v3, v4, v5) \
- GTEST_PRED5_(pred, v1, v2, v3, v4, v5, GTEST_FATAL_FAILURE_)
-
-
-
-#endif // GTEST_INCLUDE_GTEST_GTEST_PRED_IMPL_H_
diff --git a/third_party/aom/third_party/googletest/src/googletest/include/gtest/gtest_prod.h b/third_party/aom/third_party/googletest/src/googletest/include/gtest/gtest_prod.h
deleted file mode 100644
index da80ddc6c..000000000
--- a/third_party/aom/third_party/googletest/src/googletest/include/gtest/gtest_prod.h
+++ /dev/null
@@ -1,58 +0,0 @@
-// Copyright 2006, Google Inc.
-// All rights reserved.
-//
-// Redistribution and use in source and binary forms, with or without
-// modification, are permitted provided that the following conditions are
-// met:
-//
-// * Redistributions of source code must retain the above copyright
-// notice, this list of conditions and the following disclaimer.
-// * Redistributions in binary form must reproduce the above
-// copyright notice, this list of conditions and the following disclaimer
-// in the documentation and/or other materials provided with the
-// distribution.
-// * Neither the name of Google Inc. nor the names of its
-// contributors may be used to endorse or promote products derived from
-// this software without specific prior written permission.
-//
-// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
-// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
-// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
-// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
-// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
-// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
-// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
-// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
-// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-//
-// Author: wan@google.com (Zhanyong Wan)
-//
-// Google C++ Testing Framework definitions useful in production code.
-
-#ifndef GTEST_INCLUDE_GTEST_GTEST_PROD_H_
-#define GTEST_INCLUDE_GTEST_GTEST_PROD_H_
-
-// When you need to test the private or protected members of a class,
-// use the FRIEND_TEST macro to declare your tests as friends of the
-// class. For example:
-//
-// class MyClass {
-// private:
-// void MyMethod();
-// FRIEND_TEST(MyClassTest, MyMethod);
-// };
-//
-// class MyClassTest : public testing::Test {
-// // ...
-// };
-//
-// TEST_F(MyClassTest, MyMethod) {
-// // Can call MyClass::MyMethod() here.
-// }
-
-#define FRIEND_TEST(test_case_name, test_name)\
-friend class test_case_name##_##test_name##_Test
-
-#endif // GTEST_INCLUDE_GTEST_GTEST_PROD_H_
diff --git a/third_party/aom/third_party/googletest/src/googletest/include/gtest/internal/custom/gtest-port.h b/third_party/aom/third_party/googletest/src/googletest/include/gtest/internal/custom/gtest-port.h
deleted file mode 100644
index 7e744bd3b..000000000
--- a/third_party/aom/third_party/googletest/src/googletest/include/gtest/internal/custom/gtest-port.h
+++ /dev/null
@@ -1,69 +0,0 @@
-// Copyright 2015, Google Inc.
-// All rights reserved.
-//
-// Redistribution and use in source and binary forms, with or without
-// modification, are permitted provided that the following conditions are
-// met:
-//
-// * Redistributions of source code must retain the above copyright
-// notice, this list of conditions and the following disclaimer.
-// * Redistributions in binary form must reproduce the above
-// copyright notice, this list of conditions and the following disclaimer
-// in the documentation and/or other materials provided with the
-// distribution.
-// * Neither the name of Google Inc. nor the names of its
-// contributors may be used to endorse or promote products derived from
-// this software without specific prior written permission.
-//
-// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
-// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
-// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
-// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
-// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
-// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
-// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
-// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
-// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-//
-// Injection point for custom user configurations.
-// The following macros can be defined:
-//
-// Flag related macros:
-// GTEST_FLAG(flag_name)
-// GTEST_USE_OWN_FLAGFILE_FLAG_ - Define to 0 when the system provides its
-// own flagfile flag parsing.
-// GTEST_DECLARE_bool_(name)
-// GTEST_DECLARE_int32_(name)
-// GTEST_DECLARE_string_(name)
-// GTEST_DEFINE_bool_(name, default_val, doc)
-// GTEST_DEFINE_int32_(name, default_val, doc)
-// GTEST_DEFINE_string_(name, default_val, doc)
-//
-// Test filtering:
-// GTEST_TEST_FILTER_ENV_VAR_ - The name of an environment variable that
-// will be used if --GTEST_FLAG(test_filter)
-// is not provided.
-//
-// Logging:
-// GTEST_LOG_(severity)
-// GTEST_CHECK_(condition)
-// Functions LogToStderr() and FlushInfoLog() have to be provided too.
-//
-// Threading:
-// GTEST_HAS_NOTIFICATION_ - Enabled if Notification is already provided.
-// GTEST_HAS_MUTEX_AND_THREAD_LOCAL_ - Enabled if Mutex and ThreadLocal are
-// already provided.
-// Must also provide GTEST_DECLARE_STATIC_MUTEX_(mutex) and
-// GTEST_DEFINE_STATIC_MUTEX_(mutex)
-//
-// GTEST_EXCLUSIVE_LOCK_REQUIRED_(locks)
-// GTEST_LOCK_EXCLUDED_(locks)
-//
-// ** Custom implementation starts here **
-
-#ifndef GTEST_INCLUDE_GTEST_INTERNAL_CUSTOM_GTEST_PORT_H_
-#define GTEST_INCLUDE_GTEST_INTERNAL_CUSTOM_GTEST_PORT_H_
-
-#endif // GTEST_INCLUDE_GTEST_INTERNAL_CUSTOM_GTEST_PORT_H_
diff --git a/third_party/aom/third_party/googletest/src/googletest/include/gtest/internal/custom/gtest-printers.h b/third_party/aom/third_party/googletest/src/googletest/include/gtest/internal/custom/gtest-printers.h
deleted file mode 100644
index 60c1ea050..000000000
--- a/third_party/aom/third_party/googletest/src/googletest/include/gtest/internal/custom/gtest-printers.h
+++ /dev/null
@@ -1,42 +0,0 @@
-// Copyright 2015, Google Inc.
-// All rights reserved.
-//
-// Redistribution and use in source and binary forms, with or without
-// modification, are permitted provided that the following conditions are
-// met:
-//
-// * Redistributions of source code must retain the above copyright
-// notice, this list of conditions and the following disclaimer.
-// * Redistributions in binary form must reproduce the above
-// copyright notice, this list of conditions and the following disclaimer
-// in the documentation and/or other materials provided with the
-// distribution.
-// * Neither the name of Google Inc. nor the names of its
-// contributors may be used to endorse or promote products derived from
-// this software without specific prior written permission.
-//
-// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
-// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
-// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
-// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
-// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
-// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
-// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
-// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
-// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-//
-// This file provides an injection point for custom printers in a local
-// installation of gTest.
-// It will be included from gtest-printers.h and the overrides in this file
-// will be visible to everyone.
-// See documentation at gtest/gtest-printers.h for details on how to define a
-// custom printer.
-//
-// ** Custom implementation starts here **
-
-#ifndef GTEST_INCLUDE_GTEST_INTERNAL_CUSTOM_GTEST_PRINTERS_H_
-#define GTEST_INCLUDE_GTEST_INTERNAL_CUSTOM_GTEST_PRINTERS_H_
-
-#endif // GTEST_INCLUDE_GTEST_INTERNAL_CUSTOM_GTEST_PRINTERS_H_
diff --git a/third_party/aom/third_party/googletest/src/googletest/include/gtest/internal/custom/gtest.h b/third_party/aom/third_party/googletest/src/googletest/include/gtest/internal/custom/gtest.h
deleted file mode 100644
index c27412a89..000000000
--- a/third_party/aom/third_party/googletest/src/googletest/include/gtest/internal/custom/gtest.h
+++ /dev/null
@@ -1,41 +0,0 @@
-// Copyright 2015, Google Inc.
-// All rights reserved.
-//
-// Redistribution and use in source and binary forms, with or without
-// modification, are permitted provided that the following conditions are
-// met:
-//
-// * Redistributions of source code must retain the above copyright
-// notice, this list of conditions and the following disclaimer.
-// * Redistributions in binary form must reproduce the above
-// copyright notice, this list of conditions and the following disclaimer
-// in the documentation and/or other materials provided with the
-// distribution.
-// * Neither the name of Google Inc. nor the names of its
-// contributors may be used to endorse or promote products derived from
-// this software without specific prior written permission.
-//
-// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
-// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
-// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
-// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
-// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
-// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
-// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
-// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
-// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-//
-// Injection point for custom user configurations.
-// The following macros can be defined:
-//
-// GTEST_OS_STACK_TRACE_GETTER_ - The name of an implementation of
-// OsStackTraceGetterInterface.
-//
-// ** Custom implementation starts here **
-
-#ifndef GTEST_INCLUDE_GTEST_INTERNAL_CUSTOM_GTEST_H_
-#define GTEST_INCLUDE_GTEST_INTERNAL_CUSTOM_GTEST_H_
-
-#endif // GTEST_INCLUDE_GTEST_INTERNAL_CUSTOM_GTEST_H_
diff --git a/third_party/aom/third_party/googletest/src/googletest/include/gtest/internal/gtest-death-test-internal.h b/third_party/aom/third_party/googletest/src/googletest/include/gtest/internal/gtest-death-test-internal.h
deleted file mode 100644
index 2b3a78f5b..000000000
--- a/third_party/aom/third_party/googletest/src/googletest/include/gtest/internal/gtest-death-test-internal.h
+++ /dev/null
@@ -1,319 +0,0 @@
-// Copyright 2005, Google Inc.
-// All rights reserved.
-//
-// Redistribution and use in source and binary forms, with or without
-// modification, are permitted provided that the following conditions are
-// met:
-//
-// * Redistributions of source code must retain the above copyright
-// notice, this list of conditions and the following disclaimer.
-// * Redistributions in binary form must reproduce the above
-// copyright notice, this list of conditions and the following disclaimer
-// in the documentation and/or other materials provided with the
-// distribution.
-// * Neither the name of Google Inc. nor the names of its
-// contributors may be used to endorse or promote products derived from
-// this software without specific prior written permission.
-//
-// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
-// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
-// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
-// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
-// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
-// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
-// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
-// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
-// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-//
-// Authors: wan@google.com (Zhanyong Wan), eefacm@gmail.com (Sean Mcafee)
-//
-// The Google C++ Testing Framework (Google Test)
-//
-// This header file defines internal utilities needed for implementing
-// death tests. They are subject to change without notice.
-
-#ifndef GTEST_INCLUDE_GTEST_INTERNAL_GTEST_DEATH_TEST_INTERNAL_H_
-#define GTEST_INCLUDE_GTEST_INTERNAL_GTEST_DEATH_TEST_INTERNAL_H_
-
-#include "gtest/internal/gtest-internal.h"
-
-#include <stdio.h>
-
-namespace testing {
-namespace internal {
-
-GTEST_DECLARE_string_(internal_run_death_test);
-
-// Names of the flags (needed for parsing Google Test flags).
-const char kDeathTestStyleFlag[] = "death_test_style";
-const char kDeathTestUseFork[] = "death_test_use_fork";
-const char kInternalRunDeathTestFlag[] = "internal_run_death_test";
-
-#if GTEST_HAS_DEATH_TEST
-
-// DeathTest is a class that hides much of the complexity of the
-// GTEST_DEATH_TEST_ macro. It is abstract; its static Create method
-// returns a concrete class that depends on the prevailing death test
-// style, as defined by the --gtest_death_test_style and/or
-// --gtest_internal_run_death_test flags.
-
-// In describing the results of death tests, these terms are used with
-// the corresponding definitions:
-//
-// exit status: The integer exit information in the format specified
-// by wait(2)
-// exit code: The integer code passed to exit(3), _exit(2), or
-// returned from main()
-class GTEST_API_ DeathTest {
- public:
- // Create returns false if there was an error determining the
- // appropriate action to take for the current death test; for example,
- // if the gtest_death_test_style flag is set to an invalid value.
- // The LastMessage method will return a more detailed message in that
- // case. Otherwise, the DeathTest pointer pointed to by the "test"
- // argument is set. If the death test should be skipped, the pointer
- // is set to NULL; otherwise, it is set to the address of a new concrete
- // DeathTest object that controls the execution of the current test.
- static bool Create(const char* statement, const RE* regex,
- const char* file, int line, DeathTest** test);
- DeathTest();
- virtual ~DeathTest() { }
-
- // A helper class that aborts a death test when it's deleted.
- class ReturnSentinel {
- public:
- explicit ReturnSentinel(DeathTest* test) : test_(test) { }
- ~ReturnSentinel() { test_->Abort(TEST_ENCOUNTERED_RETURN_STATEMENT); }
- private:
- DeathTest* const test_;
- GTEST_DISALLOW_COPY_AND_ASSIGN_(ReturnSentinel);
- } GTEST_ATTRIBUTE_UNUSED_;
-
- // An enumeration of possible roles that may be taken when a death
- // test is encountered. EXECUTE means that the death test logic should
- // be executed immediately. OVERSEE means that the program should prepare
- // the appropriate environment for a child process to execute the death
- // test, then wait for it to complete.
- enum TestRole { OVERSEE_TEST, EXECUTE_TEST };
-
- // An enumeration of the three reasons that a test might be aborted.
- enum AbortReason {
- TEST_ENCOUNTERED_RETURN_STATEMENT,
- TEST_THREW_EXCEPTION,
- TEST_DID_NOT_DIE
- };
-
- // Assumes one of the above roles.
- virtual TestRole AssumeRole() = 0;
-
- // Waits for the death test to finish and returns its status.
- virtual int Wait() = 0;
-
- // Returns true if the death test passed; that is, the test process
- // exited during the test, its exit status matches a user-supplied
- // predicate, and its stderr output matches a user-supplied regular
- // expression.
- // The user-supplied predicate may be a macro expression rather
- // than a function pointer or functor, or else Wait and Passed could
- // be combined.
- virtual bool Passed(bool exit_status_ok) = 0;
-
- // Signals that the death test did not die as expected.
- virtual void Abort(AbortReason reason) = 0;
-
- // Returns a human-readable outcome message regarding the outcome of
- // the last death test.
- static const char* LastMessage();
-
- static void set_last_death_test_message(const std::string& message);
-
- private:
- // A string containing a description of the outcome of the last death test.
- static std::string last_death_test_message_;
-
- GTEST_DISALLOW_COPY_AND_ASSIGN_(DeathTest);
-};
-
-// Factory interface for death tests. May be mocked out for testing.
-class DeathTestFactory {
- public:
- virtual ~DeathTestFactory() { }
- virtual bool Create(const char* statement, const RE* regex,
- const char* file, int line, DeathTest** test) = 0;
-};
-
-// A concrete DeathTestFactory implementation for normal use.
-class DefaultDeathTestFactory : public DeathTestFactory {
- public:
- virtual bool Create(const char* statement, const RE* regex,
- const char* file, int line, DeathTest** test);
-};
-
-// Returns true if exit_status describes a process that was terminated
-// by a signal, or exited normally with a nonzero exit code.
-GTEST_API_ bool ExitedUnsuccessfully(int exit_status);
-
-// Traps C++ exceptions escaping statement and reports them as test
-// failures. Note that trapping SEH exceptions is not implemented here.
-# if GTEST_HAS_EXCEPTIONS
-# define GTEST_EXECUTE_DEATH_TEST_STATEMENT_(statement, death_test) \
- try { \
- GTEST_SUPPRESS_UNREACHABLE_CODE_WARNING_BELOW_(statement); \
- } catch (const ::std::exception& gtest_exception) { \
- fprintf(\
- stderr, \
- "\n%s: Caught std::exception-derived exception escaping the " \
- "death test statement. Exception message: %s\n", \
- ::testing::internal::FormatFileLocation(__FILE__, __LINE__).c_str(), \
- gtest_exception.what()); \
- fflush(stderr); \
- death_test->Abort(::testing::internal::DeathTest::TEST_THREW_EXCEPTION); \
- } catch (...) { \
- death_test->Abort(::testing::internal::DeathTest::TEST_THREW_EXCEPTION); \
- }
-
-# else
-# define GTEST_EXECUTE_DEATH_TEST_STATEMENT_(statement, death_test) \
- GTEST_SUPPRESS_UNREACHABLE_CODE_WARNING_BELOW_(statement)
-
-# endif
-
-// This macro is for implementing ASSERT_DEATH*, EXPECT_DEATH*,
-// ASSERT_EXIT*, and EXPECT_EXIT*.
-# define GTEST_DEATH_TEST_(statement, predicate, regex, fail) \
- GTEST_AMBIGUOUS_ELSE_BLOCKER_ \
- if (::testing::internal::AlwaysTrue()) { \
- const ::testing::internal::RE& gtest_regex = (regex); \
- ::testing::internal::DeathTest* gtest_dt; \
- if (!::testing::internal::DeathTest::Create(#statement, &gtest_regex, \
- __FILE__, __LINE__, &gtest_dt)) { \
- goto GTEST_CONCAT_TOKEN_(gtest_label_, __LINE__); \
- } \
- if (gtest_dt != NULL) { \
- ::testing::internal::scoped_ptr< ::testing::internal::DeathTest> \
- gtest_dt_ptr(gtest_dt); \
- switch (gtest_dt->AssumeRole()) { \
- case ::testing::internal::DeathTest::OVERSEE_TEST: \
- if (!gtest_dt->Passed(predicate(gtest_dt->Wait()))) { \
- goto GTEST_CONCAT_TOKEN_(gtest_label_, __LINE__); \
- } \
- break; \
- case ::testing::internal::DeathTest::EXECUTE_TEST: { \
- ::testing::internal::DeathTest::ReturnSentinel \
- gtest_sentinel(gtest_dt); \
- GTEST_EXECUTE_DEATH_TEST_STATEMENT_(statement, gtest_dt); \
- gtest_dt->Abort(::testing::internal::DeathTest::TEST_DID_NOT_DIE); \
- break; \
- } \
- default: \
- break; \
- } \
- } \
- } else \
- GTEST_CONCAT_TOKEN_(gtest_label_, __LINE__): \
- fail(::testing::internal::DeathTest::LastMessage())
-// The symbol "fail" here expands to something into which a message
-// can be streamed.
-
-// This macro is for implementing ASSERT/EXPECT_DEBUG_DEATH when compiled in
-// NDEBUG mode. In this case we need the statements to be executed, the regex is
-// ignored, and the macro must accept a streamed message even though the message
-// is never printed.
-# define GTEST_EXECUTE_STATEMENT_(statement, regex) \
- GTEST_AMBIGUOUS_ELSE_BLOCKER_ \
- if (::testing::internal::AlwaysTrue()) { \
- GTEST_SUPPRESS_UNREACHABLE_CODE_WARNING_BELOW_(statement); \
- } else \
- ::testing::Message()
-
-// A class representing the parsed contents of the
-// --gtest_internal_run_death_test flag, as it existed when
-// RUN_ALL_TESTS was called.
-class InternalRunDeathTestFlag {
- public:
- InternalRunDeathTestFlag(const std::string& a_file,
- int a_line,
- int an_index,
- int a_write_fd)
- : file_(a_file), line_(a_line), index_(an_index),
- write_fd_(a_write_fd) {}
-
- ~InternalRunDeathTestFlag() {
- if (write_fd_ >= 0)
- posix::Close(write_fd_);
- }
-
- const std::string& file() const { return file_; }
- int line() const { return line_; }
- int index() const { return index_; }
- int write_fd() const { return write_fd_; }
-
- private:
- std::string file_;
- int line_;
- int index_;
- int write_fd_;
-
- GTEST_DISALLOW_COPY_AND_ASSIGN_(InternalRunDeathTestFlag);
-};
-
-// Returns a newly created InternalRunDeathTestFlag object with fields
-// initialized from the GTEST_FLAG(internal_run_death_test) flag if
-// the flag is specified; otherwise returns NULL.
-InternalRunDeathTestFlag* ParseInternalRunDeathTestFlag();
-
-#else // GTEST_HAS_DEATH_TEST
-
-// This macro is used for implementing macros such as
-// EXPECT_DEATH_IF_SUPPORTED and ASSERT_DEATH_IF_SUPPORTED on systems where
-// death tests are not supported. Those macros must compile on such systems
-// iff EXPECT_DEATH and ASSERT_DEATH compile with the same parameters on
-// systems that support death tests. This allows one to write such a macro
-// on a system that does not support death tests and be sure that it will
-// compile on a death-test supporting system.
-//
-// Parameters:
-// statement - A statement that a macro such as EXPECT_DEATH would test
-// for program termination. This macro has to make sure this
-// statement is compiled but not executed, to ensure that
-// EXPECT_DEATH_IF_SUPPORTED compiles with a certain
-// parameter iff EXPECT_DEATH compiles with it.
-// regex - A regex that a macro such as EXPECT_DEATH would use to test
-// the output of statement. This parameter has to be
-// compiled but not evaluated by this macro, to ensure that
-// this macro only accepts expressions that a macro such as
-// EXPECT_DEATH would accept.
-// terminator - Must be an empty statement for EXPECT_DEATH_IF_SUPPORTED
-// and a return statement for ASSERT_DEATH_IF_SUPPORTED.
-// This ensures that ASSERT_DEATH_IF_SUPPORTED will not
-// compile inside functions where ASSERT_DEATH doesn't
-// compile.
-//
-// The branch that has an always false condition is used to ensure that
-// statement and regex are compiled (and thus syntactically correct) but
-// never executed. The unreachable code macro protects the terminator
-// statement from generating an 'unreachable code' warning in case
-// statement unconditionally returns or throws. The Message constructor at
-// the end allows the syntax of streaming additional messages into the
-// macro, for compilational compatibility with EXPECT_DEATH/ASSERT_DEATH.
-# define GTEST_UNSUPPORTED_DEATH_TEST_(statement, regex, terminator) \
- GTEST_AMBIGUOUS_ELSE_BLOCKER_ \
- if (::testing::internal::AlwaysTrue()) { \
- GTEST_LOG_(WARNING) \
- << "Death tests are not supported on this platform.\n" \
- << "Statement '" #statement "' cannot be verified."; \
- } else if (::testing::internal::AlwaysFalse()) { \
- ::testing::internal::RE::PartialMatch(".*", (regex)); \
- GTEST_SUPPRESS_UNREACHABLE_CODE_WARNING_BELOW_(statement); \
- terminator; \
- } else \
- ::testing::Message()
-
-#endif // GTEST_HAS_DEATH_TEST
-
-} // namespace internal
-} // namespace testing
-
-#endif // GTEST_INCLUDE_GTEST_INTERNAL_GTEST_DEATH_TEST_INTERNAL_H_
diff --git a/third_party/aom/third_party/googletest/src/googletest/include/gtest/internal/gtest-filepath.h b/third_party/aom/third_party/googletest/src/googletest/include/gtest/internal/gtest-filepath.h
deleted file mode 100644
index 7a13b4b0d..000000000
--- a/third_party/aom/third_party/googletest/src/googletest/include/gtest/internal/gtest-filepath.h
+++ /dev/null
@@ -1,206 +0,0 @@
-// Copyright 2008, Google Inc.
-// All rights reserved.
-//
-// Redistribution and use in source and binary forms, with or without
-// modification, are permitted provided that the following conditions are
-// met:
-//
-// * Redistributions of source code must retain the above copyright
-// notice, this list of conditions and the following disclaimer.
-// * Redistributions in binary form must reproduce the above
-// copyright notice, this list of conditions and the following disclaimer
-// in the documentation and/or other materials provided with the
-// distribution.
-// * Neither the name of Google Inc. nor the names of its
-// contributors may be used to endorse or promote products derived from
-// this software without specific prior written permission.
-//
-// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
-// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
-// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
-// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
-// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
-// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
-// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
-// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
-// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-//
-// Author: keith.ray@gmail.com (Keith Ray)
-//
-// Google Test filepath utilities
-//
-// This header file declares classes and functions used internally by
-// Google Test. They are subject to change without notice.
-//
-// This file is #included in <gtest/internal/gtest-internal.h>.
-// Do not include this header file separately!
-
-#ifndef GTEST_INCLUDE_GTEST_INTERNAL_GTEST_FILEPATH_H_
-#define GTEST_INCLUDE_GTEST_INTERNAL_GTEST_FILEPATH_H_
-
-#include "gtest/internal/gtest-string.h"
-
-namespace testing {
-namespace internal {
-
-// FilePath - a class for file and directory pathname manipulation which
-// handles platform-specific conventions (like the pathname separator).
-// Used for helper functions for naming files in a directory for xml output.
-// Except for Set methods, all methods are const or static, which provides an
-// "immutable value object" -- useful for peace of mind.
-// A FilePath with a value ending in a path separator ("like/this/") represents
-// a directory, otherwise it is assumed to represent a file. In either case,
-// it may or may not represent an actual file or directory in the file system.
-// Names are NOT checked for syntax correctness -- no checking for illegal
-// characters, malformed paths, etc.
-
-class GTEST_API_ FilePath {
- public:
- FilePath() : pathname_("") { }
- FilePath(const FilePath& rhs) : pathname_(rhs.pathname_) { }
-
- explicit FilePath(const std::string& pathname) : pathname_(pathname) {
- Normalize();
- }
-
- FilePath& operator=(const FilePath& rhs) {
- Set(rhs);
- return *this;
- }
-
- void Set(const FilePath& rhs) {
- pathname_ = rhs.pathname_;
- }
-
- const std::string& string() const { return pathname_; }
- const char* c_str() const { return pathname_.c_str(); }
-
- // Returns the current working directory, or "" if unsuccessful.
- static FilePath GetCurrentDir();
-
- // Given directory = "dir", base_name = "test", number = 0,
- // extension = "xml", returns "dir/test.xml". If number is greater
- // than zero (e.g., 12), returns "dir/test_12.xml".
- // On Windows platform, uses \ as the separator rather than /.
- static FilePath MakeFileName(const FilePath& directory,
- const FilePath& base_name,
- int number,
- const char* extension);
-
- // Given directory = "dir", relative_path = "test.xml",
- // returns "dir/test.xml".
- // On Windows, uses \ as the separator rather than /.
- static FilePath ConcatPaths(const FilePath& directory,
- const FilePath& relative_path);
-
- // Returns a pathname for a file that does not currently exist. The pathname
- // will be directory/base_name.extension or
- // directory/base_name_<number>.extension if directory/base_name.extension
- // already exists. The number will be incremented until a pathname is found
- // that does not already exist.
- // Examples: 'dir/foo_test.xml' or 'dir/foo_test_1.xml'.
- // There could be a race condition if two or more processes are calling this
- // function at the same time -- they could both pick the same filename.
- static FilePath GenerateUniqueFileName(const FilePath& directory,
- const FilePath& base_name,
- const char* extension);
-
- // Returns true iff the path is "".
- bool IsEmpty() const { return pathname_.empty(); }
-
- // If input name has a trailing separator character, removes it and returns
- // the name, otherwise return the name string unmodified.
- // On Windows platform, uses \ as the separator, other platforms use /.
- FilePath RemoveTrailingPathSeparator() const;
-
- // Returns a copy of the FilePath with the directory part removed.
- // Example: FilePath("path/to/file").RemoveDirectoryName() returns
- // FilePath("file"). If there is no directory part ("just_a_file"), it returns
- // the FilePath unmodified. If there is no file part ("just_a_dir/") it
- // returns an empty FilePath ("").
- // On Windows platform, '\' is the path separator, otherwise it is '/'.
- FilePath RemoveDirectoryName() const;
-
- // RemoveFileName returns the directory path with the filename removed.
- // Example: FilePath("path/to/file").RemoveFileName() returns "path/to/".
- // If the FilePath is "a_file" or "/a_file", RemoveFileName returns
- // FilePath("./") or, on Windows, FilePath(".\\"). If the filepath does
- // not have a file, like "just/a/dir/", it returns the FilePath unmodified.
- // On Windows platform, '\' is the path separator, otherwise it is '/'.
- FilePath RemoveFileName() const;
-
- // Returns a copy of the FilePath with the case-insensitive extension removed.
- // Example: FilePath("dir/file.exe").RemoveExtension("EXE") returns
- // FilePath("dir/file"). If a case-insensitive extension is not
- // found, returns a copy of the original FilePath.
- FilePath RemoveExtension(const char* extension) const;
-
- // Creates directories so that path exists. Returns true if successful or if
- // the directories already exist; returns false if unable to create
- // directories for any reason. Will also return false if the FilePath does
- // not represent a directory (that is, it doesn't end with a path separator).
- bool CreateDirectoriesRecursively() const;
-
- // Create the directory so that path exists. Returns true if successful or
- // if the directory already exists; returns false if unable to create the
- // directory for any reason, including if the parent directory does not
- // exist. Not named "CreateDirectory" because that's a macro on Windows.
- bool CreateFolder() const;
-
- // Returns true if FilePath describes something in the file-system,
- // either a file, directory, or whatever, and that something exists.
- bool FileOrDirectoryExists() const;
-
- // Returns true if pathname describes a directory in the file-system
- // that exists.
- bool DirectoryExists() const;
-
- // Returns true if FilePath ends with a path separator, which indicates that
- // it is intended to represent a directory. Returns false otherwise.
- // This does NOT check that a directory (or file) actually exists.
- bool IsDirectory() const;
-
- // Returns true if pathname describes a root directory. (Windows has one
- // root directory per disk drive.)
- bool IsRootDirectory() const;
-
- // Returns true if pathname describes an absolute path.
- bool IsAbsolutePath() const;
-
- private:
- // Replaces multiple consecutive separators with a single separator.
- // For example, "bar///foo" becomes "bar/foo". Does not eliminate other
- // redundancies that might be in a pathname involving "." or "..".
- //
- // A pathname with multiple consecutive separators may occur either through
- // user error or as a result of some scripts or APIs that generate a pathname
- // with a trailing separator. On other platforms the same API or script
- // may NOT generate a pathname with a trailing "/". Then elsewhere that
- // pathname may have another "/" and pathname components added to it,
- // without checking for the separator already being there.
- // The script language and operating system may allow paths like "foo//bar"
- // but some of the functions in FilePath will not handle that correctly. In
- // particular, RemoveTrailingPathSeparator() only removes one separator, and
- // it is called in CreateDirectoriesRecursively() assuming that it will change
- // a pathname from directory syntax (trailing separator) to filename syntax.
- //
- // On Windows this method also replaces the alternate path separator '/' with
- // the primary path separator '\\', so that for example "bar\\/\\foo" becomes
- // "bar\\foo".
-
- void Normalize();
-
- // Returns a pointer to the last occurence of a valid path separator in
- // the FilePath. On Windows, for example, both '/' and '\' are valid path
- // separators. Returns NULL if no path separator was found.
- const char* FindLastPathSeparator() const;
-
- std::string pathname_;
-}; // class FilePath
-
-} // namespace internal
-} // namespace testing
-
-#endif // GTEST_INCLUDE_GTEST_INTERNAL_GTEST_FILEPATH_H_
diff --git a/third_party/aom/third_party/googletest/src/googletest/include/gtest/internal/gtest-internal.h b/third_party/aom/third_party/googletest/src/googletest/include/gtest/internal/gtest-internal.h
deleted file mode 100644
index ebd1cf615..000000000
--- a/third_party/aom/third_party/googletest/src/googletest/include/gtest/internal/gtest-internal.h
+++ /dev/null
@@ -1,1238 +0,0 @@
-// Copyright 2005, Google Inc.
-// All rights reserved.
-//
-// Redistribution and use in source and binary forms, with or without
-// modification, are permitted provided that the following conditions are
-// met:
-//
-// * Redistributions of source code must retain the above copyright
-// notice, this list of conditions and the following disclaimer.
-// * Redistributions in binary form must reproduce the above
-// copyright notice, this list of conditions and the following disclaimer
-// in the documentation and/or other materials provided with the
-// distribution.
-// * Neither the name of Google Inc. nor the names of its
-// contributors may be used to endorse or promote products derived from
-// this software without specific prior written permission.
-//
-// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
-// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
-// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
-// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
-// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
-// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
-// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
-// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
-// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-//
-// Authors: wan@google.com (Zhanyong Wan), eefacm@gmail.com (Sean Mcafee)
-//
-// The Google C++ Testing Framework (Google Test)
-//
-// This header file declares functions and macros used internally by
-// Google Test. They are subject to change without notice.
-
-#ifndef GTEST_INCLUDE_GTEST_INTERNAL_GTEST_INTERNAL_H_
-#define GTEST_INCLUDE_GTEST_INTERNAL_GTEST_INTERNAL_H_
-
-#include "gtest/internal/gtest-port.h"
-
-#if GTEST_OS_LINUX
-# include <stdlib.h>
-# include <sys/types.h>
-# include <sys/wait.h>
-# include <unistd.h>
-#endif // GTEST_OS_LINUX
-
-#if GTEST_HAS_EXCEPTIONS
-# include <stdexcept>
-#endif
-
-#include <ctype.h>
-#include <float.h>
-#include <string.h>
-#include <iomanip>
-#include <limits>
-#include <map>
-#include <set>
-#include <string>
-#include <vector>
-
-#include "gtest/gtest-message.h"
-#include "gtest/internal/gtest-string.h"
-#include "gtest/internal/gtest-filepath.h"
-#include "gtest/internal/gtest-type-util.h"
-
-// Due to C++ preprocessor weirdness, we need double indirection to
-// concatenate two tokens when one of them is __LINE__. Writing
-//
-// foo ## __LINE__
-//
-// will result in the token foo__LINE__, instead of foo followed by
-// the current line number. For more details, see
-// http://www.parashift.com/c++-faq-lite/misc-technical-issues.html#faq-39.6
-#define GTEST_CONCAT_TOKEN_(foo, bar) GTEST_CONCAT_TOKEN_IMPL_(foo, bar)
-#define GTEST_CONCAT_TOKEN_IMPL_(foo, bar) foo ## bar
-
-class ProtocolMessage;
-namespace proto2 { class Message; }
-
-namespace testing {
-
-// Forward declarations.
-
-class AssertionResult; // Result of an assertion.
-class Message; // Represents a failure message.
-class Test; // Represents a test.
-class TestInfo; // Information about a test.
-class TestPartResult; // Result of a test part.
-class UnitTest; // A collection of test cases.
-
-template <typename T>
-::std::string PrintToString(const T& value);
-
-namespace internal {
-
-struct TraceInfo; // Information about a trace point.
-class ScopedTrace; // Implements scoped trace.
-class TestInfoImpl; // Opaque implementation of TestInfo
-class UnitTestImpl; // Opaque implementation of UnitTest
-
-// The text used in failure messages to indicate the start of the
-// stack trace.
-GTEST_API_ extern const char kStackTraceMarker[];
-
-// Two overloaded helpers for checking at compile time whether an
-// expression is a null pointer literal (i.e. NULL or any 0-valued
-// compile-time integral constant). Their return values have
-// different sizes, so we can use sizeof() to test which version is
-// picked by the compiler. These helpers have no implementations, as
-// we only need their signatures.
-//
-// Given IsNullLiteralHelper(x), the compiler will pick the first
-// version if x can be implicitly converted to Secret*, and pick the
-// second version otherwise. Since Secret is a secret and incomplete
-// type, the only expression a user can write that has type Secret* is
-// a null pointer literal. Therefore, we know that x is a null
-// pointer literal if and only if the first version is picked by the
-// compiler.
-char IsNullLiteralHelper(Secret* p);
-char (&IsNullLiteralHelper(...))[2]; // NOLINT
-
-// A compile-time bool constant that is true if and only if x is a
-// null pointer literal (i.e. NULL or any 0-valued compile-time
-// integral constant).
-#ifdef GTEST_ELLIPSIS_NEEDS_POD_
-// We lose support for NULL detection where the compiler doesn't like
-// passing non-POD classes through ellipsis (...).
-# define GTEST_IS_NULL_LITERAL_(x) false
-#else
-# define GTEST_IS_NULL_LITERAL_(x) \
- (sizeof(::testing::internal::IsNullLiteralHelper(x)) == 1)
-#endif // GTEST_ELLIPSIS_NEEDS_POD_
-
-// Appends the user-supplied message to the Google-Test-generated message.
-GTEST_API_ std::string AppendUserMessage(
- const std::string& gtest_msg, const Message& user_msg);
-
-#if GTEST_HAS_EXCEPTIONS
-
-// This exception is thrown by (and only by) a failed Google Test
-// assertion when GTEST_FLAG(throw_on_failure) is true (if exceptions
-// are enabled). We derive it from std::runtime_error, which is for
-// errors presumably detectable only at run time. Since
-// std::runtime_error inherits from std::exception, many testing
-// frameworks know how to extract and print the message inside it.
-class GTEST_API_ GoogleTestFailureException : public ::std::runtime_error {
- public:
- explicit GoogleTestFailureException(const TestPartResult& failure);
-};
-
-#endif // GTEST_HAS_EXCEPTIONS
-
-// A helper class for creating scoped traces in user programs.
-class GTEST_API_ ScopedTrace {
- public:
- // The c'tor pushes the given source file location and message onto
- // a trace stack maintained by Google Test.
- ScopedTrace(const char* file, int line, const Message& message);
-
- // The d'tor pops the info pushed by the c'tor.
- //
- // Note that the d'tor is not virtual in order to be efficient.
- // Don't inherit from ScopedTrace!
- ~ScopedTrace();
-
- private:
- GTEST_DISALLOW_COPY_AND_ASSIGN_(ScopedTrace);
-} GTEST_ATTRIBUTE_UNUSED_; // A ScopedTrace object does its job in its
- // c'tor and d'tor. Therefore it doesn't
- // need to be used otherwise.
-
-namespace edit_distance {
-// Returns the optimal edits to go from 'left' to 'right'.
-// All edits cost the same, with replace having lower priority than
-// add/remove.
-// Simple implementation of the Wagner–Fischer algorithm.
-// See http://en.wikipedia.org/wiki/Wagner-Fischer_algorithm
-enum EditType { kMatch, kAdd, kRemove, kReplace };
-GTEST_API_ std::vector<EditType> CalculateOptimalEdits(
- const std::vector<size_t>& left, const std::vector<size_t>& right);
-
-// Same as above, but the input is represented as strings.
-GTEST_API_ std::vector<EditType> CalculateOptimalEdits(
- const std::vector<std::string>& left,
- const std::vector<std::string>& right);
-
-// Create a diff of the input strings in Unified diff format.
-GTEST_API_ std::string CreateUnifiedDiff(const std::vector<std::string>& left,
- const std::vector<std::string>& right,
- size_t context = 2);
-
-} // namespace edit_distance
-
-// Calculate the diff between 'left' and 'right' and return it in unified diff
-// format.
-// If not null, stores in 'total_line_count' the total number of lines found
-// in left + right.
-GTEST_API_ std::string DiffStrings(const std::string& left,
- const std::string& right,
- size_t* total_line_count);
-
-// Constructs and returns the message for an equality assertion
-// (e.g. ASSERT_EQ, EXPECT_STREQ, etc) failure.
-//
-// The first four parameters are the expressions used in the assertion
-// and their values, as strings. For example, for ASSERT_EQ(foo, bar)
-// where foo is 5 and bar is 6, we have:
-//
-// expected_expression: "foo"
-// actual_expression: "bar"
-// expected_value: "5"
-// actual_value: "6"
-//
-// The ignoring_case parameter is true iff the assertion is a
-// *_STRCASEEQ*. When it's true, the string " (ignoring case)" will
-// be inserted into the message.
-GTEST_API_ AssertionResult EqFailure(const char* expected_expression,
- const char* actual_expression,
- const std::string& expected_value,
- const std::string& actual_value,
- bool ignoring_case);
-
-// Constructs a failure message for Boolean assertions such as EXPECT_TRUE.
-GTEST_API_ std::string GetBoolAssertionFailureMessage(
- const AssertionResult& assertion_result,
- const char* expression_text,
- const char* actual_predicate_value,
- const char* expected_predicate_value);
-
-// This template class represents an IEEE floating-point number
-// (either single-precision or double-precision, depending on the
-// template parameters).
-//
-// The purpose of this class is to do more sophisticated number
-// comparison. (Due to round-off error, etc, it's very unlikely that
-// two floating-points will be equal exactly. Hence a naive
-// comparison by the == operation often doesn't work.)
-//
-// Format of IEEE floating-point:
-//
-// The most-significant bit being the leftmost, an IEEE
-// floating-point looks like
-//
-// sign_bit exponent_bits fraction_bits
-//
-// Here, sign_bit is a single bit that designates the sign of the
-// number.
-//
-// For float, there are 8 exponent bits and 23 fraction bits.
-//
-// For double, there are 11 exponent bits and 52 fraction bits.
-//
-// More details can be found at
-// http://en.wikipedia.org/wiki/IEEE_floating-point_standard.
-//
-// Template parameter:
-//
-// RawType: the raw floating-point type (either float or double)
-template <typename RawType>
-class FloatingPoint {
- public:
- // Defines the unsigned integer type that has the same size as the
- // floating point number.
- typedef typename TypeWithSize<sizeof(RawType)>::UInt Bits;
-
- // Constants.
-
- // # of bits in a number.
- static const size_t kBitCount = 8*sizeof(RawType);
-
- // # of fraction bits in a number.
- static const size_t kFractionBitCount =
- std::numeric_limits<RawType>::digits - 1;
-
- // # of exponent bits in a number.
- static const size_t kExponentBitCount = kBitCount - 1 - kFractionBitCount;
-
- // The mask for the sign bit.
- static const Bits kSignBitMask = static_cast<Bits>(1) << (kBitCount - 1);
-
- // The mask for the fraction bits.
- static const Bits kFractionBitMask =
- ~static_cast<Bits>(0) >> (kExponentBitCount + 1);
-
- // The mask for the exponent bits.
- static const Bits kExponentBitMask = ~(kSignBitMask | kFractionBitMask);
-
- // How many ULP's (Units in the Last Place) we want to tolerate when
- // comparing two numbers. The larger the value, the more error we
- // allow. A 0 value means that two numbers must be exactly the same
- // to be considered equal.
- //
- // The maximum error of a single floating-point operation is 0.5
- // units in the last place. On Intel CPU's, all floating-point
- // calculations are done with 80-bit precision, while double has 64
- // bits. Therefore, 4 should be enough for ordinary use.
- //
- // See the following article for more details on ULP:
- // http://randomascii.wordpress.com/2012/02/25/comparing-floating-point-numbers-2012-edition/
- static const size_t kMaxUlps = 4;
-
- // Constructs a FloatingPoint from a raw floating-point number.
- //
- // On an Intel CPU, passing a non-normalized NAN (Not a Number)
- // around may change its bits, although the new value is guaranteed
- // to be also a NAN. Therefore, don't expect this constructor to
- // preserve the bits in x when x is a NAN.
- explicit FloatingPoint(const RawType& x) { u_.value_ = x; }
-
- // Static methods
-
- // Reinterprets a bit pattern as a floating-point number.
- //
- // This function is needed to test the AlmostEquals() method.
- static RawType ReinterpretBits(const Bits bits) {
- FloatingPoint fp(0);
- fp.u_.bits_ = bits;
- return fp.u_.value_;
- }
-
- // Returns the floating-point number that represent positive infinity.
- static RawType Infinity() {
- return ReinterpretBits(kExponentBitMask);
- }
-
- // Returns the maximum representable finite floating-point number.
- static RawType Max();
-
- // Non-static methods
-
- // Returns the bits that represents this number.
- const Bits &bits() const { return u_.bits_; }
-
- // Returns the exponent bits of this number.
- Bits exponent_bits() const { return kExponentBitMask & u_.bits_; }
-
- // Returns the fraction bits of this number.
- Bits fraction_bits() const { return kFractionBitMask & u_.bits_; }
-
- // Returns the sign bit of this number.
- Bits sign_bit() const { return kSignBitMask & u_.bits_; }
-
- // Returns true iff this is NAN (not a number).
- bool is_nan() const {
- // It's a NAN if the exponent bits are all ones and the fraction
- // bits are not entirely zeros.
- return (exponent_bits() == kExponentBitMask) && (fraction_bits() != 0);
- }
-
- // Returns true iff this number is at most kMaxUlps ULP's away from
- // rhs. In particular, this function:
- //
- // - returns false if either number is (or both are) NAN.
- // - treats really large numbers as almost equal to infinity.
- // - thinks +0.0 and -0.0 are 0 DLP's apart.
- bool AlmostEquals(const FloatingPoint& rhs) const {
- // The IEEE standard says that any comparison operation involving
- // a NAN must return false.
- if (is_nan() || rhs.is_nan()) return false;
-
- return DistanceBetweenSignAndMagnitudeNumbers(u_.bits_, rhs.u_.bits_)
- <= kMaxUlps;
- }
-
- private:
- // The data type used to store the actual floating-point number.
- union FloatingPointUnion {
- RawType value_; // The raw floating-point number.
- Bits bits_; // The bits that represent the number.
- };
-
- // Converts an integer from the sign-and-magnitude representation to
- // the biased representation. More precisely, let N be 2 to the
- // power of (kBitCount - 1), an integer x is represented by the
- // unsigned number x + N.
- //
- // For instance,
- //
- // -N + 1 (the most negative number representable using
- // sign-and-magnitude) is represented by 1;
- // 0 is represented by N; and
- // N - 1 (the biggest number representable using
- // sign-and-magnitude) is represented by 2N - 1.
- //
- // Read http://en.wikipedia.org/wiki/Signed_number_representations
- // for more details on signed number representations.
- static Bits SignAndMagnitudeToBiased(const Bits &sam) {
- if (kSignBitMask & sam) {
- // sam represents a negative number.
- return ~sam + 1;
- } else {
- // sam represents a positive number.
- return kSignBitMask | sam;
- }
- }
-
- // Given two numbers in the sign-and-magnitude representation,
- // returns the distance between them as an unsigned number.
- static Bits DistanceBetweenSignAndMagnitudeNumbers(const Bits &sam1,
- const Bits &sam2) {
- const Bits biased1 = SignAndMagnitudeToBiased(sam1);
- const Bits biased2 = SignAndMagnitudeToBiased(sam2);
- return (biased1 >= biased2) ? (biased1 - biased2) : (biased2 - biased1);
- }
-
- FloatingPointUnion u_;
-};
-
-// We cannot use std::numeric_limits<T>::max() as it clashes with the max()
-// macro defined by <windows.h>.
-template <>
-inline float FloatingPoint<float>::Max() { return FLT_MAX; }
-template <>
-inline double FloatingPoint<double>::Max() { return DBL_MAX; }
-
-// Typedefs the instances of the FloatingPoint template class that we
-// care to use.
-typedef FloatingPoint<float> Float;
-typedef FloatingPoint<double> Double;
-
-// In order to catch the mistake of putting tests that use different
-// test fixture classes in the same test case, we need to assign
-// unique IDs to fixture classes and compare them. The TypeId type is
-// used to hold such IDs. The user should treat TypeId as an opaque
-// type: the only operation allowed on TypeId values is to compare
-// them for equality using the == operator.
-typedef const void* TypeId;
-
-template <typename T>
-class TypeIdHelper {
- public:
- // dummy_ must not have a const type. Otherwise an overly eager
- // compiler (e.g. MSVC 7.1 & 8.0) may try to merge
- // TypeIdHelper<T>::dummy_ for different Ts as an "optimization".
- static bool dummy_;
-};
-
-template <typename T>
-bool TypeIdHelper<T>::dummy_ = false;
-
-// GetTypeId<T>() returns the ID of type T. Different values will be
-// returned for different types. Calling the function twice with the
-// same type argument is guaranteed to return the same ID.
-template <typename T>
-TypeId GetTypeId() {
- // The compiler is required to allocate a different
- // TypeIdHelper<T>::dummy_ variable for each T used to instantiate
- // the template. Therefore, the address of dummy_ is guaranteed to
- // be unique.
- return &(TypeIdHelper<T>::dummy_);
-}
-
-// Returns the type ID of ::testing::Test. Always call this instead
-// of GetTypeId< ::testing::Test>() to get the type ID of
-// ::testing::Test, as the latter may give the wrong result due to a
-// suspected linker bug when compiling Google Test as a Mac OS X
-// framework.
-GTEST_API_ TypeId GetTestTypeId();
-
-// Defines the abstract factory interface that creates instances
-// of a Test object.
-class TestFactoryBase {
- public:
- virtual ~TestFactoryBase() {}
-
- // Creates a test instance to run. The instance is both created and destroyed
- // within TestInfoImpl::Run()
- virtual Test* CreateTest() = 0;
-
- protected:
- TestFactoryBase() {}
-
- private:
- GTEST_DISALLOW_COPY_AND_ASSIGN_(TestFactoryBase);
-};
-
-// This class provides implementation of TeastFactoryBase interface.
-// It is used in TEST and TEST_F macros.
-template <class TestClass>
-class TestFactoryImpl : public TestFactoryBase {
- public:
- virtual Test* CreateTest() { return new TestClass; }
-};
-
-#if GTEST_OS_WINDOWS
-
-// Predicate-formatters for implementing the HRESULT checking macros
-// {ASSERT|EXPECT}_HRESULT_{SUCCEEDED|FAILED}
-// We pass a long instead of HRESULT to avoid causing an
-// include dependency for the HRESULT type.
-GTEST_API_ AssertionResult IsHRESULTSuccess(const char* expr,
- long hr); // NOLINT
-GTEST_API_ AssertionResult IsHRESULTFailure(const char* expr,
- long hr); // NOLINT
-
-#endif // GTEST_OS_WINDOWS
-
-// Types of SetUpTestCase() and TearDownTestCase() functions.
-typedef void (*SetUpTestCaseFunc)();
-typedef void (*TearDownTestCaseFunc)();
-
-struct CodeLocation {
- CodeLocation(const string& a_file, int a_line) : file(a_file), line(a_line) {}
-
- string file;
- int line;
-};
-
-// Creates a new TestInfo object and registers it with Google Test;
-// returns the created object.
-//
-// Arguments:
-//
-// test_case_name: name of the test case
-// name: name of the test
-// type_param the name of the test's type parameter, or NULL if
-// this is not a typed or a type-parameterized test.
-// value_param text representation of the test's value parameter,
-// or NULL if this is not a type-parameterized test.
-// code_location: code location where the test is defined
-// fixture_class_id: ID of the test fixture class
-// set_up_tc: pointer to the function that sets up the test case
-// tear_down_tc: pointer to the function that tears down the test case
-// factory: pointer to the factory that creates a test object.
-// The newly created TestInfo instance will assume
-// ownership of the factory object.
-GTEST_API_ TestInfo* MakeAndRegisterTestInfo(
- const char* test_case_name,
- const char* name,
- const char* type_param,
- const char* value_param,
- CodeLocation code_location,
- TypeId fixture_class_id,
- SetUpTestCaseFunc set_up_tc,
- TearDownTestCaseFunc tear_down_tc,
- TestFactoryBase* factory);
-
-// If *pstr starts with the given prefix, modifies *pstr to be right
-// past the prefix and returns true; otherwise leaves *pstr unchanged
-// and returns false. None of pstr, *pstr, and prefix can be NULL.
-GTEST_API_ bool SkipPrefix(const char* prefix, const char** pstr);
-
-#if GTEST_HAS_TYPED_TEST || GTEST_HAS_TYPED_TEST_P
-
-// State of the definition of a type-parameterized test case.
-class GTEST_API_ TypedTestCasePState {
- public:
- TypedTestCasePState() : registered_(false) {}
-
- // Adds the given test name to defined_test_names_ and return true
- // if the test case hasn't been registered; otherwise aborts the
- // program.
- bool AddTestName(const char* file, int line, const char* case_name,
- const char* test_name) {
- if (registered_) {
- fprintf(stderr, "%s Test %s must be defined before "
- "REGISTER_TYPED_TEST_CASE_P(%s, ...).\n",
- FormatFileLocation(file, line).c_str(), test_name, case_name);
- fflush(stderr);
- posix::Abort();
- }
- registered_tests_.insert(
- ::std::make_pair(test_name, CodeLocation(file, line)));
- return true;
- }
-
- bool TestExists(const std::string& test_name) const {
- return registered_tests_.count(test_name) > 0;
- }
-
- const CodeLocation& GetCodeLocation(const std::string& test_name) const {
- RegisteredTestsMap::const_iterator it = registered_tests_.find(test_name);
- GTEST_CHECK_(it != registered_tests_.end());
- return it->second;
- }
-
- // Verifies that registered_tests match the test names in
- // defined_test_names_; returns registered_tests if successful, or
- // aborts the program otherwise.
- const char* VerifyRegisteredTestNames(
- const char* file, int line, const char* registered_tests);
-
- private:
- typedef ::std::map<std::string, CodeLocation> RegisteredTestsMap;
-
- bool registered_;
- RegisteredTestsMap registered_tests_;
-};
-
-// Skips to the first non-space char after the first comma in 'str';
-// returns NULL if no comma is found in 'str'.
-inline const char* SkipComma(const char* str) {
- const char* comma = strchr(str, ',');
- if (comma == NULL) {
- return NULL;
- }
- while (IsSpace(*(++comma))) {}
- return comma;
-}
-
-// Returns the prefix of 'str' before the first comma in it; returns
-// the entire string if it contains no comma.
-inline std::string GetPrefixUntilComma(const char* str) {
- const char* comma = strchr(str, ',');
- return comma == NULL ? str : std::string(str, comma);
-}
-
-// Splits a given string on a given delimiter, populating a given
-// vector with the fields.
-void SplitString(const ::std::string& str, char delimiter,
- ::std::vector< ::std::string>* dest);
-
-// TypeParameterizedTest<Fixture, TestSel, Types>::Register()
-// registers a list of type-parameterized tests with Google Test. The
-// return value is insignificant - we just need to return something
-// such that we can call this function in a namespace scope.
-//
-// Implementation note: The GTEST_TEMPLATE_ macro declares a template
-// template parameter. It's defined in gtest-type-util.h.
-template <GTEST_TEMPLATE_ Fixture, class TestSel, typename Types>
-class TypeParameterizedTest {
- public:
- // 'index' is the index of the test in the type list 'Types'
- // specified in INSTANTIATE_TYPED_TEST_CASE_P(Prefix, TestCase,
- // Types). Valid values for 'index' are [0, N - 1] where N is the
- // length of Types.
- static bool Register(const char* prefix,
- CodeLocation code_location,
- const char* case_name, const char* test_names,
- int index) {
- typedef typename Types::Head Type;
- typedef Fixture<Type> FixtureClass;
- typedef typename GTEST_BIND_(TestSel, Type) TestClass;
-
- // First, registers the first type-parameterized test in the type
- // list.
- MakeAndRegisterTestInfo(
- (std::string(prefix) + (prefix[0] == '\0' ? "" : "/") + case_name + "/"
- + StreamableToString(index)).c_str(),
- StripTrailingSpaces(GetPrefixUntilComma(test_names)).c_str(),
- GetTypeName<Type>().c_str(),
- NULL, // No value parameter.
- code_location,
- GetTypeId<FixtureClass>(),
- TestClass::SetUpTestCase,
- TestClass::TearDownTestCase,
- new TestFactoryImpl<TestClass>);
-
- // Next, recurses (at compile time) with the tail of the type list.
- return TypeParameterizedTest<Fixture, TestSel, typename Types::Tail>
- ::Register(prefix, code_location, case_name, test_names, index + 1);
- }
-};
-
-// The base case for the compile time recursion.
-template <GTEST_TEMPLATE_ Fixture, class TestSel>
-class TypeParameterizedTest<Fixture, TestSel, Types0> {
- public:
- static bool Register(const char* /*prefix*/, CodeLocation,
- const char* /*case_name*/, const char* /*test_names*/,
- int /*index*/) {
- return true;
- }
-};
-
-// TypeParameterizedTestCase<Fixture, Tests, Types>::Register()
-// registers *all combinations* of 'Tests' and 'Types' with Google
-// Test. The return value is insignificant - we just need to return
-// something such that we can call this function in a namespace scope.
-template <GTEST_TEMPLATE_ Fixture, typename Tests, typename Types>
-class TypeParameterizedTestCase {
- public:
- static bool Register(const char* prefix, CodeLocation code_location,
- const TypedTestCasePState* state,
- const char* case_name, const char* test_names) {
- std::string test_name = StripTrailingSpaces(
- GetPrefixUntilComma(test_names));
- if (!state->TestExists(test_name)) {
- fprintf(stderr, "Failed to get code location for test %s.%s at %s.",
- case_name, test_name.c_str(),
- FormatFileLocation(code_location.file.c_str(),
- code_location.line).c_str());
- fflush(stderr);
- posix::Abort();
- }
- const CodeLocation& test_location = state->GetCodeLocation(test_name);
-
- typedef typename Tests::Head Head;
-
- // First, register the first test in 'Test' for each type in 'Types'.
- TypeParameterizedTest<Fixture, Head, Types>::Register(
- prefix, test_location, case_name, test_names, 0);
-
- // Next, recurses (at compile time) with the tail of the test list.
- return TypeParameterizedTestCase<Fixture, typename Tests::Tail, Types>
- ::Register(prefix, code_location, state,
- case_name, SkipComma(test_names));
- }
-};
-
-// The base case for the compile time recursion.
-template <GTEST_TEMPLATE_ Fixture, typename Types>
-class TypeParameterizedTestCase<Fixture, Templates0, Types> {
- public:
- static bool Register(const char* /*prefix*/, CodeLocation,
- const TypedTestCasePState* /*state*/,
- const char* /*case_name*/, const char* /*test_names*/) {
- return true;
- }
-};
-
-#endif // GTEST_HAS_TYPED_TEST || GTEST_HAS_TYPED_TEST_P
-
-// Returns the current OS stack trace as an std::string.
-//
-// The maximum number of stack frames to be included is specified by
-// the gtest_stack_trace_depth flag. The skip_count parameter
-// specifies the number of top frames to be skipped, which doesn't
-// count against the number of frames to be included.
-//
-// For example, if Foo() calls Bar(), which in turn calls
-// GetCurrentOsStackTraceExceptTop(..., 1), Foo() will be included in
-// the trace but Bar() and GetCurrentOsStackTraceExceptTop() won't.
-GTEST_API_ std::string GetCurrentOsStackTraceExceptTop(
- UnitTest* unit_test, int skip_count);
-
-// Helpers for suppressing warnings on unreachable code or constant
-// condition.
-
-// Always returns true.
-GTEST_API_ bool AlwaysTrue();
-
-// Always returns false.
-inline bool AlwaysFalse() { return !AlwaysTrue(); }
-
-// Helper for suppressing false warning from Clang on a const char*
-// variable declared in a conditional expression always being NULL in
-// the else branch.
-struct GTEST_API_ ConstCharPtr {
- ConstCharPtr(const char* str) : value(str) {}
- operator bool() const { return true; }
- const char* value;
-};
-
-// A simple Linear Congruential Generator for generating random
-// numbers with a uniform distribution. Unlike rand() and srand(), it
-// doesn't use global state (and therefore can't interfere with user
-// code). Unlike rand_r(), it's portable. An LCG isn't very random,
-// but it's good enough for our purposes.
-class GTEST_API_ Random {
- public:
- static const UInt32 kMaxRange = 1u << 31;
-
- explicit Random(UInt32 seed) : state_(seed) {}
-
- void Reseed(UInt32 seed) { state_ = seed; }
-
- // Generates a random number from [0, range). Crashes if 'range' is
- // 0 or greater than kMaxRange.
- UInt32 Generate(UInt32 range);
-
- private:
- UInt32 state_;
- GTEST_DISALLOW_COPY_AND_ASSIGN_(Random);
-};
-
-// Defining a variable of type CompileAssertTypesEqual<T1, T2> will cause a
-// compiler error iff T1 and T2 are different types.
-template <typename T1, typename T2>
-struct CompileAssertTypesEqual;
-
-template <typename T>
-struct CompileAssertTypesEqual<T, T> {
-};
-
-// Removes the reference from a type if it is a reference type,
-// otherwise leaves it unchanged. This is the same as
-// tr1::remove_reference, which is not widely available yet.
-template <typename T>
-struct RemoveReference { typedef T type; }; // NOLINT
-template <typename T>
-struct RemoveReference<T&> { typedef T type; }; // NOLINT
-
-// A handy wrapper around RemoveReference that works when the argument
-// T depends on template parameters.
-#define GTEST_REMOVE_REFERENCE_(T) \
- typename ::testing::internal::RemoveReference<T>::type
-
-// Removes const from a type if it is a const type, otherwise leaves
-// it unchanged. This is the same as tr1::remove_const, which is not
-// widely available yet.
-template <typename T>
-struct RemoveConst { typedef T type; }; // NOLINT
-template <typename T>
-struct RemoveConst<const T> { typedef T type; }; // NOLINT
-
-// MSVC 8.0, Sun C++, and IBM XL C++ have a bug which causes the above
-// definition to fail to remove the const in 'const int[3]' and 'const
-// char[3][4]'. The following specialization works around the bug.
-template <typename T, size_t N>
-struct RemoveConst<const T[N]> {
- typedef typename RemoveConst<T>::type type[N];
-};
-
-#if defined(_MSC_VER) && _MSC_VER < 1400
-// This is the only specialization that allows VC++ 7.1 to remove const in
-// 'const int[3] and 'const int[3][4]'. However, it causes trouble with GCC
-// and thus needs to be conditionally compiled.
-template <typename T, size_t N>
-struct RemoveConst<T[N]> {
- typedef typename RemoveConst<T>::type type[N];
-};
-#endif
-
-// A handy wrapper around RemoveConst that works when the argument
-// T depends on template parameters.
-#define GTEST_REMOVE_CONST_(T) \
- typename ::testing::internal::RemoveConst<T>::type
-
-// Turns const U&, U&, const U, and U all into U.
-#define GTEST_REMOVE_REFERENCE_AND_CONST_(T) \
- GTEST_REMOVE_CONST_(GTEST_REMOVE_REFERENCE_(T))
-
-// Adds reference to a type if it is not a reference type,
-// otherwise leaves it unchanged. This is the same as
-// tr1::add_reference, which is not widely available yet.
-template <typename T>
-struct AddReference { typedef T& type; }; // NOLINT
-template <typename T>
-struct AddReference<T&> { typedef T& type; }; // NOLINT
-
-// A handy wrapper around AddReference that works when the argument T
-// depends on template parameters.
-#define GTEST_ADD_REFERENCE_(T) \
- typename ::testing::internal::AddReference<T>::type
-
-// Adds a reference to const on top of T as necessary. For example,
-// it transforms
-//
-// char ==> const char&
-// const char ==> const char&
-// char& ==> const char&
-// const char& ==> const char&
-//
-// The argument T must depend on some template parameters.
-#define GTEST_REFERENCE_TO_CONST_(T) \
- GTEST_ADD_REFERENCE_(const GTEST_REMOVE_REFERENCE_(T))
-
-// ImplicitlyConvertible<From, To>::value is a compile-time bool
-// constant that's true iff type From can be implicitly converted to
-// type To.
-template <typename From, typename To>
-class ImplicitlyConvertible {
- private:
- // We need the following helper functions only for their types.
- // They have no implementations.
-
- // MakeFrom() is an expression whose type is From. We cannot simply
- // use From(), as the type From may not have a public default
- // constructor.
- static typename AddReference<From>::type MakeFrom();
-
- // These two functions are overloaded. Given an expression
- // Helper(x), the compiler will pick the first version if x can be
- // implicitly converted to type To; otherwise it will pick the
- // second version.
- //
- // The first version returns a value of size 1, and the second
- // version returns a value of size 2. Therefore, by checking the
- // size of Helper(x), which can be done at compile time, we can tell
- // which version of Helper() is used, and hence whether x can be
- // implicitly converted to type To.
- static char Helper(To);
- static char (&Helper(...))[2]; // NOLINT
-
- // We have to put the 'public' section after the 'private' section,
- // or MSVC refuses to compile the code.
- public:
-#if defined(__BORLANDC__)
- // C++Builder cannot use member overload resolution during template
- // instantiation. The simplest workaround is to use its C++0x type traits
- // functions (C++Builder 2009 and above only).
- static const bool value = __is_convertible(From, To);
-#else
- // MSVC warns about implicitly converting from double to int for
- // possible loss of data, so we need to temporarily disable the
- // warning.
- GTEST_DISABLE_MSC_WARNINGS_PUSH_(4244)
- static const bool value =
- sizeof(Helper(ImplicitlyConvertible::MakeFrom())) == 1;
- GTEST_DISABLE_MSC_WARNINGS_POP_()
-#endif // __BORLANDC__
-};
-template <typename From, typename To>
-const bool ImplicitlyConvertible<From, To>::value;
-
-// IsAProtocolMessage<T>::value is a compile-time bool constant that's
-// true iff T is type ProtocolMessage, proto2::Message, or a subclass
-// of those.
-template <typename T>
-struct IsAProtocolMessage
- : public bool_constant<
- ImplicitlyConvertible<const T*, const ::ProtocolMessage*>::value ||
- ImplicitlyConvertible<const T*, const ::proto2::Message*>::value> {
-};
-
-// When the compiler sees expression IsContainerTest<C>(0), if C is an
-// STL-style container class, the first overload of IsContainerTest
-// will be viable (since both C::iterator* and C::const_iterator* are
-// valid types and NULL can be implicitly converted to them). It will
-// be picked over the second overload as 'int' is a perfect match for
-// the type of argument 0. If C::iterator or C::const_iterator is not
-// a valid type, the first overload is not viable, and the second
-// overload will be picked. Therefore, we can determine whether C is
-// a container class by checking the type of IsContainerTest<C>(0).
-// The value of the expression is insignificant.
-//
-// Note that we look for both C::iterator and C::const_iterator. The
-// reason is that C++ injects the name of a class as a member of the
-// class itself (e.g. you can refer to class iterator as either
-// 'iterator' or 'iterator::iterator'). If we look for C::iterator
-// only, for example, we would mistakenly think that a class named
-// iterator is an STL container.
-//
-// Also note that the simpler approach of overloading
-// IsContainerTest(typename C::const_iterator*) and
-// IsContainerTest(...) doesn't work with Visual Age C++ and Sun C++.
-typedef int IsContainer;
-template <class C>
-IsContainer IsContainerTest(int /* dummy */,
- typename C::iterator* /* it */ = NULL,
- typename C::const_iterator* /* const_it */ = NULL) {
- return 0;
-}
-
-typedef char IsNotContainer;
-template <class C>
-IsNotContainer IsContainerTest(long /* dummy */) { return '\0'; }
-
-// EnableIf<condition>::type is void when 'Cond' is true, and
-// undefined when 'Cond' is false. To use SFINAE to make a function
-// overload only apply when a particular expression is true, add
-// "typename EnableIf<expression>::type* = 0" as the last parameter.
-template<bool> struct EnableIf;
-template<> struct EnableIf<true> { typedef void type; }; // NOLINT
-
-// Utilities for native arrays.
-
-// ArrayEq() compares two k-dimensional native arrays using the
-// elements' operator==, where k can be any integer >= 0. When k is
-// 0, ArrayEq() degenerates into comparing a single pair of values.
-
-template <typename T, typename U>
-bool ArrayEq(const T* lhs, size_t size, const U* rhs);
-
-// This generic version is used when k is 0.
-template <typename T, typename U>
-inline bool ArrayEq(const T& lhs, const U& rhs) { return lhs == rhs; }
-
-// This overload is used when k >= 1.
-template <typename T, typename U, size_t N>
-inline bool ArrayEq(const T(&lhs)[N], const U(&rhs)[N]) {
- return internal::ArrayEq(lhs, N, rhs);
-}
-
-// This helper reduces code bloat. If we instead put its logic inside
-// the previous ArrayEq() function, arrays with different sizes would
-// lead to different copies of the template code.
-template <typename T, typename U>
-bool ArrayEq(const T* lhs, size_t size, const U* rhs) {
- for (size_t i = 0; i != size; i++) {
- if (!internal::ArrayEq(lhs[i], rhs[i]))
- return false;
- }
- return true;
-}
-
-// Finds the first element in the iterator range [begin, end) that
-// equals elem. Element may be a native array type itself.
-template <typename Iter, typename Element>
-Iter ArrayAwareFind(Iter begin, Iter end, const Element& elem) {
- for (Iter it = begin; it != end; ++it) {
- if (internal::ArrayEq(*it, elem))
- return it;
- }
- return end;
-}
-
-// CopyArray() copies a k-dimensional native array using the elements'
-// operator=, where k can be any integer >= 0. When k is 0,
-// CopyArray() degenerates into copying a single value.
-
-template <typename T, typename U>
-void CopyArray(const T* from, size_t size, U* to);
-
-// This generic version is used when k is 0.
-template <typename T, typename U>
-inline void CopyArray(const T& from, U* to) { *to = from; }
-
-// This overload is used when k >= 1.
-template <typename T, typename U, size_t N>
-inline void CopyArray(const T(&from)[N], U(*to)[N]) {
- internal::CopyArray(from, N, *to);
-}
-
-// This helper reduces code bloat. If we instead put its logic inside
-// the previous CopyArray() function, arrays with different sizes
-// would lead to different copies of the template code.
-template <typename T, typename U>
-void CopyArray(const T* from, size_t size, U* to) {
- for (size_t i = 0; i != size; i++) {
- internal::CopyArray(from[i], to + i);
- }
-}
-
-// The relation between an NativeArray object (see below) and the
-// native array it represents.
-// We use 2 different structs to allow non-copyable types to be used, as long
-// as RelationToSourceReference() is passed.
-struct RelationToSourceReference {};
-struct RelationToSourceCopy {};
-
-// Adapts a native array to a read-only STL-style container. Instead
-// of the complete STL container concept, this adaptor only implements
-// members useful for Google Mock's container matchers. New members
-// should be added as needed. To simplify the implementation, we only
-// support Element being a raw type (i.e. having no top-level const or
-// reference modifier). It's the client's responsibility to satisfy
-// this requirement. Element can be an array type itself (hence
-// multi-dimensional arrays are supported).
-template <typename Element>
-class NativeArray {
- public:
- // STL-style container typedefs.
- typedef Element value_type;
- typedef Element* iterator;
- typedef const Element* const_iterator;
-
- // Constructs from a native array. References the source.
- NativeArray(const Element* array, size_t count, RelationToSourceReference) {
- InitRef(array, count);
- }
-
- // Constructs from a native array. Copies the source.
- NativeArray(const Element* array, size_t count, RelationToSourceCopy) {
- InitCopy(array, count);
- }
-
- // Copy constructor.
- NativeArray(const NativeArray& rhs) {
- (this->*rhs.clone_)(rhs.array_, rhs.size_);
- }
-
- ~NativeArray() {
- if (clone_ != &NativeArray::InitRef)
- delete[] array_;
- }
-
- // STL-style container methods.
- size_t size() const { return size_; }
- const_iterator begin() const { return array_; }
- const_iterator end() const { return array_ + size_; }
- bool operator==(const NativeArray& rhs) const {
- return size() == rhs.size() &&
- ArrayEq(begin(), size(), rhs.begin());
- }
-
- private:
- enum {
- kCheckTypeIsNotConstOrAReference = StaticAssertTypeEqHelper<
- Element, GTEST_REMOVE_REFERENCE_AND_CONST_(Element)>::value,
- };
-
- // Initializes this object with a copy of the input.
- void InitCopy(const Element* array, size_t a_size) {
- Element* const copy = new Element[a_size];
- CopyArray(array, a_size, copy);
- array_ = copy;
- size_ = a_size;
- clone_ = &NativeArray::InitCopy;
- }
-
- // Initializes this object with a reference of the input.
- void InitRef(const Element* array, size_t a_size) {
- array_ = array;
- size_ = a_size;
- clone_ = &NativeArray::InitRef;
- }
-
- const Element* array_;
- size_t size_;
- void (NativeArray::*clone_)(const Element*, size_t);
-
- GTEST_DISALLOW_ASSIGN_(NativeArray);
-};
-
-} // namespace internal
-} // namespace testing
-
-#define GTEST_MESSAGE_AT_(file, line, message, result_type) \
- ::testing::internal::AssertHelper(result_type, file, line, message) \
- = ::testing::Message()
-
-#define GTEST_MESSAGE_(message, result_type) \
- GTEST_MESSAGE_AT_(__FILE__, __LINE__, message, result_type)
-
-#define GTEST_FATAL_FAILURE_(message) \
- return GTEST_MESSAGE_(message, ::testing::TestPartResult::kFatalFailure)
-
-#define GTEST_NONFATAL_FAILURE_(message) \
- GTEST_MESSAGE_(message, ::testing::TestPartResult::kNonFatalFailure)
-
-#define GTEST_SUCCESS_(message) \
- GTEST_MESSAGE_(message, ::testing::TestPartResult::kSuccess)
-
-// Suppresses MSVC warnings 4072 (unreachable code) for the code following
-// statement if it returns or throws (or doesn't return or throw in some
-// situations).
-#define GTEST_SUPPRESS_UNREACHABLE_CODE_WARNING_BELOW_(statement) \
- if (::testing::internal::AlwaysTrue()) { statement; }
-
-#define GTEST_TEST_THROW_(statement, expected_exception, fail) \
- GTEST_AMBIGUOUS_ELSE_BLOCKER_ \
- if (::testing::internal::ConstCharPtr gtest_msg = "") { \
- bool gtest_caught_expected = false; \
- try { \
- GTEST_SUPPRESS_UNREACHABLE_CODE_WARNING_BELOW_(statement); \
- } \
- catch (expected_exception const&) { \
- gtest_caught_expected = true; \
- } \
- catch (...) { \
- gtest_msg.value = \
- "Expected: " #statement " throws an exception of type " \
- #expected_exception ".\n Actual: it throws a different type."; \
- goto GTEST_CONCAT_TOKEN_(gtest_label_testthrow_, __LINE__); \
- } \
- if (!gtest_caught_expected) { \
- gtest_msg.value = \
- "Expected: " #statement " throws an exception of type " \
- #expected_exception ".\n Actual: it throws nothing."; \
- goto GTEST_CONCAT_TOKEN_(gtest_label_testthrow_, __LINE__); \
- } \
- } else \
- GTEST_CONCAT_TOKEN_(gtest_label_testthrow_, __LINE__): \
- fail(gtest_msg.value)
-
-#define GTEST_TEST_NO_THROW_(statement, fail) \
- GTEST_AMBIGUOUS_ELSE_BLOCKER_ \
- if (::testing::internal::AlwaysTrue()) { \
- try { \
- GTEST_SUPPRESS_UNREACHABLE_CODE_WARNING_BELOW_(statement); \
- } \
- catch (...) { \
- goto GTEST_CONCAT_TOKEN_(gtest_label_testnothrow_, __LINE__); \
- } \
- } else \
- GTEST_CONCAT_TOKEN_(gtest_label_testnothrow_, __LINE__): \
- fail("Expected: " #statement " doesn't throw an exception.\n" \
- " Actual: it throws.")
-
-#define GTEST_TEST_ANY_THROW_(statement, fail) \
- GTEST_AMBIGUOUS_ELSE_BLOCKER_ \
- if (::testing::internal::AlwaysTrue()) { \
- bool gtest_caught_any = false; \
- try { \
- GTEST_SUPPRESS_UNREACHABLE_CODE_WARNING_BELOW_(statement); \
- } \
- catch (...) { \
- gtest_caught_any = true; \
- } \
- if (!gtest_caught_any) { \
- goto GTEST_CONCAT_TOKEN_(gtest_label_testanythrow_, __LINE__); \
- } \
- } else \
- GTEST_CONCAT_TOKEN_(gtest_label_testanythrow_, __LINE__): \
- fail("Expected: " #statement " throws an exception.\n" \
- " Actual: it doesn't.")
-
-
-// Implements Boolean test assertions such as EXPECT_TRUE. expression can be
-// either a boolean expression or an AssertionResult. text is a textual
-// represenation of expression as it was passed into the EXPECT_TRUE.
-#define GTEST_TEST_BOOLEAN_(expression, text, actual, expected, fail) \
- GTEST_AMBIGUOUS_ELSE_BLOCKER_ \
- if (const ::testing::AssertionResult gtest_ar_ = \
- ::testing::AssertionResult(expression)) \
- ; \
- else \
- fail(::testing::internal::GetBoolAssertionFailureMessage(\
- gtest_ar_, text, #actual, #expected).c_str())
-
-#define GTEST_TEST_NO_FATAL_FAILURE_(statement, fail) \
- GTEST_AMBIGUOUS_ELSE_BLOCKER_ \
- if (::testing::internal::AlwaysTrue()) { \
- ::testing::internal::HasNewFatalFailureHelper gtest_fatal_failure_checker; \
- GTEST_SUPPRESS_UNREACHABLE_CODE_WARNING_BELOW_(statement); \
- if (gtest_fatal_failure_checker.has_new_fatal_failure()) { \
- goto GTEST_CONCAT_TOKEN_(gtest_label_testnofatal_, __LINE__); \
- } \
- } else \
- GTEST_CONCAT_TOKEN_(gtest_label_testnofatal_, __LINE__): \
- fail("Expected: " #statement " doesn't generate new fatal " \
- "failures in the current thread.\n" \
- " Actual: it does.")
-
-// Expands to the name of the class that implements the given test.
-#define GTEST_TEST_CLASS_NAME_(test_case_name, test_name) \
- test_case_name##_##test_name##_Test
-
-// Helper macro for defining tests.
-#define GTEST_TEST_(test_case_name, test_name, parent_class, parent_id)\
-class GTEST_TEST_CLASS_NAME_(test_case_name, test_name) : public parent_class {\
- public:\
- GTEST_TEST_CLASS_NAME_(test_case_name, test_name)() {}\
- private:\
- virtual void TestBody();\
- static ::testing::TestInfo* const test_info_ GTEST_ATTRIBUTE_UNUSED_;\
- GTEST_DISALLOW_COPY_AND_ASSIGN_(\
- GTEST_TEST_CLASS_NAME_(test_case_name, test_name));\
-};\
-\
-::testing::TestInfo* const GTEST_TEST_CLASS_NAME_(test_case_name, test_name)\
- ::test_info_ =\
- ::testing::internal::MakeAndRegisterTestInfo(\
- #test_case_name, #test_name, NULL, NULL, \
- ::testing::internal::CodeLocation(__FILE__, __LINE__), \
- (parent_id), \
- parent_class::SetUpTestCase, \
- parent_class::TearDownTestCase, \
- new ::testing::internal::TestFactoryImpl<\
- GTEST_TEST_CLASS_NAME_(test_case_name, test_name)>);\
-void GTEST_TEST_CLASS_NAME_(test_case_name, test_name)::TestBody()
-
-#endif // GTEST_INCLUDE_GTEST_INTERNAL_GTEST_INTERNAL_H_
-
diff --git a/third_party/aom/third_party/googletest/src/googletest/include/gtest/internal/gtest-linked_ptr.h b/third_party/aom/third_party/googletest/src/googletest/include/gtest/internal/gtest-linked_ptr.h
deleted file mode 100644
index 360294221..000000000
--- a/third_party/aom/third_party/googletest/src/googletest/include/gtest/internal/gtest-linked_ptr.h
+++ /dev/null
@@ -1,243 +0,0 @@
-// Copyright 2003 Google Inc.
-// All rights reserved.
-//
-// Redistribution and use in source and binary forms, with or without
-// modification, are permitted provided that the following conditions are
-// met:
-//
-// * Redistributions of source code must retain the above copyright
-// notice, this list of conditions and the following disclaimer.
-// * Redistributions in binary form must reproduce the above
-// copyright notice, this list of conditions and the following disclaimer
-// in the documentation and/or other materials provided with the
-// distribution.
-// * Neither the name of Google Inc. nor the names of its
-// contributors may be used to endorse or promote products derived from
-// this software without specific prior written permission.
-//
-// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
-// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
-// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
-// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
-// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
-// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
-// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
-// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
-// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-//
-// Authors: Dan Egnor (egnor@google.com)
-//
-// A "smart" pointer type with reference tracking. Every pointer to a
-// particular object is kept on a circular linked list. When the last pointer
-// to an object is destroyed or reassigned, the object is deleted.
-//
-// Used properly, this deletes the object when the last reference goes away.
-// There are several caveats:
-// - Like all reference counting schemes, cycles lead to leaks.
-// - Each smart pointer is actually two pointers (8 bytes instead of 4).
-// - Every time a pointer is assigned, the entire list of pointers to that
-// object is traversed. This class is therefore NOT SUITABLE when there
-// will often be more than two or three pointers to a particular object.
-// - References are only tracked as long as linked_ptr<> objects are copied.
-// If a linked_ptr<> is converted to a raw pointer and back, BAD THINGS
-// will happen (double deletion).
-//
-// A good use of this class is storing object references in STL containers.
-// You can safely put linked_ptr<> in a vector<>.
-// Other uses may not be as good.
-//
-// Note: If you use an incomplete type with linked_ptr<>, the class
-// *containing* linked_ptr<> must have a constructor and destructor (even
-// if they do nothing!).
-//
-// Bill Gibbons suggested we use something like this.
-//
-// Thread Safety:
-// Unlike other linked_ptr implementations, in this implementation
-// a linked_ptr object is thread-safe in the sense that:
-// - it's safe to copy linked_ptr objects concurrently,
-// - it's safe to copy *from* a linked_ptr and read its underlying
-// raw pointer (e.g. via get()) concurrently, and
-// - it's safe to write to two linked_ptrs that point to the same
-// shared object concurrently.
-// TODO(wan@google.com): rename this to safe_linked_ptr to avoid
-// confusion with normal linked_ptr.
-
-#ifndef GTEST_INCLUDE_GTEST_INTERNAL_GTEST_LINKED_PTR_H_
-#define GTEST_INCLUDE_GTEST_INTERNAL_GTEST_LINKED_PTR_H_
-
-#include <stdlib.h>
-#include <assert.h>
-
-#include "gtest/internal/gtest-port.h"
-
-namespace testing {
-namespace internal {
-
-// Protects copying of all linked_ptr objects.
-GTEST_API_ GTEST_DECLARE_STATIC_MUTEX_(g_linked_ptr_mutex);
-
-// This is used internally by all instances of linked_ptr<>. It needs to be
-// a non-template class because different types of linked_ptr<> can refer to
-// the same object (linked_ptr<Superclass>(obj) vs linked_ptr<Subclass>(obj)).
-// So, it needs to be possible for different types of linked_ptr to participate
-// in the same circular linked list, so we need a single class type here.
-//
-// DO NOT USE THIS CLASS DIRECTLY YOURSELF. Use linked_ptr<T>.
-class linked_ptr_internal {
- public:
- // Create a new circle that includes only this instance.
- void join_new() {
- next_ = this;
- }
-
- // Many linked_ptr operations may change p.link_ for some linked_ptr
- // variable p in the same circle as this object. Therefore we need
- // to prevent two such operations from occurring concurrently.
- //
- // Note that different types of linked_ptr objects can coexist in a
- // circle (e.g. linked_ptr<Base>, linked_ptr<Derived1>, and
- // linked_ptr<Derived2>). Therefore we must use a single mutex to
- // protect all linked_ptr objects. This can create serious
- // contention in production code, but is acceptable in a testing
- // framework.
-
- // Join an existing circle.
- void join(linked_ptr_internal const* ptr)
- GTEST_LOCK_EXCLUDED_(g_linked_ptr_mutex) {
- MutexLock lock(&g_linked_ptr_mutex);
-
- linked_ptr_internal const* p = ptr;
- while (p->next_ != ptr) {
- assert(p->next_ != this &&
- "Trying to join() a linked ring we are already in. "
- "Is GMock thread safety enabled?");
- p = p->next_;
- }
- p->next_ = this;
- next_ = ptr;
- }
-
- // Leave whatever circle we're part of. Returns true if we were the
- // last member of the circle. Once this is done, you can join() another.
- bool depart()
- GTEST_LOCK_EXCLUDED_(g_linked_ptr_mutex) {
- MutexLock lock(&g_linked_ptr_mutex);
-
- if (next_ == this) return true;
- linked_ptr_internal const* p = next_;
- while (p->next_ != this) {
- assert(p->next_ != next_ &&
- "Trying to depart() a linked ring we are not in. "
- "Is GMock thread safety enabled?");
- p = p->next_;
- }
- p->next_ = next_;
- return false;
- }
-
- private:
- mutable linked_ptr_internal const* next_;
-};
-
-template <typename T>
-class linked_ptr {
- public:
- typedef T element_type;
-
- // Take over ownership of a raw pointer. This should happen as soon as
- // possible after the object is created.
- explicit linked_ptr(T* ptr = NULL) { capture(ptr); }
- ~linked_ptr() { depart(); }
-
- // Copy an existing linked_ptr<>, adding ourselves to the list of references.
- template <typename U> linked_ptr(linked_ptr<U> const& ptr) { copy(&ptr); }
- linked_ptr(linked_ptr const& ptr) { // NOLINT
- assert(&ptr != this);
- copy(&ptr);
- }
-
- // Assignment releases the old value and acquires the new.
- template <typename U> linked_ptr& operator=(linked_ptr<U> const& ptr) {
- depart();
- copy(&ptr);
- return *this;
- }
-
- linked_ptr& operator=(linked_ptr const& ptr) {
- if (&ptr != this) {
- depart();
- copy(&ptr);
- }
- return *this;
- }
-
- // Smart pointer members.
- void reset(T* ptr = NULL) {
- depart();
- capture(ptr);
- }
- T* get() const { return value_; }
- T* operator->() const { return value_; }
- T& operator*() const { return *value_; }
-
- bool operator==(T* p) const { return value_ == p; }
- bool operator!=(T* p) const { return value_ != p; }
- template <typename U>
- bool operator==(linked_ptr<U> const& ptr) const {
- return value_ == ptr.get();
- }
- template <typename U>
- bool operator!=(linked_ptr<U> const& ptr) const {
- return value_ != ptr.get();
- }
-
- private:
- template <typename U>
- friend class linked_ptr;
-
- T* value_;
- linked_ptr_internal link_;
-
- void depart() {
- if (link_.depart()) delete value_;
- }
-
- void capture(T* ptr) {
- value_ = ptr;
- link_.join_new();
- }
-
- template <typename U> void copy(linked_ptr<U> const* ptr) {
- value_ = ptr->get();
- if (value_)
- link_.join(&ptr->link_);
- else
- link_.join_new();
- }
-};
-
-template<typename T> inline
-bool operator==(T* ptr, const linked_ptr<T>& x) {
- return ptr == x.get();
-}
-
-template<typename T> inline
-bool operator!=(T* ptr, const linked_ptr<T>& x) {
- return ptr != x.get();
-}
-
-// A function to convert T* into linked_ptr<T>
-// Doing e.g. make_linked_ptr(new FooBarBaz<type>(arg)) is a shorter notation
-// for linked_ptr<FooBarBaz<type> >(new FooBarBaz<type>(arg))
-template <typename T>
-linked_ptr<T> make_linked_ptr(T* ptr) {
- return linked_ptr<T>(ptr);
-}
-
-} // namespace internal
-} // namespace testing
-
-#endif // GTEST_INCLUDE_GTEST_INTERNAL_GTEST_LINKED_PTR_H_
diff --git a/third_party/aom/third_party/googletest/src/googletest/include/gtest/internal/gtest-param-util-generated.h b/third_party/aom/third_party/googletest/src/googletest/include/gtest/internal/gtest-param-util-generated.h
deleted file mode 100644
index 4d1d81d20..000000000
--- a/third_party/aom/third_party/googletest/src/googletest/include/gtest/internal/gtest-param-util-generated.h
+++ /dev/null
@@ -1,5146 +0,0 @@
-// This file was GENERATED by command:
-// pump.py gtest-param-util-generated.h.pump
-// DO NOT EDIT BY HAND!!!
-
-// Copyright 2008 Google Inc.
-// All Rights Reserved.
-//
-// Redistribution and use in source and binary forms, with or without
-// modification, are permitted provided that the following conditions are
-// met:
-//
-// * Redistributions of source code must retain the above copyright
-// notice, this list of conditions and the following disclaimer.
-// * Redistributions in binary form must reproduce the above
-// copyright notice, this list of conditions and the following disclaimer
-// in the documentation and/or other materials provided with the
-// distribution.
-// * Neither the name of Google Inc. nor the names of its
-// contributors may be used to endorse or promote products derived from
-// this software without specific prior written permission.
-//
-// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
-// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
-// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
-// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
-// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
-// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
-// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
-// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
-// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-//
-// Author: vladl@google.com (Vlad Losev)
-
-// Type and function utilities for implementing parameterized tests.
-// This file is generated by a SCRIPT. DO NOT EDIT BY HAND!
-//
-// Currently Google Test supports at most 50 arguments in Values,
-// and at most 10 arguments in Combine. Please contact
-// googletestframework@googlegroups.com if you need more.
-// Please note that the number of arguments to Combine is limited
-// by the maximum arity of the implementation of tuple which is
-// currently set at 10.
-
-#ifndef GTEST_INCLUDE_GTEST_INTERNAL_GTEST_PARAM_UTIL_GENERATED_H_
-#define GTEST_INCLUDE_GTEST_INTERNAL_GTEST_PARAM_UTIL_GENERATED_H_
-
-// scripts/fuse_gtest.py depends on gtest's own header being #included
-// *unconditionally*. Therefore these #includes cannot be moved
-// inside #if GTEST_HAS_PARAM_TEST.
-#include "gtest/internal/gtest-param-util.h"
-#include "gtest/internal/gtest-port.h"
-
-#if GTEST_HAS_PARAM_TEST
-
-namespace testing {
-
-// Forward declarations of ValuesIn(), which is implemented in
-// include/gtest/gtest-param-test.h.
-template <typename ForwardIterator>
-internal::ParamGenerator<
- typename ::testing::internal::IteratorTraits<ForwardIterator>::value_type>
-ValuesIn(ForwardIterator begin, ForwardIterator end);
-
-template <typename T, size_t N>
-internal::ParamGenerator<T> ValuesIn(const T (&array)[N]);
-
-template <class Container>
-internal::ParamGenerator<typename Container::value_type> ValuesIn(
- const Container& container);
-
-namespace internal {
-
-// Used in the Values() function to provide polymorphic capabilities.
-template <typename T1>
-class ValueArray1 {
- public:
- explicit ValueArray1(T1 v1) : v1_(v1) {}
-
- template <typename T>
- operator ParamGenerator<T>() const {
- const T array[] = {static_cast<T>(v1_)};
- return ValuesIn(array);
- }
-
- private:
- // No implementation - assignment is unsupported.
- void operator=(const ValueArray1& other);
-
- const T1 v1_;
-};
-
-template <typename T1, typename T2>
-class ValueArray2 {
- public:
- ValueArray2(T1 v1, T2 v2) : v1_(v1), v2_(v2) {}
-
- template <typename T>
- operator ParamGenerator<T>() const {
- const T array[] = {static_cast<T>(v1_), static_cast<T>(v2_)};
- return ValuesIn(array);
- }
-
- private:
- // No implementation - assignment is unsupported.
- void operator=(const ValueArray2& other);
-
- const T1 v1_;
- const T2 v2_;
-};
-
-template <typename T1, typename T2, typename T3>
-class ValueArray3 {
- public:
- ValueArray3(T1 v1, T2 v2, T3 v3) : v1_(v1), v2_(v2), v3_(v3) {}
-
- template <typename T>
- operator ParamGenerator<T>() const {
- const T array[] = {static_cast<T>(v1_), static_cast<T>(v2_),
- static_cast<T>(v3_)};
- return ValuesIn(array);
- }
-
- private:
- // No implementation - assignment is unsupported.
- void operator=(const ValueArray3& other);
-
- const T1 v1_;
- const T2 v2_;
- const T3 v3_;
-};
-
-template <typename T1, typename T2, typename T3, typename T4>
-class ValueArray4 {
- public:
- ValueArray4(T1 v1, T2 v2, T3 v3, T4 v4) : v1_(v1), v2_(v2), v3_(v3),
- v4_(v4) {}
-
- template <typename T>
- operator ParamGenerator<T>() const {
- const T array[] = {static_cast<T>(v1_), static_cast<T>(v2_),
- static_cast<T>(v3_), static_cast<T>(v4_)};
- return ValuesIn(array);
- }
-
- private:
- // No implementation - assignment is unsupported.
- void operator=(const ValueArray4& other);
-
- const T1 v1_;
- const T2 v2_;
- const T3 v3_;
- const T4 v4_;
-};
-
-template <typename T1, typename T2, typename T3, typename T4, typename T5>
-class ValueArray5 {
- public:
- ValueArray5(T1 v1, T2 v2, T3 v3, T4 v4, T5 v5) : v1_(v1), v2_(v2), v3_(v3),
- v4_(v4), v5_(v5) {}
-
- template <typename T>
- operator ParamGenerator<T>() const {
- const T array[] = {static_cast<T>(v1_), static_cast<T>(v2_),
- static_cast<T>(v3_), static_cast<T>(v4_), static_cast<T>(v5_)};
- return ValuesIn(array);
- }
-
- private:
- // No implementation - assignment is unsupported.
- void operator=(const ValueArray5& other);
-
- const T1 v1_;
- const T2 v2_;
- const T3 v3_;
- const T4 v4_;
- const T5 v5_;
-};
-
-template <typename T1, typename T2, typename T3, typename T4, typename T5,
- typename T6>
-class ValueArray6 {
- public:
- ValueArray6(T1 v1, T2 v2, T3 v3, T4 v4, T5 v5, T6 v6) : v1_(v1), v2_(v2),
- v3_(v3), v4_(v4), v5_(v5), v6_(v6) {}
-
- template <typename T>
- operator ParamGenerator<T>() const {
- const T array[] = {static_cast<T>(v1_), static_cast<T>(v2_),
- static_cast<T>(v3_), static_cast<T>(v4_), static_cast<T>(v5_),
- static_cast<T>(v6_)};
- return ValuesIn(array);
- }
-
- private:
- // No implementation - assignment is unsupported.
- void operator=(const ValueArray6& other);
-
- const T1 v1_;
- const T2 v2_;
- const T3 v3_;
- const T4 v4_;
- const T5 v5_;
- const T6 v6_;
-};
-
-template <typename T1, typename T2, typename T3, typename T4, typename T5,
- typename T6, typename T7>
-class ValueArray7 {
- public:
- ValueArray7(T1 v1, T2 v2, T3 v3, T4 v4, T5 v5, T6 v6, T7 v7) : v1_(v1),
- v2_(v2), v3_(v3), v4_(v4), v5_(v5), v6_(v6), v7_(v7) {}
-
- template <typename T>
- operator ParamGenerator<T>() const {
- const T array[] = {static_cast<T>(v1_), static_cast<T>(v2_),
- static_cast<T>(v3_), static_cast<T>(v4_), static_cast<T>(v5_),
- static_cast<T>(v6_), static_cast<T>(v7_)};
- return ValuesIn(array);
- }
-
- private:
- // No implementation - assignment is unsupported.
- void operator=(const ValueArray7& other);
-
- const T1 v1_;
- const T2 v2_;
- const T3 v3_;
- const T4 v4_;
- const T5 v5_;
- const T6 v6_;
- const T7 v7_;
-};
-
-template <typename T1, typename T2, typename T3, typename T4, typename T5,
- typename T6, typename T7, typename T8>
-class ValueArray8 {
- public:
- ValueArray8(T1 v1, T2 v2, T3 v3, T4 v4, T5 v5, T6 v6, T7 v7,
- T8 v8) : v1_(v1), v2_(v2), v3_(v3), v4_(v4), v5_(v5), v6_(v6), v7_(v7),
- v8_(v8) {}
-
- template <typename T>
- operator ParamGenerator<T>() const {
- const T array[] = {static_cast<T>(v1_), static_cast<T>(v2_),
- static_cast<T>(v3_), static_cast<T>(v4_), static_cast<T>(v5_),
- static_cast<T>(v6_), static_cast<T>(v7_), static_cast<T>(v8_)};
- return ValuesIn(array);
- }
-
- private:
- // No implementation - assignment is unsupported.
- void operator=(const ValueArray8& other);
-
- const T1 v1_;
- const T2 v2_;
- const T3 v3_;
- const T4 v4_;
- const T5 v5_;
- const T6 v6_;
- const T7 v7_;
- const T8 v8_;
-};
-
-template <typename T1, typename T2, typename T3, typename T4, typename T5,
- typename T6, typename T7, typename T8, typename T9>
-class ValueArray9 {
- public:
- ValueArray9(T1 v1, T2 v2, T3 v3, T4 v4, T5 v5, T6 v6, T7 v7, T8 v8,
- T9 v9) : v1_(v1), v2_(v2), v3_(v3), v4_(v4), v5_(v5), v6_(v6), v7_(v7),
- v8_(v8), v9_(v9) {}
-
- template <typename T>
- operator ParamGenerator<T>() const {
- const T array[] = {static_cast<T>(v1_), static_cast<T>(v2_),
- static_cast<T>(v3_), static_cast<T>(v4_), static_cast<T>(v5_),
- static_cast<T>(v6_), static_cast<T>(v7_), static_cast<T>(v8_),
- static_cast<T>(v9_)};
- return ValuesIn(array);
- }
-
- private:
- // No implementation - assignment is unsupported.
- void operator=(const ValueArray9& other);
-
- const T1 v1_;
- const T2 v2_;
- const T3 v3_;
- const T4 v4_;
- const T5 v5_;
- const T6 v6_;
- const T7 v7_;
- const T8 v8_;
- const T9 v9_;
-};
-
-template <typename T1, typename T2, typename T3, typename T4, typename T5,
- typename T6, typename T7, typename T8, typename T9, typename T10>
-class ValueArray10 {
- public:
- ValueArray10(T1 v1, T2 v2, T3 v3, T4 v4, T5 v5, T6 v6, T7 v7, T8 v8, T9 v9,
- T10 v10) : v1_(v1), v2_(v2), v3_(v3), v4_(v4), v5_(v5), v6_(v6), v7_(v7),
- v8_(v8), v9_(v9), v10_(v10) {}
-
- template <typename T>
- operator ParamGenerator<T>() const {
- const T array[] = {static_cast<T>(v1_), static_cast<T>(v2_),
- static_cast<T>(v3_), static_cast<T>(v4_), static_cast<T>(v5_),
- static_cast<T>(v6_), static_cast<T>(v7_), static_cast<T>(v8_),
- static_cast<T>(v9_), static_cast<T>(v10_)};
- return ValuesIn(array);
- }
-
- private:
- // No implementation - assignment is unsupported.
- void operator=(const ValueArray10& other);
-
- const T1 v1_;
- const T2 v2_;
- const T3 v3_;
- const T4 v4_;
- const T5 v5_;
- const T6 v6_;
- const T7 v7_;
- const T8 v8_;
- const T9 v9_;
- const T10 v10_;
-};
-
-template <typename T1, typename T2, typename T3, typename T4, typename T5,
- typename T6, typename T7, typename T8, typename T9, typename T10,
- typename T11>
-class ValueArray11 {
- public:
- ValueArray11(T1 v1, T2 v2, T3 v3, T4 v4, T5 v5, T6 v6, T7 v7, T8 v8, T9 v9,
- T10 v10, T11 v11) : v1_(v1), v2_(v2), v3_(v3), v4_(v4), v5_(v5), v6_(v6),
- v7_(v7), v8_(v8), v9_(v9), v10_(v10), v11_(v11) {}
-
- template <typename T>
- operator ParamGenerator<T>() const {
- const T array[] = {static_cast<T>(v1_), static_cast<T>(v2_),
- static_cast<T>(v3_), static_cast<T>(v4_), static_cast<T>(v5_),
- static_cast<T>(v6_), static_cast<T>(v7_), static_cast<T>(v8_),
- static_cast<T>(v9_), static_cast<T>(v10_), static_cast<T>(v11_)};
- return ValuesIn(array);
- }
-
- private:
- // No implementation - assignment is unsupported.
- void operator=(const ValueArray11& other);
-
- const T1 v1_;
- const T2 v2_;
- const T3 v3_;
- const T4 v4_;
- const T5 v5_;
- const T6 v6_;
- const T7 v7_;
- const T8 v8_;
- const T9 v9_;
- const T10 v10_;
- const T11 v11_;
-};
-
-template <typename T1, typename T2, typename T3, typename T4, typename T5,
- typename T6, typename T7, typename T8, typename T9, typename T10,
- typename T11, typename T12>
-class ValueArray12 {
- public:
- ValueArray12(T1 v1, T2 v2, T3 v3, T4 v4, T5 v5, T6 v6, T7 v7, T8 v8, T9 v9,
- T10 v10, T11 v11, T12 v12) : v1_(v1), v2_(v2), v3_(v3), v4_(v4), v5_(v5),
- v6_(v6), v7_(v7), v8_(v8), v9_(v9), v10_(v10), v11_(v11), v12_(v12) {}
-
- template <typename T>
- operator ParamGenerator<T>() const {
- const T array[] = {static_cast<T>(v1_), static_cast<T>(v2_),
- static_cast<T>(v3_), static_cast<T>(v4_), static_cast<T>(v5_),
- static_cast<T>(v6_), static_cast<T>(v7_), static_cast<T>(v8_),
- static_cast<T>(v9_), static_cast<T>(v10_), static_cast<T>(v11_),
- static_cast<T>(v12_)};
- return ValuesIn(array);
- }
-
- private:
- // No implementation - assignment is unsupported.
- void operator=(const ValueArray12& other);
-
- const T1 v1_;
- const T2 v2_;
- const T3 v3_;
- const T4 v4_;
- const T5 v5_;
- const T6 v6_;
- const T7 v7_;
- const T8 v8_;
- const T9 v9_;
- const T10 v10_;
- const T11 v11_;
- const T12 v12_;
-};
-
-template <typename T1, typename T2, typename T3, typename T4, typename T5,
- typename T6, typename T7, typename T8, typename T9, typename T10,
- typename T11, typename T12, typename T13>
-class ValueArray13 {
- public:
- ValueArray13(T1 v1, T2 v2, T3 v3, T4 v4, T5 v5, T6 v6, T7 v7, T8 v8, T9 v9,
- T10 v10, T11 v11, T12 v12, T13 v13) : v1_(v1), v2_(v2), v3_(v3), v4_(v4),
- v5_(v5), v6_(v6), v7_(v7), v8_(v8), v9_(v9), v10_(v10), v11_(v11),
- v12_(v12), v13_(v13) {}
-
- template <typename T>
- operator ParamGenerator<T>() const {
- const T array[] = {static_cast<T>(v1_), static_cast<T>(v2_),
- static_cast<T>(v3_), static_cast<T>(v4_), static_cast<T>(v5_),
- static_cast<T>(v6_), static_cast<T>(v7_), static_cast<T>(v8_),
- static_cast<T>(v9_), static_cast<T>(v10_), static_cast<T>(v11_),
- static_cast<T>(v12_), static_cast<T>(v13_)};
- return ValuesIn(array);
- }
-
- private:
- // No implementation - assignment is unsupported.
- void operator=(const ValueArray13& other);
-
- const T1 v1_;
- const T2 v2_;
- const T3 v3_;
- const T4 v4_;
- const T5 v5_;
- const T6 v6_;
- const T7 v7_;
- const T8 v8_;
- const T9 v9_;
- const T10 v10_;
- const T11 v11_;
- const T12 v12_;
- const T13 v13_;
-};
-
-template <typename T1, typename T2, typename T3, typename T4, typename T5,
- typename T6, typename T7, typename T8, typename T9, typename T10,
- typename T11, typename T12, typename T13, typename T14>
-class ValueArray14 {
- public:
- ValueArray14(T1 v1, T2 v2, T3 v3, T4 v4, T5 v5, T6 v6, T7 v7, T8 v8, T9 v9,
- T10 v10, T11 v11, T12 v12, T13 v13, T14 v14) : v1_(v1), v2_(v2), v3_(v3),
- v4_(v4), v5_(v5), v6_(v6), v7_(v7), v8_(v8), v9_(v9), v10_(v10),
- v11_(v11), v12_(v12), v13_(v13), v14_(v14) {}
-
- template <typename T>
- operator ParamGenerator<T>() const {
- const T array[] = {static_cast<T>(v1_), static_cast<T>(v2_),
- static_cast<T>(v3_), static_cast<T>(v4_), static_cast<T>(v5_),
- static_cast<T>(v6_), static_cast<T>(v7_), static_cast<T>(v8_),
- static_cast<T>(v9_), static_cast<T>(v10_), static_cast<T>(v11_),
- static_cast<T>(v12_), static_cast<T>(v13_), static_cast<T>(v14_)};
- return ValuesIn(array);
- }
-
- private:
- // No implementation - assignment is unsupported.
- void operator=(const ValueArray14& other);
-
- const T1 v1_;
- const T2 v2_;
- const T3 v3_;
- const T4 v4_;
- const T5 v5_;
- const T6 v6_;
- const T7 v7_;
- const T8 v8_;
- const T9 v9_;
- const T10 v10_;
- const T11 v11_;
- const T12 v12_;
- const T13 v13_;
- const T14 v14_;
-};
-
-template <typename T1, typename T2, typename T3, typename T4, typename T5,
- typename T6, typename T7, typename T8, typename T9, typename T10,
- typename T11, typename T12, typename T13, typename T14, typename T15>
-class ValueArray15 {
- public:
- ValueArray15(T1 v1, T2 v2, T3 v3, T4 v4, T5 v5, T6 v6, T7 v7, T8 v8, T9 v9,
- T10 v10, T11 v11, T12 v12, T13 v13, T14 v14, T15 v15) : v1_(v1), v2_(v2),
- v3_(v3), v4_(v4), v5_(v5), v6_(v6), v7_(v7), v8_(v8), v9_(v9), v10_(v10),
- v11_(v11), v12_(v12), v13_(v13), v14_(v14), v15_(v15) {}
-
- template <typename T>
- operator ParamGenerator<T>() const {
- const T array[] = {static_cast<T>(v1_), static_cast<T>(v2_),
- static_cast<T>(v3_), static_cast<T>(v4_), static_cast<T>(v5_),
- static_cast<T>(v6_), static_cast<T>(v7_), static_cast<T>(v8_),
- static_cast<T>(v9_), static_cast<T>(v10_), static_cast<T>(v11_),
- static_cast<T>(v12_), static_cast<T>(v13_), static_cast<T>(v14_),
- static_cast<T>(v15_)};
- return ValuesIn(array);
- }
-
- private:
- // No implementation - assignment is unsupported.
- void operator=(const ValueArray15& other);
-
- const T1 v1_;
- const T2 v2_;
- const T3 v3_;
- const T4 v4_;
- const T5 v5_;
- const T6 v6_;
- const T7 v7_;
- const T8 v8_;
- const T9 v9_;
- const T10 v10_;
- const T11 v11_;
- const T12 v12_;
- const T13 v13_;
- const T14 v14_;
- const T15 v15_;
-};
-
-template <typename T1, typename T2, typename T3, typename T4, typename T5,
- typename T6, typename T7, typename T8, typename T9, typename T10,
- typename T11, typename T12, typename T13, typename T14, typename T15,
- typename T16>
-class ValueArray16 {
- public:
- ValueArray16(T1 v1, T2 v2, T3 v3, T4 v4, T5 v5, T6 v6, T7 v7, T8 v8, T9 v9,
- T10 v10, T11 v11, T12 v12, T13 v13, T14 v14, T15 v15, T16 v16) : v1_(v1),
- v2_(v2), v3_(v3), v4_(v4), v5_(v5), v6_(v6), v7_(v7), v8_(v8), v9_(v9),
- v10_(v10), v11_(v11), v12_(v12), v13_(v13), v14_(v14), v15_(v15),
- v16_(v16) {}
-
- template <typename T>
- operator ParamGenerator<T>() const {
- const T array[] = {static_cast<T>(v1_), static_cast<T>(v2_),
- static_cast<T>(v3_), static_cast<T>(v4_), static_cast<T>(v5_),
- static_cast<T>(v6_), static_cast<T>(v7_), static_cast<T>(v8_),
- static_cast<T>(v9_), static_cast<T>(v10_), static_cast<T>(v11_),
- static_cast<T>(v12_), static_cast<T>(v13_), static_cast<T>(v14_),
- static_cast<T>(v15_), static_cast<T>(v16_)};
- return ValuesIn(array);
- }
-
- private:
- // No implementation - assignment is unsupported.
- void operator=(const ValueArray16& other);
-
- const T1 v1_;
- const T2 v2_;
- const T3 v3_;
- const T4 v4_;
- const T5 v5_;
- const T6 v6_;
- const T7 v7_;
- const T8 v8_;
- const T9 v9_;
- const T10 v10_;
- const T11 v11_;
- const T12 v12_;
- const T13 v13_;
- const T14 v14_;
- const T15 v15_;
- const T16 v16_;
-};
-
-template <typename T1, typename T2, typename T3, typename T4, typename T5,
- typename T6, typename T7, typename T8, typename T9, typename T10,
- typename T11, typename T12, typename T13, typename T14, typename T15,
- typename T16, typename T17>
-class ValueArray17 {
- public:
- ValueArray17(T1 v1, T2 v2, T3 v3, T4 v4, T5 v5, T6 v6, T7 v7, T8 v8, T9 v9,
- T10 v10, T11 v11, T12 v12, T13 v13, T14 v14, T15 v15, T16 v16,
- T17 v17) : v1_(v1), v2_(v2), v3_(v3), v4_(v4), v5_(v5), v6_(v6), v7_(v7),
- v8_(v8), v9_(v9), v10_(v10), v11_(v11), v12_(v12), v13_(v13), v14_(v14),
- v15_(v15), v16_(v16), v17_(v17) {}
-
- template <typename T>
- operator ParamGenerator<T>() const {
- const T array[] = {static_cast<T>(v1_), static_cast<T>(v2_),
- static_cast<T>(v3_), static_cast<T>(v4_), static_cast<T>(v5_),
- static_cast<T>(v6_), static_cast<T>(v7_), static_cast<T>(v8_),
- static_cast<T>(v9_), static_cast<T>(v10_), static_cast<T>(v11_),
- static_cast<T>(v12_), static_cast<T>(v13_), static_cast<T>(v14_),
- static_cast<T>(v15_), static_cast<T>(v16_), static_cast<T>(v17_)};
- return ValuesIn(array);
- }
-
- private:
- // No implementation - assignment is unsupported.
- void operator=(const ValueArray17& other);
-
- const T1 v1_;
- const T2 v2_;
- const T3 v3_;
- const T4 v4_;
- const T5 v5_;
- const T6 v6_;
- const T7 v7_;
- const T8 v8_;
- const T9 v9_;
- const T10 v10_;
- const T11 v11_;
- const T12 v12_;
- const T13 v13_;
- const T14 v14_;
- const T15 v15_;
- const T16 v16_;
- const T17 v17_;
-};
-
-template <typename T1, typename T2, typename T3, typename T4, typename T5,
- typename T6, typename T7, typename T8, typename T9, typename T10,
- typename T11, typename T12, typename T13, typename T14, typename T15,
- typename T16, typename T17, typename T18>
-class ValueArray18 {
- public:
- ValueArray18(T1 v1, T2 v2, T3 v3, T4 v4, T5 v5, T6 v6, T7 v7, T8 v8, T9 v9,
- T10 v10, T11 v11, T12 v12, T13 v13, T14 v14, T15 v15, T16 v16, T17 v17,
- T18 v18) : v1_(v1), v2_(v2), v3_(v3), v4_(v4), v5_(v5), v6_(v6), v7_(v7),
- v8_(v8), v9_(v9), v10_(v10), v11_(v11), v12_(v12), v13_(v13), v14_(v14),
- v15_(v15), v16_(v16), v17_(v17), v18_(v18) {}
-
- template <typename T>
- operator ParamGenerator<T>() const {
- const T array[] = {static_cast<T>(v1_), static_cast<T>(v2_),
- static_cast<T>(v3_), static_cast<T>(v4_), static_cast<T>(v5_),
- static_cast<T>(v6_), static_cast<T>(v7_), static_cast<T>(v8_),
- static_cast<T>(v9_), static_cast<T>(v10_), static_cast<T>(v11_),
- static_cast<T>(v12_), static_cast<T>(v13_), static_cast<T>(v14_),
- static_cast<T>(v15_), static_cast<T>(v16_), static_cast<T>(v17_),
- static_cast<T>(v18_)};
- return ValuesIn(array);
- }
-
- private:
- // No implementation - assignment is unsupported.
- void operator=(const ValueArray18& other);
-
- const T1 v1_;
- const T2 v2_;
- const T3 v3_;
- const T4 v4_;
- const T5 v5_;
- const T6 v6_;
- const T7 v7_;
- const T8 v8_;
- const T9 v9_;
- const T10 v10_;
- const T11 v11_;
- const T12 v12_;
- const T13 v13_;
- const T14 v14_;
- const T15 v15_;
- const T16 v16_;
- const T17 v17_;
- const T18 v18_;
-};
-
-template <typename T1, typename T2, typename T3, typename T4, typename T5,
- typename T6, typename T7, typename T8, typename T9, typename T10,
- typename T11, typename T12, typename T13, typename T14, typename T15,
- typename T16, typename T17, typename T18, typename T19>
-class ValueArray19 {
- public:
- ValueArray19(T1 v1, T2 v2, T3 v3, T4 v4, T5 v5, T6 v6, T7 v7, T8 v8, T9 v9,
- T10 v10, T11 v11, T12 v12, T13 v13, T14 v14, T15 v15, T16 v16, T17 v17,
- T18 v18, T19 v19) : v1_(v1), v2_(v2), v3_(v3), v4_(v4), v5_(v5), v6_(v6),
- v7_(v7), v8_(v8), v9_(v9), v10_(v10), v11_(v11), v12_(v12), v13_(v13),
- v14_(v14), v15_(v15), v16_(v16), v17_(v17), v18_(v18), v19_(v19) {}
-
- template <typename T>
- operator ParamGenerator<T>() const {
- const T array[] = {static_cast<T>(v1_), static_cast<T>(v2_),
- static_cast<T>(v3_), static_cast<T>(v4_), static_cast<T>(v5_),
- static_cast<T>(v6_), static_cast<T>(v7_), static_cast<T>(v8_),
- static_cast<T>(v9_), static_cast<T>(v10_), static_cast<T>(v11_),
- static_cast<T>(v12_), static_cast<T>(v13_), static_cast<T>(v14_),
- static_cast<T>(v15_), static_cast<T>(v16_), static_cast<T>(v17_),
- static_cast<T>(v18_), static_cast<T>(v19_)};
- return ValuesIn(array);
- }
-
- private:
- // No implementation - assignment is unsupported.
- void operator=(const ValueArray19& other);
-
- const T1 v1_;
- const T2 v2_;
- const T3 v3_;
- const T4 v4_;
- const T5 v5_;
- const T6 v6_;
- const T7 v7_;
- const T8 v8_;
- const T9 v9_;
- const T10 v10_;
- const T11 v11_;
- const T12 v12_;
- const T13 v13_;
- const T14 v14_;
- const T15 v15_;
- const T16 v16_;
- const T17 v17_;
- const T18 v18_;
- const T19 v19_;
-};
-
-template <typename T1, typename T2, typename T3, typename T4, typename T5,
- typename T6, typename T7, typename T8, typename T9, typename T10,
- typename T11, typename T12, typename T13, typename T14, typename T15,
- typename T16, typename T17, typename T18, typename T19, typename T20>
-class ValueArray20 {
- public:
- ValueArray20(T1 v1, T2 v2, T3 v3, T4 v4, T5 v5, T6 v6, T7 v7, T8 v8, T9 v9,
- T10 v10, T11 v11, T12 v12, T13 v13, T14 v14, T15 v15, T16 v16, T17 v17,
- T18 v18, T19 v19, T20 v20) : v1_(v1), v2_(v2), v3_(v3), v4_(v4), v5_(v5),
- v6_(v6), v7_(v7), v8_(v8), v9_(v9), v10_(v10), v11_(v11), v12_(v12),
- v13_(v13), v14_(v14), v15_(v15), v16_(v16), v17_(v17), v18_(v18),
- v19_(v19), v20_(v20) {}
-
- template <typename T>
- operator ParamGenerator<T>() const {
- const T array[] = {static_cast<T>(v1_), static_cast<T>(v2_),
- static_cast<T>(v3_), static_cast<T>(v4_), static_cast<T>(v5_),
- static_cast<T>(v6_), static_cast<T>(v7_), static_cast<T>(v8_),
- static_cast<T>(v9_), static_cast<T>(v10_), static_cast<T>(v11_),
- static_cast<T>(v12_), static_cast<T>(v13_), static_cast<T>(v14_),
- static_cast<T>(v15_), static_cast<T>(v16_), static_cast<T>(v17_),
- static_cast<T>(v18_), static_cast<T>(v19_), static_cast<T>(v20_)};
- return ValuesIn(array);
- }
-
- private:
- // No implementation - assignment is unsupported.
- void operator=(const ValueArray20& other);
-
- const T1 v1_;
- const T2 v2_;
- const T3 v3_;
- const T4 v4_;
- const T5 v5_;
- const T6 v6_;
- const T7 v7_;
- const T8 v8_;
- const T9 v9_;
- const T10 v10_;
- const T11 v11_;
- const T12 v12_;
- const T13 v13_;
- const T14 v14_;
- const T15 v15_;
- const T16 v16_;
- const T17 v17_;
- const T18 v18_;
- const T19 v19_;
- const T20 v20_;
-};
-
-template <typename T1, typename T2, typename T3, typename T4, typename T5,
- typename T6, typename T7, typename T8, typename T9, typename T10,
- typename T11, typename T12, typename T13, typename T14, typename T15,
- typename T16, typename T17, typename T18, typename T19, typename T20,
- typename T21>
-class ValueArray21 {
- public:
- ValueArray21(T1 v1, T2 v2, T3 v3, T4 v4, T5 v5, T6 v6, T7 v7, T8 v8, T9 v9,
- T10 v10, T11 v11, T12 v12, T13 v13, T14 v14, T15 v15, T16 v16, T17 v17,
- T18 v18, T19 v19, T20 v20, T21 v21) : v1_(v1), v2_(v2), v3_(v3), v4_(v4),
- v5_(v5), v6_(v6), v7_(v7), v8_(v8), v9_(v9), v10_(v10), v11_(v11),
- v12_(v12), v13_(v13), v14_(v14), v15_(v15), v16_(v16), v17_(v17),
- v18_(v18), v19_(v19), v20_(v20), v21_(v21) {}
-
- template <typename T>
- operator ParamGenerator<T>() const {
- const T array[] = {static_cast<T>(v1_), static_cast<T>(v2_),
- static_cast<T>(v3_), static_cast<T>(v4_), static_cast<T>(v5_),
- static_cast<T>(v6_), static_cast<T>(v7_), static_cast<T>(v8_),
- static_cast<T>(v9_), static_cast<T>(v10_), static_cast<T>(v11_),
- static_cast<T>(v12_), static_cast<T>(v13_), static_cast<T>(v14_),
- static_cast<T>(v15_), static_cast<T>(v16_), static_cast<T>(v17_),
- static_cast<T>(v18_), static_cast<T>(v19_), static_cast<T>(v20_),
- static_cast<T>(v21_)};
- return ValuesIn(array);
- }
-
- private:
- // No implementation - assignment is unsupported.
- void operator=(const ValueArray21& other);
-
- const T1 v1_;
- const T2 v2_;
- const T3 v3_;
- const T4 v4_;
- const T5 v5_;
- const T6 v6_;
- const T7 v7_;
- const T8 v8_;
- const T9 v9_;
- const T10 v10_;
- const T11 v11_;
- const T12 v12_;
- const T13 v13_;
- const T14 v14_;
- const T15 v15_;
- const T16 v16_;
- const T17 v17_;
- const T18 v18_;
- const T19 v19_;
- const T20 v20_;
- const T21 v21_;
-};
-
-template <typename T1, typename T2, typename T3, typename T4, typename T5,
- typename T6, typename T7, typename T8, typename T9, typename T10,
- typename T11, typename T12, typename T13, typename T14, typename T15,
- typename T16, typename T17, typename T18, typename T19, typename T20,
- typename T21, typename T22>
-class ValueArray22 {
- public:
- ValueArray22(T1 v1, T2 v2, T3 v3, T4 v4, T5 v5, T6 v6, T7 v7, T8 v8, T9 v9,
- T10 v10, T11 v11, T12 v12, T13 v13, T14 v14, T15 v15, T16 v16, T17 v17,
- T18 v18, T19 v19, T20 v20, T21 v21, T22 v22) : v1_(v1), v2_(v2), v3_(v3),
- v4_(v4), v5_(v5), v6_(v6), v7_(v7), v8_(v8), v9_(v9), v10_(v10),
- v11_(v11), v12_(v12), v13_(v13), v14_(v14), v15_(v15), v16_(v16),
- v17_(v17), v18_(v18), v19_(v19), v20_(v20), v21_(v21), v22_(v22) {}
-
- template <typename T>
- operator ParamGenerator<T>() const {
- const T array[] = {static_cast<T>(v1_), static_cast<T>(v2_),
- static_cast<T>(v3_), static_cast<T>(v4_), static_cast<T>(v5_),
- static_cast<T>(v6_), static_cast<T>(v7_), static_cast<T>(v8_),
- static_cast<T>(v9_), static_cast<T>(v10_), static_cast<T>(v11_),
- static_cast<T>(v12_), static_cast<T>(v13_), static_cast<T>(v14_),
- static_cast<T>(v15_), static_cast<T>(v16_), static_cast<T>(v17_),
- static_cast<T>(v18_), static_cast<T>(v19_), static_cast<T>(v20_),
- static_cast<T>(v21_), static_cast<T>(v22_)};
- return ValuesIn(array);
- }
-
- private:
- // No implementation - assignment is unsupported.
- void operator=(const ValueArray22& other);
-
- const T1 v1_;
- const T2 v2_;
- const T3 v3_;
- const T4 v4_;
- const T5 v5_;
- const T6 v6_;
- const T7 v7_;
- const T8 v8_;
- const T9 v9_;
- const T10 v10_;
- const T11 v11_;
- const T12 v12_;
- const T13 v13_;
- const T14 v14_;
- const T15 v15_;
- const T16 v16_;
- const T17 v17_;
- const T18 v18_;
- const T19 v19_;
- const T20 v20_;
- const T21 v21_;
- const T22 v22_;
-};
-
-template <typename T1, typename T2, typename T3, typename T4, typename T5,
- typename T6, typename T7, typename T8, typename T9, typename T10,
- typename T11, typename T12, typename T13, typename T14, typename T15,
- typename T16, typename T17, typename T18, typename T19, typename T20,
- typename T21, typename T22, typename T23>
-class ValueArray23 {
- public:
- ValueArray23(T1 v1, T2 v2, T3 v3, T4 v4, T5 v5, T6 v6, T7 v7, T8 v8, T9 v9,
- T10 v10, T11 v11, T12 v12, T13 v13, T14 v14, T15 v15, T16 v16, T17 v17,
- T18 v18, T19 v19, T20 v20, T21 v21, T22 v22, T23 v23) : v1_(v1), v2_(v2),
- v3_(v3), v4_(v4), v5_(v5), v6_(v6), v7_(v7), v8_(v8), v9_(v9), v10_(v10),
- v11_(v11), v12_(v12), v13_(v13), v14_(v14), v15_(v15), v16_(v16),
- v17_(v17), v18_(v18), v19_(v19), v20_(v20), v21_(v21), v22_(v22),
- v23_(v23) {}
-
- template <typename T>
- operator ParamGenerator<T>() const {
- const T array[] = {static_cast<T>(v1_), static_cast<T>(v2_),
- static_cast<T>(v3_), static_cast<T>(v4_), static_cast<T>(v5_),
- static_cast<T>(v6_), static_cast<T>(v7_), static_cast<T>(v8_),
- static_cast<T>(v9_), static_cast<T>(v10_), static_cast<T>(v11_),
- static_cast<T>(v12_), static_cast<T>(v13_), static_cast<T>(v14_),
- static_cast<T>(v15_), static_cast<T>(v16_), static_cast<T>(v17_),
- static_cast<T>(v18_), static_cast<T>(v19_), static_cast<T>(v20_),
- static_cast<T>(v21_), static_cast<T>(v22_), static_cast<T>(v23_)};
- return ValuesIn(array);
- }
-
- private:
- // No implementation - assignment is unsupported.
- void operator=(const ValueArray23& other);
-
- const T1 v1_;
- const T2 v2_;
- const T3 v3_;
- const T4 v4_;
- const T5 v5_;
- const T6 v6_;
- const T7 v7_;
- const T8 v8_;
- const T9 v9_;
- const T10 v10_;
- const T11 v11_;
- const T12 v12_;
- const T13 v13_;
- const T14 v14_;
- const T15 v15_;
- const T16 v16_;
- const T17 v17_;
- const T18 v18_;
- const T19 v19_;
- const T20 v20_;
- const T21 v21_;
- const T22 v22_;
- const T23 v23_;
-};
-
-template <typename T1, typename T2, typename T3, typename T4, typename T5,
- typename T6, typename T7, typename T8, typename T9, typename T10,
- typename T11, typename T12, typename T13, typename T14, typename T15,
- typename T16, typename T17, typename T18, typename T19, typename T20,
- typename T21, typename T22, typename T23, typename T24>
-class ValueArray24 {
- public:
- ValueArray24(T1 v1, T2 v2, T3 v3, T4 v4, T5 v5, T6 v6, T7 v7, T8 v8, T9 v9,
- T10 v10, T11 v11, T12 v12, T13 v13, T14 v14, T15 v15, T16 v16, T17 v17,
- T18 v18, T19 v19, T20 v20, T21 v21, T22 v22, T23 v23, T24 v24) : v1_(v1),
- v2_(v2), v3_(v3), v4_(v4), v5_(v5), v6_(v6), v7_(v7), v8_(v8), v9_(v9),
- v10_(v10), v11_(v11), v12_(v12), v13_(v13), v14_(v14), v15_(v15),
- v16_(v16), v17_(v17), v18_(v18), v19_(v19), v20_(v20), v21_(v21),
- v22_(v22), v23_(v23), v24_(v24) {}
-
- template <typename T>
- operator ParamGenerator<T>() const {
- const T array[] = {static_cast<T>(v1_), static_cast<T>(v2_),
- static_cast<T>(v3_), static_cast<T>(v4_), static_cast<T>(v5_),
- static_cast<T>(v6_), static_cast<T>(v7_), static_cast<T>(v8_),
- static_cast<T>(v9_), static_cast<T>(v10_), static_cast<T>(v11_),
- static_cast<T>(v12_), static_cast<T>(v13_), static_cast<T>(v14_),
- static_cast<T>(v15_), static_cast<T>(v16_), static_cast<T>(v17_),
- static_cast<T>(v18_), static_cast<T>(v19_), static_cast<T>(v20_),
- static_cast<T>(v21_), static_cast<T>(v22_), static_cast<T>(v23_),
- static_cast<T>(v24_)};
- return ValuesIn(array);
- }
-
- private:
- // No implementation - assignment is unsupported.
- void operator=(const ValueArray24& other);
-
- const T1 v1_;
- const T2 v2_;
- const T3 v3_;
- const T4 v4_;
- const T5 v5_;
- const T6 v6_;
- const T7 v7_;
- const T8 v8_;
- const T9 v9_;
- const T10 v10_;
- const T11 v11_;
- const T12 v12_;
- const T13 v13_;
- const T14 v14_;
- const T15 v15_;
- const T16 v16_;
- const T17 v17_;
- const T18 v18_;
- const T19 v19_;
- const T20 v20_;
- const T21 v21_;
- const T22 v22_;
- const T23 v23_;
- const T24 v24_;
-};
-
-template <typename T1, typename T2, typename T3, typename T4, typename T5,
- typename T6, typename T7, typename T8, typename T9, typename T10,
- typename T11, typename T12, typename T13, typename T14, typename T15,
- typename T16, typename T17, typename T18, typename T19, typename T20,
- typename T21, typename T22, typename T23, typename T24, typename T25>
-class ValueArray25 {
- public:
- ValueArray25(T1 v1, T2 v2, T3 v3, T4 v4, T5 v5, T6 v6, T7 v7, T8 v8, T9 v9,
- T10 v10, T11 v11, T12 v12, T13 v13, T14 v14, T15 v15, T16 v16, T17 v17,
- T18 v18, T19 v19, T20 v20, T21 v21, T22 v22, T23 v23, T24 v24,
- T25 v25) : v1_(v1), v2_(v2), v3_(v3), v4_(v4), v5_(v5), v6_(v6), v7_(v7),
- v8_(v8), v9_(v9), v10_(v10), v11_(v11), v12_(v12), v13_(v13), v14_(v14),
- v15_(v15), v16_(v16), v17_(v17), v18_(v18), v19_(v19), v20_(v20),
- v21_(v21), v22_(v22), v23_(v23), v24_(v24), v25_(v25) {}
-
- template <typename T>
- operator ParamGenerator<T>() const {
- const T array[] = {static_cast<T>(v1_), static_cast<T>(v2_),
- static_cast<T>(v3_), static_cast<T>(v4_), static_cast<T>(v5_),
- static_cast<T>(v6_), static_cast<T>(v7_), static_cast<T>(v8_),
- static_cast<T>(v9_), static_cast<T>(v10_), static_cast<T>(v11_),
- static_cast<T>(v12_), static_cast<T>(v13_), static_cast<T>(v14_),
- static_cast<T>(v15_), static_cast<T>(v16_), static_cast<T>(v17_),
- static_cast<T>(v18_), static_cast<T>(v19_), static_cast<T>(v20_),
- static_cast<T>(v21_), static_cast<T>(v22_), static_cast<T>(v23_),
- static_cast<T>(v24_), static_cast<T>(v25_)};
- return ValuesIn(array);
- }
-
- private:
- // No implementation - assignment is unsupported.
- void operator=(const ValueArray25& other);
-
- const T1 v1_;
- const T2 v2_;
- const T3 v3_;
- const T4 v4_;
- const T5 v5_;
- const T6 v6_;
- const T7 v7_;
- const T8 v8_;
- const T9 v9_;
- const T10 v10_;
- const T11 v11_;
- const T12 v12_;
- const T13 v13_;
- const T14 v14_;
- const T15 v15_;
- const T16 v16_;
- const T17 v17_;
- const T18 v18_;
- const T19 v19_;
- const T20 v20_;
- const T21 v21_;
- const T22 v22_;
- const T23 v23_;
- const T24 v24_;
- const T25 v25_;
-};
-
-template <typename T1, typename T2, typename T3, typename T4, typename T5,
- typename T6, typename T7, typename T8, typename T9, typename T10,
- typename T11, typename T12, typename T13, typename T14, typename T15,
- typename T16, typename T17, typename T18, typename T19, typename T20,
- typename T21, typename T22, typename T23, typename T24, typename T25,
- typename T26>
-class ValueArray26 {
- public:
- ValueArray26(T1 v1, T2 v2, T3 v3, T4 v4, T5 v5, T6 v6, T7 v7, T8 v8, T9 v9,
- T10 v10, T11 v11, T12 v12, T13 v13, T14 v14, T15 v15, T16 v16, T17 v17,
- T18 v18, T19 v19, T20 v20, T21 v21, T22 v22, T23 v23, T24 v24, T25 v25,
- T26 v26) : v1_(v1), v2_(v2), v3_(v3), v4_(v4), v5_(v5), v6_(v6), v7_(v7),
- v8_(v8), v9_(v9), v10_(v10), v11_(v11), v12_(v12), v13_(v13), v14_(v14),
- v15_(v15), v16_(v16), v17_(v17), v18_(v18), v19_(v19), v20_(v20),
- v21_(v21), v22_(v22), v23_(v23), v24_(v24), v25_(v25), v26_(v26) {}
-
- template <typename T>
- operator ParamGenerator<T>() const {
- const T array[] = {static_cast<T>(v1_), static_cast<T>(v2_),
- static_cast<T>(v3_), static_cast<T>(v4_), static_cast<T>(v5_),
- static_cast<T>(v6_), static_cast<T>(v7_), static_cast<T>(v8_),
- static_cast<T>(v9_), static_cast<T>(v10_), static_cast<T>(v11_),
- static_cast<T>(v12_), static_cast<T>(v13_), static_cast<T>(v14_),
- static_cast<T>(v15_), static_cast<T>(v16_), static_cast<T>(v17_),
- static_cast<T>(v18_), static_cast<T>(v19_), static_cast<T>(v20_),
- static_cast<T>(v21_), static_cast<T>(v22_), static_cast<T>(v23_),
- static_cast<T>(v24_), static_cast<T>(v25_), static_cast<T>(v26_)};
- return ValuesIn(array);
- }
-
- private:
- // No implementation - assignment is unsupported.
- void operator=(const ValueArray26& other);
-
- const T1 v1_;
- const T2 v2_;
- const T3 v3_;
- const T4 v4_;
- const T5 v5_;
- const T6 v6_;
- const T7 v7_;
- const T8 v8_;
- const T9 v9_;
- const T10 v10_;
- const T11 v11_;
- const T12 v12_;
- const T13 v13_;
- const T14 v14_;
- const T15 v15_;
- const T16 v16_;
- const T17 v17_;
- const T18 v18_;
- const T19 v19_;
- const T20 v20_;
- const T21 v21_;
- const T22 v22_;
- const T23 v23_;
- const T24 v24_;
- const T25 v25_;
- const T26 v26_;
-};
-
-template <typename T1, typename T2, typename T3, typename T4, typename T5,
- typename T6, typename T7, typename T8, typename T9, typename T10,
- typename T11, typename T12, typename T13, typename T14, typename T15,
- typename T16, typename T17, typename T18, typename T19, typename T20,
- typename T21, typename T22, typename T23, typename T24, typename T25,
- typename T26, typename T27>
-class ValueArray27 {
- public:
- ValueArray27(T1 v1, T2 v2, T3 v3, T4 v4, T5 v5, T6 v6, T7 v7, T8 v8, T9 v9,
- T10 v10, T11 v11, T12 v12, T13 v13, T14 v14, T15 v15, T16 v16, T17 v17,
- T18 v18, T19 v19, T20 v20, T21 v21, T22 v22, T23 v23, T24 v24, T25 v25,
- T26 v26, T27 v27) : v1_(v1), v2_(v2), v3_(v3), v4_(v4), v5_(v5), v6_(v6),
- v7_(v7), v8_(v8), v9_(v9), v10_(v10), v11_(v11), v12_(v12), v13_(v13),
- v14_(v14), v15_(v15), v16_(v16), v17_(v17), v18_(v18), v19_(v19),
- v20_(v20), v21_(v21), v22_(v22), v23_(v23), v24_(v24), v25_(v25),
- v26_(v26), v27_(v27) {}
-
- template <typename T>
- operator ParamGenerator<T>() const {
- const T array[] = {static_cast<T>(v1_), static_cast<T>(v2_),
- static_cast<T>(v3_), static_cast<T>(v4_), static_cast<T>(v5_),
- static_cast<T>(v6_), static_cast<T>(v7_), static_cast<T>(v8_),
- static_cast<T>(v9_), static_cast<T>(v10_), static_cast<T>(v11_),
- static_cast<T>(v12_), static_cast<T>(v13_), static_cast<T>(v14_),
- static_cast<T>(v15_), static_cast<T>(v16_), static_cast<T>(v17_),
- static_cast<T>(v18_), static_cast<T>(v19_), static_cast<T>(v20_),
- static_cast<T>(v21_), static_cast<T>(v22_), static_cast<T>(v23_),
- static_cast<T>(v24_), static_cast<T>(v25_), static_cast<T>(v26_),
- static_cast<T>(v27_)};
- return ValuesIn(array);
- }
-
- private:
- // No implementation - assignment is unsupported.
- void operator=(const ValueArray27& other);
-
- const T1 v1_;
- const T2 v2_;
- const T3 v3_;
- const T4 v4_;
- const T5 v5_;
- const T6 v6_;
- const T7 v7_;
- const T8 v8_;
- const T9 v9_;
- const T10 v10_;
- const T11 v11_;
- const T12 v12_;
- const T13 v13_;
- const T14 v14_;
- const T15 v15_;
- const T16 v16_;
- const T17 v17_;
- const T18 v18_;
- const T19 v19_;
- const T20 v20_;
- const T21 v21_;
- const T22 v22_;
- const T23 v23_;
- const T24 v24_;
- const T25 v25_;
- const T26 v26_;
- const T27 v27_;
-};
-
-template <typename T1, typename T2, typename T3, typename T4, typename T5,
- typename T6, typename T7, typename T8, typename T9, typename T10,
- typename T11, typename T12, typename T13, typename T14, typename T15,
- typename T16, typename T17, typename T18, typename T19, typename T20,
- typename T21, typename T22, typename T23, typename T24, typename T25,
- typename T26, typename T27, typename T28>
-class ValueArray28 {
- public:
- ValueArray28(T1 v1, T2 v2, T3 v3, T4 v4, T5 v5, T6 v6, T7 v7, T8 v8, T9 v9,
- T10 v10, T11 v11, T12 v12, T13 v13, T14 v14, T15 v15, T16 v16, T17 v17,
- T18 v18, T19 v19, T20 v20, T21 v21, T22 v22, T23 v23, T24 v24, T25 v25,
- T26 v26, T27 v27, T28 v28) : v1_(v1), v2_(v2), v3_(v3), v4_(v4), v5_(v5),
- v6_(v6), v7_(v7), v8_(v8), v9_(v9), v10_(v10), v11_(v11), v12_(v12),
- v13_(v13), v14_(v14), v15_(v15), v16_(v16), v17_(v17), v18_(v18),
- v19_(v19), v20_(v20), v21_(v21), v22_(v22), v23_(v23), v24_(v24),
- v25_(v25), v26_(v26), v27_(v27), v28_(v28) {}
-
- template <typename T>
- operator ParamGenerator<T>() const {
- const T array[] = {static_cast<T>(v1_), static_cast<T>(v2_),
- static_cast<T>(v3_), static_cast<T>(v4_), static_cast<T>(v5_),
- static_cast<T>(v6_), static_cast<T>(v7_), static_cast<T>(v8_),
- static_cast<T>(v9_), static_cast<T>(v10_), static_cast<T>(v11_),
- static_cast<T>(v12_), static_cast<T>(v13_), static_cast<T>(v14_),
- static_cast<T>(v15_), static_cast<T>(v16_), static_cast<T>(v17_),
- static_cast<T>(v18_), static_cast<T>(v19_), static_cast<T>(v20_),
- static_cast<T>(v21_), static_cast<T>(v22_), static_cast<T>(v23_),
- static_cast<T>(v24_), static_cast<T>(v25_), static_cast<T>(v26_),
- static_cast<T>(v27_), static_cast<T>(v28_)};
- return ValuesIn(array);
- }
-
- private:
- // No implementation - assignment is unsupported.
- void operator=(const ValueArray28& other);
-
- const T1 v1_;
- const T2 v2_;
- const T3 v3_;
- const T4 v4_;
- const T5 v5_;
- const T6 v6_;
- const T7 v7_;
- const T8 v8_;
- const T9 v9_;
- const T10 v10_;
- const T11 v11_;
- const T12 v12_;
- const T13 v13_;
- const T14 v14_;
- const T15 v15_;
- const T16 v16_;
- const T17 v17_;
- const T18 v18_;
- const T19 v19_;
- const T20 v20_;
- const T21 v21_;
- const T22 v22_;
- const T23 v23_;
- const T24 v24_;
- const T25 v25_;
- const T26 v26_;
- const T27 v27_;
- const T28 v28_;
-};
-
-template <typename T1, typename T2, typename T3, typename T4, typename T5,
- typename T6, typename T7, typename T8, typename T9, typename T10,
- typename T11, typename T12, typename T13, typename T14, typename T15,
- typename T16, typename T17, typename T18, typename T19, typename T20,
- typename T21, typename T22, typename T23, typename T24, typename T25,
- typename T26, typename T27, typename T28, typename T29>
-class ValueArray29 {
- public:
- ValueArray29(T1 v1, T2 v2, T3 v3, T4 v4, T5 v5, T6 v6, T7 v7, T8 v8, T9 v9,
- T10 v10, T11 v11, T12 v12, T13 v13, T14 v14, T15 v15, T16 v16, T17 v17,
- T18 v18, T19 v19, T20 v20, T21 v21, T22 v22, T23 v23, T24 v24, T25 v25,
- T26 v26, T27 v27, T28 v28, T29 v29) : v1_(v1), v2_(v2), v3_(v3), v4_(v4),
- v5_(v5), v6_(v6), v7_(v7), v8_(v8), v9_(v9), v10_(v10), v11_(v11),
- v12_(v12), v13_(v13), v14_(v14), v15_(v15), v16_(v16), v17_(v17),
- v18_(v18), v19_(v19), v20_(v20), v21_(v21), v22_(v22), v23_(v23),
- v24_(v24), v25_(v25), v26_(v26), v27_(v27), v28_(v28), v29_(v29) {}
-
- template <typename T>
- operator ParamGenerator<T>() const {
- const T array[] = {static_cast<T>(v1_), static_cast<T>(v2_),
- static_cast<T>(v3_), static_cast<T>(v4_), static_cast<T>(v5_),
- static_cast<T>(v6_), static_cast<T>(v7_), static_cast<T>(v8_),
- static_cast<T>(v9_), static_cast<T>(v10_), static_cast<T>(v11_),
- static_cast<T>(v12_), static_cast<T>(v13_), static_cast<T>(v14_),
- static_cast<T>(v15_), static_cast<T>(v16_), static_cast<T>(v17_),
- static_cast<T>(v18_), static_cast<T>(v19_), static_cast<T>(v20_),
- static_cast<T>(v21_), static_cast<T>(v22_), static_cast<T>(v23_),
- static_cast<T>(v24_), static_cast<T>(v25_), static_cast<T>(v26_),
- static_cast<T>(v27_), static_cast<T>(v28_), static_cast<T>(v29_)};
- return ValuesIn(array);
- }
-
- private:
- // No implementation - assignment is unsupported.
- void operator=(const ValueArray29& other);
-
- const T1 v1_;
- const T2 v2_;
- const T3 v3_;
- const T4 v4_;
- const T5 v5_;
- const T6 v6_;
- const T7 v7_;
- const T8 v8_;
- const T9 v9_;
- const T10 v10_;
- const T11 v11_;
- const T12 v12_;
- const T13 v13_;
- const T14 v14_;
- const T15 v15_;
- const T16 v16_;
- const T17 v17_;
- const T18 v18_;
- const T19 v19_;
- const T20 v20_;
- const T21 v21_;
- const T22 v22_;
- const T23 v23_;
- const T24 v24_;
- const T25 v25_;
- const T26 v26_;
- const T27 v27_;
- const T28 v28_;
- const T29 v29_;
-};
-
-template <typename T1, typename T2, typename T3, typename T4, typename T5,
- typename T6, typename T7, typename T8, typename T9, typename T10,
- typename T11, typename T12, typename T13, typename T14, typename T15,
- typename T16, typename T17, typename T18, typename T19, typename T20,
- typename T21, typename T22, typename T23, typename T24, typename T25,
- typename T26, typename T27, typename T28, typename T29, typename T30>
-class ValueArray30 {
- public:
- ValueArray30(T1 v1, T2 v2, T3 v3, T4 v4, T5 v5, T6 v6, T7 v7, T8 v8, T9 v9,
- T10 v10, T11 v11, T12 v12, T13 v13, T14 v14, T15 v15, T16 v16, T17 v17,
- T18 v18, T19 v19, T20 v20, T21 v21, T22 v22, T23 v23, T24 v24, T25 v25,
- T26 v26, T27 v27, T28 v28, T29 v29, T30 v30) : v1_(v1), v2_(v2), v3_(v3),
- v4_(v4), v5_(v5), v6_(v6), v7_(v7), v8_(v8), v9_(v9), v10_(v10),
- v11_(v11), v12_(v12), v13_(v13), v14_(v14), v15_(v15), v16_(v16),
- v17_(v17), v18_(v18), v19_(v19), v20_(v20), v21_(v21), v22_(v22),
- v23_(v23), v24_(v24), v25_(v25), v26_(v26), v27_(v27), v28_(v28),
- v29_(v29), v30_(v30) {}
-
- template <typename T>
- operator ParamGenerator<T>() const {
- const T array[] = {static_cast<T>(v1_), static_cast<T>(v2_),
- static_cast<T>(v3_), static_cast<T>(v4_), static_cast<T>(v5_),
- static_cast<T>(v6_), static_cast<T>(v7_), static_cast<T>(v8_),
- static_cast<T>(v9_), static_cast<T>(v10_), static_cast<T>(v11_),
- static_cast<T>(v12_), static_cast<T>(v13_), static_cast<T>(v14_),
- static_cast<T>(v15_), static_cast<T>(v16_), static_cast<T>(v17_),
- static_cast<T>(v18_), static_cast<T>(v19_), static_cast<T>(v20_),
- static_cast<T>(v21_), static_cast<T>(v22_), static_cast<T>(v23_),
- static_cast<T>(v24_), static_cast<T>(v25_), static_cast<T>(v26_),
- static_cast<T>(v27_), static_cast<T>(v28_), static_cast<T>(v29_),
- static_cast<T>(v30_)};
- return ValuesIn(array);
- }
-
- private:
- // No implementation - assignment is unsupported.
- void operator=(const ValueArray30& other);
-
- const T1 v1_;
- const T2 v2_;
- const T3 v3_;
- const T4 v4_;
- const T5 v5_;
- const T6 v6_;
- const T7 v7_;
- const T8 v8_;
- const T9 v9_;
- const T10 v10_;
- const T11 v11_;
- const T12 v12_;
- const T13 v13_;
- const T14 v14_;
- const T15 v15_;
- const T16 v16_;
- const T17 v17_;
- const T18 v18_;
- const T19 v19_;
- const T20 v20_;
- const T21 v21_;
- const T22 v22_;
- const T23 v23_;
- const T24 v24_;
- const T25 v25_;
- const T26 v26_;
- const T27 v27_;
- const T28 v28_;
- const T29 v29_;
- const T30 v30_;
-};
-
-template <typename T1, typename T2, typename T3, typename T4, typename T5,
- typename T6, typename T7, typename T8, typename T9, typename T10,
- typename T11, typename T12, typename T13, typename T14, typename T15,
- typename T16, typename T17, typename T18, typename T19, typename T20,
- typename T21, typename T22, typename T23, typename T24, typename T25,
- typename T26, typename T27, typename T28, typename T29, typename T30,
- typename T31>
-class ValueArray31 {
- public:
- ValueArray31(T1 v1, T2 v2, T3 v3, T4 v4, T5 v5, T6 v6, T7 v7, T8 v8, T9 v9,
- T10 v10, T11 v11, T12 v12, T13 v13, T14 v14, T15 v15, T16 v16, T17 v17,
- T18 v18, T19 v19, T20 v20, T21 v21, T22 v22, T23 v23, T24 v24, T25 v25,
- T26 v26, T27 v27, T28 v28, T29 v29, T30 v30, T31 v31) : v1_(v1), v2_(v2),
- v3_(v3), v4_(v4), v5_(v5), v6_(v6), v7_(v7), v8_(v8), v9_(v9), v10_(v10),
- v11_(v11), v12_(v12), v13_(v13), v14_(v14), v15_(v15), v16_(v16),
- v17_(v17), v18_(v18), v19_(v19), v20_(v20), v21_(v21), v22_(v22),
- v23_(v23), v24_(v24), v25_(v25), v26_(v26), v27_(v27), v28_(v28),
- v29_(v29), v30_(v30), v31_(v31) {}
-
- template <typename T>
- operator ParamGenerator<T>() const {
- const T array[] = {static_cast<T>(v1_), static_cast<T>(v2_),
- static_cast<T>(v3_), static_cast<T>(v4_), static_cast<T>(v5_),
- static_cast<T>(v6_), static_cast<T>(v7_), static_cast<T>(v8_),
- static_cast<T>(v9_), static_cast<T>(v10_), static_cast<T>(v11_),
- static_cast<T>(v12_), static_cast<T>(v13_), static_cast<T>(v14_),
- static_cast<T>(v15_), static_cast<T>(v16_), static_cast<T>(v17_),
- static_cast<T>(v18_), static_cast<T>(v19_), static_cast<T>(v20_),
- static_cast<T>(v21_), static_cast<T>(v22_), static_cast<T>(v23_),
- static_cast<T>(v24_), static_cast<T>(v25_), static_cast<T>(v26_),
- static_cast<T>(v27_), static_cast<T>(v28_), static_cast<T>(v29_),
- static_cast<T>(v30_), static_cast<T>(v31_)};
- return ValuesIn(array);
- }
-
- private:
- // No implementation - assignment is unsupported.
- void operator=(const ValueArray31& other);
-
- const T1 v1_;
- const T2 v2_;
- const T3 v3_;
- const T4 v4_;
- const T5 v5_;
- const T6 v6_;
- const T7 v7_;
- const T8 v8_;
- const T9 v9_;
- const T10 v10_;
- const T11 v11_;
- const T12 v12_;
- const T13 v13_;
- const T14 v14_;
- const T15 v15_;
- const T16 v16_;
- const T17 v17_;
- const T18 v18_;
- const T19 v19_;
- const T20 v20_;
- const T21 v21_;
- const T22 v22_;
- const T23 v23_;
- const T24 v24_;
- const T25 v25_;
- const T26 v26_;
- const T27 v27_;
- const T28 v28_;
- const T29 v29_;
- const T30 v30_;
- const T31 v31_;
-};
-
-template <typename T1, typename T2, typename T3, typename T4, typename T5,
- typename T6, typename T7, typename T8, typename T9, typename T10,
- typename T11, typename T12, typename T13, typename T14, typename T15,
- typename T16, typename T17, typename T18, typename T19, typename T20,
- typename T21, typename T22, typename T23, typename T24, typename T25,
- typename T26, typename T27, typename T28, typename T29, typename T30,
- typename T31, typename T32>
-class ValueArray32 {
- public:
- ValueArray32(T1 v1, T2 v2, T3 v3, T4 v4, T5 v5, T6 v6, T7 v7, T8 v8, T9 v9,
- T10 v10, T11 v11, T12 v12, T13 v13, T14 v14, T15 v15, T16 v16, T17 v17,
- T18 v18, T19 v19, T20 v20, T21 v21, T22 v22, T23 v23, T24 v24, T25 v25,
- T26 v26, T27 v27, T28 v28, T29 v29, T30 v30, T31 v31, T32 v32) : v1_(v1),
- v2_(v2), v3_(v3), v4_(v4), v5_(v5), v6_(v6), v7_(v7), v8_(v8), v9_(v9),
- v10_(v10), v11_(v11), v12_(v12), v13_(v13), v14_(v14), v15_(v15),
- v16_(v16), v17_(v17), v18_(v18), v19_(v19), v20_(v20), v21_(v21),
- v22_(v22), v23_(v23), v24_(v24), v25_(v25), v26_(v26), v27_(v27),
- v28_(v28), v29_(v29), v30_(v30), v31_(v31), v32_(v32) {}
-
- template <typename T>
- operator ParamGenerator<T>() const {
- const T array[] = {static_cast<T>(v1_), static_cast<T>(v2_),
- static_cast<T>(v3_), static_cast<T>(v4_), static_cast<T>(v5_),
- static_cast<T>(v6_), static_cast<T>(v7_), static_cast<T>(v8_),
- static_cast<T>(v9_), static_cast<T>(v10_), static_cast<T>(v11_),
- static_cast<T>(v12_), static_cast<T>(v13_), static_cast<T>(v14_),
- static_cast<T>(v15_), static_cast<T>(v16_), static_cast<T>(v17_),
- static_cast<T>(v18_), static_cast<T>(v19_), static_cast<T>(v20_),
- static_cast<T>(v21_), static_cast<T>(v22_), static_cast<T>(v23_),
- static_cast<T>(v24_), static_cast<T>(v25_), static_cast<T>(v26_),
- static_cast<T>(v27_), static_cast<T>(v28_), static_cast<T>(v29_),
- static_cast<T>(v30_), static_cast<T>(v31_), static_cast<T>(v32_)};
- return ValuesIn(array);
- }
-
- private:
- // No implementation - assignment is unsupported.
- void operator=(const ValueArray32& other);
-
- const T1 v1_;
- const T2 v2_;
- const T3 v3_;
- const T4 v4_;
- const T5 v5_;
- const T6 v6_;
- const T7 v7_;
- const T8 v8_;
- const T9 v9_;
- const T10 v10_;
- const T11 v11_;
- const T12 v12_;
- const T13 v13_;
- const T14 v14_;
- const T15 v15_;
- const T16 v16_;
- const T17 v17_;
- const T18 v18_;
- const T19 v19_;
- const T20 v20_;
- const T21 v21_;
- const T22 v22_;
- const T23 v23_;
- const T24 v24_;
- const T25 v25_;
- const T26 v26_;
- const T27 v27_;
- const T28 v28_;
- const T29 v29_;
- const T30 v30_;
- const T31 v31_;
- const T32 v32_;
-};
-
-template <typename T1, typename T2, typename T3, typename T4, typename T5,
- typename T6, typename T7, typename T8, typename T9, typename T10,
- typename T11, typename T12, typename T13, typename T14, typename T15,
- typename T16, typename T17, typename T18, typename T19, typename T20,
- typename T21, typename T22, typename T23, typename T24, typename T25,
- typename T26, typename T27, typename T28, typename T29, typename T30,
- typename T31, typename T32, typename T33>
-class ValueArray33 {
- public:
- ValueArray33(T1 v1, T2 v2, T3 v3, T4 v4, T5 v5, T6 v6, T7 v7, T8 v8, T9 v9,
- T10 v10, T11 v11, T12 v12, T13 v13, T14 v14, T15 v15, T16 v16, T17 v17,
- T18 v18, T19 v19, T20 v20, T21 v21, T22 v22, T23 v23, T24 v24, T25 v25,
- T26 v26, T27 v27, T28 v28, T29 v29, T30 v30, T31 v31, T32 v32,
- T33 v33) : v1_(v1), v2_(v2), v3_(v3), v4_(v4), v5_(v5), v6_(v6), v7_(v7),
- v8_(v8), v9_(v9), v10_(v10), v11_(v11), v12_(v12), v13_(v13), v14_(v14),
- v15_(v15), v16_(v16), v17_(v17), v18_(v18), v19_(v19), v20_(v20),
- v21_(v21), v22_(v22), v23_(v23), v24_(v24), v25_(v25), v26_(v26),
- v27_(v27), v28_(v28), v29_(v29), v30_(v30), v31_(v31), v32_(v32),
- v33_(v33) {}
-
- template <typename T>
- operator ParamGenerator<T>() const {
- const T array[] = {static_cast<T>(v1_), static_cast<T>(v2_),
- static_cast<T>(v3_), static_cast<T>(v4_), static_cast<T>(v5_),
- static_cast<T>(v6_), static_cast<T>(v7_), static_cast<T>(v8_),
- static_cast<T>(v9_), static_cast<T>(v10_), static_cast<T>(v11_),
- static_cast<T>(v12_), static_cast<T>(v13_), static_cast<T>(v14_),
- static_cast<T>(v15_), static_cast<T>(v16_), static_cast<T>(v17_),
- static_cast<T>(v18_), static_cast<T>(v19_), static_cast<T>(v20_),
- static_cast<T>(v21_), static_cast<T>(v22_), static_cast<T>(v23_),
- static_cast<T>(v24_), static_cast<T>(v25_), static_cast<T>(v26_),
- static_cast<T>(v27_), static_cast<T>(v28_), static_cast<T>(v29_),
- static_cast<T>(v30_), static_cast<T>(v31_), static_cast<T>(v32_),
- static_cast<T>(v33_)};
- return ValuesIn(array);
- }
-
- private:
- // No implementation - assignment is unsupported.
- void operator=(const ValueArray33& other);
-
- const T1 v1_;
- const T2 v2_;
- const T3 v3_;
- const T4 v4_;
- const T5 v5_;
- const T6 v6_;
- const T7 v7_;
- const T8 v8_;
- const T9 v9_;
- const T10 v10_;
- const T11 v11_;
- const T12 v12_;
- const T13 v13_;
- const T14 v14_;
- const T15 v15_;
- const T16 v16_;
- const T17 v17_;
- const T18 v18_;
- const T19 v19_;
- const T20 v20_;
- const T21 v21_;
- const T22 v22_;
- const T23 v23_;
- const T24 v24_;
- const T25 v25_;
- const T26 v26_;
- const T27 v27_;
- const T28 v28_;
- const T29 v29_;
- const T30 v30_;
- const T31 v31_;
- const T32 v32_;
- const T33 v33_;
-};
-
-template <typename T1, typename T2, typename T3, typename T4, typename T5,
- typename T6, typename T7, typename T8, typename T9, typename T10,
- typename T11, typename T12, typename T13, typename T14, typename T15,
- typename T16, typename T17, typename T18, typename T19, typename T20,
- typename T21, typename T22, typename T23, typename T24, typename T25,
- typename T26, typename T27, typename T28, typename T29, typename T30,
- typename T31, typename T32, typename T33, typename T34>
-class ValueArray34 {
- public:
- ValueArray34(T1 v1, T2 v2, T3 v3, T4 v4, T5 v5, T6 v6, T7 v7, T8 v8, T9 v9,
- T10 v10, T11 v11, T12 v12, T13 v13, T14 v14, T15 v15, T16 v16, T17 v17,
- T18 v18, T19 v19, T20 v20, T21 v21, T22 v22, T23 v23, T24 v24, T25 v25,
- T26 v26, T27 v27, T28 v28, T29 v29, T30 v30, T31 v31, T32 v32, T33 v33,
- T34 v34) : v1_(v1), v2_(v2), v3_(v3), v4_(v4), v5_(v5), v6_(v6), v7_(v7),
- v8_(v8), v9_(v9), v10_(v10), v11_(v11), v12_(v12), v13_(v13), v14_(v14),
- v15_(v15), v16_(v16), v17_(v17), v18_(v18), v19_(v19), v20_(v20),
- v21_(v21), v22_(v22), v23_(v23), v24_(v24), v25_(v25), v26_(v26),
- v27_(v27), v28_(v28), v29_(v29), v30_(v30), v31_(v31), v32_(v32),
- v33_(v33), v34_(v34) {}
-
- template <typename T>
- operator ParamGenerator<T>() const {
- const T array[] = {static_cast<T>(v1_), static_cast<T>(v2_),
- static_cast<T>(v3_), static_cast<T>(v4_), static_cast<T>(v5_),
- static_cast<T>(v6_), static_cast<T>(v7_), static_cast<T>(v8_),
- static_cast<T>(v9_), static_cast<T>(v10_), static_cast<T>(v11_),
- static_cast<T>(v12_), static_cast<T>(v13_), static_cast<T>(v14_),
- static_cast<T>(v15_), static_cast<T>(v16_), static_cast<T>(v17_),
- static_cast<T>(v18_), static_cast<T>(v19_), static_cast<T>(v20_),
- static_cast<T>(v21_), static_cast<T>(v22_), static_cast<T>(v23_),
- static_cast<T>(v24_), static_cast<T>(v25_), static_cast<T>(v26_),
- static_cast<T>(v27_), static_cast<T>(v28_), static_cast<T>(v29_),
- static_cast<T>(v30_), static_cast<T>(v31_), static_cast<T>(v32_),
- static_cast<T>(v33_), static_cast<T>(v34_)};
- return ValuesIn(array);
- }
-
- private:
- // No implementation - assignment is unsupported.
- void operator=(const ValueArray34& other);
-
- const T1 v1_;
- const T2 v2_;
- const T3 v3_;
- const T4 v4_;
- const T5 v5_;
- const T6 v6_;
- const T7 v7_;
- const T8 v8_;
- const T9 v9_;
- const T10 v10_;
- const T11 v11_;
- const T12 v12_;
- const T13 v13_;
- const T14 v14_;
- const T15 v15_;
- const T16 v16_;
- const T17 v17_;
- const T18 v18_;
- const T19 v19_;
- const T20 v20_;
- const T21 v21_;
- const T22 v22_;
- const T23 v23_;
- const T24 v24_;
- const T25 v25_;
- const T26 v26_;
- const T27 v27_;
- const T28 v28_;
- const T29 v29_;
- const T30 v30_;
- const T31 v31_;
- const T32 v32_;
- const T33 v33_;
- const T34 v34_;
-};
-
-template <typename T1, typename T2, typename T3, typename T4, typename T5,
- typename T6, typename T7, typename T8, typename T9, typename T10,
- typename T11, typename T12, typename T13, typename T14, typename T15,
- typename T16, typename T17, typename T18, typename T19, typename T20,
- typename T21, typename T22, typename T23, typename T24, typename T25,
- typename T26, typename T27, typename T28, typename T29, typename T30,
- typename T31, typename T32, typename T33, typename T34, typename T35>
-class ValueArray35 {
- public:
- ValueArray35(T1 v1, T2 v2, T3 v3, T4 v4, T5 v5, T6 v6, T7 v7, T8 v8, T9 v9,
- T10 v10, T11 v11, T12 v12, T13 v13, T14 v14, T15 v15, T16 v16, T17 v17,
- T18 v18, T19 v19, T20 v20, T21 v21, T22 v22, T23 v23, T24 v24, T25 v25,
- T26 v26, T27 v27, T28 v28, T29 v29, T30 v30, T31 v31, T32 v32, T33 v33,
- T34 v34, T35 v35) : v1_(v1), v2_(v2), v3_(v3), v4_(v4), v5_(v5), v6_(v6),
- v7_(v7), v8_(v8), v9_(v9), v10_(v10), v11_(v11), v12_(v12), v13_(v13),
- v14_(v14), v15_(v15), v16_(v16), v17_(v17), v18_(v18), v19_(v19),
- v20_(v20), v21_(v21), v22_(v22), v23_(v23), v24_(v24), v25_(v25),
- v26_(v26), v27_(v27), v28_(v28), v29_(v29), v30_(v30), v31_(v31),
- v32_(v32), v33_(v33), v34_(v34), v35_(v35) {}
-
- template <typename T>
- operator ParamGenerator<T>() const {
- const T array[] = {static_cast<T>(v1_), static_cast<T>(v2_),
- static_cast<T>(v3_), static_cast<T>(v4_), static_cast<T>(v5_),
- static_cast<T>(v6_), static_cast<T>(v7_), static_cast<T>(v8_),
- static_cast<T>(v9_), static_cast<T>(v10_), static_cast<T>(v11_),
- static_cast<T>(v12_), static_cast<T>(v13_), static_cast<T>(v14_),
- static_cast<T>(v15_), static_cast<T>(v16_), static_cast<T>(v17_),
- static_cast<T>(v18_), static_cast<T>(v19_), static_cast<T>(v20_),
- static_cast<T>(v21_), static_cast<T>(v22_), static_cast<T>(v23_),
- static_cast<T>(v24_), static_cast<T>(v25_), static_cast<T>(v26_),
- static_cast<T>(v27_), static_cast<T>(v28_), static_cast<T>(v29_),
- static_cast<T>(v30_), static_cast<T>(v31_), static_cast<T>(v32_),
- static_cast<T>(v33_), static_cast<T>(v34_), static_cast<T>(v35_)};
- return ValuesIn(array);
- }
-
- private:
- // No implementation - assignment is unsupported.
- void operator=(const ValueArray35& other);
-
- const T1 v1_;
- const T2 v2_;
- const T3 v3_;
- const T4 v4_;
- const T5 v5_;
- const T6 v6_;
- const T7 v7_;
- const T8 v8_;
- const T9 v9_;
- const T10 v10_;
- const T11 v11_;
- const T12 v12_;
- const T13 v13_;
- const T14 v14_;
- const T15 v15_;
- const T16 v16_;
- const T17 v17_;
- const T18 v18_;
- const T19 v19_;
- const T20 v20_;
- const T21 v21_;
- const T22 v22_;
- const T23 v23_;
- const T24 v24_;
- const T25 v25_;
- const T26 v26_;
- const T27 v27_;
- const T28 v28_;
- const T29 v29_;
- const T30 v30_;
- const T31 v31_;
- const T32 v32_;
- const T33 v33_;
- const T34 v34_;
- const T35 v35_;
-};
-
-template <typename T1, typename T2, typename T3, typename T4, typename T5,
- typename T6, typename T7, typename T8, typename T9, typename T10,
- typename T11, typename T12, typename T13, typename T14, typename T15,
- typename T16, typename T17, typename T18, typename T19, typename T20,
- typename T21, typename T22, typename T23, typename T24, typename T25,
- typename T26, typename T27, typename T28, typename T29, typename T30,
- typename T31, typename T32, typename T33, typename T34, typename T35,
- typename T36>
-class ValueArray36 {
- public:
- ValueArray36(T1 v1, T2 v2, T3 v3, T4 v4, T5 v5, T6 v6, T7 v7, T8 v8, T9 v9,
- T10 v10, T11 v11, T12 v12, T13 v13, T14 v14, T15 v15, T16 v16, T17 v17,
- T18 v18, T19 v19, T20 v20, T21 v21, T22 v22, T23 v23, T24 v24, T25 v25,
- T26 v26, T27 v27, T28 v28, T29 v29, T30 v30, T31 v31, T32 v32, T33 v33,
- T34 v34, T35 v35, T36 v36) : v1_(v1), v2_(v2), v3_(v3), v4_(v4), v5_(v5),
- v6_(v6), v7_(v7), v8_(v8), v9_(v9), v10_(v10), v11_(v11), v12_(v12),
- v13_(v13), v14_(v14), v15_(v15), v16_(v16), v17_(v17), v18_(v18),
- v19_(v19), v20_(v20), v21_(v21), v22_(v22), v23_(v23), v24_(v24),
- v25_(v25), v26_(v26), v27_(v27), v28_(v28), v29_(v29), v30_(v30),
- v31_(v31), v32_(v32), v33_(v33), v34_(v34), v35_(v35), v36_(v36) {}
-
- template <typename T>
- operator ParamGenerator<T>() const {
- const T array[] = {static_cast<T>(v1_), static_cast<T>(v2_),
- static_cast<T>(v3_), static_cast<T>(v4_), static_cast<T>(v5_),
- static_cast<T>(v6_), static_cast<T>(v7_), static_cast<T>(v8_),
- static_cast<T>(v9_), static_cast<T>(v10_), static_cast<T>(v11_),
- static_cast<T>(v12_), static_cast<T>(v13_), static_cast<T>(v14_),
- static_cast<T>(v15_), static_cast<T>(v16_), static_cast<T>(v17_),
- static_cast<T>(v18_), static_cast<T>(v19_), static_cast<T>(v20_),
- static_cast<T>(v21_), static_cast<T>(v22_), static_cast<T>(v23_),
- static_cast<T>(v24_), static_cast<T>(v25_), static_cast<T>(v26_),
- static_cast<T>(v27_), static_cast<T>(v28_), static_cast<T>(v29_),
- static_cast<T>(v30_), static_cast<T>(v31_), static_cast<T>(v32_),
- static_cast<T>(v33_), static_cast<T>(v34_), static_cast<T>(v35_),
- static_cast<T>(v36_)};
- return ValuesIn(array);
- }
-
- private:
- // No implementation - assignment is unsupported.
- void operator=(const ValueArray36& other);
-
- const T1 v1_;
- const T2 v2_;
- const T3 v3_;
- const T4 v4_;
- const T5 v5_;
- const T6 v6_;
- const T7 v7_;
- const T8 v8_;
- const T9 v9_;
- const T10 v10_;
- const T11 v11_;
- const T12 v12_;
- const T13 v13_;
- const T14 v14_;
- const T15 v15_;
- const T16 v16_;
- const T17 v17_;
- const T18 v18_;
- const T19 v19_;
- const T20 v20_;
- const T21 v21_;
- const T22 v22_;
- const T23 v23_;
- const T24 v24_;
- const T25 v25_;
- const T26 v26_;
- const T27 v27_;
- const T28 v28_;
- const T29 v29_;
- const T30 v30_;
- const T31 v31_;
- const T32 v32_;
- const T33 v33_;
- const T34 v34_;
- const T35 v35_;
- const T36 v36_;
-};
-
-template <typename T1, typename T2, typename T3, typename T4, typename T5,
- typename T6, typename T7, typename T8, typename T9, typename T10,
- typename T11, typename T12, typename T13, typename T14, typename T15,
- typename T16, typename T17, typename T18, typename T19, typename T20,
- typename T21, typename T22, typename T23, typename T24, typename T25,
- typename T26, typename T27, typename T28, typename T29, typename T30,
- typename T31, typename T32, typename T33, typename T34, typename T35,
- typename T36, typename T37>
-class ValueArray37 {
- public:
- ValueArray37(T1 v1, T2 v2, T3 v3, T4 v4, T5 v5, T6 v6, T7 v7, T8 v8, T9 v9,
- T10 v10, T11 v11, T12 v12, T13 v13, T14 v14, T15 v15, T16 v16, T17 v17,
- T18 v18, T19 v19, T20 v20, T21 v21, T22 v22, T23 v23, T24 v24, T25 v25,
- T26 v26, T27 v27, T28 v28, T29 v29, T30 v30, T31 v31, T32 v32, T33 v33,
- T34 v34, T35 v35, T36 v36, T37 v37) : v1_(v1), v2_(v2), v3_(v3), v4_(v4),
- v5_(v5), v6_(v6), v7_(v7), v8_(v8), v9_(v9), v10_(v10), v11_(v11),
- v12_(v12), v13_(v13), v14_(v14), v15_(v15), v16_(v16), v17_(v17),
- v18_(v18), v19_(v19), v20_(v20), v21_(v21), v22_(v22), v23_(v23),
- v24_(v24), v25_(v25), v26_(v26), v27_(v27), v28_(v28), v29_(v29),
- v30_(v30), v31_(v31), v32_(v32), v33_(v33), v34_(v34), v35_(v35),
- v36_(v36), v37_(v37) {}
-
- template <typename T>
- operator ParamGenerator<T>() const {
- const T array[] = {static_cast<T>(v1_), static_cast<T>(v2_),
- static_cast<T>(v3_), static_cast<T>(v4_), static_cast<T>(v5_),
- static_cast<T>(v6_), static_cast<T>(v7_), static_cast<T>(v8_),
- static_cast<T>(v9_), static_cast<T>(v10_), static_cast<T>(v11_),
- static_cast<T>(v12_), static_cast<T>(v13_), static_cast<T>(v14_),
- static_cast<T>(v15_), static_cast<T>(v16_), static_cast<T>(v17_),
- static_cast<T>(v18_), static_cast<T>(v19_), static_cast<T>(v20_),
- static_cast<T>(v21_), static_cast<T>(v22_), static_cast<T>(v23_),
- static_cast<T>(v24_), static_cast<T>(v25_), static_cast<T>(v26_),
- static_cast<T>(v27_), static_cast<T>(v28_), static_cast<T>(v29_),
- static_cast<T>(v30_), static_cast<T>(v31_), static_cast<T>(v32_),
- static_cast<T>(v33_), static_cast<T>(v34_), static_cast<T>(v35_),
- static_cast<T>(v36_), static_cast<T>(v37_)};
- return ValuesIn(array);
- }
-
- private:
- // No implementation - assignment is unsupported.
- void operator=(const ValueArray37& other);
-
- const T1 v1_;
- const T2 v2_;
- const T3 v3_;
- const T4 v4_;
- const T5 v5_;
- const T6 v6_;
- const T7 v7_;
- const T8 v8_;
- const T9 v9_;
- const T10 v10_;
- const T11 v11_;
- const T12 v12_;
- const T13 v13_;
- const T14 v14_;
- const T15 v15_;
- const T16 v16_;
- const T17 v17_;
- const T18 v18_;
- const T19 v19_;
- const T20 v20_;
- const T21 v21_;
- const T22 v22_;
- const T23 v23_;
- const T24 v24_;
- const T25 v25_;
- const T26 v26_;
- const T27 v27_;
- const T28 v28_;
- const T29 v29_;
- const T30 v30_;
- const T31 v31_;
- const T32 v32_;
- const T33 v33_;
- const T34 v34_;
- const T35 v35_;
- const T36 v36_;
- const T37 v37_;
-};
-
-template <typename T1, typename T2, typename T3, typename T4, typename T5,
- typename T6, typename T7, typename T8, typename T9, typename T10,
- typename T11, typename T12, typename T13, typename T14, typename T15,
- typename T16, typename T17, typename T18, typename T19, typename T20,
- typename T21, typename T22, typename T23, typename T24, typename T25,
- typename T26, typename T27, typename T28, typename T29, typename T30,
- typename T31, typename T32, typename T33, typename T34, typename T35,
- typename T36, typename T37, typename T38>
-class ValueArray38 {
- public:
- ValueArray38(T1 v1, T2 v2, T3 v3, T4 v4, T5 v5, T6 v6, T7 v7, T8 v8, T9 v9,
- T10 v10, T11 v11, T12 v12, T13 v13, T14 v14, T15 v15, T16 v16, T17 v17,
- T18 v18, T19 v19, T20 v20, T21 v21, T22 v22, T23 v23, T24 v24, T25 v25,
- T26 v26, T27 v27, T28 v28, T29 v29, T30 v30, T31 v31, T32 v32, T33 v33,
- T34 v34, T35 v35, T36 v36, T37 v37, T38 v38) : v1_(v1), v2_(v2), v3_(v3),
- v4_(v4), v5_(v5), v6_(v6), v7_(v7), v8_(v8), v9_(v9), v10_(v10),
- v11_(v11), v12_(v12), v13_(v13), v14_(v14), v15_(v15), v16_(v16),
- v17_(v17), v18_(v18), v19_(v19), v20_(v20), v21_(v21), v22_(v22),
- v23_(v23), v24_(v24), v25_(v25), v26_(v26), v27_(v27), v28_(v28),
- v29_(v29), v30_(v30), v31_(v31), v32_(v32), v33_(v33), v34_(v34),
- v35_(v35), v36_(v36), v37_(v37), v38_(v38) {}
-
- template <typename T>
- operator ParamGenerator<T>() const {
- const T array[] = {static_cast<T>(v1_), static_cast<T>(v2_),
- static_cast<T>(v3_), static_cast<T>(v4_), static_cast<T>(v5_),
- static_cast<T>(v6_), static_cast<T>(v7_), static_cast<T>(v8_),
- static_cast<T>(v9_), static_cast<T>(v10_), static_cast<T>(v11_),
- static_cast<T>(v12_), static_cast<T>(v13_), static_cast<T>(v14_),
- static_cast<T>(v15_), static_cast<T>(v16_), static_cast<T>(v17_),
- static_cast<T>(v18_), static_cast<T>(v19_), static_cast<T>(v20_),
- static_cast<T>(v21_), static_cast<T>(v22_), static_cast<T>(v23_),
- static_cast<T>(v24_), static_cast<T>(v25_), static_cast<T>(v26_),
- static_cast<T>(v27_), static_cast<T>(v28_), static_cast<T>(v29_),
- static_cast<T>(v30_), static_cast<T>(v31_), static_cast<T>(v32_),
- static_cast<T>(v33_), static_cast<T>(v34_), static_cast<T>(v35_),
- static_cast<T>(v36_), static_cast<T>(v37_), static_cast<T>(v38_)};
- return ValuesIn(array);
- }
-
- private:
- // No implementation - assignment is unsupported.
- void operator=(const ValueArray38& other);
-
- const T1 v1_;
- const T2 v2_;
- const T3 v3_;
- const T4 v4_;
- const T5 v5_;
- const T6 v6_;
- const T7 v7_;
- const T8 v8_;
- const T9 v9_;
- const T10 v10_;
- const T11 v11_;
- const T12 v12_;
- const T13 v13_;
- const T14 v14_;
- const T15 v15_;
- const T16 v16_;
- const T17 v17_;
- const T18 v18_;
- const T19 v19_;
- const T20 v20_;
- const T21 v21_;
- const T22 v22_;
- const T23 v23_;
- const T24 v24_;
- const T25 v25_;
- const T26 v26_;
- const T27 v27_;
- const T28 v28_;
- const T29 v29_;
- const T30 v30_;
- const T31 v31_;
- const T32 v32_;
- const T33 v33_;
- const T34 v34_;
- const T35 v35_;
- const T36 v36_;
- const T37 v37_;
- const T38 v38_;
-};
-
-template <typename T1, typename T2, typename T3, typename T4, typename T5,
- typename T6, typename T7, typename T8, typename T9, typename T10,
- typename T11, typename T12, typename T13, typename T14, typename T15,
- typename T16, typename T17, typename T18, typename T19, typename T20,
- typename T21, typename T22, typename T23, typename T24, typename T25,
- typename T26, typename T27, typename T28, typename T29, typename T30,
- typename T31, typename T32, typename T33, typename T34, typename T35,
- typename T36, typename T37, typename T38, typename T39>
-class ValueArray39 {
- public:
- ValueArray39(T1 v1, T2 v2, T3 v3, T4 v4, T5 v5, T6 v6, T7 v7, T8 v8, T9 v9,
- T10 v10, T11 v11, T12 v12, T13 v13, T14 v14, T15 v15, T16 v16, T17 v17,
- T18 v18, T19 v19, T20 v20, T21 v21, T22 v22, T23 v23, T24 v24, T25 v25,
- T26 v26, T27 v27, T28 v28, T29 v29, T30 v30, T31 v31, T32 v32, T33 v33,
- T34 v34, T35 v35, T36 v36, T37 v37, T38 v38, T39 v39) : v1_(v1), v2_(v2),
- v3_(v3), v4_(v4), v5_(v5), v6_(v6), v7_(v7), v8_(v8), v9_(v9), v10_(v10),
- v11_(v11), v12_(v12), v13_(v13), v14_(v14), v15_(v15), v16_(v16),
- v17_(v17), v18_(v18), v19_(v19), v20_(v20), v21_(v21), v22_(v22),
- v23_(v23), v24_(v24), v25_(v25), v26_(v26), v27_(v27), v28_(v28),
- v29_(v29), v30_(v30), v31_(v31), v32_(v32), v33_(v33), v34_(v34),
- v35_(v35), v36_(v36), v37_(v37), v38_(v38), v39_(v39) {}
-
- template <typename T>
- operator ParamGenerator<T>() const {
- const T array[] = {static_cast<T>(v1_), static_cast<T>(v2_),
- static_cast<T>(v3_), static_cast<T>(v4_), static_cast<T>(v5_),
- static_cast<T>(v6_), static_cast<T>(v7_), static_cast<T>(v8_),
- static_cast<T>(v9_), static_cast<T>(v10_), static_cast<T>(v11_),
- static_cast<T>(v12_), static_cast<T>(v13_), static_cast<T>(v14_),
- static_cast<T>(v15_), static_cast<T>(v16_), static_cast<T>(v17_),
- static_cast<T>(v18_), static_cast<T>(v19_), static_cast<T>(v20_),
- static_cast<T>(v21_), static_cast<T>(v22_), static_cast<T>(v23_),
- static_cast<T>(v24_), static_cast<T>(v25_), static_cast<T>(v26_),
- static_cast<T>(v27_), static_cast<T>(v28_), static_cast<T>(v29_),
- static_cast<T>(v30_), static_cast<T>(v31_), static_cast<T>(v32_),
- static_cast<T>(v33_), static_cast<T>(v34_), static_cast<T>(v35_),
- static_cast<T>(v36_), static_cast<T>(v37_), static_cast<T>(v38_),
- static_cast<T>(v39_)};
- return ValuesIn(array);
- }
-
- private:
- // No implementation - assignment is unsupported.
- void operator=(const ValueArray39& other);
-
- const T1 v1_;
- const T2 v2_;
- const T3 v3_;
- const T4 v4_;
- const T5 v5_;
- const T6 v6_;
- const T7 v7_;
- const T8 v8_;
- const T9 v9_;
- const T10 v10_;
- const T11 v11_;
- const T12 v12_;
- const T13 v13_;
- const T14 v14_;
- const T15 v15_;
- const T16 v16_;
- const T17 v17_;
- const T18 v18_;
- const T19 v19_;
- const T20 v20_;
- const T21 v21_;
- const T22 v22_;
- const T23 v23_;
- const T24 v24_;
- const T25 v25_;
- const T26 v26_;
- const T27 v27_;
- const T28 v28_;
- const T29 v29_;
- const T30 v30_;
- const T31 v31_;
- const T32 v32_;
- const T33 v33_;
- const T34 v34_;
- const T35 v35_;
- const T36 v36_;
- const T37 v37_;
- const T38 v38_;
- const T39 v39_;
-};
-
-template <typename T1, typename T2, typename T3, typename T4, typename T5,
- typename T6, typename T7, typename T8, typename T9, typename T10,
- typename T11, typename T12, typename T13, typename T14, typename T15,
- typename T16, typename T17, typename T18, typename T19, typename T20,
- typename T21, typename T22, typename T23, typename T24, typename T25,
- typename T26, typename T27, typename T28, typename T29, typename T30,
- typename T31, typename T32, typename T33, typename T34, typename T35,
- typename T36, typename T37, typename T38, typename T39, typename T40>
-class ValueArray40 {
- public:
- ValueArray40(T1 v1, T2 v2, T3 v3, T4 v4, T5 v5, T6 v6, T7 v7, T8 v8, T9 v9,
- T10 v10, T11 v11, T12 v12, T13 v13, T14 v14, T15 v15, T16 v16, T17 v17,
- T18 v18, T19 v19, T20 v20, T21 v21, T22 v22, T23 v23, T24 v24, T25 v25,
- T26 v26, T27 v27, T28 v28, T29 v29, T30 v30, T31 v31, T32 v32, T33 v33,
- T34 v34, T35 v35, T36 v36, T37 v37, T38 v38, T39 v39, T40 v40) : v1_(v1),
- v2_(v2), v3_(v3), v4_(v4), v5_(v5), v6_(v6), v7_(v7), v8_(v8), v9_(v9),
- v10_(v10), v11_(v11), v12_(v12), v13_(v13), v14_(v14), v15_(v15),
- v16_(v16), v17_(v17), v18_(v18), v19_(v19), v20_(v20), v21_(v21),
- v22_(v22), v23_(v23), v24_(v24), v25_(v25), v26_(v26), v27_(v27),
- v28_(v28), v29_(v29), v30_(v30), v31_(v31), v32_(v32), v33_(v33),
- v34_(v34), v35_(v35), v36_(v36), v37_(v37), v38_(v38), v39_(v39),
- v40_(v40) {}
-
- template <typename T>
- operator ParamGenerator<T>() const {
- const T array[] = {static_cast<T>(v1_), static_cast<T>(v2_),
- static_cast<T>(v3_), static_cast<T>(v4_), static_cast<T>(v5_),
- static_cast<T>(v6_), static_cast<T>(v7_), static_cast<T>(v8_),
- static_cast<T>(v9_), static_cast<T>(v10_), static_cast<T>(v11_),
- static_cast<T>(v12_), static_cast<T>(v13_), static_cast<T>(v14_),
- static_cast<T>(v15_), static_cast<T>(v16_), static_cast<T>(v17_),
- static_cast<T>(v18_), static_cast<T>(v19_), static_cast<T>(v20_),
- static_cast<T>(v21_), static_cast<T>(v22_), static_cast<T>(v23_),
- static_cast<T>(v24_), static_cast<T>(v25_), static_cast<T>(v26_),
- static_cast<T>(v27_), static_cast<T>(v28_), static_cast<T>(v29_),
- static_cast<T>(v30_), static_cast<T>(v31_), static_cast<T>(v32_),
- static_cast<T>(v33_), static_cast<T>(v34_), static_cast<T>(v35_),
- static_cast<T>(v36_), static_cast<T>(v37_), static_cast<T>(v38_),
- static_cast<T>(v39_), static_cast<T>(v40_)};
- return ValuesIn(array);
- }
-
- private:
- // No implementation - assignment is unsupported.
- void operator=(const ValueArray40& other);
-
- const T1 v1_;
- const T2 v2_;
- const T3 v3_;
- const T4 v4_;
- const T5 v5_;
- const T6 v6_;
- const T7 v7_;
- const T8 v8_;
- const T9 v9_;
- const T10 v10_;
- const T11 v11_;
- const T12 v12_;
- const T13 v13_;
- const T14 v14_;
- const T15 v15_;
- const T16 v16_;
- const T17 v17_;
- const T18 v18_;
- const T19 v19_;
- const T20 v20_;
- const T21 v21_;
- const T22 v22_;
- const T23 v23_;
- const T24 v24_;
- const T25 v25_;
- const T26 v26_;
- const T27 v27_;
- const T28 v28_;
- const T29 v29_;
- const T30 v30_;
- const T31 v31_;
- const T32 v32_;
- const T33 v33_;
- const T34 v34_;
- const T35 v35_;
- const T36 v36_;
- const T37 v37_;
- const T38 v38_;
- const T39 v39_;
- const T40 v40_;
-};
-
-template <typename T1, typename T2, typename T3, typename T4, typename T5,
- typename T6, typename T7, typename T8, typename T9, typename T10,
- typename T11, typename T12, typename T13, typename T14, typename T15,
- typename T16, typename T17, typename T18, typename T19, typename T20,
- typename T21, typename T22, typename T23, typename T24, typename T25,
- typename T26, typename T27, typename T28, typename T29, typename T30,
- typename T31, typename T32, typename T33, typename T34, typename T35,
- typename T36, typename T37, typename T38, typename T39, typename T40,
- typename T41>
-class ValueArray41 {
- public:
- ValueArray41(T1 v1, T2 v2, T3 v3, T4 v4, T5 v5, T6 v6, T7 v7, T8 v8, T9 v9,
- T10 v10, T11 v11, T12 v12, T13 v13, T14 v14, T15 v15, T16 v16, T17 v17,
- T18 v18, T19 v19, T20 v20, T21 v21, T22 v22, T23 v23, T24 v24, T25 v25,
- T26 v26, T27 v27, T28 v28, T29 v29, T30 v30, T31 v31, T32 v32, T33 v33,
- T34 v34, T35 v35, T36 v36, T37 v37, T38 v38, T39 v39, T40 v40,
- T41 v41) : v1_(v1), v2_(v2), v3_(v3), v4_(v4), v5_(v5), v6_(v6), v7_(v7),
- v8_(v8), v9_(v9), v10_(v10), v11_(v11), v12_(v12), v13_(v13), v14_(v14),
- v15_(v15), v16_(v16), v17_(v17), v18_(v18), v19_(v19), v20_(v20),
- v21_(v21), v22_(v22), v23_(v23), v24_(v24), v25_(v25), v26_(v26),
- v27_(v27), v28_(v28), v29_(v29), v30_(v30), v31_(v31), v32_(v32),
- v33_(v33), v34_(v34), v35_(v35), v36_(v36), v37_(v37), v38_(v38),
- v39_(v39), v40_(v40), v41_(v41) {}
-
- template <typename T>
- operator ParamGenerator<T>() const {
- const T array[] = {static_cast<T>(v1_), static_cast<T>(v2_),
- static_cast<T>(v3_), static_cast<T>(v4_), static_cast<T>(v5_),
- static_cast<T>(v6_), static_cast<T>(v7_), static_cast<T>(v8_),
- static_cast<T>(v9_), static_cast<T>(v10_), static_cast<T>(v11_),
- static_cast<T>(v12_), static_cast<T>(v13_), static_cast<T>(v14_),
- static_cast<T>(v15_), static_cast<T>(v16_), static_cast<T>(v17_),
- static_cast<T>(v18_), static_cast<T>(v19_), static_cast<T>(v20_),
- static_cast<T>(v21_), static_cast<T>(v22_), static_cast<T>(v23_),
- static_cast<T>(v24_), static_cast<T>(v25_), static_cast<T>(v26_),
- static_cast<T>(v27_), static_cast<T>(v28_), static_cast<T>(v29_),
- static_cast<T>(v30_), static_cast<T>(v31_), static_cast<T>(v32_),
- static_cast<T>(v33_), static_cast<T>(v34_), static_cast<T>(v35_),
- static_cast<T>(v36_), static_cast<T>(v37_), static_cast<T>(v38_),
- static_cast<T>(v39_), static_cast<T>(v40_), static_cast<T>(v41_)};
- return ValuesIn(array);
- }
-
- private:
- // No implementation - assignment is unsupported.
- void operator=(const ValueArray41& other);
-
- const T1 v1_;
- const T2 v2_;
- const T3 v3_;
- const T4 v4_;
- const T5 v5_;
- const T6 v6_;
- const T7 v7_;
- const T8 v8_;
- const T9 v9_;
- const T10 v10_;
- const T11 v11_;
- const T12 v12_;
- const T13 v13_;
- const T14 v14_;
- const T15 v15_;
- const T16 v16_;
- const T17 v17_;
- const T18 v18_;
- const T19 v19_;
- const T20 v20_;
- const T21 v21_;
- const T22 v22_;
- const T23 v23_;
- const T24 v24_;
- const T25 v25_;
- const T26 v26_;
- const T27 v27_;
- const T28 v28_;
- const T29 v29_;
- const T30 v30_;
- const T31 v31_;
- const T32 v32_;
- const T33 v33_;
- const T34 v34_;
- const T35 v35_;
- const T36 v36_;
- const T37 v37_;
- const T38 v38_;
- const T39 v39_;
- const T40 v40_;
- const T41 v41_;
-};
-
-template <typename T1, typename T2, typename T3, typename T4, typename T5,
- typename T6, typename T7, typename T8, typename T9, typename T10,
- typename T11, typename T12, typename T13, typename T14, typename T15,
- typename T16, typename T17, typename T18, typename T19, typename T20,
- typename T21, typename T22, typename T23, typename T24, typename T25,
- typename T26, typename T27, typename T28, typename T29, typename T30,
- typename T31, typename T32, typename T33, typename T34, typename T35,
- typename T36, typename T37, typename T38, typename T39, typename T40,
- typename T41, typename T42>
-class ValueArray42 {
- public:
- ValueArray42(T1 v1, T2 v2, T3 v3, T4 v4, T5 v5, T6 v6, T7 v7, T8 v8, T9 v9,
- T10 v10, T11 v11, T12 v12, T13 v13, T14 v14, T15 v15, T16 v16, T17 v17,
- T18 v18, T19 v19, T20 v20, T21 v21, T22 v22, T23 v23, T24 v24, T25 v25,
- T26 v26, T27 v27, T28 v28, T29 v29, T30 v30, T31 v31, T32 v32, T33 v33,
- T34 v34, T35 v35, T36 v36, T37 v37, T38 v38, T39 v39, T40 v40, T41 v41,
- T42 v42) : v1_(v1), v2_(v2), v3_(v3), v4_(v4), v5_(v5), v6_(v6), v7_(v7),
- v8_(v8), v9_(v9), v10_(v10), v11_(v11), v12_(v12), v13_(v13), v14_(v14),
- v15_(v15), v16_(v16), v17_(v17), v18_(v18), v19_(v19), v20_(v20),
- v21_(v21), v22_(v22), v23_(v23), v24_(v24), v25_(v25), v26_(v26),
- v27_(v27), v28_(v28), v29_(v29), v30_(v30), v31_(v31), v32_(v32),
- v33_(v33), v34_(v34), v35_(v35), v36_(v36), v37_(v37), v38_(v38),
- v39_(v39), v40_(v40), v41_(v41), v42_(v42) {}
-
- template <typename T>
- operator ParamGenerator<T>() const {
- const T array[] = {static_cast<T>(v1_), static_cast<T>(v2_),
- static_cast<T>(v3_), static_cast<T>(v4_), static_cast<T>(v5_),
- static_cast<T>(v6_), static_cast<T>(v7_), static_cast<T>(v8_),
- static_cast<T>(v9_), static_cast<T>(v10_), static_cast<T>(v11_),
- static_cast<T>(v12_), static_cast<T>(v13_), static_cast<T>(v14_),
- static_cast<T>(v15_), static_cast<T>(v16_), static_cast<T>(v17_),
- static_cast<T>(v18_), static_cast<T>(v19_), static_cast<T>(v20_),
- static_cast<T>(v21_), static_cast<T>(v22_), static_cast<T>(v23_),
- static_cast<T>(v24_), static_cast<T>(v25_), static_cast<T>(v26_),
- static_cast<T>(v27_), static_cast<T>(v28_), static_cast<T>(v29_),
- static_cast<T>(v30_), static_cast<T>(v31_), static_cast<T>(v32_),
- static_cast<T>(v33_), static_cast<T>(v34_), static_cast<T>(v35_),
- static_cast<T>(v36_), static_cast<T>(v37_), static_cast<T>(v38_),
- static_cast<T>(v39_), static_cast<T>(v40_), static_cast<T>(v41_),
- static_cast<T>(v42_)};
- return ValuesIn(array);
- }
-
- private:
- // No implementation - assignment is unsupported.
- void operator=(const ValueArray42& other);
-
- const T1 v1_;
- const T2 v2_;
- const T3 v3_;
- const T4 v4_;
- const T5 v5_;
- const T6 v6_;
- const T7 v7_;
- const T8 v8_;
- const T9 v9_;
- const T10 v10_;
- const T11 v11_;
- const T12 v12_;
- const T13 v13_;
- const T14 v14_;
- const T15 v15_;
- const T16 v16_;
- const T17 v17_;
- const T18 v18_;
- const T19 v19_;
- const T20 v20_;
- const T21 v21_;
- const T22 v22_;
- const T23 v23_;
- const T24 v24_;
- const T25 v25_;
- const T26 v26_;
- const T27 v27_;
- const T28 v28_;
- const T29 v29_;
- const T30 v30_;
- const T31 v31_;
- const T32 v32_;
- const T33 v33_;
- const T34 v34_;
- const T35 v35_;
- const T36 v36_;
- const T37 v37_;
- const T38 v38_;
- const T39 v39_;
- const T40 v40_;
- const T41 v41_;
- const T42 v42_;
-};
-
-template <typename T1, typename T2, typename T3, typename T4, typename T5,
- typename T6, typename T7, typename T8, typename T9, typename T10,
- typename T11, typename T12, typename T13, typename T14, typename T15,
- typename T16, typename T17, typename T18, typename T19, typename T20,
- typename T21, typename T22, typename T23, typename T24, typename T25,
- typename T26, typename T27, typename T28, typename T29, typename T30,
- typename T31, typename T32, typename T33, typename T34, typename T35,
- typename T36, typename T37, typename T38, typename T39, typename T40,
- typename T41, typename T42, typename T43>
-class ValueArray43 {
- public:
- ValueArray43(T1 v1, T2 v2, T3 v3, T4 v4, T5 v5, T6 v6, T7 v7, T8 v8, T9 v9,
- T10 v10, T11 v11, T12 v12, T13 v13, T14 v14, T15 v15, T16 v16, T17 v17,
- T18 v18, T19 v19, T20 v20, T21 v21, T22 v22, T23 v23, T24 v24, T25 v25,
- T26 v26, T27 v27, T28 v28, T29 v29, T30 v30, T31 v31, T32 v32, T33 v33,
- T34 v34, T35 v35, T36 v36, T37 v37, T38 v38, T39 v39, T40 v40, T41 v41,
- T42 v42, T43 v43) : v1_(v1), v2_(v2), v3_(v3), v4_(v4), v5_(v5), v6_(v6),
- v7_(v7), v8_(v8), v9_(v9), v10_(v10), v11_(v11), v12_(v12), v13_(v13),
- v14_(v14), v15_(v15), v16_(v16), v17_(v17), v18_(v18), v19_(v19),
- v20_(v20), v21_(v21), v22_(v22), v23_(v23), v24_(v24), v25_(v25),
- v26_(v26), v27_(v27), v28_(v28), v29_(v29), v30_(v30), v31_(v31),
- v32_(v32), v33_(v33), v34_(v34), v35_(v35), v36_(v36), v37_(v37),
- v38_(v38), v39_(v39), v40_(v40), v41_(v41), v42_(v42), v43_(v43) {}
-
- template <typename T>
- operator ParamGenerator<T>() const {
- const T array[] = {static_cast<T>(v1_), static_cast<T>(v2_),
- static_cast<T>(v3_), static_cast<T>(v4_), static_cast<T>(v5_),
- static_cast<T>(v6_), static_cast<T>(v7_), static_cast<T>(v8_),
- static_cast<T>(v9_), static_cast<T>(v10_), static_cast<T>(v11_),
- static_cast<T>(v12_), static_cast<T>(v13_), static_cast<T>(v14_),
- static_cast<T>(v15_), static_cast<T>(v16_), static_cast<T>(v17_),
- static_cast<T>(v18_), static_cast<T>(v19_), static_cast<T>(v20_),
- static_cast<T>(v21_), static_cast<T>(v22_), static_cast<T>(v23_),
- static_cast<T>(v24_), static_cast<T>(v25_), static_cast<T>(v26_),
- static_cast<T>(v27_), static_cast<T>(v28_), static_cast<T>(v29_),
- static_cast<T>(v30_), static_cast<T>(v31_), static_cast<T>(v32_),
- static_cast<T>(v33_), static_cast<T>(v34_), static_cast<T>(v35_),
- static_cast<T>(v36_), static_cast<T>(v37_), static_cast<T>(v38_),
- static_cast<T>(v39_), static_cast<T>(v40_), static_cast<T>(v41_),
- static_cast<T>(v42_), static_cast<T>(v43_)};
- return ValuesIn(array);
- }
-
- private:
- // No implementation - assignment is unsupported.
- void operator=(const ValueArray43& other);
-
- const T1 v1_;
- const T2 v2_;
- const T3 v3_;
- const T4 v4_;
- const T5 v5_;
- const T6 v6_;
- const T7 v7_;
- const T8 v8_;
- const T9 v9_;
- const T10 v10_;
- const T11 v11_;
- const T12 v12_;
- const T13 v13_;
- const T14 v14_;
- const T15 v15_;
- const T16 v16_;
- const T17 v17_;
- const T18 v18_;
- const T19 v19_;
- const T20 v20_;
- const T21 v21_;
- const T22 v22_;
- const T23 v23_;
- const T24 v24_;
- const T25 v25_;
- const T26 v26_;
- const T27 v27_;
- const T28 v28_;
- const T29 v29_;
- const T30 v30_;
- const T31 v31_;
- const T32 v32_;
- const T33 v33_;
- const T34 v34_;
- const T35 v35_;
- const T36 v36_;
- const T37 v37_;
- const T38 v38_;
- const T39 v39_;
- const T40 v40_;
- const T41 v41_;
- const T42 v42_;
- const T43 v43_;
-};
-
-template <typename T1, typename T2, typename T3, typename T4, typename T5,
- typename T6, typename T7, typename T8, typename T9, typename T10,
- typename T11, typename T12, typename T13, typename T14, typename T15,
- typename T16, typename T17, typename T18, typename T19, typename T20,
- typename T21, typename T22, typename T23, typename T24, typename T25,
- typename T26, typename T27, typename T28, typename T29, typename T30,
- typename T31, typename T32, typename T33, typename T34, typename T35,
- typename T36, typename T37, typename T38, typename T39, typename T40,
- typename T41, typename T42, typename T43, typename T44>
-class ValueArray44 {
- public:
- ValueArray44(T1 v1, T2 v2, T3 v3, T4 v4, T5 v5, T6 v6, T7 v7, T8 v8, T9 v9,
- T10 v10, T11 v11, T12 v12, T13 v13, T14 v14, T15 v15, T16 v16, T17 v17,
- T18 v18, T19 v19, T20 v20, T21 v21, T22 v22, T23 v23, T24 v24, T25 v25,
- T26 v26, T27 v27, T28 v28, T29 v29, T30 v30, T31 v31, T32 v32, T33 v33,
- T34 v34, T35 v35, T36 v36, T37 v37, T38 v38, T39 v39, T40 v40, T41 v41,
- T42 v42, T43 v43, T44 v44) : v1_(v1), v2_(v2), v3_(v3), v4_(v4), v5_(v5),
- v6_(v6), v7_(v7), v8_(v8), v9_(v9), v10_(v10), v11_(v11), v12_(v12),
- v13_(v13), v14_(v14), v15_(v15), v16_(v16), v17_(v17), v18_(v18),
- v19_(v19), v20_(v20), v21_(v21), v22_(v22), v23_(v23), v24_(v24),
- v25_(v25), v26_(v26), v27_(v27), v28_(v28), v29_(v29), v30_(v30),
- v31_(v31), v32_(v32), v33_(v33), v34_(v34), v35_(v35), v36_(v36),
- v37_(v37), v38_(v38), v39_(v39), v40_(v40), v41_(v41), v42_(v42),
- v43_(v43), v44_(v44) {}
-
- template <typename T>
- operator ParamGenerator<T>() const {
- const T array[] = {static_cast<T>(v1_), static_cast<T>(v2_),
- static_cast<T>(v3_), static_cast<T>(v4_), static_cast<T>(v5_),
- static_cast<T>(v6_), static_cast<T>(v7_), static_cast<T>(v8_),
- static_cast<T>(v9_), static_cast<T>(v10_), static_cast<T>(v11_),
- static_cast<T>(v12_), static_cast<T>(v13_), static_cast<T>(v14_),
- static_cast<T>(v15_), static_cast<T>(v16_), static_cast<T>(v17_),
- static_cast<T>(v18_), static_cast<T>(v19_), static_cast<T>(v20_),
- static_cast<T>(v21_), static_cast<T>(v22_), static_cast<T>(v23_),
- static_cast<T>(v24_), static_cast<T>(v25_), static_cast<T>(v26_),
- static_cast<T>(v27_), static_cast<T>(v28_), static_cast<T>(v29_),
- static_cast<T>(v30_), static_cast<T>(v31_), static_cast<T>(v32_),
- static_cast<T>(v33_), static_cast<T>(v34_), static_cast<T>(v35_),
- static_cast<T>(v36_), static_cast<T>(v37_), static_cast<T>(v38_),
- static_cast<T>(v39_), static_cast<T>(v40_), static_cast<T>(v41_),
- static_cast<T>(v42_), static_cast<T>(v43_), static_cast<T>(v44_)};
- return ValuesIn(array);
- }
-
- private:
- // No implementation - assignment is unsupported.
- void operator=(const ValueArray44& other);
-
- const T1 v1_;
- const T2 v2_;
- const T3 v3_;
- const T4 v4_;
- const T5 v5_;
- const T6 v6_;
- const T7 v7_;
- const T8 v8_;
- const T9 v9_;
- const T10 v10_;
- const T11 v11_;
- const T12 v12_;
- const T13 v13_;
- const T14 v14_;
- const T15 v15_;
- const T16 v16_;
- const T17 v17_;
- const T18 v18_;
- const T19 v19_;
- const T20 v20_;
- const T21 v21_;
- const T22 v22_;
- const T23 v23_;
- const T24 v24_;
- const T25 v25_;
- const T26 v26_;
- const T27 v27_;
- const T28 v28_;
- const T29 v29_;
- const T30 v30_;
- const T31 v31_;
- const T32 v32_;
- const T33 v33_;
- const T34 v34_;
- const T35 v35_;
- const T36 v36_;
- const T37 v37_;
- const T38 v38_;
- const T39 v39_;
- const T40 v40_;
- const T41 v41_;
- const T42 v42_;
- const T43 v43_;
- const T44 v44_;
-};
-
-template <typename T1, typename T2, typename T3, typename T4, typename T5,
- typename T6, typename T7, typename T8, typename T9, typename T10,
- typename T11, typename T12, typename T13, typename T14, typename T15,
- typename T16, typename T17, typename T18, typename T19, typename T20,
- typename T21, typename T22, typename T23, typename T24, typename T25,
- typename T26, typename T27, typename T28, typename T29, typename T30,
- typename T31, typename T32, typename T33, typename T34, typename T35,
- typename T36, typename T37, typename T38, typename T39, typename T40,
- typename T41, typename T42, typename T43, typename T44, typename T45>
-class ValueArray45 {
- public:
- ValueArray45(T1 v1, T2 v2, T3 v3, T4 v4, T5 v5, T6 v6, T7 v7, T8 v8, T9 v9,
- T10 v10, T11 v11, T12 v12, T13 v13, T14 v14, T15 v15, T16 v16, T17 v17,
- T18 v18, T19 v19, T20 v20, T21 v21, T22 v22, T23 v23, T24 v24, T25 v25,
- T26 v26, T27 v27, T28 v28, T29 v29, T30 v30, T31 v31, T32 v32, T33 v33,
- T34 v34, T35 v35, T36 v36, T37 v37, T38 v38, T39 v39, T40 v40, T41 v41,
- T42 v42, T43 v43, T44 v44, T45 v45) : v1_(v1), v2_(v2), v3_(v3), v4_(v4),
- v5_(v5), v6_(v6), v7_(v7), v8_(v8), v9_(v9), v10_(v10), v11_(v11),
- v12_(v12), v13_(v13), v14_(v14), v15_(v15), v16_(v16), v17_(v17),
- v18_(v18), v19_(v19), v20_(v20), v21_(v21), v22_(v22), v23_(v23),
- v24_(v24), v25_(v25), v26_(v26), v27_(v27), v28_(v28), v29_(v29),
- v30_(v30), v31_(v31), v32_(v32), v33_(v33), v34_(v34), v35_(v35),
- v36_(v36), v37_(v37), v38_(v38), v39_(v39), v40_(v40), v41_(v41),
- v42_(v42), v43_(v43), v44_(v44), v45_(v45) {}
-
- template <typename T>
- operator ParamGenerator<T>() const {
- const T array[] = {static_cast<T>(v1_), static_cast<T>(v2_),
- static_cast<T>(v3_), static_cast<T>(v4_), static_cast<T>(v5_),
- static_cast<T>(v6_), static_cast<T>(v7_), static_cast<T>(v8_),
- static_cast<T>(v9_), static_cast<T>(v10_), static_cast<T>(v11_),
- static_cast<T>(v12_), static_cast<T>(v13_), static_cast<T>(v14_),
- static_cast<T>(v15_), static_cast<T>(v16_), static_cast<T>(v17_),
- static_cast<T>(v18_), static_cast<T>(v19_), static_cast<T>(v20_),
- static_cast<T>(v21_), static_cast<T>(v22_), static_cast<T>(v23_),
- static_cast<T>(v24_), static_cast<T>(v25_), static_cast<T>(v26_),
- static_cast<T>(v27_), static_cast<T>(v28_), static_cast<T>(v29_),
- static_cast<T>(v30_), static_cast<T>(v31_), static_cast<T>(v32_),
- static_cast<T>(v33_), static_cast<T>(v34_), static_cast<T>(v35_),
- static_cast<T>(v36_), static_cast<T>(v37_), static_cast<T>(v38_),
- static_cast<T>(v39_), static_cast<T>(v40_), static_cast<T>(v41_),
- static_cast<T>(v42_), static_cast<T>(v43_), static_cast<T>(v44_),
- static_cast<T>(v45_)};
- return ValuesIn(array);
- }
-
- private:
- // No implementation - assignment is unsupported.
- void operator=(const ValueArray45& other);
-
- const T1 v1_;
- const T2 v2_;
- const T3 v3_;
- const T4 v4_;
- const T5 v5_;
- const T6 v6_;
- const T7 v7_;
- const T8 v8_;
- const T9 v9_;
- const T10 v10_;
- const T11 v11_;
- const T12 v12_;
- const T13 v13_;
- const T14 v14_;
- const T15 v15_;
- const T16 v16_;
- const T17 v17_;
- const T18 v18_;
- const T19 v19_;
- const T20 v20_;
- const T21 v21_;
- const T22 v22_;
- const T23 v23_;
- const T24 v24_;
- const T25 v25_;
- const T26 v26_;
- const T27 v27_;
- const T28 v28_;
- const T29 v29_;
- const T30 v30_;
- const T31 v31_;
- const T32 v32_;
- const T33 v33_;
- const T34 v34_;
- const T35 v35_;
- const T36 v36_;
- const T37 v37_;
- const T38 v38_;
- const T39 v39_;
- const T40 v40_;
- const T41 v41_;
- const T42 v42_;
- const T43 v43_;
- const T44 v44_;
- const T45 v45_;
-};
-
-template <typename T1, typename T2, typename T3, typename T4, typename T5,
- typename T6, typename T7, typename T8, typename T9, typename T10,
- typename T11, typename T12, typename T13, typename T14, typename T15,
- typename T16, typename T17, typename T18, typename T19, typename T20,
- typename T21, typename T22, typename T23, typename T24, typename T25,
- typename T26, typename T27, typename T28, typename T29, typename T30,
- typename T31, typename T32, typename T33, typename T34, typename T35,
- typename T36, typename T37, typename T38, typename T39, typename T40,
- typename T41, typename T42, typename T43, typename T44, typename T45,
- typename T46>
-class ValueArray46 {
- public:
- ValueArray46(T1 v1, T2 v2, T3 v3, T4 v4, T5 v5, T6 v6, T7 v7, T8 v8, T9 v9,
- T10 v10, T11 v11, T12 v12, T13 v13, T14 v14, T15 v15, T16 v16, T17 v17,
- T18 v18, T19 v19, T20 v20, T21 v21, T22 v22, T23 v23, T24 v24, T25 v25,
- T26 v26, T27 v27, T28 v28, T29 v29, T30 v30, T31 v31, T32 v32, T33 v33,
- T34 v34, T35 v35, T36 v36, T37 v37, T38 v38, T39 v39, T40 v40, T41 v41,
- T42 v42, T43 v43, T44 v44, T45 v45, T46 v46) : v1_(v1), v2_(v2), v3_(v3),
- v4_(v4), v5_(v5), v6_(v6), v7_(v7), v8_(v8), v9_(v9), v10_(v10),
- v11_(v11), v12_(v12), v13_(v13), v14_(v14), v15_(v15), v16_(v16),
- v17_(v17), v18_(v18), v19_(v19), v20_(v20), v21_(v21), v22_(v22),
- v23_(v23), v24_(v24), v25_(v25), v26_(v26), v27_(v27), v28_(v28),
- v29_(v29), v30_(v30), v31_(v31), v32_(v32), v33_(v33), v34_(v34),
- v35_(v35), v36_(v36), v37_(v37), v38_(v38), v39_(v39), v40_(v40),
- v41_(v41), v42_(v42), v43_(v43), v44_(v44), v45_(v45), v46_(v46) {}
-
- template <typename T>
- operator ParamGenerator<T>() const {
- const T array[] = {static_cast<T>(v1_), static_cast<T>(v2_),
- static_cast<T>(v3_), static_cast<T>(v4_), static_cast<T>(v5_),
- static_cast<T>(v6_), static_cast<T>(v7_), static_cast<T>(v8_),
- static_cast<T>(v9_), static_cast<T>(v10_), static_cast<T>(v11_),
- static_cast<T>(v12_), static_cast<T>(v13_), static_cast<T>(v14_),
- static_cast<T>(v15_), static_cast<T>(v16_), static_cast<T>(v17_),
- static_cast<T>(v18_), static_cast<T>(v19_), static_cast<T>(v20_),
- static_cast<T>(v21_), static_cast<T>(v22_), static_cast<T>(v23_),
- static_cast<T>(v24_), static_cast<T>(v25_), static_cast<T>(v26_),
- static_cast<T>(v27_), static_cast<T>(v28_), static_cast<T>(v29_),
- static_cast<T>(v30_), static_cast<T>(v31_), static_cast<T>(v32_),
- static_cast<T>(v33_), static_cast<T>(v34_), static_cast<T>(v35_),
- static_cast<T>(v36_), static_cast<T>(v37_), static_cast<T>(v38_),
- static_cast<T>(v39_), static_cast<T>(v40_), static_cast<T>(v41_),
- static_cast<T>(v42_), static_cast<T>(v43_), static_cast<T>(v44_),
- static_cast<T>(v45_), static_cast<T>(v46_)};
- return ValuesIn(array);
- }
-
- private:
- // No implementation - assignment is unsupported.
- void operator=(const ValueArray46& other);
-
- const T1 v1_;
- const T2 v2_;
- const T3 v3_;
- const T4 v4_;
- const T5 v5_;
- const T6 v6_;
- const T7 v7_;
- const T8 v8_;
- const T9 v9_;
- const T10 v10_;
- const T11 v11_;
- const T12 v12_;
- const T13 v13_;
- const T14 v14_;
- const T15 v15_;
- const T16 v16_;
- const T17 v17_;
- const T18 v18_;
- const T19 v19_;
- const T20 v20_;
- const T21 v21_;
- const T22 v22_;
- const T23 v23_;
- const T24 v24_;
- const T25 v25_;
- const T26 v26_;
- const T27 v27_;
- const T28 v28_;
- const T29 v29_;
- const T30 v30_;
- const T31 v31_;
- const T32 v32_;
- const T33 v33_;
- const T34 v34_;
- const T35 v35_;
- const T36 v36_;
- const T37 v37_;
- const T38 v38_;
- const T39 v39_;
- const T40 v40_;
- const T41 v41_;
- const T42 v42_;
- const T43 v43_;
- const T44 v44_;
- const T45 v45_;
- const T46 v46_;
-};
-
-template <typename T1, typename T2, typename T3, typename T4, typename T5,
- typename T6, typename T7, typename T8, typename T9, typename T10,
- typename T11, typename T12, typename T13, typename T14, typename T15,
- typename T16, typename T17, typename T18, typename T19, typename T20,
- typename T21, typename T22, typename T23, typename T24, typename T25,
- typename T26, typename T27, typename T28, typename T29, typename T30,
- typename T31, typename T32, typename T33, typename T34, typename T35,
- typename T36, typename T37, typename T38, typename T39, typename T40,
- typename T41, typename T42, typename T43, typename T44, typename T45,
- typename T46, typename T47>
-class ValueArray47 {
- public:
- ValueArray47(T1 v1, T2 v2, T3 v3, T4 v4, T5 v5, T6 v6, T7 v7, T8 v8, T9 v9,
- T10 v10, T11 v11, T12 v12, T13 v13, T14 v14, T15 v15, T16 v16, T17 v17,
- T18 v18, T19 v19, T20 v20, T21 v21, T22 v22, T23 v23, T24 v24, T25 v25,
- T26 v26, T27 v27, T28 v28, T29 v29, T30 v30, T31 v31, T32 v32, T33 v33,
- T34 v34, T35 v35, T36 v36, T37 v37, T38 v38, T39 v39, T40 v40, T41 v41,
- T42 v42, T43 v43, T44 v44, T45 v45, T46 v46, T47 v47) : v1_(v1), v2_(v2),
- v3_(v3), v4_(v4), v5_(v5), v6_(v6), v7_(v7), v8_(v8), v9_(v9), v10_(v10),
- v11_(v11), v12_(v12), v13_(v13), v14_(v14), v15_(v15), v16_(v16),
- v17_(v17), v18_(v18), v19_(v19), v20_(v20), v21_(v21), v22_(v22),
- v23_(v23), v24_(v24), v25_(v25), v26_(v26), v27_(v27), v28_(v28),
- v29_(v29), v30_(v30), v31_(v31), v32_(v32), v33_(v33), v34_(v34),
- v35_(v35), v36_(v36), v37_(v37), v38_(v38), v39_(v39), v40_(v40),
- v41_(v41), v42_(v42), v43_(v43), v44_(v44), v45_(v45), v46_(v46),
- v47_(v47) {}
-
- template <typename T>
- operator ParamGenerator<T>() const {
- const T array[] = {static_cast<T>(v1_), static_cast<T>(v2_),
- static_cast<T>(v3_), static_cast<T>(v4_), static_cast<T>(v5_),
- static_cast<T>(v6_), static_cast<T>(v7_), static_cast<T>(v8_),
- static_cast<T>(v9_), static_cast<T>(v10_), static_cast<T>(v11_),
- static_cast<T>(v12_), static_cast<T>(v13_), static_cast<T>(v14_),
- static_cast<T>(v15_), static_cast<T>(v16_), static_cast<T>(v17_),
- static_cast<T>(v18_), static_cast<T>(v19_), static_cast<T>(v20_),
- static_cast<T>(v21_), static_cast<T>(v22_), static_cast<T>(v23_),
- static_cast<T>(v24_), static_cast<T>(v25_), static_cast<T>(v26_),
- static_cast<T>(v27_), static_cast<T>(v28_), static_cast<T>(v29_),
- static_cast<T>(v30_), static_cast<T>(v31_), static_cast<T>(v32_),
- static_cast<T>(v33_), static_cast<T>(v34_), static_cast<T>(v35_),
- static_cast<T>(v36_), static_cast<T>(v37_), static_cast<T>(v38_),
- static_cast<T>(v39_), static_cast<T>(v40_), static_cast<T>(v41_),
- static_cast<T>(v42_), static_cast<T>(v43_), static_cast<T>(v44_),
- static_cast<T>(v45_), static_cast<T>(v46_), static_cast<T>(v47_)};
- return ValuesIn(array);
- }
-
- private:
- // No implementation - assignment is unsupported.
- void operator=(const ValueArray47& other);
-
- const T1 v1_;
- const T2 v2_;
- const T3 v3_;
- const T4 v4_;
- const T5 v5_;
- const T6 v6_;
- const T7 v7_;
- const T8 v8_;
- const T9 v9_;
- const T10 v10_;
- const T11 v11_;
- const T12 v12_;
- const T13 v13_;
- const T14 v14_;
- const T15 v15_;
- const T16 v16_;
- const T17 v17_;
- const T18 v18_;
- const T19 v19_;
- const T20 v20_;
- const T21 v21_;
- const T22 v22_;
- const T23 v23_;
- const T24 v24_;
- const T25 v25_;
- const T26 v26_;
- const T27 v27_;
- const T28 v28_;
- const T29 v29_;
- const T30 v30_;
- const T31 v31_;
- const T32 v32_;
- const T33 v33_;
- const T34 v34_;
- const T35 v35_;
- const T36 v36_;
- const T37 v37_;
- const T38 v38_;
- const T39 v39_;
- const T40 v40_;
- const T41 v41_;
- const T42 v42_;
- const T43 v43_;
- const T44 v44_;
- const T45 v45_;
- const T46 v46_;
- const T47 v47_;
-};
-
-template <typename T1, typename T2, typename T3, typename T4, typename T5,
- typename T6, typename T7, typename T8, typename T9, typename T10,
- typename T11, typename T12, typename T13, typename T14, typename T15,
- typename T16, typename T17, typename T18, typename T19, typename T20,
- typename T21, typename T22, typename T23, typename T24, typename T25,
- typename T26, typename T27, typename T28, typename T29, typename T30,
- typename T31, typename T32, typename T33, typename T34, typename T35,
- typename T36, typename T37, typename T38, typename T39, typename T40,
- typename T41, typename T42, typename T43, typename T44, typename T45,
- typename T46, typename T47, typename T48>
-class ValueArray48 {
- public:
- ValueArray48(T1 v1, T2 v2, T3 v3, T4 v4, T5 v5, T6 v6, T7 v7, T8 v8, T9 v9,
- T10 v10, T11 v11, T12 v12, T13 v13, T14 v14, T15 v15, T16 v16, T17 v17,
- T18 v18, T19 v19, T20 v20, T21 v21, T22 v22, T23 v23, T24 v24, T25 v25,
- T26 v26, T27 v27, T28 v28, T29 v29, T30 v30, T31 v31, T32 v32, T33 v33,
- T34 v34, T35 v35, T36 v36, T37 v37, T38 v38, T39 v39, T40 v40, T41 v41,
- T42 v42, T43 v43, T44 v44, T45 v45, T46 v46, T47 v47, T48 v48) : v1_(v1),
- v2_(v2), v3_(v3), v4_(v4), v5_(v5), v6_(v6), v7_(v7), v8_(v8), v9_(v9),
- v10_(v10), v11_(v11), v12_(v12), v13_(v13), v14_(v14), v15_(v15),
- v16_(v16), v17_(v17), v18_(v18), v19_(v19), v20_(v20), v21_(v21),
- v22_(v22), v23_(v23), v24_(v24), v25_(v25), v26_(v26), v27_(v27),
- v28_(v28), v29_(v29), v30_(v30), v31_(v31), v32_(v32), v33_(v33),
- v34_(v34), v35_(v35), v36_(v36), v37_(v37), v38_(v38), v39_(v39),
- v40_(v40), v41_(v41), v42_(v42), v43_(v43), v44_(v44), v45_(v45),
- v46_(v46), v47_(v47), v48_(v48) {}
-
- template <typename T>
- operator ParamGenerator<T>() const {
- const T array[] = {static_cast<T>(v1_), static_cast<T>(v2_),
- static_cast<T>(v3_), static_cast<T>(v4_), static_cast<T>(v5_),
- static_cast<T>(v6_), static_cast<T>(v7_), static_cast<T>(v8_),
- static_cast<T>(v9_), static_cast<T>(v10_), static_cast<T>(v11_),
- static_cast<T>(v12_), static_cast<T>(v13_), static_cast<T>(v14_),
- static_cast<T>(v15_), static_cast<T>(v16_), static_cast<T>(v17_),
- static_cast<T>(v18_), static_cast<T>(v19_), static_cast<T>(v20_),
- static_cast<T>(v21_), static_cast<T>(v22_), static_cast<T>(v23_),
- static_cast<T>(v24_), static_cast<T>(v25_), static_cast<T>(v26_),
- static_cast<T>(v27_), static_cast<T>(v28_), static_cast<T>(v29_),
- static_cast<T>(v30_), static_cast<T>(v31_), static_cast<T>(v32_),
- static_cast<T>(v33_), static_cast<T>(v34_), static_cast<T>(v35_),
- static_cast<T>(v36_), static_cast<T>(v37_), static_cast<T>(v38_),
- static_cast<T>(v39_), static_cast<T>(v40_), static_cast<T>(v41_),
- static_cast<T>(v42_), static_cast<T>(v43_), static_cast<T>(v44_),
- static_cast<T>(v45_), static_cast<T>(v46_), static_cast<T>(v47_),
- static_cast<T>(v48_)};
- return ValuesIn(array);
- }
-
- private:
- // No implementation - assignment is unsupported.
- void operator=(const ValueArray48& other);
-
- const T1 v1_;
- const T2 v2_;
- const T3 v3_;
- const T4 v4_;
- const T5 v5_;
- const T6 v6_;
- const T7 v7_;
- const T8 v8_;
- const T9 v9_;
- const T10 v10_;
- const T11 v11_;
- const T12 v12_;
- const T13 v13_;
- const T14 v14_;
- const T15 v15_;
- const T16 v16_;
- const T17 v17_;
- const T18 v18_;
- const T19 v19_;
- const T20 v20_;
- const T21 v21_;
- const T22 v22_;
- const T23 v23_;
- const T24 v24_;
- const T25 v25_;
- const T26 v26_;
- const T27 v27_;
- const T28 v28_;
- const T29 v29_;
- const T30 v30_;
- const T31 v31_;
- const T32 v32_;
- const T33 v33_;
- const T34 v34_;
- const T35 v35_;
- const T36 v36_;
- const T37 v37_;
- const T38 v38_;
- const T39 v39_;
- const T40 v40_;
- const T41 v41_;
- const T42 v42_;
- const T43 v43_;
- const T44 v44_;
- const T45 v45_;
- const T46 v46_;
- const T47 v47_;
- const T48 v48_;
-};
-
-template <typename T1, typename T2, typename T3, typename T4, typename T5,
- typename T6, typename T7, typename T8, typename T9, typename T10,
- typename T11, typename T12, typename T13, typename T14, typename T15,
- typename T16, typename T17, typename T18, typename T19, typename T20,
- typename T21, typename T22, typename T23, typename T24, typename T25,
- typename T26, typename T27, typename T28, typename T29, typename T30,
- typename T31, typename T32, typename T33, typename T34, typename T35,
- typename T36, typename T37, typename T38, typename T39, typename T40,
- typename T41, typename T42, typename T43, typename T44, typename T45,
- typename T46, typename T47, typename T48, typename T49>
-class ValueArray49 {
- public:
- ValueArray49(T1 v1, T2 v2, T3 v3, T4 v4, T5 v5, T6 v6, T7 v7, T8 v8, T9 v9,
- T10 v10, T11 v11, T12 v12, T13 v13, T14 v14, T15 v15, T16 v16, T17 v17,
- T18 v18, T19 v19, T20 v20, T21 v21, T22 v22, T23 v23, T24 v24, T25 v25,
- T26 v26, T27 v27, T28 v28, T29 v29, T30 v30, T31 v31, T32 v32, T33 v33,
- T34 v34, T35 v35, T36 v36, T37 v37, T38 v38, T39 v39, T40 v40, T41 v41,
- T42 v42, T43 v43, T44 v44, T45 v45, T46 v46, T47 v47, T48 v48,
- T49 v49) : v1_(v1), v2_(v2), v3_(v3), v4_(v4), v5_(v5), v6_(v6), v7_(v7),
- v8_(v8), v9_(v9), v10_(v10), v11_(v11), v12_(v12), v13_(v13), v14_(v14),
- v15_(v15), v16_(v16), v17_(v17), v18_(v18), v19_(v19), v20_(v20),
- v21_(v21), v22_(v22), v23_(v23), v24_(v24), v25_(v25), v26_(v26),
- v27_(v27), v28_(v28), v29_(v29), v30_(v30), v31_(v31), v32_(v32),
- v33_(v33), v34_(v34), v35_(v35), v36_(v36), v37_(v37), v38_(v38),
- v39_(v39), v40_(v40), v41_(v41), v42_(v42), v43_(v43), v44_(v44),
- v45_(v45), v46_(v46), v47_(v47), v48_(v48), v49_(v49) {}
-
- template <typename T>
- operator ParamGenerator<T>() const {
- const T array[] = {static_cast<T>(v1_), static_cast<T>(v2_),
- static_cast<T>(v3_), static_cast<T>(v4_), static_cast<T>(v5_),
- static_cast<T>(v6_), static_cast<T>(v7_), static_cast<T>(v8_),
- static_cast<T>(v9_), static_cast<T>(v10_), static_cast<T>(v11_),
- static_cast<T>(v12_), static_cast<T>(v13_), static_cast<T>(v14_),
- static_cast<T>(v15_), static_cast<T>(v16_), static_cast<T>(v17_),
- static_cast<T>(v18_), static_cast<T>(v19_), static_cast<T>(v20_),
- static_cast<T>(v21_), static_cast<T>(v22_), static_cast<T>(v23_),
- static_cast<T>(v24_), static_cast<T>(v25_), static_cast<T>(v26_),
- static_cast<T>(v27_), static_cast<T>(v28_), static_cast<T>(v29_),
- static_cast<T>(v30_), static_cast<T>(v31_), static_cast<T>(v32_),
- static_cast<T>(v33_), static_cast<T>(v34_), static_cast<T>(v35_),
- static_cast<T>(v36_), static_cast<T>(v37_), static_cast<T>(v38_),
- static_cast<T>(v39_), static_cast<T>(v40_), static_cast<T>(v41_),
- static_cast<T>(v42_), static_cast<T>(v43_), static_cast<T>(v44_),
- static_cast<T>(v45_), static_cast<T>(v46_), static_cast<T>(v47_),
- static_cast<T>(v48_), static_cast<T>(v49_)};
- return ValuesIn(array);
- }
-
- private:
- // No implementation - assignment is unsupported.
- void operator=(const ValueArray49& other);
-
- const T1 v1_;
- const T2 v2_;
- const T3 v3_;
- const T4 v4_;
- const T5 v5_;
- const T6 v6_;
- const T7 v7_;
- const T8 v8_;
- const T9 v9_;
- const T10 v10_;
- const T11 v11_;
- const T12 v12_;
- const T13 v13_;
- const T14 v14_;
- const T15 v15_;
- const T16 v16_;
- const T17 v17_;
- const T18 v18_;
- const T19 v19_;
- const T20 v20_;
- const T21 v21_;
- const T22 v22_;
- const T23 v23_;
- const T24 v24_;
- const T25 v25_;
- const T26 v26_;
- const T27 v27_;
- const T28 v28_;
- const T29 v29_;
- const T30 v30_;
- const T31 v31_;
- const T32 v32_;
- const T33 v33_;
- const T34 v34_;
- const T35 v35_;
- const T36 v36_;
- const T37 v37_;
- const T38 v38_;
- const T39 v39_;
- const T40 v40_;
- const T41 v41_;
- const T42 v42_;
- const T43 v43_;
- const T44 v44_;
- const T45 v45_;
- const T46 v46_;
- const T47 v47_;
- const T48 v48_;
- const T49 v49_;
-};
-
-template <typename T1, typename T2, typename T3, typename T4, typename T5,
- typename T6, typename T7, typename T8, typename T9, typename T10,
- typename T11, typename T12, typename T13, typename T14, typename T15,
- typename T16, typename T17, typename T18, typename T19, typename T20,
- typename T21, typename T22, typename T23, typename T24, typename T25,
- typename T26, typename T27, typename T28, typename T29, typename T30,
- typename T31, typename T32, typename T33, typename T34, typename T35,
- typename T36, typename T37, typename T38, typename T39, typename T40,
- typename T41, typename T42, typename T43, typename T44, typename T45,
- typename T46, typename T47, typename T48, typename T49, typename T50>
-class ValueArray50 {
- public:
- ValueArray50(T1 v1, T2 v2, T3 v3, T4 v4, T5 v5, T6 v6, T7 v7, T8 v8, T9 v9,
- T10 v10, T11 v11, T12 v12, T13 v13, T14 v14, T15 v15, T16 v16, T17 v17,
- T18 v18, T19 v19, T20 v20, T21 v21, T22 v22, T23 v23, T24 v24, T25 v25,
- T26 v26, T27 v27, T28 v28, T29 v29, T30 v30, T31 v31, T32 v32, T33 v33,
- T34 v34, T35 v35, T36 v36, T37 v37, T38 v38, T39 v39, T40 v40, T41 v41,
- T42 v42, T43 v43, T44 v44, T45 v45, T46 v46, T47 v47, T48 v48, T49 v49,
- T50 v50) : v1_(v1), v2_(v2), v3_(v3), v4_(v4), v5_(v5), v6_(v6), v7_(v7),
- v8_(v8), v9_(v9), v10_(v10), v11_(v11), v12_(v12), v13_(v13), v14_(v14),
- v15_(v15), v16_(v16), v17_(v17), v18_(v18), v19_(v19), v20_(v20),
- v21_(v21), v22_(v22), v23_(v23), v24_(v24), v25_(v25), v26_(v26),
- v27_(v27), v28_(v28), v29_(v29), v30_(v30), v31_(v31), v32_(v32),
- v33_(v33), v34_(v34), v35_(v35), v36_(v36), v37_(v37), v38_(v38),
- v39_(v39), v40_(v40), v41_(v41), v42_(v42), v43_(v43), v44_(v44),
- v45_(v45), v46_(v46), v47_(v47), v48_(v48), v49_(v49), v50_(v50) {}
-
- template <typename T>
- operator ParamGenerator<T>() const {
- const T array[] = {static_cast<T>(v1_), static_cast<T>(v2_),
- static_cast<T>(v3_), static_cast<T>(v4_), static_cast<T>(v5_),
- static_cast<T>(v6_), static_cast<T>(v7_), static_cast<T>(v8_),
- static_cast<T>(v9_), static_cast<T>(v10_), static_cast<T>(v11_),
- static_cast<T>(v12_), static_cast<T>(v13_), static_cast<T>(v14_),
- static_cast<T>(v15_), static_cast<T>(v16_), static_cast<T>(v17_),
- static_cast<T>(v18_), static_cast<T>(v19_), static_cast<T>(v20_),
- static_cast<T>(v21_), static_cast<T>(v22_), static_cast<T>(v23_),
- static_cast<T>(v24_), static_cast<T>(v25_), static_cast<T>(v26_),
- static_cast<T>(v27_), static_cast<T>(v28_), static_cast<T>(v29_),
- static_cast<T>(v30_), static_cast<T>(v31_), static_cast<T>(v32_),
- static_cast<T>(v33_), static_cast<T>(v34_), static_cast<T>(v35_),
- static_cast<T>(v36_), static_cast<T>(v37_), static_cast<T>(v38_),
- static_cast<T>(v39_), static_cast<T>(v40_), static_cast<T>(v41_),
- static_cast<T>(v42_), static_cast<T>(v43_), static_cast<T>(v44_),
- static_cast<T>(v45_), static_cast<T>(v46_), static_cast<T>(v47_),
- static_cast<T>(v48_), static_cast<T>(v49_), static_cast<T>(v50_)};
- return ValuesIn(array);
- }
-
- private:
- // No implementation - assignment is unsupported.
- void operator=(const ValueArray50& other);
-
- const T1 v1_;
- const T2 v2_;
- const T3 v3_;
- const T4 v4_;
- const T5 v5_;
- const T6 v6_;
- const T7 v7_;
- const T8 v8_;
- const T9 v9_;
- const T10 v10_;
- const T11 v11_;
- const T12 v12_;
- const T13 v13_;
- const T14 v14_;
- const T15 v15_;
- const T16 v16_;
- const T17 v17_;
- const T18 v18_;
- const T19 v19_;
- const T20 v20_;
- const T21 v21_;
- const T22 v22_;
- const T23 v23_;
- const T24 v24_;
- const T25 v25_;
- const T26 v26_;
- const T27 v27_;
- const T28 v28_;
- const T29 v29_;
- const T30 v30_;
- const T31 v31_;
- const T32 v32_;
- const T33 v33_;
- const T34 v34_;
- const T35 v35_;
- const T36 v36_;
- const T37 v37_;
- const T38 v38_;
- const T39 v39_;
- const T40 v40_;
- const T41 v41_;
- const T42 v42_;
- const T43 v43_;
- const T44 v44_;
- const T45 v45_;
- const T46 v46_;
- const T47 v47_;
- const T48 v48_;
- const T49 v49_;
- const T50 v50_;
-};
-
-# if GTEST_HAS_COMBINE
-// INTERNAL IMPLEMENTATION - DO NOT USE IN USER CODE.
-//
-// Generates values from the Cartesian product of values produced
-// by the argument generators.
-//
-template <typename T1, typename T2>
-class CartesianProductGenerator2
- : public ParamGeneratorInterface< ::testing::tuple<T1, T2> > {
- public:
- typedef ::testing::tuple<T1, T2> ParamType;
-
- CartesianProductGenerator2(const ParamGenerator<T1>& g1,
- const ParamGenerator<T2>& g2)
- : g1_(g1), g2_(g2) {}
- virtual ~CartesianProductGenerator2() {}
-
- virtual ParamIteratorInterface<ParamType>* Begin() const {
- return new Iterator(this, g1_, g1_.begin(), g2_, g2_.begin());
- }
- virtual ParamIteratorInterface<ParamType>* End() const {
- return new Iterator(this, g1_, g1_.end(), g2_, g2_.end());
- }
-
- private:
- class Iterator : public ParamIteratorInterface<ParamType> {
- public:
- Iterator(const ParamGeneratorInterface<ParamType>* base,
- const ParamGenerator<T1>& g1,
- const typename ParamGenerator<T1>::iterator& current1,
- const ParamGenerator<T2>& g2,
- const typename ParamGenerator<T2>::iterator& current2)
- : base_(base),
- begin1_(g1.begin()), end1_(g1.end()), current1_(current1),
- begin2_(g2.begin()), end2_(g2.end()), current2_(current2) {
- ComputeCurrentValue();
- }
- virtual ~Iterator() {}
-
- virtual const ParamGeneratorInterface<ParamType>* BaseGenerator() const {
- return base_;
- }
- // Advance should not be called on beyond-of-range iterators
- // so no component iterators must be beyond end of range, either.
- virtual void Advance() {
- assert(!AtEnd());
- ++current2_;
- if (current2_ == end2_) {
- current2_ = begin2_;
- ++current1_;
- }
- ComputeCurrentValue();
- }
- virtual ParamIteratorInterface<ParamType>* Clone() const {
- return new Iterator(*this);
- }
- virtual const ParamType* Current() const { return &current_value_; }
- virtual bool Equals(const ParamIteratorInterface<ParamType>& other) const {
- // Having the same base generator guarantees that the other
- // iterator is of the same type and we can downcast.
- GTEST_CHECK_(BaseGenerator() == other.BaseGenerator())
- << "The program attempted to compare iterators "
- << "from different generators." << std::endl;
- const Iterator* typed_other =
- CheckedDowncastToActualType<const Iterator>(&other);
- // We must report iterators equal if they both point beyond their
- // respective ranges. That can happen in a variety of fashions,
- // so we have to consult AtEnd().
- return (AtEnd() && typed_other->AtEnd()) ||
- (
- current1_ == typed_other->current1_ &&
- current2_ == typed_other->current2_);
- }
-
- private:
- Iterator(const Iterator& other)
- : base_(other.base_),
- begin1_(other.begin1_),
- end1_(other.end1_),
- current1_(other.current1_),
- begin2_(other.begin2_),
- end2_(other.end2_),
- current2_(other.current2_) {
- ComputeCurrentValue();
- }
-
- void ComputeCurrentValue() {
- if (!AtEnd())
- current_value_ = ParamType(*current1_, *current2_);
- }
- bool AtEnd() const {
- // We must report iterator past the end of the range when either of the
- // component iterators has reached the end of its range.
- return
- current1_ == end1_ ||
- current2_ == end2_;
- }
-
- // No implementation - assignment is unsupported.
- void operator=(const Iterator& other);
-
- const ParamGeneratorInterface<ParamType>* const base_;
- // begin[i]_ and end[i]_ define the i-th range that Iterator traverses.
- // current[i]_ is the actual traversing iterator.
- const typename ParamGenerator<T1>::iterator begin1_;
- const typename ParamGenerator<T1>::iterator end1_;
- typename ParamGenerator<T1>::iterator current1_;
- const typename ParamGenerator<T2>::iterator begin2_;
- const typename ParamGenerator<T2>::iterator end2_;
- typename ParamGenerator<T2>::iterator current2_;
- ParamType current_value_;
- }; // class CartesianProductGenerator2::Iterator
-
- // No implementation - assignment is unsupported.
- void operator=(const CartesianProductGenerator2& other);
-
- const ParamGenerator<T1> g1_;
- const ParamGenerator<T2> g2_;
-}; // class CartesianProductGenerator2
-
-
-template <typename T1, typename T2, typename T3>
-class CartesianProductGenerator3
- : public ParamGeneratorInterface< ::testing::tuple<T1, T2, T3> > {
- public:
- typedef ::testing::tuple<T1, T2, T3> ParamType;
-
- CartesianProductGenerator3(const ParamGenerator<T1>& g1,
- const ParamGenerator<T2>& g2, const ParamGenerator<T3>& g3)
- : g1_(g1), g2_(g2), g3_(g3) {}
- virtual ~CartesianProductGenerator3() {}
-
- virtual ParamIteratorInterface<ParamType>* Begin() const {
- return new Iterator(this, g1_, g1_.begin(), g2_, g2_.begin(), g3_,
- g3_.begin());
- }
- virtual ParamIteratorInterface<ParamType>* End() const {
- return new Iterator(this, g1_, g1_.end(), g2_, g2_.end(), g3_, g3_.end());
- }
-
- private:
- class Iterator : public ParamIteratorInterface<ParamType> {
- public:
- Iterator(const ParamGeneratorInterface<ParamType>* base,
- const ParamGenerator<T1>& g1,
- const typename ParamGenerator<T1>::iterator& current1,
- const ParamGenerator<T2>& g2,
- const typename ParamGenerator<T2>::iterator& current2,
- const ParamGenerator<T3>& g3,
- const typename ParamGenerator<T3>::iterator& current3)
- : base_(base),
- begin1_(g1.begin()), end1_(g1.end()), current1_(current1),
- begin2_(g2.begin()), end2_(g2.end()), current2_(current2),
- begin3_(g3.begin()), end3_(g3.end()), current3_(current3) {
- ComputeCurrentValue();
- }
- virtual ~Iterator() {}
-
- virtual const ParamGeneratorInterface<ParamType>* BaseGenerator() const {
- return base_;
- }
- // Advance should not be called on beyond-of-range iterators
- // so no component iterators must be beyond end of range, either.
- virtual void Advance() {
- assert(!AtEnd());
- ++current3_;
- if (current3_ == end3_) {
- current3_ = begin3_;
- ++current2_;
- }
- if (current2_ == end2_) {
- current2_ = begin2_;
- ++current1_;
- }
- ComputeCurrentValue();
- }
- virtual ParamIteratorInterface<ParamType>* Clone() const {
- return new Iterator(*this);
- }
- virtual const ParamType* Current() const { return &current_value_; }
- virtual bool Equals(const ParamIteratorInterface<ParamType>& other) const {
- // Having the same base generator guarantees that the other
- // iterator is of the same type and we can downcast.
- GTEST_CHECK_(BaseGenerator() == other.BaseGenerator())
- << "The program attempted to compare iterators "
- << "from different generators." << std::endl;
- const Iterator* typed_other =
- CheckedDowncastToActualType<const Iterator>(&other);
- // We must report iterators equal if they both point beyond their
- // respective ranges. That can happen in a variety of fashions,
- // so we have to consult AtEnd().
- return (AtEnd() && typed_other->AtEnd()) ||
- (
- current1_ == typed_other->current1_ &&
- current2_ == typed_other->current2_ &&
- current3_ == typed_other->current3_);
- }
-
- private:
- Iterator(const Iterator& other)
- : base_(other.base_),
- begin1_(other.begin1_),
- end1_(other.end1_),
- current1_(other.current1_),
- begin2_(other.begin2_),
- end2_(other.end2_),
- current2_(other.current2_),
- begin3_(other.begin3_),
- end3_(other.end3_),
- current3_(other.current3_) {
- ComputeCurrentValue();
- }
-
- void ComputeCurrentValue() {
- if (!AtEnd())
- current_value_ = ParamType(*current1_, *current2_, *current3_);
- }
- bool AtEnd() const {
- // We must report iterator past the end of the range when either of the
- // component iterators has reached the end of its range.
- return
- current1_ == end1_ ||
- current2_ == end2_ ||
- current3_ == end3_;
- }
-
- // No implementation - assignment is unsupported.
- void operator=(const Iterator& other);
-
- const ParamGeneratorInterface<ParamType>* const base_;
- // begin[i]_ and end[i]_ define the i-th range that Iterator traverses.
- // current[i]_ is the actual traversing iterator.
- const typename ParamGenerator<T1>::iterator begin1_;
- const typename ParamGenerator<T1>::iterator end1_;
- typename ParamGenerator<T1>::iterator current1_;
- const typename ParamGenerator<T2>::iterator begin2_;
- const typename ParamGenerator<T2>::iterator end2_;
- typename ParamGenerator<T2>::iterator current2_;
- const typename ParamGenerator<T3>::iterator begin3_;
- const typename ParamGenerator<T3>::iterator end3_;
- typename ParamGenerator<T3>::iterator current3_;
- ParamType current_value_;
- }; // class CartesianProductGenerator3::Iterator
-
- // No implementation - assignment is unsupported.
- void operator=(const CartesianProductGenerator3& other);
-
- const ParamGenerator<T1> g1_;
- const ParamGenerator<T2> g2_;
- const ParamGenerator<T3> g3_;
-}; // class CartesianProductGenerator3
-
-
-template <typename T1, typename T2, typename T3, typename T4>
-class CartesianProductGenerator4
- : public ParamGeneratorInterface< ::testing::tuple<T1, T2, T3, T4> > {
- public:
- typedef ::testing::tuple<T1, T2, T3, T4> ParamType;
-
- CartesianProductGenerator4(const ParamGenerator<T1>& g1,
- const ParamGenerator<T2>& g2, const ParamGenerator<T3>& g3,
- const ParamGenerator<T4>& g4)
- : g1_(g1), g2_(g2), g3_(g3), g4_(g4) {}
- virtual ~CartesianProductGenerator4() {}
-
- virtual ParamIteratorInterface<ParamType>* Begin() const {
- return new Iterator(this, g1_, g1_.begin(), g2_, g2_.begin(), g3_,
- g3_.begin(), g4_, g4_.begin());
- }
- virtual ParamIteratorInterface<ParamType>* End() const {
- return new Iterator(this, g1_, g1_.end(), g2_, g2_.end(), g3_, g3_.end(),
- g4_, g4_.end());
- }
-
- private:
- class Iterator : public ParamIteratorInterface<ParamType> {
- public:
- Iterator(const ParamGeneratorInterface<ParamType>* base,
- const ParamGenerator<T1>& g1,
- const typename ParamGenerator<T1>::iterator& current1,
- const ParamGenerator<T2>& g2,
- const typename ParamGenerator<T2>::iterator& current2,
- const ParamGenerator<T3>& g3,
- const typename ParamGenerator<T3>::iterator& current3,
- const ParamGenerator<T4>& g4,
- const typename ParamGenerator<T4>::iterator& current4)
- : base_(base),
- begin1_(g1.begin()), end1_(g1.end()), current1_(current1),
- begin2_(g2.begin()), end2_(g2.end()), current2_(current2),
- begin3_(g3.begin()), end3_(g3.end()), current3_(current3),
- begin4_(g4.begin()), end4_(g4.end()), current4_(current4) {
- ComputeCurrentValue();
- }
- virtual ~Iterator() {}
-
- virtual const ParamGeneratorInterface<ParamType>* BaseGenerator() const {
- return base_;
- }
- // Advance should not be called on beyond-of-range iterators
- // so no component iterators must be beyond end of range, either.
- virtual void Advance() {
- assert(!AtEnd());
- ++current4_;
- if (current4_ == end4_) {
- current4_ = begin4_;
- ++current3_;
- }
- if (current3_ == end3_) {
- current3_ = begin3_;
- ++current2_;
- }
- if (current2_ == end2_) {
- current2_ = begin2_;
- ++current1_;
- }
- ComputeCurrentValue();
- }
- virtual ParamIteratorInterface<ParamType>* Clone() const {
- return new Iterator(*this);
- }
- virtual const ParamType* Current() const { return &current_value_; }
- virtual bool Equals(const ParamIteratorInterface<ParamType>& other) const {
- // Having the same base generator guarantees that the other
- // iterator is of the same type and we can downcast.
- GTEST_CHECK_(BaseGenerator() == other.BaseGenerator())
- << "The program attempted to compare iterators "
- << "from different generators." << std::endl;
- const Iterator* typed_other =
- CheckedDowncastToActualType<const Iterator>(&other);
- // We must report iterators equal if they both point beyond their
- // respective ranges. That can happen in a variety of fashions,
- // so we have to consult AtEnd().
- return (AtEnd() && typed_other->AtEnd()) ||
- (
- current1_ == typed_other->current1_ &&
- current2_ == typed_other->current2_ &&
- current3_ == typed_other->current3_ &&
- current4_ == typed_other->current4_);
- }
-
- private:
- Iterator(const Iterator& other)
- : base_(other.base_),
- begin1_(other.begin1_),
- end1_(other.end1_),
- current1_(other.current1_),
- begin2_(other.begin2_),
- end2_(other.end2_),
- current2_(other.current2_),
- begin3_(other.begin3_),
- end3_(other.end3_),
- current3_(other.current3_),
- begin4_(other.begin4_),
- end4_(other.end4_),
- current4_(other.current4_) {
- ComputeCurrentValue();
- }
-
- void ComputeCurrentValue() {
- if (!AtEnd())
- current_value_ = ParamType(*current1_, *current2_, *current3_,
- *current4_);
- }
- bool AtEnd() const {
- // We must report iterator past the end of the range when either of the
- // component iterators has reached the end of its range.
- return
- current1_ == end1_ ||
- current2_ == end2_ ||
- current3_ == end3_ ||
- current4_ == end4_;
- }
-
- // No implementation - assignment is unsupported.
- void operator=(const Iterator& other);
-
- const ParamGeneratorInterface<ParamType>* const base_;
- // begin[i]_ and end[i]_ define the i-th range that Iterator traverses.
- // current[i]_ is the actual traversing iterator.
- const typename ParamGenerator<T1>::iterator begin1_;
- const typename ParamGenerator<T1>::iterator end1_;
- typename ParamGenerator<T1>::iterator current1_;
- const typename ParamGenerator<T2>::iterator begin2_;
- const typename ParamGenerator<T2>::iterator end2_;
- typename ParamGenerator<T2>::iterator current2_;
- const typename ParamGenerator<T3>::iterator begin3_;
- const typename ParamGenerator<T3>::iterator end3_;
- typename ParamGenerator<T3>::iterator current3_;
- const typename ParamGenerator<T4>::iterator begin4_;
- const typename ParamGenerator<T4>::iterator end4_;
- typename ParamGenerator<T4>::iterator current4_;
- ParamType current_value_;
- }; // class CartesianProductGenerator4::Iterator
-
- // No implementation - assignment is unsupported.
- void operator=(const CartesianProductGenerator4& other);
-
- const ParamGenerator<T1> g1_;
- const ParamGenerator<T2> g2_;
- const ParamGenerator<T3> g3_;
- const ParamGenerator<T4> g4_;
-}; // class CartesianProductGenerator4
-
-
-template <typename T1, typename T2, typename T3, typename T4, typename T5>
-class CartesianProductGenerator5
- : public ParamGeneratorInterface< ::testing::tuple<T1, T2, T3, T4, T5> > {
- public:
- typedef ::testing::tuple<T1, T2, T3, T4, T5> ParamType;
-
- CartesianProductGenerator5(const ParamGenerator<T1>& g1,
- const ParamGenerator<T2>& g2, const ParamGenerator<T3>& g3,
- const ParamGenerator<T4>& g4, const ParamGenerator<T5>& g5)
- : g1_(g1), g2_(g2), g3_(g3), g4_(g4), g5_(g5) {}
- virtual ~CartesianProductGenerator5() {}
-
- virtual ParamIteratorInterface<ParamType>* Begin() const {
- return new Iterator(this, g1_, g1_.begin(), g2_, g2_.begin(), g3_,
- g3_.begin(), g4_, g4_.begin(), g5_, g5_.begin());
- }
- virtual ParamIteratorInterface<ParamType>* End() const {
- return new Iterator(this, g1_, g1_.end(), g2_, g2_.end(), g3_, g3_.end(),
- g4_, g4_.end(), g5_, g5_.end());
- }
-
- private:
- class Iterator : public ParamIteratorInterface<ParamType> {
- public:
- Iterator(const ParamGeneratorInterface<ParamType>* base,
- const ParamGenerator<T1>& g1,
- const typename ParamGenerator<T1>::iterator& current1,
- const ParamGenerator<T2>& g2,
- const typename ParamGenerator<T2>::iterator& current2,
- const ParamGenerator<T3>& g3,
- const typename ParamGenerator<T3>::iterator& current3,
- const ParamGenerator<T4>& g4,
- const typename ParamGenerator<T4>::iterator& current4,
- const ParamGenerator<T5>& g5,
- const typename ParamGenerator<T5>::iterator& current5)
- : base_(base),
- begin1_(g1.begin()), end1_(g1.end()), current1_(current1),
- begin2_(g2.begin()), end2_(g2.end()), current2_(current2),
- begin3_(g3.begin()), end3_(g3.end()), current3_(current3),
- begin4_(g4.begin()), end4_(g4.end()), current4_(current4),
- begin5_(g5.begin()), end5_(g5.end()), current5_(current5) {
- ComputeCurrentValue();
- }
- virtual ~Iterator() {}
-
- virtual const ParamGeneratorInterface<ParamType>* BaseGenerator() const {
- return base_;
- }
- // Advance should not be called on beyond-of-range iterators
- // so no component iterators must be beyond end of range, either.
- virtual void Advance() {
- assert(!AtEnd());
- ++current5_;
- if (current5_ == end5_) {
- current5_ = begin5_;
- ++current4_;
- }
- if (current4_ == end4_) {
- current4_ = begin4_;
- ++current3_;
- }
- if (current3_ == end3_) {
- current3_ = begin3_;
- ++current2_;
- }
- if (current2_ == end2_) {
- current2_ = begin2_;
- ++current1_;
- }
- ComputeCurrentValue();
- }
- virtual ParamIteratorInterface<ParamType>* Clone() const {
- return new Iterator(*this);
- }
- virtual const ParamType* Current() const { return &current_value_; }
- virtual bool Equals(const ParamIteratorInterface<ParamType>& other) const {
- // Having the same base generator guarantees that the other
- // iterator is of the same type and we can downcast.
- GTEST_CHECK_(BaseGenerator() == other.BaseGenerator())
- << "The program attempted to compare iterators "
- << "from different generators." << std::endl;
- const Iterator* typed_other =
- CheckedDowncastToActualType<const Iterator>(&other);
- // We must report iterators equal if they both point beyond their
- // respective ranges. That can happen in a variety of fashions,
- // so we have to consult AtEnd().
- return (AtEnd() && typed_other->AtEnd()) ||
- (
- current1_ == typed_other->current1_ &&
- current2_ == typed_other->current2_ &&
- current3_ == typed_other->current3_ &&
- current4_ == typed_other->current4_ &&
- current5_ == typed_other->current5_);
- }
-
- private:
- Iterator(const Iterator& other)
- : base_(other.base_),
- begin1_(other.begin1_),
- end1_(other.end1_),
- current1_(other.current1_),
- begin2_(other.begin2_),
- end2_(other.end2_),
- current2_(other.current2_),
- begin3_(other.begin3_),
- end3_(other.end3_),
- current3_(other.current3_),
- begin4_(other.begin4_),
- end4_(other.end4_),
- current4_(other.current4_),
- begin5_(other.begin5_),
- end5_(other.end5_),
- current5_(other.current5_) {
- ComputeCurrentValue();
- }
-
- void ComputeCurrentValue() {
- if (!AtEnd())
- current_value_ = ParamType(*current1_, *current2_, *current3_,
- *current4_, *current5_);
- }
- bool AtEnd() const {
- // We must report iterator past the end of the range when either of the
- // component iterators has reached the end of its range.
- return
- current1_ == end1_ ||
- current2_ == end2_ ||
- current3_ == end3_ ||
- current4_ == end4_ ||
- current5_ == end5_;
- }
-
- // No implementation - assignment is unsupported.
- void operator=(const Iterator& other);
-
- const ParamGeneratorInterface<ParamType>* const base_;
- // begin[i]_ and end[i]_ define the i-th range that Iterator traverses.
- // current[i]_ is the actual traversing iterator.
- const typename ParamGenerator<T1>::iterator begin1_;
- const typename ParamGenerator<T1>::iterator end1_;
- typename ParamGenerator<T1>::iterator current1_;
- const typename ParamGenerator<T2>::iterator begin2_;
- const typename ParamGenerator<T2>::iterator end2_;
- typename ParamGenerator<T2>::iterator current2_;
- const typename ParamGenerator<T3>::iterator begin3_;
- const typename ParamGenerator<T3>::iterator end3_;
- typename ParamGenerator<T3>::iterator current3_;
- const typename ParamGenerator<T4>::iterator begin4_;
- const typename ParamGenerator<T4>::iterator end4_;
- typename ParamGenerator<T4>::iterator current4_;
- const typename ParamGenerator<T5>::iterator begin5_;
- const typename ParamGenerator<T5>::iterator end5_;
- typename ParamGenerator<T5>::iterator current5_;
- ParamType current_value_;
- }; // class CartesianProductGenerator5::Iterator
-
- // No implementation - assignment is unsupported.
- void operator=(const CartesianProductGenerator5& other);
-
- const ParamGenerator<T1> g1_;
- const ParamGenerator<T2> g2_;
- const ParamGenerator<T3> g3_;
- const ParamGenerator<T4> g4_;
- const ParamGenerator<T5> g5_;
-}; // class CartesianProductGenerator5
-
-
-template <typename T1, typename T2, typename T3, typename T4, typename T5,
- typename T6>
-class CartesianProductGenerator6
- : public ParamGeneratorInterface< ::testing::tuple<T1, T2, T3, T4, T5,
- T6> > {
- public:
- typedef ::testing::tuple<T1, T2, T3, T4, T5, T6> ParamType;
-
- CartesianProductGenerator6(const ParamGenerator<T1>& g1,
- const ParamGenerator<T2>& g2, const ParamGenerator<T3>& g3,
- const ParamGenerator<T4>& g4, const ParamGenerator<T5>& g5,
- const ParamGenerator<T6>& g6)
- : g1_(g1), g2_(g2), g3_(g3), g4_(g4), g5_(g5), g6_(g6) {}
- virtual ~CartesianProductGenerator6() {}
-
- virtual ParamIteratorInterface<ParamType>* Begin() const {
- return new Iterator(this, g1_, g1_.begin(), g2_, g2_.begin(), g3_,
- g3_.begin(), g4_, g4_.begin(), g5_, g5_.begin(), g6_, g6_.begin());
- }
- virtual ParamIteratorInterface<ParamType>* End() const {
- return new Iterator(this, g1_, g1_.end(), g2_, g2_.end(), g3_, g3_.end(),
- g4_, g4_.end(), g5_, g5_.end(), g6_, g6_.end());
- }
-
- private:
- class Iterator : public ParamIteratorInterface<ParamType> {
- public:
- Iterator(const ParamGeneratorInterface<ParamType>* base,
- const ParamGenerator<T1>& g1,
- const typename ParamGenerator<T1>::iterator& current1,
- const ParamGenerator<T2>& g2,
- const typename ParamGenerator<T2>::iterator& current2,
- const ParamGenerator<T3>& g3,
- const typename ParamGenerator<T3>::iterator& current3,
- const ParamGenerator<T4>& g4,
- const typename ParamGenerator<T4>::iterator& current4,
- const ParamGenerator<T5>& g5,
- const typename ParamGenerator<T5>::iterator& current5,
- const ParamGenerator<T6>& g6,
- const typename ParamGenerator<T6>::iterator& current6)
- : base_(base),
- begin1_(g1.begin()), end1_(g1.end()), current1_(current1),
- begin2_(g2.begin()), end2_(g2.end()), current2_(current2),
- begin3_(g3.begin()), end3_(g3.end()), current3_(current3),
- begin4_(g4.begin()), end4_(g4.end()), current4_(current4),
- begin5_(g5.begin()), end5_(g5.end()), current5_(current5),
- begin6_(g6.begin()), end6_(g6.end()), current6_(current6) {
- ComputeCurrentValue();
- }
- virtual ~Iterator() {}
-
- virtual const ParamGeneratorInterface<ParamType>* BaseGenerator() const {
- return base_;
- }
- // Advance should not be called on beyond-of-range iterators
- // so no component iterators must be beyond end of range, either.
- virtual void Advance() {
- assert(!AtEnd());
- ++current6_;
- if (current6_ == end6_) {
- current6_ = begin6_;
- ++current5_;
- }
- if (current5_ == end5_) {
- current5_ = begin5_;
- ++current4_;
- }
- if (current4_ == end4_) {
- current4_ = begin4_;
- ++current3_;
- }
- if (current3_ == end3_) {
- current3_ = begin3_;
- ++current2_;
- }
- if (current2_ == end2_) {
- current2_ = begin2_;
- ++current1_;
- }
- ComputeCurrentValue();
- }
- virtual ParamIteratorInterface<ParamType>* Clone() const {
- return new Iterator(*this);
- }
- virtual const ParamType* Current() const { return &current_value_; }
- virtual bool Equals(const ParamIteratorInterface<ParamType>& other) const {
- // Having the same base generator guarantees that the other
- // iterator is of the same type and we can downcast.
- GTEST_CHECK_(BaseGenerator() == other.BaseGenerator())
- << "The program attempted to compare iterators "
- << "from different generators." << std::endl;
- const Iterator* typed_other =
- CheckedDowncastToActualType<const Iterator>(&other);
- // We must report iterators equal if they both point beyond their
- // respective ranges. That can happen in a variety of fashions,
- // so we have to consult AtEnd().
- return (AtEnd() && typed_other->AtEnd()) ||
- (
- current1_ == typed_other->current1_ &&
- current2_ == typed_other->current2_ &&
- current3_ == typed_other->current3_ &&
- current4_ == typed_other->current4_ &&
- current5_ == typed_other->current5_ &&
- current6_ == typed_other->current6_);
- }
-
- private:
- Iterator(const Iterator& other)
- : base_(other.base_),
- begin1_(other.begin1_),
- end1_(other.end1_),
- current1_(other.current1_),
- begin2_(other.begin2_),
- end2_(other.end2_),
- current2_(other.current2_),
- begin3_(other.begin3_),
- end3_(other.end3_),
- current3_(other.current3_),
- begin4_(other.begin4_),
- end4_(other.end4_),
- current4_(other.current4_),
- begin5_(other.begin5_),
- end5_(other.end5_),
- current5_(other.current5_),
- begin6_(other.begin6_),
- end6_(other.end6_),
- current6_(other.current6_) {
- ComputeCurrentValue();
- }
-
- void ComputeCurrentValue() {
- if (!AtEnd())
- current_value_ = ParamType(*current1_, *current2_, *current3_,
- *current4_, *current5_, *current6_);
- }
- bool AtEnd() const {
- // We must report iterator past the end of the range when either of the
- // component iterators has reached the end of its range.
- return
- current1_ == end1_ ||
- current2_ == end2_ ||
- current3_ == end3_ ||
- current4_ == end4_ ||
- current5_ == end5_ ||
- current6_ == end6_;
- }
-
- // No implementation - assignment is unsupported.
- void operator=(const Iterator& other);
-
- const ParamGeneratorInterface<ParamType>* const base_;
- // begin[i]_ and end[i]_ define the i-th range that Iterator traverses.
- // current[i]_ is the actual traversing iterator.
- const typename ParamGenerator<T1>::iterator begin1_;
- const typename ParamGenerator<T1>::iterator end1_;
- typename ParamGenerator<T1>::iterator current1_;
- const typename ParamGenerator<T2>::iterator begin2_;
- const typename ParamGenerator<T2>::iterator end2_;
- typename ParamGenerator<T2>::iterator current2_;
- const typename ParamGenerator<T3>::iterator begin3_;
- const typename ParamGenerator<T3>::iterator end3_;
- typename ParamGenerator<T3>::iterator current3_;
- const typename ParamGenerator<T4>::iterator begin4_;
- const typename ParamGenerator<T4>::iterator end4_;
- typename ParamGenerator<T4>::iterator current4_;
- const typename ParamGenerator<T5>::iterator begin5_;
- const typename ParamGenerator<T5>::iterator end5_;
- typename ParamGenerator<T5>::iterator current5_;
- const typename ParamGenerator<T6>::iterator begin6_;
- const typename ParamGenerator<T6>::iterator end6_;
- typename ParamGenerator<T6>::iterator current6_;
- ParamType current_value_;
- }; // class CartesianProductGenerator6::Iterator
-
- // No implementation - assignment is unsupported.
- void operator=(const CartesianProductGenerator6& other);
-
- const ParamGenerator<T1> g1_;
- const ParamGenerator<T2> g2_;
- const ParamGenerator<T3> g3_;
- const ParamGenerator<T4> g4_;
- const ParamGenerator<T5> g5_;
- const ParamGenerator<T6> g6_;
-}; // class CartesianProductGenerator6
-
-
-template <typename T1, typename T2, typename T3, typename T4, typename T5,
- typename T6, typename T7>
-class CartesianProductGenerator7
- : public ParamGeneratorInterface< ::testing::tuple<T1, T2, T3, T4, T5, T6,
- T7> > {
- public:
- typedef ::testing::tuple<T1, T2, T3, T4, T5, T6, T7> ParamType;
-
- CartesianProductGenerator7(const ParamGenerator<T1>& g1,
- const ParamGenerator<T2>& g2, const ParamGenerator<T3>& g3,
- const ParamGenerator<T4>& g4, const ParamGenerator<T5>& g5,
- const ParamGenerator<T6>& g6, const ParamGenerator<T7>& g7)
- : g1_(g1), g2_(g2), g3_(g3), g4_(g4), g5_(g5), g6_(g6), g7_(g7) {}
- virtual ~CartesianProductGenerator7() {}
-
- virtual ParamIteratorInterface<ParamType>* Begin() const {
- return new Iterator(this, g1_, g1_.begin(), g2_, g2_.begin(), g3_,
- g3_.begin(), g4_, g4_.begin(), g5_, g5_.begin(), g6_, g6_.begin(), g7_,
- g7_.begin());
- }
- virtual ParamIteratorInterface<ParamType>* End() const {
- return new Iterator(this, g1_, g1_.end(), g2_, g2_.end(), g3_, g3_.end(),
- g4_, g4_.end(), g5_, g5_.end(), g6_, g6_.end(), g7_, g7_.end());
- }
-
- private:
- class Iterator : public ParamIteratorInterface<ParamType> {
- public:
- Iterator(const ParamGeneratorInterface<ParamType>* base,
- const ParamGenerator<T1>& g1,
- const typename ParamGenerator<T1>::iterator& current1,
- const ParamGenerator<T2>& g2,
- const typename ParamGenerator<T2>::iterator& current2,
- const ParamGenerator<T3>& g3,
- const typename ParamGenerator<T3>::iterator& current3,
- const ParamGenerator<T4>& g4,
- const typename ParamGenerator<T4>::iterator& current4,
- const ParamGenerator<T5>& g5,
- const typename ParamGenerator<T5>::iterator& current5,
- const ParamGenerator<T6>& g6,
- const typename ParamGenerator<T6>::iterator& current6,
- const ParamGenerator<T7>& g7,
- const typename ParamGenerator<T7>::iterator& current7)
- : base_(base),
- begin1_(g1.begin()), end1_(g1.end()), current1_(current1),
- begin2_(g2.begin()), end2_(g2.end()), current2_(current2),
- begin3_(g3.begin()), end3_(g3.end()), current3_(current3),
- begin4_(g4.begin()), end4_(g4.end()), current4_(current4),
- begin5_(g5.begin()), end5_(g5.end()), current5_(current5),
- begin6_(g6.begin()), end6_(g6.end()), current6_(current6),
- begin7_(g7.begin()), end7_(g7.end()), current7_(current7) {
- ComputeCurrentValue();
- }
- virtual ~Iterator() {}
-
- virtual const ParamGeneratorInterface<ParamType>* BaseGenerator() const {
- return base_;
- }
- // Advance should not be called on beyond-of-range iterators
- // so no component iterators must be beyond end of range, either.
- virtual void Advance() {
- assert(!AtEnd());
- ++current7_;
- if (current7_ == end7_) {
- current7_ = begin7_;
- ++current6_;
- }
- if (current6_ == end6_) {
- current6_ = begin6_;
- ++current5_;
- }
- if (current5_ == end5_) {
- current5_ = begin5_;
- ++current4_;
- }
- if (current4_ == end4_) {
- current4_ = begin4_;
- ++current3_;
- }
- if (current3_ == end3_) {
- current3_ = begin3_;
- ++current2_;
- }
- if (current2_ == end2_) {
- current2_ = begin2_;
- ++current1_;
- }
- ComputeCurrentValue();
- }
- virtual ParamIteratorInterface<ParamType>* Clone() const {
- return new Iterator(*this);
- }
- virtual const ParamType* Current() const { return &current_value_; }
- virtual bool Equals(const ParamIteratorInterface<ParamType>& other) const {
- // Having the same base generator guarantees that the other
- // iterator is of the same type and we can downcast.
- GTEST_CHECK_(BaseGenerator() == other.BaseGenerator())
- << "The program attempted to compare iterators "
- << "from different generators." << std::endl;
- const Iterator* typed_other =
- CheckedDowncastToActualType<const Iterator>(&other);
- // We must report iterators equal if they both point beyond their
- // respective ranges. That can happen in a variety of fashions,
- // so we have to consult AtEnd().
- return (AtEnd() && typed_other->AtEnd()) ||
- (
- current1_ == typed_other->current1_ &&
- current2_ == typed_other->current2_ &&
- current3_ == typed_other->current3_ &&
- current4_ == typed_other->current4_ &&
- current5_ == typed_other->current5_ &&
- current6_ == typed_other->current6_ &&
- current7_ == typed_other->current7_);
- }
-
- private:
- Iterator(const Iterator& other)
- : base_(other.base_),
- begin1_(other.begin1_),
- end1_(other.end1_),
- current1_(other.current1_),
- begin2_(other.begin2_),
- end2_(other.end2_),
- current2_(other.current2_),
- begin3_(other.begin3_),
- end3_(other.end3_),
- current3_(other.current3_),
- begin4_(other.begin4_),
- end4_(other.end4_),
- current4_(other.current4_),
- begin5_(other.begin5_),
- end5_(other.end5_),
- current5_(other.current5_),
- begin6_(other.begin6_),
- end6_(other.end6_),
- current6_(other.current6_),
- begin7_(other.begin7_),
- end7_(other.end7_),
- current7_(other.current7_) {
- ComputeCurrentValue();
- }
-
- void ComputeCurrentValue() {
- if (!AtEnd())
- current_value_ = ParamType(*current1_, *current2_, *current3_,
- *current4_, *current5_, *current6_, *current7_);
- }
- bool AtEnd() const {
- // We must report iterator past the end of the range when either of the
- // component iterators has reached the end of its range.
- return
- current1_ == end1_ ||
- current2_ == end2_ ||
- current3_ == end3_ ||
- current4_ == end4_ ||
- current5_ == end5_ ||
- current6_ == end6_ ||
- current7_ == end7_;
- }
-
- // No implementation - assignment is unsupported.
- void operator=(const Iterator& other);
-
- const ParamGeneratorInterface<ParamType>* const base_;
- // begin[i]_ and end[i]_ define the i-th range that Iterator traverses.
- // current[i]_ is the actual traversing iterator.
- const typename ParamGenerator<T1>::iterator begin1_;
- const typename ParamGenerator<T1>::iterator end1_;
- typename ParamGenerator<T1>::iterator current1_;
- const typename ParamGenerator<T2>::iterator begin2_;
- const typename ParamGenerator<T2>::iterator end2_;
- typename ParamGenerator<T2>::iterator current2_;
- const typename ParamGenerator<T3>::iterator begin3_;
- const typename ParamGenerator<T3>::iterator end3_;
- typename ParamGenerator<T3>::iterator current3_;
- const typename ParamGenerator<T4>::iterator begin4_;
- const typename ParamGenerator<T4>::iterator end4_;
- typename ParamGenerator<T4>::iterator current4_;
- const typename ParamGenerator<T5>::iterator begin5_;
- const typename ParamGenerator<T5>::iterator end5_;
- typename ParamGenerator<T5>::iterator current5_;
- const typename ParamGenerator<T6>::iterator begin6_;
- const typename ParamGenerator<T6>::iterator end6_;
- typename ParamGenerator<T6>::iterator current6_;
- const typename ParamGenerator<T7>::iterator begin7_;
- const typename ParamGenerator<T7>::iterator end7_;
- typename ParamGenerator<T7>::iterator current7_;
- ParamType current_value_;
- }; // class CartesianProductGenerator7::Iterator
-
- // No implementation - assignment is unsupported.
- void operator=(const CartesianProductGenerator7& other);
-
- const ParamGenerator<T1> g1_;
- const ParamGenerator<T2> g2_;
- const ParamGenerator<T3> g3_;
- const ParamGenerator<T4> g4_;
- const ParamGenerator<T5> g5_;
- const ParamGenerator<T6> g6_;
- const ParamGenerator<T7> g7_;
-}; // class CartesianProductGenerator7
-
-
-template <typename T1, typename T2, typename T3, typename T4, typename T5,
- typename T6, typename T7, typename T8>
-class CartesianProductGenerator8
- : public ParamGeneratorInterface< ::testing::tuple<T1, T2, T3, T4, T5, T6,
- T7, T8> > {
- public:
- typedef ::testing::tuple<T1, T2, T3, T4, T5, T6, T7, T8> ParamType;
-
- CartesianProductGenerator8(const ParamGenerator<T1>& g1,
- const ParamGenerator<T2>& g2, const ParamGenerator<T3>& g3,
- const ParamGenerator<T4>& g4, const ParamGenerator<T5>& g5,
- const ParamGenerator<T6>& g6, const ParamGenerator<T7>& g7,
- const ParamGenerator<T8>& g8)
- : g1_(g1), g2_(g2), g3_(g3), g4_(g4), g5_(g5), g6_(g6), g7_(g7),
- g8_(g8) {}
- virtual ~CartesianProductGenerator8() {}
-
- virtual ParamIteratorInterface<ParamType>* Begin() const {
- return new Iterator(this, g1_, g1_.begin(), g2_, g2_.begin(), g3_,
- g3_.begin(), g4_, g4_.begin(), g5_, g5_.begin(), g6_, g6_.begin(), g7_,
- g7_.begin(), g8_, g8_.begin());
- }
- virtual ParamIteratorInterface<ParamType>* End() const {
- return new Iterator(this, g1_, g1_.end(), g2_, g2_.end(), g3_, g3_.end(),
- g4_, g4_.end(), g5_, g5_.end(), g6_, g6_.end(), g7_, g7_.end(), g8_,
- g8_.end());
- }
-
- private:
- class Iterator : public ParamIteratorInterface<ParamType> {
- public:
- Iterator(const ParamGeneratorInterface<ParamType>* base,
- const ParamGenerator<T1>& g1,
- const typename ParamGenerator<T1>::iterator& current1,
- const ParamGenerator<T2>& g2,
- const typename ParamGenerator<T2>::iterator& current2,
- const ParamGenerator<T3>& g3,
- const typename ParamGenerator<T3>::iterator& current3,
- const ParamGenerator<T4>& g4,
- const typename ParamGenerator<T4>::iterator& current4,
- const ParamGenerator<T5>& g5,
- const typename ParamGenerator<T5>::iterator& current5,
- const ParamGenerator<T6>& g6,
- const typename ParamGenerator<T6>::iterator& current6,
- const ParamGenerator<T7>& g7,
- const typename ParamGenerator<T7>::iterator& current7,
- const ParamGenerator<T8>& g8,
- const typename ParamGenerator<T8>::iterator& current8)
- : base_(base),
- begin1_(g1.begin()), end1_(g1.end()), current1_(current1),
- begin2_(g2.begin()), end2_(g2.end()), current2_(current2),
- begin3_(g3.begin()), end3_(g3.end()), current3_(current3),
- begin4_(g4.begin()), end4_(g4.end()), current4_(current4),
- begin5_(g5.begin()), end5_(g5.end()), current5_(current5),
- begin6_(g6.begin()), end6_(g6.end()), current6_(current6),
- begin7_(g7.begin()), end7_(g7.end()), current7_(current7),
- begin8_(g8.begin()), end8_(g8.end()), current8_(current8) {
- ComputeCurrentValue();
- }
- virtual ~Iterator() {}
-
- virtual const ParamGeneratorInterface<ParamType>* BaseGenerator() const {
- return base_;
- }
- // Advance should not be called on beyond-of-range iterators
- // so no component iterators must be beyond end of range, either.
- virtual void Advance() {
- assert(!AtEnd());
- ++current8_;
- if (current8_ == end8_) {
- current8_ = begin8_;
- ++current7_;
- }
- if (current7_ == end7_) {
- current7_ = begin7_;
- ++current6_;
- }
- if (current6_ == end6_) {
- current6_ = begin6_;
- ++current5_;
- }
- if (current5_ == end5_) {
- current5_ = begin5_;
- ++current4_;
- }
- if (current4_ == end4_) {
- current4_ = begin4_;
- ++current3_;
- }
- if (current3_ == end3_) {
- current3_ = begin3_;
- ++current2_;
- }
- if (current2_ == end2_) {
- current2_ = begin2_;
- ++current1_;
- }
- ComputeCurrentValue();
- }
- virtual ParamIteratorInterface<ParamType>* Clone() const {
- return new Iterator(*this);
- }
- virtual const ParamType* Current() const { return &current_value_; }
- virtual bool Equals(const ParamIteratorInterface<ParamType>& other) const {
- // Having the same base generator guarantees that the other
- // iterator is of the same type and we can downcast.
- GTEST_CHECK_(BaseGenerator() == other.BaseGenerator())
- << "The program attempted to compare iterators "
- << "from different generators." << std::endl;
- const Iterator* typed_other =
- CheckedDowncastToActualType<const Iterator>(&other);
- // We must report iterators equal if they both point beyond their
- // respective ranges. That can happen in a variety of fashions,
- // so we have to consult AtEnd().
- return (AtEnd() && typed_other->AtEnd()) ||
- (
- current1_ == typed_other->current1_ &&
- current2_ == typed_other->current2_ &&
- current3_ == typed_other->current3_ &&
- current4_ == typed_other->current4_ &&
- current5_ == typed_other->current5_ &&
- current6_ == typed_other->current6_ &&
- current7_ == typed_other->current7_ &&
- current8_ == typed_other->current8_);
- }
-
- private:
- Iterator(const Iterator& other)
- : base_(other.base_),
- begin1_(other.begin1_),
- end1_(other.end1_),
- current1_(other.current1_),
- begin2_(other.begin2_),
- end2_(other.end2_),
- current2_(other.current2_),
- begin3_(other.begin3_),
- end3_(other.end3_),
- current3_(other.current3_),
- begin4_(other.begin4_),
- end4_(other.end4_),
- current4_(other.current4_),
- begin5_(other.begin5_),
- end5_(other.end5_),
- current5_(other.current5_),
- begin6_(other.begin6_),
- end6_(other.end6_),
- current6_(other.current6_),
- begin7_(other.begin7_),
- end7_(other.end7_),
- current7_(other.current7_),
- begin8_(other.begin8_),
- end8_(other.end8_),
- current8_(other.current8_) {
- ComputeCurrentValue();
- }
-
- void ComputeCurrentValue() {
- if (!AtEnd())
- current_value_ = ParamType(*current1_, *current2_, *current3_,
- *current4_, *current5_, *current6_, *current7_, *current8_);
- }
- bool AtEnd() const {
- // We must report iterator past the end of the range when either of the
- // component iterators has reached the end of its range.
- return
- current1_ == end1_ ||
- current2_ == end2_ ||
- current3_ == end3_ ||
- current4_ == end4_ ||
- current5_ == end5_ ||
- current6_ == end6_ ||
- current7_ == end7_ ||
- current8_ == end8_;
- }
-
- // No implementation - assignment is unsupported.
- void operator=(const Iterator& other);
-
- const ParamGeneratorInterface<ParamType>* const base_;
- // begin[i]_ and end[i]_ define the i-th range that Iterator traverses.
- // current[i]_ is the actual traversing iterator.
- const typename ParamGenerator<T1>::iterator begin1_;
- const typename ParamGenerator<T1>::iterator end1_;
- typename ParamGenerator<T1>::iterator current1_;
- const typename ParamGenerator<T2>::iterator begin2_;
- const typename ParamGenerator<T2>::iterator end2_;
- typename ParamGenerator<T2>::iterator current2_;
- const typename ParamGenerator<T3>::iterator begin3_;
- const typename ParamGenerator<T3>::iterator end3_;
- typename ParamGenerator<T3>::iterator current3_;
- const typename ParamGenerator<T4>::iterator begin4_;
- const typename ParamGenerator<T4>::iterator end4_;
- typename ParamGenerator<T4>::iterator current4_;
- const typename ParamGenerator<T5>::iterator begin5_;
- const typename ParamGenerator<T5>::iterator end5_;
- typename ParamGenerator<T5>::iterator current5_;
- const typename ParamGenerator<T6>::iterator begin6_;
- const typename ParamGenerator<T6>::iterator end6_;
- typename ParamGenerator<T6>::iterator current6_;
- const typename ParamGenerator<T7>::iterator begin7_;
- const typename ParamGenerator<T7>::iterator end7_;
- typename ParamGenerator<T7>::iterator current7_;
- const typename ParamGenerator<T8>::iterator begin8_;
- const typename ParamGenerator<T8>::iterator end8_;
- typename ParamGenerator<T8>::iterator current8_;
- ParamType current_value_;
- }; // class CartesianProductGenerator8::Iterator
-
- // No implementation - assignment is unsupported.
- void operator=(const CartesianProductGenerator8& other);
-
- const ParamGenerator<T1> g1_;
- const ParamGenerator<T2> g2_;
- const ParamGenerator<T3> g3_;
- const ParamGenerator<T4> g4_;
- const ParamGenerator<T5> g5_;
- const ParamGenerator<T6> g6_;
- const ParamGenerator<T7> g7_;
- const ParamGenerator<T8> g8_;
-}; // class CartesianProductGenerator8
-
-
-template <typename T1, typename T2, typename T3, typename T4, typename T5,
- typename T6, typename T7, typename T8, typename T9>
-class CartesianProductGenerator9
- : public ParamGeneratorInterface< ::testing::tuple<T1, T2, T3, T4, T5, T6,
- T7, T8, T9> > {
- public:
- typedef ::testing::tuple<T1, T2, T3, T4, T5, T6, T7, T8, T9> ParamType;
-
- CartesianProductGenerator9(const ParamGenerator<T1>& g1,
- const ParamGenerator<T2>& g2, const ParamGenerator<T3>& g3,
- const ParamGenerator<T4>& g4, const ParamGenerator<T5>& g5,
- const ParamGenerator<T6>& g6, const ParamGenerator<T7>& g7,
- const ParamGenerator<T8>& g8, const ParamGenerator<T9>& g9)
- : g1_(g1), g2_(g2), g3_(g3), g4_(g4), g5_(g5), g6_(g6), g7_(g7), g8_(g8),
- g9_(g9) {}
- virtual ~CartesianProductGenerator9() {}
-
- virtual ParamIteratorInterface<ParamType>* Begin() const {
- return new Iterator(this, g1_, g1_.begin(), g2_, g2_.begin(), g3_,
- g3_.begin(), g4_, g4_.begin(), g5_, g5_.begin(), g6_, g6_.begin(), g7_,
- g7_.begin(), g8_, g8_.begin(), g9_, g9_.begin());
- }
- virtual ParamIteratorInterface<ParamType>* End() const {
- return new Iterator(this, g1_, g1_.end(), g2_, g2_.end(), g3_, g3_.end(),
- g4_, g4_.end(), g5_, g5_.end(), g6_, g6_.end(), g7_, g7_.end(), g8_,
- g8_.end(), g9_, g9_.end());
- }
-
- private:
- class Iterator : public ParamIteratorInterface<ParamType> {
- public:
- Iterator(const ParamGeneratorInterface<ParamType>* base,
- const ParamGenerator<T1>& g1,
- const typename ParamGenerator<T1>::iterator& current1,
- const ParamGenerator<T2>& g2,
- const typename ParamGenerator<T2>::iterator& current2,
- const ParamGenerator<T3>& g3,
- const typename ParamGenerator<T3>::iterator& current3,
- const ParamGenerator<T4>& g4,
- const typename ParamGenerator<T4>::iterator& current4,
- const ParamGenerator<T5>& g5,
- const typename ParamGenerator<T5>::iterator& current5,
- const ParamGenerator<T6>& g6,
- const typename ParamGenerator<T6>::iterator& current6,
- const ParamGenerator<T7>& g7,
- const typename ParamGenerator<T7>::iterator& current7,
- const ParamGenerator<T8>& g8,
- const typename ParamGenerator<T8>::iterator& current8,
- const ParamGenerator<T9>& g9,
- const typename ParamGenerator<T9>::iterator& current9)
- : base_(base),
- begin1_(g1.begin()), end1_(g1.end()), current1_(current1),
- begin2_(g2.begin()), end2_(g2.end()), current2_(current2),
- begin3_(g3.begin()), end3_(g3.end()), current3_(current3),
- begin4_(g4.begin()), end4_(g4.end()), current4_(current4),
- begin5_(g5.begin()), end5_(g5.end()), current5_(current5),
- begin6_(g6.begin()), end6_(g6.end()), current6_(current6),
- begin7_(g7.begin()), end7_(g7.end()), current7_(current7),
- begin8_(g8.begin()), end8_(g8.end()), current8_(current8),
- begin9_(g9.begin()), end9_(g9.end()), current9_(current9) {
- ComputeCurrentValue();
- }
- virtual ~Iterator() {}
-
- virtual const ParamGeneratorInterface<ParamType>* BaseGenerator() const {
- return base_;
- }
- // Advance should not be called on beyond-of-range iterators
- // so no component iterators must be beyond end of range, either.
- virtual void Advance() {
- assert(!AtEnd());
- ++current9_;
- if (current9_ == end9_) {
- current9_ = begin9_;
- ++current8_;
- }
- if (current8_ == end8_) {
- current8_ = begin8_;
- ++current7_;
- }
- if (current7_ == end7_) {
- current7_ = begin7_;
- ++current6_;
- }
- if (current6_ == end6_) {
- current6_ = begin6_;
- ++current5_;
- }
- if (current5_ == end5_) {
- current5_ = begin5_;
- ++current4_;
- }
- if (current4_ == end4_) {
- current4_ = begin4_;
- ++current3_;
- }
- if (current3_ == end3_) {
- current3_ = begin3_;
- ++current2_;
- }
- if (current2_ == end2_) {
- current2_ = begin2_;
- ++current1_;
- }
- ComputeCurrentValue();
- }
- virtual ParamIteratorInterface<ParamType>* Clone() const {
- return new Iterator(*this);
- }
- virtual const ParamType* Current() const { return &current_value_; }
- virtual bool Equals(const ParamIteratorInterface<ParamType>& other) const {
- // Having the same base generator guarantees that the other
- // iterator is of the same type and we can downcast.
- GTEST_CHECK_(BaseGenerator() == other.BaseGenerator())
- << "The program attempted to compare iterators "
- << "from different generators." << std::endl;
- const Iterator* typed_other =
- CheckedDowncastToActualType<const Iterator>(&other);
- // We must report iterators equal if they both point beyond their
- // respective ranges. That can happen in a variety of fashions,
- // so we have to consult AtEnd().
- return (AtEnd() && typed_other->AtEnd()) ||
- (
- current1_ == typed_other->current1_ &&
- current2_ == typed_other->current2_ &&
- current3_ == typed_other->current3_ &&
- current4_ == typed_other->current4_ &&
- current5_ == typed_other->current5_ &&
- current6_ == typed_other->current6_ &&
- current7_ == typed_other->current7_ &&
- current8_ == typed_other->current8_ &&
- current9_ == typed_other->current9_);
- }
-
- private:
- Iterator(const Iterator& other)
- : base_(other.base_),
- begin1_(other.begin1_),
- end1_(other.end1_),
- current1_(other.current1_),
- begin2_(other.begin2_),
- end2_(other.end2_),
- current2_(other.current2_),
- begin3_(other.begin3_),
- end3_(other.end3_),
- current3_(other.current3_),
- begin4_(other.begin4_),
- end4_(other.end4_),
- current4_(other.current4_),
- begin5_(other.begin5_),
- end5_(other.end5_),
- current5_(other.current5_),
- begin6_(other.begin6_),
- end6_(other.end6_),
- current6_(other.current6_),
- begin7_(other.begin7_),
- end7_(other.end7_),
- current7_(other.current7_),
- begin8_(other.begin8_),
- end8_(other.end8_),
- current8_(other.current8_),
- begin9_(other.begin9_),
- end9_(other.end9_),
- current9_(other.current9_) {
- ComputeCurrentValue();
- }
-
- void ComputeCurrentValue() {
- if (!AtEnd())
- current_value_ = ParamType(*current1_, *current2_, *current3_,
- *current4_, *current5_, *current6_, *current7_, *current8_,
- *current9_);
- }
- bool AtEnd() const {
- // We must report iterator past the end of the range when either of the
- // component iterators has reached the end of its range.
- return
- current1_ == end1_ ||
- current2_ == end2_ ||
- current3_ == end3_ ||
- current4_ == end4_ ||
- current5_ == end5_ ||
- current6_ == end6_ ||
- current7_ == end7_ ||
- current8_ == end8_ ||
- current9_ == end9_;
- }
-
- // No implementation - assignment is unsupported.
- void operator=(const Iterator& other);
-
- const ParamGeneratorInterface<ParamType>* const base_;
- // begin[i]_ and end[i]_ define the i-th range that Iterator traverses.
- // current[i]_ is the actual traversing iterator.
- const typename ParamGenerator<T1>::iterator begin1_;
- const typename ParamGenerator<T1>::iterator end1_;
- typename ParamGenerator<T1>::iterator current1_;
- const typename ParamGenerator<T2>::iterator begin2_;
- const typename ParamGenerator<T2>::iterator end2_;
- typename ParamGenerator<T2>::iterator current2_;
- const typename ParamGenerator<T3>::iterator begin3_;
- const typename ParamGenerator<T3>::iterator end3_;
- typename ParamGenerator<T3>::iterator current3_;
- const typename ParamGenerator<T4>::iterator begin4_;
- const typename ParamGenerator<T4>::iterator end4_;
- typename ParamGenerator<T4>::iterator current4_;
- const typename ParamGenerator<T5>::iterator begin5_;
- const typename ParamGenerator<T5>::iterator end5_;
- typename ParamGenerator<T5>::iterator current5_;
- const typename ParamGenerator<T6>::iterator begin6_;
- const typename ParamGenerator<T6>::iterator end6_;
- typename ParamGenerator<T6>::iterator current6_;
- const typename ParamGenerator<T7>::iterator begin7_;
- const typename ParamGenerator<T7>::iterator end7_;
- typename ParamGenerator<T7>::iterator current7_;
- const typename ParamGenerator<T8>::iterator begin8_;
- const typename ParamGenerator<T8>::iterator end8_;
- typename ParamGenerator<T8>::iterator current8_;
- const typename ParamGenerator<T9>::iterator begin9_;
- const typename ParamGenerator<T9>::iterator end9_;
- typename ParamGenerator<T9>::iterator current9_;
- ParamType current_value_;
- }; // class CartesianProductGenerator9::Iterator
-
- // No implementation - assignment is unsupported.
- void operator=(const CartesianProductGenerator9& other);
-
- const ParamGenerator<T1> g1_;
- const ParamGenerator<T2> g2_;
- const ParamGenerator<T3> g3_;
- const ParamGenerator<T4> g4_;
- const ParamGenerator<T5> g5_;
- const ParamGenerator<T6> g6_;
- const ParamGenerator<T7> g7_;
- const ParamGenerator<T8> g8_;
- const ParamGenerator<T9> g9_;
-}; // class CartesianProductGenerator9
-
-
-template <typename T1, typename T2, typename T3, typename T4, typename T5,
- typename T6, typename T7, typename T8, typename T9, typename T10>
-class CartesianProductGenerator10
- : public ParamGeneratorInterface< ::testing::tuple<T1, T2, T3, T4, T5, T6,
- T7, T8, T9, T10> > {
- public:
- typedef ::testing::tuple<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10> ParamType;
-
- CartesianProductGenerator10(const ParamGenerator<T1>& g1,
- const ParamGenerator<T2>& g2, const ParamGenerator<T3>& g3,
- const ParamGenerator<T4>& g4, const ParamGenerator<T5>& g5,
- const ParamGenerator<T6>& g6, const ParamGenerator<T7>& g7,
- const ParamGenerator<T8>& g8, const ParamGenerator<T9>& g9,
- const ParamGenerator<T10>& g10)
- : g1_(g1), g2_(g2), g3_(g3), g4_(g4), g5_(g5), g6_(g6), g7_(g7), g8_(g8),
- g9_(g9), g10_(g10) {}
- virtual ~CartesianProductGenerator10() {}
-
- virtual ParamIteratorInterface<ParamType>* Begin() const {
- return new Iterator(this, g1_, g1_.begin(), g2_, g2_.begin(), g3_,
- g3_.begin(), g4_, g4_.begin(), g5_, g5_.begin(), g6_, g6_.begin(), g7_,
- g7_.begin(), g8_, g8_.begin(), g9_, g9_.begin(), g10_, g10_.begin());
- }
- virtual ParamIteratorInterface<ParamType>* End() const {
- return new Iterator(this, g1_, g1_.end(), g2_, g2_.end(), g3_, g3_.end(),
- g4_, g4_.end(), g5_, g5_.end(), g6_, g6_.end(), g7_, g7_.end(), g8_,
- g8_.end(), g9_, g9_.end(), g10_, g10_.end());
- }
-
- private:
- class Iterator : public ParamIteratorInterface<ParamType> {
- public:
- Iterator(const ParamGeneratorInterface<ParamType>* base,
- const ParamGenerator<T1>& g1,
- const typename ParamGenerator<T1>::iterator& current1,
- const ParamGenerator<T2>& g2,
- const typename ParamGenerator<T2>::iterator& current2,
- const ParamGenerator<T3>& g3,
- const typename ParamGenerator<T3>::iterator& current3,
- const ParamGenerator<T4>& g4,
- const typename ParamGenerator<T4>::iterator& current4,
- const ParamGenerator<T5>& g5,
- const typename ParamGenerator<T5>::iterator& current5,
- const ParamGenerator<T6>& g6,
- const typename ParamGenerator<T6>::iterator& current6,
- const ParamGenerator<T7>& g7,
- const typename ParamGenerator<T7>::iterator& current7,
- const ParamGenerator<T8>& g8,
- const typename ParamGenerator<T8>::iterator& current8,
- const ParamGenerator<T9>& g9,
- const typename ParamGenerator<T9>::iterator& current9,
- const ParamGenerator<T10>& g10,
- const typename ParamGenerator<T10>::iterator& current10)
- : base_(base),
- begin1_(g1.begin()), end1_(g1.end()), current1_(current1),
- begin2_(g2.begin()), end2_(g2.end()), current2_(current2),
- begin3_(g3.begin()), end3_(g3.end()), current3_(current3),
- begin4_(g4.begin()), end4_(g4.end()), current4_(current4),
- begin5_(g5.begin()), end5_(g5.end()), current5_(current5),
- begin6_(g6.begin()), end6_(g6.end()), current6_(current6),
- begin7_(g7.begin()), end7_(g7.end()), current7_(current7),
- begin8_(g8.begin()), end8_(g8.end()), current8_(current8),
- begin9_(g9.begin()), end9_(g9.end()), current9_(current9),
- begin10_(g10.begin()), end10_(g10.end()), current10_(current10) {
- ComputeCurrentValue();
- }
- virtual ~Iterator() {}
-
- virtual const ParamGeneratorInterface<ParamType>* BaseGenerator() const {
- return base_;
- }
- // Advance should not be called on beyond-of-range iterators
- // so no component iterators must be beyond end of range, either.
- virtual void Advance() {
- assert(!AtEnd());
- ++current10_;
- if (current10_ == end10_) {
- current10_ = begin10_;
- ++current9_;
- }
- if (current9_ == end9_) {
- current9_ = begin9_;
- ++current8_;
- }
- if (current8_ == end8_) {
- current8_ = begin8_;
- ++current7_;
- }
- if (current7_ == end7_) {
- current7_ = begin7_;
- ++current6_;
- }
- if (current6_ == end6_) {
- current6_ = begin6_;
- ++current5_;
- }
- if (current5_ == end5_) {
- current5_ = begin5_;
- ++current4_;
- }
- if (current4_ == end4_) {
- current4_ = begin4_;
- ++current3_;
- }
- if (current3_ == end3_) {
- current3_ = begin3_;
- ++current2_;
- }
- if (current2_ == end2_) {
- current2_ = begin2_;
- ++current1_;
- }
- ComputeCurrentValue();
- }
- virtual ParamIteratorInterface<ParamType>* Clone() const {
- return new Iterator(*this);
- }
- virtual const ParamType* Current() const { return &current_value_; }
- virtual bool Equals(const ParamIteratorInterface<ParamType>& other) const {
- // Having the same base generator guarantees that the other
- // iterator is of the same type and we can downcast.
- GTEST_CHECK_(BaseGenerator() == other.BaseGenerator())
- << "The program attempted to compare iterators "
- << "from different generators." << std::endl;
- const Iterator* typed_other =
- CheckedDowncastToActualType<const Iterator>(&other);
- // We must report iterators equal if they both point beyond their
- // respective ranges. That can happen in a variety of fashions,
- // so we have to consult AtEnd().
- return (AtEnd() && typed_other->AtEnd()) ||
- (
- current1_ == typed_other->current1_ &&
- current2_ == typed_other->current2_ &&
- current3_ == typed_other->current3_ &&
- current4_ == typed_other->current4_ &&
- current5_ == typed_other->current5_ &&
- current6_ == typed_other->current6_ &&
- current7_ == typed_other->current7_ &&
- current8_ == typed_other->current8_ &&
- current9_ == typed_other->current9_ &&
- current10_ == typed_other->current10_);
- }
-
- private:
- Iterator(const Iterator& other)
- : base_(other.base_),
- begin1_(other.begin1_),
- end1_(other.end1_),
- current1_(other.current1_),
- begin2_(other.begin2_),
- end2_(other.end2_),
- current2_(other.current2_),
- begin3_(other.begin3_),
- end3_(other.end3_),
- current3_(other.current3_),
- begin4_(other.begin4_),
- end4_(other.end4_),
- current4_(other.current4_),
- begin5_(other.begin5_),
- end5_(other.end5_),
- current5_(other.current5_),
- begin6_(other.begin6_),
- end6_(other.end6_),
- current6_(other.current6_),
- begin7_(other.begin7_),
- end7_(other.end7_),
- current7_(other.current7_),
- begin8_(other.begin8_),
- end8_(other.end8_),
- current8_(other.current8_),
- begin9_(other.begin9_),
- end9_(other.end9_),
- current9_(other.current9_),
- begin10_(other.begin10_),
- end10_(other.end10_),
- current10_(other.current10_) {
- ComputeCurrentValue();
- }
-
- void ComputeCurrentValue() {
- if (!AtEnd())
- current_value_ = ParamType(*current1_, *current2_, *current3_,
- *current4_, *current5_, *current6_, *current7_, *current8_,
- *current9_, *current10_);
- }
- bool AtEnd() const {
- // We must report iterator past the end of the range when either of the
- // component iterators has reached the end of its range.
- return
- current1_ == end1_ ||
- current2_ == end2_ ||
- current3_ == end3_ ||
- current4_ == end4_ ||
- current5_ == end5_ ||
- current6_ == end6_ ||
- current7_ == end7_ ||
- current8_ == end8_ ||
- current9_ == end9_ ||
- current10_ == end10_;
- }
-
- // No implementation - assignment is unsupported.
- void operator=(const Iterator& other);
-
- const ParamGeneratorInterface<ParamType>* const base_;
- // begin[i]_ and end[i]_ define the i-th range that Iterator traverses.
- // current[i]_ is the actual traversing iterator.
- const typename ParamGenerator<T1>::iterator begin1_;
- const typename ParamGenerator<T1>::iterator end1_;
- typename ParamGenerator<T1>::iterator current1_;
- const typename ParamGenerator<T2>::iterator begin2_;
- const typename ParamGenerator<T2>::iterator end2_;
- typename ParamGenerator<T2>::iterator current2_;
- const typename ParamGenerator<T3>::iterator begin3_;
- const typename ParamGenerator<T3>::iterator end3_;
- typename ParamGenerator<T3>::iterator current3_;
- const typename ParamGenerator<T4>::iterator begin4_;
- const typename ParamGenerator<T4>::iterator end4_;
- typename ParamGenerator<T4>::iterator current4_;
- const typename ParamGenerator<T5>::iterator begin5_;
- const typename ParamGenerator<T5>::iterator end5_;
- typename ParamGenerator<T5>::iterator current5_;
- const typename ParamGenerator<T6>::iterator begin6_;
- const typename ParamGenerator<T6>::iterator end6_;
- typename ParamGenerator<T6>::iterator current6_;
- const typename ParamGenerator<T7>::iterator begin7_;
- const typename ParamGenerator<T7>::iterator end7_;
- typename ParamGenerator<T7>::iterator current7_;
- const typename ParamGenerator<T8>::iterator begin8_;
- const typename ParamGenerator<T8>::iterator end8_;
- typename ParamGenerator<T8>::iterator current8_;
- const typename ParamGenerator<T9>::iterator begin9_;
- const typename ParamGenerator<T9>::iterator end9_;
- typename ParamGenerator<T9>::iterator current9_;
- const typename ParamGenerator<T10>::iterator begin10_;
- const typename ParamGenerator<T10>::iterator end10_;
- typename ParamGenerator<T10>::iterator current10_;
- ParamType current_value_;
- }; // class CartesianProductGenerator10::Iterator
-
- // No implementation - assignment is unsupported.
- void operator=(const CartesianProductGenerator10& other);
-
- const ParamGenerator<T1> g1_;
- const ParamGenerator<T2> g2_;
- const ParamGenerator<T3> g3_;
- const ParamGenerator<T4> g4_;
- const ParamGenerator<T5> g5_;
- const ParamGenerator<T6> g6_;
- const ParamGenerator<T7> g7_;
- const ParamGenerator<T8> g8_;
- const ParamGenerator<T9> g9_;
- const ParamGenerator<T10> g10_;
-}; // class CartesianProductGenerator10
-
-
-// INTERNAL IMPLEMENTATION - DO NOT USE IN USER CODE.
-//
-// Helper classes providing Combine() with polymorphic features. They allow
-// casting CartesianProductGeneratorN<T> to ParamGenerator<U> if T is
-// convertible to U.
-//
-template <class Generator1, class Generator2>
-class CartesianProductHolder2 {
- public:
-CartesianProductHolder2(const Generator1& g1, const Generator2& g2)
- : g1_(g1), g2_(g2) {}
- template <typename T1, typename T2>
- operator ParamGenerator< ::testing::tuple<T1, T2> >() const {
- return ParamGenerator< ::testing::tuple<T1, T2> >(
- new CartesianProductGenerator2<T1, T2>(
- static_cast<ParamGenerator<T1> >(g1_),
- static_cast<ParamGenerator<T2> >(g2_)));
- }
-
- private:
- // No implementation - assignment is unsupported.
- void operator=(const CartesianProductHolder2& other);
-
- const Generator1 g1_;
- const Generator2 g2_;
-}; // class CartesianProductHolder2
-
-template <class Generator1, class Generator2, class Generator3>
-class CartesianProductHolder3 {
- public:
-CartesianProductHolder3(const Generator1& g1, const Generator2& g2,
- const Generator3& g3)
- : g1_(g1), g2_(g2), g3_(g3) {}
- template <typename T1, typename T2, typename T3>
- operator ParamGenerator< ::testing::tuple<T1, T2, T3> >() const {
- return ParamGenerator< ::testing::tuple<T1, T2, T3> >(
- new CartesianProductGenerator3<T1, T2, T3>(
- static_cast<ParamGenerator<T1> >(g1_),
- static_cast<ParamGenerator<T2> >(g2_),
- static_cast<ParamGenerator<T3> >(g3_)));
- }
-
- private:
- // No implementation - assignment is unsupported.
- void operator=(const CartesianProductHolder3& other);
-
- const Generator1 g1_;
- const Generator2 g2_;
- const Generator3 g3_;
-}; // class CartesianProductHolder3
-
-template <class Generator1, class Generator2, class Generator3,
- class Generator4>
-class CartesianProductHolder4 {
- public:
-CartesianProductHolder4(const Generator1& g1, const Generator2& g2,
- const Generator3& g3, const Generator4& g4)
- : g1_(g1), g2_(g2), g3_(g3), g4_(g4) {}
- template <typename T1, typename T2, typename T3, typename T4>
- operator ParamGenerator< ::testing::tuple<T1, T2, T3, T4> >() const {
- return ParamGenerator< ::testing::tuple<T1, T2, T3, T4> >(
- new CartesianProductGenerator4<T1, T2, T3, T4>(
- static_cast<ParamGenerator<T1> >(g1_),
- static_cast<ParamGenerator<T2> >(g2_),
- static_cast<ParamGenerator<T3> >(g3_),
- static_cast<ParamGenerator<T4> >(g4_)));
- }
-
- private:
- // No implementation - assignment is unsupported.
- void operator=(const CartesianProductHolder4& other);
-
- const Generator1 g1_;
- const Generator2 g2_;
- const Generator3 g3_;
- const Generator4 g4_;
-}; // class CartesianProductHolder4
-
-template <class Generator1, class Generator2, class Generator3,
- class Generator4, class Generator5>
-class CartesianProductHolder5 {
- public:
-CartesianProductHolder5(const Generator1& g1, const Generator2& g2,
- const Generator3& g3, const Generator4& g4, const Generator5& g5)
- : g1_(g1), g2_(g2), g3_(g3), g4_(g4), g5_(g5) {}
- template <typename T1, typename T2, typename T3, typename T4, typename T5>
- operator ParamGenerator< ::testing::tuple<T1, T2, T3, T4, T5> >() const {
- return ParamGenerator< ::testing::tuple<T1, T2, T3, T4, T5> >(
- new CartesianProductGenerator5<T1, T2, T3, T4, T5>(
- static_cast<ParamGenerator<T1> >(g1_),
- static_cast<ParamGenerator<T2> >(g2_),
- static_cast<ParamGenerator<T3> >(g3_),
- static_cast<ParamGenerator<T4> >(g4_),
- static_cast<ParamGenerator<T5> >(g5_)));
- }
-
- private:
- // No implementation - assignment is unsupported.
- void operator=(const CartesianProductHolder5& other);
-
- const Generator1 g1_;
- const Generator2 g2_;
- const Generator3 g3_;
- const Generator4 g4_;
- const Generator5 g5_;
-}; // class CartesianProductHolder5
-
-template <class Generator1, class Generator2, class Generator3,
- class Generator4, class Generator5, class Generator6>
-class CartesianProductHolder6 {
- public:
-CartesianProductHolder6(const Generator1& g1, const Generator2& g2,
- const Generator3& g3, const Generator4& g4, const Generator5& g5,
- const Generator6& g6)
- : g1_(g1), g2_(g2), g3_(g3), g4_(g4), g5_(g5), g6_(g6) {}
- template <typename T1, typename T2, typename T3, typename T4, typename T5,
- typename T6>
- operator ParamGenerator< ::testing::tuple<T1, T2, T3, T4, T5, T6> >() const {
- return ParamGenerator< ::testing::tuple<T1, T2, T3, T4, T5, T6> >(
- new CartesianProductGenerator6<T1, T2, T3, T4, T5, T6>(
- static_cast<ParamGenerator<T1> >(g1_),
- static_cast<ParamGenerator<T2> >(g2_),
- static_cast<ParamGenerator<T3> >(g3_),
- static_cast<ParamGenerator<T4> >(g4_),
- static_cast<ParamGenerator<T5> >(g5_),
- static_cast<ParamGenerator<T6> >(g6_)));
- }
-
- private:
- // No implementation - assignment is unsupported.
- void operator=(const CartesianProductHolder6& other);
-
- const Generator1 g1_;
- const Generator2 g2_;
- const Generator3 g3_;
- const Generator4 g4_;
- const Generator5 g5_;
- const Generator6 g6_;
-}; // class CartesianProductHolder6
-
-template <class Generator1, class Generator2, class Generator3,
- class Generator4, class Generator5, class Generator6, class Generator7>
-class CartesianProductHolder7 {
- public:
-CartesianProductHolder7(const Generator1& g1, const Generator2& g2,
- const Generator3& g3, const Generator4& g4, const Generator5& g5,
- const Generator6& g6, const Generator7& g7)
- : g1_(g1), g2_(g2), g3_(g3), g4_(g4), g5_(g5), g6_(g6), g7_(g7) {}
- template <typename T1, typename T2, typename T3, typename T4, typename T5,
- typename T6, typename T7>
- operator ParamGenerator< ::testing::tuple<T1, T2, T3, T4, T5, T6,
- T7> >() const {
- return ParamGenerator< ::testing::tuple<T1, T2, T3, T4, T5, T6, T7> >(
- new CartesianProductGenerator7<T1, T2, T3, T4, T5, T6, T7>(
- static_cast<ParamGenerator<T1> >(g1_),
- static_cast<ParamGenerator<T2> >(g2_),
- static_cast<ParamGenerator<T3> >(g3_),
- static_cast<ParamGenerator<T4> >(g4_),
- static_cast<ParamGenerator<T5> >(g5_),
- static_cast<ParamGenerator<T6> >(g6_),
- static_cast<ParamGenerator<T7> >(g7_)));
- }
-
- private:
- // No implementation - assignment is unsupported.
- void operator=(const CartesianProductHolder7& other);
-
- const Generator1 g1_;
- const Generator2 g2_;
- const Generator3 g3_;
- const Generator4 g4_;
- const Generator5 g5_;
- const Generator6 g6_;
- const Generator7 g7_;
-}; // class CartesianProductHolder7
-
-template <class Generator1, class Generator2, class Generator3,
- class Generator4, class Generator5, class Generator6, class Generator7,
- class Generator8>
-class CartesianProductHolder8 {
- public:
-CartesianProductHolder8(const Generator1& g1, const Generator2& g2,
- const Generator3& g3, const Generator4& g4, const Generator5& g5,
- const Generator6& g6, const Generator7& g7, const Generator8& g8)
- : g1_(g1), g2_(g2), g3_(g3), g4_(g4), g5_(g5), g6_(g6), g7_(g7),
- g8_(g8) {}
- template <typename T1, typename T2, typename T3, typename T4, typename T5,
- typename T6, typename T7, typename T8>
- operator ParamGenerator< ::testing::tuple<T1, T2, T3, T4, T5, T6, T7,
- T8> >() const {
- return ParamGenerator< ::testing::tuple<T1, T2, T3, T4, T5, T6, T7, T8> >(
- new CartesianProductGenerator8<T1, T2, T3, T4, T5, T6, T7, T8>(
- static_cast<ParamGenerator<T1> >(g1_),
- static_cast<ParamGenerator<T2> >(g2_),
- static_cast<ParamGenerator<T3> >(g3_),
- static_cast<ParamGenerator<T4> >(g4_),
- static_cast<ParamGenerator<T5> >(g5_),
- static_cast<ParamGenerator<T6> >(g6_),
- static_cast<ParamGenerator<T7> >(g7_),
- static_cast<ParamGenerator<T8> >(g8_)));
- }
-
- private:
- // No implementation - assignment is unsupported.
- void operator=(const CartesianProductHolder8& other);
-
- const Generator1 g1_;
- const Generator2 g2_;
- const Generator3 g3_;
- const Generator4 g4_;
- const Generator5 g5_;
- const Generator6 g6_;
- const Generator7 g7_;
- const Generator8 g8_;
-}; // class CartesianProductHolder8
-
-template <class Generator1, class Generator2, class Generator3,
- class Generator4, class Generator5, class Generator6, class Generator7,
- class Generator8, class Generator9>
-class CartesianProductHolder9 {
- public:
-CartesianProductHolder9(const Generator1& g1, const Generator2& g2,
- const Generator3& g3, const Generator4& g4, const Generator5& g5,
- const Generator6& g6, const Generator7& g7, const Generator8& g8,
- const Generator9& g9)
- : g1_(g1), g2_(g2), g3_(g3), g4_(g4), g5_(g5), g6_(g6), g7_(g7), g8_(g8),
- g9_(g9) {}
- template <typename T1, typename T2, typename T3, typename T4, typename T5,
- typename T6, typename T7, typename T8, typename T9>
- operator ParamGenerator< ::testing::tuple<T1, T2, T3, T4, T5, T6, T7, T8,
- T9> >() const {
- return ParamGenerator< ::testing::tuple<T1, T2, T3, T4, T5, T6, T7, T8,
- T9> >(
- new CartesianProductGenerator9<T1, T2, T3, T4, T5, T6, T7, T8, T9>(
- static_cast<ParamGenerator<T1> >(g1_),
- static_cast<ParamGenerator<T2> >(g2_),
- static_cast<ParamGenerator<T3> >(g3_),
- static_cast<ParamGenerator<T4> >(g4_),
- static_cast<ParamGenerator<T5> >(g5_),
- static_cast<ParamGenerator<T6> >(g6_),
- static_cast<ParamGenerator<T7> >(g7_),
- static_cast<ParamGenerator<T8> >(g8_),
- static_cast<ParamGenerator<T9> >(g9_)));
- }
-
- private:
- // No implementation - assignment is unsupported.
- void operator=(const CartesianProductHolder9& other);
-
- const Generator1 g1_;
- const Generator2 g2_;
- const Generator3 g3_;
- const Generator4 g4_;
- const Generator5 g5_;
- const Generator6 g6_;
- const Generator7 g7_;
- const Generator8 g8_;
- const Generator9 g9_;
-}; // class CartesianProductHolder9
-
-template <class Generator1, class Generator2, class Generator3,
- class Generator4, class Generator5, class Generator6, class Generator7,
- class Generator8, class Generator9, class Generator10>
-class CartesianProductHolder10 {
- public:
-CartesianProductHolder10(const Generator1& g1, const Generator2& g2,
- const Generator3& g3, const Generator4& g4, const Generator5& g5,
- const Generator6& g6, const Generator7& g7, const Generator8& g8,
- const Generator9& g9, const Generator10& g10)
- : g1_(g1), g2_(g2), g3_(g3), g4_(g4), g5_(g5), g6_(g6), g7_(g7), g8_(g8),
- g9_(g9), g10_(g10) {}
- template <typename T1, typename T2, typename T3, typename T4, typename T5,
- typename T6, typename T7, typename T8, typename T9, typename T10>
- operator ParamGenerator< ::testing::tuple<T1, T2, T3, T4, T5, T6, T7, T8, T9,
- T10> >() const {
- return ParamGenerator< ::testing::tuple<T1, T2, T3, T4, T5, T6, T7, T8, T9,
- T10> >(
- new CartesianProductGenerator10<T1, T2, T3, T4, T5, T6, T7, T8, T9,
- T10>(
- static_cast<ParamGenerator<T1> >(g1_),
- static_cast<ParamGenerator<T2> >(g2_),
- static_cast<ParamGenerator<T3> >(g3_),
- static_cast<ParamGenerator<T4> >(g4_),
- static_cast<ParamGenerator<T5> >(g5_),
- static_cast<ParamGenerator<T6> >(g6_),
- static_cast<ParamGenerator<T7> >(g7_),
- static_cast<ParamGenerator<T8> >(g8_),
- static_cast<ParamGenerator<T9> >(g9_),
- static_cast<ParamGenerator<T10> >(g10_)));
- }
-
- private:
- // No implementation - assignment is unsupported.
- void operator=(const CartesianProductHolder10& other);
-
- const Generator1 g1_;
- const Generator2 g2_;
- const Generator3 g3_;
- const Generator4 g4_;
- const Generator5 g5_;
- const Generator6 g6_;
- const Generator7 g7_;
- const Generator8 g8_;
- const Generator9 g9_;
- const Generator10 g10_;
-}; // class CartesianProductHolder10
-
-# endif // GTEST_HAS_COMBINE
-
-} // namespace internal
-} // namespace testing
-
-#endif // GTEST_HAS_PARAM_TEST
-
-#endif // GTEST_INCLUDE_GTEST_INTERNAL_GTEST_PARAM_UTIL_GENERATED_H_
diff --git a/third_party/aom/third_party/googletest/src/googletest/include/gtest/internal/gtest-param-util-generated.h.pump b/third_party/aom/third_party/googletest/src/googletest/include/gtest/internal/gtest-param-util-generated.h.pump
deleted file mode 100644
index 5c7c47af0..000000000
--- a/third_party/aom/third_party/googletest/src/googletest/include/gtest/internal/gtest-param-util-generated.h.pump
+++ /dev/null
@@ -1,286 +0,0 @@
-$$ -*- mode: c++; -*-
-$var n = 50 $$ Maximum length of Values arguments we want to support.
-$var maxtuple = 10 $$ Maximum number of Combine arguments we want to support.
-// Copyright 2008 Google Inc.
-// All Rights Reserved.
-//
-// Redistribution and use in source and binary forms, with or without
-// modification, are permitted provided that the following conditions are
-// met:
-//
-// * Redistributions of source code must retain the above copyright
-// notice, this list of conditions and the following disclaimer.
-// * Redistributions in binary form must reproduce the above
-// copyright notice, this list of conditions and the following disclaimer
-// in the documentation and/or other materials provided with the
-// distribution.
-// * Neither the name of Google Inc. nor the names of its
-// contributors may be used to endorse or promote products derived from
-// this software without specific prior written permission.
-//
-// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
-// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
-// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
-// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
-// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
-// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
-// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
-// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
-// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-//
-// Author: vladl@google.com (Vlad Losev)
-
-// Type and function utilities for implementing parameterized tests.
-// This file is generated by a SCRIPT. DO NOT EDIT BY HAND!
-//
-// Currently Google Test supports at most $n arguments in Values,
-// and at most $maxtuple arguments in Combine. Please contact
-// googletestframework@googlegroups.com if you need more.
-// Please note that the number of arguments to Combine is limited
-// by the maximum arity of the implementation of tuple which is
-// currently set at $maxtuple.
-
-#ifndef GTEST_INCLUDE_GTEST_INTERNAL_GTEST_PARAM_UTIL_GENERATED_H_
-#define GTEST_INCLUDE_GTEST_INTERNAL_GTEST_PARAM_UTIL_GENERATED_H_
-
-// scripts/fuse_gtest.py depends on gtest's own header being #included
-// *unconditionally*. Therefore these #includes cannot be moved
-// inside #if GTEST_HAS_PARAM_TEST.
-#include "gtest/internal/gtest-param-util.h"
-#include "gtest/internal/gtest-port.h"
-
-#if GTEST_HAS_PARAM_TEST
-
-namespace testing {
-
-// Forward declarations of ValuesIn(), which is implemented in
-// include/gtest/gtest-param-test.h.
-template <typename ForwardIterator>
-internal::ParamGenerator<
- typename ::testing::internal::IteratorTraits<ForwardIterator>::value_type>
-ValuesIn(ForwardIterator begin, ForwardIterator end);
-
-template <typename T, size_t N>
-internal::ParamGenerator<T> ValuesIn(const T (&array)[N]);
-
-template <class Container>
-internal::ParamGenerator<typename Container::value_type> ValuesIn(
- const Container& container);
-
-namespace internal {
-
-// Used in the Values() function to provide polymorphic capabilities.
-$range i 1..n
-$for i [[
-$range j 1..i
-
-template <$for j, [[typename T$j]]>
-class ValueArray$i {
- public:
- $if i==1 [[explicit ]]ValueArray$i($for j, [[T$j v$j]]) : $for j, [[v$(j)_(v$j)]] {}
-
- template <typename T>
- operator ParamGenerator<T>() const {
- const T array[] = {$for j, [[static_cast<T>(v$(j)_)]]};
- return ValuesIn(array);
- }
-
- private:
- // No implementation - assignment is unsupported.
- void operator=(const ValueArray$i& other);
-
-$for j [[
-
- const T$j v$(j)_;
-]]
-
-};
-
-]]
-
-# if GTEST_HAS_COMBINE
-// INTERNAL IMPLEMENTATION - DO NOT USE IN USER CODE.
-//
-// Generates values from the Cartesian product of values produced
-// by the argument generators.
-//
-$range i 2..maxtuple
-$for i [[
-$range j 1..i
-$range k 2..i
-
-template <$for j, [[typename T$j]]>
-class CartesianProductGenerator$i
- : public ParamGeneratorInterface< ::testing::tuple<$for j, [[T$j]]> > {
- public:
- typedef ::testing::tuple<$for j, [[T$j]]> ParamType;
-
- CartesianProductGenerator$i($for j, [[const ParamGenerator<T$j>& g$j]])
- : $for j, [[g$(j)_(g$j)]] {}
- virtual ~CartesianProductGenerator$i() {}
-
- virtual ParamIteratorInterface<ParamType>* Begin() const {
- return new Iterator(this, $for j, [[g$(j)_, g$(j)_.begin()]]);
- }
- virtual ParamIteratorInterface<ParamType>* End() const {
- return new Iterator(this, $for j, [[g$(j)_, g$(j)_.end()]]);
- }
-
- private:
- class Iterator : public ParamIteratorInterface<ParamType> {
- public:
- Iterator(const ParamGeneratorInterface<ParamType>* base, $for j, [[
-
- const ParamGenerator<T$j>& g$j,
- const typename ParamGenerator<T$j>::iterator& current$(j)]])
- : base_(base),
-$for j, [[
-
- begin$(j)_(g$j.begin()), end$(j)_(g$j.end()), current$(j)_(current$j)
-]] {
- ComputeCurrentValue();
- }
- virtual ~Iterator() {}
-
- virtual const ParamGeneratorInterface<ParamType>* BaseGenerator() const {
- return base_;
- }
- // Advance should not be called on beyond-of-range iterators
- // so no component iterators must be beyond end of range, either.
- virtual void Advance() {
- assert(!AtEnd());
- ++current$(i)_;
-
-$for k [[
- if (current$(i+2-k)_ == end$(i+2-k)_) {
- current$(i+2-k)_ = begin$(i+2-k)_;
- ++current$(i+2-k-1)_;
- }
-
-]]
- ComputeCurrentValue();
- }
- virtual ParamIteratorInterface<ParamType>* Clone() const {
- return new Iterator(*this);
- }
- virtual const ParamType* Current() const { return &current_value_; }
- virtual bool Equals(const ParamIteratorInterface<ParamType>& other) const {
- // Having the same base generator guarantees that the other
- // iterator is of the same type and we can downcast.
- GTEST_CHECK_(BaseGenerator() == other.BaseGenerator())
- << "The program attempted to compare iterators "
- << "from different generators." << std::endl;
- const Iterator* typed_other =
- CheckedDowncastToActualType<const Iterator>(&other);
- // We must report iterators equal if they both point beyond their
- // respective ranges. That can happen in a variety of fashions,
- // so we have to consult AtEnd().
- return (AtEnd() && typed_other->AtEnd()) ||
- ($for j && [[
-
- current$(j)_ == typed_other->current$(j)_
-]]);
- }
-
- private:
- Iterator(const Iterator& other)
- : base_(other.base_), $for j, [[
-
- begin$(j)_(other.begin$(j)_),
- end$(j)_(other.end$(j)_),
- current$(j)_(other.current$(j)_)
-]] {
- ComputeCurrentValue();
- }
-
- void ComputeCurrentValue() {
- if (!AtEnd())
- current_value_ = ParamType($for j, [[*current$(j)_]]);
- }
- bool AtEnd() const {
- // We must report iterator past the end of the range when either of the
- // component iterators has reached the end of its range.
- return
-$for j || [[
-
- current$(j)_ == end$(j)_
-]];
- }
-
- // No implementation - assignment is unsupported.
- void operator=(const Iterator& other);
-
- const ParamGeneratorInterface<ParamType>* const base_;
- // begin[i]_ and end[i]_ define the i-th range that Iterator traverses.
- // current[i]_ is the actual traversing iterator.
-$for j [[
-
- const typename ParamGenerator<T$j>::iterator begin$(j)_;
- const typename ParamGenerator<T$j>::iterator end$(j)_;
- typename ParamGenerator<T$j>::iterator current$(j)_;
-]]
-
- ParamType current_value_;
- }; // class CartesianProductGenerator$i::Iterator
-
- // No implementation - assignment is unsupported.
- void operator=(const CartesianProductGenerator$i& other);
-
-
-$for j [[
- const ParamGenerator<T$j> g$(j)_;
-
-]]
-}; // class CartesianProductGenerator$i
-
-
-]]
-
-// INTERNAL IMPLEMENTATION - DO NOT USE IN USER CODE.
-//
-// Helper classes providing Combine() with polymorphic features. They allow
-// casting CartesianProductGeneratorN<T> to ParamGenerator<U> if T is
-// convertible to U.
-//
-$range i 2..maxtuple
-$for i [[
-$range j 1..i
-
-template <$for j, [[class Generator$j]]>
-class CartesianProductHolder$i {
- public:
-CartesianProductHolder$i($for j, [[const Generator$j& g$j]])
- : $for j, [[g$(j)_(g$j)]] {}
- template <$for j, [[typename T$j]]>
- operator ParamGenerator< ::testing::tuple<$for j, [[T$j]]> >() const {
- return ParamGenerator< ::testing::tuple<$for j, [[T$j]]> >(
- new CartesianProductGenerator$i<$for j, [[T$j]]>(
-$for j,[[
-
- static_cast<ParamGenerator<T$j> >(g$(j)_)
-]]));
- }
-
- private:
- // No implementation - assignment is unsupported.
- void operator=(const CartesianProductHolder$i& other);
-
-
-$for j [[
- const Generator$j g$(j)_;
-
-]]
-}; // class CartesianProductHolder$i
-
-]]
-
-# endif // GTEST_HAS_COMBINE
-
-} // namespace internal
-} // namespace testing
-
-#endif // GTEST_HAS_PARAM_TEST
-
-#endif // GTEST_INCLUDE_GTEST_INTERNAL_GTEST_PARAM_UTIL_GENERATED_H_
diff --git a/third_party/aom/third_party/googletest/src/googletest/include/gtest/internal/gtest-param-util.h b/third_party/aom/third_party/googletest/src/googletest/include/gtest/internal/gtest-param-util.h
deleted file mode 100644
index 82cab9b02..000000000
--- a/third_party/aom/third_party/googletest/src/googletest/include/gtest/internal/gtest-param-util.h
+++ /dev/null
@@ -1,731 +0,0 @@
-// Copyright 2008 Google Inc.
-// All Rights Reserved.
-//
-// Redistribution and use in source and binary forms, with or without
-// modification, are permitted provided that the following conditions are
-// met:
-//
-// * Redistributions of source code must retain the above copyright
-// notice, this list of conditions and the following disclaimer.
-// * Redistributions in binary form must reproduce the above
-// copyright notice, this list of conditions and the following disclaimer
-// in the documentation and/or other materials provided with the
-// distribution.
-// * Neither the name of Google Inc. nor the names of its
-// contributors may be used to endorse or promote products derived from
-// this software without specific prior written permission.
-//
-// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
-// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
-// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
-// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
-// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
-// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
-// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
-// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
-// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-//
-// Author: vladl@google.com (Vlad Losev)
-
-// Type and function utilities for implementing parameterized tests.
-
-#ifndef GTEST_INCLUDE_GTEST_INTERNAL_GTEST_PARAM_UTIL_H_
-#define GTEST_INCLUDE_GTEST_INTERNAL_GTEST_PARAM_UTIL_H_
-
-#include <ctype.h>
-
-#include <iterator>
-#include <set>
-#include <utility>
-#include <vector>
-
-// scripts/fuse_gtest.py depends on gtest's own header being #included
-// *unconditionally*. Therefore these #includes cannot be moved
-// inside #if GTEST_HAS_PARAM_TEST.
-#include "gtest/internal/gtest-internal.h"
-#include "gtest/internal/gtest-linked_ptr.h"
-#include "gtest/internal/gtest-port.h"
-#include "gtest/gtest-printers.h"
-
-#if GTEST_HAS_PARAM_TEST
-
-namespace testing {
-
-// Input to a parameterized test name generator, describing a test parameter.
-// Consists of the parameter value and the integer parameter index.
-template <class ParamType>
-struct TestParamInfo {
- TestParamInfo(const ParamType& a_param, size_t an_index) :
- param(a_param),
- index(an_index) {}
- ParamType param;
- size_t index;
-};
-
-// A builtin parameterized test name generator which returns the result of
-// testing::PrintToString.
-struct PrintToStringParamName {
- template <class ParamType>
- std::string operator()(const TestParamInfo<ParamType>& info) const {
- return PrintToString(info.param);
- }
-};
-
-namespace internal {
-
-// INTERNAL IMPLEMENTATION - DO NOT USE IN USER CODE.
-//
-// Outputs a message explaining invalid registration of different
-// fixture class for the same test case. This may happen when
-// TEST_P macro is used to define two tests with the same name
-// but in different namespaces.
-GTEST_API_ void ReportInvalidTestCaseType(const char* test_case_name,
- CodeLocation code_location);
-
-template <typename> class ParamGeneratorInterface;
-template <typename> class ParamGenerator;
-
-// Interface for iterating over elements provided by an implementation
-// of ParamGeneratorInterface<T>.
-template <typename T>
-class ParamIteratorInterface {
- public:
- virtual ~ParamIteratorInterface() {}
- // A pointer to the base generator instance.
- // Used only for the purposes of iterator comparison
- // to make sure that two iterators belong to the same generator.
- virtual const ParamGeneratorInterface<T>* BaseGenerator() const = 0;
- // Advances iterator to point to the next element
- // provided by the generator. The caller is responsible
- // for not calling Advance() on an iterator equal to
- // BaseGenerator()->End().
- virtual void Advance() = 0;
- // Clones the iterator object. Used for implementing copy semantics
- // of ParamIterator<T>.
- virtual ParamIteratorInterface* Clone() const = 0;
- // Dereferences the current iterator and provides (read-only) access
- // to the pointed value. It is the caller's responsibility not to call
- // Current() on an iterator equal to BaseGenerator()->End().
- // Used for implementing ParamGenerator<T>::operator*().
- virtual const T* Current() const = 0;
- // Determines whether the given iterator and other point to the same
- // element in the sequence generated by the generator.
- // Used for implementing ParamGenerator<T>::operator==().
- virtual bool Equals(const ParamIteratorInterface& other) const = 0;
-};
-
-// Class iterating over elements provided by an implementation of
-// ParamGeneratorInterface<T>. It wraps ParamIteratorInterface<T>
-// and implements the const forward iterator concept.
-template <typename T>
-class ParamIterator {
- public:
- typedef T value_type;
- typedef const T& reference;
- typedef ptrdiff_t difference_type;
-
- // ParamIterator assumes ownership of the impl_ pointer.
- ParamIterator(const ParamIterator& other) : impl_(other.impl_->Clone()) {}
- ParamIterator& operator=(const ParamIterator& other) {
- if (this != &other)
- impl_.reset(other.impl_->Clone());
- return *this;
- }
-
- const T& operator*() const { return *impl_->Current(); }
- const T* operator->() const { return impl_->Current(); }
- // Prefix version of operator++.
- ParamIterator& operator++() {
- impl_->Advance();
- return *this;
- }
- // Postfix version of operator++.
- ParamIterator operator++(int /*unused*/) {
- ParamIteratorInterface<T>* clone = impl_->Clone();
- impl_->Advance();
- return ParamIterator(clone);
- }
- bool operator==(const ParamIterator& other) const {
- return impl_.get() == other.impl_.get() || impl_->Equals(*other.impl_);
- }
- bool operator!=(const ParamIterator& other) const {
- return !(*this == other);
- }
-
- private:
- friend class ParamGenerator<T>;
- explicit ParamIterator(ParamIteratorInterface<T>* impl) : impl_(impl) {}
- scoped_ptr<ParamIteratorInterface<T> > impl_;
-};
-
-// ParamGeneratorInterface<T> is the binary interface to access generators
-// defined in other translation units.
-template <typename T>
-class ParamGeneratorInterface {
- public:
- typedef T ParamType;
-
- virtual ~ParamGeneratorInterface() {}
-
- // Generator interface definition
- virtual ParamIteratorInterface<T>* Begin() const = 0;
- virtual ParamIteratorInterface<T>* End() const = 0;
-};
-
-// Wraps ParamGeneratorInterface<T> and provides general generator syntax
-// compatible with the STL Container concept.
-// This class implements copy initialization semantics and the contained
-// ParamGeneratorInterface<T> instance is shared among all copies
-// of the original object. This is possible because that instance is immutable.
-template<typename T>
-class ParamGenerator {
- public:
- typedef ParamIterator<T> iterator;
-
- explicit ParamGenerator(ParamGeneratorInterface<T>* impl) : impl_(impl) {}
- ParamGenerator(const ParamGenerator& other) : impl_(other.impl_) {}
-
- ParamGenerator& operator=(const ParamGenerator& other) {
- impl_ = other.impl_;
- return *this;
- }
-
- iterator begin() const { return iterator(impl_->Begin()); }
- iterator end() const { return iterator(impl_->End()); }
-
- private:
- linked_ptr<const ParamGeneratorInterface<T> > impl_;
-};
-
-// Generates values from a range of two comparable values. Can be used to
-// generate sequences of user-defined types that implement operator+() and
-// operator<().
-// This class is used in the Range() function.
-template <typename T, typename IncrementT>
-class RangeGenerator : public ParamGeneratorInterface<T> {
- public:
- RangeGenerator(T begin, T end, IncrementT step)
- : begin_(begin), end_(end),
- step_(step), end_index_(CalculateEndIndex(begin, end, step)) {}
- virtual ~RangeGenerator() {}
-
- virtual ParamIteratorInterface<T>* Begin() const {
- return new Iterator(this, begin_, 0, step_);
- }
- virtual ParamIteratorInterface<T>* End() const {
- return new Iterator(this, end_, end_index_, step_);
- }
-
- private:
- class Iterator : public ParamIteratorInterface<T> {
- public:
- Iterator(const ParamGeneratorInterface<T>* base, T value, int index,
- IncrementT step)
- : base_(base), value_(value), index_(index), step_(step) {}
- virtual ~Iterator() {}
-
- virtual const ParamGeneratorInterface<T>* BaseGenerator() const {
- return base_;
- }
- virtual void Advance() {
- value_ = static_cast<T>(value_ + step_);
- index_++;
- }
- virtual ParamIteratorInterface<T>* Clone() const {
- return new Iterator(*this);
- }
- virtual const T* Current() const { return &value_; }
- virtual bool Equals(const ParamIteratorInterface<T>& other) const {
- // Having the same base generator guarantees that the other
- // iterator is of the same type and we can downcast.
- GTEST_CHECK_(BaseGenerator() == other.BaseGenerator())
- << "The program attempted to compare iterators "
- << "from different generators." << std::endl;
- const int other_index =
- CheckedDowncastToActualType<const Iterator>(&other)->index_;
- return index_ == other_index;
- }
-
- private:
- Iterator(const Iterator& other)
- : ParamIteratorInterface<T>(),
- base_(other.base_), value_(other.value_), index_(other.index_),
- step_(other.step_) {}
-
- // No implementation - assignment is unsupported.
- void operator=(const Iterator& other);
-
- const ParamGeneratorInterface<T>* const base_;
- T value_;
- int index_;
- const IncrementT step_;
- }; // class RangeGenerator::Iterator
-
- static int CalculateEndIndex(const T& begin,
- const T& end,
- const IncrementT& step) {
- int end_index = 0;
- for (T i = begin; i < end; i = static_cast<T>(i + step))
- end_index++;
- return end_index;
- }
-
- // No implementation - assignment is unsupported.
- void operator=(const RangeGenerator& other);
-
- const T begin_;
- const T end_;
- const IncrementT step_;
- // The index for the end() iterator. All the elements in the generated
- // sequence are indexed (0-based) to aid iterator comparison.
- const int end_index_;
-}; // class RangeGenerator
-
-
-// Generates values from a pair of STL-style iterators. Used in the
-// ValuesIn() function. The elements are copied from the source range
-// since the source can be located on the stack, and the generator
-// is likely to persist beyond that stack frame.
-template <typename T>
-class ValuesInIteratorRangeGenerator : public ParamGeneratorInterface<T> {
- public:
- template <typename ForwardIterator>
- ValuesInIteratorRangeGenerator(ForwardIterator begin, ForwardIterator end)
- : container_(begin, end) {}
- virtual ~ValuesInIteratorRangeGenerator() {}
-
- virtual ParamIteratorInterface<T>* Begin() const {
- return new Iterator(this, container_.begin());
- }
- virtual ParamIteratorInterface<T>* End() const {
- return new Iterator(this, container_.end());
- }
-
- private:
- typedef typename ::std::vector<T> ContainerType;
-
- class Iterator : public ParamIteratorInterface<T> {
- public:
- Iterator(const ParamGeneratorInterface<T>* base,
- typename ContainerType::const_iterator iterator)
- : base_(base), iterator_(iterator) {}
- virtual ~Iterator() {}
-
- virtual const ParamGeneratorInterface<T>* BaseGenerator() const {
- return base_;
- }
- virtual void Advance() {
- ++iterator_;
- value_.reset();
- }
- virtual ParamIteratorInterface<T>* Clone() const {
- return new Iterator(*this);
- }
- // We need to use cached value referenced by iterator_ because *iterator_
- // can return a temporary object (and of type other then T), so just
- // having "return &*iterator_;" doesn't work.
- // value_ is updated here and not in Advance() because Advance()
- // can advance iterator_ beyond the end of the range, and we cannot
- // detect that fact. The client code, on the other hand, is
- // responsible for not calling Current() on an out-of-range iterator.
- virtual const T* Current() const {
- if (value_.get() == NULL)
- value_.reset(new T(*iterator_));
- return value_.get();
- }
- virtual bool Equals(const ParamIteratorInterface<T>& other) const {
- // Having the same base generator guarantees that the other
- // iterator is of the same type and we can downcast.
- GTEST_CHECK_(BaseGenerator() == other.BaseGenerator())
- << "The program attempted to compare iterators "
- << "from different generators." << std::endl;
- return iterator_ ==
- CheckedDowncastToActualType<const Iterator>(&other)->iterator_;
- }
-
- private:
- Iterator(const Iterator& other)
- // The explicit constructor call suppresses a false warning
- // emitted by gcc when supplied with the -Wextra option.
- : ParamIteratorInterface<T>(),
- base_(other.base_),
- iterator_(other.iterator_) {}
-
- const ParamGeneratorInterface<T>* const base_;
- typename ContainerType::const_iterator iterator_;
- // A cached value of *iterator_. We keep it here to allow access by
- // pointer in the wrapping iterator's operator->().
- // value_ needs to be mutable to be accessed in Current().
- // Use of scoped_ptr helps manage cached value's lifetime,
- // which is bound by the lifespan of the iterator itself.
- mutable scoped_ptr<const T> value_;
- }; // class ValuesInIteratorRangeGenerator::Iterator
-
- // No implementation - assignment is unsupported.
- void operator=(const ValuesInIteratorRangeGenerator& other);
-
- const ContainerType container_;
-}; // class ValuesInIteratorRangeGenerator
-
-// INTERNAL IMPLEMENTATION - DO NOT USE IN USER CODE.
-//
-// Default parameterized test name generator, returns a string containing the
-// integer test parameter index.
-template <class ParamType>
-std::string DefaultParamName(const TestParamInfo<ParamType>& info) {
- Message name_stream;
- name_stream << info.index;
- return name_stream.GetString();
-}
-
-// INTERNAL IMPLEMENTATION - DO NOT USE IN USER CODE.
-//
-// Parameterized test name overload helpers, which help the
-// INSTANTIATE_TEST_CASE_P macro choose between the default parameterized
-// test name generator and user param name generator.
-template <class ParamType, class ParamNameGenFunctor>
-ParamNameGenFunctor GetParamNameGen(ParamNameGenFunctor func) {
- return func;
-}
-
-template <class ParamType>
-struct ParamNameGenFunc {
- typedef std::string Type(const TestParamInfo<ParamType>&);
-};
-
-template <class ParamType>
-typename ParamNameGenFunc<ParamType>::Type *GetParamNameGen() {
- return DefaultParamName;
-}
-
-// INTERNAL IMPLEMENTATION - DO NOT USE IN USER CODE.
-//
-// Stores a parameter value and later creates tests parameterized with that
-// value.
-template <class TestClass>
-class ParameterizedTestFactory : public TestFactoryBase {
- public:
- typedef typename TestClass::ParamType ParamType;
- explicit ParameterizedTestFactory(ParamType parameter) :
- parameter_(parameter) {}
- virtual Test* CreateTest() {
- TestClass::SetParam(&parameter_);
- return new TestClass();
- }
-
- private:
- const ParamType parameter_;
-
- GTEST_DISALLOW_COPY_AND_ASSIGN_(ParameterizedTestFactory);
-};
-
-// INTERNAL IMPLEMENTATION - DO NOT USE IN USER CODE.
-//
-// TestMetaFactoryBase is a base class for meta-factories that create
-// test factories for passing into MakeAndRegisterTestInfo function.
-template <class ParamType>
-class TestMetaFactoryBase {
- public:
- virtual ~TestMetaFactoryBase() {}
-
- virtual TestFactoryBase* CreateTestFactory(ParamType parameter) = 0;
-};
-
-// INTERNAL IMPLEMENTATION - DO NOT USE IN USER CODE.
-//
-// TestMetaFactory creates test factories for passing into
-// MakeAndRegisterTestInfo function. Since MakeAndRegisterTestInfo receives
-// ownership of test factory pointer, same factory object cannot be passed
-// into that method twice. But ParameterizedTestCaseInfo is going to call
-// it for each Test/Parameter value combination. Thus it needs meta factory
-// creator class.
-template <class TestCase>
-class TestMetaFactory
- : public TestMetaFactoryBase<typename TestCase::ParamType> {
- public:
- typedef typename TestCase::ParamType ParamType;
-
- TestMetaFactory() {}
-
- virtual TestFactoryBase* CreateTestFactory(ParamType parameter) {
- return new ParameterizedTestFactory<TestCase>(parameter);
- }
-
- private:
- GTEST_DISALLOW_COPY_AND_ASSIGN_(TestMetaFactory);
-};
-
-// INTERNAL IMPLEMENTATION - DO NOT USE IN USER CODE.
-//
-// ParameterizedTestCaseInfoBase is a generic interface
-// to ParameterizedTestCaseInfo classes. ParameterizedTestCaseInfoBase
-// accumulates test information provided by TEST_P macro invocations
-// and generators provided by INSTANTIATE_TEST_CASE_P macro invocations
-// and uses that information to register all resulting test instances
-// in RegisterTests method. The ParameterizeTestCaseRegistry class holds
-// a collection of pointers to the ParameterizedTestCaseInfo objects
-// and calls RegisterTests() on each of them when asked.
-class ParameterizedTestCaseInfoBase {
- public:
- virtual ~ParameterizedTestCaseInfoBase() {}
-
- // Base part of test case name for display purposes.
- virtual const string& GetTestCaseName() const = 0;
- // Test case id to verify identity.
- virtual TypeId GetTestCaseTypeId() const = 0;
- // UnitTest class invokes this method to register tests in this
- // test case right before running them in RUN_ALL_TESTS macro.
- // This method should not be called more then once on any single
- // instance of a ParameterizedTestCaseInfoBase derived class.
- virtual void RegisterTests() = 0;
-
- protected:
- ParameterizedTestCaseInfoBase() {}
-
- private:
- GTEST_DISALLOW_COPY_AND_ASSIGN_(ParameterizedTestCaseInfoBase);
-};
-
-// INTERNAL IMPLEMENTATION - DO NOT USE IN USER CODE.
-//
-// ParameterizedTestCaseInfo accumulates tests obtained from TEST_P
-// macro invocations for a particular test case and generators
-// obtained from INSTANTIATE_TEST_CASE_P macro invocations for that
-// test case. It registers tests with all values generated by all
-// generators when asked.
-template <class TestCase>
-class ParameterizedTestCaseInfo : public ParameterizedTestCaseInfoBase {
- public:
- // ParamType and GeneratorCreationFunc are private types but are required
- // for declarations of public methods AddTestPattern() and
- // AddTestCaseInstantiation().
- typedef typename TestCase::ParamType ParamType;
- // A function that returns an instance of appropriate generator type.
- typedef ParamGenerator<ParamType>(GeneratorCreationFunc)();
- typedef typename ParamNameGenFunc<ParamType>::Type ParamNameGeneratorFunc;
-
- explicit ParameterizedTestCaseInfo(
- const char* name, CodeLocation code_location)
- : test_case_name_(name), code_location_(code_location) {}
-
- // Test case base name for display purposes.
- virtual const string& GetTestCaseName() const { return test_case_name_; }
- // Test case id to verify identity.
- virtual TypeId GetTestCaseTypeId() const { return GetTypeId<TestCase>(); }
- // TEST_P macro uses AddTestPattern() to record information
- // about a single test in a LocalTestInfo structure.
- // test_case_name is the base name of the test case (without invocation
- // prefix). test_base_name is the name of an individual test without
- // parameter index. For the test SequenceA/FooTest.DoBar/1 FooTest is
- // test case base name and DoBar is test base name.
- void AddTestPattern(const char* test_case_name,
- const char* test_base_name,
- TestMetaFactoryBase<ParamType>* meta_factory) {
- tests_.push_back(linked_ptr<TestInfo>(new TestInfo(test_case_name,
- test_base_name,
- meta_factory)));
- }
- // INSTANTIATE_TEST_CASE_P macro uses AddGenerator() to record information
- // about a generator.
- int AddTestCaseInstantiation(const string& instantiation_name,
- GeneratorCreationFunc* func,
- ParamNameGeneratorFunc* name_func,
- const char* file,
- int line) {
- instantiations_.push_back(
- InstantiationInfo(instantiation_name, func, name_func, file, line));
- return 0; // Return value used only to run this method in namespace scope.
- }
- // UnitTest class invokes this method to register tests in this test case
- // test cases right before running tests in RUN_ALL_TESTS macro.
- // This method should not be called more then once on any single
- // instance of a ParameterizedTestCaseInfoBase derived class.
- // UnitTest has a guard to prevent from calling this method more then once.
- virtual void RegisterTests() {
- for (typename TestInfoContainer::iterator test_it = tests_.begin();
- test_it != tests_.end(); ++test_it) {
- linked_ptr<TestInfo> test_info = *test_it;
- for (typename InstantiationContainer::iterator gen_it =
- instantiations_.begin(); gen_it != instantiations_.end();
- ++gen_it) {
- const string& instantiation_name = gen_it->name;
- ParamGenerator<ParamType> generator((*gen_it->generator)());
- ParamNameGeneratorFunc* name_func = gen_it->name_func;
- const char* file = gen_it->file;
- int line = gen_it->line;
-
- string test_case_name;
- if ( !instantiation_name.empty() )
- test_case_name = instantiation_name + "/";
- test_case_name += test_info->test_case_base_name;
-
- size_t i = 0;
- std::set<std::string> test_param_names;
- for (typename ParamGenerator<ParamType>::iterator param_it =
- generator.begin();
- param_it != generator.end(); ++param_it, ++i) {
- Message test_name_stream;
-
- std::string param_name = name_func(
- TestParamInfo<ParamType>(*param_it, i));
-
- GTEST_CHECK_(IsValidParamName(param_name))
- << "Parameterized test name '" << param_name
- << "' is invalid, in " << file
- << " line " << line << std::endl;
-
- GTEST_CHECK_(test_param_names.count(param_name) == 0)
- << "Duplicate parameterized test name '" << param_name
- << "', in " << file << " line " << line << std::endl;
-
- test_param_names.insert(param_name);
-
- test_name_stream << test_info->test_base_name << "/" << param_name;
- MakeAndRegisterTestInfo(
- test_case_name.c_str(),
- test_name_stream.GetString().c_str(),
- NULL, // No type parameter.
- PrintToString(*param_it).c_str(),
- code_location_,
- GetTestCaseTypeId(),
- TestCase::SetUpTestCase,
- TestCase::TearDownTestCase,
- test_info->test_meta_factory->CreateTestFactory(*param_it));
- } // for param_it
- } // for gen_it
- } // for test_it
- } // RegisterTests
-
- private:
- // TestInfo describes a single test registered with the TEST_P macro:
- // the base names of its test case and of the test itself, plus the meta
- // factory used to create a test factory for each parameter value.
- struct TestInfo {
-   TestInfo(const char* case_base_name,
-            const char* base_name,
-            TestMetaFactoryBase<ParamType>* meta_factory)
-       : test_case_base_name(case_base_name),
-         test_base_name(base_name),
-         test_meta_factory(meta_factory) {}
-
-   // Test case name without the "<instantiation name>/" prefix.
-   const string test_case_base_name;
-   // Test name without the "/<parameter name>" suffix.
-   const string test_base_name;
-   // Held in a scoped_ptr; CreateTestFactory() is called on it once per
-   // generated parameter value.
-   const scoped_ptr<TestMetaFactoryBase<ParamType> > test_meta_factory;
- };
- typedef ::std::vector<linked_ptr<TestInfo> > TestInfoContainer;
- // InstantiationInfo stores what one INSTANTIATE_TEST_CASE_P macro supplied:
- //   <instantiation name, function creating the parameter sequence generator,
- //    function producing a name for each parameter, source file, source line>
- struct InstantiationInfo {
-   InstantiationInfo(const std::string& instantiation_name,
-                     GeneratorCreationFunc* make_generator,
-                     ParamNameGeneratorFunc* make_param_name,
-                     const char* source_file,
-                     int source_line)
-       : name(instantiation_name),
-         generator(make_generator),
-         name_func(make_param_name),
-         file(source_file),
-         line(source_line) {}
-
-   // Instantiation name; may be empty, in which case no "<name>/" prefix
-   // is added to the test case name.
-   std::string name;
-   // Called to obtain the ParamGenerator for this instantiation.
-   GeneratorCreationFunc* generator;
-   // Called with each TestParamInfo to produce the parameter's name.
-   ParamNameGeneratorFunc* name_func;
-   // Source location of the instantiation, reported in name-check failures.
-   const char* file;
-   int line;
- };
- typedef ::std::vector<InstantiationInfo> InstantiationContainer;
-
- // Returns true iff |name| is usable as a parameterized test name: it must
- // be non-empty and every character must be either an underscore or a
- // character for which isalnum() returns non-zero.
- static bool IsValidParamName(const std::string& name) {
-   // Check for empty string
-   if (name.empty())
-     return false;
-
-   // Check for invalid characters
-   for (std::string::size_type index = 0; index < name.size(); ++index) {
-     // isalnum() has undefined behavior for arguments that are neither EOF
-     // nor representable as unsigned char. Plain char may be signed, so a
-     // byte >= 0x80 must be converted before the call.
-     const unsigned char ch = static_cast<unsigned char>(name[index]);
-     if (!isalnum(ch) && ch != '_')
-       return false;
-   }
-
-   return true;
- }
-
- const string test_case_name_;
- CodeLocation code_location_;
- TestInfoContainer tests_;
- InstantiationContainer instantiations_;
-
- GTEST_DISALLOW_COPY_AND_ASSIGN_(ParameterizedTestCaseInfo);
-}; // class ParameterizedTestCaseInfo
-
-// INTERNAL IMPLEMENTATION - DO NOT USE IN USER CODE.
-//
-// ParameterizedTestCaseRegistry keeps one ParameterizedTestCaseInfoBase
-// object per parameterized test case and looks them up by test case name.
-// The TEST_P and INSTANTIATE_TEST_CASE_P macros use it to find the
-// ParameterizedTestCaseInfo descriptor that belongs to their test case.
-class ParameterizedTestCaseRegistry {
-  public:
-   ParameterizedTestCaseRegistry() {}
-
-   // Deletes every descriptor stored in the registry.
-   ~ParameterizedTestCaseRegistry() {
-     TestCaseInfoContainer::iterator info = test_case_infos_.begin();
-     while (info != test_case_infos_.end()) {
-       delete *info;
-       ++info;
-     }
-   }
-
-   // Returns the descriptor holding the tests and instantiations of the test
-   // case called |test_case_name|, creating and storing a new one when the
-   // registry has no entry with that name yet.
-   template <class TestCase>
-   ParameterizedTestCaseInfo<TestCase>* GetTestCasePatternHolder(
-       const char* test_case_name,
-       CodeLocation code_location) {
-     typedef ParameterizedTestCaseInfo<TestCase> TypedInfo;
-
-     for (TestCaseInfoContainer::iterator info = test_case_infos_.begin();
-          info != test_case_infos_.end(); ++info) {
-       if (!((*info)->GetTestCaseName() == test_case_name))
-         continue;
-
-       if ((*info)->GetTestCaseTypeId() == GetTypeId<TestCase>()) {
-         // The stored object was registered for the very same fixture type,
-         // so it can be downcast without any further checks.
-         return CheckedDowncastToActualType<TypedInfo>(*info);
-       }
-
-       // Same name but a different fixture type: this is incorrect usage of
-       // Google Test facilities. Report it and terminate the program, since
-       // correct test case set-up and tear-down cannot be guaranteed.
-       ReportInvalidTestCaseType(test_case_name, code_location);
-       posix::Abort();
-       break;
-     }
-
-     TypedInfo* const created = new TypedInfo(test_case_name, code_location);
-     test_case_infos_.push_back(created);
-     return created;
-   }
-
-   // Asks every stored descriptor to register its tests.
-   void RegisterTests() {
-     TestCaseInfoContainer::iterator info = test_case_infos_.begin();
-     while (info != test_case_infos_.end()) {
-       (*info)->RegisterTests();
-       ++info;
-     }
-   }
-
-  private:
-   typedef ::std::vector<ParameterizedTestCaseInfoBase*> TestCaseInfoContainer;
-
-   // Descriptors are allocated with new in GetTestCasePatternHolder() and
-   // deleted in the destructor.
-   TestCaseInfoContainer test_case_infos_;
-
-   GTEST_DISALLOW_COPY_AND_ASSIGN_(ParameterizedTestCaseRegistry);
-};
-
-} // namespace internal
-} // namespace testing
-
-#endif // GTEST_HAS_PARAM_TEST
-
-#endif // GTEST_INCLUDE_GTEST_INTERNAL_GTEST_PARAM_UTIL_H_
diff --git a/third_party/aom/third_party/googletest/src/googletest/include/gtest/internal/gtest-port-arch.h b/third_party/aom/third_party/googletest/src/googletest/include/gtest/internal/gtest-port-arch.h
deleted file mode 100644
index 74ab94905..000000000
--- a/third_party/aom/third_party/googletest/src/googletest/include/gtest/internal/gtest-port-arch.h
+++ /dev/null
@@ -1,93 +0,0 @@
-// Copyright 2015, Google Inc.
-// All rights reserved.
-//
-// Redistribution and use in source and binary forms, with or without
-// modification, are permitted provided that the following conditions are
-// met:
-//
-// * Redistributions of source code must retain the above copyright
-// notice, this list of conditions and the following disclaimer.
-// * Redistributions in binary form must reproduce the above
-// copyright notice, this list of conditions and the following disclaimer
-// in the documentation and/or other materials provided with the
-// distribution.
-// * Neither the name of Google Inc. nor the names of its
-// contributors may be used to endorse or promote products derived from
-// this software without specific prior written permission.
-//
-// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
-// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
-// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
-// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
-// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
-// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
-// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
-// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
-// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-//
-// The Google C++ Testing Framework (Google Test)
-//
-// This header file defines the GTEST_OS_* macros.
-// It is separate from gtest-port.h so that custom/gtest-port.h can include it.
-
-#ifndef GTEST_INCLUDE_GTEST_INTERNAL_GTEST_PORT_ARCH_H_
-#define GTEST_INCLUDE_GTEST_INTERNAL_GTEST_PORT_ARCH_H_
-
-// Determines the platform on which Google Test is compiled.
-//
-// Only the first matching branch of the chain below is taken. It defines
-// one GTEST_OS_* macro to 1, optionally followed by a refinement (a
-// GTEST_OS_WINDOWS_* flavor, GTEST_OS_IOS or GTEST_OS_LINUX_ANDROID). If no
-// branch matches, no GTEST_OS_* macro is defined at all.
-#ifdef __CYGWIN__
-# define GTEST_OS_CYGWIN 1
-#elif defined __SYMBIAN32__
-# define GTEST_OS_SYMBIAN 1
-#elif defined _WIN32
-# define GTEST_OS_WINDOWS 1
-# ifdef _WIN32_WCE
-#  define GTEST_OS_WINDOWS_MOBILE 1
-# elif defined(__MINGW__) || defined(__MINGW32__)
-#  define GTEST_OS_WINDOWS_MINGW 1
-# elif defined(WINAPI_FAMILY)
-#  include <winapifamily.h>
-#  if WINAPI_FAMILY_PARTITION(WINAPI_PARTITION_DESKTOP)
-#   define GTEST_OS_WINDOWS_DESKTOP 1
-#  elif WINAPI_FAMILY_PARTITION(WINAPI_PARTITION_PHONE_APP)
-#   define GTEST_OS_WINDOWS_PHONE 1
-#  elif WINAPI_FAMILY_PARTITION(WINAPI_PARTITION_APP)
-#   define GTEST_OS_WINDOWS_RT 1
-#  else
-    // WINAPI_FAMILY defined but no known partition matched.
-    // Default to desktop.
-#   define GTEST_OS_WINDOWS_DESKTOP 1
-#  endif
-# else
-#  define GTEST_OS_WINDOWS_DESKTOP 1
-# endif  // _WIN32_WCE
-#elif defined __APPLE__
-# define GTEST_OS_MAC 1
-// TARGET_OS_IPHONE comes from <TargetConditionals.h>. Include it here so
-// the iOS check also works when this header is included on its own (for
-// example from custom/gtest-port.h). Without the include, the undefined
-// macro evaluates to 0 and GTEST_OS_IOS is silently left undefined.
-# include <TargetConditionals.h>
-# if TARGET_OS_IPHONE
-#  define GTEST_OS_IOS 1
-# endif
-#elif defined __FreeBSD__
-# define GTEST_OS_FREEBSD 1
-#elif defined __linux__
-# define GTEST_OS_LINUX 1
-# if defined __ANDROID__
-#  define GTEST_OS_LINUX_ANDROID 1
-# endif
-#elif defined __MVS__
-# define GTEST_OS_ZOS 1
-#elif defined(__sun) && defined(__SVR4)
-# define GTEST_OS_SOLARIS 1
-#elif defined(_AIX)
-# define GTEST_OS_AIX 1
-#elif defined(__hpux)
-# define GTEST_OS_HPUX 1
-#elif defined __native_client__
-# define GTEST_OS_NACL 1
-#elif defined __OpenBSD__
-# define GTEST_OS_OPENBSD 1
-#elif defined __QNX__
-# define GTEST_OS_QNX 1
-#endif  // __CYGWIN__
-
-#endif // GTEST_INCLUDE_GTEST_INTERNAL_GTEST_PORT_ARCH_H_
diff --git a/third_party/aom/third_party/googletest/src/googletest/include/gtest/internal/gtest-port.h b/third_party/aom/third_party/googletest/src/googletest/include/gtest/internal/gtest-port.h
deleted file mode 100644
index da57e65d3..000000000
--- a/third_party/aom/third_party/googletest/src/googletest/include/gtest/internal/gtest-port.h
+++ /dev/null
@@ -1,2567 +0,0 @@
-// Copyright 2005, Google Inc.
-// All rights reserved.
-//
-// Redistribution and use in source and binary forms, with or without
-// modification, are permitted provided that the following conditions are
-// met:
-//
-// * Redistributions of source code must retain the above copyright
-// notice, this list of conditions and the following disclaimer.
-// * Redistributions in binary form must reproduce the above
-// copyright notice, this list of conditions and the following disclaimer
-// in the documentation and/or other materials provided with the
-// distribution.
-// * Neither the name of Google Inc. nor the names of its
-// contributors may be used to endorse or promote products derived from
-// this software without specific prior written permission.
-//
-// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
-// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
-// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
-// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
-// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
-// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
-// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
-// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
-// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-//
-// Authors: wan@google.com (Zhanyong Wan)
-//
-// Low-level types and utilities for porting Google Test to various
-// platforms. All macros ending with _ and symbols defined in an
-// internal namespace are subject to change without notice. Code
-// outside Google Test MUST NOT USE THEM DIRECTLY. Macros that don't
-// end with _ are part of Google Test's public API and can be used by
-// code outside Google Test.
-//
-// This file is fundamental to Google Test. All other Google Test source
-// files are expected to #include this. Therefore, it cannot #include
-// any other Google Test header.
-
-#ifndef GTEST_INCLUDE_GTEST_INTERNAL_GTEST_PORT_H_
-#define GTEST_INCLUDE_GTEST_INTERNAL_GTEST_PORT_H_
-
-// Environment-describing macros
-// -----------------------------
-//
-// Google Test can be used in many different environments. Macros in
-// this section tell Google Test what kind of environment it is being
-// used in, such that Google Test can provide environment-specific
-// features and implementations.
-//
-// Google Test tries to automatically detect the properties of its
-// environment, so users usually don't need to worry about these
-// macros. However, the automatic detection is not perfect.
-// Sometimes it's necessary for a user to define some of the following
-// macros in the build script to override Google Test's decisions.
-//
-// If the user doesn't define a macro in the list, Google Test will
-// provide a default definition. After this header is #included, all
-// macros in this list will be defined to either 1 or 0.
-//
-// Notes to maintainers:
-// - Each macro here is a user-tweakable knob; do not grow the list
-// lightly.
-// - Use #if to key off these macros. Don't use #ifdef or "#if
-// defined(...)", which will not work as these macros are ALWAYS
-// defined.
-//
-// GTEST_HAS_CLONE - Define it to 1/0 to indicate that clone(2)
-// is/isn't available.
-// GTEST_HAS_EXCEPTIONS - Define it to 1/0 to indicate that exceptions
-// are enabled.
-// GTEST_HAS_GLOBAL_STRING - Define it to 1/0 to indicate that ::string
-// is/isn't available (some systems define
-// ::string, which is different to std::string).
-// GTEST_HAS_GLOBAL_WSTRING - Define it to 1/0 to indicate that ::wstring
-// is/isn't available (some systems define
-// ::wstring, which is different to std::wstring).
-// GTEST_HAS_POSIX_RE - Define it to 1/0 to indicate that POSIX regular
-// expressions are/aren't available.
-// GTEST_HAS_PTHREAD - Define it to 1/0 to indicate that <pthread.h>
-// is/isn't available.
-// GTEST_HAS_RTTI - Define it to 1/0 to indicate that RTTI is/isn't
-// enabled.
-// GTEST_HAS_STD_WSTRING - Define it to 1/0 to indicate that
-// std::wstring does/doesn't work (Google Test can
-// be used where std::wstring is unavailable).
-// GTEST_HAS_TR1_TUPLE - Define it to 1/0 to indicate tr1::tuple
-// is/isn't available.
-// GTEST_HAS_SEH - Define it to 1/0 to indicate whether the
-// compiler supports Microsoft's "Structured
-// Exception Handling".
-// GTEST_HAS_STREAM_REDIRECTION
-// - Define it to 1/0 to indicate whether the
-// platform supports I/O stream redirection using
-// dup() and dup2().
-// GTEST_USE_OWN_TR1_TUPLE - Define it to 1/0 to indicate whether Google
-// Test's own tr1 tuple implementation should be
-// used. Unused when the user sets
-// GTEST_HAS_TR1_TUPLE to 0.
-// GTEST_LANG_CXX11 - Define it to 1/0 to indicate that Google Test
-// is building in C++11/C++98 mode.
-// GTEST_LINKED_AS_SHARED_LIBRARY
-// - Define to 1 when compiling tests that use
-// Google Test as a shared library (known as
-// DLL on Windows).
-// GTEST_CREATE_SHARED_LIBRARY
-// - Define to 1 when compiling Google Test itself
-// as a shared library.
-
-// Platform-indicating macros
-// --------------------------
-//
-// Macros indicating the platform on which Google Test is being used
-// (a macro is defined to 1 if compiled on the given platform;
-// otherwise UNDEFINED -- it's never defined to 0.). Google Test
-// defines these macros automatically. Code outside Google Test MUST
-// NOT define them.
-//
-// GTEST_OS_AIX - IBM AIX
-// GTEST_OS_CYGWIN - Cygwin
-// GTEST_OS_FREEBSD - FreeBSD
-// GTEST_OS_HPUX - HP-UX
-// GTEST_OS_LINUX - Linux
-// GTEST_OS_LINUX_ANDROID - Google Android
-// GTEST_OS_MAC - Mac OS X
-// GTEST_OS_IOS - iOS
-// GTEST_OS_NACL - Google Native Client (NaCl)
-// GTEST_OS_OPENBSD - OpenBSD
-// GTEST_OS_QNX - QNX
-// GTEST_OS_SOLARIS - Sun Solaris
-// GTEST_OS_SYMBIAN - Symbian
-// GTEST_OS_WINDOWS - Windows (Desktop, MinGW, or Mobile)
-// GTEST_OS_WINDOWS_DESKTOP - Windows Desktop
-// GTEST_OS_WINDOWS_MINGW - MinGW
-// GTEST_OS_WINDOWS_MOBILE - Windows Mobile
-// GTEST_OS_WINDOWS_PHONE - Windows Phone
-// GTEST_OS_WINDOWS_RT - Windows Store App/WinRT
-// GTEST_OS_ZOS - z/OS
-//
-// Among the platforms, Cygwin, Linux, Mac OS X, and Windows have the
-// most stable support. Since core members of the Google Test project
-// don't have access to other platforms, support for them may be less
-// stable. If you notice any problems on your platform, please notify
-// googletestframework@googlegroups.com (patches for fixing them are
-// even more welcome!).
-//
-// It is possible that none of the GTEST_OS_* macros are defined.
-
-// Feature-indicating macros
-// -------------------------
-//
-// Macros indicating which Google Test features are available (a macro
-// is defined to 1 if the corresponding feature is supported;
-// otherwise UNDEFINED -- it's never defined to 0.). Google Test
-// defines these macros automatically. Code outside Google Test MUST
-// NOT define them.
-//
-// These macros are public so that portable tests can be written.
-// Such tests typically surround code using a feature with an #if
-// which controls that code. For example:
-//
-// #if GTEST_HAS_DEATH_TEST
-// EXPECT_DEATH(DoSomethingDeadly());
-// #endif
-//
-// GTEST_HAS_COMBINE - the Combine() function (for value-parameterized
-// tests)
-// GTEST_HAS_DEATH_TEST - death tests
-// GTEST_HAS_PARAM_TEST - value-parameterized tests
-// GTEST_HAS_TYPED_TEST - typed tests
-// GTEST_HAS_TYPED_TEST_P - type-parameterized tests
-// GTEST_IS_THREADSAFE - Google Test is thread-safe.
-// GTEST_USES_POSIX_RE - enhanced POSIX regex is used. Do not confuse with
-// GTEST_HAS_POSIX_RE (see above) which users can
-// define themselves.
-// GTEST_USES_SIMPLE_RE - our own simple regex is used;
-// the above two are mutually exclusive.
-// GTEST_CAN_COMPARE_NULL - accepts untyped NULL in EXPECT_EQ().
-
-// Misc public macros
-// ------------------
-//
-// GTEST_FLAG(flag_name) - references the variable corresponding to
-// the given Google Test flag.
-
-// Internal utilities
-// ------------------
-//
-// The following macros and utilities are for Google Test's INTERNAL
-// use only. Code outside Google Test MUST NOT USE THEM DIRECTLY.
-//
-// Macros for basic C++ coding:
-// GTEST_AMBIGUOUS_ELSE_BLOCKER_ - for disabling a gcc warning.
-// GTEST_ATTRIBUTE_UNUSED_ - declares that a class' instances or a
-// variable don't have to be used.
-// GTEST_DISALLOW_ASSIGN_ - disables operator=.
-// GTEST_DISALLOW_COPY_AND_ASSIGN_ - disables copy ctor and operator=.
-// GTEST_MUST_USE_RESULT_ - declares that a function's result must be used.
-// GTEST_INTENTIONAL_CONST_COND_PUSH_ - start code section where MSVC C4127 is
-// suppressed (constant conditional).
-// GTEST_INTENTIONAL_CONST_COND_POP_ - finish code section where MSVC C4127
-// is suppressed.
-//
-// C++11 feature wrappers:
-//
-// testing::internal::move - portability wrapper for std::move.
-//
-// Synchronization:
-// Mutex, MutexLock, ThreadLocal, GetThreadCount()
-// - synchronization primitives.
-//
-// Template meta programming:
-// is_pointer - as in TR1; needed on Symbian and IBM XL C/C++ only.
-// IteratorTraits - partial implementation of std::iterator_traits, which
-// is not available in libCstd when compiled with Sun C++.
-//
-// Smart pointers:
-// scoped_ptr - as in TR2.
-//
-// Regular expressions:
-// RE - a simple regular expression class using the POSIX
-// Extended Regular Expression syntax on UNIX-like
-// platforms, or a reduced regular expression syntax on
-// other platforms, including Windows.
-//
-// Logging:
-// GTEST_LOG_() - logs messages at the specified severity level.
-// LogToStderr() - directs all log messages to stderr.
-// FlushInfoLog() - flushes informational log messages.
-//
-// Stdout and stderr capturing:
-// CaptureStdout() - starts capturing stdout.
-// GetCapturedStdout() - stops capturing stdout and returns the captured
-// string.
-// CaptureStderr() - starts capturing stderr.
-// GetCapturedStderr() - stops capturing stderr and returns the captured
-// string.
-//
-// Integer types:
-// TypeWithSize - maps an integer to an int type.
-// Int32, UInt32, Int64, UInt64, TimeInMillis
-// - integers of known sizes.
-// BiggestInt - the biggest signed integer type.
-//
-// Command-line utilities:
-// GTEST_DECLARE_*() - declares a flag.
-// GTEST_DEFINE_*() - defines a flag.
-// GetInjectableArgvs() - returns the command line as a vector of strings.
-//
-// Environment variable utilities:
-// GetEnv() - gets the value of an environment variable.
-// BoolFromGTestEnv() - parses a bool environment variable.
-// Int32FromGTestEnv() - parses an Int32 environment variable.
-// StringFromGTestEnv() - parses a string environment variable.
-
-#include <ctype.h> // for isspace, etc
-#include <stddef.h> // for ptrdiff_t
-#include <stdlib.h>
-#include <stdio.h>
-#include <string.h>
-#ifndef _WIN32_WCE
-# include <sys/types.h>
-# include <sys/stat.h>
-#endif // !_WIN32_WCE
-
-#if defined __APPLE__
-# include <AvailabilityMacros.h>
-# include <TargetConditionals.h>
-#endif
-
-#include <algorithm> // NOLINT
-#include <iostream> // NOLINT
-#include <sstream> // NOLINT
-#include <string> // NOLINT
-#include <utility>
-#include <vector> // NOLINT
-
-#include "gtest/internal/gtest-port-arch.h"
-#include "gtest/internal/custom/gtest-port.h"
-
-// Default project identification strings. The whole group is defined here
-// only when GTEST_DEV_EMAIL_ has not been defined already (for instance by
-// custom/gtest-port.h, which is included just above).
-#if !defined(GTEST_DEV_EMAIL_)
-// The address previously contained a doubled '@', which is not a valid
-// e-mail address; it now matches the one given in this header's comments.
-# define GTEST_DEV_EMAIL_ "googletestframework@googlegroups.com"
-# define GTEST_FLAG_PREFIX_ "gtest_"
-# define GTEST_FLAG_PREFIX_DASH_ "gtest-"
-# define GTEST_FLAG_PREFIX_UPPER_ "GTEST_"
-# define GTEST_NAME_ "Google Test"
-# define GTEST_PROJECT_URL_ "https://github.com/google/googletest/"
-#endif  // !defined(GTEST_DEV_EMAIL_)
-
-#if !defined(GTEST_INIT_GOOGLE_TEST_NAME_)
-# define GTEST_INIT_GOOGLE_TEST_NAME_ "testing::InitGoogleTest"
-#endif // !defined(GTEST_INIT_GOOGLE_TEST_NAME_)
-
-// Determines the version of gcc that is used to compile this.
-#ifdef __GNUC__
-// 40302 means version 4.3.2.
-# define GTEST_GCC_VER_ \
- (__GNUC__*10000 + __GNUC_MINOR__*100 + __GNUC_PATCHLEVEL__)
-#endif // __GNUC__
-
-// Macros for disabling Microsoft Visual C++ warnings.
-//
-// GTEST_DISABLE_MSC_WARNINGS_PUSH_(4800 4385)
-// /* code that triggers warnings C4800 and C4385 */
-// GTEST_DISABLE_MSC_WARNINGS_POP_()
-#if _MSC_VER >= 1500
-# define GTEST_DISABLE_MSC_WARNINGS_PUSH_(warnings) \
- __pragma(warning(push)) \
- __pragma(warning(disable: warnings))
-# define GTEST_DISABLE_MSC_WARNINGS_POP_() \
- __pragma(warning(pop))
-#else
-// Older versions of MSVC don't have __pragma.
-# define GTEST_DISABLE_MSC_WARNINGS_PUSH_(warnings)
-# define GTEST_DISABLE_MSC_WARNINGS_POP_()
-#endif
-
-#ifndef GTEST_LANG_CXX11
-// gcc and clang define __GXX_EXPERIMENTAL_CXX0X__ when
-// -std={c,gnu}++{0x,11} is passed. The C++11 standard specifies a
-// value for __cplusplus, and recent versions of clang, gcc, and
-// probably other compilers set that too in C++11 mode.
-# if __GXX_EXPERIMENTAL_CXX0X__ || __cplusplus >= 201103L
-// Compiling in at least C++11 mode.
-# define GTEST_LANG_CXX11 1
-# else
-# define GTEST_LANG_CXX11 0
-# endif
-#endif
-
-// Distinct from C++11 language support, some environments don't provide
-// proper C++11 library support. Notably, it's possible to build in
-// C++11 mode when targeting Mac OS X 10.6, which has an old libstdc++
-// with no C++11 support.
-//
-// libstdc++ has sufficient C++11 support as of GCC 4.6.0, __GLIBCXX__
-// 20110325, but maintenance releases in the 4.4 and 4.5 series followed
-// this date, so check for those versions by their date stamps.
-// https://gcc.gnu.org/onlinedocs/libstdc++/manual/abi.html#abi.versioning
-#if GTEST_LANG_CXX11 && \
- (!defined(__GLIBCXX__) || ( \
- __GLIBCXX__ >= 20110325ul && /* GCC >= 4.6.0 */ \
- /* Blacklist of patch releases of older branches: */ \
- __GLIBCXX__ != 20110416ul && /* GCC 4.4.6 */ \
- __GLIBCXX__ != 20120313ul && /* GCC 4.4.7 */ \
- __GLIBCXX__ != 20110428ul && /* GCC 4.5.3 */ \
- __GLIBCXX__ != 20120702ul)) /* GCC 4.5.4 */
-# define GTEST_STDLIB_CXX11 1
-#endif
-
-// Only use C++11 library features if the library provides them.
-#if GTEST_STDLIB_CXX11
-# define GTEST_HAS_STD_BEGIN_AND_END_ 1
-# define GTEST_HAS_STD_FORWARD_LIST_ 1
-# define GTEST_HAS_STD_FUNCTION_ 1
-# define GTEST_HAS_STD_INITIALIZER_LIST_ 1
-# define GTEST_HAS_STD_MOVE_ 1
-# define GTEST_HAS_STD_SHARED_PTR_ 1
-# define GTEST_HAS_STD_TYPE_TRAITS_ 1
-# define GTEST_HAS_STD_UNIQUE_PTR_ 1
-#endif
-
-// C++11 specifies that <tuple> provides std::tuple.
-// Some platforms still might not have it, however.
-#if GTEST_LANG_CXX11
-# define GTEST_HAS_STD_TUPLE_ 1
-# if defined(__clang__)
-// Inspired by http://clang.llvm.org/docs/LanguageExtensions.html#__has_include
-# if defined(__has_include) && !__has_include(<tuple>)
-# undef GTEST_HAS_STD_TUPLE_
-# endif
-# elif defined(_MSC_VER)
-// Inspired by boost/config/stdlib/dinkumware.hpp
-# if defined(_CPPLIB_VER) && _CPPLIB_VER < 520
-# undef GTEST_HAS_STD_TUPLE_
-# endif
-# elif defined(__GLIBCXX__)
-// Inspired by boost/config/stdlib/libstdcpp3.hpp,
-// http://gcc.gnu.org/gcc-4.2/changes.html and
-// http://gcc.gnu.org/onlinedocs/libstdc++/manual/bk01pt01ch01.html#manual.intro.status.standard.200x
-# if __GNUC__ < 4 || (__GNUC__ == 4 && __GNUC_MINOR__ < 2)
-# undef GTEST_HAS_STD_TUPLE_
-# endif
-# endif
-#endif
-
-// Brings in definitions for functions used in the testing::internal::posix
-// namespace (read, write, close, chdir, isatty, stat). We do not currently
-// use them on Windows Mobile.
-#if GTEST_OS_WINDOWS
-# if !GTEST_OS_WINDOWS_MOBILE
-# include <direct.h>
-# include <io.h>
-# endif
-// In order to avoid having to include <windows.h>, use forward declaration
-// assuming CRITICAL_SECTION is a typedef of _RTL_CRITICAL_SECTION.
-// This assumption is verified by
-// WindowsTypesTest.CRITICAL_SECTIONIs_RTL_CRITICAL_SECTION.
-struct _RTL_CRITICAL_SECTION;
-#else
-// This assumes that non-Windows OSes provide unistd.h. For OSes where this
-// is not the case, we need to include headers that provide the functions
-// mentioned above.
-# include <unistd.h>
-# include <strings.h>
-#endif // GTEST_OS_WINDOWS
-
-#if GTEST_OS_LINUX_ANDROID
-// Used to define __ANDROID_API__ matching the target NDK API level.
-# include <android/api-level.h> // NOLINT
-#endif
-
-// Defines this to true iff Google Test can use POSIX regular expressions.
-#ifndef GTEST_HAS_POSIX_RE
-# if GTEST_OS_LINUX_ANDROID
-// On Android, <regex.h> is only available starting with Gingerbread.
-# define GTEST_HAS_POSIX_RE (__ANDROID_API__ >= 9)
-# else
-# define GTEST_HAS_POSIX_RE (!GTEST_OS_WINDOWS)
-# endif
-#endif
-
-#if GTEST_USES_PCRE
-// The appropriate headers have already been included.
-
-#elif GTEST_HAS_POSIX_RE
-
-// On some platforms, <regex.h> needs someone to define size_t, and
-// won't compile otherwise. We can #include it here as we already
-// included <stdlib.h>, which is guaranteed to define size_t through
-// <stddef.h>.
-# include <regex.h> // NOLINT
-
-# define GTEST_USES_POSIX_RE 1
-
-#elif GTEST_OS_WINDOWS
-
-// <regex.h> is not available on Windows. Use our own simple regex
-// implementation instead.
-# define GTEST_USES_SIMPLE_RE 1
-
-#else
-
-// <regex.h> may not be available on this platform. Use our own
-// simple regex implementation instead.
-# define GTEST_USES_SIMPLE_RE 1
-
-#endif // GTEST_USES_PCRE
-
-// Determines whether C++ exceptions are enabled. A value supplied by the
-// user takes precedence; otherwise the compiler-specific branches below
-// define GTEST_HAS_EXCEPTIONS. The branches are tried in order, so a
-// compiler that fails an "&& __EXCEPTIONS" test falls through to the later
-// branches and, if none matches, to the final #else.
-#ifndef GTEST_HAS_EXCEPTIONS
-// The user didn't tell us whether exceptions are enabled, so we need
-// to figure it out.
-# if defined(_MSC_VER) || defined(__BORLANDC__)
-// MSVC's and C++Builder's implementations of the STL use the _HAS_EXCEPTIONS
-// macro to enable exceptions, so we'll do the same.
-// Assumes that exceptions are enabled by default.
-# ifndef _HAS_EXCEPTIONS
-# define _HAS_EXCEPTIONS 1
-# endif // _HAS_EXCEPTIONS
-# define GTEST_HAS_EXCEPTIONS _HAS_EXCEPTIONS
-# elif defined(__clang__)
-// clang defines __EXCEPTIONS iff exceptions are enabled before clang 220714,
-// but iff cleanups are enabled after that. In Obj-C++ files, there can be
-// cleanups for ObjC exceptions which also need cleanups, even if C++ exceptions
-// are disabled. clang has __has_feature(cxx_exceptions) which checks for C++
-// exceptions starting at clang r206352, but which checked for cleanups prior to
-// that. To reliably check for C++ exception availability with clang, check for
-// __EXCEPTIONS && __has_feature(cxx_exceptions).
-# define GTEST_HAS_EXCEPTIONS (__EXCEPTIONS && __has_feature(cxx_exceptions))
-# elif defined(__GNUC__) && __EXCEPTIONS
-// gcc defines __EXCEPTIONS to 1 iff exceptions are enabled.
-# define GTEST_HAS_EXCEPTIONS 1
-# elif defined(__SUNPRO_CC)
-// Sun Pro CC supports exceptions. However, there is no compile-time way of
-// detecting whether they are enabled or not. Therefore, we assume that
-// they are enabled unless the user tells us otherwise.
-# define GTEST_HAS_EXCEPTIONS 1
-# elif defined(__IBMCPP__) && __EXCEPTIONS
-// xlC defines __EXCEPTIONS to 1 iff exceptions are enabled.
-# define GTEST_HAS_EXCEPTIONS 1
-# elif defined(__HP_aCC)
-// Exception handling is in effect by default in the HP aCC compiler. It has
-// to be turned off with the +noeh compiler option if desired.
-# define GTEST_HAS_EXCEPTIONS 1
-# else
-// For other compilers, we assume exceptions are disabled to be
-// conservative.
-# define GTEST_HAS_EXCEPTIONS 0
-# endif // defined(_MSC_VER) || defined(__BORLANDC__)
-#endif // GTEST_HAS_EXCEPTIONS
-
-#if !defined(GTEST_HAS_STD_STRING)
-// Even though we don't use this macro any longer, we keep it in case
-// some clients still depend on it.
-# define GTEST_HAS_STD_STRING 1
-#elif !GTEST_HAS_STD_STRING
-// The user told us that ::std::string isn't available.
-# error "Google Test cannot be used where ::std::string isn't available."
-#endif // !defined(GTEST_HAS_STD_STRING)
-
-#ifndef GTEST_HAS_GLOBAL_STRING
-// The user didn't tell us whether ::string is available, so we need
-// to figure it out.
-
-# define GTEST_HAS_GLOBAL_STRING 0
-
-#endif // GTEST_HAS_GLOBAL_STRING
-
-#ifndef GTEST_HAS_STD_WSTRING
-// The user didn't tell us whether ::std::wstring is available, so we need
-// to figure it out.
-// TODO(wan@google.com): uses autoconf to detect whether ::std::wstring
-// is available.
-
-// Cygwin 1.7 and below doesn't support ::std::wstring.
-// Solaris' libc++ doesn't support it either. Android has
-// no support for it at least as recent as Froyo (2.2).
-# define GTEST_HAS_STD_WSTRING \
- (!(GTEST_OS_LINUX_ANDROID || GTEST_OS_CYGWIN || GTEST_OS_SOLARIS))
-
-#endif // GTEST_HAS_STD_WSTRING
-
-#ifndef GTEST_HAS_GLOBAL_WSTRING
-// The user didn't tell us whether ::wstring is available, so we need
-// to figure it out.
-# define GTEST_HAS_GLOBAL_WSTRING \
- (GTEST_HAS_STD_WSTRING && GTEST_HAS_GLOBAL_STRING)
-#endif // GTEST_HAS_GLOBAL_WSTRING
-
-// Determines whether RTTI is available. A value supplied by the user takes
-// precedence; otherwise GTEST_HAS_RTTI is defined by the first matching
-// compiler branch below.
-#ifndef GTEST_HAS_RTTI
-// The user didn't tell us whether RTTI is enabled, so we need to
-// figure it out.
-
-# ifdef _MSC_VER
-
-# ifdef _CPPRTTI // MSVC defines this macro iff RTTI is enabled.
-# define GTEST_HAS_RTTI 1
-# else
-# define GTEST_HAS_RTTI 0
-# endif
-
-// Starting with version 4.3.2, gcc defines __GXX_RTTI iff RTTI is enabled.
-# elif defined(__GNUC__) && (GTEST_GCC_VER_ >= 40302)
-
-# ifdef __GXX_RTTI
-// When building against STLport with the Android NDK and with
-// -frtti -fno-exceptions, the build fails at link time with undefined
-// references to __cxa_bad_typeid. Not sure if this is an STL or a toolchain
-// bug, so disable RTTI when detected.
-# if GTEST_OS_LINUX_ANDROID && defined(_STLPORT_MAJOR) && \
- !defined(__EXCEPTIONS)
-# define GTEST_HAS_RTTI 0
-# else
-# define GTEST_HAS_RTTI 1
-# endif // GTEST_OS_LINUX_ANDROID && __STLPORT_MAJOR && !__EXCEPTIONS
-# else
-# define GTEST_HAS_RTTI 0
-# endif // __GXX_RTTI
-
-// Clang defines __GXX_RTTI starting with version 3.0, but its manual recommends
-// using has_feature instead. has_feature(cxx_rtti) is supported since 2.7, the
-// first version with C++ support.
-# elif defined(__clang__)
-
-# define GTEST_HAS_RTTI __has_feature(cxx_rtti)
-
-// Starting with version 9.0 IBM Visual Age defines __RTTI_ALL__ to 1 if
-// both the typeid and dynamic_cast features are present.
-# elif defined(__IBMCPP__) && (__IBMCPP__ >= 900)
-
-# ifdef __RTTI_ALL__
-# define GTEST_HAS_RTTI 1
-# else
-# define GTEST_HAS_RTTI 0
-# endif
-
-# else
-
-// For all other compilers, we assume RTTI is enabled.
-# define GTEST_HAS_RTTI 1
-
-# endif // _MSC_VER
-
-#endif // GTEST_HAS_RTTI
-
-// It's this header's responsibility to #include <typeinfo> when RTTI
-// is enabled.
-#if GTEST_HAS_RTTI
-# include <typeinfo>
-#endif
-
-// Determines whether Google Test can use the pthreads library.
-#ifndef GTEST_HAS_PTHREAD
-// The user didn't tell us explicitly, so we make reasonable assumptions about
-// which platforms have pthreads support.
-//
-// To disable threading support in Google Test, add -DGTEST_HAS_PTHREAD=0
-// to your compiler flags.
-# define GTEST_HAS_PTHREAD (GTEST_OS_LINUX || GTEST_OS_MAC || GTEST_OS_HPUX \
- || GTEST_OS_QNX || GTEST_OS_FREEBSD || GTEST_OS_NACL)
-#endif // GTEST_HAS_PTHREAD
-
-#if GTEST_HAS_PTHREAD
-// gtest-port.h guarantees to #include <pthread.h> when GTEST_HAS_PTHREAD is
-// true.
-# include <pthread.h> // NOLINT
-
-// For timespec and nanosleep, used below.
-# include <time.h> // NOLINT
-#endif
-
-// Determines if hash_map/hash_set are available.
-// Only used for testing against those containers.
-#if !defined(GTEST_HAS_HASH_MAP_)
-# if _MSC_VER
-# define GTEST_HAS_HASH_MAP_ 1 // Indicates that hash_map is available.
-# define GTEST_HAS_HASH_SET_ 1 // Indicates that hash_set is available.
-# endif // _MSC_VER
-#endif // !defined(GTEST_HAS_HASH_MAP_)
-
-// Determines whether Google Test can use tr1/tuple. You can define
-// this macro to 0 to prevent Google Test from using tuple (any
-// feature depending on tuple will be disabled in this mode).
-#ifndef GTEST_HAS_TR1_TUPLE
-# if GTEST_OS_LINUX_ANDROID && defined(_STLPORT_MAJOR)
-// STLport, provided with the Android NDK, has neither <tr1/tuple> nor <tuple>.
-# define GTEST_HAS_TR1_TUPLE 0
-# else
-// The user didn't tell us not to do it, so we assume it's OK.
-# define GTEST_HAS_TR1_TUPLE 1
-# endif
-#endif // GTEST_HAS_TR1_TUPLE
-
-// Determines whether Google Test's own tr1 tuple implementation
-// should be used.
-#ifndef GTEST_USE_OWN_TR1_TUPLE
-// The user didn't tell us, so we need to figure it out.
-
-// We use our own TR1 tuple if we aren't sure the user has an
-// implementation of it already. At this time, libstdc++ 4.0.0+ and
-// MSVC 2010 are the only mainstream standard libraries that come
-// with a TR1 tuple implementation. NVIDIA's CUDA NVCC compiler
-// pretends to be GCC by defining __GNUC__ and friends, but cannot
-// compile GCC's tuple implementation. MSVC 2008 (9.0) provides TR1
-// tuple in a 323 MB Feature Pack download, which we cannot assume the
-// user has. QNX's QCC compiler is a modified GCC but it doesn't
-// support TR1 tuple. libc++ only provides std::tuple, in C++11 mode,
-// and it can be used with some compilers that define __GNUC__.
-# if (defined(__GNUC__) && !defined(__CUDACC__) && (GTEST_GCC_VER_ >= 40000) \
- && !GTEST_OS_QNX && !defined(_LIBCPP_VERSION)) || _MSC_VER >= 1600
-# define GTEST_ENV_HAS_TR1_TUPLE_ 1
-# endif
-
-// C++11 specifies that <tuple> provides std::tuple. Use that if gtest is used
-// in C++11 mode and libstdc++ isn't very old (binaries targeting OS X 10.6
-// can build with clang but need to use gcc4.2's libstdc++).
-# if GTEST_LANG_CXX11 && (!defined(__GLIBCXX__) || __GLIBCXX__ > 20110325)
-# define GTEST_ENV_HAS_STD_TUPLE_ 1
-# endif
-
-# if GTEST_ENV_HAS_TR1_TUPLE_ || GTEST_ENV_HAS_STD_TUPLE_
-# define GTEST_USE_OWN_TR1_TUPLE 0
-# else
-# define GTEST_USE_OWN_TR1_TUPLE 1
-# endif
-
-#endif // GTEST_USE_OWN_TR1_TUPLE
-
-// To avoid conditional compilation everywhere, we make it
-// gtest-port.h's responsibility to #include the header implementing
-// tuple.
-#if GTEST_HAS_STD_TUPLE_
-# include <tuple> // IWYU pragma: export
-# define GTEST_TUPLE_NAMESPACE_ ::std
-#endif // GTEST_HAS_STD_TUPLE_
-
-// We include tr1::tuple even if std::tuple is available to define printers for
-// them.
-#if GTEST_HAS_TR1_TUPLE
-# ifndef GTEST_TUPLE_NAMESPACE_
-# define GTEST_TUPLE_NAMESPACE_ ::std::tr1
-# endif // GTEST_TUPLE_NAMESPACE_
-
-# if GTEST_USE_OWN_TR1_TUPLE
-# include "gtest/internal/gtest-tuple.h" // IWYU pragma: export // NOLINT
-# elif GTEST_ENV_HAS_STD_TUPLE_
-# include <tuple>
-// C++11 puts its tuple into the ::std namespace rather than
-// ::std::tr1. gtest expects tuple to live in ::std::tr1, so put it there.
-// This causes undefined behavior, but supported compilers react in
-// the way we intend.
-namespace std {
-namespace tr1 {
-using ::std::get;
-using ::std::make_tuple;
-using ::std::tuple;
-using ::std::tuple_element;
-using ::std::tuple_size;
-}
-}
-
-# elif GTEST_OS_SYMBIAN
-
-// On Symbian, BOOST_HAS_TR1_TUPLE causes Boost's TR1 tuple library to
-// use STLport's tuple implementation, which unfortunately doesn't
-// work as the copy of STLport distributed with Symbian is incomplete.
-// By making sure BOOST_HAS_TR1_TUPLE is undefined, we force Boost to
-// use its own tuple implementation.
-# ifdef BOOST_HAS_TR1_TUPLE
-# undef BOOST_HAS_TR1_TUPLE
-# endif // BOOST_HAS_TR1_TUPLE
-
-// This prevents <boost/tr1/detail/config.hpp>, which defines
-// BOOST_HAS_TR1_TUPLE, from being #included by Boost's <tuple>.
-# define BOOST_TR1_DETAIL_CONFIG_HPP_INCLUDED
-# include <tuple> // IWYU pragma: export // NOLINT
-
-# elif defined(__GNUC__) && (GTEST_GCC_VER_ >= 40000)
-// GCC 4.0+ implements tr1/tuple in the <tr1/tuple> header. This does
-// not conform to the TR1 spec, which requires the header to be <tuple>.
-
-# if !GTEST_HAS_RTTI && GTEST_GCC_VER_ < 40302
-// Until version 4.3.2, gcc has a bug that causes <tr1/functional>,
-// which is #included by <tr1/tuple>, to not compile when RTTI is
-// disabled. _TR1_FUNCTIONAL is the header guard for
-// <tr1/functional>. Hence the following #define is a hack to prevent
-// <tr1/functional> from being included.
-# define _TR1_FUNCTIONAL 1
-# include <tr1/tuple>
-# undef _TR1_FUNCTIONAL // Allows the user to #include
- // <tr1/functional> if he chooses to.
-# else
-# include <tr1/tuple> // NOLINT
-# endif // !GTEST_HAS_RTTI && GTEST_GCC_VER_ < 40302
-
-# else
-// If the compiler is not GCC 4.0+, we assume the user is using a
-// spec-conforming TR1 implementation.
-# include <tuple> // IWYU pragma: export // NOLINT
-# endif // GTEST_USE_OWN_TR1_TUPLE
-
-#endif // GTEST_HAS_TR1_TUPLE
-
-// Determines whether clone(2) is supported.
-// Usually it will only be available on Linux, excluding
-// Linux on the Itanium architecture.
-// Also see http://linux.die.net/man/2/clone.
-#ifndef GTEST_HAS_CLONE
-// The user didn't tell us, so we need to figure it out.
-
-# if GTEST_OS_LINUX && !defined(__ia64__)
-# if GTEST_OS_LINUX_ANDROID
-// On Android, clone() is only available on ARM starting with Gingerbread.
-# if defined(__arm__) && __ANDROID_API__ >= 9
-# define GTEST_HAS_CLONE 1
-# else
-# define GTEST_HAS_CLONE 0
-# endif
-# else
-# define GTEST_HAS_CLONE 1
-# endif
-# else
-# define GTEST_HAS_CLONE 0
-# endif // GTEST_OS_LINUX && !defined(__ia64__)
-
-#endif // GTEST_HAS_CLONE
-
-// Determines whether to support stream redirection. This is used to test
-// output correctness and to implement death tests.
-#ifndef GTEST_HAS_STREAM_REDIRECTION
-// By default, we assume that stream redirection is supported on all
-// platforms except known mobile ones.
-# if GTEST_OS_WINDOWS_MOBILE || GTEST_OS_SYMBIAN || \
- GTEST_OS_WINDOWS_PHONE || GTEST_OS_WINDOWS_RT
-# define GTEST_HAS_STREAM_REDIRECTION 0
-# else
-# define GTEST_HAS_STREAM_REDIRECTION 1
-# endif // !GTEST_OS_WINDOWS_MOBILE && !GTEST_OS_SYMBIAN
-#endif // GTEST_HAS_STREAM_REDIRECTION
-
-// Determines whether to support death tests.
-// Google Test does not support death tests for VC 7.1 and earlier as
-// abort() in a VC 7.1 application compiled as GUI in debug config
-// pops up a dialog window that cannot be suppressed programmatically.
-#if (GTEST_OS_LINUX || GTEST_OS_CYGWIN || GTEST_OS_SOLARIS || \
- (GTEST_OS_MAC && !GTEST_OS_IOS) || \
- (GTEST_OS_WINDOWS_DESKTOP && _MSC_VER >= 1400) || \
- GTEST_OS_WINDOWS_MINGW || GTEST_OS_AIX || GTEST_OS_HPUX || \
- GTEST_OS_OPENBSD || GTEST_OS_QNX || GTEST_OS_FREEBSD)
-# define GTEST_HAS_DEATH_TEST 1
-#endif
-
-// We don't support MSVC 7.1 with exceptions disabled now. Therefore
-// all the compilers we care about are adequate for supporting
-// value-parameterized tests.
-#define GTEST_HAS_PARAM_TEST 1
-
-// Determines whether to support type-driven tests.
-
-// Typed tests need <typeinfo> and variadic macros, which GCC, VC++ 8.0,
-// Sun Pro CC, IBM Visual Age, and HP aCC support.
-#if defined(__GNUC__) || (_MSC_VER >= 1400) || defined(__SUNPRO_CC) || \
- defined(__IBMCPP__) || defined(__HP_aCC)
-# define GTEST_HAS_TYPED_TEST 1
-# define GTEST_HAS_TYPED_TEST_P 1
-#endif
-
-// Determines whether to support Combine(). This only makes sense when
-// value-parameterized tests are enabled. The implementation doesn't
-// work on Sun Studio since it doesn't understand templated conversion
-// operators.
-#if GTEST_HAS_PARAM_TEST && GTEST_HAS_TR1_TUPLE && !defined(__SUNPRO_CC)
-# define GTEST_HAS_COMBINE 1
-#endif
-
-// Determines whether the system compiler uses UTF-16 for encoding wide strings.
-#define GTEST_WIDE_STRING_USES_UTF16_ \
- (GTEST_OS_WINDOWS || GTEST_OS_CYGWIN || GTEST_OS_SYMBIAN || GTEST_OS_AIX)
-
-// Determines whether test results can be streamed to a socket.
-#if GTEST_OS_LINUX
-# define GTEST_CAN_STREAM_RESULTS_ 1
-#endif
-
-// Defines some utility macros.
-
-// The GNU compiler emits a warning if nested "if" statements are followed by
-// an "else" statement and braces are not used to explicitly disambiguate the
-// "else" binding. This leads to problems with code like:
-//
-// if (gate)
-// ASSERT_*(condition) << "Some message";
-//
-// The "switch (0) case 0:" idiom is used to suppress this.
-#ifdef __INTEL_COMPILER
-# define GTEST_AMBIGUOUS_ELSE_BLOCKER_
-#else
-# define GTEST_AMBIGUOUS_ELSE_BLOCKER_ switch (0) case 0: default: // NOLINT
-#endif
-
-// Use this annotation at the end of a struct/class definition to
-// prevent the compiler from optimizing away instances that are never
-// used. This is useful when all interesting logic happens inside the
-// c'tor and / or d'tor. Example:
-//
-// struct Foo {
-// Foo() { ... }
-// } GTEST_ATTRIBUTE_UNUSED_;
-//
-// Also use it after a variable or parameter declaration to tell the
-// compiler the variable/parameter does not have to be used.
-#if defined(__GNUC__) && !defined(COMPILER_ICC)
-# define GTEST_ATTRIBUTE_UNUSED_ __attribute__ ((unused))
-#elif defined(__clang__)
-# if __has_attribute(unused)
-# define GTEST_ATTRIBUTE_UNUSED_ __attribute__ ((unused))
-# endif
-#endif
-#ifndef GTEST_ATTRIBUTE_UNUSED_
-# define GTEST_ATTRIBUTE_UNUSED_
-#endif
-
-// A macro to disallow operator=
-// This should be used in the private: declarations for a class.
-#define GTEST_DISALLOW_ASSIGN_(type)\
- void operator=(type const &)
-
-// A macro to disallow copy constructor and operator=
-// This should be used in the private: declarations for a class.
-#define GTEST_DISALLOW_COPY_AND_ASSIGN_(type)\
- type(type const &);\
- GTEST_DISALLOW_ASSIGN_(type)
-
-// Tell the compiler to warn about unused return values for functions declared
-// with this macro. The macro should be used on function declarations
-// following the argument list:
-//
-// Sprocket* AllocateSprocket() GTEST_MUST_USE_RESULT_;
-#if defined(__GNUC__) && (GTEST_GCC_VER_ >= 30400) && !defined(COMPILER_ICC)
-# define GTEST_MUST_USE_RESULT_ __attribute__ ((warn_unused_result))
-#else
-# define GTEST_MUST_USE_RESULT_
-#endif // __GNUC__ && (GTEST_GCC_VER_ >= 30400) && !COMPILER_ICC
-
-// MS C++ compiler emits warning when a conditional expression is compile time
-// constant. In some contexts this warning is false positive and needs to be
-// suppressed. Use the following two macros in such cases:
-//
-// GTEST_INTENTIONAL_CONST_COND_PUSH_()
-// while (true) {
-// GTEST_INTENTIONAL_CONST_COND_POP_()
-// }
-# define GTEST_INTENTIONAL_CONST_COND_PUSH_() \
- GTEST_DISABLE_MSC_WARNINGS_PUSH_(4127)
-# define GTEST_INTENTIONAL_CONST_COND_POP_() \
- GTEST_DISABLE_MSC_WARNINGS_POP_()
-
-// Determine whether the compiler supports Microsoft's Structured Exception
-// Handling. This is supported by several Windows compilers but generally
-// does not exist on any other system.
-#ifndef GTEST_HAS_SEH
-// The user didn't tell us, so we need to figure it out.
-
-# if defined(_MSC_VER) || defined(__BORLANDC__)
-// These two compilers are known to support SEH.
-# define GTEST_HAS_SEH 1
-# else
-// Assume no SEH.
-# define GTEST_HAS_SEH 0
-# endif
-
-#endif // GTEST_HAS_SEH
-
-// Determines whether Google Test's synchronization primitives are usable.
-// This definition deliberately lives outside the GTEST_HAS_SEH guard above:
-// if it were nested there, a user who pre-defines GTEST_HAS_SEH would leave
-// GTEST_IS_THREADSAFE undefined, and every later "#if GTEST_IS_THREADSAFE"
-// would quietly evaluate to 0. The macro gets its own guard so that it can
-// still be overridden from the command line.
-#ifndef GTEST_IS_THREADSAFE
-
-# define GTEST_IS_THREADSAFE \
- (GTEST_HAS_MUTEX_AND_THREAD_LOCAL_ \
- || (GTEST_OS_WINDOWS && !GTEST_OS_WINDOWS_PHONE && !GTEST_OS_WINDOWS_RT) \
- || GTEST_HAS_PTHREAD)
-
-#endif // GTEST_IS_THREADSAFE
-
-#ifdef _MSC_VER
-# if GTEST_LINKED_AS_SHARED_LIBRARY
-# define GTEST_API_ __declspec(dllimport)
-# elif GTEST_CREATE_SHARED_LIBRARY
-# define GTEST_API_ __declspec(dllexport)
-# endif
-#elif __GNUC__ >= 4 || defined(__clang__)
-# define GTEST_API_ __attribute__((visibility ("default")))
-#endif // _MSC_VER
-
-#ifndef GTEST_API_
-# define GTEST_API_
-#endif
-
-#ifdef __GNUC__
-// Ask the compiler to never inline a given function.
-# define GTEST_NO_INLINE_ __attribute__((noinline))
-#else
-# define GTEST_NO_INLINE_
-#endif
-
-// _LIBCPP_VERSION is defined by the libc++ library from the LLVM project.
-#if defined(__GLIBCXX__) || defined(_LIBCPP_VERSION)
-# define GTEST_HAS_CXXABI_H_ 1
-#else
-# define GTEST_HAS_CXXABI_H_ 0
-#endif
-
-// A function level attribute to disable checking for use of uninitialized
-// memory when built with MemorySanitizer.
-#if defined(__clang__)
-# if __has_feature(memory_sanitizer)
-# define GTEST_ATTRIBUTE_NO_SANITIZE_MEMORY_ \
- __attribute__((no_sanitize_memory))
-# else
-# define GTEST_ATTRIBUTE_NO_SANITIZE_MEMORY_
-# endif // __has_feature(memory_sanitizer)
-#else
-# define GTEST_ATTRIBUTE_NO_SANITIZE_MEMORY_
-#endif // __clang__
-
-// A function level attribute to disable AddressSanitizer instrumentation.
-#if defined(__clang__)
-# if __has_feature(address_sanitizer)
-# define GTEST_ATTRIBUTE_NO_SANITIZE_ADDRESS_ \
- __attribute__((no_sanitize_address))
-# else
-# define GTEST_ATTRIBUTE_NO_SANITIZE_ADDRESS_
-# endif // __has_feature(address_sanitizer)
-#else
-# define GTEST_ATTRIBUTE_NO_SANITIZE_ADDRESS_
-#endif // __clang__
-
-// A function level attribute to disable ThreadSanitizer instrumentation.
-#if defined(__clang__)
-# if __has_feature(thread_sanitizer)
-# define GTEST_ATTRIBUTE_NO_SANITIZE_THREAD_ \
- __attribute__((no_sanitize_thread))
-# else
-# define GTEST_ATTRIBUTE_NO_SANITIZE_THREAD_
-# endif // __has_feature(thread_sanitizer)
-#else
-# define GTEST_ATTRIBUTE_NO_SANITIZE_THREAD_
-#endif // __clang__
-
-// A function level attribute to disable UndefinedBehaviorSanitizer's (defined)
-// unsigned integer overflow instrumentation.
-#if defined(__clang__)
-# if defined(__has_attribute) && __has_attribute(no_sanitize)
-# define GTEST_ATTRIBUTE_NO_SANITIZE_UNSIGNED_OVERFLOW_ \
- __attribute__((no_sanitize("unsigned-integer-overflow")))
-# else
-# define GTEST_ATTRIBUTE_NO_SANITIZE_UNSIGNED_OVERFLOW_
-# endif // defined(__has_attribute) && __has_attribute(no_sanitize)
-#else
-# define GTEST_ATTRIBUTE_NO_SANITIZE_UNSIGNED_OVERFLOW_
-#endif // __clang__
-
-namespace testing {
-
-class Message;
-
-#if defined(GTEST_TUPLE_NAMESPACE_)
-// Import tuple and friends into the ::testing namespace.
-// It is part of our interface, having them in ::testing allows us to change
-// their types as needed.
-using GTEST_TUPLE_NAMESPACE_::get;
-using GTEST_TUPLE_NAMESPACE_::make_tuple;
-using GTEST_TUPLE_NAMESPACE_::tuple;
-using GTEST_TUPLE_NAMESPACE_::tuple_size;
-using GTEST_TUPLE_NAMESPACE_::tuple_element;
-#endif // defined(GTEST_TUPLE_NAMESPACE_)
-
-namespace internal {
-
-// A secret type that Google Test users don't know about. It has no
-// definition on purpose. Therefore it's impossible to create a
-// Secret object, which is what we want.
-class Secret;
-
-// The GTEST_COMPILE_ASSERT_ macro can be used to verify that a compile time
-// expression is true. For example, you could use it to verify the
-// size of a static array:
-//
-// GTEST_COMPILE_ASSERT_(GTEST_ARRAY_SIZE_(names) == NUM_NAMES,
-// names_incorrect_size);
-//
-// or to make sure a struct is smaller than a certain size:
-//
-// GTEST_COMPILE_ASSERT_(sizeof(foo) < 128, foo_too_large);
-//
-// The second argument to the macro is the name of the variable. If
-// the expression is false, most compilers will issue a warning/error
-// containing the name of the variable.
-
-#if GTEST_LANG_CXX11
-# define GTEST_COMPILE_ASSERT_(expr, msg) static_assert(expr, #msg)
-#else // !GTEST_LANG_CXX11
-template <bool>
- struct CompileAssert {
-};
-
-# define GTEST_COMPILE_ASSERT_(expr, msg) \
- typedef ::testing::internal::CompileAssert<(static_cast<bool>(expr))> \
- msg[static_cast<bool>(expr) ? 1 : -1] GTEST_ATTRIBUTE_UNUSED_
-#endif // !GTEST_LANG_CXX11
-
-// Implementation details of GTEST_COMPILE_ASSERT_:
-//
-// (In C++11, we simply use static_assert instead of the following)
-//
-// - GTEST_COMPILE_ASSERT_ works by defining an array type that has -1
-// elements (and thus is invalid) when the expression is false.
-//
-// - The simpler definition
-//
-// #define GTEST_COMPILE_ASSERT_(expr, msg) typedef char msg[(expr) ? 1 : -1]
-//
-// does not work, as gcc supports variable-length arrays whose sizes
-// are determined at run-time (this is gcc's extension and not part
-// of the C++ standard). As a result, gcc fails to reject the
-// following code with the simple definition:
-//
-// int foo;
-// GTEST_COMPILE_ASSERT_(foo, msg); // not supposed to compile as foo is
-// // not a compile-time constant.
-//
-// - By using the type CompileAssert<(bool(expr))>, we ensure that
-// expr is a compile-time constant. (Template arguments must be
-// determined at compile-time.)
-//
-// - The outer parentheses in CompileAssert<(bool(expr))> are necessary
-// to work around a bug in gcc 3.4.4 and 4.0.1. If we had written
-//
-// CompileAssert<bool(expr)>
-//
-// instead, these compilers will refuse to compile
-//
-// GTEST_COMPILE_ASSERT_(5 > 0, some_message);
-//
-// (They seem to think the ">" in "5 > 0" marks the end of the
-// template argument list.)
-//
-// - The array size is (bool(expr) ? 1 : -1), instead of simply
-//
-// ((expr) ? 1 : -1).
-//
-// This is to avoid running into a bug in MS VC 7.1, which
-// causes ((0.0) ? 1 : -1) to incorrectly evaluate to 1.
-
-// StaticAssertTypeEqHelper is used by StaticAssertTypeEq defined in gtest.h.
-//
-// This template is declared, but intentionally undefined.
-template <typename T1, typename T2>
-struct StaticAssertTypeEqHelper;
-
-template <typename T>
-struct StaticAssertTypeEqHelper<T, T> {
- enum { value = true };
-};
-
-// Evaluates to the number of elements in 'array'.
-#define GTEST_ARRAY_SIZE_(array) (sizeof(array) / sizeof(array[0]))
-
-#if GTEST_HAS_GLOBAL_STRING
-typedef ::string string;
-#else
-typedef ::std::string string;
-#endif // GTEST_HAS_GLOBAL_STRING
-
-#if GTEST_HAS_GLOBAL_WSTRING
-typedef ::wstring wstring;
-#elif GTEST_HAS_STD_WSTRING
-typedef ::std::wstring wstring;
-#endif // GTEST_HAS_GLOBAL_WSTRING
-
-// A helper for suppressing warnings on constant condition. It just
-// returns 'condition'.
-GTEST_API_ bool IsTrue(bool condition);
-
-// Defines scoped_ptr.
-
-// A deliberately PARTIAL scoped_ptr: it owns a single heap object and
-// deletes it on destruction or reset. Only the operations Google Test
-// itself needs are provided.
-template <typename T>
-class scoped_ptr {
- public:
-  typedef T element_type;
-
-  // Takes ownership of p (which may be NULL).
-  explicit scoped_ptr(T* p = NULL) : ptr_(p) {}
-
-  // Deletes the owned object, if any.
-  ~scoped_ptr() { reset(); }
-
-  T& operator*() const { return *ptr_; }
-  T* operator->() const { return ptr_; }
-  T* get() const { return ptr_; }
-
-  // Gives up ownership without deleting; the caller receives the pointer.
-  T* release() {
-    T* const released = ptr_;
-    ptr_ = NULL;
-    return released;
-  }
-
-  // Replaces the owned object with p. Resetting to the pointer already
-  // held is a no-op, so the object is never deleted out from under itself.
-  void reset(T* p = NULL) {
-    if (p == ptr_) {
-      return;
-    }
-    // sizeof(T) only compiles for a complete type, which is what delete
-    // needs in order to run the right destructor.
-    if (IsTrue(sizeof(T) > 0)) {
-      delete ptr_;
-    }
-    ptr_ = p;
-  }
-
-  // Exchanges the owned pointers of a and b.
-  friend void swap(scoped_ptr& a, scoped_ptr& b) {
-    using std::swap;
-    swap(a.ptr_, b.ptr_);
-  }
-
- private:
-  T* ptr_;
-
-  GTEST_DISALLOW_COPY_AND_ASSIGN_(scoped_ptr);
-};
-
-// Defines RE.
-
-// A simple C++ wrapper for <regex.h>. It uses the POSIX Extended
-// Regular Expression syntax.
-//
-// All constructors funnel into Init(), which is declared here but defined
-// outside this header; which members it fills in cannot be seen from here.
-class GTEST_API_ RE {
- public:
- // A copy constructor is required by the Standard to initialize object
- // references from r-values.
- // The copy is built by re-running Init() on the other object's pattern
- // string rather than by copying its members.
- RE(const RE& other) { Init(other.pattern()); }
-
- // Constructs an RE from a string.
- // Not marked explicit (lint warning silenced with NOLINT), so a string
- // converts to RE implicitly.
- RE(const ::std::string& regex) { Init(regex.c_str()); } // NOLINT
-
-#if GTEST_HAS_GLOBAL_STRING
-
- // Same as above, for the global ::string type.
- RE(const ::string& regex) { Init(regex.c_str()); } // NOLINT
-
-#endif // GTEST_HAS_GLOBAL_STRING
-
- // Constructs an RE from a C string; also implicit.
- RE(const char* regex) { Init(regex); } // NOLINT
- ~RE();
-
- // Returns the string representation of the regex.
- const char* pattern() const { return pattern_; }
-
- // FullMatch(str, re) returns true iff regular expression re matches
- // the entire str.
- // PartialMatch(str, re) returns true iff regular expression re
- // matches a substring of str (including str itself).
- //
- // TODO(wan@google.com): make FullMatch() and PartialMatch() work
- // when str contains NUL characters.
- //
- // The string overloads below forward str.c_str() to the const char*
- // overloads, which is why embedded NULs are not handled.
- static bool FullMatch(const ::std::string& str, const RE& re) {
- return FullMatch(str.c_str(), re);
- }
- static bool PartialMatch(const ::std::string& str, const RE& re) {
- return PartialMatch(str.c_str(), re);
- }
-
-#if GTEST_HAS_GLOBAL_STRING
-
- static bool FullMatch(const ::string& str, const RE& re) {
- return FullMatch(str.c_str(), re);
- }
- static bool PartialMatch(const ::string& str, const RE& re) {
- return PartialMatch(str.c_str(), re);
- }
-
-#endif // GTEST_HAS_GLOBAL_STRING
-
- // The overloads that do the matching; defined outside this header.
- static bool FullMatch(const char* str, const RE& re);
- static bool PartialMatch(const char* str, const RE& re);
-
- private:
- void Init(const char* regex);
-
- // We use a const char* instead of an std::string, as Google Test used to be
- // used where std::string is not available. TODO(wan@google.com): change to
- // std::string.
- const char* pattern_;
- // NOTE(review): presumably records whether the pattern compiled; it is
- // only written by code outside this chunk - confirm against Init().
- bool is_valid_;
-
-#if GTEST_USES_POSIX_RE
-
- regex_t full_regex_; // For FullMatch().
- regex_t partial_regex_; // For PartialMatch().
-
-#else // GTEST_USES_SIMPLE_RE
-
- const char* full_pattern_; // For FullMatch();
-
-#endif
-
- // Assignment is disallowed; copy construction is provided above.
- GTEST_DISALLOW_ASSIGN_(RE);
-};
-
-// Formats a source file path and a line number as they would appear
-// in an error message from the compiler used to compile this code.
-GTEST_API_ ::std::string FormatFileLocation(const char* file, int line);
-
-// Formats a file location for compiler-independent XML output.
-// Although this function is not platform dependent, we put it next to
-// FormatFileLocation in order to contrast the two functions.
-GTEST_API_ ::std::string FormatCompilerIndependentFileLocation(const char* file,
- int line);
-
-// Defines logging utilities:
-// GTEST_LOG_(severity) - logs messages at the specified severity level. The
-// message itself is streamed into the macro.
-// LogToStderr() - directs all log messages to stderr.
-// FlushInfoLog() - flushes informational log messages.
-
-enum GTestLogSeverity {
- GTEST_INFO,
- GTEST_WARNING,
- GTEST_ERROR,
- GTEST_FATAL
-};
-
-// Formats log entry severity, provides a stream object for streaming the
-// log message, and terminates the message with a newline when going out of
-// scope.
-//
-// Intended to be used as a temporary through the GTEST_LOG_ macro, which
-// constructs one with __FILE__/__LINE__ and streams into GetStream().
-class GTEST_API_ GTestLog {
- public:
- // severity: level of this entry; file/line: source location of the entry.
- GTestLog(GTestLogSeverity severity, const char* file, int line);
-
- // Flushes the buffers and, if severity is GTEST_FATAL, aborts the program.
- ~GTestLog();
-
- // The message stream. This is always std::cerr, whatever the severity.
- ::std::ostream& GetStream() { return ::std::cerr; }
-
- private:
- // Severity given at construction; fixed for the object's lifetime.
- const GTestLogSeverity severity_;
-
- GTEST_DISALLOW_COPY_AND_ASSIGN_(GTestLog);
-};
-
-#if !defined(GTEST_LOG_)
-
-# define GTEST_LOG_(severity) \
- ::testing::internal::GTestLog(::testing::internal::GTEST_##severity, \
- __FILE__, __LINE__).GetStream()
-
-inline void LogToStderr() {}
-inline void FlushInfoLog() { fflush(NULL); }
-
-#endif // !defined(GTEST_LOG_)
-
-#if !defined(GTEST_CHECK_)
-// INTERNAL IMPLEMENTATION - DO NOT USE.
-//
-// GTEST_CHECK_ is an all-mode assert. It aborts the program if the condition
-// is not satisfied.
-// Synopsis:
-// GTEST_CHECK_(boolean_condition);
-// or
-// GTEST_CHECK_(boolean_condition) << "Additional message";
-//
-// This checks the condition and if the condition is not satisfied
-// it prints message about the condition violation, including the
-// condition itself, plus additional message streamed into it, if any,
-// and then it aborts the program. It aborts the program irrespective of
-// whether it is built in the debug mode or not.
-# define GTEST_CHECK_(condition) \
- GTEST_AMBIGUOUS_ELSE_BLOCKER_ \
- if (::testing::internal::IsTrue(condition)) \
- ; \
- else \
- GTEST_LOG_(FATAL) << "Condition " #condition " failed. "
-#endif // !defined(GTEST_CHECK_)
-
-// An all-mode assert to verify that the given POSIX-style function
-// call returns 0 (indicating success). Known limitation: this
-// doesn't expand to a balanced 'if' statement, so enclose the macro
-// in {} if you need to use it as the only statement in an 'if'
-// branch.
-#define GTEST_CHECK_POSIX_SUCCESS_(posix_call) \
- if (const int gtest_error = (posix_call)) \
- GTEST_LOG_(FATAL) << #posix_call << "failed with error " \
- << gtest_error
-
-// Makes an unqualified move() available in this namespace. When
-// GTEST_HAS_STD_MOVE_ is set it is std::move. Otherwise it is an identity
-// function that hands back the same const reference, so call sites still
-// compile but no move actually takes place.
-#if GTEST_HAS_STD_MOVE_
-using std::move;
-#else // GTEST_HAS_STD_MOVE_
-template <typename T>
-const T& move(const T& t) {
- return t;
-}
-#endif // GTEST_HAS_STD_MOVE_
-
-// INTERNAL IMPLEMENTATION - DO NOT USE IN USER CODE.
-//
-// Use ImplicitCast_ as a safe version of static_cast for upcasting in
-// the type hierarchy (e.g. casting a Foo* to a SuperclassOfFoo* or a
-// const Foo*). When you use ImplicitCast_, the compiler checks that
-// the cast is safe. Such explicit ImplicitCast_s are necessary in
-// surprisingly many situations where C++ demands an exact type match
-// instead of an argument type convertible to a target type.
-//
-// The syntax for using ImplicitCast_ is the same as for static_cast:
-//
-// ImplicitCast_<ToType>(expr)
-//
-// ImplicitCast_ would have been part of the C++ standard library,
-// but the proposal was submitted too late. It will probably make
-// its way into the language in the future.
-//
-// This relatively ugly name is intentional. It prevents clashes with
-// similar functions users may have (e.g., implicit_cast). The internal
-// namespace alone is not enough because the function can be found by ADL.
-template <typename To>
-inline To ImplicitCast_(To x) {
-  // The conversion already happened implicitly when the argument was bound
-  // to the To parameter; all that is left is to hand the value back.
-  return x;
-}
-
-// When you upcast (that is, cast a pointer from type Foo to type
-// SuperclassOfFoo), it's fine to use ImplicitCast_<>, since upcasts
-// always succeed. When you downcast (that is, cast a pointer from
-// type Foo to type SubclassOfFoo), static_cast<> isn't safe, because
-// how do you know the pointer is really of type SubclassOfFoo? It
-// could be a bare Foo, or of type DifferentSubclassOfFoo. Thus,
-// when you downcast, you should use this macro. In debug mode, we
-// use dynamic_cast<> to double-check the downcast is legal (we die
-// if it's not). In normal mode, we do the efficient static_cast<>
-// instead. Thus, it's important to test in debug mode to make sure
-// the cast is legal!
-// This is the only place in the code we should use dynamic_cast<>.
-// In particular, you SHOULDN'T be using dynamic_cast<> in order to
-// do RTTI (eg code like this:
-// if (dynamic_cast<Subclass1>(foo)) HandleASubclass1Object(foo);
-// if (dynamic_cast<Subclass2>(foo)) HandleASubclass2Object(foo);
-// You should design the code some other way not to need this.
-//
-// This relatively ugly name is intentional. It prevents clashes with
-// similar functions users may have (e.g., down_cast). The internal
-// namespace alone is not enough because the function can be found by ADL.
-template<typename To, typename From> // use like this: DownCast_<T*>(foo);
-inline To DownCast_(From* f) { // so we only accept pointers
- // Ensures that To is a sub-type of From *. This test is here only
- // for compile-time type checking, and has no overhead in an
- // optimized build at run-time, as it will be optimized away
- // completely.
- // The PUSH/POP pair brackets just the constant condition "if (false)",
- // silencing MSVC's constant-conditional warning (4127) for that line only.
- GTEST_INTENTIONAL_CONST_COND_PUSH_()
- if (false) {
- GTEST_INTENTIONAL_CONST_COND_POP_()
- const To to = NULL;
- // Compiles only if a To converts implicitly to From*.
- ::testing::internal::ImplicitCast_<From*>(to);
- }
-
-#if GTEST_HAS_RTTI
- // RTTI: debug mode only!
- // NOTE(review): despite the remark above, the only guard visible here is
- // GTEST_HAS_RTTI; the check is compiled in whenever RTTI is enabled.
- // A NULL f passes; a non-NULL f must really point to a To.
- GTEST_CHECK_(f == NULL || dynamic_cast<To>(f) != NULL);
-#endif
- return static_cast<To>(f);
-}
-
-// Downcasts the pointer of type Base to Derived.
-// Derived must be a subclass of Base. The parameter MUST
-// point to a class of type Derived, not any subclass of it.
-// When RTTI is available, the function performs a runtime
-// check to enforce this.
-//
-// The cast itself is chosen at preprocessing time: ::down_cast if
-// GTEST_HAS_DOWNCAST_ is set, else dynamic_cast when RTTI is available,
-// else an unchecked static_cast.
-template <class Derived, class Base>
-Derived* CheckedDowncastToActualType(Base* base) {
-#if GTEST_HAS_RTTI
- // Exact dynamic-type match required. Note that typeid(*base) dereferences
- // base, so base is expected to be non-NULL here.
- GTEST_CHECK_(typeid(*base) == typeid(Derived));
-#endif
-
-#if GTEST_HAS_DOWNCAST_
- return ::down_cast<Derived*>(base);
-#elif GTEST_HAS_RTTI
- return dynamic_cast<Derived*>(base); // NOLINT
-#else
- return static_cast<Derived*>(base); // Poor man's downcast.
-#endif
-}
-
-#if GTEST_HAS_STREAM_REDIRECTION
-
-// Defines the stderr capturer:
-// CaptureStdout - starts capturing stdout.
-// GetCapturedStdout - stops capturing stdout and returns the captured string.
-// CaptureStderr - starts capturing stderr.
-// GetCapturedStderr - stops capturing stderr and returns the captured string.
-//
-GTEST_API_ void CaptureStdout();
-GTEST_API_ std::string GetCapturedStdout();
-GTEST_API_ void CaptureStderr();
-GTEST_API_ std::string GetCapturedStderr();
-
-#endif // GTEST_HAS_STREAM_REDIRECTION
-
-// Returns a path to temporary directory.
-GTEST_API_ std::string TempDir();
-
-// Returns the size (in bytes) of a file.
-GTEST_API_ size_t GetFileSize(FILE* file);
-
-// Reads the entire content of a file as a string.
-GTEST_API_ std::string ReadEntireFile(FILE* file);
-
-// All command line arguments.
-GTEST_API_ const ::std::vector<testing::internal::string>& GetArgvs();
-
-#if GTEST_HAS_DEATH_TEST
-
-const ::std::vector<testing::internal::string>& GetInjectableArgvs();
-void SetInjectableArgvs(const ::std::vector<testing::internal::string>*
- new_argvs);
-
-
-#endif // GTEST_HAS_DEATH_TEST
-
-// Defines synchronization primitives.
-#if GTEST_IS_THREADSAFE
-# if GTEST_HAS_PTHREAD
-// Sleeps for (roughly) n milliseconds. This function is only for testing
-// Google Test's own constructs. Don't use it in user tests, either
-// directly or indirectly.
-//
-// nanosleep() rejects a tv_nsec outside [0, 999999999] with EINVAL, so the
-// interval is split into whole seconds plus the sub-second remainder.
-// Putting the whole interval into tv_nsec would make any n >= 1000 return
-// at once without sleeping, and n * 1000L * 1000L could overflow a 32-bit
-// long.
-inline void SleepMilliseconds(int n) {
-  const timespec time = {
-    n / 1000,                    // Whole seconds.
-    (n % 1000) * 1000L * 1000L,  // Remaining milliseconds, in nanoseconds.
-  };
-  nanosleep(&time, NULL);
-}
-# endif // GTEST_HAS_PTHREAD
-
-# if GTEST_HAS_NOTIFICATION_
-// Notification has already been imported into the namespace.
-// Nothing to do here.
-
-# elif GTEST_HAS_PTHREAD
-// Lets a controller thread hold newly created threads back until it says
-// go. Instances must be created and destroyed in the controller thread.
-//
-// This class is only for testing Google Test's own constructs. Do not
-// use it in user tests, either directly or indirectly.
-class Notification {
- public:
-  Notification() : notified_(false) {
-    GTEST_CHECK_POSIX_SUCCESS_(pthread_mutex_init(&mutex_, NULL));
-  }
-  ~Notification() { pthread_mutex_destroy(&mutex_); }
-
-  // Releases every thread waiting on this notification. Must be called
-  // from the controller thread.
-  void Notify() {
-    pthread_mutex_lock(&mutex_);
-    notified_ = true;
-    pthread_mutex_unlock(&mutex_);
-  }
-
-  // Blocks until the controller thread has called Notify(). Must be called
-  // from a test thread. The flag is polled, sleeping 10 ms between looks.
-  void WaitForNotification() {
-    while (!HasBeenNotified()) {
-      SleepMilliseconds(10);
-    }
-  }
-
- private:
-  // Reads the flag while holding the mutex and returns the value read.
-  bool HasBeenNotified() {
-    pthread_mutex_lock(&mutex_);
-    const bool snapshot = notified_;
-    pthread_mutex_unlock(&mutex_);
-    return snapshot;
-  }
-
-  pthread_mutex_t mutex_;
-  bool notified_;
-
-  GTEST_DISALLOW_COPY_AND_ASSIGN_(Notification);
-};
-
-# elif GTEST_OS_WINDOWS && !GTEST_OS_WINDOWS_PHONE && !GTEST_OS_WINDOWS_RT
-
-GTEST_API_ void SleepMilliseconds(int n);
-
-// Provides leak-safe Windows kernel handle ownership.
-// Used in death tests and in threading support.
-class GTEST_API_ AutoHandle {
- public:
- // Assume that Win32 HANDLE type is equivalent to void*. Doing so allows us to
- // avoid including <windows.h> in this header file. Including <windows.h> is
- // undesirable because it defines a lot of symbols and macros that tend to
- // conflict with client code. This assumption is verified by
- // WindowsTypesTest.HANDLEIsVoidStar.
- typedef void* Handle;
- AutoHandle();
- explicit AutoHandle(Handle handle);
-
- ~AutoHandle();
-
- Handle Get() const;
- void Reset();
- void Reset(Handle handle);
-
- private:
- // Returns true iff the handle is a valid handle object that can be closed.
- bool IsCloseable() const;
-
- Handle handle_;
-
- GTEST_DISALLOW_COPY_AND_ASSIGN_(AutoHandle);
-};
-
-// Allows a controller thread to pause execution of newly created
-// threads until notified. Instances of this class must be created
-// and destroyed in the controller thread.
-//
-// This class is only for testing Google Test's own constructs. Do not
-// use it in user tests, either directly or indirectly.
-class GTEST_API_ Notification {
- public:
- Notification();
- void Notify();
- void WaitForNotification();
-
- private:
- AutoHandle event_;
-
- GTEST_DISALLOW_COPY_AND_ASSIGN_(Notification);
-};
-# endif // GTEST_HAS_NOTIFICATION_
-
-// On MinGW, we can have both GTEST_OS_WINDOWS and GTEST_HAS_PTHREAD
-// defined, but we don't want to use MinGW's pthreads implementation, which
-// has conformance problems with some versions of the POSIX standard.
-# if GTEST_HAS_PTHREAD && !GTEST_OS_WINDOWS_MINGW
-
-// As a C-function, ThreadFuncWithCLinkage cannot be templated itself.
-// Consequently, it cannot select a correct instantiation of ThreadWithParam
-// in order to call its Run(). Introducing ThreadWithParamBase as a
-// non-templated base class for ThreadWithParam allows us to bypass this
-// problem.
-class ThreadWithParamBase {
- public:
- virtual ~ThreadWithParamBase() {}
- virtual void Run() = 0;
-};
-
-// pthread_create() accepts a pointer to a function type with the C linkage.
-// According to the Standard (7.5/1), function types with different linkages
-// are different even if they are otherwise identical. Some compilers (for
-// example, SunStudio) treat them as different types. Since class methods
-// cannot be defined with C-linkage we need to define a free C-function to
-// pass into pthread_create().
-extern "C" inline void* ThreadFuncWithCLinkage(void* thread) {
- static_cast<ThreadWithParamBase*>(thread)->Run();
- return NULL;
-}
-
-// Helper class for testing Google Test's multi-threading constructs.
-// To use it, write:
-//
-// void ThreadFunc(int param) { /* Do things with param */ }
-// Notification thread_can_start;
-// ...
-// // The thread_can_start parameter is optional; you can supply NULL.
-// ThreadWithParam<int> thread(&ThreadFunc, 5, &thread_can_start);
-// thread_can_start.Notify();
-//
-// These classes are only for testing Google Test's own constructs. Do
-// not use them in user tests, either directly or indirectly.
-template <typename T>
-class ThreadWithParam : public ThreadWithParamBase {
- public:
- typedef void UserThreadFunc(T);
-
- ThreadWithParam(UserThreadFunc* func, T param, Notification* thread_can_start)
- : func_(func),
- param_(param),
- thread_can_start_(thread_can_start),
- finished_(false) {
- ThreadWithParamBase* const base = this;
- // The thread can be created only after all fields except thread_
- // have been initialized.
- GTEST_CHECK_POSIX_SUCCESS_(
- pthread_create(&thread_, 0, &ThreadFuncWithCLinkage, base));
- }
- ~ThreadWithParam() { Join(); }
-
- void Join() {
- if (!finished_) {
- GTEST_CHECK_POSIX_SUCCESS_(pthread_join(thread_, 0));
- finished_ = true;
- }
- }
-
- virtual void Run() {
- if (thread_can_start_ != NULL)
- thread_can_start_->WaitForNotification();
- func_(param_);
- }
-
- private:
- UserThreadFunc* const func_; // User-supplied thread function.
- const T param_; // User-supplied parameter to the thread function.
- // When non-NULL, used to block execution until the controller thread
- // notifies.
- Notification* const thread_can_start_;
- bool finished_; // true iff we know that the thread function has finished.
- pthread_t thread_; // The native thread object.
-
- GTEST_DISALLOW_COPY_AND_ASSIGN_(ThreadWithParam);
-};
-# endif // !GTEST_OS_WINDOWS && GTEST_HAS_PTHREAD ||
- // GTEST_HAS_MUTEX_AND_THREAD_LOCAL_
-
-# if GTEST_HAS_MUTEX_AND_THREAD_LOCAL_
-// Mutex and ThreadLocal have already been imported into the namespace.
-// Nothing to do here.
-
-# elif GTEST_OS_WINDOWS && !GTEST_OS_WINDOWS_PHONE && !GTEST_OS_WINDOWS_RT
-
-// Mutex implements mutex on Windows platforms. It is used in conjunction
-// with class MutexLock:
-//
-// Mutex mutex;
-// ...
-// MutexLock lock(&mutex); // Acquires the mutex and releases it at the
-// // end of the current scope.
-//
-// A static Mutex *must* be defined or declared using one of the following
-// macros:
-// GTEST_DEFINE_STATIC_MUTEX_(g_some_mutex);
-// GTEST_DECLARE_STATIC_MUTEX_(g_some_mutex);
-//
-// (A non-static Mutex is defined/declared in the usual way).
-class GTEST_API_ Mutex {
- public:
- enum MutexType { kStatic = 0, kDynamic = 1 };
- // We rely on kStaticMutex being 0 as it is to what the linker initializes
- // type_ in static mutexes. critical_section_ will be initialized lazily
- // in ThreadSafeLazyInit().
- enum StaticConstructorSelector { kStaticMutex = 0 };
-
- // This constructor intentionally does nothing. It relies on type_ being
- // statically initialized to 0 (effectively setting it to kStatic) and on
- // ThreadSafeLazyInit() to lazily initialize the rest of the members.
- explicit Mutex(StaticConstructorSelector /*dummy*/) {}
-
- Mutex();
- ~Mutex();
-
- void Lock();
-
- void Unlock();
-
- // Does nothing if the current thread holds the mutex. Otherwise, crashes
- // with high probability.
- void AssertHeld();
-
- private:
- // Initializes owner_thread_id_ and critical_section_ in static mutexes.
- void ThreadSafeLazyInit();
-
- // Per http://blogs.msdn.com/b/oldnewthing/archive/2004/02/23/78395.aspx,
- // we assume that 0 is an invalid value for thread IDs.
- unsigned int owner_thread_id_;
-
- // For static mutexes, we rely on these members being initialized to zeros
- // by the linker.
- MutexType type_;
- long critical_section_init_phase_; // NOLINT
- _RTL_CRITICAL_SECTION* critical_section_;
-
- GTEST_DISALLOW_COPY_AND_ASSIGN_(Mutex);
-};
-
-# define GTEST_DECLARE_STATIC_MUTEX_(mutex) \
- extern ::testing::internal::Mutex mutex
-
-# define GTEST_DEFINE_STATIC_MUTEX_(mutex) \
- ::testing::internal::Mutex mutex(::testing::internal::Mutex::kStaticMutex)
-
-// We cannot name this class MutexLock because the ctor declaration would
-// conflict with a macro named MutexLock, which is defined on some
-// platforms. That macro is used as a defensive measure to prevent against
-// inadvertent misuses of MutexLock like "MutexLock(&mu)" rather than
-// "MutexLock l(&mu)". Hence the typedef trick below.
-class GTestMutexLock {
- public:
- explicit GTestMutexLock(Mutex* mutex)
- : mutex_(mutex) { mutex_->Lock(); }
-
- ~GTestMutexLock() { mutex_->Unlock(); }
-
- private:
- Mutex* const mutex_;
-
- GTEST_DISALLOW_COPY_AND_ASSIGN_(GTestMutexLock);
-};
-
-typedef GTestMutexLock MutexLock;
-
-// Base class for ValueHolder<T>. Allows a caller to hold and delete a value
-// without knowing its type.
-class ThreadLocalValueHolderBase {
- public:
- virtual ~ThreadLocalValueHolderBase() {}
-};
-
-// Provides a way for a thread to send notifications to a ThreadLocal
-// regardless of its parameter type.
-class ThreadLocalBase {
- public:
- // Creates a new ValueHolder<T> object holding a default value passed to
- // this ThreadLocal<T>'s constructor and returns it. It is the caller's
- // responsibility not to call this when the ThreadLocal<T> instance already
- // has a value on the current thread.
- virtual ThreadLocalValueHolderBase* NewValueForCurrentThread() const = 0;
-
- protected:
- ThreadLocalBase() {}
- virtual ~ThreadLocalBase() {}
-
- private:
- GTEST_DISALLOW_COPY_AND_ASSIGN_(ThreadLocalBase);
-};
-
-// Maps a thread to a set of ThreadLocals that have values instantiated on that
-// thread and notifies them when the thread exits. A ThreadLocal instance is
-// expected to persist until all threads it has values on have terminated.
-class GTEST_API_ ThreadLocalRegistry {
- public:
- // Registers thread_local_instance as having value on the current thread.
- // Returns a value that can be used to identify the thread from other threads.
- static ThreadLocalValueHolderBase* GetValueOnCurrentThread(
- const ThreadLocalBase* thread_local_instance);
-
- // Invoked when a ThreadLocal instance is destroyed.
- static void OnThreadLocalDestroyed(
- const ThreadLocalBase* thread_local_instance);
-};
-
-class GTEST_API_ ThreadWithParamBase {
- public:
- void Join();
-
- protected:
- class Runnable {
- public:
- virtual ~Runnable() {}
- virtual void Run() = 0;
- };
-
- ThreadWithParamBase(Runnable *runnable, Notification* thread_can_start);
- virtual ~ThreadWithParamBase();
-
- private:
- AutoHandle thread_;
-};
-
-// Helper class for testing Google Test's multi-threading constructs.
-template <typename T>
-class ThreadWithParam : public ThreadWithParamBase {
- public:
- typedef void UserThreadFunc(T);
-
- ThreadWithParam(UserThreadFunc* func, T param, Notification* thread_can_start)
- : ThreadWithParamBase(new RunnableImpl(func, param), thread_can_start) {
- }
- virtual ~ThreadWithParam() {}
-
- private:
- class RunnableImpl : public Runnable {
- public:
- RunnableImpl(UserThreadFunc* func, T param)
- : func_(func),
- param_(param) {
- }
- virtual ~RunnableImpl() {}
- virtual void Run() {
- func_(param_);
- }
-
- private:
- UserThreadFunc* const func_;
- const T param_;
-
- GTEST_DISALLOW_COPY_AND_ASSIGN_(RunnableImpl);
- };
-
- GTEST_DISALLOW_COPY_AND_ASSIGN_(ThreadWithParam);
-};
-
-// Implements thread-local storage on Windows systems.
-//
-// // Thread 1
-// ThreadLocal<int> tl(100); // 100 is the default value for each thread.
-//
-// // Thread 2
-// tl.set(150); // Changes the value for thread 2 only.
-// EXPECT_EQ(150, tl.get());
-//
-// // Thread 1
-// EXPECT_EQ(100, tl.get()); // In thread 1, tl has the original value.
-// tl.set(200);
-// EXPECT_EQ(200, tl.get());
-//
-// The template type argument T must have a public copy constructor.
-// In addition, the default ThreadLocal constructor requires T to have
-// a public default constructor.
-//
-// The users of a TheadLocal instance have to make sure that all but one
-// threads (including the main one) using that instance have exited before
-// destroying it. Otherwise, the per-thread objects managed for them by the
-// ThreadLocal instance are not guaranteed to be destroyed on all platforms.
-//
-// Google Test only uses global ThreadLocal objects. That means they
-// will die after main() has returned. Therefore, no per-thread
-// object managed by Google Test will be leaked as long as all threads
-// using Google Test have exited when main() returns.
-template <typename T>
-class ThreadLocal : public ThreadLocalBase {
- public:
- ThreadLocal() : default_factory_(new DefaultValueHolderFactory()) {}
- explicit ThreadLocal(const T& value)
- : default_factory_(new InstanceValueHolderFactory(value)) {}
-
- ~ThreadLocal() { ThreadLocalRegistry::OnThreadLocalDestroyed(this); }
-
- T* pointer() { return GetOrCreateValue(); }
- const T* pointer() const { return GetOrCreateValue(); }
- const T& get() const { return *pointer(); }
- void set(const T& value) { *pointer() = value; }
-
- private:
- // Holds a value of T. Can be deleted via its base class without the caller
- // knowing the type of T.
- class ValueHolder : public ThreadLocalValueHolderBase {
- public:
- ValueHolder() : value_() {}
- explicit ValueHolder(const T& value) : value_(value) {}
-
- T* pointer() { return &value_; }
-
- private:
- T value_;
- GTEST_DISALLOW_COPY_AND_ASSIGN_(ValueHolder);
- };
-
-
- T* GetOrCreateValue() const {
- return static_cast<ValueHolder*>(
- ThreadLocalRegistry::GetValueOnCurrentThread(this))->pointer();
- }
-
- virtual ThreadLocalValueHolderBase* NewValueForCurrentThread() const {
- return default_factory_->MakeNewHolder();
- }
-
- class ValueHolderFactory {
- public:
- ValueHolderFactory() {}
- virtual ~ValueHolderFactory() {}
- virtual ValueHolder* MakeNewHolder() const = 0;
-
- private:
- GTEST_DISALLOW_COPY_AND_ASSIGN_(ValueHolderFactory);
- };
-
- class DefaultValueHolderFactory : public ValueHolderFactory {
- public:
- DefaultValueHolderFactory() {}
- virtual ValueHolder* MakeNewHolder() const { return new ValueHolder(); }
-
- private:
- GTEST_DISALLOW_COPY_AND_ASSIGN_(DefaultValueHolderFactory);
- };
-
- class InstanceValueHolderFactory : public ValueHolderFactory {
- public:
- explicit InstanceValueHolderFactory(const T& value) : value_(value) {}
- virtual ValueHolder* MakeNewHolder() const {
- return new ValueHolder(value_);
- }
-
- private:
- const T value_; // The value for each thread.
-
- GTEST_DISALLOW_COPY_AND_ASSIGN_(InstanceValueHolderFactory);
- };
-
- scoped_ptr<ValueHolderFactory> default_factory_;
-
- GTEST_DISALLOW_COPY_AND_ASSIGN_(ThreadLocal);
-};
-
-# elif GTEST_HAS_PTHREAD
-
-// MutexBase and Mutex implement mutex on pthreads-based platforms.
-class MutexBase {
- public:
- // Acquires this mutex.
- void Lock() {
- GTEST_CHECK_POSIX_SUCCESS_(pthread_mutex_lock(&mutex_));
- owner_ = pthread_self();
- has_owner_ = true;
- }
-
- // Releases this mutex.
- void Unlock() {
- // Since the lock is being released the owner_ field should no longer be
- // considered valid. We don't protect writing to has_owner_ here, as it's
- // the caller's responsibility to ensure that the current thread holds the
- // mutex when this is called.
- has_owner_ = false;
- GTEST_CHECK_POSIX_SUCCESS_(pthread_mutex_unlock(&mutex_));
- }
-
- // Does nothing if the current thread holds the mutex. Otherwise, crashes
- // with high probability.
- void AssertHeld() const {
- GTEST_CHECK_(has_owner_ && pthread_equal(owner_, pthread_self()))
- << "The current thread is not holding the mutex @" << this;
- }
-
- // A static mutex may be used before main() is entered. It may even
- // be used before the dynamic initialization stage. Therefore we
- // must be able to initialize a static mutex object at link time.
- // This means MutexBase has to be a POD and its member variables
- // have to be public.
- public:
- pthread_mutex_t mutex_; // The underlying pthread mutex.
- // has_owner_ indicates whether the owner_ field below contains a valid thread
- // ID and is therefore safe to inspect (e.g., to use in pthread_equal()). All
- // accesses to the owner_ field should be protected by a check of this field.
- // An alternative might be to memset() owner_ to all zeros, but there's no
- // guarantee that a zero'd pthread_t is necessarily invalid or even different
- // from pthread_self().
- bool has_owner_;
- pthread_t owner_; // The thread holding the mutex.
-};
-
-// Forward-declares a static mutex.
-# define GTEST_DECLARE_STATIC_MUTEX_(mutex) \
- extern ::testing::internal::MutexBase mutex
-
-// Defines and statically (i.e. at link time) initializes a static mutex.
-# define GTEST_DEFINE_STATIC_MUTEX_(mutex) \
- ::testing::internal::MutexBase mutex = { PTHREAD_MUTEX_INITIALIZER, false, pthread_t() }
-
-// The Mutex class can only be used for mutexes created at runtime. It
-// shares its API with MutexBase otherwise.
-class Mutex : public MutexBase {
- public:
- Mutex() {
- GTEST_CHECK_POSIX_SUCCESS_(pthread_mutex_init(&mutex_, NULL));
- has_owner_ = false;
- }
- ~Mutex() {
- GTEST_CHECK_POSIX_SUCCESS_(pthread_mutex_destroy(&mutex_));
- }
-
- private:
- GTEST_DISALLOW_COPY_AND_ASSIGN_(Mutex);
-};
-
-// We cannot name this class MutexLock because the ctor declaration would
-// conflict with a macro named MutexLock, which is defined on some
-// platforms. That macro is used as a defensive measure to prevent against
-// inadvertent misuses of MutexLock like "MutexLock(&mu)" rather than
-// "MutexLock l(&mu)". Hence the typedef trick below.
-class GTestMutexLock {
- public:
- explicit GTestMutexLock(MutexBase* mutex)
- : mutex_(mutex) { mutex_->Lock(); }
-
- ~GTestMutexLock() { mutex_->Unlock(); }
-
- private:
- MutexBase* const mutex_;
-
- GTEST_DISALLOW_COPY_AND_ASSIGN_(GTestMutexLock);
-};
-
-typedef GTestMutexLock MutexLock;
-
-// Helpers for ThreadLocal.
-
-// pthread_key_create() requires DeleteThreadLocalValue() to have
-// C-linkage. Therefore it cannot be templatized to access
-// ThreadLocal<T>. Hence the need for class
-// ThreadLocalValueHolderBase.
-class ThreadLocalValueHolderBase {
- public:
- virtual ~ThreadLocalValueHolderBase() {}
-};
-
-// Called by pthread to delete thread-local data stored by
-// pthread_setspecific().
-extern "C" inline void DeleteThreadLocalValue(void* value_holder) {
- delete static_cast<ThreadLocalValueHolderBase*>(value_holder);
-}
-
-// Implements thread-local storage on pthreads-based systems.
-template <typename T>
-class ThreadLocal {
- public:
- ThreadLocal()
- : key_(CreateKey()), default_factory_(new DefaultValueHolderFactory()) {}
- explicit ThreadLocal(const T& value)
- : key_(CreateKey()),
- default_factory_(new InstanceValueHolderFactory(value)) {}
-
- ~ThreadLocal() {
- // Destroys the managed object for the current thread, if any.
- DeleteThreadLocalValue(pthread_getspecific(key_));
-
- // Releases resources associated with the key. This will *not*
- // delete managed objects for other threads.
- GTEST_CHECK_POSIX_SUCCESS_(pthread_key_delete(key_));
- }
-
- T* pointer() { return GetOrCreateValue(); }
- const T* pointer() const { return GetOrCreateValue(); }
- const T& get() const { return *pointer(); }
- void set(const T& value) { *pointer() = value; }
-
- private:
- // Holds a value of type T.
- class ValueHolder : public ThreadLocalValueHolderBase {
- public:
- ValueHolder() : value_() {}
- explicit ValueHolder(const T& value) : value_(value) {}
-
- T* pointer() { return &value_; }
-
- private:
- T value_;
- GTEST_DISALLOW_COPY_AND_ASSIGN_(ValueHolder);
- };
-
- static pthread_key_t CreateKey() {
- pthread_key_t key;
- // When a thread exits, DeleteThreadLocalValue() will be called on
- // the object managed for that thread.
- GTEST_CHECK_POSIX_SUCCESS_(
- pthread_key_create(&key, &DeleteThreadLocalValue));
- return key;
- }
-
- T* GetOrCreateValue() const {
- ThreadLocalValueHolderBase* const holder =
- static_cast<ThreadLocalValueHolderBase*>(pthread_getspecific(key_));
- if (holder != NULL) {
- return CheckedDowncastToActualType<ValueHolder>(holder)->pointer();
- }
-
- ValueHolder* const new_holder = default_factory_->MakeNewHolder();
- ThreadLocalValueHolderBase* const holder_base = new_holder;
- GTEST_CHECK_POSIX_SUCCESS_(pthread_setspecific(key_, holder_base));
- return new_holder->pointer();
- }
-
- class ValueHolderFactory {
- public:
- ValueHolderFactory() {}
- virtual ~ValueHolderFactory() {}
- virtual ValueHolder* MakeNewHolder() const = 0;
-
- private:
- GTEST_DISALLOW_COPY_AND_ASSIGN_(ValueHolderFactory);
- };
-
- class DefaultValueHolderFactory : public ValueHolderFactory {
- public:
- DefaultValueHolderFactory() {}
- virtual ValueHolder* MakeNewHolder() const { return new ValueHolder(); }
-
- private:
- GTEST_DISALLOW_COPY_AND_ASSIGN_(DefaultValueHolderFactory);
- };
-
- class InstanceValueHolderFactory : public ValueHolderFactory {
- public:
- explicit InstanceValueHolderFactory(const T& value) : value_(value) {}
- virtual ValueHolder* MakeNewHolder() const {
- return new ValueHolder(value_);
- }
-
- private:
- const T value_; // The value for each thread.
-
- GTEST_DISALLOW_COPY_AND_ASSIGN_(InstanceValueHolderFactory);
- };
-
- // A key pthreads uses for looking up per-thread values.
- const pthread_key_t key_;
- scoped_ptr<ValueHolderFactory> default_factory_;
-
- GTEST_DISALLOW_COPY_AND_ASSIGN_(ThreadLocal);
-};
-
-# endif // GTEST_HAS_MUTEX_AND_THREAD_LOCAL_
-
-#else // GTEST_IS_THREADSAFE
-
-// A dummy implementation of synchronization primitives (mutex, lock,
-// and thread-local variable). Necessary for compiling Google Test where
-// mutex is not supported - using Google Test in multiple threads is not
-// supported on such platforms.
-
-class Mutex {
- public:
- Mutex() {}
- void Lock() {}
- void Unlock() {}
- void AssertHeld() const {}
-};
-
-# define GTEST_DECLARE_STATIC_MUTEX_(mutex) \
- extern ::testing::internal::Mutex mutex
-
-# define GTEST_DEFINE_STATIC_MUTEX_(mutex) ::testing::internal::Mutex mutex
-
-// We cannot name this class MutexLock because the ctor declaration would
-// conflict with a macro named MutexLock, which is defined on some
-// platforms. That macro is used as a defensive measure to prevent against
-// inadvertent misuses of MutexLock like "MutexLock(&mu)" rather than
-// "MutexLock l(&mu)". Hence the typedef trick below.
-class GTestMutexLock {
- public:
- explicit GTestMutexLock(Mutex*) {} // NOLINT
-};
-
-typedef GTestMutexLock MutexLock;
-
-template <typename T>
-class ThreadLocal {
- public:
- ThreadLocal() : value_() {}
- explicit ThreadLocal(const T& value) : value_(value) {}
- T* pointer() { return &value_; }
- const T* pointer() const { return &value_; }
- const T& get() const { return value_; }
- void set(const T& value) { value_ = value; }
- private:
- T value_;
-};
-
-#endif // GTEST_IS_THREADSAFE
-
-// Returns the number of threads running in the process, or 0 to indicate that
-// we cannot detect it.
-GTEST_API_ size_t GetThreadCount();
-
-// Passing non-POD classes through ellipsis (...) crashes the ARM
-// compiler and generates a warning in Sun Studio. The Nokia Symbian
-// and the IBM XL C/C++ compiler try to instantiate a copy constructor
-// for objects passed through ellipsis (...), failing for uncopyable
-// objects. We define this to ensure that only POD is passed through
-// ellipsis on these systems.
-#if defined(__SYMBIAN32__) || defined(__IBMCPP__) || defined(__SUNPRO_CC)
-// We lose support for NULL detection where the compiler doesn't like
-// passing non-POD classes through ellipsis (...).
-# define GTEST_ELLIPSIS_NEEDS_POD_ 1
-#else
-# define GTEST_CAN_COMPARE_NULL 1
-#endif
-
-// The Nokia Symbian and IBM XL C/C++ compilers cannot decide between
-// const T& and const T* in a function template. These compilers
-// _can_ decide between class template specializations for T and T*,
-// so a tr1::type_traits-like is_pointer works.
-#if defined(__SYMBIAN32__) || defined(__IBMCPP__)
-# define GTEST_NEEDS_IS_POINTER_ 1
-#endif
-
-template <bool bool_value>
-struct bool_constant {
- typedef bool_constant<bool_value> type;
- static const bool value = bool_value;
-};
-template <bool bool_value> const bool bool_constant<bool_value>::value;
-
-typedef bool_constant<false> false_type;
-typedef bool_constant<true> true_type;
-
-template <typename T>
-struct is_pointer : public false_type {};
-
-template <typename T>
-struct is_pointer<T*> : public true_type {};
-
-template <typename Iterator>
-struct IteratorTraits {
- typedef typename Iterator::value_type value_type;
-};
-
-template <typename T>
-struct IteratorTraits<T*> {
- typedef T value_type;
-};
-
-template <typename T>
-struct IteratorTraits<const T*> {
- typedef T value_type;
-};
-
-#if GTEST_OS_WINDOWS
-# define GTEST_PATH_SEP_ "\\"
-# define GTEST_HAS_ALT_PATH_SEP_ 1
-// The biggest signed integer type the compiler supports.
-typedef __int64 BiggestInt;
-#else
-# define GTEST_PATH_SEP_ "/"
-# define GTEST_HAS_ALT_PATH_SEP_ 0
-typedef long long BiggestInt; // NOLINT
-#endif // GTEST_OS_WINDOWS
-
-// Utilities for char.
-
-// isspace(int ch) and friends accept an unsigned char or EOF. char
-// may be signed, depending on the compiler (or compiler flags).
-// Therefore we need to cast a char to unsigned char before calling
-// isspace(), etc.
-
-inline bool IsAlpha(char ch) {
- return isalpha(static_cast<unsigned char>(ch)) != 0;
-}
-inline bool IsAlNum(char ch) {
- return isalnum(static_cast<unsigned char>(ch)) != 0;
-}
-inline bool IsDigit(char ch) {
- return isdigit(static_cast<unsigned char>(ch)) != 0;
-}
-inline bool IsLower(char ch) {
- return islower(static_cast<unsigned char>(ch)) != 0;
-}
-inline bool IsSpace(char ch) {
- return isspace(static_cast<unsigned char>(ch)) != 0;
-}
-inline bool IsUpper(char ch) {
- return isupper(static_cast<unsigned char>(ch)) != 0;
-}
-inline bool IsXDigit(char ch) {
- return isxdigit(static_cast<unsigned char>(ch)) != 0;
-}
-inline bool IsXDigit(wchar_t ch) {
- const unsigned char low_byte = static_cast<unsigned char>(ch);
- return ch == low_byte && isxdigit(low_byte) != 0;
-}
-
-inline char ToLower(char ch) {
- return static_cast<char>(tolower(static_cast<unsigned char>(ch)));
-}
-inline char ToUpper(char ch) {
- return static_cast<char>(toupper(static_cast<unsigned char>(ch)));
-}
-
-inline std::string StripTrailingSpaces(std::string str) {
- std::string::iterator it = str.end();
- while (it != str.begin() && IsSpace(*--it))
- it = str.erase(it);
- return str;
-}
-
-// The testing::internal::posix namespace holds wrappers for common
-// POSIX functions. These wrappers hide the differences between
-// Windows/MSVC and POSIX systems. Since some compilers define these
-// standard functions as macros, the wrapper cannot have the same name
-// as the wrapped function.
-
-namespace posix {
-
-// Functions with a different name on Windows.
-
-#if GTEST_OS_WINDOWS
-
-typedef struct _stat StatStruct;
-
-# ifdef __BORLANDC__
-inline int IsATTY(int fd) { return isatty(fd); }
-inline int StrCaseCmp(const char* s1, const char* s2) {
- return stricmp(s1, s2);
-}
-inline char* StrDup(const char* src) { return strdup(src); }
-# else // !__BORLANDC__
-# if GTEST_OS_WINDOWS_MOBILE
-inline int IsATTY(int /* fd */) { return 0; }
-# else
-inline int IsATTY(int fd) { return _isatty(fd); }
-# endif // GTEST_OS_WINDOWS_MOBILE
-inline int StrCaseCmp(const char* s1, const char* s2) {
- return _stricmp(s1, s2);
-}
-inline char* StrDup(const char* src) { return _strdup(src); }
-# endif // __BORLANDC__
-
-# if GTEST_OS_WINDOWS_MOBILE
-inline int FileNo(FILE* file) { return reinterpret_cast<int>(_fileno(file)); }
-// Stat(), RmDir(), and IsDir() are not needed on Windows CE at this
-// time and thus not defined there.
-# else
-inline int FileNo(FILE* file) { return _fileno(file); }
-inline int Stat(const char* path, StatStruct* buf) { return _stat(path, buf); }
-inline int RmDir(const char* dir) { return _rmdir(dir); }
-inline bool IsDir(const StatStruct& st) {
- return (_S_IFDIR & st.st_mode) != 0;
-}
-# endif // GTEST_OS_WINDOWS_MOBILE
-
-#else
-
-typedef struct stat StatStruct;
-
-inline int FileNo(FILE* file) { return fileno(file); }
-inline int IsATTY(int fd) { return isatty(fd); }
-inline int Stat(const char* path, StatStruct* buf) { return stat(path, buf); }
-inline int StrCaseCmp(const char* s1, const char* s2) {
- return strcasecmp(s1, s2);
-}
-inline char* StrDup(const char* src) { return strdup(src); }
-inline int RmDir(const char* dir) { return rmdir(dir); }
-inline bool IsDir(const StatStruct& st) { return S_ISDIR(st.st_mode); }
-
-#endif // GTEST_OS_WINDOWS
-
-// Functions deprecated by MSVC 8.0.
-
-GTEST_DISABLE_MSC_WARNINGS_PUSH_(4996 /* deprecated function */)
-
-inline const char* StrNCpy(char* dest, const char* src, size_t n) {
- return strncpy(dest, src, n);
-}
-
-// ChDir(), FReopen(), FDOpen(), Read(), Write(), Close(), and
-// StrError() aren't needed on Windows CE at this time and thus not
-// defined there.
-
-#if !GTEST_OS_WINDOWS_MOBILE && !GTEST_OS_WINDOWS_PHONE && !GTEST_OS_WINDOWS_RT
-inline int ChDir(const char* dir) { return chdir(dir); }
-#endif
-inline FILE* FOpen(const char* path, const char* mode) {
- return fopen(path, mode);
-}
-#if !GTEST_OS_WINDOWS_MOBILE
-inline FILE *FReopen(const char* path, const char* mode, FILE* stream) {
- return freopen(path, mode, stream);
-}
-inline FILE* FDOpen(int fd, const char* mode) { return fdopen(fd, mode); }
-#endif
-inline int FClose(FILE* fp) { return fclose(fp); }
-#if !GTEST_OS_WINDOWS_MOBILE
-inline int Read(int fd, void* buf, unsigned int count) {
- return static_cast<int>(read(fd, buf, count));
-}
-inline int Write(int fd, const void* buf, unsigned int count) {
- return static_cast<int>(write(fd, buf, count));
-}
-inline int Close(int fd) { return close(fd); }
-inline const char* StrError(int errnum) { return strerror(errnum); }
-#endif
-inline const char* GetEnv(const char* name) {
-#if GTEST_OS_WINDOWS_MOBILE || GTEST_OS_WINDOWS_PHONE | GTEST_OS_WINDOWS_RT
- // We are on Windows CE, which has no environment variables.
- static_cast<void>(name); // To prevent 'unused argument' warning.
- return NULL;
-#elif defined(__BORLANDC__) || defined(__SunOS_5_8) || defined(__SunOS_5_9)
- // Environment variables which we programmatically clear will be set to the
- // empty string rather than unset (NULL). Handle that case.
- const char* const env = getenv(name);
- return (env != NULL && env[0] != '\0') ? env : NULL;
-#else
- return getenv(name);
-#endif
-}
-
-GTEST_DISABLE_MSC_WARNINGS_POP_()
-
-#if GTEST_OS_WINDOWS_MOBILE
-// Windows CE has no C library. The abort() function is used in
-// several places in Google Test. This implementation provides a reasonable
-// imitation of standard behaviour.
-void Abort();
-#else
-inline void Abort() { abort(); }
-#endif // GTEST_OS_WINDOWS_MOBILE
-
-} // namespace posix
-
-// MSVC "deprecates" snprintf and issues warnings wherever it is used. In
-// order to avoid these warnings, we need to use _snprintf or _snprintf_s on
-// MSVC-based platforms. We map the GTEST_SNPRINTF_ macro to the appropriate
-// function in order to achieve that. We use macro definition here because
-// snprintf is a variadic function.
-#if _MSC_VER >= 1400 && !GTEST_OS_WINDOWS_MOBILE
-// MSVC 2005 and above support variadic macros.
-# define GTEST_SNPRINTF_(buffer, size, format, ...) \
- _snprintf_s(buffer, size, size, format, __VA_ARGS__)
-#elif defined(_MSC_VER)
-// Windows CE does not define _snprintf_s and MSVC prior to 2005 doesn't
-// complain about _snprintf.
-# define GTEST_SNPRINTF_ _snprintf
-#else
-# define GTEST_SNPRINTF_ snprintf
-#endif
-
-// The maximum number a BiggestInt can represent. This definition
-// works no matter BiggestInt is represented in one's complement or
-// two's complement.
-//
-// We cannot rely on numeric_limits in STL, as __int64 and long long
-// are not part of standard C++ and numeric_limits doesn't need to be
-// defined for them.
-const BiggestInt kMaxBiggestInt =
- ~(static_cast<BiggestInt>(1) << (8*sizeof(BiggestInt) - 1));
-
-// This template class serves as a compile-time function from size to
-// type. It maps a size in bytes to a primitive type with that
-// size. e.g.
-//
-// TypeWithSize<4>::UInt
-//
-// is typedef-ed to be unsigned int (unsigned integer made up of 4
-// bytes).
-//
-// Such functionality should belong to STL, but I cannot find it
-// there.
-//
-// Google Test uses this class in the implementation of floating-point
-// comparison.
-//
-// For now it only handles UInt (unsigned int) as that's all Google Test
-// needs. Other types can be easily added in the future if need
-// arises.
-template <size_t size>
-class TypeWithSize {
- public:
- // This prevents the user from using TypeWithSize<N> with incorrect
- // values of N.
- typedef void UInt;
-};
-
-// The specialization for size 4.
-template <>
-class TypeWithSize<4> {
- public:
- // unsigned int has size 4 in both gcc and MSVC.
- //
- // As base/basictypes.h doesn't compile on Windows, we cannot use
- // uint32, uint64, and etc here.
- typedef int Int;
- typedef unsigned int UInt;
-};
-
-// The specialization for size 8.
-template <>
-class TypeWithSize<8> {
- public:
-#if GTEST_OS_WINDOWS
- typedef __int64 Int;
- typedef unsigned __int64 UInt;
-#else
- typedef long long Int; // NOLINT
- typedef unsigned long long UInt; // NOLINT
-#endif // GTEST_OS_WINDOWS
-};
-
-// Integer types of known sizes.
-typedef TypeWithSize<4>::Int Int32;
-typedef TypeWithSize<4>::UInt UInt32;
-typedef TypeWithSize<8>::Int Int64;
-typedef TypeWithSize<8>::UInt UInt64;
-typedef TypeWithSize<8>::Int TimeInMillis; // Represents time in milliseconds.
-
-// Utilities for command line flags and environment variables.
-
-// Macro for referencing flags.
-#if !defined(GTEST_FLAG)
-# define GTEST_FLAG(name) FLAGS_gtest_##name
-#endif // !defined(GTEST_FLAG)
-
-#if !defined(GTEST_USE_OWN_FLAGFILE_FLAG_)
-# define GTEST_USE_OWN_FLAGFILE_FLAG_ 1
-#endif // !defined(GTEST_USE_OWN_FLAGFILE_FLAG_)
-
-#if !defined(GTEST_DECLARE_bool_)
-# define GTEST_FLAG_SAVER_ ::testing::internal::GTestFlagSaver
-
-// Macros for declaring flags.
-# define GTEST_DECLARE_bool_(name) GTEST_API_ extern bool GTEST_FLAG(name)
-# define GTEST_DECLARE_int32_(name) \
- GTEST_API_ extern ::testing::internal::Int32 GTEST_FLAG(name)
-#define GTEST_DECLARE_string_(name) \
- GTEST_API_ extern ::std::string GTEST_FLAG(name)
-
-// Macros for defining flags.
-#define GTEST_DEFINE_bool_(name, default_val, doc) \
- GTEST_API_ bool GTEST_FLAG(name) = (default_val)
-#define GTEST_DEFINE_int32_(name, default_val, doc) \
- GTEST_API_ ::testing::internal::Int32 GTEST_FLAG(name) = (default_val)
-#define GTEST_DEFINE_string_(name, default_val, doc) \
- GTEST_API_ ::std::string GTEST_FLAG(name) = (default_val)
-
-#endif // !defined(GTEST_DECLARE_bool_)
-
-// Thread annotations
-#if !defined(GTEST_EXCLUSIVE_LOCK_REQUIRED_)
-# define GTEST_EXCLUSIVE_LOCK_REQUIRED_(locks)
-# define GTEST_LOCK_EXCLUDED_(locks)
-#endif // !defined(GTEST_EXCLUSIVE_LOCK_REQUIRED_)
-
-// Parses 'str' for a 32-bit signed integer. If successful, writes the result
-// to *value and returns true; otherwise leaves *value unchanged and returns
-// false.
-// TODO(chandlerc): Find a better way to refactor flag and environment parsing
-// out of both gtest-port.cc and gtest.cc to avoid exporting this utility
-// function.
-bool ParseInt32(const Message& src_text, const char* str, Int32* value);
-
-// Parses a bool/Int32/string from the environment variable
-// corresponding to the given Google Test flag.
-bool BoolFromGTestEnv(const char* flag, bool default_val);
-GTEST_API_ Int32 Int32FromGTestEnv(const char* flag, Int32 default_val);
-std::string StringFromGTestEnv(const char* flag, const char* default_val);
-
-} // namespace internal
-} // namespace testing
-
-#endif // GTEST_INCLUDE_GTEST_INTERNAL_GTEST_PORT_H_
diff --git a/third_party/aom/third_party/googletest/src/googletest/include/gtest/internal/gtest-string.h b/third_party/aom/third_party/googletest/src/googletest/include/gtest/internal/gtest-string.h
deleted file mode 100644
index 97f1a7fdd..000000000
--- a/third_party/aom/third_party/googletest/src/googletest/include/gtest/internal/gtest-string.h
+++ /dev/null
@@ -1,167 +0,0 @@
-// Copyright 2005, Google Inc.
-// All rights reserved.
-//
-// Redistribution and use in source and binary forms, with or without
-// modification, are permitted provided that the following conditions are
-// met:
-//
-// * Redistributions of source code must retain the above copyright
-// notice, this list of conditions and the following disclaimer.
-// * Redistributions in binary form must reproduce the above
-// copyright notice, this list of conditions and the following disclaimer
-// in the documentation and/or other materials provided with the
-// distribution.
-// * Neither the name of Google Inc. nor the names of its
-// contributors may be used to endorse or promote products derived from
-// this software without specific prior written permission.
-//
-// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
-// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
-// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
-// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
-// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
-// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
-// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
-// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
-// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-//
-// Authors: wan@google.com (Zhanyong Wan), eefacm@gmail.com (Sean Mcafee)
-//
-// The Google C++ Testing Framework (Google Test)
-//
-// This header file declares the String class and functions used internally by
-// Google Test. They are subject to change without notice. They should not used
-// by code external to Google Test.
-//
-// This header file is #included by <gtest/internal/gtest-internal.h>.
-// It should not be #included by other files.
-
-#ifndef GTEST_INCLUDE_GTEST_INTERNAL_GTEST_STRING_H_
-#define GTEST_INCLUDE_GTEST_INTERNAL_GTEST_STRING_H_
-
-#ifdef __BORLANDC__
-// string.h is not guaranteed to provide strcpy on C++ Builder.
-# include <mem.h>
-#endif
-
-#include <string.h>
-#include <string>
-
-#include "gtest/internal/gtest-port.h"
-
-namespace testing {
-namespace internal {
-
-// String - an abstract class holding static string utilities.
-class GTEST_API_ String {
- public:
- // Static utility methods
-
- // Clones a 0-terminated C string, allocating memory using new. The
- // caller is responsible for deleting the return value using
- // delete[]. Returns the cloned string, or NULL if the input is
- // NULL.
- //
- // This is different from strdup() in string.h, which allocates
- // memory using malloc().
- static const char* CloneCString(const char* c_str);
-
-#if GTEST_OS_WINDOWS_MOBILE
- // Windows CE does not have the 'ANSI' versions of Win32 APIs. To be
- // able to pass strings to Win32 APIs on CE we need to convert them
- // to 'Unicode', UTF-16.
-
- // Creates a UTF-16 wide string from the given ANSI string, allocating
- // memory using new. The caller is responsible for deleting the return
- // value using delete[]. Returns the wide string, or NULL if the
- // input is NULL.
- //
- // The wide string is created using the ANSI codepage (CP_ACP) to
- // match the behaviour of the ANSI versions of Win32 calls and the
- // C runtime.
- static LPCWSTR AnsiToUtf16(const char* c_str);
-
- // Creates an ANSI string from the given wide string, allocating
- // memory using new. The caller is responsible for deleting the return
- // value using delete[]. Returns the ANSI string, or NULL if the
- // input is NULL.
- //
- // The returned string is created using the ANSI codepage (CP_ACP) to
- // match the behaviour of the ANSI versions of Win32 calls and the
- // C runtime.
- static const char* Utf16ToAnsi(LPCWSTR utf16_str);
-#endif
-
- // Compares two C strings. Returns true iff they have the same content.
- //
- // Unlike strcmp(), this function can handle NULL argument(s). A
- // NULL C string is considered different to any non-NULL C string,
- // including the empty string.
- static bool CStringEquals(const char* lhs, const char* rhs);
-
- // Converts a wide C string to a String using the UTF-8 encoding.
- // NULL will be converted to "(null)". If an error occurred during
- // the conversion, "(failed to convert from wide string)" is
- // returned.
- static std::string ShowWideCString(const wchar_t* wide_c_str);
-
- // Compares two wide C strings. Returns true iff they have the same
- // content.
- //
- // Unlike wcscmp(), this function can handle NULL argument(s). A
- // NULL C string is considered different to any non-NULL C string,
- // including the empty string.
- static bool WideCStringEquals(const wchar_t* lhs, const wchar_t* rhs);
-
- // Compares two C strings, ignoring case. Returns true iff they
- // have the same content.
- //
- // Unlike strcasecmp(), this function can handle NULL argument(s).
- // A NULL C string is considered different to any non-NULL C string,
- // including the empty string.
- static bool CaseInsensitiveCStringEquals(const char* lhs,
- const char* rhs);
-
- // Compares two wide C strings, ignoring case. Returns true iff they
- // have the same content.
- //
- // Unlike wcscasecmp(), this function can handle NULL argument(s).
- // A NULL C string is considered different to any non-NULL wide C string,
- // including the empty string.
- // NB: The implementations on different platforms slightly differ.
- // On windows, this method uses _wcsicmp which compares according to LC_CTYPE
- // environment variable. On GNU platform this method uses wcscasecmp
- // which compares according to LC_CTYPE category of the current locale.
- // On MacOS X, it uses towlower, which also uses LC_CTYPE category of the
- // current locale.
- static bool CaseInsensitiveWideCStringEquals(const wchar_t* lhs,
- const wchar_t* rhs);
-
- // Returns true iff the given string ends with the given suffix, ignoring
- // case. Any string is considered to end with an empty suffix.
- static bool EndsWithCaseInsensitive(
- const std::string& str, const std::string& suffix);
-
- // Formats an int value as "%02d".
- static std::string FormatIntWidth2(int value); // "%02d" for width == 2
-
- // Formats an int value as "%X".
- static std::string FormatHexInt(int value);
-
- // Formats a byte as "%02X".
- static std::string FormatByte(unsigned char value);
-
- private:
- String(); // Not meant to be instantiated.
-}; // class String
-
-// Gets the content of the stringstream's buffer as an std::string. Each '\0'
-// character in the buffer is replaced with "\\0".
-GTEST_API_ std::string StringStreamToString(::std::stringstream* stream);
-
-} // namespace internal
-} // namespace testing
-
-#endif // GTEST_INCLUDE_GTEST_INTERNAL_GTEST_STRING_H_
diff --git a/third_party/aom/third_party/googletest/src/googletest/include/gtest/internal/gtest-tuple.h b/third_party/aom/third_party/googletest/src/googletest/include/gtest/internal/gtest-tuple.h
deleted file mode 100644
index e9b405340..000000000
--- a/third_party/aom/third_party/googletest/src/googletest/include/gtest/internal/gtest-tuple.h
+++ /dev/null
@@ -1,1020 +0,0 @@
-// This file was GENERATED by command:
-// pump.py gtest-tuple.h.pump
-// DO NOT EDIT BY HAND!!!
-
-// Copyright 2009 Google Inc.
-// All Rights Reserved.
-//
-// Redistribution and use in source and binary forms, with or without
-// modification, are permitted provided that the following conditions are
-// met:
-//
-// * Redistributions of source code must retain the above copyright
-// notice, this list of conditions and the following disclaimer.
-// * Redistributions in binary form must reproduce the above
-// copyright notice, this list of conditions and the following disclaimer
-// in the documentation and/or other materials provided with the
-// distribution.
-// * Neither the name of Google Inc. nor the names of its
-// contributors may be used to endorse or promote products derived from
-// this software without specific prior written permission.
-//
-// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
-// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
-// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
-// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
-// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
-// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
-// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
-// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
-// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-//
-// Author: wan@google.com (Zhanyong Wan)
-
-// Implements a subset of TR1 tuple needed by Google Test and Google Mock.
-
-#ifndef GTEST_INCLUDE_GTEST_INTERNAL_GTEST_TUPLE_H_
-#define GTEST_INCLUDE_GTEST_INTERNAL_GTEST_TUPLE_H_
-
-#include <utility> // For ::std::pair.
-
-// The compiler used in Symbian has a bug that prevents us from declaring the
-// tuple template as a friend (it complains that tuple is redefined). This
-// hack bypasses the bug by declaring the members that should otherwise be
-// private as public.
-// Sun Studio versions < 12 also have the above bug.
-#if defined(__SYMBIAN32__) || (defined(__SUNPRO_CC) && __SUNPRO_CC < 0x590)
-# define GTEST_DECLARE_TUPLE_AS_FRIEND_ public:
-#else
-# define GTEST_DECLARE_TUPLE_AS_FRIEND_ \
- template <GTEST_10_TYPENAMES_(U)> friend class tuple; \
- private:
-#endif
-
-// Visual Studio 2010, 2012, and 2013 define symbols in std::tr1 that conflict
-// with our own definitions. Therefore using our own tuple does not work on
-// those compilers.
-#if defined(_MSC_VER) && _MSC_VER >= 1600 /* 1600 is Visual Studio 2010 */
-# error "gtest's tuple doesn't compile on Visual Studio 2010 or later. \
-GTEST_USE_OWN_TR1_TUPLE must be set to 0 on those compilers."
-#endif
-
-// GTEST_n_TUPLE_(T) is the type of an n-tuple.
-#define GTEST_0_TUPLE_(T) tuple<>
-#define GTEST_1_TUPLE_(T) tuple<T##0, void, void, void, void, void, void, \
- void, void, void>
-#define GTEST_2_TUPLE_(T) tuple<T##0, T##1, void, void, void, void, void, \
- void, void, void>
-#define GTEST_3_TUPLE_(T) tuple<T##0, T##1, T##2, void, void, void, void, \
- void, void, void>
-#define GTEST_4_TUPLE_(T) tuple<T##0, T##1, T##2, T##3, void, void, void, \
- void, void, void>
-#define GTEST_5_TUPLE_(T) tuple<T##0, T##1, T##2, T##3, T##4, void, void, \
- void, void, void>
-#define GTEST_6_TUPLE_(T) tuple<T##0, T##1, T##2, T##3, T##4, T##5, void, \
- void, void, void>
-#define GTEST_7_TUPLE_(T) tuple<T##0, T##1, T##2, T##3, T##4, T##5, T##6, \
- void, void, void>
-#define GTEST_8_TUPLE_(T) tuple<T##0, T##1, T##2, T##3, T##4, T##5, T##6, \
- T##7, void, void>
-#define GTEST_9_TUPLE_(T) tuple<T##0, T##1, T##2, T##3, T##4, T##5, T##6, \
- T##7, T##8, void>
-#define GTEST_10_TUPLE_(T) tuple<T##0, T##1, T##2, T##3, T##4, T##5, T##6, \
- T##7, T##8, T##9>
-
-// GTEST_n_TYPENAMES_(T) declares a list of n typenames.
-#define GTEST_0_TYPENAMES_(T)
-#define GTEST_1_TYPENAMES_(T) typename T##0
-#define GTEST_2_TYPENAMES_(T) typename T##0, typename T##1
-#define GTEST_3_TYPENAMES_(T) typename T##0, typename T##1, typename T##2
-#define GTEST_4_TYPENAMES_(T) typename T##0, typename T##1, typename T##2, \
- typename T##3
-#define GTEST_5_TYPENAMES_(T) typename T##0, typename T##1, typename T##2, \
- typename T##3, typename T##4
-#define GTEST_6_TYPENAMES_(T) typename T##0, typename T##1, typename T##2, \
- typename T##3, typename T##4, typename T##5
-#define GTEST_7_TYPENAMES_(T) typename T##0, typename T##1, typename T##2, \
- typename T##3, typename T##4, typename T##5, typename T##6
-#define GTEST_8_TYPENAMES_(T) typename T##0, typename T##1, typename T##2, \
- typename T##3, typename T##4, typename T##5, typename T##6, typename T##7
-#define GTEST_9_TYPENAMES_(T) typename T##0, typename T##1, typename T##2, \
- typename T##3, typename T##4, typename T##5, typename T##6, \
- typename T##7, typename T##8
-#define GTEST_10_TYPENAMES_(T) typename T##0, typename T##1, typename T##2, \
- typename T##3, typename T##4, typename T##5, typename T##6, \
- typename T##7, typename T##8, typename T##9
-
-// In theory, defining stuff in the ::std namespace is undefined
-// behavior. We can do this as we are playing the role of a standard
-// library vendor.
-namespace std {
-namespace tr1 {
-
-template <typename T0 = void, typename T1 = void, typename T2 = void,
- typename T3 = void, typename T4 = void, typename T5 = void,
- typename T6 = void, typename T7 = void, typename T8 = void,
- typename T9 = void>
-class tuple;
-
-// Anything in namespace gtest_internal is Google Test's INTERNAL
-// IMPLEMENTATION DETAIL and MUST NOT BE USED DIRECTLY in user code.
-namespace gtest_internal {
-
-// ByRef<T>::type is T if T is a reference; otherwise it's const T&.
-template <typename T>
-struct ByRef { typedef const T& type; }; // NOLINT
-template <typename T>
-struct ByRef<T&> { typedef T& type; }; // NOLINT
-
-// A handy wrapper for ByRef.
-#define GTEST_BY_REF_(T) typename ::std::tr1::gtest_internal::ByRef<T>::type
-
-// AddRef<T>::type is T if T is a reference; otherwise it's T&. This
-// is the same as tr1::add_reference<T>::type.
-template <typename T>
-struct AddRef { typedef T& type; }; // NOLINT
-template <typename T>
-struct AddRef<T&> { typedef T& type; }; // NOLINT
-
-// A handy wrapper for AddRef.
-#define GTEST_ADD_REF_(T) typename ::std::tr1::gtest_internal::AddRef<T>::type
-
-// A helper for implementing get<k>().
-template <int k> class Get;
-
-// A helper for implementing tuple_element<k, T>. kIndexValid is true
-// iff k < the number of fields in tuple type T.
-template <bool kIndexValid, int kIndex, class Tuple>
-struct TupleElement;
-
-template <GTEST_10_TYPENAMES_(T)>
-struct TupleElement<true, 0, GTEST_10_TUPLE_(T) > {
- typedef T0 type;
-};
-
-template <GTEST_10_TYPENAMES_(T)>
-struct TupleElement<true, 1, GTEST_10_TUPLE_(T) > {
- typedef T1 type;
-};
-
-template <GTEST_10_TYPENAMES_(T)>
-struct TupleElement<true, 2, GTEST_10_TUPLE_(T) > {
- typedef T2 type;
-};
-
-template <GTEST_10_TYPENAMES_(T)>
-struct TupleElement<true, 3, GTEST_10_TUPLE_(T) > {
- typedef T3 type;
-};
-
-template <GTEST_10_TYPENAMES_(T)>
-struct TupleElement<true, 4, GTEST_10_TUPLE_(T) > {
- typedef T4 type;
-};
-
-template <GTEST_10_TYPENAMES_(T)>
-struct TupleElement<true, 5, GTEST_10_TUPLE_(T) > {
- typedef T5 type;
-};
-
-template <GTEST_10_TYPENAMES_(T)>
-struct TupleElement<true, 6, GTEST_10_TUPLE_(T) > {
- typedef T6 type;
-};
-
-template <GTEST_10_TYPENAMES_(T)>
-struct TupleElement<true, 7, GTEST_10_TUPLE_(T) > {
- typedef T7 type;
-};
-
-template <GTEST_10_TYPENAMES_(T)>
-struct TupleElement<true, 8, GTEST_10_TUPLE_(T) > {
- typedef T8 type;
-};
-
-template <GTEST_10_TYPENAMES_(T)>
-struct TupleElement<true, 9, GTEST_10_TUPLE_(T) > {
- typedef T9 type;
-};
-
-} // namespace gtest_internal
-
-template <>
-class tuple<> {
- public:
- tuple() {}
- tuple(const tuple& /* t */) {}
- tuple& operator=(const tuple& /* t */) { return *this; }
-};
-
-template <GTEST_1_TYPENAMES_(T)>
-class GTEST_1_TUPLE_(T) {
- public:
- template <int k> friend class gtest_internal::Get;
-
- tuple() : f0_() {}
-
- explicit tuple(GTEST_BY_REF_(T0) f0) : f0_(f0) {}
-
- tuple(const tuple& t) : f0_(t.f0_) {}
-
- template <GTEST_1_TYPENAMES_(U)>
- tuple(const GTEST_1_TUPLE_(U)& t) : f0_(t.f0_) {}
-
- tuple& operator=(const tuple& t) { return CopyFrom(t); }
-
- template <GTEST_1_TYPENAMES_(U)>
- tuple& operator=(const GTEST_1_TUPLE_(U)& t) {
- return CopyFrom(t);
- }
-
- GTEST_DECLARE_TUPLE_AS_FRIEND_
-
- template <GTEST_1_TYPENAMES_(U)>
- tuple& CopyFrom(const GTEST_1_TUPLE_(U)& t) {
- f0_ = t.f0_;
- return *this;
- }
-
- T0 f0_;
-};
-
-template <GTEST_2_TYPENAMES_(T)>
-class GTEST_2_TUPLE_(T) {
- public:
- template <int k> friend class gtest_internal::Get;
-
- tuple() : f0_(), f1_() {}
-
- explicit tuple(GTEST_BY_REF_(T0) f0, GTEST_BY_REF_(T1) f1) : f0_(f0),
- f1_(f1) {}
-
- tuple(const tuple& t) : f0_(t.f0_), f1_(t.f1_) {}
-
- template <GTEST_2_TYPENAMES_(U)>
- tuple(const GTEST_2_TUPLE_(U)& t) : f0_(t.f0_), f1_(t.f1_) {}
- template <typename U0, typename U1>
- tuple(const ::std::pair<U0, U1>& p) : f0_(p.first), f1_(p.second) {}
-
- tuple& operator=(const tuple& t) { return CopyFrom(t); }
-
- template <GTEST_2_TYPENAMES_(U)>
- tuple& operator=(const GTEST_2_TUPLE_(U)& t) {
- return CopyFrom(t);
- }
- template <typename U0, typename U1>
- tuple& operator=(const ::std::pair<U0, U1>& p) {
- f0_ = p.first;
- f1_ = p.second;
- return *this;
- }
-
- GTEST_DECLARE_TUPLE_AS_FRIEND_
-
- template <GTEST_2_TYPENAMES_(U)>
- tuple& CopyFrom(const GTEST_2_TUPLE_(U)& t) {
- f0_ = t.f0_;
- f1_ = t.f1_;
- return *this;
- }
-
- T0 f0_;
- T1 f1_;
-};
-
-template <GTEST_3_TYPENAMES_(T)>
-class GTEST_3_TUPLE_(T) {
- public:
- template <int k> friend class gtest_internal::Get;
-
- tuple() : f0_(), f1_(), f2_() {}
-
- explicit tuple(GTEST_BY_REF_(T0) f0, GTEST_BY_REF_(T1) f1,
- GTEST_BY_REF_(T2) f2) : f0_(f0), f1_(f1), f2_(f2) {}
-
- tuple(const tuple& t) : f0_(t.f0_), f1_(t.f1_), f2_(t.f2_) {}
-
- template <GTEST_3_TYPENAMES_(U)>
- tuple(const GTEST_3_TUPLE_(U)& t) : f0_(t.f0_), f1_(t.f1_), f2_(t.f2_) {}
-
- tuple& operator=(const tuple& t) { return CopyFrom(t); }
-
- template <GTEST_3_TYPENAMES_(U)>
- tuple& operator=(const GTEST_3_TUPLE_(U)& t) {
- return CopyFrom(t);
- }
-
- GTEST_DECLARE_TUPLE_AS_FRIEND_
-
- template <GTEST_3_TYPENAMES_(U)>
- tuple& CopyFrom(const GTEST_3_TUPLE_(U)& t) {
- f0_ = t.f0_;
- f1_ = t.f1_;
- f2_ = t.f2_;
- return *this;
- }
-
- T0 f0_;
- T1 f1_;
- T2 f2_;
-};
-
-template <GTEST_4_TYPENAMES_(T)>
-class GTEST_4_TUPLE_(T) {
- public:
- template <int k> friend class gtest_internal::Get;
-
- tuple() : f0_(), f1_(), f2_(), f3_() {}
-
- explicit tuple(GTEST_BY_REF_(T0) f0, GTEST_BY_REF_(T1) f1,
- GTEST_BY_REF_(T2) f2, GTEST_BY_REF_(T3) f3) : f0_(f0), f1_(f1), f2_(f2),
- f3_(f3) {}
-
- tuple(const tuple& t) : f0_(t.f0_), f1_(t.f1_), f2_(t.f2_), f3_(t.f3_) {}
-
- template <GTEST_4_TYPENAMES_(U)>
- tuple(const GTEST_4_TUPLE_(U)& t) : f0_(t.f0_), f1_(t.f1_), f2_(t.f2_),
- f3_(t.f3_) {}
-
- tuple& operator=(const tuple& t) { return CopyFrom(t); }
-
- template <GTEST_4_TYPENAMES_(U)>
- tuple& operator=(const GTEST_4_TUPLE_(U)& t) {
- return CopyFrom(t);
- }
-
- GTEST_DECLARE_TUPLE_AS_FRIEND_
-
- template <GTEST_4_TYPENAMES_(U)>
- tuple& CopyFrom(const GTEST_4_TUPLE_(U)& t) {
- f0_ = t.f0_;
- f1_ = t.f1_;
- f2_ = t.f2_;
- f3_ = t.f3_;
- return *this;
- }
-
- T0 f0_;
- T1 f1_;
- T2 f2_;
- T3 f3_;
-};
-
-template <GTEST_5_TYPENAMES_(T)>
-class GTEST_5_TUPLE_(T) {
- public:
- template <int k> friend class gtest_internal::Get;
-
- tuple() : f0_(), f1_(), f2_(), f3_(), f4_() {}
-
- explicit tuple(GTEST_BY_REF_(T0) f0, GTEST_BY_REF_(T1) f1,
- GTEST_BY_REF_(T2) f2, GTEST_BY_REF_(T3) f3,
- GTEST_BY_REF_(T4) f4) : f0_(f0), f1_(f1), f2_(f2), f3_(f3), f4_(f4) {}
-
- tuple(const tuple& t) : f0_(t.f0_), f1_(t.f1_), f2_(t.f2_), f3_(t.f3_),
- f4_(t.f4_) {}
-
- template <GTEST_5_TYPENAMES_(U)>
- tuple(const GTEST_5_TUPLE_(U)& t) : f0_(t.f0_), f1_(t.f1_), f2_(t.f2_),
- f3_(t.f3_), f4_(t.f4_) {}
-
- tuple& operator=(const tuple& t) { return CopyFrom(t); }
-
- template <GTEST_5_TYPENAMES_(U)>
- tuple& operator=(const GTEST_5_TUPLE_(U)& t) {
- return CopyFrom(t);
- }
-
- GTEST_DECLARE_TUPLE_AS_FRIEND_
-
- template <GTEST_5_TYPENAMES_(U)>
- tuple& CopyFrom(const GTEST_5_TUPLE_(U)& t) {
- f0_ = t.f0_;
- f1_ = t.f1_;
- f2_ = t.f2_;
- f3_ = t.f3_;
- f4_ = t.f4_;
- return *this;
- }
-
- T0 f0_;
- T1 f1_;
- T2 f2_;
- T3 f3_;
- T4 f4_;
-};
-
-template <GTEST_6_TYPENAMES_(T)>
-class GTEST_6_TUPLE_(T) {
- public:
- template <int k> friend class gtest_internal::Get;
-
- tuple() : f0_(), f1_(), f2_(), f3_(), f4_(), f5_() {}
-
- explicit tuple(GTEST_BY_REF_(T0) f0, GTEST_BY_REF_(T1) f1,
- GTEST_BY_REF_(T2) f2, GTEST_BY_REF_(T3) f3, GTEST_BY_REF_(T4) f4,
- GTEST_BY_REF_(T5) f5) : f0_(f0), f1_(f1), f2_(f2), f3_(f3), f4_(f4),
- f5_(f5) {}
-
- tuple(const tuple& t) : f0_(t.f0_), f1_(t.f1_), f2_(t.f2_), f3_(t.f3_),
- f4_(t.f4_), f5_(t.f5_) {}
-
- template <GTEST_6_TYPENAMES_(U)>
- tuple(const GTEST_6_TUPLE_(U)& t) : f0_(t.f0_), f1_(t.f1_), f2_(t.f2_),
- f3_(t.f3_), f4_(t.f4_), f5_(t.f5_) {}
-
- tuple& operator=(const tuple& t) { return CopyFrom(t); }
-
- template <GTEST_6_TYPENAMES_(U)>
- tuple& operator=(const GTEST_6_TUPLE_(U)& t) {
- return CopyFrom(t);
- }
-
- GTEST_DECLARE_TUPLE_AS_FRIEND_
-
- template <GTEST_6_TYPENAMES_(U)>
- tuple& CopyFrom(const GTEST_6_TUPLE_(U)& t) {
- f0_ = t.f0_;
- f1_ = t.f1_;
- f2_ = t.f2_;
- f3_ = t.f3_;
- f4_ = t.f4_;
- f5_ = t.f5_;
- return *this;
- }
-
- T0 f0_;
- T1 f1_;
- T2 f2_;
- T3 f3_;
- T4 f4_;
- T5 f5_;
-};
-
-template <GTEST_7_TYPENAMES_(T)>
-class GTEST_7_TUPLE_(T) {
- public:
- template <int k> friend class gtest_internal::Get;
-
- tuple() : f0_(), f1_(), f2_(), f3_(), f4_(), f5_(), f6_() {}
-
- explicit tuple(GTEST_BY_REF_(T0) f0, GTEST_BY_REF_(T1) f1,
- GTEST_BY_REF_(T2) f2, GTEST_BY_REF_(T3) f3, GTEST_BY_REF_(T4) f4,
- GTEST_BY_REF_(T5) f5, GTEST_BY_REF_(T6) f6) : f0_(f0), f1_(f1), f2_(f2),
- f3_(f3), f4_(f4), f5_(f5), f6_(f6) {}
-
- tuple(const tuple& t) : f0_(t.f0_), f1_(t.f1_), f2_(t.f2_), f3_(t.f3_),
- f4_(t.f4_), f5_(t.f5_), f6_(t.f6_) {}
-
- template <GTEST_7_TYPENAMES_(U)>
- tuple(const GTEST_7_TUPLE_(U)& t) : f0_(t.f0_), f1_(t.f1_), f2_(t.f2_),
- f3_(t.f3_), f4_(t.f4_), f5_(t.f5_), f6_(t.f6_) {}
-
- tuple& operator=(const tuple& t) { return CopyFrom(t); }
-
- template <GTEST_7_TYPENAMES_(U)>
- tuple& operator=(const GTEST_7_TUPLE_(U)& t) {
- return CopyFrom(t);
- }
-
- GTEST_DECLARE_TUPLE_AS_FRIEND_
-
- template <GTEST_7_TYPENAMES_(U)>
- tuple& CopyFrom(const GTEST_7_TUPLE_(U)& t) {
- f0_ = t.f0_;
- f1_ = t.f1_;
- f2_ = t.f2_;
- f3_ = t.f3_;
- f4_ = t.f4_;
- f5_ = t.f5_;
- f6_ = t.f6_;
- return *this;
- }
-
- T0 f0_;
- T1 f1_;
- T2 f2_;
- T3 f3_;
- T4 f4_;
- T5 f5_;
- T6 f6_;
-};
-
-template <GTEST_8_TYPENAMES_(T)>
-class GTEST_8_TUPLE_(T) {
- public:
- template <int k> friend class gtest_internal::Get;
-
- tuple() : f0_(), f1_(), f2_(), f3_(), f4_(), f5_(), f6_(), f7_() {}
-
- explicit tuple(GTEST_BY_REF_(T0) f0, GTEST_BY_REF_(T1) f1,
- GTEST_BY_REF_(T2) f2, GTEST_BY_REF_(T3) f3, GTEST_BY_REF_(T4) f4,
- GTEST_BY_REF_(T5) f5, GTEST_BY_REF_(T6) f6,
- GTEST_BY_REF_(T7) f7) : f0_(f0), f1_(f1), f2_(f2), f3_(f3), f4_(f4),
- f5_(f5), f6_(f6), f7_(f7) {}
-
- tuple(const tuple& t) : f0_(t.f0_), f1_(t.f1_), f2_(t.f2_), f3_(t.f3_),
- f4_(t.f4_), f5_(t.f5_), f6_(t.f6_), f7_(t.f7_) {}
-
- template <GTEST_8_TYPENAMES_(U)>
- tuple(const GTEST_8_TUPLE_(U)& t) : f0_(t.f0_), f1_(t.f1_), f2_(t.f2_),
- f3_(t.f3_), f4_(t.f4_), f5_(t.f5_), f6_(t.f6_), f7_(t.f7_) {}
-
- tuple& operator=(const tuple& t) { return CopyFrom(t); }
-
- template <GTEST_8_TYPENAMES_(U)>
- tuple& operator=(const GTEST_8_TUPLE_(U)& t) {
- return CopyFrom(t);
- }
-
- GTEST_DECLARE_TUPLE_AS_FRIEND_
-
- template <GTEST_8_TYPENAMES_(U)>
- tuple& CopyFrom(const GTEST_8_TUPLE_(U)& t) {
- f0_ = t.f0_;
- f1_ = t.f1_;
- f2_ = t.f2_;
- f3_ = t.f3_;
- f4_ = t.f4_;
- f5_ = t.f5_;
- f6_ = t.f6_;
- f7_ = t.f7_;
- return *this;
- }
-
- T0 f0_;
- T1 f1_;
- T2 f2_;
- T3 f3_;
- T4 f4_;
- T5 f5_;
- T6 f6_;
- T7 f7_;
-};
-
-template <GTEST_9_TYPENAMES_(T)>
-class GTEST_9_TUPLE_(T) {
- public:
- template <int k> friend class gtest_internal::Get;
-
- tuple() : f0_(), f1_(), f2_(), f3_(), f4_(), f5_(), f6_(), f7_(), f8_() {}
-
- explicit tuple(GTEST_BY_REF_(T0) f0, GTEST_BY_REF_(T1) f1,
- GTEST_BY_REF_(T2) f2, GTEST_BY_REF_(T3) f3, GTEST_BY_REF_(T4) f4,
- GTEST_BY_REF_(T5) f5, GTEST_BY_REF_(T6) f6, GTEST_BY_REF_(T7) f7,
- GTEST_BY_REF_(T8) f8) : f0_(f0), f1_(f1), f2_(f2), f3_(f3), f4_(f4),
- f5_(f5), f6_(f6), f7_(f7), f8_(f8) {}
-
- tuple(const tuple& t) : f0_(t.f0_), f1_(t.f1_), f2_(t.f2_), f3_(t.f3_),
- f4_(t.f4_), f5_(t.f5_), f6_(t.f6_), f7_(t.f7_), f8_(t.f8_) {}
-
- template <GTEST_9_TYPENAMES_(U)>
- tuple(const GTEST_9_TUPLE_(U)& t) : f0_(t.f0_), f1_(t.f1_), f2_(t.f2_),
- f3_(t.f3_), f4_(t.f4_), f5_(t.f5_), f6_(t.f6_), f7_(t.f7_), f8_(t.f8_) {}
-
- tuple& operator=(const tuple& t) { return CopyFrom(t); }
-
- template <GTEST_9_TYPENAMES_(U)>
- tuple& operator=(const GTEST_9_TUPLE_(U)& t) {
- return CopyFrom(t);
- }
-
- GTEST_DECLARE_TUPLE_AS_FRIEND_
-
- template <GTEST_9_TYPENAMES_(U)>
- tuple& CopyFrom(const GTEST_9_TUPLE_(U)& t) {
- f0_ = t.f0_;
- f1_ = t.f1_;
- f2_ = t.f2_;
- f3_ = t.f3_;
- f4_ = t.f4_;
- f5_ = t.f5_;
- f6_ = t.f6_;
- f7_ = t.f7_;
- f8_ = t.f8_;
- return *this;
- }
-
- T0 f0_;
- T1 f1_;
- T2 f2_;
- T3 f3_;
- T4 f4_;
- T5 f5_;
- T6 f6_;
- T7 f7_;
- T8 f8_;
-};
-
-template <GTEST_10_TYPENAMES_(T)>
-class tuple {
- public:
- template <int k> friend class gtest_internal::Get;
-
- tuple() : f0_(), f1_(), f2_(), f3_(), f4_(), f5_(), f6_(), f7_(), f8_(),
- f9_() {}
-
- explicit tuple(GTEST_BY_REF_(T0) f0, GTEST_BY_REF_(T1) f1,
- GTEST_BY_REF_(T2) f2, GTEST_BY_REF_(T3) f3, GTEST_BY_REF_(T4) f4,
- GTEST_BY_REF_(T5) f5, GTEST_BY_REF_(T6) f6, GTEST_BY_REF_(T7) f7,
- GTEST_BY_REF_(T8) f8, GTEST_BY_REF_(T9) f9) : f0_(f0), f1_(f1), f2_(f2),
- f3_(f3), f4_(f4), f5_(f5), f6_(f6), f7_(f7), f8_(f8), f9_(f9) {}
-
- tuple(const tuple& t) : f0_(t.f0_), f1_(t.f1_), f2_(t.f2_), f3_(t.f3_),
- f4_(t.f4_), f5_(t.f5_), f6_(t.f6_), f7_(t.f7_), f8_(t.f8_), f9_(t.f9_) {}
-
- template <GTEST_10_TYPENAMES_(U)>
- tuple(const GTEST_10_TUPLE_(U)& t) : f0_(t.f0_), f1_(t.f1_), f2_(t.f2_),
- f3_(t.f3_), f4_(t.f4_), f5_(t.f5_), f6_(t.f6_), f7_(t.f7_), f8_(t.f8_),
- f9_(t.f9_) {}
-
- tuple& operator=(const tuple& t) { return CopyFrom(t); }
-
- template <GTEST_10_TYPENAMES_(U)>
- tuple& operator=(const GTEST_10_TUPLE_(U)& t) {
- return CopyFrom(t);
- }
-
- GTEST_DECLARE_TUPLE_AS_FRIEND_
-
- template <GTEST_10_TYPENAMES_(U)>
- tuple& CopyFrom(const GTEST_10_TUPLE_(U)& t) {
- f0_ = t.f0_;
- f1_ = t.f1_;
- f2_ = t.f2_;
- f3_ = t.f3_;
- f4_ = t.f4_;
- f5_ = t.f5_;
- f6_ = t.f6_;
- f7_ = t.f7_;
- f8_ = t.f8_;
- f9_ = t.f9_;
- return *this;
- }
-
- T0 f0_;
- T1 f1_;
- T2 f2_;
- T3 f3_;
- T4 f4_;
- T5 f5_;
- T6 f6_;
- T7 f7_;
- T8 f8_;
- T9 f9_;
-};
-
-// 6.1.3.2 Tuple creation functions.
-
-// Known limitations: we don't support passing an
-// std::tr1::reference_wrapper<T> to make_tuple(). And we don't
-// implement tie().
-
-inline tuple<> make_tuple() { return tuple<>(); }
-
-template <GTEST_1_TYPENAMES_(T)>
-inline GTEST_1_TUPLE_(T) make_tuple(const T0& f0) {
- return GTEST_1_TUPLE_(T)(f0);
-}
-
-template <GTEST_2_TYPENAMES_(T)>
-inline GTEST_2_TUPLE_(T) make_tuple(const T0& f0, const T1& f1) {
- return GTEST_2_TUPLE_(T)(f0, f1);
-}
-
-template <GTEST_3_TYPENAMES_(T)>
-inline GTEST_3_TUPLE_(T) make_tuple(const T0& f0, const T1& f1, const T2& f2) {
- return GTEST_3_TUPLE_(T)(f0, f1, f2);
-}
-
-template <GTEST_4_TYPENAMES_(T)>
-inline GTEST_4_TUPLE_(T) make_tuple(const T0& f0, const T1& f1, const T2& f2,
- const T3& f3) {
- return GTEST_4_TUPLE_(T)(f0, f1, f2, f3);
-}
-
-template <GTEST_5_TYPENAMES_(T)>
-inline GTEST_5_TUPLE_(T) make_tuple(const T0& f0, const T1& f1, const T2& f2,
- const T3& f3, const T4& f4) {
- return GTEST_5_TUPLE_(T)(f0, f1, f2, f3, f4);
-}
-
-template <GTEST_6_TYPENAMES_(T)>
-inline GTEST_6_TUPLE_(T) make_tuple(const T0& f0, const T1& f1, const T2& f2,
- const T3& f3, const T4& f4, const T5& f5) {
- return GTEST_6_TUPLE_(T)(f0, f1, f2, f3, f4, f5);
-}
-
-template <GTEST_7_TYPENAMES_(T)>
-inline GTEST_7_TUPLE_(T) make_tuple(const T0& f0, const T1& f1, const T2& f2,
- const T3& f3, const T4& f4, const T5& f5, const T6& f6) {
- return GTEST_7_TUPLE_(T)(f0, f1, f2, f3, f4, f5, f6);
-}
-
-template <GTEST_8_TYPENAMES_(T)>
-inline GTEST_8_TUPLE_(T) make_tuple(const T0& f0, const T1& f1, const T2& f2,
- const T3& f3, const T4& f4, const T5& f5, const T6& f6, const T7& f7) {
- return GTEST_8_TUPLE_(T)(f0, f1, f2, f3, f4, f5, f6, f7);
-}
-
-template <GTEST_9_TYPENAMES_(T)>
-inline GTEST_9_TUPLE_(T) make_tuple(const T0& f0, const T1& f1, const T2& f2,
- const T3& f3, const T4& f4, const T5& f5, const T6& f6, const T7& f7,
- const T8& f8) {
- return GTEST_9_TUPLE_(T)(f0, f1, f2, f3, f4, f5, f6, f7, f8);
-}
-
-template <GTEST_10_TYPENAMES_(T)>
-inline GTEST_10_TUPLE_(T) make_tuple(const T0& f0, const T1& f1, const T2& f2,
- const T3& f3, const T4& f4, const T5& f5, const T6& f6, const T7& f7,
- const T8& f8, const T9& f9) {
- return GTEST_10_TUPLE_(T)(f0, f1, f2, f3, f4, f5, f6, f7, f8, f9);
-}
-
-// 6.1.3.3 Tuple helper classes.
-
-template <typename Tuple> struct tuple_size;
-
-template <GTEST_0_TYPENAMES_(T)>
-struct tuple_size<GTEST_0_TUPLE_(T) > {
- static const int value = 0;
-};
-
-template <GTEST_1_TYPENAMES_(T)>
-struct tuple_size<GTEST_1_TUPLE_(T) > {
- static const int value = 1;
-};
-
-template <GTEST_2_TYPENAMES_(T)>
-struct tuple_size<GTEST_2_TUPLE_(T) > {
- static const int value = 2;
-};
-
-template <GTEST_3_TYPENAMES_(T)>
-struct tuple_size<GTEST_3_TUPLE_(T) > {
- static const int value = 3;
-};
-
-template <GTEST_4_TYPENAMES_(T)>
-struct tuple_size<GTEST_4_TUPLE_(T) > {
- static const int value = 4;
-};
-
-template <GTEST_5_TYPENAMES_(T)>
-struct tuple_size<GTEST_5_TUPLE_(T) > {
- static const int value = 5;
-};
-
-template <GTEST_6_TYPENAMES_(T)>
-struct tuple_size<GTEST_6_TUPLE_(T) > {
- static const int value = 6;
-};
-
-template <GTEST_7_TYPENAMES_(T)>
-struct tuple_size<GTEST_7_TUPLE_(T) > {
- static const int value = 7;
-};
-
-template <GTEST_8_TYPENAMES_(T)>
-struct tuple_size<GTEST_8_TUPLE_(T) > {
- static const int value = 8;
-};
-
-template <GTEST_9_TYPENAMES_(T)>
-struct tuple_size<GTEST_9_TUPLE_(T) > {
- static const int value = 9;
-};
-
-template <GTEST_10_TYPENAMES_(T)>
-struct tuple_size<GTEST_10_TUPLE_(T) > {
- static const int value = 10;
-};
-
-template <int k, class Tuple>
-struct tuple_element {
- typedef typename gtest_internal::TupleElement<
- k < (tuple_size<Tuple>::value), k, Tuple>::type type;
-};
-
-#define GTEST_TUPLE_ELEMENT_(k, Tuple) typename tuple_element<k, Tuple >::type
-
-// 6.1.3.4 Element access.
-
-namespace gtest_internal {
-
-template <>
-class Get<0> {
- public:
- template <class Tuple>
- static GTEST_ADD_REF_(GTEST_TUPLE_ELEMENT_(0, Tuple))
- Field(Tuple& t) { return t.f0_; } // NOLINT
-
- template <class Tuple>
- static GTEST_BY_REF_(GTEST_TUPLE_ELEMENT_(0, Tuple))
- ConstField(const Tuple& t) { return t.f0_; }
-};
-
-template <>
-class Get<1> {
- public:
- template <class Tuple>
- static GTEST_ADD_REF_(GTEST_TUPLE_ELEMENT_(1, Tuple))
- Field(Tuple& t) { return t.f1_; } // NOLINT
-
- template <class Tuple>
- static GTEST_BY_REF_(GTEST_TUPLE_ELEMENT_(1, Tuple))
- ConstField(const Tuple& t) { return t.f1_; }
-};
-
-template <>
-class Get<2> {
- public:
- template <class Tuple>
- static GTEST_ADD_REF_(GTEST_TUPLE_ELEMENT_(2, Tuple))
- Field(Tuple& t) { return t.f2_; } // NOLINT
-
- template <class Tuple>
- static GTEST_BY_REF_(GTEST_TUPLE_ELEMENT_(2, Tuple))
- ConstField(const Tuple& t) { return t.f2_; }
-};
-
-template <>
-class Get<3> {
- public:
- template <class Tuple>
- static GTEST_ADD_REF_(GTEST_TUPLE_ELEMENT_(3, Tuple))
- Field(Tuple& t) { return t.f3_; } // NOLINT
-
- template <class Tuple>
- static GTEST_BY_REF_(GTEST_TUPLE_ELEMENT_(3, Tuple))
- ConstField(const Tuple& t) { return t.f3_; }
-};
-
-template <>
-class Get<4> {
- public:
- template <class Tuple>
- static GTEST_ADD_REF_(GTEST_TUPLE_ELEMENT_(4, Tuple))
- Field(Tuple& t) { return t.f4_; } // NOLINT
-
- template <class Tuple>
- static GTEST_BY_REF_(GTEST_TUPLE_ELEMENT_(4, Tuple))
- ConstField(const Tuple& t) { return t.f4_; }
-};
-
-template <>
-class Get<5> {
- public:
- template <class Tuple>
- static GTEST_ADD_REF_(GTEST_TUPLE_ELEMENT_(5, Tuple))
- Field(Tuple& t) { return t.f5_; } // NOLINT
-
- template <class Tuple>
- static GTEST_BY_REF_(GTEST_TUPLE_ELEMENT_(5, Tuple))
- ConstField(const Tuple& t) { return t.f5_; }
-};
-
-template <>
-class Get<6> {
- public:
- template <class Tuple>
- static GTEST_ADD_REF_(GTEST_TUPLE_ELEMENT_(6, Tuple))
- Field(Tuple& t) { return t.f6_; } // NOLINT
-
- template <class Tuple>
- static GTEST_BY_REF_(GTEST_TUPLE_ELEMENT_(6, Tuple))
- ConstField(const Tuple& t) { return t.f6_; }
-};
-
-template <>
-class Get<7> {
- public:
- template <class Tuple>
- static GTEST_ADD_REF_(GTEST_TUPLE_ELEMENT_(7, Tuple))
- Field(Tuple& t) { return t.f7_; } // NOLINT
-
- template <class Tuple>
- static GTEST_BY_REF_(GTEST_TUPLE_ELEMENT_(7, Tuple))
- ConstField(const Tuple& t) { return t.f7_; }
-};
-
-template <>
-class Get<8> {
- public:
- template <class Tuple>
- static GTEST_ADD_REF_(GTEST_TUPLE_ELEMENT_(8, Tuple))
- Field(Tuple& t) { return t.f8_; } // NOLINT
-
- template <class Tuple>
- static GTEST_BY_REF_(GTEST_TUPLE_ELEMENT_(8, Tuple))
- ConstField(const Tuple& t) { return t.f8_; }
-};
-
-template <>
-class Get<9> {
- public:
- template <class Tuple>
- static GTEST_ADD_REF_(GTEST_TUPLE_ELEMENT_(9, Tuple))
- Field(Tuple& t) { return t.f9_; } // NOLINT
-
- template <class Tuple>
- static GTEST_BY_REF_(GTEST_TUPLE_ELEMENT_(9, Tuple))
- ConstField(const Tuple& t) { return t.f9_; }
-};
-
-} // namespace gtest_internal
-
-template <int k, GTEST_10_TYPENAMES_(T)>
-GTEST_ADD_REF_(GTEST_TUPLE_ELEMENT_(k, GTEST_10_TUPLE_(T)))
-get(GTEST_10_TUPLE_(T)& t) {
- return gtest_internal::Get<k>::Field(t);
-}
-
-template <int k, GTEST_10_TYPENAMES_(T)>
-GTEST_BY_REF_(GTEST_TUPLE_ELEMENT_(k, GTEST_10_TUPLE_(T)))
-get(const GTEST_10_TUPLE_(T)& t) {
- return gtest_internal::Get<k>::ConstField(t);
-}
-
-// 6.1.3.5 Relational operators
-
-// We only implement == and !=, as we don't have a need for the rest yet.
-
-namespace gtest_internal {
-
-// SameSizeTuplePrefixComparator<k, k>::Eq(t1, t2) returns true if the
-// first k fields of t1 equals the first k fields of t2.
-// SameSizeTuplePrefixComparator(k1, k2) would be a compiler error if
-// k1 != k2.
-template <int kSize1, int kSize2>
-struct SameSizeTuplePrefixComparator;
-
-template <>
-struct SameSizeTuplePrefixComparator<0, 0> {
- template <class Tuple1, class Tuple2>
- static bool Eq(const Tuple1& /* t1 */, const Tuple2& /* t2 */) {
- return true;
- }
-};
-
-template <int k>
-struct SameSizeTuplePrefixComparator<k, k> {
- template <class Tuple1, class Tuple2>
- static bool Eq(const Tuple1& t1, const Tuple2& t2) {
- return SameSizeTuplePrefixComparator<k - 1, k - 1>::Eq(t1, t2) &&
- ::std::tr1::get<k - 1>(t1) == ::std::tr1::get<k - 1>(t2);
- }
-};
-
-} // namespace gtest_internal
-
-template <GTEST_10_TYPENAMES_(T), GTEST_10_TYPENAMES_(U)>
-inline bool operator==(const GTEST_10_TUPLE_(T)& t,
- const GTEST_10_TUPLE_(U)& u) {
- return gtest_internal::SameSizeTuplePrefixComparator<
- tuple_size<GTEST_10_TUPLE_(T) >::value,
- tuple_size<GTEST_10_TUPLE_(U) >::value>::Eq(t, u);
-}
-
-template <GTEST_10_TYPENAMES_(T), GTEST_10_TYPENAMES_(U)>
-inline bool operator!=(const GTEST_10_TUPLE_(T)& t,
- const GTEST_10_TUPLE_(U)& u) { return !(t == u); }
-
-// 6.1.4 Pairs.
-// Unimplemented.
-
-} // namespace tr1
-} // namespace std
-
-#undef GTEST_0_TUPLE_
-#undef GTEST_1_TUPLE_
-#undef GTEST_2_TUPLE_
-#undef GTEST_3_TUPLE_
-#undef GTEST_4_TUPLE_
-#undef GTEST_5_TUPLE_
-#undef GTEST_6_TUPLE_
-#undef GTEST_7_TUPLE_
-#undef GTEST_8_TUPLE_
-#undef GTEST_9_TUPLE_
-#undef GTEST_10_TUPLE_
-
-#undef GTEST_0_TYPENAMES_
-#undef GTEST_1_TYPENAMES_
-#undef GTEST_2_TYPENAMES_
-#undef GTEST_3_TYPENAMES_
-#undef GTEST_4_TYPENAMES_
-#undef GTEST_5_TYPENAMES_
-#undef GTEST_6_TYPENAMES_
-#undef GTEST_7_TYPENAMES_
-#undef GTEST_8_TYPENAMES_
-#undef GTEST_9_TYPENAMES_
-#undef GTEST_10_TYPENAMES_
-
-#undef GTEST_DECLARE_TUPLE_AS_FRIEND_
-#undef GTEST_BY_REF_
-#undef GTEST_ADD_REF_
-#undef GTEST_TUPLE_ELEMENT_
-
-#endif // GTEST_INCLUDE_GTEST_INTERNAL_GTEST_TUPLE_H_
diff --git a/third_party/aom/third_party/googletest/src/googletest/include/gtest/internal/gtest-tuple.h.pump b/third_party/aom/third_party/googletest/src/googletest/include/gtest/internal/gtest-tuple.h.pump
deleted file mode 100644
index 429ddfeec..000000000
--- a/third_party/aom/third_party/googletest/src/googletest/include/gtest/internal/gtest-tuple.h.pump
+++ /dev/null
@@ -1,347 +0,0 @@
-$$ -*- mode: c++; -*-
-$var n = 10 $$ Maximum number of tuple fields we want to support.
-$$ This meta comment fixes auto-indentation in Emacs. }}
-// Copyright 2009 Google Inc.
-// All Rights Reserved.
-//
-// Redistribution and use in source and binary forms, with or without
-// modification, are permitted provided that the following conditions are
-// met:
-//
-// * Redistributions of source code must retain the above copyright
-// notice, this list of conditions and the following disclaimer.
-// * Redistributions in binary form must reproduce the above
-// copyright notice, this list of conditions and the following disclaimer
-// in the documentation and/or other materials provided with the
-// distribution.
-// * Neither the name of Google Inc. nor the names of its
-// contributors may be used to endorse or promote products derived from
-// this software without specific prior written permission.
-//
-// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
-// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
-// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
-// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
-// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
-// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
-// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
-// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
-// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-//
-// Author: wan@google.com (Zhanyong Wan)
-
-// Implements a subset of TR1 tuple needed by Google Test and Google Mock.
-
-#ifndef GTEST_INCLUDE_GTEST_INTERNAL_GTEST_TUPLE_H_
-#define GTEST_INCLUDE_GTEST_INTERNAL_GTEST_TUPLE_H_
-
-#include <utility> // For ::std::pair.
-
-// The compiler used in Symbian has a bug that prevents us from declaring the
-// tuple template as a friend (it complains that tuple is redefined). This
-// hack bypasses the bug by declaring the members that should otherwise be
-// private as public.
-// Sun Studio versions < 12 also have the above bug.
-#if defined(__SYMBIAN32__) || (defined(__SUNPRO_CC) && __SUNPRO_CC < 0x590)
-# define GTEST_DECLARE_TUPLE_AS_FRIEND_ public:
-#else
-# define GTEST_DECLARE_TUPLE_AS_FRIEND_ \
- template <GTEST_$(n)_TYPENAMES_(U)> friend class tuple; \
- private:
-#endif
-
-// Visual Studio 2010, 2012, and 2013 define symbols in std::tr1 that conflict
-// with our own definitions. Therefore using our own tuple does not work on
-// those compilers.
-#if defined(_MSC_VER) && _MSC_VER >= 1600 /* 1600 is Visual Studio 2010 */
-# error "gtest's tuple doesn't compile on Visual Studio 2010 or later. \
-GTEST_USE_OWN_TR1_TUPLE must be set to 0 on those compilers."
-#endif
-
-
-$range i 0..n-1
-$range j 0..n
-$range k 1..n
-// GTEST_n_TUPLE_(T) is the type of an n-tuple.
-#define GTEST_0_TUPLE_(T) tuple<>
-
-$for k [[
-$range m 0..k-1
-$range m2 k..n-1
-#define GTEST_$(k)_TUPLE_(T) tuple<$for m, [[T##$m]]$for m2 [[, void]]>
-
-]]
-
-// GTEST_n_TYPENAMES_(T) declares a list of n typenames.
-
-$for j [[
-$range m 0..j-1
-#define GTEST_$(j)_TYPENAMES_(T) $for m, [[typename T##$m]]
-
-
-]]
-
-// In theory, defining stuff in the ::std namespace is undefined
-// behavior. We can do this as we are playing the role of a standard
-// library vendor.
-namespace std {
-namespace tr1 {
-
-template <$for i, [[typename T$i = void]]>
-class tuple;
-
-// Anything in namespace gtest_internal is Google Test's INTERNAL
-// IMPLEMENTATION DETAIL and MUST NOT BE USED DIRECTLY in user code.
-namespace gtest_internal {
-
-// ByRef<T>::type is T if T is a reference; otherwise it's const T&.
-template <typename T>
-struct ByRef { typedef const T& type; }; // NOLINT
-template <typename T>
-struct ByRef<T&> { typedef T& type; }; // NOLINT
-
-// A handy wrapper for ByRef.
-#define GTEST_BY_REF_(T) typename ::std::tr1::gtest_internal::ByRef<T>::type
-
-// AddRef<T>::type is T if T is a reference; otherwise it's T&. This
-// is the same as tr1::add_reference<T>::type.
-template <typename T>
-struct AddRef { typedef T& type; }; // NOLINT
-template <typename T>
-struct AddRef<T&> { typedef T& type; }; // NOLINT
-
-// A handy wrapper for AddRef.
-#define GTEST_ADD_REF_(T) typename ::std::tr1::gtest_internal::AddRef<T>::type
-
-// A helper for implementing get<k>().
-template <int k> class Get;
-
-// A helper for implementing tuple_element<k, T>. kIndexValid is true
-// iff k < the number of fields in tuple type T.
-template <bool kIndexValid, int kIndex, class Tuple>
-struct TupleElement;
-
-
-$for i [[
-template <GTEST_$(n)_TYPENAMES_(T)>
-struct TupleElement<true, $i, GTEST_$(n)_TUPLE_(T) > {
- typedef T$i type;
-};
-
-
-]]
-} // namespace gtest_internal
-
-template <>
-class tuple<> {
- public:
- tuple() {}
- tuple(const tuple& /* t */) {}
- tuple& operator=(const tuple& /* t */) { return *this; }
-};
-
-
-$for k [[
-$range m 0..k-1
-template <GTEST_$(k)_TYPENAMES_(T)>
-class $if k < n [[GTEST_$(k)_TUPLE_(T)]] $else [[tuple]] {
- public:
- template <int k> friend class gtest_internal::Get;
-
- tuple() : $for m, [[f$(m)_()]] {}
-
- explicit tuple($for m, [[GTEST_BY_REF_(T$m) f$m]]) : [[]]
-$for m, [[f$(m)_(f$m)]] {}
-
- tuple(const tuple& t) : $for m, [[f$(m)_(t.f$(m)_)]] {}
-
- template <GTEST_$(k)_TYPENAMES_(U)>
- tuple(const GTEST_$(k)_TUPLE_(U)& t) : $for m, [[f$(m)_(t.f$(m)_)]] {}
-
-$if k == 2 [[
- template <typename U0, typename U1>
- tuple(const ::std::pair<U0, U1>& p) : f0_(p.first), f1_(p.second) {}
-
-]]
-
- tuple& operator=(const tuple& t) { return CopyFrom(t); }
-
- template <GTEST_$(k)_TYPENAMES_(U)>
- tuple& operator=(const GTEST_$(k)_TUPLE_(U)& t) {
- return CopyFrom(t);
- }
-
-$if k == 2 [[
- template <typename U0, typename U1>
- tuple& operator=(const ::std::pair<U0, U1>& p) {
- f0_ = p.first;
- f1_ = p.second;
- return *this;
- }
-
-]]
-
- GTEST_DECLARE_TUPLE_AS_FRIEND_
-
- template <GTEST_$(k)_TYPENAMES_(U)>
- tuple& CopyFrom(const GTEST_$(k)_TUPLE_(U)& t) {
-
-$for m [[
- f$(m)_ = t.f$(m)_;
-
-]]
- return *this;
- }
-
-
-$for m [[
- T$m f$(m)_;
-
-]]
-};
-
-
-]]
-// 6.1.3.2 Tuple creation functions.
-
-// Known limitations: we don't support passing an
-// std::tr1::reference_wrapper<T> to make_tuple(). And we don't
-// implement tie().
-
-inline tuple<> make_tuple() { return tuple<>(); }
-
-$for k [[
-$range m 0..k-1
-
-template <GTEST_$(k)_TYPENAMES_(T)>
-inline GTEST_$(k)_TUPLE_(T) make_tuple($for m, [[const T$m& f$m]]) {
- return GTEST_$(k)_TUPLE_(T)($for m, [[f$m]]);
-}
-
-]]
-
-// 6.1.3.3 Tuple helper classes.
-
-template <typename Tuple> struct tuple_size;
-
-
-$for j [[
-template <GTEST_$(j)_TYPENAMES_(T)>
-struct tuple_size<GTEST_$(j)_TUPLE_(T) > {
- static const int value = $j;
-};
-
-
-]]
-template <int k, class Tuple>
-struct tuple_element {
- typedef typename gtest_internal::TupleElement<
- k < (tuple_size<Tuple>::value), k, Tuple>::type type;
-};
-
-#define GTEST_TUPLE_ELEMENT_(k, Tuple) typename tuple_element<k, Tuple >::type
-
-// 6.1.3.4 Element access.
-
-namespace gtest_internal {
-
-
-$for i [[
-template <>
-class Get<$i> {
- public:
- template <class Tuple>
- static GTEST_ADD_REF_(GTEST_TUPLE_ELEMENT_($i, Tuple))
- Field(Tuple& t) { return t.f$(i)_; } // NOLINT
-
- template <class Tuple>
- static GTEST_BY_REF_(GTEST_TUPLE_ELEMENT_($i, Tuple))
- ConstField(const Tuple& t) { return t.f$(i)_; }
-};
-
-
-]]
-} // namespace gtest_internal
-
-template <int k, GTEST_$(n)_TYPENAMES_(T)>
-GTEST_ADD_REF_(GTEST_TUPLE_ELEMENT_(k, GTEST_$(n)_TUPLE_(T)))
-get(GTEST_$(n)_TUPLE_(T)& t) {
- return gtest_internal::Get<k>::Field(t);
-}
-
-template <int k, GTEST_$(n)_TYPENAMES_(T)>
-GTEST_BY_REF_(GTEST_TUPLE_ELEMENT_(k, GTEST_$(n)_TUPLE_(T)))
-get(const GTEST_$(n)_TUPLE_(T)& t) {
- return gtest_internal::Get<k>::ConstField(t);
-}
-
-// 6.1.3.5 Relational operators
-
-// We only implement == and !=, as we don't have a need for the rest yet.
-
-namespace gtest_internal {
-
-// SameSizeTuplePrefixComparator<k, k>::Eq(t1, t2) returns true if the
-// first k fields of t1 equals the first k fields of t2.
-// SameSizeTuplePrefixComparator(k1, k2) would be a compiler error if
-// k1 != k2.
-template <int kSize1, int kSize2>
-struct SameSizeTuplePrefixComparator;
-
-template <>
-struct SameSizeTuplePrefixComparator<0, 0> {
- template <class Tuple1, class Tuple2>
- static bool Eq(const Tuple1& /* t1 */, const Tuple2& /* t2 */) {
- return true;
- }
-};
-
-template <int k>
-struct SameSizeTuplePrefixComparator<k, k> {
- template <class Tuple1, class Tuple2>
- static bool Eq(const Tuple1& t1, const Tuple2& t2) {
- return SameSizeTuplePrefixComparator<k - 1, k - 1>::Eq(t1, t2) &&
- ::std::tr1::get<k - 1>(t1) == ::std::tr1::get<k - 1>(t2);
- }
-};
-
-} // namespace gtest_internal
-
-template <GTEST_$(n)_TYPENAMES_(T), GTEST_$(n)_TYPENAMES_(U)>
-inline bool operator==(const GTEST_$(n)_TUPLE_(T)& t,
- const GTEST_$(n)_TUPLE_(U)& u) {
- return gtest_internal::SameSizeTuplePrefixComparator<
- tuple_size<GTEST_$(n)_TUPLE_(T) >::value,
- tuple_size<GTEST_$(n)_TUPLE_(U) >::value>::Eq(t, u);
-}
-
-template <GTEST_$(n)_TYPENAMES_(T), GTEST_$(n)_TYPENAMES_(U)>
-inline bool operator!=(const GTEST_$(n)_TUPLE_(T)& t,
- const GTEST_$(n)_TUPLE_(U)& u) { return !(t == u); }
-
-// 6.1.4 Pairs.
-// Unimplemented.
-
-} // namespace tr1
-} // namespace std
-
-
-$for j [[
-#undef GTEST_$(j)_TUPLE_
-
-]]
-
-
-$for j [[
-#undef GTEST_$(j)_TYPENAMES_
-
-]]
-
-#undef GTEST_DECLARE_TUPLE_AS_FRIEND_
-#undef GTEST_BY_REF_
-#undef GTEST_ADD_REF_
-#undef GTEST_TUPLE_ELEMENT_
-
-#endif // GTEST_INCLUDE_GTEST_INTERNAL_GTEST_TUPLE_H_
diff --git a/third_party/aom/third_party/googletest/src/googletest/include/gtest/internal/gtest-type-util.h b/third_party/aom/third_party/googletest/src/googletest/include/gtest/internal/gtest-type-util.h
deleted file mode 100644
index e46f7cfcb..000000000
--- a/third_party/aom/third_party/googletest/src/googletest/include/gtest/internal/gtest-type-util.h
+++ /dev/null
@@ -1,3331 +0,0 @@
-// This file was GENERATED by command:
-// pump.py gtest-type-util.h.pump
-// DO NOT EDIT BY HAND!!!
-
-// Copyright 2008 Google Inc.
-// All Rights Reserved.
-//
-// Redistribution and use in source and binary forms, with or without
-// modification, are permitted provided that the following conditions are
-// met:
-//
-// * Redistributions of source code must retain the above copyright
-// notice, this list of conditions and the following disclaimer.
-// * Redistributions in binary form must reproduce the above
-// copyright notice, this list of conditions and the following disclaimer
-// in the documentation and/or other materials provided with the
-// distribution.
-// * Neither the name of Google Inc. nor the names of its
-// contributors may be used to endorse or promote products derived from
-// this software without specific prior written permission.
-//
-// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
-// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
-// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
-// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
-// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
-// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
-// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
-// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
-// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-//
-// Author: wan@google.com (Zhanyong Wan)
-
-// Type utilities needed for implementing typed and type-parameterized
-// tests. This file is generated by a SCRIPT. DO NOT EDIT BY HAND!
-//
-// Currently we support at most 50 types in a list, and at most 50
-// type-parameterized tests in one type-parameterized test case.
-// Please contact googletestframework@googlegroups.com if you need
-// more.
-
-#ifndef GTEST_INCLUDE_GTEST_INTERNAL_GTEST_TYPE_UTIL_H_
-#define GTEST_INCLUDE_GTEST_INTERNAL_GTEST_TYPE_UTIL_H_
-
-#include "gtest/internal/gtest-port.h"
-
-// #ifdef __GNUC__ is too general here. It is possible to use gcc without using
-// libstdc++ (which is where cxxabi.h comes from).
-# if GTEST_HAS_CXXABI_H_
-# include <cxxabi.h>
-# elif defined(__HP_aCC)
-# include <acxx_demangle.h>
-# endif // GTEST_HASH_CXXABI_H_
-
-namespace testing {
-namespace internal {
-
-// GetTypeName<T>() returns a human-readable name of type T.
-// NB: This function is also used in Google Mock, so don't move it inside of
-// the typed-test-only section below.
-template <typename T>
-std::string GetTypeName() {
-# if GTEST_HAS_RTTI
-
- const char* const name = typeid(T).name();
-# if GTEST_HAS_CXXABI_H_ || defined(__HP_aCC)
- int status = 0;
- // gcc's implementation of typeid(T).name() mangles the type name,
- // so we have to demangle it.
-# if GTEST_HAS_CXXABI_H_
- using abi::__cxa_demangle;
-# endif // GTEST_HAS_CXXABI_H_
- char* const readable_name = __cxa_demangle(name, 0, 0, &status);
- const std::string name_str(status == 0 ? readable_name : name);
- free(readable_name);
- return name_str;
-# else
- return name;
-# endif // GTEST_HAS_CXXABI_H_ || __HP_aCC
-
-# else
-
- return "<type>";
-
-# endif // GTEST_HAS_RTTI
-}
-
-#if GTEST_HAS_TYPED_TEST || GTEST_HAS_TYPED_TEST_P
-
-// AssertyTypeEq<T1, T2>::type is defined iff T1 and T2 are the same
-// type. This can be used as a compile-time assertion to ensure that
-// two types are equal.
-
-template <typename T1, typename T2>
-struct AssertTypeEq;
-
-template <typename T>
-struct AssertTypeEq<T, T> {
- typedef bool type;
-};
-
-// A unique type used as the default value for the arguments of class
-// template Types. This allows us to simulate variadic templates
-// (e.g. Types<int>, Type<int, double>, and etc), which C++ doesn't
-// support directly.
-struct None {};
-
-// The following family of struct and struct templates are used to
-// represent type lists. In particular, TypesN<T1, T2, ..., TN>
-// represents a type list with N types (T1, T2, ..., and TN) in it.
-// Except for Types0, every struct in the family has two member types:
-// Head for the first type in the list, and Tail for the rest of the
-// list.
-
-// The empty type list.
-struct Types0 {};
-
-// Type lists of length 1, 2, 3, and so on.
-
-template <typename T1>
-struct Types1 {
- typedef T1 Head;
- typedef Types0 Tail;
-};
-template <typename T1, typename T2>
-struct Types2 {
- typedef T1 Head;
- typedef Types1<T2> Tail;
-};
-
-template <typename T1, typename T2, typename T3>
-struct Types3 {
- typedef T1 Head;
- typedef Types2<T2, T3> Tail;
-};
-
-template <typename T1, typename T2, typename T3, typename T4>
-struct Types4 {
- typedef T1 Head;
- typedef Types3<T2, T3, T4> Tail;
-};
-
-template <typename T1, typename T2, typename T3, typename T4, typename T5>
-struct Types5 {
- typedef T1 Head;
- typedef Types4<T2, T3, T4, T5> Tail;
-};
-
-template <typename T1, typename T2, typename T3, typename T4, typename T5,
- typename T6>
-struct Types6 {
- typedef T1 Head;
- typedef Types5<T2, T3, T4, T5, T6> Tail;
-};
-
-template <typename T1, typename T2, typename T3, typename T4, typename T5,
- typename T6, typename T7>
-struct Types7 {
- typedef T1 Head;
- typedef Types6<T2, T3, T4, T5, T6, T7> Tail;
-};
-
-template <typename T1, typename T2, typename T3, typename T4, typename T5,
- typename T6, typename T7, typename T8>
-struct Types8 {
- typedef T1 Head;
- typedef Types7<T2, T3, T4, T5, T6, T7, T8> Tail;
-};
-
-template <typename T1, typename T2, typename T3, typename T4, typename T5,
- typename T6, typename T7, typename T8, typename T9>
-struct Types9 {
- typedef T1 Head;
- typedef Types8<T2, T3, T4, T5, T6, T7, T8, T9> Tail;
-};
-
-template <typename T1, typename T2, typename T3, typename T4, typename T5,
- typename T6, typename T7, typename T8, typename T9, typename T10>
-struct Types10 {
- typedef T1 Head;
- typedef Types9<T2, T3, T4, T5, T6, T7, T8, T9, T10> Tail;
-};
-
-template <typename T1, typename T2, typename T3, typename T4, typename T5,
- typename T6, typename T7, typename T8, typename T9, typename T10,
- typename T11>
-struct Types11 {
- typedef T1 Head;
- typedef Types10<T2, T3, T4, T5, T6, T7, T8, T9, T10, T11> Tail;
-};
-
-template <typename T1, typename T2, typename T3, typename T4, typename T5,
- typename T6, typename T7, typename T8, typename T9, typename T10,
- typename T11, typename T12>
-struct Types12 {
- typedef T1 Head;
- typedef Types11<T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12> Tail;
-};
-
-template <typename T1, typename T2, typename T3, typename T4, typename T5,
- typename T6, typename T7, typename T8, typename T9, typename T10,
- typename T11, typename T12, typename T13>
-struct Types13 {
- typedef T1 Head;
- typedef Types12<T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13> Tail;
-};
-
-template <typename T1, typename T2, typename T3, typename T4, typename T5,
- typename T6, typename T7, typename T8, typename T9, typename T10,
- typename T11, typename T12, typename T13, typename T14>
-struct Types14 {
- typedef T1 Head;
- typedef Types13<T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14> Tail;
-};
-
-template <typename T1, typename T2, typename T3, typename T4, typename T5,
- typename T6, typename T7, typename T8, typename T9, typename T10,
- typename T11, typename T12, typename T13, typename T14, typename T15>
-struct Types15 {
- typedef T1 Head;
- typedef Types14<T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14,
- T15> Tail;
-};
-
-template <typename T1, typename T2, typename T3, typename T4, typename T5,
- typename T6, typename T7, typename T8, typename T9, typename T10,
- typename T11, typename T12, typename T13, typename T14, typename T15,
- typename T16>
-struct Types16 {
- typedef T1 Head;
- typedef Types15<T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, T15,
- T16> Tail;
-};
-
-template <typename T1, typename T2, typename T3, typename T4, typename T5,
- typename T6, typename T7, typename T8, typename T9, typename T10,
- typename T11, typename T12, typename T13, typename T14, typename T15,
- typename T16, typename T17>
-struct Types17 {
- typedef T1 Head;
- typedef Types16<T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, T15,
- T16, T17> Tail;
-};
-
-template <typename T1, typename T2, typename T3, typename T4, typename T5,
- typename T6, typename T7, typename T8, typename T9, typename T10,
- typename T11, typename T12, typename T13, typename T14, typename T15,
- typename T16, typename T17, typename T18>
-struct Types18 {
- typedef T1 Head;
- typedef Types17<T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, T15,
- T16, T17, T18> Tail;
-};
-
-template <typename T1, typename T2, typename T3, typename T4, typename T5,
- typename T6, typename T7, typename T8, typename T9, typename T10,
- typename T11, typename T12, typename T13, typename T14, typename T15,
- typename T16, typename T17, typename T18, typename T19>
-struct Types19 {
- typedef T1 Head;
- typedef Types18<T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, T15,
- T16, T17, T18, T19> Tail;
-};
-
-template <typename T1, typename T2, typename T3, typename T4, typename T5,
- typename T6, typename T7, typename T8, typename T9, typename T10,
- typename T11, typename T12, typename T13, typename T14, typename T15,
- typename T16, typename T17, typename T18, typename T19, typename T20>
-struct Types20 {
- typedef T1 Head;
- typedef Types19<T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, T15,
- T16, T17, T18, T19, T20> Tail;
-};
-
-template <typename T1, typename T2, typename T3, typename T4, typename T5,
- typename T6, typename T7, typename T8, typename T9, typename T10,
- typename T11, typename T12, typename T13, typename T14, typename T15,
- typename T16, typename T17, typename T18, typename T19, typename T20,
- typename T21>
-struct Types21 {
- typedef T1 Head;
- typedef Types20<T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, T15,
- T16, T17, T18, T19, T20, T21> Tail;
-};
-
-template <typename T1, typename T2, typename T3, typename T4, typename T5,
- typename T6, typename T7, typename T8, typename T9, typename T10,
- typename T11, typename T12, typename T13, typename T14, typename T15,
- typename T16, typename T17, typename T18, typename T19, typename T20,
- typename T21, typename T22>
-struct Types22 {
- typedef T1 Head;
- typedef Types21<T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, T15,
- T16, T17, T18, T19, T20, T21, T22> Tail;
-};
-
-template <typename T1, typename T2, typename T3, typename T4, typename T5,
- typename T6, typename T7, typename T8, typename T9, typename T10,
- typename T11, typename T12, typename T13, typename T14, typename T15,
- typename T16, typename T17, typename T18, typename T19, typename T20,
- typename T21, typename T22, typename T23>
-struct Types23 {
- typedef T1 Head;
- typedef Types22<T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, T15,
- T16, T17, T18, T19, T20, T21, T22, T23> Tail;
-};
-
-template <typename T1, typename T2, typename T3, typename T4, typename T5,
- typename T6, typename T7, typename T8, typename T9, typename T10,
- typename T11, typename T12, typename T13, typename T14, typename T15,
- typename T16, typename T17, typename T18, typename T19, typename T20,
- typename T21, typename T22, typename T23, typename T24>
-struct Types24 {
- typedef T1 Head;
- typedef Types23<T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, T15,
- T16, T17, T18, T19, T20, T21, T22, T23, T24> Tail;
-};
-
-template <typename T1, typename T2, typename T3, typename T4, typename T5,
- typename T6, typename T7, typename T8, typename T9, typename T10,
- typename T11, typename T12, typename T13, typename T14, typename T15,
- typename T16, typename T17, typename T18, typename T19, typename T20,
- typename T21, typename T22, typename T23, typename T24, typename T25>
-struct Types25 {
- typedef T1 Head;
- typedef Types24<T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, T15,
- T16, T17, T18, T19, T20, T21, T22, T23, T24, T25> Tail;
-};
-
-template <typename T1, typename T2, typename T3, typename T4, typename T5,
- typename T6, typename T7, typename T8, typename T9, typename T10,
- typename T11, typename T12, typename T13, typename T14, typename T15,
- typename T16, typename T17, typename T18, typename T19, typename T20,
- typename T21, typename T22, typename T23, typename T24, typename T25,
- typename T26>
-struct Types26 {
- typedef T1 Head;
- typedef Types25<T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, T15,
- T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26> Tail;
-};
-
-template <typename T1, typename T2, typename T3, typename T4, typename T5,
- typename T6, typename T7, typename T8, typename T9, typename T10,
- typename T11, typename T12, typename T13, typename T14, typename T15,
- typename T16, typename T17, typename T18, typename T19, typename T20,
- typename T21, typename T22, typename T23, typename T24, typename T25,
- typename T26, typename T27>
-struct Types27 {
- typedef T1 Head;
- typedef Types26<T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, T15,
- T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27> Tail;
-};
-
-template <typename T1, typename T2, typename T3, typename T4, typename T5,
- typename T6, typename T7, typename T8, typename T9, typename T10,
- typename T11, typename T12, typename T13, typename T14, typename T15,
- typename T16, typename T17, typename T18, typename T19, typename T20,
- typename T21, typename T22, typename T23, typename T24, typename T25,
- typename T26, typename T27, typename T28>
-struct Types28 {
- typedef T1 Head;
- typedef Types27<T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, T15,
- T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27, T28> Tail;
-};
-
-template <typename T1, typename T2, typename T3, typename T4, typename T5,
- typename T6, typename T7, typename T8, typename T9, typename T10,
- typename T11, typename T12, typename T13, typename T14, typename T15,
- typename T16, typename T17, typename T18, typename T19, typename T20,
- typename T21, typename T22, typename T23, typename T24, typename T25,
- typename T26, typename T27, typename T28, typename T29>
-struct Types29 {
- typedef T1 Head;
- typedef Types28<T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, T15,
- T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27, T28,
- T29> Tail;
-};
-
-template <typename T1, typename T2, typename T3, typename T4, typename T5,
- typename T6, typename T7, typename T8, typename T9, typename T10,
- typename T11, typename T12, typename T13, typename T14, typename T15,
- typename T16, typename T17, typename T18, typename T19, typename T20,
- typename T21, typename T22, typename T23, typename T24, typename T25,
- typename T26, typename T27, typename T28, typename T29, typename T30>
-struct Types30 {
- typedef T1 Head;
- typedef Types29<T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, T15,
- T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27, T28, T29,
- T30> Tail;
-};
-
-template <typename T1, typename T2, typename T3, typename T4, typename T5,
- typename T6, typename T7, typename T8, typename T9, typename T10,
- typename T11, typename T12, typename T13, typename T14, typename T15,
- typename T16, typename T17, typename T18, typename T19, typename T20,
- typename T21, typename T22, typename T23, typename T24, typename T25,
- typename T26, typename T27, typename T28, typename T29, typename T30,
- typename T31>
-struct Types31 {
- typedef T1 Head;
- typedef Types30<T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, T15,
- T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27, T28, T29,
- T30, T31> Tail;
-};
-
-template <typename T1, typename T2, typename T3, typename T4, typename T5,
- typename T6, typename T7, typename T8, typename T9, typename T10,
- typename T11, typename T12, typename T13, typename T14, typename T15,
- typename T16, typename T17, typename T18, typename T19, typename T20,
- typename T21, typename T22, typename T23, typename T24, typename T25,
- typename T26, typename T27, typename T28, typename T29, typename T30,
- typename T31, typename T32>
-struct Types32 {
- typedef T1 Head;
- typedef Types31<T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, T15,
- T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27, T28, T29,
- T30, T31, T32> Tail;
-};
-
-template <typename T1, typename T2, typename T3, typename T4, typename T5,
- typename T6, typename T7, typename T8, typename T9, typename T10,
- typename T11, typename T12, typename T13, typename T14, typename T15,
- typename T16, typename T17, typename T18, typename T19, typename T20,
- typename T21, typename T22, typename T23, typename T24, typename T25,
- typename T26, typename T27, typename T28, typename T29, typename T30,
- typename T31, typename T32, typename T33>
-struct Types33 {
- typedef T1 Head;
- typedef Types32<T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, T15,
- T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27, T28, T29,
- T30, T31, T32, T33> Tail;
-};
-
-template <typename T1, typename T2, typename T3, typename T4, typename T5,
- typename T6, typename T7, typename T8, typename T9, typename T10,
- typename T11, typename T12, typename T13, typename T14, typename T15,
- typename T16, typename T17, typename T18, typename T19, typename T20,
- typename T21, typename T22, typename T23, typename T24, typename T25,
- typename T26, typename T27, typename T28, typename T29, typename T30,
- typename T31, typename T32, typename T33, typename T34>
-struct Types34 {
- typedef T1 Head;
- typedef Types33<T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, T15,
- T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27, T28, T29,
- T30, T31, T32, T33, T34> Tail;
-};
-
-template <typename T1, typename T2, typename T3, typename T4, typename T5,
- typename T6, typename T7, typename T8, typename T9, typename T10,
- typename T11, typename T12, typename T13, typename T14, typename T15,
- typename T16, typename T17, typename T18, typename T19, typename T20,
- typename T21, typename T22, typename T23, typename T24, typename T25,
- typename T26, typename T27, typename T28, typename T29, typename T30,
- typename T31, typename T32, typename T33, typename T34, typename T35>
-struct Types35 {
- typedef T1 Head;
- typedef Types34<T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, T15,
- T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27, T28, T29,
- T30, T31, T32, T33, T34, T35> Tail;
-};
-
-template <typename T1, typename T2, typename T3, typename T4, typename T5,
- typename T6, typename T7, typename T8, typename T9, typename T10,
- typename T11, typename T12, typename T13, typename T14, typename T15,
- typename T16, typename T17, typename T18, typename T19, typename T20,
- typename T21, typename T22, typename T23, typename T24, typename T25,
- typename T26, typename T27, typename T28, typename T29, typename T30,
- typename T31, typename T32, typename T33, typename T34, typename T35,
- typename T36>
-struct Types36 {
- typedef T1 Head;
- typedef Types35<T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, T15,
- T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27, T28, T29,
- T30, T31, T32, T33, T34, T35, T36> Tail;
-};
-
-template <typename T1, typename T2, typename T3, typename T4, typename T5,
- typename T6, typename T7, typename T8, typename T9, typename T10,
- typename T11, typename T12, typename T13, typename T14, typename T15,
- typename T16, typename T17, typename T18, typename T19, typename T20,
- typename T21, typename T22, typename T23, typename T24, typename T25,
- typename T26, typename T27, typename T28, typename T29, typename T30,
- typename T31, typename T32, typename T33, typename T34, typename T35,
- typename T36, typename T37>
-struct Types37 {
- typedef T1 Head;
- typedef Types36<T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, T15,
- T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27, T28, T29,
- T30, T31, T32, T33, T34, T35, T36, T37> Tail;
-};
-
-template <typename T1, typename T2, typename T3, typename T4, typename T5,
- typename T6, typename T7, typename T8, typename T9, typename T10,
- typename T11, typename T12, typename T13, typename T14, typename T15,
- typename T16, typename T17, typename T18, typename T19, typename T20,
- typename T21, typename T22, typename T23, typename T24, typename T25,
- typename T26, typename T27, typename T28, typename T29, typename T30,
- typename T31, typename T32, typename T33, typename T34, typename T35,
- typename T36, typename T37, typename T38>
-struct Types38 {
- typedef T1 Head;
- typedef Types37<T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, T15,
- T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27, T28, T29,
- T30, T31, T32, T33, T34, T35, T36, T37, T38> Tail;
-};
-
-template <typename T1, typename T2, typename T3, typename T4, typename T5,
- typename T6, typename T7, typename T8, typename T9, typename T10,
- typename T11, typename T12, typename T13, typename T14, typename T15,
- typename T16, typename T17, typename T18, typename T19, typename T20,
- typename T21, typename T22, typename T23, typename T24, typename T25,
- typename T26, typename T27, typename T28, typename T29, typename T30,
- typename T31, typename T32, typename T33, typename T34, typename T35,
- typename T36, typename T37, typename T38, typename T39>
-struct Types39 {
- typedef T1 Head;
- typedef Types38<T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, T15,
- T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27, T28, T29,
- T30, T31, T32, T33, T34, T35, T36, T37, T38, T39> Tail;
-};
-
-template <typename T1, typename T2, typename T3, typename T4, typename T5,
- typename T6, typename T7, typename T8, typename T9, typename T10,
- typename T11, typename T12, typename T13, typename T14, typename T15,
- typename T16, typename T17, typename T18, typename T19, typename T20,
- typename T21, typename T22, typename T23, typename T24, typename T25,
- typename T26, typename T27, typename T28, typename T29, typename T30,
- typename T31, typename T32, typename T33, typename T34, typename T35,
- typename T36, typename T37, typename T38, typename T39, typename T40>
-struct Types40 {
- typedef T1 Head;
- typedef Types39<T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, T15,
- T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27, T28, T29,
- T30, T31, T32, T33, T34, T35, T36, T37, T38, T39, T40> Tail;
-};
-
-template <typename T1, typename T2, typename T3, typename T4, typename T5,
- typename T6, typename T7, typename T8, typename T9, typename T10,
- typename T11, typename T12, typename T13, typename T14, typename T15,
- typename T16, typename T17, typename T18, typename T19, typename T20,
- typename T21, typename T22, typename T23, typename T24, typename T25,
- typename T26, typename T27, typename T28, typename T29, typename T30,
- typename T31, typename T32, typename T33, typename T34, typename T35,
- typename T36, typename T37, typename T38, typename T39, typename T40,
- typename T41>
-struct Types41 {
- typedef T1 Head;
- typedef Types40<T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, T15,
- T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27, T28, T29,
- T30, T31, T32, T33, T34, T35, T36, T37, T38, T39, T40, T41> Tail;
-};
-
-template <typename T1, typename T2, typename T3, typename T4, typename T5,
- typename T6, typename T7, typename T8, typename T9, typename T10,
- typename T11, typename T12, typename T13, typename T14, typename T15,
- typename T16, typename T17, typename T18, typename T19, typename T20,
- typename T21, typename T22, typename T23, typename T24, typename T25,
- typename T26, typename T27, typename T28, typename T29, typename T30,
- typename T31, typename T32, typename T33, typename T34, typename T35,
- typename T36, typename T37, typename T38, typename T39, typename T40,
- typename T41, typename T42>
-struct Types42 {
- typedef T1 Head;
- typedef Types41<T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, T15,
- T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27, T28, T29,
- T30, T31, T32, T33, T34, T35, T36, T37, T38, T39, T40, T41, T42> Tail;
-};
-
-template <typename T1, typename T2, typename T3, typename T4, typename T5,
- typename T6, typename T7, typename T8, typename T9, typename T10,
- typename T11, typename T12, typename T13, typename T14, typename T15,
- typename T16, typename T17, typename T18, typename T19, typename T20,
- typename T21, typename T22, typename T23, typename T24, typename T25,
- typename T26, typename T27, typename T28, typename T29, typename T30,
- typename T31, typename T32, typename T33, typename T34, typename T35,
- typename T36, typename T37, typename T38, typename T39, typename T40,
- typename T41, typename T42, typename T43>
-struct Types43 {
- typedef T1 Head;
- typedef Types42<T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, T15,
- T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27, T28, T29,
- T30, T31, T32, T33, T34, T35, T36, T37, T38, T39, T40, T41, T42,
- T43> Tail;
-};
-
-template <typename T1, typename T2, typename T3, typename T4, typename T5,
- typename T6, typename T7, typename T8, typename T9, typename T10,
- typename T11, typename T12, typename T13, typename T14, typename T15,
- typename T16, typename T17, typename T18, typename T19, typename T20,
- typename T21, typename T22, typename T23, typename T24, typename T25,
- typename T26, typename T27, typename T28, typename T29, typename T30,
- typename T31, typename T32, typename T33, typename T34, typename T35,
- typename T36, typename T37, typename T38, typename T39, typename T40,
- typename T41, typename T42, typename T43, typename T44>
-struct Types44 {
- typedef T1 Head;
- typedef Types43<T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, T15,
- T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27, T28, T29,
- T30, T31, T32, T33, T34, T35, T36, T37, T38, T39, T40, T41, T42, T43,
- T44> Tail;
-};
-
-template <typename T1, typename T2, typename T3, typename T4, typename T5,
- typename T6, typename T7, typename T8, typename T9, typename T10,
- typename T11, typename T12, typename T13, typename T14, typename T15,
- typename T16, typename T17, typename T18, typename T19, typename T20,
- typename T21, typename T22, typename T23, typename T24, typename T25,
- typename T26, typename T27, typename T28, typename T29, typename T30,
- typename T31, typename T32, typename T33, typename T34, typename T35,
- typename T36, typename T37, typename T38, typename T39, typename T40,
- typename T41, typename T42, typename T43, typename T44, typename T45>
-struct Types45 {
- typedef T1 Head;
- typedef Types44<T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, T15,
- T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27, T28, T29,
- T30, T31, T32, T33, T34, T35, T36, T37, T38, T39, T40, T41, T42, T43,
- T44, T45> Tail;
-};
-
-template <typename T1, typename T2, typename T3, typename T4, typename T5,
- typename T6, typename T7, typename T8, typename T9, typename T10,
- typename T11, typename T12, typename T13, typename T14, typename T15,
- typename T16, typename T17, typename T18, typename T19, typename T20,
- typename T21, typename T22, typename T23, typename T24, typename T25,
- typename T26, typename T27, typename T28, typename T29, typename T30,
- typename T31, typename T32, typename T33, typename T34, typename T35,
- typename T36, typename T37, typename T38, typename T39, typename T40,
- typename T41, typename T42, typename T43, typename T44, typename T45,
- typename T46>
-struct Types46 {
- typedef T1 Head;
- typedef Types45<T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, T15,
- T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27, T28, T29,
- T30, T31, T32, T33, T34, T35, T36, T37, T38, T39, T40, T41, T42, T43,
- T44, T45, T46> Tail;
-};
-
-template <typename T1, typename T2, typename T3, typename T4, typename T5,
- typename T6, typename T7, typename T8, typename T9, typename T10,
- typename T11, typename T12, typename T13, typename T14, typename T15,
- typename T16, typename T17, typename T18, typename T19, typename T20,
- typename T21, typename T22, typename T23, typename T24, typename T25,
- typename T26, typename T27, typename T28, typename T29, typename T30,
- typename T31, typename T32, typename T33, typename T34, typename T35,
- typename T36, typename T37, typename T38, typename T39, typename T40,
- typename T41, typename T42, typename T43, typename T44, typename T45,
- typename T46, typename T47>
-struct Types47 {
- typedef T1 Head;
- typedef Types46<T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, T15,
- T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27, T28, T29,
- T30, T31, T32, T33, T34, T35, T36, T37, T38, T39, T40, T41, T42, T43,
- T44, T45, T46, T47> Tail;
-};
-
-template <typename T1, typename T2, typename T3, typename T4, typename T5,
- typename T6, typename T7, typename T8, typename T9, typename T10,
- typename T11, typename T12, typename T13, typename T14, typename T15,
- typename T16, typename T17, typename T18, typename T19, typename T20,
- typename T21, typename T22, typename T23, typename T24, typename T25,
- typename T26, typename T27, typename T28, typename T29, typename T30,
- typename T31, typename T32, typename T33, typename T34, typename T35,
- typename T36, typename T37, typename T38, typename T39, typename T40,
- typename T41, typename T42, typename T43, typename T44, typename T45,
- typename T46, typename T47, typename T48>
-struct Types48 {
- typedef T1 Head;
- typedef Types47<T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, T15,
- T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27, T28, T29,
- T30, T31, T32, T33, T34, T35, T36, T37, T38, T39, T40, T41, T42, T43,
- T44, T45, T46, T47, T48> Tail;
-};
-
-template <typename T1, typename T2, typename T3, typename T4, typename T5,
- typename T6, typename T7, typename T8, typename T9, typename T10,
- typename T11, typename T12, typename T13, typename T14, typename T15,
- typename T16, typename T17, typename T18, typename T19, typename T20,
- typename T21, typename T22, typename T23, typename T24, typename T25,
- typename T26, typename T27, typename T28, typename T29, typename T30,
- typename T31, typename T32, typename T33, typename T34, typename T35,
- typename T36, typename T37, typename T38, typename T39, typename T40,
- typename T41, typename T42, typename T43, typename T44, typename T45,
- typename T46, typename T47, typename T48, typename T49>
-struct Types49 {
- typedef T1 Head;
- typedef Types48<T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, T15,
- T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27, T28, T29,
- T30, T31, T32, T33, T34, T35, T36, T37, T38, T39, T40, T41, T42, T43,
- T44, T45, T46, T47, T48, T49> Tail;
-};
-
-template <typename T1, typename T2, typename T3, typename T4, typename T5,
- typename T6, typename T7, typename T8, typename T9, typename T10,
- typename T11, typename T12, typename T13, typename T14, typename T15,
- typename T16, typename T17, typename T18, typename T19, typename T20,
- typename T21, typename T22, typename T23, typename T24, typename T25,
- typename T26, typename T27, typename T28, typename T29, typename T30,
- typename T31, typename T32, typename T33, typename T34, typename T35,
- typename T36, typename T37, typename T38, typename T39, typename T40,
- typename T41, typename T42, typename T43, typename T44, typename T45,
- typename T46, typename T47, typename T48, typename T49, typename T50>
-struct Types50 {
- typedef T1 Head;
- typedef Types49<T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, T15,
- T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27, T28, T29,
- T30, T31, T32, T33, T34, T35, T36, T37, T38, T39, T40, T41, T42, T43,
- T44, T45, T46, T47, T48, T49, T50> Tail;
-};
-
-
-} // namespace internal
-
-// We don't want to require the users to write TypesN<...> directly,
-// as that would require them to count the length. Types<...> is much
-// easier to write, but generates horrible messages when there is a
-// compiler error, as gcc insists on printing out each template
-// argument, even if it has the default value (this means Types<int>
-// will appear as Types<int, None, None, ..., None> in the compiler
-// errors).
-//
-// Our solution is to combine the best part of the two approaches: a
-// user would write Types<T1, ..., TN>, and Google Test will translate
-// that to TypesN<T1, ..., TN> internally to make error messages
-// readable. The translation is done by the 'type' member of the
-// Types template.
-template <typename T1 = internal::None, typename T2 = internal::None,
- typename T3 = internal::None, typename T4 = internal::None,
- typename T5 = internal::None, typename T6 = internal::None,
- typename T7 = internal::None, typename T8 = internal::None,
- typename T9 = internal::None, typename T10 = internal::None,
- typename T11 = internal::None, typename T12 = internal::None,
- typename T13 = internal::None, typename T14 = internal::None,
- typename T15 = internal::None, typename T16 = internal::None,
- typename T17 = internal::None, typename T18 = internal::None,
- typename T19 = internal::None, typename T20 = internal::None,
- typename T21 = internal::None, typename T22 = internal::None,
- typename T23 = internal::None, typename T24 = internal::None,
- typename T25 = internal::None, typename T26 = internal::None,
- typename T27 = internal::None, typename T28 = internal::None,
- typename T29 = internal::None, typename T30 = internal::None,
- typename T31 = internal::None, typename T32 = internal::None,
- typename T33 = internal::None, typename T34 = internal::None,
- typename T35 = internal::None, typename T36 = internal::None,
- typename T37 = internal::None, typename T38 = internal::None,
- typename T39 = internal::None, typename T40 = internal::None,
- typename T41 = internal::None, typename T42 = internal::None,
- typename T43 = internal::None, typename T44 = internal::None,
- typename T45 = internal::None, typename T46 = internal::None,
- typename T47 = internal::None, typename T48 = internal::None,
- typename T49 = internal::None, typename T50 = internal::None>
-struct Types {
- typedef internal::Types50<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12,
- T13, T14, T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26,
- T27, T28, T29, T30, T31, T32, T33, T34, T35, T36, T37, T38, T39, T40,
- T41, T42, T43, T44, T45, T46, T47, T48, T49, T50> type;
-};
-
-template <>
-struct Types<internal::None, internal::None, internal::None, internal::None,
- internal::None, internal::None, internal::None, internal::None,
- internal::None, internal::None, internal::None, internal::None,
- internal::None, internal::None, internal::None, internal::None,
- internal::None, internal::None, internal::None, internal::None,
- internal::None, internal::None, internal::None, internal::None,
- internal::None, internal::None, internal::None, internal::None,
- internal::None, internal::None, internal::None, internal::None,
- internal::None, internal::None, internal::None, internal::None,
- internal::None, internal::None, internal::None, internal::None,
- internal::None, internal::None, internal::None, internal::None,
- internal::None, internal::None, internal::None, internal::None,
- internal::None, internal::None> {
- typedef internal::Types0 type;
-};
-template <typename T1>
-struct Types<T1, internal::None, internal::None, internal::None,
- internal::None, internal::None, internal::None, internal::None,
- internal::None, internal::None, internal::None, internal::None,
- internal::None, internal::None, internal::None, internal::None,
- internal::None, internal::None, internal::None, internal::None,
- internal::None, internal::None, internal::None, internal::None,
- internal::None, internal::None, internal::None, internal::None,
- internal::None, internal::None, internal::None, internal::None,
- internal::None, internal::None, internal::None, internal::None,
- internal::None, internal::None, internal::None, internal::None,
- internal::None, internal::None, internal::None, internal::None,
- internal::None, internal::None, internal::None, internal::None,
- internal::None, internal::None> {
- typedef internal::Types1<T1> type;
-};
-template <typename T1, typename T2>
-struct Types<T1, T2, internal::None, internal::None, internal::None,
- internal::None, internal::None, internal::None, internal::None,
- internal::None, internal::None, internal::None, internal::None,
- internal::None, internal::None, internal::None, internal::None,
- internal::None, internal::None, internal::None, internal::None,
- internal::None, internal::None, internal::None, internal::None,
- internal::None, internal::None, internal::None, internal::None,
- internal::None, internal::None, internal::None, internal::None,
- internal::None, internal::None, internal::None, internal::None,
- internal::None, internal::None, internal::None, internal::None,
- internal::None, internal::None, internal::None, internal::None,
- internal::None, internal::None, internal::None, internal::None,
- internal::None> {
- typedef internal::Types2<T1, T2> type;
-};
-template <typename T1, typename T2, typename T3>
-struct Types<T1, T2, T3, internal::None, internal::None, internal::None,
- internal::None, internal::None, internal::None, internal::None,
- internal::None, internal::None, internal::None, internal::None,
- internal::None, internal::None, internal::None, internal::None,
- internal::None, internal::None, internal::None, internal::None,
- internal::None, internal::None, internal::None, internal::None,
- internal::None, internal::None, internal::None, internal::None,
- internal::None, internal::None, internal::None, internal::None,
- internal::None, internal::None, internal::None, internal::None,
- internal::None, internal::None, internal::None, internal::None,
- internal::None, internal::None, internal::None, internal::None,
- internal::None, internal::None, internal::None, internal::None> {
- typedef internal::Types3<T1, T2, T3> type;
-};
-template <typename T1, typename T2, typename T3, typename T4>
-struct Types<T1, T2, T3, T4, internal::None, internal::None, internal::None,
- internal::None, internal::None, internal::None, internal::None,
- internal::None, internal::None, internal::None, internal::None,
- internal::None, internal::None, internal::None, internal::None,
- internal::None, internal::None, internal::None, internal::None,
- internal::None, internal::None, internal::None, internal::None,
- internal::None, internal::None, internal::None, internal::None,
- internal::None, internal::None, internal::None, internal::None,
- internal::None, internal::None, internal::None, internal::None,
- internal::None, internal::None, internal::None, internal::None,
- internal::None, internal::None, internal::None, internal::None,
- internal::None, internal::None, internal::None> {
- typedef internal::Types4<T1, T2, T3, T4> type;
-};
-template <typename T1, typename T2, typename T3, typename T4, typename T5>
-struct Types<T1, T2, T3, T4, T5, internal::None, internal::None,
- internal::None, internal::None, internal::None, internal::None,
- internal::None, internal::None, internal::None, internal::None,
- internal::None, internal::None, internal::None, internal::None,
- internal::None, internal::None, internal::None, internal::None,
- internal::None, internal::None, internal::None, internal::None,
- internal::None, internal::None, internal::None, internal::None,
- internal::None, internal::None, internal::None, internal::None,
- internal::None, internal::None, internal::None, internal::None,
- internal::None, internal::None, internal::None, internal::None,
- internal::None, internal::None, internal::None, internal::None,
- internal::None, internal::None, internal::None> {
- typedef internal::Types5<T1, T2, T3, T4, T5> type;
-};
-template <typename T1, typename T2, typename T3, typename T4, typename T5,
- typename T6>
-struct Types<T1, T2, T3, T4, T5, T6, internal::None, internal::None,
- internal::None, internal::None, internal::None, internal::None,
- internal::None, internal::None, internal::None, internal::None,
- internal::None, internal::None, internal::None, internal::None,
- internal::None, internal::None, internal::None, internal::None,
- internal::None, internal::None, internal::None, internal::None,
- internal::None, internal::None, internal::None, internal::None,
- internal::None, internal::None, internal::None, internal::None,
- internal::None, internal::None, internal::None, internal::None,
- internal::None, internal::None, internal::None, internal::None,
- internal::None, internal::None, internal::None, internal::None,
- internal::None, internal::None> {
- typedef internal::Types6<T1, T2, T3, T4, T5, T6> type;
-};
-template <typename T1, typename T2, typename T3, typename T4, typename T5,
- typename T6, typename T7>
-struct Types<T1, T2, T3, T4, T5, T6, T7, internal::None, internal::None,
- internal::None, internal::None, internal::None, internal::None,
- internal::None, internal::None, internal::None, internal::None,
- internal::None, internal::None, internal::None, internal::None,
- internal::None, internal::None, internal::None, internal::None,
- internal::None, internal::None, internal::None, internal::None,
- internal::None, internal::None, internal::None, internal::None,
- internal::None, internal::None, internal::None, internal::None,
- internal::None, internal::None, internal::None, internal::None,
- internal::None, internal::None, internal::None, internal::None,
- internal::None, internal::None, internal::None, internal::None,
- internal::None> {
- typedef internal::Types7<T1, T2, T3, T4, T5, T6, T7> type;
-};
-template <typename T1, typename T2, typename T3, typename T4, typename T5,
- typename T6, typename T7, typename T8>
-struct Types<T1, T2, T3, T4, T5, T6, T7, T8, internal::None, internal::None,
- internal::None, internal::None, internal::None, internal::None,
- internal::None, internal::None, internal::None, internal::None,
- internal::None, internal::None, internal::None, internal::None,
- internal::None, internal::None, internal::None, internal::None,
- internal::None, internal::None, internal::None, internal::None,
- internal::None, internal::None, internal::None, internal::None,
- internal::None, internal::None, internal::None, internal::None,
- internal::None, internal::None, internal::None, internal::None,
- internal::None, internal::None, internal::None, internal::None,
- internal::None, internal::None, internal::None, internal::None> {
- typedef internal::Types8<T1, T2, T3, T4, T5, T6, T7, T8> type;
-};
-template <typename T1, typename T2, typename T3, typename T4, typename T5,
- typename T6, typename T7, typename T8, typename T9>
-struct Types<T1, T2, T3, T4, T5, T6, T7, T8, T9, internal::None,
- internal::None, internal::None, internal::None, internal::None,
- internal::None, internal::None, internal::None, internal::None,
- internal::None, internal::None, internal::None, internal::None,
- internal::None, internal::None, internal::None, internal::None,
- internal::None, internal::None, internal::None, internal::None,
- internal::None, internal::None, internal::None, internal::None,
- internal::None, internal::None, internal::None, internal::None,
- internal::None, internal::None, internal::None, internal::None,
- internal::None, internal::None, internal::None, internal::None,
- internal::None, internal::None, internal::None, internal::None> {
- typedef internal::Types9<T1, T2, T3, T4, T5, T6, T7, T8, T9> type;
-};
-template <typename T1, typename T2, typename T3, typename T4, typename T5,
- typename T6, typename T7, typename T8, typename T9, typename T10>
-struct Types<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, internal::None,
- internal::None, internal::None, internal::None, internal::None,
- internal::None, internal::None, internal::None, internal::None,
- internal::None, internal::None, internal::None, internal::None,
- internal::None, internal::None, internal::None, internal::None,
- internal::None, internal::None, internal::None, internal::None,
- internal::None, internal::None, internal::None, internal::None,
- internal::None, internal::None, internal::None, internal::None,
- internal::None, internal::None, internal::None, internal::None,
- internal::None, internal::None, internal::None, internal::None,
- internal::None, internal::None, internal::None> {
- typedef internal::Types10<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10> type;
-};
-template <typename T1, typename T2, typename T3, typename T4, typename T5,
- typename T6, typename T7, typename T8, typename T9, typename T10,
- typename T11>
-struct Types<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, internal::None,
- internal::None, internal::None, internal::None, internal::None,
- internal::None, internal::None, internal::None, internal::None,
- internal::None, internal::None, internal::None, internal::None,
- internal::None, internal::None, internal::None, internal::None,
- internal::None, internal::None, internal::None, internal::None,
- internal::None, internal::None, internal::None, internal::None,
- internal::None, internal::None, internal::None, internal::None,
- internal::None, internal::None, internal::None, internal::None,
- internal::None, internal::None, internal::None, internal::None,
- internal::None, internal::None> {
- typedef internal::Types11<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11> type;
-};
-template <typename T1, typename T2, typename T3, typename T4, typename T5,
- typename T6, typename T7, typename T8, typename T9, typename T10,
- typename T11, typename T12>
-struct Types<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, internal::None,
- internal::None, internal::None, internal::None, internal::None,
- internal::None, internal::None, internal::None, internal::None,
- internal::None, internal::None, internal::None, internal::None,
- internal::None, internal::None, internal::None, internal::None,
- internal::None, internal::None, internal::None, internal::None,
- internal::None, internal::None, internal::None, internal::None,
- internal::None, internal::None, internal::None, internal::None,
- internal::None, internal::None, internal::None, internal::None,
- internal::None, internal::None, internal::None, internal::None,
- internal::None> {
- typedef internal::Types12<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11,
- T12> type;
-};
-template <typename T1, typename T2, typename T3, typename T4, typename T5,
- typename T6, typename T7, typename T8, typename T9, typename T10,
- typename T11, typename T12, typename T13>
-struct Types<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13,
- internal::None, internal::None, internal::None, internal::None,
- internal::None, internal::None, internal::None, internal::None,
- internal::None, internal::None, internal::None, internal::None,
- internal::None, internal::None, internal::None, internal::None,
- internal::None, internal::None, internal::None, internal::None,
- internal::None, internal::None, internal::None, internal::None,
- internal::None, internal::None, internal::None, internal::None,
- internal::None, internal::None, internal::None, internal::None,
- internal::None, internal::None, internal::None, internal::None,
- internal::None> {
- typedef internal::Types13<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12,
- T13> type;
-};
-template <typename T1, typename T2, typename T3, typename T4, typename T5,
- typename T6, typename T7, typename T8, typename T9, typename T10,
- typename T11, typename T12, typename T13, typename T14>
-struct Types<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14,
- internal::None, internal::None, internal::None, internal::None,
- internal::None, internal::None, internal::None, internal::None,
- internal::None, internal::None, internal::None, internal::None,
- internal::None, internal::None, internal::None, internal::None,
- internal::None, internal::None, internal::None, internal::None,
- internal::None, internal::None, internal::None, internal::None,
- internal::None, internal::None, internal::None, internal::None,
- internal::None, internal::None, internal::None, internal::None,
- internal::None, internal::None, internal::None, internal::None> {
- typedef internal::Types14<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12,
- T13, T14> type;
-};
-template <typename T1, typename T2, typename T3, typename T4, typename T5,
- typename T6, typename T7, typename T8, typename T9, typename T10,
- typename T11, typename T12, typename T13, typename T14, typename T15>
-struct Types<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, T15,
- internal::None, internal::None, internal::None, internal::None,
- internal::None, internal::None, internal::None, internal::None,
- internal::None, internal::None, internal::None, internal::None,
- internal::None, internal::None, internal::None, internal::None,
- internal::None, internal::None, internal::None, internal::None,
- internal::None, internal::None, internal::None, internal::None,
- internal::None, internal::None, internal::None, internal::None,
- internal::None, internal::None, internal::None, internal::None,
- internal::None, internal::None, internal::None> {
- typedef internal::Types15<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12,
- T13, T14, T15> type;
-};
-template <typename T1, typename T2, typename T3, typename T4, typename T5,
- typename T6, typename T7, typename T8, typename T9, typename T10,
- typename T11, typename T12, typename T13, typename T14, typename T15,
- typename T16>
-struct Types<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, T15,
- T16, internal::None, internal::None, internal::None, internal::None,
- internal::None, internal::None, internal::None, internal::None,
- internal::None, internal::None, internal::None, internal::None,
- internal::None, internal::None, internal::None, internal::None,
- internal::None, internal::None, internal::None, internal::None,
- internal::None, internal::None, internal::None, internal::None,
- internal::None, internal::None, internal::None, internal::None,
- internal::None, internal::None, internal::None, internal::None,
- internal::None, internal::None> {
- typedef internal::Types16<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12,
- T13, T14, T15, T16> type;
-};
-template <typename T1, typename T2, typename T3, typename T4, typename T5,
- typename T6, typename T7, typename T8, typename T9, typename T10,
- typename T11, typename T12, typename T13, typename T14, typename T15,
- typename T16, typename T17>
-struct Types<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, T15,
- T16, T17, internal::None, internal::None, internal::None, internal::None,
- internal::None, internal::None, internal::None, internal::None,
- internal::None, internal::None, internal::None, internal::None,
- internal::None, internal::None, internal::None, internal::None,
- internal::None, internal::None, internal::None, internal::None,
- internal::None, internal::None, internal::None, internal::None,
- internal::None, internal::None, internal::None, internal::None,
- internal::None, internal::None, internal::None, internal::None,
- internal::None> {
- typedef internal::Types17<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12,
- T13, T14, T15, T16, T17> type;
-};
-template <typename T1, typename T2, typename T3, typename T4, typename T5,
- typename T6, typename T7, typename T8, typename T9, typename T10,
- typename T11, typename T12, typename T13, typename T14, typename T15,
- typename T16, typename T17, typename T18>
-struct Types<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, T15,
- T16, T17, T18, internal::None, internal::None, internal::None,
- internal::None, internal::None, internal::None, internal::None,
- internal::None, internal::None, internal::None, internal::None,
- internal::None, internal::None, internal::None, internal::None,
- internal::None, internal::None, internal::None, internal::None,
- internal::None, internal::None, internal::None, internal::None,
- internal::None, internal::None, internal::None, internal::None,
- internal::None, internal::None, internal::None, internal::None,
- internal::None> {
- typedef internal::Types18<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12,
- T13, T14, T15, T16, T17, T18> type;
-};
-template <typename T1, typename T2, typename T3, typename T4, typename T5,
- typename T6, typename T7, typename T8, typename T9, typename T10,
- typename T11, typename T12, typename T13, typename T14, typename T15,
- typename T16, typename T17, typename T18, typename T19>
-struct Types<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, T15,
- T16, T17, T18, T19, internal::None, internal::None, internal::None,
- internal::None, internal::None, internal::None, internal::None,
- internal::None, internal::None, internal::None, internal::None,
- internal::None, internal::None, internal::None, internal::None,
- internal::None, internal::None, internal::None, internal::None,
- internal::None, internal::None, internal::None, internal::None,
- internal::None, internal::None, internal::None, internal::None,
- internal::None, internal::None, internal::None, internal::None> {
- typedef internal::Types19<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12,
- T13, T14, T15, T16, T17, T18, T19> type;
-};
-template <typename T1, typename T2, typename T3, typename T4, typename T5,
- typename T6, typename T7, typename T8, typename T9, typename T10,
- typename T11, typename T12, typename T13, typename T14, typename T15,
- typename T16, typename T17, typename T18, typename T19, typename T20>
-struct Types<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, T15,
- T16, T17, T18, T19, T20, internal::None, internal::None, internal::None,
- internal::None, internal::None, internal::None, internal::None,
- internal::None, internal::None, internal::None, internal::None,
- internal::None, internal::None, internal::None, internal::None,
- internal::None, internal::None, internal::None, internal::None,
- internal::None, internal::None, internal::None, internal::None,
- internal::None, internal::None, internal::None, internal::None,
- internal::None, internal::None, internal::None> {
- typedef internal::Types20<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12,
- T13, T14, T15, T16, T17, T18, T19, T20> type;
-};
-template <typename T1, typename T2, typename T3, typename T4, typename T5,
- typename T6, typename T7, typename T8, typename T9, typename T10,
- typename T11, typename T12, typename T13, typename T14, typename T15,
- typename T16, typename T17, typename T18, typename T19, typename T20,
- typename T21>
-struct Types<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, T15,
- T16, T17, T18, T19, T20, T21, internal::None, internal::None,
- internal::None, internal::None, internal::None, internal::None,
- internal::None, internal::None, internal::None, internal::None,
- internal::None, internal::None, internal::None, internal::None,
- internal::None, internal::None, internal::None, internal::None,
- internal::None, internal::None, internal::None, internal::None,
- internal::None, internal::None, internal::None, internal::None,
- internal::None, internal::None, internal::None> {
- typedef internal::Types21<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12,
- T13, T14, T15, T16, T17, T18, T19, T20, T21> type;
-};
-template <typename T1, typename T2, typename T3, typename T4, typename T5,
- typename T6, typename T7, typename T8, typename T9, typename T10,
- typename T11, typename T12, typename T13, typename T14, typename T15,
- typename T16, typename T17, typename T18, typename T19, typename T20,
- typename T21, typename T22>
-struct Types<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, T15,
- T16, T17, T18, T19, T20, T21, T22, internal::None, internal::None,
- internal::None, internal::None, internal::None, internal::None,
- internal::None, internal::None, internal::None, internal::None,
- internal::None, internal::None, internal::None, internal::None,
- internal::None, internal::None, internal::None, internal::None,
- internal::None, internal::None, internal::None, internal::None,
- internal::None, internal::None, internal::None, internal::None,
- internal::None, internal::None> {
- typedef internal::Types22<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12,
- T13, T14, T15, T16, T17, T18, T19, T20, T21, T22> type;
-};
-template <typename T1, typename T2, typename T3, typename T4, typename T5,
- typename T6, typename T7, typename T8, typename T9, typename T10,
- typename T11, typename T12, typename T13, typename T14, typename T15,
- typename T16, typename T17, typename T18, typename T19, typename T20,
- typename T21, typename T22, typename T23>
-struct Types<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, T15,
- T16, T17, T18, T19, T20, T21, T22, T23, internal::None, internal::None,
- internal::None, internal::None, internal::None, internal::None,
- internal::None, internal::None, internal::None, internal::None,
- internal::None, internal::None, internal::None, internal::None,
- internal::None, internal::None, internal::None, internal::None,
- internal::None, internal::None, internal::None, internal::None,
- internal::None, internal::None, internal::None, internal::None,
- internal::None> {
- typedef internal::Types23<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12,
- T13, T14, T15, T16, T17, T18, T19, T20, T21, T22, T23> type;
-};
-template <typename T1, typename T2, typename T3, typename T4, typename T5,
- typename T6, typename T7, typename T8, typename T9, typename T10,
- typename T11, typename T12, typename T13, typename T14, typename T15,
- typename T16, typename T17, typename T18, typename T19, typename T20,
- typename T21, typename T22, typename T23, typename T24>
-struct Types<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, T15,
- T16, T17, T18, T19, T20, T21, T22, T23, T24, internal::None,
- internal::None, internal::None, internal::None, internal::None,
- internal::None, internal::None, internal::None, internal::None,
- internal::None, internal::None, internal::None, internal::None,
- internal::None, internal::None, internal::None, internal::None,
- internal::None, internal::None, internal::None, internal::None,
- internal::None, internal::None, internal::None, internal::None,
- internal::None> {
- typedef internal::Types24<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12,
- T13, T14, T15, T16, T17, T18, T19, T20, T21, T22, T23, T24> type;
-};
-template <typename T1, typename T2, typename T3, typename T4, typename T5,
- typename T6, typename T7, typename T8, typename T9, typename T10,
- typename T11, typename T12, typename T13, typename T14, typename T15,
- typename T16, typename T17, typename T18, typename T19, typename T20,
- typename T21, typename T22, typename T23, typename T24, typename T25>
-struct Types<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, T15,
- T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, internal::None,
- internal::None, internal::None, internal::None, internal::None,
- internal::None, internal::None, internal::None, internal::None,
- internal::None, internal::None, internal::None, internal::None,
- internal::None, internal::None, internal::None, internal::None,
- internal::None, internal::None, internal::None, internal::None,
- internal::None, internal::None, internal::None, internal::None> {
- typedef internal::Types25<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12,
- T13, T14, T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25> type;
-};
-template <typename T1, typename T2, typename T3, typename T4, typename T5,
- typename T6, typename T7, typename T8, typename T9, typename T10,
- typename T11, typename T12, typename T13, typename T14, typename T15,
- typename T16, typename T17, typename T18, typename T19, typename T20,
- typename T21, typename T22, typename T23, typename T24, typename T25,
- typename T26>
-struct Types<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, T15,
- T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, internal::None,
- internal::None, internal::None, internal::None, internal::None,
- internal::None, internal::None, internal::None, internal::None,
- internal::None, internal::None, internal::None, internal::None,
- internal::None, internal::None, internal::None, internal::None,
- internal::None, internal::None, internal::None, internal::None,
- internal::None, internal::None, internal::None> {
- typedef internal::Types26<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12,
- T13, T14, T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25,
- T26> type;
-};
-template <typename T1, typename T2, typename T3, typename T4, typename T5,
- typename T6, typename T7, typename T8, typename T9, typename T10,
- typename T11, typename T12, typename T13, typename T14, typename T15,
- typename T16, typename T17, typename T18, typename T19, typename T20,
- typename T21, typename T22, typename T23, typename T24, typename T25,
- typename T26, typename T27>
-struct Types<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, T15,
- T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27, internal::None,
- internal::None, internal::None, internal::None, internal::None,
- internal::None, internal::None, internal::None, internal::None,
- internal::None, internal::None, internal::None, internal::None,
- internal::None, internal::None, internal::None, internal::None,
- internal::None, internal::None, internal::None, internal::None,
- internal::None, internal::None> {
- typedef internal::Types27<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12,
- T13, T14, T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26,
- T27> type;
-};
-template <typename T1, typename T2, typename T3, typename T4, typename T5,
- typename T6, typename T7, typename T8, typename T9, typename T10,
- typename T11, typename T12, typename T13, typename T14, typename T15,
- typename T16, typename T17, typename T18, typename T19, typename T20,
- typename T21, typename T22, typename T23, typename T24, typename T25,
- typename T26, typename T27, typename T28>
-struct Types<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, T15,
- T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27, T28,
- internal::None, internal::None, internal::None, internal::None,
- internal::None, internal::None, internal::None, internal::None,
- internal::None, internal::None, internal::None, internal::None,
- internal::None, internal::None, internal::None, internal::None,
- internal::None, internal::None, internal::None, internal::None,
- internal::None, internal::None> {
- typedef internal::Types28<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12,
- T13, T14, T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26,
- T27, T28> type;
-};
-template <typename T1, typename T2, typename T3, typename T4, typename T5,
- typename T6, typename T7, typename T8, typename T9, typename T10,
- typename T11, typename T12, typename T13, typename T14, typename T15,
- typename T16, typename T17, typename T18, typename T19, typename T20,
- typename T21, typename T22, typename T23, typename T24, typename T25,
- typename T26, typename T27, typename T28, typename T29>
-struct Types<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, T15,
- T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27, T28, T29,
- internal::None, internal::None, internal::None, internal::None,
- internal::None, internal::None, internal::None, internal::None,
- internal::None, internal::None, internal::None, internal::None,
- internal::None, internal::None, internal::None, internal::None,
- internal::None, internal::None, internal::None, internal::None,
- internal::None> {
- typedef internal::Types29<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12,
- T13, T14, T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26,
- T27, T28, T29> type;
-};
-template <typename T1, typename T2, typename T3, typename T4, typename T5,
- typename T6, typename T7, typename T8, typename T9, typename T10,
- typename T11, typename T12, typename T13, typename T14, typename T15,
- typename T16, typename T17, typename T18, typename T19, typename T20,
- typename T21, typename T22, typename T23, typename T24, typename T25,
- typename T26, typename T27, typename T28, typename T29, typename T30>
-struct Types<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, T15,
- T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27, T28, T29, T30,
- internal::None, internal::None, internal::None, internal::None,
- internal::None, internal::None, internal::None, internal::None,
- internal::None, internal::None, internal::None, internal::None,
- internal::None, internal::None, internal::None, internal::None,
- internal::None, internal::None, internal::None, internal::None> {
- typedef internal::Types30<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12,
- T13, T14, T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26,
- T27, T28, T29, T30> type;
-};
-template <typename T1, typename T2, typename T3, typename T4, typename T5,
- typename T6, typename T7, typename T8, typename T9, typename T10,
- typename T11, typename T12, typename T13, typename T14, typename T15,
- typename T16, typename T17, typename T18, typename T19, typename T20,
- typename T21, typename T22, typename T23, typename T24, typename T25,
- typename T26, typename T27, typename T28, typename T29, typename T30,
- typename T31>
-struct Types<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, T15,
- T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27, T28, T29, T30,
- T31, internal::None, internal::None, internal::None, internal::None,
- internal::None, internal::None, internal::None, internal::None,
- internal::None, internal::None, internal::None, internal::None,
- internal::None, internal::None, internal::None, internal::None,
- internal::None, internal::None, internal::None> {
- typedef internal::Types31<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12,
- T13, T14, T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26,
- T27, T28, T29, T30, T31> type;
-};
-template <typename T1, typename T2, typename T3, typename T4, typename T5,
- typename T6, typename T7, typename T8, typename T9, typename T10,
- typename T11, typename T12, typename T13, typename T14, typename T15,
- typename T16, typename T17, typename T18, typename T19, typename T20,
- typename T21, typename T22, typename T23, typename T24, typename T25,
- typename T26, typename T27, typename T28, typename T29, typename T30,
- typename T31, typename T32>
-struct Types<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, T15,
- T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27, T28, T29, T30,
- T31, T32, internal::None, internal::None, internal::None, internal::None,
- internal::None, internal::None, internal::None, internal::None,
- internal::None, internal::None, internal::None, internal::None,
- internal::None, internal::None, internal::None, internal::None,
- internal::None, internal::None> {
- typedef internal::Types32<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12,
- T13, T14, T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26,
- T27, T28, T29, T30, T31, T32> type;
-};
-template <typename T1, typename T2, typename T3, typename T4, typename T5,
- typename T6, typename T7, typename T8, typename T9, typename T10,
- typename T11, typename T12, typename T13, typename T14, typename T15,
- typename T16, typename T17, typename T18, typename T19, typename T20,
- typename T21, typename T22, typename T23, typename T24, typename T25,
- typename T26, typename T27, typename T28, typename T29, typename T30,
- typename T31, typename T32, typename T33>
-struct Types<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, T15,
- T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27, T28, T29, T30,
- T31, T32, T33, internal::None, internal::None, internal::None,
- internal::None, internal::None, internal::None, internal::None,
- internal::None, internal::None, internal::None, internal::None,
- internal::None, internal::None, internal::None, internal::None,
- internal::None, internal::None> {
- typedef internal::Types33<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12,
- T13, T14, T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26,
- T27, T28, T29, T30, T31, T32, T33> type;
-};
-template <typename T1, typename T2, typename T3, typename T4, typename T5,
- typename T6, typename T7, typename T8, typename T9, typename T10,
- typename T11, typename T12, typename T13, typename T14, typename T15,
- typename T16, typename T17, typename T18, typename T19, typename T20,
- typename T21, typename T22, typename T23, typename T24, typename T25,
- typename T26, typename T27, typename T28, typename T29, typename T30,
- typename T31, typename T32, typename T33, typename T34>
-struct Types<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, T15,
- T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27, T28, T29, T30,
- T31, T32, T33, T34, internal::None, internal::None, internal::None,
- internal::None, internal::None, internal::None, internal::None,
- internal::None, internal::None, internal::None, internal::None,
- internal::None, internal::None, internal::None, internal::None,
- internal::None> {
- typedef internal::Types34<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12,
- T13, T14, T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26,
- T27, T28, T29, T30, T31, T32, T33, T34> type;
-};
-template <typename T1, typename T2, typename T3, typename T4, typename T5,
- typename T6, typename T7, typename T8, typename T9, typename T10,
- typename T11, typename T12, typename T13, typename T14, typename T15,
- typename T16, typename T17, typename T18, typename T19, typename T20,
- typename T21, typename T22, typename T23, typename T24, typename T25,
- typename T26, typename T27, typename T28, typename T29, typename T30,
- typename T31, typename T32, typename T33, typename T34, typename T35>
-struct Types<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, T15,
- T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27, T28, T29, T30,
- T31, T32, T33, T34, T35, internal::None, internal::None, internal::None,
- internal::None, internal::None, internal::None, internal::None,
- internal::None, internal::None, internal::None, internal::None,
- internal::None, internal::None, internal::None, internal::None> {
- typedef internal::Types35<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12,
- T13, T14, T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26,
- T27, T28, T29, T30, T31, T32, T33, T34, T35> type;
-};
-template <typename T1, typename T2, typename T3, typename T4, typename T5,
- typename T6, typename T7, typename T8, typename T9, typename T10,
- typename T11, typename T12, typename T13, typename T14, typename T15,
- typename T16, typename T17, typename T18, typename T19, typename T20,
- typename T21, typename T22, typename T23, typename T24, typename T25,
- typename T26, typename T27, typename T28, typename T29, typename T30,
- typename T31, typename T32, typename T33, typename T34, typename T35,
- typename T36>
-struct Types<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, T15,
- T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27, T28, T29, T30,
- T31, T32, T33, T34, T35, T36, internal::None, internal::None,
- internal::None, internal::None, internal::None, internal::None,
- internal::None, internal::None, internal::None, internal::None,
- internal::None, internal::None, internal::None, internal::None> {
- typedef internal::Types36<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12,
- T13, T14, T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26,
- T27, T28, T29, T30, T31, T32, T33, T34, T35, T36> type;
-};
-template <typename T1, typename T2, typename T3, typename T4, typename T5,
- typename T6, typename T7, typename T8, typename T9, typename T10,
- typename T11, typename T12, typename T13, typename T14, typename T15,
- typename T16, typename T17, typename T18, typename T19, typename T20,
- typename T21, typename T22, typename T23, typename T24, typename T25,
- typename T26, typename T27, typename T28, typename T29, typename T30,
- typename T31, typename T32, typename T33, typename T34, typename T35,
- typename T36, typename T37>
-struct Types<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, T15,
- T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27, T28, T29, T30,
- T31, T32, T33, T34, T35, T36, T37, internal::None, internal::None,
- internal::None, internal::None, internal::None, internal::None,
- internal::None, internal::None, internal::None, internal::None,
- internal::None, internal::None, internal::None> {
- typedef internal::Types37<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12,
- T13, T14, T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26,
- T27, T28, T29, T30, T31, T32, T33, T34, T35, T36, T37> type;
-};
-template <typename T1, typename T2, typename T3, typename T4, typename T5,
- typename T6, typename T7, typename T8, typename T9, typename T10,
- typename T11, typename T12, typename T13, typename T14, typename T15,
- typename T16, typename T17, typename T18, typename T19, typename T20,
- typename T21, typename T22, typename T23, typename T24, typename T25,
- typename T26, typename T27, typename T28, typename T29, typename T30,
- typename T31, typename T32, typename T33, typename T34, typename T35,
- typename T36, typename T37, typename T38>
-struct Types<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, T15,
- T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27, T28, T29, T30,
- T31, T32, T33, T34, T35, T36, T37, T38, internal::None, internal::None,
- internal::None, internal::None, internal::None, internal::None,
- internal::None, internal::None, internal::None, internal::None,
- internal::None, internal::None> {
- typedef internal::Types38<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12,
- T13, T14, T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26,
- T27, T28, T29, T30, T31, T32, T33, T34, T35, T36, T37, T38> type;
-};
-template <typename T1, typename T2, typename T3, typename T4, typename T5,
- typename T6, typename T7, typename T8, typename T9, typename T10,
- typename T11, typename T12, typename T13, typename T14, typename T15,
- typename T16, typename T17, typename T18, typename T19, typename T20,
- typename T21, typename T22, typename T23, typename T24, typename T25,
- typename T26, typename T27, typename T28, typename T29, typename T30,
- typename T31, typename T32, typename T33, typename T34, typename T35,
- typename T36, typename T37, typename T38, typename T39>
-struct Types<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, T15,
- T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27, T28, T29, T30,
- T31, T32, T33, T34, T35, T36, T37, T38, T39, internal::None,
- internal::None, internal::None, internal::None, internal::None,
- internal::None, internal::None, internal::None, internal::None,
- internal::None, internal::None> {
- typedef internal::Types39<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12,
- T13, T14, T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26,
- T27, T28, T29, T30, T31, T32, T33, T34, T35, T36, T37, T38, T39> type;
-};
-template <typename T1, typename T2, typename T3, typename T4, typename T5,
- typename T6, typename T7, typename T8, typename T9, typename T10,
- typename T11, typename T12, typename T13, typename T14, typename T15,
- typename T16, typename T17, typename T18, typename T19, typename T20,
- typename T21, typename T22, typename T23, typename T24, typename T25,
- typename T26, typename T27, typename T28, typename T29, typename T30,
- typename T31, typename T32, typename T33, typename T34, typename T35,
- typename T36, typename T37, typename T38, typename T39, typename T40>
-struct Types<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, T15,
- T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27, T28, T29, T30,
- T31, T32, T33, T34, T35, T36, T37, T38, T39, T40, internal::None,
- internal::None, internal::None, internal::None, internal::None,
- internal::None, internal::None, internal::None, internal::None,
- internal::None> {
- typedef internal::Types40<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12,
- T13, T14, T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26,
- T27, T28, T29, T30, T31, T32, T33, T34, T35, T36, T37, T38, T39,
- T40> type;
-};
-template <typename T1, typename T2, typename T3, typename T4, typename T5,
- typename T6, typename T7, typename T8, typename T9, typename T10,
- typename T11, typename T12, typename T13, typename T14, typename T15,
- typename T16, typename T17, typename T18, typename T19, typename T20,
- typename T21, typename T22, typename T23, typename T24, typename T25,
- typename T26, typename T27, typename T28, typename T29, typename T30,
- typename T31, typename T32, typename T33, typename T34, typename T35,
- typename T36, typename T37, typename T38, typename T39, typename T40,
- typename T41>
-struct Types<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, T15,
- T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27, T28, T29, T30,
- T31, T32, T33, T34, T35, T36, T37, T38, T39, T40, T41, internal::None,
- internal::None, internal::None, internal::None, internal::None,
- internal::None, internal::None, internal::None, internal::None> {
- typedef internal::Types41<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12,
- T13, T14, T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26,
- T27, T28, T29, T30, T31, T32, T33, T34, T35, T36, T37, T38, T39, T40,
- T41> type;
-};
-template <typename T1, typename T2, typename T3, typename T4, typename T5,
- typename T6, typename T7, typename T8, typename T9, typename T10,
- typename T11, typename T12, typename T13, typename T14, typename T15,
- typename T16, typename T17, typename T18, typename T19, typename T20,
- typename T21, typename T22, typename T23, typename T24, typename T25,
- typename T26, typename T27, typename T28, typename T29, typename T30,
- typename T31, typename T32, typename T33, typename T34, typename T35,
- typename T36, typename T37, typename T38, typename T39, typename T40,
- typename T41, typename T42>
-struct Types<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, T15,
- T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27, T28, T29, T30,
- T31, T32, T33, T34, T35, T36, T37, T38, T39, T40, T41, T42, internal::None,
- internal::None, internal::None, internal::None, internal::None,
- internal::None, internal::None, internal::None> {
- typedef internal::Types42<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12,
- T13, T14, T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26,
- T27, T28, T29, T30, T31, T32, T33, T34, T35, T36, T37, T38, T39, T40,
- T41, T42> type;
-};
-template <typename T1, typename T2, typename T3, typename T4, typename T5,
- typename T6, typename T7, typename T8, typename T9, typename T10,
- typename T11, typename T12, typename T13, typename T14, typename T15,
- typename T16, typename T17, typename T18, typename T19, typename T20,
- typename T21, typename T22, typename T23, typename T24, typename T25,
- typename T26, typename T27, typename T28, typename T29, typename T30,
- typename T31, typename T32, typename T33, typename T34, typename T35,
- typename T36, typename T37, typename T38, typename T39, typename T40,
- typename T41, typename T42, typename T43>
-struct Types<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, T15,
- T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27, T28, T29, T30,
- T31, T32, T33, T34, T35, T36, T37, T38, T39, T40, T41, T42, T43,
- internal::None, internal::None, internal::None, internal::None,
- internal::None, internal::None, internal::None> {
- typedef internal::Types43<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12,
- T13, T14, T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26,
- T27, T28, T29, T30, T31, T32, T33, T34, T35, T36, T37, T38, T39, T40,
- T41, T42, T43> type;
-};
-template <typename T1, typename T2, typename T3, typename T4, typename T5,
- typename T6, typename T7, typename T8, typename T9, typename T10,
- typename T11, typename T12, typename T13, typename T14, typename T15,
- typename T16, typename T17, typename T18, typename T19, typename T20,
- typename T21, typename T22, typename T23, typename T24, typename T25,
- typename T26, typename T27, typename T28, typename T29, typename T30,
- typename T31, typename T32, typename T33, typename T34, typename T35,
- typename T36, typename T37, typename T38, typename T39, typename T40,
- typename T41, typename T42, typename T43, typename T44>
-struct Types<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, T15,
- T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27, T28, T29, T30,
- T31, T32, T33, T34, T35, T36, T37, T38, T39, T40, T41, T42, T43, T44,
- internal::None, internal::None, internal::None, internal::None,
- internal::None, internal::None> {
- typedef internal::Types44<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12,
- T13, T14, T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26,
- T27, T28, T29, T30, T31, T32, T33, T34, T35, T36, T37, T38, T39, T40,
- T41, T42, T43, T44> type;
-};
-template <typename T1, typename T2, typename T3, typename T4, typename T5,
- typename T6, typename T7, typename T8, typename T9, typename T10,
- typename T11, typename T12, typename T13, typename T14, typename T15,
- typename T16, typename T17, typename T18, typename T19, typename T20,
- typename T21, typename T22, typename T23, typename T24, typename T25,
- typename T26, typename T27, typename T28, typename T29, typename T30,
- typename T31, typename T32, typename T33, typename T34, typename T35,
- typename T36, typename T37, typename T38, typename T39, typename T40,
- typename T41, typename T42, typename T43, typename T44, typename T45>
-struct Types<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, T15,
- T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27, T28, T29, T30,
- T31, T32, T33, T34, T35, T36, T37, T38, T39, T40, T41, T42, T43, T44, T45,
- internal::None, internal::None, internal::None, internal::None,
- internal::None> {
- typedef internal::Types45<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12,
- T13, T14, T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26,
- T27, T28, T29, T30, T31, T32, T33, T34, T35, T36, T37, T38, T39, T40,
- T41, T42, T43, T44, T45> type;
-};
-template <typename T1, typename T2, typename T3, typename T4, typename T5,
- typename T6, typename T7, typename T8, typename T9, typename T10,
- typename T11, typename T12, typename T13, typename T14, typename T15,
- typename T16, typename T17, typename T18, typename T19, typename T20,
- typename T21, typename T22, typename T23, typename T24, typename T25,
- typename T26, typename T27, typename T28, typename T29, typename T30,
- typename T31, typename T32, typename T33, typename T34, typename T35,
- typename T36, typename T37, typename T38, typename T39, typename T40,
- typename T41, typename T42, typename T43, typename T44, typename T45,
- typename T46>
-struct Types<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, T15,
- T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27, T28, T29, T30,
- T31, T32, T33, T34, T35, T36, T37, T38, T39, T40, T41, T42, T43, T44, T45,
- T46, internal::None, internal::None, internal::None, internal::None> {
- typedef internal::Types46<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12,
- T13, T14, T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26,
- T27, T28, T29, T30, T31, T32, T33, T34, T35, T36, T37, T38, T39, T40,
- T41, T42, T43, T44, T45, T46> type;
-};
-template <typename T1, typename T2, typename T3, typename T4, typename T5,
- typename T6, typename T7, typename T8, typename T9, typename T10,
- typename T11, typename T12, typename T13, typename T14, typename T15,
- typename T16, typename T17, typename T18, typename T19, typename T20,
- typename T21, typename T22, typename T23, typename T24, typename T25,
- typename T26, typename T27, typename T28, typename T29, typename T30,
- typename T31, typename T32, typename T33, typename T34, typename T35,
- typename T36, typename T37, typename T38, typename T39, typename T40,
- typename T41, typename T42, typename T43, typename T44, typename T45,
- typename T46, typename T47>
-struct Types<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, T15,
- T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27, T28, T29, T30,
- T31, T32, T33, T34, T35, T36, T37, T38, T39, T40, T41, T42, T43, T44, T45,
- T46, T47, internal::None, internal::None, internal::None> {
- typedef internal::Types47<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12,
- T13, T14, T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26,
- T27, T28, T29, T30, T31, T32, T33, T34, T35, T36, T37, T38, T39, T40,
- T41, T42, T43, T44, T45, T46, T47> type;
-};
-template <typename T1, typename T2, typename T3, typename T4, typename T5,
- typename T6, typename T7, typename T8, typename T9, typename T10,
- typename T11, typename T12, typename T13, typename T14, typename T15,
- typename T16, typename T17, typename T18, typename T19, typename T20,
- typename T21, typename T22, typename T23, typename T24, typename T25,
- typename T26, typename T27, typename T28, typename T29, typename T30,
- typename T31, typename T32, typename T33, typename T34, typename T35,
- typename T36, typename T37, typename T38, typename T39, typename T40,
- typename T41, typename T42, typename T43, typename T44, typename T45,
- typename T46, typename T47, typename T48>
-struct Types<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, T15,
- T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27, T28, T29, T30,
- T31, T32, T33, T34, T35, T36, T37, T38, T39, T40, T41, T42, T43, T44, T45,
- T46, T47, T48, internal::None, internal::None> {
- typedef internal::Types48<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12,
- T13, T14, T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26,
- T27, T28, T29, T30, T31, T32, T33, T34, T35, T36, T37, T38, T39, T40,
- T41, T42, T43, T44, T45, T46, T47, T48> type;
-};
-template <typename T1, typename T2, typename T3, typename T4, typename T5,
- typename T6, typename T7, typename T8, typename T9, typename T10,
- typename T11, typename T12, typename T13, typename T14, typename T15,
- typename T16, typename T17, typename T18, typename T19, typename T20,
- typename T21, typename T22, typename T23, typename T24, typename T25,
- typename T26, typename T27, typename T28, typename T29, typename T30,
- typename T31, typename T32, typename T33, typename T34, typename T35,
- typename T36, typename T37, typename T38, typename T39, typename T40,
- typename T41, typename T42, typename T43, typename T44, typename T45,
- typename T46, typename T47, typename T48, typename T49>
-struct Types<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, T15,
- T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27, T28, T29, T30,
- T31, T32, T33, T34, T35, T36, T37, T38, T39, T40, T41, T42, T43, T44, T45,
- T46, T47, T48, T49, internal::None> {
- typedef internal::Types49<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12,
- T13, T14, T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26,
- T27, T28, T29, T30, T31, T32, T33, T34, T35, T36, T37, T38, T39, T40,
- T41, T42, T43, T44, T45, T46, T47, T48, T49> type;
-};
-
-namespace internal {
-
-# define GTEST_TEMPLATE_ template <typename T> class
-
-// The template "selector" struct TemplateSel<Tmpl> is used to
-// represent Tmpl, which must be a class template with one type
-// parameter, as a type. TemplateSel<Tmpl>::Bind<T>::type is defined
-// as the type Tmpl<T>. This allows us to actually instantiate the
-// template "selected" by TemplateSel<Tmpl>.
-//
-// This trick is necessary for simulating typedef for class templates,
-// which C++ doesn't support directly.
-template <GTEST_TEMPLATE_ Tmpl>
-struct TemplateSel {
- template <typename T>
- struct Bind {
- typedef Tmpl<T> type;
- };
-};
-
-# define GTEST_BIND_(TmplSel, T) \
- TmplSel::template Bind<T>::type
-
-// A unique struct template used as the default value for the
-// arguments of class template Templates. This allows us to simulate
-// variadic templates (e.g. Templates<int>, Templates<int, double>,
-// and etc), which C++ doesn't support directly.
-template <typename T>
-struct NoneT {};
-
-// The following family of struct and struct templates are used to
-// represent template lists. In particular, TemplatesN<T1, T2, ...,
-// TN> represents a list of N templates (T1, T2, ..., and TN). Except
-// for Templates0, every struct in the family has two member types:
-// Head for the selector of the first template in the list, and Tail
-// for the rest of the list.
-
-// The empty template list.
-struct Templates0 {};
-
-// Template lists of length 1, 2, 3, and so on.
-
-template <GTEST_TEMPLATE_ T1>
-struct Templates1 {
- typedef TemplateSel<T1> Head;
- typedef Templates0 Tail;
-};
-template <GTEST_TEMPLATE_ T1, GTEST_TEMPLATE_ T2>
-struct Templates2 {
- typedef TemplateSel<T1> Head;
- typedef Templates1<T2> Tail;
-};
-
-template <GTEST_TEMPLATE_ T1, GTEST_TEMPLATE_ T2, GTEST_TEMPLATE_ T3>
-struct Templates3 {
- typedef TemplateSel<T1> Head;
- typedef Templates2<T2, T3> Tail;
-};
-
-template <GTEST_TEMPLATE_ T1, GTEST_TEMPLATE_ T2, GTEST_TEMPLATE_ T3,
- GTEST_TEMPLATE_ T4>
-struct Templates4 {
- typedef TemplateSel<T1> Head;
- typedef Templates3<T2, T3, T4> Tail;
-};
-
-template <GTEST_TEMPLATE_ T1, GTEST_TEMPLATE_ T2, GTEST_TEMPLATE_ T3,
- GTEST_TEMPLATE_ T4, GTEST_TEMPLATE_ T5>
-struct Templates5 {
- typedef TemplateSel<T1> Head;
- typedef Templates4<T2, T3, T4, T5> Tail;
-};
-
-template <GTEST_TEMPLATE_ T1, GTEST_TEMPLATE_ T2, GTEST_TEMPLATE_ T3,
- GTEST_TEMPLATE_ T4, GTEST_TEMPLATE_ T5, GTEST_TEMPLATE_ T6>
-struct Templates6 {
- typedef TemplateSel<T1> Head;
- typedef Templates5<T2, T3, T4, T5, T6> Tail;
-};
-
-template <GTEST_TEMPLATE_ T1, GTEST_TEMPLATE_ T2, GTEST_TEMPLATE_ T3,
- GTEST_TEMPLATE_ T4, GTEST_TEMPLATE_ T5, GTEST_TEMPLATE_ T6,
- GTEST_TEMPLATE_ T7>
-struct Templates7 {
- typedef TemplateSel<T1> Head;
- typedef Templates6<T2, T3, T4, T5, T6, T7> Tail;
-};
-
-template <GTEST_TEMPLATE_ T1, GTEST_TEMPLATE_ T2, GTEST_TEMPLATE_ T3,
- GTEST_TEMPLATE_ T4, GTEST_TEMPLATE_ T5, GTEST_TEMPLATE_ T6,
- GTEST_TEMPLATE_ T7, GTEST_TEMPLATE_ T8>
-struct Templates8 {
- typedef TemplateSel<T1> Head;
- typedef Templates7<T2, T3, T4, T5, T6, T7, T8> Tail;
-};
-
-template <GTEST_TEMPLATE_ T1, GTEST_TEMPLATE_ T2, GTEST_TEMPLATE_ T3,
- GTEST_TEMPLATE_ T4, GTEST_TEMPLATE_ T5, GTEST_TEMPLATE_ T6,
- GTEST_TEMPLATE_ T7, GTEST_TEMPLATE_ T8, GTEST_TEMPLATE_ T9>
-struct Templates9 {
- typedef TemplateSel<T1> Head;
- typedef Templates8<T2, T3, T4, T5, T6, T7, T8, T9> Tail;
-};
-
-template <GTEST_TEMPLATE_ T1, GTEST_TEMPLATE_ T2, GTEST_TEMPLATE_ T3,
- GTEST_TEMPLATE_ T4, GTEST_TEMPLATE_ T5, GTEST_TEMPLATE_ T6,
- GTEST_TEMPLATE_ T7, GTEST_TEMPLATE_ T8, GTEST_TEMPLATE_ T9,
- GTEST_TEMPLATE_ T10>
-struct Templates10 {
- typedef TemplateSel<T1> Head;
- typedef Templates9<T2, T3, T4, T5, T6, T7, T8, T9, T10> Tail;
-};
-
-template <GTEST_TEMPLATE_ T1, GTEST_TEMPLATE_ T2, GTEST_TEMPLATE_ T3,
- GTEST_TEMPLATE_ T4, GTEST_TEMPLATE_ T5, GTEST_TEMPLATE_ T6,
- GTEST_TEMPLATE_ T7, GTEST_TEMPLATE_ T8, GTEST_TEMPLATE_ T9,
- GTEST_TEMPLATE_ T10, GTEST_TEMPLATE_ T11>
-struct Templates11 {
- typedef TemplateSel<T1> Head;
- typedef Templates10<T2, T3, T4, T5, T6, T7, T8, T9, T10, T11> Tail;
-};
-
-template <GTEST_TEMPLATE_ T1, GTEST_TEMPLATE_ T2, GTEST_TEMPLATE_ T3,
- GTEST_TEMPLATE_ T4, GTEST_TEMPLATE_ T5, GTEST_TEMPLATE_ T6,
- GTEST_TEMPLATE_ T7, GTEST_TEMPLATE_ T8, GTEST_TEMPLATE_ T9,
- GTEST_TEMPLATE_ T10, GTEST_TEMPLATE_ T11, GTEST_TEMPLATE_ T12>
-struct Templates12 {
- typedef TemplateSel<T1> Head;
- typedef Templates11<T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12> Tail;
-};
-
-template <GTEST_TEMPLATE_ T1, GTEST_TEMPLATE_ T2, GTEST_TEMPLATE_ T3,
- GTEST_TEMPLATE_ T4, GTEST_TEMPLATE_ T5, GTEST_TEMPLATE_ T6,
- GTEST_TEMPLATE_ T7, GTEST_TEMPLATE_ T8, GTEST_TEMPLATE_ T9,
- GTEST_TEMPLATE_ T10, GTEST_TEMPLATE_ T11, GTEST_TEMPLATE_ T12,
- GTEST_TEMPLATE_ T13>
-struct Templates13 {
- typedef TemplateSel<T1> Head;
- typedef Templates12<T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13> Tail;
-};
-
-template <GTEST_TEMPLATE_ T1, GTEST_TEMPLATE_ T2, GTEST_TEMPLATE_ T3,
- GTEST_TEMPLATE_ T4, GTEST_TEMPLATE_ T5, GTEST_TEMPLATE_ T6,
- GTEST_TEMPLATE_ T7, GTEST_TEMPLATE_ T8, GTEST_TEMPLATE_ T9,
- GTEST_TEMPLATE_ T10, GTEST_TEMPLATE_ T11, GTEST_TEMPLATE_ T12,
- GTEST_TEMPLATE_ T13, GTEST_TEMPLATE_ T14>
-struct Templates14 {
- typedef TemplateSel<T1> Head;
- typedef Templates13<T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13,
- T14> Tail;
-};
-
-template <GTEST_TEMPLATE_ T1, GTEST_TEMPLATE_ T2, GTEST_TEMPLATE_ T3,
- GTEST_TEMPLATE_ T4, GTEST_TEMPLATE_ T5, GTEST_TEMPLATE_ T6,
- GTEST_TEMPLATE_ T7, GTEST_TEMPLATE_ T8, GTEST_TEMPLATE_ T9,
- GTEST_TEMPLATE_ T10, GTEST_TEMPLATE_ T11, GTEST_TEMPLATE_ T12,
- GTEST_TEMPLATE_ T13, GTEST_TEMPLATE_ T14, GTEST_TEMPLATE_ T15>
-struct Templates15 {
- typedef TemplateSel<T1> Head;
- typedef Templates14<T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14,
- T15> Tail;
-};
-
-template <GTEST_TEMPLATE_ T1, GTEST_TEMPLATE_ T2, GTEST_TEMPLATE_ T3,
- GTEST_TEMPLATE_ T4, GTEST_TEMPLATE_ T5, GTEST_TEMPLATE_ T6,
- GTEST_TEMPLATE_ T7, GTEST_TEMPLATE_ T8, GTEST_TEMPLATE_ T9,
- GTEST_TEMPLATE_ T10, GTEST_TEMPLATE_ T11, GTEST_TEMPLATE_ T12,
- GTEST_TEMPLATE_ T13, GTEST_TEMPLATE_ T14, GTEST_TEMPLATE_ T15,
- GTEST_TEMPLATE_ T16>
-struct Templates16 {
- typedef TemplateSel<T1> Head;
- typedef Templates15<T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14,
- T15, T16> Tail;
-};
-
-template <GTEST_TEMPLATE_ T1, GTEST_TEMPLATE_ T2, GTEST_TEMPLATE_ T3,
- GTEST_TEMPLATE_ T4, GTEST_TEMPLATE_ T5, GTEST_TEMPLATE_ T6,
- GTEST_TEMPLATE_ T7, GTEST_TEMPLATE_ T8, GTEST_TEMPLATE_ T9,
- GTEST_TEMPLATE_ T10, GTEST_TEMPLATE_ T11, GTEST_TEMPLATE_ T12,
- GTEST_TEMPLATE_ T13, GTEST_TEMPLATE_ T14, GTEST_TEMPLATE_ T15,
- GTEST_TEMPLATE_ T16, GTEST_TEMPLATE_ T17>
-struct Templates17 {
- typedef TemplateSel<T1> Head;
- typedef Templates16<T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14,
- T15, T16, T17> Tail;
-};
-
-template <GTEST_TEMPLATE_ T1, GTEST_TEMPLATE_ T2, GTEST_TEMPLATE_ T3,
- GTEST_TEMPLATE_ T4, GTEST_TEMPLATE_ T5, GTEST_TEMPLATE_ T6,
- GTEST_TEMPLATE_ T7, GTEST_TEMPLATE_ T8, GTEST_TEMPLATE_ T9,
- GTEST_TEMPLATE_ T10, GTEST_TEMPLATE_ T11, GTEST_TEMPLATE_ T12,
- GTEST_TEMPLATE_ T13, GTEST_TEMPLATE_ T14, GTEST_TEMPLATE_ T15,
- GTEST_TEMPLATE_ T16, GTEST_TEMPLATE_ T17, GTEST_TEMPLATE_ T18>
-struct Templates18 {
- typedef TemplateSel<T1> Head;
- typedef Templates17<T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14,
- T15, T16, T17, T18> Tail;
-};
-
-template <GTEST_TEMPLATE_ T1, GTEST_TEMPLATE_ T2, GTEST_TEMPLATE_ T3,
- GTEST_TEMPLATE_ T4, GTEST_TEMPLATE_ T5, GTEST_TEMPLATE_ T6,
- GTEST_TEMPLATE_ T7, GTEST_TEMPLATE_ T8, GTEST_TEMPLATE_ T9,
- GTEST_TEMPLATE_ T10, GTEST_TEMPLATE_ T11, GTEST_TEMPLATE_ T12,
- GTEST_TEMPLATE_ T13, GTEST_TEMPLATE_ T14, GTEST_TEMPLATE_ T15,
- GTEST_TEMPLATE_ T16, GTEST_TEMPLATE_ T17, GTEST_TEMPLATE_ T18,
- GTEST_TEMPLATE_ T19>
-struct Templates19 {
- typedef TemplateSel<T1> Head;
- typedef Templates18<T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14,
- T15, T16, T17, T18, T19> Tail;
-};
-
-template <GTEST_TEMPLATE_ T1, GTEST_TEMPLATE_ T2, GTEST_TEMPLATE_ T3,
- GTEST_TEMPLATE_ T4, GTEST_TEMPLATE_ T5, GTEST_TEMPLATE_ T6,
- GTEST_TEMPLATE_ T7, GTEST_TEMPLATE_ T8, GTEST_TEMPLATE_ T9,
- GTEST_TEMPLATE_ T10, GTEST_TEMPLATE_ T11, GTEST_TEMPLATE_ T12,
- GTEST_TEMPLATE_ T13, GTEST_TEMPLATE_ T14, GTEST_TEMPLATE_ T15,
- GTEST_TEMPLATE_ T16, GTEST_TEMPLATE_ T17, GTEST_TEMPLATE_ T18,
- GTEST_TEMPLATE_ T19, GTEST_TEMPLATE_ T20>
-struct Templates20 {
- typedef TemplateSel<T1> Head;
- typedef Templates19<T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14,
- T15, T16, T17, T18, T19, T20> Tail;
-};
-
-template <GTEST_TEMPLATE_ T1, GTEST_TEMPLATE_ T2, GTEST_TEMPLATE_ T3,
- GTEST_TEMPLATE_ T4, GTEST_TEMPLATE_ T5, GTEST_TEMPLATE_ T6,
- GTEST_TEMPLATE_ T7, GTEST_TEMPLATE_ T8, GTEST_TEMPLATE_ T9,
- GTEST_TEMPLATE_ T10, GTEST_TEMPLATE_ T11, GTEST_TEMPLATE_ T12,
- GTEST_TEMPLATE_ T13, GTEST_TEMPLATE_ T14, GTEST_TEMPLATE_ T15,
- GTEST_TEMPLATE_ T16, GTEST_TEMPLATE_ T17, GTEST_TEMPLATE_ T18,
- GTEST_TEMPLATE_ T19, GTEST_TEMPLATE_ T20, GTEST_TEMPLATE_ T21>
-struct Templates21 {
- typedef TemplateSel<T1> Head;
- typedef Templates20<T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14,
- T15, T16, T17, T18, T19, T20, T21> Tail;
-};
-
-template <GTEST_TEMPLATE_ T1, GTEST_TEMPLATE_ T2, GTEST_TEMPLATE_ T3,
- GTEST_TEMPLATE_ T4, GTEST_TEMPLATE_ T5, GTEST_TEMPLATE_ T6,
- GTEST_TEMPLATE_ T7, GTEST_TEMPLATE_ T8, GTEST_TEMPLATE_ T9,
- GTEST_TEMPLATE_ T10, GTEST_TEMPLATE_ T11, GTEST_TEMPLATE_ T12,
- GTEST_TEMPLATE_ T13, GTEST_TEMPLATE_ T14, GTEST_TEMPLATE_ T15,
- GTEST_TEMPLATE_ T16, GTEST_TEMPLATE_ T17, GTEST_TEMPLATE_ T18,
- GTEST_TEMPLATE_ T19, GTEST_TEMPLATE_ T20, GTEST_TEMPLATE_ T21,
- GTEST_TEMPLATE_ T22>
-struct Templates22 {
- typedef TemplateSel<T1> Head;
- typedef Templates21<T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14,
- T15, T16, T17, T18, T19, T20, T21, T22> Tail;
-};
-
-template <GTEST_TEMPLATE_ T1, GTEST_TEMPLATE_ T2, GTEST_TEMPLATE_ T3,
- GTEST_TEMPLATE_ T4, GTEST_TEMPLATE_ T5, GTEST_TEMPLATE_ T6,
- GTEST_TEMPLATE_ T7, GTEST_TEMPLATE_ T8, GTEST_TEMPLATE_ T9,
- GTEST_TEMPLATE_ T10, GTEST_TEMPLATE_ T11, GTEST_TEMPLATE_ T12,
- GTEST_TEMPLATE_ T13, GTEST_TEMPLATE_ T14, GTEST_TEMPLATE_ T15,
- GTEST_TEMPLATE_ T16, GTEST_TEMPLATE_ T17, GTEST_TEMPLATE_ T18,
- GTEST_TEMPLATE_ T19, GTEST_TEMPLATE_ T20, GTEST_TEMPLATE_ T21,
- GTEST_TEMPLATE_ T22, GTEST_TEMPLATE_ T23>
-struct Templates23 {
- typedef TemplateSel<T1> Head;
- typedef Templates22<T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14,
- T15, T16, T17, T18, T19, T20, T21, T22, T23> Tail;
-};
-
-template <GTEST_TEMPLATE_ T1, GTEST_TEMPLATE_ T2, GTEST_TEMPLATE_ T3,
- GTEST_TEMPLATE_ T4, GTEST_TEMPLATE_ T5, GTEST_TEMPLATE_ T6,
- GTEST_TEMPLATE_ T7, GTEST_TEMPLATE_ T8, GTEST_TEMPLATE_ T9,
- GTEST_TEMPLATE_ T10, GTEST_TEMPLATE_ T11, GTEST_TEMPLATE_ T12,
- GTEST_TEMPLATE_ T13, GTEST_TEMPLATE_ T14, GTEST_TEMPLATE_ T15,
- GTEST_TEMPLATE_ T16, GTEST_TEMPLATE_ T17, GTEST_TEMPLATE_ T18,
- GTEST_TEMPLATE_ T19, GTEST_TEMPLATE_ T20, GTEST_TEMPLATE_ T21,
- GTEST_TEMPLATE_ T22, GTEST_TEMPLATE_ T23, GTEST_TEMPLATE_ T24>
-struct Templates24 {
- typedef TemplateSel<T1> Head;
- typedef Templates23<T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14,
- T15, T16, T17, T18, T19, T20, T21, T22, T23, T24> Tail;
-};
-
-template <GTEST_TEMPLATE_ T1, GTEST_TEMPLATE_ T2, GTEST_TEMPLATE_ T3,
- GTEST_TEMPLATE_ T4, GTEST_TEMPLATE_ T5, GTEST_TEMPLATE_ T6,
- GTEST_TEMPLATE_ T7, GTEST_TEMPLATE_ T8, GTEST_TEMPLATE_ T9,
- GTEST_TEMPLATE_ T10, GTEST_TEMPLATE_ T11, GTEST_TEMPLATE_ T12,
- GTEST_TEMPLATE_ T13, GTEST_TEMPLATE_ T14, GTEST_TEMPLATE_ T15,
- GTEST_TEMPLATE_ T16, GTEST_TEMPLATE_ T17, GTEST_TEMPLATE_ T18,
- GTEST_TEMPLATE_ T19, GTEST_TEMPLATE_ T20, GTEST_TEMPLATE_ T21,
- GTEST_TEMPLATE_ T22, GTEST_TEMPLATE_ T23, GTEST_TEMPLATE_ T24,
- GTEST_TEMPLATE_ T25>
-struct Templates25 {
- typedef TemplateSel<T1> Head;
- typedef Templates24<T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14,
- T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25> Tail;
-};
-
-template <GTEST_TEMPLATE_ T1, GTEST_TEMPLATE_ T2, GTEST_TEMPLATE_ T3,
- GTEST_TEMPLATE_ T4, GTEST_TEMPLATE_ T5, GTEST_TEMPLATE_ T6,
- GTEST_TEMPLATE_ T7, GTEST_TEMPLATE_ T8, GTEST_TEMPLATE_ T9,
- GTEST_TEMPLATE_ T10, GTEST_TEMPLATE_ T11, GTEST_TEMPLATE_ T12,
- GTEST_TEMPLATE_ T13, GTEST_TEMPLATE_ T14, GTEST_TEMPLATE_ T15,
- GTEST_TEMPLATE_ T16, GTEST_TEMPLATE_ T17, GTEST_TEMPLATE_ T18,
- GTEST_TEMPLATE_ T19, GTEST_TEMPLATE_ T20, GTEST_TEMPLATE_ T21,
- GTEST_TEMPLATE_ T22, GTEST_TEMPLATE_ T23, GTEST_TEMPLATE_ T24,
- GTEST_TEMPLATE_ T25, GTEST_TEMPLATE_ T26>
-struct Templates26 {
- typedef TemplateSel<T1> Head;
- typedef Templates25<T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14,
- T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26> Tail;
-};
-
-template <GTEST_TEMPLATE_ T1, GTEST_TEMPLATE_ T2, GTEST_TEMPLATE_ T3,
- GTEST_TEMPLATE_ T4, GTEST_TEMPLATE_ T5, GTEST_TEMPLATE_ T6,
- GTEST_TEMPLATE_ T7, GTEST_TEMPLATE_ T8, GTEST_TEMPLATE_ T9,
- GTEST_TEMPLATE_ T10, GTEST_TEMPLATE_ T11, GTEST_TEMPLATE_ T12,
- GTEST_TEMPLATE_ T13, GTEST_TEMPLATE_ T14, GTEST_TEMPLATE_ T15,
- GTEST_TEMPLATE_ T16, GTEST_TEMPLATE_ T17, GTEST_TEMPLATE_ T18,
- GTEST_TEMPLATE_ T19, GTEST_TEMPLATE_ T20, GTEST_TEMPLATE_ T21,
- GTEST_TEMPLATE_ T22, GTEST_TEMPLATE_ T23, GTEST_TEMPLATE_ T24,
- GTEST_TEMPLATE_ T25, GTEST_TEMPLATE_ T26, GTEST_TEMPLATE_ T27>
-struct Templates27 {
- typedef TemplateSel<T1> Head;
- typedef Templates26<T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14,
- T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27> Tail;
-};
-
-template <GTEST_TEMPLATE_ T1, GTEST_TEMPLATE_ T2, GTEST_TEMPLATE_ T3,
- GTEST_TEMPLATE_ T4, GTEST_TEMPLATE_ T5, GTEST_TEMPLATE_ T6,
- GTEST_TEMPLATE_ T7, GTEST_TEMPLATE_ T8, GTEST_TEMPLATE_ T9,
- GTEST_TEMPLATE_ T10, GTEST_TEMPLATE_ T11, GTEST_TEMPLATE_ T12,
- GTEST_TEMPLATE_ T13, GTEST_TEMPLATE_ T14, GTEST_TEMPLATE_ T15,
- GTEST_TEMPLATE_ T16, GTEST_TEMPLATE_ T17, GTEST_TEMPLATE_ T18,
- GTEST_TEMPLATE_ T19, GTEST_TEMPLATE_ T20, GTEST_TEMPLATE_ T21,
- GTEST_TEMPLATE_ T22, GTEST_TEMPLATE_ T23, GTEST_TEMPLATE_ T24,
- GTEST_TEMPLATE_ T25, GTEST_TEMPLATE_ T26, GTEST_TEMPLATE_ T27,
- GTEST_TEMPLATE_ T28>
-struct Templates28 {
- typedef TemplateSel<T1> Head;
- typedef Templates27<T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14,
- T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27,
- T28> Tail;
-};
-
-template <GTEST_TEMPLATE_ T1, GTEST_TEMPLATE_ T2, GTEST_TEMPLATE_ T3,
- GTEST_TEMPLATE_ T4, GTEST_TEMPLATE_ T5, GTEST_TEMPLATE_ T6,
- GTEST_TEMPLATE_ T7, GTEST_TEMPLATE_ T8, GTEST_TEMPLATE_ T9,
- GTEST_TEMPLATE_ T10, GTEST_TEMPLATE_ T11, GTEST_TEMPLATE_ T12,
- GTEST_TEMPLATE_ T13, GTEST_TEMPLATE_ T14, GTEST_TEMPLATE_ T15,
- GTEST_TEMPLATE_ T16, GTEST_TEMPLATE_ T17, GTEST_TEMPLATE_ T18,
- GTEST_TEMPLATE_ T19, GTEST_TEMPLATE_ T20, GTEST_TEMPLATE_ T21,
- GTEST_TEMPLATE_ T22, GTEST_TEMPLATE_ T23, GTEST_TEMPLATE_ T24,
- GTEST_TEMPLATE_ T25, GTEST_TEMPLATE_ T26, GTEST_TEMPLATE_ T27,
- GTEST_TEMPLATE_ T28, GTEST_TEMPLATE_ T29>
-struct Templates29 {
- typedef TemplateSel<T1> Head;
- typedef Templates28<T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14,
- T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27, T28,
- T29> Tail;
-};
-
-template <GTEST_TEMPLATE_ T1, GTEST_TEMPLATE_ T2, GTEST_TEMPLATE_ T3,
- GTEST_TEMPLATE_ T4, GTEST_TEMPLATE_ T5, GTEST_TEMPLATE_ T6,
- GTEST_TEMPLATE_ T7, GTEST_TEMPLATE_ T8, GTEST_TEMPLATE_ T9,
- GTEST_TEMPLATE_ T10, GTEST_TEMPLATE_ T11, GTEST_TEMPLATE_ T12,
- GTEST_TEMPLATE_ T13, GTEST_TEMPLATE_ T14, GTEST_TEMPLATE_ T15,
- GTEST_TEMPLATE_ T16, GTEST_TEMPLATE_ T17, GTEST_TEMPLATE_ T18,
- GTEST_TEMPLATE_ T19, GTEST_TEMPLATE_ T20, GTEST_TEMPLATE_ T21,
- GTEST_TEMPLATE_ T22, GTEST_TEMPLATE_ T23, GTEST_TEMPLATE_ T24,
- GTEST_TEMPLATE_ T25, GTEST_TEMPLATE_ T26, GTEST_TEMPLATE_ T27,
- GTEST_TEMPLATE_ T28, GTEST_TEMPLATE_ T29, GTEST_TEMPLATE_ T30>
-struct Templates30 {
- typedef TemplateSel<T1> Head;
- typedef Templates29<T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14,
- T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27, T28,
- T29, T30> Tail;
-};
-
-template <GTEST_TEMPLATE_ T1, GTEST_TEMPLATE_ T2, GTEST_TEMPLATE_ T3,
- GTEST_TEMPLATE_ T4, GTEST_TEMPLATE_ T5, GTEST_TEMPLATE_ T6,
- GTEST_TEMPLATE_ T7, GTEST_TEMPLATE_ T8, GTEST_TEMPLATE_ T9,
- GTEST_TEMPLATE_ T10, GTEST_TEMPLATE_ T11, GTEST_TEMPLATE_ T12,
- GTEST_TEMPLATE_ T13, GTEST_TEMPLATE_ T14, GTEST_TEMPLATE_ T15,
- GTEST_TEMPLATE_ T16, GTEST_TEMPLATE_ T17, GTEST_TEMPLATE_ T18,
- GTEST_TEMPLATE_ T19, GTEST_TEMPLATE_ T20, GTEST_TEMPLATE_ T21,
- GTEST_TEMPLATE_ T22, GTEST_TEMPLATE_ T23, GTEST_TEMPLATE_ T24,
- GTEST_TEMPLATE_ T25, GTEST_TEMPLATE_ T26, GTEST_TEMPLATE_ T27,
- GTEST_TEMPLATE_ T28, GTEST_TEMPLATE_ T29, GTEST_TEMPLATE_ T30,
- GTEST_TEMPLATE_ T31>
-struct Templates31 {
- typedef TemplateSel<T1> Head;
- typedef Templates30<T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14,
- T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27, T28,
- T29, T30, T31> Tail;
-};
-
-template <GTEST_TEMPLATE_ T1, GTEST_TEMPLATE_ T2, GTEST_TEMPLATE_ T3,
- GTEST_TEMPLATE_ T4, GTEST_TEMPLATE_ T5, GTEST_TEMPLATE_ T6,
- GTEST_TEMPLATE_ T7, GTEST_TEMPLATE_ T8, GTEST_TEMPLATE_ T9,
- GTEST_TEMPLATE_ T10, GTEST_TEMPLATE_ T11, GTEST_TEMPLATE_ T12,
- GTEST_TEMPLATE_ T13, GTEST_TEMPLATE_ T14, GTEST_TEMPLATE_ T15,
- GTEST_TEMPLATE_ T16, GTEST_TEMPLATE_ T17, GTEST_TEMPLATE_ T18,
- GTEST_TEMPLATE_ T19, GTEST_TEMPLATE_ T20, GTEST_TEMPLATE_ T21,
- GTEST_TEMPLATE_ T22, GTEST_TEMPLATE_ T23, GTEST_TEMPLATE_ T24,
- GTEST_TEMPLATE_ T25, GTEST_TEMPLATE_ T26, GTEST_TEMPLATE_ T27,
- GTEST_TEMPLATE_ T28, GTEST_TEMPLATE_ T29, GTEST_TEMPLATE_ T30,
- GTEST_TEMPLATE_ T31, GTEST_TEMPLATE_ T32>
-struct Templates32 {
- typedef TemplateSel<T1> Head;
- typedef Templates31<T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14,
- T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27, T28,
- T29, T30, T31, T32> Tail;
-};
-
-template <GTEST_TEMPLATE_ T1, GTEST_TEMPLATE_ T2, GTEST_TEMPLATE_ T3,
- GTEST_TEMPLATE_ T4, GTEST_TEMPLATE_ T5, GTEST_TEMPLATE_ T6,
- GTEST_TEMPLATE_ T7, GTEST_TEMPLATE_ T8, GTEST_TEMPLATE_ T9,
- GTEST_TEMPLATE_ T10, GTEST_TEMPLATE_ T11, GTEST_TEMPLATE_ T12,
- GTEST_TEMPLATE_ T13, GTEST_TEMPLATE_ T14, GTEST_TEMPLATE_ T15,
- GTEST_TEMPLATE_ T16, GTEST_TEMPLATE_ T17, GTEST_TEMPLATE_ T18,
- GTEST_TEMPLATE_ T19, GTEST_TEMPLATE_ T20, GTEST_TEMPLATE_ T21,
- GTEST_TEMPLATE_ T22, GTEST_TEMPLATE_ T23, GTEST_TEMPLATE_ T24,
- GTEST_TEMPLATE_ T25, GTEST_TEMPLATE_ T26, GTEST_TEMPLATE_ T27,
- GTEST_TEMPLATE_ T28, GTEST_TEMPLATE_ T29, GTEST_TEMPLATE_ T30,
- GTEST_TEMPLATE_ T31, GTEST_TEMPLATE_ T32, GTEST_TEMPLATE_ T33>
-struct Templates33 {
- typedef TemplateSel<T1> Head;
- typedef Templates32<T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14,
- T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27, T28,
- T29, T30, T31, T32, T33> Tail;
-};
-
-template <GTEST_TEMPLATE_ T1, GTEST_TEMPLATE_ T2, GTEST_TEMPLATE_ T3,
- GTEST_TEMPLATE_ T4, GTEST_TEMPLATE_ T5, GTEST_TEMPLATE_ T6,
- GTEST_TEMPLATE_ T7, GTEST_TEMPLATE_ T8, GTEST_TEMPLATE_ T9,
- GTEST_TEMPLATE_ T10, GTEST_TEMPLATE_ T11, GTEST_TEMPLATE_ T12,
- GTEST_TEMPLATE_ T13, GTEST_TEMPLATE_ T14, GTEST_TEMPLATE_ T15,
- GTEST_TEMPLATE_ T16, GTEST_TEMPLATE_ T17, GTEST_TEMPLATE_ T18,
- GTEST_TEMPLATE_ T19, GTEST_TEMPLATE_ T20, GTEST_TEMPLATE_ T21,
- GTEST_TEMPLATE_ T22, GTEST_TEMPLATE_ T23, GTEST_TEMPLATE_ T24,
- GTEST_TEMPLATE_ T25, GTEST_TEMPLATE_ T26, GTEST_TEMPLATE_ T27,
- GTEST_TEMPLATE_ T28, GTEST_TEMPLATE_ T29, GTEST_TEMPLATE_ T30,
- GTEST_TEMPLATE_ T31, GTEST_TEMPLATE_ T32, GTEST_TEMPLATE_ T33,
- GTEST_TEMPLATE_ T34>
-struct Templates34 {
- typedef TemplateSel<T1> Head;
- typedef Templates33<T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14,
- T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27, T28,
- T29, T30, T31, T32, T33, T34> Tail;
-};
-
-template <GTEST_TEMPLATE_ T1, GTEST_TEMPLATE_ T2, GTEST_TEMPLATE_ T3,
- GTEST_TEMPLATE_ T4, GTEST_TEMPLATE_ T5, GTEST_TEMPLATE_ T6,
- GTEST_TEMPLATE_ T7, GTEST_TEMPLATE_ T8, GTEST_TEMPLATE_ T9,
- GTEST_TEMPLATE_ T10, GTEST_TEMPLATE_ T11, GTEST_TEMPLATE_ T12,
- GTEST_TEMPLATE_ T13, GTEST_TEMPLATE_ T14, GTEST_TEMPLATE_ T15,
- GTEST_TEMPLATE_ T16, GTEST_TEMPLATE_ T17, GTEST_TEMPLATE_ T18,
- GTEST_TEMPLATE_ T19, GTEST_TEMPLATE_ T20, GTEST_TEMPLATE_ T21,
- GTEST_TEMPLATE_ T22, GTEST_TEMPLATE_ T23, GTEST_TEMPLATE_ T24,
- GTEST_TEMPLATE_ T25, GTEST_TEMPLATE_ T26, GTEST_TEMPLATE_ T27,
- GTEST_TEMPLATE_ T28, GTEST_TEMPLATE_ T29, GTEST_TEMPLATE_ T30,
- GTEST_TEMPLATE_ T31, GTEST_TEMPLATE_ T32, GTEST_TEMPLATE_ T33,
- GTEST_TEMPLATE_ T34, GTEST_TEMPLATE_ T35>
-struct Templates35 {
- typedef TemplateSel<T1> Head;
- typedef Templates34<T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14,
- T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27, T28,
- T29, T30, T31, T32, T33, T34, T35> Tail;
-};
-
-template <GTEST_TEMPLATE_ T1, GTEST_TEMPLATE_ T2, GTEST_TEMPLATE_ T3,
- GTEST_TEMPLATE_ T4, GTEST_TEMPLATE_ T5, GTEST_TEMPLATE_ T6,
- GTEST_TEMPLATE_ T7, GTEST_TEMPLATE_ T8, GTEST_TEMPLATE_ T9,
- GTEST_TEMPLATE_ T10, GTEST_TEMPLATE_ T11, GTEST_TEMPLATE_ T12,
- GTEST_TEMPLATE_ T13, GTEST_TEMPLATE_ T14, GTEST_TEMPLATE_ T15,
- GTEST_TEMPLATE_ T16, GTEST_TEMPLATE_ T17, GTEST_TEMPLATE_ T18,
- GTEST_TEMPLATE_ T19, GTEST_TEMPLATE_ T20, GTEST_TEMPLATE_ T21,
- GTEST_TEMPLATE_ T22, GTEST_TEMPLATE_ T23, GTEST_TEMPLATE_ T24,
- GTEST_TEMPLATE_ T25, GTEST_TEMPLATE_ T26, GTEST_TEMPLATE_ T27,
- GTEST_TEMPLATE_ T28, GTEST_TEMPLATE_ T29, GTEST_TEMPLATE_ T30,
- GTEST_TEMPLATE_ T31, GTEST_TEMPLATE_ T32, GTEST_TEMPLATE_ T33,
- GTEST_TEMPLATE_ T34, GTEST_TEMPLATE_ T35, GTEST_TEMPLATE_ T36>
-struct Templates36 {
- typedef TemplateSel<T1> Head;
- typedef Templates35<T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14,
- T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27, T28,
- T29, T30, T31, T32, T33, T34, T35, T36> Tail;
-};
-
-template <GTEST_TEMPLATE_ T1, GTEST_TEMPLATE_ T2, GTEST_TEMPLATE_ T3,
- GTEST_TEMPLATE_ T4, GTEST_TEMPLATE_ T5, GTEST_TEMPLATE_ T6,
- GTEST_TEMPLATE_ T7, GTEST_TEMPLATE_ T8, GTEST_TEMPLATE_ T9,
- GTEST_TEMPLATE_ T10, GTEST_TEMPLATE_ T11, GTEST_TEMPLATE_ T12,
- GTEST_TEMPLATE_ T13, GTEST_TEMPLATE_ T14, GTEST_TEMPLATE_ T15,
- GTEST_TEMPLATE_ T16, GTEST_TEMPLATE_ T17, GTEST_TEMPLATE_ T18,
- GTEST_TEMPLATE_ T19, GTEST_TEMPLATE_ T20, GTEST_TEMPLATE_ T21,
- GTEST_TEMPLATE_ T22, GTEST_TEMPLATE_ T23, GTEST_TEMPLATE_ T24,
- GTEST_TEMPLATE_ T25, GTEST_TEMPLATE_ T26, GTEST_TEMPLATE_ T27,
- GTEST_TEMPLATE_ T28, GTEST_TEMPLATE_ T29, GTEST_TEMPLATE_ T30,
- GTEST_TEMPLATE_ T31, GTEST_TEMPLATE_ T32, GTEST_TEMPLATE_ T33,
- GTEST_TEMPLATE_ T34, GTEST_TEMPLATE_ T35, GTEST_TEMPLATE_ T36,
- GTEST_TEMPLATE_ T37>
-struct Templates37 {
- typedef TemplateSel<T1> Head;
- typedef Templates36<T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14,
- T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27, T28,
- T29, T30, T31, T32, T33, T34, T35, T36, T37> Tail;
-};
-
-template <GTEST_TEMPLATE_ T1, GTEST_TEMPLATE_ T2, GTEST_TEMPLATE_ T3,
- GTEST_TEMPLATE_ T4, GTEST_TEMPLATE_ T5, GTEST_TEMPLATE_ T6,
- GTEST_TEMPLATE_ T7, GTEST_TEMPLATE_ T8, GTEST_TEMPLATE_ T9,
- GTEST_TEMPLATE_ T10, GTEST_TEMPLATE_ T11, GTEST_TEMPLATE_ T12,
- GTEST_TEMPLATE_ T13, GTEST_TEMPLATE_ T14, GTEST_TEMPLATE_ T15,
- GTEST_TEMPLATE_ T16, GTEST_TEMPLATE_ T17, GTEST_TEMPLATE_ T18,
- GTEST_TEMPLATE_ T19, GTEST_TEMPLATE_ T20, GTEST_TEMPLATE_ T21,
- GTEST_TEMPLATE_ T22, GTEST_TEMPLATE_ T23, GTEST_TEMPLATE_ T24,
- GTEST_TEMPLATE_ T25, GTEST_TEMPLATE_ T26, GTEST_TEMPLATE_ T27,
- GTEST_TEMPLATE_ T28, GTEST_TEMPLATE_ T29, GTEST_TEMPLATE_ T30,
- GTEST_TEMPLATE_ T31, GTEST_TEMPLATE_ T32, GTEST_TEMPLATE_ T33,
- GTEST_TEMPLATE_ T34, GTEST_TEMPLATE_ T35, GTEST_TEMPLATE_ T36,
- GTEST_TEMPLATE_ T37, GTEST_TEMPLATE_ T38>
-struct Templates38 {
- typedef TemplateSel<T1> Head;
- typedef Templates37<T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14,
- T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27, T28,
- T29, T30, T31, T32, T33, T34, T35, T36, T37, T38> Tail;
-};
-
-template <GTEST_TEMPLATE_ T1, GTEST_TEMPLATE_ T2, GTEST_TEMPLATE_ T3,
- GTEST_TEMPLATE_ T4, GTEST_TEMPLATE_ T5, GTEST_TEMPLATE_ T6,
- GTEST_TEMPLATE_ T7, GTEST_TEMPLATE_ T8, GTEST_TEMPLATE_ T9,
- GTEST_TEMPLATE_ T10, GTEST_TEMPLATE_ T11, GTEST_TEMPLATE_ T12,
- GTEST_TEMPLATE_ T13, GTEST_TEMPLATE_ T14, GTEST_TEMPLATE_ T15,
- GTEST_TEMPLATE_ T16, GTEST_TEMPLATE_ T17, GTEST_TEMPLATE_ T18,
- GTEST_TEMPLATE_ T19, GTEST_TEMPLATE_ T20, GTEST_TEMPLATE_ T21,
- GTEST_TEMPLATE_ T22, GTEST_TEMPLATE_ T23, GTEST_TEMPLATE_ T24,
- GTEST_TEMPLATE_ T25, GTEST_TEMPLATE_ T26, GTEST_TEMPLATE_ T27,
- GTEST_TEMPLATE_ T28, GTEST_TEMPLATE_ T29, GTEST_TEMPLATE_ T30,
- GTEST_TEMPLATE_ T31, GTEST_TEMPLATE_ T32, GTEST_TEMPLATE_ T33,
- GTEST_TEMPLATE_ T34, GTEST_TEMPLATE_ T35, GTEST_TEMPLATE_ T36,
- GTEST_TEMPLATE_ T37, GTEST_TEMPLATE_ T38, GTEST_TEMPLATE_ T39>
-struct Templates39 {
- typedef TemplateSel<T1> Head;
- typedef Templates38<T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14,
- T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27, T28,
- T29, T30, T31, T32, T33, T34, T35, T36, T37, T38, T39> Tail;
-};
-
-template <GTEST_TEMPLATE_ T1, GTEST_TEMPLATE_ T2, GTEST_TEMPLATE_ T3,
- GTEST_TEMPLATE_ T4, GTEST_TEMPLATE_ T5, GTEST_TEMPLATE_ T6,
- GTEST_TEMPLATE_ T7, GTEST_TEMPLATE_ T8, GTEST_TEMPLATE_ T9,
- GTEST_TEMPLATE_ T10, GTEST_TEMPLATE_ T11, GTEST_TEMPLATE_ T12,
- GTEST_TEMPLATE_ T13, GTEST_TEMPLATE_ T14, GTEST_TEMPLATE_ T15,
- GTEST_TEMPLATE_ T16, GTEST_TEMPLATE_ T17, GTEST_TEMPLATE_ T18,
- GTEST_TEMPLATE_ T19, GTEST_TEMPLATE_ T20, GTEST_TEMPLATE_ T21,
- GTEST_TEMPLATE_ T22, GTEST_TEMPLATE_ T23, GTEST_TEMPLATE_ T24,
- GTEST_TEMPLATE_ T25, GTEST_TEMPLATE_ T26, GTEST_TEMPLATE_ T27,
- GTEST_TEMPLATE_ T28, GTEST_TEMPLATE_ T29, GTEST_TEMPLATE_ T30,
- GTEST_TEMPLATE_ T31, GTEST_TEMPLATE_ T32, GTEST_TEMPLATE_ T33,
- GTEST_TEMPLATE_ T34, GTEST_TEMPLATE_ T35, GTEST_TEMPLATE_ T36,
- GTEST_TEMPLATE_ T37, GTEST_TEMPLATE_ T38, GTEST_TEMPLATE_ T39,
- GTEST_TEMPLATE_ T40>
-struct Templates40 {
- typedef TemplateSel<T1> Head;
- typedef Templates39<T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14,
- T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27, T28,
- T29, T30, T31, T32, T33, T34, T35, T36, T37, T38, T39, T40> Tail;
-};
-
-template <GTEST_TEMPLATE_ T1, GTEST_TEMPLATE_ T2, GTEST_TEMPLATE_ T3,
- GTEST_TEMPLATE_ T4, GTEST_TEMPLATE_ T5, GTEST_TEMPLATE_ T6,
- GTEST_TEMPLATE_ T7, GTEST_TEMPLATE_ T8, GTEST_TEMPLATE_ T9,
- GTEST_TEMPLATE_ T10, GTEST_TEMPLATE_ T11, GTEST_TEMPLATE_ T12,
- GTEST_TEMPLATE_ T13, GTEST_TEMPLATE_ T14, GTEST_TEMPLATE_ T15,
- GTEST_TEMPLATE_ T16, GTEST_TEMPLATE_ T17, GTEST_TEMPLATE_ T18,
- GTEST_TEMPLATE_ T19, GTEST_TEMPLATE_ T20, GTEST_TEMPLATE_ T21,
- GTEST_TEMPLATE_ T22, GTEST_TEMPLATE_ T23, GTEST_TEMPLATE_ T24,
- GTEST_TEMPLATE_ T25, GTEST_TEMPLATE_ T26, GTEST_TEMPLATE_ T27,
- GTEST_TEMPLATE_ T28, GTEST_TEMPLATE_ T29, GTEST_TEMPLATE_ T30,
- GTEST_TEMPLATE_ T31, GTEST_TEMPLATE_ T32, GTEST_TEMPLATE_ T33,
- GTEST_TEMPLATE_ T34, GTEST_TEMPLATE_ T35, GTEST_TEMPLATE_ T36,
- GTEST_TEMPLATE_ T37, GTEST_TEMPLATE_ T38, GTEST_TEMPLATE_ T39,
- GTEST_TEMPLATE_ T40, GTEST_TEMPLATE_ T41>
-struct Templates41 {
- typedef TemplateSel<T1> Head;
- typedef Templates40<T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14,
- T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27, T28,
- T29, T30, T31, T32, T33, T34, T35, T36, T37, T38, T39, T40, T41> Tail;
-};
-
-template <GTEST_TEMPLATE_ T1, GTEST_TEMPLATE_ T2, GTEST_TEMPLATE_ T3,
- GTEST_TEMPLATE_ T4, GTEST_TEMPLATE_ T5, GTEST_TEMPLATE_ T6,
- GTEST_TEMPLATE_ T7, GTEST_TEMPLATE_ T8, GTEST_TEMPLATE_ T9,
- GTEST_TEMPLATE_ T10, GTEST_TEMPLATE_ T11, GTEST_TEMPLATE_ T12,
- GTEST_TEMPLATE_ T13, GTEST_TEMPLATE_ T14, GTEST_TEMPLATE_ T15,
- GTEST_TEMPLATE_ T16, GTEST_TEMPLATE_ T17, GTEST_TEMPLATE_ T18,
- GTEST_TEMPLATE_ T19, GTEST_TEMPLATE_ T20, GTEST_TEMPLATE_ T21,
- GTEST_TEMPLATE_ T22, GTEST_TEMPLATE_ T23, GTEST_TEMPLATE_ T24,
- GTEST_TEMPLATE_ T25, GTEST_TEMPLATE_ T26, GTEST_TEMPLATE_ T27,
- GTEST_TEMPLATE_ T28, GTEST_TEMPLATE_ T29, GTEST_TEMPLATE_ T30,
- GTEST_TEMPLATE_ T31, GTEST_TEMPLATE_ T32, GTEST_TEMPLATE_ T33,
- GTEST_TEMPLATE_ T34, GTEST_TEMPLATE_ T35, GTEST_TEMPLATE_ T36,
- GTEST_TEMPLATE_ T37, GTEST_TEMPLATE_ T38, GTEST_TEMPLATE_ T39,
- GTEST_TEMPLATE_ T40, GTEST_TEMPLATE_ T41, GTEST_TEMPLATE_ T42>
-struct Templates42 {
- typedef TemplateSel<T1> Head;
- typedef Templates41<T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14,
- T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27, T28,
- T29, T30, T31, T32, T33, T34, T35, T36, T37, T38, T39, T40, T41,
- T42> Tail;
-};
-
-template <GTEST_TEMPLATE_ T1, GTEST_TEMPLATE_ T2, GTEST_TEMPLATE_ T3,
- GTEST_TEMPLATE_ T4, GTEST_TEMPLATE_ T5, GTEST_TEMPLATE_ T6,
- GTEST_TEMPLATE_ T7, GTEST_TEMPLATE_ T8, GTEST_TEMPLATE_ T9,
- GTEST_TEMPLATE_ T10, GTEST_TEMPLATE_ T11, GTEST_TEMPLATE_ T12,
- GTEST_TEMPLATE_ T13, GTEST_TEMPLATE_ T14, GTEST_TEMPLATE_ T15,
- GTEST_TEMPLATE_ T16, GTEST_TEMPLATE_ T17, GTEST_TEMPLATE_ T18,
- GTEST_TEMPLATE_ T19, GTEST_TEMPLATE_ T20, GTEST_TEMPLATE_ T21,
- GTEST_TEMPLATE_ T22, GTEST_TEMPLATE_ T23, GTEST_TEMPLATE_ T24,
- GTEST_TEMPLATE_ T25, GTEST_TEMPLATE_ T26, GTEST_TEMPLATE_ T27,
- GTEST_TEMPLATE_ T28, GTEST_TEMPLATE_ T29, GTEST_TEMPLATE_ T30,
- GTEST_TEMPLATE_ T31, GTEST_TEMPLATE_ T32, GTEST_TEMPLATE_ T33,
- GTEST_TEMPLATE_ T34, GTEST_TEMPLATE_ T35, GTEST_TEMPLATE_ T36,
- GTEST_TEMPLATE_ T37, GTEST_TEMPLATE_ T38, GTEST_TEMPLATE_ T39,
- GTEST_TEMPLATE_ T40, GTEST_TEMPLATE_ T41, GTEST_TEMPLATE_ T42,
- GTEST_TEMPLATE_ T43>
-struct Templates43 {
- typedef TemplateSel<T1> Head;
- typedef Templates42<T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14,
- T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27, T28,
- T29, T30, T31, T32, T33, T34, T35, T36, T37, T38, T39, T40, T41, T42,
- T43> Tail;
-};
-
-template <GTEST_TEMPLATE_ T1, GTEST_TEMPLATE_ T2, GTEST_TEMPLATE_ T3,
- GTEST_TEMPLATE_ T4, GTEST_TEMPLATE_ T5, GTEST_TEMPLATE_ T6,
- GTEST_TEMPLATE_ T7, GTEST_TEMPLATE_ T8, GTEST_TEMPLATE_ T9,
- GTEST_TEMPLATE_ T10, GTEST_TEMPLATE_ T11, GTEST_TEMPLATE_ T12,
- GTEST_TEMPLATE_ T13, GTEST_TEMPLATE_ T14, GTEST_TEMPLATE_ T15,
- GTEST_TEMPLATE_ T16, GTEST_TEMPLATE_ T17, GTEST_TEMPLATE_ T18,
- GTEST_TEMPLATE_ T19, GTEST_TEMPLATE_ T20, GTEST_TEMPLATE_ T21,
- GTEST_TEMPLATE_ T22, GTEST_TEMPLATE_ T23, GTEST_TEMPLATE_ T24,
- GTEST_TEMPLATE_ T25, GTEST_TEMPLATE_ T26, GTEST_TEMPLATE_ T27,
- GTEST_TEMPLATE_ T28, GTEST_TEMPLATE_ T29, GTEST_TEMPLATE_ T30,
- GTEST_TEMPLATE_ T31, GTEST_TEMPLATE_ T32, GTEST_TEMPLATE_ T33,
- GTEST_TEMPLATE_ T34, GTEST_TEMPLATE_ T35, GTEST_TEMPLATE_ T36,
- GTEST_TEMPLATE_ T37, GTEST_TEMPLATE_ T38, GTEST_TEMPLATE_ T39,
- GTEST_TEMPLATE_ T40, GTEST_TEMPLATE_ T41, GTEST_TEMPLATE_ T42,
- GTEST_TEMPLATE_ T43, GTEST_TEMPLATE_ T44>
-struct Templates44 {
- typedef TemplateSel<T1> Head;
- typedef Templates43<T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14,
- T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27, T28,
- T29, T30, T31, T32, T33, T34, T35, T36, T37, T38, T39, T40, T41, T42,
- T43, T44> Tail;
-};
-
-template <GTEST_TEMPLATE_ T1, GTEST_TEMPLATE_ T2, GTEST_TEMPLATE_ T3,
- GTEST_TEMPLATE_ T4, GTEST_TEMPLATE_ T5, GTEST_TEMPLATE_ T6,
- GTEST_TEMPLATE_ T7, GTEST_TEMPLATE_ T8, GTEST_TEMPLATE_ T9,
- GTEST_TEMPLATE_ T10, GTEST_TEMPLATE_ T11, GTEST_TEMPLATE_ T12,
- GTEST_TEMPLATE_ T13, GTEST_TEMPLATE_ T14, GTEST_TEMPLATE_ T15,
- GTEST_TEMPLATE_ T16, GTEST_TEMPLATE_ T17, GTEST_TEMPLATE_ T18,
- GTEST_TEMPLATE_ T19, GTEST_TEMPLATE_ T20, GTEST_TEMPLATE_ T21,
- GTEST_TEMPLATE_ T22, GTEST_TEMPLATE_ T23, GTEST_TEMPLATE_ T24,
- GTEST_TEMPLATE_ T25, GTEST_TEMPLATE_ T26, GTEST_TEMPLATE_ T27,
- GTEST_TEMPLATE_ T28, GTEST_TEMPLATE_ T29, GTEST_TEMPLATE_ T30,
- GTEST_TEMPLATE_ T31, GTEST_TEMPLATE_ T32, GTEST_TEMPLATE_ T33,
- GTEST_TEMPLATE_ T34, GTEST_TEMPLATE_ T35, GTEST_TEMPLATE_ T36,
- GTEST_TEMPLATE_ T37, GTEST_TEMPLATE_ T38, GTEST_TEMPLATE_ T39,
- GTEST_TEMPLATE_ T40, GTEST_TEMPLATE_ T41, GTEST_TEMPLATE_ T42,
- GTEST_TEMPLATE_ T43, GTEST_TEMPLATE_ T44, GTEST_TEMPLATE_ T45>
-struct Templates45 {
- typedef TemplateSel<T1> Head;
- typedef Templates44<T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14,
- T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27, T28,
- T29, T30, T31, T32, T33, T34, T35, T36, T37, T38, T39, T40, T41, T42,
- T43, T44, T45> Tail;
-};
-
-template <GTEST_TEMPLATE_ T1, GTEST_TEMPLATE_ T2, GTEST_TEMPLATE_ T3,
- GTEST_TEMPLATE_ T4, GTEST_TEMPLATE_ T5, GTEST_TEMPLATE_ T6,
- GTEST_TEMPLATE_ T7, GTEST_TEMPLATE_ T8, GTEST_TEMPLATE_ T9,
- GTEST_TEMPLATE_ T10, GTEST_TEMPLATE_ T11, GTEST_TEMPLATE_ T12,
- GTEST_TEMPLATE_ T13, GTEST_TEMPLATE_ T14, GTEST_TEMPLATE_ T15,
- GTEST_TEMPLATE_ T16, GTEST_TEMPLATE_ T17, GTEST_TEMPLATE_ T18,
- GTEST_TEMPLATE_ T19, GTEST_TEMPLATE_ T20, GTEST_TEMPLATE_ T21,
- GTEST_TEMPLATE_ T22, GTEST_TEMPLATE_ T23, GTEST_TEMPLATE_ T24,
- GTEST_TEMPLATE_ T25, GTEST_TEMPLATE_ T26, GTEST_TEMPLATE_ T27,
- GTEST_TEMPLATE_ T28, GTEST_TEMPLATE_ T29, GTEST_TEMPLATE_ T30,
- GTEST_TEMPLATE_ T31, GTEST_TEMPLATE_ T32, GTEST_TEMPLATE_ T33,
- GTEST_TEMPLATE_ T34, GTEST_TEMPLATE_ T35, GTEST_TEMPLATE_ T36,
- GTEST_TEMPLATE_ T37, GTEST_TEMPLATE_ T38, GTEST_TEMPLATE_ T39,
- GTEST_TEMPLATE_ T40, GTEST_TEMPLATE_ T41, GTEST_TEMPLATE_ T42,
- GTEST_TEMPLATE_ T43, GTEST_TEMPLATE_ T44, GTEST_TEMPLATE_ T45,
- GTEST_TEMPLATE_ T46>
-struct Templates46 {
- typedef TemplateSel<T1> Head;
- typedef Templates45<T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14,
- T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27, T28,
- T29, T30, T31, T32, T33, T34, T35, T36, T37, T38, T39, T40, T41, T42,
- T43, T44, T45, T46> Tail;
-};
-
-template <GTEST_TEMPLATE_ T1, GTEST_TEMPLATE_ T2, GTEST_TEMPLATE_ T3,
- GTEST_TEMPLATE_ T4, GTEST_TEMPLATE_ T5, GTEST_TEMPLATE_ T6,
- GTEST_TEMPLATE_ T7, GTEST_TEMPLATE_ T8, GTEST_TEMPLATE_ T9,
- GTEST_TEMPLATE_ T10, GTEST_TEMPLATE_ T11, GTEST_TEMPLATE_ T12,
- GTEST_TEMPLATE_ T13, GTEST_TEMPLATE_ T14, GTEST_TEMPLATE_ T15,
- GTEST_TEMPLATE_ T16, GTEST_TEMPLATE_ T17, GTEST_TEMPLATE_ T18,
- GTEST_TEMPLATE_ T19, GTEST_TEMPLATE_ T20, GTEST_TEMPLATE_ T21,
- GTEST_TEMPLATE_ T22, GTEST_TEMPLATE_ T23, GTEST_TEMPLATE_ T24,
- GTEST_TEMPLATE_ T25, GTEST_TEMPLATE_ T26, GTEST_TEMPLATE_ T27,
- GTEST_TEMPLATE_ T28, GTEST_TEMPLATE_ T29, GTEST_TEMPLATE_ T30,
- GTEST_TEMPLATE_ T31, GTEST_TEMPLATE_ T32, GTEST_TEMPLATE_ T33,
- GTEST_TEMPLATE_ T34, GTEST_TEMPLATE_ T35, GTEST_TEMPLATE_ T36,
- GTEST_TEMPLATE_ T37, GTEST_TEMPLATE_ T38, GTEST_TEMPLATE_ T39,
- GTEST_TEMPLATE_ T40, GTEST_TEMPLATE_ T41, GTEST_TEMPLATE_ T42,
- GTEST_TEMPLATE_ T43, GTEST_TEMPLATE_ T44, GTEST_TEMPLATE_ T45,
- GTEST_TEMPLATE_ T46, GTEST_TEMPLATE_ T47>
-struct Templates47 {
- typedef TemplateSel<T1> Head;
- typedef Templates46<T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14,
- T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27, T28,
- T29, T30, T31, T32, T33, T34, T35, T36, T37, T38, T39, T40, T41, T42,
- T43, T44, T45, T46, T47> Tail;
-};
-
-template <GTEST_TEMPLATE_ T1, GTEST_TEMPLATE_ T2, GTEST_TEMPLATE_ T3,
- GTEST_TEMPLATE_ T4, GTEST_TEMPLATE_ T5, GTEST_TEMPLATE_ T6,
- GTEST_TEMPLATE_ T7, GTEST_TEMPLATE_ T8, GTEST_TEMPLATE_ T9,
- GTEST_TEMPLATE_ T10, GTEST_TEMPLATE_ T11, GTEST_TEMPLATE_ T12,
- GTEST_TEMPLATE_ T13, GTEST_TEMPLATE_ T14, GTEST_TEMPLATE_ T15,
- GTEST_TEMPLATE_ T16, GTEST_TEMPLATE_ T17, GTEST_TEMPLATE_ T18,
- GTEST_TEMPLATE_ T19, GTEST_TEMPLATE_ T20, GTEST_TEMPLATE_ T21,
- GTEST_TEMPLATE_ T22, GTEST_TEMPLATE_ T23, GTEST_TEMPLATE_ T24,
- GTEST_TEMPLATE_ T25, GTEST_TEMPLATE_ T26, GTEST_TEMPLATE_ T27,
- GTEST_TEMPLATE_ T28, GTEST_TEMPLATE_ T29, GTEST_TEMPLATE_ T30,
- GTEST_TEMPLATE_ T31, GTEST_TEMPLATE_ T32, GTEST_TEMPLATE_ T33,
- GTEST_TEMPLATE_ T34, GTEST_TEMPLATE_ T35, GTEST_TEMPLATE_ T36,
- GTEST_TEMPLATE_ T37, GTEST_TEMPLATE_ T38, GTEST_TEMPLATE_ T39,
- GTEST_TEMPLATE_ T40, GTEST_TEMPLATE_ T41, GTEST_TEMPLATE_ T42,
- GTEST_TEMPLATE_ T43, GTEST_TEMPLATE_ T44, GTEST_TEMPLATE_ T45,
- GTEST_TEMPLATE_ T46, GTEST_TEMPLATE_ T47, GTEST_TEMPLATE_ T48>
-struct Templates48 {
- typedef TemplateSel<T1> Head;
- typedef Templates47<T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14,
- T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27, T28,
- T29, T30, T31, T32, T33, T34, T35, T36, T37, T38, T39, T40, T41, T42,
- T43, T44, T45, T46, T47, T48> Tail;
-};
-
-template <GTEST_TEMPLATE_ T1, GTEST_TEMPLATE_ T2, GTEST_TEMPLATE_ T3,
- GTEST_TEMPLATE_ T4, GTEST_TEMPLATE_ T5, GTEST_TEMPLATE_ T6,
- GTEST_TEMPLATE_ T7, GTEST_TEMPLATE_ T8, GTEST_TEMPLATE_ T9,
- GTEST_TEMPLATE_ T10, GTEST_TEMPLATE_ T11, GTEST_TEMPLATE_ T12,
- GTEST_TEMPLATE_ T13, GTEST_TEMPLATE_ T14, GTEST_TEMPLATE_ T15,
- GTEST_TEMPLATE_ T16, GTEST_TEMPLATE_ T17, GTEST_TEMPLATE_ T18,
- GTEST_TEMPLATE_ T19, GTEST_TEMPLATE_ T20, GTEST_TEMPLATE_ T21,
- GTEST_TEMPLATE_ T22, GTEST_TEMPLATE_ T23, GTEST_TEMPLATE_ T24,
- GTEST_TEMPLATE_ T25, GTEST_TEMPLATE_ T26, GTEST_TEMPLATE_ T27,
- GTEST_TEMPLATE_ T28, GTEST_TEMPLATE_ T29, GTEST_TEMPLATE_ T30,
- GTEST_TEMPLATE_ T31, GTEST_TEMPLATE_ T32, GTEST_TEMPLATE_ T33,
- GTEST_TEMPLATE_ T34, GTEST_TEMPLATE_ T35, GTEST_TEMPLATE_ T36,
- GTEST_TEMPLATE_ T37, GTEST_TEMPLATE_ T38, GTEST_TEMPLATE_ T39,
- GTEST_TEMPLATE_ T40, GTEST_TEMPLATE_ T41, GTEST_TEMPLATE_ T42,
- GTEST_TEMPLATE_ T43, GTEST_TEMPLATE_ T44, GTEST_TEMPLATE_ T45,
- GTEST_TEMPLATE_ T46, GTEST_TEMPLATE_ T47, GTEST_TEMPLATE_ T48,
- GTEST_TEMPLATE_ T49>
-struct Templates49 {
- typedef TemplateSel<T1> Head;
- typedef Templates48<T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14,
- T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27, T28,
- T29, T30, T31, T32, T33, T34, T35, T36, T37, T38, T39, T40, T41, T42,
- T43, T44, T45, T46, T47, T48, T49> Tail;
-};
-
-template <GTEST_TEMPLATE_ T1, GTEST_TEMPLATE_ T2, GTEST_TEMPLATE_ T3,
- GTEST_TEMPLATE_ T4, GTEST_TEMPLATE_ T5, GTEST_TEMPLATE_ T6,
- GTEST_TEMPLATE_ T7, GTEST_TEMPLATE_ T8, GTEST_TEMPLATE_ T9,
- GTEST_TEMPLATE_ T10, GTEST_TEMPLATE_ T11, GTEST_TEMPLATE_ T12,
- GTEST_TEMPLATE_ T13, GTEST_TEMPLATE_ T14, GTEST_TEMPLATE_ T15,
- GTEST_TEMPLATE_ T16, GTEST_TEMPLATE_ T17, GTEST_TEMPLATE_ T18,
- GTEST_TEMPLATE_ T19, GTEST_TEMPLATE_ T20, GTEST_TEMPLATE_ T21,
- GTEST_TEMPLATE_ T22, GTEST_TEMPLATE_ T23, GTEST_TEMPLATE_ T24,
- GTEST_TEMPLATE_ T25, GTEST_TEMPLATE_ T26, GTEST_TEMPLATE_ T27,
- GTEST_TEMPLATE_ T28, GTEST_TEMPLATE_ T29, GTEST_TEMPLATE_ T30,
- GTEST_TEMPLATE_ T31, GTEST_TEMPLATE_ T32, GTEST_TEMPLATE_ T33,
- GTEST_TEMPLATE_ T34, GTEST_TEMPLATE_ T35, GTEST_TEMPLATE_ T36,
- GTEST_TEMPLATE_ T37, GTEST_TEMPLATE_ T38, GTEST_TEMPLATE_ T39,
- GTEST_TEMPLATE_ T40, GTEST_TEMPLATE_ T41, GTEST_TEMPLATE_ T42,
- GTEST_TEMPLATE_ T43, GTEST_TEMPLATE_ T44, GTEST_TEMPLATE_ T45,
- GTEST_TEMPLATE_ T46, GTEST_TEMPLATE_ T47, GTEST_TEMPLATE_ T48,
- GTEST_TEMPLATE_ T49, GTEST_TEMPLATE_ T50>
-struct Templates50 {
- typedef TemplateSel<T1> Head;
- typedef Templates49<T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14,
- T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27, T28,
- T29, T30, T31, T32, T33, T34, T35, T36, T37, T38, T39, T40, T41, T42,
- T43, T44, T45, T46, T47, T48, T49, T50> Tail;
-};
-
-
-// We don't want to require the users to write TemplatesN<...> directly,
-// as that would require them to count the length. Templates<...> is much
-// easier to write, but generates horrible messages when there is a
-// compiler error, as gcc insists on printing out each template
-// argument, even if it has the default value (this means Templates<list>
-// will appear as Templates<list, NoneT, NoneT, ..., NoneT> in the compiler
-// errors).
-//
-// Our solution is to combine the best part of the two approaches: a
-// user would write Templates<T1, ..., TN>, and Google Test will translate
-// that to TemplatesN<T1, ..., TN> internally to make error messages
-// readable. The translation is done by the 'type' member of the
-// Templates template.
-template <GTEST_TEMPLATE_ T1 = NoneT, GTEST_TEMPLATE_ T2 = NoneT,
- GTEST_TEMPLATE_ T3 = NoneT, GTEST_TEMPLATE_ T4 = NoneT,
- GTEST_TEMPLATE_ T5 = NoneT, GTEST_TEMPLATE_ T6 = NoneT,
- GTEST_TEMPLATE_ T7 = NoneT, GTEST_TEMPLATE_ T8 = NoneT,
- GTEST_TEMPLATE_ T9 = NoneT, GTEST_TEMPLATE_ T10 = NoneT,
- GTEST_TEMPLATE_ T11 = NoneT, GTEST_TEMPLATE_ T12 = NoneT,
- GTEST_TEMPLATE_ T13 = NoneT, GTEST_TEMPLATE_ T14 = NoneT,
- GTEST_TEMPLATE_ T15 = NoneT, GTEST_TEMPLATE_ T16 = NoneT,
- GTEST_TEMPLATE_ T17 = NoneT, GTEST_TEMPLATE_ T18 = NoneT,
- GTEST_TEMPLATE_ T19 = NoneT, GTEST_TEMPLATE_ T20 = NoneT,
- GTEST_TEMPLATE_ T21 = NoneT, GTEST_TEMPLATE_ T22 = NoneT,
- GTEST_TEMPLATE_ T23 = NoneT, GTEST_TEMPLATE_ T24 = NoneT,
- GTEST_TEMPLATE_ T25 = NoneT, GTEST_TEMPLATE_ T26 = NoneT,
- GTEST_TEMPLATE_ T27 = NoneT, GTEST_TEMPLATE_ T28 = NoneT,
- GTEST_TEMPLATE_ T29 = NoneT, GTEST_TEMPLATE_ T30 = NoneT,
- GTEST_TEMPLATE_ T31 = NoneT, GTEST_TEMPLATE_ T32 = NoneT,
- GTEST_TEMPLATE_ T33 = NoneT, GTEST_TEMPLATE_ T34 = NoneT,
- GTEST_TEMPLATE_ T35 = NoneT, GTEST_TEMPLATE_ T36 = NoneT,
- GTEST_TEMPLATE_ T37 = NoneT, GTEST_TEMPLATE_ T38 = NoneT,
- GTEST_TEMPLATE_ T39 = NoneT, GTEST_TEMPLATE_ T40 = NoneT,
- GTEST_TEMPLATE_ T41 = NoneT, GTEST_TEMPLATE_ T42 = NoneT,
- GTEST_TEMPLATE_ T43 = NoneT, GTEST_TEMPLATE_ T44 = NoneT,
- GTEST_TEMPLATE_ T45 = NoneT, GTEST_TEMPLATE_ T46 = NoneT,
- GTEST_TEMPLATE_ T47 = NoneT, GTEST_TEMPLATE_ T48 = NoneT,
- GTEST_TEMPLATE_ T49 = NoneT, GTEST_TEMPLATE_ T50 = NoneT>
-struct Templates {
- typedef Templates50<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13,
- T14, T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27,
- T28, T29, T30, T31, T32, T33, T34, T35, T36, T37, T38, T39, T40, T41,
- T42, T43, T44, T45, T46, T47, T48, T49, T50> type;
-};
-
-template <>
-struct Templates<NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT,
- NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT,
- NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT,
- NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT,
- NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT,
- NoneT> {
- typedef Templates0 type;
-};
-template <GTEST_TEMPLATE_ T1>
-struct Templates<T1, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT,
- NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT,
- NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT,
- NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT,
- NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT,
- NoneT> {
- typedef Templates1<T1> type;
-};
-template <GTEST_TEMPLATE_ T1, GTEST_TEMPLATE_ T2>
-struct Templates<T1, T2, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT,
- NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT,
- NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT,
- NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT,
- NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT,
- NoneT> {
- typedef Templates2<T1, T2> type;
-};
-template <GTEST_TEMPLATE_ T1, GTEST_TEMPLATE_ T2, GTEST_TEMPLATE_ T3>
-struct Templates<T1, T2, T3, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT,
- NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT,
- NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT,
- NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT,
- NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT> {
- typedef Templates3<T1, T2, T3> type;
-};
-template <GTEST_TEMPLATE_ T1, GTEST_TEMPLATE_ T2, GTEST_TEMPLATE_ T3,
- GTEST_TEMPLATE_ T4>
-struct Templates<T1, T2, T3, T4, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT,
- NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT,
- NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT,
- NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT,
- NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT> {
- typedef Templates4<T1, T2, T3, T4> type;
-};
-template <GTEST_TEMPLATE_ T1, GTEST_TEMPLATE_ T2, GTEST_TEMPLATE_ T3,
- GTEST_TEMPLATE_ T4, GTEST_TEMPLATE_ T5>
-struct Templates<T1, T2, T3, T4, T5, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT,
- NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT,
- NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT,
- NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT,
- NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT> {
- typedef Templates5<T1, T2, T3, T4, T5> type;
-};
-template <GTEST_TEMPLATE_ T1, GTEST_TEMPLATE_ T2, GTEST_TEMPLATE_ T3,
- GTEST_TEMPLATE_ T4, GTEST_TEMPLATE_ T5, GTEST_TEMPLATE_ T6>
-struct Templates<T1, T2, T3, T4, T5, T6, NoneT, NoneT, NoneT, NoneT, NoneT,
- NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT,
- NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT,
- NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT,
- NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT> {
- typedef Templates6<T1, T2, T3, T4, T5, T6> type;
-};
-template <GTEST_TEMPLATE_ T1, GTEST_TEMPLATE_ T2, GTEST_TEMPLATE_ T3,
- GTEST_TEMPLATE_ T4, GTEST_TEMPLATE_ T5, GTEST_TEMPLATE_ T6,
- GTEST_TEMPLATE_ T7>
-struct Templates<T1, T2, T3, T4, T5, T6, T7, NoneT, NoneT, NoneT, NoneT, NoneT,
- NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT,
- NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT,
- NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT,
- NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT> {
- typedef Templates7<T1, T2, T3, T4, T5, T6, T7> type;
-};
-template <GTEST_TEMPLATE_ T1, GTEST_TEMPLATE_ T2, GTEST_TEMPLATE_ T3,
- GTEST_TEMPLATE_ T4, GTEST_TEMPLATE_ T5, GTEST_TEMPLATE_ T6,
- GTEST_TEMPLATE_ T7, GTEST_TEMPLATE_ T8>
-struct Templates<T1, T2, T3, T4, T5, T6, T7, T8, NoneT, NoneT, NoneT, NoneT,
- NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT,
- NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT,
- NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT,
- NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT> {
- typedef Templates8<T1, T2, T3, T4, T5, T6, T7, T8> type;
-};
-template <GTEST_TEMPLATE_ T1, GTEST_TEMPLATE_ T2, GTEST_TEMPLATE_ T3,
- GTEST_TEMPLATE_ T4, GTEST_TEMPLATE_ T5, GTEST_TEMPLATE_ T6,
- GTEST_TEMPLATE_ T7, GTEST_TEMPLATE_ T8, GTEST_TEMPLATE_ T9>
-struct Templates<T1, T2, T3, T4, T5, T6, T7, T8, T9, NoneT, NoneT, NoneT,
- NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT,
- NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT,
- NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT,
- NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT> {
- typedef Templates9<T1, T2, T3, T4, T5, T6, T7, T8, T9> type;
-};
-template <GTEST_TEMPLATE_ T1, GTEST_TEMPLATE_ T2, GTEST_TEMPLATE_ T3,
- GTEST_TEMPLATE_ T4, GTEST_TEMPLATE_ T5, GTEST_TEMPLATE_ T6,
- GTEST_TEMPLATE_ T7, GTEST_TEMPLATE_ T8, GTEST_TEMPLATE_ T9,
- GTEST_TEMPLATE_ T10>
-struct Templates<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, NoneT, NoneT, NoneT,
- NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT,
- NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT,
- NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT,
- NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT> {
- typedef Templates10<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10> type;
-};
-template <GTEST_TEMPLATE_ T1, GTEST_TEMPLATE_ T2, GTEST_TEMPLATE_ T3,
- GTEST_TEMPLATE_ T4, GTEST_TEMPLATE_ T5, GTEST_TEMPLATE_ T6,
- GTEST_TEMPLATE_ T7, GTEST_TEMPLATE_ T8, GTEST_TEMPLATE_ T9,
- GTEST_TEMPLATE_ T10, GTEST_TEMPLATE_ T11>
-struct Templates<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, NoneT, NoneT,
- NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT,
- NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT,
- NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT,
- NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT> {
- typedef Templates11<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11> type;
-};
-template <GTEST_TEMPLATE_ T1, GTEST_TEMPLATE_ T2, GTEST_TEMPLATE_ T3,
- GTEST_TEMPLATE_ T4, GTEST_TEMPLATE_ T5, GTEST_TEMPLATE_ T6,
- GTEST_TEMPLATE_ T7, GTEST_TEMPLATE_ T8, GTEST_TEMPLATE_ T9,
- GTEST_TEMPLATE_ T10, GTEST_TEMPLATE_ T11, GTEST_TEMPLATE_ T12>
-struct Templates<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, NoneT,
- NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT,
- NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT,
- NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT,
- NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT> {
- typedef Templates12<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12> type;
-};
-template <GTEST_TEMPLATE_ T1, GTEST_TEMPLATE_ T2, GTEST_TEMPLATE_ T3,
- GTEST_TEMPLATE_ T4, GTEST_TEMPLATE_ T5, GTEST_TEMPLATE_ T6,
- GTEST_TEMPLATE_ T7, GTEST_TEMPLATE_ T8, GTEST_TEMPLATE_ T9,
- GTEST_TEMPLATE_ T10, GTEST_TEMPLATE_ T11, GTEST_TEMPLATE_ T12,
- GTEST_TEMPLATE_ T13>
-struct Templates<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, NoneT,
- NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT,
- NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT,
- NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT,
- NoneT, NoneT, NoneT, NoneT, NoneT, NoneT> {
- typedef Templates13<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12,
- T13> type;
-};
-template <GTEST_TEMPLATE_ T1, GTEST_TEMPLATE_ T2, GTEST_TEMPLATE_ T3,
- GTEST_TEMPLATE_ T4, GTEST_TEMPLATE_ T5, GTEST_TEMPLATE_ T6,
- GTEST_TEMPLATE_ T7, GTEST_TEMPLATE_ T8, GTEST_TEMPLATE_ T9,
- GTEST_TEMPLATE_ T10, GTEST_TEMPLATE_ T11, GTEST_TEMPLATE_ T12,
- GTEST_TEMPLATE_ T13, GTEST_TEMPLATE_ T14>
-struct Templates<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14,
- NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT,
- NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT,
- NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT,
- NoneT, NoneT, NoneT, NoneT, NoneT, NoneT> {
- typedef Templates14<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13,
- T14> type;
-};
-template <GTEST_TEMPLATE_ T1, GTEST_TEMPLATE_ T2, GTEST_TEMPLATE_ T3,
- GTEST_TEMPLATE_ T4, GTEST_TEMPLATE_ T5, GTEST_TEMPLATE_ T6,
- GTEST_TEMPLATE_ T7, GTEST_TEMPLATE_ T8, GTEST_TEMPLATE_ T9,
- GTEST_TEMPLATE_ T10, GTEST_TEMPLATE_ T11, GTEST_TEMPLATE_ T12,
- GTEST_TEMPLATE_ T13, GTEST_TEMPLATE_ T14, GTEST_TEMPLATE_ T15>
-struct Templates<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14,
- T15, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT,
- NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT,
- NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT,
- NoneT, NoneT, NoneT, NoneT, NoneT> {
- typedef Templates15<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13,
- T14, T15> type;
-};
-template <GTEST_TEMPLATE_ T1, GTEST_TEMPLATE_ T2, GTEST_TEMPLATE_ T3,
- GTEST_TEMPLATE_ T4, GTEST_TEMPLATE_ T5, GTEST_TEMPLATE_ T6,
- GTEST_TEMPLATE_ T7, GTEST_TEMPLATE_ T8, GTEST_TEMPLATE_ T9,
- GTEST_TEMPLATE_ T10, GTEST_TEMPLATE_ T11, GTEST_TEMPLATE_ T12,
- GTEST_TEMPLATE_ T13, GTEST_TEMPLATE_ T14, GTEST_TEMPLATE_ T15,
- GTEST_TEMPLATE_ T16>
-struct Templates<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14,
- T15, T16, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT,
- NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT,
- NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT,
- NoneT, NoneT, NoneT, NoneT, NoneT> {
- typedef Templates16<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13,
- T14, T15, T16> type;
-};
-template <GTEST_TEMPLATE_ T1, GTEST_TEMPLATE_ T2, GTEST_TEMPLATE_ T3,
- GTEST_TEMPLATE_ T4, GTEST_TEMPLATE_ T5, GTEST_TEMPLATE_ T6,
- GTEST_TEMPLATE_ T7, GTEST_TEMPLATE_ T8, GTEST_TEMPLATE_ T9,
- GTEST_TEMPLATE_ T10, GTEST_TEMPLATE_ T11, GTEST_TEMPLATE_ T12,
- GTEST_TEMPLATE_ T13, GTEST_TEMPLATE_ T14, GTEST_TEMPLATE_ T15,
- GTEST_TEMPLATE_ T16, GTEST_TEMPLATE_ T17>
-struct Templates<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14,
- T15, T16, T17, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT,
- NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT,
- NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT,
- NoneT, NoneT, NoneT, NoneT, NoneT> {
- typedef Templates17<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13,
- T14, T15, T16, T17> type;
-};
-template <GTEST_TEMPLATE_ T1, GTEST_TEMPLATE_ T2, GTEST_TEMPLATE_ T3,
- GTEST_TEMPLATE_ T4, GTEST_TEMPLATE_ T5, GTEST_TEMPLATE_ T6,
- GTEST_TEMPLATE_ T7, GTEST_TEMPLATE_ T8, GTEST_TEMPLATE_ T9,
- GTEST_TEMPLATE_ T10, GTEST_TEMPLATE_ T11, GTEST_TEMPLATE_ T12,
- GTEST_TEMPLATE_ T13, GTEST_TEMPLATE_ T14, GTEST_TEMPLATE_ T15,
- GTEST_TEMPLATE_ T16, GTEST_TEMPLATE_ T17, GTEST_TEMPLATE_ T18>
-struct Templates<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14,
- T15, T16, T17, T18, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT,
- NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT,
- NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT,
- NoneT, NoneT, NoneT, NoneT> {
- typedef Templates18<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13,
- T14, T15, T16, T17, T18> type;
-};
-template <GTEST_TEMPLATE_ T1, GTEST_TEMPLATE_ T2, GTEST_TEMPLATE_ T3,
- GTEST_TEMPLATE_ T4, GTEST_TEMPLATE_ T5, GTEST_TEMPLATE_ T6,
- GTEST_TEMPLATE_ T7, GTEST_TEMPLATE_ T8, GTEST_TEMPLATE_ T9,
- GTEST_TEMPLATE_ T10, GTEST_TEMPLATE_ T11, GTEST_TEMPLATE_ T12,
- GTEST_TEMPLATE_ T13, GTEST_TEMPLATE_ T14, GTEST_TEMPLATE_ T15,
- GTEST_TEMPLATE_ T16, GTEST_TEMPLATE_ T17, GTEST_TEMPLATE_ T18,
- GTEST_TEMPLATE_ T19>
-struct Templates<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14,
- T15, T16, T17, T18, T19, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT,
- NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT,
- NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT,
- NoneT, NoneT, NoneT, NoneT> {
- typedef Templates19<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13,
- T14, T15, T16, T17, T18, T19> type;
-};
-template <GTEST_TEMPLATE_ T1, GTEST_TEMPLATE_ T2, GTEST_TEMPLATE_ T3,
- GTEST_TEMPLATE_ T4, GTEST_TEMPLATE_ T5, GTEST_TEMPLATE_ T6,
- GTEST_TEMPLATE_ T7, GTEST_TEMPLATE_ T8, GTEST_TEMPLATE_ T9,
- GTEST_TEMPLATE_ T10, GTEST_TEMPLATE_ T11, GTEST_TEMPLATE_ T12,
- GTEST_TEMPLATE_ T13, GTEST_TEMPLATE_ T14, GTEST_TEMPLATE_ T15,
- GTEST_TEMPLATE_ T16, GTEST_TEMPLATE_ T17, GTEST_TEMPLATE_ T18,
- GTEST_TEMPLATE_ T19, GTEST_TEMPLATE_ T20>
-struct Templates<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14,
- T15, T16, T17, T18, T19, T20, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT,
- NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT,
- NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT,
- NoneT, NoneT, NoneT, NoneT> {
- typedef Templates20<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13,
- T14, T15, T16, T17, T18, T19, T20> type;
-};
-template <GTEST_TEMPLATE_ T1, GTEST_TEMPLATE_ T2, GTEST_TEMPLATE_ T3,
- GTEST_TEMPLATE_ T4, GTEST_TEMPLATE_ T5, GTEST_TEMPLATE_ T6,
- GTEST_TEMPLATE_ T7, GTEST_TEMPLATE_ T8, GTEST_TEMPLATE_ T9,
- GTEST_TEMPLATE_ T10, GTEST_TEMPLATE_ T11, GTEST_TEMPLATE_ T12,
- GTEST_TEMPLATE_ T13, GTEST_TEMPLATE_ T14, GTEST_TEMPLATE_ T15,
- GTEST_TEMPLATE_ T16, GTEST_TEMPLATE_ T17, GTEST_TEMPLATE_ T18,
- GTEST_TEMPLATE_ T19, GTEST_TEMPLATE_ T20, GTEST_TEMPLATE_ T21>
-struct Templates<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14,
- T15, T16, T17, T18, T19, T20, T21, NoneT, NoneT, NoneT, NoneT, NoneT,
- NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT,
- NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT,
- NoneT, NoneT, NoneT, NoneT> {
- typedef Templates21<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13,
- T14, T15, T16, T17, T18, T19, T20, T21> type;
-};
-template <GTEST_TEMPLATE_ T1, GTEST_TEMPLATE_ T2, GTEST_TEMPLATE_ T3,
- GTEST_TEMPLATE_ T4, GTEST_TEMPLATE_ T5, GTEST_TEMPLATE_ T6,
- GTEST_TEMPLATE_ T7, GTEST_TEMPLATE_ T8, GTEST_TEMPLATE_ T9,
- GTEST_TEMPLATE_ T10, GTEST_TEMPLATE_ T11, GTEST_TEMPLATE_ T12,
- GTEST_TEMPLATE_ T13, GTEST_TEMPLATE_ T14, GTEST_TEMPLATE_ T15,
- GTEST_TEMPLATE_ T16, GTEST_TEMPLATE_ T17, GTEST_TEMPLATE_ T18,
- GTEST_TEMPLATE_ T19, GTEST_TEMPLATE_ T20, GTEST_TEMPLATE_ T21,
- GTEST_TEMPLATE_ T22>
-struct Templates<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14,
- T15, T16, T17, T18, T19, T20, T21, T22, NoneT, NoneT, NoneT, NoneT, NoneT,
- NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT,
- NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT,
- NoneT, NoneT, NoneT> {
- typedef Templates22<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13,
- T14, T15, T16, T17, T18, T19, T20, T21, T22> type;
-};
-template <GTEST_TEMPLATE_ T1, GTEST_TEMPLATE_ T2, GTEST_TEMPLATE_ T3,
- GTEST_TEMPLATE_ T4, GTEST_TEMPLATE_ T5, GTEST_TEMPLATE_ T6,
- GTEST_TEMPLATE_ T7, GTEST_TEMPLATE_ T8, GTEST_TEMPLATE_ T9,
- GTEST_TEMPLATE_ T10, GTEST_TEMPLATE_ T11, GTEST_TEMPLATE_ T12,
- GTEST_TEMPLATE_ T13, GTEST_TEMPLATE_ T14, GTEST_TEMPLATE_ T15,
- GTEST_TEMPLATE_ T16, GTEST_TEMPLATE_ T17, GTEST_TEMPLATE_ T18,
- GTEST_TEMPLATE_ T19, GTEST_TEMPLATE_ T20, GTEST_TEMPLATE_ T21,
- GTEST_TEMPLATE_ T22, GTEST_TEMPLATE_ T23>
-struct Templates<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14,
- T15, T16, T17, T18, T19, T20, T21, T22, T23, NoneT, NoneT, NoneT, NoneT,
- NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT,
- NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT,
- NoneT, NoneT, NoneT> {
- typedef Templates23<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13,
- T14, T15, T16, T17, T18, T19, T20, T21, T22, T23> type;
-};
-template <GTEST_TEMPLATE_ T1, GTEST_TEMPLATE_ T2, GTEST_TEMPLATE_ T3,
- GTEST_TEMPLATE_ T4, GTEST_TEMPLATE_ T5, GTEST_TEMPLATE_ T6,
- GTEST_TEMPLATE_ T7, GTEST_TEMPLATE_ T8, GTEST_TEMPLATE_ T9,
- GTEST_TEMPLATE_ T10, GTEST_TEMPLATE_ T11, GTEST_TEMPLATE_ T12,
- GTEST_TEMPLATE_ T13, GTEST_TEMPLATE_ T14, GTEST_TEMPLATE_ T15,
- GTEST_TEMPLATE_ T16, GTEST_TEMPLATE_ T17, GTEST_TEMPLATE_ T18,
- GTEST_TEMPLATE_ T19, GTEST_TEMPLATE_ T20, GTEST_TEMPLATE_ T21,
- GTEST_TEMPLATE_ T22, GTEST_TEMPLATE_ T23, GTEST_TEMPLATE_ T24>
-struct Templates<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14,
- T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, NoneT, NoneT, NoneT,
- NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT,
- NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT,
- NoneT, NoneT, NoneT> {
- typedef Templates24<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13,
- T14, T15, T16, T17, T18, T19, T20, T21, T22, T23, T24> type;
-};
-template <GTEST_TEMPLATE_ T1, GTEST_TEMPLATE_ T2, GTEST_TEMPLATE_ T3,
- GTEST_TEMPLATE_ T4, GTEST_TEMPLATE_ T5, GTEST_TEMPLATE_ T6,
- GTEST_TEMPLATE_ T7, GTEST_TEMPLATE_ T8, GTEST_TEMPLATE_ T9,
- GTEST_TEMPLATE_ T10, GTEST_TEMPLATE_ T11, GTEST_TEMPLATE_ T12,
- GTEST_TEMPLATE_ T13, GTEST_TEMPLATE_ T14, GTEST_TEMPLATE_ T15,
- GTEST_TEMPLATE_ T16, GTEST_TEMPLATE_ T17, GTEST_TEMPLATE_ T18,
- GTEST_TEMPLATE_ T19, GTEST_TEMPLATE_ T20, GTEST_TEMPLATE_ T21,
- GTEST_TEMPLATE_ T22, GTEST_TEMPLATE_ T23, GTEST_TEMPLATE_ T24,
- GTEST_TEMPLATE_ T25>
-struct Templates<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14,
- T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, NoneT, NoneT, NoneT,
- NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT,
- NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT,
- NoneT, NoneT> {
- typedef Templates25<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13,
- T14, T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25> type;
-};
-template <GTEST_TEMPLATE_ T1, GTEST_TEMPLATE_ T2, GTEST_TEMPLATE_ T3,
- GTEST_TEMPLATE_ T4, GTEST_TEMPLATE_ T5, GTEST_TEMPLATE_ T6,
- GTEST_TEMPLATE_ T7, GTEST_TEMPLATE_ T8, GTEST_TEMPLATE_ T9,
- GTEST_TEMPLATE_ T10, GTEST_TEMPLATE_ T11, GTEST_TEMPLATE_ T12,
- GTEST_TEMPLATE_ T13, GTEST_TEMPLATE_ T14, GTEST_TEMPLATE_ T15,
- GTEST_TEMPLATE_ T16, GTEST_TEMPLATE_ T17, GTEST_TEMPLATE_ T18,
- GTEST_TEMPLATE_ T19, GTEST_TEMPLATE_ T20, GTEST_TEMPLATE_ T21,
- GTEST_TEMPLATE_ T22, GTEST_TEMPLATE_ T23, GTEST_TEMPLATE_ T24,
- GTEST_TEMPLATE_ T25, GTEST_TEMPLATE_ T26>
-struct Templates<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14,
- T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, NoneT, NoneT,
- NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT,
- NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT,
- NoneT, NoneT> {
- typedef Templates26<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13,
- T14, T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26> type;
-};
-template <GTEST_TEMPLATE_ T1, GTEST_TEMPLATE_ T2, GTEST_TEMPLATE_ T3,
- GTEST_TEMPLATE_ T4, GTEST_TEMPLATE_ T5, GTEST_TEMPLATE_ T6,
- GTEST_TEMPLATE_ T7, GTEST_TEMPLATE_ T8, GTEST_TEMPLATE_ T9,
- GTEST_TEMPLATE_ T10, GTEST_TEMPLATE_ T11, GTEST_TEMPLATE_ T12,
- GTEST_TEMPLATE_ T13, GTEST_TEMPLATE_ T14, GTEST_TEMPLATE_ T15,
- GTEST_TEMPLATE_ T16, GTEST_TEMPLATE_ T17, GTEST_TEMPLATE_ T18,
- GTEST_TEMPLATE_ T19, GTEST_TEMPLATE_ T20, GTEST_TEMPLATE_ T21,
- GTEST_TEMPLATE_ T22, GTEST_TEMPLATE_ T23, GTEST_TEMPLATE_ T24,
- GTEST_TEMPLATE_ T25, GTEST_TEMPLATE_ T26, GTEST_TEMPLATE_ T27>
-struct Templates<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14,
- T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27, NoneT,
- NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT,
- NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT,
- NoneT, NoneT> {
- typedef Templates27<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13,
- T14, T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26,
- T27> type;
-};
-template <GTEST_TEMPLATE_ T1, GTEST_TEMPLATE_ T2, GTEST_TEMPLATE_ T3,
- GTEST_TEMPLATE_ T4, GTEST_TEMPLATE_ T5, GTEST_TEMPLATE_ T6,
- GTEST_TEMPLATE_ T7, GTEST_TEMPLATE_ T8, GTEST_TEMPLATE_ T9,
- GTEST_TEMPLATE_ T10, GTEST_TEMPLATE_ T11, GTEST_TEMPLATE_ T12,
- GTEST_TEMPLATE_ T13, GTEST_TEMPLATE_ T14, GTEST_TEMPLATE_ T15,
- GTEST_TEMPLATE_ T16, GTEST_TEMPLATE_ T17, GTEST_TEMPLATE_ T18,
- GTEST_TEMPLATE_ T19, GTEST_TEMPLATE_ T20, GTEST_TEMPLATE_ T21,
- GTEST_TEMPLATE_ T22, GTEST_TEMPLATE_ T23, GTEST_TEMPLATE_ T24,
- GTEST_TEMPLATE_ T25, GTEST_TEMPLATE_ T26, GTEST_TEMPLATE_ T27,
- GTEST_TEMPLATE_ T28>
-struct Templates<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14,
- T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27, T28,
- NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT,
- NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT,
- NoneT, NoneT> {
- typedef Templates28<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13,
- T14, T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27,
- T28> type;
-};
-template <GTEST_TEMPLATE_ T1, GTEST_TEMPLATE_ T2, GTEST_TEMPLATE_ T3,
- GTEST_TEMPLATE_ T4, GTEST_TEMPLATE_ T5, GTEST_TEMPLATE_ T6,
- GTEST_TEMPLATE_ T7, GTEST_TEMPLATE_ T8, GTEST_TEMPLATE_ T9,
- GTEST_TEMPLATE_ T10, GTEST_TEMPLATE_ T11, GTEST_TEMPLATE_ T12,
- GTEST_TEMPLATE_ T13, GTEST_TEMPLATE_ T14, GTEST_TEMPLATE_ T15,
- GTEST_TEMPLATE_ T16, GTEST_TEMPLATE_ T17, GTEST_TEMPLATE_ T18,
- GTEST_TEMPLATE_ T19, GTEST_TEMPLATE_ T20, GTEST_TEMPLATE_ T21,
- GTEST_TEMPLATE_ T22, GTEST_TEMPLATE_ T23, GTEST_TEMPLATE_ T24,
- GTEST_TEMPLATE_ T25, GTEST_TEMPLATE_ T26, GTEST_TEMPLATE_ T27,
- GTEST_TEMPLATE_ T28, GTEST_TEMPLATE_ T29>
-struct Templates<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14,
- T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27, T28, T29,
- NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT,
- NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT,
- NoneT> {
- typedef Templates29<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13,
- T14, T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27,
- T28, T29> type;
-};
-template <GTEST_TEMPLATE_ T1, GTEST_TEMPLATE_ T2, GTEST_TEMPLATE_ T3,
- GTEST_TEMPLATE_ T4, GTEST_TEMPLATE_ T5, GTEST_TEMPLATE_ T6,
- GTEST_TEMPLATE_ T7, GTEST_TEMPLATE_ T8, GTEST_TEMPLATE_ T9,
- GTEST_TEMPLATE_ T10, GTEST_TEMPLATE_ T11, GTEST_TEMPLATE_ T12,
- GTEST_TEMPLATE_ T13, GTEST_TEMPLATE_ T14, GTEST_TEMPLATE_ T15,
- GTEST_TEMPLATE_ T16, GTEST_TEMPLATE_ T17, GTEST_TEMPLATE_ T18,
- GTEST_TEMPLATE_ T19, GTEST_TEMPLATE_ T20, GTEST_TEMPLATE_ T21,
- GTEST_TEMPLATE_ T22, GTEST_TEMPLATE_ T23, GTEST_TEMPLATE_ T24,
- GTEST_TEMPLATE_ T25, GTEST_TEMPLATE_ T26, GTEST_TEMPLATE_ T27,
- GTEST_TEMPLATE_ T28, GTEST_TEMPLATE_ T29, GTEST_TEMPLATE_ T30>
-struct Templates<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14,
- T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27, T28, T29,
- T30, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT,
- NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT> {
- typedef Templates30<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13,
- T14, T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27,
- T28, T29, T30> type;
-};
-template <GTEST_TEMPLATE_ T1, GTEST_TEMPLATE_ T2, GTEST_TEMPLATE_ T3,
- GTEST_TEMPLATE_ T4, GTEST_TEMPLATE_ T5, GTEST_TEMPLATE_ T6,
- GTEST_TEMPLATE_ T7, GTEST_TEMPLATE_ T8, GTEST_TEMPLATE_ T9,
- GTEST_TEMPLATE_ T10, GTEST_TEMPLATE_ T11, GTEST_TEMPLATE_ T12,
- GTEST_TEMPLATE_ T13, GTEST_TEMPLATE_ T14, GTEST_TEMPLATE_ T15,
- GTEST_TEMPLATE_ T16, GTEST_TEMPLATE_ T17, GTEST_TEMPLATE_ T18,
- GTEST_TEMPLATE_ T19, GTEST_TEMPLATE_ T20, GTEST_TEMPLATE_ T21,
- GTEST_TEMPLATE_ T22, GTEST_TEMPLATE_ T23, GTEST_TEMPLATE_ T24,
- GTEST_TEMPLATE_ T25, GTEST_TEMPLATE_ T26, GTEST_TEMPLATE_ T27,
- GTEST_TEMPLATE_ T28, GTEST_TEMPLATE_ T29, GTEST_TEMPLATE_ T30,
- GTEST_TEMPLATE_ T31>
-struct Templates<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14,
- T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27, T28, T29,
- T30, T31, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT,
- NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT> {
- typedef Templates31<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13,
- T14, T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27,
- T28, T29, T30, T31> type;
-};
-template <GTEST_TEMPLATE_ T1, GTEST_TEMPLATE_ T2, GTEST_TEMPLATE_ T3,
- GTEST_TEMPLATE_ T4, GTEST_TEMPLATE_ T5, GTEST_TEMPLATE_ T6,
- GTEST_TEMPLATE_ T7, GTEST_TEMPLATE_ T8, GTEST_TEMPLATE_ T9,
- GTEST_TEMPLATE_ T10, GTEST_TEMPLATE_ T11, GTEST_TEMPLATE_ T12,
- GTEST_TEMPLATE_ T13, GTEST_TEMPLATE_ T14, GTEST_TEMPLATE_ T15,
- GTEST_TEMPLATE_ T16, GTEST_TEMPLATE_ T17, GTEST_TEMPLATE_ T18,
- GTEST_TEMPLATE_ T19, GTEST_TEMPLATE_ T20, GTEST_TEMPLATE_ T21,
- GTEST_TEMPLATE_ T22, GTEST_TEMPLATE_ T23, GTEST_TEMPLATE_ T24,
- GTEST_TEMPLATE_ T25, GTEST_TEMPLATE_ T26, GTEST_TEMPLATE_ T27,
- GTEST_TEMPLATE_ T28, GTEST_TEMPLATE_ T29, GTEST_TEMPLATE_ T30,
- GTEST_TEMPLATE_ T31, GTEST_TEMPLATE_ T32>
-struct Templates<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14,
- T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27, T28, T29,
- T30, T31, T32, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT,
- NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT> {
- typedef Templates32<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13,
- T14, T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27,
- T28, T29, T30, T31, T32> type;
-};
-template <GTEST_TEMPLATE_ T1, GTEST_TEMPLATE_ T2, GTEST_TEMPLATE_ T3,
- GTEST_TEMPLATE_ T4, GTEST_TEMPLATE_ T5, GTEST_TEMPLATE_ T6,
- GTEST_TEMPLATE_ T7, GTEST_TEMPLATE_ T8, GTEST_TEMPLATE_ T9,
- GTEST_TEMPLATE_ T10, GTEST_TEMPLATE_ T11, GTEST_TEMPLATE_ T12,
- GTEST_TEMPLATE_ T13, GTEST_TEMPLATE_ T14, GTEST_TEMPLATE_ T15,
- GTEST_TEMPLATE_ T16, GTEST_TEMPLATE_ T17, GTEST_TEMPLATE_ T18,
- GTEST_TEMPLATE_ T19, GTEST_TEMPLATE_ T20, GTEST_TEMPLATE_ T21,
- GTEST_TEMPLATE_ T22, GTEST_TEMPLATE_ T23, GTEST_TEMPLATE_ T24,
- GTEST_TEMPLATE_ T25, GTEST_TEMPLATE_ T26, GTEST_TEMPLATE_ T27,
- GTEST_TEMPLATE_ T28, GTEST_TEMPLATE_ T29, GTEST_TEMPLATE_ T30,
- GTEST_TEMPLATE_ T31, GTEST_TEMPLATE_ T32, GTEST_TEMPLATE_ T33>
-struct Templates<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14,
- T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27, T28, T29,
- T30, T31, T32, T33, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT,
- NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT> {
- typedef Templates33<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13,
- T14, T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27,
- T28, T29, T30, T31, T32, T33> type;
-};
-template <GTEST_TEMPLATE_ T1, GTEST_TEMPLATE_ T2, GTEST_TEMPLATE_ T3,
- GTEST_TEMPLATE_ T4, GTEST_TEMPLATE_ T5, GTEST_TEMPLATE_ T6,
- GTEST_TEMPLATE_ T7, GTEST_TEMPLATE_ T8, GTEST_TEMPLATE_ T9,
- GTEST_TEMPLATE_ T10, GTEST_TEMPLATE_ T11, GTEST_TEMPLATE_ T12,
- GTEST_TEMPLATE_ T13, GTEST_TEMPLATE_ T14, GTEST_TEMPLATE_ T15,
- GTEST_TEMPLATE_ T16, GTEST_TEMPLATE_ T17, GTEST_TEMPLATE_ T18,
- GTEST_TEMPLATE_ T19, GTEST_TEMPLATE_ T20, GTEST_TEMPLATE_ T21,
- GTEST_TEMPLATE_ T22, GTEST_TEMPLATE_ T23, GTEST_TEMPLATE_ T24,
- GTEST_TEMPLATE_ T25, GTEST_TEMPLATE_ T26, GTEST_TEMPLATE_ T27,
- GTEST_TEMPLATE_ T28, GTEST_TEMPLATE_ T29, GTEST_TEMPLATE_ T30,
- GTEST_TEMPLATE_ T31, GTEST_TEMPLATE_ T32, GTEST_TEMPLATE_ T33,
- GTEST_TEMPLATE_ T34>
-struct Templates<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14,
- T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27, T28, T29,
- T30, T31, T32, T33, T34, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT,
- NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT> {
- typedef Templates34<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13,
- T14, T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27,
- T28, T29, T30, T31, T32, T33, T34> type;
-};
-template <GTEST_TEMPLATE_ T1, GTEST_TEMPLATE_ T2, GTEST_TEMPLATE_ T3,
- GTEST_TEMPLATE_ T4, GTEST_TEMPLATE_ T5, GTEST_TEMPLATE_ T6,
- GTEST_TEMPLATE_ T7, GTEST_TEMPLATE_ T8, GTEST_TEMPLATE_ T9,
- GTEST_TEMPLATE_ T10, GTEST_TEMPLATE_ T11, GTEST_TEMPLATE_ T12,
- GTEST_TEMPLATE_ T13, GTEST_TEMPLATE_ T14, GTEST_TEMPLATE_ T15,
- GTEST_TEMPLATE_ T16, GTEST_TEMPLATE_ T17, GTEST_TEMPLATE_ T18,
- GTEST_TEMPLATE_ T19, GTEST_TEMPLATE_ T20, GTEST_TEMPLATE_ T21,
- GTEST_TEMPLATE_ T22, GTEST_TEMPLATE_ T23, GTEST_TEMPLATE_ T24,
- GTEST_TEMPLATE_ T25, GTEST_TEMPLATE_ T26, GTEST_TEMPLATE_ T27,
- GTEST_TEMPLATE_ T28, GTEST_TEMPLATE_ T29, GTEST_TEMPLATE_ T30,
- GTEST_TEMPLATE_ T31, GTEST_TEMPLATE_ T32, GTEST_TEMPLATE_ T33,
- GTEST_TEMPLATE_ T34, GTEST_TEMPLATE_ T35>
-struct Templates<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14,
- T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27, T28, T29,
- T30, T31, T32, T33, T34, T35, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT,
- NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT> {
- typedef Templates35<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13,
- T14, T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27,
- T28, T29, T30, T31, T32, T33, T34, T35> type;
-};
-template <GTEST_TEMPLATE_ T1, GTEST_TEMPLATE_ T2, GTEST_TEMPLATE_ T3,
- GTEST_TEMPLATE_ T4, GTEST_TEMPLATE_ T5, GTEST_TEMPLATE_ T6,
- GTEST_TEMPLATE_ T7, GTEST_TEMPLATE_ T8, GTEST_TEMPLATE_ T9,
- GTEST_TEMPLATE_ T10, GTEST_TEMPLATE_ T11, GTEST_TEMPLATE_ T12,
- GTEST_TEMPLATE_ T13, GTEST_TEMPLATE_ T14, GTEST_TEMPLATE_ T15,
- GTEST_TEMPLATE_ T16, GTEST_TEMPLATE_ T17, GTEST_TEMPLATE_ T18,
- GTEST_TEMPLATE_ T19, GTEST_TEMPLATE_ T20, GTEST_TEMPLATE_ T21,
- GTEST_TEMPLATE_ T22, GTEST_TEMPLATE_ T23, GTEST_TEMPLATE_ T24,
- GTEST_TEMPLATE_ T25, GTEST_TEMPLATE_ T26, GTEST_TEMPLATE_ T27,
- GTEST_TEMPLATE_ T28, GTEST_TEMPLATE_ T29, GTEST_TEMPLATE_ T30,
- GTEST_TEMPLATE_ T31, GTEST_TEMPLATE_ T32, GTEST_TEMPLATE_ T33,
- GTEST_TEMPLATE_ T34, GTEST_TEMPLATE_ T35, GTEST_TEMPLATE_ T36>
-struct Templates<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14,
- T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27, T28, T29,
- T30, T31, T32, T33, T34, T35, T36, NoneT, NoneT, NoneT, NoneT, NoneT,
- NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT> {
- typedef Templates36<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13,
- T14, T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27,
- T28, T29, T30, T31, T32, T33, T34, T35, T36> type;
-};
-template <GTEST_TEMPLATE_ T1, GTEST_TEMPLATE_ T2, GTEST_TEMPLATE_ T3,
- GTEST_TEMPLATE_ T4, GTEST_TEMPLATE_ T5, GTEST_TEMPLATE_ T6,
- GTEST_TEMPLATE_ T7, GTEST_TEMPLATE_ T8, GTEST_TEMPLATE_ T9,
- GTEST_TEMPLATE_ T10, GTEST_TEMPLATE_ T11, GTEST_TEMPLATE_ T12,
- GTEST_TEMPLATE_ T13, GTEST_TEMPLATE_ T14, GTEST_TEMPLATE_ T15,
- GTEST_TEMPLATE_ T16, GTEST_TEMPLATE_ T17, GTEST_TEMPLATE_ T18,
- GTEST_TEMPLATE_ T19, GTEST_TEMPLATE_ T20, GTEST_TEMPLATE_ T21,
- GTEST_TEMPLATE_ T22, GTEST_TEMPLATE_ T23, GTEST_TEMPLATE_ T24,
- GTEST_TEMPLATE_ T25, GTEST_TEMPLATE_ T26, GTEST_TEMPLATE_ T27,
- GTEST_TEMPLATE_ T28, GTEST_TEMPLATE_ T29, GTEST_TEMPLATE_ T30,
- GTEST_TEMPLATE_ T31, GTEST_TEMPLATE_ T32, GTEST_TEMPLATE_ T33,
- GTEST_TEMPLATE_ T34, GTEST_TEMPLATE_ T35, GTEST_TEMPLATE_ T36,
- GTEST_TEMPLATE_ T37>
-struct Templates<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14,
- T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27, T28, T29,
- T30, T31, T32, T33, T34, T35, T36, T37, NoneT, NoneT, NoneT, NoneT, NoneT,
- NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT> {
- typedef Templates37<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13,
- T14, T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27,
- T28, T29, T30, T31, T32, T33, T34, T35, T36, T37> type;
-};
-template <GTEST_TEMPLATE_ T1, GTEST_TEMPLATE_ T2, GTEST_TEMPLATE_ T3,
- GTEST_TEMPLATE_ T4, GTEST_TEMPLATE_ T5, GTEST_TEMPLATE_ T6,
- GTEST_TEMPLATE_ T7, GTEST_TEMPLATE_ T8, GTEST_TEMPLATE_ T9,
- GTEST_TEMPLATE_ T10, GTEST_TEMPLATE_ T11, GTEST_TEMPLATE_ T12,
- GTEST_TEMPLATE_ T13, GTEST_TEMPLATE_ T14, GTEST_TEMPLATE_ T15,
- GTEST_TEMPLATE_ T16, GTEST_TEMPLATE_ T17, GTEST_TEMPLATE_ T18,
- GTEST_TEMPLATE_ T19, GTEST_TEMPLATE_ T20, GTEST_TEMPLATE_ T21,
- GTEST_TEMPLATE_ T22, GTEST_TEMPLATE_ T23, GTEST_TEMPLATE_ T24,
- GTEST_TEMPLATE_ T25, GTEST_TEMPLATE_ T26, GTEST_TEMPLATE_ T27,
- GTEST_TEMPLATE_ T28, GTEST_TEMPLATE_ T29, GTEST_TEMPLATE_ T30,
- GTEST_TEMPLATE_ T31, GTEST_TEMPLATE_ T32, GTEST_TEMPLATE_ T33,
- GTEST_TEMPLATE_ T34, GTEST_TEMPLATE_ T35, GTEST_TEMPLATE_ T36,
- GTEST_TEMPLATE_ T37, GTEST_TEMPLATE_ T38>
-struct Templates<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14,
- T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27, T28, T29,
- T30, T31, T32, T33, T34, T35, T36, T37, T38, NoneT, NoneT, NoneT, NoneT,
- NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT> {
- typedef Templates38<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13,
- T14, T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27,
- T28, T29, T30, T31, T32, T33, T34, T35, T36, T37, T38> type;
-};
-template <GTEST_TEMPLATE_ T1, GTEST_TEMPLATE_ T2, GTEST_TEMPLATE_ T3,
- GTEST_TEMPLATE_ T4, GTEST_TEMPLATE_ T5, GTEST_TEMPLATE_ T6,
- GTEST_TEMPLATE_ T7, GTEST_TEMPLATE_ T8, GTEST_TEMPLATE_ T9,
- GTEST_TEMPLATE_ T10, GTEST_TEMPLATE_ T11, GTEST_TEMPLATE_ T12,
- GTEST_TEMPLATE_ T13, GTEST_TEMPLATE_ T14, GTEST_TEMPLATE_ T15,
- GTEST_TEMPLATE_ T16, GTEST_TEMPLATE_ T17, GTEST_TEMPLATE_ T18,
- GTEST_TEMPLATE_ T19, GTEST_TEMPLATE_ T20, GTEST_TEMPLATE_ T21,
- GTEST_TEMPLATE_ T22, GTEST_TEMPLATE_ T23, GTEST_TEMPLATE_ T24,
- GTEST_TEMPLATE_ T25, GTEST_TEMPLATE_ T26, GTEST_TEMPLATE_ T27,
- GTEST_TEMPLATE_ T28, GTEST_TEMPLATE_ T29, GTEST_TEMPLATE_ T30,
- GTEST_TEMPLATE_ T31, GTEST_TEMPLATE_ T32, GTEST_TEMPLATE_ T33,
- GTEST_TEMPLATE_ T34, GTEST_TEMPLATE_ T35, GTEST_TEMPLATE_ T36,
- GTEST_TEMPLATE_ T37, GTEST_TEMPLATE_ T38, GTEST_TEMPLATE_ T39>
-struct Templates<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14,
- T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27, T28, T29,
- T30, T31, T32, T33, T34, T35, T36, T37, T38, T39, NoneT, NoneT, NoneT,
- NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT> {
- typedef Templates39<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13,
- T14, T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27,
- T28, T29, T30, T31, T32, T33, T34, T35, T36, T37, T38, T39> type;
-};
-template <GTEST_TEMPLATE_ T1, GTEST_TEMPLATE_ T2, GTEST_TEMPLATE_ T3,
- GTEST_TEMPLATE_ T4, GTEST_TEMPLATE_ T5, GTEST_TEMPLATE_ T6,
- GTEST_TEMPLATE_ T7, GTEST_TEMPLATE_ T8, GTEST_TEMPLATE_ T9,
- GTEST_TEMPLATE_ T10, GTEST_TEMPLATE_ T11, GTEST_TEMPLATE_ T12,
- GTEST_TEMPLATE_ T13, GTEST_TEMPLATE_ T14, GTEST_TEMPLATE_ T15,
- GTEST_TEMPLATE_ T16, GTEST_TEMPLATE_ T17, GTEST_TEMPLATE_ T18,
- GTEST_TEMPLATE_ T19, GTEST_TEMPLATE_ T20, GTEST_TEMPLATE_ T21,
- GTEST_TEMPLATE_ T22, GTEST_TEMPLATE_ T23, GTEST_TEMPLATE_ T24,
- GTEST_TEMPLATE_ T25, GTEST_TEMPLATE_ T26, GTEST_TEMPLATE_ T27,
- GTEST_TEMPLATE_ T28, GTEST_TEMPLATE_ T29, GTEST_TEMPLATE_ T30,
- GTEST_TEMPLATE_ T31, GTEST_TEMPLATE_ T32, GTEST_TEMPLATE_ T33,
- GTEST_TEMPLATE_ T34, GTEST_TEMPLATE_ T35, GTEST_TEMPLATE_ T36,
- GTEST_TEMPLATE_ T37, GTEST_TEMPLATE_ T38, GTEST_TEMPLATE_ T39,
- GTEST_TEMPLATE_ T40>
-struct Templates<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14,
- T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27, T28, T29,
- T30, T31, T32, T33, T34, T35, T36, T37, T38, T39, T40, NoneT, NoneT, NoneT,
- NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT> {
- typedef Templates40<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13,
- T14, T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27,
- T28, T29, T30, T31, T32, T33, T34, T35, T36, T37, T38, T39, T40> type;
-};
-template <GTEST_TEMPLATE_ T1, GTEST_TEMPLATE_ T2, GTEST_TEMPLATE_ T3,
- GTEST_TEMPLATE_ T4, GTEST_TEMPLATE_ T5, GTEST_TEMPLATE_ T6,
- GTEST_TEMPLATE_ T7, GTEST_TEMPLATE_ T8, GTEST_TEMPLATE_ T9,
- GTEST_TEMPLATE_ T10, GTEST_TEMPLATE_ T11, GTEST_TEMPLATE_ T12,
- GTEST_TEMPLATE_ T13, GTEST_TEMPLATE_ T14, GTEST_TEMPLATE_ T15,
- GTEST_TEMPLATE_ T16, GTEST_TEMPLATE_ T17, GTEST_TEMPLATE_ T18,
- GTEST_TEMPLATE_ T19, GTEST_TEMPLATE_ T20, GTEST_TEMPLATE_ T21,
- GTEST_TEMPLATE_ T22, GTEST_TEMPLATE_ T23, GTEST_TEMPLATE_ T24,
- GTEST_TEMPLATE_ T25, GTEST_TEMPLATE_ T26, GTEST_TEMPLATE_ T27,
- GTEST_TEMPLATE_ T28, GTEST_TEMPLATE_ T29, GTEST_TEMPLATE_ T30,
- GTEST_TEMPLATE_ T31, GTEST_TEMPLATE_ T32, GTEST_TEMPLATE_ T33,
- GTEST_TEMPLATE_ T34, GTEST_TEMPLATE_ T35, GTEST_TEMPLATE_ T36,
- GTEST_TEMPLATE_ T37, GTEST_TEMPLATE_ T38, GTEST_TEMPLATE_ T39,
- GTEST_TEMPLATE_ T40, GTEST_TEMPLATE_ T41>
-struct Templates<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14,
- T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27, T28, T29,
- T30, T31, T32, T33, T34, T35, T36, T37, T38, T39, T40, T41, NoneT, NoneT,
- NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT> {
- typedef Templates41<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13,
- T14, T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27,
- T28, T29, T30, T31, T32, T33, T34, T35, T36, T37, T38, T39, T40,
- T41> type;
-};
-template <GTEST_TEMPLATE_ T1, GTEST_TEMPLATE_ T2, GTEST_TEMPLATE_ T3,
- GTEST_TEMPLATE_ T4, GTEST_TEMPLATE_ T5, GTEST_TEMPLATE_ T6,
- GTEST_TEMPLATE_ T7, GTEST_TEMPLATE_ T8, GTEST_TEMPLATE_ T9,
- GTEST_TEMPLATE_ T10, GTEST_TEMPLATE_ T11, GTEST_TEMPLATE_ T12,
- GTEST_TEMPLATE_ T13, GTEST_TEMPLATE_ T14, GTEST_TEMPLATE_ T15,
- GTEST_TEMPLATE_ T16, GTEST_TEMPLATE_ T17, GTEST_TEMPLATE_ T18,
- GTEST_TEMPLATE_ T19, GTEST_TEMPLATE_ T20, GTEST_TEMPLATE_ T21,
- GTEST_TEMPLATE_ T22, GTEST_TEMPLATE_ T23, GTEST_TEMPLATE_ T24,
- GTEST_TEMPLATE_ T25, GTEST_TEMPLATE_ T26, GTEST_TEMPLATE_ T27,
- GTEST_TEMPLATE_ T28, GTEST_TEMPLATE_ T29, GTEST_TEMPLATE_ T30,
- GTEST_TEMPLATE_ T31, GTEST_TEMPLATE_ T32, GTEST_TEMPLATE_ T33,
- GTEST_TEMPLATE_ T34, GTEST_TEMPLATE_ T35, GTEST_TEMPLATE_ T36,
- GTEST_TEMPLATE_ T37, GTEST_TEMPLATE_ T38, GTEST_TEMPLATE_ T39,
- GTEST_TEMPLATE_ T40, GTEST_TEMPLATE_ T41, GTEST_TEMPLATE_ T42>
-struct Templates<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14,
- T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27, T28, T29,
- T30, T31, T32, T33, T34, T35, T36, T37, T38, T39, T40, T41, T42, NoneT,
- NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT> {
- typedef Templates42<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13,
- T14, T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27,
- T28, T29, T30, T31, T32, T33, T34, T35, T36, T37, T38, T39, T40, T41,
- T42> type;
-};
-template <GTEST_TEMPLATE_ T1, GTEST_TEMPLATE_ T2, GTEST_TEMPLATE_ T3,
- GTEST_TEMPLATE_ T4, GTEST_TEMPLATE_ T5, GTEST_TEMPLATE_ T6,
- GTEST_TEMPLATE_ T7, GTEST_TEMPLATE_ T8, GTEST_TEMPLATE_ T9,
- GTEST_TEMPLATE_ T10, GTEST_TEMPLATE_ T11, GTEST_TEMPLATE_ T12,
- GTEST_TEMPLATE_ T13, GTEST_TEMPLATE_ T14, GTEST_TEMPLATE_ T15,
- GTEST_TEMPLATE_ T16, GTEST_TEMPLATE_ T17, GTEST_TEMPLATE_ T18,
- GTEST_TEMPLATE_ T19, GTEST_TEMPLATE_ T20, GTEST_TEMPLATE_ T21,
- GTEST_TEMPLATE_ T22, GTEST_TEMPLATE_ T23, GTEST_TEMPLATE_ T24,
- GTEST_TEMPLATE_ T25, GTEST_TEMPLATE_ T26, GTEST_TEMPLATE_ T27,
- GTEST_TEMPLATE_ T28, GTEST_TEMPLATE_ T29, GTEST_TEMPLATE_ T30,
- GTEST_TEMPLATE_ T31, GTEST_TEMPLATE_ T32, GTEST_TEMPLATE_ T33,
- GTEST_TEMPLATE_ T34, GTEST_TEMPLATE_ T35, GTEST_TEMPLATE_ T36,
- GTEST_TEMPLATE_ T37, GTEST_TEMPLATE_ T38, GTEST_TEMPLATE_ T39,
- GTEST_TEMPLATE_ T40, GTEST_TEMPLATE_ T41, GTEST_TEMPLATE_ T42,
- GTEST_TEMPLATE_ T43>
-struct Templates<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14,
- T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27, T28, T29,
- T30, T31, T32, T33, T34, T35, T36, T37, T38, T39, T40, T41, T42, T43,
- NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT> {
- typedef Templates43<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13,
- T14, T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27,
- T28, T29, T30, T31, T32, T33, T34, T35, T36, T37, T38, T39, T40, T41,
- T42, T43> type;
-};
-template <GTEST_TEMPLATE_ T1, GTEST_TEMPLATE_ T2, GTEST_TEMPLATE_ T3,
- GTEST_TEMPLATE_ T4, GTEST_TEMPLATE_ T5, GTEST_TEMPLATE_ T6,
- GTEST_TEMPLATE_ T7, GTEST_TEMPLATE_ T8, GTEST_TEMPLATE_ T9,
- GTEST_TEMPLATE_ T10, GTEST_TEMPLATE_ T11, GTEST_TEMPLATE_ T12,
- GTEST_TEMPLATE_ T13, GTEST_TEMPLATE_ T14, GTEST_TEMPLATE_ T15,
- GTEST_TEMPLATE_ T16, GTEST_TEMPLATE_ T17, GTEST_TEMPLATE_ T18,
- GTEST_TEMPLATE_ T19, GTEST_TEMPLATE_ T20, GTEST_TEMPLATE_ T21,
- GTEST_TEMPLATE_ T22, GTEST_TEMPLATE_ T23, GTEST_TEMPLATE_ T24,
- GTEST_TEMPLATE_ T25, GTEST_TEMPLATE_ T26, GTEST_TEMPLATE_ T27,
- GTEST_TEMPLATE_ T28, GTEST_TEMPLATE_ T29, GTEST_TEMPLATE_ T30,
- GTEST_TEMPLATE_ T31, GTEST_TEMPLATE_ T32, GTEST_TEMPLATE_ T33,
- GTEST_TEMPLATE_ T34, GTEST_TEMPLATE_ T35, GTEST_TEMPLATE_ T36,
- GTEST_TEMPLATE_ T37, GTEST_TEMPLATE_ T38, GTEST_TEMPLATE_ T39,
- GTEST_TEMPLATE_ T40, GTEST_TEMPLATE_ T41, GTEST_TEMPLATE_ T42,
- GTEST_TEMPLATE_ T43, GTEST_TEMPLATE_ T44>
-struct Templates<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14,
- T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27, T28, T29,
- T30, T31, T32, T33, T34, T35, T36, T37, T38, T39, T40, T41, T42, T43, T44,
- NoneT, NoneT, NoneT, NoneT, NoneT, NoneT> {
- typedef Templates44<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13,
- T14, T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27,
- T28, T29, T30, T31, T32, T33, T34, T35, T36, T37, T38, T39, T40, T41,
- T42, T43, T44> type;
-};
-template <GTEST_TEMPLATE_ T1, GTEST_TEMPLATE_ T2, GTEST_TEMPLATE_ T3,
- GTEST_TEMPLATE_ T4, GTEST_TEMPLATE_ T5, GTEST_TEMPLATE_ T6,
- GTEST_TEMPLATE_ T7, GTEST_TEMPLATE_ T8, GTEST_TEMPLATE_ T9,
- GTEST_TEMPLATE_ T10, GTEST_TEMPLATE_ T11, GTEST_TEMPLATE_ T12,
- GTEST_TEMPLATE_ T13, GTEST_TEMPLATE_ T14, GTEST_TEMPLATE_ T15,
- GTEST_TEMPLATE_ T16, GTEST_TEMPLATE_ T17, GTEST_TEMPLATE_ T18,
- GTEST_TEMPLATE_ T19, GTEST_TEMPLATE_ T20, GTEST_TEMPLATE_ T21,
- GTEST_TEMPLATE_ T22, GTEST_TEMPLATE_ T23, GTEST_TEMPLATE_ T24,
- GTEST_TEMPLATE_ T25, GTEST_TEMPLATE_ T26, GTEST_TEMPLATE_ T27,
- GTEST_TEMPLATE_ T28, GTEST_TEMPLATE_ T29, GTEST_TEMPLATE_ T30,
- GTEST_TEMPLATE_ T31, GTEST_TEMPLATE_ T32, GTEST_TEMPLATE_ T33,
- GTEST_TEMPLATE_ T34, GTEST_TEMPLATE_ T35, GTEST_TEMPLATE_ T36,
- GTEST_TEMPLATE_ T37, GTEST_TEMPLATE_ T38, GTEST_TEMPLATE_ T39,
- GTEST_TEMPLATE_ T40, GTEST_TEMPLATE_ T41, GTEST_TEMPLATE_ T42,
- GTEST_TEMPLATE_ T43, GTEST_TEMPLATE_ T44, GTEST_TEMPLATE_ T45>
-struct Templates<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14,
- T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27, T28, T29,
- T30, T31, T32, T33, T34, T35, T36, T37, T38, T39, T40, T41, T42, T43, T44,
- T45, NoneT, NoneT, NoneT, NoneT, NoneT> {
- typedef Templates45<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13,
- T14, T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27,
- T28, T29, T30, T31, T32, T33, T34, T35, T36, T37, T38, T39, T40, T41,
- T42, T43, T44, T45> type;
-};
-template <GTEST_TEMPLATE_ T1, GTEST_TEMPLATE_ T2, GTEST_TEMPLATE_ T3,
- GTEST_TEMPLATE_ T4, GTEST_TEMPLATE_ T5, GTEST_TEMPLATE_ T6,
- GTEST_TEMPLATE_ T7, GTEST_TEMPLATE_ T8, GTEST_TEMPLATE_ T9,
- GTEST_TEMPLATE_ T10, GTEST_TEMPLATE_ T11, GTEST_TEMPLATE_ T12,
- GTEST_TEMPLATE_ T13, GTEST_TEMPLATE_ T14, GTEST_TEMPLATE_ T15,
- GTEST_TEMPLATE_ T16, GTEST_TEMPLATE_ T17, GTEST_TEMPLATE_ T18,
- GTEST_TEMPLATE_ T19, GTEST_TEMPLATE_ T20, GTEST_TEMPLATE_ T21,
- GTEST_TEMPLATE_ T22, GTEST_TEMPLATE_ T23, GTEST_TEMPLATE_ T24,
- GTEST_TEMPLATE_ T25, GTEST_TEMPLATE_ T26, GTEST_TEMPLATE_ T27,
- GTEST_TEMPLATE_ T28, GTEST_TEMPLATE_ T29, GTEST_TEMPLATE_ T30,
- GTEST_TEMPLATE_ T31, GTEST_TEMPLATE_ T32, GTEST_TEMPLATE_ T33,
- GTEST_TEMPLATE_ T34, GTEST_TEMPLATE_ T35, GTEST_TEMPLATE_ T36,
- GTEST_TEMPLATE_ T37, GTEST_TEMPLATE_ T38, GTEST_TEMPLATE_ T39,
- GTEST_TEMPLATE_ T40, GTEST_TEMPLATE_ T41, GTEST_TEMPLATE_ T42,
- GTEST_TEMPLATE_ T43, GTEST_TEMPLATE_ T44, GTEST_TEMPLATE_ T45,
- GTEST_TEMPLATE_ T46>
-struct Templates<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14,
- T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27, T28, T29,
- T30, T31, T32, T33, T34, T35, T36, T37, T38, T39, T40, T41, T42, T43, T44,
- T45, T46, NoneT, NoneT, NoneT, NoneT> {
- typedef Templates46<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13,
- T14, T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27,
- T28, T29, T30, T31, T32, T33, T34, T35, T36, T37, T38, T39, T40, T41,
- T42, T43, T44, T45, T46> type;
-};
-template <GTEST_TEMPLATE_ T1, GTEST_TEMPLATE_ T2, GTEST_TEMPLATE_ T3,
- GTEST_TEMPLATE_ T4, GTEST_TEMPLATE_ T5, GTEST_TEMPLATE_ T6,
- GTEST_TEMPLATE_ T7, GTEST_TEMPLATE_ T8, GTEST_TEMPLATE_ T9,
- GTEST_TEMPLATE_ T10, GTEST_TEMPLATE_ T11, GTEST_TEMPLATE_ T12,
- GTEST_TEMPLATE_ T13, GTEST_TEMPLATE_ T14, GTEST_TEMPLATE_ T15,
- GTEST_TEMPLATE_ T16, GTEST_TEMPLATE_ T17, GTEST_TEMPLATE_ T18,
- GTEST_TEMPLATE_ T19, GTEST_TEMPLATE_ T20, GTEST_TEMPLATE_ T21,
- GTEST_TEMPLATE_ T22, GTEST_TEMPLATE_ T23, GTEST_TEMPLATE_ T24,
- GTEST_TEMPLATE_ T25, GTEST_TEMPLATE_ T26, GTEST_TEMPLATE_ T27,
- GTEST_TEMPLATE_ T28, GTEST_TEMPLATE_ T29, GTEST_TEMPLATE_ T30,
- GTEST_TEMPLATE_ T31, GTEST_TEMPLATE_ T32, GTEST_TEMPLATE_ T33,
- GTEST_TEMPLATE_ T34, GTEST_TEMPLATE_ T35, GTEST_TEMPLATE_ T36,
- GTEST_TEMPLATE_ T37, GTEST_TEMPLATE_ T38, GTEST_TEMPLATE_ T39,
- GTEST_TEMPLATE_ T40, GTEST_TEMPLATE_ T41, GTEST_TEMPLATE_ T42,
- GTEST_TEMPLATE_ T43, GTEST_TEMPLATE_ T44, GTEST_TEMPLATE_ T45,
- GTEST_TEMPLATE_ T46, GTEST_TEMPLATE_ T47>
-struct Templates<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14,
- T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27, T28, T29,
- T30, T31, T32, T33, T34, T35, T36, T37, T38, T39, T40, T41, T42, T43, T44,
- T45, T46, T47, NoneT, NoneT, NoneT> {
- typedef Templates47<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13,
- T14, T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27,
- T28, T29, T30, T31, T32, T33, T34, T35, T36, T37, T38, T39, T40, T41,
- T42, T43, T44, T45, T46, T47> type;
-};
-template <GTEST_TEMPLATE_ T1, GTEST_TEMPLATE_ T2, GTEST_TEMPLATE_ T3,
- GTEST_TEMPLATE_ T4, GTEST_TEMPLATE_ T5, GTEST_TEMPLATE_ T6,
- GTEST_TEMPLATE_ T7, GTEST_TEMPLATE_ T8, GTEST_TEMPLATE_ T9,
- GTEST_TEMPLATE_ T10, GTEST_TEMPLATE_ T11, GTEST_TEMPLATE_ T12,
- GTEST_TEMPLATE_ T13, GTEST_TEMPLATE_ T14, GTEST_TEMPLATE_ T15,
- GTEST_TEMPLATE_ T16, GTEST_TEMPLATE_ T17, GTEST_TEMPLATE_ T18,
- GTEST_TEMPLATE_ T19, GTEST_TEMPLATE_ T20, GTEST_TEMPLATE_ T21,
- GTEST_TEMPLATE_ T22, GTEST_TEMPLATE_ T23, GTEST_TEMPLATE_ T24,
- GTEST_TEMPLATE_ T25, GTEST_TEMPLATE_ T26, GTEST_TEMPLATE_ T27,
- GTEST_TEMPLATE_ T28, GTEST_TEMPLATE_ T29, GTEST_TEMPLATE_ T30,
- GTEST_TEMPLATE_ T31, GTEST_TEMPLATE_ T32, GTEST_TEMPLATE_ T33,
- GTEST_TEMPLATE_ T34, GTEST_TEMPLATE_ T35, GTEST_TEMPLATE_ T36,
- GTEST_TEMPLATE_ T37, GTEST_TEMPLATE_ T38, GTEST_TEMPLATE_ T39,
- GTEST_TEMPLATE_ T40, GTEST_TEMPLATE_ T41, GTEST_TEMPLATE_ T42,
- GTEST_TEMPLATE_ T43, GTEST_TEMPLATE_ T44, GTEST_TEMPLATE_ T45,
- GTEST_TEMPLATE_ T46, GTEST_TEMPLATE_ T47, GTEST_TEMPLATE_ T48>
-struct Templates<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14,
- T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27, T28, T29,
- T30, T31, T32, T33, T34, T35, T36, T37, T38, T39, T40, T41, T42, T43, T44,
- T45, T46, T47, T48, NoneT, NoneT> {
- typedef Templates48<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13,
- T14, T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27,
- T28, T29, T30, T31, T32, T33, T34, T35, T36, T37, T38, T39, T40, T41,
- T42, T43, T44, T45, T46, T47, T48> type;
-};
-template <GTEST_TEMPLATE_ T1, GTEST_TEMPLATE_ T2, GTEST_TEMPLATE_ T3,
- GTEST_TEMPLATE_ T4, GTEST_TEMPLATE_ T5, GTEST_TEMPLATE_ T6,
- GTEST_TEMPLATE_ T7, GTEST_TEMPLATE_ T8, GTEST_TEMPLATE_ T9,
- GTEST_TEMPLATE_ T10, GTEST_TEMPLATE_ T11, GTEST_TEMPLATE_ T12,
- GTEST_TEMPLATE_ T13, GTEST_TEMPLATE_ T14, GTEST_TEMPLATE_ T15,
- GTEST_TEMPLATE_ T16, GTEST_TEMPLATE_ T17, GTEST_TEMPLATE_ T18,
- GTEST_TEMPLATE_ T19, GTEST_TEMPLATE_ T20, GTEST_TEMPLATE_ T21,
- GTEST_TEMPLATE_ T22, GTEST_TEMPLATE_ T23, GTEST_TEMPLATE_ T24,
- GTEST_TEMPLATE_ T25, GTEST_TEMPLATE_ T26, GTEST_TEMPLATE_ T27,
- GTEST_TEMPLATE_ T28, GTEST_TEMPLATE_ T29, GTEST_TEMPLATE_ T30,
- GTEST_TEMPLATE_ T31, GTEST_TEMPLATE_ T32, GTEST_TEMPLATE_ T33,
- GTEST_TEMPLATE_ T34, GTEST_TEMPLATE_ T35, GTEST_TEMPLATE_ T36,
- GTEST_TEMPLATE_ T37, GTEST_TEMPLATE_ T38, GTEST_TEMPLATE_ T39,
- GTEST_TEMPLATE_ T40, GTEST_TEMPLATE_ T41, GTEST_TEMPLATE_ T42,
- GTEST_TEMPLATE_ T43, GTEST_TEMPLATE_ T44, GTEST_TEMPLATE_ T45,
- GTEST_TEMPLATE_ T46, GTEST_TEMPLATE_ T47, GTEST_TEMPLATE_ T48,
- GTEST_TEMPLATE_ T49>
-struct Templates<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14,
- T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27, T28, T29,
- T30, T31, T32, T33, T34, T35, T36, T37, T38, T39, T40, T41, T42, T43, T44,
- T45, T46, T47, T48, T49, NoneT> {
- typedef Templates49<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13,
- T14, T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27,
- T28, T29, T30, T31, T32, T33, T34, T35, T36, T37, T38, T39, T40, T41,
- T42, T43, T44, T45, T46, T47, T48, T49> type;
-};
-
-// The TypeList template makes it possible to use either a single type
-// or a Types<...> list in TYPED_TEST_CASE() and
-// INSTANTIATE_TYPED_TEST_CASE_P().
-
-template <typename T>
-struct TypeList {
- typedef Types1<T> type;
-};
-
-template <typename T1, typename T2, typename T3, typename T4, typename T5,
- typename T6, typename T7, typename T8, typename T9, typename T10,
- typename T11, typename T12, typename T13, typename T14, typename T15,
- typename T16, typename T17, typename T18, typename T19, typename T20,
- typename T21, typename T22, typename T23, typename T24, typename T25,
- typename T26, typename T27, typename T28, typename T29, typename T30,
- typename T31, typename T32, typename T33, typename T34, typename T35,
- typename T36, typename T37, typename T38, typename T39, typename T40,
- typename T41, typename T42, typename T43, typename T44, typename T45,
- typename T46, typename T47, typename T48, typename T49, typename T50>
-struct TypeList<Types<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13,
- T14, T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27, T28,
- T29, T30, T31, T32, T33, T34, T35, T36, T37, T38, T39, T40, T41, T42, T43,
- T44, T45, T46, T47, T48, T49, T50> > {
- typedef typename Types<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12,
- T13, T14, T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26,
- T27, T28, T29, T30, T31, T32, T33, T34, T35, T36, T37, T38, T39, T40,
- T41, T42, T43, T44, T45, T46, T47, T48, T49, T50>::type type;
-};
-
-#endif // GTEST_HAS_TYPED_TEST || GTEST_HAS_TYPED_TEST_P
-
-} // namespace internal
-} // namespace testing
-
-#endif // GTEST_INCLUDE_GTEST_INTERNAL_GTEST_TYPE_UTIL_H_
diff --git a/third_party/aom/third_party/googletest/src/googletest/include/gtest/internal/gtest-type-util.h.pump b/third_party/aom/third_party/googletest/src/googletest/include/gtest/internal/gtest-type-util.h.pump
deleted file mode 100644
index 251fdf025..000000000
--- a/third_party/aom/third_party/googletest/src/googletest/include/gtest/internal/gtest-type-util.h.pump
+++ /dev/null
@@ -1,297 +0,0 @@
-$$ -*- mode: c++; -*-
-$var n = 50 $$ Maximum length of type lists we want to support.
-// Copyright 2008 Google Inc.
-// All Rights Reserved.
-//
-// Redistribution and use in source and binary forms, with or without
-// modification, are permitted provided that the following conditions are
-// met:
-//
-// * Redistributions of source code must retain the above copyright
-// notice, this list of conditions and the following disclaimer.
-// * Redistributions in binary form must reproduce the above
-// copyright notice, this list of conditions and the following disclaimer
-// in the documentation and/or other materials provided with the
-// distribution.
-// * Neither the name of Google Inc. nor the names of its
-// contributors may be used to endorse or promote products derived from
-// this software without specific prior written permission.
-//
-// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
-// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
-// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
-// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
-// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
-// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
-// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
-// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
-// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-//
-// Author: wan@google.com (Zhanyong Wan)
-
-// Type utilities needed for implementing typed and type-parameterized
-// tests. This file is generated by a SCRIPT. DO NOT EDIT BY HAND!
-//
-// Currently we support at most $n types in a list, and at most $n
-// type-parameterized tests in one type-parameterized test case.
-// Please contact googletestframework@googlegroups.com if you need
-// more.
-
-#ifndef GTEST_INCLUDE_GTEST_INTERNAL_GTEST_TYPE_UTIL_H_
-#define GTEST_INCLUDE_GTEST_INTERNAL_GTEST_TYPE_UTIL_H_
-
-#include "gtest/internal/gtest-port.h"
-
-// #ifdef __GNUC__ is too general here. It is possible to use gcc without using
-// libstdc++ (which is where cxxabi.h comes from).
-# if GTEST_HAS_CXXABI_H_
-# include <cxxabi.h>
-# elif defined(__HP_aCC)
-# include <acxx_demangle.h>
-# endif // GTEST_HASH_CXXABI_H_
-
-namespace testing {
-namespace internal {
-
-// GetTypeName<T>() returns a human-readable name of type T.
-// NB: This function is also used in Google Mock, so don't move it inside of
-// the typed-test-only section below.
-template <typename T>
-std::string GetTypeName() {
-# if GTEST_HAS_RTTI
-
- const char* const name = typeid(T).name();
-# if GTEST_HAS_CXXABI_H_ || defined(__HP_aCC)
- int status = 0;
- // gcc's implementation of typeid(T).name() mangles the type name,
- // so we have to demangle it.
-# if GTEST_HAS_CXXABI_H_
- using abi::__cxa_demangle;
-# endif // GTEST_HAS_CXXABI_H_
- char* const readable_name = __cxa_demangle(name, 0, 0, &status);
- const std::string name_str(status == 0 ? readable_name : name);
- free(readable_name);
- return name_str;
-# else
- return name;
-# endif // GTEST_HAS_CXXABI_H_ || __HP_aCC
-
-# else
-
- return "<type>";
-
-# endif // GTEST_HAS_RTTI
-}
-
-#if GTEST_HAS_TYPED_TEST || GTEST_HAS_TYPED_TEST_P
-
-// AssertyTypeEq<T1, T2>::type is defined iff T1 and T2 are the same
-// type. This can be used as a compile-time assertion to ensure that
-// two types are equal.
-
-template <typename T1, typename T2>
-struct AssertTypeEq;
-
-template <typename T>
-struct AssertTypeEq<T, T> {
- typedef bool type;
-};
-
-// A unique type used as the default value for the arguments of class
-// template Types. This allows us to simulate variadic templates
-// (e.g. Types<int>, Type<int, double>, and etc), which C++ doesn't
-// support directly.
-struct None {};
-
-// The following family of struct and struct templates are used to
-// represent type lists. In particular, TypesN<T1, T2, ..., TN>
-// represents a type list with N types (T1, T2, ..., and TN) in it.
-// Except for Types0, every struct in the family has two member types:
-// Head for the first type in the list, and Tail for the rest of the
-// list.
-
-// The empty type list.
-struct Types0 {};
-
-// Type lists of length 1, 2, 3, and so on.
-
-template <typename T1>
-struct Types1 {
- typedef T1 Head;
- typedef Types0 Tail;
-};
-
-$range i 2..n
-
-$for i [[
-$range j 1..i
-$range k 2..i
-template <$for j, [[typename T$j]]>
-struct Types$i {
- typedef T1 Head;
- typedef Types$(i-1)<$for k, [[T$k]]> Tail;
-};
-
-
-]]
-
-} // namespace internal
-
-// We don't want to require the users to write TypesN<...> directly,
-// as that would require them to count the length. Types<...> is much
-// easier to write, but generates horrible messages when there is a
-// compiler error, as gcc insists on printing out each template
-// argument, even if it has the default value (this means Types<int>
-// will appear as Types<int, None, None, ..., None> in the compiler
-// errors).
-//
-// Our solution is to combine the best part of the two approaches: a
-// user would write Types<T1, ..., TN>, and Google Test will translate
-// that to TypesN<T1, ..., TN> internally to make error messages
-// readable. The translation is done by the 'type' member of the
-// Types template.
-
-$range i 1..n
-template <$for i, [[typename T$i = internal::None]]>
-struct Types {
- typedef internal::Types$n<$for i, [[T$i]]> type;
-};
-
-template <>
-struct Types<$for i, [[internal::None]]> {
- typedef internal::Types0 type;
-};
-
-$range i 1..n-1
-$for i [[
-$range j 1..i
-$range k i+1..n
-template <$for j, [[typename T$j]]>
-struct Types<$for j, [[T$j]]$for k[[, internal::None]]> {
- typedef internal::Types$i<$for j, [[T$j]]> type;
-};
-
-]]
-
-namespace internal {
-
-# define GTEST_TEMPLATE_ template <typename T> class
-
-// The template "selector" struct TemplateSel<Tmpl> is used to
-// represent Tmpl, which must be a class template with one type
-// parameter, as a type. TemplateSel<Tmpl>::Bind<T>::type is defined
-// as the type Tmpl<T>. This allows us to actually instantiate the
-// template "selected" by TemplateSel<Tmpl>.
-//
-// This trick is necessary for simulating typedef for class templates,
-// which C++ doesn't support directly.
-template <GTEST_TEMPLATE_ Tmpl>
-struct TemplateSel {
- template <typename T>
- struct Bind {
- typedef Tmpl<T> type;
- };
-};
-
-# define GTEST_BIND_(TmplSel, T) \
- TmplSel::template Bind<T>::type
-
-// A unique struct template used as the default value for the
-// arguments of class template Templates. This allows us to simulate
-// variadic templates (e.g. Templates<int>, Templates<int, double>,
-// and etc), which C++ doesn't support directly.
-template <typename T>
-struct NoneT {};
-
-// The following family of struct and struct templates are used to
-// represent template lists. In particular, TemplatesN<T1, T2, ...,
-// TN> represents a list of N templates (T1, T2, ..., and TN). Except
-// for Templates0, every struct in the family has two member types:
-// Head for the selector of the first template in the list, and Tail
-// for the rest of the list.
-
-// The empty template list.
-struct Templates0 {};
-
-// Template lists of length 1, 2, 3, and so on.
-
-template <GTEST_TEMPLATE_ T1>
-struct Templates1 {
- typedef TemplateSel<T1> Head;
- typedef Templates0 Tail;
-};
-
-$range i 2..n
-
-$for i [[
-$range j 1..i
-$range k 2..i
-template <$for j, [[GTEST_TEMPLATE_ T$j]]>
-struct Templates$i {
- typedef TemplateSel<T1> Head;
- typedef Templates$(i-1)<$for k, [[T$k]]> Tail;
-};
-
-
-]]
-
-// We don't want to require the users to write TemplatesN<...> directly,
-// as that would require them to count the length. Templates<...> is much
-// easier to write, but generates horrible messages when there is a
-// compiler error, as gcc insists on printing out each template
-// argument, even if it has the default value (this means Templates<list>
-// will appear as Templates<list, NoneT, NoneT, ..., NoneT> in the compiler
-// errors).
-//
-// Our solution is to combine the best part of the two approaches: a
-// user would write Templates<T1, ..., TN>, and Google Test will translate
-// that to TemplatesN<T1, ..., TN> internally to make error messages
-// readable. The translation is done by the 'type' member of the
-// Templates template.
-
-$range i 1..n
-template <$for i, [[GTEST_TEMPLATE_ T$i = NoneT]]>
-struct Templates {
- typedef Templates$n<$for i, [[T$i]]> type;
-};
-
-template <>
-struct Templates<$for i, [[NoneT]]> {
- typedef Templates0 type;
-};
-
-$range i 1..n-1
-$for i [[
-$range j 1..i
-$range k i+1..n
-template <$for j, [[GTEST_TEMPLATE_ T$j]]>
-struct Templates<$for j, [[T$j]]$for k[[, NoneT]]> {
- typedef Templates$i<$for j, [[T$j]]> type;
-};
-
-]]
-
-// The TypeList template makes it possible to use either a single type
-// or a Types<...> list in TYPED_TEST_CASE() and
-// INSTANTIATE_TYPED_TEST_CASE_P().
-
-template <typename T>
-struct TypeList {
- typedef Types1<T> type;
-};
-
-
-$range i 1..n
-template <$for i, [[typename T$i]]>
-struct TypeList<Types<$for i, [[T$i]]> > {
- typedef typename Types<$for i, [[T$i]]>::type type;
-};
-
-#endif // GTEST_HAS_TYPED_TEST || GTEST_HAS_TYPED_TEST_P
-
-} // namespace internal
-} // namespace testing
-
-#endif // GTEST_INCLUDE_GTEST_INTERNAL_GTEST_TYPE_UTIL_H_
diff --git a/third_party/aom/third_party/googletest/src/googletest/src/gtest-all.cc b/third_party/aom/third_party/googletest/src/googletest/src/gtest-all.cc
deleted file mode 100644
index 0a9cee522..000000000
--- a/third_party/aom/third_party/googletest/src/googletest/src/gtest-all.cc
+++ /dev/null
@@ -1,48 +0,0 @@
-// Copyright 2008, Google Inc.
-// All rights reserved.
-//
-// Redistribution and use in source and binary forms, with or without
-// modification, are permitted provided that the following conditions are
-// met:
-//
-// * Redistributions of source code must retain the above copyright
-// notice, this list of conditions and the following disclaimer.
-// * Redistributions in binary form must reproduce the above
-// copyright notice, this list of conditions and the following disclaimer
-// in the documentation and/or other materials provided with the
-// distribution.
-// * Neither the name of Google Inc. nor the names of its
-// contributors may be used to endorse or promote products derived from
-// this software without specific prior written permission.
-//
-// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
-// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
-// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
-// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
-// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
-// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
-// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
-// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
-// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-//
-// Author: mheule@google.com (Markus Heule)
-//
-// Google C++ Testing Framework (Google Test)
-//
-// Sometimes it's desirable to build Google Test by compiling a single file.
-// This file serves this purpose.
-
-// This line ensures that gtest.h can be compiled on its own, even
-// when it's fused.
-#include "gtest/gtest.h"
-
-// The following lines pull in the real gtest *.cc files.
-#include "src/gtest.cc"
-#include "src/gtest-death-test.cc"
-#include "src/gtest-filepath.cc"
-#include "src/gtest-port.cc"
-#include "src/gtest-printers.cc"
-#include "src/gtest-test-part.cc"
-#include "src/gtest-typed-test.cc"
diff --git a/third_party/aom/third_party/googletest/src/googletest/src/gtest-death-test.cc b/third_party/aom/third_party/googletest/src/googletest/src/gtest-death-test.cc
deleted file mode 100644
index a01a36983..000000000
--- a/third_party/aom/third_party/googletest/src/googletest/src/gtest-death-test.cc
+++ /dev/null
@@ -1,1342 +0,0 @@
-// Copyright 2005, Google Inc.
-// All rights reserved.
-//
-// Redistribution and use in source and binary forms, with or without
-// modification, are permitted provided that the following conditions are
-// met:
-//
-// * Redistributions of source code must retain the above copyright
-// notice, this list of conditions and the following disclaimer.
-// * Redistributions in binary form must reproduce the above
-// copyright notice, this list of conditions and the following disclaimer
-// in the documentation and/or other materials provided with the
-// distribution.
-// * Neither the name of Google Inc. nor the names of its
-// contributors may be used to endorse or promote products derived from
-// this software without specific prior written permission.
-//
-// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
-// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
-// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
-// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
-// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
-// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
-// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
-// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
-// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-//
-// Author: wan@google.com (Zhanyong Wan), vladl@google.com (Vlad Losev)
-//
-// This file implements death tests.
-
-#include "gtest/gtest-death-test.h"
-#include "gtest/internal/gtest-port.h"
-#include "gtest/internal/custom/gtest.h"
-
-#if GTEST_HAS_DEATH_TEST
-
-# if GTEST_OS_MAC
-# include <crt_externs.h>
-# endif // GTEST_OS_MAC
-
-# include <errno.h>
-# include <fcntl.h>
-# include <limits.h>
-
-# if GTEST_OS_LINUX
-# include <signal.h>
-# endif // GTEST_OS_LINUX
-
-# include <stdarg.h>
-
-# if GTEST_OS_WINDOWS
-# include <windows.h>
-# else
-# include <sys/mman.h>
-# include <sys/wait.h>
-# endif // GTEST_OS_WINDOWS
-
-# if GTEST_OS_QNX
-# include <spawn.h>
-# endif // GTEST_OS_QNX
-
-#endif // GTEST_HAS_DEATH_TEST
-
-#include "gtest/gtest-message.h"
-#include "gtest/internal/gtest-string.h"
-
-// Indicates that this translation unit is part of Google Test's
-// implementation. It must come before gtest-internal-inl.h is
-// included, or there will be a compiler error. This trick exists to
-// prevent the accidental inclusion of gtest-internal-inl.h in the
-// user's code.
-#define GTEST_IMPLEMENTATION_ 1
-#include "src/gtest-internal-inl.h"
-#undef GTEST_IMPLEMENTATION_
-
-namespace testing {
-
-// Constants.
-
-// The default death test style.
-static const char kDefaultDeathTestStyle[] = "fast";
-
-GTEST_DEFINE_string_(
- death_test_style,
- internal::StringFromGTestEnv("death_test_style", kDefaultDeathTestStyle),
- "Indicates how to run a death test in a forked child process: "
- "\"threadsafe\" (child process re-executes the test binary "
- "from the beginning, running only the specific death test) or "
- "\"fast\" (child process runs the death test immediately "
- "after forking).");
-
-GTEST_DEFINE_bool_(
- death_test_use_fork,
- internal::BoolFromGTestEnv("death_test_use_fork", false),
- "Instructs to use fork()/_exit() instead of clone() in death tests. "
- "Ignored and always uses fork() on POSIX systems where clone() is not "
- "implemented. Useful when running under valgrind or similar tools if "
- "those do not support clone(). Valgrind 3.3.1 will just fail if "
- "it sees an unsupported combination of clone() flags. "
- "It is not recommended to use this flag w/o valgrind though it will "
- "work in 99% of the cases. Once valgrind is fixed, this flag will "
- "most likely be removed.");
-
-namespace internal {
-GTEST_DEFINE_string_(
- internal_run_death_test, "",
- "Indicates the file, line number, temporal index of "
- "the single death test to run, and a file descriptor to "
- "which a success code may be sent, all separated by "
- "the '|' characters. This flag is specified if and only if the current "
- "process is a sub-process launched for running a thread-safe "
- "death test. FOR INTERNAL USE ONLY.");
-} // namespace internal
-
-#if GTEST_HAS_DEATH_TEST
-
-namespace internal {
-
-// Valid only for fast death tests. Indicates the code is running in the
-// child process of a fast style death test.
-# if !GTEST_OS_WINDOWS
-static bool g_in_fast_death_test_child = false;
-# endif
-
-// Returns a Boolean value indicating whether the caller is currently
-// executing in the context of the death test child process. Tools such as
-// Valgrind heap checkers may need this to modify their behavior in death
-// tests. IMPORTANT: This is an internal utility. Using it may break the
-// implementation of death tests. User code MUST NOT use it.
-bool InDeathTestChild() {
-# if GTEST_OS_WINDOWS
-
- // On Windows, death tests are thread-safe regardless of the value of the
- // death_test_style flag.
- return !GTEST_FLAG(internal_run_death_test).empty();
-
-# else
-
- if (GTEST_FLAG(death_test_style) == "threadsafe")
- return !GTEST_FLAG(internal_run_death_test).empty();
- else
- return g_in_fast_death_test_child;
-#endif
-}
-
-} // namespace internal
-
-// ExitedWithCode constructor.
-ExitedWithCode::ExitedWithCode(int exit_code) : exit_code_(exit_code) {
-}
-
-// ExitedWithCode function-call operator.
-bool ExitedWithCode::operator()(int exit_status) const {
-# if GTEST_OS_WINDOWS
-
- return exit_status == exit_code_;
-
-# else
-
- return WIFEXITED(exit_status) && WEXITSTATUS(exit_status) == exit_code_;
-
-# endif // GTEST_OS_WINDOWS
-}
-
-# if !GTEST_OS_WINDOWS
-// KilledBySignal constructor.
-KilledBySignal::KilledBySignal(int signum) : signum_(signum) {
-}
-
-// KilledBySignal function-call operator.
-bool KilledBySignal::operator()(int exit_status) const {
-# if defined(GTEST_KILLED_BY_SIGNAL_OVERRIDE_)
- {
- bool result;
- if (GTEST_KILLED_BY_SIGNAL_OVERRIDE_(signum_, exit_status, &result)) {
- return result;
- }
- }
-# endif // defined(GTEST_KILLED_BY_SIGNAL_OVERRIDE_)
- return WIFSIGNALED(exit_status) && WTERMSIG(exit_status) == signum_;
-}
-# endif // !GTEST_OS_WINDOWS
-
-namespace internal {
-
-// Utilities needed for death tests.
-
-// Generates a textual description of a given exit code, in the format
-// specified by wait(2).
-static std::string ExitSummary(int exit_code) {
- Message m;
-
-# if GTEST_OS_WINDOWS
-
- m << "Exited with exit status " << exit_code;
-
-# else
-
- if (WIFEXITED(exit_code)) {
- m << "Exited with exit status " << WEXITSTATUS(exit_code);
- } else if (WIFSIGNALED(exit_code)) {
- m << "Terminated by signal " << WTERMSIG(exit_code);
- }
-# ifdef WCOREDUMP
- if (WCOREDUMP(exit_code)) {
- m << " (core dumped)";
- }
-# endif
-# endif // GTEST_OS_WINDOWS
-
- return m.GetString();
-}
-
-// Returns true if exit_status describes a process that was terminated
-// by a signal, or exited normally with a nonzero exit code.
-bool ExitedUnsuccessfully(int exit_status) {
- return !ExitedWithCode(0)(exit_status);
-}
-
-# if !GTEST_OS_WINDOWS
-// Generates a textual failure message when a death test finds more than
-// one thread running, or cannot determine the number of threads, prior
-// to executing the given statement. It is the responsibility of the
-// caller not to pass a thread_count of 1.
-static std::string DeathTestThreadWarning(size_t thread_count) {
- Message msg;
- msg << "Death tests use fork(), which is unsafe particularly"
- << " in a threaded context. For this test, " << GTEST_NAME_ << " ";
- if (thread_count == 0)
- msg << "couldn't detect the number of threads.";
- else
- msg << "detected " << thread_count << " threads.";
- return msg.GetString();
-}
-# endif // !GTEST_OS_WINDOWS
-
-// Flag characters for reporting a death test that did not die.
-static const char kDeathTestLived = 'L';
-static const char kDeathTestReturned = 'R';
-static const char kDeathTestThrew = 'T';
-static const char kDeathTestInternalError = 'I';
-
-// An enumeration describing all of the possible ways that a death test can
-// conclude. DIED means that the process died while executing the test
-// code; LIVED means that process lived beyond the end of the test code;
-// RETURNED means that the test statement attempted to execute a return
-// statement, which is not allowed; THREW means that the test statement
-// returned control by throwing an exception. IN_PROGRESS means the test
-// has not yet concluded.
-// TODO(vladl@google.com): Unify names and possibly values for
-// AbortReason, DeathTestOutcome, and flag characters above.
-enum DeathTestOutcome { IN_PROGRESS, DIED, LIVED, RETURNED, THREW };
-
-// Routine for aborting the program which is safe to call from an
-// exec-style death test child process, in which case the error
-// message is propagated back to the parent process. Otherwise, the
-// message is simply printed to stderr. In either case, the program
-// then exits with status 1.
-void DeathTestAbort(const std::string& message) {
- // On a POSIX system, this function may be called from a threadsafe-style
- // death test child process, which operates on a very small stack. Use
- // the heap for any additional non-minuscule memory requirements.
- const InternalRunDeathTestFlag* const flag =
- GetUnitTestImpl()->internal_run_death_test_flag();
- if (flag != NULL) {
- FILE* parent = posix::FDOpen(flag->write_fd(), "w");
- fputc(kDeathTestInternalError, parent);
- fprintf(parent, "%s", message.c_str());
- fflush(parent);
- _exit(1);
- } else {
- fprintf(stderr, "%s", message.c_str());
- fflush(stderr);
- posix::Abort();
- }
-}
-
-// A replacement for CHECK that calls DeathTestAbort if the assertion
-// fails.
-# define GTEST_DEATH_TEST_CHECK_(expression) \
- do { \
- if (!::testing::internal::IsTrue(expression)) { \
- DeathTestAbort( \
- ::std::string("CHECK failed: File ") + __FILE__ + ", line " \
- + ::testing::internal::StreamableToString(__LINE__) + ": " \
- + #expression); \
- } \
- } while (::testing::internal::AlwaysFalse())
-
-// This macro is similar to GTEST_DEATH_TEST_CHECK_, but it is meant for
-// evaluating any system call that fulfills two conditions: it must return
-// -1 on failure, and set errno to EINTR when it is interrupted and
-// should be tried again. The macro expands to a loop that repeatedly
-// evaluates the expression as long as it evaluates to -1 and sets
-// errno to EINTR. If the expression evaluates to -1 but errno is
-// something other than EINTR, DeathTestAbort is called.
-# define GTEST_DEATH_TEST_CHECK_SYSCALL_(expression) \
- do { \
- int gtest_retval; \
- do { \
- gtest_retval = (expression); \
- } while (gtest_retval == -1 && errno == EINTR); \
- if (gtest_retval == -1) { \
- DeathTestAbort( \
- ::std::string("CHECK failed: File ") + __FILE__ + ", line " \
- + ::testing::internal::StreamableToString(__LINE__) + ": " \
- + #expression + " != -1"); \
- } \
- } while (::testing::internal::AlwaysFalse())
-
-// Returns the message describing the last system error in errno.
-std::string GetLastErrnoDescription() {
- return errno == 0 ? "" : posix::StrError(errno);
-}
-
-// This is called from a death test parent process to read a failure
-// message from the death test child process and log it with the FATAL
-// severity. On Windows, the message is read from a pipe handle. On other
-// platforms, it is read from a file descriptor.
-static void FailFromInternalError(int fd) {
- Message error;
- char buffer[256];
- int num_read;
-
- do {
- while ((num_read = posix::Read(fd, buffer, 255)) > 0) {
- buffer[num_read] = '\0';
- error << buffer;
- }
- } while (num_read == -1 && errno == EINTR);
-
- if (num_read == 0) {
- GTEST_LOG_(FATAL) << error.GetString();
- } else {
- const int last_error = errno;
- GTEST_LOG_(FATAL) << "Error while reading death test internal: "
- << GetLastErrnoDescription() << " [" << last_error << "]";
- }
-}
-
-// Death test constructor. Increments the running death test count
-// for the current test.
-DeathTest::DeathTest() {
- TestInfo* const info = GetUnitTestImpl()->current_test_info();
- if (info == NULL) {
- DeathTestAbort("Cannot run a death test outside of a TEST or "
- "TEST_F construct");
- }
-}
-
-// Creates and returns a death test by dispatching to the current
-// death test factory.
-bool DeathTest::Create(const char* statement, const RE* regex,
- const char* file, int line, DeathTest** test) {
- return GetUnitTestImpl()->death_test_factory()->Create(
- statement, regex, file, line, test);
-}
-
-const char* DeathTest::LastMessage() {
- return last_death_test_message_.c_str();
-}
-
-void DeathTest::set_last_death_test_message(const std::string& message) {
- last_death_test_message_ = message;
-}
-
-std::string DeathTest::last_death_test_message_;
-
-// Provides cross platform implementation for some death functionality.
-class DeathTestImpl : public DeathTest {
- protected:
- DeathTestImpl(const char* a_statement, const RE* a_regex)
- : statement_(a_statement),
- regex_(a_regex),
- spawned_(false),
- status_(-1),
- outcome_(IN_PROGRESS),
- read_fd_(-1),
- write_fd_(-1) {}
-
- // read_fd_ is expected to be closed and cleared by a derived class.
- ~DeathTestImpl() { GTEST_DEATH_TEST_CHECK_(read_fd_ == -1); }
-
- void Abort(AbortReason reason);
- virtual bool Passed(bool status_ok);
-
- const char* statement() const { return statement_; }
- const RE* regex() const { return regex_; }
- bool spawned() const { return spawned_; }
- void set_spawned(bool is_spawned) { spawned_ = is_spawned; }
- int status() const { return status_; }
- void set_status(int a_status) { status_ = a_status; }
- DeathTestOutcome outcome() const { return outcome_; }
- void set_outcome(DeathTestOutcome an_outcome) { outcome_ = an_outcome; }
- int read_fd() const { return read_fd_; }
- void set_read_fd(int fd) { read_fd_ = fd; }
- int write_fd() const { return write_fd_; }
- void set_write_fd(int fd) { write_fd_ = fd; }
-
- // Called in the parent process only. Reads the result code of the death
- // test child process via a pipe, interprets it to set the outcome_
- // member, and closes read_fd_. Outputs diagnostics and terminates in
- // case of unexpected codes.
- void ReadAndInterpretStatusByte();
-
- private:
- // The textual content of the code this object is testing. This class
- // doesn't own this string and should not attempt to delete it.
- const char* const statement_;
- // The regular expression which test output must match. DeathTestImpl
- // doesn't own this object and should not attempt to delete it.
- const RE* const regex_;
- // True if the death test child process has been successfully spawned.
- bool spawned_;
- // The exit status of the child process.
- int status_;
- // How the death test concluded.
- DeathTestOutcome outcome_;
- // Descriptor to the read end of the pipe to the child process. It is
- // always -1 in the child process. The child keeps its write end of the
- // pipe in write_fd_.
- int read_fd_;
- // Descriptor to the child's write end of the pipe to the parent process.
- // It is always -1 in the parent process. The parent keeps its end of the
- // pipe in read_fd_.
- int write_fd_;
-};
-
-// Called in the parent process only. Reads the result code of the death
-// test child process via a pipe, interprets it to set the outcome_
-// member, and closes read_fd_. Outputs diagnostics and terminates in
-// case of unexpected codes.
-void DeathTestImpl::ReadAndInterpretStatusByte() {
- char flag;
- int bytes_read;
-
- // The read() here blocks until data is available (signifying the
- // failure of the death test) or until the pipe is closed (signifying
- // its success), so it's okay to call this in the parent before
- // the child process has exited.
- do {
- bytes_read = posix::Read(read_fd(), &flag, 1);
- } while (bytes_read == -1 && errno == EINTR);
-
- if (bytes_read == 0) {
- set_outcome(DIED);
- } else if (bytes_read == 1) {
- switch (flag) {
- case kDeathTestReturned:
- set_outcome(RETURNED);
- break;
- case kDeathTestThrew:
- set_outcome(THREW);
- break;
- case kDeathTestLived:
- set_outcome(LIVED);
- break;
- case kDeathTestInternalError:
- FailFromInternalError(read_fd()); // Does not return.
- break;
- default:
- GTEST_LOG_(FATAL) << "Death test child process reported "
- << "unexpected status byte ("
- << static_cast<unsigned int>(flag) << ")";
- }
- } else {
- GTEST_LOG_(FATAL) << "Read from death test child process failed: "
- << GetLastErrnoDescription();
- }
- GTEST_DEATH_TEST_CHECK_SYSCALL_(posix::Close(read_fd()));
- set_read_fd(-1);
-}
-
-// Signals that the death test code which should have exited, didn't.
-// Should be called only in a death test child process.
-// Writes a status byte to the child's status file descriptor, then
-// calls _exit(1).
-void DeathTestImpl::Abort(AbortReason reason) {
- // The parent process considers the death test to be a failure if
- // it finds any data in our pipe. So, here we write a single flag byte
- // to the pipe, then exit.
- const char status_ch =
- reason == TEST_DID_NOT_DIE ? kDeathTestLived :
- reason == TEST_THREW_EXCEPTION ? kDeathTestThrew : kDeathTestReturned;
-
- GTEST_DEATH_TEST_CHECK_SYSCALL_(posix::Write(write_fd(), &status_ch, 1));
- // We are leaking the descriptor here because on some platforms (i.e.,
- // when built as Windows DLL), destructors of global objects will still
- // run after calling _exit(). On such systems, write_fd_ will be
- // indirectly closed from the destructor of UnitTestImpl, causing double
- // close if it is also closed here. On debug configurations, double close
- // may assert. As there are no in-process buffers to flush here, we are
- // relying on the OS to close the descriptor after the process terminates
- // when the destructors are not run.
- _exit(1); // Exits w/o any normal exit hooks (we were supposed to crash)
-}
-
-// Returns an indented copy of stderr output for a death test.
-// This makes distinguishing death test output lines from regular log lines
-// much easier.
-static ::std::string FormatDeathTestOutput(const ::std::string& output) {
- ::std::string ret;
- for (size_t at = 0; ; ) {
- const size_t line_end = output.find('\n', at);
- ret += "[ DEATH ] ";
- if (line_end == ::std::string::npos) {
- ret += output.substr(at);
- break;
- }
- ret += output.substr(at, line_end + 1 - at);
- at = line_end + 1;
- }
- return ret;
-}
-
-// Assesses the success or failure of a death test, using both private
-// members which have previously been set, and one argument:
-//
-// Private data members:
-// outcome: An enumeration describing how the death test
-// concluded: DIED, LIVED, THREW, or RETURNED. The death test
-// fails in the latter three cases.
-// status: The exit status of the child process. On *nix, it is in the
-// in the format specified by wait(2). On Windows, this is the
-// value supplied to the ExitProcess() API or a numeric code
-// of the exception that terminated the program.
-// regex: A regular expression object to be applied to
-// the test's captured standard error output; the death test
-// fails if it does not match.
-//
-// Argument:
-// status_ok: true if exit_status is acceptable in the context of
-// this particular death test, which fails if it is false
-//
-// Returns true iff all of the above conditions are met. Otherwise, the
-// first failing condition, in the order given above, is the one that is
-// reported. Also sets the last death test message string.
-bool DeathTestImpl::Passed(bool status_ok) {
- if (!spawned())
- return false;
-
- const std::string error_message = GetCapturedStderr();
-
- bool success = false;
- Message buffer;
-
- buffer << "Death test: " << statement() << "\n";
- switch (outcome()) {
- case LIVED:
- buffer << " Result: failed to die.\n"
- << " Error msg:\n" << FormatDeathTestOutput(error_message);
- break;
- case THREW:
- buffer << " Result: threw an exception.\n"
- << " Error msg:\n" << FormatDeathTestOutput(error_message);
- break;
- case RETURNED:
- buffer << " Result: illegal return in test statement.\n"
- << " Error msg:\n" << FormatDeathTestOutput(error_message);
- break;
- case DIED:
- if (status_ok) {
- const bool matched = RE::PartialMatch(error_message.c_str(), *regex());
- if (matched) {
- success = true;
- } else {
- buffer << " Result: died but not with expected error.\n"
- << " Expected: " << regex()->pattern() << "\n"
- << "Actual msg:\n" << FormatDeathTestOutput(error_message);
- }
- } else {
- buffer << " Result: died but not with expected exit code:\n"
- << " " << ExitSummary(status()) << "\n"
- << "Actual msg:\n" << FormatDeathTestOutput(error_message);
- }
- break;
- case IN_PROGRESS:
- default:
- GTEST_LOG_(FATAL)
- << "DeathTest::Passed somehow called before conclusion of test";
- }
-
- DeathTest::set_last_death_test_message(buffer.GetString());
- return success;
-}
-
-# if GTEST_OS_WINDOWS
-// WindowsDeathTest implements death tests on Windows. Due to the
-// specifics of starting new processes on Windows, death tests there are
-// always threadsafe, and Google Test considers the
-// --gtest_death_test_style=fast setting to be equivalent to
-// --gtest_death_test_style=threadsafe there.
-//
-// A few implementation notes: Like the Linux version, the Windows
-// implementation uses pipes for child-to-parent communication. But due to
-// the specifics of pipes on Windows, some extra steps are required:
-//
-// 1. The parent creates a communication pipe and stores handles to both
-// ends of it.
-// 2. The parent starts the child and provides it with the information
-// necessary to acquire the handle to the write end of the pipe.
-// 3. The child acquires the write end of the pipe and signals the parent
-// using a Windows event.
-// 4. Now the parent can release the write end of the pipe on its side. If
-// this is done before step 3, the object's reference count goes down to
-// 0 and it is destroyed, preventing the child from acquiring it. The
-// parent now has to release it, or read operations on the read end of
-// the pipe will not return when the child terminates.
-// 5. The parent reads child's output through the pipe (outcome code and
-// any possible error messages) from the pipe, and its stderr and then
-// determines whether to fail the test.
-//
-// Note: to distinguish Win32 API calls from the local method and function
-// calls, the former are explicitly resolved in the global namespace.
-//
-class WindowsDeathTest : public DeathTestImpl {
- public:
- WindowsDeathTest(const char* a_statement,
- const RE* a_regex,
- const char* file,
- int line)
- : DeathTestImpl(a_statement, a_regex), file_(file), line_(line) {}
-
- // All of these virtual functions are inherited from DeathTest.
- virtual int Wait();
- virtual TestRole AssumeRole();
-
- private:
- // The name of the file in which the death test is located.
- const char* const file_;
- // The line number on which the death test is located.
- const int line_;
- // Handle to the write end of the pipe to the child process.
- AutoHandle write_handle_;
- // Child process handle.
- AutoHandle child_handle_;
- // Event the child process uses to signal the parent that it has
- // acquired the handle to the write end of the pipe. After seeing this
- // event the parent can release its own handles to make sure its
- // ReadFile() calls return when the child terminates.
- AutoHandle event_handle_;
-};
-
-// Waits for the child in a death test to exit, returning its exit
-// status, or 0 if no child process exists. As a side effect, sets the
-// outcome data member.
-int WindowsDeathTest::Wait() {
- if (!spawned())
- return 0;
-
- // Wait until the child either signals that it has acquired the write end
- // of the pipe or it dies.
- const HANDLE wait_handles[2] = { child_handle_.Get(), event_handle_.Get() };
- switch (::WaitForMultipleObjects(2,
- wait_handles,
- FALSE, // Waits for any of the handles.
- INFINITE)) {
- case WAIT_OBJECT_0:
- case WAIT_OBJECT_0 + 1:
- break;
- default:
- GTEST_DEATH_TEST_CHECK_(false); // Should not get here.
- }
-
- // The child has acquired the write end of the pipe or exited.
- // We release the handle on our side and continue.
- write_handle_.Reset();
- event_handle_.Reset();
-
- ReadAndInterpretStatusByte();
-
- // Waits for the child process to exit if it haven't already. This
- // returns immediately if the child has already exited, regardless of
- // whether previous calls to WaitForMultipleObjects synchronized on this
- // handle or not.
- GTEST_DEATH_TEST_CHECK_(
- WAIT_OBJECT_0 == ::WaitForSingleObject(child_handle_.Get(),
- INFINITE));
- DWORD status_code;
- GTEST_DEATH_TEST_CHECK_(
- ::GetExitCodeProcess(child_handle_.Get(), &status_code) != FALSE);
- child_handle_.Reset();
- set_status(static_cast<int>(status_code));
- return status();
-}
-
-// The AssumeRole process for a Windows death test. It creates a child
-// process with the same executable as the current process to run the
-// death test. The child process is given the --gtest_filter and
-// --gtest_internal_run_death_test flags such that it knows to run the
-// current death test only.
-DeathTest::TestRole WindowsDeathTest::AssumeRole() {
- const UnitTestImpl* const impl = GetUnitTestImpl();
- const InternalRunDeathTestFlag* const flag =
- impl->internal_run_death_test_flag();
- const TestInfo* const info = impl->current_test_info();
- const int death_test_index = info->result()->death_test_count();
-
- if (flag != NULL) {
- // ParseInternalRunDeathTestFlag() has performed all the necessary
- // processing.
- set_write_fd(flag->write_fd());
- return EXECUTE_TEST;
- }
-
- // WindowsDeathTest uses an anonymous pipe to communicate results of
- // a death test.
- SECURITY_ATTRIBUTES handles_are_inheritable = {
- sizeof(SECURITY_ATTRIBUTES), NULL, TRUE };
- HANDLE read_handle, write_handle;
- GTEST_DEATH_TEST_CHECK_(
- ::CreatePipe(&read_handle, &write_handle, &handles_are_inheritable,
- 0) // Default buffer size.
- != FALSE);
- set_read_fd(::_open_osfhandle(reinterpret_cast<intptr_t>(read_handle),
- O_RDONLY));
- write_handle_.Reset(write_handle);
- event_handle_.Reset(::CreateEvent(
- &handles_are_inheritable,
- TRUE, // The event will automatically reset to non-signaled state.
- FALSE, // The initial state is non-signalled.
- NULL)); // The even is unnamed.
- GTEST_DEATH_TEST_CHECK_(event_handle_.Get() != NULL);
- const std::string filter_flag =
- std::string("--") + GTEST_FLAG_PREFIX_ + kFilterFlag + "=" +
- info->test_case_name() + "." + info->name();
- const std::string internal_flag =
- std::string("--") + GTEST_FLAG_PREFIX_ + kInternalRunDeathTestFlag +
- "=" + file_ + "|" + StreamableToString(line_) + "|" +
- StreamableToString(death_test_index) + "|" +
- StreamableToString(static_cast<unsigned int>(::GetCurrentProcessId())) +
- // size_t has the same width as pointers on both 32-bit and 64-bit
- // Windows platforms.
- // See http://msdn.microsoft.com/en-us/library/tcxf1dw6.aspx.
- "|" + StreamableToString(reinterpret_cast<size_t>(write_handle)) +
- "|" + StreamableToString(reinterpret_cast<size_t>(event_handle_.Get()));
-
- char executable_path[_MAX_PATH + 1]; // NOLINT
- GTEST_DEATH_TEST_CHECK_(
- _MAX_PATH + 1 != ::GetModuleFileNameA(NULL,
- executable_path,
- _MAX_PATH));
-
- std::string command_line =
- std::string(::GetCommandLineA()) + " " + filter_flag + " \"" +
- internal_flag + "\"";
-
- DeathTest::set_last_death_test_message("");
-
- CaptureStderr();
- // Flush the log buffers since the log streams are shared with the child.
- FlushInfoLog();
-
- // The child process will share the standard handles with the parent.
- STARTUPINFOA startup_info;
- memset(&startup_info, 0, sizeof(STARTUPINFO));
- startup_info.dwFlags = STARTF_USESTDHANDLES;
- startup_info.hStdInput = ::GetStdHandle(STD_INPUT_HANDLE);
- startup_info.hStdOutput = ::GetStdHandle(STD_OUTPUT_HANDLE);
- startup_info.hStdError = ::GetStdHandle(STD_ERROR_HANDLE);
-
- PROCESS_INFORMATION process_info;
- GTEST_DEATH_TEST_CHECK_(::CreateProcessA(
- executable_path,
- const_cast<char*>(command_line.c_str()),
- NULL, // Retuned process handle is not inheritable.
- NULL, // Retuned thread handle is not inheritable.
- TRUE, // Child inherits all inheritable handles (for write_handle_).
- 0x0, // Default creation flags.
- NULL, // Inherit the parent's environment.
- UnitTest::GetInstance()->original_working_dir(),
- &startup_info,
- &process_info) != FALSE);
- child_handle_.Reset(process_info.hProcess);
- ::CloseHandle(process_info.hThread);
- set_spawned(true);
- return OVERSEE_TEST;
-}
-# else // We are not on Windows.
-
-// ForkingDeathTest provides implementations for most of the abstract
-// methods of the DeathTest interface. Only the AssumeRole method is
-// left undefined.
-class ForkingDeathTest : public DeathTestImpl {
- public:
- ForkingDeathTest(const char* statement, const RE* regex);
-
- // All of these virtual functions are inherited from DeathTest.
- virtual int Wait();
-
- protected:
- void set_child_pid(pid_t child_pid) { child_pid_ = child_pid; }
-
- private:
- // PID of child process during death test; 0 in the child process itself.
- pid_t child_pid_;
-};
-
-// Constructs a ForkingDeathTest.
-ForkingDeathTest::ForkingDeathTest(const char* a_statement, const RE* a_regex)
- : DeathTestImpl(a_statement, a_regex),
- child_pid_(-1) {}
-
-// Waits for the child in a death test to exit, returning its exit
-// status, or 0 if no child process exists. As a side effect, sets the
-// outcome data member.
-int ForkingDeathTest::Wait() {
- if (!spawned())
- return 0;
-
- ReadAndInterpretStatusByte();
-
- int status_value;
- GTEST_DEATH_TEST_CHECK_SYSCALL_(waitpid(child_pid_, &status_value, 0));
- set_status(status_value);
- return status_value;
-}
-
-// A concrete death test class that forks, then immediately runs the test
-// in the child process.
-class NoExecDeathTest : public ForkingDeathTest {
- public:
- NoExecDeathTest(const char* a_statement, const RE* a_regex) :
- ForkingDeathTest(a_statement, a_regex) { }
- virtual TestRole AssumeRole();
-};
-
-// The AssumeRole process for a fork-and-run death test. It implements a
-// straightforward fork, with a simple pipe to transmit the status byte.
-DeathTest::TestRole NoExecDeathTest::AssumeRole() {
- const size_t thread_count = GetThreadCount();
- if (thread_count != 1) {
- GTEST_LOG_(WARNING) << DeathTestThreadWarning(thread_count);
- }
-
- int pipe_fd[2];
- GTEST_DEATH_TEST_CHECK_(pipe(pipe_fd) != -1);
-
- DeathTest::set_last_death_test_message("");
- CaptureStderr();
- // When we fork the process below, the log file buffers are copied, but the
- // file descriptors are shared. We flush all log files here so that closing
- // the file descriptors in the child process doesn't throw off the
- // synchronization between descriptors and buffers in the parent process.
- // This is as close to the fork as possible to avoid a race condition in case
- // there are multiple threads running before the death test, and another
- // thread writes to the log file.
- FlushInfoLog();
-
- const pid_t child_pid = fork();
- GTEST_DEATH_TEST_CHECK_(child_pid != -1);
- set_child_pid(child_pid);
- if (child_pid == 0) {
- GTEST_DEATH_TEST_CHECK_SYSCALL_(close(pipe_fd[0]));
- set_write_fd(pipe_fd[1]);
- // Redirects all logging to stderr in the child process to prevent
- // concurrent writes to the log files. We capture stderr in the parent
- // process and append the child process' output to a log.
- LogToStderr();
- // Event forwarding to the listeners of event listener API mush be shut
- // down in death test subprocesses.
- GetUnitTestImpl()->listeners()->SuppressEventForwarding();
- g_in_fast_death_test_child = true;
- return EXECUTE_TEST;
- } else {
- GTEST_DEATH_TEST_CHECK_SYSCALL_(close(pipe_fd[1]));
- set_read_fd(pipe_fd[0]);
- set_spawned(true);
- return OVERSEE_TEST;
- }
-}
-
-// A concrete death test class that forks and re-executes the main
-// program from the beginning, with command-line flags set that cause
-// only this specific death test to be run.
-class ExecDeathTest : public ForkingDeathTest {
- public:
- ExecDeathTest(const char* a_statement, const RE* a_regex,
- const char* file, int line) :
- ForkingDeathTest(a_statement, a_regex), file_(file), line_(line) { }
- virtual TestRole AssumeRole();
- private:
- static ::std::vector<testing::internal::string>
- GetArgvsForDeathTestChildProcess() {
- ::std::vector<testing::internal::string> args = GetInjectableArgvs();
-# if defined(GTEST_EXTRA_DEATH_TEST_COMMAND_LINE_ARGS_)
- ::std::vector<testing::internal::string> extra_args =
- GTEST_EXTRA_DEATH_TEST_COMMAND_LINE_ARGS_();
- args.insert(args.end(), extra_args.begin(), extra_args.end());
-# endif // defined(GTEST_EXTRA_DEATH_TEST_COMMAND_LINE_ARGS_)
- return args;
- }
- // The name of the file in which the death test is located.
- const char* const file_;
- // The line number on which the death test is located.
- const int line_;
-};
-
-// Utility class for accumulating command-line arguments.
-class Arguments {
- public:
- Arguments() {
- args_.push_back(NULL);
- }
-
- ~Arguments() {
- for (std::vector<char*>::iterator i = args_.begin(); i != args_.end();
- ++i) {
- free(*i);
- }
- }
- void AddArgument(const char* argument) {
- args_.insert(args_.end() - 1, posix::StrDup(argument));
- }
-
- template <typename Str>
- void AddArguments(const ::std::vector<Str>& arguments) {
- for (typename ::std::vector<Str>::const_iterator i = arguments.begin();
- i != arguments.end();
- ++i) {
- args_.insert(args_.end() - 1, posix::StrDup(i->c_str()));
- }
- }
- char* const* Argv() {
- return &args_[0];
- }
-
- private:
- std::vector<char*> args_;
-};
-
-// A struct that encompasses the arguments to the child process of a
-// threadsafe-style death test process.
-struct ExecDeathTestArgs {
- char* const* argv; // Command-line arguments for the child's call to exec
- int close_fd; // File descriptor to close; the read end of a pipe
-};
-
-# if GTEST_OS_MAC
-inline char** GetEnviron() {
- // When Google Test is built as a framework on MacOS X, the environ variable
- // is unavailable. Apple's documentation (man environ) recommends using
- // _NSGetEnviron() instead.
- return *_NSGetEnviron();
-}
-# else
-// Some POSIX platforms expect you to declare environ. extern "C" makes
-// it reside in the global namespace.
-extern "C" char** environ;
-inline char** GetEnviron() { return environ; }
-# endif // GTEST_OS_MAC
-
-# if !GTEST_OS_QNX
-// The main function for a threadsafe-style death test child process.
-// This function is called in a clone()-ed process and thus must avoid
-// any potentially unsafe operations like malloc or libc functions.
-static int ExecDeathTestChildMain(void* child_arg) {
- ExecDeathTestArgs* const args = static_cast<ExecDeathTestArgs*>(child_arg);
- GTEST_DEATH_TEST_CHECK_SYSCALL_(close(args->close_fd));
-
- // We need to execute the test program in the same environment where
- // it was originally invoked. Therefore we change to the original
- // working directory first.
- const char* const original_dir =
- UnitTest::GetInstance()->original_working_dir();
- // We can safely call chdir() as it's a direct system call.
- if (chdir(original_dir) != 0) {
- DeathTestAbort(std::string("chdir(\"") + original_dir + "\") failed: " +
- GetLastErrnoDescription());
- return EXIT_FAILURE;
- }
-
- // We can safely call execve() as it's a direct system call. We
- // cannot use execvp() as it's a libc function and thus potentially
- // unsafe. Since execve() doesn't search the PATH, the user must
- // invoke the test program via a valid path that contains at least
- // one path separator.
- execve(args->argv[0], args->argv, GetEnviron());
- DeathTestAbort(std::string("execve(") + args->argv[0] + ", ...) in " +
- original_dir + " failed: " +
- GetLastErrnoDescription());
- return EXIT_FAILURE;
-}
-# endif // !GTEST_OS_QNX
-
-// Two utility routines that together determine the direction the stack
-// grows.
-// This could be accomplished more elegantly by a single recursive
-// function, but we want to guard against the unlikely possibility of
-// a smart compiler optimizing the recursion away.
-//
-// GTEST_NO_INLINE_ is required to prevent GCC 4.6 from inlining
-// StackLowerThanAddress into StackGrowsDown, which then doesn't give
-// correct answer.
-void StackLowerThanAddress(const void* ptr, bool* result) GTEST_NO_INLINE_;
-void StackLowerThanAddress(const void* ptr, bool* result) {
- int dummy;
- *result = (&dummy < ptr);
-}
-
-// Make sure AddressSanitizer does not tamper with the stack here.
-GTEST_ATTRIBUTE_NO_SANITIZE_ADDRESS_
-bool StackGrowsDown() {
- int dummy;
- bool result;
- StackLowerThanAddress(&dummy, &result);
- return result;
-}
-
-// Spawns a child process with the same executable as the current process in
-// a thread-safe manner and instructs it to run the death test. The
-// implementation uses fork(2) + exec. On systems where clone(2) is
-// available, it is used instead, being slightly more thread-safe. On QNX,
-// fork supports only single-threaded environments, so this function uses
-// spawn(2) there instead. The function dies with an error message if
-// anything goes wrong.
-static pid_t ExecDeathTestSpawnChild(char* const* argv, int close_fd) {
- ExecDeathTestArgs args = { argv, close_fd };
- pid_t child_pid = -1;
-
-# if GTEST_OS_QNX
- // Obtains the current directory and sets it to be closed in the child
- // process.
- const int cwd_fd = open(".", O_RDONLY);
- GTEST_DEATH_TEST_CHECK_(cwd_fd != -1);
- GTEST_DEATH_TEST_CHECK_SYSCALL_(fcntl(cwd_fd, F_SETFD, FD_CLOEXEC));
- // We need to execute the test program in the same environment where
- // it was originally invoked. Therefore we change to the original
- // working directory first.
- const char* const original_dir =
- UnitTest::GetInstance()->original_working_dir();
- // We can safely call chdir() as it's a direct system call.
- if (chdir(original_dir) != 0) {
- DeathTestAbort(std::string("chdir(\"") + original_dir + "\") failed: " +
- GetLastErrnoDescription());
- return EXIT_FAILURE;
- }
-
- int fd_flags;
- // Set close_fd to be closed after spawn.
- GTEST_DEATH_TEST_CHECK_SYSCALL_(fd_flags = fcntl(close_fd, F_GETFD));
- GTEST_DEATH_TEST_CHECK_SYSCALL_(fcntl(close_fd, F_SETFD,
- fd_flags | FD_CLOEXEC));
- struct inheritance inherit = {0};
- // spawn is a system call.
- child_pid = spawn(args.argv[0], 0, NULL, &inherit, args.argv, GetEnviron());
- // Restores the current working directory.
- GTEST_DEATH_TEST_CHECK_(fchdir(cwd_fd) != -1);
- GTEST_DEATH_TEST_CHECK_SYSCALL_(close(cwd_fd));
-
-# else // GTEST_OS_QNX
-# if GTEST_OS_LINUX
- // When a SIGPROF signal is received while fork() or clone() are executing,
- // the process may hang. To avoid this, we ignore SIGPROF here and re-enable
- // it after the call to fork()/clone() is complete.
- struct sigaction saved_sigprof_action;
- struct sigaction ignore_sigprof_action;
- memset(&ignore_sigprof_action, 0, sizeof(ignore_sigprof_action));
- sigemptyset(&ignore_sigprof_action.sa_mask);
- ignore_sigprof_action.sa_handler = SIG_IGN;
- GTEST_DEATH_TEST_CHECK_SYSCALL_(sigaction(
- SIGPROF, &ignore_sigprof_action, &saved_sigprof_action));
-# endif // GTEST_OS_LINUX
-
-# if GTEST_HAS_CLONE
- const bool use_fork = GTEST_FLAG(death_test_use_fork);
-
- if (!use_fork) {
- static const bool stack_grows_down = StackGrowsDown();
- const size_t stack_size = getpagesize();
- // MMAP_ANONYMOUS is not defined on Mac, so we use MAP_ANON instead.
- void* const stack = mmap(NULL, stack_size, PROT_READ | PROT_WRITE,
- MAP_ANON | MAP_PRIVATE, -1, 0);
- GTEST_DEATH_TEST_CHECK_(stack != MAP_FAILED);
-
- // Maximum stack alignment in bytes: For a downward-growing stack, this
- // amount is subtracted from size of the stack space to get an address
- // that is within the stack space and is aligned on all systems we care
- // about. As far as I know there is no ABI with stack alignment greater
- // than 64. We assume stack and stack_size already have alignment of
- // kMaxStackAlignment.
- const size_t kMaxStackAlignment = 64;
- void* const stack_top =
- static_cast<char*>(stack) +
- (stack_grows_down ? stack_size - kMaxStackAlignment : 0);
- GTEST_DEATH_TEST_CHECK_(stack_size > kMaxStackAlignment &&
- reinterpret_cast<intptr_t>(stack_top) % kMaxStackAlignment == 0);
-
- child_pid = clone(&ExecDeathTestChildMain, stack_top, SIGCHLD, &args);
-
- GTEST_DEATH_TEST_CHECK_(munmap(stack, stack_size) != -1);
- }
-# else
- const bool use_fork = true;
-# endif // GTEST_HAS_CLONE
-
- if (use_fork && (child_pid = fork()) == 0) {
- ExecDeathTestChildMain(&args);
- _exit(0);
- }
-# endif // GTEST_OS_QNX
-# if GTEST_OS_LINUX
- GTEST_DEATH_TEST_CHECK_SYSCALL_(
- sigaction(SIGPROF, &saved_sigprof_action, NULL));
-# endif // GTEST_OS_LINUX
-
- GTEST_DEATH_TEST_CHECK_(child_pid != -1);
- return child_pid;
-}
-
-// The AssumeRole process for a fork-and-exec death test. It re-executes the
-// main program from the beginning, setting the --gtest_filter
-// and --gtest_internal_run_death_test flags to cause only the current
-// death test to be re-run.
-DeathTest::TestRole ExecDeathTest::AssumeRole() {
- const UnitTestImpl* const impl = GetUnitTestImpl();
- const InternalRunDeathTestFlag* const flag =
- impl->internal_run_death_test_flag();
- const TestInfo* const info = impl->current_test_info();
- const int death_test_index = info->result()->death_test_count();
-
- if (flag != NULL) {
- set_write_fd(flag->write_fd());
- return EXECUTE_TEST;
- }
-
- int pipe_fd[2];
- GTEST_DEATH_TEST_CHECK_(pipe(pipe_fd) != -1);
- // Clear the close-on-exec flag on the write end of the pipe, lest
- // it be closed when the child process does an exec:
- GTEST_DEATH_TEST_CHECK_(fcntl(pipe_fd[1], F_SETFD, 0) != -1);
-
- const std::string filter_flag =
- std::string("--") + GTEST_FLAG_PREFIX_ + kFilterFlag + "="
- + info->test_case_name() + "." + info->name();
- const std::string internal_flag =
- std::string("--") + GTEST_FLAG_PREFIX_ + kInternalRunDeathTestFlag + "="
- + file_ + "|" + StreamableToString(line_) + "|"
- + StreamableToString(death_test_index) + "|"
- + StreamableToString(pipe_fd[1]);
- Arguments args;
- args.AddArguments(GetArgvsForDeathTestChildProcess());
- args.AddArgument(filter_flag.c_str());
- args.AddArgument(internal_flag.c_str());
-
- DeathTest::set_last_death_test_message("");
-
- CaptureStderr();
- // See the comment in NoExecDeathTest::AssumeRole for why the next line
- // is necessary.
- FlushInfoLog();
-
- const pid_t child_pid = ExecDeathTestSpawnChild(args.Argv(), pipe_fd[0]);
- GTEST_DEATH_TEST_CHECK_SYSCALL_(close(pipe_fd[1]));
- set_child_pid(child_pid);
- set_read_fd(pipe_fd[0]);
- set_spawned(true);
- return OVERSEE_TEST;
-}
-
-# endif // !GTEST_OS_WINDOWS
-
-// Creates a concrete DeathTest-derived class that depends on the
-// --gtest_death_test_style flag, and sets the pointer pointed to
-// by the "test" argument to its address. If the test should be
-// skipped, sets that pointer to NULL. Returns true, unless the
-// flag is set to an invalid value.
-bool DefaultDeathTestFactory::Create(const char* statement, const RE* regex,
- const char* file, int line,
- DeathTest** test) {
- UnitTestImpl* const impl = GetUnitTestImpl();
- const InternalRunDeathTestFlag* const flag =
- impl->internal_run_death_test_flag();
- const int death_test_index = impl->current_test_info()
- ->increment_death_test_count();
-
- if (flag != NULL) {
- if (death_test_index > flag->index()) {
- DeathTest::set_last_death_test_message(
- "Death test count (" + StreamableToString(death_test_index)
- + ") somehow exceeded expected maximum ("
- + StreamableToString(flag->index()) + ")");
- return false;
- }
-
- if (!(flag->file() == file && flag->line() == line &&
- flag->index() == death_test_index)) {
- *test = NULL;
- return true;
- }
- }
-
-# if GTEST_OS_WINDOWS
-
- if (GTEST_FLAG(death_test_style) == "threadsafe" ||
- GTEST_FLAG(death_test_style) == "fast") {
- *test = new WindowsDeathTest(statement, regex, file, line);
- }
-
-# else
-
- if (GTEST_FLAG(death_test_style) == "threadsafe") {
- *test = new ExecDeathTest(statement, regex, file, line);
- } else if (GTEST_FLAG(death_test_style) == "fast") {
- *test = new NoExecDeathTest(statement, regex);
- }
-
-# endif // GTEST_OS_WINDOWS
-
- else { // NOLINT - this is more readable than unbalanced brackets inside #if.
- DeathTest::set_last_death_test_message(
- "Unknown death test style \"" + GTEST_FLAG(death_test_style)
- + "\" encountered");
- return false;
- }
-
- return true;
-}
-
-# if GTEST_OS_WINDOWS
-// Recreates the pipe and event handles from the provided parameters,
-// signals the event, and returns a file descriptor wrapped around the pipe
-// handle. This function is called in the child process only.
-int GetStatusFileDescriptor(unsigned int parent_process_id,
- size_t write_handle_as_size_t,
- size_t event_handle_as_size_t) {
- AutoHandle parent_process_handle(::OpenProcess(PROCESS_DUP_HANDLE,
- FALSE, // Non-inheritable.
- parent_process_id));
- if (parent_process_handle.Get() == INVALID_HANDLE_VALUE) {
- DeathTestAbort("Unable to open parent process " +
- StreamableToString(parent_process_id));
- }
-
- // TODO(vladl@google.com): Replace the following check with a
- // compile-time assertion when available.
- GTEST_CHECK_(sizeof(HANDLE) <= sizeof(size_t));
-
- const HANDLE write_handle =
- reinterpret_cast<HANDLE>(write_handle_as_size_t);
- HANDLE dup_write_handle;
-
- // The newly initialized handle is accessible only in in the parent
- // process. To obtain one accessible within the child, we need to use
- // DuplicateHandle.
- if (!::DuplicateHandle(parent_process_handle.Get(), write_handle,
- ::GetCurrentProcess(), &dup_write_handle,
- 0x0, // Requested privileges ignored since
- // DUPLICATE_SAME_ACCESS is used.
- FALSE, // Request non-inheritable handler.
- DUPLICATE_SAME_ACCESS)) {
- DeathTestAbort("Unable to duplicate the pipe handle " +
- StreamableToString(write_handle_as_size_t) +
- " from the parent process " +
- StreamableToString(parent_process_id));
- }
-
- const HANDLE event_handle = reinterpret_cast<HANDLE>(event_handle_as_size_t);
- HANDLE dup_event_handle;
-
- if (!::DuplicateHandle(parent_process_handle.Get(), event_handle,
- ::GetCurrentProcess(), &dup_event_handle,
- 0x0,
- FALSE,
- DUPLICATE_SAME_ACCESS)) {
- DeathTestAbort("Unable to duplicate the event handle " +
- StreamableToString(event_handle_as_size_t) +
- " from the parent process " +
- StreamableToString(parent_process_id));
- }
-
- const int write_fd =
- ::_open_osfhandle(reinterpret_cast<intptr_t>(dup_write_handle), O_APPEND);
- if (write_fd == -1) {
- DeathTestAbort("Unable to convert pipe handle " +
- StreamableToString(write_handle_as_size_t) +
- " to a file descriptor");
- }
-
- // Signals the parent that the write end of the pipe has been acquired
- // so the parent can release its own write end.
- ::SetEvent(dup_event_handle);
-
- return write_fd;
-}
-# endif // GTEST_OS_WINDOWS
-
-// Returns a newly created InternalRunDeathTestFlag object with fields
-// initialized from the GTEST_FLAG(internal_run_death_test) flag if
-// the flag is specified; otherwise returns NULL.
-InternalRunDeathTestFlag* ParseInternalRunDeathTestFlag() {
- if (GTEST_FLAG(internal_run_death_test) == "") return NULL;
-
- // GTEST_HAS_DEATH_TEST implies that we have ::std::string, so we
- // can use it here.
- int line = -1;
- int index = -1;
- ::std::vector< ::std::string> fields;
- SplitString(GTEST_FLAG(internal_run_death_test).c_str(), '|', &fields);
- int write_fd = -1;
-
-# if GTEST_OS_WINDOWS
-
- unsigned int parent_process_id = 0;
- size_t write_handle_as_size_t = 0;
- size_t event_handle_as_size_t = 0;
-
- if (fields.size() != 6
- || !ParseNaturalNumber(fields[1], &line)
- || !ParseNaturalNumber(fields[2], &index)
- || !ParseNaturalNumber(fields[3], &parent_process_id)
- || !ParseNaturalNumber(fields[4], &write_handle_as_size_t)
- || !ParseNaturalNumber(fields[5], &event_handle_as_size_t)) {
- DeathTestAbort("Bad --gtest_internal_run_death_test flag: " +
- GTEST_FLAG(internal_run_death_test));
- }
- write_fd = GetStatusFileDescriptor(parent_process_id,
- write_handle_as_size_t,
- event_handle_as_size_t);
-# else
-
- if (fields.size() != 4
- || !ParseNaturalNumber(fields[1], &line)
- || !ParseNaturalNumber(fields[2], &index)
- || !ParseNaturalNumber(fields[3], &write_fd)) {
- DeathTestAbort("Bad --gtest_internal_run_death_test flag: "
- + GTEST_FLAG(internal_run_death_test));
- }
-
-# endif // GTEST_OS_WINDOWS
-
- return new InternalRunDeathTestFlag(fields[0], line, index, write_fd);
-}
-
-} // namespace internal
-
-#endif // GTEST_HAS_DEATH_TEST
-
-} // namespace testing
diff --git a/third_party/aom/third_party/googletest/src/googletest/src/gtest-filepath.cc b/third_party/aom/third_party/googletest/src/googletest/src/gtest-filepath.cc
deleted file mode 100644
index 0292dc119..000000000
--- a/third_party/aom/third_party/googletest/src/googletest/src/gtest-filepath.cc
+++ /dev/null
@@ -1,387 +0,0 @@
-// Copyright 2008, Google Inc.
-// All rights reserved.
-//
-// Redistribution and use in source and binary forms, with or without
-// modification, are permitted provided that the following conditions are
-// met:
-//
-// * Redistributions of source code must retain the above copyright
-// notice, this list of conditions and the following disclaimer.
-// * Redistributions in binary form must reproduce the above
-// copyright notice, this list of conditions and the following disclaimer
-// in the documentation and/or other materials provided with the
-// distribution.
-// * Neither the name of Google Inc. nor the names of its
-// contributors may be used to endorse or promote products derived from
-// this software without specific prior written permission.
-//
-// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
-// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
-// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
-// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
-// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
-// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
-// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
-// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
-// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-//
-// Authors: keith.ray@gmail.com (Keith Ray)
-
-#include "gtest/gtest-message.h"
-#include "gtest/internal/gtest-filepath.h"
-#include "gtest/internal/gtest-port.h"
-
-#include <stdlib.h>
-
-#if GTEST_OS_WINDOWS_MOBILE
-# include <windows.h>
-#elif GTEST_OS_WINDOWS
-# include <direct.h>
-# include <io.h>
-#elif GTEST_OS_SYMBIAN
-// Symbian OpenC has PATH_MAX in sys/syslimits.h
-# include <sys/syslimits.h>
-#else
-# include <limits.h>
-# include <climits> // Some Linux distributions define PATH_MAX here.
-#endif // GTEST_OS_WINDOWS_MOBILE
-
-#if GTEST_OS_WINDOWS
-# define GTEST_PATH_MAX_ _MAX_PATH
-#elif defined(PATH_MAX)
-# define GTEST_PATH_MAX_ PATH_MAX
-#elif defined(_XOPEN_PATH_MAX)
-# define GTEST_PATH_MAX_ _XOPEN_PATH_MAX
-#else
-# define GTEST_PATH_MAX_ _POSIX_PATH_MAX
-#endif // GTEST_OS_WINDOWS
-
-#include "gtest/internal/gtest-string.h"
-
-namespace testing {
-namespace internal {
-
-#if GTEST_OS_WINDOWS
-// On Windows, '\\' is the standard path separator, but many tools and the
-// Windows API also accept '/' as an alternate path separator. Unless otherwise
-// noted, a file path can contain either kind of path separators, or a mixture
-// of them.
-const char kPathSeparator = '\\';
-const char kAlternatePathSeparator = '/';
-const char kAlternatePathSeparatorString[] = "/";
-# if GTEST_OS_WINDOWS_MOBILE
-// Windows CE doesn't have a current directory. You should not use
-// the current directory in tests on Windows CE, but this at least
-// provides a reasonable fallback.
-const char kCurrentDirectoryString[] = "\\";
-// Windows CE doesn't define INVALID_FILE_ATTRIBUTES
-const DWORD kInvalidFileAttributes = 0xffffffff;
-# else
-const char kCurrentDirectoryString[] = ".\\";
-# endif // GTEST_OS_WINDOWS_MOBILE
-#else
-const char kPathSeparator = '/';
-const char kCurrentDirectoryString[] = "./";
-#endif // GTEST_OS_WINDOWS
-
-// Returns whether the given character is a valid path separator.
-static bool IsPathSeparator(char c) {
-#if GTEST_HAS_ALT_PATH_SEP_
- return (c == kPathSeparator) || (c == kAlternatePathSeparator);
-#else
- return c == kPathSeparator;
-#endif
-}
-
-// Returns the current working directory, or "" if unsuccessful.
-FilePath FilePath::GetCurrentDir() {
-#if GTEST_OS_WINDOWS_MOBILE || GTEST_OS_WINDOWS_PHONE || GTEST_OS_WINDOWS_RT
- // Windows CE doesn't have a current directory, so we just return
- // something reasonable.
- return FilePath(kCurrentDirectoryString);
-#elif GTEST_OS_WINDOWS
- char cwd[GTEST_PATH_MAX_ + 1] = { '\0' };
- return FilePath(_getcwd(cwd, sizeof(cwd)) == NULL ? "" : cwd);
-#else
- char cwd[GTEST_PATH_MAX_ + 1] = { '\0' };
- char* result = getcwd(cwd, sizeof(cwd));
-# if GTEST_OS_NACL
- // getcwd will likely fail in NaCl due to the sandbox, so return something
- // reasonable. The user may have provided a shim implementation for getcwd,
- // however, so fallback only when failure is detected.
- return FilePath(result == NULL ? kCurrentDirectoryString : cwd);
-# endif // GTEST_OS_NACL
- return FilePath(result == NULL ? "" : cwd);
-#endif // GTEST_OS_WINDOWS_MOBILE
-}
-
-// Returns a copy of the FilePath with the case-insensitive extension removed.
-// Example: FilePath("dir/file.exe").RemoveExtension("EXE") returns
-// FilePath("dir/file"). If a case-insensitive extension is not
-// found, returns a copy of the original FilePath.
-FilePath FilePath::RemoveExtension(const char* extension) const {
- const std::string dot_extension = std::string(".") + extension;
- if (String::EndsWithCaseInsensitive(pathname_, dot_extension)) {
- return FilePath(pathname_.substr(
- 0, pathname_.length() - dot_extension.length()));
- }
- return *this;
-}
-
-// Returns a pointer to the last occurence of a valid path separator in
-// the FilePath. On Windows, for example, both '/' and '\' are valid path
-// separators. Returns NULL if no path separator was found.
-const char* FilePath::FindLastPathSeparator() const {
- const char* const last_sep = strrchr(c_str(), kPathSeparator);
-#if GTEST_HAS_ALT_PATH_SEP_
- const char* const last_alt_sep = strrchr(c_str(), kAlternatePathSeparator);
- // Comparing two pointers of which only one is NULL is undefined.
- if (last_alt_sep != NULL &&
- (last_sep == NULL || last_alt_sep > last_sep)) {
- return last_alt_sep;
- }
-#endif
- return last_sep;
-}
-
-// Returns a copy of the FilePath with the directory part removed.
-// Example: FilePath("path/to/file").RemoveDirectoryName() returns
-// FilePath("file"). If there is no directory part ("just_a_file"), it returns
-// the FilePath unmodified. If there is no file part ("just_a_dir/") it
-// returns an empty FilePath ("").
-// On Windows platform, '\' is the path separator, otherwise it is '/'.
-FilePath FilePath::RemoveDirectoryName() const {
- const char* const last_sep = FindLastPathSeparator();
- return last_sep ? FilePath(last_sep + 1) : *this;
-}
-
-// RemoveFileName returns the directory path with the filename removed.
-// Example: FilePath("path/to/file").RemoveFileName() returns "path/to/".
-// If the FilePath is "a_file" or "/a_file", RemoveFileName returns
-// FilePath("./") or, on Windows, FilePath(".\\"). If the filepath does
-// not have a file, like "just/a/dir/", it returns the FilePath unmodified.
-// On Windows platform, '\' is the path separator, otherwise it is '/'.
-FilePath FilePath::RemoveFileName() const {
- const char* const last_sep = FindLastPathSeparator();
- std::string dir;
- if (last_sep) {
- dir = std::string(c_str(), last_sep + 1 - c_str());
- } else {
- dir = kCurrentDirectoryString;
- }
- return FilePath(dir);
-}
-
-// Helper functions for naming files in a directory for xml output.
-
-// Given directory = "dir", base_name = "test", number = 0,
-// extension = "xml", returns "dir/test.xml". If number is greater
-// than zero (e.g., 12), returns "dir/test_12.xml".
-// On Windows platform, uses \ as the separator rather than /.
-FilePath FilePath::MakeFileName(const FilePath& directory,
- const FilePath& base_name,
- int number,
- const char* extension) {
- std::string file;
- if (number == 0) {
- file = base_name.string() + "." + extension;
- } else {
- file = base_name.string() + "_" + StreamableToString(number)
- + "." + extension;
- }
- return ConcatPaths(directory, FilePath(file));
-}
-
-// Given directory = "dir", relative_path = "test.xml", returns "dir/test.xml".
-// On Windows, uses \ as the separator rather than /.
-FilePath FilePath::ConcatPaths(const FilePath& directory,
- const FilePath& relative_path) {
- if (directory.IsEmpty())
- return relative_path;
- const FilePath dir(directory.RemoveTrailingPathSeparator());
- return FilePath(dir.string() + kPathSeparator + relative_path.string());
-}
-
-// Returns true if pathname describes something findable in the file-system,
-// either a file, directory, or whatever.
-bool FilePath::FileOrDirectoryExists() const {
-#if GTEST_OS_WINDOWS_MOBILE
- LPCWSTR unicode = String::AnsiToUtf16(pathname_.c_str());
- const DWORD attributes = GetFileAttributes(unicode);
- delete [] unicode;
- return attributes != kInvalidFileAttributes;
-#else
- posix::StatStruct file_stat;
- return posix::Stat(pathname_.c_str(), &file_stat) == 0;
-#endif // GTEST_OS_WINDOWS_MOBILE
-}
-
-// Returns true if pathname describes a directory in the file-system
-// that exists.
-bool FilePath::DirectoryExists() const {
- bool result = false;
-#if GTEST_OS_WINDOWS
- // Don't strip off trailing separator if path is a root directory on
- // Windows (like "C:\\").
- const FilePath& path(IsRootDirectory() ? *this :
- RemoveTrailingPathSeparator());
-#else
- const FilePath& path(*this);
-#endif
-
-#if GTEST_OS_WINDOWS_MOBILE
- LPCWSTR unicode = String::AnsiToUtf16(path.c_str());
- const DWORD attributes = GetFileAttributes(unicode);
- delete [] unicode;
- if ((attributes != kInvalidFileAttributes) &&
- (attributes & FILE_ATTRIBUTE_DIRECTORY)) {
- result = true;
- }
-#else
- posix::StatStruct file_stat;
- result = posix::Stat(path.c_str(), &file_stat) == 0 &&
- posix::IsDir(file_stat);
-#endif // GTEST_OS_WINDOWS_MOBILE
-
- return result;
-}
-
-// Returns true if pathname describes a root directory. (Windows has one
-// root directory per disk drive.)
-bool FilePath::IsRootDirectory() const {
-#if GTEST_OS_WINDOWS
- // TODO(wan@google.com): on Windows a network share like
- // \\server\share can be a root directory, although it cannot be the
- // current directory. Handle this properly.
- return pathname_.length() == 3 && IsAbsolutePath();
-#else
- return pathname_.length() == 1 && IsPathSeparator(pathname_.c_str()[0]);
-#endif
-}
-
-// Returns true if pathname describes an absolute path.
-bool FilePath::IsAbsolutePath() const {
- const char* const name = pathname_.c_str();
-#if GTEST_OS_WINDOWS
- return pathname_.length() >= 3 &&
- ((name[0] >= 'a' && name[0] <= 'z') ||
- (name[0] >= 'A' && name[0] <= 'Z')) &&
- name[1] == ':' &&
- IsPathSeparator(name[2]);
-#else
- return IsPathSeparator(name[0]);
-#endif
-}
-
-// Returns a pathname for a file that does not currently exist. The pathname
-// will be directory/base_name.extension or
-// directory/base_name_<number>.extension if directory/base_name.extension
-// already exists. The number will be incremented until a pathname is found
-// that does not already exist.
-// Examples: 'dir/foo_test.xml' or 'dir/foo_test_1.xml'.
-// There could be a race condition if two or more processes are calling this
-// function at the same time -- they could both pick the same filename.
-FilePath FilePath::GenerateUniqueFileName(const FilePath& directory,
- const FilePath& base_name,
- const char* extension) {
- FilePath full_pathname;
- int number = 0;
- do {
- full_pathname.Set(MakeFileName(directory, base_name, number++, extension));
- } while (full_pathname.FileOrDirectoryExists());
- return full_pathname;
-}
-
-// Returns true if FilePath ends with a path separator, which indicates that
-// it is intended to represent a directory. Returns false otherwise.
-// This does NOT check that a directory (or file) actually exists.
-bool FilePath::IsDirectory() const {
- return !pathname_.empty() &&
- IsPathSeparator(pathname_.c_str()[pathname_.length() - 1]);
-}
-
-// Create directories so that path exists. Returns true if successful or if
-// the directories already exist; returns false if unable to create directories
-// for any reason.
-bool FilePath::CreateDirectoriesRecursively() const {
- if (!this->IsDirectory()) {
- return false;
- }
-
- if (pathname_.length() == 0 || this->DirectoryExists()) {
- return true;
- }
-
- const FilePath parent(this->RemoveTrailingPathSeparator().RemoveFileName());
- return parent.CreateDirectoriesRecursively() && this->CreateFolder();
-}
-
-// Create the directory so that path exists. Returns true if successful or
-// if the directory already exists; returns false if unable to create the
-// directory for any reason, including if the parent directory does not
-// exist. Not named "CreateDirectory" because that's a macro on Windows.
-bool FilePath::CreateFolder() const {
-#if GTEST_OS_WINDOWS_MOBILE
- FilePath removed_sep(this->RemoveTrailingPathSeparator());
- LPCWSTR unicode = String::AnsiToUtf16(removed_sep.c_str());
- int result = CreateDirectory(unicode, NULL) ? 0 : -1;
- delete [] unicode;
-#elif GTEST_OS_WINDOWS
- int result = _mkdir(pathname_.c_str());
-#else
- int result = mkdir(pathname_.c_str(), 0777);
-#endif // GTEST_OS_WINDOWS_MOBILE
-
- if (result == -1) {
- return this->DirectoryExists(); // An error is OK if the directory exists.
- }
- return true; // No error.
-}
-
-// If input name has a trailing separator character, remove it and return the
-// name, otherwise return the name string unmodified.
-// On Windows platform, uses \ as the separator, other platforms use /.
-FilePath FilePath::RemoveTrailingPathSeparator() const {
- return IsDirectory()
- ? FilePath(pathname_.substr(0, pathname_.length() - 1))
- : *this;
-}
-
-// Removes any redundant separators that might be in the pathname.
-// For example, "bar///foo" becomes "bar/foo". Does not eliminate other
-// redundancies that might be in a pathname involving "." or "..".
-// TODO(wan@google.com): handle Windows network shares (e.g. \\server\share).
-void FilePath::Normalize() {
- if (pathname_.c_str() == NULL) {
- pathname_ = "";
- return;
- }
- const char* src = pathname_.c_str();
- char* const dest = new char[pathname_.length() + 1];
- char* dest_ptr = dest;
- memset(dest_ptr, 0, pathname_.length() + 1);
-
- while (*src != '\0') {
- *dest_ptr = *src;
- if (!IsPathSeparator(*src)) {
- src++;
- } else {
-#if GTEST_HAS_ALT_PATH_SEP_
- if (*dest_ptr == kAlternatePathSeparator) {
- *dest_ptr = kPathSeparator;
- }
-#endif
- while (IsPathSeparator(*src))
- src++;
- }
- dest_ptr++;
- }
- *dest_ptr = '\0';
- pathname_ = dest;
- delete[] dest;
-}
-
-} // namespace internal
-} // namespace testing
diff --git a/third_party/aom/third_party/googletest/src/googletest/src/gtest-internal-inl.h b/third_party/aom/third_party/googletest/src/googletest/src/gtest-internal-inl.h
deleted file mode 100644
index ed8a682a9..000000000
--- a/third_party/aom/third_party/googletest/src/googletest/src/gtest-internal-inl.h
+++ /dev/null
@@ -1,1183 +0,0 @@
-// Copyright 2005, Google Inc.
-// All rights reserved.
-//
-// Redistribution and use in source and binary forms, with or without
-// modification, are permitted provided that the following conditions are
-// met:
-//
-// * Redistributions of source code must retain the above copyright
-// notice, this list of conditions and the following disclaimer.
-// * Redistributions in binary form must reproduce the above
-// copyright notice, this list of conditions and the following disclaimer
-// in the documentation and/or other materials provided with the
-// distribution.
-// * Neither the name of Google Inc. nor the names of its
-// contributors may be used to endorse or promote products derived from
-// this software without specific prior written permission.
-//
-// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
-// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
-// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
-// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
-// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
-// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
-// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
-// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
-// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-
-// Utility functions and classes used by the Google C++ testing framework.
-//
-// Author: wan@google.com (Zhanyong Wan)
-//
-// This file contains purely Google Test's internal implementation. Please
-// DO NOT #INCLUDE IT IN A USER PROGRAM.
-
-#ifndef GTEST_SRC_GTEST_INTERNAL_INL_H_
-#define GTEST_SRC_GTEST_INTERNAL_INL_H_
-
-// GTEST_IMPLEMENTATION_ is defined to 1 iff the current translation unit is
-// part of Google Test's implementation; otherwise it's undefined.
-#if !GTEST_IMPLEMENTATION_
-// If this file is included from the user's code, just say no.
-# error "gtest-internal-inl.h is part of Google Test's internal implementation."
-# error "It must not be included except by Google Test itself."
-#endif // GTEST_IMPLEMENTATION_
-
-#ifndef _WIN32_WCE
-# include <errno.h>
-#endif // !_WIN32_WCE
-#include <stddef.h>
-#include <stdlib.h> // For strtoll/_strtoul64/malloc/free.
-#include <string.h> // For memmove.
-
-#include <algorithm>
-#include <string>
-#include <vector>
-
-#include "gtest/internal/gtest-port.h"
-
-#if GTEST_CAN_STREAM_RESULTS_
-# include <arpa/inet.h> // NOLINT
-# include <netdb.h> // NOLINT
-#endif
-
-#if GTEST_OS_WINDOWS
-# include <windows.h> // NOLINT
-#endif // GTEST_OS_WINDOWS
-
-#include "gtest/gtest.h" // NOLINT
-#include "gtest/gtest-spi.h"
-
-namespace testing {
-
-// Declares the flags.
-//
-// We don't want the users to modify this flag in the code, but want
-// Google Test's own unit tests to be able to access it. Therefore we
-// declare it here as opposed to in gtest.h.
-GTEST_DECLARE_bool_(death_test_use_fork);
-
-namespace internal {
-
-// The value of GetTestTypeId() as seen from within the Google Test
-// library. This is solely for testing GetTestTypeId().
-GTEST_API_ extern const TypeId kTestTypeIdInGoogleTest;
-
-// Names of the flags (needed for parsing Google Test flags).
-const char kAlsoRunDisabledTestsFlag[] = "also_run_disabled_tests";
-const char kBreakOnFailureFlag[] = "break_on_failure";
-const char kCatchExceptionsFlag[] = "catch_exceptions";
-const char kColorFlag[] = "color";
-const char kFilterFlag[] = "filter";
-const char kListTestsFlag[] = "list_tests";
-const char kOutputFlag[] = "output";
-const char kPrintTimeFlag[] = "print_time";
-const char kRandomSeedFlag[] = "random_seed";
-const char kRepeatFlag[] = "repeat";
-const char kShuffleFlag[] = "shuffle";
-const char kStackTraceDepthFlag[] = "stack_trace_depth";
-const char kStreamResultToFlag[] = "stream_result_to";
-const char kThrowOnFailureFlag[] = "throw_on_failure";
-const char kFlagfileFlag[] = "flagfile";
-
-// A valid random seed must be in [1, kMaxRandomSeed].
-const int kMaxRandomSeed = 99999;
-
-// g_help_flag is true iff the --help flag or an equivalent form is
-// specified on the command line.
-GTEST_API_ extern bool g_help_flag;
-
-// Returns the current time in milliseconds.
-GTEST_API_ TimeInMillis GetTimeInMillis();
-
-// Returns true iff Google Test should use colors in the output.
-GTEST_API_ bool ShouldUseColor(bool stdout_is_tty);
-
-// Formats the given time in milliseconds as seconds.
-GTEST_API_ std::string FormatTimeInMillisAsSeconds(TimeInMillis ms);
-
-// Converts the given time in milliseconds to a date string in the ISO 8601
-// format, without the timezone information. N.B.: due to the use the
-// non-reentrant localtime() function, this function is not thread safe. Do
-// not use it in any code that can be called from multiple threads.
-GTEST_API_ std::string FormatEpochTimeInMillisAsIso8601(TimeInMillis ms);
-
-// Parses a string for an Int32 flag, in the form of "--flag=value".
-//
-// On success, stores the value of the flag in *value, and returns
-// true. On failure, returns false without changing *value.
-GTEST_API_ bool ParseInt32Flag(
- const char* str, const char* flag, Int32* value);
-
-// Returns a random seed in range [1, kMaxRandomSeed] based on the
-// given --gtest_random_seed flag value.
-inline int GetRandomSeedFromFlag(Int32 random_seed_flag) {
- const unsigned int raw_seed = (random_seed_flag == 0) ?
- static_cast<unsigned int>(GetTimeInMillis()) :
- static_cast<unsigned int>(random_seed_flag);
-
- // Normalizes the actual seed to range [1, kMaxRandomSeed] such that
- // it's easy to type.
- const int normalized_seed =
- static_cast<int>((raw_seed - 1U) %
- static_cast<unsigned int>(kMaxRandomSeed)) + 1;
- return normalized_seed;
-}
-
-// Returns the first valid random seed after 'seed'. The behavior is
-// undefined if 'seed' is invalid. The seed after kMaxRandomSeed is
-// considered to be 1.
-inline int GetNextRandomSeed(int seed) {
- GTEST_CHECK_(1 <= seed && seed <= kMaxRandomSeed)
- << "Invalid random seed " << seed << " - must be in [1, "
- << kMaxRandomSeed << "].";
- const int next_seed = seed + 1;
- return (next_seed > kMaxRandomSeed) ? 1 : next_seed;
-}
-
-// This class saves the values of all Google Test flags in its c'tor, and
-// restores them in its d'tor.
-class GTestFlagSaver {
- public:
- // The c'tor.
- GTestFlagSaver() {
- also_run_disabled_tests_ = GTEST_FLAG(also_run_disabled_tests);
- break_on_failure_ = GTEST_FLAG(break_on_failure);
- catch_exceptions_ = GTEST_FLAG(catch_exceptions);
- color_ = GTEST_FLAG(color);
- death_test_style_ = GTEST_FLAG(death_test_style);
- death_test_use_fork_ = GTEST_FLAG(death_test_use_fork);
- filter_ = GTEST_FLAG(filter);
- internal_run_death_test_ = GTEST_FLAG(internal_run_death_test);
- list_tests_ = GTEST_FLAG(list_tests);
- output_ = GTEST_FLAG(output);
- print_time_ = GTEST_FLAG(print_time);
- random_seed_ = GTEST_FLAG(random_seed);
- repeat_ = GTEST_FLAG(repeat);
- shuffle_ = GTEST_FLAG(shuffle);
- stack_trace_depth_ = GTEST_FLAG(stack_trace_depth);
- stream_result_to_ = GTEST_FLAG(stream_result_to);
- throw_on_failure_ = GTEST_FLAG(throw_on_failure);
- }
-
- // The d'tor is not virtual. DO NOT INHERIT FROM THIS CLASS.
- ~GTestFlagSaver() {
- GTEST_FLAG(also_run_disabled_tests) = also_run_disabled_tests_;
- GTEST_FLAG(break_on_failure) = break_on_failure_;
- GTEST_FLAG(catch_exceptions) = catch_exceptions_;
- GTEST_FLAG(color) = color_;
- GTEST_FLAG(death_test_style) = death_test_style_;
- GTEST_FLAG(death_test_use_fork) = death_test_use_fork_;
- GTEST_FLAG(filter) = filter_;
- GTEST_FLAG(internal_run_death_test) = internal_run_death_test_;
- GTEST_FLAG(list_tests) = list_tests_;
- GTEST_FLAG(output) = output_;
- GTEST_FLAG(print_time) = print_time_;
- GTEST_FLAG(random_seed) = random_seed_;
- GTEST_FLAG(repeat) = repeat_;
- GTEST_FLAG(shuffle) = shuffle_;
- GTEST_FLAG(stack_trace_depth) = stack_trace_depth_;
- GTEST_FLAG(stream_result_to) = stream_result_to_;
- GTEST_FLAG(throw_on_failure) = throw_on_failure_;
- }
-
- private:
- // Fields for saving the original values of flags.
- bool also_run_disabled_tests_;
- bool break_on_failure_;
- bool catch_exceptions_;
- std::string color_;
- std::string death_test_style_;
- bool death_test_use_fork_;
- std::string filter_;
- std::string internal_run_death_test_;
- bool list_tests_;
- std::string output_;
- bool print_time_;
- internal::Int32 random_seed_;
- internal::Int32 repeat_;
- bool shuffle_;
- internal::Int32 stack_trace_depth_;
- std::string stream_result_to_;
- bool throw_on_failure_;
-} GTEST_ATTRIBUTE_UNUSED_;
-
-// Converts a Unicode code point to a narrow string in UTF-8 encoding.
-// code_point parameter is of type UInt32 because wchar_t may not be
-// wide enough to contain a code point.
-// If the code_point is not a valid Unicode code point
-// (i.e. outside of Unicode range U+0 to U+10FFFF) it will be converted
-// to "(Invalid Unicode 0xXXXXXXXX)".
-GTEST_API_ std::string CodePointToUtf8(UInt32 code_point);
-
-// Converts a wide string to a narrow string in UTF-8 encoding.
-// The wide string is assumed to have the following encoding:
-// UTF-16 if sizeof(wchar_t) == 2 (on Windows, Cygwin, Symbian OS)
-// UTF-32 if sizeof(wchar_t) == 4 (on Linux)
-// Parameter str points to a null-terminated wide string.
-// Parameter num_chars may additionally limit the number
-// of wchar_t characters processed. -1 is used when the entire string
-// should be processed.
-// If the string contains code points that are not valid Unicode code points
-// (i.e. outside of Unicode range U+0 to U+10FFFF) they will be output
-// as '(Invalid Unicode 0xXXXXXXXX)'. If the string is in UTF16 encoding
-// and contains invalid UTF-16 surrogate pairs, values in those pairs
-// will be encoded as individual Unicode characters from Basic Normal Plane.
-GTEST_API_ std::string WideStringToUtf8(const wchar_t* str, int num_chars);
-
-// Reads the GTEST_SHARD_STATUS_FILE environment variable, and creates the file
-// if the variable is present. If a file already exists at this location, this
-// function will write over it. If the variable is present, but the file cannot
-// be created, prints an error and exits.
-void WriteToShardStatusFileIfNeeded();
-
-// Checks whether sharding is enabled by examining the relevant
-// environment variable values. If the variables are present,
-// but inconsistent (e.g., shard_index >= total_shards), prints
-// an error and exits. If in_subprocess_for_death_test, sharding is
-// disabled because it must only be applied to the original test
-// process. Otherwise, we could filter out death tests we intended to execute.
-GTEST_API_ bool ShouldShard(const char* total_shards_str,
- const char* shard_index_str,
- bool in_subprocess_for_death_test);
-
-// Parses the environment variable var as an Int32. If it is unset,
-// returns default_val. If it is not an Int32, prints an error and
-// and aborts.
-GTEST_API_ Int32 Int32FromEnvOrDie(const char* env_var, Int32 default_val);
-
-// Given the total number of shards, the shard index, and the test id,
-// returns true iff the test should be run on this shard. The test id is
-// some arbitrary but unique non-negative integer assigned to each test
-// method. Assumes that 0 <= shard_index < total_shards.
-GTEST_API_ bool ShouldRunTestOnShard(
- int total_shards, int shard_index, int test_id);
-
-// STL container utilities.
-
-// Returns the number of elements in the given container that satisfy
-// the given predicate.
-template <class Container, typename Predicate>
-inline int CountIf(const Container& c, Predicate predicate) {
- // Implemented as an explicit loop since std::count_if() in libCstd on
- // Solaris has a non-standard signature.
- int count = 0;
- for (typename Container::const_iterator it = c.begin(); it != c.end(); ++it) {
- if (predicate(*it))
- ++count;
- }
- return count;
-}
-
-// Applies a function/functor to each element in the container.
-template <class Container, typename Functor>
-void ForEach(const Container& c, Functor functor) {
- std::for_each(c.begin(), c.end(), functor);
-}
-
-// Returns the i-th element of the vector, or default_value if i is not
-// in range [0, v.size()).
-template <typename E>
-inline E GetElementOr(const std::vector<E>& v, int i, E default_value) {
- return (i < 0 || i >= static_cast<int>(v.size())) ? default_value : v[i];
-}
-
-// Performs an in-place shuffle of a range of the vector's elements.
-// 'begin' and 'end' are element indices as an STL-style range;
-// i.e. [begin, end) are shuffled, where 'end' == size() means to
-// shuffle to the end of the vector.
-template <typename E>
-void ShuffleRange(internal::Random* random, int begin, int end,
- std::vector<E>* v) {
- const int size = static_cast<int>(v->size());
- GTEST_CHECK_(0 <= begin && begin <= size)
- << "Invalid shuffle range start " << begin << ": must be in range [0, "
- << size << "].";
- GTEST_CHECK_(begin <= end && end <= size)
- << "Invalid shuffle range finish " << end << ": must be in range ["
- << begin << ", " << size << "].";
-
- // Fisher-Yates shuffle, from
- // http://en.wikipedia.org/wiki/Fisher-Yates_shuffle
- for (int range_width = end - begin; range_width >= 2; range_width--) {
- const int last_in_range = begin + range_width - 1;
- const int selected = begin + random->Generate(range_width);
- std::swap((*v)[selected], (*v)[last_in_range]);
- }
-}
-
-// Performs an in-place shuffle of the vector's elements.
-template <typename E>
-inline void Shuffle(internal::Random* random, std::vector<E>* v) {
- ShuffleRange(random, 0, static_cast<int>(v->size()), v);
-}
-
-// A function for deleting an object. Handy for being used as a
-// functor.
-template <typename T>
-static void Delete(T* x) {
- delete x;
-}
-
-// A predicate that checks the key of a TestProperty against a known key.
-//
-// TestPropertyKeyIs is copyable.
-class TestPropertyKeyIs {
- public:
- // Constructor.
- //
- // TestPropertyKeyIs has NO default constructor.
- explicit TestPropertyKeyIs(const std::string& key) : key_(key) {}
-
- // Returns true iff the test name of test property matches on key_.
- bool operator()(const TestProperty& test_property) const {
- return test_property.key() == key_;
- }
-
- private:
- std::string key_;
-};
-
-// Class UnitTestOptions.
-//
-// This class contains functions for processing options the user
-// specifies when running the tests. It has only static members.
-//
-// In most cases, the user can specify an option using either an
-// environment variable or a command line flag. E.g. you can set the
-// test filter using either GTEST_FILTER or --gtest_filter. If both
-// the variable and the flag are present, the latter overrides the
-// former.
-class GTEST_API_ UnitTestOptions {
- public:
- // Functions for processing the gtest_output flag.
-
- // Returns the output format, or "" for normal printed output.
- static std::string GetOutputFormat();
-
- // Returns the absolute path of the requested output file, or the
- // default (test_detail.xml in the original working directory) if
- // none was explicitly specified.
- static std::string GetAbsolutePathToOutputFile();
-
- // Functions for processing the gtest_filter flag.
-
- // Returns true iff the wildcard pattern matches the string. The
- // first ':' or '\0' character in pattern marks the end of it.
- //
- // This recursive algorithm isn't very efficient, but is clear and
- // works well enough for matching test names, which are short.
- static bool PatternMatchesString(const char *pattern, const char *str);
-
- // Returns true iff the user-specified filter matches the test case
- // name and the test name.
- static bool FilterMatchesTest(const std::string &test_case_name,
- const std::string &test_name);
-
-#if GTEST_OS_WINDOWS
- // Function for supporting the gtest_catch_exception flag.
-
- // Returns EXCEPTION_EXECUTE_HANDLER if Google Test should handle the
- // given SEH exception, or EXCEPTION_CONTINUE_SEARCH otherwise.
- // This function is useful as an __except condition.
- static int GTestShouldProcessSEH(DWORD exception_code);
-#endif // GTEST_OS_WINDOWS
-
- // Returns true if "name" matches the ':' separated list of glob-style
- // filters in "filter".
- static bool MatchesFilter(const std::string& name, const char* filter);
-};
-
-// Returns the current application's name, removing directory path if that
-// is present. Used by UnitTestOptions::GetOutputFile.
-GTEST_API_ FilePath GetCurrentExecutableName();
-
-// The role interface for getting the OS stack trace as a string.
-class OsStackTraceGetterInterface {
- public:
- OsStackTraceGetterInterface() {}
- virtual ~OsStackTraceGetterInterface() {}
-
- // Returns the current OS stack trace as an std::string. Parameters:
- //
- // max_depth - the maximum number of stack frames to be included
- // in the trace.
- // skip_count - the number of top frames to be skipped; doesn't count
- // against max_depth.
- virtual string CurrentStackTrace(int max_depth, int skip_count) = 0;
-
- // UponLeavingGTest() should be called immediately before Google Test calls
- // user code. It saves some information about the current stack that
- // CurrentStackTrace() will use to find and hide Google Test stack frames.
- virtual void UponLeavingGTest() = 0;
-
- // This string is inserted in place of stack frames that are part of
- // Google Test's implementation.
- static const char* const kElidedFramesMarker;
-
- private:
- GTEST_DISALLOW_COPY_AND_ASSIGN_(OsStackTraceGetterInterface);
-};
-
-// A working implementation of the OsStackTraceGetterInterface interface.
-class OsStackTraceGetter : public OsStackTraceGetterInterface {
- public:
- OsStackTraceGetter() {}
-
- virtual string CurrentStackTrace(int max_depth, int skip_count);
- virtual void UponLeavingGTest();
-
- private:
- GTEST_DISALLOW_COPY_AND_ASSIGN_(OsStackTraceGetter);
-};
-
-// Information about a Google Test trace point.
-struct TraceInfo {
- const char* file;
- int line;
- std::string message;
-};
-
-// This is the default global test part result reporter used in UnitTestImpl.
-// This class should only be used by UnitTestImpl.
-class DefaultGlobalTestPartResultReporter
- : public TestPartResultReporterInterface {
- public:
- explicit DefaultGlobalTestPartResultReporter(UnitTestImpl* unit_test);
- // Implements the TestPartResultReporterInterface. Reports the test part
- // result in the current test.
- virtual void ReportTestPartResult(const TestPartResult& result);
-
- private:
- UnitTestImpl* const unit_test_;
-
- GTEST_DISALLOW_COPY_AND_ASSIGN_(DefaultGlobalTestPartResultReporter);
-};
-
-// This is the default per thread test part result reporter used in
-// UnitTestImpl. This class should only be used by UnitTestImpl.
-class DefaultPerThreadTestPartResultReporter
- : public TestPartResultReporterInterface {
- public:
- explicit DefaultPerThreadTestPartResultReporter(UnitTestImpl* unit_test);
- // Implements the TestPartResultReporterInterface. The implementation just
- // delegates to the current global test part result reporter of *unit_test_.
- virtual void ReportTestPartResult(const TestPartResult& result);
-
- private:
- UnitTestImpl* const unit_test_;
-
- GTEST_DISALLOW_COPY_AND_ASSIGN_(DefaultPerThreadTestPartResultReporter);
-};
-
-// The private implementation of the UnitTest class. We don't protect
-// the methods under a mutex, as this class is not accessible by a
-// user and the UnitTest class that delegates work to this class does
-// proper locking.
-class GTEST_API_ UnitTestImpl {
- public:
- explicit UnitTestImpl(UnitTest* parent);
- virtual ~UnitTestImpl();
-
- // There are two different ways to register your own TestPartResultReporter.
- // You can register your own repoter to listen either only for test results
- // from the current thread or for results from all threads.
- // By default, each per-thread test result repoter just passes a new
- // TestPartResult to the global test result reporter, which registers the
- // test part result for the currently running test.
-
- // Returns the global test part result reporter.
- TestPartResultReporterInterface* GetGlobalTestPartResultReporter();
-
- // Sets the global test part result reporter.
- void SetGlobalTestPartResultReporter(
- TestPartResultReporterInterface* reporter);
-
- // Returns the test part result reporter for the current thread.
- TestPartResultReporterInterface* GetTestPartResultReporterForCurrentThread();
-
- // Sets the test part result reporter for the current thread.
- void SetTestPartResultReporterForCurrentThread(
- TestPartResultReporterInterface* reporter);
-
- // Gets the number of successful test cases.
- int successful_test_case_count() const;
-
- // Gets the number of failed test cases.
- int failed_test_case_count() const;
-
- // Gets the number of all test cases.
- int total_test_case_count() const;
-
- // Gets the number of all test cases that contain at least one test
- // that should run.
- int test_case_to_run_count() const;
-
- // Gets the number of successful tests.
- int successful_test_count() const;
-
- // Gets the number of failed tests.
- int failed_test_count() const;
-
- // Gets the number of disabled tests that will be reported in the XML report.
- int reportable_disabled_test_count() const;
-
- // Gets the number of disabled tests.
- int disabled_test_count() const;
-
- // Gets the number of tests to be printed in the XML report.
- int reportable_test_count() const;
-
- // Gets the number of all tests.
- int total_test_count() const;
-
- // Gets the number of tests that should run.
- int test_to_run_count() const;
-
- // Gets the time of the test program start, in ms from the start of the
- // UNIX epoch.
- TimeInMillis start_timestamp() const { return start_timestamp_; }
-
- // Gets the elapsed time, in milliseconds.
- TimeInMillis elapsed_time() const { return elapsed_time_; }
-
- // Returns true iff the unit test passed (i.e. all test cases passed).
- bool Passed() const { return !Failed(); }
-
- // Returns true iff the unit test failed (i.e. some test case failed
- // or something outside of all tests failed).
- bool Failed() const {
- return failed_test_case_count() > 0 || ad_hoc_test_result()->Failed();
- }
-
- // Gets the i-th test case among all the test cases. i can range from 0 to
- // total_test_case_count() - 1. If i is not in that range, returns NULL.
- const TestCase* GetTestCase(int i) const {
- const int index = GetElementOr(test_case_indices_, i, -1);
- return index < 0 ? NULL : test_cases_[i];
- }
-
- // Gets the i-th test case among all the test cases. i can range from 0 to
- // total_test_case_count() - 1. If i is not in that range, returns NULL.
- TestCase* GetMutableTestCase(int i) {
- const int index = GetElementOr(test_case_indices_, i, -1);
- return index < 0 ? NULL : test_cases_[index];
- }
-
- // Provides access to the event listener list.
- TestEventListeners* listeners() { return &listeners_; }
-
- // Returns the TestResult for the test that's currently running, or
- // the TestResult for the ad hoc test if no test is running.
- TestResult* current_test_result();
-
- // Returns the TestResult for the ad hoc test.
- const TestResult* ad_hoc_test_result() const { return &ad_hoc_test_result_; }
-
- // Sets the OS stack trace getter.
- //
- // Does nothing if the input and the current OS stack trace getter
- // are the same; otherwise, deletes the old getter and makes the
- // input the current getter.
- void set_os_stack_trace_getter(OsStackTraceGetterInterface* getter);
-
- // Returns the current OS stack trace getter if it is not NULL;
- // otherwise, creates an OsStackTraceGetter, makes it the current
- // getter, and returns it.
- OsStackTraceGetterInterface* os_stack_trace_getter();
-
- // Returns the current OS stack trace as an std::string.
- //
- // The maximum number of stack frames to be included is specified by
- // the gtest_stack_trace_depth flag. The skip_count parameter
- // specifies the number of top frames to be skipped, which doesn't
- // count against the number of frames to be included.
- //
- // For example, if Foo() calls Bar(), which in turn calls
- // CurrentOsStackTraceExceptTop(1), Foo() will be included in the
- // trace but Bar() and CurrentOsStackTraceExceptTop() won't.
- std::string CurrentOsStackTraceExceptTop(int skip_count) GTEST_NO_INLINE_;
-
- // Finds and returns a TestCase with the given name. If one doesn't
- // exist, creates one and returns it.
- //
- // Arguments:
- //
- // test_case_name: name of the test case
- // type_param: the name of the test's type parameter, or NULL if
- // this is not a typed or a type-parameterized test.
- // set_up_tc: pointer to the function that sets up the test case
- // tear_down_tc: pointer to the function that tears down the test case
- TestCase* GetTestCase(const char* test_case_name,
- const char* type_param,
- Test::SetUpTestCaseFunc set_up_tc,
- Test::TearDownTestCaseFunc tear_down_tc);
-
- // Adds a TestInfo to the unit test.
- //
- // Arguments:
- //
- // set_up_tc: pointer to the function that sets up the test case
- // tear_down_tc: pointer to the function that tears down the test case
- // test_info: the TestInfo object
- void AddTestInfo(Test::SetUpTestCaseFunc set_up_tc,
- Test::TearDownTestCaseFunc tear_down_tc,
- TestInfo* test_info) {
- // In order to support thread-safe death tests, we need to
- // remember the original working directory when the test program
- // was first invoked. We cannot do this in RUN_ALL_TESTS(), as
- // the user may have changed the current directory before calling
- // RUN_ALL_TESTS(). Therefore we capture the current directory in
- // AddTestInfo(), which is called to register a TEST or TEST_F
- // before main() is reached.
- if (original_working_dir_.IsEmpty()) {
- original_working_dir_.Set(FilePath::GetCurrentDir());
- GTEST_CHECK_(!original_working_dir_.IsEmpty())
- << "Failed to get the current working directory.";
- }
-
- GetTestCase(test_info->test_case_name(),
- test_info->type_param(),
- set_up_tc,
- tear_down_tc)->AddTestInfo(test_info);
- }
-
-#if GTEST_HAS_PARAM_TEST
- // Returns ParameterizedTestCaseRegistry object used to keep track of
- // value-parameterized tests and instantiate and register them.
- internal::ParameterizedTestCaseRegistry& parameterized_test_registry() {
- return parameterized_test_registry_;
- }
-#endif // GTEST_HAS_PARAM_TEST
-
- // Sets the TestCase object for the test that's currently running.
- void set_current_test_case(TestCase* a_current_test_case) {
- current_test_case_ = a_current_test_case;
- }
-
- // Sets the TestInfo object for the test that's currently running. If
- // current_test_info is NULL, the assertion results will be stored in
- // ad_hoc_test_result_.
- void set_current_test_info(TestInfo* a_current_test_info) {
- current_test_info_ = a_current_test_info;
- }
-
- // Registers all parameterized tests defined using TEST_P and
- // INSTANTIATE_TEST_CASE_P, creating regular tests for each test/parameter
- // combination. This method can be called more then once; it has guards
- // protecting from registering the tests more then once. If
- // value-parameterized tests are disabled, RegisterParameterizedTests is
- // present but does nothing.
- void RegisterParameterizedTests();
-
- // Runs all tests in this UnitTest object, prints the result, and
- // returns true if all tests are successful. If any exception is
- // thrown during a test, this test is considered to be failed, but
- // the rest of the tests will still be run.
- bool RunAllTests();
-
- // Clears the results of all tests, except the ad hoc tests.
- void ClearNonAdHocTestResult() {
- ForEach(test_cases_, TestCase::ClearTestCaseResult);
- }
-
- // Clears the results of ad-hoc test assertions.
- void ClearAdHocTestResult() {
- ad_hoc_test_result_.Clear();
- }
-
- // Adds a TestProperty to the current TestResult object when invoked in a
- // context of a test or a test case, or to the global property set. If the
- // result already contains a property with the same key, the value will be
- // updated.
- void RecordProperty(const TestProperty& test_property);
-
- enum ReactionToSharding {
- HONOR_SHARDING_PROTOCOL,
- IGNORE_SHARDING_PROTOCOL
- };
-
- // Matches the full name of each test against the user-specified
- // filter to decide whether the test should run, then records the
- // result in each TestCase and TestInfo object.
- // If shard_tests == HONOR_SHARDING_PROTOCOL, further filters tests
- // based on sharding variables in the environment.
- // Returns the number of tests that should run.
- int FilterTests(ReactionToSharding shard_tests);
-
- // Prints the names of the tests matching the user-specified filter flag.
- void ListTestsMatchingFilter();
-
- const TestCase* current_test_case() const { return current_test_case_; }
- TestInfo* current_test_info() { return current_test_info_; }
- const TestInfo* current_test_info() const { return current_test_info_; }
-
- // Returns the vector of environments that need to be set-up/torn-down
- // before/after the tests are run.
- std::vector<Environment*>& environments() { return environments_; }
-
- // Getters for the per-thread Google Test trace stack.
- std::vector<TraceInfo>& gtest_trace_stack() {
- return *(gtest_trace_stack_.pointer());
- }
- const std::vector<TraceInfo>& gtest_trace_stack() const {
- return gtest_trace_stack_.get();
- }
-
-#if GTEST_HAS_DEATH_TEST
- void InitDeathTestSubprocessControlInfo() {
- internal_run_death_test_flag_.reset(ParseInternalRunDeathTestFlag());
- }
- // Returns a pointer to the parsed --gtest_internal_run_death_test
- // flag, or NULL if that flag was not specified.
- // This information is useful only in a death test child process.
- // Must not be called before a call to InitGoogleTest.
- const InternalRunDeathTestFlag* internal_run_death_test_flag() const {
- return internal_run_death_test_flag_.get();
- }
-
- // Returns a pointer to the current death test factory.
- internal::DeathTestFactory* death_test_factory() {
- return death_test_factory_.get();
- }
-
- void SuppressTestEventsIfInSubprocess();
-
- friend class ReplaceDeathTestFactory;
-#endif // GTEST_HAS_DEATH_TEST
-
- // Initializes the event listener performing XML output as specified by
- // UnitTestOptions. Must not be called before InitGoogleTest.
- void ConfigureXmlOutput();
-
-#if GTEST_CAN_STREAM_RESULTS_
- // Initializes the event listener for streaming test results to a socket.
- // Must not be called before InitGoogleTest.
- void ConfigureStreamingOutput();
-#endif
-
- // Performs initialization dependent upon flag values obtained in
- // ParseGoogleTestFlagsOnly. Is called from InitGoogleTest after the call to
- // ParseGoogleTestFlagsOnly. In case a user neglects to call InitGoogleTest
- // this function is also called from RunAllTests. Since this function can be
- // called more than once, it has to be idempotent.
- void PostFlagParsingInit();
-
- // Gets the random seed used at the start of the current test iteration.
- int random_seed() const { return random_seed_; }
-
- // Gets the random number generator.
- internal::Random* random() { return &random_; }
-
- // Shuffles all test cases, and the tests within each test case,
- // making sure that death tests are still run first.
- void ShuffleTests();
-
- // Restores the test cases and tests to their order before the first shuffle.
- void UnshuffleTests();
-
- // Returns the value of GTEST_FLAG(catch_exceptions) at the moment
- // UnitTest::Run() starts.
- bool catch_exceptions() const { return catch_exceptions_; }
-
- private:
- friend class ::testing::UnitTest;
-
- // Used by UnitTest::Run() to capture the state of
- // GTEST_FLAG(catch_exceptions) at the moment it starts.
- void set_catch_exceptions(bool value) { catch_exceptions_ = value; }
-
- // The UnitTest object that owns this implementation object.
- UnitTest* const parent_;
-
- // The working directory when the first TEST() or TEST_F() was
- // executed.
- internal::FilePath original_working_dir_;
-
- // The default test part result reporters.
- DefaultGlobalTestPartResultReporter default_global_test_part_result_reporter_;
- DefaultPerThreadTestPartResultReporter
- default_per_thread_test_part_result_reporter_;
-
- // Points to (but doesn't own) the global test part result reporter.
- TestPartResultReporterInterface* global_test_part_result_repoter_;
-
- // Protects read and write access to global_test_part_result_reporter_.
- internal::Mutex global_test_part_result_reporter_mutex_;
-
- // Points to (but doesn't own) the per-thread test part result reporter.
- internal::ThreadLocal<TestPartResultReporterInterface*>
- per_thread_test_part_result_reporter_;
-
- // The vector of environments that need to be set-up/torn-down
- // before/after the tests are run.
- std::vector<Environment*> environments_;
-
- // The vector of TestCases in their original order. It owns the
- // elements in the vector.
- std::vector<TestCase*> test_cases_;
-
- // Provides a level of indirection for the test case list to allow
- // easy shuffling and restoring the test case order. The i-th
- // element of this vector is the index of the i-th test case in the
- // shuffled order.
- std::vector<int> test_case_indices_;
-
-#if GTEST_HAS_PARAM_TEST
- // ParameterizedTestRegistry object used to register value-parameterized
- // tests.
- internal::ParameterizedTestCaseRegistry parameterized_test_registry_;
-
- // Indicates whether RegisterParameterizedTests() has been called already.
- bool parameterized_tests_registered_;
-#endif // GTEST_HAS_PARAM_TEST
-
- // Index of the last death test case registered. Initially -1.
- int last_death_test_case_;
-
- // This points to the TestCase for the currently running test. It
- // changes as Google Test goes through one test case after another.
- // When no test is running, this is set to NULL and Google Test
- // stores assertion results in ad_hoc_test_result_. Initially NULL.
- TestCase* current_test_case_;
-
- // This points to the TestInfo for the currently running test. It
- // changes as Google Test goes through one test after another. When
- // no test is running, this is set to NULL and Google Test stores
- // assertion results in ad_hoc_test_result_. Initially NULL.
- TestInfo* current_test_info_;
-
- // Normally, a user only writes assertions inside a TEST or TEST_F,
- // or inside a function called by a TEST or TEST_F. Since Google
- // Test keeps track of which test is current running, it can
- // associate such an assertion with the test it belongs to.
- //
- // If an assertion is encountered when no TEST or TEST_F is running,
- // Google Test attributes the assertion result to an imaginary "ad hoc"
- // test, and records the result in ad_hoc_test_result_.
- TestResult ad_hoc_test_result_;
-
- // The list of event listeners that can be used to track events inside
- // Google Test.
- TestEventListeners listeners_;
-
- // The OS stack trace getter. Will be deleted when the UnitTest
- // object is destructed. By default, an OsStackTraceGetter is used,
- // but the user can set this field to use a custom getter if that is
- // desired.
- OsStackTraceGetterInterface* os_stack_trace_getter_;
-
- // True iff PostFlagParsingInit() has been called.
- bool post_flag_parse_init_performed_;
-
- // The random number seed used at the beginning of the test run.
- int random_seed_;
-
- // Our random number generator.
- internal::Random random_;
-
- // The time of the test program start, in ms from the start of the
- // UNIX epoch.
- TimeInMillis start_timestamp_;
-
- // How long the test took to run, in milliseconds.
- TimeInMillis elapsed_time_;
-
-#if GTEST_HAS_DEATH_TEST
- // The decomposed components of the gtest_internal_run_death_test flag,
- // parsed when RUN_ALL_TESTS is called.
- internal::scoped_ptr<InternalRunDeathTestFlag> internal_run_death_test_flag_;
- internal::scoped_ptr<internal::DeathTestFactory> death_test_factory_;
-#endif // GTEST_HAS_DEATH_TEST
-
- // A per-thread stack of traces created by the SCOPED_TRACE() macro.
- internal::ThreadLocal<std::vector<TraceInfo> > gtest_trace_stack_;
-
- // The value of GTEST_FLAG(catch_exceptions) at the moment RunAllTests()
- // starts.
- bool catch_exceptions_;
-
- GTEST_DISALLOW_COPY_AND_ASSIGN_(UnitTestImpl);
-}; // class UnitTestImpl
-
-// Convenience function for accessing the global UnitTest
-// implementation object.
-inline UnitTestImpl* GetUnitTestImpl() {
- return UnitTest::GetInstance()->impl();
-}
-
-#if GTEST_USES_SIMPLE_RE
-
-// Internal helper functions for implementing the simple regular
-// expression matcher.
-GTEST_API_ bool IsInSet(char ch, const char* str);
-GTEST_API_ bool IsAsciiDigit(char ch);
-GTEST_API_ bool IsAsciiPunct(char ch);
-GTEST_API_ bool IsRepeat(char ch);
-GTEST_API_ bool IsAsciiWhiteSpace(char ch);
-GTEST_API_ bool IsAsciiWordChar(char ch);
-GTEST_API_ bool IsValidEscape(char ch);
-GTEST_API_ bool AtomMatchesChar(bool escaped, char pattern, char ch);
-GTEST_API_ bool ValidateRegex(const char* regex);
-GTEST_API_ bool MatchRegexAtHead(const char* regex, const char* str);
-GTEST_API_ bool MatchRepetitionAndRegexAtHead(
- bool escaped, char ch, char repeat, const char* regex, const char* str);
-GTEST_API_ bool MatchRegexAnywhere(const char* regex, const char* str);
-
-#endif // GTEST_USES_SIMPLE_RE
-
-// Parses the command line for Google Test flags, without initializing
-// other parts of Google Test.
-GTEST_API_ void ParseGoogleTestFlagsOnly(int* argc, char** argv);
-GTEST_API_ void ParseGoogleTestFlagsOnly(int* argc, wchar_t** argv);
-
-#if GTEST_HAS_DEATH_TEST
-
-// Returns the message describing the last system error, regardless of the
-// platform.
-GTEST_API_ std::string GetLastErrnoDescription();
-
-// Attempts to parse a string into a positive integer pointed to by the
-// number parameter. Returns true if that is possible.
-// GTEST_HAS_DEATH_TEST implies that we have ::std::string, so we can use
-// it here.
-template <typename Integer>
-bool ParseNaturalNumber(const ::std::string& str, Integer* number) {
- // Fail fast if the given string does not begin with a digit;
- // this bypasses strtoXXX's "optional leading whitespace and plus
- // or minus sign" semantics, which are undesirable here.
- if (str.empty() || !IsDigit(str[0])) {
- return false;
- }
- errno = 0;
-
- char* end;
- // BiggestConvertible is the largest integer type that system-provided
- // string-to-number conversion routines can return.
-
-# if GTEST_OS_WINDOWS && !defined(__GNUC__)
-
- // MSVC and C++ Builder define __int64 instead of the standard long long.
- typedef unsigned __int64 BiggestConvertible;
- const BiggestConvertible parsed = _strtoui64(str.c_str(), &end, 10);
-
-# else
-
- typedef unsigned long long BiggestConvertible; // NOLINT
- const BiggestConvertible parsed = strtoull(str.c_str(), &end, 10);
-
-# endif // GTEST_OS_WINDOWS && !defined(__GNUC__)
-
- const bool parse_success = *end == '\0' && errno == 0;
-
- // TODO(vladl@google.com): Convert this to compile time assertion when it is
- // available.
- GTEST_CHECK_(sizeof(Integer) <= sizeof(parsed));
-
- const Integer result = static_cast<Integer>(parsed);
- if (parse_success && static_cast<BiggestConvertible>(result) == parsed) {
- *number = result;
- return true;
- }
- return false;
-}
-#endif // GTEST_HAS_DEATH_TEST
-
-// TestResult contains some private methods that should be hidden from
-// Google Test user but are required for testing. This class allow our tests
-// to access them.
-//
-// This class is supplied only for the purpose of testing Google Test's own
-// constructs. Do not use it in user tests, either directly or indirectly.
-class TestResultAccessor {
- public:
- static void RecordProperty(TestResult* test_result,
- const std::string& xml_element,
- const TestProperty& property) {
- test_result->RecordProperty(xml_element, property);
- }
-
- static void ClearTestPartResults(TestResult* test_result) {
- test_result->ClearTestPartResults();
- }
-
- static const std::vector<testing::TestPartResult>& test_part_results(
- const TestResult& test_result) {
- return test_result.test_part_results();
- }
-};
-
-#if GTEST_CAN_STREAM_RESULTS_
-
-// Streams test results to the given port on the given host machine.
-class GTEST_API_ StreamingListener : public EmptyTestEventListener {
- public:
- // Abstract base class for writing strings to a socket.
- class AbstractSocketWriter {
- public:
- virtual ~AbstractSocketWriter() {}
-
- // Sends a string to the socket.
- virtual void Send(const string& message) = 0;
-
- // Closes the socket.
- virtual void CloseConnection() {}
-
- // Sends a string and a newline to the socket.
- void SendLn(const string& message) {
- Send(message + "\n");
- }
- };
-
- // Concrete class for actually writing strings to a socket.
- class SocketWriter : public AbstractSocketWriter {
- public:
- SocketWriter(const string& host, const string& port)
- : sockfd_(-1), host_name_(host), port_num_(port) {
- MakeConnection();
- }
-
- virtual ~SocketWriter() {
- if (sockfd_ != -1)
- CloseConnection();
- }
-
- // Sends a string to the socket.
- virtual void Send(const string& message) {
- GTEST_CHECK_(sockfd_ != -1)
- << "Send() can be called only when there is a connection.";
-
- const int len = static_cast<int>(message.length());
- if (write(sockfd_, message.c_str(), len) != len) {
- GTEST_LOG_(WARNING)
- << "stream_result_to: failed to stream to "
- << host_name_ << ":" << port_num_;
- }
- }
-
- private:
- // Creates a client socket and connects to the server.
- void MakeConnection();
-
- // Closes the socket.
- void CloseConnection() {
- GTEST_CHECK_(sockfd_ != -1)
- << "CloseConnection() can be called only when there is a connection.";
-
- close(sockfd_);
- sockfd_ = -1;
- }
-
- int sockfd_; // socket file descriptor
- const string host_name_;
- const string port_num_;
-
- GTEST_DISALLOW_COPY_AND_ASSIGN_(SocketWriter);
- }; // class SocketWriter
-
- // Escapes '=', '&', '%', and '\n' characters in str as "%xx".
- static string UrlEncode(const char* str);
-
- StreamingListener(const string& host, const string& port)
- : socket_writer_(new SocketWriter(host, port)) { Start(); }
-
- explicit StreamingListener(AbstractSocketWriter* socket_writer)
- : socket_writer_(socket_writer) { Start(); }
-
- void OnTestProgramStart(const UnitTest& /* unit_test */) {
- SendLn("event=TestProgramStart");
- }
-
- void OnTestProgramEnd(const UnitTest& unit_test) {
- // Note that Google Test current only report elapsed time for each
- // test iteration, not for the entire test program.
- SendLn("event=TestProgramEnd&passed=" + FormatBool(unit_test.Passed()));
-
- // Notify the streaming server to stop.
- socket_writer_->CloseConnection();
- }
-
- void OnTestIterationStart(const UnitTest& /* unit_test */, int iteration) {
- SendLn("event=TestIterationStart&iteration=" +
- StreamableToString(iteration));
- }
-
- void OnTestIterationEnd(const UnitTest& unit_test, int /* iteration */) {
- SendLn("event=TestIterationEnd&passed=" +
- FormatBool(unit_test.Passed()) + "&elapsed_time=" +
- StreamableToString(unit_test.elapsed_time()) + "ms");
- }
-
- void OnTestCaseStart(const TestCase& test_case) {
- SendLn(std::string("event=TestCaseStart&name=") + test_case.name());
- }
-
- void OnTestCaseEnd(const TestCase& test_case) {
- SendLn("event=TestCaseEnd&passed=" + FormatBool(test_case.Passed())
- + "&elapsed_time=" + StreamableToString(test_case.elapsed_time())
- + "ms");
- }
-
- void OnTestStart(const TestInfo& test_info) {
- SendLn(std::string("event=TestStart&name=") + test_info.name());
- }
-
- void OnTestEnd(const TestInfo& test_info) {
- SendLn("event=TestEnd&passed=" +
- FormatBool((test_info.result())->Passed()) +
- "&elapsed_time=" +
- StreamableToString((test_info.result())->elapsed_time()) + "ms");
- }
-
- void OnTestPartResult(const TestPartResult& test_part_result) {
- const char* file_name = test_part_result.file_name();
- if (file_name == NULL)
- file_name = "";
- SendLn("event=TestPartResult&file=" + UrlEncode(file_name) +
- "&line=" + StreamableToString(test_part_result.line_number()) +
- "&message=" + UrlEncode(test_part_result.message()));
- }
-
- private:
- // Sends the given message and a newline to the socket.
- void SendLn(const string& message) { socket_writer_->SendLn(message); }
-
- // Called at the start of streaming to notify the receiver what
- // protocol we are using.
- void Start() { SendLn("gtest_streaming_protocol_version=1.0"); }
-
- string FormatBool(bool value) { return value ? "1" : "0"; }
-
- const scoped_ptr<AbstractSocketWriter> socket_writer_;
-
- GTEST_DISALLOW_COPY_AND_ASSIGN_(StreamingListener);
-}; // class StreamingListener
-
-#endif // GTEST_CAN_STREAM_RESULTS_
-
-} // namespace internal
-} // namespace testing
-
-#endif // GTEST_SRC_GTEST_INTERNAL_INL_H_
diff --git a/third_party/aom/third_party/googletest/src/googletest/src/gtest-port.cc b/third_party/aom/third_party/googletest/src/googletest/src/gtest-port.cc
deleted file mode 100644
index e5bf3dd2b..000000000
--- a/third_party/aom/third_party/googletest/src/googletest/src/gtest-port.cc
+++ /dev/null
@@ -1,1259 +0,0 @@
-// Copyright 2008, Google Inc.
-// All rights reserved.
-//
-// Redistribution and use in source and binary forms, with or without
-// modification, are permitted provided that the following conditions are
-// met:
-//
-// * Redistributions of source code must retain the above copyright
-// notice, this list of conditions and the following disclaimer.
-// * Redistributions in binary form must reproduce the above
-// copyright notice, this list of conditions and the following disclaimer
-// in the documentation and/or other materials provided with the
-// distribution.
-// * Neither the name of Google Inc. nor the names of its
-// contributors may be used to endorse or promote products derived from
-// this software without specific prior written permission.
-//
-// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
-// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
-// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
-// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
-// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
-// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
-// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
-// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
-// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-//
-// Author: wan@google.com (Zhanyong Wan)
-
-#include "gtest/internal/gtest-port.h"
-
-#include <limits.h>
-#include <stdlib.h>
-#include <stdio.h>
-#include <string.h>
-#include <fstream>
-
-#if GTEST_OS_WINDOWS
-# include <windows.h>
-# include <io.h>
-# include <sys/stat.h>
-# include <map> // Used in ThreadLocal.
-#else
-# include <unistd.h>
-#endif // GTEST_OS_WINDOWS
-
-#if GTEST_OS_MAC
-# include <mach/mach_init.h>
-# include <mach/task.h>
-# include <mach/vm_map.h>
-#endif // GTEST_OS_MAC
-
-#if GTEST_OS_QNX
-# include <devctl.h>
-# include <fcntl.h>
-# include <sys/procfs.h>
-#endif // GTEST_OS_QNX
-
-#if GTEST_OS_AIX
-# include <procinfo.h>
-# include <sys/types.h>
-#endif // GTEST_OS_AIX
-
-#include "gtest/gtest-spi.h"
-#include "gtest/gtest-message.h"
-#include "gtest/internal/gtest-internal.h"
-#include "gtest/internal/gtest-string.h"
-
-// Indicates that this translation unit is part of Google Test's
-// implementation. It must come before gtest-internal-inl.h is
-// included, or there will be a compiler error. This trick exists to
-// prevent the accidental inclusion of gtest-internal-inl.h in the
-// user's code.
-#define GTEST_IMPLEMENTATION_ 1
-#include "src/gtest-internal-inl.h"
-#undef GTEST_IMPLEMENTATION_
-
-namespace testing {
-namespace internal {
-
-#if defined(_MSC_VER) || defined(__BORLANDC__)
-// MSVC and C++Builder do not provide a definition of STDERR_FILENO.
-const int kStdOutFileno = 1;
-const int kStdErrFileno = 2;
-#else
-const int kStdOutFileno = STDOUT_FILENO;
-const int kStdErrFileno = STDERR_FILENO;
-#endif // _MSC_VER
-
-#if GTEST_OS_LINUX
-
-namespace {
-template <typename T>
-T ReadProcFileField(const string& filename, int field) {
- std::string dummy;
- std::ifstream file(filename.c_str());
- while (field-- > 0) {
- file >> dummy;
- }
- T output = 0;
- file >> output;
- return output;
-}
-} // namespace
-
-// Returns the number of active threads, or 0 when there is an error.
-size_t GetThreadCount() {
- const string filename =
- (Message() << "/proc/" << getpid() << "/stat").GetString();
- return ReadProcFileField<int>(filename, 19);
-}
-
-#elif GTEST_OS_MAC
-
-size_t GetThreadCount() {
- const task_t task = mach_task_self();
- mach_msg_type_number_t thread_count;
- thread_act_array_t thread_list;
- const kern_return_t status = task_threads(task, &thread_list, &thread_count);
- if (status == KERN_SUCCESS) {
- // task_threads allocates resources in thread_list and we need to free them
- // to avoid leaks.
- vm_deallocate(task,
- reinterpret_cast<vm_address_t>(thread_list),
- sizeof(thread_t) * thread_count);
- return static_cast<size_t>(thread_count);
- } else {
- return 0;
- }
-}
-
-#elif GTEST_OS_QNX
-
-// Returns the number of threads running in the process, or 0 to indicate that
-// we cannot detect it.
-size_t GetThreadCount() {
- const int fd = open("/proc/self/as", O_RDONLY);
- if (fd < 0) {
- return 0;
- }
- procfs_info process_info;
- const int status =
- devctl(fd, DCMD_PROC_INFO, &process_info, sizeof(process_info), NULL);
- close(fd);
- if (status == EOK) {
- return static_cast<size_t>(process_info.num_threads);
- } else {
- return 0;
- }
-}
-
-#elif GTEST_OS_AIX
-
-size_t GetThreadCount() {
- struct procentry64 entry;
- pid_t pid = getpid();
- int status = getprocs64(&entry, sizeof(entry), NULL, 0, &pid, 1);
- if (status == 1) {
- return entry.pi_thcount;
- } else {
- return 0;
- }
-}
-
-#else
-
-size_t GetThreadCount() {
- // There's no portable way to detect the number of threads, so we just
- // return 0 to indicate that we cannot detect it.
- return 0;
-}
-
-#endif // GTEST_OS_LINUX
-
-#if GTEST_IS_THREADSAFE && GTEST_OS_WINDOWS
-
-void SleepMilliseconds(int n) {
- ::Sleep(n);
-}
-
-AutoHandle::AutoHandle()
- : handle_(INVALID_HANDLE_VALUE) {}
-
-AutoHandle::AutoHandle(Handle handle)
- : handle_(handle) {}
-
-AutoHandle::~AutoHandle() {
- Reset();
-}
-
-AutoHandle::Handle AutoHandle::Get() const {
- return handle_;
-}
-
-void AutoHandle::Reset() {
- Reset(INVALID_HANDLE_VALUE);
-}
-
-void AutoHandle::Reset(HANDLE handle) {
- // Resetting with the same handle we already own is invalid.
- if (handle_ != handle) {
- if (IsCloseable()) {
- ::CloseHandle(handle_);
- }
- handle_ = handle;
- } else {
- GTEST_CHECK_(!IsCloseable())
- << "Resetting a valid handle to itself is likely a programmer error "
- "and thus not allowed.";
- }
-}
-
-bool AutoHandle::IsCloseable() const {
- // Different Windows APIs may use either of these values to represent an
- // invalid handle.
- return handle_ != NULL && handle_ != INVALID_HANDLE_VALUE;
-}
-
-Notification::Notification()
- : event_(::CreateEvent(NULL, // Default security attributes.
- TRUE, // Do not reset automatically.
- FALSE, // Initially unset.
- NULL)) { // Anonymous event.
- GTEST_CHECK_(event_.Get() != NULL);
-}
-
-void Notification::Notify() {
- GTEST_CHECK_(::SetEvent(event_.Get()) != FALSE);
-}
-
-void Notification::WaitForNotification() {
- GTEST_CHECK_(
- ::WaitForSingleObject(event_.Get(), INFINITE) == WAIT_OBJECT_0);
-}
-
-Mutex::Mutex()
- : owner_thread_id_(0),
- type_(kDynamic),
- critical_section_init_phase_(0),
- critical_section_(new CRITICAL_SECTION) {
- ::InitializeCriticalSection(critical_section_);
-}
-
-Mutex::~Mutex() {
- // Static mutexes are leaked intentionally. It is not thread-safe to try
- // to clean them up.
- // TODO(yukawa): Switch to Slim Reader/Writer (SRW) Locks, which requires
- // nothing to clean it up but is available only on Vista and later.
- // http://msdn.microsoft.com/en-us/library/windows/desktop/aa904937.aspx
- if (type_ == kDynamic) {
- ::DeleteCriticalSection(critical_section_);
- delete critical_section_;
- critical_section_ = NULL;
- }
-}
-
-void Mutex::Lock() {
- ThreadSafeLazyInit();
- ::EnterCriticalSection(critical_section_);
- owner_thread_id_ = ::GetCurrentThreadId();
-}
-
-void Mutex::Unlock() {
- ThreadSafeLazyInit();
- // We don't protect writing to owner_thread_id_ here, as it's the
- // caller's responsibility to ensure that the current thread holds the
- // mutex when this is called.
- owner_thread_id_ = 0;
- ::LeaveCriticalSection(critical_section_);
-}
-
-// Does nothing if the current thread holds the mutex. Otherwise, crashes
-// with high probability.
-void Mutex::AssertHeld() {
- ThreadSafeLazyInit();
- GTEST_CHECK_(owner_thread_id_ == ::GetCurrentThreadId())
- << "The current thread is not holding the mutex @" << this;
-}
-
-// Initializes owner_thread_id_ and critical_section_ in static mutexes.
-void Mutex::ThreadSafeLazyInit() {
- // Dynamic mutexes are initialized in the constructor.
- if (type_ == kStatic) {
- switch (
- ::InterlockedCompareExchange(&critical_section_init_phase_, 1L, 0L)) {
- case 0:
- // If critical_section_init_phase_ was 0 before the exchange, we
- // are the first to test it and need to perform the initialization.
- owner_thread_id_ = 0;
- critical_section_ = new CRITICAL_SECTION;
- ::InitializeCriticalSection(critical_section_);
- // Updates the critical_section_init_phase_ to 2 to signal
- // initialization complete.
- GTEST_CHECK_(::InterlockedCompareExchange(
- &critical_section_init_phase_, 2L, 1L) ==
- 1L);
- break;
- case 1:
- // Somebody else is already initializing the mutex; spin until they
- // are done.
- while (::InterlockedCompareExchange(&critical_section_init_phase_,
- 2L,
- 2L) != 2L) {
- // Possibly yields the rest of the thread's time slice to other
- // threads.
- ::Sleep(0);
- }
- break;
-
- case 2:
- break; // The mutex is already initialized and ready for use.
-
- default:
- GTEST_CHECK_(false)
- << "Unexpected value of critical_section_init_phase_ "
- << "while initializing a static mutex.";
- }
- }
-}
-
-namespace {
-
-class ThreadWithParamSupport : public ThreadWithParamBase {
- public:
- static HANDLE CreateThread(Runnable* runnable,
- Notification* thread_can_start) {
- ThreadMainParam* param = new ThreadMainParam(runnable, thread_can_start);
- DWORD thread_id;
- // TODO(yukawa): Consider to use _beginthreadex instead.
- HANDLE thread_handle = ::CreateThread(
- NULL, // Default security.
- 0, // Default stack size.
- &ThreadWithParamSupport::ThreadMain,
- param, // Parameter to ThreadMainStatic
- 0x0, // Default creation flags.
- &thread_id); // Need a valid pointer for the call to work under Win98.
- GTEST_CHECK_(thread_handle != NULL) << "CreateThread failed with error "
- << ::GetLastError() << ".";
- if (thread_handle == NULL) {
- delete param;
- }
- return thread_handle;
- }
-
- private:
- struct ThreadMainParam {
- ThreadMainParam(Runnable* runnable, Notification* thread_can_start)
- : runnable_(runnable),
- thread_can_start_(thread_can_start) {
- }
- scoped_ptr<Runnable> runnable_;
- // Does not own.
- Notification* thread_can_start_;
- };
-
- static DWORD WINAPI ThreadMain(void* ptr) {
- // Transfers ownership.
- scoped_ptr<ThreadMainParam> param(static_cast<ThreadMainParam*>(ptr));
- if (param->thread_can_start_ != NULL)
- param->thread_can_start_->WaitForNotification();
- param->runnable_->Run();
- return 0;
- }
-
- // Prohibit instantiation.
- ThreadWithParamSupport();
-
- GTEST_DISALLOW_COPY_AND_ASSIGN_(ThreadWithParamSupport);
-};
-
-} // namespace
-
-ThreadWithParamBase::ThreadWithParamBase(Runnable *runnable,
- Notification* thread_can_start)
- : thread_(ThreadWithParamSupport::CreateThread(runnable,
- thread_can_start)) {
-}
-
-ThreadWithParamBase::~ThreadWithParamBase() {
- Join();
-}
-
-void ThreadWithParamBase::Join() {
- GTEST_CHECK_(::WaitForSingleObject(thread_.Get(), INFINITE) == WAIT_OBJECT_0)
- << "Failed to join the thread with error " << ::GetLastError() << ".";
-}
-
-// Maps a thread to a set of ThreadIdToThreadLocals that have values
-// instantiated on that thread and notifies them when the thread exits. A
-// ThreadLocal instance is expected to persist until all threads it has
-// values on have terminated.
-class ThreadLocalRegistryImpl {
- public:
- // Registers thread_local_instance as having value on the current thread.
- // Returns a value that can be used to identify the thread from other threads.
- static ThreadLocalValueHolderBase* GetValueOnCurrentThread(
- const ThreadLocalBase* thread_local_instance) {
- DWORD current_thread = ::GetCurrentThreadId();
- MutexLock lock(&mutex_);
- ThreadIdToThreadLocals* const thread_to_thread_locals =
- GetThreadLocalsMapLocked();
- ThreadIdToThreadLocals::iterator thread_local_pos =
- thread_to_thread_locals->find(current_thread);
- if (thread_local_pos == thread_to_thread_locals->end()) {
- thread_local_pos = thread_to_thread_locals->insert(
- std::make_pair(current_thread, ThreadLocalValues())).first;
- StartWatcherThreadFor(current_thread);
- }
- ThreadLocalValues& thread_local_values = thread_local_pos->second;
- ThreadLocalValues::iterator value_pos =
- thread_local_values.find(thread_local_instance);
- if (value_pos == thread_local_values.end()) {
- value_pos =
- thread_local_values
- .insert(std::make_pair(
- thread_local_instance,
- linked_ptr<ThreadLocalValueHolderBase>(
- thread_local_instance->NewValueForCurrentThread())))
- .first;
- }
- return value_pos->second.get();
- }
-
- static void OnThreadLocalDestroyed(
- const ThreadLocalBase* thread_local_instance) {
- std::vector<linked_ptr<ThreadLocalValueHolderBase> > value_holders;
- // Clean up the ThreadLocalValues data structure while holding the lock, but
- // defer the destruction of the ThreadLocalValueHolderBases.
- {
- MutexLock lock(&mutex_);
- ThreadIdToThreadLocals* const thread_to_thread_locals =
- GetThreadLocalsMapLocked();
- for (ThreadIdToThreadLocals::iterator it =
- thread_to_thread_locals->begin();
- it != thread_to_thread_locals->end();
- ++it) {
- ThreadLocalValues& thread_local_values = it->second;
- ThreadLocalValues::iterator value_pos =
- thread_local_values.find(thread_local_instance);
- if (value_pos != thread_local_values.end()) {
- value_holders.push_back(value_pos->second);
- thread_local_values.erase(value_pos);
- // This 'if' can only be successful at most once, so theoretically we
- // could break out of the loop here, but we don't bother doing so.
- }
- }
- }
- // Outside the lock, let the destructor for 'value_holders' deallocate the
- // ThreadLocalValueHolderBases.
- }
-
- static void OnThreadExit(DWORD thread_id) {
- GTEST_CHECK_(thread_id != 0) << ::GetLastError();
- std::vector<linked_ptr<ThreadLocalValueHolderBase> > value_holders;
- // Clean up the ThreadIdToThreadLocals data structure while holding the
- // lock, but defer the destruction of the ThreadLocalValueHolderBases.
- {
- MutexLock lock(&mutex_);
- ThreadIdToThreadLocals* const thread_to_thread_locals =
- GetThreadLocalsMapLocked();
- ThreadIdToThreadLocals::iterator thread_local_pos =
- thread_to_thread_locals->find(thread_id);
- if (thread_local_pos != thread_to_thread_locals->end()) {
- ThreadLocalValues& thread_local_values = thread_local_pos->second;
- for (ThreadLocalValues::iterator value_pos =
- thread_local_values.begin();
- value_pos != thread_local_values.end();
- ++value_pos) {
- value_holders.push_back(value_pos->second);
- }
- thread_to_thread_locals->erase(thread_local_pos);
- }
- }
- // Outside the lock, let the destructor for 'value_holders' deallocate the
- // ThreadLocalValueHolderBases.
- }
-
- private:
- // In a particular thread, maps a ThreadLocal object to its value.
- typedef std::map<const ThreadLocalBase*,
- linked_ptr<ThreadLocalValueHolderBase> > ThreadLocalValues;
- // Stores all ThreadIdToThreadLocals having values in a thread, indexed by
- // thread's ID.
- typedef std::map<DWORD, ThreadLocalValues> ThreadIdToThreadLocals;
-
- // Holds the thread id and thread handle that we pass from
- // StartWatcherThreadFor to WatcherThreadFunc.
- typedef std::pair<DWORD, HANDLE> ThreadIdAndHandle;
-
- static void StartWatcherThreadFor(DWORD thread_id) {
- // The returned handle will be kept in thread_map and closed by
- // watcher_thread in WatcherThreadFunc.
- HANDLE thread = ::OpenThread(SYNCHRONIZE | THREAD_QUERY_INFORMATION,
- FALSE,
- thread_id);
- GTEST_CHECK_(thread != NULL);
- // We need to to pass a valid thread ID pointer into CreateThread for it
- // to work correctly under Win98.
- DWORD watcher_thread_id;
- HANDLE watcher_thread = ::CreateThread(
- NULL, // Default security.
- 0, // Default stack size
- &ThreadLocalRegistryImpl::WatcherThreadFunc,
- reinterpret_cast<LPVOID>(new ThreadIdAndHandle(thread_id, thread)),
- CREATE_SUSPENDED,
- &watcher_thread_id);
- GTEST_CHECK_(watcher_thread != NULL);
- // Give the watcher thread the same priority as ours to avoid being
- // blocked by it.
- ::SetThreadPriority(watcher_thread,
- ::GetThreadPriority(::GetCurrentThread()));
- ::ResumeThread(watcher_thread);
- ::CloseHandle(watcher_thread);
- }
-
- // Monitors exit from a given thread and notifies those
- // ThreadIdToThreadLocals about thread termination.
- static DWORD WINAPI WatcherThreadFunc(LPVOID param) {
- const ThreadIdAndHandle* tah =
- reinterpret_cast<const ThreadIdAndHandle*>(param);
- GTEST_CHECK_(
- ::WaitForSingleObject(tah->second, INFINITE) == WAIT_OBJECT_0);
- OnThreadExit(tah->first);
- ::CloseHandle(tah->second);
- delete tah;
- return 0;
- }
-
- // Returns map of thread local instances.
- static ThreadIdToThreadLocals* GetThreadLocalsMapLocked() {
- mutex_.AssertHeld();
- static ThreadIdToThreadLocals* map = new ThreadIdToThreadLocals;
- return map;
- }
-
- // Protects access to GetThreadLocalsMapLocked() and its return value.
- static Mutex mutex_;
- // Protects access to GetThreadMapLocked() and its return value.
- static Mutex thread_map_mutex_;
-};
-
-Mutex ThreadLocalRegistryImpl::mutex_(Mutex::kStaticMutex);
-Mutex ThreadLocalRegistryImpl::thread_map_mutex_(Mutex::kStaticMutex);
-
-ThreadLocalValueHolderBase* ThreadLocalRegistry::GetValueOnCurrentThread(
- const ThreadLocalBase* thread_local_instance) {
- return ThreadLocalRegistryImpl::GetValueOnCurrentThread(
- thread_local_instance);
-}
-
-void ThreadLocalRegistry::OnThreadLocalDestroyed(
- const ThreadLocalBase* thread_local_instance) {
- ThreadLocalRegistryImpl::OnThreadLocalDestroyed(thread_local_instance);
-}
-
-#endif // GTEST_IS_THREADSAFE && GTEST_OS_WINDOWS
-
-#if GTEST_USES_POSIX_RE
-
-// Implements RE. Currently only needed for death tests.
-
-RE::~RE() {
- if (is_valid_) {
- // regfree'ing an invalid regex might crash because the content
- // of the regex is undefined. Since the regex's are essentially
- // the same, one cannot be valid (or invalid) without the other
- // being so too.
- regfree(&partial_regex_);
- regfree(&full_regex_);
- }
- free(const_cast<char*>(pattern_));
-}
-
-// Returns true iff regular expression re matches the entire str.
-bool RE::FullMatch(const char* str, const RE& re) {
- if (!re.is_valid_) return false;
-
- regmatch_t match;
- return regexec(&re.full_regex_, str, 1, &match, 0) == 0;
-}
-
-// Returns true iff regular expression re matches a substring of str
-// (including str itself).
-bool RE::PartialMatch(const char* str, const RE& re) {
- if (!re.is_valid_) return false;
-
- regmatch_t match;
- return regexec(&re.partial_regex_, str, 1, &match, 0) == 0;
-}
-
-// Initializes an RE from its string representation.
-void RE::Init(const char* regex) {
- pattern_ = posix::StrDup(regex);
-
- // Reserves enough bytes to hold the regular expression used for a
- // full match.
- const size_t full_regex_len = strlen(regex) + 10;
- char* const full_pattern = new char[full_regex_len];
-
- snprintf(full_pattern, full_regex_len, "^(%s)$", regex);
- is_valid_ = regcomp(&full_regex_, full_pattern, REG_EXTENDED) == 0;
- // We want to call regcomp(&partial_regex_, ...) even if the
- // previous expression returns false. Otherwise partial_regex_ may
- // not be properly initialized can may cause trouble when it's
- // freed.
- //
- // Some implementation of POSIX regex (e.g. on at least some
- // versions of Cygwin) doesn't accept the empty string as a valid
- // regex. We change it to an equivalent form "()" to be safe.
- if (is_valid_) {
- const char* const partial_regex = (*regex == '\0') ? "()" : regex;
- is_valid_ = regcomp(&partial_regex_, partial_regex, REG_EXTENDED) == 0;
- }
- EXPECT_TRUE(is_valid_)
- << "Regular expression \"" << regex
- << "\" is not a valid POSIX Extended regular expression.";
-
- delete[] full_pattern;
-}
-
-#elif GTEST_USES_SIMPLE_RE
-
-// Returns true iff ch appears anywhere in str (excluding the
-// terminating '\0' character).
-bool IsInSet(char ch, const char* str) {
- return ch != '\0' && strchr(str, ch) != NULL;
-}
-
-// Returns true iff ch belongs to the given classification. Unlike
-// similar functions in <ctype.h>, these aren't affected by the
-// current locale.
-bool IsAsciiDigit(char ch) { return '0' <= ch && ch <= '9'; }
-bool IsAsciiPunct(char ch) {
- return IsInSet(ch, "^-!\"#$%&'()*+,./:;<=>?@[\\]_`{|}~");
-}
-bool IsRepeat(char ch) { return IsInSet(ch, "?*+"); }
-bool IsAsciiWhiteSpace(char ch) { return IsInSet(ch, " \f\n\r\t\v"); }
-bool IsAsciiWordChar(char ch) {
- return ('a' <= ch && ch <= 'z') || ('A' <= ch && ch <= 'Z') ||
- ('0' <= ch && ch <= '9') || ch == '_';
-}
-
-// Returns true iff "\\c" is a supported escape sequence.
-bool IsValidEscape(char c) {
- return (IsAsciiPunct(c) || IsInSet(c, "dDfnrsStvwW"));
-}
-
-// Returns true iff the given atom (specified by escaped and pattern)
-// matches ch. The result is undefined if the atom is invalid.
-bool AtomMatchesChar(bool escaped, char pattern_char, char ch) {
- if (escaped) { // "\\p" where p is pattern_char.
- switch (pattern_char) {
- case 'd': return IsAsciiDigit(ch);
- case 'D': return !IsAsciiDigit(ch);
- case 'f': return ch == '\f';
- case 'n': return ch == '\n';
- case 'r': return ch == '\r';
- case 's': return IsAsciiWhiteSpace(ch);
- case 'S': return !IsAsciiWhiteSpace(ch);
- case 't': return ch == '\t';
- case 'v': return ch == '\v';
- case 'w': return IsAsciiWordChar(ch);
- case 'W': return !IsAsciiWordChar(ch);
- }
- return IsAsciiPunct(pattern_char) && pattern_char == ch;
- }
-
- return (pattern_char == '.' && ch != '\n') || pattern_char == ch;
-}
-
-// Helper function used by ValidateRegex() to format error messages.
-std::string FormatRegexSyntaxError(const char* regex, int index) {
- return (Message() << "Syntax error at index " << index
- << " in simple regular expression \"" << regex << "\": ").GetString();
-}
-
-// Generates non-fatal failures and returns false if regex is invalid;
-// otherwise returns true.
-bool ValidateRegex(const char* regex) {
- if (regex == NULL) {
- // TODO(wan@google.com): fix the source file location in the
- // assertion failures to match where the regex is used in user
- // code.
- ADD_FAILURE() << "NULL is not a valid simple regular expression.";
- return false;
- }
-
- bool is_valid = true;
-
- // True iff ?, *, or + can follow the previous atom.
- bool prev_repeatable = false;
- for (int i = 0; regex[i]; i++) {
- if (regex[i] == '\\') { // An escape sequence
- i++;
- if (regex[i] == '\0') {
- ADD_FAILURE() << FormatRegexSyntaxError(regex, i - 1)
- << "'\\' cannot appear at the end.";
- return false;
- }
-
- if (!IsValidEscape(regex[i])) {
- ADD_FAILURE() << FormatRegexSyntaxError(regex, i - 1)
- << "invalid escape sequence \"\\" << regex[i] << "\".";
- is_valid = false;
- }
- prev_repeatable = true;
- } else { // Not an escape sequence.
- const char ch = regex[i];
-
- if (ch == '^' && i > 0) {
- ADD_FAILURE() << FormatRegexSyntaxError(regex, i)
- << "'^' can only appear at the beginning.";
- is_valid = false;
- } else if (ch == '$' && regex[i + 1] != '\0') {
- ADD_FAILURE() << FormatRegexSyntaxError(regex, i)
- << "'$' can only appear at the end.";
- is_valid = false;
- } else if (IsInSet(ch, "()[]{}|")) {
- ADD_FAILURE() << FormatRegexSyntaxError(regex, i)
- << "'" << ch << "' is unsupported.";
- is_valid = false;
- } else if (IsRepeat(ch) && !prev_repeatable) {
- ADD_FAILURE() << FormatRegexSyntaxError(regex, i)
- << "'" << ch << "' can only follow a repeatable token.";
- is_valid = false;
- }
-
- prev_repeatable = !IsInSet(ch, "^$?*+");
- }
- }
-
- return is_valid;
-}
-
-// Matches a repeated regex atom followed by a valid simple regular
-// expression. The regex atom is defined as c if escaped is false,
-// or \c otherwise. repeat is the repetition meta character (?, *,
-// or +). The behavior is undefined if str contains too many
-// characters to be indexable by size_t, in which case the test will
-// probably time out anyway. We are fine with this limitation as
-// std::string has it too.
-bool MatchRepetitionAndRegexAtHead(
- bool escaped, char c, char repeat, const char* regex,
- const char* str) {
- const size_t min_count = (repeat == '+') ? 1 : 0;
- const size_t max_count = (repeat == '?') ? 1 :
- static_cast<size_t>(-1) - 1;
- // We cannot call numeric_limits::max() as it conflicts with the
- // max() macro on Windows.
-
- for (size_t i = 0; i <= max_count; ++i) {
- // We know that the atom matches each of the first i characters in str.
- if (i >= min_count && MatchRegexAtHead(regex, str + i)) {
- // We have enough matches at the head, and the tail matches too.
- // Since we only care about *whether* the pattern matches str
- // (as opposed to *how* it matches), there is no need to find a
- // greedy match.
- return true;
- }
- if (str[i] == '\0' || !AtomMatchesChar(escaped, c, str[i]))
- return false;
- }
- return false;
-}
-
-// Returns true iff regex matches a prefix of str. regex must be a
-// valid simple regular expression and not start with "^", or the
-// result is undefined.
-bool MatchRegexAtHead(const char* regex, const char* str) {
- if (*regex == '\0') // An empty regex matches a prefix of anything.
- return true;
-
- // "$" only matches the end of a string. Note that regex being
- // valid guarantees that there's nothing after "$" in it.
- if (*regex == '$')
- return *str == '\0';
-
- // Is the first thing in regex an escape sequence?
- const bool escaped = *regex == '\\';
- if (escaped)
- ++regex;
- if (IsRepeat(regex[1])) {
- // MatchRepetitionAndRegexAtHead() calls MatchRegexAtHead(), so
- // here's an indirect recursion. It terminates as the regex gets
- // shorter in each recursion.
- return MatchRepetitionAndRegexAtHead(
- escaped, regex[0], regex[1], regex + 2, str);
- } else {
- // regex isn't empty, isn't "$", and doesn't start with a
- // repetition. We match the first atom of regex with the first
- // character of str and recurse.
- return (*str != '\0') && AtomMatchesChar(escaped, *regex, *str) &&
- MatchRegexAtHead(regex + 1, str + 1);
- }
-}
-
-// Returns true iff regex matches any substring of str. regex must be
-// a valid simple regular expression, or the result is undefined.
-//
-// The algorithm is recursive, but the recursion depth doesn't exceed
-// the regex length, so we won't need to worry about running out of
-// stack space normally. In rare cases the time complexity can be
-// exponential with respect to the regex length + the string length,
-// but usually it's must faster (often close to linear).
-bool MatchRegexAnywhere(const char* regex, const char* str) {
- if (regex == NULL || str == NULL)
- return false;
-
- if (*regex == '^')
- return MatchRegexAtHead(regex + 1, str);
-
- // A successful match can be anywhere in str.
- do {
- if (MatchRegexAtHead(regex, str))
- return true;
- } while (*str++ != '\0');
- return false;
-}
-
-// Implements the RE class.
-
-RE::~RE() {
- free(const_cast<char*>(pattern_));
- free(const_cast<char*>(full_pattern_));
-}
-
-// Returns true iff regular expression re matches the entire str.
-bool RE::FullMatch(const char* str, const RE& re) {
- return re.is_valid_ && MatchRegexAnywhere(re.full_pattern_, str);
-}
-
-// Returns true iff regular expression re matches a substring of str
-// (including str itself).
-bool RE::PartialMatch(const char* str, const RE& re) {
- return re.is_valid_ && MatchRegexAnywhere(re.pattern_, str);
-}
-
-// Initializes an RE from its string representation.
-void RE::Init(const char* regex) {
- pattern_ = full_pattern_ = NULL;
- if (regex != NULL) {
- pattern_ = posix::StrDup(regex);
- }
-
- is_valid_ = ValidateRegex(regex);
- if (!is_valid_) {
- // No need to calculate the full pattern when the regex is invalid.
- return;
- }
-
- const size_t len = strlen(regex);
- // Reserves enough bytes to hold the regular expression used for a
- // full match: we need space to prepend a '^', append a '$', and
- // terminate the string with '\0'.
- char* buffer = static_cast<char*>(malloc(len + 3));
- full_pattern_ = buffer;
-
- if (*regex != '^')
- *buffer++ = '^'; // Makes sure full_pattern_ starts with '^'.
-
- // We don't use snprintf or strncpy, as they trigger a warning when
- // compiled with VC++ 8.0.
- memcpy(buffer, regex, len);
- buffer += len;
-
- if (len == 0 || regex[len - 1] != '$')
- *buffer++ = '$'; // Makes sure full_pattern_ ends with '$'.
-
- *buffer = '\0';
-}
-
-#endif // GTEST_USES_POSIX_RE
-
-const char kUnknownFile[] = "unknown file";
-
-// Formats a source file path and a line number as they would appear
-// in an error message from the compiler used to compile this code.
-GTEST_API_ ::std::string FormatFileLocation(const char* file, int line) {
- const std::string file_name(file == NULL ? kUnknownFile : file);
-
- if (line < 0) {
- return file_name + ":";
- }
-#ifdef _MSC_VER
- return file_name + "(" + StreamableToString(line) + "):";
-#else
- return file_name + ":" + StreamableToString(line) + ":";
-#endif // _MSC_VER
-}
-
-// Formats a file location for compiler-independent XML output.
-// Although this function is not platform dependent, we put it next to
-// FormatFileLocation in order to contrast the two functions.
-// Note that FormatCompilerIndependentFileLocation() does NOT append colon
-// to the file location it produces, unlike FormatFileLocation().
-GTEST_API_ ::std::string FormatCompilerIndependentFileLocation(
- const char* file, int line) {
- const std::string file_name(file == NULL ? kUnknownFile : file);
-
- if (line < 0)
- return file_name;
- else
- return file_name + ":" + StreamableToString(line);
-}
-
-GTestLog::GTestLog(GTestLogSeverity severity, const char* file, int line)
- : severity_(severity) {
- const char* const marker =
- severity == GTEST_INFO ? "[ INFO ]" :
- severity == GTEST_WARNING ? "[WARNING]" :
- severity == GTEST_ERROR ? "[ ERROR ]" : "[ FATAL ]";
- GetStream() << ::std::endl << marker << " "
- << FormatFileLocation(file, line).c_str() << ": ";
-}
-
-// Flushes the buffers and, if severity is GTEST_FATAL, aborts the program.
-GTestLog::~GTestLog() {
- GetStream() << ::std::endl;
- if (severity_ == GTEST_FATAL) {
- fflush(stderr);
- posix::Abort();
- }
-}
-// Disable Microsoft deprecation warnings for POSIX functions called from
-// this class (creat, dup, dup2, and close)
-GTEST_DISABLE_MSC_WARNINGS_PUSH_(4996)
-
-#if GTEST_HAS_STREAM_REDIRECTION
-
-// Object that captures an output stream (stdout/stderr).
-class CapturedStream {
- public:
- // The ctor redirects the stream to a temporary file.
- explicit CapturedStream(int fd) : fd_(fd), uncaptured_fd_(dup(fd)) {
-# if GTEST_OS_WINDOWS
- char temp_dir_path[MAX_PATH + 1] = { '\0' }; // NOLINT
- char temp_file_path[MAX_PATH + 1] = { '\0' }; // NOLINT
-
- ::GetTempPathA(sizeof(temp_dir_path), temp_dir_path);
- const UINT success = ::GetTempFileNameA(temp_dir_path,
- "gtest_redir",
- 0, // Generate unique file name.
- temp_file_path);
- GTEST_CHECK_(success != 0)
- << "Unable to create a temporary file in " << temp_dir_path;
- const int captured_fd = creat(temp_file_path, _S_IREAD | _S_IWRITE);
- GTEST_CHECK_(captured_fd != -1) << "Unable to open temporary file "
- << temp_file_path;
- filename_ = temp_file_path;
-# else
- // There's no guarantee that a test has write access to the current
- // directory, so we create the temporary file in the /tmp directory
- // instead. We use /tmp on most systems, and /sdcard on Android.
- // That's because Android doesn't have /tmp.
-# if GTEST_OS_LINUX_ANDROID
- // Note: Android applications are expected to call the framework's
- // Context.getExternalStorageDirectory() method through JNI to get
- // the location of the world-writable SD Card directory. However,
- // this requires a Context handle, which cannot be retrieved
- // globally from native code. Doing so also precludes running the
- // code as part of a regular standalone executable, which doesn't
- // run in a Dalvik process (e.g. when running it through 'adb shell').
- //
- // The location /sdcard is directly accessible from native code
- // and is the only location (unofficially) supported by the Android
- // team. It's generally a symlink to the real SD Card mount point
- // which can be /mnt/sdcard, /mnt/sdcard0, /system/media/sdcard, or
- // other OEM-customized locations. Never rely on these, and always
- // use /sdcard.
- char name_template[] = "/sdcard/gtest_captured_stream.XXXXXX";
-# else
- char name_template[] = "/tmp/captured_stream.XXXXXX";
-# endif // GTEST_OS_LINUX_ANDROID
- const int captured_fd = mkstemp(name_template);
- filename_ = name_template;
-# endif // GTEST_OS_WINDOWS
- fflush(NULL);
- dup2(captured_fd, fd_);
- close(captured_fd);
- }
-
- ~CapturedStream() {
- remove(filename_.c_str());
- }
-
- std::string GetCapturedString() {
- if (uncaptured_fd_ != -1) {
- // Restores the original stream.
- fflush(NULL);
- dup2(uncaptured_fd_, fd_);
- close(uncaptured_fd_);
- uncaptured_fd_ = -1;
- }
-
- FILE* const file = posix::FOpen(filename_.c_str(), "r");
- const std::string content = ReadEntireFile(file);
- posix::FClose(file);
- return content;
- }
-
- private:
- const int fd_; // A stream to capture.
- int uncaptured_fd_;
- // Name of the temporary file holding the stderr output.
- ::std::string filename_;
-
- GTEST_DISALLOW_COPY_AND_ASSIGN_(CapturedStream);
-};
-
-GTEST_DISABLE_MSC_WARNINGS_POP_()
-
-static CapturedStream* g_captured_stderr = NULL;
-static CapturedStream* g_captured_stdout = NULL;
-
-// Starts capturing an output stream (stdout/stderr).
-void CaptureStream(int fd, const char* stream_name, CapturedStream** stream) {
- if (*stream != NULL) {
- GTEST_LOG_(FATAL) << "Only one " << stream_name
- << " capturer can exist at a time.";
- }
- *stream = new CapturedStream(fd);
-}
-
-// Stops capturing the output stream and returns the captured string.
-std::string GetCapturedStream(CapturedStream** captured_stream) {
- const std::string content = (*captured_stream)->GetCapturedString();
-
- delete *captured_stream;
- *captured_stream = NULL;
-
- return content;
-}
-
-// Starts capturing stdout.
-void CaptureStdout() {
- CaptureStream(kStdOutFileno, "stdout", &g_captured_stdout);
-}
-
-// Starts capturing stderr.
-void CaptureStderr() {
- CaptureStream(kStdErrFileno, "stderr", &g_captured_stderr);
-}
-
-// Stops capturing stdout and returns the captured string.
-std::string GetCapturedStdout() {
- return GetCapturedStream(&g_captured_stdout);
-}
-
-// Stops capturing stderr and returns the captured string.
-std::string GetCapturedStderr() {
- return GetCapturedStream(&g_captured_stderr);
-}
-
-#endif // GTEST_HAS_STREAM_REDIRECTION
-
-std::string TempDir() {
-#if GTEST_OS_WINDOWS_MOBILE
- return "\\temp\\";
-#elif GTEST_OS_WINDOWS
- const char* temp_dir = posix::GetEnv("TEMP");
- if (temp_dir == NULL || temp_dir[0] == '\0')
- return "\\temp\\";
- else if (temp_dir[strlen(temp_dir) - 1] == '\\')
- return temp_dir;
- else
- return std::string(temp_dir) + "\\";
-#elif GTEST_OS_LINUX_ANDROID
- return "/sdcard/";
-#else
- return "/tmp/";
-#endif // GTEST_OS_WINDOWS_MOBILE
-}
-
-size_t GetFileSize(FILE* file) {
- fseek(file, 0, SEEK_END);
- return static_cast<size_t>(ftell(file));
-}
-
-std::string ReadEntireFile(FILE* file) {
- const size_t file_size = GetFileSize(file);
- char* const buffer = new char[file_size];
-
- size_t bytes_last_read = 0; // # of bytes read in the last fread()
- size_t bytes_read = 0; // # of bytes read so far
-
- fseek(file, 0, SEEK_SET);
-
- // Keeps reading the file until we cannot read further or the
- // pre-determined file size is reached.
- do {
- bytes_last_read = fread(buffer+bytes_read, 1, file_size-bytes_read, file);
- bytes_read += bytes_last_read;
- } while (bytes_last_read > 0 && bytes_read < file_size);
-
- const std::string content(buffer, bytes_read);
- delete[] buffer;
-
- return content;
-}
-
-#if GTEST_HAS_DEATH_TEST
-
-static const ::std::vector<testing::internal::string>* g_injected_test_argvs =
- NULL; // Owned.
-
-void SetInjectableArgvs(const ::std::vector<testing::internal::string>* argvs) {
- if (g_injected_test_argvs != argvs)
- delete g_injected_test_argvs;
- g_injected_test_argvs = argvs;
-}
-
-const ::std::vector<testing::internal::string>& GetInjectableArgvs() {
- if (g_injected_test_argvs != NULL) {
- return *g_injected_test_argvs;
- }
- return GetArgvs();
-}
-#endif // GTEST_HAS_DEATH_TEST
-
-#if GTEST_OS_WINDOWS_MOBILE
-namespace posix {
-void Abort() {
- DebugBreak();
- TerminateProcess(GetCurrentProcess(), 1);
-}
-} // namespace posix
-#endif // GTEST_OS_WINDOWS_MOBILE
-
-// Returns the name of the environment variable corresponding to the
-// given flag. For example, FlagToEnvVar("foo") will return
-// "GTEST_FOO" in the open-source version.
-static std::string FlagToEnvVar(const char* flag) {
- const std::string full_flag =
- (Message() << GTEST_FLAG_PREFIX_ << flag).GetString();
-
- Message env_var;
- for (size_t i = 0; i != full_flag.length(); i++) {
- env_var << ToUpper(full_flag.c_str()[i]);
- }
-
- return env_var.GetString();
-}
-
-// Parses 'str' for a 32-bit signed integer. If successful, writes
-// the result to *value and returns true; otherwise leaves *value
-// unchanged and returns false.
-bool ParseInt32(const Message& src_text, const char* str, Int32* value) {
- // Parses the environment variable as a decimal integer.
- char* end = NULL;
- const long long_value = strtol(str, &end, 10); // NOLINT
-
- // Has strtol() consumed all characters in the string?
- if (*end != '\0') {
- // No - an invalid character was encountered.
- Message msg;
- msg << "WARNING: " << src_text
- << " is expected to be a 32-bit integer, but actually"
- << " has value \"" << str << "\".\n";
- printf("%s", msg.GetString().c_str());
- fflush(stdout);
- return false;
- }
-
- // Is the parsed value in the range of an Int32?
- const Int32 result = static_cast<Int32>(long_value);
- if (long_value == LONG_MAX || long_value == LONG_MIN ||
- // The parsed value overflows as a long. (strtol() returns
- // LONG_MAX or LONG_MIN when the input overflows.)
- result != long_value
- // The parsed value overflows as an Int32.
- ) {
- Message msg;
- msg << "WARNING: " << src_text
- << " is expected to be a 32-bit integer, but actually"
- << " has value " << str << ", which overflows.\n";
- printf("%s", msg.GetString().c_str());
- fflush(stdout);
- return false;
- }
-
- *value = result;
- return true;
-}
-
-// Reads and returns the Boolean environment variable corresponding to
-// the given flag; if it's not set, returns default_value.
-//
-// The value is considered true iff it's not "0".
-bool BoolFromGTestEnv(const char* flag, bool default_value) {
-#if defined(GTEST_GET_BOOL_FROM_ENV_)
- return GTEST_GET_BOOL_FROM_ENV_(flag, default_value);
-#endif // defined(GTEST_GET_BOOL_FROM_ENV_)
- const std::string env_var = FlagToEnvVar(flag);
- const char* const string_value = posix::GetEnv(env_var.c_str());
- return string_value == NULL ?
- default_value : strcmp(string_value, "0") != 0;
-}
-
-// Reads and returns a 32-bit integer stored in the environment
-// variable corresponding to the given flag; if it isn't set or
-// doesn't represent a valid 32-bit integer, returns default_value.
-Int32 Int32FromGTestEnv(const char* flag, Int32 default_value) {
-#if defined(GTEST_GET_INT32_FROM_ENV_)
- return GTEST_GET_INT32_FROM_ENV_(flag, default_value);
-#endif // defined(GTEST_GET_INT32_FROM_ENV_)
- const std::string env_var = FlagToEnvVar(flag);
- const char* const string_value = posix::GetEnv(env_var.c_str());
- if (string_value == NULL) {
- // The environment variable is not set.
- return default_value;
- }
-
- Int32 result = default_value;
- if (!ParseInt32(Message() << "Environment variable " << env_var,
- string_value, &result)) {
- printf("The default value %s is used.\n",
- (Message() << default_value).GetString().c_str());
- fflush(stdout);
- return default_value;
- }
-
- return result;
-}
-
-// Reads and returns the string environment variable corresponding to
-// the given flag; if it's not set, returns default_value.
-std::string StringFromGTestEnv(const char* flag, const char* default_value) {
-#if defined(GTEST_GET_STRING_FROM_ENV_)
- return GTEST_GET_STRING_FROM_ENV_(flag, default_value);
-#endif // defined(GTEST_GET_STRING_FROM_ENV_)
- const std::string env_var = FlagToEnvVar(flag);
- const char* value = posix::GetEnv(env_var.c_str());
- if (value != NULL) {
- return value;
- }
-
- // As a special case for the 'output' flag, if GTEST_OUTPUT is not
- // set, we look for XML_OUTPUT_FILE, which is set by the Bazel build
- // system. The value of XML_OUTPUT_FILE is a filename without the
- // "xml:" prefix of GTEST_OUTPUT.
- //
- // The net priority order after flag processing is thus:
- // --gtest_output command line flag
- // GTEST_OUTPUT environment variable
- // XML_OUTPUT_FILE environment variable
- // 'default_value'
- if (strcmp(flag, "output") == 0) {
- value = posix::GetEnv("XML_OUTPUT_FILE");
- if (value != NULL) {
- return std::string("xml:") + value;
- }
- }
- return default_value;
-}
-
-} // namespace internal
-} // namespace testing
diff --git a/third_party/aom/third_party/googletest/src/googletest/src/gtest-printers.cc b/third_party/aom/third_party/googletest/src/googletest/src/gtest-printers.cc
deleted file mode 100644
index a2df412f8..000000000
--- a/third_party/aom/third_party/googletest/src/googletest/src/gtest-printers.cc
+++ /dev/null
@@ -1,373 +0,0 @@
-// Copyright 2007, Google Inc.
-// All rights reserved.
-//
-// Redistribution and use in source and binary forms, with or without
-// modification, are permitted provided that the following conditions are
-// met:
-//
-// * Redistributions of source code must retain the above copyright
-// notice, this list of conditions and the following disclaimer.
-// * Redistributions in binary form must reproduce the above
-// copyright notice, this list of conditions and the following disclaimer
-// in the documentation and/or other materials provided with the
-// distribution.
-// * Neither the name of Google Inc. nor the names of its
-// contributors may be used to endorse or promote products derived from
-// this software without specific prior written permission.
-//
-// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
-// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
-// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
-// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
-// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
-// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
-// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
-// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
-// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-//
-// Author: wan@google.com (Zhanyong Wan)
-
-// Google Test - The Google C++ Testing Framework
-//
-// This file implements a universal value printer that can print a
-// value of any type T:
-//
-// void ::testing::internal::UniversalPrinter<T>::Print(value, ostream_ptr);
-//
-// It uses the << operator when possible, and prints the bytes in the
-// object otherwise. A user can override its behavior for a class
-// type Foo by defining either operator<<(::std::ostream&, const Foo&)
-// or void PrintTo(const Foo&, ::std::ostream*) in the namespace that
-// defines Foo.
-
-#include "gtest/gtest-printers.h"
-#include <ctype.h>
-#include <stdio.h>
-#include <cwchar>
-#include <ostream> // NOLINT
-#include <string>
-#include "gtest/internal/gtest-port.h"
-
-namespace testing {
-
-namespace {
-
-using ::std::ostream;
-
-// Prints a segment of bytes in the given object.
-GTEST_ATTRIBUTE_NO_SANITIZE_MEMORY_
-GTEST_ATTRIBUTE_NO_SANITIZE_ADDRESS_
-GTEST_ATTRIBUTE_NO_SANITIZE_THREAD_
-void PrintByteSegmentInObjectTo(const unsigned char* obj_bytes, size_t start,
- size_t count, ostream* os) {
- char text[5] = "";
- for (size_t i = 0; i != count; i++) {
- const size_t j = start + i;
- if (i != 0) {
- // Organizes the bytes into groups of 2 for easy parsing by
- // human.
- if ((j % 2) == 0)
- *os << ' ';
- else
- *os << '-';
- }
- GTEST_SNPRINTF_(text, sizeof(text), "%02X", obj_bytes[j]);
- *os << text;
- }
-}
-
-// Prints the bytes in the given value to the given ostream.
-void PrintBytesInObjectToImpl(const unsigned char* obj_bytes, size_t count,
- ostream* os) {
- // Tells the user how big the object is.
- *os << count << "-byte object <";
-
- const size_t kThreshold = 132;
- const size_t kChunkSize = 64;
- // If the object size is bigger than kThreshold, we'll have to omit
- // some details by printing only the first and the last kChunkSize
- // bytes.
- // TODO(wan): let the user control the threshold using a flag.
- if (count < kThreshold) {
- PrintByteSegmentInObjectTo(obj_bytes, 0, count, os);
- } else {
- PrintByteSegmentInObjectTo(obj_bytes, 0, kChunkSize, os);
- *os << " ... ";
- // Rounds up to 2-byte boundary.
- const size_t resume_pos = (count - kChunkSize + 1)/2*2;
- PrintByteSegmentInObjectTo(obj_bytes, resume_pos, count - resume_pos, os);
- }
- *os << ">";
-}
-
-} // namespace
-
-namespace internal2 {
-
-// Delegates to PrintBytesInObjectToImpl() to print the bytes in the
-// given object. The delegation simplifies the implementation, which
-// uses the << operator and thus is easier done outside of the
-// ::testing::internal namespace, which contains a << operator that
-// sometimes conflicts with the one in STL.
-void PrintBytesInObjectTo(const unsigned char* obj_bytes, size_t count,
- ostream* os) {
- PrintBytesInObjectToImpl(obj_bytes, count, os);
-}
-
-} // namespace internal2
-
-namespace internal {
-
-// Depending on the value of a char (or wchar_t), we print it in one
-// of three formats:
-// - as is if it's a printable ASCII (e.g. 'a', '2', ' '),
-// - as a hexidecimal escape sequence (e.g. '\x7F'), or
-// - as a special escape sequence (e.g. '\r', '\n').
-enum CharFormat {
- kAsIs,
- kHexEscape,
- kSpecialEscape
-};
-
-// Returns true if c is a printable ASCII character. We test the
-// value of c directly instead of calling isprint(), which is buggy on
-// Windows Mobile.
-inline bool IsPrintableAscii(wchar_t c) {
- return 0x20 <= c && c <= 0x7E;
-}
-
-// Prints a wide or narrow char c as a character literal without the
-// quotes, escaping it when necessary; returns how c was formatted.
-// The template argument UnsignedChar is the unsigned version of Char,
-// which is the type of c.
-template <typename UnsignedChar, typename Char>
-static CharFormat PrintAsCharLiteralTo(Char c, ostream* os) {
- switch (static_cast<wchar_t>(c)) {
- case L'\0':
- *os << "\\0";
- break;
- case L'\'':
- *os << "\\'";
- break;
- case L'\\':
- *os << "\\\\";
- break;
- case L'\a':
- *os << "\\a";
- break;
- case L'\b':
- *os << "\\b";
- break;
- case L'\f':
- *os << "\\f";
- break;
- case L'\n':
- *os << "\\n";
- break;
- case L'\r':
- *os << "\\r";
- break;
- case L'\t':
- *os << "\\t";
- break;
- case L'\v':
- *os << "\\v";
- break;
- default:
- if (IsPrintableAscii(c)) {
- *os << static_cast<char>(c);
- return kAsIs;
- } else {
- *os << "\\x" + String::FormatHexInt(static_cast<UnsignedChar>(c));
- return kHexEscape;
- }
- }
- return kSpecialEscape;
-}
-
-// Prints a wchar_t c as if it's part of a string literal, escaping it when
-// necessary; returns how c was formatted.
-static CharFormat PrintAsStringLiteralTo(wchar_t c, ostream* os) {
- switch (c) {
- case L'\'':
- *os << "'";
- return kAsIs;
- case L'"':
- *os << "\\\"";
- return kSpecialEscape;
- default:
- return PrintAsCharLiteralTo<wchar_t>(c, os);
- }
-}
-
-// Prints a char c as if it's part of a string literal, escaping it when
-// necessary; returns how c was formatted.
-static CharFormat PrintAsStringLiteralTo(char c, ostream* os) {
- return PrintAsStringLiteralTo(
- static_cast<wchar_t>(static_cast<unsigned char>(c)), os);
-}
-
-// Prints a wide or narrow character c and its code. '\0' is printed
-// as "'\\0'", other unprintable characters are also properly escaped
-// using the standard C++ escape sequence. The template argument
-// UnsignedChar is the unsigned version of Char, which is the type of c.
-template <typename UnsignedChar, typename Char>
-void PrintCharAndCodeTo(Char c, ostream* os) {
- // First, print c as a literal in the most readable form we can find.
- *os << ((sizeof(c) > 1) ? "L'" : "'");
- const CharFormat format = PrintAsCharLiteralTo<UnsignedChar>(c, os);
- *os << "'";
-
- // To aid user debugging, we also print c's code in decimal, unless
- // it's 0 (in which case c was printed as '\\0', making the code
- // obvious).
- if (c == 0)
- return;
- *os << " (" << static_cast<int>(c);
-
- // For more convenience, we print c's code again in hexidecimal,
- // unless c was already printed in the form '\x##' or the code is in
- // [1, 9].
- if (format == kHexEscape || (1 <= c && c <= 9)) {
- // Do nothing.
- } else {
- *os << ", 0x" << String::FormatHexInt(static_cast<UnsignedChar>(c));
- }
- *os << ")";
-}
-
-void PrintTo(unsigned char c, ::std::ostream* os) {
- PrintCharAndCodeTo<unsigned char>(c, os);
-}
-void PrintTo(signed char c, ::std::ostream* os) {
- PrintCharAndCodeTo<unsigned char>(c, os);
-}
-
-// Prints a wchar_t as a symbol if it is printable or as its internal
-// code otherwise and also as its code. L'\0' is printed as "L'\\0'".
-void PrintTo(wchar_t wc, ostream* os) {
- PrintCharAndCodeTo<wchar_t>(wc, os);
-}
-
-// Prints the given array of characters to the ostream. CharType must be either
-// char or wchar_t.
-// The array starts at begin, the length is len, it may include '\0' characters
-// and may not be NUL-terminated.
-template <typename CharType>
-GTEST_ATTRIBUTE_NO_SANITIZE_MEMORY_
-GTEST_ATTRIBUTE_NO_SANITIZE_ADDRESS_
-GTEST_ATTRIBUTE_NO_SANITIZE_THREAD_
-static void PrintCharsAsStringTo(
- const CharType* begin, size_t len, ostream* os) {
- const char* const kQuoteBegin = sizeof(CharType) == 1 ? "\"" : "L\"";
- *os << kQuoteBegin;
- bool is_previous_hex = false;
- for (size_t index = 0; index < len; ++index) {
- const CharType cur = begin[index];
- if (is_previous_hex && IsXDigit(cur)) {
- // Previous character is of '\x..' form and this character can be
- // interpreted as another hexadecimal digit in its number. Break string to
- // disambiguate.
- *os << "\" " << kQuoteBegin;
- }
- is_previous_hex = PrintAsStringLiteralTo(cur, os) == kHexEscape;
- }
- *os << "\"";
-}
-
-// Prints a (const) char/wchar_t array of 'len' elements, starting at address
-// 'begin'. CharType must be either char or wchar_t.
-template <typename CharType>
-GTEST_ATTRIBUTE_NO_SANITIZE_MEMORY_
-GTEST_ATTRIBUTE_NO_SANITIZE_ADDRESS_
-GTEST_ATTRIBUTE_NO_SANITIZE_THREAD_
-static void UniversalPrintCharArray(
- const CharType* begin, size_t len, ostream* os) {
- // The code
- // const char kFoo[] = "foo";
- // generates an array of 4, not 3, elements, with the last one being '\0'.
- //
- // Therefore when printing a char array, we don't print the last element if
- // it's '\0', such that the output matches the string literal as it's
- // written in the source code.
- if (len > 0 && begin[len - 1] == '\0') {
- PrintCharsAsStringTo(begin, len - 1, os);
- return;
- }
-
- // If, however, the last element in the array is not '\0', e.g.
- // const char kFoo[] = { 'f', 'o', 'o' };
- // we must print the entire array. We also print a message to indicate
- // that the array is not NUL-terminated.
- PrintCharsAsStringTo(begin, len, os);
- *os << " (no terminating NUL)";
-}
-
-// Prints a (const) char array of 'len' elements, starting at address 'begin'.
-void UniversalPrintArray(const char* begin, size_t len, ostream* os) {
- UniversalPrintCharArray(begin, len, os);
-}
-
-// Prints a (const) wchar_t array of 'len' elements, starting at address
-// 'begin'.
-void UniversalPrintArray(const wchar_t* begin, size_t len, ostream* os) {
- UniversalPrintCharArray(begin, len, os);
-}
-
-// Prints the given C string to the ostream.
-void PrintTo(const char* s, ostream* os) {
- if (s == NULL) {
- *os << "NULL";
- } else {
- *os << ImplicitCast_<const void*>(s) << " pointing to ";
- PrintCharsAsStringTo(s, strlen(s), os);
- }
-}
-
-// MSVC compiler can be configured to define whar_t as a typedef
-// of unsigned short. Defining an overload for const wchar_t* in that case
-// would cause pointers to unsigned shorts be printed as wide strings,
-// possibly accessing more memory than intended and causing invalid
-// memory accesses. MSVC defines _NATIVE_WCHAR_T_DEFINED symbol when
-// wchar_t is implemented as a native type.
-#if !defined(_MSC_VER) || defined(_NATIVE_WCHAR_T_DEFINED)
-// Prints the given wide C string to the ostream.
-void PrintTo(const wchar_t* s, ostream* os) {
- if (s == NULL) {
- *os << "NULL";
- } else {
- *os << ImplicitCast_<const void*>(s) << " pointing to ";
- PrintCharsAsStringTo(s, std::wcslen(s), os);
- }
-}
-#endif // wchar_t is native
-
-// Prints a ::string object.
-#if GTEST_HAS_GLOBAL_STRING
-void PrintStringTo(const ::string& s, ostream* os) {
- PrintCharsAsStringTo(s.data(), s.size(), os);
-}
-#endif // GTEST_HAS_GLOBAL_STRING
-
-void PrintStringTo(const ::std::string& s, ostream* os) {
- PrintCharsAsStringTo(s.data(), s.size(), os);
-}
-
-// Prints a ::wstring object.
-#if GTEST_HAS_GLOBAL_WSTRING
-void PrintWideStringTo(const ::wstring& s, ostream* os) {
- PrintCharsAsStringTo(s.data(), s.size(), os);
-}
-#endif // GTEST_HAS_GLOBAL_WSTRING
-
-#if GTEST_HAS_STD_WSTRING
-void PrintWideStringTo(const ::std::wstring& s, ostream* os) {
- PrintCharsAsStringTo(s.data(), s.size(), os);
-}
-#endif // GTEST_HAS_STD_WSTRING
-
-} // namespace internal
-
-} // namespace testing
diff --git a/third_party/aom/third_party/googletest/src/googletest/src/gtest-test-part.cc b/third_party/aom/third_party/googletest/src/googletest/src/gtest-test-part.cc
deleted file mode 100644
index fb0e35425..000000000
--- a/third_party/aom/third_party/googletest/src/googletest/src/gtest-test-part.cc
+++ /dev/null
@@ -1,110 +0,0 @@
-// Copyright 2008, Google Inc.
-// All rights reserved.
-//
-// Redistribution and use in source and binary forms, with or without
-// modification, are permitted provided that the following conditions are
-// met:
-//
-// * Redistributions of source code must retain the above copyright
-// notice, this list of conditions and the following disclaimer.
-// * Redistributions in binary form must reproduce the above
-// copyright notice, this list of conditions and the following disclaimer
-// in the documentation and/or other materials provided with the
-// distribution.
-// * Neither the name of Google Inc. nor the names of its
-// contributors may be used to endorse or promote products derived from
-// this software without specific prior written permission.
-//
-// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
-// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
-// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
-// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
-// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
-// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
-// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
-// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
-// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-//
-// Author: mheule@google.com (Markus Heule)
-//
-// The Google C++ Testing Framework (Google Test)
-
-#include "gtest/gtest-test-part.h"
-
-// Indicates that this translation unit is part of Google Test's
-// implementation. It must come before gtest-internal-inl.h is
-// included, or there will be a compiler error. This trick exists to
-// prevent the accidental inclusion of gtest-internal-inl.h in the
-// user's code.
-#define GTEST_IMPLEMENTATION_ 1
-#include "src/gtest-internal-inl.h"
-#undef GTEST_IMPLEMENTATION_
-
-namespace testing {
-
-using internal::GetUnitTestImpl;
-
-// Gets the summary of the failure message by omitting the stack trace
-// in it.
-std::string TestPartResult::ExtractSummary(const char* message) {
- const char* const stack_trace = strstr(message, internal::kStackTraceMarker);
- return stack_trace == NULL ? message :
- std::string(message, stack_trace);
-}
-
-// Prints a TestPartResult object.
-std::ostream& operator<<(std::ostream& os, const TestPartResult& result) {
- return os
- << result.file_name() << ":" << result.line_number() << ": "
- << (result.type() == TestPartResult::kSuccess ? "Success" :
- result.type() == TestPartResult::kFatalFailure ? "Fatal failure" :
- "Non-fatal failure") << ":\n"
- << result.message() << std::endl;
-}
-
-// Appends a TestPartResult to the array.
-void TestPartResultArray::Append(const TestPartResult& result) {
- array_.push_back(result);
-}
-
-// Returns the TestPartResult at the given index (0-based).
-const TestPartResult& TestPartResultArray::GetTestPartResult(int index) const {
- if (index < 0 || index >= size()) {
- printf("\nInvalid index (%d) into TestPartResultArray.\n", index);
- internal::posix::Abort();
- }
-
- return array_[index];
-}
-
-// Returns the number of TestPartResult objects in the array.
-int TestPartResultArray::size() const {
- return static_cast<int>(array_.size());
-}
-
-namespace internal {
-
-HasNewFatalFailureHelper::HasNewFatalFailureHelper()
- : has_new_fatal_failure_(false),
- original_reporter_(GetUnitTestImpl()->
- GetTestPartResultReporterForCurrentThread()) {
- GetUnitTestImpl()->SetTestPartResultReporterForCurrentThread(this);
-}
-
-HasNewFatalFailureHelper::~HasNewFatalFailureHelper() {
- GetUnitTestImpl()->SetTestPartResultReporterForCurrentThread(
- original_reporter_);
-}
-
-void HasNewFatalFailureHelper::ReportTestPartResult(
- const TestPartResult& result) {
- if (result.fatally_failed())
- has_new_fatal_failure_ = true;
- original_reporter_->ReportTestPartResult(result);
-}
-
-} // namespace internal
-
-} // namespace testing
diff --git a/third_party/aom/third_party/googletest/src/googletest/src/gtest-typed-test.cc b/third_party/aom/third_party/googletest/src/googletest/src/gtest-typed-test.cc
deleted file mode 100644
index df1eef475..000000000
--- a/third_party/aom/third_party/googletest/src/googletest/src/gtest-typed-test.cc
+++ /dev/null
@@ -1,118 +0,0 @@
-// Copyright 2008 Google Inc.
-// All Rights Reserved.
-//
-// Redistribution and use in source and binary forms, with or without
-// modification, are permitted provided that the following conditions are
-// met:
-//
-// * Redistributions of source code must retain the above copyright
-// notice, this list of conditions and the following disclaimer.
-// * Redistributions in binary form must reproduce the above
-// copyright notice, this list of conditions and the following disclaimer
-// in the documentation and/or other materials provided with the
-// distribution.
-// * Neither the name of Google Inc. nor the names of its
-// contributors may be used to endorse or promote products derived from
-// this software without specific prior written permission.
-//
-// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
-// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
-// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
-// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
-// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
-// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
-// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
-// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
-// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-//
-// Author: wan@google.com (Zhanyong Wan)
-
-#include "gtest/gtest-typed-test.h"
-#include "gtest/gtest.h"
-
-namespace testing {
-namespace internal {
-
-#if GTEST_HAS_TYPED_TEST_P
-
-// Skips to the first non-space char in str. Returns an empty string if str
-// contains only whitespace characters.
-static const char* SkipSpaces(const char* str) {
- while (IsSpace(*str))
- str++;
- return str;
-}
-
-static std::vector<std::string> SplitIntoTestNames(const char* src) {
- std::vector<std::string> name_vec;
- src = SkipSpaces(src);
- for (; src != NULL; src = SkipComma(src)) {
- name_vec.push_back(StripTrailingSpaces(GetPrefixUntilComma(src)));
- }
- return name_vec;
-}
-
-// Verifies that registered_tests match the test names in
-// registered_tests_; returns registered_tests if successful, or
-// aborts the program otherwise.
-const char* TypedTestCasePState::VerifyRegisteredTestNames(
- const char* file, int line, const char* registered_tests) {
- typedef RegisteredTestsMap::const_iterator RegisteredTestIter;
- registered_ = true;
-
- std::vector<std::string> name_vec = SplitIntoTestNames(registered_tests);
-
- Message errors;
-
- std::set<std::string> tests;
- for (std::vector<std::string>::const_iterator name_it = name_vec.begin();
- name_it != name_vec.end(); ++name_it) {
- const std::string& name = *name_it;
- if (tests.count(name) != 0) {
- errors << "Test " << name << " is listed more than once.\n";
- continue;
- }
-
- bool found = false;
- for (RegisteredTestIter it = registered_tests_.begin();
- it != registered_tests_.end();
- ++it) {
- if (name == it->first) {
- found = true;
- break;
- }
- }
-
- if (found) {
- tests.insert(name);
- } else {
- errors << "No test named " << name
- << " can be found in this test case.\n";
- }
- }
-
- for (RegisteredTestIter it = registered_tests_.begin();
- it != registered_tests_.end();
- ++it) {
- if (tests.count(it->first) == 0) {
- errors << "You forgot to list test " << it->first << ".\n";
- }
- }
-
- const std::string& errors_str = errors.GetString();
- if (errors_str != "") {
- fprintf(stderr, "%s %s", FormatFileLocation(file, line).c_str(),
- errors_str.c_str());
- fflush(stderr);
- posix::Abort();
- }
-
- return registered_tests;
-}
-
-#endif // GTEST_HAS_TYPED_TEST_P
-
-} // namespace internal
-} // namespace testing
diff --git a/third_party/aom/third_party/googletest/src/googletest/src/gtest.cc b/third_party/aom/third_party/googletest/src/googletest/src/gtest.cc
deleted file mode 100644
index 5a8932c73..000000000
--- a/third_party/aom/third_party/googletest/src/googletest/src/gtest.cc
+++ /dev/null
@@ -1,5389 +0,0 @@
-// Copyright 2005, Google Inc.
-// All rights reserved.
-//
-// Redistribution and use in source and binary forms, with or without
-// modification, are permitted provided that the following conditions are
-// met:
-//
-// * Redistributions of source code must retain the above copyright
-// notice, this list of conditions and the following disclaimer.
-// * Redistributions in binary form must reproduce the above
-// copyright notice, this list of conditions and the following disclaimer
-// in the documentation and/or other materials provided with the
-// distribution.
-// * Neither the name of Google Inc. nor the names of its
-// contributors may be used to endorse or promote products derived from
-// this software without specific prior written permission.
-//
-// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
-// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
-// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
-// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
-// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
-// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
-// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
-// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
-// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-//
-// Author: wan@google.com (Zhanyong Wan)
-//
-// The Google C++ Testing Framework (Google Test)
-
-#include "gtest/gtest.h"
-#include "gtest/internal/custom/gtest.h"
-#include "gtest/gtest-spi.h"
-
-#include <ctype.h>
-#include <math.h>
-#include <stdarg.h>
-#include <stdio.h>
-#include <stdlib.h>
-#include <time.h>
-#include <wchar.h>
-#include <wctype.h>
-
-#include <algorithm>
-#include <iomanip>
-#include <limits>
-#include <list>
-#include <map>
-#include <ostream> // NOLINT
-#include <sstream>
-#include <vector>
-
-#if GTEST_OS_LINUX
-
-// TODO(kenton@google.com): Use autoconf to detect availability of
-// gettimeofday().
-# define GTEST_HAS_GETTIMEOFDAY_ 1
-
-# include <fcntl.h> // NOLINT
-# include <limits.h> // NOLINT
-# include <sched.h> // NOLINT
-// Declares vsnprintf(). This header is not available on Windows.
-# include <strings.h> // NOLINT
-# include <sys/mman.h> // NOLINT
-# include <sys/time.h> // NOLINT
-# include <unistd.h> // NOLINT
-# include <string>
-
-#elif GTEST_OS_SYMBIAN
-# define GTEST_HAS_GETTIMEOFDAY_ 1
-# include <sys/time.h> // NOLINT
-
-#elif GTEST_OS_ZOS
-# define GTEST_HAS_GETTIMEOFDAY_ 1
-# include <sys/time.h> // NOLINT
-
-// On z/OS we additionally need strings.h for strcasecmp.
-# include <strings.h> // NOLINT
-
-#elif GTEST_OS_WINDOWS_MOBILE // We are on Windows CE.
-
-# include <windows.h> // NOLINT
-# undef min
-
-#elif GTEST_OS_WINDOWS // We are on Windows proper.
-
-# include <io.h> // NOLINT
-# include <sys/timeb.h> // NOLINT
-# include <sys/types.h> // NOLINT
-# include <sys/stat.h> // NOLINT
-
-# if GTEST_OS_WINDOWS_MINGW
-// MinGW has gettimeofday() but not _ftime64().
-// TODO(kenton@google.com): Use autoconf to detect availability of
-// gettimeofday().
-// TODO(kenton@google.com): There are other ways to get the time on
-// Windows, like GetTickCount() or GetSystemTimeAsFileTime(). MinGW
-// supports these. consider using them instead.
-# define GTEST_HAS_GETTIMEOFDAY_ 1
-# include <sys/time.h> // NOLINT
-# endif // GTEST_OS_WINDOWS_MINGW
-
-// cpplint thinks that the header is already included, so we want to
-// silence it.
-# include <windows.h> // NOLINT
-# undef min
-
-#else
-
-// Assume other platforms have gettimeofday().
-// TODO(kenton@google.com): Use autoconf to detect availability of
-// gettimeofday().
-# define GTEST_HAS_GETTIMEOFDAY_ 1
-
-// cpplint thinks that the header is already included, so we want to
-// silence it.
-# include <sys/time.h> // NOLINT
-# include <unistd.h> // NOLINT
-
-#endif // GTEST_OS_LINUX
-
-#if GTEST_HAS_EXCEPTIONS
-# include <stdexcept>
-#endif
-
-#if GTEST_CAN_STREAM_RESULTS_
-# include <arpa/inet.h> // NOLINT
-# include <netdb.h> // NOLINT
-# include <sys/socket.h> // NOLINT
-# include <sys/types.h> // NOLINT
-#endif
-
-// Indicates that this translation unit is part of Google Test's
-// implementation. It must come before gtest-internal-inl.h is
-// included, or there will be a compiler error. This trick is to
-// prevent a user from accidentally including gtest-internal-inl.h in
-// his code.
-#define GTEST_IMPLEMENTATION_ 1
-#include "src/gtest-internal-inl.h"
-#undef GTEST_IMPLEMENTATION_
-
-#if GTEST_OS_WINDOWS
-# define vsnprintf _vsnprintf
-#endif // GTEST_OS_WINDOWS
-
-namespace testing {
-
-using internal::CountIf;
-using internal::ForEach;
-using internal::GetElementOr;
-using internal::Shuffle;
-
-// Constants.
-
-// A test whose test case name or test name matches this filter is
-// disabled and not run.
-static const char kDisableTestFilter[] = "DISABLED_*:*/DISABLED_*";
-
-// A test case whose name matches this filter is considered a death
-// test case and will be run before test cases whose name doesn't
-// match this filter.
-static const char kDeathTestCaseFilter[] = "*DeathTest:*DeathTest/*";
-
-// A test filter that matches everything.
-static const char kUniversalFilter[] = "*";
-
-// The default output file for XML output.
-static const char kDefaultOutputFile[] = "test_detail.xml";
-
-// The environment variable name for the test shard index.
-static const char kTestShardIndex[] = "GTEST_SHARD_INDEX";
-// The environment variable name for the total number of test shards.
-static const char kTestTotalShards[] = "GTEST_TOTAL_SHARDS";
-// The environment variable name for the test shard status file.
-static const char kTestShardStatusFile[] = "GTEST_SHARD_STATUS_FILE";
-
-namespace internal {
-
-// The text used in failure messages to indicate the start of the
-// stack trace.
-const char kStackTraceMarker[] = "\nStack trace:\n";
-
-// g_help_flag is true iff the --help flag or an equivalent form is
-// specified on the command line.
-bool g_help_flag = false;
-
-} // namespace internal
-
-static const char* GetDefaultFilter() {
-#ifdef GTEST_TEST_FILTER_ENV_VAR_
- const char* const testbridge_test_only = getenv(GTEST_TEST_FILTER_ENV_VAR_);
- if (testbridge_test_only != NULL) {
- return testbridge_test_only;
- }
-#endif // GTEST_TEST_FILTER_ENV_VAR_
- return kUniversalFilter;
-}
-
-GTEST_DEFINE_bool_(
- also_run_disabled_tests,
- internal::BoolFromGTestEnv("also_run_disabled_tests", false),
- "Run disabled tests too, in addition to the tests normally being run.");
-
-GTEST_DEFINE_bool_(
- break_on_failure,
- internal::BoolFromGTestEnv("break_on_failure", false),
- "True iff a failed assertion should be a debugger break-point.");
-
-GTEST_DEFINE_bool_(
- catch_exceptions,
- internal::BoolFromGTestEnv("catch_exceptions", true),
- "True iff " GTEST_NAME_
- " should catch exceptions and treat them as test failures.");
-
-GTEST_DEFINE_string_(
- color,
- internal::StringFromGTestEnv("color", "auto"),
- "Whether to use colors in the output. Valid values: yes, no, "
- "and auto. 'auto' means to use colors if the output is "
- "being sent to a terminal and the TERM environment variable "
- "is set to a terminal type that supports colors.");
-
-GTEST_DEFINE_string_(
- filter,
- internal::StringFromGTestEnv("filter", GetDefaultFilter()),
- "A colon-separated list of glob (not regex) patterns "
- "for filtering the tests to run, optionally followed by a "
- "'-' and a : separated list of negative patterns (tests to "
- "exclude). A test is run if it matches one of the positive "
- "patterns and does not match any of the negative patterns.");
-
-GTEST_DEFINE_bool_(list_tests, false,
- "List all tests without running them.");
-
-GTEST_DEFINE_string_(
- output,
- internal::StringFromGTestEnv("output", ""),
- "A format (currently must be \"xml\"), optionally followed "
- "by a colon and an output file name or directory. A directory "
- "is indicated by a trailing pathname separator. "
- "Examples: \"xml:filename.xml\", \"xml::directoryname/\". "
- "If a directory is specified, output files will be created "
- "within that directory, with file-names based on the test "
- "executable's name and, if necessary, made unique by adding "
- "digits.");
-
-GTEST_DEFINE_bool_(
- print_time,
- internal::BoolFromGTestEnv("print_time", true),
- "True iff " GTEST_NAME_
- " should display elapsed time in text output.");
-
-GTEST_DEFINE_int32_(
- random_seed,
- internal::Int32FromGTestEnv("random_seed", 0),
- "Random number seed to use when shuffling test orders. Must be in range "
- "[1, 99999], or 0 to use a seed based on the current time.");
-
-GTEST_DEFINE_int32_(
- repeat,
- internal::Int32FromGTestEnv("repeat", 1),
- "How many times to repeat each test. Specify a negative number "
- "for repeating forever. Useful for shaking out flaky tests.");
-
-GTEST_DEFINE_bool_(
- show_internal_stack_frames, false,
- "True iff " GTEST_NAME_ " should include internal stack frames when "
- "printing test failure stack traces.");
-
-GTEST_DEFINE_bool_(
- shuffle,
- internal::BoolFromGTestEnv("shuffle", false),
- "True iff " GTEST_NAME_
- " should randomize tests' order on every run.");
-
-GTEST_DEFINE_int32_(
- stack_trace_depth,
- internal::Int32FromGTestEnv("stack_trace_depth", kMaxStackTraceDepth),
- "The maximum number of stack frames to print when an "
- "assertion fails. The valid range is 0 through 100, inclusive.");
-
-GTEST_DEFINE_string_(
- stream_result_to,
- internal::StringFromGTestEnv("stream_result_to", ""),
- "This flag specifies the host name and the port number on which to stream "
- "test results. Example: \"localhost:555\". The flag is effective only on "
- "Linux.");
-
-GTEST_DEFINE_bool_(
- throw_on_failure,
- internal::BoolFromGTestEnv("throw_on_failure", false),
- "When this flag is specified, a failed assertion will throw an exception "
- "if exceptions are enabled or exit the program with a non-zero code "
- "otherwise.");
-
-#if GTEST_USE_OWN_FLAGFILE_FLAG_
-GTEST_DEFINE_string_(
- flagfile,
- internal::StringFromGTestEnv("flagfile", ""),
- "This flag specifies the flagfile to read command-line flags from.");
-#endif // GTEST_USE_OWN_FLAGFILE_FLAG_
-
-namespace internal {
-
-// Generates a random number from [0, range), using a Linear
-// Congruential Generator (LCG). Crashes if 'range' is 0 or greater
-// than kMaxRange.
-GTEST_ATTRIBUTE_NO_SANITIZE_UNSIGNED_OVERFLOW_
-UInt32 Random::Generate(UInt32 range) {
- // These constants are the same as are used in glibc's rand(3).
- state_ = (1103515245U*state_ + 12345U) % kMaxRange;
-
- GTEST_CHECK_(range > 0)
- << "Cannot generate a number in the range [0, 0).";
- GTEST_CHECK_(range <= kMaxRange)
- << "Generation of a number in [0, " << range << ") was requested, "
- << "but this can only generate numbers in [0, " << kMaxRange << ").";
-
- // Converting via modulus introduces a bit of downward bias, but
- // it's simple, and a linear congruential generator isn't too good
- // to begin with.
- return state_ % range;
-}
-
-// GTestIsInitialized() returns true iff the user has initialized
-// Google Test. Useful for catching the user mistake of not initializing
-// Google Test before calling RUN_ALL_TESTS().
-static bool GTestIsInitialized() { return GetArgvs().size() > 0; }
-
-// Iterates over a vector of TestCases, keeping a running sum of the
-// results of calling a given int-returning method on each.
-// Returns the sum.
-static int SumOverTestCaseList(const std::vector<TestCase*>& case_list,
- int (TestCase::*method)() const) {
- int sum = 0;
- for (size_t i = 0; i < case_list.size(); i++) {
- sum += (case_list[i]->*method)();
- }
- return sum;
-}
-
-// Returns true iff the test case passed.
-static bool TestCasePassed(const TestCase* test_case) {
- return test_case->should_run() && test_case->Passed();
-}
-
-// Returns true iff the test case failed.
-static bool TestCaseFailed(const TestCase* test_case) {
- return test_case->should_run() && test_case->Failed();
-}
-
-// Returns true iff test_case contains at least one test that should
-// run.
-static bool ShouldRunTestCase(const TestCase* test_case) {
- return test_case->should_run();
-}
-
-// AssertHelper constructor.
-AssertHelper::AssertHelper(TestPartResult::Type type,
- const char* file,
- int line,
- const char* message)
- : data_(new AssertHelperData(type, file, line, message)) {
-}
-
-AssertHelper::~AssertHelper() {
- delete data_;
-}
-
-// Message assignment, for assertion streaming support.
-void AssertHelper::operator=(const Message& message) const {
- UnitTest::GetInstance()->
- AddTestPartResult(data_->type, data_->file, data_->line,
- AppendUserMessage(data_->message, message),
- UnitTest::GetInstance()->impl()
- ->CurrentOsStackTraceExceptTop(1)
- // Skips the stack frame for this function itself.
- ); // NOLINT
-}
-
-// Mutex for linked pointers.
-GTEST_API_ GTEST_DEFINE_STATIC_MUTEX_(g_linked_ptr_mutex);
-
-// A copy of all command line arguments. Set by InitGoogleTest().
-::std::vector<testing::internal::string> g_argvs;
-
-const ::std::vector<testing::internal::string>& GetArgvs() {
-#if defined(GTEST_CUSTOM_GET_ARGVS_)
- return GTEST_CUSTOM_GET_ARGVS_();
-#else // defined(GTEST_CUSTOM_GET_ARGVS_)
- return g_argvs;
-#endif // defined(GTEST_CUSTOM_GET_ARGVS_)
-}
-
-// Returns the current application's name, removing directory path if that
-// is present.
-FilePath GetCurrentExecutableName() {
- FilePath result;
-
-#if GTEST_OS_WINDOWS
- result.Set(FilePath(GetArgvs()[0]).RemoveExtension("exe"));
-#else
- result.Set(FilePath(GetArgvs()[0]));
-#endif // GTEST_OS_WINDOWS
-
- return result.RemoveDirectoryName();
-}
-
-// Functions for processing the gtest_output flag.
-
-// Returns the output format, or "" for normal printed output.
-std::string UnitTestOptions::GetOutputFormat() {
- const char* const gtest_output_flag = GTEST_FLAG(output).c_str();
- if (gtest_output_flag == NULL) return std::string("");
-
- const char* const colon = strchr(gtest_output_flag, ':');
- return (colon == NULL) ?
- std::string(gtest_output_flag) :
- std::string(gtest_output_flag, colon - gtest_output_flag);
-}
-
-// Returns the name of the requested output file, or the default if none
-// was explicitly specified.
-std::string UnitTestOptions::GetAbsolutePathToOutputFile() {
- const char* const gtest_output_flag = GTEST_FLAG(output).c_str();
- if (gtest_output_flag == NULL)
- return "";
-
- const char* const colon = strchr(gtest_output_flag, ':');
- if (colon == NULL)
- return internal::FilePath::ConcatPaths(
- internal::FilePath(
- UnitTest::GetInstance()->original_working_dir()),
- internal::FilePath(kDefaultOutputFile)).string();
-
- internal::FilePath output_name(colon + 1);
- if (!output_name.IsAbsolutePath())
- // TODO(wan@google.com): on Windows \some\path is not an absolute
- // path (as its meaning depends on the current drive), yet the
- // following logic for turning it into an absolute path is wrong.
- // Fix it.
- output_name = internal::FilePath::ConcatPaths(
- internal::FilePath(UnitTest::GetInstance()->original_working_dir()),
- internal::FilePath(colon + 1));
-
- if (!output_name.IsDirectory())
- return output_name.string();
-
- internal::FilePath result(internal::FilePath::GenerateUniqueFileName(
- output_name, internal::GetCurrentExecutableName(),
- GetOutputFormat().c_str()));
- return result.string();
-}
-
-// Returns true iff the wildcard pattern matches the string. The
-// first ':' or '\0' character in pattern marks the end of it.
-//
-// This recursive algorithm isn't very efficient, but is clear and
-// works well enough for matching test names, which are short.
-bool UnitTestOptions::PatternMatchesString(const char *pattern,
- const char *str) {
- switch (*pattern) {
- case '\0':
- case ':': // Either ':' or '\0' marks the end of the pattern.
- return *str == '\0';
- case '?': // Matches any single character.
- return *str != '\0' && PatternMatchesString(pattern + 1, str + 1);
- case '*': // Matches any string (possibly empty) of characters.
- return (*str != '\0' && PatternMatchesString(pattern, str + 1)) ||
- PatternMatchesString(pattern + 1, str);
- default: // Non-special character. Matches itself.
- return *pattern == *str &&
- PatternMatchesString(pattern + 1, str + 1);
- }
-}
-
-bool UnitTestOptions::MatchesFilter(
- const std::string& name, const char* filter) {
- const char *cur_pattern = filter;
- for (;;) {
- if (PatternMatchesString(cur_pattern, name.c_str())) {
- return true;
- }
-
- // Finds the next pattern in the filter.
- cur_pattern = strchr(cur_pattern, ':');
-
- // Returns if no more pattern can be found.
- if (cur_pattern == NULL) {
- return false;
- }
-
- // Skips the pattern separater (the ':' character).
- cur_pattern++;
- }
-}
-
-// Returns true iff the user-specified filter matches the test case
-// name and the test name.
-bool UnitTestOptions::FilterMatchesTest(const std::string &test_case_name,
- const std::string &test_name) {
- const std::string& full_name = test_case_name + "." + test_name.c_str();
-
- // Split --gtest_filter at '-', if there is one, to separate into
- // positive filter and negative filter portions
- const char* const p = GTEST_FLAG(filter).c_str();
- const char* const dash = strchr(p, '-');
- std::string positive;
- std::string negative;
- if (dash == NULL) {
- positive = GTEST_FLAG(filter).c_str(); // Whole string is a positive filter
- negative = "";
- } else {
- positive = std::string(p, dash); // Everything up to the dash
- negative = std::string(dash + 1); // Everything after the dash
- if (positive.empty()) {
- // Treat '-test1' as the same as '*-test1'
- positive = kUniversalFilter;
- }
- }
-
- // A filter is a colon-separated list of patterns. It matches a
- // test if any pattern in it matches the test.
- return (MatchesFilter(full_name, positive.c_str()) &&
- !MatchesFilter(full_name, negative.c_str()));
-}
-
-#if GTEST_HAS_SEH
-// Returns EXCEPTION_EXECUTE_HANDLER if Google Test should handle the
-// given SEH exception, or EXCEPTION_CONTINUE_SEARCH otherwise.
-// This function is useful as an __except condition.
-int UnitTestOptions::GTestShouldProcessSEH(DWORD exception_code) {
- // Google Test should handle a SEH exception if:
- // 1. the user wants it to, AND
- // 2. this is not a breakpoint exception, AND
- // 3. this is not a C++ exception (VC++ implements them via SEH,
- // apparently).
- //
- // SEH exception code for C++ exceptions.
- // (see http://support.microsoft.com/kb/185294 for more information).
- const DWORD kCxxExceptionCode = 0xe06d7363;
-
- bool should_handle = true;
-
- if (!GTEST_FLAG(catch_exceptions))
- should_handle = false;
- else if (exception_code == EXCEPTION_BREAKPOINT)
- should_handle = false;
- else if (exception_code == kCxxExceptionCode)
- should_handle = false;
-
- return should_handle ? EXCEPTION_EXECUTE_HANDLER : EXCEPTION_CONTINUE_SEARCH;
-}
-#endif // GTEST_HAS_SEH
-
-} // namespace internal
-
-// The c'tor sets this object as the test part result reporter used by
-// Google Test. The 'result' parameter specifies where to report the
-// results. Intercepts only failures from the current thread.
-ScopedFakeTestPartResultReporter::ScopedFakeTestPartResultReporter(
- TestPartResultArray* result)
- : intercept_mode_(INTERCEPT_ONLY_CURRENT_THREAD),
- result_(result) {
- Init();
-}
-
-// The c'tor sets this object as the test part result reporter used by
-// Google Test. The 'result' parameter specifies where to report the
-// results.
-ScopedFakeTestPartResultReporter::ScopedFakeTestPartResultReporter(
- InterceptMode intercept_mode, TestPartResultArray* result)
- : intercept_mode_(intercept_mode),
- result_(result) {
- Init();
-}
-
-void ScopedFakeTestPartResultReporter::Init() {
- internal::UnitTestImpl* const impl = internal::GetUnitTestImpl();
- if (intercept_mode_ == INTERCEPT_ALL_THREADS) {
- old_reporter_ = impl->GetGlobalTestPartResultReporter();
- impl->SetGlobalTestPartResultReporter(this);
- } else {
- old_reporter_ = impl->GetTestPartResultReporterForCurrentThread();
- impl->SetTestPartResultReporterForCurrentThread(this);
- }
-}
-
-// The d'tor restores the test part result reporter used by Google Test
-// before.
-ScopedFakeTestPartResultReporter::~ScopedFakeTestPartResultReporter() {
- internal::UnitTestImpl* const impl = internal::GetUnitTestImpl();
- if (intercept_mode_ == INTERCEPT_ALL_THREADS) {
- impl->SetGlobalTestPartResultReporter(old_reporter_);
- } else {
- impl->SetTestPartResultReporterForCurrentThread(old_reporter_);
- }
-}
-
-// Increments the test part result count and remembers the result.
-// This method is from the TestPartResultReporterInterface interface.
-void ScopedFakeTestPartResultReporter::ReportTestPartResult(
- const TestPartResult& result) {
- result_->Append(result);
-}
-
-namespace internal {
-
-// Returns the type ID of ::testing::Test. We should always call this
-// instead of GetTypeId< ::testing::Test>() to get the type ID of
-// testing::Test. This is to work around a suspected linker bug when
-// using Google Test as a framework on Mac OS X. The bug causes
-// GetTypeId< ::testing::Test>() to return different values depending
-// on whether the call is from the Google Test framework itself or
-// from user test code. GetTestTypeId() is guaranteed to always
-// return the same value, as it always calls GetTypeId<>() from the
-// gtest.cc, which is within the Google Test framework.
-TypeId GetTestTypeId() {
- return GetTypeId<Test>();
-}
-
-// The value of GetTestTypeId() as seen from within the Google Test
-// library. This is solely for testing GetTestTypeId().
-extern const TypeId kTestTypeIdInGoogleTest = GetTestTypeId();
-
-// This predicate-formatter checks that 'results' contains a test part
-// failure of the given type and that the failure message contains the
-// given substring.
-AssertionResult HasOneFailure(const char* /* results_expr */,
- const char* /* type_expr */,
- const char* /* substr_expr */,
- const TestPartResultArray& results,
- TestPartResult::Type type,
- const string& substr) {
- const std::string expected(type == TestPartResult::kFatalFailure ?
- "1 fatal failure" :
- "1 non-fatal failure");
- Message msg;
- if (results.size() != 1) {
- msg << "Expected: " << expected << "\n"
- << " Actual: " << results.size() << " failures";
- for (int i = 0; i < results.size(); i++) {
- msg << "\n" << results.GetTestPartResult(i);
- }
- return AssertionFailure() << msg;
- }
-
- const TestPartResult& r = results.GetTestPartResult(0);
- if (r.type() != type) {
- return AssertionFailure() << "Expected: " << expected << "\n"
- << " Actual:\n"
- << r;
- }
-
- if (strstr(r.message(), substr.c_str()) == NULL) {
- return AssertionFailure() << "Expected: " << expected << " containing \""
- << substr << "\"\n"
- << " Actual:\n"
- << r;
- }
-
- return AssertionSuccess();
-}
-
-// The constructor of SingleFailureChecker remembers where to look up
-// test part results, what type of failure we expect, and what
-// substring the failure message should contain.
-SingleFailureChecker:: SingleFailureChecker(
- const TestPartResultArray* results,
- TestPartResult::Type type,
- const string& substr)
- : results_(results),
- type_(type),
- substr_(substr) {}
-
-// The destructor of SingleFailureChecker verifies that the given
-// TestPartResultArray contains exactly one failure that has the given
-// type and contains the given substring. If that's not the case, a
-// non-fatal failure will be generated.
-SingleFailureChecker::~SingleFailureChecker() {
- EXPECT_PRED_FORMAT3(HasOneFailure, *results_, type_, substr_);
-}
-
-DefaultGlobalTestPartResultReporter::DefaultGlobalTestPartResultReporter(
- UnitTestImpl* unit_test) : unit_test_(unit_test) {}
-
-void DefaultGlobalTestPartResultReporter::ReportTestPartResult(
- const TestPartResult& result) {
- unit_test_->current_test_result()->AddTestPartResult(result);
- unit_test_->listeners()->repeater()->OnTestPartResult(result);
-}
-
-DefaultPerThreadTestPartResultReporter::DefaultPerThreadTestPartResultReporter(
- UnitTestImpl* unit_test) : unit_test_(unit_test) {}
-
-void DefaultPerThreadTestPartResultReporter::ReportTestPartResult(
- const TestPartResult& result) {
- unit_test_->GetGlobalTestPartResultReporter()->ReportTestPartResult(result);
-}
-
-// Returns the global test part result reporter.
-TestPartResultReporterInterface*
-UnitTestImpl::GetGlobalTestPartResultReporter() {
- internal::MutexLock lock(&global_test_part_result_reporter_mutex_);
- return global_test_part_result_repoter_;
-}
-
-// Sets the global test part result reporter.
-void UnitTestImpl::SetGlobalTestPartResultReporter(
- TestPartResultReporterInterface* reporter) {
- internal::MutexLock lock(&global_test_part_result_reporter_mutex_);
- global_test_part_result_repoter_ = reporter;
-}
-
-// Returns the test part result reporter for the current thread.
-TestPartResultReporterInterface*
-UnitTestImpl::GetTestPartResultReporterForCurrentThread() {
- return per_thread_test_part_result_reporter_.get();
-}
-
-// Sets the test part result reporter for the current thread.
-void UnitTestImpl::SetTestPartResultReporterForCurrentThread(
- TestPartResultReporterInterface* reporter) {
- per_thread_test_part_result_reporter_.set(reporter);
-}
-
-// Gets the number of successful test cases.
-int UnitTestImpl::successful_test_case_count() const {
- return CountIf(test_cases_, TestCasePassed);
-}
-
-// Gets the number of failed test cases.
-int UnitTestImpl::failed_test_case_count() const {
- return CountIf(test_cases_, TestCaseFailed);
-}
-
-// Gets the number of all test cases.
-int UnitTestImpl::total_test_case_count() const {
- return static_cast<int>(test_cases_.size());
-}
-
-// Gets the number of all test cases that contain at least one test
-// that should run.
-int UnitTestImpl::test_case_to_run_count() const {
- return CountIf(test_cases_, ShouldRunTestCase);
-}
-
-// Gets the number of successful tests.
-int UnitTestImpl::successful_test_count() const {
- return SumOverTestCaseList(test_cases_, &TestCase::successful_test_count);
-}
-
-// Gets the number of failed tests.
-int UnitTestImpl::failed_test_count() const {
- return SumOverTestCaseList(test_cases_, &TestCase::failed_test_count);
-}
-
-// Gets the number of disabled tests that will be reported in the XML report.
-int UnitTestImpl::reportable_disabled_test_count() const {
- return SumOverTestCaseList(test_cases_,
- &TestCase::reportable_disabled_test_count);
-}
-
-// Gets the number of disabled tests.
-int UnitTestImpl::disabled_test_count() const {
- return SumOverTestCaseList(test_cases_, &TestCase::disabled_test_count);
-}
-
-// Gets the number of tests to be printed in the XML report.
-int UnitTestImpl::reportable_test_count() const {
- return SumOverTestCaseList(test_cases_, &TestCase::reportable_test_count);
-}
-
-// Gets the number of all tests.
-int UnitTestImpl::total_test_count() const {
- return SumOverTestCaseList(test_cases_, &TestCase::total_test_count);
-}
-
-// Gets the number of tests that should run.
-int UnitTestImpl::test_to_run_count() const {
- return SumOverTestCaseList(test_cases_, &TestCase::test_to_run_count);
-}
-
-// Returns the current OS stack trace as an std::string.
-//
-// The maximum number of stack frames to be included is specified by
-// the gtest_stack_trace_depth flag. The skip_count parameter
-// specifies the number of top frames to be skipped, which doesn't
-// count against the number of frames to be included.
-//
-// For example, if Foo() calls Bar(), which in turn calls
-// CurrentOsStackTraceExceptTop(1), Foo() will be included in the
-// trace but Bar() and CurrentOsStackTraceExceptTop() won't.
-std::string UnitTestImpl::CurrentOsStackTraceExceptTop(int skip_count) {
- return os_stack_trace_getter()->CurrentStackTrace(
- static_cast<int>(GTEST_FLAG(stack_trace_depth)),
- skip_count + 1
- // Skips the user-specified number of frames plus this function
- // itself.
- ); // NOLINT
-}
-
-// Returns the current time in milliseconds.
-TimeInMillis GetTimeInMillis() {
-#if GTEST_OS_WINDOWS_MOBILE || defined(__BORLANDC__)
- // Difference between 1970-01-01 and 1601-01-01 in milliseconds.
- // http://analogous.blogspot.com/2005/04/epoch.html
- const TimeInMillis kJavaEpochToWinFileTimeDelta =
- static_cast<TimeInMillis>(116444736UL) * 100000UL;
- const DWORD kTenthMicrosInMilliSecond = 10000;
-
- SYSTEMTIME now_systime;
- FILETIME now_filetime;
- ULARGE_INTEGER now_int64;
- // TODO(kenton@google.com): Shouldn't this just use
- // GetSystemTimeAsFileTime()?
- GetSystemTime(&now_systime);
- if (SystemTimeToFileTime(&now_systime, &now_filetime)) {
- now_int64.LowPart = now_filetime.dwLowDateTime;
- now_int64.HighPart = now_filetime.dwHighDateTime;
- now_int64.QuadPart = (now_int64.QuadPart / kTenthMicrosInMilliSecond) -
- kJavaEpochToWinFileTimeDelta;
- return now_int64.QuadPart;
- }
- return 0;
-#elif GTEST_OS_WINDOWS && !GTEST_HAS_GETTIMEOFDAY_
- __timeb64 now;
-
- // MSVC 8 deprecates _ftime64(), so we want to suppress warning 4996
- // (deprecated function) there.
- // TODO(kenton@google.com): Use GetTickCount()? Or use
- // SystemTimeToFileTime()
- GTEST_DISABLE_MSC_WARNINGS_PUSH_(4996)
- _ftime64(&now);
- GTEST_DISABLE_MSC_WARNINGS_POP_()
-
- return static_cast<TimeInMillis>(now.time) * 1000 + now.millitm;
-#elif GTEST_HAS_GETTIMEOFDAY_
- struct timeval now;
- gettimeofday(&now, NULL);
- return static_cast<TimeInMillis>(now.tv_sec) * 1000 + now.tv_usec / 1000;
-#else
-# error "Don't know how to get the current time on your system."
-#endif
-}
-
-// Utilities
-
-// class String.
-
-#if GTEST_OS_WINDOWS_MOBILE
-// Creates a UTF-16 wide string from the given ANSI string, allocating
-// memory using new. The caller is responsible for deleting the return
-// value using delete[]. Returns the wide string, or NULL if the
-// input is NULL.
-LPCWSTR String::AnsiToUtf16(const char* ansi) {
- if (!ansi) return NULL;
- const int length = strlen(ansi);
- const int unicode_length =
- MultiByteToWideChar(CP_ACP, 0, ansi, length,
- NULL, 0);
- WCHAR* unicode = new WCHAR[unicode_length + 1];
- MultiByteToWideChar(CP_ACP, 0, ansi, length,
- unicode, unicode_length);
- unicode[unicode_length] = 0;
- return unicode;
-}
-
-// Creates an ANSI string from the given wide string, allocating
-// memory using new. The caller is responsible for deleting the return
-// value using delete[]. Returns the ANSI string, or NULL if the
-// input is NULL.
-const char* String::Utf16ToAnsi(LPCWSTR utf16_str) {
- if (!utf16_str) return NULL;
- const int ansi_length =
- WideCharToMultiByte(CP_ACP, 0, utf16_str, -1,
- NULL, 0, NULL, NULL);
- char* ansi = new char[ansi_length + 1];
- WideCharToMultiByte(CP_ACP, 0, utf16_str, -1,
- ansi, ansi_length, NULL, NULL);
- ansi[ansi_length] = 0;
- return ansi;
-}
-
-#endif // GTEST_OS_WINDOWS_MOBILE
-
-// Compares two C strings. Returns true iff they have the same content.
-//
-// Unlike strcmp(), this function can handle NULL argument(s). A NULL
-// C string is considered different to any non-NULL C string,
-// including the empty string.
-bool String::CStringEquals(const char * lhs, const char * rhs) {
- if ( lhs == NULL ) return rhs == NULL;
-
- if ( rhs == NULL ) return false;
-
- return strcmp(lhs, rhs) == 0;
-}
-
-#if GTEST_HAS_STD_WSTRING || GTEST_HAS_GLOBAL_WSTRING
-
-// Converts an array of wide chars to a narrow string using the UTF-8
-// encoding, and streams the result to the given Message object.
-static void StreamWideCharsToMessage(const wchar_t* wstr, size_t length,
- Message* msg) {
- for (size_t i = 0; i != length; ) { // NOLINT
- if (wstr[i] != L'\0') {
- *msg << WideStringToUtf8(wstr + i, static_cast<int>(length - i));
- while (i != length && wstr[i] != L'\0')
- i++;
- } else {
- *msg << '\0';
- i++;
- }
- }
-}
-
-#endif // GTEST_HAS_STD_WSTRING || GTEST_HAS_GLOBAL_WSTRING
-
-void SplitString(const ::std::string& str, char delimiter,
- ::std::vector< ::std::string>* dest) {
- ::std::vector< ::std::string> parsed;
- ::std::string::size_type pos = 0;
- while (::testing::internal::AlwaysTrue()) {
- const ::std::string::size_type colon = str.find(delimiter, pos);
- if (colon == ::std::string::npos) {
- parsed.push_back(str.substr(pos));
- break;
- } else {
- parsed.push_back(str.substr(pos, colon - pos));
- pos = colon + 1;
- }
- }
- dest->swap(parsed);
-}
-
-} // namespace internal
-
-// Constructs an empty Message.
-// We allocate the stringstream separately because otherwise each use of
-// ASSERT/EXPECT in a procedure adds over 200 bytes to the procedure's
-// stack frame leading to huge stack frames in some cases; gcc does not reuse
-// the stack space.
-Message::Message() : ss_(new ::std::stringstream) {
- // By default, we want there to be enough precision when printing
- // a double to a Message.
- *ss_ << std::setprecision(std::numeric_limits<double>::digits10 + 2);
-}
-
-// These two overloads allow streaming a wide C string to a Message
-// using the UTF-8 encoding.
-Message& Message::operator <<(const wchar_t* wide_c_str) {
- return *this << internal::String::ShowWideCString(wide_c_str);
-}
-Message& Message::operator <<(wchar_t* wide_c_str) {
- return *this << internal::String::ShowWideCString(wide_c_str);
-}
-
-#if GTEST_HAS_STD_WSTRING
-// Converts the given wide string to a narrow string using the UTF-8
-// encoding, and streams the result to this Message object.
-Message& Message::operator <<(const ::std::wstring& wstr) {
- internal::StreamWideCharsToMessage(wstr.c_str(), wstr.length(), this);
- return *this;
-}
-#endif // GTEST_HAS_STD_WSTRING
-
-#if GTEST_HAS_GLOBAL_WSTRING
-// Converts the given wide string to a narrow string using the UTF-8
-// encoding, and streams the result to this Message object.
-Message& Message::operator <<(const ::wstring& wstr) {
- internal::StreamWideCharsToMessage(wstr.c_str(), wstr.length(), this);
- return *this;
-}
-#endif // GTEST_HAS_GLOBAL_WSTRING
-
-// Gets the text streamed to this object so far as an std::string.
-// Each '\0' character in the buffer is replaced with "\\0".
-std::string Message::GetString() const {
- return internal::StringStreamToString(ss_.get());
-}
-
-// AssertionResult constructors.
-// Used in EXPECT_TRUE/FALSE(assertion_result).
-AssertionResult::AssertionResult(const AssertionResult& other)
- : success_(other.success_),
- message_(other.message_.get() != NULL ?
- new ::std::string(*other.message_) :
- static_cast< ::std::string*>(NULL)) {
-}
-
-// Swaps two AssertionResults.
-void AssertionResult::swap(AssertionResult& other) {
- using std::swap;
- swap(success_, other.success_);
- swap(message_, other.message_);
-}
-
-// Returns the assertion's negation. Used with EXPECT/ASSERT_FALSE.
-AssertionResult AssertionResult::operator!() const {
- AssertionResult negation(!success_);
- if (message_.get() != NULL)
- negation << *message_;
- return negation;
-}
-
-// Makes a successful assertion result.
-AssertionResult AssertionSuccess() {
- return AssertionResult(true);
-}
-
-// Makes a failed assertion result.
-AssertionResult AssertionFailure() {
- return AssertionResult(false);
-}
-
-// Makes a failed assertion result with the given failure message.
-// Deprecated; use AssertionFailure() << message.
-AssertionResult AssertionFailure(const Message& message) {
- return AssertionFailure() << message;
-}
-
-namespace internal {
-
-namespace edit_distance {
-std::vector<EditType> CalculateOptimalEdits(const std::vector<size_t>& left,
- const std::vector<size_t>& right) {
- std::vector<std::vector<double> > costs(
- left.size() + 1, std::vector<double>(right.size() + 1));
- std::vector<std::vector<EditType> > best_move(
- left.size() + 1, std::vector<EditType>(right.size() + 1));
-
- // Populate for empty right.
- for (size_t l_i = 0; l_i < costs.size(); ++l_i) {
- costs[l_i][0] = static_cast<double>(l_i);
- best_move[l_i][0] = kRemove;
- }
- // Populate for empty left.
- for (size_t r_i = 1; r_i < costs[0].size(); ++r_i) {
- costs[0][r_i] = static_cast<double>(r_i);
- best_move[0][r_i] = kAdd;
- }
-
- for (size_t l_i = 0; l_i < left.size(); ++l_i) {
- for (size_t r_i = 0; r_i < right.size(); ++r_i) {
- if (left[l_i] == right[r_i]) {
- // Found a match. Consume it.
- costs[l_i + 1][r_i + 1] = costs[l_i][r_i];
- best_move[l_i + 1][r_i + 1] = kMatch;
- continue;
- }
-
- const double add = costs[l_i + 1][r_i];
- const double remove = costs[l_i][r_i + 1];
- const double replace = costs[l_i][r_i];
- if (add < remove && add < replace) {
- costs[l_i + 1][r_i + 1] = add + 1;
- best_move[l_i + 1][r_i + 1] = kAdd;
- } else if (remove < add && remove < replace) {
- costs[l_i + 1][r_i + 1] = remove + 1;
- best_move[l_i + 1][r_i + 1] = kRemove;
- } else {
- // We make replace a little more expensive than add/remove to lower
- // their priority.
- costs[l_i + 1][r_i + 1] = replace + 1.00001;
- best_move[l_i + 1][r_i + 1] = kReplace;
- }
- }
- }
-
- // Reconstruct the best path. We do it in reverse order.
- std::vector<EditType> best_path;
- for (size_t l_i = left.size(), r_i = right.size(); l_i > 0 || r_i > 0;) {
- EditType move = best_move[l_i][r_i];
- best_path.push_back(move);
- l_i -= move != kAdd;
- r_i -= move != kRemove;
- }
- std::reverse(best_path.begin(), best_path.end());
- return best_path;
-}
-
-namespace {
-
-// Helper class to convert string into ids with deduplication.
-class InternalStrings {
- public:
- size_t GetId(const std::string& str) {
- IdMap::iterator it = ids_.find(str);
- if (it != ids_.end()) return it->second;
- size_t id = ids_.size();
- return ids_[str] = id;
- }
-
- private:
- typedef std::map<std::string, size_t> IdMap;
- IdMap ids_;
-};
-
-} // namespace
-
-std::vector<EditType> CalculateOptimalEdits(
- const std::vector<std::string>& left,
- const std::vector<std::string>& right) {
- std::vector<size_t> left_ids, right_ids;
- {
- InternalStrings intern_table;
- for (size_t i = 0; i < left.size(); ++i) {
- left_ids.push_back(intern_table.GetId(left[i]));
- }
- for (size_t i = 0; i < right.size(); ++i) {
- right_ids.push_back(intern_table.GetId(right[i]));
- }
- }
- return CalculateOptimalEdits(left_ids, right_ids);
-}
-
-namespace {
-
-// Helper class that holds the state for one hunk and prints it out to the
-// stream.
-// It reorders adds/removes when possible to group all removes before all
-// adds. It also adds the hunk header before printint into the stream.
-class Hunk {
- public:
- Hunk(size_t left_start, size_t right_start)
- : left_start_(left_start),
- right_start_(right_start),
- adds_(),
- removes_(),
- common_() {}
-
- void PushLine(char edit, const char* line) {
- switch (edit) {
- case ' ':
- ++common_;
- FlushEdits();
- hunk_.push_back(std::make_pair(' ', line));
- break;
- case '-':
- ++removes_;
- hunk_removes_.push_back(std::make_pair('-', line));
- break;
- case '+':
- ++adds_;
- hunk_adds_.push_back(std::make_pair('+', line));
- break;
- }
- }
-
- void PrintTo(std::ostream* os) {
- PrintHeader(os);
- FlushEdits();
- for (std::list<std::pair<char, const char*> >::const_iterator it =
- hunk_.begin();
- it != hunk_.end(); ++it) {
- *os << it->first << it->second << "\n";
- }
- }
-
- bool has_edits() const { return adds_ || removes_; }
-
- private:
- void FlushEdits() {
- hunk_.splice(hunk_.end(), hunk_removes_);
- hunk_.splice(hunk_.end(), hunk_adds_);
- }
-
- // Print a unified diff header for one hunk.
- // The format is
- // "@@ -<left_start>,<left_length> +<right_start>,<right_length> @@"
- // where the left/right parts are ommitted if unnecessary.
- void PrintHeader(std::ostream* ss) const {
- *ss << "@@ ";
- if (removes_) {
- *ss << "-" << left_start_ << "," << (removes_ + common_);
- }
- if (removes_ && adds_) {
- *ss << " ";
- }
- if (adds_) {
- *ss << "+" << right_start_ << "," << (adds_ + common_);
- }
- *ss << " @@\n";
- }
-
- size_t left_start_, right_start_;
- size_t adds_, removes_, common_;
- std::list<std::pair<char, const char*> > hunk_, hunk_adds_, hunk_removes_;
-};
-
-} // namespace
-
-// Create a list of diff hunks in Unified diff format.
-// Each hunk has a header generated by PrintHeader above plus a body with
-// lines prefixed with ' ' for no change, '-' for deletion and '+' for
-// addition.
-// 'context' represents the desired unchanged prefix/suffix around the diff.
-// If two hunks are close enough that their contexts overlap, then they are
-// joined into one hunk.
-std::string CreateUnifiedDiff(const std::vector<std::string>& left,
- const std::vector<std::string>& right,
- size_t context) {
- const std::vector<EditType> edits = CalculateOptimalEdits(left, right);
-
- size_t l_i = 0, r_i = 0, edit_i = 0;
- std::stringstream ss;
- while (edit_i < edits.size()) {
- // Find first edit.
- while (edit_i < edits.size() && edits[edit_i] == kMatch) {
- ++l_i;
- ++r_i;
- ++edit_i;
- }
-
- // Find the first line to include in the hunk.
- const size_t prefix_context = std::min(l_i, context);
- Hunk hunk(l_i - prefix_context + 1, r_i - prefix_context + 1);
- for (size_t i = prefix_context; i > 0; --i) {
- hunk.PushLine(' ', left[l_i - i].c_str());
- }
-
- // Iterate the edits until we found enough suffix for the hunk or the input
- // is over.
- size_t n_suffix = 0;
- for (; edit_i < edits.size(); ++edit_i) {
- if (n_suffix >= context) {
- // Continue only if the next hunk is very close.
- std::vector<EditType>::const_iterator it = edits.begin() + edit_i;
- while (it != edits.end() && *it == kMatch) ++it;
- if (it == edits.end() || (it - edits.begin()) - edit_i >= context) {
- // There is no next edit or it is too far away.
- break;
- }
- }
-
- EditType edit = edits[edit_i];
- // Reset count when a non match is found.
- n_suffix = edit == kMatch ? n_suffix + 1 : 0;
-
- if (edit == kMatch || edit == kRemove || edit == kReplace) {
- hunk.PushLine(edit == kMatch ? ' ' : '-', left[l_i].c_str());
- }
- if (edit == kAdd || edit == kReplace) {
- hunk.PushLine('+', right[r_i].c_str());
- }
-
- // Advance indices, depending on edit type.
- l_i += edit != kAdd;
- r_i += edit != kRemove;
- }
-
- if (!hunk.has_edits()) {
- // We are done. We don't want this hunk.
- break;
- }
-
- hunk.PrintTo(&ss);
- }
- return ss.str();
-}
-
-} // namespace edit_distance
-
-namespace {
-
-// The string representation of the values received in EqFailure() are already
-// escaped. Split them on escaped '\n' boundaries. Leave all other escaped
-// characters the same.
-std::vector<std::string> SplitEscapedString(const std::string& str) {
- std::vector<std::string> lines;
- size_t start = 0, end = str.size();
- if (end > 2 && str[0] == '"' && str[end - 1] == '"') {
- ++start;
- --end;
- }
- bool escaped = false;
- for (size_t i = start; i + 1 < end; ++i) {
- if (escaped) {
- escaped = false;
- if (str[i] == 'n') {
- lines.push_back(str.substr(start, i - start - 1));
- start = i + 1;
- }
- } else {
- escaped = str[i] == '\\';
- }
- }
- lines.push_back(str.substr(start, end - start));
- return lines;
-}
-
-} // namespace
-
-// Constructs and returns the message for an equality assertion
-// (e.g. ASSERT_EQ, EXPECT_STREQ, etc) failure.
-//
-// The first four parameters are the expressions used in the assertion
-// and their values, as strings. For example, for ASSERT_EQ(foo, bar)
-// where foo is 5 and bar is 6, we have:
-//
-// lhs_expression: "foo"
-// rhs_expression: "bar"
-// lhs_value: "5"
-// rhs_value: "6"
-//
-// The ignoring_case parameter is true iff the assertion is a
-// *_STRCASEEQ*. When it's true, the string "Ignoring case" will
-// be inserted into the message.
-AssertionResult EqFailure(const char* lhs_expression,
- const char* rhs_expression,
- const std::string& lhs_value,
- const std::string& rhs_value,
- bool ignoring_case) {
- Message msg;
- msg << " Expected: " << lhs_expression;
- if (lhs_value != lhs_expression) {
- msg << "\n Which is: " << lhs_value;
- }
- msg << "\nTo be equal to: " << rhs_expression;
- if (rhs_value != rhs_expression) {
- msg << "\n Which is: " << rhs_value;
- }
-
- if (ignoring_case) {
- msg << "\nIgnoring case";
- }
-
- if (!lhs_value.empty() && !rhs_value.empty()) {
- const std::vector<std::string> lhs_lines =
- SplitEscapedString(lhs_value);
- const std::vector<std::string> rhs_lines =
- SplitEscapedString(rhs_value);
- if (lhs_lines.size() > 1 || rhs_lines.size() > 1) {
- msg << "\nWith diff:\n"
- << edit_distance::CreateUnifiedDiff(lhs_lines, rhs_lines);
- }
- }
-
- return AssertionFailure() << msg;
-}
-
-// Constructs a failure message for Boolean assertions such as EXPECT_TRUE.
-std::string GetBoolAssertionFailureMessage(
- const AssertionResult& assertion_result,
- const char* expression_text,
- const char* actual_predicate_value,
- const char* expected_predicate_value) {
- const char* actual_message = assertion_result.message();
- Message msg;
- msg << "Value of: " << expression_text
- << "\n Actual: " << actual_predicate_value;
- if (actual_message[0] != '\0')
- msg << " (" << actual_message << ")";
- msg << "\nExpected: " << expected_predicate_value;
- return msg.GetString();
-}
-
-// Helper function for implementing ASSERT_NEAR.
-AssertionResult DoubleNearPredFormat(const char* expr1,
- const char* expr2,
- const char* abs_error_expr,
- double val1,
- double val2,
- double abs_error) {
- const double diff = fabs(val1 - val2);
- if (diff <= abs_error) return AssertionSuccess();
-
- // TODO(wan): do not print the value of an expression if it's
- // already a literal.
- return AssertionFailure()
- << "The difference between " << expr1 << " and " << expr2
- << " is " << diff << ", which exceeds " << abs_error_expr << ", where\n"
- << expr1 << " evaluates to " << val1 << ",\n"
- << expr2 << " evaluates to " << val2 << ", and\n"
- << abs_error_expr << " evaluates to " << abs_error << ".";
-}
-
-
-// Helper template for implementing FloatLE() and DoubleLE().
-template <typename RawType>
-AssertionResult FloatingPointLE(const char* expr1,
- const char* expr2,
- RawType val1,
- RawType val2) {
- // Returns success if val1 is less than val2,
- if (val1 < val2) {
- return AssertionSuccess();
- }
-
- // or if val1 is almost equal to val2.
- const FloatingPoint<RawType> lhs(val1), rhs(val2);
- if (lhs.AlmostEquals(rhs)) {
- return AssertionSuccess();
- }
-
- // Note that the above two checks will both fail if either val1 or
- // val2 is NaN, as the IEEE floating-point standard requires that
- // any predicate involving a NaN must return false.
-
- ::std::stringstream val1_ss;
- val1_ss << std::setprecision(std::numeric_limits<RawType>::digits10 + 2)
- << val1;
-
- ::std::stringstream val2_ss;
- val2_ss << std::setprecision(std::numeric_limits<RawType>::digits10 + 2)
- << val2;
-
- return AssertionFailure()
- << "Expected: (" << expr1 << ") <= (" << expr2 << ")\n"
- << " Actual: " << StringStreamToString(&val1_ss) << " vs "
- << StringStreamToString(&val2_ss);
-}
-
-} // namespace internal
-
-// Asserts that val1 is less than, or almost equal to, val2. Fails
-// otherwise. In particular, it fails if either val1 or val2 is NaN.
-AssertionResult FloatLE(const char* expr1, const char* expr2,
- float val1, float val2) {
- return internal::FloatingPointLE<float>(expr1, expr2, val1, val2);
-}
-
-// Asserts that val1 is less than, or almost equal to, val2. Fails
-// otherwise. In particular, it fails if either val1 or val2 is NaN.
-AssertionResult DoubleLE(const char* expr1, const char* expr2,
- double val1, double val2) {
- return internal::FloatingPointLE<double>(expr1, expr2, val1, val2);
-}
-
-namespace internal {
-
-// The helper function for {ASSERT|EXPECT}_EQ with int or enum
-// arguments.
-AssertionResult CmpHelperEQ(const char* lhs_expression,
- const char* rhs_expression,
- BiggestInt lhs,
- BiggestInt rhs) {
- if (lhs == rhs) {
- return AssertionSuccess();
- }
-
- return EqFailure(lhs_expression,
- rhs_expression,
- FormatForComparisonFailureMessage(lhs, rhs),
- FormatForComparisonFailureMessage(rhs, lhs),
- false);
-}
-
-// A macro for implementing the helper functions needed to implement
-// ASSERT_?? and EXPECT_?? with integer or enum arguments. It is here
-// just to avoid copy-and-paste of similar code.
-#define GTEST_IMPL_CMP_HELPER_(op_name, op)\
-AssertionResult CmpHelper##op_name(const char* expr1, const char* expr2, \
- BiggestInt val1, BiggestInt val2) {\
- if (val1 op val2) {\
- return AssertionSuccess();\
- } else {\
- return AssertionFailure() \
- << "Expected: (" << expr1 << ") " #op " (" << expr2\
- << "), actual: " << FormatForComparisonFailureMessage(val1, val2)\
- << " vs " << FormatForComparisonFailureMessage(val2, val1);\
- }\
-}
-
-// Implements the helper function for {ASSERT|EXPECT}_NE with int or
-// enum arguments.
-GTEST_IMPL_CMP_HELPER_(NE, !=)
-// Implements the helper function for {ASSERT|EXPECT}_LE with int or
-// enum arguments.
-GTEST_IMPL_CMP_HELPER_(LE, <=)
-// Implements the helper function for {ASSERT|EXPECT}_LT with int or
-// enum arguments.
-GTEST_IMPL_CMP_HELPER_(LT, < )
-// Implements the helper function for {ASSERT|EXPECT}_GE with int or
-// enum arguments.
-GTEST_IMPL_CMP_HELPER_(GE, >=)
-// Implements the helper function for {ASSERT|EXPECT}_GT with int or
-// enum arguments.
-GTEST_IMPL_CMP_HELPER_(GT, > )
-
-#undef GTEST_IMPL_CMP_HELPER_
-
-// The helper function for {ASSERT|EXPECT}_STREQ.
-AssertionResult CmpHelperSTREQ(const char* lhs_expression,
- const char* rhs_expression,
- const char* lhs,
- const char* rhs) {
- if (String::CStringEquals(lhs, rhs)) {
- return AssertionSuccess();
- }
-
- return EqFailure(lhs_expression,
- rhs_expression,
- PrintToString(lhs),
- PrintToString(rhs),
- false);
-}
-
-// The helper function for {ASSERT|EXPECT}_STRCASEEQ.
-AssertionResult CmpHelperSTRCASEEQ(const char* lhs_expression,
- const char* rhs_expression,
- const char* lhs,
- const char* rhs) {
- if (String::CaseInsensitiveCStringEquals(lhs, rhs)) {
- return AssertionSuccess();
- }
-
- return EqFailure(lhs_expression,
- rhs_expression,
- PrintToString(lhs),
- PrintToString(rhs),
- true);
-}
-
-// The helper function for {ASSERT|EXPECT}_STRNE.
-AssertionResult CmpHelperSTRNE(const char* s1_expression,
- const char* s2_expression,
- const char* s1,
- const char* s2) {
- if (!String::CStringEquals(s1, s2)) {
- return AssertionSuccess();
- } else {
- return AssertionFailure() << "Expected: (" << s1_expression << ") != ("
- << s2_expression << "), actual: \""
- << s1 << "\" vs \"" << s2 << "\"";
- }
-}
-
-// The helper function for {ASSERT|EXPECT}_STRCASENE.
-AssertionResult CmpHelperSTRCASENE(const char* s1_expression,
- const char* s2_expression,
- const char* s1,
- const char* s2) {
- if (!String::CaseInsensitiveCStringEquals(s1, s2)) {
- return AssertionSuccess();
- } else {
- return AssertionFailure()
- << "Expected: (" << s1_expression << ") != ("
- << s2_expression << ") (ignoring case), actual: \""
- << s1 << "\" vs \"" << s2 << "\"";
- }
-}
-
-} // namespace internal
-
-namespace {
-
-// Helper functions for implementing IsSubString() and IsNotSubstring().
-
-// This group of overloaded functions return true iff needle is a
-// substring of haystack. NULL is considered a substring of itself
-// only.
-
-bool IsSubstringPred(const char* needle, const char* haystack) {
- if (needle == NULL || haystack == NULL)
- return needle == haystack;
-
- return strstr(haystack, needle) != NULL;
-}
-
-bool IsSubstringPred(const wchar_t* needle, const wchar_t* haystack) {
- if (needle == NULL || haystack == NULL)
- return needle == haystack;
-
- return wcsstr(haystack, needle) != NULL;
-}
-
-// StringType here can be either ::std::string or ::std::wstring.
-template <typename StringType>
-bool IsSubstringPred(const StringType& needle,
- const StringType& haystack) {
- return haystack.find(needle) != StringType::npos;
-}
-
-// This function implements either IsSubstring() or IsNotSubstring(),
-// depending on the value of the expected_to_be_substring parameter.
-// StringType here can be const char*, const wchar_t*, ::std::string,
-// or ::std::wstring.
-template <typename StringType>
-AssertionResult IsSubstringImpl(
- bool expected_to_be_substring,
- const char* needle_expr, const char* haystack_expr,
- const StringType& needle, const StringType& haystack) {
- if (IsSubstringPred(needle, haystack) == expected_to_be_substring)
- return AssertionSuccess();
-
- const bool is_wide_string = sizeof(needle[0]) > 1;
- const char* const begin_string_quote = is_wide_string ? "L\"" : "\"";
- return AssertionFailure()
- << "Value of: " << needle_expr << "\n"
- << " Actual: " << begin_string_quote << needle << "\"\n"
- << "Expected: " << (expected_to_be_substring ? "" : "not ")
- << "a substring of " << haystack_expr << "\n"
- << "Which is: " << begin_string_quote << haystack << "\"";
-}
-
-} // namespace
-
-// IsSubstring() and IsNotSubstring() check whether needle is a
-// substring of haystack (NULL is considered a substring of itself
-// only), and return an appropriate error message when they fail.
-
-AssertionResult IsSubstring(
- const char* needle_expr, const char* haystack_expr,
- const char* needle, const char* haystack) {
- return IsSubstringImpl(true, needle_expr, haystack_expr, needle, haystack);
-}
-
-AssertionResult IsSubstring(
- const char* needle_expr, const char* haystack_expr,
- const wchar_t* needle, const wchar_t* haystack) {
- return IsSubstringImpl(true, needle_expr, haystack_expr, needle, haystack);
-}
-
-AssertionResult IsNotSubstring(
- const char* needle_expr, const char* haystack_expr,
- const char* needle, const char* haystack) {
- return IsSubstringImpl(false, needle_expr, haystack_expr, needle, haystack);
-}
-
-AssertionResult IsNotSubstring(
- const char* needle_expr, const char* haystack_expr,
- const wchar_t* needle, const wchar_t* haystack) {
- return IsSubstringImpl(false, needle_expr, haystack_expr, needle, haystack);
-}
-
-AssertionResult IsSubstring(
- const char* needle_expr, const char* haystack_expr,
- const ::std::string& needle, const ::std::string& haystack) {
- return IsSubstringImpl(true, needle_expr, haystack_expr, needle, haystack);
-}
-
-AssertionResult IsNotSubstring(
- const char* needle_expr, const char* haystack_expr,
- const ::std::string& needle, const ::std::string& haystack) {
- return IsSubstringImpl(false, needle_expr, haystack_expr, needle, haystack);
-}
-
-#if GTEST_HAS_STD_WSTRING
-AssertionResult IsSubstring(
- const char* needle_expr, const char* haystack_expr,
- const ::std::wstring& needle, const ::std::wstring& haystack) {
- return IsSubstringImpl(true, needle_expr, haystack_expr, needle, haystack);
-}
-
-AssertionResult IsNotSubstring(
- const char* needle_expr, const char* haystack_expr,
- const ::std::wstring& needle, const ::std::wstring& haystack) {
- return IsSubstringImpl(false, needle_expr, haystack_expr, needle, haystack);
-}
-#endif // GTEST_HAS_STD_WSTRING
-
-namespace internal {
-
-#if GTEST_OS_WINDOWS
-
-namespace {
-
-// Helper function for IsHRESULT{SuccessFailure} predicates
-AssertionResult HRESULTFailureHelper(const char* expr,
- const char* expected,
- long hr) { // NOLINT
-# if GTEST_OS_WINDOWS_MOBILE
-
- // Windows CE doesn't support FormatMessage.
- const char error_text[] = "";
-
-# else
-
- // Looks up the human-readable system message for the HRESULT code
- // and since we're not passing any params to FormatMessage, we don't
- // want inserts expanded.
- const DWORD kFlags = FORMAT_MESSAGE_FROM_SYSTEM |
- FORMAT_MESSAGE_IGNORE_INSERTS;
- const DWORD kBufSize = 4096;
- // Gets the system's human readable message string for this HRESULT.
- char error_text[kBufSize] = { '\0' };
- DWORD message_length = ::FormatMessageA(kFlags,
- 0, // no source, we're asking system
- hr, // the error
- 0, // no line width restrictions
- error_text, // output buffer
- kBufSize, // buf size
- NULL); // no arguments for inserts
- // Trims tailing white space (FormatMessage leaves a trailing CR-LF)
- for (; message_length && IsSpace(error_text[message_length - 1]);
- --message_length) {
- error_text[message_length - 1] = '\0';
- }
-
-# endif // GTEST_OS_WINDOWS_MOBILE
-
- const std::string error_hex("0x" + String::FormatHexInt(hr));
- return ::testing::AssertionFailure()
- << "Expected: " << expr << " " << expected << ".\n"
- << " Actual: " << error_hex << " " << error_text << "\n";
-}
-
-} // namespace
-
-AssertionResult IsHRESULTSuccess(const char* expr, long hr) { // NOLINT
- if (SUCCEEDED(hr)) {
- return AssertionSuccess();
- }
- return HRESULTFailureHelper(expr, "succeeds", hr);
-}
-
-AssertionResult IsHRESULTFailure(const char* expr, long hr) { // NOLINT
- if (FAILED(hr)) {
- return AssertionSuccess();
- }
- return HRESULTFailureHelper(expr, "fails", hr);
-}
-
-#endif // GTEST_OS_WINDOWS
-
-// Utility functions for encoding Unicode text (wide strings) in
-// UTF-8.
-
-// A Unicode code-point can have upto 21 bits, and is encoded in UTF-8
-// like this:
-//
-// Code-point length Encoding
-// 0 - 7 bits 0xxxxxxx
-// 8 - 11 bits 110xxxxx 10xxxxxx
-// 12 - 16 bits 1110xxxx 10xxxxxx 10xxxxxx
-// 17 - 21 bits 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
-
-// The maximum code-point a one-byte UTF-8 sequence can represent.
-const UInt32 kMaxCodePoint1 = (static_cast<UInt32>(1) << 7) - 1;
-
-// The maximum code-point a two-byte UTF-8 sequence can represent.
-const UInt32 kMaxCodePoint2 = (static_cast<UInt32>(1) << (5 + 6)) - 1;
-
-// The maximum code-point a three-byte UTF-8 sequence can represent.
-const UInt32 kMaxCodePoint3 = (static_cast<UInt32>(1) << (4 + 2*6)) - 1;
-
-// The maximum code-point a four-byte UTF-8 sequence can represent.
-const UInt32 kMaxCodePoint4 = (static_cast<UInt32>(1) << (3 + 3*6)) - 1;
-
-// Chops off the n lowest bits from a bit pattern. Returns the n
-// lowest bits. As a side effect, the original bit pattern will be
-// shifted to the right by n bits.
-inline UInt32 ChopLowBits(UInt32* bits, int n) {
- const UInt32 low_bits = *bits & ((static_cast<UInt32>(1) << n) - 1);
- *bits >>= n;
- return low_bits;
-}
-
-// Converts a Unicode code point to a narrow string in UTF-8 encoding.
-// code_point parameter is of type UInt32 because wchar_t may not be
-// wide enough to contain a code point.
-// If the code_point is not a valid Unicode code point
-// (i.e. outside of Unicode range U+0 to U+10FFFF) it will be converted
-// to "(Invalid Unicode 0xXXXXXXXX)".
-std::string CodePointToUtf8(UInt32 code_point) {
- if (code_point > kMaxCodePoint4) {
- return "(Invalid Unicode 0x" + String::FormatHexInt(code_point) + ")";
- }
-
- char str[5]; // Big enough for the largest valid code point.
- if (code_point <= kMaxCodePoint1) {
- str[1] = '\0';
- str[0] = static_cast<char>(code_point); // 0xxxxxxx
- } else if (code_point <= kMaxCodePoint2) {
- str[2] = '\0';
- str[1] = static_cast<char>(0x80 | ChopLowBits(&code_point, 6)); // 10xxxxxx
- str[0] = static_cast<char>(0xC0 | code_point); // 110xxxxx
- } else if (code_point <= kMaxCodePoint3) {
- str[3] = '\0';
- str[2] = static_cast<char>(0x80 | ChopLowBits(&code_point, 6)); // 10xxxxxx
- str[1] = static_cast<char>(0x80 | ChopLowBits(&code_point, 6)); // 10xxxxxx
- str[0] = static_cast<char>(0xE0 | code_point); // 1110xxxx
- } else { // code_point <= kMaxCodePoint4
- str[4] = '\0';
- str[3] = static_cast<char>(0x80 | ChopLowBits(&code_point, 6)); // 10xxxxxx
- str[2] = static_cast<char>(0x80 | ChopLowBits(&code_point, 6)); // 10xxxxxx
- str[1] = static_cast<char>(0x80 | ChopLowBits(&code_point, 6)); // 10xxxxxx
- str[0] = static_cast<char>(0xF0 | code_point); // 11110xxx
- }
- return str;
-}
-
-// The following two functions only make sense if the the system
-// uses UTF-16 for wide string encoding. All supported systems
-// with 16 bit wchar_t (Windows, Cygwin, Symbian OS) do use UTF-16.
-
-// Determines if the arguments constitute UTF-16 surrogate pair
-// and thus should be combined into a single Unicode code point
-// using CreateCodePointFromUtf16SurrogatePair.
-inline bool IsUtf16SurrogatePair(wchar_t first, wchar_t second) {
- return sizeof(wchar_t) == 2 &&
- (first & 0xFC00) == 0xD800 && (second & 0xFC00) == 0xDC00;
-}
-
-// Creates a Unicode code point from UTF16 surrogate pair.
-inline UInt32 CreateCodePointFromUtf16SurrogatePair(wchar_t first,
- wchar_t second) {
- const UInt32 mask = (1 << 10) - 1;
- return (sizeof(wchar_t) == 2) ?
- (((first & mask) << 10) | (second & mask)) + 0x10000 :
- // This function should not be called when the condition is
- // false, but we provide a sensible default in case it is.
- static_cast<UInt32>(first);
-}
-
-// Converts a wide string to a narrow string in UTF-8 encoding.
-// The wide string is assumed to have the following encoding:
-// UTF-16 if sizeof(wchar_t) == 2 (on Windows, Cygwin, Symbian OS)
-// UTF-32 if sizeof(wchar_t) == 4 (on Linux)
-// Parameter str points to a null-terminated wide string.
-// Parameter num_chars may additionally limit the number
-// of wchar_t characters processed. -1 is used when the entire string
-// should be processed.
-// If the string contains code points that are not valid Unicode code points
-// (i.e. outside of Unicode range U+0 to U+10FFFF) they will be output
-// as '(Invalid Unicode 0xXXXXXXXX)'. If the string is in UTF16 encoding
-// and contains invalid UTF-16 surrogate pairs, values in those pairs
-// will be encoded as individual Unicode characters from Basic Normal Plane.
-std::string WideStringToUtf8(const wchar_t* str, int num_chars) {
- if (num_chars == -1)
- num_chars = static_cast<int>(wcslen(str));
-
- ::std::stringstream stream;
- for (int i = 0; i < num_chars; ++i) {
- UInt32 unicode_code_point;
-
- if (str[i] == L'\0') {
- break;
- } else if (i + 1 < num_chars && IsUtf16SurrogatePair(str[i], str[i + 1])) {
- unicode_code_point = CreateCodePointFromUtf16SurrogatePair(str[i],
- str[i + 1]);
- i++;
- } else {
- unicode_code_point = static_cast<UInt32>(str[i]);
- }
-
- stream << CodePointToUtf8(unicode_code_point);
- }
- return StringStreamToString(&stream);
-}
-
-// Converts a wide C string to an std::string using the UTF-8 encoding.
-// NULL will be converted to "(null)".
-std::string String::ShowWideCString(const wchar_t * wide_c_str) {
- if (wide_c_str == NULL) return "(null)";
-
- return internal::WideStringToUtf8(wide_c_str, -1);
-}
-
-// Compares two wide C strings. Returns true iff they have the same
-// content.
-//
-// Unlike wcscmp(), this function can handle NULL argument(s). A NULL
-// C string is considered different to any non-NULL C string,
-// including the empty string.
-bool String::WideCStringEquals(const wchar_t * lhs, const wchar_t * rhs) {
- if (lhs == NULL) return rhs == NULL;
-
- if (rhs == NULL) return false;
-
- return wcscmp(lhs, rhs) == 0;
-}
-
-// Helper function for *_STREQ on wide strings.
-AssertionResult CmpHelperSTREQ(const char* lhs_expression,
- const char* rhs_expression,
- const wchar_t* lhs,
- const wchar_t* rhs) {
- if (String::WideCStringEquals(lhs, rhs)) {
- return AssertionSuccess();
- }
-
- return EqFailure(lhs_expression,
- rhs_expression,
- PrintToString(lhs),
- PrintToString(rhs),
- false);
-}
-
-// Helper function for *_STRNE on wide strings.
-AssertionResult CmpHelperSTRNE(const char* s1_expression,
- const char* s2_expression,
- const wchar_t* s1,
- const wchar_t* s2) {
- if (!String::WideCStringEquals(s1, s2)) {
- return AssertionSuccess();
- }
-
- return AssertionFailure() << "Expected: (" << s1_expression << ") != ("
- << s2_expression << "), actual: "
- << PrintToString(s1)
- << " vs " << PrintToString(s2);
-}
-
-// Compares two C strings, ignoring case. Returns true iff they have
-// the same content.
-//
-// Unlike strcasecmp(), this function can handle NULL argument(s). A
-// NULL C string is considered different to any non-NULL C string,
-// including the empty string.
-bool String::CaseInsensitiveCStringEquals(const char * lhs, const char * rhs) {
- if (lhs == NULL)
- return rhs == NULL;
- if (rhs == NULL)
- return false;
- return posix::StrCaseCmp(lhs, rhs) == 0;
-}
-
- // Compares two wide C strings, ignoring case. Returns true iff they
- // have the same content.
- //
- // Unlike wcscasecmp(), this function can handle NULL argument(s).
- // A NULL C string is considered different to any non-NULL wide C string,
- // including the empty string.
- // NB: The implementations on different platforms slightly differ.
- // On windows, this method uses _wcsicmp which compares according to LC_CTYPE
- // environment variable. On GNU platform this method uses wcscasecmp
- // which compares according to LC_CTYPE category of the current locale.
- // On MacOS X, it uses towlower, which also uses LC_CTYPE category of the
- // current locale.
-bool String::CaseInsensitiveWideCStringEquals(const wchar_t* lhs,
- const wchar_t* rhs) {
- if (lhs == NULL) return rhs == NULL;
-
- if (rhs == NULL) return false;
-
-#if GTEST_OS_WINDOWS
- return _wcsicmp(lhs, rhs) == 0;
-#elif GTEST_OS_LINUX && !GTEST_OS_LINUX_ANDROID
- return wcscasecmp(lhs, rhs) == 0;
-#else
- // Android, Mac OS X and Cygwin don't define wcscasecmp.
- // Other unknown OSes may not define it either.
- wint_t left, right;
- do {
- left = towlower(*lhs++);
- right = towlower(*rhs++);
- } while (left && left == right);
- return left == right;
-#endif // OS selector
-}
-
-// Returns true iff str ends with the given suffix, ignoring case.
-// Any string is considered to end with an empty suffix.
-bool String::EndsWithCaseInsensitive(
- const std::string& str, const std::string& suffix) {
- const size_t str_len = str.length();
- const size_t suffix_len = suffix.length();
- return (str_len >= suffix_len) &&
- CaseInsensitiveCStringEquals(str.c_str() + str_len - suffix_len,
- suffix.c_str());
-}
-
-// Formats an int value as "%02d".
-std::string String::FormatIntWidth2(int value) {
- std::stringstream ss;
- ss << std::setfill('0') << std::setw(2) << value;
- return ss.str();
-}
-
-// Formats an int value as "%X".
-std::string String::FormatHexInt(int value) {
- std::stringstream ss;
- ss << std::hex << std::uppercase << value;
- return ss.str();
-}
-
-// Formats a byte as "%02X".
-std::string String::FormatByte(unsigned char value) {
- std::stringstream ss;
- ss << std::setfill('0') << std::setw(2) << std::hex << std::uppercase
- << static_cast<unsigned int>(value);
- return ss.str();
-}
-
-// Converts the buffer in a stringstream to an std::string, converting NUL
-// bytes to "\\0" along the way.
-std::string StringStreamToString(::std::stringstream* ss) {
- const ::std::string& str = ss->str();
- const char* const start = str.c_str();
- const char* const end = start + str.length();
-
- std::string result;
- result.reserve(2 * (end - start));
- for (const char* ch = start; ch != end; ++ch) {
- if (*ch == '\0') {
- result += "\\0"; // Replaces NUL with "\\0";
- } else {
- result += *ch;
- }
- }
-
- return result;
-}
-
-// Appends the user-supplied message to the Google-Test-generated message.
-std::string AppendUserMessage(const std::string& gtest_msg,
- const Message& user_msg) {
- // Appends the user message if it's non-empty.
- const std::string user_msg_string = user_msg.GetString();
- if (user_msg_string.empty()) {
- return gtest_msg;
- }
-
- return gtest_msg + "\n" + user_msg_string;
-}
-
-} // namespace internal
-
-// class TestResult
-
-// Creates an empty TestResult.
-TestResult::TestResult()
- : death_test_count_(0),
- elapsed_time_(0) {
-}
-
-// D'tor.
-TestResult::~TestResult() {
-}
-
-// Returns the i-th test part result among all the results. i can
-// range from 0 to total_part_count() - 1. If i is not in that range,
-// aborts the program.
-const TestPartResult& TestResult::GetTestPartResult(int i) const {
- if (i < 0 || i >= total_part_count())
- internal::posix::Abort();
- return test_part_results_.at(i);
-}
-
-// Returns the i-th test property. i can range from 0 to
-// test_property_count() - 1. If i is not in that range, aborts the
-// program.
-const TestProperty& TestResult::GetTestProperty(int i) const {
- if (i < 0 || i >= test_property_count())
- internal::posix::Abort();
- return test_properties_.at(i);
-}
-
-// Clears the test part results.
-void TestResult::ClearTestPartResults() {
- test_part_results_.clear();
-}
-
-// Adds a test part result to the list.
-void TestResult::AddTestPartResult(const TestPartResult& test_part_result) {
- test_part_results_.push_back(test_part_result);
-}
-
-// Adds a test property to the list. If a property with the same key as the
-// supplied property is already represented, the value of this test_property
-// replaces the old value for that key.
-void TestResult::RecordProperty(const std::string& xml_element,
- const TestProperty& test_property) {
- if (!ValidateTestProperty(xml_element, test_property)) {
- return;
- }
- internal::MutexLock lock(&test_properites_mutex_);
- const std::vector<TestProperty>::iterator property_with_matching_key =
- std::find_if(test_properties_.begin(), test_properties_.end(),
- internal::TestPropertyKeyIs(test_property.key()));
- if (property_with_matching_key == test_properties_.end()) {
- test_properties_.push_back(test_property);
- return;
- }
- property_with_matching_key->SetValue(test_property.value());
-}
-
-// The list of reserved attributes used in the <testsuites> element of XML
-// output.
-static const char* const kReservedTestSuitesAttributes[] = {
- "disabled",
- "errors",
- "failures",
- "name",
- "random_seed",
- "tests",
- "time",
- "timestamp"
-};
-
-// The list of reserved attributes used in the <testsuite> element of XML
-// output.
-static const char* const kReservedTestSuiteAttributes[] = {
- "disabled",
- "errors",
- "failures",
- "name",
- "tests",
- "time"
-};
-
-// The list of reserved attributes used in the <testcase> element of XML output.
-static const char* const kReservedTestCaseAttributes[] = {
- "classname",
- "name",
- "status",
- "time",
- "type_param",
- "value_param"
-};
-
-template <int kSize>
-std::vector<std::string> ArrayAsVector(const char* const (&array)[kSize]) {
- return std::vector<std::string>(array, array + kSize);
-}
-
-static std::vector<std::string> GetReservedAttributesForElement(
- const std::string& xml_element) {
- if (xml_element == "testsuites") {
- return ArrayAsVector(kReservedTestSuitesAttributes);
- } else if (xml_element == "testsuite") {
- return ArrayAsVector(kReservedTestSuiteAttributes);
- } else if (xml_element == "testcase") {
- return ArrayAsVector(kReservedTestCaseAttributes);
- } else {
- GTEST_CHECK_(false) << "Unrecognized xml_element provided: " << xml_element;
- }
- // This code is unreachable but some compilers may not realizes that.
- return std::vector<std::string>();
-}
-
-static std::string FormatWordList(const std::vector<std::string>& words) {
- Message word_list;
- for (size_t i = 0; i < words.size(); ++i) {
- if (i > 0 && words.size() > 2) {
- word_list << ", ";
- }
- if (i == words.size() - 1) {
- word_list << "and ";
- }
- word_list << "'" << words[i] << "'";
- }
- return word_list.GetString();
-}
-
-bool ValidateTestPropertyName(const std::string& property_name,
- const std::vector<std::string>& reserved_names) {
- if (std::find(reserved_names.begin(), reserved_names.end(), property_name) !=
- reserved_names.end()) {
- ADD_FAILURE() << "Reserved key used in RecordProperty(): " << property_name
- << " (" << FormatWordList(reserved_names)
- << " are reserved by " << GTEST_NAME_ << ")";
- return false;
- }
- return true;
-}
-
-// Adds a failure if the key is a reserved attribute of the element named
-// xml_element. Returns true if the property is valid.
-bool TestResult::ValidateTestProperty(const std::string& xml_element,
- const TestProperty& test_property) {
- return ValidateTestPropertyName(test_property.key(),
- GetReservedAttributesForElement(xml_element));
-}
-
-// Clears the object.
-void TestResult::Clear() {
- test_part_results_.clear();
- test_properties_.clear();
- death_test_count_ = 0;
- elapsed_time_ = 0;
-}
-
-// Returns true iff the test failed.
-bool TestResult::Failed() const {
- for (int i = 0; i < total_part_count(); ++i) {
- if (GetTestPartResult(i).failed())
- return true;
- }
- return false;
-}
-
-// Returns true iff the test part fatally failed.
-static bool TestPartFatallyFailed(const TestPartResult& result) {
- return result.fatally_failed();
-}
-
-// Returns true iff the test fatally failed.
-bool TestResult::HasFatalFailure() const {
- return CountIf(test_part_results_, TestPartFatallyFailed) > 0;
-}
-
-// Returns true iff the test part non-fatally failed.
-static bool TestPartNonfatallyFailed(const TestPartResult& result) {
- return result.nonfatally_failed();
-}
-
-// Returns true iff the test has a non-fatal failure.
-bool TestResult::HasNonfatalFailure() const {
- return CountIf(test_part_results_, TestPartNonfatallyFailed) > 0;
-}
-
-// Gets the number of all test parts. This is the sum of the number
-// of successful test parts and the number of failed test parts.
-int TestResult::total_part_count() const {
- return static_cast<int>(test_part_results_.size());
-}
-
-// Returns the number of the test properties.
-int TestResult::test_property_count() const {
- return static_cast<int>(test_properties_.size());
-}
-
-// class Test
-
-// Creates a Test object.
-
-// The c'tor saves the states of all flags.
-Test::Test()
- : gtest_flag_saver_(new GTEST_FLAG_SAVER_) {
-}
-
-// The d'tor restores the states of all flags. The actual work is
-// done by the d'tor of the gtest_flag_saver_ field, and thus not
-// visible here.
-Test::~Test() {
-}
-
-// Sets up the test fixture.
-//
-// A sub-class may override this.
-void Test::SetUp() {
-}
-
-// Tears down the test fixture.
-//
-// A sub-class may override this.
-void Test::TearDown() {
-}
-
-// Allows user supplied key value pairs to be recorded for later output.
-void Test::RecordProperty(const std::string& key, const std::string& value) {
- UnitTest::GetInstance()->RecordProperty(key, value);
-}
-
-// Allows user supplied key value pairs to be recorded for later output.
-void Test::RecordProperty(const std::string& key, int value) {
- Message value_message;
- value_message << value;
- RecordProperty(key, value_message.GetString().c_str());
-}
-
-namespace internal {
-
-void ReportFailureInUnknownLocation(TestPartResult::Type result_type,
- const std::string& message) {
- // This function is a friend of UnitTest and as such has access to
- // AddTestPartResult.
- UnitTest::GetInstance()->AddTestPartResult(
- result_type,
- NULL, // No info about the source file where the exception occurred.
- -1, // We have no info on which line caused the exception.
- message,
- ""); // No stack trace, either.
-}
-
-} // namespace internal
-
-// Google Test requires all tests in the same test case to use the same test
-// fixture class. This function checks if the current test has the
-// same fixture class as the first test in the current test case. If
-// yes, it returns true; otherwise it generates a Google Test failure and
-// returns false.
-bool Test::HasSameFixtureClass() {
- internal::UnitTestImpl* const impl = internal::GetUnitTestImpl();
- const TestCase* const test_case = impl->current_test_case();
-
- // Info about the first test in the current test case.
- const TestInfo* const first_test_info = test_case->test_info_list()[0];
- const internal::TypeId first_fixture_id = first_test_info->fixture_class_id_;
- const char* const first_test_name = first_test_info->name();
-
- // Info about the current test.
- const TestInfo* const this_test_info = impl->current_test_info();
- const internal::TypeId this_fixture_id = this_test_info->fixture_class_id_;
- const char* const this_test_name = this_test_info->name();
-
- if (this_fixture_id != first_fixture_id) {
- // Is the first test defined using TEST?
- const bool first_is_TEST = first_fixture_id == internal::GetTestTypeId();
- // Is this test defined using TEST?
- const bool this_is_TEST = this_fixture_id == internal::GetTestTypeId();
-
- if (first_is_TEST || this_is_TEST) {
- // Both TEST and TEST_F appear in same test case, which is incorrect.
- // Tell the user how to fix this.
-
- // Gets the name of the TEST and the name of the TEST_F. Note
- // that first_is_TEST and this_is_TEST cannot both be true, as
- // the fixture IDs are different for the two tests.
- const char* const TEST_name =
- first_is_TEST ? first_test_name : this_test_name;
- const char* const TEST_F_name =
- first_is_TEST ? this_test_name : first_test_name;
-
- ADD_FAILURE()
- << "All tests in the same test case must use the same test fixture\n"
- << "class, so mixing TEST_F and TEST in the same test case is\n"
- << "illegal. In test case " << this_test_info->test_case_name()
- << ",\n"
- << "test " << TEST_F_name << " is defined using TEST_F but\n"
- << "test " << TEST_name << " is defined using TEST. You probably\n"
- << "want to change the TEST to TEST_F or move it to another test\n"
- << "case.";
- } else {
- // Two fixture classes with the same name appear in two different
- // namespaces, which is not allowed. Tell the user how to fix this.
- ADD_FAILURE()
- << "All tests in the same test case must use the same test fixture\n"
- << "class. However, in test case "
- << this_test_info->test_case_name() << ",\n"
- << "you defined test " << first_test_name
- << " and test " << this_test_name << "\n"
- << "using two different test fixture classes. This can happen if\n"
- << "the two classes are from different namespaces or translation\n"
- << "units and have the same name. You should probably rename one\n"
- << "of the classes to put the tests into different test cases.";
- }
- return false;
- }
-
- return true;
-}
-
-#if GTEST_HAS_SEH
-
-// Adds an "exception thrown" fatal failure to the current test. This
-// function returns its result via an output parameter pointer because VC++
-// prohibits creation of objects with destructors on stack in functions
-// using __try (see error C2712).
-static std::string* FormatSehExceptionMessage(DWORD exception_code,
- const char* location) {
- Message message;
- message << "SEH exception with code 0x" << std::setbase(16) <<
- exception_code << std::setbase(10) << " thrown in " << location << ".";
-
- return new std::string(message.GetString());
-}
-
-#endif // GTEST_HAS_SEH
-
-namespace internal {
-
-#if GTEST_HAS_EXCEPTIONS
-
-// Adds an "exception thrown" fatal failure to the current test.
-static std::string FormatCxxExceptionMessage(const char* description,
- const char* location) {
- Message message;
- if (description != NULL) {
- message << "C++ exception with description \"" << description << "\"";
- } else {
- message << "Unknown C++ exception";
- }
- message << " thrown in " << location << ".";
-
- return message.GetString();
-}
-
-static std::string PrintTestPartResultToString(
- const TestPartResult& test_part_result);
-
-GoogleTestFailureException::GoogleTestFailureException(
- const TestPartResult& failure)
- : ::std::runtime_error(PrintTestPartResultToString(failure).c_str()) {}
-
-#endif // GTEST_HAS_EXCEPTIONS
-
-// We put these helper functions in the internal namespace as IBM's xlC
-// compiler rejects the code if they were declared static.
-
-// Runs the given method and handles SEH exceptions it throws, when
-// SEH is supported; returns the 0-value for type Result in case of an
-// SEH exception. (Microsoft compilers cannot handle SEH and C++
-// exceptions in the same function. Therefore, we provide a separate
-// wrapper function for handling SEH exceptions.)
-template <class T, typename Result>
-Result HandleSehExceptionsInMethodIfSupported(
- T* object, Result (T::*method)(), const char* location) {
-#if GTEST_HAS_SEH
- __try {
- return (object->*method)();
- } __except (internal::UnitTestOptions::GTestShouldProcessSEH( // NOLINT
- GetExceptionCode())) {
- // We create the exception message on the heap because VC++ prohibits
- // creation of objects with destructors on stack in functions using __try
- // (see error C2712).
- std::string* exception_message = FormatSehExceptionMessage(
- GetExceptionCode(), location);
- internal::ReportFailureInUnknownLocation(TestPartResult::kFatalFailure,
- *exception_message);
- delete exception_message;
- return static_cast<Result>(0);
- }
-#else
- (void)location;
- return (object->*method)();
-#endif // GTEST_HAS_SEH
-}
-
-// Runs the given method and catches and reports C++ and/or SEH-style
-// exceptions, if they are supported; returns the 0-value for type
-// Result in case of an SEH exception.
-template <class T, typename Result>
-Result HandleExceptionsInMethodIfSupported(
- T* object, Result (T::*method)(), const char* location) {
- // NOTE: The user code can affect the way in which Google Test handles
- // exceptions by setting GTEST_FLAG(catch_exceptions), but only before
- // RUN_ALL_TESTS() starts. It is technically possible to check the flag
- // after the exception is caught and either report or re-throw the
- // exception based on the flag's value:
- //
- // try {
- // // Perform the test method.
- // } catch (...) {
- // if (GTEST_FLAG(catch_exceptions))
- // // Report the exception as failure.
- // else
- // throw; // Re-throws the original exception.
- // }
- //
- // However, the purpose of this flag is to allow the program to drop into
- // the debugger when the exception is thrown. On most platforms, once the
- // control enters the catch block, the exception origin information is
- // lost and the debugger will stop the program at the point of the
- // re-throw in this function -- instead of at the point of the original
- // throw statement in the code under test. For this reason, we perform
- // the check early, sacrificing the ability to affect Google Test's
- // exception handling in the method where the exception is thrown.
- if (internal::GetUnitTestImpl()->catch_exceptions()) {
-#if GTEST_HAS_EXCEPTIONS
- try {
- return HandleSehExceptionsInMethodIfSupported(object, method, location);
- } catch (const internal::GoogleTestFailureException&) { // NOLINT
- // This exception type can only be thrown by a failed Google
- // Test assertion with the intention of letting another testing
- // framework catch it. Therefore we just re-throw it.
- throw;
- } catch (const std::exception& e) { // NOLINT
- internal::ReportFailureInUnknownLocation(
- TestPartResult::kFatalFailure,
- FormatCxxExceptionMessage(e.what(), location));
- } catch (...) { // NOLINT
- internal::ReportFailureInUnknownLocation(
- TestPartResult::kFatalFailure,
- FormatCxxExceptionMessage(NULL, location));
- }
- return static_cast<Result>(0);
-#else
- return HandleSehExceptionsInMethodIfSupported(object, method, location);
-#endif // GTEST_HAS_EXCEPTIONS
- } else {
- return (object->*method)();
- }
-}
-
-} // namespace internal
-
-// Runs the test and updates the test result.
-void Test::Run() {
- if (!HasSameFixtureClass()) return;
-
- internal::UnitTestImpl* const impl = internal::GetUnitTestImpl();
- impl->os_stack_trace_getter()->UponLeavingGTest();
- internal::HandleExceptionsInMethodIfSupported(this, &Test::SetUp, "SetUp()");
- // We will run the test only if SetUp() was successful.
- if (!HasFatalFailure()) {
- impl->os_stack_trace_getter()->UponLeavingGTest();
- internal::HandleExceptionsInMethodIfSupported(
- this, &Test::TestBody, "the test body");
- }
-
- // However, we want to clean up as much as possible. Hence we will
- // always call TearDown(), even if SetUp() or the test body has
- // failed.
- impl->os_stack_trace_getter()->UponLeavingGTest();
- internal::HandleExceptionsInMethodIfSupported(
- this, &Test::TearDown, "TearDown()");
-}
-
-// Returns true iff the current test has a fatal failure.
-bool Test::HasFatalFailure() {
- return internal::GetUnitTestImpl()->current_test_result()->HasFatalFailure();
-}
-
-// Returns true iff the current test has a non-fatal failure.
-bool Test::HasNonfatalFailure() {
- return internal::GetUnitTestImpl()->current_test_result()->
- HasNonfatalFailure();
-}
-
-// class TestInfo
-
-// Constructs a TestInfo object. It assumes ownership of the test factory
-// object.
-TestInfo::TestInfo(const std::string& a_test_case_name,
- const std::string& a_name,
- const char* a_type_param,
- const char* a_value_param,
- internal::CodeLocation a_code_location,
- internal::TypeId fixture_class_id,
- internal::TestFactoryBase* factory)
- : test_case_name_(a_test_case_name),
- name_(a_name),
- type_param_(a_type_param ? new std::string(a_type_param) : NULL),
- value_param_(a_value_param ? new std::string(a_value_param) : NULL),
- location_(a_code_location),
- fixture_class_id_(fixture_class_id),
- should_run_(false),
- is_disabled_(false),
- matches_filter_(false),
- factory_(factory),
- result_() {}
-
-// Destructs a TestInfo object.
-TestInfo::~TestInfo() { delete factory_; }
-
-namespace internal {
-
-// Creates a new TestInfo object and registers it with Google Test;
-// returns the created object.
-//
-// Arguments:
-//
-// test_case_name: name of the test case
-// name: name of the test
-// type_param: the name of the test's type parameter, or NULL if
-// this is not a typed or a type-parameterized test.
-// value_param: text representation of the test's value parameter,
-// or NULL if this is not a value-parameterized test.
-// code_location: code location where the test is defined
-// fixture_class_id: ID of the test fixture class
-// set_up_tc: pointer to the function that sets up the test case
-// tear_down_tc: pointer to the function that tears down the test case
-// factory: pointer to the factory that creates a test object.
-// The newly created TestInfo instance will assume
-// ownership of the factory object.
-TestInfo* MakeAndRegisterTestInfo(
- const char* test_case_name,
- const char* name,
- const char* type_param,
- const char* value_param,
- CodeLocation code_location,
- TypeId fixture_class_id,
- SetUpTestCaseFunc set_up_tc,
- TearDownTestCaseFunc tear_down_tc,
- TestFactoryBase* factory) {
- TestInfo* const test_info =
- new TestInfo(test_case_name, name, type_param, value_param,
- code_location, fixture_class_id, factory);
- GetUnitTestImpl()->AddTestInfo(set_up_tc, tear_down_tc, test_info);
- return test_info;
-}
-
-#if GTEST_HAS_PARAM_TEST
-void ReportInvalidTestCaseType(const char* test_case_name,
- CodeLocation code_location) {
- Message errors;
- errors
- << "Attempted redefinition of test case " << test_case_name << ".\n"
- << "All tests in the same test case must use the same test fixture\n"
- << "class. However, in test case " << test_case_name << ", you tried\n"
- << "to define a test using a fixture class different from the one\n"
- << "used earlier. This can happen if the two fixture classes are\n"
- << "from different namespaces and have the same name. You should\n"
- << "probably rename one of the classes to put the tests into different\n"
- << "test cases.";
-
- fprintf(stderr, "%s %s",
- FormatFileLocation(code_location.file.c_str(),
- code_location.line).c_str(),
- errors.GetString().c_str());
-}
-#endif // GTEST_HAS_PARAM_TEST
-
-} // namespace internal
-
-namespace {
-
-// A predicate that checks the test name of a TestInfo against a known
-// value.
-//
-// This is used for implementation of the TestCase class only. We put
-// it in the anonymous namespace to prevent polluting the outer
-// namespace.
-//
-// TestNameIs is copyable.
-class TestNameIs {
- public:
- // Constructor.
- //
- // TestNameIs has NO default constructor.
- explicit TestNameIs(const char* name)
- : name_(name) {}
-
- // Returns true iff the test name of test_info matches name_.
- bool operator()(const TestInfo * test_info) const {
- return test_info && test_info->name() == name_;
- }
-
- private:
- std::string name_;
-};
-
-} // namespace
-
-namespace internal {
-
-// This method expands all parameterized tests registered with macros TEST_P
-// and INSTANTIATE_TEST_CASE_P into regular tests and registers those.
-// This will be done just once during the program runtime.
-void UnitTestImpl::RegisterParameterizedTests() {
-#if GTEST_HAS_PARAM_TEST
- if (!parameterized_tests_registered_) {
- parameterized_test_registry_.RegisterTests();
- parameterized_tests_registered_ = true;
- }
-#endif
-}
-
-} // namespace internal
-
-// Creates the test object, runs it, records its result, and then
-// deletes it.
-void TestInfo::Run() {
- if (!should_run_) return;
-
- // Tells UnitTest where to store test result.
- internal::UnitTestImpl* const impl = internal::GetUnitTestImpl();
- impl->set_current_test_info(this);
-
- TestEventListener* repeater = UnitTest::GetInstance()->listeners().repeater();
-
- // Notifies the unit test event listeners that a test is about to start.
- repeater->OnTestStart(*this);
-
- const TimeInMillis start = internal::GetTimeInMillis();
-
- impl->os_stack_trace_getter()->UponLeavingGTest();
-
- // Creates the test object.
- Test* const test = internal::HandleExceptionsInMethodIfSupported(
- factory_, &internal::TestFactoryBase::CreateTest,
- "the test fixture's constructor");
-
- // Runs the test only if the test object was created and its
- // constructor didn't generate a fatal failure.
- if ((test != NULL) && !Test::HasFatalFailure()) {
- // This doesn't throw as all user code that can throw are wrapped into
- // exception handling code.
- test->Run();
- }
-
- // Deletes the test object.
- impl->os_stack_trace_getter()->UponLeavingGTest();
- internal::HandleExceptionsInMethodIfSupported(
- test, &Test::DeleteSelf_, "the test fixture's destructor");
-
- result_.set_elapsed_time(internal::GetTimeInMillis() - start);
-
- // Notifies the unit test event listener that a test has just finished.
- repeater->OnTestEnd(*this);
-
- // Tells UnitTest to stop associating assertion results to this
- // test.
- impl->set_current_test_info(NULL);
-}
-
-// class TestCase
-
-// Gets the number of successful tests in this test case.
-int TestCase::successful_test_count() const {
- return CountIf(test_info_list_, TestPassed);
-}
-
-// Gets the number of failed tests in this test case.
-int TestCase::failed_test_count() const {
- return CountIf(test_info_list_, TestFailed);
-}
-
-// Gets the number of disabled tests that will be reported in the XML report.
-int TestCase::reportable_disabled_test_count() const {
- return CountIf(test_info_list_, TestReportableDisabled);
-}
-
-// Gets the number of disabled tests in this test case.
-int TestCase::disabled_test_count() const {
- return CountIf(test_info_list_, TestDisabled);
-}
-
-// Gets the number of tests to be printed in the XML report.
-int TestCase::reportable_test_count() const {
- return CountIf(test_info_list_, TestReportable);
-}
-
-// Get the number of tests in this test case that should run.
-int TestCase::test_to_run_count() const {
- return CountIf(test_info_list_, ShouldRunTest);
-}
-
-// Gets the number of all tests.
-int TestCase::total_test_count() const {
- return static_cast<int>(test_info_list_.size());
-}
-
-// Creates a TestCase with the given name.
-//
-// Arguments:
-//
-// name: name of the test case
-// a_type_param: the name of the test case's type parameter, or NULL if
-// this is not a typed or a type-parameterized test case.
-// set_up_tc: pointer to the function that sets up the test case
-// tear_down_tc: pointer to the function that tears down the test case
-TestCase::TestCase(const char* a_name, const char* a_type_param,
- Test::SetUpTestCaseFunc set_up_tc,
- Test::TearDownTestCaseFunc tear_down_tc)
- : name_(a_name),
- type_param_(a_type_param ? new std::string(a_type_param) : NULL),
- set_up_tc_(set_up_tc),
- tear_down_tc_(tear_down_tc),
- should_run_(false),
- elapsed_time_(0) {
-}
-
-// Destructor of TestCase.
-TestCase::~TestCase() {
- // Deletes every Test in the collection.
- ForEach(test_info_list_, internal::Delete<TestInfo>);
-}
-
-// Returns the i-th test among all the tests. i can range from 0 to
-// total_test_count() - 1. If i is not in that range, returns NULL.
-const TestInfo* TestCase::GetTestInfo(int i) const {
- const int index = GetElementOr(test_indices_, i, -1);
- return index < 0 ? NULL : test_info_list_[index];
-}
-
-// Returns the i-th test among all the tests. i can range from 0 to
-// total_test_count() - 1. If i is not in that range, returns NULL.
-TestInfo* TestCase::GetMutableTestInfo(int i) {
- const int index = GetElementOr(test_indices_, i, -1);
- return index < 0 ? NULL : test_info_list_[index];
-}
-
-// Adds a test to this test case. Will delete the test upon
-// destruction of the TestCase object.
-void TestCase::AddTestInfo(TestInfo * test_info) {
- test_info_list_.push_back(test_info);
- test_indices_.push_back(static_cast<int>(test_indices_.size()));
-}
-
-// Runs every test in this TestCase.
-void TestCase::Run() {
- if (!should_run_) return;
-
- internal::UnitTestImpl* const impl = internal::GetUnitTestImpl();
- impl->set_current_test_case(this);
-
- TestEventListener* repeater = UnitTest::GetInstance()->listeners().repeater();
-
- repeater->OnTestCaseStart(*this);
- impl->os_stack_trace_getter()->UponLeavingGTest();
- internal::HandleExceptionsInMethodIfSupported(
- this, &TestCase::RunSetUpTestCase, "SetUpTestCase()");
-
- const internal::TimeInMillis start = internal::GetTimeInMillis();
- for (int i = 0; i < total_test_count(); i++) {
- GetMutableTestInfo(i)->Run();
- }
- elapsed_time_ = internal::GetTimeInMillis() - start;
-
- impl->os_stack_trace_getter()->UponLeavingGTest();
- internal::HandleExceptionsInMethodIfSupported(
- this, &TestCase::RunTearDownTestCase, "TearDownTestCase()");
-
- repeater->OnTestCaseEnd(*this);
- impl->set_current_test_case(NULL);
-}
-
-// Clears the results of all tests in this test case.
-void TestCase::ClearResult() {
- ad_hoc_test_result_.Clear();
- ForEach(test_info_list_, TestInfo::ClearTestResult);
-}
-
-// Shuffles the tests in this test case.
-void TestCase::ShuffleTests(internal::Random* random) {
- Shuffle(random, &test_indices_);
-}
-
-// Restores the test order to before the first shuffle.
-void TestCase::UnshuffleTests() {
- for (size_t i = 0; i < test_indices_.size(); i++) {
- test_indices_[i] = static_cast<int>(i);
- }
-}
-
-// Formats a countable noun. Depending on its quantity, either the
-// singular form or the plural form is used. e.g.
-//
-// FormatCountableNoun(1, "formula", "formuli") returns "1 formula".
-// FormatCountableNoun(5, "book", "books") returns "5 books".
-static std::string FormatCountableNoun(int count,
- const char * singular_form,
- const char * plural_form) {
- return internal::StreamableToString(count) + " " +
- (count == 1 ? singular_form : plural_form);
-}
-
-// Formats the count of tests.
-static std::string FormatTestCount(int test_count) {
- return FormatCountableNoun(test_count, "test", "tests");
-}
-
-// Formats the count of test cases.
-static std::string FormatTestCaseCount(int test_case_count) {
- return FormatCountableNoun(test_case_count, "test case", "test cases");
-}
-
-// Converts a TestPartResult::Type enum to human-friendly string
-// representation. Both kNonFatalFailure and kFatalFailure are translated
-// to "Failure", as the user usually doesn't care about the difference
-// between the two when viewing the test result.
-static const char * TestPartResultTypeToString(TestPartResult::Type type) {
- switch (type) {
- case TestPartResult::kSuccess:
- return "Success";
-
- case TestPartResult::kNonFatalFailure:
- case TestPartResult::kFatalFailure:
-#ifdef _MSC_VER
- return "error: ";
-#else
- return "Failure\n";
-#endif
- default:
- return "Unknown result type";
- }
-}
-
-namespace internal {
-
-// Prints a TestPartResult to an std::string.
-static std::string PrintTestPartResultToString(
- const TestPartResult& test_part_result) {
- return (Message()
- << internal::FormatFileLocation(test_part_result.file_name(),
- test_part_result.line_number())
- << " " << TestPartResultTypeToString(test_part_result.type())
- << test_part_result.message()).GetString();
-}
-
-// Prints a TestPartResult.
-static void PrintTestPartResult(const TestPartResult& test_part_result) {
- const std::string& result =
- PrintTestPartResultToString(test_part_result);
- printf("%s\n", result.c_str());
- fflush(stdout);
- // If the test program runs in Visual Studio or a debugger, the
- // following statements add the test part result message to the Output
- // window such that the user can double-click on it to jump to the
- // corresponding source code location; otherwise they do nothing.
-#if GTEST_OS_WINDOWS && !GTEST_OS_WINDOWS_MOBILE
- // We don't call OutputDebugString*() on Windows Mobile, as printing
- // to stdout is done by OutputDebugString() there already - we don't
- // want the same message printed twice.
- ::OutputDebugStringA(result.c_str());
- ::OutputDebugStringA("\n");
-#endif
-}
-
-// class PrettyUnitTestResultPrinter
-
-enum GTestColor {
- COLOR_DEFAULT,
- COLOR_RED,
- COLOR_GREEN,
- COLOR_YELLOW
-};
-
-#if GTEST_OS_WINDOWS && !GTEST_OS_WINDOWS_MOBILE && \
- !GTEST_OS_WINDOWS_PHONE && !GTEST_OS_WINDOWS_RT
-
-// Returns the character attribute for the given color.
-WORD GetColorAttribute(GTestColor color) {
- switch (color) {
- case COLOR_RED: return FOREGROUND_RED;
- case COLOR_GREEN: return FOREGROUND_GREEN;
- case COLOR_YELLOW: return FOREGROUND_RED | FOREGROUND_GREEN;
- default: return 0;
- }
-}
-
-#else
-
-// Returns the ANSI color code for the given color. COLOR_DEFAULT is
-// an invalid input.
-const char* GetAnsiColorCode(GTestColor color) {
- switch (color) {
- case COLOR_RED: return "1";
- case COLOR_GREEN: return "2";
- case COLOR_YELLOW: return "3";
- default: return NULL;
- };
-}
-
-#endif // GTEST_OS_WINDOWS && !GTEST_OS_WINDOWS_MOBILE
-
-// Returns true iff Google Test should use colors in the output.
-bool ShouldUseColor(bool stdout_is_tty) {
- const char* const gtest_color = GTEST_FLAG(color).c_str();
-
- if (String::CaseInsensitiveCStringEquals(gtest_color, "auto")) {
-#if GTEST_OS_WINDOWS
- // On Windows the TERM variable is usually not set, but the
- // console there does support colors.
- return stdout_is_tty;
-#else
- // On non-Windows platforms, we rely on the TERM variable.
- const char* const term = posix::GetEnv("TERM");
- const bool term_supports_color =
- String::CStringEquals(term, "xterm") ||
- String::CStringEquals(term, "xterm-color") ||
- String::CStringEquals(term, "xterm-256color") ||
- String::CStringEquals(term, "screen") ||
- String::CStringEquals(term, "screen-256color") ||
- String::CStringEquals(term, "tmux") ||
- String::CStringEquals(term, "tmux-256color") ||
- String::CStringEquals(term, "rxvt-unicode") ||
- String::CStringEquals(term, "rxvt-unicode-256color") ||
- String::CStringEquals(term, "linux") ||
- String::CStringEquals(term, "cygwin");
- return stdout_is_tty && term_supports_color;
-#endif // GTEST_OS_WINDOWS
- }
-
- return String::CaseInsensitiveCStringEquals(gtest_color, "yes") ||
- String::CaseInsensitiveCStringEquals(gtest_color, "true") ||
- String::CaseInsensitiveCStringEquals(gtest_color, "t") ||
- String::CStringEquals(gtest_color, "1");
- // We take "yes", "true", "t", and "1" as meaning "yes". If the
- // value is neither one of these nor "auto", we treat it as "no" to
- // be conservative.
-}
-
-// Helpers for printing colored strings to stdout. Note that on Windows, we
-// cannot simply emit special characters and have the terminal change colors.
-// This routine must actually emit the characters rather than return a string
-// that would be colored when printed, as can be done on Linux.
-void ColoredPrintf(GTestColor color, const char* fmt, ...) {
- va_list args;
- va_start(args, fmt);
-
-#if GTEST_OS_WINDOWS_MOBILE || GTEST_OS_SYMBIAN || GTEST_OS_ZOS || \
- GTEST_OS_IOS || GTEST_OS_WINDOWS_PHONE || GTEST_OS_WINDOWS_RT
- const bool use_color = AlwaysFalse();
-#else
- static const bool in_color_mode =
- ShouldUseColor(posix::IsATTY(posix::FileNo(stdout)) != 0);
- const bool use_color = in_color_mode && (color != COLOR_DEFAULT);
-#endif // GTEST_OS_WINDOWS_MOBILE || GTEST_OS_SYMBIAN || GTEST_OS_ZOS
- // The '!= 0' comparison is necessary to satisfy MSVC 7.1.
-
- if (!use_color) {
- vprintf(fmt, args);
- va_end(args);
- return;
- }
-
-#if GTEST_OS_WINDOWS && !GTEST_OS_WINDOWS_MOBILE && \
- !GTEST_OS_WINDOWS_PHONE && !GTEST_OS_WINDOWS_RT
- const HANDLE stdout_handle = GetStdHandle(STD_OUTPUT_HANDLE);
-
- // Gets the current text color.
- CONSOLE_SCREEN_BUFFER_INFO buffer_info;
- GetConsoleScreenBufferInfo(stdout_handle, &buffer_info);
- const WORD old_color_attrs = buffer_info.wAttributes;
-
- // We need to flush the stream buffers into the console before each
- // SetConsoleTextAttribute call lest it affect the text that is already
- // printed but has not yet reached the console.
- fflush(stdout);
- SetConsoleTextAttribute(stdout_handle,
- GetColorAttribute(color) | FOREGROUND_INTENSITY);
- vprintf(fmt, args);
-
- fflush(stdout);
- // Restores the text color.
- SetConsoleTextAttribute(stdout_handle, old_color_attrs);
-#else
- printf("\033[0;3%sm", GetAnsiColorCode(color));
- vprintf(fmt, args);
- printf("\033[m"); // Resets the terminal to default.
-#endif // GTEST_OS_WINDOWS && !GTEST_OS_WINDOWS_MOBILE
- va_end(args);
-}
-
-// Text printed in Google Test's text output and --gunit_list_tests
-// output to label the type parameter and value parameter for a test.
-static const char kTypeParamLabel[] = "TypeParam";
-static const char kValueParamLabel[] = "GetParam()";
-
-void PrintFullTestCommentIfPresent(const TestInfo& test_info) {
- const char* const type_param = test_info.type_param();
- const char* const value_param = test_info.value_param();
-
- if (type_param != NULL || value_param != NULL) {
- printf(", where ");
- if (type_param != NULL) {
- printf("%s = %s", kTypeParamLabel, type_param);
- if (value_param != NULL)
- printf(" and ");
- }
- if (value_param != NULL) {
- printf("%s = %s", kValueParamLabel, value_param);
- }
- }
-}
-
-// This class implements the TestEventListener interface.
-//
-// Class PrettyUnitTestResultPrinter is copyable.
-class PrettyUnitTestResultPrinter : public TestEventListener {
- public:
- PrettyUnitTestResultPrinter() {}
- static void PrintTestName(const char * test_case, const char * test) {
- printf("%s.%s", test_case, test);
- }
-
- // The following methods override what's in the TestEventListener class.
- virtual void OnTestProgramStart(const UnitTest& /*unit_test*/) {}
- virtual void OnTestIterationStart(const UnitTest& unit_test, int iteration);
- virtual void OnEnvironmentsSetUpStart(const UnitTest& unit_test);
- virtual void OnEnvironmentsSetUpEnd(const UnitTest& /*unit_test*/) {}
- virtual void OnTestCaseStart(const TestCase& test_case);
- virtual void OnTestStart(const TestInfo& test_info);
- virtual void OnTestPartResult(const TestPartResult& result);
- virtual void OnTestEnd(const TestInfo& test_info);
- virtual void OnTestCaseEnd(const TestCase& test_case);
- virtual void OnEnvironmentsTearDownStart(const UnitTest& unit_test);
- virtual void OnEnvironmentsTearDownEnd(const UnitTest& /*unit_test*/) {}
- virtual void OnTestIterationEnd(const UnitTest& unit_test, int iteration);
- virtual void OnTestProgramEnd(const UnitTest& /*unit_test*/) {}
-
- private:
- static void PrintFailedTests(const UnitTest& unit_test);
-};
-
- // Fired before each iteration of tests starts.
-void PrettyUnitTestResultPrinter::OnTestIterationStart(
- const UnitTest& unit_test, int iteration) {
- if (GTEST_FLAG(repeat) != 1)
- printf("\nRepeating all tests (iteration %d) . . .\n\n", iteration + 1);
-
- const char* const filter = GTEST_FLAG(filter).c_str();
-
- // Prints the filter if it's not *. This reminds the user that some
- // tests may be skipped.
- if (!String::CStringEquals(filter, kUniversalFilter)) {
- ColoredPrintf(COLOR_YELLOW,
- "Note: %s filter = %s\n", GTEST_NAME_, filter);
- }
-
- if (internal::ShouldShard(kTestTotalShards, kTestShardIndex, false)) {
- const Int32 shard_index = Int32FromEnvOrDie(kTestShardIndex, -1);
- ColoredPrintf(COLOR_YELLOW,
- "Note: This is test shard %d of %s.\n",
- static_cast<int>(shard_index) + 1,
- internal::posix::GetEnv(kTestTotalShards));
- }
-
- if (GTEST_FLAG(shuffle)) {
- ColoredPrintf(COLOR_YELLOW,
- "Note: Randomizing tests' orders with a seed of %d .\n",
- unit_test.random_seed());
- }
-
- ColoredPrintf(COLOR_GREEN, "[==========] ");
- printf("Running %s from %s.\n",
- FormatTestCount(unit_test.test_to_run_count()).c_str(),
- FormatTestCaseCount(unit_test.test_case_to_run_count()).c_str());
- fflush(stdout);
-}
-
-void PrettyUnitTestResultPrinter::OnEnvironmentsSetUpStart(
- const UnitTest& /*unit_test*/) {
- ColoredPrintf(COLOR_GREEN, "[----------] ");
- printf("Global test environment set-up.\n");
- fflush(stdout);
-}
-
-void PrettyUnitTestResultPrinter::OnTestCaseStart(const TestCase& test_case) {
- const std::string counts =
- FormatCountableNoun(test_case.test_to_run_count(), "test", "tests");
- ColoredPrintf(COLOR_GREEN, "[----------] ");
- printf("%s from %s", counts.c_str(), test_case.name());
- if (test_case.type_param() == NULL) {
- printf("\n");
- } else {
- printf(", where %s = %s\n", kTypeParamLabel, test_case.type_param());
- }
- fflush(stdout);
-}
-
-void PrettyUnitTestResultPrinter::OnTestStart(const TestInfo& test_info) {
- ColoredPrintf(COLOR_GREEN, "[ RUN ] ");
- PrintTestName(test_info.test_case_name(), test_info.name());
- printf("\n");
- fflush(stdout);
-}
-
-// Called after an assertion failure.
-void PrettyUnitTestResultPrinter::OnTestPartResult(
- const TestPartResult& result) {
- // If the test part succeeded, we don't need to do anything.
- if (result.type() == TestPartResult::kSuccess)
- return;
-
- // Print failure message from the assertion (e.g. expected this and got that).
- PrintTestPartResult(result);
- fflush(stdout);
-}
-
-void PrettyUnitTestResultPrinter::OnTestEnd(const TestInfo& test_info) {
- if (test_info.result()->Passed()) {
- ColoredPrintf(COLOR_GREEN, "[ OK ] ");
- } else {
- ColoredPrintf(COLOR_RED, "[ FAILED ] ");
- }
- PrintTestName(test_info.test_case_name(), test_info.name());
- if (test_info.result()->Failed())
- PrintFullTestCommentIfPresent(test_info);
-
- if (GTEST_FLAG(print_time)) {
- printf(" (%s ms)\n", internal::StreamableToString(
- test_info.result()->elapsed_time()).c_str());
- } else {
- printf("\n");
- }
- fflush(stdout);
-}
-
-void PrettyUnitTestResultPrinter::OnTestCaseEnd(const TestCase& test_case) {
- if (!GTEST_FLAG(print_time)) return;
-
- const std::string counts =
- FormatCountableNoun(test_case.test_to_run_count(), "test", "tests");
- ColoredPrintf(COLOR_GREEN, "[----------] ");
- printf("%s from %s (%s ms total)\n\n",
- counts.c_str(), test_case.name(),
- internal::StreamableToString(test_case.elapsed_time()).c_str());
- fflush(stdout);
-}
-
-void PrettyUnitTestResultPrinter::OnEnvironmentsTearDownStart(
- const UnitTest& /*unit_test*/) {
- ColoredPrintf(COLOR_GREEN, "[----------] ");
- printf("Global test environment tear-down\n");
- fflush(stdout);
-}
-
-// Internal helper for printing the list of failed tests.
-void PrettyUnitTestResultPrinter::PrintFailedTests(const UnitTest& unit_test) {
- const int failed_test_count = unit_test.failed_test_count();
- if (failed_test_count == 0) {
- return;
- }
-
- for (int i = 0; i < unit_test.total_test_case_count(); ++i) {
- const TestCase& test_case = *unit_test.GetTestCase(i);
- if (!test_case.should_run() || (test_case.failed_test_count() == 0)) {
- continue;
- }
- for (int j = 0; j < test_case.total_test_count(); ++j) {
- const TestInfo& test_info = *test_case.GetTestInfo(j);
- if (!test_info.should_run() || test_info.result()->Passed()) {
- continue;
- }
- ColoredPrintf(COLOR_RED, "[ FAILED ] ");
- printf("%s.%s", test_case.name(), test_info.name());
- PrintFullTestCommentIfPresent(test_info);
- printf("\n");
- }
- }
-}
-
-void PrettyUnitTestResultPrinter::OnTestIterationEnd(const UnitTest& unit_test,
- int /*iteration*/) {
- ColoredPrintf(COLOR_GREEN, "[==========] ");
- printf("%s from %s ran.",
- FormatTestCount(unit_test.test_to_run_count()).c_str(),
- FormatTestCaseCount(unit_test.test_case_to_run_count()).c_str());
- if (GTEST_FLAG(print_time)) {
- printf(" (%s ms total)",
- internal::StreamableToString(unit_test.elapsed_time()).c_str());
- }
- printf("\n");
- ColoredPrintf(COLOR_GREEN, "[ PASSED ] ");
- printf("%s.\n", FormatTestCount(unit_test.successful_test_count()).c_str());
-
- int num_failures = unit_test.failed_test_count();
- if (!unit_test.Passed()) {
- const int failed_test_count = unit_test.failed_test_count();
- ColoredPrintf(COLOR_RED, "[ FAILED ] ");
- printf("%s, listed below:\n", FormatTestCount(failed_test_count).c_str());
- PrintFailedTests(unit_test);
- printf("\n%2d FAILED %s\n", num_failures,
- num_failures == 1 ? "TEST" : "TESTS");
- }
-
- int num_disabled = unit_test.reportable_disabled_test_count();
- if (num_disabled && !GTEST_FLAG(also_run_disabled_tests)) {
- if (!num_failures) {
- printf("\n"); // Add a spacer if no FAILURE banner is displayed.
- }
- ColoredPrintf(COLOR_YELLOW,
- " YOU HAVE %d DISABLED %s\n\n",
- num_disabled,
- num_disabled == 1 ? "TEST" : "TESTS");
- }
- // Ensure that Google Test output is printed before, e.g., heapchecker output.
- fflush(stdout);
-}
-
-// End PrettyUnitTestResultPrinter
-
-// class TestEventRepeater
-//
-// This class forwards events to other event listeners.
-class TestEventRepeater : public TestEventListener {
- public:
- TestEventRepeater() : forwarding_enabled_(true) {}
- virtual ~TestEventRepeater();
- void Append(TestEventListener *listener);
- TestEventListener* Release(TestEventListener* listener);
-
- // Controls whether events will be forwarded to listeners_. Set to false
- // in death test child processes.
- bool forwarding_enabled() const { return forwarding_enabled_; }
- void set_forwarding_enabled(bool enable) { forwarding_enabled_ = enable; }
-
- virtual void OnTestProgramStart(const UnitTest& unit_test);
- virtual void OnTestIterationStart(const UnitTest& unit_test, int iteration);
- virtual void OnEnvironmentsSetUpStart(const UnitTest& unit_test);
- virtual void OnEnvironmentsSetUpEnd(const UnitTest& unit_test);
- virtual void OnTestCaseStart(const TestCase& test_case);
- virtual void OnTestStart(const TestInfo& test_info);
- virtual void OnTestPartResult(const TestPartResult& result);
- virtual void OnTestEnd(const TestInfo& test_info);
- virtual void OnTestCaseEnd(const TestCase& test_case);
- virtual void OnEnvironmentsTearDownStart(const UnitTest& unit_test);
- virtual void OnEnvironmentsTearDownEnd(const UnitTest& unit_test);
- virtual void OnTestIterationEnd(const UnitTest& unit_test, int iteration);
- virtual void OnTestProgramEnd(const UnitTest& unit_test);
-
- private:
- // Controls whether events will be forwarded to listeners_. Set to false
- // in death test child processes.
- bool forwarding_enabled_;
- // The list of listeners that receive events.
- std::vector<TestEventListener*> listeners_;
-
- GTEST_DISALLOW_COPY_AND_ASSIGN_(TestEventRepeater);
-};
-
-TestEventRepeater::~TestEventRepeater() {
- ForEach(listeners_, Delete<TestEventListener>);
-}
-
-void TestEventRepeater::Append(TestEventListener *listener) {
- listeners_.push_back(listener);
-}
-
-// TODO(vladl@google.com): Factor the search functionality into Vector::Find.
-TestEventListener* TestEventRepeater::Release(TestEventListener *listener) {
- for (size_t i = 0; i < listeners_.size(); ++i) {
- if (listeners_[i] == listener) {
- listeners_.erase(listeners_.begin() + i);
- return listener;
- }
- }
-
- return NULL;
-}
-
-// Since most methods are very similar, use macros to reduce boilerplate.
-// This defines a member that forwards the call to all listeners.
-#define GTEST_REPEATER_METHOD_(Name, Type) \
-void TestEventRepeater::Name(const Type& parameter) { \
- if (forwarding_enabled_) { \
- for (size_t i = 0; i < listeners_.size(); i++) { \
- listeners_[i]->Name(parameter); \
- } \
- } \
-}
-// This defines a member that forwards the call to all listeners in reverse
-// order.
-#define GTEST_REVERSE_REPEATER_METHOD_(Name, Type) \
-void TestEventRepeater::Name(const Type& parameter) { \
- if (forwarding_enabled_) { \
- for (int i = static_cast<int>(listeners_.size()) - 1; i >= 0; i--) { \
- listeners_[i]->Name(parameter); \
- } \
- } \
-}
-
-GTEST_REPEATER_METHOD_(OnTestProgramStart, UnitTest)
-GTEST_REPEATER_METHOD_(OnEnvironmentsSetUpStart, UnitTest)
-GTEST_REPEATER_METHOD_(OnTestCaseStart, TestCase)
-GTEST_REPEATER_METHOD_(OnTestStart, TestInfo)
-GTEST_REPEATER_METHOD_(OnTestPartResult, TestPartResult)
-GTEST_REPEATER_METHOD_(OnEnvironmentsTearDownStart, UnitTest)
-GTEST_REVERSE_REPEATER_METHOD_(OnEnvironmentsSetUpEnd, UnitTest)
-GTEST_REVERSE_REPEATER_METHOD_(OnEnvironmentsTearDownEnd, UnitTest)
-GTEST_REVERSE_REPEATER_METHOD_(OnTestEnd, TestInfo)
-GTEST_REVERSE_REPEATER_METHOD_(OnTestCaseEnd, TestCase)
-GTEST_REVERSE_REPEATER_METHOD_(OnTestProgramEnd, UnitTest)
-
-#undef GTEST_REPEATER_METHOD_
-#undef GTEST_REVERSE_REPEATER_METHOD_
-
-void TestEventRepeater::OnTestIterationStart(const UnitTest& unit_test,
- int iteration) {
- if (forwarding_enabled_) {
- for (size_t i = 0; i < listeners_.size(); i++) {
- listeners_[i]->OnTestIterationStart(unit_test, iteration);
- }
- }
-}
-
-void TestEventRepeater::OnTestIterationEnd(const UnitTest& unit_test,
- int iteration) {
- if (forwarding_enabled_) {
- for (int i = static_cast<int>(listeners_.size()) - 1; i >= 0; i--) {
- listeners_[i]->OnTestIterationEnd(unit_test, iteration);
- }
- }
-}
-
-// End TestEventRepeater
-
-// This class generates an XML output file.
-class XmlUnitTestResultPrinter : public EmptyTestEventListener {
- public:
- explicit XmlUnitTestResultPrinter(const char* output_file);
-
- virtual void OnTestIterationEnd(const UnitTest& unit_test, int iteration);
-
- private:
- // Is c a whitespace character that is normalized to a space character
- // when it appears in an XML attribute value?
- static bool IsNormalizableWhitespace(char c) {
- return c == 0x9 || c == 0xA || c == 0xD;
- }
-
- // May c appear in a well-formed XML document?
- static bool IsValidXmlCharacter(char c) {
- return IsNormalizableWhitespace(c) || c >= 0x20;
- }
-
- // Returns an XML-escaped copy of the input string str. If
- // is_attribute is true, the text is meant to appear as an attribute
- // value, and normalizable whitespace is preserved by replacing it
- // with character references.
- static std::string EscapeXml(const std::string& str, bool is_attribute);
-
- // Returns the given string with all characters invalid in XML removed.
- static std::string RemoveInvalidXmlCharacters(const std::string& str);
-
- // Convenience wrapper around EscapeXml when str is an attribute value.
- static std::string EscapeXmlAttribute(const std::string& str) {
- return EscapeXml(str, true);
- }
-
- // Convenience wrapper around EscapeXml when str is not an attribute value.
- static std::string EscapeXmlText(const char* str) {
- return EscapeXml(str, false);
- }
-
- // Verifies that the given attribute belongs to the given element and
- // streams the attribute as XML.
- static void OutputXmlAttribute(std::ostream* stream,
- const std::string& element_name,
- const std::string& name,
- const std::string& value);
-
- // Streams an XML CDATA section, escaping invalid CDATA sequences as needed.
- static void OutputXmlCDataSection(::std::ostream* stream, const char* data);
-
- // Streams an XML representation of a TestInfo object.
- static void OutputXmlTestInfo(::std::ostream* stream,
- const char* test_case_name,
- const TestInfo& test_info);
-
- // Prints an XML representation of a TestCase object
- static void PrintXmlTestCase(::std::ostream* stream,
- const TestCase& test_case);
-
- // Prints an XML summary of unit_test to output stream out.
- static void PrintXmlUnitTest(::std::ostream* stream,
- const UnitTest& unit_test);
-
- // Produces a string representing the test properties in a result as space
- // delimited XML attributes based on the property key="value" pairs.
- // When the std::string is not empty, it includes a space at the beginning,
- // to delimit this attribute from prior attributes.
- static std::string TestPropertiesAsXmlAttributes(const TestResult& result);
-
- // The output file.
- const std::string output_file_;
-
- GTEST_DISALLOW_COPY_AND_ASSIGN_(XmlUnitTestResultPrinter);
-};
-
-// Creates a new XmlUnitTestResultPrinter.
-XmlUnitTestResultPrinter::XmlUnitTestResultPrinter(const char* output_file)
- : output_file_(output_file) {
- if (output_file_.c_str() == NULL || output_file_.empty()) {
- fprintf(stderr, "XML output file may not be null\n");
- fflush(stderr);
- exit(EXIT_FAILURE);
- }
-}
-
-// Called after the unit test ends.
-void XmlUnitTestResultPrinter::OnTestIterationEnd(const UnitTest& unit_test,
- int /*iteration*/) {
- FILE* xmlout = NULL;
- FilePath output_file(output_file_);
- FilePath output_dir(output_file.RemoveFileName());
-
- if (output_dir.CreateDirectoriesRecursively()) {
- xmlout = posix::FOpen(output_file_.c_str(), "w");
- }
- if (xmlout == NULL) {
- // TODO(wan): report the reason of the failure.
- //
- // We don't do it for now as:
- //
- // 1. There is no urgent need for it.
- // 2. It's a bit involved to make the errno variable thread-safe on
- // all three operating systems (Linux, Windows, and Mac OS).
- // 3. To interpret the meaning of errno in a thread-safe way,
- // we need the strerror_r() function, which is not available on
- // Windows.
- fprintf(stderr,
- "Unable to open file \"%s\"\n",
- output_file_.c_str());
- fflush(stderr);
- exit(EXIT_FAILURE);
- }
- std::stringstream stream;
- PrintXmlUnitTest(&stream, unit_test);
- fprintf(xmlout, "%s", StringStreamToString(&stream).c_str());
- fclose(xmlout);
-}
-
-// Returns an XML-escaped copy of the input string str. If is_attribute
-// is true, the text is meant to appear as an attribute value, and
-// normalizable whitespace is preserved by replacing it with character
-// references.
-//
-// Invalid XML characters in str, if any, are stripped from the output.
-// It is expected that most, if not all, of the text processed by this
-// module will consist of ordinary English text.
-// If this module is ever modified to produce version 1.1 XML output,
-// most invalid characters can be retained using character references.
-// TODO(wan): It might be nice to have a minimally invasive, human-readable
-// escaping scheme for invalid characters, rather than dropping them.
-std::string XmlUnitTestResultPrinter::EscapeXml(
- const std::string& str, bool is_attribute) {
- Message m;
-
- for (size_t i = 0; i < str.size(); ++i) {
- const char ch = str[i];
- switch (ch) {
- case '<':
- m << "&lt;";
- break;
- case '>':
- m << "&gt;";
- break;
- case '&':
- m << "&amp;";
- break;
- case '\'':
- if (is_attribute)
- m << "&apos;";
- else
- m << '\'';
- break;
- case '"':
- if (is_attribute)
- m << "&quot;";
- else
- m << '"';
- break;
- default:
- if (IsValidXmlCharacter(ch)) {
- if (is_attribute && IsNormalizableWhitespace(ch))
- m << "&#x" << String::FormatByte(static_cast<unsigned char>(ch))
- << ";";
- else
- m << ch;
- }
- break;
- }
- }
-
- return m.GetString();
-}
-
-// Returns the given string with all characters invalid in XML removed.
-// Currently invalid characters are dropped from the string. An
-// alternative is to replace them with certain characters such as . or ?.
-std::string XmlUnitTestResultPrinter::RemoveInvalidXmlCharacters(
- const std::string& str) {
- std::string output;
- output.reserve(str.size());
- for (std::string::const_iterator it = str.begin(); it != str.end(); ++it)
- if (IsValidXmlCharacter(*it))
- output.push_back(*it);
-
- return output;
-}
-
-// The following routines generate an XML representation of a UnitTest
-// object.
-//
-// This is how Google Test concepts map to the DTD:
-//
-// <testsuites name="AllTests"> <-- corresponds to a UnitTest object
-// <testsuite name="testcase-name"> <-- corresponds to a TestCase object
-// <testcase name="test-name"> <-- corresponds to a TestInfo object
-// <failure message="...">...</failure>
-// <failure message="...">...</failure>
-// <failure message="...">...</failure>
-// <-- individual assertion failures
-// </testcase>
-// </testsuite>
-// </testsuites>
-
-// Formats the given time in milliseconds as seconds.
-std::string FormatTimeInMillisAsSeconds(TimeInMillis ms) {
- ::std::stringstream ss;
- ss << (static_cast<double>(ms) * 1e-3);
- return ss.str();
-}
-
-static bool PortableLocaltime(time_t seconds, struct tm* out) {
-#if defined(_MSC_VER)
- return localtime_s(out, &seconds) == 0;
-#elif defined(__MINGW32__) || defined(__MINGW64__)
- // MINGW <time.h> provides neither localtime_r nor localtime_s, but uses
- // Windows' localtime(), which has a thread-local tm buffer.
- struct tm* tm_ptr = localtime(&seconds); // NOLINT
- if (tm_ptr == NULL)
- return false;
- *out = *tm_ptr;
- return true;
-#else
- return localtime_r(&seconds, out) != NULL;
-#endif
-}
-
-// Converts the given epoch time in milliseconds to a date string in the ISO
-// 8601 format, without the timezone information.
-std::string FormatEpochTimeInMillisAsIso8601(TimeInMillis ms) {
- struct tm time_struct;
- if (!PortableLocaltime(static_cast<time_t>(ms / 1000), &time_struct))
- return "";
- // YYYY-MM-DDThh:mm:ss
- return StreamableToString(time_struct.tm_year + 1900) + "-" +
- String::FormatIntWidth2(time_struct.tm_mon + 1) + "-" +
- String::FormatIntWidth2(time_struct.tm_mday) + "T" +
- String::FormatIntWidth2(time_struct.tm_hour) + ":" +
- String::FormatIntWidth2(time_struct.tm_min) + ":" +
- String::FormatIntWidth2(time_struct.tm_sec);
-}
-
-// Streams an XML CDATA section, escaping invalid CDATA sequences as needed.
-void XmlUnitTestResultPrinter::OutputXmlCDataSection(::std::ostream* stream,
- const char* data) {
- const char* segment = data;
- *stream << "<![CDATA[";
- for (;;) {
- const char* const next_segment = strstr(segment, "]]>");
- if (next_segment != NULL) {
- stream->write(
- segment, static_cast<std::streamsize>(next_segment - segment));
- *stream << "]]>]]&gt;<![CDATA[";
- segment = next_segment + strlen("]]>");
- } else {
- *stream << segment;
- break;
- }
- }
- *stream << "]]>";
-}
-
-void XmlUnitTestResultPrinter::OutputXmlAttribute(
- std::ostream* stream,
- const std::string& element_name,
- const std::string& name,
- const std::string& value) {
- const std::vector<std::string>& allowed_names =
- GetReservedAttributesForElement(element_name);
-
- GTEST_CHECK_(std::find(allowed_names.begin(), allowed_names.end(), name) !=
- allowed_names.end())
- << "Attribute " << name << " is not allowed for element <" << element_name
- << ">.";
-
- *stream << " " << name << "=\"" << EscapeXmlAttribute(value) << "\"";
-}
-
-// Prints an XML representation of a TestInfo object.
-// TODO(wan): There is also value in printing properties with the plain printer.
-void XmlUnitTestResultPrinter::OutputXmlTestInfo(::std::ostream* stream,
- const char* test_case_name,
- const TestInfo& test_info) {
- const TestResult& result = *test_info.result();
- const std::string kTestcase = "testcase";
-
- *stream << " <testcase";
- OutputXmlAttribute(stream, kTestcase, "name", test_info.name());
-
- if (test_info.value_param() != NULL) {
- OutputXmlAttribute(stream, kTestcase, "value_param",
- test_info.value_param());
- }
- if (test_info.type_param() != NULL) {
- OutputXmlAttribute(stream, kTestcase, "type_param", test_info.type_param());
- }
-
- OutputXmlAttribute(stream, kTestcase, "status",
- test_info.should_run() ? "run" : "notrun");
- OutputXmlAttribute(stream, kTestcase, "time",
- FormatTimeInMillisAsSeconds(result.elapsed_time()));
- OutputXmlAttribute(stream, kTestcase, "classname", test_case_name);
- *stream << TestPropertiesAsXmlAttributes(result);
-
- int failures = 0;
- for (int i = 0; i < result.total_part_count(); ++i) {
- const TestPartResult& part = result.GetTestPartResult(i);
- if (part.failed()) {
- if (++failures == 1) {
- *stream << ">\n";
- }
- const string location = internal::FormatCompilerIndependentFileLocation(
- part.file_name(), part.line_number());
- const string summary = location + "\n" + part.summary();
- *stream << " <failure message=\""
- << EscapeXmlAttribute(summary.c_str())
- << "\" type=\"\">";
- const string detail = location + "\n" + part.message();
- OutputXmlCDataSection(stream, RemoveInvalidXmlCharacters(detail).c_str());
- *stream << "</failure>\n";
- }
- }
-
- if (failures == 0)
- *stream << " />\n";
- else
- *stream << " </testcase>\n";
-}
-
-// Prints an XML representation of a TestCase object
-void XmlUnitTestResultPrinter::PrintXmlTestCase(std::ostream* stream,
- const TestCase& test_case) {
- const std::string kTestsuite = "testsuite";
- *stream << " <" << kTestsuite;
- OutputXmlAttribute(stream, kTestsuite, "name", test_case.name());
- OutputXmlAttribute(stream, kTestsuite, "tests",
- StreamableToString(test_case.reportable_test_count()));
- OutputXmlAttribute(stream, kTestsuite, "failures",
- StreamableToString(test_case.failed_test_count()));
- OutputXmlAttribute(
- stream, kTestsuite, "disabled",
- StreamableToString(test_case.reportable_disabled_test_count()));
- OutputXmlAttribute(stream, kTestsuite, "errors", "0");
- OutputXmlAttribute(stream, kTestsuite, "time",
- FormatTimeInMillisAsSeconds(test_case.elapsed_time()));
- *stream << TestPropertiesAsXmlAttributes(test_case.ad_hoc_test_result())
- << ">\n";
-
- for (int i = 0; i < test_case.total_test_count(); ++i) {
- if (test_case.GetTestInfo(i)->is_reportable())
- OutputXmlTestInfo(stream, test_case.name(), *test_case.GetTestInfo(i));
- }
- *stream << " </" << kTestsuite << ">\n";
-}
-
-// Prints an XML summary of unit_test to output stream out.
-void XmlUnitTestResultPrinter::PrintXmlUnitTest(std::ostream* stream,
- const UnitTest& unit_test) {
- const std::string kTestsuites = "testsuites";
-
- *stream << "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n";
- *stream << "<" << kTestsuites;
-
- OutputXmlAttribute(stream, kTestsuites, "tests",
- StreamableToString(unit_test.reportable_test_count()));
- OutputXmlAttribute(stream, kTestsuites, "failures",
- StreamableToString(unit_test.failed_test_count()));
- OutputXmlAttribute(
- stream, kTestsuites, "disabled",
- StreamableToString(unit_test.reportable_disabled_test_count()));
- OutputXmlAttribute(stream, kTestsuites, "errors", "0");
- OutputXmlAttribute(
- stream, kTestsuites, "timestamp",
- FormatEpochTimeInMillisAsIso8601(unit_test.start_timestamp()));
- OutputXmlAttribute(stream, kTestsuites, "time",
- FormatTimeInMillisAsSeconds(unit_test.elapsed_time()));
-
- if (GTEST_FLAG(shuffle)) {
- OutputXmlAttribute(stream, kTestsuites, "random_seed",
- StreamableToString(unit_test.random_seed()));
- }
-
- *stream << TestPropertiesAsXmlAttributes(unit_test.ad_hoc_test_result());
-
- OutputXmlAttribute(stream, kTestsuites, "name", "AllTests");
- *stream << ">\n";
-
- for (int i = 0; i < unit_test.total_test_case_count(); ++i) {
- if (unit_test.GetTestCase(i)->reportable_test_count() > 0)
- PrintXmlTestCase(stream, *unit_test.GetTestCase(i));
- }
- *stream << "</" << kTestsuites << ">\n";
-}
-
-// Produces a string representing the test properties in a result as space
-// delimited XML attributes based on the property key="value" pairs.
-std::string XmlUnitTestResultPrinter::TestPropertiesAsXmlAttributes(
- const TestResult& result) {
- Message attributes;
- for (int i = 0; i < result.test_property_count(); ++i) {
- const TestProperty& property = result.GetTestProperty(i);
- attributes << " " << property.key() << "="
- << "\"" << EscapeXmlAttribute(property.value()) << "\"";
- }
- return attributes.GetString();
-}
-
-// End XmlUnitTestResultPrinter
-
-#if GTEST_CAN_STREAM_RESULTS_
-
-// Checks if str contains '=', '&', '%' or '\n' characters. If yes,
-// replaces them by "%xx" where xx is their hexadecimal value. For
-// example, replaces "=" with "%3D". This algorithm is O(strlen(str))
-// in both time and space -- important as the input str may contain an
-// arbitrarily long test failure message and stack trace.
-string StreamingListener::UrlEncode(const char* str) {
- string result;
- result.reserve(strlen(str) + 1);
- for (char ch = *str; ch != '\0'; ch = *++str) {
- switch (ch) {
- case '%':
- case '=':
- case '&':
- case '\n':
- result.append("%" + String::FormatByte(static_cast<unsigned char>(ch)));
- break;
- default:
- result.push_back(ch);
- break;
- }
- }
- return result;
-}
-
-void StreamingListener::SocketWriter::MakeConnection() {
- GTEST_CHECK_(sockfd_ == -1)
- << "MakeConnection() can't be called when there is already a connection.";
-
- addrinfo hints;
- memset(&hints, 0, sizeof(hints));
- hints.ai_family = AF_UNSPEC; // To allow both IPv4 and IPv6 addresses.
- hints.ai_socktype = SOCK_STREAM;
- addrinfo* servinfo = NULL;
-
- // Use the getaddrinfo() to get a linked list of IP addresses for
- // the given host name.
- const int error_num = getaddrinfo(
- host_name_.c_str(), port_num_.c_str(), &hints, &servinfo);
- if (error_num != 0) {
- GTEST_LOG_(WARNING) << "stream_result_to: getaddrinfo() failed: "
- << gai_strerror(error_num);
- }
-
- // Loop through all the results and connect to the first we can.
- for (addrinfo* cur_addr = servinfo; sockfd_ == -1 && cur_addr != NULL;
- cur_addr = cur_addr->ai_next) {
- sockfd_ = socket(
- cur_addr->ai_family, cur_addr->ai_socktype, cur_addr->ai_protocol);
- if (sockfd_ != -1) {
- // Connect the client socket to the server socket.
- if (connect(sockfd_, cur_addr->ai_addr, cur_addr->ai_addrlen) == -1) {
- close(sockfd_);
- sockfd_ = -1;
- }
- }
- }
-
- freeaddrinfo(servinfo); // all done with this structure
-
- if (sockfd_ == -1) {
- GTEST_LOG_(WARNING) << "stream_result_to: failed to connect to "
- << host_name_ << ":" << port_num_;
- }
-}
-
-// End of class Streaming Listener
-#endif // GTEST_CAN_STREAM_RESULTS__
-
-// Class ScopedTrace
-
-// Pushes the given source file location and message onto a per-thread
-// trace stack maintained by Google Test.
-ScopedTrace::ScopedTrace(const char* file, int line, const Message& message)
- GTEST_LOCK_EXCLUDED_(&UnitTest::mutex_) {
- TraceInfo trace;
- trace.file = file;
- trace.line = line;
- trace.message = message.GetString();
-
- UnitTest::GetInstance()->PushGTestTrace(trace);
-}
-
-// Pops the info pushed by the c'tor.
-ScopedTrace::~ScopedTrace()
- GTEST_LOCK_EXCLUDED_(&UnitTest::mutex_) {
- UnitTest::GetInstance()->PopGTestTrace();
-}
-
-
-// class OsStackTraceGetter
-
-const char* const OsStackTraceGetterInterface::kElidedFramesMarker =
- "... " GTEST_NAME_ " internal frames ...";
-
-string OsStackTraceGetter::CurrentStackTrace(int /*max_depth*/,
- int /*skip_count*/) {
- return "";
-}
-
-void OsStackTraceGetter::UponLeavingGTest() {}
-
-// A helper class that creates the premature-exit file in its
-// constructor and deletes the file in its destructor.
-class ScopedPrematureExitFile {
- public:
- explicit ScopedPrematureExitFile(const char* premature_exit_filepath)
- : premature_exit_filepath_(premature_exit_filepath) {
- // If a path to the premature-exit file is specified...
- if (premature_exit_filepath != NULL && *premature_exit_filepath != '\0') {
- // create the file with a single "0" character in it. I/O
- // errors are ignored as there's nothing better we can do and we
- // don't want to fail the test because of this.
- FILE* pfile = posix::FOpen(premature_exit_filepath, "w");
- fwrite("0", 1, 1, pfile);
- fclose(pfile);
- }
- }
-
- ~ScopedPrematureExitFile() {
- if (premature_exit_filepath_ != NULL && *premature_exit_filepath_ != '\0') {
- remove(premature_exit_filepath_);
- }
- }
-
- private:
- const char* const premature_exit_filepath_;
-
- GTEST_DISALLOW_COPY_AND_ASSIGN_(ScopedPrematureExitFile);
-};
-
-} // namespace internal
-
-// class TestEventListeners
-
-TestEventListeners::TestEventListeners()
- : repeater_(new internal::TestEventRepeater()),
- default_result_printer_(NULL),
- default_xml_generator_(NULL) {
-}
-
-TestEventListeners::~TestEventListeners() { delete repeater_; }
-
-// Returns the standard listener responsible for the default console
-// output. Can be removed from the listeners list to shut down default
-// console output. Note that removing this object from the listener list
-// with Release transfers its ownership to the user.
-void TestEventListeners::Append(TestEventListener* listener) {
- repeater_->Append(listener);
-}
-
-// Removes the given event listener from the list and returns it. It then
-// becomes the caller's responsibility to delete the listener. Returns
-// NULL if the listener is not found in the list.
-TestEventListener* TestEventListeners::Release(TestEventListener* listener) {
- if (listener == default_result_printer_)
- default_result_printer_ = NULL;
- else if (listener == default_xml_generator_)
- default_xml_generator_ = NULL;
- return repeater_->Release(listener);
-}
-
-// Returns repeater that broadcasts the TestEventListener events to all
-// subscribers.
-TestEventListener* TestEventListeners::repeater() { return repeater_; }
-
-// Sets the default_result_printer attribute to the provided listener.
-// The listener is also added to the listener list and previous
-// default_result_printer is removed from it and deleted. The listener can
-// also be NULL in which case it will not be added to the list. Does
-// nothing if the previous and the current listener objects are the same.
-void TestEventListeners::SetDefaultResultPrinter(TestEventListener* listener) {
- if (default_result_printer_ != listener) {
- // It is an error to pass this method a listener that is already in the
- // list.
- delete Release(default_result_printer_);
- default_result_printer_ = listener;
- if (listener != NULL)
- Append(listener);
- }
-}
-
-// Sets the default_xml_generator attribute to the provided listener. The
-// listener is also added to the listener list and previous
-// default_xml_generator is removed from it and deleted. The listener can
-// also be NULL in which case it will not be added to the list. Does
-// nothing if the previous and the current listener objects are the same.
-void TestEventListeners::SetDefaultXmlGenerator(TestEventListener* listener) {
- if (default_xml_generator_ != listener) {
- // It is an error to pass this method a listener that is already in the
- // list.
- delete Release(default_xml_generator_);
- default_xml_generator_ = listener;
- if (listener != NULL)
- Append(listener);
- }
-}
-
-// Controls whether events will be forwarded by the repeater to the
-// listeners in the list.
-bool TestEventListeners::EventForwardingEnabled() const {
- return repeater_->forwarding_enabled();
-}
-
-void TestEventListeners::SuppressEventForwarding() {
- repeater_->set_forwarding_enabled(false);
-}
-
-// class UnitTest
-
-// Gets the singleton UnitTest object. The first time this method is
-// called, a UnitTest object is constructed and returned. Consecutive
-// calls will return the same object.
-//
-// We don't protect this under mutex_ as a user is not supposed to
-// call this before main() starts, from which point on the return
-// value will never change.
-UnitTest* UnitTest::GetInstance() {
- // When compiled with MSVC 7.1 in optimized mode, destroying the
- // UnitTest object upon exiting the program messes up the exit code,
- // causing successful tests to appear failed. We have to use a
- // different implementation in this case to bypass the compiler bug.
- // This implementation makes the compiler happy, at the cost of
- // leaking the UnitTest object.
-
- // CodeGear C++Builder insists on a public destructor for the
- // default implementation. Use this implementation to keep good OO
- // design with private destructor.
-
-#if (_MSC_VER == 1310 && !defined(_DEBUG)) || defined(__BORLANDC__)
- static UnitTest* const instance = new UnitTest;
- return instance;
-#else
- static UnitTest instance;
- return &instance;
-#endif // (_MSC_VER == 1310 && !defined(_DEBUG)) || defined(__BORLANDC__)
-}
-
-// Gets the number of successful test cases.
-int UnitTest::successful_test_case_count() const {
- return impl()->successful_test_case_count();
-}
-
-// Gets the number of failed test cases.
-int UnitTest::failed_test_case_count() const {
- return impl()->failed_test_case_count();
-}
-
-// Gets the number of all test cases.
-int UnitTest::total_test_case_count() const {
- return impl()->total_test_case_count();
-}
-
-// Gets the number of all test cases that contain at least one test
-// that should run.
-int UnitTest::test_case_to_run_count() const {
- return impl()->test_case_to_run_count();
-}
-
-// Gets the number of successful tests.
-int UnitTest::successful_test_count() const {
- return impl()->successful_test_count();
-}
-
-// Gets the number of failed tests.
-int UnitTest::failed_test_count() const { return impl()->failed_test_count(); }
-
-// Gets the number of disabled tests that will be reported in the XML report.
-int UnitTest::reportable_disabled_test_count() const {
- return impl()->reportable_disabled_test_count();
-}
-
-// Gets the number of disabled tests.
-int UnitTest::disabled_test_count() const {
- return impl()->disabled_test_count();
-}
-
-// Gets the number of tests to be printed in the XML report.
-int UnitTest::reportable_test_count() const {
- return impl()->reportable_test_count();
-}
-
-// Gets the number of all tests.
-int UnitTest::total_test_count() const { return impl()->total_test_count(); }
-
-// Gets the number of tests that should run.
-int UnitTest::test_to_run_count() const { return impl()->test_to_run_count(); }
-
-// Gets the time of the test program start, in ms from the start of the
-// UNIX epoch.
-internal::TimeInMillis UnitTest::start_timestamp() const {
- return impl()->start_timestamp();
-}
-
-// Gets the elapsed time, in milliseconds.
-internal::TimeInMillis UnitTest::elapsed_time() const {
- return impl()->elapsed_time();
-}
-
-// Returns true iff the unit test passed (i.e. all test cases passed).
-bool UnitTest::Passed() const { return impl()->Passed(); }
-
-// Returns true iff the unit test failed (i.e. some test case failed
-// or something outside of all tests failed).
-bool UnitTest::Failed() const { return impl()->Failed(); }
-
-// Gets the i-th test case among all the test cases. i can range from 0 to
-// total_test_case_count() - 1. If i is not in that range, returns NULL.
-const TestCase* UnitTest::GetTestCase(int i) const {
- return impl()->GetTestCase(i);
-}
-
-// Returns the TestResult containing information on test failures and
-// properties logged outside of individual test cases.
-const TestResult& UnitTest::ad_hoc_test_result() const {
- return *impl()->ad_hoc_test_result();
-}
-
-// Gets the i-th test case among all the test cases. i can range from 0 to
-// total_test_case_count() - 1. If i is not in that range, returns NULL.
-TestCase* UnitTest::GetMutableTestCase(int i) {
- return impl()->GetMutableTestCase(i);
-}
-
-// Returns the list of event listeners that can be used to track events
-// inside Google Test.
-TestEventListeners& UnitTest::listeners() {
- return *impl()->listeners();
-}
-
-// Registers and returns a global test environment. When a test
-// program is run, all global test environments will be set-up in the
-// order they were registered. After all tests in the program have
-// finished, all global test environments will be torn-down in the
-// *reverse* order they were registered.
-//
-// The UnitTest object takes ownership of the given environment.
-//
-// We don't protect this under mutex_, as we only support calling it
-// from the main thread.
-Environment* UnitTest::AddEnvironment(Environment* env) {
- if (env == NULL) {
- return NULL;
- }
-
- impl_->environments().push_back(env);
- return env;
-}
-
-// Adds a TestPartResult to the current TestResult object. All Google Test
-// assertion macros (e.g. ASSERT_TRUE, EXPECT_EQ, etc) eventually call
-// this to report their results. The user code should use the
-// assertion macros instead of calling this directly.
-void UnitTest::AddTestPartResult(
- TestPartResult::Type result_type,
- const char* file_name,
- int line_number,
- const std::string& message,
- const std::string& os_stack_trace) GTEST_LOCK_EXCLUDED_(mutex_) {
- Message msg;
- msg << message;
-
- internal::MutexLock lock(&mutex_);
- if (impl_->gtest_trace_stack().size() > 0) {
- msg << "\n" << GTEST_NAME_ << " trace:";
-
- for (int i = static_cast<int>(impl_->gtest_trace_stack().size());
- i > 0; --i) {
- const internal::TraceInfo& trace = impl_->gtest_trace_stack()[i - 1];
- msg << "\n" << internal::FormatFileLocation(trace.file, trace.line)
- << " " << trace.message;
- }
- }
-
- if (os_stack_trace.c_str() != NULL && !os_stack_trace.empty()) {
- msg << internal::kStackTraceMarker << os_stack_trace;
- }
-
- const TestPartResult result =
- TestPartResult(result_type, file_name, line_number,
- msg.GetString().c_str());
- impl_->GetTestPartResultReporterForCurrentThread()->
- ReportTestPartResult(result);
-
- if (result_type != TestPartResult::kSuccess) {
- // gtest_break_on_failure takes precedence over
- // gtest_throw_on_failure. This allows a user to set the latter
- // in the code (perhaps in order to use Google Test assertions
- // with another testing framework) and specify the former on the
- // command line for debugging.
- if (GTEST_FLAG(break_on_failure)) {
-#if GTEST_OS_WINDOWS && !GTEST_OS_WINDOWS_PHONE && !GTEST_OS_WINDOWS_RT
- // Using DebugBreak on Windows allows gtest to still break into a debugger
- // when a failure happens and both the --gtest_break_on_failure and
- // the --gtest_catch_exceptions flags are specified.
- DebugBreak();
-#else
- // Dereference NULL through a volatile pointer to prevent the compiler
- // from removing. We use this rather than abort() or __builtin_trap() for
- // portability: Symbian doesn't implement abort() well, and some debuggers
- // don't correctly trap abort().
- *static_cast<volatile int*>(NULL) = 1;
-#endif // GTEST_OS_WINDOWS
- } else if (GTEST_FLAG(throw_on_failure)) {
-#if GTEST_HAS_EXCEPTIONS
- throw internal::GoogleTestFailureException(result);
-#else
- // We cannot call abort() as it generates a pop-up in debug mode
- // that cannot be suppressed in VC 7.1 or below.
- exit(1);
-#endif
- }
- }
-}
-
-// Adds a TestProperty to the current TestResult object when invoked from
-// inside a test, to current TestCase's ad_hoc_test_result_ when invoked
-// from SetUpTestCase or TearDownTestCase, or to the global property set
-// when invoked elsewhere. If the result already contains a property with
-// the same key, the value will be updated.
-void UnitTest::RecordProperty(const std::string& key,
- const std::string& value) {
- impl_->RecordProperty(TestProperty(key, value));
-}
-
-// Runs all tests in this UnitTest object and prints the result.
-// Returns 0 if successful, or 1 otherwise.
-//
-// We don't protect this under mutex_, as we only support calling it
-// from the main thread.
-int UnitTest::Run() {
- const bool in_death_test_child_process =
- internal::GTEST_FLAG(internal_run_death_test).length() > 0;
-
- // Google Test implements this protocol for catching that a test
- // program exits before returning control to Google Test:
- //
- // 1. Upon start, Google Test creates a file whose absolute path
- // is specified by the environment variable
- // TEST_PREMATURE_EXIT_FILE.
- // 2. When Google Test has finished its work, it deletes the file.
- //
- // This allows a test runner to set TEST_PREMATURE_EXIT_FILE before
- // running a Google-Test-based test program and check the existence
- // of the file at the end of the test execution to see if it has
- // exited prematurely.
-
- // If we are in the child process of a death test, don't
- // create/delete the premature exit file, as doing so is unnecessary
- // and will confuse the parent process. Otherwise, create/delete
- // the file upon entering/leaving this function. If the program
- // somehow exits before this function has a chance to return, the
- // premature-exit file will be left undeleted, causing a test runner
- // that understands the premature-exit-file protocol to report the
- // test as having failed.
- const internal::ScopedPrematureExitFile premature_exit_file(
- in_death_test_child_process ?
- NULL : internal::posix::GetEnv("TEST_PREMATURE_EXIT_FILE"));
-
- // Captures the value of GTEST_FLAG(catch_exceptions). This value will be
- // used for the duration of the program.
- impl()->set_catch_exceptions(GTEST_FLAG(catch_exceptions));
-
-#if GTEST_HAS_SEH
- // Either the user wants Google Test to catch exceptions thrown by the
- // tests or this is executing in the context of death test child
- // process. In either case the user does not want to see pop-up dialogs
- // about crashes - they are expected.
- if (impl()->catch_exceptions() || in_death_test_child_process) {
-# if !GTEST_OS_WINDOWS_MOBILE && !GTEST_OS_WINDOWS_PHONE && !GTEST_OS_WINDOWS_RT
- // SetErrorMode doesn't exist on CE.
- SetErrorMode(SEM_FAILCRITICALERRORS | SEM_NOALIGNMENTFAULTEXCEPT |
- SEM_NOGPFAULTERRORBOX | SEM_NOOPENFILEERRORBOX);
-# endif // !GTEST_OS_WINDOWS_MOBILE
-
-# if (defined(_MSC_VER) || GTEST_OS_WINDOWS_MINGW) && !GTEST_OS_WINDOWS_MOBILE
- // Death test children can be terminated with _abort(). On Windows,
- // _abort() can show a dialog with a warning message. This forces the
- // abort message to go to stderr instead.
- _set_error_mode(_OUT_TO_STDERR);
-# endif
-
-# if _MSC_VER >= 1400 && !GTEST_OS_WINDOWS_MOBILE
- // In the debug version, Visual Studio pops up a separate dialog
- // offering a choice to debug the aborted program. We need to suppress
- // this dialog or it will pop up for every EXPECT/ASSERT_DEATH statement
- // executed. Google Test will notify the user of any unexpected
- // failure via stderr.
- //
- // VC++ doesn't define _set_abort_behavior() prior to the version 8.0.
- // Users of prior VC versions shall suffer the agony and pain of
- // clicking through the countless debug dialogs.
- // TODO(vladl@google.com): find a way to suppress the abort dialog() in the
- // debug mode when compiled with VC 7.1 or lower.
- if (!GTEST_FLAG(break_on_failure))
- _set_abort_behavior(
- 0x0, // Clear the following flags:
- _WRITE_ABORT_MSG | _CALL_REPORTFAULT); // pop-up window, core dump.
-# endif
- }
-#endif // GTEST_HAS_SEH
-
- return internal::HandleExceptionsInMethodIfSupported(
- impl(),
- &internal::UnitTestImpl::RunAllTests,
- "auxiliary test code (environments or event listeners)") ? 0 : 1;
-}
-
-// Returns the working directory when the first TEST() or TEST_F() was
-// executed.
-const char* UnitTest::original_working_dir() const {
- return impl_->original_working_dir_.c_str();
-}
-
-// Returns the TestCase object for the test that's currently running,
-// or NULL if no test is running.
-const TestCase* UnitTest::current_test_case() const
- GTEST_LOCK_EXCLUDED_(mutex_) {
- internal::MutexLock lock(&mutex_);
- return impl_->current_test_case();
-}
-
-// Returns the TestInfo object for the test that's currently running,
-// or NULL if no test is running.
-const TestInfo* UnitTest::current_test_info() const
- GTEST_LOCK_EXCLUDED_(mutex_) {
- internal::MutexLock lock(&mutex_);
- return impl_->current_test_info();
-}
-
-// Returns the random seed used at the start of the current test run.
-int UnitTest::random_seed() const { return impl_->random_seed(); }
-
-#if GTEST_HAS_PARAM_TEST
-// Returns ParameterizedTestCaseRegistry object used to keep track of
-// value-parameterized tests and instantiate and register them.
-internal::ParameterizedTestCaseRegistry&
- UnitTest::parameterized_test_registry()
- GTEST_LOCK_EXCLUDED_(mutex_) {
- return impl_->parameterized_test_registry();
-}
-#endif // GTEST_HAS_PARAM_TEST
-
-// Creates an empty UnitTest.
-UnitTest::UnitTest() {
- impl_ = new internal::UnitTestImpl(this);
-}
-
-// Destructor of UnitTest.
-UnitTest::~UnitTest() {
- delete impl_;
-}
-
-// Pushes a trace defined by SCOPED_TRACE() on to the per-thread
-// Google Test trace stack.
-void UnitTest::PushGTestTrace(const internal::TraceInfo& trace)
- GTEST_LOCK_EXCLUDED_(mutex_) {
- internal::MutexLock lock(&mutex_);
- impl_->gtest_trace_stack().push_back(trace);
-}
-
-// Pops a trace from the per-thread Google Test trace stack.
-void UnitTest::PopGTestTrace()
- GTEST_LOCK_EXCLUDED_(mutex_) {
- internal::MutexLock lock(&mutex_);
- impl_->gtest_trace_stack().pop_back();
-}
-
-namespace internal {
-
-UnitTestImpl::UnitTestImpl(UnitTest* parent)
- : parent_(parent),
- GTEST_DISABLE_MSC_WARNINGS_PUSH_(4355 /* using this in initializer */)
- default_global_test_part_result_reporter_(this),
- default_per_thread_test_part_result_reporter_(this),
- GTEST_DISABLE_MSC_WARNINGS_POP_()
- global_test_part_result_repoter_(
- &default_global_test_part_result_reporter_),
- per_thread_test_part_result_reporter_(
- &default_per_thread_test_part_result_reporter_),
-#if GTEST_HAS_PARAM_TEST
- parameterized_test_registry_(),
- parameterized_tests_registered_(false),
-#endif // GTEST_HAS_PARAM_TEST
- last_death_test_case_(-1),
- current_test_case_(NULL),
- current_test_info_(NULL),
- ad_hoc_test_result_(),
- os_stack_trace_getter_(NULL),
- post_flag_parse_init_performed_(false),
- random_seed_(0), // Will be overridden by the flag before first use.
- random_(0), // Will be reseeded before first use.
- start_timestamp_(0),
- elapsed_time_(0),
-#if GTEST_HAS_DEATH_TEST
- death_test_factory_(new DefaultDeathTestFactory),
-#endif
- // Will be overridden by the flag before first use.
- catch_exceptions_(false) {
- listeners()->SetDefaultResultPrinter(new PrettyUnitTestResultPrinter);
-}
-
-UnitTestImpl::~UnitTestImpl() {
- // Deletes every TestCase.
- ForEach(test_cases_, internal::Delete<TestCase>);
-
- // Deletes every Environment.
- ForEach(environments_, internal::Delete<Environment>);
-
- delete os_stack_trace_getter_;
-}
-
-// Adds a TestProperty to the current TestResult object when invoked in a
-// context of a test, to current test case's ad_hoc_test_result when invoke
-// from SetUpTestCase/TearDownTestCase, or to the global property set
-// otherwise. If the result already contains a property with the same key,
-// the value will be updated.
-void UnitTestImpl::RecordProperty(const TestProperty& test_property) {
- std::string xml_element;
- TestResult* test_result; // TestResult appropriate for property recording.
-
- if (current_test_info_ != NULL) {
- xml_element = "testcase";
- test_result = &(current_test_info_->result_);
- } else if (current_test_case_ != NULL) {
- xml_element = "testsuite";
- test_result = &(current_test_case_->ad_hoc_test_result_);
- } else {
- xml_element = "testsuites";
- test_result = &ad_hoc_test_result_;
- }
- test_result->RecordProperty(xml_element, test_property);
-}
-
-#if GTEST_HAS_DEATH_TEST
-// Disables event forwarding if the control is currently in a death test
-// subprocess. Must not be called before InitGoogleTest.
-void UnitTestImpl::SuppressTestEventsIfInSubprocess() {
- if (internal_run_death_test_flag_.get() != NULL)
- listeners()->SuppressEventForwarding();
-}
-#endif // GTEST_HAS_DEATH_TEST
-
-// Initializes event listeners performing XML output as specified by
-// UnitTestOptions. Must not be called before InitGoogleTest.
-void UnitTestImpl::ConfigureXmlOutput() {
- const std::string& output_format = UnitTestOptions::GetOutputFormat();
- if (output_format == "xml") {
- listeners()->SetDefaultXmlGenerator(new XmlUnitTestResultPrinter(
- UnitTestOptions::GetAbsolutePathToOutputFile().c_str()));
- } else if (output_format != "") {
- printf("WARNING: unrecognized output format \"%s\" ignored.\n",
- output_format.c_str());
- fflush(stdout);
- }
-}
-
-#if GTEST_CAN_STREAM_RESULTS_
-// Initializes event listeners for streaming test results in string form.
-// Must not be called before InitGoogleTest.
-void UnitTestImpl::ConfigureStreamingOutput() {
- const std::string& target = GTEST_FLAG(stream_result_to);
- if (!target.empty()) {
- const size_t pos = target.find(':');
- if (pos != std::string::npos) {
- listeners()->Append(new StreamingListener(target.substr(0, pos),
- target.substr(pos+1)));
- } else {
- printf("WARNING: unrecognized streaming target \"%s\" ignored.\n",
- target.c_str());
- fflush(stdout);
- }
- }
-}
-#endif // GTEST_CAN_STREAM_RESULTS_
-
-// Performs initialization dependent upon flag values obtained in
-// ParseGoogleTestFlagsOnly. Is called from InitGoogleTest after the call to
-// ParseGoogleTestFlagsOnly. In case a user neglects to call InitGoogleTest
-// this function is also called from RunAllTests. Since this function can be
-// called more than once, it has to be idempotent.
-void UnitTestImpl::PostFlagParsingInit() {
- // Ensures that this function does not execute more than once.
- if (!post_flag_parse_init_performed_) {
- post_flag_parse_init_performed_ = true;
-
-#if defined(GTEST_CUSTOM_TEST_EVENT_LISTENER_)
- // Register to send notifications about key process state changes.
- listeners()->Append(new GTEST_CUSTOM_TEST_EVENT_LISTENER_());
-#endif // defined(GTEST_CUSTOM_TEST_EVENT_LISTENER_)
-
-#if GTEST_HAS_DEATH_TEST
- InitDeathTestSubprocessControlInfo();
- SuppressTestEventsIfInSubprocess();
-#endif // GTEST_HAS_DEATH_TEST
-
- // Registers parameterized tests. This makes parameterized tests
- // available to the UnitTest reflection API without running
- // RUN_ALL_TESTS.
- RegisterParameterizedTests();
-
- // Configures listeners for XML output. This makes it possible for users
- // to shut down the default XML output before invoking RUN_ALL_TESTS.
- ConfigureXmlOutput();
-
-#if GTEST_CAN_STREAM_RESULTS_
- // Configures listeners for streaming test results to the specified server.
- ConfigureStreamingOutput();
-#endif // GTEST_CAN_STREAM_RESULTS_
- }
-}
-
-// A predicate that checks the name of a TestCase against a known
-// value.
-//
-// This is used for implementation of the UnitTest class only. We put
-// it in the anonymous namespace to prevent polluting the outer
-// namespace.
-//
-// TestCaseNameIs is copyable.
-class TestCaseNameIs {
- public:
- // Constructor.
- explicit TestCaseNameIs(const std::string& name)
- : name_(name) {}
-
- // Returns true iff the name of test_case matches name_.
- bool operator()(const TestCase* test_case) const {
- return test_case != NULL && strcmp(test_case->name(), name_.c_str()) == 0;
- }
-
- private:
- std::string name_;
-};
-
-// Finds and returns a TestCase with the given name. If one doesn't
-// exist, creates one and returns it. It's the CALLER'S
-// RESPONSIBILITY to ensure that this function is only called WHEN THE
-// TESTS ARE NOT SHUFFLED.
-//
-// Arguments:
-//
-// test_case_name: name of the test case
-// type_param: the name of the test case's type parameter, or NULL if
-// this is not a typed or a type-parameterized test case.
-// set_up_tc: pointer to the function that sets up the test case
-// tear_down_tc: pointer to the function that tears down the test case
-TestCase* UnitTestImpl::GetTestCase(const char* test_case_name,
- const char* type_param,
- Test::SetUpTestCaseFunc set_up_tc,
- Test::TearDownTestCaseFunc tear_down_tc) {
- // Can we find a TestCase with the given name?
- const std::vector<TestCase*>::const_iterator test_case =
- std::find_if(test_cases_.begin(), test_cases_.end(),
- TestCaseNameIs(test_case_name));
-
- if (test_case != test_cases_.end())
- return *test_case;
-
- // No. Let's create one.
- TestCase* const new_test_case =
- new TestCase(test_case_name, type_param, set_up_tc, tear_down_tc);
-
- // Is this a death test case?
- if (internal::UnitTestOptions::MatchesFilter(test_case_name,
- kDeathTestCaseFilter)) {
- // Yes. Inserts the test case after the last death test case
- // defined so far. This only works when the test cases haven't
- // been shuffled. Otherwise we may end up running a death test
- // after a non-death test.
- ++last_death_test_case_;
- test_cases_.insert(test_cases_.begin() + last_death_test_case_,
- new_test_case);
- } else {
- // No. Appends to the end of the list.
- test_cases_.push_back(new_test_case);
- }
-
- test_case_indices_.push_back(static_cast<int>(test_case_indices_.size()));
- return new_test_case;
-}
-
-// Helpers for setting up / tearing down the given environment. They
-// are for use in the ForEach() function.
-static void SetUpEnvironment(Environment* env) { env->SetUp(); }
-static void TearDownEnvironment(Environment* env) { env->TearDown(); }
-
-// Runs all tests in this UnitTest object, prints the result, and
-// returns true if all tests are successful. If any exception is
-// thrown during a test, the test is considered to be failed, but the
-// rest of the tests will still be run.
-//
-// When parameterized tests are enabled, it expands and registers
-// parameterized tests first in RegisterParameterizedTests().
-// All other functions called from RunAllTests() may safely assume that
-// parameterized tests are ready to be counted and run.
-bool UnitTestImpl::RunAllTests() {
- // Makes sure InitGoogleTest() was called.
- if (!GTestIsInitialized()) {
- printf("%s",
- "\nThis test program did NOT call ::testing::InitGoogleTest "
- "before calling RUN_ALL_TESTS(). Please fix it.\n");
- return false;
- }
-
- // Do not run any test if the --help flag was specified.
- if (g_help_flag)
- return true;
-
- // Repeats the call to the post-flag parsing initialization in case the
- // user didn't call InitGoogleTest.
- PostFlagParsingInit();
-
- // Even if sharding is not on, test runners may want to use the
- // GTEST_SHARD_STATUS_FILE to query whether the test supports the sharding
- // protocol.
- internal::WriteToShardStatusFileIfNeeded();
-
- // True iff we are in a subprocess for running a thread-safe-style
- // death test.
- bool in_subprocess_for_death_test = false;
-
-#if GTEST_HAS_DEATH_TEST
- in_subprocess_for_death_test = (internal_run_death_test_flag_.get() != NULL);
-# if defined(GTEST_EXTRA_DEATH_TEST_CHILD_SETUP_)
- if (in_subprocess_for_death_test) {
- GTEST_EXTRA_DEATH_TEST_CHILD_SETUP_();
- }
-# endif // defined(GTEST_EXTRA_DEATH_TEST_CHILD_SETUP_)
-#endif // GTEST_HAS_DEATH_TEST
-
- const bool should_shard = ShouldShard(kTestTotalShards, kTestShardIndex,
- in_subprocess_for_death_test);
-
- // Compares the full test names with the filter to decide which
- // tests to run.
- const bool has_tests_to_run = FilterTests(should_shard
- ? HONOR_SHARDING_PROTOCOL
- : IGNORE_SHARDING_PROTOCOL) > 0;
-
- // Lists the tests and exits if the --gtest_list_tests flag was specified.
- if (GTEST_FLAG(list_tests)) {
- // This must be called *after* FilterTests() has been called.
- ListTestsMatchingFilter();
- return true;
- }
-
- random_seed_ = GTEST_FLAG(shuffle) ?
- GetRandomSeedFromFlag(GTEST_FLAG(random_seed)) : 0;
-
- // True iff at least one test has failed.
- bool failed = false;
-
- TestEventListener* repeater = listeners()->repeater();
-
- start_timestamp_ = GetTimeInMillis();
- repeater->OnTestProgramStart(*parent_);
-
- // How many times to repeat the tests? We don't want to repeat them
- // when we are inside the subprocess of a death test.
- const int repeat = in_subprocess_for_death_test ? 1 : GTEST_FLAG(repeat);
- // Repeats forever if the repeat count is negative.
- const bool forever = repeat < 0;
- for (int i = 0; forever || i != repeat; i++) {
- // We want to preserve failures generated by ad-hoc test
- // assertions executed before RUN_ALL_TESTS().
- ClearNonAdHocTestResult();
-
- const TimeInMillis start = GetTimeInMillis();
-
- // Shuffles test cases and tests if requested.
- if (has_tests_to_run && GTEST_FLAG(shuffle)) {
- random()->Reseed(random_seed_);
- // This should be done before calling OnTestIterationStart(),
- // such that a test event listener can see the actual test order
- // in the event.
- ShuffleTests();
- }
-
- // Tells the unit test event listeners that the tests are about to start.
- repeater->OnTestIterationStart(*parent_, i);
-
- // Runs each test case if there is at least one test to run.
- if (has_tests_to_run) {
- // Sets up all environments beforehand.
- repeater->OnEnvironmentsSetUpStart(*parent_);
- ForEach(environments_, SetUpEnvironment);
- repeater->OnEnvironmentsSetUpEnd(*parent_);
-
- // Runs the tests only if there was no fatal failure during global
- // set-up.
- if (!Test::HasFatalFailure()) {
- for (int test_index = 0; test_index < total_test_case_count();
- test_index++) {
- GetMutableTestCase(test_index)->Run();
- }
- }
-
- // Tears down all environments in reverse order afterwards.
- repeater->OnEnvironmentsTearDownStart(*parent_);
- std::for_each(environments_.rbegin(), environments_.rend(),
- TearDownEnvironment);
- repeater->OnEnvironmentsTearDownEnd(*parent_);
- }
-
- elapsed_time_ = GetTimeInMillis() - start;
-
- // Tells the unit test event listener that the tests have just finished.
- repeater->OnTestIterationEnd(*parent_, i);
-
- // Gets the result and clears it.
- if (!Passed()) {
- failed = true;
- }
-
- // Restores the original test order after the iteration. This
- // allows the user to quickly repro a failure that happens in the
- // N-th iteration without repeating the first (N - 1) iterations.
- // This is not enclosed in "if (GTEST_FLAG(shuffle)) { ... }", in
- // case the user somehow changes the value of the flag somewhere
- // (it's always safe to unshuffle the tests).
- UnshuffleTests();
-
- if (GTEST_FLAG(shuffle)) {
- // Picks a new random seed for each iteration.
- random_seed_ = GetNextRandomSeed(random_seed_);
- }
- }
-
- repeater->OnTestProgramEnd(*parent_);
-
- return !failed;
-}
-
-// Reads the GTEST_SHARD_STATUS_FILE environment variable, and creates the file
-// if the variable is present. If a file already exists at this location, this
-// function will write over it. If the variable is present, but the file cannot
-// be created, prints an error and exits.
-void WriteToShardStatusFileIfNeeded() {
- const char* const test_shard_file = posix::GetEnv(kTestShardStatusFile);
- if (test_shard_file != NULL) {
- FILE* const file = posix::FOpen(test_shard_file, "w");
- if (file == NULL) {
- ColoredPrintf(COLOR_RED,
- "Could not write to the test shard status file \"%s\" "
- "specified by the %s environment variable.\n",
- test_shard_file, kTestShardStatusFile);
- fflush(stdout);
- exit(EXIT_FAILURE);
- }
- fclose(file);
- }
-}
-
-// Checks whether sharding is enabled by examining the relevant
-// environment variable values. If the variables are present,
-// but inconsistent (i.e., shard_index >= total_shards), prints
-// an error and exits. If in_subprocess_for_death_test, sharding is
-// disabled because it must only be applied to the original test
-// process. Otherwise, we could filter out death tests we intended to execute.
-bool ShouldShard(const char* total_shards_env,
- const char* shard_index_env,
- bool in_subprocess_for_death_test) {
- if (in_subprocess_for_death_test) {
- return false;
- }
-
- const Int32 total_shards = Int32FromEnvOrDie(total_shards_env, -1);
- const Int32 shard_index = Int32FromEnvOrDie(shard_index_env, -1);
-
- if (total_shards == -1 && shard_index == -1) {
- return false;
- } else if (total_shards == -1 && shard_index != -1) {
- const Message msg = Message()
- << "Invalid environment variables: you have "
- << kTestShardIndex << " = " << shard_index
- << ", but have left " << kTestTotalShards << " unset.\n";
- ColoredPrintf(COLOR_RED, msg.GetString().c_str());
- fflush(stdout);
- exit(EXIT_FAILURE);
- } else if (total_shards != -1 && shard_index == -1) {
- const Message msg = Message()
- << "Invalid environment variables: you have "
- << kTestTotalShards << " = " << total_shards
- << ", but have left " << kTestShardIndex << " unset.\n";
- ColoredPrintf(COLOR_RED, msg.GetString().c_str());
- fflush(stdout);
- exit(EXIT_FAILURE);
- } else if (shard_index < 0 || shard_index >= total_shards) {
- const Message msg = Message()
- << "Invalid environment variables: we require 0 <= "
- << kTestShardIndex << " < " << kTestTotalShards
- << ", but you have " << kTestShardIndex << "=" << shard_index
- << ", " << kTestTotalShards << "=" << total_shards << ".\n";
- ColoredPrintf(COLOR_RED, msg.GetString().c_str());
- fflush(stdout);
- exit(EXIT_FAILURE);
- }
-
- return total_shards > 1;
-}
-
-// Parses the environment variable var as an Int32. If it is unset,
-// returns default_val. If it is not an Int32, prints an error
-// and aborts.
-Int32 Int32FromEnvOrDie(const char* var, Int32 default_val) {
- const char* str_val = posix::GetEnv(var);
- if (str_val == NULL) {
- return default_val;
- }
-
- Int32 result;
- if (!ParseInt32(Message() << "The value of environment variable " << var,
- str_val, &result)) {
- exit(EXIT_FAILURE);
- }
- return result;
-}
-
-// Given the total number of shards, the shard index, and the test id,
-// returns true iff the test should be run on this shard. The test id is
-// some arbitrary but unique non-negative integer assigned to each test
-// method. Assumes that 0 <= shard_index < total_shards.
-bool ShouldRunTestOnShard(int total_shards, int shard_index, int test_id) {
- return (test_id % total_shards) == shard_index;
-}
-
-// Compares the name of each test with the user-specified filter to
-// decide whether the test should be run, then records the result in
-// each TestCase and TestInfo object.
-// If shard_tests == true, further filters tests based on sharding
-// variables in the environment - see
-// http://code.google.com/p/googletest/wiki/GoogleTestAdvancedGuide.
-// Returns the number of tests that should run.
-int UnitTestImpl::FilterTests(ReactionToSharding shard_tests) {
- const Int32 total_shards = shard_tests == HONOR_SHARDING_PROTOCOL ?
- Int32FromEnvOrDie(kTestTotalShards, -1) : -1;
- const Int32 shard_index = shard_tests == HONOR_SHARDING_PROTOCOL ?
- Int32FromEnvOrDie(kTestShardIndex, -1) : -1;
-
- // num_runnable_tests are the number of tests that will
- // run across all shards (i.e., match filter and are not disabled).
- // num_selected_tests are the number of tests to be run on
- // this shard.
- int num_runnable_tests = 0;
- int num_selected_tests = 0;
- for (size_t i = 0; i < test_cases_.size(); i++) {
- TestCase* const test_case = test_cases_[i];
- const std::string &test_case_name = test_case->name();
- test_case->set_should_run(false);
-
- for (size_t j = 0; j < test_case->test_info_list().size(); j++) {
- TestInfo* const test_info = test_case->test_info_list()[j];
- const std::string test_name(test_info->name());
- // A test is disabled if test case name or test name matches
- // kDisableTestFilter.
- const bool is_disabled =
- internal::UnitTestOptions::MatchesFilter(test_case_name,
- kDisableTestFilter) ||
- internal::UnitTestOptions::MatchesFilter(test_name,
- kDisableTestFilter);
- test_info->is_disabled_ = is_disabled;
-
- const bool matches_filter =
- internal::UnitTestOptions::FilterMatchesTest(test_case_name,
- test_name);
- test_info->matches_filter_ = matches_filter;
-
- const bool is_runnable =
- (GTEST_FLAG(also_run_disabled_tests) || !is_disabled) &&
- matches_filter;
-
- const bool is_selected = is_runnable &&
- (shard_tests == IGNORE_SHARDING_PROTOCOL ||
- ShouldRunTestOnShard(total_shards, shard_index,
- num_runnable_tests));
-
- num_runnable_tests += is_runnable;
- num_selected_tests += is_selected;
-
- test_info->should_run_ = is_selected;
- test_case->set_should_run(test_case->should_run() || is_selected);
- }
- }
- return num_selected_tests;
-}
-
-// Prints the given C-string on a single line by replacing all '\n'
-// characters with string "\\n". If the output takes more than
-// max_length characters, only prints the first max_length characters
-// and "...".
-static void PrintOnOneLine(const char* str, int max_length) {
- if (str != NULL) {
- for (int i = 0; *str != '\0'; ++str) {
- if (i >= max_length) {
- printf("...");
- break;
- }
- if (*str == '\n') {
- printf("\\n");
- i += 2;
- } else {
- printf("%c", *str);
- ++i;
- }
- }
- }
-}
-
-// Prints the names of the tests matching the user-specified filter flag.
-void UnitTestImpl::ListTestsMatchingFilter() {
- // Print at most this many characters for each type/value parameter.
- const int kMaxParamLength = 250;
-
- for (size_t i = 0; i < test_cases_.size(); i++) {
- const TestCase* const test_case = test_cases_[i];
- bool printed_test_case_name = false;
-
- for (size_t j = 0; j < test_case->test_info_list().size(); j++) {
- const TestInfo* const test_info =
- test_case->test_info_list()[j];
- if (test_info->matches_filter_) {
- if (!printed_test_case_name) {
- printed_test_case_name = true;
- printf("%s.", test_case->name());
- if (test_case->type_param() != NULL) {
- printf(" # %s = ", kTypeParamLabel);
- // We print the type parameter on a single line to make
- // the output easy to parse by a program.
- PrintOnOneLine(test_case->type_param(), kMaxParamLength);
- }
- printf("\n");
- }
- printf(" %s", test_info->name());
- if (test_info->value_param() != NULL) {
- printf(" # %s = ", kValueParamLabel);
- // We print the value parameter on a single line to make the
- // output easy to parse by a program.
- PrintOnOneLine(test_info->value_param(), kMaxParamLength);
- }
- printf("\n");
- }
- }
- }
- fflush(stdout);
-}
-
-// Sets the OS stack trace getter.
-//
-// Does nothing if the input and the current OS stack trace getter are
-// the same; otherwise, deletes the old getter and makes the input the
-// current getter.
-void UnitTestImpl::set_os_stack_trace_getter(
- OsStackTraceGetterInterface* getter) {
- if (os_stack_trace_getter_ != getter) {
- delete os_stack_trace_getter_;
- os_stack_trace_getter_ = getter;
- }
-}
-
-// Returns the current OS stack trace getter if it is not NULL;
-// otherwise, creates an OsStackTraceGetter, makes it the current
-// getter, and returns it.
-OsStackTraceGetterInterface* UnitTestImpl::os_stack_trace_getter() {
- if (os_stack_trace_getter_ == NULL) {
-#ifdef GTEST_OS_STACK_TRACE_GETTER_
- os_stack_trace_getter_ = new GTEST_OS_STACK_TRACE_GETTER_;
-#else
- os_stack_trace_getter_ = new OsStackTraceGetter;
-#endif // GTEST_OS_STACK_TRACE_GETTER_
- }
-
- return os_stack_trace_getter_;
-}
-
-// Returns the TestResult for the test that's currently running, or
-// the TestResult for the ad hoc test if no test is running.
-TestResult* UnitTestImpl::current_test_result() {
- return current_test_info_ ?
- &(current_test_info_->result_) : &ad_hoc_test_result_;
-}
-
-// Shuffles all test cases, and the tests within each test case,
-// making sure that death tests are still run first.
-void UnitTestImpl::ShuffleTests() {
- // Shuffles the death test cases.
- ShuffleRange(random(), 0, last_death_test_case_ + 1, &test_case_indices_);
-
- // Shuffles the non-death test cases.
- ShuffleRange(random(), last_death_test_case_ + 1,
- static_cast<int>(test_cases_.size()), &test_case_indices_);
-
- // Shuffles the tests inside each test case.
- for (size_t i = 0; i < test_cases_.size(); i++) {
- test_cases_[i]->ShuffleTests(random());
- }
-}
-
-// Restores the test cases and tests to their order before the first shuffle.
-void UnitTestImpl::UnshuffleTests() {
- for (size_t i = 0; i < test_cases_.size(); i++) {
- // Unshuffles the tests in each test case.
- test_cases_[i]->UnshuffleTests();
- // Resets the index of each test case.
- test_case_indices_[i] = static_cast<int>(i);
- }
-}
-
-// Returns the current OS stack trace as an std::string.
-//
-// The maximum number of stack frames to be included is specified by
-// the gtest_stack_trace_depth flag. The skip_count parameter
-// specifies the number of top frames to be skipped, which doesn't
-// count against the number of frames to be included.
-//
-// For example, if Foo() calls Bar(), which in turn calls
-// GetCurrentOsStackTraceExceptTop(..., 1), Foo() will be included in
-// the trace but Bar() and GetCurrentOsStackTraceExceptTop() won't.
-std::string GetCurrentOsStackTraceExceptTop(UnitTest* /*unit_test*/,
- int skip_count) {
- // We pass skip_count + 1 to skip this wrapper function in addition
- // to what the user really wants to skip.
- return GetUnitTestImpl()->CurrentOsStackTraceExceptTop(skip_count + 1);
-}
-
-// Used by the GTEST_SUPPRESS_UNREACHABLE_CODE_WARNING_BELOW_ macro to
-// suppress unreachable code warnings.
-namespace {
-class ClassUniqueToAlwaysTrue {};
-}
-
-bool IsTrue(bool condition) { return condition; }
-
-bool AlwaysTrue() {
-#if GTEST_HAS_EXCEPTIONS
- // This condition is always false so AlwaysTrue() never actually throws,
- // but it makes the compiler think that it may throw.
- if (IsTrue(false))
- throw ClassUniqueToAlwaysTrue();
-#endif // GTEST_HAS_EXCEPTIONS
- return true;
-}
-
-// If *pstr starts with the given prefix, modifies *pstr to be right
-// past the prefix and returns true; otherwise leaves *pstr unchanged
-// and returns false. None of pstr, *pstr, and prefix can be NULL.
-bool SkipPrefix(const char* prefix, const char** pstr) {
- const size_t prefix_len = strlen(prefix);
- if (strncmp(*pstr, prefix, prefix_len) == 0) {
- *pstr += prefix_len;
- return true;
- }
- return false;
-}
-
-// Parses a string as a command line flag. The string should have
-// the format "--flag=value". When def_optional is true, the "=value"
-// part can be omitted.
-//
-// Returns the value of the flag, or NULL if the parsing failed.
-const char* ParseFlagValue(const char* str,
- const char* flag,
- bool def_optional) {
- // str and flag must not be NULL.
- if (str == NULL || flag == NULL) return NULL;
-
- // The flag must start with "--" followed by GTEST_FLAG_PREFIX_.
- const std::string flag_str = std::string("--") + GTEST_FLAG_PREFIX_ + flag;
- const size_t flag_len = flag_str.length();
- if (strncmp(str, flag_str.c_str(), flag_len) != 0) return NULL;
-
- // Skips the flag name.
- const char* flag_end = str + flag_len;
-
- // When def_optional is true, it's OK to not have a "=value" part.
- if (def_optional && (flag_end[0] == '\0')) {
- return flag_end;
- }
-
- // If def_optional is true and there are more characters after the
- // flag name, or if def_optional is false, there must be a '=' after
- // the flag name.
- if (flag_end[0] != '=') return NULL;
-
- // Returns the string after "=".
- return flag_end + 1;
-}
-
-// Parses a string for a bool flag, in the form of either
-// "--flag=value" or "--flag".
-//
-// In the former case, the value is taken as true as long as it does
-// not start with '0', 'f', or 'F'.
-//
-// In the latter case, the value is taken as true.
-//
-// On success, stores the value of the flag in *value, and returns
-// true. On failure, returns false without changing *value.
-bool ParseBoolFlag(const char* str, const char* flag, bool* value) {
- // Gets the value of the flag as a string.
- const char* const value_str = ParseFlagValue(str, flag, true);
-
- // Aborts if the parsing failed.
- if (value_str == NULL) return false;
-
- // Converts the string value to a bool.
- *value = !(*value_str == '0' || *value_str == 'f' || *value_str == 'F');
- return true;
-}
-
-// Parses a string for an Int32 flag, in the form of
-// "--flag=value".
-//
-// On success, stores the value of the flag in *value, and returns
-// true. On failure, returns false without changing *value.
-bool ParseInt32Flag(const char* str, const char* flag, Int32* value) {
- // Gets the value of the flag as a string.
- const char* const value_str = ParseFlagValue(str, flag, false);
-
- // Aborts if the parsing failed.
- if (value_str == NULL) return false;
-
- // Sets *value to the value of the flag.
- return ParseInt32(Message() << "The value of flag --" << flag,
- value_str, value);
-}
-
-// Parses a string for a string flag, in the form of
-// "--flag=value".
-//
-// On success, stores the value of the flag in *value, and returns
-// true. On failure, returns false without changing *value.
-bool ParseStringFlag(const char* str, const char* flag, std::string* value) {
- // Gets the value of the flag as a string.
- const char* const value_str = ParseFlagValue(str, flag, false);
-
- // Aborts if the parsing failed.
- if (value_str == NULL) return false;
-
- // Sets *value to the value of the flag.
- *value = value_str;
- return true;
-}
-
-// Determines whether a string has a prefix that Google Test uses for its
-// flags, i.e., starts with GTEST_FLAG_PREFIX_ or GTEST_FLAG_PREFIX_DASH_.
-// If Google Test detects that a command line flag has its prefix but is not
-// recognized, it will print its help message. Flags starting with
-// GTEST_INTERNAL_PREFIX_ followed by "internal_" are considered Google Test
-// internal flags and do not trigger the help message.
-static bool HasGoogleTestFlagPrefix(const char* str) {
- return (SkipPrefix("--", &str) ||
- SkipPrefix("-", &str) ||
- SkipPrefix("/", &str)) &&
- !SkipPrefix(GTEST_FLAG_PREFIX_ "internal_", &str) &&
- (SkipPrefix(GTEST_FLAG_PREFIX_, &str) ||
- SkipPrefix(GTEST_FLAG_PREFIX_DASH_, &str));
-}
-
-// Prints a string containing code-encoded text. The following escape
-// sequences can be used in the string to control the text color:
-//
-// @@ prints a single '@' character.
-// @R changes the color to red.
-// @G changes the color to green.
-// @Y changes the color to yellow.
-// @D changes to the default terminal text color.
-//
-// TODO(wan@google.com): Write tests for this once we add stdout
-// capturing to Google Test.
-static void PrintColorEncoded(const char* str) {
- GTestColor color = COLOR_DEFAULT; // The current color.
-
- // Conceptually, we split the string into segments divided by escape
- // sequences. Then we print one segment at a time. At the end of
- // each iteration, the str pointer advances to the beginning of the
- // next segment.
- for (;;) {
- const char* p = strchr(str, '@');
- if (p == NULL) {
- ColoredPrintf(color, "%s", str);
- return;
- }
-
- ColoredPrintf(color, "%s", std::string(str, p).c_str());
-
- const char ch = p[1];
- str = p + 2;
- if (ch == '@') {
- ColoredPrintf(color, "@");
- } else if (ch == 'D') {
- color = COLOR_DEFAULT;
- } else if (ch == 'R') {
- color = COLOR_RED;
- } else if (ch == 'G') {
- color = COLOR_GREEN;
- } else if (ch == 'Y') {
- color = COLOR_YELLOW;
- } else {
- --str;
- }
- }
-}
-
-static const char kColorEncodedHelpMessage[] =
-"This program contains tests written using " GTEST_NAME_ ". You can use the\n"
-"following command line flags to control its behavior:\n"
-"\n"
-"Test Selection:\n"
-" @G--" GTEST_FLAG_PREFIX_ "list_tests@D\n"
-" List the names of all tests instead of running them. The name of\n"
-" TEST(Foo, Bar) is \"Foo.Bar\".\n"
-" @G--" GTEST_FLAG_PREFIX_ "filter=@YPOSTIVE_PATTERNS"
- "[@G-@YNEGATIVE_PATTERNS]@D\n"
-" Run only the tests whose name matches one of the positive patterns but\n"
-" none of the negative patterns. '?' matches any single character; '*'\n"
-" matches any substring; ':' separates two patterns.\n"
-" @G--" GTEST_FLAG_PREFIX_ "also_run_disabled_tests@D\n"
-" Run all disabled tests too.\n"
-"\n"
-"Test Execution:\n"
-" @G--" GTEST_FLAG_PREFIX_ "repeat=@Y[COUNT]@D\n"
-" Run the tests repeatedly; use a negative count to repeat forever.\n"
-" @G--" GTEST_FLAG_PREFIX_ "shuffle@D\n"
-" Randomize tests' orders on every iteration.\n"
-" @G--" GTEST_FLAG_PREFIX_ "random_seed=@Y[NUMBER]@D\n"
-" Random number seed to use for shuffling test orders (between 1 and\n"
-" 99999, or 0 to use a seed based on the current time).\n"
-"\n"
-"Test Output:\n"
-" @G--" GTEST_FLAG_PREFIX_ "color=@Y(@Gyes@Y|@Gno@Y|@Gauto@Y)@D\n"
-" Enable/disable colored output. The default is @Gauto@D.\n"
-" -@G-" GTEST_FLAG_PREFIX_ "print_time=0@D\n"
-" Don't print the elapsed time of each test.\n"
-" @G--" GTEST_FLAG_PREFIX_ "output=xml@Y[@G:@YDIRECTORY_PATH@G"
- GTEST_PATH_SEP_ "@Y|@G:@YFILE_PATH]@D\n"
-" Generate an XML report in the given directory or with the given file\n"
-" name. @YFILE_PATH@D defaults to @Gtest_details.xml@D.\n"
-#if GTEST_CAN_STREAM_RESULTS_
-" @G--" GTEST_FLAG_PREFIX_ "stream_result_to=@YHOST@G:@YPORT@D\n"
-" Stream test results to the given server.\n"
-#endif // GTEST_CAN_STREAM_RESULTS_
-"\n"
-"Assertion Behavior:\n"
-#if GTEST_HAS_DEATH_TEST && !GTEST_OS_WINDOWS
-" @G--" GTEST_FLAG_PREFIX_ "death_test_style=@Y(@Gfast@Y|@Gthreadsafe@Y)@D\n"
-" Set the default death test style.\n"
-#endif // GTEST_HAS_DEATH_TEST && !GTEST_OS_WINDOWS
-" @G--" GTEST_FLAG_PREFIX_ "break_on_failure@D\n"
-" Turn assertion failures into debugger break-points.\n"
-" @G--" GTEST_FLAG_PREFIX_ "throw_on_failure@D\n"
-" Turn assertion failures into C++ exceptions.\n"
-" @G--" GTEST_FLAG_PREFIX_ "catch_exceptions=0@D\n"
-" Do not report exceptions as test failures. Instead, allow them\n"
-" to crash the program or throw a pop-up (on Windows).\n"
-"\n"
-"Except for @G--" GTEST_FLAG_PREFIX_ "list_tests@D, you can alternatively set "
- "the corresponding\n"
-"environment variable of a flag (all letters in upper-case). For example, to\n"
-"disable colored text output, you can either specify @G--" GTEST_FLAG_PREFIX_
- "color=no@D or set\n"
-"the @G" GTEST_FLAG_PREFIX_UPPER_ "COLOR@D environment variable to @Gno@D.\n"
-"\n"
-"For more information, please read the " GTEST_NAME_ " documentation at\n"
-"@G" GTEST_PROJECT_URL_ "@D. If you find a bug in " GTEST_NAME_ "\n"
-"(not one in your own code or tests), please report it to\n"
-"@G<" GTEST_DEV_EMAIL_ ">@D.\n";
-
-bool ParseGoogleTestFlag(const char* const arg) {
- return ParseBoolFlag(arg, kAlsoRunDisabledTestsFlag,
- &GTEST_FLAG(also_run_disabled_tests)) ||
- ParseBoolFlag(arg, kBreakOnFailureFlag,
- &GTEST_FLAG(break_on_failure)) ||
- ParseBoolFlag(arg, kCatchExceptionsFlag,
- &GTEST_FLAG(catch_exceptions)) ||
- ParseStringFlag(arg, kColorFlag, &GTEST_FLAG(color)) ||
- ParseStringFlag(arg, kDeathTestStyleFlag,
- &GTEST_FLAG(death_test_style)) ||
- ParseBoolFlag(arg, kDeathTestUseFork,
- &GTEST_FLAG(death_test_use_fork)) ||
- ParseStringFlag(arg, kFilterFlag, &GTEST_FLAG(filter)) ||
- ParseStringFlag(arg, kInternalRunDeathTestFlag,
- &GTEST_FLAG(internal_run_death_test)) ||
- ParseBoolFlag(arg, kListTestsFlag, &GTEST_FLAG(list_tests)) ||
- ParseStringFlag(arg, kOutputFlag, &GTEST_FLAG(output)) ||
- ParseBoolFlag(arg, kPrintTimeFlag, &GTEST_FLAG(print_time)) ||
- ParseInt32Flag(arg, kRandomSeedFlag, &GTEST_FLAG(random_seed)) ||
- ParseInt32Flag(arg, kRepeatFlag, &GTEST_FLAG(repeat)) ||
- ParseBoolFlag(arg, kShuffleFlag, &GTEST_FLAG(shuffle)) ||
- ParseInt32Flag(arg, kStackTraceDepthFlag,
- &GTEST_FLAG(stack_trace_depth)) ||
- ParseStringFlag(arg, kStreamResultToFlag,
- &GTEST_FLAG(stream_result_to)) ||
- ParseBoolFlag(arg, kThrowOnFailureFlag,
- &GTEST_FLAG(throw_on_failure));
-}
-
-#if GTEST_USE_OWN_FLAGFILE_FLAG_
-void LoadFlagsFromFile(const std::string& path) {
- FILE* flagfile = posix::FOpen(path.c_str(), "r");
- if (!flagfile) {
- fprintf(stderr,
- "Unable to open file \"%s\"\n",
- GTEST_FLAG(flagfile).c_str());
- fflush(stderr);
- exit(EXIT_FAILURE);
- }
- std::string contents(ReadEntireFile(flagfile));
- posix::FClose(flagfile);
- std::vector<std::string> lines;
- SplitString(contents, '\n', &lines);
- for (size_t i = 0; i < lines.size(); ++i) {
- if (lines[i].empty())
- continue;
- if (!ParseGoogleTestFlag(lines[i].c_str()))
- g_help_flag = true;
- }
-}
-#endif // GTEST_USE_OWN_FLAGFILE_FLAG_
-
-// Parses the command line for Google Test flags, without initializing
-// other parts of Google Test. The type parameter CharType can be
-// instantiated to either char or wchar_t.
-template <typename CharType>
-void ParseGoogleTestFlagsOnlyImpl(int* argc, CharType** argv) {
- for (int i = 1; i < *argc; i++) {
- const std::string arg_string = StreamableToString(argv[i]);
- const char* const arg = arg_string.c_str();
-
- using internal::ParseBoolFlag;
- using internal::ParseInt32Flag;
- using internal::ParseStringFlag;
-
- bool remove_flag = false;
- if (ParseGoogleTestFlag(arg)) {
- remove_flag = true;
-#if GTEST_USE_OWN_FLAGFILE_FLAG_
- } else if (ParseStringFlag(arg, kFlagfileFlag, &GTEST_FLAG(flagfile))) {
- LoadFlagsFromFile(GTEST_FLAG(flagfile));
- remove_flag = true;
-#endif // GTEST_USE_OWN_FLAGFILE_FLAG_
- } else if (arg_string == "--help" || arg_string == "-h" ||
- arg_string == "-?" || arg_string == "/?" ||
- HasGoogleTestFlagPrefix(arg)) {
- // Both help flag and unrecognized Google Test flags (excluding
- // internal ones) trigger help display.
- g_help_flag = true;
- }
-
- if (remove_flag) {
- // Shift the remainder of the argv list left by one. Note
- // that argv has (*argc + 1) elements, the last one always being
- // NULL. The following loop moves the trailing NULL element as
- // well.
- for (int j = i; j != *argc; j++) {
- argv[j] = argv[j + 1];
- }
-
- // Decrements the argument count.
- (*argc)--;
-
- // We also need to decrement the iterator as we just removed
- // an element.
- i--;
- }
- }
-
- if (g_help_flag) {
- // We print the help here instead of in RUN_ALL_TESTS(), as the
- // latter may not be called at all if the user is using Google
- // Test with another testing framework.
- PrintColorEncoded(kColorEncodedHelpMessage);
- }
-}
-
-// Parses the command line for Google Test flags, without initializing
-// other parts of Google Test.
-void ParseGoogleTestFlagsOnly(int* argc, char** argv) {
- ParseGoogleTestFlagsOnlyImpl(argc, argv);
-}
-void ParseGoogleTestFlagsOnly(int* argc, wchar_t** argv) {
- ParseGoogleTestFlagsOnlyImpl(argc, argv);
-}
-
-// The internal implementation of InitGoogleTest().
-//
-// The type parameter CharType can be instantiated to either char or
-// wchar_t.
-template <typename CharType>
-void InitGoogleTestImpl(int* argc, CharType** argv) {
- // We don't want to run the initialization code twice.
- if (GTestIsInitialized()) return;
-
- if (*argc <= 0) return;
-
- g_argvs.clear();
- for (int i = 0; i != *argc; i++) {
- g_argvs.push_back(StreamableToString(argv[i]));
- }
-
- ParseGoogleTestFlagsOnly(argc, argv);
- GetUnitTestImpl()->PostFlagParsingInit();
-}
-
-} // namespace internal
-
-// Initializes Google Test. This must be called before calling
-// RUN_ALL_TESTS(). In particular, it parses a command line for the
-// flags that Google Test recognizes. Whenever a Google Test flag is
-// seen, it is removed from argv, and *argc is decremented.
-//
-// No value is returned. Instead, the Google Test flag variables are
-// updated.
-//
-// Calling the function for the second time has no user-visible effect.
-void InitGoogleTest(int* argc, char** argv) {
-#if defined(GTEST_CUSTOM_INIT_GOOGLE_TEST_FUNCTION_)
- GTEST_CUSTOM_INIT_GOOGLE_TEST_FUNCTION_(argc, argv);
-#else // defined(GTEST_CUSTOM_INIT_GOOGLE_TEST_FUNCTION_)
- internal::InitGoogleTestImpl(argc, argv);
-#endif // defined(GTEST_CUSTOM_INIT_GOOGLE_TEST_FUNCTION_)
-}
-
-// This overloaded version can be used in Windows programs compiled in
-// UNICODE mode.
-void InitGoogleTest(int* argc, wchar_t** argv) {
-#if defined(GTEST_CUSTOM_INIT_GOOGLE_TEST_FUNCTION_)
- GTEST_CUSTOM_INIT_GOOGLE_TEST_FUNCTION_(argc, argv);
-#else // defined(GTEST_CUSTOM_INIT_GOOGLE_TEST_FUNCTION_)
- internal::InitGoogleTestImpl(argc, argv);
-#endif // defined(GTEST_CUSTOM_INIT_GOOGLE_TEST_FUNCTION_)
-}
-
-} // namespace testing
diff --git a/third_party/aom/third_party/googletest/src/googletest/src/gtest_main.cc b/third_party/aom/third_party/googletest/src/googletest/src/gtest_main.cc
deleted file mode 100644
index f30282255..000000000
--- a/third_party/aom/third_party/googletest/src/googletest/src/gtest_main.cc
+++ /dev/null
@@ -1,38 +0,0 @@
-// Copyright 2006, Google Inc.
-// All rights reserved.
-//
-// Redistribution and use in source and binary forms, with or without
-// modification, are permitted provided that the following conditions are
-// met:
-//
-// * Redistributions of source code must retain the above copyright
-// notice, this list of conditions and the following disclaimer.
-// * Redistributions in binary form must reproduce the above
-// copyright notice, this list of conditions and the following disclaimer
-// in the documentation and/or other materials provided with the
-// distribution.
-// * Neither the name of Google Inc. nor the names of its
-// contributors may be used to endorse or promote products derived from
-// this software without specific prior written permission.
-//
-// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
-// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
-// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
-// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
-// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
-// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
-// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
-// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
-// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-
-#include <stdio.h>
-
-#include "gtest/gtest.h"
-
-GTEST_API_ int main(int argc, char **argv) {
- printf("Running main() from gtest_main.cc\n");
- testing::InitGoogleTest(&argc, argv);
- return RUN_ALL_TESTS();
-}
diff --git a/third_party/aom/third_party/libwebm/AUTHORS.TXT b/third_party/aom/third_party/libwebm/AUTHORS.TXT
deleted file mode 100644
index 9686ac13e..000000000
--- a/third_party/aom/third_party/libwebm/AUTHORS.TXT
+++ /dev/null
@@ -1,4 +0,0 @@
-# Names should be added to this file like so:
-# Name or Organization <email address>
-
-Google Inc.
diff --git a/third_party/aom/third_party/libwebm/Android.mk b/third_party/aom/third_party/libwebm/Android.mk
deleted file mode 100644
index b46ba101d..000000000
--- a/third_party/aom/third_party/libwebm/Android.mk
+++ /dev/null
@@ -1,17 +0,0 @@
-LOCAL_PATH:= $(call my-dir)
-
-include $(CLEAR_VARS)
-LOCAL_MODULE:= libwebm
-LOCAL_CPPFLAGS:=-D__STDC_CONSTANT_MACROS -D__STDC_FORMAT_MACROS
-LOCAL_CPPFLAGS+=-D__STDC_LIMIT_MACROS -std=c++11
-LOCAL_C_INCLUDES:= $(LOCAL_PATH)
-LOCAL_EXPORT_C_INCLUDES:= $(LOCAL_PATH)
-
-LOCAL_SRC_FILES:= common/file_util.cc \
- common/hdr_util.cc \
- mkvparser/mkvparser.cc \
- mkvparser/mkvreader.cc \
- mkvmuxer/mkvmuxer.cc \
- mkvmuxer/mkvmuxerutil.cc \
- mkvmuxer/mkvwriter.cc
-include $(BUILD_STATIC_LIBRARY)
diff --git a/third_party/aom/third_party/libwebm/LICENSE.TXT b/third_party/aom/third_party/libwebm/LICENSE.TXT
deleted file mode 100644
index 7a6f99547..000000000
--- a/third_party/aom/third_party/libwebm/LICENSE.TXT
+++ /dev/null
@@ -1,30 +0,0 @@
-Copyright (c) 2010, Google Inc. All rights reserved.
-
-Redistribution and use in source and binary forms, with or without
-modification, are permitted provided that the following conditions are
-met:
-
- * Redistributions of source code must retain the above copyright
- notice, this list of conditions and the following disclaimer.
-
- * Redistributions in binary form must reproduce the above copyright
- notice, this list of conditions and the following disclaimer in
- the documentation and/or other materials provided with the
- distribution.
-
- * Neither the name of Google nor the names of its contributors may
- be used to endorse or promote products derived from this software
- without specific prior written permission.
-
-THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
-LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
-A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
-HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
-SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
-LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
-DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
-THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
-OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-
diff --git a/third_party/aom/third_party/libwebm/PATENTS.TXT b/third_party/aom/third_party/libwebm/PATENTS.TXT
deleted file mode 100644
index caedf607e..000000000
--- a/third_party/aom/third_party/libwebm/PATENTS.TXT
+++ /dev/null
@@ -1,23 +0,0 @@
-Additional IP Rights Grant (Patents)
-------------------------------------
-
-"These implementations" means the copyrightable works that implement the WebM
-codecs distributed by Google as part of the WebM Project.
-
-Google hereby grants to you a perpetual, worldwide, non-exclusive, no-charge,
-royalty-free, irrevocable (except as stated in this section) patent license to
-make, have made, use, offer to sell, sell, import, transfer, and otherwise
-run, modify and propagate the contents of these implementations of WebM, where
-such license applies only to those patent claims, both currently owned by
-Google and acquired in the future, licensable by Google that are necessarily
-infringed by these implementations of WebM. This grant does not include claims
-that would be infringed only as a consequence of further modification of these
-implementations. If you or your agent or exclusive licensee institute or order
-or agree to the institution of patent litigation or any other patent
-enforcement activity against any entity (including a cross-claim or
-counterclaim in a lawsuit) alleging that any of these implementations of WebM
-or any code incorporated within any of these implementations of WebM
-constitute direct or contributory patent infringement, or inducement of
-patent infringement, then any patent rights granted to you under this License
-for these implementations of WebM shall terminate as of the date such
-litigation is filed.
diff --git a/third_party/aom/third_party/libwebm/README.libaom b/third_party/aom/third_party/libwebm/README.libaom
deleted file mode 100644
index bd288d201..000000000
--- a/third_party/aom/third_party/libwebm/README.libaom
+++ /dev/null
@@ -1,22 +0,0 @@
-URL: https://chromium.googlesource.com/webm/libwebm
-Version: af81f26025b7435fa9a14ad07c58b44cf9280430
-License: BSD
-License File: LICENSE.txt
-
-Description:
-libwebm is used to handle WebM container I/O.
-
-Local Changes:
-Add av1 codec as an eligible codec for webm:
- https://aomedia-review.googlesource.com/c/aom/+/15103
-Only keep:
- - Android.mk
- - AUTHORS.TXT
- - common/
- file_util.cc/h
- hdr_util.cc/h
- webmids.h
- - LICENSE.TXT
- - mkvmuxer/
- - mkvparser/
- - PATENTS.TXT
diff --git a/third_party/aom/third_party/libwebm/common/file_util.cc b/third_party/aom/third_party/libwebm/common/file_util.cc
deleted file mode 100644
index 618ffc087..000000000
--- a/third_party/aom/third_party/libwebm/common/file_util.cc
+++ /dev/null
@@ -1,93 +0,0 @@
-// Copyright (c) 2016 The WebM project authors. All Rights Reserved.
-//
-// Use of this source code is governed by a BSD-style license
-// that can be found in the LICENSE file in the root of the source
-// tree. An additional intellectual property rights grant can be found
-// in the file PATENTS. All contributing project authors may
-// be found in the AUTHORS file in the root of the source tree.
-#include "common/file_util.h"
-
-#include <sys/stat.h>
-#ifndef _MSC_VER
-#include <unistd.h> // close()
-#endif
-
-#include <cstdio>
-#include <cstdlib>
-#include <cstring>
-#include <fstream>
-#include <ios>
-#include <string>
-
-namespace libwebm {
-
-std::string GetTempFileName() {
-#if !defined _MSC_VER && !defined __MINGW32__
- std::string temp_file_name_template_str =
- std::string(std::getenv("TEST_TMPDIR") ? std::getenv("TEST_TMPDIR") :
- ".") +
- "/libwebm_temp.XXXXXX";
- char* temp_file_name_template =
- new char[temp_file_name_template_str.length() + 1];
- memset(temp_file_name_template, 0, temp_file_name_template_str.length() + 1);
- temp_file_name_template_str.copy(temp_file_name_template,
- temp_file_name_template_str.length(), 0);
- int fd = mkstemp(temp_file_name_template);
- std::string temp_file_name =
- (fd != -1) ? std::string(temp_file_name_template) : std::string();
- delete[] temp_file_name_template;
- if (fd != -1) {
- close(fd);
- }
- return temp_file_name;
-#else
- char tmp_file_name[_MAX_PATH];
-#if defined _MSC_VER || defined MINGW_HAS_SECURE_API
- errno_t err = tmpnam_s(tmp_file_name);
-#else
- char* fname_pointer = tmpnam(tmp_file_name);
- errno_t err = (fname_pointer == &tmp_file_name[0]) ? 0 : -1;
-#endif
- if (err == 0) {
- return std::string(tmp_file_name);
- }
- return std::string();
-#endif
-}
-
-uint64_t GetFileSize(const std::string& file_name) {
- uint64_t file_size = 0;
-#ifndef _MSC_VER
- struct stat st;
- st.st_size = 0;
- if (stat(file_name.c_str(), &st) == 0) {
-#else
- struct _stat st;
- st.st_size = 0;
- if (_stat(file_name.c_str(), &st) == 0) {
-#endif
- file_size = st.st_size;
- }
- return file_size;
-}
-
-bool GetFileContents(const std::string& file_name, std::string* contents) {
- std::ifstream file(file_name.c_str());
- *contents = std::string(static_cast<size_t>(GetFileSize(file_name)), 0);
- if (file.good() && contents->size()) {
- file.read(&(*contents)[0], contents->size());
- }
- return !file.fail();
-}
-
-TempFileDeleter::TempFileDeleter() { file_name_ = GetTempFileName(); }
-
-TempFileDeleter::~TempFileDeleter() {
- std::ifstream file(file_name_.c_str());
- if (file.good()) {
- file.close();
- std::remove(file_name_.c_str());
- }
-}
-
-} // namespace libwebm
diff --git a/third_party/aom/third_party/libwebm/common/file_util.h b/third_party/aom/third_party/libwebm/common/file_util.h
deleted file mode 100644
index a87373464..000000000
--- a/third_party/aom/third_party/libwebm/common/file_util.h
+++ /dev/null
@@ -1,44 +0,0 @@
-// Copyright (c) 2016 The WebM project authors. All Rights Reserved.
-//
-// Use of this source code is governed by a BSD-style license
-// that can be found in the LICENSE file in the root of the source
-// tree. An additional intellectual property rights grant can be found
-// in the file PATENTS. All contributing project authors may
-// be found in the AUTHORS file in the root of the source tree.
-#ifndef LIBWEBM_COMMON_FILE_UTIL_H_
-#define LIBWEBM_COMMON_FILE_UTIL_H_
-
-#include <stdint.h>
-
-#include <string>
-
-#include "mkvmuxer/mkvmuxertypes.h" // LIBWEBM_DISALLOW_COPY_AND_ASSIGN()
-
-namespace libwebm {
-
-// Returns a temporary file name.
-std::string GetTempFileName();
-
-// Returns size of file specified by |file_name|, or 0 upon failure.
-uint64_t GetFileSize(const std::string& file_name);
-
-// Gets the contents file_name as a string. Returns false on error.
-bool GetFileContents(const std::string& file_name, std::string* contents);
-
-// Manages life of temporary file specified at time of construction. Deletes
-// file upon destruction.
-class TempFileDeleter {
- public:
- TempFileDeleter();
- explicit TempFileDeleter(std::string file_name) : file_name_(file_name) {}
- ~TempFileDeleter();
- const std::string& name() const { return file_name_; }
-
- private:
- std::string file_name_;
- LIBWEBM_DISALLOW_COPY_AND_ASSIGN(TempFileDeleter);
-};
-
-} // namespace libwebm
-
-#endif // LIBWEBM_COMMON_FILE_UTIL_H_
diff --git a/third_party/aom/third_party/libwebm/common/hdr_util.cc b/third_party/aom/third_party/libwebm/common/hdr_util.cc
deleted file mode 100644
index 916f7170b..000000000
--- a/third_party/aom/third_party/libwebm/common/hdr_util.cc
+++ /dev/null
@@ -1,220 +0,0 @@
-// Copyright (c) 2016 The WebM project authors. All Rights Reserved.
-//
-// Use of this source code is governed by a BSD-style license
-// that can be found in the LICENSE file in the root of the source
-// tree. An additional intellectual property rights grant can be found
-// in the file PATENTS. All contributing project authors may
-// be found in the AUTHORS file in the root of the source tree.
-#include "hdr_util.h"
-
-#include <climits>
-#include <cstddef>
-#include <new>
-
-#include "mkvparser/mkvparser.h"
-
-namespace libwebm {
-const int Vp9CodecFeatures::kValueNotPresent = INT_MAX;
-
-bool CopyPrimaryChromaticity(const mkvparser::PrimaryChromaticity& parser_pc,
- PrimaryChromaticityPtr* muxer_pc) {
- muxer_pc->reset(new (std::nothrow)
- mkvmuxer::PrimaryChromaticity(parser_pc.x, parser_pc.y));
- if (!muxer_pc->get())
- return false;
- return true;
-}
-
-bool MasteringMetadataValuePresent(double value) {
- return value != mkvparser::MasteringMetadata::kValueNotPresent;
-}
-
-bool CopyMasteringMetadata(const mkvparser::MasteringMetadata& parser_mm,
- mkvmuxer::MasteringMetadata* muxer_mm) {
- if (MasteringMetadataValuePresent(parser_mm.luminance_max))
- muxer_mm->set_luminance_max(parser_mm.luminance_max);
- if (MasteringMetadataValuePresent(parser_mm.luminance_min))
- muxer_mm->set_luminance_min(parser_mm.luminance_min);
-
- PrimaryChromaticityPtr r_ptr(nullptr);
- PrimaryChromaticityPtr g_ptr(nullptr);
- PrimaryChromaticityPtr b_ptr(nullptr);
- PrimaryChromaticityPtr wp_ptr(nullptr);
-
- if (parser_mm.r) {
- if (!CopyPrimaryChromaticity(*parser_mm.r, &r_ptr))
- return false;
- }
- if (parser_mm.g) {
- if (!CopyPrimaryChromaticity(*parser_mm.g, &g_ptr))
- return false;
- }
- if (parser_mm.b) {
- if (!CopyPrimaryChromaticity(*parser_mm.b, &b_ptr))
- return false;
- }
- if (parser_mm.white_point) {
- if (!CopyPrimaryChromaticity(*parser_mm.white_point, &wp_ptr))
- return false;
- }
-
- if (!muxer_mm->SetChromaticity(r_ptr.get(), g_ptr.get(), b_ptr.get(),
- wp_ptr.get())) {
- return false;
- }
-
- return true;
-}
-
-bool ColourValuePresent(long long value) {
- return value != mkvparser::Colour::kValueNotPresent;
-}
-
-bool CopyColour(const mkvparser::Colour& parser_colour,
- mkvmuxer::Colour* muxer_colour) {
- if (!muxer_colour)
- return false;
-
- if (ColourValuePresent(parser_colour.matrix_coefficients))
- muxer_colour->set_matrix_coefficients(parser_colour.matrix_coefficients);
- if (ColourValuePresent(parser_colour.bits_per_channel))
- muxer_colour->set_bits_per_channel(parser_colour.bits_per_channel);
- if (ColourValuePresent(parser_colour.chroma_subsampling_horz)) {
- muxer_colour->set_chroma_subsampling_horz(
- parser_colour.chroma_subsampling_horz);
- }
- if (ColourValuePresent(parser_colour.chroma_subsampling_vert)) {
- muxer_colour->set_chroma_subsampling_vert(
- parser_colour.chroma_subsampling_vert);
- }
- if (ColourValuePresent(parser_colour.cb_subsampling_horz))
- muxer_colour->set_cb_subsampling_horz(parser_colour.cb_subsampling_horz);
- if (ColourValuePresent(parser_colour.cb_subsampling_vert))
- muxer_colour->set_cb_subsampling_vert(parser_colour.cb_subsampling_vert);
- if (ColourValuePresent(parser_colour.chroma_siting_horz))
- muxer_colour->set_chroma_siting_horz(parser_colour.chroma_siting_horz);
- if (ColourValuePresent(parser_colour.chroma_siting_vert))
- muxer_colour->set_chroma_siting_vert(parser_colour.chroma_siting_vert);
- if (ColourValuePresent(parser_colour.range))
- muxer_colour->set_range(parser_colour.range);
- if (ColourValuePresent(parser_colour.transfer_characteristics)) {
- muxer_colour->set_transfer_characteristics(
- parser_colour.transfer_characteristics);
- }
- if (ColourValuePresent(parser_colour.primaries))
- muxer_colour->set_primaries(parser_colour.primaries);
- if (ColourValuePresent(parser_colour.max_cll))
- muxer_colour->set_max_cll(parser_colour.max_cll);
- if (ColourValuePresent(parser_colour.max_fall))
- muxer_colour->set_max_fall(parser_colour.max_fall);
-
- if (parser_colour.mastering_metadata) {
- mkvmuxer::MasteringMetadata muxer_mm;
- if (!CopyMasteringMetadata(*parser_colour.mastering_metadata, &muxer_mm))
- return false;
- if (!muxer_colour->SetMasteringMetadata(muxer_mm))
- return false;
- }
- return true;
-}
-
-// Format of VPx private data:
-//
-// 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1
-// +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
-// | ID Byte | Length | |
-// +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ |
-// | |
-// : Bytes 1..Length of Codec Feature :
-// | |
-// +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
-//
-// ID Byte Format
-// ID byte is an unsigned byte.
-// 0 1 2 3 4 5 6 7
-// +-+-+-+-+-+-+-+-+
-// |X| ID |
-// +-+-+-+-+-+-+-+-+
-//
-// The X bit is reserved.
-//
-// See the following link for more information:
-// http://www.webmproject.org/vp9/profiles/
-bool ParseVpxCodecPrivate(const uint8_t* private_data, int32_t length,
- Vp9CodecFeatures* features) {
- const int kVpxCodecPrivateMinLength = 3;
- if (!private_data || !features || length < kVpxCodecPrivateMinLength)
- return false;
-
- const uint8_t kVp9ProfileId = 1;
- const uint8_t kVp9LevelId = 2;
- const uint8_t kVp9BitDepthId = 3;
- const uint8_t kVp9ChromaSubsamplingId = 4;
- const int kVpxFeatureLength = 1;
- int offset = 0;
-
- // Set features to not set.
- features->profile = Vp9CodecFeatures::kValueNotPresent;
- features->level = Vp9CodecFeatures::kValueNotPresent;
- features->bit_depth = Vp9CodecFeatures::kValueNotPresent;
- features->chroma_subsampling = Vp9CodecFeatures::kValueNotPresent;
- do {
- const uint8_t id_byte = private_data[offset++];
- const uint8_t length_byte = private_data[offset++];
- if (length_byte != kVpxFeatureLength)
- return false;
- if (id_byte == kVp9ProfileId) {
- const int priv_profile = static_cast<int>(private_data[offset++]);
- if (priv_profile < 0 || priv_profile > 3)
- return false;
- if (features->profile != Vp9CodecFeatures::kValueNotPresent &&
- features->profile != priv_profile) {
- return false;
- }
- features->profile = priv_profile;
- } else if (id_byte == kVp9LevelId) {
- const int priv_level = static_cast<int>(private_data[offset++]);
-
- const int kNumLevels = 14;
- const int levels[kNumLevels] = {10, 11, 20, 21, 30, 31, 40,
- 41, 50, 51, 52, 60, 61, 62};
-
- for (int i = 0; i < kNumLevels; ++i) {
- if (priv_level == levels[i]) {
- if (features->level != Vp9CodecFeatures::kValueNotPresent &&
- features->level != priv_level) {
- return false;
- }
- features->level = priv_level;
- break;
- }
- }
- if (features->level == Vp9CodecFeatures::kValueNotPresent)
- return false;
- } else if (id_byte == kVp9BitDepthId) {
- const int priv_profile = static_cast<int>(private_data[offset++]);
- if (priv_profile != 8 && priv_profile != 10 && priv_profile != 12)
- return false;
- if (features->bit_depth != Vp9CodecFeatures::kValueNotPresent &&
- features->bit_depth != priv_profile) {
- return false;
- }
- features->bit_depth = priv_profile;
- } else if (id_byte == kVp9ChromaSubsamplingId) {
- const int priv_profile = static_cast<int>(private_data[offset++]);
- if (priv_profile != 0 && priv_profile != 2 && priv_profile != 3)
- return false;
- if (features->chroma_subsampling != Vp9CodecFeatures::kValueNotPresent &&
- features->chroma_subsampling != priv_profile) {
- return false;
- }
- features->chroma_subsampling = priv_profile;
- } else {
- // Invalid ID.
- return false;
- }
- } while (offset + kVpxCodecPrivateMinLength <= length);
-
- return true;
-}
-} // namespace libwebm
diff --git a/third_party/aom/third_party/libwebm/common/hdr_util.h b/third_party/aom/third_party/libwebm/common/hdr_util.h
deleted file mode 100644
index 78e2eeb70..000000000
--- a/third_party/aom/third_party/libwebm/common/hdr_util.h
+++ /dev/null
@@ -1,71 +0,0 @@
-// Copyright (c) 2016 The WebM project authors. All Rights Reserved.
-//
-// Use of this source code is governed by a BSD-style license
-// that can be found in the LICENSE file in the root of the source
-// tree. An additional intellectual property rights grant can be found
-// in the file PATENTS. All contributing project authors may
-// be found in the AUTHORS file in the root of the source tree.
-#ifndef LIBWEBM_COMMON_HDR_UTIL_H_
-#define LIBWEBM_COMMON_HDR_UTIL_H_
-
-#include <stdint.h>
-
-#include <memory>
-
-#include "mkvmuxer/mkvmuxer.h"
-
-namespace mkvparser {
-struct Colour;
-struct MasteringMetadata;
-struct PrimaryChromaticity;
-} // namespace mkvparser
-
-namespace libwebm {
-// Utility types and functions for working with the Colour element and its
-// children. Copiers return true upon success. Presence functions return true
-// when the specified element is present.
-
-// TODO(tomfinegan): These should be moved to libwebm_utils once c++11 is
-// required by libwebm.
-
-// Features of the VP9 codec that may be set in the CodecPrivate of a VP9 video
-// stream. A value of kValueNotPresent represents that the value was not set in
-// the CodecPrivate.
-struct Vp9CodecFeatures {
- static const int kValueNotPresent;
-
- Vp9CodecFeatures()
- : profile(kValueNotPresent),
- level(kValueNotPresent),
- bit_depth(kValueNotPresent),
- chroma_subsampling(kValueNotPresent) {}
- ~Vp9CodecFeatures() {}
-
- int profile;
- int level;
- int bit_depth;
- int chroma_subsampling;
-};
-
-typedef std::unique_ptr<mkvmuxer::PrimaryChromaticity> PrimaryChromaticityPtr;
-
-bool CopyPrimaryChromaticity(const mkvparser::PrimaryChromaticity& parser_pc,
- PrimaryChromaticityPtr* muxer_pc);
-
-bool MasteringMetadataValuePresent(double value);
-
-bool CopyMasteringMetadata(const mkvparser::MasteringMetadata& parser_mm,
- mkvmuxer::MasteringMetadata* muxer_mm);
-
-bool ColourValuePresent(long long value);
-
-bool CopyColour(const mkvparser::Colour& parser_colour,
- mkvmuxer::Colour* muxer_colour);
-
-// Returns true if |features| is set to one or more valid values.
-bool ParseVpxCodecPrivate(const uint8_t* private_data, int32_t length,
- Vp9CodecFeatures* features);
-
-} // namespace libwebm
-
-#endif // LIBWEBM_COMMON_HDR_UTIL_H_
diff --git a/third_party/aom/third_party/libwebm/common/webmids.h b/third_party/aom/third_party/libwebm/common/webmids.h
deleted file mode 100644
index 89d722a71..000000000
--- a/third_party/aom/third_party/libwebm/common/webmids.h
+++ /dev/null
@@ -1,192 +0,0 @@
-// Copyright (c) 2012 The WebM project authors. All Rights Reserved.
-//
-// Use of this source code is governed by a BSD-style license
-// that can be found in the LICENSE file in the root of the source
-// tree. An additional intellectual property rights grant can be found
-// in the file PATENTS. All contributing project authors may
-// be found in the AUTHORS file in the root of the source tree.
-
-#ifndef COMMON_WEBMIDS_H_
-#define COMMON_WEBMIDS_H_
-
-namespace libwebm {
-
-enum MkvId {
- kMkvEBML = 0x1A45DFA3,
- kMkvEBMLVersion = 0x4286,
- kMkvEBMLReadVersion = 0x42F7,
- kMkvEBMLMaxIDLength = 0x42F2,
- kMkvEBMLMaxSizeLength = 0x42F3,
- kMkvDocType = 0x4282,
- kMkvDocTypeVersion = 0x4287,
- kMkvDocTypeReadVersion = 0x4285,
- kMkvVoid = 0xEC,
- kMkvSignatureSlot = 0x1B538667,
- kMkvSignatureAlgo = 0x7E8A,
- kMkvSignatureHash = 0x7E9A,
- kMkvSignaturePublicKey = 0x7EA5,
- kMkvSignature = 0x7EB5,
- kMkvSignatureElements = 0x7E5B,
- kMkvSignatureElementList = 0x7E7B,
- kMkvSignedElement = 0x6532,
- // segment
- kMkvSegment = 0x18538067,
- // Meta Seek Information
- kMkvSeekHead = 0x114D9B74,
- kMkvSeek = 0x4DBB,
- kMkvSeekID = 0x53AB,
- kMkvSeekPosition = 0x53AC,
- // Segment Information
- kMkvInfo = 0x1549A966,
- kMkvTimecodeScale = 0x2AD7B1,
- kMkvDuration = 0x4489,
- kMkvDateUTC = 0x4461,
- kMkvTitle = 0x7BA9,
- kMkvMuxingApp = 0x4D80,
- kMkvWritingApp = 0x5741,
- // Cluster
- kMkvCluster = 0x1F43B675,
- kMkvTimecode = 0xE7,
- kMkvPrevSize = 0xAB,
- kMkvBlockGroup = 0xA0,
- kMkvBlock = 0xA1,
- kMkvBlockDuration = 0x9B,
- kMkvReferenceBlock = 0xFB,
- kMkvLaceNumber = 0xCC,
- kMkvSimpleBlock = 0xA3,
- kMkvBlockAdditions = 0x75A1,
- kMkvBlockMore = 0xA6,
- kMkvBlockAddID = 0xEE,
- kMkvBlockAdditional = 0xA5,
- kMkvDiscardPadding = 0x75A2,
- // Track
- kMkvTracks = 0x1654AE6B,
- kMkvTrackEntry = 0xAE,
- kMkvTrackNumber = 0xD7,
- kMkvTrackUID = 0x73C5,
- kMkvTrackType = 0x83,
- kMkvFlagEnabled = 0xB9,
- kMkvFlagDefault = 0x88,
- kMkvFlagForced = 0x55AA,
- kMkvFlagLacing = 0x9C,
- kMkvDefaultDuration = 0x23E383,
- kMkvMaxBlockAdditionID = 0x55EE,
- kMkvName = 0x536E,
- kMkvLanguage = 0x22B59C,
- kMkvCodecID = 0x86,
- kMkvCodecPrivate = 0x63A2,
- kMkvCodecName = 0x258688,
- kMkvCodecDelay = 0x56AA,
- kMkvSeekPreRoll = 0x56BB,
- // video
- kMkvVideo = 0xE0,
- kMkvFlagInterlaced = 0x9A,
- kMkvStereoMode = 0x53B8,
- kMkvAlphaMode = 0x53C0,
- kMkvPixelWidth = 0xB0,
- kMkvPixelHeight = 0xBA,
- kMkvPixelCropBottom = 0x54AA,
- kMkvPixelCropTop = 0x54BB,
- kMkvPixelCropLeft = 0x54CC,
- kMkvPixelCropRight = 0x54DD,
- kMkvDisplayWidth = 0x54B0,
- kMkvDisplayHeight = 0x54BA,
- kMkvDisplayUnit = 0x54B2,
- kMkvAspectRatioType = 0x54B3,
- kMkvFrameRate = 0x2383E3,
- // end video
- // colour
- kMkvColour = 0x55B0,
- kMkvMatrixCoefficients = 0x55B1,
- kMkvBitsPerChannel = 0x55B2,
- kMkvChromaSubsamplingHorz = 0x55B3,
- kMkvChromaSubsamplingVert = 0x55B4,
- kMkvCbSubsamplingHorz = 0x55B5,
- kMkvCbSubsamplingVert = 0x55B6,
- kMkvChromaSitingHorz = 0x55B7,
- kMkvChromaSitingVert = 0x55B8,
- kMkvRange = 0x55B9,
- kMkvTransferCharacteristics = 0x55BA,
- kMkvPrimaries = 0x55BB,
- kMkvMaxCLL = 0x55BC,
- kMkvMaxFALL = 0x55BD,
- // mastering metadata
- kMkvMasteringMetadata = 0x55D0,
- kMkvPrimaryRChromaticityX = 0x55D1,
- kMkvPrimaryRChromaticityY = 0x55D2,
- kMkvPrimaryGChromaticityX = 0x55D3,
- kMkvPrimaryGChromaticityY = 0x55D4,
- kMkvPrimaryBChromaticityX = 0x55D5,
- kMkvPrimaryBChromaticityY = 0x55D6,
- kMkvWhitePointChromaticityX = 0x55D7,
- kMkvWhitePointChromaticityY = 0x55D8,
- kMkvLuminanceMax = 0x55D9,
- kMkvLuminanceMin = 0x55DA,
- // end mastering metadata
- // end colour
- // projection
- kMkvProjection = 0x7670,
- kMkvProjectionType = 0x7671,
- kMkvProjectionPrivate = 0x7672,
- kMkvProjectionPoseYaw = 0x7673,
- kMkvProjectionPosePitch = 0x7674,
- kMkvProjectionPoseRoll = 0x7675,
- // end projection
- // audio
- kMkvAudio = 0xE1,
- kMkvSamplingFrequency = 0xB5,
- kMkvOutputSamplingFrequency = 0x78B5,
- kMkvChannels = 0x9F,
- kMkvBitDepth = 0x6264,
- // end audio
- // ContentEncodings
- kMkvContentEncodings = 0x6D80,
- kMkvContentEncoding = 0x6240,
- kMkvContentEncodingOrder = 0x5031,
- kMkvContentEncodingScope = 0x5032,
- kMkvContentEncodingType = 0x5033,
- kMkvContentCompression = 0x5034,
- kMkvContentCompAlgo = 0x4254,
- kMkvContentCompSettings = 0x4255,
- kMkvContentEncryption = 0x5035,
- kMkvContentEncAlgo = 0x47E1,
- kMkvContentEncKeyID = 0x47E2,
- kMkvContentSignature = 0x47E3,
- kMkvContentSigKeyID = 0x47E4,
- kMkvContentSigAlgo = 0x47E5,
- kMkvContentSigHashAlgo = 0x47E6,
- kMkvContentEncAESSettings = 0x47E7,
- kMkvAESSettingsCipherMode = 0x47E8,
- kMkvAESSettingsCipherInitData = 0x47E9,
- // end ContentEncodings
- // Cueing Data
- kMkvCues = 0x1C53BB6B,
- kMkvCuePoint = 0xBB,
- kMkvCueTime = 0xB3,
- kMkvCueTrackPositions = 0xB7,
- kMkvCueTrack = 0xF7,
- kMkvCueClusterPosition = 0xF1,
- kMkvCueBlockNumber = 0x5378,
- // Chapters
- kMkvChapters = 0x1043A770,
- kMkvEditionEntry = 0x45B9,
- kMkvChapterAtom = 0xB6,
- kMkvChapterUID = 0x73C4,
- kMkvChapterStringUID = 0x5654,
- kMkvChapterTimeStart = 0x91,
- kMkvChapterTimeEnd = 0x92,
- kMkvChapterDisplay = 0x80,
- kMkvChapString = 0x85,
- kMkvChapLanguage = 0x437C,
- kMkvChapCountry = 0x437E,
- // Tags
- kMkvTags = 0x1254C367,
- kMkvTag = 0x7373,
- kMkvSimpleTag = 0x67C8,
- kMkvTagName = 0x45A3,
- kMkvTagString = 0x4487
-};
-
-} // namespace libwebm
-
-#endif // COMMON_WEBMIDS_H_
diff --git a/third_party/aom/third_party/libwebm/mkvmuxer/mkvmuxer.cc b/third_party/aom/third_party/libwebm/mkvmuxer/mkvmuxer.cc
deleted file mode 100644
index bae2c99b8..000000000
--- a/third_party/aom/third_party/libwebm/mkvmuxer/mkvmuxer.cc
+++ /dev/null
@@ -1,4194 +0,0 @@
-// Copyright (c) 2012 The WebM project authors. All Rights Reserved.
-//
-// Use of this source code is governed by a BSD-style license
-// that can be found in the LICENSE file in the root of the source
-// tree. An additional intellectual property rights grant can be found
-// in the file PATENTS. All contributing project authors may
-// be found in the AUTHORS file in the root of the source tree.
-
-#include "mkvmuxer/mkvmuxer.h"
-
-#include <stdint.h>
-
-#include <cfloat>
-#include <climits>
-#include <cstdio>
-#include <cstdlib>
-#include <cstring>
-#include <ctime>
-#include <memory>
-#include <new>
-#include <string>
-#include <vector>
-
-#include "common/webmids.h"
-#include "mkvmuxer/mkvmuxerutil.h"
-#include "mkvmuxer/mkvwriter.h"
-#include "mkvparser/mkvparser.h"
-
-namespace mkvmuxer {
-
-const float PrimaryChromaticity::kChromaticityMin = 0.0f;
-const float PrimaryChromaticity::kChromaticityMax = 1.0f;
-const float MasteringMetadata::kMinLuminance = 0.0f;
-const float MasteringMetadata::kMinLuminanceMax = 999.99f;
-const float MasteringMetadata::kMaxLuminanceMax = 9999.99f;
-const float MasteringMetadata::kValueNotPresent = FLT_MAX;
-const uint64_t Colour::kValueNotPresent = UINT64_MAX;
-
-namespace {
-
-const char kDocTypeWebm[] = "webm";
-const char kDocTypeMatroska[] = "matroska";
-
-// Deallocate the string designated by |dst|, and then copy the |src|
-// string to |dst|. The caller owns both the |src| string and the
-// |dst| copy (hence the caller is responsible for eventually
-// deallocating the strings, either directly, or indirectly via
-// StrCpy). Returns true if the source string was successfully copied
-// to the destination.
-bool StrCpy(const char* src, char** dst_ptr) {
- if (dst_ptr == NULL)
- return false;
-
- char*& dst = *dst_ptr;
-
- delete[] dst;
- dst = NULL;
-
- if (src == NULL)
- return true;
-
- const size_t size = strlen(src) + 1;
-
- dst = new (std::nothrow) char[size]; // NOLINT
- if (dst == NULL)
- return false;
-
- strcpy(dst, src); // NOLINT
- return true;
-}
-
-typedef std::unique_ptr<PrimaryChromaticity> PrimaryChromaticityPtr;
-bool CopyChromaticity(const PrimaryChromaticity* src,
- PrimaryChromaticityPtr* dst) {
- if (!dst)
- return false;
-
- dst->reset(new (std::nothrow) PrimaryChromaticity(src->x(), src->y()));
- if (!dst->get())
- return false;
-
- return true;
-}
-
-} // namespace
-
-///////////////////////////////////////////////////////////////
-//
-// IMkvWriter Class
-
-IMkvWriter::IMkvWriter() {}
-
-IMkvWriter::~IMkvWriter() {}
-
-bool WriteEbmlHeader(IMkvWriter* writer, uint64_t doc_type_version,
- const char* const doc_type) {
- // Level 0
- uint64_t size =
- EbmlElementSize(libwebm::kMkvEBMLVersion, static_cast<uint64>(1));
- size += EbmlElementSize(libwebm::kMkvEBMLReadVersion, static_cast<uint64>(1));
- size += EbmlElementSize(libwebm::kMkvEBMLMaxIDLength, static_cast<uint64>(4));
- size +=
- EbmlElementSize(libwebm::kMkvEBMLMaxSizeLength, static_cast<uint64>(8));
- size += EbmlElementSize(libwebm::kMkvDocType, doc_type);
- size += EbmlElementSize(libwebm::kMkvDocTypeVersion,
- static_cast<uint64>(doc_type_version));
- size +=
- EbmlElementSize(libwebm::kMkvDocTypeReadVersion, static_cast<uint64>(2));
-
- if (!WriteEbmlMasterElement(writer, libwebm::kMkvEBML, size))
- return false;
- if (!WriteEbmlElement(writer, libwebm::kMkvEBMLVersion,
- static_cast<uint64>(1))) {
- return false;
- }
- if (!WriteEbmlElement(writer, libwebm::kMkvEBMLReadVersion,
- static_cast<uint64>(1))) {
- return false;
- }
- if (!WriteEbmlElement(writer, libwebm::kMkvEBMLMaxIDLength,
- static_cast<uint64>(4))) {
- return false;
- }
- if (!WriteEbmlElement(writer, libwebm::kMkvEBMLMaxSizeLength,
- static_cast<uint64>(8))) {
- return false;
- }
- if (!WriteEbmlElement(writer, libwebm::kMkvDocType, doc_type))
- return false;
- if (!WriteEbmlElement(writer, libwebm::kMkvDocTypeVersion,
- static_cast<uint64>(doc_type_version))) {
- return false;
- }
- if (!WriteEbmlElement(writer, libwebm::kMkvDocTypeReadVersion,
- static_cast<uint64>(2))) {
- return false;
- }
-
- return true;
-}
-
-bool WriteEbmlHeader(IMkvWriter* writer, uint64_t doc_type_version) {
- return WriteEbmlHeader(writer, doc_type_version, kDocTypeWebm);
-}
-
-bool WriteEbmlHeader(IMkvWriter* writer) {
- return WriteEbmlHeader(writer, mkvmuxer::Segment::kDefaultDocTypeVersion);
-}
-
-bool ChunkedCopy(mkvparser::IMkvReader* source, mkvmuxer::IMkvWriter* dst,
- int64_t start, int64_t size) {
- // TODO(vigneshv): Check if this is a reasonable value.
- const uint32_t kBufSize = 2048;
- uint8_t* buf = new uint8_t[kBufSize];
- int64_t offset = start;
- while (size > 0) {
- const int64_t read_len = (size > kBufSize) ? kBufSize : size;
- if (source->Read(offset, static_cast<long>(read_len), buf))
- return false;
- dst->Write(buf, static_cast<uint32_t>(read_len));
- offset += read_len;
- size -= read_len;
- }
- delete[] buf;
- return true;
-}
-
-///////////////////////////////////////////////////////////////
-//
-// Frame Class
-
-Frame::Frame()
- : add_id_(0),
- additional_(NULL),
- additional_length_(0),
- duration_(0),
- duration_set_(false),
- frame_(NULL),
- is_key_(false),
- length_(0),
- track_number_(0),
- timestamp_(0),
- discard_padding_(0),
- reference_block_timestamp_(0),
- reference_block_timestamp_set_(false) {}
-
-Frame::~Frame() {
- delete[] frame_;
- delete[] additional_;
-}
-
-bool Frame::CopyFrom(const Frame& frame) {
- delete[] frame_;
- frame_ = NULL;
- length_ = 0;
- if (frame.length() > 0 && frame.frame() != NULL &&
- !Init(frame.frame(), frame.length())) {
- return false;
- }
- add_id_ = 0;
- delete[] additional_;
- additional_ = NULL;
- additional_length_ = 0;
- if (frame.additional_length() > 0 && frame.additional() != NULL &&
- !AddAdditionalData(frame.additional(), frame.additional_length(),
- frame.add_id())) {
- return false;
- }
- duration_ = frame.duration();
- duration_set_ = frame.duration_set();
- is_key_ = frame.is_key();
- track_number_ = frame.track_number();
- timestamp_ = frame.timestamp();
- discard_padding_ = frame.discard_padding();
- reference_block_timestamp_ = frame.reference_block_timestamp();
- reference_block_timestamp_set_ = frame.reference_block_timestamp_set();
- return true;
-}
-
-bool Frame::Init(const uint8_t* frame, uint64_t length) {
- uint8_t* const data =
- new (std::nothrow) uint8_t[static_cast<size_t>(length)]; // NOLINT
- if (!data)
- return false;
-
- delete[] frame_;
- frame_ = data;
- length_ = length;
-
- memcpy(frame_, frame, static_cast<size_t>(length_));
- return true;
-}
-
-bool Frame::AddAdditionalData(const uint8_t* additional, uint64_t length,
- uint64_t add_id) {
- uint8_t* const data =
- new (std::nothrow) uint8_t[static_cast<size_t>(length)]; // NOLINT
- if (!data)
- return false;
-
- delete[] additional_;
- additional_ = data;
- additional_length_ = length;
- add_id_ = add_id;
-
- memcpy(additional_, additional, static_cast<size_t>(additional_length_));
- return true;
-}
-
-bool Frame::IsValid() const {
- if (length_ == 0 || !frame_) {
- return false;
- }
- if ((additional_length_ != 0 && !additional_) ||
- (additional_ != NULL && additional_length_ == 0)) {
- return false;
- }
- if (track_number_ == 0 || track_number_ > kMaxTrackNumber) {
- return false;
- }
- if (!CanBeSimpleBlock() && !is_key_ && !reference_block_timestamp_set_) {
- return false;
- }
- return true;
-}
-
-bool Frame::CanBeSimpleBlock() const {
- return additional_ == NULL && discard_padding_ == 0 && duration_ == 0;
-}
-
-void Frame::set_duration(uint64_t duration) {
- duration_ = duration;
- duration_set_ = true;
-}
-
-void Frame::set_reference_block_timestamp(int64_t reference_block_timestamp) {
- reference_block_timestamp_ = reference_block_timestamp;
- reference_block_timestamp_set_ = true;
-}
-
-///////////////////////////////////////////////////////////////
-//
-// CuePoint Class
-
-CuePoint::CuePoint()
- : time_(0),
- track_(0),
- cluster_pos_(0),
- block_number_(1),
- output_block_number_(true) {}
-
-CuePoint::~CuePoint() {}
-
-bool CuePoint::Write(IMkvWriter* writer) const {
- if (!writer || track_ < 1 || cluster_pos_ < 1)
- return false;
-
- uint64_t size = EbmlElementSize(libwebm::kMkvCueClusterPosition,
- static_cast<uint64>(cluster_pos_));
- size += EbmlElementSize(libwebm::kMkvCueTrack, static_cast<uint64>(track_));
- if (output_block_number_ && block_number_ > 1)
- size += EbmlElementSize(libwebm::kMkvCueBlockNumber,
- static_cast<uint64>(block_number_));
- const uint64_t track_pos_size =
- EbmlMasterElementSize(libwebm::kMkvCueTrackPositions, size) + size;
- const uint64_t payload_size =
- EbmlElementSize(libwebm::kMkvCueTime, static_cast<uint64>(time_)) +
- track_pos_size;
-
- if (!WriteEbmlMasterElement(writer, libwebm::kMkvCuePoint, payload_size))
- return false;
-
- const int64_t payload_position = writer->Position();
- if (payload_position < 0)
- return false;
-
- if (!WriteEbmlElement(writer, libwebm::kMkvCueTime,
- static_cast<uint64>(time_))) {
- return false;
- }
-
- if (!WriteEbmlMasterElement(writer, libwebm::kMkvCueTrackPositions, size))
- return false;
- if (!WriteEbmlElement(writer, libwebm::kMkvCueTrack,
- static_cast<uint64>(track_))) {
- return false;
- }
- if (!WriteEbmlElement(writer, libwebm::kMkvCueClusterPosition,
- static_cast<uint64>(cluster_pos_))) {
- return false;
- }
- if (output_block_number_ && block_number_ > 1) {
- if (!WriteEbmlElement(writer, libwebm::kMkvCueBlockNumber,
- static_cast<uint64>(block_number_))) {
- return false;
- }
- }
-
- const int64_t stop_position = writer->Position();
- if (stop_position < 0)
- return false;
-
- if (stop_position - payload_position != static_cast<int64_t>(payload_size))
- return false;
-
- return true;
-}
-
-uint64_t CuePoint::PayloadSize() const {
- uint64_t size = EbmlElementSize(libwebm::kMkvCueClusterPosition,
- static_cast<uint64>(cluster_pos_));
- size += EbmlElementSize(libwebm::kMkvCueTrack, static_cast<uint64>(track_));
- if (output_block_number_ && block_number_ > 1)
- size += EbmlElementSize(libwebm::kMkvCueBlockNumber,
- static_cast<uint64>(block_number_));
- const uint64_t track_pos_size =
- EbmlMasterElementSize(libwebm::kMkvCueTrackPositions, size) + size;
- const uint64_t payload_size =
- EbmlElementSize(libwebm::kMkvCueTime, static_cast<uint64>(time_)) +
- track_pos_size;
-
- return payload_size;
-}
-
-uint64_t CuePoint::Size() const {
- const uint64_t payload_size = PayloadSize();
- return EbmlMasterElementSize(libwebm::kMkvCuePoint, payload_size) +
- payload_size;
-}
-
-///////////////////////////////////////////////////////////////
-//
-// Cues Class
-
-Cues::Cues()
- : cue_entries_capacity_(0),
- cue_entries_size_(0),
- cue_entries_(NULL),
- output_block_number_(true) {}
-
-Cues::~Cues() {
- if (cue_entries_) {
- for (int32_t i = 0; i < cue_entries_size_; ++i) {
- CuePoint* const cue = cue_entries_[i];
- delete cue;
- }
- delete[] cue_entries_;
- }
-}
-
-bool Cues::AddCue(CuePoint* cue) {
- if (!cue)
- return false;
-
- if ((cue_entries_size_ + 1) > cue_entries_capacity_) {
- // Add more CuePoints.
- const int32_t new_capacity =
- (!cue_entries_capacity_) ? 2 : cue_entries_capacity_ * 2;
-
- if (new_capacity < 1)
- return false;
-
- CuePoint** const cues =
- new (std::nothrow) CuePoint*[new_capacity]; // NOLINT
- if (!cues)
- return false;
-
- for (int32_t i = 0; i < cue_entries_size_; ++i) {
- cues[i] = cue_entries_[i];
- }
-
- delete[] cue_entries_;
-
- cue_entries_ = cues;
- cue_entries_capacity_ = new_capacity;
- }
-
- cue->set_output_block_number(output_block_number_);
- cue_entries_[cue_entries_size_++] = cue;
- return true;
-}
-
-CuePoint* Cues::GetCueByIndex(int32_t index) const {
- if (cue_entries_ == NULL)
- return NULL;
-
- if (index >= cue_entries_size_)
- return NULL;
-
- return cue_entries_[index];
-}
-
-uint64_t Cues::Size() {
- uint64_t size = 0;
- for (int32_t i = 0; i < cue_entries_size_; ++i)
- size += GetCueByIndex(i)->Size();
- size += EbmlMasterElementSize(libwebm::kMkvCues, size);
- return size;
-}
-
-bool Cues::Write(IMkvWriter* writer) const {
- if (!writer)
- return false;
-
- uint64_t size = 0;
- for (int32_t i = 0; i < cue_entries_size_; ++i) {
- const CuePoint* const cue = GetCueByIndex(i);
-
- if (!cue)
- return false;
-
- size += cue->Size();
- }
-
- if (!WriteEbmlMasterElement(writer, libwebm::kMkvCues, size))
- return false;
-
- const int64_t payload_position = writer->Position();
- if (payload_position < 0)
- return false;
-
- for (int32_t i = 0; i < cue_entries_size_; ++i) {
- const CuePoint* const cue = GetCueByIndex(i);
-
- if (!cue->Write(writer))
- return false;
- }
-
- const int64_t stop_position = writer->Position();
- if (stop_position < 0)
- return false;
-
- if (stop_position - payload_position != static_cast<int64_t>(size))
- return false;
-
- return true;
-}
-
-///////////////////////////////////////////////////////////////
-//
-// ContentEncAESSettings Class
-
-ContentEncAESSettings::ContentEncAESSettings() : cipher_mode_(kCTR) {}
-
-uint64_t ContentEncAESSettings::Size() const {
- const uint64_t payload = PayloadSize();
- const uint64_t size =
- EbmlMasterElementSize(libwebm::kMkvContentEncAESSettings, payload) +
- payload;
- return size;
-}
-
-bool ContentEncAESSettings::Write(IMkvWriter* writer) const {
- const uint64_t payload = PayloadSize();
-
- if (!WriteEbmlMasterElement(writer, libwebm::kMkvContentEncAESSettings,
- payload))
- return false;
-
- const int64_t payload_position = writer->Position();
- if (payload_position < 0)
- return false;
-
- if (!WriteEbmlElement(writer, libwebm::kMkvAESSettingsCipherMode,
- static_cast<uint64>(cipher_mode_))) {
- return false;
- }
-
- const int64_t stop_position = writer->Position();
- if (stop_position < 0 ||
- stop_position - payload_position != static_cast<int64_t>(payload))
- return false;
-
- return true;
-}
-
-uint64_t ContentEncAESSettings::PayloadSize() const {
- uint64_t size = EbmlElementSize(libwebm::kMkvAESSettingsCipherMode,
- static_cast<uint64>(cipher_mode_));
- return size;
-}
-
-///////////////////////////////////////////////////////////////
-//
-// ContentEncoding Class
-
-ContentEncoding::ContentEncoding()
- : enc_algo_(5),
- enc_key_id_(NULL),
- encoding_order_(0),
- encoding_scope_(1),
- encoding_type_(1),
- enc_key_id_length_(0) {}
-
-ContentEncoding::~ContentEncoding() { delete[] enc_key_id_; }
-
-bool ContentEncoding::SetEncryptionID(const uint8_t* id, uint64_t length) {
- if (!id || length < 1)
- return false;
-
- delete[] enc_key_id_;
-
- enc_key_id_ =
- new (std::nothrow) uint8_t[static_cast<size_t>(length)]; // NOLINT
- if (!enc_key_id_)
- return false;
-
- memcpy(enc_key_id_, id, static_cast<size_t>(length));
- enc_key_id_length_ = length;
-
- return true;
-}
-
-uint64_t ContentEncoding::Size() const {
- const uint64_t encryption_size = EncryptionSize();
- const uint64_t encoding_size = EncodingSize(0, encryption_size);
- const uint64_t encodings_size =
- EbmlMasterElementSize(libwebm::kMkvContentEncoding, encoding_size) +
- encoding_size;
-
- return encodings_size;
-}
-
-bool ContentEncoding::Write(IMkvWriter* writer) const {
- const uint64_t encryption_size = EncryptionSize();
- const uint64_t encoding_size = EncodingSize(0, encryption_size);
- const uint64_t size =
- EbmlMasterElementSize(libwebm::kMkvContentEncoding, encoding_size) +
- encoding_size;
-
- const int64_t payload_position = writer->Position();
- if (payload_position < 0)
- return false;
-
- if (!WriteEbmlMasterElement(writer, libwebm::kMkvContentEncoding,
- encoding_size))
- return false;
- if (!WriteEbmlElement(writer, libwebm::kMkvContentEncodingOrder,
- static_cast<uint64>(encoding_order_)))
- return false;
- if (!WriteEbmlElement(writer, libwebm::kMkvContentEncodingScope,
- static_cast<uint64>(encoding_scope_)))
- return false;
- if (!WriteEbmlElement(writer, libwebm::kMkvContentEncodingType,
- static_cast<uint64>(encoding_type_)))
- return false;
-
- if (!WriteEbmlMasterElement(writer, libwebm::kMkvContentEncryption,
- encryption_size))
- return false;
- if (!WriteEbmlElement(writer, libwebm::kMkvContentEncAlgo,
- static_cast<uint64>(enc_algo_))) {
- return false;
- }
- if (!WriteEbmlElement(writer, libwebm::kMkvContentEncKeyID, enc_key_id_,
- enc_key_id_length_))
- return false;
-
- if (!enc_aes_settings_.Write(writer))
- return false;
-
- const int64_t stop_position = writer->Position();
- if (stop_position < 0 ||
- stop_position - payload_position != static_cast<int64_t>(size))
- return false;
-
- return true;
-}
-
-uint64_t ContentEncoding::EncodingSize(uint64_t compresion_size,
- uint64_t encryption_size) const {
- // TODO(fgalligan): Add support for compression settings.
- if (compresion_size != 0)
- return 0;
-
- uint64_t encoding_size = 0;
-
- if (encryption_size > 0) {
- encoding_size +=
- EbmlMasterElementSize(libwebm::kMkvContentEncryption, encryption_size) +
- encryption_size;
- }
- encoding_size += EbmlElementSize(libwebm::kMkvContentEncodingType,
- static_cast<uint64>(encoding_type_));
- encoding_size += EbmlElementSize(libwebm::kMkvContentEncodingScope,
- static_cast<uint64>(encoding_scope_));
- encoding_size += EbmlElementSize(libwebm::kMkvContentEncodingOrder,
- static_cast<uint64>(encoding_order_));
-
- return encoding_size;
-}
-
-uint64_t ContentEncoding::EncryptionSize() const {
- const uint64_t aes_size = enc_aes_settings_.Size();
-
- uint64_t encryption_size = EbmlElementSize(libwebm::kMkvContentEncKeyID,
- enc_key_id_, enc_key_id_length_);
- encryption_size += EbmlElementSize(libwebm::kMkvContentEncAlgo,
- static_cast<uint64>(enc_algo_));
-
- return encryption_size + aes_size;
-}
-
-///////////////////////////////////////////////////////////////
-//
-// Track Class
-
-Track::Track(unsigned int* seed)
- : codec_id_(NULL),
- codec_private_(NULL),
- language_(NULL),
- max_block_additional_id_(0),
- name_(NULL),
- number_(0),
- type_(0),
- uid_(MakeUID(seed)),
- codec_delay_(0),
- seek_pre_roll_(0),
- default_duration_(0),
- codec_private_length_(0),
- content_encoding_entries_(NULL),
- content_encoding_entries_size_(0) {}
-
-Track::~Track() {
- delete[] codec_id_;
- delete[] codec_private_;
- delete[] language_;
- delete[] name_;
-
- if (content_encoding_entries_) {
- for (uint32_t i = 0; i < content_encoding_entries_size_; ++i) {
- ContentEncoding* const encoding = content_encoding_entries_[i];
- delete encoding;
- }
- delete[] content_encoding_entries_;
- }
-}
-
-bool Track::AddContentEncoding() {
- const uint32_t count = content_encoding_entries_size_ + 1;
-
- ContentEncoding** const content_encoding_entries =
- new (std::nothrow) ContentEncoding*[count]; // NOLINT
- if (!content_encoding_entries)
- return false;
-
- ContentEncoding* const content_encoding =
- new (std::nothrow) ContentEncoding(); // NOLINT
- if (!content_encoding) {
- delete[] content_encoding_entries;
- return false;
- }
-
- for (uint32_t i = 0; i < content_encoding_entries_size_; ++i) {
- content_encoding_entries[i] = content_encoding_entries_[i];
- }
-
- delete[] content_encoding_entries_;
-
- content_encoding_entries_ = content_encoding_entries;
- content_encoding_entries_[content_encoding_entries_size_] = content_encoding;
- content_encoding_entries_size_ = count;
- return true;
-}
-
-ContentEncoding* Track::GetContentEncodingByIndex(uint32_t index) const {
- if (content_encoding_entries_ == NULL)
- return NULL;
-
- if (index >= content_encoding_entries_size_)
- return NULL;
-
- return content_encoding_entries_[index];
-}
-
-uint64_t Track::PayloadSize() const {
- uint64_t size =
- EbmlElementSize(libwebm::kMkvTrackNumber, static_cast<uint64>(number_));
- size += EbmlElementSize(libwebm::kMkvTrackUID, static_cast<uint64>(uid_));
- size += EbmlElementSize(libwebm::kMkvTrackType, static_cast<uint64>(type_));
- if (codec_id_)
- size += EbmlElementSize(libwebm::kMkvCodecID, codec_id_);
- if (codec_private_)
- size += EbmlElementSize(libwebm::kMkvCodecPrivate, codec_private_,
- codec_private_length_);
- if (language_)
- size += EbmlElementSize(libwebm::kMkvLanguage, language_);
- if (name_)
- size += EbmlElementSize(libwebm::kMkvName, name_);
- if (max_block_additional_id_) {
- size += EbmlElementSize(libwebm::kMkvMaxBlockAdditionID,
- static_cast<uint64>(max_block_additional_id_));
- }
- if (codec_delay_) {
- size += EbmlElementSize(libwebm::kMkvCodecDelay,
- static_cast<uint64>(codec_delay_));
- }
- if (seek_pre_roll_) {
- size += EbmlElementSize(libwebm::kMkvSeekPreRoll,
- static_cast<uint64>(seek_pre_roll_));
- }
- if (default_duration_) {
- size += EbmlElementSize(libwebm::kMkvDefaultDuration,
- static_cast<uint64>(default_duration_));
- }
-
- if (content_encoding_entries_size_ > 0) {
- uint64_t content_encodings_size = 0;
- for (uint32_t i = 0; i < content_encoding_entries_size_; ++i) {
- ContentEncoding* const encoding = content_encoding_entries_[i];
- content_encodings_size += encoding->Size();
- }
-
- size += EbmlMasterElementSize(libwebm::kMkvContentEncodings,
- content_encodings_size) +
- content_encodings_size;
- }
-
- return size;
-}
-
-uint64_t Track::Size() const {
- uint64_t size = PayloadSize();
- size += EbmlMasterElementSize(libwebm::kMkvTrackEntry, size);
- return size;
-}
-
-bool Track::Write(IMkvWriter* writer) const {
- if (!writer)
- return false;
-
- // mandatory elements without a default value.
- if (!type_ || !codec_id_)
- return false;
-
- // |size| may be bigger than what is written out in this function because
- // derived classes may write out more data in the Track element.
- const uint64_t payload_size = PayloadSize();
-
- if (!WriteEbmlMasterElement(writer, libwebm::kMkvTrackEntry, payload_size))
- return false;
-
- uint64_t size =
- EbmlElementSize(libwebm::kMkvTrackNumber, static_cast<uint64>(number_));
- size += EbmlElementSize(libwebm::kMkvTrackUID, static_cast<uint64>(uid_));
- size += EbmlElementSize(libwebm::kMkvTrackType, static_cast<uint64>(type_));
- if (codec_id_)
- size += EbmlElementSize(libwebm::kMkvCodecID, codec_id_);
- if (codec_private_)
- size += EbmlElementSize(libwebm::kMkvCodecPrivate, codec_private_,
- static_cast<uint64>(codec_private_length_));
- if (language_)
- size += EbmlElementSize(libwebm::kMkvLanguage, language_);
- if (name_)
- size += EbmlElementSize(libwebm::kMkvName, name_);
- if (max_block_additional_id_)
- size += EbmlElementSize(libwebm::kMkvMaxBlockAdditionID,
- static_cast<uint64>(max_block_additional_id_));
- if (codec_delay_)
- size += EbmlElementSize(libwebm::kMkvCodecDelay,
- static_cast<uint64>(codec_delay_));
- if (seek_pre_roll_)
- size += EbmlElementSize(libwebm::kMkvSeekPreRoll,
- static_cast<uint64>(seek_pre_roll_));
- if (default_duration_)
- size += EbmlElementSize(libwebm::kMkvDefaultDuration,
- static_cast<uint64>(default_duration_));
-
- const int64_t payload_position = writer->Position();
- if (payload_position < 0)
- return false;
-
- if (!WriteEbmlElement(writer, libwebm::kMkvTrackNumber,
- static_cast<uint64>(number_)))
- return false;
- if (!WriteEbmlElement(writer, libwebm::kMkvTrackUID,
- static_cast<uint64>(uid_)))
- return false;
- if (!WriteEbmlElement(writer, libwebm::kMkvTrackType,
- static_cast<uint64>(type_)))
- return false;
- if (max_block_additional_id_) {
- if (!WriteEbmlElement(writer, libwebm::kMkvMaxBlockAdditionID,
- static_cast<uint64>(max_block_additional_id_))) {
- return false;
- }
- }
- if (codec_delay_) {
- if (!WriteEbmlElement(writer, libwebm::kMkvCodecDelay,
- static_cast<uint64>(codec_delay_)))
- return false;
- }
- if (seek_pre_roll_) {
- if (!WriteEbmlElement(writer, libwebm::kMkvSeekPreRoll,
- static_cast<uint64>(seek_pre_roll_)))
- return false;
- }
- if (default_duration_) {
- if (!WriteEbmlElement(writer, libwebm::kMkvDefaultDuration,
- static_cast<uint64>(default_duration_)))
- return false;
- }
- if (codec_id_) {
- if (!WriteEbmlElement(writer, libwebm::kMkvCodecID, codec_id_))
- return false;
- }
- if (codec_private_) {
- if (!WriteEbmlElement(writer, libwebm::kMkvCodecPrivate, codec_private_,
- static_cast<uint64>(codec_private_length_)))
- return false;
- }
- if (language_) {
- if (!WriteEbmlElement(writer, libwebm::kMkvLanguage, language_))
- return false;
- }
- if (name_) {
- if (!WriteEbmlElement(writer, libwebm::kMkvName, name_))
- return false;
- }
-
- int64_t stop_position = writer->Position();
- if (stop_position < 0 ||
- stop_position - payload_position != static_cast<int64_t>(size))
- return false;
-
- if (content_encoding_entries_size_ > 0) {
- uint64_t content_encodings_size = 0;
- for (uint32_t i = 0; i < content_encoding_entries_size_; ++i) {
- ContentEncoding* const encoding = content_encoding_entries_[i];
- content_encodings_size += encoding->Size();
- }
-
- if (!WriteEbmlMasterElement(writer, libwebm::kMkvContentEncodings,
- content_encodings_size))
- return false;
-
- for (uint32_t i = 0; i < content_encoding_entries_size_; ++i) {
- ContentEncoding* const encoding = content_encoding_entries_[i];
- if (!encoding->Write(writer))
- return false;
- }
- }
-
- stop_position = writer->Position();
- if (stop_position < 0)
- return false;
- return true;
-}
-
-bool Track::SetCodecPrivate(const uint8_t* codec_private, uint64_t length) {
- if (!codec_private || length < 1)
- return false;
-
- delete[] codec_private_;
-
- codec_private_ =
- new (std::nothrow) uint8_t[static_cast<size_t>(length)]; // NOLINT
- if (!codec_private_)
- return false;
-
- memcpy(codec_private_, codec_private, static_cast<size_t>(length));
- codec_private_length_ = length;
-
- return true;
-}
-
-void Track::set_codec_id(const char* codec_id) {
- if (codec_id) {
- delete[] codec_id_;
-
- const size_t length = strlen(codec_id) + 1;
- codec_id_ = new (std::nothrow) char[length]; // NOLINT
- if (codec_id_) {
-#ifdef _MSC_VER
- strcpy_s(codec_id_, length, codec_id);
-#else
- strcpy(codec_id_, codec_id);
-#endif
- }
- }
-}
-
-// TODO(fgalligan): Vet the language parameter.
-void Track::set_language(const char* language) {
- if (language) {
- delete[] language_;
-
- const size_t length = strlen(language) + 1;
- language_ = new (std::nothrow) char[length]; // NOLINT
- if (language_) {
-#ifdef _MSC_VER
- strcpy_s(language_, length, language);
-#else
- strcpy(language_, language);
-#endif
- }
- }
-}
-
-void Track::set_name(const char* name) {
- if (name) {
- delete[] name_;
-
- const size_t length = strlen(name) + 1;
- name_ = new (std::nothrow) char[length]; // NOLINT
- if (name_) {
-#ifdef _MSC_VER
- strcpy_s(name_, length, name);
-#else
- strcpy(name_, name);
-#endif
- }
- }
-}
-
-///////////////////////////////////////////////////////////////
-//
-// Colour and its child elements
-
-uint64_t PrimaryChromaticity::PrimaryChromaticitySize(
- libwebm::MkvId x_id, libwebm::MkvId y_id) const {
- return EbmlElementSize(x_id, x_) + EbmlElementSize(y_id, y_);
-}
-
-bool PrimaryChromaticity::Write(IMkvWriter* writer, libwebm::MkvId x_id,
- libwebm::MkvId y_id) const {
- if (!Valid()) {
- return false;
- }
- return WriteEbmlElement(writer, x_id, x_) &&
- WriteEbmlElement(writer, y_id, y_);
-}
-
-bool PrimaryChromaticity::Valid() const {
- return (x_ >= kChromaticityMin && x_ <= kChromaticityMax &&
- y_ >= kChromaticityMin && y_ <= kChromaticityMax);
-}
-
-uint64_t MasteringMetadata::MasteringMetadataSize() const {
- uint64_t size = PayloadSize();
-
- if (size > 0)
- size += EbmlMasterElementSize(libwebm::kMkvMasteringMetadata, size);
-
- return size;
-}
-
-bool MasteringMetadata::Valid() const {
- if (luminance_min_ != kValueNotPresent) {
- if (luminance_min_ < kMinLuminance || luminance_min_ > kMinLuminanceMax ||
- luminance_min_ > luminance_max_) {
- return false;
- }
- }
- if (luminance_max_ != kValueNotPresent) {
- if (luminance_max_ < kMinLuminance || luminance_max_ > kMaxLuminanceMax ||
- luminance_max_ < luminance_min_) {
- return false;
- }
- }
- if (r_ && !r_->Valid())
- return false;
- if (g_ && !g_->Valid())
- return false;
- if (b_ && !b_->Valid())
- return false;
- if (white_point_ && !white_point_->Valid())
- return false;
-
- return true;
-}
-
-bool MasteringMetadata::Write(IMkvWriter* writer) const {
- const uint64_t size = PayloadSize();
-
- // Don't write an empty element.
- if (size == 0)
- return true;
-
- if (!WriteEbmlMasterElement(writer, libwebm::kMkvMasteringMetadata, size))
- return false;
- if (luminance_max_ != kValueNotPresent &&
- !WriteEbmlElement(writer, libwebm::kMkvLuminanceMax, luminance_max_)) {
- return false;
- }
- if (luminance_min_ != kValueNotPresent &&
- !WriteEbmlElement(writer, libwebm::kMkvLuminanceMin, luminance_min_)) {
- return false;
- }
- if (r_ &&
- !r_->Write(writer, libwebm::kMkvPrimaryRChromaticityX,
- libwebm::kMkvPrimaryRChromaticityY)) {
- return false;
- }
- if (g_ &&
- !g_->Write(writer, libwebm::kMkvPrimaryGChromaticityX,
- libwebm::kMkvPrimaryGChromaticityY)) {
- return false;
- }
- if (b_ &&
- !b_->Write(writer, libwebm::kMkvPrimaryBChromaticityX,
- libwebm::kMkvPrimaryBChromaticityY)) {
- return false;
- }
- if (white_point_ &&
- !white_point_->Write(writer, libwebm::kMkvWhitePointChromaticityX,
- libwebm::kMkvWhitePointChromaticityY)) {
- return false;
- }
-
- return true;
-}
-
-bool MasteringMetadata::SetChromaticity(
- const PrimaryChromaticity* r, const PrimaryChromaticity* g,
- const PrimaryChromaticity* b, const PrimaryChromaticity* white_point) {
- PrimaryChromaticityPtr r_ptr(nullptr);
- if (r) {
- if (!CopyChromaticity(r, &r_ptr))
- return false;
- }
- PrimaryChromaticityPtr g_ptr(nullptr);
- if (g) {
- if (!CopyChromaticity(g, &g_ptr))
- return false;
- }
- PrimaryChromaticityPtr b_ptr(nullptr);
- if (b) {
- if (!CopyChromaticity(b, &b_ptr))
- return false;
- }
- PrimaryChromaticityPtr wp_ptr(nullptr);
- if (white_point) {
- if (!CopyChromaticity(white_point, &wp_ptr))
- return false;
- }
-
- r_ = r_ptr.release();
- g_ = g_ptr.release();
- b_ = b_ptr.release();
- white_point_ = wp_ptr.release();
- return true;
-}
-
-uint64_t MasteringMetadata::PayloadSize() const {
- uint64_t size = 0;
-
- if (luminance_max_ != kValueNotPresent)
- size += EbmlElementSize(libwebm::kMkvLuminanceMax, luminance_max_);
- if (luminance_min_ != kValueNotPresent)
- size += EbmlElementSize(libwebm::kMkvLuminanceMin, luminance_min_);
-
- if (r_) {
- size += r_->PrimaryChromaticitySize(libwebm::kMkvPrimaryRChromaticityX,
- libwebm::kMkvPrimaryRChromaticityY);
- }
- if (g_) {
- size += g_->PrimaryChromaticitySize(libwebm::kMkvPrimaryGChromaticityX,
- libwebm::kMkvPrimaryGChromaticityY);
- }
- if (b_) {
- size += b_->PrimaryChromaticitySize(libwebm::kMkvPrimaryBChromaticityX,
- libwebm::kMkvPrimaryBChromaticityY);
- }
- if (white_point_) {
- size += white_point_->PrimaryChromaticitySize(
- libwebm::kMkvWhitePointChromaticityX,
- libwebm::kMkvWhitePointChromaticityY);
- }
-
- return size;
-}
-
-uint64_t Colour::ColourSize() const {
- uint64_t size = PayloadSize();
-
- if (size > 0)
- size += EbmlMasterElementSize(libwebm::kMkvColour, size);
-
- return size;
-}
-
-bool Colour::Valid() const {
- if (mastering_metadata_ && !mastering_metadata_->Valid())
- return false;
- if (matrix_coefficients_ != kValueNotPresent &&
- !IsMatrixCoefficientsValueValid(matrix_coefficients_)) {
- return false;
- }
- if (chroma_siting_horz_ != kValueNotPresent &&
- !IsChromaSitingHorzValueValid(chroma_siting_horz_)) {
- return false;
- }
- if (chroma_siting_vert_ != kValueNotPresent &&
- !IsChromaSitingVertValueValid(chroma_siting_vert_)) {
- return false;
- }
- if (range_ != kValueNotPresent && !IsColourRangeValueValid(range_))
- return false;
- if (transfer_characteristics_ != kValueNotPresent &&
- !IsTransferCharacteristicsValueValid(transfer_characteristics_)) {
- return false;
- }
- if (primaries_ != kValueNotPresent && !IsPrimariesValueValid(primaries_))
- return false;
-
- return true;
-}
-
-bool Colour::Write(IMkvWriter* writer) const {
- const uint64_t size = PayloadSize();
-
- // Don't write an empty element.
- if (size == 0)
- return true;
-
- // Don't write an invalid element.
- if (!Valid())
- return false;
-
- if (!WriteEbmlMasterElement(writer, libwebm::kMkvColour, size))
- return false;
-
- if (matrix_coefficients_ != kValueNotPresent &&
- !WriteEbmlElement(writer, libwebm::kMkvMatrixCoefficients,
- static_cast<uint64>(matrix_coefficients_))) {
- return false;
- }
- if (bits_per_channel_ != kValueNotPresent &&
- !WriteEbmlElement(writer, libwebm::kMkvBitsPerChannel,
- static_cast<uint64>(bits_per_channel_))) {
- return false;
- }
- if (chroma_subsampling_horz_ != kValueNotPresent &&
- !WriteEbmlElement(writer, libwebm::kMkvChromaSubsamplingHorz,
- static_cast<uint64>(chroma_subsampling_horz_))) {
- return false;
- }
- if (chroma_subsampling_vert_ != kValueNotPresent &&
- !WriteEbmlElement(writer, libwebm::kMkvChromaSubsamplingVert,
- static_cast<uint64>(chroma_subsampling_vert_))) {
- return false;
- }
-
- if (cb_subsampling_horz_ != kValueNotPresent &&
- !WriteEbmlElement(writer, libwebm::kMkvCbSubsamplingHorz,
- static_cast<uint64>(cb_subsampling_horz_))) {
- return false;
- }
- if (cb_subsampling_vert_ != kValueNotPresent &&
- !WriteEbmlElement(writer, libwebm::kMkvCbSubsamplingVert,
- static_cast<uint64>(cb_subsampling_vert_))) {
- return false;
- }
- if (chroma_siting_horz_ != kValueNotPresent &&
- !WriteEbmlElement(writer, libwebm::kMkvChromaSitingHorz,
- static_cast<uint64>(chroma_siting_horz_))) {
- return false;
- }
- if (chroma_siting_vert_ != kValueNotPresent &&
- !WriteEbmlElement(writer, libwebm::kMkvChromaSitingVert,
- static_cast<uint64>(chroma_siting_vert_))) {
- return false;
- }
- if (range_ != kValueNotPresent &&
- !WriteEbmlElement(writer, libwebm::kMkvRange,
- static_cast<uint64>(range_))) {
- return false;
- }
- if (transfer_characteristics_ != kValueNotPresent &&
- !WriteEbmlElement(writer, libwebm::kMkvTransferCharacteristics,
- static_cast<uint64>(transfer_characteristics_))) {
- return false;
- }
- if (primaries_ != kValueNotPresent &&
- !WriteEbmlElement(writer, libwebm::kMkvPrimaries,
- static_cast<uint64>(primaries_))) {
- return false;
- }
- if (max_cll_ != kValueNotPresent &&
- !WriteEbmlElement(writer, libwebm::kMkvMaxCLL,
- static_cast<uint64>(max_cll_))) {
- return false;
- }
- if (max_fall_ != kValueNotPresent &&
- !WriteEbmlElement(writer, libwebm::kMkvMaxFALL,
- static_cast<uint64>(max_fall_))) {
- return false;
- }
-
- if (mastering_metadata_ && !mastering_metadata_->Write(writer))
- return false;
-
- return true;
-}
-
-bool Colour::SetMasteringMetadata(const MasteringMetadata& mastering_metadata) {
- std::unique_ptr<MasteringMetadata> mm_ptr(new MasteringMetadata());
- if (!mm_ptr.get())
- return false;
-
- mm_ptr->set_luminance_max(mastering_metadata.luminance_max());
- mm_ptr->set_luminance_min(mastering_metadata.luminance_min());
-
- if (!mm_ptr->SetChromaticity(mastering_metadata.r(), mastering_metadata.g(),
- mastering_metadata.b(),
- mastering_metadata.white_point())) {
- return false;
- }
-
- delete mastering_metadata_;
- mastering_metadata_ = mm_ptr.release();
- return true;
-}
-
-uint64_t Colour::PayloadSize() const {
- uint64_t size = 0;
-
- if (matrix_coefficients_ != kValueNotPresent) {
- size += EbmlElementSize(libwebm::kMkvMatrixCoefficients,
- static_cast<uint64>(matrix_coefficients_));
- }
- if (bits_per_channel_ != kValueNotPresent) {
- size += EbmlElementSize(libwebm::kMkvBitsPerChannel,
- static_cast<uint64>(bits_per_channel_));
- }
- if (chroma_subsampling_horz_ != kValueNotPresent) {
- size += EbmlElementSize(libwebm::kMkvChromaSubsamplingHorz,
- static_cast<uint64>(chroma_subsampling_horz_));
- }
- if (chroma_subsampling_vert_ != kValueNotPresent) {
- size += EbmlElementSize(libwebm::kMkvChromaSubsamplingVert,
- static_cast<uint64>(chroma_subsampling_vert_));
- }
- if (cb_subsampling_horz_ != kValueNotPresent) {
- size += EbmlElementSize(libwebm::kMkvCbSubsamplingHorz,
- static_cast<uint64>(cb_subsampling_horz_));
- }
- if (cb_subsampling_vert_ != kValueNotPresent) {
- size += EbmlElementSize(libwebm::kMkvCbSubsamplingVert,
- static_cast<uint64>(cb_subsampling_vert_));
- }
- if (chroma_siting_horz_ != kValueNotPresent) {
- size += EbmlElementSize(libwebm::kMkvChromaSitingHorz,
- static_cast<uint64>(chroma_siting_horz_));
- }
- if (chroma_siting_vert_ != kValueNotPresent) {
- size += EbmlElementSize(libwebm::kMkvChromaSitingVert,
- static_cast<uint64>(chroma_siting_vert_));
- }
- if (range_ != kValueNotPresent) {
- size += EbmlElementSize(libwebm::kMkvRange, static_cast<uint64>(range_));
- }
- if (transfer_characteristics_ != kValueNotPresent) {
- size += EbmlElementSize(libwebm::kMkvTransferCharacteristics,
- static_cast<uint64>(transfer_characteristics_));
- }
- if (primaries_ != kValueNotPresent) {
- size += EbmlElementSize(libwebm::kMkvPrimaries,
- static_cast<uint64>(primaries_));
- }
- if (max_cll_ != kValueNotPresent) {
- size += EbmlElementSize(libwebm::kMkvMaxCLL, static_cast<uint64>(max_cll_));
- }
- if (max_fall_ != kValueNotPresent) {
- size +=
- EbmlElementSize(libwebm::kMkvMaxFALL, static_cast<uint64>(max_fall_));
- }
-
- if (mastering_metadata_)
- size += mastering_metadata_->MasteringMetadataSize();
-
- return size;
-}
-
-///////////////////////////////////////////////////////////////
-//
-// Projection element
-
-uint64_t Projection::ProjectionSize() const {
- uint64_t size = PayloadSize();
-
- if (size > 0)
- size += EbmlMasterElementSize(libwebm::kMkvProjection, size);
-
- return size;
-}
-
-bool Projection::Write(IMkvWriter* writer) const {
- const uint64_t size = PayloadSize();
-
- // Don't write an empty element.
- if (size == 0)
- return true;
-
- if (!WriteEbmlMasterElement(writer, libwebm::kMkvProjection, size))
- return false;
-
- if (!WriteEbmlElement(writer, libwebm::kMkvProjectionType,
- static_cast<uint64>(type_))) {
- return false;
- }
-
- if (private_data_length_ > 0 && private_data_ != NULL &&
- !WriteEbmlElement(writer, libwebm::kMkvProjectionPrivate, private_data_,
- private_data_length_)) {
- return false;
- }
-
- if (!WriteEbmlElement(writer, libwebm::kMkvProjectionPoseYaw, pose_yaw_))
- return false;
-
- if (!WriteEbmlElement(writer, libwebm::kMkvProjectionPosePitch,
- pose_pitch_)) {
- return false;
- }
-
- if (!WriteEbmlElement(writer, libwebm::kMkvProjectionPoseRoll, pose_roll_)) {
- return false;
- }
-
- return true;
-}
-
-bool Projection::SetProjectionPrivate(const uint8_t* data,
- uint64_t data_length) {
- if (data == NULL || data_length == 0) {
- return false;
- }
-
- if (data_length != static_cast<size_t>(data_length)) {
- return false;
- }
-
- uint8_t* new_private_data =
- new (std::nothrow) uint8_t[static_cast<size_t>(data_length)];
- if (new_private_data == NULL) {
- return false;
- }
-
- delete[] private_data_;
- private_data_ = new_private_data;
- private_data_length_ = data_length;
- memcpy(private_data_, data, static_cast<size_t>(data_length));
-
- return true;
-}
-
-uint64_t Projection::PayloadSize() const {
- uint64_t size =
- EbmlElementSize(libwebm::kMkvProjection, static_cast<uint64>(type_));
-
- if (private_data_length_ > 0 && private_data_ != NULL) {
- size += EbmlElementSize(libwebm::kMkvProjectionPrivate, private_data_,
- private_data_length_);
- }
-
- size += EbmlElementSize(libwebm::kMkvProjectionPoseYaw, pose_yaw_);
- size += EbmlElementSize(libwebm::kMkvProjectionPosePitch, pose_pitch_);
- size += EbmlElementSize(libwebm::kMkvProjectionPoseRoll, pose_roll_);
-
- return size;
-}
-
-///////////////////////////////////////////////////////////////
-//
-// VideoTrack Class
-
-VideoTrack::VideoTrack(unsigned int* seed)
- : Track(seed),
- display_height_(0),
- display_width_(0),
- pixel_height_(0),
- pixel_width_(0),
- crop_left_(0),
- crop_right_(0),
- crop_top_(0),
- crop_bottom_(0),
- frame_rate_(0.0),
- height_(0),
- stereo_mode_(0),
- alpha_mode_(0),
- width_(0),
- colour_(NULL),
- projection_(NULL) {}
-
-VideoTrack::~VideoTrack() {
- delete colour_;
- delete projection_;
-}
-
-bool VideoTrack::SetStereoMode(uint64_t stereo_mode) {
- if (stereo_mode != kMono && stereo_mode != kSideBySideLeftIsFirst &&
- stereo_mode != kTopBottomRightIsFirst &&
- stereo_mode != kTopBottomLeftIsFirst &&
- stereo_mode != kSideBySideRightIsFirst)
- return false;
-
- stereo_mode_ = stereo_mode;
- return true;
-}
-
-bool VideoTrack::SetAlphaMode(uint64_t alpha_mode) {
- if (alpha_mode != kNoAlpha && alpha_mode != kAlpha)
- return false;
-
- alpha_mode_ = alpha_mode;
- return true;
-}
-
-uint64_t VideoTrack::PayloadSize() const {
- const uint64_t parent_size = Track::PayloadSize();
-
- uint64_t size = VideoPayloadSize();
- size += EbmlMasterElementSize(libwebm::kMkvVideo, size);
-
- return parent_size + size;
-}
-
-bool VideoTrack::Write(IMkvWriter* writer) const {
- if (!Track::Write(writer))
- return false;
-
- const uint64_t size = VideoPayloadSize();
-
- if (!WriteEbmlMasterElement(writer, libwebm::kMkvVideo, size))
- return false;
-
- const int64_t payload_position = writer->Position();
- if (payload_position < 0)
- return false;
-
- if (!WriteEbmlElement(
- writer, libwebm::kMkvPixelWidth,
- static_cast<uint64>((pixel_width_ > 0) ? pixel_width_ : width_)))
- return false;
- if (!WriteEbmlElement(
- writer, libwebm::kMkvPixelHeight,
- static_cast<uint64>((pixel_height_ > 0) ? pixel_height_ : height_)))
- return false;
- if (display_width_ > 0) {
- if (!WriteEbmlElement(writer, libwebm::kMkvDisplayWidth,
- static_cast<uint64>(display_width_)))
- return false;
- }
- if (display_height_ > 0) {
- if (!WriteEbmlElement(writer, libwebm::kMkvDisplayHeight,
- static_cast<uint64>(display_height_)))
- return false;
- }
- if (crop_left_ > 0) {
- if (!WriteEbmlElement(writer, libwebm::kMkvPixelCropLeft,
- static_cast<uint64>(crop_left_)))
- return false;
- }
- if (crop_right_ > 0) {
- if (!WriteEbmlElement(writer, libwebm::kMkvPixelCropRight,
- static_cast<uint64>(crop_right_)))
- return false;
- }
- if (crop_top_ > 0) {
- if (!WriteEbmlElement(writer, libwebm::kMkvPixelCropTop,
- static_cast<uint64>(crop_top_)))
- return false;
- }
- if (crop_bottom_ > 0) {
- if (!WriteEbmlElement(writer, libwebm::kMkvPixelCropBottom,
- static_cast<uint64>(crop_bottom_)))
- return false;
- }
- if (stereo_mode_ > kMono) {
- if (!WriteEbmlElement(writer, libwebm::kMkvStereoMode,
- static_cast<uint64>(stereo_mode_)))
- return false;
- }
- if (alpha_mode_ > kNoAlpha) {
- if (!WriteEbmlElement(writer, libwebm::kMkvAlphaMode,
- static_cast<uint64>(alpha_mode_)))
- return false;
- }
- if (frame_rate_ > 0.0) {
- if (!WriteEbmlElement(writer, libwebm::kMkvFrameRate,
- static_cast<float>(frame_rate_))) {
- return false;
- }
- }
- if (colour_) {
- if (!colour_->Write(writer))
- return false;
- }
- if (projection_) {
- if (!projection_->Write(writer))
- return false;
- }
-
- const int64_t stop_position = writer->Position();
- if (stop_position < 0 ||
- stop_position - payload_position != static_cast<int64_t>(size)) {
- return false;
- }
-
- return true;
-}
-
-bool VideoTrack::SetColour(const Colour& colour) {
- std::unique_ptr<Colour> colour_ptr(new Colour());
- if (!colour_ptr.get())
- return false;
-
- if (colour.mastering_metadata()) {
- if (!colour_ptr->SetMasteringMetadata(*colour.mastering_metadata()))
- return false;
- }
-
- colour_ptr->set_matrix_coefficients(colour.matrix_coefficients());
- colour_ptr->set_bits_per_channel(colour.bits_per_channel());
- colour_ptr->set_chroma_subsampling_horz(colour.chroma_subsampling_horz());
- colour_ptr->set_chroma_subsampling_vert(colour.chroma_subsampling_vert());
- colour_ptr->set_cb_subsampling_horz(colour.cb_subsampling_horz());
- colour_ptr->set_cb_subsampling_vert(colour.cb_subsampling_vert());
- colour_ptr->set_chroma_siting_horz(colour.chroma_siting_horz());
- colour_ptr->set_chroma_siting_vert(colour.chroma_siting_vert());
- colour_ptr->set_range(colour.range());
- colour_ptr->set_transfer_characteristics(colour.transfer_characteristics());
- colour_ptr->set_primaries(colour.primaries());
- colour_ptr->set_max_cll(colour.max_cll());
- colour_ptr->set_max_fall(colour.max_fall());
- delete colour_;
- colour_ = colour_ptr.release();
- return true;
-}
-
-bool VideoTrack::SetProjection(const Projection& projection) {
- std::unique_ptr<Projection> projection_ptr(new Projection());
- if (!projection_ptr.get())
- return false;
-
- if (projection.private_data()) {
- if (!projection_ptr->SetProjectionPrivate(
- projection.private_data(), projection.private_data_length())) {
- return false;
- }
- }
-
- projection_ptr->set_type(projection.type());
- projection_ptr->set_pose_yaw(projection.pose_yaw());
- projection_ptr->set_pose_pitch(projection.pose_pitch());
- projection_ptr->set_pose_roll(projection.pose_roll());
- delete projection_;
- projection_ = projection_ptr.release();
- return true;
-}
-
-uint64_t VideoTrack::VideoPayloadSize() const {
- uint64_t size = EbmlElementSize(
- libwebm::kMkvPixelWidth,
- static_cast<uint64>((pixel_width_ > 0) ? pixel_width_ : width_));
- size += EbmlElementSize(
- libwebm::kMkvPixelHeight,
- static_cast<uint64>((pixel_height_ > 0) ? pixel_height_ : height_));
- if (display_width_ > 0)
- size += EbmlElementSize(libwebm::kMkvDisplayWidth,
- static_cast<uint64>(display_width_));
- if (display_height_ > 0)
- size += EbmlElementSize(libwebm::kMkvDisplayHeight,
- static_cast<uint64>(display_height_));
- if (crop_left_ > 0)
- size += EbmlElementSize(libwebm::kMkvPixelCropLeft,
- static_cast<uint64>(crop_left_));
- if (crop_right_ > 0)
- size += EbmlElementSize(libwebm::kMkvPixelCropRight,
- static_cast<uint64>(crop_right_));
- if (crop_top_ > 0)
- size += EbmlElementSize(libwebm::kMkvPixelCropTop,
- static_cast<uint64>(crop_top_));
- if (crop_bottom_ > 0)
- size += EbmlElementSize(libwebm::kMkvPixelCropBottom,
- static_cast<uint64>(crop_bottom_));
- if (stereo_mode_ > kMono)
- size += EbmlElementSize(libwebm::kMkvStereoMode,
- static_cast<uint64>(stereo_mode_));
- if (alpha_mode_ > kNoAlpha)
- size += EbmlElementSize(libwebm::kMkvAlphaMode,
- static_cast<uint64>(alpha_mode_));
- if (frame_rate_ > 0.0)
- size += EbmlElementSize(libwebm::kMkvFrameRate,
- static_cast<float>(frame_rate_));
- if (colour_)
- size += colour_->ColourSize();
- if (projection_)
- size += projection_->ProjectionSize();
-
- return size;
-}
-
-///////////////////////////////////////////////////////////////
-//
-// AudioTrack Class
-
-AudioTrack::AudioTrack(unsigned int* seed)
- : Track(seed), bit_depth_(0), channels_(1), sample_rate_(0.0) {}
-
-AudioTrack::~AudioTrack() {}
-
-uint64_t AudioTrack::PayloadSize() const {
- const uint64_t parent_size = Track::PayloadSize();
-
- uint64_t size = EbmlElementSize(libwebm::kMkvSamplingFrequency,
- static_cast<float>(sample_rate_));
- size +=
- EbmlElementSize(libwebm::kMkvChannels, static_cast<uint64>(channels_));
- if (bit_depth_ > 0)
- size +=
- EbmlElementSize(libwebm::kMkvBitDepth, static_cast<uint64>(bit_depth_));
- size += EbmlMasterElementSize(libwebm::kMkvAudio, size);
-
- return parent_size + size;
-}
-
-bool AudioTrack::Write(IMkvWriter* writer) const {
- if (!Track::Write(writer))
- return false;
-
- // Calculate AudioSettings size.
- uint64_t size = EbmlElementSize(libwebm::kMkvSamplingFrequency,
- static_cast<float>(sample_rate_));
- size +=
- EbmlElementSize(libwebm::kMkvChannels, static_cast<uint64>(channels_));
- if (bit_depth_ > 0)
- size +=
- EbmlElementSize(libwebm::kMkvBitDepth, static_cast<uint64>(bit_depth_));
-
- if (!WriteEbmlMasterElement(writer, libwebm::kMkvAudio, size))
- return false;
-
- const int64_t payload_position = writer->Position();
- if (payload_position < 0)
- return false;
-
- if (!WriteEbmlElement(writer, libwebm::kMkvSamplingFrequency,
- static_cast<float>(sample_rate_)))
- return false;
- if (!WriteEbmlElement(writer, libwebm::kMkvChannels,
- static_cast<uint64>(channels_)))
- return false;
- if (bit_depth_ > 0)
- if (!WriteEbmlElement(writer, libwebm::kMkvBitDepth,
- static_cast<uint64>(bit_depth_)))
- return false;
-
- const int64_t stop_position = writer->Position();
- if (stop_position < 0 ||
- stop_position - payload_position != static_cast<int64_t>(size))
- return false;
-
- return true;
-}
-
-///////////////////////////////////////////////////////////////
-//
-// Tracks Class
-
-const char Tracks::kOpusCodecId[] = "A_OPUS";
-const char Tracks::kVorbisCodecId[] = "A_VORBIS";
-const char Tracks::kVp8CodecId[] = "V_VP8";
-const char Tracks::kVp9CodecId[] = "V_VP9";
-const char Tracks::kVp10CodecId[] = "V_VP10";
-const char Tracks::kAV1CodecId[] = "V_AV1";
-const char Tracks::kWebVttCaptionsId[] = "D_WEBVTT/CAPTIONS";
-const char Tracks::kWebVttDescriptionsId[] = "D_WEBVTT/DESCRIPTIONS";
-const char Tracks::kWebVttMetadataId[] = "D_WEBVTT/METADATA";
-const char Tracks::kWebVttSubtitlesId[] = "D_WEBVTT/SUBTITLES";
-
-Tracks::Tracks()
- : track_entries_(NULL), track_entries_size_(0), wrote_tracks_(false) {}
-
-Tracks::~Tracks() {
- if (track_entries_) {
- for (uint32_t i = 0; i < track_entries_size_; ++i) {
- Track* const track = track_entries_[i];
- delete track;
- }
- delete[] track_entries_;
- }
-}
-
-// Appends |track| to the track list and assigns it a track number.
-//
-// |number| == 0 asks for an automatically chosen number; a positive |number|
-// is used verbatim and must not collide with an existing track.
-//
-// Returns false, leaving the list untouched, when |track| is NULL, |number|
-// is negative or above 126, the number is already taken, no number within
-// [1, 126] can be assigned, the tracks have already been written, or the
-// allocation fails. On success the pointer is stored in |track_entries_|
-// (whose elements are deleted by the destructor).
-bool Tracks::AddTrack(Track* track, int32_t number) {
-  if (!track || number < 0 || wrote_tracks_)
-    return false;
-
-  // This muxer only supports track numbers in the range [1, 126], in
-  // order to be able (to use Matroska integer representation) to
-  // serialize the block header (of which the track number is a part)
-  // for a frame using exactly 4 bytes.
-  const uint32_t kMaxTrackNumber = 0x7E;
-
-  if (static_cast<uint32_t>(number) > kMaxTrackNumber)
-    return false;
-
-  const uint32_t count = track_entries_size_ + 1;
-  uint32_t track_num = number;
-
-  if (track_num > 0) {
-    // Check to make sure a track does not already have |track_num|.
-    for (uint32_t i = 0; i < track_entries_size_; ++i) {
-      if (track_entries_[i]->number() == track_num)
-        return false;
-    }
-  } else {
-    // Pick an available track number > 0: start at the new track count and
-    // step past every number that is already in use.
-    track_num = count;
-
-    bool unique = false;
-    while (!unique) {
-      unique = true;
-      for (uint32_t i = 0; i < track_entries_size_; ++i) {
-        if (track_entries_[i]->number() == track_num) {
-          ++track_num;
-          unique = false;
-          break;
-        }
-      }
-    }
-
-    // The automatically chosen number must respect the same upper bound as
-    // an explicitly requested one.
-    if (track_num > kMaxTrackNumber)
-      return false;
-  }
-
-  // All validation is done; only now grow the array so that every failure
-  // path above leaves |track_entries_| exactly as it was.
-  Track** const track_entries = new (std::nothrow) Track*[count];  // NOLINT
-  if (!track_entries)
-    return false;
-
-  for (uint32_t i = 0; i < track_entries_size_; ++i) {
-    track_entries[i] = track_entries_[i];
-  }
-
-  delete[] track_entries_;
-
-  track->set_number(track_num);
-
-  track_entries[track_entries_size_] = track;
-  track_entries_ = track_entries;
-  track_entries_size_ = count;
-  return true;
-}
-
-// Returns the track stored at position |index|, or NULL when no tracks have
-// been added or |index| is past the end of the list.
-const Track* Tracks::GetTrackByIndex(uint32_t index) const {
-  const bool in_range = track_entries_ != NULL && index < track_entries_size_;
-  if (!in_range)
-    return NULL;
-
-  return track_entries_[index];
-}
-
-// Linear search for the track whose number() equals |track_number|.
-// Returns NULL when no track matches.
-Track* Tracks::GetTrackByNumber(uint64_t track_number) const {
-  Track* match = NULL;
-
-  const int32_t total = track_entries_size();
-  for (int32_t idx = 0; idx < total && match == NULL; ++idx) {
-    Track* const candidate = track_entries_[idx];
-    if (candidate->number() == track_number)
-      match = candidate;
-  }
-
-  return match;
-}
-
-// Returns true when a track numbered |track_number| exists and its type is
-// kAudio. An unknown track number yields false; GetTrackByNumber() returns
-// NULL in that case, so the result must be checked before dereferencing.
-bool Tracks::TrackIsAudio(uint64_t track_number) const {
-  const Track* const track = GetTrackByNumber(track_number);
-
-  return track != NULL && track->type() == kAudio;
-}
-
-// Returns true when a track numbered |track_number| exists and its type is
-// kVideo. An unknown track number yields false; GetTrackByNumber() returns
-// NULL in that case, so the result must be checked before dereferencing.
-bool Tracks::TrackIsVideo(uint64_t track_number) const {
-  const Track* const track = GetTrackByNumber(track_number);
-
-  return track != NULL && track->type() == kVideo;
-}
-
-bool Tracks::Write(IMkvWriter* writer) const {
- uint64_t size = 0;
- const int32_t count = track_entries_size();
- for (int32_t i = 0; i < count; ++i) {
- const Track* const track = GetTrackByIndex(i);
-
- if (!track)
- return false;
-
- size += track->Size();
- }
-
- if (!WriteEbmlMasterElement(writer, libwebm::kMkvTracks, size))
- return false;
-
- const int64_t payload_position = writer->Position();
- if (payload_position < 0)
- return false;
-
- for (int32_t i = 0; i < count; ++i) {
- const Track* const track = GetTrackByIndex(i);
- if (!track->Write(writer))
- return false;
- }
-
- const int64_t stop_position = writer->Position();
- if (stop_position < 0 ||
- stop_position - payload_position != static_cast<int64_t>(size))
- return false;
-
- wrote_tracks_ = true;
- return true;
-}
-
-///////////////////////////////////////////////////////////////
-//
-// Chapter Class
-
-bool Chapter::set_id(const char* id) { return StrCpy(id, &id_); }
-
-// Converts the chapter's start and end times from nanoseconds to timecode
-// units using the timecode scale of |segment|'s SegmentInfo.
-//
-// If the segment has no SegmentInfo, or its timecode scale is zero, the
-// conversion is impossible (it would divide by zero) and the stored
-// timecodes are left unchanged.
-void Chapter::set_time(const Segment& segment, uint64_t start_ns,
-                       uint64_t end_ns) {
-  const SegmentInfo* const info = segment.GetSegmentInfo();
-  if (info == NULL)
-    return;
-
-  const uint64_t timecode_scale = info->timecode_scale();
-  if (timecode_scale == 0)
-    return;
-
-  start_timecode_ = start_ns / timecode_scale;
-  end_timecode_ = end_ns / timecode_scale;
-}
-
-// Appends a display entry (title, language, country) to this chapter.
-//
-// Returns false if the displays array cannot be grown or any of the three
-// strings cannot be stored. On failure the partially filled entry is
-// cleared and not counted, so |displays_count_| only ever covers fully
-// initialised entries.
-bool Chapter::add_string(const char* title, const char* language,
-                         const char* country) {
-  if (!ExpandDisplaysArray())
-    return false;
-
-  // Fill the next free slot first; commit it (by bumping the count) only
-  // once every field has been set successfully.
-  Display& d = displays_[displays_count_];
-  d.Init();
-
-  if (!d.set_title(title) || !d.set_language(language) ||
-      !d.set_country(country)) {
-    d.Clear();  // release whatever strings were stored before the failure
-    return false;
-  }
-
-  ++displays_count_;
-  return true;
-}
-
-Chapter::Chapter() {
- // This ctor only constructs the object. Proper initialization is
- // done in Init() (called in Chapters::AddChapter()). The only
- // reason we bother implementing this ctor is because we had to
- // declare it as private (along with the dtor), in order to prevent
- // clients from creating Chapter instances (a privilege we grant
- // only to the Chapters class). Doing no initialization here also
- // means that creating arrays of chapter objects is more efficient,
- // because we only initialize each new chapter object as it becomes
- // active on the array.
-}
-
-Chapter::~Chapter() {}
-
-void Chapter::Init(unsigned int* seed) {
- id_ = NULL;
- start_timecode_ = 0;
- end_timecode_ = 0;
- displays_ = NULL;
- displays_size_ = 0;
- displays_count_ = 0;
- uid_ = MakeUID(seed);
-}
-
-void Chapter::ShallowCopy(Chapter* dst) const {
- dst->id_ = id_;
- dst->start_timecode_ = start_timecode_;
- dst->end_timecode_ = end_timecode_;
- dst->uid_ = uid_;
- dst->displays_ = displays_;
- dst->displays_size_ = displays_size_;
- dst->displays_count_ = displays_count_;
-}
-
-void Chapter::Clear() {
- StrCpy(NULL, &id_);
-
- while (displays_count_ > 0) {
- Display& d = displays_[--displays_count_];
- d.Clear();
- }
-
- delete[] displays_;
- displays_ = NULL;
-
- displays_size_ = 0;
-}
-
-bool Chapter::ExpandDisplaysArray() {
- if (displays_size_ > displays_count_)
- return true; // nothing to do yet
-
- const int size = (displays_size_ == 0) ? 1 : 2 * displays_size_;
-
- Display* const displays = new (std::nothrow) Display[size]; // NOLINT
- if (displays == NULL)
- return false;
-
- for (int idx = 0; idx < displays_count_; ++idx) {
- displays[idx] = displays_[idx]; // shallow copy
- }
-
- delete[] displays_;
-
- displays_ = displays;
- displays_size_ = size;
-
- return true;
-}
-
-uint64_t Chapter::WriteAtom(IMkvWriter* writer) const {
- uint64_t payload_size =
- EbmlElementSize(libwebm::kMkvChapterStringUID, id_) +
- EbmlElementSize(libwebm::kMkvChapterUID, static_cast<uint64>(uid_)) +
- EbmlElementSize(libwebm::kMkvChapterTimeStart,
- static_cast<uint64>(start_timecode_)) +
- EbmlElementSize(libwebm::kMkvChapterTimeEnd,
- static_cast<uint64>(end_timecode_));
-
- for (int idx = 0; idx < displays_count_; ++idx) {
- const Display& d = displays_[idx];
- payload_size += d.WriteDisplay(NULL);
- }
-
- const uint64_t atom_size =
- EbmlMasterElementSize(libwebm::kMkvChapterAtom, payload_size) +
- payload_size;
-
- if (writer == NULL)
- return atom_size;
-
- const int64_t start = writer->Position();
-
- if (!WriteEbmlMasterElement(writer, libwebm::kMkvChapterAtom, payload_size))
- return 0;
-
- if (!WriteEbmlElement(writer, libwebm::kMkvChapterStringUID, id_))
- return 0;
-
- if (!WriteEbmlElement(writer, libwebm::kMkvChapterUID,
- static_cast<uint64>(uid_)))
- return 0;
-
- if (!WriteEbmlElement(writer, libwebm::kMkvChapterTimeStart,
- static_cast<uint64>(start_timecode_)))
- return 0;
-
- if (!WriteEbmlElement(writer, libwebm::kMkvChapterTimeEnd,
- static_cast<uint64>(end_timecode_)))
- return 0;
-
- for (int idx = 0; idx < displays_count_; ++idx) {
- const Display& d = displays_[idx];
-
- if (!d.WriteDisplay(writer))
- return 0;
- }
-
- const int64_t stop = writer->Position();
-
- if (stop >= start && uint64_t(stop - start) != atom_size)
- return 0;
-
- return atom_size;
-}
-
-void Chapter::Display::Init() {
- title_ = NULL;
- language_ = NULL;
- country_ = NULL;
-}
-
-void Chapter::Display::Clear() {
- StrCpy(NULL, &title_);
- StrCpy(NULL, &language_);
- StrCpy(NULL, &country_);
-}
-
-bool Chapter::Display::set_title(const char* title) {
- return StrCpy(title, &title_);
-}
-
-bool Chapter::Display::set_language(const char* language) {
- return StrCpy(language, &language_);
-}
-
-bool Chapter::Display::set_country(const char* country) {
- return StrCpy(country, &country_);
-}
-
-uint64_t Chapter::Display::WriteDisplay(IMkvWriter* writer) const {
- uint64_t payload_size = EbmlElementSize(libwebm::kMkvChapString, title_);
-
- if (language_)
- payload_size += EbmlElementSize(libwebm::kMkvChapLanguage, language_);
-
- if (country_)
- payload_size += EbmlElementSize(libwebm::kMkvChapCountry, country_);
-
- const uint64_t display_size =
- EbmlMasterElementSize(libwebm::kMkvChapterDisplay, payload_size) +
- payload_size;
-
- if (writer == NULL)
- return display_size;
-
- const int64_t start = writer->Position();
-
- if (!WriteEbmlMasterElement(writer, libwebm::kMkvChapterDisplay,
- payload_size))
- return 0;
-
- if (!WriteEbmlElement(writer, libwebm::kMkvChapString, title_))
- return 0;
-
- if (language_) {
- if (!WriteEbmlElement(writer, libwebm::kMkvChapLanguage, language_))
- return 0;
- }
-
- if (country_) {
- if (!WriteEbmlElement(writer, libwebm::kMkvChapCountry, country_))
- return 0;
- }
-
- const int64_t stop = writer->Position();
-
- if (stop >= start && uint64_t(stop - start) != display_size)
- return 0;
-
- return display_size;
-}
-
-///////////////////////////////////////////////////////////////
-//
-// Chapters Class
-
-Chapters::Chapters() : chapters_size_(0), chapters_count_(0), chapters_(NULL) {}
-
-Chapters::~Chapters() {
- while (chapters_count_ > 0) {
- Chapter& chapter = chapters_[--chapters_count_];
- chapter.Clear();
- }
-
- delete[] chapters_;
- chapters_ = NULL;
-}
-
-int Chapters::Count() const { return chapters_count_; }
-
-Chapter* Chapters::AddChapter(unsigned int* seed) {
- if (!ExpandChaptersArray())
- return NULL;
-
- Chapter& chapter = chapters_[chapters_count_++];
- chapter.Init(seed);
-
- return &chapter;
-}
-
-bool Chapters::Write(IMkvWriter* writer) const {
- if (writer == NULL)
- return false;
-
- const uint64_t payload_size = WriteEdition(NULL); // return size only
-
- if (!WriteEbmlMasterElement(writer, libwebm::kMkvChapters, payload_size))
- return false;
-
- const int64_t start = writer->Position();
-
- if (WriteEdition(writer) == 0) // error
- return false;
-
- const int64_t stop = writer->Position();
-
- if (stop >= start && uint64_t(stop - start) != payload_size)
- return false;
-
- return true;
-}
-
-bool Chapters::ExpandChaptersArray() {
- if (chapters_size_ > chapters_count_)
- return true; // nothing to do yet
-
- const int size = (chapters_size_ == 0) ? 1 : 2 * chapters_size_;
-
- Chapter* const chapters = new (std::nothrow) Chapter[size]; // NOLINT
- if (chapters == NULL)
- return false;
-
- for (int idx = 0; idx < chapters_count_; ++idx) {
- const Chapter& src = chapters_[idx];
- Chapter* const dst = chapters + idx;
- src.ShallowCopy(dst);
- }
-
- delete[] chapters_;
-
- chapters_ = chapters;
- chapters_size_ = size;
-
- return true;
-}
-
-uint64_t Chapters::WriteEdition(IMkvWriter* writer) const {
- uint64_t payload_size = 0;
-
- for (int idx = 0; idx < chapters_count_; ++idx) {
- const Chapter& chapter = chapters_[idx];
- payload_size += chapter.WriteAtom(NULL);
- }
-
- const uint64_t edition_size =
- EbmlMasterElementSize(libwebm::kMkvEditionEntry, payload_size) +
- payload_size;
-
- if (writer == NULL) // return size only
- return edition_size;
-
- const int64_t start = writer->Position();
-
- if (!WriteEbmlMasterElement(writer, libwebm::kMkvEditionEntry, payload_size))
- return 0; // error
-
- for (int idx = 0; idx < chapters_count_; ++idx) {
- const Chapter& chapter = chapters_[idx];
-
- const uint64_t chapter_size = chapter.WriteAtom(writer);
- if (chapter_size == 0) // error
- return 0;
- }
-
- const int64_t stop = writer->Position();
-
- if (stop >= start && uint64_t(stop - start) != edition_size)
- return 0;
-
- return edition_size;
-}
-
-// Tag Class
-
-// Appends a (name, string) simple tag to this tag.
-//
-// Returns false if the simple-tags array cannot be grown or either string
-// cannot be stored. On failure the partially filled entry is cleared and not
-// counted, so |simple_tags_count_| only ever covers fully initialised
-// entries.
-bool Tag::add_simple_tag(const char* tag_name, const char* tag_string) {
-  if (!ExpandSimpleTagsArray())
-    return false;
-
-  // Fill the next free slot first; commit it (by bumping the count) only
-  // once both fields have been set successfully.
-  SimpleTag& st = simple_tags_[simple_tags_count_];
-  st.Init();
-
-  if (!st.set_tag_name(tag_name) || !st.set_tag_string(tag_string)) {
-    st.Clear();  // release whatever was stored before the failure
-    return false;
-  }
-
-  ++simple_tags_count_;
-  return true;
-}
-
-Tag::Tag() {
- simple_tags_ = NULL;
- simple_tags_size_ = 0;
- simple_tags_count_ = 0;
-}
-
-Tag::~Tag() {}
-
-void Tag::ShallowCopy(Tag* dst) const {
- dst->simple_tags_ = simple_tags_;
- dst->simple_tags_size_ = simple_tags_size_;
- dst->simple_tags_count_ = simple_tags_count_;
-}
-
-void Tag::Clear() {
- while (simple_tags_count_ > 0) {
- SimpleTag& st = simple_tags_[--simple_tags_count_];
- st.Clear();
- }
-
- delete[] simple_tags_;
- simple_tags_ = NULL;
-
- simple_tags_size_ = 0;
-}
-
-bool Tag::ExpandSimpleTagsArray() {
- if (simple_tags_size_ > simple_tags_count_)
- return true; // nothing to do yet
-
- const int size = (simple_tags_size_ == 0) ? 1 : 2 * simple_tags_size_;
-
- SimpleTag* const simple_tags = new (std::nothrow) SimpleTag[size]; // NOLINT
- if (simple_tags == NULL)
- return false;
-
- for (int idx = 0; idx < simple_tags_count_; ++idx) {
- simple_tags[idx] = simple_tags_[idx]; // shallow copy
- }
-
- delete[] simple_tags_;
-
- simple_tags_ = simple_tags;
- simple_tags_size_ = size;
-
- return true;
-}
-
-uint64_t Tag::Write(IMkvWriter* writer) const {
- uint64_t payload_size = 0;
-
- for (int idx = 0; idx < simple_tags_count_; ++idx) {
- const SimpleTag& st = simple_tags_[idx];
- payload_size += st.Write(NULL);
- }
-
- const uint64_t tag_size =
- EbmlMasterElementSize(libwebm::kMkvTag, payload_size) + payload_size;
-
- if (writer == NULL)
- return tag_size;
-
- const int64_t start = writer->Position();
-
- if (!WriteEbmlMasterElement(writer, libwebm::kMkvTag, payload_size))
- return 0;
-
- for (int idx = 0; idx < simple_tags_count_; ++idx) {
- const SimpleTag& st = simple_tags_[idx];
-
- if (!st.Write(writer))
- return 0;
- }
-
- const int64_t stop = writer->Position();
-
- if (stop >= start && uint64_t(stop - start) != tag_size)
- return 0;
-
- return tag_size;
-}
-
-// Tag::SimpleTag
-
-void Tag::SimpleTag::Init() {
- tag_name_ = NULL;
- tag_string_ = NULL;
-}
-
-void Tag::SimpleTag::Clear() {
- StrCpy(NULL, &tag_name_);
- StrCpy(NULL, &tag_string_);
-}
-
-bool Tag::SimpleTag::set_tag_name(const char* tag_name) {
- return StrCpy(tag_name, &tag_name_);
-}
-
-bool Tag::SimpleTag::set_tag_string(const char* tag_string) {
- return StrCpy(tag_string, &tag_string_);
-}
-
-uint64_t Tag::SimpleTag::Write(IMkvWriter* writer) const {
- uint64_t payload_size = EbmlElementSize(libwebm::kMkvTagName, tag_name_);
-
- payload_size += EbmlElementSize(libwebm::kMkvTagString, tag_string_);
-
- const uint64_t simple_tag_size =
- EbmlMasterElementSize(libwebm::kMkvSimpleTag, payload_size) +
- payload_size;
-
- if (writer == NULL)
- return simple_tag_size;
-
- const int64_t start = writer->Position();
-
- if (!WriteEbmlMasterElement(writer, libwebm::kMkvSimpleTag, payload_size))
- return 0;
-
- if (!WriteEbmlElement(writer, libwebm::kMkvTagName, tag_name_))
- return 0;
-
- if (!WriteEbmlElement(writer, libwebm::kMkvTagString, tag_string_))
- return 0;
-
- const int64_t stop = writer->Position();
-
- if (stop >= start && uint64_t(stop - start) != simple_tag_size)
- return 0;
-
- return simple_tag_size;
-}
-
-// Tags Class
-
-Tags::Tags() : tags_size_(0), tags_count_(0), tags_(NULL) {}
-
-Tags::~Tags() {
- while (tags_count_ > 0) {
- Tag& tag = tags_[--tags_count_];
- tag.Clear();
- }
-
- delete[] tags_;
- tags_ = NULL;
-}
-
-int Tags::Count() const { return tags_count_; }
-
-Tag* Tags::AddTag() {
- if (!ExpandTagsArray())
- return NULL;
-
- Tag& tag = tags_[tags_count_++];
-
- return &tag;
-}
-
-// Writes the Tags master element followed by every stored Tag.
-//
-// Returns false when |writer| is NULL, when the master element header or any
-// tag fails to write, or when the number of bytes written after the header
-// does not match the precomputed payload size.
-bool Tags::Write(IMkvWriter* writer) const {
-  if (writer == NULL)
-    return false;
-
-  // First pass: Tag::Write(NULL) only reports the size of each tag.
-  uint64_t payload_size = 0;
-
-  for (int idx = 0; idx < tags_count_; ++idx) {
-    const Tag& tag = tags_[idx];
-    payload_size += tag.Write(NULL);
-  }
-
-  if (!WriteEbmlMasterElement(writer, libwebm::kMkvTags, payload_size))
-    return false;
-
-  const int64_t start = writer->Position();
-
-  // Second pass: actually emit the tags. Tag::Write() reports 0 on error.
-  for (int idx = 0; idx < tags_count_; ++idx) {
-    const Tag& tag = tags_[idx];
-
-    const uint64_t tag_size = tag.Write(writer);
-    if (tag_size == 0)  // error
-      return false;
-  }
-
-  const int64_t stop = writer->Position();
-
-  if (stop >= start && uint64_t(stop - start) != payload_size)
-    return false;
-
-  return true;
-}
-
-bool Tags::ExpandTagsArray() {
- if (tags_size_ > tags_count_)
- return true; // nothing to do yet
-
- const int size = (tags_size_ == 0) ? 1 : 2 * tags_size_;
-
- Tag* const tags = new (std::nothrow) Tag[size]; // NOLINT
- if (tags == NULL)
- return false;
-
- for (int idx = 0; idx < tags_count_; ++idx) {
- const Tag& src = tags_[idx];
- Tag* const dst = tags + idx;
- src.ShallowCopy(dst);
- }
-
- delete[] tags_;
-
- tags_ = tags;
- tags_size_ = size;
-
- return true;
-}
-
-///////////////////////////////////////////////////////////////
-//
-// Cluster class
-
-Cluster::Cluster(uint64_t timecode, int64_t cues_pos, uint64_t timecode_scale,
- bool write_last_frame_with_duration, bool fixed_size_timecode)
- : blocks_added_(0),
- finalized_(false),
- fixed_size_timecode_(fixed_size_timecode),
- header_written_(false),
- payload_size_(0),
- position_for_cues_(cues_pos),
- size_position_(-1),
- timecode_(timecode),
- timecode_scale_(timecode_scale),
- write_last_frame_with_duration_(write_last_frame_with_duration),
- writer_(NULL) {}
-
-Cluster::~Cluster() {
-  // Frames can still be queued here if the Cluster was never finalized.
-  // Release every one of them, track by track, then drop the map entries.
-  for (FrameMapIterator track_it = stored_frames_.begin();
-       track_it != stored_frames_.end(); ++track_it) {
-    std::list<Frame*>& pending = track_it->second;
-    for (std::list<Frame*>::iterator frame_it = pending.begin();
-         frame_it != pending.end(); ++frame_it) {
-      delete *frame_it;
-    }
-    pending.clear();
-  }
-  stored_frames_.clear();
-}
-
-// Remembers |ptr_writer| as the writer used by this cluster. A NULL writer
-// is rejected (returns false) and the stored writer is left unchanged.
-bool Cluster::Init(IMkvWriter* ptr_writer) {
-  const bool have_writer = (ptr_writer != NULL);
-  if (have_writer)
-    writer_ = ptr_writer;
-
-  return have_writer;
-}
-
-bool Cluster::AddFrame(const Frame* const frame) {
- return QueueOrWriteFrame(frame);
-}
-
-bool Cluster::AddFrame(const uint8_t* data, uint64_t length,
- uint64_t track_number, uint64_t abs_timecode,
- bool is_key) {
- Frame frame;
- if (!frame.Init(data, length))
- return false;
- frame.set_track_number(track_number);
- frame.set_timestamp(abs_timecode);
- frame.set_is_key(is_key);
- return QueueOrWriteFrame(&frame);
-}
-
-bool Cluster::AddFrameWithAdditional(const uint8_t* data, uint64_t length,
- const uint8_t* additional,
- uint64_t additional_length,
- uint64_t add_id, uint64_t track_number,
- uint64_t abs_timecode, bool is_key) {
- if (!additional || additional_length == 0) {
- return false;
- }
- Frame frame;
- if (!frame.Init(data, length) ||
- !frame.AddAdditionalData(additional, additional_length, add_id)) {
- return false;
- }
- frame.set_track_number(track_number);
- frame.set_timestamp(abs_timecode);
- frame.set_is_key(is_key);
- return QueueOrWriteFrame(&frame);
-}
-
-bool Cluster::AddFrameWithDiscardPadding(const uint8_t* data, uint64_t length,
- int64_t discard_padding,
- uint64_t track_number,
- uint64_t abs_timecode, bool is_key) {
- Frame frame;
- if (!frame.Init(data, length))
- return false;
- frame.set_discard_padding(discard_padding);
- frame.set_track_number(track_number);
- frame.set_timestamp(abs_timecode);
- frame.set_is_key(is_key);
- return QueueOrWriteFrame(&frame);
-}
-
-bool Cluster::AddMetadata(const uint8_t* data, uint64_t length,
- uint64_t track_number, uint64_t abs_timecode,
- uint64_t duration_timecode) {
- Frame frame;
- if (!frame.Init(data, length))
- return false;
- frame.set_track_number(track_number);
- frame.set_timestamp(abs_timecode);
- frame.set_duration(duration_timecode);
- frame.set_is_key(true); // All metadata blocks are keyframes.
- return QueueOrWriteFrame(&frame);
-}
-
-void Cluster::AddPayloadSize(uint64_t size) { payload_size_ += size; }
-
-// Finalizes a cluster that does not hold frames back. When the cluster was
-// created with |write_last_frame_with_duration_| set, this overload fails
-// without doing anything.
-bool Cluster::Finalize() {
-  if (write_last_frame_with_duration_)
-    return false;
-
-  return Finalize(false, 0);
-}
-
-// Flushes any held-back frames, then (on a seekable writer) goes back and
-// overwrites the cluster's size field with the final payload size.
-//
-// |set_last_frame_duration|: when true, the last queued frame of each track
-// that has no duration yet gets |duration| minus its own timestamp.
-// Returns false if there is no writer, the cluster is already finalized, a
-// frame fails to write, the header was never written, or a seek/size write
-// fails. |finalized_| is only set on the success path.
-bool Cluster::Finalize(bool set_last_frame_duration, uint64_t duration) {
- if (!writer_ || finalized_)
- return false;
-
- if (write_last_frame_with_duration_) {
- // Write out held back Frames. This essentially performs a k-way merge
- // across all tracks in the increasing order of timestamps.
- while (!stored_frames_.empty()) {
- Frame* frame = stored_frames_.begin()->second.front();
-
- // Get the next frame to write (frame with least timestamp across all
- // tracks).
- for (FrameMapIterator frames_iterator = ++stored_frames_.begin();
- frames_iterator != stored_frames_.end(); ++frames_iterator) {
- if (frames_iterator->second.front()->timestamp() < frame->timestamp()) {
- frame = frames_iterator->second.front();
- }
- }
-
- // Set the duration if it's the last frame for the track.
- // NOTE(review): |duration| - timestamp() is unsigned arithmetic and would
- // wrap if |duration| is smaller than the frame's timestamp - confirm that
- // callers guarantee otherwise.
- if (set_last_frame_duration &&
- stored_frames_[frame->track_number()].size() == 1 &&
- !frame->duration_set()) {
- frame->set_duration(duration - frame->timestamp());
- if (!frame->is_key() && !frame->reference_block_timestamp_set()) {
- frame->set_reference_block_timestamp(
- last_block_timestamp_[frame->track_number()]);
- }
- }
-
- // Write the frame and remove it from |stored_frames_|.
- // The frame is unlinked and deleted even when the write failed, so no
- // dangling pointer is left in |stored_frames_| on the error return.
- const bool wrote_frame = DoWriteFrame(frame);
- stored_frames_[frame->track_number()].pop_front();
- if (stored_frames_[frame->track_number()].empty()) {
- stored_frames_.erase(frame->track_number());
- }
- delete frame;
- if (!wrote_frame)
- return false;
- }
- }
-
- // |size_position_| starts at -1 and is only assigned in
- // WriteClusterHeader(), so -1 means no header has been written.
- if (size_position_ == -1)
- return false;
-
- if (writer_->Seekable()) {
- const int64_t pos = writer_->Position();
-
- // In the three calls below a non-zero result is treated as failure.
- if (writer_->Position(size_position_))
- return false;
-
- // Overwrite the 8-byte size field written by WriteClusterHeader().
- if (WriteUIntSize(writer_, payload_size(), 8))
- return false;
-
- // Return to where writing left off.
- if (writer_->Position(pos))
- return false;
- }
-
- finalized_ = true;
-
- return true;
-}
-
-uint64_t Cluster::Size() const {
- const uint64_t element_size =
- EbmlMasterElementSize(libwebm::kMkvCluster, 0xFFFFFFFFFFFFFFFFULL) +
- payload_size_;
- return element_size;
-}
-
-// Prepares the cluster for a block write: fails once the cluster has been
-// finalized, otherwise makes sure the cluster header has been emitted.
-bool Cluster::PreWriteBlock() {
-  if (finalized_)
-    return false;
-
-  // The header is written lazily, the first time a block is about to go out.
-  return header_written_ || WriteClusterHeader();
-}
-
-void Cluster::PostWriteBlock(uint64_t element_size) {
- AddPayloadSize(element_size);
- ++blocks_added_;
-}
-
-// Returns |abs_timecode| relative to this cluster's timecode, or -1 when the
-// difference is negative or larger than kMaxBlockTimecode.
-int64_t Cluster::GetRelativeTimecode(int64_t abs_timecode) const {
-  const int64_t base = this->Cluster::timecode();
-  const int64_t offset = static_cast<int64_t>(abs_timecode) - base;
-
-  const bool out_of_range = offset < 0 || offset > kMaxBlockTimecode;
-  if (out_of_range)
-    return -1;
-
-  return offset;
-}
-
-bool Cluster::DoWriteFrame(const Frame* const frame) {
- if (!frame || !frame->IsValid())
- return false;
-
- if (!PreWriteBlock())
- return false;
-
- const uint64_t element_size = WriteFrame(writer_, frame, this);
- if (element_size == 0)
- return false;
-
- PostWriteBlock(element_size);
- last_block_timestamp_[frame->track_number()] = frame->timestamp();
- return true;
-}
-
-// Writes |frame| immediately, or queues a copy of it when the cluster was
-// created with |write_last_frame_with_duration_| set.
-//
-// In queueing mode the newest frame of each track is always held back. Older
-// frames of the current track are flushed in order, as long as no other
-// track holds a frame with a strictly earlier timestamp.
-//
-// Returns false if |frame| is NULL or invalid, or if a write fails. A frame
-// is unlinked from |stored_frames_| before it is deleted, so the queue never
-// holds a pointer to a freed frame, including on the error return.
-bool Cluster::QueueOrWriteFrame(const Frame* const frame) {
-  if (!frame || !frame->IsValid())
-    return false;
-
-  // If |write_last_frame_with_duration_| is not set, then write the frame
-  // right away.
-  if (!write_last_frame_with_duration_) {
-    return DoWriteFrame(frame);
-  }
-
-  // Queue a copy of the current frame at the back of its track's list.
-  const uint64_t track_number = frame->track_number();
-  Frame* const frame_to_store = new Frame();
-  frame_to_store->CopyFrom(*frame);
-
-  std::list<Frame*>& track_frames = stored_frames_[track_number];
-  track_frames.push_back(frame_to_store);
-
-  // Flush from the front of the current track, always keeping its last
-  // (newest) frame queued.
-  while (track_frames.size() > 1) {
-    Frame* const frame_to_write = track_frames.front();
-
-    // The frame may go out only if no other track has a held-back frame
-    // with an earlier timestamp.
-    bool okay_to_write = true;
-    for (FrameMapIterator track_iterator = stored_frames_.begin();
-         track_iterator != stored_frames_.end(); ++track_iterator) {
-      if (track_iterator->first == track_number) {
-        continue;
-      }
-      if (track_iterator->second.front()->timestamp() <
-          frame_to_write->timestamp()) {
-        okay_to_write = false;
-        break;
-      }
-    }
-    if (!okay_to_write)
-      break;
-
-    // Unlink before writing and deleting so that no exit path leaves a
-    // dangling pointer behind for the destructor to delete a second time.
-    track_frames.pop_front();
-    const bool wrote_frame = DoWriteFrame(frame_to_write);
-    delete frame_to_write;
-    if (!wrote_frame)
-      return false;
-  }
-
-  return true;
-}
-
-// Emits the Cluster ID, an 8-byte "unknown" size placeholder and the cluster
-// Timecode element, then marks the header as written. Returns false if the
-// cluster is already finalized or if any write fails.
-bool Cluster::WriteClusterHeader() {
-  if (finalized_)
-    return false;
-
-  // WriteID() and SerializeInt() are treated as failed on a non-zero result.
-  if (WriteID(writer_, libwebm::kMkvCluster))
-    return false;
-
-  // Remember where the size field lives so it can be found again later.
-  size_position_ = writer_->Position();
-
-  // The final cluster size is not known yet, so reserve all 8 bytes with the
-  // EBML "unknown" value.
-  if (SerializeInt(writer_, kEbmlUnknownValue, 8))
-    return false;
-
-  const int timecode_width = fixed_size_timecode_ ? 8 : 0;
-  const bool wrote_timecode = WriteEbmlElement(
-      writer_, libwebm::kMkvTimecode, timecode(), timecode_width);
-  if (!wrote_timecode)
-    return false;
-
-  AddPayloadSize(
-      EbmlElementSize(libwebm::kMkvTimecode, timecode(), timecode_width));
-  header_written_ = true;
-  return true;
-}
-
-///////////////////////////////////////////////////////////////
-//
-// SeekHead Class
-
-// Starts with |start_pos_| at 0 and every seek entry slot zeroed.
-SeekHead::SeekHead() : start_pos_(0ULL) {
-  // An id of 0 marks a slot as unused (see AddSeekEntry()).
-  int32_t slot = 0;
-  while (slot < kSeekEntryCount) {
-    seek_entry_id_[slot] = 0;
-    seek_entry_pos_[slot] = 0;
-    ++slot;
-  }
-}
-
-SeekHead::~SeekHead() {}  // Intentionally empty: the body performs no work.
-
-bool SeekHead::Finalize(IMkvWriter* writer) const {  // Writes the populated seek entries at |start_pos_|; a no-op returning true for non-seekable writers.
- if (writer->Seekable()) {
- if (start_pos_ == -1)
- return false;
-
- uint64_t payload_size = 0;
- uint64_t entry_size[kSeekEntryCount];  // Only slots with a non-zero id are filled in and later read.
-
- for (int32_t i = 0; i < kSeekEntryCount; ++i) {
- if (seek_entry_id_[i] != 0) {
- entry_size[i] = EbmlElementSize(libwebm::kMkvSeekID,
- static_cast<uint64>(seek_entry_id_[i]));
- entry_size[i] += EbmlElementSize(
- libwebm::kMkvSeekPosition, static_cast<uint64>(seek_entry_pos_[i]));
-
- payload_size +=
- EbmlMasterElementSize(libwebm::kMkvSeek, entry_size[i]) +
- entry_size[i];
- }
- }
-
- // No SeekHead elements: nothing to write, which is not an error.
- if (payload_size == 0)
- return true;
-
- const int64_t pos = writer->Position();  // Saved so the writer can be restored afterwards.
- if (writer->Position(start_pos_))  // A non-zero result from Position(x) is treated as a seek failure.
- return false;
-
- if (!WriteEbmlMasterElement(writer, libwebm::kMkvSeekHead, payload_size))
- return false;
-
- for (int32_t i = 0; i < kSeekEntryCount; ++i) {
- if (seek_entry_id_[i] != 0) {
- if (!WriteEbmlMasterElement(writer, libwebm::kMkvSeek, entry_size[i]))
- return false;
-
- if (!WriteEbmlElement(writer, libwebm::kMkvSeekID,
- static_cast<uint64>(seek_entry_id_[i])))
- return false;
-
- if (!WriteEbmlElement(writer, libwebm::kMkvSeekPosition,
- static_cast<uint64>(seek_entry_pos_[i])))
- return false;
- }
- }
-
- // Pad up to the maximum SeekHead size (all slots at MaxEntrySize()) with a Void element.
- const uint64_t total_entry_size = kSeekEntryCount * MaxEntrySize();
- const uint64_t total_size =
- EbmlMasterElementSize(libwebm::kMkvSeekHead, total_entry_size) +
- total_entry_size;
- const int64_t size_left = total_size - (writer->Position() - start_pos_);
-
- const uint64_t bytes_written = WriteVoidElement(writer, size_left);
- if (!bytes_written)  // Zero bytes written is treated as failure.
- return false;
-
- if (writer->Position(pos))  // Return the writer to where it was on entry.
- return false;
- }
-
- return true;
-}
-
-// Records the current writer position as |start_pos_| and writes a Void
-// element there sized for a SeekHead with every slot at MaxEntrySize().
-// Returns false when the Void element reports zero bytes written.
-bool SeekHead::Write(IMkvWriter* writer) {
-  const uint64_t entries_size = kSeekEntryCount * MaxEntrySize();
-  const uint64_t header_size =
-      EbmlMasterElementSize(libwebm::kMkvSeekHead, entries_size);
-
-  start_pos_ = writer->Position();
-
-  return WriteVoidElement(writer, header_size + entries_size) != 0;
-}
-
-// Stores (|id|, |pos|) in the first unused slot, i.e. the first slot whose
-// id is 0. Returns false when every slot is already taken.
-bool SeekHead::AddSeekEntry(uint32_t id, uint64_t pos) {
-  int32_t slot = 0;
-  while (slot < kSeekEntryCount && seek_entry_id_[slot] != 0)
-    ++slot;
-
-  if (slot >= kSeekEntryCount)
-    return false;
-
-  seek_entry_id_[slot] = id;
-  seek_entry_pos_[slot] = pos;
-  return true;
-}
-
-// Returns the id stored in slot |index|, or UINT_MAX when |index| is outside
-// [0, kSeekEntryCount).
-uint32_t SeekHead::GetId(int index) const {
-  const bool in_range = index >= 0 && index < kSeekEntryCount;
-  if (in_range)
-    return seek_entry_id_[index];
-  return UINT_MAX;
-}
-
-// Returns the position stored in slot |index|, or ULLONG_MAX when |index| is
-// outside [0, kSeekEntryCount).
-uint64_t SeekHead::GetPosition(int index) const {
-  const bool in_range = index >= 0 && index < kSeekEntryCount;
-  if (in_range)
-    return seek_entry_pos_[index];
-  return ULLONG_MAX;
-}
-
-// Overwrites slot |index| with (|id|, |position|). Returns false, leaving
-// the tables untouched, when |index| is outside [0, kSeekEntryCount).
-bool SeekHead::SetSeekEntry(int index, uint32_t id, uint64_t position) {
-  const bool in_range = index >= 0 && index < kSeekEntryCount;
-  if (in_range) {
-    seek_entry_id_[index] = id;
-    seek_entry_pos_[index] = position;
-  }
-  return in_range;
-}
-
-// Returns the size of one Seek entry when its SeekID holds 0xffffffff and
-// its SeekPosition holds 0xffffffffffffffff, including the Seek master
-// element header.
-uint64_t SeekHead::MaxEntrySize() const {
-  const uint64_t id_size = EbmlElementSize(
-      libwebm::kMkvSeekID, static_cast<uint64>(UINT64_C(0xffffffff)));
-  const uint64_t position_size =
-      EbmlElementSize(libwebm::kMkvSeekPosition,
-                      static_cast<uint64>(UINT64_C(0xffffffffffffffff)));
-  const uint64_t payload_size = id_size + position_size;
-
-  return EbmlMasterElementSize(libwebm::kMkvSeek, payload_size) +
-         payload_size;
-}
-
-///////////////////////////////////////////////////////////////
-//
-// SegmentInfo Class
-
-SegmentInfo::SegmentInfo()
- : duration_(-1.0),  // Write()/Finalize() only emit a Duration when this is > 0.0.
- muxing_app_(NULL),  // Allocated by Init() or set_muxing_app().
- timecode_scale_(1000000ULL),
- writing_app_(NULL),  // Allocated by set_writing_app().
- date_utc_(LLONG_MIN),  // Write() emits DateUTC only when this differs from LLONG_MIN.
- duration_pos_(-1) {}  // -1 until Write() records where the Duration element starts.
-
-// Frees the two application-name strings held by this object.
-SegmentInfo::~SegmentInfo() {
-  // delete[] on NULL is a no-op, so no guards are needed.
-  delete[] writing_app_;
-  delete[] muxing_app_;
-}
-
-// Formats "libwebm-<major>.<minor>.<build>.<revision>" from GetVersion() and
-// installs it as both the muxing app and the writing app. Returns false if
-// the muxing app buffer cannot be allocated or if |writing_app_| is still
-// NULL after set_writing_app().
-bool SegmentInfo::Init() {
-  int32_t major;
-  int32_t minor;
-  int32_t build;
-  int32_t revision;
-  GetVersion(&major, &minor, &build, &revision);
-
-  char version[256];
-  const size_t version_capacity = sizeof(version) / sizeof(version[0]);
-#ifdef _MSC_VER
-  sprintf_s(version, version_capacity, "libwebm-%d.%d.%d.%d", major, minor,
-            build, revision);
-#else
-  snprintf(version, version_capacity, "libwebm-%d.%d.%d.%d", major, minor,
-           build, revision);
-#endif
-
-  // Includes room for the terminating NUL.
-  const size_t copy_size = strlen(version) + 1;
-
-  delete[] muxing_app_;
-  muxing_app_ = new (std::nothrow) char[copy_size];  // NOLINT
-  if (!muxing_app_)
-    return false;
-
-#ifdef _MSC_VER
-  strcpy_s(muxing_app_, copy_size, version);
-#else
-  strcpy(muxing_app_, version);
-#endif
-
-  set_writing_app(version);
-  return writing_app_ != NULL;
-}
-
-// Rewrites the Duration element in place at |duration_pos_| and then puts
-// the writer back where it was. Does nothing (and succeeds) when the
-// duration is not positive or the writer cannot seek. Returns false for a
-// NULL writer, an unset |duration_pos_|, or any failed seek/write.
-bool SegmentInfo::Finalize(IMkvWriter* writer) const {
-  if (!writer)
-    return false;
-
-  // Nothing to patch in these cases.
-  if (!(duration_ > 0.0) || !writer->Seekable())
-    return true;
-
-  if (duration_pos_ == -1)
-    return false;
-
-  const int64_t saved_pos = writer->Position();
-
-  // Position(x) signals failure with a non-zero result.
-  if (writer->Position(duration_pos_))
-    return false;
-
-  const bool wrote_duration = WriteEbmlElement(
-      writer, libwebm::kMkvDuration, static_cast<float>(duration_));
-  if (!wrote_duration)
-    return false;
-
-  return !writer->Position(saved_pos);
-}
-
-// Writes the Info master element: TimecodeScale, optionally Duration and
-// DateUTC, then MuxingApp and WritingApp.
-//
-// The position of the Duration element is saved in |duration_pos_| so that
-// Finalize() can overwrite it later. Returns false if |writer| or either
-// application string is NULL, if any element fails to write, or if the
-// number of payload bytes written differs from the precomputed size.
-bool SegmentInfo::Write(IMkvWriter* writer) {
-  if (!writer || !muxing_app_ || !writing_app_)
-    return false;
-
-  // Precompute the payload size; it must match what is written below.
-  uint64_t size = EbmlElementSize(libwebm::kMkvTimecodeScale,
-                                  static_cast<uint64>(timecode_scale_));
-  if (duration_ > 0.0)
-    size +=
-        EbmlElementSize(libwebm::kMkvDuration, static_cast<float>(duration_));
-  if (date_utc_ != LLONG_MIN)
-    size += EbmlDateElementSize(libwebm::kMkvDateUTC);
-  size += EbmlElementSize(libwebm::kMkvMuxingApp, muxing_app_);
-  size += EbmlElementSize(libwebm::kMkvWritingApp, writing_app_);
-
-  if (!WriteEbmlMasterElement(writer, libwebm::kMkvInfo, size))
-    return false;
-
-  const int64_t payload_position = writer->Position();
-  if (payload_position < 0)
-    return false;
-
-  if (!WriteEbmlElement(writer, libwebm::kMkvTimecodeScale,
-                        static_cast<uint64>(timecode_scale_)))
-    return false;
-
-  if (duration_ > 0.0) {
-    // Save for later
-    duration_pos_ = writer->Position();
-
-    if (!WriteEbmlElement(writer, libwebm::kMkvDuration,
-                          static_cast<float>(duration_)))
-      return false;
-  }
-
-  // Fail fast on a DateUTC write error instead of carrying on and only
-  // noticing through the size check at the end.
-  if (date_utc_ != LLONG_MIN &&
-      !WriteEbmlDateElement(writer, libwebm::kMkvDateUTC, date_utc_))
-    return false;
-
-  if (!WriteEbmlElement(writer, libwebm::kMkvMuxingApp, muxing_app_))
-    return false;
-  if (!WriteEbmlElement(writer, libwebm::kMkvWritingApp, writing_app_))
-    return false;
-
-  // Sanity check: the bytes written must equal the announced payload size.
-  const int64_t stop_position = writer->Position();
-  if (stop_position < 0 ||
-      stop_position - payload_position != static_cast<int64_t>(size))
-    return false;
-
-  return true;
-}
-
-// Replaces |muxing_app_| with a heap copy of |app|. A NULL |app| or a failed
-// allocation leaves the current value untouched.
-void SegmentInfo::set_muxing_app(const char* app) {
-  if (!app)
-    return;
-
-  // Includes room for the terminating NUL.
-  const size_t buffer_size = strlen(app) + 1;
-  char* const copy = new (std::nothrow) char[buffer_size];  // NOLINT
-  if (!copy)
-    return;
-
-#ifdef _MSC_VER
-  strcpy_s(copy, buffer_size, app);
-#else
-  strcpy(copy, app);
-#endif
-
-  // Release the old string only once the new one is ready.
-  delete[] muxing_app_;
-  muxing_app_ = copy;
-}
-
-// Replaces |writing_app_| with a heap copy of |app|. A NULL |app| or a
-// failed allocation leaves the current value untouched.
-void SegmentInfo::set_writing_app(const char* app) {
-  if (!app)
-    return;
-
-  // Includes room for the terminating NUL.
-  const size_t buffer_size = strlen(app) + 1;
-  char* const copy = new (std::nothrow) char[buffer_size];  // NOLINT
-  if (!copy)
-    return;
-
-#ifdef _MSC_VER
-  strcpy_s(copy, buffer_size, app);
-#else
-  strcpy(copy, app);
-#endif
-
-  // Release the old string only once the new one is ready.
-  delete[] writing_app_;
-  writing_app_ = copy;
-}
-
-///////////////////////////////////////////////////////////////
-//
-// Segment Class
-
-// Builds an empty segment: no clusters, no queued frames, chunking off,
-// cues enabled and placed after the clusters, file mode. |seed_| is taken
-// from the current time.
-Segment::Segment()
-    : chunk_count_(0),
-      chunk_name_(NULL),
-      chunk_writer_cluster_(NULL),
-      chunk_writer_cues_(NULL),
-      chunk_writer_header_(NULL),
-      chunking_(false),
-      chunking_base_name_(NULL),
-      cluster_list_(NULL),
-      cluster_list_capacity_(0),
-      cluster_list_size_(0),
-      cues_position_(kAfterClusters),
-      cues_track_(0),
-      force_new_cluster_(false),
-      frames_(NULL),
-      frames_capacity_(0),
-      frames_size_(0),
-      has_video_(false),
-      header_written_(false),
-      last_block_duration_(0),
-      last_timestamp_(0),
-      max_cluster_duration_(kDefaultMaxClusterDuration),
-      max_cluster_size_(0),
-      mode_(kFile),
-      new_cuepoint_(false),
-      output_cues_(true),
-      accurate_cluster_duration_(false),
-      fixed_size_cluster_timecode_(false),
-      estimate_file_duration_(false),
-      payload_pos_(0),
-      size_position_(0),
-      doc_type_version_(kDefaultDocTypeVersion),
-      doc_type_version_written_(0),
-      duration_(0.0),
-      writer_cluster_(NULL),
-      writer_cues_(NULL),
-      writer_header_(NULL) {
-  // Seed from the wall clock; only Windows builds also seed rand() here.
-  seed_ = static_cast<unsigned int>(time(NULL));
-#ifdef _WIN32
-  srand(seed_);
-#endif
-}
-
-// Destroys every owned cluster and queued frame, frees the chunk name
-// strings, and closes and deletes any chunk writers that were created.
-Segment::~Segment() {
-  if (cluster_list_) {
-    for (int32_t idx = 0; idx < cluster_list_size_; ++idx)
-      delete cluster_list_[idx];
-    delete[] cluster_list_;
-  }
-
-  if (frames_) {
-    for (int32_t idx = 0; idx < frames_size_; ++idx)
-      delete frames_[idx];
-    delete[] frames_;
-  }
-
-  delete[] chunk_name_;
-  delete[] chunking_base_name_;
-
-  // Each chunk writer is closed before it is destroyed.
-  if (chunk_writer_cluster_) {
-    chunk_writer_cluster_->Close();
-    delete chunk_writer_cluster_;
-  }
-  if (chunk_writer_cues_) {
-    chunk_writer_cues_->Close();
-    delete chunk_writer_cues_;
-  }
-  if (chunk_writer_header_) {
-    chunk_writer_header_->Close();
-    delete chunk_writer_header_;
-  }
-}
-
-void Segment::MoveCuesBeforeClustersHelper(uint64_t diff, int32_t index,  // Shifts cue |index|'s cluster position by |diff| and propagates any resulting size growth.
- uint64_t* cues_size) {  // |cues_size| is the running sum of CuePoint sizes, updated in place.
- CuePoint* const cue_point = cues_.GetCueByIndex(index);
- if (cue_point == NULL)
- return;
- const uint64_t old_cue_point_size = cue_point->Size();
- const uint64_t cluster_pos = cue_point->cluster_pos() + diff;
- cue_point->set_cluster_pos(cluster_pos); // update the new cluster position
- // New size of the cue is computed as follows
- // Let a = current sum of size of all CuePoints
- // Let b = Increase in Cue Point's size due to this iteration
- // Let c = Increase in size of Cues Element's length due to this iteration
- // (This is computed as CodedSize(a + b) - CodedSize(a))
- // Let d = b + c. Now d is the |diff| passed to the next recursive call.
- // Let e = a + b. Now e is the |cues_size| passed to the next recursive
- // call.
- const uint64_t cue_point_size_diff = cue_point->Size() - old_cue_point_size;
- const uint64_t cue_size_diff =
- GetCodedUIntSize(*cues_size + cue_point_size_diff) -
- GetCodedUIntSize(*cues_size);
- *cues_size += cue_point_size_diff;
- diff = cue_size_diff + cue_point_size_diff;
- if (diff > 0) {  // The Cues element grew, so every cue's cluster position must move by the extra amount.
- for (int32_t i = 0; i < cues_.cue_entries_size(); ++i) {
- MoveCuesBeforeClustersHelper(diff, i, cues_size);  // Recursion ends once an adjustment no longer changes any size.
- }
- }
-}
-
-// Recomputes every cue's cluster position for a layout in which the Cues
-// element precedes the clusters, then swaps the Cues and Cluster seek
-// entries accordingly.
-void Segment::MoveCuesBeforeClusters() {
-  const uint64_t initial_cues_size = cues_.Size();
-
-  // Running total of all CuePoint sizes, kept up to date by the helper.
-  uint64_t cue_points_size = 0;
-  for (int32_t idx = 0; idx < cues_.cue_entries_size(); ++idx)
-    cue_points_size += cues_.GetCueByIndex(idx)->Size();
-
-  for (int32_t idx = 0; idx < cues_.cue_entries_size(); ++idx)
-    MoveCuesBeforeClustersHelper(initial_cues_size, idx, &cue_points_size);
-
-  // Find the seek entry slots for Cluster and Cues. When an id occurs more
-  // than once the last matching slot wins; slot 0 is used if none matches.
-  int32_t cluster_slot = 0;
-  int32_t cues_slot = 0;
-  for (int32_t slot = 0; slot < SeekHead::kSeekEntryCount; ++slot) {
-    const uint32_t id = seek_head_.GetId(slot);
-    if (id == libwebm::kMkvCluster)
-      cluster_slot = slot;
-    if (id == libwebm::kMkvCues)
-      cues_slot = slot;
-  }
-
-  // Cues take over the old cluster position; the cluster entry then moves
-  // to just past the (re-measured) Cues element. The second call relies on
-  // the cues entry having been updated by the first.
-  seek_head_.SetSeekEntry(cues_slot, libwebm::kMkvCues,
-                          seek_head_.GetPosition(cluster_slot));
-  seek_head_.SetSeekEntry(cluster_slot, libwebm::kMkvCluster,
-                          cues_.Size() + seek_head_.GetPosition(cues_slot));
-}
-
-// Uses |ptr_writer| for cluster, cues and header output, zeroes the
-// per-track counters and timestamps, and initializes the segment info.
-// Returns false for a NULL writer or if SegmentInfo::Init() fails.
-bool Segment::Init(IMkvWriter* ptr_writer) {
-  if (!ptr_writer)
-    return false;
-
-  writer_header_ = ptr_writer;
-  writer_cues_ = ptr_writer;
-  writer_cluster_ = ptr_writer;
-
-  memset(&last_track_timestamp_, 0,
-         sizeof(last_track_timestamp_[0]) * kMaxTrackNumber);
-  memset(&track_frames_written_, 0,
-         sizeof(track_frames_written_[0]) * kMaxTrackNumber);
-
-  return segment_info_.Init();
-}
-
-// Copies an already muxed file from |reader| to |writer|, placing the Cues
-// element before the clusters.
-//
-// Steps: copy everything up to the first cluster, recompute cue positions
-// and seek entries, write Cues and the SeekHead, copy the clusters, and
-// finally rewrite the Segment size.
-//
-// Returns false if either argument is NULL, |writer| is not seekable,
-// chunking is enabled, no cluster has been written, or any copy/write/seek
-// step fails.
-bool Segment::CopyAndMoveCuesBeforeClusters(mkvparser::IMkvReader* reader,
-                                            IMkvWriter* writer) {
-  if (!reader || !writer)
-    return false;
-  if (!writer->Seekable() || chunking_)
-    return false;
-  // The first cluster marks where the headers end; without one there is
-  // nothing to move and |cluster_list_[0]| must not be dereferenced.
-  if (cluster_list_size_ < 1 || !cluster_list_ || !cluster_list_[0])
-    return false;
-
-  const int64_t cluster_offset =
-      cluster_list_[0]->size_position() - GetUIntSize(libwebm::kMkvCluster);
-
-  // Copy the headers.
-  if (!ChunkedCopy(reader, writer, 0, cluster_offset))
-    return false;
-
-  // Recompute cue positions and seek entries.
-  MoveCuesBeforeClusters();
-
-  // Write cues and seek entries.
-  // TODO(vigneshv): As of now, it's safe to call seek_head_.Finalize() for the
-  // second time with a different writer object. But the name Finalize() doesn't
-  // indicate something we want to call more than once. So consider renaming it
-  // to write() or some such.
-  if (!cues_.Write(writer) || !seek_head_.Finalize(writer))
-    return false;
-
-  // Copy the Clusters.
-  if (!ChunkedCopy(reader, writer, cluster_offset,
-                   cluster_end_offset_ - cluster_offset))
-    return false;
-
-  // Update the Segment size in case the Cues size has changed. Position(x)
-  // and WriteUIntSize() signal failure with a non-zero result.
-  const int64_t pos = writer->Position();
-  const int64_t segment_size = writer->Position() - payload_pos_;
-  if (writer->Position(size_position_) ||
-      WriteUIntSize(writer, segment_size, 8) || writer->Position(pos))
-    return false;
-  return true;
-}
-
-// Flushes all queued frames, finalizes the last cluster and, in kFile mode,
-// writes the final duration, the Cues, the SeekHead and the Segment size.
-//
-// Returns false as soon as any step fails.
-bool Segment::Finalize() {
-  if (WriteFramesAll() < 0)
-    return false;
-
-  // In kLive mode, call Cluster::Finalize only if |accurate_cluster_duration_|
-  // is set. In all other modes, always call Cluster::Finalize.
-  if ((mode_ == kLive ? accurate_cluster_duration_ : true) &&
-      cluster_list_size_ > 0) {
-    // Update last cluster's size
-    Cluster* const old_cluster = cluster_list_[cluster_list_size_ - 1];
-
-    // For the last frame of the last Cluster, we don't write it as a BlockGroup
-    // with Duration unless the frame itself has duration set explicitly.
-    if (!old_cluster || !old_cluster->Finalize(false, 0))
-      return false;
-  }
-
-  if (mode_ == kFile) {
-    if (chunking_ && chunk_writer_cluster_) {
-      chunk_writer_cluster_->Close();
-      chunk_count_++;
-    }
-
-    // Default duration: end of the last written block, in timecode units.
-    // An explicitly set |duration_| takes precedence.
-    double duration =
-        (static_cast<double>(last_timestamp_) + last_block_duration_) /
-        segment_info_.timecode_scale();
-    if (duration_ > 0.0) {
-      duration = duration_;
-    } else {
-      if (last_block_duration_ == 0 && estimate_file_duration_) {
-        // The per-track arrays are indexed by (track number - 1), and track
-        // numbers need not be contiguous, so scan every slot rather than
-        // only the first "number of tracks" slots. Unused slots hold 0
-        // frames and are skipped by the check below.
-        for (uint64_t i = 0; i < kMaxTrackNumber; ++i) {
-          if (track_frames_written_[i] < 2)
-            continue;
-
-          // Estimate the duration for the last block of a Track.
-          const double nano_per_frame =
-              static_cast<double>(last_track_timestamp_[i]) /
-              (track_frames_written_[i] - 1);
-          const double track_duration =
-              (last_track_timestamp_[i] + nano_per_frame) /
-              segment_info_.timecode_scale();
-          if (track_duration > duration)
-            duration = track_duration;
-        }
-      }
-    }
-    segment_info_.set_duration(duration);
-    if (!segment_info_.Finalize(writer_header_))
-      return false;
-
-    if (output_cues_)
-      if (!seek_head_.AddSeekEntry(libwebm::kMkvCues, MaxOffset()))
-        return false;
-
-    if (chunking_) {
-      if (!chunk_writer_cues_)
-        return false;
-
-      char* name = NULL;
-      if (!UpdateChunkName("cues", &name))
-        return false;
-
-      const bool cues_open = chunk_writer_cues_->Open(name);
-      delete[] name;
-      if (!cues_open)
-        return false;
-    }
-
-    cluster_end_offset_ = writer_cluster_->Position();
-
-    // Write the seek headers and cues
-    if (output_cues_)
-      if (!cues_.Write(writer_cues_))
-        return false;
-
-    if (!seek_head_.Finalize(writer_header_))
-      return false;
-
-    if (writer_header_->Seekable()) {
-      if (size_position_ == -1)
-        return false;
-
-      const int64_t segment_size = MaxOffset();
-      if (segment_size < 1)
-        return false;
-
-      const int64_t pos = writer_header_->Position();
-      UpdateDocTypeVersion();
-      // Rewrite the EBML header if the doc type version changed since it
-      // was first written; the rewritten header must have the same size.
-      if (doc_type_version_ != doc_type_version_written_) {
-        if (writer_header_->Position(0))
-          return false;
-
-        const char* const doc_type =
-            DocTypeIsWebm() ? kDocTypeWebm : kDocTypeMatroska;
-        if (!WriteEbmlHeader(writer_header_, doc_type_version_, doc_type))
-          return false;
-        if (writer_header_->Position() != ebml_header_size_)
-          return false;
-
-        doc_type_version_written_ = doc_type_version_;
-      }
-
-      // Replace the "unknown" Segment size placeholder with the real size.
-      if (writer_header_->Position(size_position_))
-        return false;
-
-      if (WriteUIntSize(writer_header_, segment_size, 8))
-        return false;
-
-      if (writer_header_->Position(pos))
-        return false;
-    }
-
-    if (chunking_) {
-      // Do not close any writers until the segment size has been written,
-      // otherwise the size may be off.
-      if (!chunk_writer_cues_ || !chunk_writer_header_)
-        return false;
-
-      chunk_writer_cues_->Close();
-      chunk_writer_header_->Close();
-    }
-  }
-
-  return true;
-}
-
-// Creates a generic Track and registers it under |number|. Returns the new
-// track, or NULL if allocation or registration fails.
-Track* Segment::AddTrack(int32_t number) {
-  Track* new_track = new (std::nothrow) Track(&seed_);  // NOLINT
-
-  // If registration fails the track is destroyed here.
-  if (new_track && !tracks_.AddTrack(new_track, number)) {
-    delete new_track;
-    new_track = NULL;
-  }
-
-  return new_track;
-}
-
-// Forwards to the chapter list, passing the segment's seed.
-Chapter* Segment::AddChapter() {
-  return chapters_.AddChapter(&seed_);
-}
-
-// Forwards to the tag list and returns whatever it returns.
-Tag* Segment::AddTag() {
-  return tags_.AddTag();
-}
-
-// Creates a video track of |width| x |height| with the VP8 codec id and
-// registers it under |number|. Returns the track's number, or 0 if
-// allocation or registration fails.
-uint64_t Segment::AddVideoTrack(int32_t width, int32_t height, int32_t number) {
-  VideoTrack* const video = new (std::nothrow) VideoTrack(&seed_);  // NOLINT
-  if (!video)
-    return 0;
-
-  video->set_type(Tracks::kVideo);
-  video->set_codec_id(Tracks::kVp8CodecId);
-  video->set_width(width);
-  video->set_height(height);
-
-  const bool registered = tracks_.AddTrack(video, number);
-  if (!registered) {
-    delete video;
-    return 0;
-  }
-
-  // Remembered so audio frames can be held back (see AddGenericFrame()).
-  has_video_ = true;
-  return video->number();
-}
-
-// Adds a cue for |track| at |timestamp| that points into the most recent
-// cluster, and clears the pending-cue flag. Returns false if there is no
-// cluster, or if the cue cannot be allocated or added.
-bool Segment::AddCuePoint(uint64_t timestamp, uint64_t track) {
-  const Cluster* const current =
-      (cluster_list_size_ < 1) ? NULL : cluster_list_[cluster_list_size_ - 1];
-  if (!current)
-    return false;
-
-  CuePoint* const cue_point = new (std::nothrow) CuePoint();  // NOLINT
-  if (!cue_point)
-    return false;
-
-  // The cue time is stored in timecode units, not nanoseconds.
-  cue_point->set_time(timestamp / segment_info_.timecode_scale());
-  cue_point->set_block_number(current->blocks_added());
-  cue_point->set_cluster_pos(current->position_for_cues());
-  cue_point->set_track(track);
-
-  const bool added = cues_.AddCue(cue_point);
-  if (!added) {
-    delete cue_point;
-    return false;
-  }
-
-  new_cuepoint_ = false;
-  return true;
-}
-
-// Creates an audio track with the Vorbis codec id, |sample_rate| and
-// |channels|, and registers it under |number|. Returns the track's number,
-// or 0 if allocation or registration fails.
-uint64_t Segment::AddAudioTrack(int32_t sample_rate, int32_t channels,
-                                int32_t number) {
-  AudioTrack* const audio = new (std::nothrow) AudioTrack(&seed_);  // NOLINT
-  if (!audio)
-    return 0;
-
-  audio->set_type(Tracks::kAudio);
-  audio->set_codec_id(Tracks::kVorbisCodecId);
-  audio->set_sample_rate(sample_rate);
-  audio->set_channels(channels);
-
-  const bool registered = tracks_.AddTrack(audio, number);
-  if (!registered) {
-    delete audio;
-    return 0;
-  }
-
-  return audio->number();
-}
-
-// Wraps |data| in a Frame tagged with |track_number|, |timestamp| and
-// |is_key|, then hands it to AddGenericFrame(). Returns false for NULL data
-// or if the frame cannot be initialized.
-bool Segment::AddFrame(const uint8_t* data, uint64_t length,
-                       uint64_t track_number, uint64_t timestamp, bool is_key) {
-  if (data == NULL)
-    return false;
-
-  Frame new_frame;
-  const bool initialized = new_frame.Init(data, length);
-  if (!initialized)
-    return false;
-
-  new_frame.set_track_number(track_number);
-  new_frame.set_timestamp(timestamp);
-  new_frame.set_is_key(is_key);
-
-  return AddGenericFrame(&new_frame);
-}
-
-// Same as AddFrame(), but also attaches |additional| (with id |add_id|) to
-// the frame. Returns false if either buffer is NULL or if the frame or its
-// additional data cannot be set up.
-bool Segment::AddFrameWithAdditional(const uint8_t* data, uint64_t length,
-                                     const uint8_t* additional,
-                                     uint64_t additional_length,
-                                     uint64_t add_id, uint64_t track_number,
-                                     uint64_t timestamp, bool is_key) {
-  if (data == NULL || additional == NULL)
-    return false;
-
-  Frame new_frame;
-  if (!new_frame.Init(data, length))
-    return false;
-  if (!new_frame.AddAdditionalData(additional, additional_length, add_id))
-    return false;
-
-  new_frame.set_track_number(track_number);
-  new_frame.set_timestamp(timestamp);
-  new_frame.set_is_key(is_key);
-
-  return AddGenericFrame(&new_frame);
-}
-
-// Same as AddFrame(), but also sets |discard_padding| on the frame. Returns
-// false for NULL data or if the frame cannot be initialized.
-bool Segment::AddFrameWithDiscardPadding(const uint8_t* data, uint64_t length,
-                                         int64_t discard_padding,
-                                         uint64_t track_number,
-                                         uint64_t timestamp, bool is_key) {
-  if (data == NULL)
-    return false;
-
-  Frame new_frame;
-  const bool initialized = new_frame.Init(data, length);
-  if (!initialized)
-    return false;
-
-  new_frame.set_discard_padding(discard_padding);
-  new_frame.set_track_number(track_number);
-  new_frame.set_timestamp(timestamp);
-  new_frame.set_is_key(is_key);
-
-  return AddGenericFrame(&new_frame);
-}
-
-// Adds a metadata block with an explicit duration. The frame is always
-// flagged as a keyframe. Returns false for NULL data or if the frame cannot
-// be initialized.
-bool Segment::AddMetadata(const uint8_t* data, uint64_t length,
-                          uint64_t track_number, uint64_t timestamp_ns,
-                          uint64_t duration_ns) {
-  if (data == NULL)
-    return false;
-
-  Frame metadata;
-  const bool initialized = metadata.Init(data, length);
-  if (!initialized)
-    return false;
-
-  metadata.set_track_number(track_number);
-  metadata.set_timestamp(timestamp_ns);
-  metadata.set_duration(duration_ns);
-  metadata.set_is_key(true);  // All metadata blocks are keyframes.
-
-  return AddGenericFrame(&metadata);
-}
-
-// Adds |frame| to the segment.
-//
-// Audio frames are queued (as a copy) when the segment has a video track
-// and no new cluster is being forced. Every other frame is written to the
-// current cluster, after first creating a new cluster if required.
-//
-// Returns false if |frame| is NULL, the header cannot be written, the
-// timestamp is older than the last written one, the track number is
-// unknown, or any allocation/write step fails.
-bool Segment::AddGenericFrame(const Frame* frame) {
-  if (!frame)
-    return false;
-
-  if (!CheckHeaderInfo())
-    return false;
-
-  // Check for non-monotonically increasing timestamps.
-  if (frame->timestamp() < last_timestamp_)
-    return false;
-
-  // Check if the track number is valid.
-  if (!tracks_.GetTrackByNumber(frame->track_number()))
-    return false;
-
-  if (frame->discard_padding() != 0)
-    doc_type_version_ = 4;
-
-  // Force a new cluster when the frame is too far from the start of the
-  // current cluster to be expressed as a block-relative timecode.
-  if (cluster_list_size_ > 0) {
-    const uint64_t timecode_scale = segment_info_.timecode_scale();
-    const uint64_t frame_timecode = frame->timestamp() / timecode_scale;
-
-    const Cluster* const last_cluster = cluster_list_[cluster_list_size_ - 1];
-    const uint64_t last_cluster_timecode = last_cluster->timecode();
-
-    const uint64_t rel_timecode = frame_timecode - last_cluster_timecode;
-    if (rel_timecode > kMaxBlockTimecode) {
-      force_new_cluster_ = true;
-    }
-  }
-
-  // If the segment has a video track hold onto audio frames to make sure the
-  // audio that is associated with the start time of a video key-frame is
-  // muxed into the same cluster.
-  if (has_video_ && tracks_.TrackIsAudio(frame->track_number()) &&
-      !force_new_cluster_) {
-    Frame* const new_frame = new (std::nothrow) Frame();
-    if (!new_frame || !new_frame->CopyFrom(*frame)) {
-      delete new_frame;
-      return false;
-    }
-    if (!QueueFrame(new_frame)) {
-      delete new_frame;
-      return false;
-    }
-    track_frames_written_[frame->track_number() - 1]++;
-    return true;
-  }
-
-  if (!DoNewClusterProcessing(frame->track_number(), frame->timestamp(),
-                              frame->is_key())) {
-    return false;
-  }
-
-  if (cluster_list_size_ < 1)
-    return false;
-
-  Cluster* const cluster = cluster_list_[cluster_list_size_ - 1];
-  if (!cluster)
-    return false;
-
-  // If the Frame is not a SimpleBlock, then set the reference_block_timestamp
-  // if it is not set already. This needs a private, modifiable copy, which
-  // is owned by |owned_frame| and released on every exit path below.
-  Frame* owned_frame = NULL;
-  if (!frame->CanBeSimpleBlock() && !frame->is_key() &&
-      !frame->reference_block_timestamp_set()) {
-    owned_frame = new (std::nothrow) Frame();
-    if (!owned_frame || !owned_frame->CopyFrom(*frame)) {
-      delete owned_frame;
-      return false;
-    }
-    owned_frame->set_reference_block_timestamp(
-        last_track_timestamp_[frame->track_number() - 1]);
-    frame = owned_frame;
-  }
-
-  if (!cluster->AddFrame(frame)) {
-    delete owned_frame;
-    return false;
-  }
-
-  if (new_cuepoint_ && cues_track_ == frame->track_number()) {
-    if (!AddCuePoint(frame->timestamp(), cues_track_)) {
-      delete owned_frame;
-      return false;
-    }
-  }
-
-  last_timestamp_ = frame->timestamp();
-  last_track_timestamp_[frame->track_number() - 1] = frame->timestamp();
-  last_block_duration_ = frame->duration();
-  track_frames_written_[frame->track_number() - 1]++;
-
-  // delete on NULL is a no-op when no copy was made.
-  delete owned_frame;
-  return true;
-}
-
-// Sets |output_cues_|, the flag checked before cues are created or written.
-void Segment::OutputCues(bool output_cues) {
-  output_cues_ = output_cues;
-}
-
-// Stores the flag that is passed to each new Cluster and that decides
-// whether the last cluster is finalized in live mode.
-void Segment::AccurateClusterDuration(bool accurate_cluster_duration) {
-  accurate_cluster_duration_ = accurate_cluster_duration;
-}
-
-// Stores the fixed-size-timecode flag that is passed to each new Cluster.
-void Segment::UseFixedSizeClusterTimecode(bool fixed_size_cluster_timecode) {
-  fixed_size_cluster_timecode_ = fixed_size_cluster_timecode;
-}
-
-bool Segment::SetChunking(bool chunking, const char* filename) {  // Turns chunked output on/off; when on, opens the first chunk and "<filename>.hdr".
- if (chunk_count_ > 0)  // Refused once a chunk has already been completed.
- return false;
-
- if (chunking) {
- if (!filename)
- return false;
-
- // Check if we are being set to what is already set.
- if (chunking_ && !strcmp(filename, chunking_base_name_))
- return true;
-
- const size_t name_length = strlen(filename) + 1;
- char* const temp = new (std::nothrow) char[name_length]; // NOLINT
- if (!temp)
- return false;
-
-#ifdef _MSC_VER
- strcpy_s(temp, name_length, filename);
-#else
- strcpy(temp, filename);
-#endif
-
- delete[] chunking_base_name_;
- chunking_base_name_ = temp;  // Takes ownership of the copied base name.
-
- if (!UpdateChunkName("chk", &chunk_name_))
- return false;
-
- // Create the three chunk writers on first use; existing ones are reused.
- if (!chunk_writer_cluster_) {
- chunk_writer_cluster_ = new (std::nothrow) MkvWriter(); // NOLINT
- if (!chunk_writer_cluster_)
- return false;
- }
-
- if (!chunk_writer_cues_) {
- chunk_writer_cues_ = new (std::nothrow) MkvWriter(); // NOLINT
- if (!chunk_writer_cues_)
- return false;
- }
-
- if (!chunk_writer_header_) {
- chunk_writer_header_ = new (std::nothrow) MkvWriter(); // NOLINT
- if (!chunk_writer_header_)
- return false;
- }
-
- if (!chunk_writer_cluster_->Open(chunk_name_))
- return false;
-
- const size_t header_length = strlen(filename) + strlen(".hdr") + 1;  // Base name + ".hdr" + NUL.
- char* const header = new (std::nothrow) char[header_length]; // NOLINT
- if (!header)
- return false;
-
-#ifdef _MSC_VER
- strcpy_s(header, header_length - strlen(".hdr"), chunking_base_name_);
- strcat_s(header, header_length, ".hdr");
-#else
- strcpy(header, chunking_base_name_);
- strcat(header, ".hdr");
-#endif
- if (!chunk_writer_header_->Open(header)) {
- delete[] header;
- return false;
- }
-
- // From here on all output goes through the chunk writers.
- writer_cluster_ = chunk_writer_cluster_;
- writer_cues_ = chunk_writer_cues_;
- writer_header_ = chunk_writer_header_;
-
- delete[] header;
- }
-
- chunking_ = chunking;
-
- return true;
-}
-
-// Selects |track_number| as the track that cue points are generated for.
-// Returns false, leaving the selection unchanged, if no such track exists.
-bool Segment::CuesTrack(uint64_t track_number) {
-  if (GetTrackByNumber(track_number) == NULL)
-    return false;
-
-  cues_track_ = track_number;
-  return true;
-}
-
-// Sets |force_new_cluster_|, which makes TestFrame() ask for a new cluster.
-void Segment::ForceNewClusterOnNextFrame() {
-  force_new_cluster_ = true;
-}
-
-// Looks |track_number| up in the track list and returns the result.
-Track* Segment::GetTrackByNumber(uint64_t track_number) const {
-  Track* const found = tracks_.GetTrackByNumber(track_number);
-  return found;
-}
-
-bool Segment::WriteSegmentHeader() {  // Writes the EBML header, Segment start, SeekHead placeholder, Info, Tracks and optional Chapters/Tags.
- UpdateDocTypeVersion();
-
- const char* const doc_type =
- DocTypeIsWebm() ? kDocTypeWebm : kDocTypeMatroska;
- if (!WriteEbmlHeader(writer_header_, doc_type_version_, doc_type))
- return false;
- doc_type_version_written_ = doc_type_version_;
- ebml_header_size_ = static_cast<int32_t>(writer_header_->Position());  // Finalize() checks a rewritten header against this size.
-
- // Write "unknown" (-1) as segment size value. If mode is kFile and the
- // writer is seekable, Finalize() overwrites it with the real size.
- if (WriteID(writer_header_, libwebm::kMkvSegment))
- return false;
-
- // Save for later.
- size_position_ = writer_header_->Position();
-
- // Write "unknown" (EBML coded -1) as segment size value. We need to write 8
- // bytes because if we are going to overwrite the segment size later we do
- // not know how big our segment will be.
- if (SerializeInt(writer_header_, kEbmlUnknownValue, 8))
- return false;
-
- payload_pos_ = writer_header_->Position();
-
- if (mode_ == kFile && writer_header_->Seekable()) {
- // Set the duration > 0.0 so SegmentInfo will write out the duration. When
- // the muxer is done writing we will set the correct duration and have
- // SegmentInfo update it.
- segment_info_.set_duration(1.0);
-
- if (!seek_head_.Write(writer_header_))
- return false;
- }
-
- // Each top-level element gets a seek entry recorded just before it is written.
- if (!seek_head_.AddSeekEntry(libwebm::kMkvInfo, MaxOffset()))
- return false;
- if (!segment_info_.Write(writer_header_))
- return false;
-
- if (!seek_head_.AddSeekEntry(libwebm::kMkvTracks, MaxOffset()))
- return false;
- if (!tracks_.Write(writer_header_))
- return false;
-
- if (chapters_.Count() > 0) {
- if (!seek_head_.AddSeekEntry(libwebm::kMkvChapters, MaxOffset()))
- return false;
- if (!chapters_.Write(writer_header_))
- return false;
- }
-
- if (tags_.Count() > 0) {
- if (!seek_head_.AddSeekEntry(libwebm::kMkvTags, MaxOffset()))
- return false;
- if (!tags_.Write(writer_header_))
- return false;
- }
-
- if (chunking_ && (mode_ == kLive || !writer_header_->Seekable())) {  // The header chunk is closed right away in these cases.
- if (!chunk_writer_header_)
- return false;
-
- chunk_writer_header_->Close();
- }
-
- header_written_ = true;
-
- return true;
-}
-
-// Decides whether a frame with time |frame_timestamp_ns| needs a new
-// cluster. Return values:
-//   -1  the frame's timecode precedes the last cluster's timecode (error);
-//    0  the frame fits in the current cluster;
-//    1  a new cluster should be created;
-//    2  a new cluster is required because the frame is too far from the
-//       last cluster's timecode.
-int Segment::TestFrame(uint64_t track_number, uint64_t frame_timestamp_ns,
-                       bool is_key) const {
-  // A forced cluster, or no cluster at all yet (the very first frame),
-  // always means "create one".
-  if (force_new_cluster_ || cluster_list_size_ <= 0)
-    return 1;
-
-  // From here on there is at least one cluster; compare the frame against
-  // the most recent one.
-  const uint64_t scale = segment_info_.timecode_scale();
-  const uint64_t frame_tc = frame_timestamp_ns / scale;
-
-  const Cluster* const current = cluster_list_[cluster_list_size_ - 1];
-  const uint64_t cluster_tc = current->timecode();
-
-  // Matroska allows a frame earlier than its cluster, but this muxer never
-  // produces that, so it indicates a bug somewhere upstream.
-  if (frame_tc < cluster_tc)  // should never happen
-    return -1;
-
-  // Cluster-relative timestamps are serialized as 16-bit signed integers,
-  // so a frame too far ahead cannot live in the current cluster.
-  const int64_t delta_timecode = frame_tc - cluster_tc;
-  if (delta_timecode > kMaxBlockTimecode)
-    return 2;
-
-  // A video keyframe starts a new cluster; that also flushes queued
-  // (audio) frames and puts the keyframe first in the new cluster.
-  const bool video_keyframe = is_key && tracks_.TrackIsVideo(track_number);
-  if (video_keyframe)
-    return 1;
-
-  // Duration limit: the time spanned by the cluster reached the threshold.
-  const uint64_t delta_ns = delta_timecode * scale;
-  if (max_cluster_duration_ > 0 && delta_ns >= max_cluster_duration_)
-    return 1;
-
-  // Size limit: the cluster payload reached the threshold.
-  const uint64_t payload_bytes = current->payload_size();
-  if (max_cluster_size_ > 0 && payload_bytes >= max_cluster_size_)
-    return 1;
-
-  // No reason to start a new cluster; emit the frame into the current one.
-  return 0;
-}
-
-// Closes the current cluster (if any) and opens a new one that starts at
-// |frame_timestamp_ns|, or at the earliest queued frame if that is sooner.
-//
-// Grows the cluster list when needed, flushes queued frames older than the
-// new frame, finalizes the previous cluster, rolls over to the next chunk
-// file when chunking, and requests a new cue point when cues are enabled.
-//
-// Returns false if any allocation or write step fails; in that case the
-// cluster count is left unchanged.
-bool Segment::MakeNewCluster(uint64_t frame_timestamp_ns) {
-  const int32_t new_size = cluster_list_size_ + 1;
-
-  if (new_size > cluster_list_capacity_) {
-    // Add more clusters.
-    const int32_t new_capacity =
-        (cluster_list_capacity_ <= 0) ? 1 : cluster_list_capacity_ * 2;
-    Cluster** const clusters =
-        new (std::nothrow) Cluster*[new_capacity];  // NOLINT
-    if (!clusters)
-      return false;
-
-    for (int32_t i = 0; i < cluster_list_size_; ++i) {
-      clusters[i] = cluster_list_[i];
-    }
-
-    delete[] cluster_list_;
-
-    cluster_list_ = clusters;
-    cluster_list_capacity_ = new_capacity;
-  }
-
-  if (!WriteFramesLessThan(frame_timestamp_ns))
-    return false;
-
-  if (cluster_list_size_ > 0) {
-    // Update old cluster's size
-    Cluster* const old_cluster = cluster_list_[cluster_list_size_ - 1];
-
-    if (!old_cluster || !old_cluster->Finalize(true, frame_timestamp_ns))
-      return false;
-  }
-
-  if (output_cues_)
-    new_cuepoint_ = true;
-
-  // When chunking, every cluster after the first goes into its own file.
-  if (chunking_ && cluster_list_size_ > 0) {
-    chunk_writer_cluster_->Close();
-    chunk_count_++;
-
-    if (!UpdateChunkName("chk", &chunk_name_))
-      return false;
-    if (!chunk_writer_cluster_->Open(chunk_name_))
-      return false;
-  }
-
-  const uint64_t timecode_scale = segment_info_.timecode_scale();
-  const uint64_t frame_timecode = frame_timestamp_ns / timecode_scale;
-
-  uint64_t cluster_timecode = frame_timecode;
-
-  // Queued frames still waiting to be written must not precede the cluster
-  // timecode, so start the cluster at the earliest of them if needed.
-  if (frames_size_ > 0) {
-    const Frame* const f = frames_[0];  // earliest queued frame
-    const uint64_t ns = f->timestamp();
-    const uint64_t tc = ns / timecode_scale;
-
-    if (tc < cluster_timecode)
-      cluster_timecode = tc;
-  }
-
-  Cluster*& cluster = cluster_list_[cluster_list_size_];
-  const int64_t offset = MaxOffset();
-  cluster = new (std::nothrow)
-      Cluster(cluster_timecode, offset, segment_info_.timecode_scale(),
-              accurate_cluster_duration_, fixed_size_cluster_timecode_);
-  if (!cluster)
-    return false;
-
-  if (!cluster->Init(writer_cluster_)) {
-    // The slot is not counted in |cluster_list_size_| yet, so nothing else
-    // would ever free this cluster; release it here.
-    delete cluster;
-    cluster = NULL;
-    return false;
-  }
-
-  cluster_list_size_ = new_size;
-  return true;
-}
-
-bool Segment::DoNewClusterProcessing(uint64_t track_number,
- uint64_t frame_timestamp_ns, bool is_key) {
- for (;;) {
- // Based on the characteristics of the current frame and current
- // cluster, decide whether to create a new cluster.
- const int result = TestFrame(track_number, frame_timestamp_ns, is_key);
- if (result < 0) // error
- return false;
-
- // Always set force_new_cluster_ to false after TestFrame.
- force_new_cluster_ = false;
-
- // A non-zero result means create a new cluster.
- if (result > 0 && !MakeNewCluster(frame_timestamp_ns))
- return false;
-
- // Write queued (audio) frames.
- const int frame_count = WriteFramesAll();
- if (frame_count < 0) // error
- return false;
-
- // Write the current frame to the current cluster (if TestFrame
- // returns 0) or to a newly created cluster (TestFrame returns 1).
- if (result <= 1)
- return true;
-
- // TestFrame returned 2, which means there was a large time
- // difference between the cluster and the frame itself. Do the
- // test again, comparing the frame to the new cluster.
- }
-}
-
-bool Segment::CheckHeaderInfo() {
- if (!header_written_) {
- if (!WriteSegmentHeader())
- return false;
-
- if (!seek_head_.AddSeekEntry(libwebm::kMkvCluster, MaxOffset()))
- return false;
-
- if (output_cues_ && cues_track_ == 0) {
- // Check for a video track
- for (uint32_t i = 0; i < tracks_.track_entries_size(); ++i) {
- const Track* const track = tracks_.GetTrackByIndex(i);
- if (!track)
- return false;
-
- if (tracks_.TrackIsVideo(track->number())) {
- cues_track_ = track->number();
- break;
- }
- }
-
- // Set first track found
- if (cues_track_ == 0) {
- const Track* const track = tracks_.GetTrackByIndex(0);
- if (!track)
- return false;
-
- cues_track_ = track->number();
- }
- }
- }
- return true;
-}
-
-void Segment::UpdateDocTypeVersion() {
- for (uint32_t index = 0; index < tracks_.track_entries_size(); ++index) {
- const Track* track = tracks_.GetTrackByIndex(index);
- if (track == NULL)
- break;
- if ((track->codec_delay() || track->seek_pre_roll()) &&
- doc_type_version_ < 4) {
- doc_type_version_ = 4;
- break;
- }
- }
-}
-
-bool Segment::UpdateChunkName(const char* ext, char** name) const {
- if (!name || !ext)
- return false;
-
- char ext_chk[64];
-#ifdef _MSC_VER
- sprintf_s(ext_chk, sizeof(ext_chk), "_%06d.%s", chunk_count_, ext);
-#else
- snprintf(ext_chk, sizeof(ext_chk), "_%06d.%s", chunk_count_, ext);
-#endif
-
- const size_t length = strlen(chunking_base_name_) + strlen(ext_chk) + 1;
- char* const str = new (std::nothrow) char[length]; // NOLINT
- if (!str)
- return false;
-
-#ifdef _MSC_VER
- strcpy_s(str, length - strlen(ext_chk), chunking_base_name_);
- strcat_s(str, length, ext_chk);
-#else
- strcpy(str, chunking_base_name_);
- strcat(str, ext_chk);
-#endif
-
- delete[] * name;
- *name = str;
-
- return true;
-}
-
-int64_t Segment::MaxOffset() {
- if (!writer_header_)
- return -1;
-
- int64_t offset = writer_header_->Position() - payload_pos_;
-
- if (chunking_) {
- for (int32_t i = 0; i < cluster_list_size_; ++i) {
- Cluster* const cluster = cluster_list_[i];
- offset += cluster->Size();
- }
-
- if (writer_cues_)
- offset += writer_cues_->Position();
- }
-
- return offset;
-}
-
-bool Segment::QueueFrame(Frame* frame) {
- const int32_t new_size = frames_size_ + 1;
-
- if (new_size > frames_capacity_) {
- // Add more frames.
- const int32_t new_capacity = (!frames_capacity_) ? 2 : frames_capacity_ * 2;
-
- if (new_capacity < 1)
- return false;
-
- Frame** const frames = new (std::nothrow) Frame*[new_capacity]; // NOLINT
- if (!frames)
- return false;
-
- for (int32_t i = 0; i < frames_size_; ++i) {
- frames[i] = frames_[i];
- }
-
- delete[] frames_;
- frames_ = frames;
- frames_capacity_ = new_capacity;
- }
-
- frames_[frames_size_++] = frame;
-
- return true;
-}
-
-int Segment::WriteFramesAll() {
- if (frames_ == NULL)
- return 0;
-
- if (cluster_list_size_ < 1)
- return -1;
-
- Cluster* const cluster = cluster_list_[cluster_list_size_ - 1];
-
- if (!cluster)
- return -1;
-
- for (int32_t i = 0; i < frames_size_; ++i) {
- Frame*& frame = frames_[i];
- // TODO(jzern/vigneshv): using Segment::AddGenericFrame here would limit the
- // places where |doc_type_version_| needs to be updated.
- if (frame->discard_padding() != 0)
- doc_type_version_ = 4;
- if (!cluster->AddFrame(frame))
- return -1;
-
- if (new_cuepoint_ && cues_track_ == frame->track_number()) {
- if (!AddCuePoint(frame->timestamp(), cues_track_))
- return -1;
- }
-
- if (frame->timestamp() > last_timestamp_) {
- last_timestamp_ = frame->timestamp();
- last_track_timestamp_[frame->track_number() - 1] = frame->timestamp();
- }
-
- delete frame;
- frame = NULL;
- }
-
- const int result = frames_size_;
- frames_size_ = 0;
-
- return result;
-}
-
-bool Segment::WriteFramesLessThan(uint64_t timestamp) {
- // Check |cluster_list_size_| to see if this is the first cluster. If it is
- // the first cluster the audio frames that are less than the first video
- // timesatmp will be written in a later step.
- if (frames_size_ > 0 && cluster_list_size_ > 0) {
- if (!frames_)
- return false;
-
- Cluster* const cluster = cluster_list_[cluster_list_size_ - 1];
- if (!cluster)
- return false;
-
- int32_t shift_left = 0;
-
- // TODO(fgalligan): Change this to use the durations of frames instead of
- // the next frame's start time if the duration is accurate.
- for (int32_t i = 1; i < frames_size_; ++i) {
- const Frame* const frame_curr = frames_[i];
-
- if (frame_curr->timestamp() > timestamp)
- break;
-
- const Frame* const frame_prev = frames_[i - 1];
- if (frame_prev->discard_padding() != 0)
- doc_type_version_ = 4;
- if (!cluster->AddFrame(frame_prev))
- return false;
-
- if (new_cuepoint_ && cues_track_ == frame_prev->track_number()) {
- if (!AddCuePoint(frame_prev->timestamp(), cues_track_))
- return false;
- }
-
- ++shift_left;
- if (frame_prev->timestamp() > last_timestamp_) {
- last_timestamp_ = frame_prev->timestamp();
- last_track_timestamp_[frame_prev->track_number() - 1] =
- frame_prev->timestamp();
- }
-
- delete frame_prev;
- }
-
- if (shift_left > 0) {
- if (shift_left >= frames_size_)
- return false;
-
- const int32_t new_frames_size = frames_size_ - shift_left;
- for (int32_t i = 0; i < new_frames_size; ++i) {
- frames_[i] = frames_[i + shift_left];
- }
-
- frames_size_ = new_frames_size;
- }
- }
-
- return true;
-}
-
-bool Segment::DocTypeIsWebm() const {
- const int kNumCodecIds = 10;
-
- // TODO(vigneshv): Tweak .clang-format.
- const char* kWebmCodecIds[kNumCodecIds] = {
- Tracks::kOpusCodecId, Tracks::kVorbisCodecId,
- Tracks::kVp8CodecId, Tracks::kVp9CodecId,
- Tracks::kVp10CodecId, Tracks::kAV1CodecId,
- Tracks::kWebVttCaptionsId, Tracks::kWebVttDescriptionsId,
- Tracks::kWebVttMetadataId, Tracks::kWebVttSubtitlesId};
-
- const int num_tracks = static_cast<int>(tracks_.track_entries_size());
- for (int track_index = 0; track_index < num_tracks; ++track_index) {
- const Track* const track = tracks_.GetTrackByIndex(track_index);
- const std::string codec_id = track->codec_id();
-
- bool id_is_webm = false;
- for (int id_index = 0; id_index < kNumCodecIds; ++id_index) {
- if (codec_id == kWebmCodecIds[id_index]) {
- id_is_webm = true;
- break;
- }
- }
-
- if (!id_is_webm)
- return false;
- }
-
- return true;
-}
-
-} // namespace mkvmuxer
diff --git a/third_party/aom/third_party/libwebm/mkvmuxer/mkvmuxer.h b/third_party/aom/third_party/libwebm/mkvmuxer/mkvmuxer.h
deleted file mode 100644
index 9e817bced..000000000
--- a/third_party/aom/third_party/libwebm/mkvmuxer/mkvmuxer.h
+++ /dev/null
@@ -1,1922 +0,0 @@
-// Copyright (c) 2012 The WebM project authors. All Rights Reserved.
-//
-// Use of this source code is governed by a BSD-style license
-// that can be found in the LICENSE file in the root of the source
-// tree. An additional intellectual property rights grant can be found
-// in the file PATENTS. All contributing project authors may
-// be found in the AUTHORS file in the root of the source tree.
-
-#ifndef MKVMUXER_MKVMUXER_H_
-#define MKVMUXER_MKVMUXER_H_
-
-#include <stdint.h>
-
-#include <cstddef>
-#include <list>
-#include <map>
-
-#include "common/webmids.h"
-#include "mkvmuxer/mkvmuxertypes.h"
-
-// For a description of the WebM elements see
-// http://www.webmproject.org/code/specs/container/.
-
-namespace mkvparser {
-class IMkvReader;
-} // namespace mkvparser
-
-namespace mkvmuxer {
-
-class MkvWriter;
-class Segment;
-
-const uint64_t kMaxTrackNumber = 126;
-
-///////////////////////////////////////////////////////////////
-// Interface used by the mkvmuxer to write out the Mkv data.
-class IMkvWriter {
- public:
- // Writes out |len| bytes of |buf|. Returns 0 on success.
- virtual int32 Write(const void* buf, uint32 len) = 0;
-
- // Returns the offset of the output position from the beginning of the
- // output.
- virtual int64 Position() const = 0;
-
- // Set the current File position. Returns 0 on success.
- virtual int32 Position(int64 position) = 0;
-
- // Returns true if the writer is seekable.
- virtual bool Seekable() const = 0;
-
- // Element start notification. Called whenever an element identifier is about
- // to be written to the stream. |element_id| is the element identifier, and
- // |position| is the location in the WebM stream where the first octet of the
- // element identifier will be written.
- // Note: the |MkvId| enumeration in webmids.hpp defines element values.
- virtual void ElementStartNotify(uint64 element_id, int64 position) = 0;
-
- protected:
- IMkvWriter();
- virtual ~IMkvWriter();
-
- private:
- LIBWEBM_DISALLOW_COPY_AND_ASSIGN(IMkvWriter);
-};
-
-// Writes out the EBML header for a WebM file, but allows caller to specify
-// DocType. This function must be called before any other libwebm writing
-// functions are called.
-bool WriteEbmlHeader(IMkvWriter* writer, uint64_t doc_type_version,
- const char* const doc_type);
-
-// Writes out the EBML header for a WebM file. This function must be called
-// before any other libwebm writing functions are called.
-bool WriteEbmlHeader(IMkvWriter* writer, uint64_t doc_type_version);
-
-// Deprecated. Writes out EBML header with doc_type_version as
-// kDefaultDocTypeVersion. Exists for backward compatibility.
-bool WriteEbmlHeader(IMkvWriter* writer);
-
-// Copies in Chunk from source to destination between the given byte positions
-bool ChunkedCopy(mkvparser::IMkvReader* source, IMkvWriter* dst, int64_t start,
- int64_t size);
-
-///////////////////////////////////////////////////////////////
-// Class to hold data the will be written to a block.
-class Frame {
- public:
- Frame();
- ~Frame();
-
- // Sets this frame's contents based on |frame|. Returns true on success. On
- // failure, this frame's existing contents may be lost.
- bool CopyFrom(const Frame& frame);
-
- // Copies |frame| data into |frame_|. Returns true on success.
- bool Init(const uint8_t* frame, uint64_t length);
-
- // Copies |additional| data into |additional_|. Returns true on success.
- bool AddAdditionalData(const uint8_t* additional, uint64_t length,
- uint64_t add_id);
-
- // Returns true if the frame has valid parameters.
- bool IsValid() const;
-
- // Returns true if the frame can be written as a SimpleBlock based on current
- // parameters.
- bool CanBeSimpleBlock() const;
-
- uint64_t add_id() const { return add_id_; }
- const uint8_t* additional() const { return additional_; }
- uint64_t additional_length() const { return additional_length_; }
- void set_duration(uint64_t duration);
- uint64_t duration() const { return duration_; }
- bool duration_set() const { return duration_set_; }
- const uint8_t* frame() const { return frame_; }
- void set_is_key(bool key) { is_key_ = key; }
- bool is_key() const { return is_key_; }
- uint64_t length() const { return length_; }
- void set_track_number(uint64_t track_number) { track_number_ = track_number; }
- uint64_t track_number() const { return track_number_; }
- void set_timestamp(uint64_t timestamp) { timestamp_ = timestamp; }
- uint64_t timestamp() const { return timestamp_; }
- void set_discard_padding(int64_t discard_padding) {
- discard_padding_ = discard_padding;
- }
- int64_t discard_padding() const { return discard_padding_; }
- void set_reference_block_timestamp(int64_t reference_block_timestamp);
- int64_t reference_block_timestamp() const {
- return reference_block_timestamp_;
- }
- bool reference_block_timestamp_set() const {
- return reference_block_timestamp_set_;
- }
-
- private:
- // Id of the Additional data.
- uint64_t add_id_;
-
- // Pointer to additional data. Owned by this class.
- uint8_t* additional_;
-
- // Length of the additional data.
- uint64_t additional_length_;
-
- // Duration of the frame in nanoseconds.
- uint64_t duration_;
-
- // Flag indicating that |duration_| has been set. Setting duration causes the
- // frame to be written out as a Block with BlockDuration instead of as a
- // SimpleBlock.
- bool duration_set_;
-
- // Pointer to the data. Owned by this class.
- uint8_t* frame_;
-
- // Flag telling if the data should set the key flag of a block.
- bool is_key_;
-
- // Length of the data.
- uint64_t length_;
-
- // Mkv track number the data is associated with.
- uint64_t track_number_;
-
- // Timestamp of the data in nanoseconds.
- uint64_t timestamp_;
-
- // Discard padding for the frame.
- int64_t discard_padding_;
-
- // Reference block timestamp.
- int64_t reference_block_timestamp_;
-
- // Flag indicating if |reference_block_timestamp_| has been set.
- bool reference_block_timestamp_set_;
-
- LIBWEBM_DISALLOW_COPY_AND_ASSIGN(Frame);
-};
-
-///////////////////////////////////////////////////////////////
-// Class to hold one cue point in a Cues element.
-class CuePoint {
- public:
- CuePoint();
- ~CuePoint();
-
- // Returns the size in bytes for the entire CuePoint element.
- uint64_t Size() const;
-
- // Output the CuePoint element to the writer. Returns true on success.
- bool Write(IMkvWriter* writer) const;
-
- void set_time(uint64_t time) { time_ = time; }
- uint64_t time() const { return time_; }
- void set_track(uint64_t track) { track_ = track; }
- uint64_t track() const { return track_; }
- void set_cluster_pos(uint64_t cluster_pos) { cluster_pos_ = cluster_pos; }
- uint64_t cluster_pos() const { return cluster_pos_; }
- void set_block_number(uint64_t block_number) { block_number_ = block_number; }
- uint64_t block_number() const { return block_number_; }
- void set_output_block_number(bool output_block_number) {
- output_block_number_ = output_block_number;
- }
- bool output_block_number() const { return output_block_number_; }
-
- private:
- // Returns the size in bytes for the payload of the CuePoint element.
- uint64_t PayloadSize() const;
-
- // Absolute timecode according to the segment time base.
- uint64_t time_;
-
- // The Track element associated with the CuePoint.
- uint64_t track_;
-
- // The position of the Cluster containing the Block.
- uint64_t cluster_pos_;
-
- // Number of the Block within the Cluster, starting from 1.
- uint64_t block_number_;
-
- // If true the muxer will write out the block number for the cue if the
- // block number is different than the default of 1. Default is set to true.
- bool output_block_number_;
-
- LIBWEBM_DISALLOW_COPY_AND_ASSIGN(CuePoint);
-};
-
-///////////////////////////////////////////////////////////////
-// Cues element.
-class Cues {
- public:
- Cues();
- ~Cues();
-
- // Adds a cue point to the Cues element. Returns true on success.
- bool AddCue(CuePoint* cue);
-
- // Returns the cue point by index. Returns NULL if there is no cue point
- // match.
- CuePoint* GetCueByIndex(int32_t index) const;
-
- // Returns the total size of the Cues element
- uint64_t Size();
-
- // Output the Cues element to the writer. Returns true on success.
- bool Write(IMkvWriter* writer) const;
-
- int32_t cue_entries_size() const { return cue_entries_size_; }
- void set_output_block_number(bool output_block_number) {
- output_block_number_ = output_block_number;
- }
- bool output_block_number() const { return output_block_number_; }
-
- private:
- // Number of allocated elements in |cue_entries_|.
- int32_t cue_entries_capacity_;
-
- // Number of CuePoints in |cue_entries_|.
- int32_t cue_entries_size_;
-
- // CuePoint list.
- CuePoint** cue_entries_;
-
- // If true the muxer will write out the block number for the cue if the
- // block number is different than the default of 1. Default is set to true.
- bool output_block_number_;
-
- LIBWEBM_DISALLOW_COPY_AND_ASSIGN(Cues);
-};
-
-///////////////////////////////////////////////////////////////
-// ContentEncAESSettings element
-class ContentEncAESSettings {
- public:
- enum { kCTR = 1 };
-
- ContentEncAESSettings();
- ~ContentEncAESSettings() {}
-
- // Returns the size in bytes for the ContentEncAESSettings element.
- uint64_t Size() const;
-
- // Writes out the ContentEncAESSettings element to |writer|. Returns true on
- // success.
- bool Write(IMkvWriter* writer) const;
-
- uint64_t cipher_mode() const { return cipher_mode_; }
-
- private:
- // Returns the size in bytes for the payload of the ContentEncAESSettings
- // element.
- uint64_t PayloadSize() const;
-
- // Sub elements
- uint64_t cipher_mode_;
-
- LIBWEBM_DISALLOW_COPY_AND_ASSIGN(ContentEncAESSettings);
-};
-
-///////////////////////////////////////////////////////////////
-// ContentEncoding element
-// Elements used to describe if the track data has been encrypted or
-// compressed with zlib or header stripping.
-// Currently only whole frames can be encrypted with AES. This dictates that
-// ContentEncodingOrder will be 0, ContentEncodingScope will be 1,
-// ContentEncodingType will be 1, and ContentEncAlgo will be 5.
-class ContentEncoding {
- public:
- ContentEncoding();
- ~ContentEncoding();
-
- // Sets the content encryption id. Copies |length| bytes from |id| to
- // |enc_key_id_|. Returns true on success.
- bool SetEncryptionID(const uint8_t* id, uint64_t length);
-
- // Returns the size in bytes for the ContentEncoding element.
- uint64_t Size() const;
-
- // Writes out the ContentEncoding element to |writer|. Returns true on
- // success.
- bool Write(IMkvWriter* writer) const;
-
- uint64_t enc_algo() const { return enc_algo_; }
- uint64_t encoding_order() const { return encoding_order_; }
- uint64_t encoding_scope() const { return encoding_scope_; }
- uint64_t encoding_type() const { return encoding_type_; }
- ContentEncAESSettings* enc_aes_settings() { return &enc_aes_settings_; }
-
- private:
- // Returns the size in bytes for the encoding elements.
- uint64_t EncodingSize(uint64_t compresion_size,
- uint64_t encryption_size) const;
-
- // Returns the size in bytes for the encryption elements.
- uint64_t EncryptionSize() const;
-
- // Track element names
- uint64_t enc_algo_;
- uint8_t* enc_key_id_;
- uint64_t encoding_order_;
- uint64_t encoding_scope_;
- uint64_t encoding_type_;
-
- // ContentEncAESSettings element.
- ContentEncAESSettings enc_aes_settings_;
-
- // Size of the ContentEncKeyID data in bytes.
- uint64_t enc_key_id_length_;
-
- LIBWEBM_DISALLOW_COPY_AND_ASSIGN(ContentEncoding);
-};
-
-///////////////////////////////////////////////////////////////
-// Colour element.
-class PrimaryChromaticity {
- public:
- static const float kChromaticityMin;
- static const float kChromaticityMax;
-
- PrimaryChromaticity(float x_val, float y_val) : x_(x_val), y_(y_val) {}
- PrimaryChromaticity() : x_(0), y_(0) {}
- ~PrimaryChromaticity() {}
-
- // Returns sum of |x_id| and |y_id| element id sizes and payload sizes.
- uint64_t PrimaryChromaticitySize(libwebm::MkvId x_id,
- libwebm::MkvId y_id) const;
- bool Valid() const;
- bool Write(IMkvWriter* writer, libwebm::MkvId x_id,
- libwebm::MkvId y_id) const;
-
- float x() const { return x_; }
- void set_x(float new_x) { x_ = new_x; }
- float y() const { return y_; }
- void set_y(float new_y) { y_ = new_y; }
-
- private:
- float x_;
- float y_;
-};
-
-class MasteringMetadata {
- public:
- static const float kValueNotPresent;
- static const float kMinLuminance;
- static const float kMinLuminanceMax;
- static const float kMaxLuminanceMax;
-
- MasteringMetadata()
- : luminance_max_(kValueNotPresent),
- luminance_min_(kValueNotPresent),
- r_(NULL),
- g_(NULL),
- b_(NULL),
- white_point_(NULL) {}
- ~MasteringMetadata() {
- delete r_;
- delete g_;
- delete b_;
- delete white_point_;
- }
-
- // Returns total size of the MasteringMetadata element.
- uint64_t MasteringMetadataSize() const;
- bool Valid() const;
- bool Write(IMkvWriter* writer) const;
-
- // Copies non-null chromaticity.
- bool SetChromaticity(const PrimaryChromaticity* r,
- const PrimaryChromaticity* g,
- const PrimaryChromaticity* b,
- const PrimaryChromaticity* white_point);
- const PrimaryChromaticity* r() const { return r_; }
- const PrimaryChromaticity* g() const { return g_; }
- const PrimaryChromaticity* b() const { return b_; }
- const PrimaryChromaticity* white_point() const { return white_point_; }
-
- float luminance_max() const { return luminance_max_; }
- void set_luminance_max(float luminance_max) {
- luminance_max_ = luminance_max;
- }
- float luminance_min() const { return luminance_min_; }
- void set_luminance_min(float luminance_min) {
- luminance_min_ = luminance_min;
- }
-
- private:
- // Returns size of MasteringMetadata child elements.
- uint64_t PayloadSize() const;
-
- float luminance_max_;
- float luminance_min_;
- PrimaryChromaticity* r_;
- PrimaryChromaticity* g_;
- PrimaryChromaticity* b_;
- PrimaryChromaticity* white_point_;
-};
-
-class Colour {
- public:
- enum MatrixCoefficients {
- kGbr = 0,
- kBt709 = 1,
- kUnspecifiedMc = 2,
- kReserved = 3,
- kFcc = 4,
- kBt470bg = 5,
- kSmpte170MMc = 6,
- kSmpte240MMc = 7,
- kYcocg = 8,
- kBt2020NonConstantLuminance = 9,
- kBt2020ConstantLuminance = 10,
- };
- enum ChromaSitingHorz {
- kUnspecifiedCsh = 0,
- kLeftCollocated = 1,
- kHalfCsh = 2,
- };
- enum ChromaSitingVert {
- kUnspecifiedCsv = 0,
- kTopCollocated = 1,
- kHalfCsv = 2,
- };
- enum Range {
- kUnspecifiedCr = 0,
- kBroadcastRange = 1,
- kFullRange = 2,
- kMcTcDefined = 3, // Defined by MatrixCoefficients/TransferCharacteristics.
- };
- enum TransferCharacteristics {
- kIturBt709Tc = 1,
- kUnspecifiedTc = 2,
- kReservedTc = 3,
- kGamma22Curve = 4,
- kGamma28Curve = 5,
- kSmpte170MTc = 6,
- kSmpte240MTc = 7,
- kLinear = 8,
- kLog = 9,
- kLogSqrt = 10,
- kIec6196624 = 11,
- kIturBt1361ExtendedColourGamut = 12,
- kIec6196621 = 13,
- kIturBt202010bit = 14,
- kIturBt202012bit = 15,
- kSmpteSt2084 = 16,
- kSmpteSt4281Tc = 17,
- kAribStdB67Hlg = 18,
- };
- enum Primaries {
- kReservedP0 = 0,
- kIturBt709P = 1,
- kUnspecifiedP = 2,
- kReservedP3 = 3,
- kIturBt470M = 4,
- kIturBt470Bg = 5,
- kSmpte170MP = 6,
- kSmpte240MP = 7,
- kFilm = 8,
- kIturBt2020 = 9,
- kSmpteSt4281P = 10,
- kJedecP22Phosphors = 22,
- };
- static const uint64_t kValueNotPresent;
- Colour()
- : matrix_coefficients_(kValueNotPresent),
- bits_per_channel_(kValueNotPresent),
- chroma_subsampling_horz_(kValueNotPresent),
- chroma_subsampling_vert_(kValueNotPresent),
- cb_subsampling_horz_(kValueNotPresent),
- cb_subsampling_vert_(kValueNotPresent),
- chroma_siting_horz_(kValueNotPresent),
- chroma_siting_vert_(kValueNotPresent),
- range_(kValueNotPresent),
- transfer_characteristics_(kValueNotPresent),
- primaries_(kValueNotPresent),
- max_cll_(kValueNotPresent),
- max_fall_(kValueNotPresent),
- mastering_metadata_(NULL) {}
- ~Colour() { delete mastering_metadata_; }
-
- // Returns total size of the Colour element.
- uint64_t ColourSize() const;
- bool Valid() const;
- bool Write(IMkvWriter* writer) const;
-
- // Deep copies |mastering_metadata|.
- bool SetMasteringMetadata(const MasteringMetadata& mastering_metadata);
-
- const MasteringMetadata* mastering_metadata() const {
- return mastering_metadata_;
- }
-
- uint64_t matrix_coefficients() const { return matrix_coefficients_; }
- void set_matrix_coefficients(uint64_t matrix_coefficients) {
- matrix_coefficients_ = matrix_coefficients;
- }
- uint64_t bits_per_channel() const { return bits_per_channel_; }
- void set_bits_per_channel(uint64_t bits_per_channel) {
- bits_per_channel_ = bits_per_channel;
- }
- uint64_t chroma_subsampling_horz() const { return chroma_subsampling_horz_; }
- void set_chroma_subsampling_horz(uint64_t chroma_subsampling_horz) {
- chroma_subsampling_horz_ = chroma_subsampling_horz;
- }
- uint64_t chroma_subsampling_vert() const { return chroma_subsampling_vert_; }
- void set_chroma_subsampling_vert(uint64_t chroma_subsampling_vert) {
- chroma_subsampling_vert_ = chroma_subsampling_vert;
- }
- uint64_t cb_subsampling_horz() const { return cb_subsampling_horz_; }
- void set_cb_subsampling_horz(uint64_t cb_subsampling_horz) {
- cb_subsampling_horz_ = cb_subsampling_horz;
- }
- uint64_t cb_subsampling_vert() const { return cb_subsampling_vert_; }
- void set_cb_subsampling_vert(uint64_t cb_subsampling_vert) {
- cb_subsampling_vert_ = cb_subsampling_vert;
- }
- uint64_t chroma_siting_horz() const { return chroma_siting_horz_; }
- void set_chroma_siting_horz(uint64_t chroma_siting_horz) {
- chroma_siting_horz_ = chroma_siting_horz;
- }
- uint64_t chroma_siting_vert() const { return chroma_siting_vert_; }
- void set_chroma_siting_vert(uint64_t chroma_siting_vert) {
- chroma_siting_vert_ = chroma_siting_vert;
- }
- uint64_t range() const { return range_; }
- void set_range(uint64_t range) { range_ = range; }
- uint64_t transfer_characteristics() const {
- return transfer_characteristics_;
- }
- void set_transfer_characteristics(uint64_t transfer_characteristics) {
- transfer_characteristics_ = transfer_characteristics;
- }
- uint64_t primaries() const { return primaries_; }
- void set_primaries(uint64_t primaries) { primaries_ = primaries; }
- uint64_t max_cll() const { return max_cll_; }
- void set_max_cll(uint64_t max_cll) { max_cll_ = max_cll; }
- uint64_t max_fall() const { return max_fall_; }
- void set_max_fall(uint64_t max_fall) { max_fall_ = max_fall; }
-
- private:
- // Returns size of Colour child elements.
- uint64_t PayloadSize() const;
-
- uint64_t matrix_coefficients_;
- uint64_t bits_per_channel_;
- uint64_t chroma_subsampling_horz_;
- uint64_t chroma_subsampling_vert_;
- uint64_t cb_subsampling_horz_;
- uint64_t cb_subsampling_vert_;
- uint64_t chroma_siting_horz_;
- uint64_t chroma_siting_vert_;
- uint64_t range_;
- uint64_t transfer_characteristics_;
- uint64_t primaries_;
- uint64_t max_cll_;
- uint64_t max_fall_;
-
- MasteringMetadata* mastering_metadata_;
-};
-
-///////////////////////////////////////////////////////////////
-// Projection element.
-class Projection {
- public:
- enum ProjectionType {
- kTypeNotPresent = -1,
- kRectangular = 0,
- kEquirectangular = 1,
- kCubeMap = 2,
- kMesh = 3,
- };
- static const uint64_t kValueNotPresent;
- Projection()
- : type_(kRectangular),
- pose_yaw_(0.0),
- pose_pitch_(0.0),
- pose_roll_(0.0),
- private_data_(NULL),
- private_data_length_(0) {}
- ~Projection() { delete[] private_data_; }
-
- uint64_t ProjectionSize() const;
- bool Write(IMkvWriter* writer) const;
-
- bool SetProjectionPrivate(const uint8_t* private_data,
- uint64_t private_data_length);
-
- ProjectionType type() const { return type_; }
- void set_type(ProjectionType type) { type_ = type; }
- float pose_yaw() const { return pose_yaw_; }
- void set_pose_yaw(float pose_yaw) { pose_yaw_ = pose_yaw; }
- float pose_pitch() const { return pose_pitch_; }
- void set_pose_pitch(float pose_pitch) { pose_pitch_ = pose_pitch; }
- float pose_roll() const { return pose_roll_; }
- void set_pose_roll(float pose_roll) { pose_roll_ = pose_roll; }
- uint8_t* private_data() const { return private_data_; }
- uint64_t private_data_length() const { return private_data_length_; }
-
- private:
- // Returns size of VideoProjection child elements.
- uint64_t PayloadSize() const;
-
- ProjectionType type_;
- float pose_yaw_;
- float pose_pitch_;
- float pose_roll_;
- uint8_t* private_data_;
- uint64_t private_data_length_;
-};
-
-///////////////////////////////////////////////////////////////
-// Track element.
-class Track {
- public:
- // The |seed| parameter is used to synthesize a UID for the track.
- explicit Track(unsigned int* seed);
- virtual ~Track();
-
- // Adds a ContentEncoding element to the Track. Returns true on success.
- virtual bool AddContentEncoding();
-
- // Returns the ContentEncoding by index. Returns NULL if there is no
- // ContentEncoding match.
- ContentEncoding* GetContentEncodingByIndex(uint32_t index) const;
-
- // Returns the size in bytes for the payload of the Track element.
- virtual uint64_t PayloadSize() const;
-
- // Returns the size in bytes of the Track element.
- virtual uint64_t Size() const;
-
- // Output the Track element to the writer. Returns true on success.
- virtual bool Write(IMkvWriter* writer) const;
-
- // Sets the CodecPrivate element of the Track element. Copies |length|
- // bytes from |codec_private| to |codec_private_|. Returns true on success.
- bool SetCodecPrivate(const uint8_t* codec_private, uint64_t length);
-
- void set_codec_id(const char* codec_id);
- const char* codec_id() const { return codec_id_; }
- const uint8_t* codec_private() const { return codec_private_; }
- void set_language(const char* language);
- const char* language() const { return language_; }
- void set_max_block_additional_id(uint64_t max_block_additional_id) {
- max_block_additional_id_ = max_block_additional_id;
- }
- uint64_t max_block_additional_id() const { return max_block_additional_id_; }
- void set_name(const char* name);
- const char* name() const { return name_; }
- void set_number(uint64_t number) { number_ = number; }
- uint64_t number() const { return number_; }
- void set_type(uint64_t type) { type_ = type; }
- uint64_t type() const { return type_; }
- void set_uid(uint64_t uid) { uid_ = uid; }
- uint64_t uid() const { return uid_; }
- void set_codec_delay(uint64_t codec_delay) { codec_delay_ = codec_delay; }
- uint64_t codec_delay() const { return codec_delay_; }
- void set_seek_pre_roll(uint64_t seek_pre_roll) {
- seek_pre_roll_ = seek_pre_roll;
- }
- uint64_t seek_pre_roll() const { return seek_pre_roll_; }
- void set_default_duration(uint64_t default_duration) {
- default_duration_ = default_duration;
- }
- uint64_t default_duration() const { return default_duration_; }
-
- uint64_t codec_private_length() const { return codec_private_length_; }
- uint32_t content_encoding_entries_size() const {
- return content_encoding_entries_size_;
- }
-
- private:
- // Track element names.
- char* codec_id_;
- uint8_t* codec_private_;
- char* language_;
- uint64_t max_block_additional_id_;
- char* name_;
- uint64_t number_;
- uint64_t type_;
- uint64_t uid_;
- uint64_t codec_delay_;
- uint64_t seek_pre_roll_;
- uint64_t default_duration_;
-
- // Size of the CodecPrivate data in bytes.
- uint64_t codec_private_length_;
-
- // ContentEncoding element list.
- ContentEncoding** content_encoding_entries_;
-
- // Number of ContentEncoding elements added.
- uint32_t content_encoding_entries_size_;
-
- LIBWEBM_DISALLOW_COPY_AND_ASSIGN(Track);
-};
-
-///////////////////////////////////////////////////////////////
-// Track that has video specific elements.
-class VideoTrack : public Track {
- public:
- // Supported modes for stereo 3D.
- enum StereoMode {
- kMono = 0,
- kSideBySideLeftIsFirst = 1,
- kTopBottomRightIsFirst = 2,
- kTopBottomLeftIsFirst = 3,
- kSideBySideRightIsFirst = 11
- };
-
- enum AlphaMode { kNoAlpha = 0, kAlpha = 1 };
-
- // The |seed| parameter is used to synthesize a UID for the track.
- explicit VideoTrack(unsigned int* seed);
- virtual ~VideoTrack();
-
- // Returns the size in bytes for the payload of the Track element plus the
- // video specific elements.
- virtual uint64_t PayloadSize() const;
-
- // Output the VideoTrack element to the writer. Returns true on success.
- virtual bool Write(IMkvWriter* writer) const;
-
- // Sets the video's stereo mode. Returns true on success.
- bool SetStereoMode(uint64_t stereo_mode);
-
- // Sets the video's alpha mode. Returns true on success.
- bool SetAlphaMode(uint64_t alpha_mode);
-
- void set_display_height(uint64_t height) { display_height_ = height; }
- uint64_t display_height() const { return display_height_; }
- void set_display_width(uint64_t width) { display_width_ = width; }
- uint64_t display_width() const { return display_width_; }
- void set_pixel_height(uint64_t height) { pixel_height_ = height; }
- uint64_t pixel_height() const { return pixel_height_; }
- void set_pixel_width(uint64_t width) { pixel_width_ = width; }
- uint64_t pixel_width() const { return pixel_width_; }
-
- void set_crop_left(uint64_t crop_left) { crop_left_ = crop_left; }
- uint64_t crop_left() const { return crop_left_; }
- void set_crop_right(uint64_t crop_right) { crop_right_ = crop_right; }
- uint64_t crop_right() const { return crop_right_; }
- void set_crop_top(uint64_t crop_top) { crop_top_ = crop_top; }
- uint64_t crop_top() const { return crop_top_; }
- void set_crop_bottom(uint64_t crop_bottom) { crop_bottom_ = crop_bottom; }
- uint64_t crop_bottom() const { return crop_bottom_; }
-
- void set_frame_rate(double frame_rate) { frame_rate_ = frame_rate; }
- double frame_rate() const { return frame_rate_; }
- void set_height(uint64_t height) { height_ = height; }
- uint64_t height() const { return height_; }
- uint64_t stereo_mode() { return stereo_mode_; }  // NOTE(review): not const-qualified, unlike the other getters.
- uint64_t alpha_mode() { return alpha_mode_; }  // NOTE(review): not const-qualified, unlike the other getters.
- void set_width(uint64_t width) { width_ = width; }
- uint64_t width() const { return width_; }
-
- Colour* colour() { return colour_; }
-
- // Deep copies |colour|.
- bool SetColour(const Colour& colour);
-
- Projection* projection() { return projection_; }
-
- // Deep copies |projection|.
- bool SetProjection(const Projection& projection);
-
- private:
- // Returns the size in bytes of the Video element.
- uint64_t VideoPayloadSize() const;
-
- // Video track element values.
- uint64_t display_height_;
- uint64_t display_width_;
- uint64_t pixel_height_;
- uint64_t pixel_width_;
- uint64_t crop_left_;
- uint64_t crop_right_;
- uint64_t crop_top_;
- uint64_t crop_bottom_;
- double frame_rate_;
- uint64_t height_;
- uint64_t stereo_mode_;
- uint64_t alpha_mode_;
- uint64_t width_;
-
- Colour* colour_;  // See SetColour().
- Projection* projection_;  // See SetProjection().
-
- LIBWEBM_DISALLOW_COPY_AND_ASSIGN(VideoTrack);
-};
-
-///////////////////////////////////////////////////////////////
-// Track that has audio specific elements.
-class AudioTrack : public Track {
- public:
- // The |seed| parameter is used to synthesize a UID for the track.
- explicit AudioTrack(unsigned int* seed);
- virtual ~AudioTrack();
-
- // Returns the size in bytes for the payload of the Track element plus the
- // audio specific elements.
- virtual uint64_t PayloadSize() const;
-
- // Output the AudioTrack element to the writer. Returns true on success.
- virtual bool Write(IMkvWriter* writer) const;
-
- void set_bit_depth(uint64_t bit_depth) { bit_depth_ = bit_depth; }
- uint64_t bit_depth() const { return bit_depth_; }
- void set_channels(uint64_t channels) { channels_ = channels; }
- uint64_t channels() const { return channels_; }
- void set_sample_rate(double sample_rate) { sample_rate_ = sample_rate; }
- double sample_rate() const { return sample_rate_; }
-
- private:
- // Audio-specific track element values, assigned through the setters above.
- uint64_t bit_depth_;
- uint64_t channels_;
- double sample_rate_;
-
- LIBWEBM_DISALLOW_COPY_AND_ASSIGN(AudioTrack);
-};
-
-///////////////////////////////////////////////////////////////
-// Tracks element
-class Tracks {
- public:
- // Audio and video type defined by the Matroska specs.
- enum { kVideo = 0x1, kAudio = 0x2 };
-
- static const char kOpusCodecId[];
- static const char kVorbisCodecId[];
- static const char kVp8CodecId[];
- static const char kVp9CodecId[];
- static const char kVp10CodecId[];
- static const char kAV1CodecId[];
- static const char kWebVttCaptionsId[];
- static const char kWebVttDescriptionsId[];
- static const char kWebVttMetadataId[];
- static const char kWebVttSubtitlesId[];
-
- Tracks();
- ~Tracks();
-
- // Adds a Track element to the Tracks object. |track| will be owned and
- // deleted by the Tracks object. Returns true on success. |number| is the
- // number to use for the track. |number| must be >= 0. If |number| == 0
- // then the muxer will decide on the track number.
- bool AddTrack(Track* track, int32_t number);
-
- // Returns the track by index. Returns NULL if there is no track match.
- const Track* GetTrackByIndex(uint32_t idx) const;
-
- // Search the Tracks and return the track that matches |track_number|.
- // Returns NULL if there is no track match.
- Track* GetTrackByNumber(uint64_t track_number) const;
-
- // Returns true if the track number is an audio track.
- bool TrackIsAudio(uint64_t track_number) const;
-
- // Returns true if the track number is a video track.
- bool TrackIsVideo(uint64_t track_number) const;
-
- // Output the Tracks element to the writer. Returns true on success.
- bool Write(IMkvWriter* writer) const;
-
- uint32_t track_entries_size() const { return track_entries_size_; }
-
- private:
- // Track element list.
- Track** track_entries_;
-
- // Number of Track elements added.
- uint32_t track_entries_size_;
-
- // Whether or not Tracks element has already been written via IMkvWriter.
- mutable bool wrote_tracks_;
-
- LIBWEBM_DISALLOW_COPY_AND_ASSIGN(Tracks);
-};
-
-///////////////////////////////////////////////////////////////
-// Chapter element
-//
-class Chapter {
- public:
- // Sets the identifier for this chapter. (This corresponds to the
- // Cue Identifier line in WebVTT.)
- // TODO(matthewjheaney): the actual serialization of this item in
- // MKV is pending.
- bool set_id(const char* id);
-
- // Converts the nanosecond start and stop times of this chapter to
- // their corresponding timecode values, and stores them that way.
- void set_time(const Segment& segment, uint64_t start_time_ns,
- uint64_t end_time_ns);
-
- // Sets the uid for this chapter. Primarily used to enable
- // deterministic output from the muxer.
- void set_uid(const uint64_t uid) { uid_ = uid; }
-
- // Adds a title string to this chapter, per the semantics described
- // here:
- // http://www.matroska.org/technical/specs/index.html
- //
- // The title ("chapter string") is a UTF-8 string.
- //
- // The language has ISO 639-2 representation, described here:
- // http://www.loc.gov/standards/iso639-2/englangn.html
- // http://www.loc.gov/standards/iso639-2/php/English_list.php
- // If you specify NULL as the language value, this implies
- // English ("eng").
- //
- // The country value corresponds to the codes listed here:
- // http://www.iana.org/domains/root/db/
- //
- // The function returns false if the string could not be allocated.
- bool add_string(const char* title, const char* language, const char* country);
-
- private:
- friend class Chapters;
-
- // For storage of chapter titles that differ by language.
- class Display {
- public:
- // Establishes the representation invariant for a new Display object.
- void Init();
-
- // Reclaims resources, in anticipation of destruction.
- void Clear();
-
- // Copies the title to the |title_| member. Returns false on
- // error.
- bool set_title(const char* title);
-
- // Copies the language to the |language_| member. Returns false
- // on error.
- bool set_language(const char* language);
-
- // Copies the country to the |country_| member. Returns false on
- // error.
- bool set_country(const char* country);
-
- // If |writer| is non-NULL, serialize the Display sub-element of
- // the Atom into the stream. Returns the Display element size on
- // success, 0 if error.
- uint64_t WriteDisplay(IMkvWriter* writer) const;
-
- private:
- char* title_;
- char* language_;
- char* country_;
- };
-
- Chapter();  // Private: outside this class, only the friend class Chapters can construct a Chapter.
- ~Chapter();  // Private: outside this class, only the friend class Chapters can destroy a Chapter.
-
- // Establishes the representation invariant for a newly-created
- // Chapter object. The |seed| parameter is used to create the UID
- // for this chapter atom.
- void Init(unsigned int* seed);
-
- // Copies this Chapter object to a different one. This is used when
- // expanding a plain array of Chapter objects (see Chapters).
- void ShallowCopy(Chapter* dst) const;
-
- // Reclaims resources used by this Chapter object, pending its
- // destruction.
- void Clear();
-
- // If there is no storage remaining on the |displays_| array for a
- // new display object, creates a new, longer array and copies the
- // existing Display objects to the new array. Returns false if the
- // array cannot be expanded.
- bool ExpandDisplaysArray();
-
- // If |writer| is non-NULL, serialize the Atom sub-element into the
- // stream. Returns the total size of the element on success, 0 if
- // error.
- uint64_t WriteAtom(IMkvWriter* writer) const;
-
- // The string identifier for this chapter (corresponds to WebVTT cue
- // identifier).
- char* id_;
-
- // Start timecode of the chapter.
- uint64_t start_timecode_;
-
- // Stop timecode of the chapter.
- uint64_t end_timecode_;
-
- // The binary identifier for this chapter.
- uint64_t uid_;
-
- // The Atom element can contain multiple Display sub-elements, as
- // the same logical title can be rendered in different languages.
- Display* displays_;
-
- // The physical length (total size) of the |displays_| array.
- int displays_size_;
-
- // The logical length (number of active elements) on the |displays_|
- // array.
- int displays_count_;
-
- LIBWEBM_DISALLOW_COPY_AND_ASSIGN(Chapter);
-};
-
-///////////////////////////////////////////////////////////////
-// Chapters element
-//
-class Chapters {
- public:
- Chapters();
- ~Chapters();
-
- Chapter* AddChapter(unsigned int* seed);  // NOTE(review): presumably forwards |seed| to Chapter::Init for UID creation; confirm.
-
- // Returns the number of chapters that have been added.
- int Count() const;
-
- // Output the Chapters element to the writer. Returns true on success.
- bool Write(IMkvWriter* writer) const;
-
- private:
- // Expands the |chapters_| array if there is not enough space to contain
- // another chapter object. Returns true on success.
- bool ExpandChaptersArray();
-
- // If |writer| is non-NULL, serialize the Edition sub-element of the
- // Chapters element into the stream. Returns the Edition element
- // size on success, 0 if error.
- uint64_t WriteEdition(IMkvWriter* writer) const;
-
- // Total length of the |chapters_| array.
- int chapters_size_;
-
- // Number of active chapters on the |chapters_| array.
- int chapters_count_;
-
- // Array for storage of chapter objects.
- Chapter* chapters_;
-
- LIBWEBM_DISALLOW_COPY_AND_ASSIGN(Chapters);
-};
-
-///////////////////////////////////////////////////////////////
-// Tag element
-//
-class Tag {
- public:
- bool add_simple_tag(const char* tag_name, const char* tag_string);
-
- private:
- // Tags calls Clear and the destructor of Tag
- friend class Tags;
-
- // For storage of simple tags
- class SimpleTag {
- public:
- // Establish representation invariant for new SimpleTag object.
- void Init();
-
- // Reclaim resources, in anticipation of destruction.
- void Clear();
-
- // Copies the tag name to the |tag_name_| member. Returns false on
- // error.
- bool set_tag_name(const char* tag_name);
-
- // Copies the tag string to the |tag_string_| member. Returns false
- // on error.
- bool set_tag_string(const char* tag_string);
-
- // If |writer| is non-NULL, serialize the SimpleTag sub-element of
- // the Tag into the stream. Returns the SimpleTag element size on
- // success, 0 if error.
- uint64_t Write(IMkvWriter* writer) const;
-
- private:
- char* tag_name_;
- char* tag_string_;
- };
-
- Tag();
- ~Tag();
-
- // Copies this Tag object to a different one. This is used when
- // expanding a plain array of Tag objects (see Tags).
- void ShallowCopy(Tag* dst) const;
-
- // Reclaim resources used by this Tag object, pending its
- // destruction.
- void Clear();
-
- // If there is no storage remaining on the |simple_tags_| array for a
- // new SimpleTag object, creates a new, longer array and copies the
- // existing SimpleTag objects to the new array. Returns false if the
- // array cannot be expanded.
- bool ExpandSimpleTagsArray();
-
- // If |writer| is non-NULL, serialize the Tag sub-element into the
- // stream. Returns the total size of the element on success, 0 if
- // error.
- uint64_t Write(IMkvWriter* writer) const;
-
- // The Tag element can contain multiple SimpleTag sub-elements
- SimpleTag* simple_tags_;
-
- // The physical length (total size) of the |simple_tags_| array.
- int simple_tags_size_;
-
- // The logical length (number of active elements) on the |simple_tags_|
- // array.
- int simple_tags_count_;
-
- LIBWEBM_DISALLOW_COPY_AND_ASSIGN(Tag);
-};
-
-///////////////////////////////////////////////////////////////
-// Tags element
-//
-class Tags {
- public:
- Tags();
- ~Tags();
-
- Tag* AddTag();  // NOTE(review): presumably returns NULL when the array cannot be expanded; confirm.
-
- // Returns the number of tags that have been added.
- int Count() const;
-
- // Output the Tags element to the writer. Returns true on success.
- bool Write(IMkvWriter* writer) const;
-
- private:
- // Expands the |tags_| array if there is not enough space to contain
- // another tag object. Returns true on success.
- bool ExpandTagsArray();
-
- // Total length of the |tags_| array.
- int tags_size_;
-
- // Number of active tags on the |tags_| array.
- int tags_count_;
-
- // Array for storage of tag objects.
- Tag* tags_;
-
- LIBWEBM_DISALLOW_COPY_AND_ASSIGN(Tags);
-};
-
-///////////////////////////////////////////////////////////////
-// Cluster element
-//
-// Notes:
-// |Init| must be called before any other method in this class.
-class Cluster {
- public:
- // |timecode| is the absolute timecode of the cluster. |cues_pos| is the
- // position for the cluster within the segment that should be written in
- // the cues element. |timecode_scale| is the timecode scale of the segment.
- Cluster(uint64_t timecode, int64_t cues_pos, uint64_t timecode_scale,
- bool write_last_frame_with_duration = false,  // See |write_last_frame_with_duration_|.
- bool fixed_size_timecode = false);  // See |fixed_size_timecode_|.
- ~Cluster();
-
- bool Init(IMkvWriter* ptr_writer);  // NOTE(review): presumably stores |ptr_writer| in |writer_| (not owned); confirm.
-
- // Adds a frame to be output in the file. The frame is written out through
- // |writer_| if successful. Returns true on success.
- bool AddFrame(const Frame* frame);
-
- // Adds a frame to be output in the file. The frame is written out through
- // |writer_| if successful. Returns true on success.
- // Inputs:
- // data: Pointer to the data
- // length: Length of the data
- // track_number: Track to add the data to. Value returned by Add track
- // functions. The range of allowed values is [1, 126].
- // timecode: Absolute (not relative to cluster) timestamp of the
- // frame, expressed in timecode units.
- // is_key: Flag telling whether or not this frame is a key frame.
- bool AddFrame(const uint8_t* data, uint64_t length, uint64_t track_number,
- uint64_t timecode, // timecode units (absolute)
- bool is_key);
-
- // Adds a frame to be output in the file. The frame is written out through
- // |writer_| if successful. Returns true on success.
- // Inputs:
- // data: Pointer to the data
- // length: Length of the data
- // additional: Pointer to the additional data
- // additional_length: Length of the additional data
- // add_id: Value of BlockAddID element
- // track_number: Track to add the data to. Value returned by Add track
- // functions. The range of allowed values is [1, 126].
- // abs_timecode: Absolute (not relative to cluster) timestamp of the
- // frame, expressed in timecode units.
- // is_key: Flag telling whether or not this frame is a key frame.
- bool AddFrameWithAdditional(const uint8_t* data, uint64_t length,
- const uint8_t* additional,
- uint64_t additional_length, uint64_t add_id,
- uint64_t track_number, uint64_t abs_timecode,
- bool is_key);
-
- // Adds a frame to be output in the file. The frame is written out through
- // |writer_| if successful. Returns true on success.
- // Inputs:
- // data: Pointer to the data.
- // length: Length of the data.
- // discard_padding: DiscardPadding element value.
- // track_number: Track to add the data to. Value returned by Add track
- // functions. The range of allowed values is [1, 126].
- // abs_timecode: Absolute (not relative to cluster) timestamp of the
- // frame, expressed in timecode units.
- // is_key: Flag telling whether or not this frame is a key frame.
- bool AddFrameWithDiscardPadding(const uint8_t* data, uint64_t length,
- int64_t discard_padding,
- uint64_t track_number, uint64_t abs_timecode,
- bool is_key);
-
- // Writes a frame of metadata to the output medium; returns true on
- // success.
- // Inputs:
- // data: Pointer to the data
- // length: Length of the data
- // track_number: Track to add the data to. Value returned by Add track
- // functions. The range of allowed values is [1, 126].
- // timecode: Absolute (not relative to cluster) timestamp of the
- // metadata frame, expressed in timecode units.
- // duration: Duration of metadata frame, in timecode units.
- //
- // The metadata frame is written as a block group, with a duration
- // sub-element but no reference time sub-elements (indicating that
- // it is considered a keyframe, per Matroska semantics).
- bool AddMetadata(const uint8_t* data, uint64_t length, uint64_t track_number,
- uint64_t timecode, uint64_t duration);
-
- // Increments the size of the cluster's data in bytes.
- void AddPayloadSize(uint64_t size);
-
- // Closes the cluster so no more data can be written to it. Will update the
- // cluster's size if |writer_| is seekable. Returns true on success. This
- // variant of Finalize() fails when |write_last_frame_with_duration_| is set
- // to true.
- bool Finalize();
-
- // Closes the cluster so no more data can be written to it. Will update the
- // cluster's size if |writer_| is seekable. Returns true on success.
- // Inputs:
- // set_last_frame_duration: Boolean indicating whether or not the duration
- // of the last frame should be set. If set to
- // false, the |duration| value is ignored and
- // |write_last_frame_with_duration_| will not be
- // honored.
- // duration: Duration of the Cluster in timecode scale.
- bool Finalize(bool set_last_frame_duration, uint64_t duration);
-
- // Returns the size in bytes for the entire Cluster element.
- uint64_t Size() const;
-
- // Given |abs_timecode|, calculates timecode relative to most recent timecode.
- // Returns -1 on failure, or a relative timecode.
- int64_t GetRelativeTimecode(int64_t abs_timecode) const;
-
- int64_t size_position() const { return size_position_; }
- int32_t blocks_added() const { return blocks_added_; }
- uint64_t payload_size() const { return payload_size_; }
- int64_t position_for_cues() const { return position_for_cues_; }
- uint64_t timecode() const { return timecode_; }
- uint64_t timecode_scale() const { return timecode_scale_; }
- void set_write_last_frame_with_duration(bool write_last_frame_with_duration) {
- write_last_frame_with_duration_ = write_last_frame_with_duration;
- }
- bool write_last_frame_with_duration() const {
- return write_last_frame_with_duration_;
- }
-
- private:
- // Iterator type for the |stored_frames_| map.
- typedef std::map<uint64_t, std::list<Frame*> >::iterator FrameMapIterator;
-
- // Utility method that confirms that blocks can still be added, and that the
- // cluster header has been written. Used by |DoWriteFrame*|. Returns true
- // when successful.
- bool PreWriteBlock();
-
- // Utility method used by the |DoWriteFrame*| methods that handles the
- // bookkeeping required after each block is written.
- void PostWriteBlock(uint64_t element_size);
-
- // Does some verification and calls WriteFrame.
- bool DoWriteFrame(const Frame* const frame);
-
- // Either holds back the given frame, or writes it out depending on whether or
- // not |write_last_frame_with_duration_| is set.
- bool QueueOrWriteFrame(const Frame* const frame);
-
- // Outputs the Cluster header to |writer_|. Returns true on success.
- bool WriteClusterHeader();
-
- // Number of blocks added to the cluster.
- int32_t blocks_added_;
-
- // Flag telling if the cluster has been closed.
- bool finalized_;
-
- // Flag indicating whether the cluster's timecode will always be written out
- // using 8 bytes.
- bool fixed_size_timecode_;
-
- // Flag telling if the cluster's header has been written.
- bool header_written_;
-
- // The size of the cluster elements in bytes.
- uint64_t payload_size_;
-
- // The file position used for cue points.
- const int64_t position_for_cues_;
-
- // The file position of the cluster's size element.
- int64_t size_position_;
-
- // The absolute timecode of the cluster.
- const uint64_t timecode_;
-
- // The timecode scale of the Segment containing the cluster.
- const uint64_t timecode_scale_;
-
- // Flag indicating whether the last frame of the cluster should be written as
- // a Block with Duration. If set to true, then it will result in holding back
- // of frames and the parameterized version of Finalize() must be called to
- // finish writing the Cluster.
- bool write_last_frame_with_duration_;
-
- // Map used to hold back frames, if required. Track number is the key.
- std::map<uint64_t, std::list<Frame*> > stored_frames_;
-
- // Map from track number to the timestamp of the last block written for that
- // track.
- std::map<uint64_t, uint64_t> last_block_timestamp_;
-
- // Pointer to the writer object. Not owned by this class.
- IMkvWriter* writer_;
-
- LIBWEBM_DISALLOW_COPY_AND_ASSIGN(Cluster);
-};
-
-///////////////////////////////////////////////////////////////
-// SeekHead element
-class SeekHead {
- public:
- SeekHead();
- ~SeekHead();
-
- // TODO(fgalligan): Change this to reserve a certain size. Then check how
- // big the seek entry to be added is as not every seek entry will be the
- // maximum size it could be.
- // Adds a seek entry to be written out when the element is finalized. |id|
- // must be the coded mkv element id. |pos| is the file position of the
- // element. Returns true on success.
- bool AddSeekEntry(uint32_t id, uint64_t pos);
-
- // Writes out SeekHead and SeekEntry elements. Returns true on success.
- bool Finalize(IMkvWriter* writer) const;
-
- // Returns the id of the Seek Entry at the given index. Returns -1 if index is
- // out of range. NOTE(review): the return type is unsigned, so -1 would reach callers as UINT32_MAX; confirm.
- uint32_t GetId(int index) const;
-
- // Returns the position of the Seek Entry at the given index. Returns -1 if
- // index is out of range. NOTE(review): the return type is unsigned, so -1 would reach callers as UINT64_MAX; confirm.
- uint64_t GetPosition(int index) const;
-
- // Sets the Seek Entry id and position at given index.
- // Returns true on success.
- bool SetSeekEntry(int index, uint32_t id, uint64_t position);
-
- // Reserves space by writing out a Void element which will be updated with
- // a SeekHead element later. Returns true on success.
- bool Write(IMkvWriter* writer);
-
- // We are going to put a cap on the number of Seek Entries.
- const static int32_t kSeekEntryCount = 5;
-
- private:
- // Returns the maximum size in bytes of one seek entry.
- uint64_t MaxEntrySize() const;
-
- // Seek entry id element list (fixed capacity of kSeekEntryCount).
- uint32_t seek_entry_id_[kSeekEntryCount];
-
- // Seek entry pos element list (fixed capacity of kSeekEntryCount).
- uint64_t seek_entry_pos_[kSeekEntryCount];
-
- // The file position of SeekHead element.
- int64_t start_pos_;
-
- LIBWEBM_DISALLOW_COPY_AND_ASSIGN(SeekHead);
-};
-
-///////////////////////////////////////////////////////////////
-// Segment Information element
-class SegmentInfo {
- public:
- SegmentInfo();
- ~SegmentInfo();
-
- // Will update the duration if |duration_| is > 0.0. Returns true on success.
- // NOTE(review): presumably rewrites the value at |duration_pos_|; confirm.
- bool Finalize(IMkvWriter* writer) const;
-
- // Sets |muxing_app_| and |writing_app_|.
- bool Init();
-
- // Output the Segment Information element to the writer. Returns true on
- // success.
- bool Write(IMkvWriter* writer);
-
- void set_duration(double duration) { duration_ = duration; }
- double duration() const { return duration_; }
- void set_muxing_app(const char* app);
- const char* muxing_app() const { return muxing_app_; }
- void set_timecode_scale(uint64_t scale) { timecode_scale_ = scale; }
- uint64_t timecode_scale() const { return timecode_scale_; }
- void set_writing_app(const char* app);
- const char* writing_app() const { return writing_app_; }
- void set_date_utc(int64_t date_utc) { date_utc_ = date_utc; }
- int64_t date_utc() const { return date_utc_; }
-
- private:
- // Segment Information element values.
- // |duration_| is initially set to -1 to signify that a duration has not been
- // set and should not be written out.
- double duration_;
- // Set to libwebm-%d.%d.%d.%d, major, minor, build, revision.
- char* muxing_app_;
- uint64_t timecode_scale_;
- // Initially set to libwebm-%d.%d.%d.%d, major, minor, build, revision.
- char* writing_app_;
- // LLONG_MIN when DateUTC is not set.
- int64_t date_utc_;
-
- // The file position of the duration element.
- int64_t duration_pos_;
-
- LIBWEBM_DISALLOW_COPY_AND_ASSIGN(SegmentInfo);
-};
-
-///////////////////////////////////////////////////////////////
-// This class represents the main segment in a WebM file. Currently only
-// supports one Segment element.
-//
-// Notes:
-// |Init| must be called before any other method in this class.
-class Segment {
- public:
- enum Mode { kLive = 0x1, kFile = 0x2 };
-
- enum CuesPosition {
- kAfterClusters = 0x0, // Position Cues after Clusters - Default
- kBeforeClusters = 0x1 // Position Cues before Clusters
- };
-
- static const uint32_t kDefaultDocTypeVersion = 4;
- static const uint64_t kDefaultMaxClusterDuration = 30000000000ULL;
-
- Segment();
- ~Segment();
-
- // Initializes |SegmentInfo| and returns result. Always returns false when
- // |ptr_writer| is NULL.
- bool Init(IMkvWriter* ptr_writer);
-
- // Adds a generic track to the segment. Returns the newly-allocated
- // track object (which is owned by the segment) on success, NULL on
- // error. |number| is the number to use for the track. |number|
- // must be >= 0. If |number| == 0 then the muxer will decide on the
- // track number.
- Track* AddTrack(int32_t number);
-
- // Adds a Vorbis audio track to the segment. Returns the number of the track
- // on success, 0 on error. |number| is the number to use for the audio track.
- // |number| must be >= 0. If |number| == 0 then the muxer will decide on
- // the track number.
- uint64_t AddAudioTrack(int32_t sample_rate, int32_t channels, int32_t number);
-
- // Adds an empty chapter to the chapters of this segment. Returns
- // non-NULL on success. After adding the chapter, the caller should
- // populate its fields via the Chapter member functions.
- Chapter* AddChapter();
-
- // Adds an empty tag to the tags of this segment. Returns
- // non-NULL on success. After adding the tag, the caller should
- // populate its fields via the Tag member functions.
- Tag* AddTag();
-
- // Adds a cue point to the Cues element. |timestamp| is the time in
- // nanoseconds of the cue's time. |track| is the Track of the Cue. This
- // function must be called after AddFrame to calculate the correct
- // BlockNumber for the CuePoint. Returns true on success.
- bool AddCuePoint(uint64_t timestamp, uint64_t track);
-
- // Adds a frame to be output in the file. Returns true on success.
- // Inputs:
- // data: Pointer to the data
- // length: Length of the data
- // track_number: Track to add the data to. Value returned by Add track
- // functions.
- // timestamp: Timestamp of the frame in nanoseconds from 0.
- // is_key: Flag telling whether or not this frame is a key frame.
- bool AddFrame(const uint8_t* data, uint64_t length, uint64_t track_number,
- uint64_t timestamp_ns, bool is_key);
-
- // Writes a frame of metadata to the output medium; returns true on
- // success.
- // Inputs:
- // data: Pointer to the data
- // length: Length of the data
- // track_number: Track to add the data to. Value returned by Add track
- // functions.
- // timecode: Absolute timestamp of the metadata frame, expressed
- // in nanosecond units.
- // duration: Duration of metadata frame, in nanosecond units.
- //
- // The metadata frame is written as a block group, with a duration
- // sub-element but no reference time sub-elements (indicating that
- // it is considered a keyframe, per Matroska semantics).
- bool AddMetadata(const uint8_t* data, uint64_t length, uint64_t track_number,
- uint64_t timestamp_ns, uint64_t duration_ns);
-
- // Writes a frame with additional data to the output medium; returns true on
- // success.
- // Inputs:
- // data: Pointer to the data.
- // length: Length of the data.
- // additional: Pointer to additional data.
- // additional_length: Length of additional data.
- // add_id: Additional ID which identifies the type of additional data.
- // track_number: Track to add the data to. Value returned by Add track
- // functions.
- // timestamp: Absolute timestamp of the frame, expressed in nanosecond
- // units.
- // is_key: Flag telling whether or not this frame is a key frame.
- bool AddFrameWithAdditional(const uint8_t* data, uint64_t length,
- const uint8_t* additional,
- uint64_t additional_length, uint64_t add_id,
- uint64_t track_number, uint64_t timestamp,
- bool is_key);
-
- // Writes a frame with DiscardPadding to the output medium; returns true on
- // success.
- // Inputs:
- // data: Pointer to the data.
- // length: Length of the data.
- // discard_padding: DiscardPadding element value.
- // track_number: Track to add the data to. Value returned by Add track
- // functions.
- // timestamp: Absolute timestamp of the frame, expressed in nanosecond
- // units.
- // is_key: Flag telling whether or not this frame is a key frame.
- bool AddFrameWithDiscardPadding(const uint8_t* data, uint64_t length,
- int64_t discard_padding,
- uint64_t track_number, uint64_t timestamp,
- bool is_key);
-
- // Writes a Frame to the output medium. Chooses the correct way of writing
- // the frame (Block vs SimpleBlock) based on the parameters passed.
- // Inputs:
- // frame: frame object
- bool AddGenericFrame(const Frame* frame);
-
- // Adds a VP8 video track to the segment. Returns the number of the track on
- // success, 0 on error. |number| is the number to use for the video track.
- // |number| must be >= 0. If |number| == 0 then the muxer will decide on
- // the track number.
- uint64_t AddVideoTrack(int32_t width, int32_t height, int32_t number);
-
- // This function must be called after Finalize() if you need a copy of the
- // output with Cues written before the Clusters. It will return false if the
- // writer is not seekable or if chunking is set to true.
- // Input parameters:
- // reader - an IMkvReader object created with the same underlying file of the
- // current writer object. Make sure to close the existing writer
- // object before creating this so that all the data is properly
- // flushed and available for reading.
- // writer - an IMkvWriter object pointing to a *different* file than the one
- // pointed by the current writer object. This file will contain the
- // Cues element before the Clusters.
- bool CopyAndMoveCuesBeforeClusters(mkvparser::IMkvReader* reader,
- IMkvWriter* writer);
-
- // Sets which track to use for the Cues element. Must have added the track
- // before calling this function. Returns true on success. |track_number| is
- // returned by the Add track functions.
- bool CuesTrack(uint64_t track_number);
-
- // This will force the muxer to create a new Cluster when the next frame is
- // added.
- void ForceNewClusterOnNextFrame();
-
- // Writes out any frames that have not been written out. Finalizes the last
- // cluster. May update the size and duration of the segment. May output the
- // Cues element. May finalize the SeekHead element. Returns true on success.
- bool Finalize();
-
- // Returns the Cues object.
- Cues* GetCues() { return &cues_; }
-
- // Returns the Segment Information object.
- const SegmentInfo* GetSegmentInfo() const { return &segment_info_; }
- SegmentInfo* GetSegmentInfo() { return &segment_info_; }
-
- // Search the Tracks and return the track that matches |track_number|.
- // Returns NULL if there is no track match.
- Track* GetTrackByNumber(uint64_t track_number) const;
-
- // Toggles whether to output a cues element.
- void OutputCues(bool output_cues);
-
- // Toggles whether to write the last frame in each Cluster with Duration.
- void AccurateClusterDuration(bool accurate_cluster_duration);
-
- // Toggles whether to write the Cluster Timecode using exactly 8 bytes.
- void UseFixedSizeClusterTimecode(bool fixed_size_cluster_timecode);
-
- // Sets if the muxer will output files in chunks or not. |chunking| is a
- // flag telling whether or not to turn on chunking. |filename| is the base
- // filename for the chunk files. The header chunk file will be named
- // |filename|.hdr and the data chunks will be named
- // |filename|_XXXXXX.chk. Chunking implies that the muxer will be writing
- // to files so the muxer will use the default MkvWriter class to control
- // what data is written to what files. Returns true on success.
- // TODO: Should we change the IMkvWriter Interface to add Open and Close?
- // That will force the interface to be dependent on files.
- bool SetChunking(bool chunking, const char* filename);
-
- bool chunking() const { return chunking_; }
- uint64_t cues_track() const { return cues_track_; }
- void set_max_cluster_duration(uint64_t max_cluster_duration) {
- max_cluster_duration_ = max_cluster_duration;
- }
- uint64_t max_cluster_duration() const { return max_cluster_duration_; }
- void set_max_cluster_size(uint64_t max_cluster_size) {
- max_cluster_size_ = max_cluster_size;
- }
- uint64_t max_cluster_size() const { return max_cluster_size_; }
- void set_mode(Mode mode) { mode_ = mode; }
- Mode mode() const { return mode_; }
- CuesPosition cues_position() const { return cues_position_; }
- bool output_cues() const { return output_cues_; }
- void set_estimate_file_duration(bool estimate_duration) {
- estimate_file_duration_ = estimate_duration;
- }
- bool estimate_file_duration() const { return estimate_file_duration_; }
- const SegmentInfo* segment_info() const { return &segment_info_; }
- void set_duration(double duration) { duration_ = duration; }
- double duration() const { return duration_; }
-
- // Returns true when codec IDs are valid for WebM.
- bool DocTypeIsWebm() const;
-
- private:
- // Checks if header information has been output and initialized. If not it
- // will output the Segment element and initialize the SeekHead elment and
- // Cues elements.
- bool CheckHeaderInfo();
-
- // Sets |doc_type_version_| based on the current element requirements.
- void UpdateDocTypeVersion();
-
- // Sets |name| according to how many chunks have been written. |ext| is the
- // file extension. |name| must be deleted by the calling app. Returns true
- // on success.
- bool UpdateChunkName(const char* ext, char** name) const;
-
- // Returns the maximum offset within the segment's payload. When chunking
- // this function is needed to determine offsets of elements within the
- // chunked files. Returns -1 on error.
- int64_t MaxOffset();
-
- // Adds the frame to our frame array.
- bool QueueFrame(Frame* frame);
-
- // Output all frames that are queued. Returns -1 on error, otherwise
- // it returns the number of frames written.
- int WriteFramesAll();
-
- // Output all frames that are queued that have an end time that is less
- // then |timestamp|. Returns true on success and if there are no frames
- // queued.
- bool WriteFramesLessThan(uint64_t timestamp);
-
- // Outputs the segment header, Segment Information element, SeekHead element,
- // and Tracks element to |writer_|.
- bool WriteSegmentHeader();
-
- // Given a frame with the specified timestamp (nanosecond units) and
- // keyframe status, determine whether a new cluster should be
- // created, before writing enqueued frames and the frame itself. The
- // function returns one of the following values:
- // -1 = error: an out-of-order frame was detected
- // 0 = do not create a new cluster, and write frame to the existing cluster
- // 1 = create a new cluster, and write frame to that new cluster
- // 2 = create a new cluster, and re-run test
- int TestFrame(uint64_t track_num, uint64_t timestamp_ns, bool key) const;
-
- // Create a new cluster, using the earlier of the first enqueued
- // frame, or the indicated time. Returns true on success.
- bool MakeNewCluster(uint64_t timestamp_ns);
-
- // Checks whether a new cluster needs to be created, and if so
- // creates a new cluster. Returns false if creation of a new cluster
- // was necessary but creation was not successful.
- bool DoNewClusterProcessing(uint64_t track_num, uint64_t timestamp_ns,
- bool key);
-
- // Adjusts Cue Point values (to place Cues before Clusters) so that they
- // reflect the correct offsets.
- void MoveCuesBeforeClusters();
-
- // This function recursively computes the correct cluster offsets (this is
- // done to move the Cues before Clusters). It recursively updates the change
- // in size (which indicates a change in cluster offset) until no sizes change.
- // Parameters:
- // diff - indicates the difference in size of the Cues element that needs to
- // accounted for.
- // index - index in the list of Cues which is currently being adjusted.
- // cue_size - sum of size of all the CuePoint elements.
- void MoveCuesBeforeClustersHelper(uint64_t diff, int index,
- uint64_t* cue_size);
-
- // Seeds the random number generator used to make UIDs.
- unsigned int seed_;
-
- // WebM elements
- Cues cues_;
- SeekHead seek_head_;
- SegmentInfo segment_info_;
- Tracks tracks_;
- Chapters chapters_;
- Tags tags_;
-
- // Number of chunks written.
- int chunk_count_;
-
- // Current chunk filename.
- char* chunk_name_;
-
- // Default MkvWriter object created by this class used for writing clusters
- // out in separate files.
- MkvWriter* chunk_writer_cluster_;
-
- // Default MkvWriter object created by this class used for writing Cues
- // element out to a file.
- MkvWriter* chunk_writer_cues_;
-
- // Default MkvWriter object created by this class used for writing the
- // Matroska header out to a file.
- MkvWriter* chunk_writer_header_;
-
- // Flag telling whether or not the muxer is chunking output to multiple
- // files.
- bool chunking_;
-
- // Base filename for the chunked files.
- char* chunking_base_name_;
-
- // File position offset where the Clusters end.
- int64_t cluster_end_offset_;
-
- // List of clusters.
- Cluster** cluster_list_;
-
- // Number of cluster pointers allocated in the cluster list.
- int32_t cluster_list_capacity_;
-
- // Number of clusters in the cluster list.
- int32_t cluster_list_size_;
-
- // Indicates whether Cues should be written before or after Clusters
- CuesPosition cues_position_;
-
- // Track number that is associated with the cues element for this segment.
- uint64_t cues_track_;
-
- // Tells the muxer to force a new cluster on the next Block.
- bool force_new_cluster_;
-
- // List of stored audio frames. These variables are used to store frames so
- // the muxer can follow the guideline "Audio blocks that contain the video
- // key frame's timecode should be in the same cluster as the video key frame
- // block."
- Frame** frames_;
-
- // Number of frame pointers allocated in the frame list.
- int32_t frames_capacity_;
-
- // Number of frames in the frame list.
- int32_t frames_size_;
-
- // Flag telling if a video track has been added to the segment.
- bool has_video_;
-
- // Flag telling if the segment's header has been written.
- bool header_written_;
-
- // Duration of the last block in nanoseconds.
- uint64_t last_block_duration_;
-
- // Last timestamp in nanoseconds added to a cluster.
- uint64_t last_timestamp_;
-
- // Last timestamp in nanoseconds by track number added to a cluster.
- uint64_t last_track_timestamp_[kMaxTrackNumber];
-
- // Number of frames written per track.
- uint64_t track_frames_written_[kMaxTrackNumber];
-
- // Maximum time in nanoseconds for a cluster duration. This variable is a
- // guideline and some clusters may have a longer duration. Default is 30
- // seconds.
- uint64_t max_cluster_duration_;
-
- // Maximum size in bytes for a cluster. This variable is a guideline and
- // some clusters may have a larger size. Default is 0 which signifies that
- // the muxer will decide the size.
- uint64_t max_cluster_size_;
-
- // The mode that segment is in. If set to |kLive| the writer must not
- // seek backwards.
- Mode mode_;
-
- // Flag telling the muxer that a new cue point should be added.
- bool new_cuepoint_;
-
- // TODO(fgalligan): Should we add support for more than one Cues element?
- // Flag whether or not the muxer should output a Cues element.
- bool output_cues_;
-
- // Flag whether or not the last frame in each Cluster will have a Duration
- // element in it.
- bool accurate_cluster_duration_;
-
- // Flag whether or not to write the Cluster Timecode using exactly 8 bytes.
- bool fixed_size_cluster_timecode_;
-
- // Flag whether or not to estimate the file duration.
- bool estimate_file_duration_;
-
- // The size of the EBML header, used to validate the header if
- // WriteEbmlHeader() is called more than once.
- int32_t ebml_header_size_;
-
- // The file position of the segment's payload.
- int64_t payload_pos_;
-
- // The file position of the element's size.
- int64_t size_position_;
-
- // Current DocTypeVersion (|doc_type_version_|) and that written in
- // WriteSegmentHeader().
- // WriteEbmlHeader() will be called from Finalize() if |doc_type_version_|
- // differs from |doc_type_version_written_|.
- uint32_t doc_type_version_;
- uint32_t doc_type_version_written_;
-
- // If |duration_| is > 0, then explicitly set the duration of the segment.
- double duration_;
-
- // Pointer to the writer objects. Not owned by this class.
- IMkvWriter* writer_cluster_;
- IMkvWriter* writer_cues_;
- IMkvWriter* writer_header_;
-
- LIBWEBM_DISALLOW_COPY_AND_ASSIGN(Segment);
-};
-
-} // namespace mkvmuxer
-
-#endif // MKVMUXER_MKVMUXER_H_
diff --git a/third_party/aom/third_party/libwebm/mkvmuxer/mkvmuxertypes.h b/third_party/aom/third_party/libwebm/mkvmuxer/mkvmuxertypes.h
deleted file mode 100644
index e5db12160..000000000
--- a/third_party/aom/third_party/libwebm/mkvmuxer/mkvmuxertypes.h
+++ /dev/null
@@ -1,28 +0,0 @@
-// Copyright (c) 2012 The WebM project authors. All Rights Reserved.
-//
-// Use of this source code is governed by a BSD-style license
-// that can be found in the LICENSE file in the root of the source
-// tree. An additional intellectual property rights grant can be found
-// in the file PATENTS. All contributing project authors may
-// be found in the AUTHORS file in the root of the source tree.
-
-#ifndef MKVMUXER_MKVMUXERTYPES_H_
-#define MKVMUXER_MKVMUXERTYPES_H_
-
-namespace mkvmuxer {
-typedef unsigned char uint8;
-typedef short int16;
-typedef int int32;
-typedef unsigned int uint32;
-typedef long long int64;
-typedef unsigned long long uint64;
-} // namespace mkvmuxer
-
-// Copied from Chromium basictypes.h
-// A macro to disallow the copy constructor and operator= functions
-// This should be used in the private: declarations for a class
-#define LIBWEBM_DISALLOW_COPY_AND_ASSIGN(TypeName) \
- TypeName(const TypeName&); \
- void operator=(const TypeName&)
-
-#endif // MKVMUXER_MKVMUXERTYPES_HPP_
diff --git a/third_party/aom/third_party/libwebm/mkvmuxer/mkvmuxerutil.cc b/third_party/aom/third_party/libwebm/mkvmuxer/mkvmuxerutil.cc
deleted file mode 100644
index 355d4e22b..000000000
--- a/third_party/aom/third_party/libwebm/mkvmuxer/mkvmuxerutil.cc
+++ /dev/null
@@ -1,744 +0,0 @@
-// Copyright (c) 2012 The WebM project authors. All Rights Reserved.
-//
-// Use of this source code is governed by a BSD-style license
-// that can be found in the LICENSE file in the root of the source
-// tree. An additional intellectual property rights grant can be found
-// in the file PATENTS. All contributing project authors may
-// be found in the AUTHORS file in the root of the source tree.
-
-#include "mkvmuxer/mkvmuxerutil.h"
-
-#ifdef __ANDROID__
-#include <fcntl.h>
-#include <unistd.h>
-#endif
-
-#include <cassert>
-#include <cmath>
-#include <cstdio>
-#include <cstdlib>
-#include <cstring>
-#include <ctime>
-#include <new>
-
-#include "common/webmids.h"
-#include "mkvmuxer/mkvmuxer.h"
-#include "mkvmuxer/mkvwriter.h"
-
-namespace mkvmuxer {
-
-namespace {
-
-// Date elements are always 8 octets in size.
-const int kDateElementSize = 8;
-
-uint64 WriteBlock(IMkvWriter* writer, const Frame* const frame, int64 timecode,
- uint64 timecode_scale) {
- uint64 block_additional_elem_size = 0;
- uint64 block_addid_elem_size = 0;
- uint64 block_more_payload_size = 0;
- uint64 block_more_elem_size = 0;
- uint64 block_additions_payload_size = 0;
- uint64 block_additions_elem_size = 0;
- if (frame->additional()) {
- block_additional_elem_size =
- EbmlElementSize(libwebm::kMkvBlockAdditional, frame->additional(),
- frame->additional_length());
- block_addid_elem_size = EbmlElementSize(
- libwebm::kMkvBlockAddID, static_cast<uint64>(frame->add_id()));
-
- block_more_payload_size =
- block_addid_elem_size + block_additional_elem_size;
- block_more_elem_size =
- EbmlMasterElementSize(libwebm::kMkvBlockMore, block_more_payload_size) +
- block_more_payload_size;
- block_additions_payload_size = block_more_elem_size;
- block_additions_elem_size =
- EbmlMasterElementSize(libwebm::kMkvBlockAdditions,
- block_additions_payload_size) +
- block_additions_payload_size;
- }
-
- uint64 discard_padding_elem_size = 0;
- if (frame->discard_padding() != 0) {
- discard_padding_elem_size =
- EbmlElementSize(libwebm::kMkvDiscardPadding,
- static_cast<int64>(frame->discard_padding()));
- }
-
- const uint64 reference_block_timestamp =
- frame->reference_block_timestamp() / timecode_scale;
- uint64 reference_block_elem_size = 0;
- if (!frame->is_key()) {
- reference_block_elem_size =
- EbmlElementSize(libwebm::kMkvReferenceBlock, reference_block_timestamp);
- }
-
- const uint64 duration = frame->duration() / timecode_scale;
- uint64 block_duration_elem_size = 0;
- if (duration > 0)
- block_duration_elem_size =
- EbmlElementSize(libwebm::kMkvBlockDuration, duration);
-
- const uint64 block_payload_size = 4 + frame->length();
- const uint64 block_elem_size =
- EbmlMasterElementSize(libwebm::kMkvBlock, block_payload_size) +
- block_payload_size;
-
- const uint64 block_group_payload_size =
- block_elem_size + block_additions_elem_size + block_duration_elem_size +
- discard_padding_elem_size + reference_block_elem_size;
-
- if (!WriteEbmlMasterElement(writer, libwebm::kMkvBlockGroup,
- block_group_payload_size)) {
- return 0;
- }
-
- if (!WriteEbmlMasterElement(writer, libwebm::kMkvBlock, block_payload_size))
- return 0;
-
- if (WriteUInt(writer, frame->track_number()))
- return 0;
-
- if (SerializeInt(writer, timecode, 2))
- return 0;
-
- // For a Block, flags is always 0.
- if (SerializeInt(writer, 0, 1))
- return 0;
-
- if (writer->Write(frame->frame(), static_cast<uint32>(frame->length())))
- return 0;
-
- if (frame->additional()) {
- if (!WriteEbmlMasterElement(writer, libwebm::kMkvBlockAdditions,
- block_additions_payload_size)) {
- return 0;
- }
-
- if (!WriteEbmlMasterElement(writer, libwebm::kMkvBlockMore,
- block_more_payload_size))
- return 0;
-
- if (!WriteEbmlElement(writer, libwebm::kMkvBlockAddID,
- static_cast<uint64>(frame->add_id())))
- return 0;
-
- if (!WriteEbmlElement(writer, libwebm::kMkvBlockAdditional,
- frame->additional(), frame->additional_length())) {
- return 0;
- }
- }
-
- if (frame->discard_padding() != 0 &&
- !WriteEbmlElement(writer, libwebm::kMkvDiscardPadding,
- static_cast<int64>(frame->discard_padding()))) {
- return false;
- }
-
- if (!frame->is_key() &&
- !WriteEbmlElement(writer, libwebm::kMkvReferenceBlock,
- reference_block_timestamp)) {
- return false;
- }
-
- if (duration > 0 &&
- !WriteEbmlElement(writer, libwebm::kMkvBlockDuration, duration)) {
- return false;
- }
- return EbmlMasterElementSize(libwebm::kMkvBlockGroup,
- block_group_payload_size) +
- block_group_payload_size;
-}
-
-uint64 WriteSimpleBlock(IMkvWriter* writer, const Frame* const frame,
- int64 timecode) {
- if (WriteID(writer, libwebm::kMkvSimpleBlock))
- return 0;
-
- const int32 size = static_cast<int32>(frame->length()) + 4;
- if (WriteUInt(writer, size))
- return 0;
-
- if (WriteUInt(writer, static_cast<uint64>(frame->track_number())))
- return 0;
-
- if (SerializeInt(writer, timecode, 2))
- return 0;
-
- uint64 flags = 0;
- if (frame->is_key())
- flags |= 0x80;
-
- if (SerializeInt(writer, flags, 1))
- return 0;
-
- if (writer->Write(frame->frame(), static_cast<uint32>(frame->length())))
- return 0;
-
- return GetUIntSize(libwebm::kMkvSimpleBlock) + GetCodedUIntSize(size) + 4 +
- frame->length();
-}
-
-} // namespace
-
-int32 GetCodedUIntSize(uint64 value) {
- if (value < 0x000000000000007FULL)
- return 1;
- else if (value < 0x0000000000003FFFULL)
- return 2;
- else if (value < 0x00000000001FFFFFULL)
- return 3;
- else if (value < 0x000000000FFFFFFFULL)
- return 4;
- else if (value < 0x00000007FFFFFFFFULL)
- return 5;
- else if (value < 0x000003FFFFFFFFFFULL)
- return 6;
- else if (value < 0x0001FFFFFFFFFFFFULL)
- return 7;
- return 8;
-}
-
-int32 GetUIntSize(uint64 value) {
- if (value < 0x0000000000000100ULL)
- return 1;
- else if (value < 0x0000000000010000ULL)
- return 2;
- else if (value < 0x0000000001000000ULL)
- return 3;
- else if (value < 0x0000000100000000ULL)
- return 4;
- else if (value < 0x0000010000000000ULL)
- return 5;
- else if (value < 0x0001000000000000ULL)
- return 6;
- else if (value < 0x0100000000000000ULL)
- return 7;
- return 8;
-}
-
-int32 GetIntSize(int64 value) {
- // Doubling the requested value ensures positive values with their high bit
- // set are written with 0-padding to avoid flipping the signedness.
- const uint64 v = (value < 0) ? value ^ -1LL : value;
- return GetUIntSize(2 * v);
-}
-
-uint64 EbmlMasterElementSize(uint64 type, uint64 value) {
- // Size of EBML ID
- int32 ebml_size = GetUIntSize(type);
-
- // Datasize
- ebml_size += GetCodedUIntSize(value);
-
- return ebml_size;
-}
-
-uint64 EbmlElementSize(uint64 type, int64 value) {
- // Size of EBML ID
- int32 ebml_size = GetUIntSize(type);
-
- // Datasize
- ebml_size += GetIntSize(value);
-
- // Size of Datasize
- ebml_size++;
-
- return ebml_size;
-}
-
-uint64 EbmlElementSize(uint64 type, uint64 value) {
- return EbmlElementSize(type, value, 0);
-}
-
-uint64 EbmlElementSize(uint64 type, uint64 value, uint64 fixed_size) {
- // Size of EBML ID
- uint64 ebml_size = GetUIntSize(type);
-
- // Datasize
- ebml_size += (fixed_size > 0) ? fixed_size : GetUIntSize(value);
-
- // Size of Datasize
- ebml_size++;
-
- return ebml_size;
-}
-
-uint64 EbmlElementSize(uint64 type, float /* value */) {
- // Size of EBML ID
- uint64 ebml_size = GetUIntSize(type);
-
- // Datasize
- ebml_size += sizeof(float);
-
- // Size of Datasize
- ebml_size++;
-
- return ebml_size;
-}
-
-uint64 EbmlElementSize(uint64 type, const char* value) {
- if (!value)
- return 0;
-
- // Size of EBML ID
- uint64 ebml_size = GetUIntSize(type);
-
- // Datasize
- ebml_size += strlen(value);
-
- // Size of Datasize
- ebml_size += GetCodedUIntSize(strlen(value));
-
- return ebml_size;
-}
-
-uint64 EbmlElementSize(uint64 type, const uint8* value, uint64 size) {
- if (!value)
- return 0;
-
- // Size of EBML ID
- uint64 ebml_size = GetUIntSize(type);
-
- // Datasize
- ebml_size += size;
-
- // Size of Datasize
- ebml_size += GetCodedUIntSize(size);
-
- return ebml_size;
-}
-
-uint64 EbmlDateElementSize(uint64 type) {
- // Size of EBML ID
- uint64 ebml_size = GetUIntSize(type);
-
- // Datasize
- ebml_size += kDateElementSize;
-
- // Size of Datasize
- ebml_size++;
-
- return ebml_size;
-}
-
-int32 SerializeInt(IMkvWriter* writer, int64 value, int32 size) {
- if (!writer || size < 1 || size > 8)
- return -1;
-
- for (int32 i = 1; i <= size; ++i) {
- const int32 byte_count = size - i;
- const int32 bit_count = byte_count * 8;
-
- const int64 bb = value >> bit_count;
- const uint8 b = static_cast<uint8>(bb);
-
- const int32 status = writer->Write(&b, 1);
-
- if (status < 0)
- return status;
- }
-
- return 0;
-}
-
-int32 SerializeFloat(IMkvWriter* writer, float f) {
- if (!writer)
- return -1;
-
- assert(sizeof(uint32) == sizeof(float));
- // This union is merely used to avoid a reinterpret_cast from float& to
- // uint32& which will result in violation of strict aliasing.
- union U32 {
- uint32 u32;
- float f;
- } value;
- value.f = f;
-
- for (int32 i = 1; i <= 4; ++i) {
- const int32 byte_count = 4 - i;
- const int32 bit_count = byte_count * 8;
-
- const uint8 byte = static_cast<uint8>(value.u32 >> bit_count);
-
- const int32 status = writer->Write(&byte, 1);
-
- if (status < 0)
- return status;
- }
-
- return 0;
-}
-
-int32 WriteUInt(IMkvWriter* writer, uint64 value) {
- if (!writer)
- return -1;
-
- int32 size = GetCodedUIntSize(value);
-
- return WriteUIntSize(writer, value, size);
-}
-
-int32 WriteUIntSize(IMkvWriter* writer, uint64 value, int32 size) {
- if (!writer || size < 0 || size > 8)
- return -1;
-
- if (size > 0) {
- const uint64 bit = 1LL << (size * 7);
-
- if (value > (bit - 2))
- return -1;
-
- value |= bit;
- } else {
- size = 1;
- int64 bit;
-
- for (;;) {
- bit = 1LL << (size * 7);
- const uint64 max = bit - 2;
-
- if (value <= max)
- break;
-
- ++size;
- }
-
- if (size > 8)
- return false;
-
- value |= bit;
- }
-
- return SerializeInt(writer, value, size);
-}
-
-int32 WriteID(IMkvWriter* writer, uint64 type) {
- if (!writer)
- return -1;
-
- writer->ElementStartNotify(type, writer->Position());
-
- const int32 size = GetUIntSize(type);
-
- return SerializeInt(writer, type, size);
-}
-
-bool WriteEbmlMasterElement(IMkvWriter* writer, uint64 type, uint64 size) {
- if (!writer)
- return false;
-
- if (WriteID(writer, type))
- return false;
-
- if (WriteUInt(writer, size))
- return false;
-
- return true;
-}
-
-bool WriteEbmlElement(IMkvWriter* writer, uint64 type, uint64 value) {
- return WriteEbmlElement(writer, type, value, 0);
-}
-
-bool WriteEbmlElement(IMkvWriter* writer, uint64 type, uint64 value,
- uint64 fixed_size) {
- if (!writer)
- return false;
-
- if (WriteID(writer, type))
- return false;
-
- uint64 size = GetUIntSize(value);
- if (fixed_size > 0) {
- if (size > fixed_size)
- return false;
- size = fixed_size;
- }
- if (WriteUInt(writer, size))
- return false;
-
- if (SerializeInt(writer, value, static_cast<int32>(size)))
- return false;
-
- return true;
-}
-
-bool WriteEbmlElement(IMkvWriter* writer, uint64 type, int64 value) {
- if (!writer)
- return false;
-
- if (WriteID(writer, type))
- return 0;
-
- const uint64 size = GetIntSize(value);
- if (WriteUInt(writer, size))
- return false;
-
- if (SerializeInt(writer, value, static_cast<int32>(size)))
- return false;
-
- return true;
-}
-
-bool WriteEbmlElement(IMkvWriter* writer, uint64 type, float value) {
- if (!writer)
- return false;
-
- if (WriteID(writer, type))
- return false;
-
- if (WriteUInt(writer, 4))
- return false;
-
- if (SerializeFloat(writer, value))
- return false;
-
- return true;
-}
-
-bool WriteEbmlElement(IMkvWriter* writer, uint64 type, const char* value) {
- if (!writer || !value)
- return false;
-
- if (WriteID(writer, type))
- return false;
-
- const uint64 length = strlen(value);
- if (WriteUInt(writer, length))
- return false;
-
- if (writer->Write(value, static_cast<const uint32>(length)))
- return false;
-
- return true;
-}
-
-bool WriteEbmlElement(IMkvWriter* writer, uint64 type, const uint8* value,
- uint64 size) {
- if (!writer || !value || size < 1)
- return false;
-
- if (WriteID(writer, type))
- return false;
-
- if (WriteUInt(writer, size))
- return false;
-
- if (writer->Write(value, static_cast<uint32>(size)))
- return false;
-
- return true;
-}
-
-bool WriteEbmlDateElement(IMkvWriter* writer, uint64 type, int64 value) {
- if (!writer)
- return false;
-
- if (WriteID(writer, type))
- return false;
-
- if (WriteUInt(writer, kDateElementSize))
- return false;
-
- if (SerializeInt(writer, value, kDateElementSize))
- return false;
-
- return true;
-}
-
-uint64 WriteFrame(IMkvWriter* writer, const Frame* const frame,
- Cluster* cluster) {
- if (!writer || !frame || !frame->IsValid() || !cluster ||
- !cluster->timecode_scale())
- return 0;
-
- // Technically the timecode for a block can be less than the
- // timecode for the cluster itself (remember that block timecode
- // is a signed, 16-bit integer). However, as a simplification we
- // only permit non-negative cluster-relative timecodes for blocks.
- const int64 relative_timecode = cluster->GetRelativeTimecode(
- frame->timestamp() / cluster->timecode_scale());
- if (relative_timecode < 0 || relative_timecode > kMaxBlockTimecode)
- return 0;
-
- return frame->CanBeSimpleBlock() ?
- WriteSimpleBlock(writer, frame, relative_timecode) :
- WriteBlock(writer, frame, relative_timecode,
- cluster->timecode_scale());
-}
-
-uint64 WriteVoidElement(IMkvWriter* writer, uint64 size) {
- if (!writer)
- return false;
-
- // Subtract one for the void ID and the coded size.
- uint64 void_entry_size = size - 1 - GetCodedUIntSize(size - 1);
- uint64 void_size = EbmlMasterElementSize(libwebm::kMkvVoid, void_entry_size) +
- void_entry_size;
-
- if (void_size != size)
- return 0;
-
- const int64 payload_position = writer->Position();
- if (payload_position < 0)
- return 0;
-
- if (WriteID(writer, libwebm::kMkvVoid))
- return 0;
-
- if (WriteUInt(writer, void_entry_size))
- return 0;
-
- const uint8 value = 0;
- for (int32 i = 0; i < static_cast<int32>(void_entry_size); ++i) {
- if (writer->Write(&value, 1))
- return 0;
- }
-
- const int64 stop_position = writer->Position();
- if (stop_position < 0 ||
- stop_position - payload_position != static_cast<int64>(void_size))
- return 0;
-
- return void_size;
-}
-
-void GetVersion(int32* major, int32* minor, int32* build, int32* revision) {
- *major = 0;
- *minor = 2;
- *build = 1;
- *revision = 0;
-}
-
-uint64 MakeUID(unsigned int* seed) {
- uint64 uid = 0;
-
-#ifdef __MINGW32__
- srand(*seed);
-#endif
-
- for (int i = 0; i < 7; ++i) { // avoid problems with 8-byte values
- uid <<= 8;
-
-// TODO(fgalligan): Move random number generation to platform specific code.
-#ifdef _MSC_VER
- (void)seed;
- const int32 nn = rand();
-#elif __ANDROID__
- (void)seed;
- int32 temp_num = 1;
- int fd = open("/dev/urandom", O_RDONLY);
- if (fd != -1) {
- read(fd, &temp_num, sizeof(temp_num));
- close(fd);
- }
- const int32 nn = temp_num;
-#elif defined __MINGW32__
- const int32 nn = rand();
-#else
- const int32 nn = rand_r(seed);
-#endif
- const int32 n = 0xFF & (nn >> 4); // throw away low-order bits
-
- uid |= n;
- }
-
- return uid;
-}
-
-bool IsMatrixCoefficientsValueValid(uint64_t value) {
- switch (value) {
- case mkvmuxer::Colour::kGbr:
- case mkvmuxer::Colour::kBt709:
- case mkvmuxer::Colour::kUnspecifiedMc:
- case mkvmuxer::Colour::kReserved:
- case mkvmuxer::Colour::kFcc:
- case mkvmuxer::Colour::kBt470bg:
- case mkvmuxer::Colour::kSmpte170MMc:
- case mkvmuxer::Colour::kSmpte240MMc:
- case mkvmuxer::Colour::kYcocg:
- case mkvmuxer::Colour::kBt2020NonConstantLuminance:
- case mkvmuxer::Colour::kBt2020ConstantLuminance:
- return true;
- }
- return false;
-}
-
-bool IsChromaSitingHorzValueValid(uint64_t value) {
- switch (value) {
- case mkvmuxer::Colour::kUnspecifiedCsh:
- case mkvmuxer::Colour::kLeftCollocated:
- case mkvmuxer::Colour::kHalfCsh:
- return true;
- }
- return false;
-}
-
-bool IsChromaSitingVertValueValid(uint64_t value) {
- switch (value) {
- case mkvmuxer::Colour::kUnspecifiedCsv:
- case mkvmuxer::Colour::kTopCollocated:
- case mkvmuxer::Colour::kHalfCsv:
- return true;
- }
- return false;
-}
-
-bool IsColourRangeValueValid(uint64_t value) {
- switch (value) {
- case mkvmuxer::Colour::kUnspecifiedCr:
- case mkvmuxer::Colour::kBroadcastRange:
- case mkvmuxer::Colour::kFullRange:
- case mkvmuxer::Colour::kMcTcDefined:
- return true;
- }
- return false;
-}
-
-bool IsTransferCharacteristicsValueValid(uint64_t value) {
- switch (value) {
- case mkvmuxer::Colour::kIturBt709Tc:
- case mkvmuxer::Colour::kUnspecifiedTc:
- case mkvmuxer::Colour::kReservedTc:
- case mkvmuxer::Colour::kGamma22Curve:
- case mkvmuxer::Colour::kGamma28Curve:
- case mkvmuxer::Colour::kSmpte170MTc:
- case mkvmuxer::Colour::kSmpte240MTc:
- case mkvmuxer::Colour::kLinear:
- case mkvmuxer::Colour::kLog:
- case mkvmuxer::Colour::kLogSqrt:
- case mkvmuxer::Colour::kIec6196624:
- case mkvmuxer::Colour::kIturBt1361ExtendedColourGamut:
- case mkvmuxer::Colour::kIec6196621:
- case mkvmuxer::Colour::kIturBt202010bit:
- case mkvmuxer::Colour::kIturBt202012bit:
- case mkvmuxer::Colour::kSmpteSt2084:
- case mkvmuxer::Colour::kSmpteSt4281Tc:
- case mkvmuxer::Colour::kAribStdB67Hlg:
- return true;
- }
- return false;
-}
-
-bool IsPrimariesValueValid(uint64_t value) {
- switch (value) {
- case mkvmuxer::Colour::kReservedP0:
- case mkvmuxer::Colour::kIturBt709P:
- case mkvmuxer::Colour::kUnspecifiedP:
- case mkvmuxer::Colour::kReservedP3:
- case mkvmuxer::Colour::kIturBt470M:
- case mkvmuxer::Colour::kIturBt470Bg:
- case mkvmuxer::Colour::kSmpte170MP:
- case mkvmuxer::Colour::kSmpte240MP:
- case mkvmuxer::Colour::kFilm:
- case mkvmuxer::Colour::kIturBt2020:
- case mkvmuxer::Colour::kSmpteSt4281P:
- case mkvmuxer::Colour::kJedecP22Phosphors:
- return true;
- }
- return false;
-}
-
-} // namespace mkvmuxer
diff --git a/third_party/aom/third_party/libwebm/mkvmuxer/mkvmuxerutil.h b/third_party/aom/third_party/libwebm/mkvmuxer/mkvmuxerutil.h
deleted file mode 100644
index 132388da5..000000000
--- a/third_party/aom/third_party/libwebm/mkvmuxer/mkvmuxerutil.h
+++ /dev/null
@@ -1,112 +0,0 @@
-// Copyright (c) 2012 The WebM project authors. All Rights Reserved.
-//
-// Use of this source code is governed by a BSD-style license
-// that can be found in the LICENSE file in the root of the source
-// tree. An additional intellectual property rights grant can be found
-// in the file PATENTS. All contributing project authors may
-// be found in the AUTHORS file in the root of the source tree.
-#ifndef MKVMUXER_MKVMUXERUTIL_H_
-#define MKVMUXER_MKVMUXERUTIL_H_
-
-#include "mkvmuxertypes.h"
-
-#include "stdint.h"
-
-namespace mkvmuxer {
-class Cluster;
-class Frame;
-class IMkvWriter;
-
-// TODO(tomfinegan): mkvmuxer:: integer types continue to be used here because
-// changing them causes pain for downstream projects. It would be nice if a
-// solution that allows removal of the mkvmuxer:: integer types while avoiding
-// pain for downstream users of libwebm. Considering that mkvmuxerutil.{cc,h}
-// are really, for the great majority of cases, EBML size calculation and writer
-// functions, perhaps a more EBML focused utility would be the way to go as a
-// first step.
-
-const uint64 kEbmlUnknownValue = 0x01FFFFFFFFFFFFFFULL;
-const int64 kMaxBlockTimecode = 0x07FFFLL;
-
-// Writes out |value| in Big Endian order. Returns 0 on success.
-int32 SerializeInt(IMkvWriter* writer, int64 value, int32 size);
-
-// Returns the size in bytes of the element.
-int32 GetUIntSize(uint64 value);
-int32 GetIntSize(int64 value);
-int32 GetCodedUIntSize(uint64 value);
-uint64 EbmlMasterElementSize(uint64 type, uint64 value);
-uint64 EbmlElementSize(uint64 type, int64 value);
-uint64 EbmlElementSize(uint64 type, uint64 value);
-uint64 EbmlElementSize(uint64 type, float value);
-uint64 EbmlElementSize(uint64 type, const char* value);
-uint64 EbmlElementSize(uint64 type, const uint8* value, uint64 size);
-uint64 EbmlDateElementSize(uint64 type);
-
-// Returns the size in bytes of the element assuming that the element was
-// written using |fixed_size| bytes. If |fixed_size| is set to zero, then it
-// computes the necessary number of bytes based on |value|.
-uint64 EbmlElementSize(uint64 type, uint64 value, uint64 fixed_size);
-
-// Creates an EBML coded number from |value| and writes it out. The size of
-// the coded number is determined by the value of |value|. |value| must not
-// be in a coded form. Returns 0 on success.
-int32 WriteUInt(IMkvWriter* writer, uint64 value);
-
-// Creates an EBML coded number from |value| and writes it out. The size of
-// the coded number is determined by the value of |size|. |value| must not
-// be in a coded form. Returns 0 on success.
-int32 WriteUIntSize(IMkvWriter* writer, uint64 value, int32 size);
-
-// Output an Mkv master element. Returns true if the element was written.
-bool WriteEbmlMasterElement(IMkvWriter* writer, uint64 value, uint64 size);
-
-// Outputs an Mkv ID, calls |IMkvWriter::ElementStartNotify|, and passes the
-// ID to |SerializeInt|. Returns 0 on success.
-int32 WriteID(IMkvWriter* writer, uint64 type);
-
-// Output an Mkv non-master element. Returns true if the element was written.
-bool WriteEbmlElement(IMkvWriter* writer, uint64 type, uint64 value);
-bool WriteEbmlElement(IMkvWriter* writer, uint64 type, int64 value);
-bool WriteEbmlElement(IMkvWriter* writer, uint64 type, float value);
-bool WriteEbmlElement(IMkvWriter* writer, uint64 type, const char* value);
-bool WriteEbmlElement(IMkvWriter* writer, uint64 type, const uint8* value,
- uint64 size);
-bool WriteEbmlDateElement(IMkvWriter* writer, uint64 type, int64 value);
-
-// Output an Mkv non-master element using fixed size. The element will be
-// written out using exactly |fixed_size| bytes. If |fixed_size| is set to zero
-// then it computes the necessary number of bytes based on |value|. Returns true
-// if the element was written.
-bool WriteEbmlElement(IMkvWriter* writer, uint64 type, uint64 value,
- uint64 fixed_size);
-
-// Output a Mkv Frame. It decides the correct element to write (Block vs
-// SimpleBlock) based on the parameters of the Frame.
-uint64 WriteFrame(IMkvWriter* writer, const Frame* const frame,
- Cluster* cluster);
-
-// Output a void element. |size| must be the entire size in bytes that will be
-// void. The function will calculate the size of the void header and subtract
-// it from |size|.
-uint64 WriteVoidElement(IMkvWriter* writer, uint64 size);
-
-// Returns the version number of the muxer in |major|, |minor|, |build|,
-// and |revision|.
-void GetVersion(int32* major, int32* minor, int32* build, int32* revision);
-
-// Returns a random number to be used for UID, using |seed| to seed
-// the random-number generator (see POSIX rand_r() for semantics).
-uint64 MakeUID(unsigned int* seed);
-
-// Colour field validation helpers. All return true when |value| is valid.
-bool IsMatrixCoefficientsValueValid(uint64_t value);
-bool IsChromaSitingHorzValueValid(uint64_t value);
-bool IsChromaSitingVertValueValid(uint64_t value);
-bool IsColourRangeValueValid(uint64_t value);
-bool IsTransferCharacteristicsValueValid(uint64_t value);
-bool IsPrimariesValueValid(uint64_t value);
-
-} // namespace mkvmuxer
-
-#endif // MKVMUXER_MKVMUXERUTIL_H_
diff --git a/third_party/aom/third_party/libwebm/mkvmuxer/mkvwriter.cc b/third_party/aom/third_party/libwebm/mkvmuxer/mkvwriter.cc
deleted file mode 100644
index 84655d802..000000000
--- a/third_party/aom/third_party/libwebm/mkvmuxer/mkvwriter.cc
+++ /dev/null
@@ -1,90 +0,0 @@
-// Copyright (c) 2012 The WebM project authors. All Rights Reserved.
-//
-// Use of this source code is governed by a BSD-style license
-// that can be found in the LICENSE file in the root of the source
-// tree. An additional intellectual property rights grant can be found
-// in the file PATENTS. All contributing project authors may
-// be found in the AUTHORS file in the root of the source tree.
-
-#include "mkvmuxer/mkvwriter.h"
-
-#include <sys/types.h>
-
-#ifdef _MSC_VER
-#include <share.h> // for _SH_DENYWR
-#endif
-
-namespace mkvmuxer {
-
-MkvWriter::MkvWriter() : file_(NULL), writer_owns_file_(true) {}
-
-MkvWriter::MkvWriter(FILE* fp) : file_(fp), writer_owns_file_(false) {}
-
-MkvWriter::~MkvWriter() { Close(); }
-
-int32 MkvWriter::Write(const void* buffer, uint32 length) {
- if (!file_)
- return -1;
-
- if (length == 0)
- return 0;
-
- if (buffer == NULL)
- return -1;
-
- const size_t bytes_written = fwrite(buffer, 1, length, file_);
-
- return (bytes_written == length) ? 0 : -1;
-}
-
-bool MkvWriter::Open(const char* filename) {
- if (filename == NULL)
- return false;
-
- if (file_)
- return false;
-
-#ifdef _MSC_VER
- file_ = _fsopen(filename, "wb", _SH_DENYWR);
-#else
- file_ = fopen(filename, "wb");
-#endif
- if (file_ == NULL)
- return false;
- return true;
-}
-
-void MkvWriter::Close() {
- if (file_ && writer_owns_file_) {
- fclose(file_);
- }
- file_ = NULL;
-}
-
-int64 MkvWriter::Position() const {
- if (!file_)
- return 0;
-
-#ifdef _MSC_VER
- return _ftelli64(file_);
-#else
- return ftell(file_);
-#endif
-}
-
-int32 MkvWriter::Position(int64 position) {
- if (!file_)
- return -1;
-
-#ifdef _MSC_VER
- return _fseeki64(file_, position, SEEK_SET);
-#else
- return fseeko(file_, static_cast<off_t>(position), SEEK_SET);
-#endif
-}
-
-bool MkvWriter::Seekable() const { return true; }
-
-void MkvWriter::ElementStartNotify(uint64, int64) {}
-
-} // namespace mkvmuxer
diff --git a/third_party/aom/third_party/libwebm/mkvmuxer/mkvwriter.h b/third_party/aom/third_party/libwebm/mkvmuxer/mkvwriter.h
deleted file mode 100644
index 4227c6374..000000000
--- a/third_party/aom/third_party/libwebm/mkvmuxer/mkvwriter.h
+++ /dev/null
@@ -1,51 +0,0 @@
-// Copyright (c) 2012 The WebM project authors. All Rights Reserved.
-//
-// Use of this source code is governed by a BSD-style license
-// that can be found in the LICENSE file in the root of the source
-// tree. An additional intellectual property rights grant can be found
-// in the file PATENTS. All contributing project authors may
-// be found in the AUTHORS file in the root of the source tree.
-
-#ifndef MKVMUXER_MKVWRITER_H_
-#define MKVMUXER_MKVWRITER_H_
-
-#include <stdio.h>
-
-#include "mkvmuxer/mkvmuxer.h"
-#include "mkvmuxer/mkvmuxertypes.h"
-
-namespace mkvmuxer {
-
-// Default implementation of the IMkvWriter interface on Windows.
-class MkvWriter : public IMkvWriter {
- public:
- MkvWriter();
- explicit MkvWriter(FILE* fp);
- virtual ~MkvWriter();
-
- // IMkvWriter interface
- virtual int64 Position() const;
- virtual int32 Position(int64 position);
- virtual bool Seekable() const;
- virtual int32 Write(const void* buffer, uint32 length);
- virtual void ElementStartNotify(uint64 element_id, int64 position);
-
- // Creates and opens a file for writing. |filename| is the name of the file
- // to open. This function will overwrite the contents of |filename|. Returns
- // true on success.
- bool Open(const char* filename);
-
- // Closes an opened file.
- void Close();
-
- private:
- // File handle to output file.
- FILE* file_;
- bool writer_owns_file_;
-
- LIBWEBM_DISALLOW_COPY_AND_ASSIGN(MkvWriter);
-};
-
-} // namespace mkvmuxer
-
-#endif // MKVMUXER_MKVWRITER_H_
diff --git a/third_party/aom/third_party/libwebm/mkvparser/mkvparser.cc b/third_party/aom/third_party/libwebm/mkvparser/mkvparser.cc
deleted file mode 100644
index e7b76f7da..000000000
--- a/third_party/aom/third_party/libwebm/mkvparser/mkvparser.cc
+++ /dev/null
@@ -1,8049 +0,0 @@
-// Copyright (c) 2012 The WebM project authors. All Rights Reserved.
-//
-// Use of this source code is governed by a BSD-style license
-// that can be found in the LICENSE file in the root of the source
-// tree. An additional intellectual property rights grant can be found
-// in the file PATENTS. All contributing project authors may
-// be found in the AUTHORS file in the root of the source tree.
-#include "mkvparser/mkvparser.h"
-
-#if defined(_MSC_VER) && _MSC_VER < 1800
-#include <float.h> // _isnan() / _finite()
-#define MSC_COMPAT
-#endif
-
-#include <cassert>
-#include <cfloat>
-#include <climits>
-#include <cmath>
-#include <cstring>
-#include <memory>
-#include <new>
-
-#include "common/webmids.h"
-
-namespace mkvparser {
-const long long kStringElementSizeLimit = 20 * 1000 * 1000;
-const float MasteringMetadata::kValueNotPresent = FLT_MAX;
-const long long Colour::kValueNotPresent = LLONG_MAX;
-const float Projection::kValueNotPresent = FLT_MAX;
-
-#ifdef MSC_COMPAT
-inline bool isnan(double val) { return !!_isnan(val); }
-inline bool isinf(double val) { return !_finite(val); }
-#else
-inline bool isnan(double val) { return std::isnan(val); }
-inline bool isinf(double val) { return std::isinf(val); }
-#endif // MSC_COMPAT
-
-IMkvReader::~IMkvReader() {}
-
-template <typename Type>
-Type* SafeArrayAlloc(unsigned long long num_elements,
- unsigned long long element_size) {
- if (num_elements == 0 || element_size == 0)
- return NULL;
-
- const size_t kMaxAllocSize = 0x80000000; // 2GiB
- const unsigned long long num_bytes = num_elements * element_size;
- if (element_size > (kMaxAllocSize / num_elements))
- return NULL;
- if (num_bytes != static_cast<size_t>(num_bytes))
- return NULL;
-
- return new (std::nothrow) Type[static_cast<size_t>(num_bytes)];
-}
-
-void GetVersion(int& major, int& minor, int& build, int& revision) {
- major = 1;
- minor = 0;
- build = 0;
- revision = 30;
-}
-
-long long ReadUInt(IMkvReader* pReader, long long pos, long& len) {
- if (!pReader || pos < 0)
- return E_FILE_FORMAT_INVALID;
-
- len = 1;
- unsigned char b;
- int status = pReader->Read(pos, 1, &b);
-
- if (status < 0) // error or underflow
- return status;
-
- if (status > 0) // interpreted as "underflow"
- return E_BUFFER_NOT_FULL;
-
- if (b == 0) // we can't handle u-int values larger than 8 bytes
- return E_FILE_FORMAT_INVALID;
-
- unsigned char m = 0x80;
-
- while (!(b & m)) {
- m >>= 1;
- ++len;
- }
-
- long long result = b & (~m);
- ++pos;
-
- for (int i = 1; i < len; ++i) {
- status = pReader->Read(pos, 1, &b);
-
- if (status < 0) {
- len = 1;
- return status;
- }
-
- if (status > 0) {
- len = 1;
- return E_BUFFER_NOT_FULL;
- }
-
- result <<= 8;
- result |= b;
-
- ++pos;
- }
-
- return result;
-}
-
-// Reads an EBML ID and returns it.
-// An ID must at least 1 byte long, cannot exceed 4, and its value must be
-// greater than 0.
-// See known EBML values and EBMLMaxIDLength:
-// http://www.matroska.org/technical/specs/index.html
-// Returns the ID, or a value less than 0 to report an error while reading the
-// ID.
-long long ReadID(IMkvReader* pReader, long long pos, long& len) {
- if (pReader == NULL || pos < 0)
- return E_FILE_FORMAT_INVALID;
-
- // Read the first byte. The length in bytes of the ID is determined by
- // finding the first set bit in the first byte of the ID.
- unsigned char temp_byte = 0;
- int read_status = pReader->Read(pos, 1, &temp_byte);
-
- if (read_status < 0)
- return E_FILE_FORMAT_INVALID;
- else if (read_status > 0) // No data to read.
- return E_BUFFER_NOT_FULL;
-
- if (temp_byte == 0) // ID length > 8 bytes; invalid file.
- return E_FILE_FORMAT_INVALID;
-
- int bit_pos = 0;
- const int kMaxIdLengthInBytes = 4;
- const int kCheckByte = 0x80;
-
- // Find the first bit that's set.
- bool found_bit = false;
- for (; bit_pos < kMaxIdLengthInBytes; ++bit_pos) {
- if ((kCheckByte >> bit_pos) & temp_byte) {
- found_bit = true;
- break;
- }
- }
-
- if (!found_bit) {
- // The value is too large to be a valid ID.
- return E_FILE_FORMAT_INVALID;
- }
-
- // Read the remaining bytes of the ID (if any).
- const int id_length = bit_pos + 1;
- long long ebml_id = temp_byte;
- for (int i = 1; i < id_length; ++i) {
- ebml_id <<= 8;
- read_status = pReader->Read(pos + i, 1, &temp_byte);
-
- if (read_status < 0)
- return E_FILE_FORMAT_INVALID;
- else if (read_status > 0)
- return E_BUFFER_NOT_FULL;
-
- ebml_id |= temp_byte;
- }
-
- len = id_length;
- return ebml_id;
-}
-
-long long GetUIntLength(IMkvReader* pReader, long long pos, long& len) {
- if (!pReader || pos < 0)
- return E_FILE_FORMAT_INVALID;
-
- long long total, available;
-
- int status = pReader->Length(&total, &available);
- if (status < 0 || (total >= 0 && available > total))
- return E_FILE_FORMAT_INVALID;
-
- len = 1;
-
- if (pos >= available)
- return pos; // too few bytes available
-
- unsigned char b;
-
- status = pReader->Read(pos, 1, &b);
-
- if (status != 0)
- return status;
-
- if (b == 0) // we can't handle u-int values larger than 8 bytes
- return E_FILE_FORMAT_INVALID;
-
- unsigned char m = 0x80;
-
- while (!(b & m)) {
- m >>= 1;
- ++len;
- }
-
- return 0; // success
-}
-
-// TODO(vigneshv): This function assumes that unsigned values never have their
-// high bit set.
-long long UnserializeUInt(IMkvReader* pReader, long long pos, long long size) {
- if (!pReader || pos < 0 || (size <= 0) || (size > 8))
- return E_FILE_FORMAT_INVALID;
-
- long long result = 0;
-
- for (long long i = 0; i < size; ++i) {
- unsigned char b;
-
- const long status = pReader->Read(pos, 1, &b);
-
- if (status < 0)
- return status;
-
- result <<= 8;
- result |= b;
-
- ++pos;
- }
-
- return result;
-}
-
-long UnserializeFloat(IMkvReader* pReader, long long pos, long long size_,
- double& result) {
- if (!pReader || pos < 0 || ((size_ != 4) && (size_ != 8)))
- return E_FILE_FORMAT_INVALID;
-
- const long size = static_cast<long>(size_);
-
- unsigned char buf[8];
-
- const int status = pReader->Read(pos, size, buf);
-
- if (status < 0) // error
- return status;
-
- if (size == 4) {
- union {
- float f;
- unsigned long ff;
- };
-
- ff = 0;
-
- for (int i = 0;;) {
- ff |= buf[i];
-
- if (++i >= 4)
- break;
-
- ff <<= 8;
- }
-
- result = f;
- } else {
- union {
- double d;
- unsigned long long dd;
- };
-
- dd = 0;
-
- for (int i = 0;;) {
- dd |= buf[i];
-
- if (++i >= 8)
- break;
-
- dd <<= 8;
- }
-
- result = d;
- }
-
- if (mkvparser::isinf(result) || mkvparser::isnan(result))
- return E_FILE_FORMAT_INVALID;
-
- return 0;
-}
-
-long UnserializeInt(IMkvReader* pReader, long long pos, long long size,
- long long& result_ref) {
- if (!pReader || pos < 0 || size < 1 || size > 8)
- return E_FILE_FORMAT_INVALID;
-
- signed char first_byte = 0;
- const long status = pReader->Read(pos, 1, (unsigned char*)&first_byte);
-
- if (status < 0)
- return status;
-
- unsigned long long result = first_byte;
- ++pos;
-
- for (long i = 1; i < size; ++i) {
- unsigned char b;
-
- const long status = pReader->Read(pos, 1, &b);
-
- if (status < 0)
- return status;
-
- result <<= 8;
- result |= b;
-
- ++pos;
- }
-
- result_ref = static_cast<long long>(result);
- return 0;
-}
-
-long UnserializeString(IMkvReader* pReader, long long pos, long long size,
- char*& str) {
- delete[] str;
- str = NULL;
-
- if (size >= LONG_MAX || size < 0 || size > kStringElementSizeLimit)
- return E_FILE_FORMAT_INVALID;
-
- // +1 for '\0' terminator
- const long required_size = static_cast<long>(size) + 1;
-
- str = SafeArrayAlloc<char>(1, required_size);
- if (str == NULL)
- return E_FILE_FORMAT_INVALID;
-
- unsigned char* const buf = reinterpret_cast<unsigned char*>(str);
-
- const long status = pReader->Read(pos, static_cast<long>(size), buf);
-
- if (status) {
- delete[] str;
- str = NULL;
-
- return status;
- }
-
- str[required_size - 1] = '\0';
- return 0;
-}
-
-long ParseElementHeader(IMkvReader* pReader, long long& pos, long long stop,
- long long& id, long long& size) {
- if (stop >= 0 && pos >= stop)
- return E_FILE_FORMAT_INVALID;
-
- long len;
-
- id = ReadID(pReader, pos, len);
-
- if (id < 0)
- return E_FILE_FORMAT_INVALID;
-
- pos += len; // consume id
-
- if (stop >= 0 && pos >= stop)
- return E_FILE_FORMAT_INVALID;
-
- size = ReadUInt(pReader, pos, len);
-
- if (size < 0 || len < 1 || len > 8) {
- // Invalid: Negative payload size, negative or 0 length integer, or integer
- // larger than 64 bits (libwebm cannot handle them).
- return E_FILE_FORMAT_INVALID;
- }
-
- // Avoid rolling over pos when very close to LLONG_MAX.
- const unsigned long long rollover_check =
- static_cast<unsigned long long>(pos) + len;
- if (rollover_check > LLONG_MAX)
- return E_FILE_FORMAT_INVALID;
-
- pos += len; // consume length of size
-
- // pos now designates payload
-
- if (stop >= 0 && pos > stop)
- return E_FILE_FORMAT_INVALID;
-
- return 0; // success
-}
-
-bool Match(IMkvReader* pReader, long long& pos, unsigned long expected_id,
- long long& val) {
- if (!pReader || pos < 0)
- return false;
-
- long long total = 0;
- long long available = 0;
-
- const long status = pReader->Length(&total, &available);
- if (status < 0 || (total >= 0 && available > total))
- return false;
-
- long len = 0;
-
- const long long id = ReadID(pReader, pos, len);
- if (id < 0 || (available - pos) > len)
- return false;
-
- if (static_cast<unsigned long>(id) != expected_id)
- return false;
-
- pos += len; // consume id
-
- const long long size = ReadUInt(pReader, pos, len);
- if (size < 0 || size > 8 || len < 1 || len > 8 || (available - pos) > len)
- return false;
-
- pos += len; // consume length of size of payload
-
- val = UnserializeUInt(pReader, pos, size);
- if (val < 0)
- return false;
-
- pos += size; // consume size of payload
-
- return true;
-}
-
-bool Match(IMkvReader* pReader, long long& pos, unsigned long expected_id,
- unsigned char*& buf, size_t& buflen) {
- if (!pReader || pos < 0)
- return false;
-
- long long total = 0;
- long long available = 0;
-
- long status = pReader->Length(&total, &available);
- if (status < 0 || (total >= 0 && available > total))
- return false;
-
- long len = 0;
- const long long id = ReadID(pReader, pos, len);
- if (id < 0 || (available - pos) > len)
- return false;
-
- if (static_cast<unsigned long>(id) != expected_id)
- return false;
-
- pos += len; // consume id
-
- const long long size = ReadUInt(pReader, pos, len);
- if (size < 0 || len <= 0 || len > 8 || (available - pos) > len)
- return false;
-
- unsigned long long rollover_check =
- static_cast<unsigned long long>(pos) + len;
- if (rollover_check > LLONG_MAX)
- return false;
-
- pos += len; // consume length of size of payload
-
- rollover_check = static_cast<unsigned long long>(pos) + size;
- if (rollover_check > LLONG_MAX)
- return false;
-
- if ((pos + size) > available)
- return false;
-
- if (size >= LONG_MAX)
- return false;
-
- const long buflen_ = static_cast<long>(size);
-
- buf = SafeArrayAlloc<unsigned char>(1, buflen_);
- if (!buf)
- return false;
-
- status = pReader->Read(pos, buflen_, buf);
- if (status != 0)
- return false;
-
- buflen = buflen_;
-
- pos += size; // consume size of payload
- return true;
-}
-
-EBMLHeader::EBMLHeader() : m_docType(NULL) { Init(); }
-
-EBMLHeader::~EBMLHeader() { delete[] m_docType; }
-
-void EBMLHeader::Init() {
- m_version = 1;
- m_readVersion = 1;
- m_maxIdLength = 4;
- m_maxSizeLength = 8;
-
- if (m_docType) {
- delete[] m_docType;
- m_docType = NULL;
- }
-
- m_docTypeVersion = 1;
- m_docTypeReadVersion = 1;
-}
-
-long long EBMLHeader::Parse(IMkvReader* pReader, long long& pos) {
- if (!pReader)
- return E_FILE_FORMAT_INVALID;
-
- long long total, available;
-
- long status = pReader->Length(&total, &available);
-
- if (status < 0) // error
- return status;
-
- pos = 0;
-
- // Scan until we find what looks like the first byte of the EBML header.
- const long long kMaxScanBytes = (available >= 1024) ? 1024 : available;
- const unsigned char kEbmlByte0 = 0x1A;
- unsigned char scan_byte = 0;
-
- while (pos < kMaxScanBytes) {
- status = pReader->Read(pos, 1, &scan_byte);
-
- if (status < 0) // error
- return status;
- else if (status > 0)
- return E_BUFFER_NOT_FULL;
-
- if (scan_byte == kEbmlByte0)
- break;
-
- ++pos;
- }
-
- long len = 0;
- const long long ebml_id = ReadID(pReader, pos, len);
-
- if (ebml_id == E_BUFFER_NOT_FULL)
- return E_BUFFER_NOT_FULL;
-
- if (len != 4 || ebml_id != libwebm::kMkvEBML)
- return E_FILE_FORMAT_INVALID;
-
- // Move read pos forward to the EBML header size field.
- pos += 4;
-
- // Read length of size field.
- long long result = GetUIntLength(pReader, pos, len);
-
- if (result < 0) // error
- return E_FILE_FORMAT_INVALID;
- else if (result > 0) // need more data
- return E_BUFFER_NOT_FULL;
-
- if (len < 1 || len > 8)
- return E_FILE_FORMAT_INVALID;
-
- if ((total >= 0) && ((total - pos) < len))
- return E_FILE_FORMAT_INVALID;
-
- if ((available - pos) < len)
- return pos + len; // try again later
-
- // Read the EBML header size.
- result = ReadUInt(pReader, pos, len);
-
- if (result < 0) // error
- return result;
-
- pos += len; // consume size field
-
- // pos now designates start of payload
-
- if ((total >= 0) && ((total - pos) < result))
- return E_FILE_FORMAT_INVALID;
-
- if ((available - pos) < result)
- return pos + result;
-
- const long long end = pos + result;
-
- Init();
-
- while (pos < end) {
- long long id, size;
-
- status = ParseElementHeader(pReader, pos, end, id, size);
-
- if (status < 0) // error
- return status;
-
- if (size == 0)
- return E_FILE_FORMAT_INVALID;
-
- if (id == libwebm::kMkvEBMLVersion) {
- m_version = UnserializeUInt(pReader, pos, size);
-
- if (m_version <= 0)
- return E_FILE_FORMAT_INVALID;
- } else if (id == libwebm::kMkvEBMLReadVersion) {
- m_readVersion = UnserializeUInt(pReader, pos, size);
-
- if (m_readVersion <= 0)
- return E_FILE_FORMAT_INVALID;
- } else if (id == libwebm::kMkvEBMLMaxIDLength) {
- m_maxIdLength = UnserializeUInt(pReader, pos, size);
-
- if (m_maxIdLength <= 0)
- return E_FILE_FORMAT_INVALID;
- } else if (id == libwebm::kMkvEBMLMaxSizeLength) {
- m_maxSizeLength = UnserializeUInt(pReader, pos, size);
-
- if (m_maxSizeLength <= 0)
- return E_FILE_FORMAT_INVALID;
- } else if (id == libwebm::kMkvDocType) {
- if (m_docType)
- return E_FILE_FORMAT_INVALID;
-
- status = UnserializeString(pReader, pos, size, m_docType);
-
- if (status) // error
- return status;
- } else if (id == libwebm::kMkvDocTypeVersion) {
- m_docTypeVersion = UnserializeUInt(pReader, pos, size);
-
- if (m_docTypeVersion <= 0)
- return E_FILE_FORMAT_INVALID;
- } else if (id == libwebm::kMkvDocTypeReadVersion) {
- m_docTypeReadVersion = UnserializeUInt(pReader, pos, size);
-
- if (m_docTypeReadVersion <= 0)
- return E_FILE_FORMAT_INVALID;
- }
-
- pos += size;
- }
-
- if (pos != end)
- return E_FILE_FORMAT_INVALID;
-
- // Make sure DocType, DocTypeReadVersion, and DocTypeVersion are valid.
- if (m_docType == NULL || m_docTypeReadVersion <= 0 || m_docTypeVersion <= 0)
- return E_FILE_FORMAT_INVALID;
-
- // Make sure EBMLMaxIDLength and EBMLMaxSizeLength are valid.
- if (m_maxIdLength <= 0 || m_maxIdLength > 4 || m_maxSizeLength <= 0 ||
- m_maxSizeLength > 8)
- return E_FILE_FORMAT_INVALID;
-
- return 0;
-}
-
-Segment::Segment(IMkvReader* pReader, long long elem_start,
- // long long elem_size,
- long long start, long long size)
- : m_pReader(pReader),
- m_element_start(elem_start),
- // m_element_size(elem_size),
- m_start(start),
- m_size(size),
- m_pos(start),
- m_pUnknownSize(0),
- m_pSeekHead(NULL),
- m_pInfo(NULL),
- m_pTracks(NULL),
- m_pCues(NULL),
- m_pChapters(NULL),
- m_pTags(NULL),
- m_clusters(NULL),
- m_clusterCount(0),
- m_clusterPreloadCount(0),
- m_clusterSize(0) {}
-
-Segment::~Segment() {
- const long count = m_clusterCount + m_clusterPreloadCount;
-
- Cluster** i = m_clusters;
- Cluster** j = m_clusters + count;
-
- while (i != j) {
- Cluster* const p = *i++;
- delete p;
- }
-
- delete[] m_clusters;
-
- delete m_pTracks;
- delete m_pInfo;
- delete m_pCues;
- delete m_pChapters;
- delete m_pTags;
- delete m_pSeekHead;
-}
-
-long long Segment::CreateInstance(IMkvReader* pReader, long long pos,
- Segment*& pSegment) {
- if (pReader == NULL || pos < 0)
- return E_PARSE_FAILED;
-
- pSegment = NULL;
-
- long long total, available;
-
- const long status = pReader->Length(&total, &available);
-
- if (status < 0) // error
- return status;
-
- if (available < 0)
- return -1;
-
- if ((total >= 0) && (available > total))
- return -1;
-
- // I would assume that in practice this loop would execute
- // exactly once, but we allow for other elements (e.g. Void)
- // to immediately follow the EBML header. This is fine for
- // the source filter case (since the entire file is available),
- // but in the splitter case over a network we should probably
- // just give up early. We could for example decide only to
- // execute this loop a maximum of, say, 10 times.
- // TODO:
- // There is an implied "give up early" by only parsing up
- // to the available limit. We do do that, but only if the
- // total file size is unknown. We could decide to always
- // use what's available as our limit (irrespective of whether
- // we happen to know the total file length). This would have
- // as its sense "parse this much of the file before giving up",
- // which a slightly different sense from "try to parse up to
- // 10 EMBL elements before giving up".
-
- for (;;) {
- if ((total >= 0) && (pos >= total))
- return E_FILE_FORMAT_INVALID;
-
- // Read ID
- long len;
- long long result = GetUIntLength(pReader, pos, len);
-
- if (result) // error, or too few available bytes
- return result;
-
- if ((total >= 0) && ((pos + len) > total))
- return E_FILE_FORMAT_INVALID;
-
- if ((pos + len) > available)
- return pos + len;
-
- const long long idpos = pos;
- const long long id = ReadID(pReader, pos, len);
-
- if (id < 0)
- return E_FILE_FORMAT_INVALID;
-
- pos += len; // consume ID
-
- // Read Size
-
- result = GetUIntLength(pReader, pos, len);
-
- if (result) // error, or too few available bytes
- return result;
-
- if ((total >= 0) && ((pos + len) > total))
- return E_FILE_FORMAT_INVALID;
-
- if ((pos + len) > available)
- return pos + len;
-
- long long size = ReadUInt(pReader, pos, len);
-
- if (size < 0) // error
- return size;
-
- pos += len; // consume length of size of element
-
- // Pos now points to start of payload
-
- // Handle "unknown size" for live streaming of webm files.
- const long long unknown_size = (1LL << (7 * len)) - 1;
-
- if (id == libwebm::kMkvSegment) {
- if (size == unknown_size)
- size = -1;
-
- else if (total < 0)
- size = -1;
-
- else if ((pos + size) > total)
- size = -1;
-
- pSegment = new (std::nothrow) Segment(pReader, idpos, pos, size);
- if (pSegment == NULL)
- return E_PARSE_FAILED;
-
- return 0; // success
- }
-
- if (size == unknown_size)
- return E_FILE_FORMAT_INVALID;
-
- if ((total >= 0) && ((pos + size) > total))
- return E_FILE_FORMAT_INVALID;
-
- if ((pos + size) > available)
- return pos + size;
-
- pos += size; // consume payload
- }
-}
-
-long long Segment::ParseHeaders() {
- // Outermost (level 0) segment object has been constructed,
- // and pos designates start of payload. We need to find the
- // inner (level 1) elements.
- long long total, available;
-
- const int status = m_pReader->Length(&total, &available);
-
- if (status < 0) // error
- return status;
-
- if (total > 0 && available > total)
- return E_FILE_FORMAT_INVALID;
-
- const long long segment_stop = (m_size < 0) ? -1 : m_start + m_size;
-
- if ((segment_stop >= 0 && total >= 0 && segment_stop > total) ||
- (segment_stop >= 0 && m_pos > segment_stop)) {
- return E_FILE_FORMAT_INVALID;
- }
-
- for (;;) {
- if ((total >= 0) && (m_pos >= total))
- break;
-
- if ((segment_stop >= 0) && (m_pos >= segment_stop))
- break;
-
- long long pos = m_pos;
- const long long element_start = pos;
-
- // Avoid rolling over pos when very close to LLONG_MAX.
- unsigned long long rollover_check = pos + 1ULL;
- if (rollover_check > LLONG_MAX)
- return E_FILE_FORMAT_INVALID;
-
- if ((pos + 1) > available)
- return (pos + 1);
-
- long len;
- long long result = GetUIntLength(m_pReader, pos, len);
-
- if (result < 0) // error
- return result;
-
- if (result > 0) {
- // MkvReader doesn't have enough data to satisfy this read attempt.
- return (pos + 1);
- }
-
- if ((segment_stop >= 0) && ((pos + len) > segment_stop))
- return E_FILE_FORMAT_INVALID;
-
- if ((pos + len) > available)
- return pos + len;
-
- const long long idpos = pos;
- const long long id = ReadID(m_pReader, idpos, len);
-
- if (id < 0)
- return E_FILE_FORMAT_INVALID;
-
- if (id == libwebm::kMkvCluster)
- break;
-
- pos += len; // consume ID
-
- if ((pos + 1) > available)
- return (pos + 1);
-
- // Read Size
- result = GetUIntLength(m_pReader, pos, len);
-
- if (result < 0) // error
- return result;
-
- if (result > 0) {
- // MkvReader doesn't have enough data to satisfy this read attempt.
- return (pos + 1);
- }
-
- if ((segment_stop >= 0) && ((pos + len) > segment_stop))
- return E_FILE_FORMAT_INVALID;
-
- if ((pos + len) > available)
- return pos + len;
-
- const long long size = ReadUInt(m_pReader, pos, len);
-
- if (size < 0 || len < 1 || len > 8) {
- // TODO(tomfinegan): ReadUInt should return an error when len is < 1 or
- // len > 8 is true instead of checking this _everywhere_.
- return size;
- }
-
- pos += len; // consume length of size of element
-
- // Avoid rolling over pos when very close to LLONG_MAX.
- rollover_check = static_cast<unsigned long long>(pos) + size;
- if (rollover_check > LLONG_MAX)
- return E_FILE_FORMAT_INVALID;
-
- const long long element_size = size + pos - element_start;
-
- // Pos now points to start of payload
-
- if ((segment_stop >= 0) && ((pos + size) > segment_stop))
- return E_FILE_FORMAT_INVALID;
-
- // We read EBML elements either in total or nothing at all.
-
- if ((pos + size) > available)
- return pos + size;
-
- if (id == libwebm::kMkvInfo) {
- if (m_pInfo)
- return E_FILE_FORMAT_INVALID;
-
- m_pInfo = new (std::nothrow)
- SegmentInfo(this, pos, size, element_start, element_size);
-
- if (m_pInfo == NULL)
- return -1;
-
- const long status = m_pInfo->Parse();
-
- if (status)
- return status;
- } else if (id == libwebm::kMkvTracks) {
- if (m_pTracks)
- return E_FILE_FORMAT_INVALID;
-
- m_pTracks = new (std::nothrow)
- Tracks(this, pos, size, element_start, element_size);
-
- if (m_pTracks == NULL)
- return -1;
-
- const long status = m_pTracks->Parse();
-
- if (status)
- return status;
- } else if (id == libwebm::kMkvCues) {
- if (m_pCues == NULL) {
- m_pCues = new (std::nothrow)
- Cues(this, pos, size, element_start, element_size);
-
- if (m_pCues == NULL)
- return -1;
- }
- } else if (id == libwebm::kMkvSeekHead) {
- if (m_pSeekHead == NULL) {
- m_pSeekHead = new (std::nothrow)
- SeekHead(this, pos, size, element_start, element_size);
-
- if (m_pSeekHead == NULL)
- return -1;
-
- const long status = m_pSeekHead->Parse();
-
- if (status)
- return status;
- }
- } else if (id == libwebm::kMkvChapters) {
- if (m_pChapters == NULL) {
- m_pChapters = new (std::nothrow)
- Chapters(this, pos, size, element_start, element_size);
-
- if (m_pChapters == NULL)
- return -1;
-
- const long status = m_pChapters->Parse();
-
- if (status)
- return status;
- }
- } else if (id == libwebm::kMkvTags) {
- if (m_pTags == NULL) {
- m_pTags = new (std::nothrow)
- Tags(this, pos, size, element_start, element_size);
-
- if (m_pTags == NULL)
- return -1;
-
- const long status = m_pTags->Parse();
-
- if (status)
- return status;
- }
- }
-
- m_pos = pos + size; // consume payload
- }
-
- if (segment_stop >= 0 && m_pos > segment_stop)
- return E_FILE_FORMAT_INVALID;
-
- if (m_pInfo == NULL) // TODO: liberalize this behavior
- return E_FILE_FORMAT_INVALID;
-
- if (m_pTracks == NULL)
- return E_FILE_FORMAT_INVALID;
-
- return 0; // success
-}
-
-// Repeatedly invokes DoLoadCluster() until it stops asking for a retry.
-// DoLoadCluster() returns 2 to request another round; every result <= 1
-// (0, 1, or a negative error code) is handed back to the caller unchanged.
-long Segment::LoadCluster(long long& pos, long& len) {
-  long outcome;
-
-  do {
-    outcome = DoLoadCluster(pos, len);
-  } while (outcome > 1);
-
-  return outcome;
-}
-
-// Performs one step of cluster loading, scanning level-1 elements from m_pos.
-//
-// Return values produced in this function:
-//    0  a cluster was appended, or a preloaded cluster was promoted to loaded
-//    1  no more clusters (m_pos reached |total| or the end of the segment)
-//    2  a cluster without block entries was skipped; call again
-//   <0  error (E_FILE_FORMAT_INVALID, -1 on allocation failure, reader
-//       errors) or E_BUFFER_NOT_FULL when more bytes are needed
-//
-// |pos| and |len| are in/out scratch values: on underflow they are left at
-// the position/length that was being read. A negative m_pos means an
-// unknown-size cluster is pending and the call is forwarded to
-// DoLoadClusterUnknownSize().
-long Segment::DoLoadCluster(long long& pos, long& len) {
- if (m_pos < 0)
- return DoLoadClusterUnknownSize(pos, len);
-
- long long total, avail;
-
- long status = m_pReader->Length(&total, &avail);
-
- if (status < 0) // error
- return status;
-
- if (total >= 0 && avail > total)
- return E_FILE_FORMAT_INVALID;
-
- // -1 marks a segment whose size is unknown (m_size < 0).
- const long long segment_stop = (m_size < 0) ? -1 : m_start + m_size;
-
- long long cluster_off = -1; // offset relative to start of segment
- long long cluster_size = -1; // size of cluster payload
-
- // Scan forward over level-1 elements until a Cluster ID is found. Cues are
- // recorded on the way; every other element is skipped.
- for (;;) {
- if ((total >= 0) && (m_pos >= total))
- return 1; // no more clusters
-
- if ((segment_stop >= 0) && (m_pos >= segment_stop))
- return 1; // no more clusters
-
- pos = m_pos;
-
- // Read ID
-
- if ((pos + 1) > avail) {
- len = 1;
- return E_BUFFER_NOT_FULL;
- }
-
- long long result = GetUIntLength(m_pReader, pos, len);
-
- if (result < 0) // error
- return static_cast<long>(result);
-
- if (result > 0)
- return E_BUFFER_NOT_FULL;
-
- if ((segment_stop >= 0) && ((pos + len) > segment_stop))
- return E_FILE_FORMAT_INVALID;
-
- if ((pos + len) > avail)
- return E_BUFFER_NOT_FULL;
-
- const long long idpos = pos;
- const long long id = ReadID(m_pReader, idpos, len);
-
- if (id < 0)
- return E_FILE_FORMAT_INVALID;
-
- pos += len; // consume ID
-
- // Read Size
-
- if ((pos + 1) > avail) {
- len = 1;
- return E_BUFFER_NOT_FULL;
- }
-
- result = GetUIntLength(m_pReader, pos, len);
-
- if (result < 0) // error
- return static_cast<long>(result);
-
- if (result > 0)
- return E_BUFFER_NOT_FULL;
-
- if ((segment_stop >= 0) && ((pos + len) > segment_stop))
- return E_FILE_FORMAT_INVALID;
-
- if ((pos + len) > avail)
- return E_BUFFER_NOT_FULL;
-
- const long long size = ReadUInt(m_pReader, pos, len);
-
- if (size < 0) // error
- return static_cast<long>(size);
-
- pos += len; // consume length of size of element
-
- // pos now points to start of payload
-
- if (size == 0) {
- // Missing element payload: move on.
- m_pos = pos;
- continue;
- }
-
- // A size field of |len| bytes whose 7*len value bits are all ones is
- // treated as "unknown size".
- const long long unknown_size = (1LL << (7 * len)) - 1;
-
- if ((segment_stop >= 0) && (size != unknown_size) &&
- ((pos + size) > segment_stop)) {
- return E_FILE_FORMAT_INVALID;
- }
-
- if (id == libwebm::kMkvCues) {
- if (size == unknown_size) {
- // Cues element of unknown size: Not supported.
- return E_FILE_FORMAT_INVALID;
- }
-
- // Only the first Cues element seen is recorded; it is not parsed here.
- if (m_pCues == NULL) {
- const long long element_size = (pos - idpos) + size;
-
- m_pCues = new (std::nothrow) Cues(this, pos, size, idpos, element_size);
- if (m_pCues == NULL)
- return -1;
- }
-
- m_pos = pos + size; // consume payload
- continue;
- }
-
- if (id != libwebm::kMkvCluster) {
- // Besides the Segment, Libwebm allows only cluster elements of unknown
- // size. Fail the parse upon encountering a non-cluster element reporting
- // unknown size.
- if (size == unknown_size)
- return E_FILE_FORMAT_INVALID;
-
- m_pos = pos + size; // consume payload
- continue;
- }
-
- // We have a cluster.
-
- cluster_off = idpos - m_start; // relative pos
-
- // cluster_size stays -1 when the cluster declares an unknown size.
- if (size != unknown_size)
- cluster_size = size;
-
- break;
- }
-
- if (cluster_off < 0) {
- // No cluster, die.
- return E_FILE_FORMAT_INVALID;
- }
-
- long long pos_;
- long len_;
-
- status = Cluster::HasBlockEntries(this, cluster_off, pos_, len_);
-
- if (status < 0) { // error, or underflow
- pos = pos_;
- len = len_;
-
- return status;
- }
-
- // status == 0 means "no block entries found"
- // status > 0 means "found at least one block entry"
-
- // TODO:
- // The issue here is that the segment increments its own
- // pos ptr past the most recent cluster parsed, and then
- // starts from there to parse the next cluster. If we
- // don't know the size of the current cluster, then we
- // must either parse its payload (as we do below), looking
- // for the cluster (or cues) ID to terminate the parse.
- // This isn't really what we want: rather, we really need
- // a way to create the curr cluster object immediately.
- // The pity is that cluster::parse can determine its own
- // boundary, and we largely duplicate that same logic here.
- //
- // Maybe we need to get rid of our look-ahead preloading
- // in source::parse???
- //
- // As we're parsing the blocks in the curr cluster
- //(in cluster::parse), we should have some way to signal
- // to the segment that we have determined the boundary,
- // so it can adjust its own segment::m_pos member.
- //
- // The problem is that we're asserting in asyncreadinit,
- // because we adjust the pos down to the curr seek pos,
- // and the resulting adjusted len is > 2GB. I'm suspicious
- // that this is even correct, but even if it is, we can't
- // be loading that much data in the cache anyway.
-
- const long idx = m_clusterCount;
-
- // If preloaded clusters exist, the first one sits at index m_clusterCount.
- // When it is the cluster just found, promote it instead of creating a new
- // object.
- if (m_clusterPreloadCount > 0) {
- if (idx >= m_clusterSize)
- return E_FILE_FORMAT_INVALID;
-
- Cluster* const pCluster = m_clusters[idx];
- if (pCluster == NULL || pCluster->m_index >= 0)
- return E_FILE_FORMAT_INVALID;
-
- const long long off = pCluster->GetPosition();
- if (off < 0)
- return E_FILE_FORMAT_INVALID;
-
- if (off == cluster_off) { // preloaded already
- if (status == 0) // no entries found
- return E_FILE_FORMAT_INVALID;
-
- if (cluster_size >= 0)
- pos += cluster_size;
- else {
- const long long element_size = pCluster->GetElementSize();
-
- if (element_size <= 0)
- return E_FILE_FORMAT_INVALID; // TODO: handle this case
-
- pos = pCluster->m_element_start + element_size;
- }
-
- pCluster->m_index = idx; // move from preloaded to loaded
- ++m_clusterCount;
- --m_clusterPreloadCount;
-
- m_pos = pos; // consume payload
- if (segment_stop >= 0 && m_pos > segment_stop)
- return E_FILE_FORMAT_INVALID;
-
- return 0; // success
- }
- }
-
- if (status == 0) { // no entries found
- if (cluster_size >= 0)
- pos += cluster_size;
-
- if ((total >= 0) && (pos >= total)) {
- m_pos = total;
- return 1; // no more clusters
- }
-
- if ((segment_stop >= 0) && (pos >= segment_stop)) {
- m_pos = segment_stop;
- return 1; // no more clusters
- }
-
- m_pos = pos;
- return 2; // try again
- }
-
- // status > 0 means we have an entry
-
- Cluster* const pCluster = Cluster::Create(this, idx, cluster_off);
- if (pCluster == NULL)
- return -1;
-
- if (!AppendCluster(pCluster)) {
- delete pCluster;
- return -1;
- }
-
- if (cluster_size >= 0) {
- pos += cluster_size;
-
- m_pos = pos;
-
- // NOTE(review): every other segment_stop test in this function uses
- // ">= 0"; this one uses "> 0". Looks unintentional - confirm.
- if (segment_stop > 0 && m_pos > segment_stop)
- return E_FILE_FORMAT_INVALID;
-
- return 0;
- }
-
- // Unknown-size cluster: remember it and store the payload position negated
- // so the next call is routed to DoLoadClusterUnknownSize().
- m_pUnknownSize = pCluster;
- m_pos = -pos;
-
- return 0; // partial success, since we have a new cluster
-
- // status == 0 means "no block entries found"
- // pos designates start of payload
- // m_pos has NOT been adjusted yet (in case we need to come back here)
-}
-
-// Continues parsing the pending unknown-size cluster (m_pUnknownSize).
-// Returns E_PARSE_FAILED when no such cluster is pending, a negative status
-// from Cluster::Parse() on error/underflow, E_FILE_FORMAT_INVALID when the
-// cluster reports a negative element size, and 2 ("continue parsing")
-// otherwise.
-long Segment::DoLoadClusterUnknownSize(long long& pos, long& len) {
-  // This path is only valid while m_pos is negative and a cluster is pending.
-  if (m_pUnknownSize == NULL || m_pos >= 0)
-    return E_PARSE_FAILED;
-
-  const long parse_status = m_pUnknownSize->Parse(pos, len);
-
-  if (parse_status < 0)  // error or underflow
-    return parse_status;
-
-  if (parse_status != 0) {
-    // Parse() did not report a parsed block: close out the cluster using the
-    // element extent it now reports and resume normal scanning after it.
-    const long long cluster_start = m_pUnknownSize->m_element_start;
-    const long long cluster_bytes = m_pUnknownSize->GetElementSize();
-
-    if (cluster_bytes < 0)
-      return E_FILE_FORMAT_INVALID;
-
-    pos = cluster_start + cluster_bytes;
-    m_pos = pos;
-
-    m_pUnknownSize = 0;
-  }
-
-  return 2;  // continue parsing
-}
-
-// Inserts |pCluster| as the next loaded cluster, i.e. at index m_clusterCount.
-// The cluster must already carry that index in m_index. Any preloaded
-// clusters (stored after the loaded ones) are shifted up by one slot to make
-// room. Returns false on invalid state or allocation failure.
-bool Segment::AppendCluster(Cluster* pCluster) {
-  if (pCluster == NULL || pCluster->m_index < 0)
-    return false;
-
-  const long used = m_clusterCount + m_clusterPreloadCount;
-  const long slot = pCluster->m_index;
-
-  if (m_clusterSize < used || slot != m_clusterCount)
-    return false;
-
-  if (used >= m_clusterSize) {
-    // Array is full (or not yet allocated): start at 2048, then double.
-    const long new_capacity = (m_clusterSize <= 0) ? 2048 : 2 * m_clusterSize;
-
-    Cluster** const grown = new (std::nothrow) Cluster*[new_capacity];
-    if (grown == NULL)
-      return false;
-
-    Cluster** dst = grown;
-    Cluster** src = m_clusters;
-    Cluster** const src_end = src + used;
-
-    while (src != src_end)
-      *dst++ = *src++;
-
-    delete[] m_clusters;
-
-    m_clusters = grown;
-    m_clusterSize = new_capacity;
-  }
-
-  if (m_clusterPreloadCount > 0) {
-    // The first preloaded entry sits where the new cluster must go.
-    Cluster** const hole = m_clusters + m_clusterCount;
-    if (*hole == NULL || (*hole)->m_index >= 0)
-      return false;
-
-    Cluster** cursor = hole + m_clusterPreloadCount;
-    if (cursor >= (m_clusters + m_clusterSize))
-      return false;
-
-    // Move the preloaded run up by one, back to front. Every moved entry
-    // must still be in the preloaded state (negative m_index).
-    do {
-      Cluster** const below = cursor - 1;
-      if ((*below)->m_index >= 0)
-        return false;
-
-      *cursor = *below;
-      cursor = below;
-    } while (cursor != hole);
-  }
-
-  m_clusters[slot] = pCluster;
-  ++m_clusterCount;
-  return true;
-}
-
-// Inserts |pCluster| (which must still have a negative m_index) at array
-// position |idx| inside the preloaded region, shifting later preloaded
-// entries up by one. Returns false on invalid arguments/state or allocation
-// failure.
-bool Segment::PreloadCluster(Cluster* pCluster, ptrdiff_t idx) {
-  if (pCluster == NULL || pCluster->m_index >= 0 || idx < m_clusterCount)
-    return false;
-
-  const long used = m_clusterCount + m_clusterPreloadCount;
-
-  if (m_clusterSize < used)
-    return false;
-
-  if (used >= m_clusterSize) {
-    // Array is full (or not yet allocated): start at 2048, then double.
-    const long new_capacity = (m_clusterSize <= 0) ? 2048 : 2 * m_clusterSize;
-
-    Cluster** const grown = new (std::nothrow) Cluster*[new_capacity];
-    if (grown == NULL)
-      return false;
-
-    Cluster** dst = grown;
-    Cluster** src = m_clusters;
-    Cluster** const src_end = src + used;
-
-    while (src != src_end)
-      *dst++ = *src++;
-
-    delete[] m_clusters;
-
-    m_clusters = grown;
-    m_clusterSize = new_capacity;
-  }
-
-  if (m_clusters == NULL)
-    return false;
-
-  Cluster** const target = m_clusters + idx;
-  Cluster** cursor = m_clusters + used;
-
-  // The insertion point must lie within [target, end of used entries] and a
-  // free slot must exist at the end.
-  if (cursor < target || cursor >= (m_clusters + m_clusterSize))
-    return false;
-
-  // Open a gap at |target| by moving entries up one slot, back to front.
-  // Only preloaded entries (negative m_index) may be moved.
-  for (; cursor > target; --cursor) {
-    Cluster* const moved = *(cursor - 1);
-
-    if (moved->m_index >= 0)
-      return false;
-
-    *cursor = moved;
-  }
-
-  m_clusters[idx] = pCluster;
-  ++m_clusterPreloadCount;
-  return true;
-}
-
-// Parses the segment headers and then loads clusters until LoadCluster()
-// stops returning 0. Returns 0 on success, E_PARSE_FAILED if the segment
-// already holds cluster state, E_BUFFER_NOT_FULL if ParseHeaders() reports
-// underflow, or a negative error code.
-long Segment::Load() {
-  // Load() must start from a segment with no cluster bookkeeping at all.
-  const bool pristine =
-      (m_clusters == NULL) && (m_clusterSize == 0) && (m_clusterCount == 0);
-  if (!pristine)
-    return E_PARSE_FAILED;
-
-  // The level-0 segment has been constructed; locate its level-1 elements.
-  const long long header_status = ParseHeaders();
-
-  if (header_status > 0)  // underflow
-    return E_BUFFER_NOT_FULL;
-
-  if (header_status < 0)  // error
-    return static_cast<long>(header_status);
-
-  if (m_pInfo == NULL || m_pTracks == NULL)
-    return E_FILE_FORMAT_INVALID;
-
-  long status;
-  do {
-    status = LoadCluster();
-  } while (status == 0);
-
-  // Negative: error. Positive: no more clusters, which counts as success.
-  return (status < 0) ? status : 0;
-}
-
-// Creates an empty entry with every field zeroed.
-SeekHead::Entry::Entry() : id(0), pos(0), element_start(0), element_size(0) {}
-
-// Records the owning segment and the extent of the SeekHead element:
-// |start|/|size_| are stored as the payload range walked by Parse(), and
-// |element_start|/|element_size| are stored as the element extent. No
-// parsing happens here; the entry and void-element arrays start out empty.
-SeekHead::SeekHead(Segment* pSegment, long long start, long long size_,
- long long element_start, long long element_size)
- : m_pSegment(pSegment),
- m_start(start),
- m_size(size_),
- m_element_start(element_start),
- m_element_size(element_size),
- m_entries(0),
- m_entry_count(0),
- m_void_elements(0),
- m_void_element_count(0) {}
-
-// Releases the arrays allocated by Parse(); both may still be null.
-SeekHead::~SeekHead() {
- delete[] m_entries;
- delete[] m_void_elements;
-}
-
-// Parses the SeekHead payload in two passes: the first counts Seek and Void
-// children so the arrays can be allocated, the second fills them in.
-// Returns 0 on success, -1 on allocation failure, E_FILE_FORMAT_INVALID when
-// the children do not exactly tile the payload, or a negative status from
-// ParseElementHeader().
-long SeekHead::Parse() {
-  IMkvReader* const reader = m_pSegment->m_pReader;
-  const long long payload_end = m_start + m_size;
-
-  // Pass 1: count the children of interest.
-  int seek_total = 0;
-  int void_total = 0;
-
-  long long cursor = m_start;
-
-  while (cursor < payload_end) {
-    long long child_id, child_size;
-
-    const long hdr_status =
-        ParseElementHeader(reader, cursor, payload_end, child_id, child_size);
-
-    if (hdr_status < 0)  // error
-      return hdr_status;
-
-    if (child_id == libwebm::kMkvSeek)
-      ++seek_total;
-    else if (child_id == libwebm::kMkvVoid)
-      ++void_total;
-
-    cursor += child_size;  // consume payload
-
-    if (cursor > payload_end)
-      return E_FILE_FORMAT_INVALID;
-  }
-
-  if (cursor != payload_end)
-    return E_FILE_FORMAT_INVALID;
-
-  if (seek_total > 0) {
-    m_entries = new (std::nothrow) Entry[seek_total];
-
-    if (m_entries == NULL)
-      return -1;
-  }
-
-  if (void_total > 0) {
-    m_void_elements = new (std::nothrow) VoidElement[void_total];
-
-    if (m_void_elements == NULL)
-      return -1;
-  }
-
-  // Pass 2: walk the payload again and populate the arrays.
-  Entry* next_entry = m_entries;
-  VoidElement* next_void = m_void_elements;
-
-  cursor = m_start;
-
-  while (cursor < payload_end) {
-    const long long child_start = cursor;
-
-    long long child_id, child_size;
-
-    const long hdr_status =
-        ParseElementHeader(reader, cursor, payload_end, child_id, child_size);
-
-    if (hdr_status < 0)  // error
-      return hdr_status;
-
-    if (child_id == libwebm::kMkvSeek && seek_total > 0) {
-      // A Seek child that fails to parse is skipped: its slot is not used.
-      if (ParseEntry(reader, cursor, child_size, next_entry)) {
-        Entry& filled = *next_entry++;
-
-        filled.element_start = child_start;
-        filled.element_size = (cursor + child_size) - child_start;
-      }
-    } else if (child_id == libwebm::kMkvVoid && void_total > 0) {
-      VoidElement& filled = *next_void++;
-
-      filled.element_start = child_start;
-      filled.element_size = (cursor + child_size) - child_start;
-    }
-
-    cursor += child_size;  // consume payload
-    if (cursor > payload_end)
-      return E_FILE_FORMAT_INVALID;
-  }
-
-  if (cursor != payload_end)
-    return E_FILE_FORMAT_INVALID;
-
-  // Publish how many slots were actually filled.
-  ptrdiff_t filled_count = ptrdiff_t(next_entry - m_entries);
-  assert(filled_count >= 0);
-  assert(filled_count <= seek_total);
-
-  m_entry_count = static_cast<int>(filled_count);
-
-  filled_count = ptrdiff_t(next_void - m_void_elements);
-  assert(filled_count >= 0);
-  assert(filled_count <= void_total);
-
-  m_void_element_count = static_cast<int>(filled_count);
-
-  return 0;
-}
-
-// Number of entries stored by Parse().
-int SeekHead::GetCount() const {
-  return m_entry_count;
-}
-
-// Returns the entry at |idx|, or null when |idx| is outside
-// [0, m_entry_count).
-const SeekHead::Entry* SeekHead::GetEntry(int idx) const {
-  const bool in_range = (idx >= 0) && (idx < m_entry_count);
-
-  if (!in_range)
-    return 0;
-
-  return &m_entries[idx];
-}
-
-// Number of void elements stored by Parse().
-int SeekHead::GetVoidElementCount() const {
-  return m_void_element_count;
-}
-
-// Returns the void element at |idx|, or null when |idx| is outside
-// [0, m_void_element_count).
-const SeekHead::VoidElement* SeekHead::GetVoidElement(int idx) const {
-  const bool in_range = (idx >= 0) && (idx < m_void_element_count);
-
-  if (!in_range)
-    return 0;
-
-  return &m_void_elements[idx];
-}
-
-// Creates the Cues object for the Cues element located |off| bytes after
-// the start of the segment payload. The element header is validated but the
-// payload is not parsed here.
-//
-// Return values produced in this function:
-//    0  success (also when m_pCues already exists)
-//    1  nothing was created: total length unknown, position beyond |total|,
-//       zero-sized element, or element extends past |total|
-//   <0  -1 for a negative |off| or allocation failure, a reader error,
-//       E_FILE_FORMAT_INVALID, or E_BUFFER_NOT_FULL (|pos|/|len| then
-//       describe the read that could not be satisfied)
-long Segment::ParseCues(long long off, long long& pos, long& len) {
- if (m_pCues)
- return 0; // success
-
- if (off < 0)
- return -1;
-
- long long total, avail;
-
- const int status = m_pReader->Length(&total, &avail);
-
- if (status < 0) // error
- return status;
-
- assert((total < 0) || (avail <= total));
-
- pos = m_start + off;
-
- if ((total < 0) || (pos >= total))
- return 1; // don't bother parsing cues
-
- const long long element_start = pos;
- // -1 marks a segment whose size is unknown (m_size < 0).
- const long long segment_stop = (m_size < 0) ? -1 : m_start + m_size;
-
- if ((pos + 1) > avail) {
- len = 1;
- return E_BUFFER_NOT_FULL;
- }
-
- long long result = GetUIntLength(m_pReader, pos, len);
-
- if (result < 0) // error
- return static_cast<long>(result);
-
- if (result > 0) // underflow (weird)
- {
- len = 1;
- return E_BUFFER_NOT_FULL;
- }
-
- if ((segment_stop >= 0) && ((pos + len) > segment_stop))
- return E_FILE_FORMAT_INVALID;
-
- if ((pos + len) > avail)
- return E_BUFFER_NOT_FULL;
-
- const long long idpos = pos;
-
- const long long id = ReadID(m_pReader, idpos, len);
-
- // Anything other than a Cues ID at this offset (including a negative
- // ReadID() result) is rejected.
- if (id != libwebm::kMkvCues)
- return E_FILE_FORMAT_INVALID;
-
- pos += len; // consume ID
- assert((segment_stop < 0) || (pos <= segment_stop));
-
- // Read Size
-
- if ((pos + 1) > avail) {
- len = 1;
- return E_BUFFER_NOT_FULL;
- }
-
- result = GetUIntLength(m_pReader, pos, len);
-
- if (result < 0) // error
- return static_cast<long>(result);
-
- if (result > 0) // underflow (weird)
- {
- len = 1;
- return E_BUFFER_NOT_FULL;
- }
-
- if ((segment_stop >= 0) && ((pos + len) > segment_stop))
- return E_FILE_FORMAT_INVALID;
-
- if ((pos + len) > avail)
- return E_BUFFER_NOT_FULL;
-
- const long long size = ReadUInt(m_pReader, pos, len);
-
- if (size < 0) // error
- return static_cast<long>(size);
-
- if (size == 0) // weird, although technically not illegal
- return 1; // done
-
- pos += len; // consume length of size of element
- assert((segment_stop < 0) || (pos <= segment_stop));
-
- // Pos now points to start of payload
-
- const long long element_stop = pos + size;
-
- if ((segment_stop >= 0) && (element_stop > segment_stop))
- return E_FILE_FORMAT_INVALID;
-
- if ((total >= 0) && (element_stop > total))
- return 1; // don't bother parsing anymore
-
- // NOTE(review): |size| is a long long; this narrowing cast may truncate
- // where long is 32 bits - confirm callers tolerate that.
- len = static_cast<long>(size);
-
- // The whole payload must be available before the Cues object is created.
- if (element_stop > avail)
- return E_BUFFER_NOT_FULL;
-
- const long long element_size = element_stop - element_start;
-
- m_pCues =
- new (std::nothrow) Cues(this, pos, size, element_start, element_size);
- if (m_pCues == NULL)
- return -1;
-
- return 0; // success
-}
-
-// Parses one Seek element payload, [start, start + size_), into |pEntry|.
-// The payload must consist of exactly a SeekID child followed by a
-// SeekPosition child. Returns true only when both were read and the payload
-// was consumed exactly; |pEntry->id| / |pEntry->pos| may already have been
-// written when false is returned.
-bool SeekHead::ParseEntry(IMkvReader* pReader, long long start, long long size_,
-                          Entry* pEntry) {
-  if (size_ <= 0)
-    return false;
-
-  const long long entry_end = start + size_;
-  long long cursor = start;
-  long field_len;
-
-  // SeekID child: element ID.
-  const long long first_id = ReadID(pReader, cursor, field_len);
-  if (first_id < 0 || first_id != libwebm::kMkvSeekID)
-    return false;
-
-  if ((cursor + field_len) > entry_end)
-    return false;
-
-  cursor += field_len;  // consume SeekID id
-
-  // SeekID child: size field.
-  const long long id_payload_size = ReadUInt(pReader, cursor, field_len);
-  if (id_payload_size <= 0)
-    return false;
-
-  if ((cursor + field_len) > entry_end)
-    return false;
-
-  cursor += field_len;  // consume size of field
-
-  if ((cursor + id_payload_size) > entry_end)
-    return false;
-
-  // SeekID child: payload, itself an element ID whose encoded length must
-  // match the declared payload size.
-  pEntry->id = ReadID(pReader, cursor, field_len);
-  if (pEntry->id <= 0)
-    return false;
-
-  if (field_len != id_payload_size)
-    return false;
-
-  cursor += id_payload_size;  // consume SeekID payload
-
-  // SeekPosition child: element ID.
-  const long long second_id = ReadID(pReader, cursor, field_len);
-  if (second_id != libwebm::kMkvSeekPosition)
-    return false;
-
-  if ((cursor + field_len) > entry_end)
-    return false;
-
-  cursor += field_len;  // consume id
-
-  // SeekPosition child: size field.
-  const long long pos_payload_size = ReadUInt(pReader, cursor, field_len);
-  if (pos_payload_size <= 0)
-    return false;
-
-  if ((cursor + field_len) > entry_end)
-    return false;
-
-  cursor += field_len;  // consume size
-
-  if ((cursor + pos_payload_size) > entry_end)
-    return false;
-
-  // SeekPosition child: payload.
-  pEntry->pos = UnserializeUInt(pReader, cursor, pos_payload_size);
-  if (pEntry->pos < 0)
-    return false;
-
-  cursor += pos_payload_size;  // consume payload
-
-  // Trailing bytes after the two children make the entry invalid.
-  return cursor == entry_end;
-}
-
-// Records the owning segment and the payload/element extent of the Cues
-// element. Nothing is parsed here: the cue point array starts out null with
-// zero loaded and zero preloaded entries, and the parse cursor m_pos starts
-// at the beginning of the payload (|start_|).
-Cues::Cues(Segment* pSegment, long long start_, long long size_,
- long long element_start, long long element_size)
- : m_pSegment(pSegment),
- m_start(start_),
- m_size(size_),
- m_element_start(element_start),
- m_element_size(element_size),
- m_cue_points(NULL),
- m_count(0),
- m_preload_count(0),
- m_pos(start_) {}
-
-// Destroys every cue point, loaded and preloaded alike, and then the array
-// that holds them.
-Cues::~Cues() {
-  const long total = m_count + m_preload_count;
-
-  for (long k = 0; k < total; ++k) {
-    CuePoint* const cue_point = m_cue_points[k];
-    assert(cue_point);
-
-    delete cue_point;
-  }
-
-  delete[] m_cue_points;
-}
-
-// Returns the number of loaded cue points, or -1 while the cue point array
-// has not been allocated. Preloaded cue points are not counted.
-long Cues::GetCount() const {
-  return (m_cue_points != NULL) ? m_count : -1;
-}
-
-// True once the parse cursor has reached the end of the Cues payload.
-bool Cues::DoneParsing() const {
-  return m_pos >= (m_start + m_size);
-}
-
-// Builds the array of preloaded cue points on first use by scanning the
-// Cues payload and recording the position of every CuePoint element.
-// Returns true if the array already exists or the scan completed; false on
-// inconsistent counters, a malformed child, or a PreloadCuePoint() failure.
-bool Cues::Init() const {
-  if (m_cue_points)
-    return true;
-
-  if (m_count != 0 || m_preload_count != 0)
-    return false;
-
-  IMkvReader* const reader = m_pSegment->m_pReader;
-
-  const long long cues_end = m_start + m_size;
-  long capacity = 0;  // allocated length of m_cue_points
-
-  for (long long cursor = m_start; cursor < cues_end;) {
-    const long long element_pos = cursor;
-
-    long field_len;
-
-    const long long id = ReadID(reader, cursor, field_len);
-    if (id < 0 || (cursor + field_len) > cues_end)
-      return false;
-
-    cursor += field_len;  // consume ID
-
-    const long long payload_size = ReadUInt(reader, cursor, field_len);
-    if (payload_size < 0 || (cursor + field_len > cues_end))
-      return false;
-
-    cursor += field_len;  // consume Size field
-    if (cursor + payload_size > cues_end)
-      return false;
-
-    if (id == libwebm::kMkvCuePoint &&
-        !PreloadCuePoint(capacity, element_pos)) {
-      return false;
-    }
-
-    cursor += payload_size;  // skip payload
-  }
-
-  return true;
-}
-
-// Appends a not-yet-loaded CuePoint for the element at |pos| to the
-// preloaded list, growing the array when it is full. |cue_points_size| is
-// the caller-held capacity of m_cue_points and is updated on growth.
-// Returns false if any cue point has already been loaded or on allocation
-// failure.
-bool Cues::PreloadCuePoint(long& cue_points_size, long long pos) const {
-  if (m_count != 0)
-    return false;
-
-  if (m_preload_count >= cue_points_size) {
-    // Start at 2048 slots, then double.
-    const long new_capacity =
-        (cue_points_size <= 0) ? 2048 : 2 * cue_points_size;
-
-    CuePoint** const grown = new (std::nothrow) CuePoint*[new_capacity];
-    if (grown == NULL)
-      return false;
-
-    CuePoint** dst = grown;                             // beginning of target
-    CuePoint** src = m_cue_points;                      // beginning of source
-    CuePoint** const src_end = src + m_preload_count;   // end of source
-
-    for (; src != src_end; ++src, ++dst)
-      *dst = *src;
-
-    delete[] m_cue_points;
-
-    m_cue_points = grown;
-    cue_points_size = new_capacity;
-  }
-
-  CuePoint* const fresh = new (std::nothrow) CuePoint(m_preload_count, pos);
-  if (fresh == NULL)
-    return false;
-
-  m_cue_points[m_preload_count] = fresh;
-  ++m_preload_count;
-  return true;
-}
-
-// Loads the next CuePoint element at or after the parse cursor m_pos,
-// promoting it from "preloaded" to "loaded". Non-CuePoint children are
-// skipped. Returns true when a cue point was loaded; false when the payload
-// is exhausted or anything fails. If Init() or CuePoint::Load() fails the
-// cursor is moved to the end of the payload.
-bool Cues::LoadCuePoint() const {
-  const long long cues_end = m_start + m_size;
-
-  if (m_pos >= cues_end)
-    return false;  // nothing else to do
-
-  if (!Init()) {
-    m_pos = cues_end;
-    return false;
-  }
-
-  IMkvReader* const reader = m_pSegment->m_pReader;
-
-  while (m_pos < cues_end) {
-    const long long element_pos = m_pos;
-
-    long field_len;
-
-    const long long id = ReadID(reader, m_pos, field_len);
-    if (id < 0 || (m_pos + field_len) > cues_end)
-      return false;
-
-    m_pos += field_len;  // consume ID
-
-    const long long payload_size = ReadUInt(reader, m_pos, field_len);
-    if (payload_size < 0 || (m_pos + field_len) > cues_end)
-      return false;
-
-    m_pos += field_len;  // consume Size field
-    if ((m_pos + payload_size) > cues_end)
-      return false;
-
-    if (id == libwebm::kMkvCuePoint) {
-      if (m_preload_count < 1)
-        return false;
-
-      // The next preloaded cue point sits right after the loaded ones.
-      CuePoint* const cue_point = m_cue_points[m_count];
-      if (!cue_point)
-        return false;
-
-      // While unloaded, the timecode holds the negated element position;
-      // it has to match the element just encountered.
-      const long long timecode = cue_point->GetTimeCode();
-      if (timecode < 0 && (-timecode != element_pos))
-        return false;
-
-      if (!cue_point->Load(reader)) {
-        m_pos = cues_end;
-        return false;
-      }
-      ++m_count;
-      --m_preload_count;
-
-      m_pos += payload_size;  // consume payload
-
-      return m_pos <= cues_end;  // true: yes, we loaded a cue point
-    }
-
-    m_pos += payload_size;  // consume payload
-    if (m_pos > cues_end)
-      return false;
-  }
-
-  return false;  // no, we did not load a cue point
-}
-
-// Finds the last loaded cue point whose time is <= |time_ns| (or the first
-// cue point when |time_ns| is not later than it) and looks up |pTrack| in
-// it. On success |pCP| and |pTP| receive the cue point and track position.
-// Returns false if no such cue point exists or it has no position for the
-// track; |pCP| may have been overwritten even then.
-bool Cues::Find(long long time_ns, const Track* pTrack, const CuePoint*& pCP,
-                const CuePoint::TrackPosition*& pTP) const {
-  if (time_ns < 0 || pTrack == NULL || m_cue_points == NULL || m_count == 0)
-    return false;
-
-  CuePoint** const first = m_cue_points;
-  CuePoint** const last = first + m_count;
-
-  pCP = *first;
-  if (pCP == NULL)
-    return false;
-
-  // Requests at or before the first cue point resolve to the first one.
-  if (time_ns <= pCP->GetTime(m_pSegment)) {
-    pTP = pCP->Find(pTrack);
-    return (pTP != NULL);
-  }
-
-  // Binary search. Invariant:
-  //   [first, lo) <= time_ns
-  //   [lo, hi)    unknown
-  //   [hi, last)  >  time_ns
-  CuePoint** lo = first;
-  CuePoint** hi = last;
-
-  while (lo < hi) {
-    CuePoint** const mid = lo + (hi - lo) / 2;
-    if (mid >= last)
-      return false;
-
-    CuePoint* const probe = *mid;
-    if (probe == NULL)
-      return false;
-
-    if (probe->GetTime(m_pSegment) <= time_ns)
-      lo = mid + 1;
-    else
-      hi = mid;
-
-    if (lo > hi)
-      return false;
-  }
-
-  if (lo != hi || lo > last || lo <= first)
-    return false;
-
-  // The candidate is the element just before the partition point.
-  --lo;
-  pCP = *lo;
-
-  if (pCP == NULL || pCP->GetTime(m_pSegment) > time_ns)
-    return false;
-
-  // TODO: here and elsewhere, it's probably not correct to search
-  // for the cue point with this time, and then search for a matching
-  // track. In principle, the matching track could be on some earlier
-  // cue point, and with our current algorithm, we'd miss it. To make
-  // this bullet-proof, we'd need to create a secondary structure,
-  // with a list of cue points that apply to a track, and then search
-  // that track-based structure for a matching cue point.
-
-  pTP = pCP->Find(pTrack);
-  return (pTP != NULL);
-}
-
-// Returns the first loaded cue point, or NULL when none is loaded or the
-// first slot holds a cue point whose timecode is still negative.
-const CuePoint* Cues::GetFirst() const {
-  if (m_cue_points == NULL || m_count == 0)
-    return NULL;
-
-  CuePoint* const first = m_cue_points[0];
-
-  if (first != NULL && first->GetTimeCode() >= 0)
-    return first;
-
-  return NULL;
-}
-
-// Returns the last loaded cue point, or NULL when none is loaded or that
-// slot holds a cue point whose timecode is still negative.
-const CuePoint* Cues::GetLast() const {
-  if (m_cue_points == NULL || m_count <= 0)
-    return NULL;
-
-  CuePoint* const last = m_cue_points[m_count - 1];
-
-  if (last != NULL && last->GetTimeCode() >= 0)
-    return last;
-
-  return NULL;
-}
-
-// Returns the loaded cue point that follows |pCurr|, or NULL when |pCurr|
-// is null/unloaded, is not the entry stored at its own index, is the last
-// loaded cue point, or the successor's timecode is still negative.
-const CuePoint* Cues::GetNext(const CuePoint* pCurr) const {
-  if (pCurr == NULL || pCurr->GetTimeCode() < 0)
-    return NULL;
-
-  if (m_cue_points == NULL || m_count < 1)
-    return NULL;
-
-  const long current = pCurr->m_index;
-  if (current >= m_count)
-    return NULL;
-
-  // |pCurr| must be the entry this object stores at that index.
-  if (m_cue_points[current] != pCurr)
-    return NULL;
-
-  const long following = current + 1;
-  if (following >= m_count)
-    return NULL;
-
-  CuePoint* const successor = m_cue_points[following];
-
-  if (successor != NULL && successor->GetTimeCode() >= 0)
-    return successor;
-
-  return NULL;
-}
-
-// Forwards to Segment::GetBlock() for the given cue point and track
-// position; returns NULL if either pointer is null.
-const BlockEntry* Cues::GetBlock(const CuePoint* pCP,
-                                 const CuePoint::TrackPosition* pTP) const {
-  if (pCP != NULL && pTP != NULL)
-    return m_pSegment->GetBlock(*pCP, *pTP);
-
-  return NULL;
-}
-
-// Returns the block entry that |cp|/|tp| refer to. The cluster array
-// (loaded followed by preloaded clusters) is binary-searched by position
-// for tp.m_pos; when no cluster is found there, one is created and inserted
-// as a preloaded cluster. Returns NULL if the cluster cannot be created or
-// inserted.
-const BlockEntry* Segment::GetBlock(const CuePoint& cp,
-                                    const CuePoint::TrackPosition& tp) {
-  Cluster** const first = m_clusters;
-  Cluster** const last = first + (m_clusterCount + m_clusterPreloadCount);
-
-  // Invariant:
-  //   [first, lo) <  tp.m_pos
-  //   [lo, hi)    unknown
-  //   [hi, last)  >  tp.m_pos
-  Cluster** lo = first;
-  Cluster** hi = last;
-
-  while (lo < hi) {
-    Cluster** const mid = lo + (hi - lo) / 2;
-    assert(mid < last);
-
-    Cluster* const candidate = *mid;
-    assert(candidate);
-
-    const long long candidate_pos = candidate->GetPosition();
-    assert(candidate_pos >= 0);
-
-    if (candidate_pos == tp.m_pos)
-      return candidate->GetEntry(cp, tp);
-
-    if (candidate_pos < tp.m_pos)
-      lo = mid + 1;
-    else
-      hi = mid;
-  }
-
-  assert(lo == hi);
-
-  // Not present: create a preloaded cluster (index -1) at the insertion
-  // point the search ended on.
-  Cluster* const created = Cluster::Create(this, -1, tp.m_pos);
-  if (created == NULL)
-    return NULL;
-
-  const ptrdiff_t slot = lo - m_clusters;
-
-  if (!PreloadCluster(created, slot)) {
-    delete created;
-    return NULL;
-  }
-  assert(m_clusters);
-  assert(m_clusterPreloadCount > 0);
-  assert(m_clusters[slot] == created);
-
-  return created->GetEntry(cp, tp);
-}
-
-// Returns the cluster whose position equals |requested_pos|. The cluster
-// array (loaded followed by preloaded clusters) is binary-searched; when no
-// match exists a new cluster is created and inserted as preloaded. Returns
-// null for a negative position or when creation/insertion fails.
-const Cluster* Segment::FindOrPreloadCluster(long long requested_pos) {
-  if (requested_pos < 0)
-    return 0;
-
-  Cluster** const first = m_clusters;
-  Cluster** const last = first + (m_clusterCount + m_clusterPreloadCount);
-
-  // Invariant:
-  //   [first, lo) <  requested_pos
-  //   [lo, hi)    unknown
-  //   [hi, last)  >  requested_pos
-  Cluster** lo = first;
-  Cluster** hi = last;
-
-  while (lo < hi) {
-    Cluster** const mid = lo + (hi - lo) / 2;
-    assert(mid < last);
-
-    Cluster* const candidate = *mid;
-    assert(candidate);
-
-    const long long candidate_pos = candidate->GetPosition();
-    assert(candidate_pos >= 0);
-
-    if (candidate_pos == requested_pos)
-      return candidate;
-
-    if (candidate_pos < requested_pos)
-      lo = mid + 1;
-    else
-      hi = mid;
-  }
-
-  assert(lo == hi);
-
-  // Not present: create a preloaded cluster (index -1) at the insertion
-  // point the search ended on.
-  Cluster* const created = Cluster::Create(this, -1, requested_pos);
-  if (created == NULL)
-    return NULL;
-
-  const ptrdiff_t slot = lo - m_clusters;
-
-  if (!PreloadCluster(created, slot)) {
-    delete created;
-    return NULL;
-  }
-  assert(m_clusters);
-  assert(m_clusterPreloadCount > 0);
-  assert(m_clusters[slot] == created);
-
-  return created;
-}
-
-// Creates an unloaded cue point for the element at file position |pos|.
-// Until Load() succeeds, m_timecode holds the negated position, so a
-// negative timecode marks the cue point as not yet loaded. |pos| must be
-// positive for that encoding to work (asserted).
-CuePoint::CuePoint(long idx, long long pos)
- : m_element_start(0),
- m_element_size(0),
- m_index(idx),
- m_timecode(-1 * pos),
- m_track_positions(NULL),
- m_track_positions_count(0) {
- assert(pos > 0);
-}
-
-// Releases the track position array allocated by Load(), if any.
-CuePoint::~CuePoint() { delete[] m_track_positions; }
-
-// Parses the CuePoint element whose file position was recorded (negated) in
-// m_timecode by the constructor, filling in the timecode, the track
-// positions, and the element extent.
-//
-// Returns true if the cue point is (or already was) loaded. Returns false
-// when the element is malformed, lacks a CueTime or any CueTrackPositions
-// child, or memory cannot be allocated. On failure the object is left
-// untouched, i.e. still in the unloaded state.
-//
-// All sizes read from the stream are validated with runtime checks rather
-// than assert(), because they come from untrusted input and asserts vanish
-// in release builds.
-bool CuePoint::Load(IMkvReader* pReader) {
-  if (m_timecode >= 0)  // already loaded
-    return true;
-
-  assert(m_track_positions == NULL);
-  assert(m_track_positions_count == 0);
-
-  long long pos_ = -m_timecode;
-  const long long element_start = pos_;
-
-  long long stop;
-
-  {
-    long len;
-
-    const long long id = ReadID(pReader, pos_, len);
-    if (id != libwebm::kMkvCuePoint)
-      return false;
-
-    pos_ += len;  // consume ID
-
-    const long long size = ReadUInt(pReader, pos_, len);
-    if (size < 0)  // read error or malformed size field
-      return false;
-
-    pos_ += len;  // consume Size field
-    // pos_ now points to start of payload
-
-    stop = pos_ + size;
-  }
-
-  const long long element_size = stop - element_start;
-
-  // Results are gathered in locals and committed only on success, so a
-  // failed load cannot leave the object looking "already loaded".
-  long long timecode = -1;
-  size_t track_positions_count = 0;
-
-  long long pos = pos_;
-
-  // First pass: read the timecode and count the track positions.
-
-  while (pos < stop) {
-    long len;
-
-    const long long id = ReadID(pReader, pos, len);
-    if ((id < 0) || (pos + len > stop))
-      return false;
-
-    pos += len;  // consume ID
-
-    const long long size = ReadUInt(pReader, pos, len);
-    if ((size < 0) || (pos + len > stop))
-      return false;
-
-    pos += len;  // consume Size field
-    if ((pos + size) > stop)
-      return false;
-
-    if (id == libwebm::kMkvCueTime)
-      timecode = UnserializeUInt(pReader, pos, size);
-    else if (id == libwebm::kMkvCueTrackPositions)
-      ++track_positions_count;
-
-    pos += size;  // consume payload
-  }
-
-  if (timecode < 0 || track_positions_count == 0)
-    return false;
-
-  TrackPosition* const track_positions =
-      new (std::nothrow) TrackPosition[track_positions_count];
-  if (track_positions == NULL)
-    return false;
-
-  // Second pass: parse the track positions.
-
-  TrackPosition* p = track_positions;
-  TrackPosition* const p_end = track_positions + track_positions_count;
-  bool ok = true;
-
-  pos = pos_;
-
-  while (pos < stop) {
-    long len;
-
-    const long long id = ReadID(pReader, pos, len);
-    if (id < 0 || (pos + len) > stop) {
-      ok = false;
-      break;
-    }
-
-    pos += len;  // consume ID
-
-    const long long size = ReadUInt(pReader, pos, len);
-    if (size < 0 || (pos + len) > stop) {
-      ok = false;
-      break;
-    }
-
-    pos += len;  // consume Size field
-    if ((pos + size) > stop) {
-      ok = false;
-      break;
-    }
-
-    if (id == libwebm::kMkvCueTrackPositions) {
-      // Never write past the array, even if the reader returns different
-      // data than it did during the counting pass.
-      if (p == p_end || !p->Parse(pReader, pos, size)) {
-        ok = false;
-        break;
-      }
-      ++p;
-    }
-
-    pos += size;  // consume payload
-  }
-
-  // Both passes must agree on the number of track positions.
-  if (!ok || p != p_end) {
-    delete[] track_positions;
-    return false;
-  }
-
-  m_timecode = timecode;
-  m_track_positions = track_positions;
-  m_track_positions_count = track_positions_count;
-
-  m_element_start = element_start;
-  m_element_size = element_size;
-
-  return true;
-}
-
-// Parses a CueTrackPositions payload, [start_, start_ + size_), reading the
-// CueTrack, CueClusterPosition and CueBlockNumber children; other children
-// are skipped. m_block defaults to 1 when absent. Returns false on a
-// malformed child or when the track number is not positive or the cluster
-// position is missing/negative.
-bool CuePoint::TrackPosition::Parse(IMkvReader* pReader, long long start_,
-                                    long long size_) {
-  const long long payload_end = start_ + size_;
-
-  m_track = -1;
-  m_pos = -1;
-  m_block = 1;  // default
-
-  for (long long cursor = start_; cursor < payload_end;) {
-    long field_len;
-
-    const long long id = ReadID(pReader, cursor, field_len);
-    if ((id < 0) || ((cursor + field_len) > payload_end))
-      return false;
-
-    cursor += field_len;  // consume ID
-
-    const long long child_size = ReadUInt(pReader, cursor, field_len);
-    if ((child_size < 0) || ((cursor + field_len) > payload_end))
-      return false;
-
-    cursor += field_len;  // consume Size field
-    if ((cursor + child_size) > payload_end)
-      return false;
-
-    if (id == libwebm::kMkvCueTrack) {
-      m_track = UnserializeUInt(pReader, cursor, child_size);
-    } else if (id == libwebm::kMkvCueClusterPosition) {
-      m_pos = UnserializeUInt(pReader, cursor, child_size);
-    } else if (id == libwebm::kMkvCueBlockNumber) {
-      m_block = UnserializeUInt(pReader, cursor, child_size);
-    }
-
-    cursor += child_size;  // consume payload
-  }
-
-  // Both a cluster position and a positive track number are required.
-  return !((m_pos < 0) || (m_track <= 0));
-}
-
-// Returns the first track position whose track number equals that of
-// |pTrack|, or NULL if |pTrack| is null or no entry matches.
-const CuePoint::TrackPosition* CuePoint::Find(const Track* pTrack) const {
-  if (pTrack == NULL)
-    return NULL;
-
-  const long long wanted = pTrack->GetNumber();
-
-  const TrackPosition* const end = m_track_positions + m_track_positions_count;
-
-  for (const TrackPosition* it = m_track_positions; it != end; ++it) {
-    if (it->m_track == wanted)
-      return it;
-  }
-
-  return NULL;  // no matching track number found
-}
-
-// Returns the stored m_timecode value unchanged.
-long long CuePoint::GetTimeCode() const {
-  return m_timecode;
-}
-
-// Returns m_timecode multiplied by the timecode scale reported by the
-// segment's SegmentInfo. The segment, its info object, a non-negative
-// m_timecode and a scale of at least 1 are asserted, not checked at runtime.
-long long CuePoint::GetTime(const Segment* pSegment) const {
-  assert(pSegment);
-  assert(m_timecode >= 0);
-
-  const SegmentInfo* const info = pSegment->GetInfo();
-  assert(info);
-
-  const long long timecode_scale = info->GetTimeCodeScale();
-  assert(timecode_scale >= 1);
-
-  return timecode_scale * m_timecode;
-}
-
-// Reports whether m_pos has reached the end of the data to parse.
-// With a known segment size (m_size >= 0) the end is m_start + m_size.
-// Otherwise the reader's total length is used: a reader error counts as
-// "done", and a negative total (treated as a live stream) counts as
-// "not done".
-bool Segment::DoneParsing() const {
-  if (m_size >= 0)
-    return m_pos >= (m_start + m_size);
-
-  long long total, avail;
-  const int status = m_pReader->Length(&total, &avail);
-
-  if (status < 0)  // reader error: must assume done
-    return true;
-
-  if (total < 0)  // assume live stream
-    return false;
-
-  return m_pos >= total;
-}
-
-// Returns m_clusters[0], or the m_eos sentinel when the cluster array is
-// missing or m_clusterCount is not positive.
-const Cluster* Segment::GetFirst() const {
-  const bool have_clusters = (m_clusters != NULL) && (m_clusterCount > 0);
-  if (!have_clusters)
-    return &m_eos;
-
-  Cluster* const first = m_clusters[0];
-  assert(first);
-
-  return first;
-}
-
-// Returns m_clusters[m_clusterCount - 1], or the m_eos sentinel when the
-// cluster array is missing or m_clusterCount is not positive.
-const Cluster* Segment::GetLast() const {
-  const bool have_clusters = (m_clusters != NULL) && (m_clusterCount > 0);
-  if (!have_clusters)
-    return &m_eos;
-
-  const long last_index = m_clusterCount - 1;
-  Cluster* const last = m_clusters[last_index];
-  assert(last);
-
-  return last;
-}
-
-// Returns m_clusterCount (converted to the unsigned return type).
-unsigned long Segment::GetCount() const {
-  return m_clusterCount;
-}
-
-// Returns the cluster that follows pCurr.
-//
-// When pCurr has a non-negative m_index, the result is the next entry of
-// m_clusters, or &m_eos when pCurr is the last counted entry.
-//
-// Otherwise the stream is scanned from the end of pCurr's element for the
-// next Cluster element for which Cluster::HasBlockEntries reports a positive
-// status. That cluster is looked up among the preloaded entries
-// ([m_clusterCount, m_clusterCount + m_clusterPreloadCount)) and created and
-// inserted via PreloadCluster when not already present.
-//
-// Returns NULL when no such cluster is found, when the data read from the
-// stream is malformed or runs past the end of the segment, or when creating
-// or inserting the cluster object fails. Values read from the stream are
-// checked at runtime instead of only being asserted, so builds without
-// asserts do not continue with out-of-range positions.
-const Cluster* Segment::GetNext(const Cluster* pCurr) {
-  assert(pCurr);
-  assert(pCurr != &m_eos);
-  assert(m_clusters);
-
-  long idx = pCurr->m_index;
-
-  if (idx >= 0) {
-    assert(m_clusterCount > 0);
-    assert(idx < m_clusterCount);
-    assert(pCurr == m_clusters[idx]);
-
-    ++idx;
-
-    if (idx >= m_clusterCount)
-      return &m_eos;  // caller will LoadCluster as desired
-
-    Cluster* const pNext = m_clusters[idx];
-    assert(pNext);
-    assert(pNext->m_index >= 0);
-    assert(pNext->m_index == idx);
-
-    return pNext;
-  }
-
-  assert(m_clusterPreloadCount > 0);
-
-  long long pos = pCurr->m_element_start;
-
-  // The scan below needs a known end of segment.
-  if (m_size < 0)
-    return NULL;
-
-  const long long stop = m_start + m_size;  // end of segment
-
-  // Step over the current cluster: ID, size field and payload.
-  {
-    long len;
-
-    long long result = GetUIntLength(m_pReader, pos, len);
-    if (result != 0 || (pos + len) > stop)
-      return NULL;
-
-    const long long id = ReadID(m_pReader, pos, len);
-    if (id != libwebm::kMkvCluster)
-      return NULL;
-
-    pos += len;  // consume ID
-
-    // Read Size
-    result = GetUIntLength(m_pReader, pos, len);
-    if (result != 0 || (pos + len) > stop)
-      return NULL;
-
-    const long long size = ReadUInt(m_pReader, pos, len);
-    if (size <= 0)
-      return NULL;
-
-    pos += len;  // consume length of size of element
-
-    // pos <= stop holds here, so the subtraction cannot overflow.
-    if (size > (stop - pos))
-      return NULL;
-
-    // Pos now points to start of payload
-
-    pos += size;  // consume payload
-  }
-
-  long long off_next = 0;
-
-  // Walk the following elements until a cluster with block entries is found.
-  while (pos < stop) {
-    long len;
-
-    long long result = GetUIntLength(m_pReader, pos, len);
-    if (result != 0 || (pos + len) > stop)
-      return NULL;
-
-    const long long idpos = pos;  // pos of next (potential) cluster
-
-    const long long id = ReadID(m_pReader, idpos, len);
-    if (id < 0)
-      return NULL;
-
-    pos += len;  // consume ID
-
-    // Read Size
-    result = GetUIntLength(m_pReader, pos, len);
-    if (result != 0 || (pos + len) > stop)
-      return NULL;
-
-    const long long size = ReadUInt(m_pReader, pos, len);
-    if (size < 0)
-      return NULL;
-
-    pos += len;  // consume length of size of element
-
-    if (size > (stop - pos))
-      return NULL;
-
-    // Pos now points to start of payload
-
-    if (size == 0)  // weird
-      continue;
-
-    if (id == libwebm::kMkvCluster) {
-      const long long off_next_ = idpos - m_start;
-
-      long long pos_;
-      long len_;
-
-      const long status = Cluster::HasBlockEntries(this, off_next_, pos_, len_);
-
-      if (status < 0)
-        return NULL;
-
-      if (status > 0) {
-        off_next = off_next_;
-        break;
-      }
-    }
-
-    pos += size;  // consume payload
-  }
-
-  if (off_next <= 0)
-    return NULL;
-
-  // Binary search of the preloaded range for a cluster at off_next.
-  Cluster** const ii = m_clusters + m_clusterCount;
-  Cluster** i = ii;
-
-  Cluster** const jj = ii + m_clusterPreloadCount;
-  Cluster** j = jj;
-
-  while (i < j) {
-    // INVARIANT:
-    //[0, i) < pos_next
-    //[i, j) ?
-    //[j, jj) > pos_next
-
-    Cluster** const k = i + (j - i) / 2;
-    assert(k < jj);
-
-    Cluster* const pNext = *k;
-    assert(pNext);
-    assert(pNext->m_index < 0);
-
-    pos = pNext->GetPosition();
-
-    if (pos < off_next)
-      i = k + 1;
-    else if (pos > off_next)
-      j = k;
-    else
-      return pNext;
-  }
-
-  assert(i == j);
-
-  Cluster* const pNext = Cluster::Create(this, -1, off_next);
-  if (pNext == NULL)
-    return NULL;
-
-  const ptrdiff_t idx_next = i - m_clusters;  // insertion position
-
-  if (!PreloadCluster(pNext, idx_next)) {
-    delete pNext;
-    return NULL;
-  }
-  assert(m_clusters);
-  assert(idx_next < m_clusterSize);
-  assert(m_clusters[idx_next] == pNext);
-
-  return pNext;
-}
-
-// Finds the cluster that follows pCurr and stores it in pResult.
-//
-// Returns 0 on success (pResult set), 1 when there are no more clusters,
-// and a negative value on error. E_BUFFER_NOT_FULL is returned when more
-// data must become available; on the paths that set them, pos and len
-// describe the read that could not be satisfied.
-//
-// When pCurr has a non-negative m_index the next entry of m_clusters is
-// returned, loading one via LoadCluster when pCurr is the last counted
-// entry. Otherwise the end of pCurr is located (parsing its header when
-// m_element_size is not known) and DoParseNext is called until it returns
-// a status <= 1.
-long Segment::ParseNext(const Cluster* pCurr, const Cluster*& pResult,
-                        long long& pos, long& len) {
-  assert(pCurr);
-  assert(!pCurr->EOS());
-  assert(m_clusters);
-
-  pResult = 0;
-
-  if (pCurr->m_index >= 0) {  // loaded (not merely preloaded)
-    assert(m_clusters[pCurr->m_index] == pCurr);
-
-    const long next_idx = pCurr->m_index + 1;
-
-    if (next_idx < m_clusterCount) {
-      pResult = m_clusters[next_idx];
-      return 0;  // success
-    }
-
-    // curr cluster is last among loaded
-
-    const long result = LoadCluster(pos, len);
-
-    if (result < 0)  // error or underflow
-      return result;
-
-    if (result > 0)  // no more clusters
-    {
-      // pResult = &m_eos;
-      return 1;
-    }
-
-    pResult = GetLast();
-    return 0;  // success
-  }
-
-  assert(m_pos > 0);
-
-  long long total, avail;
-
-  long status = m_pReader->Length(&total, &avail);
-
-  if (status < 0)  // error
-    return status;
-
-  assert((total < 0) || (avail <= total));
-
-  const long long segment_stop = (m_size < 0) ? -1 : m_start + m_size;
-
-  // interrogate curr cluster
-
-  pos = pCurr->m_element_start;
-
-  if (pCurr->m_element_size >= 0)
-    pos += pCurr->m_element_size;
-  else {
-    if ((pos + 1) > avail) {
-      len = 1;
-      return E_BUFFER_NOT_FULL;
-    }
-
-    long long result = GetUIntLength(m_pReader, pos, len);
-
-    if (result < 0)  // error
-      return static_cast<long>(result);
-
-    if (result > 0)  // weird
-      return E_BUFFER_NOT_FULL;
-
-    if ((segment_stop >= 0) && ((pos + len) > segment_stop))
-      return E_FILE_FORMAT_INVALID;
-
-    if ((pos + len) > avail)
-      return E_BUFFER_NOT_FULL;
-
-    // Read the element ID with ReadID, as every other comparison against
-    // libwebm::kMkvCluster in this class does; the value is compared with
-    // the ID constant, so it must be read as an ID, not as a size.
-    const long long id = ReadID(m_pReader, pos, len);
-
-    if (id < 0)  // error
-      return static_cast<long>(id);
-
-    if (id != libwebm::kMkvCluster)
-      return -1;
-
-    pos += len;  // consume ID
-
-    // Read Size
-
-    if ((pos + 1) > avail) {
-      len = 1;
-      return E_BUFFER_NOT_FULL;
-    }
-
-    result = GetUIntLength(m_pReader, pos, len);
-
-    if (result < 0)  // error
-      return static_cast<long>(result);
-
-    if (result > 0)  // weird
-      return E_BUFFER_NOT_FULL;
-
-    if ((segment_stop >= 0) && ((pos + len) > segment_stop))
-      return E_FILE_FORMAT_INVALID;
-
-    if ((pos + len) > avail)
-      return E_BUFFER_NOT_FULL;
-
-    const long long size = ReadUInt(m_pReader, pos, len);
-
-    if (size < 0)  // error
-      return static_cast<long>(size);
-
-    pos += len;  // consume size field
-
-    // All value bits of a len-byte size field set.
-    const long long unknown_size = (1LL << (7 * len)) - 1;
-
-    if (size == unknown_size)  // TODO: should never happen
-      return E_FILE_FORMAT_INVALID;  // TODO: resolve this
-
-    if ((segment_stop >= 0) && ((pos + size) > segment_stop))
-      return E_FILE_FORMAT_INVALID;
-
-    // Pos now points to start of payload
-
-    pos += size;  // consume payload (that is, the current cluster)
-    if (segment_stop >= 0 && pos > segment_stop)
-      return E_FILE_FORMAT_INVALID;
-
-    // By consuming the payload, we are assuming that the curr
-    // cluster isn't interesting.  That is, we don't bother checking
-    // whether the payload of the curr cluster is less than what
-    // happens to be available (obtained via IMkvReader::Length).
-    // Presumably the caller has already dispensed with the current
-    // cluster, and really does want the next cluster.
-  }
-
-  // pos now points to just beyond the last fully-loaded cluster
-
-  for (;;) {
-    const long status = DoParseNext(pResult, pos, len);
-
-    // DoParseNext returns 2 to request another pass; anything else is final.
-    if (status <= 1)
-      return status;
-  }
-}
-
-// Parses forward from pos looking for the next Cluster element.
-//
-// Returns:
-//   0  a cluster with at least one block entry was found; pResult is set to
-//      the (already preloaded or newly created) cluster object.
-//   1  end of stream / end of segment reached.
-//   2  a cluster without block entries was skipped; pos is positioned after
-//      it and the caller should call again.
-//  <0  error. E_BUFFER_NOT_FULL means more data must become available,
-//      E_FILE_FORMAT_INVALID means the stream is malformed, and -1 is used
-//      for allocation failures and other generic errors.
-//
-// A Cues element encountered on the way is recorded in m_pCues when none has
-// been created yet.
-long Segment::DoParseNext(const Cluster*& pResult, long long& pos, long& len) {
-  long long total, avail;
-
-  long status = m_pReader->Length(&total, &avail);
-
-  if (status < 0)  // error
-    return status;
-
-  assert((total < 0) || (avail <= total));
-
-  const long long segment_stop = (m_size < 0) ? -1 : m_start + m_size;
-
-  // Parse next cluster.  This is strictly a parsing activity.
-  // Creation of a new cluster object happens later, after the
-  // parsing is done.
-
-  long long off_next = 0;
-  long long cluster_size = -1;
-
-  for (;;) {
-    if ((total >= 0) && (pos >= total))
-      return 1;  // EOF
-
-    if ((segment_stop >= 0) && (pos >= segment_stop))
-      return 1;  // EOF
-
-    if ((pos + 1) > avail) {
-      len = 1;
-      return E_BUFFER_NOT_FULL;
-    }
-
-    long long result = GetUIntLength(m_pReader, pos, len);
-
-    if (result < 0)  // error
-      return static_cast<long>(result);
-
-    if (result > 0)  // weird
-      return E_BUFFER_NOT_FULL;
-
-    if ((segment_stop >= 0) && ((pos + len) > segment_stop))
-      return E_FILE_FORMAT_INVALID;
-
-    if ((pos + len) > avail)
-      return E_BUFFER_NOT_FULL;
-
-    const long long idpos = pos;  // absolute
-    const long long idoff = pos - m_start;  // relative
-
-    const long long id = ReadID(m_pReader, idpos, len);  // absolute
-
-    if (id < 0)  // error
-      return static_cast<long>(id);
-
-    if (id == 0)  // weird
-      return -1;  // generic error
-
-    pos += len;  // consume ID
-
-    // Read Size
-
-    if ((pos + 1) > avail) {
-      len = 1;
-      return E_BUFFER_NOT_FULL;
-    }
-
-    result = GetUIntLength(m_pReader, pos, len);
-
-    if (result < 0)  // error
-      return static_cast<long>(result);
-
-    if (result > 0)  // weird
-      return E_BUFFER_NOT_FULL;
-
-    if ((segment_stop >= 0) && ((pos + len) > segment_stop))
-      return E_FILE_FORMAT_INVALID;
-
-    if ((pos + len) > avail)
-      return E_BUFFER_NOT_FULL;
-
-    const long long size = ReadUInt(m_pReader, pos, len);
-
-    if (size < 0)  // error
-      return static_cast<long>(size);
-
-    pos += len;  // consume length of size of element
-
-    // Pos now points to start of payload
-
-    if (size == 0)  // weird
-      continue;
-
-    // All value bits of a len-byte size field set.
-    const long long unknown_size = (1LL << (7 * len)) - 1;
-
-    if ((segment_stop >= 0) && (size != unknown_size) &&
-        ((pos + size) > segment_stop)) {
-      return E_FILE_FORMAT_INVALID;
-    }
-
-    if (id == libwebm::kMkvCues) {
-      if (size == unknown_size)
-        return E_FILE_FORMAT_INVALID;
-
-      const long long element_stop = pos + size;
-
-      if ((segment_stop >= 0) && (element_stop > segment_stop))
-        return E_FILE_FORMAT_INVALID;
-
-      const long long element_start = idpos;
-      const long long element_size = element_stop - element_start;
-
-      if (m_pCues == NULL) {
-        m_pCues = new (std::nothrow)
-            Cues(this, pos, size, element_start, element_size);
-        // Allocation failure is an error. Returning false here would
-        // convert to 0, which callers interpret as success with pResult
-        // set.
-        if (m_pCues == NULL)
-          return -1;
-      }
-
-      pos += size;  // consume payload
-      if (segment_stop >= 0 && pos > segment_stop)
-        return E_FILE_FORMAT_INVALID;
-
-      continue;
-    }
-
-    if (id != libwebm::kMkvCluster) {  // not a Cluster ID
-      if (size == unknown_size)
-        return E_FILE_FORMAT_INVALID;
-
-      pos += size;  // consume payload
-      if (segment_stop >= 0 && pos > segment_stop)
-        return E_FILE_FORMAT_INVALID;
-
-      continue;
-    }
-
-    // We have a cluster.
-    off_next = idoff;
-
-    if (size != unknown_size)
-      cluster_size = size;
-
-    break;
-  }
-
-  assert(off_next > 0);  // have cluster
-
-  // We have parsed the next cluster.
-  // We have not created a cluster object yet.  What we need
-  // to do now is determine whether it has already been preloaded
-  // (in which case, an object for this cluster has already been
-  // created), and if not, create a new cluster object.
-
-  Cluster** const ii = m_clusters + m_clusterCount;
-  Cluster** i = ii;
-
-  Cluster** const jj = ii + m_clusterPreloadCount;
-  Cluster** j = jj;
-
-  while (i < j) {
-    // INVARIANT:
-    //[0, i) < pos_next
-    //[i, j) ?
-    //[j, jj) > pos_next
-
-    Cluster** const k = i + (j - i) / 2;
-    assert(k < jj);
-
-    const Cluster* const pNext = *k;
-    assert(pNext);
-    assert(pNext->m_index < 0);
-
-    pos = pNext->GetPosition();
-    assert(pos >= 0);
-
-    if (pos < off_next)
-      i = k + 1;
-    else if (pos > off_next)
-      j = k;
-    else {
-      pResult = pNext;
-      return 0;  // success
-    }
-  }
-
-  assert(i == j);
-
-  long long pos_;
-  long len_;
-
-  status = Cluster::HasBlockEntries(this, off_next, pos_, len_);
-
-  if (status < 0) {  // error or underflow
-    pos = pos_;
-    len = len_;
-
-    return status;
-  }
-
-  if (status > 0) {  // means "found at least one block entry"
-    Cluster* const pNext = Cluster::Create(this,
-                                           -1,  // preloaded
-                                           off_next);
-    if (pNext == NULL)
-      return -1;
-
-    const ptrdiff_t idx_next = i - m_clusters;  // insertion position
-
-    if (!PreloadCluster(pNext, idx_next)) {
-      delete pNext;
-      return -1;
-    }
-    assert(m_clusters);
-    assert(idx_next < m_clusterSize);
-    assert(m_clusters[idx_next] == pNext);
-
-    pResult = pNext;
-    return 0;  // success
-  }
-
-  // status == 0 means "no block entries found"
-
-  if (cluster_size < 0) {  // unknown size
-    const long long payload_pos = pos;  // absolute pos of cluster payload
-
-    for (;;) {  // determine cluster size
-      if ((total >= 0) && (pos >= total))
-        break;
-
-      if ((segment_stop >= 0) && (pos >= segment_stop))
-        break;  // no more clusters
-
-      // Read ID
-
-      if ((pos + 1) > avail) {
-        len = 1;
-        return E_BUFFER_NOT_FULL;
-      }
-
-      long long result = GetUIntLength(m_pReader, pos, len);
-
-      if (result < 0)  // error
-        return static_cast<long>(result);
-
-      if (result > 0)  // weird
-        return E_BUFFER_NOT_FULL;
-
-      if ((segment_stop >= 0) && ((pos + len) > segment_stop))
-        return E_FILE_FORMAT_INVALID;
-
-      if ((pos + len) > avail)
-        return E_BUFFER_NOT_FULL;
-
-      const long long idpos = pos;
-      const long long id = ReadID(m_pReader, idpos, len);
-
-      if (id < 0)  // error (or underflow)
-        return static_cast<long>(id);
-
-      // This is the distinguished set of ID's we use to determine
-      // that we have exhausted the sub-element's inside the cluster
-      // whose ID we parsed earlier.
-
-      if (id == libwebm::kMkvCluster || id == libwebm::kMkvCues)
-        break;
-
-      pos += len;  // consume ID (of sub-element)
-
-      // Read Size
-
-      if ((pos + 1) > avail) {
-        len = 1;
-        return E_BUFFER_NOT_FULL;
-      }
-
-      result = GetUIntLength(m_pReader, pos, len);
-
-      if (result < 0)  // error
-        return static_cast<long>(result);
-
-      if (result > 0)  // weird
-        return E_BUFFER_NOT_FULL;
-
-      if ((segment_stop >= 0) && ((pos + len) > segment_stop))
-        return E_FILE_FORMAT_INVALID;
-
-      if ((pos + len) > avail)
-        return E_BUFFER_NOT_FULL;
-
-      const long long size = ReadUInt(m_pReader, pos, len);
-
-      if (size < 0)  // error
-        return static_cast<long>(size);
-
-      pos += len;  // consume size field of element
-
-      // pos now points to start of sub-element's payload
-
-      if (size == 0)  // weird
-        continue;
-
-      const long long unknown_size = (1LL << (7 * len)) - 1;
-
-      if (size == unknown_size)
-        return E_FILE_FORMAT_INVALID;  // not allowed for sub-elements
-
-      if ((segment_stop >= 0) && ((pos + size) > segment_stop))  // weird
-        return E_FILE_FORMAT_INVALID;
-
-      pos += size;  // consume payload of sub-element
-      if (segment_stop >= 0 && pos > segment_stop)
-        return E_FILE_FORMAT_INVALID;
-    }  // determine cluster size
-
-    cluster_size = pos - payload_pos;
-    assert(cluster_size >= 0);  // TODO: handle cluster_size = 0
-
-    pos = payload_pos;  // reset and re-parse original cluster
-  }
-
-  pos += cluster_size;  // consume payload
-  if (segment_stop >= 0 && pos > segment_stop)
-    return E_FILE_FORMAT_INVALID;
-
-  return 2;  // try to find a cluster that follows next
-}
-
-// Returns the last counted cluster whose GetTime() is <= time_ns.
-// When there are no clusters the m_eos sentinel is returned, and when
-// time_ns is not later than the first cluster's time the first cluster is
-// returned.
-const Cluster* Segment::FindCluster(long long time_ns) const {
-  const bool no_clusters = (m_clusters == NULL) || (m_clusterCount <= 0);
-  if (no_clusters)
-    return &m_eos;
-
-  Cluster* const first = m_clusters[0];
-  assert(first);
-  assert(first->m_index == 0);
-
-  if (time_ns <= first->GetTime())
-    return first;
-
-  // Binary search: on exit, lo is the index of the first cluster whose time
-  // is greater than time_ns (or m_clusterCount when there is none).
-  long lo = 0;
-  long hi = m_clusterCount;
-
-  while (lo < hi) {
-    // INVARIANT:
-    //[0, lo) <= time_ns
-    //[lo, hi) ?
-    //[hi, m_clusterCount) > time_ns
-
-    const long mid = lo + (hi - lo) / 2;
-    assert(mid < m_clusterCount);
-
-    Cluster* const candidate = m_clusters[mid];
-    assert(candidate);
-    assert(candidate->m_index == mid);
-
-    if (candidate->GetTime() <= time_ns)
-      lo = mid + 1;
-    else
-      hi = mid;
-
-    assert(lo <= hi);
-  }
-
-  assert(lo == hi);
-  assert(lo > 0);
-  assert(lo <= m_clusterCount);
-
-  const long best = lo - 1;
-
-  Cluster* const match = m_clusters[best];
-  assert(match);
-  assert(match->m_index == best);
-  assert(match->GetTime() <= time_ns);
-
-  return match;
-}
-
-// Plain accessors: each returns the corresponding member pointer as stored.
-const Tracks* Segment::GetTracks() const {
-  return m_pTracks;
-}
-
-const SegmentInfo* Segment::GetInfo() const {
-  return m_pInfo;
-}
-
-const Cues* Segment::GetCues() const {
-  return m_pCues;
-}
-
-const Chapters* Segment::GetChapters() const {
-  return m_pChapters;
-}
-
-const Tags* Segment::GetTags() const {
-  return m_pTags;
-}
-
-const SeekHead* Segment::GetSeekHead() const {
-  return m_pSeekHead;
-}
-
-// Forwards to m_pInfo->GetDuration(); m_pInfo is asserted to be non-NULL.
-long long Segment::GetDuration() const {
-  assert(m_pInfo);
-
-  const long long duration = m_pInfo->GetDuration();
-  return duration;
-}
-
-// Records the owning segment and the payload / element extents, and starts
-// with an empty editions array. No parsing happens here; see Parse().
-Chapters::Chapters(Segment* pSegment, long long payload_start,
-                   long long payload_size, long long element_start,
-                   long long element_size)
-    : m_pSegment(pSegment),
-      m_start(payload_start),  // payload extent
-      m_size(payload_size),
-      m_element_start(element_start),  // whole-element extent
-      m_element_size(element_size),
-      m_editions(NULL),  // nothing allocated yet
-      m_editions_size(0),
-      m_editions_count(0) {
-}
-
-// Clears every edition, last to first, then frees the editions array.
-Chapters::~Chapters() {
-  while (m_editions_count > 0) {
-    --m_editions_count;
-    m_editions[m_editions_count].Clear();
-  }
-
-  delete[] m_editions;
-}
-
-// Walks the child elements in [m_start, m_start + m_size) and hands every
-// EditionEntry to ParseEdition; other children are skipped.
-// Returns 0 on success, a negative status from ParseElementHeader or
-// ParseEdition, or E_FILE_FORMAT_INVALID when the children do not end
-// exactly at the payload end.
-long Chapters::Parse() {
-  IMkvReader* const reader = m_pSegment->m_pReader;
-
-  long long cursor = m_start;
-  const long long payload_end = cursor + m_size;
-
-  while (cursor < payload_end) {
-    long long child_id, child_size;
-
-    long rc =
-        ParseElementHeader(reader, cursor, payload_end, child_id, child_size);
-    if (rc < 0)  // error
-      return rc;
-
-    if (child_size == 0)  // empty child, nothing to consume
-      continue;
-
-    if (child_id == libwebm::kMkvEditionEntry) {
-      rc = ParseEdition(cursor, child_size);
-      if (rc < 0)  // error
-        return rc;
-    }
-
-    cursor += child_size;
-    if (cursor > payload_end)
-      return E_FILE_FORMAT_INVALID;
-  }
-
-  if (cursor != payload_end)
-    return E_FILE_FORMAT_INVALID;
-
-  return 0;
-}
-
-// Returns m_editions_count.
-int Chapters::GetEditionCount() const {
-  return m_editions_count;
-}
-
-// Returns a pointer to edition idx, or NULL when idx is outside
-// [0, m_editions_count).
-const Chapters::Edition* Chapters::GetEdition(int idx) const {
-  const bool in_range = (idx >= 0) && (idx < m_editions_count);
-  if (!in_range)
-    return NULL;
-
-  return &m_editions[idx];
-}
-
-// Makes sure m_editions has room for at least one more entry.
-// When the array is full its capacity becomes 1 (if it was 0) or doubles;
-// existing entries are moved with ShallowCopy and the old array is freed.
-// Returns false only when the new array cannot be allocated.
-bool Chapters::ExpandEditionsArray() {
-  if (m_editions_count < m_editions_size)
-    return true;  // spare capacity already available
-
-  int new_capacity = 1;
-  if (m_editions_size != 0)
-    new_capacity = 2 * m_editions_size;
-
-  Edition* const grown = new (std::nothrow) Edition[new_capacity];
-  if (grown == NULL)
-    return false;
-
-  for (int i = 0; i < m_editions_count; ++i)
-    m_editions[i].ShallowCopy(grown[i]);
-
-  delete[] m_editions;
-
-  m_editions = grown;
-  m_editions_size = new_capacity;
-  return true;
-}
-
-// Appends a freshly initialised Edition and parses [pos, pos + size) into
-// it. Returns -1 when the editions array cannot grow, otherwise the status
-// of Edition::Parse.
-long Chapters::ParseEdition(long long pos, long long size) {
-  if (!ExpandEditionsArray())
-    return -1;
-
-  Edition* const slot = m_editions + m_editions_count;
-  ++m_editions_count;
-
-  slot->Init();
-  return slot->Parse(m_pSegment->m_pReader, pos, size);
-}
-
-// The constructor and destructor do nothing; members are set up by Init()
-// and released by Clear().
-Chapters::Edition::Edition() {
-}
-
-Chapters::Edition::~Edition() {
-}
-
-// Returns m_atoms_count.
-int Chapters::Edition::GetAtomCount() const {
-  return m_atoms_count;
-}
-
-// Returns a pointer to atom index, or NULL when index is outside
-// [0, m_atoms_count).
-const Chapters::Atom* Chapters::Edition::GetAtom(int index) const {
-  const bool in_range = (index >= 0) && (index < m_atoms_count);
-  if (!in_range)
-    return NULL;
-
-  return &m_atoms[index];
-}
-
-// Puts the edition into the empty state: no atoms array, zero capacity,
-// zero count.
-void Chapters::Edition::Init() {
-  m_atoms_count = 0;
-  m_atoms_size = 0;
-  m_atoms = NULL;
-}
-
-// Copies the atoms pointer, capacity and count into rhs. The array itself
-// is not duplicated, so both objects refer to the same atoms afterwards.
-void Chapters::Edition::ShallowCopy(Edition& rhs) const {
-  rhs.m_atoms_count = m_atoms_count;
-  rhs.m_atoms_size = m_atoms_size;
-  rhs.m_atoms = m_atoms;
-}
-
-// Clears every atom, last to first, frees the atoms array and returns the
-// edition to the empty state.
-void Chapters::Edition::Clear() {
-  while (m_atoms_count > 0) {
-    --m_atoms_count;
-    m_atoms[m_atoms_count].Clear();
-  }
-
-  delete[] m_atoms;
-
-  m_atoms = NULL;
-  m_atoms_size = 0;
-}
-
-// Walks the child elements in [pos, pos + size) and hands every ChapterAtom
-// to ParseAtom; other children are skipped.
-// Returns 0 on success, a negative status from ParseElementHeader or
-// ParseAtom, or E_FILE_FORMAT_INVALID when the children do not end exactly
-// at the end of the range.
-long Chapters::Edition::Parse(IMkvReader* pReader, long long pos,
-                              long long size) {
-  const long long payload_end = pos + size;
-
-  while (pos < payload_end) {
-    long long child_id, child_size;
-
-    long rc =
-        ParseElementHeader(pReader, pos, payload_end, child_id, child_size);
-    if (rc < 0)  // error
-      return rc;
-
-    if (child_size == 0)  // empty child, nothing to consume
-      continue;
-
-    if (child_id == libwebm::kMkvChapterAtom) {
-      rc = ParseAtom(pReader, pos, child_size);
-      if (rc < 0)  // error
-        return rc;
-    }
-
-    pos += child_size;
-    if (pos > payload_end)
-      return E_FILE_FORMAT_INVALID;
-  }
-
-  if (pos != payload_end)
-    return E_FILE_FORMAT_INVALID;
-
-  return 0;
-}
-
-// Appends a freshly initialised Atom and parses [pos, pos + size) into it.
-// Returns -1 when the atoms array cannot grow, otherwise the status of
-// Atom::Parse.
-long Chapters::Edition::ParseAtom(IMkvReader* pReader, long long pos,
-                                  long long size) {
-  if (!ExpandAtomsArray())
-    return -1;
-
-  Atom* const slot = m_atoms + m_atoms_count;
-  ++m_atoms_count;
-
-  slot->Init();
-  return slot->Parse(pReader, pos, size);
-}
-
-// Makes sure m_atoms has room for at least one more entry.
-// When the array is full its capacity becomes 1 (if it was 0) or doubles;
-// existing entries are moved with ShallowCopy and the old array is freed.
-// Returns false only when the new array cannot be allocated.
-bool Chapters::Edition::ExpandAtomsArray() {
-  if (m_atoms_count < m_atoms_size)
-    return true;  // spare capacity already available
-
-  int new_capacity = 1;
-  if (m_atoms_size != 0)
-    new_capacity = 2 * m_atoms_size;
-
-  Atom* const grown = new (std::nothrow) Atom[new_capacity];
-  if (grown == NULL)
-    return false;
-
-  for (int i = 0; i < m_atoms_count; ++i)
-    m_atoms[i].ShallowCopy(grown[i]);
-
-  delete[] m_atoms;
-
-  m_atoms = grown;
-  m_atoms_size = new_capacity;
-  return true;
-}
-
-// The constructor and destructor do nothing; members are set up by Init()
-// and released by Clear().
-Chapters::Atom::Atom() {
-}
-
-Chapters::Atom::~Atom() {
-}
-
-// Plain accessors: each returns the corresponding member as stored.
-unsigned long long Chapters::Atom::GetUID() const {
-  return m_uid;
-}
-
-const char* Chapters::Atom::GetStringUID() const {
-  return m_string_uid;
-}
-
-long long Chapters::Atom::GetStartTimecode() const {
-  return m_start_timecode;
-}
-
-long long Chapters::Atom::GetStopTimecode() const {
-  return m_stop_timecode;
-}
-
-// Start timecode converted by GetTime(); -1 when the conversion fails.
-long long Chapters::Atom::GetStartTime(const Chapters* pChapters) const {
-  const long long start = GetTime(pChapters, m_start_timecode);
-  return start;
-}
-
-// Stop timecode converted by GetTime(); -1 when the conversion fails.
-long long Chapters::Atom::GetStopTime(const Chapters* pChapters) const {
-  const long long stop = GetTime(pChapters, m_stop_timecode);
-  return stop;
-}
-
-// Returns m_displays_count.
-int Chapters::Atom::GetDisplayCount() const {
-  return m_displays_count;
-}
-
-// Returns a pointer to display index, or NULL when index is outside
-// [0, m_displays_count).
-const Chapters::Display* Chapters::Atom::GetDisplay(int index) const {
-  const bool in_range = (index >= 0) && (index < m_displays_count);
-  if (!in_range)
-    return NULL;
-
-  return &m_displays[index];
-}
-
-// Puts the atom into the empty state: no string UID, UID 0, both timecodes
-// set to -1, and no displays.
-void Chapters::Atom::Init() {
-  m_uid = 0;
-  m_string_uid = NULL;
-
-  m_start_timecode = -1;
-  m_stop_timecode = -1;
-
-  m_displays_count = 0;
-  m_displays_size = 0;
-  m_displays = NULL;
-}
-
-// Copies every member into rhs. Pointers (string UID, displays array) are
-// copied as-is, so both objects refer to the same storage afterwards.
-void Chapters::Atom::ShallowCopy(Atom& rhs) const {
-  rhs.m_uid = m_uid;
-  rhs.m_string_uid = m_string_uid;
-
-  rhs.m_start_timecode = m_start_timecode;
-  rhs.m_stop_timecode = m_stop_timecode;
-
-  rhs.m_displays_count = m_displays_count;
-  rhs.m_displays_size = m_displays_size;
-  rhs.m_displays = m_displays;
-}
-
-// Frees the string UID, clears every display (last to first), frees the
-// displays array and resets the related members.
-void Chapters::Atom::Clear() {
-  delete[] m_string_uid;
-  m_string_uid = NULL;
-
-  while (m_displays_count > 0) {
-    --m_displays_count;
-    m_displays[m_displays_count].Clear();
-  }
-
-  delete[] m_displays;
-
-  m_displays = NULL;
-  m_displays_size = 0;
-}
-
-// Walks the child elements in [pos, pos + size) and stores the recognised
-// ones: ChapterDisplay (via ParseDisplay), ChapterStringUID, ChapterUID,
-// ChapterTimeStart and ChapterTimeEnd. Other children are skipped.
-// Returns 0 on success, a negative status from a helper, or
-// E_FILE_FORMAT_INVALID when the children do not end exactly at the end of
-// the range.
-long Chapters::Atom::Parse(IMkvReader* pReader, long long pos, long long size) {
-  const long long payload_end = pos + size;
-
-  while (pos < payload_end) {
-    long long child_id, child_size;
-
-    long rc =
-        ParseElementHeader(pReader, pos, payload_end, child_id, child_size);
-    if (rc < 0)  // error
-      return rc;
-
-    if (child_size == 0)  // 0 length payload, skip.
-      continue;
-
-    if (child_id == libwebm::kMkvChapterDisplay) {
-      rc = ParseDisplay(pReader, pos, child_size);
-      if (rc < 0)  // error
-        return rc;
-    } else if (child_id == libwebm::kMkvChapterStringUID) {
-      rc = UnserializeString(pReader, pos, child_size, m_string_uid);
-      if (rc < 0)  // error
-        return rc;
-    } else if (child_id == libwebm::kMkvChapterUID) {
-      long long uid;
-      rc = UnserializeInt(pReader, pos, child_size, uid);
-      if (rc < 0)  // error
-        return rc;
-
-      m_uid = static_cast<unsigned long long>(uid);
-    } else if (child_id == libwebm::kMkvChapterTimeStart) {
-      const long long start = UnserializeUInt(pReader, pos, child_size);
-      if (start < 0)  // error
-        return static_cast<long>(start);
-
-      m_start_timecode = start;
-    } else if (child_id == libwebm::kMkvChapterTimeEnd) {
-      const long long end = UnserializeUInt(pReader, pos, child_size);
-      if (end < 0)  // error
-        return static_cast<long>(end);
-
-      m_stop_timecode = end;
-    }
-
-    pos += child_size;
-    if (pos > payload_end)
-      return E_FILE_FORMAT_INVALID;
-  }
-
-  if (pos != payload_end)
-    return E_FILE_FORMAT_INVALID;
-
-  return 0;
-}
-
-// Multiplies timecode by the timecode scale of the segment that owns
-// pChapters. Returns -1 when pChapters, its segment or the segment info is
-// NULL, when the scale is below 1, or when timecode is negative.
-long long Chapters::Atom::GetTime(const Chapters* pChapters,
-                                  long long timecode) {
-  if (pChapters == NULL)
-    return -1;
-
-  Segment* const segment = pChapters->m_pSegment;
-  if (segment == NULL)  // weird
-    return -1;
-
-  const SegmentInfo* const info = segment->GetInfo();
-  if (info == NULL)
-    return -1;
-
-  const long long scale = info->GetTimeCodeScale();
-
-  if ((scale < 1) || (timecode < 0))
-    return -1;
-
-  return scale * timecode;
-}
-
-// Appends a freshly initialised Display and parses [pos, pos + size) into
-// it. Returns -1 when the displays array cannot grow, otherwise the status
-// of Display::Parse.
-long Chapters::Atom::ParseDisplay(IMkvReader* pReader, long long pos,
-                                  long long size) {
-  if (!ExpandDisplaysArray())
-    return -1;
-
-  Display* const slot = m_displays + m_displays_count;
-  ++m_displays_count;
-
-  slot->Init();
-  return slot->Parse(pReader, pos, size);
-}
-
-// Makes sure m_displays has room for at least one more entry.
-// When the array is full its capacity becomes 1 (if it was 0) or doubles;
-// existing entries are moved with ShallowCopy and the old array is freed.
-// Returns false only when the new array cannot be allocated.
-bool Chapters::Atom::ExpandDisplaysArray() {
-  if (m_displays_count < m_displays_size)
-    return true;  // spare capacity already available
-
-  int new_capacity = 1;
-  if (m_displays_size != 0)
-    new_capacity = 2 * m_displays_size;
-
-  Display* const grown = new (std::nothrow) Display[new_capacity];
-  if (grown == NULL)
-    return false;
-
-  for (int i = 0; i < m_displays_count; ++i)
-    m_displays[i].ShallowCopy(grown[i]);
-
-  delete[] m_displays;
-
-  m_displays = grown;
-  m_displays_size = new_capacity;
-  return true;
-}
-
-// The constructor and destructor do nothing; members are set up by Init()
-// and released by Clear().
-Chapters::Display::Display() {
-}
-
-Chapters::Display::~Display() {
-}
-
-// Plain accessors: each returns the corresponding string pointer as stored.
-const char* Chapters::Display::GetString() const {
-  return m_string;
-}
-
-const char* Chapters::Display::GetLanguage() const {
-  return m_language;
-}
-
-const char* Chapters::Display::GetCountry() const {
-  return m_country;
-}
-
-// Sets all three string pointers to NULL.
-void Chapters::Display::Init() {
-  m_country = NULL;
-  m_language = NULL;
-  m_string = NULL;
-}
-
-// Copies the three string pointers into rhs without duplicating the
-// strings, so both objects refer to the same storage afterwards.
-void Chapters::Display::ShallowCopy(Display& rhs) const {
-  rhs.m_country = m_country;
-  rhs.m_language = m_language;
-  rhs.m_string = m_string;
-}
-
-// Frees the three strings and resets their pointers to NULL.
-void Chapters::Display::Clear() {
-  delete[] m_string;
-  delete[] m_language;
-  delete[] m_country;
-
-  m_string = NULL;
-  m_language = NULL;
-  m_country = NULL;
-}
-
-// Walks the child elements in [pos, pos + size) and stores ChapString,
-// ChapLanguage and ChapCountry in m_string, m_language and m_country.
-// Other children are skipped.
-// Returns 0 on success, a negative status from ParseElementHeader, any
-// non-zero status from UnserializeString, or E_FILE_FORMAT_INVALID when the
-// children do not end exactly at the end of the range.
-long Chapters::Display::Parse(IMkvReader* pReader, long long pos,
-                              long long size) {
-  const long long payload_end = pos + size;
-
-  while (pos < payload_end) {
-    long long child_id, child_size;
-
-    long rc =
-        ParseElementHeader(pReader, pos, payload_end, child_id, child_size);
-    if (rc < 0)  // error
-      return rc;
-
-    if (child_size == 0)  // No payload.
-      continue;
-
-    if (child_id == libwebm::kMkvChapString) {
-      rc = UnserializeString(pReader, pos, child_size, m_string);
-      if (rc)
-        return rc;
-    } else if (child_id == libwebm::kMkvChapLanguage) {
-      rc = UnserializeString(pReader, pos, child_size, m_language);
-      if (rc)
-        return rc;
-    } else if (child_id == libwebm::kMkvChapCountry) {
-      rc = UnserializeString(pReader, pos, child_size, m_country);
-      if (rc)
-        return rc;
-    }
-
-    pos += child_size;
-    if (pos > payload_end)
-      return E_FILE_FORMAT_INVALID;
-  }
-
-  if (pos != payload_end)
-    return E_FILE_FORMAT_INVALID;
-
-  return 0;
-}
-
-// Records the owning segment and the payload / element extents, and starts
-// with an empty tags array. No parsing happens here; see Parse().
-Tags::Tags(Segment* pSegment, long long payload_start, long long payload_size,
-           long long element_start, long long element_size)
-    : m_pSegment(pSegment),
-      m_start(payload_start),  // payload extent
-      m_size(payload_size),
-      m_element_start(element_start),  // whole-element extent
-      m_element_size(element_size),
-      m_tags(NULL),  // nothing allocated yet
-      m_tags_size(0),
-      m_tags_count(0) {
-}
-
-// Clears every tag, last to first, then frees the tags array.
-Tags::~Tags() {
-  while (m_tags_count > 0) {
-    --m_tags_count;
-    m_tags[m_tags_count].Clear();
-  }
-
-  delete[] m_tags;
-}
-
-// Walks the child elements in [m_start, m_start + m_size) and hands every
-// Tag element to ParseTag; other children are skipped.
-// Returns 0 on success, a negative status from ParseElementHeader or
-// ParseTag, or E_FILE_FORMAT_INVALID when the children do not end exactly at
-// the payload end.
-long Tags::Parse() {
-  IMkvReader* const reader = m_pSegment->m_pReader;
-
-  long long cursor = m_start;
-  const long long payload_end = cursor + m_size;
-
-  while (cursor < payload_end) {
-    long long child_id, child_size;
-
-    long rc =
-        ParseElementHeader(reader, cursor, payload_end, child_id, child_size);
-    if (rc < 0)
-      return rc;
-
-    if (child_size == 0)  // 0 length tag, read another
-      continue;
-
-    if (child_id == libwebm::kMkvTag) {
-      rc = ParseTag(cursor, child_size);
-      if (rc < 0)
-        return rc;
-    }
-
-    cursor += child_size;
-    if (cursor > payload_end)
-      return E_FILE_FORMAT_INVALID;
-  }
-
-  if (cursor != payload_end)
-    return E_FILE_FORMAT_INVALID;
-
-  return 0;
-}
-
-// Returns m_tags_count.
-int Tags::GetTagCount() const {
-  return m_tags_count;
-}
-
-// Returns a pointer to tag idx, or NULL when idx is outside
-// [0, m_tags_count).
-const Tags::Tag* Tags::GetTag(int idx) const {
-  const bool in_range = (idx >= 0) && (idx < m_tags_count);
-  if (!in_range)
-    return NULL;
-
-  return &m_tags[idx];
-}
-
-// Makes sure m_tags has room for at least one more entry.
-// When the array is full its capacity becomes 1 (if it was 0) or doubles;
-// existing entries are moved with ShallowCopy and the old array is freed.
-// Returns false only when the new array cannot be allocated.
-bool Tags::ExpandTagsArray() {
-  if (m_tags_count < m_tags_size)
-    return true;  // spare capacity already available
-
-  int new_capacity = 1;
-  if (m_tags_size != 0)
-    new_capacity = 2 * m_tags_size;
-
-  Tag* const grown = new (std::nothrow) Tag[new_capacity];
-  if (grown == NULL)
-    return false;
-
-  for (int i = 0; i < m_tags_count; ++i)
-    m_tags[i].ShallowCopy(grown[i]);
-
-  delete[] m_tags;
-
-  m_tags = grown;
-  m_tags_size = new_capacity;
-  return true;
-}
-
-// Appends a freshly initialised Tag and parses [pos, pos + size) into it.
-// Returns -1 when the tags array cannot grow, otherwise the status of
-// Tag::Parse.
-long Tags::ParseTag(long long pos, long long size) {
-  if (!ExpandTagsArray())
-    return -1;
-
-  Tag* const slot = m_tags + m_tags_count;
-  ++m_tags_count;
-
-  slot->Init();
-  return slot->Parse(m_pSegment->m_pReader, pos, size);
-}
-
-// The constructor and destructor do nothing; members are set up by Init()
-// and released by Clear().
-Tags::Tag::Tag() {
-}
-
-Tags::Tag::~Tag() {
-}
-
-// Returns m_simple_tags_count.
-int Tags::Tag::GetSimpleTagCount() const {
-  return m_simple_tags_count;
-}
-
-// Returns a pointer to simple tag index, or NULL when index is outside
-// [0, m_simple_tags_count).
-const Tags::SimpleTag* Tags::Tag::GetSimpleTag(int index) const {
-  const bool in_range = (index >= 0) && (index < m_simple_tags_count);
-  if (!in_range)
-    return NULL;
-
-  return &m_simple_tags[index];
-}
-
-// Puts the tag into the empty state: no simple-tags array, zero capacity,
-// zero count.
-void Tags::Tag::Init() {
-  m_simple_tags_count = 0;
-  m_simple_tags_size = 0;
-  m_simple_tags = NULL;
-}
-
-// Copies the simple-tags pointer, capacity and count into rhs. The array
-// itself is not duplicated, so both objects refer to the same entries.
-void Tags::Tag::ShallowCopy(Tag& rhs) const {
-  rhs.m_simple_tags_count = m_simple_tags_count;
-  rhs.m_simple_tags_size = m_simple_tags_size;
-  rhs.m_simple_tags = m_simple_tags;
-}
-
-// Clears every simple tag, last to first, frees the array and returns the
-// tag to the empty state.
-void Tags::Tag::Clear() {
-  while (m_simple_tags_count > 0) {
-    --m_simple_tags_count;
-    m_simple_tags[m_simple_tags_count].Clear();
-  }
-
-  delete[] m_simple_tags;
-
-  m_simple_tags = NULL;
-  m_simple_tags_size = 0;
-}
-
-// Walks the child elements in [pos, pos + size) and hands every SimpleTag
-// to ParseSimpleTag; other children are skipped.
-// Returns 0 on success, a negative status from ParseElementHeader or
-// ParseSimpleTag, or E_FILE_FORMAT_INVALID when the children do not end
-// exactly at the end of the range.
-long Tags::Tag::Parse(IMkvReader* pReader, long long pos, long long size) {
-  const long long payload_end = pos + size;
-
-  while (pos < payload_end) {
-    long long child_id, child_size;
-
-    long rc =
-        ParseElementHeader(pReader, pos, payload_end, child_id, child_size);
-    if (rc < 0)
-      return rc;
-
-    if (child_size == 0)  // 0 length tag, read another
-      continue;
-
-    if (child_id == libwebm::kMkvSimpleTag) {
-      rc = ParseSimpleTag(pReader, pos, child_size);
-      if (rc < 0)
-        return rc;
-    }
-
-    pos += child_size;
-    if (pos > payload_end)
-      return E_FILE_FORMAT_INVALID;
-  }
-
-  if (pos != payload_end)
-    return E_FILE_FORMAT_INVALID;
-
-  return 0;
-}
-
-// Appends a freshly initialised SimpleTag and parses [pos, pos + size) into
-// it. Returns -1 when the simple-tags array cannot grow, otherwise the
-// status of SimpleTag::Parse.
-long Tags::Tag::ParseSimpleTag(IMkvReader* pReader, long long pos,
-                               long long size) {
-  if (!ExpandSimpleTagsArray())
-    return -1;
-
-  SimpleTag* const slot = m_simple_tags + m_simple_tags_count;
-  ++m_simple_tags_count;
-
-  slot->Init();
-  return slot->Parse(pReader, pos, size);
-}
-
-// Makes sure m_simple_tags has room for at least one more entry.
-// When the array is full its capacity becomes 1 (if it was 0) or doubles;
-// existing entries are moved with ShallowCopy and the old array is freed.
-// Returns false only when the new array cannot be allocated.
-bool Tags::Tag::ExpandSimpleTagsArray() {
-  if (m_simple_tags_count < m_simple_tags_size)
-    return true;  // spare capacity already available
-
-  int new_capacity = 1;
-  if (m_simple_tags_size != 0)
-    new_capacity = 2 * m_simple_tags_size;
-
-  SimpleTag* const grown = new (std::nothrow) SimpleTag[new_capacity];
-  if (grown == NULL)
-    return false;
-
-  for (int i = 0; i < m_simple_tags_count; ++i)
-    m_simple_tags[i].ShallowCopy(grown[i]);
-
-  delete[] m_simple_tags;
-
-  m_simple_tags = grown;
-  m_simple_tags_size = new_capacity;
-  return true;
-}
-
-// The constructor and destructor do nothing; members are set up by Init()
-// and released by Clear().
-Tags::SimpleTag::SimpleTag() {
-}
-
-Tags::SimpleTag::~SimpleTag() {
-}
-
-// Plain accessors: each returns the corresponding string pointer as stored.
-const char* Tags::SimpleTag::GetTagName() const {
-  return m_tag_name;
-}
-
-const char* Tags::SimpleTag::GetTagString() const {
-  return m_tag_string;
-}
-
-// Sets both string pointers to NULL.
-void Tags::SimpleTag::Init() {
-  m_tag_string = NULL;
-  m_tag_name = NULL;
-}
-
-// Copies both string pointers into rhs without duplicating the strings, so
-// both objects refer to the same storage afterwards.
-void Tags::SimpleTag::ShallowCopy(SimpleTag& rhs) const {
-  rhs.m_tag_string = m_tag_string;
-  rhs.m_tag_name = m_tag_name;
-}
-
-// Frees both strings and resets their pointers to NULL.
-void Tags::SimpleTag::Clear() {
-  delete[] m_tag_name;
-  delete[] m_tag_string;
-
-  m_tag_name = NULL;
-  m_tag_string = NULL;
-}
-
-// Parses the children of a SimpleTag element occupying [pos, pos + size),
-// storing the TagName and TagString payloads. Unrecognised children are
-// skipped. Returns 0 on success, otherwise an error status.
-long Tags::SimpleTag::Parse(IMkvReader* pReader, long long pos,
-                            long long size) {
-  const long long end = pos + size;
-
-  while (pos < end) {
-    long long child_id, child_size;
-    long status = ParseElementHeader(pReader, pos, end, child_id, child_size);
-    if (status < 0)  // error
-      return status;
-
-    // Nothing to read from an empty child; move on to the next header.
-    if (child_size == 0)
-      continue;
-
-    switch (child_id) {
-      case libwebm::kMkvTagName:
-        status = UnserializeString(pReader, pos, child_size, m_tag_name);
-        if (status != 0)
-          return status;
-        break;
-      case libwebm::kMkvTagString:
-        status = UnserializeString(pReader, pos, child_size, m_tag_string);
-        if (status != 0)
-          return status;
-        break;
-      default:
-        break;
-    }
-
-    pos += child_size;
-    if (pos > end)
-      return E_FILE_FORMAT_INVALID;
-  }
-
-  // The children must fill the parent payload exactly.
-  return (pos == end) ? 0 : E_FILE_FORMAT_INVALID;
-}
-
-// Records where the SegmentInfo element lives; the string members start out
-// empty and are filled in by Parse().
-SegmentInfo::SegmentInfo(Segment* pSegment, long long start, long long size_,
-                         long long element_start, long long element_size)
-    : m_pSegment(pSegment),
-      m_start(start),
-      m_size(size_),
-      m_element_start(element_start),
-      m_element_size(element_size),
-      m_pMuxingAppAsUTF8(NULL),
-      m_pWritingAppAsUTF8(NULL),
-      m_pTitleAsUTF8(NULL) {
-}
-
-// Releases the three owned strings.
-SegmentInfo::~SegmentInfo() {
-  delete[] m_pTitleAsUTF8;
-  m_pTitleAsUTF8 = NULL;
-
-  delete[] m_pWritingAppAsUTF8;
-  m_pWritingAppAsUTF8 = NULL;
-
-  delete[] m_pMuxingAppAsUTF8;
-  m_pMuxingAppAsUTF8 = NULL;
-}
-
-// Parses the SegmentInfo payload at [m_start, m_start + m_size).
-// TimecodeScale defaults to 1000000 and Duration to -1 (absent) before the
-// children are read. Returns 0 on success, otherwise an error status.
-long SegmentInfo::Parse() {
-  assert(m_pMuxingAppAsUTF8 == NULL);
-  assert(m_pWritingAppAsUTF8 == NULL);
-  assert(m_pTitleAsUTF8 == NULL);
-
-  IMkvReader* const reader = m_pSegment->m_pReader;
-
-  const long long end = m_start + m_size;
-  long long pos = m_start;
-
-  m_timecodeScale = 1000000;
-  m_duration = -1;
-
-  while (pos < end) {
-    long long id, size;
-
-    const long header_status = ParseElementHeader(reader, pos, end, id, size);
-    if (header_status < 0)  // error
-      return header_status;
-
-    switch (id) {
-      case libwebm::kMkvTimecodeScale:
-        m_timecodeScale = UnserializeUInt(reader, pos, size);
-        if (m_timecodeScale <= 0)
-          return E_FILE_FORMAT_INVALID;
-        break;
-
-      case libwebm::kMkvDuration: {
-        const long status = UnserializeFloat(reader, pos, size, m_duration);
-        if (status < 0)
-          return status;
-        if (m_duration < 0)
-          return E_FILE_FORMAT_INVALID;
-        break;
-      }
-
-      case libwebm::kMkvMuxingApp: {
-        const long status =
-            UnserializeString(reader, pos, size, m_pMuxingAppAsUTF8);
-        if (status != 0)
-          return status;
-        break;
-      }
-
-      case libwebm::kMkvWritingApp: {
-        const long status =
-            UnserializeString(reader, pos, size, m_pWritingAppAsUTF8);
-        if (status != 0)
-          return status;
-        break;
-      }
-
-      case libwebm::kMkvTitle: {
-        const long status =
-            UnserializeString(reader, pos, size, m_pTitleAsUTF8);
-        if (status != 0)
-          return status;
-        break;
-      }
-
-      default:
-        break;
-    }
-
-    pos += size;
-    if (pos > end)
-      return E_FILE_FORMAT_INVALID;
-  }
-
-  // Reject a duration that cannot be scaled into a long long.
-  const double scaled_duration = m_duration * m_timecodeScale;
-  if (scaled_duration > static_cast<double>(LLONG_MAX))
-    return E_FILE_FORMAT_INVALID;
-
-  return (pos == end) ? 0 : E_FILE_FORMAT_INVALID;
-}
-
-// Returns the timecode scale read by Parse().
-long long SegmentInfo::GetTimeCodeScale() const {
-  return m_timecodeScale;
-}
-
-// Returns the duration multiplied by the timecode scale, truncated to an
-// integer, or -1 when the stored duration is negative.
-long long SegmentInfo::GetDuration() const {
-  if (m_duration < 0)
-    return -1;
-
-  assert(m_timecodeScale >= 1);
-
-  return static_cast<long long>(double(m_duration) * double(m_timecodeScale));
-}
-
-// Returns the muxing application string (may be NULL).
-const char* SegmentInfo::GetMuxingAppAsUTF8() const {
-  return m_pMuxingAppAsUTF8;
-}
-
-// Returns the writing application string (may be NULL).
-const char* SegmentInfo::GetWritingAppAsUTF8() const {
-  return m_pWritingAppAsUTF8;
-}
-
-// Returns the segment title string (may be NULL).
-const char* SegmentInfo::GetTitleAsUTF8() const {
-  return m_pTitleAsUTF8;
-}
-
-///////////////////////////////////////////////////////////////
-// ContentEncoding element
-// Starts with algorithm 0 and no settings buffer.
-ContentEncoding::ContentCompression::ContentCompression()
- : algo(0), settings(NULL), settings_len(0) {}
-
-// Frees the settings buffer, if any.
-ContentEncoding::ContentCompression::~ContentCompression() {
- delete[] settings;
-}
-
-// Starts with all algorithm ids at 0 and no key/signature buffers.
-ContentEncoding::ContentEncryption::ContentEncryption()
- : algo(0),
- key_id(NULL),
- key_id_len(0),
- signature(NULL),
- signature_len(0),
- sig_key_id(NULL),
- sig_key_id_len(0),
- sig_algo(0),
- sig_hash_algo(0) {}
-
-// Frees the three owned byte buffers.
-ContentEncoding::ContentEncryption::~ContentEncryption() {
- delete[] key_id;
- delete[] signature;
- delete[] sig_key_id;
-}
-
-// Starts with empty compression/encryption entry arrays, order 0, scope 1
-// and type 0.
-ContentEncoding::ContentEncoding()
- : compression_entries_(NULL),
- compression_entries_end_(NULL),
- encryption_entries_(NULL),
- encryption_entries_end_(NULL),
- encoding_order_(0),
- encoding_scope_(1),
- encoding_type_(0) {}
-
-// Destroys every stored compression and encryption entry, then the pointer
-// arrays that held them.
-ContentEncoding::~ContentEncoding() {
-  for (ContentCompression** it = compression_entries_;
-       it != compression_entries_end_; ++it) {
-    delete *it;
-  }
-  delete[] compression_entries_;
-
-  for (ContentEncryption** it = encryption_entries_;
-       it != encryption_entries_end_; ++it) {
-    delete *it;
-  }
-  delete[] encryption_entries_;
-}
-
-// Returns the compression entry at idx, or NULL when idx is out of range.
-const ContentEncoding::ContentCompression*
-ContentEncoding::GetCompressionByIndex(unsigned long idx) const {
-  const ptrdiff_t n = compression_entries_end_ - compression_entries_;
-  assert(n >= 0);
-
-  if (idx < static_cast<unsigned long>(n))
-    return compression_entries_[idx];
-
-  return NULL;
-}
-
-// Returns how many compression entries are stored.
-unsigned long ContentEncoding::GetCompressionCount() const {
-  const ptrdiff_t n = compression_entries_end_ - compression_entries_;
-  assert(n >= 0);
-
-  return static_cast<unsigned long>(n);
-}
-
-// Returns the encryption entry at idx, or NULL when idx is out of range.
-const ContentEncoding::ContentEncryption* ContentEncoding::GetEncryptionByIndex(
-    unsigned long idx) const {
-  const ptrdiff_t n = encryption_entries_end_ - encryption_entries_;
-  assert(n >= 0);
-
-  if (idx < static_cast<unsigned long>(n))
-    return encryption_entries_[idx];
-
-  return NULL;
-}
-
-// Returns how many encryption entries are stored.
-unsigned long ContentEncoding::GetEncryptionCount() const {
-  const ptrdiff_t n = encryption_entries_end_ - encryption_entries_;
-  assert(n >= 0);
-
-  return static_cast<unsigned long>(n);
-}
-
-// Parses a ContentEncAESSettings payload at [start, start + size) into *aes.
-// Only cipher mode 1 is accepted; any other value is a format error.
-// Unrecognised children are skipped. Returns 0 on success.
-long ContentEncoding::ParseContentEncAESSettingsEntry(
-    long long start, long long size, IMkvReader* pReader,
-    ContentEncAESSettings* aes) {
-  assert(pReader);
-  assert(aes);
-
-  const long long end = start + size;
-
-  for (long long pos = start; pos < end;) {
-    long long child_id, child_size;
-    const long header_status =
-        ParseElementHeader(pReader, pos, end, child_id, child_size);
-    if (header_status < 0)  // error
-      return header_status;
-
-    if (child_id == libwebm::kMkvAESSettingsCipherMode) {
-      aes->cipher_mode = UnserializeUInt(pReader, pos, child_size);
-      if (aes->cipher_mode != 1)
-        return E_FILE_FORMAT_INVALID;
-    }
-
-    pos += child_size;  // consume payload
-    if (pos > end)
-      return E_FILE_FORMAT_INVALID;
-  }
-
-  return 0;
-}
-
-// Parses one ContentEncoding payload at [start, start + size).
-//
-// The payload is walked twice: the first pass counts ContentCompression and
-// ContentEncryption children so the pointer arrays can be sized exactly, the
-// second pass parses every child and stores the resulting entries.
-//
-// Returns 0 on success, -1 on allocation failure, when neither kind of child
-// is present, or when ContentEncodingScope is below 1; otherwise the error
-// status of the failing step. Entries stored before a failure stay owned by
-// this object and are released by the destructor.
-long ContentEncoding::ParseContentEncodingEntry(long long start, long long size,
-                                                IMkvReader* pReader) {
-  assert(pReader);
-
-  long long pos = start;
-  const long long stop = start + size;
-
-  // Count ContentCompression and ContentEncryption elements.
-  int compression_count = 0;
-  int encryption_count = 0;
-
-  while (pos < stop) {
-    long long id, size;
-    const long status = ParseElementHeader(pReader, pos, stop, id, size);
-    if (status < 0)  // error
-      return status;
-
-    if (id == libwebm::kMkvContentCompression)
-      ++compression_count;
-
-    if (id == libwebm::kMkvContentEncryption)
-      ++encryption_count;
-
-    pos += size;  // consume payload
-    if (pos > stop)
-      return E_FILE_FORMAT_INVALID;
-  }
-
-  if (compression_count <= 0 && encryption_count <= 0)
-    return -1;
-
-  if (compression_count > 0) {
-    compression_entries_ =
-        new (std::nothrow) ContentCompression*[compression_count];
-    if (!compression_entries_)
-      return -1;
-    compression_entries_end_ = compression_entries_;
-  }
-
-  if (encryption_count > 0) {
-    encryption_entries_ =
-        new (std::nothrow) ContentEncryption*[encryption_count];
-    if (!encryption_entries_) {
-      // Release the compression array and forget it, so the destructor does
-      // not delete[] the same pointer a second time.
-      delete[] compression_entries_;
-      compression_entries_ = NULL;
-      compression_entries_end_ = NULL;
-      return -1;
-    }
-    encryption_entries_end_ = encryption_entries_;
-  }
-
-  // Second pass: parse every child now that the arrays exist.
-  pos = start;
-  while (pos < stop) {
-    long long id, size;
-    long status = ParseElementHeader(pReader, pos, stop, id, size);
-    if (status < 0)  // error
-      return status;
-
-    if (id == libwebm::kMkvContentEncodingOrder) {
-      encoding_order_ = UnserializeUInt(pReader, pos, size);
-    } else if (id == libwebm::kMkvContentEncodingScope) {
-      encoding_scope_ = UnserializeUInt(pReader, pos, size);
-      if (encoding_scope_ < 1)
-        return -1;
-    } else if (id == libwebm::kMkvContentEncodingType) {
-      encoding_type_ = UnserializeUInt(pReader, pos, size);
-    } else if (id == libwebm::kMkvContentCompression) {
-      ContentCompression* const compression =
-          new (std::nothrow) ContentCompression();
-      if (!compression)
-        return -1;
-
-      status = ParseCompressionEntry(pos, size, pReader, compression);
-      if (status) {
-        delete compression;
-        return status;
-      }
-      *compression_entries_end_++ = compression;
-    } else if (id == libwebm::kMkvContentEncryption) {
-      ContentEncryption* const encryption =
-          new (std::nothrow) ContentEncryption();
-      if (!encryption)
-        return -1;
-
-      status = ParseEncryptionEntry(pos, size, pReader, encryption);
-      if (status) {
-        delete encryption;
-        return status;
-      }
-      *encryption_entries_end_++ = encryption;
-    }
-
-    pos += size;  // consume payload
-    if (pos > stop)
-      return E_FILE_FORMAT_INVALID;
-  }
-
-  if (pos != stop)
-    return E_FILE_FORMAT_INVALID;
-  return 0;
-}
-
-// Parses a ContentCompression payload at [start, start + size) into
-// *compression.
-//
-// ContentCompAlgo is mandatory; ContentCompSettings is optional and, when
-// present, is copied into a buffer that *compression takes ownership of.
-// Unrecognised children are skipped.
-//
-// Returns 0 on success, -1 on allocation failure, E_FILE_FORMAT_INVALID for
-// malformed input, or the status reported by the failing header parse or
-// read.
-long ContentEncoding::ParseCompressionEntry(long long start, long long size,
-                                            IMkvReader* pReader,
-                                            ContentCompression* compression) {
-  assert(pReader);
-  assert(compression);
-
-  long long pos = start;
-  const long long stop = start + size;
-
-  bool valid = false;
-
-  while (pos < stop) {
-    long long id, size;
-    const long status = ParseElementHeader(pReader, pos, stop, id, size);
-    if (status < 0)  // error
-      return status;
-
-    if (id == libwebm::kMkvContentCompAlgo) {
-      long long algo = UnserializeUInt(pReader, pos, size);
-      if (algo < 0)
-        return E_FILE_FORMAT_INVALID;
-      compression->algo = algo;
-      valid = true;
-    } else if (id == libwebm::kMkvContentCompSettings) {
-      // Drop settings left by an earlier duplicate element so the old buffer
-      // is not leaked (same pattern as the encryption key/signature fields).
-      delete[] compression->settings;
-      compression->settings = NULL;
-      compression->settings_len = 0;
-
-      if (size <= 0)
-        return E_FILE_FORMAT_INVALID;
-
-      const size_t buflen = static_cast<size_t>(size);
-      unsigned char* buf = SafeArrayAlloc<unsigned char>(1, buflen);
-      if (buf == NULL)
-        return -1;
-
-      const int read_status =
-          pReader->Read(pos, static_cast<long>(buflen), buf);
-      if (read_status) {
-        delete[] buf;
-        // Report the read failure itself; `status` is the non-negative
-        // header-parse result and is not the read error.
-        return read_status;
-      }
-
-      compression->settings = buf;
-      compression->settings_len = buflen;
-    }
-
-    pos += size;  // consume payload
-    if (pos > stop)
-      return E_FILE_FORMAT_INVALID;
-  }
-
-  // ContentCompAlgo is mandatory
-  if (!valid)
-    return E_FILE_FORMAT_INVALID;
-
-  return 0;
-}
-
-// Parses a ContentEncryption payload at [start, start + size) into
-// *encryption.
-//
-// Only ContentEncAlgo value 5 is accepted. The key id, signature and
-// signature key id payloads are copied into buffers owned by *encryption; a
-// repeated element replaces the earlier buffer. Unrecognised children are
-// skipped.
-//
-// Returns 0 on success, -1 on allocation failure, E_FILE_FORMAT_INVALID for
-// malformed input, or the status reported by the failing header parse, read
-// or nested AES-settings parse.
-long ContentEncoding::ParseEncryptionEntry(long long start, long long size,
-                                           IMkvReader* pReader,
-                                           ContentEncryption* encryption) {
-  assert(pReader);
-  assert(encryption);
-
-  long long pos = start;
-  const long long stop = start + size;
-
-  while (pos < stop) {
-    long long id, size;
-    const long status = ParseElementHeader(pReader, pos, stop, id, size);
-    if (status < 0)  // error
-      return status;
-
-    if (id == libwebm::kMkvContentEncAlgo) {
-      encryption->algo = UnserializeUInt(pReader, pos, size);
-      if (encryption->algo != 5)
-        return E_FILE_FORMAT_INVALID;
-    } else if (id == libwebm::kMkvContentEncKeyID) {
-      delete[] encryption->key_id;
-      encryption->key_id = NULL;
-      encryption->key_id_len = 0;
-
-      if (size <= 0)
-        return E_FILE_FORMAT_INVALID;
-
-      const size_t buflen = static_cast<size_t>(size);
-      unsigned char* buf = SafeArrayAlloc<unsigned char>(1, buflen);
-      if (buf == NULL)
-        return -1;
-
-      const int read_status =
-          pReader->Read(pos, static_cast<long>(buflen), buf);
-      if (read_status) {
-        delete[] buf;
-        // Report the read failure; `status` is the non-negative header-parse
-        // result and is not the read error.
-        return read_status;
-      }
-
-      encryption->key_id = buf;
-      encryption->key_id_len = buflen;
-    } else if (id == libwebm::kMkvContentSignature) {
-      delete[] encryption->signature;
-      encryption->signature = NULL;
-      encryption->signature_len = 0;
-
-      if (size <= 0)
-        return E_FILE_FORMAT_INVALID;
-
-      const size_t buflen = static_cast<size_t>(size);
-      unsigned char* buf = SafeArrayAlloc<unsigned char>(1, buflen);
-      if (buf == NULL)
-        return -1;
-
-      const int read_status =
-          pReader->Read(pos, static_cast<long>(buflen), buf);
-      if (read_status) {
-        delete[] buf;
-        return read_status;  // not `status`, see the key id branch
-      }
-
-      encryption->signature = buf;
-      encryption->signature_len = buflen;
-    } else if (id == libwebm::kMkvContentSigKeyID) {
-      delete[] encryption->sig_key_id;
-      encryption->sig_key_id = NULL;
-      encryption->sig_key_id_len = 0;
-
-      if (size <= 0)
-        return E_FILE_FORMAT_INVALID;
-
-      const size_t buflen = static_cast<size_t>(size);
-      unsigned char* buf = SafeArrayAlloc<unsigned char>(1, buflen);
-      if (buf == NULL)
-        return -1;
-
-      const int read_status =
-          pReader->Read(pos, static_cast<long>(buflen), buf);
-      if (read_status) {
-        delete[] buf;
-        return read_status;  // not `status`, see the key id branch
-      }
-
-      encryption->sig_key_id = buf;
-      encryption->sig_key_id_len = buflen;
-    } else if (id == libwebm::kMkvContentSigAlgo) {
-      encryption->sig_algo = UnserializeUInt(pReader, pos, size);
-    } else if (id == libwebm::kMkvContentSigHashAlgo) {
-      encryption->sig_hash_algo = UnserializeUInt(pReader, pos, size);
-    } else if (id == libwebm::kMkvContentEncAESSettings) {
-      const long status = ParseContentEncAESSettingsEntry(
-          pos, size, pReader, &encryption->aes_settings);
-      if (status)
-        return status;
-    }
-
-    pos += size;  // consume payload
-    if (pos > stop)
-      return E_FILE_FORMAT_INVALID;
-  }
-
-  return 0;
-}
-
-// Records the owning segment and the element's location; the content
-// encoding array starts out empty.
-Track::Track(Segment* pSegment, long long element_start, long long element_size)
-    : m_pSegment(pSegment),
-      m_element_start(element_start),
-      m_element_size(element_size),
-      content_encoding_entries_(NULL),
-      content_encoding_entries_end_(NULL) {
-}
-
-// Releases the track info strings and every stored ContentEncoding.
-Track::~Track() {
-  // m_info is cast to a mutable reference so it can be cleared here.
-  const_cast<Info&>(m_info).Clear();
-
-  for (ContentEncoding** it = content_encoding_entries_;
-       it != content_encoding_entries_end_; ++it) {
-    delete *it;
-  }
-
-  delete[] content_encoding_entries_;
-}
-
-// Allocates a Track and copies info into it. pResult must be NULL on entry
-// and receives the new track on success. Returns 0 on success, -1 if
-// pResult is already set or allocation fails, or the status from
-// Info::Copy() when the copy fails (the partially built track is deleted).
-long Track::Create(Segment* pSegment, const Info& info, long long element_start,
-                   long long element_size, Track*& pResult) {
-  if (pResult != NULL)
-    return -1;
-
-  Track* const track =
-      new (std::nothrow) Track(pSegment, element_start, element_size);
-  if (!track)
-    return -1;  // generic error
-
-  const int copy_status = info.Copy(track->m_info);
-  if (copy_status != 0) {  // error
-    delete track;
-    return copy_status;
-  }
-
-  pResult = track;
-  return 0;  // success
-}
-
-// Starts with zeroed numeric fields, no strings, no codec private data and
-// lacing disabled.
-Track::Info::Info()
-    : uid(0),
-      defaultDuration(0),
-      codecDelay(0),
-      seekPreRoll(0),
-      nameAsUTF8(NULL),
-      language(NULL),
-      codecId(NULL),
-      codecNameAsUTF8(NULL),
-      codecPrivate(NULL),
-      codecPrivateSize(0),
-      lacing(false) {
-}
-
-// Frees every owned buffer via Clear().
-Track::Info::~Info() {
-  Clear();
-}
-
-// Frees the owned strings and the codec private buffer, leaving the
-// pointers NULL and the private size 0.
-void Track::Info::Clear() {
-  delete[] codecPrivate;
-  codecPrivate = NULL;
-  codecPrivateSize = 0;
-
-  delete[] codecNameAsUTF8;
-  codecNameAsUTF8 = NULL;
-
-  delete[] codecId;
-  codecId = NULL;
-
-  delete[] language;
-  language = NULL;
-
-  delete[] nameAsUTF8;
-  nameAsUTF8 = NULL;
-}
-
-// Duplicates the string member selected by str from this object into dst_.
-// The destination member must be NULL on entry. A NULL source leaves the
-// destination NULL and counts as success. Returns 0 on success and -1 on a
-// bad selector, a non-NULL destination or allocation failure.
-int Track::Info::CopyStr(char* Info::*str, Info& dst_) const {
-  if (str == static_cast<char * Info::*>(NULL))
-    return -1;
-
-  char*& target = dst_.*str;
-  if (target != NULL)  // should be NULL already
-    return -1;
-
-  const char* const source = this->*str;
-  if (!source)
-    return 0;
-
-  // Copy the characters together with the terminating NUL.
-  const size_t bytes = strlen(source) + 1;
-
-  target = SafeArrayAlloc<char>(1, bytes);
-  if (!target)
-    return -1;
-
-  memcpy(target, source, bytes);
-
-  return 0;
-}
-
-// Deep-copies this Info into dst. Copying onto itself is a no-op.
-//
-// The string members and the codec private data need allocations, so the
-// copy can fail part-way: a non-zero return means dst was only partially
-// filled in. dst's string and codec private members must be empty on entry.
-int Track::Info::Copy(Info& dst) const {
-  if (this == &dst)
-    return 0;
-
-  // Plain value members first.
-  dst.type = type;
-  dst.number = number;
-  dst.defaultDuration = defaultDuration;
-  dst.codecDelay = codecDelay;
-  dst.seekPreRoll = seekPreRoll;
-  dst.uid = uid;
-  dst.lacing = lacing;
-  dst.settings = settings;
-
-  // Then each owned string, stopping at the first failure.
-  char* Info::*const string_members[] = {
-      &Info::nameAsUTF8, &Info::language, &Info::codecId,
-      &Info::codecNameAsUTF8};
-  const size_t string_member_count =
-      sizeof(string_members) / sizeof(string_members[0]);
-
-  for (size_t i = 0; i < string_member_count; ++i) {
-    const int status = CopyStr(string_members[i], dst);
-    if (status)
-      return status;
-  }
-
-  // Finally the codec private blob, when there is one.
-  if (!(codecPrivateSize > 0))
-    return 0;
-
-  if (codecPrivate == NULL || dst.codecPrivate || dst.codecPrivateSize != 0)
-    return -1;
-
-  dst.codecPrivate = SafeArrayAlloc<unsigned char>(1, codecPrivateSize);
-  if (dst.codecPrivate == NULL)
-    return -1;
-
-  memcpy(dst.codecPrivate, codecPrivate, codecPrivateSize);
-  dst.codecPrivateSize = codecPrivateSize;
-
-  return 0;
-}
-
-// Returns this track's end-of-stream sentinel entry.
-const BlockEntry* Track::GetEOS() const { return &m_eos; }
-
-// Returns the track type stored in the track info.
-long Track::GetType() const { return m_info.type; }
-
-// Returns the track number stored in the track info.
-long Track::GetNumber() const { return m_info.number; }
-
-// Returns the track UID stored in the track info.
-unsigned long long Track::GetUid() const { return m_info.uid; }
-
-// Returns the track name (may be NULL).
-const char* Track::GetNameAsUTF8() const { return m_info.nameAsUTF8; }
-
-// Returns the track language (may be NULL).
-const char* Track::GetLanguage() const { return m_info.language; }
-
-// Returns the codec name (may be NULL).
-const char* Track::GetCodecNameAsUTF8() const { return m_info.codecNameAsUTF8; }
-
-// Returns the codec id string (may be NULL).
-const char* Track::GetCodecId() const { return m_info.codecId; }
-
-// Returns the codec private buffer and writes its length to size.
-const unsigned char* Track::GetCodecPrivate(size_t& size) const {
- size = m_info.codecPrivateSize;
- return m_info.codecPrivate;
-}
-
-// Returns the lacing flag stored in the track info.
-bool Track::GetLacing() const { return m_info.lacing; }
-
-// Returns the default duration stored in the track info.
-unsigned long long Track::GetDefaultDuration() const {
- return m_info.defaultDuration;
-}
-
-// Returns the codec delay stored in the track info.
-unsigned long long Track::GetCodecDelay() const { return m_info.codecDelay; }
-
-// Returns the seek pre-roll stored in the track info.
-unsigned long long Track::GetSeekPreRoll() const { return m_info.seekPreRoll; }
-
-// Finds the first block entry that belongs to this track and passes
-// VetEntry(), scanning clusters from the start of the segment.
-//
-// Returns 0 with pBlockEntry set to the match; 1 with pBlockEntry set to the
-// EOS sentinel when the clusters run out or the search gives up;
-// E_BUFFER_NOT_FULL with pBlockEntry cleared when an EOS cluster is reached
-// before the segment is done parsing; or a negative status from the cluster.
-long Track::GetFirst(const BlockEntry*& pBlockEntry) const {
- const Cluster* pCluster = m_pSegment->GetFirst();
-
- // i counts the non-empty clusters searched without finding a match.
- for (int i = 0;;) {
- if (pCluster == NULL) {
- pBlockEntry = GetEOS();
- return 1;
- }
-
- if (pCluster->EOS()) {
- if (m_pSegment->DoneParsing()) {
- pBlockEntry = GetEOS();
- return 1;
- }
-
- pBlockEntry = 0;
- return E_BUFFER_NOT_FULL;
- }
-
- long status = pCluster->GetFirst(pBlockEntry);
-
- if (status < 0) // error
- return status;
-
- if (pBlockEntry == 0) { // empty cluster
- // Empty clusters do not count towards the search limit.
- pCluster = m_pSegment->GetNext(pCluster);
- continue;
- }
-
- // Walk the entries of this cluster looking for one on our track.
- for (;;) {
- const Block* const pBlock = pBlockEntry->GetBlock();
- assert(pBlock);
-
- const long long tn = pBlock->GetTrackNumber();
-
- if ((tn == m_info.number) && VetEntry(pBlockEntry))
- return 0;
-
- const BlockEntry* pNextEntry;
-
- status = pCluster->GetNext(pBlockEntry, pNextEntry);
-
- if (status < 0) // error
- return status;
-
- if (pNextEntry == 0)
- break;
-
- pBlockEntry = pNextEntry;
- }
-
- ++i;
-
- // Give up after 100 non-empty clusters without a match.
- if (i >= 100)
- break;
-
- pCluster = m_pSegment->GetNext(pCluster);
- }
-
- // NOTE: if we get here, it means that we didn't find a block with
- // a matching track number. We interpret that as an error (which
- // might be too conservative).
-
- pBlockEntry = GetEOS(); // so we can return a non-NULL value
- return 1;
-}
-
-// Finds the next block entry on this track after pCurrEntry, first within
-// the same cluster and then in the clusters that follow.
-//
-// Returns 0 with pNextEntry set to the match; 1 with pNextEntry set to the
-// EOS sentinel when the clusters run out or the search gives up;
-// E_BUFFER_NOT_FULL with pNextEntry cleared when an EOS cluster is reached
-// before the segment is done parsing; -1 when pCurrEntry has no block or its
-// block is on another track; or a negative status from the cluster.
-long Track::GetNext(const BlockEntry* pCurrEntry,
- const BlockEntry*& pNextEntry) const {
- assert(pCurrEntry);
- assert(!pCurrEntry->EOS()); //?
-
- const Block* const pCurrBlock = pCurrEntry->GetBlock();
- assert(pCurrBlock && pCurrBlock->GetTrackNumber() == m_info.number);
- // Repeats the assert as a runtime check for builds without asserts.
- if (!pCurrBlock || pCurrBlock->GetTrackNumber() != m_info.number)
- return -1;
-
- const Cluster* pCluster = pCurrEntry->GetCluster();
- assert(pCluster);
- assert(!pCluster->EOS());
-
- long status = pCluster->GetNext(pCurrEntry, pNextEntry);
-
- if (status < 0) // error
- return status;
-
- // i counts the further non-empty clusters entered during the search.
- for (int i = 0;;) {
- // Scan the remaining entries of the current cluster.
- while (pNextEntry) {
- const Block* const pNextBlock = pNextEntry->GetBlock();
- assert(pNextBlock);
-
- if (pNextBlock->GetTrackNumber() == m_info.number)
- return 0;
-
- pCurrEntry = pNextEntry;
-
- status = pCluster->GetNext(pCurrEntry, pNextEntry);
-
- if (status < 0) // error
- return status;
- }
-
- pCluster = m_pSegment->GetNext(pCluster);
-
- if (pCluster == NULL) {
- pNextEntry = GetEOS();
- return 1;
- }
-
- if (pCluster->EOS()) {
- if (m_pSegment->DoneParsing()) {
- pNextEntry = GetEOS();
- return 1;
- }
-
- // TODO: there is a potential O(n^2) problem here: we tell the
- // caller to (pre)load another cluster, which he does, but then he
- // calls GetNext again, which repeats the same search. This is
- // a pathological case, since the only way it can happen is if
- // there exists a long sequence of clusters none of which contain a
- // block from this track. One way around this problem is for the
- // caller to be smarter when he loads another cluster: don't call
- // us back until you have a cluster that contains a block from this
- // track. (Of course, that's not cheap either, since our caller
- // would have to scan the each cluster as it's loaded, so that
- // would just push back the problem.)
-
- pNextEntry = NULL;
- return E_BUFFER_NOT_FULL;
- }
-
- status = pCluster->GetFirst(pNextEntry);
-
- if (status < 0) // error
- return status;
-
- if (pNextEntry == NULL) // empty cluster
- continue;
-
- ++i;
-
- // Give up after entering 100 non-empty clusters.
- if (i >= 100)
- break;
- }
-
- // NOTE: if we get here, it means that we didn't find a block with
- // a matching track number after lots of searching, so we give
- // up trying.
-
- pNextEntry = GetEOS(); // so we can return a non-NULL value
- return 1;
-}
-
-// Decides whether pBlockEntry is an acceptable seek target for this track.
-//
-// This base implementation accepts every entry whose block belongs to this
-// track, so all frames are valid seek targets. VideoTrack overrides it
-// because only video keyframes can be used as seek targets.
-bool Track::VetEntry(const BlockEntry* pBlockEntry) const {
-  assert(pBlockEntry);
-
-  const Block* const block = pBlockEntry->GetBlock();
-  assert(block);
-  assert(block->GetTrackNumber() == m_info.number);
-
-  // Repeat the asserted conditions at run time for builds without asserts.
-  const bool on_this_track =
-      block && block->GetTrackNumber() == m_info.number;
-
-  return on_this_track;
-}
-
-// Locates a block entry on this track for the requested time time_ns.
-//
-// Starts from the track's first entry; if that is already at or after
-// time_ns (or is the EOS sentinel) it is returned as is. Otherwise the
-// loaded clusters are binary-searched for the last one whose time is
-// <= time_ns, and the search walks backwards from there until a cluster
-// yields a non-EOS entry for this track.
-//
-// Returns a negative status if GetFirst() fails, otherwise 0 with pResult
-// set (to the EOS sentinel when no cluster yields an entry).
-long Track::Seek(long long time_ns, const BlockEntry*& pResult) const {
- const long status = GetFirst(pResult);
-
- if (status < 0) // buffer underflow, etc
- return status;
-
- assert(pResult);
-
- if (pResult->EOS())
- return 0;
-
- const Cluster* pCluster = pResult->GetCluster();
- assert(pCluster);
- assert(pCluster->GetIndex() >= 0);
-
- // The first entry is already at or past the requested time.
- if (time_ns <= pResult->GetBlock()->GetTime(pCluster))
- return 0;
-
- Cluster** const clusters = m_pSegment->m_clusters;
- assert(clusters);
-
- const long count = m_pSegment->GetCount(); // loaded only, not preloaded
- assert(count > 0);
-
- // i is the cluster holding the first entry; j is one past the last loaded
- // cluster.
- Cluster** const i = clusters + pCluster->GetIndex();
- assert(i);
- assert(*i == pCluster);
- assert(pCluster->GetTime() <= time_ns);
-
- Cluster** const j = clusters + count;
-
- Cluster** lo = i;
- Cluster** hi = j;
-
- while (lo < hi) {
- // INVARIANT:
- //[i, lo) <= time_ns
- //[lo, hi) ?
- //[hi, j) > time_ns
-
- Cluster** const mid = lo + (hi - lo) / 2;
- assert(mid < hi);
-
- pCluster = *mid;
- assert(pCluster);
- assert(pCluster->GetIndex() >= 0);
- assert(pCluster->GetIndex() == long(mid - m_pSegment->m_clusters));
-
- const long long t = pCluster->GetTime();
-
- if (t <= time_ns)
- lo = mid + 1;
- else
- hi = mid;
-
- assert(lo <= hi);
- }
-
- assert(lo == hi);
- assert(lo > i);
- assert(lo <= j);
-
- // lo is now one past the last cluster with time <= time_ns; walk
- // backwards until a cluster has an entry for this track.
- while (lo > i) {
- pCluster = *--lo;
- assert(pCluster);
- assert(pCluster->GetTime() <= time_ns);
-
- pResult = pCluster->GetEntry(this);
-
- if ((pResult != 0) && !pResult->EOS())
- return 0;
-
- // landed on empty cluster (no entries)
- }
-
- pResult = GetEOS(); // weird
- return 0;
-}
-
-// Returns the content encoding at idx, or NULL when idx is out of range.
-const ContentEncoding* Track::GetContentEncodingByIndex(
-    unsigned long idx) const {
-  const ptrdiff_t n = content_encoding_entries_end_ - content_encoding_entries_;
-  assert(n >= 0);
-
-  if (idx < static_cast<unsigned long>(n))
-    return content_encoding_entries_[idx];
-
-  return NULL;
-}
-
-// Returns how many content encodings are stored for this track.
-unsigned long Track::GetContentEncodingCount() const {
-  const ptrdiff_t n = content_encoding_entries_end_ - content_encoding_entries_;
-  assert(n >= 0);
-
-  return static_cast<unsigned long>(n);
-}
-
-// Parses a ContentEncodings payload at [start, start + size).
-//
-// The payload is walked twice: once to count the ContentEncoding children so
-// the pointer array can be sized, and once to parse each child into a new
-// ContentEncoding that is appended to the array.
-//
-// Returns 0 on success, -1 when there is no ContentEncoding child or an
-// allocation fails, otherwise the error status of the failing step.
-long Track::ParseContentEncodingsEntry(long long start, long long size) {
-  IMkvReader* const reader = m_pSegment->m_pReader;
-  assert(reader);
-
-  const long long end = start + size;
-
-  // Pass 1: count ContentEncoding elements.
-  int encoding_count = 0;
-  for (long long pos = start; pos < end;) {
-    long long child_id, child_size;
-    const long header_status =
-        ParseElementHeader(reader, pos, end, child_id, child_size);
-    if (header_status < 0)  // error
-      return header_status;
-
-    // pos now designates start of element
-    if (child_id == libwebm::kMkvContentEncoding)
-      ++encoding_count;
-
-    pos += child_size;  // consume payload
-    if (pos > end)
-      return E_FILE_FORMAT_INVALID;
-  }
-
-  if (encoding_count <= 0)
-    return -1;
-
-  content_encoding_entries_ =
-      new (std::nothrow) ContentEncoding*[encoding_count];
-  if (!content_encoding_entries_)
-    return -1;
-
-  content_encoding_entries_end_ = content_encoding_entries_;
-
-  // Pass 2: parse each ContentEncoding and store it.
-  long long pos = start;
-  while (pos < end) {
-    long long child_id, child_size;
-    const long header_status =
-        ParseElementHeader(reader, pos, end, child_id, child_size);
-    if (header_status < 0)  // error
-      return header_status;
-
-    // pos now designates start of element
-    if (child_id == libwebm::kMkvContentEncoding) {
-      ContentEncoding* const encoding = new (std::nothrow) ContentEncoding();
-      if (!encoding)
-        return -1;
-
-      const long parse_status =
-          encoding->ParseContentEncodingEntry(pos, child_size, reader);
-      if (parse_status != 0) {
-        delete encoding;
-        return parse_status;
-      }
-
-      *content_encoding_entries_end_ = encoding;
-      ++content_encoding_entries_end_;
-    }
-
-    pos += child_size;  // consume payload
-    if (pos > end)
-      return E_FILE_FORMAT_INVALID;
-  }
-
-  return (pos == end) ? 0 : E_FILE_FORMAT_INVALID;
-}
-
-// The EOS sentinel has no cluster (NULL) and uses LONG_MIN as its index.
-Track::EOSBlock::EOSBlock() : BlockEntry(NULL, LONG_MIN) {}
-
-// Identifies this entry as the end-of-stream marker.
-BlockEntry::Kind Track::EOSBlock::GetKind() const { return kBlockEOS; }
-
-// The EOS sentinel carries no block.
-const Block* Track::EOSBlock::GetBlock() const { return NULL; }
-
-// Reads one chromaticity coordinate from the float payload at read_pos and
-// stores it in the x (is_x == true) or y member of **chromaticity.
-//
-// *chromaticity is allocated on first use and is left allocated on failure,
-// so the caller keeps ownership either way. Returns false when reader or
-// chromaticity is NULL, allocation fails, the payload cannot be parsed, or
-// the value is outside [0, 1] or too small to represent as a normal float.
-bool PrimaryChromaticity::Parse(IMkvReader* reader, long long read_pos,
-                                long long value_size, bool is_x,
-                                PrimaryChromaticity** chromaticity) {
-  if (!reader || !chromaticity)
-    return false;
-
-  // Non-throwing new keeps the NULL check below meaningful and matches the
-  // allocation style used in the rest of the parser.
-  if (!*chromaticity)
-    *chromaticity = new (std::nothrow) PrimaryChromaticity();
-
-  if (!*chromaticity)
-    return false;
-
-  PrimaryChromaticity* pc = *chromaticity;
-  float* value = is_x ? &pc->x : &pc->y;
-
-  double parser_value = 0;
-  const long long parse_status =
-      UnserializeFloat(reader, read_pos, value_size, parser_value);
-
-  // Valid range is [0, 1]. Make sure the double is representable as a float
-  // before casting.
-  if (parse_status < 0 || parser_value < 0.0 || parser_value > 1.0 ||
-      (parser_value > 0.0 && parser_value < FLT_MIN))
-    return false;
-
-  *value = static_cast<float>(parser_value);
-
-  return true;
-}
-
-// Parses a MasteringMetadata payload at [mm_start, mm_start + mm_size).
-//
-// On success *mm receives a newly allocated object owned by the caller.
-// Returns false, leaving *mm untouched, when reader or mm is NULL, *mm is
-// already set, allocation fails, a child cannot be parsed, a value is out of
-// range, or an unknown child id is encountered.
-bool MasteringMetadata::Parse(IMkvReader* reader, long long mm_start,
-                              long long mm_size, MasteringMetadata** mm) {
-  if (!reader || !mm || *mm)
-    return false;
-
-  // Non-throwing new keeps the NULL check below meaningful and matches the
-  // allocation style used in the rest of the parser.
-  std::unique_ptr<MasteringMetadata> mm_ptr(new (std::nothrow)
-                                                MasteringMetadata());
-  if (!mm_ptr.get())
-    return false;
-
-  const long long mm_end = mm_start + mm_size;
-  long long read_pos = mm_start;
-
-  while (read_pos < mm_end) {
-    long long child_id = 0;
-    long long child_size = 0;
-
-    const long long status =
-        ParseElementHeader(reader, read_pos, mm_end, child_id, child_size);
-    if (status < 0)
-      return false;
-
-    if (child_id == libwebm::kMkvLuminanceMax) {
-      double value = 0;
-      const long long value_parse_status =
-          UnserializeFloat(reader, read_pos, child_size, value);
-      // Check the parse result before trusting the value, then make sure the
-      // double is representable as a float before casting.
-      if (value_parse_status < 0 || value < -FLT_MAX || value > FLT_MAX ||
-          (value > 0.0 && value < FLT_MIN)) {
-        return false;
-      }
-      mm_ptr->luminance_max = static_cast<float>(value);
-      if (mm_ptr->luminance_max < 0.0 || mm_ptr->luminance_max > 9999.99) {
-        return false;
-      }
-    } else if (child_id == libwebm::kMkvLuminanceMin) {
-      double value = 0;
-      const long long value_parse_status =
-          UnserializeFloat(reader, read_pos, child_size, value);
-      if (value_parse_status < 0 || value < -FLT_MAX || value > FLT_MAX ||
-          (value > 0.0 && value < FLT_MIN)) {
-        return false;
-      }
-      mm_ptr->luminance_min = static_cast<float>(value);
-      if (mm_ptr->luminance_min < 0.0 || mm_ptr->luminance_min > 999.9999) {
-        return false;
-      }
-    } else {
-      // Every remaining recognised child is one coordinate of a
-      // chromaticity; pick the target object and the axis from the id.
-      bool is_x = false;
-      PrimaryChromaticity** chromaticity;
-      switch (child_id) {
-        case libwebm::kMkvPrimaryRChromaticityX:
-        case libwebm::kMkvPrimaryRChromaticityY:
-          is_x = child_id == libwebm::kMkvPrimaryRChromaticityX;
-          chromaticity = &mm_ptr->r;
-          break;
-        case libwebm::kMkvPrimaryGChromaticityX:
-        case libwebm::kMkvPrimaryGChromaticityY:
-          is_x = child_id == libwebm::kMkvPrimaryGChromaticityX;
-          chromaticity = &mm_ptr->g;
-          break;
-        case libwebm::kMkvPrimaryBChromaticityX:
-        case libwebm::kMkvPrimaryBChromaticityY:
-          is_x = child_id == libwebm::kMkvPrimaryBChromaticityX;
-          chromaticity = &mm_ptr->b;
-          break;
-        case libwebm::kMkvWhitePointChromaticityX:
-        case libwebm::kMkvWhitePointChromaticityY:
-          is_x = child_id == libwebm::kMkvWhitePointChromaticityX;
-          chromaticity = &mm_ptr->white_point;
-          break;
-        default:
-          return false;
-      }
-      const bool value_parse_status = PrimaryChromaticity::Parse(
-          reader, read_pos, child_size, is_x, chromaticity);
-      if (!value_parse_status)
-        return false;
-    }
-
-    read_pos += child_size;
-    if (read_pos > mm_end)
-      return false;
-  }
-
-  *mm = mm_ptr.release();
-  return true;
-}
-
-// Parses a Colour payload at [colour_start, colour_start + colour_size).
-//
-// On success *colour receives a newly allocated object owned by the caller.
-// Returns false, leaving *colour untouched, when reader or colour is NULL,
-// *colour is already set, allocation fails, a child cannot be parsed, an
-// integer child reads back negative, or an unknown child id is encountered.
-bool Colour::Parse(IMkvReader* reader, long long colour_start,
-                   long long colour_size, Colour** colour) {
-  if (!reader || !colour || *colour)
-    return false;
-
-  // Non-throwing new keeps the NULL check below meaningful and matches the
-  // allocation style used in the rest of the parser.
-  std::unique_ptr<Colour> colour_ptr(new (std::nothrow) Colour());
-  if (!colour_ptr.get())
-    return false;
-
-  const long long colour_end = colour_start + colour_size;
-  long long read_pos = colour_start;
-
-  while (read_pos < colour_end) {
-    long long child_id = 0;
-    long long child_size = 0;
-
-    const long status =
-        ParseElementHeader(reader, read_pos, colour_end, child_id, child_size);
-    if (status < 0)
-      return false;
-
-    // Each integer child is stored and then rejected if it reads back
-    // negative.
-    if (child_id == libwebm::kMkvMatrixCoefficients) {
-      colour_ptr->matrix_coefficients =
-          UnserializeUInt(reader, read_pos, child_size);
-      if (colour_ptr->matrix_coefficients < 0)
-        return false;
-    } else if (child_id == libwebm::kMkvBitsPerChannel) {
-      colour_ptr->bits_per_channel =
-          UnserializeUInt(reader, read_pos, child_size);
-      if (colour_ptr->bits_per_channel < 0)
-        return false;
-    } else if (child_id == libwebm::kMkvChromaSubsamplingHorz) {
-      colour_ptr->chroma_subsampling_horz =
-          UnserializeUInt(reader, read_pos, child_size);
-      if (colour_ptr->chroma_subsampling_horz < 0)
-        return false;
-    } else if (child_id == libwebm::kMkvChromaSubsamplingVert) {
-      colour_ptr->chroma_subsampling_vert =
-          UnserializeUInt(reader, read_pos, child_size);
-      if (colour_ptr->chroma_subsampling_vert < 0)
-        return false;
-    } else if (child_id == libwebm::kMkvCbSubsamplingHorz) {
-      colour_ptr->cb_subsampling_horz =
-          UnserializeUInt(reader, read_pos, child_size);
-      if (colour_ptr->cb_subsampling_horz < 0)
-        return false;
-    } else if (child_id == libwebm::kMkvCbSubsamplingVert) {
-      colour_ptr->cb_subsampling_vert =
-          UnserializeUInt(reader, read_pos, child_size);
-      if (colour_ptr->cb_subsampling_vert < 0)
-        return false;
-    } else if (child_id == libwebm::kMkvChromaSitingHorz) {
-      colour_ptr->chroma_siting_horz =
-          UnserializeUInt(reader, read_pos, child_size);
-      if (colour_ptr->chroma_siting_horz < 0)
-        return false;
-    } else if (child_id == libwebm::kMkvChromaSitingVert) {
-      colour_ptr->chroma_siting_vert =
-          UnserializeUInt(reader, read_pos, child_size);
-      if (colour_ptr->chroma_siting_vert < 0)
-        return false;
-    } else if (child_id == libwebm::kMkvRange) {
-      colour_ptr->range = UnserializeUInt(reader, read_pos, child_size);
-      if (colour_ptr->range < 0)
-        return false;
-    } else if (child_id == libwebm::kMkvTransferCharacteristics) {
-      colour_ptr->transfer_characteristics =
-          UnserializeUInt(reader, read_pos, child_size);
-      if (colour_ptr->transfer_characteristics < 0)
-        return false;
-    } else if (child_id == libwebm::kMkvPrimaries) {
-      colour_ptr->primaries = UnserializeUInt(reader, read_pos, child_size);
-      if (colour_ptr->primaries < 0)
-        return false;
-    } else if (child_id == libwebm::kMkvMaxCLL) {
-      colour_ptr->max_cll = UnserializeUInt(reader, read_pos, child_size);
-      if (colour_ptr->max_cll < 0)
-        return false;
-    } else if (child_id == libwebm::kMkvMaxFALL) {
-      colour_ptr->max_fall = UnserializeUInt(reader, read_pos, child_size);
-      if (colour_ptr->max_fall < 0)
-        return false;
-    } else if (child_id == libwebm::kMkvMasteringMetadata) {
-      if (!MasteringMetadata::Parse(reader, read_pos, child_size,
-                                    &colour_ptr->mastering_metadata))
-        return false;
-    } else {
-      return false;
-    }
-
-    read_pos += child_size;
-    if (read_pos > colour_end)
-      return false;
-  }
-  *colour = colour_ptr.release();
-  return true;
-}
-
-// Parses the children of a Projection element whose payload spans
-// [start, start + size) and, on success, stores a newly allocated Projection
-// in *projection. Returns false if reader is NULL, if *projection is already
-// set, or if any child element is unknown or fails to parse.
-bool Projection::Parse(IMkvReader* reader, long long start, long long size,
-                       Projection** projection) {
-  if (reader == NULL || *projection != NULL)
-    return false;
-
-  std::unique_ptr<Projection> parsed(new Projection());
-  if (!parsed)
-    return false;
-
-  const long long end = start + size;
-  long long cursor = start;
-
-  while (cursor < end) {
-    long long element_id = 0;
-    long long payload_size = 0;
-
-    if (ParseElementHeader(reader, cursor, end, element_id, payload_size) < 0)
-      return false;
-
-    if (element_id == libwebm::kMkvProjectionType) {
-      const long long raw_type = UnserializeUInt(reader, cursor, payload_size);
-      if (raw_type < 0)
-        return false;
-
-      parsed->type = static_cast<ProjectionType>(raw_type);
-    } else if (element_id == libwebm::kMkvProjectionPrivate) {
-      unsigned char* buffer = SafeArrayAlloc<unsigned char>(1, payload_size);
-      if (buffer == NULL)
-        return false;
-
-      // A non-zero Read() result means failure; the buffer is still ours.
-      if (reader->Read(cursor, static_cast<long>(payload_size), buffer)) {
-        delete[] buffer;
-        return false;
-      }
-
-      parsed->private_data = buffer;
-      parsed->private_data_length = static_cast<size_t>(payload_size);
-    } else {
-      // Every remaining accepted child is a float pose angle. The payload is
-      // read before the ID is checked, exactly as the ID dispatch below needs.
-      double angle = 0;
-      if (UnserializeFloat(reader, cursor, payload_size, angle) < 0)
-        return false;
-
-      // Make sure the value is representable as a float before casting.
-      const bool beyond_float_range = angle < -FLT_MAX || angle > FLT_MAX;
-      const bool below_float_min = angle > 0.0 && angle < FLT_MIN;
-      if (beyond_float_range || below_float_min)
-        return false;
-
-      if (element_id == libwebm::kMkvProjectionPoseYaw) {
-        parsed->pose_yaw = static_cast<float>(angle);
-      } else if (element_id == libwebm::kMkvProjectionPosePitch) {
-        parsed->pose_pitch = static_cast<float>(angle);
-      } else if (element_id == libwebm::kMkvProjectionPoseRoll) {
-        parsed->pose_roll = static_cast<float>(angle);
-      } else {
-        return false;  // unknown child element
-      }
-    }
-
-    cursor += payload_size;  // consume payload
-    if (cursor > end)
-      return false;
-  }
-
-  *projection = parsed.release();
-  return true;
-}
-
-// Constructs a video track over the given element span. The colour and
-// projection pointers start out NULL.
-VideoTrack::VideoTrack(Segment* pSegment,
-                       long long element_start,
-                       long long element_size)
-    : Track(pSegment, element_start, element_size),
-      m_colour(NULL),
-      m_projection(NULL) {
-}
-
-// Deletes the colour and projection objects held by this track.
-VideoTrack::~VideoTrack() {
-  delete m_colour;
-  delete m_projection;
-}
-
-// Parses the video settings referenced by info.settings and, on success,
-// returns a newly allocated VideoTrack through pResult.
-//
-// Returns 0 on success. Returns -1 if pResult is already set, if info.type is
-// not Track::kVideo, or if allocating the track fails. Returns
-// E_FILE_FORMAT_INVALID for out-of-range values or a payload that overruns
-// the settings span, and otherwise the failing status of
-// ParseElementHeader(), UnserializeFloat() or info.Copy().
-//
-// The Colour and Projection objects created while scanning are held by
-// unique_ptr guards until they are handed to the new track, so every early
-// error return frees them instead of leaking.
-long VideoTrack::Parse(Segment* pSegment, const Info& info,
-                       long long element_start, long long element_size,
-                       VideoTrack*& pResult) {
-  if (pResult)
-    return -1;
-
-  if (info.type != Track::kVideo)
-    return -1;
-
-  long long width = 0;
-  long long height = 0;
-  long long display_width = 0;
-  long long display_height = 0;
-  long long display_unit = 0;
-  long long stereo_mode = 0;
-
-  double rate = 0.0;
-
-  IMkvReader* const pReader = pSegment->m_pReader;
-
-  const Settings& s = info.settings;
-  assert(s.start >= 0);
-  assert(s.size >= 0);
-
-  long long pos = s.start;
-  assert(pos >= 0);
-
-  const long long stop = pos + s.size;
-
-  // The raw pointers are what the Parse() helpers fill in; the guards own
-  // whatever those helpers allocated.
-  Colour* colour = NULL;
-  Projection* projection = NULL;
-  std::unique_ptr<Colour> colour_guard;
-  std::unique_ptr<Projection> projection_guard;
-
-  while (pos < stop) {
-    long long id, size;
-
-    const long status = ParseElementHeader(pReader, pos, stop, id, size);
-
-    if (status < 0)  // error
-      return status;
-
-    if (id == libwebm::kMkvPixelWidth) {
-      width = UnserializeUInt(pReader, pos, size);
-
-      if (width <= 0)
-        return E_FILE_FORMAT_INVALID;
-    } else if (id == libwebm::kMkvPixelHeight) {
-      height = UnserializeUInt(pReader, pos, size);
-
-      if (height <= 0)
-        return E_FILE_FORMAT_INVALID;
-    } else if (id == libwebm::kMkvDisplayWidth) {
-      display_width = UnserializeUInt(pReader, pos, size);
-
-      if (display_width <= 0)
-        return E_FILE_FORMAT_INVALID;
-    } else if (id == libwebm::kMkvDisplayHeight) {
-      display_height = UnserializeUInt(pReader, pos, size);
-
-      if (display_height <= 0)
-        return E_FILE_FORMAT_INVALID;
-    } else if (id == libwebm::kMkvDisplayUnit) {
-      display_unit = UnserializeUInt(pReader, pos, size);
-
-      if (display_unit < 0)
-        return E_FILE_FORMAT_INVALID;
-    } else if (id == libwebm::kMkvStereoMode) {
-      stereo_mode = UnserializeUInt(pReader, pos, size);
-
-      if (stereo_mode < 0)
-        return E_FILE_FORMAT_INVALID;
-    } else if (id == libwebm::kMkvFrameRate) {
-      const long rate_status = UnserializeFloat(pReader, pos, size, rate);
-
-      if (rate_status < 0)
-        return rate_status;
-
-      if (rate <= 0)
-        return E_FILE_FORMAT_INVALID;
-    } else if (id == libwebm::kMkvColour) {
-      if (!Colour::Parse(pReader, pos, size, &colour))
-        return E_FILE_FORMAT_INVALID;
-
-      // Take ownership. The pointer comparison avoids resetting the guard
-      // with the object it already owns.
-      if (colour_guard.get() != colour)
-        colour_guard.reset(colour);
-    } else if (id == libwebm::kMkvProjection) {
-      if (!Projection::Parse(pReader, pos, size, &projection))
-        return E_FILE_FORMAT_INVALID;
-
-      if (projection_guard.get() != projection)
-        projection_guard.reset(projection);
-    }
-
-    pos += size;  // consume payload
-    if (pos > stop)
-      return E_FILE_FORMAT_INVALID;
-  }
-
-  if (pos != stop)
-    return E_FILE_FORMAT_INVALID;
-
-  VideoTrack* const pTrack =
-      new (std::nothrow) VideoTrack(pSegment, element_start, element_size);
-
-  if (pTrack == NULL)
-    return -1;  // generic error
-
-  const int status = info.Copy(pTrack->m_info);
-
-  if (status) {  // error
-    delete pTrack;
-    return status;
-  }
-
-  pTrack->m_width = width;
-  pTrack->m_height = height;
-  pTrack->m_display_width = display_width;
-  pTrack->m_display_height = display_height;
-  pTrack->m_display_unit = display_unit;
-  pTrack->m_stereo_mode = stereo_mode;
-  pTrack->m_rate = rate;
-
-  // From here on the track's destructor owns both objects.
-  pTrack->m_colour = colour_guard.release();
-  pTrack->m_projection = projection_guard.release();
-
-  pResult = pTrack;
-  return 0;  // success
-}
-
-// Accepts an entry only if the base-class check passes and the entry's block
-// reports IsKey().
-bool VideoTrack::VetEntry(const BlockEntry* pBlockEntry) const {
-  if (!Track::VetEntry(pBlockEntry))
-    return false;
-
-  return pBlockEntry->GetBlock()->IsKey();
-}
-/* Selects the block entry to resume from when seeking to time_ns.
- *
- * Returns the negative status of GetFirst() when that call fails, and 0 in
- * every other case. pResult is left as the first entry when that entry is
- * EOS or its block time is already >= time_ns. Otherwise the loaded clusters
- * are binary-searched for the last one whose time is <= time_ns, and clusters
- * are then visited backwards until GetEntry() yields a non-EOS entry. If none
- * does, pResult is set to GetEOS().
- */
-long VideoTrack::Seek(long long time_ns, const BlockEntry*& pResult) const {
- const long status = GetFirst(pResult);
-
- if (status < 0) // buffer underflow, etc
- return status;
-
- assert(pResult);
-
- if (pResult->EOS())
- return 0;
-
- const Cluster* pCluster = pResult->GetCluster();
- assert(pCluster);
- assert(pCluster->GetIndex() >= 0);
-
- if (time_ns <= pResult->GetBlock()->GetTime(pCluster))
- return 0;
-
- Cluster** const clusters = m_pSegment->m_clusters;
- assert(clusters);
-
- const long count = m_pSegment->GetCount(); // loaded only, not pre-loaded
- assert(count > 0);
-
- Cluster** const i = clusters + pCluster->GetIndex();
- assert(i);
- assert(*i == pCluster);
- assert(pCluster->GetTime() <= time_ns);
-
- Cluster** const j = clusters + count;
- // Binary search over [i, j) for the first cluster whose time is > time_ns.
- Cluster** lo = i;
- Cluster** hi = j;
-
- while (lo < hi) {
- // INVARIANT:
- //[i, lo) <= time_ns
- //[lo, hi) ?
- //[hi, j) > time_ns
-
- Cluster** const mid = lo + (hi - lo) / 2;
- assert(mid < hi);
-
- pCluster = *mid;
- assert(pCluster);
- assert(pCluster->GetIndex() >= 0);
- assert(pCluster->GetIndex() == long(mid - m_pSegment->m_clusters));
-
- const long long t = pCluster->GetTime();
-
- if (t <= time_ns)
- lo = mid + 1;
- else
- hi = mid;
-
- assert(lo <= hi);
- }
-
- assert(lo == hi);
- assert(lo > i);
- assert(lo <= j);
- // Step back to the last cluster whose time is <= time_ns.
- pCluster = *--lo;
- assert(pCluster);
- assert(pCluster->GetTime() <= time_ns);
-
- pResult = pCluster->GetEntry(this, time_ns);
-
- if ((pResult != 0) && !pResult->EOS()) // found a keyframe
- return 0;
- // Nothing usable in that cluster: try each earlier cluster back to i.
- while (lo != i) {
- pCluster = *--lo;
- assert(pCluster);
- assert(pCluster->GetTime() <= time_ns);
-
- pResult = pCluster->GetEntry(this, time_ns);
-
- if ((pResult != 0) && !pResult->EOS())
- return 0;
- }
-
- // weird: we're on the first cluster, but no keyframe found
- // should never happen but we must return something anyway
-
- pResult = GetEOS();
- return 0;
-}
-
-// Returns the stored colour pointer (NULL when none was set).
-Colour* VideoTrack::GetColour() const {
-  return m_colour;
-}
-
-// Returns the stored projection pointer (NULL when none was set).
-Projection* VideoTrack::GetProjection() const {
-  return m_projection;
-}
-
-// Returns the stored pixel width.
-long long VideoTrack::GetWidth() const {
-  return m_width;
-}
-
-// Returns the stored pixel height.
-long long VideoTrack::GetHeight() const {
-  return m_height;
-}
-
-// Returns the display width, falling back to the pixel width when no
-// positive display width is stored.
-long long VideoTrack::GetDisplayWidth() const {
-  if (m_display_width > 0)
-    return m_display_width;
-
-  return GetWidth();
-}
-
-// Returns the display height, falling back to the pixel height when no
-// positive display height is stored.
-long long VideoTrack::GetDisplayHeight() const {
-  if (m_display_height > 0)
-    return m_display_height;
-
-  return GetHeight();
-}
-
-// Returns the stored display unit value.
-long long VideoTrack::GetDisplayUnit() const {
-  return m_display_unit;
-}
-
-// Returns the stored stereo mode value.
-long long VideoTrack::GetStereoMode() const {
-  return m_stereo_mode;
-}
-
-// Returns the stored frame rate.
-double VideoTrack::GetFrameRate() const {
-  return m_rate;
-}
-
-// Constructs an audio track over the given element span; all work is
-// delegated to the Track base constructor.
-AudioTrack::AudioTrack(Segment* pSegment,
-                       long long element_start,
-                       long long element_size)
-    : Track(pSegment, element_start, element_size) {
-}
-
-// Parses the audio settings referenced by info.settings and, on success,
-// returns a newly allocated AudioTrack through pResult.
-//
-// Returns 0 on success. Returns -1 if pResult is already set, if info.type is
-// not Track::kAudio, or if allocating the track fails. Returns
-// E_FILE_FORMAT_INVALID for non-positive values or a payload that overruns
-// the settings span, and otherwise the failing status of the helper that
-// reported it.
-long AudioTrack::Parse(Segment* pSegment, const Info& info,
-                       long long element_start, long long element_size,
-                       AudioTrack*& pResult) {
-  if (pResult)
-    return -1;
-
-  if (info.type != Track::kAudio)
-    return -1;
-
-  IMkvReader* const pReader = pSegment->m_pReader;
-
-  const Settings& s = info.settings;
-  assert(s.start >= 0);
-  assert(s.size >= 0);
-
-  long long pos = s.start;
-  assert(pos >= 0);
-
-  const long long stop = pos + s.size;
-
-  // Values used when the corresponding element is absent.
-  double sampling_rate = 8000.0;  // MKV default
-  long long channel_count = 1;
-  long long bits_per_sample = 0;
-
-  while (pos < stop) {
-    long long id, size;
-
-    const long header_status =
-        ParseElementHeader(pReader, pos, stop, id, size);
-
-    if (header_status < 0)  // error
-      return header_status;
-
-    switch (id) {
-      case libwebm::kMkvSamplingFrequency: {
-        const long float_status =
-            UnserializeFloat(pReader, pos, size, sampling_rate);
-
-        if (float_status < 0)
-          return float_status;
-
-        if (sampling_rate <= 0)
-          return E_FILE_FORMAT_INVALID;
-        break;
-      }
-      case libwebm::kMkvChannels:
-        channel_count = UnserializeUInt(pReader, pos, size);
-
-        if (channel_count <= 0)
-          return E_FILE_FORMAT_INVALID;
-        break;
-      case libwebm::kMkvBitDepth:
-        bits_per_sample = UnserializeUInt(pReader, pos, size);
-
-        if (bits_per_sample <= 0)
-          return E_FILE_FORMAT_INVALID;
-        break;
-      default:
-        break;  // other elements are skipped
-    }
-
-    pos += size;  // consume payload
-    if (pos > stop)
-      return E_FILE_FORMAT_INVALID;
-  }
-
-  if (pos != stop)
-    return E_FILE_FORMAT_INVALID;
-
-  AudioTrack* const pTrack =
-      new (std::nothrow) AudioTrack(pSegment, element_start, element_size);
-
-  if (pTrack == NULL)
-    return -1;  // generic error
-
-  const int copy_status = info.Copy(pTrack->m_info);
-
-  if (copy_status) {
-    delete pTrack;
-    return copy_status;
-  }
-
-  pTrack->m_rate = sampling_rate;
-  pTrack->m_channels = channel_count;
-  pTrack->m_bitDepth = bits_per_sample;
-
-  pResult = pTrack;
-  return 0;  // success
-}
-
-// Returns the stored sampling rate.
-double AudioTrack::GetSamplingRate() const {
-  return m_rate;
-}
-
-// Returns the stored channel count.
-long long AudioTrack::GetChannels() const {
-  return m_channels;
-}
-
-// Returns the stored bit depth.
-long long AudioTrack::GetBitDepth() const {
-  return m_bitDepth;
-}
-
-// Records the payload span (start, size_) and the whole-element span of a
-// Tracks element. The entry array stays empty until Parse() is called.
-Tracks::Tracks(Segment* pSegment,
-               long long start,
-               long long size_,
-               long long element_start,
-               long long element_size)
-    : m_pSegment(pSegment),
-      m_start(start),
-      m_size(size_),
-      m_element_start(element_start),
-      m_element_size(element_size),
-      m_trackEntries(NULL),
-      m_trackEntriesEnd(NULL) {
-}
-/* Parses the Tracks payload [m_start, m_start + m_size) in two passes.
- *
- * Pass 1 counts the TrackEntry children. Pass 2 calls ParseTrackEntry() for
- * each of them and appends every non-NULL result to m_trackEntries.
- *
- * Returns 0 on success (including when there are no TrackEntry children),
- * -1 if the entry array cannot be allocated, E_FILE_FORMAT_INVALID if the
- * children do not end exactly at the payload end, and otherwise the failing
- * status of ParseElementHeader() or ParseTrackEntry().
- */
-long Tracks::Parse() {
- assert(m_trackEntries == NULL);
- assert(m_trackEntriesEnd == NULL);
-
- const long long stop = m_start + m_size;
- IMkvReader* const pReader = m_pSegment->m_pReader;
-
- int count = 0;
- long long pos = m_start;
- // Pass 1: count the TrackEntry children so the array can be sized.
- while (pos < stop) {
- long long id, size;
-
- const long status = ParseElementHeader(pReader, pos, stop, id, size);
-
- if (status < 0) // error
- return status;
- // NOTE(review): presumably ParseElementHeader already advanced pos past the header, so this continue makes progress - confirm.
- if (size == 0) // weird
- continue;
-
- if (id == libwebm::kMkvTrackEntry)
- ++count;
-
- pos += size; // consume payload
- if (pos > stop)
- return E_FILE_FORMAT_INVALID;
- }
-
- if (pos != stop)
- return E_FILE_FORMAT_INVALID;
-
- if (count <= 0)
- return 0; // success
-
- m_trackEntries = new (std::nothrow) Track*[count];
-
- if (m_trackEntries == NULL)
- return -1;
-
- m_trackEntriesEnd = m_trackEntries;
-
- pos = m_start;
- // Pass 2: parse each TrackEntry and append the tracks that were created.
- while (pos < stop) {
- const long long element_start = pos;
-
- long long id, payload_size;
-
- const long status =
- ParseElementHeader(pReader, pos, stop, id, payload_size);
-
- if (status < 0) // error
- return status;
-
- if (payload_size == 0) // weird
- continue;
-
- const long long payload_stop = pos + payload_size;
- assert(payload_stop <= stop); // checked in ParseElement
-
- const long long element_size = payload_stop - element_start;
-
- if (id == libwebm::kMkvTrackEntry) {
- Track*& pTrack = *m_trackEntriesEnd;
- pTrack = NULL;
-
- const long status = ParseTrackEntry(pos, payload_size, element_start,
- element_size, pTrack);
- if (status)
- return status;
- // Only advance the end pointer when a track object was produced.
- if (pTrack)
- ++m_trackEntriesEnd;
- }
-
- pos = payload_stop;
- if (pos > stop)
- return E_FILE_FORMAT_INVALID;
- }
-
- if (pos != stop)
- return E_FILE_FORMAT_INVALID;
-
- return 0; // success
-}
-
-// Returns the number of entries between m_trackEntries and
-// m_trackEntriesEnd.
-unsigned long Tracks::GetTracksCount() const {
-  const ptrdiff_t entry_count = m_trackEntriesEnd - m_trackEntries;
-  assert(entry_count >= 0);
-
-  const unsigned long as_unsigned = static_cast<unsigned long>(entry_count);
-  return as_unsigned;
-}
-/* Parses one TrackEntry payload [track_start, track_start + track_size) and
- * creates the matching track object in pResult.
- *
- * The loop gathers the common fields into a Track::Info and only records the
- * position and size of the Video, Audio and ContentEncodings children. After
- * the loop the track type selects VideoTrack::Parse(), AudioTrack::Parse()
- * or Track::Create().
- *
- * Returns 0 on success, -1 if pResult is already set or a buffer cannot be
- * allocated, E_FILE_FORMAT_INVALID for missing, duplicate or out-of-range
- * fields, and otherwise the failing status of the helper that reported it.
- */
-long Tracks::ParseTrackEntry(long long track_start, long long track_size,
- long long element_start, long long element_size,
- Track*& pResult) const {
- if (pResult)
- return -1;
-
- IMkvReader* const pReader = m_pSegment->m_pReader;
-
- long long pos = track_start;
- const long long track_stop = track_start + track_size;
-
- Track::Info info;
-
- info.type = 0;
- info.number = 0;
- info.uid = 0;
- info.defaultDuration = 0;
- // A start of -1 marks a sub-element that has not been seen.
- Track::Settings v;
- v.start = -1;
- v.size = -1;
-
- Track::Settings a;
- a.start = -1;
- a.size = -1;
-
- Track::Settings e; // content_encodings_settings;
- e.start = -1;
- e.size = -1;
-
- long long lacing = 1; // default is true
-
- while (pos < track_stop) {
- long long id, size;
-
- const long status = ParseElementHeader(pReader, pos, track_stop, id, size);
-
- if (status < 0) // error
- return status;
-
- if (size < 0)
- return E_FILE_FORMAT_INVALID;
-
- const long long start = pos;
-
- if (id == libwebm::kMkvVideo) {
- v.start = start;
- v.size = size;
- } else if (id == libwebm::kMkvAudio) {
- a.start = start;
- a.size = size;
- } else if (id == libwebm::kMkvContentEncodings) {
- e.start = start;
- e.size = size;
- } else if (id == libwebm::kMkvTrackUID) {
- if (size > 8)
- return E_FILE_FORMAT_INVALID;
-
- info.uid = 0;
-
- long long pos_ = start;
- const long long pos_end = start + size;
- // Accumulate the UID one byte at a time, most significant byte first.
- while (pos_ != pos_end) {
- unsigned char b;
-
- const int status = pReader->Read(pos_, 1, &b);
-
- if (status)
- return status;
-
- info.uid <<= 8;
- info.uid |= b;
-
- ++pos_;
- }
- } else if (id == libwebm::kMkvTrackNumber) {
- const long long num = UnserializeUInt(pReader, pos, size);
-
- if ((num <= 0) || (num > 127))
- return E_FILE_FORMAT_INVALID;
-
- info.number = static_cast<long>(num);
- } else if (id == libwebm::kMkvTrackType) {
- const long long type = UnserializeUInt(pReader, pos, size);
-
- if ((type <= 0) || (type > 254))
- return E_FILE_FORMAT_INVALID;
-
- info.type = static_cast<long>(type);
- } else if (id == libwebm::kMkvName) {
- const long status =
- UnserializeString(pReader, pos, size, info.nameAsUTF8);
-
- if (status)
- return status;
- } else if (id == libwebm::kMkvLanguage) {
- const long status = UnserializeString(pReader, pos, size, info.language);
-
- if (status)
- return status;
- } else if (id == libwebm::kMkvDefaultDuration) {
- const long long duration = UnserializeUInt(pReader, pos, size);
-
- if (duration < 0)
- return E_FILE_FORMAT_INVALID;
-
- info.defaultDuration = static_cast<unsigned long long>(duration);
- } else if (id == libwebm::kMkvCodecID) {
- const long status = UnserializeString(pReader, pos, size, info.codecId);
-
- if (status)
- return status;
- } else if (id == libwebm::kMkvFlagLacing) {
- lacing = UnserializeUInt(pReader, pos, size);
-
- if ((lacing < 0) || (lacing > 1))
- return E_FILE_FORMAT_INVALID;
- } else if (id == libwebm::kMkvCodecPrivate) {
- delete[] info.codecPrivate;
- info.codecPrivate = NULL;
- info.codecPrivateSize = 0;
-
- const size_t buflen = static_cast<size_t>(size);
-
- if (buflen) {
- unsigned char* buf = SafeArrayAlloc<unsigned char>(1, buflen);
-
- if (buf == NULL)
- return -1;
-
- const int status = pReader->Read(pos, static_cast<long>(buflen), buf);
-
- if (status) {
- delete[] buf;
- return status;
- }
-
- info.codecPrivate = buf;
- info.codecPrivateSize = buflen;
- }
- } else if (id == libwebm::kMkvCodecName) {
- const long status =
- UnserializeString(pReader, pos, size, info.codecNameAsUTF8);
-
- if (status)
- return status;
- } else if (id == libwebm::kMkvCodecDelay) {
- info.codecDelay = UnserializeUInt(pReader, pos, size);
- } else if (id == libwebm::kMkvSeekPreRoll) {
- info.seekPreRoll = UnserializeUInt(pReader, pos, size);
- }
- // NOTE(review): codecDelay and seekPreRoll are stored without the negative-result check applied to the other UnserializeUInt calls - confirm intended.
- pos += size; // consume payload
- if (pos > track_stop)
- return E_FILE_FORMAT_INVALID;
- }
-
- if (pos != track_stop)
- return E_FILE_FORMAT_INVALID;
-
- if (info.number <= 0) // not specified
- return E_FILE_FORMAT_INVALID;
- // Reject a track number that an already-parsed track uses.
- if (GetTrackByNumber(info.number))
- return E_FILE_FORMAT_INVALID;
-
- if (info.type <= 0) // not specified
- return E_FILE_FORMAT_INVALID;
-
- info.lacing = (lacing > 0) ? true : false;
-
- if (info.type == Track::kVideo) {
- if (v.start < 0)
- return E_FILE_FORMAT_INVALID;
-
- if (a.start >= 0)
- return E_FILE_FORMAT_INVALID;
-
- info.settings = v;
-
- VideoTrack* pTrack = NULL;
-
- const long status = VideoTrack::Parse(m_pSegment, info, element_start,
- element_size, pTrack);
-
- if (status)
- return status;
-
- pResult = pTrack;
- assert(pResult);
- // NOTE(review): the result of ParseContentEncodingsEntry is discarded here - confirm this is deliberate.
- if (e.start >= 0)
- pResult->ParseContentEncodingsEntry(e.start, e.size);
- } else if (info.type == Track::kAudio) {
- if (a.start < 0)
- return E_FILE_FORMAT_INVALID;
-
- if (v.start >= 0)
- return E_FILE_FORMAT_INVALID;
-
- info.settings = a;
-
- AudioTrack* pTrack = NULL;
-
- const long status = AudioTrack::Parse(m_pSegment, info, element_start,
- element_size, pTrack);
-
- if (status)
- return status;
-
- pResult = pTrack;
- assert(pResult);
- // NOTE(review): the result of ParseContentEncodingsEntry is discarded here as well - confirm.
- if (e.start >= 0)
- pResult->ParseContentEncodingsEntry(e.start, e.size);
- } else {
- // neither video nor audio - probably metadata or subtitles
-
- if (a.start >= 0)
- return E_FILE_FORMAT_INVALID;
-
- if (v.start >= 0)
- return E_FILE_FORMAT_INVALID;
-
- if (info.type == Track::kMetadata && e.start >= 0)
- return E_FILE_FORMAT_INVALID;
-
- info.settings.start = -1;
- info.settings.size = 0;
-
- Track* pTrack = NULL;
-
- const long status =
- Track::Create(m_pSegment, info, element_start, element_size, pTrack);
-
- if (status)
- return status;
-
- pResult = pTrack;
- assert(pResult);
- }
-
- return 0; // success
-}
-
-// Deletes every track in [m_trackEntries, m_trackEntriesEnd) and then the
-// pointer array itself.
-Tracks::~Tracks() {
-  for (Track** entry = m_trackEntries; entry != m_trackEntriesEnd; ++entry)
-    delete *entry;
-
-  delete[] m_trackEntries;
-}
-
-// Returns the first stored track whose GetNumber() equals tn, or NULL when
-// tn is negative or no stored track matches. NULL slots are skipped.
-const Track* Tracks::GetTrackByNumber(long tn) const {
-  if (tn < 0)
-    return NULL;
-
-  for (Track** entry = m_trackEntries; entry != m_trackEntriesEnd; ++entry) {
-    Track* const candidate = *entry;
-
-    if (candidate != NULL && tn == candidate->GetNumber())
-      return candidate;
-  }
-
-  return NULL;  // not found
-}
-
-// Returns the track stored at position idx, or NULL when idx is not below
-// the number of stored entries.
-const Track* Tracks::GetTrackByIndex(unsigned long idx) const {
-  const ptrdiff_t stored = m_trackEntriesEnd - m_trackEntries;
-  const unsigned long limit = static_cast<unsigned long>(stored);
-
-  if (idx < limit)
-    return m_trackEntries[idx];
-
-  return NULL;
-}
-
-// Partially loads this cluster: checks the Cluster ID at m_pos, reads the
-// cluster size, then scans children until both a Timecode and the first
-// BlockGroup/SimpleBlock have been seen.
-//
-// On success returns 0 with m_timecode set, m_pos set just beyond the
-// Timecode payload, and m_element_size set when the cluster size is known.
-// Returns 0 immediately if the cluster is already at least partially loaded.
-//
-// pos and len are in/out: when E_BUFFER_NOT_FULL is returned, pos is the
-// position being read and len is normally the byte count needed there.
-// Other results are E_PARSE_FAILED (no segment, or inconsistent state),
-// E_FILE_FORMAT_INVALID (malformed data, missing Timecode or block), and any
-// negative status returned by the reader helpers.
-long Cluster::Load(long long& pos, long& len) const {
-  if (m_pSegment == NULL)
-    return E_PARSE_FAILED;
-
-  if (m_timecode >= 0)  // at least partially loaded
-    return 0;
-
-  if (m_pos != m_element_start || m_element_size >= 0)
-    return E_PARSE_FAILED;
-
-  IMkvReader* const pReader = m_pSegment->m_pReader;
-  long long total, avail;
-  const int status = pReader->Length(&total, &avail);
-
-  if (status < 0)  // error
-    return status;
-
-  if (total >= 0 && (avail > total || m_pos > total))
-    return E_FILE_FORMAT_INVALID;
-
-  pos = m_pos;
-
-  long long cluster_size = -1;
-
-  if ((pos + 1) > avail) {
-    len = 1;
-    return E_BUFFER_NOT_FULL;
-  }
-
-  long long result = GetUIntLength(pReader, pos, len);
-
-  if (result < 0)  // error or underflow
-    return static_cast<long>(result);
-
-  if (result > 0)
-    return E_BUFFER_NOT_FULL;
-
-  if ((pos + len) > avail)
-    return E_BUFFER_NOT_FULL;
-
-  const long long id_ = ReadID(pReader, pos, len);
-
-  if (id_ < 0)  // error
-    return static_cast<long>(id_);
-
-  if (id_ != libwebm::kMkvCluster)
-    return E_FILE_FORMAT_INVALID;
-
-  pos += len;  // consume id
-
-  // read cluster size
-
-  if ((pos + 1) > avail) {
-    len = 1;
-    return E_BUFFER_NOT_FULL;
-  }
-
-  result = GetUIntLength(pReader, pos, len);
-
-  if (result < 0)  // error
-    return static_cast<long>(result);
-
-  if (result > 0)
-    return E_BUFFER_NOT_FULL;
-
-  if ((pos + len) > avail)
-    return E_BUFFER_NOT_FULL;
-
-  const long long size = ReadUInt(pReader, pos, len);
-
-  // Propagate the status ReadUInt() reported. The previous code returned
-  // cluster_size here, which is always -1 at this point and hid the real
-  // error code from the caller.
-  if (size < 0)  // error
-    return static_cast<long>(size);
-
-  if (size == 0)
-    return E_FILE_FORMAT_INVALID;
-
-  pos += len;  // consume length of size of element
-
-  // An all-ones size field of this length means "size unknown"; in that
-  // case cluster_size stays -1 and the end is found by scanning.
-  const long long unknown_size = (1LL << (7 * len)) - 1;
-
-  if (size != unknown_size)
-    cluster_size = size;
-
-  // pos points to start of payload
-  long long timecode = -1;
-  long long new_pos = -1;
-  bool bBlock = false;
-
-  long long cluster_stop = (cluster_size < 0) ? -1 : pos + cluster_size;
-
-  for (;;) {
-    if ((cluster_stop >= 0) && (pos >= cluster_stop))
-      break;
-
-    // Parse ID
-
-    if ((pos + 1) > avail) {
-      len = 1;
-      return E_BUFFER_NOT_FULL;
-    }
-
-    result = GetUIntLength(pReader, pos, len);
-
-    if (result < 0)  // error
-      return static_cast<long>(result);
-
-    if (result > 0)
-      return E_BUFFER_NOT_FULL;
-
-    if ((cluster_stop >= 0) && ((pos + len) > cluster_stop))
-      return E_FILE_FORMAT_INVALID;
-
-    if ((pos + len) > avail)
-      return E_BUFFER_NOT_FULL;
-
-    const long long id = ReadID(pReader, pos, len);
-
-    if (id < 0)  // error
-      return static_cast<long>(id);
-
-    if (id == 0)
-      return E_FILE_FORMAT_INVALID;
-
-    // This is the distinguished set of ID's we use to determine
-    // that we have exhausted the sub-element's inside the cluster
-    // whose ID we parsed earlier.
-
-    if (id == libwebm::kMkvCluster)
-      break;
-
-    if (id == libwebm::kMkvCues)
-      break;
-
-    pos += len;  // consume ID field
-
-    // Parse Size
-
-    if ((pos + 1) > avail) {
-      len = 1;
-      return E_BUFFER_NOT_FULL;
-    }
-
-    result = GetUIntLength(pReader, pos, len);
-
-    if (result < 0)  // error
-      return static_cast<long>(result);
-
-    if (result > 0)
-      return E_BUFFER_NOT_FULL;
-
-    if ((cluster_stop >= 0) && ((pos + len) > cluster_stop))
-      return E_FILE_FORMAT_INVALID;
-
-    if ((pos + len) > avail)
-      return E_BUFFER_NOT_FULL;
-
-    const long long size = ReadUInt(pReader, pos, len);
-
-    if (size < 0)  // error
-      return static_cast<long>(size);
-
-    // Children of a cluster must have a known size.
-    const long long unknown_size = (1LL << (7 * len)) - 1;
-
-    if (size == unknown_size)
-      return E_FILE_FORMAT_INVALID;
-
-    pos += len;  // consume size field
-
-    if ((cluster_stop >= 0) && (pos > cluster_stop))
-      return E_FILE_FORMAT_INVALID;
-
-    // pos now points to start of payload
-
-    if (size == 0)
-      continue;
-
-    if ((cluster_stop >= 0) && ((pos + size) > cluster_stop))
-      return E_FILE_FORMAT_INVALID;
-
-    if (id == libwebm::kMkvTimecode) {
-      len = static_cast<long>(size);
-
-      if ((pos + size) > avail)
-        return E_BUFFER_NOT_FULL;
-
-      timecode = UnserializeUInt(pReader, pos, size);
-
-      if (timecode < 0)  // error (or underflow)
-        return static_cast<long>(timecode);
-
-      new_pos = pos + size;
-
-      if (bBlock)
-        break;
-    } else if (id == libwebm::kMkvBlockGroup) {
-      bBlock = true;
-      break;
-    } else if (id == libwebm::kMkvSimpleBlock) {
-      bBlock = true;
-      break;
-    }
-
-    pos += size;  // consume payload
-    if (cluster_stop >= 0 && pos > cluster_stop)
-      return E_FILE_FORMAT_INVALID;
-  }
-
-  if (cluster_stop >= 0 && pos > cluster_stop)
-    return E_FILE_FORMAT_INVALID;
-
-  if (timecode < 0)  // no timecode found
-    return E_FILE_FORMAT_INVALID;
-
-  if (!bBlock)
-    return E_FILE_FORMAT_INVALID;
-
-  m_pos = new_pos;  // designates position just beyond timecode payload
-  m_timecode = timecode;  // m_timecode >= 0 means we're partially loaded
-
-  if (cluster_size >= 0)
-    m_element_size = cluster_stop - m_element_start;
-
-  return 0;
-}
-/* Continues parsing this cluster from m_pos after Load() has succeeded.
- *
- * Scans children until a BlockGroup or SimpleBlock is reached and hands it
- * to ParseBlockGroup()/ParseSimpleBlock(), returning that call's result.
- * Returns 1 when there are no more entries, E_BUFFER_NOT_FULL when more data
- * is required (see pos and len), and another negative status on error.
- */
-long Cluster::Parse(long long& pos, long& len) const {
- long status = Load(pos, len);
-
- if (status < 0)
- return status;
-
- if (m_pos < m_element_start || m_timecode < 0)
- return E_PARSE_FAILED;
- // A cluster_stop of -1 means the cluster's size is not known yet.
- const long long cluster_stop =
- (m_element_size < 0) ? -1 : m_element_start + m_element_size;
-
- if ((cluster_stop >= 0) && (m_pos >= cluster_stop))
- return 1; // nothing else to do
-
- IMkvReader* const pReader = m_pSegment->m_pReader;
-
- long long total, avail;
-
- status = pReader->Length(&total, &avail);
-
- if (status < 0) // error
- return status;
-
- if (total >= 0 && avail > total)
- return E_FILE_FORMAT_INVALID;
-
- pos = m_pos;
-
- for (;;) {
- if ((cluster_stop >= 0) && (pos >= cluster_stop))
- break;
-
- if ((total >= 0) && (pos >= total)) {
- if (m_element_size < 0)
- m_element_size = pos - m_element_start;
-
- break;
- }
-
- // Parse ID
-
- if ((pos + 1) > avail) {
- len = 1;
- return E_BUFFER_NOT_FULL;
- }
-
- long long result = GetUIntLength(pReader, pos, len);
-
- if (result < 0) // error
- return static_cast<long>(result);
-
- if (result > 0)
- return E_BUFFER_NOT_FULL;
-
- if ((cluster_stop >= 0) && ((pos + len) > cluster_stop))
- return E_FILE_FORMAT_INVALID;
-
- if ((pos + len) > avail)
- return E_BUFFER_NOT_FULL;
-
- const long long id = ReadID(pReader, pos, len);
-
- if (id < 0)
- return E_FILE_FORMAT_INVALID;
-
- // This is the distinguished set of ID's we use to determine
- // that we have exhausted the sub-element's inside the cluster
- // whose ID we parsed earlier.
-
- if ((id == libwebm::kMkvCluster) || (id == libwebm::kMkvCues)) {
- if (m_element_size < 0)
- m_element_size = pos - m_element_start;
-
- break;
- }
-
- pos += len; // consume ID field
-
- // Parse Size
-
- if ((pos + 1) > avail) {
- len = 1;
- return E_BUFFER_NOT_FULL;
- }
-
- result = GetUIntLength(pReader, pos, len);
-
- if (result < 0) // error
- return static_cast<long>(result);
-
- if (result > 0)
- return E_BUFFER_NOT_FULL;
-
- if ((cluster_stop >= 0) && ((pos + len) > cluster_stop))
- return E_FILE_FORMAT_INVALID;
-
- if ((pos + len) > avail)
- return E_BUFFER_NOT_FULL;
-
- const long long size = ReadUInt(pReader, pos, len);
-
- if (size < 0) // error
- return static_cast<long>(size);
-
- const long long unknown_size = (1LL << (7 * len)) - 1;
-
- if (size == unknown_size)
- return E_FILE_FORMAT_INVALID;
-
- pos += len; // consume size field
-
- if ((cluster_stop >= 0) && (pos > cluster_stop))
- return E_FILE_FORMAT_INVALID;
-
- // pos now points to start of payload
-
- if (size == 0)
- continue;
-
- // const long long block_start = pos;
- const long long block_stop = pos + size;
- // Known cluster size: an overrunning block is an error; any other overrunning child ends the scan at cluster_stop.
- if (cluster_stop >= 0) {
- if (block_stop > cluster_stop) {
- if (id == libwebm::kMkvBlockGroup || id == libwebm::kMkvSimpleBlock) {
- return E_FILE_FORMAT_INVALID;
- }
-
- pos = cluster_stop;
- break;
- }
- } else if ((total >= 0) && (block_stop > total)) {
- m_element_size = total - m_element_start;
- pos = total;
- break;
- } else if (block_stop > avail) {
- len = static_cast<long>(size);
- return E_BUFFER_NOT_FULL;
- }
- // The block parsers are non-const members, so constness is cast away here.
- Cluster* const this_ = const_cast<Cluster*>(this);
-
- if (id == libwebm::kMkvBlockGroup)
- return this_->ParseBlockGroup(size, pos, len);
-
- if (id == libwebm::kMkvSimpleBlock)
- return this_->ParseSimpleBlock(size, pos, len);
-
- pos += size; // consume payload
- if (cluster_stop >= 0 && pos > cluster_stop)
- return E_FILE_FORMAT_INVALID;
- }
-
- if (m_element_size < 1)
- return E_FILE_FORMAT_INVALID;
-
- m_pos = pos;
- if (cluster_stop >= 0 && m_pos > cluster_stop)
- return E_FILE_FORMAT_INVALID;
- // Check that the last stored block lies within the cluster and the stream.
- if (m_entries_count > 0) {
- const long idx = m_entries_count - 1;
-
- const BlockEntry* const pLast = m_entries[idx];
- if (pLast == NULL)
- return E_PARSE_FAILED;
-
- const Block* const pBlock = pLast->GetBlock();
- if (pBlock == NULL)
- return E_PARSE_FAILED;
-
- const long long start = pBlock->m_start;
-
- if ((total >= 0) && (start > total))
- return E_PARSE_FAILED; // defend against truncated stream
-
- const long long size = pBlock->m_size;
-
- const long long stop = start + size;
- if (cluster_stop >= 0 && stop > cluster_stop)
- return E_FILE_FORMAT_INVALID;
-
- if ((total >= 0) && (stop > total))
- return E_PARSE_FAILED; // defend against truncated stream
- }
-
- return 1; // no more entries
-}
-/* Validates the header of a SimpleBlock whose payload starts at pos and is
- * block_size bytes long, then records it through CreateBlock().
- *
- * The track number, the two timecode bytes and the flags byte must all fit
- * inside the block. On success returns 0 and sets m_pos to the block end.
- * Returns E_BUFFER_NOT_FULL (with len set on most paths) when more data is
- * needed, E_FILE_FORMAT_INVALID for a malformed block, or the failing status
- * of the reader or of CreateBlock().
- */
-long Cluster::ParseSimpleBlock(long long block_size, long long& pos,
- long& len) {
- const long long block_start = pos;
- const long long block_stop = pos + block_size;
-
- IMkvReader* const pReader = m_pSegment->m_pReader;
-
- long long total, avail;
-
- long status = pReader->Length(&total, &avail);
-
- if (status < 0) // error
- return status;
-
- assert((total < 0) || (avail <= total));
-
- // parse track number
-
- if ((pos + 1) > avail) {
- len = 1;
- return E_BUFFER_NOT_FULL;
- }
-
- long long result = GetUIntLength(pReader, pos, len);
-
- if (result < 0) // error
- return static_cast<long>(result);
-
- if (result > 0) // weird
- return E_BUFFER_NOT_FULL;
-
- if ((pos + len) > block_stop)
- return E_FILE_FORMAT_INVALID;
-
- if ((pos + len) > avail)
- return E_BUFFER_NOT_FULL;
-
- const long long track = ReadUInt(pReader, pos, len);
-
- if (track < 0) // error
- return static_cast<long>(track);
-
- if (track == 0)
- return E_FILE_FORMAT_INVALID;
-
- pos += len; // consume track number
-
- if ((pos + 2) > block_stop)
- return E_FILE_FORMAT_INVALID;
-
- if ((pos + 2) > avail) {
- len = 2;
- return E_BUFFER_NOT_FULL;
- }
-
- pos += 2; // consume timecode
-
- if ((pos + 1) > block_stop)
- return E_FILE_FORMAT_INVALID;
-
- if ((pos + 1) > avail) {
- len = 1;
- return E_BUFFER_NOT_FULL;
- }
-
- unsigned char flags;
-
- status = pReader->Read(pos, 1, &flags);
-
- if (status < 0) { // error or underflow
- len = 1;
- return status;
- }
-
- ++pos; // consume flags byte
- assert(pos <= avail);
- // The block must still contain at least one byte after the flags byte.
- if (pos >= block_stop)
- return E_FILE_FORMAT_INVALID;
- // Bits 1-2 of the flags byte hold the lacing value.
- const int lacing = int(flags & 0x06) >> 1;
- // With lacing in use, the whole block must be available before it is recorded.
- if ((lacing != 0) && (block_stop > avail)) {
- len = static_cast<long>(block_stop - pos);
- return E_BUFFER_NOT_FULL;
- }
-
- status = CreateBlock(libwebm::kMkvSimpleBlock, block_start, block_size,
- 0); // DiscardPadding
-
- if (status != 0)
- return status;
-
- m_pos = block_stop;
-
- return 0; // success
-}
-
-long Cluster::ParseBlockGroup(long long payload_size, long long& pos,
- long& len) {
- const long long payload_start = pos;
- const long long payload_stop = pos + payload_size;
-
- IMkvReader* const pReader = m_pSegment->m_pReader;
-
- long long total, avail;
-
- long status = pReader->Length(&total, &avail);
-
- if (status < 0) // error
- return status;
-
- assert((total < 0) || (avail <= total));
-
- if ((total >= 0) && (payload_stop > total))
- return E_FILE_FORMAT_INVALID;
-
- if (payload_stop > avail) {
- len = static_cast<long>(payload_size);
- return E_BUFFER_NOT_FULL;
- }
-
- long long discard_padding = 0;
-
- while (pos < payload_stop) {
- // parse sub-block element ID
-
- if ((pos + 1) > avail) {
- len = 1;
- return E_BUFFER_NOT_FULL;
- }
-
- long long result = GetUIntLength(pReader, pos, len);
-
- if (result < 0) // error
- return static_cast<long>(result);
-
- if (result > 0) // weird
- return E_BUFFER_NOT_FULL;
-
- if ((pos + len) > payload_stop)
- return E_FILE_FORMAT_INVALID;
-
- if ((pos + len) > avail)
- return E_BUFFER_NOT_FULL;
-
- const long long id = ReadID(pReader, pos, len);
-
- if (id < 0) // error
- return static_cast<long>(id);
-
- if (id == 0) // not a valid ID
- return E_FILE_FORMAT_INVALID;
-
- pos += len; // consume ID field
-
- // Parse Size
-
- if ((pos + 1) > avail) {
- len = 1;
- return E_BUFFER_NOT_FULL;
- }
-
- result = GetUIntLength(pReader, pos, len);
-
- if (result < 0) // error
- return static_cast<long>(result);
-
- if (result > 0) // weird
- return E_BUFFER_NOT_FULL;
-
- if ((pos + len) > payload_stop)
- return E_FILE_FORMAT_INVALID;
-
- if ((pos + len) > avail)
- return E_BUFFER_NOT_FULL;
-
- const long long size = ReadUInt(pReader, pos, len);
-
- if (size < 0) // error
- return static_cast<long>(size);
-
- pos += len; // consume size field
-
- // pos now points to start of sub-block group payload
-
- if (pos > payload_stop)
- return E_FILE_FORMAT_INVALID;
-
- if (size == 0) // weird
- continue;
-
- const long long unknown_size = (1LL << (7 * len)) - 1;
-
- if (size == unknown_size)
- return E_FILE_FORMAT_INVALID;
-
- if (id == libwebm::kMkvDiscardPadding) {
- status = UnserializeInt(pReader, pos, size, discard_padding);
-
- if (status < 0) // error
- return status;
- }
-
- if (id != libwebm::kMkvBlock) {
- pos += size; // consume sub-part of block group
-
- if (pos > payload_stop)
- return E_FILE_FORMAT_INVALID;
-
- continue;
- }
-
- const long long block_stop = pos + size;
-
- if (block_stop > payload_stop)
- return E_FILE_FORMAT_INVALID;
-
- // parse track number
-
- if ((pos + 1) > avail) {
- len = 1;
- return E_BUFFER_NOT_FULL;
- }
-
- result = GetUIntLength(pReader, pos, len);
-
- if (result < 0) // error
- return static_cast<long>(result);
-
- if (result > 0) // weird
- return E_BUFFER_NOT_FULL;
-
- if ((pos + len) > block_stop)
- return E_FILE_FORMAT_INVALID;
-
- if ((pos + len) > avail)
- return E_BUFFER_NOT_FULL;
-
- const long long track = ReadUInt(pReader, pos, len);
-
- if (track < 0) // error
- return static_cast<long>(track);
-
- if (track == 0)
- return E_FILE_FORMAT_INVALID;
-
- pos += len; // consume track number
-
- if ((pos + 2) > block_stop)
- return E_FILE_FORMAT_INVALID;
-
- if ((pos + 2) > avail) {
- len = 2;
- return E_BUFFER_NOT_FULL;
- }
-
- pos += 2; // consume timecode
-
- if ((pos + 1) > block_stop)
- return E_FILE_FORMAT_INVALID;
-
- if ((pos + 1) > avail) {
- len = 1;
- return E_BUFFER_NOT_FULL;
- }
-
- unsigned char flags;
-
- status = pReader->Read(pos, 1, &flags);
-
- if (status < 0) { // error or underflow
- len = 1;
- return status;
- }
-
- ++pos; // consume flags byte
- assert(pos <= avail);
-
- if (pos >= block_stop)
- return E_FILE_FORMAT_INVALID;
-
- const int lacing = int(flags & 0x06) >> 1;
-
- if ((lacing != 0) && (block_stop > avail)) {
- len = static_cast<long>(block_stop - pos);
- return E_BUFFER_NOT_FULL;
- }
-
- pos = block_stop; // consume block-part of block group
- if (pos > payload_stop)
- return E_FILE_FORMAT_INVALID;
- }
-
- if (pos != payload_stop)
- return E_FILE_FORMAT_INVALID;
-
- status = CreateBlock(libwebm::kMkvBlockGroup, payload_start, payload_size,
- discard_padding);
- if (status != 0)
- return status;
-
- m_pos = payload_stop;
-
- return 0; // success
-}
-
-long Cluster::GetEntry(long index, const mkvparser::BlockEntry*& pEntry) const {
- assert(m_pos >= m_element_start);
-
- pEntry = NULL;
-
- if (index < 0)
- return -1; // generic error
-
- if (m_entries_count < 0)
- return E_BUFFER_NOT_FULL;
-
- assert(m_entries);
- assert(m_entries_size > 0);
- assert(m_entries_count <= m_entries_size);
-
- if (index < m_entries_count) {
- pEntry = m_entries[index];
- assert(pEntry);
-
- return 1; // found entry
- }
-
- if (m_element_size < 0) // we don't know cluster end yet
- return E_BUFFER_NOT_FULL; // underflow
-
- const long long element_stop = m_element_start + m_element_size;
-
- if (m_pos >= element_stop)
- return 0; // nothing left to parse
-
- return E_BUFFER_NOT_FULL; // underflow, since more remains to be parsed
-}
-
-Cluster* Cluster::Create(Segment* pSegment, long idx, long long off) {
- if (!pSegment || off < 0)
- return NULL;
-
- const long long element_start = pSegment->m_start + off;
-
- Cluster* const pCluster =
- new (std::nothrow) Cluster(pSegment, idx, element_start);
-
- return pCluster;
-}
-
-Cluster::Cluster()
- : m_pSegment(NULL),
- m_element_start(0),
- m_index(0),
- m_pos(0),
- m_element_size(0),
- m_timecode(0),
- m_entries(NULL),
- m_entries_size(0),
- m_entries_count(0) // means "no entries"
-{}
-
-Cluster::Cluster(Segment* pSegment, long idx, long long element_start
- /* long long element_size */)
- : m_pSegment(pSegment),
- m_element_start(element_start),
- m_index(idx),
- m_pos(element_start),
- m_element_size(-1 /* element_size */),
- m_timecode(-1),
- m_entries(NULL),
- m_entries_size(0),
- m_entries_count(-1) // means "has not been parsed yet"
-{}
-
-Cluster::~Cluster() {
- if (m_entries_count <= 0) {
- delete[] m_entries;
- return;
- }
-
- BlockEntry** i = m_entries;
- BlockEntry** const j = m_entries + m_entries_count;
-
- while (i != j) {
- BlockEntry* p = *i++;
- assert(p);
-
- delete p;
- }
-
- delete[] m_entries;
-}
-
-bool Cluster::EOS() const { return (m_pSegment == NULL); }
-
-long Cluster::GetIndex() const { return m_index; }
-
-long long Cluster::GetPosition() const {
- const long long pos = m_element_start - m_pSegment->m_start;
- assert(pos >= 0);
-
- return pos;
-}
-
-long long Cluster::GetElementSize() const { return m_element_size; }
-
-long Cluster::HasBlockEntries(
- const Segment* pSegment,
- long long off, // relative to start of segment payload
- long long& pos, long& len) {
- assert(pSegment);
- assert(off >= 0); // relative to segment
-
- IMkvReader* const pReader = pSegment->m_pReader;
-
- long long total, avail;
-
- long status = pReader->Length(&total, &avail);
-
- if (status < 0) // error
- return status;
-
- assert((total < 0) || (avail <= total));
-
- pos = pSegment->m_start + off; // absolute
-
- if ((total >= 0) && (pos >= total))
- return 0; // we don't even have a complete cluster
-
- const long long segment_stop =
- (pSegment->m_size < 0) ? -1 : pSegment->m_start + pSegment->m_size;
-
- long long cluster_stop = -1; // interpreted later to mean "unknown size"
-
- {
- if ((pos + 1) > avail) {
- len = 1;
- return E_BUFFER_NOT_FULL;
- }
-
- long long result = GetUIntLength(pReader, pos, len);
-
- if (result < 0) // error
- return static_cast<long>(result);
-
- if (result > 0) // need more data
- return E_BUFFER_NOT_FULL;
-
- if ((segment_stop >= 0) && ((pos + len) > segment_stop))
- return E_FILE_FORMAT_INVALID;
-
- if ((total >= 0) && ((pos + len) > total))
- return 0;
-
- if ((pos + len) > avail)
- return E_BUFFER_NOT_FULL;
-
- const long long id = ReadID(pReader, pos, len);
-
- if (id < 0) // error
- return static_cast<long>(id);
-
- if (id != libwebm::kMkvCluster)
- return E_PARSE_FAILED;
-
- pos += len; // consume Cluster ID field
-
- // read size field
-
- if ((pos + 1) > avail) {
- len = 1;
- return E_BUFFER_NOT_FULL;
- }
-
- result = GetUIntLength(pReader, pos, len);
-
- if (result < 0) // error
- return static_cast<long>(result);
-
- if (result > 0) // weird
- return E_BUFFER_NOT_FULL;
-
- if ((segment_stop >= 0) && ((pos + len) > segment_stop))
- return E_FILE_FORMAT_INVALID;
-
- if ((total >= 0) && ((pos + len) > total))
- return 0;
-
- if ((pos + len) > avail)
- return E_BUFFER_NOT_FULL;
-
- const long long size = ReadUInt(pReader, pos, len);
-
- if (size < 0) // error
- return static_cast<long>(size);
-
- if (size == 0)
- return 0; // cluster does not have entries
-
- pos += len; // consume size field
-
- // pos now points to start of payload
-
- const long long unknown_size = (1LL << (7 * len)) - 1;
-
- if (size != unknown_size) {
- cluster_stop = pos + size;
- assert(cluster_stop >= 0);
-
- if ((segment_stop >= 0) && (cluster_stop > segment_stop))
- return E_FILE_FORMAT_INVALID;
-
- if ((total >= 0) && (cluster_stop > total))
- // return E_FILE_FORMAT_INVALID; //too conservative
- return 0; // cluster does not have any entries
- }
- }
-
- for (;;) {
- if ((cluster_stop >= 0) && (pos >= cluster_stop))
- return 0; // no entries detected
-
- if ((pos + 1) > avail) {
- len = 1;
- return E_BUFFER_NOT_FULL;
- }
-
- long long result = GetUIntLength(pReader, pos, len);
-
- if (result < 0) // error
- return static_cast<long>(result);
-
- if (result > 0) // need more data
- return E_BUFFER_NOT_FULL;
-
- if ((cluster_stop >= 0) && ((pos + len) > cluster_stop))
- return E_FILE_FORMAT_INVALID;
-
- if ((pos + len) > avail)
- return E_BUFFER_NOT_FULL;
-
- const long long id = ReadID(pReader, pos, len);
-
- if (id < 0) // error
- return static_cast<long>(id);
-
- // This is the distinguished set of ID's we use to determine
- // that we have exhausted the sub-element's inside the cluster
- // whose ID we parsed earlier.
-
- if (id == libwebm::kMkvCluster)
- return 0; // no entries found
-
- if (id == libwebm::kMkvCues)
- return 0; // no entries found
-
- pos += len; // consume id field
-
- if ((cluster_stop >= 0) && (pos >= cluster_stop))
- return E_FILE_FORMAT_INVALID;
-
- // read size field
-
- if ((pos + 1) > avail) {
- len = 1;
- return E_BUFFER_NOT_FULL;
- }
-
- result = GetUIntLength(pReader, pos, len);
-
- if (result < 0) // error
- return static_cast<long>(result);
-
- if (result > 0) // underflow
- return E_BUFFER_NOT_FULL;
-
- if ((cluster_stop >= 0) && ((pos + len) > cluster_stop))
- return E_FILE_FORMAT_INVALID;
-
- if ((pos + len) > avail)
- return E_BUFFER_NOT_FULL;
-
- const long long size = ReadUInt(pReader, pos, len);
-
- if (size < 0) // error
- return static_cast<long>(size);
-
- pos += len; // consume size field
-
- // pos now points to start of payload
-
- if ((cluster_stop >= 0) && (pos > cluster_stop))
- return E_FILE_FORMAT_INVALID;
-
- if (size == 0) // weird
- continue;
-
- const long long unknown_size = (1LL << (7 * len)) - 1;
-
- if (size == unknown_size)
- return E_FILE_FORMAT_INVALID; // not supported inside cluster
-
- if ((cluster_stop >= 0) && ((pos + size) > cluster_stop))
- return E_FILE_FORMAT_INVALID;
-
- if (id == libwebm::kMkvBlockGroup)
- return 1; // have at least one entry
-
- if (id == libwebm::kMkvSimpleBlock)
- return 1; // have at least one entry
-
- pos += size; // consume payload
- if (cluster_stop >= 0 && pos > cluster_stop)
- return E_FILE_FORMAT_INVALID;
- }
-}
-
-long long Cluster::GetTimeCode() const {
- long long pos;
- long len;
-
- const long status = Load(pos, len);
-
- if (status < 0) // error
- return status;
-
- return m_timecode;
-}
-
-long long Cluster::GetTime() const {
- const long long tc = GetTimeCode();
-
- if (tc < 0)
- return tc;
-
- const SegmentInfo* const pInfo = m_pSegment->GetInfo();
- assert(pInfo);
-
- const long long scale = pInfo->GetTimeCodeScale();
- assert(scale >= 1);
-
- const long long t = m_timecode * scale;
-
- return t;
-}
-
-long long Cluster::GetFirstTime() const {
- const BlockEntry* pEntry;
-
- const long status = GetFirst(pEntry);
-
- if (status < 0) // error
- return status;
-
- if (pEntry == NULL) // empty cluster
- return GetTime();
-
- const Block* const pBlock = pEntry->GetBlock();
- assert(pBlock);
-
- return pBlock->GetTime(this);
-}
-
-long long Cluster::GetLastTime() const {
- const BlockEntry* pEntry;
-
- const long status = GetLast(pEntry);
-
- if (status < 0) // error
- return status;
-
- if (pEntry == NULL) // empty cluster
- return GetTime();
-
- const Block* const pBlock = pEntry->GetBlock();
- assert(pBlock);
-
- return pBlock->GetTime(this);
-}
-
-long Cluster::CreateBlock(long long id,
- long long pos, // absolute pos of payload
- long long size, long long discard_padding) {
- if (id != libwebm::kMkvBlockGroup && id != libwebm::kMkvSimpleBlock)
- return E_PARSE_FAILED;
-
- if (m_entries_count < 0) { // haven't parsed anything yet
- assert(m_entries == NULL);
- assert(m_entries_size == 0);
-
- m_entries_size = 1024;
- m_entries = new (std::nothrow) BlockEntry*[m_entries_size];
- if (m_entries == NULL)
- return -1;
-
- m_entries_count = 0;
- } else {
- assert(m_entries);
- assert(m_entries_size > 0);
- assert(m_entries_count <= m_entries_size);
-
- if (m_entries_count >= m_entries_size) {
- const long entries_size = 2 * m_entries_size;
-
- BlockEntry** const entries = new (std::nothrow) BlockEntry*[entries_size];
- if (entries == NULL)
- return -1;
-
- BlockEntry** src = m_entries;
- BlockEntry** const src_end = src + m_entries_count;
-
- BlockEntry** dst = entries;
-
- while (src != src_end)
- *dst++ = *src++;
-
- delete[] m_entries;
-
- m_entries = entries;
- m_entries_size = entries_size;
- }
- }
-
- if (id == libwebm::kMkvBlockGroup)
- return CreateBlockGroup(pos, size, discard_padding);
- else
- return CreateSimpleBlock(pos, size);
-}
-
-long Cluster::CreateBlockGroup(long long start_offset, long long size,
- long long discard_padding) {
- assert(m_entries);
- assert(m_entries_size > 0);
- assert(m_entries_count >= 0);
- assert(m_entries_count < m_entries_size);
-
- IMkvReader* const pReader = m_pSegment->m_pReader;
-
- long long pos = start_offset;
- const long long stop = start_offset + size;
-
- // For WebM files, there is a bias towards previous reference times
- //(in order to support alt-ref frames, which refer back to the previous
- // keyframe). Normally a 0 value is not possible, but here we tenatively
- // allow 0 as the value of a reference frame, with the interpretation
- // that this is a "previous" reference time.
-
- long long prev = 1; // nonce
- long long next = 0; // nonce
- long long duration = -1; // really, this is unsigned
-
- long long bpos = -1;
- long long bsize = -1;
-
- while (pos < stop) {
- long len;
- const long long id = ReadID(pReader, pos, len);
- if (id < 0 || (pos + len) > stop)
- return E_FILE_FORMAT_INVALID;
-
- pos += len; // consume ID
-
- const long long size = ReadUInt(pReader, pos, len);
- assert(size >= 0); // TODO
- assert((pos + len) <= stop);
-
- pos += len; // consume size
-
- if (id == libwebm::kMkvBlock) {
- if (bpos < 0) { // Block ID
- bpos = pos;
- bsize = size;
- }
- } else if (id == libwebm::kMkvBlockDuration) {
- if (size > 8)
- return E_FILE_FORMAT_INVALID;
-
- duration = UnserializeUInt(pReader, pos, size);
-
- if (duration < 0)
- return E_FILE_FORMAT_INVALID;
- } else if (id == libwebm::kMkvReferenceBlock) {
- if (size > 8 || size <= 0)
- return E_FILE_FORMAT_INVALID;
- const long size_ = static_cast<long>(size);
-
- long long time;
-
- long status = UnserializeInt(pReader, pos, size_, time);
- assert(status == 0);
- if (status != 0)
- return -1;
-
- if (time <= 0) // see note above
- prev = time;
- else
- next = time;
- }
-
- pos += size; // consume payload
- if (pos > stop)
- return E_FILE_FORMAT_INVALID;
- }
- if (bpos < 0)
- return E_FILE_FORMAT_INVALID;
-
- if (pos != stop)
- return E_FILE_FORMAT_INVALID;
- assert(bsize >= 0);
-
- const long idx = m_entries_count;
-
- BlockEntry** const ppEntry = m_entries + idx;
- BlockEntry*& pEntry = *ppEntry;
-
- pEntry = new (std::nothrow)
- BlockGroup(this, idx, bpos, bsize, prev, next, duration, discard_padding);
-
- if (pEntry == NULL)
- return -1; // generic error
-
- BlockGroup* const p = static_cast<BlockGroup*>(pEntry);
-
- const long status = p->Parse();
-
- if (status == 0) { // success
- ++m_entries_count;
- return 0;
- }
-
- delete pEntry;
- pEntry = 0;
-
- return status;
-}
-
-long Cluster::CreateSimpleBlock(long long st, long long sz) {
- assert(m_entries);
- assert(m_entries_size > 0);
- assert(m_entries_count >= 0);
- assert(m_entries_count < m_entries_size);
-
- const long idx = m_entries_count;
-
- BlockEntry** const ppEntry = m_entries + idx;
- BlockEntry*& pEntry = *ppEntry;
-
- pEntry = new (std::nothrow) SimpleBlock(this, idx, st, sz);
-
- if (pEntry == NULL)
- return -1; // generic error
-
- SimpleBlock* const p = static_cast<SimpleBlock*>(pEntry);
-
- const long status = p->Parse();
-
- if (status == 0) {
- ++m_entries_count;
- return 0;
- }
-
- delete pEntry;
- pEntry = 0;
-
- return status;
-}
-
-long Cluster::GetFirst(const BlockEntry*& pFirst) const {
- if (m_entries_count <= 0) {
- long long pos;
- long len;
-
- const long status = Parse(pos, len);
-
- if (status < 0) { // error
- pFirst = NULL;
- return status;
- }
-
- if (m_entries_count <= 0) { // empty cluster
- pFirst = NULL;
- return 0;
- }
- }
-
- assert(m_entries);
-
- pFirst = m_entries[0];
- assert(pFirst);
-
- return 0; // success
-}
-
-long Cluster::GetLast(const BlockEntry*& pLast) const {
- for (;;) {
- long long pos;
- long len;
-
- const long status = Parse(pos, len);
-
- if (status < 0) { // error
- pLast = NULL;
- return status;
- }
-
- if (status > 0) // no new block
- break;
- }
-
- if (m_entries_count <= 0) {
- pLast = NULL;
- return 0;
- }
-
- assert(m_entries);
-
- const long idx = m_entries_count - 1;
-
- pLast = m_entries[idx];
- assert(pLast);
-
- return 0;
-}
-
-long Cluster::GetNext(const BlockEntry* pCurr, const BlockEntry*& pNext) const {
- assert(pCurr);
- assert(m_entries);
- assert(m_entries_count > 0);
-
- size_t idx = pCurr->GetIndex();
- assert(idx < size_t(m_entries_count));
- assert(m_entries[idx] == pCurr);
-
- ++idx;
-
- if (idx >= size_t(m_entries_count)) {
- long long pos;
- long len;
-
- const long status = Parse(pos, len);
-
- if (status < 0) { // error
- pNext = NULL;
- return status;
- }
-
- if (status > 0) {
- pNext = NULL;
- return 0;
- }
-
- assert(m_entries);
- assert(m_entries_count > 0);
- assert(idx < size_t(m_entries_count));
- }
-
- pNext = m_entries[idx];
- assert(pNext);
-
- return 0;
-}
-
-long Cluster::GetEntryCount() const { return m_entries_count; }
-
-const BlockEntry* Cluster::GetEntry(const Track* pTrack,
- long long time_ns) const {
- assert(pTrack);
-
- if (m_pSegment == NULL) // this is the special EOS cluster
- return pTrack->GetEOS();
-
- const BlockEntry* pResult = pTrack->GetEOS();
-
- long index = 0;
-
- for (;;) {
- if (index >= m_entries_count) {
- long long pos;
- long len;
-
- const long status = Parse(pos, len);
- assert(status >= 0);
-
- if (status > 0) // completely parsed, and no more entries
- return pResult;
-
- if (status < 0) // should never happen
- return 0;
-
- assert(m_entries);
- assert(index < m_entries_count);
- }
-
- const BlockEntry* const pEntry = m_entries[index];
- assert(pEntry);
- assert(!pEntry->EOS());
-
- const Block* const pBlock = pEntry->GetBlock();
- assert(pBlock);
-
- if (pBlock->GetTrackNumber() != pTrack->GetNumber()) {
- ++index;
- continue;
- }
-
- if (pTrack->VetEntry(pEntry)) {
- if (time_ns < 0) // just want first candidate block
- return pEntry;
-
- const long long ns = pBlock->GetTime(this);
-
- if (ns > time_ns)
- return pResult;
-
- pResult = pEntry; // have a candidate
- } else if (time_ns >= 0) {
- const long long ns = pBlock->GetTime(this);
-
- if (ns > time_ns)
- return pResult;
- }
-
- ++index;
- }
-}
-
-const BlockEntry* Cluster::GetEntry(const CuePoint& cp,
- const CuePoint::TrackPosition& tp) const {
- assert(m_pSegment);
- const long long tc = cp.GetTimeCode();
-
- if (tp.m_block > 0) {
- const long block = static_cast<long>(tp.m_block);
- const long index = block - 1;
-
- while (index >= m_entries_count) {
- long long pos;
- long len;
-
- const long status = Parse(pos, len);
-
- if (status < 0) // TODO: can this happen?
- return NULL;
-
- if (status > 0) // nothing remains to be parsed
- return NULL;
- }
-
- const BlockEntry* const pEntry = m_entries[index];
- assert(pEntry);
- assert(!pEntry->EOS());
-
- const Block* const pBlock = pEntry->GetBlock();
- assert(pBlock);
-
- if ((pBlock->GetTrackNumber() == tp.m_track) &&
- (pBlock->GetTimeCode(this) == tc)) {
- return pEntry;
- }
- }
-
- long index = 0;
-
- for (;;) {
- if (index >= m_entries_count) {
- long long pos;
- long len;
-
- const long status = Parse(pos, len);
-
- if (status < 0) // TODO: can this happen?
- return NULL;
-
- if (status > 0) // nothing remains to be parsed
- return NULL;
-
- assert(m_entries);
- assert(index < m_entries_count);
- }
-
- const BlockEntry* const pEntry = m_entries[index];
- assert(pEntry);
- assert(!pEntry->EOS());
-
- const Block* const pBlock = pEntry->GetBlock();
- assert(pBlock);
-
- if (pBlock->GetTrackNumber() != tp.m_track) {
- ++index;
- continue;
- }
-
- const long long tc_ = pBlock->GetTimeCode(this);
-
- if (tc_ < tc) {
- ++index;
- continue;
- }
-
- if (tc_ > tc)
- return NULL;
-
- const Tracks* const pTracks = m_pSegment->GetTracks();
- assert(pTracks);
-
- const long tn = static_cast<long>(tp.m_track);
- const Track* const pTrack = pTracks->GetTrackByNumber(tn);
-
- if (pTrack == NULL)
- return NULL;
-
- const long long type = pTrack->GetType();
-
- if (type == 2) // audio
- return pEntry;
-
- if (type != 1) // not video
- return NULL;
-
- if (!pBlock->IsKey())
- return NULL;
-
- return pEntry;
- }
-}
-
-BlockEntry::BlockEntry(Cluster* p, long idx) : m_pCluster(p), m_index(idx) {}
-BlockEntry::~BlockEntry() {}
-const Cluster* BlockEntry::GetCluster() const { return m_pCluster; }
-long BlockEntry::GetIndex() const { return m_index; }
-
-SimpleBlock::SimpleBlock(Cluster* pCluster, long idx, long long start,
- long long size)
- : BlockEntry(pCluster, idx), m_block(start, size, 0) {}
-
-long SimpleBlock::Parse() { return m_block.Parse(m_pCluster); }
-BlockEntry::Kind SimpleBlock::GetKind() const { return kBlockSimple; }
-const Block* SimpleBlock::GetBlock() const { return &m_block; }
-
-BlockGroup::BlockGroup(Cluster* pCluster, long idx, long long block_start,
- long long block_size, long long prev, long long next,
- long long duration, long long discard_padding)
- : BlockEntry(pCluster, idx),
- m_block(block_start, block_size, discard_padding),
- m_prev(prev),
- m_next(next),
- m_duration(duration) {}
-
-long BlockGroup::Parse() {
- const long status = m_block.Parse(m_pCluster);
-
- if (status)
- return status;
-
- m_block.SetKey((m_prev > 0) && (m_next <= 0));
-
- return 0;
-}
-
-BlockEntry::Kind BlockGroup::GetKind() const { return kBlockGroup; }
-const Block* BlockGroup::GetBlock() const { return &m_block; }
-long long BlockGroup::GetPrevTimeCode() const { return m_prev; }
-long long BlockGroup::GetNextTimeCode() const { return m_next; }
-long long BlockGroup::GetDurationTimeCode() const { return m_duration; }
-
-Block::Block(long long start, long long size_, long long discard_padding)
- : m_start(start),
- m_size(size_),
- m_track(0),
- m_timecode(-1),
- m_flags(0),
- m_frames(NULL),
- m_frame_count(-1),
- m_discard_padding(discard_padding) {}
-
-Block::~Block() { delete[] m_frames; }
-
-long Block::Parse(const Cluster* pCluster) {
- if (pCluster == NULL)
- return -1;
-
- if (pCluster->m_pSegment == NULL)
- return -1;
-
- assert(m_start >= 0);
- assert(m_size >= 0);
- assert(m_track <= 0);
- assert(m_frames == NULL);
- assert(m_frame_count <= 0);
-
- long long pos = m_start;
- const long long stop = m_start + m_size;
-
- long len;
-
- IMkvReader* const pReader = pCluster->m_pSegment->m_pReader;
-
- m_track = ReadUInt(pReader, pos, len);
-
- if (m_track <= 0)
- return E_FILE_FORMAT_INVALID;
-
- if ((pos + len) > stop)
- return E_FILE_FORMAT_INVALID;
-
- pos += len; // consume track number
-
- if ((stop - pos) < 2)
- return E_FILE_FORMAT_INVALID;
-
- long status;
- long long value;
-
- status = UnserializeInt(pReader, pos, 2, value);
-
- if (status)
- return E_FILE_FORMAT_INVALID;
-
- if (value < SHRT_MIN)
- return E_FILE_FORMAT_INVALID;
-
- if (value > SHRT_MAX)
- return E_FILE_FORMAT_INVALID;
-
- m_timecode = static_cast<short>(value);
-
- pos += 2;
-
- if ((stop - pos) <= 0)
- return E_FILE_FORMAT_INVALID;
-
- status = pReader->Read(pos, 1, &m_flags);
-
- if (status)
- return E_FILE_FORMAT_INVALID;
-
- const int lacing = int(m_flags & 0x06) >> 1;
-
- ++pos; // consume flags byte
-
- if (lacing == 0) { // no lacing
- if (pos > stop)
- return E_FILE_FORMAT_INVALID;
-
- m_frame_count = 1;
- m_frames = new (std::nothrow) Frame[m_frame_count];
- if (m_frames == NULL)
- return -1;
-
- Frame& f = m_frames[0];
- f.pos = pos;
-
- const long long frame_size = stop - pos;
-
- if (frame_size > LONG_MAX || frame_size <= 0)
- return E_FILE_FORMAT_INVALID;
-
- f.len = static_cast<long>(frame_size);
-
- return 0; // success
- }
-
- if (pos >= stop)
- return E_FILE_FORMAT_INVALID;
-
- unsigned char biased_count;
-
- status = pReader->Read(pos, 1, &biased_count);
-
- if (status)
- return E_FILE_FORMAT_INVALID;
-
- ++pos; // consume frame count
- if (pos > stop)
- return E_FILE_FORMAT_INVALID;
-
- m_frame_count = int(biased_count) + 1;
-
- m_frames = new (std::nothrow) Frame[m_frame_count];
- if (m_frames == NULL)
- return -1;
-
- if (!m_frames)
- return E_FILE_FORMAT_INVALID;
-
- if (lacing == 1) { // Xiph
- Frame* pf = m_frames;
- Frame* const pf_end = pf + m_frame_count;
-
- long long size = 0;
- int frame_count = m_frame_count;
-
- while (frame_count > 1) {
- long frame_size = 0;
-
- for (;;) {
- unsigned char val;
-
- if (pos >= stop)
- return E_FILE_FORMAT_INVALID;
-
- status = pReader->Read(pos, 1, &val);
-
- if (status)
- return E_FILE_FORMAT_INVALID;
-
- ++pos; // consume xiph size byte
-
- frame_size += val;
-
- if (val < 255)
- break;
- }
-
- Frame& f = *pf++;
- assert(pf < pf_end);
- if (pf >= pf_end)
- return E_FILE_FORMAT_INVALID;
-
- f.pos = 0; // patch later
-
- if (frame_size <= 0)
- return E_FILE_FORMAT_INVALID;
-
- f.len = frame_size;
- size += frame_size; // contribution of this frame
-
- --frame_count;
- }
-
- if (pf >= pf_end || pos > stop)
- return E_FILE_FORMAT_INVALID;
-
- {
- Frame& f = *pf++;
-
- if (pf != pf_end)
- return E_FILE_FORMAT_INVALID;
-
- f.pos = 0; // patch later
-
- const long long total_size = stop - pos;
-
- if (total_size < size)
- return E_FILE_FORMAT_INVALID;
-
- const long long frame_size = total_size - size;
-
- if (frame_size > LONG_MAX || frame_size <= 0)
- return E_FILE_FORMAT_INVALID;
-
- f.len = static_cast<long>(frame_size);
- }
-
- pf = m_frames;
- while (pf != pf_end) {
- Frame& f = *pf++;
- assert((pos + f.len) <= stop);
-
- if ((pos + f.len) > stop)
- return E_FILE_FORMAT_INVALID;
-
- f.pos = pos;
- pos += f.len;
- }
-
- assert(pos == stop);
- if (pos != stop)
- return E_FILE_FORMAT_INVALID;
-
- } else if (lacing == 2) { // fixed-size lacing
- if (pos >= stop)
- return E_FILE_FORMAT_INVALID;
-
- const long long total_size = stop - pos;
-
- if ((total_size % m_frame_count) != 0)
- return E_FILE_FORMAT_INVALID;
-
- const long long frame_size = total_size / m_frame_count;
-
- if (frame_size > LONG_MAX || frame_size <= 0)
- return E_FILE_FORMAT_INVALID;
-
- Frame* pf = m_frames;
- Frame* const pf_end = pf + m_frame_count;
-
- while (pf != pf_end) {
- assert((pos + frame_size) <= stop);
- if ((pos + frame_size) > stop)
- return E_FILE_FORMAT_INVALID;
-
- Frame& f = *pf++;
-
- f.pos = pos;
- f.len = static_cast<long>(frame_size);
-
- pos += frame_size;
- }
-
- assert(pos == stop);
- if (pos != stop)
- return E_FILE_FORMAT_INVALID;
-
- } else {
- assert(lacing == 3); // EBML lacing
-
- if (pos >= stop)
- return E_FILE_FORMAT_INVALID;
-
- long long size = 0;
- int frame_count = m_frame_count;
-
- long long frame_size = ReadUInt(pReader, pos, len);
-
- if (frame_size <= 0)
- return E_FILE_FORMAT_INVALID;
-
- if (frame_size > LONG_MAX)
- return E_FILE_FORMAT_INVALID;
-
- if ((pos + len) > stop)
- return E_FILE_FORMAT_INVALID;
-
- pos += len; // consume length of size of first frame
-
- if ((pos + frame_size) > stop)
- return E_FILE_FORMAT_INVALID;
-
- Frame* pf = m_frames;
- Frame* const pf_end = pf + m_frame_count;
-
- {
- Frame& curr = *pf;
-
- curr.pos = 0; // patch later
-
- curr.len = static_cast<long>(frame_size);
- size += curr.len; // contribution of this frame
- }
-
- --frame_count;
-
- while (frame_count > 1) {
- if (pos >= stop)
- return E_FILE_FORMAT_INVALID;
-
- assert(pf < pf_end);
- if (pf >= pf_end)
- return E_FILE_FORMAT_INVALID;
-
- const Frame& prev = *pf++;
- assert(prev.len == frame_size);
- if (prev.len != frame_size)
- return E_FILE_FORMAT_INVALID;
-
- assert(pf < pf_end);
- if (pf >= pf_end)
- return E_FILE_FORMAT_INVALID;
-
- Frame& curr = *pf;
-
- curr.pos = 0; // patch later
-
- const long long delta_size_ = ReadUInt(pReader, pos, len);
-
- if (delta_size_ < 0)
- return E_FILE_FORMAT_INVALID;
-
- if ((pos + len) > stop)
- return E_FILE_FORMAT_INVALID;
-
- pos += len; // consume length of (delta) size
- if (pos > stop)
- return E_FILE_FORMAT_INVALID;
-
- const long exp = 7 * len - 1;
- const long long bias = (1LL << exp) - 1LL;
- const long long delta_size = delta_size_ - bias;
-
- frame_size += delta_size;
-
- if (frame_size <= 0)
- return E_FILE_FORMAT_INVALID;
-
- if (frame_size > LONG_MAX)
- return E_FILE_FORMAT_INVALID;
-
- curr.len = static_cast<long>(frame_size);
- // Check if size + curr.len could overflow.
- if (size > LLONG_MAX - curr.len) {
- return E_FILE_FORMAT_INVALID;
- }
- size += curr.len; // contribution of this frame
-
- --frame_count;
- }
-
- // parse last frame
- if (frame_count > 0) {
- if (pos > stop || pf >= pf_end)
- return E_FILE_FORMAT_INVALID;
-
- const Frame& prev = *pf++;
- assert(prev.len == frame_size);
- if (prev.len != frame_size)
- return E_FILE_FORMAT_INVALID;
-
- if (pf >= pf_end)
- return E_FILE_FORMAT_INVALID;
-
- Frame& curr = *pf++;
- if (pf != pf_end)
- return E_FILE_FORMAT_INVALID;
-
- curr.pos = 0; // patch later
-
- const long long total_size = stop - pos;
-
- if (total_size < size)
- return E_FILE_FORMAT_INVALID;
-
- frame_size = total_size - size;
-
- if (frame_size > LONG_MAX || frame_size <= 0)
- return E_FILE_FORMAT_INVALID;
-
- curr.len = static_cast<long>(frame_size);
- }
-
- pf = m_frames;
- while (pf != pf_end) {
- Frame& f = *pf++;
- if ((pos + f.len) > stop)
- return E_FILE_FORMAT_INVALID;
-
- f.pos = pos;
- pos += f.len;
- }
-
- if (pos != stop)
- return E_FILE_FORMAT_INVALID;
- }
-
- return 0; // success
-}
-
-long long Block::GetTimeCode(const Cluster* pCluster) const {
- if (pCluster == 0)
- return m_timecode;
-
- const long long tc0 = pCluster->GetTimeCode();
- assert(tc0 >= 0);
-
- // Check if tc0 + m_timecode would overflow.
- if (tc0 < 0 || LLONG_MAX - tc0 < m_timecode) {
- return -1;
- }
-
- const long long tc = tc0 + m_timecode;
-
- return tc; // unscaled timecode units
-}
-
-long long Block::GetTime(const Cluster* pCluster) const {
- assert(pCluster);
-
- const long long tc = GetTimeCode(pCluster);
-
- const Segment* const pSegment = pCluster->m_pSegment;
- const SegmentInfo* const pInfo = pSegment->GetInfo();
- assert(pInfo);
-
- const long long scale = pInfo->GetTimeCodeScale();
- assert(scale >= 1);
-
- // Check if tc * scale could overflow.
- if (tc != 0 && scale > LLONG_MAX / tc) {
- return -1;
- }
- const long long ns = tc * scale;
-
- return ns;
-}
-
-long long Block::GetTrackNumber() const { return m_track; }
-
-bool Block::IsKey() const {
- return ((m_flags & static_cast<unsigned char>(1 << 7)) != 0);
-}
-
-void Block::SetKey(bool bKey) {
- if (bKey)
- m_flags |= static_cast<unsigned char>(1 << 7);
- else
- m_flags &= 0x7F;
-}
-
-bool Block::IsInvisible() const { return bool(int(m_flags & 0x08) != 0); }
-
-Block::Lacing Block::GetLacing() const {
- const int value = int(m_flags & 0x06) >> 1;
- return static_cast<Lacing>(value);
-}
-
-int Block::GetFrameCount() const { return m_frame_count; }
-
-const Block::Frame& Block::GetFrame(int idx) const {
- assert(idx >= 0);
- assert(idx < m_frame_count);
-
- const Frame& f = m_frames[idx];
- assert(f.pos > 0);
- assert(f.len > 0);
-
- return f;
-}
-
-long Block::Frame::Read(IMkvReader* pReader, unsigned char* buf) const {
- assert(pReader);
- assert(buf);
-
- const long status = pReader->Read(pos, len, buf);
- return status;
-}
-
-long long Block::GetDiscardPadding() const { return m_discard_padding; }
-
-} // namespace mkvparser
diff --git a/third_party/aom/third_party/libwebm/mkvparser/mkvparser.h b/third_party/aom/third_party/libwebm/mkvparser/mkvparser.h
deleted file mode 100644
index 26c2b7e5e..000000000
--- a/third_party/aom/third_party/libwebm/mkvparser/mkvparser.h
+++ /dev/null
@@ -1,1145 +0,0 @@
-// Copyright (c) 2012 The WebM project authors. All Rights Reserved.
-//
-// Use of this source code is governed by a BSD-style license
-// that can be found in the LICENSE file in the root of the source
-// tree. An additional intellectual property rights grant can be found
-// in the file PATENTS. All contributing project authors may
-// be found in the AUTHORS file in the root of the source tree.
-#ifndef MKVPARSER_MKVPARSER_H_
-#define MKVPARSER_MKVPARSER_H_
-
-#include <cstddef>
-
-namespace mkvparser {
-
-const int E_PARSE_FAILED = -1;
-const int E_FILE_FORMAT_INVALID = -2;
-const int E_BUFFER_NOT_FULL = -3;
-
-class IMkvReader {
- public:
- virtual int Read(long long pos, long len, unsigned char* buf) = 0;
- virtual int Length(long long* total, long long* available) = 0;
-
- protected:
- virtual ~IMkvReader();
-};
-
-template <typename Type>
-Type* SafeArrayAlloc(unsigned long long num_elements,
- unsigned long long element_size);
-long long GetUIntLength(IMkvReader*, long long, long&);
-long long ReadUInt(IMkvReader*, long long, long&);
-long long ReadID(IMkvReader* pReader, long long pos, long& len);
-long long UnserializeUInt(IMkvReader*, long long pos, long long size);
-
-long UnserializeFloat(IMkvReader*, long long pos, long long size, double&);
-long UnserializeInt(IMkvReader*, long long pos, long long size,
- long long& result);
-
-long UnserializeString(IMkvReader*, long long pos, long long size, char*& str);
-
-long ParseElementHeader(IMkvReader* pReader,
- long long& pos, // consume id and size fields
- long long stop, // if you know size of element's parent
- long long& id, long long& size);
-
-bool Match(IMkvReader*, long long&, unsigned long, long long&);
-bool Match(IMkvReader*, long long&, unsigned long, unsigned char*&, size_t&);
-
-void GetVersion(int& major, int& minor, int& build, int& revision);
-
-struct EBMLHeader {
- EBMLHeader();
- ~EBMLHeader();
- long long m_version;
- long long m_readVersion;
- long long m_maxIdLength;
- long long m_maxSizeLength;
- char* m_docType;
- long long m_docTypeVersion;
- long long m_docTypeReadVersion;
-
- long long Parse(IMkvReader*, long long&);
- void Init();
-};
-
-class Segment;
-class Track;
-class Cluster;
-
-class Block {
- Block(const Block&);
- Block& operator=(const Block&);
-
- public:
- const long long m_start;
- const long long m_size;
-
- Block(long long start, long long size, long long discard_padding);
- ~Block();
-
- long Parse(const Cluster*);
-
- long long GetTrackNumber() const;
- long long GetTimeCode(const Cluster*) const; // absolute, but not scaled
- long long GetTime(const Cluster*) const; // absolute, and scaled (ns)
- bool IsKey() const;
- void SetKey(bool);
- bool IsInvisible() const;
-
- enum Lacing { kLacingNone, kLacingXiph, kLacingFixed, kLacingEbml };
- Lacing GetLacing() const;
-
- int GetFrameCount() const; // to index frames: [0, count)
-
- struct Frame {
- long long pos; // absolute offset
- long len;
-
- long Read(IMkvReader*, unsigned char*) const;
- };
-
- const Frame& GetFrame(int frame_index) const;
-
- long long GetDiscardPadding() const;
-
- private:
- long long m_track; // Track::Number()
- short m_timecode; // relative to cluster
- unsigned char m_flags;
-
- Frame* m_frames;
- int m_frame_count;
-
- protected:
- const long long m_discard_padding;
-};
-
-class BlockEntry {
- BlockEntry(const BlockEntry&);
- BlockEntry& operator=(const BlockEntry&);
-
- protected:
- BlockEntry(Cluster*, long index);
-
- public:
- virtual ~BlockEntry();
-
- bool EOS() const { return (GetKind() == kBlockEOS); }
- const Cluster* GetCluster() const;
- long GetIndex() const;
- virtual const Block* GetBlock() const = 0;
-
- enum Kind { kBlockEOS, kBlockSimple, kBlockGroup };
- virtual Kind GetKind() const = 0;
-
- protected:
- Cluster* const m_pCluster;
- const long m_index;
-};
-
-class SimpleBlock : public BlockEntry {
- SimpleBlock(const SimpleBlock&);
- SimpleBlock& operator=(const SimpleBlock&);
-
- public:
- SimpleBlock(Cluster*, long index, long long start, long long size);
- long Parse();
-
- Kind GetKind() const;
- const Block* GetBlock() const;
-
- protected:
- Block m_block;
-};
-
-class BlockGroup : public BlockEntry {
- BlockGroup(const BlockGroup&);
- BlockGroup& operator=(const BlockGroup&);
-
- public:
- BlockGroup(Cluster*, long index,
- long long block_start, // absolute pos of block's payload
- long long block_size, // size of block's payload
- long long prev, long long next, long long duration,
- long long discard_padding);
-
- long Parse();
-
- Kind GetKind() const;
- const Block* GetBlock() const;
-
- long long GetPrevTimeCode() const; // relative to block's time
- long long GetNextTimeCode() const; // as above
- long long GetDurationTimeCode() const;
-
- private:
- Block m_block;
- const long long m_prev;
- const long long m_next;
- const long long m_duration;
-};
-
-///////////////////////////////////////////////////////////////
-// ContentEncoding element
-// Elements used to describe if the track data has been encrypted or
-// compressed with zlib or header stripping.
-class ContentEncoding {
- public:
- enum { kCTR = 1 };
-
- ContentEncoding();
- ~ContentEncoding();
-
- // ContentCompression element names
- struct ContentCompression {
- ContentCompression();
- ~ContentCompression();
-
- unsigned long long algo;
- unsigned char* settings;
- long long settings_len;
- };
-
- // ContentEncAESSettings element names
- struct ContentEncAESSettings {
- ContentEncAESSettings() : cipher_mode(kCTR) {}
- ~ContentEncAESSettings() {}
-
- unsigned long long cipher_mode;
- };
-
- // ContentEncryption element names
- struct ContentEncryption {
- ContentEncryption();
- ~ContentEncryption();
-
- unsigned long long algo;
- unsigned char* key_id;
- long long key_id_len;
- unsigned char* signature;
- long long signature_len;
- unsigned char* sig_key_id;
- long long sig_key_id_len;
- unsigned long long sig_algo;
- unsigned long long sig_hash_algo;
-
- ContentEncAESSettings aes_settings;
- };
-
- // Returns ContentCompression represented by |idx|. Returns NULL if |idx|
- // is out of bounds.
- const ContentCompression* GetCompressionByIndex(unsigned long idx) const;
-
- // Returns number of ContentCompression elements in this ContentEncoding
- // element.
- unsigned long GetCompressionCount() const;
-
- // Parses the ContentCompression element from |pReader|. |start| is the
- // starting offset of the ContentCompression payload. |size| is the size in
- // bytes of the ContentCompression payload. |compression| is where the parsed
- // values will be stored.
- long ParseCompressionEntry(long long start, long long size,
- IMkvReader* pReader,
- ContentCompression* compression);
-
- // Returns ContentEncryption represented by |idx|. Returns NULL if |idx|
- // is out of bounds.
- const ContentEncryption* GetEncryptionByIndex(unsigned long idx) const;
-
- // Returns number of ContentEncryption elements in this ContentEncoding
- // element.
- unsigned long GetEncryptionCount() const;
-
- // Parses the ContentEncAESSettings element from |pReader|. |start| is the
- // starting offset of the ContentEncAESSettings payload. |size| is the
- // size in bytes of the ContentEncAESSettings payload. |encryption| is
- // where the parsed values will be stored.
- long ParseContentEncAESSettingsEntry(long long start, long long size,
- IMkvReader* pReader,
- ContentEncAESSettings* aes);
-
- // Parses the ContentEncoding element from |pReader|. |start| is the
- // starting offset of the ContentEncoding payload. |size| is the size in
- // bytes of the ContentEncoding payload. Returns true on success.
- long ParseContentEncodingEntry(long long start, long long size,
- IMkvReader* pReader);
-
- // Parses the ContentEncryption element from |pReader|. |start| is the
- // starting offset of the ContentEncryption payload. |size| is the size in
- // bytes of the ContentEncryption payload. |encryption| is where the parsed
- // values will be stored.
- long ParseEncryptionEntry(long long start, long long size,
- IMkvReader* pReader, ContentEncryption* encryption);
-
- unsigned long long encoding_order() const { return encoding_order_; }
- unsigned long long encoding_scope() const { return encoding_scope_; }
- unsigned long long encoding_type() const { return encoding_type_; }
-
- private:
- // Member variables for list of ContentCompression elements.
- ContentCompression** compression_entries_;
- ContentCompression** compression_entries_end_;
-
- // Member variables for list of ContentEncryption elements.
- ContentEncryption** encryption_entries_;
- ContentEncryption** encryption_entries_end_;
-
- // ContentEncoding element names
- unsigned long long encoding_order_;
- unsigned long long encoding_scope_;
- unsigned long long encoding_type_;
-
- // LIBWEBM_DISALLOW_COPY_AND_ASSIGN(ContentEncoding);
- ContentEncoding(const ContentEncoding&);
- ContentEncoding& operator=(const ContentEncoding&);
-};
-
-class Track {
- Track(const Track&);
- Track& operator=(const Track&);
-
- public:
- class Info;
- static long Create(Segment*, const Info&, long long element_start,
- long long element_size, Track*&);
-
- enum Type { kVideo = 1, kAudio = 2, kSubtitle = 0x11, kMetadata = 0x21 };
-
- Segment* const m_pSegment;
- const long long m_element_start;
- const long long m_element_size;
- virtual ~Track();
-
- long GetType() const;
- long GetNumber() const;
- unsigned long long GetUid() const;
- const char* GetNameAsUTF8() const;
- const char* GetLanguage() const;
- const char* GetCodecNameAsUTF8() const;
- const char* GetCodecId() const;
- const unsigned char* GetCodecPrivate(size_t&) const;
- bool GetLacing() const;
- unsigned long long GetDefaultDuration() const;
- unsigned long long GetCodecDelay() const;
- unsigned long long GetSeekPreRoll() const;
-
- const BlockEntry* GetEOS() const;
-
- struct Settings {
- long long start;
- long long size;
- };
-
- class Info {
- public:
- Info();
- ~Info();
- int Copy(Info&) const;
- void Clear();
- long type;
- long number;
- unsigned long long uid;
- unsigned long long defaultDuration;
- unsigned long long codecDelay;
- unsigned long long seekPreRoll;
- char* nameAsUTF8;
- char* language;
- char* codecId;
- char* codecNameAsUTF8;
- unsigned char* codecPrivate;
- size_t codecPrivateSize;
- bool lacing;
- Settings settings;
-
- private:
- Info(const Info&);
- Info& operator=(const Info&);
- int CopyStr(char* Info::*str, Info&) const;
- };
-
- long GetFirst(const BlockEntry*&) const;
- long GetNext(const BlockEntry* pCurr, const BlockEntry*& pNext) const;
- virtual bool VetEntry(const BlockEntry*) const;
- virtual long Seek(long long time_ns, const BlockEntry*&) const;
-
- const ContentEncoding* GetContentEncodingByIndex(unsigned long idx) const;
- unsigned long GetContentEncodingCount() const;
-
- long ParseContentEncodingsEntry(long long start, long long size);
-
- protected:
- Track(Segment*, long long element_start, long long element_size);
-
- Info m_info;
-
- class EOSBlock : public BlockEntry {
- public:
- EOSBlock();
-
- Kind GetKind() const;
- const Block* GetBlock() const;
- };
-
- EOSBlock m_eos;
-
- private:
- ContentEncoding** content_encoding_entries_;
- ContentEncoding** content_encoding_entries_end_;
-};
-
-struct PrimaryChromaticity {
- PrimaryChromaticity() : x(0), y(0) {}
- ~PrimaryChromaticity() {}
- static bool Parse(IMkvReader* reader, long long read_pos,
- long long value_size, bool is_x,
- PrimaryChromaticity** chromaticity);
- float x;
- float y;
-};
-
-struct MasteringMetadata {
- static const float kValueNotPresent;
-
- MasteringMetadata()
- : r(NULL),
- g(NULL),
- b(NULL),
- white_point(NULL),
- luminance_max(kValueNotPresent),
- luminance_min(kValueNotPresent) {}
- ~MasteringMetadata() {
- delete r;
- delete g;
- delete b;
- delete white_point;
- }
-
- static bool Parse(IMkvReader* reader, long long element_start,
- long long element_size,
- MasteringMetadata** mastering_metadata);
-
- PrimaryChromaticity* r;
- PrimaryChromaticity* g;
- PrimaryChromaticity* b;
- PrimaryChromaticity* white_point;
- float luminance_max;
- float luminance_min;
-};
-
-struct Colour {
- static const long long kValueNotPresent;
-
- // Unless otherwise noted all values assigned upon construction are the
- // equivalent of unspecified/default.
- Colour()
- : matrix_coefficients(kValueNotPresent),
- bits_per_channel(kValueNotPresent),
- chroma_subsampling_horz(kValueNotPresent),
- chroma_subsampling_vert(kValueNotPresent),
- cb_subsampling_horz(kValueNotPresent),
- cb_subsampling_vert(kValueNotPresent),
- chroma_siting_horz(kValueNotPresent),
- chroma_siting_vert(kValueNotPresent),
- range(kValueNotPresent),
- transfer_characteristics(kValueNotPresent),
- primaries(kValueNotPresent),
- max_cll(kValueNotPresent),
- max_fall(kValueNotPresent),
- mastering_metadata(NULL) {}
- ~Colour() {
- delete mastering_metadata;
- mastering_metadata = NULL;
- }
-
- static bool Parse(IMkvReader* reader, long long element_start,
- long long element_size, Colour** colour);
-
- long long matrix_coefficients;
- long long bits_per_channel;
- long long chroma_subsampling_horz;
- long long chroma_subsampling_vert;
- long long cb_subsampling_horz;
- long long cb_subsampling_vert;
- long long chroma_siting_horz;
- long long chroma_siting_vert;
- long long range;
- long long transfer_characteristics;
- long long primaries;
- long long max_cll;
- long long max_fall;
-
- MasteringMetadata* mastering_metadata;
-};
-
-struct Projection {
- enum ProjectionType {
- kTypeNotPresent = -1,
- kRectangular = 0,
- kEquirectangular = 1,
- kCubeMap = 2,
- kMesh = 3,
- };
- static const float kValueNotPresent;
- Projection()
- : type(kTypeNotPresent),
- private_data(NULL),
- private_data_length(0),
- pose_yaw(kValueNotPresent),
- pose_pitch(kValueNotPresent),
- pose_roll(kValueNotPresent) {}
- ~Projection() { delete[] private_data; }
- static bool Parse(IMkvReader* reader, long long element_start,
- long long element_size, Projection** projection);
-
- ProjectionType type;
- unsigned char* private_data;
- size_t private_data_length;
- float pose_yaw;
- float pose_pitch;
- float pose_roll;
-};
-
-class VideoTrack : public Track {
- VideoTrack(const VideoTrack&);
- VideoTrack& operator=(const VideoTrack&);
-
- VideoTrack(Segment*, long long element_start, long long element_size);
-
- public:
- virtual ~VideoTrack();
- static long Parse(Segment*, const Info&, long long element_start,
- long long element_size, VideoTrack*&);
-
- long long GetWidth() const;
- long long GetHeight() const;
- long long GetDisplayWidth() const;
- long long GetDisplayHeight() const;
- long long GetDisplayUnit() const;
- long long GetStereoMode() const;
- double GetFrameRate() const;
-
- bool VetEntry(const BlockEntry*) const;
- long Seek(long long time_ns, const BlockEntry*&) const;
-
- Colour* GetColour() const;
-
- Projection* GetProjection() const;
-
- private:
- long long m_width;
- long long m_height;
- long long m_display_width;
- long long m_display_height;
- long long m_display_unit;
- long long m_stereo_mode;
-
- double m_rate;
-
- Colour* m_colour;
- Projection* m_projection;
-};
-
-class AudioTrack : public Track {
- AudioTrack(const AudioTrack&);
- AudioTrack& operator=(const AudioTrack&);
-
- AudioTrack(Segment*, long long element_start, long long element_size);
-
- public:
- static long Parse(Segment*, const Info&, long long element_start,
- long long element_size, AudioTrack*&);
-
- double GetSamplingRate() const;
- long long GetChannels() const;
- long long GetBitDepth() const;
-
- private:
- double m_rate;
- long long m_channels;
- long long m_bitDepth;
-};
-
-class Tracks {
- Tracks(const Tracks&);
- Tracks& operator=(const Tracks&);
-
- public:
- Segment* const m_pSegment;
- const long long m_start;
- const long long m_size;
- const long long m_element_start;
- const long long m_element_size;
-
- Tracks(Segment*, long long start, long long size, long long element_start,
- long long element_size);
-
- ~Tracks();
-
- long Parse();
-
- unsigned long GetTracksCount() const;
-
- const Track* GetTrackByNumber(long tn) const;
- const Track* GetTrackByIndex(unsigned long idx) const;
-
- private:
- Track** m_trackEntries;
- Track** m_trackEntriesEnd;
-
- long ParseTrackEntry(long long payload_start, long long payload_size,
- long long element_start, long long element_size,
- Track*&) const;
-};
-
-class Chapters {
- Chapters(const Chapters&);
- Chapters& operator=(const Chapters&);
-
- public:
- Segment* const m_pSegment;
- const long long m_start;
- const long long m_size;
- const long long m_element_start;
- const long long m_element_size;
-
- Chapters(Segment*, long long payload_start, long long payload_size,
- long long element_start, long long element_size);
-
- ~Chapters();
-
- long Parse();
-
- class Atom;
- class Edition;
-
- class Display {
- friend class Atom;
- Display();
- Display(const Display&);
- ~Display();
- Display& operator=(const Display&);
-
- public:
- const char* GetString() const;
- const char* GetLanguage() const;
- const char* GetCountry() const;
-
- private:
- void Init();
- void ShallowCopy(Display&) const;
- void Clear();
- long Parse(IMkvReader*, long long pos, long long size);
-
- char* m_string;
- char* m_language;
- char* m_country;
- };
-
- class Atom {
- friend class Edition;
- Atom();
- Atom(const Atom&);
- ~Atom();
- Atom& operator=(const Atom&);
-
- public:
- unsigned long long GetUID() const;
- const char* GetStringUID() const;
-
- long long GetStartTimecode() const;
- long long GetStopTimecode() const;
-
- long long GetStartTime(const Chapters*) const;
- long long GetStopTime(const Chapters*) const;
-
- int GetDisplayCount() const;
- const Display* GetDisplay(int index) const;
-
- private:
- void Init();
- void ShallowCopy(Atom&) const;
- void Clear();
- long Parse(IMkvReader*, long long pos, long long size);
- static long long GetTime(const Chapters*, long long timecode);
-
- long ParseDisplay(IMkvReader*, long long pos, long long size);
- bool ExpandDisplaysArray();
-
- char* m_string_uid;
- unsigned long long m_uid;
- long long m_start_timecode;
- long long m_stop_timecode;
-
- Display* m_displays;
- int m_displays_size;
- int m_displays_count;
- };
-
- class Edition {
- friend class Chapters;
- Edition();
- Edition(const Edition&);
- ~Edition();
- Edition& operator=(const Edition&);
-
- public:
- int GetAtomCount() const;
- const Atom* GetAtom(int index) const;
-
- private:
- void Init();
- void ShallowCopy(Edition&) const;
- void Clear();
- long Parse(IMkvReader*, long long pos, long long size);
-
- long ParseAtom(IMkvReader*, long long pos, long long size);
- bool ExpandAtomsArray();
-
- Atom* m_atoms;
- int m_atoms_size;
- int m_atoms_count;
- };
-
- int GetEditionCount() const;
- const Edition* GetEdition(int index) const;
-
- private:
- long ParseEdition(long long pos, long long size);
- bool ExpandEditionsArray();
-
- Edition* m_editions;
- int m_editions_size;
- int m_editions_count;
-};
-
-class Tags {
- Tags(const Tags&);
- Tags& operator=(const Tags&);
-
- public:
- Segment* const m_pSegment;
- const long long m_start;
- const long long m_size;
- const long long m_element_start;
- const long long m_element_size;
-
- Tags(Segment*, long long payload_start, long long payload_size,
- long long element_start, long long element_size);
-
- ~Tags();
-
- long Parse();
-
- class Tag;
- class SimpleTag;
-
- class SimpleTag {
- friend class Tag;
- SimpleTag();
- SimpleTag(const SimpleTag&);
- ~SimpleTag();
- SimpleTag& operator=(const SimpleTag&);
-
- public:
- const char* GetTagName() const;
- const char* GetTagString() const;
-
- private:
- void Init();
- void ShallowCopy(SimpleTag&) const;
- void Clear();
- long Parse(IMkvReader*, long long pos, long long size);
-
- char* m_tag_name;
- char* m_tag_string;
- };
-
- class Tag {
- friend class Tags;
- Tag();
- Tag(const Tag&);
- ~Tag();
- Tag& operator=(const Tag&);
-
- public:
- int GetSimpleTagCount() const;
- const SimpleTag* GetSimpleTag(int index) const;
-
- private:
- void Init();
- void ShallowCopy(Tag&) const;
- void Clear();
- long Parse(IMkvReader*, long long pos, long long size);
-
- long ParseSimpleTag(IMkvReader*, long long pos, long long size);
- bool ExpandSimpleTagsArray();
-
- SimpleTag* m_simple_tags;
- int m_simple_tags_size;
- int m_simple_tags_count;
- };
-
- int GetTagCount() const;
- const Tag* GetTag(int index) const;
-
- private:
- long ParseTag(long long pos, long long size);
- bool ExpandTagsArray();
-
- Tag* m_tags;
- int m_tags_size;
- int m_tags_count;
-};
-
-class SegmentInfo {
- SegmentInfo(const SegmentInfo&);
- SegmentInfo& operator=(const SegmentInfo&);
-
- public:
- Segment* const m_pSegment;
- const long long m_start;
- const long long m_size;
- const long long m_element_start;
- const long long m_element_size;
-
- SegmentInfo(Segment*, long long start, long long size,
- long long element_start, long long element_size);
-
- ~SegmentInfo();
-
- long Parse();
-
- long long GetTimeCodeScale() const;
- long long GetDuration() const; // scaled
- const char* GetMuxingAppAsUTF8() const;
- const char* GetWritingAppAsUTF8() const;
- const char* GetTitleAsUTF8() const;
-
- private:
- long long m_timecodeScale;
- double m_duration;
- char* m_pMuxingAppAsUTF8;
- char* m_pWritingAppAsUTF8;
- char* m_pTitleAsUTF8;
-};
-
-class SeekHead {
- SeekHead(const SeekHead&);
- SeekHead& operator=(const SeekHead&);
-
- public:
- Segment* const m_pSegment;
- const long long m_start;
- const long long m_size;
- const long long m_element_start;
- const long long m_element_size;
-
- SeekHead(Segment*, long long start, long long size, long long element_start,
- long long element_size);
-
- ~SeekHead();
-
- long Parse();
-
- struct Entry {
- Entry();
-
- // the SeekHead entry payload
- long long id;
- long long pos;
-
- // absolute pos of SeekEntry ID
- long long element_start;
-
- // SeekEntry ID size + size size + payload
- long long element_size;
- };
-
- int GetCount() const;
- const Entry* GetEntry(int idx) const;
-
- struct VoidElement {
- // absolute pos of Void ID
- long long element_start;
-
- // ID size + size size + payload size
- long long element_size;
- };
-
- int GetVoidElementCount() const;
- const VoidElement* GetVoidElement(int idx) const;
-
- private:
- Entry* m_entries;
- int m_entry_count;
-
- VoidElement* m_void_elements;
- int m_void_element_count;
-
- static bool ParseEntry(IMkvReader*,
- long long pos, // payload
- long long size, Entry*);
-};
-
-class Cues;
-class CuePoint {
- friend class Cues;
-
- CuePoint(long, long long);
- ~CuePoint();
-
- CuePoint(const CuePoint&);
- CuePoint& operator=(const CuePoint&);
-
- public:
- long long m_element_start;
- long long m_element_size;
-
- bool Load(IMkvReader*);
-
- long long GetTimeCode() const; // absolute but unscaled
- long long GetTime(const Segment*) const; // absolute and scaled (ns units)
-
- struct TrackPosition {
- long long m_track;
- long long m_pos; // of cluster
- long long m_block;
- // codec_state //defaults to 0
- // reference = clusters containing req'd referenced blocks
- // reftime = timecode of the referenced block
-
- bool Parse(IMkvReader*, long long, long long);
- };
-
- const TrackPosition* Find(const Track*) const;
-
- private:
- const long m_index;
- long long m_timecode;
- TrackPosition* m_track_positions;
- size_t m_track_positions_count;
-};
-
-class Cues {
- friend class Segment;
-
- Cues(Segment*, long long start, long long size, long long element_start,
- long long element_size);
- ~Cues();
-
- Cues(const Cues&);
- Cues& operator=(const Cues&);
-
- public:
- Segment* const m_pSegment;
- const long long m_start;
- const long long m_size;
- const long long m_element_start;
- const long long m_element_size;
-
- bool Find( // lower bound of time_ns
- long long time_ns, const Track*, const CuePoint*&,
- const CuePoint::TrackPosition*&) const;
-
- const CuePoint* GetFirst() const;
- const CuePoint* GetLast() const;
- const CuePoint* GetNext(const CuePoint*) const;
-
- const BlockEntry* GetBlock(const CuePoint*,
- const CuePoint::TrackPosition*) const;
-
- bool LoadCuePoint() const;
- long GetCount() const; // loaded only
- // long GetTotal() const; //loaded + preloaded
- bool DoneParsing() const;
-
- private:
- bool Init() const;
- bool PreloadCuePoint(long&, long long) const;
-
- mutable CuePoint** m_cue_points;
- mutable long m_count;
- mutable long m_preload_count;
- mutable long long m_pos;
-};
-
-class Cluster {
- friend class Segment;
-
- Cluster(const Cluster&);
- Cluster& operator=(const Cluster&);
-
- public:
- Segment* const m_pSegment;
-
- public:
- static Cluster* Create(Segment*,
- long index, // index in segment
- long long off); // offset relative to segment
- // long long element_size);
-
- Cluster(); // EndOfStream
- ~Cluster();
-
- bool EOS() const;
-
- long long GetTimeCode() const; // absolute, but not scaled
- long long GetTime() const; // absolute, and scaled (nanosecond units)
- long long GetFirstTime() const; // time (ns) of first (earliest) block
- long long GetLastTime() const; // time (ns) of last (latest) block
-
- long GetFirst(const BlockEntry*&) const;
- long GetLast(const BlockEntry*&) const;
- long GetNext(const BlockEntry* curr, const BlockEntry*& next) const;
-
- const BlockEntry* GetEntry(const Track*, long long ns = -1) const;
- const BlockEntry* GetEntry(const CuePoint&,
- const CuePoint::TrackPosition&) const;
- // const BlockEntry* GetMaxKey(const VideoTrack*) const;
-
- // static bool HasBlockEntries(const Segment*, long long);
-
- static long HasBlockEntries(const Segment*, long long idoff, long long& pos,
- long& size);
-
- long GetEntryCount() const;
-
- long Load(long long& pos, long& size) const;
-
- long Parse(long long& pos, long& size) const;
- long GetEntry(long index, const mkvparser::BlockEntry*&) const;
-
- protected:
- Cluster(Segment*, long index, long long element_start);
- // long long element_size);
-
- public:
- const long long m_element_start;
- long long GetPosition() const; // offset relative to segment
-
- long GetIndex() const;
- long long GetElementSize() const;
- // long long GetPayloadSize() const;
-
- // long long Unparsed() const;
-
- private:
- long m_index;
- mutable long long m_pos;
- // mutable long long m_size;
- mutable long long m_element_size;
- mutable long long m_timecode;
- mutable BlockEntry** m_entries;
- mutable long m_entries_size;
- mutable long m_entries_count;
-
- long ParseSimpleBlock(long long, long long&, long&);
- long ParseBlockGroup(long long, long long&, long&);
-
- long CreateBlock(long long id, long long pos, long long size,
- long long discard_padding);
- long CreateBlockGroup(long long start_offset, long long size,
- long long discard_padding);
- long CreateSimpleBlock(long long, long long);
-};
-
-class Segment {
- friend class Cues;
- friend class Track;
- friend class VideoTrack;
-
- Segment(const Segment&);
- Segment& operator=(const Segment&);
-
- private:
- Segment(IMkvReader*, long long elem_start,
- // long long elem_size,
- long long pos, long long size);
-
- public:
- IMkvReader* const m_pReader;
- const long long m_element_start;
- // const long long m_element_size;
- const long long m_start; // posn of segment payload
- const long long m_size; // size of segment payload
- Cluster m_eos; // TODO: make private?
-
- static long long CreateInstance(IMkvReader*, long long, Segment*&);
- ~Segment();
-
- long Load(); // loads headers and all clusters
-
- // for incremental loading
- // long long Unparsed() const;
- bool DoneParsing() const;
- long long ParseHeaders(); // stops when first cluster is found
- // long FindNextCluster(long long& pos, long& size) const;
- long LoadCluster(long long& pos, long& size); // load one cluster
- long LoadCluster();
-
- long ParseNext(const Cluster* pCurr, const Cluster*& pNext, long long& pos,
- long& size);
-
- const SeekHead* GetSeekHead() const;
- const Tracks* GetTracks() const;
- const SegmentInfo* GetInfo() const;
- const Cues* GetCues() const;
- const Chapters* GetChapters() const;
- const Tags* GetTags() const;
-
- long long GetDuration() const;
-
- unsigned long GetCount() const;
- const Cluster* GetFirst() const;
- const Cluster* GetLast() const;
- const Cluster* GetNext(const Cluster*);
-
- const Cluster* FindCluster(long long time_nanoseconds) const;
- // const BlockEntry* Seek(long long time_nanoseconds, const Track*) const;
-
- const Cluster* FindOrPreloadCluster(long long pos);
-
- long ParseCues(long long cues_off, // offset relative to start of segment
- long long& parse_pos, long& parse_len);
-
- private:
- long long m_pos; // absolute file posn; what has been consumed so far
- Cluster* m_pUnknownSize;
-
- SeekHead* m_pSeekHead;
- SegmentInfo* m_pInfo;
- Tracks* m_pTracks;
- Cues* m_pCues;
- Chapters* m_pChapters;
- Tags* m_pTags;
- Cluster** m_clusters;
- long m_clusterCount; // number of entries for which m_index >= 0
- long m_clusterPreloadCount; // number of entries for which m_index < 0
- long m_clusterSize; // array size
-
- long DoLoadCluster(long long&, long&);
- long DoLoadClusterUnknownSize(long long&, long&);
- long DoParseNext(const Cluster*&, long long&, long&);
-
- bool AppendCluster(Cluster*);
- bool PreloadCluster(Cluster*, ptrdiff_t);
-
- // void ParseSeekHead(long long pos, long long size);
- // void ParseSeekEntry(long long pos, long long size);
- // void ParseCues(long long);
-
- const BlockEntry* GetBlock(const CuePoint&, const CuePoint::TrackPosition&);
-};
-
-} // namespace mkvparser
-
-inline long mkvparser::Segment::LoadCluster() {
- long long pos;
- long size;
-
- return LoadCluster(pos, size);
-}
-
-#endif // MKVPARSER_MKVPARSER_H_
diff --git a/third_party/aom/third_party/libwebm/mkvparser/mkvreader.cc b/third_party/aom/third_party/libwebm/mkvparser/mkvreader.cc
deleted file mode 100644
index 23d68f508..000000000
--- a/third_party/aom/third_party/libwebm/mkvparser/mkvreader.cc
+++ /dev/null
@@ -1,133 +0,0 @@
-// Copyright (c) 2010 The WebM project authors. All Rights Reserved.
-//
-// Use of this source code is governed by a BSD-style license
-// that can be found in the LICENSE file in the root of the source
-// tree. An additional intellectual property rights grant can be found
-// in the file PATENTS. All contributing project authors may
-// be found in the AUTHORS file in the root of the source tree.
-#include "mkvparser/mkvreader.h"
-
-#include <sys/types.h>
-
-#include <cassert>
-
-namespace mkvparser {
-
-MkvReader::MkvReader() : m_file(NULL), reader_owns_file_(true) {}
-
-MkvReader::MkvReader(FILE* fp) : m_file(fp), reader_owns_file_(false) {
- GetFileSize();
-}
-
-MkvReader::~MkvReader() {
- if (reader_owns_file_)
- Close();
- m_file = NULL;
-}
-
-int MkvReader::Open(const char* fileName) {
- if (fileName == NULL)
- return -1;
-
- if (m_file)
- return -1;
-
-#ifdef _MSC_VER
- const errno_t e = fopen_s(&m_file, fileName, "rb");
-
- if (e)
- return -1; // error
-#else
- m_file = fopen(fileName, "rb");
-
- if (m_file == NULL)
- return -1;
-#endif
- return !GetFileSize();
-}
-
-bool MkvReader::GetFileSize() {
- if (m_file == NULL)
- return false;
-#ifdef _MSC_VER
- int status = _fseeki64(m_file, 0L, SEEK_END);
-
- if (status)
- return false; // error
-
- m_length = _ftelli64(m_file);
-#else
- fseek(m_file, 0L, SEEK_END);
- m_length = ftell(m_file);
-#endif
- assert(m_length >= 0);
-
- if (m_length < 0)
- return false;
-
-#ifdef _MSC_VER
- status = _fseeki64(m_file, 0L, SEEK_SET);
-
- if (status)
- return false; // error
-#else
- fseek(m_file, 0L, SEEK_SET);
-#endif
-
- return true;
-}
-
-void MkvReader::Close() {
- if (m_file != NULL) {
- fclose(m_file);
- m_file = NULL;
- }
-}
-
-int MkvReader::Length(long long* total, long long* available) {
- if (m_file == NULL)
- return -1;
-
- if (total)
- *total = m_length;
-
- if (available)
- *available = m_length;
-
- return 0;
-}
-
-int MkvReader::Read(long long offset, long len, unsigned char* buffer) {
- if (m_file == NULL)
- return -1;
-
- if (offset < 0)
- return -1;
-
- if (len < 0)
- return -1;
-
- if (len == 0)
- return 0;
-
- if (offset >= m_length)
- return -1;
-
-#ifdef _MSC_VER
- const int status = _fseeki64(m_file, offset, SEEK_SET);
-
- if (status)
- return -1; // error
-#else
- fseeko(m_file, static_cast<off_t>(offset), SEEK_SET);
-#endif
-
- const size_t size = fread(buffer, 1, len, m_file);
-
- if (size < size_t(len))
- return -1; // error
-
- return 0; // success
-}
-
-} // namespace mkvparser
diff --git a/third_party/aom/third_party/libwebm/mkvparser/mkvreader.h b/third_party/aom/third_party/libwebm/mkvparser/mkvreader.h
deleted file mode 100644
index 9831ecf64..000000000
--- a/third_party/aom/third_party/libwebm/mkvparser/mkvreader.h
+++ /dev/null
@@ -1,45 +0,0 @@
-// Copyright (c) 2010 The WebM project authors. All Rights Reserved.
-//
-// Use of this source code is governed by a BSD-style license
-// that can be found in the LICENSE file in the root of the source
-// tree. An additional intellectual property rights grant can be found
-// in the file PATENTS. All contributing project authors may
-// be found in the AUTHORS file in the root of the source tree.
-#ifndef MKVPARSER_MKVREADER_H_
-#define MKVPARSER_MKVREADER_H_
-
-#include <cstdio>
-
-#include "mkvparser/mkvparser.h"
-
-namespace mkvparser {
-
-class MkvReader : public IMkvReader {
- public:
- MkvReader();
- explicit MkvReader(FILE* fp);
- virtual ~MkvReader();
-
- int Open(const char*);
- void Close();
-
- virtual int Read(long long position, long length, unsigned char* buffer);
- virtual int Length(long long* total, long long* available);
-
- private:
- MkvReader(const MkvReader&);
- MkvReader& operator=(const MkvReader&);
-
- // Determines the size of the file. This is called either by the constructor
- // or by the Open function depending on file ownership. Returns true on
- // success.
- bool GetFileSize();
-
- long long m_length;
- FILE* m_file;
- bool reader_owns_file_;
-};
-
-} // namespace mkvparser
-
-#endif // MKVPARSER_MKVREADER_H_
diff --git a/third_party/aom/third_party/libyuv/README.libaom b/third_party/aom/third_party/libyuv/README.libaom
deleted file mode 100644
index 09693c1f2..000000000
--- a/third_party/aom/third_party/libyuv/README.libaom
+++ /dev/null
@@ -1,15 +0,0 @@
-Name: libyuv
-URL: http://code.google.com/p/libyuv/
-Version: 1456
-License: BSD
-License File: LICENSE
-
-Description:
-libyuv is an open source project that includes YUV conversion and scaling
-functionality.
-
-The optimized scaler in libyuv is used in multiple resolution encoder example,
-which down-samples the original input video (f.g. 1280x720) a number of times
-in order to encode multiple resolution bit streams.
-
-Local Modifications:
diff --git a/third_party/aom/third_party/libyuv/include/libyuv/basic_types.h b/third_party/aom/third_party/libyuv/include/libyuv/basic_types.h
deleted file mode 100644
index 66e68536c..000000000
--- a/third_party/aom/third_party/libyuv/include/libyuv/basic_types.h
+++ /dev/null
@@ -1,119 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#ifndef INCLUDE_LIBYUV_BASIC_TYPES_H_ // NOLINT
-#define INCLUDE_LIBYUV_BASIC_TYPES_H_
-
-#include <stddef.h> // for NULL, size_t
-
-#if defined(__ANDROID__) || (defined(_MSC_VER) && (_MSC_VER < 1600))
-#include <sys/types.h> // for uintptr_t on x86
-#else
-#include <stdint.h> // for uintptr_t
-#endif
-
-#ifndef GG_LONGLONG
-#ifndef INT_TYPES_DEFINED
-#define INT_TYPES_DEFINED
-#ifdef COMPILER_MSVC
-typedef unsigned __int64 uint64;
-typedef __int64 int64;
-#ifndef INT64_C
-#define INT64_C(x) x ## I64
-#endif
-#ifndef UINT64_C
-#define UINT64_C(x) x ## UI64
-#endif
-#define INT64_F "I64"
-#else // COMPILER_MSVC
-#if defined(__LP64__) && !defined(__OpenBSD__) && !defined(__APPLE__)
-typedef unsigned long uint64; // NOLINT
-typedef long int64; // NOLINT
-#ifndef INT64_C
-#define INT64_C(x) x ## L
-#endif
-#ifndef UINT64_C
-#define UINT64_C(x) x ## UL
-#endif
-#define INT64_F "l"
-#else // defined(__LP64__) && !defined(__OpenBSD__) && !defined(__APPLE__)
-typedef unsigned long long uint64; // NOLINT
-typedef long long int64; // NOLINT
-#ifndef INT64_C
-#define INT64_C(x) x ## LL
-#endif
-#ifndef UINT64_C
-#define UINT64_C(x) x ## ULL
-#endif
-#define INT64_F "ll"
-#endif // __LP64__
-#endif // COMPILER_MSVC
-typedef unsigned int uint32;
-typedef int int32;
-typedef unsigned short uint16; // NOLINT
-typedef short int16; // NOLINT
-typedef unsigned char uint8;
-typedef signed char int8;
-#endif // INT_TYPES_DEFINED
-#endif // GG_LONGLONG
-
-// Detect compiler is for x86 or x64.
-#if defined(__x86_64__) || defined(_M_X64) || \
- defined(__i386__) || defined(_M_IX86)
-#define CPU_X86 1
-#endif
-// Detect compiler is for ARM.
-#if defined(__arm__) || defined(_M_ARM)
-#define CPU_ARM 1
-#endif
-
-#ifndef ALIGNP
-#ifdef __cplusplus
-#define ALIGNP(p, t) \
- (reinterpret_cast<uint8*>(((reinterpret_cast<uintptr_t>(p) + \
- ((t) - 1)) & ~((t) - 1))))
-#else
-#define ALIGNP(p, t) \
- ((uint8*)((((uintptr_t)(p) + ((t) - 1)) & ~((t) - 1)))) /* NOLINT */
-#endif
-#endif
-
-#if !defined(LIBYUV_API)
-#if defined(_WIN32) || defined(__CYGWIN__)
-#if defined(LIBYUV_BUILDING_SHARED_LIBRARY)
-#define LIBYUV_API __declspec(dllexport)
-#elif defined(LIBYUV_USING_SHARED_LIBRARY)
-#define LIBYUV_API __declspec(dllimport)
-#else
-#define LIBYUV_API
-#endif // LIBYUV_BUILDING_SHARED_LIBRARY
-#elif defined(__GNUC__) && (__GNUC__ >= 4) && !defined(__APPLE__) && \
- (defined(LIBYUV_BUILDING_SHARED_LIBRARY) || \
- defined(LIBYUV_USING_SHARED_LIBRARY))
-#define LIBYUV_API __attribute__ ((visibility ("default")))
-#else
-#define LIBYUV_API
-#endif // __GNUC__
-#endif // LIBYUV_API
-
-#define LIBYUV_BOOL int
-#define LIBYUV_FALSE 0
-#define LIBYUV_TRUE 1
-
-// Visual C x86 or GCC little endian.
-#if defined(__x86_64__) || defined(_M_X64) || \
- defined(__i386__) || defined(_M_IX86) || \
- defined(__arm__) || defined(_M_ARM) || \
- (defined(__BYTE_ORDER__) && __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__)
-#define LIBYUV_LITTLE_ENDIAN
-#endif
-
-#endif // INCLUDE_LIBYUV_BASIC_TYPES_H_ NOLINT
diff --git a/third_party/aom/third_party/libyuv/include/libyuv/compare.h b/third_party/aom/third_party/libyuv/include/libyuv/compare.h
deleted file mode 100644
index 2a9f1560c..000000000
--- a/third_party/aom/third_party/libyuv/include/libyuv/compare.h
+++ /dev/null
@@ -1,79 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#ifndef INCLUDE_LIBYUV_COMPARE_H_ // NOLINT
-#define INCLUDE_LIBYUV_COMPARE_H_
-
-#include "libyuv/basic_types.h"
-
-#ifdef __cplusplus
-namespace libyuv {
-extern "C" {
-#endif
-
-// Compute a hash for specified memory. Seed of 5381 recommended.
-LIBYUV_API
-uint32 HashDjb2(const uint8* src, uint64 count, uint32 seed);
-
-// Scan an opaque argb image and return fourcc based on alpha offset.
-// Returns FOURCC_ARGB, FOURCC_BGRA, or 0 if unknown.
-LIBYUV_API
-uint32 ARGBDetect(const uint8* argb, int stride_argb, int width, int height);
-
-// Sum Square Error - used to compute Mean Square Error or PSNR.
-LIBYUV_API
-uint64 ComputeSumSquareError(const uint8* src_a,
- const uint8* src_b, int count);
-
-LIBYUV_API
-uint64 ComputeSumSquareErrorPlane(const uint8* src_a, int stride_a,
- const uint8* src_b, int stride_b,
- int width, int height);
-
-static const int kMaxPsnr = 128;
-
-LIBYUV_API
-double SumSquareErrorToPsnr(uint64 sse, uint64 count);
-
-LIBYUV_API
-double CalcFramePsnr(const uint8* src_a, int stride_a,
- const uint8* src_b, int stride_b,
- int width, int height);
-
-LIBYUV_API
-double I420Psnr(const uint8* src_y_a, int stride_y_a,
- const uint8* src_u_a, int stride_u_a,
- const uint8* src_v_a, int stride_v_a,
- const uint8* src_y_b, int stride_y_b,
- const uint8* src_u_b, int stride_u_b,
- const uint8* src_v_b, int stride_v_b,
- int width, int height);
-
-LIBYUV_API
-double CalcFrameSsim(const uint8* src_a, int stride_a,
- const uint8* src_b, int stride_b,
- int width, int height);
-
-LIBYUV_API
-double I420Ssim(const uint8* src_y_a, int stride_y_a,
- const uint8* src_u_a, int stride_u_a,
- const uint8* src_v_a, int stride_v_a,
- const uint8* src_y_b, int stride_y_b,
- const uint8* src_u_b, int stride_u_b,
- const uint8* src_v_b, int stride_v_b,
- int width, int height);
-
-#ifdef __cplusplus
-} // extern "C"
-} // namespace libyuv
-#endif
-
-#endif // INCLUDE_LIBYUV_COMPARE_H_ NOLINT
diff --git a/third_party/aom/third_party/libyuv/include/libyuv/convert.h b/third_party/aom/third_party/libyuv/include/libyuv/convert.h
deleted file mode 100644
index d6f206c10..000000000
--- a/third_party/aom/third_party/libyuv/include/libyuv/convert.h
+++ /dev/null
@@ -1,246 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#ifndef INCLUDE_LIBYUV_CONVERT_H_ // NOLINT
-#define INCLUDE_LIBYUV_CONVERT_H_
-
-#include "libyuv/basic_types.h"
-// TODO(fbarchard): Remove the following headers includes.
-#include "libyuv/convert_from.h"
-#include "libyuv/planar_functions.h"
-#include "libyuv/rotate.h"
-
-#ifdef __cplusplus
-namespace libyuv {
-extern "C" {
-#endif
-
-// Convert I444 to I420.
-LIBYUV_API
-int I444ToI420(const uint8* src_y, int src_stride_y,
- const uint8* src_u, int src_stride_u,
- const uint8* src_v, int src_stride_v,
- uint8* dst_y, int dst_stride_y,
- uint8* dst_u, int dst_stride_u,
- uint8* dst_v, int dst_stride_v,
- int width, int height);
-
-// Convert I422 to I420.
-LIBYUV_API
-int I422ToI420(const uint8* src_y, int src_stride_y,
- const uint8* src_u, int src_stride_u,
- const uint8* src_v, int src_stride_v,
- uint8* dst_y, int dst_stride_y,
- uint8* dst_u, int dst_stride_u,
- uint8* dst_v, int dst_stride_v,
- int width, int height);
-
-// Convert I411 to I420.
-LIBYUV_API
-int I411ToI420(const uint8* src_y, int src_stride_y,
- const uint8* src_u, int src_stride_u,
- const uint8* src_v, int src_stride_v,
- uint8* dst_y, int dst_stride_y,
- uint8* dst_u, int dst_stride_u,
- uint8* dst_v, int dst_stride_v,
- int width, int height);
-
-// Copy I420 to I420.
-#define I420ToI420 I420Copy
-LIBYUV_API
-int I420Copy(const uint8* src_y, int src_stride_y,
- const uint8* src_u, int src_stride_u,
- const uint8* src_v, int src_stride_v,
- uint8* dst_y, int dst_stride_y,
- uint8* dst_u, int dst_stride_u,
- uint8* dst_v, int dst_stride_v,
- int width, int height);
-
-// Convert I400 (grey) to I420.
-LIBYUV_API
-int I400ToI420(const uint8* src_y, int src_stride_y,
- uint8* dst_y, int dst_stride_y,
- uint8* dst_u, int dst_stride_u,
- uint8* dst_v, int dst_stride_v,
- int width, int height);
-
-#define J400ToJ420 I400ToI420
-
-// Convert NV12 to I420.
-LIBYUV_API
-int NV12ToI420(const uint8* src_y, int src_stride_y,
- const uint8* src_uv, int src_stride_uv,
- uint8* dst_y, int dst_stride_y,
- uint8* dst_u, int dst_stride_u,
- uint8* dst_v, int dst_stride_v,
- int width, int height);
-
-// Convert NV21 to I420.
-LIBYUV_API
-int NV21ToI420(const uint8* src_y, int src_stride_y,
- const uint8* src_vu, int src_stride_vu,
- uint8* dst_y, int dst_stride_y,
- uint8* dst_u, int dst_stride_u,
- uint8* dst_v, int dst_stride_v,
- int width, int height);
-
-// Convert YUY2 to I420.
-LIBYUV_API
-int YUY2ToI420(const uint8* src_yuy2, int src_stride_yuy2,
- uint8* dst_y, int dst_stride_y,
- uint8* dst_u, int dst_stride_u,
- uint8* dst_v, int dst_stride_v,
- int width, int height);
-
-// Convert UYVY to I420.
-LIBYUV_API
-int UYVYToI420(const uint8* src_uyvy, int src_stride_uyvy,
- uint8* dst_y, int dst_stride_y,
- uint8* dst_u, int dst_stride_u,
- uint8* dst_v, int dst_stride_v,
- int width, int height);
-
-// Convert M420 to I420.
-LIBYUV_API
-int M420ToI420(const uint8* src_m420, int src_stride_m420,
- uint8* dst_y, int dst_stride_y,
- uint8* dst_u, int dst_stride_u,
- uint8* dst_v, int dst_stride_v,
- int width, int height);
-
-// ARGB little endian (bgra in memory) to I420.
-LIBYUV_API
-int ARGBToI420(const uint8* src_frame, int src_stride_frame,
- uint8* dst_y, int dst_stride_y,
- uint8* dst_u, int dst_stride_u,
- uint8* dst_v, int dst_stride_v,
- int width, int height);
-
-// BGRA little endian (argb in memory) to I420.
-LIBYUV_API
-int BGRAToI420(const uint8* src_frame, int src_stride_frame,
- uint8* dst_y, int dst_stride_y,
- uint8* dst_u, int dst_stride_u,
- uint8* dst_v, int dst_stride_v,
- int width, int height);
-
-// ABGR little endian (rgba in memory) to I420.
-LIBYUV_API
-int ABGRToI420(const uint8* src_frame, int src_stride_frame,
- uint8* dst_y, int dst_stride_y,
- uint8* dst_u, int dst_stride_u,
- uint8* dst_v, int dst_stride_v,
- int width, int height);
-
-// RGBA little endian (abgr in memory) to I420.
-LIBYUV_API
-int RGBAToI420(const uint8* src_frame, int src_stride_frame,
- uint8* dst_y, int dst_stride_y,
- uint8* dst_u, int dst_stride_u,
- uint8* dst_v, int dst_stride_v,
- int width, int height);
-
-// RGB little endian (bgr in memory) to I420.
-LIBYUV_API
-int RGB24ToI420(const uint8* src_frame, int src_stride_frame,
- uint8* dst_y, int dst_stride_y,
- uint8* dst_u, int dst_stride_u,
- uint8* dst_v, int dst_stride_v,
- int width, int height);
-
-// RGB big endian (rgb in memory) to I420.
-LIBYUV_API
-int RAWToI420(const uint8* src_frame, int src_stride_frame,
- uint8* dst_y, int dst_stride_y,
- uint8* dst_u, int dst_stride_u,
- uint8* dst_v, int dst_stride_v,
- int width, int height);
-
-// RGB16 (RGBP fourcc) little endian to I420.
-LIBYUV_API
-int RGB565ToI420(const uint8* src_frame, int src_stride_frame,
- uint8* dst_y, int dst_stride_y,
- uint8* dst_u, int dst_stride_u,
- uint8* dst_v, int dst_stride_v,
- int width, int height);
-
-// RGB15 (RGBO fourcc) little endian to I420.
-LIBYUV_API
-int ARGB1555ToI420(const uint8* src_frame, int src_stride_frame,
- uint8* dst_y, int dst_stride_y,
- uint8* dst_u, int dst_stride_u,
- uint8* dst_v, int dst_stride_v,
- int width, int height);
-
-// RGB12 (R444 fourcc) little endian to I420.
-LIBYUV_API
-int ARGB4444ToI420(const uint8* src_frame, int src_stride_frame,
- uint8* dst_y, int dst_stride_y,
- uint8* dst_u, int dst_stride_u,
- uint8* dst_v, int dst_stride_v,
- int width, int height);
-
-#ifdef HAVE_JPEG
-// src_width/height provided by capture.
-// dst_width/height for clipping determine final size.
-LIBYUV_API
-int MJPGToI420(const uint8* sample, size_t sample_size,
- uint8* dst_y, int dst_stride_y,
- uint8* dst_u, int dst_stride_u,
- uint8* dst_v, int dst_stride_v,
- int src_width, int src_height,
- int dst_width, int dst_height);
-
-// Query size of MJPG in pixels.
-LIBYUV_API
-int MJPGSize(const uint8* sample, size_t sample_size,
- int* width, int* height);
-#endif
-
-// Convert camera sample to I420 with cropping, rotation and vertical flip.
-// "src_size" is needed to parse MJPG.
-// "dst_stride_y" number of bytes in a row of the dst_y plane.
-// Normally this would be the same as dst_width, with recommended alignment
-// to 16 bytes for better efficiency.
-// If rotation of 90 or 270 is used, stride is affected. The caller should
-// allocate the I420 buffer according to rotation.
-// "dst_stride_u" number of bytes in a row of the dst_u plane.
-// Normally this would be the same as (dst_width + 1) / 2, with
-// recommended alignment to 16 bytes for better efficiency.
-// If rotation of 90 or 270 is used, stride is affected.
-// "crop_x" and "crop_y" are starting position for cropping.
-// To center, crop_x = (src_width - dst_width) / 2
-// crop_y = (src_height - dst_height) / 2
-// "src_width" / "src_height" is size of src_frame in pixels.
-// "src_height" can be negative indicating a vertically flipped image source.
-// "crop_width" / "crop_height" is the size to crop the src to.
-// Must be less than or equal to src_width/src_height
-// Cropping parameters are pre-rotation.
-// "rotation" can be 0, 90, 180 or 270.
-// "format" is a fourcc. ie 'I420', 'YUY2'
-// Returns 0 for successful; -1 for invalid parameter. Non-zero for failure.
-LIBYUV_API
-int ConvertToI420(const uint8* src_frame, size_t src_size,
- uint8* dst_y, int dst_stride_y,
- uint8* dst_u, int dst_stride_u,
- uint8* dst_v, int dst_stride_v,
- int crop_x, int crop_y,
- int src_width, int src_height,
- int crop_width, int crop_height,
- enum RotationMode rotation,
- uint32 format);
-
-#ifdef __cplusplus
-} // extern "C"
-} // namespace libyuv
-#endif
-
-#endif // INCLUDE_LIBYUV_CONVERT_H_ NOLINT
diff --git a/third_party/aom/third_party/libyuv/include/libyuv/convert_argb.h b/third_party/aom/third_party/libyuv/include/libyuv/convert_argb.h
deleted file mode 100644
index ea75c0b26..000000000
--- a/third_party/aom/third_party/libyuv/include/libyuv/convert_argb.h
+++ /dev/null
@@ -1,232 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#ifndef INCLUDE_LIBYUV_CONVERT_ARGB_H_ // NOLINT
-#define INCLUDE_LIBYUV_CONVERT_ARGB_H_
-
-#include "libyuv/basic_types.h"
-// TODO(fbarchard): Remove the following headers includes
-#include "libyuv/convert_from.h"
-#include "libyuv/planar_functions.h"
-#include "libyuv/rotate.h"
-
-// TODO(fbarchard): This set of functions should exactly match convert.h
-// TODO(fbarchard): Add tests. Create random content of right size and convert
-// with C vs Opt and or to I420 and compare.
-// TODO(fbarchard): Some of these functions lack parameter setting.
-
-#ifdef __cplusplus
-namespace libyuv {
-extern "C" {
-#endif
-
-// Alias.
-#define ARGBToARGB ARGBCopy
-
-// Copy ARGB to ARGB.
-LIBYUV_API
-int ARGBCopy(const uint8* src_argb, int src_stride_argb,
- uint8* dst_argb, int dst_stride_argb,
- int width, int height);
-
-// Convert I420 to ARGB.
-LIBYUV_API
-int I420ToARGB(const uint8* src_y, int src_stride_y,
- const uint8* src_u, int src_stride_u,
- const uint8* src_v, int src_stride_v,
- uint8* dst_argb, int dst_stride_argb,
- int width, int height);
-
-// Convert I422 to ARGB.
-LIBYUV_API
-int I422ToARGB(const uint8* src_y, int src_stride_y,
- const uint8* src_u, int src_stride_u,
- const uint8* src_v, int src_stride_v,
- uint8* dst_argb, int dst_stride_argb,
- int width, int height);
-
-// Convert I444 to ARGB.
-LIBYUV_API
-int I444ToARGB(const uint8* src_y, int src_stride_y,
- const uint8* src_u, int src_stride_u,
- const uint8* src_v, int src_stride_v,
- uint8* dst_argb, int dst_stride_argb,
- int width, int height);
-
-// Convert I411 to ARGB.
-LIBYUV_API
-int I411ToARGB(const uint8* src_y, int src_stride_y,
- const uint8* src_u, int src_stride_u,
- const uint8* src_v, int src_stride_v,
- uint8* dst_argb, int dst_stride_argb,
- int width, int height);
-
-// Convert I400 (grey) to ARGB. Reverse of ARGBToI400.
-LIBYUV_API
-int I400ToARGB(const uint8* src_y, int src_stride_y,
- uint8* dst_argb, int dst_stride_argb,
- int width, int height);
-
-// Convert J400 (jpeg grey) to ARGB.
-LIBYUV_API
-int J400ToARGB(const uint8* src_y, int src_stride_y,
- uint8* dst_argb, int dst_stride_argb,
- int width, int height);
-
-// Alias.
-#define YToARGB I400ToARGB
-
-// Convert NV12 to ARGB.
-LIBYUV_API
-int NV12ToARGB(const uint8* src_y, int src_stride_y,
- const uint8* src_uv, int src_stride_uv,
- uint8* dst_argb, int dst_stride_argb,
- int width, int height);
-
-// Convert NV21 to ARGB.
-LIBYUV_API
-int NV21ToARGB(const uint8* src_y, int src_stride_y,
- const uint8* src_vu, int src_stride_vu,
- uint8* dst_argb, int dst_stride_argb,
- int width, int height);
-
-// Convert M420 to ARGB.
-LIBYUV_API
-int M420ToARGB(const uint8* src_m420, int src_stride_m420,
- uint8* dst_argb, int dst_stride_argb,
- int width, int height);
-
-// Convert YUY2 to ARGB.
-LIBYUV_API
-int YUY2ToARGB(const uint8* src_yuy2, int src_stride_yuy2,
- uint8* dst_argb, int dst_stride_argb,
- int width, int height);
-
-// Convert UYVY to ARGB.
-LIBYUV_API
-int UYVYToARGB(const uint8* src_uyvy, int src_stride_uyvy,
- uint8* dst_argb, int dst_stride_argb,
- int width, int height);
-
-// Convert J420 to ARGB.
-LIBYUV_API
-int J420ToARGB(const uint8* src_y, int src_stride_y,
- const uint8* src_u, int src_stride_u,
- const uint8* src_v, int src_stride_v,
- uint8* dst_argb, int dst_stride_argb,
- int width, int height);
-
-// Convert J422 to ARGB.
-LIBYUV_API
-int J422ToARGB(const uint8* src_y, int src_stride_y,
- const uint8* src_u, int src_stride_u,
- const uint8* src_v, int src_stride_v,
- uint8* dst_argb, int dst_stride_argb,
- int width, int height);
-
-// BGRA little endian (argb in memory) to ARGB.
-LIBYUV_API
-int BGRAToARGB(const uint8* src_frame, int src_stride_frame,
- uint8* dst_argb, int dst_stride_argb,
- int width, int height);
-
-// ABGR little endian (rgba in memory) to ARGB.
-LIBYUV_API
-int ABGRToARGB(const uint8* src_frame, int src_stride_frame,
- uint8* dst_argb, int dst_stride_argb,
- int width, int height);
-
-// RGBA little endian (abgr in memory) to ARGB.
-LIBYUV_API
-int RGBAToARGB(const uint8* src_frame, int src_stride_frame,
- uint8* dst_argb, int dst_stride_argb,
- int width, int height);
-
-// Deprecated function name.
-#define BG24ToARGB RGB24ToARGB
-
-// RGB little endian (bgr in memory) to ARGB.
-LIBYUV_API
-int RGB24ToARGB(const uint8* src_frame, int src_stride_frame,
- uint8* dst_argb, int dst_stride_argb,
- int width, int height);
-
-// RGB big endian (rgb in memory) to ARGB.
-LIBYUV_API
-int RAWToARGB(const uint8* src_frame, int src_stride_frame,
- uint8* dst_argb, int dst_stride_argb,
- int width, int height);
-
-// RGB16 (RGBP fourcc) little endian to ARGB.
-LIBYUV_API
-int RGB565ToARGB(const uint8* src_frame, int src_stride_frame,
- uint8* dst_argb, int dst_stride_argb,
- int width, int height);
-
-// RGB15 (RGBO fourcc) little endian to ARGB.
-LIBYUV_API
-int ARGB1555ToARGB(const uint8* src_frame, int src_stride_frame,
- uint8* dst_argb, int dst_stride_argb,
- int width, int height);
-
-// RGB12 (R444 fourcc) little endian to ARGB.
-LIBYUV_API
-int ARGB4444ToARGB(const uint8* src_frame, int src_stride_frame,
- uint8* dst_argb, int dst_stride_argb,
- int width, int height);
-
-#ifdef HAVE_JPEG
-// src_width/height provided by capture
-// dst_width/height for clipping determine final size.
-LIBYUV_API
-int MJPGToARGB(const uint8* sample, size_t sample_size,
- uint8* dst_argb, int dst_stride_argb,
- int src_width, int src_height,
- int dst_width, int dst_height);
-#endif
-
-// Convert camera sample to ARGB with cropping, rotation and vertical flip.
-// "src_size" is needed to parse MJPG.
-// "dst_stride_argb" number of bytes in a row of the dst_argb plane.
-// Normally this would be the same as dst_width, with recommended alignment
-// to 16 bytes for better efficiency.
-// If rotation of 90 or 270 is used, stride is affected. The caller should
-// allocate the I420 buffer according to rotation.
-// "dst_stride_u" number of bytes in a row of the dst_u plane.
-// Normally this would be the same as (dst_width + 1) / 2, with
-// recommended alignment to 16 bytes for better efficiency.
-// If rotation of 90 or 270 is used, stride is affected.
-// "crop_x" and "crop_y" are starting position for cropping.
-// To center, crop_x = (src_width - dst_width) / 2
-// crop_y = (src_height - dst_height) / 2
-// "src_width" / "src_height" is size of src_frame in pixels.
-// "src_height" can be negative indicating a vertically flipped image source.
-// "crop_width" / "crop_height" is the size to crop the src to.
-// Must be less than or equal to src_width/src_height
-// Cropping parameters are pre-rotation.
-// "rotation" can be 0, 90, 180 or 270.
-// "format" is a fourcc. ie 'I420', 'YUY2'
-// Returns 0 for successful; -1 for invalid parameter. Non-zero for failure.
-LIBYUV_API
-int ConvertToARGB(const uint8* src_frame, size_t src_size,
- uint8* dst_argb, int dst_stride_argb,
- int crop_x, int crop_y,
- int src_width, int src_height,
- int crop_width, int crop_height,
- enum RotationMode rotation,
- uint32 format);
-
-#ifdef __cplusplus
-} // extern "C"
-} // namespace libyuv
-#endif
-
-#endif // INCLUDE_LIBYUV_CONVERT_ARGB_H_ NOLINT
diff --git a/third_party/aom/third_party/libyuv/include/libyuv/convert_from.h b/third_party/aom/third_party/libyuv/include/libyuv/convert_from.h
deleted file mode 100644
index 3591b4fd6..000000000
--- a/third_party/aom/third_party/libyuv/include/libyuv/convert_from.h
+++ /dev/null
@@ -1,182 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#ifndef INCLUDE_LIBYUV_CONVERT_FROM_H_ // NOLINT
-#define INCLUDE_LIBYUV_CONVERT_FROM_H_
-
-#include "libyuv/basic_types.h"
-#include "libyuv/rotate.h"
-
-#ifdef __cplusplus
-namespace libyuv {
-extern "C" {
-#endif
-
-// See Also convert.h for conversions from formats to I420.
-
-// I420Copy in convert to I420ToI420.
-
-LIBYUV_API
-int I420ToI422(const uint8* src_y, int src_stride_y,
- const uint8* src_u, int src_stride_u,
- const uint8* src_v, int src_stride_v,
- uint8* dst_y, int dst_stride_y,
- uint8* dst_u, int dst_stride_u,
- uint8* dst_v, int dst_stride_v,
- int width, int height);
-
-LIBYUV_API
-int I420ToI444(const uint8* src_y, int src_stride_y,
- const uint8* src_u, int src_stride_u,
- const uint8* src_v, int src_stride_v,
- uint8* dst_y, int dst_stride_y,
- uint8* dst_u, int dst_stride_u,
- uint8* dst_v, int dst_stride_v,
- int width, int height);
-
-LIBYUV_API
-int I420ToI411(const uint8* src_y, int src_stride_y,
- const uint8* src_u, int src_stride_u,
- const uint8* src_v, int src_stride_v,
- uint8* dst_y, int dst_stride_y,
- uint8* dst_u, int dst_stride_u,
- uint8* dst_v, int dst_stride_v,
- int width, int height);
-
-// Copy to I400. Source can be I420, I422, I444, I400, NV12 or NV21.
-LIBYUV_API
-int I400Copy(const uint8* src_y, int src_stride_y,
- uint8* dst_y, int dst_stride_y,
- int width, int height);
-
-// TODO(fbarchard): I420ToM420
-
-LIBYUV_API
-int I420ToNV12(const uint8* src_y, int src_stride_y,
- const uint8* src_u, int src_stride_u,
- const uint8* src_v, int src_stride_v,
- uint8* dst_y, int dst_stride_y,
- uint8* dst_uv, int dst_stride_uv,
- int width, int height);
-
-LIBYUV_API
-int I420ToNV21(const uint8* src_y, int src_stride_y,
- const uint8* src_u, int src_stride_u,
- const uint8* src_v, int src_stride_v,
- uint8* dst_y, int dst_stride_y,
- uint8* dst_vu, int dst_stride_vu,
- int width, int height);
-
-LIBYUV_API
-int I420ToYUY2(const uint8* src_y, int src_stride_y,
- const uint8* src_u, int src_stride_u,
- const uint8* src_v, int src_stride_v,
- uint8* dst_frame, int dst_stride_frame,
- int width, int height);
-
-LIBYUV_API
-int I420ToUYVY(const uint8* src_y, int src_stride_y,
- const uint8* src_u, int src_stride_u,
- const uint8* src_v, int src_stride_v,
- uint8* dst_frame, int dst_stride_frame,
- int width, int height);
-
-LIBYUV_API
-int I420ToARGB(const uint8* src_y, int src_stride_y,
- const uint8* src_u, int src_stride_u,
- const uint8* src_v, int src_stride_v,
- uint8* dst_argb, int dst_stride_argb,
- int width, int height);
-
-LIBYUV_API
-int I420ToBGRA(const uint8* src_y, int src_stride_y,
- const uint8* src_u, int src_stride_u,
- const uint8* src_v, int src_stride_v,
- uint8* dst_argb, int dst_stride_argb,
- int width, int height);
-
-LIBYUV_API
-int I420ToABGR(const uint8* src_y, int src_stride_y,
- const uint8* src_u, int src_stride_u,
- const uint8* src_v, int src_stride_v,
- uint8* dst_argb, int dst_stride_argb,
- int width, int height);
-
-LIBYUV_API
-int I420ToRGBA(const uint8* src_y, int src_stride_y,
- const uint8* src_u, int src_stride_u,
- const uint8* src_v, int src_stride_v,
- uint8* dst_rgba, int dst_stride_rgba,
- int width, int height);
-
-LIBYUV_API
-int I420ToRGB24(const uint8* src_y, int src_stride_y,
- const uint8* src_u, int src_stride_u,
- const uint8* src_v, int src_stride_v,
- uint8* dst_frame, int dst_stride_frame,
- int width, int height);
-
-LIBYUV_API
-int I420ToRAW(const uint8* src_y, int src_stride_y,
- const uint8* src_u, int src_stride_u,
- const uint8* src_v, int src_stride_v,
- uint8* dst_frame, int dst_stride_frame,
- int width, int height);
-
-LIBYUV_API
-int I420ToRGB565(const uint8* src_y, int src_stride_y,
- const uint8* src_u, int src_stride_u,
- const uint8* src_v, int src_stride_v,
- uint8* dst_frame, int dst_stride_frame,
- int width, int height);
-
-// Convert I420 To RGB565 with 4x4 dither matrix (16 bytes).
-// Values in dither matrix from 0 to 7 recommended.
-// The order of the dither matrix is first byte is upper left.
-
-LIBYUV_API
-int I420ToRGB565Dither(const uint8* src_y, int src_stride_y,
- const uint8* src_u, int src_stride_u,
- const uint8* src_v, int src_stride_v,
- uint8* dst_frame, int dst_stride_frame,
- const uint8* dither4x4, int width, int height);
-
-LIBYUV_API
-int I420ToARGB1555(const uint8* src_y, int src_stride_y,
- const uint8* src_u, int src_stride_u,
- const uint8* src_v, int src_stride_v,
- uint8* dst_frame, int dst_stride_frame,
- int width, int height);
-
-LIBYUV_API
-int I420ToARGB4444(const uint8* src_y, int src_stride_y,
- const uint8* src_u, int src_stride_u,
- const uint8* src_v, int src_stride_v,
- uint8* dst_frame, int dst_stride_frame,
- int width, int height);
-
-// Convert I420 to specified format.
-// "dst_sample_stride" is bytes in a row for the destination. Pass 0 if the
-// buffer has contiguous rows. Can be negative. A multiple of 16 is optimal.
-LIBYUV_API
-int ConvertFromI420(const uint8* y, int y_stride,
- const uint8* u, int u_stride,
- const uint8* v, int v_stride,
- uint8* dst_sample, int dst_sample_stride,
- int width, int height,
- uint32 format);
-
-#ifdef __cplusplus
-} // extern "C"
-} // namespace libyuv
-#endif
-
-#endif // INCLUDE_LIBYUV_CONVERT_FROM_H_ NOLINT
diff --git a/third_party/aom/third_party/libyuv/include/libyuv/convert_from_argb.h b/third_party/aom/third_party/libyuv/include/libyuv/convert_from_argb.h
deleted file mode 100644
index 4a6226813..000000000
--- a/third_party/aom/third_party/libyuv/include/libyuv/convert_from_argb.h
+++ /dev/null
@@ -1,191 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#ifndef INCLUDE_LIBYUV_CONVERT_FROM_ARGB_H_ // NOLINT
-#define INCLUDE_LIBYUV_CONVERT_FROM_ARGB_H_
-
-#include "libyuv/basic_types.h"
-
-#ifdef __cplusplus
-namespace libyuv {
-extern "C" {
-#endif
-
-// Copy ARGB to ARGB.
-#define ARGBToARGB ARGBCopy
-LIBYUV_API
-int ARGBCopy(const uint8* src_argb, int src_stride_argb,
- uint8* dst_argb, int dst_stride_argb,
- int width, int height);
-
-// Convert ARGB To BGRA.
-LIBYUV_API
-int ARGBToBGRA(const uint8* src_argb, int src_stride_argb,
- uint8* dst_bgra, int dst_stride_bgra,
- int width, int height);
-
-// Convert ARGB To ABGR.
-LIBYUV_API
-int ARGBToABGR(const uint8* src_argb, int src_stride_argb,
- uint8* dst_abgr, int dst_stride_abgr,
- int width, int height);
-
-// Convert ARGB To RGBA.
-LIBYUV_API
-int ARGBToRGBA(const uint8* src_argb, int src_stride_argb,
- uint8* dst_rgba, int dst_stride_rgba,
- int width, int height);
-
-// Convert ARGB To RGB24.
-LIBYUV_API
-int ARGBToRGB24(const uint8* src_argb, int src_stride_argb,
- uint8* dst_rgb24, int dst_stride_rgb24,
- int width, int height);
-
-// Convert ARGB To RAW.
-LIBYUV_API
-int ARGBToRAW(const uint8* src_argb, int src_stride_argb,
- uint8* dst_rgb, int dst_stride_rgb,
- int width, int height);
-
-// Convert ARGB To RGB565.
-LIBYUV_API
-int ARGBToRGB565(const uint8* src_argb, int src_stride_argb,
- uint8* dst_rgb565, int dst_stride_rgb565,
- int width, int height);
-
-// Convert ARGB To RGB565 with 4x4 dither matrix (16 bytes).
-// Values in dither matrix from 0 to 7 recommended.
-// The order of the dither matrix is first byte is upper left.
-// TODO(fbarchard): Consider pointer to 2d array for dither4x4.
-// const uint8(*dither)[4][4];
-LIBYUV_API
-int ARGBToRGB565Dither(const uint8* src_argb, int src_stride_argb,
- uint8* dst_rgb565, int dst_stride_rgb565,
- const uint8* dither4x4, int width, int height);
-
-// Convert ARGB To ARGB1555.
-LIBYUV_API
-int ARGBToARGB1555(const uint8* src_argb, int src_stride_argb,
- uint8* dst_argb1555, int dst_stride_argb1555,
- int width, int height);
-
-// Convert ARGB To ARGB4444.
-LIBYUV_API
-int ARGBToARGB4444(const uint8* src_argb, int src_stride_argb,
- uint8* dst_argb4444, int dst_stride_argb4444,
- int width, int height);
-
-// Convert ARGB To I444.
-LIBYUV_API
-int ARGBToI444(const uint8* src_argb, int src_stride_argb,
- uint8* dst_y, int dst_stride_y,
- uint8* dst_u, int dst_stride_u,
- uint8* dst_v, int dst_stride_v,
- int width, int height);
-
-// Convert ARGB To I422.
-LIBYUV_API
-int ARGBToI422(const uint8* src_argb, int src_stride_argb,
- uint8* dst_y, int dst_stride_y,
- uint8* dst_u, int dst_stride_u,
- uint8* dst_v, int dst_stride_v,
- int width, int height);
-
-// Convert ARGB To I420. (also in convert.h)
-LIBYUV_API
-int ARGBToI420(const uint8* src_argb, int src_stride_argb,
- uint8* dst_y, int dst_stride_y,
- uint8* dst_u, int dst_stride_u,
- uint8* dst_v, int dst_stride_v,
- int width, int height);
-
-// Convert ARGB to J420. (JPeg full range I420).
-LIBYUV_API
-int ARGBToJ420(const uint8* src_argb, int src_stride_argb,
- uint8* dst_yj, int dst_stride_yj,
- uint8* dst_u, int dst_stride_u,
- uint8* dst_v, int dst_stride_v,
- int width, int height);
-
-// Convert ARGB to J422.
-LIBYUV_API
-int ARGBToJ422(const uint8* src_argb, int src_stride_argb,
- uint8* dst_yj, int dst_stride_yj,
- uint8* dst_u, int dst_stride_u,
- uint8* dst_v, int dst_stride_v,
- int width, int height);
-
-// Convert ARGB To I411.
-LIBYUV_API
-int ARGBToI411(const uint8* src_argb, int src_stride_argb,
- uint8* dst_y, int dst_stride_y,
- uint8* dst_u, int dst_stride_u,
- uint8* dst_v, int dst_stride_v,
- int width, int height);
-
-// Convert ARGB to J400. (JPeg full range).
-LIBYUV_API
-int ARGBToJ400(const uint8* src_argb, int src_stride_argb,
- uint8* dst_yj, int dst_stride_yj,
- int width, int height);
-
-// Convert ARGB to I400.
-LIBYUV_API
-int ARGBToI400(const uint8* src_argb, int src_stride_argb,
- uint8* dst_y, int dst_stride_y,
- int width, int height);
-
-// Convert ARGB to G. (Reverse of J400toARGB, which replicates G back to ARGB)
-LIBYUV_API
-int ARGBToG(const uint8* src_argb, int src_stride_argb,
- uint8* dst_g, int dst_stride_g,
- int width, int height);
-
-// Convert ARGB To NV12.
-LIBYUV_API
-int ARGBToNV12(const uint8* src_argb, int src_stride_argb,
- uint8* dst_y, int dst_stride_y,
- uint8* dst_uv, int dst_stride_uv,
- int width, int height);
-
-// Convert ARGB To NV21.
-LIBYUV_API
-int ARGBToNV21(const uint8* src_argb, int src_stride_argb,
- uint8* dst_y, int dst_stride_y,
- uint8* dst_vu, int dst_stride_vu,
- int width, int height);
-
-// Convert ARGB To NV21.
-LIBYUV_API
-int ARGBToNV21(const uint8* src_argb, int src_stride_argb,
- uint8* dst_y, int dst_stride_y,
- uint8* dst_vu, int dst_stride_vu,
- int width, int height);
-
-// Convert ARGB To YUY2.
-LIBYUV_API
-int ARGBToYUY2(const uint8* src_argb, int src_stride_argb,
- uint8* dst_yuy2, int dst_stride_yuy2,
- int width, int height);
-
-// Convert ARGB To UYVY.
-LIBYUV_API
-int ARGBToUYVY(const uint8* src_argb, int src_stride_argb,
- uint8* dst_uyvy, int dst_stride_uyvy,
- int width, int height);
-
-#ifdef __cplusplus
-} // extern "C"
-} // namespace libyuv
-#endif
-
-#endif // INCLUDE_LIBYUV_CONVERT_FROM_ARGB_H_ NOLINT
diff --git a/third_party/aom/third_party/libyuv/include/libyuv/cpu_id.h b/third_party/aom/third_party/libyuv/include/libyuv/cpu_id.h
deleted file mode 100644
index 870e94e8c..000000000
--- a/third_party/aom/third_party/libyuv/include/libyuv/cpu_id.h
+++ /dev/null
@@ -1,82 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#ifndef INCLUDE_LIBYUV_CPU_ID_H_ // NOLINT
-#define INCLUDE_LIBYUV_CPU_ID_H_
-
-#include "libyuv/basic_types.h"
-
-#ifdef __cplusplus
-namespace libyuv {
-extern "C" {
-#endif
-
-// TODO(fbarchard): Consider overlapping bits for different architectures.
-// Internal flag to indicate cpuid requires initialization.
-#define kCpuInit 0x1
-
-// These flags are only valid on ARM processors.
-static const int kCpuHasARM = 0x2;
-static const int kCpuHasNEON = 0x4;
-// 0x8 reserved for future ARM flag.
-
-// These flags are only valid on x86 processors.
-static const int kCpuHasX86 = 0x10;
-static const int kCpuHasSSE2 = 0x20;
-static const int kCpuHasSSSE3 = 0x40;
-static const int kCpuHasSSE41 = 0x80;
-static const int kCpuHasSSE42 = 0x100;
-static const int kCpuHasAVX = 0x200;
-static const int kCpuHasAVX2 = 0x400;
-static const int kCpuHasERMS = 0x800;
-static const int kCpuHasFMA3 = 0x1000;
-// 0x2000, 0x4000, 0x8000 reserved for future X86 flags.
-
-// These flags are only valid on MIPS processors.
-static const int kCpuHasMIPS = 0x10000;
-static const int kCpuHasMIPS_DSP = 0x20000;
-static const int kCpuHasMIPS_DSPR2 = 0x40000;
-
-// Internal function used to auto-init.
-LIBYUV_API
-int InitCpuFlags(void);
-
-// Internal function for parsing /proc/cpuinfo.
-LIBYUV_API
-int ArmCpuCaps(const char* cpuinfo_name);
-
-// Detect CPU has SSE2 etc.
-// Test_flag parameter should be one of kCpuHas constants above.
-// returns non-zero if instruction set is detected
-static __inline int TestCpuFlag(int test_flag) {
- LIBYUV_API extern int cpu_info_;
- return (cpu_info_ == kCpuInit ? InitCpuFlags() : cpu_info_) & test_flag;
-}
-
-// For testing, allow CPU flags to be disabled.
-// ie MaskCpuFlags(~kCpuHasSSSE3) to disable SSSE3.
-// MaskCpuFlags(-1) to enable all cpu specific optimizations.
-// MaskCpuFlags(0) to disable all cpu specific optimizations.
-LIBYUV_API
-void MaskCpuFlags(int enable_flags);
-
-// Low level cpuid for X86. Returns zeros on other CPUs.
-// eax is the info type that you want.
-// ecx is typically the cpu number, and should normally be zero.
-LIBYUV_API
-void CpuId(uint32 eax, uint32 ecx, uint32* cpu_info);
-
-#ifdef __cplusplus
-} // extern "C"
-} // namespace libyuv
-#endif
-
-#endif // INCLUDE_LIBYUV_CPU_ID_H_ NOLINT
diff --git a/third_party/aom/third_party/libyuv/include/libyuv/mjpeg_decoder.h b/third_party/aom/third_party/libyuv/include/libyuv/mjpeg_decoder.h
deleted file mode 100644
index fa1e51f9a..000000000
--- a/third_party/aom/third_party/libyuv/include/libyuv/mjpeg_decoder.h
+++ /dev/null
@@ -1,193 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#ifndef INCLUDE_LIBYUV_MJPEG_DECODER_H_ // NOLINT
-#define INCLUDE_LIBYUV_MJPEG_DECODER_H_
-
-#include "libyuv/basic_types.h"
-
-#ifdef __cplusplus
-// NOTE: For a simplified public API use convert.h MJPGToI420().
-
-struct jpeg_common_struct;
-struct jpeg_decompress_struct;
-struct jpeg_source_mgr;
-
-namespace libyuv {
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-LIBYUV_BOOL ValidateJpeg(const uint8* sample, size_t sample_size);
-
-#ifdef __cplusplus
-} // extern "C"
-#endif
-
-static const uint32 kUnknownDataSize = 0xFFFFFFFF;
-
-enum JpegSubsamplingType {
- kJpegYuv420,
- kJpegYuv422,
- kJpegYuv411,
- kJpegYuv444,
- kJpegYuv400,
- kJpegUnknown
-};
-
-struct Buffer {
- const uint8* data;
- int len;
-};
-
-struct BufferVector {
- Buffer* buffers;
- int len;
- int pos;
-};
-
-struct SetJmpErrorMgr;
-
-// MJPEG ("Motion JPEG") is a pseudo-standard video codec where the frames are
-// simply independent JPEG images with a fixed huffman table (which is omitted).
-// It is rarely used in video transmission, but is common as a camera capture
-// format, especially in Logitech devices. This class implements a decoder for
-// MJPEG frames.
-//
-// See http://tools.ietf.org/html/rfc2435
-class LIBYUV_API MJpegDecoder {
- public:
- typedef void (*CallbackFunction)(void* opaque,
- const uint8* const* data,
- const int* strides,
- int rows);
-
- static const int kColorSpaceUnknown;
- static const int kColorSpaceGrayscale;
- static const int kColorSpaceRgb;
- static const int kColorSpaceYCbCr;
- static const int kColorSpaceCMYK;
- static const int kColorSpaceYCCK;
-
- MJpegDecoder();
- ~MJpegDecoder();
-
- // Loads a new frame, reads its headers, and determines the uncompressed
- // image format.
- // Returns LIBYUV_TRUE if image looks valid and format is supported.
- // If return value is LIBYUV_TRUE, then the values for all the following
- // getters are populated.
- // src_len is the size of the compressed mjpeg frame in bytes.
- LIBYUV_BOOL LoadFrame(const uint8* src, size_t src_len);
-
- // Returns width of the last loaded frame in pixels.
- int GetWidth();
-
- // Returns height of the last loaded frame in pixels.
- int GetHeight();
-
- // Returns format of the last loaded frame. The return value is one of the
- // kColorSpace* constants.
- int GetColorSpace();
-
- // Number of color components in the color space.
- int GetNumComponents();
-
- // Sample factors of the n-th component.
- int GetHorizSampFactor(int component);
-
- int GetVertSampFactor(int component);
-
- int GetHorizSubSampFactor(int component);
-
- int GetVertSubSampFactor(int component);
-
- // Public for testability.
- int GetImageScanlinesPerImcuRow();
-
- // Public for testability.
- int GetComponentScanlinesPerImcuRow(int component);
-
- // Width of a component in bytes.
- int GetComponentWidth(int component);
-
- // Height of a component.
- int GetComponentHeight(int component);
-
- // Width of a component in bytes with padding for DCTSIZE. Public for testing.
- int GetComponentStride(int component);
-
- // Size of a component in bytes.
- int GetComponentSize(int component);
-
- // Call this after LoadFrame() if you decide you don't want to decode it
- // after all.
- LIBYUV_BOOL UnloadFrame();
-
- // Decodes the entire image into a one-buffer-per-color-component format.
- // dst_width must match exactly. dst_height must be <= to image height; if
- // less, the image is cropped. "planes" must have size equal to at least
- // GetNumComponents() and they must point to non-overlapping buffers of size
- // at least GetComponentSize(i). The pointers in planes are incremented
- // to point to after the end of the written data.
- // TODO(fbarchard): Add dst_x, dst_y to allow specific rect to be decoded.
- LIBYUV_BOOL DecodeToBuffers(uint8** planes, int dst_width, int dst_height);
-
- // Decodes the entire image and passes the data via repeated calls to a
- // callback function. Each call will get the data for a whole number of
- // image scanlines.
- // TODO(fbarchard): Add dst_x, dst_y to allow specific rect to be decoded.
- LIBYUV_BOOL DecodeToCallback(CallbackFunction fn, void* opaque,
- int dst_width, int dst_height);
-
- // The helper function which recognizes the jpeg sub-sampling type.
- static JpegSubsamplingType JpegSubsamplingTypeHelper(
- int* subsample_x, int* subsample_y, int number_of_components);
-
- private:
- void AllocOutputBuffers(int num_outbufs);
- void DestroyOutputBuffers();
-
- LIBYUV_BOOL StartDecode();
- LIBYUV_BOOL FinishDecode();
-
- void SetScanlinePointers(uint8** data);
- LIBYUV_BOOL DecodeImcuRow();
-
- int GetComponentScanlinePadding(int component);
-
- // A buffer holding the input data for a frame.
- Buffer buf_;
- BufferVector buf_vec_;
-
- jpeg_decompress_struct* decompress_struct_;
- jpeg_source_mgr* source_mgr_;
- SetJmpErrorMgr* error_mgr_;
-
- // LIBYUV_TRUE iff at least one component has scanline padding. (i.e.,
- // GetComponentScanlinePadding() != 0.)
- LIBYUV_BOOL has_scanline_padding_;
-
- // Temporaries used to point to scanline outputs.
- int num_outbufs_; // Outermost size of all arrays below.
- uint8*** scanlines_;
- int* scanlines_sizes_;
- // Temporary buffer used for decoding when we can't decode directly to the
- // output buffers. Large enough for just one iMCU row.
- uint8** databuf_;
- int* databuf_strides_;
-};
-
-} // namespace libyuv
-
-#endif // __cplusplus
-#endif // INCLUDE_LIBYUV_MJPEG_DECODER_H_ NOLINT
diff --git a/third_party/aom/third_party/libyuv/include/libyuv/planar_functions.h b/third_party/aom/third_party/libyuv/include/libyuv/planar_functions.h
deleted file mode 100644
index 7fe4d8eed..000000000
--- a/third_party/aom/third_party/libyuv/include/libyuv/planar_functions.h
+++ /dev/null
@@ -1,454 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#ifndef INCLUDE_LIBYUV_PLANAR_FUNCTIONS_H_ // NOLINT
-#define INCLUDE_LIBYUV_PLANAR_FUNCTIONS_H_
-
-#include "libyuv/basic_types.h"
-
-// TODO(fbarchard): Remove the following headers includes.
-#include "libyuv/convert.h"
-#include "libyuv/convert_argb.h"
-
-#ifdef __cplusplus
-namespace libyuv {
-extern "C" {
-#endif
-
-// Copy a plane of data.
-LIBYUV_API
-void CopyPlane(const uint8* src_y, int src_stride_y,
- uint8* dst_y, int dst_stride_y,
- int width, int height);
-
-LIBYUV_API
-void CopyPlane_16(const uint16* src_y, int src_stride_y,
- uint16* dst_y, int dst_stride_y,
- int width, int height);
-
-// Set a plane of data to a 32 bit value.
-LIBYUV_API
-void SetPlane(uint8* dst_y, int dst_stride_y,
- int width, int height,
- uint32 value);
-
-// Copy I400. Supports inverting.
-LIBYUV_API
-int I400ToI400(const uint8* src_y, int src_stride_y,
- uint8* dst_y, int dst_stride_y,
- int width, int height);
-
-#define J400ToJ400 I400ToI400
-
-// Copy I422 to I422.
-#define I422ToI422 I422Copy
-LIBYUV_API
-int I422Copy(const uint8* src_y, int src_stride_y,
- const uint8* src_u, int src_stride_u,
- const uint8* src_v, int src_stride_v,
- uint8* dst_y, int dst_stride_y,
- uint8* dst_u, int dst_stride_u,
- uint8* dst_v, int dst_stride_v,
- int width, int height);
-
-// Copy I444 to I444.
-#define I444ToI444 I444Copy
-LIBYUV_API
-int I444Copy(const uint8* src_y, int src_stride_y,
- const uint8* src_u, int src_stride_u,
- const uint8* src_v, int src_stride_v,
- uint8* dst_y, int dst_stride_y,
- uint8* dst_u, int dst_stride_u,
- uint8* dst_v, int dst_stride_v,
- int width, int height);
-
-// Convert YUY2 to I422.
-LIBYUV_API
-int YUY2ToI422(const uint8* src_yuy2, int src_stride_yuy2,
- uint8* dst_y, int dst_stride_y,
- uint8* dst_u, int dst_stride_u,
- uint8* dst_v, int dst_stride_v,
- int width, int height);
-
-// Convert UYVY to I422.
-LIBYUV_API
-int UYVYToI422(const uint8* src_uyvy, int src_stride_uyvy,
- uint8* dst_y, int dst_stride_y,
- uint8* dst_u, int dst_stride_u,
- uint8* dst_v, int dst_stride_v,
- int width, int height);
-
-LIBYUV_API
-int YUY2ToNV12(const uint8* src_yuy2, int src_stride_yuy2,
- uint8* dst_y, int dst_stride_y,
- uint8* dst_uv, int dst_stride_uv,
- int width, int height);
-
-LIBYUV_API
-int UYVYToNV12(const uint8* src_uyvy, int src_stride_uyvy,
- uint8* dst_y, int dst_stride_y,
- uint8* dst_uv, int dst_stride_uv,
- int width, int height);
-
-// Convert I420 to I400. (calls CopyPlane ignoring u/v).
-LIBYUV_API
-int I420ToI400(const uint8* src_y, int src_stride_y,
- const uint8* src_u, int src_stride_u,
- const uint8* src_v, int src_stride_v,
- uint8* dst_y, int dst_stride_y,
- int width, int height);
-
-// Alias
-#define J420ToJ400 I420ToI400
-#define I420ToI420Mirror I420Mirror
-
-// I420 mirror.
-LIBYUV_API
-int I420Mirror(const uint8* src_y, int src_stride_y,
- const uint8* src_u, int src_stride_u,
- const uint8* src_v, int src_stride_v,
- uint8* dst_y, int dst_stride_y,
- uint8* dst_u, int dst_stride_u,
- uint8* dst_v, int dst_stride_v,
- int width, int height);
-
-// Alias
-#define I400ToI400Mirror I400Mirror
-
-// I400 mirror. A single plane is mirrored horizontally.
-// Pass negative height to achieve 180 degree rotation.
-LIBYUV_API
-int I400Mirror(const uint8* src_y, int src_stride_y,
- uint8* dst_y, int dst_stride_y,
- int width, int height);
-
-// Alias
-#define ARGBToARGBMirror ARGBMirror
-
-// ARGB mirror.
-LIBYUV_API
-int ARGBMirror(const uint8* src_argb, int src_stride_argb,
- uint8* dst_argb, int dst_stride_argb,
- int width, int height);
-
-// Convert NV12 to RGB565.
-LIBYUV_API
-int NV12ToRGB565(const uint8* src_y, int src_stride_y,
- const uint8* src_uv, int src_stride_uv,
- uint8* dst_rgb565, int dst_stride_rgb565,
- int width, int height);
-
-// Convert NV21 to RGB565.
-LIBYUV_API
-int NV21ToRGB565(const uint8* src_y, int src_stride_y,
- const uint8* src_uv, int src_stride_uv,
- uint8* dst_rgb565, int dst_stride_rgb565,
- int width, int height);
-
-// I422ToARGB is in convert_argb.h
-// Convert I422 to BGRA.
-LIBYUV_API
-int I422ToBGRA(const uint8* src_y, int src_stride_y,
- const uint8* src_u, int src_stride_u,
- const uint8* src_v, int src_stride_v,
- uint8* dst_bgra, int dst_stride_bgra,
- int width, int height);
-
-// Convert I422 to ABGR.
-LIBYUV_API
-int I422ToABGR(const uint8* src_y, int src_stride_y,
- const uint8* src_u, int src_stride_u,
- const uint8* src_v, int src_stride_v,
- uint8* dst_abgr, int dst_stride_abgr,
- int width, int height);
-
-// Convert I422 to RGBA.
-LIBYUV_API
-int I422ToRGBA(const uint8* src_y, int src_stride_y,
- const uint8* src_u, int src_stride_u,
- const uint8* src_v, int src_stride_v,
- uint8* dst_rgba, int dst_stride_rgba,
- int width, int height);
-
-// Draw a rectangle into I420.
-LIBYUV_API
-int I420Rect(uint8* dst_y, int dst_stride_y,
- uint8* dst_u, int dst_stride_u,
- uint8* dst_v, int dst_stride_v,
- int x, int y, int width, int height,
- int value_y, int value_u, int value_v);
-
-// Draw a rectangle into ARGB.
-LIBYUV_API
-int ARGBRect(uint8* dst_argb, int dst_stride_argb,
- int x, int y, int width, int height, uint32 value);
-
-// Convert ARGB to gray scale ARGB.
-LIBYUV_API
-int ARGBGrayTo(const uint8* src_argb, int src_stride_argb,
- uint8* dst_argb, int dst_stride_argb,
- int width, int height);
-
-// Make a rectangle of ARGB gray scale.
-LIBYUV_API
-int ARGBGray(uint8* dst_argb, int dst_stride_argb,
- int x, int y, int width, int height);
-
-// Make a rectangle of ARGB Sepia tone.
-LIBYUV_API
-int ARGBSepia(uint8* dst_argb, int dst_stride_argb,
- int x, int y, int width, int height);
-
-// Apply a matrix rotation to each ARGB pixel.
-// matrix_argb is 4 signed ARGB values. -128 to 127 representing -2 to 2.
-// The first 4 coefficients apply to B, G, R, A and produce B of the output.
-// The next 4 coefficients apply to B, G, R, A and produce G of the output.
-// The next 4 coefficients apply to B, G, R, A and produce R of the output.
-// The last 4 coefficients apply to B, G, R, A and produce A of the output.
-LIBYUV_API
-int ARGBColorMatrix(const uint8* src_argb, int src_stride_argb,
- uint8* dst_argb, int dst_stride_argb,
- const int8* matrix_argb,
- int width, int height);
-
-// Deprecated. Use ARGBColorMatrix instead.
-// Apply a matrix rotation to each ARGB pixel.
-// matrix_argb is 3 signed ARGB values. -128 to 127 representing -1 to 1.
-// The first 4 coefficients apply to B, G, R, A and produce B of the output.
-// The next 4 coefficients apply to B, G, R, A and produce G of the output.
-// The last 4 coefficients apply to B, G, R, A and produce R of the output.
-LIBYUV_API
-int RGBColorMatrix(uint8* dst_argb, int dst_stride_argb,
- const int8* matrix_rgb,
- int x, int y, int width, int height);
-
-// Apply a color table each ARGB pixel.
-// Table contains 256 ARGB values.
-LIBYUV_API
-int ARGBColorTable(uint8* dst_argb, int dst_stride_argb,
- const uint8* table_argb,
- int x, int y, int width, int height);
-
-// Apply a color table each ARGB pixel but preserve destination alpha.
-// Table contains 256 ARGB values.
-LIBYUV_API
-int RGBColorTable(uint8* dst_argb, int dst_stride_argb,
- const uint8* table_argb,
- int x, int y, int width, int height);
-
-// Apply a luma/color table each ARGB pixel but preserve destination alpha.
-// Table contains 32768 values indexed by [Y][C] where 7 it 7 bit luma from
-// RGB (YJ style) and C is an 8 bit color component (R, G or B).
-LIBYUV_API
-int ARGBLumaColorTable(const uint8* src_argb, int src_stride_argb,
- uint8* dst_argb, int dst_stride_argb,
- const uint8* luma_rgb_table,
- int width, int height);
-
-// Apply a 3 term polynomial to ARGB values.
-// poly points to a 4x4 matrix. The first row is constants. The 2nd row is
-// coefficients for b, g, r and a. The 3rd row is coefficients for b squared,
-// g squared, r squared and a squared. The 4rd row is coefficients for b to
-// the 3, g to the 3, r to the 3 and a to the 3. The values are summed and
-// result clamped to 0 to 255.
-// A polynomial approximation can be dirived using software such as 'R'.
-
-LIBYUV_API
-int ARGBPolynomial(const uint8* src_argb, int src_stride_argb,
- uint8* dst_argb, int dst_stride_argb,
- const float* poly,
- int width, int height);
-
-// Quantize a rectangle of ARGB. Alpha unaffected.
-// scale is a 16 bit fractional fixed point scaler between 0 and 65535.
-// interval_size should be a value between 1 and 255.
-// interval_offset should be a value between 0 and 255.
-LIBYUV_API
-int ARGBQuantize(uint8* dst_argb, int dst_stride_argb,
- int scale, int interval_size, int interval_offset,
- int x, int y, int width, int height);
-
-// Copy ARGB to ARGB.
-LIBYUV_API
-int ARGBCopy(const uint8* src_argb, int src_stride_argb,
- uint8* dst_argb, int dst_stride_argb,
- int width, int height);
-
-// Copy ARGB to ARGB.
-LIBYUV_API
-int ARGBCopyAlpha(const uint8* src_argb, int src_stride_argb,
- uint8* dst_argb, int dst_stride_argb,
- int width, int height);
-
-// Copy ARGB to ARGB.
-LIBYUV_API
-int ARGBCopyYToAlpha(const uint8* src_y, int src_stride_y,
- uint8* dst_argb, int dst_stride_argb,
- int width, int height);
-
-typedef void (*ARGBBlendRow)(const uint8* src_argb0, const uint8* src_argb1,
- uint8* dst_argb, int width);
-
-// Get function to Alpha Blend ARGB pixels and store to destination.
-LIBYUV_API
-ARGBBlendRow GetARGBBlend();
-
-// Alpha Blend ARGB images and store to destination.
-// Alpha of destination is set to 255.
-LIBYUV_API
-int ARGBBlend(const uint8* src_argb0, int src_stride_argb0,
- const uint8* src_argb1, int src_stride_argb1,
- uint8* dst_argb, int dst_stride_argb,
- int width, int height);
-
-// Multiply ARGB image by ARGB image. Shifted down by 8. Saturates to 255.
-LIBYUV_API
-int ARGBMultiply(const uint8* src_argb0, int src_stride_argb0,
- const uint8* src_argb1, int src_stride_argb1,
- uint8* dst_argb, int dst_stride_argb,
- int width, int height);
-
-// Add ARGB image with ARGB image. Saturates to 255.
-LIBYUV_API
-int ARGBAdd(const uint8* src_argb0, int src_stride_argb0,
- const uint8* src_argb1, int src_stride_argb1,
- uint8* dst_argb, int dst_stride_argb,
- int width, int height);
-
-// Subtract ARGB image (argb1) from ARGB image (argb0). Saturates to 0.
-LIBYUV_API
-int ARGBSubtract(const uint8* src_argb0, int src_stride_argb0,
- const uint8* src_argb1, int src_stride_argb1,
- uint8* dst_argb, int dst_stride_argb,
- int width, int height);
-
-// Convert I422 to YUY2.
-LIBYUV_API
-int I422ToYUY2(const uint8* src_y, int src_stride_y,
- const uint8* src_u, int src_stride_u,
- const uint8* src_v, int src_stride_v,
- uint8* dst_frame, int dst_stride_frame,
- int width, int height);
-
-// Convert I422 to UYVY.
-LIBYUV_API
-int I422ToUYVY(const uint8* src_y, int src_stride_y,
- const uint8* src_u, int src_stride_u,
- const uint8* src_v, int src_stride_v,
- uint8* dst_frame, int dst_stride_frame,
- int width, int height);
-
-// Convert unattentuated ARGB to preattenuated ARGB.
-LIBYUV_API
-int ARGBAttenuate(const uint8* src_argb, int src_stride_argb,
- uint8* dst_argb, int dst_stride_argb,
- int width, int height);
-
-// Convert preattentuated ARGB to unattenuated ARGB.
-LIBYUV_API
-int ARGBUnattenuate(const uint8* src_argb, int src_stride_argb,
- uint8* dst_argb, int dst_stride_argb,
- int width, int height);
-
-// Convert MJPG to ARGB.
-LIBYUV_API
-int MJPGToARGB(const uint8* sample, size_t sample_size,
- uint8* argb, int argb_stride,
- int w, int h, int dw, int dh);
-
-// Internal function - do not call directly.
-// Computes table of cumulative sum for image where the value is the sum
-// of all values above and to the left of the entry. Used by ARGBBlur.
-LIBYUV_API
-int ARGBComputeCumulativeSum(const uint8* src_argb, int src_stride_argb,
- int32* dst_cumsum, int dst_stride32_cumsum,
- int width, int height);
-
-// Blur ARGB image.
-// dst_cumsum table of width * (height + 1) * 16 bytes aligned to
-// 16 byte boundary.
-// dst_stride32_cumsum is number of ints in a row (width * 4).
-// radius is number of pixels around the center. e.g. 1 = 3x3. 2=5x5.
-// Blur is optimized for radius of 5 (11x11) or less.
-LIBYUV_API
-int ARGBBlur(const uint8* src_argb, int src_stride_argb,
- uint8* dst_argb, int dst_stride_argb,
- int32* dst_cumsum, int dst_stride32_cumsum,
- int width, int height, int radius);
-
-// Multiply ARGB image by ARGB value.
-LIBYUV_API
-int ARGBShade(const uint8* src_argb, int src_stride_argb,
- uint8* dst_argb, int dst_stride_argb,
- int width, int height, uint32 value);
-
-// Interpolate between two ARGB images using specified amount of interpolation
-// (0 to 255) and store to destination.
-// 'interpolation' is specified as 8 bit fraction where 0 means 100% src_argb0
-// and 255 means 1% src_argb0 and 99% src_argb1.
-// Internally uses ARGBScale bilinear filtering.
-// Caveat: This function will write up to 16 bytes beyond the end of dst_argb.
-LIBYUV_API
-int ARGBInterpolate(const uint8* src_argb0, int src_stride_argb0,
- const uint8* src_argb1, int src_stride_argb1,
- uint8* dst_argb, int dst_stride_argb,
- int width, int height, int interpolation);
-
-#if defined(__pnacl__) || defined(__CLR_VER) || \
- (defined(__i386__) && !defined(__SSE2__))
-#define LIBYUV_DISABLE_X86
-#endif
-// The following are available on all x86 platforms:
-#if !defined(LIBYUV_DISABLE_X86) && \
- (defined(_M_IX86) || defined(__x86_64__) || defined(__i386__))
-#define HAS_ARGBAFFINEROW_SSE2
-#endif
-
-// Row function for copying pixels from a source with a slope to a row
-// of destination. Useful for scaling, rotation, mirror, texture mapping.
-LIBYUV_API
-void ARGBAffineRow_C(const uint8* src_argb, int src_argb_stride,
- uint8* dst_argb, const float* uv_dudv, int width);
-LIBYUV_API
-void ARGBAffineRow_SSE2(const uint8* src_argb, int src_argb_stride,
- uint8* dst_argb, const float* uv_dudv, int width);
-
-// Shuffle ARGB channel order. e.g. BGRA to ARGB.
-// shuffler is 16 bytes and must be aligned.
-LIBYUV_API
-int ARGBShuffle(const uint8* src_bgra, int src_stride_bgra,
- uint8* dst_argb, int dst_stride_argb,
- const uint8* shuffler, int width, int height);
-
-// Sobel ARGB effect with planar output.
-LIBYUV_API
-int ARGBSobelToPlane(const uint8* src_argb, int src_stride_argb,
- uint8* dst_y, int dst_stride_y,
- int width, int height);
-
-// Sobel ARGB effect.
-LIBYUV_API
-int ARGBSobel(const uint8* src_argb, int src_stride_argb,
- uint8* dst_argb, int dst_stride_argb,
- int width, int height);
-
-// Sobel ARGB effect w/ Sobel X, Sobel, Sobel Y in ARGB.
-LIBYUV_API
-int ARGBSobelXY(const uint8* src_argb, int src_stride_argb,
- uint8* dst_argb, int dst_stride_argb,
- int width, int height);
-
-#ifdef __cplusplus
-} // extern "C"
-} // namespace libyuv
-#endif
-
-#endif // INCLUDE_LIBYUV_PLANAR_FUNCTIONS_H_ NOLINT
diff --git a/third_party/aom/third_party/libyuv/include/libyuv/rotate.h b/third_party/aom/third_party/libyuv/include/libyuv/rotate.h
deleted file mode 100644
index 8a9673f28..000000000
--- a/third_party/aom/third_party/libyuv/include/libyuv/rotate.h
+++ /dev/null
@@ -1,118 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#ifndef INCLUDE_LIBYUV_ROTATE_H_ // NOLINT
-#define INCLUDE_LIBYUV_ROTATE_H_
-
-#include "libyuv/basic_types.h"
-
-#ifdef __cplusplus
-namespace libyuv {
-extern "C" {
-#endif
-
-// Supported rotation.
-typedef enum RotationMode {
- kRotate0 = 0, // No rotation.
- kRotate90 = 90, // Rotate 90 degrees clockwise.
- kRotate180 = 180, // Rotate 180 degrees.
- kRotate270 = 270, // Rotate 270 degrees clockwise.
-
- // Deprecated.
- kRotateNone = 0,
- kRotateClockwise = 90,
- kRotateCounterClockwise = 270,
-} RotationModeEnum;
-
-// Rotate I420 frame.
-LIBYUV_API
-int I420Rotate(const uint8* src_y, int src_stride_y,
- const uint8* src_u, int src_stride_u,
- const uint8* src_v, int src_stride_v,
- uint8* dst_y, int dst_stride_y,
- uint8* dst_u, int dst_stride_u,
- uint8* dst_v, int dst_stride_v,
- int src_width, int src_height, enum RotationMode mode);
-
-// Rotate NV12 input and store in I420.
-LIBYUV_API
-int NV12ToI420Rotate(const uint8* src_y, int src_stride_y,
- const uint8* src_uv, int src_stride_uv,
- uint8* dst_y, int dst_stride_y,
- uint8* dst_u, int dst_stride_u,
- uint8* dst_v, int dst_stride_v,
- int src_width, int src_height, enum RotationMode mode);
-
-// Rotate a plane by 0, 90, 180, or 270.
-LIBYUV_API
-int RotatePlane(const uint8* src, int src_stride,
- uint8* dst, int dst_stride,
- int src_width, int src_height, enum RotationMode mode);
-
-// Rotate planes by 90, 180, 270. Deprecated.
-LIBYUV_API
-void RotatePlane90(const uint8* src, int src_stride,
- uint8* dst, int dst_stride,
- int width, int height);
-
-LIBYUV_API
-void RotatePlane180(const uint8* src, int src_stride,
- uint8* dst, int dst_stride,
- int width, int height);
-
-LIBYUV_API
-void RotatePlane270(const uint8* src, int src_stride,
- uint8* dst, int dst_stride,
- int width, int height);
-
-LIBYUV_API
-void RotateUV90(const uint8* src, int src_stride,
- uint8* dst_a, int dst_stride_a,
- uint8* dst_b, int dst_stride_b,
- int width, int height);
-
-// Rotations for when U and V are interleaved.
-// These functions take one input pointer and
-// split the data into two buffers while
-// rotating them. Deprecated.
-LIBYUV_API
-void RotateUV180(const uint8* src, int src_stride,
- uint8* dst_a, int dst_stride_a,
- uint8* dst_b, int dst_stride_b,
- int width, int height);
-
-LIBYUV_API
-void RotateUV270(const uint8* src, int src_stride,
- uint8* dst_a, int dst_stride_a,
- uint8* dst_b, int dst_stride_b,
- int width, int height);
-
-// The 90 and 270 functions are based on transposes.
-// Doing a transpose with reversing the read/write
-// order will result in a rotation by +- 90 degrees.
-// Deprecated.
-LIBYUV_API
-void TransposePlane(const uint8* src, int src_stride,
- uint8* dst, int dst_stride,
- int width, int height);
-
-LIBYUV_API
-void TransposeUV(const uint8* src, int src_stride,
- uint8* dst_a, int dst_stride_a,
- uint8* dst_b, int dst_stride_b,
- int width, int height);
-
-#ifdef __cplusplus
-} // extern "C"
-} // namespace libyuv
-#endif
-
-#endif // INCLUDE_LIBYUV_ROTATE_H_ NOLINT
diff --git a/third_party/aom/third_party/libyuv/include/libyuv/rotate_argb.h b/third_party/aom/third_party/libyuv/include/libyuv/rotate_argb.h
deleted file mode 100644
index 2bdc8ec6b..000000000
--- a/third_party/aom/third_party/libyuv/include/libyuv/rotate_argb.h
+++ /dev/null
@@ -1,34 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#ifndef INCLUDE_LIBYUV_ROTATE_ARGB_H_ // NOLINT
-#define INCLUDE_LIBYUV_ROTATE_ARGB_H_
-
-#include "libyuv/basic_types.h"
-#include "libyuv/rotate.h" // For RotationMode.
-
-#ifdef __cplusplus
-namespace libyuv {
-extern "C" {
-#endif
-
-// Rotate ARGB frame
-LIBYUV_API
-int ARGBRotate(const uint8* src_argb, int src_stride_argb,
- uint8* dst_argb, int dst_stride_argb,
- int src_width, int src_height, enum RotationMode mode);
-
-#ifdef __cplusplus
-} // extern "C"
-} // namespace libyuv
-#endif
-
-#endif // INCLUDE_LIBYUV_ROTATE_ARGB_H_ NOLINT
diff --git a/third_party/aom/third_party/libyuv/include/libyuv/rotate_row.h b/third_party/aom/third_party/libyuv/include/libyuv/rotate_row.h
deleted file mode 100644
index d0bfbdd2b..000000000
--- a/third_party/aom/third_party/libyuv/include/libyuv/rotate_row.h
+++ /dev/null
@@ -1,139 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#ifndef INCLUDE_LIBYUV_ROTATE_ROW_H_ // NOLINT
-#define INCLUDE_LIBYUV_ROTATE_ROW_H_
-
-#include "libyuv/basic_types.h"
-
-#ifdef __cplusplus
-namespace libyuv {
-extern "C" {
-#endif
-
-#if defined(__pnacl__) || defined(__CLR_VER) || \
- (defined(__i386__) && !defined(__SSE2__))
-#define LIBYUV_DISABLE_X86
-#endif
-
-// Visual C 2012 required for AVX2.
-#if defined(_M_IX86) && !defined(__clang__) && \
- defined(_MSC_VER) && _MSC_VER >= 1700
-#define VISUALC_HAS_AVX2 1
-#endif // VisualStudio >= 2012
-
-// TODO(fbarchard): switch to standard form of inline; fails on clangcl.
-#if !defined(LIBYUV_DISABLE_X86) && \
- (defined(_M_IX86) || defined(__x86_64__) || defined(__i386__))
-#if defined(__APPLE__) && defined(__i386__)
-#define DECLARE_FUNCTION(name) \
- ".text \n" \
- ".private_extern _" #name " \n" \
- ".align 4,0x90 \n" \
-"_" #name ": \n"
-#elif defined(__MINGW32__) || defined(__CYGWIN__) && defined(__i386__)
-#define DECLARE_FUNCTION(name) \
- ".text \n" \
- ".align 4,0x90 \n" \
-"_" #name ": \n"
-#else
-#define DECLARE_FUNCTION(name) \
- ".text \n" \
- ".align 4,0x90 \n" \
-#name ": \n"
-#endif
-#endif
-
-// The following are available for Visual C:
-#if !defined(LIBYUV_DISABLE_X86) && defined(_M_IX86) && \
- defined(_MSC_VER) && !defined(__clang__)
-#define HAS_TRANSPOSEWX8_SSSE3
-#define HAS_TRANSPOSEUVWX8_SSE2
-#endif
-
-// The following are available for GCC but not NaCL:
-#if !defined(LIBYUV_DISABLE_X86) && \
- (defined(__i386__) || (defined(__x86_64__) && !defined(__native_client__)))
-#define HAS_TRANSPOSEWX8_SSSE3
-#endif
-
-// The following are available for 32 bit GCC:
-#if !defined(LIBYUV_DISABLE_X86) && defined(__i386__) && !defined(__clang__)
-#define HAS_TRANSPOSEUVWX8_SSE2
-#endif
-
-// The following are available for 64 bit GCC but not NaCL:
-#if !defined(LIBYUV_DISABLE_X86) && !defined(__native_client__) && \
- defined(__x86_64__)
-#define HAS_TRANSPOSEWX8_FAST_SSSE3
-#define HAS_TRANSPOSEUVWX8_SSE2
-#endif
-
-#if !defined(LIBYUV_DISABLE_NEON) && !defined(__native_client__) && \
- (defined(__ARM_NEON__) || defined(LIBYUV_NEON) || defined(__aarch64__))
-#define HAS_TRANSPOSEWX8_NEON
-#define HAS_TRANSPOSEUVWX8_NEON
-#endif
-
-#if !defined(LIBYUV_DISABLE_MIPS) && !defined(__native_client__) && \
- defined(__mips__) && \
- defined(__mips_dsp) && (__mips_dsp_rev >= 2)
-#define HAS_TRANSPOSEWX8_MIPS_DSPR2
-#define HAS_TRANSPOSEUVWx8_MIPS_DSPR2
-#endif // defined(__mips__)
-
-void TransposeWxH_C(const uint8* src, int src_stride,
- uint8* dst, int dst_stride, int width, int height);
-
-void TransposeWx8_C(const uint8* src, int src_stride,
- uint8* dst, int dst_stride, int width);
-void TransposeWx8_NEON(const uint8* src, int src_stride,
- uint8* dst, int dst_stride, int width);
-void TransposeWx8_SSSE3(const uint8* src, int src_stride,
- uint8* dst, int dst_stride, int width);
-void TransposeWx8_Fast_SSSE3(const uint8* src, int src_stride,
- uint8* dst, int dst_stride, int width);
-void TransposeWx8_MIPS_DSPR2(const uint8* src, int src_stride,
- uint8* dst, int dst_stride, int width);
-
-void TransposeWx8_Any_NEON(const uint8* src, int src_stride,
- uint8* dst, int dst_stride, int width);
-void TransposeWx8_Any_SSSE3(const uint8* src, int src_stride,
- uint8* dst, int dst_stride, int width);
-void TransposeWx8_Fast_Any_SSSE3(const uint8* src, int src_stride,
- uint8* dst, int dst_stride, int width);
-void TransposeWx8_Any_MIPS_DSPR2(const uint8* src, int src_stride,
- uint8* dst, int dst_stride, int width);
-
-void TransposeUVWxH_C(const uint8* src, int src_stride,
- uint8* dst_a, int dst_stride_a,
- uint8* dst_b, int dst_stride_b,
- int width, int height);
-
-void TransposeUVWx8_C(const uint8* src, int src_stride,
- uint8* dst_a, int dst_stride_a,
- uint8* dst_b, int dst_stride_b, int width);
-void TransposeUVWx8_SSE2(const uint8* src, int src_stride,
- uint8* dst_a, int dst_stride_a,
- uint8* dst_b, int dst_stride_b, int width);
-void TransposeUVWx8_NEON(const uint8* src, int src_stride,
- uint8* dst_a, int dst_stride_a,
- uint8* dst_b, int dst_stride_b, int width);
-void TransposeUVWx8_MIPS_DSPR2(const uint8* src, int src_stride,
- uint8* dst_a, int dst_stride_a,
- uint8* dst_b, int dst_stride_b, int width);
-
-#ifdef __cplusplus
-} // extern "C"
-} // namespace libyuv
-#endif
-
-#endif // INCLUDE_LIBYUV_ROTATE_ROW_H_ NOLINT
diff --git a/third_party/aom/third_party/libyuv/include/libyuv/row.h b/third_party/aom/third_party/libyuv/include/libyuv/row.h
deleted file mode 100644
index 5c3187ef7..000000000
--- a/third_party/aom/third_party/libyuv/include/libyuv/row.h
+++ /dev/null
@@ -1,1857 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#ifndef INCLUDE_LIBYUV_ROW_H_ // NOLINT
-#define INCLUDE_LIBYUV_ROW_H_
-
-#include <stdlib.h> // For malloc.
-
-#include "libyuv/basic_types.h"
-
-#ifdef __cplusplus
-namespace libyuv {
-extern "C" {
-#endif
-
-#define IS_ALIGNED(p, a) (!((uintptr_t)(p) & ((a) - 1)))
-
-#ifdef __cplusplus
-#define align_buffer_64(var, size) \
- uint8* var##_mem = reinterpret_cast<uint8*>(malloc((size) + 63)); \
- uint8* var = reinterpret_cast<uint8*> \
- ((reinterpret_cast<intptr_t>(var##_mem) + 63) & ~63)
-#else
-#define align_buffer_64(var, size) \
- uint8* var##_mem = (uint8*)(malloc((size) + 63)); /* NOLINT */ \
- uint8* var = (uint8*)(((intptr_t)(var##_mem) + 63) & ~63) /* NOLINT */
-#endif
-
-#define free_aligned_buffer_64(var) \
- free(var##_mem); \
- var = 0
-
-#if defined(__pnacl__) || defined(__CLR_VER) || \
- (defined(__i386__) && !defined(__SSE2__))
-#define LIBYUV_DISABLE_X86
-#endif
-// True if compiling for SSSE3 as a requirement.
-#if defined(__SSSE3__) || (defined(_M_IX86_FP) && (_M_IX86_FP >= 3))
-#define LIBYUV_SSSE3_ONLY
-#endif
-
-#if defined(__native_client__)
-#define LIBYUV_DISABLE_NEON
-#endif
-// clang >= 3.5.0 required for Arm64.
-#if defined(__clang__) && defined(__aarch64__) && !defined(LIBYUV_DISABLE_NEON)
-#if (__clang_major__ < 3) || (__clang_major__ == 3 && (__clang_minor__ < 5))
-#define LIBYUV_DISABLE_NEON
-#endif // clang >= 3.5
-#endif // __clang__
-
-// The following are available on all x86 platforms:
-#if !defined(LIBYUV_DISABLE_X86) && \
- (defined(_M_IX86) || defined(__x86_64__) || defined(__i386__))
-// Conversions:
-#define HAS_ABGRTOUVROW_SSSE3
-#define HAS_ABGRTOYROW_SSSE3
-#define HAS_ARGB1555TOARGBROW_SSE2
-#define HAS_ARGB4444TOARGBROW_SSE2
-#define HAS_ARGBSETROW_X86
-#define HAS_ARGBSHUFFLEROW_SSE2
-#define HAS_ARGBSHUFFLEROW_SSSE3
-#define HAS_ARGBTOARGB1555ROW_SSE2
-#define HAS_ARGBTOARGB4444ROW_SSE2
-#define HAS_ARGBTORAWROW_SSSE3
-#define HAS_ARGBTORGB24ROW_SSSE3
-#define HAS_ARGBTORGB565ROW_SSE2
-#define HAS_ARGBTOUV422ROW_SSSE3
-#define HAS_ARGBTOUV444ROW_SSSE3
-#define HAS_ARGBTOUVJROW_SSSE3
-#define HAS_ARGBTOUVROW_SSSE3
-#define HAS_ARGBTOYJROW_SSSE3
-#define HAS_ARGBTOYROW_SSSE3
-#define HAS_BGRATOUVROW_SSSE3
-#define HAS_BGRATOYROW_SSSE3
-#define HAS_COPYROW_ERMS
-#define HAS_COPYROW_SSE2
-#define HAS_I400TOARGBROW_SSE2
-#define HAS_I411TOARGBROW_SSSE3
-#define HAS_I422TOABGRROW_SSSE3
-#define HAS_I422TOARGB1555ROW_SSSE3
-#define HAS_I422TOARGB4444ROW_SSSE3
-#define HAS_I422TOARGBROW_SSSE3
-#define HAS_I422TOBGRAROW_SSSE3
-#define HAS_I422TORAWROW_SSSE3
-#define HAS_I422TORGB24ROW_SSSE3
-#define HAS_I422TORGB565ROW_SSSE3
-#define HAS_I422TORGBAROW_SSSE3
-#define HAS_I422TOUYVYROW_SSE2
-#define HAS_I422TOYUY2ROW_SSE2
-#define HAS_I444TOARGBROW_SSSE3
-#define HAS_J400TOARGBROW_SSE2
-#define HAS_J422TOARGBROW_SSSE3
-#define HAS_MERGEUVROW_SSE2
-#define HAS_MIRRORROW_SSE2
-#define HAS_MIRRORROW_SSSE3
-#define HAS_MIRRORROW_UV_SSSE3
-#define HAS_MIRRORUVROW_SSSE3
-#define HAS_NV12TOARGBROW_SSSE3
-#define HAS_NV12TORGB565ROW_SSSE3
-#define HAS_NV21TOARGBROW_SSSE3
-#define HAS_NV21TORGB565ROW_SSSE3
-#define HAS_RAWTOARGBROW_SSSE3
-#define HAS_RAWTOYROW_SSSE3
-#define HAS_RGB24TOARGBROW_SSSE3
-#define HAS_RGB24TOYROW_SSSE3
-#define HAS_RGB565TOARGBROW_SSE2
-#define HAS_RGBATOUVROW_SSSE3
-#define HAS_RGBATOYROW_SSSE3
-#define HAS_SETROW_ERMS
-#define HAS_SETROW_X86
-#define HAS_SPLITUVROW_SSE2
-#define HAS_UYVYTOARGBROW_SSSE3
-#define HAS_UYVYTOUV422ROW_SSE2
-#define HAS_UYVYTOUVROW_SSE2
-#define HAS_UYVYTOYROW_SSE2
-#define HAS_YUY2TOARGBROW_SSSE3
-#define HAS_YUY2TOUV422ROW_SSE2
-#define HAS_YUY2TOUVROW_SSE2
-#define HAS_YUY2TOYROW_SSE2
-
-// Effects:
-#define HAS_ARGBADDROW_SSE2
-#define HAS_ARGBAFFINEROW_SSE2
-#define HAS_ARGBATTENUATEROW_SSSE3
-#define HAS_ARGBBLENDROW_SSSE3
-#define HAS_ARGBCOLORMATRIXROW_SSSE3
-#define HAS_ARGBCOLORTABLEROW_X86
-#define HAS_ARGBCOPYALPHAROW_SSE2
-#define HAS_ARGBCOPYYTOALPHAROW_SSE2
-#define HAS_ARGBGRAYROW_SSSE3
-#define HAS_ARGBLUMACOLORTABLEROW_SSSE3
-#define HAS_ARGBMIRRORROW_SSE2
-#define HAS_ARGBMULTIPLYROW_SSE2
-#define HAS_ARGBPOLYNOMIALROW_SSE2
-#define HAS_ARGBQUANTIZEROW_SSE2
-#define HAS_ARGBSEPIAROW_SSSE3
-#define HAS_ARGBSHADEROW_SSE2
-#define HAS_ARGBSUBTRACTROW_SSE2
-#define HAS_ARGBUNATTENUATEROW_SSE2
-#define HAS_COMPUTECUMULATIVESUMROW_SSE2
-#define HAS_CUMULATIVESUMTOAVERAGEROW_SSE2
-#define HAS_INTERPOLATEROW_SSE2
-#define HAS_INTERPOLATEROW_SSSE3
-#define HAS_RGBCOLORTABLEROW_X86
-#define HAS_SOBELROW_SSE2
-#define HAS_SOBELTOPLANEROW_SSE2
-#define HAS_SOBELXROW_SSE2
-#define HAS_SOBELXYROW_SSE2
-#define HAS_SOBELYROW_SSE2
-#endif
-
-// The following are available on x64 Visual C and clangcl.
-#if !defined(LIBYUV_DISABLE_X86) && defined (_M_X64) && \
- (!defined(__clang__) || defined(__SSSE3__))
-#define HAS_I422TOARGBROW_SSSE3
-#endif
-
-// GCC >= 4.7.0 required for AVX2.
-#if defined(__GNUC__) && (defined(__x86_64__) || defined(__i386__))
-#if (__GNUC__ > 4) || (__GNUC__ == 4 && (__GNUC_MINOR__ >= 7))
-#define GCC_HAS_AVX2 1
-#endif // GNUC >= 4.7
-#endif // __GNUC__
-
-// clang >= 3.4.0 required for AVX2.
-#if defined(__clang__) && (defined(__x86_64__) || defined(__i386__))
-#if (__clang_major__ > 3) || (__clang_major__ == 3 && (__clang_minor__ >= 4))
-#define CLANG_HAS_AVX2 1
-#endif // clang >= 3.4
-#endif // __clang__
-
-// Visual C 2012 required for AVX2.
-#if defined(_M_IX86) && !defined(__clang__) && \
- defined(_MSC_VER) && _MSC_VER >= 1700
-#define VISUALC_HAS_AVX2 1
-#endif // VisualStudio >= 2012
-
-// The following are available require VS2012. Port to GCC.
-#if !defined(LIBYUV_DISABLE_X86) && defined(VISUALC_HAS_AVX2)
-#define HAS_ARGB1555TOARGBROW_AVX2
-#define HAS_ARGB4444TOARGBROW_AVX2
-#define HAS_ARGBTOARGB1555ROW_AVX2
-#define HAS_ARGBTOARGB4444ROW_AVX2
-#define HAS_ARGBTORGB565DITHERROW_AVX2
-#define HAS_ARGBTORGB565DITHERROW_SSE2
-#define HAS_ARGBTORGB565ROW_AVX2
-#define HAS_I411TOARGBROW_AVX2
-#define HAS_I422TOARGB1555ROW_AVX2
-#define HAS_I422TOARGB4444ROW_AVX2
-#define HAS_I422TORGB565ROW_AVX2
-#define HAS_I444TOARGBROW_AVX2
-#define HAS_J400TOARGBROW_AVX2
-#define HAS_NV12TOARGBROW_AVX2
-#define HAS_NV12TORGB565ROW_AVX2
-#define HAS_NV21TOARGBROW_AVX2
-#define HAS_NV21TORGB565ROW_AVX2
-#define HAS_RGB565TOARGBROW_AVX2
-#endif
-
-// The following are available on all x86 platforms, but
-// require VS2012, clang 3.4 or gcc 4.7.
-// The code supports NaCL but requires a new compiler and validator.
-#if !defined(LIBYUV_DISABLE_X86) && (defined(VISUALC_HAS_AVX2) || \
- defined(CLANG_HAS_AVX2) || defined(GCC_HAS_AVX2))
-#define HAS_ARGBCOPYALPHAROW_AVX2
-#define HAS_ARGBCOPYYTOALPHAROW_AVX2
-#define HAS_ARGBMIRRORROW_AVX2
-#define HAS_ARGBPOLYNOMIALROW_AVX2
-#define HAS_ARGBSHUFFLEROW_AVX2
-#define HAS_ARGBTOUVROW_AVX2
-#define HAS_ARGBTOYJROW_AVX2
-#define HAS_ARGBTOYROW_AVX2
-#define HAS_COPYROW_AVX
-#define HAS_I400TOARGBROW_AVX2
-#define HAS_I422TOABGRROW_AVX2
-#define HAS_I422TOARGBROW_AVX2
-#define HAS_I422TOBGRAROW_AVX2
-#define HAS_I422TORAWROW_AVX2
-#define HAS_I422TORGB24ROW_AVX2
-#define HAS_I422TORGBAROW_AVX2
-#define HAS_INTERPOLATEROW_AVX2
-#define HAS_J422TOARGBROW_AVX2
-#define HAS_MERGEUVROW_AVX2
-#define HAS_MIRRORROW_AVX2
-#define HAS_SPLITUVROW_AVX2
-#define HAS_UYVYTOARGBROW_AVX2
-#define HAS_UYVYTOUV422ROW_AVX2
-#define HAS_UYVYTOUVROW_AVX2
-#define HAS_UYVYTOYROW_AVX2
-#define HAS_YUY2TOARGBROW_AVX2
-#define HAS_YUY2TOUV422ROW_AVX2
-#define HAS_YUY2TOUVROW_AVX2
-#define HAS_YUY2TOYROW_AVX2
-
-// Effects:
-#define HAS_ARGBADDROW_AVX2
-#define HAS_ARGBATTENUATEROW_AVX2
-#define HAS_ARGBMULTIPLYROW_AVX2
-#define HAS_ARGBSUBTRACTROW_AVX2
-#define HAS_ARGBUNATTENUATEROW_AVX2
-#endif
-
-// The following are disabled when SSSE3 is available:
-#if !defined(LIBYUV_DISABLE_X86) && \
- (defined(_M_IX86) || defined(__x86_64__) || defined(__i386__)) && \
- !defined(LIBYUV_SSSE3_ONLY)
-#define HAS_ARGBATTENUATEROW_SSE2
-#define HAS_ARGBBLENDROW_SSE2
-#define HAS_MIRRORROW_SSE2
-#endif
-
-// The following are available on Neon platforms:
-#if !defined(LIBYUV_DISABLE_NEON) && \
- (defined(__aarch64__) || defined(__ARM_NEON__) || defined(LIBYUV_NEON))
-#define HAS_ABGRTOUVROW_NEON
-#define HAS_ABGRTOYROW_NEON
-#define HAS_ARGB1555TOARGBROW_NEON
-#define HAS_ARGB1555TOUVROW_NEON
-#define HAS_ARGB1555TOYROW_NEON
-#define HAS_ARGB4444TOARGBROW_NEON
-#define HAS_ARGB4444TOUVROW_NEON
-#define HAS_ARGB4444TOYROW_NEON
-#define HAS_ARGBTOARGB1555ROW_NEON
-#define HAS_ARGBTOARGB4444ROW_NEON
-#define HAS_ARGBTORAWROW_NEON
-#define HAS_ARGBTORGB24ROW_NEON
-#define HAS_ARGBTORGB565ROW_NEON
-#define HAS_ARGBTOUV411ROW_NEON
-#define HAS_ARGBTOUV422ROW_NEON
-#define HAS_ARGBTOUV444ROW_NEON
-#define HAS_ARGBTOUVJROW_NEON
-#define HAS_ARGBTOUVROW_NEON
-#define HAS_ARGBTOYJROW_NEON
-#define HAS_ARGBTOYROW_NEON
-#define HAS_BGRATOUVROW_NEON
-#define HAS_BGRATOYROW_NEON
-#define HAS_COPYROW_NEON
-#define HAS_J400TOARGBROW_NEON
-#define HAS_I411TOARGBROW_NEON
-#define HAS_I422TOABGRROW_NEON
-#define HAS_I422TOARGB1555ROW_NEON
-#define HAS_I422TOARGB4444ROW_NEON
-#define HAS_I422TOARGBROW_NEON
-#define HAS_I422TOBGRAROW_NEON
-#define HAS_I422TORAWROW_NEON
-#define HAS_I422TORGB24ROW_NEON
-#define HAS_I422TORGB565ROW_NEON
-#define HAS_I422TORGBAROW_NEON
-#define HAS_I422TOUYVYROW_NEON
-#define HAS_I422TOYUY2ROW_NEON
-#define HAS_I444TOARGBROW_NEON
-#define HAS_MERGEUVROW_NEON
-#define HAS_MIRRORROW_NEON
-#define HAS_MIRRORUVROW_NEON
-#define HAS_NV12TOARGBROW_NEON
-#define HAS_NV12TORGB565ROW_NEON
-#define HAS_NV21TOARGBROW_NEON
-#define HAS_NV21TORGB565ROW_NEON
-#define HAS_RAWTOARGBROW_NEON
-#define HAS_RAWTOUVROW_NEON
-#define HAS_RAWTOYROW_NEON
-#define HAS_RGB24TOARGBROW_NEON
-#define HAS_RGB24TOUVROW_NEON
-#define HAS_RGB24TOYROW_NEON
-#define HAS_RGB565TOARGBROW_NEON
-#define HAS_RGB565TOUVROW_NEON
-#define HAS_RGB565TOYROW_NEON
-#define HAS_RGBATOUVROW_NEON
-#define HAS_RGBATOYROW_NEON
-#define HAS_SETROW_NEON
-#define HAS_ARGBSETROW_NEON
-#define HAS_SPLITUVROW_NEON
-#define HAS_UYVYTOARGBROW_NEON
-#define HAS_UYVYTOUV422ROW_NEON
-#define HAS_UYVYTOUVROW_NEON
-#define HAS_UYVYTOYROW_NEON
-#define HAS_I400TOARGBROW_NEON
-#define HAS_YUY2TOARGBROW_NEON
-#define HAS_YUY2TOUV422ROW_NEON
-#define HAS_YUY2TOUVROW_NEON
-#define HAS_YUY2TOYROW_NEON
-#define HAS_ARGBTORGB565DITHERROW_NEON
-
-// Effects:
-#define HAS_ARGBADDROW_NEON
-#define HAS_ARGBATTENUATEROW_NEON
-#define HAS_ARGBBLENDROW_NEON
-#define HAS_ARGBGRAYROW_NEON
-#define HAS_ARGBMIRRORROW_NEON
-#define HAS_ARGBMULTIPLYROW_NEON
-#define HAS_ARGBQUANTIZEROW_NEON
-#define HAS_ARGBSEPIAROW_NEON
-#define HAS_ARGBSHADEROW_NEON
-#define HAS_ARGBSUBTRACTROW_NEON
-#define HAS_INTERPOLATEROW_NEON
-#define HAS_SOBELROW_NEON
-#define HAS_SOBELTOPLANEROW_NEON
-#define HAS_SOBELXROW_NEON
-#define HAS_SOBELXYROW_NEON
-#define HAS_SOBELYROW_NEON
-#define HAS_ARGBCOLORMATRIXROW_NEON
-#define HAS_ARGBSHUFFLEROW_NEON
-#endif
-
-// The following are available on Mips platforms:
-#if !defined(LIBYUV_DISABLE_MIPS) && defined(__mips__) && \
- (_MIPS_SIM == _MIPS_SIM_ABI32) && (__mips_isa_rev < 6)
-#define HAS_COPYROW_MIPS
-#if defined(__mips_dsp) && (__mips_dsp_rev >= 2)
-#define HAS_I422TOABGRROW_MIPS_DSPR2
-#define HAS_I422TOARGBROW_MIPS_DSPR2
-#define HAS_I422TOBGRAROW_MIPS_DSPR2
-#define HAS_INTERPOLATEROW_MIPS_DSPR2
-#define HAS_MIRRORROW_MIPS_DSPR2
-#define HAS_MIRRORUVROW_MIPS_DSPR2
-#define HAS_SPLITUVROW_MIPS_DSPR2
-#endif
-#endif
-
-#if defined(_MSC_VER) && !defined(__CLR_VER)
-#define SIMD_ALIGNED(var) __declspec(align(16)) var
-#define SIMD_ALIGNED32(var) __declspec(align(64)) var
-typedef __declspec(align(16)) int16 vec16[8];
-typedef __declspec(align(16)) int32 vec32[4];
-typedef __declspec(align(16)) int8 vec8[16];
-typedef __declspec(align(16)) uint16 uvec16[8];
-typedef __declspec(align(16)) uint32 uvec32[4];
-typedef __declspec(align(16)) uint8 uvec8[16];
-typedef __declspec(align(32)) int16 lvec16[16];
-typedef __declspec(align(32)) int32 lvec32[8];
-typedef __declspec(align(32)) int8 lvec8[32];
-typedef __declspec(align(32)) uint16 ulvec16[16];
-typedef __declspec(align(32)) uint32 ulvec32[8];
-typedef __declspec(align(32)) uint8 ulvec8[32];
-#elif defined(__GNUC__)
-// Caveat GCC 4.2 to 4.7 have a known issue using vectors with const.
-#define SIMD_ALIGNED(var) var __attribute__((aligned(16)))
-#define SIMD_ALIGNED32(var) var __attribute__((aligned(64)))
-typedef int16 __attribute__((vector_size(16))) vec16;
-typedef int32 __attribute__((vector_size(16))) vec32;
-typedef int8 __attribute__((vector_size(16))) vec8;
-typedef uint16 __attribute__((vector_size(16))) uvec16;
-typedef uint32 __attribute__((vector_size(16))) uvec32;
-typedef uint8 __attribute__((vector_size(16))) uvec8;
-typedef int16 __attribute__((vector_size(32))) lvec16;
-typedef int32 __attribute__((vector_size(32))) lvec32;
-typedef int8 __attribute__((vector_size(32))) lvec8;
-typedef uint16 __attribute__((vector_size(32))) ulvec16;
-typedef uint32 __attribute__((vector_size(32))) ulvec32;
-typedef uint8 __attribute__((vector_size(32))) ulvec8;
-#else
-#define SIMD_ALIGNED(var) var
-#define SIMD_ALIGNED32(var) var
-typedef int16 vec16[8];
-typedef int32 vec32[4];
-typedef int8 vec8[16];
-typedef uint16 uvec16[8];
-typedef uint32 uvec32[4];
-typedef uint8 uvec8[16];
-typedef int16 lvec16[16];
-typedef int32 lvec32[8];
-typedef int8 lvec8[32];
-typedef uint16 ulvec16[16];
-typedef uint32 ulvec32[8];
-typedef uint8 ulvec8[32];
-#endif
-
-#if defined(__APPLE__) || defined(__x86_64__) || defined(__llvm__)
-#define OMITFP
-#else
-#define OMITFP __attribute__((optimize("omit-frame-pointer")))
-#endif
-
-// NaCL macros for GCC x86 and x64.
-#if defined(__native_client__)
-#define LABELALIGN ".p2align 5\n"
-#else
-#define LABELALIGN
-#endif
-#if defined(__native_client__) && defined(__x86_64__)
-// r14 is used for MEMOP macros.
-#define NACL_R14 "r14",
-#define BUNDLELOCK ".bundle_lock\n"
-#define BUNDLEUNLOCK ".bundle_unlock\n"
-#define MEMACCESS(base) "%%nacl:(%%r15,%q" #base ")"
-#define MEMACCESS2(offset, base) "%%nacl:" #offset "(%%r15,%q" #base ")"
-#define MEMLEA(offset, base) #offset "(%q" #base ")"
-#define MEMLEA3(offset, index, scale) \
- #offset "(,%q" #index "," #scale ")"
-#define MEMLEA4(offset, base, index, scale) \
- #offset "(%q" #base ",%q" #index "," #scale ")"
-#define MEMMOVESTRING(s, d) "%%nacl:(%q" #s "),%%nacl:(%q" #d "), %%r15"
-#define MEMSTORESTRING(reg, d) "%%" #reg ",%%nacl:(%q" #d "), %%r15"
-#define MEMOPREG(opcode, offset, base, index, scale, reg) \
- BUNDLELOCK \
- "lea " #offset "(%q" #base ",%q" #index "," #scale "),%%r14d\n" \
- #opcode " (%%r15,%%r14),%%" #reg "\n" \
- BUNDLEUNLOCK
-#define MEMOPMEM(opcode, reg, offset, base, index, scale) \
- BUNDLELOCK \
- "lea " #offset "(%q" #base ",%q" #index "," #scale "),%%r14d\n" \
- #opcode " %%" #reg ",(%%r15,%%r14)\n" \
- BUNDLEUNLOCK
-#define MEMOPARG(opcode, offset, base, index, scale, arg) \
- BUNDLELOCK \
- "lea " #offset "(%q" #base ",%q" #index "," #scale "),%%r14d\n" \
- #opcode " (%%r15,%%r14),%" #arg "\n" \
- BUNDLEUNLOCK
-#define VMEMOPREG(opcode, offset, base, index, scale, reg1, reg2) \
- BUNDLELOCK \
- "lea " #offset "(%q" #base ",%q" #index "," #scale "),%%r14d\n" \
- #opcode " (%%r15,%%r14),%%" #reg1 ",%%" #reg2 "\n" \
- BUNDLEUNLOCK
-#define VEXTOPMEM(op, sel, reg, offset, base, index, scale) \
- BUNDLELOCK \
- "lea " #offset "(%q" #base ",%q" #index "," #scale "),%%r14d\n" \
- #op " $" #sel ",%%" #reg ",(%%r15,%%r14)\n" \
- BUNDLEUNLOCK
-#else // defined(__native_client__) && defined(__x86_64__)
-#define NACL_R14
-#define BUNDLEALIGN
-#define MEMACCESS(base) "(%" #base ")"
-#define MEMACCESS2(offset, base) #offset "(%" #base ")"
-#define MEMLEA(offset, base) #offset "(%" #base ")"
-#define MEMLEA3(offset, index, scale) \
- #offset "(,%" #index "," #scale ")"
-#define MEMLEA4(offset, base, index, scale) \
- #offset "(%" #base ",%" #index "," #scale ")"
-#define MEMMOVESTRING(s, d)
-#define MEMSTORESTRING(reg, d)
-#define MEMOPREG(opcode, offset, base, index, scale, reg) \
- #opcode " " #offset "(%" #base ",%" #index "," #scale "),%%" #reg "\n"
-#define MEMOPMEM(opcode, reg, offset, base, index, scale) \
- #opcode " %%" #reg ","#offset "(%" #base ",%" #index "," #scale ")\n"
-#define MEMOPARG(opcode, offset, base, index, scale, arg) \
- #opcode " " #offset "(%" #base ",%" #index "," #scale "),%" #arg "\n"
-#define VMEMOPREG(opcode, offset, base, index, scale, reg1, reg2) \
- #opcode " " #offset "(%" #base ",%" #index "," #scale "),%%" #reg1 ",%%" \
- #reg2 "\n"
-#define VEXTOPMEM(op, sel, reg, offset, base, index, scale) \
- #op " $" #sel ",%%" #reg ","#offset "(%" #base ",%" #index "," #scale ")\n"
-#endif // defined(__native_client__) && defined(__x86_64__)
-
-#if defined(__arm__) || defined(__aarch64__)
-#undef MEMACCESS
-#if defined(__native_client__)
-#define MEMACCESS(base) ".p2align 3\nbic %" #base ", #0xc0000000\n"
-#else
-#define MEMACCESS(base)
-#endif
-#endif
-
-void I444ToARGBRow_NEON(const uint8* src_y,
- const uint8* src_u,
- const uint8* src_v,
- uint8* dst_argb,
- int width);
-void I422ToARGBRow_NEON(const uint8* src_y,
- const uint8* src_u,
- const uint8* src_v,
- uint8* dst_argb,
- int width);
-void I411ToARGBRow_NEON(const uint8* src_y,
- const uint8* src_u,
- const uint8* src_v,
- uint8* dst_argb,
- int width);
-void I422ToBGRARow_NEON(const uint8* src_y,
- const uint8* src_u,
- const uint8* src_v,
- uint8* dst_bgra,
- int width);
-void I422ToABGRRow_NEON(const uint8* src_y,
- const uint8* src_u,
- const uint8* src_v,
- uint8* dst_abgr,
- int width);
-void I422ToRGBARow_NEON(const uint8* src_y,
- const uint8* src_u,
- const uint8* src_v,
- uint8* dst_rgba,
- int width);
-void I422ToRGB24Row_NEON(const uint8* src_y,
- const uint8* src_u,
- const uint8* src_v,
- uint8* dst_rgb24,
- int width);
-void I422ToRAWRow_NEON(const uint8* src_y,
- const uint8* src_u,
- const uint8* src_v,
- uint8* dst_raw,
- int width);
-void I422ToRGB565Row_NEON(const uint8* src_y,
- const uint8* src_u,
- const uint8* src_v,
- uint8* dst_rgb565,
- int width);
-void I422ToARGB1555Row_NEON(const uint8* src_y,
- const uint8* src_u,
- const uint8* src_v,
- uint8* dst_argb1555,
- int width);
-void I422ToARGB4444Row_NEON(const uint8* src_y,
- const uint8* src_u,
- const uint8* src_v,
- uint8* dst_argb4444,
- int width);
-void NV12ToARGBRow_NEON(const uint8* src_y,
- const uint8* src_uv,
- uint8* dst_argb,
- int width);
-void NV21ToARGBRow_NEON(const uint8* src_y,
- const uint8* src_vu,
- uint8* dst_argb,
- int width);
-void NV12ToRGB565Row_NEON(const uint8* src_y,
- const uint8* src_uv,
- uint8* dst_rgb565,
- int width);
-void NV21ToRGB565Row_NEON(const uint8* src_y,
- const uint8* src_vu,
- uint8* dst_rgb565,
- int width);
-void YUY2ToARGBRow_NEON(const uint8* src_yuy2,
- uint8* dst_argb,
- int width);
-void UYVYToARGBRow_NEON(const uint8* src_uyvy,
- uint8* dst_argb,
- int width);
-
-void ARGBToYRow_AVX2(const uint8* src_argb, uint8* dst_y, int pix);
-void ARGBToYRow_Any_AVX2(const uint8* src_argb, uint8* dst_y, int pix);
-void ARGBToYRow_SSSE3(const uint8* src_argb, uint8* dst_y, int pix);
-void ARGBToYJRow_AVX2(const uint8* src_argb, uint8* dst_y, int pix);
-void ARGBToYJRow_Any_AVX2(const uint8* src_argb, uint8* dst_y, int pix);
-void ARGBToYJRow_SSSE3(const uint8* src_argb, uint8* dst_y, int pix);
-void BGRAToYRow_SSSE3(const uint8* src_bgra, uint8* dst_y, int pix);
-void ABGRToYRow_SSSE3(const uint8* src_abgr, uint8* dst_y, int pix);
-void RGBAToYRow_SSSE3(const uint8* src_rgba, uint8* dst_y, int pix);
-void RGB24ToYRow_SSSE3(const uint8* src_rgb24, uint8* dst_y, int pix);
-void RAWToYRow_SSSE3(const uint8* src_raw, uint8* dst_y, int pix);
-void ARGBToYRow_NEON(const uint8* src_argb, uint8* dst_y, int pix);
-void ARGBToYJRow_NEON(const uint8* src_argb, uint8* dst_y, int pix);
-void ARGBToUV444Row_NEON(const uint8* src_argb, uint8* dst_u, uint8* dst_v,
- int pix);
-void ARGBToUV422Row_NEON(const uint8* src_argb, uint8* dst_u, uint8* dst_v,
- int pix);
-void ARGBToUV411Row_NEON(const uint8* src_argb, uint8* dst_u, uint8* dst_v,
- int pix);
-void ARGBToUVRow_NEON(const uint8* src_argb, int src_stride_argb,
- uint8* dst_u, uint8* dst_v, int pix);
-void ARGBToUVJRow_NEON(const uint8* src_argb, int src_stride_argb,
- uint8* dst_u, uint8* dst_v, int pix);
-void BGRAToUVRow_NEON(const uint8* src_bgra, int src_stride_bgra,
- uint8* dst_u, uint8* dst_v, int pix);
-void ABGRToUVRow_NEON(const uint8* src_abgr, int src_stride_abgr,
- uint8* dst_u, uint8* dst_v, int pix);
-void RGBAToUVRow_NEON(const uint8* src_rgba, int src_stride_rgba,
- uint8* dst_u, uint8* dst_v, int pix);
-void RGB24ToUVRow_NEON(const uint8* src_rgb24, int src_stride_rgb24,
- uint8* dst_u, uint8* dst_v, int pix);
-void RAWToUVRow_NEON(const uint8* src_raw, int src_stride_raw,
- uint8* dst_u, uint8* dst_v, int pix);
-void RGB565ToUVRow_NEON(const uint8* src_rgb565, int src_stride_rgb565,
- uint8* dst_u, uint8* dst_v, int pix);
-void ARGB1555ToUVRow_NEON(const uint8* src_argb1555, int src_stride_argb1555,
- uint8* dst_u, uint8* dst_v, int pix);
-void ARGB4444ToUVRow_NEON(const uint8* src_argb4444, int src_stride_argb4444,
- uint8* dst_u, uint8* dst_v, int pix);
-void BGRAToYRow_NEON(const uint8* src_bgra, uint8* dst_y, int pix);
-void ABGRToYRow_NEON(const uint8* src_abgr, uint8* dst_y, int pix);
-void RGBAToYRow_NEON(const uint8* src_rgba, uint8* dst_y, int pix);
-void RGB24ToYRow_NEON(const uint8* src_rgb24, uint8* dst_y, int pix);
-void RAWToYRow_NEON(const uint8* src_raw, uint8* dst_y, int pix);
-void RGB565ToYRow_NEON(const uint8* src_rgb565, uint8* dst_y, int pix);
-void ARGB1555ToYRow_NEON(const uint8* src_argb1555, uint8* dst_y, int pix);
-void ARGB4444ToYRow_NEON(const uint8* src_argb4444, uint8* dst_y, int pix);
-void ARGBToYRow_C(const uint8* src_argb, uint8* dst_y, int pix);
-void ARGBToYJRow_C(const uint8* src_argb, uint8* dst_y, int pix);
-void BGRAToYRow_C(const uint8* src_bgra, uint8* dst_y, int pix);
-void ABGRToYRow_C(const uint8* src_abgr, uint8* dst_y, int pix);
-void RGBAToYRow_C(const uint8* src_rgba, uint8* dst_y, int pix);
-void RGB24ToYRow_C(const uint8* src_rgb24, uint8* dst_y, int pix);
-void RAWToYRow_C(const uint8* src_raw, uint8* dst_y, int pix);
-void RGB565ToYRow_C(const uint8* src_rgb565, uint8* dst_y, int pix);
-void ARGB1555ToYRow_C(const uint8* src_argb1555, uint8* dst_y, int pix);
-void ARGB4444ToYRow_C(const uint8* src_argb4444, uint8* dst_y, int pix);
-void ARGBToYRow_Any_SSSE3(const uint8* src_argb, uint8* dst_y, int pix);
-void ARGBToYJRow_Any_SSSE3(const uint8* src_argb, uint8* dst_y, int pix);
-void BGRAToYRow_Any_SSSE3(const uint8* src_bgra, uint8* dst_y, int pix);
-void ABGRToYRow_Any_SSSE3(const uint8* src_abgr, uint8* dst_y, int pix);
-void RGBAToYRow_Any_SSSE3(const uint8* src_rgba, uint8* dst_y, int pix);
-void RGB24ToYRow_Any_SSSE3(const uint8* src_rgb24, uint8* dst_y, int pix);
-void RAWToYRow_Any_SSSE3(const uint8* src_raw, uint8* dst_y, int pix);
-void ARGBToYRow_Any_NEON(const uint8* src_argb, uint8* dst_y, int pix);
-void ARGBToYJRow_Any_NEON(const uint8* src_argb, uint8* dst_y, int pix);
-void BGRAToYRow_Any_NEON(const uint8* src_bgra, uint8* dst_y, int pix);
-void ABGRToYRow_Any_NEON(const uint8* src_abgr, uint8* dst_y, int pix);
-void RGBAToYRow_Any_NEON(const uint8* src_rgba, uint8* dst_y, int pix);
-void RGB24ToYRow_Any_NEON(const uint8* src_rgb24, uint8* dst_y, int pix);
-void RAWToYRow_Any_NEON(const uint8* src_raw, uint8* dst_y, int pix);
-void RGB565ToYRow_Any_NEON(const uint8* src_rgb565, uint8* dst_y, int pix);
-void ARGB1555ToYRow_Any_NEON(const uint8* src_argb1555, uint8* dst_y, int pix);
-void ARGB4444ToYRow_Any_NEON(const uint8* src_argb4444, uint8* dst_y, int pix);
-
-void ARGBToUVRow_AVX2(const uint8* src_argb, int src_stride_argb,
- uint8* dst_u, uint8* dst_v, int width);
-void ARGBToUVRow_Any_AVX2(const uint8* src_argb, int src_stride_argb,
- uint8* dst_u, uint8* dst_v, int width);
-void ARGBToUVRow_SSSE3(const uint8* src_argb, int src_stride_argb,
- uint8* dst_u, uint8* dst_v, int width);
-void ARGBToUVJRow_SSSE3(const uint8* src_argb, int src_stride_argb,
- uint8* dst_u, uint8* dst_v, int width);
-void BGRAToUVRow_SSSE3(const uint8* src_bgra, int src_stride_bgra,
- uint8* dst_u, uint8* dst_v, int width);
-void ABGRToUVRow_SSSE3(const uint8* src_abgr, int src_stride_abgr,
- uint8* dst_u, uint8* dst_v, int width);
-void RGBAToUVRow_SSSE3(const uint8* src_rgba, int src_stride_rgba,
- uint8* dst_u, uint8* dst_v, int width);
-void ARGBToUVRow_Any_SSSE3(const uint8* src_argb, int src_stride_argb,
- uint8* dst_u, uint8* dst_v, int width);
-void ARGBToUVJRow_Any_SSSE3(const uint8* src_argb, int src_stride_argb,
- uint8* dst_u, uint8* dst_v, int width);
-void BGRAToUVRow_Any_SSSE3(const uint8* src_bgra, int src_stride_bgra,
- uint8* dst_u, uint8* dst_v, int width);
-void ABGRToUVRow_Any_SSSE3(const uint8* src_abgr, int src_stride_abgr,
- uint8* dst_u, uint8* dst_v, int width);
-void RGBAToUVRow_Any_SSSE3(const uint8* src_rgba, int src_stride_rgba,
- uint8* dst_u, uint8* dst_v, int width);
-void ARGBToUV444Row_Any_NEON(const uint8* src_argb, uint8* dst_u, uint8* dst_v,
- int pix);
-void ARGBToUV422Row_Any_NEON(const uint8* src_argb, uint8* dst_u, uint8* dst_v,
- int pix);
-void ARGBToUV411Row_Any_NEON(const uint8* src_argb, uint8* dst_u, uint8* dst_v,
- int pix);
-void ARGBToUVRow_Any_NEON(const uint8* src_argb, int src_stride_argb,
- uint8* dst_u, uint8* dst_v, int pix);
-void ARGBToUVJRow_Any_NEON(const uint8* src_argb, int src_stride_argb,
- uint8* dst_u, uint8* dst_v, int pix);
-void BGRAToUVRow_Any_NEON(const uint8* src_bgra, int src_stride_bgra,
- uint8* dst_u, uint8* dst_v, int pix);
-void ABGRToUVRow_Any_NEON(const uint8* src_abgr, int src_stride_abgr,
- uint8* dst_u, uint8* dst_v, int pix);
-void RGBAToUVRow_Any_NEON(const uint8* src_rgba, int src_stride_rgba,
- uint8* dst_u, uint8* dst_v, int pix);
-void RGB24ToUVRow_Any_NEON(const uint8* src_rgb24, int src_stride_rgb24,
- uint8* dst_u, uint8* dst_v, int pix);
-void RAWToUVRow_Any_NEON(const uint8* src_raw, int src_stride_raw,
- uint8* dst_u, uint8* dst_v, int pix);
-void RGB565ToUVRow_Any_NEON(const uint8* src_rgb565, int src_stride_rgb565,
- uint8* dst_u, uint8* dst_v, int pix);
-void ARGB1555ToUVRow_Any_NEON(const uint8* src_argb1555,
- int src_stride_argb1555,
- uint8* dst_u, uint8* dst_v, int pix);
-void ARGB4444ToUVRow_Any_NEON(const uint8* src_argb4444,
- int src_stride_argb4444,
- uint8* dst_u, uint8* dst_v, int pix);
-void ARGBToUVRow_C(const uint8* src_argb, int src_stride_argb,
- uint8* dst_u, uint8* dst_v, int width);
-void ARGBToUVJRow_C(const uint8* src_argb, int src_stride_argb,
- uint8* dst_u, uint8* dst_v, int width);
-void BGRAToUVRow_C(const uint8* src_bgra, int src_stride_bgra,
- uint8* dst_u, uint8* dst_v, int width);
-void ABGRToUVRow_C(const uint8* src_abgr, int src_stride_abgr,
- uint8* dst_u, uint8* dst_v, int width);
-void RGBAToUVRow_C(const uint8* src_rgba, int src_stride_rgba,
- uint8* dst_u, uint8* dst_v, int width);
-void RGB24ToUVRow_C(const uint8* src_rgb24, int src_stride_rgb24,
- uint8* dst_u, uint8* dst_v, int width);
-void RAWToUVRow_C(const uint8* src_raw, int src_stride_raw,
- uint8* dst_u, uint8* dst_v, int width);
-void RGB565ToUVRow_C(const uint8* src_rgb565, int src_stride_rgb565,
- uint8* dst_u, uint8* dst_v, int width);
-void ARGB1555ToUVRow_C(const uint8* src_argb1555, int src_stride_argb1555,
- uint8* dst_u, uint8* dst_v, int width);
-void ARGB4444ToUVRow_C(const uint8* src_argb4444, int src_stride_argb4444,
- uint8* dst_u, uint8* dst_v, int width);
-
-void ARGBToUV444Row_SSSE3(const uint8* src_argb,
- uint8* dst_u, uint8* dst_v, int width);
-void ARGBToUV444Row_Any_SSSE3(const uint8* src_argb,
- uint8* dst_u, uint8* dst_v, int width);
-
-void ARGBToUV422Row_SSSE3(const uint8* src_argb,
- uint8* dst_u, uint8* dst_v, int width);
-void ARGBToUV422Row_Any_SSSE3(const uint8* src_argb,
- uint8* dst_u, uint8* dst_v, int width);
-
-void ARGBToUV444Row_C(const uint8* src_argb,
- uint8* dst_u, uint8* dst_v, int width);
-void ARGBToUV422Row_C(const uint8* src_argb,
- uint8* dst_u, uint8* dst_v, int width);
-void ARGBToUV411Row_C(const uint8* src_argb,
- uint8* dst_u, uint8* dst_v, int width);
-void ARGBToUVJ422Row_C(const uint8* src_argb,
- uint8* dst_u, uint8* dst_v, int width);
-
-void MirrorRow_AVX2(const uint8* src, uint8* dst, int width);
-void MirrorRow_SSSE3(const uint8* src, uint8* dst, int width);
-void MirrorRow_SSE2(const uint8* src, uint8* dst, int width);
-void MirrorRow_NEON(const uint8* src, uint8* dst, int width);
-void MirrorRow_MIPS_DSPR2(const uint8* src, uint8* dst, int width);
-void MirrorRow_C(const uint8* src, uint8* dst, int width);
-void MirrorRow_Any_AVX2(const uint8* src, uint8* dst, int width);
-void MirrorRow_Any_SSSE3(const uint8* src, uint8* dst, int width);
-void MirrorRow_Any_SSE2(const uint8* src, uint8* dst, int width);
-void MirrorRow_Any_NEON(const uint8* src, uint8* dst, int width);
-
-void MirrorUVRow_SSSE3(const uint8* src_uv, uint8* dst_u, uint8* dst_v,
- int width);
-void MirrorUVRow_NEON(const uint8* src_uv, uint8* dst_u, uint8* dst_v,
- int width);
-void MirrorUVRow_MIPS_DSPR2(const uint8* src_uv, uint8* dst_u, uint8* dst_v,
- int width);
-void MirrorUVRow_C(const uint8* src_uv, uint8* dst_u, uint8* dst_v,
- int width);
-
-void ARGBMirrorRow_AVX2(const uint8* src, uint8* dst, int width);
-void ARGBMirrorRow_SSE2(const uint8* src, uint8* dst, int width);
-void ARGBMirrorRow_NEON(const uint8* src, uint8* dst, int width);
-void ARGBMirrorRow_C(const uint8* src, uint8* dst, int width);
-void ARGBMirrorRow_Any_AVX2(const uint8* src, uint8* dst, int width);
-void ARGBMirrorRow_Any_SSE2(const uint8* src, uint8* dst, int width);
-void ARGBMirrorRow_Any_NEON(const uint8* src, uint8* dst, int width);
-
-void SplitUVRow_C(const uint8* src_uv, uint8* dst_u, uint8* dst_v, int pix);
-void SplitUVRow_SSE2(const uint8* src_uv, uint8* dst_u, uint8* dst_v, int pix);
-void SplitUVRow_AVX2(const uint8* src_uv, uint8* dst_u, uint8* dst_v, int pix);
-void SplitUVRow_NEON(const uint8* src_uv, uint8* dst_u, uint8* dst_v, int pix);
-void SplitUVRow_MIPS_DSPR2(const uint8* src_uv, uint8* dst_u, uint8* dst_v,
- int pix);
-void SplitUVRow_Any_SSE2(const uint8* src_uv, uint8* dst_u, uint8* dst_v,
- int pix);
-void SplitUVRow_Any_AVX2(const uint8* src_uv, uint8* dst_u, uint8* dst_v,
- int pix);
-void SplitUVRow_Any_NEON(const uint8* src_uv, uint8* dst_u, uint8* dst_v,
- int pix);
-void SplitUVRow_Any_MIPS_DSPR2(const uint8* src_uv, uint8* dst_u, uint8* dst_v,
- int pix);
-
-void MergeUVRow_C(const uint8* src_u, const uint8* src_v, uint8* dst_uv,
- int width);
-void MergeUVRow_SSE2(const uint8* src_u, const uint8* src_v, uint8* dst_uv,
- int width);
-void MergeUVRow_AVX2(const uint8* src_u, const uint8* src_v, uint8* dst_uv,
- int width);
-void MergeUVRow_NEON(const uint8* src_u, const uint8* src_v, uint8* dst_uv,
- int width);
-void MergeUVRow_Any_SSE2(const uint8* src_u, const uint8* src_v, uint8* dst_uv,
- int width);
-void MergeUVRow_Any_AVX2(const uint8* src_u, const uint8* src_v, uint8* dst_uv,
- int width);
-void MergeUVRow_Any_NEON(const uint8* src_u, const uint8* src_v, uint8* dst_uv,
- int width);
-
-void CopyRow_SSE2(const uint8* src, uint8* dst, int count);
-void CopyRow_AVX(const uint8* src, uint8* dst, int count);
-void CopyRow_ERMS(const uint8* src, uint8* dst, int count);
-void CopyRow_NEON(const uint8* src, uint8* dst, int count);
-void CopyRow_MIPS(const uint8* src, uint8* dst, int count);
-void CopyRow_C(const uint8* src, uint8* dst, int count);
-void CopyRow_Any_SSE2(const uint8* src, uint8* dst, int count);
-void CopyRow_Any_AVX(const uint8* src, uint8* dst, int count);
-void CopyRow_Any_NEON(const uint8* src, uint8* dst, int count);
-
-void CopyRow_16_C(const uint16* src, uint16* dst, int count);
-
-void ARGBCopyAlphaRow_C(const uint8* src_argb, uint8* dst_argb, int width);
-void ARGBCopyAlphaRow_SSE2(const uint8* src_argb, uint8* dst_argb, int width);
-void ARGBCopyAlphaRow_AVX2(const uint8* src_argb, uint8* dst_argb, int width);
-
-void ARGBCopyYToAlphaRow_C(const uint8* src_y, uint8* dst_argb, int width);
-void ARGBCopyYToAlphaRow_SSE2(const uint8* src_y, uint8* dst_argb, int width);
-void ARGBCopyYToAlphaRow_AVX2(const uint8* src_y, uint8* dst_argb, int width);
-
-void SetRow_C(uint8* dst, uint8 v8, int count);
-void SetRow_X86(uint8* dst, uint8 v8, int count);
-void SetRow_ERMS(uint8* dst, uint8 v8, int count);
-void SetRow_NEON(uint8* dst, uint8 v8, int count);
-void SetRow_Any_X86(uint8* dst, uint8 v8, int count);
-void SetRow_Any_NEON(uint8* dst, uint8 v8, int count);
-
-void ARGBSetRow_C(uint8* dst_argb, uint32 v32, int count);
-void ARGBSetRow_X86(uint8* dst_argb, uint32 v32, int count);
-void ARGBSetRow_NEON(uint8* dst_argb, uint32 v32, int count);
-void ARGBSetRow_Any_NEON(uint8* dst_argb, uint32 v32, int count);
-
-// ARGBShufflers for BGRAToARGB etc.
-void ARGBShuffleRow_C(const uint8* src_argb, uint8* dst_argb,
- const uint8* shuffler, int pix);
-void ARGBShuffleRow_SSE2(const uint8* src_argb, uint8* dst_argb,
- const uint8* shuffler, int pix);
-void ARGBShuffleRow_SSSE3(const uint8* src_argb, uint8* dst_argb,
- const uint8* shuffler, int pix);
-void ARGBShuffleRow_AVX2(const uint8* src_argb, uint8* dst_argb,
- const uint8* shuffler, int pix);
-void ARGBShuffleRow_NEON(const uint8* src_argb, uint8* dst_argb,
- const uint8* shuffler, int pix);
-void ARGBShuffleRow_Any_SSE2(const uint8* src_argb, uint8* dst_argb,
- const uint8* shuffler, int pix);
-void ARGBShuffleRow_Any_SSSE3(const uint8* src_argb, uint8* dst_argb,
- const uint8* shuffler, int pix);
-void ARGBShuffleRow_Any_AVX2(const uint8* src_argb, uint8* dst_argb,
- const uint8* shuffler, int pix);
-void ARGBShuffleRow_Any_NEON(const uint8* src_argb, uint8* dst_argb,
- const uint8* shuffler, int pix);
-
-void RGB24ToARGBRow_SSSE3(const uint8* src_rgb24, uint8* dst_argb, int pix);
-void RAWToARGBRow_SSSE3(const uint8* src_raw, uint8* dst_argb, int pix);
-void RGB565ToARGBRow_SSE2(const uint8* src_rgb565, uint8* dst_argb, int pix);
-void ARGB1555ToARGBRow_SSE2(const uint8* src_argb1555, uint8* dst_argb,
- int pix);
-void ARGB4444ToARGBRow_SSE2(const uint8* src_argb4444, uint8* dst_argb,
- int pix);
-void RGB565ToARGBRow_AVX2(const uint8* src_rgb565, uint8* dst_argb, int pix);
-void ARGB1555ToARGBRow_AVX2(const uint8* src_argb1555, uint8* dst_argb,
- int pix);
-void ARGB4444ToARGBRow_AVX2(const uint8* src_argb4444, uint8* dst_argb,
- int pix);
-
-void RGB24ToARGBRow_NEON(const uint8* src_rgb24, uint8* dst_argb, int pix);
-void RAWToARGBRow_NEON(const uint8* src_raw, uint8* dst_argb, int pix);
-void RGB565ToARGBRow_NEON(const uint8* src_rgb565, uint8* dst_argb, int pix);
-void ARGB1555ToARGBRow_NEON(const uint8* src_argb1555, uint8* dst_argb,
- int pix);
-void ARGB4444ToARGBRow_NEON(const uint8* src_argb4444, uint8* dst_argb,
- int pix);
-void RGB24ToARGBRow_C(const uint8* src_rgb24, uint8* dst_argb, int pix);
-void RAWToARGBRow_C(const uint8* src_raw, uint8* dst_argb, int pix);
-void RGB565ToARGBRow_C(const uint8* src_rgb, uint8* dst_argb, int pix);
-void ARGB1555ToARGBRow_C(const uint8* src_argb, uint8* dst_argb, int pix);
-void ARGB4444ToARGBRow_C(const uint8* src_argb, uint8* dst_argb, int pix);
-void RGB24ToARGBRow_Any_SSSE3(const uint8* src_rgb24, uint8* dst_argb, int pix);
-void RAWToARGBRow_Any_SSSE3(const uint8* src_raw, uint8* dst_argb, int pix);
-
-void RGB565ToARGBRow_Any_SSE2(const uint8* src_rgb565, uint8* dst_argb,
- int pix);
-void ARGB1555ToARGBRow_Any_SSE2(const uint8* src_argb1555, uint8* dst_argb,
- int pix);
-void ARGB4444ToARGBRow_Any_SSE2(const uint8* src_argb4444, uint8* dst_argb,
- int pix);
-void RGB565ToARGBRow_Any_AVX2(const uint8* src_rgb565, uint8* dst_argb,
- int pix);
-void ARGB1555ToARGBRow_Any_AVX2(const uint8* src_argb1555, uint8* dst_argb,
- int pix);
-void ARGB4444ToARGBRow_Any_AVX2(const uint8* src_argb4444, uint8* dst_argb,
- int pix);
-
-void RGB24ToARGBRow_Any_NEON(const uint8* src_rgb24, uint8* dst_argb, int pix);
-void RAWToARGBRow_Any_NEON(const uint8* src_raw, uint8* dst_argb, int pix);
-void RGB565ToARGBRow_Any_NEON(const uint8* src_rgb565, uint8* dst_argb,
- int pix);
-void ARGB1555ToARGBRow_Any_NEON(const uint8* src_argb1555, uint8* dst_argb,
- int pix);
-void ARGB4444ToARGBRow_Any_NEON(const uint8* src_argb4444, uint8* dst_argb,
- int pix);
-
-void ARGBToRGB24Row_SSSE3(const uint8* src_argb, uint8* dst_rgb, int pix);
-void ARGBToRAWRow_SSSE3(const uint8* src_argb, uint8* dst_rgb, int pix);
-void ARGBToRGB565Row_SSE2(const uint8* src_argb, uint8* dst_rgb, int pix);
-void ARGBToARGB1555Row_SSE2(const uint8* src_argb, uint8* dst_rgb, int pix);
-void ARGBToARGB4444Row_SSE2(const uint8* src_argb, uint8* dst_rgb, int pix);
-
-void ARGBToRGB565DitherRow_C(const uint8* src_argb, uint8* dst_rgb,
- const uint32 dither4, int pix);
-void ARGBToRGB565DitherRow_SSE2(const uint8* src_argb, uint8* dst_rgb,
- const uint32 dither4, int pix);
-void ARGBToRGB565DitherRow_AVX2(const uint8* src_argb, uint8* dst_rgb,
- const uint32 dither4, int pix);
-
-void ARGBToRGB565Row_AVX2(const uint8* src_argb, uint8* dst_rgb, int pix);
-void ARGBToARGB1555Row_AVX2(const uint8* src_argb, uint8* dst_rgb, int pix);
-void ARGBToARGB4444Row_AVX2(const uint8* src_argb, uint8* dst_rgb, int pix);
-
-void ARGBToRGB24Row_NEON(const uint8* src_argb, uint8* dst_rgb, int pix);
-void ARGBToRAWRow_NEON(const uint8* src_argb, uint8* dst_rgb, int pix);
-void ARGBToRGB565Row_NEON(const uint8* src_argb, uint8* dst_rgb, int pix);
-void ARGBToARGB1555Row_NEON(const uint8* src_argb, uint8* dst_rgb, int pix);
-void ARGBToARGB4444Row_NEON(const uint8* src_argb, uint8* dst_rgb, int pix);
-void ARGBToRGB565DitherRow_NEON(const uint8* src_argb, uint8* dst_rgb,
- const uint32 dither4, int width);
-
-void ARGBToRGBARow_C(const uint8* src_argb, uint8* dst_rgb, int pix);
-void ARGBToRGB24Row_C(const uint8* src_argb, uint8* dst_rgb, int pix);
-void ARGBToRAWRow_C(const uint8* src_argb, uint8* dst_rgb, int pix);
-void ARGBToRGB565Row_C(const uint8* src_argb, uint8* dst_rgb, int pix);
-void ARGBToARGB1555Row_C(const uint8* src_argb, uint8* dst_rgb, int pix);
-void ARGBToARGB4444Row_C(const uint8* src_argb, uint8* dst_rgb, int pix);
-
-void J400ToARGBRow_SSE2(const uint8* src_y, uint8* dst_argb, int pix);
-void J400ToARGBRow_AVX2(const uint8* src_y, uint8* dst_argb, int pix);
-void J400ToARGBRow_NEON(const uint8* src_y, uint8* dst_argb, int pix);
-void J400ToARGBRow_C(const uint8* src_y, uint8* dst_argb, int pix);
-void J400ToARGBRow_Any_SSE2(const uint8* src_y, uint8* dst_argb, int pix);
-void J400ToARGBRow_Any_AVX2(const uint8* src_y, uint8* dst_argb, int pix);
-void J400ToARGBRow_Any_NEON(const uint8* src_y, uint8* dst_argb, int pix);
-
-void I444ToARGBRow_C(const uint8* src_y,
- const uint8* src_u,
- const uint8* src_v,
- uint8* dst_argb,
- int width);
-void I422ToARGBRow_C(const uint8* src_y,
- const uint8* src_u,
- const uint8* src_v,
- uint8* dst_argb,
- int width);
-void I411ToARGBRow_C(const uint8* src_y,
- const uint8* src_u,
- const uint8* src_v,
- uint8* dst_argb,
- int width);
-void NV12ToARGBRow_C(const uint8* src_y,
- const uint8* src_uv,
- uint8* dst_argb,
- int width);
-void NV21ToRGB565Row_C(const uint8* src_y,
- const uint8* src_vu,
- uint8* dst_argb,
- int width);
-void NV12ToRGB565Row_C(const uint8* src_y,
- const uint8* src_uv,
- uint8* dst_argb,
- int width);
-void NV21ToARGBRow_C(const uint8* src_y,
- const uint8* src_vu,
- uint8* dst_argb,
- int width);
-void YUY2ToARGBRow_C(const uint8* src_yuy2,
- uint8* dst_argb,
- int width);
-void UYVYToARGBRow_C(const uint8* src_uyvy,
- uint8* dst_argb,
- int width);
-void J422ToARGBRow_C(const uint8* src_y,
- const uint8* src_u,
- const uint8* src_v,
- uint8* dst_argb,
- int width);
-void I422ToBGRARow_C(const uint8* src_y,
- const uint8* src_u,
- const uint8* src_v,
- uint8* dst_bgra,
- int width);
-void I422ToABGRRow_C(const uint8* src_y,
- const uint8* src_u,
- const uint8* src_v,
- uint8* dst_abgr,
- int width);
-void I422ToRGBARow_C(const uint8* src_y,
- const uint8* src_u,
- const uint8* src_v,
- uint8* dst_rgba,
- int width);
-void I422ToRGB24Row_C(const uint8* src_y,
- const uint8* src_u,
- const uint8* src_v,
- uint8* dst_rgb24,
- int width);
-void I422ToRAWRow_C(const uint8* src_y,
- const uint8* src_u,
- const uint8* src_v,
- uint8* dst_raw,
- int width);
-void I422ToARGB4444Row_C(const uint8* src_y,
- const uint8* src_u,
- const uint8* src_v,
- uint8* dst_argb4444,
- int width);
-void I422ToARGB1555Row_C(const uint8* src_y,
- const uint8* src_u,
- const uint8* src_v,
- uint8* dst_argb4444,
- int width);
-void I422ToRGB565Row_C(const uint8* src_y,
- const uint8* src_u,
- const uint8* src_v,
- uint8* dst_rgb565,
- int width);
-void I422ToARGBRow_AVX2(const uint8* src_y,
- const uint8* src_u,
- const uint8* src_v,
- uint8* dst_argb,
- int width);
-void I422ToBGRARow_AVX2(const uint8* src_y,
- const uint8* src_u,
- const uint8* src_v,
- uint8* dst_argb,
- int width);
-void I422ToRGBARow_AVX2(const uint8* src_y,
- const uint8* src_u,
- const uint8* src_v,
- uint8* dst_argb,
- int width);
-void I422ToABGRRow_AVX2(const uint8* src_y,
- const uint8* src_u,
- const uint8* src_v,
- uint8* dst_argb,
- int width);
-void I444ToARGBRow_SSSE3(const uint8* src_y,
- const uint8* src_u,
- const uint8* src_v,
- uint8* dst_argb,
- int width);
-void I444ToARGBRow_AVX2(const uint8* src_y,
- const uint8* src_u,
- const uint8* src_v,
- uint8* dst_argb,
- int width);
-void I422ToARGBRow_SSSE3(const uint8* src_y,
- const uint8* src_u,
- const uint8* src_v,
- uint8* dst_argb,
- int width);
-void I411ToARGBRow_SSSE3(const uint8* src_y,
- const uint8* src_u,
- const uint8* src_v,
- uint8* dst_argb,
- int width);
-void I411ToARGBRow_AVX2(const uint8* src_y,
- const uint8* src_u,
- const uint8* src_v,
- uint8* dst_argb,
- int width);
-void NV12ToARGBRow_SSSE3(const uint8* src_y,
- const uint8* src_uv,
- uint8* dst_argb,
- int width);
-void NV21ToARGBRow_SSSE3(const uint8* src_y,
- const uint8* src_vu,
- uint8* dst_argb,
- int width);
-void NV12ToARGBRow_AVX2(const uint8* src_y,
- const uint8* src_uv,
- uint8* dst_argb,
- int width);
-void NV21ToARGBRow_AVX2(const uint8* src_y,
- const uint8* src_vu,
- uint8* dst_argb,
- int width);
-void NV12ToRGB565Row_SSSE3(const uint8* src_y,
- const uint8* src_uv,
- uint8* dst_argb,
- int width);
-void NV21ToRGB565Row_SSSE3(const uint8* src_y,
- const uint8* src_vu,
- uint8* dst_argb,
- int width);
-void NV12ToRGB565Row_AVX2(const uint8* src_y,
- const uint8* src_uv,
- uint8* dst_argb,
- int width);
-void NV21ToRGB565Row_AVX2(const uint8* src_y,
- const uint8* src_vu,
- uint8* dst_argb,
- int width);
-void YUY2ToARGBRow_SSSE3(const uint8* src_yuy2,
- uint8* dst_argb,
- int width);
-void UYVYToARGBRow_SSSE3(const uint8* src_uyvy,
- uint8* dst_argb,
- int width);
-void YUY2ToARGBRow_AVX2(const uint8* src_yuy2,
- uint8* dst_argb,
- int width);
-void UYVYToARGBRow_AVX2(const uint8* src_uyvy,
- uint8* dst_argb,
- int width);
-void J422ToARGBRow_SSSE3(const uint8* src_y,
- const uint8* src_u,
- const uint8* src_v,
- uint8* dst_argb,
- int width);
-void J422ToARGBRow_AVX2(const uint8* src_y,
- const uint8* src_u,
- const uint8* src_v,
- uint8* dst_argb,
- int width);
-void I422ToBGRARow_SSSE3(const uint8* src_y,
- const uint8* src_u,
- const uint8* src_v,
- uint8* dst_bgra,
- int width);
-void I422ToABGRRow_SSSE3(const uint8* src_y,
- const uint8* src_u,
- const uint8* src_v,
- uint8* dst_abgr,
- int width);
-void I422ToRGBARow_SSSE3(const uint8* src_y,
- const uint8* src_u,
- const uint8* src_v,
- uint8* dst_rgba,
- int width);
-void I422ToARGB4444Row_SSSE3(const uint8* src_y,
- const uint8* src_u,
- const uint8* src_v,
- uint8* dst_argb,
- int width);
-void I422ToARGB4444Row_AVX2(const uint8* src_y,
- const uint8* src_u,
- const uint8* src_v,
- uint8* dst_argb,
- int width);
-void I422ToARGB1555Row_SSSE3(const uint8* src_y,
- const uint8* src_u,
- const uint8* src_v,
- uint8* dst_argb,
- int width);
-void I422ToARGB1555Row_AVX2(const uint8* src_y,
- const uint8* src_u,
- const uint8* src_v,
- uint8* dst_argb,
- int width);
-void I422ToRGB565Row_SSSE3(const uint8* src_y,
- const uint8* src_u,
- const uint8* src_v,
- uint8* dst_argb,
- int width);
-void I422ToRGB565Row_AVX2(const uint8* src_y,
- const uint8* src_u,
- const uint8* src_v,
- uint8* dst_argb,
- int width);
-void I422ToRGB24Row_SSSE3(const uint8* src_y,
- const uint8* src_u,
- const uint8* src_v,
- uint8* dst_rgb24,
- int width);
-void I422ToRGB24Row_AVX2(const uint8* src_y,
- const uint8* src_u,
- const uint8* src_v,
- uint8* dst_rgb24,
- int width);
-void I422ToRAWRow_SSSE3(const uint8* src_y,
- const uint8* src_u,
- const uint8* src_v,
- uint8* dst_raw,
- int width);
-void I422ToRAWRow_AVX2(const uint8* src_y,
- const uint8* src_u,
- const uint8* src_v,
- uint8* dst_raw,
- int width);
-void I422ToARGBRow_Any_AVX2(const uint8* src_y,
- const uint8* src_u,
- const uint8* src_v,
- uint8* dst_argb,
- int width);
-void I422ToBGRARow_Any_AVX2(const uint8* src_y,
- const uint8* src_u,
- const uint8* src_v,
- uint8* dst_argb,
- int width);
-void I422ToRGBARow_Any_AVX2(const uint8* src_y,
- const uint8* src_u,
- const uint8* src_v,
- uint8* dst_argb,
- int width);
-void I422ToABGRRow_Any_AVX2(const uint8* src_y,
- const uint8* src_u,
- const uint8* src_v,
- uint8* dst_argb,
- int width);
-void I444ToARGBRow_Any_SSSE3(const uint8* src_y,
- const uint8* src_u,
- const uint8* src_v,
- uint8* dst_argb,
- int width);
-void I444ToARGBRow_Any_AVX2(const uint8* src_y,
- const uint8* src_u,
- const uint8* src_v,
- uint8* dst_argb,
- int width);
-void I422ToARGBRow_Any_SSSE3(const uint8* src_y,
- const uint8* src_u,
- const uint8* src_v,
- uint8* dst_argb,
- int width);
-void I411ToARGBRow_Any_SSSE3(const uint8* src_y,
- const uint8* src_u,
- const uint8* src_v,
- uint8* dst_argb,
- int width);
-void I411ToARGBRow_Any_AVX2(const uint8* src_y,
- const uint8* src_u,
- const uint8* src_v,
- uint8* dst_argb,
- int width);
-void NV12ToARGBRow_Any_SSSE3(const uint8* src_y,
- const uint8* src_uv,
- uint8* dst_argb,
- int width);
-void NV21ToARGBRow_Any_SSSE3(const uint8* src_y,
- const uint8* src_vu,
- uint8* dst_argb,
- int width);
-void NV12ToARGBRow_Any_AVX2(const uint8* src_y,
- const uint8* src_uv,
- uint8* dst_argb,
- int width);
-void NV21ToARGBRow_Any_AVX2(const uint8* src_y,
- const uint8* src_vu,
- uint8* dst_argb,
- int width);
-void NV12ToRGB565Row_Any_SSSE3(const uint8* src_y,
- const uint8* src_uv,
- uint8* dst_argb,
- int width);
-void NV21ToRGB565Row_Any_SSSE3(const uint8* src_y,
- const uint8* src_vu,
- uint8* dst_argb,
- int width);
-void NV12ToRGB565Row_Any_AVX2(const uint8* src_y,
- const uint8* src_uv,
- uint8* dst_argb,
- int width);
-void NV21ToRGB565Row_Any_AVX2(const uint8* src_y,
- const uint8* src_vu,
- uint8* dst_argb,
- int width);
-void YUY2ToARGBRow_Any_SSSE3(const uint8* src_yuy2,
- uint8* dst_argb,
- int width);
-void UYVYToARGBRow_Any_SSSE3(const uint8* src_uyvy,
- uint8* dst_argb,
- int width);
-void YUY2ToARGBRow_Any_AVX2(const uint8* src_yuy2,
- uint8* dst_argb,
- int width);
-void UYVYToARGBRow_Any_AVX2(const uint8* src_uyvy,
- uint8* dst_argb,
- int width);
-void J422ToARGBRow_Any_SSSE3(const uint8* src_y,
- const uint8* src_u,
- const uint8* src_v,
- uint8* dst_argb,
- int width);
-void J422ToARGBRow_Any_AVX2(const uint8* src_y,
- const uint8* src_u,
- const uint8* src_v,
- uint8* dst_argb,
- int width);
-void I422ToBGRARow_Any_SSSE3(const uint8* src_y,
- const uint8* src_u,
- const uint8* src_v,
- uint8* dst_bgra,
- int width);
-void I422ToABGRRow_Any_SSSE3(const uint8* src_y,
- const uint8* src_u,
- const uint8* src_v,
- uint8* dst_abgr,
- int width);
-void I422ToRGBARow_Any_SSSE3(const uint8* src_y,
- const uint8* src_u,
- const uint8* src_v,
- uint8* dst_rgba,
- int width);
-void I422ToARGB4444Row_Any_SSSE3(const uint8* src_y,
- const uint8* src_u,
- const uint8* src_v,
- uint8* dst_rgba,
- int width);
-void I422ToARGB4444Row_Any_AVX2(const uint8* src_y,
- const uint8* src_u,
- const uint8* src_v,
- uint8* dst_rgba,
- int width);
-void I422ToARGB1555Row_Any_SSSE3(const uint8* src_y,
- const uint8* src_u,
- const uint8* src_v,
- uint8* dst_rgba,
- int width);
-void I422ToARGB1555Row_Any_AVX2(const uint8* src_y,
- const uint8* src_u,
- const uint8* src_v,
- uint8* dst_rgba,
- int width);
-void I422ToRGB565Row_Any_SSSE3(const uint8* src_y,
- const uint8* src_u,
- const uint8* src_v,
- uint8* dst_rgba,
- int width);
-void I422ToRGB565Row_Any_AVX2(const uint8* src_y,
- const uint8* src_u,
- const uint8* src_v,
- uint8* dst_rgba,
- int width);
-void I422ToRGB24Row_Any_SSSE3(const uint8* src_y,
- const uint8* src_u,
- const uint8* src_v,
- uint8* dst_argb,
- int width);
-void I422ToRGB24Row_Any_AVX2(const uint8* src_y,
- const uint8* src_u,
- const uint8* src_v,
- uint8* dst_argb,
- int width);
-void I422ToRAWRow_Any_SSSE3(const uint8* src_y,
- const uint8* src_u,
- const uint8* src_v,
- uint8* dst_argb,
- int width);
-void I422ToRAWRow_Any_AVX2(const uint8* src_y,
- const uint8* src_u,
- const uint8* src_v,
- uint8* dst_argb,
- int width);
-
-void I400ToARGBRow_C(const uint8* src_y, uint8* dst_argb, int width);
-void I400ToARGBRow_SSE2(const uint8* src_y, uint8* dst_argb, int width);
-void I400ToARGBRow_AVX2(const uint8* src_y, uint8* dst_argb, int width);
-void I400ToARGBRow_NEON(const uint8* src_y, uint8* dst_argb, int width);
-void I400ToARGBRow_Any_SSE2(const uint8* src_y, uint8* dst_argb, int width);
-void I400ToARGBRow_Any_AVX2(const uint8* src_y, uint8* dst_argb, int width);
-void I400ToARGBRow_Any_NEON(const uint8* src_y, uint8* dst_argb, int width);
-
-// ARGB preattenuated alpha blend.
-void ARGBBlendRow_SSSE3(const uint8* src_argb, const uint8* src_argb1,
- uint8* dst_argb, int width);
-void ARGBBlendRow_SSE2(const uint8* src_argb, const uint8* src_argb1,
- uint8* dst_argb, int width);
-void ARGBBlendRow_NEON(const uint8* src_argb, const uint8* src_argb1,
- uint8* dst_argb, int width);
-void ARGBBlendRow_C(const uint8* src_argb, const uint8* src_argb1,
- uint8* dst_argb, int width);
-
-// ARGB multiply images. Same API as Blend, but these require
-// pointer and width alignment for SSE2.
-void ARGBMultiplyRow_C(const uint8* src_argb, const uint8* src_argb1,
- uint8* dst_argb, int width);
-void ARGBMultiplyRow_SSE2(const uint8* src_argb, const uint8* src_argb1,
- uint8* dst_argb, int width);
-void ARGBMultiplyRow_Any_SSE2(const uint8* src_argb, const uint8* src_argb1,
- uint8* dst_argb, int width);
-void ARGBMultiplyRow_AVX2(const uint8* src_argb, const uint8* src_argb1,
- uint8* dst_argb, int width);
-void ARGBMultiplyRow_Any_AVX2(const uint8* src_argb, const uint8* src_argb1,
- uint8* dst_argb, int width);
-void ARGBMultiplyRow_NEON(const uint8* src_argb, const uint8* src_argb1,
- uint8* dst_argb, int width);
-void ARGBMultiplyRow_Any_NEON(const uint8* src_argb, const uint8* src_argb1,
- uint8* dst_argb, int width);
-
-// ARGB add images.
-void ARGBAddRow_C(const uint8* src_argb, const uint8* src_argb1,
- uint8* dst_argb, int width);
-void ARGBAddRow_SSE2(const uint8* src_argb, const uint8* src_argb1,
- uint8* dst_argb, int width);
-void ARGBAddRow_Any_SSE2(const uint8* src_argb, const uint8* src_argb1,
- uint8* dst_argb, int width);
-void ARGBAddRow_AVX2(const uint8* src_argb, const uint8* src_argb1,
- uint8* dst_argb, int width);
-void ARGBAddRow_Any_AVX2(const uint8* src_argb, const uint8* src_argb1,
- uint8* dst_argb, int width);
-void ARGBAddRow_NEON(const uint8* src_argb, const uint8* src_argb1,
- uint8* dst_argb, int width);
-void ARGBAddRow_Any_NEON(const uint8* src_argb, const uint8* src_argb1,
- uint8* dst_argb, int width);
-
-// ARGB subtract images. Same API as Blend, but these require
-// pointer and width alignment for SSE2.
-void ARGBSubtractRow_C(const uint8* src_argb, const uint8* src_argb1,
- uint8* dst_argb, int width);
-void ARGBSubtractRow_SSE2(const uint8* src_argb, const uint8* src_argb1,
- uint8* dst_argb, int width);
-void ARGBSubtractRow_Any_SSE2(const uint8* src_argb, const uint8* src_argb1,
- uint8* dst_argb, int width);
-void ARGBSubtractRow_AVX2(const uint8* src_argb, const uint8* src_argb1,
- uint8* dst_argb, int width);
-void ARGBSubtractRow_Any_AVX2(const uint8* src_argb, const uint8* src_argb1,
- uint8* dst_argb, int width);
-void ARGBSubtractRow_NEON(const uint8* src_argb, const uint8* src_argb1,
- uint8* dst_argb, int width);
-void ARGBSubtractRow_Any_NEON(const uint8* src_argb, const uint8* src_argb1,
- uint8* dst_argb, int width);
-
-void ARGBToRGB24Row_Any_SSSE3(const uint8* src_argb, uint8* dst_rgb, int pix);
-void ARGBToRAWRow_Any_SSSE3(const uint8* src_argb, uint8* dst_rgb, int pix);
-void ARGBToRGB565Row_Any_SSE2(const uint8* src_argb, uint8* dst_rgb, int pix);
-void ARGBToARGB1555Row_Any_SSE2(const uint8* src_argb, uint8* dst_rgb, int pix);
-void ARGBToARGB4444Row_Any_SSE2(const uint8* src_argb, uint8* dst_rgb, int pix);
-
-void ARGBToRGB565DitherRow_Any_SSE2(const uint8* src_argb, uint8* dst_rgb,
- const uint32 dither4, int pix);
-void ARGBToRGB565DitherRow_Any_AVX2(const uint8* src_argb, uint8* dst_rgb,
- const uint32 dither4, int pix);
-
-void ARGBToRGB565Row_Any_AVX2(const uint8* src_argb, uint8* dst_rgb, int pix);
-void ARGBToARGB1555Row_Any_AVX2(const uint8* src_argb, uint8* dst_rgb, int pix);
-void ARGBToARGB4444Row_Any_AVX2(const uint8* src_argb, uint8* dst_rgb, int pix);
-
-void ARGBToRGB24Row_Any_NEON(const uint8* src_argb, uint8* dst_rgb, int pix);
-void ARGBToRAWRow_Any_NEON(const uint8* src_argb, uint8* dst_rgb, int pix);
-void ARGBToRGB565Row_Any_NEON(const uint8* src_argb, uint8* dst_rgb, int pix);
-void ARGBToARGB1555Row_Any_NEON(const uint8* src_argb, uint8* dst_rgb, int pix);
-void ARGBToARGB4444Row_Any_NEON(const uint8* src_argb, uint8* dst_rgb, int pix);
-void ARGBToRGB565DitherRow_Any_NEON(const uint8* src_argb, uint8* dst_rgb,
- const uint32 dither4, int width);
-
-void I444ToARGBRow_Any_NEON(const uint8* src_y,
- const uint8* src_u,
- const uint8* src_v,
- uint8* dst_argb,
- int width);
-void I422ToARGBRow_Any_NEON(const uint8* src_y,
- const uint8* src_u,
- const uint8* src_v,
- uint8* dst_argb,
- int width);
-void I411ToARGBRow_Any_NEON(const uint8* src_y,
- const uint8* src_u,
- const uint8* src_v,
- uint8* dst_argb,
- int width);
-void I422ToBGRARow_Any_NEON(const uint8* src_y,
- const uint8* src_u,
- const uint8* src_v,
- uint8* dst_argb,
- int width);
-void I422ToABGRRow_Any_NEON(const uint8* src_y,
- const uint8* src_u,
- const uint8* src_v,
- uint8* dst_argb,
- int width);
-void I422ToRGBARow_Any_NEON(const uint8* src_y,
- const uint8* src_u,
- const uint8* src_v,
- uint8* dst_argb,
- int width);
-void I422ToRGB24Row_Any_NEON(const uint8* src_y,
- const uint8* src_u,
- const uint8* src_v,
- uint8* dst_argb,
- int width);
-void I422ToRAWRow_Any_NEON(const uint8* src_y,
- const uint8* src_u,
- const uint8* src_v,
- uint8* dst_argb,
- int width);
-void I422ToARGB4444Row_Any_NEON(const uint8* src_y,
- const uint8* src_u,
- const uint8* src_v,
- uint8* dst_argb,
- int width);
-void I422ToARGB1555Row_Any_NEON(const uint8* src_y,
- const uint8* src_u,
- const uint8* src_v,
- uint8* dst_argb,
- int width);
-void I422ToRGB565Row_Any_NEON(const uint8* src_y,
- const uint8* src_u,
- const uint8* src_v,
- uint8* dst_argb,
- int width);
-void NV12ToARGBRow_Any_NEON(const uint8* src_y,
- const uint8* src_uv,
- uint8* dst_argb,
- int width);
-void NV21ToARGBRow_Any_NEON(const uint8* src_y,
- const uint8* src_uv,
- uint8* dst_argb,
- int width);
-void NV12ToRGB565Row_Any_NEON(const uint8* src_y,
- const uint8* src_uv,
- uint8* dst_argb,
- int width);
-void NV21ToRGB565Row_Any_NEON(const uint8* src_y,
- const uint8* src_uv,
- uint8* dst_argb,
- int width);
-void YUY2ToARGBRow_Any_NEON(const uint8* src_yuy2,
- uint8* dst_argb,
- int width);
-void UYVYToARGBRow_Any_NEON(const uint8* src_uyvy,
- uint8* dst_argb,
- int width);
-void I422ToARGBRow_MIPS_DSPR2(const uint8* src_y,
- const uint8* src_u,
- const uint8* src_v,
- uint8* dst_argb,
- int width);
-void I422ToBGRARow_MIPS_DSPR2(const uint8* src_y,
- const uint8* src_u,
- const uint8* src_v,
- uint8* dst_argb,
- int width);
-void I422ToABGRRow_MIPS_DSPR2(const uint8* src_y,
- const uint8* src_u,
- const uint8* src_v,
- uint8* dst_argb,
- int width);
-void I422ToARGBRow_MIPS_DSPR2(const uint8* src_y,
- const uint8* src_u,
- const uint8* src_v,
- uint8* dst_argb,
- int width);
-void I422ToBGRARow_MIPS_DSPR2(const uint8* src_y,
- const uint8* src_u,
- const uint8* src_v,
- uint8* dst_argb,
- int width);
-void I422ToABGRRow_MIPS_DSPR2(const uint8* src_y,
- const uint8* src_u,
- const uint8* src_v,
- uint8* dst_argb,
- int width);
-
-void YUY2ToYRow_AVX2(const uint8* src_yuy2, uint8* dst_y, int pix);
-void YUY2ToUVRow_AVX2(const uint8* src_yuy2, int stride_yuy2,
- uint8* dst_u, uint8* dst_v, int pix);
-void YUY2ToUV422Row_AVX2(const uint8* src_yuy2,
- uint8* dst_u, uint8* dst_v, int pix);
-void YUY2ToYRow_SSE2(const uint8* src_yuy2, uint8* dst_y, int pix);
-void YUY2ToUVRow_SSE2(const uint8* src_yuy2, int stride_yuy2,
- uint8* dst_u, uint8* dst_v, int pix);
-void YUY2ToUV422Row_SSE2(const uint8* src_yuy2,
- uint8* dst_u, uint8* dst_v, int pix);
-void YUY2ToYRow_NEON(const uint8* src_yuy2, uint8* dst_y, int pix);
-void YUY2ToUVRow_NEON(const uint8* src_yuy2, int stride_yuy2,
- uint8* dst_u, uint8* dst_v, int pix);
-void YUY2ToUV422Row_NEON(const uint8* src_yuy2,
- uint8* dst_u, uint8* dst_v, int pix);
-void YUY2ToYRow_C(const uint8* src_yuy2, uint8* dst_y, int pix);
-void YUY2ToUVRow_C(const uint8* src_yuy2, int stride_yuy2,
- uint8* dst_u, uint8* dst_v, int pix);
-void YUY2ToUV422Row_C(const uint8* src_yuy2,
- uint8* dst_u, uint8* dst_v, int pix);
-void YUY2ToYRow_Any_AVX2(const uint8* src_yuy2, uint8* dst_y, int pix);
-void YUY2ToUVRow_Any_AVX2(const uint8* src_yuy2, int stride_yuy2,
- uint8* dst_u, uint8* dst_v, int pix);
-void YUY2ToUV422Row_Any_AVX2(const uint8* src_yuy2,
- uint8* dst_u, uint8* dst_v, int pix);
-void YUY2ToYRow_Any_SSE2(const uint8* src_yuy2, uint8* dst_y, int pix);
-void YUY2ToUVRow_Any_SSE2(const uint8* src_yuy2, int stride_yuy2,
- uint8* dst_u, uint8* dst_v, int pix);
-void YUY2ToUV422Row_Any_SSE2(const uint8* src_yuy2,
- uint8* dst_u, uint8* dst_v, int pix);
-void YUY2ToYRow_Any_NEON(const uint8* src_yuy2, uint8* dst_y, int pix);
-void YUY2ToUVRow_Any_NEON(const uint8* src_yuy2, int stride_yuy2,
- uint8* dst_u, uint8* dst_v, int pix);
-void YUY2ToUV422Row_Any_NEON(const uint8* src_yuy2,
- uint8* dst_u, uint8* dst_v, int pix);
-void UYVYToYRow_AVX2(const uint8* src_uyvy, uint8* dst_y, int pix);
-void UYVYToUVRow_AVX2(const uint8* src_uyvy, int stride_uyvy,
- uint8* dst_u, uint8* dst_v, int pix);
-void UYVYToUV422Row_AVX2(const uint8* src_uyvy,
- uint8* dst_u, uint8* dst_v, int pix);
-void UYVYToYRow_SSE2(const uint8* src_uyvy, uint8* dst_y, int pix);
-void UYVYToUVRow_SSE2(const uint8* src_uyvy, int stride_uyvy,
- uint8* dst_u, uint8* dst_v, int pix);
-void UYVYToUV422Row_SSE2(const uint8* src_uyvy,
- uint8* dst_u, uint8* dst_v, int pix);
-void UYVYToYRow_AVX2(const uint8* src_uyvy, uint8* dst_y, int pix);
-void UYVYToUVRow_AVX2(const uint8* src_uyvy, int stride_uyvy,
- uint8* dst_u, uint8* dst_v, int pix);
-void UYVYToUV422Row_AVX2(const uint8* src_uyvy,
- uint8* dst_u, uint8* dst_v, int pix);
-void UYVYToYRow_NEON(const uint8* src_uyvy, uint8* dst_y, int pix);
-void UYVYToUVRow_NEON(const uint8* src_uyvy, int stride_uyvy,
- uint8* dst_u, uint8* dst_v, int pix);
-void UYVYToUV422Row_NEON(const uint8* src_uyvy,
- uint8* dst_u, uint8* dst_v, int pix);
-
-void UYVYToYRow_C(const uint8* src_uyvy, uint8* dst_y, int pix);
-void UYVYToUVRow_C(const uint8* src_uyvy, int stride_uyvy,
- uint8* dst_u, uint8* dst_v, int pix);
-void UYVYToUV422Row_C(const uint8* src_uyvy,
- uint8* dst_u, uint8* dst_v, int pix);
-void UYVYToYRow_Any_AVX2(const uint8* src_uyvy, uint8* dst_y, int pix);
-void UYVYToUVRow_Any_AVX2(const uint8* src_uyvy, int stride_uyvy,
- uint8* dst_u, uint8* dst_v, int pix);
-void UYVYToUV422Row_Any_AVX2(const uint8* src_uyvy,
- uint8* dst_u, uint8* dst_v, int pix);
-void UYVYToYRow_Any_SSE2(const uint8* src_uyvy, uint8* dst_y, int pix);
-void UYVYToUVRow_Any_SSE2(const uint8* src_uyvy, int stride_uyvy,
- uint8* dst_u, uint8* dst_v, int pix);
-void UYVYToUV422Row_Any_SSE2(const uint8* src_uyvy,
- uint8* dst_u, uint8* dst_v, int pix);
-void UYVYToYRow_Any_NEON(const uint8* src_uyvy, uint8* dst_y, int pix);
-void UYVYToUVRow_Any_NEON(const uint8* src_uyvy, int stride_uyvy,
- uint8* dst_u, uint8* dst_v, int pix);
-void UYVYToUV422Row_Any_NEON(const uint8* src_uyvy,
- uint8* dst_u, uint8* dst_v, int pix);
-
-void I422ToYUY2Row_C(const uint8* src_y,
- const uint8* src_u,
- const uint8* src_v,
- uint8* dst_yuy2, int width);
-void I422ToUYVYRow_C(const uint8* src_y,
- const uint8* src_u,
- const uint8* src_v,
- uint8* dst_uyvy, int width);
-void I422ToYUY2Row_SSE2(const uint8* src_y,
- const uint8* src_u,
- const uint8* src_v,
- uint8* dst_yuy2, int width);
-void I422ToUYVYRow_SSE2(const uint8* src_y,
- const uint8* src_u,
- const uint8* src_v,
- uint8* dst_uyvy, int width);
-void I422ToYUY2Row_Any_SSE2(const uint8* src_y,
- const uint8* src_u,
- const uint8* src_v,
- uint8* dst_yuy2, int width);
-void I422ToUYVYRow_Any_SSE2(const uint8* src_y,
- const uint8* src_u,
- const uint8* src_v,
- uint8* dst_uyvy, int width);
-void I422ToYUY2Row_NEON(const uint8* src_y,
- const uint8* src_u,
- const uint8* src_v,
- uint8* dst_yuy2, int width);
-void I422ToUYVYRow_NEON(const uint8* src_y,
- const uint8* src_u,
- const uint8* src_v,
- uint8* dst_uyvy, int width);
-void I422ToYUY2Row_Any_NEON(const uint8* src_y,
- const uint8* src_u,
- const uint8* src_v,
- uint8* dst_yuy2, int width);
-void I422ToUYVYRow_Any_NEON(const uint8* src_y,
- const uint8* src_u,
- const uint8* src_v,
- uint8* dst_uyvy, int width);
-
-// Effects related row functions.
-void ARGBAttenuateRow_C(const uint8* src_argb, uint8* dst_argb, int width);
-void ARGBAttenuateRow_SSE2(const uint8* src_argb, uint8* dst_argb, int width);
-void ARGBAttenuateRow_SSSE3(const uint8* src_argb, uint8* dst_argb, int width);
-void ARGBAttenuateRow_AVX2(const uint8* src_argb, uint8* dst_argb, int width);
-void ARGBAttenuateRow_NEON(const uint8* src_argb, uint8* dst_argb, int width);
-void ARGBAttenuateRow_Any_SSE2(const uint8* src_argb, uint8* dst_argb,
- int width);
-void ARGBAttenuateRow_Any_SSSE3(const uint8* src_argb, uint8* dst_argb,
- int width);
-void ARGBAttenuateRow_Any_AVX2(const uint8* src_argb, uint8* dst_argb,
- int width);
-void ARGBAttenuateRow_Any_NEON(const uint8* src_argb, uint8* dst_argb,
- int width);
-
-// Inverse table for unattenuate, shared by C and SSE2.
-extern const uint32 fixed_invtbl8[256];
-void ARGBUnattenuateRow_C(const uint8* src_argb, uint8* dst_argb, int width);
-void ARGBUnattenuateRow_SSE2(const uint8* src_argb, uint8* dst_argb, int width);
-void ARGBUnattenuateRow_AVX2(const uint8* src_argb, uint8* dst_argb, int width);
-void ARGBUnattenuateRow_Any_SSE2(const uint8* src_argb, uint8* dst_argb,
- int width);
-void ARGBUnattenuateRow_Any_AVX2(const uint8* src_argb, uint8* dst_argb,
- int width);
-
-void ARGBGrayRow_C(const uint8* src_argb, uint8* dst_argb, int width);
-void ARGBGrayRow_SSSE3(const uint8* src_argb, uint8* dst_argb, int width);
-void ARGBGrayRow_NEON(const uint8* src_argb, uint8* dst_argb, int width);
-
-void ARGBSepiaRow_C(uint8* dst_argb, int width);
-void ARGBSepiaRow_SSSE3(uint8* dst_argb, int width);
-void ARGBSepiaRow_NEON(uint8* dst_argb, int width);
-
-void ARGBColorMatrixRow_C(const uint8* src_argb, uint8* dst_argb,
- const int8* matrix_argb, int width);
-void ARGBColorMatrixRow_SSSE3(const uint8* src_argb, uint8* dst_argb,
- const int8* matrix_argb, int width);
-void ARGBColorMatrixRow_NEON(const uint8* src_argb, uint8* dst_argb,
- const int8* matrix_argb, int width);
-
-void ARGBColorTableRow_C(uint8* dst_argb, const uint8* table_argb, int width);
-void ARGBColorTableRow_X86(uint8* dst_argb, const uint8* table_argb, int width);
-
-void RGBColorTableRow_C(uint8* dst_argb, const uint8* table_argb, int width);
-void RGBColorTableRow_X86(uint8* dst_argb, const uint8* table_argb, int width);
-
-void ARGBQuantizeRow_C(uint8* dst_argb, int scale, int interval_size,
- int interval_offset, int width);
-void ARGBQuantizeRow_SSE2(uint8* dst_argb, int scale, int interval_size,
- int interval_offset, int width);
-void ARGBQuantizeRow_NEON(uint8* dst_argb, int scale, int interval_size,
- int interval_offset, int width);
-
-void ARGBShadeRow_C(const uint8* src_argb, uint8* dst_argb, int width,
- uint32 value);
-void ARGBShadeRow_SSE2(const uint8* src_argb, uint8* dst_argb, int width,
- uint32 value);
-void ARGBShadeRow_NEON(const uint8* src_argb, uint8* dst_argb, int width,
- uint32 value);
-
-// Used for blur.
-void CumulativeSumToAverageRow_SSE2(const int32* topleft, const int32* botleft,
- int width, int area, uint8* dst, int count);
-void ComputeCumulativeSumRow_SSE2(const uint8* row, int32* cumsum,
- const int32* previous_cumsum, int width);
-
-void CumulativeSumToAverageRow_C(const int32* topleft, const int32* botleft,
- int width, int area, uint8* dst, int count);
-void ComputeCumulativeSumRow_C(const uint8* row, int32* cumsum,
- const int32* previous_cumsum, int width);
-
-LIBYUV_API
-void ARGBAffineRow_C(const uint8* src_argb, int src_argb_stride,
- uint8* dst_argb, const float* uv_dudv, int width);
-LIBYUV_API
-void ARGBAffineRow_SSE2(const uint8* src_argb, int src_argb_stride,
- uint8* dst_argb, const float* uv_dudv, int width);
-
-// Used for I420Scale, ARGBScale, and ARGBInterpolate.
-void InterpolateRow_C(uint8* dst_ptr, const uint8* src_ptr,
- ptrdiff_t src_stride_ptr,
- int width, int source_y_fraction);
-void InterpolateRow_SSE2(uint8* dst_ptr, const uint8* src_ptr,
- ptrdiff_t src_stride_ptr, int width,
- int source_y_fraction);
-void InterpolateRow_SSSE3(uint8* dst_ptr, const uint8* src_ptr,
- ptrdiff_t src_stride_ptr, int width,
- int source_y_fraction);
-void InterpolateRow_AVX2(uint8* dst_ptr, const uint8* src_ptr,
- ptrdiff_t src_stride_ptr, int width,
- int source_y_fraction);
-void InterpolateRow_NEON(uint8* dst_ptr, const uint8* src_ptr,
- ptrdiff_t src_stride_ptr, int width,
- int source_y_fraction);
-void InterpolateRow_MIPS_DSPR2(uint8* dst_ptr, const uint8* src_ptr,
- ptrdiff_t src_stride_ptr, int width,
- int source_y_fraction);
-void InterpolateRow_Any_NEON(uint8* dst_ptr, const uint8* src_ptr,
- ptrdiff_t src_stride_ptr, int width,
- int source_y_fraction);
-void InterpolateRow_Any_SSE2(uint8* dst_ptr, const uint8* src_ptr,
- ptrdiff_t src_stride_ptr, int width,
- int source_y_fraction);
-void InterpolateRow_Any_SSSE3(uint8* dst_ptr, const uint8* src_ptr,
- ptrdiff_t src_stride_ptr, int width,
- int source_y_fraction);
-void InterpolateRow_Any_AVX2(uint8* dst_ptr, const uint8* src_ptr,
- ptrdiff_t src_stride_ptr, int width,
- int source_y_fraction);
-void InterpolateRow_Any_MIPS_DSPR2(uint8* dst_ptr, const uint8* src_ptr,
- ptrdiff_t src_stride_ptr, int width,
- int source_y_fraction);
-
-void InterpolateRow_16_C(uint16* dst_ptr, const uint16* src_ptr,
- ptrdiff_t src_stride_ptr,
- int width, int source_y_fraction);
-
-// Sobel images.
-void SobelXRow_C(const uint8* src_y0, const uint8* src_y1, const uint8* src_y2,
- uint8* dst_sobelx, int width);
-void SobelXRow_SSE2(const uint8* src_y0, const uint8* src_y1,
- const uint8* src_y2, uint8* dst_sobelx, int width);
-void SobelXRow_NEON(const uint8* src_y0, const uint8* src_y1,
- const uint8* src_y2, uint8* dst_sobelx, int width);
-void SobelYRow_C(const uint8* src_y0, const uint8* src_y1,
- uint8* dst_sobely, int width);
-void SobelYRow_SSE2(const uint8* src_y0, const uint8* src_y1,
- uint8* dst_sobely, int width);
-void SobelYRow_NEON(const uint8* src_y0, const uint8* src_y1,
- uint8* dst_sobely, int width);
-void SobelRow_C(const uint8* src_sobelx, const uint8* src_sobely,
- uint8* dst_argb, int width);
-void SobelRow_SSE2(const uint8* src_sobelx, const uint8* src_sobely,
- uint8* dst_argb, int width);
-void SobelRow_NEON(const uint8* src_sobelx, const uint8* src_sobely,
- uint8* dst_argb, int width);
-void SobelToPlaneRow_C(const uint8* src_sobelx, const uint8* src_sobely,
- uint8* dst_y, int width);
-void SobelToPlaneRow_SSE2(const uint8* src_sobelx, const uint8* src_sobely,
- uint8* dst_y, int width);
-void SobelToPlaneRow_NEON(const uint8* src_sobelx, const uint8* src_sobely,
- uint8* dst_y, int width);
-void SobelXYRow_C(const uint8* src_sobelx, const uint8* src_sobely,
- uint8* dst_argb, int width);
-void SobelXYRow_SSE2(const uint8* src_sobelx, const uint8* src_sobely,
- uint8* dst_argb, int width);
-void SobelXYRow_NEON(const uint8* src_sobelx, const uint8* src_sobely,
- uint8* dst_argb, int width);
-void SobelRow_Any_SSE2(const uint8* src_sobelx, const uint8* src_sobely,
- uint8* dst_argb, int width);
-void SobelRow_Any_NEON(const uint8* src_sobelx, const uint8* src_sobely,
- uint8* dst_argb, int width);
-void SobelToPlaneRow_Any_SSE2(const uint8* src_sobelx, const uint8* src_sobely,
- uint8* dst_y, int width);
-void SobelToPlaneRow_Any_NEON(const uint8* src_sobelx, const uint8* src_sobely,
- uint8* dst_y, int width);
-void SobelXYRow_Any_SSE2(const uint8* src_sobelx, const uint8* src_sobely,
- uint8* dst_argb, int width);
-void SobelXYRow_Any_NEON(const uint8* src_sobelx, const uint8* src_sobely,
- uint8* dst_argb, int width);
-
-void ARGBPolynomialRow_C(const uint8* src_argb,
- uint8* dst_argb, const float* poly,
- int width);
-void ARGBPolynomialRow_SSE2(const uint8* src_argb,
- uint8* dst_argb, const float* poly,
- int width);
-void ARGBPolynomialRow_AVX2(const uint8* src_argb,
- uint8* dst_argb, const float* poly,
- int width);
-
-void ARGBLumaColorTableRow_C(const uint8* src_argb, uint8* dst_argb, int width,
- const uint8* luma, uint32 lumacoeff);
-void ARGBLumaColorTableRow_SSSE3(const uint8* src_argb, uint8* dst_argb,
- int width,
- const uint8* luma, uint32 lumacoeff);
-
-#ifdef __cplusplus
-} // extern "C"
-} // namespace libyuv
-#endif
-
-#endif // INCLUDE_LIBYUV_ROW_H_ NOLINT
diff --git a/third_party/aom/third_party/libyuv/include/libyuv/scale.h b/third_party/aom/third_party/libyuv/include/libyuv/scale.h
deleted file mode 100644
index 3974aba34..000000000
--- a/third_party/aom/third_party/libyuv/include/libyuv/scale.h
+++ /dev/null
@@ -1,104 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#ifndef INCLUDE_LIBYUV_SCALE_H_ // NOLINT
-#define INCLUDE_LIBYUV_SCALE_H_
-
-#include "libyuv/basic_types.h"
-
-#ifdef __cplusplus
-namespace libyuv {
-extern "C" {
-#endif
-
-// Supported filtering.
-typedef enum FilterMode {
- kFilterNone = 0, // Point sample; Fastest.
- kFilterLinear = 1, // Filter horizontally only.
- kFilterBilinear = 2, // Faster than box, but lower quality scaling down.
- kFilterBox = 3 // Highest quality.
-} FilterModeEnum;
-
-// Scale a YUV plane.
-LIBYUV_API
-void ScalePlane(const uint8* src, int src_stride,
- int src_width, int src_height,
- uint8* dst, int dst_stride,
- int dst_width, int dst_height,
- enum FilterMode filtering);
-
-LIBYUV_API
-void ScalePlane_16(const uint16* src, int src_stride,
- int src_width, int src_height,
- uint16* dst, int dst_stride,
- int dst_width, int dst_height,
- enum FilterMode filtering);
-
-// Scales a YUV 4:2:0 image from the src width and height to the
-// dst width and height.
-// If filtering is kFilterNone, a simple nearest-neighbor algorithm is
-// used. This produces basic (blocky) quality at the fastest speed.
-// If filtering is kFilterBilinear, interpolation is used to produce a better
-// quality image, at the expense of speed.
-// If filtering is kFilterBox, averaging is used to produce ever better
-// quality image, at further expense of speed.
-// Returns 0 if successful.
-
-LIBYUV_API
-int I420Scale(const uint8* src_y, int src_stride_y,
- const uint8* src_u, int src_stride_u,
- const uint8* src_v, int src_stride_v,
- int src_width, int src_height,
- uint8* dst_y, int dst_stride_y,
- uint8* dst_u, int dst_stride_u,
- uint8* dst_v, int dst_stride_v,
- int dst_width, int dst_height,
- enum FilterMode filtering);
-
-LIBYUV_API
-int I420Scale_16(const uint16* src_y, int src_stride_y,
- const uint16* src_u, int src_stride_u,
- const uint16* src_v, int src_stride_v,
- int src_width, int src_height,
- uint16* dst_y, int dst_stride_y,
- uint16* dst_u, int dst_stride_u,
- uint16* dst_v, int dst_stride_v,
- int dst_width, int dst_height,
- enum FilterMode filtering);
-
-#ifdef __cplusplus
-// Legacy API. Deprecated.
-LIBYUV_API
-int Scale(const uint8* src_y, const uint8* src_u, const uint8* src_v,
- int src_stride_y, int src_stride_u, int src_stride_v,
- int src_width, int src_height,
- uint8* dst_y, uint8* dst_u, uint8* dst_v,
- int dst_stride_y, int dst_stride_u, int dst_stride_v,
- int dst_width, int dst_height,
- LIBYUV_BOOL interpolate);
-
-// Legacy API. Deprecated.
-LIBYUV_API
-int ScaleOffset(const uint8* src_i420, int src_width, int src_height,
- uint8* dst_i420, int dst_width, int dst_height, int dst_yoffset,
- LIBYUV_BOOL interpolate);
-
-// For testing, allow disabling of specialized scalers.
-LIBYUV_API
-void SetUseReferenceImpl(LIBYUV_BOOL use);
-#endif // __cplusplus
-
-#ifdef __cplusplus
-} // extern "C"
-} // namespace libyuv
-#endif
-
-#endif // INCLUDE_LIBYUV_SCALE_H_ NOLINT
diff --git a/third_party/aom/third_party/libyuv/include/libyuv/scale_argb.h b/third_party/aom/third_party/libyuv/include/libyuv/scale_argb.h
deleted file mode 100644
index 22563837d..000000000
--- a/third_party/aom/third_party/libyuv/include/libyuv/scale_argb.h
+++ /dev/null
@@ -1,58 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#ifndef INCLUDE_LIBYUV_SCALE_ARGB_H_ // NOLINT
-#define INCLUDE_LIBYUV_SCALE_ARGB_H_
-
-#include "libyuv/basic_types.h"
-#include "libyuv/scale.h" // For FilterMode
-
-#ifdef __cplusplus
-namespace libyuv {
-extern "C" {
-#endif
-
-LIBYUV_API
-int ARGBScale(const uint8* src_argb, int src_stride_argb,
- int src_width, int src_height,
- uint8* dst_argb, int dst_stride_argb,
- int dst_width, int dst_height,
- enum FilterMode filtering);
-
-// Clipped scale takes destination rectangle coordinates for clip values.
-LIBYUV_API
-int ARGBScaleClip(const uint8* src_argb, int src_stride_argb,
- int src_width, int src_height,
- uint8* dst_argb, int dst_stride_argb,
- int dst_width, int dst_height,
- int clip_x, int clip_y, int clip_width, int clip_height,
- enum FilterMode filtering);
-
-// TODO(fbarchard): Implement this.
-// Scale with YUV conversion to ARGB and clipping.
-LIBYUV_API
-int YUVToARGBScaleClip(const uint8* src_y, int src_stride_y,
- const uint8* src_u, int src_stride_u,
- const uint8* src_v, int src_stride_v,
- uint32 src_fourcc,
- int src_width, int src_height,
- uint8* dst_argb, int dst_stride_argb,
- uint32 dst_fourcc,
- int dst_width, int dst_height,
- int clip_x, int clip_y, int clip_width, int clip_height,
- enum FilterMode filtering);
-
-#ifdef __cplusplus
-} // extern "C"
-} // namespace libyuv
-#endif
-
-#endif // INCLUDE_LIBYUV_SCALE_ARGB_H_ NOLINT
diff --git a/third_party/aom/third_party/libyuv/include/libyuv/scale_row.h b/third_party/aom/third_party/libyuv/include/libyuv/scale_row.h
deleted file mode 100644
index a46b5ce69..000000000
--- a/third_party/aom/third_party/libyuv/include/libyuv/scale_row.h
+++ /dev/null
@@ -1,479 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#ifndef INCLUDE_LIBYUV_SCALE_ROW_H_ // NOLINT
-#define INCLUDE_LIBYUV_SCALE_ROW_H_
-
-#include "libyuv/basic_types.h"
-#include "libyuv/scale.h"
-
-#ifdef __cplusplus
-namespace libyuv {
-extern "C" {
-#endif
-
-#if defined(__pnacl__) || defined(__CLR_VER) || \
- (defined(__i386__) && !defined(__SSE2__))
-#define LIBYUV_DISABLE_X86
-#endif
-
-// Visual C 2012 required for AVX2.
-#if defined(_M_IX86) && !defined(__clang__) && \
- defined(_MSC_VER) && _MSC_VER >= 1700
-#define VISUALC_HAS_AVX2 1
-#endif // VisualStudio >= 2012
-
-// The following are available on all x86 platforms:
-#if !defined(LIBYUV_DISABLE_X86) && \
- (defined(_M_IX86) || defined(__x86_64__) || defined(__i386__))
-#define HAS_FIXEDDIV1_X86
-#define HAS_FIXEDDIV_X86
-#define HAS_SCALEARGBCOLS_SSE2
-#define HAS_SCALEARGBCOLSUP2_SSE2
-#define HAS_SCALEARGBFILTERCOLS_SSSE3
-#define HAS_SCALEARGBROWDOWN2_SSE2
-#define HAS_SCALEARGBROWDOWNEVEN_SSE2
-#define HAS_SCALECOLSUP2_SSE2
-#define HAS_SCALEFILTERCOLS_SSSE3
-#define HAS_SCALEROWDOWN2_SSE2
-#define HAS_SCALEROWDOWN34_SSSE3
-#define HAS_SCALEROWDOWN38_SSSE3
-#define HAS_SCALEROWDOWN4_SSE2
-#endif
-
-// The following are available on VS2012:
-#if !defined(LIBYUV_DISABLE_X86) && defined(VISUALC_HAS_AVX2)
-#define HAS_SCALEADDROW_AVX2
-#define HAS_SCALEROWDOWN2_AVX2
-#define HAS_SCALEROWDOWN4_AVX2
-#endif
-
-// The following are available on Visual C:
-#if !defined(LIBYUV_DISABLE_X86) && defined(_M_IX86) && !defined(__clang__)
-#define HAS_SCALEADDROW_SSE2
-#endif
-
-// The following are available on Neon platforms:
-#if !defined(LIBYUV_DISABLE_NEON) && !defined(__native_client__) && \
- (defined(__ARM_NEON__) || defined(LIBYUV_NEON) || defined(__aarch64__))
-#define HAS_SCALEARGBCOLS_NEON
-#define HAS_SCALEARGBROWDOWN2_NEON
-#define HAS_SCALEARGBROWDOWNEVEN_NEON
-#define HAS_SCALEFILTERCOLS_NEON
-#define HAS_SCALEROWDOWN2_NEON
-#define HAS_SCALEROWDOWN34_NEON
-#define HAS_SCALEROWDOWN38_NEON
-#define HAS_SCALEROWDOWN4_NEON
-#define HAS_SCALEARGBFILTERCOLS_NEON
-#endif
-
-// The following are available on Mips platforms:
-#if !defined(LIBYUV_DISABLE_MIPS) && !defined(__native_client__) && \
- defined(__mips__) && defined(__mips_dsp) && (__mips_dsp_rev >= 2)
-#define HAS_SCALEROWDOWN2_MIPS_DSPR2
-#define HAS_SCALEROWDOWN4_MIPS_DSPR2
-#define HAS_SCALEROWDOWN34_MIPS_DSPR2
-#define HAS_SCALEROWDOWN38_MIPS_DSPR2
-#endif
-
-// Scale ARGB vertically with bilinear interpolation.
-void ScalePlaneVertical(int src_height,
- int dst_width, int dst_height,
- int src_stride, int dst_stride,
- const uint8* src_argb, uint8* dst_argb,
- int x, int y, int dy,
- int bpp, enum FilterMode filtering);
-
-void ScalePlaneVertical_16(int src_height,
- int dst_width, int dst_height,
- int src_stride, int dst_stride,
- const uint16* src_argb, uint16* dst_argb,
- int x, int y, int dy,
- int wpp, enum FilterMode filtering);
-
-// Simplify the filtering based on scale factors.
-enum FilterMode ScaleFilterReduce(int src_width, int src_height,
- int dst_width, int dst_height,
- enum FilterMode filtering);
-
-// Divide num by div and return as 16.16 fixed point result.
-int FixedDiv_C(int num, int div);
-int FixedDiv_X86(int num, int div);
-// Divide num - 1 by div - 1 and return as 16.16 fixed point result.
-int FixedDiv1_C(int num, int div);
-int FixedDiv1_X86(int num, int div);
-#ifdef HAS_FIXEDDIV_X86
-#define FixedDiv FixedDiv_X86
-#define FixedDiv1 FixedDiv1_X86
-#else
-#define FixedDiv FixedDiv_C
-#define FixedDiv1 FixedDiv1_C
-#endif
-
-// Compute slope values for stepping.
-void ScaleSlope(int src_width, int src_height,
- int dst_width, int dst_height,
- enum FilterMode filtering,
- int* x, int* y, int* dx, int* dy);
-
-void ScaleRowDown2_C(const uint8* src_ptr, ptrdiff_t src_stride,
- uint8* dst, int dst_width);
-void ScaleRowDown2_16_C(const uint16* src_ptr, ptrdiff_t src_stride,
- uint16* dst, int dst_width);
-void ScaleRowDown2Linear_C(const uint8* src_ptr, ptrdiff_t src_stride,
- uint8* dst, int dst_width);
-void ScaleRowDown2Linear_16_C(const uint16* src_ptr, ptrdiff_t src_stride,
- uint16* dst, int dst_width);
-void ScaleRowDown2Box_C(const uint8* src_ptr, ptrdiff_t src_stride,
- uint8* dst, int dst_width);
-void ScaleRowDown2Box_16_C(const uint16* src_ptr, ptrdiff_t src_stride,
- uint16* dst, int dst_width);
-void ScaleRowDown4_C(const uint8* src_ptr, ptrdiff_t src_stride,
- uint8* dst, int dst_width);
-void ScaleRowDown4_16_C(const uint16* src_ptr, ptrdiff_t src_stride,
- uint16* dst, int dst_width);
-void ScaleRowDown4Box_C(const uint8* src_ptr, ptrdiff_t src_stride,
- uint8* dst, int dst_width);
-void ScaleRowDown4Box_16_C(const uint16* src_ptr, ptrdiff_t src_stride,
- uint16* dst, int dst_width);
-void ScaleRowDown34_C(const uint8* src_ptr, ptrdiff_t src_stride,
- uint8* dst, int dst_width);
-void ScaleRowDown34_16_C(const uint16* src_ptr, ptrdiff_t src_stride,
- uint16* dst, int dst_width);
-void ScaleRowDown34_0_Box_C(const uint8* src_ptr, ptrdiff_t src_stride,
- uint8* d, int dst_width);
-void ScaleRowDown34_0_Box_16_C(const uint16* src_ptr, ptrdiff_t src_stride,
- uint16* d, int dst_width);
-void ScaleRowDown34_1_Box_C(const uint8* src_ptr, ptrdiff_t src_stride,
- uint8* d, int dst_width);
-void ScaleRowDown34_1_Box_16_C(const uint16* src_ptr, ptrdiff_t src_stride,
- uint16* d, int dst_width);
-void ScaleCols_C(uint8* dst_ptr, const uint8* src_ptr,
- int dst_width, int x, int dx);
-void ScaleCols_16_C(uint16* dst_ptr, const uint16* src_ptr,
- int dst_width, int x, int dx);
-void ScaleColsUp2_C(uint8* dst_ptr, const uint8* src_ptr,
- int dst_width, int, int);
-void ScaleColsUp2_16_C(uint16* dst_ptr, const uint16* src_ptr,
- int dst_width, int, int);
-void ScaleFilterCols_C(uint8* dst_ptr, const uint8* src_ptr,
- int dst_width, int x, int dx);
-void ScaleFilterCols_16_C(uint16* dst_ptr, const uint16* src_ptr,
- int dst_width, int x, int dx);
-void ScaleFilterCols64_C(uint8* dst_ptr, const uint8* src_ptr,
- int dst_width, int x, int dx);
-void ScaleFilterCols64_16_C(uint16* dst_ptr, const uint16* src_ptr,
- int dst_width, int x, int dx);
-void ScaleRowDown38_C(const uint8* src_ptr, ptrdiff_t src_stride,
- uint8* dst, int dst_width);
-void ScaleRowDown38_16_C(const uint16* src_ptr, ptrdiff_t src_stride,
- uint16* dst, int dst_width);
-void ScaleRowDown38_3_Box_C(const uint8* src_ptr,
- ptrdiff_t src_stride,
- uint8* dst_ptr, int dst_width);
-void ScaleRowDown38_3_Box_16_C(const uint16* src_ptr,
- ptrdiff_t src_stride,
- uint16* dst_ptr, int dst_width);
-void ScaleRowDown38_2_Box_C(const uint8* src_ptr, ptrdiff_t src_stride,
- uint8* dst_ptr, int dst_width);
-void ScaleRowDown38_2_Box_16_C(const uint16* src_ptr, ptrdiff_t src_stride,
- uint16* dst_ptr, int dst_width);
-void ScaleAddRow_C(const uint8* src_ptr, uint16* dst_ptr, int src_width);
-void ScaleAddRow_16_C(const uint16* src_ptr, uint32* dst_ptr, int src_width);
-void ScaleARGBRowDown2_C(const uint8* src_argb,
- ptrdiff_t src_stride,
- uint8* dst_argb, int dst_width);
-void ScaleARGBRowDown2Linear_C(const uint8* src_argb,
- ptrdiff_t src_stride,
- uint8* dst_argb, int dst_width);
-void ScaleARGBRowDown2Box_C(const uint8* src_argb, ptrdiff_t src_stride,
- uint8* dst_argb, int dst_width);
-void ScaleARGBRowDownEven_C(const uint8* src_argb, ptrdiff_t src_stride,
- int src_stepx,
- uint8* dst_argb, int dst_width);
-void ScaleARGBRowDownEvenBox_C(const uint8* src_argb,
- ptrdiff_t src_stride,
- int src_stepx,
- uint8* dst_argb, int dst_width);
-void ScaleARGBCols_C(uint8* dst_argb, const uint8* src_argb,
- int dst_width, int x, int dx);
-void ScaleARGBCols64_C(uint8* dst_argb, const uint8* src_argb,
- int dst_width, int x, int dx);
-void ScaleARGBColsUp2_C(uint8* dst_argb, const uint8* src_argb,
- int dst_width, int, int);
-void ScaleARGBFilterCols_C(uint8* dst_argb, const uint8* src_argb,
- int dst_width, int x, int dx);
-void ScaleARGBFilterCols64_C(uint8* dst_argb, const uint8* src_argb,
- int dst_width, int x, int dx);
-
-// Specialized scalers for x86.
-void ScaleRowDown2_SSE2(const uint8* src_ptr, ptrdiff_t src_stride,
- uint8* dst_ptr, int dst_width);
-void ScaleRowDown2Linear_SSE2(const uint8* src_ptr, ptrdiff_t src_stride,
- uint8* dst_ptr, int dst_width);
-void ScaleRowDown2Box_SSE2(const uint8* src_ptr, ptrdiff_t src_stride,
- uint8* dst_ptr, int dst_width);
-void ScaleRowDown2_AVX2(const uint8* src_ptr, ptrdiff_t src_stride,
- uint8* dst_ptr, int dst_width);
-void ScaleRowDown2Linear_AVX2(const uint8* src_ptr, ptrdiff_t src_stride,
- uint8* dst_ptr, int dst_width);
-void ScaleRowDown2Box_AVX2(const uint8* src_ptr, ptrdiff_t src_stride,
- uint8* dst_ptr, int dst_width);
-void ScaleRowDown4_SSE2(const uint8* src_ptr, ptrdiff_t src_stride,
- uint8* dst_ptr, int dst_width);
-void ScaleRowDown4Box_SSE2(const uint8* src_ptr, ptrdiff_t src_stride,
- uint8* dst_ptr, int dst_width);
-void ScaleRowDown4_AVX2(const uint8* src_ptr, ptrdiff_t src_stride,
- uint8* dst_ptr, int dst_width);
-void ScaleRowDown4Box_AVX2(const uint8* src_ptr, ptrdiff_t src_stride,
- uint8* dst_ptr, int dst_width);
-
-void ScaleRowDown34_SSSE3(const uint8* src_ptr, ptrdiff_t src_stride,
- uint8* dst_ptr, int dst_width);
-void ScaleRowDown34_1_Box_SSSE3(const uint8* src_ptr,
- ptrdiff_t src_stride,
- uint8* dst_ptr, int dst_width);
-void ScaleRowDown34_0_Box_SSSE3(const uint8* src_ptr,
- ptrdiff_t src_stride,
- uint8* dst_ptr, int dst_width);
-void ScaleRowDown38_SSSE3(const uint8* src_ptr, ptrdiff_t src_stride,
- uint8* dst_ptr, int dst_width);
-void ScaleRowDown38_3_Box_SSSE3(const uint8* src_ptr,
- ptrdiff_t src_stride,
- uint8* dst_ptr, int dst_width);
-void ScaleRowDown38_2_Box_SSSE3(const uint8* src_ptr,
- ptrdiff_t src_stride,
- uint8* dst_ptr, int dst_width);
-void ScaleRowDown2_Any_SSE2(const uint8* src_ptr, ptrdiff_t src_stride,
- uint8* dst_ptr, int dst_width);
-void ScaleRowDown2Linear_Any_SSE2(const uint8* src_ptr, ptrdiff_t src_stride,
- uint8* dst_ptr, int dst_width);
-void ScaleRowDown2Box_Any_SSE2(const uint8* src_ptr, ptrdiff_t src_stride,
- uint8* dst_ptr, int dst_width);
-void ScaleRowDown2_Any_AVX2(const uint8* src_ptr, ptrdiff_t src_stride,
- uint8* dst_ptr, int dst_width);
-void ScaleRowDown2Linear_Any_AVX2(const uint8* src_ptr, ptrdiff_t src_stride,
- uint8* dst_ptr, int dst_width);
-void ScaleRowDown2Box_Any_AVX2(const uint8* src_ptr, ptrdiff_t src_stride,
- uint8* dst_ptr, int dst_width);
-void ScaleRowDown4_Any_SSE2(const uint8* src_ptr, ptrdiff_t src_stride,
- uint8* dst_ptr, int dst_width);
-void ScaleRowDown4Box_Any_SSE2(const uint8* src_ptr, ptrdiff_t src_stride,
- uint8* dst_ptr, int dst_width);
-void ScaleRowDown4_Any_AVX2(const uint8* src_ptr, ptrdiff_t src_stride,
- uint8* dst_ptr, int dst_width);
-void ScaleRowDown4Box_Any_AVX2(const uint8* src_ptr, ptrdiff_t src_stride,
- uint8* dst_ptr, int dst_width);
-
-void ScaleRowDown34_Any_SSSE3(const uint8* src_ptr, ptrdiff_t src_stride,
- uint8* dst_ptr, int dst_width);
-void ScaleRowDown34_1_Box_Any_SSSE3(const uint8* src_ptr,
- ptrdiff_t src_stride,
- uint8* dst_ptr, int dst_width);
-void ScaleRowDown34_0_Box_Any_SSSE3(const uint8* src_ptr,
- ptrdiff_t src_stride,
- uint8* dst_ptr, int dst_width);
-void ScaleRowDown38_Any_SSSE3(const uint8* src_ptr, ptrdiff_t src_stride,
- uint8* dst_ptr, int dst_width);
-void ScaleRowDown38_3_Box_Any_SSSE3(const uint8* src_ptr,
- ptrdiff_t src_stride,
- uint8* dst_ptr, int dst_width);
-void ScaleRowDown38_2_Box_Any_SSSE3(const uint8* src_ptr,
- ptrdiff_t src_stride,
- uint8* dst_ptr, int dst_width);
-
-void ScaleAddRow_SSE2(const uint8* src_ptr, uint16* dst_ptr, int src_width);
-void ScaleAddRow_AVX2(const uint8* src_ptr, uint16* dst_ptr, int src_width);
-void ScaleAddRow_Any_SSE2(const uint8* src_ptr, uint16* dst_ptr, int src_width);
-void ScaleAddRow_Any_AVX2(const uint8* src_ptr, uint16* dst_ptr, int src_width);
-
-void ScaleFilterCols_SSSE3(uint8* dst_ptr, const uint8* src_ptr,
- int dst_width, int x, int dx);
-void ScaleColsUp2_SSE2(uint8* dst_ptr, const uint8* src_ptr,
- int dst_width, int x, int dx);
-
-
-// ARGB Column functions
-void ScaleARGBCols_SSE2(uint8* dst_argb, const uint8* src_argb,
- int dst_width, int x, int dx);
-void ScaleARGBFilterCols_SSSE3(uint8* dst_argb, const uint8* src_argb,
- int dst_width, int x, int dx);
-void ScaleARGBColsUp2_SSE2(uint8* dst_argb, const uint8* src_argb,
- int dst_width, int x, int dx);
-void ScaleARGBFilterCols_NEON(uint8* dst_argb, const uint8* src_argb,
- int dst_width, int x, int dx);
-void ScaleARGBCols_NEON(uint8* dst_argb, const uint8* src_argb,
- int dst_width, int x, int dx);
-void ScaleARGBFilterCols_Any_NEON(uint8* dst_argb, const uint8* src_argb,
- int dst_width, int x, int dx);
-void ScaleARGBCols_Any_NEON(uint8* dst_argb, const uint8* src_argb,
- int dst_width, int x, int dx);
-
-// ARGB Row functions
-void ScaleARGBRowDown2_SSE2(const uint8* src_argb, ptrdiff_t src_stride,
- uint8* dst_argb, int dst_width);
-void ScaleARGBRowDown2Linear_SSE2(const uint8* src_argb, ptrdiff_t src_stride,
- uint8* dst_argb, int dst_width);
-void ScaleARGBRowDown2Box_SSE2(const uint8* src_argb, ptrdiff_t src_stride,
- uint8* dst_argb, int dst_width);
-void ScaleARGBRowDown2_NEON(const uint8* src_ptr, ptrdiff_t src_stride,
- uint8* dst, int dst_width);
-void ScaleARGBRowDown2Linear_NEON(const uint8* src_argb, ptrdiff_t src_stride,
- uint8* dst_argb, int dst_width);
-void ScaleARGBRowDown2Box_NEON(const uint8* src_ptr, ptrdiff_t src_stride,
- uint8* dst, int dst_width);
-void ScaleARGBRowDown2_Any_SSE2(const uint8* src_argb, ptrdiff_t src_stride,
- uint8* dst_argb, int dst_width);
-void ScaleARGBRowDown2Linear_Any_SSE2(const uint8* src_argb,
- ptrdiff_t src_stride,
- uint8* dst_argb, int dst_width);
-void ScaleARGBRowDown2Box_Any_SSE2(const uint8* src_argb, ptrdiff_t src_stride,
- uint8* dst_argb, int dst_width);
-void ScaleARGBRowDown2_Any_NEON(const uint8* src_ptr, ptrdiff_t src_stride,
- uint8* dst, int dst_width);
-void ScaleARGBRowDown2Linear_Any_NEON(const uint8* src_argb,
- ptrdiff_t src_stride,
- uint8* dst_argb, int dst_width);
-void ScaleARGBRowDown2Box_Any_NEON(const uint8* src_ptr, ptrdiff_t src_stride,
- uint8* dst, int dst_width);
-
-void ScaleARGBRowDownEven_SSE2(const uint8* src_argb, ptrdiff_t src_stride,
- int src_stepx, uint8* dst_argb, int dst_width);
-void ScaleARGBRowDownEvenBox_SSE2(const uint8* src_argb, ptrdiff_t src_stride,
- int src_stepx,
- uint8* dst_argb, int dst_width);
-void ScaleARGBRowDownEven_NEON(const uint8* src_argb, ptrdiff_t src_stride,
- int src_stepx,
- uint8* dst_argb, int dst_width);
-void ScaleARGBRowDownEvenBox_NEON(const uint8* src_argb, ptrdiff_t src_stride,
- int src_stepx,
- uint8* dst_argb, int dst_width);
-void ScaleARGBRowDownEven_Any_SSE2(const uint8* src_argb, ptrdiff_t src_stride,
- int src_stepx,
- uint8* dst_argb, int dst_width);
-void ScaleARGBRowDownEvenBox_Any_SSE2(const uint8* src_argb,
- ptrdiff_t src_stride,
- int src_stepx,
- uint8* dst_argb, int dst_width);
-void ScaleARGBRowDownEven_Any_NEON(const uint8* src_argb, ptrdiff_t src_stride,
- int src_stepx,
- uint8* dst_argb, int dst_width);
-void ScaleARGBRowDownEvenBox_Any_NEON(const uint8* src_argb,
- ptrdiff_t src_stride,
- int src_stepx,
- uint8* dst_argb, int dst_width);
-
-// ScaleRowDown2Box also used by planar functions
-// NEON downscalers with interpolation.
-
-// Note - not static due to reuse in convert for 444 to 420.
-void ScaleRowDown2_NEON(const uint8* src_ptr, ptrdiff_t src_stride,
- uint8* dst, int dst_width);
-void ScaleRowDown2Linear_NEON(const uint8* src_ptr, ptrdiff_t src_stride,
- uint8* dst, int dst_width);
-void ScaleRowDown2Box_NEON(const uint8* src_ptr, ptrdiff_t src_stride,
- uint8* dst, int dst_width);
-
-void ScaleRowDown4_NEON(const uint8* src_ptr, ptrdiff_t src_stride,
- uint8* dst_ptr, int dst_width);
-void ScaleRowDown4Box_NEON(const uint8* src_ptr, ptrdiff_t src_stride,
- uint8* dst_ptr, int dst_width);
-
-// Down scale from 4 to 3 pixels. Use the neon multilane read/write
-// to load up the every 4th pixel into a 4 different registers.
-// Point samples 32 pixels to 24 pixels.
-void ScaleRowDown34_NEON(const uint8* src_ptr,
- ptrdiff_t src_stride,
- uint8* dst_ptr, int dst_width);
-void ScaleRowDown34_0_Box_NEON(const uint8* src_ptr,
- ptrdiff_t src_stride,
- uint8* dst_ptr, int dst_width);
-void ScaleRowDown34_1_Box_NEON(const uint8* src_ptr,
- ptrdiff_t src_stride,
- uint8* dst_ptr, int dst_width);
-
-// 32 -> 12
-void ScaleRowDown38_NEON(const uint8* src_ptr,
- ptrdiff_t src_stride,
- uint8* dst_ptr, int dst_width);
-// 32x3 -> 12x1
-void ScaleRowDown38_3_Box_NEON(const uint8* src_ptr,
- ptrdiff_t src_stride,
- uint8* dst_ptr, int dst_width);
-// 32x2 -> 12x1
-void ScaleRowDown38_2_Box_NEON(const uint8* src_ptr,
- ptrdiff_t src_stride,
- uint8* dst_ptr, int dst_width);
-
-void ScaleRowDown2_Any_NEON(const uint8* src_ptr, ptrdiff_t src_stride,
- uint8* dst, int dst_width);
-void ScaleRowDown2Linear_Any_NEON(const uint8* src_ptr, ptrdiff_t src_stride,
- uint8* dst, int dst_width);
-void ScaleRowDown2Box_Any_NEON(const uint8* src_ptr, ptrdiff_t src_stride,
- uint8* dst, int dst_width);
-void ScaleRowDown4_Any_NEON(const uint8* src_ptr, ptrdiff_t src_stride,
- uint8* dst_ptr, int dst_width);
-void ScaleRowDown4Box_Any_NEON(const uint8* src_ptr, ptrdiff_t src_stride,
- uint8* dst_ptr, int dst_width);
-void ScaleRowDown34_Any_NEON(const uint8* src_ptr, ptrdiff_t src_stride,
- uint8* dst_ptr, int dst_width);
-void ScaleRowDown34_0_Box_Any_NEON(const uint8* src_ptr, ptrdiff_t src_stride,
- uint8* dst_ptr, int dst_width);
-void ScaleRowDown34_1_Box_Any_NEON(const uint8* src_ptr, ptrdiff_t src_stride,
- uint8* dst_ptr, int dst_width);
-// 32 -> 12
-void ScaleRowDown38_Any_NEON(const uint8* src_ptr, ptrdiff_t src_stride,
- uint8* dst_ptr, int dst_width);
-// 32x3 -> 12x1
-void ScaleRowDown38_3_Box_Any_NEON(const uint8* src_ptr, ptrdiff_t src_stride,
- uint8* dst_ptr, int dst_width);
-// 32x2 -> 12x1
-void ScaleRowDown38_2_Box_Any_NEON(const uint8* src_ptr, ptrdiff_t src_stride,
- uint8* dst_ptr, int dst_width);
-
-void ScaleAddRow_NEON(const uint8* src_ptr, uint16* dst_ptr, int src_width);
-void ScaleAddRow_Any_NEON(const uint8* src_ptr, uint16* dst_ptr, int src_width);
-
-void ScaleFilterCols_NEON(uint8* dst_ptr, const uint8* src_ptr,
- int dst_width, int x, int dx);
-
-void ScaleFilterCols_Any_NEON(uint8* dst_ptr, const uint8* src_ptr,
- int dst_width, int x, int dx);
-
-
-void ScaleRowDown2_MIPS_DSPR2(const uint8* src_ptr, ptrdiff_t src_stride,
- uint8* dst, int dst_width);
-void ScaleRowDown2Box_MIPS_DSPR2(const uint8* src_ptr, ptrdiff_t src_stride,
- uint8* dst, int dst_width);
-void ScaleRowDown4_MIPS_DSPR2(const uint8* src_ptr, ptrdiff_t src_stride,
- uint8* dst, int dst_width);
-void ScaleRowDown4Box_MIPS_DSPR2(const uint8* src_ptr, ptrdiff_t src_stride,
- uint8* dst, int dst_width);
-void ScaleRowDown34_MIPS_DSPR2(const uint8* src_ptr, ptrdiff_t src_stride,
- uint8* dst, int dst_width);
-void ScaleRowDown34_0_Box_MIPS_DSPR2(const uint8* src_ptr, ptrdiff_t src_stride,
- uint8* d, int dst_width);
-void ScaleRowDown34_1_Box_MIPS_DSPR2(const uint8* src_ptr, ptrdiff_t src_stride,
- uint8* d, int dst_width);
-void ScaleRowDown38_MIPS_DSPR2(const uint8* src_ptr, ptrdiff_t src_stride,
- uint8* dst, int dst_width);
-void ScaleRowDown38_2_Box_MIPS_DSPR2(const uint8* src_ptr, ptrdiff_t src_stride,
- uint8* dst_ptr, int dst_width);
-void ScaleRowDown38_3_Box_MIPS_DSPR2(const uint8* src_ptr,
- ptrdiff_t src_stride,
- uint8* dst_ptr, int dst_width);
-
-#ifdef __cplusplus
-} // extern "C"
-} // namespace libyuv
-#endif
-
-#endif // INCLUDE_LIBYUV_SCALE_ROW_H_ NOLINT
diff --git a/third_party/aom/third_party/libyuv/include/libyuv/version.h b/third_party/aom/third_party/libyuv/include/libyuv/version.h
deleted file mode 100644
index 287b98ebf..000000000
--- a/third_party/aom/third_party/libyuv/include/libyuv/version.h
+++ /dev/null
@@ -1,17 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#ifndef INCLUDE_LIBYUV_VERSION_H_ // NOLINT
-#define INCLUDE_LIBYUV_VERSION_H_
-
-#define LIBYUV_VERSION 1456
-
-#endif // INCLUDE_LIBYUV_VERSION_H_ NOLINT
diff --git a/third_party/aom/third_party/libyuv/include/libyuv/video_common.h b/third_party/aom/third_party/libyuv/include/libyuv/video_common.h
deleted file mode 100644
index 7b0a19cc9..000000000
--- a/third_party/aom/third_party/libyuv/include/libyuv/video_common.h
+++ /dev/null
@@ -1,183 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-// Common definitions for video, including fourcc and VideoFormat.
-
-#ifndef INCLUDE_LIBYUV_VIDEO_COMMON_H_ // NOLINT
-#define INCLUDE_LIBYUV_VIDEO_COMMON_H_
-
-#include "libyuv/basic_types.h"
-
-#ifdef __cplusplus
-namespace libyuv {
-extern "C" {
-#endif
-
-//////////////////////////////////////////////////////////////////////////////
-// Definition of FourCC codes
-//////////////////////////////////////////////////////////////////////////////
-
-// Convert four characters to a FourCC code.
-// Needs to be a macro otherwise the OS X compiler complains when the kFormat*
-// constants are used in a switch.
-#ifdef __cplusplus
-#define FOURCC(a, b, c, d) ( \
- (static_cast<uint32>(a)) | (static_cast<uint32>(b) << 8) | \
- (static_cast<uint32>(c) << 16) | (static_cast<uint32>(d) << 24))
-#else
-#define FOURCC(a, b, c, d) ( \
- ((uint32)(a)) | ((uint32)(b) << 8) | /* NOLINT */ \
- ((uint32)(c) << 16) | ((uint32)(d) << 24)) /* NOLINT */
-#endif
-
-// Some pages discussing FourCC codes:
-// http://www.fourcc.org/yuv.php
-// http://v4l2spec.bytesex.org/spec/book1.htm
-// http://developer.apple.com/quicktime/icefloe/dispatch020.html
-// http://msdn.microsoft.com/library/windows/desktop/dd206750.aspx#nv12
-// http://people.xiph.org/~xiphmont/containers/nut/nut4cc.txt
-
-// FourCC codes grouped according to implementation efficiency.
-// Primary formats should convert in 1 efficient step.
-// Secondary formats are converted in 2 steps.
-// Auxilliary formats call primary converters.
-enum FourCC {
- // 9 Primary YUV formats: 5 planar, 2 biplanar, 2 packed.
- FOURCC_I420 = FOURCC('I', '4', '2', '0'),
- FOURCC_I422 = FOURCC('I', '4', '2', '2'),
- FOURCC_I444 = FOURCC('I', '4', '4', '4'),
- FOURCC_I411 = FOURCC('I', '4', '1', '1'),
- FOURCC_I400 = FOURCC('I', '4', '0', '0'),
- FOURCC_NV21 = FOURCC('N', 'V', '2', '1'),
- FOURCC_NV12 = FOURCC('N', 'V', '1', '2'),
- FOURCC_YUY2 = FOURCC('Y', 'U', 'Y', '2'),
- FOURCC_UYVY = FOURCC('U', 'Y', 'V', 'Y'),
-
- // 2 Secondary YUV formats: row biplanar.
- FOURCC_M420 = FOURCC('M', '4', '2', '0'),
- FOURCC_Q420 = FOURCC('Q', '4', '2', '0'), // deprecated.
-
- // 9 Primary RGB formats: 4 32 bpp, 2 24 bpp, 3 16 bpp.
- FOURCC_ARGB = FOURCC('A', 'R', 'G', 'B'),
- FOURCC_BGRA = FOURCC('B', 'G', 'R', 'A'),
- FOURCC_ABGR = FOURCC('A', 'B', 'G', 'R'),
- FOURCC_24BG = FOURCC('2', '4', 'B', 'G'),
- FOURCC_RAW = FOURCC('r', 'a', 'w', ' '),
- FOURCC_RGBA = FOURCC('R', 'G', 'B', 'A'),
- FOURCC_RGBP = FOURCC('R', 'G', 'B', 'P'), // rgb565 LE.
- FOURCC_RGBO = FOURCC('R', 'G', 'B', 'O'), // argb1555 LE.
- FOURCC_R444 = FOURCC('R', '4', '4', '4'), // argb4444 LE.
-
- // 4 Secondary RGB formats: 4 Bayer Patterns. deprecated.
- FOURCC_RGGB = FOURCC('R', 'G', 'G', 'B'),
- FOURCC_BGGR = FOURCC('B', 'G', 'G', 'R'),
- FOURCC_GRBG = FOURCC('G', 'R', 'B', 'G'),
- FOURCC_GBRG = FOURCC('G', 'B', 'R', 'G'),
-
- // 1 Primary Compressed YUV format.
- FOURCC_MJPG = FOURCC('M', 'J', 'P', 'G'),
-
- // 5 Auxiliary YUV variations: 3 with U and V planes are swapped, 1 Alias.
- FOURCC_YV12 = FOURCC('Y', 'V', '1', '2'),
- FOURCC_YV16 = FOURCC('Y', 'V', '1', '6'),
- FOURCC_YV24 = FOURCC('Y', 'V', '2', '4'),
- FOURCC_YU12 = FOURCC('Y', 'U', '1', '2'), // Linux version of I420.
- FOURCC_J420 = FOURCC('J', '4', '2', '0'),
- FOURCC_J400 = FOURCC('J', '4', '0', '0'),
-
- // 14 Auxiliary aliases. CanonicalFourCC() maps these to canonical fourcc.
- FOURCC_IYUV = FOURCC('I', 'Y', 'U', 'V'), // Alias for I420.
- FOURCC_YU16 = FOURCC('Y', 'U', '1', '6'), // Alias for I422.
- FOURCC_YU24 = FOURCC('Y', 'U', '2', '4'), // Alias for I444.
- FOURCC_YUYV = FOURCC('Y', 'U', 'Y', 'V'), // Alias for YUY2.
- FOURCC_YUVS = FOURCC('y', 'u', 'v', 's'), // Alias for YUY2 on Mac.
- FOURCC_HDYC = FOURCC('H', 'D', 'Y', 'C'), // Alias for UYVY.
- FOURCC_2VUY = FOURCC('2', 'v', 'u', 'y'), // Alias for UYVY on Mac.
- FOURCC_JPEG = FOURCC('J', 'P', 'E', 'G'), // Alias for MJPG.
- FOURCC_DMB1 = FOURCC('d', 'm', 'b', '1'), // Alias for MJPG on Mac.
- FOURCC_BA81 = FOURCC('B', 'A', '8', '1'), // Alias for BGGR.
- FOURCC_RGB3 = FOURCC('R', 'G', 'B', '3'), // Alias for RAW.
- FOURCC_BGR3 = FOURCC('B', 'G', 'R', '3'), // Alias for 24BG.
- FOURCC_CM32 = FOURCC(0, 0, 0, 32), // Alias for BGRA kCMPixelFormat_32ARGB
- FOURCC_CM24 = FOURCC(0, 0, 0, 24), // Alias for RAW kCMPixelFormat_24RGB
- FOURCC_L555 = FOURCC('L', '5', '5', '5'), // Alias for RGBO.
- FOURCC_L565 = FOURCC('L', '5', '6', '5'), // Alias for RGBP.
- FOURCC_5551 = FOURCC('5', '5', '5', '1'), // Alias for RGBO.
-
- // 1 Auxiliary compressed YUV format set aside for capturer.
- FOURCC_H264 = FOURCC('H', '2', '6', '4'),
-
- // Match any fourcc.
- FOURCC_ANY = -1,
-};
-
-enum FourCCBpp {
- // Canonical fourcc codes used in our code.
- FOURCC_BPP_I420 = 12,
- FOURCC_BPP_I422 = 16,
- FOURCC_BPP_I444 = 24,
- FOURCC_BPP_I411 = 12,
- FOURCC_BPP_I400 = 8,
- FOURCC_BPP_NV21 = 12,
- FOURCC_BPP_NV12 = 12,
- FOURCC_BPP_YUY2 = 16,
- FOURCC_BPP_UYVY = 16,
- FOURCC_BPP_M420 = 12,
- FOURCC_BPP_Q420 = 12,
- FOURCC_BPP_ARGB = 32,
- FOURCC_BPP_BGRA = 32,
- FOURCC_BPP_ABGR = 32,
- FOURCC_BPP_RGBA = 32,
- FOURCC_BPP_24BG = 24,
- FOURCC_BPP_RAW = 24,
- FOURCC_BPP_RGBP = 16,
- FOURCC_BPP_RGBO = 16,
- FOURCC_BPP_R444 = 16,
- FOURCC_BPP_RGGB = 8,
- FOURCC_BPP_BGGR = 8,
- FOURCC_BPP_GRBG = 8,
- FOURCC_BPP_GBRG = 8,
- FOURCC_BPP_YV12 = 12,
- FOURCC_BPP_YV16 = 16,
- FOURCC_BPP_YV24 = 24,
- FOURCC_BPP_YU12 = 12,
- FOURCC_BPP_J420 = 12,
- FOURCC_BPP_J400 = 8,
- FOURCC_BPP_MJPG = 0, // 0 means unknown.
- FOURCC_BPP_H264 = 0,
- FOURCC_BPP_IYUV = 12,
- FOURCC_BPP_YU16 = 16,
- FOURCC_BPP_YU24 = 24,
- FOURCC_BPP_YUYV = 16,
- FOURCC_BPP_YUVS = 16,
- FOURCC_BPP_HDYC = 16,
- FOURCC_BPP_2VUY = 16,
- FOURCC_BPP_JPEG = 1,
- FOURCC_BPP_DMB1 = 1,
- FOURCC_BPP_BA81 = 8,
- FOURCC_BPP_RGB3 = 24,
- FOURCC_BPP_BGR3 = 24,
- FOURCC_BPP_CM32 = 32,
- FOURCC_BPP_CM24 = 24,
-
- // Match any fourcc.
- FOURCC_BPP_ANY = 0, // 0 means unknown.
-};
-
-// Converts fourcc aliases into canonical ones.
-LIBYUV_API uint32 CanonicalFourCC(uint32 fourcc);
-
-#ifdef __cplusplus
-} // extern "C"
-} // namespace libyuv
-#endif
-
-#endif // INCLUDE_LIBYUV_VIDEO_COMMON_H_ NOLINT
diff --git a/third_party/aom/third_party/libyuv/source/compare.cc b/third_party/aom/third_party/libyuv/source/compare.cc
deleted file mode 100644
index 46aa8473d..000000000
--- a/third_party/aom/third_party/libyuv/source/compare.cc
+++ /dev/null
@@ -1,373 +0,0 @@
-/*
- * Copyright 2011 The LibYuv Project Authors. All rights reserved.
- *
- * Use of this source code is governed by a BSD-style license
- * that can be found in the LICENSE file in the root of the source
- * tree. An additional intellectual property rights grant can be found
- * in the file PATENTS. All contributing project authors may
- * be found in the AUTHORS file in the root of the source tree.
- */
-
-#include "libyuv/compare.h"
-
-#include <float.h>
-#include <math.h>
-#ifdef _OPENMP
-#include <omp.h>
-#endif
-
-#include "libyuv/basic_types.h"
-#include "libyuv/cpu_id.h"
-#include "libyuv/row.h"
-#include "libyuv/video_common.h"
-
-#ifdef __cplusplus
-namespace libyuv {
-extern "C" {
-#endif
-
-// hash seed of 5381 recommended.
-// Internal C version of HashDjb2 with int sized count for efficiency.
-uint32 HashDjb2_C(const uint8* src, int count, uint32 seed);
-
-// This module is for Visual C x86
-#if !defined(LIBYUV_DISABLE_X86) && \
- (defined(_M_IX86) || \
- (defined(__x86_64__) || (defined(__i386__) && !defined(__pic__))))
-#define HAS_HASHDJB2_SSE41
-uint32 HashDjb2_SSE41(const uint8* src, int count, uint32 seed);
-
-#ifdef VISUALC_HAS_AVX2
-#define HAS_HASHDJB2_AVX2
-uint32 HashDjb2_AVX2(const uint8* src, int count, uint32 seed);
-#endif
-
-#endif // HAS_HASHDJB2_SSE41
-
-// hash seed of 5381 recommended.
-LIBYUV_API
-uint32 HashDjb2(const uint8* src, uint64 count, uint32 seed) {
- const int kBlockSize = 1 << 15; // 32768;
- int remainder;
- uint32 (*HashDjb2_SSE)(const uint8* src, int count, uint32 seed) = HashDjb2_C;
-#if defined(HAS_HASHDJB2_SSE41)
- if (TestCpuFlag(kCpuHasSSE41)) {
- HashDjb2_SSE = HashDjb2_SSE41;
- }
-#endif
-#if defined(HAS_HASHDJB2_AVX2)
- if (TestCpuFlag(kCpuHasAVX2)) {
- HashDjb2_SSE = HashDjb2_AVX2;
- }
-#endif
-
- while (count >= (uint64)(kBlockSize)) {
- seed = HashDjb2_SSE(src, kBlockSize, seed);
- src += kBlockSize;
- count -= kBlockSize;
- }
- remainder = (int)(count) & ~15;
- if (remainder) {
- seed = HashDjb2_SSE(src, remainder, seed);
- src += remainder;
- count -= remainder;
- }
- remainder = (int)(count) & 15;
- if (remainder) {
- seed = HashDjb2_C(src, remainder, seed);
- }
- return seed;
-}
-
-static uint32 ARGBDetectRow_C(const uint8* argb, int width) {
- int x;
- for (x = 0; x < width - 1; x += 2) {
- if (argb[0] != 255) { // First byte is not Alpha of 255, so not ARGB.
- return FOURCC_BGRA;
- }
- if (argb[3] != 255) { // 4th byte is not Alpha of 255, so not BGRA.
- return FOURCC_ARGB;
- }
- if (argb[4] != 255) { // Second pixel first byte is not Alpha of 255.
- return FOURCC_BGRA;
- }
- if (argb[7] != 255) { // Second pixel 4th byte is not Alpha of 255.
- return FOURCC_ARGB;
- }
- argb += 8;
- }
- if (width & 1) {
- if (argb[0] != 255) { // First byte is not Alpha of 255, so not ARGB.
- return FOURCC_BGRA;
- }
- if (argb[3] != 255) { // 4th byte is not Alpha of 255, so not BGRA.
- return FOURCC_ARGB;
- }
- }
- return 0;
-}
-
-// Scan an opaque argb image and return fourcc based on alpha offset.
-// Returns FOURCC_ARGB, FOURCC_BGRA, or 0 if unknown.
-LIBYUV_API
-uint32 ARGBDetect(const uint8* argb, int stride_argb, int width, int height) {
- uint32 fourcc = 0;
- int h;
-
- // Coalesce rows.
- if (stride_argb == width * 4) {
- width *= height;
- height = 1;
- stride_argb = 0;
- }
- for (h = 0; h < height && fourcc == 0; ++h) {
- fourcc = ARGBDetectRow_C(argb, width);
- argb += stride_argb;
- }
- return fourcc;
-}
-
-uint32 SumSquareError_C(const uint8* src_a, const uint8* src_b, int count);
-#if !defined(LIBYUV_DISABLE_NEON) && \
- (defined(__ARM_NEON__) || defined(LIBYUV_NEON) || defined(__aarch64__))
-#define HAS_SUMSQUAREERROR_NEON
-uint32 SumSquareError_NEON(const uint8* src_a, const uint8* src_b, int count);
-#endif
-#if !defined(LIBYUV_DISABLE_X86) && \
- (defined(_M_IX86) || defined(__x86_64__) || defined(__i386__))
-#define HAS_SUMSQUAREERROR_SSE2
-uint32 SumSquareError_SSE2(const uint8* src_a, const uint8* src_b, int count);
-#endif
-
-#ifdef VISUALC_HAS_AVX2
-#define HAS_SUMSQUAREERROR_AVX2
-uint32 SumSquareError_AVX2(const uint8* src_a, const uint8* src_b, int count);
-#endif
-
-// TODO(fbarchard): Refactor into row function.
-LIBYUV_API
-uint64 ComputeSumSquareError(const uint8* src_a, const uint8* src_b,
- int count) {
- // SumSquareError returns values 0 to 65535 for each squared difference.
- // Up to 65536 of those can be summed and remain within a uint32.
- // After each block of 65536 pixels, accumulate into a uint64.
- const int kBlockSize = 65536;
- int remainder = count & (kBlockSize - 1) & ~31;
- uint64 sse = 0;
- int i;
- uint32 (*SumSquareError)(const uint8* src_a, const uint8* src_b, int count) =
- SumSquareError_C;
-#if defined(HAS_SUMSQUAREERROR_NEON)
- if (TestCpuFlag(kCpuHasNEON)) {
- SumSquareError = SumSquareError_NEON;
- }
-#endif
-#if defined(HAS_SUMSQUAREERROR_SSE2)
- if (TestCpuFlag(kCpuHasSSE2)) {
- // Note only used for multiples of 16 so count is not checked.
- SumSquareError = SumSquareError_SSE2;
- }
-#endif
-#if defined(HAS_SUMSQUAREERROR_AVX2)
- if (TestCpuFlag(kCpuHasAVX2)) {
- // Note only used for multiples of 32 so count is not checked.
- SumSquareError = SumSquareError_AVX2;
- }
-#endif
-#ifdef _OPENMP
-#pragma omp parallel for reduction(+: sse)
-#endif
- for (i = 0; i < (count - (kBlockSize - 1)); i += kBlockSize) {
- sse += SumSquareError(src_a + i, src_b + i, kBlockSize);
- }
- src_a += count & ~(kBlockSize - 1);
- src_b += count & ~(kBlockSize - 1);
- if (remainder) {
- sse += SumSquareError(src_a, src_b, remainder);
- src_a += remainder;
- src_b += remainder;
- }
- remainder = count & 31;
- if (remainder) {
- sse += SumSquareError_C(src_a, src_b, remainder);
- }
- return sse;
-}
-
-LIBYUV_API
-uint64 ComputeSumSquareErrorPlane(const uint8* src_a, int stride_a,
- const uint8* src_b, int stride_b,
- int width, int height) {
- uint64 sse = 0;
- int h;
- // Coalesce rows.
- if (stride_a == width &&
- stride_b == width) {
- width *= height;
- height = 1;
- stride_a = stride_b = 0;
- }
- for (h = 0; h < height; ++h) {
- sse += ComputeSumSquareError(src_a, src_b, width);
- src_a += stride_a;
- src_b += stride_b;
- }
- return sse;
-}
-
-LIBYUV_API
-double SumSquareErrorToPsnr(uint64 sse, uint64 count) {
- double psnr;
- if (sse > 0) {
- double mse = (double)(count) / (double)(sse);
- psnr = 10.0 * log10(255.0 * 255.0 * mse);
- } else {
- psnr = kMaxPsnr; // Limit to prevent divide by 0
- }
-
- if (psnr > kMaxPsnr)
- psnr = kMaxPsnr;
-
- return psnr;
-}
-
-LIBYUV_API
-double CalcFramePsnr(const uint8* src_a, int stride_a,
- const uint8* src_b, int stride_b,
- int width, int height) {
- const uint64 samples = width * height;
- const uint64 sse = ComputeSumSquareErrorPlane(src_a, stride_a,
- src_b, stride_b,
- width, height);
- return SumSquareErrorToPsnr(sse, samples);
-}
-
-LIBYUV_API
-double I420Psnr(const uint8* src_y_a, int stride_y_a,
- const uint8* src_u_a, int stride_u_a,
- const uint8* src_v_a, int stride_v_a,
- const uint8* src_y_b, int stride_y_b,
- const uint8* src_u_b, int stride_u_b,
- const uint8* src_v_b, int stride_v_b,
- int width, int height) {
- const uint64 sse_y = ComputeSumSquareErrorPlane(src_y_a, stride_y_a,
- src_y_b, stride_y_b,
- width, height);
- const int width_uv = (width + 1) >> 1;
- const int height_uv = (height + 1) >> 1;
- const uint64 sse_u = ComputeSumSquareErrorPlane(src_u_a, stride_u_a,
- src_u_b, stride_u_b,
- width_uv, height_uv);
- const uint64 sse_v = ComputeSumSquareErrorPlane(src_v_a, stride_v_a,
- src_v_b, stride_v_b,
- width_uv, height_uv);
- const uint64 samples = width * height + 2 * (width_uv * height_uv);
- const uint64 sse = sse_y + sse_u + sse_v;
- return SumSquareErrorToPsnr(sse, samples);
-}
-
-static const int64 cc1 = 26634; // (64^2*(.01*255)^2
-static const int64 cc2 = 239708; // (64^2*(.03*255)^2
-
-static double Ssim8x8_C(const uint8* src_a, int stride_a,
- const uint8* src_b, int stride_b) {
- int64 sum_a = 0;
- int64 sum_b = 0;
- int64 sum_sq_a = 0;
- int64 sum_sq_b = 0;
- int64 sum_axb = 0;
-
- int i;
- for (i = 0; i < 8; ++i) {
- int j;
- for (j = 0; j < 8; ++j) {
- sum_a += src_a[j];
- sum_b += src_b[j];
- sum_sq_a += src_a[j] * src_a[j];
- sum_sq_b += src_b[j] * src_b[j];
- sum_axb += src_a[j] * src_b[j];
- }
-
- src_a += stride_a;
- src_b += stride_b;
- }
-
- {
- const int64 count = 64;
- // scale the constants by number of pixels
- const int64 c1 = (cc1 * count * count) >> 12;
- const int64 c2 = (cc2 * count * count) >> 12;
-
- const int64 sum_a_x_sum_b = sum_a * sum_b;
-
- const int64 ssim_n = (2 * sum_a_x_sum_b + c1) *
- (2 * count * sum_axb - 2 * sum_a_x_sum_b + c2);
-
- const int64 sum_a_sq = sum_a*sum_a;
- const int64 sum_b_sq = sum_b*sum_b;
-
- const int64 ssim_d = (sum_a_sq + sum_b_sq + c1) *
- (count * sum_sq_a - sum_a_sq +
- count * sum_sq_b - sum_b_sq + c2);
-
- if (ssim_d == 0.0) {
- return DBL_MAX;
- }
- return ssim_n * 1.0 / ssim_d;
- }
-}
-
-// We are using a 8x8 moving window with starting location of each 8x8 window
-// on the 4x4 pixel grid. Such arrangement allows the windows to overlap
-// block boundaries to penalize blocking artifacts.
-LIBYUV_API
-double CalcFrameSsim(const uint8* src_a, int stride_a,
- const uint8* src_b, int stride_b,
- int width, int height) {
- int samples = 0;
- double ssim_total = 0;
- double (*Ssim8x8)(const uint8* src_a, int stride_a,
- const uint8* src_b, int stride_b) = Ssim8x8_C;
-
- // sample point start with each 4x4 location
- int i;
- for (i = 0; i < height - 8; i += 4) {
- int j;
- for (j = 0; j < width - 8; j += 4) {
- ssim_total += Ssim8x8(src_a + j, stride_a, src_b + j, stride_b);
- samples++;
- }
-
- src_a += stride_a * 4;
- src_b += stride_b * 4;
- }
-
- ssim_total /= samples;
- return ssim_total;
-}
-
-LIBYUV_API
-double I420Ssim(const uint8* src_y_a, int stride_y_a,
- const uint8* src_u_a, int stride_u_a,
- const uint8* src_v_a, int stride_v_a,
- const uint8* src_y_b, int stride_y_b,
- const uint8* src_u_b, int stride_u_b,
- const uint8* src_v_b, int stride_v_b,
- int width, int height) {
- const double ssim_y = CalcFrameSsim(src_y_a, stride_y_a,
- src_y_b, stride_y_b, width, height);
- const int width_uv = (width + 1) >> 1;
- const int height_uv = (height + 1) >> 1;
- const double ssim_u = CalcFrameSsim(src_u_a, stride_u_a,
- src_u_b, stride_u_b,
- width_uv, height_uv);
- const double ssim_v = CalcFrameSsim(src_v_a, stride_v_a,
- src_v_b, stride_v_b,
- width_uv, height_uv);
- return ssim_y * 0.8 + 0.1 * (ssim_u + ssim_v);
-}
-
-#ifdef __cplusplus
-} // extern "C"
-} // namespace libyuv
-#endif
diff --git a/third_party/aom/third_party/libyuv/source/compare_common.cc b/third_party/aom/third_party/libyuv/source/compare_common.cc
deleted file mode 100644
index c546b5182..000000000
--- a/third_party/aom/third_party/libyuv/source/compare_common.cc
+++ /dev/null
@@ -1,42 +0,0 @@
-/*
- * Copyright 2012 The LibYuv Project Authors. All rights reserved.
- *
- * Use of this source code is governed by a BSD-style license
- * that can be found in the LICENSE file in the root of the source
- * tree. An additional intellectual property rights grant can be found
- * in the file PATENTS. All contributing project authors may
- * be found in the AUTHORS file in the root of the source tree.
- */
-
-#include "libyuv/basic_types.h"
-
-#ifdef __cplusplus
-namespace libyuv {
-extern "C" {
-#endif
-
-uint32 SumSquareError_C(const uint8* src_a, const uint8* src_b, int count) {
- uint32 sse = 0u;
- int i;
- for (i = 0; i < count; ++i) {
- int diff = src_a[i] - src_b[i];
- sse += (uint32)(diff * diff);
- }
- return sse;
-}
-
-// hash seed of 5381 recommended.
-// Internal C version of HashDjb2 with int sized count for efficiency.
-uint32 HashDjb2_C(const uint8* src, int count, uint32 seed) {
- uint32 hash = seed;
- int i;
- for (i = 0; i < count; ++i) {
- hash += (hash << 5) + src[i];
- }
- return hash;
-}
-
-#ifdef __cplusplus
-} // extern "C"
-} // namespace libyuv
-#endif
diff --git a/third_party/aom/third_party/libyuv/source/compare_gcc.cc b/third_party/aom/third_party/libyuv/source/compare_gcc.cc
deleted file mode 100644
index 247cb33bb..000000000
--- a/third_party/aom/third_party/libyuv/source/compare_gcc.cc
+++ /dev/null
@@ -1,152 +0,0 @@
-/*
- * Copyright 2012 The LibYuv Project Authors. All rights reserved.
- *
- * Use of this source code is governed by a BSD-style license
- * that can be found in the LICENSE file in the root of the source
- * tree. An additional intellectual property rights grant can be found
- * in the file PATENTS. All contributing project authors may
- * be found in the AUTHORS file in the root of the source tree.
- */
-
-#include "libyuv/basic_types.h"
-#include "libyuv/row.h"
-
-#ifdef __cplusplus
-namespace libyuv {
-extern "C" {
-#endif
-
-#if !defined(LIBYUV_DISABLE_X86) && (defined(__x86_64__) || defined(__i386__))
-
-uint32 SumSquareError_SSE2(const uint8* src_a, const uint8* src_b, int count) {
- uint32 sse;
- asm volatile ( // NOLINT
- "pxor %%xmm0,%%xmm0 \n"
- "pxor %%xmm5,%%xmm5 \n"
- LABELALIGN
- "1: \n"
- "movdqu " MEMACCESS(0) ",%%xmm1 \n"
- "lea " MEMLEA(0x10, 0) ",%0 \n"
- "movdqu " MEMACCESS(1) ",%%xmm2 \n"
- "lea " MEMLEA(0x10, 1) ",%1 \n"
- "movdqa %%xmm1,%%xmm3 \n"
- "psubusb %%xmm2,%%xmm1 \n"
- "psubusb %%xmm3,%%xmm2 \n"
- "por %%xmm2,%%xmm1 \n"
- "movdqa %%xmm1,%%xmm2 \n"
- "punpcklbw %%xmm5,%%xmm1 \n"
- "punpckhbw %%xmm5,%%xmm2 \n"
- "pmaddwd %%xmm1,%%xmm1 \n"
- "pmaddwd %%xmm2,%%xmm2 \n"
- "paddd %%xmm1,%%xmm0 \n"
- "paddd %%xmm2,%%xmm0 \n"
- "sub $0x10,%2 \n"
- "jg 1b \n"
-
- "pshufd $0xee,%%xmm0,%%xmm1 \n"
- "paddd %%xmm1,%%xmm0 \n"
- "pshufd $0x1,%%xmm0,%%xmm1 \n"
- "paddd %%xmm1,%%xmm0 \n"
- "movd %%xmm0,%3 \n"
-
- : "+r"(src_a), // %0
- "+r"(src_b), // %1
- "+r"(count), // %2
- "=g"(sse) // %3
- :: "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm5"
- ); // NOLINT
- return sse;
-}
-
-#endif // defined(__x86_64__) || defined(__i386__)
-
-#if !defined(LIBYUV_DISABLE_X86) && \
- (defined(__x86_64__) || (defined(__i386__) && !defined(__pic__)))
-#define HAS_HASHDJB2_SSE41
-static uvec32 kHash16x33 = { 0x92d9e201, 0, 0, 0 }; // 33 ^ 16
-static uvec32 kHashMul0 = {
- 0x0c3525e1, // 33 ^ 15
- 0xa3476dc1, // 33 ^ 14
- 0x3b4039a1, // 33 ^ 13
- 0x4f5f0981, // 33 ^ 12
-};
-static uvec32 kHashMul1 = {
- 0x30f35d61, // 33 ^ 11
- 0x855cb541, // 33 ^ 10
- 0x040a9121, // 33 ^ 9
- 0x747c7101, // 33 ^ 8
-};
-static uvec32 kHashMul2 = {
- 0xec41d4e1, // 33 ^ 7
- 0x4cfa3cc1, // 33 ^ 6
- 0x025528a1, // 33 ^ 5
- 0x00121881, // 33 ^ 4
-};
-static uvec32 kHashMul3 = {
- 0x00008c61, // 33 ^ 3
- 0x00000441, // 33 ^ 2
- 0x00000021, // 33 ^ 1
- 0x00000001, // 33 ^ 0
-};
-
-uint32 HashDjb2_SSE41(const uint8* src, int count, uint32 seed) {
- uint32 hash;
- asm volatile ( // NOLINT
- "movd %2,%%xmm0 \n"
- "pxor %%xmm7,%%xmm7 \n"
- "movdqa %4,%%xmm6 \n"
- LABELALIGN
- "1: \n"
- "movdqu " MEMACCESS(0) ",%%xmm1 \n"
- "lea " MEMLEA(0x10, 0) ",%0 \n"
- "pmulld %%xmm6,%%xmm0 \n"
- "movdqa %5,%%xmm5 \n"
- "movdqa %%xmm1,%%xmm2 \n"
- "punpcklbw %%xmm7,%%xmm2 \n"
- "movdqa %%xmm2,%%xmm3 \n"
- "punpcklwd %%xmm7,%%xmm3 \n"
- "pmulld %%xmm5,%%xmm3 \n"
- "movdqa %6,%%xmm5 \n"
- "movdqa %%xmm2,%%xmm4 \n"
- "punpckhwd %%xmm7,%%xmm4 \n"
- "pmulld %%xmm5,%%xmm4 \n"
- "movdqa %7,%%xmm5 \n"
- "punpckhbw %%xmm7,%%xmm1 \n"
- "movdqa %%xmm1,%%xmm2 \n"
- "punpcklwd %%xmm7,%%xmm2 \n"
- "pmulld %%xmm5,%%xmm2 \n"
- "movdqa %8,%%xmm5 \n"
- "punpckhwd %%xmm7,%%xmm1 \n"
- "pmulld %%xmm5,%%xmm1 \n"
- "paddd %%xmm4,%%xmm3 \n"
- "paddd %%xmm2,%%xmm1 \n"
- "paddd %%xmm3,%%xmm1 \n"
- "pshufd $0xe,%%xmm1,%%xmm2 \n"
- "paddd %%xmm2,%%xmm1 \n"
- "pshufd $0x1,%%xmm1,%%xmm2 \n"
- "paddd %%xmm2,%%xmm1 \n"
- "paddd %%xmm1,%%xmm0 \n"
- "sub $0x10,%1 \n"
- "jg 1b \n"
- "movd %%xmm0,%3 \n"
- : "+r"(src), // %0
- "+r"(count), // %1
- "+rm"(seed), // %2
- "=g"(hash) // %3
- : "m"(kHash16x33), // %4
- "m"(kHashMul0), // %5
- "m"(kHashMul1), // %6
- "m"(kHashMul2), // %7
- "m"(kHashMul3) // %8
- : "memory", "cc"
- , "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", "xmm7"
- ); // NOLINT
- return hash;
-}
-#endif // defined(__x86_64__) || (defined(__i386__) && !defined(__pic__)))
-
-#ifdef __cplusplus
-} // extern "C"
-} // namespace libyuv
-#endif
-
diff --git a/third_party/aom/third_party/libyuv/source/compare_neon.cc b/third_party/aom/third_party/libyuv/source/compare_neon.cc
deleted file mode 100644
index ef006ec41..000000000
--- a/third_party/aom/third_party/libyuv/source/compare_neon.cc
+++ /dev/null
@@ -1,65 +0,0 @@
-/*
- * Copyright 2012 The LibYuv Project Authors. All rights reserved.
- *
- * Use of this source code is governed by a BSD-style license
- * that can be found in the LICENSE file in the root of the source
- * tree. An additional intellectual property rights grant can be found
- * in the file PATENTS. All contributing project authors may
- * be found in the AUTHORS file in the root of the source tree.
- */
-
-#include "libyuv/basic_types.h"
-#include "libyuv/row.h"
-
-#ifdef __cplusplus
-namespace libyuv {
-extern "C" {
-#endif
-
-#if !defined(LIBYUV_DISABLE_NEON) && defined(__ARM_NEON__) && \
- !defined(__aarch64__)
-
-uint32 SumSquareError_NEON(const uint8* src_a, const uint8* src_b, int count) {
- volatile uint32 sse;
- asm volatile (
- "vmov.u8 q8, #0 \n"
- "vmov.u8 q10, #0 \n"
- "vmov.u8 q9, #0 \n"
- "vmov.u8 q11, #0 \n"
-
- ".p2align 2 \n"
- "1: \n"
- MEMACCESS(0)
- "vld1.8 {q0}, [%0]! \n"
- MEMACCESS(1)
- "vld1.8 {q1}, [%1]! \n"
- "subs %2, %2, #16 \n"
- "vsubl.u8 q2, d0, d2 \n"
- "vsubl.u8 q3, d1, d3 \n"
- "vmlal.s16 q8, d4, d4 \n"
- "vmlal.s16 q9, d6, d6 \n"
- "vmlal.s16 q10, d5, d5 \n"
- "vmlal.s16 q11, d7, d7 \n"
- "bgt 1b \n"
-
- "vadd.u32 q8, q8, q9 \n"
- "vadd.u32 q10, q10, q11 \n"
- "vadd.u32 q11, q8, q10 \n"
- "vpaddl.u32 q1, q11 \n"
- "vadd.u64 d0, d2, d3 \n"
- "vmov.32 %3, d0[0] \n"
- : "+r"(src_a),
- "+r"(src_b),
- "+r"(count),
- "=r"(sse)
- :
- : "memory", "cc", "q0", "q1", "q2", "q3", "q8", "q9", "q10", "q11");
- return sse;
-}
-
-#endif // defined(__ARM_NEON__) && !defined(__aarch64__)
-
-#ifdef __cplusplus
-} // extern "C"
-} // namespace libyuv
-#endif
diff --git a/third_party/aom/third_party/libyuv/source/compare_neon64.cc b/third_party/aom/third_party/libyuv/source/compare_neon64.cc
deleted file mode 100644
index 6d1e5e1bc..000000000
--- a/third_party/aom/third_party/libyuv/source/compare_neon64.cc
+++ /dev/null
@@ -1,63 +0,0 @@
-/*
- * Copyright 2012 The LibYuv Project Authors. All rights reserved.
- *
- * Use of this source code is governed by a BSD-style license
- * that can be found in the LICENSE file in the root of the source
- * tree. An additional intellectual property rights grant can be found
- * in the file PATENTS. All contributing project authors may
- * be found in the AUTHORS file in the root of the source tree.
- */
-
-#include "libyuv/basic_types.h"
-#include "libyuv/row.h"
-
-#ifdef __cplusplus
-namespace libyuv {
-extern "C" {
-#endif
-
-#if !defined(LIBYUV_DISABLE_NEON) && defined(__aarch64__)
-
-uint32 SumSquareError_NEON(const uint8* src_a, const uint8* src_b, int count) {
- volatile uint32 sse;
- asm volatile (
- "eor v16.16b, v16.16b, v16.16b \n"
- "eor v18.16b, v18.16b, v18.16b \n"
- "eor v17.16b, v17.16b, v17.16b \n"
- "eor v19.16b, v19.16b, v19.16b \n"
-
- ".p2align 2 \n"
- "1: \n"
- MEMACCESS(0)
- "ld1 {v0.16b}, [%0], #16 \n"
- MEMACCESS(1)
- "ld1 {v1.16b}, [%1], #16 \n"
- "subs %w2, %w2, #16 \n"
- "usubl v2.8h, v0.8b, v1.8b \n"
- "usubl2 v3.8h, v0.16b, v1.16b \n"
- "smlal v16.4s, v2.4h, v2.4h \n"
- "smlal v17.4s, v3.4h, v3.4h \n"
- "smlal2 v18.4s, v2.8h, v2.8h \n"
- "smlal2 v19.4s, v3.8h, v3.8h \n"
- "b.gt 1b \n"
-
- "add v16.4s, v16.4s, v17.4s \n"
- "add v18.4s, v18.4s, v19.4s \n"
- "add v19.4s, v16.4s, v18.4s \n"
- "addv s0, v19.4s \n"
- "fmov %w3, s0 \n"
- : "+r"(src_a),
- "+r"(src_b),
- "+r"(count),
- "=r"(sse)
- :
- : "cc", "v0", "v1", "v2", "v3", "v16", "v17", "v18", "v19");
- return sse;
-}
-
-#endif // !defined(LIBYUV_DISABLE_NEON) && defined(__aarch64__)
-
-#ifdef __cplusplus
-} // extern "C"
-} // namespace libyuv
-#endif
diff --git a/third_party/aom/third_party/libyuv/source/compare_win.cc b/third_party/aom/third_party/libyuv/source/compare_win.cc
deleted file mode 100644
index 19806f275..000000000
--- a/third_party/aom/third_party/libyuv/source/compare_win.cc
+++ /dev/null
@@ -1,229 +0,0 @@
-/*
- * Copyright 2012 The LibYuv Project Authors. All rights reserved.
- *
- * Use of this source code is governed by a BSD-style license
- * that can be found in the LICENSE file in the root of the source
- * tree. An additional intellectual property rights grant can be found
- * in the file PATENTS. All contributing project authors may
- * be found in the AUTHORS file in the root of the source tree.
- */
-
-#include "libyuv/basic_types.h"
-#include "libyuv/row.h"
-
-#ifdef __cplusplus
-namespace libyuv {
-extern "C" {
-#endif
-
-// This module is for Visual C x86.
-#if !defined(LIBYUV_DISABLE_X86) && defined(_M_IX86) && \
- defined(_MSC_VER) && !defined(__clang__)
-
-__declspec(naked)
-uint32 SumSquareError_SSE2(const uint8* src_a, const uint8* src_b, int count) {
- __asm {
- mov eax, [esp + 4] // src_a
- mov edx, [esp + 8] // src_b
- mov ecx, [esp + 12] // count
- pxor xmm0, xmm0
- pxor xmm5, xmm5
-
- wloop:
- movdqu xmm1, [eax]
- lea eax, [eax + 16]
- movdqu xmm2, [edx]
- lea edx, [edx + 16]
- movdqa xmm3, xmm1 // abs trick
- psubusb xmm1, xmm2
- psubusb xmm2, xmm3
- por xmm1, xmm2
- movdqa xmm2, xmm1
- punpcklbw xmm1, xmm5
- punpckhbw xmm2, xmm5
- pmaddwd xmm1, xmm1
- pmaddwd xmm2, xmm2
- paddd xmm0, xmm1
- paddd xmm0, xmm2
- sub ecx, 16
- jg wloop
-
- pshufd xmm1, xmm0, 0xee
- paddd xmm0, xmm1
- pshufd xmm1, xmm0, 0x01
- paddd xmm0, xmm1
- movd eax, xmm0
- ret
- }
-}
-
-// Visual C 2012 required for AVX2.
-#if _MSC_VER >= 1700
-// C4752: found Intel(R) Advanced Vector Extensions; consider using /arch:AVX.
-#pragma warning(disable: 4752)
-__declspec(naked)
-uint32 SumSquareError_AVX2(const uint8* src_a, const uint8* src_b, int count) {
- __asm {
- mov eax, [esp + 4] // src_a
- mov edx, [esp + 8] // src_b
- mov ecx, [esp + 12] // count
- vpxor ymm0, ymm0, ymm0 // sum
- vpxor ymm5, ymm5, ymm5 // constant 0 for unpck
- sub edx, eax
-
- wloop:
- vmovdqu ymm1, [eax]
- vmovdqu ymm2, [eax + edx]
- lea eax, [eax + 32]
- vpsubusb ymm3, ymm1, ymm2 // abs difference trick
- vpsubusb ymm2, ymm2, ymm1
- vpor ymm1, ymm2, ymm3
- vpunpcklbw ymm2, ymm1, ymm5 // u16. mutates order.
- vpunpckhbw ymm1, ymm1, ymm5
- vpmaddwd ymm2, ymm2, ymm2 // square + hadd to u32.
- vpmaddwd ymm1, ymm1, ymm1
- vpaddd ymm0, ymm0, ymm1
- vpaddd ymm0, ymm0, ymm2
- sub ecx, 32
- jg wloop
-
- vpshufd ymm1, ymm0, 0xee // 3, 2 + 1, 0 both lanes.
- vpaddd ymm0, ymm0, ymm1
- vpshufd ymm1, ymm0, 0x01 // 1 + 0 both lanes.
- vpaddd ymm0, ymm0, ymm1
- vpermq ymm1, ymm0, 0x02 // high + low lane.
- vpaddd ymm0, ymm0, ymm1
- vmovd eax, xmm0
- vzeroupper
- ret
- }
-}
-#endif // _MSC_VER >= 1700
-
-#define HAS_HASHDJB2_SSE41
-static uvec32 kHash16x33 = { 0x92d9e201, 0, 0, 0 }; // 33 ^ 16
-static uvec32 kHashMul0 = {
- 0x0c3525e1, // 33 ^ 15
- 0xa3476dc1, // 33 ^ 14
- 0x3b4039a1, // 33 ^ 13
- 0x4f5f0981, // 33 ^ 12
-};
-static uvec32 kHashMul1 = {
- 0x30f35d61, // 33 ^ 11
- 0x855cb541, // 33 ^ 10
- 0x040a9121, // 33 ^ 9
- 0x747c7101, // 33 ^ 8
-};
-static uvec32 kHashMul2 = {
- 0xec41d4e1, // 33 ^ 7
- 0x4cfa3cc1, // 33 ^ 6
- 0x025528a1, // 33 ^ 5
- 0x00121881, // 33 ^ 4
-};
-static uvec32 kHashMul3 = {
- 0x00008c61, // 33 ^ 3
- 0x00000441, // 33 ^ 2
- 0x00000021, // 33 ^ 1
- 0x00000001, // 33 ^ 0
-};
-
-// 27: 66 0F 38 40 C6 pmulld xmm0,xmm6
-// 44: 66 0F 38 40 DD pmulld xmm3,xmm5
-// 59: 66 0F 38 40 E5 pmulld xmm4,xmm5
-// 72: 66 0F 38 40 D5 pmulld xmm2,xmm5
-// 83: 66 0F 38 40 CD pmulld xmm1,xmm5
-#define pmulld(reg) _asm _emit 0x66 _asm _emit 0x0F _asm _emit 0x38 \
- _asm _emit 0x40 _asm _emit reg
-
-__declspec(naked)
-uint32 HashDjb2_SSE41(const uint8* src, int count, uint32 seed) {
- __asm {
- mov eax, [esp + 4] // src
- mov ecx, [esp + 8] // count
- movd xmm0, [esp + 12] // seed
-
- pxor xmm7, xmm7 // constant 0 for unpck
- movdqa xmm6, kHash16x33
-
- wloop:
- movdqu xmm1, [eax] // src[0-15]
- lea eax, [eax + 16]
- pmulld(0xc6) // pmulld xmm0,xmm6 hash *= 33 ^ 16
- movdqa xmm5, kHashMul0
- movdqa xmm2, xmm1
- punpcklbw xmm2, xmm7 // src[0-7]
- movdqa xmm3, xmm2
- punpcklwd xmm3, xmm7 // src[0-3]
- pmulld(0xdd) // pmulld xmm3, xmm5
- movdqa xmm5, kHashMul1
- movdqa xmm4, xmm2
- punpckhwd xmm4, xmm7 // src[4-7]
- pmulld(0xe5) // pmulld xmm4, xmm5
- movdqa xmm5, kHashMul2
- punpckhbw xmm1, xmm7 // src[8-15]
- movdqa xmm2, xmm1
- punpcklwd xmm2, xmm7 // src[8-11]
- pmulld(0xd5) // pmulld xmm2, xmm5
- movdqa xmm5, kHashMul3
- punpckhwd xmm1, xmm7 // src[12-15]
- pmulld(0xcd) // pmulld xmm1, xmm5
- paddd xmm3, xmm4 // add 16 results
- paddd xmm1, xmm2
- paddd xmm1, xmm3
-
- pshufd xmm2, xmm1, 0x0e // upper 2 dwords
- paddd xmm1, xmm2
- pshufd xmm2, xmm1, 0x01
- paddd xmm1, xmm2
- paddd xmm0, xmm1
- sub ecx, 16
- jg wloop
-
- movd eax, xmm0 // return hash
- ret
- }
-}
-
-// Visual C 2012 required for AVX2.
-#if _MSC_VER >= 1700
-__declspec(naked)
-uint32 HashDjb2_AVX2(const uint8* src, int count, uint32 seed) {
- __asm {
- mov eax, [esp + 4] // src
- mov ecx, [esp + 8] // count
- movd xmm0, [esp + 12] // seed
- movdqa xmm6, kHash16x33
-
- wloop:
- vpmovzxbd xmm3, dword ptr [eax] // src[0-3]
- pmulld xmm0, xmm6 // hash *= 33 ^ 16
- vpmovzxbd xmm4, dword ptr [eax + 4] // src[4-7]
- pmulld xmm3, kHashMul0
- vpmovzxbd xmm2, dword ptr [eax + 8] // src[8-11]
- pmulld xmm4, kHashMul1
- vpmovzxbd xmm1, dword ptr [eax + 12] // src[12-15]
- pmulld xmm2, kHashMul2
- lea eax, [eax + 16]
- pmulld xmm1, kHashMul3
- paddd xmm3, xmm4 // add 16 results
- paddd xmm1, xmm2
- paddd xmm1, xmm3
- pshufd xmm2, xmm1, 0x0e // upper 2 dwords
- paddd xmm1, xmm2
- pshufd xmm2, xmm1, 0x01
- paddd xmm1, xmm2
- paddd xmm0, xmm1
- sub ecx, 16
- jg wloop
-
- movd eax, xmm0 // return hash
- ret
- }
-}
-#endif // _MSC_VER >= 1700
-#endif // !defined(LIBYUV_DISABLE_X86) && defined(_M_IX86)
-
-#ifdef __cplusplus
-} // extern "C"
-} // namespace libyuv
-#endif
diff --git a/third_party/aom/third_party/libyuv/source/convert.cc b/third_party/aom/third_party/libyuv/source/convert.cc
deleted file mode 100644
index 3ad6bd7a4..000000000
--- a/third_party/aom/third_party/libyuv/source/convert.cc
+++ /dev/null
@@ -1,1389 +0,0 @@
-/*
- * Copyright 2011 The LibYuv Project Authors. All rights reserved.
- *
- * Use of this source code is governed by a BSD-style license
- * that can be found in the LICENSE file in the root of the source
- * tree. An additional intellectual property rights grant can be found
- * in the file PATENTS. All contributing project authors may
- * be found in the AUTHORS file in the root of the source tree.
- */
-
-#include "libyuv/convert.h"
-
-#include "libyuv/basic_types.h"
-#include "libyuv/cpu_id.h"
-#include "libyuv/planar_functions.h"
-#include "libyuv/rotate.h"
-#include "libyuv/scale.h" // For ScalePlane()
-#include "libyuv/row.h"
-
-#ifdef __cplusplus
-namespace libyuv {
-extern "C" {
-#endif
-
-#define SUBSAMPLE(v, a, s) (v < 0) ? (-((-v + a) >> s)) : ((v + a) >> s)
-static __inline int Abs(int v) {
- return v >= 0 ? v : -v;
-}
-
-// Any I4xx To I420 format with mirroring.
-static int I4xxToI420(const uint8* src_y, int src_stride_y,
- const uint8* src_u, int src_stride_u,
- const uint8* src_v, int src_stride_v,
- uint8* dst_y, int dst_stride_y,
- uint8* dst_u, int dst_stride_u,
- uint8* dst_v, int dst_stride_v,
- int src_y_width, int src_y_height,
- int src_uv_width, int src_uv_height) {
- const int dst_y_width = Abs(src_y_width);
- const int dst_y_height = Abs(src_y_height);
- const int dst_uv_width = SUBSAMPLE(dst_y_width, 1, 1);
- const int dst_uv_height = SUBSAMPLE(dst_y_height, 1, 1);
- if (src_y_width == 0 || src_y_height == 0 ||
- src_uv_width == 0 || src_uv_height == 0) {
- return -1;
- }
- ScalePlane(src_y, src_stride_y, src_y_width, src_y_height,
- dst_y, dst_stride_y, dst_y_width, dst_y_height,
- kFilterBilinear);
- ScalePlane(src_u, src_stride_u, src_uv_width, src_uv_height,
- dst_u, dst_stride_u, dst_uv_width, dst_uv_height,
- kFilterBilinear);
- ScalePlane(src_v, src_stride_v, src_uv_width, src_uv_height,
- dst_v, dst_stride_v, dst_uv_width, dst_uv_height,
- kFilterBilinear);
- return 0;
-}
-
-// Copy I420 with optional flipping
-// TODO(fbarchard): Use Scale plane which supports mirroring, but ensure
-// is does row coalescing.
-LIBYUV_API
-int I420Copy(const uint8* src_y, int src_stride_y,
- const uint8* src_u, int src_stride_u,
- const uint8* src_v, int src_stride_v,
- uint8* dst_y, int dst_stride_y,
- uint8* dst_u, int dst_stride_u,
- uint8* dst_v, int dst_stride_v,
- int width, int height) {
- int halfwidth = (width + 1) >> 1;
- int halfheight = (height + 1) >> 1;
- if (!src_y || !src_u || !src_v ||
- !dst_y || !dst_u || !dst_v ||
- width <= 0 || height == 0) {
- return -1;
- }
- // Negative height means invert the image.
- if (height < 0) {
- height = -height;
- halfheight = (height + 1) >> 1;
- src_y = src_y + (height - 1) * src_stride_y;
- src_u = src_u + (halfheight - 1) * src_stride_u;
- src_v = src_v + (halfheight - 1) * src_stride_v;
- src_stride_y = -src_stride_y;
- src_stride_u = -src_stride_u;
- src_stride_v = -src_stride_v;
- }
-
- if (dst_y) {
- CopyPlane(src_y, src_stride_y, dst_y, dst_stride_y, width, height);
- }
- // Copy UV planes.
- CopyPlane(src_u, src_stride_u, dst_u, dst_stride_u, halfwidth, halfheight);
- CopyPlane(src_v, src_stride_v, dst_v, dst_stride_v, halfwidth, halfheight);
- return 0;
-}
-
-// 422 chroma is 1/2 width, 1x height
-// 420 chroma is 1/2 width, 1/2 height
-LIBYUV_API
-int I422ToI420(const uint8* src_y, int src_stride_y,
- const uint8* src_u, int src_stride_u,
- const uint8* src_v, int src_stride_v,
- uint8* dst_y, int dst_stride_y,
- uint8* dst_u, int dst_stride_u,
- uint8* dst_v, int dst_stride_v,
- int width, int height) {
- const int src_uv_width = SUBSAMPLE(width, 1, 1);
- return I4xxToI420(src_y, src_stride_y,
- src_u, src_stride_u,
- src_v, src_stride_v,
- dst_y, dst_stride_y,
- dst_u, dst_stride_u,
- dst_v, dst_stride_v,
- width, height,
- src_uv_width, height);
-}
-
-// 444 chroma is 1x width, 1x height
-// 420 chroma is 1/2 width, 1/2 height
-LIBYUV_API
-int I444ToI420(const uint8* src_y, int src_stride_y,
- const uint8* src_u, int src_stride_u,
- const uint8* src_v, int src_stride_v,
- uint8* dst_y, int dst_stride_y,
- uint8* dst_u, int dst_stride_u,
- uint8* dst_v, int dst_stride_v,
- int width, int height) {
- return I4xxToI420(src_y, src_stride_y,
- src_u, src_stride_u,
- src_v, src_stride_v,
- dst_y, dst_stride_y,
- dst_u, dst_stride_u,
- dst_v, dst_stride_v,
- width, height,
- width, height);
-}
-
-// 411 chroma is 1/4 width, 1x height
-// 420 chroma is 1/2 width, 1/2 height
-LIBYUV_API
-int I411ToI420(const uint8* src_y, int src_stride_y,
- const uint8* src_u, int src_stride_u,
- const uint8* src_v, int src_stride_v,
- uint8* dst_y, int dst_stride_y,
- uint8* dst_u, int dst_stride_u,
- uint8* dst_v, int dst_stride_v,
- int width, int height) {
- const int src_uv_width = SUBSAMPLE(width, 3, 2);
- return I4xxToI420(src_y, src_stride_y,
- src_u, src_stride_u,
- src_v, src_stride_v,
- dst_y, dst_stride_y,
- dst_u, dst_stride_u,
- dst_v, dst_stride_v,
- width, height,
- src_uv_width, height);
-}
-
-// I400 is greyscale typically used in MJPG
-LIBYUV_API
-int I400ToI420(const uint8* src_y, int src_stride_y,
- uint8* dst_y, int dst_stride_y,
- uint8* dst_u, int dst_stride_u,
- uint8* dst_v, int dst_stride_v,
- int width, int height) {
- int halfwidth = (width + 1) >> 1;
- int halfheight = (height + 1) >> 1;
- if (!src_y || !dst_y || !dst_u || !dst_v ||
- width <= 0 || height == 0) {
- return -1;
- }
- // Negative height means invert the image.
- if (height < 0) {
- height = -height;
- halfheight = (height + 1) >> 1;
- src_y = src_y + (height - 1) * src_stride_y;
- src_stride_y = -src_stride_y;
- }
- CopyPlane(src_y, src_stride_y, dst_y, dst_stride_y, width, height);
- SetPlane(dst_u, dst_stride_u, halfwidth, halfheight, 128);
- SetPlane(dst_v, dst_stride_v, halfwidth, halfheight, 128);
- return 0;
-}
-
-static void CopyPlane2(const uint8* src, int src_stride_0, int src_stride_1,
- uint8* dst, int dst_stride,
- int width, int height) {
- int y;
- void (*CopyRow)(const uint8* src, uint8* dst, int width) = CopyRow_C;
-#if defined(HAS_COPYROW_SSE2)
- if (TestCpuFlag(kCpuHasSSE2)) {
- CopyRow = IS_ALIGNED(width, 32) ? CopyRow_SSE2 : CopyRow_Any_SSE2;
- }
-#endif
-#if defined(HAS_COPYROW_AVX)
- if (TestCpuFlag(kCpuHasAVX)) {
- CopyRow = IS_ALIGNED(width, 64) ? CopyRow_AVX : CopyRow_Any_AVX;
- }
-#endif
-#if defined(HAS_COPYROW_ERMS)
- if (TestCpuFlag(kCpuHasERMS)) {
- CopyRow = CopyRow_ERMS;
- }
-#endif
-#if defined(HAS_COPYROW_NEON)
- if (TestCpuFlag(kCpuHasNEON)) {
- CopyRow = IS_ALIGNED(width, 32) ? CopyRow_NEON : CopyRow_Any_NEON;
- }
-#endif
-#if defined(HAS_COPYROW_MIPS)
- if (TestCpuFlag(kCpuHasMIPS)) {
- CopyRow = CopyRow_MIPS;
- }
-#endif
-
- // Copy plane
- for (y = 0; y < height - 1; y += 2) {
- CopyRow(src, dst, width);
- CopyRow(src + src_stride_0, dst + dst_stride, width);
- src += src_stride_0 + src_stride_1;
- dst += dst_stride * 2;
- }
- if (height & 1) {
- CopyRow(src, dst, width);
- }
-}
-
-// Support converting from FOURCC_M420
-// Useful for bandwidth constrained transports like USB 1.0 and 2.0 and for
-// easy conversion to I420.
-// M420 format description:
-// M420 is row biplanar 420: 2 rows of Y and 1 row of UV.
-// Chroma is half width / half height. (420)
-// src_stride_m420 is row planar. Normally this will be the width in pixels.
-// The UV plane is half width, but 2 values, so src_stride_m420 applies to
-// this as well as the two Y planes.
-static int X420ToI420(const uint8* src_y,
- int src_stride_y0, int src_stride_y1,
- const uint8* src_uv, int src_stride_uv,
- uint8* dst_y, int dst_stride_y,
- uint8* dst_u, int dst_stride_u,
- uint8* dst_v, int dst_stride_v,
- int width, int height) {
- int y;
- int halfwidth = (width + 1) >> 1;
- int halfheight = (height + 1) >> 1;
- void (*SplitUVRow)(const uint8* src_uv, uint8* dst_u, uint8* dst_v, int pix) =
- SplitUVRow_C;
- if (!src_y || !src_uv ||
- !dst_y || !dst_u || !dst_v ||
- width <= 0 || height == 0) {
- return -1;
- }
- // Negative height means invert the image.
- if (height < 0) {
- height = -height;
- halfheight = (height + 1) >> 1;
- dst_y = dst_y + (height - 1) * dst_stride_y;
- dst_u = dst_u + (halfheight - 1) * dst_stride_u;
- dst_v = dst_v + (halfheight - 1) * dst_stride_v;
- dst_stride_y = -dst_stride_y;
- dst_stride_u = -dst_stride_u;
- dst_stride_v = -dst_stride_v;
- }
- // Coalesce rows.
- if (src_stride_y0 == width &&
- src_stride_y1 == width &&
- dst_stride_y == width) {
- width *= height;
- height = 1;
- src_stride_y0 = src_stride_y1 = dst_stride_y = 0;
- }
- // Coalesce rows.
- if (src_stride_uv == halfwidth * 2 &&
- dst_stride_u == halfwidth &&
- dst_stride_v == halfwidth) {
- halfwidth *= halfheight;
- halfheight = 1;
- src_stride_uv = dst_stride_u = dst_stride_v = 0;
- }
-#if defined(HAS_SPLITUVROW_SSE2)
- if (TestCpuFlag(kCpuHasSSE2)) {
- SplitUVRow = SplitUVRow_Any_SSE2;
- if (IS_ALIGNED(halfwidth, 16)) {
- SplitUVRow = SplitUVRow_SSE2;
- }
- }
-#endif
-#if defined(HAS_SPLITUVROW_AVX2)
- if (TestCpuFlag(kCpuHasAVX2)) {
- SplitUVRow = SplitUVRow_Any_AVX2;
- if (IS_ALIGNED(halfwidth, 32)) {
- SplitUVRow = SplitUVRow_AVX2;
- }
- }
-#endif
-#if defined(HAS_SPLITUVROW_NEON)
- if (TestCpuFlag(kCpuHasNEON)) {
- SplitUVRow = SplitUVRow_Any_NEON;
- if (IS_ALIGNED(halfwidth, 16)) {
- SplitUVRow = SplitUVRow_NEON;
- }
- }
-#endif
-#if defined(HAS_SPLITUVROW_MIPS_DSPR2)
- if (TestCpuFlag(kCpuHasMIPS_DSPR2) &&
- IS_ALIGNED(src_uv, 4) && IS_ALIGNED(src_stride_uv, 4) &&
- IS_ALIGNED(dst_u, 4) && IS_ALIGNED(dst_stride_u, 4) &&
- IS_ALIGNED(dst_v, 4) && IS_ALIGNED(dst_stride_v, 4)) {
- SplitUVRow = SplitUVRow_Any_MIPS_DSPR2;
- if (IS_ALIGNED(halfwidth, 16)) {
- SplitUVRow = SplitUVRow_MIPS_DSPR2;
- }
- }
-#endif
-
- if (dst_y) {
- if (src_stride_y0 == src_stride_y1) {
- CopyPlane(src_y, src_stride_y0, dst_y, dst_stride_y, width, height);
- } else {
- CopyPlane2(src_y, src_stride_y0, src_stride_y1, dst_y, dst_stride_y,
- width, height);
- }
- }
-
- for (y = 0; y < halfheight; ++y) {
- // Copy a row of UV.
- SplitUVRow(src_uv, dst_u, dst_v, halfwidth);
- dst_u += dst_stride_u;
- dst_v += dst_stride_v;
- src_uv += src_stride_uv;
- }
- return 0;
-}
-
-// Convert NV12 to I420.
-LIBYUV_API
-int NV12ToI420(const uint8* src_y, int src_stride_y,
- const uint8* src_uv, int src_stride_uv,
- uint8* dst_y, int dst_stride_y,
- uint8* dst_u, int dst_stride_u,
- uint8* dst_v, int dst_stride_v,
- int width, int height) {
- return X420ToI420(src_y, src_stride_y, src_stride_y,
- src_uv, src_stride_uv,
- dst_y, dst_stride_y,
- dst_u, dst_stride_u,
- dst_v, dst_stride_v,
- width, height);
-}
-
-// Convert NV21 to I420. Same as NV12 but u and v pointers swapped.
-LIBYUV_API
-int NV21ToI420(const uint8* src_y, int src_stride_y,
- const uint8* src_vu, int src_stride_vu,
- uint8* dst_y, int dst_stride_y,
- uint8* dst_u, int dst_stride_u,
- uint8* dst_v, int dst_stride_v,
- int width, int height) {
- return X420ToI420(src_y, src_stride_y, src_stride_y,
- src_vu, src_stride_vu,
- dst_y, dst_stride_y,
- dst_v, dst_stride_v,
- dst_u, dst_stride_u,
- width, height);
-}
-
-// Convert M420 to I420.
-LIBYUV_API
-int M420ToI420(const uint8* src_m420, int src_stride_m420,
- uint8* dst_y, int dst_stride_y,
- uint8* dst_u, int dst_stride_u,
- uint8* dst_v, int dst_stride_v,
- int width, int height) {
- return X420ToI420(src_m420, src_stride_m420, src_stride_m420 * 2,
- src_m420 + src_stride_m420 * 2, src_stride_m420 * 3,
- dst_y, dst_stride_y,
- dst_u, dst_stride_u,
- dst_v, dst_stride_v,
- width, height);
-}
-
-// Convert YUY2 to I420.
-LIBYUV_API
-int YUY2ToI420(const uint8* src_yuy2, int src_stride_yuy2,
- uint8* dst_y, int dst_stride_y,
- uint8* dst_u, int dst_stride_u,
- uint8* dst_v, int dst_stride_v,
- int width, int height) {
- int y;
- void (*YUY2ToUVRow)(const uint8* src_yuy2, int src_stride_yuy2,
- uint8* dst_u, uint8* dst_v, int pix) = YUY2ToUVRow_C;
- void (*YUY2ToYRow)(const uint8* src_yuy2,
- uint8* dst_y, int pix) = YUY2ToYRow_C;
- // Negative height means invert the image.
- if (height < 0) {
- height = -height;
- src_yuy2 = src_yuy2 + (height - 1) * src_stride_yuy2;
- src_stride_yuy2 = -src_stride_yuy2;
- }
-#if defined(HAS_YUY2TOYROW_SSE2)
- if (TestCpuFlag(kCpuHasSSE2)) {
- YUY2ToUVRow = YUY2ToUVRow_Any_SSE2;
- YUY2ToYRow = YUY2ToYRow_Any_SSE2;
- if (IS_ALIGNED(width, 16)) {
- YUY2ToUVRow = YUY2ToUVRow_SSE2;
- YUY2ToYRow = YUY2ToYRow_SSE2;
- }
- }
-#endif
-#if defined(HAS_YUY2TOYROW_AVX2)
- if (TestCpuFlag(kCpuHasAVX2)) {
- YUY2ToUVRow = YUY2ToUVRow_Any_AVX2;
- YUY2ToYRow = YUY2ToYRow_Any_AVX2;
- if (IS_ALIGNED(width, 32)) {
- YUY2ToUVRow = YUY2ToUVRow_AVX2;
- YUY2ToYRow = YUY2ToYRow_AVX2;
- }
- }
-#endif
-#if defined(HAS_YUY2TOYROW_NEON)
- if (TestCpuFlag(kCpuHasNEON)) {
- YUY2ToYRow = YUY2ToYRow_Any_NEON;
- YUY2ToUVRow = YUY2ToUVRow_Any_NEON;
- if (IS_ALIGNED(width, 16)) {
- YUY2ToYRow = YUY2ToYRow_NEON;
- YUY2ToUVRow = YUY2ToUVRow_NEON;
- }
- }
-#endif
-
- for (y = 0; y < height - 1; y += 2) {
- YUY2ToUVRow(src_yuy2, src_stride_yuy2, dst_u, dst_v, width);
- YUY2ToYRow(src_yuy2, dst_y, width);
- YUY2ToYRow(src_yuy2 + src_stride_yuy2, dst_y + dst_stride_y, width);
- src_yuy2 += src_stride_yuy2 * 2;
- dst_y += dst_stride_y * 2;
- dst_u += dst_stride_u;
- dst_v += dst_stride_v;
- }
- if (height & 1) {
- YUY2ToUVRow(src_yuy2, 0, dst_u, dst_v, width);
- YUY2ToYRow(src_yuy2, dst_y, width);
- }
- return 0;
-}
-
-// Convert UYVY to I420.
-LIBYUV_API
-int UYVYToI420(const uint8* src_uyvy, int src_stride_uyvy,
- uint8* dst_y, int dst_stride_y,
- uint8* dst_u, int dst_stride_u,
- uint8* dst_v, int dst_stride_v,
- int width, int height) {
- int y;
- void (*UYVYToUVRow)(const uint8* src_uyvy, int src_stride_uyvy,
- uint8* dst_u, uint8* dst_v, int pix) = UYVYToUVRow_C;
- void (*UYVYToYRow)(const uint8* src_uyvy,
- uint8* dst_y, int pix) = UYVYToYRow_C;
- // Negative height means invert the image.
- if (height < 0) {
- height = -height;
- src_uyvy = src_uyvy + (height - 1) * src_stride_uyvy;
- src_stride_uyvy = -src_stride_uyvy;
- }
-#if defined(HAS_UYVYTOYROW_SSE2)
- if (TestCpuFlag(kCpuHasSSE2)) {
- UYVYToUVRow = UYVYToUVRow_Any_SSE2;
- UYVYToYRow = UYVYToYRow_Any_SSE2;
- if (IS_ALIGNED(width, 16)) {
- UYVYToUVRow = UYVYToUVRow_SSE2;
- UYVYToYRow = UYVYToYRow_SSE2;
- }
- }
-#endif
-#if defined(HAS_UYVYTOYROW_AVX2)
- if (TestCpuFlag(kCpuHasAVX2)) {
- UYVYToUVRow = UYVYToUVRow_Any_AVX2;
- UYVYToYRow = UYVYToYRow_Any_AVX2;
- if (IS_ALIGNED(width, 32)) {
- UYVYToUVRow = UYVYToUVRow_AVX2;
- UYVYToYRow = UYVYToYRow_AVX2;
- }
- }
-#endif
-#if defined(HAS_UYVYTOYROW_NEON)
- if (TestCpuFlag(kCpuHasNEON)) {
- UYVYToYRow = UYVYToYRow_Any_NEON;
- UYVYToUVRow = UYVYToUVRow_Any_NEON;
- if (IS_ALIGNED(width, 16)) {
- UYVYToYRow = UYVYToYRow_NEON;
- UYVYToUVRow = UYVYToUVRow_NEON;
- }
- }
-#endif
-
- for (y = 0; y < height - 1; y += 2) {
- UYVYToUVRow(src_uyvy, src_stride_uyvy, dst_u, dst_v, width);
- UYVYToYRow(src_uyvy, dst_y, width);
- UYVYToYRow(src_uyvy + src_stride_uyvy, dst_y + dst_stride_y, width);
- src_uyvy += src_stride_uyvy * 2;
- dst_y += dst_stride_y * 2;
- dst_u += dst_stride_u;
- dst_v += dst_stride_v;
- }
- if (height & 1) {
- UYVYToUVRow(src_uyvy, 0, dst_u, dst_v, width);
- UYVYToYRow(src_uyvy, dst_y, width);
- }
- return 0;
-}
-
-// Convert ARGB to I420.
-LIBYUV_API
-int ARGBToI420(const uint8* src_argb, int src_stride_argb,
- uint8* dst_y, int dst_stride_y,
- uint8* dst_u, int dst_stride_u,
- uint8* dst_v, int dst_stride_v,
- int width, int height) {
- int y;
- void (*ARGBToUVRow)(const uint8* src_argb0, int src_stride_argb,
- uint8* dst_u, uint8* dst_v, int width) = ARGBToUVRow_C;
- void (*ARGBToYRow)(const uint8* src_argb, uint8* dst_y, int pix) =
- ARGBToYRow_C;
- if (!src_argb ||
- !dst_y || !dst_u || !dst_v ||
- width <= 0 || height == 0) {
- return -1;
- }
- // Negative height means invert the image.
- if (height < 0) {
- height = -height;
- src_argb = src_argb + (height - 1) * src_stride_argb;
- src_stride_argb = -src_stride_argb;
- }
-#if defined(HAS_ARGBTOYROW_SSSE3) && defined(HAS_ARGBTOUVROW_SSSE3)
- if (TestCpuFlag(kCpuHasSSSE3)) {
- ARGBToUVRow = ARGBToUVRow_Any_SSSE3;
- ARGBToYRow = ARGBToYRow_Any_SSSE3;
- if (IS_ALIGNED(width, 16)) {
- ARGBToUVRow = ARGBToUVRow_SSSE3;
- ARGBToYRow = ARGBToYRow_SSSE3;
- }
- }
-#endif
-#if defined(HAS_ARGBTOYROW_AVX2) && defined(HAS_ARGBTOUVROW_AVX2)
- if (TestCpuFlag(kCpuHasAVX2)) {
- ARGBToUVRow = ARGBToUVRow_Any_AVX2;
- ARGBToYRow = ARGBToYRow_Any_AVX2;
- if (IS_ALIGNED(width, 32)) {
- ARGBToUVRow = ARGBToUVRow_AVX2;
- ARGBToYRow = ARGBToYRow_AVX2;
- }
- }
-#endif
-#if defined(HAS_ARGBTOYROW_NEON)
- if (TestCpuFlag(kCpuHasNEON)) {
- ARGBToYRow = ARGBToYRow_Any_NEON;
- if (IS_ALIGNED(width, 8)) {
- ARGBToYRow = ARGBToYRow_NEON;
- }
- }
-#endif
-#if defined(HAS_ARGBTOUVROW_NEON)
- if (TestCpuFlag(kCpuHasNEON)) {
- ARGBToUVRow = ARGBToUVRow_Any_NEON;
- if (IS_ALIGNED(width, 16)) {
- ARGBToUVRow = ARGBToUVRow_NEON;
- }
- }
-#endif
-
- for (y = 0; y < height - 1; y += 2) {
- ARGBToUVRow(src_argb, src_stride_argb, dst_u, dst_v, width);
- ARGBToYRow(src_argb, dst_y, width);
- ARGBToYRow(src_argb + src_stride_argb, dst_y + dst_stride_y, width);
- src_argb += src_stride_argb * 2;
- dst_y += dst_stride_y * 2;
- dst_u += dst_stride_u;
- dst_v += dst_stride_v;
- }
- if (height & 1) {
- ARGBToUVRow(src_argb, 0, dst_u, dst_v, width);
- ARGBToYRow(src_argb, dst_y, width);
- }
- return 0;
-}
-
-// Convert BGRA to I420.
-LIBYUV_API
-int BGRAToI420(const uint8* src_bgra, int src_stride_bgra,
- uint8* dst_y, int dst_stride_y,
- uint8* dst_u, int dst_stride_u,
- uint8* dst_v, int dst_stride_v,
- int width, int height) {
- int y;
- void (*BGRAToUVRow)(const uint8* src_bgra0, int src_stride_bgra,
- uint8* dst_u, uint8* dst_v, int width) = BGRAToUVRow_C;
- void (*BGRAToYRow)(const uint8* src_bgra, uint8* dst_y, int pix) =
- BGRAToYRow_C;
- if (!src_bgra ||
- !dst_y || !dst_u || !dst_v ||
- width <= 0 || height == 0) {
- return -1;
- }
- // Negative height means invert the image.
- if (height < 0) {
- height = -height;
- src_bgra = src_bgra + (height - 1) * src_stride_bgra;
- src_stride_bgra = -src_stride_bgra;
- }
-#if defined(HAS_BGRATOYROW_SSSE3) && defined(HAS_BGRATOUVROW_SSSE3)
- if (TestCpuFlag(kCpuHasSSSE3)) {
- BGRAToUVRow = BGRAToUVRow_Any_SSSE3;
- BGRAToYRow = BGRAToYRow_Any_SSSE3;
- if (IS_ALIGNED(width, 16)) {
- BGRAToUVRow = BGRAToUVRow_SSSE3;
- BGRAToYRow = BGRAToYRow_SSSE3;
- }
- }
-#endif
-#if defined(HAS_BGRATOYROW_NEON)
- if (TestCpuFlag(kCpuHasNEON)) {
- BGRAToYRow = BGRAToYRow_Any_NEON;
- if (IS_ALIGNED(width, 8)) {
- BGRAToYRow = BGRAToYRow_NEON;
- }
- }
-#endif
-#if defined(HAS_BGRATOUVROW_NEON)
- if (TestCpuFlag(kCpuHasNEON)) {
- BGRAToUVRow = BGRAToUVRow_Any_NEON;
- if (IS_ALIGNED(width, 16)) {
- BGRAToUVRow = BGRAToUVRow_NEON;
- }
- }
-#endif
-
- for (y = 0; y < height - 1; y += 2) {
- BGRAToUVRow(src_bgra, src_stride_bgra, dst_u, dst_v, width);
- BGRAToYRow(src_bgra, dst_y, width);
- BGRAToYRow(src_bgra + src_stride_bgra, dst_y + dst_stride_y, width);
- src_bgra += src_stride_bgra * 2;
- dst_y += dst_stride_y * 2;
- dst_u += dst_stride_u;
- dst_v += dst_stride_v;
- }
- if (height & 1) {
- BGRAToUVRow(src_bgra, 0, dst_u, dst_v, width);
- BGRAToYRow(src_bgra, dst_y, width);
- }
- return 0;
-}
-
-// Convert ABGR to I420.
-LIBYUV_API
-int ABGRToI420(const uint8* src_abgr, int src_stride_abgr,
- uint8* dst_y, int dst_stride_y,
- uint8* dst_u, int dst_stride_u,
- uint8* dst_v, int dst_stride_v,
- int width, int height) {
- int y;
- void (*ABGRToUVRow)(const uint8* src_abgr0, int src_stride_abgr,
- uint8* dst_u, uint8* dst_v, int width) = ABGRToUVRow_C;
- void (*ABGRToYRow)(const uint8* src_abgr, uint8* dst_y, int pix) =
- ABGRToYRow_C;
- if (!src_abgr ||
- !dst_y || !dst_u || !dst_v ||
- width <= 0 || height == 0) {
- return -1;
- }
- // Negative height means invert the image.
- if (height < 0) {
- height = -height;
- src_abgr = src_abgr + (height - 1) * src_stride_abgr;
- src_stride_abgr = -src_stride_abgr;
- }
-#if defined(HAS_ABGRTOYROW_SSSE3) && defined(HAS_ABGRTOUVROW_SSSE3)
- if (TestCpuFlag(kCpuHasSSSE3)) {
- ABGRToUVRow = ABGRToUVRow_Any_SSSE3;
- ABGRToYRow = ABGRToYRow_Any_SSSE3;
- if (IS_ALIGNED(width, 16)) {
- ABGRToUVRow = ABGRToUVRow_SSSE3;
- ABGRToYRow = ABGRToYRow_SSSE3;
- }
- }
-#endif
-#if defined(HAS_ABGRTOYROW_NEON)
- if (TestCpuFlag(kCpuHasNEON)) {
- ABGRToYRow = ABGRToYRow_Any_NEON;
- if (IS_ALIGNED(width, 8)) {
- ABGRToYRow = ABGRToYRow_NEON;
- }
- }
-#endif
-#if defined(HAS_ABGRTOUVROW_NEON)
- if (TestCpuFlag(kCpuHasNEON)) {
- ABGRToUVRow = ABGRToUVRow_Any_NEON;
- if (IS_ALIGNED(width, 16)) {
- ABGRToUVRow = ABGRToUVRow_NEON;
- }
- }
-#endif
-
- for (y = 0; y < height - 1; y += 2) {
- ABGRToUVRow(src_abgr, src_stride_abgr, dst_u, dst_v, width);
- ABGRToYRow(src_abgr, dst_y, width);
- ABGRToYRow(src_abgr + src_stride_abgr, dst_y + dst_stride_y, width);
- src_abgr += src_stride_abgr * 2;
- dst_y += dst_stride_y * 2;
- dst_u += dst_stride_u;
- dst_v += dst_stride_v;
- }
- if (height & 1) {
- ABGRToUVRow(src_abgr, 0, dst_u, dst_v, width);
- ABGRToYRow(src_abgr, dst_y, width);
- }
- return 0;
-}
-
-// Convert RGBA to I420.
-LIBYUV_API
-int RGBAToI420(const uint8* src_rgba, int src_stride_rgba,
- uint8* dst_y, int dst_stride_y,
- uint8* dst_u, int dst_stride_u,
- uint8* dst_v, int dst_stride_v,
- int width, int height) {
- int y;
- void (*RGBAToUVRow)(const uint8* src_rgba0, int src_stride_rgba,
- uint8* dst_u, uint8* dst_v, int width) = RGBAToUVRow_C;
- void (*RGBAToYRow)(const uint8* src_rgba, uint8* dst_y, int pix) =
- RGBAToYRow_C;
- if (!src_rgba ||
- !dst_y || !dst_u || !dst_v ||
- width <= 0 || height == 0) {
- return -1;
- }
- // Negative height means invert the image.
- if (height < 0) {
- height = -height;
- src_rgba = src_rgba + (height - 1) * src_stride_rgba;
- src_stride_rgba = -src_stride_rgba;
- }
-#if defined(HAS_RGBATOYROW_SSSE3) && defined(HAS_RGBATOUVROW_SSSE3)
- if (TestCpuFlag(kCpuHasSSSE3)) {
- RGBAToUVRow = RGBAToUVRow_Any_SSSE3;
- RGBAToYRow = RGBAToYRow_Any_SSSE3;
- if (IS_ALIGNED(width, 16)) {
- RGBAToUVRow = RGBAToUVRow_SSSE3;
- RGBAToYRow = RGBAToYRow_SSSE3;
- }
- }
-#endif
-#if defined(HAS_RGBATOYROW_NEON)
- if (TestCpuFlag(kCpuHasNEON)) {
- RGBAToYRow = RGBAToYRow_Any_NEON;
- if (IS_ALIGNED(width, 8)) {
- RGBAToYRow = RGBAToYRow_NEON;
- }
- }
-#endif
-#if defined(HAS_RGBATOUVROW_NEON)
- if (TestCpuFlag(kCpuHasNEON)) {
- RGBAToUVRow = RGBAToUVRow_Any_NEON;
- if (IS_ALIGNED(width, 16)) {
- RGBAToUVRow = RGBAToUVRow_NEON;
- }
- }
-#endif
-
- for (y = 0; y < height - 1; y += 2) {
- RGBAToUVRow(src_rgba, src_stride_rgba, dst_u, dst_v, width);
- RGBAToYRow(src_rgba, dst_y, width);
- RGBAToYRow(src_rgba + src_stride_rgba, dst_y + dst_stride_y, width);
- src_rgba += src_stride_rgba * 2;
- dst_y += dst_stride_y * 2;
- dst_u += dst_stride_u;
- dst_v += dst_stride_v;
- }
- if (height & 1) {
- RGBAToUVRow(src_rgba, 0, dst_u, dst_v, width);
- RGBAToYRow(src_rgba, dst_y, width);
- }
- return 0;
-}
-
-// Convert RGB24 to I420.
-LIBYUV_API
-int RGB24ToI420(const uint8* src_rgb24, int src_stride_rgb24,
- uint8* dst_y, int dst_stride_y,
- uint8* dst_u, int dst_stride_u,
- uint8* dst_v, int dst_stride_v,
- int width, int height) {
- int y;
-#if defined(HAS_RGB24TOYROW_NEON)
- void (*RGB24ToUVRow)(const uint8* src_rgb24, int src_stride_rgb24,
- uint8* dst_u, uint8* dst_v, int width) = RGB24ToUVRow_C;
- void (*RGB24ToYRow)(const uint8* src_rgb24, uint8* dst_y, int pix) =
- RGB24ToYRow_C;
-#else
- void (*RGB24ToARGBRow)(const uint8* src_rgb, uint8* dst_argb, int pix) =
- RGB24ToARGBRow_C;
- void (*ARGBToUVRow)(const uint8* src_argb0, int src_stride_argb,
- uint8* dst_u, uint8* dst_v, int width) = ARGBToUVRow_C;
- void (*ARGBToYRow)(const uint8* src_argb, uint8* dst_y, int pix) =
- ARGBToYRow_C;
-#endif
- if (!src_rgb24 || !dst_y || !dst_u || !dst_v ||
- width <= 0 || height == 0) {
- return -1;
- }
- // Negative height means invert the image.
- if (height < 0) {
- height = -height;
- src_rgb24 = src_rgb24 + (height - 1) * src_stride_rgb24;
- src_stride_rgb24 = -src_stride_rgb24;
- }
-
-// Neon version does direct RGB24 to YUV.
-#if defined(HAS_RGB24TOYROW_NEON)
- if (TestCpuFlag(kCpuHasNEON)) {
- RGB24ToUVRow = RGB24ToUVRow_Any_NEON;
- RGB24ToYRow = RGB24ToYRow_Any_NEON;
- if (IS_ALIGNED(width, 8)) {
- RGB24ToYRow = RGB24ToYRow_NEON;
- if (IS_ALIGNED(width, 16)) {
- RGB24ToUVRow = RGB24ToUVRow_NEON;
- }
- }
- }
-// Other platforms do intermediate conversion from RGB24 to ARGB.
-#else
-#if defined(HAS_RGB24TOARGBROW_SSSE3)
- if (TestCpuFlag(kCpuHasSSSE3)) {
- RGB24ToARGBRow = RGB24ToARGBRow_Any_SSSE3;
- if (IS_ALIGNED(width, 16)) {
- RGB24ToARGBRow = RGB24ToARGBRow_SSSE3;
- }
- }
-#endif
-#if defined(HAS_ARGBTOYROW_SSSE3) && defined(HAS_ARGBTOUVROW_SSSE3)
- if (TestCpuFlag(kCpuHasSSSE3)) {
- ARGBToUVRow = ARGBToUVRow_Any_SSSE3;
- ARGBToYRow = ARGBToYRow_Any_SSSE3;
- if (IS_ALIGNED(width, 16)) {
- ARGBToUVRow = ARGBToUVRow_SSSE3;
- ARGBToYRow = ARGBToYRow_SSSE3;
- }
- }
-#endif
-#if defined(HAS_ARGBTOYROW_AVX2) && defined(HAS_ARGBTOUVROW_AVX2)
- if (TestCpuFlag(kCpuHasAVX2)) {
- ARGBToUVRow = ARGBToUVRow_Any_AVX2;
- ARGBToYRow = ARGBToYRow_Any_AVX2;
- if (IS_ALIGNED(width, 32)) {
- ARGBToUVRow = ARGBToUVRow_AVX2;
- ARGBToYRow = ARGBToYRow_AVX2;
- }
- }
-#endif
- {
- // Allocate 2 rows of ARGB.
- const int kRowSize = (width * 4 + 31) & ~31;
- align_buffer_64(row, kRowSize * 2);
-#endif
-
- for (y = 0; y < height - 1; y += 2) {
-#if defined(HAS_RGB24TOYROW_NEON)
- RGB24ToUVRow(src_rgb24, src_stride_rgb24, dst_u, dst_v, width);
- RGB24ToYRow(src_rgb24, dst_y, width);
- RGB24ToYRow(src_rgb24 + src_stride_rgb24, dst_y + dst_stride_y, width);
-#else
- RGB24ToARGBRow(src_rgb24, row, width);
- RGB24ToARGBRow(src_rgb24 + src_stride_rgb24, row + kRowSize, width);
- ARGBToUVRow(row, kRowSize, dst_u, dst_v, width);
- ARGBToYRow(row, dst_y, width);
- ARGBToYRow(row + kRowSize, dst_y + dst_stride_y, width);
-#endif
- src_rgb24 += src_stride_rgb24 * 2;
- dst_y += dst_stride_y * 2;
- dst_u += dst_stride_u;
- dst_v += dst_stride_v;
- }
- if (height & 1) {
-#if defined(HAS_RGB24TOYROW_NEON)
- RGB24ToUVRow(src_rgb24, 0, dst_u, dst_v, width);
- RGB24ToYRow(src_rgb24, dst_y, width);
-#else
- RGB24ToARGBRow(src_rgb24, row, width);
- ARGBToUVRow(row, 0, dst_u, dst_v, width);
- ARGBToYRow(row, dst_y, width);
-#endif
- }
-#if !defined(HAS_RGB24TOYROW_NEON)
- free_aligned_buffer_64(row);
- }
-#endif
- return 0;
-}
-
-// Convert RAW to I420.
-LIBYUV_API
-int RAWToI420(const uint8* src_raw, int src_stride_raw,
- uint8* dst_y, int dst_stride_y,
- uint8* dst_u, int dst_stride_u,
- uint8* dst_v, int dst_stride_v,
- int width, int height) {
- int y;
-#if defined(HAS_RAWTOYROW_NEON)
- void (*RAWToUVRow)(const uint8* src_raw, int src_stride_raw,
- uint8* dst_u, uint8* dst_v, int width) = RAWToUVRow_C;
- void (*RAWToYRow)(const uint8* src_raw, uint8* dst_y, int pix) =
- RAWToYRow_C;
-#else
- void (*RAWToARGBRow)(const uint8* src_rgb, uint8* dst_argb, int pix) =
- RAWToARGBRow_C;
- void (*ARGBToUVRow)(const uint8* src_argb0, int src_stride_argb,
- uint8* dst_u, uint8* dst_v, int width) = ARGBToUVRow_C;
- void (*ARGBToYRow)(const uint8* src_argb, uint8* dst_y, int pix) =
- ARGBToYRow_C;
-#endif
- if (!src_raw || !dst_y || !dst_u || !dst_v ||
- width <= 0 || height == 0) {
- return -1;
- }
- // Negative height means invert the image.
- if (height < 0) {
- height = -height;
- src_raw = src_raw + (height - 1) * src_stride_raw;
- src_stride_raw = -src_stride_raw;
- }
-
-// Neon version does direct RAW to YUV.
-#if defined(HAS_RAWTOYROW_NEON)
- if (TestCpuFlag(kCpuHasNEON)) {
- RAWToUVRow = RAWToUVRow_Any_NEON;
- RAWToYRow = RAWToYRow_Any_NEON;
- if (IS_ALIGNED(width, 8)) {
- RAWToYRow = RAWToYRow_NEON;
- if (IS_ALIGNED(width, 16)) {
- RAWToUVRow = RAWToUVRow_NEON;
- }
- }
- }
-// Other platforms do intermediate conversion from RAW to ARGB.
-#else
-#if defined(HAS_RAWTOARGBROW_SSSE3)
- if (TestCpuFlag(kCpuHasSSSE3)) {
- RAWToARGBRow = RAWToARGBRow_Any_SSSE3;
- if (IS_ALIGNED(width, 16)) {
- RAWToARGBRow = RAWToARGBRow_SSSE3;
- }
- }
-#endif
-#if defined(HAS_ARGBTOYROW_SSSE3) && defined(HAS_ARGBTOUVROW_SSSE3)
- if (TestCpuFlag(kCpuHasSSSE3)) {
- ARGBToUVRow = ARGBToUVRow_Any_SSSE3;
- ARGBToYRow = ARGBToYRow_Any_SSSE3;
- if (IS_ALIGNED(width, 16)) {
- ARGBToUVRow = ARGBToUVRow_SSSE3;
- ARGBToYRow = ARGBToYRow_SSSE3;
- }
- }
-#endif
-#if defined(HAS_ARGBTOYROW_AVX2) && defined(HAS_ARGBTOUVROW_AVX2)
- if (TestCpuFlag(kCpuHasAVX2)) {
- ARGBToUVRow = ARGBToUVRow_Any_AVX2;
- ARGBToYRow = ARGBToYRow_Any_AVX2;
- if (IS_ALIGNED(width, 32)) {
- ARGBToUVRow = ARGBToUVRow_AVX2;
- ARGBToYRow = ARGBToYRow_AVX2;
- }
- }
-#endif
- {
- // Allocate 2 rows of ARGB.
- const int kRowSize = (width * 4 + 31) & ~31;
- align_buffer_64(row, kRowSize * 2);
-#endif
-
- for (y = 0; y < height - 1; y += 2) {
-#if defined(HAS_RAWTOYROW_NEON)
- RAWToUVRow(src_raw, src_stride_raw, dst_u, dst_v, width);
- RAWToYRow(src_raw, dst_y, width);
- RAWToYRow(src_raw + src_stride_raw, dst_y + dst_stride_y, width);
-#else
- RAWToARGBRow(src_raw, row, width);
- RAWToARGBRow(src_raw + src_stride_raw, row + kRowSize, width);
- ARGBToUVRow(row, kRowSize, dst_u, dst_v, width);
- ARGBToYRow(row, dst_y, width);
- ARGBToYRow(row + kRowSize, dst_y + dst_stride_y, width);
-#endif
- src_raw += src_stride_raw * 2;
- dst_y += dst_stride_y * 2;
- dst_u += dst_stride_u;
- dst_v += dst_stride_v;
- }
- if (height & 1) {
-#if defined(HAS_RAWTOYROW_NEON)
- RAWToUVRow(src_raw, 0, dst_u, dst_v, width);
- RAWToYRow(src_raw, dst_y, width);
-#else
- RAWToARGBRow(src_raw, row, width);
- ARGBToUVRow(row, 0, dst_u, dst_v, width);
- ARGBToYRow(row, dst_y, width);
-#endif
- }
-#if !defined(HAS_RAWTOYROW_NEON)
- free_aligned_buffer_64(row);
- }
-#endif
- return 0;
-}
-
-// Convert RGB565 to I420.
-LIBYUV_API
-int RGB565ToI420(const uint8* src_rgb565, int src_stride_rgb565,
- uint8* dst_y, int dst_stride_y,
- uint8* dst_u, int dst_stride_u,
- uint8* dst_v, int dst_stride_v,
- int width, int height) {
- int y;
-#if defined(HAS_RGB565TOYROW_NEON)
- void (*RGB565ToUVRow)(const uint8* src_rgb565, int src_stride_rgb565,
- uint8* dst_u, uint8* dst_v, int width) = RGB565ToUVRow_C;
- void (*RGB565ToYRow)(const uint8* src_rgb565, uint8* dst_y, int pix) =
- RGB565ToYRow_C;
-#else
- void (*RGB565ToARGBRow)(const uint8* src_rgb, uint8* dst_argb, int pix) =
- RGB565ToARGBRow_C;
- void (*ARGBToUVRow)(const uint8* src_argb0, int src_stride_argb,
- uint8* dst_u, uint8* dst_v, int width) = ARGBToUVRow_C;
- void (*ARGBToYRow)(const uint8* src_argb, uint8* dst_y, int pix) =
- ARGBToYRow_C;
-#endif
- if (!src_rgb565 || !dst_y || !dst_u || !dst_v ||
- width <= 0 || height == 0) {
- return -1;
- }
- // Negative height means invert the image.
- if (height < 0) {
- height = -height;
- src_rgb565 = src_rgb565 + (height - 1) * src_stride_rgb565;
- src_stride_rgb565 = -src_stride_rgb565;
- }
-
-// Neon version does direct RGB565 to YUV.
-#if defined(HAS_RGB565TOYROW_NEON)
- if (TestCpuFlag(kCpuHasNEON)) {
- RGB565ToUVRow = RGB565ToUVRow_Any_NEON;
- RGB565ToYRow = RGB565ToYRow_Any_NEON;
- if (IS_ALIGNED(width, 8)) {
- RGB565ToYRow = RGB565ToYRow_NEON;
- if (IS_ALIGNED(width, 16)) {
- RGB565ToUVRow = RGB565ToUVRow_NEON;
- }
- }
- }
-// Other platforms do intermediate conversion from RGB565 to ARGB.
-#else
-#if defined(HAS_RGB565TOARGBROW_SSE2)
- if (TestCpuFlag(kCpuHasSSE2)) {
- RGB565ToARGBRow = RGB565ToARGBRow_Any_SSE2;
- if (IS_ALIGNED(width, 8)) {
- RGB565ToARGBRow = RGB565ToARGBRow_SSE2;
- }
- }
-#endif
-#if defined(HAS_RGB565TOARGBROW_AVX2)
- if (TestCpuFlag(kCpuHasAVX2)) {
- RGB565ToARGBRow = RGB565ToARGBRow_Any_AVX2;
- if (IS_ALIGNED(width, 16)) {
- RGB565ToARGBRow = RGB565ToARGBRow_AVX2;
- }
- }
-#endif
-#if defined(HAS_ARGBTOYROW_SSSE3) && defined(HAS_ARGBTOUVROW_SSSE3)
- if (TestCpuFlag(kCpuHasSSSE3)) {
- ARGBToUVRow = ARGBToUVRow_Any_SSSE3;
- ARGBToYRow = ARGBToYRow_Any_SSSE3;
- if (IS_ALIGNED(width, 16)) {
- ARGBToUVRow = ARGBToUVRow_SSSE3;
- ARGBToYRow = ARGBToYRow_SSSE3;
- }
- }
-#endif
-#if defined(HAS_ARGBTOYROW_AVX2) && defined(HAS_ARGBTOUVROW_AVX2)
- if (TestCpuFlag(kCpuHasAVX2)) {
- ARGBToUVRow = ARGBToUVRow_Any_AVX2;
- ARGBToYRow = ARGBToYRow_Any_AVX2;
- if (IS_ALIGNED(width, 32)) {
- ARGBToUVRow = ARGBToUVRow_AVX2;
- ARGBToYRow = ARGBToYRow_AVX2;
- }
- }
-#endif
- {
- // Allocate 2 rows of ARGB.
- const int kRowSize = (width * 4 + 31) & ~31;
- align_buffer_64(row, kRowSize * 2);
-#endif
-
- for (y = 0; y < height - 1; y += 2) {
-#if defined(HAS_RGB565TOYROW_NEON)
- RGB565ToUVRow(src_rgb565, src_stride_rgb565, dst_u, dst_v, width);
- RGB565ToYRow(src_rgb565, dst_y, width);
- RGB565ToYRow(src_rgb565 + src_stride_rgb565, dst_y + dst_stride_y, width);
-#else
- RGB565ToARGBRow(src_rgb565, row, width);
- RGB565ToARGBRow(src_rgb565 + src_stride_rgb565, row + kRowSize, width);
- ARGBToUVRow(row, kRowSize, dst_u, dst_v, width);
- ARGBToYRow(row, dst_y, width);
- ARGBToYRow(row + kRowSize, dst_y + dst_stride_y, width);
-#endif
- src_rgb565 += src_stride_rgb565 * 2;
- dst_y += dst_stride_y * 2;
- dst_u += dst_stride_u;
- dst_v += dst_stride_v;
- }
- if (height & 1) {
-#if defined(HAS_RGB565TOYROW_NEON)
- RGB565ToUVRow(src_rgb565, 0, dst_u, dst_v, width);
- RGB565ToYRow(src_rgb565, dst_y, width);
-#else
- RGB565ToARGBRow(src_rgb565, row, width);
- ARGBToUVRow(row, 0, dst_u, dst_v, width);
- ARGBToYRow(row, dst_y, width);
-#endif
- }
-#if !defined(HAS_RGB565TOYROW_NEON)
- free_aligned_buffer_64(row);
- }
-#endif
- return 0;
-}
-
-// Convert ARGB1555 to I420.
-LIBYUV_API
-int ARGB1555ToI420(const uint8* src_argb1555, int src_stride_argb1555,
- uint8* dst_y, int dst_stride_y,
- uint8* dst_u, int dst_stride_u,
- uint8* dst_v, int dst_stride_v,
- int width, int height) {
- int y;
-#if defined(HAS_ARGB1555TOYROW_NEON)
- void (*ARGB1555ToUVRow)(const uint8* src_argb1555, int src_stride_argb1555,
- uint8* dst_u, uint8* dst_v, int width) = ARGB1555ToUVRow_C;
- void (*ARGB1555ToYRow)(const uint8* src_argb1555, uint8* dst_y, int pix) =
- ARGB1555ToYRow_C;
-#else
- void (*ARGB1555ToARGBRow)(const uint8* src_rgb, uint8* dst_argb, int pix) =
- ARGB1555ToARGBRow_C;
- void (*ARGBToUVRow)(const uint8* src_argb0, int src_stride_argb,
- uint8* dst_u, uint8* dst_v, int width) = ARGBToUVRow_C;
- void (*ARGBToYRow)(const uint8* src_argb, uint8* dst_y, int pix) =
- ARGBToYRow_C;
-#endif
- if (!src_argb1555 || !dst_y || !dst_u || !dst_v ||
- width <= 0 || height == 0) {
- return -1;
- }
- // Negative height means invert the image.
- if (height < 0) {
- height = -height;
- src_argb1555 = src_argb1555 + (height - 1) * src_stride_argb1555;
- src_stride_argb1555 = -src_stride_argb1555;
- }
-
-// Neon version does direct ARGB1555 to YUV.
-#if defined(HAS_ARGB1555TOYROW_NEON)
- if (TestCpuFlag(kCpuHasNEON)) {
- ARGB1555ToUVRow = ARGB1555ToUVRow_Any_NEON;
- ARGB1555ToYRow = ARGB1555ToYRow_Any_NEON;
- if (IS_ALIGNED(width, 8)) {
- ARGB1555ToYRow = ARGB1555ToYRow_NEON;
- if (IS_ALIGNED(width, 16)) {
- ARGB1555ToUVRow = ARGB1555ToUVRow_NEON;
- }
- }
- }
-// Other platforms do intermediate conversion from ARGB1555 to ARGB.
-#else
-#if defined(HAS_ARGB1555TOARGBROW_SSE2)
- if (TestCpuFlag(kCpuHasSSE2)) {
- ARGB1555ToARGBRow = ARGB1555ToARGBRow_Any_SSE2;
- if (IS_ALIGNED(width, 8)) {
- ARGB1555ToARGBRow = ARGB1555ToARGBRow_SSE2;
- }
- }
-#endif
-#if defined(HAS_ARGB1555TOARGBROW_AVX2)
- if (TestCpuFlag(kCpuHasAVX2)) {
- ARGB1555ToARGBRow = ARGB1555ToARGBRow_Any_AVX2;
- if (IS_ALIGNED(width, 16)) {
- ARGB1555ToARGBRow = ARGB1555ToARGBRow_AVX2;
- }
- }
-#endif
-#if defined(HAS_ARGBTOYROW_SSSE3) && defined(HAS_ARGBTOUVROW_SSSE3)
- if (TestCpuFlag(kCpuHasSSSE3)) {
- ARGBToUVRow = ARGBToUVRow_Any_SSSE3;
- ARGBToYRow = ARGBToYRow_Any_SSSE3;
- if (IS_ALIGNED(width, 16)) {
- ARGBToUVRow = ARGBToUVRow_SSSE3;
- ARGBToYRow = ARGBToYRow_SSSE3;
- }
- }
-#endif
-#if defined(HAS_ARGBTOYROW_AVX2) && defined(HAS_ARGBTOUVROW_AVX2)
- if (TestCpuFlag(kCpuHasAVX2)) {
- ARGBToUVRow = ARGBToUVRow_Any_AVX2;
- ARGBToYRow = ARGBToYRow_Any_AVX2;
- if (IS_ALIGNED(width, 32)) {
- ARGBToUVRow = ARGBToUVRow_AVX2;
- ARGBToYRow = ARGBToYRow_AVX2;
- }
- }
-#endif
- {
- // Allocate 2 rows of ARGB.
- const int kRowSize = (width * 4 + 31) & ~31;
- align_buffer_64(row, kRowSize * 2);
-#endif
-
- for (y = 0; y < height - 1; y += 2) {
-#if defined(HAS_ARGB1555TOYROW_NEON)
- ARGB1555ToUVRow(src_argb1555, src_stride_argb1555, dst_u, dst_v, width);
- ARGB1555ToYRow(src_argb1555, dst_y, width);
- ARGB1555ToYRow(src_argb1555 + src_stride_argb1555, dst_y + dst_stride_y,
- width);
-#else
- ARGB1555ToARGBRow(src_argb1555, row, width);
- ARGB1555ToARGBRow(src_argb1555 + src_stride_argb1555, row + kRowSize,
- width);
- ARGBToUVRow(row, kRowSize, dst_u, dst_v, width);
- ARGBToYRow(row, dst_y, width);
- ARGBToYRow(row + kRowSize, dst_y + dst_stride_y, width);
-#endif
- src_argb1555 += src_stride_argb1555 * 2;
- dst_y += dst_stride_y * 2;
- dst_u += dst_stride_u;
- dst_v += dst_stride_v;
- }
- if (height & 1) {
-#if defined(HAS_ARGB1555TOYROW_NEON)
- ARGB1555ToUVRow(src_argb1555, 0, dst_u, dst_v, width);
- ARGB1555ToYRow(src_argb1555, dst_y, width);
-#else
- ARGB1555ToARGBRow(src_argb1555, row, width);
- ARGBToUVRow(row, 0, dst_u, dst_v, width);
- ARGBToYRow(row, dst_y, width);
-#endif
- }
-#if !defined(HAS_ARGB1555TOYROW_NEON)
- free_aligned_buffer_64(row);
- }
-#endif
- return 0;
-}
-
-// Convert ARGB4444 to I420.
-LIBYUV_API
-int ARGB4444ToI420(const uint8* src_argb4444, int src_stride_argb4444,
- uint8* dst_y, int dst_stride_y,
- uint8* dst_u, int dst_stride_u,
- uint8* dst_v, int dst_stride_v,
- int width, int height) {
- int y;
-#if defined(HAS_ARGB4444TOYROW_NEON)
- void (*ARGB4444ToUVRow)(const uint8* src_argb4444, int src_stride_argb4444,
- uint8* dst_u, uint8* dst_v, int width) = ARGB4444ToUVRow_C;
- void (*ARGB4444ToYRow)(const uint8* src_argb4444, uint8* dst_y, int pix) =
- ARGB4444ToYRow_C;
-#else
- void (*ARGB4444ToARGBRow)(const uint8* src_rgb, uint8* dst_argb, int pix) =
- ARGB4444ToARGBRow_C;
- void (*ARGBToUVRow)(const uint8* src_argb0, int src_stride_argb,
- uint8* dst_u, uint8* dst_v, int width) = ARGBToUVRow_C;
- void (*ARGBToYRow)(const uint8* src_argb, uint8* dst_y, int pix) =
- ARGBToYRow_C;
-#endif
- if (!src_argb4444 || !dst_y || !dst_u || !dst_v ||
- width <= 0 || height == 0) {
- return -1;
- }
- // Negative height means invert the image.
- if (height < 0) {
- height = -height;
- src_argb4444 = src_argb4444 + (height - 1) * src_stride_argb4444;
- src_stride_argb4444 = -src_stride_argb4444;
- }
-
-// Neon version does direct ARGB4444 to YUV.
-#if defined(HAS_ARGB4444TOYROW_NEON)
- if (TestCpuFlag(kCpuHasNEON)) {
- ARGB4444ToUVRow = ARGB4444ToUVRow_Any_NEON;
- ARGB4444ToYRow = ARGB4444ToYRow_Any_NEON;
- if (IS_ALIGNED(width, 8)) {
- ARGB4444ToYRow = ARGB4444ToYRow_NEON;
- if (IS_ALIGNED(width, 16)) {
- ARGB4444ToUVRow = ARGB4444ToUVRow_NEON;
- }
- }
- }
-// Other platforms do intermediate conversion from ARGB4444 to ARGB.
-#else
-#if defined(HAS_ARGB4444TOARGBROW_SSE2)
- if (TestCpuFlag(kCpuHasSSE2)) {
- ARGB4444ToARGBRow = ARGB4444ToARGBRow_Any_SSE2;
- if (IS_ALIGNED(width, 8)) {
- ARGB4444ToARGBRow = ARGB4444ToARGBRow_SSE2;
- }
- }
-#endif
-#if defined(HAS_ARGB4444TOARGBROW_AVX2)
- if (TestCpuFlag(kCpuHasAVX2)) {
- ARGB4444ToARGBRow = ARGB4444ToARGBRow_Any_AVX2;
- if (IS_ALIGNED(width, 16)) {
- ARGB4444ToARGBRow = ARGB4444ToARGBRow_AVX2;
- }
- }
-#endif
-#if defined(HAS_ARGBTOYROW_SSSE3) && defined(HAS_ARGBTOUVROW_SSSE3)
- if (TestCpuFlag(kCpuHasSSSE3)) {
- ARGBToUVRow = ARGBToUVRow_Any_SSSE3;
- ARGBToYRow = ARGBToYRow_Any_SSSE3;
- if (IS_ALIGNED(width, 16)) {
- ARGBToUVRow = ARGBToUVRow_SSSE3;
- ARGBToYRow = ARGBToYRow_SSSE3;
- }
- }
-#endif
-#if defined(HAS_ARGBTOYROW_AVX2) && defined(HAS_ARGBTOUVROW_AVX2)
- if (TestCpuFlag(kCpuHasAVX2)) {
- ARGBToUVRow = ARGBToUVRow_Any_AVX2;
- ARGBToYRow = ARGBToYRow_Any_AVX2;
- if (IS_ALIGNED(width, 32)) {
- ARGBToUVRow = ARGBToUVRow_AVX2;
- ARGBToYRow = ARGBToYRow_AVX2;
- }
- }
-#endif
- {
- // Allocate 2 rows of ARGB.
- const int kRowSize = (width * 4 + 31) & ~31;
- align_buffer_64(row, kRowSize * 2);
-#endif
-
- for (y = 0; y < height - 1; y += 2) {
-#if defined(HAS_ARGB4444TOYROW_NEON)
- ARGB4444ToUVRow(src_argb4444, src_stride_argb4444, dst_u, dst_v, width);
- ARGB4444ToYRow(src_argb4444, dst_y, width);
- ARGB4444ToYRow(src_argb4444 + src_stride_argb4444, dst_y + dst_stride_y,
- width);
-#else
- ARGB4444ToARGBRow(src_argb4444, row, width);
- ARGB4444ToARGBRow(src_argb4444 + src_stride_argb4444, row + kRowSize,
- width);
- ARGBToUVRow(row, kRowSize, dst_u, dst_v, width);
- ARGBToYRow(row, dst_y, width);
- ARGBToYRow(row + kRowSize, dst_y + dst_stride_y, width);
-#endif
- src_argb4444 += src_stride_argb4444 * 2;
- dst_y += dst_stride_y * 2;
- dst_u += dst_stride_u;
- dst_v += dst_stride_v;
- }
- if (height & 1) {
-#if defined(HAS_ARGB4444TOYROW_NEON)
- ARGB4444ToUVRow(src_argb4444, 0, dst_u, dst_v, width);
- ARGB4444ToYRow(src_argb4444, dst_y, width);
-#else
- ARGB4444ToARGBRow(src_argb4444, row, width);
- ARGBToUVRow(row, 0, dst_u, dst_v, width);
- ARGBToYRow(row, dst_y, width);
-#endif
- }
-#if !defined(HAS_ARGB4444TOYROW_NEON)
- free_aligned_buffer_64(row);
- }
-#endif
- return 0;
-}
-
-#ifdef __cplusplus
-} // extern "C"
-} // namespace libyuv
-#endif
diff --git a/third_party/aom/third_party/libyuv/source/convert_argb.cc b/third_party/aom/third_party/libyuv/source/convert_argb.cc
deleted file mode 100644
index 44756bc41..000000000
--- a/third_party/aom/third_party/libyuv/source/convert_argb.cc
+++ /dev/null
@@ -1,1155 +0,0 @@
-/*
- * Copyright 2011 The LibYuv Project Authors. All rights reserved.
- *
- * Use of this source code is governed by a BSD-style license
- * that can be found in the LICENSE file in the root of the source
- * tree. An additional intellectual property rights grant can be found
- * in the file PATENTS. All contributing project authors may
- * be found in the AUTHORS file in the root of the source tree.
- */
-
-#include "libyuv/convert_argb.h"
-
-#include "libyuv/cpu_id.h"
-#ifdef HAVE_JPEG
-#include "libyuv/mjpeg_decoder.h"
-#endif
-#include "libyuv/rotate_argb.h"
-#include "libyuv/row.h"
-#include "libyuv/video_common.h"
-
-#ifdef __cplusplus
-namespace libyuv {
-extern "C" {
-#endif
-
-// Copy ARGB with optional flipping
-LIBYUV_API
-int ARGBCopy(const uint8* src_argb, int src_stride_argb,
- uint8* dst_argb, int dst_stride_argb,
- int width, int height) {
- if (!src_argb || !dst_argb ||
- width <= 0 || height == 0) {
- return -1;
- }
- // Negative height means invert the image.
- if (height < 0) {
- height = -height;
- src_argb = src_argb + (height - 1) * src_stride_argb;
- src_stride_argb = -src_stride_argb;
- }
-
- CopyPlane(src_argb, src_stride_argb, dst_argb, dst_stride_argb,
- width * 4, height);
- return 0;
-}
-
-// Convert I444 to ARGB.
-LIBYUV_API
-int I444ToARGB(const uint8* src_y, int src_stride_y,
- const uint8* src_u, int src_stride_u,
- const uint8* src_v, int src_stride_v,
- uint8* dst_argb, int dst_stride_argb,
- int width, int height) {
- int y;
- void (*I444ToARGBRow)(const uint8* y_buf,
- const uint8* u_buf,
- const uint8* v_buf,
- uint8* rgb_buf,
- int width) = I444ToARGBRow_C;
- if (!src_y || !src_u || !src_v ||
- !dst_argb ||
- width <= 0 || height == 0) {
- return -1;
- }
- // Negative height means invert the image.
- if (height < 0) {
- height = -height;
- dst_argb = dst_argb + (height - 1) * dst_stride_argb;
- dst_stride_argb = -dst_stride_argb;
- }
- // Coalesce rows.
- if (src_stride_y == width &&
- src_stride_u == width &&
- src_stride_v == width &&
- dst_stride_argb == width * 4) {
- width *= height;
- height = 1;
- src_stride_y = src_stride_u = src_stride_v = dst_stride_argb = 0;
- }
-#if defined(HAS_I444TOARGBROW_SSSE3)
- if (TestCpuFlag(kCpuHasSSSE3)) {
- I444ToARGBRow = I444ToARGBRow_Any_SSSE3;
- if (IS_ALIGNED(width, 8)) {
- I444ToARGBRow = I444ToARGBRow_SSSE3;
- }
- }
-#endif
-#if defined(HAS_I444TOARGBROW_AVX2)
- if (TestCpuFlag(kCpuHasAVX2)) {
- I444ToARGBRow = I444ToARGBRow_Any_AVX2;
- if (IS_ALIGNED(width, 16)) {
- I444ToARGBRow = I444ToARGBRow_AVX2;
- }
- }
-#endif
-#if defined(HAS_I444TOARGBROW_NEON)
- if (TestCpuFlag(kCpuHasNEON)) {
- I444ToARGBRow = I444ToARGBRow_Any_NEON;
- if (IS_ALIGNED(width, 8)) {
- I444ToARGBRow = I444ToARGBRow_NEON;
- }
- }
-#endif
-
- for (y = 0; y < height; ++y) {
- I444ToARGBRow(src_y, src_u, src_v, dst_argb, width);
- dst_argb += dst_stride_argb;
- src_y += src_stride_y;
- src_u += src_stride_u;
- src_v += src_stride_v;
- }
- return 0;
-}
-
-// Convert I422 to ARGB.
-LIBYUV_API
-int I422ToARGB(const uint8* src_y, int src_stride_y,
- const uint8* src_u, int src_stride_u,
- const uint8* src_v, int src_stride_v,
- uint8* dst_argb, int dst_stride_argb,
- int width, int height) {
- int y;
- void (*I422ToARGBRow)(const uint8* y_buf,
- const uint8* u_buf,
- const uint8* v_buf,
- uint8* rgb_buf,
- int width) = I422ToARGBRow_C;
- if (!src_y || !src_u || !src_v ||
- !dst_argb ||
- width <= 0 || height == 0) {
- return -1;
- }
- // Negative height means invert the image.
- if (height < 0) {
- height = -height;
- dst_argb = dst_argb + (height - 1) * dst_stride_argb;
- dst_stride_argb = -dst_stride_argb;
- }
- // Coalesce rows.
- if (src_stride_y == width &&
- src_stride_u * 2 == width &&
- src_stride_v * 2 == width &&
- dst_stride_argb == width * 4) {
- width *= height;
- height = 1;
- src_stride_y = src_stride_u = src_stride_v = dst_stride_argb = 0;
- }
-#if defined(HAS_I422TOARGBROW_SSSE3)
- if (TestCpuFlag(kCpuHasSSSE3)) {
- I422ToARGBRow = I422ToARGBRow_Any_SSSE3;
- if (IS_ALIGNED(width, 8)) {
- I422ToARGBRow = I422ToARGBRow_SSSE3;
- }
- }
-#endif
-#if defined(HAS_I422TOARGBROW_AVX2)
- if (TestCpuFlag(kCpuHasAVX2)) {
- I422ToARGBRow = I422ToARGBRow_Any_AVX2;
- if (IS_ALIGNED(width, 16)) {
- I422ToARGBRow = I422ToARGBRow_AVX2;
- }
- }
-#endif
-#if defined(HAS_I422TOARGBROW_NEON)
- if (TestCpuFlag(kCpuHasNEON)) {
- I422ToARGBRow = I422ToARGBRow_Any_NEON;
- if (IS_ALIGNED(width, 8)) {
- I422ToARGBRow = I422ToARGBRow_NEON;
- }
- }
-#endif
-#if defined(HAS_I422TOARGBROW_MIPS_DSPR2)
- if (TestCpuFlag(kCpuHasMIPS_DSPR2) && IS_ALIGNED(width, 4) &&
- IS_ALIGNED(src_y, 4) && IS_ALIGNED(src_stride_y, 4) &&
- IS_ALIGNED(src_u, 2) && IS_ALIGNED(src_stride_u, 2) &&
- IS_ALIGNED(src_v, 2) && IS_ALIGNED(src_stride_v, 2) &&
- IS_ALIGNED(dst_argb, 4) && IS_ALIGNED(dst_stride_argb, 4)) {
- I422ToARGBRow = I422ToARGBRow_MIPS_DSPR2;
- }
-#endif
-
- for (y = 0; y < height; ++y) {
- I422ToARGBRow(src_y, src_u, src_v, dst_argb, width);
- dst_argb += dst_stride_argb;
- src_y += src_stride_y;
- src_u += src_stride_u;
- src_v += src_stride_v;
- }
- return 0;
-}
-
-// Convert I411 to ARGB.
-LIBYUV_API
-int I411ToARGB(const uint8* src_y, int src_stride_y,
- const uint8* src_u, int src_stride_u,
- const uint8* src_v, int src_stride_v,
- uint8* dst_argb, int dst_stride_argb,
- int width, int height) {
- int y;
- void (*I411ToARGBRow)(const uint8* y_buf,
- const uint8* u_buf,
- const uint8* v_buf,
- uint8* rgb_buf,
- int width) = I411ToARGBRow_C;
- if (!src_y || !src_u || !src_v ||
- !dst_argb ||
- width <= 0 || height == 0) {
- return -1;
- }
- // Negative height means invert the image.
- if (height < 0) {
- height = -height;
- dst_argb = dst_argb + (height - 1) * dst_stride_argb;
- dst_stride_argb = -dst_stride_argb;
- }
- // Coalesce rows.
- if (src_stride_y == width &&
- src_stride_u * 4 == width &&
- src_stride_v * 4 == width &&
- dst_stride_argb == width * 4) {
- width *= height;
- height = 1;
- src_stride_y = src_stride_u = src_stride_v = dst_stride_argb = 0;
- }
-#if defined(HAS_I411TOARGBROW_SSSE3)
- if (TestCpuFlag(kCpuHasSSSE3)) {
- I411ToARGBRow = I411ToARGBRow_Any_SSSE3;
- if (IS_ALIGNED(width, 8)) {
- I411ToARGBRow = I411ToARGBRow_SSSE3;
- }
- }
-#endif
-#if defined(HAS_I411TOARGBROW_AVX2)
- if (TestCpuFlag(kCpuHasAVX2)) {
- I411ToARGBRow = I411ToARGBRow_Any_AVX2;
- if (IS_ALIGNED(width, 16)) {
- I411ToARGBRow = I411ToARGBRow_AVX2;
- }
- }
-#endif
-#if defined(HAS_I411TOARGBROW_NEON)
- if (TestCpuFlag(kCpuHasNEON)) {
- I411ToARGBRow = I411ToARGBRow_Any_NEON;
- if (IS_ALIGNED(width, 8)) {
- I411ToARGBRow = I411ToARGBRow_NEON;
- }
- }
-#endif
-
- for (y = 0; y < height; ++y) {
- I411ToARGBRow(src_y, src_u, src_v, dst_argb, width);
- dst_argb += dst_stride_argb;
- src_y += src_stride_y;
- src_u += src_stride_u;
- src_v += src_stride_v;
- }
- return 0;
-}
-
-// Convert I400 to ARGB.
-LIBYUV_API
-int I400ToARGB(const uint8* src_y, int src_stride_y,
- uint8* dst_argb, int dst_stride_argb,
- int width, int height) {
- int y;
- void (*I400ToARGBRow)(const uint8* y_buf,
- uint8* rgb_buf,
- int width) = I400ToARGBRow_C;
- if (!src_y || !dst_argb ||
- width <= 0 || height == 0) {
- return -1;
- }
- // Negative height means invert the image.
- if (height < 0) {
- height = -height;
- dst_argb = dst_argb + (height - 1) * dst_stride_argb;
- dst_stride_argb = -dst_stride_argb;
- }
- // Coalesce rows.
- if (src_stride_y == width &&
- dst_stride_argb == width * 4) {
- width *= height;
- height = 1;
- src_stride_y = dst_stride_argb = 0;
- }
-#if defined(HAS_I400TOARGBROW_SSE2)
- if (TestCpuFlag(kCpuHasSSE2)) {
- I400ToARGBRow = I400ToARGBRow_Any_SSE2;
- if (IS_ALIGNED(width, 8)) {
- I400ToARGBRow = I400ToARGBRow_SSE2;
- }
- }
-#endif
-#if defined(HAS_I400TOARGBROW_AVX2)
- if (TestCpuFlag(kCpuHasAVX2)) {
- I400ToARGBRow = I400ToARGBRow_Any_AVX2;
- if (IS_ALIGNED(width, 16)) {
- I400ToARGBRow = I400ToARGBRow_AVX2;
- }
- }
-#endif
-#if defined(HAS_I400TOARGBROW_NEON)
- if (TestCpuFlag(kCpuHasNEON)) {
- I400ToARGBRow = I400ToARGBRow_Any_NEON;
- if (IS_ALIGNED(width, 8)) {
- I400ToARGBRow = I400ToARGBRow_NEON;
- }
- }
-#endif
-
- for (y = 0; y < height; ++y) {
- I400ToARGBRow(src_y, dst_argb, width);
- dst_argb += dst_stride_argb;
- src_y += src_stride_y;
- }
- return 0;
-}
-
-// Convert J400 to ARGB.
-LIBYUV_API
-int J400ToARGB(const uint8* src_y, int src_stride_y,
- uint8* dst_argb, int dst_stride_argb,
- int width, int height) {
- int y;
- void (*J400ToARGBRow)(const uint8* src_y, uint8* dst_argb, int pix) =
- J400ToARGBRow_C;
- if (!src_y || !dst_argb ||
- width <= 0 || height == 0) {
- return -1;
- }
- // Negative height means invert the image.
- if (height < 0) {
- height = -height;
- src_y = src_y + (height - 1) * src_stride_y;
- src_stride_y = -src_stride_y;
- }
- // Coalesce rows.
- if (src_stride_y == width &&
- dst_stride_argb == width * 4) {
- width *= height;
- height = 1;
- src_stride_y = dst_stride_argb = 0;
- }
-#if defined(HAS_J400TOARGBROW_SSE2)
- if (TestCpuFlag(kCpuHasSSE2)) {
- J400ToARGBRow = J400ToARGBRow_Any_SSE2;
- if (IS_ALIGNED(width, 8)) {
- J400ToARGBRow = J400ToARGBRow_SSE2;
- }
- }
-#endif
-#if defined(HAS_J400TOARGBROW_AVX2)
- if (TestCpuFlag(kCpuHasAVX2)) {
- J400ToARGBRow = J400ToARGBRow_Any_AVX2;
- if (IS_ALIGNED(width, 16)) {
- J400ToARGBRow = J400ToARGBRow_AVX2;
- }
- }
-#endif
-#if defined(HAS_J400TOARGBROW_NEON)
- if (TestCpuFlag(kCpuHasNEON)) {
- J400ToARGBRow = J400ToARGBRow_Any_NEON;
- if (IS_ALIGNED(width, 8)) {
- J400ToARGBRow = J400ToARGBRow_NEON;
- }
- }
-#endif
- for (y = 0; y < height; ++y) {
- J400ToARGBRow(src_y, dst_argb, width);
- src_y += src_stride_y;
- dst_argb += dst_stride_argb;
- }
- return 0;
-}
-
-// Shuffle table for converting BGRA to ARGB.
-static uvec8 kShuffleMaskBGRAToARGB = {
- 3u, 2u, 1u, 0u, 7u, 6u, 5u, 4u, 11u, 10u, 9u, 8u, 15u, 14u, 13u, 12u
-};
-
-// Shuffle table for converting ABGR to ARGB.
-static uvec8 kShuffleMaskABGRToARGB = {
- 2u, 1u, 0u, 3u, 6u, 5u, 4u, 7u, 10u, 9u, 8u, 11u, 14u, 13u, 12u, 15u
-};
-
-// Shuffle table for converting RGBA to ARGB.
-static uvec8 kShuffleMaskRGBAToARGB = {
- 1u, 2u, 3u, 0u, 5u, 6u, 7u, 4u, 9u, 10u, 11u, 8u, 13u, 14u, 15u, 12u
-};
-
-// Convert BGRA to ARGB.
-LIBYUV_API
-int BGRAToARGB(const uint8* src_bgra, int src_stride_bgra,
- uint8* dst_argb, int dst_stride_argb,
- int width, int height) {
- return ARGBShuffle(src_bgra, src_stride_bgra,
- dst_argb, dst_stride_argb,
- (const uint8*)(&kShuffleMaskBGRAToARGB),
- width, height);
-}
-
-// Convert ARGB to BGRA (same as BGRAToARGB).
-LIBYUV_API
-int ARGBToBGRA(const uint8* src_bgra, int src_stride_bgra,
- uint8* dst_argb, int dst_stride_argb,
- int width, int height) {
- return ARGBShuffle(src_bgra, src_stride_bgra,
- dst_argb, dst_stride_argb,
- (const uint8*)(&kShuffleMaskBGRAToARGB),
- width, height);
-}
-
-// Convert ABGR to ARGB.
-LIBYUV_API
-int ABGRToARGB(const uint8* src_abgr, int src_stride_abgr,
- uint8* dst_argb, int dst_stride_argb,
- int width, int height) {
- return ARGBShuffle(src_abgr, src_stride_abgr,
- dst_argb, dst_stride_argb,
- (const uint8*)(&kShuffleMaskABGRToARGB),
- width, height);
-}
-
-// Convert ARGB to ABGR to (same as ABGRToARGB).
-LIBYUV_API
-int ARGBToABGR(const uint8* src_abgr, int src_stride_abgr,
- uint8* dst_argb, int dst_stride_argb,
- int width, int height) {
- return ARGBShuffle(src_abgr, src_stride_abgr,
- dst_argb, dst_stride_argb,
- (const uint8*)(&kShuffleMaskABGRToARGB),
- width, height);
-}
-
-// Convert RGBA to ARGB.
-LIBYUV_API
-int RGBAToARGB(const uint8* src_rgba, int src_stride_rgba,
- uint8* dst_argb, int dst_stride_argb,
- int width, int height) {
- return ARGBShuffle(src_rgba, src_stride_rgba,
- dst_argb, dst_stride_argb,
- (const uint8*)(&kShuffleMaskRGBAToARGB),
- width, height);
-}
-
-// Convert RGB24 to ARGB.
-LIBYUV_API
-int RGB24ToARGB(const uint8* src_rgb24, int src_stride_rgb24,
- uint8* dst_argb, int dst_stride_argb,
- int width, int height) {
- int y;
- void (*RGB24ToARGBRow)(const uint8* src_rgb, uint8* dst_argb, int pix) =
- RGB24ToARGBRow_C;
- if (!src_rgb24 || !dst_argb ||
- width <= 0 || height == 0) {
- return -1;
- }
- // Negative height means invert the image.
- if (height < 0) {
- height = -height;
- src_rgb24 = src_rgb24 + (height - 1) * src_stride_rgb24;
- src_stride_rgb24 = -src_stride_rgb24;
- }
- // Coalesce rows.
- if (src_stride_rgb24 == width * 3 &&
- dst_stride_argb == width * 4) {
- width *= height;
- height = 1;
- src_stride_rgb24 = dst_stride_argb = 0;
- }
-#if defined(HAS_RGB24TOARGBROW_SSSE3)
- if (TestCpuFlag(kCpuHasSSSE3)) {
- RGB24ToARGBRow = RGB24ToARGBRow_Any_SSSE3;
- if (IS_ALIGNED(width, 16)) {
- RGB24ToARGBRow = RGB24ToARGBRow_SSSE3;
- }
- }
-#endif
-#if defined(HAS_RGB24TOARGBROW_NEON)
- if (TestCpuFlag(kCpuHasNEON)) {
- RGB24ToARGBRow = RGB24ToARGBRow_Any_NEON;
- if (IS_ALIGNED(width, 8)) {
- RGB24ToARGBRow = RGB24ToARGBRow_NEON;
- }
- }
-#endif
-
- for (y = 0; y < height; ++y) {
- RGB24ToARGBRow(src_rgb24, dst_argb, width);
- src_rgb24 += src_stride_rgb24;
- dst_argb += dst_stride_argb;
- }
- return 0;
-}
-
-// Convert RAW to ARGB.
-LIBYUV_API
-int RAWToARGB(const uint8* src_raw, int src_stride_raw,
- uint8* dst_argb, int dst_stride_argb,
- int width, int height) {
- int y;
- void (*RAWToARGBRow)(const uint8* src_rgb, uint8* dst_argb, int pix) =
- RAWToARGBRow_C;
- if (!src_raw || !dst_argb ||
- width <= 0 || height == 0) {
- return -1;
- }
- // Negative height means invert the image.
- if (height < 0) {
- height = -height;
- src_raw = src_raw + (height - 1) * src_stride_raw;
- src_stride_raw = -src_stride_raw;
- }
- // Coalesce rows.
- if (src_stride_raw == width * 3 &&
- dst_stride_argb == width * 4) {
- width *= height;
- height = 1;
- src_stride_raw = dst_stride_argb = 0;
- }
-#if defined(HAS_RAWTOARGBROW_SSSE3)
- if (TestCpuFlag(kCpuHasSSSE3)) {
- RAWToARGBRow = RAWToARGBRow_Any_SSSE3;
- if (IS_ALIGNED(width, 16)) {
- RAWToARGBRow = RAWToARGBRow_SSSE3;
- }
- }
-#endif
-#if defined(HAS_RAWTOARGBROW_NEON)
- if (TestCpuFlag(kCpuHasNEON)) {
- RAWToARGBRow = RAWToARGBRow_Any_NEON;
- if (IS_ALIGNED(width, 8)) {
- RAWToARGBRow = RAWToARGBRow_NEON;
- }
- }
-#endif
-
- for (y = 0; y < height; ++y) {
- RAWToARGBRow(src_raw, dst_argb, width);
- src_raw += src_stride_raw;
- dst_argb += dst_stride_argb;
- }
- return 0;
-}
-
-// Convert RGB565 to ARGB.
-LIBYUV_API
-int RGB565ToARGB(const uint8* src_rgb565, int src_stride_rgb565,
- uint8* dst_argb, int dst_stride_argb,
- int width, int height) {
- int y;
- void (*RGB565ToARGBRow)(const uint8* src_rgb565, uint8* dst_argb, int pix) =
- RGB565ToARGBRow_C;
- if (!src_rgb565 || !dst_argb ||
- width <= 0 || height == 0) {
- return -1;
- }
- // Negative height means invert the image.
- if (height < 0) {
- height = -height;
- src_rgb565 = src_rgb565 + (height - 1) * src_stride_rgb565;
- src_stride_rgb565 = -src_stride_rgb565;
- }
- // Coalesce rows.
- if (src_stride_rgb565 == width * 2 &&
- dst_stride_argb == width * 4) {
- width *= height;
- height = 1;
- src_stride_rgb565 = dst_stride_argb = 0;
- }
-#if defined(HAS_RGB565TOARGBROW_SSE2)
- if (TestCpuFlag(kCpuHasSSE2)) {
- RGB565ToARGBRow = RGB565ToARGBRow_Any_SSE2;
- if (IS_ALIGNED(width, 8)) {
- RGB565ToARGBRow = RGB565ToARGBRow_SSE2;
- }
- }
-#endif
-#if defined(HAS_RGB565TOARGBROW_AVX2)
- if (TestCpuFlag(kCpuHasAVX2)) {
- RGB565ToARGBRow = RGB565ToARGBRow_Any_AVX2;
- if (IS_ALIGNED(width, 16)) {
- RGB565ToARGBRow = RGB565ToARGBRow_AVX2;
- }
- }
-#endif
-#if defined(HAS_RGB565TOARGBROW_NEON)
- if (TestCpuFlag(kCpuHasNEON)) {
- RGB565ToARGBRow = RGB565ToARGBRow_Any_NEON;
- if (IS_ALIGNED(width, 8)) {
- RGB565ToARGBRow = RGB565ToARGBRow_NEON;
- }
- }
-#endif
-
- for (y = 0; y < height; ++y) {
- RGB565ToARGBRow(src_rgb565, dst_argb, width);
- src_rgb565 += src_stride_rgb565;
- dst_argb += dst_stride_argb;
- }
- return 0;
-}
-
-// Convert ARGB1555 to ARGB.
-LIBYUV_API
-int ARGB1555ToARGB(const uint8* src_argb1555, int src_stride_argb1555,
- uint8* dst_argb, int dst_stride_argb,
- int width, int height) {
- int y;
- void (*ARGB1555ToARGBRow)(const uint8* src_argb1555, uint8* dst_argb,
- int pix) = ARGB1555ToARGBRow_C;
- if (!src_argb1555 || !dst_argb ||
- width <= 0 || height == 0) {
- return -1;
- }
- // Negative height means invert the image.
- if (height < 0) {
- height = -height;
- src_argb1555 = src_argb1555 + (height - 1) * src_stride_argb1555;
- src_stride_argb1555 = -src_stride_argb1555;
- }
- // Coalesce rows.
- if (src_stride_argb1555 == width * 2 &&
- dst_stride_argb == width * 4) {
- width *= height;
- height = 1;
- src_stride_argb1555 = dst_stride_argb = 0;
- }
-#if defined(HAS_ARGB1555TOARGBROW_SSE2)
- if (TestCpuFlag(kCpuHasSSE2)) {
- ARGB1555ToARGBRow = ARGB1555ToARGBRow_Any_SSE2;
- if (IS_ALIGNED(width, 8)) {
- ARGB1555ToARGBRow = ARGB1555ToARGBRow_SSE2;
- }
- }
-#endif
-#if defined(HAS_ARGB1555TOARGBROW_AVX2)
- if (TestCpuFlag(kCpuHasAVX2)) {
- ARGB1555ToARGBRow = ARGB1555ToARGBRow_Any_AVX2;
- if (IS_ALIGNED(width, 16)) {
- ARGB1555ToARGBRow = ARGB1555ToARGBRow_AVX2;
- }
- }
-#endif
-#if defined(HAS_ARGB1555TOARGBROW_NEON)
- if (TestCpuFlag(kCpuHasNEON)) {
- ARGB1555ToARGBRow = ARGB1555ToARGBRow_Any_NEON;
- if (IS_ALIGNED(width, 8)) {
- ARGB1555ToARGBRow = ARGB1555ToARGBRow_NEON;
- }
- }
-#endif
-
- for (y = 0; y < height; ++y) {
- ARGB1555ToARGBRow(src_argb1555, dst_argb, width);
- src_argb1555 += src_stride_argb1555;
- dst_argb += dst_stride_argb;
- }
- return 0;
-}
-
-// Convert ARGB4444 to ARGB.
-LIBYUV_API
-int ARGB4444ToARGB(const uint8* src_argb4444, int src_stride_argb4444,
- uint8* dst_argb, int dst_stride_argb,
- int width, int height) {
- int y;
- void (*ARGB4444ToARGBRow)(const uint8* src_argb4444, uint8* dst_argb,
- int pix) = ARGB4444ToARGBRow_C;
- if (!src_argb4444 || !dst_argb ||
- width <= 0 || height == 0) {
- return -1;
- }
- // Negative height means invert the image.
- if (height < 0) {
- height = -height;
- src_argb4444 = src_argb4444 + (height - 1) * src_stride_argb4444;
- src_stride_argb4444 = -src_stride_argb4444;
- }
- // Coalesce rows.
- if (src_stride_argb4444 == width * 2 &&
- dst_stride_argb == width * 4) {
- width *= height;
- height = 1;
- src_stride_argb4444 = dst_stride_argb = 0;
- }
-#if defined(HAS_ARGB4444TOARGBROW_SSE2)
- if (TestCpuFlag(kCpuHasSSE2)) {
- ARGB4444ToARGBRow = ARGB4444ToARGBRow_Any_SSE2;
- if (IS_ALIGNED(width, 8)) {
- ARGB4444ToARGBRow = ARGB4444ToARGBRow_SSE2;
- }
- }
-#endif
-#if defined(HAS_ARGB4444TOARGBROW_AVX2)
- if (TestCpuFlag(kCpuHasAVX2)) {
- ARGB4444ToARGBRow = ARGB4444ToARGBRow_Any_AVX2;
- if (IS_ALIGNED(width, 16)) {
- ARGB4444ToARGBRow = ARGB4444ToARGBRow_AVX2;
- }
- }
-#endif
-#if defined(HAS_ARGB4444TOARGBROW_NEON)
- if (TestCpuFlag(kCpuHasNEON)) {
- ARGB4444ToARGBRow = ARGB4444ToARGBRow_Any_NEON;
- if (IS_ALIGNED(width, 8)) {
- ARGB4444ToARGBRow = ARGB4444ToARGBRow_NEON;
- }
- }
-#endif
-
- for (y = 0; y < height; ++y) {
- ARGB4444ToARGBRow(src_argb4444, dst_argb, width);
- src_argb4444 += src_stride_argb4444;
- dst_argb += dst_stride_argb;
- }
- return 0;
-}
-
-// Convert NV12 to ARGB.
-LIBYUV_API
-int NV12ToARGB(const uint8* src_y, int src_stride_y,
- const uint8* src_uv, int src_stride_uv,
- uint8* dst_argb, int dst_stride_argb,
- int width, int height) {
- int y;
- void (*NV12ToARGBRow)(const uint8* y_buf,
- const uint8* uv_buf,
- uint8* rgb_buf,
- int width) = NV12ToARGBRow_C;
- if (!src_y || !src_uv || !dst_argb ||
- width <= 0 || height == 0) {
- return -1;
- }
- // Negative height means invert the image.
- if (height < 0) {
- height = -height;
- dst_argb = dst_argb + (height - 1) * dst_stride_argb;
- dst_stride_argb = -dst_stride_argb;
- }
-#if defined(HAS_NV12TOARGBROW_SSSE3)
- if (TestCpuFlag(kCpuHasSSSE3)) {
- NV12ToARGBRow = NV12ToARGBRow_Any_SSSE3;
- if (IS_ALIGNED(width, 8)) {
- NV12ToARGBRow = NV12ToARGBRow_SSSE3;
- }
- }
-#endif
-#if defined(HAS_NV12TOARGBROW_AVX2)
- if (TestCpuFlag(kCpuHasAVX2)) {
- NV12ToARGBRow = NV12ToARGBRow_Any_AVX2;
- if (IS_ALIGNED(width, 16)) {
- NV12ToARGBRow = NV12ToARGBRow_AVX2;
- }
- }
-#endif
-#if defined(HAS_NV12TOARGBROW_NEON)
- if (TestCpuFlag(kCpuHasNEON)) {
- NV12ToARGBRow = NV12ToARGBRow_Any_NEON;
- if (IS_ALIGNED(width, 8)) {
- NV12ToARGBRow = NV12ToARGBRow_NEON;
- }
- }
-#endif
-
- for (y = 0; y < height; ++y) {
- NV12ToARGBRow(src_y, src_uv, dst_argb, width);
- dst_argb += dst_stride_argb;
- src_y += src_stride_y;
- if (y & 1) {
- src_uv += src_stride_uv;
- }
- }
- return 0;
-}
-
-// Convert NV21 to ARGB.
-LIBYUV_API
-int NV21ToARGB(const uint8* src_y, int src_stride_y,
- const uint8* src_uv, int src_stride_uv,
- uint8* dst_argb, int dst_stride_argb,
- int width, int height) {
- int y;
- void (*NV21ToARGBRow)(const uint8* y_buf,
- const uint8* uv_buf,
- uint8* rgb_buf,
- int width) = NV21ToARGBRow_C;
- if (!src_y || !src_uv || !dst_argb ||
- width <= 0 || height == 0) {
- return -1;
- }
- // Negative height means invert the image.
- if (height < 0) {
- height = -height;
- dst_argb = dst_argb + (height - 1) * dst_stride_argb;
- dst_stride_argb = -dst_stride_argb;
- }
-#if defined(HAS_NV21TOARGBROW_SSSE3)
- if (TestCpuFlag(kCpuHasSSSE3)) {
- NV21ToARGBRow = NV21ToARGBRow_Any_SSSE3;
- if (IS_ALIGNED(width, 8)) {
- NV21ToARGBRow = NV21ToARGBRow_SSSE3;
- }
- }
-#endif
-#if defined(HAS_NV21TOARGBROW_AVX2)
- if (TestCpuFlag(kCpuHasAVX2)) {
- NV21ToARGBRow = NV21ToARGBRow_Any_AVX2;
- if (IS_ALIGNED(width, 16)) {
- NV21ToARGBRow = NV21ToARGBRow_AVX2;
- }
- }
-#endif
-#if defined(HAS_NV21TOARGBROW_NEON)
- if (TestCpuFlag(kCpuHasNEON)) {
- NV21ToARGBRow = NV21ToARGBRow_Any_NEON;
- if (IS_ALIGNED(width, 8)) {
- NV21ToARGBRow = NV21ToARGBRow_NEON;
- }
- }
-#endif
-
- for (y = 0; y < height; ++y) {
- NV21ToARGBRow(src_y, src_uv, dst_argb, width);
- dst_argb += dst_stride_argb;
- src_y += src_stride_y;
- if (y & 1) {
- src_uv += src_stride_uv;
- }
- }
- return 0;
-}
-
-// Convert M420 to ARGB.
-LIBYUV_API
-int M420ToARGB(const uint8* src_m420, int src_stride_m420,
- uint8* dst_argb, int dst_stride_argb,
- int width, int height) {
- int y;
- void (*NV12ToARGBRow)(const uint8* y_buf,
- const uint8* uv_buf,
- uint8* rgb_buf,
- int width) = NV12ToARGBRow_C;
- if (!src_m420 || !dst_argb ||
- width <= 0 || height == 0) {
- return -1;
- }
- // Negative height means invert the image.
- if (height < 0) {
- height = -height;
- dst_argb = dst_argb + (height - 1) * dst_stride_argb;
- dst_stride_argb = -dst_stride_argb;
- }
-#if defined(HAS_NV12TOARGBROW_SSSE3)
- if (TestCpuFlag(kCpuHasSSSE3)) {
- NV12ToARGBRow = NV12ToARGBRow_Any_SSSE3;
- if (IS_ALIGNED(width, 8)) {
- NV12ToARGBRow = NV12ToARGBRow_SSSE3;
- }
- }
-#endif
-#if defined(HAS_NV12TOARGBROW_AVX2)
- if (TestCpuFlag(kCpuHasAVX2)) {
- NV12ToARGBRow = NV12ToARGBRow_Any_AVX2;
- if (IS_ALIGNED(width, 16)) {
- NV12ToARGBRow = NV12ToARGBRow_AVX2;
- }
- }
-#endif
-#if defined(HAS_NV12TOARGBROW_NEON)
- if (TestCpuFlag(kCpuHasNEON)) {
- NV12ToARGBRow = NV12ToARGBRow_Any_NEON;
- if (IS_ALIGNED(width, 8)) {
- NV12ToARGBRow = NV12ToARGBRow_NEON;
- }
- }
-#endif
-
- for (y = 0; y < height - 1; y += 2) {
- NV12ToARGBRow(src_m420, src_m420 + src_stride_m420 * 2, dst_argb, width);
- NV12ToARGBRow(src_m420 + src_stride_m420, src_m420 + src_stride_m420 * 2,
- dst_argb + dst_stride_argb, width);
- dst_argb += dst_stride_argb * 2;
- src_m420 += src_stride_m420 * 3;
- }
- if (height & 1) {
- NV12ToARGBRow(src_m420, src_m420 + src_stride_m420 * 2, dst_argb, width);
- }
- return 0;
-}
-
-// Convert YUY2 to ARGB.
-LIBYUV_API
-int YUY2ToARGB(const uint8* src_yuy2, int src_stride_yuy2,
- uint8* dst_argb, int dst_stride_argb,
- int width, int height) {
- int y;
- void (*YUY2ToARGBRow)(const uint8* src_yuy2, uint8* dst_argb, int pix) =
- YUY2ToARGBRow_C;
- if (!src_yuy2 || !dst_argb ||
- width <= 0 || height == 0) {
- return -1;
- }
- // Negative height means invert the image.
- if (height < 0) {
- height = -height;
- src_yuy2 = src_yuy2 + (height - 1) * src_stride_yuy2;
- src_stride_yuy2 = -src_stride_yuy2;
- }
- // Coalesce rows.
- if (src_stride_yuy2 == width * 2 &&
- dst_stride_argb == width * 4) {
- width *= height;
- height = 1;
- src_stride_yuy2 = dst_stride_argb = 0;
- }
-#if defined(HAS_YUY2TOARGBROW_SSSE3)
- if (TestCpuFlag(kCpuHasSSSE3)) {
- YUY2ToARGBRow = YUY2ToARGBRow_Any_SSSE3;
- if (IS_ALIGNED(width, 16)) {
- YUY2ToARGBRow = YUY2ToARGBRow_SSSE3;
- }
- }
-#endif
-#if defined(HAS_YUY2TOARGBROW_AVX2)
- if (TestCpuFlag(kCpuHasAVX2)) {
- YUY2ToARGBRow = YUY2ToARGBRow_Any_AVX2;
- if (IS_ALIGNED(width, 32)) {
- YUY2ToARGBRow = YUY2ToARGBRow_AVX2;
- }
- }
-#endif
-#if defined(HAS_YUY2TOARGBROW_NEON)
- if (TestCpuFlag(kCpuHasNEON)) {
- YUY2ToARGBRow = YUY2ToARGBRow_Any_NEON;
- if (IS_ALIGNED(width, 8)) {
- YUY2ToARGBRow = YUY2ToARGBRow_NEON;
- }
- }
-#endif
- for (y = 0; y < height; ++y) {
- YUY2ToARGBRow(src_yuy2, dst_argb, width);
- src_yuy2 += src_stride_yuy2;
- dst_argb += dst_stride_argb;
- }
- return 0;
-}
-
-// Convert UYVY to ARGB.
-LIBYUV_API
-int UYVYToARGB(const uint8* src_uyvy, int src_stride_uyvy,
- uint8* dst_argb, int dst_stride_argb,
- int width, int height) {
- int y;
- void (*UYVYToARGBRow)(const uint8* src_uyvy, uint8* dst_argb, int pix) =
- UYVYToARGBRow_C;
- if (!src_uyvy || !dst_argb ||
- width <= 0 || height == 0) {
- return -1;
- }
- // Negative height means invert the image.
- if (height < 0) {
- height = -height;
- src_uyvy = src_uyvy + (height - 1) * src_stride_uyvy;
- src_stride_uyvy = -src_stride_uyvy;
- }
- // Coalesce rows.
- if (src_stride_uyvy == width * 2 &&
- dst_stride_argb == width * 4) {
- width *= height;
- height = 1;
- src_stride_uyvy = dst_stride_argb = 0;
- }
-#if defined(HAS_UYVYTOARGBROW_SSSE3)
- if (TestCpuFlag(kCpuHasSSSE3)) {
- UYVYToARGBRow = UYVYToARGBRow_Any_SSSE3;
- if (IS_ALIGNED(width, 16)) {
- UYVYToARGBRow = UYVYToARGBRow_SSSE3;
- }
- }
-#endif
-#if defined(HAS_UYVYTOARGBROW_AVX2)
- if (TestCpuFlag(kCpuHasAVX2)) {
- UYVYToARGBRow = UYVYToARGBRow_Any_AVX2;
- if (IS_ALIGNED(width, 32)) {
- UYVYToARGBRow = UYVYToARGBRow_AVX2;
- }
- }
-#endif
-#if defined(HAS_UYVYTOARGBROW_NEON)
- if (TestCpuFlag(kCpuHasNEON)) {
- UYVYToARGBRow = UYVYToARGBRow_Any_NEON;
- if (IS_ALIGNED(width, 8)) {
- UYVYToARGBRow = UYVYToARGBRow_NEON;
- }
- }
-#endif
- for (y = 0; y < height; ++y) {
- UYVYToARGBRow(src_uyvy, dst_argb, width);
- src_uyvy += src_stride_uyvy;
- dst_argb += dst_stride_argb;
- }
- return 0;
-}
-
-// Convert J420 to ARGB.
-LIBYUV_API
-int J420ToARGB(const uint8* src_y, int src_stride_y,
- const uint8* src_u, int src_stride_u,
- const uint8* src_v, int src_stride_v,
- uint8* dst_argb, int dst_stride_argb,
- int width, int height) {
- int y;
- void (*J422ToARGBRow)(const uint8* y_buf,
- const uint8* u_buf,
- const uint8* v_buf,
- uint8* rgb_buf,
- int width) = J422ToARGBRow_C;
- if (!src_y || !src_u || !src_v || !dst_argb ||
- width <= 0 || height == 0) {
- return -1;
- }
- // Negative height means invert the image.
- if (height < 0) {
- height = -height;
- dst_argb = dst_argb + (height - 1) * dst_stride_argb;
- dst_stride_argb = -dst_stride_argb;
- }
-#if defined(HAS_J422TOARGBROW_SSSE3)
- if (TestCpuFlag(kCpuHasSSSE3)) {
- J422ToARGBRow = J422ToARGBRow_Any_SSSE3;
- if (IS_ALIGNED(width, 8)) {
- J422ToARGBRow = J422ToARGBRow_SSSE3;
- }
- }
-#endif
-#if defined(HAS_J422TOARGBROW_AVX2)
- if (TestCpuFlag(kCpuHasAVX2)) {
- J422ToARGBRow = J422ToARGBRow_Any_AVX2;
- if (IS_ALIGNED(width, 16)) {
- J422ToARGBRow = J422ToARGBRow_AVX2;
- }
- }
-#endif
-#if defined(HAS_J422TOARGBROW_NEON)
- if (TestCpuFlag(kCpuHasNEON)) {
- J422ToARGBRow = J422ToARGBRow_Any_NEON;
- if (IS_ALIGNED(width, 8)) {
- J422ToARGBRow = J422ToARGBRow_NEON;
- }
- }
-#endif
-#if defined(HAS_J422TOARGBROW_MIPS_DSPR2)
- if (TestCpuFlag(kCpuHasMIPS_DSPR2) && IS_ALIGNED(width, 4) &&
- IS_ALIGNED(src_y, 4) && IS_ALIGNED(src_stride_y, 4) &&
- IS_ALIGNED(src_u, 2) && IS_ALIGNED(src_stride_u, 2) &&
- IS_ALIGNED(src_v, 2) && IS_ALIGNED(src_stride_v, 2) &&
- IS_ALIGNED(dst_argb, 4) && IS_ALIGNED(dst_stride_argb, 4)) {
- J422ToARGBRow = J422ToARGBRow_MIPS_DSPR2;
- }
-#endif
-
- for (y = 0; y < height; ++y) {
- J422ToARGBRow(src_y, src_u, src_v, dst_argb, width);
- dst_argb += dst_stride_argb;
- src_y += src_stride_y;
- if (y & 1) {
- src_u += src_stride_u;
- src_v += src_stride_v;
- }
- }
- return 0;
-}
-
-// Convert J422 to ARGB.
-LIBYUV_API
-int J422ToARGB(const uint8* src_y, int src_stride_y,
- const uint8* src_u, int src_stride_u,
- const uint8* src_v, int src_stride_v,
- uint8* dst_argb, int dst_stride_argb,
- int width, int height) {
- int y;
- void (*J422ToARGBRow)(const uint8* y_buf,
- const uint8* u_buf,
- const uint8* v_buf,
- uint8* rgb_buf,
- int width) = J422ToARGBRow_C;
- if (!src_y || !src_u || !src_v ||
- !dst_argb ||
- width <= 0 || height == 0) {
- return -1;
- }
- // Negative height means invert the image.
- if (height < 0) {
- height = -height;
- dst_argb = dst_argb + (height - 1) * dst_stride_argb;
- dst_stride_argb = -dst_stride_argb;
- }
- // Coalesce rows.
- if (src_stride_y == width &&
- src_stride_u * 2 == width &&
- src_stride_v * 2 == width &&
- dst_stride_argb == width * 4) {
- width *= height;
- height = 1;
- src_stride_y = src_stride_u = src_stride_v = dst_stride_argb = 0;
- }
-#if defined(HAS_J422TOARGBROW_SSSE3)
- if (TestCpuFlag(kCpuHasSSSE3)) {
- J422ToARGBRow = J422ToARGBRow_Any_SSSE3;
- if (IS_ALIGNED(width, 8)) {
- J422ToARGBRow = J422ToARGBRow_SSSE3;
- }
- }
-#endif
-#if defined(HAS_J422TOARGBROW_AVX2)
- if (TestCpuFlag(kCpuHasAVX2)) {
- J422ToARGBRow = J422ToARGBRow_Any_AVX2;
- if (IS_ALIGNED(width, 16)) {
- J422ToARGBRow = J422ToARGBRow_AVX2;
- }
- }
-#endif
-#if defined(HAS_J422TOARGBROW_NEON)
- if (TestCpuFlag(kCpuHasNEON)) {
- J422ToARGBRow = J422ToARGBRow_Any_NEON;
- if (IS_ALIGNED(width, 8)) {
- J422ToARGBRow = J422ToARGBRow_NEON;
- }
- }
-#endif
-#if defined(HAS_J422TOARGBROW_MIPS_DSPR2)
- if (TestCpuFlag(kCpuHasMIPS_DSPR2) && IS_ALIGNED(width, 4) &&
- IS_ALIGNED(src_y, 4) && IS_ALIGNED(src_stride_y, 4) &&
- IS_ALIGNED(src_u, 2) && IS_ALIGNED(src_stride_u, 2) &&
- IS_ALIGNED(src_v, 2) && IS_ALIGNED(src_stride_v, 2) &&
- IS_ALIGNED(dst_argb, 4) && IS_ALIGNED(dst_stride_argb, 4)) {
- J422ToARGBRow = J422ToARGBRow_MIPS_DSPR2;
- }
-#endif
-
- for (y = 0; y < height; ++y) {
- J422ToARGBRow(src_y, src_u, src_v, dst_argb, width);
- dst_argb += dst_stride_argb;
- src_y += src_stride_y;
- src_u += src_stride_u;
- src_v += src_stride_v;
- }
- return 0;
-}
-
-#ifdef __cplusplus
-} // extern "C"
-} // namespace libyuv
-#endif
diff --git a/third_party/aom/third_party/libyuv/source/convert_from.cc b/third_party/aom/third_party/libyuv/source/convert_from.cc
deleted file mode 100644
index 31f1ac992..000000000
--- a/third_party/aom/third_party/libyuv/source/convert_from.cc
+++ /dev/null
@@ -1,1348 +0,0 @@
-/*
- * Copyright 2012 The LibYuv Project Authors. All rights reserved.
- *
- * Use of this source code is governed by a BSD-style license
- * that can be found in the LICENSE file in the root of the source
- * tree. An additional intellectual property rights grant can be found
- * in the file PATENTS. All contributing project authors may
- * be found in the AUTHORS file in the root of the source tree.
- */
-
-#include "libyuv/convert_from.h"
-
-#include "libyuv/basic_types.h"
-#include "libyuv/convert.h" // For I420Copy
-#include "libyuv/cpu_id.h"
-#include "libyuv/planar_functions.h"
-#include "libyuv/rotate.h"
-#include "libyuv/scale.h" // For ScalePlane()
-#include "libyuv/video_common.h"
-#include "libyuv/row.h"
-
-#ifdef __cplusplus
-namespace libyuv {
-extern "C" {
-#endif
-
-#define SUBSAMPLE(v, a, s) (v < 0) ? (-((-v + a) >> s)) : ((v + a) >> s)
-static __inline int Abs(int v) {
- return v >= 0 ? v : -v;
-}
-
-// I420 To any I4xx YUV format with mirroring.
-static int I420ToI4xx(const uint8* src_y, int src_stride_y,
- const uint8* src_u, int src_stride_u,
- const uint8* src_v, int src_stride_v,
- uint8* dst_y, int dst_stride_y,
- uint8* dst_u, int dst_stride_u,
- uint8* dst_v, int dst_stride_v,
- int src_y_width, int src_y_height,
- int dst_uv_width, int dst_uv_height) {
- const int dst_y_width = Abs(src_y_width);
- const int dst_y_height = Abs(src_y_height);
- const int src_uv_width = SUBSAMPLE(src_y_width, 1, 1);
- const int src_uv_height = SUBSAMPLE(src_y_height, 1, 1);
- if (src_y_width == 0 || src_y_height == 0 ||
- dst_uv_width <= 0 || dst_uv_height <= 0) {
- return -1;
- }
- ScalePlane(src_y, src_stride_y, src_y_width, src_y_height,
- dst_y, dst_stride_y, dst_y_width, dst_y_height,
- kFilterBilinear);
- ScalePlane(src_u, src_stride_u, src_uv_width, src_uv_height,
- dst_u, dst_stride_u, dst_uv_width, dst_uv_height,
- kFilterBilinear);
- ScalePlane(src_v, src_stride_v, src_uv_width, src_uv_height,
- dst_v, dst_stride_v, dst_uv_width, dst_uv_height,
- kFilterBilinear);
- return 0;
-}
-
-// 420 chroma is 1/2 width, 1/2 height
-// 422 chroma is 1/2 width, 1x height
-LIBYUV_API
-int I420ToI422(const uint8* src_y, int src_stride_y,
- const uint8* src_u, int src_stride_u,
- const uint8* src_v, int src_stride_v,
- uint8* dst_y, int dst_stride_y,
- uint8* dst_u, int dst_stride_u,
- uint8* dst_v, int dst_stride_v,
- int width, int height) {
- const int dst_uv_width = (Abs(width) + 1) >> 1;
- const int dst_uv_height = Abs(height);
- return I420ToI4xx(src_y, src_stride_y,
- src_u, src_stride_u,
- src_v, src_stride_v,
- dst_y, dst_stride_y,
- dst_u, dst_stride_u,
- dst_v, dst_stride_v,
- width, height,
- dst_uv_width, dst_uv_height);
-}
-
-// 420 chroma is 1/2 width, 1/2 height
-// 444 chroma is 1x width, 1x height
-LIBYUV_API
-int I420ToI444(const uint8* src_y, int src_stride_y,
- const uint8* src_u, int src_stride_u,
- const uint8* src_v, int src_stride_v,
- uint8* dst_y, int dst_stride_y,
- uint8* dst_u, int dst_stride_u,
- uint8* dst_v, int dst_stride_v,
- int width, int height) {
- const int dst_uv_width = Abs(width);
- const int dst_uv_height = Abs(height);
- return I420ToI4xx(src_y, src_stride_y,
- src_u, src_stride_u,
- src_v, src_stride_v,
- dst_y, dst_stride_y,
- dst_u, dst_stride_u,
- dst_v, dst_stride_v,
- width, height,
- dst_uv_width, dst_uv_height);
-}
-
-// 420 chroma is 1/2 width, 1/2 height
-// 411 chroma is 1/4 width, 1x height
-LIBYUV_API
-int I420ToI411(const uint8* src_y, int src_stride_y,
- const uint8* src_u, int src_stride_u,
- const uint8* src_v, int src_stride_v,
- uint8* dst_y, int dst_stride_y,
- uint8* dst_u, int dst_stride_u,
- uint8* dst_v, int dst_stride_v,
- int width, int height) {
- const int dst_uv_width = (Abs(width) + 3) >> 2;
- const int dst_uv_height = Abs(height);
- return I420ToI4xx(src_y, src_stride_y,
- src_u, src_stride_u,
- src_v, src_stride_v,
- dst_y, dst_stride_y,
- dst_u, dst_stride_u,
- dst_v, dst_stride_v,
- width, height,
- dst_uv_width, dst_uv_height);
-}
-
-// Copy to I400. Source can be I420,422,444,400,NV12,NV21
-LIBYUV_API
-int I400Copy(const uint8* src_y, int src_stride_y,
- uint8* dst_y, int dst_stride_y,
- int width, int height) {
- if (!src_y || !dst_y ||
- width <= 0 || height == 0) {
- return -1;
- }
- // Negative height means invert the image.
- if (height < 0) {
- height = -height;
- src_y = src_y + (height - 1) * src_stride_y;
- src_stride_y = -src_stride_y;
- }
- CopyPlane(src_y, src_stride_y, dst_y, dst_stride_y, width, height);
- return 0;
-}
-
-LIBYUV_API
-int I422ToYUY2(const uint8* src_y, int src_stride_y,
- const uint8* src_u, int src_stride_u,
- const uint8* src_v, int src_stride_v,
- uint8* dst_yuy2, int dst_stride_yuy2,
- int width, int height) {
- int y;
- void (*I422ToYUY2Row)(const uint8* src_y, const uint8* src_u,
- const uint8* src_v, uint8* dst_yuy2, int width) =
- I422ToYUY2Row_C;
- if (!src_y || !src_u || !src_v || !dst_yuy2 ||
- width <= 0 || height == 0) {
- return -1;
- }
- // Negative height means invert the image.
- if (height < 0) {
- height = -height;
- dst_yuy2 = dst_yuy2 + (height - 1) * dst_stride_yuy2;
- dst_stride_yuy2 = -dst_stride_yuy2;
- }
- // Coalesce rows.
- if (src_stride_y == width &&
- src_stride_u * 2 == width &&
- src_stride_v * 2 == width &&
- dst_stride_yuy2 == width * 2) {
- width *= height;
- height = 1;
- src_stride_y = src_stride_u = src_stride_v = dst_stride_yuy2 = 0;
- }
-#if defined(HAS_I422TOYUY2ROW_SSE2)
- if (TestCpuFlag(kCpuHasSSE2)) {
- I422ToYUY2Row = I422ToYUY2Row_Any_SSE2;
- if (IS_ALIGNED(width, 16)) {
- I422ToYUY2Row = I422ToYUY2Row_SSE2;
- }
- }
-#endif
-#if defined(HAS_I422TOYUY2ROW_NEON)
- if (TestCpuFlag(kCpuHasNEON)) {
- I422ToYUY2Row = I422ToYUY2Row_Any_NEON;
- if (IS_ALIGNED(width, 16)) {
- I422ToYUY2Row = I422ToYUY2Row_NEON;
- }
- }
-#endif
-
- for (y = 0; y < height; ++y) {
- I422ToYUY2Row(src_y, src_u, src_v, dst_yuy2, width);
- src_y += src_stride_y;
- src_u += src_stride_u;
- src_v += src_stride_v;
- dst_yuy2 += dst_stride_yuy2;
- }
- return 0;
-}
-
-LIBYUV_API
-int I420ToYUY2(const uint8* src_y, int src_stride_y,
- const uint8* src_u, int src_stride_u,
- const uint8* src_v, int src_stride_v,
- uint8* dst_yuy2, int dst_stride_yuy2,
- int width, int height) {
- int y;
- void (*I422ToYUY2Row)(const uint8* src_y, const uint8* src_u,
- const uint8* src_v, uint8* dst_yuy2, int width) =
- I422ToYUY2Row_C;
- if (!src_y || !src_u || !src_v || !dst_yuy2 ||
- width <= 0 || height == 0) {
- return -1;
- }
- // Negative height means invert the image.
- if (height < 0) {
- height = -height;
- dst_yuy2 = dst_yuy2 + (height - 1) * dst_stride_yuy2;
- dst_stride_yuy2 = -dst_stride_yuy2;
- }
-#if defined(HAS_I422TOYUY2ROW_SSE2)
- if (TestCpuFlag(kCpuHasSSE2)) {
- I422ToYUY2Row = I422ToYUY2Row_Any_SSE2;
- if (IS_ALIGNED(width, 16)) {
- I422ToYUY2Row = I422ToYUY2Row_SSE2;
- }
- }
-#endif
-#if defined(HAS_I422TOYUY2ROW_NEON)
- if (TestCpuFlag(kCpuHasNEON)) {
- I422ToYUY2Row = I422ToYUY2Row_Any_NEON;
- if (IS_ALIGNED(width, 16)) {
- I422ToYUY2Row = I422ToYUY2Row_NEON;
- }
- }
-#endif
-
- for (y = 0; y < height - 1; y += 2) {
- I422ToYUY2Row(src_y, src_u, src_v, dst_yuy2, width);
- I422ToYUY2Row(src_y + src_stride_y, src_u, src_v,
- dst_yuy2 + dst_stride_yuy2, width);
- src_y += src_stride_y * 2;
- src_u += src_stride_u;
- src_v += src_stride_v;
- dst_yuy2 += dst_stride_yuy2 * 2;
- }
- if (height & 1) {
- I422ToYUY2Row(src_y, src_u, src_v, dst_yuy2, width);
- }
- return 0;
-}
-
-LIBYUV_API
-int I422ToUYVY(const uint8* src_y, int src_stride_y,
- const uint8* src_u, int src_stride_u,
- const uint8* src_v, int src_stride_v,
- uint8* dst_uyvy, int dst_stride_uyvy,
- int width, int height) {
- int y;
- void (*I422ToUYVYRow)(const uint8* src_y, const uint8* src_u,
- const uint8* src_v, uint8* dst_uyvy, int width) =
- I422ToUYVYRow_C;
- if (!src_y || !src_u || !src_v || !dst_uyvy ||
- width <= 0 || height == 0) {
- return -1;
- }
- // Negative height means invert the image.
- if (height < 0) {
- height = -height;
- dst_uyvy = dst_uyvy + (height - 1) * dst_stride_uyvy;
- dst_stride_uyvy = -dst_stride_uyvy;
- }
- // Coalesce rows.
- if (src_stride_y == width &&
- src_stride_u * 2 == width &&
- src_stride_v * 2 == width &&
- dst_stride_uyvy == width * 2) {
- width *= height;
- height = 1;
- src_stride_y = src_stride_u = src_stride_v = dst_stride_uyvy = 0;
- }
-#if defined(HAS_I422TOUYVYROW_SSE2)
- if (TestCpuFlag(kCpuHasSSE2)) {
- I422ToUYVYRow = I422ToUYVYRow_Any_SSE2;
- if (IS_ALIGNED(width, 16)) {
- I422ToUYVYRow = I422ToUYVYRow_SSE2;
- }
- }
-#endif
-#if defined(HAS_I422TOUYVYROW_NEON)
- if (TestCpuFlag(kCpuHasNEON)) {
- I422ToUYVYRow = I422ToUYVYRow_Any_NEON;
- if (IS_ALIGNED(width, 16)) {
- I422ToUYVYRow = I422ToUYVYRow_NEON;
- }
- }
-#endif
-
- for (y = 0; y < height; ++y) {
- I422ToUYVYRow(src_y, src_u, src_v, dst_uyvy, width);
- src_y += src_stride_y;
- src_u += src_stride_u;
- src_v += src_stride_v;
- dst_uyvy += dst_stride_uyvy;
- }
- return 0;
-}
-
-// Convert I420 to UYVY. Every chroma row is reused for two luma rows.
-// Returns 0 on success, -1 on invalid arguments.
-LIBYUV_API
-int I420ToUYVY(const uint8* src_y, int src_stride_y,
-               const uint8* src_u, int src_stride_u,
-               const uint8* src_v, int src_stride_v,
-               uint8* dst_uyvy, int dst_stride_uyvy,
-               int width, int height) {
-  int row;
-  void (*PackRow)(const uint8* src_y, const uint8* src_u,
-                  const uint8* src_v, uint8* dst_uyvy, int width) =
-      I422ToUYVYRow_C;
-  if (!src_y || !src_u || !src_v || !dst_uyvy) {
-    return -1;
-  }
-  if (width <= 0 || height == 0) {
-    return -1;
-  }
-  // A negative height flips the image: start at the last destination row
-  // and walk upwards.
-  if (height < 0) {
-    height = -height;
-    dst_uyvy += (height - 1) * dst_stride_uyvy;
-    dst_stride_uyvy = -dst_stride_uyvy;
-  }
-#if defined(HAS_I422TOUYVYROW_SSE2)
-  if (TestCpuFlag(kCpuHasSSE2)) {
-    PackRow = IS_ALIGNED(width, 16) ? I422ToUYVYRow_SSE2
-                                    : I422ToUYVYRow_Any_SSE2;
-  }
-#endif
-#if defined(HAS_I422TOUYVYROW_NEON)
-  if (TestCpuFlag(kCpuHasNEON)) {
-    PackRow = IS_ALIGNED(width, 16) ? I422ToUYVYRow_NEON
-                                    : I422ToUYVYRow_Any_NEON;
-  }
-#endif
-
-  // Emit luma rows in pairs that share the current chroma row.
-  for (row = 0; row < height - 1; row += 2) {
-    const uint8* next_y = src_y + src_stride_y;
-    uint8* next_dst = dst_uyvy + dst_stride_uyvy;
-    PackRow(src_y, src_u, src_v, dst_uyvy, width);
-    PackRow(next_y, src_u, src_v, next_dst, width);
-    src_y += src_stride_y * 2;
-    src_u += src_stride_u;
-    src_v += src_stride_v;
-    dst_uyvy += dst_stride_uyvy * 2;
-  }
-  // Odd height: one unpaired luma row remains.
-  if ((height & 1) != 0) {
-    PackRow(src_y, src_u, src_v, dst_uyvy, width);
-  }
-  return 0;
-}
-
-// Convert I420 to NV12: copy the Y plane and interleave the U and V rows
-// into a single UV plane. Returns 0 on success, -1 on invalid arguments.
-LIBYUV_API
-int I420ToNV12(const uint8* src_y, int src_stride_y,
-               const uint8* src_u, int src_stride_u,
-               const uint8* src_v, int src_stride_v,
-               uint8* dst_y, int dst_stride_y,
-               uint8* dst_uv, int dst_stride_uv,
-               int width, int height) {
-  int row;
-  void (*InterleaveRow)(const uint8* src_u, const uint8* src_v,
-                        uint8* dst_uv, int width) = MergeUVRow_C;
-  // Chroma dimensions, rounded up for odd luma sizes.
-  int halfwidth = (width + 1) >> 1;
-  int halfheight = (height + 1) >> 1;
-  if (!src_y || !src_u || !src_v || !dst_y || !dst_uv) {
-    return -1;
-  }
-  if (width <= 0 || height == 0) {
-    return -1;
-  }
-  // Negative height flips the image: both destination planes are written
-  // from their last row upwards.
-  if (height < 0) {
-    height = -height;
-    halfheight = (height + 1) >> 1;
-    dst_y += (height - 1) * dst_stride_y;
-    dst_uv += (halfheight - 1) * dst_stride_uv;
-    dst_stride_y = -dst_stride_y;
-    dst_stride_uv = -dst_stride_uv;
-  }
-  // Coalesce luma rows: contiguous Y rows are copied as one long row.
-  if (src_stride_y == width && dst_stride_y == width) {
-    width *= height;
-    height = 1;
-    src_stride_y = dst_stride_y = 0;
-  }
-  // Coalesce chroma rows in the same way.
-  if (src_stride_u == halfwidth && src_stride_v == halfwidth &&
-      dst_stride_uv == halfwidth * 2) {
-    halfwidth *= halfheight;
-    halfheight = 1;
-    src_stride_u = src_stride_v = dst_stride_uv = 0;
-  }
-#if defined(HAS_MERGEUVROW_SSE2)
-  if (TestCpuFlag(kCpuHasSSE2)) {
-    InterleaveRow = IS_ALIGNED(halfwidth, 16) ? MergeUVRow_SSE2
-                                              : MergeUVRow_Any_SSE2;
-  }
-#endif
-#if defined(HAS_MERGEUVROW_AVX2)
-  if (TestCpuFlag(kCpuHasAVX2)) {
-    InterleaveRow = IS_ALIGNED(halfwidth, 32) ? MergeUVRow_AVX2
-                                              : MergeUVRow_Any_AVX2;
-  }
-#endif
-#if defined(HAS_MERGEUVROW_NEON)
-  if (TestCpuFlag(kCpuHasNEON)) {
-    InterleaveRow = IS_ALIGNED(halfwidth, 16) ? MergeUVRow_NEON
-                                              : MergeUVRow_Any_NEON;
-  }
-#endif
-
-  CopyPlane(src_y, src_stride_y, dst_y, dst_stride_y, width, height);
-  // Merge each row of U and V into one row of UV.
-  for (row = 0; row < halfheight; ++row) {
-    InterleaveRow(src_u, src_v, dst_uv, halfwidth);
-    src_u += src_stride_u;
-    src_v += src_stride_v;
-    dst_uv += dst_stride_uv;
-  }
-  return 0;
-}
-
-// Convert I420 to NV21. NV21 is NV12 with the chroma bytes in V,U order, so
-// this forwards to I420ToNV12 with the U and V source planes exchanged.
-// Returns whatever I420ToNV12 returns (0 on success, -1 on bad arguments).
-LIBYUV_API
-int I420ToNV21(const uint8* src_y, int src_stride_y,
-               const uint8* src_u, int src_stride_u,
-               const uint8* src_v, int src_stride_v,
-               uint8* dst_y, int dst_stride_y,
-               uint8* dst_vu, int dst_stride_vu,
-               int width, int height) {
-  // The destination Y plane must use its own stride (dst_stride_y); passing
-  // the source stride here corrupts the output when the two differ.
-  return I420ToNV12(src_y, src_stride_y,
-                    src_v, src_stride_v,
-                    src_u, src_stride_u,
-                    dst_y, dst_stride_y,
-                    dst_vu, dst_stride_vu,
-                    width, height);
-}
-
-// Convert I420 to ARGB.
-// Returns 0 on success, -1 on invalid arguments.
-LIBYUV_API
-int I420ToARGB(const uint8* src_y, int src_stride_y,
-               const uint8* src_u, int src_stride_u,
-               const uint8* src_v, int src_stride_v,
-               uint8* dst_argb, int dst_stride_argb,
-               int width, int height) {
-  int row;
-  void (*ConvertRow)(const uint8* y_buf,
-                     const uint8* u_buf,
-                     const uint8* v_buf,
-                     uint8* rgb_buf,
-                     int width) = I422ToARGBRow_C;
-  if (!src_y || !src_u || !src_v || !dst_argb) {
-    return -1;
-  }
-  if (width <= 0 || height == 0) {
-    return -1;
-  }
-  // Negative height flips the image: write from the last row upwards.
-  if (height < 0) {
-    height = -height;
-    dst_argb += (height - 1) * dst_stride_argb;
-    dst_stride_argb = -dst_stride_argb;
-  }
-#if defined(HAS_I422TOARGBROW_SSSE3)
-  if (TestCpuFlag(kCpuHasSSSE3)) {
-    ConvertRow = IS_ALIGNED(width, 8) ? I422ToARGBRow_SSSE3
-                                      : I422ToARGBRow_Any_SSSE3;
-  }
-#endif
-#if defined(HAS_I422TOARGBROW_AVX2)
-  if (TestCpuFlag(kCpuHasAVX2)) {
-    ConvertRow = IS_ALIGNED(width, 16) ? I422ToARGBRow_AVX2
-                                       : I422ToARGBRow_Any_AVX2;
-  }
-#endif
-#if defined(HAS_I422TOARGBROW_NEON)
-  if (TestCpuFlag(kCpuHasNEON)) {
-    ConvertRow = IS_ALIGNED(width, 8) ? I422ToARGBRow_NEON
-                                      : I422ToARGBRow_Any_NEON;
-  }
-#endif
-#if defined(HAS_I422TOARGBROW_MIPS_DSPR2)
-  // The DSPR2 row is only selected when width, pointers and strides all
-  // satisfy the alignment checks below.
-  if (TestCpuFlag(kCpuHasMIPS_DSPR2) && IS_ALIGNED(width, 4) &&
-      IS_ALIGNED(src_y, 4) && IS_ALIGNED(src_stride_y, 4) &&
-      IS_ALIGNED(src_u, 2) && IS_ALIGNED(src_stride_u, 2) &&
-      IS_ALIGNED(src_v, 2) && IS_ALIGNED(src_stride_v, 2) &&
-      IS_ALIGNED(dst_argb, 4) && IS_ALIGNED(dst_stride_argb, 4)) {
-    ConvertRow = I422ToARGBRow_MIPS_DSPR2;
-  }
-#endif
-
-  for (row = 0; row < height; ++row) {
-    ConvertRow(src_y, src_u, src_v, dst_argb, width);
-    src_y += src_stride_y;
-    dst_argb += dst_stride_argb;
-    // Two luma rows share one chroma row: step U/V after each odd row.
-    if ((row & 1) != 0) {
-      src_u += src_stride_u;
-      src_v += src_stride_v;
-    }
-  }
-  return 0;
-}
-
-// Convert I420 to BGRA.
-// Returns 0 on success, -1 on invalid arguments.
-LIBYUV_API
-int I420ToBGRA(const uint8* src_y, int src_stride_y,
-               const uint8* src_u, int src_stride_u,
-               const uint8* src_v, int src_stride_v,
-               uint8* dst_bgra, int dst_stride_bgra,
-               int width, int height) {
-  int row;
-  void (*ConvertRow)(const uint8* y_buf,
-                     const uint8* u_buf,
-                     const uint8* v_buf,
-                     uint8* rgb_buf,
-                     int width) = I422ToBGRARow_C;
-  if (!src_y || !src_u || !src_v || !dst_bgra) {
-    return -1;
-  }
-  if (width <= 0 || height == 0) {
-    return -1;
-  }
-  // Negative height flips the image: write from the last row upwards.
-  if (height < 0) {
-    height = -height;
-    dst_bgra += (height - 1) * dst_stride_bgra;
-    dst_stride_bgra = -dst_stride_bgra;
-  }
-#if defined(HAS_I422TOBGRAROW_SSSE3)
-  if (TestCpuFlag(kCpuHasSSSE3)) {
-    ConvertRow = IS_ALIGNED(width, 8) ? I422ToBGRARow_SSSE3
-                                      : I422ToBGRARow_Any_SSSE3;
-  }
-#endif
-#if defined(HAS_I422TOBGRAROW_AVX2)
-  if (TestCpuFlag(kCpuHasAVX2)) {
-    ConvertRow = IS_ALIGNED(width, 16) ? I422ToBGRARow_AVX2
-                                       : I422ToBGRARow_Any_AVX2;
-  }
-#endif
-#if defined(HAS_I422TOBGRAROW_NEON)
-  if (TestCpuFlag(kCpuHasNEON)) {
-    ConvertRow = IS_ALIGNED(width, 8) ? I422ToBGRARow_NEON
-                                      : I422ToBGRARow_Any_NEON;
-  }
-#endif
-#if defined(HAS_I422TOBGRAROW_MIPS_DSPR2)
-  // The DSPR2 row is only selected when width, pointers and strides all
-  // satisfy the alignment checks below.
-  if (TestCpuFlag(kCpuHasMIPS_DSPR2) && IS_ALIGNED(width, 4) &&
-      IS_ALIGNED(src_y, 4) && IS_ALIGNED(src_stride_y, 4) &&
-      IS_ALIGNED(src_u, 2) && IS_ALIGNED(src_stride_u, 2) &&
-      IS_ALIGNED(src_v, 2) && IS_ALIGNED(src_stride_v, 2) &&
-      IS_ALIGNED(dst_bgra, 4) && IS_ALIGNED(dst_stride_bgra, 4)) {
-    ConvertRow = I422ToBGRARow_MIPS_DSPR2;
-  }
-#endif
-
-  for (row = 0; row < height; ++row) {
-    ConvertRow(src_y, src_u, src_v, dst_bgra, width);
-    src_y += src_stride_y;
-    dst_bgra += dst_stride_bgra;
-    // Two luma rows share one chroma row: step U/V after each odd row.
-    if ((row & 1) != 0) {
-      src_u += src_stride_u;
-      src_v += src_stride_v;
-    }
-  }
-  return 0;
-}
-
-// Convert I420 to ABGR.
-// Returns 0 on success, -1 on invalid arguments.
-LIBYUV_API
-int I420ToABGR(const uint8* src_y, int src_stride_y,
-               const uint8* src_u, int src_stride_u,
-               const uint8* src_v, int src_stride_v,
-               uint8* dst_abgr, int dst_stride_abgr,
-               int width, int height) {
-  int row;
-  void (*ConvertRow)(const uint8* y_buf,
-                     const uint8* u_buf,
-                     const uint8* v_buf,
-                     uint8* rgb_buf,
-                     int width) = I422ToABGRRow_C;
-  if (!src_y || !src_u || !src_v || !dst_abgr) {
-    return -1;
-  }
-  if (width <= 0 || height == 0) {
-    return -1;
-  }
-  // Negative height flips the image: write from the last row upwards.
-  if (height < 0) {
-    height = -height;
-    dst_abgr += (height - 1) * dst_stride_abgr;
-    dst_stride_abgr = -dst_stride_abgr;
-  }
-#if defined(HAS_I422TOABGRROW_SSSE3)
-  if (TestCpuFlag(kCpuHasSSSE3)) {
-    ConvertRow = IS_ALIGNED(width, 8) ? I422ToABGRRow_SSSE3
-                                      : I422ToABGRRow_Any_SSSE3;
-  }
-#endif
-#if defined(HAS_I422TOABGRROW_AVX2)
-  if (TestCpuFlag(kCpuHasAVX2)) {
-    ConvertRow = IS_ALIGNED(width, 16) ? I422ToABGRRow_AVX2
-                                       : I422ToABGRRow_Any_AVX2;
-  }
-#endif
-#if defined(HAS_I422TOABGRROW_NEON)
-  if (TestCpuFlag(kCpuHasNEON)) {
-    ConvertRow = IS_ALIGNED(width, 8) ? I422ToABGRRow_NEON
-                                      : I422ToABGRRow_Any_NEON;
-  }
-#endif
-
-  for (row = 0; row < height; ++row) {
-    ConvertRow(src_y, src_u, src_v, dst_abgr, width);
-    src_y += src_stride_y;
-    dst_abgr += dst_stride_abgr;
-    // Two luma rows share one chroma row: step U/V after each odd row.
-    if ((row & 1) != 0) {
-      src_u += src_stride_u;
-      src_v += src_stride_v;
-    }
-  }
-  return 0;
-}
-
-// Convert I420 to RGBA.
-// Returns 0 on success, -1 on invalid arguments.
-LIBYUV_API
-int I420ToRGBA(const uint8* src_y, int src_stride_y,
-               const uint8* src_u, int src_stride_u,
-               const uint8* src_v, int src_stride_v,
-               uint8* dst_rgba, int dst_stride_rgba,
-               int width, int height) {
-  int row;
-  void (*ConvertRow)(const uint8* y_buf,
-                     const uint8* u_buf,
-                     const uint8* v_buf,
-                     uint8* rgb_buf,
-                     int width) = I422ToRGBARow_C;
-  if (!src_y || !src_u || !src_v || !dst_rgba) {
-    return -1;
-  }
-  if (width <= 0 || height == 0) {
-    return -1;
-  }
-  // Negative height flips the image: write from the last row upwards.
-  if (height < 0) {
-    height = -height;
-    dst_rgba += (height - 1) * dst_stride_rgba;
-    dst_stride_rgba = -dst_stride_rgba;
-  }
-#if defined(HAS_I422TORGBAROW_SSSE3)
-  if (TestCpuFlag(kCpuHasSSSE3)) {
-    ConvertRow = IS_ALIGNED(width, 8) ? I422ToRGBARow_SSSE3
-                                      : I422ToRGBARow_Any_SSSE3;
-  }
-#endif
-#if defined(HAS_I422TORGBAROW_AVX2)
-  if (TestCpuFlag(kCpuHasAVX2)) {
-    ConvertRow = IS_ALIGNED(width, 16) ? I422ToRGBARow_AVX2
-                                       : I422ToRGBARow_Any_AVX2;
-  }
-#endif
-#if defined(HAS_I422TORGBAROW_NEON)
-  if (TestCpuFlag(kCpuHasNEON)) {
-    ConvertRow = IS_ALIGNED(width, 8) ? I422ToRGBARow_NEON
-                                      : I422ToRGBARow_Any_NEON;
-  }
-#endif
-
-  for (row = 0; row < height; ++row) {
-    ConvertRow(src_y, src_u, src_v, dst_rgba, width);
-    src_y += src_stride_y;
-    dst_rgba += dst_stride_rgba;
-    // Two luma rows share one chroma row: step U/V after each odd row.
-    if ((row & 1) != 0) {
-      src_u += src_stride_u;
-      src_v += src_stride_v;
-    }
-  }
-  return 0;
-}
-
-// Convert I420 to RGB24.
-// Returns 0 on success, -1 on invalid arguments.
-LIBYUV_API
-int I420ToRGB24(const uint8* src_y, int src_stride_y,
-                const uint8* src_u, int src_stride_u,
-                const uint8* src_v, int src_stride_v,
-                uint8* dst_rgb24, int dst_stride_rgb24,
-                int width, int height) {
-  int row;
-  void (*ConvertRow)(const uint8* y_buf,
-                     const uint8* u_buf,
-                     const uint8* v_buf,
-                     uint8* rgb_buf,
-                     int width) = I422ToRGB24Row_C;
-  if (!src_y || !src_u || !src_v || !dst_rgb24) {
-    return -1;
-  }
-  if (width <= 0 || height == 0) {
-    return -1;
-  }
-  // Negative height flips the image: write from the last row upwards.
-  if (height < 0) {
-    height = -height;
-    dst_rgb24 += (height - 1) * dst_stride_rgb24;
-    dst_stride_rgb24 = -dst_stride_rgb24;
-  }
-#if defined(HAS_I422TORGB24ROW_SSSE3)
-  if (TestCpuFlag(kCpuHasSSSE3)) {
-    ConvertRow = IS_ALIGNED(width, 8) ? I422ToRGB24Row_SSSE3
-                                      : I422ToRGB24Row_Any_SSSE3;
-  }
-#endif
-#if defined(HAS_I422TORGB24ROW_AVX2)
-  if (TestCpuFlag(kCpuHasAVX2)) {
-    ConvertRow = IS_ALIGNED(width, 16) ? I422ToRGB24Row_AVX2
-                                       : I422ToRGB24Row_Any_AVX2;
-  }
-#endif
-#if defined(HAS_I422TORGB24ROW_NEON)
-  if (TestCpuFlag(kCpuHasNEON)) {
-    ConvertRow = IS_ALIGNED(width, 8) ? I422ToRGB24Row_NEON
-                                      : I422ToRGB24Row_Any_NEON;
-  }
-#endif
-
-  for (row = 0; row < height; ++row) {
-    ConvertRow(src_y, src_u, src_v, dst_rgb24, width);
-    src_y += src_stride_y;
-    dst_rgb24 += dst_stride_rgb24;
-    // Two luma rows share one chroma row: step U/V after each odd row.
-    if ((row & 1) != 0) {
-      src_u += src_stride_u;
-      src_v += src_stride_v;
-    }
-  }
-  return 0;
-}
-
-// Convert I420 to RAW.
-// Returns 0 on success, -1 on invalid arguments.
-LIBYUV_API
-int I420ToRAW(const uint8* src_y, int src_stride_y,
-              const uint8* src_u, int src_stride_u,
-              const uint8* src_v, int src_stride_v,
-              uint8* dst_raw, int dst_stride_raw,
-              int width, int height) {
-  int row;
-  void (*ConvertRow)(const uint8* y_buf,
-                     const uint8* u_buf,
-                     const uint8* v_buf,
-                     uint8* rgb_buf,
-                     int width) = I422ToRAWRow_C;
-  if (!src_y || !src_u || !src_v || !dst_raw) {
-    return -1;
-  }
-  if (width <= 0 || height == 0) {
-    return -1;
-  }
-  // Negative height flips the image: write from the last row upwards.
-  if (height < 0) {
-    height = -height;
-    dst_raw += (height - 1) * dst_stride_raw;
-    dst_stride_raw = -dst_stride_raw;
-  }
-#if defined(HAS_I422TORAWROW_SSSE3)
-  if (TestCpuFlag(kCpuHasSSSE3)) {
-    ConvertRow = IS_ALIGNED(width, 8) ? I422ToRAWRow_SSSE3
-                                      : I422ToRAWRow_Any_SSSE3;
-  }
-#endif
-#if defined(HAS_I422TORAWROW_AVX2)
-  if (TestCpuFlag(kCpuHasAVX2)) {
-    ConvertRow = IS_ALIGNED(width, 16) ? I422ToRAWRow_AVX2
-                                       : I422ToRAWRow_Any_AVX2;
-  }
-#endif
-#if defined(HAS_I422TORAWROW_NEON)
-  if (TestCpuFlag(kCpuHasNEON)) {
-    ConvertRow = IS_ALIGNED(width, 8) ? I422ToRAWRow_NEON
-                                      : I422ToRAWRow_Any_NEON;
-  }
-#endif
-
-  for (row = 0; row < height; ++row) {
-    ConvertRow(src_y, src_u, src_v, dst_raw, width);
-    src_y += src_stride_y;
-    dst_raw += dst_stride_raw;
-    // Two luma rows share one chroma row: step U/V after each odd row.
-    if ((row & 1) != 0) {
-      src_u += src_stride_u;
-      src_v += src_stride_v;
-    }
-  }
-  return 0;
-}
-
-// Convert I420 to ARGB1555.
-// Returns 0 on success, -1 on invalid arguments.
-LIBYUV_API
-int I420ToARGB1555(const uint8* src_y, int src_stride_y,
-                   const uint8* src_u, int src_stride_u,
-                   const uint8* src_v, int src_stride_v,
-                   uint8* dst_argb1555, int dst_stride_argb1555,
-                   int width, int height) {
-  int row;
-  void (*ConvertRow)(const uint8* y_buf,
-                     const uint8* u_buf,
-                     const uint8* v_buf,
-                     uint8* rgb_buf,
-                     int width) = I422ToARGB1555Row_C;
-  if (!src_y || !src_u || !src_v || !dst_argb1555) {
-    return -1;
-  }
-  if (width <= 0 || height == 0) {
-    return -1;
-  }
-  // Negative height flips the image: write from the last row upwards.
-  if (height < 0) {
-    height = -height;
-    dst_argb1555 += (height - 1) * dst_stride_argb1555;
-    dst_stride_argb1555 = -dst_stride_argb1555;
-  }
-#if defined(HAS_I422TOARGB1555ROW_SSSE3)
-  if (TestCpuFlag(kCpuHasSSSE3)) {
-    ConvertRow = IS_ALIGNED(width, 8) ? I422ToARGB1555Row_SSSE3
-                                      : I422ToARGB1555Row_Any_SSSE3;
-  }
-#endif
-#if defined(HAS_I422TOARGB1555ROW_AVX2)
-  if (TestCpuFlag(kCpuHasAVX2)) {
-    ConvertRow = IS_ALIGNED(width, 16) ? I422ToARGB1555Row_AVX2
-                                       : I422ToARGB1555Row_Any_AVX2;
-  }
-#endif
-#if defined(HAS_I422TOARGB1555ROW_NEON)
-  if (TestCpuFlag(kCpuHasNEON)) {
-    ConvertRow = IS_ALIGNED(width, 8) ? I422ToARGB1555Row_NEON
-                                      : I422ToARGB1555Row_Any_NEON;
-  }
-#endif
-
-  for (row = 0; row < height; ++row) {
-    ConvertRow(src_y, src_u, src_v, dst_argb1555, width);
-    src_y += src_stride_y;
-    dst_argb1555 += dst_stride_argb1555;
-    // Two luma rows share one chroma row: step U/V after each odd row.
-    if ((row & 1) != 0) {
-      src_u += src_stride_u;
-      src_v += src_stride_v;
-    }
-  }
-  return 0;
-}
-
-
-// Convert I420 to ARGB4444.
-// Returns 0 on success, -1 on invalid arguments.
-LIBYUV_API
-int I420ToARGB4444(const uint8* src_y, int src_stride_y,
-                   const uint8* src_u, int src_stride_u,
-                   const uint8* src_v, int src_stride_v,
-                   uint8* dst_argb4444, int dst_stride_argb4444,
-                   int width, int height) {
-  int row;
-  void (*ConvertRow)(const uint8* y_buf,
-                     const uint8* u_buf,
-                     const uint8* v_buf,
-                     uint8* rgb_buf,
-                     int width) = I422ToARGB4444Row_C;
-  if (!src_y || !src_u || !src_v || !dst_argb4444) {
-    return -1;
-  }
-  if (width <= 0 || height == 0) {
-    return -1;
-  }
-  // Negative height flips the image: write from the last row upwards.
-  if (height < 0) {
-    height = -height;
-    dst_argb4444 += (height - 1) * dst_stride_argb4444;
-    dst_stride_argb4444 = -dst_stride_argb4444;
-  }
-#if defined(HAS_I422TOARGB4444ROW_SSSE3)
-  if (TestCpuFlag(kCpuHasSSSE3)) {
-    ConvertRow = IS_ALIGNED(width, 8) ? I422ToARGB4444Row_SSSE3
-                                      : I422ToARGB4444Row_Any_SSSE3;
-  }
-#endif
-#if defined(HAS_I422TOARGB4444ROW_AVX2)
-  if (TestCpuFlag(kCpuHasAVX2)) {
-    ConvertRow = IS_ALIGNED(width, 16) ? I422ToARGB4444Row_AVX2
-                                       : I422ToARGB4444Row_Any_AVX2;
-  }
-#endif
-#if defined(HAS_I422TOARGB4444ROW_NEON)
-  if (TestCpuFlag(kCpuHasNEON)) {
-    ConvertRow = IS_ALIGNED(width, 8) ? I422ToARGB4444Row_NEON
-                                      : I422ToARGB4444Row_Any_NEON;
-  }
-#endif
-
-  for (row = 0; row < height; ++row) {
-    ConvertRow(src_y, src_u, src_v, dst_argb4444, width);
-    src_y += src_stride_y;
-    dst_argb4444 += dst_stride_argb4444;
-    // Two luma rows share one chroma row: step U/V after each odd row.
-    if ((row & 1) != 0) {
-      src_u += src_stride_u;
-      src_v += src_stride_v;
-    }
-  }
-  return 0;
-}
-
-// Convert I420 to RGB565.
-// Returns 0 on success, -1 on invalid arguments.
-LIBYUV_API
-int I420ToRGB565(const uint8* src_y, int src_stride_y,
-                 const uint8* src_u, int src_stride_u,
-                 const uint8* src_v, int src_stride_v,
-                 uint8* dst_rgb565, int dst_stride_rgb565,
-                 int width, int height) {
-  int row;
-  void (*ConvertRow)(const uint8* y_buf,
-                     const uint8* u_buf,
-                     const uint8* v_buf,
-                     uint8* rgb_buf,
-                     int width) = I422ToRGB565Row_C;
-  if (!src_y || !src_u || !src_v || !dst_rgb565) {
-    return -1;
-  }
-  if (width <= 0 || height == 0) {
-    return -1;
-  }
-  // Negative height flips the image: write from the last row upwards.
-  if (height < 0) {
-    height = -height;
-    dst_rgb565 += (height - 1) * dst_stride_rgb565;
-    dst_stride_rgb565 = -dst_stride_rgb565;
-  }
-#if defined(HAS_I422TORGB565ROW_SSSE3)
-  if (TestCpuFlag(kCpuHasSSSE3)) {
-    ConvertRow = IS_ALIGNED(width, 8) ? I422ToRGB565Row_SSSE3
-                                      : I422ToRGB565Row_Any_SSSE3;
-  }
-#endif
-#if defined(HAS_I422TORGB565ROW_AVX2)
-  if (TestCpuFlag(kCpuHasAVX2)) {
-    ConvertRow = IS_ALIGNED(width, 16) ? I422ToRGB565Row_AVX2
-                                       : I422ToRGB565Row_Any_AVX2;
-  }
-#endif
-#if defined(HAS_I422TORGB565ROW_NEON)
-  if (TestCpuFlag(kCpuHasNEON)) {
-    ConvertRow = IS_ALIGNED(width, 8) ? I422ToRGB565Row_NEON
-                                      : I422ToRGB565Row_Any_NEON;
-  }
-#endif
-
-  for (row = 0; row < height; ++row) {
-    ConvertRow(src_y, src_u, src_v, dst_rgb565, width);
-    src_y += src_stride_y;
-    dst_rgb565 += dst_stride_rgb565;
-    // Two luma rows share one chroma row: step U/V after each odd row.
-    if ((row & 1) != 0) {
-      src_u += src_stride_u;
-      src_v += src_stride_v;
-    }
-  }
-  return 0;
-}
-
-// Ordered 4x4 dither for 888 to 565, stored as four rows of four bytes. Values from 0 to 7.
-static const uint8 kDither565_4x4[16] = {
- 0, 4, 1, 5,
- 6, 2, 7, 3,
- 1, 5, 0, 4,
- 7, 3, 6, 2,
-};
-
-// Convert I420 to RGB565 with dithering.
-// Each row is first expanded to ARGB in a temporary buffer and then packed
-// to RGB565 using the 4 dither bytes selected by (row & 3). A NULL
-// dither4x4 selects the built-in kDither565_4x4 table.
-// Returns 0 on success, -1 on invalid arguments.
-LIBYUV_API
-int I420ToRGB565Dither(const uint8* src_y, int src_stride_y,
-                       const uint8* src_u, int src_stride_u,
-                       const uint8* src_v, int src_stride_v,
-                       uint8* dst_rgb565, int dst_stride_rgb565,
-                       const uint8* dither4x4, int width, int height) {
-  int row;
-  void (*ExpandRow)(const uint8* y_buf,
-                    const uint8* u_buf,
-                    const uint8* v_buf,
-                    uint8* rgb_buf,
-                    int width) = I422ToARGBRow_C;
-  void (*PackDitherRow)(const uint8* src_argb, uint8* dst_rgb,
-                        const uint32 dither4, int pix) =
-      ARGBToRGB565DitherRow_C;
-  if (!src_y || !src_u || !src_v || !dst_rgb565) {
-    return -1;
-  }
-  if (width <= 0 || height == 0) {
-    return -1;
-  }
-  // Negative height flips the image: write from the last row upwards.
-  if (height < 0) {
-    height = -height;
-    dst_rgb565 += (height - 1) * dst_stride_rgb565;
-    dst_stride_rgb565 = -dst_stride_rgb565;
-  }
-  if (!dither4x4) {
-    dither4x4 = kDither565_4x4;
-  }
-#if defined(HAS_I422TOARGBROW_SSSE3)
-  if (TestCpuFlag(kCpuHasSSSE3)) {
-    ExpandRow = IS_ALIGNED(width, 8) ? I422ToARGBRow_SSSE3
-                                     : I422ToARGBRow_Any_SSSE3;
-  }
-#endif
-#if defined(HAS_I422TOARGBROW_AVX2)
-  if (TestCpuFlag(kCpuHasAVX2)) {
-    ExpandRow = IS_ALIGNED(width, 16) ? I422ToARGBRow_AVX2
-                                      : I422ToARGBRow_Any_AVX2;
-  }
-#endif
-#if defined(HAS_I422TOARGBROW_NEON)
-  if (TestCpuFlag(kCpuHasNEON)) {
-    ExpandRow = IS_ALIGNED(width, 8) ? I422ToARGBRow_NEON
-                                     : I422ToARGBRow_Any_NEON;
-  }
-#endif
-#if defined(HAS_I422TOARGBROW_MIPS_DSPR2)
-  // Only the sources are checked here; the ARGB destination is the
-  // temporary row buffer allocated below.
-  if (TestCpuFlag(kCpuHasMIPS_DSPR2) && IS_ALIGNED(width, 4) &&
-      IS_ALIGNED(src_y, 4) && IS_ALIGNED(src_stride_y, 4) &&
-      IS_ALIGNED(src_u, 2) && IS_ALIGNED(src_stride_u, 2) &&
-      IS_ALIGNED(src_v, 2) && IS_ALIGNED(src_stride_v, 2)) {
-    ExpandRow = I422ToARGBRow_MIPS_DSPR2;
-  }
-#endif
-#if defined(HAS_ARGBTORGB565DITHERROW_SSE2)
-  if (TestCpuFlag(kCpuHasSSE2)) {
-    PackDitherRow = IS_ALIGNED(width, 4) ? ARGBToRGB565DitherRow_SSE2
-                                         : ARGBToRGB565DitherRow_Any_SSE2;
-  }
-#endif
-#if defined(HAS_ARGBTORGB565DITHERROW_AVX2)
-  if (TestCpuFlag(kCpuHasAVX2)) {
-    PackDitherRow = IS_ALIGNED(width, 8) ? ARGBToRGB565DitherRow_AVX2
-                                         : ARGBToRGB565DitherRow_Any_AVX2;
-  }
-#endif
-#if defined(HAS_ARGBTORGB565DITHERROW_NEON)
-  if (TestCpuFlag(kCpuHasNEON)) {
-    PackDitherRow = IS_ALIGNED(width, 8) ? ARGBToRGB565DitherRow_NEON
-                                         : ARGBToRGB565DitherRow_Any_NEON;
-  }
-#endif
-  {
-    // Temporary buffer holding one row of ARGB (4 bytes per pixel).
-    align_buffer_64(argb_row, width * 4);
-    for (row = 0; row < height; ++row) {
-      // NOTE(review): the 4 dither bytes are read through a uint32 cast;
-      // presumably callers pass a suitably aligned table - confirm.
-      const uint32 dither4 = *(uint32*)(dither4x4 + ((row & 3) << 2));
-      ExpandRow(src_y, src_u, src_v, argb_row, width);
-      PackDitherRow(argb_row, dst_rgb565, dither4, width);
-      src_y += src_stride_y;
-      dst_rgb565 += dst_stride_rgb565;
-      // Two luma rows share one chroma row: step U/V after each odd row.
-      if ((row & 1) != 0) {
-        src_u += src_stride_u;
-        src_v += src_stride_v;
-      }
-    }
-    free_aligned_buffer_64(argb_row);
-  }
-  return 0;
-}
-
-// Convert I420 to the format specified by fourcc.
-//
-// dst_sample receives the converted image. For packed formats
-// dst_sample_stride is the destination row pitch in bytes; 0 selects a
-// tightly packed pitch derived from width. For planar and biplanar formats
-// all planes are written consecutively into dst_sample.
-// A negative height flips the image vertically.
-// Returns 0 on success, or -1 for invalid arguments or an unsupported
-// fourcc; otherwise the result of the underlying conversion is returned.
-LIBYUV_API
-int ConvertFromI420(const uint8* y, int y_stride,
-                    const uint8* u, int u_stride,
-                    const uint8* v, int v_stride,
-                    uint8* dst_sample, int dst_sample_stride,
-                    int width, int height,
-                    uint32 fourcc) {
-  uint32 format = CanonicalFourCC(fourcc);
-  // Plane offsets inside dst_sample depend on the number of rows, not on
-  // the sign of height: a negative height only requests a vertical flip.
-  // Using the signed value would place the chroma planes before dst_sample.
-  int abs_height = height < 0 ? -height : height;
-  int r = 0;
-  if (!y || !u || !v || !dst_sample ||
-      width <= 0 || height == 0) {
-    return -1;
-  }
-  switch (format) {
-    // Single plane formats
-    case FOURCC_YUY2:
-      r = I420ToYUY2(y, y_stride,
-                     u, u_stride,
-                     v, v_stride,
-                     dst_sample,
-                     dst_sample_stride ? dst_sample_stride : width * 2,
-                     width, height);
-      break;
-    case FOURCC_UYVY:
-      r = I420ToUYVY(y, y_stride,
-                     u, u_stride,
-                     v, v_stride,
-                     dst_sample,
-                     dst_sample_stride ? dst_sample_stride : width * 2,
-                     width, height);
-      break;
-    case FOURCC_RGBP:
-      r = I420ToRGB565(y, y_stride,
-                       u, u_stride,
-                       v, v_stride,
-                       dst_sample,
-                       dst_sample_stride ? dst_sample_stride : width * 2,
-                       width, height);
-      break;
-    case FOURCC_RGBO:
-      r = I420ToARGB1555(y, y_stride,
-                         u, u_stride,
-                         v, v_stride,
-                         dst_sample,
-                         dst_sample_stride ? dst_sample_stride : width * 2,
-                         width, height);
-      break;
-    case FOURCC_R444:
-      r = I420ToARGB4444(y, y_stride,
-                         u, u_stride,
-                         v, v_stride,
-                         dst_sample,
-                         dst_sample_stride ? dst_sample_stride : width * 2,
-                         width, height);
-      break;
-    case FOURCC_24BG:
-      r = I420ToRGB24(y, y_stride,
-                      u, u_stride,
-                      v, v_stride,
-                      dst_sample,
-                      dst_sample_stride ? dst_sample_stride : width * 3,
-                      width, height);
-      break;
-    case FOURCC_RAW:
-      r = I420ToRAW(y, y_stride,
-                    u, u_stride,
-                    v, v_stride,
-                    dst_sample,
-                    dst_sample_stride ? dst_sample_stride : width * 3,
-                    width, height);
-      break;
-    case FOURCC_ARGB:
-      r = I420ToARGB(y, y_stride,
-                     u, u_stride,
-                     v, v_stride,
-                     dst_sample,
-                     dst_sample_stride ? dst_sample_stride : width * 4,
-                     width, height);
-      break;
-    case FOURCC_BGRA:
-      r = I420ToBGRA(y, y_stride,
-                     u, u_stride,
-                     v, v_stride,
-                     dst_sample,
-                     dst_sample_stride ? dst_sample_stride : width * 4,
-                     width, height);
-      break;
-    case FOURCC_ABGR:
-      r = I420ToABGR(y, y_stride,
-                     u, u_stride,
-                     v, v_stride,
-                     dst_sample,
-                     dst_sample_stride ? dst_sample_stride : width * 4,
-                     width, height);
-      break;
-    case FOURCC_RGBA:
-      r = I420ToRGBA(y, y_stride,
-                     u, u_stride,
-                     v, v_stride,
-                     dst_sample,
-                     dst_sample_stride ? dst_sample_stride : width * 4,
-                     width, height);
-      break;
-    case FOURCC_I400:
-      r = I400Copy(y, y_stride,
-                   dst_sample,
-                   dst_sample_stride ? dst_sample_stride : width,
-                   width, height);
-      break;
-    // Biplanar formats: the UV plane follows the Y plane, whose size is
-    // the actual Y pitch times the row count (not width * height, which
-    // would overlap the Y plane when the pitch exceeds the width).
-    case FOURCC_NV12: {
-      int dst_y_stride = dst_sample_stride ? dst_sample_stride : width;
-      uint8* dst_uv = dst_sample + dst_y_stride * abs_height;
-      r = I420ToNV12(y, y_stride,
-                     u, u_stride,
-                     v, v_stride,
-                     dst_sample, dst_y_stride,
-                     dst_uv, dst_y_stride,
-                     width, height);
-      break;
-    }
-    case FOURCC_NV21: {
-      int dst_y_stride = dst_sample_stride ? dst_sample_stride : width;
-      uint8* dst_vu = dst_sample + dst_y_stride * abs_height;
-      r = I420ToNV21(y, y_stride,
-                     u, u_stride,
-                     v, v_stride,
-                     dst_sample, dst_y_stride,
-                     dst_vu, dst_y_stride,
-                     width, height);
-      break;
-    }
-    // TODO(fbarchard): Add M420.
-    // Triplanar formats. These are written tightly packed (pitch == width);
-    // dst_sample_stride is not used for them.
-    // TODO(fbarchard): halfstride instead of halfwidth
-    case FOURCC_I420:
-    case FOURCC_YU12:
-    case FOURCC_YV12: {
-      int halfwidth = (width + 1) / 2;
-      int halfheight = (abs_height + 1) / 2;
-      uint8* dst_u;
-      uint8* dst_v;
-      // YV12 stores the V plane before the U plane.
-      if (format == FOURCC_YV12) {
-        dst_v = dst_sample + width * abs_height;
-        dst_u = dst_v + halfwidth * halfheight;
-      } else {
-        dst_u = dst_sample + width * abs_height;
-        dst_v = dst_u + halfwidth * halfheight;
-      }
-      r = I420Copy(y, y_stride,
-                   u, u_stride,
-                   v, v_stride,
-                   dst_sample, width,
-                   dst_u, halfwidth,
-                   dst_v, halfwidth,
-                   width, height);
-      break;
-    }
-    case FOURCC_I422:
-    case FOURCC_YV16: {
-      int halfwidth = (width + 1) / 2;
-      uint8* dst_u;
-      uint8* dst_v;
-      // YV16 stores the V plane before the U plane.
-      if (format == FOURCC_YV16) {
-        dst_v = dst_sample + width * abs_height;
-        dst_u = dst_v + halfwidth * abs_height;
-      } else {
-        dst_u = dst_sample + width * abs_height;
-        dst_v = dst_u + halfwidth * abs_height;
-      }
-      r = I420ToI422(y, y_stride,
-                     u, u_stride,
-                     v, v_stride,
-                     dst_sample, width,
-                     dst_u, halfwidth,
-                     dst_v, halfwidth,
-                     width, height);
-      break;
-    }
-    case FOURCC_I444:
-    case FOURCC_YV24: {
-      uint8* dst_u;
-      uint8* dst_v;
-      // YV24 stores the V plane before the U plane.
-      if (format == FOURCC_YV24) {
-        dst_v = dst_sample + width * abs_height;
-        dst_u = dst_v + width * abs_height;
-      } else {
-        dst_u = dst_sample + width * abs_height;
-        dst_v = dst_u + width * abs_height;
-      }
-      r = I420ToI444(y, y_stride,
-                     u, u_stride,
-                     v, v_stride,
-                     dst_sample, width,
-                     dst_u, width,
-                     dst_v, width,
-                     width, height);
-      break;
-    }
-    case FOURCC_I411: {
-      int quarterwidth = (width + 3) / 4;
-      uint8* dst_u = dst_sample + width * abs_height;
-      uint8* dst_v = dst_u + quarterwidth * abs_height;
-      r = I420ToI411(y, y_stride,
-                     u, u_stride,
-                     v, v_stride,
-                     dst_sample, width,
-                     dst_u, quarterwidth,
-                     dst_v, quarterwidth,
-                     width, height);
-      break;
-    }
-
-    // Formats not supported - MJPG, biplanar, some rgb formats.
-    default:
-      return -1;  // unknown fourcc - return failure code.
-  }
-  return r;
-}
-
-#ifdef __cplusplus
-} // extern "C"
-} // namespace libyuv
-#endif
diff --git a/third_party/aom/third_party/libyuv/source/convert_from_argb.cc b/third_party/aom/third_party/libyuv/source/convert_from_argb.cc
deleted file mode 100644
index 8d1e97aec..000000000
--- a/third_party/aom/third_party/libyuv/source/convert_from_argb.cc
+++ /dev/null
@@ -1,1301 +0,0 @@
-/*
- * Copyright 2012 The LibYuv Project Authors. All rights reserved.
- *
- * Use of this source code is governed by a BSD-style license
- * that can be found in the LICENSE file in the root of the source
- * tree. An additional intellectual property rights grant can be found
- * in the file PATENTS. All contributing project authors may
- * be found in the AUTHORS file in the root of the source tree.
- */
-
-#include "libyuv/convert_from_argb.h"
-
-#include "libyuv/basic_types.h"
-#include "libyuv/cpu_id.h"
-#include "libyuv/planar_functions.h"
-#include "libyuv/row.h"
-
-#ifdef __cplusplus
-namespace libyuv {
-extern "C" {
-#endif
-
-// ARGB little endian (bgra in memory) to I444.
-// Returns 0 on success, -1 on invalid arguments.
-LIBYUV_API
-int ARGBToI444(const uint8* src_argb, int src_stride_argb,
-               uint8* dst_y, int dst_stride_y,
-               uint8* dst_u, int dst_stride_u,
-               uint8* dst_v, int dst_stride_v,
-               int width, int height) {
-  int row;
-  void (*LumaRow)(const uint8* src_argb, uint8* dst_y, int pix) =
-      ARGBToYRow_C;
-  void (*ChromaRow)(const uint8* src_argb, uint8* dst_u, uint8* dst_v,
-                    int pix) = ARGBToUV444Row_C;
-  if (!src_argb || !dst_y || !dst_u || !dst_v) {
-    return -1;
-  }
-  if (width <= 0 || height == 0) {
-    return -1;
-  }
-  // Negative height flips the image: read the source from its last row
-  // upwards.
-  if (height < 0) {
-    height = -height;
-    src_argb += (height - 1) * src_stride_argb;
-    src_stride_argb = -src_stride_argb;
-  }
-  // Coalesce rows: when every plane is contiguous, treat the whole image
-  // as a single long row.
-  if (src_stride_argb == width * 4 && dst_stride_y == width &&
-      dst_stride_u == width && dst_stride_v == width) {
-    width *= height;
-    height = 1;
-    src_stride_argb = dst_stride_y = dst_stride_u = dst_stride_v = 0;
-  }
-#if defined(HAS_ARGBTOUV444ROW_SSSE3)
-  if (TestCpuFlag(kCpuHasSSSE3)) {
-    ChromaRow = IS_ALIGNED(width, 16) ? ARGBToUV444Row_SSSE3
-                                      : ARGBToUV444Row_Any_SSSE3;
-  }
-#endif
-#if defined(HAS_ARGBTOUV444ROW_NEON)
-  if (TestCpuFlag(kCpuHasNEON)) {
-    ChromaRow = IS_ALIGNED(width, 8) ? ARGBToUV444Row_NEON
-                                     : ARGBToUV444Row_Any_NEON;
-  }
-#endif
-#if defined(HAS_ARGBTOYROW_SSSE3)
-  if (TestCpuFlag(kCpuHasSSSE3)) {
-    LumaRow = IS_ALIGNED(width, 16) ? ARGBToYRow_SSSE3
-                                    : ARGBToYRow_Any_SSSE3;
-  }
-#endif
-#if defined(HAS_ARGBTOYROW_AVX2)
-  if (TestCpuFlag(kCpuHasAVX2)) {
-    LumaRow = IS_ALIGNED(width, 32) ? ARGBToYRow_AVX2
-                                    : ARGBToYRow_Any_AVX2;
-  }
-#endif
-#if defined(HAS_ARGBTOYROW_NEON)
-  if (TestCpuFlag(kCpuHasNEON)) {
-    LumaRow = IS_ALIGNED(width, 8) ? ARGBToYRow_NEON
-                                   : ARGBToYRow_Any_NEON;
-  }
-#endif
-
-  for (row = 0; row < height; ++row) {
-    ChromaRow(src_argb, dst_u, dst_v, width);
-    LumaRow(src_argb, dst_y, width);
-    src_argb += src_stride_argb;
-    dst_y += dst_stride_y;
-    dst_u += dst_stride_u;
-    dst_v += dst_stride_v;
-  }
-  return 0;
-}
-
-// ARGB little endian (bgra in memory) to I422.
-// Returns 0 on success, -1 on invalid arguments.
-LIBYUV_API
-int ARGBToI422(const uint8* src_argb, int src_stride_argb,
-               uint8* dst_y, int dst_stride_y,
-               uint8* dst_u, int dst_stride_u,
-               uint8* dst_v, int dst_stride_v,
-               int width, int height) {
-  int row;
-  void (*ChromaRow)(const uint8* src_argb, uint8* dst_u, uint8* dst_v,
-                    int pix) = ARGBToUV422Row_C;
-  void (*LumaRow)(const uint8* src_argb, uint8* dst_y, int pix) =
-      ARGBToYRow_C;
-  if (!src_argb || !dst_y || !dst_u || !dst_v) {
-    return -1;
-  }
-  if (width <= 0 || height == 0) {
-    return -1;
-  }
-  // Negative height flips the image: read the source from its last row
-  // upwards.
-  if (height < 0) {
-    height = -height;
-    src_argb += (height - 1) * src_stride_argb;
-    src_stride_argb = -src_stride_argb;
-  }
-  // Coalesce rows: when every plane is contiguous, treat the whole image
-  // as a single long row.
-  if (src_stride_argb == width * 4 && dst_stride_y == width &&
-      dst_stride_u * 2 == width && dst_stride_v * 2 == width) {
-    width *= height;
-    height = 1;
-    src_stride_argb = dst_stride_y = dst_stride_u = dst_stride_v = 0;
-  }
-#if defined(HAS_ARGBTOUV422ROW_SSSE3)
-  if (TestCpuFlag(kCpuHasSSSE3)) {
-    ChromaRow = IS_ALIGNED(width, 16) ? ARGBToUV422Row_SSSE3
-                                      : ARGBToUV422Row_Any_SSSE3;
-  }
-#endif
-#if defined(HAS_ARGBTOUV422ROW_NEON)
-  if (TestCpuFlag(kCpuHasNEON)) {
-    ChromaRow = IS_ALIGNED(width, 16) ? ARGBToUV422Row_NEON
-                                      : ARGBToUV422Row_Any_NEON;
-  }
-#endif
-#if defined(HAS_ARGBTOYROW_SSSE3)
-  if (TestCpuFlag(kCpuHasSSSE3)) {
-    LumaRow = IS_ALIGNED(width, 16) ? ARGBToYRow_SSSE3
-                                    : ARGBToYRow_Any_SSSE3;
-  }
-#endif
-#if defined(HAS_ARGBTOYROW_AVX2)
-  if (TestCpuFlag(kCpuHasAVX2)) {
-    LumaRow = IS_ALIGNED(width, 32) ? ARGBToYRow_AVX2
-                                    : ARGBToYRow_Any_AVX2;
-  }
-#endif
-#if defined(HAS_ARGBTOYROW_NEON)
-  if (TestCpuFlag(kCpuHasNEON)) {
-    LumaRow = IS_ALIGNED(width, 8) ? ARGBToYRow_NEON
-                                   : ARGBToYRow_Any_NEON;
-  }
-#endif
-
-  for (row = 0; row < height; ++row) {
-    ChromaRow(src_argb, dst_u, dst_v, width);
-    LumaRow(src_argb, dst_y, width);
-    src_argb += src_stride_argb;
-    dst_y += dst_stride_y;
-    dst_u += dst_stride_u;
-    dst_v += dst_stride_v;
-  }
-  return 0;
-}
-
-// ARGB little endian (bgra in memory) to I411.
-// Returns 0 on success, -1 on invalid arguments.
-LIBYUV_API
-int ARGBToI411(const uint8* src_argb, int src_stride_argb,
-               uint8* dst_y, int dst_stride_y,
-               uint8* dst_u, int dst_stride_u,
-               uint8* dst_v, int dst_stride_v,
-               int width, int height) {
-  int row;
-  void (*ChromaRow)(const uint8* src_argb, uint8* dst_u, uint8* dst_v,
-                    int pix) = ARGBToUV411Row_C;
-  void (*LumaRow)(const uint8* src_argb, uint8* dst_y, int pix) =
-      ARGBToYRow_C;
-  if (!src_argb || !dst_y || !dst_u || !dst_v) {
-    return -1;
-  }
-  if (width <= 0 || height == 0) {
-    return -1;
-  }
-  // Negative height flips the image: read the source from its last row
-  // upwards.
-  if (height < 0) {
-    height = -height;
-    src_argb += (height - 1) * src_stride_argb;
-    src_stride_argb = -src_stride_argb;
-  }
-  // Coalesce rows: when every plane is contiguous, treat the whole image
-  // as a single long row.
-  if (src_stride_argb == width * 4 && dst_stride_y == width &&
-      dst_stride_u * 4 == width && dst_stride_v * 4 == width) {
-    width *= height;
-    height = 1;
-    src_stride_argb = dst_stride_y = dst_stride_u = dst_stride_v = 0;
-  }
-#if defined(HAS_ARGBTOYROW_SSSE3)
-  if (TestCpuFlag(kCpuHasSSSE3)) {
-    LumaRow = IS_ALIGNED(width, 16) ? ARGBToYRow_SSSE3
-                                    : ARGBToYRow_Any_SSSE3;
-  }
-#endif
-#if defined(HAS_ARGBTOYROW_AVX2)
-  if (TestCpuFlag(kCpuHasAVX2)) {
-    LumaRow = IS_ALIGNED(width, 32) ? ARGBToYRow_AVX2
-                                    : ARGBToYRow_Any_AVX2;
-  }
-#endif
-#if defined(HAS_ARGBTOYROW_NEON)
-  if (TestCpuFlag(kCpuHasNEON)) {
-    LumaRow = IS_ALIGNED(width, 8) ? ARGBToYRow_NEON
-                                   : ARGBToYRow_Any_NEON;
-  }
-#endif
-#if defined(HAS_ARGBTOUV411ROW_NEON)
-  if (TestCpuFlag(kCpuHasNEON)) {
-    ChromaRow = IS_ALIGNED(width, 32) ? ARGBToUV411Row_NEON
-                                      : ARGBToUV411Row_Any_NEON;
-  }
-#endif
-
-  for (row = 0; row < height; ++row) {
-    ChromaRow(src_argb, dst_u, dst_v, width);
-    LumaRow(src_argb, dst_y, width);
-    src_argb += src_stride_argb;
-    dst_y += dst_stride_y;
-    dst_u += dst_stride_u;
-    dst_v += dst_stride_v;
-  }
-  return 0;
-}
-
-LIBYUV_API
-int ARGBToNV12(const uint8* src_argb, int src_stride_argb,
- uint8* dst_y, int dst_stride_y,
- uint8* dst_uv, int dst_stride_uv,
- int width, int height) {
- int y;
- int halfwidth = (width + 1) >> 1;
- void (*ARGBToUVRow)(const uint8* src_argb0, int src_stride_argb,
- uint8* dst_u, uint8* dst_v, int width) = ARGBToUVRow_C;
- void (*ARGBToYRow)(const uint8* src_argb, uint8* dst_y, int pix) =
- ARGBToYRow_C;
- void (*MergeUVRow_)(const uint8* src_u, const uint8* src_v, uint8* dst_uv,
- int width) = MergeUVRow_C;
- if (!src_argb ||
- !dst_y || !dst_uv ||
- width <= 0 || height == 0) {
- return -1;
- }
- // Negative height means invert the image.
- if (height < 0) {
- height = -height;
- src_argb = src_argb + (height - 1) * src_stride_argb;
- src_stride_argb = -src_stride_argb;
- }
-#if defined(HAS_ARGBTOYROW_SSSE3) && defined(HAS_ARGBTOUVROW_SSSE3)
- if (TestCpuFlag(kCpuHasSSSE3)) {
- ARGBToUVRow = ARGBToUVRow_Any_SSSE3;
- ARGBToYRow = ARGBToYRow_Any_SSSE3;
- if (IS_ALIGNED(width, 16)) {
- ARGBToUVRow = ARGBToUVRow_SSSE3;
- ARGBToYRow = ARGBToYRow_SSSE3;
- }
- }
-#endif
-#if defined(HAS_ARGBTOYROW_AVX2) && defined(HAS_ARGBTOUVROW_AVX2)
- if (TestCpuFlag(kCpuHasAVX2)) {
- ARGBToUVRow = ARGBToUVRow_Any_AVX2;
- ARGBToYRow = ARGBToYRow_Any_AVX2;
- if (IS_ALIGNED(width, 32)) {
- ARGBToUVRow = ARGBToUVRow_AVX2;
- ARGBToYRow = ARGBToYRow_AVX2;
- }
- }
-#endif
-#if defined(HAS_ARGBTOYROW_NEON)
- if (TestCpuFlag(kCpuHasNEON)) {
- ARGBToYRow = ARGBToYRow_Any_NEON;
- if (IS_ALIGNED(width, 8)) {
- ARGBToYRow = ARGBToYRow_NEON;
- }
- }
-#endif
-#if defined(HAS_ARGBTOUVROW_NEON)
- if (TestCpuFlag(kCpuHasNEON)) {
- ARGBToUVRow = ARGBToUVRow_Any_NEON;
- if (IS_ALIGNED(width, 16)) {
- ARGBToUVRow = ARGBToUVRow_NEON;
- }
- }
-#endif
-#if defined(HAS_MERGEUVROW_SSE2)
- if (TestCpuFlag(kCpuHasSSE2)) {
- MergeUVRow_ = MergeUVRow_Any_SSE2;
- if (IS_ALIGNED(halfwidth, 16)) {
- MergeUVRow_ = MergeUVRow_SSE2;
- }
- }
-#endif
-#if defined(HAS_MERGEUVROW_AVX2)
- if (TestCpuFlag(kCpuHasAVX2)) {
- MergeUVRow_ = MergeUVRow_Any_AVX2;
- if (IS_ALIGNED(halfwidth, 32)) {
- MergeUVRow_ = MergeUVRow_AVX2;
- }
- }
-#endif
-#if defined(HAS_MERGEUVROW_NEON)
- if (TestCpuFlag(kCpuHasNEON)) {
- MergeUVRow_ = MergeUVRow_Any_NEON;
- if (IS_ALIGNED(halfwidth, 16)) {
- MergeUVRow_ = MergeUVRow_NEON;
- }
- }
-#endif
- {
- // Allocate a rows of uv.
- align_buffer_64(row_u, ((halfwidth + 31) & ~31) * 2);
- uint8* row_v = row_u + ((halfwidth + 31) & ~31);
-
- for (y = 0; y < height - 1; y += 2) {
- ARGBToUVRow(src_argb, src_stride_argb, row_u, row_v, width);
- MergeUVRow_(row_u, row_v, dst_uv, halfwidth);
- ARGBToYRow(src_argb, dst_y, width);
- ARGBToYRow(src_argb + src_stride_argb, dst_y + dst_stride_y, width);
- src_argb += src_stride_argb * 2;
- dst_y += dst_stride_y * 2;
- dst_uv += dst_stride_uv;
- }
- if (height & 1) {
- ARGBToUVRow(src_argb, 0, row_u, row_v, width);
- MergeUVRow_(row_u, row_v, dst_uv, halfwidth);
- ARGBToYRow(src_argb, dst_y, width);
- }
- free_aligned_buffer_64(row_u);
- }
- return 0;
-}
-
-// Same as NV12 but U and V swapped.
-LIBYUV_API
-int ARGBToNV21(const uint8* src_argb, int src_stride_argb,
- uint8* dst_y, int dst_stride_y,
- uint8* dst_uv, int dst_stride_uv,
- int width, int height) {
- int y;
- int halfwidth = (width + 1) >> 1;
- void (*ARGBToUVRow)(const uint8* src_argb0, int src_stride_argb,
- uint8* dst_u, uint8* dst_v, int width) = ARGBToUVRow_C;
- void (*ARGBToYRow)(const uint8* src_argb, uint8* dst_y, int pix) =
- ARGBToYRow_C;
- void (*MergeUVRow_)(const uint8* src_u, const uint8* src_v, uint8* dst_uv,
- int width) = MergeUVRow_C;
- if (!src_argb ||
- !dst_y || !dst_uv ||
- width <= 0 || height == 0) {
- return -1;
- }
- // Negative height means invert the image.
- if (height < 0) {
- height = -height;
- src_argb = src_argb + (height - 1) * src_stride_argb;
- src_stride_argb = -src_stride_argb;
- }
-#if defined(HAS_ARGBTOYROW_SSSE3) && defined(HAS_ARGBTOUVROW_SSSE3)
- if (TestCpuFlag(kCpuHasSSSE3)) {
- ARGBToUVRow = ARGBToUVRow_Any_SSSE3;
- ARGBToYRow = ARGBToYRow_Any_SSSE3;
- if (IS_ALIGNED(width, 16)) {
- ARGBToUVRow = ARGBToUVRow_SSSE3;
- ARGBToYRow = ARGBToYRow_SSSE3;
- }
- }
-#endif
-#if defined(HAS_ARGBTOYROW_AVX2) && defined(HAS_ARGBTOUVROW_AVX2)
- if (TestCpuFlag(kCpuHasAVX2)) {
- ARGBToUVRow = ARGBToUVRow_Any_AVX2;
- ARGBToYRow = ARGBToYRow_Any_AVX2;
- if (IS_ALIGNED(width, 32)) {
- ARGBToUVRow = ARGBToUVRow_AVX2;
- ARGBToYRow = ARGBToYRow_AVX2;
- }
- }
-#endif
-#if defined(HAS_ARGBTOYROW_NEON)
- if (TestCpuFlag(kCpuHasNEON)) {
- ARGBToYRow = ARGBToYRow_Any_NEON;
- if (IS_ALIGNED(width, 8)) {
- ARGBToYRow = ARGBToYRow_NEON;
- }
- }
-#endif
-#if defined(HAS_ARGBTOUVROW_NEON)
- if (TestCpuFlag(kCpuHasNEON)) {
- ARGBToUVRow = ARGBToUVRow_Any_NEON;
- if (IS_ALIGNED(width, 16)) {
- ARGBToUVRow = ARGBToUVRow_NEON;
- }
- }
-#endif
-#if defined(HAS_MERGEUVROW_SSE2)
- if (TestCpuFlag(kCpuHasSSE2)) {
- MergeUVRow_ = MergeUVRow_Any_SSE2;
- if (IS_ALIGNED(halfwidth, 16)) {
- MergeUVRow_ = MergeUVRow_SSE2;
- }
- }
-#endif
-#if defined(HAS_MERGEUVROW_AVX2)
- if (TestCpuFlag(kCpuHasAVX2)) {
- MergeUVRow_ = MergeUVRow_Any_AVX2;
- if (IS_ALIGNED(halfwidth, 32)) {
- MergeUVRow_ = MergeUVRow_AVX2;
- }
- }
-#endif
-#if defined(HAS_MERGEUVROW_NEON)
- if (TestCpuFlag(kCpuHasNEON)) {
- MergeUVRow_ = MergeUVRow_Any_NEON;
- if (IS_ALIGNED(halfwidth, 16)) {
- MergeUVRow_ = MergeUVRow_NEON;
- }
- }
-#endif
- {
- // Allocate a rows of uv.
- align_buffer_64(row_u, ((halfwidth + 31) & ~31) * 2);
- uint8* row_v = row_u + ((halfwidth + 31) & ~31);
-
- for (y = 0; y < height - 1; y += 2) {
- ARGBToUVRow(src_argb, src_stride_argb, row_u, row_v, width);
- MergeUVRow_(row_v, row_u, dst_uv, halfwidth);
- ARGBToYRow(src_argb, dst_y, width);
- ARGBToYRow(src_argb + src_stride_argb, dst_y + dst_stride_y, width);
- src_argb += src_stride_argb * 2;
- dst_y += dst_stride_y * 2;
- dst_uv += dst_stride_uv;
- }
- if (height & 1) {
- ARGBToUVRow(src_argb, 0, row_u, row_v, width);
- MergeUVRow_(row_v, row_u, dst_uv, halfwidth);
- ARGBToYRow(src_argb, dst_y, width);
- }
- free_aligned_buffer_64(row_u);
- }
- return 0;
-}
-
-// Convert ARGB to YUY2.
-LIBYUV_API
-int ARGBToYUY2(const uint8* src_argb, int src_stride_argb,
- uint8* dst_yuy2, int dst_stride_yuy2,
- int width, int height) {
- int y;
- void (*ARGBToUV422Row)(const uint8* src_argb, uint8* dst_u, uint8* dst_v,
- int pix) = ARGBToUV422Row_C;
- void (*ARGBToYRow)(const uint8* src_argb, uint8* dst_y, int pix) =
- ARGBToYRow_C;
- void (*I422ToYUY2Row)(const uint8* src_y, const uint8* src_u,
- const uint8* src_v, uint8* dst_yuy2, int width) = I422ToYUY2Row_C;
-
- if (!src_argb || !dst_yuy2 ||
- width <= 0 || height == 0) {
- return -1;
- }
- // Negative height means invert the image.
- if (height < 0) {
- height = -height;
- dst_yuy2 = dst_yuy2 + (height - 1) * dst_stride_yuy2;
- dst_stride_yuy2 = -dst_stride_yuy2;
- }
- // Coalesce rows.
- if (src_stride_argb == width * 4 &&
- dst_stride_yuy2 == width * 2) {
- width *= height;
- height = 1;
- src_stride_argb = dst_stride_yuy2 = 0;
- }
-#if defined(HAS_ARGBTOUV422ROW_SSSE3)
- if (TestCpuFlag(kCpuHasSSSE3)) {
- ARGBToUV422Row = ARGBToUV422Row_Any_SSSE3;
- if (IS_ALIGNED(width, 16)) {
- ARGBToUV422Row = ARGBToUV422Row_SSSE3;
- }
- }
-#endif
-#if defined(HAS_ARGBTOUV422ROW_NEON)
- if (TestCpuFlag(kCpuHasNEON)) {
- ARGBToUV422Row = ARGBToUV422Row_Any_NEON;
- if (IS_ALIGNED(width, 16)) {
- ARGBToUV422Row = ARGBToUV422Row_NEON;
- }
- }
-#endif
-#if defined(HAS_ARGBTOYROW_SSSE3)
- if (TestCpuFlag(kCpuHasSSSE3)) {
- ARGBToYRow = ARGBToYRow_Any_SSSE3;
- if (IS_ALIGNED(width, 16)) {
- ARGBToYRow = ARGBToYRow_SSSE3;
- }
- }
-#endif
-#if defined(HAS_ARGBTOYROW_AVX2)
- if (TestCpuFlag(kCpuHasAVX2)) {
- ARGBToYRow = ARGBToYRow_Any_AVX2;
- if (IS_ALIGNED(width, 32)) {
- ARGBToYRow = ARGBToYRow_AVX2;
- }
- }
-#endif
-#if defined(HAS_ARGBTOYROW_NEON)
- if (TestCpuFlag(kCpuHasNEON)) {
- ARGBToYRow = ARGBToYRow_Any_NEON;
- if (IS_ALIGNED(width, 8)) {
- ARGBToYRow = ARGBToYRow_NEON;
- }
- }
-#endif
-
-#if defined(HAS_I422TOYUY2ROW_SSE2)
- if (TestCpuFlag(kCpuHasSSE2)) {
- I422ToYUY2Row = I422ToYUY2Row_Any_SSE2;
- if (IS_ALIGNED(width, 16)) {
- I422ToYUY2Row = I422ToYUY2Row_SSE2;
- }
- }
-#endif
-#if defined(HAS_I422TOYUY2ROW_NEON)
- if (TestCpuFlag(kCpuHasNEON)) {
- I422ToYUY2Row = I422ToYUY2Row_Any_NEON;
- if (IS_ALIGNED(width, 16)) {
- I422ToYUY2Row = I422ToYUY2Row_NEON;
- }
- }
-#endif
-
- {
- // Allocate a rows of yuv.
- align_buffer_64(row_y, ((width + 63) & ~63) * 2);
- uint8* row_u = row_y + ((width + 63) & ~63);
- uint8* row_v = row_u + ((width + 63) & ~63) / 2;
-
- for (y = 0; y < height; ++y) {
- ARGBToUV422Row(src_argb, row_u, row_v, width);
- ARGBToYRow(src_argb, row_y, width);
- I422ToYUY2Row(row_y, row_u, row_v, dst_yuy2, width);
- src_argb += src_stride_argb;
- dst_yuy2 += dst_stride_yuy2;
- }
-
- free_aligned_buffer_64(row_y);
- }
- return 0;
-}
-
-// Convert ARGB to UYVY.
-LIBYUV_API
-int ARGBToUYVY(const uint8* src_argb, int src_stride_argb,
- uint8* dst_uyvy, int dst_stride_uyvy,
- int width, int height) {
- int y;
- void (*ARGBToUV422Row)(const uint8* src_argb, uint8* dst_u, uint8* dst_v,
- int pix) = ARGBToUV422Row_C;
- void (*ARGBToYRow)(const uint8* src_argb, uint8* dst_y, int pix) =
- ARGBToYRow_C;
- void (*I422ToUYVYRow)(const uint8* src_y, const uint8* src_u,
- const uint8* src_v, uint8* dst_uyvy, int width) = I422ToUYVYRow_C;
-
- if (!src_argb || !dst_uyvy ||
- width <= 0 || height == 0) {
- return -1;
- }
- // Negative height means invert the image.
- if (height < 0) {
- height = -height;
- dst_uyvy = dst_uyvy + (height - 1) * dst_stride_uyvy;
- dst_stride_uyvy = -dst_stride_uyvy;
- }
- // Coalesce rows.
- if (src_stride_argb == width * 4 &&
- dst_stride_uyvy == width * 2) {
- width *= height;
- height = 1;
- src_stride_argb = dst_stride_uyvy = 0;
- }
-#if defined(HAS_ARGBTOUV422ROW_SSSE3)
- if (TestCpuFlag(kCpuHasSSSE3)) {
- ARGBToUV422Row = ARGBToUV422Row_Any_SSSE3;
- if (IS_ALIGNED(width, 16)) {
- ARGBToUV422Row = ARGBToUV422Row_SSSE3;
- }
- }
-#endif
-#if defined(HAS_ARGBTOUV422ROW_NEON)
- if (TestCpuFlag(kCpuHasNEON)) {
- ARGBToUV422Row = ARGBToUV422Row_Any_NEON;
- if (IS_ALIGNED(width, 16)) {
- ARGBToUV422Row = ARGBToUV422Row_NEON;
- }
- }
-#endif
-#if defined(HAS_ARGBTOYROW_SSSE3)
- if (TestCpuFlag(kCpuHasSSSE3)) {
- ARGBToYRow = ARGBToYRow_Any_SSSE3;
- if (IS_ALIGNED(width, 16)) {
- ARGBToYRow = ARGBToYRow_SSSE3;
- }
- }
-#endif
-#if defined(HAS_ARGBTOYROW_AVX2)
- if (TestCpuFlag(kCpuHasAVX2)) {
- ARGBToYRow = ARGBToYRow_Any_AVX2;
- if (IS_ALIGNED(width, 32)) {
- ARGBToYRow = ARGBToYRow_AVX2;
- }
- }
-#endif
-#if defined(HAS_ARGBTOYROW_NEON)
- if (TestCpuFlag(kCpuHasNEON)) {
- ARGBToYRow = ARGBToYRow_Any_NEON;
- if (IS_ALIGNED(width, 8)) {
- ARGBToYRow = ARGBToYRow_NEON;
- }
- }
-#endif
-
-#if defined(HAS_I422TOUYVYROW_SSE2)
- if (TestCpuFlag(kCpuHasSSE2)) {
- I422ToUYVYRow = I422ToUYVYRow_Any_SSE2;
- if (IS_ALIGNED(width, 16)) {
- I422ToUYVYRow = I422ToUYVYRow_SSE2;
- }
- }
-#endif
-#if defined(HAS_I422TOUYVYROW_NEON)
- if (TestCpuFlag(kCpuHasNEON)) {
- I422ToUYVYRow = I422ToUYVYRow_Any_NEON;
- if (IS_ALIGNED(width, 16)) {
- I422ToUYVYRow = I422ToUYVYRow_NEON;
- }
- }
-#endif
-
- {
- // Allocate a rows of yuv.
- align_buffer_64(row_y, ((width + 63) & ~63) * 2);
- uint8* row_u = row_y + ((width + 63) & ~63);
- uint8* row_v = row_u + ((width + 63) & ~63) / 2;
-
- for (y = 0; y < height; ++y) {
- ARGBToUV422Row(src_argb, row_u, row_v, width);
- ARGBToYRow(src_argb, row_y, width);
- I422ToUYVYRow(row_y, row_u, row_v, dst_uyvy, width);
- src_argb += src_stride_argb;
- dst_uyvy += dst_stride_uyvy;
- }
-
- free_aligned_buffer_64(row_y);
- }
- return 0;
-}
-
-// Convert ARGB to I400.
-LIBYUV_API
-int ARGBToI400(const uint8* src_argb, int src_stride_argb,
- uint8* dst_y, int dst_stride_y,
- int width, int height) {
- int y;
- void (*ARGBToYRow)(const uint8* src_argb, uint8* dst_y, int pix) =
- ARGBToYRow_C;
- if (!src_argb || !dst_y || width <= 0 || height == 0) {
- return -1;
- }
- if (height < 0) {
- height = -height;
- src_argb = src_argb + (height - 1) * src_stride_argb;
- src_stride_argb = -src_stride_argb;
- }
- // Coalesce rows.
- if (src_stride_argb == width * 4 &&
- dst_stride_y == width) {
- width *= height;
- height = 1;
- src_stride_argb = dst_stride_y = 0;
- }
-#if defined(HAS_ARGBTOYROW_SSSE3)
- if (TestCpuFlag(kCpuHasSSSE3)) {
- ARGBToYRow = ARGBToYRow_Any_SSSE3;
- if (IS_ALIGNED(width, 16)) {
- ARGBToYRow = ARGBToYRow_SSSE3;
- }
- }
-#endif
-#if defined(HAS_ARGBTOYROW_AVX2)
- if (TestCpuFlag(kCpuHasAVX2)) {
- ARGBToYRow = ARGBToYRow_Any_AVX2;
- if (IS_ALIGNED(width, 32)) {
- ARGBToYRow = ARGBToYRow_AVX2;
- }
- }
-#endif
-#if defined(HAS_ARGBTOYROW_NEON)
- if (TestCpuFlag(kCpuHasNEON)) {
- ARGBToYRow = ARGBToYRow_Any_NEON;
- if (IS_ALIGNED(width, 8)) {
- ARGBToYRow = ARGBToYRow_NEON;
- }
- }
-#endif
-
- for (y = 0; y < height; ++y) {
- ARGBToYRow(src_argb, dst_y, width);
- src_argb += src_stride_argb;
- dst_y += dst_stride_y;
- }
- return 0;
-}
-
-// Shuffle table for converting ARGB to RGBA.
-static uvec8 kShuffleMaskARGBToRGBA = {
- 3u, 0u, 1u, 2u, 7u, 4u, 5u, 6u, 11u, 8u, 9u, 10u, 15u, 12u, 13u, 14u
-};
-
-// Convert ARGB to RGBA.
-LIBYUV_API
-int ARGBToRGBA(const uint8* src_argb, int src_stride_argb,
- uint8* dst_rgba, int dst_stride_rgba,
- int width, int height) {
- return ARGBShuffle(src_argb, src_stride_argb,
- dst_rgba, dst_stride_rgba,
- (const uint8*)(&kShuffleMaskARGBToRGBA),
- width, height);
-}
-
-// Convert ARGB To RGB24.
-LIBYUV_API
-int ARGBToRGB24(const uint8* src_argb, int src_stride_argb,
- uint8* dst_rgb24, int dst_stride_rgb24,
- int width, int height) {
- int y;
- void (*ARGBToRGB24Row)(const uint8* src_argb, uint8* dst_rgb, int pix) =
- ARGBToRGB24Row_C;
- if (!src_argb || !dst_rgb24 || width <= 0 || height == 0) {
- return -1;
- }
- if (height < 0) {
- height = -height;
- src_argb = src_argb + (height - 1) * src_stride_argb;
- src_stride_argb = -src_stride_argb;
- }
- // Coalesce rows.
- if (src_stride_argb == width * 4 &&
- dst_stride_rgb24 == width * 3) {
- width *= height;
- height = 1;
- src_stride_argb = dst_stride_rgb24 = 0;
- }
-#if defined(HAS_ARGBTORGB24ROW_SSSE3)
- if (TestCpuFlag(kCpuHasSSSE3)) {
- ARGBToRGB24Row = ARGBToRGB24Row_Any_SSSE3;
- if (IS_ALIGNED(width, 16)) {
- ARGBToRGB24Row = ARGBToRGB24Row_SSSE3;
- }
- }
-#endif
-#if defined(HAS_ARGBTORGB24ROW_NEON)
- if (TestCpuFlag(kCpuHasNEON)) {
- ARGBToRGB24Row = ARGBToRGB24Row_Any_NEON;
- if (IS_ALIGNED(width, 8)) {
- ARGBToRGB24Row = ARGBToRGB24Row_NEON;
- }
- }
-#endif
-
- for (y = 0; y < height; ++y) {
- ARGBToRGB24Row(src_argb, dst_rgb24, width);
- src_argb += src_stride_argb;
- dst_rgb24 += dst_stride_rgb24;
- }
- return 0;
-}
-
-// Convert ARGB To RAW.
-LIBYUV_API
-int ARGBToRAW(const uint8* src_argb, int src_stride_argb,
- uint8* dst_raw, int dst_stride_raw,
- int width, int height) {
- int y;
- void (*ARGBToRAWRow)(const uint8* src_argb, uint8* dst_rgb, int pix) =
- ARGBToRAWRow_C;
- if (!src_argb || !dst_raw || width <= 0 || height == 0) {
- return -1;
- }
- if (height < 0) {
- height = -height;
- src_argb = src_argb + (height - 1) * src_stride_argb;
- src_stride_argb = -src_stride_argb;
- }
- // Coalesce rows.
- if (src_stride_argb == width * 4 &&
- dst_stride_raw == width * 3) {
- width *= height;
- height = 1;
- src_stride_argb = dst_stride_raw = 0;
- }
-#if defined(HAS_ARGBTORAWROW_SSSE3)
- if (TestCpuFlag(kCpuHasSSSE3)) {
- ARGBToRAWRow = ARGBToRAWRow_Any_SSSE3;
- if (IS_ALIGNED(width, 16)) {
- ARGBToRAWRow = ARGBToRAWRow_SSSE3;
- }
- }
-#endif
-#if defined(HAS_ARGBTORAWROW_NEON)
- if (TestCpuFlag(kCpuHasNEON)) {
- ARGBToRAWRow = ARGBToRAWRow_Any_NEON;
- if (IS_ALIGNED(width, 8)) {
- ARGBToRAWRow = ARGBToRAWRow_NEON;
- }
- }
-#endif
-
- for (y = 0; y < height; ++y) {
- ARGBToRAWRow(src_argb, dst_raw, width);
- src_argb += src_stride_argb;
- dst_raw += dst_stride_raw;
- }
- return 0;
-}
-
-// Ordered 8x8 dither for 888 to 565. Values from 0 to 7.
-static const uint8 kDither565_4x4[16] = {
- 0, 4, 1, 5,
- 6, 2, 7, 3,
- 1, 5, 0, 4,
- 7, 3, 6, 2,
-};
-
-// Convert ARGB To RGB565 with 4x4 dither matrix (16 bytes).
-LIBYUV_API
-int ARGBToRGB565Dither(const uint8* src_argb, int src_stride_argb,
- uint8* dst_rgb565, int dst_stride_rgb565,
- const uint8* dither4x4, int width, int height) {
- int y;
- void (*ARGBToRGB565DitherRow)(const uint8* src_argb, uint8* dst_rgb,
- const uint32 dither4, int pix) = ARGBToRGB565DitherRow_C;
- if (!src_argb || !dst_rgb565 || width <= 0 || height == 0) {
- return -1;
- }
- if (height < 0) {
- height = -height;
- src_argb = src_argb + (height - 1) * src_stride_argb;
- src_stride_argb = -src_stride_argb;
- }
- if (!dither4x4) {
- dither4x4 = kDither565_4x4;
- }
-#if defined(HAS_ARGBTORGB565DITHERROW_SSE2)
- if (TestCpuFlag(kCpuHasSSE2)) {
- ARGBToRGB565DitherRow = ARGBToRGB565DitherRow_Any_SSE2;
- if (IS_ALIGNED(width, 4)) {
- ARGBToRGB565DitherRow = ARGBToRGB565DitherRow_SSE2;
- }
- }
-#endif
-#if defined(HAS_ARGBTORGB565DITHERROW_AVX2)
- if (TestCpuFlag(kCpuHasAVX2)) {
- ARGBToRGB565DitherRow = ARGBToRGB565DitherRow_Any_AVX2;
- if (IS_ALIGNED(width, 8)) {
- ARGBToRGB565DitherRow = ARGBToRGB565DitherRow_AVX2;
- }
- }
-#endif
-#if defined(HAS_ARGBTORGB565DITHERROW_NEON)
- if (TestCpuFlag(kCpuHasNEON)) {
- ARGBToRGB565DitherRow = ARGBToRGB565DitherRow_Any_NEON;
- if (IS_ALIGNED(width, 8)) {
- ARGBToRGB565DitherRow = ARGBToRGB565DitherRow_NEON;
- }
- }
-#endif
- for (y = 0; y < height; ++y) {
- ARGBToRGB565DitherRow(src_argb, dst_rgb565,
- *(uint32*)(dither4x4 + ((y & 3) << 2)), width);
- src_argb += src_stride_argb;
- dst_rgb565 += dst_stride_rgb565;
- }
- return 0;
-}
-
-// Convert ARGB To RGB565.
-// TODO(fbarchard): Consider using dither function low level with zeros.
-LIBYUV_API
-int ARGBToRGB565(const uint8* src_argb, int src_stride_argb,
- uint8* dst_rgb565, int dst_stride_rgb565,
- int width, int height) {
- int y;
- void (*ARGBToRGB565Row)(const uint8* src_argb, uint8* dst_rgb, int pix) =
- ARGBToRGB565Row_C;
- if (!src_argb || !dst_rgb565 || width <= 0 || height == 0) {
- return -1;
- }
- if (height < 0) {
- height = -height;
- src_argb = src_argb + (height - 1) * src_stride_argb;
- src_stride_argb = -src_stride_argb;
- }
- // Coalesce rows.
- if (src_stride_argb == width * 4 &&
- dst_stride_rgb565 == width * 2) {
- width *= height;
- height = 1;
- src_stride_argb = dst_stride_rgb565 = 0;
- }
-#if defined(HAS_ARGBTORGB565ROW_SSE2)
- if (TestCpuFlag(kCpuHasSSE2)) {
- ARGBToRGB565Row = ARGBToRGB565Row_Any_SSE2;
- if (IS_ALIGNED(width, 4)) {
- ARGBToRGB565Row = ARGBToRGB565Row_SSE2;
- }
- }
-#endif
-#if defined(HAS_ARGBTORGB565ROW_AVX2)
- if (TestCpuFlag(kCpuHasAVX2)) {
- ARGBToRGB565Row = ARGBToRGB565Row_Any_AVX2;
- if (IS_ALIGNED(width, 8)) {
- ARGBToRGB565Row = ARGBToRGB565Row_AVX2;
- }
- }
-#endif
-#if defined(HAS_ARGBTORGB565ROW_NEON)
- if (TestCpuFlag(kCpuHasNEON)) {
- ARGBToRGB565Row = ARGBToRGB565Row_Any_NEON;
- if (IS_ALIGNED(width, 8)) {
- ARGBToRGB565Row = ARGBToRGB565Row_NEON;
- }
- }
-#endif
-
- for (y = 0; y < height; ++y) {
- ARGBToRGB565Row(src_argb, dst_rgb565, width);
- src_argb += src_stride_argb;
- dst_rgb565 += dst_stride_rgb565;
- }
- return 0;
-}
-
-// Convert ARGB To ARGB1555.
-LIBYUV_API
-int ARGBToARGB1555(const uint8* src_argb, int src_stride_argb,
- uint8* dst_argb1555, int dst_stride_argb1555,
- int width, int height) {
- int y;
- void (*ARGBToARGB1555Row)(const uint8* src_argb, uint8* dst_rgb, int pix) =
- ARGBToARGB1555Row_C;
- if (!src_argb || !dst_argb1555 || width <= 0 || height == 0) {
- return -1;
- }
- if (height < 0) {
- height = -height;
- src_argb = src_argb + (height - 1) * src_stride_argb;
- src_stride_argb = -src_stride_argb;
- }
- // Coalesce rows.
- if (src_stride_argb == width * 4 &&
- dst_stride_argb1555 == width * 2) {
- width *= height;
- height = 1;
- src_stride_argb = dst_stride_argb1555 = 0;
- }
-#if defined(HAS_ARGBTOARGB1555ROW_SSE2)
- if (TestCpuFlag(kCpuHasSSE2)) {
- ARGBToARGB1555Row = ARGBToARGB1555Row_Any_SSE2;
- if (IS_ALIGNED(width, 4)) {
- ARGBToARGB1555Row = ARGBToARGB1555Row_SSE2;
- }
- }
-#endif
-#if defined(HAS_ARGBTOARGB1555ROW_AVX2)
- if (TestCpuFlag(kCpuHasAVX2)) {
- ARGBToARGB1555Row = ARGBToARGB1555Row_Any_AVX2;
- if (IS_ALIGNED(width, 8)) {
- ARGBToARGB1555Row = ARGBToARGB1555Row_AVX2;
- }
- }
-#endif
-#if defined(HAS_ARGBTOARGB1555ROW_NEON)
- if (TestCpuFlag(kCpuHasNEON)) {
- ARGBToARGB1555Row = ARGBToARGB1555Row_Any_NEON;
- if (IS_ALIGNED(width, 8)) {
- ARGBToARGB1555Row = ARGBToARGB1555Row_NEON;
- }
- }
-#endif
-
- for (y = 0; y < height; ++y) {
- ARGBToARGB1555Row(src_argb, dst_argb1555, width);
- src_argb += src_stride_argb;
- dst_argb1555 += dst_stride_argb1555;
- }
- return 0;
-}
-
-// Convert ARGB To ARGB4444.
-LIBYUV_API
-int ARGBToARGB4444(const uint8* src_argb, int src_stride_argb,
- uint8* dst_argb4444, int dst_stride_argb4444,
- int width, int height) {
- int y;
- void (*ARGBToARGB4444Row)(const uint8* src_argb, uint8* dst_rgb, int pix) =
- ARGBToARGB4444Row_C;
- if (!src_argb || !dst_argb4444 || width <= 0 || height == 0) {
- return -1;
- }
- if (height < 0) {
- height = -height;
- src_argb = src_argb + (height - 1) * src_stride_argb;
- src_stride_argb = -src_stride_argb;
- }
- // Coalesce rows.
- if (src_stride_argb == width * 4 &&
- dst_stride_argb4444 == width * 2) {
- width *= height;
- height = 1;
- src_stride_argb = dst_stride_argb4444 = 0;
- }
-#if defined(HAS_ARGBTOARGB4444ROW_SSE2)
- if (TestCpuFlag(kCpuHasSSE2)) {
- ARGBToARGB4444Row = ARGBToARGB4444Row_Any_SSE2;
- if (IS_ALIGNED(width, 4)) {
- ARGBToARGB4444Row = ARGBToARGB4444Row_SSE2;
- }
- }
-#endif
-#if defined(HAS_ARGBTOARGB4444ROW_AVX2)
- if (TestCpuFlag(kCpuHasAVX2)) {
- ARGBToARGB4444Row = ARGBToARGB4444Row_Any_AVX2;
- if (IS_ALIGNED(width, 8)) {
- ARGBToARGB4444Row = ARGBToARGB4444Row_AVX2;
- }
- }
-#endif
-#if defined(HAS_ARGBTOARGB4444ROW_NEON)
- if (TestCpuFlag(kCpuHasNEON)) {
- ARGBToARGB4444Row = ARGBToARGB4444Row_Any_NEON;
- if (IS_ALIGNED(width, 8)) {
- ARGBToARGB4444Row = ARGBToARGB4444Row_NEON;
- }
- }
-#endif
-
- for (y = 0; y < height; ++y) {
- ARGBToARGB4444Row(src_argb, dst_argb4444, width);
- src_argb += src_stride_argb;
- dst_argb4444 += dst_stride_argb4444;
- }
- return 0;
-}
-
-// Convert ARGB to J420. (JPeg full range I420).
-LIBYUV_API
-int ARGBToJ420(const uint8* src_argb, int src_stride_argb,
- uint8* dst_yj, int dst_stride_yj,
- uint8* dst_u, int dst_stride_u,
- uint8* dst_v, int dst_stride_v,
- int width, int height) {
- int y;
- void (*ARGBToUVJRow)(const uint8* src_argb0, int src_stride_argb,
- uint8* dst_u, uint8* dst_v, int width) = ARGBToUVJRow_C;
- void (*ARGBToYJRow)(const uint8* src_argb, uint8* dst_yj, int pix) =
- ARGBToYJRow_C;
- if (!src_argb ||
- !dst_yj || !dst_u || !dst_v ||
- width <= 0 || height == 0) {
- return -1;
- }
- // Negative height means invert the image.
- if (height < 0) {
- height = -height;
- src_argb = src_argb + (height - 1) * src_stride_argb;
- src_stride_argb = -src_stride_argb;
- }
-#if defined(HAS_ARGBTOYJROW_SSSE3) && defined(HAS_ARGBTOUVJROW_SSSE3)
- if (TestCpuFlag(kCpuHasSSSE3)) {
- ARGBToUVJRow = ARGBToUVJRow_Any_SSSE3;
- ARGBToYJRow = ARGBToYJRow_Any_SSSE3;
- if (IS_ALIGNED(width, 16)) {
- ARGBToUVJRow = ARGBToUVJRow_SSSE3;
- ARGBToYJRow = ARGBToYJRow_SSSE3;
- }
- }
-#endif
-#if defined(HAS_ARGBTOYJROW_AVX2)
- if (TestCpuFlag(kCpuHasAVX2)) {
- ARGBToYJRow = ARGBToYJRow_Any_AVX2;
- if (IS_ALIGNED(width, 32)) {
- ARGBToYJRow = ARGBToYJRow_AVX2;
- }
- }
-#endif
-#if defined(HAS_ARGBTOYJROW_NEON)
- if (TestCpuFlag(kCpuHasNEON)) {
- ARGBToYJRow = ARGBToYJRow_Any_NEON;
- if (IS_ALIGNED(width, 8)) {
- ARGBToYJRow = ARGBToYJRow_NEON;
- }
- }
-#endif
-#if defined(HAS_ARGBTOUVJROW_NEON)
- if (TestCpuFlag(kCpuHasNEON)) {
- ARGBToUVJRow = ARGBToUVJRow_Any_NEON;
- if (IS_ALIGNED(width, 16)) {
- ARGBToUVJRow = ARGBToUVJRow_NEON;
- }
- }
-#endif
-
- for (y = 0; y < height - 1; y += 2) {
- ARGBToUVJRow(src_argb, src_stride_argb, dst_u, dst_v, width);
- ARGBToYJRow(src_argb, dst_yj, width);
- ARGBToYJRow(src_argb + src_stride_argb, dst_yj + dst_stride_yj, width);
- src_argb += src_stride_argb * 2;
- dst_yj += dst_stride_yj * 2;
- dst_u += dst_stride_u;
- dst_v += dst_stride_v;
- }
- if (height & 1) {
- ARGBToUVJRow(src_argb, 0, dst_u, dst_v, width);
- ARGBToYJRow(src_argb, dst_yj, width);
- }
- return 0;
-}
-
-// ARGB little endian (bgra in memory) to J422
-LIBYUV_API
-int ARGBToJ422(const uint8* src_argb, int src_stride_argb,
- uint8* dst_y, int dst_stride_y,
- uint8* dst_u, int dst_stride_u,
- uint8* dst_v, int dst_stride_v,
- int width, int height) {
- int y;
- void (*ARGBToUVJ422Row)(const uint8* src_argb, uint8* dst_u, uint8* dst_v,
- int pix) = ARGBToUVJ422Row_C;
- void (*ARGBToYJRow)(const uint8* src_argb, uint8* dst_y, int pix) =
- ARGBToYJRow_C;
- if (!src_argb || !dst_y || !dst_u || !dst_v || width <= 0 || height == 0) {
- return -1;
- }
- if (height < 0) {
- height = -height;
- src_argb = src_argb + (height - 1) * src_stride_argb;
- src_stride_argb = -src_stride_argb;
- }
- // Coalesce rows.
- if (src_stride_argb == width * 4 &&
- dst_stride_y == width &&
- dst_stride_u * 2 == width &&
- dst_stride_v * 2 == width) {
- width *= height;
- height = 1;
- src_stride_argb = dst_stride_y = dst_stride_u = dst_stride_v = 0;
- }
-#if defined(HAS_ARGBTOUVJ422ROW_SSSE3)
- if (TestCpuFlag(kCpuHasSSSE3)) {
- ARGBToUVJ422Row = ARGBToUVJ422Row_Any_SSSE3;
- if (IS_ALIGNED(width, 16)) {
- ARGBToUVJ422Row = ARGBToUVJ422Row_SSSE3;
- }
- }
-#endif
-#if defined(HAS_ARGBTOUVJ422ROW_NEON)
- if (TestCpuFlag(kCpuHasNEON)) {
- ARGBToUVJ422Row = ARGBToUVJ422Row_Any_NEON;
- if (IS_ALIGNED(width, 16)) {
- ARGBToUVJ422Row = ARGBToUVJ422Row_NEON;
- }
- }
-#endif
-
-#if defined(HAS_ARGBTOYJROW_SSSE3)
- if (TestCpuFlag(kCpuHasSSSE3)) {
- ARGBToYJRow = ARGBToYJRow_Any_SSSE3;
- if (IS_ALIGNED(width, 16)) {
- ARGBToYJRow = ARGBToYJRow_SSSE3;
- }
- }
-#endif
-#if defined(HAS_ARGBTOYJROW_AVX2)
- if (TestCpuFlag(kCpuHasAVX2)) {
- ARGBToYJRow = ARGBToYJRow_Any_AVX2;
- if (IS_ALIGNED(width, 32)) {
- ARGBToYJRow = ARGBToYJRow_AVX2;
- }
- }
-#endif
-#if defined(HAS_ARGBTOYJROW_NEON)
- if (TestCpuFlag(kCpuHasNEON)) {
- ARGBToYJRow = ARGBToYJRow_Any_NEON;
- if (IS_ALIGNED(width, 8)) {
- ARGBToYJRow = ARGBToYJRow_NEON;
- }
- }
-#endif
-
- for (y = 0; y < height; ++y) {
- ARGBToUVJ422Row(src_argb, dst_u, dst_v, width);
- ARGBToYJRow(src_argb, dst_y, width);
- src_argb += src_stride_argb;
- dst_y += dst_stride_y;
- dst_u += dst_stride_u;
- dst_v += dst_stride_v;
- }
- return 0;
-}
-
-// Convert ARGB to J400.
-LIBYUV_API
-int ARGBToJ400(const uint8* src_argb, int src_stride_argb,
- uint8* dst_yj, int dst_stride_yj,
- int width, int height) {
- int y;
- void (*ARGBToYJRow)(const uint8* src_argb, uint8* dst_yj, int pix) =
- ARGBToYJRow_C;
- if (!src_argb || !dst_yj || width <= 0 || height == 0) {
- return -1;
- }
- if (height < 0) {
- height = -height;
- src_argb = src_argb + (height - 1) * src_stride_argb;
- src_stride_argb = -src_stride_argb;
- }
- // Coalesce rows.
- if (src_stride_argb == width * 4 &&
- dst_stride_yj == width) {
- width *= height;
- height = 1;
- src_stride_argb = dst_stride_yj = 0;
- }
-#if defined(HAS_ARGBTOYJROW_SSSE3)
- if (TestCpuFlag(kCpuHasSSSE3)) {
- ARGBToYJRow = ARGBToYJRow_Any_SSSE3;
- if (IS_ALIGNED(width, 16)) {
- ARGBToYJRow = ARGBToYJRow_SSSE3;
- }
- }
-#endif
-#if defined(HAS_ARGBTOYJROW_AVX2)
- if (TestCpuFlag(kCpuHasAVX2)) {
- ARGBToYJRow = ARGBToYJRow_Any_AVX2;
- if (IS_ALIGNED(width, 32)) {
- ARGBToYJRow = ARGBToYJRow_AVX2;
- }
- }
-#endif
-#if defined(HAS_ARGBTOYJROW_NEON)
- if (TestCpuFlag(kCpuHasNEON)) {
- ARGBToYJRow = ARGBToYJRow_Any_NEON;
- if (IS_ALIGNED(width, 8)) {
- ARGBToYJRow = ARGBToYJRow_NEON;
- }
- }
-#endif
-
- for (y = 0; y < height; ++y) {
- ARGBToYJRow(src_argb, dst_yj, width);
- src_argb += src_stride_argb;
- dst_yj += dst_stride_yj;
- }
- return 0;
-}
-
-#ifdef __cplusplus
-} // extern "C"
-} // namespace libyuv
-#endif
diff --git a/third_party/aom/third_party/libyuv/source/convert_jpeg.cc b/third_party/aom/third_party/libyuv/source/convert_jpeg.cc
deleted file mode 100644
index bcb980f7f..000000000
--- a/third_party/aom/third_party/libyuv/source/convert_jpeg.cc
+++ /dev/null
@@ -1,392 +0,0 @@
-/*
- * Copyright 2011 The LibYuv Project Authors. All rights reserved.
- *
- * Use of this source code is governed by a BSD-style license
- * that can be found in the LICENSE file in the root of the source
- * tree. An additional intellectual property rights grant can be found
- * in the file PATENTS. All contributing project authors may
- * be found in the AUTHORS file in the root of the source tree.
- */
-
-#include "libyuv/convert.h"
-
-#ifdef HAVE_JPEG
-#include "libyuv/mjpeg_decoder.h"
-#endif
-
-#ifdef __cplusplus
-namespace libyuv {
-extern "C" {
-#endif
-
-#ifdef HAVE_JPEG
-struct I420Buffers {
- uint8* y;
- int y_stride;
- uint8* u;
- int u_stride;
- uint8* v;
- int v_stride;
- int w;
- int h;
-};
-
-static void JpegCopyI420(void* opaque,
- const uint8* const* data,
- const int* strides,
- int rows) {
- I420Buffers* dest = (I420Buffers*)(opaque);
- I420Copy(data[0], strides[0],
- data[1], strides[1],
- data[2], strides[2],
- dest->y, dest->y_stride,
- dest->u, dest->u_stride,
- dest->v, dest->v_stride,
- dest->w, rows);
- dest->y += rows * dest->y_stride;
- dest->u += ((rows + 1) >> 1) * dest->u_stride;
- dest->v += ((rows + 1) >> 1) * dest->v_stride;
- dest->h -= rows;
-}
-
-static void JpegI422ToI420(void* opaque,
- const uint8* const* data,
- const int* strides,
- int rows) {
- I420Buffers* dest = (I420Buffers*)(opaque);
- I422ToI420(data[0], strides[0],
- data[1], strides[1],
- data[2], strides[2],
- dest->y, dest->y_stride,
- dest->u, dest->u_stride,
- dest->v, dest->v_stride,
- dest->w, rows);
- dest->y += rows * dest->y_stride;
- dest->u += ((rows + 1) >> 1) * dest->u_stride;
- dest->v += ((rows + 1) >> 1) * dest->v_stride;
- dest->h -= rows;
-}
-
-static void JpegI444ToI420(void* opaque,
- const uint8* const* data,
- const int* strides,
- int rows) {
- I420Buffers* dest = (I420Buffers*)(opaque);
- I444ToI420(data[0], strides[0],
- data[1], strides[1],
- data[2], strides[2],
- dest->y, dest->y_stride,
- dest->u, dest->u_stride,
- dest->v, dest->v_stride,
- dest->w, rows);
- dest->y += rows * dest->y_stride;
- dest->u += ((rows + 1) >> 1) * dest->u_stride;
- dest->v += ((rows + 1) >> 1) * dest->v_stride;
- dest->h -= rows;
-}
-
-static void JpegI411ToI420(void* opaque,
- const uint8* const* data,
- const int* strides,
- int rows) {
- I420Buffers* dest = (I420Buffers*)(opaque);
- I411ToI420(data[0], strides[0],
- data[1], strides[1],
- data[2], strides[2],
- dest->y, dest->y_stride,
- dest->u, dest->u_stride,
- dest->v, dest->v_stride,
- dest->w, rows);
- dest->y += rows * dest->y_stride;
- dest->u += ((rows + 1) >> 1) * dest->u_stride;
- dest->v += ((rows + 1) >> 1) * dest->v_stride;
- dest->h -= rows;
-}
-
-static void JpegI400ToI420(void* opaque,
- const uint8* const* data,
- const int* strides,
- int rows) {
- I420Buffers* dest = (I420Buffers*)(opaque);
- I400ToI420(data[0], strides[0],
- dest->y, dest->y_stride,
- dest->u, dest->u_stride,
- dest->v, dest->v_stride,
- dest->w, rows);
- dest->y += rows * dest->y_stride;
- dest->u += ((rows + 1) >> 1) * dest->u_stride;
- dest->v += ((rows + 1) >> 1) * dest->v_stride;
- dest->h -= rows;
-}
-
-// Query size of MJPG in pixels.
-LIBYUV_API
-int MJPGSize(const uint8* sample, size_t sample_size,
- int* width, int* height) {
- MJpegDecoder mjpeg_decoder;
- LIBYUV_BOOL ret = mjpeg_decoder.LoadFrame(sample, sample_size);
- if (ret) {
- *width = mjpeg_decoder.GetWidth();
- *height = mjpeg_decoder.GetHeight();
- }
- mjpeg_decoder.UnloadFrame();
- return ret ? 0 : -1; // -1 for runtime failure.
-}
-
-// MJPG (Motion JPeg) to I420
-// TODO(fbarchard): review w and h requirement. dw and dh may be enough.
-LIBYUV_API
-int MJPGToI420(const uint8* sample,
- size_t sample_size,
- uint8* y, int y_stride,
- uint8* u, int u_stride,
- uint8* v, int v_stride,
- int w, int h,
- int dw, int dh) {
- if (sample_size == kUnknownDataSize) {
- // ERROR: MJPEG frame size unknown
- return -1;
- }
-
- // TODO(fbarchard): Port MJpeg to C.
- MJpegDecoder mjpeg_decoder;
- LIBYUV_BOOL ret = mjpeg_decoder.LoadFrame(sample, sample_size);
- if (ret && (mjpeg_decoder.GetWidth() != w ||
- mjpeg_decoder.GetHeight() != h)) {
- // ERROR: MJPEG frame has unexpected dimensions
- mjpeg_decoder.UnloadFrame();
- return 1; // runtime failure
- }
- if (ret) {
- I420Buffers bufs = { y, y_stride, u, u_stride, v, v_stride, dw, dh };
- // YUV420
- if (mjpeg_decoder.GetColorSpace() ==
- MJpegDecoder::kColorSpaceYCbCr &&
- mjpeg_decoder.GetNumComponents() == 3 &&
- mjpeg_decoder.GetVertSampFactor(0) == 2 &&
- mjpeg_decoder.GetHorizSampFactor(0) == 2 &&
- mjpeg_decoder.GetVertSampFactor(1) == 1 &&
- mjpeg_decoder.GetHorizSampFactor(1) == 1 &&
- mjpeg_decoder.GetVertSampFactor(2) == 1 &&
- mjpeg_decoder.GetHorizSampFactor(2) == 1) {
- ret = mjpeg_decoder.DecodeToCallback(&JpegCopyI420, &bufs, dw, dh);
- // YUV422
- } else if (mjpeg_decoder.GetColorSpace() ==
- MJpegDecoder::kColorSpaceYCbCr &&
- mjpeg_decoder.GetNumComponents() == 3 &&
- mjpeg_decoder.GetVertSampFactor(0) == 1 &&
- mjpeg_decoder.GetHorizSampFactor(0) == 2 &&
- mjpeg_decoder.GetVertSampFactor(1) == 1 &&
- mjpeg_decoder.GetHorizSampFactor(1) == 1 &&
- mjpeg_decoder.GetVertSampFactor(2) == 1 &&
- mjpeg_decoder.GetHorizSampFactor(2) == 1) {
- ret = mjpeg_decoder.DecodeToCallback(&JpegI422ToI420, &bufs, dw, dh);
- // YUV444
- } else if (mjpeg_decoder.GetColorSpace() ==
- MJpegDecoder::kColorSpaceYCbCr &&
- mjpeg_decoder.GetNumComponents() == 3 &&
- mjpeg_decoder.GetVertSampFactor(0) == 1 &&
- mjpeg_decoder.GetHorizSampFactor(0) == 1 &&
- mjpeg_decoder.GetVertSampFactor(1) == 1 &&
- mjpeg_decoder.GetHorizSampFactor(1) == 1 &&
- mjpeg_decoder.GetVertSampFactor(2) == 1 &&
- mjpeg_decoder.GetHorizSampFactor(2) == 1) {
- ret = mjpeg_decoder.DecodeToCallback(&JpegI444ToI420, &bufs, dw, dh);
- // YUV411
- } else if (mjpeg_decoder.GetColorSpace() ==
- MJpegDecoder::kColorSpaceYCbCr &&
- mjpeg_decoder.GetNumComponents() == 3 &&
- mjpeg_decoder.GetVertSampFactor(0) == 1 &&
- mjpeg_decoder.GetHorizSampFactor(0) == 4 &&
- mjpeg_decoder.GetVertSampFactor(1) == 1 &&
- mjpeg_decoder.GetHorizSampFactor(1) == 1 &&
- mjpeg_decoder.GetVertSampFactor(2) == 1 &&
- mjpeg_decoder.GetHorizSampFactor(2) == 1) {
- ret = mjpeg_decoder.DecodeToCallback(&JpegI411ToI420, &bufs, dw, dh);
- // YUV400
- } else if (mjpeg_decoder.GetColorSpace() ==
- MJpegDecoder::kColorSpaceGrayscale &&
- mjpeg_decoder.GetNumComponents() == 1 &&
- mjpeg_decoder.GetVertSampFactor(0) == 1 &&
- mjpeg_decoder.GetHorizSampFactor(0) == 1) {
- ret = mjpeg_decoder.DecodeToCallback(&JpegI400ToI420, &bufs, dw, dh);
- } else {
- // TODO(fbarchard): Implement conversion for any other colorspace/sample
- // factors that occur in practice. 411 is supported by libjpeg
- // ERROR: Unable to convert MJPEG frame because format is not supported
- mjpeg_decoder.UnloadFrame();
- return 1;
- }
- }
- return ret ? 0 : 1;
-}
-
-#ifdef HAVE_JPEG
-struct ARGBBuffers {
- uint8* argb;
- int argb_stride;
- int w;
- int h;
-};
-
-static void JpegI420ToARGB(void* opaque,
- const uint8* const* data,
- const int* strides,
- int rows) {
- ARGBBuffers* dest = (ARGBBuffers*)(opaque);
- I420ToARGB(data[0], strides[0],
- data[1], strides[1],
- data[2], strides[2],
- dest->argb, dest->argb_stride,
- dest->w, rows);
- dest->argb += rows * dest->argb_stride;
- dest->h -= rows;
-}
-
-static void JpegI422ToARGB(void* opaque,
- const uint8* const* data,
- const int* strides,
- int rows) {
- ARGBBuffers* dest = (ARGBBuffers*)(opaque);
- I422ToARGB(data[0], strides[0],
- data[1], strides[1],
- data[2], strides[2],
- dest->argb, dest->argb_stride,
- dest->w, rows);
- dest->argb += rows * dest->argb_stride;
- dest->h -= rows;
-}
-
-static void JpegI444ToARGB(void* opaque,
- const uint8* const* data,
- const int* strides,
- int rows) {
- ARGBBuffers* dest = (ARGBBuffers*)(opaque);
- I444ToARGB(data[0], strides[0],
- data[1], strides[1],
- data[2], strides[2],
- dest->argb, dest->argb_stride,
- dest->w, rows);
- dest->argb += rows * dest->argb_stride;
- dest->h -= rows;
-}
-
-static void JpegI411ToARGB(void* opaque,
- const uint8* const* data,
- const int* strides,
- int rows) {
- ARGBBuffers* dest = (ARGBBuffers*)(opaque);
- I411ToARGB(data[0], strides[0],
- data[1], strides[1],
- data[2], strides[2],
- dest->argb, dest->argb_stride,
- dest->w, rows);
- dest->argb += rows * dest->argb_stride;
- dest->h -= rows;
-}
-
-static void JpegI400ToARGB(void* opaque,
- const uint8* const* data,
- const int* strides,
- int rows) {
- ARGBBuffers* dest = (ARGBBuffers*)(opaque);
- I400ToARGB(data[0], strides[0],
- dest->argb, dest->argb_stride,
- dest->w, rows);
- dest->argb += rows * dest->argb_stride;
- dest->h -= rows;
-}
-
-// MJPG (Motion JPeg) to ARGB
-// TODO(fbarchard): review w and h requirement. dw and dh may be enough.
-LIBYUV_API
-int MJPGToARGB(const uint8* sample,
- size_t sample_size,
- uint8* argb, int argb_stride,
- int w, int h,
- int dw, int dh) {
- if (sample_size == kUnknownDataSize) {
- // ERROR: MJPEG frame size unknown
- return -1;
- }
-
- // TODO(fbarchard): Port MJpeg to C.
- MJpegDecoder mjpeg_decoder;
- LIBYUV_BOOL ret = mjpeg_decoder.LoadFrame(sample, sample_size);
- if (ret && (mjpeg_decoder.GetWidth() != w ||
- mjpeg_decoder.GetHeight() != h)) {
- // ERROR: MJPEG frame has unexpected dimensions
- mjpeg_decoder.UnloadFrame();
- return 1; // runtime failure
- }
- if (ret) {
- ARGBBuffers bufs = { argb, argb_stride, dw, dh };
- // YUV420
- if (mjpeg_decoder.GetColorSpace() ==
- MJpegDecoder::kColorSpaceYCbCr &&
- mjpeg_decoder.GetNumComponents() == 3 &&
- mjpeg_decoder.GetVertSampFactor(0) == 2 &&
- mjpeg_decoder.GetHorizSampFactor(0) == 2 &&
- mjpeg_decoder.GetVertSampFactor(1) == 1 &&
- mjpeg_decoder.GetHorizSampFactor(1) == 1 &&
- mjpeg_decoder.GetVertSampFactor(2) == 1 &&
- mjpeg_decoder.GetHorizSampFactor(2) == 1) {
- ret = mjpeg_decoder.DecodeToCallback(&JpegI420ToARGB, &bufs, dw, dh);
- // YUV422
- } else if (mjpeg_decoder.GetColorSpace() ==
- MJpegDecoder::kColorSpaceYCbCr &&
- mjpeg_decoder.GetNumComponents() == 3 &&
- mjpeg_decoder.GetVertSampFactor(0) == 1 &&
- mjpeg_decoder.GetHorizSampFactor(0) == 2 &&
- mjpeg_decoder.GetVertSampFactor(1) == 1 &&
- mjpeg_decoder.GetHorizSampFactor(1) == 1 &&
- mjpeg_decoder.GetVertSampFactor(2) == 1 &&
- mjpeg_decoder.GetHorizSampFactor(2) == 1) {
- ret = mjpeg_decoder.DecodeToCallback(&JpegI422ToARGB, &bufs, dw, dh);
- // YUV444
- } else if (mjpeg_decoder.GetColorSpace() ==
- MJpegDecoder::kColorSpaceYCbCr &&
- mjpeg_decoder.GetNumComponents() == 3 &&
- mjpeg_decoder.GetVertSampFactor(0) == 1 &&
- mjpeg_decoder.GetHorizSampFactor(0) == 1 &&
- mjpeg_decoder.GetVertSampFactor(1) == 1 &&
- mjpeg_decoder.GetHorizSampFactor(1) == 1 &&
- mjpeg_decoder.GetVertSampFactor(2) == 1 &&
- mjpeg_decoder.GetHorizSampFactor(2) == 1) {
- ret = mjpeg_decoder.DecodeToCallback(&JpegI444ToARGB, &bufs, dw, dh);
- // YUV411
- } else if (mjpeg_decoder.GetColorSpace() ==
- MJpegDecoder::kColorSpaceYCbCr &&
- mjpeg_decoder.GetNumComponents() == 3 &&
- mjpeg_decoder.GetVertSampFactor(0) == 1 &&
- mjpeg_decoder.GetHorizSampFactor(0) == 4 &&
- mjpeg_decoder.GetVertSampFactor(1) == 1 &&
- mjpeg_decoder.GetHorizSampFactor(1) == 1 &&
- mjpeg_decoder.GetVertSampFactor(2) == 1 &&
- mjpeg_decoder.GetHorizSampFactor(2) == 1) {
- ret = mjpeg_decoder.DecodeToCallback(&JpegI411ToARGB, &bufs, dw, dh);
- // YUV400
- } else if (mjpeg_decoder.GetColorSpace() ==
- MJpegDecoder::kColorSpaceGrayscale &&
- mjpeg_decoder.GetNumComponents() == 1 &&
- mjpeg_decoder.GetVertSampFactor(0) == 1 &&
- mjpeg_decoder.GetHorizSampFactor(0) == 1) {
- ret = mjpeg_decoder.DecodeToCallback(&JpegI400ToARGB, &bufs, dw, dh);
- } else {
- // TODO(fbarchard): Implement conversion for any other colorspace/sample
- // factors that occur in practice. 411 is supported by libjpeg
- // ERROR: Unable to convert MJPEG frame because format is not supported
- mjpeg_decoder.UnloadFrame();
- return 1;
- }
- }
- return ret ? 0 : 1;
-}
-#endif
-
-#endif
-
-#ifdef __cplusplus
-} // extern "C"
-} // namespace libyuv
-#endif
diff --git a/third_party/aom/third_party/libyuv/source/convert_to_argb.cc b/third_party/aom/third_party/libyuv/source/convert_to_argb.cc
deleted file mode 100644
index af829fbd3..000000000
--- a/third_party/aom/third_party/libyuv/source/convert_to_argb.cc
+++ /dev/null
@@ -1,306 +0,0 @@
-/*
- * Copyright 2011 The LibYuv Project Authors. All rights reserved.
- *
- * Use of this source code is governed by a BSD-style license
- * that can be found in the LICENSE file in the root of the source
- * tree. An additional intellectual property rights grant can be found
- * in the file PATENTS. All contributing project authors may
- * be found in the AUTHORS file in the root of the source tree.
- */
-
-#include "libyuv/convert_argb.h"
-
-#include "libyuv/cpu_id.h"
-#ifdef HAVE_JPEG
-#include "libyuv/mjpeg_decoder.h"
-#endif
-#include "libyuv/rotate_argb.h"
-#include "libyuv/row.h"
-#include "libyuv/video_common.h"
-
-#ifdef __cplusplus
-namespace libyuv {
-extern "C" {
-#endif
-
-// Convert camera sample to I420 with cropping, rotation and vertical flip.
-// src_width is used for source stride computation
-// src_height is used to compute location of planes, and indicate inversion
-// sample_size is measured in bytes and is the size of the frame.
-// With MJPEG it is the compressed size of the frame.
-LIBYUV_API
-int ConvertToARGB(const uint8* sample, size_t sample_size,
- uint8* crop_argb, int argb_stride,
- int crop_x, int crop_y,
- int src_width, int src_height,
- int crop_width, int crop_height,
- enum RotationMode rotation,
- uint32 fourcc) {
- uint32 format = CanonicalFourCC(fourcc);
- int aligned_src_width = (src_width + 1) & ~1;
- const uint8* src;
- const uint8* src_uv;
- int abs_src_height = (src_height < 0) ? -src_height : src_height;
- int inv_crop_height = (crop_height < 0) ? -crop_height : crop_height;
- int r = 0;
-
- // One pass rotation is available for some formats. For the rest, convert
- // to I420 (with optional vertical flipping) into a temporary I420 buffer,
- // and then rotate the I420 to the final destination buffer.
- // For in-place conversion, if destination crop_argb is same as source sample,
- // also enable temporary buffer.
- LIBYUV_BOOL need_buf = (rotation && format != FOURCC_ARGB) ||
- crop_argb == sample;
- uint8* tmp_argb = crop_argb;
- int tmp_argb_stride = argb_stride;
- uint8* rotate_buffer = NULL;
- int abs_crop_height = (crop_height < 0) ? -crop_height : crop_height;
-
- if (crop_argb == NULL || sample == NULL ||
- src_width <= 0 || crop_width <= 0 ||
- src_height == 0 || crop_height == 0) {
- return -1;
- }
- if (src_height < 0) {
- inv_crop_height = -inv_crop_height;
- }
-
- if (need_buf) {
- int argb_size = crop_width * abs_crop_height * 4;
- rotate_buffer = (uint8*)malloc(argb_size);
- if (!rotate_buffer) {
- return 1; // Out of memory runtime error.
- }
- crop_argb = rotate_buffer;
- argb_stride = crop_width;
- }
-
- switch (format) {
- // Single plane formats
- case FOURCC_YUY2:
- src = sample + (aligned_src_width * crop_y + crop_x) * 2;
- r = YUY2ToARGB(src, aligned_src_width * 2,
- crop_argb, argb_stride,
- crop_width, inv_crop_height);
- break;
- case FOURCC_UYVY:
- src = sample + (aligned_src_width * crop_y + crop_x) * 2;
- r = UYVYToARGB(src, aligned_src_width * 2,
- crop_argb, argb_stride,
- crop_width, inv_crop_height);
- break;
- case FOURCC_24BG:
- src = sample + (src_width * crop_y + crop_x) * 3;
- r = RGB24ToARGB(src, src_width * 3,
- crop_argb, argb_stride,
- crop_width, inv_crop_height);
- break;
- case FOURCC_RAW:
- src = sample + (src_width * crop_y + crop_x) * 3;
- r = RAWToARGB(src, src_width * 3,
- crop_argb, argb_stride,
- crop_width, inv_crop_height);
- break;
- case FOURCC_ARGB:
- src = sample + (src_width * crop_y + crop_x) * 4;
- r = ARGBToARGB(src, src_width * 4,
- crop_argb, argb_stride,
- crop_width, inv_crop_height);
- break;
- case FOURCC_BGRA:
- src = sample + (src_width * crop_y + crop_x) * 4;
- r = BGRAToARGB(src, src_width * 4,
- crop_argb, argb_stride,
- crop_width, inv_crop_height);
- break;
- case FOURCC_ABGR:
- src = sample + (src_width * crop_y + crop_x) * 4;
- r = ABGRToARGB(src, src_width * 4,
- crop_argb, argb_stride,
- crop_width, inv_crop_height);
- break;
- case FOURCC_RGBA:
- src = sample + (src_width * crop_y + crop_x) * 4;
- r = RGBAToARGB(src, src_width * 4,
- crop_argb, argb_stride,
- crop_width, inv_crop_height);
- break;
- case FOURCC_RGBP:
- src = sample + (src_width * crop_y + crop_x) * 2;
- r = RGB565ToARGB(src, src_width * 2,
- crop_argb, argb_stride,
- crop_width, inv_crop_height);
- break;
- case FOURCC_RGBO:
- src = sample + (src_width * crop_y + crop_x) * 2;
- r = ARGB1555ToARGB(src, src_width * 2,
- crop_argb, argb_stride,
- crop_width, inv_crop_height);
- break;
- case FOURCC_R444:
- src = sample + (src_width * crop_y + crop_x) * 2;
- r = ARGB4444ToARGB(src, src_width * 2,
- crop_argb, argb_stride,
- crop_width, inv_crop_height);
- break;
- case FOURCC_I400:
- src = sample + src_width * crop_y + crop_x;
- r = I400ToARGB(src, src_width,
- crop_argb, argb_stride,
- crop_width, inv_crop_height);
- break;
-
- // Biplanar formats
- case FOURCC_NV12:
- src = sample + (src_width * crop_y + crop_x);
- src_uv = sample + aligned_src_width * (src_height + crop_y / 2) + crop_x;
- r = NV12ToARGB(src, src_width,
- src_uv, aligned_src_width,
- crop_argb, argb_stride,
- crop_width, inv_crop_height);
- break;
- case FOURCC_NV21:
- src = sample + (src_width * crop_y + crop_x);
- src_uv = sample + aligned_src_width * (src_height + crop_y / 2) + crop_x;
- // Call NV12 but with u and v parameters swapped.
- r = NV21ToARGB(src, src_width,
- src_uv, aligned_src_width,
- crop_argb, argb_stride,
- crop_width, inv_crop_height);
- break;
- case FOURCC_M420:
- src = sample + (src_width * crop_y) * 12 / 8 + crop_x;
- r = M420ToARGB(src, src_width,
- crop_argb, argb_stride,
- crop_width, inv_crop_height);
- break;
- // Triplanar formats
- case FOURCC_I420:
- case FOURCC_YU12:
- case FOURCC_YV12: {
- const uint8* src_y = sample + (src_width * crop_y + crop_x);
- const uint8* src_u;
- const uint8* src_v;
- int halfwidth = (src_width + 1) / 2;
- int halfheight = (abs_src_height + 1) / 2;
- if (format == FOURCC_YV12) {
- src_v = sample + src_width * abs_src_height +
- (halfwidth * crop_y + crop_x) / 2;
- src_u = sample + src_width * abs_src_height +
- halfwidth * (halfheight + crop_y / 2) + crop_x / 2;
- } else {
- src_u = sample + src_width * abs_src_height +
- (halfwidth * crop_y + crop_x) / 2;
- src_v = sample + src_width * abs_src_height +
- halfwidth * (halfheight + crop_y / 2) + crop_x / 2;
- }
- r = I420ToARGB(src_y, src_width,
- src_u, halfwidth,
- src_v, halfwidth,
- crop_argb, argb_stride,
- crop_width, inv_crop_height);
- break;
- }
-
- case FOURCC_J420: {
- const uint8* src_y = sample + (src_width * crop_y + crop_x);
- const uint8* src_u;
- const uint8* src_v;
- int halfwidth = (src_width + 1) / 2;
- int halfheight = (abs_src_height + 1) / 2;
- src_u = sample + src_width * abs_src_height +
- (halfwidth * crop_y + crop_x) / 2;
- src_v = sample + src_width * abs_src_height +
- halfwidth * (halfheight + crop_y / 2) + crop_x / 2;
- r = J420ToARGB(src_y, src_width,
- src_u, halfwidth,
- src_v, halfwidth,
- crop_argb, argb_stride,
- crop_width, inv_crop_height);
- break;
- }
-
- case FOURCC_I422:
- case FOURCC_YV16: {
- const uint8* src_y = sample + src_width * crop_y + crop_x;
- const uint8* src_u;
- const uint8* src_v;
- int halfwidth = (src_width + 1) / 2;
- if (format == FOURCC_YV16) {
- src_v = sample + src_width * abs_src_height +
- halfwidth * crop_y + crop_x / 2;
- src_u = sample + src_width * abs_src_height +
- halfwidth * (abs_src_height + crop_y) + crop_x / 2;
- } else {
- src_u = sample + src_width * abs_src_height +
- halfwidth * crop_y + crop_x / 2;
- src_v = sample + src_width * abs_src_height +
- halfwidth * (abs_src_height + crop_y) + crop_x / 2;
- }
- r = I422ToARGB(src_y, src_width,
- src_u, halfwidth,
- src_v, halfwidth,
- crop_argb, argb_stride,
- crop_width, inv_crop_height);
- break;
- }
- case FOURCC_I444:
- case FOURCC_YV24: {
- const uint8* src_y = sample + src_width * crop_y + crop_x;
- const uint8* src_u;
- const uint8* src_v;
- if (format == FOURCC_YV24) {
- src_v = sample + src_width * (abs_src_height + crop_y) + crop_x;
- src_u = sample + src_width * (abs_src_height * 2 + crop_y) + crop_x;
- } else {
- src_u = sample + src_width * (abs_src_height + crop_y) + crop_x;
- src_v = sample + src_width * (abs_src_height * 2 + crop_y) + crop_x;
- }
- r = I444ToARGB(src_y, src_width,
- src_u, src_width,
- src_v, src_width,
- crop_argb, argb_stride,
- crop_width, inv_crop_height);
- break;
- }
- case FOURCC_I411: {
- int quarterwidth = (src_width + 3) / 4;
- const uint8* src_y = sample + src_width * crop_y + crop_x;
- const uint8* src_u = sample + src_width * abs_src_height +
- quarterwidth * crop_y + crop_x / 4;
- const uint8* src_v = sample + src_width * abs_src_height +
- quarterwidth * (abs_src_height + crop_y) + crop_x / 4;
- r = I411ToARGB(src_y, src_width,
- src_u, quarterwidth,
- src_v, quarterwidth,
- crop_argb, argb_stride,
- crop_width, inv_crop_height);
- break;
- }
-#ifdef HAVE_JPEG
- case FOURCC_MJPG:
- r = MJPGToARGB(sample, sample_size,
- crop_argb, argb_stride,
- src_width, abs_src_height, crop_width, inv_crop_height);
- break;
-#endif
- default:
- r = -1; // unknown fourcc - return failure code.
- }
-
- if (need_buf) {
- if (!r) {
- r = ARGBRotate(crop_argb, argb_stride,
- tmp_argb, tmp_argb_stride,
- crop_width, abs_crop_height, rotation);
- }
- free(rotate_buffer);
- }
-
- return r;
-}
-
-#ifdef __cplusplus
-} // extern "C"
-} // namespace libyuv
-#endif
diff --git a/third_party/aom/third_party/libyuv/source/convert_to_i420.cc b/third_party/aom/third_party/libyuv/source/convert_to_i420.cc
deleted file mode 100644
index 5e75369b5..000000000
--- a/third_party/aom/third_party/libyuv/source/convert_to_i420.cc
+++ /dev/null
@@ -1,339 +0,0 @@
-/*
- * Copyright 2011 The LibYuv Project Authors. All rights reserved.
- *
- * Use of this source code is governed by a BSD-style license
- * that can be found in the LICENSE file in the root of the source
- * tree. An additional intellectual property rights grant can be found
- * in the file PATENTS. All contributing project authors may
- * be found in the AUTHORS file in the root of the source tree.
- */
-
-#include <stdlib.h>
-
-#include "libyuv/convert.h"
-
-#include "libyuv/video_common.h"
-
-#ifdef __cplusplus
-namespace libyuv {
-extern "C" {
-#endif
-
-// Convert camera sample to I420 with cropping, rotation and vertical flip.
-// src_width is used for source stride computation
-// src_height is used to compute location of planes, and indicate inversion
-// sample_size is measured in bytes and is the size of the frame.
-// With MJPEG it is the compressed size of the frame.
-LIBYUV_API
-int ConvertToI420(const uint8* sample,
- size_t sample_size,
- uint8* y, int y_stride,
- uint8* u, int u_stride,
- uint8* v, int v_stride,
- int crop_x, int crop_y,
- int src_width, int src_height,
- int crop_width, int crop_height,
- enum RotationMode rotation,
- uint32 fourcc) {
- uint32 format = CanonicalFourCC(fourcc);
- int aligned_src_width = (src_width + 1) & ~1;
- const uint8* src;
- const uint8* src_uv;
- int abs_src_height = (src_height < 0) ? -src_height : src_height;
- int inv_crop_height = (crop_height < 0) ? -crop_height : crop_height;
- int r = 0;
- LIBYUV_BOOL need_buf = (rotation && format != FOURCC_I420 &&
- format != FOURCC_NV12 && format != FOURCC_NV21 &&
- format != FOURCC_YU12 && format != FOURCC_YV12) || y == sample;
- uint8* tmp_y = y;
- uint8* tmp_u = u;
- uint8* tmp_v = v;
- int tmp_y_stride = y_stride;
- int tmp_u_stride = u_stride;
- int tmp_v_stride = v_stride;
- uint8* rotate_buffer = NULL;
- int abs_crop_height = (crop_height < 0) ? -crop_height : crop_height;
-
- if (!y || !u || !v || !sample ||
- src_width <= 0 || crop_width <= 0 ||
- src_height == 0 || crop_height == 0) {
- return -1;
- }
- if (src_height < 0) {
- inv_crop_height = -inv_crop_height;
- }
-
- // One pass rotation is available for some formats. For the rest, convert
- // to I420 (with optional vertical flipping) into a temporary I420 buffer,
- // and then rotate the I420 to the final destination buffer.
- // For in-place conversion, if destination y is same as source sample,
- // also enable temporary buffer.
- if (need_buf) {
- int y_size = crop_width * abs_crop_height;
- int uv_size = ((crop_width + 1) / 2) * ((abs_crop_height + 1) / 2);
- rotate_buffer = (uint8*)malloc(y_size + uv_size * 2);
- if (!rotate_buffer) {
- return 1; // Out of memory runtime error.
- }
- y = rotate_buffer;
- u = y + y_size;
- v = u + uv_size;
- y_stride = crop_width;
- u_stride = v_stride = ((crop_width + 1) / 2);
- }
-
- switch (format) {
- // Single plane formats
- case FOURCC_YUY2:
- src = sample + (aligned_src_width * crop_y + crop_x) * 2;
- r = YUY2ToI420(src, aligned_src_width * 2,
- y, y_stride,
- u, u_stride,
- v, v_stride,
- crop_width, inv_crop_height);
- break;
- case FOURCC_UYVY:
- src = sample + (aligned_src_width * crop_y + crop_x) * 2;
- r = UYVYToI420(src, aligned_src_width * 2,
- y, y_stride,
- u, u_stride,
- v, v_stride,
- crop_width, inv_crop_height);
- break;
- case FOURCC_RGBP:
- src = sample + (src_width * crop_y + crop_x) * 2;
- r = RGB565ToI420(src, src_width * 2,
- y, y_stride,
- u, u_stride,
- v, v_stride,
- crop_width, inv_crop_height);
- break;
- case FOURCC_RGBO:
- src = sample + (src_width * crop_y + crop_x) * 2;
- r = ARGB1555ToI420(src, src_width * 2,
- y, y_stride,
- u, u_stride,
- v, v_stride,
- crop_width, inv_crop_height);
- break;
- case FOURCC_R444:
- src = sample + (src_width * crop_y + crop_x) * 2;
- r = ARGB4444ToI420(src, src_width * 2,
- y, y_stride,
- u, u_stride,
- v, v_stride,
- crop_width, inv_crop_height);
- break;
- case FOURCC_24BG:
- src = sample + (src_width * crop_y + crop_x) * 3;
- r = RGB24ToI420(src, src_width * 3,
- y, y_stride,
- u, u_stride,
- v, v_stride,
- crop_width, inv_crop_height);
- break;
- case FOURCC_RAW:
- src = sample + (src_width * crop_y + crop_x) * 3;
- r = RAWToI420(src, src_width * 3,
- y, y_stride,
- u, u_stride,
- v, v_stride,
- crop_width, inv_crop_height);
- break;
- case FOURCC_ARGB:
- src = sample + (src_width * crop_y + crop_x) * 4;
- r = ARGBToI420(src, src_width * 4,
- y, y_stride,
- u, u_stride,
- v, v_stride,
- crop_width, inv_crop_height);
- break;
- case FOURCC_BGRA:
- src = sample + (src_width * crop_y + crop_x) * 4;
- r = BGRAToI420(src, src_width * 4,
- y, y_stride,
- u, u_stride,
- v, v_stride,
- crop_width, inv_crop_height);
- break;
- case FOURCC_ABGR:
- src = sample + (src_width * crop_y + crop_x) * 4;
- r = ABGRToI420(src, src_width * 4,
- y, y_stride,
- u, u_stride,
- v, v_stride,
- crop_width, inv_crop_height);
- break;
- case FOURCC_RGBA:
- src = sample + (src_width * crop_y + crop_x) * 4;
- r = RGBAToI420(src, src_width * 4,
- y, y_stride,
- u, u_stride,
- v, v_stride,
- crop_width, inv_crop_height);
- break;
- case FOURCC_I400:
- src = sample + src_width * crop_y + crop_x;
- r = I400ToI420(src, src_width,
- y, y_stride,
- u, u_stride,
- v, v_stride,
- crop_width, inv_crop_height);
- break;
- // Biplanar formats
- case FOURCC_NV12:
- src = sample + (src_width * crop_y + crop_x);
- src_uv = sample + (src_width * src_height) +
- ((crop_y / 2) * aligned_src_width) + ((crop_x / 2) * 2);
- r = NV12ToI420Rotate(src, src_width,
- src_uv, aligned_src_width,
- y, y_stride,
- u, u_stride,
- v, v_stride,
- crop_width, inv_crop_height, rotation);
- break;
- case FOURCC_NV21:
- src = sample + (src_width * crop_y + crop_x);
- src_uv = sample + (src_width * src_height) +
- ((crop_y / 2) * aligned_src_width) + ((crop_x / 2) * 2);
- // Call NV12 but with u and v parameters swapped.
- r = NV12ToI420Rotate(src, src_width,
- src_uv, aligned_src_width,
- y, y_stride,
- v, v_stride,
- u, u_stride,
- crop_width, inv_crop_height, rotation);
- break;
- case FOURCC_M420:
- src = sample + (src_width * crop_y) * 12 / 8 + crop_x;
- r = M420ToI420(src, src_width,
- y, y_stride,
- u, u_stride,
- v, v_stride,
- crop_width, inv_crop_height);
- break;
- // Triplanar formats
- case FOURCC_I420:
- case FOURCC_YU12:
- case FOURCC_YV12: {
- const uint8* src_y = sample + (src_width * crop_y + crop_x);
- const uint8* src_u;
- const uint8* src_v;
- int halfwidth = (src_width + 1) / 2;
- int halfheight = (abs_src_height + 1) / 2;
- if (format == FOURCC_YV12) {
- src_v = sample + src_width * abs_src_height +
- (halfwidth * crop_y + crop_x) / 2;
- src_u = sample + src_width * abs_src_height +
- halfwidth * (halfheight + crop_y / 2) + crop_x / 2;
- } else {
- src_u = sample + src_width * abs_src_height +
- (halfwidth * crop_y + crop_x) / 2;
- src_v = sample + src_width * abs_src_height +
- halfwidth * (halfheight + crop_y / 2) + crop_x / 2;
- }
- r = I420Rotate(src_y, src_width,
- src_u, halfwidth,
- src_v, halfwidth,
- y, y_stride,
- u, u_stride,
- v, v_stride,
- crop_width, inv_crop_height, rotation);
- break;
- }
- case FOURCC_I422:
- case FOURCC_YV16: {
- const uint8* src_y = sample + src_width * crop_y + crop_x;
- const uint8* src_u;
- const uint8* src_v;
- int halfwidth = (src_width + 1) / 2;
- if (format == FOURCC_YV16) {
- src_v = sample + src_width * abs_src_height +
- halfwidth * crop_y + crop_x / 2;
- src_u = sample + src_width * abs_src_height +
- halfwidth * (abs_src_height + crop_y) + crop_x / 2;
- } else {
- src_u = sample + src_width * abs_src_height +
- halfwidth * crop_y + crop_x / 2;
- src_v = sample + src_width * abs_src_height +
- halfwidth * (abs_src_height + crop_y) + crop_x / 2;
- }
- r = I422ToI420(src_y, src_width,
- src_u, halfwidth,
- src_v, halfwidth,
- y, y_stride,
- u, u_stride,
- v, v_stride,
- crop_width, inv_crop_height);
- break;
- }
- case FOURCC_I444:
- case FOURCC_YV24: {
- const uint8* src_y = sample + src_width * crop_y + crop_x;
- const uint8* src_u;
- const uint8* src_v;
- if (format == FOURCC_YV24) {
- src_v = sample + src_width * (abs_src_height + crop_y) + crop_x;
- src_u = sample + src_width * (abs_src_height * 2 + crop_y) + crop_x;
- } else {
- src_u = sample + src_width * (abs_src_height + crop_y) + crop_x;
- src_v = sample + src_width * (abs_src_height * 2 + crop_y) + crop_x;
- }
- r = I444ToI420(src_y, src_width,
- src_u, src_width,
- src_v, src_width,
- y, y_stride,
- u, u_stride,
- v, v_stride,
- crop_width, inv_crop_height);
- break;
- }
- case FOURCC_I411: {
- int quarterwidth = (src_width + 3) / 4;
- const uint8* src_y = sample + src_width * crop_y + crop_x;
- const uint8* src_u = sample + src_width * abs_src_height +
- quarterwidth * crop_y + crop_x / 4;
- const uint8* src_v = sample + src_width * abs_src_height +
- quarterwidth * (abs_src_height + crop_y) + crop_x / 4;
- r = I411ToI420(src_y, src_width,
- src_u, quarterwidth,
- src_v, quarterwidth,
- y, y_stride,
- u, u_stride,
- v, v_stride,
- crop_width, inv_crop_height);
- break;
- }
-#ifdef HAVE_JPEG
- case FOURCC_MJPG:
- r = MJPGToI420(sample, sample_size,
- y, y_stride,
- u, u_stride,
- v, v_stride,
- src_width, abs_src_height, crop_width, inv_crop_height);
- break;
-#endif
- default:
- r = -1; // unknown fourcc - return failure code.
- }
-
- if (need_buf) {
- if (!r) {
- r = I420Rotate(y, y_stride,
- u, u_stride,
- v, v_stride,
- tmp_y, tmp_y_stride,
- tmp_u, tmp_u_stride,
- tmp_v, tmp_v_stride,
- crop_width, abs_crop_height, rotation);
- }
- free(rotate_buffer);
- }
-
- return r;
-}
-
-#ifdef __cplusplus
-} // extern "C"
-} // namespace libyuv
-#endif
diff --git a/third_party/aom/third_party/libyuv/source/cpu_id.cc b/third_party/aom/third_party/libyuv/source/cpu_id.cc
deleted file mode 100644
index 72f686e3b..000000000
--- a/third_party/aom/third_party/libyuv/source/cpu_id.cc
+++ /dev/null
@@ -1,307 +0,0 @@
-/*
- * Copyright 2011 The LibYuv Project Authors. All rights reserved.
- *
- * Use of this source code is governed by a BSD-style license
- * that can be found in the LICENSE file in the root of the source
- * tree. An additional intellectual property rights grant can be found
- * in the file PATENTS. All contributing project authors may
- * be found in the AUTHORS file in the root of the source tree.
- */
-
-#include "libyuv/cpu_id.h"
-
-#if (defined(_MSC_VER) && !defined(__clang__)) && !defined(__clang__)
-#include <intrin.h> // For __cpuidex()
-#endif
-#if !defined(__pnacl__) && !defined(__CLR_VER) && \
- !defined(__native_client__) && (defined(_M_IX86) || defined(_M_X64)) && \
- defined(_MSC_VER) && !defined(__clang__) && (_MSC_FULL_VER >= 160040219)
-#include <immintrin.h> // For _xgetbv()
-#endif
-
-#if !defined(__native_client__)
-#include <stdlib.h> // For getenv()
-#endif
-
-// For ArmCpuCaps() but unittested on all platforms
-#include <stdio.h>
-#include <string.h>
-
-#include "libyuv/basic_types.h" // For CPU_X86
-
-#ifdef __cplusplus
-namespace libyuv {
-extern "C" {
-#endif
-
-// For functions that use the stack and have runtime checks for overflow,
-// use SAFEBUFFERS to avoid additional check.
-#if (defined(_MSC_VER) && !defined(__clang__)) && (_MSC_FULL_VER >= 160040219)
-#define SAFEBUFFERS __declspec(safebuffers)
-#else
-#define SAFEBUFFERS
-#endif
-
-// Low level cpuid for X86.
-#if (defined(_M_IX86) || defined(_M_X64) || \
- defined(__i386__) || defined(__x86_64__)) && \
- !defined(__pnacl__) && !defined(__CLR_VER)
-LIBYUV_API
-void CpuId(uint32 info_eax, uint32 info_ecx, uint32* cpu_info) {
-#if (defined(_MSC_VER) && !defined(__clang__)) && !defined(__clang__)
-// Visual C version uses intrinsic or inline x86 assembly.
-#if (_MSC_FULL_VER >= 160040219)
- __cpuidex((int*)(cpu_info), info_eax, info_ecx);
-#elif defined(_M_IX86)
- __asm {
- mov eax, info_eax
- mov ecx, info_ecx
- mov edi, cpu_info
- cpuid
- mov [edi], eax
- mov [edi + 4], ebx
- mov [edi + 8], ecx
- mov [edi + 12], edx
- }
-#else
- if (info_ecx == 0) {
- __cpuid((int*)(cpu_info), info_eax);
- } else {
- cpu_info[3] = cpu_info[2] = cpu_info[1] = cpu_info[0] = 0;
- }
-#endif
-// GCC version uses inline x86 assembly.
-#else // (defined(_MSC_VER) && !defined(__clang__)) && !defined(__clang__)
- uint32 info_ebx, info_edx;
- asm volatile ( // NOLINT
-#if defined( __i386__) && defined(__PIC__)
- // Preserve ebx for fpic 32 bit.
- "mov %%ebx, %%edi \n"
- "cpuid \n"
- "xchg %%edi, %%ebx \n"
- : "=D" (info_ebx),
-#else
- "cpuid \n"
- : "=b" (info_ebx),
-#endif // defined( __i386__) && defined(__PIC__)
- "+a" (info_eax), "+c" (info_ecx), "=d" (info_edx));
- cpu_info[0] = info_eax;
- cpu_info[1] = info_ebx;
- cpu_info[2] = info_ecx;
- cpu_info[3] = info_edx;
-#endif // (defined(_MSC_VER) && !defined(__clang__)) && !defined(__clang__)
-}
-#else // (defined(_M_IX86) || defined(_M_X64) ...
-LIBYUV_API
-void CpuId(uint32 eax, uint32 ecx, uint32* cpu_info) {
- cpu_info[0] = cpu_info[1] = cpu_info[2] = cpu_info[3] = 0;
-}
-#endif
-
-// TODO(fbarchard): Enable xgetbv when validator supports it.
-#if (defined(_M_IX86) || defined(_M_X64) || \
- defined(__i386__) || defined(__x86_64__)) && \
- !defined(__pnacl__) && !defined(__CLR_VER) && !defined(__native_client__)
-#define HAS_XGETBV
-// X86 CPUs have xgetbv to detect OS saves high parts of ymm registers.
-int TestOsSaveYmm() {
- uint32 xcr0 = 0u;
-#if (defined(_MSC_VER) && !defined(__clang__)) && (_MSC_FULL_VER >= 160040219)
- xcr0 = (uint32)(_xgetbv(0)); // VS2010 SP1 required.
-#elif defined(_M_IX86) && defined(_MSC_VER) && !defined(__clang__)
- __asm {
- xor ecx, ecx // xcr 0
- _asm _emit 0x0f _asm _emit 0x01 _asm _emit 0xd0 // For VS2010 and earlier.
- mov xcr0, eax
- }
-#elif defined(__i386__) || defined(__x86_64__)
- asm(".byte 0x0f, 0x01, 0xd0" : "=a" (xcr0) : "c" (0) : "%edx");
-#endif // defined(__i386__) || defined(__x86_64__)
- return((xcr0 & 6) == 6); // Is ymm saved?
-}
-#endif // defined(_M_IX86) || defined(_M_X64) ..
-
-// based on libaom arm_cpudetect.c
-// For Arm, but public to allow testing on any CPU
-LIBYUV_API SAFEBUFFERS
-int ArmCpuCaps(const char* cpuinfo_name) {
- char cpuinfo_line[512];
- FILE* f = fopen(cpuinfo_name, "r");
- if (!f) {
- // Assume Neon if /proc/cpuinfo is unavailable.
- // This will occur for Chrome sandbox for Pepper or Render process.
- return kCpuHasNEON;
- }
- while (fgets(cpuinfo_line, sizeof(cpuinfo_line) - 1, f)) {
- if (memcmp(cpuinfo_line, "Features", 8) == 0) {
- char* p = strstr(cpuinfo_line, " neon");
- if (p && (p[5] == ' ' || p[5] == '\n')) {
- fclose(f);
- return kCpuHasNEON;
- }
- // aarch64 uses asimd for Neon.
- p = strstr(cpuinfo_line, " asimd");
- if (p && (p[6] == ' ' || p[6] == '\n')) {
- fclose(f);
- return kCpuHasNEON;
- }
- }
- }
- fclose(f);
- return 0;
-}
-
-#if defined(__mips__) && defined(__linux__)
-static int MipsCpuCaps(const char* search_string) {
- char cpuinfo_line[512];
- const char* file_name = "/proc/cpuinfo";
- FILE* f = fopen(file_name, "r");
- if (!f) {
- // Assume DSP if /proc/cpuinfo is unavailable.
- // This will occur for Chrome sandbox for Pepper or Render process.
- return kCpuHasMIPS_DSP;
- }
- while (fgets(cpuinfo_line, sizeof(cpuinfo_line) - 1, f) != NULL) {
- if (strstr(cpuinfo_line, search_string) != NULL) {
- fclose(f);
- return kCpuHasMIPS_DSP;
- }
- }
- fclose(f);
- return 0;
-}
-#endif
-
-// CPU detect function for SIMD instruction sets.
-LIBYUV_API
-int cpu_info_ = kCpuInit; // cpu_info is not initialized yet.
-
-// Test environment variable for disabling CPU features. Any non-zero value
-// to disable. Zero ignored to make it easy to set the variable on/off.
-#if !defined(__native_client__) && !defined(_M_ARM)
-
-static LIBYUV_BOOL TestEnv(const char* name) {
- const char* var = getenv(name);
- if (var) {
- if (var[0] != '0') {
- return LIBYUV_TRUE;
- }
- }
- return LIBYUV_FALSE;
-}
-#else // nacl does not support getenv().
-static LIBYUV_BOOL TestEnv(const char*) {
- return LIBYUV_FALSE;
-}
-#endif
-
-LIBYUV_API SAFEBUFFERS
-int InitCpuFlags(void) {
-#if !defined(__pnacl__) && !defined(__CLR_VER) && defined(CPU_X86)
-
- uint32 cpu_info0[4] = { 0, 0, 0, 0 };
- uint32 cpu_info1[4] = { 0, 0, 0, 0 };
- uint32 cpu_info7[4] = { 0, 0, 0, 0 };
- CpuId(0, 0, cpu_info0);
- CpuId(1, 0, cpu_info1);
- if (cpu_info0[0] >= 7) {
- CpuId(7, 0, cpu_info7);
- }
- cpu_info_ = ((cpu_info1[3] & 0x04000000) ? kCpuHasSSE2 : 0) |
- ((cpu_info1[2] & 0x00000200) ? kCpuHasSSSE3 : 0) |
- ((cpu_info1[2] & 0x00080000) ? kCpuHasSSE41 : 0) |
- ((cpu_info1[2] & 0x00100000) ? kCpuHasSSE42 : 0) |
- ((cpu_info7[1] & 0x00000200) ? kCpuHasERMS : 0) |
- ((cpu_info1[2] & 0x00001000) ? kCpuHasFMA3 : 0) |
- kCpuHasX86;
-
-#ifdef HAS_XGETBV
- if ((cpu_info1[2] & 0x18000000) == 0x18000000 && // AVX and OSSave
- TestOsSaveYmm()) { // Saves YMM.
- cpu_info_ |= ((cpu_info7[1] & 0x00000020) ? kCpuHasAVX2 : 0) |
- kCpuHasAVX;
- }
-#endif
- // Environment variable overrides for testing.
- if (TestEnv("LIBYUV_DISABLE_X86")) {
- cpu_info_ &= ~kCpuHasX86;
- }
- if (TestEnv("LIBYUV_DISABLE_SSE2")) {
- cpu_info_ &= ~kCpuHasSSE2;
- }
- if (TestEnv("LIBYUV_DISABLE_SSSE3")) {
- cpu_info_ &= ~kCpuHasSSSE3;
- }
- if (TestEnv("LIBYUV_DISABLE_SSE41")) {
- cpu_info_ &= ~kCpuHasSSE41;
- }
- if (TestEnv("LIBYUV_DISABLE_SSE42")) {
- cpu_info_ &= ~kCpuHasSSE42;
- }
- if (TestEnv("LIBYUV_DISABLE_AVX")) {
- cpu_info_ &= ~kCpuHasAVX;
- }
- if (TestEnv("LIBYUV_DISABLE_AVX2")) {
- cpu_info_ &= ~kCpuHasAVX2;
- }
- if (TestEnv("LIBYUV_DISABLE_ERMS")) {
- cpu_info_ &= ~kCpuHasERMS;
- }
- if (TestEnv("LIBYUV_DISABLE_FMA3")) {
- cpu_info_ &= ~kCpuHasFMA3;
- }
-#endif
-#if defined(__mips__) && defined(__linux__)
- // Linux mips parse text file for dsp detect.
- cpu_info_ = MipsCpuCaps("dsp"); // set kCpuHasMIPS_DSP.
-#if defined(__mips_dspr2)
- cpu_info_ |= kCpuHasMIPS_DSPR2;
-#endif
- cpu_info_ |= kCpuHasMIPS;
-
- if (getenv("LIBYUV_DISABLE_MIPS")) {
- cpu_info_ &= ~kCpuHasMIPS;
- }
- if (getenv("LIBYUV_DISABLE_MIPS_DSP")) {
- cpu_info_ &= ~kCpuHasMIPS_DSP;
- }
- if (getenv("LIBYUV_DISABLE_MIPS_DSPR2")) {
- cpu_info_ &= ~kCpuHasMIPS_DSPR2;
- }
-#endif
-#if defined(__arm__) || defined(__aarch64__)
-// gcc -mfpu=neon defines __ARM_NEON__
-// __ARM_NEON__ generates code that requires Neon. NaCL also requires Neon.
-// For Linux, /proc/cpuinfo can be tested but without that assume Neon.
-#if defined(__ARM_NEON__) || defined(__native_client__) || !defined(__linux__)
- cpu_info_ = kCpuHasNEON;
-// For aarch64(arm64), /proc/cpuinfo's feature is not complete, e.g. no neon
-// flag in it.
-// So for aarch64, neon enabling is hard coded here.
-#endif
-#if defined(__aarch64__)
- cpu_info_ = kCpuHasNEON;
-#else
- // Linux arm parse text file for neon detect.
- cpu_info_ = ArmCpuCaps("/proc/cpuinfo");
-#endif
- cpu_info_ |= kCpuHasARM;
- if (TestEnv("LIBYUV_DISABLE_NEON")) {
- cpu_info_ &= ~kCpuHasNEON;
- }
-#endif // __arm__
- if (TestEnv("LIBYUV_DISABLE_ASM")) {
- cpu_info_ = 0;
- }
- return cpu_info_;
-}
-
-LIBYUV_API
-void MaskCpuFlags(int enable_flags) {
- cpu_info_ = InitCpuFlags() & enable_flags;
-}
-
-#ifdef __cplusplus
-} // extern "C"
-} // namespace libyuv
-#endif
diff --git a/third_party/aom/third_party/libyuv/source/mjpeg_decoder.cc b/third_party/aom/third_party/libyuv/source/mjpeg_decoder.cc
deleted file mode 100644
index 75f8a610e..000000000
--- a/third_party/aom/third_party/libyuv/source/mjpeg_decoder.cc
+++ /dev/null
@@ -1,572 +0,0 @@
-/*
- * Copyright 2012 The LibYuv Project Authors. All rights reserved.
- *
- * Use of this source code is governed by a BSD-style license
- * that can be found in the LICENSE file in the root of the source
- * tree. An additional intellectual property rights grant can be found
- * in the file PATENTS. All contributing project authors may
- * be found in the AUTHORS file in the root of the source tree.
- */
-
-#include "libyuv/mjpeg_decoder.h"
-
-#ifdef HAVE_JPEG
-#include <assert.h>
-
-#if !defined(__pnacl__) && !defined(__CLR_VER) && \
- !defined(COVERAGE_ENABLED) && !defined(TARGET_IPHONE_SIMULATOR)
-// Must be included before jpeglib.
-#include <setjmp.h>
-#define HAVE_SETJMP
-
-#if defined(_MSC_VER)
-// disable warning 4324: structure was padded due to __declspec(align())
-#pragma warning(disable:4324)
-#endif
-
-#endif
-struct FILE; // For jpeglib.h.
-
-// C++ build requires extern C for jpeg internals.
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-#include <jpeglib.h>
-
-#ifdef __cplusplus
-} // extern "C"
-#endif
-
-#include "libyuv/planar_functions.h" // For CopyPlane().
-
-namespace libyuv {
-
-#ifdef HAVE_SETJMP
-struct SetJmpErrorMgr {
- jpeg_error_mgr base; // Must be at the top
- jmp_buf setjmp_buffer;
-};
-#endif
-
-const int MJpegDecoder::kColorSpaceUnknown = JCS_UNKNOWN;
-const int MJpegDecoder::kColorSpaceGrayscale = JCS_GRAYSCALE;
-const int MJpegDecoder::kColorSpaceRgb = JCS_RGB;
-const int MJpegDecoder::kColorSpaceYCbCr = JCS_YCbCr;
-const int MJpegDecoder::kColorSpaceCMYK = JCS_CMYK;
-const int MJpegDecoder::kColorSpaceYCCK = JCS_YCCK;
-
-// Methods that are passed to jpeglib.
-boolean fill_input_buffer(jpeg_decompress_struct* cinfo);
-void init_source(jpeg_decompress_struct* cinfo);
-void skip_input_data(jpeg_decompress_struct* cinfo,
- long num_bytes); // NOLINT
-void term_source(jpeg_decompress_struct* cinfo);
-void ErrorHandler(jpeg_common_struct* cinfo);
-
-MJpegDecoder::MJpegDecoder()
- : has_scanline_padding_(LIBYUV_FALSE),
- num_outbufs_(0),
- scanlines_(NULL),
- scanlines_sizes_(NULL),
- databuf_(NULL),
- databuf_strides_(NULL) {
- decompress_struct_ = new jpeg_decompress_struct;
- source_mgr_ = new jpeg_source_mgr;
-#ifdef HAVE_SETJMP
- error_mgr_ = new SetJmpErrorMgr;
- decompress_struct_->err = jpeg_std_error(&error_mgr_->base);
- // Override standard exit()-based error handler.
- error_mgr_->base.error_exit = &ErrorHandler;
-#endif
- decompress_struct_->client_data = NULL;
- source_mgr_->init_source = &init_source;
- source_mgr_->fill_input_buffer = &fill_input_buffer;
- source_mgr_->skip_input_data = &skip_input_data;
- source_mgr_->resync_to_restart = &jpeg_resync_to_restart;
- source_mgr_->term_source = &term_source;
- jpeg_create_decompress(decompress_struct_);
- decompress_struct_->src = source_mgr_;
- buf_vec_.buffers = &buf_;
- buf_vec_.len = 1;
-}
-
-MJpegDecoder::~MJpegDecoder() {
- jpeg_destroy_decompress(decompress_struct_);
- delete decompress_struct_;
- delete source_mgr_;
-#ifdef HAVE_SETJMP
- delete error_mgr_;
-#endif
- DestroyOutputBuffers();
-}
-
-LIBYUV_BOOL MJpegDecoder::LoadFrame(const uint8* src, size_t src_len) {
- if (!ValidateJpeg(src, src_len)) {
- return LIBYUV_FALSE;
- }
-
- buf_.data = src;
- buf_.len = static_cast<int>(src_len);
- buf_vec_.pos = 0;
- decompress_struct_->client_data = &buf_vec_;
-#ifdef HAVE_SETJMP
- if (setjmp(error_mgr_->setjmp_buffer)) {
- // We called jpeg_read_header, it experienced an error, and we called
- // longjmp() and rewound the stack to here. Return error.
- return LIBYUV_FALSE;
- }
-#endif
- if (jpeg_read_header(decompress_struct_, TRUE) != JPEG_HEADER_OK) {
- // ERROR: Bad MJPEG header
- return LIBYUV_FALSE;
- }
- AllocOutputBuffers(GetNumComponents());
- for (int i = 0; i < num_outbufs_; ++i) {
- int scanlines_size = GetComponentScanlinesPerImcuRow(i);
- if (scanlines_sizes_[i] != scanlines_size) {
- if (scanlines_[i]) {
- delete scanlines_[i];
- }
- scanlines_[i] = new uint8* [scanlines_size];
- scanlines_sizes_[i] = scanlines_size;
- }
-
- // We allocate padding for the final scanline to pad it up to DCTSIZE bytes
- // to avoid memory errors, since jpeglib only reads full MCUs blocks. For
- // the preceding scanlines, the padding is not needed/wanted because the
- // following addresses will already be valid (they are the initial bytes of
- // the next scanline) and will be overwritten when jpeglib writes out that
- // next scanline.
- int databuf_stride = GetComponentStride(i);
- int databuf_size = scanlines_size * databuf_stride;
- if (databuf_strides_[i] != databuf_stride) {
- if (databuf_[i]) {
- delete databuf_[i];
- }
- databuf_[i] = new uint8[databuf_size];
- databuf_strides_[i] = databuf_stride;
- }
-
- if (GetComponentStride(i) != GetComponentWidth(i)) {
- has_scanline_padding_ = LIBYUV_TRUE;
- }
- }
- return LIBYUV_TRUE;
-}
-
-static int DivideAndRoundUp(int numerator, int denominator) {
- return (numerator + denominator - 1) / denominator;
-}
-
-static int DivideAndRoundDown(int numerator, int denominator) {
- return numerator / denominator;
-}
-
-// Returns width of the last loaded frame.
-int MJpegDecoder::GetWidth() {
- return decompress_struct_->image_width;
-}
-
-// Returns height of the last loaded frame.
-int MJpegDecoder::GetHeight() {
- return decompress_struct_->image_height;
-}
-
-// Returns format of the last loaded frame. The return value is one of the
-// kColorSpace* constants.
-int MJpegDecoder::GetColorSpace() {
- return decompress_struct_->jpeg_color_space;
-}
-
-// Number of color components in the color space.
-int MJpegDecoder::GetNumComponents() {
- return decompress_struct_->num_components;
-}
-
-// Sample factors of the n-th component.
-int MJpegDecoder::GetHorizSampFactor(int component) {
- return decompress_struct_->comp_info[component].h_samp_factor;
-}
-
-int MJpegDecoder::GetVertSampFactor(int component) {
- return decompress_struct_->comp_info[component].v_samp_factor;
-}
-
-int MJpegDecoder::GetHorizSubSampFactor(int component) {
- return decompress_struct_->max_h_samp_factor /
- GetHorizSampFactor(component);
-}
-
-int MJpegDecoder::GetVertSubSampFactor(int component) {
- return decompress_struct_->max_v_samp_factor /
- GetVertSampFactor(component);
-}
-
-int MJpegDecoder::GetImageScanlinesPerImcuRow() {
- return decompress_struct_->max_v_samp_factor * DCTSIZE;
-}
-
-int MJpegDecoder::GetComponentScanlinesPerImcuRow(int component) {
- int vs = GetVertSubSampFactor(component);
- return DivideAndRoundUp(GetImageScanlinesPerImcuRow(), vs);
-}
-
-int MJpegDecoder::GetComponentWidth(int component) {
- int hs = GetHorizSubSampFactor(component);
- return DivideAndRoundUp(GetWidth(), hs);
-}
-
-int MJpegDecoder::GetComponentHeight(int component) {
- int vs = GetVertSubSampFactor(component);
- return DivideAndRoundUp(GetHeight(), vs);
-}
-
-// Get width in bytes padded out to a multiple of DCTSIZE
-int MJpegDecoder::GetComponentStride(int component) {
- return (GetComponentWidth(component) + DCTSIZE - 1) & ~(DCTSIZE - 1);
-}
-
-int MJpegDecoder::GetComponentSize(int component) {
- return GetComponentWidth(component) * GetComponentHeight(component);
-}
-
-LIBYUV_BOOL MJpegDecoder::UnloadFrame() {
-#ifdef HAVE_SETJMP
- if (setjmp(error_mgr_->setjmp_buffer)) {
- // We called jpeg_abort_decompress, it experienced an error, and we called
- // longjmp() and rewound the stack to here. Return error.
- return LIBYUV_FALSE;
- }
-#endif
- jpeg_abort_decompress(decompress_struct_);
- return LIBYUV_TRUE;
-}
-
-// TODO(fbarchard): Allow rectangle to be specified: x, y, width, height.
-LIBYUV_BOOL MJpegDecoder::DecodeToBuffers(
- uint8** planes, int dst_width, int dst_height) {
- if (dst_width != GetWidth() ||
- dst_height > GetHeight()) {
- // ERROR: Bad dimensions
- return LIBYUV_FALSE;
- }
-#ifdef HAVE_SETJMP
- if (setjmp(error_mgr_->setjmp_buffer)) {
- // We called into jpeglib, it experienced an error sometime during this
- // function call, and we called longjmp() and rewound the stack to here.
- // Return error.
- return LIBYUV_FALSE;
- }
-#endif
- if (!StartDecode()) {
- return LIBYUV_FALSE;
- }
- SetScanlinePointers(databuf_);
- int lines_left = dst_height;
- // Compute amount of lines to skip to implement vertical crop.
- // TODO(fbarchard): Ensure skip is a multiple of maximum component
- // subsample. ie 2
- int skip = (GetHeight() - dst_height) / 2;
- if (skip > 0) {
- // There is no API to skip lines in the output data, so we read them
- // into the temp buffer.
- while (skip >= GetImageScanlinesPerImcuRow()) {
- if (!DecodeImcuRow()) {
- FinishDecode();
- return LIBYUV_FALSE;
- }
- skip -= GetImageScanlinesPerImcuRow();
- }
- if (skip > 0) {
- // Have a partial iMCU row left over to skip. Must read it and then
- // copy the parts we want into the destination.
- if (!DecodeImcuRow()) {
- FinishDecode();
- return LIBYUV_FALSE;
- }
- for (int i = 0; i < num_outbufs_; ++i) {
- // TODO(fbarchard): Compute skip to avoid this
- assert(skip % GetVertSubSampFactor(i) == 0);
- int rows_to_skip =
- DivideAndRoundDown(skip, GetVertSubSampFactor(i));
- int scanlines_to_copy = GetComponentScanlinesPerImcuRow(i) -
- rows_to_skip;
- int data_to_skip = rows_to_skip * GetComponentStride(i);
- CopyPlane(databuf_[i] + data_to_skip, GetComponentStride(i),
- planes[i], GetComponentWidth(i),
- GetComponentWidth(i), scanlines_to_copy);
- planes[i] += scanlines_to_copy * GetComponentWidth(i);
- }
- lines_left -= (GetImageScanlinesPerImcuRow() - skip);
- }
- }
-
- // Read full MCUs but cropped horizontally
- for (; lines_left > GetImageScanlinesPerImcuRow();
- lines_left -= GetImageScanlinesPerImcuRow()) {
- if (!DecodeImcuRow()) {
- FinishDecode();
- return LIBYUV_FALSE;
- }
- for (int i = 0; i < num_outbufs_; ++i) {
- int scanlines_to_copy = GetComponentScanlinesPerImcuRow(i);
- CopyPlane(databuf_[i], GetComponentStride(i),
- planes[i], GetComponentWidth(i),
- GetComponentWidth(i), scanlines_to_copy);
- planes[i] += scanlines_to_copy * GetComponentWidth(i);
- }
- }
-
- if (lines_left > 0) {
- // Have a partial iMCU row left over to decode.
- if (!DecodeImcuRow()) {
- FinishDecode();
- return LIBYUV_FALSE;
- }
- for (int i = 0; i < num_outbufs_; ++i) {
- int scanlines_to_copy =
- DivideAndRoundUp(lines_left, GetVertSubSampFactor(i));
- CopyPlane(databuf_[i], GetComponentStride(i),
- planes[i], GetComponentWidth(i),
- GetComponentWidth(i), scanlines_to_copy);
- planes[i] += scanlines_to_copy * GetComponentWidth(i);
- }
- }
- return FinishDecode();
-}
-
-LIBYUV_BOOL MJpegDecoder::DecodeToCallback(CallbackFunction fn, void* opaque,
- int dst_width, int dst_height) {
- if (dst_width != GetWidth() ||
- dst_height > GetHeight()) {
- // ERROR: Bad dimensions
- return LIBYUV_FALSE;
- }
-#ifdef HAVE_SETJMP
- if (setjmp(error_mgr_->setjmp_buffer)) {
- // We called into jpeglib, it experienced an error sometime during this
- // function call, and we called longjmp() and rewound the stack to here.
- // Return error.
- return LIBYUV_FALSE;
- }
-#endif
- if (!StartDecode()) {
- return LIBYUV_FALSE;
- }
- SetScanlinePointers(databuf_);
- int lines_left = dst_height;
- // TODO(fbarchard): Compute amount of lines to skip to implement vertical crop
- int skip = (GetHeight() - dst_height) / 2;
- if (skip > 0) {
- while (skip >= GetImageScanlinesPerImcuRow()) {
- if (!DecodeImcuRow()) {
- FinishDecode();
- return LIBYUV_FALSE;
- }
- skip -= GetImageScanlinesPerImcuRow();
- }
- if (skip > 0) {
- // Have a partial iMCU row left over to skip.
- if (!DecodeImcuRow()) {
- FinishDecode();
- return LIBYUV_FALSE;
- }
- for (int i = 0; i < num_outbufs_; ++i) {
- // TODO(fbarchard): Compute skip to avoid this
- assert(skip % GetVertSubSampFactor(i) == 0);
- int rows_to_skip = DivideAndRoundDown(skip, GetVertSubSampFactor(i));
- int data_to_skip = rows_to_skip * GetComponentStride(i);
- // Change our own data buffer pointers so we can pass them to the
- // callback.
- databuf_[i] += data_to_skip;
- }
- int scanlines_to_copy = GetImageScanlinesPerImcuRow() - skip;
- (*fn)(opaque, databuf_, databuf_strides_, scanlines_to_copy);
- // Now change them back.
- for (int i = 0; i < num_outbufs_; ++i) {
- int rows_to_skip = DivideAndRoundDown(skip, GetVertSubSampFactor(i));
- int data_to_skip = rows_to_skip * GetComponentStride(i);
- databuf_[i] -= data_to_skip;
- }
- lines_left -= scanlines_to_copy;
- }
- }
- // Read full MCUs until we get to the crop point.
- for (; lines_left >= GetImageScanlinesPerImcuRow();
- lines_left -= GetImageScanlinesPerImcuRow()) {
- if (!DecodeImcuRow()) {
- FinishDecode();
- return LIBYUV_FALSE;
- }
- (*fn)(opaque, databuf_, databuf_strides_, GetImageScanlinesPerImcuRow());
- }
- if (lines_left > 0) {
- // Have a partial iMCU row left over to decode.
- if (!DecodeImcuRow()) {
- FinishDecode();
- return LIBYUV_FALSE;
- }
- (*fn)(opaque, databuf_, databuf_strides_, lines_left);
- }
- return FinishDecode();
-}
-
-void init_source(j_decompress_ptr cinfo) {
- fill_input_buffer(cinfo);
-}
-
-boolean fill_input_buffer(j_decompress_ptr cinfo) {
- BufferVector* buf_vec = reinterpret_cast<BufferVector*>(cinfo->client_data);
- if (buf_vec->pos >= buf_vec->len) {
- assert(0 && "No more data");
- // ERROR: No more data
- return FALSE;
- }
- cinfo->src->next_input_byte = buf_vec->buffers[buf_vec->pos].data;
- cinfo->src->bytes_in_buffer = buf_vec->buffers[buf_vec->pos].len;
- ++buf_vec->pos;
- return TRUE;
-}
-
-void skip_input_data(j_decompress_ptr cinfo,
- long num_bytes) { // NOLINT
- cinfo->src->next_input_byte += num_bytes;
-}
-
-void term_source(j_decompress_ptr cinfo) {
- // Nothing to do.
-}
-
-#ifdef HAVE_SETJMP
-void ErrorHandler(j_common_ptr cinfo) {
- // This is called when a jpeglib command experiences an error. Unfortunately
- // jpeglib's error handling model is not very flexible, because it expects the
- // error handler to not return--i.e., it wants the program to terminate. To
- // recover from errors we use setjmp() as shown in their example. setjmp() is
- // C's implementation for the "call with current continuation" functionality
- // seen in some functional programming languages.
- // A formatted message can be output, but is unsafe for release.
-#ifdef DEBUG
- char buf[JMSG_LENGTH_MAX];
- (*cinfo->err->format_message)(cinfo, buf);
- // ERROR: Error in jpeglib: buf
-#endif
-
- SetJmpErrorMgr* mgr = reinterpret_cast<SetJmpErrorMgr*>(cinfo->err);
- // This rewinds the call stack to the point of the corresponding setjmp()
- // and causes it to return (for a second time) with value 1.
- longjmp(mgr->setjmp_buffer, 1);
-}
-#endif
-
-void MJpegDecoder::AllocOutputBuffers(int num_outbufs) {
- if (num_outbufs != num_outbufs_) {
- // We could perhaps optimize this case to resize the output buffers without
- // necessarily having to delete and recreate each one, but it's not worth
- // it.
- DestroyOutputBuffers();
-
- scanlines_ = new uint8** [num_outbufs];
- scanlines_sizes_ = new int[num_outbufs];
- databuf_ = new uint8* [num_outbufs];
- databuf_strides_ = new int[num_outbufs];
-
- for (int i = 0; i < num_outbufs; ++i) {
- scanlines_[i] = NULL;
- scanlines_sizes_[i] = 0;
- databuf_[i] = NULL;
- databuf_strides_[i] = 0;
- }
-
- num_outbufs_ = num_outbufs;
- }
-}
-
-void MJpegDecoder::DestroyOutputBuffers() {
- for (int i = 0; i < num_outbufs_; ++i) {
- delete [] scanlines_[i];
- delete [] databuf_[i];
- }
- delete [] scanlines_;
- delete [] databuf_;
- delete [] scanlines_sizes_;
- delete [] databuf_strides_;
- scanlines_ = NULL;
- databuf_ = NULL;
- scanlines_sizes_ = NULL;
- databuf_strides_ = NULL;
- num_outbufs_ = 0;
-}
-
-// JDCT_IFAST and do_block_smoothing improve performance substantially.
-LIBYUV_BOOL MJpegDecoder::StartDecode() {
- decompress_struct_->raw_data_out = TRUE;
- decompress_struct_->dct_method = JDCT_IFAST; // JDCT_ISLOW is default
- decompress_struct_->dither_mode = JDITHER_NONE;
- // Not applicable to 'raw':
- decompress_struct_->do_fancy_upsampling = (boolean)(LIBYUV_FALSE);
- // Only for buffered mode:
- decompress_struct_->enable_2pass_quant = (boolean)(LIBYUV_FALSE);
- // Blocky but fast:
- decompress_struct_->do_block_smoothing = (boolean)(LIBYUV_FALSE);
-
- if (!jpeg_start_decompress(decompress_struct_)) {
- // ERROR: Couldn't start JPEG decompressor";
- return LIBYUV_FALSE;
- }
- return LIBYUV_TRUE;
-}
-
-LIBYUV_BOOL MJpegDecoder::FinishDecode() {
- // jpeglib considers it an error if we finish without decoding the whole
- // image, so we call "abort" rather than "finish".
- jpeg_abort_decompress(decompress_struct_);
- return LIBYUV_TRUE;
-}
-
-void MJpegDecoder::SetScanlinePointers(uint8** data) {
- for (int i = 0; i < num_outbufs_; ++i) {
- uint8* data_i = data[i];
- for (int j = 0; j < scanlines_sizes_[i]; ++j) {
- scanlines_[i][j] = data_i;
- data_i += GetComponentStride(i);
- }
- }
-}
-
-inline LIBYUV_BOOL MJpegDecoder::DecodeImcuRow() {
- return (unsigned int)(GetImageScanlinesPerImcuRow()) ==
- jpeg_read_raw_data(decompress_struct_,
- scanlines_,
- GetImageScanlinesPerImcuRow());
-}
-
-// The helper function which recognizes the jpeg sub-sampling type.
-JpegSubsamplingType MJpegDecoder::JpegSubsamplingTypeHelper(
- int* subsample_x, int* subsample_y, int number_of_components) {
- if (number_of_components == 3) { // Color images.
- if (subsample_x[0] == 1 && subsample_y[0] == 1 &&
- subsample_x[1] == 2 && subsample_y[1] == 2 &&
- subsample_x[2] == 2 && subsample_y[2] == 2) {
- return kJpegYuv420;
- } else if (subsample_x[0] == 1 && subsample_y[0] == 1 &&
- subsample_x[1] == 2 && subsample_y[1] == 1 &&
- subsample_x[2] == 2 && subsample_y[2] == 1) {
- return kJpegYuv422;
- } else if (subsample_x[0] == 1 && subsample_y[0] == 1 &&
- subsample_x[1] == 1 && subsample_y[1] == 1 &&
- subsample_x[2] == 1 && subsample_y[2] == 1) {
- return kJpegYuv444;
- }
- } else if (number_of_components == 1) { // Grey-scale images.
- if (subsample_x[0] == 1 && subsample_y[0] == 1) {
- return kJpegYuv400;
- }
- }
- return kJpegUnknown;
-}
-
-} // namespace libyuv
-#endif // HAVE_JPEG
-
diff --git a/third_party/aom/third_party/libyuv/source/mjpeg_validate.cc b/third_party/aom/third_party/libyuv/source/mjpeg_validate.cc
deleted file mode 100644
index 8edfbe1e7..000000000
--- a/third_party/aom/third_party/libyuv/source/mjpeg_validate.cc
+++ /dev/null
@@ -1,101 +0,0 @@
-/*
- * Copyright 2012 The LibYuv Project Authors. All rights reserved.
- *
- * Use of this source code is governed by a BSD-style license
- * that can be found in the LICENSE file in the root of the source
- * tree. An additional intellectual property rights grant can be found
- * in the file PATENTS. All contributing project authors may
- * be found in the AUTHORS file in the root of the source tree.
- */
-
-#include "libyuv/mjpeg_decoder.h"
-
-#include <string.h> // For memchr.
-
-#ifdef __cplusplus
-namespace libyuv {
-extern "C" {
-#endif
-
-// Enable this to try scasb implementation.
-// #define ENABLE_SCASB 1
-
-#ifdef ENABLE_SCASB
-
-// Multiple of 1.
-__declspec(naked)
-const uint8* ScanRow_ERMS(const uint8* src, uint32 val, int count) {
- __asm {
- mov edx, edi
- mov edi, [esp + 4] // src
- mov eax, [esp + 8] // val
- mov ecx, [esp + 12] // count
- repne scasb
- jne sr99
- mov eax, edi
- sub eax, 1
- mov edi, edx
- ret
-
- sr99:
- mov eax, 0
- mov edi, edx
- ret
- }
-}
-#endif
-
-// Helper function to scan for EOI marker.
-static LIBYUV_BOOL ScanEOI(const uint8* sample, size_t sample_size) {
- const uint8* end = sample + sample_size - 1;
- const uint8* it = sample;
- for (;;) {
-#ifdef ENABLE_SCASB
- it = ScanRow_ERMS(it, 0xff, end - it);
-#else
- it = static_cast<const uint8*>(memchr(it, 0xff, end - it));
-#endif
- if (it == NULL) {
- break;
- }
- if (it[1] == 0xd9) {
- return LIBYUV_TRUE; // Success: Valid jpeg.
- }
- ++it; // Skip over current 0xff.
- }
- // ERROR: Invalid jpeg end code not found. Size sample_size
- return LIBYUV_FALSE;
-}
-
-// Helper function to validate the jpeg appears intact.
-LIBYUV_BOOL ValidateJpeg(const uint8* sample, size_t sample_size) {
- const size_t kBackSearchSize = 1024;
- if (sample_size < 64) {
- // ERROR: Invalid jpeg size: sample_size
- return LIBYUV_FALSE;
- }
- if (sample[0] != 0xff || sample[1] != 0xd8) { // Start Of Image
- // ERROR: Invalid jpeg initial start code
- return LIBYUV_FALSE;
- }
- // Step over SOI marker.
- sample += 2;
- sample_size -= 2;
-
- // Look for the End Of Image (EOI) marker in the end kilobyte of the buffer.
- if (sample_size > kBackSearchSize) {
- if (ScanEOI(sample + sample_size - kBackSearchSize, kBackSearchSize)) {
- return LIBYUV_TRUE; // Success: Valid jpeg.
- }
- // Reduce search size for forward search.
- sample_size = sample_size - kBackSearchSize + 1;
- }
- return ScanEOI(sample, sample_size);
-
-}
-
-#ifdef __cplusplus
-} // extern "C"
-} // namespace libyuv
-#endif
-
diff --git a/third_party/aom/third_party/libyuv/source/planar_functions.cc b/third_party/aom/third_party/libyuv/source/planar_functions.cc
deleted file mode 100644
index b96bd5020..000000000
--- a/third_party/aom/third_party/libyuv/source/planar_functions.cc
+++ /dev/null
@@ -1,2555 +0,0 @@
-/*
- * Copyright 2011 The LibYuv Project Authors. All rights reserved.
- *
- * Use of this source code is governed by a BSD-style license
- * that can be found in the LICENSE file in the root of the source
- * tree. An additional intellectual property rights grant can be found
- * in the file PATENTS. All contributing project authors may
- * be found in the AUTHORS file in the root of the source tree.
- */
-
-#include "libyuv/planar_functions.h"
-
-#include <string.h> // for memset()
-
-#include "libyuv/cpu_id.h"
-#ifdef HAVE_JPEG
-#include "libyuv/mjpeg_decoder.h"
-#endif
-#include "libyuv/row.h"
-
-#ifdef __cplusplus
-namespace libyuv {
-extern "C" {
-#endif
-
-// Copy a plane of data
-LIBYUV_API
-void CopyPlane(const uint8* src_y, int src_stride_y,
- uint8* dst_y, int dst_stride_y,
- int width, int height) {
- int y;
- void (*CopyRow)(const uint8* src, uint8* dst, int width) = CopyRow_C;
- // Coalesce rows.
- if (src_stride_y == width &&
- dst_stride_y == width) {
- width *= height;
- height = 1;
- src_stride_y = dst_stride_y = 0;
- }
- // Nothing to do.
- if (src_y == dst_y && src_stride_y == dst_stride_y) {
- return;
- }
-#if defined(HAS_COPYROW_SSE2)
- if (TestCpuFlag(kCpuHasSSE2)) {
- CopyRow = IS_ALIGNED(width, 32) ? CopyRow_SSE2 : CopyRow_Any_SSE2;
- }
-#endif
-#if defined(HAS_COPYROW_AVX)
- if (TestCpuFlag(kCpuHasAVX)) {
- CopyRow = IS_ALIGNED(width, 64) ? CopyRow_AVX : CopyRow_Any_AVX;
- }
-#endif
-#if defined(HAS_COPYROW_ERMS)
- if (TestCpuFlag(kCpuHasERMS)) {
- CopyRow = CopyRow_ERMS;
- }
-#endif
-#if defined(HAS_COPYROW_NEON)
- if (TestCpuFlag(kCpuHasNEON)) {
- CopyRow = IS_ALIGNED(width, 32) ? CopyRow_NEON : CopyRow_Any_NEON;
- }
-#endif
-#if defined(HAS_COPYROW_MIPS)
- if (TestCpuFlag(kCpuHasMIPS)) {
- CopyRow = CopyRow_MIPS;
- }
-#endif
-
- // Copy plane
- for (y = 0; y < height; ++y) {
- CopyRow(src_y, dst_y, width);
- src_y += src_stride_y;
- dst_y += dst_stride_y;
- }
-}
-
-LIBYUV_API
-void CopyPlane_16(const uint16* src_y, int src_stride_y,
- uint16* dst_y, int dst_stride_y,
- int width, int height) {
- int y;
- void (*CopyRow)(const uint16* src, uint16* dst, int width) = CopyRow_16_C;
- // Coalesce rows.
- if (src_stride_y == width &&
- dst_stride_y == width) {
- width *= height;
- height = 1;
- src_stride_y = dst_stride_y = 0;
- }
-#if defined(HAS_COPYROW_16_SSE2)
- if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(width, 32)) {
- CopyRow = CopyRow_16_SSE2;
- }
-#endif
-#if defined(HAS_COPYROW_16_ERMS)
- if (TestCpuFlag(kCpuHasERMS)) {
- CopyRow = CopyRow_16_ERMS;
- }
-#endif
-#if defined(HAS_COPYROW_16_NEON)
- if (TestCpuFlag(kCpuHasNEON) && IS_ALIGNED(width, 32)) {
- CopyRow = CopyRow_16_NEON;
- }
-#endif
-#if defined(HAS_COPYROW_16_MIPS)
- if (TestCpuFlag(kCpuHasMIPS)) {
- CopyRow = CopyRow_16_MIPS;
- }
-#endif
-
- // Copy plane
- for (y = 0; y < height; ++y) {
- CopyRow(src_y, dst_y, width);
- src_y += src_stride_y;
- dst_y += dst_stride_y;
- }
-}
-
-// Copy I422.
-LIBYUV_API
-int I422Copy(const uint8* src_y, int src_stride_y,
- const uint8* src_u, int src_stride_u,
- const uint8* src_v, int src_stride_v,
- uint8* dst_y, int dst_stride_y,
- uint8* dst_u, int dst_stride_u,
- uint8* dst_v, int dst_stride_v,
- int width, int height) {
- int halfwidth = (width + 1) >> 1;
- if (!src_y || !src_u || !src_v ||
- !dst_y || !dst_u || !dst_v ||
- width <= 0 || height == 0) {
- return -1;
- }
- // Negative height means invert the image.
- if (height < 0) {
- height = -height;
- src_y = src_y + (height - 1) * src_stride_y;
- src_u = src_u + (height - 1) * src_stride_u;
- src_v = src_v + (height - 1) * src_stride_v;
- src_stride_y = -src_stride_y;
- src_stride_u = -src_stride_u;
- src_stride_v = -src_stride_v;
- }
- CopyPlane(src_y, src_stride_y, dst_y, dst_stride_y, width, height);
- CopyPlane(src_u, src_stride_u, dst_u, dst_stride_u, halfwidth, height);
- CopyPlane(src_v, src_stride_v, dst_v, dst_stride_v, halfwidth, height);
- return 0;
-}
-
-// Copy I444.
-LIBYUV_API
-int I444Copy(const uint8* src_y, int src_stride_y,
- const uint8* src_u, int src_stride_u,
- const uint8* src_v, int src_stride_v,
- uint8* dst_y, int dst_stride_y,
- uint8* dst_u, int dst_stride_u,
- uint8* dst_v, int dst_stride_v,
- int width, int height) {
- if (!src_y || !src_u || !src_v ||
- !dst_y || !dst_u || !dst_v ||
- width <= 0 || height == 0) {
- return -1;
- }
- // Negative height means invert the image.
- if (height < 0) {
- height = -height;
- src_y = src_y + (height - 1) * src_stride_y;
- src_u = src_u + (height - 1) * src_stride_u;
- src_v = src_v + (height - 1) * src_stride_v;
- src_stride_y = -src_stride_y;
- src_stride_u = -src_stride_u;
- src_stride_v = -src_stride_v;
- }
-
- CopyPlane(src_y, src_stride_y, dst_y, dst_stride_y, width, height);
- CopyPlane(src_u, src_stride_u, dst_u, dst_stride_u, width, height);
- CopyPlane(src_v, src_stride_v, dst_v, dst_stride_v, width, height);
- return 0;
-}
-
-// Copy I400.
-LIBYUV_API
-int I400ToI400(const uint8* src_y, int src_stride_y,
- uint8* dst_y, int dst_stride_y,
- int width, int height) {
- if (!src_y || !dst_y || width <= 0 || height == 0) {
- return -1;
- }
- // Negative height means invert the image.
- if (height < 0) {
- height = -height;
- src_y = src_y + (height - 1) * src_stride_y;
- src_stride_y = -src_stride_y;
- }
- CopyPlane(src_y, src_stride_y, dst_y, dst_stride_y, width, height);
- return 0;
-}
-
-// Convert I420 to I400.
-LIBYUV_API
-int I420ToI400(const uint8* src_y, int src_stride_y,
- const uint8* src_u, int src_stride_u,
- const uint8* src_v, int src_stride_v,
- uint8* dst_y, int dst_stride_y,
- int width, int height) {
- if (!src_y || !dst_y || width <= 0 || height == 0) {
- return -1;
- }
- // Negative height means invert the image.
- if (height < 0) {
- height = -height;
- src_y = src_y + (height - 1) * src_stride_y;
- src_stride_y = -src_stride_y;
- }
- CopyPlane(src_y, src_stride_y, dst_y, dst_stride_y, width, height);
- return 0;
-}
-
-// Mirror a plane of data.
-void MirrorPlane(const uint8* src_y, int src_stride_y,
- uint8* dst_y, int dst_stride_y,
- int width, int height) {
- int y;
- void (*MirrorRow)(const uint8* src, uint8* dst, int width) = MirrorRow_C;
- // Negative height means invert the image.
- if (height < 0) {
- height = -height;
- src_y = src_y + (height - 1) * src_stride_y;
- src_stride_y = -src_stride_y;
- }
-#if defined(HAS_MIRRORROW_NEON)
- if (TestCpuFlag(kCpuHasNEON)) {
- MirrorRow = MirrorRow_Any_NEON;
- if (IS_ALIGNED(width, 16)) {
- MirrorRow = MirrorRow_NEON;
- }
- }
-#endif
-#if defined(HAS_MIRRORROW_SSE2)
- if (TestCpuFlag(kCpuHasSSE2)) {
- MirrorRow = MirrorRow_Any_SSE2;
- if (IS_ALIGNED(width, 16)) {
- MirrorRow = MirrorRow_SSE2;
- }
- }
-#endif
-#if defined(HAS_MIRRORROW_SSSE3)
- if (TestCpuFlag(kCpuHasSSSE3)) {
- MirrorRow = MirrorRow_Any_SSSE3;
- if (IS_ALIGNED(width, 16)) {
- MirrorRow = MirrorRow_SSSE3;
- }
- }
-#endif
-#if defined(HAS_MIRRORROW_AVX2)
- if (TestCpuFlag(kCpuHasAVX2)) {
- MirrorRow = MirrorRow_Any_AVX2;
- if (IS_ALIGNED(width, 32)) {
- MirrorRow = MirrorRow_AVX2;
- }
- }
-#endif
-// TODO(fbarchard): Mirror on mips handle unaligned memory.
-#if defined(HAS_MIRRORROW_MIPS_DSPR2)
- if (TestCpuFlag(kCpuHasMIPS_DSPR2) &&
- IS_ALIGNED(src_y, 4) && IS_ALIGNED(src_stride_y, 4) &&
- IS_ALIGNED(dst_y, 4) && IS_ALIGNED(dst_stride_y, 4)) {
- MirrorRow = MirrorRow_MIPS_DSPR2;
- }
-#endif
-
- // Mirror plane
- for (y = 0; y < height; ++y) {
- MirrorRow(src_y, dst_y, width);
- src_y += src_stride_y;
- dst_y += dst_stride_y;
- }
-}
-
-// Convert YUY2 to I422.
-LIBYUV_API
-int YUY2ToI422(const uint8* src_yuy2, int src_stride_yuy2,
- uint8* dst_y, int dst_stride_y,
- uint8* dst_u, int dst_stride_u,
- uint8* dst_v, int dst_stride_v,
- int width, int height) {
- int y;
- void (*YUY2ToUV422Row)(const uint8* src_yuy2,
- uint8* dst_u, uint8* dst_v, int pix) =
- YUY2ToUV422Row_C;
- void (*YUY2ToYRow)(const uint8* src_yuy2, uint8* dst_y, int pix) =
- YUY2ToYRow_C;
- // Negative height means invert the image.
- if (height < 0) {
- height = -height;
- src_yuy2 = src_yuy2 + (height - 1) * src_stride_yuy2;
- src_stride_yuy2 = -src_stride_yuy2;
- }
- // Coalesce rows.
- if (src_stride_yuy2 == width * 2 &&
- dst_stride_y == width &&
- dst_stride_u * 2 == width &&
- dst_stride_v * 2 == width) {
- width *= height;
- height = 1;
- src_stride_yuy2 = dst_stride_y = dst_stride_u = dst_stride_v = 0;
- }
-#if defined(HAS_YUY2TOYROW_SSE2)
- if (TestCpuFlag(kCpuHasSSE2)) {
- YUY2ToUV422Row = YUY2ToUV422Row_Any_SSE2;
- YUY2ToYRow = YUY2ToYRow_Any_SSE2;
- if (IS_ALIGNED(width, 16)) {
- YUY2ToUV422Row = YUY2ToUV422Row_SSE2;
- YUY2ToYRow = YUY2ToYRow_SSE2;
- }
- }
-#endif
-#if defined(HAS_YUY2TOYROW_AVX2)
- if (TestCpuFlag(kCpuHasAVX2)) {
- YUY2ToUV422Row = YUY2ToUV422Row_Any_AVX2;
- YUY2ToYRow = YUY2ToYRow_Any_AVX2;
- if (IS_ALIGNED(width, 32)) {
- YUY2ToUV422Row = YUY2ToUV422Row_AVX2;
- YUY2ToYRow = YUY2ToYRow_AVX2;
- }
- }
-#endif
-#if defined(HAS_YUY2TOYROW_NEON)
- if (TestCpuFlag(kCpuHasNEON)) {
- YUY2ToYRow = YUY2ToYRow_Any_NEON;
- if (width >= 16) {
- YUY2ToUV422Row = YUY2ToUV422Row_Any_NEON;
- }
- if (IS_ALIGNED(width, 16)) {
- YUY2ToYRow = YUY2ToYRow_NEON;
- YUY2ToUV422Row = YUY2ToUV422Row_NEON;
- }
- }
-#endif
-
- for (y = 0; y < height; ++y) {
- YUY2ToUV422Row(src_yuy2, dst_u, dst_v, width);
- YUY2ToYRow(src_yuy2, dst_y, width);
- src_yuy2 += src_stride_yuy2;
- dst_y += dst_stride_y;
- dst_u += dst_stride_u;
- dst_v += dst_stride_v;
- }
- return 0;
-}
-
-// Convert UYVY to I422.
-LIBYUV_API
-int UYVYToI422(const uint8* src_uyvy, int src_stride_uyvy,
- uint8* dst_y, int dst_stride_y,
- uint8* dst_u, int dst_stride_u,
- uint8* dst_v, int dst_stride_v,
- int width, int height) {
- int y;
- void (*UYVYToUV422Row)(const uint8* src_uyvy,
- uint8* dst_u, uint8* dst_v, int pix) =
- UYVYToUV422Row_C;
- void (*UYVYToYRow)(const uint8* src_uyvy,
- uint8* dst_y, int pix) = UYVYToYRow_C;
- // Negative height means invert the image.
- if (height < 0) {
- height = -height;
- src_uyvy = src_uyvy + (height - 1) * src_stride_uyvy;
- src_stride_uyvy = -src_stride_uyvy;
- }
- // Coalesce rows.
- if (src_stride_uyvy == width * 2 &&
- dst_stride_y == width &&
- dst_stride_u * 2 == width &&
- dst_stride_v * 2 == width) {
- width *= height;
- height = 1;
- src_stride_uyvy = dst_stride_y = dst_stride_u = dst_stride_v = 0;
- }
-#if defined(HAS_UYVYTOYROW_SSE2)
- if (TestCpuFlag(kCpuHasSSE2)) {
- UYVYToUV422Row = UYVYToUV422Row_Any_SSE2;
- UYVYToYRow = UYVYToYRow_Any_SSE2;
- if (IS_ALIGNED(width, 16)) {
- UYVYToUV422Row = UYVYToUV422Row_SSE2;
- UYVYToYRow = UYVYToYRow_SSE2;
- }
- }
-#endif
-#if defined(HAS_UYVYTOYROW_AVX2)
- if (TestCpuFlag(kCpuHasAVX2)) {
- UYVYToUV422Row = UYVYToUV422Row_Any_AVX2;
- UYVYToYRow = UYVYToYRow_Any_AVX2;
- if (IS_ALIGNED(width, 32)) {
- UYVYToUV422Row = UYVYToUV422Row_AVX2;
- UYVYToYRow = UYVYToYRow_AVX2;
- }
- }
-#endif
-#if defined(HAS_UYVYTOYROW_NEON)
- if (TestCpuFlag(kCpuHasNEON)) {
- UYVYToYRow = UYVYToYRow_Any_NEON;
- if (width >= 16) {
- UYVYToUV422Row = UYVYToUV422Row_Any_NEON;
- }
- if (IS_ALIGNED(width, 16)) {
- UYVYToYRow = UYVYToYRow_NEON;
- UYVYToUV422Row = UYVYToUV422Row_NEON;
- }
- }
-#endif
-
- for (y = 0; y < height; ++y) {
- UYVYToUV422Row(src_uyvy, dst_u, dst_v, width);
- UYVYToYRow(src_uyvy, dst_y, width);
- src_uyvy += src_stride_uyvy;
- dst_y += dst_stride_y;
- dst_u += dst_stride_u;
- dst_v += dst_stride_v;
- }
- return 0;
-}
-
-// Mirror I400 with optional flipping
-LIBYUV_API
-int I400Mirror(const uint8* src_y, int src_stride_y,
- uint8* dst_y, int dst_stride_y,
- int width, int height) {
- if (!src_y || !dst_y ||
- width <= 0 || height == 0) {
- return -1;
- }
- // Negative height means invert the image.
- if (height < 0) {
- height = -height;
- src_y = src_y + (height - 1) * src_stride_y;
- src_stride_y = -src_stride_y;
- }
-
- MirrorPlane(src_y, src_stride_y, dst_y, dst_stride_y, width, height);
- return 0;
-}
-
-// Mirror I420 with optional flipping
-LIBYUV_API
-int I420Mirror(const uint8* src_y, int src_stride_y,
- const uint8* src_u, int src_stride_u,
- const uint8* src_v, int src_stride_v,
- uint8* dst_y, int dst_stride_y,
- uint8* dst_u, int dst_stride_u,
- uint8* dst_v, int dst_stride_v,
- int width, int height) {
- int halfwidth = (width + 1) >> 1;
- int halfheight = (height + 1) >> 1;
- if (!src_y || !src_u || !src_v || !dst_y || !dst_u || !dst_v ||
- width <= 0 || height == 0) {
- return -1;
- }
- // Negative height means invert the image.
- if (height < 0) {
- height = -height;
- halfheight = (height + 1) >> 1;
- src_y = src_y + (height - 1) * src_stride_y;
- src_u = src_u + (halfheight - 1) * src_stride_u;
- src_v = src_v + (halfheight - 1) * src_stride_v;
- src_stride_y = -src_stride_y;
- src_stride_u = -src_stride_u;
- src_stride_v = -src_stride_v;
- }
-
- if (dst_y) {
- MirrorPlane(src_y, src_stride_y, dst_y, dst_stride_y, width, height);
- }
- MirrorPlane(src_u, src_stride_u, dst_u, dst_stride_u, halfwidth, halfheight);
- MirrorPlane(src_v, src_stride_v, dst_v, dst_stride_v, halfwidth, halfheight);
- return 0;
-}
-
-// ARGB mirror.
-LIBYUV_API
-int ARGBMirror(const uint8* src_argb, int src_stride_argb,
- uint8* dst_argb, int dst_stride_argb,
- int width, int height) {
- int y;
- void (*ARGBMirrorRow)(const uint8* src, uint8* dst, int width) =
- ARGBMirrorRow_C;
- if (!src_argb || !dst_argb || width <= 0 || height == 0) {
- return -1;
- }
- // Negative height means invert the image.
- if (height < 0) {
- height = -height;
- src_argb = src_argb + (height - 1) * src_stride_argb;
- src_stride_argb = -src_stride_argb;
- }
-#if defined(HAS_ARGBMIRRORROW_NEON)
- if (TestCpuFlag(kCpuHasNEON)) {
- ARGBMirrorRow = ARGBMirrorRow_Any_NEON;
- if (IS_ALIGNED(width, 4)) {
- ARGBMirrorRow = ARGBMirrorRow_NEON;
- }
- }
-#endif
-#if defined(HAS_ARGBMIRRORROW_SSE2)
- if (TestCpuFlag(kCpuHasSSE2)) {
- ARGBMirrorRow = ARGBMirrorRow_Any_SSE2;
- if (IS_ALIGNED(width, 4)) {
- ARGBMirrorRow = ARGBMirrorRow_SSE2;
- }
- }
-#endif
-#if defined(HAS_ARGBMIRRORROW_AVX2)
- if (TestCpuFlag(kCpuHasAVX2)) {
- ARGBMirrorRow = ARGBMirrorRow_Any_AVX2;
- if (IS_ALIGNED(width, 8)) {
- ARGBMirrorRow = ARGBMirrorRow_AVX2;
- }
- }
-#endif
-
- // Mirror plane
- for (y = 0; y < height; ++y) {
- ARGBMirrorRow(src_argb, dst_argb, width);
- src_argb += src_stride_argb;
- dst_argb += dst_stride_argb;
- }
- return 0;
-}
-
-// Get a blender that optimized for the CPU and pixel count.
-// As there are 6 blenders to choose from, the caller should try to use
-// the same blend function for all pixels if possible.
-LIBYUV_API
-ARGBBlendRow GetARGBBlend() {
- void (*ARGBBlendRow)(const uint8* src_argb, const uint8* src_argb1,
- uint8* dst_argb, int width) = ARGBBlendRow_C;
-#if defined(HAS_ARGBBLENDROW_SSSE3)
- if (TestCpuFlag(kCpuHasSSSE3)) {
- ARGBBlendRow = ARGBBlendRow_SSSE3;
- return ARGBBlendRow;
- }
-#endif
-#if defined(HAS_ARGBBLENDROW_SSE2)
- if (TestCpuFlag(kCpuHasSSE2)) {
- ARGBBlendRow = ARGBBlendRow_SSE2;
- }
-#endif
-#if defined(HAS_ARGBBLENDROW_NEON)
- if (TestCpuFlag(kCpuHasNEON)) {
- ARGBBlendRow = ARGBBlendRow_NEON;
- }
-#endif
- return ARGBBlendRow;
-}
-
-// Alpha Blend 2 ARGB images and store to destination.
-LIBYUV_API
-int ARGBBlend(const uint8* src_argb0, int src_stride_argb0,
- const uint8* src_argb1, int src_stride_argb1,
- uint8* dst_argb, int dst_stride_argb,
- int width, int height) {
- int y;
- void (*ARGBBlendRow)(const uint8* src_argb, const uint8* src_argb1,
- uint8* dst_argb, int width) = GetARGBBlend();
- if (!src_argb0 || !src_argb1 || !dst_argb || width <= 0 || height == 0) {
- return -1;
- }
- // Negative height means invert the image.
- if (height < 0) {
- height = -height;
- dst_argb = dst_argb + (height - 1) * dst_stride_argb;
- dst_stride_argb = -dst_stride_argb;
- }
- // Coalesce rows.
- if (src_stride_argb0 == width * 4 &&
- src_stride_argb1 == width * 4 &&
- dst_stride_argb == width * 4) {
- width *= height;
- height = 1;
- src_stride_argb0 = src_stride_argb1 = dst_stride_argb = 0;
- }
-
- for (y = 0; y < height; ++y) {
- ARGBBlendRow(src_argb0, src_argb1, dst_argb, width);
- src_argb0 += src_stride_argb0;
- src_argb1 += src_stride_argb1;
- dst_argb += dst_stride_argb;
- }
- return 0;
-}
-
-// Multiply 2 ARGB images and store to destination.
-LIBYUV_API
-int ARGBMultiply(const uint8* src_argb0, int src_stride_argb0,
- const uint8* src_argb1, int src_stride_argb1,
- uint8* dst_argb, int dst_stride_argb,
- int width, int height) {
- int y;
- void (*ARGBMultiplyRow)(const uint8* src0, const uint8* src1, uint8* dst,
- int width) = ARGBMultiplyRow_C;
- if (!src_argb0 || !src_argb1 || !dst_argb || width <= 0 || height == 0) {
- return -1;
- }
- // Negative height means invert the image.
- if (height < 0) {
- height = -height;
- dst_argb = dst_argb + (height - 1) * dst_stride_argb;
- dst_stride_argb = -dst_stride_argb;
- }
- // Coalesce rows.
- if (src_stride_argb0 == width * 4 &&
- src_stride_argb1 == width * 4 &&
- dst_stride_argb == width * 4) {
- width *= height;
- height = 1;
- src_stride_argb0 = src_stride_argb1 = dst_stride_argb = 0;
- }
-#if defined(HAS_ARGBMULTIPLYROW_SSE2)
- if (TestCpuFlag(kCpuHasSSE2)) {
- ARGBMultiplyRow = ARGBMultiplyRow_Any_SSE2;
- if (IS_ALIGNED(width, 4)) {
- ARGBMultiplyRow = ARGBMultiplyRow_SSE2;
- }
- }
-#endif
-#if defined(HAS_ARGBMULTIPLYROW_AVX2)
- if (TestCpuFlag(kCpuHasAVX2)) {
- ARGBMultiplyRow = ARGBMultiplyRow_Any_AVX2;
- if (IS_ALIGNED(width, 8)) {
- ARGBMultiplyRow = ARGBMultiplyRow_AVX2;
- }
- }
-#endif
-#if defined(HAS_ARGBMULTIPLYROW_NEON)
- if (TestCpuFlag(kCpuHasNEON)) {
- ARGBMultiplyRow = ARGBMultiplyRow_Any_NEON;
- if (IS_ALIGNED(width, 8)) {
- ARGBMultiplyRow = ARGBMultiplyRow_NEON;
- }
- }
-#endif
-
- // Multiply plane
- for (y = 0; y < height; ++y) {
- ARGBMultiplyRow(src_argb0, src_argb1, dst_argb, width);
- src_argb0 += src_stride_argb0;
- src_argb1 += src_stride_argb1;
- dst_argb += dst_stride_argb;
- }
- return 0;
-}
-
-// Add 2 ARGB images and store to destination.
-LIBYUV_API
-int ARGBAdd(const uint8* src_argb0, int src_stride_argb0,
- const uint8* src_argb1, int src_stride_argb1,
- uint8* dst_argb, int dst_stride_argb,
- int width, int height) {
- int y;
- void (*ARGBAddRow)(const uint8* src0, const uint8* src1, uint8* dst,
- int width) = ARGBAddRow_C;
- if (!src_argb0 || !src_argb1 || !dst_argb || width <= 0 || height == 0) {
- return -1;
- }
- // Negative height means invert the image.
- if (height < 0) {
- height = -height;
- dst_argb = dst_argb + (height - 1) * dst_stride_argb;
- dst_stride_argb = -dst_stride_argb;
- }
- // Coalesce rows.
- if (src_stride_argb0 == width * 4 &&
- src_stride_argb1 == width * 4 &&
- dst_stride_argb == width * 4) {
- width *= height;
- height = 1;
- src_stride_argb0 = src_stride_argb1 = dst_stride_argb = 0;
- }
-#if defined(HAS_ARGBADDROW_SSE2) && (defined(_MSC_VER) && !defined(__clang__))
- if (TestCpuFlag(kCpuHasSSE2)) {
- ARGBAddRow = ARGBAddRow_SSE2;
- }
-#endif
-#if defined(HAS_ARGBADDROW_SSE2) && !(defined(_MSC_VER) && !defined(__clang__))
- if (TestCpuFlag(kCpuHasSSE2)) {
- ARGBAddRow = ARGBAddRow_Any_SSE2;
- if (IS_ALIGNED(width, 4)) {
- ARGBAddRow = ARGBAddRow_SSE2;
- }
- }
-#endif
-#if defined(HAS_ARGBADDROW_AVX2)
- if (TestCpuFlag(kCpuHasAVX2)) {
- ARGBAddRow = ARGBAddRow_Any_AVX2;
- if (IS_ALIGNED(width, 8)) {
- ARGBAddRow = ARGBAddRow_AVX2;
- }
- }
-#endif
-#if defined(HAS_ARGBADDROW_NEON)
- if (TestCpuFlag(kCpuHasNEON)) {
- ARGBAddRow = ARGBAddRow_Any_NEON;
- if (IS_ALIGNED(width, 8)) {
- ARGBAddRow = ARGBAddRow_NEON;
- }
- }
-#endif
-
- // Add plane
- for (y = 0; y < height; ++y) {
- ARGBAddRow(src_argb0, src_argb1, dst_argb, width);
- src_argb0 += src_stride_argb0;
- src_argb1 += src_stride_argb1;
- dst_argb += dst_stride_argb;
- }
- return 0;
-}
-
-// Subtract 2 ARGB images and store to destination.
-LIBYUV_API
-int ARGBSubtract(const uint8* src_argb0, int src_stride_argb0,
- const uint8* src_argb1, int src_stride_argb1,
- uint8* dst_argb, int dst_stride_argb,
- int width, int height) {
- int y;
- void (*ARGBSubtractRow)(const uint8* src0, const uint8* src1, uint8* dst,
- int width) = ARGBSubtractRow_C;
- if (!src_argb0 || !src_argb1 || !dst_argb || width <= 0 || height == 0) {
- return -1;
- }
- // Negative height means invert the image.
- if (height < 0) {
- height = -height;
- dst_argb = dst_argb + (height - 1) * dst_stride_argb;
- dst_stride_argb = -dst_stride_argb;
- }
- // Coalesce rows.
- if (src_stride_argb0 == width * 4 &&
- src_stride_argb1 == width * 4 &&
- dst_stride_argb == width * 4) {
- width *= height;
- height = 1;
- src_stride_argb0 = src_stride_argb1 = dst_stride_argb = 0;
- }
-#if defined(HAS_ARGBSUBTRACTROW_SSE2)
- if (TestCpuFlag(kCpuHasSSE2)) {
- ARGBSubtractRow = ARGBSubtractRow_Any_SSE2;
- if (IS_ALIGNED(width, 4)) {
- ARGBSubtractRow = ARGBSubtractRow_SSE2;
- }
- }
-#endif
-#if defined(HAS_ARGBSUBTRACTROW_AVX2)
- if (TestCpuFlag(kCpuHasAVX2)) {
- ARGBSubtractRow = ARGBSubtractRow_Any_AVX2;
- if (IS_ALIGNED(width, 8)) {
- ARGBSubtractRow = ARGBSubtractRow_AVX2;
- }
- }
-#endif
-#if defined(HAS_ARGBSUBTRACTROW_NEON)
- if (TestCpuFlag(kCpuHasNEON)) {
- ARGBSubtractRow = ARGBSubtractRow_Any_NEON;
- if (IS_ALIGNED(width, 8)) {
- ARGBSubtractRow = ARGBSubtractRow_NEON;
- }
- }
-#endif
-
- // Subtract plane
- for (y = 0; y < height; ++y) {
- ARGBSubtractRow(src_argb0, src_argb1, dst_argb, width);
- src_argb0 += src_stride_argb0;
- src_argb1 += src_stride_argb1;
- dst_argb += dst_stride_argb;
- }
- return 0;
-}
-
-// Convert I422 to BGRA.
-LIBYUV_API
-int I422ToBGRA(const uint8* src_y, int src_stride_y,
- const uint8* src_u, int src_stride_u,
- const uint8* src_v, int src_stride_v,
- uint8* dst_bgra, int dst_stride_bgra,
- int width, int height) {
- int y;
- void (*I422ToBGRARow)(const uint8* y_buf,
- const uint8* u_buf,
- const uint8* v_buf,
- uint8* rgb_buf,
- int width) = I422ToBGRARow_C;
- if (!src_y || !src_u || !src_v ||
- !dst_bgra ||
- width <= 0 || height == 0) {
- return -1;
- }
- // Negative height means invert the image.
- if (height < 0) {
- height = -height;
- dst_bgra = dst_bgra + (height - 1) * dst_stride_bgra;
- dst_stride_bgra = -dst_stride_bgra;
- }
- // Coalesce rows.
- if (src_stride_y == width &&
- src_stride_u * 2 == width &&
- src_stride_v * 2 == width &&
- dst_stride_bgra == width * 4) {
- width *= height;
- height = 1;
- src_stride_y = src_stride_u = src_stride_v = dst_stride_bgra = 0;
- }
-#if defined(HAS_I422TOBGRAROW_SSSE3)
- if (TestCpuFlag(kCpuHasSSSE3)) {
- I422ToBGRARow = I422ToBGRARow_Any_SSSE3;
- if (IS_ALIGNED(width, 8)) {
- I422ToBGRARow = I422ToBGRARow_SSSE3;
- }
- }
-#endif
-#if defined(HAS_I422TOBGRAROW_AVX2)
- if (TestCpuFlag(kCpuHasAVX2)) {
- I422ToBGRARow = I422ToBGRARow_Any_AVX2;
- if (IS_ALIGNED(width, 16)) {
- I422ToBGRARow = I422ToBGRARow_AVX2;
- }
- }
-#endif
-#if defined(HAS_I422TOBGRAROW_NEON)
- if (TestCpuFlag(kCpuHasNEON)) {
- I422ToBGRARow = I422ToBGRARow_Any_NEON;
- if (IS_ALIGNED(width, 8)) {
- I422ToBGRARow = I422ToBGRARow_NEON;
- }
- }
-#endif
-#if defined(HAS_I422TOBGRAROW_MIPS_DSPR2)
- if (TestCpuFlag(kCpuHasMIPS_DSPR2) && IS_ALIGNED(width, 4) &&
- IS_ALIGNED(src_y, 4) && IS_ALIGNED(src_stride_y, 4) &&
- IS_ALIGNED(src_u, 2) && IS_ALIGNED(src_stride_u, 2) &&
- IS_ALIGNED(src_v, 2) && IS_ALIGNED(src_stride_v, 2) &&
- IS_ALIGNED(dst_bgra, 4) && IS_ALIGNED(dst_stride_bgra, 4)) {
- I422ToBGRARow = I422ToBGRARow_MIPS_DSPR2;
- }
-#endif
-
- for (y = 0; y < height; ++y) {
- I422ToBGRARow(src_y, src_u, src_v, dst_bgra, width);
- dst_bgra += dst_stride_bgra;
- src_y += src_stride_y;
- src_u += src_stride_u;
- src_v += src_stride_v;
- }
- return 0;
-}
-
-// Convert I422 to ABGR.
-LIBYUV_API
-int I422ToABGR(const uint8* src_y, int src_stride_y,
- const uint8* src_u, int src_stride_u,
- const uint8* src_v, int src_stride_v,
- uint8* dst_abgr, int dst_stride_abgr,
- int width, int height) {
- int y;
- void (*I422ToABGRRow)(const uint8* y_buf,
- const uint8* u_buf,
- const uint8* v_buf,
- uint8* rgb_buf,
- int width) = I422ToABGRRow_C;
- if (!src_y || !src_u || !src_v ||
- !dst_abgr ||
- width <= 0 || height == 0) {
- return -1;
- }
- // Negative height means invert the image.
- if (height < 0) {
- height = -height;
- dst_abgr = dst_abgr + (height - 1) * dst_stride_abgr;
- dst_stride_abgr = -dst_stride_abgr;
- }
- // Coalesce rows.
- if (src_stride_y == width &&
- src_stride_u * 2 == width &&
- src_stride_v * 2 == width &&
- dst_stride_abgr == width * 4) {
- width *= height;
- height = 1;
- src_stride_y = src_stride_u = src_stride_v = dst_stride_abgr = 0;
- }
-#if defined(HAS_I422TOABGRROW_NEON)
- if (TestCpuFlag(kCpuHasNEON) && width >= 8) {
- I422ToABGRRow = I422ToABGRRow_Any_NEON;
- if (IS_ALIGNED(width, 8)) {
- I422ToABGRRow = I422ToABGRRow_NEON;
- }
- }
-#endif
-#if defined(HAS_I422TOABGRROW_SSSE3)
- if (TestCpuFlag(kCpuHasSSSE3)) {
- I422ToABGRRow = I422ToABGRRow_Any_SSSE3;
- if (IS_ALIGNED(width, 8)) {
- I422ToABGRRow = I422ToABGRRow_SSSE3;
- }
- }
-#endif
-#if defined(HAS_I422TOABGRROW_AVX2)
- if (TestCpuFlag(kCpuHasAVX2)) {
- I422ToABGRRow = I422ToABGRRow_Any_AVX2;
- if (IS_ALIGNED(width, 16)) {
- I422ToABGRRow = I422ToABGRRow_AVX2;
- }
- }
-#endif
-
- for (y = 0; y < height; ++y) {
- I422ToABGRRow(src_y, src_u, src_v, dst_abgr, width);
- dst_abgr += dst_stride_abgr;
- src_y += src_stride_y;
- src_u += src_stride_u;
- src_v += src_stride_v;
- }
- return 0;
-}
-
-// Convert I422 to RGBA.
-LIBYUV_API
-int I422ToRGBA(const uint8* src_y, int src_stride_y,
- const uint8* src_u, int src_stride_u,
- const uint8* src_v, int src_stride_v,
- uint8* dst_rgba, int dst_stride_rgba,
- int width, int height) {
- int y;
- void (*I422ToRGBARow)(const uint8* y_buf,
- const uint8* u_buf,
- const uint8* v_buf,
- uint8* rgb_buf,
- int width) = I422ToRGBARow_C;
- if (!src_y || !src_u || !src_v ||
- !dst_rgba ||
- width <= 0 || height == 0) {
- return -1;
- }
- // Negative height means invert the image.
- if (height < 0) {
- height = -height;
- dst_rgba = dst_rgba + (height - 1) * dst_stride_rgba;
- dst_stride_rgba = -dst_stride_rgba;
- }
- // Coalesce rows.
- if (src_stride_y == width &&
- src_stride_u * 2 == width &&
- src_stride_v * 2 == width &&
- dst_stride_rgba == width * 4) {
- width *= height;
- height = 1;
- src_stride_y = src_stride_u = src_stride_v = dst_stride_rgba = 0;
- }
-#if defined(HAS_I422TORGBAROW_NEON)
- if (TestCpuFlag(kCpuHasNEON) && width >= 8) {
- I422ToRGBARow = I422ToRGBARow_Any_NEON;
- if (IS_ALIGNED(width, 8)) {
- I422ToRGBARow = I422ToRGBARow_NEON;
- }
- }
-#endif
-#if defined(HAS_I422TORGBAROW_SSSE3)
- if (TestCpuFlag(kCpuHasSSSE3)) {
- I422ToRGBARow = I422ToRGBARow_Any_SSSE3;
- if (IS_ALIGNED(width, 8)) {
- I422ToRGBARow = I422ToRGBARow_SSSE3;
- }
- }
-#endif
-#if defined(HAS_I422TORGBAROW_AVX2)
- if (TestCpuFlag(kCpuHasAVX2)) {
- I422ToRGBARow = I422ToRGBARow_Any_AVX2;
- if (IS_ALIGNED(width, 16)) {
- I422ToRGBARow = I422ToRGBARow_AVX2;
- }
- }
-#endif
-
- for (y = 0; y < height; ++y) {
- I422ToRGBARow(src_y, src_u, src_v, dst_rgba, width);
- dst_rgba += dst_stride_rgba;
- src_y += src_stride_y;
- src_u += src_stride_u;
- src_v += src_stride_v;
- }
- return 0;
-}
-
-// Convert NV12 to RGB565.
-LIBYUV_API
-int NV12ToRGB565(const uint8* src_y, int src_stride_y,
- const uint8* src_uv, int src_stride_uv,
- uint8* dst_rgb565, int dst_stride_rgb565,
- int width, int height) {
- int y;
- void (*NV12ToRGB565Row)(const uint8* y_buf,
- const uint8* uv_buf,
- uint8* rgb_buf,
- int width) = NV12ToRGB565Row_C;
- if (!src_y || !src_uv || !dst_rgb565 ||
- width <= 0 || height == 0) {
- return -1;
- }
- // Negative height means invert the image.
- if (height < 0) {
- height = -height;
- dst_rgb565 = dst_rgb565 + (height - 1) * dst_stride_rgb565;
- dst_stride_rgb565 = -dst_stride_rgb565;
- }
-#if defined(HAS_NV12TORGB565ROW_SSSE3)
- if (TestCpuFlag(kCpuHasSSSE3)) {
- NV12ToRGB565Row = NV12ToRGB565Row_Any_SSSE3;
- if (IS_ALIGNED(width, 8)) {
- NV12ToRGB565Row = NV12ToRGB565Row_SSSE3;
- }
- }
-#endif
-#if defined(HAS_NV12TORGB565ROW_AVX2)
- if (TestCpuFlag(kCpuHasAVX2)) {
- NV12ToRGB565Row = NV12ToRGB565Row_Any_AVX2;
- if (IS_ALIGNED(width, 16)) {
- NV12ToRGB565Row = NV12ToRGB565Row_AVX2;
- }
- }
-#endif
-#if defined(HAS_NV12TORGB565ROW_NEON)
- if (TestCpuFlag(kCpuHasNEON)) {
- NV12ToRGB565Row = NV12ToRGB565Row_Any_NEON;
- if (IS_ALIGNED(width, 8)) {
- NV12ToRGB565Row = NV12ToRGB565Row_NEON;
- }
- }
-#endif
-
- for (y = 0; y < height; ++y) {
- NV12ToRGB565Row(src_y, src_uv, dst_rgb565, width);
- dst_rgb565 += dst_stride_rgb565;
- src_y += src_stride_y;
- if (y & 1) {
- src_uv += src_stride_uv;
- }
- }
- return 0;
-}
-
-// Convert NV21 to RGB565.
-LIBYUV_API
-int NV21ToRGB565(const uint8* src_y, int src_stride_y,
- const uint8* src_vu, int src_stride_vu,
- uint8* dst_rgb565, int dst_stride_rgb565,
- int width, int height) {
- int y;
- void (*NV21ToRGB565Row)(const uint8* y_buf,
- const uint8* src_vu,
- uint8* rgb_buf,
- int width) = NV21ToRGB565Row_C;
- if (!src_y || !src_vu || !dst_rgb565 ||
- width <= 0 || height == 0) {
- return -1;
- }
- // Negative height means invert the image.
- if (height < 0) {
- height = -height;
- dst_rgb565 = dst_rgb565 + (height - 1) * dst_stride_rgb565;
- dst_stride_rgb565 = -dst_stride_rgb565;
- }
-#if defined(HAS_NV21TORGB565ROW_SSSE3)
- if (TestCpuFlag(kCpuHasSSSE3)) {
- NV21ToRGB565Row = NV21ToRGB565Row_Any_SSSE3;
- if (IS_ALIGNED(width, 8)) {
- NV21ToRGB565Row = NV21ToRGB565Row_SSSE3;
- }
- }
-#endif
-#if defined(HAS_NV21TORGB565ROW_AVX2)
- if (TestCpuFlag(kCpuHasAVX2)) {
- NV21ToRGB565Row = NV21ToRGB565Row_Any_AVX2;
- if (IS_ALIGNED(width, 16)) {
- NV21ToRGB565Row = NV21ToRGB565Row_AVX2;
- }
- }
-#endif
-#if defined(HAS_NV21TORGB565ROW_NEON)
- if (TestCpuFlag(kCpuHasNEON)) {
- NV21ToRGB565Row = NV21ToRGB565Row_Any_NEON;
- if (IS_ALIGNED(width, 8)) {
- NV21ToRGB565Row = NV21ToRGB565Row_NEON;
- }
- }
-#endif
-
- for (y = 0; y < height; ++y) {
- NV21ToRGB565Row(src_y, src_vu, dst_rgb565, width);
- dst_rgb565 += dst_stride_rgb565;
- src_y += src_stride_y;
- if (y & 1) {
- src_vu += src_stride_vu;
- }
- }
- return 0;
-}
-
-LIBYUV_API
-void SetPlane(uint8* dst_y, int dst_stride_y,
- int width, int height,
- uint32 value) {
- int y;
- void (*SetRow)(uint8* dst, uint8 value, int pix) = SetRow_C;
- if (height < 0) {
- height = -height;
- dst_y = dst_y + (height - 1) * dst_stride_y;
- dst_stride_y = -dst_stride_y;
- }
- // Coalesce rows.
- if (dst_stride_y == width) {
- width *= height;
- height = 1;
- dst_stride_y = 0;
- }
-#if defined(HAS_SETROW_NEON)
- if (TestCpuFlag(kCpuHasNEON)) {
- SetRow = SetRow_Any_NEON;
- if (IS_ALIGNED(width, 16)) {
- SetRow = SetRow_NEON;
- }
- }
-#endif
-#if defined(HAS_SETROW_X86)
- if (TestCpuFlag(kCpuHasX86)) {
- SetRow = SetRow_Any_X86;
- if (IS_ALIGNED(width, 4)) {
- SetRow = SetRow_X86;
- }
- }
-#endif
-#if defined(HAS_SETROW_ERMS)
- if (TestCpuFlag(kCpuHasERMS)) {
- SetRow = SetRow_ERMS;
- }
-#endif
-
- // Set plane
- for (y = 0; y < height; ++y) {
- SetRow(dst_y, value, width);
- dst_y += dst_stride_y;
- }
-}
-
-// Draw a rectangle into I420
-LIBYUV_API
-int I420Rect(uint8* dst_y, int dst_stride_y,
- uint8* dst_u, int dst_stride_u,
- uint8* dst_v, int dst_stride_v,
- int x, int y,
- int width, int height,
- int value_y, int value_u, int value_v) {
- int halfwidth = (width + 1) >> 1;
- int halfheight = (height + 1) >> 1;
- uint8* start_y = dst_y + y * dst_stride_y + x;
- uint8* start_u = dst_u + (y / 2) * dst_stride_u + (x / 2);
- uint8* start_v = dst_v + (y / 2) * dst_stride_v + (x / 2);
- if (!dst_y || !dst_u || !dst_v ||
- width <= 0 || height == 0 ||
- x < 0 || y < 0 ||
- value_y < 0 || value_y > 255 ||
- value_u < 0 || value_u > 255 ||
- value_v < 0 || value_v > 255) {
- return -1;
- }
-
- SetPlane(start_y, dst_stride_y, width, height, value_y);
- SetPlane(start_u, dst_stride_u, halfwidth, halfheight, value_u);
- SetPlane(start_v, dst_stride_v, halfwidth, halfheight, value_v);
- return 0;
-}
-
-// Draw a rectangle into ARGB
-LIBYUV_API
-int ARGBRect(uint8* dst_argb, int dst_stride_argb,
- int dst_x, int dst_y,
- int width, int height,
- uint32 value) {
- int y;
- void (*ARGBSetRow)(uint8* dst_argb, uint32 value, int pix) = ARGBSetRow_C;
- if (!dst_argb ||
- width <= 0 || height == 0 ||
- dst_x < 0 || dst_y < 0) {
- return -1;
- }
- if (height < 0) {
- height = -height;
- dst_argb = dst_argb + (height - 1) * dst_stride_argb;
- dst_stride_argb = -dst_stride_argb;
- }
- dst_argb += dst_y * dst_stride_argb + dst_x * 4;
- // Coalesce rows.
- if (dst_stride_argb == width * 4) {
- width *= height;
- height = 1;
- dst_stride_argb = 0;
- }
-
-#if defined(HAS_ARGBSETROW_NEON)
- if (TestCpuFlag(kCpuHasNEON)) {
- ARGBSetRow = ARGBSetRow_Any_NEON;
- if (IS_ALIGNED(width, 4)) {
- ARGBSetRow = ARGBSetRow_NEON;
- }
- }
-#endif
-#if defined(HAS_ARGBSETROW_X86)
- if (TestCpuFlag(kCpuHasX86)) {
- ARGBSetRow = ARGBSetRow_X86;
- }
-#endif
-
- // Set plane
- for (y = 0; y < height; ++y) {
- ARGBSetRow(dst_argb, value, width);
- dst_argb += dst_stride_argb;
- }
- return 0;
-}
-
-// Convert unattentuated ARGB to preattenuated ARGB.
-// An unattenutated ARGB alpha blend uses the formula
-// p = a * f + (1 - a) * b
-// where
-// p is output pixel
-// f is foreground pixel
-// b is background pixel
-// a is alpha value from foreground pixel
-// An preattenutated ARGB alpha blend uses the formula
-// p = f + (1 - a) * b
-// where
-// f is foreground pixel premultiplied by alpha
-
-LIBYUV_API
-int ARGBAttenuate(const uint8* src_argb, int src_stride_argb,
- uint8* dst_argb, int dst_stride_argb,
- int width, int height) {
- int y;
- void (*ARGBAttenuateRow)(const uint8* src_argb, uint8* dst_argb,
- int width) = ARGBAttenuateRow_C;
- if (!src_argb || !dst_argb || width <= 0 || height == 0) {
- return -1;
- }
- if (height < 0) {
- height = -height;
- src_argb = src_argb + (height - 1) * src_stride_argb;
- src_stride_argb = -src_stride_argb;
- }
- // Coalesce rows.
- if (src_stride_argb == width * 4 &&
- dst_stride_argb == width * 4) {
- width *= height;
- height = 1;
- src_stride_argb = dst_stride_argb = 0;
- }
-#if defined(HAS_ARGBATTENUATEROW_SSE2)
- if (TestCpuFlag(kCpuHasSSE2)) {
- ARGBAttenuateRow = ARGBAttenuateRow_Any_SSE2;
- if (IS_ALIGNED(width, 4)) {
- ARGBAttenuateRow = ARGBAttenuateRow_SSE2;
- }
- }
-#endif
-#if defined(HAS_ARGBATTENUATEROW_SSSE3)
- if (TestCpuFlag(kCpuHasSSSE3)) {
- ARGBAttenuateRow = ARGBAttenuateRow_Any_SSSE3;
- if (IS_ALIGNED(width, 4)) {
- ARGBAttenuateRow = ARGBAttenuateRow_SSSE3;
- }
- }
-#endif
-#if defined(HAS_ARGBATTENUATEROW_AVX2)
- if (TestCpuFlag(kCpuHasAVX2)) {
- ARGBAttenuateRow = ARGBAttenuateRow_Any_AVX2;
- if (IS_ALIGNED(width, 8)) {
- ARGBAttenuateRow = ARGBAttenuateRow_AVX2;
- }
- }
-#endif
-#if defined(HAS_ARGBATTENUATEROW_NEON)
- if (TestCpuFlag(kCpuHasNEON)) {
- ARGBAttenuateRow = ARGBAttenuateRow_Any_NEON;
- if (IS_ALIGNED(width, 8)) {
- ARGBAttenuateRow = ARGBAttenuateRow_NEON;
- }
- }
-#endif
-
- for (y = 0; y < height; ++y) {
- ARGBAttenuateRow(src_argb, dst_argb, width);
- src_argb += src_stride_argb;
- dst_argb += dst_stride_argb;
- }
- return 0;
-}
-
-// Convert preattentuated ARGB to unattenuated ARGB.
-LIBYUV_API
-int ARGBUnattenuate(const uint8* src_argb, int src_stride_argb,
- uint8* dst_argb, int dst_stride_argb,
- int width, int height) {
- int y;
- void (*ARGBUnattenuateRow)(const uint8* src_argb, uint8* dst_argb,
- int width) = ARGBUnattenuateRow_C;
- if (!src_argb || !dst_argb || width <= 0 || height == 0) {
- return -1;
- }
- if (height < 0) {
- height = -height;
- src_argb = src_argb + (height - 1) * src_stride_argb;
- src_stride_argb = -src_stride_argb;
- }
- // Coalesce rows.
- if (src_stride_argb == width * 4 &&
- dst_stride_argb == width * 4) {
- width *= height;
- height = 1;
- src_stride_argb = dst_stride_argb = 0;
- }
-#if defined(HAS_ARGBUNATTENUATEROW_SSE2)
- if (TestCpuFlag(kCpuHasSSE2)) {
- ARGBUnattenuateRow = ARGBUnattenuateRow_Any_SSE2;
- if (IS_ALIGNED(width, 4)) {
- ARGBUnattenuateRow = ARGBUnattenuateRow_SSE2;
- }
- }
-#endif
-#if defined(HAS_ARGBUNATTENUATEROW_AVX2)
- if (TestCpuFlag(kCpuHasAVX2)) {
- ARGBUnattenuateRow = ARGBUnattenuateRow_Any_AVX2;
- if (IS_ALIGNED(width, 8)) {
- ARGBUnattenuateRow = ARGBUnattenuateRow_AVX2;
- }
- }
-#endif
-// TODO(fbarchard): Neon version.
-
- for (y = 0; y < height; ++y) {
- ARGBUnattenuateRow(src_argb, dst_argb, width);
- src_argb += src_stride_argb;
- dst_argb += dst_stride_argb;
- }
- return 0;
-}
-
-// Convert ARGB to Grayed ARGB.
-LIBYUV_API
-int ARGBGrayTo(const uint8* src_argb, int src_stride_argb,
- uint8* dst_argb, int dst_stride_argb,
- int width, int height) {
- int y;
- void (*ARGBGrayRow)(const uint8* src_argb, uint8* dst_argb,
- int width) = ARGBGrayRow_C;
- if (!src_argb || !dst_argb || width <= 0 || height == 0) {
- return -1;
- }
- if (height < 0) {
- height = -height;
- src_argb = src_argb + (height - 1) * src_stride_argb;
- src_stride_argb = -src_stride_argb;
- }
- // Coalesce rows.
- if (src_stride_argb == width * 4 &&
- dst_stride_argb == width * 4) {
- width *= height;
- height = 1;
- src_stride_argb = dst_stride_argb = 0;
- }
-#if defined(HAS_ARGBGRAYROW_SSSE3)
- if (TestCpuFlag(kCpuHasSSSE3) && IS_ALIGNED(width, 8)) {
- ARGBGrayRow = ARGBGrayRow_SSSE3;
- }
-#endif
-#if defined(HAS_ARGBGRAYROW_NEON)
- if (TestCpuFlag(kCpuHasNEON) && IS_ALIGNED(width, 8)) {
- ARGBGrayRow = ARGBGrayRow_NEON;
- }
-#endif
-
- for (y = 0; y < height; ++y) {
- ARGBGrayRow(src_argb, dst_argb, width);
- src_argb += src_stride_argb;
- dst_argb += dst_stride_argb;
- }
- return 0;
-}
-
-// Make a rectangle of ARGB gray scale.
-LIBYUV_API
-int ARGBGray(uint8* dst_argb, int dst_stride_argb,
- int dst_x, int dst_y,
- int width, int height) {
- int y;
- void (*ARGBGrayRow)(const uint8* src_argb, uint8* dst_argb,
- int width) = ARGBGrayRow_C;
- uint8* dst = dst_argb + dst_y * dst_stride_argb + dst_x * 4;
- if (!dst_argb || width <= 0 || height <= 0 || dst_x < 0 || dst_y < 0) {
- return -1;
- }
- // Coalesce rows.
- if (dst_stride_argb == width * 4) {
- width *= height;
- height = 1;
- dst_stride_argb = 0;
- }
-#if defined(HAS_ARGBGRAYROW_SSSE3)
- if (TestCpuFlag(kCpuHasSSSE3) && IS_ALIGNED(width, 8)) {
- ARGBGrayRow = ARGBGrayRow_SSSE3;
- }
-#endif
-#if defined(HAS_ARGBGRAYROW_NEON)
- if (TestCpuFlag(kCpuHasNEON) && IS_ALIGNED(width, 8)) {
- ARGBGrayRow = ARGBGrayRow_NEON;
- }
-#endif
- for (y = 0; y < height; ++y) {
- ARGBGrayRow(dst, dst, width);
- dst += dst_stride_argb;
- }
- return 0;
-}
-
-// Make a rectangle of ARGB Sepia tone.
-LIBYUV_API
-int ARGBSepia(uint8* dst_argb, int dst_stride_argb,
- int dst_x, int dst_y, int width, int height) {
- int y;
- void (*ARGBSepiaRow)(uint8* dst_argb, int width) = ARGBSepiaRow_C;
- uint8* dst = dst_argb + dst_y * dst_stride_argb + dst_x * 4;
- if (!dst_argb || width <= 0 || height <= 0 || dst_x < 0 || dst_y < 0) {
- return -1;
- }
- // Coalesce rows.
- if (dst_stride_argb == width * 4) {
- width *= height;
- height = 1;
- dst_stride_argb = 0;
- }
-#if defined(HAS_ARGBSEPIAROW_SSSE3)
- if (TestCpuFlag(kCpuHasSSSE3) && IS_ALIGNED(width, 8)) {
- ARGBSepiaRow = ARGBSepiaRow_SSSE3;
- }
-#endif
-#if defined(HAS_ARGBSEPIAROW_NEON)
- if (TestCpuFlag(kCpuHasNEON) && IS_ALIGNED(width, 8)) {
- ARGBSepiaRow = ARGBSepiaRow_NEON;
- }
-#endif
- for (y = 0; y < height; ++y) {
- ARGBSepiaRow(dst, width);
- dst += dst_stride_argb;
- }
- return 0;
-}
-
-// Apply a 4x4 matrix to each ARGB pixel.
-// Note: Normally for shading, but can be used to swizzle or invert.
-LIBYUV_API
-int ARGBColorMatrix(const uint8* src_argb, int src_stride_argb,
- uint8* dst_argb, int dst_stride_argb,
- const int8* matrix_argb,
- int width, int height) {
- int y;
- void (*ARGBColorMatrixRow)(const uint8* src_argb, uint8* dst_argb,
- const int8* matrix_argb, int width) = ARGBColorMatrixRow_C;
- if (!src_argb || !dst_argb || !matrix_argb || width <= 0 || height == 0) {
- return -1;
- }
- if (height < 0) {
- height = -height;
- src_argb = src_argb + (height - 1) * src_stride_argb;
- src_stride_argb = -src_stride_argb;
- }
- // Coalesce rows.
- if (src_stride_argb == width * 4 &&
- dst_stride_argb == width * 4) {
- width *= height;
- height = 1;
- src_stride_argb = dst_stride_argb = 0;
- }
-#if defined(HAS_ARGBCOLORMATRIXROW_SSSE3)
- if (TestCpuFlag(kCpuHasSSSE3) && IS_ALIGNED(width, 8)) {
- ARGBColorMatrixRow = ARGBColorMatrixRow_SSSE3;
- }
-#endif
-#if defined(HAS_ARGBCOLORMATRIXROW_NEON)
- if (TestCpuFlag(kCpuHasNEON) && IS_ALIGNED(width, 8)) {
- ARGBColorMatrixRow = ARGBColorMatrixRow_NEON;
- }
-#endif
- for (y = 0; y < height; ++y) {
- ARGBColorMatrixRow(src_argb, dst_argb, matrix_argb, width);
- src_argb += src_stride_argb;
- dst_argb += dst_stride_argb;
- }
- return 0;
-}
-
-// Apply a 4x3 matrix to each ARGB pixel.
-// Deprecated.
-LIBYUV_API
-int RGBColorMatrix(uint8* dst_argb, int dst_stride_argb,
- const int8* matrix_rgb,
- int dst_x, int dst_y, int width, int height) {
- SIMD_ALIGNED(int8 matrix_argb[16]);
- uint8* dst = dst_argb + dst_y * dst_stride_argb + dst_x * 4;
- if (!dst_argb || !matrix_rgb || width <= 0 || height <= 0 ||
- dst_x < 0 || dst_y < 0) {
- return -1;
- }
-
- // Convert 4x3 7 bit matrix to 4x4 6 bit matrix.
- matrix_argb[0] = matrix_rgb[0] / 2;
- matrix_argb[1] = matrix_rgb[1] / 2;
- matrix_argb[2] = matrix_rgb[2] / 2;
- matrix_argb[3] = matrix_rgb[3] / 2;
- matrix_argb[4] = matrix_rgb[4] / 2;
- matrix_argb[5] = matrix_rgb[5] / 2;
- matrix_argb[6] = matrix_rgb[6] / 2;
- matrix_argb[7] = matrix_rgb[7] / 2;
- matrix_argb[8] = matrix_rgb[8] / 2;
- matrix_argb[9] = matrix_rgb[9] / 2;
- matrix_argb[10] = matrix_rgb[10] / 2;
- matrix_argb[11] = matrix_rgb[11] / 2;
- matrix_argb[14] = matrix_argb[13] = matrix_argb[12] = 0;
- matrix_argb[15] = 64; // 1.0
-
- return ARGBColorMatrix((const uint8*)(dst), dst_stride_argb,
- dst, dst_stride_argb,
- &matrix_argb[0], width, height);
-}
-
-// Apply a color table each ARGB pixel.
-// Table contains 256 ARGB values.
-LIBYUV_API
-int ARGBColorTable(uint8* dst_argb, int dst_stride_argb,
- const uint8* table_argb,
- int dst_x, int dst_y, int width, int height) {
- int y;
- void (*ARGBColorTableRow)(uint8* dst_argb, const uint8* table_argb,
- int width) = ARGBColorTableRow_C;
- uint8* dst = dst_argb + dst_y * dst_stride_argb + dst_x * 4;
- if (!dst_argb || !table_argb || width <= 0 || height <= 0 ||
- dst_x < 0 || dst_y < 0) {
- return -1;
- }
- // Coalesce rows.
- if (dst_stride_argb == width * 4) {
- width *= height;
- height = 1;
- dst_stride_argb = 0;
- }
-#if defined(HAS_ARGBCOLORTABLEROW_X86)
- if (TestCpuFlag(kCpuHasX86)) {
- ARGBColorTableRow = ARGBColorTableRow_X86;
- }
-#endif
- for (y = 0; y < height; ++y) {
- ARGBColorTableRow(dst, table_argb, width);
- dst += dst_stride_argb;
- }
- return 0;
-}
-
-// Apply a color table each ARGB pixel but preserve destination alpha.
-// Table contains 256 ARGB values.
-LIBYUV_API
-int RGBColorTable(uint8* dst_argb, int dst_stride_argb,
- const uint8* table_argb,
- int dst_x, int dst_y, int width, int height) {
- int y;
- void (*RGBColorTableRow)(uint8* dst_argb, const uint8* table_argb,
- int width) = RGBColorTableRow_C;
- uint8* dst = dst_argb + dst_y * dst_stride_argb + dst_x * 4;
- if (!dst_argb || !table_argb || width <= 0 || height <= 0 ||
- dst_x < 0 || dst_y < 0) {
- return -1;
- }
- // Coalesce rows.
- if (dst_stride_argb == width * 4) {
- width *= height;
- height = 1;
- dst_stride_argb = 0;
- }
-#if defined(HAS_RGBCOLORTABLEROW_X86)
- if (TestCpuFlag(kCpuHasX86)) {
- RGBColorTableRow = RGBColorTableRow_X86;
- }
-#endif
- for (y = 0; y < height; ++y) {
- RGBColorTableRow(dst, table_argb, width);
- dst += dst_stride_argb;
- }
- return 0;
-}
-
-// ARGBQuantize is used to posterize art.
-// e.g. rgb / qvalue * qvalue + qvalue / 2
-// But the low levels implement efficiently with 3 parameters, and could be
-// used for other high level operations.
-// dst_argb[0] = (b * scale >> 16) * interval_size + interval_offset;
-// where scale is 1 / interval_size as a fixed point value.
-// The divide is replaces with a multiply by reciprocal fixed point multiply.
-// Caveat - although SSE2 saturates, the C function does not and should be used
-// with care if doing anything but quantization.
-LIBYUV_API
-int ARGBQuantize(uint8* dst_argb, int dst_stride_argb,
- int scale, int interval_size, int interval_offset,
- int dst_x, int dst_y, int width, int height) {
- int y;
- void (*ARGBQuantizeRow)(uint8* dst_argb, int scale, int interval_size,
- int interval_offset, int width) = ARGBQuantizeRow_C;
- uint8* dst = dst_argb + dst_y * dst_stride_argb + dst_x * 4;
- if (!dst_argb || width <= 0 || height <= 0 || dst_x < 0 || dst_y < 0 ||
- interval_size < 1 || interval_size > 255) {
- return -1;
- }
- // Coalesce rows.
- if (dst_stride_argb == width * 4) {
- width *= height;
- height = 1;
- dst_stride_argb = 0;
- }
-#if defined(HAS_ARGBQUANTIZEROW_SSE2)
- if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(width, 4)) {
- ARGBQuantizeRow = ARGBQuantizeRow_SSE2;
- }
-#endif
-#if defined(HAS_ARGBQUANTIZEROW_NEON)
- if (TestCpuFlag(kCpuHasNEON) && IS_ALIGNED(width, 8)) {
- ARGBQuantizeRow = ARGBQuantizeRow_NEON;
- }
-#endif
- for (y = 0; y < height; ++y) {
- ARGBQuantizeRow(dst, scale, interval_size, interval_offset, width);
- dst += dst_stride_argb;
- }
- return 0;
-}
-
-// Computes table of cumulative sum for image where the value is the sum
-// of all values above and to the left of the entry. Used by ARGBBlur.
-LIBYUV_API
-int ARGBComputeCumulativeSum(const uint8* src_argb, int src_stride_argb,
- int32* dst_cumsum, int dst_stride32_cumsum,
- int width, int height) {
- int y;
- void (*ComputeCumulativeSumRow)(const uint8* row, int32* cumsum,
- const int32* previous_cumsum, int width) = ComputeCumulativeSumRow_C;
- int32* previous_cumsum = dst_cumsum;
- if (!dst_cumsum || !src_argb || width <= 0 || height <= 0) {
- return -1;
- }
-#if defined(HAS_CUMULATIVESUMTOAVERAGEROW_SSE2)
- if (TestCpuFlag(kCpuHasSSE2)) {
- ComputeCumulativeSumRow = ComputeCumulativeSumRow_SSE2;
- }
-#endif
- memset(dst_cumsum, 0, width * sizeof(dst_cumsum[0]) * 4); // 4 int per pixel.
- for (y = 0; y < height; ++y) {
- ComputeCumulativeSumRow(src_argb, dst_cumsum, previous_cumsum, width);
- previous_cumsum = dst_cumsum;
- dst_cumsum += dst_stride32_cumsum;
- src_argb += src_stride_argb;
- }
- return 0;
-}
-
-// Blur ARGB image.
-// Caller should allocate CumulativeSum table of width * height * 16 bytes
-// aligned to 16 byte boundary. height can be radius * 2 + 2 to save memory
-// as the buffer is treated as circular.
-LIBYUV_API
-int ARGBBlur(const uint8* src_argb, int src_stride_argb,
- uint8* dst_argb, int dst_stride_argb,
- int32* dst_cumsum, int dst_stride32_cumsum,
- int width, int height, int radius) {
- int y;
- void (*ComputeCumulativeSumRow)(const uint8 *row, int32 *cumsum,
- const int32* previous_cumsum, int width) = ComputeCumulativeSumRow_C;
- void (*CumulativeSumToAverageRow)(const int32* topleft, const int32* botleft,
- int width, int area, uint8* dst, int count) = CumulativeSumToAverageRow_C;
- int32* cumsum_bot_row;
- int32* max_cumsum_bot_row;
- int32* cumsum_top_row;
-
- if (!src_argb || !dst_argb || width <= 0 || height == 0) {
- return -1;
- }
- if (height < 0) {
- height = -height;
- src_argb = src_argb + (height - 1) * src_stride_argb;
- src_stride_argb = -src_stride_argb;
- }
- if (radius > height) {
- radius = height;
- }
- if (radius > (width / 2 - 1)) {
- radius = width / 2 - 1;
- }
- if (radius <= 0) {
- return -1;
- }
-#if defined(HAS_CUMULATIVESUMTOAVERAGEROW_SSE2)
- if (TestCpuFlag(kCpuHasSSE2)) {
- ComputeCumulativeSumRow = ComputeCumulativeSumRow_SSE2;
- CumulativeSumToAverageRow = CumulativeSumToAverageRow_SSE2;
- }
-#endif
- // Compute enough CumulativeSum for first row to be blurred. After this
- // one row of CumulativeSum is updated at a time.
- ARGBComputeCumulativeSum(src_argb, src_stride_argb,
- dst_cumsum, dst_stride32_cumsum,
- width, radius);
-
- src_argb = src_argb + radius * src_stride_argb;
- cumsum_bot_row = &dst_cumsum[(radius - 1) * dst_stride32_cumsum];
-
- max_cumsum_bot_row = &dst_cumsum[(radius * 2 + 2) * dst_stride32_cumsum];
- cumsum_top_row = &dst_cumsum[0];
-
- for (y = 0; y < height; ++y) {
- int top_y = ((y - radius - 1) >= 0) ? (y - radius - 1) : 0;
- int bot_y = ((y + radius) < height) ? (y + radius) : (height - 1);
- int area = radius * (bot_y - top_y);
- int boxwidth = radius * 4;
- int x;
- int n;
-
- // Increment cumsum_top_row pointer with circular buffer wrap around.
- if (top_y) {
- cumsum_top_row += dst_stride32_cumsum;
- if (cumsum_top_row >= max_cumsum_bot_row) {
- cumsum_top_row = dst_cumsum;
- }
- }
- // Increment cumsum_bot_row pointer with circular buffer wrap around and
- // then fill in a row of CumulativeSum.
- if ((y + radius) < height) {
- const int32* prev_cumsum_bot_row = cumsum_bot_row;
- cumsum_bot_row += dst_stride32_cumsum;
- if (cumsum_bot_row >= max_cumsum_bot_row) {
- cumsum_bot_row = dst_cumsum;
- }
- ComputeCumulativeSumRow(src_argb, cumsum_bot_row, prev_cumsum_bot_row,
- width);
- src_argb += src_stride_argb;
- }
-
- // Left clipped.
- for (x = 0; x < radius + 1; ++x) {
- CumulativeSumToAverageRow(cumsum_top_row, cumsum_bot_row,
- boxwidth, area, &dst_argb[x * 4], 1);
- area += (bot_y - top_y);
- boxwidth += 4;
- }
-
- // Middle unclipped.
- n = (width - 1) - radius - x + 1;
- CumulativeSumToAverageRow(cumsum_top_row, cumsum_bot_row,
- boxwidth, area, &dst_argb[x * 4], n);
-
- // Right clipped.
- for (x += n; x <= width - 1; ++x) {
- area -= (bot_y - top_y);
- boxwidth -= 4;
- CumulativeSumToAverageRow(cumsum_top_row + (x - radius - 1) * 4,
- cumsum_bot_row + (x - radius - 1) * 4,
- boxwidth, area, &dst_argb[x * 4], 1);
- }
- dst_argb += dst_stride_argb;
- }
- return 0;
-}
-
-// Multiply ARGB image by a specified ARGB value.
-LIBYUV_API
-int ARGBShade(const uint8* src_argb, int src_stride_argb,
- uint8* dst_argb, int dst_stride_argb,
- int width, int height, uint32 value) {
- int y;
- void (*ARGBShadeRow)(const uint8* src_argb, uint8* dst_argb,
- int width, uint32 value) = ARGBShadeRow_C;
- if (!src_argb || !dst_argb || width <= 0 || height == 0 || value == 0u) {
- return -1;
- }
- if (height < 0) {
- height = -height;
- src_argb = src_argb + (height - 1) * src_stride_argb;
- src_stride_argb = -src_stride_argb;
- }
- // Coalesce rows.
- if (src_stride_argb == width * 4 &&
- dst_stride_argb == width * 4) {
- width *= height;
- height = 1;
- src_stride_argb = dst_stride_argb = 0;
- }
-#if defined(HAS_ARGBSHADEROW_SSE2)
- if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(width, 4)) {
- ARGBShadeRow = ARGBShadeRow_SSE2;
- }
-#endif
-#if defined(HAS_ARGBSHADEROW_NEON)
- if (TestCpuFlag(kCpuHasNEON) && IS_ALIGNED(width, 8)) {
- ARGBShadeRow = ARGBShadeRow_NEON;
- }
-#endif
-
- for (y = 0; y < height; ++y) {
- ARGBShadeRow(src_argb, dst_argb, width, value);
- src_argb += src_stride_argb;
- dst_argb += dst_stride_argb;
- }
- return 0;
-}
-
-// Interpolate 2 ARGB images by specified amount (0 to 255).
-LIBYUV_API
-int ARGBInterpolate(const uint8* src_argb0, int src_stride_argb0,
- const uint8* src_argb1, int src_stride_argb1,
- uint8* dst_argb, int dst_stride_argb,
- int width, int height, int interpolation) {
- int y;
- void (*InterpolateRow)(uint8* dst_ptr, const uint8* src_ptr,
- ptrdiff_t src_stride, int dst_width,
- int source_y_fraction) = InterpolateRow_C;
- if (!src_argb0 || !src_argb1 || !dst_argb || width <= 0 || height == 0) {
- return -1;
- }
- // Negative height means invert the image.
- if (height < 0) {
- height = -height;
- dst_argb = dst_argb + (height - 1) * dst_stride_argb;
- dst_stride_argb = -dst_stride_argb;
- }
- // Coalesce rows.
- if (src_stride_argb0 == width * 4 &&
- src_stride_argb1 == width * 4 &&
- dst_stride_argb == width * 4) {
- width *= height;
- height = 1;
- src_stride_argb0 = src_stride_argb1 = dst_stride_argb = 0;
- }
-#if defined(HAS_INTERPOLATEROW_SSE2)
- if (TestCpuFlag(kCpuHasSSE2)) {
- InterpolateRow = InterpolateRow_Any_SSE2;
- if (IS_ALIGNED(width, 4)) {
- InterpolateRow = InterpolateRow_SSE2;
- }
- }
-#endif
-#if defined(HAS_INTERPOLATEROW_SSSE3)
- if (TestCpuFlag(kCpuHasSSSE3)) {
- InterpolateRow = InterpolateRow_Any_SSSE3;
- if (IS_ALIGNED(width, 4)) {
- InterpolateRow = InterpolateRow_SSSE3;
- }
- }
-#endif
-#if defined(HAS_INTERPOLATEROW_AVX2)
- if (TestCpuFlag(kCpuHasAVX2)) {
- InterpolateRow = InterpolateRow_Any_AVX2;
- if (IS_ALIGNED(width, 8)) {
- InterpolateRow = InterpolateRow_AVX2;
- }
- }
-#endif
-#if defined(HAS_INTERPOLATEROW_NEON)
- if (TestCpuFlag(kCpuHasNEON)) {
- InterpolateRow = InterpolateRow_Any_NEON;
- if (IS_ALIGNED(width, 4)) {
- InterpolateRow = InterpolateRow_NEON;
- }
- }
-#endif
-#if defined(HAS_INTERPOLATEROW_MIPS_DSPR2)
- if (TestCpuFlag(kCpuHasMIPS_DSPR2) &&
- IS_ALIGNED(src_argb0, 4) && IS_ALIGNED(src_stride_argb0, 4) &&
- IS_ALIGNED(src_argb1, 4) && IS_ALIGNED(src_stride_argb1, 4) &&
- IS_ALIGNED(dst_argb, 4) && IS_ALIGNED(dst_stride_argb, 4)) {
- InterpolateRow = InterpolateRow_MIPS_DSPR2;
- }
-#endif
-
- for (y = 0; y < height; ++y) {
- InterpolateRow(dst_argb, src_argb0, src_argb1 - src_argb0,
- width * 4, interpolation);
- src_argb0 += src_stride_argb0;
- src_argb1 += src_stride_argb1;
- dst_argb += dst_stride_argb;
- }
- return 0;
-}
-
-// Shuffle ARGB channel order. e.g. BGRA to ARGB.
-LIBYUV_API
-int ARGBShuffle(const uint8* src_bgra, int src_stride_bgra,
- uint8* dst_argb, int dst_stride_argb,
- const uint8* shuffler, int width, int height) {
- int y;
- void (*ARGBShuffleRow)(const uint8* src_bgra, uint8* dst_argb,
- const uint8* shuffler, int pix) = ARGBShuffleRow_C;
- if (!src_bgra || !dst_argb ||
- width <= 0 || height == 0) {
- return -1;
- }
- // Negative height means invert the image.
- if (height < 0) {
- height = -height;
- src_bgra = src_bgra + (height - 1) * src_stride_bgra;
- src_stride_bgra = -src_stride_bgra;
- }
- // Coalesce rows.
- if (src_stride_bgra == width * 4 &&
- dst_stride_argb == width * 4) {
- width *= height;
- height = 1;
- src_stride_bgra = dst_stride_argb = 0;
- }
-#if defined(HAS_ARGBSHUFFLEROW_SSE2)
- if (TestCpuFlag(kCpuHasSSE2)) {
- ARGBShuffleRow = ARGBShuffleRow_Any_SSE2;
- if (IS_ALIGNED(width, 4)) {
- ARGBShuffleRow = ARGBShuffleRow_SSE2;
- }
- }
-#endif
-#if defined(HAS_ARGBSHUFFLEROW_SSSE3)
- if (TestCpuFlag(kCpuHasSSSE3)) {
- ARGBShuffleRow = ARGBShuffleRow_Any_SSSE3;
- if (IS_ALIGNED(width, 8)) {
- ARGBShuffleRow = ARGBShuffleRow_SSSE3;
- }
- }
-#endif
-#if defined(HAS_ARGBSHUFFLEROW_AVX2)
- if (TestCpuFlag(kCpuHasAVX2)) {
- ARGBShuffleRow = ARGBShuffleRow_Any_AVX2;
- if (IS_ALIGNED(width, 16)) {
- ARGBShuffleRow = ARGBShuffleRow_AVX2;
- }
- }
-#endif
-#if defined(HAS_ARGBSHUFFLEROW_NEON)
- if (TestCpuFlag(kCpuHasNEON)) {
- ARGBShuffleRow = ARGBShuffleRow_Any_NEON;
- if (IS_ALIGNED(width, 4)) {
- ARGBShuffleRow = ARGBShuffleRow_NEON;
- }
- }
-#endif
-
- for (y = 0; y < height; ++y) {
- ARGBShuffleRow(src_bgra, dst_argb, shuffler, width);
- src_bgra += src_stride_bgra;
- dst_argb += dst_stride_argb;
- }
- return 0;
-}
-
-// Sobel ARGB effect.
-static int ARGBSobelize(const uint8* src_argb, int src_stride_argb,
- uint8* dst_argb, int dst_stride_argb,
- int width, int height,
- void (*SobelRow)(const uint8* src_sobelx,
- const uint8* src_sobely,
- uint8* dst, int width)) {
- int y;
- void (*ARGBToYJRow)(const uint8* src_argb, uint8* dst_g, int pix) =
- ARGBToYJRow_C;
- void (*SobelYRow)(const uint8* src_y0, const uint8* src_y1,
- uint8* dst_sobely, int width) = SobelYRow_C;
- void (*SobelXRow)(const uint8* src_y0, const uint8* src_y1,
- const uint8* src_y2, uint8* dst_sobely, int width) =
- SobelXRow_C;
- const int kEdge = 16; // Extra pixels at start of row for extrude/align.
- if (!src_argb || !dst_argb || width <= 0 || height == 0) {
- return -1;
- }
- // Negative height means invert the image.
- if (height < 0) {
- height = -height;
- src_argb = src_argb + (height - 1) * src_stride_argb;
- src_stride_argb = -src_stride_argb;
- }
-
-#if defined(HAS_ARGBTOYJROW_SSSE3)
- if (TestCpuFlag(kCpuHasSSSE3)) {
- ARGBToYJRow = ARGBToYJRow_Any_SSSE3;
- if (IS_ALIGNED(width, 16)) {
- ARGBToYJRow = ARGBToYJRow_SSSE3;
- }
- }
-#endif
-#if defined(HAS_ARGBTOYJROW_AVX2)
- if (TestCpuFlag(kCpuHasAVX2)) {
- ARGBToYJRow = ARGBToYJRow_Any_AVX2;
- if (IS_ALIGNED(width, 32)) {
- ARGBToYJRow = ARGBToYJRow_AVX2;
- }
- }
-#endif
-#if defined(HAS_ARGBTOYJROW_NEON)
- if (TestCpuFlag(kCpuHasNEON)) {
- ARGBToYJRow = ARGBToYJRow_Any_NEON;
- if (IS_ALIGNED(width, 8)) {
- ARGBToYJRow = ARGBToYJRow_NEON;
- }
- }
-#endif
-
-#if defined(HAS_SOBELYROW_SSE2)
- if (TestCpuFlag(kCpuHasSSE2)) {
- SobelYRow = SobelYRow_SSE2;
- }
-#endif
-#if defined(HAS_SOBELYROW_NEON)
- if (TestCpuFlag(kCpuHasNEON)) {
- SobelYRow = SobelYRow_NEON;
- }
-#endif
-#if defined(HAS_SOBELXROW_SSE2)
- if (TestCpuFlag(kCpuHasSSE2)) {
- SobelXRow = SobelXRow_SSE2;
- }
-#endif
-#if defined(HAS_SOBELXROW_NEON)
- if (TestCpuFlag(kCpuHasNEON)) {
- SobelXRow = SobelXRow_NEON;
- }
-#endif
- {
- // 3 rows with edges before/after.
- const int kRowSize = (width + kEdge + 31) & ~31;
- align_buffer_64(rows, kRowSize * 2 + (kEdge + kRowSize * 3 + kEdge));
- uint8* row_sobelx = rows;
- uint8* row_sobely = rows + kRowSize;
- uint8* row_y = rows + kRowSize * 2;
-
- // Convert first row.
- uint8* row_y0 = row_y + kEdge;
- uint8* row_y1 = row_y0 + kRowSize;
- uint8* row_y2 = row_y1 + kRowSize;
- ARGBToYJRow(src_argb, row_y0, width);
- row_y0[-1] = row_y0[0];
- memset(row_y0 + width, row_y0[width - 1], 16); // Extrude 16 for valgrind.
- ARGBToYJRow(src_argb, row_y1, width);
- row_y1[-1] = row_y1[0];
- memset(row_y1 + width, row_y1[width - 1], 16);
- memset(row_y2 + width, 0, 16);
-
- for (y = 0; y < height; ++y) {
- // Convert next row of ARGB to G.
- if (y < (height - 1)) {
- src_argb += src_stride_argb;
- }
- ARGBToYJRow(src_argb, row_y2, width);
- row_y2[-1] = row_y2[0];
- row_y2[width] = row_y2[width - 1];
-
- SobelXRow(row_y0 - 1, row_y1 - 1, row_y2 - 1, row_sobelx, width);
- SobelYRow(row_y0 - 1, row_y2 - 1, row_sobely, width);
- SobelRow(row_sobelx, row_sobely, dst_argb, width);
-
- // Cycle thru circular queue of 3 row_y buffers.
- {
- uint8* row_yt = row_y0;
- row_y0 = row_y1;
- row_y1 = row_y2;
- row_y2 = row_yt;
- }
-
- dst_argb += dst_stride_argb;
- }
- free_aligned_buffer_64(rows);
- }
- return 0;
-}
-
-// Sobel ARGB effect.
-LIBYUV_API
-int ARGBSobel(const uint8* src_argb, int src_stride_argb,
- uint8* dst_argb, int dst_stride_argb,
- int width, int height) {
- void (*SobelRow)(const uint8* src_sobelx, const uint8* src_sobely,
- uint8* dst_argb, int width) = SobelRow_C;
-#if defined(HAS_SOBELROW_SSE2)
- if (TestCpuFlag(kCpuHasSSE2)) {
- SobelRow = SobelRow_Any_SSE2;
- if (IS_ALIGNED(width, 16)) {
- SobelRow = SobelRow_SSE2;
- }
- }
-#endif
-#if defined(HAS_SOBELROW_NEON)
- if (TestCpuFlag(kCpuHasNEON)) {
- SobelRow = SobelRow_Any_NEON;
- if (IS_ALIGNED(width, 8)) {
- SobelRow = SobelRow_NEON;
- }
- }
-#endif
- return ARGBSobelize(src_argb, src_stride_argb, dst_argb, dst_stride_argb,
- width, height, SobelRow);
-}
-
-// Sobel ARGB effect with planar output.
-LIBYUV_API
-int ARGBSobelToPlane(const uint8* src_argb, int src_stride_argb,
- uint8* dst_y, int dst_stride_y,
- int width, int height) {
- void (*SobelToPlaneRow)(const uint8* src_sobelx, const uint8* src_sobely,
- uint8* dst_, int width) = SobelToPlaneRow_C;
-#if defined(HAS_SOBELTOPLANEROW_SSE2)
- if (TestCpuFlag(kCpuHasSSE2)) {
- SobelToPlaneRow = SobelToPlaneRow_Any_SSE2;
- if (IS_ALIGNED(width, 16)) {
- SobelToPlaneRow = SobelToPlaneRow_SSE2;
- }
- }
-#endif
-#if defined(HAS_SOBELTOPLANEROW_NEON)
- if (TestCpuFlag(kCpuHasNEON)) {
- SobelToPlaneRow = SobelToPlaneRow_Any_NEON;
- if (IS_ALIGNED(width, 16)) {
- SobelToPlaneRow = SobelToPlaneRow_NEON;
- }
- }
-#endif
- return ARGBSobelize(src_argb, src_stride_argb, dst_y, dst_stride_y,
- width, height, SobelToPlaneRow);
-}
-
-// SobelXY ARGB effect.
-// Similar to Sobel, but also stores Sobel X in R and Sobel Y in B. G = Sobel.
-LIBYUV_API
-int ARGBSobelXY(const uint8* src_argb, int src_stride_argb,
- uint8* dst_argb, int dst_stride_argb,
- int width, int height) {
- void (*SobelXYRow)(const uint8* src_sobelx, const uint8* src_sobely,
- uint8* dst_argb, int width) = SobelXYRow_C;
-#if defined(HAS_SOBELXYROW_SSE2)
- if (TestCpuFlag(kCpuHasSSE2)) {
- SobelXYRow = SobelXYRow_Any_SSE2;
- if (IS_ALIGNED(width, 16)) {
- SobelXYRow = SobelXYRow_SSE2;
- }
- }
-#endif
-#if defined(HAS_SOBELXYROW_NEON)
- if (TestCpuFlag(kCpuHasNEON)) {
- SobelXYRow = SobelXYRow_Any_NEON;
- if (IS_ALIGNED(width, 8)) {
- SobelXYRow = SobelXYRow_NEON;
- }
- }
-#endif
- return ARGBSobelize(src_argb, src_stride_argb, dst_argb, dst_stride_argb,
- width, height, SobelXYRow);
-}
-
-// Apply a 4x4 polynomial to each ARGB pixel.
-LIBYUV_API
-int ARGBPolynomial(const uint8* src_argb, int src_stride_argb,
- uint8* dst_argb, int dst_stride_argb,
- const float* poly,
- int width, int height) {
- int y;
- void (*ARGBPolynomialRow)(const uint8* src_argb,
- uint8* dst_argb, const float* poly,
- int width) = ARGBPolynomialRow_C;
- if (!src_argb || !dst_argb || !poly || width <= 0 || height == 0) {
- return -1;
- }
- // Negative height means invert the image.
- if (height < 0) {
- height = -height;
- src_argb = src_argb + (height - 1) * src_stride_argb;
- src_stride_argb = -src_stride_argb;
- }
- // Coalesce rows.
- if (src_stride_argb == width * 4 &&
- dst_stride_argb == width * 4) {
- width *= height;
- height = 1;
- src_stride_argb = dst_stride_argb = 0;
- }
-#if defined(HAS_ARGBPOLYNOMIALROW_SSE2)
- if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(width, 2)) {
- ARGBPolynomialRow = ARGBPolynomialRow_SSE2;
- }
-#endif
-#if defined(HAS_ARGBPOLYNOMIALROW_AVX2)
- if (TestCpuFlag(kCpuHasAVX2) && TestCpuFlag(kCpuHasFMA3) &&
- IS_ALIGNED(width, 2)) {
- ARGBPolynomialRow = ARGBPolynomialRow_AVX2;
- }
-#endif
-
- for (y = 0; y < height; ++y) {
- ARGBPolynomialRow(src_argb, dst_argb, poly, width);
- src_argb += src_stride_argb;
- dst_argb += dst_stride_argb;
- }
- return 0;
-}
-
-// Apply a lumacolortable to each ARGB pixel.
-LIBYUV_API
-int ARGBLumaColorTable(const uint8* src_argb, int src_stride_argb,
- uint8* dst_argb, int dst_stride_argb,
- const uint8* luma,
- int width, int height) {
- int y;
- void (*ARGBLumaColorTableRow)(const uint8* src_argb, uint8* dst_argb,
- int width, const uint8* luma, const uint32 lumacoeff) =
- ARGBLumaColorTableRow_C;
- if (!src_argb || !dst_argb || !luma || width <= 0 || height == 0) {
- return -1;
- }
- // Negative height means invert the image.
- if (height < 0) {
- height = -height;
- src_argb = src_argb + (height - 1) * src_stride_argb;
- src_stride_argb = -src_stride_argb;
- }
- // Coalesce rows.
- if (src_stride_argb == width * 4 &&
- dst_stride_argb == width * 4) {
- width *= height;
- height = 1;
- src_stride_argb = dst_stride_argb = 0;
- }
-#if defined(HAS_ARGBLUMACOLORTABLEROW_SSSE3)
- if (TestCpuFlag(kCpuHasSSSE3) && IS_ALIGNED(width, 4)) {
- ARGBLumaColorTableRow = ARGBLumaColorTableRow_SSSE3;
- }
-#endif
-
- for (y = 0; y < height; ++y) {
- ARGBLumaColorTableRow(src_argb, dst_argb, width, luma, 0x00264b0f);
- src_argb += src_stride_argb;
- dst_argb += dst_stride_argb;
- }
- return 0;
-}
-
-// Copy Alpha from one ARGB image to another.
-LIBYUV_API
-int ARGBCopyAlpha(const uint8* src_argb, int src_stride_argb,
- uint8* dst_argb, int dst_stride_argb,
- int width, int height) {
- int y;
- void (*ARGBCopyAlphaRow)(const uint8* src_argb, uint8* dst_argb, int width) =
- ARGBCopyAlphaRow_C;
- if (!src_argb || !dst_argb || width <= 0 || height == 0) {
- return -1;
- }
- // Negative height means invert the image.
- if (height < 0) {
- height = -height;
- src_argb = src_argb + (height - 1) * src_stride_argb;
- src_stride_argb = -src_stride_argb;
- }
- // Coalesce rows.
- if (src_stride_argb == width * 4 &&
- dst_stride_argb == width * 4) {
- width *= height;
- height = 1;
- src_stride_argb = dst_stride_argb = 0;
- }
-#if defined(HAS_ARGBCOPYALPHAROW_SSE2)
- if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(width, 8)) {
- ARGBCopyAlphaRow = ARGBCopyAlphaRow_SSE2;
- }
-#endif
-#if defined(HAS_ARGBCOPYALPHAROW_AVX2)
- if (TestCpuFlag(kCpuHasAVX2) && IS_ALIGNED(width, 16)) {
- ARGBCopyAlphaRow = ARGBCopyAlphaRow_AVX2;
- }
-#endif
-
- for (y = 0; y < height; ++y) {
- ARGBCopyAlphaRow(src_argb, dst_argb, width);
- src_argb += src_stride_argb;
- dst_argb += dst_stride_argb;
- }
- return 0;
-}
-
-// Copy a planar Y channel to the alpha channel of a destination ARGB image.
-LIBYUV_API
-int ARGBCopyYToAlpha(const uint8* src_y, int src_stride_y,
- uint8* dst_argb, int dst_stride_argb,
- int width, int height) {
- int y;
- void (*ARGBCopyYToAlphaRow)(const uint8* src_y, uint8* dst_argb, int width) =
- ARGBCopyYToAlphaRow_C;
- if (!src_y || !dst_argb || width <= 0 || height == 0) {
- return -1;
- }
- // Negative height means invert the image.
- if (height < 0) {
- height = -height;
- src_y = src_y + (height - 1) * src_stride_y;
- src_stride_y = -src_stride_y;
- }
- // Coalesce rows.
- if (src_stride_y == width &&
- dst_stride_argb == width * 4) {
- width *= height;
- height = 1;
- src_stride_y = dst_stride_argb = 0;
- }
-#if defined(HAS_ARGBCOPYYTOALPHAROW_SSE2)
- if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(width, 8)) {
- ARGBCopyYToAlphaRow = ARGBCopyYToAlphaRow_SSE2;
- }
-#endif
-#if defined(HAS_ARGBCOPYYTOALPHAROW_AVX2)
- if (TestCpuFlag(kCpuHasAVX2) && IS_ALIGNED(width, 16)) {
- ARGBCopyYToAlphaRow = ARGBCopyYToAlphaRow_AVX2;
- }
-#endif
-
- for (y = 0; y < height; ++y) {
- ARGBCopyYToAlphaRow(src_y, dst_argb, width);
- src_y += src_stride_y;
- dst_argb += dst_stride_argb;
- }
- return 0;
-}
-
-LIBYUV_API
-int YUY2ToNV12(const uint8* src_yuy2, int src_stride_yuy2,
- uint8* dst_y, int dst_stride_y,
- uint8* dst_uv, int dst_stride_uv,
- int width, int height) {
- int y;
- int halfwidth = (width + 1) >> 1;
- void (*SplitUVRow)(const uint8* src_uv, uint8* dst_u, uint8* dst_v, int pix) =
- SplitUVRow_C;
- void (*InterpolateRow)(uint8* dst_ptr, const uint8* src_ptr,
- ptrdiff_t src_stride, int dst_width,
- int source_y_fraction) = InterpolateRow_C;
- if (!src_yuy2 ||
- !dst_y || !dst_uv ||
- width <= 0 || height == 0) {
- return -1;
- }
- // Negative height means invert the image.
- if (height < 0) {
- height = -height;
- src_yuy2 = src_yuy2 + (height - 1) * src_stride_yuy2;
- src_stride_yuy2 = -src_stride_yuy2;
- }
-#if defined(HAS_SPLITUVROW_SSE2)
- if (TestCpuFlag(kCpuHasSSE2)) {
- SplitUVRow = SplitUVRow_Any_SSE2;
- if (IS_ALIGNED(width, 16)) {
- SplitUVRow = SplitUVRow_SSE2;
- }
- }
-#endif
-#if defined(HAS_SPLITUVROW_AVX2)
- if (TestCpuFlag(kCpuHasAVX2)) {
- SplitUVRow = SplitUVRow_Any_AVX2;
- if (IS_ALIGNED(width, 32)) {
- SplitUVRow = SplitUVRow_AVX2;
- }
- }
-#endif
-#if defined(HAS_SPLITUVROW_NEON)
- if (TestCpuFlag(kCpuHasNEON)) {
- SplitUVRow = SplitUVRow_Any_NEON;
- if (IS_ALIGNED(width, 16)) {
- SplitUVRow = SplitUVRow_NEON;
- }
- }
-#endif
-#if defined(HAS_INTERPOLATEROW_SSE2)
- if (TestCpuFlag(kCpuHasSSE2)) {
- InterpolateRow = InterpolateRow_Any_SSE2;
- if (IS_ALIGNED(width, 16)) {
- InterpolateRow = InterpolateRow_SSE2;
- }
- }
-#endif
-#if defined(HAS_INTERPOLATEROW_SSSE3)
- if (TestCpuFlag(kCpuHasSSSE3)) {
- InterpolateRow = InterpolateRow_Any_SSSE3;
- if (IS_ALIGNED(width, 16)) {
- InterpolateRow = InterpolateRow_SSSE3;
- }
- }
-#endif
-#if defined(HAS_INTERPOLATEROW_AVX2)
- if (TestCpuFlag(kCpuHasAVX2)) {
- InterpolateRow = InterpolateRow_Any_AVX2;
- if (IS_ALIGNED(width, 32)) {
- InterpolateRow = InterpolateRow_AVX2;
- }
- }
-#endif
-#if defined(HAS_INTERPOLATEROW_NEON)
- if (TestCpuFlag(kCpuHasNEON)) {
- InterpolateRow = InterpolateRow_Any_NEON;
- if (IS_ALIGNED(width, 16)) {
- InterpolateRow = InterpolateRow_NEON;
- }
- }
-#endif
-
- {
- int awidth = halfwidth * 2;
- // 2 rows of uv
- align_buffer_64(rows, awidth * 2);
-
- for (y = 0; y < height - 1; y += 2) {
- // Split Y from UV.
- SplitUVRow(src_yuy2, dst_y, rows, awidth);
- SplitUVRow(src_yuy2 + src_stride_yuy2, dst_y + dst_stride_y,
- rows + awidth, awidth);
- InterpolateRow(dst_uv, rows, awidth, awidth, 128);
- src_yuy2 += src_stride_yuy2 * 2;
- dst_y += dst_stride_y * 2;
- dst_uv += dst_stride_uv;
- }
- if (height & 1) {
- // Split Y from UV.
- SplitUVRow(src_yuy2, dst_y, dst_uv, width);
- }
- free_aligned_buffer_64(rows);
- }
- return 0;
-}
-
-LIBYUV_API
-int UYVYToNV12(const uint8* src_uyvy, int src_stride_uyvy,
- uint8* dst_y, int dst_stride_y,
- uint8* dst_uv, int dst_stride_uv,
- int width, int height) {
- int y;
- int halfwidth = (width + 1) >> 1;
- void (*SplitUVRow)(const uint8* src_uv, uint8* dst_u, uint8* dst_v, int pix) =
- SplitUVRow_C;
- void (*InterpolateRow)(uint8* dst_ptr, const uint8* src_ptr,
- ptrdiff_t src_stride, int dst_width,
- int source_y_fraction) = InterpolateRow_C;
- if (!src_uyvy ||
- !dst_y || !dst_uv ||
- width <= 0 || height == 0) {
- return -1;
- }
- // Negative height means invert the image.
- if (height < 0) {
- height = -height;
- src_uyvy = src_uyvy + (height - 1) * src_stride_uyvy;
- src_stride_uyvy = -src_stride_uyvy;
- }
-#if defined(HAS_SPLITUVROW_SSE2)
- if (TestCpuFlag(kCpuHasSSE2)) {
- SplitUVRow = SplitUVRow_Any_SSE2;
- if (IS_ALIGNED(width, 16)) {
- SplitUVRow = SplitUVRow_SSE2;
- }
- }
-#endif
-#if defined(HAS_SPLITUVROW_AVX2)
- if (TestCpuFlag(kCpuHasAVX2)) {
- SplitUVRow = SplitUVRow_Any_AVX2;
- if (IS_ALIGNED(width, 32)) {
- SplitUVRow = SplitUVRow_AVX2;
- }
- }
-#endif
-#if defined(HAS_SPLITUVROW_NEON)
- if (TestCpuFlag(kCpuHasNEON)) {
- SplitUVRow = SplitUVRow_Any_NEON;
- if (IS_ALIGNED(width, 16)) {
- SplitUVRow = SplitUVRow_NEON;
- }
- }
-#endif
-#if defined(HAS_INTERPOLATEROW_SSE2)
- if (TestCpuFlag(kCpuHasSSE2)) {
- InterpolateRow = InterpolateRow_Any_SSE2;
- if (IS_ALIGNED(width, 16)) {
- InterpolateRow = InterpolateRow_SSE2;
- }
- }
-#endif
-#if defined(HAS_INTERPOLATEROW_SSSE3)
- if (TestCpuFlag(kCpuHasSSSE3)) {
- InterpolateRow = InterpolateRow_Any_SSSE3;
- if (IS_ALIGNED(width, 16)) {
- InterpolateRow = InterpolateRow_SSSE3;
- }
- }
-#endif
-#if defined(HAS_INTERPOLATEROW_AVX2)
- if (TestCpuFlag(kCpuHasAVX2)) {
- InterpolateRow = InterpolateRow_Any_AVX2;
- if (IS_ALIGNED(width, 32)) {
- InterpolateRow = InterpolateRow_AVX2;
- }
- }
-#endif
-#if defined(HAS_INTERPOLATEROW_NEON)
- if (TestCpuFlag(kCpuHasNEON)) {
- InterpolateRow = InterpolateRow_Any_NEON;
- if (IS_ALIGNED(width, 16)) {
- InterpolateRow = InterpolateRow_NEON;
- }
- }
-#endif
-
- {
- int awidth = halfwidth * 2;
- // 2 rows of uv
- align_buffer_64(rows, awidth * 2);
-
- for (y = 0; y < height - 1; y += 2) {
- // Split Y from UV.
- SplitUVRow(src_uyvy, rows, dst_y, awidth);
- SplitUVRow(src_uyvy + src_stride_uyvy, rows + awidth,
- dst_y + dst_stride_y, awidth);
- InterpolateRow(dst_uv, rows, awidth, awidth, 128);
- src_uyvy += src_stride_uyvy * 2;
- dst_y += dst_stride_y * 2;
- dst_uv += dst_stride_uv;
- }
- if (height & 1) {
- // Split Y from UV.
- SplitUVRow(src_uyvy, dst_y, dst_uv, width);
- }
- free_aligned_buffer_64(rows);
- }
- return 0;
-}
-
-#ifdef __cplusplus
-} // extern "C"
-} // namespace libyuv
-#endif
diff --git a/third_party/aom/third_party/libyuv/source/rotate.cc b/third_party/aom/third_party/libyuv/source/rotate.cc
deleted file mode 100644
index be3d58920..000000000
--- a/third_party/aom/third_party/libyuv/source/rotate.cc
+++ /dev/null
@@ -1,496 +0,0 @@
-/*
- * Copyright 2011 The LibYuv Project Authors. All rights reserved.
- *
- * Use of this source code is governed by a BSD-style license
- * that can be found in the LICENSE file in the root of the source
- * tree. An additional intellectual property rights grant can be found
- * in the file PATENTS. All contributing project authors may
- * be found in the AUTHORS file in the root of the source tree.
- */
-
-#include "libyuv/rotate.h"
-
-#include "libyuv/cpu_id.h"
-#include "libyuv/convert.h"
-#include "libyuv/planar_functions.h"
-#include "libyuv/rotate_row.h"
-#include "libyuv/row.h"
-
-#ifdef __cplusplus
-namespace libyuv {
-extern "C" {
-#endif
-
-LIBYUV_API
-void TransposePlane(const uint8* src, int src_stride,
- uint8* dst, int dst_stride,
- int width, int height) {
- int i = height;
- void (*TransposeWx8)(const uint8* src, int src_stride,
- uint8* dst, int dst_stride, int width) = TransposeWx8_C;
-#if defined(HAS_TRANSPOSEWX8_NEON)
- if (TestCpuFlag(kCpuHasNEON)) {
- TransposeWx8 = TransposeWx8_NEON;
- }
-#endif
-#if defined(HAS_TRANSPOSEWX8_SSSE3)
- if (TestCpuFlag(kCpuHasSSSE3)) {
- TransposeWx8 = TransposeWx8_Any_SSSE3;
- if (IS_ALIGNED(width, 8)) {
- TransposeWx8 = TransposeWx8_SSSE3;
- }
- }
-#endif
-#if defined(HAS_TRANSPOSEWX8_FAST_SSSE3)
- if (TestCpuFlag(kCpuHasSSSE3)) {
- TransposeWx8 = TransposeWx8_Fast_Any_SSSE3;
- if (IS_ALIGNED(width, 16)) {
- TransposeWx8 = TransposeWx8_Fast_SSSE3;
- }
- }
-#endif
-#if defined(HAS_TRANSPOSEWX8_MIPS_DSPR2)
- if (TestCpuFlag(kCpuHasMIPS_DSPR2)) {
- if (IS_ALIGNED(width, 4) &&
- IS_ALIGNED(src, 4) && IS_ALIGNED(src_stride, 4)) {
- TransposeWx8 = TransposeWx8_Fast_MIPS_DSPR2;
- } else {
- TransposeWx8 = TransposeWx8_MIPS_DSPR2;
- }
- }
-#endif
-
- // Work across the source in 8x8 tiles
- while (i >= 8) {
- TransposeWx8(src, src_stride, dst, dst_stride, width);
- src += 8 * src_stride; // Go down 8 rows.
- dst += 8; // Move over 8 columns.
- i -= 8;
- }
-
- if (i > 0) {
- TransposeWxH_C(src, src_stride, dst, dst_stride, width, i);
- }
-}
-
-LIBYUV_API
-void RotatePlane90(const uint8* src, int src_stride,
- uint8* dst, int dst_stride,
- int width, int height) {
- // Rotate by 90 is a transpose with the source read
- // from bottom to top. So set the source pointer to the end
- // of the buffer and flip the sign of the source stride.
- src += src_stride * (height - 1);
- src_stride = -src_stride;
- TransposePlane(src, src_stride, dst, dst_stride, width, height);
-}
-
-LIBYUV_API
-void RotatePlane270(const uint8* src, int src_stride,
- uint8* dst, int dst_stride,
- int width, int height) {
- // Rotate by 270 is a transpose with the destination written
- // from bottom to top. So set the destination pointer to the end
- // of the buffer and flip the sign of the destination stride.
- dst += dst_stride * (width - 1);
- dst_stride = -dst_stride;
- TransposePlane(src, src_stride, dst, dst_stride, width, height);
-}
-
-LIBYUV_API
-void RotatePlane180(const uint8* src, int src_stride,
- uint8* dst, int dst_stride,
- int width, int height) {
- // Swap first and last row and mirror the content. Uses a temporary row.
- align_buffer_64(row, width);
- const uint8* src_bot = src + src_stride * (height - 1);
- uint8* dst_bot = dst + dst_stride * (height - 1);
- int half_height = (height + 1) >> 1;
- int y;
- void (*MirrorRow)(const uint8* src, uint8* dst, int width) = MirrorRow_C;
- void (*CopyRow)(const uint8* src, uint8* dst, int width) = CopyRow_C;
-#if defined(HAS_MIRRORROW_NEON)
- if (TestCpuFlag(kCpuHasNEON)) {
- MirrorRow = MirrorRow_Any_NEON;
- if (IS_ALIGNED(width, 16)) {
- MirrorRow = MirrorRow_NEON;
- }
- }
-#endif
-#if defined(HAS_MIRRORROW_SSE2)
- if (TestCpuFlag(kCpuHasSSE2)) {
- MirrorRow = MirrorRow_Any_SSE2;
- if (IS_ALIGNED(width, 16)) {
- MirrorRow = MirrorRow_SSE2;
- }
- }
-#endif
-#if defined(HAS_MIRRORROW_SSSE3)
- if (TestCpuFlag(kCpuHasSSSE3)) {
- MirrorRow = MirrorRow_Any_SSSE3;
- if (IS_ALIGNED(width, 16)) {
- MirrorRow = MirrorRow_SSSE3;
- }
- }
-#endif
-#if defined(HAS_MIRRORROW_AVX2)
- if (TestCpuFlag(kCpuHasAVX2)) {
- MirrorRow = MirrorRow_Any_AVX2;
- if (IS_ALIGNED(width, 32)) {
- MirrorRow = MirrorRow_AVX2;
- }
- }
-#endif
-// TODO(fbarchard): Mirror on mips handle unaligned memory.
-#if defined(HAS_MIRRORROW_MIPS_DSPR2)
- if (TestCpuFlag(kCpuHasMIPS_DSPR2) &&
- IS_ALIGNED(src, 4) && IS_ALIGNED(src_stride, 4) &&
- IS_ALIGNED(dst, 4) && IS_ALIGNED(dst_stride, 4)) {
- MirrorRow = MirrorRow_MIPS_DSPR2;
- }
-#endif
-#if defined(HAS_COPYROW_SSE2)
- if (TestCpuFlag(kCpuHasSSE2)) {
- CopyRow = IS_ALIGNED(width, 32) ? CopyRow_SSE2 : CopyRow_Any_SSE2;
- }
-#endif
-#if defined(HAS_COPYROW_AVX)
- if (TestCpuFlag(kCpuHasAVX)) {
- CopyRow = IS_ALIGNED(width, 64) ? CopyRow_AVX : CopyRow_Any_AVX;
- }
-#endif
-#if defined(HAS_COPYROW_ERMS)
- if (TestCpuFlag(kCpuHasERMS)) {
- CopyRow = CopyRow_ERMS;
- }
-#endif
-#if defined(HAS_COPYROW_NEON)
- if (TestCpuFlag(kCpuHasNEON)) {
- CopyRow = IS_ALIGNED(width, 32) ? CopyRow_NEON : CopyRow_Any_NEON;
- }
-#endif
-#if defined(HAS_COPYROW_MIPS)
- if (TestCpuFlag(kCpuHasMIPS)) {
- CopyRow = CopyRow_MIPS;
- }
-#endif
-
- // Odd height will harmlessly mirror the middle row twice.
- for (y = 0; y < half_height; ++y) {
- MirrorRow(src, row, width); // Mirror first row into a buffer
- src += src_stride;
- MirrorRow(src_bot, dst, width); // Mirror last row into first row
- dst += dst_stride;
- CopyRow(row, dst_bot, width); // Copy first mirrored row into last
- src_bot -= src_stride;
- dst_bot -= dst_stride;
- }
- free_aligned_buffer_64(row);
-}
-
-LIBYUV_API
-void TransposeUV(const uint8* src, int src_stride,
- uint8* dst_a, int dst_stride_a,
- uint8* dst_b, int dst_stride_b,
- int width, int height) {
- int i = height;
- void (*TransposeUVWx8)(const uint8* src, int src_stride,
- uint8* dst_a, int dst_stride_a,
- uint8* dst_b, int dst_stride_b,
- int width) = TransposeUVWx8_C;
-#if defined(HAS_TRANSPOSEUVWX8_NEON)
- if (TestCpuFlag(kCpuHasNEON)) {
- TransposeUVWx8 = TransposeUVWx8_NEON;
- }
-#endif
-#if defined(HAS_TRANSPOSEUVWX8_SSE2)
- if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(width, 8)) {
- TransposeUVWx8 = TransposeUVWx8_SSE2;
- }
-#endif
-#if defined(HAS_TRANSPOSEUVWx8_MIPS_DSPR2)
- if (TestCpuFlag(kCpuHasMIPS_DSPR2) && IS_ALIGNED(width, 2) &&
- IS_ALIGNED(src, 4) && IS_ALIGNED(src_stride, 4)) {
- TransposeUVWx8 = TransposeUVWx8_MIPS_DSPR2;
- }
-#endif
-
- // Work through the source in 8x8 tiles.
- while (i >= 8) {
- TransposeUVWx8(src, src_stride,
- dst_a, dst_stride_a,
- dst_b, dst_stride_b,
- width);
- src += 8 * src_stride; // Go down 8 rows.
- dst_a += 8; // Move over 8 columns.
- dst_b += 8; // Move over 8 columns.
- i -= 8;
- }
-
- if (i > 0) {
- TransposeUVWxH_C(src, src_stride,
- dst_a, dst_stride_a,
- dst_b, dst_stride_b,
- width, i);
- }
-}
-
-LIBYUV_API
-void RotateUV90(const uint8* src, int src_stride,
- uint8* dst_a, int dst_stride_a,
- uint8* dst_b, int dst_stride_b,
- int width, int height) {
- src += src_stride * (height - 1);
- src_stride = -src_stride;
-
- TransposeUV(src, src_stride,
- dst_a, dst_stride_a,
- dst_b, dst_stride_b,
- width, height);
-}
-
-LIBYUV_API
-void RotateUV270(const uint8* src, int src_stride,
- uint8* dst_a, int dst_stride_a,
- uint8* dst_b, int dst_stride_b,
- int width, int height) {
- dst_a += dst_stride_a * (width - 1);
- dst_b += dst_stride_b * (width - 1);
- dst_stride_a = -dst_stride_a;
- dst_stride_b = -dst_stride_b;
-
- TransposeUV(src, src_stride,
- dst_a, dst_stride_a,
- dst_b, dst_stride_b,
- width, height);
-}
-
-// Rotate 180 is a horizontal and vertical flip.
-LIBYUV_API
-void RotateUV180(const uint8* src, int src_stride,
- uint8* dst_a, int dst_stride_a,
- uint8* dst_b, int dst_stride_b,
- int width, int height) {
- int i;
- void (*MirrorRowUV)(const uint8* src, uint8* dst_u, uint8* dst_v, int width) =
- MirrorUVRow_C;
-#if defined(HAS_MIRRORUVROW_NEON)
- if (TestCpuFlag(kCpuHasNEON) && IS_ALIGNED(width, 8)) {
- MirrorRowUV = MirrorUVRow_NEON;
- }
-#endif
-#if defined(HAS_MIRRORROW_UV_SSSE3)
- if (TestCpuFlag(kCpuHasSSSE3) && IS_ALIGNED(width, 16)) {
- MirrorRowUV = MirrorUVRow_SSSE3;
- }
-#endif
-#if defined(HAS_MIRRORUVROW_MIPS_DSPR2)
- if (TestCpuFlag(kCpuHasMIPS_DSPR2) &&
- IS_ALIGNED(src, 4) && IS_ALIGNED(src_stride, 4)) {
- MirrorRowUV = MirrorUVRow_MIPS_DSPR2;
- }
-#endif
-
- dst_a += dst_stride_a * (height - 1);
- dst_b += dst_stride_b * (height - 1);
-
- for (i = 0; i < height; ++i) {
- MirrorRowUV(src, dst_a, dst_b, width);
- src += src_stride;
- dst_a -= dst_stride_a;
- dst_b -= dst_stride_b;
- }
-}
-
-LIBYUV_API
-int RotatePlane(const uint8* src, int src_stride,
- uint8* dst, int dst_stride,
- int width, int height,
- enum RotationMode mode) {
- if (!src || width <= 0 || height == 0 || !dst) {
- return -1;
- }
-
- // Negative height means invert the image.
- if (height < 0) {
- height = -height;
- src = src + (height - 1) * src_stride;
- src_stride = -src_stride;
- }
-
- switch (mode) {
- case kRotate0:
- // copy frame
- CopyPlane(src, src_stride,
- dst, dst_stride,
- width, height);
- return 0;
- case kRotate90:
- RotatePlane90(src, src_stride,
- dst, dst_stride,
- width, height);
- return 0;
- case kRotate270:
- RotatePlane270(src, src_stride,
- dst, dst_stride,
- width, height);
- return 0;
- case kRotate180:
- RotatePlane180(src, src_stride,
- dst, dst_stride,
- width, height);
- return 0;
- default:
- break;
- }
- return -1;
-}
-
-LIBYUV_API
-int I420Rotate(const uint8* src_y, int src_stride_y,
- const uint8* src_u, int src_stride_u,
- const uint8* src_v, int src_stride_v,
- uint8* dst_y, int dst_stride_y,
- uint8* dst_u, int dst_stride_u,
- uint8* dst_v, int dst_stride_v,
- int width, int height,
- enum RotationMode mode) {
- int halfwidth = (width + 1) >> 1;
- int halfheight = (height + 1) >> 1;
- if (!src_y || !src_u || !src_v || width <= 0 || height == 0 ||
- !dst_y || !dst_u || !dst_v) {
- return -1;
- }
-
- // Negative height means invert the image.
- if (height < 0) {
- height = -height;
- halfheight = (height + 1) >> 1;
- src_y = src_y + (height - 1) * src_stride_y;
- src_u = src_u + (halfheight - 1) * src_stride_u;
- src_v = src_v + (halfheight - 1) * src_stride_v;
- src_stride_y = -src_stride_y;
- src_stride_u = -src_stride_u;
- src_stride_v = -src_stride_v;
- }
-
- switch (mode) {
- case kRotate0:
- // copy frame
- return I420Copy(src_y, src_stride_y,
- src_u, src_stride_u,
- src_v, src_stride_v,
- dst_y, dst_stride_y,
- dst_u, dst_stride_u,
- dst_v, dst_stride_v,
- width, height);
- case kRotate90:
- RotatePlane90(src_y, src_stride_y,
- dst_y, dst_stride_y,
- width, height);
- RotatePlane90(src_u, src_stride_u,
- dst_u, dst_stride_u,
- halfwidth, halfheight);
- RotatePlane90(src_v, src_stride_v,
- dst_v, dst_stride_v,
- halfwidth, halfheight);
- return 0;
- case kRotate270:
- RotatePlane270(src_y, src_stride_y,
- dst_y, dst_stride_y,
- width, height);
- RotatePlane270(src_u, src_stride_u,
- dst_u, dst_stride_u,
- halfwidth, halfheight);
- RotatePlane270(src_v, src_stride_v,
- dst_v, dst_stride_v,
- halfwidth, halfheight);
- return 0;
- case kRotate180:
- RotatePlane180(src_y, src_stride_y,
- dst_y, dst_stride_y,
- width, height);
- RotatePlane180(src_u, src_stride_u,
- dst_u, dst_stride_u,
- halfwidth, halfheight);
- RotatePlane180(src_v, src_stride_v,
- dst_v, dst_stride_v,
- halfwidth, halfheight);
- return 0;
- default:
- break;
- }
- return -1;
-}
-
-LIBYUV_API
-int NV12ToI420Rotate(const uint8* src_y, int src_stride_y,
- const uint8* src_uv, int src_stride_uv,
- uint8* dst_y, int dst_stride_y,
- uint8* dst_u, int dst_stride_u,
- uint8* dst_v, int dst_stride_v,
- int width, int height,
- enum RotationMode mode) {
- int halfwidth = (width + 1) >> 1;
- int halfheight = (height + 1) >> 1;
- if (!src_y || !src_uv || width <= 0 || height == 0 ||
- !dst_y || !dst_u || !dst_v) {
- return -1;
- }
-
- // Negative height means invert the image.
- if (height < 0) {
- height = -height;
- halfheight = (height + 1) >> 1;
- src_y = src_y + (height - 1) * src_stride_y;
- src_uv = src_uv + (halfheight - 1) * src_stride_uv;
- src_stride_y = -src_stride_y;
- src_stride_uv = -src_stride_uv;
- }
-
- switch (mode) {
- case kRotate0:
- // copy frame
- return NV12ToI420(src_y, src_stride_y,
- src_uv, src_stride_uv,
- dst_y, dst_stride_y,
- dst_u, dst_stride_u,
- dst_v, dst_stride_v,
- width, height);
- case kRotate90:
- RotatePlane90(src_y, src_stride_y,
- dst_y, dst_stride_y,
- width, height);
- RotateUV90(src_uv, src_stride_uv,
- dst_u, dst_stride_u,
- dst_v, dst_stride_v,
- halfwidth, halfheight);
- return 0;
- case kRotate270:
- RotatePlane270(src_y, src_stride_y,
- dst_y, dst_stride_y,
- width, height);
- RotateUV270(src_uv, src_stride_uv,
- dst_u, dst_stride_u,
- dst_v, dst_stride_v,
- halfwidth, halfheight);
- return 0;
- case kRotate180:
- RotatePlane180(src_y, src_stride_y,
- dst_y, dst_stride_y,
- width, height);
- RotateUV180(src_uv, src_stride_uv,
- dst_u, dst_stride_u,
- dst_v, dst_stride_v,
- halfwidth, halfheight);
- return 0;
- default:
- break;
- }
- return -1;
-}
-
-#ifdef __cplusplus
-} // extern "C"
-} // namespace libyuv
-#endif
diff --git a/third_party/aom/third_party/libyuv/source/rotate_any.cc b/third_party/aom/third_party/libyuv/source/rotate_any.cc
deleted file mode 100644
index 4d6eb34e1..000000000
--- a/third_party/aom/third_party/libyuv/source/rotate_any.cc
+++ /dev/null
@@ -1,55 +0,0 @@
-/*
- * Copyright 2015 The LibYuv Project Authors. All rights reserved.
- *
- * Use of this source code is governed by a BSD-style license
- * that can be found in the LICENSE file in the root of the source
- * tree. An additional intellectual property rights grant can be found
- * in the file PATENTS. All contributing project authors may
- * be found in the AUTHORS file in the root of the source tree.
- */
-
-#include "libyuv/rotate.h"
-#include "libyuv/rotate_row.h"
-
-#include "libyuv/basic_types.h"
-
-#ifdef __cplusplus
-namespace libyuv {
-extern "C" {
-#endif
-
-#define TANY(NAMEANY, TPOS_SIMD, TPOS_C, MASK) \
- void NAMEANY(const uint8* src, int src_stride, \
- uint8* dst, int dst_stride, int width) { \
- int r = width & MASK; \
- int n = width - r; \
- if (n > 0) { \
- TPOS_SIMD(src, src_stride, dst, dst_stride, n); \
- } \
- TPOS_C(src + n, src_stride, dst + n * dst_stride, dst_stride, r); \
- }
-
-#ifdef HAS_TRANSPOSEWX8_NEON
-TANY(TransposeWx8_Any_NEON, TransposeWx8_NEON, TransposeWx8_C, 7)
-#endif
-#ifdef HAS_TRANSPOSEWX8_SSSE3
-TANY(TransposeWx8_Any_SSSE3, TransposeWx8_SSSE3, TransposeWx8_C, 7)
-#endif
-#ifdef HAS_TRANSPOSEWX8_FAST_SSSE3
-TANY(TransposeWx8_Fast_Any_SSSE3, TransposeWx8_Fast_SSSE3, TransposeWx8_C, 15)
-#endif
-#ifdef HAS_TRANSPOSEWX8_MIPS_DSPR2
-TANY(TransposeWx8_Any_MIPS_DSPR2, TransposeWx8_MIPS_DSPR2, TransposeWx8_C, 7)
-#endif
-
-#undef TANY
-
-#ifdef __cplusplus
-} // extern "C"
-} // namespace libyuv
-#endif
-
-
-
-
-
diff --git a/third_party/aom/third_party/libyuv/source/rotate_argb.cc b/third_party/aom/third_party/libyuv/source/rotate_argb.cc
deleted file mode 100644
index 787c0ad1b..000000000
--- a/third_party/aom/third_party/libyuv/source/rotate_argb.cc
+++ /dev/null
@@ -1,205 +0,0 @@
-/*
- * Copyright 2012 The LibYuv Project Authors. All rights reserved.
- *
- * Use of this source code is governed by a BSD-style license
- * that can be found in the LICENSE file in the root of the source
- * tree. An additional intellectual property rights grant can be found
- * in the file PATENTS. All contributing project authors may
- * be found in the AUTHORS file in the root of the source tree.
- */
-
-#include "libyuv/rotate.h"
-
-#include "libyuv/cpu_id.h"
-#include "libyuv/convert.h"
-#include "libyuv/planar_functions.h"
-#include "libyuv/row.h"
-
-#ifdef __cplusplus
-namespace libyuv {
-extern "C" {
-#endif
-
-// ARGBScale has a function to copy pixels to a row, striding each source
-// pixel by a constant.
-#if !defined(LIBYUV_DISABLE_X86) && \
- (defined(_M_IX86) || \
- (defined(__x86_64__) && !defined(__native_client__)) || defined(__i386__))
-#define HAS_SCALEARGBROWDOWNEVEN_SSE2
-void ScaleARGBRowDownEven_SSE2(const uint8* src_ptr, int src_stride,
- int src_stepx, uint8* dst_ptr, int dst_width);
-#endif
-#if !defined(LIBYUV_DISABLE_NEON) && !defined(__native_client__) && \
- (defined(__ARM_NEON__) || defined(LIBYUV_NEON) || defined(__aarch64__))
-#define HAS_SCALEARGBROWDOWNEVEN_NEON
-void ScaleARGBRowDownEven_NEON(const uint8* src_ptr, int src_stride,
- int src_stepx, uint8* dst_ptr, int dst_width);
-#endif
-
-void ScaleARGBRowDownEven_C(const uint8* src_ptr, int,
- int src_stepx, uint8* dst_ptr, int dst_width);
-
-static void ARGBTranspose(const uint8* src, int src_stride,
- uint8* dst, int dst_stride, int width, int height) {
- int i;
- int src_pixel_step = src_stride >> 2;
- void (*ScaleARGBRowDownEven)(const uint8* src_ptr, int src_stride,
- int src_step, uint8* dst_ptr, int dst_width) = ScaleARGBRowDownEven_C;
-#if defined(HAS_SCALEARGBROWDOWNEVEN_SSE2)
- if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(height, 4)) { // Width of dest.
- ScaleARGBRowDownEven = ScaleARGBRowDownEven_SSE2;
- }
-#endif
-#if defined(HAS_SCALEARGBROWDOWNEVEN_NEON)
- if (TestCpuFlag(kCpuHasNEON) && IS_ALIGNED(height, 4)) { // Width of dest.
- ScaleARGBRowDownEven = ScaleARGBRowDownEven_NEON;
- }
-#endif
-
- for (i = 0; i < width; ++i) { // column of source to row of dest.
- ScaleARGBRowDownEven(src, 0, src_pixel_step, dst, height);
- dst += dst_stride;
- src += 4;
- }
-}
-
-void ARGBRotate90(const uint8* src, int src_stride,
- uint8* dst, int dst_stride, int width, int height) {
- // Rotate by 90 is a ARGBTranspose with the source read
- // from bottom to top. So set the source pointer to the end
- // of the buffer and flip the sign of the source stride.
- src += src_stride * (height - 1);
- src_stride = -src_stride;
- ARGBTranspose(src, src_stride, dst, dst_stride, width, height);
-}
-
-void ARGBRotate270(const uint8* src, int src_stride,
- uint8* dst, int dst_stride, int width, int height) {
- // Rotate by 270 is a ARGBTranspose with the destination written
- // from bottom to top. So set the destination pointer to the end
- // of the buffer and flip the sign of the destination stride.
- dst += dst_stride * (width - 1);
- dst_stride = -dst_stride;
- ARGBTranspose(src, src_stride, dst, dst_stride, width, height);
-}
-
-void ARGBRotate180(const uint8* src, int src_stride,
- uint8* dst, int dst_stride, int width, int height) {
- // Swap first and last row and mirror the content. Uses a temporary row.
- align_buffer_64(row, width * 4);
- const uint8* src_bot = src + src_stride * (height - 1);
- uint8* dst_bot = dst + dst_stride * (height - 1);
- int half_height = (height + 1) >> 1;
- int y;
- void (*ARGBMirrorRow)(const uint8* src, uint8* dst, int width) =
- ARGBMirrorRow_C;
- void (*CopyRow)(const uint8* src, uint8* dst, int width) = CopyRow_C;
-#if defined(HAS_ARGBMIRRORROW_NEON)
- if (TestCpuFlag(kCpuHasNEON)) {
- ARGBMirrorRow = ARGBMirrorRow_Any_NEON;
- if (IS_ALIGNED(width, 4)) {
- ARGBMirrorRow = ARGBMirrorRow_NEON;
- }
- }
-#endif
-#if defined(HAS_ARGBMIRRORROW_SSE2)
- if (TestCpuFlag(kCpuHasSSE2)) {
- ARGBMirrorRow = ARGBMirrorRow_Any_SSE2;
- if (IS_ALIGNED(width, 4)) {
- ARGBMirrorRow = ARGBMirrorRow_SSE2;
- }
- }
-#endif
-#if defined(HAS_ARGBMIRRORROW_AVX2)
- if (TestCpuFlag(kCpuHasAVX2)) {
- ARGBMirrorRow = ARGBMirrorRow_Any_AVX2;
- if (IS_ALIGNED(width, 8)) {
- ARGBMirrorRow = ARGBMirrorRow_AVX2;
- }
- }
-#endif
-#if defined(HAS_COPYROW_SSE2)
- if (TestCpuFlag(kCpuHasSSE2)) {
- CopyRow = IS_ALIGNED(width * 4, 32) ? CopyRow_SSE2 : CopyRow_Any_SSE2;
- }
-#endif
-#if defined(HAS_COPYROW_AVX)
- if (TestCpuFlag(kCpuHasAVX)) {
- CopyRow = IS_ALIGNED(width * 4, 64) ? CopyRow_AVX : CopyRow_Any_AVX;
- }
-#endif
-#if defined(HAS_COPYROW_ERMS)
- if (TestCpuFlag(kCpuHasERMS)) {
- CopyRow = CopyRow_ERMS;
- }
-#endif
-#if defined(HAS_COPYROW_NEON)
- if (TestCpuFlag(kCpuHasNEON)) {
- CopyRow = IS_ALIGNED(width * 4, 32) ? CopyRow_NEON : CopyRow_Any_NEON;
- }
-#endif
-#if defined(HAS_COPYROW_MIPS)
- if (TestCpuFlag(kCpuHasMIPS)) {
- CopyRow = CopyRow_MIPS;
- }
-#endif
-
- // Odd height will harmlessly mirror the middle row twice.
- for (y = 0; y < half_height; ++y) {
- ARGBMirrorRow(src, row, width); // Mirror first row into a buffer
- ARGBMirrorRow(src_bot, dst, width); // Mirror last row into first row
- CopyRow(row, dst_bot, width * 4); // Copy first mirrored row into last
- src += src_stride;
- dst += dst_stride;
- src_bot -= src_stride;
- dst_bot -= dst_stride;
- }
- free_aligned_buffer_64(row);
-}
-
-LIBYUV_API
-int ARGBRotate(const uint8* src_argb, int src_stride_argb,
- uint8* dst_argb, int dst_stride_argb, int width, int height,
- enum RotationMode mode) {
- if (!src_argb || width <= 0 || height == 0 || !dst_argb) {
- return -1;
- }
-
- // Negative height means invert the image.
- if (height < 0) {
- height = -height;
- src_argb = src_argb + (height - 1) * src_stride_argb;
- src_stride_argb = -src_stride_argb;
- }
-
- switch (mode) {
- case kRotate0:
- // copy frame
- return ARGBCopy(src_argb, src_stride_argb,
- dst_argb, dst_stride_argb,
- width, height);
- case kRotate90:
- ARGBRotate90(src_argb, src_stride_argb,
- dst_argb, dst_stride_argb,
- width, height);
- return 0;
- case kRotate270:
- ARGBRotate270(src_argb, src_stride_argb,
- dst_argb, dst_stride_argb,
- width, height);
- return 0;
- case kRotate180:
- ARGBRotate180(src_argb, src_stride_argb,
- dst_argb, dst_stride_argb,
- width, height);
- return 0;
- default:
- break;
- }
- return -1;
-}
-
-#ifdef __cplusplus
-} // extern "C"
-} // namespace libyuv
-#endif
diff --git a/third_party/aom/third_party/libyuv/source/rotate_common.cc b/third_party/aom/third_party/libyuv/source/rotate_common.cc
deleted file mode 100644
index b33a9a0c6..000000000
--- a/third_party/aom/third_party/libyuv/source/rotate_common.cc
+++ /dev/null
@@ -1,92 +0,0 @@
-/*
- * Copyright 2011 The LibYuv Project Authors. All rights reserved.
- *
- * Use of this source code is governed by a BSD-style license
- * that can be found in the LICENSE file in the root of the source
- * tree. An additional intellectual property rights grant can be found
- * in the file PATENTS. All contributing project authors may
- * be found in the AUTHORS file in the root of the source tree.
- */
-
-#include "libyuv/row.h"
-#include "libyuv/rotate_row.h"
-
-#ifdef __cplusplus
-namespace libyuv {
-extern "C" {
-#endif
-
-void TransposeWx8_C(const uint8* src, int src_stride,
- uint8* dst, int dst_stride, int width) {
- int i;
- for (i = 0; i < width; ++i) {
- dst[0] = src[0 * src_stride];
- dst[1] = src[1 * src_stride];
- dst[2] = src[2 * src_stride];
- dst[3] = src[3 * src_stride];
- dst[4] = src[4 * src_stride];
- dst[5] = src[5 * src_stride];
- dst[6] = src[6 * src_stride];
- dst[7] = src[7 * src_stride];
- ++src;
- dst += dst_stride;
- }
-}
-
-void TransposeUVWx8_C(const uint8* src, int src_stride,
- uint8* dst_a, int dst_stride_a,
- uint8* dst_b, int dst_stride_b, int width) {
- int i;
- for (i = 0; i < width; ++i) {
- dst_a[0] = src[0 * src_stride + 0];
- dst_b[0] = src[0 * src_stride + 1];
- dst_a[1] = src[1 * src_stride + 0];
- dst_b[1] = src[1 * src_stride + 1];
- dst_a[2] = src[2 * src_stride + 0];
- dst_b[2] = src[2 * src_stride + 1];
- dst_a[3] = src[3 * src_stride + 0];
- dst_b[3] = src[3 * src_stride + 1];
- dst_a[4] = src[4 * src_stride + 0];
- dst_b[4] = src[4 * src_stride + 1];
- dst_a[5] = src[5 * src_stride + 0];
- dst_b[5] = src[5 * src_stride + 1];
- dst_a[6] = src[6 * src_stride + 0];
- dst_b[6] = src[6 * src_stride + 1];
- dst_a[7] = src[7 * src_stride + 0];
- dst_b[7] = src[7 * src_stride + 1];
- src += 2;
- dst_a += dst_stride_a;
- dst_b += dst_stride_b;
- }
-}
-
-void TransposeWxH_C(const uint8* src, int src_stride,
- uint8* dst, int dst_stride,
- int width, int height) {
- int i;
- for (i = 0; i < width; ++i) {
- int j;
- for (j = 0; j < height; ++j) {
- dst[i * dst_stride + j] = src[j * src_stride + i];
- }
- }
-}
-
-void TransposeUVWxH_C(const uint8* src, int src_stride,
- uint8* dst_a, int dst_stride_a,
- uint8* dst_b, int dst_stride_b,
- int width, int height) {
- int i;
- for (i = 0; i < width * 2; i += 2) {
- int j;
- for (j = 0; j < height; ++j) {
- dst_a[j + ((i >> 1) * dst_stride_a)] = src[i + (j * src_stride)];
- dst_b[j + ((i >> 1) * dst_stride_b)] = src[i + (j * src_stride) + 1];
- }
- }
-}
-
-#ifdef __cplusplus
-} // extern "C"
-} // namespace libyuv
-#endif
diff --git a/third_party/aom/third_party/libyuv/source/rotate_gcc.cc b/third_party/aom/third_party/libyuv/source/rotate_gcc.cc
deleted file mode 100644
index fd385bcd3..000000000
--- a/third_party/aom/third_party/libyuv/source/rotate_gcc.cc
+++ /dev/null
@@ -1,493 +0,0 @@
-/*
- * Copyright 2015 The LibYuv Project Authors. All rights reserved.
- *
- * Use of this source code is governed by a BSD-style license
- * that can be found in the LICENSE file in the root of the source
- * tree. An additional intellectual property rights grant can be found
- * in the file PATENTS. All contributing project authors may
- * be found in the AUTHORS file in the root of the source tree.
- */
-
-#include "libyuv/row.h"
-#include "libyuv/rotate_row.h"
-
-#ifdef __cplusplus
-namespace libyuv {
-extern "C" {
-#endif
-
-// This module is for GCC x86 and x64.
-#if !defined(LIBYUV_DISABLE_X86) && (defined(__x86_64__) || defined(__i386__))
-
-#if !defined(LIBYUV_DISABLE_X86) && \
- (defined(__i386__) || (defined(__x86_64__) && !defined(__native_client__)))
-void TransposeWx8_SSSE3(const uint8* src, int src_stride,
- uint8* dst, int dst_stride, int width) {
- asm volatile (
- // Read in the data from the source pointer.
- // First round of bit swap.
- ".p2align 2 \n"
- "1: \n"
- "movq (%0),%%xmm0 \n"
- "movq (%0,%3),%%xmm1 \n"
- "lea (%0,%3,2),%0 \n"
- "punpcklbw %%xmm1,%%xmm0 \n"
- "movq (%0),%%xmm2 \n"
- "movdqa %%xmm0,%%xmm1 \n"
- "palignr $0x8,%%xmm1,%%xmm1 \n"
- "movq (%0,%3),%%xmm3 \n"
- "lea (%0,%3,2),%0 \n"
- "punpcklbw %%xmm3,%%xmm2 \n"
- "movdqa %%xmm2,%%xmm3 \n"
- "movq (%0),%%xmm4 \n"
- "palignr $0x8,%%xmm3,%%xmm3 \n"
- "movq (%0,%3),%%xmm5 \n"
- "lea (%0,%3,2),%0 \n"
- "punpcklbw %%xmm5,%%xmm4 \n"
- "movdqa %%xmm4,%%xmm5 \n"
- "movq (%0),%%xmm6 \n"
- "palignr $0x8,%%xmm5,%%xmm5 \n"
- "movq (%0,%3),%%xmm7 \n"
- "lea (%0,%3,2),%0 \n"
- "punpcklbw %%xmm7,%%xmm6 \n"
- "neg %3 \n"
- "movdqa %%xmm6,%%xmm7 \n"
- "lea 0x8(%0,%3,8),%0 \n"
- "palignr $0x8,%%xmm7,%%xmm7 \n"
- "neg %3 \n"
- // Second round of bit swap.
- "punpcklwd %%xmm2,%%xmm0 \n"
- "punpcklwd %%xmm3,%%xmm1 \n"
- "movdqa %%xmm0,%%xmm2 \n"
- "movdqa %%xmm1,%%xmm3 \n"
- "palignr $0x8,%%xmm2,%%xmm2 \n"
- "palignr $0x8,%%xmm3,%%xmm3 \n"
- "punpcklwd %%xmm6,%%xmm4 \n"
- "punpcklwd %%xmm7,%%xmm5 \n"
- "movdqa %%xmm4,%%xmm6 \n"
- "movdqa %%xmm5,%%xmm7 \n"
- "palignr $0x8,%%xmm6,%%xmm6 \n"
- "palignr $0x8,%%xmm7,%%xmm7 \n"
- // Third round of bit swap.
- // Write to the destination pointer.
- "punpckldq %%xmm4,%%xmm0 \n"
- "movq %%xmm0,(%1) \n"
- "movdqa %%xmm0,%%xmm4 \n"
- "palignr $0x8,%%xmm4,%%xmm4 \n"
- "movq %%xmm4,(%1,%4) \n"
- "lea (%1,%4,2),%1 \n"
- "punpckldq %%xmm6,%%xmm2 \n"
- "movdqa %%xmm2,%%xmm6 \n"
- "movq %%xmm2,(%1) \n"
- "palignr $0x8,%%xmm6,%%xmm6 \n"
- "punpckldq %%xmm5,%%xmm1 \n"
- "movq %%xmm6,(%1,%4) \n"
- "lea (%1,%4,2),%1 \n"
- "movdqa %%xmm1,%%xmm5 \n"
- "movq %%xmm1,(%1) \n"
- "palignr $0x8,%%xmm5,%%xmm5 \n"
- "movq %%xmm5,(%1,%4) \n"
- "lea (%1,%4,2),%1 \n"
- "punpckldq %%xmm7,%%xmm3 \n"
- "movq %%xmm3,(%1) \n"
- "movdqa %%xmm3,%%xmm7 \n"
- "palignr $0x8,%%xmm7,%%xmm7 \n"
- "sub $0x8,%2 \n"
- "movq %%xmm7,(%1,%4) \n"
- "lea (%1,%4,2),%1 \n"
- "jg 1b \n"
- : "+r"(src), // %0
- "+r"(dst), // %1
- "+r"(width) // %2
- : "r"((intptr_t)(src_stride)), // %3
- "r"((intptr_t)(dst_stride)) // %4
- : "memory", "cc",
- "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", "xmm7"
- );
-}
-
-#if !defined(LIBYUV_DISABLE_X86) && defined(__i386__) && !defined(__clang__)
-void TransposeUVWx8_SSE2(const uint8* src, int src_stride,
- uint8* dst_a, int dst_stride_a,
- uint8* dst_b, int dst_stride_b, int width);
- asm (
- DECLARE_FUNCTION(TransposeUVWx8_SSE2)
- "push %ebx \n"
- "push %esi \n"
- "push %edi \n"
- "push %ebp \n"
- "mov 0x14(%esp),%eax \n"
- "mov 0x18(%esp),%edi \n"
- "mov 0x1c(%esp),%edx \n"
- "mov 0x20(%esp),%esi \n"
- "mov 0x24(%esp),%ebx \n"
- "mov 0x28(%esp),%ebp \n"
- "mov %esp,%ecx \n"
- "sub $0x14,%esp \n"
- "and $0xfffffff0,%esp \n"
- "mov %ecx,0x10(%esp) \n"
- "mov 0x2c(%ecx),%ecx \n"
-
-"1: \n"
- "movdqu (%eax),%xmm0 \n"
- "movdqu (%eax,%edi,1),%xmm1 \n"
- "lea (%eax,%edi,2),%eax \n"
- "movdqa %xmm0,%xmm7 \n"
- "punpcklbw %xmm1,%xmm0 \n"
- "punpckhbw %xmm1,%xmm7 \n"
- "movdqa %xmm7,%xmm1 \n"
- "movdqu (%eax),%xmm2 \n"
- "movdqu (%eax,%edi,1),%xmm3 \n"
- "lea (%eax,%edi,2),%eax \n"
- "movdqa %xmm2,%xmm7 \n"
- "punpcklbw %xmm3,%xmm2 \n"
- "punpckhbw %xmm3,%xmm7 \n"
- "movdqa %xmm7,%xmm3 \n"
- "movdqu (%eax),%xmm4 \n"
- "movdqu (%eax,%edi,1),%xmm5 \n"
- "lea (%eax,%edi,2),%eax \n"
- "movdqa %xmm4,%xmm7 \n"
- "punpcklbw %xmm5,%xmm4 \n"
- "punpckhbw %xmm5,%xmm7 \n"
- "movdqa %xmm7,%xmm5 \n"
- "movdqu (%eax),%xmm6 \n"
- "movdqu (%eax,%edi,1),%xmm7 \n"
- "lea (%eax,%edi,2),%eax \n"
- "movdqu %xmm5,(%esp) \n"
- "neg %edi \n"
- "movdqa %xmm6,%xmm5 \n"
- "punpcklbw %xmm7,%xmm6 \n"
- "punpckhbw %xmm7,%xmm5 \n"
- "movdqa %xmm5,%xmm7 \n"
- "lea 0x10(%eax,%edi,8),%eax \n"
- "neg %edi \n"
- "movdqa %xmm0,%xmm5 \n"
- "punpcklwd %xmm2,%xmm0 \n"
- "punpckhwd %xmm2,%xmm5 \n"
- "movdqa %xmm5,%xmm2 \n"
- "movdqa %xmm1,%xmm5 \n"
- "punpcklwd %xmm3,%xmm1 \n"
- "punpckhwd %xmm3,%xmm5 \n"
- "movdqa %xmm5,%xmm3 \n"
- "movdqa %xmm4,%xmm5 \n"
- "punpcklwd %xmm6,%xmm4 \n"
- "punpckhwd %xmm6,%xmm5 \n"
- "movdqa %xmm5,%xmm6 \n"
- "movdqu (%esp),%xmm5 \n"
- "movdqu %xmm6,(%esp) \n"
- "movdqa %xmm5,%xmm6 \n"
- "punpcklwd %xmm7,%xmm5 \n"
- "punpckhwd %xmm7,%xmm6 \n"
- "movdqa %xmm6,%xmm7 \n"
- "movdqa %xmm0,%xmm6 \n"
- "punpckldq %xmm4,%xmm0 \n"
- "punpckhdq %xmm4,%xmm6 \n"
- "movdqa %xmm6,%xmm4 \n"
- "movdqu (%esp),%xmm6 \n"
- "movlpd %xmm0,(%edx) \n"
- "movhpd %xmm0,(%ebx) \n"
- "movlpd %xmm4,(%edx,%esi,1) \n"
- "lea (%edx,%esi,2),%edx \n"
- "movhpd %xmm4,(%ebx,%ebp,1) \n"
- "lea (%ebx,%ebp,2),%ebx \n"
- "movdqa %xmm2,%xmm0 \n"
- "punpckldq %xmm6,%xmm2 \n"
- "movlpd %xmm2,(%edx) \n"
- "movhpd %xmm2,(%ebx) \n"
- "punpckhdq %xmm6,%xmm0 \n"
- "movlpd %xmm0,(%edx,%esi,1) \n"
- "lea (%edx,%esi,2),%edx \n"
- "movhpd %xmm0,(%ebx,%ebp,1) \n"
- "lea (%ebx,%ebp,2),%ebx \n"
- "movdqa %xmm1,%xmm0 \n"
- "punpckldq %xmm5,%xmm1 \n"
- "movlpd %xmm1,(%edx) \n"
- "movhpd %xmm1,(%ebx) \n"
- "punpckhdq %xmm5,%xmm0 \n"
- "movlpd %xmm0,(%edx,%esi,1) \n"
- "lea (%edx,%esi,2),%edx \n"
- "movhpd %xmm0,(%ebx,%ebp,1) \n"
- "lea (%ebx,%ebp,2),%ebx \n"
- "movdqa %xmm3,%xmm0 \n"
- "punpckldq %xmm7,%xmm3 \n"
- "movlpd %xmm3,(%edx) \n"
- "movhpd %xmm3,(%ebx) \n"
- "punpckhdq %xmm7,%xmm0 \n"
- "sub $0x8,%ecx \n"
- "movlpd %xmm0,(%edx,%esi,1) \n"
- "lea (%edx,%esi,2),%edx \n"
- "movhpd %xmm0,(%ebx,%ebp,1) \n"
- "lea (%ebx,%ebp,2),%ebx \n"
- "jg 1b \n"
- "mov 0x10(%esp),%esp \n"
- "pop %ebp \n"
- "pop %edi \n"
- "pop %esi \n"
- "pop %ebx \n"
-#if defined(__native_client__)
- "pop %ecx \n"
- "and $0xffffffe0,%ecx \n"
- "jmp *%ecx \n"
-#else
- "ret \n"
-#endif
-);
-#endif
-#if !defined(LIBYUV_DISABLE_X86) && !defined(__native_client__) && \
- defined(__x86_64__)
-// 64 bit version has enough registers to do 16x8 to 8x16 at a time.
-void TransposeWx8_Fast_SSSE3(const uint8* src, int src_stride,
- uint8* dst, int dst_stride, int width) {
- asm volatile (
- // Read in the data from the source pointer.
- // First round of bit swap.
- ".p2align 2 \n"
-"1: \n"
- "movdqu (%0),%%xmm0 \n"
- "movdqu (%0,%3),%%xmm1 \n"
- "lea (%0,%3,2),%0 \n"
- "movdqa %%xmm0,%%xmm8 \n"
- "punpcklbw %%xmm1,%%xmm0 \n"
- "punpckhbw %%xmm1,%%xmm8 \n"
- "movdqu (%0),%%xmm2 \n"
- "movdqa %%xmm0,%%xmm1 \n"
- "movdqa %%xmm8,%%xmm9 \n"
- "palignr $0x8,%%xmm1,%%xmm1 \n"
- "palignr $0x8,%%xmm9,%%xmm9 \n"
- "movdqu (%0,%3),%%xmm3 \n"
- "lea (%0,%3,2),%0 \n"
- "movdqa %%xmm2,%%xmm10 \n"
- "punpcklbw %%xmm3,%%xmm2 \n"
- "punpckhbw %%xmm3,%%xmm10 \n"
- "movdqa %%xmm2,%%xmm3 \n"
- "movdqa %%xmm10,%%xmm11 \n"
- "movdqu (%0),%%xmm4 \n"
- "palignr $0x8,%%xmm3,%%xmm3 \n"
- "palignr $0x8,%%xmm11,%%xmm11 \n"
- "movdqu (%0,%3),%%xmm5 \n"
- "lea (%0,%3,2),%0 \n"
- "movdqa %%xmm4,%%xmm12 \n"
- "punpcklbw %%xmm5,%%xmm4 \n"
- "punpckhbw %%xmm5,%%xmm12 \n"
- "movdqa %%xmm4,%%xmm5 \n"
- "movdqa %%xmm12,%%xmm13 \n"
- "movdqu (%0),%%xmm6 \n"
- "palignr $0x8,%%xmm5,%%xmm5 \n"
- "palignr $0x8,%%xmm13,%%xmm13 \n"
- "movdqu (%0,%3),%%xmm7 \n"
- "lea (%0,%3,2),%0 \n"
- "movdqa %%xmm6,%%xmm14 \n"
- "punpcklbw %%xmm7,%%xmm6 \n"
- "punpckhbw %%xmm7,%%xmm14 \n"
- "neg %3 \n"
- "movdqa %%xmm6,%%xmm7 \n"
- "movdqa %%xmm14,%%xmm15 \n"
- "lea 0x10(%0,%3,8),%0 \n"
- "palignr $0x8,%%xmm7,%%xmm7 \n"
- "palignr $0x8,%%xmm15,%%xmm15 \n"
- "neg %3 \n"
- // Second round of bit swap.
- "punpcklwd %%xmm2,%%xmm0 \n"
- "punpcklwd %%xmm3,%%xmm1 \n"
- "movdqa %%xmm0,%%xmm2 \n"
- "movdqa %%xmm1,%%xmm3 \n"
- "palignr $0x8,%%xmm2,%%xmm2 \n"
- "palignr $0x8,%%xmm3,%%xmm3 \n"
- "punpcklwd %%xmm6,%%xmm4 \n"
- "punpcklwd %%xmm7,%%xmm5 \n"
- "movdqa %%xmm4,%%xmm6 \n"
- "movdqa %%xmm5,%%xmm7 \n"
- "palignr $0x8,%%xmm6,%%xmm6 \n"
- "palignr $0x8,%%xmm7,%%xmm7 \n"
- "punpcklwd %%xmm10,%%xmm8 \n"
- "punpcklwd %%xmm11,%%xmm9 \n"
- "movdqa %%xmm8,%%xmm10 \n"
- "movdqa %%xmm9,%%xmm11 \n"
- "palignr $0x8,%%xmm10,%%xmm10 \n"
- "palignr $0x8,%%xmm11,%%xmm11 \n"
- "punpcklwd %%xmm14,%%xmm12 \n"
- "punpcklwd %%xmm15,%%xmm13 \n"
- "movdqa %%xmm12,%%xmm14 \n"
- "movdqa %%xmm13,%%xmm15 \n"
- "palignr $0x8,%%xmm14,%%xmm14 \n"
- "palignr $0x8,%%xmm15,%%xmm15 \n"
- // Third round of bit swap.
- // Write to the destination pointer.
- "punpckldq %%xmm4,%%xmm0 \n"
- "movq %%xmm0,(%1) \n"
- "movdqa %%xmm0,%%xmm4 \n"
- "palignr $0x8,%%xmm4,%%xmm4 \n"
- "movq %%xmm4,(%1,%4) \n"
- "lea (%1,%4,2),%1 \n"
- "punpckldq %%xmm6,%%xmm2 \n"
- "movdqa %%xmm2,%%xmm6 \n"
- "movq %%xmm2,(%1) \n"
- "palignr $0x8,%%xmm6,%%xmm6 \n"
- "punpckldq %%xmm5,%%xmm1 \n"
- "movq %%xmm6,(%1,%4) \n"
- "lea (%1,%4,2),%1 \n"
- "movdqa %%xmm1,%%xmm5 \n"
- "movq %%xmm1,(%1) \n"
- "palignr $0x8,%%xmm5,%%xmm5 \n"
- "movq %%xmm5,(%1,%4) \n"
- "lea (%1,%4,2),%1 \n"
- "punpckldq %%xmm7,%%xmm3 \n"
- "movq %%xmm3,(%1) \n"
- "movdqa %%xmm3,%%xmm7 \n"
- "palignr $0x8,%%xmm7,%%xmm7 \n"
- "movq %%xmm7,(%1,%4) \n"
- "lea (%1,%4,2),%1 \n"
- "punpckldq %%xmm12,%%xmm8 \n"
- "movq %%xmm8,(%1) \n"
- "movdqa %%xmm8,%%xmm12 \n"
- "palignr $0x8,%%xmm12,%%xmm12 \n"
- "movq %%xmm12,(%1,%4) \n"
- "lea (%1,%4,2),%1 \n"
- "punpckldq %%xmm14,%%xmm10 \n"
- "movdqa %%xmm10,%%xmm14 \n"
- "movq %%xmm10,(%1) \n"
- "palignr $0x8,%%xmm14,%%xmm14 \n"
- "punpckldq %%xmm13,%%xmm9 \n"
- "movq %%xmm14,(%1,%4) \n"
- "lea (%1,%4,2),%1 \n"
- "movdqa %%xmm9,%%xmm13 \n"
- "movq %%xmm9,(%1) \n"
- "palignr $0x8,%%xmm13,%%xmm13 \n"
- "movq %%xmm13,(%1,%4) \n"
- "lea (%1,%4,2),%1 \n"
- "punpckldq %%xmm15,%%xmm11 \n"
- "movq %%xmm11,(%1) \n"
- "movdqa %%xmm11,%%xmm15 \n"
- "palignr $0x8,%%xmm15,%%xmm15 \n"
- "sub $0x10,%2 \n"
- "movq %%xmm15,(%1,%4) \n"
- "lea (%1,%4,2),%1 \n"
- "jg 1b \n"
- : "+r"(src), // %0
- "+r"(dst), // %1
- "+r"(width) // %2
- : "r"((intptr_t)(src_stride)), // %3
- "r"((intptr_t)(dst_stride)) // %4
- : "memory", "cc",
- "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", "xmm7",
- "xmm8", "xmm9", "xmm10", "xmm11", "xmm12", "xmm13", "xmm14", "xmm15"
-);
-}
-
-void TransposeUVWx8_SSE2(const uint8* src, int src_stride,
- uint8* dst_a, int dst_stride_a,
- uint8* dst_b, int dst_stride_b, int width) {
- asm volatile (
- // Read in the data from the source pointer.
- // First round of bit swap.
- ".p2align 2 \n"
-"1: \n"
- "movdqu (%0),%%xmm0 \n"
- "movdqu (%0,%4),%%xmm1 \n"
- "lea (%0,%4,2),%0 \n"
- "movdqa %%xmm0,%%xmm8 \n"
- "punpcklbw %%xmm1,%%xmm0 \n"
- "punpckhbw %%xmm1,%%xmm8 \n"
- "movdqa %%xmm8,%%xmm1 \n"
- "movdqu (%0),%%xmm2 \n"
- "movdqu (%0,%4),%%xmm3 \n"
- "lea (%0,%4,2),%0 \n"
- "movdqa %%xmm2,%%xmm8 \n"
- "punpcklbw %%xmm3,%%xmm2 \n"
- "punpckhbw %%xmm3,%%xmm8 \n"
- "movdqa %%xmm8,%%xmm3 \n"
- "movdqu (%0),%%xmm4 \n"
- "movdqu (%0,%4),%%xmm5 \n"
- "lea (%0,%4,2),%0 \n"
- "movdqa %%xmm4,%%xmm8 \n"
- "punpcklbw %%xmm5,%%xmm4 \n"
- "punpckhbw %%xmm5,%%xmm8 \n"
- "movdqa %%xmm8,%%xmm5 \n"
- "movdqu (%0),%%xmm6 \n"
- "movdqu (%0,%4),%%xmm7 \n"
- "lea (%0,%4,2),%0 \n"
- "movdqa %%xmm6,%%xmm8 \n"
- "punpcklbw %%xmm7,%%xmm6 \n"
- "neg %4 \n"
- "lea 0x10(%0,%4,8),%0 \n"
- "punpckhbw %%xmm7,%%xmm8 \n"
- "movdqa %%xmm8,%%xmm7 \n"
- "neg %4 \n"
- // Second round of bit swap.
- "movdqa %%xmm0,%%xmm8 \n"
- "movdqa %%xmm1,%%xmm9 \n"
- "punpckhwd %%xmm2,%%xmm8 \n"
- "punpckhwd %%xmm3,%%xmm9 \n"
- "punpcklwd %%xmm2,%%xmm0 \n"
- "punpcklwd %%xmm3,%%xmm1 \n"
- "movdqa %%xmm8,%%xmm2 \n"
- "movdqa %%xmm9,%%xmm3 \n"
- "movdqa %%xmm4,%%xmm8 \n"
- "movdqa %%xmm5,%%xmm9 \n"
- "punpckhwd %%xmm6,%%xmm8 \n"
- "punpckhwd %%xmm7,%%xmm9 \n"
- "punpcklwd %%xmm6,%%xmm4 \n"
- "punpcklwd %%xmm7,%%xmm5 \n"
- "movdqa %%xmm8,%%xmm6 \n"
- "movdqa %%xmm9,%%xmm7 \n"
- // Third round of bit swap.
- // Write to the destination pointer.
- "movdqa %%xmm0,%%xmm8 \n"
- "punpckldq %%xmm4,%%xmm0 \n"
- "movlpd %%xmm0,(%1) \n" // Write back U channel
- "movhpd %%xmm0,(%2) \n" // Write back V channel
- "punpckhdq %%xmm4,%%xmm8 \n"
- "movlpd %%xmm8,(%1,%5) \n"
- "lea (%1,%5,2),%1 \n"
- "movhpd %%xmm8,(%2,%6) \n"
- "lea (%2,%6,2),%2 \n"
- "movdqa %%xmm2,%%xmm8 \n"
- "punpckldq %%xmm6,%%xmm2 \n"
- "movlpd %%xmm2,(%1) \n"
- "movhpd %%xmm2,(%2) \n"
- "punpckhdq %%xmm6,%%xmm8 \n"
- "movlpd %%xmm8,(%1,%5) \n"
- "lea (%1,%5,2),%1 \n"
- "movhpd %%xmm8,(%2,%6) \n"
- "lea (%2,%6,2),%2 \n"
- "movdqa %%xmm1,%%xmm8 \n"
- "punpckldq %%xmm5,%%xmm1 \n"
- "movlpd %%xmm1,(%1) \n"
- "movhpd %%xmm1,(%2) \n"
- "punpckhdq %%xmm5,%%xmm8 \n"
- "movlpd %%xmm8,(%1,%5) \n"
- "lea (%1,%5,2),%1 \n"
- "movhpd %%xmm8,(%2,%6) \n"
- "lea (%2,%6,2),%2 \n"
- "movdqa %%xmm3,%%xmm8 \n"
- "punpckldq %%xmm7,%%xmm3 \n"
- "movlpd %%xmm3,(%1) \n"
- "movhpd %%xmm3,(%2) \n"
- "punpckhdq %%xmm7,%%xmm8 \n"
- "sub $0x8,%3 \n"
- "movlpd %%xmm8,(%1,%5) \n"
- "lea (%1,%5,2),%1 \n"
- "movhpd %%xmm8,(%2,%6) \n"
- "lea (%2,%6,2),%2 \n"
- "jg 1b \n"
- : "+r"(src), // %0
- "+r"(dst_a), // %1
- "+r"(dst_b), // %2
- "+r"(width) // %3
- : "r"((intptr_t)(src_stride)), // %4
- "r"((intptr_t)(dst_stride_a)), // %5
- "r"((intptr_t)(dst_stride_b)) // %6
- : "memory", "cc",
- "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", "xmm7",
- "xmm8", "xmm9"
-);
-}
-#endif
-#endif
-
-#endif // defined(__x86_64__) || defined(__i386__)
-
-#ifdef __cplusplus
-} // extern "C"
-} // namespace libyuv
-#endif
diff --git a/third_party/aom/third_party/libyuv/source/rotate_mips.cc b/third_party/aom/third_party/libyuv/source/rotate_mips.cc
deleted file mode 100644
index efe6bd909..000000000
--- a/third_party/aom/third_party/libyuv/source/rotate_mips.cc
+++ /dev/null
@@ -1,484 +0,0 @@
-/*
- * Copyright 2011 The LibYuv Project Authors. All rights reserved.
- *
- * Use of this source code is governed by a BSD-style license
- * that can be found in the LICENSE file in the root of the source
- * tree. An additional intellectual property rights grant can be found
- * in the file PATENTS. All contributing project authors may
- * be found in the AUTHORS file in the root of the source tree.
- */
-
-#include "libyuv/row.h"
-#include "libyuv/rotate_row.h"
-
-#include "libyuv/basic_types.h"
-
-#ifdef __cplusplus
-namespace libyuv {
-extern "C" {
-#endif
-
-#if !defined(LIBYUV_DISABLE_MIPS) && \
- defined(__mips_dsp) && (__mips_dsp_rev >= 2) && \
- (_MIPS_SIM == _MIPS_SIM_ABI32)
-
-void TransposeWx8_MIPS_DSPR2(const uint8* src, int src_stride,
- uint8* dst, int dst_stride, int width) {
- __asm__ __volatile__ (
- ".set push \n"
- ".set noreorder \n"
- "sll $t2, %[src_stride], 0x1 \n" // src_stride x 2
- "sll $t4, %[src_stride], 0x2 \n" // src_stride x 4
- "sll $t9, %[src_stride], 0x3 \n" // src_stride x 8
- "addu $t3, $t2, %[src_stride] \n"
- "addu $t5, $t4, %[src_stride] \n"
- "addu $t6, $t2, $t4 \n"
- "andi $t0, %[dst], 0x3 \n"
- "andi $t1, %[dst_stride], 0x3 \n"
- "or $t0, $t0, $t1 \n"
- "bnez $t0, 11f \n"
- " subu $t7, $t9, %[src_stride] \n"
-//dst + dst_stride word aligned
- "1: \n"
- "lbu $t0, 0(%[src]) \n"
- "lbux $t1, %[src_stride](%[src]) \n"
- "lbux $t8, $t2(%[src]) \n"
- "lbux $t9, $t3(%[src]) \n"
- "sll $t1, $t1, 16 \n"
- "sll $t9, $t9, 16 \n"
- "or $t0, $t0, $t1 \n"
- "or $t8, $t8, $t9 \n"
- "precr.qb.ph $s0, $t8, $t0 \n"
- "lbux $t0, $t4(%[src]) \n"
- "lbux $t1, $t5(%[src]) \n"
- "lbux $t8, $t6(%[src]) \n"
- "lbux $t9, $t7(%[src]) \n"
- "sll $t1, $t1, 16 \n"
- "sll $t9, $t9, 16 \n"
- "or $t0, $t0, $t1 \n"
- "or $t8, $t8, $t9 \n"
- "precr.qb.ph $s1, $t8, $t0 \n"
- "sw $s0, 0(%[dst]) \n"
- "addiu %[width], -1 \n"
- "addiu %[src], 1 \n"
- "sw $s1, 4(%[dst]) \n"
- "bnez %[width], 1b \n"
- " addu %[dst], %[dst], %[dst_stride] \n"
- "b 2f \n"
-//dst + dst_stride unaligned
- "11: \n"
- "lbu $t0, 0(%[src]) \n"
- "lbux $t1, %[src_stride](%[src]) \n"
- "lbux $t8, $t2(%[src]) \n"
- "lbux $t9, $t3(%[src]) \n"
- "sll $t1, $t1, 16 \n"
- "sll $t9, $t9, 16 \n"
- "or $t0, $t0, $t1 \n"
- "or $t8, $t8, $t9 \n"
- "precr.qb.ph $s0, $t8, $t0 \n"
- "lbux $t0, $t4(%[src]) \n"
- "lbux $t1, $t5(%[src]) \n"
- "lbux $t8, $t6(%[src]) \n"
- "lbux $t9, $t7(%[src]) \n"
- "sll $t1, $t1, 16 \n"
- "sll $t9, $t9, 16 \n"
- "or $t0, $t0, $t1 \n"
- "or $t8, $t8, $t9 \n"
- "precr.qb.ph $s1, $t8, $t0 \n"
- "swr $s0, 0(%[dst]) \n"
- "swl $s0, 3(%[dst]) \n"
- "addiu %[width], -1 \n"
- "addiu %[src], 1 \n"
- "swr $s1, 4(%[dst]) \n"
- "swl $s1, 7(%[dst]) \n"
- "bnez %[width], 11b \n"
- "addu %[dst], %[dst], %[dst_stride] \n"
- "2: \n"
- ".set pop \n"
- :[src] "+r" (src),
- [dst] "+r" (dst),
- [width] "+r" (width)
- :[src_stride] "r" (src_stride),
- [dst_stride] "r" (dst_stride)
- : "t0", "t1", "t2", "t3", "t4", "t5",
- "t6", "t7", "t8", "t9",
- "s0", "s1"
- );
-}
-
-void TransposeWx8_Fast_MIPS_DSPR2(const uint8* src, int src_stride,
- uint8* dst, int dst_stride, int width) {
- __asm__ __volatile__ (
- ".set noat \n"
- ".set push \n"
- ".set noreorder \n"
- "beqz %[width], 2f \n"
- " sll $t2, %[src_stride], 0x1 \n" // src_stride x 2
- "sll $t4, %[src_stride], 0x2 \n" // src_stride x 4
- "sll $t9, %[src_stride], 0x3 \n" // src_stride x 8
- "addu $t3, $t2, %[src_stride] \n"
- "addu $t5, $t4, %[src_stride] \n"
- "addu $t6, $t2, $t4 \n"
-
- "srl $AT, %[width], 0x2 \n"
- "andi $t0, %[dst], 0x3 \n"
- "andi $t1, %[dst_stride], 0x3 \n"
- "or $t0, $t0, $t1 \n"
- "bnez $t0, 11f \n"
- " subu $t7, $t9, %[src_stride] \n"
-//dst + dst_stride word aligned
- "1: \n"
- "lw $t0, 0(%[src]) \n"
- "lwx $t1, %[src_stride](%[src]) \n"
- "lwx $t8, $t2(%[src]) \n"
- "lwx $t9, $t3(%[src]) \n"
-
-// t0 = | 30 | 20 | 10 | 00 |
-// t1 = | 31 | 21 | 11 | 01 |
-// t8 = | 32 | 22 | 12 | 02 |
-// t9 = | 33 | 23 | 13 | 03 |
-
- "precr.qb.ph $s0, $t1, $t0 \n"
- "precr.qb.ph $s1, $t9, $t8 \n"
- "precrq.qb.ph $s2, $t1, $t0 \n"
- "precrq.qb.ph $s3, $t9, $t8 \n"
-
- // s0 = | 21 | 01 | 20 | 00 |
- // s1 = | 23 | 03 | 22 | 02 |
- // s2 = | 31 | 11 | 30 | 10 |
- // s3 = | 33 | 13 | 32 | 12 |
-
- "precr.qb.ph $s4, $s1, $s0 \n"
- "precrq.qb.ph $s5, $s1, $s0 \n"
- "precr.qb.ph $s6, $s3, $s2 \n"
- "precrq.qb.ph $s7, $s3, $s2 \n"
-
- // s4 = | 03 | 02 | 01 | 00 |
- // s5 = | 23 | 22 | 21 | 20 |
- // s6 = | 13 | 12 | 11 | 10 |
- // s7 = | 33 | 32 | 31 | 30 |
-
- "lwx $t0, $t4(%[src]) \n"
- "lwx $t1, $t5(%[src]) \n"
- "lwx $t8, $t6(%[src]) \n"
- "lwx $t9, $t7(%[src]) \n"
-
-// t0 = | 34 | 24 | 14 | 04 |
-// t1 = | 35 | 25 | 15 | 05 |
-// t8 = | 36 | 26 | 16 | 06 |
-// t9 = | 37 | 27 | 17 | 07 |
-
- "precr.qb.ph $s0, $t1, $t0 \n"
- "precr.qb.ph $s1, $t9, $t8 \n"
- "precrq.qb.ph $s2, $t1, $t0 \n"
- "precrq.qb.ph $s3, $t9, $t8 \n"
-
- // s0 = | 25 | 05 | 24 | 04 |
- // s1 = | 27 | 07 | 26 | 06 |
- // s2 = | 35 | 15 | 34 | 14 |
- // s3 = | 37 | 17 | 36 | 16 |
-
- "precr.qb.ph $t0, $s1, $s0 \n"
- "precrq.qb.ph $t1, $s1, $s0 \n"
- "precr.qb.ph $t8, $s3, $s2 \n"
- "precrq.qb.ph $t9, $s3, $s2 \n"
-
- // t0 = | 07 | 06 | 05 | 04 |
- // t1 = | 27 | 26 | 25 | 24 |
- // t8 = | 17 | 16 | 15 | 14 |
- // t9 = | 37 | 36 | 35 | 34 |
-
- "addu $s0, %[dst], %[dst_stride] \n"
- "addu $s1, $s0, %[dst_stride] \n"
- "addu $s2, $s1, %[dst_stride] \n"
-
- "sw $s4, 0(%[dst]) \n"
- "sw $t0, 4(%[dst]) \n"
- "sw $s6, 0($s0) \n"
- "sw $t8, 4($s0) \n"
- "sw $s5, 0($s1) \n"
- "sw $t1, 4($s1) \n"
- "sw $s7, 0($s2) \n"
- "sw $t9, 4($s2) \n"
-
- "addiu $AT, -1 \n"
- "addiu %[src], 4 \n"
-
- "bnez $AT, 1b \n"
- " addu %[dst], $s2, %[dst_stride] \n"
- "b 2f \n"
-//dst + dst_stride unaligned
- "11: \n"
- "lw $t0, 0(%[src]) \n"
- "lwx $t1, %[src_stride](%[src]) \n"
- "lwx $t8, $t2(%[src]) \n"
- "lwx $t9, $t3(%[src]) \n"
-
-// t0 = | 30 | 20 | 10 | 00 |
-// t1 = | 31 | 21 | 11 | 01 |
-// t8 = | 32 | 22 | 12 | 02 |
-// t9 = | 33 | 23 | 13 | 03 |
-
- "precr.qb.ph $s0, $t1, $t0 \n"
- "precr.qb.ph $s1, $t9, $t8 \n"
- "precrq.qb.ph $s2, $t1, $t0 \n"
- "precrq.qb.ph $s3, $t9, $t8 \n"
-
- // s0 = | 21 | 01 | 20 | 00 |
- // s1 = | 23 | 03 | 22 | 02 |
- // s2 = | 31 | 11 | 30 | 10 |
- // s3 = | 33 | 13 | 32 | 12 |
-
- "precr.qb.ph $s4, $s1, $s0 \n"
- "precrq.qb.ph $s5, $s1, $s0 \n"
- "precr.qb.ph $s6, $s3, $s2 \n"
- "precrq.qb.ph $s7, $s3, $s2 \n"
-
- // s4 = | 03 | 02 | 01 | 00 |
- // s5 = | 23 | 22 | 21 | 20 |
- // s6 = | 13 | 12 | 11 | 10 |
- // s7 = | 33 | 32 | 31 | 30 |
-
- "lwx $t0, $t4(%[src]) \n"
- "lwx $t1, $t5(%[src]) \n"
- "lwx $t8, $t6(%[src]) \n"
- "lwx $t9, $t7(%[src]) \n"
-
-// t0 = | 34 | 24 | 14 | 04 |
-// t1 = | 35 | 25 | 15 | 05 |
-// t8 = | 36 | 26 | 16 | 06 |
-// t9 = | 37 | 27 | 17 | 07 |
-
- "precr.qb.ph $s0, $t1, $t0 \n"
- "precr.qb.ph $s1, $t9, $t8 \n"
- "precrq.qb.ph $s2, $t1, $t0 \n"
- "precrq.qb.ph $s3, $t9, $t8 \n"
-
- // s0 = | 25 | 05 | 24 | 04 |
- // s1 = | 27 | 07 | 26 | 06 |
- // s2 = | 35 | 15 | 34 | 14 |
- // s3 = | 37 | 17 | 36 | 16 |
-
- "precr.qb.ph $t0, $s1, $s0 \n"
- "precrq.qb.ph $t1, $s1, $s0 \n"
- "precr.qb.ph $t8, $s3, $s2 \n"
- "precrq.qb.ph $t9, $s3, $s2 \n"
-
- // t0 = | 07 | 06 | 05 | 04 |
- // t1 = | 27 | 26 | 25 | 24 |
- // t8 = | 17 | 16 | 15 | 14 |
- // t9 = | 37 | 36 | 35 | 34 |
-
- "addu $s0, %[dst], %[dst_stride] \n"
- "addu $s1, $s0, %[dst_stride] \n"
- "addu $s2, $s1, %[dst_stride] \n"
-
- "swr $s4, 0(%[dst]) \n"
- "swl $s4, 3(%[dst]) \n"
- "swr $t0, 4(%[dst]) \n"
- "swl $t0, 7(%[dst]) \n"
- "swr $s6, 0($s0) \n"
- "swl $s6, 3($s0) \n"
- "swr $t8, 4($s0) \n"
- "swl $t8, 7($s0) \n"
- "swr $s5, 0($s1) \n"
- "swl $s5, 3($s1) \n"
- "swr $t1, 4($s1) \n"
- "swl $t1, 7($s1) \n"
- "swr $s7, 0($s2) \n"
- "swl $s7, 3($s2) \n"
- "swr $t9, 4($s2) \n"
- "swl $t9, 7($s2) \n"
-
- "addiu $AT, -1 \n"
- "addiu %[src], 4 \n"
-
- "bnez $AT, 11b \n"
- " addu %[dst], $s2, %[dst_stride] \n"
- "2: \n"
- ".set pop \n"
- ".set at \n"
- :[src] "+r" (src),
- [dst] "+r" (dst),
- [width] "+r" (width)
- :[src_stride] "r" (src_stride),
- [dst_stride] "r" (dst_stride)
- : "t0", "t1", "t2", "t3", "t4", "t5", "t6", "t7", "t8", "t9",
- "s0", "s1", "s2", "s3", "s4", "s5", "s6", "s7"
- );
-}
-
-void TransposeUVWx8_MIPS_DSPR2(const uint8* src, int src_stride,
- uint8* dst_a, int dst_stride_a,
- uint8* dst_b, int dst_stride_b,
- int width) {
- __asm__ __volatile__ (
- ".set push \n"
- ".set noreorder \n"
- "beqz %[width], 2f \n"
- " sll $t2, %[src_stride], 0x1 \n" // src_stride x 2
- "sll $t4, %[src_stride], 0x2 \n" // src_stride x 4
- "sll $t9, %[src_stride], 0x3 \n" // src_stride x 8
- "addu $t3, $t2, %[src_stride] \n"
- "addu $t5, $t4, %[src_stride] \n"
- "addu $t6, $t2, $t4 \n"
- "subu $t7, $t9, %[src_stride] \n"
- "srl $t1, %[width], 1 \n"
-
-// check word aligment for dst_a, dst_b, dst_stride_a and dst_stride_b
- "andi $t0, %[dst_a], 0x3 \n"
- "andi $t8, %[dst_b], 0x3 \n"
- "or $t0, $t0, $t8 \n"
- "andi $t8, %[dst_stride_a], 0x3 \n"
- "andi $s5, %[dst_stride_b], 0x3 \n"
- "or $t8, $t8, $s5 \n"
- "or $t0, $t0, $t8 \n"
- "bnez $t0, 11f \n"
- " nop \n"
-// dst + dst_stride word aligned (both, a & b dst addresses)
- "1: \n"
- "lw $t0, 0(%[src]) \n" // |B0|A0|b0|a0|
- "lwx $t8, %[src_stride](%[src]) \n" // |B1|A1|b1|a1|
- "addu $s5, %[dst_a], %[dst_stride_a] \n"
- "lwx $t9, $t2(%[src]) \n" // |B2|A2|b2|a2|
- "lwx $s0, $t3(%[src]) \n" // |B3|A3|b3|a3|
- "addu $s6, %[dst_b], %[dst_stride_b] \n"
-
- "precrq.ph.w $s1, $t8, $t0 \n" // |B1|A1|B0|A0|
- "precrq.ph.w $s2, $s0, $t9 \n" // |B3|A3|B2|A2|
- "precr.qb.ph $s3, $s2, $s1 \n" // |A3|A2|A1|A0|
- "precrq.qb.ph $s4, $s2, $s1 \n" // |B3|B2|B1|B0|
-
- "sll $t0, $t0, 16 \n"
- "packrl.ph $s1, $t8, $t0 \n" // |b1|a1|b0|a0|
- "sll $t9, $t9, 16 \n"
- "packrl.ph $s2, $s0, $t9 \n" // |b3|a3|b2|a2|
-
- "sw $s3, 0($s5) \n"
- "sw $s4, 0($s6) \n"
-
- "precr.qb.ph $s3, $s2, $s1 \n" // |a3|a2|a1|a0|
- "precrq.qb.ph $s4, $s2, $s1 \n" // |b3|b2|b1|b0|
-
- "lwx $t0, $t4(%[src]) \n" // |B4|A4|b4|a4|
- "lwx $t8, $t5(%[src]) \n" // |B5|A5|b5|a5|
- "lwx $t9, $t6(%[src]) \n" // |B6|A6|b6|a6|
- "lwx $s0, $t7(%[src]) \n" // |B7|A7|b7|a7|
- "sw $s3, 0(%[dst_a]) \n"
- "sw $s4, 0(%[dst_b]) \n"
-
- "precrq.ph.w $s1, $t8, $t0 \n" // |B5|A5|B4|A4|
- "precrq.ph.w $s2, $s0, $t9 \n" // |B6|A6|B7|A7|
- "precr.qb.ph $s3, $s2, $s1 \n" // |A7|A6|A5|A4|
- "precrq.qb.ph $s4, $s2, $s1 \n" // |B7|B6|B5|B4|
-
- "sll $t0, $t0, 16 \n"
- "packrl.ph $s1, $t8, $t0 \n" // |b5|a5|b4|a4|
- "sll $t9, $t9, 16 \n"
- "packrl.ph $s2, $s0, $t9 \n" // |b7|a7|b6|a6|
- "sw $s3, 4($s5) \n"
- "sw $s4, 4($s6) \n"
-
- "precr.qb.ph $s3, $s2, $s1 \n" // |a7|a6|a5|a4|
- "precrq.qb.ph $s4, $s2, $s1 \n" // |b7|b6|b5|b4|
-
- "addiu %[src], 4 \n"
- "addiu $t1, -1 \n"
- "sll $t0, %[dst_stride_a], 1 \n"
- "sll $t8, %[dst_stride_b], 1 \n"
- "sw $s3, 4(%[dst_a]) \n"
- "sw $s4, 4(%[dst_b]) \n"
- "addu %[dst_a], %[dst_a], $t0 \n"
- "bnez $t1, 1b \n"
- " addu %[dst_b], %[dst_b], $t8 \n"
- "b 2f \n"
- " nop \n"
-
-// dst_a or dst_b or dst_stride_a or dst_stride_b not word aligned
- "11: \n"
- "lw $t0, 0(%[src]) \n" // |B0|A0|b0|a0|
- "lwx $t8, %[src_stride](%[src]) \n" // |B1|A1|b1|a1|
- "addu $s5, %[dst_a], %[dst_stride_a] \n"
- "lwx $t9, $t2(%[src]) \n" // |B2|A2|b2|a2|
- "lwx $s0, $t3(%[src]) \n" // |B3|A3|b3|a3|
- "addu $s6, %[dst_b], %[dst_stride_b] \n"
-
- "precrq.ph.w $s1, $t8, $t0 \n" // |B1|A1|B0|A0|
- "precrq.ph.w $s2, $s0, $t9 \n" // |B3|A3|B2|A2|
- "precr.qb.ph $s3, $s2, $s1 \n" // |A3|A2|A1|A0|
- "precrq.qb.ph $s4, $s2, $s1 \n" // |B3|B2|B1|B0|
-
- "sll $t0, $t0, 16 \n"
- "packrl.ph $s1, $t8, $t0 \n" // |b1|a1|b0|a0|
- "sll $t9, $t9, 16 \n"
- "packrl.ph $s2, $s0, $t9 \n" // |b3|a3|b2|a2|
-
- "swr $s3, 0($s5) \n"
- "swl $s3, 3($s5) \n"
- "swr $s4, 0($s6) \n"
- "swl $s4, 3($s6) \n"
-
- "precr.qb.ph $s3, $s2, $s1 \n" // |a3|a2|a1|a0|
- "precrq.qb.ph $s4, $s2, $s1 \n" // |b3|b2|b1|b0|
-
- "lwx $t0, $t4(%[src]) \n" // |B4|A4|b4|a4|
- "lwx $t8, $t5(%[src]) \n" // |B5|A5|b5|a5|
- "lwx $t9, $t6(%[src]) \n" // |B6|A6|b6|a6|
- "lwx $s0, $t7(%[src]) \n" // |B7|A7|b7|a7|
- "swr $s3, 0(%[dst_a]) \n"
- "swl $s3, 3(%[dst_a]) \n"
- "swr $s4, 0(%[dst_b]) \n"
- "swl $s4, 3(%[dst_b]) \n"
-
- "precrq.ph.w $s1, $t8, $t0 \n" // |B5|A5|B4|A4|
- "precrq.ph.w $s2, $s0, $t9 \n" // |B6|A6|B7|A7|
- "precr.qb.ph $s3, $s2, $s1 \n" // |A7|A6|A5|A4|
- "precrq.qb.ph $s4, $s2, $s1 \n" // |B7|B6|B5|B4|
-
- "sll $t0, $t0, 16 \n"
- "packrl.ph $s1, $t8, $t0 \n" // |b5|a5|b4|a4|
- "sll $t9, $t9, 16 \n"
- "packrl.ph $s2, $s0, $t9 \n" // |b7|a7|b6|a6|
-
- "swr $s3, 4($s5) \n"
- "swl $s3, 7($s5) \n"
- "swr $s4, 4($s6) \n"
- "swl $s4, 7($s6) \n"
-
- "precr.qb.ph $s3, $s2, $s1 \n" // |a7|a6|a5|a4|
- "precrq.qb.ph $s4, $s2, $s1 \n" // |b7|b6|b5|b4|
-
- "addiu %[src], 4 \n"
- "addiu $t1, -1 \n"
- "sll $t0, %[dst_stride_a], 1 \n"
- "sll $t8, %[dst_stride_b], 1 \n"
- "swr $s3, 4(%[dst_a]) \n"
- "swl $s3, 7(%[dst_a]) \n"
- "swr $s4, 4(%[dst_b]) \n"
- "swl $s4, 7(%[dst_b]) \n"
- "addu %[dst_a], %[dst_a], $t0 \n"
- "bnez $t1, 11b \n"
- " addu %[dst_b], %[dst_b], $t8 \n"
-
- "2: \n"
- ".set pop \n"
- : [src] "+r" (src),
- [dst_a] "+r" (dst_a),
- [dst_b] "+r" (dst_b),
- [width] "+r" (width),
- [src_stride] "+r" (src_stride)
- : [dst_stride_a] "r" (dst_stride_a),
- [dst_stride_b] "r" (dst_stride_b)
- : "t0", "t1", "t2", "t3", "t4", "t5",
- "t6", "t7", "t8", "t9",
- "s0", "s1", "s2", "s3",
- "s4", "s5", "s6"
- );
-}
-
-#endif // defined(__mips_dsp) && (__mips_dsp_rev >= 2)
-
-#ifdef __cplusplus
-} // extern "C"
-} // namespace libyuv
-#endif
diff --git a/third_party/aom/third_party/libyuv/source/rotate_neon.cc b/third_party/aom/third_party/libyuv/source/rotate_neon.cc
deleted file mode 100644
index 76043b3b3..000000000
--- a/third_party/aom/third_party/libyuv/source/rotate_neon.cc
+++ /dev/null
@@ -1,535 +0,0 @@
-/*
- * Copyright 2011 The LibYuv Project Authors. All rights reserved.
- *
- * Use of this source code is governed by a BSD-style license
- * that can be found in the LICENSE file in the root of the source
- * tree. An additional intellectual property rights grant can be found
- * in the file PATENTS. All contributing project authors may
- * be found in the AUTHORS file in the root of the source tree.
- */
-
-#include "libyuv/row.h"
-#include "libyuv/rotate_row.h"
-
-#include "libyuv/basic_types.h"
-
-#ifdef __cplusplus
-namespace libyuv {
-extern "C" {
-#endif
-
-#if !defined(LIBYUV_DISABLE_NEON) && defined(__ARM_NEON__) && \
- !defined(__aarch64__)
-
-static uvec8 kVTbl4x4Transpose =
- { 0, 4, 8, 12, 1, 5, 9, 13, 2, 6, 10, 14, 3, 7, 11, 15 };
-
-void TransposeWx8_NEON(const uint8* src, int src_stride,
- uint8* dst, int dst_stride,
- int width) {
- const uint8* src_temp = NULL;
- asm volatile (
- // loops are on blocks of 8. loop will stop when
- // counter gets to or below 0. starting the counter
- // at w-8 allow for this
- "sub %5, #8 \n"
-
- // handle 8x8 blocks. this should be the majority of the plane
- ".p2align 2 \n"
- "1: \n"
- "mov %0, %1 \n"
-
- MEMACCESS(0)
- "vld1.8 {d0}, [%0], %2 \n"
- MEMACCESS(0)
- "vld1.8 {d1}, [%0], %2 \n"
- MEMACCESS(0)
- "vld1.8 {d2}, [%0], %2 \n"
- MEMACCESS(0)
- "vld1.8 {d3}, [%0], %2 \n"
- MEMACCESS(0)
- "vld1.8 {d4}, [%0], %2 \n"
- MEMACCESS(0)
- "vld1.8 {d5}, [%0], %2 \n"
- MEMACCESS(0)
- "vld1.8 {d6}, [%0], %2 \n"
- MEMACCESS(0)
- "vld1.8 {d7}, [%0] \n"
-
- "vtrn.8 d1, d0 \n"
- "vtrn.8 d3, d2 \n"
- "vtrn.8 d5, d4 \n"
- "vtrn.8 d7, d6 \n"
-
- "vtrn.16 d1, d3 \n"
- "vtrn.16 d0, d2 \n"
- "vtrn.16 d5, d7 \n"
- "vtrn.16 d4, d6 \n"
-
- "vtrn.32 d1, d5 \n"
- "vtrn.32 d0, d4 \n"
- "vtrn.32 d3, d7 \n"
- "vtrn.32 d2, d6 \n"
-
- "vrev16.8 q0, q0 \n"
- "vrev16.8 q1, q1 \n"
- "vrev16.8 q2, q2 \n"
- "vrev16.8 q3, q3 \n"
-
- "mov %0, %3 \n"
-
- MEMACCESS(0)
- "vst1.8 {d1}, [%0], %4 \n"
- MEMACCESS(0)
- "vst1.8 {d0}, [%0], %4 \n"
- MEMACCESS(0)
- "vst1.8 {d3}, [%0], %4 \n"
- MEMACCESS(0)
- "vst1.8 {d2}, [%0], %4 \n"
- MEMACCESS(0)
- "vst1.8 {d5}, [%0], %4 \n"
- MEMACCESS(0)
- "vst1.8 {d4}, [%0], %4 \n"
- MEMACCESS(0)
- "vst1.8 {d7}, [%0], %4 \n"
- MEMACCESS(0)
- "vst1.8 {d6}, [%0] \n"
-
- "add %1, #8 \n" // src += 8
- "add %3, %3, %4, lsl #3 \n" // dst += 8 * dst_stride
- "subs %5, #8 \n" // w -= 8
- "bge 1b \n"
-
- // add 8 back to counter. if the result is 0 there are
- // no residuals.
- "adds %5, #8 \n"
- "beq 4f \n"
-
- // some residual, so between 1 and 7 lines left to transpose
- "cmp %5, #2 \n"
- "blt 3f \n"
-
- "cmp %5, #4 \n"
- "blt 2f \n"
-
- // 4x8 block
- "mov %0, %1 \n"
- MEMACCESS(0)
- "vld1.32 {d0[0]}, [%0], %2 \n"
- MEMACCESS(0)
- "vld1.32 {d0[1]}, [%0], %2 \n"
- MEMACCESS(0)
- "vld1.32 {d1[0]}, [%0], %2 \n"
- MEMACCESS(0)
- "vld1.32 {d1[1]}, [%0], %2 \n"
- MEMACCESS(0)
- "vld1.32 {d2[0]}, [%0], %2 \n"
- MEMACCESS(0)
- "vld1.32 {d2[1]}, [%0], %2 \n"
- MEMACCESS(0)
- "vld1.32 {d3[0]}, [%0], %2 \n"
- MEMACCESS(0)
- "vld1.32 {d3[1]}, [%0] \n"
-
- "mov %0, %3 \n"
-
- MEMACCESS(6)
- "vld1.8 {q3}, [%6] \n"
-
- "vtbl.8 d4, {d0, d1}, d6 \n"
- "vtbl.8 d5, {d0, d1}, d7 \n"
- "vtbl.8 d0, {d2, d3}, d6 \n"
- "vtbl.8 d1, {d2, d3}, d7 \n"
-
- // TODO(frkoenig): Rework shuffle above to
- // write out with 4 instead of 8 writes.
- MEMACCESS(0)
- "vst1.32 {d4[0]}, [%0], %4 \n"
- MEMACCESS(0)
- "vst1.32 {d4[1]}, [%0], %4 \n"
- MEMACCESS(0)
- "vst1.32 {d5[0]}, [%0], %4 \n"
- MEMACCESS(0)
- "vst1.32 {d5[1]}, [%0] \n"
-
- "add %0, %3, #4 \n"
- MEMACCESS(0)
- "vst1.32 {d0[0]}, [%0], %4 \n"
- MEMACCESS(0)
- "vst1.32 {d0[1]}, [%0], %4 \n"
- MEMACCESS(0)
- "vst1.32 {d1[0]}, [%0], %4 \n"
- MEMACCESS(0)
- "vst1.32 {d1[1]}, [%0] \n"
-
- "add %1, #4 \n" // src += 4
- "add %3, %3, %4, lsl #2 \n" // dst += 4 * dst_stride
- "subs %5, #4 \n" // w -= 4
- "beq 4f \n"
-
- // some residual, check to see if it includes a 2x8 block,
- // or less
- "cmp %5, #2 \n"
- "blt 3f \n"
-
- // 2x8 block
- "2: \n"
- "mov %0, %1 \n"
- MEMACCESS(0)
- "vld1.16 {d0[0]}, [%0], %2 \n"
- MEMACCESS(0)
- "vld1.16 {d1[0]}, [%0], %2 \n"
- MEMACCESS(0)
- "vld1.16 {d0[1]}, [%0], %2 \n"
- MEMACCESS(0)
- "vld1.16 {d1[1]}, [%0], %2 \n"
- MEMACCESS(0)
- "vld1.16 {d0[2]}, [%0], %2 \n"
- MEMACCESS(0)
- "vld1.16 {d1[2]}, [%0], %2 \n"
- MEMACCESS(0)
- "vld1.16 {d0[3]}, [%0], %2 \n"
- MEMACCESS(0)
- "vld1.16 {d1[3]}, [%0] \n"
-
- "vtrn.8 d0, d1 \n"
-
- "mov %0, %3 \n"
-
- MEMACCESS(0)
- "vst1.64 {d0}, [%0], %4 \n"
- MEMACCESS(0)
- "vst1.64 {d1}, [%0] \n"
-
- "add %1, #2 \n" // src += 2
- "add %3, %3, %4, lsl #1 \n" // dst += 2 * dst_stride
- "subs %5, #2 \n" // w -= 2
- "beq 4f \n"
-
- // 1x8 block
- "3: \n"
- MEMACCESS(1)
- "vld1.8 {d0[0]}, [%1], %2 \n"
- MEMACCESS(1)
- "vld1.8 {d0[1]}, [%1], %2 \n"
- MEMACCESS(1)
- "vld1.8 {d0[2]}, [%1], %2 \n"
- MEMACCESS(1)
- "vld1.8 {d0[3]}, [%1], %2 \n"
- MEMACCESS(1)
- "vld1.8 {d0[4]}, [%1], %2 \n"
- MEMACCESS(1)
- "vld1.8 {d0[5]}, [%1], %2 \n"
- MEMACCESS(1)
- "vld1.8 {d0[6]}, [%1], %2 \n"
- MEMACCESS(1)
- "vld1.8 {d0[7]}, [%1] \n"
-
- MEMACCESS(3)
- "vst1.64 {d0}, [%3] \n"
-
- "4: \n"
-
- : "+r"(src_temp), // %0
- "+r"(src), // %1
- "+r"(src_stride), // %2
- "+r"(dst), // %3
- "+r"(dst_stride), // %4
- "+r"(width) // %5
- : "r"(&kVTbl4x4Transpose) // %6
- : "memory", "cc", "q0", "q1", "q2", "q3"
- );
-}
-
-static uvec8 kVTbl4x4TransposeDi =
- { 0, 8, 1, 9, 2, 10, 3, 11, 4, 12, 5, 13, 6, 14, 7, 15 };
-
-void TransposeUVWx8_NEON(const uint8* src, int src_stride,
- uint8* dst_a, int dst_stride_a,
- uint8* dst_b, int dst_stride_b,
- int width) {
- const uint8* src_temp = NULL;
- asm volatile (
- // loops are on blocks of 8. loop will stop when
- // counter gets to or below 0. starting the counter
- // at w-8 allow for this
- "sub %7, #8 \n"
-
- // handle 8x8 blocks. this should be the majority of the plane
- ".p2align 2 \n"
- "1: \n"
- "mov %0, %1 \n"
-
- MEMACCESS(0)
- "vld2.8 {d0, d1}, [%0], %2 \n"
- MEMACCESS(0)
- "vld2.8 {d2, d3}, [%0], %2 \n"
- MEMACCESS(0)
- "vld2.8 {d4, d5}, [%0], %2 \n"
- MEMACCESS(0)
- "vld2.8 {d6, d7}, [%0], %2 \n"
- MEMACCESS(0)
- "vld2.8 {d16, d17}, [%0], %2 \n"
- MEMACCESS(0)
- "vld2.8 {d18, d19}, [%0], %2 \n"
- MEMACCESS(0)
- "vld2.8 {d20, d21}, [%0], %2 \n"
- MEMACCESS(0)
- "vld2.8 {d22, d23}, [%0] \n"
-
- "vtrn.8 q1, q0 \n"
- "vtrn.8 q3, q2 \n"
- "vtrn.8 q9, q8 \n"
- "vtrn.8 q11, q10 \n"
-
- "vtrn.16 q1, q3 \n"
- "vtrn.16 q0, q2 \n"
- "vtrn.16 q9, q11 \n"
- "vtrn.16 q8, q10 \n"
-
- "vtrn.32 q1, q9 \n"
- "vtrn.32 q0, q8 \n"
- "vtrn.32 q3, q11 \n"
- "vtrn.32 q2, q10 \n"
-
- "vrev16.8 q0, q0 \n"
- "vrev16.8 q1, q1 \n"
- "vrev16.8 q2, q2 \n"
- "vrev16.8 q3, q3 \n"
- "vrev16.8 q8, q8 \n"
- "vrev16.8 q9, q9 \n"
- "vrev16.8 q10, q10 \n"
- "vrev16.8 q11, q11 \n"
-
- "mov %0, %3 \n"
-
- MEMACCESS(0)
- "vst1.8 {d2}, [%0], %4 \n"
- MEMACCESS(0)
- "vst1.8 {d0}, [%0], %4 \n"
- MEMACCESS(0)
- "vst1.8 {d6}, [%0], %4 \n"
- MEMACCESS(0)
- "vst1.8 {d4}, [%0], %4 \n"
- MEMACCESS(0)
- "vst1.8 {d18}, [%0], %4 \n"
- MEMACCESS(0)
- "vst1.8 {d16}, [%0], %4 \n"
- MEMACCESS(0)
- "vst1.8 {d22}, [%0], %4 \n"
- MEMACCESS(0)
- "vst1.8 {d20}, [%0] \n"
-
- "mov %0, %5 \n"
-
- MEMACCESS(0)
- "vst1.8 {d3}, [%0], %6 \n"
- MEMACCESS(0)
- "vst1.8 {d1}, [%0], %6 \n"
- MEMACCESS(0)
- "vst1.8 {d7}, [%0], %6 \n"
- MEMACCESS(0)
- "vst1.8 {d5}, [%0], %6 \n"
- MEMACCESS(0)
- "vst1.8 {d19}, [%0], %6 \n"
- MEMACCESS(0)
- "vst1.8 {d17}, [%0], %6 \n"
- MEMACCESS(0)
- "vst1.8 {d23}, [%0], %6 \n"
- MEMACCESS(0)
- "vst1.8 {d21}, [%0] \n"
-
- "add %1, #8*2 \n" // src += 8*2
- "add %3, %3, %4, lsl #3 \n" // dst_a += 8 * dst_stride_a
- "add %5, %5, %6, lsl #3 \n" // dst_b += 8 * dst_stride_b
- "subs %7, #8 \n" // w -= 8
- "bge 1b \n"
-
- // add 8 back to counter. if the result is 0 there are
- // no residuals.
- "adds %7, #8 \n"
- "beq 4f \n"
-
- // some residual, so between 1 and 7 lines left to transpose
- "cmp %7, #2 \n"
- "blt 3f \n"
-
- "cmp %7, #4 \n"
- "blt 2f \n"
-
- // TODO(frkoenig): Clean this up
- // 4x8 block
- "mov %0, %1 \n"
- MEMACCESS(0)
- "vld1.64 {d0}, [%0], %2 \n"
- MEMACCESS(0)
- "vld1.64 {d1}, [%0], %2 \n"
- MEMACCESS(0)
- "vld1.64 {d2}, [%0], %2 \n"
- MEMACCESS(0)
- "vld1.64 {d3}, [%0], %2 \n"
- MEMACCESS(0)
- "vld1.64 {d4}, [%0], %2 \n"
- MEMACCESS(0)
- "vld1.64 {d5}, [%0], %2 \n"
- MEMACCESS(0)
- "vld1.64 {d6}, [%0], %2 \n"
- MEMACCESS(0)
- "vld1.64 {d7}, [%0] \n"
-
- MEMACCESS(8)
- "vld1.8 {q15}, [%8] \n"
-
- "vtrn.8 q0, q1 \n"
- "vtrn.8 q2, q3 \n"
-
- "vtbl.8 d16, {d0, d1}, d30 \n"
- "vtbl.8 d17, {d0, d1}, d31 \n"
- "vtbl.8 d18, {d2, d3}, d30 \n"
- "vtbl.8 d19, {d2, d3}, d31 \n"
- "vtbl.8 d20, {d4, d5}, d30 \n"
- "vtbl.8 d21, {d4, d5}, d31 \n"
- "vtbl.8 d22, {d6, d7}, d30 \n"
- "vtbl.8 d23, {d6, d7}, d31 \n"
-
- "mov %0, %3 \n"
-
- MEMACCESS(0)
- "vst1.32 {d16[0]}, [%0], %4 \n"
- MEMACCESS(0)
- "vst1.32 {d16[1]}, [%0], %4 \n"
- MEMACCESS(0)
- "vst1.32 {d17[0]}, [%0], %4 \n"
- MEMACCESS(0)
- "vst1.32 {d17[1]}, [%0], %4 \n"
-
- "add %0, %3, #4 \n"
- MEMACCESS(0)
- "vst1.32 {d20[0]}, [%0], %4 \n"
- MEMACCESS(0)
- "vst1.32 {d20[1]}, [%0], %4 \n"
- MEMACCESS(0)
- "vst1.32 {d21[0]}, [%0], %4 \n"
- MEMACCESS(0)
- "vst1.32 {d21[1]}, [%0] \n"
-
- "mov %0, %5 \n"
-
- MEMACCESS(0)
- "vst1.32 {d18[0]}, [%0], %6 \n"
- MEMACCESS(0)
- "vst1.32 {d18[1]}, [%0], %6 \n"
- MEMACCESS(0)
- "vst1.32 {d19[0]}, [%0], %6 \n"
- MEMACCESS(0)
- "vst1.32 {d19[1]}, [%0], %6 \n"
-
- "add %0, %5, #4 \n"
- MEMACCESS(0)
- "vst1.32 {d22[0]}, [%0], %6 \n"
- MEMACCESS(0)
- "vst1.32 {d22[1]}, [%0], %6 \n"
- MEMACCESS(0)
- "vst1.32 {d23[0]}, [%0], %6 \n"
- MEMACCESS(0)
- "vst1.32 {d23[1]}, [%0] \n"
-
- "add %1, #4*2 \n" // src += 4 * 2
- "add %3, %3, %4, lsl #2 \n" // dst_a += 4 * dst_stride_a
- "add %5, %5, %6, lsl #2 \n" // dst_b += 4 * dst_stride_b
- "subs %7, #4 \n" // w -= 4
- "beq 4f \n"
-
- // some residual, check to see if it includes a 2x8 block,
- // or less
- "cmp %7, #2 \n"
- "blt 3f \n"
-
- // 2x8 block
- "2: \n"
- "mov %0, %1 \n"
- MEMACCESS(0)
- "vld2.16 {d0[0], d2[0]}, [%0], %2 \n"
- MEMACCESS(0)
- "vld2.16 {d1[0], d3[0]}, [%0], %2 \n"
- MEMACCESS(0)
- "vld2.16 {d0[1], d2[1]}, [%0], %2 \n"
- MEMACCESS(0)
- "vld2.16 {d1[1], d3[1]}, [%0], %2 \n"
- MEMACCESS(0)
- "vld2.16 {d0[2], d2[2]}, [%0], %2 \n"
- MEMACCESS(0)
- "vld2.16 {d1[2], d3[2]}, [%0], %2 \n"
- MEMACCESS(0)
- "vld2.16 {d0[3], d2[3]}, [%0], %2 \n"
- MEMACCESS(0)
- "vld2.16 {d1[3], d3[3]}, [%0] \n"
-
- "vtrn.8 d0, d1 \n"
- "vtrn.8 d2, d3 \n"
-
- "mov %0, %3 \n"
-
- MEMACCESS(0)
- "vst1.64 {d0}, [%0], %4 \n"
- MEMACCESS(0)
- "vst1.64 {d2}, [%0] \n"
-
- "mov %0, %5 \n"
-
- MEMACCESS(0)
- "vst1.64 {d1}, [%0], %6 \n"
- MEMACCESS(0)
- "vst1.64 {d3}, [%0] \n"
-
- "add %1, #2*2 \n" // src += 2 * 2
- "add %3, %3, %4, lsl #1 \n" // dst_a += 2 * dst_stride_a
- "add %5, %5, %6, lsl #1 \n" // dst_b += 2 * dst_stride_b
- "subs %7, #2 \n" // w -= 2
- "beq 4f \n"
-
- // 1x8 block
- "3: \n"
- MEMACCESS(1)
- "vld2.8 {d0[0], d1[0]}, [%1], %2 \n"
- MEMACCESS(1)
- "vld2.8 {d0[1], d1[1]}, [%1], %2 \n"
- MEMACCESS(1)
- "vld2.8 {d0[2], d1[2]}, [%1], %2 \n"
- MEMACCESS(1)
- "vld2.8 {d0[3], d1[3]}, [%1], %2 \n"
- MEMACCESS(1)
- "vld2.8 {d0[4], d1[4]}, [%1], %2 \n"
- MEMACCESS(1)
- "vld2.8 {d0[5], d1[5]}, [%1], %2 \n"
- MEMACCESS(1)
- "vld2.8 {d0[6], d1[6]}, [%1], %2 \n"
- MEMACCESS(1)
- "vld2.8 {d0[7], d1[7]}, [%1] \n"
-
- MEMACCESS(3)
- "vst1.64 {d0}, [%3] \n"
- MEMACCESS(5)
- "vst1.64 {d1}, [%5] \n"
-
- "4: \n"
-
- : "+r"(src_temp), // %0
- "+r"(src), // %1
- "+r"(src_stride), // %2
- "+r"(dst_a), // %3
- "+r"(dst_stride_a), // %4
- "+r"(dst_b), // %5
- "+r"(dst_stride_b), // %6
- "+r"(width) // %7
- : "r"(&kVTbl4x4TransposeDi) // %8
- : "memory", "cc",
- "q0", "q1", "q2", "q3", "q8", "q9", "q10", "q11"
- );
-}
-#endif // defined(__ARM_NEON__) && !defined(__aarch64__)
-
-#ifdef __cplusplus
-} // extern "C"
-} // namespace libyuv
-#endif
diff --git a/third_party/aom/third_party/libyuv/source/rotate_neon64.cc b/third_party/aom/third_party/libyuv/source/rotate_neon64.cc
deleted file mode 100644
index f52c082b3..000000000
--- a/third_party/aom/third_party/libyuv/source/rotate_neon64.cc
+++ /dev/null
@@ -1,543 +0,0 @@
-/*
- * Copyright 2014 The LibYuv Project Authors. All rights reserved.
- *
- * Use of this source code is governed by a BSD-style license
- * that can be found in the LICENSE file in the root of the source
- * tree. An additional intellectual property rights grant can be found
- * in the file PATENTS. All contributing project authors may
- * be found in the AUTHORS file in the root of the source tree.
- */
-
-#include "libyuv/row.h"
-#include "libyuv/rotate_row.h"
-
-#include "libyuv/basic_types.h"
-
-#ifdef __cplusplus
-namespace libyuv {
-extern "C" {
-#endif
-
-// This module is for GCC Neon armv8 64 bit.
-#if !defined(LIBYUV_DISABLE_NEON) && defined(__aarch64__)
-
-static uvec8 kVTbl4x4Transpose =
- { 0, 4, 8, 12, 1, 5, 9, 13, 2, 6, 10, 14, 3, 7, 11, 15 };
-
-void TransposeWx8_NEON(const uint8* src, int src_stride,
- uint8* dst, int dst_stride, int width) {
- const uint8* src_temp = NULL;
- int64 width64 = (int64) width; // Work around clang 3.4 warning.
- asm volatile (
- // loops are on blocks of 8. loop will stop when
- // counter gets to or below 0. starting the counter
- // at w-8 allow for this
- "sub %3, %3, #8 \n"
-
- // handle 8x8 blocks. this should be the majority of the plane
- "1: \n"
- "mov %0, %1 \n"
-
- MEMACCESS(0)
- "ld1 {v0.8b}, [%0], %5 \n"
- MEMACCESS(0)
- "ld1 {v1.8b}, [%0], %5 \n"
- MEMACCESS(0)
- "ld1 {v2.8b}, [%0], %5 \n"
- MEMACCESS(0)
- "ld1 {v3.8b}, [%0], %5 \n"
- MEMACCESS(0)
- "ld1 {v4.8b}, [%0], %5 \n"
- MEMACCESS(0)
- "ld1 {v5.8b}, [%0], %5 \n"
- MEMACCESS(0)
- "ld1 {v6.8b}, [%0], %5 \n"
- MEMACCESS(0)
- "ld1 {v7.8b}, [%0] \n"
-
- "trn2 v16.8b, v0.8b, v1.8b \n"
- "trn1 v17.8b, v0.8b, v1.8b \n"
- "trn2 v18.8b, v2.8b, v3.8b \n"
- "trn1 v19.8b, v2.8b, v3.8b \n"
- "trn2 v20.8b, v4.8b, v5.8b \n"
- "trn1 v21.8b, v4.8b, v5.8b \n"
- "trn2 v22.8b, v6.8b, v7.8b \n"
- "trn1 v23.8b, v6.8b, v7.8b \n"
-
- "trn2 v3.4h, v17.4h, v19.4h \n"
- "trn1 v1.4h, v17.4h, v19.4h \n"
- "trn2 v2.4h, v16.4h, v18.4h \n"
- "trn1 v0.4h, v16.4h, v18.4h \n"
- "trn2 v7.4h, v21.4h, v23.4h \n"
- "trn1 v5.4h, v21.4h, v23.4h \n"
- "trn2 v6.4h, v20.4h, v22.4h \n"
- "trn1 v4.4h, v20.4h, v22.4h \n"
-
- "trn2 v21.2s, v1.2s, v5.2s \n"
- "trn1 v17.2s, v1.2s, v5.2s \n"
- "trn2 v20.2s, v0.2s, v4.2s \n"
- "trn1 v16.2s, v0.2s, v4.2s \n"
- "trn2 v23.2s, v3.2s, v7.2s \n"
- "trn1 v19.2s, v3.2s, v7.2s \n"
- "trn2 v22.2s, v2.2s, v6.2s \n"
- "trn1 v18.2s, v2.2s, v6.2s \n"
-
- "mov %0, %2 \n"
-
- MEMACCESS(0)
- "st1 {v17.8b}, [%0], %6 \n"
- MEMACCESS(0)
- "st1 {v16.8b}, [%0], %6 \n"
- MEMACCESS(0)
- "st1 {v19.8b}, [%0], %6 \n"
- MEMACCESS(0)
- "st1 {v18.8b}, [%0], %6 \n"
- MEMACCESS(0)
- "st1 {v21.8b}, [%0], %6 \n"
- MEMACCESS(0)
- "st1 {v20.8b}, [%0], %6 \n"
- MEMACCESS(0)
- "st1 {v23.8b}, [%0], %6 \n"
- MEMACCESS(0)
- "st1 {v22.8b}, [%0] \n"
-
- "add %1, %1, #8 \n" // src += 8
- "add %2, %2, %6, lsl #3 \n" // dst += 8 * dst_stride
- "subs %3, %3, #8 \n" // w -= 8
- "b.ge 1b \n"
-
- // add 8 back to counter. if the result is 0 there are
- // no residuals.
- "adds %3, %3, #8 \n"
- "b.eq 4f \n"
-
- // some residual, so between 1 and 7 lines left to transpose
- "cmp %3, #2 \n"
- "b.lt 3f \n"
-
- "cmp %3, #4 \n"
- "b.lt 2f \n"
-
- // 4x8 block
- "mov %0, %1 \n"
- MEMACCESS(0)
- "ld1 {v0.s}[0], [%0], %5 \n"
- MEMACCESS(0)
- "ld1 {v0.s}[1], [%0], %5 \n"
- MEMACCESS(0)
- "ld1 {v0.s}[2], [%0], %5 \n"
- MEMACCESS(0)
- "ld1 {v0.s}[3], [%0], %5 \n"
- MEMACCESS(0)
- "ld1 {v1.s}[0], [%0], %5 \n"
- MEMACCESS(0)
- "ld1 {v1.s}[1], [%0], %5 \n"
- MEMACCESS(0)
- "ld1 {v1.s}[2], [%0], %5 \n"
- MEMACCESS(0)
- "ld1 {v1.s}[3], [%0] \n"
-
- "mov %0, %2 \n"
-
- MEMACCESS(4)
- "ld1 {v2.16b}, [%4] \n"
-
- "tbl v3.16b, {v0.16b}, v2.16b \n"
- "tbl v0.16b, {v1.16b}, v2.16b \n"
-
- // TODO(frkoenig): Rework shuffle above to
- // write out with 4 instead of 8 writes.
- MEMACCESS(0)
- "st1 {v3.s}[0], [%0], %6 \n"
- MEMACCESS(0)
- "st1 {v3.s}[1], [%0], %6 \n"
- MEMACCESS(0)
- "st1 {v3.s}[2], [%0], %6 \n"
- MEMACCESS(0)
- "st1 {v3.s}[3], [%0] \n"
-
- "add %0, %2, #4 \n"
- MEMACCESS(0)
- "st1 {v0.s}[0], [%0], %6 \n"
- MEMACCESS(0)
- "st1 {v0.s}[1], [%0], %6 \n"
- MEMACCESS(0)
- "st1 {v0.s}[2], [%0], %6 \n"
- MEMACCESS(0)
- "st1 {v0.s}[3], [%0] \n"
-
- "add %1, %1, #4 \n" // src += 4
- "add %2, %2, %6, lsl #2 \n" // dst += 4 * dst_stride
- "subs %3, %3, #4 \n" // w -= 4
- "b.eq 4f \n"
-
- // some residual, check to see if it includes a 2x8 block,
- // or less
- "cmp %3, #2 \n"
- "b.lt 3f \n"
-
- // 2x8 block
- "2: \n"
- "mov %0, %1 \n"
- MEMACCESS(0)
- "ld1 {v0.h}[0], [%0], %5 \n"
- MEMACCESS(0)
- "ld1 {v1.h}[0], [%0], %5 \n"
- MEMACCESS(0)
- "ld1 {v0.h}[1], [%0], %5 \n"
- MEMACCESS(0)
- "ld1 {v1.h}[1], [%0], %5 \n"
- MEMACCESS(0)
- "ld1 {v0.h}[2], [%0], %5 \n"
- MEMACCESS(0)
- "ld1 {v1.h}[2], [%0], %5 \n"
- MEMACCESS(0)
- "ld1 {v0.h}[3], [%0], %5 \n"
- MEMACCESS(0)
- "ld1 {v1.h}[3], [%0] \n"
-
- "trn2 v2.8b, v0.8b, v1.8b \n"
- "trn1 v3.8b, v0.8b, v1.8b \n"
-
- "mov %0, %2 \n"
-
- MEMACCESS(0)
- "st1 {v3.8b}, [%0], %6 \n"
- MEMACCESS(0)
- "st1 {v2.8b}, [%0] \n"
-
- "add %1, %1, #2 \n" // src += 2
- "add %2, %2, %6, lsl #1 \n" // dst += 2 * dst_stride
- "subs %3, %3, #2 \n" // w -= 2
- "b.eq 4f \n"
-
- // 1x8 block
- "3: \n"
- MEMACCESS(1)
- "ld1 {v0.b}[0], [%1], %5 \n"
- MEMACCESS(1)
- "ld1 {v0.b}[1], [%1], %5 \n"
- MEMACCESS(1)
- "ld1 {v0.b}[2], [%1], %5 \n"
- MEMACCESS(1)
- "ld1 {v0.b}[3], [%1], %5 \n"
- MEMACCESS(1)
- "ld1 {v0.b}[4], [%1], %5 \n"
- MEMACCESS(1)
- "ld1 {v0.b}[5], [%1], %5 \n"
- MEMACCESS(1)
- "ld1 {v0.b}[6], [%1], %5 \n"
- MEMACCESS(1)
- "ld1 {v0.b}[7], [%1] \n"
-
- MEMACCESS(2)
- "st1 {v0.8b}, [%2] \n"
-
- "4: \n"
-
- : "+r"(src_temp), // %0
- "+r"(src), // %1
- "+r"(dst), // %2
- "+r"(width64) // %3
- : "r"(&kVTbl4x4Transpose), // %4
- "r"(static_cast<ptrdiff_t>(src_stride)), // %5
- "r"(static_cast<ptrdiff_t>(dst_stride)) // %6
- : "memory", "cc", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v16",
- "v17", "v18", "v19", "v20", "v21", "v22", "v23"
- );
-}
-
-static uint8 kVTbl4x4TransposeDi[32] =
- { 0, 16, 32, 48, 2, 18, 34, 50, 4, 20, 36, 52, 6, 22, 38, 54,
- 1, 17, 33, 49, 3, 19, 35, 51, 5, 21, 37, 53, 7, 23, 39, 55};
-
-void TransposeUVWx8_NEON(const uint8* src, int src_stride,
- uint8* dst_a, int dst_stride_a,
- uint8* dst_b, int dst_stride_b,
- int width) {
- const uint8* src_temp = NULL;
- int64 width64 = (int64) width; // Work around clang 3.4 warning.
- asm volatile (
- // loops are on blocks of 8. loop will stop when
- // counter gets to or below 0. starting the counter
- // at w-8 allow for this
- "sub %4, %4, #8 \n"
-
- // handle 8x8 blocks. this should be the majority of the plane
- "1: \n"
- "mov %0, %1 \n"
-
- MEMACCESS(0)
- "ld1 {v0.16b}, [%0], %5 \n"
- MEMACCESS(0)
- "ld1 {v1.16b}, [%0], %5 \n"
- MEMACCESS(0)
- "ld1 {v2.16b}, [%0], %5 \n"
- MEMACCESS(0)
- "ld1 {v3.16b}, [%0], %5 \n"
- MEMACCESS(0)
- "ld1 {v4.16b}, [%0], %5 \n"
- MEMACCESS(0)
- "ld1 {v5.16b}, [%0], %5 \n"
- MEMACCESS(0)
- "ld1 {v6.16b}, [%0], %5 \n"
- MEMACCESS(0)
- "ld1 {v7.16b}, [%0] \n"
-
- "trn1 v16.16b, v0.16b, v1.16b \n"
- "trn2 v17.16b, v0.16b, v1.16b \n"
- "trn1 v18.16b, v2.16b, v3.16b \n"
- "trn2 v19.16b, v2.16b, v3.16b \n"
- "trn1 v20.16b, v4.16b, v5.16b \n"
- "trn2 v21.16b, v4.16b, v5.16b \n"
- "trn1 v22.16b, v6.16b, v7.16b \n"
- "trn2 v23.16b, v6.16b, v7.16b \n"
-
- "trn1 v0.8h, v16.8h, v18.8h \n"
- "trn2 v1.8h, v16.8h, v18.8h \n"
- "trn1 v2.8h, v20.8h, v22.8h \n"
- "trn2 v3.8h, v20.8h, v22.8h \n"
- "trn1 v4.8h, v17.8h, v19.8h \n"
- "trn2 v5.8h, v17.8h, v19.8h \n"
- "trn1 v6.8h, v21.8h, v23.8h \n"
- "trn2 v7.8h, v21.8h, v23.8h \n"
-
- "trn1 v16.4s, v0.4s, v2.4s \n"
- "trn2 v17.4s, v0.4s, v2.4s \n"
- "trn1 v18.4s, v1.4s, v3.4s \n"
- "trn2 v19.4s, v1.4s, v3.4s \n"
- "trn1 v20.4s, v4.4s, v6.4s \n"
- "trn2 v21.4s, v4.4s, v6.4s \n"
- "trn1 v22.4s, v5.4s, v7.4s \n"
- "trn2 v23.4s, v5.4s, v7.4s \n"
-
- "mov %0, %2 \n"
-
- MEMACCESS(0)
- "st1 {v16.d}[0], [%0], %6 \n"
- MEMACCESS(0)
- "st1 {v18.d}[0], [%0], %6 \n"
- MEMACCESS(0)
- "st1 {v17.d}[0], [%0], %6 \n"
- MEMACCESS(0)
- "st1 {v19.d}[0], [%0], %6 \n"
- MEMACCESS(0)
- "st1 {v16.d}[1], [%0], %6 \n"
- MEMACCESS(0)
- "st1 {v18.d}[1], [%0], %6 \n"
- MEMACCESS(0)
- "st1 {v17.d}[1], [%0], %6 \n"
- MEMACCESS(0)
- "st1 {v19.d}[1], [%0] \n"
-
- "mov %0, %3 \n"
-
- MEMACCESS(0)
- "st1 {v20.d}[0], [%0], %7 \n"
- MEMACCESS(0)
- "st1 {v22.d}[0], [%0], %7 \n"
- MEMACCESS(0)
- "st1 {v21.d}[0], [%0], %7 \n"
- MEMACCESS(0)
- "st1 {v23.d}[0], [%0], %7 \n"
- MEMACCESS(0)
- "st1 {v20.d}[1], [%0], %7 \n"
- MEMACCESS(0)
- "st1 {v22.d}[1], [%0], %7 \n"
- MEMACCESS(0)
- "st1 {v21.d}[1], [%0], %7 \n"
- MEMACCESS(0)
- "st1 {v23.d}[1], [%0] \n"
-
- "add %1, %1, #16 \n" // src += 8*2
- "add %2, %2, %6, lsl #3 \n" // dst_a += 8 * dst_stride_a
- "add %3, %3, %7, lsl #3 \n" // dst_b += 8 * dst_stride_b
- "subs %4, %4, #8 \n" // w -= 8
- "b.ge 1b \n"
-
- // add 8 back to counter. if the result is 0 there are
- // no residuals.
- "adds %4, %4, #8 \n"
- "b.eq 4f \n"
-
- // some residual, so between 1 and 7 lines left to transpose
- "cmp %4, #2 \n"
- "b.lt 3f \n"
-
- "cmp %4, #4 \n"
- "b.lt 2f \n"
-
- // TODO(frkoenig): Clean this up
- // 4x8 block
- "mov %0, %1 \n"
- MEMACCESS(0)
- "ld1 {v0.8b}, [%0], %5 \n"
- MEMACCESS(0)
- "ld1 {v1.8b}, [%0], %5 \n"
- MEMACCESS(0)
- "ld1 {v2.8b}, [%0], %5 \n"
- MEMACCESS(0)
- "ld1 {v3.8b}, [%0], %5 \n"
- MEMACCESS(0)
- "ld1 {v4.8b}, [%0], %5 \n"
- MEMACCESS(0)
- "ld1 {v5.8b}, [%0], %5 \n"
- MEMACCESS(0)
- "ld1 {v6.8b}, [%0], %5 \n"
- MEMACCESS(0)
- "ld1 {v7.8b}, [%0] \n"
-
- MEMACCESS(8)
- "ld1 {v30.16b}, [%8], #16 \n"
- "ld1 {v31.16b}, [%8] \n"
-
- "tbl v16.16b, {v0.16b, v1.16b, v2.16b, v3.16b}, v30.16b \n"
- "tbl v17.16b, {v0.16b, v1.16b, v2.16b, v3.16b}, v31.16b \n"
- "tbl v18.16b, {v4.16b, v5.16b, v6.16b, v7.16b}, v30.16b \n"
- "tbl v19.16b, {v4.16b, v5.16b, v6.16b, v7.16b}, v31.16b \n"
-
- "mov %0, %2 \n"
-
- MEMACCESS(0)
- "st1 {v16.s}[0], [%0], %6 \n"
- MEMACCESS(0)
- "st1 {v16.s}[1], [%0], %6 \n"
- MEMACCESS(0)
- "st1 {v16.s}[2], [%0], %6 \n"
- MEMACCESS(0)
- "st1 {v16.s}[3], [%0], %6 \n"
-
- "add %0, %2, #4 \n"
- MEMACCESS(0)
- "st1 {v18.s}[0], [%0], %6 \n"
- MEMACCESS(0)
- "st1 {v18.s}[1], [%0], %6 \n"
- MEMACCESS(0)
- "st1 {v18.s}[2], [%0], %6 \n"
- MEMACCESS(0)
- "st1 {v18.s}[3], [%0] \n"
-
- "mov %0, %3 \n"
-
- MEMACCESS(0)
- "st1 {v17.s}[0], [%0], %7 \n"
- MEMACCESS(0)
- "st1 {v17.s}[1], [%0], %7 \n"
- MEMACCESS(0)
- "st1 {v17.s}[2], [%0], %7 \n"
- MEMACCESS(0)
- "st1 {v17.s}[3], [%0], %7 \n"
-
- "add %0, %3, #4 \n"
- MEMACCESS(0)
- "st1 {v19.s}[0], [%0], %7 \n"
- MEMACCESS(0)
- "st1 {v19.s}[1], [%0], %7 \n"
- MEMACCESS(0)
- "st1 {v19.s}[2], [%0], %7 \n"
- MEMACCESS(0)
- "st1 {v19.s}[3], [%0] \n"
-
- "add %1, %1, #8 \n" // src += 4 * 2
- "add %2, %2, %6, lsl #2 \n" // dst_a += 4 * dst_stride_a
- "add %3, %3, %7, lsl #2 \n" // dst_b += 4 * dst_stride_b
- "subs %4, %4, #4 \n" // w -= 4
- "b.eq 4f \n"
-
- // some residual, check to see if it includes a 2x8 block,
- // or less
- "cmp %4, #2 \n"
- "b.lt 3f \n"
-
- // 2x8 block
- "2: \n"
- "mov %0, %1 \n"
- MEMACCESS(0)
- "ld2 {v0.h, v1.h}[0], [%0], %5 \n"
- MEMACCESS(0)
- "ld2 {v2.h, v3.h}[0], [%0], %5 \n"
- MEMACCESS(0)
- "ld2 {v0.h, v1.h}[1], [%0], %5 \n"
- MEMACCESS(0)
- "ld2 {v2.h, v3.h}[1], [%0], %5 \n"
- MEMACCESS(0)
- "ld2 {v0.h, v1.h}[2], [%0], %5 \n"
- MEMACCESS(0)
- "ld2 {v2.h, v3.h}[2], [%0], %5 \n"
- MEMACCESS(0)
- "ld2 {v0.h, v1.h}[3], [%0], %5 \n"
- MEMACCESS(0)
- "ld2 {v2.h, v3.h}[3], [%0] \n"
-
- "trn1 v4.8b, v0.8b, v2.8b \n"
- "trn2 v5.8b, v0.8b, v2.8b \n"
- "trn1 v6.8b, v1.8b, v3.8b \n"
- "trn2 v7.8b, v1.8b, v3.8b \n"
-
- "mov %0, %2 \n"
-
- MEMACCESS(0)
- "st1 {v4.d}[0], [%0], %6 \n"
- MEMACCESS(0)
- "st1 {v6.d}[0], [%0] \n"
-
- "mov %0, %3 \n"
-
- MEMACCESS(0)
- "st1 {v5.d}[0], [%0], %7 \n"
- MEMACCESS(0)
- "st1 {v7.d}[0], [%0] \n"
-
- "add %1, %1, #4 \n" // src += 2 * 2
- "add %2, %2, %6, lsl #1 \n" // dst_a += 2 * dst_stride_a
- "add %3, %3, %7, lsl #1 \n" // dst_b += 2 * dst_stride_b
- "subs %4, %4, #2 \n" // w -= 2
- "b.eq 4f \n"
-
- // 1x8 block
- "3: \n"
- MEMACCESS(1)
- "ld2 {v0.b, v1.b}[0], [%1], %5 \n"
- MEMACCESS(1)
- "ld2 {v0.b, v1.b}[1], [%1], %5 \n"
- MEMACCESS(1)
- "ld2 {v0.b, v1.b}[2], [%1], %5 \n"
- MEMACCESS(1)
- "ld2 {v0.b, v1.b}[3], [%1], %5 \n"
- MEMACCESS(1)
- "ld2 {v0.b, v1.b}[4], [%1], %5 \n"
- MEMACCESS(1)
- "ld2 {v0.b, v1.b}[5], [%1], %5 \n"
- MEMACCESS(1)
- "ld2 {v0.b, v1.b}[6], [%1], %5 \n"
- MEMACCESS(1)
- "ld2 {v0.b, v1.b}[7], [%1] \n"
-
- MEMACCESS(2)
- "st1 {v0.d}[0], [%2] \n"
- MEMACCESS(3)
- "st1 {v1.d}[0], [%3] \n"
-
- "4: \n"
-
- : "+r"(src_temp), // %0
- "+r"(src), // %1
- "+r"(dst_a), // %2
- "+r"(dst_b), // %3
- "+r"(width64) // %4
- : "r"(static_cast<ptrdiff_t>(src_stride)), // %5
- "r"(static_cast<ptrdiff_t>(dst_stride_a)), // %6
- "r"(static_cast<ptrdiff_t>(dst_stride_b)), // %7
- "r"(&kVTbl4x4TransposeDi) // %8
- : "memory", "cc",
- "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7",
- "v16", "v17", "v18", "v19", "v20", "v21", "v22", "v23",
- "v30", "v31"
- );
-}
-#endif // !defined(LIBYUV_DISABLE_NEON) && defined(__aarch64__)
-
-#ifdef __cplusplus
-} // extern "C"
-} // namespace libyuv
-#endif
diff --git a/third_party/aom/third_party/libyuv/source/rotate_win.cc b/third_party/aom/third_party/libyuv/source/rotate_win.cc
deleted file mode 100644
index 2760066df..000000000
--- a/third_party/aom/third_party/libyuv/source/rotate_win.cc
+++ /dev/null
@@ -1,248 +0,0 @@
-/*
- * Copyright 2013 The LibYuv Project Authors. All rights reserved.
- *
- * Use of this source code is governed by a BSD-style license
- * that can be found in the LICENSE file in the root of the source
- * tree. An additional intellectual property rights grant can be found
- * in the file PATENTS. All contributing project authors may
- * be found in the AUTHORS file in the root of the source tree.
- */
-
-#include "libyuv/row.h"
-#include "libyuv/rotate_row.h"
-
-#ifdef __cplusplus
-namespace libyuv {
-extern "C" {
-#endif
-
-// This module is for Visual C x86.
-#if !defined(LIBYUV_DISABLE_X86) && defined(_M_IX86) && \
- defined(_MSC_VER) && !defined(__clang__)
-
-__declspec(naked)
-void TransposeWx8_SSSE3(const uint8* src, int src_stride,
- uint8* dst, int dst_stride, int width) {
- __asm {
- push edi
- push esi
- push ebp
- mov eax, [esp + 12 + 4] // src
- mov edi, [esp + 12 + 8] // src_stride
- mov edx, [esp + 12 + 12] // dst
- mov esi, [esp + 12 + 16] // dst_stride
- mov ecx, [esp + 12 + 20] // width
-
- // Read in the data from the source pointer.
- // First round of bit swap.
- align 4
- convertloop:
- movq xmm0, qword ptr [eax]
- lea ebp, [eax + 8]
- movq xmm1, qword ptr [eax + edi]
- lea eax, [eax + 2 * edi]
- punpcklbw xmm0, xmm1
- movq xmm2, qword ptr [eax]
- movdqa xmm1, xmm0
- palignr xmm1, xmm1, 8
- movq xmm3, qword ptr [eax + edi]
- lea eax, [eax + 2 * edi]
- punpcklbw xmm2, xmm3
- movdqa xmm3, xmm2
- movq xmm4, qword ptr [eax]
- palignr xmm3, xmm3, 8
- movq xmm5, qword ptr [eax + edi]
- punpcklbw xmm4, xmm5
- lea eax, [eax + 2 * edi]
- movdqa xmm5, xmm4
- movq xmm6, qword ptr [eax]
- palignr xmm5, xmm5, 8
- movq xmm7, qword ptr [eax + edi]
- punpcklbw xmm6, xmm7
- mov eax, ebp
- movdqa xmm7, xmm6
- palignr xmm7, xmm7, 8
- // Second round of bit swap.
- punpcklwd xmm0, xmm2
- punpcklwd xmm1, xmm3
- movdqa xmm2, xmm0
- movdqa xmm3, xmm1
- palignr xmm2, xmm2, 8
- palignr xmm3, xmm3, 8
- punpcklwd xmm4, xmm6
- punpcklwd xmm5, xmm7
- movdqa xmm6, xmm4
- movdqa xmm7, xmm5
- palignr xmm6, xmm6, 8
- palignr xmm7, xmm7, 8
- // Third round of bit swap.
- // Write to the destination pointer.
- punpckldq xmm0, xmm4
- movq qword ptr [edx], xmm0
- movdqa xmm4, xmm0
- palignr xmm4, xmm4, 8
- movq qword ptr [edx + esi], xmm4
- lea edx, [edx + 2 * esi]
- punpckldq xmm2, xmm6
- movdqa xmm6, xmm2
- palignr xmm6, xmm6, 8
- movq qword ptr [edx], xmm2
- punpckldq xmm1, xmm5
- movq qword ptr [edx + esi], xmm6
- lea edx, [edx + 2 * esi]
- movdqa xmm5, xmm1
- movq qword ptr [edx], xmm1
- palignr xmm5, xmm5, 8
- punpckldq xmm3, xmm7
- movq qword ptr [edx + esi], xmm5
- lea edx, [edx + 2 * esi]
- movq qword ptr [edx], xmm3
- movdqa xmm7, xmm3
- palignr xmm7, xmm7, 8
- sub ecx, 8
- movq qword ptr [edx + esi], xmm7
- lea edx, [edx + 2 * esi]
- jg convertloop
-
- pop ebp
- pop esi
- pop edi
- ret
- }
-}
-
-__declspec(naked)
-void TransposeUVWx8_SSE2(const uint8* src, int src_stride,
- uint8* dst_a, int dst_stride_a,
- uint8* dst_b, int dst_stride_b,
- int w) {
- __asm {
- push ebx
- push esi
- push edi
- push ebp
- mov eax, [esp + 16 + 4] // src
- mov edi, [esp + 16 + 8] // src_stride
- mov edx, [esp + 16 + 12] // dst_a
- mov esi, [esp + 16 + 16] // dst_stride_a
- mov ebx, [esp + 16 + 20] // dst_b
- mov ebp, [esp + 16 + 24] // dst_stride_b
- mov ecx, esp
- sub esp, 4 + 16
- and esp, ~15
- mov [esp + 16], ecx
- mov ecx, [ecx + 16 + 28] // w
-
- align 4
- convertloop:
- // Read in the data from the source pointer.
- // First round of bit swap.
- movdqu xmm0, [eax]
- movdqu xmm1, [eax + edi]
- lea eax, [eax + 2 * edi]
- movdqa xmm7, xmm0 // use xmm7 as temp register.
- punpcklbw xmm0, xmm1
- punpckhbw xmm7, xmm1
- movdqa xmm1, xmm7
- movdqu xmm2, [eax]
- movdqu xmm3, [eax + edi]
- lea eax, [eax + 2 * edi]
- movdqa xmm7, xmm2
- punpcklbw xmm2, xmm3
- punpckhbw xmm7, xmm3
- movdqa xmm3, xmm7
- movdqu xmm4, [eax]
- movdqu xmm5, [eax + edi]
- lea eax, [eax + 2 * edi]
- movdqa xmm7, xmm4
- punpcklbw xmm4, xmm5
- punpckhbw xmm7, xmm5
- movdqa xmm5, xmm7
- movdqu xmm6, [eax]
- movdqu xmm7, [eax + edi]
- lea eax, [eax + 2 * edi]
- movdqu [esp], xmm5 // backup xmm5
- neg edi
- movdqa xmm5, xmm6 // use xmm5 as temp register.
- punpcklbw xmm6, xmm7
- punpckhbw xmm5, xmm7
- movdqa xmm7, xmm5
- lea eax, [eax + 8 * edi + 16]
- neg edi
- // Second round of bit swap.
- movdqa xmm5, xmm0
- punpcklwd xmm0, xmm2
- punpckhwd xmm5, xmm2
- movdqa xmm2, xmm5
- movdqa xmm5, xmm1
- punpcklwd xmm1, xmm3
- punpckhwd xmm5, xmm3
- movdqa xmm3, xmm5
- movdqa xmm5, xmm4
- punpcklwd xmm4, xmm6
- punpckhwd xmm5, xmm6
- movdqa xmm6, xmm5
- movdqu xmm5, [esp] // restore xmm5
- movdqu [esp], xmm6 // backup xmm6
- movdqa xmm6, xmm5 // use xmm6 as temp register.
- punpcklwd xmm5, xmm7
- punpckhwd xmm6, xmm7
- movdqa xmm7, xmm6
- // Third round of bit swap.
- // Write to the destination pointer.
- movdqa xmm6, xmm0
- punpckldq xmm0, xmm4
- punpckhdq xmm6, xmm4
- movdqa xmm4, xmm6
- movdqu xmm6, [esp] // restore xmm6
- movlpd qword ptr [edx], xmm0
- movhpd qword ptr [ebx], xmm0
- movlpd qword ptr [edx + esi], xmm4
- lea edx, [edx + 2 * esi]
- movhpd qword ptr [ebx + ebp], xmm4
- lea ebx, [ebx + 2 * ebp]
- movdqa xmm0, xmm2 // use xmm0 as the temp register.
- punpckldq xmm2, xmm6
- movlpd qword ptr [edx], xmm2
- movhpd qword ptr [ebx], xmm2
- punpckhdq xmm0, xmm6
- movlpd qword ptr [edx + esi], xmm0
- lea edx, [edx + 2 * esi]
- movhpd qword ptr [ebx + ebp], xmm0
- lea ebx, [ebx + 2 * ebp]
- movdqa xmm0, xmm1 // use xmm0 as the temp register.
- punpckldq xmm1, xmm5
- movlpd qword ptr [edx], xmm1
- movhpd qword ptr [ebx], xmm1
- punpckhdq xmm0, xmm5
- movlpd qword ptr [edx + esi], xmm0
- lea edx, [edx + 2 * esi]
- movhpd qword ptr [ebx + ebp], xmm0
- lea ebx, [ebx + 2 * ebp]
- movdqa xmm0, xmm3 // use xmm0 as the temp register.
- punpckldq xmm3, xmm7
- movlpd qword ptr [edx], xmm3
- movhpd qword ptr [ebx], xmm3
- punpckhdq xmm0, xmm7
- sub ecx, 8
- movlpd qword ptr [edx + esi], xmm0
- lea edx, [edx + 2 * esi]
- movhpd qword ptr [ebx + ebp], xmm0
- lea ebx, [ebx + 2 * ebp]
- jg convertloop
-
- mov esp, [esp + 16]
- pop ebp
- pop edi
- pop esi
- pop ebx
- ret
- }
-}
-
-#endif // !defined(LIBYUV_DISABLE_X86) && defined(_M_IX86)
-
-#ifdef __cplusplus
-} // extern "C"
-} // namespace libyuv
-#endif
diff --git a/third_party/aom/third_party/libyuv/source/row_any.cc b/third_party/aom/third_party/libyuv/source/row_any.cc
deleted file mode 100644
index 1cb1f6b93..000000000
--- a/third_party/aom/third_party/libyuv/source/row_any.cc
+++ /dev/null
@@ -1,680 +0,0 @@
-/*
- * Copyright 2012 The LibYuv Project Authors. All rights reserved.
- *
- * Use of this source code is governed by a BSD-style license
- * that can be found in the LICENSE file in the root of the source
- * tree. An additional intellectual property rights grant can be found
- * in the file PATENTS. All contributing project authors may
- * be found in the AUTHORS file in the root of the source tree.
- */
-
-#include "libyuv/row.h"
-
-#include <string.h> // For memset.
-
-#include "libyuv/basic_types.h"
-
-#ifdef __cplusplus
-namespace libyuv {
-extern "C" {
-#endif
-
-// Subsampled source needs to be increase by 1 of not even.
-#define SS(width, shift) (((width) + (1 << (shift)) - 1) >> (shift))
-
-// Any 3 planes to 1.
-#define ANY31(NAMEANY, ANY_SIMD, UVSHIFT, DUVSHIFT, BPP, MASK) \
- void NAMEANY(const uint8* y_buf, const uint8* u_buf, const uint8* v_buf, \
- uint8* dst_ptr, int width) { \
- SIMD_ALIGNED(uint8 temp[64 * 4]); \
- memset(temp, 0, 64 * 3); /* for YUY2 and msan */ \
- int r = width & MASK; \
- int n = width & ~MASK; \
- if (n > 0) { \
- ANY_SIMD(y_buf, u_buf, v_buf, dst_ptr, n); \
- } \
- memcpy(temp, y_buf + n, r); \
- memcpy(temp + 64, u_buf + (n >> UVSHIFT), SS(r, UVSHIFT)); \
- memcpy(temp + 128, v_buf + (n >> UVSHIFT), SS(r, UVSHIFT)); \
- ANY_SIMD(temp, temp + 64, temp + 128, temp + 192, MASK + 1); \
- memcpy(dst_ptr + (n >> DUVSHIFT) * BPP, temp + 192, \
- SS(r, DUVSHIFT) * BPP); \
- }
-
-#ifdef HAS_I422TOARGBROW_SSSE3
-ANY31(I422ToARGBRow_Any_SSSE3, I422ToARGBRow_SSSE3, 1, 0, 4, 7)
-#endif
-#ifdef HAS_I444TOARGBROW_SSSE3
-ANY31(I444ToARGBRow_Any_SSSE3, I444ToARGBRow_SSSE3, 0, 0, 4, 7)
-ANY31(I411ToARGBRow_Any_SSSE3, I411ToARGBRow_SSSE3, 2, 0, 4, 7)
-ANY31(I422ToBGRARow_Any_SSSE3, I422ToBGRARow_SSSE3, 1, 0, 4, 7)
-ANY31(I422ToABGRRow_Any_SSSE3, I422ToABGRRow_SSSE3, 1, 0, 4, 7)
-ANY31(I422ToRGBARow_Any_SSSE3, I422ToRGBARow_SSSE3, 1, 0, 4, 7)
-ANY31(I422ToARGB4444Row_Any_SSSE3, I422ToARGB4444Row_SSSE3, 1, 0, 2, 7)
-ANY31(I422ToARGB1555Row_Any_SSSE3, I422ToARGB1555Row_SSSE3, 1, 0, 2, 7)
-ANY31(I422ToRGB565Row_Any_SSSE3, I422ToRGB565Row_SSSE3, 1, 0, 2, 7)
-ANY31(I422ToRGB24Row_Any_SSSE3, I422ToRGB24Row_SSSE3, 1, 0, 3, 7)
-ANY31(I422ToRAWRow_Any_SSSE3, I422ToRAWRow_SSSE3, 1, 0, 3, 7)
-ANY31(I422ToYUY2Row_Any_SSE2, I422ToYUY2Row_SSE2, 1, 1, 4, 15)
-ANY31(I422ToUYVYRow_Any_SSE2, I422ToUYVYRow_SSE2, 1, 1, 4, 15)
-#endif // HAS_I444TOARGBROW_SSSE3
-#ifdef HAS_I422TORGB24ROW_AVX2
-ANY31(I422ToRGB24Row_Any_AVX2, I422ToRGB24Row_AVX2, 1, 0, 3, 15)
-#endif
-#ifdef HAS_I422TORAWROW_AVX2
-ANY31(I422ToRAWRow_Any_AVX2, I422ToRAWRow_AVX2, 1, 0, 3, 15)
-#endif
-#ifdef HAS_J422TOARGBROW_SSSE3
-ANY31(J422ToARGBRow_Any_SSSE3, J422ToARGBRow_SSSE3, 1, 0, 4, 7)
-#endif
-#ifdef HAS_J422TOARGBROW_AVX2
-ANY31(J422ToARGBRow_Any_AVX2, J422ToARGBRow_AVX2, 1, 0, 4, 15)
-#endif
-#ifdef HAS_I422TOARGBROW_AVX2
-ANY31(I422ToARGBRow_Any_AVX2, I422ToARGBRow_AVX2, 1, 0, 4, 15)
-#endif
-#ifdef HAS_I422TOBGRAROW_AVX2
-ANY31(I422ToBGRARow_Any_AVX2, I422ToBGRARow_AVX2, 1, 0, 4, 15)
-#endif
-#ifdef HAS_I422TORGBAROW_AVX2
-ANY31(I422ToRGBARow_Any_AVX2, I422ToRGBARow_AVX2, 1, 0, 4, 15)
-#endif
-#ifdef HAS_I422TOABGRROW_AVX2
-ANY31(I422ToABGRRow_Any_AVX2, I422ToABGRRow_AVX2, 1, 0, 4, 15)
-#endif
-#ifdef HAS_I444TOARGBROW_AVX2
-ANY31(I444ToARGBRow_Any_AVX2, I444ToARGBRow_AVX2, 0, 0, 4, 15)
-#endif
-#ifdef HAS_I411TOARGBROW_AVX2
-ANY31(I411ToARGBRow_Any_AVX2, I411ToARGBRow_AVX2, 2, 0, 4, 15)
-#endif
-#ifdef HAS_I422TOARGB4444ROW_AVX2
-ANY31(I422ToARGB4444Row_Any_AVX2, I422ToARGB4444Row_AVX2, 1, 0, 2, 7)
-#endif
-#ifdef HAS_I422TOARGB1555ROW_AVX2
-ANY31(I422ToARGB1555Row_Any_AVX2, I422ToARGB1555Row_AVX2, 1, 0, 2, 7)
-#endif
-#ifdef HAS_I422TORGB565ROW_AVX2
-ANY31(I422ToRGB565Row_Any_AVX2, I422ToRGB565Row_AVX2, 1, 0, 2, 7)
-#endif
-#ifdef HAS_I422TOARGBROW_NEON
-ANY31(I444ToARGBRow_Any_NEON, I444ToARGBRow_NEON, 0, 0, 4, 7)
-ANY31(I422ToARGBRow_Any_NEON, I422ToARGBRow_NEON, 1, 0, 4, 7)
-ANY31(I411ToARGBRow_Any_NEON, I411ToARGBRow_NEON, 2, 0, 4, 7)
-ANY31(I422ToBGRARow_Any_NEON, I422ToBGRARow_NEON, 1, 0, 4, 7)
-ANY31(I422ToABGRRow_Any_NEON, I422ToABGRRow_NEON, 1, 0, 4, 7)
-ANY31(I422ToRGBARow_Any_NEON, I422ToRGBARow_NEON, 1, 0, 4, 7)
-ANY31(I422ToRGB24Row_Any_NEON, I422ToRGB24Row_NEON, 1, 0, 3, 7)
-ANY31(I422ToRAWRow_Any_NEON, I422ToRAWRow_NEON, 1, 0, 3, 7)
-ANY31(I422ToARGB4444Row_Any_NEON, I422ToARGB4444Row_NEON, 1, 0, 2, 7)
-ANY31(I422ToARGB1555Row_Any_NEON, I422ToARGB1555Row_NEON, 1, 0, 2, 7)
-ANY31(I422ToRGB565Row_Any_NEON, I422ToRGB565Row_NEON, 1, 0, 2, 7)
-#endif
-#ifdef HAS_I422TOYUY2ROW_NEON
-ANY31(I422ToYUY2Row_Any_NEON, I422ToYUY2Row_NEON, 1, 1, 4, 15)
-#endif
-#ifdef HAS_I422TOUYVYROW_NEON
-ANY31(I422ToUYVYRow_Any_NEON, I422ToUYVYRow_NEON, 1, 1, 4, 15)
-#endif
-#undef ANY31
-
-// Any 2 planes to 1.
-#define ANY21(NAMEANY, ANY_SIMD, UVSHIFT, SBPP, SBPP2, BPP, MASK) \
- void NAMEANY(const uint8* y_buf, const uint8* uv_buf, \
- uint8* dst_ptr, int width) { \
- SIMD_ALIGNED(uint8 temp[64 * 3]); \
- memset(temp, 0, 64 * 2); /* for msan */ \
- int r = width & MASK; \
- int n = width & ~MASK; \
- if (n > 0) { \
- ANY_SIMD(y_buf, uv_buf, dst_ptr, n); \
- } \
- memcpy(temp, y_buf + n * SBPP, r * SBPP); \
- memcpy(temp + 64, uv_buf + (n >> UVSHIFT) * SBPP2, \
- SS(r, UVSHIFT) * SBPP2); \
- ANY_SIMD(temp, temp + 64, temp + 128, MASK + 1); \
- memcpy(dst_ptr + n * BPP, temp + 128, r * BPP); \
- }
-
-// Biplanar to RGB.
-#ifdef HAS_NV12TOARGBROW_SSSE3
-ANY21(NV12ToARGBRow_Any_SSSE3, NV12ToARGBRow_SSSE3, 1, 1, 2, 4, 7)
-ANY21(NV21ToARGBRow_Any_SSSE3, NV21ToARGBRow_SSSE3, 1, 1, 2, 4, 7)
-#endif
-#ifdef HAS_NV12TOARGBROW_AVX2
-ANY21(NV12ToARGBRow_Any_AVX2, NV12ToARGBRow_AVX2, 1, 1, 2, 4, 15)
-ANY21(NV21ToARGBRow_Any_AVX2, NV21ToARGBRow_AVX2, 1, 1, 2, 4, 15)
-#endif
-#ifdef HAS_NV12TOARGBROW_NEON
-ANY21(NV12ToARGBRow_Any_NEON, NV12ToARGBRow_NEON, 1, 1, 2, 4, 7)
-ANY21(NV21ToARGBRow_Any_NEON, NV21ToARGBRow_NEON, 1, 1, 2, 4, 7)
-#endif
-#ifdef HAS_NV12TORGB565ROW_SSSE3
-ANY21(NV12ToRGB565Row_Any_SSSE3, NV12ToRGB565Row_SSSE3, 1, 1, 2, 2, 7)
-ANY21(NV21ToRGB565Row_Any_SSSE3, NV21ToRGB565Row_SSSE3, 1, 1, 2, 2, 7)
-#endif
-#ifdef HAS_NV12TORGB565ROW_AVX2
-ANY21(NV12ToRGB565Row_Any_AVX2, NV12ToRGB565Row_AVX2, 1, 1, 2, 2, 15)
-ANY21(NV21ToRGB565Row_Any_AVX2, NV21ToRGB565Row_AVX2, 1, 1, 2, 2, 15)
-#endif
-#ifdef HAS_NV12TORGB565ROW_NEON
-ANY21(NV12ToRGB565Row_Any_NEON, NV12ToRGB565Row_NEON, 1, 1, 2, 2, 7)
-ANY21(NV21ToRGB565Row_Any_NEON, NV21ToRGB565Row_NEON, 1, 1, 2, 2, 7)
-#endif
-
-// Merge functions.
-#ifdef HAS_MERGEUVROW_SSE2
-ANY21(MergeUVRow_Any_SSE2, MergeUVRow_SSE2, 0, 1, 1, 2, 15)
-#endif
-#ifdef HAS_MERGEUVROW_AVX2
-ANY21(MergeUVRow_Any_AVX2, MergeUVRow_AVX2, 0, 1, 1, 2, 31)
-#endif
-#ifdef HAS_MERGEUVROW_NEON
-ANY21(MergeUVRow_Any_NEON, MergeUVRow_NEON, 0, 1, 1, 2, 15)
-#endif
-
-// Math functions.
-#ifdef HAS_ARGBMULTIPLYROW_SSE2
-ANY21(ARGBMultiplyRow_Any_SSE2, ARGBMultiplyRow_SSE2, 0, 4, 4, 4, 3)
-#endif
-#ifdef HAS_ARGBADDROW_SSE2
-ANY21(ARGBAddRow_Any_SSE2, ARGBAddRow_SSE2, 0, 4, 4, 4, 3)
-#endif
-#ifdef HAS_ARGBSUBTRACTROW_SSE2
-ANY21(ARGBSubtractRow_Any_SSE2, ARGBSubtractRow_SSE2, 0, 4, 4, 4, 3)
-#endif
-#ifdef HAS_ARGBMULTIPLYROW_AVX2
-ANY21(ARGBMultiplyRow_Any_AVX2, ARGBMultiplyRow_AVX2, 0, 4, 4, 4, 7)
-#endif
-#ifdef HAS_ARGBADDROW_AVX2
-ANY21(ARGBAddRow_Any_AVX2, ARGBAddRow_AVX2, 0, 4, 4, 4, 7)
-#endif
-#ifdef HAS_ARGBSUBTRACTROW_AVX2
-ANY21(ARGBSubtractRow_Any_AVX2, ARGBSubtractRow_AVX2, 0, 4, 4, 4, 7)
-#endif
-#ifdef HAS_ARGBMULTIPLYROW_NEON
-ANY21(ARGBMultiplyRow_Any_NEON, ARGBMultiplyRow_NEON, 0, 4, 4, 4, 7)
-#endif
-#ifdef HAS_ARGBADDROW_NEON
-ANY21(ARGBAddRow_Any_NEON, ARGBAddRow_NEON, 0, 4, 4, 4, 7)
-#endif
-#ifdef HAS_ARGBSUBTRACTROW_NEON
-ANY21(ARGBSubtractRow_Any_NEON, ARGBSubtractRow_NEON, 0, 4, 4, 4, 7)
-#endif
-#ifdef HAS_SOBELROW_SSE2
-ANY21(SobelRow_Any_SSE2, SobelRow_SSE2, 0, 1, 1, 4, 15)
-#endif
-#ifdef HAS_SOBELROW_NEON
-ANY21(SobelRow_Any_NEON, SobelRow_NEON, 0, 1, 1, 4, 7)
-#endif
-#ifdef HAS_SOBELTOPLANEROW_SSE2
-ANY21(SobelToPlaneRow_Any_SSE2, SobelToPlaneRow_SSE2, 0, 1, 1, 1, 15)
-#endif
-#ifdef HAS_SOBELTOPLANEROW_NEON
-ANY21(SobelToPlaneRow_Any_NEON, SobelToPlaneRow_NEON, 0, 1, 1, 1, 15)
-#endif
-#ifdef HAS_SOBELXYROW_SSE2
-ANY21(SobelXYRow_Any_SSE2, SobelXYRow_SSE2, 0, 1, 1, 4, 15)
-#endif
-#ifdef HAS_SOBELXYROW_NEON
-ANY21(SobelXYRow_Any_NEON, SobelXYRow_NEON, 0, 1, 1, 4, 7)
-#endif
-#undef ANY21
-
-// Any 1 to 1.
-#define ANY11(NAMEANY, ANY_SIMD, UVSHIFT, SBPP, BPP, MASK) \
- void NAMEANY(const uint8* src_ptr, uint8* dst_ptr, int width) { \
- SIMD_ALIGNED(uint8 temp[128 * 2]); \
- memset(temp, 0, 128); /* for YUY2 and msan */ \
- int r = width & MASK; \
- int n = width & ~MASK; \
- if (n > 0) { \
- ANY_SIMD(src_ptr, dst_ptr, n); \
- } \
- memcpy(temp, src_ptr + (n >> UVSHIFT) * SBPP, SS(r, UVSHIFT) * SBPP); \
- ANY_SIMD(temp, temp + 128, MASK + 1); \
- memcpy(dst_ptr + n * BPP, temp + 128, r * BPP); \
- }
-
-#ifdef HAS_COPYROW_AVX
-ANY11(CopyRow_Any_AVX, CopyRow_AVX, 0, 1, 1, 63)
-#endif
-#ifdef HAS_COPYROW_SSE2
-ANY11(CopyRow_Any_SSE2, CopyRow_SSE2, 0, 1, 1, 31)
-#endif
-#ifdef HAS_COPYROW_NEON
-ANY11(CopyRow_Any_NEON, CopyRow_NEON, 0, 1, 1, 31)
-#endif
-#if defined(HAS_ARGBTORGB24ROW_SSSE3)
-ANY11(ARGBToRGB24Row_Any_SSSE3, ARGBToRGB24Row_SSSE3, 0, 4, 3, 15)
-ANY11(ARGBToRAWRow_Any_SSSE3, ARGBToRAWRow_SSSE3, 0, 4, 3, 15)
-ANY11(ARGBToRGB565Row_Any_SSE2, ARGBToRGB565Row_SSE2, 0, 4, 2, 3)
-ANY11(ARGBToARGB1555Row_Any_SSE2, ARGBToARGB1555Row_SSE2, 0, 4, 2, 3)
-ANY11(ARGBToARGB4444Row_Any_SSE2, ARGBToARGB4444Row_SSE2, 0, 4, 2, 3)
-#endif
-#if defined(HAS_ARGBTOARGB4444ROW_AVX2)
-ANY11(ARGBToRGB565Row_Any_AVX2, ARGBToRGB565Row_AVX2, 0, 4, 2, 7)
-ANY11(ARGBToARGB1555Row_Any_AVX2, ARGBToARGB1555Row_AVX2, 0, 4, 2, 7)
-ANY11(ARGBToARGB4444Row_Any_AVX2, ARGBToARGB4444Row_AVX2, 0, 4, 2, 7)
-#endif
-#if defined(HAS_J400TOARGBROW_SSE2)
-ANY11(J400ToARGBRow_Any_SSE2, J400ToARGBRow_SSE2, 0, 1, 4, 7)
-#endif
-#if defined(HAS_J400TOARGBROW_AVX2)
-ANY11(J400ToARGBRow_Any_AVX2, J400ToARGBRow_AVX2, 0, 1, 4, 15)
-#endif
-#if defined(HAS_I400TOARGBROW_SSE2)
-ANY11(I400ToARGBRow_Any_SSE2, I400ToARGBRow_SSE2, 0, 1, 4, 7)
-#endif
-#if defined(HAS_I400TOARGBROW_AVX2)
-ANY11(I400ToARGBRow_Any_AVX2, I400ToARGBRow_AVX2, 0, 1, 4, 15)
-#endif
-#if defined(HAS_YUY2TOARGBROW_SSSE3)
-ANY11(YUY2ToARGBRow_Any_SSSE3, YUY2ToARGBRow_SSSE3, 1, 4, 4, 15)
-ANY11(UYVYToARGBRow_Any_SSSE3, UYVYToARGBRow_SSSE3, 1, 4, 4, 15)
-ANY11(RGB24ToARGBRow_Any_SSSE3, RGB24ToARGBRow_SSSE3, 0, 3, 4, 15)
-ANY11(RAWToARGBRow_Any_SSSE3, RAWToARGBRow_SSSE3, 0, 3, 4, 15)
-ANY11(RGB565ToARGBRow_Any_SSE2, RGB565ToARGBRow_SSE2, 0, 2, 4, 7)
-ANY11(ARGB1555ToARGBRow_Any_SSE2, ARGB1555ToARGBRow_SSE2, 0, 2, 4, 7)
-ANY11(ARGB4444ToARGBRow_Any_SSE2, ARGB4444ToARGBRow_SSE2, 0, 2, 4, 7)
-#endif
-#if defined(HAS_RGB565TOARGBROW_AVX2)
-ANY11(RGB565ToARGBRow_Any_AVX2, RGB565ToARGBRow_AVX2, 0, 2, 4, 15)
-#endif
-#if defined(HAS_ARGB1555TOARGBROW_AVX2)
-ANY11(ARGB1555ToARGBRow_Any_AVX2, ARGB1555ToARGBRow_AVX2, 0, 2, 4, 15)
-#endif
-#if defined(HAS_ARGB4444TOARGBROW_AVX2)
-ANY11(ARGB4444ToARGBRow_Any_AVX2, ARGB4444ToARGBRow_AVX2, 0, 2, 4, 15)
-#endif
-#if defined(HAS_YUY2TOARGBROW_AVX2)
-ANY11(YUY2ToARGBRow_Any_AVX2, YUY2ToARGBRow_AVX2, 1, 4, 4, 31)
-ANY11(UYVYToARGBRow_Any_AVX2, UYVYToARGBRow_AVX2, 1, 4, 4, 31)
-#endif
-#if defined(HAS_ARGBTORGB24ROW_NEON)
-ANY11(ARGBToRGB24Row_Any_NEON, ARGBToRGB24Row_NEON, 0, 4, 3, 7)
-ANY11(ARGBToRAWRow_Any_NEON, ARGBToRAWRow_NEON, 0, 4, 3, 7)
-ANY11(ARGBToRGB565Row_Any_NEON, ARGBToRGB565Row_NEON, 0, 4, 2, 7)
-ANY11(ARGBToARGB1555Row_Any_NEON, ARGBToARGB1555Row_NEON, 0, 4, 2, 7)
-ANY11(ARGBToARGB4444Row_Any_NEON, ARGBToARGB4444Row_NEON, 0, 4, 2, 7)
-ANY11(J400ToARGBRow_Any_NEON, J400ToARGBRow_NEON, 0, 1, 4, 7)
-ANY11(I400ToARGBRow_Any_NEON, I400ToARGBRow_NEON, 0, 1, 4, 7)
-ANY11(YUY2ToARGBRow_Any_NEON, YUY2ToARGBRow_NEON, 1, 4, 4, 7)
-ANY11(UYVYToARGBRow_Any_NEON, UYVYToARGBRow_NEON, 1, 4, 4, 7)
-#endif
-#ifdef HAS_ARGBTOYROW_AVX2
-ANY11(ARGBToYRow_Any_AVX2, ARGBToYRow_AVX2, 0, 4, 1, 31)
-#endif
-#ifdef HAS_ARGBTOYJROW_AVX2
-ANY11(ARGBToYJRow_Any_AVX2, ARGBToYJRow_AVX2, 0, 4, 1, 31)
-#endif
-#ifdef HAS_UYVYTOYROW_AVX2
-ANY11(UYVYToYRow_Any_AVX2, UYVYToYRow_AVX2, 0, 2, 1, 31)
-#endif
-#ifdef HAS_YUY2TOYROW_AVX2
-ANY11(YUY2ToYRow_Any_AVX2, YUY2ToYRow_AVX2, 1, 4, 1, 31)
-#endif
-#ifdef HAS_ARGBTOYROW_SSSE3
-ANY11(ARGBToYRow_Any_SSSE3, ARGBToYRow_SSSE3, 0, 4, 1, 15)
-#endif
-#ifdef HAS_BGRATOYROW_SSSE3
-ANY11(BGRAToYRow_Any_SSSE3, BGRAToYRow_SSSE3, 0, 4, 1, 15)
-ANY11(ABGRToYRow_Any_SSSE3, ABGRToYRow_SSSE3, 0, 4, 1, 15)
-ANY11(RGBAToYRow_Any_SSSE3, RGBAToYRow_SSSE3, 0, 4, 1, 15)
-ANY11(YUY2ToYRow_Any_SSE2, YUY2ToYRow_SSE2, 1, 4, 1, 15)
-ANY11(UYVYToYRow_Any_SSE2, UYVYToYRow_SSE2, 1, 4, 1, 15)
-#endif
-#ifdef HAS_ARGBTOYJROW_SSSE3
-ANY11(ARGBToYJRow_Any_SSSE3, ARGBToYJRow_SSSE3, 0, 4, 1, 15)
-#endif
-#ifdef HAS_ARGBTOYROW_NEON
-ANY11(ARGBToYRow_Any_NEON, ARGBToYRow_NEON, 0, 4, 1, 7)
-#endif
-#ifdef HAS_ARGBTOYJROW_NEON
-ANY11(ARGBToYJRow_Any_NEON, ARGBToYJRow_NEON, 0, 4, 1, 7)
-#endif
-#ifdef HAS_BGRATOYROW_NEON
-ANY11(BGRAToYRow_Any_NEON, BGRAToYRow_NEON, 0, 4, 1, 7)
-#endif
-#ifdef HAS_ABGRTOYROW_NEON
-ANY11(ABGRToYRow_Any_NEON, ABGRToYRow_NEON, 0, 4, 1, 7)
-#endif
-#ifdef HAS_RGBATOYROW_NEON
-ANY11(RGBAToYRow_Any_NEON, RGBAToYRow_NEON, 0, 4, 1, 7)
-#endif
-#ifdef HAS_RGB24TOYROW_NEON
-ANY11(RGB24ToYRow_Any_NEON, RGB24ToYRow_NEON, 0, 3, 1, 7)
-#endif
-#ifdef HAS_RAWTOYROW_NEON
-ANY11(RAWToYRow_Any_NEON, RAWToYRow_NEON, 0, 3, 1, 7)
-#endif
-#ifdef HAS_RGB565TOYROW_NEON
-ANY11(RGB565ToYRow_Any_NEON, RGB565ToYRow_NEON, 0, 2, 1, 7)
-#endif
-#ifdef HAS_ARGB1555TOYROW_NEON
-ANY11(ARGB1555ToYRow_Any_NEON, ARGB1555ToYRow_NEON, 0, 2, 1, 7)
-#endif
-#ifdef HAS_ARGB4444TOYROW_NEON
-ANY11(ARGB4444ToYRow_Any_NEON, ARGB4444ToYRow_NEON, 0, 2, 1, 7)
-#endif
-#ifdef HAS_YUY2TOYROW_NEON
-ANY11(YUY2ToYRow_Any_NEON, YUY2ToYRow_NEON, 1, 4, 1, 15)
-#endif
-#ifdef HAS_UYVYTOYROW_NEON
-ANY11(UYVYToYRow_Any_NEON, UYVYToYRow_NEON, 0, 2, 1, 15)
-#endif
-#ifdef HAS_RGB24TOARGBROW_NEON
-ANY11(RGB24ToARGBRow_Any_NEON, RGB24ToARGBRow_NEON, 0, 3, 4, 7)
-#endif
-#ifdef HAS_RAWTOARGBROW_NEON
-ANY11(RAWToARGBRow_Any_NEON, RAWToARGBRow_NEON, 0, 3, 4, 7)
-#endif
-#ifdef HAS_RGB565TOARGBROW_NEON
-ANY11(RGB565ToARGBRow_Any_NEON, RGB565ToARGBRow_NEON, 0, 2, 4, 7)
-#endif
-#ifdef HAS_ARGB1555TOARGBROW_NEON
-ANY11(ARGB1555ToARGBRow_Any_NEON, ARGB1555ToARGBRow_NEON, 0, 2, 4, 7)
-#endif
-#ifdef HAS_ARGB4444TOARGBROW_NEON
-ANY11(ARGB4444ToARGBRow_Any_NEON, ARGB4444ToARGBRow_NEON, 0, 2, 4, 7)
-#endif
-#ifdef HAS_ARGBATTENUATEROW_SSSE3
-ANY11(ARGBAttenuateRow_Any_SSSE3, ARGBAttenuateRow_SSSE3, 0, 4, 4, 3)
-#endif
-#ifdef HAS_ARGBATTENUATEROW_SSE2
-ANY11(ARGBAttenuateRow_Any_SSE2, ARGBAttenuateRow_SSE2, 0, 4, 4, 3)
-#endif
-#ifdef HAS_ARGBUNATTENUATEROW_SSE2
-ANY11(ARGBUnattenuateRow_Any_SSE2, ARGBUnattenuateRow_SSE2, 0, 4, 4, 3)
-#endif
-#ifdef HAS_ARGBATTENUATEROW_AVX2
-ANY11(ARGBAttenuateRow_Any_AVX2, ARGBAttenuateRow_AVX2, 0, 4, 4, 7)
-#endif
-#ifdef HAS_ARGBUNATTENUATEROW_AVX2
-ANY11(ARGBUnattenuateRow_Any_AVX2, ARGBUnattenuateRow_AVX2, 0, 4, 4, 7)
-#endif
-#ifdef HAS_ARGBATTENUATEROW_NEON
-ANY11(ARGBAttenuateRow_Any_NEON, ARGBAttenuateRow_NEON, 0, 4, 4, 7)
-#endif
-#undef ANY11
-
-// Any 1 to 1 with parameter.
-#define ANY11P(NAMEANY, ANY_SIMD, T, SBPP, BPP, MASK) \
- void NAMEANY(const uint8* src_ptr, uint8* dst_ptr, \
- T shuffler, int width) { \
- SIMD_ALIGNED(uint8 temp[64 * 2]); \
- memset(temp, 0, 64); /* for msan */ \
- int r = width & MASK; \
- int n = width & ~MASK; \
- if (n > 0) { \
- ANY_SIMD(src_ptr, dst_ptr, shuffler, n); \
- } \
- memcpy(temp, src_ptr + n * SBPP, r * SBPP); \
- ANY_SIMD(temp, temp + 64, shuffler, MASK + 1); \
- memcpy(dst_ptr + n * BPP, temp + 64, r * BPP); \
- }
-
-#if defined(HAS_ARGBTORGB565DITHERROW_SSE2)
-ANY11P(ARGBToRGB565DitherRow_Any_SSE2, ARGBToRGB565DitherRow_SSE2,
- const uint32, 4, 2, 3)
-#endif
-#if defined(HAS_ARGBTORGB565DITHERROW_AVX2)
-ANY11P(ARGBToRGB565DitherRow_Any_AVX2, ARGBToRGB565DitherRow_AVX2,
- const uint32, 4, 2, 7)
-#endif
-#if defined(HAS_ARGBTORGB565DITHERROW_NEON)
-ANY11P(ARGBToRGB565DitherRow_Any_NEON, ARGBToRGB565DitherRow_NEON,
- const uint32, 4, 2, 7)
-#endif
-#ifdef HAS_ARGBSHUFFLEROW_SSE2
-ANY11P(ARGBShuffleRow_Any_SSE2, ARGBShuffleRow_SSE2, const uint8*, 4, 4, 3)
-#endif
-#ifdef HAS_ARGBSHUFFLEROW_SSSE3
-ANY11P(ARGBShuffleRow_Any_SSSE3, ARGBShuffleRow_SSSE3, const uint8*, 4, 4, 7)
-#endif
-#ifdef HAS_ARGBSHUFFLEROW_AVX2
-ANY11P(ARGBShuffleRow_Any_AVX2, ARGBShuffleRow_AVX2, const uint8*, 4, 4, 15)
-#endif
-#ifdef HAS_ARGBSHUFFLEROW_NEON
-ANY11P(ARGBShuffleRow_Any_NEON, ARGBShuffleRow_NEON, const uint8*, 4, 4, 3)
-#endif
-#undef ANY11P
-
-// Any 1 to 1 interpolate. Takes 2 rows of source via stride.
-#define ANY11T(NAMEANY, ANY_SIMD, SBPP, BPP, MASK) \
- void NAMEANY(uint8* dst_ptr, const uint8* src_ptr, \
- ptrdiff_t src_stride_ptr, int width, \
- int source_y_fraction) { \
- SIMD_ALIGNED(uint8 temp[64 * 3]); \
- memset(temp, 0, 64 * 2); /* for msan */ \
- int r = width & MASK; \
- int n = width & ~MASK; \
- if (n > 0) { \
- ANY_SIMD(dst_ptr, src_ptr, src_stride_ptr, n, source_y_fraction); \
- } \
- memcpy(temp, src_ptr + n * SBPP, r * SBPP); \
- memcpy(temp + 64, src_ptr + src_stride_ptr + n * SBPP, r * SBPP); \
- ANY_SIMD(temp + 128, temp, 64, MASK + 1, source_y_fraction); \
- memcpy(dst_ptr + n * BPP, temp + 128, r * BPP); \
- }
-
-#ifdef HAS_INTERPOLATEROW_AVX2
-ANY11T(InterpolateRow_Any_AVX2, InterpolateRow_AVX2, 1, 1, 31)
-#endif
-#ifdef HAS_INTERPOLATEROW_SSSE3
-ANY11T(InterpolateRow_Any_SSSE3, InterpolateRow_SSSE3, 1, 1, 15)
-#endif
-#ifdef HAS_INTERPOLATEROW_SSE2
-ANY11T(InterpolateRow_Any_SSE2, InterpolateRow_SSE2, 1, 1, 15)
-#endif
-#ifdef HAS_INTERPOLATEROW_NEON
-ANY11T(InterpolateRow_Any_NEON, InterpolateRow_NEON, 1, 1, 15)
-#endif
-#ifdef HAS_INTERPOLATEROW_MIPS_DSPR2
-ANY11T(InterpolateRow_Any_MIPS_DSPR2, InterpolateRow_MIPS_DSPR2, 1, 1, 3)
-#endif
-#undef ANY11T
-
-// Any 1 to 1 mirror.
-#define ANY11M(NAMEANY, ANY_SIMD, BPP, MASK) \
- void NAMEANY(const uint8* src_ptr, uint8* dst_ptr, int width) { \
- SIMD_ALIGNED(uint8 temp[64 * 2]); \
- memset(temp, 0, 64); /* for msan */ \
- int r = width & MASK; \
- int n = width & ~MASK; \
- if (n > 0) { \
- ANY_SIMD(src_ptr + r * BPP, dst_ptr, n); \
- } \
- memcpy(temp, src_ptr, r * BPP); \
- ANY_SIMD(temp, temp + 64, MASK + 1); \
- memcpy(dst_ptr + n * BPP, temp + 64 + (MASK + 1 - r) * BPP, r * BPP); \
- }
-
-#ifdef HAS_MIRRORROW_AVX2
-ANY11M(MirrorRow_Any_AVX2, MirrorRow_AVX2, 1, 31)
-#endif
-#ifdef HAS_MIRRORROW_SSSE3
-ANY11M(MirrorRow_Any_SSSE3, MirrorRow_SSSE3, 1, 15)
-#endif
-#ifdef HAS_MIRRORROW_SSE2
-ANY11M(MirrorRow_Any_SSE2, MirrorRow_SSE2, 1, 15)
-#endif
-#ifdef HAS_MIRRORROW_NEON
-ANY11M(MirrorRow_Any_NEON, MirrorRow_NEON, 1, 15)
-#endif
-#ifdef HAS_ARGBMIRRORROW_AVX2
-ANY11M(ARGBMirrorRow_Any_AVX2, ARGBMirrorRow_AVX2, 4, 7)
-#endif
-#ifdef HAS_ARGBMIRRORROW_SSE2
-ANY11M(ARGBMirrorRow_Any_SSE2, ARGBMirrorRow_SSE2, 4, 3)
-#endif
-#ifdef HAS_ARGBMIRRORROW_NEON
-ANY11M(ARGBMirrorRow_Any_NEON, ARGBMirrorRow_NEON, 4, 3)
-#endif
-#undef ANY11M
-
-// Any 1 plane. (memset)
-#define ANY1(NAMEANY, ANY_SIMD, T, BPP, MASK) \
- void NAMEANY(uint8* dst_ptr, T v32, int width) { \
- SIMD_ALIGNED(uint8 temp[64]); \
- int r = width & MASK; \
- int n = width & ~MASK; \
- if (n > 0) { \
- ANY_SIMD(dst_ptr, v32, n); \
- } \
- ANY_SIMD(temp, v32, MASK + 1); \
- memcpy(dst_ptr + n * BPP, temp, r * BPP); \
- }
-
-#ifdef HAS_SETROW_X86
-ANY1(SetRow_Any_X86, SetRow_X86, uint8, 1, 3)
-#endif
-#ifdef HAS_SETROW_NEON
-ANY1(SetRow_Any_NEON, SetRow_NEON, uint8, 1, 15)
-#endif
-#ifdef HAS_ARGBSETROW_NEON
-ANY1(ARGBSetRow_Any_NEON, ARGBSetRow_NEON, uint32, 4, 3)
-#endif
-#undef ANY1
-
-// Any 1 to 2. Outputs UV planes.
-#define ANY12(NAMEANY, ANY_SIMD, UVSHIFT, BPP, DUVSHIFT, MASK) \
- void NAMEANY(const uint8* src_ptr, uint8* dst_u, uint8* dst_v, int width) {\
- SIMD_ALIGNED(uint8 temp[128 * 3]); \
- memset(temp, 0, 128); /* for msan */ \
- int r = width & MASK; \
- int n = width & ~MASK; \
- if (n > 0) { \
- ANY_SIMD(src_ptr, dst_u, dst_v, n); \
- } \
- memcpy(temp, src_ptr + (n >> UVSHIFT) * BPP, SS(r, UVSHIFT) * BPP); \
- if ((width & 1) && BPP == 4) { /* repeat last 4 bytes for subsampler */ \
- memcpy(temp + SS(r, UVSHIFT) * BPP, \
- temp + SS(r, UVSHIFT) * BPP - BPP, 4); \
- } \
- ANY_SIMD(temp, temp + 128, temp + 256, MASK + 1); \
- memcpy(dst_u + (n >> DUVSHIFT), temp + 128, SS(r, DUVSHIFT)); \
- memcpy(dst_v + (n >> DUVSHIFT), temp + 256, SS(r, DUVSHIFT)); \
- }
-
-#ifdef HAS_SPLITUVROW_SSE2
-ANY12(SplitUVRow_Any_SSE2, SplitUVRow_SSE2, 0, 2, 0, 15)
-#endif
-#ifdef HAS_SPLITUVROW_AVX2
-ANY12(SplitUVRow_Any_AVX2, SplitUVRow_AVX2, 0, 2, 0, 31)
-#endif
-#ifdef HAS_SPLITUVROW_NEON
-ANY12(SplitUVRow_Any_NEON, SplitUVRow_NEON, 0, 2, 0, 15)
-#endif
-#ifdef HAS_SPLITUVROW_MIPS_DSPR2
-ANY12(SplitUVRow_Any_MIPS_DSPR2, SplitUVRow_MIPS_DSPR2, 0, 2, 0, 15)
-#endif
-#ifdef HAS_ARGBTOUV444ROW_SSSE3
-ANY12(ARGBToUV444Row_Any_SSSE3, ARGBToUV444Row_SSSE3, 0, 4, 0, 15)
-#endif
-#ifdef HAS_YUY2TOUV422ROW_AVX2
-ANY12(YUY2ToUV422Row_Any_AVX2, YUY2ToUV422Row_AVX2, 1, 4, 1, 31)
-ANY12(UYVYToUV422Row_Any_AVX2, UYVYToUV422Row_AVX2, 1, 4, 1, 31)
-#endif
-#ifdef HAS_ARGBTOUV422ROW_SSSE3
-ANY12(ARGBToUV422Row_Any_SSSE3, ARGBToUV422Row_SSSE3, 0, 4, 1, 15)
-#endif
-#ifdef HAS_YUY2TOUV422ROW_SSE2
-ANY12(YUY2ToUV422Row_Any_SSE2, YUY2ToUV422Row_SSE2, 1, 4, 1, 15)
-ANY12(UYVYToUV422Row_Any_SSE2, UYVYToUV422Row_SSE2, 1, 4, 1, 15)
-#endif
-#ifdef HAS_YUY2TOUV422ROW_NEON
-ANY12(ARGBToUV444Row_Any_NEON, ARGBToUV444Row_NEON, 0, 4, 0, 7)
-ANY12(ARGBToUV422Row_Any_NEON, ARGBToUV422Row_NEON, 0, 4, 1, 15)
-ANY12(ARGBToUV411Row_Any_NEON, ARGBToUV411Row_NEON, 0, 4, 2, 31)
-ANY12(YUY2ToUV422Row_Any_NEON, YUY2ToUV422Row_NEON, 1, 4, 1, 15)
-ANY12(UYVYToUV422Row_Any_NEON, UYVYToUV422Row_NEON, 1, 4, 1, 15)
-#endif
-#undef ANY12
-
-// Any 1 to 2 with source stride (2 rows of source). Outputs UV planes.
-// 128 byte row allows for 32 avx ARGB pixels.
-#define ANY12S(NAMEANY, ANY_SIMD, UVSHIFT, BPP, MASK) \
- void NAMEANY(const uint8* src_ptr, int src_stride_ptr, \
- uint8* dst_u, uint8* dst_v, int width) { \
- SIMD_ALIGNED(uint8 temp[128 * 4]); \
- memset(temp, 0, 128 * 2); /* for msan */ \
- int r = width & MASK; \
- int n = width & ~MASK; \
- if (n > 0) { \
- ANY_SIMD(src_ptr, src_stride_ptr, dst_u, dst_v, n); \
- } \
- memcpy(temp, src_ptr + (n >> UVSHIFT) * BPP, SS(r, UVSHIFT) * BPP); \
- memcpy(temp + 128, src_ptr + src_stride_ptr + (n >> UVSHIFT) * BPP, \
- SS(r, UVSHIFT) * BPP); \
- if ((width & 1) && BPP == 4) { /* repeat last 4 bytes for subsampler */ \
- memcpy(temp + SS(r, UVSHIFT) * BPP, \
- temp + SS(r, UVSHIFT) * BPP - BPP, 4); \
- memcpy(temp + 128 + SS(r, UVSHIFT) * BPP, \
- temp + 128 + SS(r, UVSHIFT) * BPP - BPP, 4); \
- } \
- ANY_SIMD(temp, 128, temp + 256, temp + 384, MASK + 1); \
- memcpy(dst_u + (n >> 1), temp + 256, SS(r, 1)); \
- memcpy(dst_v + (n >> 1), temp + 384, SS(r, 1)); \
- }
-
-#ifdef HAS_ARGBTOUVROW_AVX2
-ANY12S(ARGBToUVRow_Any_AVX2, ARGBToUVRow_AVX2, 0, 4, 31)
-#endif
-#ifdef HAS_ARGBTOUVROW_SSSE3
-ANY12S(ARGBToUVRow_Any_SSSE3, ARGBToUVRow_SSSE3, 0, 4, 15)
-ANY12S(ARGBToUVJRow_Any_SSSE3, ARGBToUVJRow_SSSE3, 0, 4, 15)
-ANY12S(BGRAToUVRow_Any_SSSE3, BGRAToUVRow_SSSE3, 0, 4, 15)
-ANY12S(ABGRToUVRow_Any_SSSE3, ABGRToUVRow_SSSE3, 0, 4, 15)
-ANY12S(RGBAToUVRow_Any_SSSE3, RGBAToUVRow_SSSE3, 0, 4, 15)
-#endif
-#ifdef HAS_YUY2TOUVROW_AVX2
-ANY12S(YUY2ToUVRow_Any_AVX2, YUY2ToUVRow_AVX2, 1, 4, 31)
-ANY12S(UYVYToUVRow_Any_AVX2, UYVYToUVRow_AVX2, 1, 4, 31)
-#endif
-#ifdef HAS_YUY2TOUVROW_SSE2
-ANY12S(YUY2ToUVRow_Any_SSE2, YUY2ToUVRow_SSE2, 1, 4, 15)
-ANY12S(UYVYToUVRow_Any_SSE2, UYVYToUVRow_SSE2, 1, 4, 15)
-#endif
-#ifdef HAS_ARGBTOUVROW_NEON
-ANY12S(ARGBToUVRow_Any_NEON, ARGBToUVRow_NEON, 0, 4, 15)
-#endif
-#ifdef HAS_ARGBTOUVJROW_NEON
-ANY12S(ARGBToUVJRow_Any_NEON, ARGBToUVJRow_NEON, 0, 4, 15)
-#endif
-#ifdef HAS_BGRATOUVROW_NEON
-ANY12S(BGRAToUVRow_Any_NEON, BGRAToUVRow_NEON, 0, 4, 15)
-#endif
-#ifdef HAS_ABGRTOUVROW_NEON
-ANY12S(ABGRToUVRow_Any_NEON, ABGRToUVRow_NEON, 0, 4, 15)
-#endif
-#ifdef HAS_RGBATOUVROW_NEON
-ANY12S(RGBAToUVRow_Any_NEON, RGBAToUVRow_NEON, 0, 4, 15)
-#endif
-#ifdef HAS_RGB24TOUVROW_NEON
-ANY12S(RGB24ToUVRow_Any_NEON, RGB24ToUVRow_NEON, 0, 3, 15)
-#endif
-#ifdef HAS_RAWTOUVROW_NEON
-ANY12S(RAWToUVRow_Any_NEON, RAWToUVRow_NEON, 0, 3, 15)
-#endif
-#ifdef HAS_RGB565TOUVROW_NEON
-ANY12S(RGB565ToUVRow_Any_NEON, RGB565ToUVRow_NEON, 0, 2, 15)
-#endif
-#ifdef HAS_ARGB1555TOUVROW_NEON
-ANY12S(ARGB1555ToUVRow_Any_NEON, ARGB1555ToUVRow_NEON, 0, 2, 15)
-#endif
-#ifdef HAS_ARGB4444TOUVROW_NEON
-ANY12S(ARGB4444ToUVRow_Any_NEON, ARGB4444ToUVRow_NEON, 0, 2, 15)
-#endif
-#ifdef HAS_YUY2TOUVROW_NEON
-ANY12S(YUY2ToUVRow_Any_NEON, YUY2ToUVRow_NEON, 1, 4, 15)
-#endif
-#ifdef HAS_UYVYTOUVROW_NEON
-ANY12S(UYVYToUVRow_Any_NEON, UYVYToUVRow_NEON, 1, 4, 15)
-#endif
-#undef ANY12S
-
-#ifdef __cplusplus
-} // extern "C"
-} // namespace libyuv
-#endif
diff --git a/third_party/aom/third_party/libyuv/source/row_common.cc b/third_party/aom/third_party/libyuv/source/row_common.cc
deleted file mode 100644
index 49875894f..000000000
--- a/third_party/aom/third_party/libyuv/source/row_common.cc
+++ /dev/null
@@ -1,2576 +0,0 @@
-/*
- * Copyright 2011 The LibYuv Project Authors. All rights reserved.
- *
- * Use of this source code is governed by a BSD-style license
- * that can be found in the LICENSE file in the root of the source
- * tree. An additional intellectual property rights grant can be found
- * in the file PATENTS. All contributing project authors may
- * be found in the AUTHORS file in the root of the source tree.
- */
-
-#include "libyuv/row.h"
-
-#include <string.h> // For memcpy and memset.
-
-#include "libyuv/basic_types.h"
-
-#ifdef __cplusplus
-namespace libyuv {
-extern "C" {
-#endif
-
-// llvm x86 is poor at ternary operator, so use branchless min/max.
-
-#define USE_BRANCHLESS 1
-#if USE_BRANCHLESS
-static __inline int32 clamp0(int32 v) {
- return ((-(v) >> 31) & (v));
-}
-
-static __inline int32 clamp255(int32 v) {
- return (((255 - (v)) >> 31) | (v)) & 255;
-}
-
-static __inline uint32 Clamp(int32 val) {
- int v = clamp0(val);
- return (uint32)(clamp255(v));
-}
-
-static __inline uint32 Abs(int32 v) {
- int m = v >> 31;
- return (v + m) ^ m;
-}
-#else // USE_BRANCHLESS
-static __inline int32 clamp0(int32 v) {
- return (v < 0) ? 0 : v;
-}
-
-static __inline int32 clamp255(int32 v) {
- return (v > 255) ? 255 : v;
-}
-
-static __inline uint32 Clamp(int32 val) {
- int v = clamp0(val);
- return (uint32)(clamp255(v));
-}
-
-static __inline uint32 Abs(int32 v) {
- return (v < 0) ? -v : v;
-}
-#endif // USE_BRANCHLESS
-
-#ifdef LIBYUV_LITTLE_ENDIAN
-#define WRITEWORD(p, v) *(uint32*)(p) = v
-#else
-static inline void WRITEWORD(uint8* p, uint32 v) {
- p[0] = (uint8)(v & 255);
- p[1] = (uint8)((v >> 8) & 255);
- p[2] = (uint8)((v >> 16) & 255);
- p[3] = (uint8)((v >> 24) & 255);
-}
-#endif
-
-void RGB24ToARGBRow_C(const uint8* src_rgb24, uint8* dst_argb, int width) {
- int x;
- for (x = 0; x < width; ++x) {
- uint8 b = src_rgb24[0];
- uint8 g = src_rgb24[1];
- uint8 r = src_rgb24[2];
- dst_argb[0] = b;
- dst_argb[1] = g;
- dst_argb[2] = r;
- dst_argb[3] = 255u;
- dst_argb += 4;
- src_rgb24 += 3;
- }
-}
-
-void RAWToARGBRow_C(const uint8* src_raw, uint8* dst_argb, int width) {
- int x;
- for (x = 0; x < width; ++x) {
- uint8 r = src_raw[0];
- uint8 g = src_raw[1];
- uint8 b = src_raw[2];
- dst_argb[0] = b;
- dst_argb[1] = g;
- dst_argb[2] = r;
- dst_argb[3] = 255u;
- dst_argb += 4;
- src_raw += 3;
- }
-}
-
-void RGB565ToARGBRow_C(const uint8* src_rgb565, uint8* dst_argb, int width) {
- int x;
- for (x = 0; x < width; ++x) {
- uint8 b = src_rgb565[0] & 0x1f;
- uint8 g = (src_rgb565[0] >> 5) | ((src_rgb565[1] & 0x07) << 3);
- uint8 r = src_rgb565[1] >> 3;
- dst_argb[0] = (b << 3) | (b >> 2);
- dst_argb[1] = (g << 2) | (g >> 4);
- dst_argb[2] = (r << 3) | (r >> 2);
- dst_argb[3] = 255u;
- dst_argb += 4;
- src_rgb565 += 2;
- }
-}
-
-void ARGB1555ToARGBRow_C(const uint8* src_argb1555, uint8* dst_argb,
- int width) {
- int x;
- for (x = 0; x < width; ++x) {
- uint8 b = src_argb1555[0] & 0x1f;
- uint8 g = (src_argb1555[0] >> 5) | ((src_argb1555[1] & 0x03) << 3);
- uint8 r = (src_argb1555[1] & 0x7c) >> 2;
- uint8 a = src_argb1555[1] >> 7;
- dst_argb[0] = (b << 3) | (b >> 2);
- dst_argb[1] = (g << 3) | (g >> 2);
- dst_argb[2] = (r << 3) | (r >> 2);
- dst_argb[3] = -a;
- dst_argb += 4;
- src_argb1555 += 2;
- }
-}
-
-void ARGB4444ToARGBRow_C(const uint8* src_argb4444, uint8* dst_argb,
- int width) {
- int x;
- for (x = 0; x < width; ++x) {
- uint8 b = src_argb4444[0] & 0x0f;
- uint8 g = src_argb4444[0] >> 4;
- uint8 r = src_argb4444[1] & 0x0f;
- uint8 a = src_argb4444[1] >> 4;
- dst_argb[0] = (b << 4) | b;
- dst_argb[1] = (g << 4) | g;
- dst_argb[2] = (r << 4) | r;
- dst_argb[3] = (a << 4) | a;
- dst_argb += 4;
- src_argb4444 += 2;
- }
-}
-
-void ARGBToRGB24Row_C(const uint8* src_argb, uint8* dst_rgb, int width) {
- int x;
- for (x = 0; x < width; ++x) {
- uint8 b = src_argb[0];
- uint8 g = src_argb[1];
- uint8 r = src_argb[2];
- dst_rgb[0] = b;
- dst_rgb[1] = g;
- dst_rgb[2] = r;
- dst_rgb += 3;
- src_argb += 4;
- }
-}
-
-void ARGBToRAWRow_C(const uint8* src_argb, uint8* dst_rgb, int width) {
- int x;
- for (x = 0; x < width; ++x) {
- uint8 b = src_argb[0];
- uint8 g = src_argb[1];
- uint8 r = src_argb[2];
- dst_rgb[0] = r;
- dst_rgb[1] = g;
- dst_rgb[2] = b;
- dst_rgb += 3;
- src_argb += 4;
- }
-}
-
-void ARGBToRGB565Row_C(const uint8* src_argb, uint8* dst_rgb, int width) {
- int x;
- for (x = 0; x < width - 1; x += 2) {
- uint8 b0 = src_argb[0] >> 3;
- uint8 g0 = src_argb[1] >> 2;
- uint8 r0 = src_argb[2] >> 3;
- uint8 b1 = src_argb[4] >> 3;
- uint8 g1 = src_argb[5] >> 2;
- uint8 r1 = src_argb[6] >> 3;
- WRITEWORD(dst_rgb, b0 | (g0 << 5) | (r0 << 11) |
- (b1 << 16) | (g1 << 21) | (r1 << 27));
- dst_rgb += 4;
- src_argb += 8;
- }
- if (width & 1) {
- uint8 b0 = src_argb[0] >> 3;
- uint8 g0 = src_argb[1] >> 2;
- uint8 r0 = src_argb[2] >> 3;
- *(uint16*)(dst_rgb) = b0 | (g0 << 5) | (r0 << 11);
- }
-}
-
-// dither4 is a row of 4 values from 4x4 dither matrix.
-// The 4x4 matrix contains values to increase RGB. When converting to
-// fewer bits (565) this provides an ordered dither.
-// The order in the 4x4 matrix in first byte is upper left.
-// The 4 values are passed as an int, then referenced as an array, so
-// endian will not affect order of the original matrix. But the dither4
-// will containing the first pixel in the lower byte for little endian
-// or the upper byte for big endian.
-void ARGBToRGB565DitherRow_C(const uint8* src_argb, uint8* dst_rgb,
- const uint32 dither4, int width) {
- int x;
- for (x = 0; x < width - 1; x += 2) {
- int dither0 = ((const unsigned char*)(&dither4))[x & 3];
- int dither1 = ((const unsigned char*)(&dither4))[(x + 1) & 3];
- uint8 b0 = clamp255(src_argb[0] + dither0) >> 3;
- uint8 g0 = clamp255(src_argb[1] + dither0) >> 2;
- uint8 r0 = clamp255(src_argb[2] + dither0) >> 3;
- uint8 b1 = clamp255(src_argb[4] + dither1) >> 3;
- uint8 g1 = clamp255(src_argb[5] + dither1) >> 2;
- uint8 r1 = clamp255(src_argb[6] + dither1) >> 3;
- WRITEWORD(dst_rgb, b0 | (g0 << 5) | (r0 << 11) |
- (b1 << 16) | (g1 << 21) | (r1 << 27));
- dst_rgb += 4;
- src_argb += 8;
- }
- if (width & 1) {
- int dither0 = ((const unsigned char*)(&dither4))[(width - 1) & 3];
- uint8 b0 = clamp255(src_argb[0] + dither0) >> 3;
- uint8 g0 = clamp255(src_argb[1] + dither0) >> 2;
- uint8 r0 = clamp255(src_argb[2] + dither0) >> 3;
- *(uint16*)(dst_rgb) = b0 | (g0 << 5) | (r0 << 11);
- }
-}
-
-// Convert a row of ARGB pixels to ARGB1555 (5 bits each of B, G, R and the
-// top bit of alpha). Two pixels are packed per 32 bit store; an odd trailing
-// pixel is stored as a single 16 bit value.
-void ARGBToARGB1555Row_C(const uint8* src_argb, uint8* dst_rgb, int width) {
-  int x;
-  for (x = 0; x < width - 1; x += 2) {
-    uint8 b0 = src_argb[0] >> 3;
-    uint8 g0 = src_argb[1] >> 3;
-    uint8 r0 = src_argb[2] >> 3;
-    uint8 a0 = src_argb[3] >> 7;
-    uint8 b1 = src_argb[4] >> 3;
-    uint8 g1 = src_argb[5] >> 3;
-    uint8 r1 = src_argb[6] >> 3;
-    uint8 a1 = src_argb[7] >> 7;
-    // Widen to uint32 before shifting: a1 << 31 does not fit in a signed int.
-    *(uint32*)(dst_rgb) =
-        (uint32)b0 | ((uint32)g0 << 5) | ((uint32)r0 << 10) |
-        ((uint32)a0 << 15) | ((uint32)b1 << 16) | ((uint32)g1 << 21) |
-        ((uint32)r1 << 26) | ((uint32)a1 << 31);
-    dst_rgb += 4;
-    src_argb += 8;
-  }
-  if (width & 1) {
-    uint8 b0 = src_argb[0] >> 3;
-    uint8 g0 = src_argb[1] >> 3;
-    uint8 r0 = src_argb[2] >> 3;
-    uint8 a0 = src_argb[3] >> 7;
-    *(uint16*)(dst_rgb) =
-        b0 | (g0 << 5) | (r0 << 10) | (a0 << 15);
-  }
-}
-
-// Convert a row of ARGB pixels to ARGB4444 by keeping the top 4 bits of each
-// channel. Two pixels are packed per 32 bit store; an odd trailing pixel is
-// stored as a single 16 bit value.
-void ARGBToARGB4444Row_C(const uint8* src_argb, uint8* dst_rgb, int width) {
-  int x;
-  for (x = 0; x < width - 1; x += 2) {
-    uint8 b0 = src_argb[0] >> 4;
-    uint8 g0 = src_argb[1] >> 4;
-    uint8 r0 = src_argb[2] >> 4;
-    uint8 a0 = src_argb[3] >> 4;
-    uint8 b1 = src_argb[4] >> 4;
-    uint8 g1 = src_argb[5] >> 4;
-    uint8 r1 = src_argb[6] >> 4;
-    uint8 a1 = src_argb[7] >> 4;
-    // Widen to uint32 before shifting: a1 << 28 does not fit in a signed int.
-    *(uint32*)(dst_rgb) =
-        (uint32)b0 | ((uint32)g0 << 4) | ((uint32)r0 << 8) |
-        ((uint32)a0 << 12) | ((uint32)b1 << 16) | ((uint32)g1 << 20) |
-        ((uint32)r1 << 24) | ((uint32)a1 << 28);
-    dst_rgb += 4;
-    src_argb += 8;
-  }
-  if (width & 1) {
-    uint8 b0 = src_argb[0] >> 4;
-    uint8 g0 = src_argb[1] >> 4;
-    uint8 r0 = src_argb[2] >> 4;
-    uint8 a0 = src_argb[3] >> 4;
-    *(uint16*)(dst_rgb) =
-        b0 | (g0 << 4) | (r0 << 8) | (a0 << 12);
-  }
-}
-
-// Luma from 8 bit R, G, B using 8.8 fixed point weights.
-// 0x1080 is (16 << 8) + 0x80: the +16 offset plus rounding.
-static __inline int RGBToY(uint8 r, uint8 g, uint8 b) {
-  const int weighted = 66 * r + 129 * g + 25 * b;
-  return (weighted + 0x1080) >> 8;
-}
-
-// U (blue difference) from 8 bit R, G, B using 8.8 fixed point weights.
-// 0x8080 is (128 << 8) + 0x80: the +128 centre plus rounding.
-static __inline int RGBToU(uint8 r, uint8 g, uint8 b) {
-  const int weighted = 112 * b - 74 * g - 38 * r;
-  return (weighted + 0x8080) >> 8;
-}
-// V (red difference) from 8 bit R, G, B using 8.8 fixed point weights.
-// 0x8080 is (128 << 8) + 0x80: the +128 centre plus rounding.
-static __inline int RGBToV(uint8 r, uint8 g, uint8 b) {
-  const int weighted = 112 * r - 94 * g - 18 * b;
-  return (weighted + 0x8080) >> 8;
-}
-
-// MAKEROWY(NAME, R, G, B, BPP) generates two row functions for one packed
-// RGB layout:
-//   NAME##ToYRow_C  - writes one Y per pixel using RGBToY.
-//   NAME##ToUVRow_C - writes one U and one V per 2x2 block. The block is
-//                     averaged over two rows (src_rgb0 and
-//                     src_rgb0 + src_stride_rgb) before RGBToU / RGBToV are
-//                     applied. For an odd width the last column averages
-//                     only the two vertically adjacent pixels.
-// R, G and B are the byte offsets of each channel within a pixel and BPP is
-// the number of bytes per pixel.
-#define MAKEROWY(NAME, R, G, B, BPP) \
-void NAME ## ToYRow_C(const uint8* src_argb0, uint8* dst_y, int width) { \
- int x; \
- for (x = 0; x < width; ++x) { \
- dst_y[0] = RGBToY(src_argb0[R], src_argb0[G], src_argb0[B]); \
- src_argb0 += BPP; \
- dst_y += 1; \
- } \
-} \
-void NAME ## ToUVRow_C(const uint8* src_rgb0, int src_stride_rgb, \
- uint8* dst_u, uint8* dst_v, int width) { \
- const uint8* src_rgb1 = src_rgb0 + src_stride_rgb; \
- int x; \
- for (x = 0; x < width - 1; x += 2) { \
- uint8 ab = (src_rgb0[B] + src_rgb0[B + BPP] + \
- src_rgb1[B] + src_rgb1[B + BPP]) >> 2; \
- uint8 ag = (src_rgb0[G] + src_rgb0[G + BPP] + \
- src_rgb1[G] + src_rgb1[G + BPP]) >> 2; \
- uint8 ar = (src_rgb0[R] + src_rgb0[R + BPP] + \
- src_rgb1[R] + src_rgb1[R + BPP]) >> 2; \
- dst_u[0] = RGBToU(ar, ag, ab); \
- dst_v[0] = RGBToV(ar, ag, ab); \
- src_rgb0 += BPP * 2; \
- src_rgb1 += BPP * 2; \
- dst_u += 1; \
- dst_v += 1; \
- } \
- if (width & 1) { \
- uint8 ab = (src_rgb0[B] + src_rgb1[B]) >> 1; \
- uint8 ag = (src_rgb0[G] + src_rgb1[G]) >> 1; \
- uint8 ar = (src_rgb0[R] + src_rgb1[R]) >> 1; \
- dst_u[0] = RGBToU(ar, ag, ab); \
- dst_v[0] = RGBToV(ar, ag, ab); \
- } \
-}
-
-// Instantiate the Y and UV row functions for each supported byte order.
-MAKEROWY(ARGB, 2, 1, 0, 4)
-MAKEROWY(BGRA, 1, 2, 3, 4)
-MAKEROWY(ABGR, 0, 1, 2, 4)
-MAKEROWY(RGBA, 3, 2, 1, 4)
-MAKEROWY(RGB24, 2, 1, 0, 3)
-MAKEROWY(RAW, 0, 1, 2, 3)
-#undef MAKEROWY
-
-// JPeg uses a variation on BT.601-1 full range
-// y = 0.29900 * r + 0.58700 * g + 0.11400 * b
-// u = -0.16874 * r - 0.33126 * g + 0.50000 * b + center
-// v = 0.50000 * r - 0.41869 * g - 0.08131 * b + center
-// BT.601 Mpeg range uses:
-// b 0.1016 * 255 = 25.908 = 25
-// g 0.5078 * 255 = 129.489 = 129
-// r 0.2578 * 255 = 65.739 = 66
-// JPeg 8 bit Y (not used):
-// b 0.11400 * 256 = 29.184 = 29
-// g 0.58700 * 256 = 150.272 = 150
-// r 0.29900 * 256 = 76.544 = 77
-// JPeg 7 bit Y:
-// b 0.11400 * 128 = 14.592 = 15
-// g 0.58700 * 128 = 75.136 = 75
-// r 0.29900 * 128 = 38.272 = 38
-// JPeg 8 bit U:
-// b 0.50000 * 255 = 127.5 = 127
-// g -0.33126 * 255 = -84.4713 = -84
-// r -0.16874 * 255 = -43.0287 = -43
-// JPeg 8 bit V:
-// b -0.08131 * 255 = -20.73405 = -20
-// g -0.41869 * 255 = -106.76595 = -107
-// r 0.50000 * 255 = 127.5 = 127
-
-// JPeg full range luma using the 7 bit weights (38, 75, 15); 64 rounds
-// the result before the shift by 7.
-static __inline int RGBToYJ(uint8 r, uint8 g, uint8 b) {
-  const int weighted = 38 * r + 75 * g + 15 * b;
-  return (weighted + 64) >> 7;
-}
-
-// JPeg U using the 8 bit weights (127, -84, -43).
-// 0x8080 is (128 << 8) + 0x80: the +128 centre plus rounding.
-static __inline int RGBToUJ(uint8 r, uint8 g, uint8 b) {
-  const int weighted = 127 * b - 84 * g - 43 * r;
-  return (weighted + 0x8080) >> 8;
-}
-// JPeg V using the 8 bit weights (127, -107, -20).
-// 0x8080 is (128 << 8) + 0x80: the +128 centre plus rounding.
-static __inline int RGBToVJ(uint8 r, uint8 g, uint8 b) {
-  const int weighted = 127 * r - 107 * g - 20 * b;
-  return (weighted + 0x8080) >> 8;
-}
-
-// Rounded average of two values.
-#define AVGB(a, b) (((a) + (b) + 1) >> 1)
-
-// MAKEROWYJ(NAME, R, G, B, BPP) generates the JPeg variants of the row
-// functions for one packed RGB layout:
-//   NAME##ToYJRow_C  - writes one Y per pixel using RGBToYJ.
-//   NAME##ToUVJRow_C - writes one U and one V per 2x2 block. Each column is
-//                      averaged vertically with AVGB, then the two column
-//                      results are averaged with AVGB again (two rounding
-//                      steps) before RGBToUJ / RGBToVJ are applied. For an
-//                      odd width the last column uses only the vertical
-//                      average.
-// R, G and B are the byte offsets of each channel within a pixel and BPP is
-// the number of bytes per pixel.
-#define MAKEROWYJ(NAME, R, G, B, BPP) \
-void NAME ## ToYJRow_C(const uint8* src_argb0, uint8* dst_y, int width) { \
- int x; \
- for (x = 0; x < width; ++x) { \
- dst_y[0] = RGBToYJ(src_argb0[R], src_argb0[G], src_argb0[B]); \
- src_argb0 += BPP; \
- dst_y += 1; \
- } \
-} \
-void NAME ## ToUVJRow_C(const uint8* src_rgb0, int src_stride_rgb, \
- uint8* dst_u, uint8* dst_v, int width) { \
- const uint8* src_rgb1 = src_rgb0 + src_stride_rgb; \
- int x; \
- for (x = 0; x < width - 1; x += 2) { \
- uint8 ab = AVGB(AVGB(src_rgb0[B], src_rgb1[B]), \
- AVGB(src_rgb0[B + BPP], src_rgb1[B + BPP])); \
- uint8 ag = AVGB(AVGB(src_rgb0[G], src_rgb1[G]), \
- AVGB(src_rgb0[G + BPP], src_rgb1[G + BPP])); \
- uint8 ar = AVGB(AVGB(src_rgb0[R], src_rgb1[R]), \
- AVGB(src_rgb0[R + BPP], src_rgb1[R + BPP])); \
- dst_u[0] = RGBToUJ(ar, ag, ab); \
- dst_v[0] = RGBToVJ(ar, ag, ab); \
- src_rgb0 += BPP * 2; \
- src_rgb1 += BPP * 2; \
- dst_u += 1; \
- dst_v += 1; \
- } \
- if (width & 1) { \
- uint8 ab = AVGB(src_rgb0[B], src_rgb1[B]); \
- uint8 ag = AVGB(src_rgb0[G], src_rgb1[G]); \
- uint8 ar = AVGB(src_rgb0[R], src_rgb1[R]); \
- dst_u[0] = RGBToUJ(ar, ag, ab); \
- dst_v[0] = RGBToVJ(ar, ag, ab); \
- } \
-}
-
-// Only the ARGB byte order gets JPeg row functions here.
-MAKEROWYJ(ARGB, 2, 1, 0, 4)
-#undef MAKEROWYJ
-
-// Produce one JPeg U/V pair for every two ARGB pixels of a single row.
-// Horizontal pairs are averaged (truncating); an odd trailing pixel is used
-// unaveraged.
-void ARGBToUVJ422Row_C(const uint8* src_argb,
-                       uint8* dst_u, uint8* dst_v, int width) {
-  int remaining = width;
-  while (remaining > 1) {
-    const uint8 avg_b = (src_argb[0] + src_argb[4]) >> 1;
-    const uint8 avg_g = (src_argb[1] + src_argb[5]) >> 1;
-    const uint8 avg_r = (src_argb[2] + src_argb[6]) >> 1;
-    *dst_u = RGBToUJ(avg_r, avg_g, avg_b);
-    *dst_v = RGBToVJ(avg_r, avg_g, avg_b);
-    src_argb += 8;
-    ++dst_u;
-    ++dst_v;
-    remaining -= 2;
-  }
-  if (width & 1) {
-    const uint8 last_b = src_argb[0];
-    const uint8 last_g = src_argb[1];
-    const uint8 last_r = src_argb[2];
-    *dst_u = RGBToUJ(last_r, last_g, last_b);
-    *dst_v = RGBToVJ(last_r, last_g, last_b);
-  }
-}
-
-// Convert a row of RGB565 pixels to Y. Each channel is expanded to 8 bits by
-// replicating its top bits into the low bits before RGBToY is applied.
-void RGB565ToYRow_C(const uint8* src_rgb565, uint8* dst_y, int width) {
-  int remaining;
-  for (remaining = width; remaining > 0; --remaining) {
-    uint8 blue = src_rgb565[0] & 0x1f;
-    uint8 green = (src_rgb565[0] >> 5) | ((src_rgb565[1] & 0x07) << 3);
-    uint8 red = src_rgb565[1] >> 3;
-    blue = (blue << 3) | (blue >> 2);     // 5 bits -> 8 bits.
-    green = (green << 2) | (green >> 4);  // 6 bits -> 8 bits.
-    red = (red << 3) | (red >> 2);        // 5 bits -> 8 bits.
-    *dst_y = RGBToY(red, green, blue);
-    ++dst_y;
-    src_rgb565 += 2;
-  }
-}
-
-// Convert a row of ARGB1555 pixels to Y. The alpha bit is ignored and each
-// 5 bit channel is expanded to 8 bits before RGBToY is applied.
-void ARGB1555ToYRow_C(const uint8* src_argb1555, uint8* dst_y, int width) {
-  int remaining;
-  for (remaining = width; remaining > 0; --remaining) {
-    uint8 blue = src_argb1555[0] & 0x1f;
-    uint8 green = (src_argb1555[0] >> 5) | ((src_argb1555[1] & 0x03) << 3);
-    uint8 red = (src_argb1555[1] & 0x7c) >> 2;
-    blue = (blue << 3) | (blue >> 2);     // 5 bits -> 8 bits.
-    green = (green << 3) | (green >> 2);  // 5 bits -> 8 bits.
-    red = (red << 3) | (red >> 2);        // 5 bits -> 8 bits.
-    *dst_y = RGBToY(red, green, blue);
-    ++dst_y;
-    src_argb1555 += 2;
-  }
-}
-
-// Convert a row of ARGB4444 pixels to Y. Alpha is ignored and each 4 bit
-// channel is expanded to 8 bits by duplicating the nibble.
-void ARGB4444ToYRow_C(const uint8* src_argb4444, uint8* dst_y, int width) {
-  int remaining;
-  for (remaining = width; remaining > 0; --remaining) {
-    uint8 blue = src_argb4444[0] & 0x0f;
-    uint8 green = src_argb4444[0] >> 4;
-    uint8 red = src_argb4444[1] & 0x0f;
-    blue = (blue << 4) | blue;
-    green = (green << 4) | green;
-    red = (red << 4) | red;
-    *dst_y = RGBToY(red, green, blue);
-    ++dst_y;
-    src_argb4444 += 2;
-  }
-}
-
-// Produce one U/V pair per 2x2 block of RGB565 pixels.
-// src_rgb565 is the current row and src_rgb565 + src_stride_rgb565 the next.
-// Instead of averaging, the channel values of the block are summed, which
-// widens each channel; the sums are then expanded to 8 bits by bit
-// replication before RGBToU / RGBToV are applied.
-void RGB565ToUVRow_C(const uint8* src_rgb565, int src_stride_rgb565,
- uint8* dst_u, uint8* dst_v, int width) {
- const uint8* next_rgb565 = src_rgb565 + src_stride_rgb565;
- int x;
- for (x = 0; x < width - 1; x += 2) {
- uint8 b0 = src_rgb565[0] & 0x1f;
- uint8 g0 = (src_rgb565[0] >> 5) | ((src_rgb565[1] & 0x07) << 3);
- uint8 r0 = src_rgb565[1] >> 3;
- uint8 b1 = src_rgb565[2] & 0x1f;
- uint8 g1 = (src_rgb565[2] >> 5) | ((src_rgb565[3] & 0x07) << 3);
- uint8 r1 = src_rgb565[3] >> 3;
- uint8 b2 = next_rgb565[0] & 0x1f;
- uint8 g2 = (next_rgb565[0] >> 5) | ((next_rgb565[1] & 0x07) << 3);
- uint8 r2 = next_rgb565[1] >> 3;
- uint8 b3 = next_rgb565[2] & 0x1f;
- uint8 g3 = (next_rgb565[2] >> 5) | ((next_rgb565[3] & 0x07) << 3);
- uint8 r3 = next_rgb565[3] >> 3;
- uint8 b = (b0 + b1 + b2 + b3); // 565 * 4 = 787.
- uint8 g = (g0 + g1 + g2 + g3);
- uint8 r = (r0 + r1 + r2 + r3);
- // The green sum of four 6 bit values is already 8 bits wide, so only
- // blue and red need expanding.
- b = (b << 1) | (b >> 6); // 787 -> 888.
- r = (r << 1) | (r >> 6);
- dst_u[0] = RGBToU(r, g, b);
- dst_v[0] = RGBToV(r, g, b);
- src_rgb565 += 4;
- next_rgb565 += 4;
- dst_u += 1;
- dst_v += 1;
- }
- // Odd width: the last column only has two pixels (one from each row).
- if (width & 1) {
- uint8 b0 = src_rgb565[0] & 0x1f;
- uint8 g0 = (src_rgb565[0] >> 5) | ((src_rgb565[1] & 0x07) << 3);
- uint8 r0 = src_rgb565[1] >> 3;
- uint8 b2 = next_rgb565[0] & 0x1f;
- uint8 g2 = (next_rgb565[0] >> 5) | ((next_rgb565[1] & 0x07) << 3);
- uint8 r2 = next_rgb565[1] >> 3;
- uint8 b = (b0 + b2); // 565 * 2 = 676.
- uint8 g = (g0 + g2);
- uint8 r = (r0 + r2);
- b = (b << 2) | (b >> 4); // 676 -> 888
- g = (g << 1) | (g >> 6);
- r = (r << 2) | (r >> 4);
- dst_u[0] = RGBToU(r, g, b);
- dst_v[0] = RGBToV(r, g, b);
- }
-}
-
-// Produce one U/V pair per 2x2 block of ARGB1555 pixels.
-// src_argb1555 is the current row and src_argb1555 + src_stride_argb1555 the
-// next. The 5 bit channel values of the block are summed (alpha is ignored),
-// then the sums are expanded to 8 bits by bit replication before
-// RGBToU / RGBToV are applied.
-void ARGB1555ToUVRow_C(const uint8* src_argb1555, int src_stride_argb1555,
-                       uint8* dst_u, uint8* dst_v, int width) {
-  const uint8* next_argb1555 = src_argb1555 + src_stride_argb1555;
-  int x;
-  for (x = 0; x < width - 1; x += 2) {
-    uint8 b0 = src_argb1555[0] & 0x1f;
-    uint8 g0 = (src_argb1555[0] >> 5) | ((src_argb1555[1] & 0x03) << 3);
-    uint8 r0 = (src_argb1555[1] & 0x7c) >> 2;
-    uint8 b1 = src_argb1555[2] & 0x1f;
-    uint8 g1 = (src_argb1555[2] >> 5) | ((src_argb1555[3] & 0x03) << 3);
-    uint8 r1 = (src_argb1555[3] & 0x7c) >> 2;
-    uint8 b2 = next_argb1555[0] & 0x1f;
-    uint8 g2 = (next_argb1555[0] >> 5) | ((next_argb1555[1] & 0x03) << 3);
-    uint8 r2 = (next_argb1555[1] & 0x7c) >> 2;
-    uint8 b3 = next_argb1555[2] & 0x1f;
-    uint8 g3 = (next_argb1555[2] >> 5) | ((next_argb1555[3] & 0x03) << 3);
-    uint8 r3 = (next_argb1555[3] & 0x7c) >> 2;
-    uint8 b = (b0 + b1 + b2 + b3);  // 555 * 4 = 777.
-    uint8 g = (g0 + g1 + g2 + g3);
-    uint8 r = (r0 + r1 + r2 + r3);
-    b = (b << 1) | (b >> 6);  // 777 -> 888.
-    g = (g << 1) | (g >> 6);
-    r = (r << 1) | (r >> 6);
-    dst_u[0] = RGBToU(r, g, b);
-    dst_v[0] = RGBToV(r, g, b);
-    src_argb1555 += 4;
-    next_argb1555 += 4;
-    dst_u += 1;
-    dst_v += 1;
-  }
-  // Odd width: the last column only has two pixels (one from each row).
-  if (width & 1) {
-    uint8 b0 = src_argb1555[0] & 0x1f;
-    uint8 g0 = (src_argb1555[0] >> 5) | ((src_argb1555[1] & 0x03) << 3);
-    uint8 r0 = (src_argb1555[1] & 0x7c) >> 2;
-    uint8 b2 = next_argb1555[0] & 0x1f;
-    uint8 g2 = (next_argb1555[0] >> 5) | ((next_argb1555[1] & 0x03) << 3);
-    // Red occupies bits 2..6 of the high byte; mask off the alpha bit.
-    // (A plain ">> 3" is the RGB565 layout and would mix alpha into red.)
-    uint8 r2 = (next_argb1555[1] & 0x7c) >> 2;
-    uint8 b = (b0 + b2);  // 555 * 2 = 666.
-    uint8 g = (g0 + g2);
-    uint8 r = (r0 + r2);
-    b = (b << 2) | (b >> 4);  // 666 -> 888.
-    g = (g << 2) | (g >> 4);
-    r = (r << 2) | (r >> 4);
-    dst_u[0] = RGBToU(r, g, b);
-    dst_v[0] = RGBToV(r, g, b);
-  }
-}
-
-// Produce one U/V pair per 2x2 block of ARGB4444 pixels.
-// src_argb4444 is the current row and src_argb4444 + src_stride_argb4444 the
-// next. The 4 bit channel values of the block are summed (alpha is ignored),
-// then the sums are expanded to 8 bits by bit replication before
-// RGBToU / RGBToV are applied.
-void ARGB4444ToUVRow_C(const uint8* src_argb4444, int src_stride_argb4444,
- uint8* dst_u, uint8* dst_v, int width) {
- const uint8* next_argb4444 = src_argb4444 + src_stride_argb4444;
- int x;
- for (x = 0; x < width - 1; x += 2) {
- uint8 b0 = src_argb4444[0] & 0x0f;
- uint8 g0 = src_argb4444[0] >> 4;
- uint8 r0 = src_argb4444[1] & 0x0f;
- uint8 b1 = src_argb4444[2] & 0x0f;
- uint8 g1 = src_argb4444[2] >> 4;
- uint8 r1 = src_argb4444[3] & 0x0f;
- uint8 b2 = next_argb4444[0] & 0x0f;
- uint8 g2 = next_argb4444[0] >> 4;
- uint8 r2 = next_argb4444[1] & 0x0f;
- uint8 b3 = next_argb4444[2] & 0x0f;
- uint8 g3 = next_argb4444[2] >> 4;
- uint8 r3 = next_argb4444[3] & 0x0f;
- uint8 b = (b0 + b1 + b2 + b3); // 444 * 4 = 666.
- uint8 g = (g0 + g1 + g2 + g3);
- uint8 r = (r0 + r1 + r2 + r3);
- b = (b << 2) | (b >> 4); // 666 -> 888.
- g = (g << 2) | (g >> 4);
- r = (r << 2) | (r >> 4);
- dst_u[0] = RGBToU(r, g, b);
- dst_v[0] = RGBToV(r, g, b);
- src_argb4444 += 4;
- next_argb4444 += 4;
- dst_u += 1;
- dst_v += 1;
- }
- // Odd width: the last column only has two pixels (one from each row).
- if (width & 1) {
- uint8 b0 = src_argb4444[0] & 0x0f;
- uint8 g0 = src_argb4444[0] >> 4;
- uint8 r0 = src_argb4444[1] & 0x0f;
- uint8 b2 = next_argb4444[0] & 0x0f;
- uint8 g2 = next_argb4444[0] >> 4;
- uint8 r2 = next_argb4444[1] & 0x0f;
- uint8 b = (b0 + b2); // 444 * 2 = 555.
- uint8 g = (g0 + g2);
- uint8 r = (r0 + r2);
- b = (b << 3) | (b >> 2); // 555 -> 888.
- g = (g << 3) | (g >> 2);
- r = (r << 3) | (r >> 2);
- dst_u[0] = RGBToU(r, g, b);
- dst_v[0] = RGBToV(r, g, b);
- }
-}
-
-// Produce one U and one V for every ARGB pixel of a row (no subsampling).
-void ARGBToUV444Row_C(const uint8* src_argb,
-                      uint8* dst_u, uint8* dst_v, int width) {
-  int remaining;
-  for (remaining = width; remaining > 0; --remaining) {
-    const uint8 blue = src_argb[0];
-    const uint8 green = src_argb[1];
-    const uint8 red = src_argb[2];
-    *dst_u = RGBToU(red, green, blue);
-    *dst_v = RGBToV(red, green, blue);
-    src_argb += 4;
-    ++dst_u;
-    ++dst_v;
-  }
-}
-
-// Produce one U/V pair for every two ARGB pixels of a single row.
-// Horizontal pairs are averaged (truncating); an odd trailing pixel is used
-// unaveraged.
-void ARGBToUV422Row_C(const uint8* src_argb,
-                      uint8* dst_u, uint8* dst_v, int width) {
-  int remaining = width;
-  while (remaining > 1) {
-    const uint8 avg_b = (src_argb[0] + src_argb[4]) >> 1;
-    const uint8 avg_g = (src_argb[1] + src_argb[5]) >> 1;
-    const uint8 avg_r = (src_argb[2] + src_argb[6]) >> 1;
-    *dst_u = RGBToU(avg_r, avg_g, avg_b);
-    *dst_v = RGBToV(avg_r, avg_g, avg_b);
-    src_argb += 8;
-    ++dst_u;
-    ++dst_v;
-    remaining -= 2;
-  }
-  if (width & 1) {
-    const uint8 last_b = src_argb[0];
-    const uint8 last_g = src_argb[1];
-    const uint8 last_r = src_argb[2];
-    *dst_u = RGBToU(last_r, last_g, last_b);
-    *dst_v = RGBToV(last_r, last_g, last_b);
-  }
-}
-
-// Produce one U/V pair for every four ARGB pixels of a single row.
-// Full groups of four are averaged; the 1 to 3 leftover pixels of a width
-// that is not a multiple of four are averaged among themselves.
-void ARGBToUV411Row_C(const uint8* src_argb,
-                      uint8* dst_u, uint8* dst_v, int width) {
-  int remaining = width;
-  while (remaining > 3) {
-    const uint8 avg_b =
-        (src_argb[0] + src_argb[4] + src_argb[8] + src_argb[12]) >> 2;
-    const uint8 avg_g =
-        (src_argb[1] + src_argb[5] + src_argb[9] + src_argb[13]) >> 2;
-    const uint8 avg_r =
-        (src_argb[2] + src_argb[6] + src_argb[10] + src_argb[14]) >> 2;
-    *dst_u = RGBToU(avg_r, avg_g, avg_b);
-    *dst_v = RGBToV(avg_r, avg_g, avg_b);
-    src_argb += 16;
-    ++dst_u;
-    ++dst_v;
-    remaining -= 4;
-  }
-  switch (width & 3) {
-    case 3: {
-      const uint8 avg_b = (src_argb[0] + src_argb[4] + src_argb[8]) / 3;
-      const uint8 avg_g = (src_argb[1] + src_argb[5] + src_argb[9]) / 3;
-      const uint8 avg_r = (src_argb[2] + src_argb[6] + src_argb[10]) / 3;
-      *dst_u = RGBToU(avg_r, avg_g, avg_b);
-      *dst_v = RGBToV(avg_r, avg_g, avg_b);
-      break;
-    }
-    case 2: {
-      const uint8 avg_b = (src_argb[0] + src_argb[4]) >> 1;
-      const uint8 avg_g = (src_argb[1] + src_argb[5]) >> 1;
-      const uint8 avg_r = (src_argb[2] + src_argb[6]) >> 1;
-      *dst_u = RGBToU(avg_r, avg_g, avg_b);
-      *dst_v = RGBToV(avg_r, avg_g, avg_b);
-      break;
-    }
-    case 1: {
-      const uint8 last_b = src_argb[0];
-      const uint8 last_g = src_argb[1];
-      const uint8 last_r = src_argb[2];
-      *dst_u = RGBToU(last_r, last_g, last_b);
-      *dst_v = RGBToV(last_r, last_g, last_b);
-      break;
-    }
-    default:
-      break;
-  }
-}
-
-// Convert a row of ARGB to gray: B, G and R are all set to the JPeg luma of
-// the source pixel and alpha is copied through.
-void ARGBGrayRow_C(const uint8* src_argb, uint8* dst_argb, int width) {
-  int remaining;
-  for (remaining = width; remaining > 0; --remaining) {
-    const uint8 luma = RGBToYJ(src_argb[2], src_argb[1], src_argb[0]);
-    dst_argb[0] = luma;
-    dst_argb[1] = luma;
-    dst_argb[2] = luma;
-    dst_argb[3] = src_argb[3];
-    src_argb += 4;
-    dst_argb += 4;
-  }
-}
-
-// Apply a sepia tone to a row of ARGB pixels in place.
-// Each output channel is a weighted sum of the input B, G, R (7 bit fixed
-// point). Alpha is left untouched.
-void ARGBSepiaRow_C(uint8* dst_argb, int width) {
-  int remaining;
-  for (remaining = width; remaining > 0; --remaining) {
-    const int blue = dst_argb[0];
-    const int green = dst_argb[1];
-    const int red = dst_argb[2];
-    const int sepia_b = (blue * 17 + green * 68 + red * 35) >> 7;
-    const int sepia_g = (blue * 22 + green * 88 + red * 45) >> 7;
-    const int sepia_r = (blue * 24 + green * 98 + red * 50) >> 7;
-    // The blue weights sum to 120 (< 128), so blue cannot exceed 255 and
-    // needs no clamp.
-    dst_argb[0] = sepia_b;
-    dst_argb[1] = clamp255(sepia_g);
-    dst_argb[2] = clamp255(sepia_r);
-    dst_argb += 4;
-  }
-}
-
-// Apply a signed 4x4 color matrix to a row of ARGB pixels.
-// matrix_argb holds 16 coefficients: four per output channel in B, G, R, A
-// output order, each multiplying the input B, G, R, A. Products are summed,
-// shifted right by 6 and clamped.
-// TODO(fbarchard): Consider adding rounding (+32).
-void ARGBColorMatrixRow_C(const uint8* src_argb, uint8* dst_argb,
-                          const int8* matrix_argb, int width) {
-  int remaining;
-  for (remaining = width; remaining > 0; --remaining) {
-    const int in_b = src_argb[0];
-    const int in_g = src_argb[1];
-    const int in_r = src_argb[2];
-    const int in_a = src_argb[3];
-    int channel;
-    for (channel = 0; channel < 4; ++channel) {
-      const int8* coeff = matrix_argb + channel * 4;
-      const int sum = (in_b * coeff[0] + in_g * coeff[1] +
-                       in_r * coeff[2] + in_a * coeff[3]) >> 6;
-      dst_argb[channel] = Clamp(sum);
-    }
-    src_argb += 4;
-    dst_argb += 4;
-  }
-}
-
-// Remap every channel of a row of ARGB pixels in place through a lookup
-// table. table_argb is interleaved: entry i of channel c lives at
-// table_argb[i * 4 + c], with c = 0..3 for B, G, R, A.
-void ARGBColorTableRow_C(uint8* dst_argb, const uint8* table_argb, int width) {
-  int remaining;
-  for (remaining = width; remaining > 0; --remaining) {
-    const int idx_b = dst_argb[0];
-    const int idx_g = dst_argb[1];
-    const int idx_r = dst_argb[2];
-    const int idx_a = dst_argb[3];
-    dst_argb[0] = table_argb[idx_b * 4 + 0];
-    dst_argb[1] = table_argb[idx_g * 4 + 1];
-    dst_argb[2] = table_argb[idx_r * 4 + 2];
-    dst_argb[3] = table_argb[idx_a * 4 + 3];
-    dst_argb += 4;
-  }
-}
-
-// Remap the B, G and R channels of a row of ARGB pixels in place through a
-// lookup table, leaving alpha unchanged. table_argb uses the same 4 byte
-// interleaved layout as the ARGB variant: entry i of channel c is at
-// table_argb[i * 4 + c].
-void RGBColorTableRow_C(uint8* dst_argb, const uint8* table_argb, int width) {
-  int remaining;
-  for (remaining = width; remaining > 0; --remaining) {
-    const int idx_b = dst_argb[0];
-    const int idx_g = dst_argb[1];
-    const int idx_r = dst_argb[2];
-    dst_argb[0] = table_argb[idx_b * 4 + 0];
-    dst_argb[1] = table_argb[idx_g * 4 + 1];
-    dst_argb[2] = table_argb[idx_r * 4 + 2];
-    dst_argb += 4;
-  }
-}
-
-// Quantize the B, G and R channels of a row of ARGB pixels in place.
-// Each channel becomes (value * scale >> 16) * interval_size +
-// interval_offset; scale is a 16.16 fixed point factor. Alpha is unchanged.
-void ARGBQuantizeRow_C(uint8* dst_argb, int scale, int interval_size,
-                       int interval_offset, int width) {
-  int remaining;
-  for (remaining = width; remaining > 0; --remaining) {
-    const int blue = dst_argb[0];
-    const int green = dst_argb[1];
-    const int red = dst_argb[2];
-    const int step_b = blue * scale >> 16;
-    const int step_g = green * scale >> 16;
-    const int step_r = red * scale >> 16;
-    dst_argb[0] = step_b * interval_size + interval_offset;
-    dst_argb[1] = step_g * interval_size + interval_offset;
-    dst_argb[2] = step_r * interval_size + interval_offset;
-    dst_argb += 4;
-  }
-}
-
-// Helper macros, fully parenthesized so they expand safely inside any
-// expression.
-// REPEAT8 duplicates an 8 bit value into both bytes of a 16 bit value.
-#define REPEAT8(v) ((v) | ((v) << 8))
-// SHADE multiplies two 16 bit values and keeps the top 8 bits of the result.
-#define SHADE(f, v) (((v) * (f)) >> 24)
-
-// Scale every channel of a row of ARGB pixels by the matching byte of
-// |value| (byte 0 scales B, byte 1 G, byte 2 R, byte 3 A).
-void ARGBShadeRow_C(const uint8* src_argb, uint8* dst_argb, int width,
-                    uint32 value) {
-  const uint32 b_scale = REPEAT8(value & 0xff);
-  const uint32 g_scale = REPEAT8((value >> 8) & 0xff);
-  const uint32 r_scale = REPEAT8((value >> 16) & 0xff);
-  const uint32 a_scale = REPEAT8(value >> 24);
-
-  int i;
-  for (i = 0; i < width; ++i) {
-    const uint32 b = REPEAT8(src_argb[0]);
-    const uint32 g = REPEAT8(src_argb[1]);
-    const uint32 r = REPEAT8(src_argb[2]);
-    const uint32 a = REPEAT8(src_argb[3]);
-    dst_argb[0] = SHADE(b, b_scale);
-    dst_argb[1] = SHADE(g, g_scale);
-    dst_argb[2] = SHADE(r, r_scale);
-    dst_argb[3] = SHADE(a, a_scale);
-    src_argb += 4;
-    dst_argb += 4;
-  }
-}
-#undef REPEAT8
-#undef SHADE
-
-// Helper macros, fully parenthesized so they expand safely inside any
-// expression.
-// REPEAT8 duplicates an 8 bit value into both bytes of a 16 bit value.
-#define REPEAT8(v) ((v) | ((v) << 8))
-// SHADE multiplies a 16 bit value by an 8 bit value and keeps the top 8 bits.
-#define SHADE(f, v) (((v) * (f)) >> 16)
-
-// Multiply two rows of ARGB pixels channel by channel.
-void ARGBMultiplyRow_C(const uint8* src_argb0, const uint8* src_argb1,
-                       uint8* dst_argb, int width) {
-  int i;
-  for (i = 0; i < width; ++i) {
-    const uint32 b = REPEAT8(src_argb0[0]);
-    const uint32 g = REPEAT8(src_argb0[1]);
-    const uint32 r = REPEAT8(src_argb0[2]);
-    const uint32 a = REPEAT8(src_argb0[3]);
-    const uint32 b_scale = src_argb1[0];
-    const uint32 g_scale = src_argb1[1];
-    const uint32 r_scale = src_argb1[2];
-    const uint32 a_scale = src_argb1[3];
-    dst_argb[0] = SHADE(b, b_scale);
-    dst_argb[1] = SHADE(g, g_scale);
-    dst_argb[2] = SHADE(r, r_scale);
-    dst_argb[3] = SHADE(a, a_scale);
-    src_argb0 += 4;
-    src_argb1 += 4;
-    dst_argb += 4;
-  }
-}
-#undef REPEAT8
-#undef SHADE
-
-// Saturating add helper; arguments are parenthesized so any expression can
-// be passed safely.
-#define SHADE(f, v) clamp255((v) + (f))
-
-// Add two rows of ARGB pixels channel by channel, saturating at 255.
-void ARGBAddRow_C(const uint8* src_argb0, const uint8* src_argb1,
-                  uint8* dst_argb, int width) {
-  int i;
-  for (i = 0; i < width; ++i) {
-    const int b = src_argb0[0];
-    const int g = src_argb0[1];
-    const int r = src_argb0[2];
-    const int a = src_argb0[3];
-    const int b_add = src_argb1[0];
-    const int g_add = src_argb1[1];
-    const int r_add = src_argb1[2];
-    const int a_add = src_argb1[3];
-    dst_argb[0] = SHADE(b, b_add);
-    dst_argb[1] = SHADE(g, g_add);
-    dst_argb[2] = SHADE(r, r_add);
-    dst_argb[3] = SHADE(a, a_add);
-    src_argb0 += 4;
-    src_argb1 += 4;
-    dst_argb += 4;
-  }
-}
-#undef SHADE
-
-// Saturating subtract helper; arguments are parenthesized so any expression
-// can be passed safely (f - v would otherwise mis-associate for compound v).
-#define SHADE(f, v) clamp0((f) - (v))
-
-// Subtract the second row of ARGB pixels from the first channel by channel,
-// saturating at 0.
-void ARGBSubtractRow_C(const uint8* src_argb0, const uint8* src_argb1,
-                       uint8* dst_argb, int width) {
-  int i;
-  for (i = 0; i < width; ++i) {
-    const int b = src_argb0[0];
-    const int g = src_argb0[1];
-    const int r = src_argb0[2];
-    const int a = src_argb0[3];
-    const int b_sub = src_argb1[0];
-    const int g_sub = src_argb1[1];
-    const int r_sub = src_argb1[2];
-    const int a_sub = src_argb1[3];
-    dst_argb[0] = SHADE(b, b_sub);
-    dst_argb[1] = SHADE(g, g_sub);
-    dst_argb[2] = SHADE(r, r_sub);
-    dst_argb[3] = SHADE(a, a_sub);
-    src_argb0 += 4;
-    src_argb1 += 4;
-    dst_argb += 4;
-  }
-}
-#undef SHADE
-
-// Sobel functions which mimic SSSE3.
-// Horizontal Sobel: for each column, the difference between column i and
-// column i + 2 is taken on three rows, weighted 1, 2, 1, and the absolute
-// value is clamped to 255. Reads up to src[width + 1] on each row.
-void SobelXRow_C(const uint8* src_y0, const uint8* src_y1, const uint8* src_y2,
-                 uint8* dst_sobelx, int width) {
-  int col;
-  for (col = 0; col < width; ++col) {
-    const int top_diff = src_y0[col] - src_y0[col + 2];
-    const int mid_diff = src_y1[col] - src_y1[col + 2];
-    const int bot_diff = src_y2[col] - src_y2[col + 2];
-    const int magnitude = Abs(top_diff + mid_diff * 2 + bot_diff);
-    dst_sobelx[col] = (uint8)(clamp255(magnitude));
-  }
-}
-
-// Vertical Sobel: for each column, the difference between the two rows is
-// taken at columns i, i + 1 and i + 2, weighted 1, 2, 1, and the absolute
-// value is clamped to 255. Reads up to src[width + 1] on each row.
-void SobelYRow_C(const uint8* src_y0, const uint8* src_y1,
-                 uint8* dst_sobely, int width) {
-  int col;
-  for (col = 0; col < width; ++col) {
-    const int left_diff = src_y0[col + 0] - src_y1[col + 0];
-    const int mid_diff = src_y0[col + 1] - src_y1[col + 1];
-    const int right_diff = src_y0[col + 2] - src_y1[col + 2];
-    const int magnitude = Abs(left_diff + mid_diff * 2 + right_diff);
-    dst_sobely[col] = (uint8)(clamp255(magnitude));
-  }
-}
-
-// Combine Sobel X and Y into a gray ARGB row: B, G and R all receive the
-// saturated sum of the two inputs and alpha is set to 255.
-void SobelRow_C(const uint8* src_sobelx, const uint8* src_sobely,
-                uint8* dst_argb, int width) {
-  int col;
-  for (col = 0; col < width; ++col) {
-    const int sum = clamp255(src_sobelx[col] + src_sobely[col]);
-    const uint8 gray = (uint8)(sum);
-    dst_argb[0] = gray;
-    dst_argb[1] = gray;
-    dst_argb[2] = gray;
-    dst_argb[3] = (uint8)(255u);
-    dst_argb += 4;
-  }
-}
-
-// Combine Sobel X and Y into a single plane: each output byte is the
-// saturated sum of the two inputs.
-void SobelToPlaneRow_C(const uint8* src_sobelx, const uint8* src_sobely,
-                       uint8* dst_y, int width) {
-  int col;
-  for (col = 0; col < width; ++col) {
-    const int sum = clamp255(src_sobelx[col] + src_sobely[col]);
-    dst_y[col] = (uint8)(sum);
-  }
-}
-
-// Pack Sobel X and Y into an ARGB row: R = Sobel X, B = Sobel Y,
-// G = saturated sum of both, A = 255.
-void SobelXYRow_C(const uint8* src_sobelx, const uint8* src_sobely,
-                  uint8* dst_argb, int width) {
-  int col;
-  for (col = 0; col < width; ++col) {
-    const int grad_x = src_sobelx[col];
-    const int grad_y = src_sobely[col];
-    const int combined = clamp255(grad_x + grad_y);
-    dst_argb[0] = (uint8)(grad_y);
-    dst_argb[1] = (uint8)(combined);
-    dst_argb[2] = (uint8)(grad_x);
-    dst_argb[3] = (uint8)(255u);
-    dst_argb += 4;
-  }
-}
-
-// Expand a row of Y samples to gray ARGB: Y is copied unchanged into B, G
-// and R, and alpha is set to 255.
-void J400ToARGBRow_C(const uint8* src_y, uint8* dst_argb, int width) {
-  int remaining;
-  for (remaining = width; remaining > 0; --remaining) {
-    const uint8 luma = *src_y;
-    dst_argb[0] = luma;
-    dst_argb[1] = luma;
-    dst_argb[2] = luma;
-    dst_argb[3] = 255u;
-    dst_argb += 4;
-    ++src_y;
-  }
-}
-
-// BT.601 YUV to RGB reference
-// R = (Y - 16) * 1.164 - V * -1.596
-// G = (Y - 16) * 1.164 - U * 0.391 - V * 0.813
-// B = (Y - 16) * 1.164 - U * -2.018
-//
-// The constants below are 6 bit fixed point (scaled by 64) and are only
-// visible to YuvPixel and YPixel; they are #undef'd right after.
-
-// Y contribution to R,G,B. Scale and bias.
-// TODO(fbarchard): Consider moving constants into a common header.
-#define YG 18997 /* round(1.164 * 64 * 256 * 256 / 257) */
-#define YGB -1160 /* 1.164 * 64 * -16 + 64 / 2 */
-
-// U and V contributions to R,G,B.
-#define UB -128 /* max(-128, round(-2.018 * 64)) */
-#define UG 25 /* round(0.391 * 64) */
-#define VG 52 /* round(0.813 * 64) */
-#define VR -102 /* round(-1.596 * 64) */
-
-// Bias values to subtract 16 from Y and 128 from U and V.
-#define BB (UB * 128 + YGB)
-#define BG (UG * 128 + VG * 128 + YGB)
-#define BR (VR * 128 + YGB)
-
-// C reference code that mimics the YUV assembly.
-// Converts one Y, U, V sample to 8 bit B, G, R written through the pointers.
-static __inline void YuvPixel(uint8 y, uint8 u, uint8 v,
- uint8* b, uint8* g, uint8* r) {
- // y * 0x0101 replicates y into 16 bits; the product keeps its top 16 bits.
- uint32 y1 = (uint32)(y * 0x0101 * YG) >> 16;
- // Each channel is still scaled by 64 here; >> 6 removes the scale.
- *b = Clamp((int32)(-(u * UB) + y1 + BB) >> 6);
- *g = Clamp((int32)(-(v * VG + u * UG) + y1 + BG) >> 6);
- *r = Clamp((int32)(-(v * VR)+ y1 + BR) >> 6);
-}
-
-// C reference code that mimics the YUV assembly.
-// Luma-only variant: B, G and R all receive the same scaled and biased Y.
-static __inline void YPixel(uint8 y, uint8* b, uint8* g, uint8* r) {
- uint32 y1 = (uint32)(y * 0x0101 * YG) >> 16;
- *b = Clamp((int32)(y1 + YGB) >> 6);
- *g = Clamp((int32)(y1 + YGB) >> 6);
- *r = Clamp((int32)(y1 + YGB) >> 6);
-}
-
-#undef YG
-#undef YGB
-#undef UB
-#undef UG
-#undef VG
-#undef VR
-#undef BB
-#undef BG
-#undef BR
-
-// JPEG YUV to RGB reference
-// * R = Y - V * -1.40200
-// * G = Y - U * 0.34414 - V * 0.71414
-// * B = Y - U * -1.77200
-//
-// The constants below are 6 bit fixed point (scaled by 64) and are only
-// visible to YuvJPixel; they are #undef'd right after.
-
-// Y contribution to R,G,B. Scale and bias.
-// TODO(fbarchard): Consider moving constants into a common header.
-#define YGJ 16320 /* round(1.000 * 64 * 256 * 256 / 257) */
-#define YGBJ 32 /* 64 / 2 */
-
-// U and V contributions to R,G,B.
-#define UBJ -113 /* round(-1.77200 * 64) */
-#define UGJ 22 /* round(0.34414 * 64) */
-#define VGJ 46 /* round(0.71414 * 64) */
-#define VRJ -90 /* round(-1.40200 * 64) */
-
-// Bias values to subtract 128 from U and V. Unlike the BT.601 constants,
-// YGBJ carries only the rounding term (64 / 2), so Y is not offset by 16.
-#define BBJ (UBJ * 128 + YGBJ)
-#define BGJ (UGJ * 128 + VGJ * 128 + YGBJ)
-#define BRJ (VRJ * 128 + YGBJ)
-
-// C reference code that mimics the YUV assembly.
-// Converts one JPEG Y, U, V sample to 8 bit B, G, R written through the
-// pointers.
-static __inline void YuvJPixel(uint8 y, uint8 u, uint8 v,
- uint8* b, uint8* g, uint8* r) {
- // y * 0x0101 replicates y into 16 bits; the product keeps its top 16 bits.
- uint32 y1 = (uint32)(y * 0x0101 * YGJ) >> 16;
- // Each channel is still scaled by 64 here; >> 6 removes the scale.
- *b = Clamp((int32)(-(u * UBJ) + y1 + BBJ) >> 6);
- *g = Clamp((int32)(-(v * VGJ + u * UGJ) + y1 + BGJ) >> 6);
- *r = Clamp((int32)(-(v * VRJ) + y1 + BRJ) >> 6);
-}
-
-#undef YGJ
-#undef YGBJ
-#undef UBJ
-#undef UGJ
-#undef VGJ
-#undef VRJ
-#undef BBJ
-#undef BGJ
-#undef BRJ
-
-#if !defined(LIBYUV_DISABLE_NEON) && \
-    (defined(__ARM_NEON__) || defined(__aarch64__) || defined(LIBYUV_NEON))
-// C mimic assembly.
-// Convert a row of I444 to ARGB. This variant averages each horizontal pair
-// of U and V samples (rounded) and uses the average for both pixels.
-// TODO(fbarchard): Remove subsampling from Neon.
-void I444ToARGBRow_C(const uint8* src_y,
-                     const uint8* src_u,
-                     const uint8* src_v,
-                     uint8* rgb_buf,
-                     int width) {
-  int x;
-  for (x = 0; x < width - 1; x += 2) {
-    uint8 u = (src_u[0] + src_u[1] + 1) >> 1;
-    uint8 v = (src_v[0] + src_v[1] + 1) >> 1;
-    YuvPixel(src_y[0], u, v, rgb_buf + 0, rgb_buf + 1, rgb_buf + 2);
-    rgb_buf[3] = 255;
-    YuvPixel(src_y[1], u, v, rgb_buf + 4, rgb_buf + 5, rgb_buf + 6);
-    rgb_buf[7] = 255;
-    src_y += 2;
-    src_u += 2;
-    src_v += 2;
-    rgb_buf += 8;  // Advance 2 pixels.
-  }
-  if (width & 1) {
-    YuvPixel(src_y[0], src_u[0], src_v[0],
-             rgb_buf + 0, rgb_buf + 1, rgb_buf + 2);
-    // The trailing pixel needs an opaque alpha like every other pixel;
-    // without this store its alpha byte is left uninitialized.
-    rgb_buf[3] = 255;
-  }
-}
-#else
-// Convert a row of I444 to ARGB, one U/V sample per pixel, alpha = 255.
-void I444ToARGBRow_C(const uint8* src_y,
-                     const uint8* src_u,
-                     const uint8* src_v,
-                     uint8* rgb_buf,
-                     int width) {
-  int x;
-  for (x = 0; x < width; ++x) {
-    YuvPixel(src_y[0], src_u[0], src_v[0],
-             rgb_buf + 0, rgb_buf + 1, rgb_buf + 2);
-    rgb_buf[3] = 255;
-    src_y += 1;
-    src_u += 1;
-    src_v += 1;
-    rgb_buf += 4;  // Advance 1 pixel.
-  }
-}
-#endif
-
-// Convert a row of I422 to ARGB with alpha = 255. Also used for 420.
-// Every U/V sample is shared by two horizontally adjacent Y samples.
-void I422ToARGBRow_C(const uint8* src_y,
-                     const uint8* src_u,
-                     const uint8* src_v,
-                     uint8* rgb_buf,
-                     int width) {
-  int remaining = width;
-  while (remaining > 1) {
-    YuvPixel(src_y[0], *src_u, *src_v,
-             &rgb_buf[0], &rgb_buf[1], &rgb_buf[2]);
-    rgb_buf[3] = 255;
-    YuvPixel(src_y[1], *src_u, *src_v,
-             &rgb_buf[4], &rgb_buf[5], &rgb_buf[6]);
-    rgb_buf[7] = 255;
-    src_y += 2;
-    ++src_u;
-    ++src_v;
-    rgb_buf += 8;  // Two ARGB pixels written.
-    remaining -= 2;
-  }
-  if (width & 1) {
-    YuvPixel(src_y[0], *src_u, *src_v,
-             &rgb_buf[0], &rgb_buf[1], &rgb_buf[2]);
-    rgb_buf[3] = 255;
-  }
-}
-
-// Convert a row of J422 (JPEG range 4:2:2) to ARGB with alpha = 255.
-// Every U/V sample is shared by two horizontally adjacent Y samples.
-void J422ToARGBRow_C(const uint8* src_y,
-                     const uint8* src_u,
-                     const uint8* src_v,
-                     uint8* rgb_buf,
-                     int width) {
-  int remaining = width;
-  while (remaining > 1) {
-    YuvJPixel(src_y[0], *src_u, *src_v,
-              &rgb_buf[0], &rgb_buf[1], &rgb_buf[2]);
-    rgb_buf[3] = 255;
-    YuvJPixel(src_y[1], *src_u, *src_v,
-              &rgb_buf[4], &rgb_buf[5], &rgb_buf[6]);
-    rgb_buf[7] = 255;
-    src_y += 2;
-    ++src_u;
-    ++src_v;
-    rgb_buf += 8;  // Two ARGB pixels written.
-    remaining -= 2;
-  }
-  if (width & 1) {
-    YuvJPixel(src_y[0], *src_u, *src_v,
-              &rgb_buf[0], &rgb_buf[1], &rgb_buf[2]);
-    rgb_buf[3] = 255;
-  }
-}
-
-// Convert a row of I422 to RGB24 (3 bytes per pixel, stored B, G, R).
-// Every U/V sample is shared by two horizontally adjacent Y samples.
-void I422ToRGB24Row_C(const uint8* src_y,
-                      const uint8* src_u,
-                      const uint8* src_v,
-                      uint8* rgb_buf,
-                      int width) {
-  int remaining = width;
-  while (remaining > 1) {
-    YuvPixel(src_y[0], *src_u, *src_v,
-             &rgb_buf[0], &rgb_buf[1], &rgb_buf[2]);
-    YuvPixel(src_y[1], *src_u, *src_v,
-             &rgb_buf[3], &rgb_buf[4], &rgb_buf[5]);
-    src_y += 2;
-    ++src_u;
-    ++src_v;
-    rgb_buf += 6;  // Two 3 byte pixels written.
-    remaining -= 2;
-  }
-  if (width & 1) {
-    YuvPixel(src_y[0], *src_u, *src_v,
-             &rgb_buf[0], &rgb_buf[1], &rgb_buf[2]);
-  }
-}
-
-// Convert a row of I422 to RAW (3 bytes per pixel, stored R, G, B - the
-// reverse byte order of RGB24).
-// Every U/V sample is shared by two horizontally adjacent Y samples.
-void I422ToRAWRow_C(const uint8* src_y,
-                    const uint8* src_u,
-                    const uint8* src_v,
-                    uint8* rgb_buf,
-                    int width) {
-  int remaining = width;
-  while (remaining > 1) {
-    YuvPixel(src_y[0], *src_u, *src_v,
-             &rgb_buf[2], &rgb_buf[1], &rgb_buf[0]);
-    YuvPixel(src_y[1], *src_u, *src_v,
-             &rgb_buf[5], &rgb_buf[4], &rgb_buf[3]);
-    src_y += 2;
-    ++src_u;
-    ++src_v;
-    rgb_buf += 6;  // Two 3 byte pixels written.
-    remaining -= 2;
-  }
-  if (width & 1) {
-    YuvPixel(src_y[0], *src_u, *src_v,
-             &rgb_buf[2], &rgb_buf[1], &rgb_buf[0]);
-  }
-}
-
-// Convert a row of I422 to ARGB4444 with alpha forced to 0xf.
-// Each pixel is converted to 8 bit B, G, R with YuvPixel, reduced to 4 bits
-// per channel, and two pixels are packed into one 32 bit store. An odd
-// trailing pixel is stored as a single 16 bit value.
-void I422ToARGB4444Row_C(const uint8* src_y,
- const uint8* src_u,
- const uint8* src_v,
- uint8* dst_argb4444,
- int width) {
- uint8 b0;
- uint8 g0;
- uint8 r0;
- uint8 b1;
- uint8 g1;
- uint8 r1;
- int x;
- for (x = 0; x < width - 1; x += 2) {
- YuvPixel(src_y[0], src_u[0], src_v[0], &b0, &g0, &r0);
- YuvPixel(src_y[1], src_u[0], src_v[0], &b1, &g1, &r1);
- b0 = b0 >> 4;
- g0 = g0 >> 4;
- r0 = r0 >> 4;
- b1 = b1 >> 4;
- g1 = g1 >> 4;
- r1 = r1 >> 4;
- // 0xf000f000 sets the 4 alpha bits of both packed pixels.
- *(uint32*)(dst_argb4444) = b0 | (g0 << 4) | (r0 << 8) |
- (b1 << 16) | (g1 << 20) | (r1 << 24) | 0xf000f000;
- src_y += 2;
- src_u += 1;
- src_v += 1;
- dst_argb4444 += 4; // Advance 2 pixels.
- }
- if (width & 1) {
- YuvPixel(src_y[0], src_u[0], src_v[0], &b0, &g0, &r0);
- b0 = b0 >> 4;
- g0 = g0 >> 4;
- r0 = r0 >> 4;
- *(uint16*)(dst_argb4444) = b0 | (g0 << 4) | (r0 << 8) |
- 0xf000;
- }
-}
-
-// Convert a row of I422 to ARGB1555 with the alpha bit forced to 1.
-// Each pixel is converted to 8 bit B, G, R with YuvPixel, reduced to 5 bits
-// per channel, and two pixels are packed into one 32 bit store. An odd
-// trailing pixel is stored as a single 16 bit value.
-void I422ToARGB1555Row_C(const uint8* src_y,
- const uint8* src_u,
- const uint8* src_v,
- uint8* dst_argb1555,
- int width) {
- uint8 b0;
- uint8 g0;
- uint8 r0;
- uint8 b1;
- uint8 g1;
- uint8 r1;
- int x;
- for (x = 0; x < width - 1; x += 2) {
- YuvPixel(src_y[0], src_u[0], src_v[0], &b0, &g0, &r0);
- YuvPixel(src_y[1], src_u[0], src_v[0], &b1, &g1, &r1);
- b0 = b0 >> 3;
- g0 = g0 >> 3;
- r0 = r0 >> 3;
- b1 = b1 >> 3;
- g1 = g1 >> 3;
- r1 = r1 >> 3;
- // 0x80008000 sets the alpha bit of both packed pixels.
- *(uint32*)(dst_argb1555) = b0 | (g0 << 5) | (r0 << 10) |
- (b1 << 16) | (g1 << 21) | (r1 << 26) | 0x80008000;
- src_y += 2;
- src_u += 1;
- src_v += 1;
- dst_argb1555 += 4; // Advance 2 pixels.
- }
- if (width & 1) {
- YuvPixel(src_y[0], src_u[0], src_v[0], &b0, &g0, &r0);
- b0 = b0 >> 3;
- g0 = g0 >> 3;
- r0 = r0 >> 3;
- *(uint16*)(dst_argb1555) = b0 | (g0 << 5) | (r0 << 10) |
- 0x8000;
- }
-}
-
-// Convert a row of I422 to RGB565.
-// Each pixel is converted to 8 bit B, G, R with YuvPixel, reduced to 5/6/5
-// bits, and two pixels are packed into one 32 bit store. An odd trailing
-// pixel is stored as a single 16 bit value.
-void I422ToRGB565Row_C(const uint8* src_y,
-                       const uint8* src_u,
-                       const uint8* src_v,
-                       uint8* dst_rgb565,
-                       int width) {
-  uint8 b0;
-  uint8 g0;
-  uint8 r0;
-  uint8 b1;
-  uint8 g1;
-  uint8 r1;
-  int x;
-  for (x = 0; x < width - 1; x += 2) {
-    YuvPixel(src_y[0], src_u[0], src_v[0], &b0, &g0, &r0);
-    YuvPixel(src_y[1], src_u[0], src_v[0], &b1, &g1, &r1);
-    b0 = b0 >> 3;
-    g0 = g0 >> 2;
-    r0 = r0 >> 3;
-    b1 = b1 >> 3;
-    g1 = g1 >> 2;
-    r1 = r1 >> 3;
-    // Widen to uint32 before shifting: r1 << 27 does not fit in a signed int.
-    *(uint32*)(dst_rgb565) =
-        (uint32)b0 | ((uint32)g0 << 5) | ((uint32)r0 << 11) |
-        ((uint32)b1 << 16) | ((uint32)g1 << 21) | ((uint32)r1 << 27);
-    src_y += 2;
-    src_u += 1;
-    src_v += 1;
-    dst_rgb565 += 4;  // Advance 2 pixels.
-  }
-  if (width & 1) {
-    YuvPixel(src_y[0], src_u[0], src_v[0], &b0, &g0, &r0);
-    b0 = b0 >> 3;
-    g0 = g0 >> 2;
-    r0 = r0 >> 3;
-    *(uint16*)(dst_rgb565) = b0 | (g0 << 5) | (r0 << 11);
-  }
-}
-
-// Convert a row of I411 to ARGB with alpha = 255.
-// Every U/V sample is shared by four horizontally adjacent Y samples; the
-// 1 to 3 leftover pixels of a width that is not a multiple of four reuse the
-// next U/V sample.
-void I411ToARGBRow_C(const uint8* src_y,
-                     const uint8* src_u,
-                     const uint8* src_v,
-                     uint8* rgb_buf,
-                     int width) {
-  int remaining = width;
-  while (remaining > 3) {
-    int k;
-    for (k = 0; k < 4; ++k) {
-      uint8* pixel = rgb_buf + k * 4;
-      YuvPixel(src_y[k], src_u[0], src_v[0], pixel + 0, pixel + 1, pixel + 2);
-      pixel[3] = 255;
-    }
-    src_y += 4;
-    ++src_u;
-    ++src_v;
-    rgb_buf += 16;  // Four ARGB pixels written.
-    remaining -= 4;
-  }
-  if (width & 2) {
-    YuvPixel(src_y[0], src_u[0], src_v[0],
-             &rgb_buf[0], &rgb_buf[1], &rgb_buf[2]);
-    rgb_buf[3] = 255;
-    YuvPixel(src_y[1], src_u[0], src_v[0],
-             &rgb_buf[4], &rgb_buf[5], &rgb_buf[6]);
-    rgb_buf[7] = 255;
-    src_y += 2;
-    rgb_buf += 8;  // Two ARGB pixels written.
-  }
-  if (width & 1) {
-    YuvPixel(src_y[0], src_u[0], src_v[0],
-             &rgb_buf[0], &rgb_buf[1], &rgb_buf[2]);
-    rgb_buf[3] = 255;
-  }
-}
-
-// Convert a row of NV12 to ARGB with alpha = 255.
-// src_uv is interleaved (U at byte 0, V at byte 1) and every U/V pair is
-// shared by two horizontally adjacent Y samples.
-void NV12ToARGBRow_C(const uint8* src_y,
-                     const uint8* src_uv,
-                     uint8* rgb_buf,
-                     int width) {
-  int remaining = width;
-  while (remaining > 1) {
-    YuvPixel(src_y[0], src_uv[0], src_uv[1],
-             &rgb_buf[0], &rgb_buf[1], &rgb_buf[2]);
-    rgb_buf[3] = 255;
-    YuvPixel(src_y[1], src_uv[0], src_uv[1],
-             &rgb_buf[4], &rgb_buf[5], &rgb_buf[6]);
-    rgb_buf[7] = 255;
-    src_y += 2;
-    src_uv += 2;
-    rgb_buf += 8;  // Two ARGB pixels written.
-    remaining -= 2;
-  }
-  if (width & 1) {
-    YuvPixel(src_y[0], src_uv[0], src_uv[1],
-             &rgb_buf[0], &rgb_buf[1], &rgb_buf[2]);
-    rgb_buf[3] = 255;
-  }
-}
-
-void NV21ToARGBRow_C(const uint8* src_y,
- const uint8* src_vu,
- uint8* rgb_buf,
- int width) {
- int x;
- for (x = 0; x < width - 1; x += 2) {
- YuvPixel(src_y[0], src_vu[1], src_vu[0],
- rgb_buf + 0, rgb_buf + 1, rgb_buf + 2);
- rgb_buf[3] = 255;
-
- YuvPixel(src_y[1], src_vu[1], src_vu[0],
- rgb_buf + 4, rgb_buf + 5, rgb_buf + 6);
- rgb_buf[7] = 255;
-
- src_y += 2;
- src_vu += 2;
- rgb_buf += 8; // Advance 2 pixels.
- }
- if (width & 1) {
- YuvPixel(src_y[0], src_vu[1], src_vu[0],
- rgb_buf + 0, rgb_buf + 1, rgb_buf + 2);
- rgb_buf[3] = 255;
- }
-}
-
-void NV12ToRGB565Row_C(const uint8* src_y,
- const uint8* src_uv,
- uint8* dst_rgb565,
- int width) {
- uint8 b0;
- uint8 g0;
- uint8 r0;
- uint8 b1;
- uint8 g1;
- uint8 r1;
- int x;
- for (x = 0; x < width - 1; x += 2) {
- YuvPixel(src_y[0], src_uv[0], src_uv[1], &b0, &g0, &r0);
- YuvPixel(src_y[1], src_uv[0], src_uv[1], &b1, &g1, &r1);
- b0 = b0 >> 3;
- g0 = g0 >> 2;
- r0 = r0 >> 3;
- b1 = b1 >> 3;
- g1 = g1 >> 2;
- r1 = r1 >> 3;
- *(uint32*)(dst_rgb565) = b0 | (g0 << 5) | (r0 << 11) |
- (b1 << 16) | (g1 << 21) | (r1 << 27);
- src_y += 2;
- src_uv += 2;
- dst_rgb565 += 4; // Advance 2 pixels.
- }
- if (width & 1) {
- YuvPixel(src_y[0], src_uv[0], src_uv[1], &b0, &g0, &r0);
- b0 = b0 >> 3;
- g0 = g0 >> 2;
- r0 = r0 >> 3;
- *(uint16*)(dst_rgb565) = b0 | (g0 << 5) | (r0 << 11);
- }
-}
-
-void NV21ToRGB565Row_C(const uint8* src_y,
- const uint8* vsrc_u,
- uint8* dst_rgb565,
- int width) {
- uint8 b0;
- uint8 g0;
- uint8 r0;
- uint8 b1;
- uint8 g1;
- uint8 r1;
- int x;
- for (x = 0; x < width - 1; x += 2) {
- YuvPixel(src_y[0], vsrc_u[1], vsrc_u[0], &b0, &g0, &r0);
- YuvPixel(src_y[1], vsrc_u[1], vsrc_u[0], &b1, &g1, &r1);
- b0 = b0 >> 3;
- g0 = g0 >> 2;
- r0 = r0 >> 3;
- b1 = b1 >> 3;
- g1 = g1 >> 2;
- r1 = r1 >> 3;
- *(uint32*)(dst_rgb565) = b0 | (g0 << 5) | (r0 << 11) |
- (b1 << 16) | (g1 << 21) | (r1 << 27);
- src_y += 2;
- vsrc_u += 2;
- dst_rgb565 += 4; // Advance 2 pixels.
- }
- if (width & 1) {
- YuvPixel(src_y[0], vsrc_u[1], vsrc_u[0], &b0, &g0, &r0);
- b0 = b0 >> 3;
- g0 = g0 >> 2;
- r0 = r0 >> 3;
- *(uint16*)(dst_rgb565) = b0 | (g0 << 5) | (r0 << 11);
- }
-}
-
-void YUY2ToARGBRow_C(const uint8* src_yuy2,
- uint8* rgb_buf,
- int width) {
- int x;
- for (x = 0; x < width - 1; x += 2) {
- YuvPixel(src_yuy2[0], src_yuy2[1], src_yuy2[3],
- rgb_buf + 0, rgb_buf + 1, rgb_buf + 2);
- rgb_buf[3] = 255;
- YuvPixel(src_yuy2[2], src_yuy2[1], src_yuy2[3],
- rgb_buf + 4, rgb_buf + 5, rgb_buf + 6);
- rgb_buf[7] = 255;
- src_yuy2 += 4;
- rgb_buf += 8; // Advance 2 pixels.
- }
- if (width & 1) {
- YuvPixel(src_yuy2[0], src_yuy2[1], src_yuy2[3],
- rgb_buf + 0, rgb_buf + 1, rgb_buf + 2);
- rgb_buf[3] = 255;
- }
-}
-
-void UYVYToARGBRow_C(const uint8* src_uyvy,
- uint8* rgb_buf,
- int width) {
- int x;
- for (x = 0; x < width - 1; x += 2) {
- YuvPixel(src_uyvy[1], src_uyvy[0], src_uyvy[2],
- rgb_buf + 0, rgb_buf + 1, rgb_buf + 2);
- rgb_buf[3] = 255;
- YuvPixel(src_uyvy[3], src_uyvy[0], src_uyvy[2],
- rgb_buf + 4, rgb_buf + 5, rgb_buf + 6);
- rgb_buf[7] = 255;
- src_uyvy += 4;
- rgb_buf += 8; // Advance 2 pixels.
- }
- if (width & 1) {
- YuvPixel(src_uyvy[1], src_uyvy[0], src_uyvy[2],
- rgb_buf + 0, rgb_buf + 1, rgb_buf + 2);
- rgb_buf[3] = 255;
- }
-}
-
-void I422ToBGRARow_C(const uint8* src_y,
- const uint8* src_u,
- const uint8* src_v,
- uint8* rgb_buf,
- int width) {
- int x;
- for (x = 0; x < width - 1; x += 2) {
- YuvPixel(src_y[0], src_u[0], src_v[0],
- rgb_buf + 3, rgb_buf + 2, rgb_buf + 1);
- rgb_buf[0] = 255;
- YuvPixel(src_y[1], src_u[0], src_v[0],
- rgb_buf + 7, rgb_buf + 6, rgb_buf + 5);
- rgb_buf[4] = 255;
- src_y += 2;
- src_u += 1;
- src_v += 1;
- rgb_buf += 8; // Advance 2 pixels.
- }
- if (width & 1) {
- YuvPixel(src_y[0], src_u[0], src_v[0],
- rgb_buf + 3, rgb_buf + 2, rgb_buf + 1);
- rgb_buf[0] = 255;
- }
-}
-
-void I422ToABGRRow_C(const uint8* src_y,
- const uint8* src_u,
- const uint8* src_v,
- uint8* rgb_buf,
- int width) {
- int x;
- for (x = 0; x < width - 1; x += 2) {
- YuvPixel(src_y[0], src_u[0], src_v[0],
- rgb_buf + 2, rgb_buf + 1, rgb_buf + 0);
- rgb_buf[3] = 255;
- YuvPixel(src_y[1], src_u[0], src_v[0],
- rgb_buf + 6, rgb_buf + 5, rgb_buf + 4);
- rgb_buf[7] = 255;
- src_y += 2;
- src_u += 1;
- src_v += 1;
- rgb_buf += 8; // Advance 2 pixels.
- }
- if (width & 1) {
- YuvPixel(src_y[0], src_u[0], src_v[0],
- rgb_buf + 2, rgb_buf + 1, rgb_buf + 0);
- rgb_buf[3] = 255;
- }
-}
-
-void I422ToRGBARow_C(const uint8* src_y,
- const uint8* src_u,
- const uint8* src_v,
- uint8* rgb_buf,
- int width) {
- int x;
- for (x = 0; x < width - 1; x += 2) {
- YuvPixel(src_y[0], src_u[0], src_v[0],
- rgb_buf + 1, rgb_buf + 2, rgb_buf + 3);
- rgb_buf[0] = 255;
- YuvPixel(src_y[1], src_u[0], src_v[0],
- rgb_buf + 5, rgb_buf + 6, rgb_buf + 7);
- rgb_buf[4] = 255;
- src_y += 2;
- src_u += 1;
- src_v += 1;
- rgb_buf += 8; // Advance 2 pixels.
- }
- if (width & 1) {
- YuvPixel(src_y[0], src_u[0], src_v[0],
- rgb_buf + 1, rgb_buf + 2, rgb_buf + 3);
- rgb_buf[0] = 255;
- }
-}
-
-void I400ToARGBRow_C(const uint8* src_y, uint8* rgb_buf, int width) {
- int x;
- for (x = 0; x < width - 1; x += 2) {
- YPixel(src_y[0], rgb_buf + 0, rgb_buf + 1, rgb_buf + 2);
- rgb_buf[3] = 255;
- YPixel(src_y[1], rgb_buf + 4, rgb_buf + 5, rgb_buf + 6);
- rgb_buf[7] = 255;
- src_y += 2;
- rgb_buf += 8; // Advance 2 pixels.
- }
- if (width & 1) {
- YPixel(src_y[0], rgb_buf + 0, rgb_buf + 1, rgb_buf + 2);
- rgb_buf[3] = 255;
- }
-}
-
-void MirrorRow_C(const uint8* src, uint8* dst, int width) {
- int x;
- src += width - 1;
- for (x = 0; x < width - 1; x += 2) {
- dst[x] = src[0];
- dst[x + 1] = src[-1];
- src -= 2;
- }
- if (width & 1) {
- dst[width - 1] = src[0];
- }
-}
-
-void MirrorUVRow_C(const uint8* src_uv, uint8* dst_u, uint8* dst_v, int width) {
- int x;
- src_uv += (width - 1) << 1;
- for (x = 0; x < width - 1; x += 2) {
- dst_u[x] = src_uv[0];
- dst_u[x + 1] = src_uv[-2];
- dst_v[x] = src_uv[1];
- dst_v[x + 1] = src_uv[-2 + 1];
- src_uv -= 4;
- }
- if (width & 1) {
- dst_u[width - 1] = src_uv[0];
- dst_v[width - 1] = src_uv[1];
- }
-}
-
-void ARGBMirrorRow_C(const uint8* src, uint8* dst, int width) {
- int x;
- const uint32* src32 = (const uint32*)(src);
- uint32* dst32 = (uint32*)(dst);
- src32 += width - 1;
- for (x = 0; x < width - 1; x += 2) {
- dst32[x] = src32[0];
- dst32[x + 1] = src32[-1];
- src32 -= 2;
- }
- if (width & 1) {
- dst32[width - 1] = src32[0];
- }
-}
-
-void SplitUVRow_C(const uint8* src_uv, uint8* dst_u, uint8* dst_v, int width) {
- int x;
- for (x = 0; x < width - 1; x += 2) {
- dst_u[x] = src_uv[0];
- dst_u[x + 1] = src_uv[2];
- dst_v[x] = src_uv[1];
- dst_v[x + 1] = src_uv[3];
- src_uv += 4;
- }
- if (width & 1) {
- dst_u[width - 1] = src_uv[0];
- dst_v[width - 1] = src_uv[1];
- }
-}
-
-void MergeUVRow_C(const uint8* src_u, const uint8* src_v, uint8* dst_uv,
- int width) {
- int x;
- for (x = 0; x < width - 1; x += 2) {
- dst_uv[0] = src_u[x];
- dst_uv[1] = src_v[x];
- dst_uv[2] = src_u[x + 1];
- dst_uv[3] = src_v[x + 1];
- dst_uv += 4;
- }
- if (width & 1) {
- dst_uv[0] = src_u[width - 1];
- dst_uv[1] = src_v[width - 1];
- }
-}
-
-void CopyRow_C(const uint8* src, uint8* dst, int count) {
- memcpy(dst, src, count);
-}
-
-void CopyRow_16_C(const uint16* src, uint16* dst, int count) {
- memcpy(dst, src, count * 2);
-}
-
-void SetRow_C(uint8* dst, uint8 v8, int width) {
- memset(dst, v8, width);
-}
-
-void ARGBSetRow_C(uint8* dst_argb, uint32 v32, int width) {
- uint32* d = (uint32*)(dst_argb);
- int x;
- for (x = 0; x < width; ++x) {
- d[x] = v32;
- }
-}
-
-// Filter 2 rows of YUY2 UV's (422) into U and V (420).
-void YUY2ToUVRow_C(const uint8* src_yuy2, int src_stride_yuy2,
- uint8* dst_u, uint8* dst_v, int width) {
- // Output a row of UV values, filtering 2 rows of YUY2.
- int x;
- for (x = 0; x < width; x += 2) {
- dst_u[0] = (src_yuy2[1] + src_yuy2[src_stride_yuy2 + 1] + 1) >> 1;
- dst_v[0] = (src_yuy2[3] + src_yuy2[src_stride_yuy2 + 3] + 1) >> 1;
- src_yuy2 += 4;
- dst_u += 1;
- dst_v += 1;
- }
-}
-
-// Copy row of YUY2 UV's (422) into U and V (422).
-void YUY2ToUV422Row_C(const uint8* src_yuy2,
- uint8* dst_u, uint8* dst_v, int width) {
- // Output a row of UV values.
- int x;
- for (x = 0; x < width; x += 2) {
- dst_u[0] = src_yuy2[1];
- dst_v[0] = src_yuy2[3];
- src_yuy2 += 4;
- dst_u += 1;
- dst_v += 1;
- }
-}
-
-// Copy row of YUY2 Y's (422) into Y (420/422).
-void YUY2ToYRow_C(const uint8* src_yuy2, uint8* dst_y, int width) {
- // Output a row of Y values.
- int x;
- for (x = 0; x < width - 1; x += 2) {
- dst_y[x] = src_yuy2[0];
- dst_y[x + 1] = src_yuy2[2];
- src_yuy2 += 4;
- }
- if (width & 1) {
- dst_y[width - 1] = src_yuy2[0];
- }
-}
-
-// Filter 2 rows of UYVY UV's (422) into U and V (420).
-void UYVYToUVRow_C(const uint8* src_uyvy, int src_stride_uyvy,
- uint8* dst_u, uint8* dst_v, int width) {
- // Output a row of UV values.
- int x;
- for (x = 0; x < width; x += 2) {
- dst_u[0] = (src_uyvy[0] + src_uyvy[src_stride_uyvy + 0] + 1) >> 1;
- dst_v[0] = (src_uyvy[2] + src_uyvy[src_stride_uyvy + 2] + 1) >> 1;
- src_uyvy += 4;
- dst_u += 1;
- dst_v += 1;
- }
-}
-
-// Copy row of UYVY UV's (422) into U and V (422).
-void UYVYToUV422Row_C(const uint8* src_uyvy,
- uint8* dst_u, uint8* dst_v, int width) {
- // Output a row of UV values.
- int x;
- for (x = 0; x < width; x += 2) {
- dst_u[0] = src_uyvy[0];
- dst_v[0] = src_uyvy[2];
- src_uyvy += 4;
- dst_u += 1;
- dst_v += 1;
- }
-}
-
-// Copy row of UYVY Y's (422) into Y (420/422).
-void UYVYToYRow_C(const uint8* src_uyvy, uint8* dst_y, int width) {
- // Output a row of Y values.
- int x;
- for (x = 0; x < width - 1; x += 2) {
- dst_y[x] = src_uyvy[1];
- dst_y[x + 1] = src_uyvy[3];
- src_uyvy += 4;
- }
- if (width & 1) {
- dst_y[width - 1] = src_uyvy[1];
- }
-}
-
-#define BLEND(f, b, a) (((256 - a) * b) >> 8) + f
-
-// Blend src_argb0 over src_argb1 and store to dst_argb.
-// dst_argb may be src_argb0 or src_argb1.
-// This code mimics the SSSE3 version for better testability.
-void ARGBBlendRow_C(const uint8* src_argb0, const uint8* src_argb1,
- uint8* dst_argb, int width) {
- int x;
- for (x = 0; x < width - 1; x += 2) {
- uint32 fb = src_argb0[0];
- uint32 fg = src_argb0[1];
- uint32 fr = src_argb0[2];
- uint32 a = src_argb0[3];
- uint32 bb = src_argb1[0];
- uint32 bg = src_argb1[1];
- uint32 br = src_argb1[2];
- dst_argb[0] = BLEND(fb, bb, a);
- dst_argb[1] = BLEND(fg, bg, a);
- dst_argb[2] = BLEND(fr, br, a);
- dst_argb[3] = 255u;
-
- fb = src_argb0[4 + 0];
- fg = src_argb0[4 + 1];
- fr = src_argb0[4 + 2];
- a = src_argb0[4 + 3];
- bb = src_argb1[4 + 0];
- bg = src_argb1[4 + 1];
- br = src_argb1[4 + 2];
- dst_argb[4 + 0] = BLEND(fb, bb, a);
- dst_argb[4 + 1] = BLEND(fg, bg, a);
- dst_argb[4 + 2] = BLEND(fr, br, a);
- dst_argb[4 + 3] = 255u;
- src_argb0 += 8;
- src_argb1 += 8;
- dst_argb += 8;
- }
-
- if (width & 1) {
- uint32 fb = src_argb0[0];
- uint32 fg = src_argb0[1];
- uint32 fr = src_argb0[2];
- uint32 a = src_argb0[3];
- uint32 bb = src_argb1[0];
- uint32 bg = src_argb1[1];
- uint32 br = src_argb1[2];
- dst_argb[0] = BLEND(fb, bb, a);
- dst_argb[1] = BLEND(fg, bg, a);
- dst_argb[2] = BLEND(fr, br, a);
- dst_argb[3] = 255u;
- }
-}
-#undef BLEND
-#define ATTENUATE(f, a) (a | (a << 8)) * (f | (f << 8)) >> 24
-
-// Multiply source RGB by alpha and store to destination.
-// This code mimics the SSSE3 version for better testability.
-void ARGBAttenuateRow_C(const uint8* src_argb, uint8* dst_argb, int width) {
- int i;
- for (i = 0; i < width - 1; i += 2) {
- uint32 b = src_argb[0];
- uint32 g = src_argb[1];
- uint32 r = src_argb[2];
- uint32 a = src_argb[3];
- dst_argb[0] = ATTENUATE(b, a);
- dst_argb[1] = ATTENUATE(g, a);
- dst_argb[2] = ATTENUATE(r, a);
- dst_argb[3] = a;
- b = src_argb[4];
- g = src_argb[5];
- r = src_argb[6];
- a = src_argb[7];
- dst_argb[4] = ATTENUATE(b, a);
- dst_argb[5] = ATTENUATE(g, a);
- dst_argb[6] = ATTENUATE(r, a);
- dst_argb[7] = a;
- src_argb += 8;
- dst_argb += 8;
- }
-
- if (width & 1) {
- const uint32 b = src_argb[0];
- const uint32 g = src_argb[1];
- const uint32 r = src_argb[2];
- const uint32 a = src_argb[3];
- dst_argb[0] = ATTENUATE(b, a);
- dst_argb[1] = ATTENUATE(g, a);
- dst_argb[2] = ATTENUATE(r, a);
- dst_argb[3] = a;
- }
-}
-#undef ATTENUATE
-
-// Divide source RGB by alpha and store to destination.
-// b = (b * 255 + (a / 2)) / a;
-// g = (g * 255 + (a / 2)) / a;
-// r = (r * 255 + (a / 2)) / a;
-// Reciprocal method is off by 1 on some values. ie 125
-// 8.8 fixed point inverse table with 1.0 in upper short and 1 / a in lower.
-#define T(a) 0x01000000 + (0x10000 / a)
-const uint32 fixed_invtbl8[256] = {
- 0x01000000, 0x0100ffff, T(0x02), T(0x03), T(0x04), T(0x05), T(0x06), T(0x07),
- T(0x08), T(0x09), T(0x0a), T(0x0b), T(0x0c), T(0x0d), T(0x0e), T(0x0f),
- T(0x10), T(0x11), T(0x12), T(0x13), T(0x14), T(0x15), T(0x16), T(0x17),
- T(0x18), T(0x19), T(0x1a), T(0x1b), T(0x1c), T(0x1d), T(0x1e), T(0x1f),
- T(0x20), T(0x21), T(0x22), T(0x23), T(0x24), T(0x25), T(0x26), T(0x27),
- T(0x28), T(0x29), T(0x2a), T(0x2b), T(0x2c), T(0x2d), T(0x2e), T(0x2f),
- T(0x30), T(0x31), T(0x32), T(0x33), T(0x34), T(0x35), T(0x36), T(0x37),
- T(0x38), T(0x39), T(0x3a), T(0x3b), T(0x3c), T(0x3d), T(0x3e), T(0x3f),
- T(0x40), T(0x41), T(0x42), T(0x43), T(0x44), T(0x45), T(0x46), T(0x47),
- T(0x48), T(0x49), T(0x4a), T(0x4b), T(0x4c), T(0x4d), T(0x4e), T(0x4f),
- T(0x50), T(0x51), T(0x52), T(0x53), T(0x54), T(0x55), T(0x56), T(0x57),
- T(0x58), T(0x59), T(0x5a), T(0x5b), T(0x5c), T(0x5d), T(0x5e), T(0x5f),
- T(0x60), T(0x61), T(0x62), T(0x63), T(0x64), T(0x65), T(0x66), T(0x67),
- T(0x68), T(0x69), T(0x6a), T(0x6b), T(0x6c), T(0x6d), T(0x6e), T(0x6f),
- T(0x70), T(0x71), T(0x72), T(0x73), T(0x74), T(0x75), T(0x76), T(0x77),
- T(0x78), T(0x79), T(0x7a), T(0x7b), T(0x7c), T(0x7d), T(0x7e), T(0x7f),
- T(0x80), T(0x81), T(0x82), T(0x83), T(0x84), T(0x85), T(0x86), T(0x87),
- T(0x88), T(0x89), T(0x8a), T(0x8b), T(0x8c), T(0x8d), T(0x8e), T(0x8f),
- T(0x90), T(0x91), T(0x92), T(0x93), T(0x94), T(0x95), T(0x96), T(0x97),
- T(0x98), T(0x99), T(0x9a), T(0x9b), T(0x9c), T(0x9d), T(0x9e), T(0x9f),
- T(0xa0), T(0xa1), T(0xa2), T(0xa3), T(0xa4), T(0xa5), T(0xa6), T(0xa7),
- T(0xa8), T(0xa9), T(0xaa), T(0xab), T(0xac), T(0xad), T(0xae), T(0xaf),
- T(0xb0), T(0xb1), T(0xb2), T(0xb3), T(0xb4), T(0xb5), T(0xb6), T(0xb7),
- T(0xb8), T(0xb9), T(0xba), T(0xbb), T(0xbc), T(0xbd), T(0xbe), T(0xbf),
- T(0xc0), T(0xc1), T(0xc2), T(0xc3), T(0xc4), T(0xc5), T(0xc6), T(0xc7),
- T(0xc8), T(0xc9), T(0xca), T(0xcb), T(0xcc), T(0xcd), T(0xce), T(0xcf),
- T(0xd0), T(0xd1), T(0xd2), T(0xd3), T(0xd4), T(0xd5), T(0xd6), T(0xd7),
- T(0xd8), T(0xd9), T(0xda), T(0xdb), T(0xdc), T(0xdd), T(0xde), T(0xdf),
- T(0xe0), T(0xe1), T(0xe2), T(0xe3), T(0xe4), T(0xe5), T(0xe6), T(0xe7),
- T(0xe8), T(0xe9), T(0xea), T(0xeb), T(0xec), T(0xed), T(0xee), T(0xef),
- T(0xf0), T(0xf1), T(0xf2), T(0xf3), T(0xf4), T(0xf5), T(0xf6), T(0xf7),
- T(0xf8), T(0xf9), T(0xfa), T(0xfb), T(0xfc), T(0xfd), T(0xfe), 0x01000100 };
-#undef T
-
-void ARGBUnattenuateRow_C(const uint8* src_argb, uint8* dst_argb, int width) {
- int i;
- for (i = 0; i < width; ++i) {
- uint32 b = src_argb[0];
- uint32 g = src_argb[1];
- uint32 r = src_argb[2];
- const uint32 a = src_argb[3];
- const uint32 ia = fixed_invtbl8[a] & 0xffff; // 8.8 fixed point
- b = (b * ia) >> 8;
- g = (g * ia) >> 8;
- r = (r * ia) >> 8;
- // Clamping should not be necessary but is free in assembly.
- dst_argb[0] = clamp255(b);
- dst_argb[1] = clamp255(g);
- dst_argb[2] = clamp255(r);
- dst_argb[3] = a;
- src_argb += 4;
- dst_argb += 4;
- }
-}
-
-void ComputeCumulativeSumRow_C(const uint8* row, int32* cumsum,
- const int32* previous_cumsum, int width) {
- int32 row_sum[4] = {0, 0, 0, 0};
- int x;
- for (x = 0; x < width; ++x) {
- row_sum[0] += row[x * 4 + 0];
- row_sum[1] += row[x * 4 + 1];
- row_sum[2] += row[x * 4 + 2];
- row_sum[3] += row[x * 4 + 3];
- cumsum[x * 4 + 0] = row_sum[0] + previous_cumsum[x * 4 + 0];
- cumsum[x * 4 + 1] = row_sum[1] + previous_cumsum[x * 4 + 1];
- cumsum[x * 4 + 2] = row_sum[2] + previous_cumsum[x * 4 + 2];
- cumsum[x * 4 + 3] = row_sum[3] + previous_cumsum[x * 4 + 3];
- }
-}
-
-void CumulativeSumToAverageRow_C(const int32* tl, const int32* bl,
- int w, int area, uint8* dst, int count) {
- float ooa = 1.0f / area;
- int i;
- for (i = 0; i < count; ++i) {
- dst[0] = (uint8)((bl[w + 0] + tl[0] - bl[0] - tl[w + 0]) * ooa);
- dst[1] = (uint8)((bl[w + 1] + tl[1] - bl[1] - tl[w + 1]) * ooa);
- dst[2] = (uint8)((bl[w + 2] + tl[2] - bl[2] - tl[w + 2]) * ooa);
- dst[3] = (uint8)((bl[w + 3] + tl[3] - bl[3] - tl[w + 3]) * ooa);
- dst += 4;
- tl += 4;
- bl += 4;
- }
-}
-
-// Copy pixels from rotated source to destination row with a slope.
-LIBYUV_API
-void ARGBAffineRow_C(const uint8* src_argb, int src_argb_stride,
- uint8* dst_argb, const float* uv_dudv, int width) {
- int i;
- // Render a row of pixels from source into a buffer.
- float uv[2];
- uv[0] = uv_dudv[0];
- uv[1] = uv_dudv[1];
- for (i = 0; i < width; ++i) {
- int x = (int)(uv[0]);
- int y = (int)(uv[1]);
- *(uint32*)(dst_argb) =
- *(const uint32*)(src_argb + y * src_argb_stride +
- x * 4);
- dst_argb += 4;
- uv[0] += uv_dudv[2];
- uv[1] += uv_dudv[3];
- }
-}
-
-// Blend 2 rows into 1.
-static void HalfRow_C(const uint8* src_uv, int src_uv_stride,
- uint8* dst_uv, int pix) {
- int x;
- for (x = 0; x < pix; ++x) {
- dst_uv[x] = (src_uv[x] + src_uv[src_uv_stride + x] + 1) >> 1;
- }
-}
-
-static void HalfRow_16_C(const uint16* src_uv, int src_uv_stride,
- uint16* dst_uv, int pix) {
- int x;
- for (x = 0; x < pix; ++x) {
- dst_uv[x] = (src_uv[x] + src_uv[src_uv_stride + x] + 1) >> 1;
- }
-}
-
-// C version 2x2 -> 2x1.
-void InterpolateRow_C(uint8* dst_ptr, const uint8* src_ptr,
- ptrdiff_t src_stride,
- int width, int source_y_fraction) {
- int y1_fraction = source_y_fraction;
- int y0_fraction = 256 - y1_fraction;
- const uint8* src_ptr1 = src_ptr + src_stride;
- int x;
- if (source_y_fraction == 0) {
- memcpy(dst_ptr, src_ptr, width);
- return;
- }
- if (source_y_fraction == 128) {
- HalfRow_C(src_ptr, (int)(src_stride), dst_ptr, width);
- return;
- }
- for (x = 0; x < width - 1; x += 2) {
- dst_ptr[0] = (src_ptr[0] * y0_fraction + src_ptr1[0] * y1_fraction) >> 8;
- dst_ptr[1] = (src_ptr[1] * y0_fraction + src_ptr1[1] * y1_fraction) >> 8;
- src_ptr += 2;
- src_ptr1 += 2;
- dst_ptr += 2;
- }
- if (width & 1) {
- dst_ptr[0] = (src_ptr[0] * y0_fraction + src_ptr1[0] * y1_fraction) >> 8;
- }
-}
-
-void InterpolateRow_16_C(uint16* dst_ptr, const uint16* src_ptr,
- ptrdiff_t src_stride,
- int width, int source_y_fraction) {
- int y1_fraction = source_y_fraction;
- int y0_fraction = 256 - y1_fraction;
- const uint16* src_ptr1 = src_ptr + src_stride;
- int x;
- if (source_y_fraction == 0) {
- memcpy(dst_ptr, src_ptr, width * 2);
- return;
- }
- if (source_y_fraction == 128) {
- HalfRow_16_C(src_ptr, (int)(src_stride), dst_ptr, width);
- return;
- }
- for (x = 0; x < width - 1; x += 2) {
- dst_ptr[0] = (src_ptr[0] * y0_fraction + src_ptr1[0] * y1_fraction) >> 8;
- dst_ptr[1] = (src_ptr[1] * y0_fraction + src_ptr1[1] * y1_fraction) >> 8;
- src_ptr += 2;
- src_ptr1 += 2;
- dst_ptr += 2;
- }
- if (width & 1) {
- dst_ptr[0] = (src_ptr[0] * y0_fraction + src_ptr1[0] * y1_fraction) >> 8;
- }
-}
-
-// Use first 4 shuffler values to reorder ARGB channels.
-void ARGBShuffleRow_C(const uint8* src_argb, uint8* dst_argb,
- const uint8* shuffler, int pix) {
- int index0 = shuffler[0];
- int index1 = shuffler[1];
- int index2 = shuffler[2];
- int index3 = shuffler[3];
- // Shuffle a row of ARGB.
- int x;
- for (x = 0; x < pix; ++x) {
- // To support in-place conversion.
- uint8 b = src_argb[index0];
- uint8 g = src_argb[index1];
- uint8 r = src_argb[index2];
- uint8 a = src_argb[index3];
- dst_argb[0] = b;
- dst_argb[1] = g;
- dst_argb[2] = r;
- dst_argb[3] = a;
- src_argb += 4;
- dst_argb += 4;
- }
-}
-
-void I422ToYUY2Row_C(const uint8* src_y,
- const uint8* src_u,
- const uint8* src_v,
- uint8* dst_frame, int width) {
- int x;
- for (x = 0; x < width - 1; x += 2) {
- dst_frame[0] = src_y[0];
- dst_frame[1] = src_u[0];
- dst_frame[2] = src_y[1];
- dst_frame[3] = src_v[0];
- dst_frame += 4;
- src_y += 2;
- src_u += 1;
- src_v += 1;
- }
- if (width & 1) {
- dst_frame[0] = src_y[0];
- dst_frame[1] = src_u[0];
- dst_frame[2] = 0;
- dst_frame[3] = src_v[0];
- }
-}
-
-void I422ToUYVYRow_C(const uint8* src_y,
- const uint8* src_u,
- const uint8* src_v,
- uint8* dst_frame, int width) {
- int x;
- for (x = 0; x < width - 1; x += 2) {
- dst_frame[0] = src_u[0];
- dst_frame[1] = src_y[0];
- dst_frame[2] = src_v[0];
- dst_frame[3] = src_y[1];
- dst_frame += 4;
- src_y += 2;
- src_u += 1;
- src_v += 1;
- }
- if (width & 1) {
- dst_frame[0] = src_u[0];
- dst_frame[1] = src_y[0];
- dst_frame[2] = src_v[0];
- dst_frame[3] = 0;
- }
-}
-
-// Maximum temporary width for wrappers to process at a time, in pixels.
-#define MAXTWIDTH 2048
-
-#if !(defined(_MSC_VER) && !defined(__clang__)) && \
- defined(HAS_I422TORGB565ROW_SSSE3)
-// row_win.cc has asm version, but GCC uses 2 step wrapper.
-void I422ToRGB565Row_SSSE3(const uint8* src_y,
- const uint8* src_u,
- const uint8* src_v,
- uint8* dst_rgb565,
- int width) {
- SIMD_ALIGNED(uint8 row[MAXTWIDTH * 4]);
- while (width > 0) {
- int twidth = width > MAXTWIDTH ? MAXTWIDTH : width;
- I422ToARGBRow_SSSE3(src_y, src_u, src_v, row, twidth);
- ARGBToRGB565Row_SSE2(row, dst_rgb565, twidth);
- src_y += twidth;
- src_u += twidth / 2;
- src_v += twidth / 2;
- dst_rgb565 += twidth * 2;
- width -= twidth;
- }
-}
-#endif
-
-#if defined(HAS_I422TOARGB1555ROW_SSSE3)
-void I422ToARGB1555Row_SSSE3(const uint8* src_y,
- const uint8* src_u,
- const uint8* src_v,
- uint8* dst_argb1555,
- int width) {
- // Row buffer for intermediate ARGB pixels.
- SIMD_ALIGNED(uint8 row[MAXTWIDTH * 4]);
- while (width > 0) {
- int twidth = width > MAXTWIDTH ? MAXTWIDTH : width;
- I422ToARGBRow_SSSE3(src_y, src_u, src_v, row, twidth);
- ARGBToARGB1555Row_SSE2(row, dst_argb1555, twidth);
- src_y += twidth;
- src_u += twidth / 2;
- src_v += twidth / 2;
- dst_argb1555 += twidth * 2;
- width -= twidth;
- }
-}
-#endif
-
-#if defined(HAS_I422TOARGB4444ROW_SSSE3)
-void I422ToARGB4444Row_SSSE3(const uint8* src_y,
- const uint8* src_u,
- const uint8* src_v,
- uint8* dst_argb4444,
- int width) {
- // Row buffer for intermediate ARGB pixels.
- SIMD_ALIGNED(uint8 row[MAXTWIDTH * 4]);
- while (width > 0) {
- int twidth = width > MAXTWIDTH ? MAXTWIDTH : width;
- I422ToARGBRow_SSSE3(src_y, src_u, src_v, row, twidth);
- ARGBToARGB4444Row_SSE2(row, dst_argb4444, twidth);
- src_y += twidth;
- src_u += twidth / 2;
- src_v += twidth / 2;
- dst_argb4444 += twidth * 2;
- width -= twidth;
- }
-}
-#endif
-
-#if defined(HAS_NV12TORGB565ROW_SSSE3)
-void NV12ToRGB565Row_SSSE3(const uint8* src_y, const uint8* src_uv,
- uint8* dst_rgb565, int width) {
- // Row buffer for intermediate ARGB pixels.
- SIMD_ALIGNED(uint8 row[MAXTWIDTH * 4]);
- while (width > 0) {
- int twidth = width > MAXTWIDTH ? MAXTWIDTH : width;
- NV12ToARGBRow_SSSE3(src_y, src_uv, row, twidth);
- ARGBToRGB565Row_SSE2(row, dst_rgb565, twidth);
- src_y += twidth;
- src_uv += twidth;
- dst_rgb565 += twidth * 2;
- width -= twidth;
- }
-}
-#endif
-
-#if defined(HAS_NV21TORGB565ROW_SSSE3)
-void NV21ToRGB565Row_SSSE3(const uint8* src_y, const uint8* src_vu,
- uint8* dst_rgb565, int width) {
- // Row buffer for intermediate ARGB pixels.
- SIMD_ALIGNED(uint8 row[MAXTWIDTH * 4]);
- while (width > 0) {
- int twidth = width > MAXTWIDTH ? MAXTWIDTH : width;
- NV21ToARGBRow_SSSE3(src_y, src_vu, row, twidth);
- ARGBToRGB565Row_SSE2(row, dst_rgb565, twidth);
- src_y += twidth;
- src_vu += twidth;
- dst_rgb565 += twidth * 2;
- width -= twidth;
- }
-}
-#endif
-
-#if defined(HAS_YUY2TOARGBROW_SSSE3)
-void YUY2ToARGBRow_SSSE3(const uint8* src_yuy2, uint8* dst_argb, int width) {
- // Row buffers for intermediate YUV pixels.
- SIMD_ALIGNED(uint8 row_y[MAXTWIDTH]);
- SIMD_ALIGNED(uint8 row_u[MAXTWIDTH / 2]);
- SIMD_ALIGNED(uint8 row_v[MAXTWIDTH / 2]);
- while (width > 0) {
- int twidth = width > MAXTWIDTH ? MAXTWIDTH : width;
- YUY2ToUV422Row_SSE2(src_yuy2, row_u, row_v, twidth);
- YUY2ToYRow_SSE2(src_yuy2, row_y, twidth);
- I422ToARGBRow_SSSE3(row_y, row_u, row_v, dst_argb, twidth);
- src_yuy2 += twidth * 2;
- dst_argb += twidth * 4;
- width -= twidth;
- }
-}
-#endif
-
-#if defined(HAS_UYVYTOARGBROW_SSSE3)
-void UYVYToARGBRow_SSSE3(const uint8* src_uyvy, uint8* dst_argb, int width) {
- // Row buffers for intermediate YUV pixels.
- SIMD_ALIGNED(uint8 row_y[MAXTWIDTH]);
- SIMD_ALIGNED(uint8 row_u[MAXTWIDTH / 2]);
- SIMD_ALIGNED(uint8 row_v[MAXTWIDTH / 2]);
- while (width > 0) {
- int twidth = width > MAXTWIDTH ? MAXTWIDTH : width;
- UYVYToUV422Row_SSE2(src_uyvy, row_u, row_v, twidth);
- UYVYToYRow_SSE2(src_uyvy, row_y, twidth);
- I422ToARGBRow_SSSE3(row_y, row_u, row_v, dst_argb, twidth);
- src_uyvy += twidth * 2;
- dst_argb += twidth * 4;
- width -= twidth;
- }
-}
-#endif // !defined(LIBYUV_DISABLE_X86)
-
-#if defined(HAS_I422TORGB565ROW_AVX2)
-void I422ToRGB565Row_AVX2(const uint8* src_y,
- const uint8* src_u,
- const uint8* src_v,
- uint8* dst_rgb565,
- int width) {
- SIMD_ALIGNED32(uint8 row[MAXTWIDTH * 4]);
- while (width > 0) {
- int twidth = width > MAXTWIDTH ? MAXTWIDTH : width;
- I422ToARGBRow_AVX2(src_y, src_u, src_v, row, twidth);
- ARGBToRGB565Row_AVX2(row, dst_rgb565, twidth);
- src_y += twidth;
- src_u += twidth / 2;
- src_v += twidth / 2;
- dst_rgb565 += twidth * 2;
- width -= twidth;
- }
-}
-#endif
-
-#if defined(HAS_I422TOARGB1555ROW_AVX2)
-void I422ToARGB1555Row_AVX2(const uint8* src_y,
- const uint8* src_u,
- const uint8* src_v,
- uint8* dst_argb1555,
- int width) {
- // Row buffer for intermediate ARGB pixels.
- SIMD_ALIGNED32(uint8 row[MAXTWIDTH * 4]);
- while (width > 0) {
- int twidth = width > MAXTWIDTH ? MAXTWIDTH : width;
- I422ToARGBRow_AVX2(src_y, src_u, src_v, row, twidth);
- ARGBToARGB1555Row_AVX2(row, dst_argb1555, twidth);
- src_y += twidth;
- src_u += twidth / 2;
- src_v += twidth / 2;
- dst_argb1555 += twidth * 2;
- width -= twidth;
- }
-}
-#endif
-
-#if defined(HAS_I422TOARGB4444ROW_AVX2)
-void I422ToARGB4444Row_AVX2(const uint8* src_y,
- const uint8* src_u,
- const uint8* src_v,
- uint8* dst_argb4444,
- int width) {
- // Row buffer for intermediate ARGB pixels.
- SIMD_ALIGNED32(uint8 row[MAXTWIDTH * 4]);
- while (width > 0) {
- int twidth = width > MAXTWIDTH ? MAXTWIDTH : width;
- I422ToARGBRow_AVX2(src_y, src_u, src_v, row, twidth);
- ARGBToARGB4444Row_AVX2(row, dst_argb4444, twidth);
- src_y += twidth;
- src_u += twidth / 2;
- src_v += twidth / 2;
- dst_argb4444 += twidth * 2;
- width -= twidth;
- }
-}
-#endif
-
-#if defined(HAS_I422TORGB24ROW_AVX2)
-void I422ToRGB24Row_AVX2(const uint8* src_y,
- const uint8* src_u,
- const uint8* src_v,
- uint8* dst_rgb24,
- int width) {
- // Row buffer for intermediate ARGB pixels.
- SIMD_ALIGNED32(uint8 row[MAXTWIDTH * 4]);
- while (width > 0) {
- int twidth = width > MAXTWIDTH ? MAXTWIDTH : width;
- I422ToARGBRow_AVX2(src_y, src_u, src_v, row, twidth);
- // TODO(fbarchard): ARGBToRGB24Row_AVX2
- ARGBToRGB24Row_SSSE3(row, dst_rgb24, twidth);
- src_y += twidth;
- src_u += twidth / 2;
- src_v += twidth / 2;
- dst_rgb24 += twidth * 3;
- width -= twidth;
- }
-}
-#endif
-
-#if defined(HAS_I422TORAWROW_AVX2)
-void I422ToRAWRow_AVX2(const uint8* src_y,
- const uint8* src_u,
- const uint8* src_v,
- uint8* dst_raw,
- int width) {
- // Row buffer for intermediate ARGB pixels.
- SIMD_ALIGNED32(uint8 row[MAXTWIDTH * 4]);
- while (width > 0) {
- int twidth = width > MAXTWIDTH ? MAXTWIDTH : width;
- I422ToARGBRow_AVX2(src_y, src_u, src_v, row, twidth);
- // TODO(fbarchard): ARGBToRAWRow_AVX2
- ARGBToRAWRow_SSSE3(row, dst_raw, twidth);
- src_y += twidth;
- src_u += twidth / 2;
- src_v += twidth / 2;
- dst_raw += twidth * 3;
- width -= twidth;
- }
-}
-#endif
-
-#if defined(HAS_NV12TORGB565ROW_AVX2)
-void NV12ToRGB565Row_AVX2(const uint8* src_y, const uint8* src_uv,
- uint8* dst_rgb565, int width) {
- // Row buffer for intermediate ARGB pixels.
- SIMD_ALIGNED32(uint8 row[MAXTWIDTH * 4]);
- while (width > 0) {
- int twidth = width > MAXTWIDTH ? MAXTWIDTH : width;
- NV12ToARGBRow_AVX2(src_y, src_uv, row, twidth);
- ARGBToRGB565Row_AVX2(row, dst_rgb565, twidth);
- src_y += twidth;
- src_uv += twidth;
- dst_rgb565 += twidth * 2;
- width -= twidth;
- }
-}
-#endif
-
-#if defined(HAS_NV21TORGB565ROW_AVX2)
-void NV21ToRGB565Row_AVX2(const uint8* src_y, const uint8* src_vu,
- uint8* dst_rgb565, int width) {
- // Row buffer for intermediate ARGB pixels.
- SIMD_ALIGNED32(uint8 row[MAXTWIDTH * 4]);
- while (width > 0) {
- int twidth = width > MAXTWIDTH ? MAXTWIDTH : width;
- NV21ToARGBRow_AVX2(src_y, src_vu, row, twidth);
- ARGBToRGB565Row_AVX2(row, dst_rgb565, twidth);
- src_y += twidth;
- src_vu += twidth;
- dst_rgb565 += twidth * 2;
- width -= twidth;
- }
-}
-#endif
-
-#if defined(HAS_YUY2TOARGBROW_AVX2)
-void YUY2ToARGBRow_AVX2(const uint8* src_yuy2, uint8* dst_argb, int width) {
- // Row buffers for intermediate YUV pixels.
- SIMD_ALIGNED32(uint8 row_y[MAXTWIDTH]);
- SIMD_ALIGNED32(uint8 row_u[MAXTWIDTH / 2]);
- SIMD_ALIGNED32(uint8 row_v[MAXTWIDTH / 2]);
- while (width > 0) {
- int twidth = width > MAXTWIDTH ? MAXTWIDTH : width;
- YUY2ToUV422Row_AVX2(src_yuy2, row_u, row_v, twidth);
- YUY2ToYRow_AVX2(src_yuy2, row_y, twidth);
- I422ToARGBRow_AVX2(row_y, row_u, row_v, dst_argb, twidth);
- src_yuy2 += twidth * 2;
- dst_argb += twidth * 4;
- width -= twidth;
- }
-}
-#endif
-
-#if defined(HAS_UYVYTOARGBROW_AVX2)
-void UYVYToARGBRow_AVX2(const uint8* src_uyvy, uint8* dst_argb, int width) {
- // Row buffers for intermediate YUV pixels.
- SIMD_ALIGNED32(uint8 row_y[MAXTWIDTH]);
- SIMD_ALIGNED32(uint8 row_u[MAXTWIDTH / 2]);
- SIMD_ALIGNED32(uint8 row_v[MAXTWIDTH / 2]);
- while (width > 0) {
- int twidth = width > MAXTWIDTH ? MAXTWIDTH : width;
- UYVYToUV422Row_AVX2(src_uyvy, row_u, row_v, twidth);
- UYVYToYRow_AVX2(src_uyvy, row_y, twidth);
- I422ToARGBRow_AVX2(row_y, row_u, row_v, dst_argb, twidth);
- src_uyvy += twidth * 2;
- dst_argb += twidth * 4;
- width -= twidth;
- }
-}
-#endif // !defined(LIBYUV_DISABLE_X86)
-
-void ARGBPolynomialRow_C(const uint8* src_argb,
- uint8* dst_argb, const float* poly,
- int width) {
- int i;
- for (i = 0; i < width; ++i) {
- float b = (float)(src_argb[0]);
- float g = (float)(src_argb[1]);
- float r = (float)(src_argb[2]);
- float a = (float)(src_argb[3]);
- float b2 = b * b;
- float g2 = g * g;
- float r2 = r * r;
- float a2 = a * a;
- float db = poly[0] + poly[4] * b;
- float dg = poly[1] + poly[5] * g;
- float dr = poly[2] + poly[6] * r;
- float da = poly[3] + poly[7] * a;
- float b3 = b2 * b;
- float g3 = g2 * g;
- float r3 = r2 * r;
- float a3 = a2 * a;
- db += poly[8] * b2;
- dg += poly[9] * g2;
- dr += poly[10] * r2;
- da += poly[11] * a2;
- db += poly[12] * b3;
- dg += poly[13] * g3;
- dr += poly[14] * r3;
- da += poly[15] * a3;
-
- dst_argb[0] = Clamp((int32)(db));
- dst_argb[1] = Clamp((int32)(dg));
- dst_argb[2] = Clamp((int32)(dr));
- dst_argb[3] = Clamp((int32)(da));
- src_argb += 4;
- dst_argb += 4;
- }
-}
-
-void ARGBLumaColorTableRow_C(const uint8* src_argb, uint8* dst_argb, int width,
- const uint8* luma, uint32 lumacoeff) {
- uint32 bc = lumacoeff & 0xff;
- uint32 gc = (lumacoeff >> 8) & 0xff;
- uint32 rc = (lumacoeff >> 16) & 0xff;
-
- int i;
- for (i = 0; i < width - 1; i += 2) {
- // Luminance in rows, color values in columns.
- const uint8* luma0 = ((src_argb[0] * bc + src_argb[1] * gc +
- src_argb[2] * rc) & 0x7F00u) + luma;
- const uint8* luma1;
- dst_argb[0] = luma0[src_argb[0]];
- dst_argb[1] = luma0[src_argb[1]];
- dst_argb[2] = luma0[src_argb[2]];
- dst_argb[3] = src_argb[3];
- luma1 = ((src_argb[4] * bc + src_argb[5] * gc +
- src_argb[6] * rc) & 0x7F00u) + luma;
- dst_argb[4] = luma1[src_argb[4]];
- dst_argb[5] = luma1[src_argb[5]];
- dst_argb[6] = luma1[src_argb[6]];
- dst_argb[7] = src_argb[7];
- src_argb += 8;
- dst_argb += 8;
- }
- if (width & 1) {
- // Luminance in rows, color values in columns.
- const uint8* luma0 = ((src_argb[0] * bc + src_argb[1] * gc +
- src_argb[2] * rc) & 0x7F00u) + luma;
- dst_argb[0] = luma0[src_argb[0]];
- dst_argb[1] = luma0[src_argb[1]];
- dst_argb[2] = luma0[src_argb[2]];
- dst_argb[3] = src_argb[3];
- }
-}
-
-void ARGBCopyAlphaRow_C(const uint8* src, uint8* dst, int width) {
- int i;
- for (i = 0; i < width - 1; i += 2) {
- dst[3] = src[3];
- dst[7] = src[7];
- dst += 8;
- src += 8;
- }
- if (width & 1) {
- dst[3] = src[3];
- }
-}
-
-void ARGBCopyYToAlphaRow_C(const uint8* src, uint8* dst, int width) {
- int i;
- for (i = 0; i < width - 1; i += 2) {
- dst[3] = src[0];
- dst[7] = src[1];
- dst += 8;
- src += 2;
- }
- if (width & 1) {
- dst[3] = src[0];
- }
-}
-
-#ifdef __cplusplus
-} // extern "C"
-} // namespace libyuv
-#endif
diff --git a/third_party/aom/third_party/libyuv/source/row_gcc.cc b/third_party/aom/third_party/libyuv/source/row_gcc.cc
deleted file mode 100644
index 820de0a1c..000000000
--- a/third_party/aom/third_party/libyuv/source/row_gcc.cc
+++ /dev/null
@@ -1,5475 +0,0 @@
-// VERSION 2
-/*
- * Copyright 2011 The LibYuv Project Authors. All rights reserved.
- *
- * Use of this source code is governed by a BSD-style license
- * that can be found in the LICENSE file in the root of the source
- * tree. An additional intellectual property rights grant can be found
- * in the file PATENTS. All contributing project authors may
- * be found in the AUTHORS file in the root of the source tree.
- */
-
-#include "libyuv/row.h"
-
-#ifdef __cplusplus
-namespace libyuv {
-extern "C" {
-#endif
-
-// This module is for GCC x86 and x64.
-#if !defined(LIBYUV_DISABLE_X86) && (defined(__x86_64__) || defined(__i386__))
-
-#if defined(HAS_ARGBTOYROW_SSSE3) || defined(HAS_ARGBGRAYROW_SSSE3)
-
-// Constants for ARGB
-static vec8 kARGBToY = { // Per-byte weights in pixel memory order, repeated for 4 pixels; loaded as the pmaddubsw multiplier in ARGBToYRow_SSSE3/AVX2.
- 13, 65, 33, 0, 13, 65, 33, 0, 13, 65, 33, 0, 13, 65, 33, 0
-};
-
-// JPeg full range.
-static vec8 kARGBToYJ = { // Same layout as kARGBToY; loaded as the pmaddubsw multiplier in ARGBToYJRow_SSSE3/AVX2.
- 15, 75, 38, 0, 15, 75, 38, 0, 15, 75, 38, 0, 15, 75, 38, 0
-};
-#endif // defined(HAS_ARGBTOYROW_SSSE3) || defined(HAS_ARGBGRAYROW_SSSE3)
-
-#if defined(HAS_ARGBTOYROW_SSSE3) || defined(HAS_I422TOARGBROW_SSSE3)
-
-static vec8 kARGBToU = { // Signed per-byte weights, one 4-byte group per pixel; the UV row functions multiply with pmaddubsw and shift the sums right by 8.
- 112, -74, -38, 0, 112, -74, -38, 0, 112, -74, -38, 0, 112, -74, -38, 0
-};
-
-static vec8 kARGBToUJ = { // Weights used by ARGBToUVJRow_SSSE3 in place of kARGBToU.
- 127, -84, -43, 0, 127, -84, -43, 0, 127, -84, -43, 0, 127, -84, -43, 0
-};
-
-static vec8 kARGBToV = {
- -18, -94, 112, 0, -18, -94, 112, 0, -18, -94, 112, 0, -18, -94, 112, 0,
-};
-
-static vec8 kARGBToVJ = { // Weights used by ARGBToUVJRow_SSSE3 in place of kARGBToV.
- -20, -107, 127, 0, -20, -107, 127, 0, -20, -107, 127, 0, -20, -107, 127, 0
-};
-
-// Constants for BGRA
-static vec8 kBGRAToY = { // Same weights as kARGBToY with the four byte positions reversed.
- 0, 33, 65, 13, 0, 33, 65, 13, 0, 33, 65, 13, 0, 33, 65, 13
-};
-
-static vec8 kBGRAToU = {
- 0, -38, -74, 112, 0, -38, -74, 112, 0, -38, -74, 112, 0, -38, -74, 112
-};
-
-static vec8 kBGRAToV = {
- 0, 112, -94, -18, 0, 112, -94, -18, 0, 112, -94, -18, 0, 112, -94, -18
-};
-
-// Constants for ABGR
-static vec8 kABGRToY = { // Same weights as kARGBToY with bytes 0 and 2 swapped.
- 33, 65, 13, 0, 33, 65, 13, 0, 33, 65, 13, 0, 33, 65, 13, 0
-};
-
-static vec8 kABGRToU = {
- -38, -74, 112, 0, -38, -74, 112, 0, -38, -74, 112, 0, -38, -74, 112, 0
-};
-
-static vec8 kABGRToV = {
- 112, -94, -18, 0, 112, -94, -18, 0, 112, -94, -18, 0, 112, -94, -18, 0
-};
-
-// Constants for RGBA.
-static vec8 kRGBAToY = { // Same weights as kARGBToY shifted up one byte position (byte 0 has weight 0).
- 0, 13, 65, 33, 0, 13, 65, 33, 0, 13, 65, 33, 0, 13, 65, 33
-};
-
-static vec8 kRGBAToU = {
- 0, 112, -74, -38, 0, 112, -74, -38, 0, 112, -74, -38, 0, 112, -74, -38
-};
-
-static vec8 kRGBAToV = {
- 0, -18, -94, 112, 0, -18, -94, 112, 0, -18, -94, 112, 0, -18, -94, 112
-};
-
-static uvec8 kAddY16 = { // Byte bias added with paddb after the Y sums are packed to bytes.
- 16u, 16u, 16u, 16u, 16u, 16u, 16u, 16u, 16u, 16u, 16u, 16u, 16u, 16u, 16u, 16u
-};
-
-// 7 bit fixed point 0.5.
-static vec16 kAddYJ64 = { // Added to the 16-bit sums before the >>7 in ARGBToYJRow_*, so the shift rounds to nearest.
- 64, 64, 64, 64, 64, 64, 64, 64
-};
-
-static uvec8 kAddUV128 = { // Byte bias added with paddb after the signed U/V results are packed to bytes.
- 128u, 128u, 128u, 128u, 128u, 128u, 128u, 128u,
- 128u, 128u, 128u, 128u, 128u, 128u, 128u, 128u
-};
-
-static uvec16 kAddUVJ128 = { // Added to the 16-bit sums before psraw $8 in ARGBToUVJRow_SSSE3: 0x80 in the high byte and 0x80 in the low byte.
- 0x8080u, 0x8080u, 0x8080u, 0x8080u, 0x8080u, 0x8080u, 0x8080u, 0x8080u
-};
-#endif // defined(HAS_ARGBTOYROW_SSSE3) || defined(HAS_I422TOARGBROW_SSSE3)
-
-#ifdef HAS_RGB24TOARGBROW_SSSE3
-
-// Shuffle table for converting RGB24 to ARGB.
-static uvec8 kShuffleMaskRGB24ToARGB = { // pshufb indices: 3 source bytes per pixel plus a filler index (12..15) in byte 3, which the row function then ORs with 0xff.
- 0u, 1u, 2u, 12u, 3u, 4u, 5u, 13u, 6u, 7u, 8u, 14u, 9u, 10u, 11u, 15u
-};
-
-// Shuffle table for converting RAW to ARGB.
-static uvec8 kShuffleMaskRAWToARGB = { // Same as kShuffleMaskRGB24ToARGB but with the 3 colour bytes of each pixel reversed.
- 2u, 1u, 0u, 12u, 5u, 4u, 3u, 13u, 8u, 7u, 6u, 14u, 11u, 10u, 9u, 15u
-};
-
-// Shuffle table for converting ARGB to RGB24.
-static uvec8 kShuffleMaskARGBToRGB24 = { // Keeps bytes 0..2 of each pixel; the 128u entries make pshufb write zero to the last 4 bytes.
- 0u, 1u, 2u, 4u, 5u, 6u, 8u, 9u, 10u, 12u, 13u, 14u, 128u, 128u, 128u, 128u
-};
-
-// Shuffle table for converting ARGB to RAW.
-static uvec8 kShuffleMaskARGBToRAW = { // As kShuffleMaskARGBToRGB24 with the 3 kept bytes of each pixel reversed.
- 2u, 1u, 0u, 6u, 5u, 4u, 10u, 9u, 8u, 14u, 13u, 12u, 128u, 128u, 128u, 128u
-};
-
-// Shuffle table for converting ARGBToRGB24 for I422ToRGB24. First 8 + next 4
-static uvec8 kShuffleMaskARGBToRGB24_0 = {
- 0u, 1u, 2u, 4u, 5u, 6u, 8u, 9u, 128u, 128u, 128u, 128u, 10u, 12u, 13u, 14u
-};
-
-// Shuffle table for converting ARGB to RAW, split like kShuffleMaskARGBToRGB24_0: first 8 bytes, 4 zeroed bytes, then the next 4.
-static uvec8 kShuffleMaskARGBToRAW_0 = {
- 2u, 1u, 0u, 6u, 5u, 4u, 10u, 9u, 128u, 128u, 128u, 128u, 8u, 14u, 13u, 12u
-};
-#endif // HAS_RGB24TOARGBROW_SSSE3
-
-#if defined(TESTING) && defined(__x86_64__)
-void TestRow_SSE2(const uint8* src_y, uint8* dst_argb, int pix) { // Built only when TESTING and __x86_64__ are defined: a run of mov/lea/add forms over every general register, then a small copy loop. NOTE(review): presumably meant for inspecting instruction encodings rather than for execution - confirm.
- asm volatile (
- ".p2align 5 \n"
- "mov %%eax,%%eax \n" // 32-bit self-moves; each write zeroes the upper 32 bits of the 64-bit register.
- "mov %%ebx,%%ebx \n"
- "mov %%ecx,%%ecx \n"
- "mov %%edx,%%edx \n"
- "mov %%esi,%%esi \n"
- "mov %%edi,%%edi \n"
- "mov %%ebp,%%ebp \n"
- "mov %%esp,%%esp \n" // NOTE(review): zeroes the upper half of rsp; none of the registers written in this preamble appear in the clobber list.
- ".p2align 5 \n"
- "mov %%r8d,%%r8d \n"
- "mov %%r9d,%%r9d \n"
- "mov %%r10d,%%r10d \n"
- "mov %%r11d,%%r11d \n"
- "mov %%r12d,%%r12d \n"
- "mov %%r13d,%%r13d \n"
- "mov %%r14d,%%r14d \n"
- "mov %%r15d,%%r15d \n"
- ".p2align 5 \n"
- "lea (%%rax),%%eax \n" // lea with a 64-bit base and a 32-bit destination, no displacement.
- "lea (%%rbx),%%ebx \n"
- "lea (%%rcx),%%ecx \n"
- "lea (%%rdx),%%edx \n"
- "lea (%%rsi),%%esi \n"
- "lea (%%rdi),%%edi \n"
- "lea (%%rbp),%%ebp \n"
- "lea (%%rsp),%%esp \n"
- ".p2align 5 \n"
- "lea (%%r8),%%r8d \n"
- "lea (%%r9),%%r9d \n"
- "lea (%%r10),%%r10d \n"
- "lea (%%r11),%%r11d \n"
- "lea (%%r12),%%r12d \n"
- "lea (%%r13),%%r13d \n"
- "lea (%%r14),%%r14d \n"
- "lea (%%r15),%%r15d \n"
-
- ".p2align 5 \n"
- "lea 0x10(%%rax),%%eax \n" // Same lea forms with a 0x10 displacement.
- "lea 0x10(%%rbx),%%ebx \n"
- "lea 0x10(%%rcx),%%ecx \n"
- "lea 0x10(%%rdx),%%edx \n"
- "lea 0x10(%%rsi),%%esi \n"
- "lea 0x10(%%rdi),%%edi \n"
- "lea 0x10(%%rbp),%%ebp \n"
- "lea 0x10(%%rsp),%%esp \n"
- ".p2align 5 \n"
- "lea 0x10(%%r8),%%r8d \n"
- "lea 0x10(%%r9),%%r9d \n"
- "lea 0x10(%%r10),%%r10d \n"
- "lea 0x10(%%r11),%%r11d \n"
- "lea 0x10(%%r12),%%r12d \n"
- "lea 0x10(%%r13),%%r13d \n"
- "lea 0x10(%%r14),%%r14d \n"
- "lea 0x10(%%r15),%%r15d \n"
-
- ".p2align 5 \n"
- "add 0x10,%%eax \n" // NOTE(review): there is no '$', so in AT&T syntax 0x10 is a memory operand (absolute address 0x10), not an immediate - confirm whether $0x10 was intended.
- "add 0x10,%%ebx \n"
- "add 0x10,%%ecx \n"
- "add 0x10,%%edx \n"
- "add 0x10,%%esi \n"
- "add 0x10,%%edi \n"
- "add 0x10,%%ebp \n"
- "add 0x10,%%esp \n"
- ".p2align 5 \n"
- "add 0x10,%%r8d \n"
- "add 0x10,%%r9d \n"
- "add 0x10,%%r10d \n"
- "add 0x10,%%r11d \n"
- "add 0x10,%%r12d \n"
- "add 0x10,%%r13d \n"
- "add 0x10,%%r14d \n"
- "add 0x10,%%r15d \n"
-
- ".p2align 2 \n"
- "1: \n"
- "movq " MEMACCESS(0) ",%%xmm0 \n" // load 8 bytes from src_y (movq clears the upper 8 bytes of xmm0)
- "lea " MEMLEA(0x8,0) ",%0 \n" // src_y += 8
- "movdqu %%xmm0," MEMACCESS(1) " \n" // store 16 bytes to dst_argb
- "lea " MEMLEA(0x20,1) ",%1 \n" // dst_argb += 32, so bytes 16..31 of each step are not written
- "sub $0x8,%2 \n" // pix -= 8
- "jg 1b \n" // repeat while pix > 0
- : "+r"(src_y), // %0
- "+r"(dst_argb), // %1
- "+r"(pix) // %2
- :
- : "memory", "cc", "xmm0", "xmm1", "xmm5" // xmm1 and xmm5 are listed but not written by the code above
- );
-}
-#endif // TESTING
-
-#ifdef HAS_J400TOARGBROW_SSE2
-void J400ToARGBRow_SSE2(const uint8* src_y, uint8* dst_argb, int pix) { // Expands each source byte into a 4-byte pixel whose bytes 0..2 equal the source byte and whose byte 3 is 0xff; 8 pixels per iteration.
- asm volatile (
- "pcmpeqb %%xmm5,%%xmm5 \n" // xmm5 = all ones
- "pslld $0x18,%%xmm5 \n" // xmm5 = 0xff000000 in every dword
- LABELALIGN
- "1: \n"
- "movq " MEMACCESS(0) ",%%xmm0 \n" // load 8 source bytes
- "lea " MEMLEA(0x8,0) ",%0 \n" // src_y += 8
- "punpcklbw %%xmm0,%%xmm0 \n" // duplicate every byte: 8 words of (y,y)
- "movdqa %%xmm0,%%xmm1 \n"
- "punpcklwd %%xmm0,%%xmm0 \n" // first 4 values, each repeated over a dword
- "punpckhwd %%xmm1,%%xmm1 \n" // last 4 values, each repeated over a dword
- "por %%xmm5,%%xmm0 \n" // force byte 3 of each pixel to 0xff
- "por %%xmm5,%%xmm1 \n"
- "movdqu %%xmm0," MEMACCESS(1) " \n"
- "movdqu %%xmm1," MEMACCESS2(0x10,1) " \n"
- "lea " MEMLEA(0x20,1) ",%1 \n" // dst_argb += 32
- "sub $0x8,%2 \n" // pix -= 8
- "jg 1b \n" // the body runs before the count is tested, so at least 8 pixels are always written
- : "+r"(src_y), // %0
- "+r"(dst_argb), // %1
- "+r"(pix) // %2
- :: "memory", "cc", "xmm0", "xmm1", "xmm5"
- );
-}
-#endif // HAS_J400TOARGBROW_SSE2
-
-#ifdef HAS_RGB24TOARGBROW_SSSE3
-void RGB24ToARGBRow_SSSE3(const uint8* src_rgb24, uint8* dst_argb, int pix) { // Widens 3-byte pixels to 4-byte pixels with byte 3 set to 0xff; reads 48 bytes and writes 64 bytes (16 pixels) per iteration.
- asm volatile (
- "pcmpeqb %%xmm5,%%xmm5 \n" // generate mask 0xff000000
- "pslld $0x18,%%xmm5 \n"
- "movdqa %3,%%xmm4 \n" // xmm4 = shuffle table; movdqa needs the table 16-byte aligned
- LABELALIGN
- "1: \n"
- "movdqu " MEMACCESS(0) ",%%xmm0 \n" // source bytes 0..15
- "movdqu " MEMACCESS2(0x10,0) ",%%xmm1 \n" // source bytes 16..31
- "movdqu " MEMACCESS2(0x20,0) ",%%xmm3 \n" // source bytes 32..47
- "lea " MEMLEA(0x30,0) ",%0 \n" // src += 48
- "movdqa %%xmm3,%%xmm2 \n"
- "palignr $0x8,%%xmm1,%%xmm2 \n" // xmm2 = source bytes 24..39 (pixels 8..11 in its low 12 bytes)
- "pshufb %%xmm4,%%xmm2 \n"
- "por %%xmm5,%%xmm2 \n"
- "palignr $0xc,%%xmm0,%%xmm1 \n" // xmm1 = source bytes 12..27 (pixels 4..7)
- "pshufb %%xmm4,%%xmm0 \n" // pixels 0..3
- "movdqu %%xmm2," MEMACCESS2(0x20,1) " \n"
- "por %%xmm5,%%xmm0 \n"
- "pshufb %%xmm4,%%xmm1 \n"
- "movdqu %%xmm0," MEMACCESS(1) " \n"
- "por %%xmm5,%%xmm1 \n"
- "palignr $0x4,%%xmm3,%%xmm3 \n" // rotate so source bytes 36..47 (pixels 12..15) sit in the low 12 bytes
- "pshufb %%xmm4,%%xmm3 \n"
- "movdqu %%xmm1," MEMACCESS2(0x10,1) " \n"
- "por %%xmm5,%%xmm3 \n"
- "movdqu %%xmm3," MEMACCESS2(0x30,1) " \n"
- "lea " MEMLEA(0x40,1) ",%1 \n" // dst += 64
- "sub $0x10,%2 \n" // pix -= 16
- "jg 1b \n"
- : "+r"(src_rgb24), // %0
- "+r"(dst_argb), // %1
- "+r"(pix) // %2
- : "m"(kShuffleMaskRGB24ToARGB) // %3
- : "memory", "cc" , "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5"
- );
-}
-
-void RAWToARGBRow_SSSE3(const uint8* src_raw, uint8* dst_argb, int pix) { // Same instruction sequence as RGB24ToARGBRow_SSSE3 but with kShuffleMaskRAWToARGB, which also reverses the 3 colour bytes of each pixel; 16 pixels per iteration.
- asm volatile (
- "pcmpeqb %%xmm5,%%xmm5 \n" // generate mask 0xff000000
- "pslld $0x18,%%xmm5 \n"
- "movdqa %3,%%xmm4 \n" // xmm4 = shuffle table; movdqa needs the table 16-byte aligned
- LABELALIGN
- "1: \n"
- "movdqu " MEMACCESS(0) ",%%xmm0 \n" // source bytes 0..15
- "movdqu " MEMACCESS2(0x10,0) ",%%xmm1 \n" // source bytes 16..31
- "movdqu " MEMACCESS2(0x20,0) ",%%xmm3 \n" // source bytes 32..47
- "lea " MEMLEA(0x30,0) ",%0 \n" // src += 48
- "movdqa %%xmm3,%%xmm2 \n"
- "palignr $0x8,%%xmm1,%%xmm2 \n" // xmm2 = source bytes 24..39 (pixels 8..11 in its low 12 bytes)
- "pshufb %%xmm4,%%xmm2 \n"
- "por %%xmm5,%%xmm2 \n"
- "palignr $0xc,%%xmm0,%%xmm1 \n" // xmm1 = source bytes 12..27 (pixels 4..7)
- "pshufb %%xmm4,%%xmm0 \n" // pixels 0..3
- "movdqu %%xmm2," MEMACCESS2(0x20,1) " \n"
- "por %%xmm5,%%xmm0 \n"
- "pshufb %%xmm4,%%xmm1 \n"
- "movdqu %%xmm0," MEMACCESS(1) " \n"
- "por %%xmm5,%%xmm1 \n"
- "palignr $0x4,%%xmm3,%%xmm3 \n" // rotate so source bytes 36..47 (pixels 12..15) sit in the low 12 bytes
- "pshufb %%xmm4,%%xmm3 \n"
- "movdqu %%xmm1," MEMACCESS2(0x10,1) " \n"
- "por %%xmm5,%%xmm3 \n"
- "movdqu %%xmm3," MEMACCESS2(0x30,1) " \n"
- "lea " MEMLEA(0x40,1) ",%1 \n" // dst += 64
- "sub $0x10,%2 \n" // pix -= 16
- "jg 1b \n"
- : "+r"(src_raw), // %0
- "+r"(dst_argb), // %1
- "+r"(pix) // %2
- : "m"(kShuffleMaskRAWToARGB) // %3
- : "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5"
- );
-}
-
-void RGB565ToARGBRow_SSE2(const uint8* src, uint8* dst, int pix) { // Expands 16-bit pixels to 4 bytes: bits 0-4 -> byte 0, bits 5-10 -> byte 1, bits 11-15 -> byte 2, byte 3 = 0xff; 8 pixels per iteration.
- asm volatile (
- "mov $0x1080108,%%eax \n"
- "movd %%eax,%%xmm5 \n"
- "pshufd $0x0,%%xmm5,%%xmm5 \n" // xmm5 = 0x0108 per word: pmulhuw by it widens a 5-bit field held in bits 11-15 to 8 bits
- "mov $0x20802080,%%eax \n"
- "movd %%eax,%%xmm6 \n"
- "pshufd $0x0,%%xmm6,%%xmm6 \n" // xmm6 = 0x2080 per word: pmulhuw by it widens the 6-bit field held in bits 5-10 to 8 bits
- "pcmpeqb %%xmm3,%%xmm3 \n"
- "psllw $0xb,%%xmm3 \n" // xmm3 = 0xf800 per word (bits 11-15)
- "pcmpeqb %%xmm4,%%xmm4 \n"
- "psllw $0xa,%%xmm4 \n"
- "psrlw $0x5,%%xmm4 \n" // xmm4 = 0x07e0 per word (bits 5-10)
- "pcmpeqb %%xmm7,%%xmm7 \n"
- "psllw $0x8,%%xmm7 \n" // xmm7 = 0xff00 per word
- "sub %0,%1 \n" // after both subtractions %1 = dst - 2*src, so (%1,%0,2) addresses dst while only src is advanced
- "sub %0,%1 \n"
- LABELALIGN
- "1: \n"
- "movdqu " MEMACCESS(0) ",%%xmm0 \n" // 8 source pixels
- "movdqa %%xmm0,%%xmm1 \n"
- "movdqa %%xmm0,%%xmm2 \n"
- "pand %%xmm3,%%xmm1 \n" // bits 11-15
- "psllw $0xb,%%xmm2 \n" // bits 0-4 moved up to bits 11-15
- "pmulhuw %%xmm5,%%xmm1 \n"
- "pmulhuw %%xmm5,%%xmm2 \n"
- "psllw $0x8,%%xmm1 \n"
- "por %%xmm2,%%xmm1 \n" // word = (widened bits 11-15) << 8 | widened bits 0-4
- "pand %%xmm4,%%xmm0 \n" // bits 5-10
- "pmulhuw %%xmm6,%%xmm0 \n"
- "por %%xmm7,%%xmm0 \n" // word = 0xff << 8 | widened bits 5-10
- "movdqa %%xmm1,%%xmm2 \n"
- "punpcklbw %%xmm0,%%xmm1 \n" // interleave bytes into 4-byte pixels (first 4 pixels)
- "punpckhbw %%xmm0,%%xmm2 \n" // last 4 pixels
- MEMOPMEM(movdqu,xmm1,0x00,1,0,2) // movdqu %%xmm1,(%1,%0,2)
- MEMOPMEM(movdqu,xmm2,0x10,1,0,2) // movdqu %%xmm2,0x10(%1,%0,2)
- "lea " MEMLEA(0x10,0) ",%0 \n" // src += 16
- "sub $0x8,%2 \n" // pix -= 8
- "jg 1b \n"
- : "+r"(src), // %0
- "+r"(dst), // %1
- "+r"(pix) // %2
- :
- : "memory", "cc", "eax", NACL_R14
- "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", "xmm7"
- );
-}
-
-void ARGB1555ToARGBRow_SSE2(const uint8* src, uint8* dst, int pix) { // Expands 16-bit pixels to 4 bytes: bits 0-4 -> byte 0, bits 5-9 -> byte 1, bits 10-14 -> byte 2, bit 15 -> byte 3 as 0x00 or 0xff; 8 pixels per iteration.
- asm volatile (
- "mov $0x1080108,%%eax \n"
- "movd %%eax,%%xmm5 \n"
- "pshufd $0x0,%%xmm5,%%xmm5 \n" // xmm5 = 0x0108 per word: pmulhuw by it widens a 5-bit field held in bits 11-15 to 8 bits
- "mov $0x42004200,%%eax \n"
- "movd %%eax,%%xmm6 \n"
- "pshufd $0x0,%%xmm6,%%xmm6 \n" // xmm6 = 0x4200 per word: pmulhuw by it widens a 5-bit field held in bits 5-9 to 8 bits
- "pcmpeqb %%xmm3,%%xmm3 \n"
- "psllw $0xb,%%xmm3 \n" // xmm3 = 0xf800 per word
- "movdqa %%xmm3,%%xmm4 \n"
- "psrlw $0x6,%%xmm4 \n" // xmm4 = 0x03e0 per word (bits 5-9)
- "pcmpeqb %%xmm7,%%xmm7 \n"
- "psllw $0x8,%%xmm7 \n" // xmm7 = 0xff00 per word
- "sub %0,%1 \n" // after both subtractions %1 = dst - 2*src, so (%1,%0,2) addresses dst while only src is advanced
- "sub %0,%1 \n"
- LABELALIGN
- "1: \n"
- "movdqu " MEMACCESS(0) ",%%xmm0 \n" // 8 source pixels
- "movdqa %%xmm0,%%xmm1 \n"
- "movdqa %%xmm0,%%xmm2 \n"
- "psllw $0x1,%%xmm1 \n" // bits 10-14 moved up to bits 11-15
- "psllw $0xb,%%xmm2 \n" // bits 0-4 moved up to bits 11-15
- "pand %%xmm3,%%xmm1 \n"
- "pmulhuw %%xmm5,%%xmm2 \n"
- "pmulhuw %%xmm5,%%xmm1 \n"
- "psllw $0x8,%%xmm1 \n"
- "por %%xmm2,%%xmm1 \n" // word = (widened bits 10-14) << 8 | widened bits 0-4
- "movdqa %%xmm0,%%xmm2 \n"
- "pand %%xmm4,%%xmm0 \n" // bits 5-9
- "psraw $0x8,%%xmm2 \n" // arithmetic shift fills the high byte with copies of bit 15
- "pmulhuw %%xmm6,%%xmm0 \n"
- "pand %%xmm7,%%xmm2 \n" // keep only that high byte (0x00 or 0xff)
- "por %%xmm2,%%xmm0 \n" // word = byte 3 value << 8 | widened bits 5-9
- "movdqa %%xmm1,%%xmm2 \n"
- "punpcklbw %%xmm0,%%xmm1 \n" // interleave bytes into 4-byte pixels (first 4 pixels)
- "punpckhbw %%xmm0,%%xmm2 \n" // last 4 pixels
- MEMOPMEM(movdqu,xmm1,0x00,1,0,2) // movdqu %%xmm1,(%1,%0,2)
- MEMOPMEM(movdqu,xmm2,0x10,1,0,2) // movdqu %%xmm2,0x10(%1,%0,2)
- "lea " MEMLEA(0x10,0) ",%0 \n" // src += 16
- "sub $0x8,%2 \n" // pix -= 8
- "jg 1b \n"
- : "+r"(src), // %0
- "+r"(dst), // %1
- "+r"(pix) // %2
- :
- : "memory", "cc", "eax", NACL_R14
- "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", "xmm7"
- );
-}
-
-void ARGB4444ToARGBRow_SSE2(const uint8* src, uint8* dst, int pix) { // Expands every 4-bit nibble into a byte with the nibble in both halves (n -> n*0x11); each source byte yields two output bytes, low nibble first; 8 pixels per iteration.
- asm volatile (
- "mov $0xf0f0f0f,%%eax \n"
- "movd %%eax,%%xmm4 \n"
- "pshufd $0x0,%%xmm4,%%xmm4 \n" // xmm4 = 0x0f in every byte (low-nibble mask)
- "movdqa %%xmm4,%%xmm5 \n"
- "pslld $0x4,%%xmm5 \n" // xmm5 = 0xf0 in every byte (high-nibble mask)
- "sub %0,%1 \n" // after both subtractions %1 = dst - 2*src, so (%1,%0,2) addresses dst while only src is advanced
- "sub %0,%1 \n"
- LABELALIGN
- "1: \n"
- "movdqu " MEMACCESS(0) ",%%xmm0 \n" // 8 source pixels (16 bytes)
- "movdqa %%xmm0,%%xmm2 \n"
- "pand %%xmm4,%%xmm0 \n" // low nibbles
- "pand %%xmm5,%%xmm2 \n" // high nibbles
- "movdqa %%xmm0,%%xmm1 \n"
- "movdqa %%xmm2,%%xmm3 \n"
- "psllw $0x4,%%xmm1 \n"
- "psrlw $0x4,%%xmm3 \n"
- "por %%xmm1,%%xmm0 \n" // low nibble replicated into both halves of the byte
- "por %%xmm3,%%xmm2 \n" // high nibble replicated into both halves of the byte
- "movdqa %%xmm0,%%xmm1 \n"
- "punpcklbw %%xmm2,%%xmm0 \n" // interleave: expanded low nibble, then expanded high nibble
- "punpckhbw %%xmm2,%%xmm1 \n"
- MEMOPMEM(movdqu,xmm0,0x00,1,0,2) // movdqu %%xmm0,(%1,%0,2)
- MEMOPMEM(movdqu,xmm1,0x10,1,0,2) // movdqu %%xmm1,0x10(%1,%0,2)
- "lea " MEMLEA(0x10,0) ",%0 \n" // src += 16
- "sub $0x8,%2 \n" // pix -= 8
- "jg 1b \n"
- : "+r"(src), // %0
- "+r"(dst), // %1
- "+r"(pix) // %2
- :
- : "memory", "cc", "eax", NACL_R14
- "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5"
- );
-}
-
-void ARGBToRGB24Row_SSSE3(const uint8* src, uint8* dst, int pix) { // Drops byte 3 of every 4-byte pixel: reads 64 bytes and writes 48 bytes (16 pixels) per iteration.
- asm volatile (
- "movdqa %3,%%xmm6 \n" // xmm6 = shuffle table; movdqa needs the table 16-byte aligned
- LABELALIGN
- "1: \n"
- "movdqu " MEMACCESS(0) ",%%xmm0 \n"
- "movdqu " MEMACCESS2(0x10,0) ",%%xmm1 \n"
- "movdqu " MEMACCESS2(0x20,0) ",%%xmm2 \n"
- "movdqu " MEMACCESS2(0x30,0) ",%%xmm3 \n"
- "lea " MEMLEA(0x40,0) ",%0 \n" // src += 64
- "pshufb %%xmm6,%%xmm0 \n" // each register now holds 12 packed bytes followed by 4 zero bytes
- "pshufb %%xmm6,%%xmm1 \n"
- "pshufb %%xmm6,%%xmm2 \n"
- "pshufb %%xmm6,%%xmm3 \n"
- "movdqa %%xmm1,%%xmm4 \n"
- "psrldq $0x4,%%xmm1 \n" // remaining 8 bytes of block 1 moved to the bottom
- "pslldq $0xc,%%xmm4 \n" // first 4 bytes of block 1 moved to the top
- "movdqa %%xmm2,%%xmm5 \n"
- "por %%xmm4,%%xmm0 \n" // output bytes 0..15 = block 0 (12) + block 1 (4)
- "pslldq $0x8,%%xmm5 \n" // first 8 bytes of block 2 moved to the top
- "movdqu %%xmm0," MEMACCESS(1) " \n"
- "por %%xmm5,%%xmm1 \n" // output bytes 16..31 = block 1 (8) + block 2 (8)
- "psrldq $0x8,%%xmm2 \n" // last 4 bytes of block 2 moved to the bottom
- "pslldq $0x4,%%xmm3 \n" // 12 bytes of block 3 moved above them
- "por %%xmm3,%%xmm2 \n" // output bytes 32..47 = block 2 (4) + block 3 (12)
- "movdqu %%xmm1," MEMACCESS2(0x10,1) " \n"
- "movdqu %%xmm2," MEMACCESS2(0x20,1) " \n"
- "lea " MEMLEA(0x30,1) ",%1 \n" // dst += 48
- "sub $0x10,%2 \n" // pix -= 16
- "jg 1b \n"
- : "+r"(src), // %0
- "+r"(dst), // %1
- "+r"(pix) // %2
- : "m"(kShuffleMaskARGBToRGB24) // %3
- : "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6"
- );
-}
-
-void ARGBToRAWRow_SSSE3(const uint8* src, uint8* dst, int pix) { // Same instruction sequence as ARGBToRGB24Row_SSSE3 but with kShuffleMaskARGBToRAW, which also reverses the 3 kept bytes of each pixel; 16 pixels per iteration.
- asm volatile (
- "movdqa %3,%%xmm6 \n" // xmm6 = shuffle table; movdqa needs the table 16-byte aligned
- LABELALIGN
- "1: \n"
- "movdqu " MEMACCESS(0) ",%%xmm0 \n"
- "movdqu " MEMACCESS2(0x10,0) ",%%xmm1 \n"
- "movdqu " MEMACCESS2(0x20,0) ",%%xmm2 \n"
- "movdqu " MEMACCESS2(0x30,0) ",%%xmm3 \n"
- "lea " MEMLEA(0x40,0) ",%0 \n" // src += 64
- "pshufb %%xmm6,%%xmm0 \n" // each register now holds 12 packed bytes followed by 4 zero bytes
- "pshufb %%xmm6,%%xmm1 \n"
- "pshufb %%xmm6,%%xmm2 \n"
- "pshufb %%xmm6,%%xmm3 \n"
- "movdqa %%xmm1,%%xmm4 \n"
- "psrldq $0x4,%%xmm1 \n" // remaining 8 bytes of block 1 moved to the bottom
- "pslldq $0xc,%%xmm4 \n" // first 4 bytes of block 1 moved to the top
- "movdqa %%xmm2,%%xmm5 \n"
- "por %%xmm4,%%xmm0 \n" // output bytes 0..15 = block 0 (12) + block 1 (4)
- "pslldq $0x8,%%xmm5 \n" // first 8 bytes of block 2 moved to the top
- "movdqu %%xmm0," MEMACCESS(1) " \n"
- "por %%xmm5,%%xmm1 \n" // output bytes 16..31 = block 1 (8) + block 2 (8)
- "psrldq $0x8,%%xmm2 \n" // last 4 bytes of block 2 moved to the bottom
- "pslldq $0x4,%%xmm3 \n" // 12 bytes of block 3 moved above them
- "por %%xmm3,%%xmm2 \n" // output bytes 32..47 = block 2 (4) + block 3 (12)
- "movdqu %%xmm1," MEMACCESS2(0x10,1) " \n"
- "movdqu %%xmm2," MEMACCESS2(0x20,1) " \n"
- "lea " MEMLEA(0x30,1) ",%1 \n" // dst += 48
- "sub $0x10,%2 \n" // pix -= 16
- "jg 1b \n"
- : "+r"(src), // %0
- "+r"(dst), // %1
- "+r"(pix) // %2
- : "m"(kShuffleMaskARGBToRAW) // %3
- : "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6"
- );
-}
-
-void ARGBToRGB565Row_SSE2(const uint8* src, uint8* dst, int pix) { // Packs 4-byte pixels into 16 bits: top 5 bits of byte 0 -> bits 0-4, top 6 bits of byte 1 -> bits 5-10, top 5 bits of byte 2 -> bits 11-15; 4 pixels per iteration.
- asm volatile (
- "pcmpeqb %%xmm3,%%xmm3 \n"
- "psrld $0x1b,%%xmm3 \n" // xmm3 = 0x0000001f per dword
- "pcmpeqb %%xmm4,%%xmm4 \n"
- "psrld $0x1a,%%xmm4 \n"
- "pslld $0x5,%%xmm4 \n" // xmm4 = 0x000007e0 per dword
- "pcmpeqb %%xmm5,%%xmm5 \n"
- "pslld $0xb,%%xmm5 \n" // xmm5 = 0xfffff800 per dword
- LABELALIGN
- "1: \n"
- "movdqu " MEMACCESS(0) ",%%xmm0 \n" // 4 source pixels
- "movdqa %%xmm0,%%xmm1 \n"
- "movdqa %%xmm0,%%xmm2 \n"
- "pslld $0x8,%%xmm0 \n" // drop byte 3 so byte 2 becomes the top byte
- "psrld $0x3,%%xmm1 \n"
- "psrld $0x5,%%xmm2 \n"
- "psrad $0x10,%%xmm0 \n" // byte 2 lands in bits 8-15; the arithmetic shift leaves a sign-extended 16-bit value
- "pand %%xmm3,%%xmm1 \n" // bits 0-4
- "pand %%xmm4,%%xmm2 \n" // bits 5-10
- "pand %%xmm5,%%xmm0 \n" // bits 11-15 plus the sign extension above them
- "por %%xmm2,%%xmm1 \n"
- "por %%xmm1,%%xmm0 \n"
- "packssdw %%xmm0,%%xmm0 \n" // signed pack; the dwords are sign-extended 16-bit values, so nothing saturates
- "lea " MEMLEA(0x10,0) ",%0 \n" // src += 16
- "movq %%xmm0," MEMACCESS(1) " \n" // store 4 packed pixels (8 bytes)
- "lea " MEMLEA(0x8,1) ",%1 \n" // dst += 8
- "sub $0x4,%2 \n" // pix -= 4
- "jg 1b \n"
- : "+r"(src), // %0
- "+r"(dst), // %1
- "+r"(pix) // %2
- :: "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5"
- );
-}
-
-void ARGBToARGB1555Row_SSE2(const uint8* src, uint8* dst, int pix) { // Packs 4-byte pixels into 16 bits: top 5 bits of bytes 0, 1 and 2 -> bits 0-4, 5-9 and 10-14, top bit of byte 3 -> bit 15; 4 pixels per iteration.
- asm volatile (
- "pcmpeqb %%xmm4,%%xmm4 \n"
- "psrld $0x1b,%%xmm4 \n" // xmm4 = 0x0000001f per dword
- "movdqa %%xmm4,%%xmm5 \n"
- "pslld $0x5,%%xmm5 \n" // xmm5 = 0x000003e0
- "movdqa %%xmm4,%%xmm6 \n"
- "pslld $0xa,%%xmm6 \n" // xmm6 = 0x00007c00
- "pcmpeqb %%xmm7,%%xmm7 \n"
- "pslld $0xf,%%xmm7 \n" // xmm7 = 0xffff8000
- LABELALIGN
- "1: \n"
- "movdqu " MEMACCESS(0) ",%%xmm0 \n" // 4 source pixels
- "movdqa %%xmm0,%%xmm1 \n"
- "movdqa %%xmm0,%%xmm2 \n"
- "movdqa %%xmm0,%%xmm3 \n"
- "psrad $0x10,%%xmm0 \n" // bit 31 moves to bit 15 and is sign-extended above it
- "psrld $0x3,%%xmm1 \n"
- "psrld $0x6,%%xmm2 \n"
- "psrld $0x9,%%xmm3 \n"
- "pand %%xmm7,%%xmm0 \n" // bit 15 plus its sign extension
- "pand %%xmm4,%%xmm1 \n" // bits 0-4
- "pand %%xmm5,%%xmm2 \n" // bits 5-9
- "pand %%xmm6,%%xmm3 \n" // bits 10-14
- "por %%xmm1,%%xmm0 \n"
- "por %%xmm3,%%xmm2 \n"
- "por %%xmm2,%%xmm0 \n"
- "packssdw %%xmm0,%%xmm0 \n" // signed pack; the dwords are sign-extended 16-bit values, so nothing saturates
- "lea " MEMLEA(0x10,0) ",%0 \n" // src += 16
- "movq %%xmm0," MEMACCESS(1) " \n" // store 4 packed pixels (8 bytes)
- "lea " MEMLEA(0x8,1) ",%1 \n" // dst += 8
- "sub $0x4,%2 \n" // pix -= 4
- "jg 1b \n"
- : "+r"(src), // %0
- "+r"(dst), // %1
- "+r"(pix) // %2
- :: "memory", "cc",
- "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", "xmm7"
- );
-}
-
-void ARGBToARGB4444Row_SSE2(const uint8* src, uint8* dst, int pix) { // Keeps the top 4 bits of every source byte and packs two of them per output byte (even byte in the low nibble); 4 pixels per iteration.
- asm volatile (
- "pcmpeqb %%xmm4,%%xmm4 \n"
- "psllw $0xc,%%xmm4 \n" // xmm4 = 0xf000 per word (high nibble of the odd byte)
- "movdqa %%xmm4,%%xmm3 \n"
- "psrlw $0x8,%%xmm3 \n" // xmm3 = 0x00f0 per word (high nibble of the even byte)
- LABELALIGN
- "1: \n"
- "movdqu " MEMACCESS(0) ",%%xmm0 \n" // 4 source pixels
- "movdqa %%xmm0,%%xmm1 \n"
- "pand %%xmm3,%%xmm0 \n"
- "pand %%xmm4,%%xmm1 \n"
- "psrlq $0x4,%%xmm0 \n" // even-byte nibble -> bits 0-3 of the word
- "psrlq $0x8,%%xmm1 \n" // odd-byte nibble -> bits 4-7 of the word
- "por %%xmm1,%%xmm0 \n"
- "packuswb %%xmm0,%%xmm0 \n" // every word is <= 0xff, so the pack keeps the low byte unchanged
- "lea " MEMLEA(0x10,0) ",%0 \n" // src += 16
- "movq %%xmm0," MEMACCESS(1) " \n" // store 4 packed pixels (8 bytes)
- "lea " MEMLEA(0x8,1) ",%1 \n" // dst += 8
- "sub $0x4,%2 \n" // pix -= 4
- "jg 1b \n"
- : "+r"(src), // %0
- "+r"(dst), // %1
- "+r"(pix) // %2
- :: "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4" // xmm2 is listed but not written by the code above
- );
-}
-#endif // HAS_RGB24TOARGBROW_SSSE3
-
-#ifdef HAS_ARGBTOYROW_SSSE3
-// Convert 16 ARGB pixels (64 bytes) to 16 Y values.
-void ARGBToYRow_SSSE3(const uint8* src_argb, uint8* dst_y, int pix) { // Per pixel: Y = ((weighted byte sum using kARGBToY) >> 7) + 16; reads 64 bytes and writes 16 bytes per iteration.
- asm volatile (
- "movdqa %3,%%xmm4 \n" // xmm4 = per-byte weights (aligned load)
- "movdqa %4,%%xmm5 \n" // xmm5 = +16 byte bias
- LABELALIGN
- "1: \n"
- "movdqu " MEMACCESS(0) ",%%xmm0 \n"
- "movdqu " MEMACCESS2(0x10,0) ",%%xmm1 \n"
- "movdqu " MEMACCESS2(0x20,0) ",%%xmm2 \n"
- "movdqu " MEMACCESS2(0x30,0) ",%%xmm3 \n"
- "pmaddubsw %%xmm4,%%xmm0 \n" // unsigned pixel bytes * signed weights, adjacent pairs summed into words
- "pmaddubsw %%xmm4,%%xmm1 \n"
- "pmaddubsw %%xmm4,%%xmm2 \n"
- "pmaddubsw %%xmm4,%%xmm3 \n"
- "lea " MEMLEA(0x40,0) ",%0 \n" // src += 64
- "phaddw %%xmm1,%%xmm0 \n" // add the two partial sums of each pixel: 8 pixel sums per register
- "phaddw %%xmm3,%%xmm2 \n"
- "psrlw $0x7,%%xmm0 \n"
- "psrlw $0x7,%%xmm2 \n"
- "packuswb %%xmm2,%%xmm0 \n" // 16 bytes, one per pixel
- "paddb %%xmm5,%%xmm0 \n"
- "movdqu %%xmm0," MEMACCESS(1) " \n"
- "lea " MEMLEA(0x10,1) ",%1 \n" // dst += 16
- "sub $0x10,%2 \n" // pix -= 16
- "jg 1b \n"
- : "+r"(src_argb), // %0
- "+r"(dst_y), // %1
- "+r"(pix) // %2
- : "m"(kARGBToY), // %3
- "m"(kAddY16) // %4
- : "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5"
- );
-}
-#endif // HAS_ARGBTOYROW_SSSE3
-
-#ifdef HAS_ARGBTOYJROW_SSSE3
-// Convert 16 ARGB pixels (64 bytes) to 16 YJ values.
-// Same as ARGBToYRow but different coefficients, no add 16, but do rounding.
-void ARGBToYJRow_SSSE3(const uint8* src_argb, uint8* dst_y, int pix) { // Per pixel: Y = (weighted byte sum using kARGBToYJ + 64) >> 7, with no +16 bias; 16 pixels per iteration.
- asm volatile (
- "movdqa %3,%%xmm4 \n" // xmm4 = per-byte weights (aligned load)
- "movdqa %4,%%xmm5 \n" // xmm5 = 64 per word, the rounding term
- LABELALIGN
- "1: \n"
- "movdqu " MEMACCESS(0) ",%%xmm0 \n"
- "movdqu " MEMACCESS2(0x10,0) ",%%xmm1 \n"
- "movdqu " MEMACCESS2(0x20,0) ",%%xmm2 \n"
- "movdqu " MEMACCESS2(0x30,0) ",%%xmm3 \n"
- "pmaddubsw %%xmm4,%%xmm0 \n" // unsigned pixel bytes * signed weights, adjacent pairs summed into words
- "pmaddubsw %%xmm4,%%xmm1 \n"
- "pmaddubsw %%xmm4,%%xmm2 \n"
- "pmaddubsw %%xmm4,%%xmm3 \n"
- "lea " MEMLEA(0x40,0) ",%0 \n" // src += 64
- "phaddw %%xmm1,%%xmm0 \n" // add the two partial sums of each pixel
- "phaddw %%xmm3,%%xmm2 \n"
- "paddw %%xmm5,%%xmm0 \n" // add 64 before the shift so it rounds to nearest
- "paddw %%xmm5,%%xmm2 \n"
- "psrlw $0x7,%%xmm0 \n"
- "psrlw $0x7,%%xmm2 \n"
- "packuswb %%xmm2,%%xmm0 \n" // 16 bytes, one per pixel
- "movdqu %%xmm0," MEMACCESS(1) " \n"
- "lea " MEMLEA(0x10,1) ",%1 \n" // dst += 16
- "sub $0x10,%2 \n" // pix -= 16
- "jg 1b \n"
- : "+r"(src_argb), // %0
- "+r"(dst_y), // %1
- "+r"(pix) // %2
- : "m"(kARGBToYJ), // %3
- "m"(kAddYJ64) // %4
- : "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5"
- );
-}
-#endif // HAS_ARGBTOYJROW_SSSE3
-
-#ifdef HAS_ARGBTOYROW_AVX2
-// vpermd for vphaddw + vpackuswb vpermd.
-static const lvec32 kPermdARGBToY_AVX = { // Dword permutation that restores pixel order after the per-128-bit-lane vphaddw/vpackuswb.
- 0, 4, 1, 5, 2, 6, 3, 7
-};
-
-// Convert 32 ARGB pixels (128 bytes) to 32 Y values.
-void ARGBToYRow_AVX2(const uint8* src_argb, uint8* dst_y, int pix) { // 256-bit counterpart of ARGBToYRow_SSSE3: same weights, >>7 and +16 bias.
- asm volatile (
- "vbroadcastf128 %3,%%ymm4 \n" // ymm4 = weights copied into both 128-bit lanes
- "vbroadcastf128 %4,%%ymm5 \n" // ymm5 = +16 byte bias in both lanes
- "vmovdqu %5,%%ymm6 \n" // ymm6 = vpermd index table
- LABELALIGN
- "1: \n"
- "vmovdqu " MEMACCESS(0) ",%%ymm0 \n"
- "vmovdqu " MEMACCESS2(0x20,0) ",%%ymm1 \n"
- "vmovdqu " MEMACCESS2(0x40,0) ",%%ymm2 \n"
- "vmovdqu " MEMACCESS2(0x60,0) ",%%ymm3 \n"
- "vpmaddubsw %%ymm4,%%ymm0,%%ymm0 \n"
- "vpmaddubsw %%ymm4,%%ymm1,%%ymm1 \n"
- "vpmaddubsw %%ymm4,%%ymm2,%%ymm2 \n"
- "vpmaddubsw %%ymm4,%%ymm3,%%ymm3 \n"
- "lea " MEMLEA(0x80,0) ",%0 \n" // src += 128
- "vphaddw %%ymm1,%%ymm0,%%ymm0 \n" // mutates.
- "vphaddw %%ymm3,%%ymm2,%%ymm2 \n"
- "vpsrlw $0x7,%%ymm0,%%ymm0 \n"
- "vpsrlw $0x7,%%ymm2,%%ymm2 \n"
- "vpackuswb %%ymm2,%%ymm0,%%ymm0 \n" // mutates.
- "vpermd %%ymm0,%%ymm6,%%ymm0 \n" // unmutate.
- "vpaddb %%ymm5,%%ymm0,%%ymm0 \n" // add 16 for Y
- "vmovdqu %%ymm0," MEMACCESS(1) " \n"
- "lea " MEMLEA(0x20,1) ",%1 \n" // dst += 32
- "sub $0x20,%2 \n" // pix -= 32
- "jg 1b \n"
- "vzeroupper \n" // clear the upper ymm halves before returning to non-AVX code
- : "+r"(src_argb), // %0
- "+r"(dst_y), // %1
- "+r"(pix) // %2
- : "m"(kARGBToY), // %3
- "m"(kAddY16), // %4
- "m"(kPermdARGBToY_AVX) // %5
- : "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6"
- );
-}
-#endif // HAS_ARGBTOYROW_AVX2
-
-#ifdef HAS_ARGBTOYJROW_AVX2
-// Convert 32 ARGB pixels (128 bytes) to 32 Y values.
-void ARGBToYJRow_AVX2(const uint8* src_argb, uint8* dst_y, int pix) { // 256-bit counterpart of ARGBToYJRow_SSSE3: kARGBToYJ weights, +64 rounding, >>7, no +16 bias. NOTE(review): kPermdARGBToY_AVX is defined under HAS_ARGBTOYROW_AVX2, so this presumably needs that macro defined as well - confirm.
- asm volatile (
- "vbroadcastf128 %3,%%ymm4 \n" // ymm4 = weights copied into both 128-bit lanes
- "vbroadcastf128 %4,%%ymm5 \n" // ymm5 = 64 per word in both lanes
- "vmovdqu %5,%%ymm6 \n" // ymm6 = vpermd index table
- LABELALIGN
- "1: \n"
- "vmovdqu " MEMACCESS(0) ",%%ymm0 \n"
- "vmovdqu " MEMACCESS2(0x20,0) ",%%ymm1 \n"
- "vmovdqu " MEMACCESS2(0x40,0) ",%%ymm2 \n"
- "vmovdqu " MEMACCESS2(0x60,0) ",%%ymm3 \n"
- "vpmaddubsw %%ymm4,%%ymm0,%%ymm0 \n"
- "vpmaddubsw %%ymm4,%%ymm1,%%ymm1 \n"
- "vpmaddubsw %%ymm4,%%ymm2,%%ymm2 \n"
- "vpmaddubsw %%ymm4,%%ymm3,%%ymm3 \n"
- "lea " MEMLEA(0x80,0) ",%0 \n" // src += 128
- "vphaddw %%ymm1,%%ymm0,%%ymm0 \n" // mutates.
- "vphaddw %%ymm3,%%ymm2,%%ymm2 \n"
- "vpaddw %%ymm5,%%ymm0,%%ymm0 \n" // Add .5 for rounding.
- "vpaddw %%ymm5,%%ymm2,%%ymm2 \n"
- "vpsrlw $0x7,%%ymm0,%%ymm0 \n"
- "vpsrlw $0x7,%%ymm2,%%ymm2 \n"
- "vpackuswb %%ymm2,%%ymm0,%%ymm0 \n" // mutates.
- "vpermd %%ymm0,%%ymm6,%%ymm0 \n" // unmutate.
- "vmovdqu %%ymm0," MEMACCESS(1) " \n"
- "lea " MEMLEA(0x20,1) ",%1 \n" // dst += 32
- "sub $0x20,%2 \n" // pix -= 32
- "jg 1b \n"
- "vzeroupper \n" // clear the upper ymm halves before returning to non-AVX code
- : "+r"(src_argb), // %0
- "+r"(dst_y), // %1
- "+r"(pix) // %2
- : "m"(kARGBToYJ), // %3
- "m"(kAddYJ64), // %4
- "m"(kPermdARGBToY_AVX) // %5
- : "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6"
- );
-}
-#endif // HAS_ARGBTOYJROW_AVX2
-
-#ifdef HAS_ARGBTOUVROW_SSSE3
-void ARGBToUVRow_SSSE3(const uint8* src_argb0, int src_stride_argb, // Averages each 2x2 block of pixels from this row and the row src_stride_argb bytes away, then writes one U and one V byte per block;
- uint8* dst_u, uint8* dst_v, int width) { // 16 source pixels -> 8 U + 8 V bytes per iteration. The clobber list now names xmm3-xmm5, which the asm writes.
- asm volatile (
- "movdqa %5,%%xmm3 \n" // xmm3 = V weights
- "movdqa %6,%%xmm4 \n" // xmm4 = U weights
- "movdqa %7,%%xmm5 \n" // xmm5 = +128 byte bias
- "sub %1,%2 \n" // %2 = dst_v - dst_u, so (%1,%2,1) addresses dst_v while only dst_u is advanced
- LABELALIGN
- "1: \n"
- "movdqu " MEMACCESS(0) ",%%xmm0 \n"
- MEMOPREG(movdqu,0x00,0,4,1,xmm7) // movdqu (%0,%4,1),%%xmm7
- "pavgb %%xmm7,%%xmm0 \n" // vertical average of the two rows
- "movdqu " MEMACCESS2(0x10,0) ",%%xmm1 \n"
- MEMOPREG(movdqu,0x10,0,4,1,xmm7) // movdqu 0x10(%0,%4,1),%%xmm7
- "pavgb %%xmm7,%%xmm1 \n"
- "movdqu " MEMACCESS2(0x20,0) ",%%xmm2 \n"
- MEMOPREG(movdqu,0x20,0,4,1,xmm7) // movdqu 0x20(%0,%4,1),%%xmm7
- "pavgb %%xmm7,%%xmm2 \n"
- "movdqu " MEMACCESS2(0x30,0) ",%%xmm6 \n"
- MEMOPREG(movdqu,0x30,0,4,1,xmm7) // movdqu 0x30(%0,%4,1),%%xmm7
- "pavgb %%xmm7,%%xmm6 \n"
-
- "lea " MEMLEA(0x40,0) ",%0 \n" // src += 64
- "movdqa %%xmm0,%%xmm7 \n"
- "shufps $0x88,%%xmm1,%%xmm0 \n" // even pixels
- "shufps $0xdd,%%xmm1,%%xmm7 \n" // odd pixels
- "pavgb %%xmm7,%%xmm0 \n" // horizontal average: 4 subsampled pixels
- "movdqa %%xmm2,%%xmm7 \n"
- "shufps $0x88,%%xmm6,%%xmm2 \n"
- "shufps $0xdd,%%xmm6,%%xmm7 \n"
- "pavgb %%xmm7,%%xmm2 \n"
- "movdqa %%xmm0,%%xmm1 \n" // xmm1/xmm6 keep copies for the V computation
- "movdqa %%xmm2,%%xmm6 \n"
- "pmaddubsw %%xmm4,%%xmm0 \n" // U partial sums
- "pmaddubsw %%xmm4,%%xmm2 \n"
- "pmaddubsw %%xmm3,%%xmm1 \n" // V partial sums
- "pmaddubsw %%xmm3,%%xmm6 \n"
- "phaddw %%xmm2,%%xmm0 \n" // 8 U sums
- "phaddw %%xmm6,%%xmm1 \n" // 8 V sums
- "psraw $0x8,%%xmm0 \n"
- "psraw $0x8,%%xmm1 \n"
- "packsswb %%xmm1,%%xmm0 \n" // low 8 bytes = U, high 8 bytes = V
- "paddb %%xmm5,%%xmm0 \n" // bias signed results by 128
- "movlps %%xmm0," MEMACCESS(1) " \n" // store 8 U bytes
- MEMOPMEM(movhps,xmm0,0x00,1,2,1) // movhps %%xmm0,(%1,%2,1)
- "lea " MEMLEA(0x8,1) ",%1 \n" // dst_u += 8
- "sub $0x10,%3 \n" // width -= 16
- "jg 1b \n"
- : "+r"(src_argb0), // %0
- "+r"(dst_u), // %1
- "+r"(dst_v), // %2
- "+rm"(width) // %3
- : "r"((intptr_t)(src_stride_argb)), // %4
- "m"(kARGBToV), // %5
- "m"(kARGBToU), // %6
- "m"(kAddUV128) // %7
- : "memory", "cc", NACL_R14
- "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", "xmm7"
- );
-}
-#endif // HAS_ARGBTOUVROW_SSSE3
-
-#ifdef HAS_ARGBTOUVROW_AVX2
-// vpshufb for vphaddw + vpackuswb packed to shorts.
-static const lvec8 kShufARGBToUV_AVX = { // Per-lane vpshufb pattern that interleaves the words of the two 8-byte halves of each 128-bit lane.
- 0, 1, 8, 9, 2, 3, 10, 11, 4, 5, 12, 13, 6, 7, 14, 15,
- 0, 1, 8, 9, 2, 3, 10, 11, 4, 5, 12, 13, 6, 7, 14, 15
-};
-void ARGBToUVRow_AVX2(const uint8* src_argb0, int src_stride_argb, // 256-bit counterpart of ARGBToUVRow_SSSE3: 2x2 average, then one U and one V byte per block;
- uint8* dst_u, uint8* dst_v, int width) { // 32 source pixels -> 16 U + 16 V bytes per iteration.
- asm volatile (
- "vbroadcastf128 %5,%%ymm5 \n" // ymm5 = +128 byte bias
- "vbroadcastf128 %6,%%ymm6 \n" // ymm6 = V weights
- "vbroadcastf128 %7,%%ymm7 \n" // ymm7 = U weights
- "sub %1,%2 \n" // %2 = dst_v - dst_u, so (%1,%2,1) addresses dst_v while only dst_u is advanced
- LABELALIGN
- "1: \n"
- "vmovdqu " MEMACCESS(0) ",%%ymm0 \n"
- "vmovdqu " MEMACCESS2(0x20,0) ",%%ymm1 \n"
- "vmovdqu " MEMACCESS2(0x40,0) ",%%ymm2 \n"
- "vmovdqu " MEMACCESS2(0x60,0) ",%%ymm3 \n"
- VMEMOPREG(vpavgb,0x00,0,4,1,ymm0,ymm0) // vpavgb (%0,%4,1),%%ymm0,%%ymm0
- VMEMOPREG(vpavgb,0x20,0,4,1,ymm1,ymm1)
- VMEMOPREG(vpavgb,0x40,0,4,1,ymm2,ymm2)
- VMEMOPREG(vpavgb,0x60,0,4,1,ymm3,ymm3)
- "lea " MEMLEA(0x80,0) ",%0 \n" // src += 128
- "vshufps $0x88,%%ymm1,%%ymm0,%%ymm4 \n" // even pixels
- "vshufps $0xdd,%%ymm1,%%ymm0,%%ymm0 \n" // odd pixels
- "vpavgb %%ymm4,%%ymm0,%%ymm0 \n" // horizontal average
- "vshufps $0x88,%%ymm3,%%ymm2,%%ymm4 \n"
- "vshufps $0xdd,%%ymm3,%%ymm2,%%ymm2 \n"
- "vpavgb %%ymm4,%%ymm2,%%ymm2 \n"
-
- "vpmaddubsw %%ymm7,%%ymm0,%%ymm1 \n" // U partial sums
- "vpmaddubsw %%ymm7,%%ymm2,%%ymm3 \n"
- "vpmaddubsw %%ymm6,%%ymm0,%%ymm0 \n" // V partial sums
- "vpmaddubsw %%ymm6,%%ymm2,%%ymm2 \n"
- "vphaddw %%ymm3,%%ymm1,%%ymm1 \n" // U sums
- "vphaddw %%ymm2,%%ymm0,%%ymm0 \n" // V sums
- "vpsraw $0x8,%%ymm1,%%ymm1 \n"
- "vpsraw $0x8,%%ymm0,%%ymm0 \n"
- "vpacksswb %%ymm0,%%ymm1,%%ymm0 \n" // per lane: low 8 bytes = U, high 8 bytes = V
- "vpermq $0xd8,%%ymm0,%%ymm0 \n" // gather both U halves into the low 128 bits and both V halves into the high 128 bits
- "vpshufb %8,%%ymm0,%%ymm0 \n" // undo the in-lane interleave left by the lane-wise ops above
- "vpaddb %%ymm5,%%ymm0,%%ymm0 \n" // bias signed results by 128
-
- "vextractf128 $0x0,%%ymm0," MEMACCESS(1) " \n" // store 16 U bytes
- VEXTOPMEM(vextractf128,1,ymm0,0x0,1,2,1) // vextractf128 $1,%%ymm0,(%1,%2,1)
- "lea " MEMLEA(0x10,1) ",%1 \n" // dst_u += 16
- "sub $0x20,%3 \n" // width -= 32
- "jg 1b \n"
- "vzeroupper \n" // clear the upper ymm halves before returning to non-AVX code
- : "+r"(src_argb0), // %0
- "+r"(dst_u), // %1
- "+r"(dst_v), // %2
- "+rm"(width) // %3
- : "r"((intptr_t)(src_stride_argb)), // %4
- "m"(kAddUV128), // %5
- "m"(kARGBToV), // %6
- "m"(kARGBToU), // %7
- "m"(kShufARGBToUV_AVX) // %8
- : "memory", "cc", NACL_R14
- "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", "xmm7"
- );
-}
-#endif // HAS_ARGBTOUVROW_AVX2
-
-#ifdef HAS_ARGBTOUVJROW_SSSE3
-void ARGBToUVJRow_SSSE3(const uint8* src_argb0, int src_stride_argb, // Same 2x2 subsampling as ARGBToUVRow_SSSE3 but with the kARGBToUJ/kARGBToVJ weights and kAddUVJ128 added to the 16-bit sums before the shift;
- uint8* dst_u, uint8* dst_v, int width) { // 16 source pixels -> 8 U + 8 V bytes per iteration. The clobber list now names xmm3-xmm5, which the asm writes.
- asm volatile (
- "movdqa %5,%%xmm3 \n" // xmm3 = V weights
- "movdqa %6,%%xmm4 \n" // xmm4 = U weights
- "movdqa %7,%%xmm5 \n" // xmm5 = 0x8080 per word
- "sub %1,%2 \n" // %2 = dst_v - dst_u, so (%1,%2,1) addresses dst_v while only dst_u is advanced
- LABELALIGN
- "1: \n"
- "movdqu " MEMACCESS(0) ",%%xmm0 \n"
- MEMOPREG(movdqu,0x00,0,4,1,xmm7) // movdqu (%0,%4,1),%%xmm7
- "pavgb %%xmm7,%%xmm0 \n" // vertical average of the two rows
- "movdqu " MEMACCESS2(0x10,0) ",%%xmm1 \n"
- MEMOPREG(movdqu,0x10,0,4,1,xmm7) // movdqu 0x10(%0,%4,1),%%xmm7
- "pavgb %%xmm7,%%xmm1 \n"
- "movdqu " MEMACCESS2(0x20,0) ",%%xmm2 \n"
- MEMOPREG(movdqu,0x20,0,4,1,xmm7) // movdqu 0x20(%0,%4,1),%%xmm7
- "pavgb %%xmm7,%%xmm2 \n"
- "movdqu " MEMACCESS2(0x30,0) ",%%xmm6 \n"
- MEMOPREG(movdqu,0x30,0,4,1,xmm7) // movdqu 0x30(%0,%4,1),%%xmm7
- "pavgb %%xmm7,%%xmm6 \n"
-
- "lea " MEMLEA(0x40,0) ",%0 \n" // src += 64
- "movdqa %%xmm0,%%xmm7 \n"
- "shufps $0x88,%%xmm1,%%xmm0 \n" // even pixels
- "shufps $0xdd,%%xmm1,%%xmm7 \n" // odd pixels
- "pavgb %%xmm7,%%xmm0 \n" // horizontal average: 4 subsampled pixels
- "movdqa %%xmm2,%%xmm7 \n"
- "shufps $0x88,%%xmm6,%%xmm2 \n"
- "shufps $0xdd,%%xmm6,%%xmm7 \n"
- "pavgb %%xmm7,%%xmm2 \n"
- "movdqa %%xmm0,%%xmm1 \n" // xmm1/xmm6 keep copies for the V computation
- "movdqa %%xmm2,%%xmm6 \n"
- "pmaddubsw %%xmm4,%%xmm0 \n" // U partial sums
- "pmaddubsw %%xmm4,%%xmm2 \n"
- "pmaddubsw %%xmm3,%%xmm1 \n" // V partial sums
- "pmaddubsw %%xmm3,%%xmm6 \n"
- "phaddw %%xmm2,%%xmm0 \n" // 8 U sums
- "phaddw %%xmm6,%%xmm1 \n" // 8 V sums
- "paddw %%xmm5,%%xmm0 \n" // add 0x8080 before the shift (bias in the high byte, rounding term in the low byte)
- "paddw %%xmm5,%%xmm1 \n"
- "psraw $0x8,%%xmm0 \n"
- "psraw $0x8,%%xmm1 \n"
- "packsswb %%xmm1,%%xmm0 \n" // low 8 bytes = U, high 8 bytes = V
- "movlps %%xmm0," MEMACCESS(1) " \n" // store 8 U bytes
- MEMOPMEM(movhps,xmm0,0x00,1,2,1) // movhps %%xmm0,(%1,%2,1)
- "lea " MEMLEA(0x8,1) ",%1 \n" // dst_u += 8
- "sub $0x10,%3 \n" // width -= 16
- "jg 1b \n"
- : "+r"(src_argb0), // %0
- "+r"(dst_u), // %1
- "+r"(dst_v), // %2
- "+rm"(width) // %3
- : "r"((intptr_t)(src_stride_argb)), // %4
- "m"(kARGBToVJ), // %5
- "m"(kARGBToUJ), // %6
- "m"(kAddUVJ128) // %7
- : "memory", "cc", NACL_R14
- "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", "xmm7"
- );
-}
-#endif // HAS_ARGBTOUVJROW_SSSE3
-
-#ifdef HAS_ARGBTOUV444ROW_SSSE3
-void ARGBToUV444Row_SSSE3(const uint8* src_argb, uint8* dst_u, uint8* dst_v, // Writes one U and one V byte for every source pixel (no subsampling); 16 pixels per iteration, with the source read twice.
- int width) { // The clobber list now names xmm3-xmm5, which the asm writes.
- asm volatile (
- "movdqa %4,%%xmm3 \n" // xmm3 = V weights
- "movdqa %5,%%xmm4 \n" // xmm4 = U weights
- "movdqa %6,%%xmm5 \n" // xmm5 = +128 byte bias
- "sub %1,%2 \n" // %2 = dst_v - dst_u, so (%1,%2,1) addresses dst_v while only dst_u is advanced
- LABELALIGN
- "1: \n"
- "movdqu " MEMACCESS(0) ",%%xmm0 \n" // first pass over the 16 pixels: U
- "movdqu " MEMACCESS2(0x10,0) ",%%xmm1 \n"
- "movdqu " MEMACCESS2(0x20,0) ",%%xmm2 \n"
- "movdqu " MEMACCESS2(0x30,0) ",%%xmm6 \n"
- "pmaddubsw %%xmm4,%%xmm0 \n"
- "pmaddubsw %%xmm4,%%xmm1 \n"
- "pmaddubsw %%xmm4,%%xmm2 \n"
- "pmaddubsw %%xmm4,%%xmm6 \n"
- "phaddw %%xmm1,%%xmm0 \n"
- "phaddw %%xmm6,%%xmm2 \n"
- "psraw $0x8,%%xmm0 \n"
- "psraw $0x8,%%xmm2 \n"
- "packsswb %%xmm2,%%xmm0 \n"
- "paddb %%xmm5,%%xmm0 \n" // bias signed results by 128
- "movdqu %%xmm0," MEMACCESS(1) " \n" // store 16 U bytes
- "movdqu " MEMACCESS(0) ",%%xmm0 \n" // second pass over the same 16 pixels: V
- "movdqu " MEMACCESS2(0x10,0) ",%%xmm1 \n"
- "movdqu " MEMACCESS2(0x20,0) ",%%xmm2 \n"
- "movdqu " MEMACCESS2(0x30,0) ",%%xmm6 \n"
- "pmaddubsw %%xmm3,%%xmm0 \n"
- "pmaddubsw %%xmm3,%%xmm1 \n"
- "pmaddubsw %%xmm3,%%xmm2 \n"
- "pmaddubsw %%xmm3,%%xmm6 \n"
- "phaddw %%xmm1,%%xmm0 \n"
- "phaddw %%xmm6,%%xmm2 \n"
- "psraw $0x8,%%xmm0 \n"
- "psraw $0x8,%%xmm2 \n"
- "packsswb %%xmm2,%%xmm0 \n"
- "paddb %%xmm5,%%xmm0 \n" // bias signed results by 128
- "lea " MEMLEA(0x40,0) ",%0 \n" // src += 64
- MEMOPMEM(movdqu,xmm0,0x00,1,2,1) // movdqu %%xmm0,(%1,%2,1)
- "lea " MEMLEA(0x10,1) ",%1 \n" // dst_u += 16
- "sub $0x10,%3 \n" // width -= 16
- "jg 1b \n"
- : "+r"(src_argb), // %0
- "+r"(dst_u), // %1
- "+r"(dst_v), // %2
- "+rm"(width) // %3
- : "m"(kARGBToV), // %4
- "m"(kARGBToU), // %5
- "m"(kAddUV128) // %6
- : "memory", "cc", NACL_R14
- "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6"
- );
-}
-#endif // HAS_ARGBTOUV444ROW_SSSE3
-
-#ifdef HAS_ARGBTOUV422ROW_SSSE3
-void ARGBToUV422Row_SSSE3(const uint8* src_argb0, // Averages horizontally adjacent pixel pairs of a single row and writes one U and one V byte per pair;
- uint8* dst_u, uint8* dst_v, int width) { // 16 source pixels -> 8 U + 8 V bytes per iteration. The clobber list now names xmm3-xmm5, which the asm writes.
- asm volatile (
- "movdqa %4,%%xmm3 \n" // xmm3 = V weights
- "movdqa %5,%%xmm4 \n" // xmm4 = U weights
- "movdqa %6,%%xmm5 \n" // xmm5 = +128 byte bias
- "sub %1,%2 \n" // %2 = dst_v - dst_u, so (%1,%2,1) addresses dst_v while only dst_u is advanced
- LABELALIGN
- "1: \n"
- "movdqu " MEMACCESS(0) ",%%xmm0 \n"
- "movdqu " MEMACCESS2(0x10,0) ",%%xmm1 \n"
- "movdqu " MEMACCESS2(0x20,0) ",%%xmm2 \n"
- "movdqu " MEMACCESS2(0x30,0) ",%%xmm6 \n"
- "lea " MEMLEA(0x40,0) ",%0 \n" // src += 64
- "movdqa %%xmm0,%%xmm7 \n"
- "shufps $0x88,%%xmm1,%%xmm0 \n" // even pixels
- "shufps $0xdd,%%xmm1,%%xmm7 \n" // odd pixels
- "pavgb %%xmm7,%%xmm0 \n" // horizontal average: 4 subsampled pixels
- "movdqa %%xmm2,%%xmm7 \n"
- "shufps $0x88,%%xmm6,%%xmm2 \n"
- "shufps $0xdd,%%xmm6,%%xmm7 \n"
- "pavgb %%xmm7,%%xmm2 \n"
- "movdqa %%xmm0,%%xmm1 \n" // xmm1/xmm6 keep copies for the V computation
- "movdqa %%xmm2,%%xmm6 \n"
- "pmaddubsw %%xmm4,%%xmm0 \n" // U partial sums
- "pmaddubsw %%xmm4,%%xmm2 \n"
- "pmaddubsw %%xmm3,%%xmm1 \n" // V partial sums
- "pmaddubsw %%xmm3,%%xmm6 \n"
- "phaddw %%xmm2,%%xmm0 \n" // 8 U sums
- "phaddw %%xmm6,%%xmm1 \n" // 8 V sums
- "psraw $0x8,%%xmm0 \n"
- "psraw $0x8,%%xmm1 \n"
- "packsswb %%xmm1,%%xmm0 \n" // low 8 bytes = U, high 8 bytes = V
- "paddb %%xmm5,%%xmm0 \n" // bias signed results by 128
- "movlps %%xmm0," MEMACCESS(1) " \n" // store 8 U bytes
- MEMOPMEM(movhps,xmm0,0x00,1,2,1) // movhps %%xmm0,(%1,%2,1)
- "lea " MEMLEA(0x8,1) ",%1 \n" // dst_u += 8
- "sub $0x10,%3 \n" // width -= 16
- "jg 1b \n"
- : "+r"(src_argb0), // %0
- "+r"(dst_u), // %1
- "+r"(dst_v), // %2
- "+rm"(width) // %3
- : "m"(kARGBToV), // %4
- "m"(kARGBToU), // %5
- "m"(kAddUV128) // %6
- : "memory", "cc", NACL_R14
- "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", "xmm7"
- );
-}
-#endif // HAS_ARGBTOUV422ROW_SSSE3
-
-// Computes one row of Y from a row of 4-byte pixels using the kBGRAToY
-// coefficients, then adds kAddY16 to every output byte.
-// Each loop iteration reads 64 source bytes (16 pixels) and writes 16 bytes to
-// dst_y. The loop body runs before pix is tested and only handles whole groups
-// of 16 pixels; pix is presumably a positive multiple of 16 - confirm against
-// callers.
-// kBGRAToY and kAddY16 are loaded with movdqa, which requires them to be
-// 16-byte aligned.
-void BGRAToYRow_SSSE3(const uint8* src_bgra, uint8* dst_y, int pix) {
- asm volatile (
- "movdqa %4,%%xmm5 \n" // xmm5 = kAddY16
- "movdqa %3,%%xmm4 \n" // xmm4 = kBGRAToY
- LABELALIGN
- "1: \n"
- "movdqu " MEMACCESS(0) ",%%xmm0 \n"
- "movdqu " MEMACCESS2(0x10,0) ",%%xmm1 \n"
- "movdqu " MEMACCESS2(0x20,0) ",%%xmm2 \n"
- "movdqu " MEMACCESS2(0x30,0) ",%%xmm3 \n"
- "pmaddubsw %%xmm4,%%xmm0 \n"
- "pmaddubsw %%xmm4,%%xmm1 \n"
- "pmaddubsw %%xmm4,%%xmm2 \n"
- "pmaddubsw %%xmm4,%%xmm3 \n"
- "lea " MEMLEA(0x40,0) ",%0 \n"
- "phaddw %%xmm1,%%xmm0 \n" // one 16-bit sum per pixel
- "phaddw %%xmm3,%%xmm2 \n"
- "psrlw $0x7,%%xmm0 \n"
- "psrlw $0x7,%%xmm2 \n"
- "packuswb %%xmm2,%%xmm0 \n"
- "paddb %%xmm5,%%xmm0 \n"
- "movdqu %%xmm0," MEMACCESS(1) " \n"
- "lea " MEMLEA(0x10,1) ",%1 \n"
- "sub $0x10,%2 \n"
- "jg 1b \n"
- : "+r"(src_bgra), // %0
- "+r"(dst_y), // %1
- "+r"(pix) // %2
- : "m"(kBGRAToY), // %3
- "m"(kAddY16) // %4
- : "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5"
- );
-}
-
-// Converts two adjacent rows of 4-byte pixels into one row of U and one row of
-// V. The row at src_bgra0 is averaged with the row src_stride_bgra bytes
-// further on, then each horizontal pair of pixels is averaged, so every output
-// sample is derived from a 2x2 block.
-// Each loop iteration reads 64 bytes from each of the two rows (16 pixels) and
-// writes 8 bytes to dst_u and 8 bytes to dst_v. The loop body runs before
-// width is tested and only handles whole groups of 16 pixels; width is
-// presumably a positive multiple of 16 - confirm against callers.
-// kBGRAToV, kBGRAToU and kAddUV128 are loaded with movdqa, which requires them
-// to be 16-byte aligned.
-// xmm3, xmm4 and xmm5 hold those constants for the whole loop, so they are
-// listed as clobbers together with the scratch registers.
-void BGRAToUVRow_SSSE3(const uint8* src_bgra0, int src_stride_bgra,
- uint8* dst_u, uint8* dst_v, int width) {
- asm volatile (
- "movdqa %5,%%xmm3 \n" // xmm3 = kBGRAToV
- "movdqa %6,%%xmm4 \n" // xmm4 = kBGRAToU
- "movdqa %7,%%xmm5 \n" // xmm5 = kAddUV128
- "sub %1,%2 \n" // dst_v becomes an offset from dst_u
- LABELALIGN
- "1: \n"
- "movdqu " MEMACCESS(0) ",%%xmm0 \n"
- MEMOPREG(movdqu,0x00,0,4,1,xmm7) // movdqu (%0,%4,1),%%xmm7
- "pavgb %%xmm7,%%xmm0 \n"
- "movdqu " MEMACCESS2(0x10,0) ",%%xmm1 \n"
- MEMOPREG(movdqu,0x10,0,4,1,xmm7) // movdqu 0x10(%0,%4,1),%%xmm7
- "pavgb %%xmm7,%%xmm1 \n"
- "movdqu " MEMACCESS2(0x20,0) ",%%xmm2 \n"
- MEMOPREG(movdqu,0x20,0,4,1,xmm7) // movdqu 0x20(%0,%4,1),%%xmm7
- "pavgb %%xmm7,%%xmm2 \n"
- "movdqu " MEMACCESS2(0x30,0) ",%%xmm6 \n"
- MEMOPREG(movdqu,0x30,0,4,1,xmm7) // movdqu 0x30(%0,%4,1),%%xmm7
- "pavgb %%xmm7,%%xmm6 \n"
-
- "lea " MEMLEA(0x40,0) ",%0 \n"
- "movdqa %%xmm0,%%xmm7 \n"
- "shufps $0x88,%%xmm1,%%xmm0 \n" // even pixels
- "shufps $0xdd,%%xmm1,%%xmm7 \n" // odd pixels
- "pavgb %%xmm7,%%xmm0 \n" // average each horizontal pair
- "movdqa %%xmm2,%%xmm7 \n"
- "shufps $0x88,%%xmm6,%%xmm2 \n"
- "shufps $0xdd,%%xmm6,%%xmm7 \n"
- "pavgb %%xmm7,%%xmm2 \n"
- "movdqa %%xmm0,%%xmm1 \n"
- "movdqa %%xmm2,%%xmm6 \n"
- "pmaddubsw %%xmm4,%%xmm0 \n"
- "pmaddubsw %%xmm4,%%xmm2 \n"
- "pmaddubsw %%xmm3,%%xmm1 \n"
- "pmaddubsw %%xmm3,%%xmm6 \n"
- "phaddw %%xmm2,%%xmm0 \n" // xmm0 = 8 U sums
- "phaddw %%xmm6,%%xmm1 \n" // xmm1 = 8 V sums
- "psraw $0x8,%%xmm0 \n"
- "psraw $0x8,%%xmm1 \n"
- "packsswb %%xmm1,%%xmm0 \n" // low 8 bytes U, high 8 bytes V
- "paddb %%xmm5,%%xmm0 \n"
- "movlps %%xmm0," MEMACCESS(1) " \n"
- MEMOPMEM(movhps,xmm0,0x00,1,2,1) // movhps %%xmm0,(%1,%2,1)
- "lea " MEMLEA(0x8,1) ",%1 \n"
- "sub $0x10,%3 \n"
- "jg 1b \n"
- : "+r"(src_bgra0), // %0
- "+r"(dst_u), // %1
- "+r"(dst_v), // %2
- "+rm"(width) // %3
- : "r"((intptr_t)(src_stride_bgra)), // %4
- "m"(kBGRAToV), // %5
- "m"(kBGRAToU), // %6
- "m"(kAddUV128) // %7
- : "memory", "cc", NACL_R14
- "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", "xmm7"
- );
-}
-
-// Computes one row of Y from a row of 4-byte pixels using the kABGRToY
-// coefficients, then adds kAddY16 to every output byte.
-// Each loop iteration reads 64 source bytes (16 pixels) and writes 16 bytes to
-// dst_y. The loop body runs before pix is tested and only handles whole groups
-// of 16 pixels; pix is presumably a positive multiple of 16 - confirm against
-// callers.
-// kABGRToY and kAddY16 are loaded with movdqa, which requires them to be
-// 16-byte aligned.
-void ABGRToYRow_SSSE3(const uint8* src_abgr, uint8* dst_y, int pix) {
- asm volatile (
- "movdqa %4,%%xmm5 \n" // xmm5 = kAddY16
- "movdqa %3,%%xmm4 \n" // xmm4 = kABGRToY
- LABELALIGN
- "1: \n"
- "movdqu " MEMACCESS(0) ",%%xmm0 \n"
- "movdqu " MEMACCESS2(0x10,0) ",%%xmm1 \n"
- "movdqu " MEMACCESS2(0x20,0) ",%%xmm2 \n"
- "movdqu " MEMACCESS2(0x30,0) ",%%xmm3 \n"
- "pmaddubsw %%xmm4,%%xmm0 \n"
- "pmaddubsw %%xmm4,%%xmm1 \n"
- "pmaddubsw %%xmm4,%%xmm2 \n"
- "pmaddubsw %%xmm4,%%xmm3 \n"
- "lea " MEMLEA(0x40,0) ",%0 \n"
- "phaddw %%xmm1,%%xmm0 \n" // one 16-bit sum per pixel
- "phaddw %%xmm3,%%xmm2 \n"
- "psrlw $0x7,%%xmm0 \n"
- "psrlw $0x7,%%xmm2 \n"
- "packuswb %%xmm2,%%xmm0 \n"
- "paddb %%xmm5,%%xmm0 \n"
- "movdqu %%xmm0," MEMACCESS(1) " \n"
- "lea " MEMLEA(0x10,1) ",%1 \n"
- "sub $0x10,%2 \n"
- "jg 1b \n"
- : "+r"(src_abgr), // %0
- "+r"(dst_y), // %1
- "+r"(pix) // %2
- : "m"(kABGRToY), // %3
- "m"(kAddY16) // %4
- : "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5"
- );
-}
-
-// Computes one row of Y from a row of 4-byte pixels using the kRGBAToY
-// coefficients, then adds kAddY16 to every output byte.
-// Each loop iteration reads 64 source bytes (16 pixels) and writes 16 bytes to
-// dst_y. The loop body runs before pix is tested and only handles whole groups
-// of 16 pixels; pix is presumably a positive multiple of 16 - confirm against
-// callers.
-// kRGBAToY and kAddY16 are loaded with movdqa, which requires them to be
-// 16-byte aligned.
-void RGBAToYRow_SSSE3(const uint8* src_rgba, uint8* dst_y, int pix) {
- asm volatile (
- "movdqa %4,%%xmm5 \n" // xmm5 = kAddY16
- "movdqa %3,%%xmm4 \n" // xmm4 = kRGBAToY
- LABELALIGN
- "1: \n"
- "movdqu " MEMACCESS(0) ",%%xmm0 \n"
- "movdqu " MEMACCESS2(0x10,0) ",%%xmm1 \n"
- "movdqu " MEMACCESS2(0x20,0) ",%%xmm2 \n"
- "movdqu " MEMACCESS2(0x30,0) ",%%xmm3 \n"
- "pmaddubsw %%xmm4,%%xmm0 \n"
- "pmaddubsw %%xmm4,%%xmm1 \n"
- "pmaddubsw %%xmm4,%%xmm2 \n"
- "pmaddubsw %%xmm4,%%xmm3 \n"
- "lea " MEMLEA(0x40,0) ",%0 \n"
- "phaddw %%xmm1,%%xmm0 \n" // one 16-bit sum per pixel
- "phaddw %%xmm3,%%xmm2 \n"
- "psrlw $0x7,%%xmm0 \n"
- "psrlw $0x7,%%xmm2 \n"
- "packuswb %%xmm2,%%xmm0 \n"
- "paddb %%xmm5,%%xmm0 \n"
- "movdqu %%xmm0," MEMACCESS(1) " \n"
- "lea " MEMLEA(0x10,1) ",%1 \n"
- "sub $0x10,%2 \n"
- "jg 1b \n"
- : "+r"(src_rgba), // %0
- "+r"(dst_y), // %1
- "+r"(pix) // %2
- : "m"(kRGBAToY), // %3
- "m"(kAddY16) // %4
- : "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5"
- );
-}
-
-// Converts two adjacent rows of 4-byte pixels into one row of U and one row of
-// V. The row at src_abgr0 is averaged with the row src_stride_abgr bytes
-// further on, then each horizontal pair of pixels is averaged, so every output
-// sample is derived from a 2x2 block.
-// Each loop iteration reads 64 bytes from each of the two rows (16 pixels) and
-// writes 8 bytes to dst_u and 8 bytes to dst_v. The loop body runs before
-// width is tested and only handles whole groups of 16 pixels; width is
-// presumably a positive multiple of 16 - confirm against callers.
-// kABGRToV, kABGRToU and kAddUV128 are loaded with movdqa, which requires them
-// to be 16-byte aligned.
-// xmm3, xmm4 and xmm5 hold those constants for the whole loop, so they are
-// listed as clobbers together with the scratch registers.
-void ABGRToUVRow_SSSE3(const uint8* src_abgr0, int src_stride_abgr,
- uint8* dst_u, uint8* dst_v, int width) {
- asm volatile (
- "movdqa %5,%%xmm3 \n" // xmm3 = kABGRToV
- "movdqa %6,%%xmm4 \n" // xmm4 = kABGRToU
- "movdqa %7,%%xmm5 \n" // xmm5 = kAddUV128
- "sub %1,%2 \n" // dst_v becomes an offset from dst_u
- LABELALIGN
- "1: \n"
- "movdqu " MEMACCESS(0) ",%%xmm0 \n"
- MEMOPREG(movdqu,0x00,0,4,1,xmm7) // movdqu (%0,%4,1),%%xmm7
- "pavgb %%xmm7,%%xmm0 \n"
- "movdqu " MEMACCESS2(0x10,0) ",%%xmm1 \n"
- MEMOPREG(movdqu,0x10,0,4,1,xmm7) // movdqu 0x10(%0,%4,1),%%xmm7
- "pavgb %%xmm7,%%xmm1 \n"
- "movdqu " MEMACCESS2(0x20,0) ",%%xmm2 \n"
- MEMOPREG(movdqu,0x20,0,4,1,xmm7) // movdqu 0x20(%0,%4,1),%%xmm7
- "pavgb %%xmm7,%%xmm2 \n"
- "movdqu " MEMACCESS2(0x30,0) ",%%xmm6 \n"
- MEMOPREG(movdqu,0x30,0,4,1,xmm7) // movdqu 0x30(%0,%4,1),%%xmm7
- "pavgb %%xmm7,%%xmm6 \n"
-
- "lea " MEMLEA(0x40,0) ",%0 \n"
- "movdqa %%xmm0,%%xmm7 \n"
- "shufps $0x88,%%xmm1,%%xmm0 \n" // even pixels
- "shufps $0xdd,%%xmm1,%%xmm7 \n" // odd pixels
- "pavgb %%xmm7,%%xmm0 \n" // average each horizontal pair
- "movdqa %%xmm2,%%xmm7 \n"
- "shufps $0x88,%%xmm6,%%xmm2 \n"
- "shufps $0xdd,%%xmm6,%%xmm7 \n"
- "pavgb %%xmm7,%%xmm2 \n"
- "movdqa %%xmm0,%%xmm1 \n"
- "movdqa %%xmm2,%%xmm6 \n"
- "pmaddubsw %%xmm4,%%xmm0 \n"
- "pmaddubsw %%xmm4,%%xmm2 \n"
- "pmaddubsw %%xmm3,%%xmm1 \n"
- "pmaddubsw %%xmm3,%%xmm6 \n"
- "phaddw %%xmm2,%%xmm0 \n" // xmm0 = 8 U sums
- "phaddw %%xmm6,%%xmm1 \n" // xmm1 = 8 V sums
- "psraw $0x8,%%xmm0 \n"
- "psraw $0x8,%%xmm1 \n"
- "packsswb %%xmm1,%%xmm0 \n" // low 8 bytes U, high 8 bytes V
- "paddb %%xmm5,%%xmm0 \n"
- "movlps %%xmm0," MEMACCESS(1) " \n"
- MEMOPMEM(movhps,xmm0,0x00,1,2,1) // movhps %%xmm0,(%1,%2,1)
- "lea " MEMLEA(0x8,1) ",%1 \n"
- "sub $0x10,%3 \n"
- "jg 1b \n"
- : "+r"(src_abgr0), // %0
- "+r"(dst_u), // %1
- "+r"(dst_v), // %2
- "+rm"(width) // %3
- : "r"((intptr_t)(src_stride_abgr)), // %4
- "m"(kABGRToV), // %5
- "m"(kABGRToU), // %6
- "m"(kAddUV128) // %7
- : "memory", "cc", NACL_R14
- "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", "xmm7"
- );
-}
-
-// Converts two adjacent rows of 4-byte pixels into one row of U and one row of
-// V. The row at src_rgba0 is averaged with the row src_stride_rgba bytes
-// further on, then each horizontal pair of pixels is averaged, so every output
-// sample is derived from a 2x2 block.
-// Each loop iteration reads 64 bytes from each of the two rows (16 pixels) and
-// writes 8 bytes to dst_u and 8 bytes to dst_v. The loop body runs before
-// width is tested and only handles whole groups of 16 pixels; width is
-// presumably a positive multiple of 16 - confirm against callers.
-// kRGBAToV, kRGBAToU and kAddUV128 are loaded with movdqa, which requires them
-// to be 16-byte aligned.
-// xmm3, xmm4 and xmm5 hold those constants for the whole loop, so they are
-// listed as clobbers together with the scratch registers.
-void RGBAToUVRow_SSSE3(const uint8* src_rgba0, int src_stride_rgba,
- uint8* dst_u, uint8* dst_v, int width) {
- asm volatile (
- "movdqa %5,%%xmm3 \n" // xmm3 = kRGBAToV
- "movdqa %6,%%xmm4 \n" // xmm4 = kRGBAToU
- "movdqa %7,%%xmm5 \n" // xmm5 = kAddUV128
- "sub %1,%2 \n" // dst_v becomes an offset from dst_u
- LABELALIGN
- "1: \n"
- "movdqu " MEMACCESS(0) ",%%xmm0 \n"
- MEMOPREG(movdqu,0x00,0,4,1,xmm7) // movdqu (%0,%4,1),%%xmm7
- "pavgb %%xmm7,%%xmm0 \n"
- "movdqu " MEMACCESS2(0x10,0) ",%%xmm1 \n"
- MEMOPREG(movdqu,0x10,0,4,1,xmm7) // movdqu 0x10(%0,%4,1),%%xmm7
- "pavgb %%xmm7,%%xmm1 \n"
- "movdqu " MEMACCESS2(0x20,0) ",%%xmm2 \n"
- MEMOPREG(movdqu,0x20,0,4,1,xmm7) // movdqu 0x20(%0,%4,1),%%xmm7
- "pavgb %%xmm7,%%xmm2 \n"
- "movdqu " MEMACCESS2(0x30,0) ",%%xmm6 \n"
- MEMOPREG(movdqu,0x30,0,4,1,xmm7) // movdqu 0x30(%0,%4,1),%%xmm7
- "pavgb %%xmm7,%%xmm6 \n"
-
- "lea " MEMLEA(0x40,0) ",%0 \n"
- "movdqa %%xmm0,%%xmm7 \n"
- "shufps $0x88,%%xmm1,%%xmm0 \n" // even pixels
- "shufps $0xdd,%%xmm1,%%xmm7 \n" // odd pixels
- "pavgb %%xmm7,%%xmm0 \n" // average each horizontal pair
- "movdqa %%xmm2,%%xmm7 \n"
- "shufps $0x88,%%xmm6,%%xmm2 \n"
- "shufps $0xdd,%%xmm6,%%xmm7 \n"
- "pavgb %%xmm7,%%xmm2 \n"
- "movdqa %%xmm0,%%xmm1 \n"
- "movdqa %%xmm2,%%xmm6 \n"
- "pmaddubsw %%xmm4,%%xmm0 \n"
- "pmaddubsw %%xmm4,%%xmm2 \n"
- "pmaddubsw %%xmm3,%%xmm1 \n"
- "pmaddubsw %%xmm3,%%xmm6 \n"
- "phaddw %%xmm2,%%xmm0 \n" // xmm0 = 8 U sums
- "phaddw %%xmm6,%%xmm1 \n" // xmm1 = 8 V sums
- "psraw $0x8,%%xmm0 \n"
- "psraw $0x8,%%xmm1 \n"
- "packsswb %%xmm1,%%xmm0 \n" // low 8 bytes U, high 8 bytes V
- "paddb %%xmm5,%%xmm0 \n"
- "movlps %%xmm0," MEMACCESS(1) " \n"
- MEMOPMEM(movhps,xmm0,0x00,1,2,1) // movhps %%xmm0,(%1,%2,1)
- "lea " MEMLEA(0x8,1) ",%1 \n"
- "sub $0x10,%3 \n"
- "jg 1b \n"
- : "+r"(src_rgba0), // %0
- "+r"(dst_u), // %1
- "+r"(dst_v), // %2
- "+rm"(width) // %3
- : "r"((intptr_t)(src_stride_rgba)), // %4
- "m"(kRGBAToV), // %5
- "m"(kRGBAToU), // %6
- "m"(kAddUV128) // %7
- : "memory", "cc", NACL_R14
- "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", "xmm7"
- );
-}
-
-#if defined(HAS_I422TOARGBROW_SSSE3) || defined(HAS_I422TOARGBROW_AVX2)
-
-// Coefficient table read by the YUVTORGB and YUVTORGB_AVX2 macros. Those
-// macros address the members by fixed byte displacements from the start of
-// the struct, so the member order and sizes must match the offsets noted
-// beside each field.
-struct YuvConstants {
- lvec8 kUVToB; // 0
- lvec8 kUVToG; // 32
- lvec8 kUVToR; // 64
- lvec16 kUVBiasB; // 96
- lvec16 kUVBiasG; // 128
- lvec16 kUVBiasR; // 160
- lvec16 kYToRgb; // 192
-};
-
-// BT.601 YUV to RGB reference
-// R = (Y - 16) * 1.164 - V * -1.596
-// G = (Y - 16) * 1.164 - U * 0.391 - V * 0.813
-// B = (Y - 16) * 1.164 - U * -2.018
-// The coefficients below are these factors scaled by 64 and negated, because
-// the conversion macros subtract the U/V products from the bias terms.
-
-// Y contribution to R,G,B. Scale and bias.
-// TODO(fbarchard): Consider moving constants into a common header.
-#define YG 18997 /* round(1.164 * 64 * 256 * 256 / 257) */
-#define YGB -1160 /* 1.164 * 64 * -16 + 64 / 2 */
-
-// U and V contributions to R,G,B.
-#define UB -128 /* max(-128, round(-2.018 * 64)) */
-#define UG 25 /* round(0.391 * 64) */
-#define VG 52 /* round(0.813 * 64) */
-#define VR -102 /* round(-1.596 * 64) */
-
-// Bias values to subtract 16 from Y and 128 from U and V.
-// YGB carries the Y offset plus rounding; the "* 128" terms cancel the 128
-// offset of U and V.
-#define BB (UB * 128 + YGB)
-#define BG (UG * 128 + VG * 128 + YGB)
-#define BR (VR * 128 + YGB)
-
-// BT601 constants for YUV to RGB.
-// In the first three rows each byte pair multiplies one interleaved (U, V)
-// pair: the first byte of the pair is the U coefficient, the second the V
-// coefficient.
-static YuvConstants SIMD_ALIGNED(kYuvConstants) = {
- { UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0,
- UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0 },
- { UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG,
- UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG },
- { 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR,
- 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR },
- { BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB },
- { BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG },
- { BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR },
- { YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG }
-};
-
-// BT601 constants for NV21 where chroma plane is VU instead of UV.
-// Same values as kYuvConstants with the two coefficients of every byte pair
-// swapped, so the first byte of each pair now multiplies V.
-static YuvConstants SIMD_ALIGNED(kYvuConstants) = {
- { 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB,
- 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB },
- { VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG,
- VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG },
- { VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0,
- VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0 },
- { BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB },
- { BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG },
- { BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR },
- { YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG }
-};
-
-#undef YG
-#undef YGB
-#undef UB
-#undef UG
-#undef VG
-#undef VR
-#undef BB
-#undef BG
-#undef BR
-
-// JPEG YUV to RGB reference
-// * R = Y - V * -1.40200
-// * G = Y - U * 0.34414 - V * 0.71414
-// * B = Y - U * -1.77200
-
-// Y contribution to R,G,B. Scale and bias.
-// TODO(fbarchard): Consider moving constants into a common header.
-#define YGJ 16320 /* round(1.000 * 64 * 256 * 256 / 257) */
-#define YGBJ 32 /* 64 / 2 */
-
-// U and V contributions to R,G,B.
-#define UBJ -113 /* round(-1.77200 * 64) */
-#define UGJ 22 /* round(0.34414 * 64) */
-#define VGJ 46 /* round(0.71414 * 64) */
-#define VRJ -90 /* round(-1.40200 * 64) */
-
-// Bias values to round Y and subtract 128 from U and V.
-// Unlike the BT.601 table there is no 16 offset on Y here: YGBJ only holds
-// the rounding term (64 / 2).
-#define BBJ (UBJ * 128 + YGBJ)
-#define BGJ (UGJ * 128 + VGJ * 128 + YGBJ)
-#define BRJ (VRJ * 128 + YGBJ)
-
-// JPEG constants for YUV to RGB.
-// NOTE(review): unlike kYuvConstants and kYvuConstants this table is not
-// declared static - confirm whether external linkage is intended.
-YuvConstants SIMD_ALIGNED(kYuvJConstants) = {
- { UBJ, 0, UBJ, 0, UBJ, 0, UBJ, 0, UBJ, 0, UBJ, 0, UBJ, 0, UBJ, 0,
- UBJ, 0, UBJ, 0, UBJ, 0, UBJ, 0, UBJ, 0, UBJ, 0, UBJ, 0, UBJ, 0 },
- { UGJ, VGJ, UGJ, VGJ, UGJ, VGJ, UGJ, VGJ,
- UGJ, VGJ, UGJ, VGJ, UGJ, VGJ, UGJ, VGJ,
- UGJ, VGJ, UGJ, VGJ, UGJ, VGJ, UGJ, VGJ,
- UGJ, VGJ, UGJ, VGJ, UGJ, VGJ, UGJ, VGJ },
- { 0, VRJ, 0, VRJ, 0, VRJ, 0, VRJ, 0, VRJ, 0, VRJ, 0, VRJ, 0, VRJ,
- 0, VRJ, 0, VRJ, 0, VRJ, 0, VRJ, 0, VRJ, 0, VRJ, 0, VRJ, 0, VRJ },
- { BBJ, BBJ, BBJ, BBJ, BBJ, BBJ, BBJ, BBJ,
- BBJ, BBJ, BBJ, BBJ, BBJ, BBJ, BBJ, BBJ },
- { BGJ, BGJ, BGJ, BGJ, BGJ, BGJ, BGJ, BGJ,
- BGJ, BGJ, BGJ, BGJ, BGJ, BGJ, BGJ, BGJ },
- { BRJ, BRJ, BRJ, BRJ, BRJ, BRJ, BRJ, BRJ,
- BRJ, BRJ, BRJ, BRJ, BRJ, BRJ, BRJ, BRJ },
- { YGJ, YGJ, YGJ, YGJ, YGJ, YGJ, YGJ, YGJ,
- YGJ, YGJ, YGJ, YGJ, YGJ, YGJ, YGJ, YGJ }
-};
-
-#undef YGJ
-#undef YGBJ
-#undef UBJ
-#undef UGJ
-#undef VGJ
-#undef VRJ
-#undef BBJ
-#undef BGJ
-#undef BRJ
-
-// Read 8 UV from 444
-// Loads 8 U bytes and 8 V bytes, advances u_buf by 8 and leaves 8 interleaved
-// UV pairs in xmm0. Clobbers xmm1. Expects [v_buf] to already hold the offset
-// of the V plane relative to [u_buf].
-#define READYUV444 \
- "movq " MEMACCESS([u_buf]) ",%%xmm0 \n" \
- MEMOPREG(movq, 0x00, [u_buf], [v_buf], 1, xmm1) \
- "lea " MEMLEA(0x8, [u_buf]) ",%[u_buf] \n" \
- "punpcklbw %%xmm1,%%xmm0 \n"
-
-// Read 4 UV from 422, upsample to 8 UV
-// Loads 4 U bytes and 4 V bytes, advances u_buf by 4 and leaves 8 interleaved
-// UV pairs in xmm0, each source pair appearing twice. Clobbers xmm1. Expects
-// [v_buf] to already hold the offset of the V plane relative to [u_buf].
-#define READYUV422 \
- "movd " MEMACCESS([u_buf]) ",%%xmm0 \n" \
- MEMOPREG(movd, 0x00, [u_buf], [v_buf], 1, xmm1) \
- "lea " MEMLEA(0x4, [u_buf]) ",%[u_buf] \n" \
- "punpcklbw %%xmm1,%%xmm0 \n" \
- "punpcklwd %%xmm0,%%xmm0 \n"
-
-// Read 2 UV from 411, upsample to 8 UV
-// Leaves 8 interleaved UV pairs in xmm0, each of the 2 source pairs appearing
-// four times, and advances u_buf by 2. Clobbers xmm1. Expects [v_buf] to
-// already hold the offset of the V plane relative to [u_buf].
-// Note: movd loads 4 bytes from each plane although only 2 are consumed, so
-// every iteration reads 2 bytes beyond the chroma samples it uses.
-#define READYUV411 \
- "movd " MEMACCESS([u_buf]) ",%%xmm0 \n" \
- MEMOPREG(movd, 0x00, [u_buf], [v_buf], 1, xmm1) \
- "lea " MEMLEA(0x2, [u_buf]) ",%[u_buf] \n" \
- "punpcklbw %%xmm1,%%xmm0 \n" \
- "punpcklwd %%xmm0,%%xmm0 \n" \
- "punpckldq %%xmm0,%%xmm0 \n"
-
-// Read 4 UV from NV12, upsample to 8 UV
-// Loads 8 bytes of already interleaved chroma from uv_buf, advances uv_buf by
-// 8 and leaves 8 two-byte pairs in xmm0, each source pair appearing twice.
-#define READNV12 \
- "movq " MEMACCESS([uv_buf]) ",%%xmm0 \n" \
- "lea " MEMLEA(0x8, [uv_buf]) ",%[uv_buf] \n" \
- "punpcklwd %%xmm0,%%xmm0 \n"
-
-// Convert 8 pixels: 8 UV and 8 Y
-// In:  xmm0 = 8 interleaved chroma byte pairs; [y_buf] points at 8 Y bytes;
-//      [YuvConstants] is the name of the asm operand that holds the address
-//      of a YuvConstants table.
-// Out: xmm0 = B, xmm1 = G, xmm2 = R, one unsigned byte per pixel (the 8 bytes
-//      are repeated in both halves of each register); y_buf advanced by 8.
-// Clobbers xmm3.
-// The table is accessed with movdqa and as a memory operand of SSE
-// instructions, so it must be 16-byte aligned.
-#define YUVTORGB(YuvConstants) \
- "movdqa %%xmm0,%%xmm1 \n" \
- "movdqa %%xmm0,%%xmm2 \n" \
- "movdqa %%xmm0,%%xmm3 \n" \
- "movdqa " MEMACCESS2(96, [YuvConstants]) ",%%xmm0 \n" \
- "pmaddubsw " MEMACCESS([YuvConstants]) ",%%xmm1 \n" \
- "psubw %%xmm1,%%xmm0 \n" \
- "movdqa " MEMACCESS2(128, [YuvConstants]) ",%%xmm1 \n" \
- "pmaddubsw " MEMACCESS2(32, [YuvConstants]) ",%%xmm2 \n" \
- "psubw %%xmm2,%%xmm1 \n" \
- "movdqa " MEMACCESS2(160, [YuvConstants]) ",%%xmm2 \n" \
- "pmaddubsw " MEMACCESS2(64, [YuvConstants]) ",%%xmm3 \n" \
- "psubw %%xmm3,%%xmm2 \n" \
- "movq " MEMACCESS([y_buf]) ",%%xmm3 \n" \
- "lea " MEMLEA(0x8, [y_buf]) ",%[y_buf] \n" \
- "punpcklbw %%xmm3,%%xmm3 \n" \
- "pmulhuw " MEMACCESS2(192, [YuvConstants]) ",%%xmm3 \n" \
- "paddsw %%xmm3,%%xmm0 \n" \
- "paddsw %%xmm3,%%xmm1 \n" \
- "paddsw %%xmm3,%%xmm2 \n" \
- "psraw $0x6,%%xmm0 \n" \
- "psraw $0x6,%%xmm1 \n" \
- "psraw $0x6,%%xmm2 \n" \
- "packuswb %%xmm0,%%xmm0 \n" \
- "packuswb %%xmm1,%%xmm1 \n" \
- "packuswb %%xmm2,%%xmm2 \n"
-
-// Store 8 ARGB values. Assumes XMM5 is all ones; it supplies the 0xff alpha
-// bytes (the row functions set it with pcmpeqb before the loop).
-// In: xmm0 = B, xmm1 = G, xmm2 = R. Writes 32 bytes in memory order B,G,R,A
-// and advances dst_argb by 32. Overwrites xmm0, xmm1 and xmm2.
-#define STOREARGB \
- "punpcklbw %%xmm1,%%xmm0 \n" \
- "punpcklbw %%xmm5,%%xmm2 \n" \
- "movdqa %%xmm0,%%xmm1 \n" \
- "punpcklwd %%xmm2,%%xmm0 \n" \
- "punpckhwd %%xmm2,%%xmm1 \n" \
- "movdqu %%xmm0," MEMACCESS([dst_argb]) " \n" \
- "movdqu %%xmm1," MEMACCESS2(0x10, [dst_argb]) " \n" \
- "lea " MEMLEA(0x20, [dst_argb]) ", %[dst_argb] \n"
-
-// Store 8 BGRA values. Sets XMM5 to all ones itself (0xff alpha bytes) and
-// then overwrites it, so it makes no assumption about XMM5 on entry.
-// In: xmm0 = B, xmm1 = G, xmm2 = R. Writes 32 bytes in memory order A,R,G,B
-// and advances dst_bgra by 32. Overwrites xmm0, xmm1 and xmm5.
-#define STOREBGRA \
- "pcmpeqb %%xmm5,%%xmm5 \n" \
- "punpcklbw %%xmm0,%%xmm1 \n" \
- "punpcklbw %%xmm2,%%xmm5 \n" \
- "movdqa %%xmm5,%%xmm0 \n" \
- "punpcklwd %%xmm1,%%xmm5 \n" \
- "punpckhwd %%xmm1,%%xmm0 \n" \
- "movdqu %%xmm5," MEMACCESS([dst_bgra]) " \n" \
- "movdqu %%xmm0," MEMACCESS2(0x10, [dst_bgra]) " \n" \
- "lea " MEMLEA(0x20, [dst_bgra]) ", %[dst_bgra] \n"
-
-// Store 8 ABGR values. Assumes XMM5 is all ones; it supplies the 0xff alpha
-// bytes (the row functions set it with pcmpeqb before the loop).
-// In: xmm0 = B, xmm1 = G, xmm2 = R. Writes 32 bytes in memory order R,G,B,A
-// and advances dst_abgr by 32. Overwrites xmm0, xmm1 and xmm2.
-#define STOREABGR \
- "punpcklbw %%xmm1,%%xmm2 \n" \
- "punpcklbw %%xmm5,%%xmm0 \n" \
- "movdqa %%xmm2,%%xmm1 \n" \
- "punpcklwd %%xmm0,%%xmm2 \n" \
- "punpckhwd %%xmm0,%%xmm1 \n" \
- "movdqu %%xmm2," MEMACCESS([dst_abgr]) " \n" \
- "movdqu %%xmm1," MEMACCESS2(0x10, [dst_abgr]) " \n" \
- "lea " MEMLEA(0x20, [dst_abgr]) ", %[dst_abgr] \n"
-
-// Store 8 RGBA values. Sets XMM5 to all ones itself (0xff alpha bytes) and
-// then overwrites it, so it makes no assumption about XMM5 on entry.
-// In: xmm0 = B, xmm1 = G, xmm2 = R. Writes 32 bytes in memory order A,B,G,R
-// and advances dst_rgba by 32. Overwrites xmm0, xmm1 and xmm5.
-#define STORERGBA \
- "pcmpeqb %%xmm5,%%xmm5 \n" \
- "punpcklbw %%xmm2,%%xmm1 \n" \
- "punpcklbw %%xmm0,%%xmm5 \n" \
- "movdqa %%xmm5,%%xmm0 \n" \
- "punpcklwd %%xmm1,%%xmm5 \n" \
- "punpckhwd %%xmm1,%%xmm0 \n" \
- "movdqu %%xmm5," MEMACCESS([dst_rgba]) " \n" \
- "movdqu %%xmm0," MEMACCESS2(0x10, [dst_rgba]) " \n" \
- "lea " MEMLEA(0x20, [dst_rgba]) ",%[dst_rgba] \n"
-
-// Converts one row of planar Y, U and V with full-resolution chroma to 4-byte
-// ARGB pixels using the kYuvConstants table.
-// Each loop iteration consumes 8 Y, 8 U and 8 V bytes and writes 32 bytes to
-// dst_argb. The loop body runs before width is tested and only handles whole
-// groups of 8 pixels; width is presumably a positive multiple of 8 - confirm
-// against callers.
-void OMITFP I444ToARGBRow_SSSE3(const uint8* y_buf,
- const uint8* u_buf,
- const uint8* v_buf,
- uint8* dst_argb,
- int width) {
- asm volatile (
- "sub %[u_buf],%[v_buf] \n" // v_buf becomes an offset from u_buf
- "pcmpeqb %%xmm5,%%xmm5 \n" // xmm5 = all ones, used as alpha by STOREARGB
- LABELALIGN
- "1: \n"
- READYUV444
- YUVTORGB(kYuvConstants)
- STOREARGB
- "sub $0x8,%[width] \n"
- "jg 1b \n"
- : [y_buf]"+r"(y_buf), // %[y_buf]
- [u_buf]"+r"(u_buf), // %[u_buf]
- [v_buf]"+r"(v_buf), // %[v_buf]
- [dst_argb]"+r"(dst_argb), // %[dst_argb]
- [width]"+rm"(width) // %[width]
- : [kYuvConstants]"r"(&kYuvConstants.kUVToB) // %[kYuvConstants]
- : "memory", "cc", NACL_R14
- "xmm0", "xmm1", "xmm2", "xmm3", "xmm5"
- );
-}
-
-// TODO(fbarchard): Consider putting masks into constants.
-// Converts one row of planar Y, U and V with half-width chroma to 3-byte
-// pixels using the kYuvConstants table and the kShuffleMaskARGBToRGB24 masks.
-// Each loop iteration consumes 8 Y, 4 U and 4 V bytes and writes 24 bytes to
-// dst_rgb24. The loop body runs before width is tested and only handles whole
-// groups of 8 pixels; width is presumably a positive multiple of 8 - confirm
-// against callers.
-// Both shuffle masks are loaded with movdqa and must be 16-byte aligned.
-void OMITFP I422ToRGB24Row_SSSE3(const uint8* y_buf,
- const uint8* u_buf,
- const uint8* v_buf,
- uint8* dst_rgb24,
- int width) {
- asm volatile (
- "movdqa %[kShuffleMaskARGBToRGB24_0],%%xmm5 \n"
- "movdqa %[kShuffleMaskARGBToRGB24],%%xmm6 \n"
- "sub %[u_buf],%[v_buf] \n" // v_buf becomes an offset from u_buf
- LABELALIGN
- "1: \n"
- READYUV422
- YUVTORGB(kYuvConstants)
- // Weave B, G, R into 4-byte pixels (R fills the 4th byte), then drop the
- // 4th byte of each pixel with the shuffle masks to pack 8 pixels in 24 bytes.
- "punpcklbw %%xmm1,%%xmm0 \n"
- "punpcklbw %%xmm2,%%xmm2 \n"
- "movdqa %%xmm0,%%xmm1 \n"
- "punpcklwd %%xmm2,%%xmm0 \n"
- "punpckhwd %%xmm2,%%xmm1 \n"
- "pshufb %%xmm5,%%xmm0 \n"
- "pshufb %%xmm6,%%xmm1 \n"
- "palignr $0xc,%%xmm0,%%xmm1 \n"
- "movq %%xmm0," MEMACCESS([dst_rgb24]) "\n"
- "movdqu %%xmm1," MEMACCESS2(0x8,[dst_rgb24]) "\n"
- "lea " MEMLEA(0x18,[dst_rgb24]) ",%[dst_rgb24] \n"
- "subl $0x8,%[width] \n" // explicit size suffix: width may be a memory operand
- "jg 1b \n"
- : [y_buf]"+r"(y_buf), // %[y_buf]
- [u_buf]"+r"(u_buf), // %[u_buf]
- [v_buf]"+r"(v_buf), // %[v_buf]
- [dst_rgb24]"+r"(dst_rgb24), // %[dst_rgb24]
-// TODO(fbarchard): Make width a register for 32 bit.
-#if defined(__i386__) && defined(__pic__)
- [width]"+m"(width) // %[width]
-#else
- [width]"+rm"(width) // %[width]
-#endif
- : [kYuvConstants]"r"(&kYuvConstants.kUVToB),
- [kShuffleMaskARGBToRGB24_0]"m"(kShuffleMaskARGBToRGB24_0),
- [kShuffleMaskARGBToRGB24]"m"(kShuffleMaskARGBToRGB24)
- : "memory", "cc", NACL_R14
- "xmm0", "xmm1", "xmm2", "xmm3", "xmm5", "xmm6"
- );
-}
-
-// Converts one row of planar Y, U and V with half-width chroma to 3-byte
-// pixels using the kYuvConstants table and the kShuffleMaskARGBToRAW masks.
-// Each loop iteration consumes 8 Y, 4 U and 4 V bytes and writes 24 bytes to
-// dst_raw. The loop body runs before width is tested and only handles whole
-// groups of 8 pixels; width is presumably a positive multiple of 8 - confirm
-// against callers.
-// Both shuffle masks are loaded with movdqa and must be 16-byte aligned.
-void OMITFP I422ToRAWRow_SSSE3(const uint8* y_buf,
- const uint8* u_buf,
- const uint8* v_buf,
- uint8* dst_raw,
- int width) {
- asm volatile (
- "movdqa %[kShuffleMaskARGBToRAW_0],%%xmm5 \n"
- "movdqa %[kShuffleMaskARGBToRAW],%%xmm6 \n"
- "sub %[u_buf],%[v_buf] \n" // v_buf becomes an offset from u_buf
- LABELALIGN
- "1: \n"
- READYUV422
- YUVTORGB(kYuvConstants)
- // Weave B, G, R into 4-byte pixels (R fills the 4th byte), then let the
- // shuffle masks select 3 bytes per pixel to pack 8 pixels in 24 bytes.
- "punpcklbw %%xmm1,%%xmm0 \n"
- "punpcklbw %%xmm2,%%xmm2 \n"
- "movdqa %%xmm0,%%xmm1 \n"
- "punpcklwd %%xmm2,%%xmm0 \n"
- "punpckhwd %%xmm2,%%xmm1 \n"
- "pshufb %%xmm5,%%xmm0 \n"
- "pshufb %%xmm6,%%xmm1 \n"
- "palignr $0xc,%%xmm0,%%xmm1 \n"
- "movq %%xmm0," MEMACCESS([dst_raw]) " \n"
- "movdqu %%xmm1," MEMACCESS2(0x8,[dst_raw]) "\n"
- "lea " MEMLEA(0x18,[dst_raw]) ",%[dst_raw] \n"
- "subl $0x8,%[width] \n" // explicit size suffix: width may be a memory operand
- "jg 1b \n"
- : [y_buf]"+r"(y_buf), // %[y_buf]
- [u_buf]"+r"(u_buf), // %[u_buf]
- [v_buf]"+r"(v_buf), // %[v_buf]
- [dst_raw]"+r"(dst_raw), // %[dst_raw]
-// TODO(fbarchard): Make width a register for 32 bit.
-#if defined(__i386__) && defined(__pic__)
- [width]"+m"(width) // %[width]
-#else
- [width]"+rm"(width) // %[width]
-#endif
- : [kYuvConstants]"r"(&kYuvConstants.kUVToB),
- [kShuffleMaskARGBToRAW_0]"m"(kShuffleMaskARGBToRAW_0),
- [kShuffleMaskARGBToRAW]"m"(kShuffleMaskARGBToRAW)
- : "memory", "cc", NACL_R14
- "xmm0", "xmm1", "xmm2", "xmm3", "xmm5", "xmm6"
- );
-}
-
-// Converts one row of planar Y, U and V with half-width chroma to 4-byte ARGB
-// pixels using the kYuvConstants table.
-// Each loop iteration consumes 8 Y, 4 U and 4 V bytes and writes 32 bytes to
-// dst_argb. The loop body runs before width is tested and only handles whole
-// groups of 8 pixels; width is presumably a positive multiple of 8 - confirm
-// against callers.
-void OMITFP I422ToARGBRow_SSSE3(const uint8* y_buf,
- const uint8* u_buf,
- const uint8* v_buf,
- uint8* dst_argb,
- int width) {
- asm volatile (
- "sub %[u_buf],%[v_buf] \n" // v_buf becomes an offset from u_buf
- "pcmpeqb %%xmm5,%%xmm5 \n" // xmm5 = all ones, used as alpha by STOREARGB
- LABELALIGN
- "1: \n"
- READYUV422
- YUVTORGB(kYuvConstants)
- STOREARGB
- "sub $0x8,%[width] \n"
- "jg 1b \n"
- : [y_buf]"+r"(y_buf), // %[y_buf]
- [u_buf]"+r"(u_buf), // %[u_buf]
- [v_buf]"+r"(v_buf), // %[v_buf]
- [dst_argb]"+r"(dst_argb), // %[dst_argb]
- [width]"+rm"(width) // %[width]
- : [kYuvConstants]"r"(&kYuvConstants.kUVToB) // %[kYuvConstants]
- : "memory", "cc", NACL_R14
- "xmm0", "xmm1", "xmm2", "xmm3", "xmm5"
- );
-}
-
-// Converts one row of planar Y, U and V with half-width chroma to 4-byte ARGB
-// pixels using the JPEG coefficient table kYuvJConstants.
-// YUVTORGB(kYuvConstants) only names the asm operand; that operand is bound to
-// kYuvJConstants in the input list below.
-// Each loop iteration consumes 8 Y, 4 U and 4 V bytes and writes 32 bytes to
-// dst_argb. The loop body runs before width is tested and only handles whole
-// groups of 8 pixels; width is presumably a positive multiple of 8 - confirm
-// against callers.
-void OMITFP J422ToARGBRow_SSSE3(const uint8* y_buf,
- const uint8* u_buf,
- const uint8* v_buf,
- uint8* dst_argb,
- int width) {
- asm volatile (
- "sub %[u_buf],%[v_buf] \n" // v_buf becomes an offset from u_buf
- "pcmpeqb %%xmm5,%%xmm5 \n" // xmm5 = all ones, used as alpha by STOREARGB
- LABELALIGN
- "1: \n"
- READYUV422
- YUVTORGB(kYuvConstants)
- STOREARGB
- "sub $0x8,%[width] \n"
- "jg 1b \n"
- : [y_buf]"+r"(y_buf), // %[y_buf]
- [u_buf]"+r"(u_buf), // %[u_buf]
- [v_buf]"+r"(v_buf), // %[v_buf]
- [dst_argb]"+r"(dst_argb), // %[dst_argb]
- [width]"+rm"(width) // %[width]
- : [kYuvConstants]"r"(&kYuvJConstants.kUVToB) // %[kYuvConstants]
- : "memory", "cc", NACL_R14
- "xmm0", "xmm1", "xmm2", "xmm3", "xmm5"
- );
-}
-
-// Converts one row of planar Y, U and V with quarter-width chroma to 4-byte
-// ARGB pixels using the kYuvConstants table.
-// Each loop iteration consumes 8 Y, 2 U and 2 V bytes and writes 32 bytes to
-// dst_argb. READYUV411 loads 4 bytes from each chroma plane per iteration, so
-// it reads 2 bytes beyond the samples it uses.
-// The loop body runs before width is tested and only handles whole groups of
-// 8 pixels; width is presumably a positive multiple of 8 - confirm against
-// callers.
-void OMITFP I411ToARGBRow_SSSE3(const uint8* y_buf,
- const uint8* u_buf,
- const uint8* v_buf,
- uint8* dst_argb,
- int width) {
- asm volatile (
- "sub %[u_buf],%[v_buf] \n" // v_buf becomes an offset from u_buf
- "pcmpeqb %%xmm5,%%xmm5 \n" // xmm5 = all ones, used as alpha by STOREARGB
- LABELALIGN
- "1: \n"
- READYUV411
- YUVTORGB(kYuvConstants)
- STOREARGB
- "sub $0x8,%[width] \n"
- "jg 1b \n"
- : [y_buf]"+r"(y_buf), // %[y_buf]
- [u_buf]"+r"(u_buf), // %[u_buf]
- [v_buf]"+r"(v_buf), // %[v_buf]
- [dst_argb]"+r"(dst_argb), // %[dst_argb]
- [width]"+rm"(width) // %[width]
- : [kYuvConstants]"r"(&kYuvConstants.kUVToB) // %[kYuvConstants]
- : "memory", "cc", NACL_R14
- "xmm0", "xmm1", "xmm2", "xmm3", "xmm5"
- );
-}
-
-// Converts one row of Y plus a row of interleaved half-width chroma to 4-byte
-// ARGB pixels using the kYuvConstants table.
-// Each loop iteration consumes 8 Y bytes and 8 chroma bytes and writes 32
-// bytes to dst_argb. The loop body runs before width is tested and only
-// handles whole groups of 8 pixels; width is presumably a positive multiple of
-// 8 - confirm against callers.
-void OMITFP NV12ToARGBRow_SSSE3(const uint8* y_buf,
- const uint8* uv_buf,
- uint8* dst_argb,
- int width) {
- asm volatile (
- "pcmpeqb %%xmm5,%%xmm5 \n" // xmm5 = all ones, used as alpha by STOREARGB
- LABELALIGN
- "1: \n"
- READNV12
- YUVTORGB(kYuvConstants)
- STOREARGB
- "sub $0x8,%[width] \n"
- "jg 1b \n"
- : [y_buf]"+r"(y_buf), // %[y_buf]
- [uv_buf]"+r"(uv_buf), // %[uv_buf]
- [dst_argb]"+r"(dst_argb), // %[dst_argb]
- [width]"+rm"(width) // %[width]
- : [kYuvConstants]"r"(&kYuvConstants.kUVToB) // %[kYuvConstants]
- // Does not use r14.
- : "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm5"
- );
-}
-
-// Converts one row of Y plus a row of interleaved half-width chroma in V,U
-// order to 4-byte ARGB pixels.
-// The chroma is loaded with the same READNV12 macro as NV12; the swapped byte
-// order is handled by binding the coefficient operand to kYvuConstants.
-// Each loop iteration consumes 8 Y bytes and 8 chroma bytes and writes 32
-// bytes to dst_argb. The loop body runs before width is tested and only
-// handles whole groups of 8 pixels; width is presumably a positive multiple of
-// 8 - confirm against callers.
-void OMITFP NV21ToARGBRow_SSSE3(const uint8* y_buf,
- const uint8* uv_buf,
- uint8* dst_argb,
- int width) {
- asm volatile (
- "pcmpeqb %%xmm5,%%xmm5 \n" // xmm5 = all ones, used as alpha by STOREARGB
- LABELALIGN
- "1: \n"
- READNV12
- YUVTORGB(kYuvConstants)
- STOREARGB
- "sub $0x8,%[width] \n"
- "jg 1b \n"
- : [y_buf]"+r"(y_buf), // %[y_buf]
- [uv_buf]"+r"(uv_buf), // %[uv_buf]
- [dst_argb]"+r"(dst_argb), // %[dst_argb]
- [width]"+rm"(width) // %[width]
- : [kYuvConstants]"r"(&kYvuConstants.kUVToB) // %[kYuvConstants]
- // Does not use r14.
- : "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm5"
- );
-}
-
-// Converts one row of planar Y, U and V with half-width chroma to 4-byte BGRA
-// pixels (memory order A,R,G,B) using the kYuvConstants table.
-// Each loop iteration consumes 8 Y, 4 U and 4 V bytes and writes 32 bytes to
-// dst_bgra. The loop body runs before width is tested and only handles whole
-// groups of 8 pixels; width is presumably a positive multiple of 8 - confirm
-// against callers.
-void OMITFP I422ToBGRARow_SSSE3(const uint8* y_buf,
- const uint8* u_buf,
- const uint8* v_buf,
- uint8* dst_bgra,
- int width) {
- asm volatile (
- "sub %[u_buf],%[v_buf] \n" // v_buf becomes an offset from u_buf
- "pcmpeqb %%xmm5,%%xmm5 \n" // STOREBGRA sets xmm5 again on every iteration
- LABELALIGN
- "1: \n"
- READYUV422
- YUVTORGB(kYuvConstants)
- STOREBGRA
- "sub $0x8,%[width] \n"
- "jg 1b \n"
- : [y_buf]"+r"(y_buf), // %[y_buf]
- [u_buf]"+r"(u_buf), // %[u_buf]
- [v_buf]"+r"(v_buf), // %[v_buf]
- [dst_bgra]"+r"(dst_bgra), // %[dst_bgra]
- [width]"+rm"(width) // %[width]
- : [kYuvConstants]"r"(&kYuvConstants.kUVToB) // %[kYuvConstants]
- : "memory", "cc", NACL_R14
- "xmm0", "xmm1", "xmm2", "xmm3", "xmm5"
- );
-}
-
-// Converts one row of planar Y, U and V with half-width chroma to 4-byte ABGR
-// pixels (memory order R,G,B,A) using the kYuvConstants table.
-// Each loop iteration consumes 8 Y, 4 U and 4 V bytes and writes 32 bytes to
-// dst_abgr. The loop body runs before width is tested and only handles whole
-// groups of 8 pixels; width is presumably a positive multiple of 8 - confirm
-// against callers.
-void OMITFP I422ToABGRRow_SSSE3(const uint8* y_buf,
- const uint8* u_buf,
- const uint8* v_buf,
- uint8* dst_abgr,
- int width) {
- asm volatile (
- "sub %[u_buf],%[v_buf] \n" // v_buf becomes an offset from u_buf
- "pcmpeqb %%xmm5,%%xmm5 \n" // xmm5 = all ones, used as alpha by STOREABGR
- LABELALIGN
- "1: \n"
- READYUV422
- YUVTORGB(kYuvConstants)
- STOREABGR
- "sub $0x8,%[width] \n"
- "jg 1b \n"
- : [y_buf]"+r"(y_buf), // %[y_buf]
- [u_buf]"+r"(u_buf), // %[u_buf]
- [v_buf]"+r"(v_buf), // %[v_buf]
- [dst_abgr]"+r"(dst_abgr), // %[dst_abgr]
- [width]"+rm"(width) // %[width]
- : [kYuvConstants]"r"(&kYuvConstants.kUVToB) // %[kYuvConstants]
- : "memory", "cc", NACL_R14
- "xmm0", "xmm1", "xmm2", "xmm3", "xmm5"
- );
-}
-
-// Converts one row of planar Y, U and V with half-width chroma to 4-byte RGBA
-// pixels (memory order A,B,G,R) using the kYuvConstants table.
-// Each loop iteration consumes 8 Y, 4 U and 4 V bytes and writes 32 bytes to
-// dst_rgba. The loop body runs before width is tested and only handles whole
-// groups of 8 pixels; width is presumably a positive multiple of 8 - confirm
-// against callers.
-void OMITFP I422ToRGBARow_SSSE3(const uint8* y_buf,
- const uint8* u_buf,
- const uint8* v_buf,
- uint8* dst_rgba,
- int width) {
- asm volatile (
- "sub %[u_buf],%[v_buf] \n" // v_buf becomes an offset from u_buf
- "pcmpeqb %%xmm5,%%xmm5 \n" // STORERGBA sets xmm5 again on every iteration
- LABELALIGN
- "1: \n"
- READYUV422
- YUVTORGB(kYuvConstants)
- STORERGBA
- "sub $0x8,%[width] \n"
- "jg 1b \n"
- : [y_buf]"+r"(y_buf), // %[y_buf]
- [u_buf]"+r"(u_buf), // %[u_buf]
- [v_buf]"+r"(v_buf), // %[v_buf]
- [dst_rgba]"+r"(dst_rgba), // %[dst_rgba]
- [width]"+rm"(width) // %[width]
- : [kYuvConstants]"r"(&kYuvConstants.kUVToB) // %[kYuvConstants]
- : "memory", "cc", NACL_R14
- "xmm0", "xmm1", "xmm2", "xmm3", "xmm5"
- );
-}
-
-#endif // HAS_I422TOARGBROW_SSSE3
-
-// Read 8 UV from 422, upsample to 16 UV.
-// Loads 8 U bytes and 8 V bytes, advances u_buf by 8 and leaves 16 interleaved
-// UV pairs in ymm0, each source pair appearing twice. Clobbers ymm1. Expects
-// [v_buf] to already hold the offset of the V plane relative to [u_buf].
-// The vpermq spreads the interleaved pairs across both 128-bit lanes before
-// the in-lane vpunpcklwd duplicates them.
-#define READYUV422_AVX2 \
- "vmovq " MEMACCESS([u_buf]) ",%%xmm0 \n" \
- MEMOPREG(vmovq, 0x00, [u_buf], [v_buf], 1, xmm1) \
- "lea " MEMLEA(0x8, [u_buf]) ",%[u_buf] \n" \
- "vpunpcklbw %%ymm1,%%ymm0,%%ymm0 \n" \
- "vpermq $0xd8,%%ymm0,%%ymm0 \n" \
- "vpunpcklwd %%ymm0,%%ymm0,%%ymm0 \n"
-
-// Convert 16 pixels: 16 UV and 16 Y.
-// In:  ymm0 = 16 interleaved chroma byte pairs; [y_buf] points at 16 Y bytes;
-//      [YuvConstants] is the name of the asm operand that holds the address
-//      of a YuvConstants table.
-// Out: ymm0 = B, ymm1 = G, ymm2 = R as unsigned bytes (vpackuswb packs each
-//      register with itself, so every 128-bit lane holds its 8 values twice);
-//      y_buf advanced by 16.
-// Clobbers ymm3.
-#define YUVTORGB_AVX2(YuvConstants) \
- "vpmaddubsw " MEMACCESS2(64, [YuvConstants]) ",%%ymm0,%%ymm2 \n" \
- "vpmaddubsw " MEMACCESS2(32, [YuvConstants]) ",%%ymm0,%%ymm1 \n" \
- "vpmaddubsw " MEMACCESS([YuvConstants]) ",%%ymm0,%%ymm0 \n" \
- "vmovdqu " MEMACCESS2(160, [YuvConstants]) ",%%ymm3 \n" \
- "vpsubw %%ymm2,%%ymm3,%%ymm2 \n" \
- "vmovdqu " MEMACCESS2(128, [YuvConstants]) ",%%ymm3 \n" \
- "vpsubw %%ymm1,%%ymm3,%%ymm1 \n" \
- "vmovdqu " MEMACCESS2(96, [YuvConstants]) ",%%ymm3 \n" \
- "vpsubw %%ymm0,%%ymm3,%%ymm0 \n" \
- "vmovdqu " MEMACCESS([y_buf]) ",%%xmm3 \n" \
- "lea " MEMLEA(0x10, [y_buf]) ",%[y_buf] \n" \
- "vpermq $0xd8,%%ymm3,%%ymm3 \n" \
- "vpunpcklbw %%ymm3,%%ymm3,%%ymm3 \n" \
- "vpmulhuw " MEMACCESS2(192, [YuvConstants]) ",%%ymm3,%%ymm3 \n" \
- "vpaddsw %%ymm3,%%ymm0,%%ymm0 \n" \
- "vpaddsw %%ymm3,%%ymm1,%%ymm1 \n" \
- "vpaddsw %%ymm3,%%ymm2,%%ymm2 \n" \
- "vpsraw $0x6,%%ymm0,%%ymm0 \n" \
- "vpsraw $0x6,%%ymm1,%%ymm1 \n" \
- "vpsraw $0x6,%%ymm2,%%ymm2 \n" \
- "vpackuswb %%ymm0,%%ymm0,%%ymm0 \n" \
- "vpackuswb %%ymm1,%%ymm1,%%ymm1 \n" \
- "vpackuswb %%ymm2,%%ymm2,%%ymm2 \n"
-
-#if defined(HAS_I422TOBGRAROW_AVX2)
-// 16 pixels
-// 8 UV values upsampled to 16 UV, mixed with 16 Y producing 16 BGRA (64 bytes).
-// The loop body runs before width is tested and only handles whole groups of
-// 16 pixels; width is presumably a positive multiple of 16 - confirm against
-// callers. vzeroupper is issued after the loop, before returning.
-void OMITFP I422ToBGRARow_AVX2(const uint8* y_buf,
- const uint8* u_buf,
- const uint8* v_buf,
- uint8* dst_bgra,
- int width) {
- asm volatile (
- "sub %[u_buf],%[v_buf] \n" // v_buf becomes an offset from u_buf
- "vpcmpeqb %%ymm5,%%ymm5,%%ymm5 \n" // ymm5 = all ones, used as alpha
- LABELALIGN
- "1: \n"
- READYUV422_AVX2
- YUVTORGB_AVX2(kYuvConstants)
-
- // Step 3: Weave into BGRA
- "vpunpcklbw %%ymm0,%%ymm1,%%ymm1 \n" // GB
- "vpermq $0xd8,%%ymm1,%%ymm1 \n"
- "vpunpcklbw %%ymm2,%%ymm5,%%ymm2 \n" // AR
- "vpermq $0xd8,%%ymm2,%%ymm2 \n"
- "vpunpcklwd %%ymm1,%%ymm2,%%ymm0 \n" // ARGB first 8 pixels
- "vpunpckhwd %%ymm1,%%ymm2,%%ymm2 \n" // ARGB next 8 pixels
-
- "vmovdqu %%ymm0," MEMACCESS([dst_bgra]) "\n"
- "vmovdqu %%ymm2," MEMACCESS2(0x20,[dst_bgra]) "\n"
- "lea " MEMLEA(0x40,[dst_bgra]) ",%[dst_bgra] \n"
- "sub $0x10,%[width] \n"
- "jg 1b \n"
- "vzeroupper \n"
- : [y_buf]"+r"(y_buf), // %[y_buf]
- [u_buf]"+r"(u_buf), // %[u_buf]
- [v_buf]"+r"(v_buf), // %[v_buf]
- [dst_bgra]"+r"(dst_bgra), // %[dst_bgra]
- [width]"+rm"(width) // %[width]
- : [kYuvConstants]"r"(&kYuvConstants.kUVToB) // %[kYuvConstants]
- : "memory", "cc", NACL_R14
- "xmm0", "xmm1", "xmm2", "xmm3", "xmm5"
- );
-}
-#endif // HAS_I422TOBGRAROW_AVX2
-
-#if defined(HAS_I422TOARGBROW_AVX2)
-// 16 pixels
-// 8 UV values upsampled to 16 UV, mixed with 16 Y producing 16 ARGB (64 bytes).
-// The loop body runs before width is tested and only handles whole groups of
-// 16 pixels; width is presumably a positive multiple of 16 - confirm against
-// callers. vzeroupper is issued after the loop, before returning.
-void OMITFP I422ToARGBRow_AVX2(const uint8* y_buf,
- const uint8* u_buf,
- const uint8* v_buf,
- uint8* dst_argb,
- int width) {
- asm volatile (
- "sub %[u_buf],%[v_buf] \n" // v_buf becomes an offset from u_buf
- "vpcmpeqb %%ymm5,%%ymm5,%%ymm5 \n" // ymm5 = all ones, used as alpha
- LABELALIGN
- "1: \n"
- READYUV422_AVX2
- YUVTORGB_AVX2(kYuvConstants)
-
- // Step 3: Weave into ARGB
- "vpunpcklbw %%ymm1,%%ymm0,%%ymm0 \n" // BG
- "vpermq $0xd8,%%ymm0,%%ymm0 \n"
- "vpunpcklbw %%ymm5,%%ymm2,%%ymm2 \n" // RA
- "vpermq $0xd8,%%ymm2,%%ymm2 \n"
- "vpunpcklwd %%ymm2,%%ymm0,%%ymm1 \n" // BGRA first 8 pixels
- "vpunpckhwd %%ymm2,%%ymm0,%%ymm0 \n" // BGRA next 8 pixels
-
- "vmovdqu %%ymm1," MEMACCESS([dst_argb]) "\n"
- "vmovdqu %%ymm0," MEMACCESS2(0x20,[dst_argb]) "\n"
- "lea " MEMLEA(0x40,[dst_argb]) ",%[dst_argb] \n"
- "sub $0x10,%[width] \n"
- "jg 1b \n"
- "vzeroupper \n"
- : [y_buf]"+r"(y_buf), // %[y_buf]
- [u_buf]"+r"(u_buf), // %[u_buf]
- [v_buf]"+r"(v_buf), // %[v_buf]
- [dst_argb]"+r"(dst_argb), // %[dst_argb]
- [width]"+rm"(width) // %[width]
- : [kYuvConstants]"r"(&kYuvConstants.kUVToB) // %[kYuvConstants]
- : "memory", "cc", NACL_R14
- "xmm0", "xmm1", "xmm2", "xmm3", "xmm5"
- );
-}
-#endif // HAS_I422TOARGBROW_AVX2
-
-#if defined(HAS_J422TOARGBROW_AVX2)
-// 16 pixels
-// 8 UV values upsampled to 16 UV, mixed with 16 Y producing 16 ARGB (64 bytes).
-// Uses the JPEG coefficient table: YUVTORGB_AVX2(kYuvConstants) only names the
-// asm operand, which is bound to kYuvJConstants in the input list below.
-// The loop body runs before width is tested and only handles whole groups of
-// 16 pixels; width is presumably a positive multiple of 16 - confirm against
-// callers. vzeroupper is issued after the loop, before returning.
-void OMITFP J422ToARGBRow_AVX2(const uint8* y_buf,
- const uint8* u_buf,
- const uint8* v_buf,
- uint8* dst_argb,
- int width) {
- asm volatile (
- "sub %[u_buf],%[v_buf] \n" // v_buf becomes an offset from u_buf
- "vpcmpeqb %%ymm5,%%ymm5,%%ymm5 \n" // ymm5 = all ones, used as alpha
- LABELALIGN
- "1: \n"
- READYUV422_AVX2
- YUVTORGB_AVX2(kYuvConstants)
-
- // Step 3: Weave into ARGB
- "vpunpcklbw %%ymm1,%%ymm0,%%ymm0 \n" // BG
- "vpermq $0xd8,%%ymm0,%%ymm0 \n"
- "vpunpcklbw %%ymm5,%%ymm2,%%ymm2 \n" // RA
- "vpermq $0xd8,%%ymm2,%%ymm2 \n"
- "vpunpcklwd %%ymm2,%%ymm0,%%ymm1 \n" // BGRA first 8 pixels
- "vpunpckhwd %%ymm2,%%ymm0,%%ymm0 \n" // BGRA next 8 pixels
-
- "vmovdqu %%ymm1," MEMACCESS([dst_argb]) "\n"
- "vmovdqu %%ymm0," MEMACCESS2(0x20,[dst_argb]) "\n"
- "lea " MEMLEA(0x40,[dst_argb]) ",%[dst_argb] \n"
- "sub $0x10,%[width] \n"
- "jg 1b \n"
- "vzeroupper \n"
- : [y_buf]"+r"(y_buf), // %[y_buf]
- [u_buf]"+r"(u_buf), // %[u_buf]
- [v_buf]"+r"(v_buf), // %[v_buf]
- [dst_argb]"+r"(dst_argb), // %[dst_argb]
- [width]"+rm"(width) // %[width]
- : [kYuvConstants]"r"(&kYuvJConstants.kUVToB) // %[kYuvConstants]
- : "memory", "cc", NACL_R14
- "xmm0", "xmm1", "xmm2", "xmm3", "xmm5"
- );
-}
-#endif // HAS_J422TOARGBROW_AVX2
-
-#if defined(HAS_I422TOABGRROW_AVX2)
-// 16 pixels
-// 8 UV values upsampled to 16 UV, mixed with 16 Y producing 16 ABGR (64 bytes).
-// Note: the destination parameter is named dst_argb although the bytes written
-// are in ABGR layout (memory order R,G,B,A).
-// The loop body runs before width is tested and only handles whole groups of
-// 16 pixels; width is presumably a positive multiple of 16 - confirm against
-// callers. vzeroupper is issued after the loop, before returning.
-void OMITFP I422ToABGRRow_AVX2(const uint8* y_buf,
- const uint8* u_buf,
- const uint8* v_buf,
- uint8* dst_argb,
- int width) {
- asm volatile (
- "sub %[u_buf],%[v_buf] \n" // v_buf becomes an offset from u_buf
- "vpcmpeqb %%ymm5,%%ymm5,%%ymm5 \n" // ymm5 = all ones, used as alpha
- LABELALIGN
- "1: \n"
- READYUV422_AVX2
- YUVTORGB_AVX2(kYuvConstants)
-
- // Step 3: Weave into ABGR
- "vpunpcklbw %%ymm1,%%ymm2,%%ymm1 \n" // RG
- "vpermq $0xd8,%%ymm1,%%ymm1 \n"
- "vpunpcklbw %%ymm5,%%ymm0,%%ymm2 \n" // BA
- "vpermq $0xd8,%%ymm2,%%ymm2 \n"
- "vpunpcklwd %%ymm2,%%ymm1,%%ymm0 \n" // RGBA first 8 pixels
- "vpunpckhwd %%ymm2,%%ymm1,%%ymm1 \n" // RGBA next 8 pixels
- "vmovdqu %%ymm0," MEMACCESS([dst_argb]) "\n"
- "vmovdqu %%ymm1," MEMACCESS2(0x20,[dst_argb]) "\n"
- "lea " MEMLEA(0x40,[dst_argb]) ",%[dst_argb] \n"
- "sub $0x10,%[width] \n"
- "jg 1b \n"
- "vzeroupper \n"
- : [y_buf]"+r"(y_buf), // %[y_buf]
- [u_buf]"+r"(u_buf), // %[u_buf]
- [v_buf]"+r"(v_buf), // %[v_buf]
- [dst_argb]"+r"(dst_argb), // %[dst_argb]
- [width]"+rm"(width) // %[width]
- : [kYuvConstants]"r"(&kYuvConstants.kUVToB) // %[kYuvConstants]
- : "memory", "cc", NACL_R14
- "xmm0", "xmm1", "xmm2", "xmm3", "xmm5"
- );
-}
-#endif // HAS_I422TOABGRROW_AVX2
-
-#if defined(HAS_I422TORGBAROW_AVX2)
-// 16 pixels
-// 8 UV values upsampled to 16 UV, mixed with 16 Y producing 16 RGBA (64 bytes).
-// Note: the destination parameter is named dst_argb although the bytes written
-// are in RGBA layout (memory order A,B,G,R).
-// The loop body runs before width is tested and only handles whole groups of
-// 16 pixels; width is presumably a positive multiple of 16 - confirm against
-// callers. vzeroupper is issued after the loop, before returning.
-void OMITFP I422ToRGBARow_AVX2(const uint8* y_buf,
- const uint8* u_buf,
- const uint8* v_buf,
- uint8* dst_argb,
- int width) {
- asm volatile (
- "sub %[u_buf],%[v_buf] \n" // v_buf becomes an offset from u_buf
- "vpcmpeqb %%ymm5,%%ymm5,%%ymm5 \n" // ymm5 = all ones, used as alpha
- LABELALIGN
- "1: \n"
- READYUV422_AVX2
- YUVTORGB_AVX2(kYuvConstants)
-
- // Step 3: Weave into RGBA
- "vpunpcklbw %%ymm2,%%ymm1,%%ymm1 \n" // GR
- "vpermq $0xd8,%%ymm1,%%ymm1 \n"
- "vpunpcklbw %%ymm0,%%ymm5,%%ymm2 \n" // AB
- "vpermq $0xd8,%%ymm2,%%ymm2 \n"
- "vpunpcklwd %%ymm1,%%ymm2,%%ymm0 \n" // ABGR first 8 pixels
- "vpunpckhwd %%ymm1,%%ymm2,%%ymm1 \n" // ABGR next 8 pixels
- "vmovdqu %%ymm0," MEMACCESS([dst_argb]) "\n"
- "vmovdqu %%ymm1," MEMACCESS2(0x20,[dst_argb]) "\n"
- "lea " MEMLEA(0x40,[dst_argb]) ",%[dst_argb] \n"
- "sub $0x10,%[width] \n"
- "jg 1b \n"
- "vzeroupper \n"
- : [y_buf]"+r"(y_buf), // %[y_buf]
- [u_buf]"+r"(u_buf), // %[u_buf]
- [v_buf]"+r"(v_buf), // %[v_buf]
- [dst_argb]"+r"(dst_argb), // %[dst_argb]
- [width]"+rm"(width) // %[width]
- : [kYuvConstants]"r"(&kYuvConstants.kUVToB) // %[kYuvConstants]
- : "memory", "cc", NACL_R14
- "xmm0", "xmm1", "xmm2", "xmm3", "xmm5"
- );
-}
-#endif // HAS_I422TORGBAROW_AVX2
-
-#ifdef HAS_I400TOARGBROW_SSE2
-void I400ToARGBRow_SSE2(const uint8* y_buf, uint8* dst_argb, int width) { // Converts 8 Y bytes per loop pass into 8 ARGB pixels (32 bytes): the scaled Y is replicated into the three colour bytes and alpha is set to 0xff.
- asm volatile (
- "mov $0x4a354a35,%%eax \n" // 4a35 = 18997 = 1.164
- "movd %%eax,%%xmm2 \n"
- "pshufd $0x0,%%xmm2,%%xmm2 \n" // broadcast the multiplier to every word of xmm2
- "mov $0x04880488,%%eax \n" // 0488 = 1160 = 1.164 * 16
- "movd %%eax,%%xmm3 \n"
- "pshufd $0x0,%%xmm3,%%xmm3 \n" // broadcast the bias to every word of xmm3
- "pcmpeqb %%xmm4,%%xmm4 \n"
- "pslld $0x18,%%xmm4 \n" // xmm4 = 0xff000000 per dword (alpha mask)
- LABELALIGN
- "1: \n"
- // Step 1: Scale Y contribution to 8 G values. G = (y - 16) * 1.164
- "movq " MEMACCESS(0) ",%%xmm0 \n"
- "lea " MEMLEA(0x8,0) ",%0 \n"
- "punpcklbw %%xmm0,%%xmm0 \n" // each Y byte duplicated into a 16-bit word
- "pmulhuw %%xmm2,%%xmm0 \n"
- "psubusw %%xmm3,%%xmm0 \n" // saturating subtract of the bias
- "psrlw $6, %%xmm0 \n"
- "packuswb %%xmm0,%%xmm0 \n"
-
- // Step 2: Weave into ARGB
- "punpcklbw %%xmm0,%%xmm0 \n"
- "movdqa %%xmm0,%%xmm1 \n"
- "punpcklwd %%xmm0,%%xmm0 \n" // first 4 pixels: value replicated into all 4 bytes
- "punpckhwd %%xmm1,%%xmm1 \n" // next 4 pixels
- "por %%xmm4,%%xmm0 \n" // force alpha to 0xff
- "por %%xmm4,%%xmm1 \n"
- "movdqu %%xmm0," MEMACCESS(1) " \n"
- "movdqu %%xmm1," MEMACCESS2(0x10,1) " \n"
- "lea " MEMLEA(0x20,1) ",%1 \n"
-
- "sub $0x8,%2 \n"
- "jg 1b \n" // loop while width > 0
- : "+r"(y_buf), // %0
- "+r"(dst_argb), // %1
- "+rm"(width) // %2
- :
- : "memory", "cc", "eax"
- , "xmm0", "xmm1", "xmm2", "xmm3", "xmm4"
- );
-}
-#endif // HAS_I400TOARGBROW_SSE2
-
-#ifdef HAS_I400TOARGBROW_AVX2
-// 16 pixels of Y converted to 16 pixels of ARGB (64 bytes).
-// note: vpunpcklbw mutates and vpackuswb unmutates.
-void I400ToARGBRow_AVX2(const uint8* y_buf, uint8* dst_argb, int width) {
- asm volatile (
- "mov $0x4a354a35,%%eax \n" // 4a35 = 18997 = 1.164
- "vmovd %%eax,%%xmm2 \n"
- "vbroadcastss %%xmm2,%%ymm2 \n" // multiplier in every word of ymm2
- "mov $0x4880488,%%eax \n" // 0488 = 1160 = 1.164 * 16
- "vmovd %%eax,%%xmm3 \n"
- "vbroadcastss %%xmm3,%%ymm3 \n" // bias in every word of ymm3
- "vpcmpeqb %%ymm4,%%ymm4,%%ymm4 \n"
- "vpslld $0x18,%%ymm4,%%ymm4 \n" // ymm4 = 0xff000000 per dword (alpha mask)
-
- LABELALIGN
- "1: \n"
- // Step 1: Scale Y contribution to 16 G values. G = (y - 16) * 1.164
- "vmovdqu " MEMACCESS(0) ",%%xmm0 \n" // load 16 Y bytes
- "lea " MEMLEA(0x10,0) ",%0 \n"
- "vpermq $0xd8,%%ymm0,%%ymm0 \n" // qword order 0,2,1,3 so each lane holds 8 Y bytes in its low half
- "vpunpcklbw %%ymm0,%%ymm0,%%ymm0 \n" // each Y byte duplicated into a 16-bit word
- "vpmulhuw %%ymm2,%%ymm0,%%ymm0 \n"
- "vpsubusw %%ymm3,%%ymm0,%%ymm0 \n" // saturating subtract of the bias
- "vpsrlw $0x6,%%ymm0,%%ymm0 \n"
- "vpackuswb %%ymm0,%%ymm0,%%ymm0 \n"
- "vpunpcklbw %%ymm0,%%ymm0,%%ymm1 \n"
- "vpermq $0xd8,%%ymm1,%%ymm1 \n"
- "vpunpcklwd %%ymm1,%%ymm1,%%ymm0 \n" // first 8 pixels: value replicated into all 4 bytes
- "vpunpckhwd %%ymm1,%%ymm1,%%ymm1 \n" // next 8 pixels
- "vpor %%ymm4,%%ymm0,%%ymm0 \n" // force alpha to 0xff
- "vpor %%ymm4,%%ymm1,%%ymm1 \n"
- "vmovdqu %%ymm0," MEMACCESS(1) " \n"
- "vmovdqu %%ymm1," MEMACCESS2(0x20,1) " \n"
- "lea " MEMLEA(0x40,1) ",%1 \n"
- "sub $0x10,%2 \n"
- "jg 1b \n" // loop while width > 0
- "vzeroupper \n" // clear upper YMM halves before returning
- : "+r"(y_buf), // %0
- "+r"(dst_argb), // %1
- "+rm"(width) // %2
- :
- : "memory", "cc", "eax"
- , "xmm0", "xmm1", "xmm2", "xmm3", "xmm4"
- );
-}
-#endif // HAS_I400TOARGBROW_AVX2
-
-#ifdef HAS_MIRRORROW_SSSE3
-// Shuffle table for reversing the bytes: pshufb control that maps output byte i to input byte 15 - i.
-static uvec8 kShuffleMirror = {
- 15u, 14u, 13u, 12u, 11u, 10u, 9u, 8u, 7u, 6u, 5u, 4u, 3u, 2u, 1u, 0u
-};
-
-void MirrorRow_SSSE3(const uint8* src, uint8* dst, int width) { // Writes src reversed into dst, 16 bytes per loop pass, reading from the end of src backwards.
- intptr_t temp_width = (intptr_t)(width); // pointer-sized copy so it can be used as an index register
- asm volatile (
- "movdqa %3,%%xmm5 \n" // xmm5 = kShuffleMirror
- LABELALIGN
- "1: \n"
- MEMOPREG(movdqu,-0x10,0,2,1,xmm0) // movdqu -0x10(%0,%2),%%xmm0
- "pshufb %%xmm5,%%xmm0 \n" // reverse the 16 bytes
- "movdqu %%xmm0," MEMACCESS(1) " \n"
- "lea " MEMLEA(0x10,1) ",%1 \n"
- "sub $0x10,%2 \n" // the shrinking index moves the next load 16 bytes earlier in src
- "jg 1b \n"
- : "+r"(src), // %0
- "+r"(dst), // %1
- "+r"(temp_width) // %2
- : "m"(kShuffleMirror) // %3
- : "memory", "cc", NACL_R14
- "xmm0", "xmm5"
- );
-}
-#endif // HAS_MIRRORROW_SSSE3
-
-#ifdef HAS_MIRRORROW_AVX2
-void MirrorRow_AVX2(const uint8* src, uint8* dst, int width) { // Writes src reversed into dst, 32 bytes per loop pass, reading from the end of src backwards.
- intptr_t temp_width = (intptr_t)(width); // pointer-sized copy so it can be used as an index register
- asm volatile (
- "vbroadcastf128 %3,%%ymm5 \n" // kShuffleMirror copied into both 128-bit lanes
- LABELALIGN
- "1: \n"
- MEMOPREG(vmovdqu,-0x20,0,2,1,ymm0) // vmovdqu -0x20(%0,%2),%%ymm0
- "vpshufb %%ymm5,%%ymm0,%%ymm0 \n" // reverse the bytes within each 128-bit lane
- "vpermq $0x4e,%%ymm0,%%ymm0 \n" // swap the two 128-bit lanes to finish the 32-byte reversal
- "vmovdqu %%ymm0," MEMACCESS(1) " \n"
- "lea " MEMLEA(0x20,1) ",%1 \n"
- "sub $0x20,%2 \n"
- "jg 1b \n"
- "vzeroupper \n" // clear upper YMM halves before returning
- : "+r"(src), // %0
- "+r"(dst), // %1
- "+r"(temp_width) // %2
- : "m"(kShuffleMirror) // %3
- : "memory", "cc", NACL_R14
- "xmm0", "xmm5"
- );
-}
-#endif // HAS_MIRRORROW_AVX2
-
-#ifdef HAS_MIRRORROW_SSE2
-void MirrorRow_SSE2(const uint8* src, uint8* dst, int width) { // Writes src reversed into dst, 16 bytes per loop pass, using only SSE2 shifts and shuffles (no pshufb).
- intptr_t temp_width = (intptr_t)(width); // pointer-sized copy so it can be used as an index register
- asm volatile (
- LABELALIGN
- "1: \n"
- MEMOPREG(movdqu,-0x10,0,2,1,xmm0) // movdqu -0x10(%0,%2),%%xmm0
- "movdqa %%xmm0,%%xmm1 \n"
- "psllw $0x8,%%xmm0 \n"
- "psrlw $0x8,%%xmm1 \n"
- "por %%xmm1,%%xmm0 \n" // the two bytes of every 16-bit word are now swapped
- "pshuflw $0x1b,%%xmm0,%%xmm0 \n" // reverse the four words of the low half
- "pshufhw $0x1b,%%xmm0,%%xmm0 \n" // reverse the four words of the high half
- "pshufd $0x4e,%%xmm0,%%xmm0 \n" // swap the two 64-bit halves
- "movdqu %%xmm0," MEMACCESS(1) " \n"
- "lea " MEMLEA(0x10,1)",%1 \n"
- "sub $0x10,%2 \n"
- "jg 1b \n"
- : "+r"(src), // %0
- "+r"(dst), // %1
- "+r"(temp_width) // %2
- :
- : "memory", "cc", NACL_R14
- "xmm0", "xmm1"
- );
-}
-#endif // HAS_MIRRORROW_SSE2
-
-#ifdef HAS_MIRRORROW_UV_SSSE3
-// Shuffle table for reversing the bytes of UV channels: even source bytes (reversed) go to the low 8 bytes, odd source bytes (reversed) to the high 8 bytes.
-static uvec8 kShuffleMirrorUV = {
- 14u, 12u, 10u, 8u, 6u, 4u, 2u, 0u, 15u, 13u, 11u, 9u, 7u, 5u, 3u, 1u
-};
-void MirrorUVRow_SSSE3(const uint8* src, uint8* dst_u, uint8* dst_v, // Splits 16 interleaved bytes per loop pass into 8 bytes for dst_u and 8 for dst_v, walking src backwards.
- int width) {
- intptr_t temp_width = (intptr_t)(width); // pointer-sized copy so it can be used as an index register
- asm volatile (
- "movdqa %4,%%xmm1 \n" // xmm1 = kShuffleMirrorUV
- "lea " MEMLEA4(-0x10,0,3,2) ",%0 \n" // presumably src = src + width * 2 - 16 (last 16-byte group); confirm against the MEMLEA4 macro
- "sub %1,%2 \n" // %2 = dst_v - dst_u, so (%1,%2) addresses dst_v
- LABELALIGN
- "1: \n"
- "movdqu " MEMACCESS(0) ",%%xmm0 \n"
- "lea " MEMLEA(-0x10,0) ",%0 \n" // step src back by 16 bytes
- "pshufb %%xmm1,%%xmm0 \n"
- "movlpd %%xmm0," MEMACCESS(1) " \n" // low 8 bytes to dst_u
- MEMOPMEM(movhpd,xmm0,0x00,1,2,1) // movhpd %%xmm0,(%1,%2)
- "lea " MEMLEA(0x8,1) ",%1 \n"
- "sub $8,%3 \n"
- "jg 1b \n"
- : "+r"(src), // %0
- "+r"(dst_u), // %1
- "+r"(dst_v), // %2
- "+r"(temp_width) // %3
- : "m"(kShuffleMirrorUV) // %4
- : "memory", "cc", NACL_R14
- "xmm0", "xmm1"
- );
-}
-#endif // HAS_MIRRORROW_UV_SSSE3
-
-#ifdef HAS_ARGBMIRRORROW_SSE2
-
-void ARGBMirrorRow_SSE2(const uint8* src, uint8* dst, int width) { // Reverses the order of 32-bit pixels, 4 pixels (16 bytes) per loop pass; bytes inside each pixel keep their order.
- intptr_t temp_width = (intptr_t)(width); // pointer-sized copy so it can be used as an index register
- asm volatile (
- "lea " MEMLEA4(-0x10,0,2,4) ",%0 \n" // presumably src = src + width * 4 - 16 (last 4 pixels); confirm against the MEMLEA4 macro
- LABELALIGN
- "1: \n"
- "movdqu " MEMACCESS(0) ",%%xmm0 \n"
- "pshufd $0x1b,%%xmm0,%%xmm0 \n" // reverse the four dwords
- "lea " MEMLEA(-0x10,0) ",%0 \n" // step src back by 16 bytes
- "movdqu %%xmm0," MEMACCESS(1) " \n"
- "lea " MEMLEA(0x10,1) ",%1 \n"
- "sub $0x4,%2 \n"
- "jg 1b \n"
- : "+r"(src), // %0
- "+r"(dst), // %1
- "+r"(temp_width) // %2
- :
- : "memory", "cc"
- , "xmm0"
- );
-}
-#endif // HAS_ARGBMIRRORROW_SSE2
-
-#ifdef HAS_ARGBMIRRORROW_AVX2
-// Index table for vpermd: reverses the order of the eight 32-bit elements (whole pixels, not individual bytes).
-static const ulvec32 kARGBShuffleMirror_AVX2 = {
- 7u, 6u, 5u, 4u, 3u, 2u, 1u, 0u
-};
-void ARGBMirrorRow_AVX2(const uint8* src, uint8* dst, int width) { // Reverses the order of 32-bit pixels, 8 pixels (32 bytes) per loop pass.
- intptr_t temp_width = (intptr_t)(width); // pointer-sized copy so it can be used as an index register
- asm volatile (
- "vmovdqu %3,%%ymm5 \n" // ymm5 = kARGBShuffleMirror_AVX2
- LABELALIGN
- "1: \n"
- VMEMOPREG(vpermd,-0x20,0,2,4,ymm5,ymm0) // vpermd -0x20(%0,%2,4),ymm5,ymm0
- "vmovdqu %%ymm0," MEMACCESS(1) " \n"
- "lea " MEMLEA(0x20,1) ",%1 \n"
- "sub $0x8,%2 \n" // the shrinking index moves the next load 8 pixels earlier in src
- "jg 1b \n"
- "vzeroupper \n" // clear upper YMM halves before returning
- : "+r"(src), // %0
- "+r"(dst), // %1
- "+r"(temp_width) // %2
- : "m"(kARGBShuffleMirror_AVX2) // %3
- : "memory", "cc", NACL_R14
- "xmm0", "xmm5"
- );
-}
-#endif // HAS_ARGBMIRRORROW_AVX2
-
-#ifdef HAS_SPLITUVROW_AVX2
-void SplitUVRow_AVX2(const uint8* src_uv, uint8* dst_u, uint8* dst_v, int pix) { // De-interleaves 64 bytes of src_uv per loop pass: even bytes go to dst_u, odd bytes to dst_v (32 bytes each).
- asm volatile (
- "vpcmpeqb %%ymm5,%%ymm5,%%ymm5 \n"
- "vpsrlw $0x8,%%ymm5,%%ymm5 \n" // ymm5 = 0x00ff per word (keeps the even byte)
- "sub %1,%2 \n" // %2 = dst_v - dst_u, so (%1,%2) addresses dst_v
- LABELALIGN
- "1: \n"
- "vmovdqu " MEMACCESS(0) ",%%ymm0 \n"
- "vmovdqu " MEMACCESS2(0x20,0) ",%%ymm1 \n"
- "lea " MEMLEA(0x40,0) ",%0 \n"
- "vpsrlw $0x8,%%ymm0,%%ymm2 \n" // odd bytes moved down into the low byte of each word
- "vpsrlw $0x8,%%ymm1,%%ymm3 \n"
- "vpand %%ymm5,%%ymm0,%%ymm0 \n" // even bytes
- "vpand %%ymm5,%%ymm1,%%ymm1 \n"
- "vpackuswb %%ymm1,%%ymm0,%%ymm0 \n"
- "vpackuswb %%ymm3,%%ymm2,%%ymm2 \n"
- "vpermq $0xd8,%%ymm0,%%ymm0 \n" // qword order 0,2,1,3 restores linear order after the in-lane pack
- "vpermq $0xd8,%%ymm2,%%ymm2 \n"
- "vmovdqu %%ymm0," MEMACCESS(1) " \n"
- MEMOPMEM(vmovdqu,ymm2,0x00,1,2,1) // vmovdqu %%ymm2,(%1,%2)
- "lea " MEMLEA(0x20,1) ",%1 \n"
- "sub $0x20,%3 \n"
- "jg 1b \n"
- "vzeroupper \n" // clear upper YMM halves before returning
- : "+r"(src_uv), // %0
- "+r"(dst_u), // %1
- "+r"(dst_v), // %2
- "+r"(pix) // %3
- :
- : "memory", "cc", NACL_R14
- "xmm0", "xmm1", "xmm2", "xmm3", "xmm5"
- );
-}
-#endif // HAS_SPLITUVROW_AVX2
-
-#ifdef HAS_SPLITUVROW_SSE2
-void SplitUVRow_SSE2(const uint8* src_uv, uint8* dst_u, uint8* dst_v, int pix) { // De-interleaves 32 bytes of src_uv per loop pass: even bytes go to dst_u, odd bytes to dst_v (16 bytes each).
- asm volatile (
- "pcmpeqb %%xmm5,%%xmm5 \n"
- "psrlw $0x8,%%xmm5 \n" // xmm5 = 0x00ff per word (keeps the even byte)
- "sub %1,%2 \n" // %2 = dst_v - dst_u, so (%1,%2) addresses dst_v
- LABELALIGN
- "1: \n"
- "movdqu " MEMACCESS(0) ",%%xmm0 \n"
- "movdqu " MEMACCESS2(0x10,0) ",%%xmm1 \n"
- "lea " MEMLEA(0x20,0) ",%0 \n"
- "movdqa %%xmm0,%%xmm2 \n"
- "movdqa %%xmm1,%%xmm3 \n"
- "pand %%xmm5,%%xmm0 \n" // even bytes
- "pand %%xmm5,%%xmm1 \n"
- "packuswb %%xmm1,%%xmm0 \n"
- "psrlw $0x8,%%xmm2 \n" // odd bytes
- "psrlw $0x8,%%xmm3 \n"
- "packuswb %%xmm3,%%xmm2 \n"
- "movdqu %%xmm0," MEMACCESS(1) " \n"
- MEMOPMEM(movdqu,xmm2,0x00,1,2,1) // movdqu %%xmm2,(%1,%2)
- "lea " MEMLEA(0x10,1) ",%1 \n"
- "sub $0x10,%3 \n"
- "jg 1b \n"
- : "+r"(src_uv), // %0
- "+r"(dst_u), // %1
- "+r"(dst_v), // %2
- "+r"(pix) // %3
- :
- : "memory", "cc", NACL_R14
- "xmm0", "xmm1", "xmm2", "xmm3", "xmm5"
- );
-}
-#endif // HAS_SPLITUVROW_SSE2
-
-#ifdef HAS_MERGEUVROW_AVX2
-void MergeUVRow_AVX2(const uint8* src_u, const uint8* src_v, uint8* dst_uv, // Interleaves 32 bytes from src_u with 32 bytes from src_v per loop pass, writing 64 bytes to dst_uv.
- int width) {
- asm volatile (
- "sub %0,%1 \n" // %1 = src_v - src_u, so (%0,%1) addresses src_v
- LABELALIGN
- "1: \n"
- "vmovdqu " MEMACCESS(0) ",%%ymm0 \n"
- MEMOPREG(vmovdqu,0x00,0,1,1,ymm1) // vmovdqu (%0,%1,1),%%ymm1
- "lea " MEMLEA(0x20,0) ",%0 \n"
- "vpunpcklbw %%ymm1,%%ymm0,%%ymm2 \n" // in-lane interleave of the low 8 bytes of each lane
- "vpunpckhbw %%ymm1,%%ymm0,%%ymm0 \n" // in-lane interleave of the high 8 bytes of each lane
- "vextractf128 $0x0,%%ymm2," MEMACCESS(2) " \n" // the four 16-byte stores put the lanes back in linear order
- "vextractf128 $0x0,%%ymm0," MEMACCESS2(0x10,2) "\n"
- "vextractf128 $0x1,%%ymm2," MEMACCESS2(0x20,2) "\n"
- "vextractf128 $0x1,%%ymm0," MEMACCESS2(0x30,2) "\n"
- "lea " MEMLEA(0x40,2) ",%2 \n"
- "sub $0x20,%3 \n"
- "jg 1b \n"
- "vzeroupper \n" // clear upper YMM halves before returning
- : "+r"(src_u), // %0
- "+r"(src_v), // %1
- "+r"(dst_uv), // %2
- "+r"(width) // %3
- :
- : "memory", "cc", NACL_R14
- "xmm0", "xmm1", "xmm2"
- );
-}
-#endif // HAS_MERGEUVROW_AVX2
-
-#ifdef HAS_MERGEUVROW_SSE2
-void MergeUVRow_SSE2(const uint8* src_u, const uint8* src_v, uint8* dst_uv, // Interleaves 16 bytes from src_u with 16 bytes from src_v per loop pass, writing 32 bytes to dst_uv.
- int width) {
- asm volatile (
- "sub %0,%1 \n" // %1 = src_v - src_u, so (%0,%1) addresses src_v
- LABELALIGN
- "1: \n"
- "movdqu " MEMACCESS(0) ",%%xmm0 \n"
- MEMOPREG(movdqu,0x00,0,1,1,xmm1) // movdqu (%0,%1,1),%%xmm1
- "lea " MEMLEA(0x10,0) ",%0 \n"
- "movdqa %%xmm0,%%xmm2 \n"
- "punpcklbw %%xmm1,%%xmm0 \n" // first 8 interleaved pairs
- "punpckhbw %%xmm1,%%xmm2 \n" // next 8 interleaved pairs
- "movdqu %%xmm0," MEMACCESS(2) " \n"
- "movdqu %%xmm2," MEMACCESS2(0x10,2) " \n"
- "lea " MEMLEA(0x20,2) ",%2 \n"
- "sub $0x10,%3 \n"
- "jg 1b \n"
- : "+r"(src_u), // %0
- "+r"(src_v), // %1
- "+r"(dst_uv), // %2
- "+r"(width) // %3
- :
- : "memory", "cc", NACL_R14
- "xmm0", "xmm1", "xmm2"
- );
-}
-#endif // HAS_MERGEUVROW_SSE2
-
-#ifdef HAS_COPYROW_SSE2
-void CopyRow_SSE2(const uint8* src, uint8* dst, int count) { // Copies 32 bytes per loop pass with unaligned loads/stores; the loop always runs at least once and repeats while count stays > 0.
- asm volatile (
- LABELALIGN
- "1: \n"
- "movdqu " MEMACCESS(0) ",%%xmm0 \n"
- "movdqu " MEMACCESS2(0x10,0) ",%%xmm1 \n"
- "lea " MEMLEA(0x20,0) ",%0 \n"
- "movdqu %%xmm0," MEMACCESS(1) " \n"
- "movdqu %%xmm1," MEMACCESS2(0x10,1) " \n"
- "lea " MEMLEA(0x20,1) ",%1 \n"
- "sub $0x20,%2 \n"
- "jg 1b \n"
- : "+r"(src), // %0
- "+r"(dst), // %1
- "+r"(count) // %2
- :
- : "memory", "cc"
- , "xmm0", "xmm1"
- );
-}
-#endif // HAS_COPYROW_SSE2
-
-#ifdef HAS_COPYROW_AVX
-void CopyRow_AVX(const uint8* src, uint8* dst, int count) { // Copies 64 bytes per loop pass with unaligned YMM loads/stores; the loop always runs at least once and repeats while count stays > 0.
- asm volatile (
- LABELALIGN
- "1: \n"
- "vmovdqu " MEMACCESS(0) ",%%ymm0 \n"
- "vmovdqu " MEMACCESS2(0x20,0) ",%%ymm1 \n"
- "lea " MEMLEA(0x40,0) ",%0 \n"
- "vmovdqu %%ymm0," MEMACCESS(1) " \n"
- "vmovdqu %%ymm1," MEMACCESS2(0x20,1) " \n"
- "lea " MEMLEA(0x40,1) ",%1 \n"
- "sub $0x40,%2 \n"
- "jg 1b \n"
- "vzeroupper \n" // clear upper YMM halves before returning, as every other YMM routine in this file does, so later legacy-SSE code pays no AVX/SSE transition penalty
- : "+r"(src), // %0
- "+r"(dst), // %1
- "+r"(count) // %2
- :
- : "memory", "cc"
- , "xmm0", "xmm1"
- );
-}
-#endif // HAS_COPYROW_AVX
-
-#ifdef HAS_COPYROW_ERMS
-// Multiple of 1. Copies width bytes from src to dst with a single "rep movsb".
-void CopyRow_ERMS(const uint8* src, uint8* dst, int width) {
- size_t width_tmp = (size_t)(width); // widened copy bound to the count register via the "c" constraint
- asm volatile (
- "rep movsb " MEMMOVESTRING(0,1) " \n"
- : "+S"(src), // %0
- "+D"(dst), // %1
- "+c"(width_tmp) // %2
- :
- : "memory", "cc"
- );
-}
-#endif // HAS_COPYROW_ERMS
-
-#ifdef HAS_ARGBCOPYALPHAROW_SSE2
-// width in pixels. Copies the top (alpha) byte of each 32-bit src pixel into dst and keeps dst's low three bytes; 8 pixels per loop pass.
-void ARGBCopyAlphaRow_SSE2(const uint8* src, uint8* dst, int width) {
- asm volatile (
- "pcmpeqb %%xmm0,%%xmm0 \n"
- "pslld $0x18,%%xmm0 \n" // xmm0 = 0xff000000 per dword (alpha mask)
- "pcmpeqb %%xmm1,%%xmm1 \n"
- "psrld $0x8,%%xmm1 \n" // xmm1 = 0x00ffffff per dword (colour mask)
- LABELALIGN
- "1: \n"
- "movdqu " MEMACCESS(0) ",%%xmm2 \n"
- "movdqu " MEMACCESS2(0x10,0) ",%%xmm3 \n"
- "lea " MEMLEA(0x20,0) ",%0 \n"
- "movdqu " MEMACCESS(1) ",%%xmm4 \n" // current dst pixels
- "movdqu " MEMACCESS2(0x10,1) ",%%xmm5 \n"
- "pand %%xmm0,%%xmm2 \n" // alpha from src
- "pand %%xmm0,%%xmm3 \n"
- "pand %%xmm1,%%xmm4 \n" // colour from dst
- "pand %%xmm1,%%xmm5 \n"
- "por %%xmm4,%%xmm2 \n"
- "por %%xmm5,%%xmm3 \n"
- "movdqu %%xmm2," MEMACCESS(1) " \n"
- "movdqu %%xmm3," MEMACCESS2(0x10,1) " \n"
- "lea " MEMLEA(0x20,1) ",%1 \n"
- "sub $0x8,%2 \n"
- "jg 1b \n"
- : "+r"(src), // %0
- "+r"(dst), // %1
- "+r"(width) // %2
- :
- : "memory", "cc"
- , "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5"
- );
-}
-#endif // HAS_ARGBCOPYALPHAROW_SSE2
-
-#ifdef HAS_ARGBCOPYALPHAROW_AVX2
-// width in pixels. Copies the top (alpha) byte of each 32-bit src pixel into dst and keeps dst's low three bytes; 16 pixels per loop pass.
-void ARGBCopyAlphaRow_AVX2(const uint8* src, uint8* dst, int width) {
- asm volatile (
- "vpcmpeqb %%ymm0,%%ymm0,%%ymm0 \n"
- "vpsrld $0x8,%%ymm0,%%ymm0 \n" // ymm0 = 0x00ffffff per dword: blend selector for the colour bytes
- LABELALIGN
- "1: \n"
- "vmovdqu " MEMACCESS(0) ",%%ymm1 \n"
- "vmovdqu " MEMACCESS2(0x20,0) ",%%ymm2 \n"
- "lea " MEMLEA(0x40,0) ",%0 \n"
- "vpblendvb %%ymm0," MEMACCESS(1) ",%%ymm1,%%ymm1 \n" // take dst bytes where the selector is set, src (alpha) elsewhere
- "vpblendvb %%ymm0," MEMACCESS2(0x20,1) ",%%ymm2,%%ymm2 \n"
- "vmovdqu %%ymm1," MEMACCESS(1) " \n"
- "vmovdqu %%ymm2," MEMACCESS2(0x20,1) " \n"
- "lea " MEMLEA(0x40,1) ",%1 \n"
- "sub $0x10,%2 \n"
- "jg 1b \n"
- "vzeroupper \n" // clear upper YMM halves before returning
- : "+r"(src), // %0
- "+r"(dst), // %1
- "+r"(width) // %2
- :
- : "memory", "cc"
- , "xmm0", "xmm1", "xmm2"
- );
-}
-#endif // HAS_ARGBCOPYALPHAROW_AVX2
-
-#ifdef HAS_ARGBCOPYYTOALPHAROW_SSE2
-// width in pixels. Writes each src byte into the top (alpha) byte of the matching 32-bit dst pixel and keeps dst's low three bytes; 8 pixels per loop pass.
-void ARGBCopyYToAlphaRow_SSE2(const uint8* src, uint8* dst, int width) {
- asm volatile (
- "pcmpeqb %%xmm0,%%xmm0 \n"
- "pslld $0x18,%%xmm0 \n" // xmm0 = 0xff000000 per dword (alpha mask)
- "pcmpeqb %%xmm1,%%xmm1 \n"
- "psrld $0x8,%%xmm1 \n" // xmm1 = 0x00ffffff per dword (colour mask)
- LABELALIGN
- "1: \n"
- "movq " MEMACCESS(0) ",%%xmm2 \n" // load 8 source bytes
- "lea " MEMLEA(0x8,0) ",%0 \n"
- "punpcklbw %%xmm2,%%xmm2 \n" // each byte duplicated into a word
- "punpckhwd %%xmm2,%%xmm3 \n" // bytes 4..7 land in the high word of each dword; the stale xmm3 low words are removed by the pand with xmm0 below
- "punpcklwd %%xmm2,%%xmm2 \n" // bytes 0..3 replicated across each dword
- "movdqu " MEMACCESS(1) ",%%xmm4 \n" // current dst pixels
- "movdqu " MEMACCESS2(0x10,1) ",%%xmm5 \n"
- "pand %%xmm0,%%xmm2 \n" // keep only the alpha position
- "pand %%xmm0,%%xmm3 \n"
- "pand %%xmm1,%%xmm4 \n" // colour from dst
- "pand %%xmm1,%%xmm5 \n"
- "por %%xmm4,%%xmm2 \n"
- "por %%xmm5,%%xmm3 \n"
- "movdqu %%xmm2," MEMACCESS(1) " \n"
- "movdqu %%xmm3," MEMACCESS2(0x10,1) " \n"
- "lea " MEMLEA(0x20,1) ",%1 \n"
- "sub $0x8,%2 \n"
- "jg 1b \n"
- : "+r"(src), // %0
- "+r"(dst), // %1
- "+r"(width) // %2
- :
- : "memory", "cc"
- , "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5"
- );
-}
-#endif // HAS_ARGBCOPYYTOALPHAROW_SSE2
-
-#ifdef HAS_ARGBCOPYYTOALPHAROW_AVX2
-// width in pixels. Writes each src byte into the top (alpha) byte of the matching 32-bit dst pixel and keeps dst's low three bytes; 16 pixels per loop pass.
-void ARGBCopyYToAlphaRow_AVX2(const uint8* src, uint8* dst, int width) {
- asm volatile (
- "vpcmpeqb %%ymm0,%%ymm0,%%ymm0 \n"
- "vpsrld $0x8,%%ymm0,%%ymm0 \n" // ymm0 = 0x00ffffff per dword: blend selector for the colour bytes
- LABELALIGN
- "1: \n"
- "vpmovzxbd " MEMACCESS(0) ",%%ymm1 \n" // 8 source bytes zero-extended to 8 dwords
- "vpmovzxbd " MEMACCESS2(0x8,0) ",%%ymm2 \n"
- "lea " MEMLEA(0x10,0) ",%0 \n"
- "vpslld $0x18,%%ymm1,%%ymm1 \n" // move each value into the top byte of its dword
- "vpslld $0x18,%%ymm2,%%ymm2 \n"
- "vpblendvb %%ymm0," MEMACCESS(1) ",%%ymm1,%%ymm1 \n" // take dst bytes where the selector is set, the new alpha elsewhere
- "vpblendvb %%ymm0," MEMACCESS2(0x20,1) ",%%ymm2,%%ymm2 \n"
- "vmovdqu %%ymm1," MEMACCESS(1) " \n"
- "vmovdqu %%ymm2," MEMACCESS2(0x20,1) " \n"
- "lea " MEMLEA(0x40,1) ",%1 \n"
- "sub $0x10,%2 \n"
- "jg 1b \n"
- "vzeroupper \n" // clear upper YMM halves before returning
- : "+r"(src), // %0
- "+r"(dst), // %1
- "+r"(width) // %2
- :
- : "memory", "cc"
- , "xmm0", "xmm1", "xmm2"
- );
-}
-#endif // HAS_ARGBCOPYYTOALPHAROW_AVX2
-
-#ifdef HAS_SETROW_X86
-void SetRow_X86(uint8* dst, uint8 v8, int width) { // Fills dst with v8 using 32-bit stores.
- size_t width_tmp = (size_t)(width >> 2); // number of dword stores; any trailing width % 4 bytes are not written here
- const uint32 v32 = v8 * 0x01010101; // Duplicate byte to all bytes.
- asm volatile (
- "rep stosl " MEMSTORESTRING(eax,0) " \n"
- : "+D"(dst), // %0
- "+c"(width_tmp) // %1
- : "a"(v32) // %2
- : "memory", "cc");
-}
-
-void SetRow_ERMS(uint8* dst, uint8 v8, int width) { // Fills width bytes of dst with v8 using a single "rep stosb".
- size_t width_tmp = (size_t)(width); // widened copy bound to the count register via the "c" constraint
- asm volatile (
- "rep stosb " MEMSTORESTRING(al,0) " \n"
- : "+D"(dst), // %0
- "+c"(width_tmp) // %1
- : "a"(v8) // %2
- : "memory", "cc");
-}
-
-void ARGBSetRow_X86(uint8* dst_argb, uint32 v32, int width) { // Stores the 32-bit value v32 width times starting at dst_argb using "rep stosl".
- size_t width_tmp = (size_t)(width); // count of dword stores, bound to the count register via the "c" constraint
- asm volatile (
- "rep stosl " MEMSTORESTRING(eax,0) " \n"
- : "+D"(dst_argb), // %0
- "+c"(width_tmp) // %1
- : "a"(v32) // %2
- : "memory", "cc");
-}
-#endif // HAS_SETROW_X86
-
-#ifdef HAS_YUY2TOYROW_SSE2
-void YUY2ToYRow_SSE2(const uint8* src_yuy2, uint8* dst_y, int pix) { // Extracts the even bytes of src_yuy2 into dst_y: 32 source bytes in, 16 bytes out per loop pass.
- asm volatile (
- "pcmpeqb %%xmm5,%%xmm5 \n"
- "psrlw $0x8,%%xmm5 \n" // xmm5 = 0x00ff per word (keeps the even byte)
- LABELALIGN
- "1: \n"
- "movdqu " MEMACCESS(0) ",%%xmm0 \n"
- "movdqu " MEMACCESS2(0x10,0) ",%%xmm1 \n"
- "lea " MEMLEA(0x20,0) ",%0 \n"
- "pand %%xmm5,%%xmm0 \n"
- "pand %%xmm5,%%xmm1 \n"
- "packuswb %%xmm1,%%xmm0 \n"
- "movdqu %%xmm0," MEMACCESS(1) " \n"
- "lea " MEMLEA(0x10,1) ",%1 \n"
- "sub $0x10,%2 \n"
- "jg 1b \n"
- : "+r"(src_yuy2), // %0
- "+r"(dst_y), // %1
- "+r"(pix) // %2
- :
- : "memory", "cc"
- , "xmm0", "xmm1", "xmm5"
- );
-}
-
-void YUY2ToUVRow_SSE2(const uint8* src_yuy2, int stride_yuy2, // Averages two rows (src_yuy2 and src_yuy2 + stride_yuy2), takes the odd bytes, and splits them into 8 bytes for dst_u and 8 for dst_v per loop pass.
- uint8* dst_u, uint8* dst_v, int pix) {
- asm volatile (
- "pcmpeqb %%xmm5,%%xmm5 \n"
- "psrlw $0x8,%%xmm5 \n" // xmm5 = 0x00ff per word
- "sub %1,%2 \n" // %2 = dst_v - dst_u, so (%1,%2) addresses dst_v
- LABELALIGN
- "1: \n"
- "movdqu " MEMACCESS(0) ",%%xmm0 \n"
- "movdqu " MEMACCESS2(0x10,0) ",%%xmm1 \n"
- MEMOPREG(movdqu,0x00,0,4,1,xmm2) // movdqu (%0,%4,1),%%xmm2
- MEMOPREG(movdqu,0x10,0,4,1,xmm3) // movdqu 0x10(%0,%4,1),%%xmm3
- "lea " MEMLEA(0x20,0) ",%0 \n"
- "pavgb %%xmm2,%%xmm0 \n" // rounded average of the two rows
- "pavgb %%xmm3,%%xmm1 \n"
- "psrlw $0x8,%%xmm0 \n" // keep the odd bytes
- "psrlw $0x8,%%xmm1 \n"
- "packuswb %%xmm1,%%xmm0 \n"
- "movdqa %%xmm0,%%xmm1 \n"
- "pand %%xmm5,%%xmm0 \n" // even bytes of the packed result go to dst_u
- "packuswb %%xmm0,%%xmm0 \n"
- "psrlw $0x8,%%xmm1 \n" // odd bytes of the packed result go to dst_v
- "packuswb %%xmm1,%%xmm1 \n"
- "movq %%xmm0," MEMACCESS(1) " \n"
- MEMOPMEM(movq,xmm1,0x00,1,2,1) // movq %%xmm1,(%1,%2)
- "lea " MEMLEA(0x8,1) ",%1 \n"
- "sub $0x10,%3 \n"
- "jg 1b \n"
- : "+r"(src_yuy2), // %0
- "+r"(dst_u), // %1
- "+r"(dst_v), // %2
- "+r"(pix) // %3
- : "r"((intptr_t)(stride_yuy2)) // %4
- : "memory", "cc", NACL_R14
- "xmm0", "xmm1", "xmm2", "xmm3", "xmm5"
- );
-}
-
-void YUY2ToUV422Row_SSE2(const uint8* src_yuy2, // Single-row variant: takes the odd bytes of 32 source bytes and splits them into 8 bytes for dst_u and 8 for dst_v per loop pass.
- uint8* dst_u, uint8* dst_v, int pix) {
- asm volatile (
- "pcmpeqb %%xmm5,%%xmm5 \n"
- "psrlw $0x8,%%xmm5 \n" // xmm5 = 0x00ff per word
- "sub %1,%2 \n" // %2 = dst_v - dst_u, so (%1,%2) addresses dst_v
- LABELALIGN
- "1: \n"
- "movdqu " MEMACCESS(0) ",%%xmm0 \n"
- "movdqu " MEMACCESS2(0x10,0) ",%%xmm1 \n"
- "lea " MEMLEA(0x20,0) ",%0 \n"
- "psrlw $0x8,%%xmm0 \n" // keep the odd bytes
- "psrlw $0x8,%%xmm1 \n"
- "packuswb %%xmm1,%%xmm0 \n"
- "movdqa %%xmm0,%%xmm1 \n"
- "pand %%xmm5,%%xmm0 \n" // even bytes of the packed result go to dst_u
- "packuswb %%xmm0,%%xmm0 \n"
- "psrlw $0x8,%%xmm1 \n" // odd bytes of the packed result go to dst_v
- "packuswb %%xmm1,%%xmm1 \n"
- "movq %%xmm0," MEMACCESS(1) " \n"
- MEMOPMEM(movq,xmm1,0x00,1,2,1) // movq %%xmm1,(%1,%2)
- "lea " MEMLEA(0x8,1) ",%1 \n"
- "sub $0x10,%3 \n"
- "jg 1b \n"
- : "+r"(src_yuy2), // %0
- "+r"(dst_u), // %1
- "+r"(dst_v), // %2
- "+r"(pix) // %3
- :
- : "memory", "cc", NACL_R14
- "xmm0", "xmm1", "xmm5"
- );
-}
-
-void UYVYToYRow_SSE2(const uint8* src_uyvy, uint8* dst_y, int pix) { // Extracts the odd bytes of src_uyvy into dst_y: 32 source bytes in, 16 bytes out per loop pass.
- asm volatile (
- LABELALIGN
- "1: \n"
- "movdqu " MEMACCESS(0) ",%%xmm0 \n"
- "movdqu " MEMACCESS2(0x10,0) ",%%xmm1 \n"
- "lea " MEMLEA(0x20,0) ",%0 \n"
- "psrlw $0x8,%%xmm0 \n" // keep the odd byte of each word
- "psrlw $0x8,%%xmm1 \n"
- "packuswb %%xmm1,%%xmm0 \n"
- "movdqu %%xmm0," MEMACCESS(1) " \n"
- "lea " MEMLEA(0x10,1) ",%1 \n"
- "sub $0x10,%2 \n"
- "jg 1b \n"
- : "+r"(src_uyvy), // %0
- "+r"(dst_y), // %1
- "+r"(pix) // %2
- :
- : "memory", "cc"
- , "xmm0", "xmm1"
- );
-}
-
-void UYVYToUVRow_SSE2(const uint8* src_uyvy, int stride_uyvy, // Averages two rows (src_uyvy and src_uyvy + stride_uyvy), takes the even bytes, and splits them into 8 bytes for dst_u and 8 for dst_v per loop pass.
- uint8* dst_u, uint8* dst_v, int pix) {
- asm volatile (
- "pcmpeqb %%xmm5,%%xmm5 \n"
- "psrlw $0x8,%%xmm5 \n" // xmm5 = 0x00ff per word
- "sub %1,%2 \n" // %2 = dst_v - dst_u, so (%1,%2) addresses dst_v
- LABELALIGN
- "1: \n"
- "movdqu " MEMACCESS(0) ",%%xmm0 \n"
- "movdqu " MEMACCESS2(0x10,0) ",%%xmm1 \n"
- MEMOPREG(movdqu,0x00,0,4,1,xmm2) // movdqu (%0,%4,1),%%xmm2
- MEMOPREG(movdqu,0x10,0,4,1,xmm3) // movdqu 0x10(%0,%4,1),%%xmm3
- "lea " MEMLEA(0x20,0) ",%0 \n"
- "pavgb %%xmm2,%%xmm0 \n" // rounded average of the two rows
- "pavgb %%xmm3,%%xmm1 \n"
- "pand %%xmm5,%%xmm0 \n" // keep the even bytes
- "pand %%xmm5,%%xmm1 \n"
- "packuswb %%xmm1,%%xmm0 \n"
- "movdqa %%xmm0,%%xmm1 \n"
- "pand %%xmm5,%%xmm0 \n" // even bytes of the packed result go to dst_u
- "packuswb %%xmm0,%%xmm0 \n"
- "psrlw $0x8,%%xmm1 \n" // odd bytes of the packed result go to dst_v
- "packuswb %%xmm1,%%xmm1 \n"
- "movq %%xmm0," MEMACCESS(1) " \n"
- MEMOPMEM(movq,xmm1,0x00,1,2,1) // movq %%xmm1,(%1,%2)
- "lea " MEMLEA(0x8,1) ",%1 \n"
- "sub $0x10,%3 \n"
- "jg 1b \n"
- : "+r"(src_uyvy), // %0
- "+r"(dst_u), // %1
- "+r"(dst_v), // %2
- "+r"(pix) // %3
- : "r"((intptr_t)(stride_uyvy)) // %4
- : "memory", "cc", NACL_R14
- "xmm0", "xmm1", "xmm2", "xmm3", "xmm5"
- );
-}
-
-void UYVYToUV422Row_SSE2(const uint8* src_uyvy, // Single-row variant: takes the even bytes of 32 source bytes and splits them into 8 bytes for dst_u and 8 for dst_v per loop pass.
- uint8* dst_u, uint8* dst_v, int pix) {
- asm volatile (
- "pcmpeqb %%xmm5,%%xmm5 \n"
- "psrlw $0x8,%%xmm5 \n" // xmm5 = 0x00ff per word
- "sub %1,%2 \n" // %2 = dst_v - dst_u, so (%1,%2) addresses dst_v
- LABELALIGN
- "1: \n"
- "movdqu " MEMACCESS(0) ",%%xmm0 \n"
- "movdqu " MEMACCESS2(0x10,0) ",%%xmm1 \n"
- "lea " MEMLEA(0x20,0) ",%0 \n"
- "pand %%xmm5,%%xmm0 \n" // keep the even bytes
- "pand %%xmm5,%%xmm1 \n"
- "packuswb %%xmm1,%%xmm0 \n"
- "movdqa %%xmm0,%%xmm1 \n"
- "pand %%xmm5,%%xmm0 \n" // even bytes of the packed result go to dst_u
- "packuswb %%xmm0,%%xmm0 \n"
- "psrlw $0x8,%%xmm1 \n" // odd bytes of the packed result go to dst_v
- "packuswb %%xmm1,%%xmm1 \n"
- "movq %%xmm0," MEMACCESS(1) " \n"
- MEMOPMEM(movq,xmm1,0x00,1,2,1) // movq %%xmm1,(%1,%2)
- "lea " MEMLEA(0x8,1) ",%1 \n"
- "sub $0x10,%3 \n"
- "jg 1b \n"
- : "+r"(src_uyvy), // %0
- "+r"(dst_u), // %1
- "+r"(dst_v), // %2
- "+r"(pix) // %3
- :
- : "memory", "cc", NACL_R14
- "xmm0", "xmm1", "xmm5"
- );
-}
-#endif // HAS_YUY2TOYROW_SSE2
-
-#ifdef HAS_YUY2TOYROW_AVX2
-void YUY2ToYRow_AVX2(const uint8* src_yuy2, uint8* dst_y, int pix) { // Extracts the even bytes of src_yuy2 into dst_y: 64 source bytes in, 32 bytes out per loop pass.
- asm volatile (
- "vpcmpeqb %%ymm5,%%ymm5,%%ymm5 \n"
- "vpsrlw $0x8,%%ymm5,%%ymm5 \n" // ymm5 = 0x00ff per word (keeps the even byte)
- LABELALIGN
- "1: \n"
- "vmovdqu " MEMACCESS(0) ",%%ymm0 \n"
- "vmovdqu " MEMACCESS2(0x20,0) ",%%ymm1 \n"
- "lea " MEMLEA(0x40,0) ",%0 \n"
- "vpand %%ymm5,%%ymm0,%%ymm0 \n"
- "vpand %%ymm5,%%ymm1,%%ymm1 \n"
- "vpackuswb %%ymm1,%%ymm0,%%ymm0 \n"
- "vpermq $0xd8,%%ymm0,%%ymm0 \n" // qword order 0,2,1,3 restores linear order after the in-lane pack
- "vmovdqu %%ymm0," MEMACCESS(1) " \n"
- "lea " MEMLEA(0x20,1) ",%1 \n"
- "sub $0x20,%2 \n"
- "jg 1b \n"
- "vzeroupper \n" // clear upper YMM halves before returning
- : "+r"(src_yuy2), // %0
- "+r"(dst_y), // %1
- "+r"(pix) // %2
- :
- : "memory", "cc"
- , "xmm0", "xmm1", "xmm5"
- );
-}
-
-void YUY2ToUVRow_AVX2(const uint8* src_yuy2, int stride_yuy2, // Averages two rows (src_yuy2 and src_yuy2 + stride_yuy2), takes the odd bytes, and splits them into 16 bytes for dst_u and 16 for dst_v per loop pass.
- uint8* dst_u, uint8* dst_v, int pix) {
- asm volatile (
- "vpcmpeqb %%ymm5,%%ymm5,%%ymm5 \n"
- "vpsrlw $0x8,%%ymm5,%%ymm5 \n" // ymm5 = 0x00ff per word
- "sub %1,%2 \n" // %2 = dst_v - dst_u, so (%1,%2) addresses dst_v
- LABELALIGN
- "1: \n"
- "vmovdqu " MEMACCESS(0) ",%%ymm0 \n"
- "vmovdqu " MEMACCESS2(0x20,0) ",%%ymm1 \n"
- VMEMOPREG(vpavgb,0x00,0,4,1,ymm0,ymm0) // vpavgb (%0,%4,1),%%ymm0,%%ymm0
- VMEMOPREG(vpavgb,0x20,0,4,1,ymm1,ymm1) // presumably vpavgb 0x20(%0,%4,1),%%ymm1,%%ymm1, by analogy with the line above
- "lea " MEMLEA(0x40,0) ",%0 \n"
- "vpsrlw $0x8,%%ymm0,%%ymm0 \n" // keep the odd bytes
- "vpsrlw $0x8,%%ymm1,%%ymm1 \n"
- "vpackuswb %%ymm1,%%ymm0,%%ymm0 \n"
- "vpermq $0xd8,%%ymm0,%%ymm0 \n" // qword order 0,2,1,3 restores linear order after the in-lane pack
- "vpand %%ymm5,%%ymm0,%%ymm1 \n" // even bytes of the packed result (stored to dst_u)
- "vpsrlw $0x8,%%ymm0,%%ymm0 \n" // odd bytes of the packed result (stored to dst_v)
- "vpackuswb %%ymm1,%%ymm1,%%ymm1 \n"
- "vpackuswb %%ymm0,%%ymm0,%%ymm0 \n"
- "vpermq $0xd8,%%ymm1,%%ymm1 \n"
- "vpermq $0xd8,%%ymm0,%%ymm0 \n"
- "vextractf128 $0x0,%%ymm1," MEMACCESS(1) " \n"
- VEXTOPMEM(vextractf128,0,ymm0,0x00,1,2,1) // vextractf128 $0x0,%%ymm0,(%1,%2,1)
- "lea " MEMLEA(0x10,1) ",%1 \n"
- "sub $0x20,%3 \n"
- "jg 1b \n"
- "vzeroupper \n" // clear upper YMM halves before returning
- : "+r"(src_yuy2), // %0
- "+r"(dst_u), // %1
- "+r"(dst_v), // %2
- "+r"(pix) // %3
- : "r"((intptr_t)(stride_yuy2)) // %4
- : "memory", "cc", NACL_R14
- "xmm0", "xmm1", "xmm5"
- );
-}
-
-void YUY2ToUV422Row_AVX2(const uint8* src_yuy2, // Single-row variant: takes the odd bytes of 64 source bytes and splits them into 16 bytes for dst_u and 16 for dst_v per loop pass.
- uint8* dst_u, uint8* dst_v, int pix) {
- asm volatile (
- "vpcmpeqb %%ymm5,%%ymm5,%%ymm5 \n"
- "vpsrlw $0x8,%%ymm5,%%ymm5 \n" // ymm5 = 0x00ff per word
- "sub %1,%2 \n" // %2 = dst_v - dst_u, so (%1,%2) addresses dst_v
- LABELALIGN
- "1: \n"
- "vmovdqu " MEMACCESS(0) ",%%ymm0 \n"
- "vmovdqu " MEMACCESS2(0x20,0) ",%%ymm1 \n"
- "lea " MEMLEA(0x40,0) ",%0 \n"
- "vpsrlw $0x8,%%ymm0,%%ymm0 \n" // keep the odd bytes
- "vpsrlw $0x8,%%ymm1,%%ymm1 \n"
- "vpackuswb %%ymm1,%%ymm0,%%ymm0 \n"
- "vpermq $0xd8,%%ymm0,%%ymm0 \n" // qword order 0,2,1,3 restores linear order after the in-lane pack
- "vpand %%ymm5,%%ymm0,%%ymm1 \n" // even bytes of the packed result (stored to dst_u)
- "vpsrlw $0x8,%%ymm0,%%ymm0 \n" // odd bytes of the packed result (stored to dst_v)
- "vpackuswb %%ymm1,%%ymm1,%%ymm1 \n"
- "vpackuswb %%ymm0,%%ymm0,%%ymm0 \n"
- "vpermq $0xd8,%%ymm1,%%ymm1 \n"
- "vpermq $0xd8,%%ymm0,%%ymm0 \n"
- "vextractf128 $0x0,%%ymm1," MEMACCESS(1) " \n"
- VEXTOPMEM(vextractf128,0,ymm0,0x00,1,2,1) // vextractf128 $0x0,%%ymm0,(%1,%2,1)
- "lea " MEMLEA(0x10,1) ",%1 \n"
- "sub $0x20,%3 \n"
- "jg 1b \n"
- "vzeroupper \n" // clear upper YMM halves before returning
- : "+r"(src_yuy2), // %0
- "+r"(dst_u), // %1
- "+r"(dst_v), // %2
- "+r"(pix) // %3
- :
- : "memory", "cc", NACL_R14
- "xmm0", "xmm1", "xmm5"
- );
-}
-
-void UYVYToYRow_AVX2(const uint8* src_uyvy, uint8* dst_y, int pix) { // Extracts the odd bytes of src_uyvy into dst_y: 64 source bytes in, 32 bytes out per loop pass.
- asm volatile (
- LABELALIGN
- "1: \n"
- "vmovdqu " MEMACCESS(0) ",%%ymm0 \n"
- "vmovdqu " MEMACCESS2(0x20,0) ",%%ymm1 \n"
- "lea " MEMLEA(0x40,0) ",%0 \n"
- "vpsrlw $0x8,%%ymm0,%%ymm0 \n" // keep the odd byte of each word
- "vpsrlw $0x8,%%ymm1,%%ymm1 \n"
- "vpackuswb %%ymm1,%%ymm0,%%ymm0 \n"
- "vpermq $0xd8,%%ymm0,%%ymm0 \n" // qword order 0,2,1,3 restores linear order after the in-lane pack
- "vmovdqu %%ymm0," MEMACCESS(1) " \n"
- "lea " MEMLEA(0x20,1) ",%1 \n"
- "sub $0x20,%2 \n"
- "jg 1b \n"
- "vzeroupper \n" // clear upper YMM halves before returning
- : "+r"(src_uyvy), // %0
- "+r"(dst_y), // %1
- "+r"(pix) // %2
- :
- : "memory", "cc"
- , "xmm0", "xmm1", "xmm5" // xmm5 is listed although no instruction above touches it
- );
-}
-void UYVYToUVRow_AVX2(const uint8* src_uyvy, int stride_uyvy, // Averages two rows (src_uyvy and src_uyvy + stride_uyvy), takes the even bytes, and splits them into 16 bytes for dst_u and 16 for dst_v per loop pass.
- uint8* dst_u, uint8* dst_v, int pix) {
- asm volatile (
- "vpcmpeqb %%ymm5,%%ymm5,%%ymm5 \n"
- "vpsrlw $0x8,%%ymm5,%%ymm5 \n" // ymm5 = 0x00ff per word
- "sub %1,%2 \n" // %2 = dst_v - dst_u, so (%1,%2) addresses dst_v
-
- LABELALIGN
- "1: \n"
- "vmovdqu " MEMACCESS(0) ",%%ymm0 \n"
- "vmovdqu " MEMACCESS2(0x20,0) ",%%ymm1 \n"
- VMEMOPREG(vpavgb,0x00,0,4,1,ymm0,ymm0) // vpavgb (%0,%4,1),%%ymm0,%%ymm0
- VMEMOPREG(vpavgb,0x20,0,4,1,ymm1,ymm1) // presumably vpavgb 0x20(%0,%4,1),%%ymm1,%%ymm1, by analogy with the line above
- "lea " MEMLEA(0x40,0) ",%0 \n"
- "vpand %%ymm5,%%ymm0,%%ymm0 \n" // keep the even bytes
- "vpand %%ymm5,%%ymm1,%%ymm1 \n"
- "vpackuswb %%ymm1,%%ymm0,%%ymm0 \n"
- "vpermq $0xd8,%%ymm0,%%ymm0 \n" // qword order 0,2,1,3 restores linear order after the in-lane pack
- "vpand %%ymm5,%%ymm0,%%ymm1 \n" // even bytes of the packed result (stored to dst_u)
- "vpsrlw $0x8,%%ymm0,%%ymm0 \n" // odd bytes of the packed result (stored to dst_v)
- "vpackuswb %%ymm1,%%ymm1,%%ymm1 \n"
- "vpackuswb %%ymm0,%%ymm0,%%ymm0 \n"
- "vpermq $0xd8,%%ymm1,%%ymm1 \n"
- "vpermq $0xd8,%%ymm0,%%ymm0 \n"
- "vextractf128 $0x0,%%ymm1," MEMACCESS(1) " \n"
- VEXTOPMEM(vextractf128,0,ymm0,0x00,1,2,1) // vextractf128 $0x0,%%ymm0,(%1,%2,1)
- "lea " MEMLEA(0x10,1) ",%1 \n"
- "sub $0x20,%3 \n"
- "jg 1b \n"
- "vzeroupper \n" // clear upper YMM halves before returning
- : "+r"(src_uyvy), // %0
- "+r"(dst_u), // %1
- "+r"(dst_v), // %2
- "+r"(pix) // %3
- : "r"((intptr_t)(stride_uyvy)) // %4
- : "memory", "cc", NACL_R14
- "xmm0", "xmm1", "xmm5"
- );
-}
-
-void UYVYToUV422Row_AVX2(const uint8* src_uyvy, // Single-row variant: takes the even bytes of 64 source bytes and splits them into 16 bytes for dst_u and 16 for dst_v per loop pass.
- uint8* dst_u, uint8* dst_v, int pix) {
- asm volatile (
- "vpcmpeqb %%ymm5,%%ymm5,%%ymm5 \n"
- "vpsrlw $0x8,%%ymm5,%%ymm5 \n" // ymm5 = 0x00ff per word
- "sub %1,%2 \n" // %2 = dst_v - dst_u, so (%1,%2) addresses dst_v
- LABELALIGN
- "1: \n"
- "vmovdqu " MEMACCESS(0) ",%%ymm0 \n"
- "vmovdqu " MEMACCESS2(0x20,0) ",%%ymm1 \n"
- "lea " MEMLEA(0x40,0) ",%0 \n"
- "vpand %%ymm5,%%ymm0,%%ymm0 \n" // keep the even bytes
- "vpand %%ymm5,%%ymm1,%%ymm1 \n"
- "vpackuswb %%ymm1,%%ymm0,%%ymm0 \n"
- "vpermq $0xd8,%%ymm0,%%ymm0 \n" // qword order 0,2,1,3 restores linear order after the in-lane pack
- "vpand %%ymm5,%%ymm0,%%ymm1 \n" // even bytes of the packed result (stored to dst_u)
- "vpsrlw $0x8,%%ymm0,%%ymm0 \n" // odd bytes of the packed result (stored to dst_v)
- "vpackuswb %%ymm1,%%ymm1,%%ymm1 \n"
- "vpackuswb %%ymm0,%%ymm0,%%ymm0 \n"
- "vpermq $0xd8,%%ymm1,%%ymm1 \n"
- "vpermq $0xd8,%%ymm0,%%ymm0 \n"
- "vextractf128 $0x0,%%ymm1," MEMACCESS(1) " \n"
- VEXTOPMEM(vextractf128,0,ymm0,0x00,1,2,1) // vextractf128 $0x0,%%ymm0,(%1,%2,1)
- "lea " MEMLEA(0x10,1) ",%1 \n"
- "sub $0x20,%3 \n"
- "jg 1b \n"
- "vzeroupper \n" // clear upper YMM halves before returning
- : "+r"(src_uyvy), // %0
- "+r"(dst_u), // %1
- "+r"(dst_v), // %2
- "+r"(pix) // %3
- :
- : "memory", "cc", NACL_R14
- "xmm0", "xmm1", "xmm5"
- );
-}
-#endif // HAS_YUY2TOYROW_AVX2
-
-#ifdef HAS_ARGBBLENDROW_SSE2
-// Blend 4 pixels at a time, then 1 pixel at a time for the remaining 0-3 pixels. src_argb0 is drawn over src_argb1 using src_argb0's alpha; the output alpha is forced to 0xff.
-void ARGBBlendRow_SSE2(const uint8* src_argb0, const uint8* src_argb1,
- uint8* dst_argb, int width) {
- asm volatile (
- "pcmpeqb %%xmm7,%%xmm7 \n"
- "psrlw $0xf,%%xmm7 \n" // xmm7 = 0x0001 per word
- "pcmpeqb %%xmm6,%%xmm6 \n"
- "psrlw $0x8,%%xmm6 \n" // xmm6 = 0x00ff per word
- "pcmpeqb %%xmm5,%%xmm5 \n"
- "psllw $0x8,%%xmm5 \n" // xmm5 = 0xff00 per word
- "pcmpeqb %%xmm4,%%xmm4 \n"
- "pslld $0x18,%%xmm4 \n" // xmm4 = 0xff000000 per dword (alpha mask)
- "sub $0x4,%3 \n"
- "jl 49f \n" // fewer than 4 pixels: skip to the tail handling
-
- // 4 pixel loop.
- LABELALIGN
- "41: \n"
- "movdqu " MEMACCESS(0) ",%%xmm3 \n"
- "lea " MEMLEA(0x10,0) ",%0 \n"
- "movdqa %%xmm3,%%xmm0 \n"
- "pxor %%xmm4,%%xmm3 \n" // invert the alpha byte: 255 - a
- "movdqu " MEMACCESS(1) ",%%xmm2 \n"
- "psrlw $0x8,%%xmm3 \n"
- "pshufhw $0xf5,%%xmm3,%%xmm3 \n" // replicate the inverted alpha into both words of each pixel
- "pshuflw $0xf5,%%xmm3,%%xmm3 \n"
- "pand %%xmm6,%%xmm2 \n" // even bytes of src_argb1
- "paddw %%xmm7,%%xmm3 \n" // 256 - a
- "pmullw %%xmm3,%%xmm2 \n"
- "movdqu " MEMACCESS(1) ",%%xmm1 \n"
- "lea " MEMLEA(0x10,1) ",%1 \n"
- "psrlw $0x8,%%xmm1 \n" // odd bytes of src_argb1
- "por %%xmm4,%%xmm0 \n" // src_argb0 with alpha forced to 0xff
- "pmullw %%xmm3,%%xmm1 \n"
- "psrlw $0x8,%%xmm2 \n"
- "paddusb %%xmm2,%%xmm0 \n" // saturating add of the scaled even bytes
- "pand %%xmm5,%%xmm1 \n"
- "paddusb %%xmm1,%%xmm0 \n" // saturating add of the scaled odd bytes
- "movdqu %%xmm0," MEMACCESS(2) " \n"
- "lea " MEMLEA(0x10,2) ",%2 \n"
- "sub $0x4,%3 \n"
- "jge 41b \n"
-
- "49: \n"
- "add $0x3,%3 \n" // %3 = remaining pixels - 1
- "jl 99f \n" // nothing left
-
- // 1 pixel loop.
- "91: \n"
- "movd " MEMACCESS(0) ",%%xmm3 \n"
- "lea " MEMLEA(0x4,0) ",%0 \n"
- "movdqa %%xmm3,%%xmm0 \n"
- "pxor %%xmm4,%%xmm3 \n"
- "movd " MEMACCESS(1) ",%%xmm2 \n"
- "psrlw $0x8,%%xmm3 \n"
- "pshufhw $0xf5,%%xmm3,%%xmm3 \n"
- "pshuflw $0xf5,%%xmm3,%%xmm3 \n"
- "pand %%xmm6,%%xmm2 \n"
- "paddw %%xmm7,%%xmm3 \n"
- "pmullw %%xmm3,%%xmm2 \n"
- "movd " MEMACCESS(1) ",%%xmm1 \n"
- "lea " MEMLEA(0x4,1) ",%1 \n"
- "psrlw $0x8,%%xmm1 \n"
- "por %%xmm4,%%xmm0 \n"
- "pmullw %%xmm3,%%xmm1 \n"
- "psrlw $0x8,%%xmm2 \n"
- "paddusb %%xmm2,%%xmm0 \n"
- "pand %%xmm5,%%xmm1 \n"
- "paddusb %%xmm1,%%xmm0 \n"
- "movd %%xmm0," MEMACCESS(2) " \n"
- "lea " MEMLEA(0x4,2) ",%2 \n"
- "sub $0x1,%3 \n"
- "jge 91b \n"
- "99: \n"
- : "+r"(src_argb0), // %0
- "+r"(src_argb1), // %1
- "+r"(dst_argb), // %2
- "+r"(width) // %3
- :
- : "memory", "cc"
- , "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", "xmm7"
- );
-}
-#endif // HAS_ARGBBLENDROW_SSE2
-
-#ifdef HAS_ARGBBLENDROW_SSSE3
-// Shuffle table for isolating alpha: copies byte 3/7/11/15 of each pixel into the low byte of both of that pixel's words and zeroes the high bytes (0x80 entries).
-static uvec8 kShuffleAlpha = {
- 3u, 0x80, 3u, 0x80, 7u, 0x80, 7u, 0x80,
- 11u, 0x80, 11u, 0x80, 15u, 0x80, 15u, 0x80
-};
-
-// Blend 4 pixels at a time, then 1 pixel at a time for the remaining 0-3 pixels.
-// Uses the kShuffleAlpha table above to broadcast each pixel's alpha.
-
-// Same as SSE2, but replaces
-// psrlw xmm3, 8 // alpha
-// pshufhw xmm3, xmm3,0F5h // 8 alpha words
-// pshuflw xmm3, xmm3,0F5h
-// with..
-// pshufb xmm3, kShuffleAlpha // alpha
-
-void ARGBBlendRow_SSSE3(const uint8* src_argb0, const uint8* src_argb1,
- uint8* dst_argb, int width) {
- asm volatile (
- "pcmpeqb %%xmm7,%%xmm7 \n"
- "psrlw $0xf,%%xmm7 \n" // xmm7 = 0x0001 per word
- "pcmpeqb %%xmm6,%%xmm6 \n"
- "psrlw $0x8,%%xmm6 \n" // xmm6 = 0x00ff per word
- "pcmpeqb %%xmm5,%%xmm5 \n"
- "psllw $0x8,%%xmm5 \n" // xmm5 = 0xff00 per word
- "pcmpeqb %%xmm4,%%xmm4 \n"
- "pslld $0x18,%%xmm4 \n" // xmm4 = 0xff000000 per dword (alpha mask)
- "sub $0x4,%3 \n"
- "jl 49f \n" // fewer than 4 pixels: skip to the tail handling
-
- // 4 pixel loop.
- LABELALIGN
- "40: \n"
- "movdqu " MEMACCESS(0) ",%%xmm3 \n"
- "lea " MEMLEA(0x10,0) ",%0 \n"
- "movdqa %%xmm3,%%xmm0 \n"
- "pxor %%xmm4,%%xmm3 \n" // invert the alpha byte: 255 - a
- "movdqu " MEMACCESS(1) ",%%xmm2 \n"
- "pshufb %4,%%xmm3 \n" // broadcast the inverted alpha into both words of each pixel
- "pand %%xmm6,%%xmm2 \n" // even bytes of src_argb1
- "paddw %%xmm7,%%xmm3 \n" // 256 - a
- "pmullw %%xmm3,%%xmm2 \n"
- "movdqu " MEMACCESS(1) ",%%xmm1 \n"
- "lea " MEMLEA(0x10,1) ",%1 \n"
- "psrlw $0x8,%%xmm1 \n" // odd bytes of src_argb1
- "por %%xmm4,%%xmm0 \n" // src_argb0 with alpha forced to 0xff
- "pmullw %%xmm3,%%xmm1 \n"
- "psrlw $0x8,%%xmm2 \n"
- "paddusb %%xmm2,%%xmm0 \n" // saturating add of the scaled even bytes
- "pand %%xmm5,%%xmm1 \n"
- "paddusb %%xmm1,%%xmm0 \n" // saturating add of the scaled odd bytes
- "movdqu %%xmm0," MEMACCESS(2) " \n"
- "lea " MEMLEA(0x10,2) ",%2 \n"
- "sub $0x4,%3 \n"
- "jge 40b \n"
-
- "49: \n"
- "add $0x3,%3 \n" // %3 = remaining pixels - 1
- "jl 99f \n" // nothing left
-
- // 1 pixel loop.
- "91: \n"
- "movd " MEMACCESS(0) ",%%xmm3 \n"
- "lea " MEMLEA(0x4,0) ",%0 \n"
- "movdqa %%xmm3,%%xmm0 \n"
- "pxor %%xmm4,%%xmm3 \n"
- "movd " MEMACCESS(1) ",%%xmm2 \n"
- "pshufb %4,%%xmm3 \n"
- "pand %%xmm6,%%xmm2 \n"
- "paddw %%xmm7,%%xmm3 \n"
- "pmullw %%xmm3,%%xmm2 \n"
- "movd " MEMACCESS(1) ",%%xmm1 \n"
- "lea " MEMLEA(0x4,1) ",%1 \n"
- "psrlw $0x8,%%xmm1 \n"
- "por %%xmm4,%%xmm0 \n"
- "pmullw %%xmm3,%%xmm1 \n"
- "psrlw $0x8,%%xmm2 \n"
- "paddusb %%xmm2,%%xmm0 \n"
- "pand %%xmm5,%%xmm1 \n"
- "paddusb %%xmm1,%%xmm0 \n"
- "movd %%xmm0," MEMACCESS(2) " \n"
- "lea " MEMLEA(0x4,2) ",%2 \n"
- "sub $0x1,%3 \n"
- "jge 91b \n"
- "99: \n"
- : "+r"(src_argb0), // %0
- "+r"(src_argb1), // %1
- "+r"(dst_argb), // %2
- "+r"(width) // %3
- : "m"(kShuffleAlpha) // %4
- : "memory", "cc"
- , "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", "xmm7"
- );
-}
-#endif // HAS_ARGBBLENDROW_SSSE3
-
-#ifdef HAS_ARGBATTENUATEROW_SSE2
-// Attenuate 4 pixels at a time: each of B, G, R becomes ((c * 0x101) * (alpha * 0x101)) >> 24; the alpha byte is copied through unchanged.
-void ARGBAttenuateRow_SSE2(const uint8* src_argb, uint8* dst_argb, int width) {
- asm volatile (
- "pcmpeqb %%xmm4,%%xmm4 \n"
- "pslld $0x18,%%xmm4 \n" // xmm4 = 0xff000000 per dword (alpha byte mask)
- "pcmpeqb %%xmm5,%%xmm5 \n"
- "psrld $0x8,%%xmm5 \n" // xmm5 = 0x00ffffff per dword (colour byte mask)
-
- // 4 pixel loop. Runs at least once and has no remainder handling, so width is presumably a positive multiple of 4 - confirm with callers.
- LABELALIGN
- "1: \n"
- "movdqu " MEMACCESS(0) ",%%xmm0 \n"
- "punpcklbw %%xmm0,%%xmm0 \n" // pixels 0-1: every byte duplicated into a word (c * 0x101)
- "pshufhw $0xff,%%xmm0,%%xmm2 \n" // broadcast pixel 1's alpha word over the high 4 words
- "pshuflw $0xff,%%xmm2,%%xmm2 \n" // broadcast pixel 0's alpha word over the low 4 words
- "pmulhuw %%xmm2,%%xmm0 \n"
- "movdqu " MEMACCESS(0) ",%%xmm1 \n"
- "punpckhbw %%xmm1,%%xmm1 \n" // pixels 2-3, same treatment
- "pshufhw $0xff,%%xmm1,%%xmm2 \n"
- "pshuflw $0xff,%%xmm2,%%xmm2 \n"
- "pmulhuw %%xmm2,%%xmm1 \n"
- "movdqu " MEMACCESS(0) ",%%xmm2 \n"
- "lea " MEMLEA(0x10,0) ",%0 \n"
- "psrlw $0x8,%%xmm0 \n"
- "pand %%xmm4,%%xmm2 \n" // original alpha bytes only
- "psrlw $0x8,%%xmm1 \n"
- "packuswb %%xmm1,%%xmm0 \n"
- "pand %%xmm5,%%xmm0 \n" // drop the computed alpha lane
- "por %%xmm2,%%xmm0 \n" // put the original alpha back
- "movdqu %%xmm0," MEMACCESS(1) " \n"
- "lea " MEMLEA(0x10,1) ",%1 \n"
- "sub $0x4,%2 \n"
- "jg 1b \n"
- : "+r"(src_argb), // %0: source row, advanced 16 bytes per iteration
- "+r"(dst_argb), // %1: destination row, advanced 16 bytes per iteration
- "+r"(width) // %2: pixel count, decremented by 4 per iteration
- :
- : "memory", "cc"
- , "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5"
- );
-}
-#endif // HAS_ARGBATTENUATEROW_SSE2
-
-#ifdef HAS_ARGBATTENUATEROW_SSSE3
-// Shuffle tables duplicating alpha: per pixel, the alpha byte fills both bytes of the three colour words and the alpha word is zeroed (128u). kShuffleAlpha0 covers pixels 0-1, kShuffleAlpha1 pixels 2-3.
-static uvec8 kShuffleAlpha0 = {
- 3u, 3u, 3u, 3u, 3u, 3u, 128u, 128u, 7u, 7u, 7u, 7u, 7u, 7u, 128u, 128u
-};
-static uvec8 kShuffleAlpha1 = {
- 11u, 11u, 11u, 11u, 11u, 11u, 128u, 128u,
- 15u, 15u, 15u, 15u, 15u, 15u, 128u, 128u
-};
-// Attenuate 4 pixels at a time: B, G, R = ((c * 0x101) * (alpha * 0x101)) >> 24; the alpha byte is copied through unchanged.
-void ARGBAttenuateRow_SSSE3(const uint8* src_argb, uint8* dst_argb, int width) {
- asm volatile (
- "pcmpeqb %%xmm3,%%xmm3 \n"
- "pslld $0x18,%%xmm3 \n" // xmm3 = 0xff000000 per dword (alpha byte mask)
- "movdqa %3,%%xmm4 \n" // aligned load: assumes uvec8 objects are 16-byte aligned - confirm at the typedef
- "movdqa %4,%%xmm5 \n"
-
- // 4 pixel loop. Runs at least once and has no remainder handling.
- LABELALIGN
- "1: \n"
- "movdqu " MEMACCESS(0) ",%%xmm0 \n"
- "pshufb %%xmm4,%%xmm0 \n" // alpha words for pixels 0-1 (alpha lane = 0)
- "movdqu " MEMACCESS(0) ",%%xmm1 \n"
- "punpcklbw %%xmm1,%%xmm1 \n" // pixels 0-1 with every byte duplicated into a word
- "pmulhuw %%xmm1,%%xmm0 \n"
- "movdqu " MEMACCESS(0) ",%%xmm1 \n"
- "pshufb %%xmm5,%%xmm1 \n" // alpha words for pixels 2-3
- "movdqu " MEMACCESS(0) ",%%xmm2 \n"
- "punpckhbw %%xmm2,%%xmm2 \n"
- "pmulhuw %%xmm2,%%xmm1 \n"
- "movdqu " MEMACCESS(0) ",%%xmm2 \n"
- "lea " MEMLEA(0x10,0) ",%0 \n"
- "pand %%xmm3,%%xmm2 \n" // original alpha bytes only
- "psrlw $0x8,%%xmm0 \n"
- "psrlw $0x8,%%xmm1 \n"
- "packuswb %%xmm1,%%xmm0 \n"
- "por %%xmm2,%%xmm0 \n" // alpha lane was zero after the multiply, so OR restores it
- "movdqu %%xmm0," MEMACCESS(1) " \n"
- "lea " MEMLEA(0x10,1) ",%1 \n"
- "sub $0x4,%2 \n"
- "jg 1b \n"
- : "+r"(src_argb), // %0: source row, advanced 16 bytes per iteration
- "+r"(dst_argb), // %1: destination row, advanced 16 bytes per iteration
- "+r"(width) // %2: pixel count, decremented by 4 per iteration
- : "m"(kShuffleAlpha0), // %3
- "m"(kShuffleAlpha1) // %4
- : "memory", "cc"
- , "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5"
- );
-}
-#endif // HAS_ARGBATTENUATEROW_SSSE3
-
-#ifdef HAS_ARGBATTENUATEROW_AVX2
-// Shuffle table duplicating alpha: applied to byte-duplicated pixels, it copies each pixel's alpha word (bytes 6-7 / 14-15) over the three colour words and zeroes the alpha word (128u).
-static const uvec8 kShuffleAlpha_AVX2 = {
- 6u, 7u, 6u, 7u, 6u, 7u, 128u, 128u, 14u, 15u, 14u, 15u, 14u, 15u, 128u, 128u
-};
-// Attenuate 8 pixels at a time: B, G, R = ((c * 0x101) * (alpha * 0x101)) >> 24; the alpha byte is copied through unchanged.
-void ARGBAttenuateRow_AVX2(const uint8* src_argb, uint8* dst_argb, int width) {
- asm volatile (
- "vbroadcastf128 %3,%%ymm4 \n" // shuffle table in both 128-bit lanes
- "vpcmpeqb %%ymm5,%%ymm5,%%ymm5 \n"
- "vpslld $0x18,%%ymm5,%%ymm5 \n" // ymm5 = 0xff000000 per dword (alpha byte mask)
- "sub %0,%1 \n" // %1 = dst - src, so only %0 has to be advanced
-
- // 8 pixel loop. Runs at least once and has no remainder handling.
- LABELALIGN
- "1: \n"
- "vmovdqu " MEMACCESS(0) ",%%ymm6 \n"
- "vpunpcklbw %%ymm6,%%ymm6,%%ymm0 \n" // every byte duplicated into a word
- "vpunpckhbw %%ymm6,%%ymm6,%%ymm1 \n"
- "vpshufb %%ymm4,%%ymm0,%%ymm2 \n" // matching alpha words
- "vpshufb %%ymm4,%%ymm1,%%ymm3 \n"
- "vpmulhuw %%ymm2,%%ymm0,%%ymm0 \n"
- "vpmulhuw %%ymm3,%%ymm1,%%ymm1 \n"
- "vpand %%ymm5,%%ymm6,%%ymm6 \n" // original alpha bytes only
- "vpsrlw $0x8,%%ymm0,%%ymm0 \n"
- "vpsrlw $0x8,%%ymm1,%%ymm1 \n"
- "vpackuswb %%ymm1,%%ymm0,%%ymm0 \n"
- "vpor %%ymm6,%%ymm0,%%ymm0 \n" // restore alpha
- MEMOPMEM(vmovdqu,ymm0,0x00,0,1,1) // vmovdqu %%ymm0,(%0,%1)
- "lea " MEMLEA(0x20,0) ",%0 \n"
- "sub $0x8,%2 \n"
- "jg 1b \n"
- "vzeroupper \n"
- : "+r"(src_argb), // %0: source row, advanced 32 bytes per iteration
- "+r"(dst_argb), // %1: destination row; turned into an offset from %0 on entry
- "+r"(width) // %2: pixel count, decremented by 8 per iteration
- : "m"(kShuffleAlpha_AVX2) // %3
- : "memory", "cc"
- , "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6"
- );
-}
-#endif // HAS_ARGBATTENUATEROW_AVX2
-
-#ifdef HAS_ARGBUNATTENUATEROW_SSE2
-// Unattenuate 4 pixels at a time: each pixel is multiplied (pmulhuw) by the 32-bit fixed_invtbl8 entry selected by its own alpha byte.
-void ARGBUnattenuateRow_SSE2(const uint8* src_argb, uint8* dst_argb,
- int width) {
- uintptr_t alpha = 0; // scratch register for the table index
- asm volatile (
- // 4 pixel loop. Runs at least once and has no remainder handling.
- LABELALIGN
- "1: \n"
- "movdqu " MEMACCESS(0) ",%%xmm0 \n"
- "movzb " MEMACCESS2(0x03,0) ",%3 \n" // alpha of pixel 0
- "punpcklbw %%xmm0,%%xmm0 \n" // pixels 0-1 with every byte duplicated into a word
- MEMOPREG(movd,0x00,4,3,4,xmm2) // movd 0x0(%4,%3,4),%%xmm2
- "movzb " MEMACCESS2(0x07,0) ",%3 \n" // alpha of pixel 1
- MEMOPREG(movd,0x00,4,3,4,xmm3) // movd 0x0(%4,%3,4),%%xmm3
- "pshuflw $0x40,%%xmm2,%%xmm2 \n" // words = (lo, lo, lo, hi) of the table entry: lo for the colour lanes, hi for the alpha lane
- "pshuflw $0x40,%%xmm3,%%xmm3 \n"
- "movlhps %%xmm3,%%xmm2 \n"
- "pmulhuw %%xmm2,%%xmm0 \n"
- "movdqu " MEMACCESS(0) ",%%xmm1 \n"
- "movzb " MEMACCESS2(0x0b,0) ",%3 \n" // alpha of pixel 2
- "punpckhbw %%xmm1,%%xmm1 \n"
- MEMOPREG(movd,0x00,4,3,4,xmm2) // movd 0x0(%4,%3,4),%%xmm2
- "movzb " MEMACCESS2(0x0f,0) ",%3 \n" // alpha of pixel 3
- MEMOPREG(movd,0x00,4,3,4,xmm3) // movd 0x0(%4,%3,4),%%xmm3
- "pshuflw $0x40,%%xmm2,%%xmm2 \n"
- "pshuflw $0x40,%%xmm3,%%xmm3 \n"
- "movlhps %%xmm3,%%xmm2 \n"
- "pmulhuw %%xmm2,%%xmm1 \n"
- "lea " MEMLEA(0x10,0) ",%0 \n"
- "packuswb %%xmm1,%%xmm0 \n" // saturate back to bytes
- "movdqu %%xmm0," MEMACCESS(1) " \n"
- "lea " MEMLEA(0x10,1) ",%1 \n"
- "sub $0x4,%2 \n"
- "jg 1b \n"
- : "+r"(src_argb), // %0: source row, advanced 16 bytes per iteration
- "+r"(dst_argb), // %1: destination row, advanced 16 bytes per iteration
- "+r"(width), // %2: pixel count, decremented by 4 per iteration
- "+r"(alpha) // %3: scratch, holds the current alpha byte
- : "r"(fixed_invtbl8) // %4: table defined elsewhere in the file, indexed by alpha with 4-byte entries
- : "memory", "cc", NACL_R14 // NACL_R14 is defined elsewhere; presumably an optional clobber that supplies its own trailing comma - confirm
- "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5"
- );
-}
-#endif // HAS_ARGBUNATTENUATEROW_SSE2
-
-#ifdef HAS_ARGBUNATTENUATEROW_AVX2
-// Shuffle table duplicating alpha: within each 8-byte group it produces words (w0, w0, w0, w3), i.e. the first word for the three colour lanes and the last word for the alpha lane.
-static const uvec8 kUnattenShuffleAlpha_AVX2 = {
- 0u, 1u, 0u, 1u, 0u, 1u, 6u, 7u, 8u, 9u, 8u, 9u, 8u, 9u, 14u, 15u
-};
-// Unattenuate 8 pixels at a time: each pixel is multiplied (vpmulhuw) by the 32-bit fixed_invtbl8 entry selected by its own alpha byte.
-void ARGBUnattenuateRow_AVX2(const uint8* src_argb, uint8* dst_argb,
- int width) {
- uintptr_t alpha = 0; // scratch register for the table index
- asm volatile (
- "sub %0,%1 \n" // %1 = dst - src, so only %0 has to be advanced
- "vbroadcastf128 %5,%%ymm5 \n" // shuffle table in both 128-bit lanes
-
- // 8 pixel loop. Runs at least once and has no remainder handling.
- LABELALIGN
- "1: \n"
- // replace VPGATHER: eight scalar table loads, one per pixel's alpha byte, merged into ymm3
- "movzb " MEMACCESS2(0x03,0) ",%3 \n"
- MEMOPREG(vmovd,0x00,4,3,4,xmm0) // vmovd 0x0(%4,%3,4),%%xmm0
- "movzb " MEMACCESS2(0x07,0) ",%3 \n"
- MEMOPREG(vmovd,0x00,4,3,4,xmm1) // vmovd 0x0(%4,%3,4),%%xmm1
- "movzb " MEMACCESS2(0x0b,0) ",%3 \n"
- "vpunpckldq %%xmm1,%%xmm0,%%xmm6 \n" // entries for pixels 0-1
- MEMOPREG(vmovd,0x00,4,3,4,xmm2) // vmovd 0x0(%4,%3,4),%%xmm2
- "movzb " MEMACCESS2(0x0f,0) ",%3 \n"
- MEMOPREG(vmovd,0x00,4,3,4,xmm3) // vmovd 0x0(%4,%3,4),%%xmm3
- "movzb " MEMACCESS2(0x13,0) ",%3 \n"
- "vpunpckldq %%xmm3,%%xmm2,%%xmm7 \n" // entries for pixels 2-3
- MEMOPREG(vmovd,0x00,4,3,4,xmm0) // vmovd 0x0(%4,%3,4),%%xmm0
- "movzb " MEMACCESS2(0x17,0) ",%3 \n"
- MEMOPREG(vmovd,0x00,4,3,4,xmm1) // vmovd 0x0(%4,%3,4),%%xmm1
- "movzb " MEMACCESS2(0x1b,0) ",%3 \n"
- "vpunpckldq %%xmm1,%%xmm0,%%xmm0 \n" // entries for pixels 4-5
- MEMOPREG(vmovd,0x00,4,3,4,xmm2) // vmovd 0x0(%4,%3,4),%%xmm2
- "movzb " MEMACCESS2(0x1f,0) ",%3 \n"
- MEMOPREG(vmovd,0x00,4,3,4,xmm3) // vmovd 0x0(%4,%3,4),%%xmm3
- "vpunpckldq %%xmm3,%%xmm2,%%xmm2 \n" // entries for pixels 6-7
- "vpunpcklqdq %%xmm7,%%xmm6,%%xmm3 \n" // pixels 0-3
- "vpunpcklqdq %%xmm2,%%xmm0,%%xmm0 \n" // pixels 4-7
- "vinserti128 $0x1,%%xmm0,%%ymm3,%%ymm3 \n" // ymm3 = all eight table entries
- // end of VPGATHER
-
- "vmovdqu " MEMACCESS(0) ",%%ymm6 \n"
- "vpunpcklbw %%ymm6,%%ymm6,%%ymm0 \n" // every byte duplicated into a word
- "vpunpckhbw %%ymm6,%%ymm6,%%ymm1 \n"
- "vpunpcklwd %%ymm3,%%ymm3,%%ymm2 \n" // table entries widened to one 8-byte group per pixel
- "vpunpckhwd %%ymm3,%%ymm3,%%ymm3 \n"
- "vpshufb %%ymm5,%%ymm2,%%ymm2 \n" // (lo, lo, lo, hi) of each entry across B, G, R, A
- "vpshufb %%ymm5,%%ymm3,%%ymm3 \n"
- "vpmulhuw %%ymm2,%%ymm0,%%ymm0 \n"
- "vpmulhuw %%ymm3,%%ymm1,%%ymm1 \n"
- "vpackuswb %%ymm1,%%ymm0,%%ymm0 \n" // saturate back to bytes
- MEMOPMEM(vmovdqu,ymm0,0x00,0,1,1) // vmovdqu %%ymm0,(%0,%1)
- "lea " MEMLEA(0x20,0) ",%0 \n"
- "sub $0x8,%2 \n"
- "jg 1b \n"
- "vzeroupper \n"
- : "+r"(src_argb), // %0: source row, advanced 32 bytes per iteration
- "+r"(dst_argb), // %1: destination row; turned into an offset from %0 on entry
- "+r"(width), // %2: pixel count, decremented by 8 per iteration
- "+r"(alpha) // %3: scratch, holds the current alpha byte
- : "r"(fixed_invtbl8), // %4: table defined elsewhere in the file, indexed by alpha with 4-byte entries
- "m"(kUnattenShuffleAlpha_AVX2) // %5
- : "memory", "cc", NACL_R14 // NACL_R14 is defined elsewhere; presumably an optional clobber that supplies its own trailing comma - confirm
- "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", "xmm7"
- );
-}
-#endif // HAS_ARGBUNATTENUATEROW_AVX2
-
-#ifdef HAS_ARGBGRAYROW_SSSE3
-// Convert 8 ARGB pixels (32 bytes) to 8 Gray ARGB pixels: B, G and R all receive the same gray value, alpha is copied from the source.
-void ARGBGrayRow_SSSE3(const uint8* src_argb, uint8* dst_argb, int width) {
- asm volatile (
- "movdqa %3,%%xmm4 \n" // per-channel weights (kARGBToYJ, defined elsewhere)
- "movdqa %4,%%xmm5 \n" // constant added before the >> 7 (kAddYJ64, defined elsewhere; presumably a rounding bias - confirm)
-
- // 8 pixel loop. Runs at least once and has no remainder handling.
- LABELALIGN
- "1: \n"
- "movdqu " MEMACCESS(0) ",%%xmm0 \n"
- "movdqu " MEMACCESS2(0x10,0) ",%%xmm1 \n"
- "pmaddubsw %%xmm4,%%xmm0 \n"
- "pmaddubsw %%xmm4,%%xmm1 \n"
- "phaddw %%xmm1,%%xmm0 \n" // one weighted sum per pixel, 8 words
- "paddw %%xmm5,%%xmm0 \n"
- "psrlw $0x7,%%xmm0 \n"
- "packuswb %%xmm0,%%xmm0 \n" // 8 gray bytes
- "movdqu " MEMACCESS(0) ",%%xmm2 \n"
- "movdqu " MEMACCESS2(0x10,0) ",%%xmm3 \n"
- "lea " MEMLEA(0x20,0) ",%0 \n"
- "psrld $0x18,%%xmm2 \n" // isolate the alpha byte of every pixel
- "psrld $0x18,%%xmm3 \n"
- "packuswb %%xmm3,%%xmm2 \n"
- "packuswb %%xmm2,%%xmm2 \n" // 8 alpha bytes
- "movdqa %%xmm0,%%xmm3 \n"
- "punpcklbw %%xmm0,%%xmm0 \n" // (gray, gray) byte pairs
- "punpcklbw %%xmm2,%%xmm3 \n" // (gray, alpha) byte pairs
- "movdqa %%xmm0,%%xmm1 \n"
- "punpcklwd %%xmm3,%%xmm0 \n" // pixels 0-3 as (gray, gray, gray, alpha)
- "punpckhwd %%xmm3,%%xmm1 \n" // pixels 4-7
- "movdqu %%xmm0," MEMACCESS(1) " \n"
- "movdqu %%xmm1," MEMACCESS2(0x10,1) " \n"
- "lea " MEMLEA(0x20,1) ",%1 \n"
- "sub $0x8,%2 \n"
- "jg 1b \n"
- : "+r"(src_argb), // %0: source row, advanced 32 bytes per iteration
- "+r"(dst_argb), // %1: destination row, advanced 32 bytes per iteration
- "+r"(width) // %2: pixel count, decremented by 8 per iteration
- : "m"(kARGBToYJ), // %3
- "m"(kAddYJ64) // %4
- : "memory", "cc"
- , "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5"
- );
-}
-#endif // HAS_ARGBGRAYROW_SSSE3
-
-#ifdef HAS_ARGBSEPIAROW_SSSE3
-// b = (r * 35 + g * 68 + b * 17) >> 7
-// g = (r * 45 + g * 88 + b * 22) >> 7
-// r = (r * 50 + g * 98 + b * 24) >> 7
-// Constants for ARGB color to sepia tone; each table holds one output channel's weights in memory order B, G, R, A (alpha weight 0), repeated for 4 pixels.
-static vec8 kARGBToSepiaB = {
- 17, 68, 35, 0, 17, 68, 35, 0, 17, 68, 35, 0, 17, 68, 35, 0
-};
-
-static vec8 kARGBToSepiaG = {
- 22, 88, 45, 0, 22, 88, 45, 0, 22, 88, 45, 0, 22, 88, 45, 0
-};
-
-static vec8 kARGBToSepiaR = {
- 24, 98, 50, 0, 24, 98, 50, 0, 24, 98, 50, 0, 24, 98, 50, 0
-};
-
-// Convert 8 ARGB pixels (32 bytes) to 8 Sepia ARGB pixels, in place: dst_argb is both read and written; alpha is preserved.
-void ARGBSepiaRow_SSSE3(uint8* dst_argb, int width) {
- asm volatile (
- "movdqa %2,%%xmm2 \n" // weights for new B
- "movdqa %3,%%xmm3 \n" // weights for new G
- "movdqa %4,%%xmm4 \n" // weights for new R
-
- // 8 pixel loop. Runs at least once and has no remainder handling.
- LABELALIGN
- "1: \n"
- "movdqu " MEMACCESS(0) ",%%xmm0 \n"
- "movdqu " MEMACCESS2(0x10,0) ",%%xmm6 \n"
- "pmaddubsw %%xmm2,%%xmm0 \n"
- "pmaddubsw %%xmm2,%%xmm6 \n"
- "phaddw %%xmm6,%%xmm0 \n"
- "psrlw $0x7,%%xmm0 \n"
- "packuswb %%xmm0,%%xmm0 \n" // 8 new B bytes (saturated)
- "movdqu " MEMACCESS(0) ",%%xmm5 \n"
- "movdqu " MEMACCESS2(0x10,0) ",%%xmm1 \n"
- "pmaddubsw %%xmm3,%%xmm5 \n"
- "pmaddubsw %%xmm3,%%xmm1 \n"
- "phaddw %%xmm1,%%xmm5 \n"
- "psrlw $0x7,%%xmm5 \n"
- "packuswb %%xmm5,%%xmm5 \n" // 8 new G bytes
- "punpcklbw %%xmm5,%%xmm0 \n" // (B, G) byte pairs
- "movdqu " MEMACCESS(0) ",%%xmm5 \n"
- "movdqu " MEMACCESS2(0x10,0) ",%%xmm1 \n"
- "pmaddubsw %%xmm4,%%xmm5 \n"
- "pmaddubsw %%xmm4,%%xmm1 \n"
- "phaddw %%xmm1,%%xmm5 \n"
- "psrlw $0x7,%%xmm5 \n"
- "packuswb %%xmm5,%%xmm5 \n" // 8 new R bytes
- "movdqu " MEMACCESS(0) ",%%xmm6 \n"
- "movdqu " MEMACCESS2(0x10,0) ",%%xmm1 \n"
- "psrld $0x18,%%xmm6 \n" // isolate the original alpha bytes
- "psrld $0x18,%%xmm1 \n"
- "packuswb %%xmm1,%%xmm6 \n"
- "packuswb %%xmm6,%%xmm6 \n"
- "punpcklbw %%xmm6,%%xmm5 \n" // (R, A) byte pairs
- "movdqa %%xmm0,%%xmm1 \n"
- "punpcklwd %%xmm5,%%xmm0 \n" // pixels 0-3 as (B, G, R, A)
- "punpckhwd %%xmm5,%%xmm1 \n" // pixels 4-7
- "movdqu %%xmm0," MEMACCESS(0) " \n" // written back over the pixels just read
- "movdqu %%xmm1," MEMACCESS2(0x10,0) " \n"
- "lea " MEMLEA(0x20,0) ",%0 \n"
- "sub $0x8,%1 \n"
- "jg 1b \n"
- : "+r"(dst_argb), // %0: row converted in place, advanced 32 bytes per iteration
- "+r"(width) // %1: pixel count, decremented by 8 per iteration
- : "m"(kARGBToSepiaB), // %2
- "m"(kARGBToSepiaG), // %3
- "m"(kARGBToSepiaR) // %4
- : "memory", "cc"
- , "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6"
- );
-}
-#endif // HAS_ARGBSEPIAROW_SSSE3
-
-#ifdef HAS_ARGBCOLORMATRIXROW_SSSE3
-// Transform 8 ARGB pixels (32 bytes) with color matrix.
-// Same as Sepia except matrix is provided: matrix_argb supplies 16 signed bytes, 4 weights for each of the 4 output bytes; every weighted sum is shifted right by 6 and saturated to a byte.
-void ARGBColorMatrixRow_SSSE3(const uint8* src_argb, uint8* dst_argb,
- const int8* matrix_argb, int width) {
- asm volatile (
- "movdqu " MEMACCESS(3) ",%%xmm5 \n"
- "pshufd $0x00,%%xmm5,%%xmm2 \n" // weights for output byte 0, repeated for 4 pixels
- "pshufd $0x55,%%xmm5,%%xmm3 \n" // weights for output byte 1
- "pshufd $0xaa,%%xmm5,%%xmm4 \n" // weights for output byte 2
- "pshufd $0xff,%%xmm5,%%xmm5 \n" // weights for output byte 3
-
- // 8 pixel loop. Runs at least once and has no remainder handling.
- LABELALIGN
- "1: \n"
- "movdqu " MEMACCESS(0) ",%%xmm0 \n"
- "movdqu " MEMACCESS2(0x10,0) ",%%xmm7 \n"
- "pmaddubsw %%xmm2,%%xmm0 \n"
- "pmaddubsw %%xmm2,%%xmm7 \n"
- "movdqu " MEMACCESS(0) ",%%xmm6 \n"
- "movdqu " MEMACCESS2(0x10,0) ",%%xmm1 \n"
- "pmaddubsw %%xmm3,%%xmm6 \n"
- "pmaddubsw %%xmm3,%%xmm1 \n"
- "phaddsw %%xmm7,%%xmm0 \n"
- "phaddsw %%xmm1,%%xmm6 \n"
- "psraw $0x6,%%xmm0 \n" // arithmetic shift: sums may be negative
- "psraw $0x6,%%xmm6 \n"
- "packuswb %%xmm0,%%xmm0 \n"
- "packuswb %%xmm6,%%xmm6 \n"
- "punpcklbw %%xmm6,%%xmm0 \n" // (byte 0, byte 1) pairs
- "movdqu " MEMACCESS(0) ",%%xmm1 \n"
- "movdqu " MEMACCESS2(0x10,0) ",%%xmm7 \n"
- "pmaddubsw %%xmm4,%%xmm1 \n"
- "pmaddubsw %%xmm4,%%xmm7 \n"
- "phaddsw %%xmm7,%%xmm1 \n"
- "movdqu " MEMACCESS(0) ",%%xmm6 \n"
- "movdqu " MEMACCESS2(0x10,0) ",%%xmm7 \n"
- "pmaddubsw %%xmm5,%%xmm6 \n"
- "pmaddubsw %%xmm5,%%xmm7 \n"
- "phaddsw %%xmm7,%%xmm6 \n"
- "psraw $0x6,%%xmm1 \n"
- "psraw $0x6,%%xmm6 \n"
- "packuswb %%xmm1,%%xmm1 \n"
- "packuswb %%xmm6,%%xmm6 \n"
- "punpcklbw %%xmm6,%%xmm1 \n" // (byte 2, byte 3) pairs
- "movdqa %%xmm0,%%xmm6 \n"
- "punpcklwd %%xmm1,%%xmm0 \n" // pixels 0-3
- "punpckhwd %%xmm1,%%xmm6 \n" // pixels 4-7
- "movdqu %%xmm0," MEMACCESS(1) " \n"
- "movdqu %%xmm6," MEMACCESS2(0x10,1) " \n"
- "lea " MEMLEA(0x20,0) ",%0 \n"
- "lea " MEMLEA(0x20,1) ",%1 \n"
- "sub $0x8,%2 \n"
- "jg 1b \n"
- : "+r"(src_argb), // %0: source row, advanced 32 bytes per iteration
- "+r"(dst_argb), // %1: destination row, advanced 32 bytes per iteration
- "+r"(width) // %2: pixel count, decremented by 8 per iteration
- : "r"(matrix_argb) // %3: 16 bytes are read from here once, before the loop
- : "memory", "cc"
- , "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", "xmm7"
- );
-}
-#endif // HAS_ARGBCOLORMATRIXROW_SSSE3
-
-#ifdef HAS_ARGBQUANTIZEROW_SSE2
-// Quantize 4 ARGB pixels (16 bytes) in place: each colour byte becomes saturate(((v * scale) >> 16) * interval_size + interval_offset), using the low 16 bits of each parameter; the original alpha byte is OR'ed back in.
-void ARGBQuantizeRow_SSE2(uint8* dst_argb, int scale, int interval_size,
- int interval_offset, int width) {
- asm volatile (
- "movd %2,%%xmm2 \n"
- "movd %3,%%xmm3 \n"
- "movd %4,%%xmm4 \n"
- "pshuflw $0x40,%%xmm2,%%xmm2 \n" // words (lo, lo, lo, hi): low word of scale in the three colour lanes
- "pshufd $0x44,%%xmm2,%%xmm2 \n" // same pattern for the second pixel of each half
- "pshuflw $0x40,%%xmm3,%%xmm3 \n"
- "pshufd $0x44,%%xmm3,%%xmm3 \n"
- "pshuflw $0x40,%%xmm4,%%xmm4 \n"
- "pshufd $0x44,%%xmm4,%%xmm4 \n"
- "pxor %%xmm5,%%xmm5 \n" // zero, used to widen bytes to words
- "pcmpeqb %%xmm6,%%xmm6 \n"
- "pslld $0x18,%%xmm6 \n" // xmm6 = 0xff000000 per dword (alpha byte mask)
-
- // 4 pixel loop. Runs at least once and has no remainder handling.
- LABELALIGN
- "1: \n"
- "movdqu " MEMACCESS(0) ",%%xmm0 \n"
- "punpcklbw %%xmm5,%%xmm0 \n" // pixels 0-1 as words
- "pmulhuw %%xmm2,%%xmm0 \n"
- "movdqu " MEMACCESS(0) ",%%xmm1 \n"
- "punpckhbw %%xmm5,%%xmm1 \n" // pixels 2-3 as words
- "pmulhuw %%xmm2,%%xmm1 \n"
- "pmullw %%xmm3,%%xmm0 \n"
- "movdqu " MEMACCESS(0) ",%%xmm7 \n"
- "pmullw %%xmm3,%%xmm1 \n"
- "pand %%xmm6,%%xmm7 \n" // original alpha bytes only
- "paddw %%xmm4,%%xmm0 \n"
- "paddw %%xmm4,%%xmm1 \n"
- "packuswb %%xmm1,%%xmm0 \n"
- "por %%xmm7,%%xmm0 \n"
- "movdqu %%xmm0," MEMACCESS(0) " \n" // written back over the pixels just read
- "lea " MEMLEA(0x10,0) ",%0 \n"
- "sub $0x4,%1 \n"
- "jg 1b \n"
- : "+r"(dst_argb), // %0: row quantized in place, advanced 16 bytes per iteration
- "+r"(width) // %1: pixel count, decremented by 4 per iteration
- : "r"(scale), // %2
- "r"(interval_size), // %3
- "r"(interval_offset) // %4
- : "memory", "cc"
- , "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", "xmm7"
- );
-}
-#endif // HAS_ARGBQUANTIZEROW_SSE2
-
-#ifdef HAS_ARGBSHADEROW_SSE2
-// Shade 4 pixels at a time by specified value: every byte (alpha included) becomes ((c * 0x101) * (v * 0x101)) >> 24, where v is the byte of value at the same position within the pixel.
-void ARGBShadeRow_SSE2(const uint8* src_argb, uint8* dst_argb, int width,
- uint32 value) {
- asm volatile (
- "movd %3,%%xmm2 \n"
- "punpcklbw %%xmm2,%%xmm2 \n" // each byte of value duplicated into a word
- "punpcklqdq %%xmm2,%%xmm2 \n" // same four words for both pixels of a half
-
- // 4 pixel loop. Runs at least once and has no remainder handling.
- LABELALIGN
- "1: \n"
- "movdqu " MEMACCESS(0) ",%%xmm0 \n"
- "lea " MEMLEA(0x10,0) ",%0 \n"
- "movdqa %%xmm0,%%xmm1 \n"
- "punpcklbw %%xmm0,%%xmm0 \n" // pixels 0-1, bytes duplicated into words
- "punpckhbw %%xmm1,%%xmm1 \n" // pixels 2-3
- "pmulhuw %%xmm2,%%xmm0 \n"
- "pmulhuw %%xmm2,%%xmm1 \n"
- "psrlw $0x8,%%xmm0 \n"
- "psrlw $0x8,%%xmm1 \n"
- "packuswb %%xmm1,%%xmm0 \n"
- "movdqu %%xmm0," MEMACCESS(1) " \n"
- "lea " MEMLEA(0x10,1) ",%1 \n"
- "sub $0x4,%2 \n"
- "jg 1b \n"
- : "+r"(src_argb), // %0: source row, advanced 16 bytes per iteration
- "+r"(dst_argb), // %1: destination row, advanced 16 bytes per iteration
- "+r"(width) // %2: pixel count, decremented by 4 per iteration
- : "r"(value) // %3: four per-channel scale bytes packed in one 32-bit value
- : "memory", "cc"
- , "xmm0", "xmm1", "xmm2"
- );
-}
-#endif // HAS_ARGBSHADEROW_SSE2
-
-#ifdef HAS_ARGBMULTIPLYROW_SSE2
-// Multiply 2 rows of ARGB pixels together, 4 pixels at a time: every output byte is ((a * 0x101) * b) >> 16, saturated to a byte.
-void ARGBMultiplyRow_SSE2(const uint8* src_argb0, const uint8* src_argb1,
- uint8* dst_argb, int width) {
- asm volatile (
- "pxor %%xmm5,%%xmm5 \n" // zero, used to widen src_argb1 bytes to words
-
- // 4 pixel loop. Runs at least once and has no remainder handling.
- LABELALIGN
- "1: \n"
- "movdqu " MEMACCESS(0) ",%%xmm0 \n"
- "lea " MEMLEA(0x10,0) ",%0 \n"
- "movdqu " MEMACCESS(1) ",%%xmm2 \n"
- "lea " MEMLEA(0x10,1) ",%1 \n"
- "movdqu %%xmm0,%%xmm1 \n"
- "movdqu %%xmm2,%%xmm3 \n"
- "punpcklbw %%xmm0,%%xmm0 \n" // src_argb0 bytes duplicated into words (a * 0x101)
- "punpckhbw %%xmm1,%%xmm1 \n"
- "punpcklbw %%xmm5,%%xmm2 \n" // src_argb1 bytes zero-extended to words
- "punpckhbw %%xmm5,%%xmm3 \n"
- "pmulhuw %%xmm2,%%xmm0 \n"
- "pmulhuw %%xmm3,%%xmm1 \n"
- "packuswb %%xmm1,%%xmm0 \n"
- "movdqu %%xmm0," MEMACCESS(2) " \n"
- "lea " MEMLEA(0x10,2) ",%2 \n"
- "sub $0x4,%3 \n"
- "jg 1b \n"
- : "+r"(src_argb0), // %0: first source row, advanced 16 bytes per iteration
- "+r"(src_argb1), // %1: second source row, advanced 16 bytes per iteration
- "+r"(dst_argb), // %2: destination row, advanced 16 bytes per iteration
- "+r"(width) // %3: pixel count, decremented by 4 per iteration
- :
- : "memory", "cc"
- , "xmm0", "xmm1", "xmm2", "xmm3", "xmm5"
- );
-}
-#endif // HAS_ARGBMULTIPLYROW_SSE2
-
-#ifdef HAS_ARGBMULTIPLYROW_AVX2
-// Multiply 2 rows of ARGB pixels together, 8 pixels at a time: every output byte is ((a * 0x101) * b) >> 16, saturated to a byte.
-void ARGBMultiplyRow_AVX2(const uint8* src_argb0, const uint8* src_argb1,
- uint8* dst_argb, int width) {
- asm volatile (
- "vpxor %%ymm5,%%ymm5,%%ymm5 \n" // zero, used to widen src_argb1 bytes to words
-
- // 8 pixel loop. Runs at least once and has no remainder handling.
- LABELALIGN
- "1: \n"
- "vmovdqu " MEMACCESS(0) ",%%ymm1 \n"
- "lea " MEMLEA(0x20,0) ",%0 \n"
- "vmovdqu " MEMACCESS(1) ",%%ymm3 \n"
- "lea " MEMLEA(0x20,1) ",%1 \n"
- "vpunpcklbw %%ymm1,%%ymm1,%%ymm0 \n" // src_argb0 bytes duplicated into words (a * 0x101)
- "vpunpckhbw %%ymm1,%%ymm1,%%ymm1 \n"
- "vpunpcklbw %%ymm5,%%ymm3,%%ymm2 \n" // src_argb1 bytes zero-extended to words
- "vpunpckhbw %%ymm5,%%ymm3,%%ymm3 \n"
- "vpmulhuw %%ymm2,%%ymm0,%%ymm0 \n"
- "vpmulhuw %%ymm3,%%ymm1,%%ymm1 \n"
- "vpackuswb %%ymm1,%%ymm0,%%ymm0 \n"
- "vmovdqu %%ymm0," MEMACCESS(2) " \n"
- "lea " MEMLEA(0x20,2) ",%2 \n"
- "sub $0x8,%3 \n"
- "jg 1b \n"
- "vzeroupper \n"
- : "+r"(src_argb0), // %0: first source row, advanced 32 bytes per iteration
- "+r"(src_argb1), // %1: second source row, advanced 32 bytes per iteration
- "+r"(dst_argb), // %2: destination row, advanced 32 bytes per iteration
- "+r"(width) // %3: pixel count, decremented by 8 per iteration
- :
- : "memory", "cc"
- // Declared unconditionally: the registers are overwritten whether or not __AVX2__ is defined for this translation unit.
- , "xmm0", "xmm1", "xmm2", "xmm3", "xmm5"
- );
-}
-#endif // HAS_ARGBMULTIPLYROW_AVX2
-
-#ifdef HAS_ARGBADDROW_SSE2
-// Add 2 rows of ARGB pixels together, 4 pixels at a time, with per-byte unsigned saturation (paddusb).
-void ARGBAddRow_SSE2(const uint8* src_argb0, const uint8* src_argb1,
- uint8* dst_argb, int width) {
- asm volatile (
- // 4 pixel loop. Runs at least once and has no remainder handling.
- LABELALIGN
- "1: \n"
- "movdqu " MEMACCESS(0) ",%%xmm0 \n"
- "lea " MEMLEA(0x10,0) ",%0 \n"
- "movdqu " MEMACCESS(1) ",%%xmm1 \n"
- "lea " MEMLEA(0x10,1) ",%1 \n"
- "paddusb %%xmm1,%%xmm0 \n"
- "movdqu %%xmm0," MEMACCESS(2) " \n"
- "lea " MEMLEA(0x10,2) ",%2 \n"
- "sub $0x4,%3 \n"
- "jg 1b \n"
- : "+r"(src_argb0), // %0: first source row, advanced 16 bytes per iteration
- "+r"(src_argb1), // %1: second source row, advanced 16 bytes per iteration
- "+r"(dst_argb), // %2: destination row, advanced 16 bytes per iteration
- "+r"(width) // %3: pixel count, decremented by 4 per iteration
- :
- : "memory", "cc"
- , "xmm0", "xmm1"
- );
-}
-#endif // HAS_ARGBADDROW_SSE2
-
-#ifdef HAS_ARGBADDROW_AVX2
-// Add 2 rows of ARGB pixels together, 8 pixels at a time, with per-byte unsigned saturation (vpaddusb).
-void ARGBAddRow_AVX2(const uint8* src_argb0, const uint8* src_argb1,
- uint8* dst_argb, int width) {
- asm volatile (
- // 8 pixel loop. Runs at least once and has no remainder handling.
- LABELALIGN
- "1: \n"
- "vmovdqu " MEMACCESS(0) ",%%ymm0 \n"
- "lea " MEMLEA(0x20,0) ",%0 \n"
- "vpaddusb " MEMACCESS(1) ",%%ymm0,%%ymm0 \n"
- "lea " MEMLEA(0x20,1) ",%1 \n"
- "vmovdqu %%ymm0," MEMACCESS(2) " \n"
- "lea " MEMLEA(0x20,2) ",%2 \n"
- "sub $0x8,%3 \n"
- "jg 1b \n"
- "vzeroupper \n"
- : "+r"(src_argb0), // %0: first source row, advanced 32 bytes per iteration
- "+r"(src_argb1), // %1: second source row, advanced 32 bytes per iteration
- "+r"(dst_argb), // %2: destination row, advanced 32 bytes per iteration
- "+r"(width) // %3: pixel count, decremented by 8 per iteration
- :
- : "memory", "cc"
- , "xmm0"
- );
-}
-#endif // HAS_ARGBADDROW_AVX2
-
-#ifdef HAS_ARGBSUBTRACTROW_SSE2
-// Subtract 2 rows of ARGB pixels, 4 pixels at a time: dst = src_argb0 - src_argb1 per byte, clamped at 0 (psubusb).
-void ARGBSubtractRow_SSE2(const uint8* src_argb0, const uint8* src_argb1,
- uint8* dst_argb, int width) {
- asm volatile (
- // 4 pixel loop. Runs at least once and has no remainder handling.
- LABELALIGN
- "1: \n"
- "movdqu " MEMACCESS(0) ",%%xmm0 \n"
- "lea " MEMLEA(0x10,0) ",%0 \n"
- "movdqu " MEMACCESS(1) ",%%xmm1 \n"
- "lea " MEMLEA(0x10,1) ",%1 \n"
- "psubusb %%xmm1,%%xmm0 \n"
- "movdqu %%xmm0," MEMACCESS(2) " \n"
- "lea " MEMLEA(0x10,2) ",%2 \n"
- "sub $0x4,%3 \n"
- "jg 1b \n"
- : "+r"(src_argb0), // %0: minuend row, advanced 16 bytes per iteration
- "+r"(src_argb1), // %1: subtrahend row, advanced 16 bytes per iteration
- "+r"(dst_argb), // %2: destination row, advanced 16 bytes per iteration
- "+r"(width) // %3: pixel count, decremented by 4 per iteration
- :
- : "memory", "cc"
- , "xmm0", "xmm1"
- );
-}
-#endif // HAS_ARGBSUBTRACTROW_SSE2
-
-#ifdef HAS_ARGBSUBTRACTROW_AVX2
-// Subtract 2 rows of ARGB pixels, 8 pixels at a time: dst = src_argb0 - src_argb1 per byte, clamped at 0 (vpsubusb).
-void ARGBSubtractRow_AVX2(const uint8* src_argb0, const uint8* src_argb1,
- uint8* dst_argb, int width) {
- asm volatile (
- // 8 pixel loop. Runs at least once and has no remainder handling.
- LABELALIGN
- "1: \n"
- "vmovdqu " MEMACCESS(0) ",%%ymm0 \n"
- "lea " MEMLEA(0x20,0) ",%0 \n"
- "vpsubusb " MEMACCESS(1) ",%%ymm0,%%ymm0 \n"
- "lea " MEMLEA(0x20,1) ",%1 \n"
- "vmovdqu %%ymm0," MEMACCESS(2) " \n"
- "lea " MEMLEA(0x20,2) ",%2 \n"
- "sub $0x8,%3 \n"
- "jg 1b \n"
- "vzeroupper \n"
- : "+r"(src_argb0), // %0: minuend row, advanced 32 bytes per iteration
- "+r"(src_argb1), // %1: subtrahend row, advanced 32 bytes per iteration
- "+r"(dst_argb), // %2: destination row, advanced 32 bytes per iteration
- "+r"(width) // %3: pixel count, decremented by 8 per iteration
- :
- : "memory", "cc"
- , "xmm0"
- );
-}
-#endif // HAS_ARGBSUBTRACTROW_AVX2
-
-#ifdef HAS_SOBELXROW_SSE2
-// SobelX as a matrix is
-// -1 0 1
-// -2 0 2
-// -1 0 1
-void SobelXRow_SSE2(const uint8* src_y0, const uint8* src_y1,
- const uint8* src_y2, uint8* dst_sobelx, int width) {
- asm volatile (
- "sub %0,%1 \n" // %1..%3 become offsets from src_y0 so only %0 has to be advanced
- "sub %0,%2 \n"
- "sub %0,%3 \n"
- "pxor %%xmm5,%%xmm5 \n" // zero, used to widen bytes to words
-
- // 8 pixel loop: dst[x] = saturate(|d0 + 2*d1 + d2|) with dN = src_yN[x] - src_yN[x + 2]. Reads 2 bytes past the 8 pixels of each row.
- LABELALIGN
- "1: \n"
- "movq " MEMACCESS(0) ",%%xmm0 \n"
- "movq " MEMACCESS2(0x2,0) ",%%xmm1 \n"
- "punpcklbw %%xmm5,%%xmm0 \n"
- "punpcklbw %%xmm5,%%xmm1 \n"
- "psubw %%xmm1,%%xmm0 \n" // d0
- MEMOPREG(movq,0x00,0,1,1,xmm1) // movq (%0,%1,1),%%xmm1
- MEMOPREG(movq,0x02,0,1,1,xmm2) // movq 0x2(%0,%1,1),%%xmm2
- "punpcklbw %%xmm5,%%xmm1 \n"
- "punpcklbw %%xmm5,%%xmm2 \n"
- "psubw %%xmm2,%%xmm1 \n" // d1
- MEMOPREG(movq,0x00,0,2,1,xmm2) // movq (%0,%2,1),%%xmm2
- MEMOPREG(movq,0x02,0,2,1,xmm3) // movq 0x2(%0,%2,1),%%xmm3
- "punpcklbw %%xmm5,%%xmm2 \n"
- "punpcklbw %%xmm5,%%xmm3 \n"
- "psubw %%xmm3,%%xmm2 \n" // d2
- "paddw %%xmm2,%%xmm0 \n"
- "paddw %%xmm1,%%xmm0 \n"
- "paddw %%xmm1,%%xmm0 \n" // middle row counted twice
- "pxor %%xmm1,%%xmm1 \n"
- "psubw %%xmm0,%%xmm1 \n"
- "pmaxsw %%xmm1,%%xmm0 \n" // absolute value: max(x, -x)
- "packuswb %%xmm0,%%xmm0 \n"
- MEMOPMEM(movq,xmm0,0x00,0,3,1) // movq %%xmm0,(%0,%3,1)
- "lea " MEMLEA(0x8,0) ",%0 \n"
- "sub $0x8,%4 \n"
- "jg 1b \n"
- : "+r"(src_y0), // %0: top row, advanced 8 bytes per iteration
- "+r"(src_y1), // %1: middle row; turned into an offset from %0 on entry
- "+r"(src_y2), // %2: bottom row; turned into an offset from %0 on entry
- "+r"(dst_sobelx), // %3: output row; turned into an offset from %0 on entry
- "+r"(width) // %4: pixel count, decremented by 8 per iteration
- :
- : "memory", "cc", NACL_R14 // NACL_R14 is defined elsewhere; presumably an optional clobber that supplies its own trailing comma - confirm
- "xmm0", "xmm1", "xmm2", "xmm3", "xmm5"
- );
-}
-#endif // HAS_SOBELXROW_SSE2
-
-#ifdef HAS_SOBELYROW_SSE2
-// SobelY as a matrix is
-// -1 -2 -1
-// 0 0 0
-// 1 2 1
-void SobelYRow_SSE2(const uint8* src_y0, const uint8* src_y1,
- uint8* dst_sobely, int width) {
- asm volatile (
- "sub %0,%1 \n" // %1 and %2 become offsets from src_y0 so only %0 has to be advanced
- "sub %0,%2 \n"
- "pxor %%xmm5,%%xmm5 \n" // zero, used to widen bytes to words
-
- // 8 pixel loop: dst[x] = saturate(|e0 + 2*e1 + e2|) with eK = src_y0[x + K] - src_y1[x + K]. Reads 2 bytes past the 8 pixels of each row.
- LABELALIGN
- "1: \n"
- "movq " MEMACCESS(0) ",%%xmm0 \n"
- MEMOPREG(movq,0x00,0,1,1,xmm1) // movq (%0,%1,1),%%xmm1
- "punpcklbw %%xmm5,%%xmm0 \n"
- "punpcklbw %%xmm5,%%xmm1 \n"
- "psubw %%xmm1,%%xmm0 \n" // e0
- "movq " MEMACCESS2(0x1,0) ",%%xmm1 \n"
- MEMOPREG(movq,0x01,0,1,1,xmm2) // movq 0x1(%0,%1,1),%%xmm2
- "punpcklbw %%xmm5,%%xmm1 \n"
- "punpcklbw %%xmm5,%%xmm2 \n"
- "psubw %%xmm2,%%xmm1 \n" // e1
- "movq " MEMACCESS2(0x2,0) ",%%xmm2 \n"
- MEMOPREG(movq,0x02,0,1,1,xmm3) // movq 0x2(%0,%1,1),%%xmm3
- "punpcklbw %%xmm5,%%xmm2 \n"
- "punpcklbw %%xmm5,%%xmm3 \n"
- "psubw %%xmm3,%%xmm2 \n" // e2
- "paddw %%xmm2,%%xmm0 \n"
- "paddw %%xmm1,%%xmm0 \n"
- "paddw %%xmm1,%%xmm0 \n" // centre column counted twice
- "pxor %%xmm1,%%xmm1 \n"
- "psubw %%xmm0,%%xmm1 \n"
- "pmaxsw %%xmm1,%%xmm0 \n" // absolute value: max(x, -x)
- "packuswb %%xmm0,%%xmm0 \n"
- MEMOPMEM(movq,xmm0,0x00,0,2,1) // movq %%xmm0,(%0,%2,1)
- "lea " MEMLEA(0x8,0) ",%0 \n"
- "sub $0x8,%3 \n"
- "jg 1b \n"
- : "+r"(src_y0), // %0: first input row, advanced 8 bytes per iteration
- "+r"(src_y1), // %1: second input row; turned into an offset from %0 on entry
- "+r"(dst_sobely), // %2: output row; turned into an offset from %0 on entry
- "+r"(width) // %3: pixel count, decremented by 8 per iteration
- :
- : "memory", "cc", NACL_R14 // NACL_R14 is defined elsewhere; presumably an optional clobber that supplies its own trailing comma - confirm
- "xmm0", "xmm1", "xmm2", "xmm3", "xmm5"
- );
-}
-#endif // HAS_SOBELYROW_SSE2
-
-#ifdef HAS_SOBELROW_SSE2
-// Adds Sobel X and Sobel Y (saturating byte add) and stores Sobel into ARGB.
-// A = 255
-// R = Sobel
-// G = Sobel
-// B = Sobel
-void SobelRow_SSE2(const uint8* src_sobelx, const uint8* src_sobely,
- uint8* dst_argb, int width) {
- asm volatile (
- "sub %0,%1 \n" // %1 = src_sobely - src_sobelx, so one pointer walks both inputs
- "pcmpeqb %%xmm5,%%xmm5 \n"
- "pslld $0x18,%%xmm5 \n" // xmm5 = 0xff000000 per dword (alpha = 255)
-
- // 16 pixel loop: 16 input bytes from each plane produce 64 output bytes. Runs at least once and has no remainder handling.
- LABELALIGN
- "1: \n"
- "movdqu " MEMACCESS(0) ",%%xmm0 \n"
- MEMOPREG(movdqu,0x00,0,1,1,xmm1) // movdqu (%0,%1,1),%%xmm1
- "lea " MEMLEA(0x10,0) ",%0 \n"
- "paddusb %%xmm1,%%xmm0 \n" // sobel = saturate(x + y)
- "movdqa %%xmm0,%%xmm2 \n"
- "punpcklbw %%xmm0,%%xmm2 \n" // each sobel byte doubled, pixels 0-7
- "punpckhbw %%xmm0,%%xmm0 \n" // pixels 8-15
- "movdqa %%xmm2,%%xmm1 \n"
- "punpcklwd %%xmm2,%%xmm1 \n" // each sobel byte x4, pixels 0-3
- "punpckhwd %%xmm2,%%xmm2 \n" // pixels 4-7
- "por %%xmm5,%%xmm1 \n" // overwrite the top byte with 0xff
- "por %%xmm5,%%xmm2 \n"
- "movdqa %%xmm0,%%xmm3 \n"
- "punpcklwd %%xmm0,%%xmm3 \n" // pixels 8-11
- "punpckhwd %%xmm0,%%xmm0 \n" // pixels 12-15
- "por %%xmm5,%%xmm3 \n"
- "por %%xmm5,%%xmm0 \n"
- "movdqu %%xmm1," MEMACCESS(2) " \n"
- "movdqu %%xmm2," MEMACCESS2(0x10,2) " \n"
- "movdqu %%xmm3," MEMACCESS2(0x20,2) " \n"
- "movdqu %%xmm0," MEMACCESS2(0x30,2) " \n"
- "lea " MEMLEA(0x40,2) ",%2 \n"
- "sub $0x10,%3 \n"
- "jg 1b \n"
- : "+r"(src_sobelx), // %0: Sobel X plane, advanced 16 bytes per iteration
- "+r"(src_sobely), // %1: Sobel Y plane; turned into an offset from %0 on entry
- "+r"(dst_argb), // %2: ARGB output, advanced 64 bytes per iteration
- "+r"(width) // %3: pixel count, decremented by 16 per iteration
- :
- : "memory", "cc", NACL_R14 // NACL_R14 is defined elsewhere; presumably an optional clobber that supplies its own trailing comma - confirm
- "xmm0", "xmm1", "xmm2", "xmm3", "xmm5"
- );
-}
-#endif // HAS_SOBELROW_SSE2
-
-#ifdef HAS_SOBELTOPLANEROW_SSE2
-// Adds Sobel X and Sobel Y (saturating byte add) and stores Sobel into a plane, 16 pixels per iteration.
-void SobelToPlaneRow_SSE2(const uint8* src_sobelx, const uint8* src_sobely,
- uint8* dst_y, int width) {
- asm volatile (
- "sub %0,%1 \n" // %1 = src_sobely - src_sobelx, so one pointer walks both inputs
- // No alpha mask is built here: the output is a single plane, and only xmm0/xmm1 are declared as clobbered.
-
- // 16 pixel loop. Runs at least once and has no remainder handling.
- LABELALIGN
- "1: \n"
- "movdqu " MEMACCESS(0) ",%%xmm0 \n"
- MEMOPREG(movdqu,0x00,0,1,1,xmm1) // movdqu (%0,%1,1),%%xmm1
- "lea " MEMLEA(0x10,0) ",%0 \n"
- "paddusb %%xmm1,%%xmm0 \n" // sobel = saturate(x + y)
- "movdqu %%xmm0," MEMACCESS(2) " \n"
- "lea " MEMLEA(0x10,2) ",%2 \n"
- "sub $0x10,%3 \n"
- "jg 1b \n"
- : "+r"(src_sobelx), // %0: Sobel X plane, advanced 16 bytes per iteration
- "+r"(src_sobely), // %1: Sobel Y plane; turned into an offset from %0 on entry
- "+r"(dst_y), // %2: output plane, advanced 16 bytes per iteration
- "+r"(width) // %3: pixel count, decremented by 16 per iteration
- :
- : "memory", "cc", NACL_R14 // NACL_R14 is defined elsewhere; presumably an optional clobber that supplies its own trailing comma - confirm
- "xmm0", "xmm1"
- );
-}
-#endif // HAS_SOBELTOPLANEROW_SSE2
-
-#ifdef HAS_SOBELXYROW_SSE2
-// Mixes Sobel X, Sobel Y and Sobel (saturating sum of the two) into ARGB.
-// A = 255
-// R = Sobel X
-// G = Sobel
-// B = Sobel Y
-void SobelXYRow_SSE2(const uint8* src_sobelx, const uint8* src_sobely,
- uint8* dst_argb, int width) {
- asm volatile (
- "sub %0,%1 \n" // %1 = src_sobely - src_sobelx, so one pointer walks both inputs
- "pcmpeqb %%xmm5,%%xmm5 \n" // all 0xff bytes, interleaved below as alpha
-
- // 16 pixel loop: 16 input bytes from each plane produce 64 output bytes. Runs at least once and has no remainder handling.
- LABELALIGN
- "1: \n"
- "movdqu " MEMACCESS(0) ",%%xmm0 \n"
- MEMOPREG(movdqu,0x00,0,1,1,xmm1) // movdqu (%0,%1,1),%%xmm1
- "lea " MEMLEA(0x10,0) ",%0 \n"
- "movdqa %%xmm0,%%xmm2 \n"
- "paddusb %%xmm1,%%xmm2 \n" // sobel = saturate(x + y)
- "movdqa %%xmm0,%%xmm3 \n"
- "punpcklbw %%xmm5,%%xmm3 \n" // (x, 0xff) pairs = (R, A), pixels 0-7
- "punpckhbw %%xmm5,%%xmm0 \n" // pixels 8-15
- "movdqa %%xmm1,%%xmm4 \n"
- "punpcklbw %%xmm2,%%xmm4 \n" // (y, sobel) pairs = (B, G), pixels 0-7
- "punpckhbw %%xmm2,%%xmm1 \n" // pixels 8-15
- "movdqa %%xmm4,%%xmm6 \n"
- "punpcklwd %%xmm3,%%xmm6 \n" // (B, G, R, A) for pixels 0-3
- "punpckhwd %%xmm3,%%xmm4 \n" // pixels 4-7
- "movdqa %%xmm1,%%xmm7 \n"
- "punpcklwd %%xmm0,%%xmm7 \n" // pixels 8-11
- "punpckhwd %%xmm0,%%xmm1 \n" // pixels 12-15
- "movdqu %%xmm6," MEMACCESS(2) " \n"
- "movdqu %%xmm4," MEMACCESS2(0x10,2) " \n"
- "movdqu %%xmm7," MEMACCESS2(0x20,2) " \n"
- "movdqu %%xmm1," MEMACCESS2(0x30,2) " \n"
- "lea " MEMLEA(0x40,2) ",%2 \n"
- "sub $0x10,%3 \n"
- "jg 1b \n"
- : "+r"(src_sobelx), // %0: Sobel X plane, advanced 16 bytes per iteration
- "+r"(src_sobely), // %1: Sobel Y plane; turned into an offset from %0 on entry
- "+r"(dst_argb), // %2: ARGB output, advanced 64 bytes per iteration
- "+r"(width) // %3: pixel count, decremented by 16 per iteration
- :
- : "memory", "cc", NACL_R14 // NACL_R14 is defined elsewhere; presumably an optional clobber that supplies its own trailing comma - confirm
- "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", "xmm7"
- );
-}
-#endif // HAS_SOBELXYROW_SSE2
-
-#ifdef HAS_COMPUTECUMULATIVESUMROW_SSE2
-// Creates a table of cumulative sums where each value is a sum of all values
-// above and to the left of the value, inclusive of the value. Each 4-byte pixel of row yields four int32 sums (one per byte): the running sum along this row plus the matching entry of previous_cumsum.
-void ComputeCumulativeSumRow_SSE2(const uint8* row, int32* cumsum,
- const int32* previous_cumsum, int width) {
- asm volatile (
- "pxor %%xmm0,%%xmm0 \n" // xmm0 = running per-channel sums for this row
- "pxor %%xmm1,%%xmm1 \n" // xmm1 = zero, used to widen bytes to dwords
- "sub $0x4,%3 \n"
- "jl 49f \n" // fewer than 4 pixels: use the 1 pixel loop
- "test $0xf,%1 \n"
- "jne 49f \n" // cumsum not 16-byte aligned: the whole row goes through the 1 pixel loop
-
- // 4 pixel loop.
- LABELALIGN
- "40: \n"
- "movdqu " MEMACCESS(0) ",%%xmm2 \n"
- "lea " MEMLEA(0x10,0) ",%0 \n"
- "movdqa %%xmm2,%%xmm4 \n"
- "punpcklbw %%xmm1,%%xmm2 \n"
- "movdqa %%xmm2,%%xmm3 \n"
- "punpcklwd %%xmm1,%%xmm2 \n" // pixel 0 as 4 dwords
- "punpckhwd %%xmm1,%%xmm3 \n" // pixel 1
- "punpckhbw %%xmm1,%%xmm4 \n"
- "movdqa %%xmm4,%%xmm5 \n"
- "punpcklwd %%xmm1,%%xmm4 \n" // pixel 2
- "punpckhwd %%xmm1,%%xmm5 \n" // pixel 3
- "paddd %%xmm2,%%xmm0 \n" // running sum including pixel 0
- "movdqu " MEMACCESS(2) ",%%xmm2 \n"
- "paddd %%xmm0,%%xmm2 \n" // plus the row above
- "paddd %%xmm3,%%xmm0 \n"
- "movdqu " MEMACCESS2(0x10,2) ",%%xmm3 \n"
- "paddd %%xmm0,%%xmm3 \n"
- "paddd %%xmm4,%%xmm0 \n"
- "movdqu " MEMACCESS2(0x20,2) ",%%xmm4 \n"
- "paddd %%xmm0,%%xmm4 \n"
- "paddd %%xmm5,%%xmm0 \n"
- "movdqu " MEMACCESS2(0x30,2) ",%%xmm5 \n"
- "lea " MEMLEA(0x40,2) ",%2 \n"
- "paddd %%xmm0,%%xmm5 \n"
- "movdqu %%xmm2," MEMACCESS(1) " \n"
- "movdqu %%xmm3," MEMACCESS2(0x10,1) " \n"
- "movdqu %%xmm4," MEMACCESS2(0x20,1) " \n"
- "movdqu %%xmm5," MEMACCESS2(0x30,1) " \n"
- "lea " MEMLEA(0x40,1) ",%1 \n"
- "sub $0x4,%3 \n"
- "jge 40b \n"
-
- "49: \n"
- "add $0x3,%3 \n" // %3 = remaining pixels - 1
- "jl 19f \n" // nothing left
-
- // 1 pixel loop.
- LABELALIGN
- "10: \n"
- "movd " MEMACCESS(0) ",%%xmm2 \n"
- "lea " MEMLEA(0x4,0) ",%0 \n"
- "punpcklbw %%xmm1,%%xmm2 \n"
- "punpcklwd %%xmm1,%%xmm2 \n"
- "paddd %%xmm2,%%xmm0 \n"
- "movdqu " MEMACCESS(2) ",%%xmm2 \n"
- "lea " MEMLEA(0x10,2) ",%2 \n"
- "paddd %%xmm0,%%xmm2 \n"
- "movdqu %%xmm2," MEMACCESS(1) " \n"
- "lea " MEMLEA(0x10,1) ",%1 \n"
- "sub $0x1,%3 \n"
- "jge 10b \n"
-
- "19: \n"
- : "+r"(row), // %0: input pixels, 4 bytes each
- "+r"(cumsum), // %1: output sums, 16 bytes per pixel
- "+r"(previous_cumsum), // %2: sums of the row above, 16 bytes per pixel
- "+r"(width) // %3: pixel count, used as the loop counter
- :
- : "memory", "cc"
- , "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5"
- );
-}
-#endif // HAS_COMPUTECUMULATIVESUMROW_SSE2
-
-#ifdef HAS_CUMULATIVESUMTOAVERAGEROW_SSE2
-void CumulativeSumToAverageRow_SSE2(const int32* topleft, const int32* botleft,
- int width, int area, uint8* dst,
- int count) {
- asm volatile (
- "movd %5,%%xmm5 \n"
- "cvtdq2ps %%xmm5,%%xmm5 \n"
- "rcpss %%xmm5,%%xmm4 \n"
- "pshufd $0x0,%%xmm4,%%xmm4 \n"
- "sub $0x4,%3 \n"
- "jl 49f \n"
- "cmpl $0x80,%5 \n"
- "ja 40f \n"
-
- "pshufd $0x0,%%xmm5,%%xmm5 \n"
- "pcmpeqb %%xmm6,%%xmm6 \n"
- "psrld $0x10,%%xmm6 \n"
- "cvtdq2ps %%xmm6,%%xmm6 \n"
- "addps %%xmm6,%%xmm5 \n"
- "mulps %%xmm4,%%xmm5 \n"
- "cvtps2dq %%xmm5,%%xmm5 \n"
- "packssdw %%xmm5,%%xmm5 \n"
-
- // 4 pixel small loop \n"
- LABELALIGN
- "4: \n"
- "movdqu " MEMACCESS(0) ",%%xmm0 \n"
- "movdqu " MEMACCESS2(0x10,0) ",%%xmm1 \n"
- "movdqu " MEMACCESS2(0x20,0) ",%%xmm2 \n"
- "movdqu " MEMACCESS2(0x30,0) ",%%xmm3 \n"
- MEMOPREG(psubd,0x00,0,4,4,xmm0) // psubd 0x00(%0,%4,4),%%xmm0
- MEMOPREG(psubd,0x10,0,4,4,xmm1) // psubd 0x10(%0,%4,4),%%xmm1
- MEMOPREG(psubd,0x20,0,4,4,xmm2) // psubd 0x20(%0,%4,4),%%xmm2
- MEMOPREG(psubd,0x30,0,4,4,xmm3) // psubd 0x30(%0,%4,4),%%xmm3
- "lea " MEMLEA(0x40,0) ",%0 \n"
- "psubd " MEMACCESS(1) ",%%xmm0 \n"
- "psubd " MEMACCESS2(0x10,1) ",%%xmm1 \n"
- "psubd " MEMACCESS2(0x20,1) ",%%xmm2 \n"
- "psubd " MEMACCESS2(0x30,1) ",%%xmm3 \n"
- MEMOPREG(paddd,0x00,1,4,4,xmm0) // paddd 0x00(%1,%4,4),%%xmm0
- MEMOPREG(paddd,0x10,1,4,4,xmm1) // paddd 0x10(%1,%4,4),%%xmm1
- MEMOPREG(paddd,0x20,1,4,4,xmm2) // paddd 0x20(%1,%4,4),%%xmm2
- MEMOPREG(paddd,0x30,1,4,4,xmm3) // paddd 0x30(%1,%4,4),%%xmm3
- "lea " MEMLEA(0x40,1) ",%1 \n"
- "packssdw %%xmm1,%%xmm0 \n"
- "packssdw %%xmm3,%%xmm2 \n"
- "pmulhuw %%xmm5,%%xmm0 \n"
- "pmulhuw %%xmm5,%%xmm2 \n"
- "packuswb %%xmm2,%%xmm0 \n"
- "movdqu %%xmm0," MEMACCESS(2) " \n"
- "lea " MEMLEA(0x10,2) ",%2 \n"
- "sub $0x4,%3 \n"
- "jge 4b \n"
- "jmp 49f \n"
-
- // 4 pixel loop \n"
- LABELALIGN
- "40: \n"
- "movdqu " MEMACCESS(0) ",%%xmm0 \n"
- "movdqu " MEMACCESS2(0x10,0) ",%%xmm1 \n"
- "movdqu " MEMACCESS2(0x20,0) ",%%xmm2 \n"
- "movdqu " MEMACCESS2(0x30,0) ",%%xmm3 \n"
- MEMOPREG(psubd,0x00,0,4,4,xmm0) // psubd 0x00(%0,%4,4),%%xmm0
- MEMOPREG(psubd,0x10,0,4,4,xmm1) // psubd 0x10(%0,%4,4),%%xmm1
- MEMOPREG(psubd,0x20,0,4,4,xmm2) // psubd 0x20(%0,%4,4),%%xmm2
- MEMOPREG(psubd,0x30,0,4,4,xmm3) // psubd 0x30(%0,%4,4),%%xmm3
- "lea " MEMLEA(0x40,0) ",%0 \n"
- "psubd " MEMACCESS(1) ",%%xmm0 \n"
- "psubd " MEMACCESS2(0x10,1) ",%%xmm1 \n"
- "psubd " MEMACCESS2(0x20,1) ",%%xmm2 \n"
- "psubd " MEMACCESS2(0x30,1) ",%%xmm3 \n"
- MEMOPREG(paddd,0x00,1,4,4,xmm0) // paddd 0x00(%1,%4,4),%%xmm0
- MEMOPREG(paddd,0x10,1,4,4,xmm1) // paddd 0x10(%1,%4,4),%%xmm1
- MEMOPREG(paddd,0x20,1,4,4,xmm2) // paddd 0x20(%1,%4,4),%%xmm2
- MEMOPREG(paddd,0x30,1,4,4,xmm3) // paddd 0x30(%1,%4,4),%%xmm3
- "lea " MEMLEA(0x40,1) ",%1 \n"
- "cvtdq2ps %%xmm0,%%xmm0 \n"
- "cvtdq2ps %%xmm1,%%xmm1 \n"
- "mulps %%xmm4,%%xmm0 \n"
- "mulps %%xmm4,%%xmm1 \n"
- "cvtdq2ps %%xmm2,%%xmm2 \n"
- "cvtdq2ps %%xmm3,%%xmm3 \n"
- "mulps %%xmm4,%%xmm2 \n"
- "mulps %%xmm4,%%xmm3 \n"
- "cvtps2dq %%xmm0,%%xmm0 \n"
- "cvtps2dq %%xmm1,%%xmm1 \n"
- "cvtps2dq %%xmm2,%%xmm2 \n"
- "cvtps2dq %%xmm3,%%xmm3 \n"
- "packssdw %%xmm1,%%xmm0 \n"
- "packssdw %%xmm3,%%xmm2 \n"
- "packuswb %%xmm2,%%xmm0 \n"
- "movdqu %%xmm0," MEMACCESS(2) " \n"
- "lea " MEMLEA(0x10,2) ",%2 \n"
- "sub $0x4,%3 \n"
- "jge 40b \n"
-
- "49: \n"
- "add $0x3,%3 \n"
- "jl 19f \n"
-
- // 1 pixel loop \n"
- LABELALIGN
- "10: \n"
- "movdqu " MEMACCESS(0) ",%%xmm0 \n"
- MEMOPREG(psubd,0x00,0,4,4,xmm0) // psubd 0x00(%0,%4,4),%%xmm0
- "lea " MEMLEA(0x10,0) ",%0 \n"
- "psubd " MEMACCESS(1) ",%%xmm0 \n"
- MEMOPREG(paddd,0x00,1,4,4,xmm0) // paddd 0x00(%1,%4,4),%%xmm0
- "lea " MEMLEA(0x10,1) ",%1 \n"
- "cvtdq2ps %%xmm0,%%xmm0 \n"
- "mulps %%xmm4,%%xmm0 \n"
- "cvtps2dq %%xmm0,%%xmm0 \n"
- "packssdw %%xmm0,%%xmm0 \n"
- "packuswb %%xmm0,%%xmm0 \n"
- "movd %%xmm0," MEMACCESS(2) " \n"
- "lea " MEMLEA(0x4,2) ",%2 \n"
- "sub $0x1,%3 \n"
- "jge 10b \n"
- "19: \n"
- : "+r"(topleft), // %0
- "+r"(botleft), // %1
- "+r"(dst), // %2
- "+rm"(count) // %3
- : "r"((intptr_t)(width)), // %4
- "rm"(area) // %5
- : "memory", "cc", NACL_R14
- "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6"
- );
-}
-#endif // HAS_CUMULATIVESUMTOAVERAGEROW_SSE2
-
-#ifdef HAS_ARGBAFFINEROW_SSE2
-// Copy ARGB pixels from source image with slope to a row of destination.
-LIBYUV_API
-void ARGBAffineRow_SSE2(const uint8* src_argb, int src_argb_stride,
- uint8* dst_argb, const float* src_dudv, int width) {
- intptr_t src_argb_stride_temp = src_argb_stride;
- intptr_t temp = 0;
- asm volatile (
- "movq " MEMACCESS(3) ",%%xmm2 \n"
- "movq " MEMACCESS2(0x08,3) ",%%xmm7 \n"
- "shl $0x10,%1 \n"
- "add $0x4,%1 \n"
- "movd %1,%%xmm5 \n"
- "sub $0x4,%4 \n"
- "jl 49f \n"
-
- "pshufd $0x44,%%xmm7,%%xmm7 \n"
- "pshufd $0x0,%%xmm5,%%xmm5 \n"
- "movdqa %%xmm2,%%xmm0 \n"
- "addps %%xmm7,%%xmm0 \n"
- "movlhps %%xmm0,%%xmm2 \n"
- "movdqa %%xmm7,%%xmm4 \n"
- "addps %%xmm4,%%xmm4 \n"
- "movdqa %%xmm2,%%xmm3 \n"
- "addps %%xmm4,%%xmm3 \n"
- "addps %%xmm4,%%xmm4 \n"
-
- // 4 pixel loop \n"
- LABELALIGN
- "40: \n"
- "cvttps2dq %%xmm2,%%xmm0 \n" // x, y float to int first 2
- "cvttps2dq %%xmm3,%%xmm1 \n" // x, y float to int next 2
- "packssdw %%xmm1,%%xmm0 \n" // x, y as 8 shorts
- "pmaddwd %%xmm5,%%xmm0 \n" // off = x * 4 + y * stride
- "movd %%xmm0,%k1 \n"
- "pshufd $0x39,%%xmm0,%%xmm0 \n"
- "movd %%xmm0,%k5 \n"
- "pshufd $0x39,%%xmm0,%%xmm0 \n"
- MEMOPREG(movd,0x00,0,1,1,xmm1) // movd (%0,%1,1),%%xmm1
- MEMOPREG(movd,0x00,0,5,1,xmm6) // movd (%0,%5,1),%%xmm6
- "punpckldq %%xmm6,%%xmm1 \n"
- "addps %%xmm4,%%xmm2 \n"
- "movq %%xmm1," MEMACCESS(2) " \n"
- "movd %%xmm0,%k1 \n"
- "pshufd $0x39,%%xmm0,%%xmm0 \n"
- "movd %%xmm0,%k5 \n"
- MEMOPREG(movd,0x00,0,1,1,xmm0) // movd (%0,%1,1),%%xmm0
- MEMOPREG(movd,0x00,0,5,1,xmm6) // movd (%0,%5,1),%%xmm6
- "punpckldq %%xmm6,%%xmm0 \n"
- "addps %%xmm4,%%xmm3 \n"
- "movq %%xmm0," MEMACCESS2(0x08,2) " \n"
- "lea " MEMLEA(0x10,2) ",%2 \n"
- "sub $0x4,%4 \n"
- "jge 40b \n"
-
- "49: \n"
- "add $0x3,%4 \n"
- "jl 19f \n"
-
- // 1 pixel loop \n"
- LABELALIGN
- "10: \n"
- "cvttps2dq %%xmm2,%%xmm0 \n"
- "packssdw %%xmm0,%%xmm0 \n"
- "pmaddwd %%xmm5,%%xmm0 \n"
- "addps %%xmm7,%%xmm2 \n"
- "movd %%xmm0,%k1 \n"
- MEMOPREG(movd,0x00,0,1,1,xmm0) // movd (%0,%1,1),%%xmm0
- "movd %%xmm0," MEMACCESS(2) " \n"
- "lea " MEMLEA(0x04,2) ",%2 \n"
- "sub $0x1,%4 \n"
- "jge 10b \n"
- "19: \n"
- : "+r"(src_argb), // %0
- "+r"(src_argb_stride_temp), // %1
- "+r"(dst_argb), // %2
- "+r"(src_dudv), // %3
- "+rm"(width), // %4
- "+r"(temp) // %5
- :
- : "memory", "cc", NACL_R14
- "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", "xmm7"
- );
-}
-#endif // HAS_ARGBAFFINEROW_SSE2
-
-#ifdef HAS_INTERPOLATEROW_SSSE3
-// Bilinear filter 16x2 -> 16x1
-void InterpolateRow_SSSE3(uint8* dst_ptr, const uint8* src_ptr,
- ptrdiff_t src_stride, int dst_width,
- int source_y_fraction) {
- asm volatile (
- "sub %1,%0 \n"
- "shr %3 \n"
- "cmp $0x0,%3 \n"
- "je 100f \n"
- "cmp $0x20,%3 \n"
- "je 75f \n"
- "cmp $0x40,%3 \n"
- "je 50f \n"
- "cmp $0x60,%3 \n"
- "je 25f \n"
-
- "movd %3,%%xmm0 \n"
- "neg %3 \n"
- "add $0x80,%3 \n"
- "movd %3,%%xmm5 \n"
- "punpcklbw %%xmm0,%%xmm5 \n"
- "punpcklwd %%xmm5,%%xmm5 \n"
- "pshufd $0x0,%%xmm5,%%xmm5 \n"
-
- // General purpose row blend.
- LABELALIGN
- "1: \n"
- "movdqu " MEMACCESS(1) ",%%xmm0 \n"
- MEMOPREG(movdqu,0x00,1,4,1,xmm2)
- "movdqa %%xmm0,%%xmm1 \n"
- "punpcklbw %%xmm2,%%xmm0 \n"
- "punpckhbw %%xmm2,%%xmm1 \n"
- "pmaddubsw %%xmm5,%%xmm0 \n"
- "pmaddubsw %%xmm5,%%xmm1 \n"
- "psrlw $0x7,%%xmm0 \n"
- "psrlw $0x7,%%xmm1 \n"
- "packuswb %%xmm1,%%xmm0 \n"
- MEMOPMEM(movdqu,xmm0,0x00,1,0,1)
- "lea " MEMLEA(0x10,1) ",%1 \n"
- "sub $0x10,%2 \n"
- "jg 1b \n"
- "jmp 99f \n"
-
- // Blend 25 / 75.
- LABELALIGN
- "25: \n"
- "movdqu " MEMACCESS(1) ",%%xmm0 \n"
- MEMOPREG(movdqu,0x00,1,4,1,xmm1)
- "pavgb %%xmm1,%%xmm0 \n"
- "pavgb %%xmm1,%%xmm0 \n"
- MEMOPMEM(movdqu,xmm0,0x00,1,0,1)
- "lea " MEMLEA(0x10,1) ",%1 \n"
- "sub $0x10,%2 \n"
- "jg 25b \n"
- "jmp 99f \n"
-
- // Blend 50 / 50.
- LABELALIGN
- "50: \n"
- "movdqu " MEMACCESS(1) ",%%xmm0 \n"
- MEMOPREG(movdqu,0x00,1,4,1,xmm1)
- "pavgb %%xmm1,%%xmm0 \n"
- MEMOPMEM(movdqu,xmm0,0x00,1,0,1)
- "lea " MEMLEA(0x10,1) ",%1 \n"
- "sub $0x10,%2 \n"
- "jg 50b \n"
- "jmp 99f \n"
-
- // Blend 75 / 25.
- LABELALIGN
- "75: \n"
- "movdqu " MEMACCESS(1) ",%%xmm1 \n"
- MEMOPREG(movdqu,0x00,1,4,1,xmm0)
- "pavgb %%xmm1,%%xmm0 \n"
- "pavgb %%xmm1,%%xmm0 \n"
- MEMOPMEM(movdqu,xmm0,0x00,1,0,1)
- "lea " MEMLEA(0x10,1) ",%1 \n"
- "sub $0x10,%2 \n"
- "jg 75b \n"
- "jmp 99f \n"
-
- // Blend 100 / 0 - Copy row unchanged.
- LABELALIGN
- "100: \n"
- "movdqu " MEMACCESS(1) ",%%xmm0 \n"
- MEMOPMEM(movdqu,xmm0,0x00,1,0,1)
- "lea " MEMLEA(0x10,1) ",%1 \n"
- "sub $0x10,%2 \n"
- "jg 100b \n"
-
- "99: \n"
- : "+r"(dst_ptr), // %0
- "+r"(src_ptr), // %1
- "+r"(dst_width), // %2
- "+r"(source_y_fraction) // %3
- : "r"((intptr_t)(src_stride)) // %4
- : "memory", "cc", NACL_R14
- "xmm0", "xmm1", "xmm2", "xmm5"
- );
-}
-#endif // HAS_INTERPOLATEROW_SSSE3
-
-#ifdef HAS_INTERPOLATEROW_AVX2
-// Bilinear filter 32x2 -> 32x1
-void InterpolateRow_AVX2(uint8* dst_ptr, const uint8* src_ptr,
- ptrdiff_t src_stride, int dst_width,
- int source_y_fraction) {
- asm volatile (
- "shr %3 \n"
- "cmp $0x0,%3 \n"
- "je 100f \n"
- "sub %1,%0 \n"
- "cmp $0x20,%3 \n"
- "je 75f \n"
- "cmp $0x40,%3 \n"
- "je 50f \n"
- "cmp $0x60,%3 \n"
- "je 25f \n"
-
- "vmovd %3,%%xmm0 \n"
- "neg %3 \n"
- "add $0x80,%3 \n"
- "vmovd %3,%%xmm5 \n"
- "vpunpcklbw %%xmm0,%%xmm5,%%xmm5 \n"
- "vpunpcklwd %%xmm5,%%xmm5,%%xmm5 \n"
- "vpxor %%ymm0,%%ymm0,%%ymm0 \n"
- "vpermd %%ymm5,%%ymm0,%%ymm5 \n"
-
- // General purpose row blend.
- LABELALIGN
- "1: \n"
- "vmovdqu " MEMACCESS(1) ",%%ymm0 \n"
- MEMOPREG(vmovdqu,0x00,1,4,1,ymm2)
- "vpunpckhbw %%ymm2,%%ymm0,%%ymm1 \n"
- "vpunpcklbw %%ymm2,%%ymm0,%%ymm0 \n"
- "vpmaddubsw %%ymm5,%%ymm0,%%ymm0 \n"
- "vpmaddubsw %%ymm5,%%ymm1,%%ymm1 \n"
- "vpsrlw $0x7,%%ymm0,%%ymm0 \n"
- "vpsrlw $0x7,%%ymm1,%%ymm1 \n"
- "vpackuswb %%ymm1,%%ymm0,%%ymm0 \n"
- MEMOPMEM(vmovdqu,ymm0,0x00,1,0,1)
- "lea " MEMLEA(0x20,1) ",%1 \n"
- "sub $0x20,%2 \n"
- "jg 1b \n"
- "jmp 99f \n"
-
- // Blend 25 / 75.
- LABELALIGN
- "25: \n"
- "vmovdqu " MEMACCESS(1) ",%%ymm0 \n"
- MEMOPREG(vmovdqu,0x00,1,4,1,ymm1)
- "vpavgb %%ymm1,%%ymm0,%%ymm0 \n"
- "vpavgb %%ymm1,%%ymm0,%%ymm0 \n"
- MEMOPMEM(vmovdqu,ymm0,0x00,1,0,1)
- "lea " MEMLEA(0x20,1) ",%1 \n"
- "sub $0x20,%2 \n"
- "jg 25b \n"
- "jmp 99f \n"
-
- // Blend 50 / 50.
- LABELALIGN
- "50: \n"
- "vmovdqu " MEMACCESS(1) ",%%ymm0 \n"
- VMEMOPREG(vpavgb,0x00,1,4,1,ymm0,ymm0) // vpavgb (%1,%4,1),%%ymm0,%%ymm0
- MEMOPMEM(vmovdqu,ymm0,0x00,1,0,1)
- "lea " MEMLEA(0x20,1) ",%1 \n"
- "sub $0x20,%2 \n"
- "jg 50b \n"
- "jmp 99f \n"
-
- // Blend 75 / 25.
- LABELALIGN
- "75: \n"
- "vmovdqu " MEMACCESS(1) ",%%ymm1 \n"
- MEMOPREG(vmovdqu,0x00,1,4,1,ymm0)
- "vpavgb %%ymm1,%%ymm0,%%ymm0 \n"
- "vpavgb %%ymm1,%%ymm0,%%ymm0 \n"
- MEMOPMEM(vmovdqu,ymm0,0x00,1,0,1)
- "lea " MEMLEA(0x20,1) ",%1 \n"
- "sub $0x20,%2 \n"
- "jg 75b \n"
- "jmp 99f \n"
-
- // Blend 100 / 0 - Copy row unchanged.
- LABELALIGN
- "100: \n"
- "rep movsb " MEMMOVESTRING(1,0) " \n"
- "jmp 999f \n"
-
- "99: \n"
- "vzeroupper \n"
- "999: \n"
- : "+D"(dst_ptr), // %0
- "+S"(src_ptr), // %1
- "+c"(dst_width), // %2
- "+r"(source_y_fraction) // %3
- : "r"((intptr_t)(src_stride)) // %4
- : "memory", "cc", NACL_R14
- "xmm0", "xmm1", "xmm2", "xmm5"
- );
-}
-#endif // HAS_INTERPOLATEROW_AVX2
-
-#ifdef HAS_INTERPOLATEROW_SSE2
-// Bilinear filter 16x2 -> 16x1
-void InterpolateRow_SSE2(uint8* dst_ptr, const uint8* src_ptr,
- ptrdiff_t src_stride, int dst_width,
- int source_y_fraction) {
- asm volatile (
- "sub %1,%0 \n"
- "shr %3 \n"
- "cmp $0x0,%3 \n"
- "je 100f \n"
- "cmp $0x20,%3 \n"
- "je 75f \n"
- "cmp $0x40,%3 \n"
- "je 50f \n"
- "cmp $0x60,%3 \n"
- "je 25f \n"
-
- "movd %3,%%xmm0 \n"
- "neg %3 \n"
- "add $0x80,%3 \n"
- "movd %3,%%xmm5 \n"
- "punpcklbw %%xmm0,%%xmm5 \n"
- "punpcklwd %%xmm5,%%xmm5 \n"
- "pshufd $0x0,%%xmm5,%%xmm5 \n"
- "pxor %%xmm4,%%xmm4 \n"
-
- // General purpose row blend.
- LABELALIGN
- "1: \n"
- "movdqu " MEMACCESS(1) ",%%xmm0 \n"
- MEMOPREG(movdqu,0x00,1,4,1,xmm2) // movdqu (%1,%4,1),%%xmm2
- "movdqa %%xmm0,%%xmm1 \n"
- "movdqa %%xmm2,%%xmm3 \n"
- "punpcklbw %%xmm4,%%xmm2 \n"
- "punpckhbw %%xmm4,%%xmm3 \n"
- "punpcklbw %%xmm4,%%xmm0 \n"
- "punpckhbw %%xmm4,%%xmm1 \n"
- "psubw %%xmm0,%%xmm2 \n"
- "psubw %%xmm1,%%xmm3 \n"
- "paddw %%xmm2,%%xmm2 \n"
- "paddw %%xmm3,%%xmm3 \n"
- "pmulhw %%xmm5,%%xmm2 \n"
- "pmulhw %%xmm5,%%xmm3 \n"
- "paddw %%xmm2,%%xmm0 \n"
- "paddw %%xmm3,%%xmm1 \n"
- "packuswb %%xmm1,%%xmm0 \n"
- MEMOPMEM(movdqu,xmm0,0x00,1,0,1) // movdqu %%xmm0,(%1,%0,1)
- "lea " MEMLEA(0x10,1) ",%1 \n"
- "sub $0x10,%2 \n"
- "jg 1b \n"
- "jmp 99f \n"
-
- // Blend 25 / 75.
- LABELALIGN
- "25: \n"
- "movdqu " MEMACCESS(1) ",%%xmm0 \n"
- MEMOPREG(movdqu,0x00,1,4,1,xmm1) // movdqu (%1,%4,1),%%xmm1
- "pavgb %%xmm1,%%xmm0 \n"
- "pavgb %%xmm1,%%xmm0 \n"
- MEMOPMEM(movdqu,xmm0,0x00,1,0,1) // movdqu %%xmm0,(%1,%0,1)
- "lea " MEMLEA(0x10,1) ",%1 \n"
- "sub $0x10,%2 \n"
- "jg 25b \n"
- "jmp 99f \n"
-
- // Blend 50 / 50.
- LABELALIGN
- "50: \n"
- "movdqu " MEMACCESS(1) ",%%xmm0 \n"
- MEMOPREG(movdqu,0x00,1,4,1,xmm1) // movdqu (%1,%4,1),%%xmm1
- "pavgb %%xmm1,%%xmm0 \n"
- MEMOPMEM(movdqu,xmm0,0x00,1,0,1) // movdqu %%xmm0,(%1,%0,1)
- "lea " MEMLEA(0x10,1) ",%1 \n"
- "sub $0x10,%2 \n"
- "jg 50b \n"
- "jmp 99f \n"
-
- // Blend 75 / 25.
- LABELALIGN
- "75: \n"
- "movdqu " MEMACCESS(1) ",%%xmm1 \n"
- MEMOPREG(movdqu,0x00,1,4,1,xmm0) // movdqu (%1,%4,1),%%xmm0
- "pavgb %%xmm1,%%xmm0 \n"
- "pavgb %%xmm1,%%xmm0 \n"
- MEMOPMEM(movdqu,xmm0,0x00,1,0,1) // movdqu %%xmm0,(%1,%0,1)
- "lea " MEMLEA(0x10,1) ",%1 \n"
- "sub $0x10,%2 \n"
- "jg 75b \n"
- "jmp 99f \n"
-
- // Blend 100 / 0 - Copy row unchanged.
- LABELALIGN
- "100: \n"
- "movdqu " MEMACCESS(1) ",%%xmm0 \n"
- MEMOPMEM(movdqu,xmm0,0x00,1,0,1) // movdqu %%xmm0,(%1,%0,1)
- "lea " MEMLEA(0x10,1) ",%1 \n"
- "sub $0x10,%2 \n"
- "jg 100b \n"
-
- "99: \n"
- : "+r"(dst_ptr), // %0
- "+r"(src_ptr), // %1
- "+r"(dst_width), // %2
- "+r"(source_y_fraction) // %3
- : "r"((intptr_t)(src_stride)) // %4
- : "memory", "cc", NACL_R14
- "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5"
- );
-}
-#endif // HAS_INTERPOLATEROW_SSE2
-
-#ifdef HAS_ARGBSHUFFLEROW_SSSE3
-// For BGRAToARGB, ABGRToARGB, RGBAToARGB, and ARGBToRGBA.
-void ARGBShuffleRow_SSSE3(const uint8* src_argb, uint8* dst_argb,
- const uint8* shuffler, int pix) {
- asm volatile (
- "movdqu " MEMACCESS(3) ",%%xmm5 \n"
- LABELALIGN
- "1: \n"
- "movdqu " MEMACCESS(0) ",%%xmm0 \n"
- "movdqu " MEMACCESS2(0x10,0) ",%%xmm1 \n"
- "lea " MEMLEA(0x20,0) ",%0 \n"
- "pshufb %%xmm5,%%xmm0 \n"
- "pshufb %%xmm5,%%xmm1 \n"
- "movdqu %%xmm0," MEMACCESS(1) " \n"
- "movdqu %%xmm1," MEMACCESS2(0x10,1) " \n"
- "lea " MEMLEA(0x20,1) ",%1 \n"
- "sub $0x8,%2 \n"
- "jg 1b \n"
- : "+r"(src_argb), // %0
- "+r"(dst_argb), // %1
- "+r"(pix) // %2
- : "r"(shuffler) // %3
- : "memory", "cc"
- , "xmm0", "xmm1", "xmm5"
- );
-}
-#endif // HAS_ARGBSHUFFLEROW_SSSE3
-
-#ifdef HAS_ARGBSHUFFLEROW_AVX2
-// For BGRAToARGB, ABGRToARGB, RGBAToARGB, and ARGBToRGBA.
-void ARGBShuffleRow_AVX2(const uint8* src_argb, uint8* dst_argb,
- const uint8* shuffler, int pix) {
- asm volatile (
- "vbroadcastf128 " MEMACCESS(3) ",%%ymm5 \n"
- LABELALIGN
- "1: \n"
- "vmovdqu " MEMACCESS(0) ",%%ymm0 \n"
- "vmovdqu " MEMACCESS2(0x20,0) ",%%ymm1 \n"
- "lea " MEMLEA(0x40,0) ",%0 \n"
- "vpshufb %%ymm5,%%ymm0,%%ymm0 \n"
- "vpshufb %%ymm5,%%ymm1,%%ymm1 \n"
- "vmovdqu %%ymm0," MEMACCESS(1) " \n"
- "vmovdqu %%ymm1," MEMACCESS2(0x20,1) " \n"
- "lea " MEMLEA(0x40,1) ",%1 \n"
- "sub $0x10,%2 \n"
- "jg 1b \n"
- "vzeroupper \n"
- : "+r"(src_argb), // %0
- "+r"(dst_argb), // %1
- "+r"(pix) // %2
- : "r"(shuffler) // %3
- : "memory", "cc"
- , "xmm0", "xmm1", "xmm5"
- );
-}
-#endif // HAS_ARGBSHUFFLEROW_AVX2
-
-#ifdef HAS_ARGBSHUFFLEROW_SSE2
-// For BGRAToARGB, ABGRToARGB, RGBAToARGB, and ARGBToRGBA.
-void ARGBShuffleRow_SSE2(const uint8* src_argb, uint8* dst_argb,
- const uint8* shuffler, int pix) {
- uintptr_t pixel_temp = 0u;
- asm volatile (
- "pxor %%xmm5,%%xmm5 \n"
- "mov " MEMACCESS(4) ",%k2 \n"
- "cmp $0x3000102,%k2 \n"
- "je 3012f \n"
- "cmp $0x10203,%k2 \n"
- "je 123f \n"
- "cmp $0x30201,%k2 \n"
- "je 321f \n"
- "cmp $0x2010003,%k2 \n"
- "je 2103f \n"
-
- LABELALIGN
- "1: \n"
- "movzb " MEMACCESS(4) ",%2 \n"
- MEMOPARG(movzb,0x00,0,2,1,2) " \n" // movzb (%0,%2,1),%2
- "mov %b2," MEMACCESS(1) " \n"
- "movzb " MEMACCESS2(0x1,4) ",%2 \n"
- MEMOPARG(movzb,0x00,0,2,1,2) " \n" // movzb (%0,%2,1),%2
- "mov %b2," MEMACCESS2(0x1,1) " \n"
- "movzb " MEMACCESS2(0x2,4) ",%2 \n"
- MEMOPARG(movzb,0x00,0,2,1,2) " \n" // movzb (%0,%2,1),%2
- "mov %b2," MEMACCESS2(0x2,1) " \n"
- "movzb " MEMACCESS2(0x3,4) ",%2 \n"
- MEMOPARG(movzb,0x00,0,2,1,2) " \n" // movzb (%0,%2,1),%2
- "mov %b2," MEMACCESS2(0x3,1) " \n"
- "lea " MEMLEA(0x4,0) ",%0 \n"
- "lea " MEMLEA(0x4,1) ",%1 \n"
- "sub $0x1,%3 \n"
- "jg 1b \n"
- "jmp 99f \n"
-
- LABELALIGN
- "123: \n"
- "movdqu " MEMACCESS(0) ",%%xmm0 \n"
- "lea " MEMLEA(0x10,0) ",%0 \n"
- "movdqa %%xmm0,%%xmm1 \n"
- "punpcklbw %%xmm5,%%xmm0 \n"
- "punpckhbw %%xmm5,%%xmm1 \n"
- "pshufhw $0x1b,%%xmm0,%%xmm0 \n"
- "pshuflw $0x1b,%%xmm0,%%xmm0 \n"
- "pshufhw $0x1b,%%xmm1,%%xmm1 \n"
- "pshuflw $0x1b,%%xmm1,%%xmm1 \n"
- "packuswb %%xmm1,%%xmm0 \n"
- "movdqu %%xmm0," MEMACCESS(1) " \n"
- "lea " MEMLEA(0x10,1) ",%1 \n"
- "sub $0x4,%3 \n"
- "jg 123b \n"
- "jmp 99f \n"
-
- LABELALIGN
- "321: \n"
- "movdqu " MEMACCESS(0) ",%%xmm0 \n"
- "lea " MEMLEA(0x10,0) ",%0 \n"
- "movdqa %%xmm0,%%xmm1 \n"
- "punpcklbw %%xmm5,%%xmm0 \n"
- "punpckhbw %%xmm5,%%xmm1 \n"
- "pshufhw $0x39,%%xmm0,%%xmm0 \n"
- "pshuflw $0x39,%%xmm0,%%xmm0 \n"
- "pshufhw $0x39,%%xmm1,%%xmm1 \n"
- "pshuflw $0x39,%%xmm1,%%xmm1 \n"
- "packuswb %%xmm1,%%xmm0 \n"
- "movdqu %%xmm0," MEMACCESS(1) " \n"
- "lea " MEMLEA(0x10,1) ",%1 \n"
- "sub $0x4,%3 \n"
- "jg 321b \n"
- "jmp 99f \n"
-
- LABELALIGN
- "2103: \n"
- "movdqu " MEMACCESS(0) ",%%xmm0 \n"
- "lea " MEMLEA(0x10,0) ",%0 \n"
- "movdqa %%xmm0,%%xmm1 \n"
- "punpcklbw %%xmm5,%%xmm0 \n"
- "punpckhbw %%xmm5,%%xmm1 \n"
- "pshufhw $0x93,%%xmm0,%%xmm0 \n"
- "pshuflw $0x93,%%xmm0,%%xmm0 \n"
- "pshufhw $0x93,%%xmm1,%%xmm1 \n"
- "pshuflw $0x93,%%xmm1,%%xmm1 \n"
- "packuswb %%xmm1,%%xmm0 \n"
- "movdqu %%xmm0," MEMACCESS(1) " \n"
- "lea " MEMLEA(0x10,1) ",%1 \n"
- "sub $0x4,%3 \n"
- "jg 2103b \n"
- "jmp 99f \n"
-
- LABELALIGN
- "3012: \n"
- "movdqu " MEMACCESS(0) ",%%xmm0 \n"
- "lea " MEMLEA(0x10,0) ",%0 \n"
- "movdqa %%xmm0,%%xmm1 \n"
- "punpcklbw %%xmm5,%%xmm0 \n"
- "punpckhbw %%xmm5,%%xmm1 \n"
- "pshufhw $0xc6,%%xmm0,%%xmm0 \n"
- "pshuflw $0xc6,%%xmm0,%%xmm0 \n"
- "pshufhw $0xc6,%%xmm1,%%xmm1 \n"
- "pshuflw $0xc6,%%xmm1,%%xmm1 \n"
- "packuswb %%xmm1,%%xmm0 \n"
- "movdqu %%xmm0," MEMACCESS(1) " \n"
- "lea " MEMLEA(0x10,1) ",%1 \n"
- "sub $0x4,%3 \n"
- "jg 3012b \n"
-
- "99: \n"
- : "+r"(src_argb), // %0
- "+r"(dst_argb), // %1
- "+d"(pixel_temp), // %2
- "+r"(pix) // %3
- : "r"(shuffler) // %4
- : "memory", "cc", NACL_R14
- "xmm0", "xmm1", "xmm5"
- );
-}
-#endif // HAS_ARGBSHUFFLEROW_SSE2
-
-#ifdef HAS_I422TOYUY2ROW_SSE2
-void I422ToYUY2Row_SSE2(const uint8* src_y,
- const uint8* src_u,
- const uint8* src_v,
- uint8* dst_frame, int width) {
- asm volatile (
- "sub %1,%2 \n"
- LABELALIGN
- "1: \n"
- "movq " MEMACCESS(1) ",%%xmm2 \n"
- MEMOPREG(movq,0x00,1,2,1,xmm3) // movq (%1,%2,1),%%xmm3
- "lea " MEMLEA(0x8,1) ",%1 \n"
- "punpcklbw %%xmm3,%%xmm2 \n"
- "movdqu " MEMACCESS(0) ",%%xmm0 \n"
- "lea " MEMLEA(0x10,0) ",%0 \n"
- "movdqa %%xmm0,%%xmm1 \n"
- "punpcklbw %%xmm2,%%xmm0 \n"
- "punpckhbw %%xmm2,%%xmm1 \n"
- "movdqu %%xmm0," MEMACCESS(3) " \n"
- "movdqu %%xmm1," MEMACCESS2(0x10,3) " \n"
- "lea " MEMLEA(0x20,3) ",%3 \n"
- "sub $0x10,%4 \n"
- "jg 1b \n"
- : "+r"(src_y), // %0
- "+r"(src_u), // %1
- "+r"(src_v), // %2
- "+r"(dst_frame), // %3
- "+rm"(width) // %4
- :
- : "memory", "cc", NACL_R14
- "xmm0", "xmm1", "xmm2", "xmm3"
- );
-}
-#endif // HAS_I422TOYUY2ROW_SSE2
-
-#ifdef HAS_I422TOUYVYROW_SSE2
-void I422ToUYVYRow_SSE2(const uint8* src_y,
- const uint8* src_u,
- const uint8* src_v,
- uint8* dst_frame, int width) {
- asm volatile (
- "sub %1,%2 \n"
- LABELALIGN
- "1: \n"
- "movq " MEMACCESS(1) ",%%xmm2 \n"
- MEMOPREG(movq,0x00,1,2,1,xmm3) // movq (%1,%2,1),%%xmm3
- "lea " MEMLEA(0x8,1) ",%1 \n"
- "punpcklbw %%xmm3,%%xmm2 \n"
- "movdqu " MEMACCESS(0) ",%%xmm0 \n"
- "movdqa %%xmm2,%%xmm1 \n"
- "lea " MEMLEA(0x10,0) ",%0 \n"
- "punpcklbw %%xmm0,%%xmm1 \n"
- "punpckhbw %%xmm0,%%xmm2 \n"
- "movdqu %%xmm1," MEMACCESS(3) " \n"
- "movdqu %%xmm2," MEMACCESS2(0x10,3) " \n"
- "lea " MEMLEA(0x20,3) ",%3 \n"
- "sub $0x10,%4 \n"
- "jg 1b \n"
- : "+r"(src_y), // %0
- "+r"(src_u), // %1
- "+r"(src_v), // %2
- "+r"(dst_frame), // %3
- "+rm"(width) // %4
- :
- : "memory", "cc", NACL_R14
- "xmm0", "xmm1", "xmm2", "xmm3"
- );
-}
-#endif // HAS_I422TOUYVYROW_SSE2
-
-#ifdef HAS_ARGBPOLYNOMIALROW_SSE2
-void ARGBPolynomialRow_SSE2(const uint8* src_argb,
- uint8* dst_argb, const float* poly,
- int width) {
- asm volatile (
- "pxor %%xmm3,%%xmm3 \n"
-
- // 2 pixel loop.
- LABELALIGN
- "1: \n"
- "movq " MEMACCESS(0) ",%%xmm0 \n"
- "lea " MEMLEA(0x8,0) ",%0 \n"
- "punpcklbw %%xmm3,%%xmm0 \n"
- "movdqa %%xmm0,%%xmm4 \n"
- "punpcklwd %%xmm3,%%xmm0 \n"
- "punpckhwd %%xmm3,%%xmm4 \n"
- "cvtdq2ps %%xmm0,%%xmm0 \n"
- "cvtdq2ps %%xmm4,%%xmm4 \n"
- "movdqa %%xmm0,%%xmm1 \n"
- "movdqa %%xmm4,%%xmm5 \n"
- "mulps " MEMACCESS2(0x10,3) ",%%xmm0 \n"
- "mulps " MEMACCESS2(0x10,3) ",%%xmm4 \n"
- "addps " MEMACCESS(3) ",%%xmm0 \n"
- "addps " MEMACCESS(3) ",%%xmm4 \n"
- "movdqa %%xmm1,%%xmm2 \n"
- "movdqa %%xmm5,%%xmm6 \n"
- "mulps %%xmm1,%%xmm2 \n"
- "mulps %%xmm5,%%xmm6 \n"
- "mulps %%xmm2,%%xmm1 \n"
- "mulps %%xmm6,%%xmm5 \n"
- "mulps " MEMACCESS2(0x20,3) ",%%xmm2 \n"
- "mulps " MEMACCESS2(0x20,3) ",%%xmm6 \n"
- "mulps " MEMACCESS2(0x30,3) ",%%xmm1 \n"
- "mulps " MEMACCESS2(0x30,3) ",%%xmm5 \n"
- "addps %%xmm2,%%xmm0 \n"
- "addps %%xmm6,%%xmm4 \n"
- "addps %%xmm1,%%xmm0 \n"
- "addps %%xmm5,%%xmm4 \n"
- "cvttps2dq %%xmm0,%%xmm0 \n"
- "cvttps2dq %%xmm4,%%xmm4 \n"
- "packuswb %%xmm4,%%xmm0 \n"
- "packuswb %%xmm0,%%xmm0 \n"
- "movq %%xmm0," MEMACCESS(1) " \n"
- "lea " MEMLEA(0x8,1) ",%1 \n"
- "sub $0x2,%2 \n"
- "jg 1b \n"
- : "+r"(src_argb), // %0
- "+r"(dst_argb), // %1
- "+r"(width) // %2
- : "r"(poly) // %3
- : "memory", "cc"
- , "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6"
- );
-}
-#endif // HAS_ARGBPOLYNOMIALROW_SSE2
-
-#ifdef HAS_ARGBPOLYNOMIALROW_AVX2
-void ARGBPolynomialRow_AVX2(const uint8* src_argb,
- uint8* dst_argb, const float* poly,
- int width) {
- asm volatile (
- "vbroadcastf128 " MEMACCESS(3) ",%%ymm4 \n"
- "vbroadcastf128 " MEMACCESS2(0x10,3) ",%%ymm5 \n"
- "vbroadcastf128 " MEMACCESS2(0x20,3) ",%%ymm6 \n"
- "vbroadcastf128 " MEMACCESS2(0x30,3) ",%%ymm7 \n"
-
- // 2 pixel loop.
- LABELALIGN
- "1: \n"
- "vpmovzxbd " MEMACCESS(0) ",%%ymm0 \n" // 2 ARGB pixels
- "lea " MEMLEA(0x8,0) ",%0 \n"
- "vcvtdq2ps %%ymm0,%%ymm0 \n" // X 8 floats
- "vmulps %%ymm0,%%ymm0,%%ymm2 \n" // X * X
- "vmulps %%ymm7,%%ymm0,%%ymm3 \n" // C3 * X
- "vfmadd132ps %%ymm5,%%ymm4,%%ymm0 \n" // result = C0 + C1 * X
- "vfmadd231ps %%ymm6,%%ymm2,%%ymm0 \n" // result += C2 * X * X
- "vfmadd231ps %%ymm3,%%ymm2,%%ymm0 \n" // result += C3 * X * X * X
- "vcvttps2dq %%ymm0,%%ymm0 \n"
- "vpackusdw %%ymm0,%%ymm0,%%ymm0 \n"
- "vpermq $0xd8,%%ymm0,%%ymm0 \n"
- "vpackuswb %%xmm0,%%xmm0,%%xmm0 \n"
- "vmovq %%xmm0," MEMACCESS(1) " \n"
- "lea " MEMLEA(0x8,1) ",%1 \n"
- "sub $0x2,%2 \n"
- "jg 1b \n"
- "vzeroupper \n"
- : "+r"(src_argb), // %0
- "+r"(dst_argb), // %1
- "+r"(width) // %2
- : "r"(poly) // %3
- : "memory", "cc",
- "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", "xmm7"
- );
-}
-#endif // HAS_ARGBPOLYNOMIALROW_AVX2
-
-#ifdef HAS_ARGBCOLORTABLEROW_X86
-// Tranform ARGB pixels with color table.
-void ARGBColorTableRow_X86(uint8* dst_argb, const uint8* table_argb,
- int width) {
- uintptr_t pixel_temp = 0u;
- asm volatile (
- // 1 pixel loop.
- LABELALIGN
- "1: \n"
- "movzb " MEMACCESS(0) ",%1 \n"
- "lea " MEMLEA(0x4,0) ",%0 \n"
- MEMOPARG(movzb,0x00,3,1,4,1) " \n" // movzb (%3,%1,4),%1
- "mov %b1," MEMACCESS2(-0x4,0) " \n"
- "movzb " MEMACCESS2(-0x3,0) ",%1 \n"
- MEMOPARG(movzb,0x01,3,1,4,1) " \n" // movzb 0x1(%3,%1,4),%1
- "mov %b1," MEMACCESS2(-0x3,0) " \n"
- "movzb " MEMACCESS2(-0x2,0) ",%1 \n"
- MEMOPARG(movzb,0x02,3,1,4,1) " \n" // movzb 0x2(%3,%1,4),%1
- "mov %b1," MEMACCESS2(-0x2,0) " \n"
- "movzb " MEMACCESS2(-0x1,0) ",%1 \n"
- MEMOPARG(movzb,0x03,3,1,4,1) " \n" // movzb 0x3(%3,%1,4),%1
- "mov %b1," MEMACCESS2(-0x1,0) " \n"
- "dec %2 \n"
- "jg 1b \n"
- : "+r"(dst_argb), // %0
- "+d"(pixel_temp), // %1
- "+r"(width) // %2
- : "r"(table_argb) // %3
- : "memory", "cc");
-}
-#endif // HAS_ARGBCOLORTABLEROW_X86
-
-#ifdef HAS_RGBCOLORTABLEROW_X86
-// Tranform RGB pixels with color table.
-void RGBColorTableRow_X86(uint8* dst_argb, const uint8* table_argb, int width) {
- uintptr_t pixel_temp = 0u;
- asm volatile (
- // 1 pixel loop.
- LABELALIGN
- "1: \n"
- "movzb " MEMACCESS(0) ",%1 \n"
- "lea " MEMLEA(0x4,0) ",%0 \n"
- MEMOPARG(movzb,0x00,3,1,4,1) " \n" // movzb (%3,%1,4),%1
- "mov %b1," MEMACCESS2(-0x4,0) " \n"
- "movzb " MEMACCESS2(-0x3,0) ",%1 \n"
- MEMOPARG(movzb,0x01,3,1,4,1) " \n" // movzb 0x1(%3,%1,4),%1
- "mov %b1," MEMACCESS2(-0x3,0) " \n"
- "movzb " MEMACCESS2(-0x2,0) ",%1 \n"
- MEMOPARG(movzb,0x02,3,1,4,1) " \n" // movzb 0x2(%3,%1,4),%1
- "mov %b1," MEMACCESS2(-0x2,0) " \n"
- "dec %2 \n"
- "jg 1b \n"
- : "+r"(dst_argb), // %0
- "+d"(pixel_temp), // %1
- "+r"(width) // %2
- : "r"(table_argb) // %3
- : "memory", "cc");
-}
-#endif // HAS_RGBCOLORTABLEROW_X86
-
-#ifdef HAS_ARGBLUMACOLORTABLEROW_SSSE3
-// Tranform RGB pixels with luma table.
-void ARGBLumaColorTableRow_SSSE3(const uint8* src_argb, uint8* dst_argb,
- int width,
- const uint8* luma, uint32 lumacoeff) {
- uintptr_t pixel_temp = 0u;
- uintptr_t table_temp = 0u;
- asm volatile (
- "movd %6,%%xmm3 \n"
- "pshufd $0x0,%%xmm3,%%xmm3 \n"
- "pcmpeqb %%xmm4,%%xmm4 \n"
- "psllw $0x8,%%xmm4 \n"
- "pxor %%xmm5,%%xmm5 \n"
-
- // 4 pixel loop.
- LABELALIGN
- "1: \n"
- "movdqu " MEMACCESS(2) ",%%xmm0 \n"
- "pmaddubsw %%xmm3,%%xmm0 \n"
- "phaddw %%xmm0,%%xmm0 \n"
- "pand %%xmm4,%%xmm0 \n"
- "punpcklwd %%xmm5,%%xmm0 \n"
- "movd %%xmm0,%k1 \n" // 32 bit offset
- "add %5,%1 \n"
- "pshufd $0x39,%%xmm0,%%xmm0 \n"
-
- "movzb " MEMACCESS(2) ",%0 \n"
- MEMOPARG(movzb,0x00,1,0,1,0) " \n" // movzb (%1,%0,1),%0
- "mov %b0," MEMACCESS(3) " \n"
- "movzb " MEMACCESS2(0x1,2) ",%0 \n"
- MEMOPARG(movzb,0x00,1,0,1,0) " \n" // movzb (%1,%0,1),%0
- "mov %b0," MEMACCESS2(0x1,3) " \n"
- "movzb " MEMACCESS2(0x2,2) ",%0 \n"
- MEMOPARG(movzb,0x00,1,0,1,0) " \n" // movzb (%1,%0,1),%0
- "mov %b0," MEMACCESS2(0x2,3) " \n"
- "movzb " MEMACCESS2(0x3,2) ",%0 \n"
- "mov %b0," MEMACCESS2(0x3,3) " \n"
-
- "movd %%xmm0,%k1 \n" // 32 bit offset
- "add %5,%1 \n"
- "pshufd $0x39,%%xmm0,%%xmm0 \n"
-
- "movzb " MEMACCESS2(0x4,2) ",%0 \n"
- MEMOPARG(movzb,0x00,1,0,1,0) " \n" // movzb (%1,%0,1),%0
- "mov %b0," MEMACCESS2(0x4,3) " \n"
- "movzb " MEMACCESS2(0x5,2) ",%0 \n"
- MEMOPARG(movzb,0x00,1,0,1,0) " \n" // movzb (%1,%0,1),%0
- "mov %b0," MEMACCESS2(0x5,3) " \n"
- "movzb " MEMACCESS2(0x6,2) ",%0 \n"
- MEMOPARG(movzb,0x00,1,0,1,0) " \n" // movzb (%1,%0,1),%0
- "mov %b0," MEMACCESS2(0x6,3) " \n"
- "movzb " MEMACCESS2(0x7,2) ",%0 \n"
- "mov %b0," MEMACCESS2(0x7,3) " \n"
-
- "movd %%xmm0,%k1 \n" // 32 bit offset
- "add %5,%1 \n"
- "pshufd $0x39,%%xmm0,%%xmm0 \n"
-
- "movzb " MEMACCESS2(0x8,2) ",%0 \n"
- MEMOPARG(movzb,0x00,1,0,1,0) " \n" // movzb (%1,%0,1),%0
- "mov %b0," MEMACCESS2(0x8,3) " \n"
- "movzb " MEMACCESS2(0x9,2) ",%0 \n"
- MEMOPARG(movzb,0x00,1,0,1,0) " \n" // movzb (%1,%0,1),%0
- "mov %b0," MEMACCESS2(0x9,3) " \n"
- "movzb " MEMACCESS2(0xa,2) ",%0 \n"
- MEMOPARG(movzb,0x00,1,0,1,0) " \n" // movzb (%1,%0,1),%0
- "mov %b0," MEMACCESS2(0xa,3) " \n"
- "movzb " MEMACCESS2(0xb,2) ",%0 \n"
- "mov %b0," MEMACCESS2(0xb,3) " \n"
-
- "movd %%xmm0,%k1 \n" // 32 bit offset
- "add %5,%1 \n"
-
- "movzb " MEMACCESS2(0xc,2) ",%0 \n"
- MEMOPARG(movzb,0x00,1,0,1,0) " \n" // movzb (%1,%0,1),%0
- "mov %b0," MEMACCESS2(0xc,3) " \n"
- "movzb " MEMACCESS2(0xd,2) ",%0 \n"
- MEMOPARG(movzb,0x00,1,0,1,0) " \n" // movzb (%1,%0,1),%0
- "mov %b0," MEMACCESS2(0xd,3) " \n"
- "movzb " MEMACCESS2(0xe,2) ",%0 \n"
- MEMOPARG(movzb,0x00,1,0,1,0) " \n" // movzb (%1,%0,1),%0
- "mov %b0," MEMACCESS2(0xe,3) " \n"
- "movzb " MEMACCESS2(0xf,2) ",%0 \n"
- "mov %b0," MEMACCESS2(0xf,3) " \n"
- "lea " MEMLEA(0x10,2) ",%2 \n"
- "lea " MEMLEA(0x10,3) ",%3 \n"
- "sub $0x4,%4 \n"
- "jg 1b \n"
- : "+d"(pixel_temp), // %0
- "+a"(table_temp), // %1
- "+r"(src_argb), // %2
- "+r"(dst_argb), // %3
- "+rm"(width) // %4
- : "r"(luma), // %5
- "rm"(lumacoeff) // %6
- : "memory", "cc", "xmm0", "xmm3", "xmm4", "xmm5"
- );
-}
-#endif // HAS_ARGBLUMACOLORTABLEROW_SSSE3
-
-#endif // defined(__x86_64__) || defined(__i386__)
-
-#ifdef __cplusplus
-} // extern "C"
-} // namespace libyuv
-#endif
diff --git a/third_party/aom/third_party/libyuv/source/row_mips.cc b/third_party/aom/third_party/libyuv/source/row_mips.cc
deleted file mode 100644
index cfc9ffe03..000000000
--- a/third_party/aom/third_party/libyuv/source/row_mips.cc
+++ /dev/null
@@ -1,911 +0,0 @@
-/*
- * Copyright (c) 2012 The LibYuv project authors. All Rights Reserved.
- *
- * Use of this source code is governed by a BSD-style license
- * that can be found in the LICENSE file in the root of the source
- * tree. An additional intellectual property rights grant can be found
- * in the file PATENTS. All contributing project authors may
- * be found in the AUTHORS file in the root of the source tree.
- */
-
-#include "libyuv/row.h"
-
-#ifdef __cplusplus
-namespace libyuv {
-extern "C" {
-#endif
-
-// The following are available on Mips platforms:
-#if !defined(LIBYUV_DISABLE_MIPS) && defined(__mips__) && \
- (_MIPS_SIM == _MIPS_SIM_ABI32)
-
-#ifdef HAS_COPYROW_MIPS
-// Copies |count| bytes from |src| to |dst| using hand-scheduled MIPS32
-// assembly.
-//
-// Strategy, as implemented below:
-//  * count < 8: plain byte loop ($last8).
-//  * src and dst share the same offset within a word: align dst with
-//    lwr/swr, then copy 64-byte chunks, one optional 32-byte chunk, whole
-//    words and finally the trailing bytes using lw/sw.
-//  * otherwise ("unaligned"): align dst, then do the same chunking but read
-//    the source with lwr/lwl pairs.
-//
-// In the comments inherited from the original memcpy, "a1" denotes %[dst]
-// and "a2" denotes %[count].
-//
-// Register use inside the block: $t0-$t7 data, $t8 residual byte count,
-// $t9 last address for which "pref 30, 128(dst)" stays inside the buffer,
-// $a3 end-of-chunk address, $v1 scratch / "skip pref 30" flag, $at scratch.
-//
-// All three operands are modified by the assembly and are therefore declared
-// read-write, and the block carries a "memory" clobber because it stores
-// through |dst|.  Both code paths leave through the "leave" label at the end
-// of the block so that the compiler-generated epilogue performs the return.
-//
-// NOTE(review): the labels without a "$" prefix (chk8w, chk1w, leave,
-// unaligned, ua_chk8w, ua_skip_pref30_128, ua_smallCopy) look like ordinary
-// assembler symbols; if this function were ever emitted twice in one
-// translation unit they would presumably clash - confirm before inlining.
-void CopyRow_MIPS(const uint8* src, uint8* dst, int count) {
- __asm__ __volatile__ (
- ".set noreorder \n"
- ".set noat \n"
- "slti $at, %[count], 8 \n"
- "bne $at ,$zero, $last8 \n"
- "xor $t8, %[src], %[dst] \n"
- "andi $t8, $t8, 0x3 \n"
-
- "bne $t8, $zero, unaligned \n"
- "negu $a3, %[dst] \n"
- // make dst/src aligned
- "andi $a3, $a3, 0x3 \n"
- "beq $a3, $zero, $chk16w \n"
- // word-aligned now; count is the remaining byte count
- "subu %[count], %[count], $a3 \n"
-
- "lwr $t8, 0(%[src]) \n"
- "addu %[src], %[src], $a3 \n"
- "swr $t8, 0(%[dst]) \n"
- "addu %[dst], %[dst], $a3 \n"
-
- // Now the dst/src are mutually word-aligned with word-aligned addresses
- "$chk16w: \n"
- "andi $t8, %[count], 0x3f \n" // whole 64-B chunks?
- // t8 is the byte count after 64-byte chunks
- "beq %[count], $t8, chk8w \n"
- // There will be at most 1 32-byte chunk after it
- "subu $a3, %[count], $t8 \n" // the remainder
- // Here a3 counts bytes in 16w chunks
- "addu $a3, %[dst], $a3 \n"
- // Now a3 is the final dst after 64-byte chunks
- "addu $t0, %[dst], %[count] \n"
- // t0 is the "past the end" address
-
- // When in the loop we exercise "pref 30,x(a1)", the a1+x should not be past
- // the "t0-32" address
- // This means: for x=128 the last "safe" a1 address is "t0-160"
- // Alternatively, for x=64 the last "safe" a1 address is "t0-96"
- // we will use "pref 30,128(a1)", so "t0-160" is the limit
- "subu $t9, $t0, 160 \n"
- // t9 is the "last safe pref 30,128(a1)" address
- "pref 0, 0(%[src]) \n" // first line of src
- "pref 0, 32(%[src]) \n" // second line of src
- "pref 0, 64(%[src]) \n"
- "pref 30, 32(%[dst]) \n"
- // In case the a1 > t9 don't use "pref 30" at all
- "sgtu $v1, %[dst], $t9 \n"
- "bgtz $v1, $loop16w \n"
- "nop \n"
- // otherwise, start with using pref30
- "pref 30, 64(%[dst]) \n"
- "$loop16w: \n"
- "pref 0, 96(%[src]) \n"
- "lw $t0, 0(%[src]) \n"
- "bgtz $v1, $skip_pref30_96 \n" // skip
- "lw $t1, 4(%[src]) \n"
- "pref 30, 96(%[dst]) \n" // continue
- "$skip_pref30_96: \n"
- "lw $t2, 8(%[src]) \n"
- "lw $t3, 12(%[src]) \n"
- "lw $t4, 16(%[src]) \n"
- "lw $t5, 20(%[src]) \n"
- "lw $t6, 24(%[src]) \n"
- "lw $t7, 28(%[src]) \n"
- "pref 0, 128(%[src]) \n"
- // bring the next lines of src, addr 128
- "sw $t0, 0(%[dst]) \n"
- "sw $t1, 4(%[dst]) \n"
- "sw $t2, 8(%[dst]) \n"
- "sw $t3, 12(%[dst]) \n"
- "sw $t4, 16(%[dst]) \n"
- "sw $t5, 20(%[dst]) \n"
- "sw $t6, 24(%[dst]) \n"
- "sw $t7, 28(%[dst]) \n"
- "lw $t0, 32(%[src]) \n"
- "bgtz $v1, $skip_pref30_128 \n" // skip pref 30,128(a1)
- "lw $t1, 36(%[src]) \n"
- "pref 30, 128(%[dst]) \n" // set dest, addr 128
- "$skip_pref30_128: \n"
- "lw $t2, 40(%[src]) \n"
- "lw $t3, 44(%[src]) \n"
- "lw $t4, 48(%[src]) \n"
- "lw $t5, 52(%[src]) \n"
- "lw $t6, 56(%[src]) \n"
- "lw $t7, 60(%[src]) \n"
- "pref 0, 160(%[src]) \n"
- // bring the next lines of src, addr 160
- "sw $t0, 32(%[dst]) \n"
- "sw $t1, 36(%[dst]) \n"
- "sw $t2, 40(%[dst]) \n"
- "sw $t3, 44(%[dst]) \n"
- "sw $t4, 48(%[dst]) \n"
- "sw $t5, 52(%[dst]) \n"
- "sw $t6, 56(%[dst]) \n"
- "sw $t7, 60(%[dst]) \n"
-
- "addiu %[dst], %[dst], 64 \n" // adding 64 to dest
- "sgtu $v1, %[dst], $t9 \n"
- "bne %[dst], $a3, $loop16w \n"
- " addiu %[src], %[src], 64 \n" // adding 64 to src
- "move %[count], $t8 \n"
-
- // Here we have src and dest word-aligned but less than 64-bytes to go
-
- "chk8w: \n"
- "pref 0, 0x0(%[src]) \n"
- "andi $t8, %[count], 0x1f \n" // 32-byte chunk?
- // the t8 is the remainder count past 32-bytes
- "beq %[count], $t8, chk1w \n"
- // count=t8,no 32-byte chunk
- " nop \n"
-
- "lw $t0, 0(%[src]) \n"
- "lw $t1, 4(%[src]) \n"
- "lw $t2, 8(%[src]) \n"
- "lw $t3, 12(%[src]) \n"
- "lw $t4, 16(%[src]) \n"
- "lw $t5, 20(%[src]) \n"
- "lw $t6, 24(%[src]) \n"
- "lw $t7, 28(%[src]) \n"
- "addiu %[src], %[src], 32 \n"
-
- "sw $t0, 0(%[dst]) \n"
- "sw $t1, 4(%[dst]) \n"
- "sw $t2, 8(%[dst]) \n"
- "sw $t3, 12(%[dst]) \n"
- "sw $t4, 16(%[dst]) \n"
- "sw $t5, 20(%[dst]) \n"
- "sw $t6, 24(%[dst]) \n"
- "sw $t7, 28(%[dst]) \n"
- "addiu %[dst], %[dst], 32 \n"
-
- "chk1w: \n"
- "andi %[count], $t8, 0x3 \n"
- // now count is the remainder past 1w chunks
- "beq %[count], $t8, $last8 \n"
- " subu $a3, $t8, %[count] \n"
- // a3 is count of bytes in 1w chunks
- "addu $a3, %[dst], $a3 \n"
- // now a3 is the dst address past the 1w chunks
- // copying in words (4-byte chunks)
- "$wordCopy_loop: \n"
- "lw $t3, 0(%[src]) \n"
- // the first t3 may be equal t0 ... optimize?
- "addiu %[src], %[src],4 \n"
- "addiu %[dst], %[dst],4 \n"
- "bne %[dst], $a3,$wordCopy_loop \n"
- " sw $t3, -4(%[dst]) \n"
-
- // For the last (<8) bytes
- "$last8: \n"
- "blez %[count], leave \n"
- " addu $a3, %[dst], %[count] \n" // a3 -last dst address
- "$last8loop: \n"
- "lb $v1, 0(%[src]) \n"
- "addiu %[src], %[src], 1 \n"
- "addiu %[dst], %[dst], 1 \n"
- "bne %[dst], $a3, $last8loop \n"
- " sb $v1, -1(%[dst]) \n"
-
- // Aligned path finished: jump over the unaligned code to the common exit
- // instead of returning from inside the asm block.
- "b leave \n"
- " nop \n"
-
- //
- // UNALIGNED case
- //
-
- "unaligned: \n"
- // got here with a3="negu a1"
- "andi $a3, $a3, 0x3 \n" // a1 is word aligned?
- "beqz $a3, $ua_chk16w \n"
- " subu %[count], %[count], $a3 \n"
- // bytes left after initial a3 bytes
- "lwr $v1, 0(%[src]) \n"
- "lwl $v1, 3(%[src]) \n"
- "addu %[src], %[src], $a3 \n" // a3 may be 1, 2 or 3
- "swr $v1, 0(%[dst]) \n"
- "addu %[dst], %[dst], $a3 \n"
- // below the dst will be word aligned (NOTE1)
- "$ua_chk16w: \n"
- "andi $t8, %[count], 0x3f \n" // whole 64-B chunks?
- // t8 is the byte count after 64-byte chunks
- "beq %[count], $t8, ua_chk8w \n"
- // if a2==t8, no 64-byte chunks
- // There will be at most 1 32-byte chunk after it
- "subu $a3, %[count], $t8 \n" // the remainder
- // Here a3 counts bytes in 16w chunks
- "addu $a3, %[dst], $a3 \n"
- // Now a3 is the final dst after 64-byte chunks
- "addu $t0, %[dst], %[count] \n" // t0 "past the end"
- "subu $t9, $t0, 160 \n"
- // t9 is the "last safe pref 30,128(a1)" address
- "pref 0, 0(%[src]) \n" // first line of src
- "pref 0, 32(%[src]) \n" // second line addr 32
- "pref 0, 64(%[src]) \n"
- "pref 30, 32(%[dst]) \n"
- // safe, as we have at least 64 bytes ahead
- // In case the a1 > t9 don't use "pref 30" at all
- "sgtu $v1, %[dst], $t9 \n"
- "bgtz $v1, $ua_loop16w \n"
- // skip "pref 30,64(a1)" for too short arrays
- " nop \n"
- // otherwise, start with using pref30
- "pref 30, 64(%[dst]) \n"
- "$ua_loop16w: \n"
- "pref 0, 96(%[src]) \n"
- "lwr $t0, 0(%[src]) \n"
- "lwl $t0, 3(%[src]) \n"
- "lwr $t1, 4(%[src]) \n"
- "bgtz $v1, $ua_skip_pref30_96 \n"
- " lwl $t1, 7(%[src]) \n"
- "pref 30, 96(%[dst]) \n"
- // continue setting up the dest, addr 96
- "$ua_skip_pref30_96: \n"
- "lwr $t2, 8(%[src]) \n"
- "lwl $t2, 11(%[src]) \n"
- "lwr $t3, 12(%[src]) \n"
- "lwl $t3, 15(%[src]) \n"
- "lwr $t4, 16(%[src]) \n"
- "lwl $t4, 19(%[src]) \n"
- "lwr $t5, 20(%[src]) \n"
- "lwl $t5, 23(%[src]) \n"
- "lwr $t6, 24(%[src]) \n"
- "lwl $t6, 27(%[src]) \n"
- "lwr $t7, 28(%[src]) \n"
- "lwl $t7, 31(%[src]) \n"
- "pref 0, 128(%[src]) \n"
- // bring the next lines of src, addr 128
- "sw $t0, 0(%[dst]) \n"
- "sw $t1, 4(%[dst]) \n"
- "sw $t2, 8(%[dst]) \n"
- "sw $t3, 12(%[dst]) \n"
- "sw $t4, 16(%[dst]) \n"
- "sw $t5, 20(%[dst]) \n"
- "sw $t6, 24(%[dst]) \n"
- "sw $t7, 28(%[dst]) \n"
- "lwr $t0, 32(%[src]) \n"
- "lwl $t0, 35(%[src]) \n"
- "lwr $t1, 36(%[src]) \n"
- "bgtz $v1, ua_skip_pref30_128 \n"
- " lwl $t1, 39(%[src]) \n"
- "pref 30, 128(%[dst]) \n"
- // continue setting up the dest, addr 128
- "ua_skip_pref30_128: \n"
-
- "lwr $t2, 40(%[src]) \n"
- "lwl $t2, 43(%[src]) \n"
- "lwr $t3, 44(%[src]) \n"
- "lwl $t3, 47(%[src]) \n"
- "lwr $t4, 48(%[src]) \n"
- "lwl $t4, 51(%[src]) \n"
- "lwr $t5, 52(%[src]) \n"
- "lwl $t5, 55(%[src]) \n"
- "lwr $t6, 56(%[src]) \n"
- "lwl $t6, 59(%[src]) \n"
- "lwr $t7, 60(%[src]) \n"
- "lwl $t7, 63(%[src]) \n"
- "pref 0, 160(%[src]) \n"
- // bring the next lines of src, addr 160
- "sw $t0, 32(%[dst]) \n"
- "sw $t1, 36(%[dst]) \n"
- "sw $t2, 40(%[dst]) \n"
- "sw $t3, 44(%[dst]) \n"
- "sw $t4, 48(%[dst]) \n"
- "sw $t5, 52(%[dst]) \n"
- "sw $t6, 56(%[dst]) \n"
- "sw $t7, 60(%[dst]) \n"
-
- "addiu %[dst],%[dst],64 \n" // adding 64 to dest
- "sgtu $v1,%[dst],$t9 \n"
- "bne %[dst],$a3,$ua_loop16w \n"
- " addiu %[src],%[src],64 \n" // adding 64 to src
- "move %[count],$t8 \n"
-
- // Here dst is word-aligned (src is not) and less than 64 bytes remain
-
- "ua_chk8w: \n"
- "pref 0, 0x0(%[src]) \n"
- "andi $t8, %[count], 0x1f \n" // 32-byte chunk?
- // the t8 is the remainder count
- "beq %[count], $t8, $ua_chk1w \n"
- // when count==t8, no 32-byte chunk
- // (the lwr below sits in the branch delay slot and runs either way)
-
- "lwr $t0, 0(%[src]) \n"
- "lwl $t0, 3(%[src]) \n"
- "lwr $t1, 4(%[src]) \n"
- "lwl $t1, 7(%[src]) \n"
- "lwr $t2, 8(%[src]) \n"
- "lwl $t2, 11(%[src]) \n"
- "lwr $t3, 12(%[src]) \n"
- "lwl $t3, 15(%[src]) \n"
- "lwr $t4, 16(%[src]) \n"
- "lwl $t4, 19(%[src]) \n"
- "lwr $t5, 20(%[src]) \n"
- "lwl $t5, 23(%[src]) \n"
- "lwr $t6, 24(%[src]) \n"
- "lwl $t6, 27(%[src]) \n"
- "lwr $t7, 28(%[src]) \n"
- "lwl $t7, 31(%[src]) \n"
- "addiu %[src], %[src], 32 \n"
-
- "sw $t0, 0(%[dst]) \n"
- "sw $t1, 4(%[dst]) \n"
- "sw $t2, 8(%[dst]) \n"
- "sw $t3, 12(%[dst]) \n"
- "sw $t4, 16(%[dst]) \n"
- "sw $t5, 20(%[dst]) \n"
- "sw $t6, 24(%[dst]) \n"
- "sw $t7, 28(%[dst]) \n"
- "addiu %[dst], %[dst], 32 \n"
-
- "$ua_chk1w: \n"
- "andi %[count], $t8, 0x3 \n"
- // now count is the remainder past 1w chunks
- "beq %[count], $t8, ua_smallCopy \n"
- "subu $a3, $t8, %[count] \n"
- // a3 is count of bytes in 1w chunks
- "addu $a3, %[dst], $a3 \n"
- // now a3 is the dst address past the 1w chunks
-
- // copying in words (4-byte chunks)
- "$ua_wordCopy_loop: \n"
- "lwr $v1, 0(%[src]) \n"
- "lwl $v1, 3(%[src]) \n"
- "addiu %[src], %[src], 4 \n"
- "addiu %[dst], %[dst], 4 \n"
- // note: dst=a1 is word aligned here, see NOTE1
- "bne %[dst], $a3, $ua_wordCopy_loop \n"
- " sw $v1,-4(%[dst]) \n"
-
- // Now less than 4 bytes (value in count) left to copy
- "ua_smallCopy: \n"
- "beqz %[count], leave \n"
- " addu $a3, %[dst], %[count] \n" // a3 = last dst address
- "$ua_smallCopy_loop: \n"
- "lb $v1, 0(%[src]) \n"
- "addiu %[src], %[src], 1 \n"
- "addiu %[dst], %[dst], 1 \n"
- "bne %[dst],$a3,$ua_smallCopy_loop \n"
- " sb $v1, -1(%[dst]) \n"
-
- // Common exit for both paths: fall out of the asm block so the compiler's
- // own epilogue returns to the caller.
- "leave: \n"
- ".set at \n"
- ".set reorder \n"
- : [dst] "+r" (dst), [src] "+r" (src), [count] "+r" (count)
- :
- : "t0", "t1", "t2", "t3", "t4", "t5", "t6", "t7",
- "t8", "t9", "a3", "v1", "at", "memory"
- );
-}
-#endif // HAS_COPYROW_MIPS
-
-// MIPS DSPR2 functions
-#if !defined(LIBYUV_DISABLE_MIPS) && defined(__mips_dsp) && \
- (__mips_dsp_rev >= 2) && \
- (_MIPS_SIM == _MIPS_SIM_ABI32) && (__mips_isa_rev < 6)
-
-// De-interleaves |width| U/V byte pairs from |src_uv| into the planar
-// buffers |dst_u| and |dst_v|.
-//
-// The main loop (label 1) handles 16 pairs per iteration with aligned word
-// loads/stores; the tail loop (label 2) handles the remaining width & 15
-// pairs one at a time.  The tail loop is entered unconditionally when
-// width < 16 and always executes at least once, so callers are expected to
-// pass width > 0.
-// NOTE(review): lw/sw are used in the main loop, which presumably requires
-// word-aligned src_uv, dst_u and dst_v - confirm against the dispatcher.
-//
-// The "memory" clobber tells the compiler that the block reads and writes
-// memory through the pointer operands.
-void SplitUVRow_MIPS_DSPR2(const uint8* src_uv, uint8* dst_u, uint8* dst_v,
- int width) {
- __asm__ __volatile__ (
- ".set push \n"
- ".set noreorder \n"
- "srl $t4, %[width], 4 \n" // multiplies of 16
- "blez $t4, 2f \n"
- " andi %[width], %[width], 0xf \n" // residual
-
- ".p2align 2 \n"
- "1: \n"
- "addiu $t4, $t4, -1 \n"
- "lw $t0, 0(%[src_uv]) \n" // V1 | U1 | V0 | U0
- "lw $t1, 4(%[src_uv]) \n" // V3 | U3 | V2 | U2
- "lw $t2, 8(%[src_uv]) \n" // V5 | U5 | V4 | U4
- "lw $t3, 12(%[src_uv]) \n" // V7 | U7 | V6 | U6
- "lw $t5, 16(%[src_uv]) \n" // V9 | U9 | V8 | U8
- "lw $t6, 20(%[src_uv]) \n" // V11 | U11 | V10 | U10
- "lw $t7, 24(%[src_uv]) \n" // V13 | U13 | V12 | U12
- "lw $t8, 28(%[src_uv]) \n" // V15 | U15 | V14 | U14
- "addiu %[src_uv], %[src_uv], 32 \n"
- "precrq.qb.ph $t9, $t1, $t0 \n" // V3 | V2 | V1 | V0
- "precr.qb.ph $t0, $t1, $t0 \n" // U3 | U2 | U1 | U0
- "precrq.qb.ph $t1, $t3, $t2 \n" // V7 | V6 | V5 | V4
- "precr.qb.ph $t2, $t3, $t2 \n" // U7 | U6 | U5 | U4
- "precrq.qb.ph $t3, $t6, $t5 \n" // V11 | V10 | V9 | V8
- "precr.qb.ph $t5, $t6, $t5 \n" // U11 | U10 | U9 | U8
- "precrq.qb.ph $t6, $t8, $t7 \n" // V15 | V14 | V13 | V12
- "precr.qb.ph $t7, $t8, $t7 \n" // U15 | U14 | U13 | U12
- "sw $t9, 0(%[dst_v]) \n"
- "sw $t0, 0(%[dst_u]) \n"
- "sw $t1, 4(%[dst_v]) \n"
- "sw $t2, 4(%[dst_u]) \n"
- "sw $t3, 8(%[dst_v]) \n"
- "sw $t5, 8(%[dst_u]) \n"
- "sw $t6, 12(%[dst_v]) \n"
- "sw $t7, 12(%[dst_u]) \n"
- "addiu %[dst_v], %[dst_v], 16 \n"
- "bgtz $t4, 1b \n"
- " addiu %[dst_u], %[dst_u], 16 \n"
-
- // Skip the tail loop when no residual pairs remain.
- "beqz %[width], 3f \n"
- " nop \n"
-
- "2: \n"
- "lbu $t0, 0(%[src_uv]) \n"
- "lbu $t1, 1(%[src_uv]) \n"
- "addiu %[src_uv], %[src_uv], 2 \n"
- "addiu %[width], %[width], -1 \n"
- "sb $t0, 0(%[dst_u]) \n"
- "sb $t1, 0(%[dst_v]) \n"
- "addiu %[dst_u], %[dst_u], 1 \n"
- "bgtz %[width], 2b \n"
- " addiu %[dst_v], %[dst_v], 1 \n"
-
- "3: \n"
- ".set pop \n"
- : [src_uv] "+r" (src_uv),
- [width] "+r" (width),
- [dst_u] "+r" (dst_u),
- [dst_v] "+r" (dst_v)
- :
- : "t0", "t1", "t2", "t3",
- "t4", "t5", "t6", "t7", "t8", "t9", "memory"
- );
-}
-
-// Writes the |width| bytes of |src| to |dst| in reverse order.
-//
-// |src| is first advanced to the end of the row.  The main loop (label 1)
-// reverses 16 bytes per iteration: wsbh swaps the bytes inside each
-// halfword and rotr 16 swaps the halfwords, which together reverse the four
-// bytes of a word; the four words are then stored in reverse order.  The
-// tail loop (label 2) copies the remaining width & 15 bytes one at a time.
-//
-// The tail loop decrements $t5 before testing it, so it must loop while the
-// counter is still > 0 (bgtz).  Testing >= 0 would copy one byte too many,
-// reading before the start of |src| and writing past dst + width.  This
-// matches the tail loops of SplitUVRow_MIPS_DSPR2 and
-// MirrorUVRow_MIPS_DSPR2.
-//
-// The tail loop always runs at least once when width < 16, so callers are
-// expected to pass width > 0.
-void MirrorRow_MIPS_DSPR2(const uint8* src, uint8* dst, int width) {
- __asm__ __volatile__ (
- ".set push \n"
- ".set noreorder \n"
-
- "srl $t4, %[width], 4 \n" // multiplies of 16
- "andi $t5, %[width], 0xf \n"
- "blez $t4, 2f \n"
- " addu %[src], %[src], %[width] \n" // src += width
-
- ".p2align 2 \n"
- "1: \n"
- "lw $t0, -16(%[src]) \n" // |3|2|1|0|
- "lw $t1, -12(%[src]) \n" // |7|6|5|4|
- "lw $t2, -8(%[src]) \n" // |11|10|9|8|
- "lw $t3, -4(%[src]) \n" // |15|14|13|12|
- "wsbh $t0, $t0 \n" // |2|3|0|1|
- "wsbh $t1, $t1 \n" // |6|7|4|5|
- "wsbh $t2, $t2 \n" // |10|11|8|9|
- "wsbh $t3, $t3 \n" // |14|15|12|13|
- "rotr $t0, $t0, 16 \n" // |0|1|2|3|
- "rotr $t1, $t1, 16 \n" // |4|5|6|7|
- "rotr $t2, $t2, 16 \n" // |8|9|10|11|
- "rotr $t3, $t3, 16 \n" // |12|13|14|15|
- "addiu %[src], %[src], -16 \n"
- "addiu $t4, $t4, -1 \n"
- "sw $t3, 0(%[dst]) \n" // |15|14|13|12|
- "sw $t2, 4(%[dst]) \n" // |11|10|9|8|
- "sw $t1, 8(%[dst]) \n" // |7|6|5|4|
- "sw $t0, 12(%[dst]) \n" // |3|2|1|0|
- "bgtz $t4, 1b \n"
- " addiu %[dst], %[dst], 16 \n"
- "beqz $t5, 3f \n"
- " nop \n"
-
- "2: \n"
- "lbu $t0, -1(%[src]) \n"
- "addiu $t5, $t5, -1 \n"
- "addiu %[src], %[src], -1 \n"
- "sb $t0, 0(%[dst]) \n"
- "bgtz $t5, 2b \n" // loop while residual bytes remain
- " addiu %[dst], %[dst], 1 \n"
-
- "3: \n"
- ".set pop \n"
- : [src] "+r" (src), [dst] "+r" (dst)
- : [width] "r" (width)
- : "t0", "t1", "t2", "t3", "t4", "t5", "memory"
- );
-}
-
-// Reverses a row of |width| interleaved U/V byte pairs and splits it into
-// the planar buffers |dst_u| and |dst_v|.
-//
-// |src_uv| is first advanced by 2 * width bytes to the end of the row.  The
-// main loop (label 1) consumes 32 source bytes (16 pairs) per iteration,
-// walking backwards; the tail loop (label 2) handles the remaining
-// width & 15 pairs one at a time.  %[x] holds the number of 16-pair blocks
-// and %[y] the residual pair count.
-//
-// Destination stores use swr/swl pairs, so dst_u/dst_v need not be word
-// aligned; the source is read with lw.
-// NOTE(review): lw presumably requires the end of the source row to be
-// word-aligned - confirm against the dispatcher.
-//
-// The clobber list names every temporary the block writes ($t0-$t9,
-// including $t6) plus "memory" for the stores through dst_u/dst_v.
-// The tail loop always runs at least once when width < 16, so callers are
-// expected to pass width > 0.
-void MirrorUVRow_MIPS_DSPR2(const uint8* src_uv, uint8* dst_u, uint8* dst_v,
- int width) {
- int x = 0;
- int y = 0;
- __asm__ __volatile__ (
- ".set push \n"
- ".set noreorder \n"
-
- "addu $t4, %[width], %[width] \n"
- "srl %[x], %[width], 4 \n"
- "andi %[y], %[width], 0xf \n"
- "blez %[x], 2f \n"
- " addu %[src_uv], %[src_uv], $t4 \n"
-
- ".p2align 2 \n"
- "1: \n"
- "lw $t0, -32(%[src_uv]) \n" // |3|2|1|0|
- "lw $t1, -28(%[src_uv]) \n" // |7|6|5|4|
- "lw $t2, -24(%[src_uv]) \n" // |11|10|9|8|
- "lw $t3, -20(%[src_uv]) \n" // |15|14|13|12|
- "lw $t4, -16(%[src_uv]) \n" // |19|18|17|16|
- "lw $t6, -12(%[src_uv]) \n" // |23|22|21|20|
- "lw $t7, -8(%[src_uv]) \n" // |27|26|25|24|
- "lw $t8, -4(%[src_uv]) \n" // |31|30|29|28|
-
- "rotr $t0, $t0, 16 \n" // |1|0|3|2|
- "rotr $t1, $t1, 16 \n" // |5|4|7|6|
- "rotr $t2, $t2, 16 \n" // |9|8|11|10|
- "rotr $t3, $t3, 16 \n" // |13|12|15|14|
- "rotr $t4, $t4, 16 \n" // |17|16|19|18|
- "rotr $t6, $t6, 16 \n" // |21|20|23|22|
- "rotr $t7, $t7, 16 \n" // |25|24|27|26|
- "rotr $t8, $t8, 16 \n" // |29|28|31|30|
- "precr.qb.ph $t9, $t0, $t1 \n" // |0|2|4|6|
- "precrq.qb.ph $t5, $t0, $t1 \n" // |1|3|5|7|
- "precr.qb.ph $t0, $t2, $t3 \n" // |8|10|12|14|
- "precrq.qb.ph $t1, $t2, $t3 \n" // |9|11|13|15|
- "precr.qb.ph $t2, $t4, $t6 \n" // |16|18|20|22|
- "precrq.qb.ph $t3, $t4, $t6 \n" // |17|19|21|23|
- "precr.qb.ph $t4, $t7, $t8 \n" // |24|26|28|30|
- "precrq.qb.ph $t6, $t7, $t8 \n" // |25|27|29|31|
- "addiu %[src_uv], %[src_uv], -32 \n"
- "addiu %[x], %[x], -1 \n"
- "swr $t4, 0(%[dst_u]) \n"
- "swl $t4, 3(%[dst_u]) \n" // |30|28|26|24|
- "swr $t6, 0(%[dst_v]) \n"
- "swl $t6, 3(%[dst_v]) \n" // |31|29|27|25|
- "swr $t2, 4(%[dst_u]) \n"
- "swl $t2, 7(%[dst_u]) \n" // |22|20|18|16|
- "swr $t3, 4(%[dst_v]) \n"
- "swl $t3, 7(%[dst_v]) \n" // |23|21|19|17|
- "swr $t0, 8(%[dst_u]) \n"
- "swl $t0, 11(%[dst_u]) \n" // |14|12|10|8|
- "swr $t1, 8(%[dst_v]) \n"
- "swl $t1, 11(%[dst_v]) \n" // |15|13|11|9|
- "swr $t9, 12(%[dst_u]) \n"
- "swl $t9, 15(%[dst_u]) \n" // |6|4|2|0|
- "swr $t5, 12(%[dst_v]) \n"
- "swl $t5, 15(%[dst_v]) \n" // |7|5|3|1|
- "addiu %[dst_v], %[dst_v], 16 \n"
- "bgtz %[x], 1b \n"
- " addiu %[dst_u], %[dst_u], 16 \n"
- // No residual pairs: done.  Otherwise fall through into the tail loop.
- "beqz %[y], 3f \n"
- " nop \n"
-
- "2: \n"
- "lbu $t0, -2(%[src_uv]) \n"
- "lbu $t1, -1(%[src_uv]) \n"
- "addiu %[src_uv], %[src_uv], -2 \n"
- "addiu %[y], %[y], -1 \n"
- "sb $t0, 0(%[dst_u]) \n"
- "sb $t1, 0(%[dst_v]) \n"
- "addiu %[dst_u], %[dst_u], 1 \n"
- "bgtz %[y], 2b \n"
- " addiu %[dst_v], %[dst_v], 1 \n"
-
- "3: \n"
- ".set pop \n"
- : [src_uv] "+r" (src_uv),
- [dst_u] "+r" (dst_u),
- [dst_v] "+r" (dst_v),
- [x] "=&r" (x),
- [y] "+r" (y)
- : [width] "r" (width)
- : "t0", "t1", "t2", "t3", "t4",
- "t5", "t6", "t7", "t8", "t9", "memory"
- );
-}
-
-// Assembly fragment shared by the I422To*Row_MIPS_DSPR2 functions.
-//
-// Convert (4 Y and 2 VU) I422 and arrange RGB values into
-// t5 = | 0 | B0 | 0 | b0 |
-// t4 = | 0 | B1 | 0 | b1 |
-// t9 = | 0 | G0 | 0 | g0 |
-// t8 = | 0 | G1 | 0 | g1 |
-// t2 = | 0 | R0 | 0 | r0 |
-// t1 = | 0 | R1 | 0 | r1 |
-//
-// Contract with the enclosing asm statement:
-//  * It must provide the operands %[y_buf], %[u_buf] and %[v_buf].
-//  * $s0-$s5 must already hold the replicated halfword constants that the
-//    fragment reads ($s0 multiplies Y, $s1 multiplies U and $s2/$s3
-//    multiply V; $s4 is subtracted from Y and $s5 from U/V, and $s5 is
-//    also used as the bias around the saturating shift pair that clamps
-//    the results).
-//  * The fragment overwrites $t0-$t6, $t8 and $t9.
-//  * It advances %[u_buf] and %[v_buf] by 2 bytes each; it reads 4 bytes
-//    from %[y_buf] but does NOT advance it - the caller does that.
-#define I422ToTransientMipsRGB \
- "lw $t0, 0(%[y_buf]) \n" \
- "lhu $t1, 0(%[u_buf]) \n" \
- "lhu $t2, 0(%[v_buf]) \n" \
- "preceu.ph.qbr $t1, $t1 \n" \
- "preceu.ph.qbr $t2, $t2 \n" \
- "preceu.ph.qbra $t3, $t0 \n" \
- "preceu.ph.qbla $t0, $t0 \n" \
- "subu.ph $t1, $t1, $s5 \n" \
- "subu.ph $t2, $t2, $s5 \n" \
- "subu.ph $t3, $t3, $s4 \n" \
- "subu.ph $t0, $t0, $s4 \n" \
- "mul.ph $t3, $t3, $s0 \n" \
- "mul.ph $t0, $t0, $s0 \n" \
- "shll.ph $t4, $t1, 0x7 \n" \
- "subu.ph $t4, $t4, $t1 \n" \
- "mul.ph $t6, $t1, $s1 \n" \
- "mul.ph $t1, $t2, $s2 \n" \
- "addq_s.ph $t5, $t4, $t3 \n" \
- "addq_s.ph $t4, $t4, $t0 \n" \
- "shra.ph $t5, $t5, 6 \n" \
- "shra.ph $t4, $t4, 6 \n" \
- "addiu %[u_buf], 2 \n" \
- "addiu %[v_buf], 2 \n" \
- "addu.ph $t6, $t6, $t1 \n" \
- "mul.ph $t1, $t2, $s3 \n" \
- "addu.ph $t9, $t6, $t3 \n" \
- "addu.ph $t8, $t6, $t0 \n" \
- "shra.ph $t9, $t9, 6 \n" \
- "shra.ph $t8, $t8, 6 \n" \
- "addu.ph $t2, $t1, $t3 \n" \
- "addu.ph $t1, $t1, $t0 \n" \
- "shra.ph $t2, $t2, 6 \n" \
- "shra.ph $t1, $t1, 6 \n" \
- "subu.ph $t5, $t5, $s5 \n" \
- "subu.ph $t4, $t4, $s5 \n" \
- "subu.ph $t9, $t9, $s5 \n" \
- "subu.ph $t8, $t8, $s5 \n" \
- "subu.ph $t2, $t2, $s5 \n" \
- "subu.ph $t1, $t1, $s5 \n" \
- "shll_s.ph $t5, $t5, 8 \n" \
- "shll_s.ph $t4, $t4, 8 \n" \
- "shll_s.ph $t9, $t9, 8 \n" \
- "shll_s.ph $t8, $t8, 8 \n" \
- "shll_s.ph $t2, $t2, 8 \n" \
- "shll_s.ph $t1, $t1, 8 \n" \
- "shra.ph $t5, $t5, 8 \n" \
- "shra.ph $t4, $t4, 8 \n" \
- "shra.ph $t9, $t9, 8 \n" \
- "shra.ph $t8, $t8, 8 \n" \
- "shra.ph $t2, $t2, 8 \n" \
- "shra.ph $t1, $t1, 8 \n" \
- "addu.ph $t5, $t5, $s5 \n" \
- "addu.ph $t4, $t4, $s5 \n" \
- "addu.ph $t9, $t9, $s5 \n" \
- "addu.ph $t8, $t8, $s5 \n" \
- "addu.ph $t2, $t2, $s5 \n" \
- "addu.ph $t1, $t1, $s5 \n"
-
-// Converts one row of I422 (planar Y, U, V) to 32-bit pixels with the
-// channels packed as |ff|R|G|B| within each stored word.
-//
-// Each loop iteration consumes 4 Y, 2 U and 2 V bytes and stores 4 pixels
-// (16 bytes) to |rgb_buf|.  The loop exits only when |width| reaches exactly
-// zero after subtracting 4, so |width| must be a non-negative multiple of 4;
-// width == 0 returns immediately.
-//
-// $s0-$s5 hold the conversion constants consumed by
-// I422ToTransientMipsRGB; $s6 holds the 0xff00ff00 alpha mask.  Note that
-// the first repl.ph sits in the delay slot of the initial beqz and executes
-// even when width is zero.
-//
-// The "memory" clobber tells the compiler that the block reads the three
-// source planes and writes |rgb_buf|.
-void I422ToARGBRow_MIPS_DSPR2(const uint8* y_buf,
- const uint8* u_buf,
- const uint8* v_buf,
- uint8* rgb_buf,
- int width) {
- __asm__ __volatile__ (
- ".set push \n"
- ".set noreorder \n"
- "beqz %[width], 2f \n"
- " repl.ph $s0, 74 \n" // |YG|YG| = |74|74|
- "repl.ph $s1, -25 \n" // |UG|UG| = |-25|-25|
- "repl.ph $s2, -52 \n" // |VG|VG| = |-52|-52|
- "repl.ph $s3, 102 \n" // |VR|VR| = |102|102|
- "repl.ph $s4, 16 \n" // |0|16|0|16|
- "repl.ph $s5, 128 \n" // |128|128| // clipping
- "lui $s6, 0xff00 \n"
- "ori $s6, 0xff00 \n" // |ff|00|ff|00|
-
- ".p2align 2 \n"
- "1: \n"
- I422ToTransientMipsRGB
-// Arranging into argb format
- "precr.qb.ph $t4, $t8, $t4 \n" // |G1|g1|B1|b1|
- "precr.qb.ph $t5, $t9, $t5 \n" // |G0|g0|B0|b0|
- "addiu %[width], -4 \n"
- "precrq.qb.ph $t8, $t4, $t5 \n" // |G1|B1|G0|B0|
- "precr.qb.ph $t9, $t4, $t5 \n" // |g1|b1|g0|b0|
- "precr.qb.ph $t2, $t1, $t2 \n" // |R1|r1|R0|r0|
-
- "addiu %[y_buf], 4 \n"
- "preceu.ph.qbla $t1, $t2 \n" // |0 |R1|0 |R0|
- "preceu.ph.qbra $t2, $t2 \n" // |0 |r1|0 |r0|
- "or $t1, $t1, $s6 \n" // |ff|R1|ff|R0|
- "or $t2, $t2, $s6 \n" // |ff|r1|ff|r0|
- "precrq.ph.w $t0, $t2, $t9 \n" // |ff|r1|g1|b1|
- "precrq.ph.w $t3, $t1, $t8 \n" // |ff|R1|G1|B1|
- "sll $t9, $t9, 16 \n"
- "sll $t8, $t8, 16 \n"
- "packrl.ph $t2, $t2, $t9 \n" // |ff|r0|g0|b0|
- "packrl.ph $t1, $t1, $t8 \n" // |ff|R0|G0|B0|
-// Store results.
- "sw $t2, 0(%[rgb_buf]) \n"
- "sw $t0, 4(%[rgb_buf]) \n"
- "sw $t1, 8(%[rgb_buf]) \n"
- "sw $t3, 12(%[rgb_buf]) \n"
- "bnez %[width], 1b \n"
- " addiu %[rgb_buf], 16 \n"
- "2: \n"
- ".set pop \n"
- :[y_buf] "+r" (y_buf),
- [u_buf] "+r" (u_buf),
- [v_buf] "+r" (v_buf),
- [width] "+r" (width),
- [rgb_buf] "+r" (rgb_buf)
- :
- : "t0", "t1", "t2", "t3", "t4", "t5",
- "t6", "t7", "t8", "t9",
- "s0", "s1", "s2", "s3",
- "s4", "s5", "s6", "memory"
- );
-}
-
-// Converts one row of I422 (planar Y, U, V) to 32-bit pixels with the
-// channels packed as |ff|B|G|R| within each stored word.
-//
-// Each loop iteration consumes 4 Y, 2 U and 2 V bytes and stores 4 pixels
-// (16 bytes) to |rgb_buf|.  The loop exits only when |width| reaches exactly
-// zero after subtracting 4, so |width| must be a non-negative multiple of 4;
-// width == 0 returns immediately.
-//
-// $s0-$s5 hold the conversion constants consumed by
-// I422ToTransientMipsRGB; $s6 holds the 0xff00ff00 alpha mask.  Note that
-// the first repl.ph sits in the delay slot of the initial beqz and executes
-// even when width is zero.
-//
-// The "memory" clobber tells the compiler that the block reads the three
-// source planes and writes |rgb_buf|.
-void I422ToABGRRow_MIPS_DSPR2(const uint8* y_buf,
- const uint8* u_buf,
- const uint8* v_buf,
- uint8* rgb_buf,
- int width) {
- __asm__ __volatile__ (
- ".set push \n"
- ".set noreorder \n"
- "beqz %[width], 2f \n"
- " repl.ph $s0, 74 \n" // |YG|YG| = |74|74|
- "repl.ph $s1, -25 \n" // |UG|UG| = |-25|-25|
- "repl.ph $s2, -52 \n" // |VG|VG| = |-52|-52|
- "repl.ph $s3, 102 \n" // |VR|VR| = |102|102|
- "repl.ph $s4, 16 \n" // |0|16|0|16|
- "repl.ph $s5, 128 \n" // |128|128|
- "lui $s6, 0xff00 \n"
- "ori $s6, 0xff00 \n" // |ff|00|ff|00|
-
- ".p2align 2 \n"
- "1: \n"
- I422ToTransientMipsRGB
-// Arranging into abgr format
- "precr.qb.ph $t0, $t8, $t1 \n" // |G1|g1|R1|r1|
- "precr.qb.ph $t3, $t9, $t2 \n" // |G0|g0|R0|r0|
- "precrq.qb.ph $t8, $t0, $t3 \n" // |G1|R1|G0|R0|
- "precr.qb.ph $t9, $t0, $t3 \n" // |g1|r1|g0|r0|
-
- "precr.qb.ph $t2, $t4, $t5 \n" // |B1|b1|B0|b0|
- "addiu %[width], -4 \n"
- "addiu %[y_buf], 4 \n"
- "preceu.ph.qbla $t1, $t2 \n" // |0 |B1|0 |B0|
- "preceu.ph.qbra $t2, $t2 \n" // |0 |b1|0 |b0|
- "or $t1, $t1, $s6 \n" // |ff|B1|ff|B0|
- "or $t2, $t2, $s6 \n" // |ff|b1|ff|b0|
- "precrq.ph.w $t0, $t2, $t9 \n" // |ff|b1|g1|r1|
- "precrq.ph.w $t3, $t1, $t8 \n" // |ff|B1|G1|R1|
- "sll $t9, $t9, 16 \n"
- "sll $t8, $t8, 16 \n"
- "packrl.ph $t2, $t2, $t9 \n" // |ff|b0|g0|r0|
- "packrl.ph $t1, $t1, $t8 \n" // |ff|B0|G0|R0|
-// Store results.
- "sw $t2, 0(%[rgb_buf]) \n"
- "sw $t0, 4(%[rgb_buf]) \n"
- "sw $t1, 8(%[rgb_buf]) \n"
- "sw $t3, 12(%[rgb_buf]) \n"
- "bnez %[width], 1b \n"
- " addiu %[rgb_buf], 16 \n"
- "2: \n"
- ".set pop \n"
- :[y_buf] "+r" (y_buf),
- [u_buf] "+r" (u_buf),
- [v_buf] "+r" (v_buf),
- [width] "+r" (width),
- [rgb_buf] "+r" (rgb_buf)
- :
- : "t0", "t1", "t2", "t3", "t4", "t5",
- "t6", "t7", "t8", "t9",
- "s0", "s1", "s2", "s3",
- "s4", "s5", "s6", "memory"
- );
-}
-
-// Converts one row of I422 (planar Y, U, V) to 32-bit pixels with the
-// channels packed as |B|G|R|ff| within each stored word.
-//
-// Each loop iteration consumes 4 Y, 2 U and 2 V bytes and stores 4 pixels
-// (16 bytes) to |rgb_buf|.  The loop exits only when |width| reaches exactly
-// zero after subtracting 4, so |width| must be a non-negative multiple of 4;
-// width == 0 returns immediately.
-//
-// $s0-$s5 hold the conversion constants consumed by
-// I422ToTransientMipsRGB; $s6 holds the 0x00ff00ff alpha mask (alpha lives
-// in the low byte here, unlike the ARGB/ABGR variants).  Note that the
-// first repl.ph sits in the delay slot of the initial beqz and executes
-// even when width is zero.
-//
-// The "memory" clobber tells the compiler that the block reads the three
-// source planes and writes |rgb_buf|.
-void I422ToBGRARow_MIPS_DSPR2(const uint8* y_buf,
- const uint8* u_buf,
- const uint8* v_buf,
- uint8* rgb_buf,
- int width) {
- __asm__ __volatile__ (
- ".set push \n"
- ".set noreorder \n"
- "beqz %[width], 2f \n"
- " repl.ph $s0, 74 \n" // |YG|YG| = |74 |74 |
- "repl.ph $s1, -25 \n" // |UG|UG| = |-25|-25|
- "repl.ph $s2, -52 \n" // |VG|VG| = |-52|-52|
- "repl.ph $s3, 102 \n" // |VR|VR| = |102|102|
- "repl.ph $s4, 16 \n" // |0|16|0|16|
- "repl.ph $s5, 128 \n" // |128|128|
- "lui $s6, 0xff \n"
- "ori $s6, 0xff \n" // |00|ff|00|ff|
-
- ".p2align 2 \n"
- "1: \n"
- I422ToTransientMipsRGB
- // Arranging into bgra format
- "precr.qb.ph $t4, $t4, $t8 \n" // |B1|b1|G1|g1|
- "precr.qb.ph $t5, $t5, $t9 \n" // |B0|b0|G0|g0|
- "precrq.qb.ph $t8, $t4, $t5 \n" // |B1|G1|B0|G0|
- "precr.qb.ph $t9, $t4, $t5 \n" // |b1|g1|b0|g0|
-
- "precr.qb.ph $t2, $t1, $t2 \n" // |R1|r1|R0|r0|
- "addiu %[width], -4 \n"
- "addiu %[y_buf], 4 \n"
- "preceu.ph.qbla $t1, $t2 \n" // |0 |R1|0 |R0|
- "preceu.ph.qbra $t2, $t2 \n" // |0 |r1|0 |r0|
- "sll $t1, $t1, 8 \n" // |R1|0 |R0|0 |
- "sll $t2, $t2, 8 \n" // |r1|0 |r0|0 |
- "or $t1, $t1, $s6 \n" // |R1|ff|R0|ff|
- "or $t2, $t2, $s6 \n" // |r1|ff|r0|ff|
- "precrq.ph.w $t0, $t9, $t2 \n" // |b1|g1|r1|ff|
- "precrq.ph.w $t3, $t8, $t1 \n" // |B1|G1|R1|ff|
- "sll $t1, $t1, 16 \n"
- "sll $t2, $t2, 16 \n"
- "packrl.ph $t2, $t9, $t2 \n" // |b0|g0|r0|ff|
- "packrl.ph $t1, $t8, $t1 \n" // |B0|G0|R0|ff|
-// Store results.
- "sw $t2, 0(%[rgb_buf]) \n"
- "sw $t0, 4(%[rgb_buf]) \n"
- "sw $t1, 8(%[rgb_buf]) \n"
- "sw $t3, 12(%[rgb_buf]) \n"
- "bnez %[width], 1b \n"
- " addiu %[rgb_buf], 16 \n"
- "2: \n"
- ".set pop \n"
- :[y_buf] "+r" (y_buf),
- [u_buf] "+r" (u_buf),
- [v_buf] "+r" (v_buf),
- [width] "+r" (width),
- [rgb_buf] "+r" (rgb_buf)
- :
- : "t0", "t1", "t2", "t3", "t4", "t5",
- "t6", "t7", "t8", "t9",
- "s0", "s1", "s2", "s3",
- "s4", "s5", "s6", "memory"
- );
-}
-
-// Bilinear filter 8x2 -> 8x1
-// Bilinear filter 8x2 -> 8x1.
-//
-// Blends the row at |src_ptr| with the row |src_stride| bytes after it and
-// writes the result to |dst_ptr|.  For each byte the two source values are
-// weighted by y0_fraction = 256 - source_y_fraction (first row) and
-// source_y_fraction (second row), summed as 16-bit halfwords and shifted
-// right by 8.
-//
-// Eight bytes are produced per iteration and the loop body always executes
-// at least once, so |dst_width| is expected to be a positive multiple of 8.
-// NOTE(review): lw/sw are used throughout, which presumably requires both
-// source rows and the destination to be word-aligned - confirm against the
-// dispatcher.
-//
-// The second-row pointer is computed in C, so |src_stride| is not passed to
-// the assembly.  The "memory" clobber tells the compiler that the block
-// reads both source rows and writes |dst_ptr|.
-void InterpolateRow_MIPS_DSPR2(uint8* dst_ptr, const uint8* src_ptr,
- ptrdiff_t src_stride, int dst_width,
- int source_y_fraction) {
- int y0_fraction = 256 - source_y_fraction;
- const uint8* src_ptr1 = src_ptr + src_stride;
-
- __asm__ __volatile__ (
- ".set push \n"
- ".set noreorder \n"
-
- // Replicate each weight into both halfwords of a register.
- "replv.ph $t0, %[y0_fraction] \n"
- "replv.ph $t1, %[source_y_fraction] \n"
-
- ".p2align 2 \n"
- "1: \n"
- "lw $t2, 0(%[src_ptr]) \n"
- "lw $t3, 0(%[src_ptr1]) \n"
- "lw $t4, 4(%[src_ptr]) \n"
- "lw $t5, 4(%[src_ptr1]) \n"
- // Widen each byte to a halfword and multiply by its row weight.
- "muleu_s.ph.qbl $t6, $t2, $t0 \n"
- "muleu_s.ph.qbr $t7, $t2, $t0 \n"
- "muleu_s.ph.qbl $t8, $t3, $t1 \n"
- "muleu_s.ph.qbr $t9, $t3, $t1 \n"
- "muleu_s.ph.qbl $t2, $t4, $t0 \n"
- "muleu_s.ph.qbr $t3, $t4, $t0 \n"
- "muleu_s.ph.qbl $t4, $t5, $t1 \n"
- "muleu_s.ph.qbr $t5, $t5, $t1 \n"
- // Sum the two weighted rows, then drop the 8 fractional bits.
- "addq.ph $t6, $t6, $t8 \n"
- "addq.ph $t7, $t7, $t9 \n"
- "addq.ph $t2, $t2, $t4 \n"
- "addq.ph $t3, $t3, $t5 \n"
- "shra.ph $t6, $t6, 8 \n"
- "shra.ph $t7, $t7, 8 \n"
- "shra.ph $t2, $t2, 8 \n"
- "shra.ph $t3, $t3, 8 \n"
- // Pack the low byte of every halfword back into two words.
- "precr.qb.ph $t6, $t6, $t7 \n"
- "precr.qb.ph $t2, $t2, $t3 \n"
- "addiu %[src_ptr], %[src_ptr], 8 \n"
- "addiu %[src_ptr1], %[src_ptr1], 8 \n"
- "addiu %[dst_width], %[dst_width], -8 \n"
- "sw $t6, 0(%[dst_ptr]) \n"
- "sw $t2, 4(%[dst_ptr]) \n"
- "bgtz %[dst_width], 1b \n"
- " addiu %[dst_ptr], %[dst_ptr], 8 \n"
-
- ".set pop \n"
- : [dst_ptr] "+r" (dst_ptr),
- [src_ptr1] "+r" (src_ptr1),
- [src_ptr] "+r" (src_ptr),
- [dst_width] "+r" (dst_width)
- : [source_y_fraction] "r" (source_y_fraction),
- [y0_fraction] "r" (y0_fraction)
- : "t0", "t1", "t2", "t3", "t4", "t5",
- "t6", "t7", "t8", "t9", "memory"
- );
-}
-#endif // __mips_dsp_rev >= 2
-
-#endif // defined(__mips__)
-
-#ifdef __cplusplus
-} // extern "C"
-} // namespace libyuv
-#endif
diff --git a/third_party/aom/third_party/libyuv/source/row_neon.cc b/third_party/aom/third_party/libyuv/source/row_neon.cc
deleted file mode 100644
index 1a72eb903..000000000
--- a/third_party/aom/third_party/libyuv/source/row_neon.cc
+++ /dev/null
@@ -1,3084 +0,0 @@
-/*
- * Copyright 2011 The LibYuv Project Authors. All rights reserved.
- *
- * Use of this source code is governed by a BSD-style license
- * that can be found in the LICENSE file in the root of the source
- * tree. An additional intellectual property rights grant can be found
- * in the file PATENTS. All contributing project authors may
- * be found in the AUTHORS file in the root of the source tree.
- */
-
-#include "libyuv/row.h"
-
-#ifdef __cplusplus
-namespace libyuv {
-extern "C" {
-#endif
-
-// This module is for GCC Neon
-#if !defined(LIBYUV_DISABLE_NEON) && defined(__ARM_NEON__) && \
- !defined(__aarch64__)
-
-// Read 8 Y, 4 U and 4 V from 422
-#define READYUV422 \
- MEMACCESS(0) \
- "vld1.8 {d0}, [%0]! \n" \
- MEMACCESS(1) \
- "vld1.32 {d2[0]}, [%1]! \n" \
- MEMACCESS(2) \
- "vld1.32 {d2[1]}, [%2]! \n"
-
-// Read 8 Y, 2 U and 2 V from 422
-#define READYUV411 \
- MEMACCESS(0) \
- "vld1.8 {d0}, [%0]! \n" \
- MEMACCESS(1) \
- "vld1.16 {d2[0]}, [%1]! \n" \
- MEMACCESS(2) \
- "vld1.16 {d2[1]}, [%2]! \n" \
- "vmov.u8 d3, d2 \n" \
- "vzip.u8 d2, d3 \n"
-
-// Read 8 Y, 8 U and 8 V from 444
-#define READYUV444 \
- MEMACCESS(0) \
- "vld1.8 {d0}, [%0]! \n" \
- MEMACCESS(1) \
- "vld1.8 {d2}, [%1]! \n" \
- MEMACCESS(2) \
- "vld1.8 {d3}, [%2]! \n" \
- "vpaddl.u8 q1, q1 \n" \
- "vrshrn.u16 d2, q1, #1 \n"
-
-// Read 8 Y, and set 4 U and 4 V to 128
-#define READYUV400 \
- MEMACCESS(0) \
- "vld1.8 {d0}, [%0]! \n" \
- "vmov.u8 d2, #128 \n"
-
-// Read 8 Y and 4 UV from NV12
-#define READNV12 \
- MEMACCESS(0) \
- "vld1.8 {d0}, [%0]! \n" \
- MEMACCESS(1) \
- "vld1.8 {d2}, [%1]! \n" \
- "vmov.u8 d3, d2 \n"/* split odd/even uv apart */\
- "vuzp.u8 d2, d3 \n" \
- "vtrn.u32 d2, d3 \n"
-
-// Read 8 Y and 4 VU from NV21
-#define READNV21 \
- MEMACCESS(0) \
- "vld1.8 {d0}, [%0]! \n" \
- MEMACCESS(1) \
- "vld1.8 {d2}, [%1]! \n" \
- "vmov.u8 d3, d2 \n"/* split odd/even uv apart */\
- "vuzp.u8 d3, d2 \n" \
- "vtrn.u32 d2, d3 \n"
-
-// Read 8 YUY2
-#define READYUY2 \
- MEMACCESS(0) \
- "vld2.8 {d0, d2}, [%0]! \n" \
- "vmov.u8 d3, d2 \n" \
- "vuzp.u8 d2, d3 \n" \
- "vtrn.u32 d2, d3 \n"
-
-// Read 8 UYVY
-#define READUYVY \
- MEMACCESS(0) \
- "vld2.8 {d2, d3}, [%0]! \n" \
- "vmov.u8 d0, d3 \n" \
- "vmov.u8 d3, d2 \n" \
- "vuzp.u8 d2, d3 \n" \
- "vtrn.u32 d2, d3 \n"
-
-#define YUV422TORGB_SETUP_REG \
- MEMACCESS([kUVToRB]) \
- "vld1.8 {d24}, [%[kUVToRB]] \n" \
- MEMACCESS([kUVToG]) \
- "vld1.8 {d25}, [%[kUVToG]] \n" \
- MEMACCESS([kUVBiasBGR]) \
- "vld1.16 {d26[], d27[]}, [%[kUVBiasBGR]]! \n" \
- MEMACCESS([kUVBiasBGR]) \
- "vld1.16 {d8[], d9[]}, [%[kUVBiasBGR]]! \n" \
- MEMACCESS([kUVBiasBGR]) \
- "vld1.16 {d28[], d29[]}, [%[kUVBiasBGR]] \n" \
- MEMACCESS([kYToRgb]) \
- "vld1.32 {d30[], d31[]}, [%[kYToRgb]] \n"
-
-#define YUV422TORGB \
- "vmull.u8 q8, d2, d24 \n" /* u/v B/R component */\
- "vmull.u8 q9, d2, d25 \n" /* u/v G component */\
- "vmovl.u8 q0, d0 \n" /* Y */\
- "vmovl.s16 q10, d1 \n" \
- "vmovl.s16 q0, d0 \n" \
- "vmul.s32 q10, q10, q15 \n" \
- "vmul.s32 q0, q0, q15 \n" \
- "vqshrun.s32 d0, q0, #16 \n" \
- "vqshrun.s32 d1, q10, #16 \n" /* Y */\
- "vadd.s16 d18, d19 \n" \
- "vshll.u16 q1, d16, #16 \n" /* Replicate u * UB */\
- "vshll.u16 q10, d17, #16 \n" /* Replicate v * VR */\
- "vshll.u16 q3, d18, #16 \n" /* Replicate (v*VG + u*UG)*/\
- "vaddw.u16 q1, q1, d16 \n" \
- "vaddw.u16 q10, q10, d17 \n" \
- "vaddw.u16 q3, q3, d18 \n" \
- "vqadd.s16 q8, q0, q13 \n" /* B */ \
- "vqadd.s16 q9, q0, q14 \n" /* R */ \
- "vqadd.s16 q0, q0, q4 \n" /* G */ \
- "vqadd.s16 q8, q8, q1 \n" /* B */ \
- "vqadd.s16 q9, q9, q10 \n" /* R */ \
- "vqsub.s16 q0, q0, q3 \n" /* G */ \
- "vqshrun.s16 d20, q8, #6 \n" /* B */ \
- "vqshrun.s16 d22, q9, #6 \n" /* R */ \
- "vqshrun.s16 d21, q0, #6 \n" /* G */
-
-// YUV to RGB conversion constants.
-// Y contribution to R,G,B. Scale and bias.
-#define YG 18997 /* round(1.164 * 64 * 256 * 256 / 257) */
-#define YGB 1160 /* 1.164 * 64 * 16 - adjusted for even error distribution */
-
-// U and V contributions to R,G,B.
-#define UB -128 /* -min(128, round(2.018 * 64)) */
-#define UG 25 /* -round(-0.391 * 64) */
-#define VG 52 /* -round(-0.813 * 64) */
-#define VR -102 /* -round(1.596 * 64) */
-
-// Bias values to subtract 16 from Y and 128 from U and V.
-#define BB (UB * 128 - YGB)
-#define BG (UG * 128 + VG * 128 - YGB)
-#define BR (VR * 128 - YGB)
-
-static uvec8 kUVToRB = { 128, 128, 128, 128, 102, 102, 102, 102,
- 0, 0, 0, 0, 0, 0, 0, 0 };
-static uvec8 kUVToG = { 25, 25, 25, 25, 52, 52, 52, 52,
- 0, 0, 0, 0, 0, 0, 0, 0 };
-static vec16 kUVBiasBGR = { BB, BG, BR, 0, 0, 0, 0, 0 };
-static vec32 kYToRgb = { 0x0101 * YG, 0, 0, 0 };
-
-#undef YG
-#undef YGB
-#undef UB
-#undef UG
-#undef VG
-#undef VR
-#undef BB
-#undef BG
-#undef BR
-
-void I444ToARGBRow_NEON(const uint8* src_y,
- const uint8* src_u,
- const uint8* src_v,
- uint8* dst_argb,
- int width) {
- asm volatile (
- YUV422TORGB_SETUP_REG
- ".p2align 2 \n"
- "1: \n"
- READYUV444
- YUV422TORGB
- "subs %4, %4, #8 \n"
- "vmov.u8 d23, #255 \n"
- MEMACCESS(3)
- "vst4.8 {d20, d21, d22, d23}, [%3]! \n"
- "bgt 1b \n"
- : "+r"(src_y), // %0
- "+r"(src_u), // %1
- "+r"(src_v), // %2
- "+r"(dst_argb), // %3
- "+r"(width) // %4
- : [kUVToRB]"r"(&kUVToRB), // %5
- [kUVToG]"r"(&kUVToG), // %6
- [kUVBiasBGR]"r"(&kUVBiasBGR),
- [kYToRgb]"r"(&kYToRgb)
- : "cc", "memory", "q0", "q1", "q2", "q3", "q4",
- "q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15"
- );
-}
-
-void I422ToARGBRow_NEON(const uint8* src_y,
- const uint8* src_u,
- const uint8* src_v,
- uint8* dst_argb,
- int width) {
- asm volatile (
- YUV422TORGB_SETUP_REG
- ".p2align 2 \n"
- "1: \n"
- READYUV422
- YUV422TORGB
- "subs %4, %4, #8 \n"
- "vmov.u8 d23, #255 \n"
- MEMACCESS(3)
- "vst4.8 {d20, d21, d22, d23}, [%3]! \n"
- "bgt 1b \n"
- : "+r"(src_y), // %0
- "+r"(src_u), // %1
- "+r"(src_v), // %2
- "+r"(dst_argb), // %3
- "+r"(width) // %4
- : [kUVToRB]"r"(&kUVToRB), // %5
- [kUVToG]"r"(&kUVToG), // %6
- [kUVBiasBGR]"r"(&kUVBiasBGR),
- [kYToRgb]"r"(&kYToRgb)
- : "cc", "memory", "q0", "q1", "q2", "q3", "q4",
- "q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15"
- );
-}
-
-void I411ToARGBRow_NEON(const uint8* src_y,
- const uint8* src_u,
- const uint8* src_v,
- uint8* dst_argb,
- int width) {
- asm volatile (
- YUV422TORGB_SETUP_REG
- ".p2align 2 \n"
- "1: \n"
- READYUV411
- YUV422TORGB
- "subs %4, %4, #8 \n"
- "vmov.u8 d23, #255 \n"
- MEMACCESS(3)
- "vst4.8 {d20, d21, d22, d23}, [%3]! \n"
- "bgt 1b \n"
- : "+r"(src_y), // %0
- "+r"(src_u), // %1
- "+r"(src_v), // %2
- "+r"(dst_argb), // %3
- "+r"(width) // %4
- : [kUVToRB]"r"(&kUVToRB), // %5
- [kUVToG]"r"(&kUVToG), // %6
- [kUVBiasBGR]"r"(&kUVBiasBGR),
- [kYToRgb]"r"(&kYToRgb)
- : "cc", "memory", "q0", "q1", "q2", "q3", "q4",
- "q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15"
- );
-}
-
-void I422ToBGRARow_NEON(const uint8* src_y,
- const uint8* src_u,
- const uint8* src_v,
- uint8* dst_bgra,
- int width) {
- asm volatile (
- YUV422TORGB_SETUP_REG
- ".p2align 2 \n"
- "1: \n"
- READYUV422
- YUV422TORGB
- "subs %4, %4, #8 \n"
- "vswp.u8 d20, d22 \n"
- "vmov.u8 d19, #255 \n"
- MEMACCESS(3)
- "vst4.8 {d19, d20, d21, d22}, [%3]! \n"
- "bgt 1b \n"
- : "+r"(src_y), // %0
- "+r"(src_u), // %1
- "+r"(src_v), // %2
- "+r"(dst_bgra), // %3
- "+r"(width) // %4
- : [kUVToRB]"r"(&kUVToRB), // %5
- [kUVToG]"r"(&kUVToG), // %6
- [kUVBiasBGR]"r"(&kUVBiasBGR),
- [kYToRgb]"r"(&kYToRgb)
- : "cc", "memory", "q0", "q1", "q2", "q3", "q4",
- "q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15"
- );
-}
-
-void I422ToABGRRow_NEON(const uint8* src_y,
- const uint8* src_u,
- const uint8* src_v,
- uint8* dst_abgr,
- int width) {
- asm volatile (
- YUV422TORGB_SETUP_REG
- ".p2align 2 \n"
- "1: \n"
- READYUV422
- YUV422TORGB
- "subs %4, %4, #8 \n"
- "vswp.u8 d20, d22 \n"
- "vmov.u8 d23, #255 \n"
- MEMACCESS(3)
- "vst4.8 {d20, d21, d22, d23}, [%3]! \n"
- "bgt 1b \n"
- : "+r"(src_y), // %0
- "+r"(src_u), // %1
- "+r"(src_v), // %2
- "+r"(dst_abgr), // %3
- "+r"(width) // %4
- : [kUVToRB]"r"(&kUVToRB), // %5
- [kUVToG]"r"(&kUVToG), // %6
- [kUVBiasBGR]"r"(&kUVBiasBGR),
- [kYToRgb]"r"(&kYToRgb)
- : "cc", "memory", "q0", "q1", "q2", "q3", "q4",
- "q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15"
- );
-}
-
-void I422ToRGBARow_NEON(const uint8* src_y,
- const uint8* src_u,
- const uint8* src_v,
- uint8* dst_rgba,
- int width) {
- asm volatile (
- YUV422TORGB_SETUP_REG
- ".p2align 2 \n"
- "1: \n"
- READYUV422
- YUV422TORGB
- "subs %4, %4, #8 \n"
- "vmov.u8 d19, #255 \n"
- MEMACCESS(3)
- "vst4.8 {d19, d20, d21, d22}, [%3]! \n"
- "bgt 1b \n"
- : "+r"(src_y), // %0
- "+r"(src_u), // %1
- "+r"(src_v), // %2
- "+r"(dst_rgba), // %3
- "+r"(width) // %4
- : [kUVToRB]"r"(&kUVToRB), // %5
- [kUVToG]"r"(&kUVToG), // %6
- [kUVBiasBGR]"r"(&kUVBiasBGR),
- [kYToRgb]"r"(&kYToRgb)
- : "cc", "memory", "q0", "q1", "q2", "q3", "q4",
- "q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15"
- );
-}
-
-void I422ToRGB24Row_NEON(const uint8* src_y,
- const uint8* src_u,
- const uint8* src_v,
- uint8* dst_rgb24,
- int width) {
- asm volatile (
- YUV422TORGB_SETUP_REG
- ".p2align 2 \n"
- "1: \n"
- READYUV422
- YUV422TORGB
- "subs %4, %4, #8 \n"
- MEMACCESS(3)
- "vst3.8 {d20, d21, d22}, [%3]! \n"
- "bgt 1b \n"
- : "+r"(src_y), // %0
- "+r"(src_u), // %1
- "+r"(src_v), // %2
- "+r"(dst_rgb24), // %3
- "+r"(width) // %4
- : [kUVToRB]"r"(&kUVToRB), // %5
- [kUVToG]"r"(&kUVToG), // %6
- [kUVBiasBGR]"r"(&kUVBiasBGR),
- [kYToRgb]"r"(&kYToRgb)
- : "cc", "memory", "q0", "q1", "q2", "q3", "q4",
- "q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15"
- );
-}
-
-void I422ToRAWRow_NEON(const uint8* src_y,
- const uint8* src_u,
- const uint8* src_v,
- uint8* dst_raw,
- int width) {
- asm volatile (
- YUV422TORGB_SETUP_REG
- ".p2align 2 \n"
- "1: \n"
- READYUV422
- YUV422TORGB
- "subs %4, %4, #8 \n"
- "vswp.u8 d20, d22 \n"
- MEMACCESS(3)
- "vst3.8 {d20, d21, d22}, [%3]! \n"
- "bgt 1b \n"
- : "+r"(src_y), // %0
- "+r"(src_u), // %1
- "+r"(src_v), // %2
- "+r"(dst_raw), // %3
- "+r"(width) // %4
- : [kUVToRB]"r"(&kUVToRB), // %5
- [kUVToG]"r"(&kUVToG), // %6
- [kUVBiasBGR]"r"(&kUVBiasBGR),
- [kYToRgb]"r"(&kYToRgb)
- : "cc", "memory", "q0", "q1", "q2", "q3", "q4",
- "q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15"
- );
-}
-
-#define ARGBTORGB565 \
- "vshr.u8 d20, d20, #3 \n" /* B */ \
- "vshr.u8 d21, d21, #2 \n" /* G */ \
- "vshr.u8 d22, d22, #3 \n" /* R */ \
- "vmovl.u8 q8, d20 \n" /* B */ \
- "vmovl.u8 q9, d21 \n" /* G */ \
- "vmovl.u8 q10, d22 \n" /* R */ \
- "vshl.u16 q9, q9, #5 \n" /* G */ \
- "vshl.u16 q10, q10, #11 \n" /* R */ \
- "vorr q0, q8, q9 \n" /* BG */ \
- "vorr q0, q0, q10 \n" /* BGR */
-
-void I422ToRGB565Row_NEON(const uint8* src_y,
- const uint8* src_u,
- const uint8* src_v,
- uint8* dst_rgb565,
- int width) {
- asm volatile (
- YUV422TORGB_SETUP_REG
- ".p2align 2 \n"
- "1: \n"
- READYUV422
- YUV422TORGB
- "subs %4, %4, #8 \n"
- ARGBTORGB565
- MEMACCESS(3)
- "vst1.8 {q0}, [%3]! \n" // store 8 pixels RGB565.
- "bgt 1b \n"
- : "+r"(src_y), // %0
- "+r"(src_u), // %1
- "+r"(src_v), // %2
- "+r"(dst_rgb565), // %3
- "+r"(width) // %4
- : [kUVToRB]"r"(&kUVToRB), // %5
- [kUVToG]"r"(&kUVToG), // %6
- [kUVBiasBGR]"r"(&kUVBiasBGR),
- [kYToRgb]"r"(&kYToRgb)
- : "cc", "memory", "q0", "q1", "q2", "q3", "q4",
- "q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15"
- );
-}
-
-#define ARGBTOARGB1555 \
- "vshr.u8 q10, q10, #3 \n" /* B */ \
- "vshr.u8 d22, d22, #3 \n" /* R */ \
- "vshr.u8 d23, d23, #7 \n" /* A */ \
- "vmovl.u8 q8, d20 \n" /* B */ \
- "vmovl.u8 q9, d21 \n" /* G */ \
- "vmovl.u8 q10, d22 \n" /* R */ \
- "vmovl.u8 q11, d23 \n" /* A */ \
- "vshl.u16 q9, q9, #5 \n" /* G */ \
- "vshl.u16 q10, q10, #10 \n" /* R */ \
- "vshl.u16 q11, q11, #15 \n" /* A */ \
- "vorr q0, q8, q9 \n" /* BG */ \
- "vorr q1, q10, q11 \n" /* RA */ \
- "vorr q0, q0, q1 \n" /* BGRA */
-
-void I422ToARGB1555Row_NEON(const uint8* src_y,
- const uint8* src_u,
- const uint8* src_v,
- uint8* dst_argb1555,
- int width) {
- asm volatile (
- YUV422TORGB_SETUP_REG
- ".p2align 2 \n"
- "1: \n"
- READYUV422
- YUV422TORGB
- "subs %4, %4, #8 \n"
- "vmov.u8 d23, #255 \n"
- ARGBTOARGB1555
- MEMACCESS(3)
- "vst1.8 {q0}, [%3]! \n" // store 8 pixels ARGB1555.
- "bgt 1b \n"
- : "+r"(src_y), // %0
- "+r"(src_u), // %1
- "+r"(src_v), // %2
- "+r"(dst_argb1555), // %3
- "+r"(width) // %4
- : [kUVToRB]"r"(&kUVToRB), // %5
- [kUVToG]"r"(&kUVToG), // %6
- [kUVBiasBGR]"r"(&kUVBiasBGR),
- [kYToRgb]"r"(&kYToRgb)
- : "cc", "memory", "q0", "q1", "q2", "q3", "q4",
- "q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15"
- );
-}
-
-#define ARGBTOARGB4444 \
- "vshr.u8 d20, d20, #4 \n" /* B */ \
- "vbic.32 d21, d21, d4 \n" /* G */ \
- "vshr.u8 d22, d22, #4 \n" /* R */ \
- "vbic.32 d23, d23, d4 \n" /* A */ \
- "vorr d0, d20, d21 \n" /* BG */ \
- "vorr d1, d22, d23 \n" /* RA */ \
- "vzip.u8 d0, d1 \n" /* BGRA */
-
-void I422ToARGB4444Row_NEON(const uint8* src_y,
- const uint8* src_u,
- const uint8* src_v,
- uint8* dst_argb4444,
- int width) {
- asm volatile (
- YUV422TORGB_SETUP_REG
- "vmov.u8 d4, #0x0f \n" // bits to clear with vbic.
- ".p2align 2 \n"
- "1: \n"
- READYUV422
- YUV422TORGB
- "subs %4, %4, #8 \n"
- "vmov.u8 d23, #255 \n"
- ARGBTOARGB4444
- MEMACCESS(3)
- "vst1.8 {q0}, [%3]! \n" // store 8 pixels ARGB4444.
- "bgt 1b \n"
- : "+r"(src_y), // %0
- "+r"(src_u), // %1
- "+r"(src_v), // %2
- "+r"(dst_argb4444), // %3
- "+r"(width) // %4
- : [kUVToRB]"r"(&kUVToRB), // %5
- [kUVToG]"r"(&kUVToG), // %6
- [kUVBiasBGR]"r"(&kUVBiasBGR),
- [kYToRgb]"r"(&kYToRgb)
- : "cc", "memory", "q0", "q1", "q2", "q3", "q4",
- "q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15"
- );
-}
-
-void I400ToARGBRow_NEON(const uint8* src_y,
- uint8* dst_argb,
- int width) {
- asm volatile (
- YUV422TORGB_SETUP_REG
- ".p2align 2 \n"
- "1: \n"
- READYUV400
- YUV422TORGB
- "subs %2, %2, #8 \n"
- "vmov.u8 d23, #255 \n"
- MEMACCESS(1)
- "vst4.8 {d20, d21, d22, d23}, [%1]! \n"
- "bgt 1b \n"
- : "+r"(src_y), // %0
- "+r"(dst_argb), // %1
- "+r"(width) // %2
- : [kUVToRB]"r"(&kUVToRB), // %3
- [kUVToG]"r"(&kUVToG), // %4
- [kUVBiasBGR]"r"(&kUVBiasBGR),
- [kYToRgb]"r"(&kYToRgb)
- : "cc", "memory", "q0", "q1", "q2", "q3", "q4",
- "q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15"
- );
-}
-
-void J400ToARGBRow_NEON(const uint8* src_y,
- uint8* dst_argb,
- int width) {
- asm volatile (
- "vmov.u8 d23, #255 \n"
- ".p2align 2 \n"
- "1: \n"
- MEMACCESS(0)
- "vld1.8 {d20}, [%0]! \n"
- "vmov d21, d20 \n"
- "vmov d22, d20 \n"
- "subs %2, %2, #8 \n"
- MEMACCESS(1)
- "vst4.8 {d20, d21, d22, d23}, [%1]! \n"
- "bgt 1b \n"
- : "+r"(src_y), // %0
- "+r"(dst_argb), // %1
- "+r"(width) // %2
- :
- : "cc", "memory", "d20", "d21", "d22", "d23"
- );
-}
-
-void NV12ToARGBRow_NEON(const uint8* src_y,
- const uint8* src_uv,
- uint8* dst_argb,
- int width) {
- asm volatile (
- YUV422TORGB_SETUP_REG
- ".p2align 2 \n"
- "1: \n"
- READNV12
- YUV422TORGB
- "subs %3, %3, #8 \n"
- "vmov.u8 d23, #255 \n"
- MEMACCESS(2)
- "vst4.8 {d20, d21, d22, d23}, [%2]! \n"
- "bgt 1b \n"
- : "+r"(src_y), // %0
- "+r"(src_uv), // %1
- "+r"(dst_argb), // %2
- "+r"(width) // %3
- : [kUVToRB]"r"(&kUVToRB), // %4
- [kUVToG]"r"(&kUVToG), // %5
- [kUVBiasBGR]"r"(&kUVBiasBGR),
- [kYToRgb]"r"(&kYToRgb)
- : "cc", "memory", "q0", "q1", "q2", "q3", "q4",
- "q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15"
- );
-}
-
-void NV21ToARGBRow_NEON(const uint8* src_y,
- const uint8* src_uv,
- uint8* dst_argb,
- int width) {
- asm volatile (
- YUV422TORGB_SETUP_REG
- ".p2align 2 \n"
- "1: \n"
- READNV21
- YUV422TORGB
- "subs %3, %3, #8 \n"
- "vmov.u8 d23, #255 \n"
- MEMACCESS(2)
- "vst4.8 {d20, d21, d22, d23}, [%2]! \n"
- "bgt 1b \n"
- : "+r"(src_y), // %0
- "+r"(src_uv), // %1
- "+r"(dst_argb), // %2
- "+r"(width) // %3
- : [kUVToRB]"r"(&kUVToRB), // %4
- [kUVToG]"r"(&kUVToG), // %5
- [kUVBiasBGR]"r"(&kUVBiasBGR),
- [kYToRgb]"r"(&kYToRgb)
- : "cc", "memory", "q0", "q1", "q2", "q3", "q4",
- "q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15"
- );
-}
-
-void NV12ToRGB565Row_NEON(const uint8* src_y,
- const uint8* src_uv,
- uint8* dst_rgb565,
- int width) {
- asm volatile (
- YUV422TORGB_SETUP_REG
- ".p2align 2 \n"
- "1: \n"
- READNV12
- YUV422TORGB
- "subs %3, %3, #8 \n"
- ARGBTORGB565
- MEMACCESS(2)
- "vst1.8 {q0}, [%2]! \n" // store 8 pixels RGB565.
- "bgt 1b \n"
- : "+r"(src_y), // %0
- "+r"(src_uv), // %1
- "+r"(dst_rgb565), // %2
- "+r"(width) // %3
- : [kUVToRB]"r"(&kUVToRB), // %4
- [kUVToG]"r"(&kUVToG), // %5
- [kUVBiasBGR]"r"(&kUVBiasBGR),
- [kYToRgb]"r"(&kYToRgb)
- : "cc", "memory", "q0", "q1", "q2", "q3", "q4",
- "q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15"
- );
-}
-
-void NV21ToRGB565Row_NEON(const uint8* src_y,
- const uint8* src_uv,
- uint8* dst_rgb565,
- int width) {
- asm volatile (
- YUV422TORGB_SETUP_REG
- ".p2align 2 \n"
- "1: \n"
- READNV21
- YUV422TORGB
- "subs %3, %3, #8 \n"
- ARGBTORGB565
- MEMACCESS(2)
- "vst1.8 {q0}, [%2]! \n" // store 8 pixels RGB565.
- "bgt 1b \n"
- : "+r"(src_y), // %0
- "+r"(src_uv), // %1
- "+r"(dst_rgb565), // %2
- "+r"(width) // %3
- : [kUVToRB]"r"(&kUVToRB), // %4
- [kUVToG]"r"(&kUVToG), // %5
- [kUVBiasBGR]"r"(&kUVBiasBGR),
- [kYToRgb]"r"(&kYToRgb)
- : "cc", "memory", "q0", "q1", "q2", "q3", "q4",
- "q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15"
- );
-}
-
-void YUY2ToARGBRow_NEON(const uint8* src_yuy2,
- uint8* dst_argb,
- int width) {
- asm volatile (
- YUV422TORGB_SETUP_REG
- ".p2align 2 \n"
- "1: \n"
- READYUY2
- YUV422TORGB
- "subs %2, %2, #8 \n"
- "vmov.u8 d23, #255 \n"
- MEMACCESS(1)
- "vst4.8 {d20, d21, d22, d23}, [%1]! \n"
- "bgt 1b \n"
- : "+r"(src_yuy2), // %0
- "+r"(dst_argb), // %1
- "+r"(width) // %2
- : [kUVToRB]"r"(&kUVToRB), // %3
- [kUVToG]"r"(&kUVToG), // %4
- [kUVBiasBGR]"r"(&kUVBiasBGR),
- [kYToRgb]"r"(&kYToRgb)
- : "cc", "memory", "q0", "q1", "q2", "q3", "q4",
- "q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15"
- );
-}
-
-void UYVYToARGBRow_NEON(const uint8* src_uyvy,
- uint8* dst_argb,
- int width) {
- asm volatile (
- YUV422TORGB_SETUP_REG
- ".p2align 2 \n"
- "1: \n"
- READUYVY
- YUV422TORGB
- "subs %2, %2, #8 \n"
- "vmov.u8 d23, #255 \n"
- MEMACCESS(1)
- "vst4.8 {d20, d21, d22, d23}, [%1]! \n"
- "bgt 1b \n"
- : "+r"(src_uyvy), // %0
- "+r"(dst_argb), // %1
- "+r"(width) // %2
- : [kUVToRB]"r"(&kUVToRB), // %3
- [kUVToG]"r"(&kUVToG), // %4
- [kUVBiasBGR]"r"(&kUVBiasBGR),
- [kYToRgb]"r"(&kYToRgb)
- : "cc", "memory", "q0", "q1", "q2", "q3", "q4",
- "q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15"
- );
-}
-
-// Reads 16 pairs of UV and write even values to dst_u and odd to dst_v.
-void SplitUVRow_NEON(const uint8* src_uv, uint8* dst_u, uint8* dst_v,
- int width) {
- asm volatile (
- ".p2align 2 \n"
- "1: \n"
- MEMACCESS(0)
- "vld2.8 {q0, q1}, [%0]! \n" // load 16 pairs of UV
- "subs %3, %3, #16 \n" // 16 processed per loop
- MEMACCESS(1)
- "vst1.8 {q0}, [%1]! \n" // store U
- MEMACCESS(2)
- "vst1.8 {q1}, [%2]! \n" // store V
- "bgt 1b \n"
- : "+r"(src_uv), // %0
- "+r"(dst_u), // %1
- "+r"(dst_v), // %2
- "+r"(width) // %3 // Output registers
- : // Input registers
- : "cc", "memory", "q0", "q1" // Clobber List
- );
-}
-
-// Reads 16 U's and V's and writes out 16 pairs of UV.
-void MergeUVRow_NEON(const uint8* src_u, const uint8* src_v, uint8* dst_uv,
- int width) {
- asm volatile (
- ".p2align 2 \n"
- "1: \n"
- MEMACCESS(0)
- "vld1.8 {q0}, [%0]! \n" // load U
- MEMACCESS(1)
- "vld1.8 {q1}, [%1]! \n" // load V
- "subs %3, %3, #16 \n" // 16 processed per loop
- MEMACCESS(2)
- "vst2.u8 {q0, q1}, [%2]! \n" // store 16 pairs of UV
- "bgt 1b \n"
- :
- "+r"(src_u), // %0
- "+r"(src_v), // %1
- "+r"(dst_uv), // %2
- "+r"(width) // %3 // Output registers
- : // Input registers
- : "cc", "memory", "q0", "q1" // Clobber List
- );
-}
-
-// Copy multiple of 32. vld4.8 allow unaligned and is fastest on a15.
-void CopyRow_NEON(const uint8* src, uint8* dst, int count) {
- asm volatile (
- ".p2align 2 \n"
- "1: \n"
- MEMACCESS(0)
- "vld1.8 {d0, d1, d2, d3}, [%0]! \n" // load 32
- "subs %2, %2, #32 \n" // 32 processed per loop
- MEMACCESS(1)
- "vst1.8 {d0, d1, d2, d3}, [%1]! \n" // store 32
- "bgt 1b \n"
- : "+r"(src), // %0
- "+r"(dst), // %1
- "+r"(count) // %2 // Output registers
- : // Input registers
- : "cc", "memory", "q0", "q1" // Clobber List
- );
-}
-
-// SetRow writes 'count' bytes using an 8 bit value repeated.
-void SetRow_NEON(uint8* dst, uint8 v8, int count) {
- asm volatile (
- "vdup.8 q0, %2 \n" // duplicate 16 bytes
- "1: \n"
- "subs %1, %1, #16 \n" // 16 bytes per loop
- MEMACCESS(0)
- "vst1.8 {q0}, [%0]! \n" // store
- "bgt 1b \n"
- : "+r"(dst), // %0
- "+r"(count) // %1
- : "r"(v8) // %2
- : "cc", "memory", "q0"
- );
-}
-
-// ARGBSetRow writes 'count' pixels using an 32 bit value repeated.
-void ARGBSetRow_NEON(uint8* dst, uint32 v32, int count) {
- asm volatile (
- "vdup.u32 q0, %2 \n" // duplicate 4 ints
- "1: \n"
- "subs %1, %1, #4 \n" // 4 pixels per loop
- MEMACCESS(0)
- "vst1.8 {q0}, [%0]! \n" // store
- "bgt 1b \n"
- : "+r"(dst), // %0
- "+r"(count) // %1
- : "r"(v32) // %2
- : "cc", "memory", "q0"
- );
-}
-
-void MirrorRow_NEON(const uint8* src, uint8* dst, int width) {
- asm volatile (
- // Start at end of source row.
- "mov r3, #-16 \n"
- "add %0, %0, %2 \n"
- "sub %0, #16 \n"
-
- ".p2align 2 \n"
- "1: \n"
- MEMACCESS(0)
- "vld1.8 {q0}, [%0], r3 \n" // src -= 16
- "subs %2, #16 \n" // 16 pixels per loop.
- "vrev64.8 q0, q0 \n"
- MEMACCESS(1)
- "vst1.8 {d1}, [%1]! \n" // dst += 16
- MEMACCESS(1)
- "vst1.8 {d0}, [%1]! \n"
- "bgt 1b \n"
- : "+r"(src), // %0
- "+r"(dst), // %1
- "+r"(width) // %2
- :
- : "cc", "memory", "r3", "q0"
- );
-}
-
-void MirrorUVRow_NEON(const uint8* src_uv, uint8* dst_u, uint8* dst_v,
- int width) {
- asm volatile (
- // Start at end of source row.
- "mov r12, #-16 \n"
- "add %0, %0, %3, lsl #1 \n"
- "sub %0, #16 \n"
-
- ".p2align 2 \n"
- "1: \n"
- MEMACCESS(0)
- "vld2.8 {d0, d1}, [%0], r12 \n" // src -= 16
- "subs %3, #8 \n" // 8 pixels per loop.
- "vrev64.8 q0, q0 \n"
- MEMACCESS(1)
- "vst1.8 {d0}, [%1]! \n" // dst += 8
- MEMACCESS(2)
- "vst1.8 {d1}, [%2]! \n"
- "bgt 1b \n"
- : "+r"(src_uv), // %0
- "+r"(dst_u), // %1
- "+r"(dst_v), // %2
- "+r"(width) // %3
- :
- : "cc", "memory", "r12", "q0"
- );
-}
-
-void ARGBMirrorRow_NEON(const uint8* src, uint8* dst, int width) {
- asm volatile (
- // Start at end of source row.
- "mov r3, #-16 \n"
- "add %0, %0, %2, lsl #2 \n"
- "sub %0, #16 \n"
-
- ".p2align 2 \n"
- "1: \n"
- MEMACCESS(0)
- "vld1.8 {q0}, [%0], r3 \n" // src -= 16
- "subs %2, #4 \n" // 4 pixels per loop.
- "vrev64.32 q0, q0 \n"
- MEMACCESS(1)
- "vst1.8 {d1}, [%1]! \n" // dst += 16
- MEMACCESS(1)
- "vst1.8 {d0}, [%1]! \n"
- "bgt 1b \n"
- : "+r"(src), // %0
- "+r"(dst), // %1
- "+r"(width) // %2
- :
- : "cc", "memory", "r3", "q0"
- );
-}
-
-void RGB24ToARGBRow_NEON(const uint8* src_rgb24, uint8* dst_argb, int pix) {
- asm volatile (
- "vmov.u8 d4, #255 \n" // Alpha
- ".p2align 2 \n"
- "1: \n"
- MEMACCESS(0)
- "vld3.8 {d1, d2, d3}, [%0]! \n" // load 8 pixels of RGB24.
- "subs %2, %2, #8 \n" // 8 processed per loop.
- MEMACCESS(1)
- "vst4.8 {d1, d2, d3, d4}, [%1]! \n" // store 8 pixels of ARGB.
- "bgt 1b \n"
- : "+r"(src_rgb24), // %0
- "+r"(dst_argb), // %1
- "+r"(pix) // %2
- :
- : "cc", "memory", "d1", "d2", "d3", "d4" // Clobber List
- );
-}
-
-void RAWToARGBRow_NEON(const uint8* src_raw, uint8* dst_argb, int pix) {
- asm volatile (
- "vmov.u8 d4, #255 \n" // Alpha
- ".p2align 2 \n"
- "1: \n"
- MEMACCESS(0)
- "vld3.8 {d1, d2, d3}, [%0]! \n" // load 8 pixels of RAW.
- "subs %2, %2, #8 \n" // 8 processed per loop.
- "vswp.u8 d1, d3 \n" // swap R, B
- MEMACCESS(1)
- "vst4.8 {d1, d2, d3, d4}, [%1]! \n" // store 8 pixels of ARGB.
- "bgt 1b \n"
- : "+r"(src_raw), // %0
- "+r"(dst_argb), // %1
- "+r"(pix) // %2
- :
- : "cc", "memory", "d1", "d2", "d3", "d4" // Clobber List
- );
-}
-
-#define RGB565TOARGB \
- "vshrn.u16 d6, q0, #5 \n" /* G xxGGGGGG */ \
- "vuzp.u8 d0, d1 \n" /* d0 xxxBBBBB RRRRRxxx */ \
- "vshl.u8 d6, d6, #2 \n" /* G GGGGGG00 upper 6 */ \
- "vshr.u8 d1, d1, #3 \n" /* R 000RRRRR lower 5 */ \
- "vshl.u8 q0, q0, #3 \n" /* B,R BBBBB000 upper 5 */ \
- "vshr.u8 q2, q0, #5 \n" /* B,R 00000BBB lower 3 */ \
- "vorr.u8 d0, d0, d4 \n" /* B */ \
- "vshr.u8 d4, d6, #6 \n" /* G 000000GG lower 2 */ \
- "vorr.u8 d2, d1, d5 \n" /* R */ \
- "vorr.u8 d1, d4, d6 \n" /* G */
-
-void RGB565ToARGBRow_NEON(const uint8* src_rgb565, uint8* dst_argb, int pix) {
- asm volatile (
- "vmov.u8 d3, #255 \n" // Alpha
- ".p2align 2 \n"
- "1: \n"
- MEMACCESS(0)
- "vld1.8 {q0}, [%0]! \n" // load 8 RGB565 pixels.
- "subs %2, %2, #8 \n" // 8 processed per loop.
- RGB565TOARGB
- MEMACCESS(1)
- "vst4.8 {d0, d1, d2, d3}, [%1]! \n" // store 8 pixels of ARGB.
- "bgt 1b \n"
- : "+r"(src_rgb565), // %0
- "+r"(dst_argb), // %1
- "+r"(pix) // %2
- :
- : "cc", "memory", "q0", "q1", "q2", "q3" // Clobber List
- );
-}
-
-#define ARGB1555TOARGB \
- "vshrn.u16 d7, q0, #8 \n" /* A Arrrrrxx */ \
- "vshr.u8 d6, d7, #2 \n" /* R xxxRRRRR */ \
- "vshrn.u16 d5, q0, #5 \n" /* G xxxGGGGG */ \
- "vmovn.u16 d4, q0 \n" /* B xxxBBBBB */ \
- "vshr.u8 d7, d7, #7 \n" /* A 0000000A */ \
- "vneg.s8 d7, d7 \n" /* A AAAAAAAA upper 8 */ \
- "vshl.u8 d6, d6, #3 \n" /* R RRRRR000 upper 5 */ \
- "vshr.u8 q1, q3, #5 \n" /* R,A 00000RRR lower 3 */ \
- "vshl.u8 q0, q2, #3 \n" /* B,G BBBBB000 upper 5 */ \
- "vshr.u8 q2, q0, #5 \n" /* B,G 00000BBB lower 3 */ \
- "vorr.u8 q1, q1, q3 \n" /* R,A */ \
- "vorr.u8 q0, q0, q2 \n" /* B,G */ \
-
-// RGB555TOARGB is same as ARGB1555TOARGB but ignores alpha.
-#define RGB555TOARGB \
- "vshrn.u16 d6, q0, #5 \n" /* G xxxGGGGG */ \
- "vuzp.u8 d0, d1 \n" /* d0 xxxBBBBB xRRRRRxx */ \
- "vshl.u8 d6, d6, #3 \n" /* G GGGGG000 upper 5 */ \
- "vshr.u8 d1, d1, #2 \n" /* R 00xRRRRR lower 5 */ \
- "vshl.u8 q0, q0, #3 \n" /* B,R BBBBB000 upper 5 */ \
- "vshr.u8 q2, q0, #5 \n" /* B,R 00000BBB lower 3 */ \
- "vorr.u8 d0, d0, d4 \n" /* B */ \
- "vshr.u8 d4, d6, #5 \n" /* G 00000GGG lower 3 */ \
- "vorr.u8 d2, d1, d5 \n" /* R */ \
- "vorr.u8 d1, d4, d6 \n" /* G */
-
-void ARGB1555ToARGBRow_NEON(const uint8* src_argb1555, uint8* dst_argb,
- int pix) {
- asm volatile (
- "vmov.u8 d3, #255 \n" // Alpha
- ".p2align 2 \n"
- "1: \n"
- MEMACCESS(0)
- "vld1.8 {q0}, [%0]! \n" // load 8 ARGB1555 pixels.
- "subs %2, %2, #8 \n" // 8 processed per loop.
- ARGB1555TOARGB
- MEMACCESS(1)
- "vst4.8 {d0, d1, d2, d3}, [%1]! \n" // store 8 pixels of ARGB.
- "bgt 1b \n"
- : "+r"(src_argb1555), // %0
- "+r"(dst_argb), // %1
- "+r"(pix) // %2
- :
- : "cc", "memory", "q0", "q1", "q2", "q3" // Clobber List
- );
-}
-
-#define ARGB4444TOARGB \
- "vuzp.u8 d0, d1 \n" /* d0 BG, d1 RA */ \
- "vshl.u8 q2, q0, #4 \n" /* B,R BBBB0000 */ \
- "vshr.u8 q1, q0, #4 \n" /* G,A 0000GGGG */ \
- "vshr.u8 q0, q2, #4 \n" /* B,R 0000BBBB */ \
- "vorr.u8 q0, q0, q2 \n" /* B,R BBBBBBBB */ \
- "vshl.u8 q2, q1, #4 \n" /* G,A GGGG0000 */ \
- "vorr.u8 q1, q1, q2 \n" /* G,A GGGGGGGG */ \
- "vswp.u8 d1, d2 \n" /* B,R,G,A -> B,G,R,A */
-
-void ARGB4444ToARGBRow_NEON(const uint8* src_argb4444, uint8* dst_argb,
- int pix) {
- asm volatile (
- "vmov.u8 d3, #255 \n" // Alpha
- ".p2align 2 \n"
- "1: \n"
- MEMACCESS(0)
- "vld1.8 {q0}, [%0]! \n" // load 8 ARGB4444 pixels.
- "subs %2, %2, #8 \n" // 8 processed per loop.
- ARGB4444TOARGB
- MEMACCESS(1)
- "vst4.8 {d0, d1, d2, d3}, [%1]! \n" // store 8 pixels of ARGB.
- "bgt 1b \n"
- : "+r"(src_argb4444), // %0
- "+r"(dst_argb), // %1
- "+r"(pix) // %2
- :
- : "cc", "memory", "q0", "q1", "q2" // Clobber List
- );
-}
-
-void ARGBToRGB24Row_NEON(const uint8* src_argb, uint8* dst_rgb24, int pix) {
- asm volatile (
- ".p2align 2 \n"
- "1: \n"
- MEMACCESS(0)
- "vld4.8 {d1, d2, d3, d4}, [%0]! \n" // load 8 pixels of ARGB.
- "subs %2, %2, #8 \n" // 8 processed per loop.
- MEMACCESS(1)
- "vst3.8 {d1, d2, d3}, [%1]! \n" // store 8 pixels of RGB24.
- "bgt 1b \n"
- : "+r"(src_argb), // %0
- "+r"(dst_rgb24), // %1
- "+r"(pix) // %2
- :
- : "cc", "memory", "d1", "d2", "d3", "d4" // Clobber List
- );
-}
-
-void ARGBToRAWRow_NEON(const uint8* src_argb, uint8* dst_raw, int pix) {
- asm volatile (
- ".p2align 2 \n"
- "1: \n"
- MEMACCESS(0)
- "vld4.8 {d1, d2, d3, d4}, [%0]! \n" // load 8 pixels of ARGB.
- "subs %2, %2, #8 \n" // 8 processed per loop.
- "vswp.u8 d1, d3 \n" // swap R, B
- MEMACCESS(1)
- "vst3.8 {d1, d2, d3}, [%1]! \n" // store 8 pixels of RAW.
- "bgt 1b \n"
- : "+r"(src_argb), // %0
- "+r"(dst_raw), // %1
- "+r"(pix) // %2
- :
- : "cc", "memory", "d1", "d2", "d3", "d4" // Clobber List
- );
-}
-
-void YUY2ToYRow_NEON(const uint8* src_yuy2, uint8* dst_y, int pix) {
- asm volatile (
- ".p2align 2 \n"
- "1: \n"
- MEMACCESS(0)
- "vld2.8 {q0, q1}, [%0]! \n" // load 16 pixels of YUY2.
- "subs %2, %2, #16 \n" // 16 processed per loop.
- MEMACCESS(1)
- "vst1.8 {q0}, [%1]! \n" // store 16 pixels of Y.
- "bgt 1b \n"
- : "+r"(src_yuy2), // %0
- "+r"(dst_y), // %1
- "+r"(pix) // %2
- :
- : "cc", "memory", "q0", "q1" // Clobber List
- );
-}
-
-void UYVYToYRow_NEON(const uint8* src_uyvy, uint8* dst_y, int pix) {
- asm volatile (
- ".p2align 2 \n"
- "1: \n"
- MEMACCESS(0)
- "vld2.8 {q0, q1}, [%0]! \n" // load 16 pixels of UYVY.
- "subs %2, %2, #16 \n" // 16 processed per loop.
- MEMACCESS(1)
- "vst1.8 {q1}, [%1]! \n" // store 16 pixels of Y.
- "bgt 1b \n"
- : "+r"(src_uyvy), // %0
- "+r"(dst_y), // %1
- "+r"(pix) // %2
- :
- : "cc", "memory", "q0", "q1" // Clobber List
- );
-}
-
-void YUY2ToUV422Row_NEON(const uint8* src_yuy2, uint8* dst_u, uint8* dst_v,
- int pix) {
- asm volatile (
- ".p2align 2 \n"
- "1: \n"
- MEMACCESS(0)
- "vld4.8 {d0, d1, d2, d3}, [%0]! \n" // load 16 pixels of YUY2.
- "subs %3, %3, #16 \n" // 16 pixels = 8 UVs.
- MEMACCESS(1)
- "vst1.8 {d1}, [%1]! \n" // store 8 U.
- MEMACCESS(2)
- "vst1.8 {d3}, [%2]! \n" // store 8 V.
- "bgt 1b \n"
- : "+r"(src_yuy2), // %0
- "+r"(dst_u), // %1
- "+r"(dst_v), // %2
- "+r"(pix) // %3
- :
- : "cc", "memory", "d0", "d1", "d2", "d3" // Clobber List
- );
-}
-
-void UYVYToUV422Row_NEON(const uint8* src_uyvy, uint8* dst_u, uint8* dst_v,
- int pix) {
- asm volatile (
- ".p2align 2 \n"
- "1: \n"
- MEMACCESS(0)
- "vld4.8 {d0, d1, d2, d3}, [%0]! \n" // load 16 pixels of UYVY.
- "subs %3, %3, #16 \n" // 16 pixels = 8 UVs.
- MEMACCESS(1)
- "vst1.8 {d0}, [%1]! \n" // store 8 U.
- MEMACCESS(2)
- "vst1.8 {d2}, [%2]! \n" // store 8 V.
- "bgt 1b \n"
- : "+r"(src_uyvy), // %0
- "+r"(dst_u), // %1
- "+r"(dst_v), // %2
- "+r"(pix) // %3
- :
- : "cc", "memory", "d0", "d1", "d2", "d3" // Clobber List
- );
-}
-
-void YUY2ToUVRow_NEON(const uint8* src_yuy2, int stride_yuy2,
- uint8* dst_u, uint8* dst_v, int pix) {
- asm volatile (
- "add %1, %0, %1 \n" // stride + src_yuy2
- ".p2align 2 \n"
- "1: \n"
- MEMACCESS(0)
- "vld4.8 {d0, d1, d2, d3}, [%0]! \n" // load 16 pixels of YUY2.
- "subs %4, %4, #16 \n" // 16 pixels = 8 UVs.
- MEMACCESS(1)
- "vld4.8 {d4, d5, d6, d7}, [%1]! \n" // load next row YUY2.
- "vrhadd.u8 d1, d1, d5 \n" // average rows of U
- "vrhadd.u8 d3, d3, d7 \n" // average rows of V
- MEMACCESS(2)
- "vst1.8 {d1}, [%2]! \n" // store 8 U.
- MEMACCESS(3)
- "vst1.8 {d3}, [%3]! \n" // store 8 V.
- "bgt 1b \n"
- : "+r"(src_yuy2), // %0
- "+r"(stride_yuy2), // %1
- "+r"(dst_u), // %2
- "+r"(dst_v), // %3
- "+r"(pix) // %4
- :
- : "cc", "memory", "d0", "d1", "d2", "d3", "d4", "d5", "d6", "d7" // Clobber List
- );
-}
-
-void UYVYToUVRow_NEON(const uint8* src_uyvy, int stride_uyvy,
- uint8* dst_u, uint8* dst_v, int pix) {
- asm volatile (
- "add %1, %0, %1 \n" // stride + src_uyvy
- ".p2align 2 \n"
- "1: \n"
- MEMACCESS(0)
- "vld4.8 {d0, d1, d2, d3}, [%0]! \n" // load 16 pixels of UYVY.
- "subs %4, %4, #16 \n" // 16 pixels = 8 UVs.
- MEMACCESS(1)
- "vld4.8 {d4, d5, d6, d7}, [%1]! \n" // load next row UYVY.
- "vrhadd.u8 d0, d0, d4 \n" // average rows of U
- "vrhadd.u8 d2, d2, d6 \n" // average rows of V
- MEMACCESS(2)
- "vst1.8 {d0}, [%2]! \n" // store 8 U.
- MEMACCESS(3)
- "vst1.8 {d2}, [%3]! \n" // store 8 V.
- "bgt 1b \n"
- : "+r"(src_uyvy), // %0
- "+r"(stride_uyvy), // %1
- "+r"(dst_u), // %2
- "+r"(dst_v), // %3
- "+r"(pix) // %4
- :
- : "cc", "memory", "d0", "d1", "d2", "d3", "d4", "d5", "d6", "d7" // Clobber List
- );
-}
-
-// For BGRAToARGB, ABGRToARGB, RGBAToARGB, and ARGBToRGBA. Reorders the bytes of each 16-byte group (4 pixels) using a 16-byte index table.
-void ARGBShuffleRow_NEON(const uint8* src_argb, uint8* dst_argb,
- const uint8* shuffler, int pix) {  // shuffler: 16 byte indices read once before the loop; pix drops by 4 per pass.
- asm volatile (
- MEMACCESS(3)
- "vld1.8 {q2}, [%3] \n" // shuffler (d4 = indices for output bytes 0-7, d5 = for bytes 8-15)
- "1: \n"
- MEMACCESS(0)
- "vld1.8 {q0}, [%0]! \n" // load 4 pixels.
- "subs %2, %2, #4 \n" // 4 processed per loop
- "vtbl.8 d2, {d0, d1}, d4 \n" // look up 2 first pixels
- "vtbl.8 d3, {d0, d1}, d5 \n" // look up 2 next pixels
- MEMACCESS(1)
- "vst1.8 {q1}, [%1]! \n" // store 4.
- "bgt 1b \n"  // repeat while pix > 0; body runs at least once
- : "+r"(src_argb), // %0
- "+r"(dst_argb), // %1
- "+r"(pix) // %2
- : "r"(shuffler) // %3 (only read, never advanced)
- : "cc", "memory", "q0", "q1", "q2" // Clobber List
- );
-}
-
-void I422ToYUY2Row_NEON(const uint8* src_y,  // Interleaves planar Y, U, V rows into packed YUY2 (Y0 U Y1 V).
- const uint8* src_u,
- const uint8* src_v,
- uint8* dst_yuy2, int width) {  // width drops by 16 per pass; each pass reads 16 Y, 8 U, 8 V and writes 32 bytes.
- asm volatile (
- ".p2align 2 \n"  // 4-byte align the loop label
- "1: \n"
- MEMACCESS(0)
- "vld2.8 {d0, d2}, [%0]! \n" // load 16 Ys (even Ys -> d0, odd Ys -> d2)
- MEMACCESS(1)
- "vld1.8 {d1}, [%1]! \n" // load 8 Us
- MEMACCESS(2)
- "vld1.8 {d3}, [%2]! \n" // load 8 Vs
- "subs %4, %4, #16 \n" // 16 pixels
- MEMACCESS(3)
- "vst4.8 {d0, d1, d2, d3}, [%3]! \n" // Store 8 YUY2/16 pixels.
- "bgt 1b \n"  // repeat while width > 0; body runs at least once
- : "+r"(src_y), // %0
- "+r"(src_u), // %1
- "+r"(src_v), // %2
- "+r"(dst_yuy2), // %3
- "+r"(width) // %4
- :
- : "cc", "memory", "d0", "d1", "d2", "d3"
- );
-}
-
-void I422ToUYVYRow_NEON(const uint8* src_y,  // Interleaves planar Y, U, V rows into packed UYVY (U Y0 V Y1).
- const uint8* src_u,
- const uint8* src_v,
- uint8* dst_uyvy, int width) {  // width drops by 16 per pass; each pass reads 16 Y, 8 U, 8 V and writes 32 bytes.
- asm volatile (
- ".p2align 2 \n"  // 4-byte align the loop label
- "1: \n"
- MEMACCESS(0)
- "vld2.8 {d1, d3}, [%0]! \n" // load 16 Ys (even Ys -> d1, odd Ys -> d3)
- MEMACCESS(1)
- "vld1.8 {d0}, [%1]! \n" // load 8 Us
- MEMACCESS(2)
- "vld1.8 {d2}, [%2]! \n" // load 8 Vs
- "subs %4, %4, #16 \n" // 16 pixels
- MEMACCESS(3)
- "vst4.8 {d0, d1, d2, d3}, [%3]! \n" // Store 8 UYVY/16 pixels.
- "bgt 1b \n"  // repeat while width > 0; body runs at least once
- : "+r"(src_y), // %0
- "+r"(src_u), // %1
- "+r"(src_v), // %2
- "+r"(dst_uyvy), // %3
- "+r"(width) // %4
- :
- : "cc", "memory", "d0", "d1", "d2", "d3"
- );
-}
-
-void ARGBToRGB565Row_NEON(const uint8* src_argb, uint8* dst_rgb565, int pix) {  // Packs ARGB into RGB565, 8 pixels (32 bytes in, 16 bytes out) per pass.
- asm volatile (
- ".p2align 2 \n"  // 4-byte align the loop label
- "1: \n"
- MEMACCESS(0)
- "vld4.8 {d20, d21, d22, d23}, [%0]! \n" // load 8 pixels of ARGB.
- "subs %2, %2, #8 \n" // 8 processed per loop.
- ARGBTORGB565  // macro defined elsewhere; presumably packs d20-d22 into q0, which is what gets stored below
- MEMACCESS(1)
- "vst1.8 {q0}, [%1]! \n" // store 8 pixels RGB565.
- "bgt 1b \n"  // repeat while pix > 0; body runs at least once
- : "+r"(src_argb), // %0
- "+r"(dst_rgb565), // %1
- "+r"(pix) // %2
- :
- : "cc", "memory", "q0", "q8", "q9", "q10", "q11"
- );
-}
-
-// Converts a row of ARGB pixels to RGB565 after adding a dither pattern.
-// dither4 is broadcast into both 32-bit lanes of d2 and saturating-added to
-// the first three byte planes (d20, d21, d22) before ARGBTORGB565 (defined
-// elsewhere) packs them. 8 pixels are consumed per pass and the loop body
-// always runs at least once.
-void ARGBToRGB565DitherRow_NEON(const uint8* src_argb, uint8* dst_rgb,
- const uint32 dither4, int width) {
- asm volatile (
- ".p2align 2 \n"
- "vdup.32 d2, %3 \n" // dither4
- "1: \n"
- MEMACCESS(1)
- "vld4.8 {d20, d21, d22, d23}, [%1]! \n" // load 8 pixels of ARGB.
- "subs %2, %2, #8 \n" // 8 processed per loop.
- "vqadd.u8 d20, d20, d2 \n" // add dither with unsigned saturation
- "vqadd.u8 d21, d21, d2 \n"
- "vqadd.u8 d22, d22, d2 \n"
- ARGBTORGB565
- MEMACCESS(0)
- "vst1.8 {q0}, [%0]! \n" // store 8 pixels RGB565.
- "bgt 1b \n" // repeat while width > 0
- // src_argb is advanced by the post-increment load and width is decremented
- // by subs, so both must be read-write operands; only dither4 is a pure input.
- : "+r"(dst_rgb), // %0
- "+r"(src_argb), // %1
- "+r"(width) // %2
- : "r"(dither4) // %3
- : "cc", "memory", "q0", "q1", "q8", "q9", "q10", "q11"
- );
-}
-
-void ARGBToARGB1555Row_NEON(const uint8* src_argb, uint8* dst_argb1555,  // Packs ARGB into ARGB1555, 8 pixels (32 bytes in, 16 bytes out) per pass.
- int pix) {
- asm volatile (
- ".p2align 2 \n"  // 4-byte align the loop label
- "1: \n"
- MEMACCESS(0)
- "vld4.8 {d20, d21, d22, d23}, [%0]! \n" // load 8 pixels of ARGB.
- "subs %2, %2, #8 \n" // 8 processed per loop.
- ARGBTOARGB1555  // macro defined elsewhere; presumably packs d20-d23 into q0, which is what gets stored below
- MEMACCESS(1)
- "vst1.8 {q0}, [%1]! \n" // store 8 pixels ARGB1555.
- "bgt 1b \n"  // repeat while pix > 0; body runs at least once
- : "+r"(src_argb), // %0
- "+r"(dst_argb1555), // %1
- "+r"(pix) // %2
- :
- : "cc", "memory", "q0", "q8", "q9", "q10", "q11"
- );
-}
-
-// Packs a row of ARGB pixels into ARGB4444, 8 pixels (32 bytes in, 16 bytes
-// out) per pass. d4 is preloaded with 0x0f in every byte as a mask for the
-// ARGBTOARGB4444 macro (defined elsewhere). The loop body always runs at
-// least once.
-void ARGBToARGB4444Row_NEON(const uint8* src_argb, uint8* dst_argb4444,
- int pix) {
- asm volatile (
- "vmov.u8 d4, #0x0f \n" // bits to clear with vbic.
- ".p2align 2 \n"
- "1: \n"
- MEMACCESS(0)
- "vld4.8 {d20, d21, d22, d23}, [%0]! \n" // load 8 pixels of ARGB.
- "subs %2, %2, #8 \n" // 8 processed per loop.
- ARGBTOARGB4444
- MEMACCESS(1)
- "vst1.8 {q0}, [%1]! \n" // store 8 pixels ARGB4444.
- "bgt 1b \n" // repeat while pix > 0
- : "+r"(src_argb), // %0
- "+r"(dst_argb4444), // %1
- "+r"(pix) // %2
- :
- // q2 is listed because the template overwrites d4 (the low half of q2).
- : "cc", "memory", "q0", "q2", "q8", "q9", "q10", "q11"
- );
-}
-
-void ARGBToYRow_NEON(const uint8* src_argb, uint8* dst_y, int pix) {  // Y = sat8(((13*B + 65*G + 33*R) rounded >> 7) + 16), 8 pixels per pass.
- asm volatile (
- "vmov.u8 d24, #13 \n" // B * 0.1016 coefficient
- "vmov.u8 d25, #65 \n" // G * 0.5078 coefficient
- "vmov.u8 d26, #33 \n" // R * 0.2578 coefficient
- "vmov.u8 d27, #16 \n" // Add 16 constant
- ".p2align 2 \n"  // 4-byte align the loop label
- "1: \n"
- MEMACCESS(0)
- "vld4.8 {d0, d1, d2, d3}, [%0]! \n" // load 8 ARGB pixels.
- "subs %2, %2, #8 \n" // 8 processed per loop.
- "vmull.u8 q2, d0, d24 \n" // B
- "vmlal.u8 q2, d1, d25 \n" // G
- "vmlal.u8 q2, d2, d26 \n" // R
- "vqrshrun.s16 d0, q2, #7 \n" // 16 bit to 8 bit Y
- "vqadd.u8 d0, d27 \n"  // saturating add of the 16 offset
- MEMACCESS(1)
- "vst1.8 {d0}, [%1]! \n" // store 8 pixels Y.
- "bgt 1b \n"  // repeat while pix > 0; body runs at least once
- : "+r"(src_argb), // %0
- "+r"(dst_y), // %1
- "+r"(pix) // %2
- :
- : "cc", "memory", "q0", "q1", "q2", "q12", "q13"
- );
-}
-
-void ARGBToYJRow_NEON(const uint8* src_argb, uint8* dst_y, int pix) {  // Y = sat8((15*B + 75*G + 38*R) rounded >> 7), no offset added; 8 pixels per pass.
- asm volatile (
- "vmov.u8 d24, #15 \n" // B * 0.11400 coefficient
- "vmov.u8 d25, #75 \n" // G * 0.58700 coefficient
- "vmov.u8 d26, #38 \n" // R * 0.29900 coefficient
- ".p2align 2 \n"  // 4-byte align the loop label
- "1: \n"
- MEMACCESS(0)
- "vld4.8 {d0, d1, d2, d3}, [%0]! \n" // load 8 ARGB pixels.
- "subs %2, %2, #8 \n" // 8 processed per loop.
- "vmull.u8 q2, d0, d24 \n" // B
- "vmlal.u8 q2, d1, d25 \n" // G
- "vmlal.u8 q2, d2, d26 \n" // R
- "vqrshrun.s16 d0, q2, #7 \n" // 15 bit to 8 bit Y
- MEMACCESS(1)
- "vst1.8 {d0}, [%1]! \n" // store 8 pixels Y.
- "bgt 1b \n"  // repeat while pix > 0; body runs at least once
- : "+r"(src_argb), // %0
- "+r"(dst_y), // %1
- "+r"(pix) // %2
- :
- : "cc", "memory", "q0", "q1", "q2", "q12", "q13"
- );
-}
-
-// 8x1 pixels. One U and one V byte is produced per ARGB pixel (no subsampling).
-void ARGBToUV444Row_NEON(const uint8* src_argb, uint8* dst_u, uint8* dst_v,
- int pix) {  // pix drops by 8 per pass; the loop body runs at least once.
- asm volatile (
- "vmov.u8 d24, #112 \n" // UB / VR 0.875 coefficient
- "vmov.u8 d25, #74 \n" // UG -0.5781 coefficient
- "vmov.u8 d26, #38 \n" // UR -0.2969 coefficient
- "vmov.u8 d27, #18 \n" // VB -0.1406 coefficient
- "vmov.u8 d28, #94 \n" // VG -0.7344 coefficient
- "vmov.u16 q15, #0x8080 \n" // 128.5
- ".p2align 2 \n"
- "1: \n"
- MEMACCESS(0)
- "vld4.8 {d0, d1, d2, d3}, [%0]! \n" // load 8 ARGB pixels.
- "subs %3, %3, #8 \n" // 8 processed per loop.
- "vmull.u8 q2, d0, d24 \n" // B
- "vmlsl.u8 q2, d1, d25 \n" // G
- "vmlsl.u8 q2, d2, d26 \n" // R
- "vadd.u16 q2, q2, q15 \n" // +128 -> unsigned
-
- "vmull.u8 q3, d2, d24 \n" // R
- "vmlsl.u8 q3, d1, d28 \n" // G
- "vmlsl.u8 q3, d0, d27 \n" // B
- "vadd.u16 q3, q3, q15 \n" // +128 -> unsigned
-
- "vqshrn.u16 d0, q2, #8 \n" // 16 bit to 8 bit U
- "vqshrn.u16 d1, q3, #8 \n" // 16 bit to 8 bit V
-
- MEMACCESS(1)
- "vst1.8 {d0}, [%1]! \n" // store 8 pixels U.
- MEMACCESS(2)
- "vst1.8 {d1}, [%2]! \n" // store 8 pixels V.
- "bgt 1b \n"  // repeat while pix > 0 (flags from the subs above)
- : "+r"(src_argb), // %0
- "+r"(dst_u), // %1
- "+r"(dst_v), // %2
- "+r"(pix) // %3
- :
- : "cc", "memory", "q0", "q1", "q2", "q3", "q4", "q12", "q13", "q14", "q15"  // q4 is listed although the visible template never references it
- );
-}
-
-// 16x1 pixels -> 8x1. pix is number of argb pixels. e.g. 16. Each horizontal pair of pixels yields one U and one V byte.
-void ARGBToUV422Row_NEON(const uint8* src_argb, uint8* dst_u, uint8* dst_v,
- int pix) {  // pix drops by 16 per pass; the loop body runs at least once.
- asm volatile (
- "vmov.s16 q10, #112 / 2 \n" // UB / VR 0.875 coefficient (halved: inputs are 2-pixel sums)
- "vmov.s16 q11, #74 / 2 \n" // UG -0.5781 coefficient
- "vmov.s16 q12, #38 / 2 \n" // UR -0.2969 coefficient
- "vmov.s16 q13, #18 / 2 \n" // VB -0.1406 coefficient
- "vmov.s16 q14, #94 / 2 \n" // VG -0.7344 coefficient
- "vmov.u16 q15, #0x8080 \n" // 128.5
- ".p2align 2 \n"
- "1: \n"
- MEMACCESS(0)
- "vld4.8 {d0, d2, d4, d6}, [%0]! \n" // load 8 ARGB pixels.
- MEMACCESS(0)
- "vld4.8 {d1, d3, d5, d7}, [%0]! \n" // load next 8 ARGB pixels.
-
- "vpaddl.u8 q0, q0 \n" // B 16 bytes -> 8 shorts.
- "vpaddl.u8 q1, q1 \n" // G 16 bytes -> 8 shorts.
- "vpaddl.u8 q2, q2 \n" // R 16 bytes -> 8 shorts.
-
- "subs %3, %3, #16 \n" // 16 processed per loop.
- "vmul.s16 q8, q0, q10 \n" // B
- "vmls.s16 q8, q1, q11 \n" // G
- "vmls.s16 q8, q2, q12 \n" // R
- "vadd.u16 q8, q8, q15 \n" // +128 -> unsigned
-
- "vmul.s16 q9, q2, q10 \n" // R
- "vmls.s16 q9, q1, q14 \n" // G
- "vmls.s16 q9, q0, q13 \n" // B
- "vadd.u16 q9, q9, q15 \n" // +128 -> unsigned
-
- "vqshrn.u16 d0, q8, #8 \n" // 16 bit to 8 bit U
- "vqshrn.u16 d1, q9, #8 \n" // 16 bit to 8 bit V
-
- MEMACCESS(1)
- "vst1.8 {d0}, [%1]! \n" // store 8 pixels U.
- MEMACCESS(2)
- "vst1.8 {d1}, [%2]! \n" // store 8 pixels V.
- "bgt 1b \n"  // repeat while pix > 0 (flags from the subs above)
- : "+r"(src_argb), // %0
- "+r"(dst_u), // %1
- "+r"(dst_v), // %2
- "+r"(pix) // %3
- :
- : "cc", "memory", "q0", "q1", "q2", "q3",
- "q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15"
- );
-}
-
-// 32x1 pixels -> 8x1. pix is number of argb pixels. e.g. 32. Each run of 4 horizontal pixels yields one U and one V byte.
-void ARGBToUV411Row_NEON(const uint8* src_argb, uint8* dst_u, uint8* dst_v,
- int pix) {  // pix drops by 32 per pass; the loop body runs at least once.
- asm volatile (
- "vmov.s16 q10, #112 / 2 \n" // UB / VR 0.875 coefficient (halved: inputs are 4-pixel sums / 2)
- "vmov.s16 q11, #74 / 2 \n" // UG -0.5781 coefficient
- "vmov.s16 q12, #38 / 2 \n" // UR -0.2969 coefficient
- "vmov.s16 q13, #18 / 2 \n" // VB -0.1406 coefficient
- "vmov.s16 q14, #94 / 2 \n" // VG -0.7344 coefficient
- "vmov.u16 q15, #0x8080 \n" // 128.5
- ".p2align 2 \n"
- "1: \n"
- MEMACCESS(0)
- "vld4.8 {d0, d2, d4, d6}, [%0]! \n" // load 8 ARGB pixels.
- MEMACCESS(0)
- "vld4.8 {d1, d3, d5, d7}, [%0]! \n" // load next 8 ARGB pixels.
- "vpaddl.u8 q0, q0 \n" // B 16 bytes -> 8 shorts.
- "vpaddl.u8 q1, q1 \n" // G 16 bytes -> 8 shorts.
- "vpaddl.u8 q2, q2 \n" // R 16 bytes -> 8 shorts.
- MEMACCESS(0)
- "vld4.8 {d8, d10, d12, d14}, [%0]! \n" // load 8 more ARGB pixels.
- MEMACCESS(0)
- "vld4.8 {d9, d11, d13, d15}, [%0]! \n" // load last 8 ARGB pixels.
- "vpaddl.u8 q4, q4 \n" // B 16 bytes -> 8 shorts.
- "vpaddl.u8 q5, q5 \n" // G 16 bytes -> 8 shorts.
- "vpaddl.u8 q6, q6 \n" // R 16 bytes -> 8 shorts.
-
- "vpadd.u16 d0, d0, d1 \n" // B 16 shorts -> 8 shorts.
- "vpadd.u16 d1, d8, d9 \n" // B
- "vpadd.u16 d2, d2, d3 \n" // G 16 shorts -> 8 shorts.
- "vpadd.u16 d3, d10, d11 \n" // G
- "vpadd.u16 d4, d4, d5 \n" // R 16 shorts -> 8 shorts.
- "vpadd.u16 d5, d12, d13 \n" // R
-
- "vrshr.u16 q0, q0, #1 \n" // 2x average
- "vrshr.u16 q1, q1, #1 \n"
- "vrshr.u16 q2, q2, #1 \n"
-
- "subs %3, %3, #32 \n" // 32 processed per loop.
- "vmul.s16 q8, q0, q10 \n" // B
- "vmls.s16 q8, q1, q11 \n" // G
- "vmls.s16 q8, q2, q12 \n" // R
- "vadd.u16 q8, q8, q15 \n" // +128 -> unsigned
- "vmul.s16 q9, q2, q10 \n" // R
- "vmls.s16 q9, q1, q14 \n" // G
- "vmls.s16 q9, q0, q13 \n" // B
- "vadd.u16 q9, q9, q15 \n" // +128 -> unsigned
- "vqshrn.u16 d0, q8, #8 \n" // 16 bit to 8 bit U
- "vqshrn.u16 d1, q9, #8 \n" // 16 bit to 8 bit V
- MEMACCESS(1)
- "vst1.8 {d0}, [%1]! \n" // store 8 pixels U.
- MEMACCESS(2)
- "vst1.8 {d1}, [%2]! \n" // store 8 pixels V.
- "bgt 1b \n"  // repeat while pix > 0 (flags from the subs above)
- : "+r"(src_argb), // %0
- "+r"(dst_u), // %1
- "+r"(dst_v), // %2
- "+r"(pix) // %3
- :
- : "cc", "memory", "q0", "q1", "q2", "q3", "q4", "q5", "q6", "q7",
- "q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15"
- );
-}
-
-// 16x2 pixels -> 8x1. pix is number of argb pixels. e.g. 16.
-#define RGBTOUV(QB, QG, QR) /* QB/QG/QR: q registers of 16-bit B, G, R values. Expects coefficients in q10-q14 and the bias in q15; uses q8/q9 as scratch; leaves 8 U bytes in d0 and 8 V bytes in d1. */ \
- "vmul.s16 q8, " #QB ", q10 \n" /* B */ \
- "vmls.s16 q8, " #QG ", q11 \n" /* G */ \
- "vmls.s16 q8, " #QR ", q12 \n" /* R */ \
- "vadd.u16 q8, q8, q15 \n" /* +128 -> unsigned */ \
- "vmul.s16 q9, " #QR ", q10 \n" /* R */ \
- "vmls.s16 q9, " #QG ", q14 \n" /* G */ \
- "vmls.s16 q9, " #QB ", q13 \n" /* B */ \
- "vadd.u16 q9, q9, q15 \n" /* +128 -> unsigned */ \
- "vqshrn.u16 d0, q8, #8 \n" /* 16 bit to 8 bit U */ \
- "vqshrn.u16 d1, q9, #8 \n" /* 16 bit to 8 bit V */
-
-// TODO(fbarchard): Consider vhadd vertical, then vpaddl horizontal, avoid shr.
-void ARGBToUVRow_NEON(const uint8* src_argb, int src_stride_argb,  // Each 2x2 block of ARGB pixels (two rows) yields one U and one V byte.
- uint8* dst_u, uint8* dst_v, int pix) {  // pix drops by 16 per pass; 8 U and 8 V bytes are written per pass.
- asm volatile (
- "add %1, %0, %1 \n" // src_stride + src_argb (second row address)
- "vmov.s16 q10, #112 / 2 \n" // UB / VR 0.875 coefficient (halved: inputs are 2x2 sums / 2)
- "vmov.s16 q11, #74 / 2 \n" // UG -0.5781 coefficient
- "vmov.s16 q12, #38 / 2 \n" // UR -0.2969 coefficient
- "vmov.s16 q13, #18 / 2 \n" // VB -0.1406 coefficient
- "vmov.s16 q14, #94 / 2 \n" // VG -0.7344 coefficient
- "vmov.u16 q15, #0x8080 \n" // 128.5
- ".p2align 2 \n"
- "1: \n"
- MEMACCESS(0)
- "vld4.8 {d0, d2, d4, d6}, [%0]! \n" // load 8 ARGB pixels.
- MEMACCESS(0)
- "vld4.8 {d1, d3, d5, d7}, [%0]! \n" // load next 8 ARGB pixels.
- "vpaddl.u8 q0, q0 \n" // B 16 bytes -> 8 shorts.
- "vpaddl.u8 q1, q1 \n" // G 16 bytes -> 8 shorts.
- "vpaddl.u8 q2, q2 \n" // R 16 bytes -> 8 shorts.
- MEMACCESS(1)
- "vld4.8 {d8, d10, d12, d14}, [%1]! \n" // load 8 more ARGB pixels.
- MEMACCESS(1)
- "vld4.8 {d9, d11, d13, d15}, [%1]! \n" // load last 8 ARGB pixels.
- "vpadal.u8 q0, q4 \n" // B 16 bytes -> 8 shorts, accumulated onto row 0 sums.
- "vpadal.u8 q1, q5 \n" // G 16 bytes -> 8 shorts, accumulated onto row 0 sums.
- "vpadal.u8 q2, q6 \n" // R 16 bytes -> 8 shorts, accumulated onto row 0 sums.
-
- "vrshr.u16 q0, q0, #1 \n" // 2x average
- "vrshr.u16 q1, q1, #1 \n"
- "vrshr.u16 q2, q2, #1 \n"
-
- "subs %4, %4, #16 \n" // 16 processed per loop.
- RGBTOUV(q0, q1, q2)
- MEMACCESS(2)
- "vst1.8 {d0}, [%2]! \n" // store 8 pixels U.
- MEMACCESS(3)
- "vst1.8 {d1}, [%3]! \n" // store 8 pixels V.
- "bgt 1b \n"  // repeat while pix > 0; body runs at least once
- : "+r"(src_argb), // %0
- "+r"(src_stride_argb), // %1 (int operand reused as the second-row address)
- "+r"(dst_u), // %2
- "+r"(dst_v), // %3
- "+r"(pix) // %4
- :
- : "cc", "memory", "q0", "q1", "q2", "q3", "q4", "q5", "q6", "q7",
- "q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15"
- );
-}
-
-// TODO(fbarchard): Subsample match C code.
-void ARGBToUVJRow_NEON(const uint8* src_argb, int src_stride_argb,  // Same 2x2 -> one U/V structure as the other *ToUVRow functions, with 127/84/43/20/107 coefficients.
- uint8* dst_u, uint8* dst_v, int pix) {  // pix drops by 16 per pass; 8 U and 8 V bytes are written per pass.
- asm volatile (
- "add %1, %0, %1 \n" // src_stride + src_argb (second row address)
- "vmov.s16 q10, #127 / 2 \n" // UB / VR 0.500 coefficient
- "vmov.s16 q11, #84 / 2 \n" // UG -0.33126 coefficient
- "vmov.s16 q12, #43 / 2 \n" // UR -0.16874 coefficient
- "vmov.s16 q13, #20 / 2 \n" // VB -0.08131 coefficient
- "vmov.s16 q14, #107 / 2 \n" // VG -0.41869 coefficient
- "vmov.u16 q15, #0x8080 \n" // 128.5
- ".p2align 2 \n"
- "1: \n"
- MEMACCESS(0)
- "vld4.8 {d0, d2, d4, d6}, [%0]! \n" // load 8 ARGB pixels.
- MEMACCESS(0)
- "vld4.8 {d1, d3, d5, d7}, [%0]! \n" // load next 8 ARGB pixels.
- "vpaddl.u8 q0, q0 \n" // B 16 bytes -> 8 shorts.
- "vpaddl.u8 q1, q1 \n" // G 16 bytes -> 8 shorts.
- "vpaddl.u8 q2, q2 \n" // R 16 bytes -> 8 shorts.
- MEMACCESS(1)
- "vld4.8 {d8, d10, d12, d14}, [%1]! \n" // load 8 more ARGB pixels.
- MEMACCESS(1)
- "vld4.8 {d9, d11, d13, d15}, [%1]! \n" // load last 8 ARGB pixels.
- "vpadal.u8 q0, q4 \n" // B 16 bytes -> 8 shorts, accumulated onto row 0 sums.
- "vpadal.u8 q1, q5 \n" // G 16 bytes -> 8 shorts, accumulated onto row 0 sums.
- "vpadal.u8 q2, q6 \n" // R 16 bytes -> 8 shorts, accumulated onto row 0 sums.
-
- "vrshr.u16 q0, q0, #1 \n" // 2x average
- "vrshr.u16 q1, q1, #1 \n"
- "vrshr.u16 q2, q2, #1 \n"
-
- "subs %4, %4, #16 \n" // 16 processed per loop.
- RGBTOUV(q0, q1, q2)
- MEMACCESS(2)
- "vst1.8 {d0}, [%2]! \n" // store 8 pixels U.
- MEMACCESS(3)
- "vst1.8 {d1}, [%3]! \n" // store 8 pixels V.
- "bgt 1b \n"  // repeat while pix > 0; body runs at least once
- : "+r"(src_argb), // %0
- "+r"(src_stride_argb), // %1 (int operand reused as the second-row address)
- "+r"(dst_u), // %2
- "+r"(dst_v), // %3
- "+r"(pix) // %4
- :
- : "cc", "memory", "q0", "q1", "q2", "q3", "q4", "q5", "q6", "q7",
- "q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15"
- );
-}
-
-void BGRAToUVRow_NEON(const uint8* src_bgra, int src_stride_bgra,  // Each 2x2 block of BGRA pixels (two rows) yields one U and one V byte.
- uint8* dst_u, uint8* dst_v, int pix) {  // pix drops by 16 per pass; 8 U and 8 V bytes are written per pass.
- asm volatile (
- "add %1, %0, %1 \n" // src_stride + src_bgra (second row address)
- "vmov.s16 q10, #112 / 2 \n" // UB / VR 0.875 coefficient
- "vmov.s16 q11, #74 / 2 \n" // UG -0.5781 coefficient
- "vmov.s16 q12, #38 / 2 \n" // UR -0.2969 coefficient
- "vmov.s16 q13, #18 / 2 \n" // VB -0.1406 coefficient
- "vmov.s16 q14, #94 / 2 \n" // VG -0.7344 coefficient
- "vmov.u16 q15, #0x8080 \n" // 128.5
- ".p2align 2 \n"
- "1: \n"
- MEMACCESS(0)
- "vld4.8 {d0, d2, d4, d6}, [%0]! \n" // load 8 BGRA pixels.
- MEMACCESS(0)
- "vld4.8 {d1, d3, d5, d7}, [%0]! \n" // load next 8 BGRA pixels.
- "vpaddl.u8 q3, q3 \n" // B 16 bytes -> 8 shorts.
- "vpaddl.u8 q2, q2 \n" // G 16 bytes -> 8 shorts.
- "vpaddl.u8 q1, q1 \n" // R 16 bytes -> 8 shorts.
- MEMACCESS(1)
- "vld4.8 {d8, d10, d12, d14}, [%1]! \n" // load 8 more BGRA pixels.
- MEMACCESS(1)
- "vld4.8 {d9, d11, d13, d15}, [%1]! \n" // load last 8 BGRA pixels.
- "vpadal.u8 q3, q7 \n" // B 16 bytes -> 8 shorts, accumulated onto row 0 sums.
- "vpadal.u8 q2, q6 \n" // G 16 bytes -> 8 shorts, accumulated onto row 0 sums.
- "vpadal.u8 q1, q5 \n" // R 16 bytes -> 8 shorts, accumulated onto row 0 sums.
-
- "vrshr.u16 q1, q1, #1 \n" // 2x average
- "vrshr.u16 q2, q2, #1 \n"
- "vrshr.u16 q3, q3, #1 \n"
-
- "subs %4, %4, #16 \n" // 16 processed per loop.
- RGBTOUV(q3, q2, q1)
- MEMACCESS(2)
- "vst1.8 {d0}, [%2]! \n" // store 8 pixels U.
- MEMACCESS(3)
- "vst1.8 {d1}, [%3]! \n" // store 8 pixels V.
- "bgt 1b \n"  // repeat while pix > 0; body runs at least once
- : "+r"(src_bgra), // %0
- "+r"(src_stride_bgra), // %1 (int operand reused as the second-row address)
- "+r"(dst_u), // %2
- "+r"(dst_v), // %3
- "+r"(pix) // %4
- :
- : "cc", "memory", "q0", "q1", "q2", "q3", "q4", "q5", "q6", "q7",
- "q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15"
- );
-}
-
-void ABGRToUVRow_NEON(const uint8* src_abgr, int src_stride_abgr,  // Each 2x2 block of ABGR pixels (two rows) yields one U and one V byte.
- uint8* dst_u, uint8* dst_v, int pix) {  // pix drops by 16 per pass; 8 U and 8 V bytes are written per pass.
- asm volatile (
- "add %1, %0, %1 \n" // src_stride + src_abgr (second row address)
- "vmov.s16 q10, #112 / 2 \n" // UB / VR 0.875 coefficient
- "vmov.s16 q11, #74 / 2 \n" // UG -0.5781 coefficient
- "vmov.s16 q12, #38 / 2 \n" // UR -0.2969 coefficient
- "vmov.s16 q13, #18 / 2 \n" // VB -0.1406 coefficient
- "vmov.s16 q14, #94 / 2 \n" // VG -0.7344 coefficient
- "vmov.u16 q15, #0x8080 \n" // 128.5
- ".p2align 2 \n"
- "1: \n"
- MEMACCESS(0)
- "vld4.8 {d0, d2, d4, d6}, [%0]! \n" // load 8 ABGR pixels.
- MEMACCESS(0)
- "vld4.8 {d1, d3, d5, d7}, [%0]! \n" // load next 8 ABGR pixels.
- "vpaddl.u8 q2, q2 \n" // B 16 bytes -> 8 shorts.
- "vpaddl.u8 q1, q1 \n" // G 16 bytes -> 8 shorts.
- "vpaddl.u8 q0, q0 \n" // R 16 bytes -> 8 shorts.
- MEMACCESS(1)
- "vld4.8 {d8, d10, d12, d14}, [%1]! \n" // load 8 more ABGR pixels.
- MEMACCESS(1)
- "vld4.8 {d9, d11, d13, d15}, [%1]! \n" // load last 8 ABGR pixels.
- "vpadal.u8 q2, q6 \n" // B 16 bytes -> 8 shorts, accumulated onto row 0 sums.
- "vpadal.u8 q1, q5 \n" // G 16 bytes -> 8 shorts, accumulated onto row 0 sums.
- "vpadal.u8 q0, q4 \n" // R 16 bytes -> 8 shorts, accumulated onto row 0 sums.
-
- "vrshr.u16 q0, q0, #1 \n" // 2x average
- "vrshr.u16 q1, q1, #1 \n"
- "vrshr.u16 q2, q2, #1 \n"
-
- "subs %4, %4, #16 \n" // 16 processed per loop.
- RGBTOUV(q2, q1, q0)
- MEMACCESS(2)
- "vst1.8 {d0}, [%2]! \n" // store 8 pixels U.
- MEMACCESS(3)
- "vst1.8 {d1}, [%3]! \n" // store 8 pixels V.
- "bgt 1b \n"  // repeat while pix > 0; body runs at least once
- : "+r"(src_abgr), // %0
- "+r"(src_stride_abgr), // %1 (int operand reused as the second-row address)
- "+r"(dst_u), // %2
- "+r"(dst_v), // %3
- "+r"(pix) // %4
- :
- : "cc", "memory", "q0", "q1", "q2", "q3", "q4", "q5", "q6", "q7",
- "q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15"
- );
-}
-
-void RGBAToUVRow_NEON(const uint8* src_rgba, int src_stride_rgba,  // Each 2x2 block of RGBA pixels (two rows) yields one U and one V byte.
- uint8* dst_u, uint8* dst_v, int pix) {  // pix drops by 16 per pass; 8 U and 8 V bytes are written per pass.
- asm volatile (
- "add %1, %0, %1 \n" // src_stride + src_rgba (second row address)
- "vmov.s16 q10, #112 / 2 \n" // UB / VR 0.875 coefficient
- "vmov.s16 q11, #74 / 2 \n" // UG -0.5781 coefficient
- "vmov.s16 q12, #38 / 2 \n" // UR -0.2969 coefficient
- "vmov.s16 q13, #18 / 2 \n" // VB -0.1406 coefficient
- "vmov.s16 q14, #94 / 2 \n" // VG -0.7344 coefficient
- "vmov.u16 q15, #0x8080 \n" // 128.5
- ".p2align 2 \n"
- "1: \n"
- MEMACCESS(0)
- "vld4.8 {d0, d2, d4, d6}, [%0]! \n" // load 8 RGBA pixels.
- MEMACCESS(0)
- "vld4.8 {d1, d3, d5, d7}, [%0]! \n" // load next 8 RGBA pixels.
- "vpaddl.u8 q0, q1 \n" // B 16 bytes -> 8 shorts (source q1, result moved down into q0).
- "vpaddl.u8 q1, q2 \n" // G 16 bytes -> 8 shorts (source q2, result in q1).
- "vpaddl.u8 q2, q3 \n" // R 16 bytes -> 8 shorts (source q3, result in q2).
- MEMACCESS(1)
- "vld4.8 {d8, d10, d12, d14}, [%1]! \n" // load 8 more RGBA pixels.
- MEMACCESS(1)
- "vld4.8 {d9, d11, d13, d15}, [%1]! \n" // load last 8 RGBA pixels.
- "vpadal.u8 q0, q5 \n" // B 16 bytes -> 8 shorts, accumulated onto row 0 sums.
- "vpadal.u8 q1, q6 \n" // G 16 bytes -> 8 shorts, accumulated onto row 0 sums.
- "vpadal.u8 q2, q7 \n" // R 16 bytes -> 8 shorts, accumulated onto row 0 sums.
-
- "vrshr.u16 q0, q0, #1 \n" // 2x average
- "vrshr.u16 q1, q1, #1 \n"
- "vrshr.u16 q2, q2, #1 \n"
-
- "subs %4, %4, #16 \n" // 16 processed per loop.
- RGBTOUV(q0, q1, q2)
- MEMACCESS(2)
- "vst1.8 {d0}, [%2]! \n" // store 8 pixels U.
- MEMACCESS(3)
- "vst1.8 {d1}, [%3]! \n" // store 8 pixels V.
- "bgt 1b \n"  // repeat while pix > 0; body runs at least once
- : "+r"(src_rgba), // %0
- "+r"(src_stride_rgba), // %1 (int operand reused as the second-row address)
- "+r"(dst_u), // %2
- "+r"(dst_v), // %3
- "+r"(pix) // %4
- :
- : "cc", "memory", "q0", "q1", "q2", "q3", "q4", "q5", "q6", "q7",
- "q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15"
- );
-}
-
-void RGB24ToUVRow_NEON(const uint8* src_rgb24, int src_stride_rgb24,  // Each 2x2 block of 3-byte RGB24 pixels (two rows) yields one U and one V byte.
- uint8* dst_u, uint8* dst_v, int pix) {  // pix drops by 16 per pass; 8 U and 8 V bytes are written per pass.
- asm volatile (
- "add %1, %0, %1 \n" // src_stride + src_rgb24 (second row address)
- "vmov.s16 q10, #112 / 2 \n" // UB / VR 0.875 coefficient
- "vmov.s16 q11, #74 / 2 \n" // UG -0.5781 coefficient
- "vmov.s16 q12, #38 / 2 \n" // UR -0.2969 coefficient
- "vmov.s16 q13, #18 / 2 \n" // VB -0.1406 coefficient
- "vmov.s16 q14, #94 / 2 \n" // VG -0.7344 coefficient
- "vmov.u16 q15, #0x8080 \n" // 128.5
- ".p2align 2 \n"
- "1: \n"
- MEMACCESS(0)
- "vld3.8 {d0, d2, d4}, [%0]! \n" // load 8 RGB24 pixels.
- MEMACCESS(0)
- "vld3.8 {d1, d3, d5}, [%0]! \n" // load next 8 RGB24 pixels.
- "vpaddl.u8 q0, q0 \n" // B 16 bytes -> 8 shorts.
- "vpaddl.u8 q1, q1 \n" // G 16 bytes -> 8 shorts.
- "vpaddl.u8 q2, q2 \n" // R 16 bytes -> 8 shorts.
- MEMACCESS(1)
- "vld3.8 {d8, d10, d12}, [%1]! \n" // load 8 more RGB24 pixels.
- MEMACCESS(1)
- "vld3.8 {d9, d11, d13}, [%1]! \n" // load last 8 RGB24 pixels.
- "vpadal.u8 q0, q4 \n" // B 16 bytes -> 8 shorts, accumulated onto row 0 sums.
- "vpadal.u8 q1, q5 \n" // G 16 bytes -> 8 shorts, accumulated onto row 0 sums.
- "vpadal.u8 q2, q6 \n" // R 16 bytes -> 8 shorts, accumulated onto row 0 sums.
-
- "vrshr.u16 q0, q0, #1 \n" // 2x average
- "vrshr.u16 q1, q1, #1 \n"
- "vrshr.u16 q2, q2, #1 \n"
-
- "subs %4, %4, #16 \n" // 16 processed per loop.
- RGBTOUV(q0, q1, q2)
- MEMACCESS(2)
- "vst1.8 {d0}, [%2]! \n" // store 8 pixels U.
- MEMACCESS(3)
- "vst1.8 {d1}, [%3]! \n" // store 8 pixels V.
- "bgt 1b \n"  // repeat while pix > 0; body runs at least once
- : "+r"(src_rgb24), // %0
- "+r"(src_stride_rgb24), // %1 (int operand reused as the second-row address)
- "+r"(dst_u), // %2
- "+r"(dst_v), // %3
- "+r"(pix) // %4
- :
- : "cc", "memory", "q0", "q1", "q2", "q3", "q4", "q5", "q6", "q7",
- "q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15"
- );
-}
-
-void RAWToUVRow_NEON(const uint8* src_raw, int src_stride_raw,  // Each 2x2 block of 3-byte RAW pixels (two rows) yields one U and one V byte.
- uint8* dst_u, uint8* dst_v, int pix) {  // pix drops by 16 per pass; 8 U and 8 V bytes are written per pass.
- asm volatile (
- "add %1, %0, %1 \n" // src_stride + src_raw (second row address)
- "vmov.s16 q10, #112 / 2 \n" // UB / VR 0.875 coefficient
- "vmov.s16 q11, #74 / 2 \n" // UG -0.5781 coefficient
- "vmov.s16 q12, #38 / 2 \n" // UR -0.2969 coefficient
- "vmov.s16 q13, #18 / 2 \n" // VB -0.1406 coefficient
- "vmov.s16 q14, #94 / 2 \n" // VG -0.7344 coefficient
- "vmov.u16 q15, #0x8080 \n" // 128.5
- ".p2align 2 \n"
- "1: \n"
- MEMACCESS(0)
- "vld3.8 {d0, d2, d4}, [%0]! \n" // load 8 RAW pixels.
- MEMACCESS(0)
- "vld3.8 {d1, d3, d5}, [%0]! \n" // load next 8 RAW pixels.
- "vpaddl.u8 q2, q2 \n" // B 16 bytes -> 8 shorts.
- "vpaddl.u8 q1, q1 \n" // G 16 bytes -> 8 shorts.
- "vpaddl.u8 q0, q0 \n" // R 16 bytes -> 8 shorts.
- MEMACCESS(1)
- "vld3.8 {d8, d10, d12}, [%1]! \n" // load 8 more RAW pixels.
- MEMACCESS(1)
- "vld3.8 {d9, d11, d13}, [%1]! \n" // load last 8 RAW pixels.
- "vpadal.u8 q2, q6 \n" // B 16 bytes -> 8 shorts, accumulated onto row 0 sums.
- "vpadal.u8 q1, q5 \n" // G 16 bytes -> 8 shorts, accumulated onto row 0 sums.
- "vpadal.u8 q0, q4 \n" // R 16 bytes -> 8 shorts, accumulated onto row 0 sums.
-
- "vrshr.u16 q0, q0, #1 \n" // 2x average
- "vrshr.u16 q1, q1, #1 \n"
- "vrshr.u16 q2, q2, #1 \n"
-
- "subs %4, %4, #16 \n" // 16 processed per loop.
- RGBTOUV(q2, q1, q0)
- MEMACCESS(2)
- "vst1.8 {d0}, [%2]! \n" // store 8 pixels U.
- MEMACCESS(3)
- "vst1.8 {d1}, [%3]! \n" // store 8 pixels V.
- "bgt 1b \n"  // repeat while pix > 0; body runs at least once
- : "+r"(src_raw), // %0
- "+r"(src_stride_raw), // %1 (int operand reused as the second-row address)
- "+r"(dst_u), // %2
- "+r"(dst_v), // %3
- "+r"(pix) // %4
- :
- : "cc", "memory", "q0", "q1", "q2", "q3", "q4", "q5", "q6", "q7",
- "q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15"
- );
-}
-
-// 16x2 pixels -> 8x1. pix is number of RGB565 pixels. e.g. 16.
-void RGB565ToUVRow_NEON(const uint8* src_rgb565, int src_stride_rgb565,  // Each 2x2 block of RGB565 pixels (two rows) yields one U and one V byte.
- uint8* dst_u, uint8* dst_v, int pix) {  // pix drops by 16 per pass; 8 U and 8 V bytes are written per pass.
- asm volatile (
- "add %1, %0, %1 \n" // src_stride + src_rgb565 (second row address)
- "vmov.s16 q10, #112 / 2 \n" // UB / VR 0.875 coefficient
- "vmov.s16 q11, #74 / 2 \n" // UG -0.5781 coefficient
- "vmov.s16 q12, #38 / 2 \n" // UR -0.2969 coefficient
- "vmov.s16 q13, #18 / 2 \n" // VB -0.1406 coefficient
- "vmov.s16 q14, #94 / 2 \n" // VG -0.7344 coefficient
- "vmov.u16 q15, #0x8080 \n" // 128.5
- ".p2align 2 \n"
- "1: \n"
- MEMACCESS(0)
- "vld1.8 {q0}, [%0]! \n" // load 8 RGB565 pixels.
- RGB565TOARGB  // macro defined elsewhere; presumably unpacks q0 into B/G/R bytes in d0/d1/d2 (matches the uses below)
- "vpaddl.u8 d8, d0 \n" // B 8 bytes -> 4 shorts.
- "vpaddl.u8 d10, d1 \n" // G 8 bytes -> 4 shorts.
- "vpaddl.u8 d12, d2 \n" // R 8 bytes -> 4 shorts.
- MEMACCESS(0)
- "vld1.8 {q0}, [%0]! \n" // next 8 RGB565 pixels.
- RGB565TOARGB
- "vpaddl.u8 d9, d0 \n" // B 8 bytes -> 4 shorts.
- "vpaddl.u8 d11, d1 \n" // G 8 bytes -> 4 shorts.
- "vpaddl.u8 d13, d2 \n" // R 8 bytes -> 4 shorts.
-
- MEMACCESS(1)
- "vld1.8 {q0}, [%1]! \n" // load 8 RGB565 pixels (second row).
- RGB565TOARGB
- "vpadal.u8 d8, d0 \n" // B 8 bytes -> 4 shorts, accumulated onto row 0 sums.
- "vpadal.u8 d10, d1 \n" // G 8 bytes -> 4 shorts, accumulated onto row 0 sums.
- "vpadal.u8 d12, d2 \n" // R 8 bytes -> 4 shorts, accumulated onto row 0 sums.
- MEMACCESS(1)
- "vld1.8 {q0}, [%1]! \n" // next 8 RGB565 pixels (second row).
- RGB565TOARGB
- "vpadal.u8 d9, d0 \n" // B 8 bytes -> 4 shorts, accumulated onto row 0 sums.
- "vpadal.u8 d11, d1 \n" // G 8 bytes -> 4 shorts, accumulated onto row 0 sums.
- "vpadal.u8 d13, d2 \n" // R 8 bytes -> 4 shorts, accumulated onto row 0 sums.
-
- "vrshr.u16 q4, q4, #1 \n" // 2x average
- "vrshr.u16 q5, q5, #1 \n"
- "vrshr.u16 q6, q6, #1 \n"
-
- "subs %4, %4, #16 \n" // 16 processed per loop.
- "vmul.s16 q8, q4, q10 \n" // B
- "vmls.s16 q8, q5, q11 \n" // G
- "vmls.s16 q8, q6, q12 \n" // R
- "vadd.u16 q8, q8, q15 \n" // +128 -> unsigned
- "vmul.s16 q9, q6, q10 \n" // R
- "vmls.s16 q9, q5, q14 \n" // G
- "vmls.s16 q9, q4, q13 \n" // B
- "vadd.u16 q9, q9, q15 \n" // +128 -> unsigned
- "vqshrn.u16 d0, q8, #8 \n" // 16 bit to 8 bit U
- "vqshrn.u16 d1, q9, #8 \n" // 16 bit to 8 bit V
- MEMACCESS(2)
- "vst1.8 {d0}, [%2]! \n" // store 8 pixels U.
- MEMACCESS(3)
- "vst1.8 {d1}, [%3]! \n" // store 8 pixels V.
- "bgt 1b \n"  // repeat while pix > 0; body runs at least once
- : "+r"(src_rgb565), // %0
- "+r"(src_stride_rgb565), // %1 (int operand reused as the second-row address)
- "+r"(dst_u), // %2
- "+r"(dst_v), // %3
- "+r"(pix) // %4
- :
- : "cc", "memory", "q0", "q1", "q2", "q3", "q4", "q5", "q6", "q7",
- "q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15"
- );
-}
-
-// 16x2 pixels -> 8x1. pix is number of ARGB1555 pixels. e.g. 16.
-void ARGB1555ToUVRow_NEON(const uint8* src_argb1555, int src_stride_argb1555,  // Each 2x2 block of ARGB1555 pixels (two rows) yields one U and one V byte.
- uint8* dst_u, uint8* dst_v, int pix) {  // pix drops by 16 per pass; 8 U and 8 V bytes are written per pass.
- asm volatile (
- "add %1, %0, %1 \n" // src_stride + src_argb1555 (second row address)
- "vmov.s16 q10, #112 / 2 \n" // UB / VR 0.875 coefficient
- "vmov.s16 q11, #74 / 2 \n" // UG -0.5781 coefficient
- "vmov.s16 q12, #38 / 2 \n" // UR -0.2969 coefficient
- "vmov.s16 q13, #18 / 2 \n" // VB -0.1406 coefficient
- "vmov.s16 q14, #94 / 2 \n" // VG -0.7344 coefficient
- "vmov.u16 q15, #0x8080 \n" // 128.5
- ".p2align 2 \n"
- "1: \n"
- MEMACCESS(0)
- "vld1.8 {q0}, [%0]! \n" // load 8 ARGB1555 pixels.
- RGB555TOARGB  // macro defined elsewhere; presumably unpacks q0 into B/G/R bytes in d0/d1/d2 (matches the uses below)
- "vpaddl.u8 d8, d0 \n" // B 8 bytes -> 4 shorts.
- "vpaddl.u8 d10, d1 \n" // G 8 bytes -> 4 shorts.
- "vpaddl.u8 d12, d2 \n" // R 8 bytes -> 4 shorts.
- MEMACCESS(0)
- "vld1.8 {q0}, [%0]! \n" // next 8 ARGB1555 pixels.
- RGB555TOARGB
- "vpaddl.u8 d9, d0 \n" // B 8 bytes -> 4 shorts.
- "vpaddl.u8 d11, d1 \n" // G 8 bytes -> 4 shorts.
- "vpaddl.u8 d13, d2 \n" // R 8 bytes -> 4 shorts.
-
- MEMACCESS(1)
- "vld1.8 {q0}, [%1]! \n" // load 8 ARGB1555 pixels (second row).
- RGB555TOARGB
- "vpadal.u8 d8, d0 \n" // B 8 bytes -> 4 shorts, accumulated onto row 0 sums.
- "vpadal.u8 d10, d1 \n" // G 8 bytes -> 4 shorts, accumulated onto row 0 sums.
- "vpadal.u8 d12, d2 \n" // R 8 bytes -> 4 shorts, accumulated onto row 0 sums.
- MEMACCESS(1)
- "vld1.8 {q0}, [%1]! \n" // next 8 ARGB1555 pixels (second row).
- RGB555TOARGB
- "vpadal.u8 d9, d0 \n" // B 8 bytes -> 4 shorts, accumulated onto row 0 sums.
- "vpadal.u8 d11, d1 \n" // G 8 bytes -> 4 shorts, accumulated onto row 0 sums.
- "vpadal.u8 d13, d2 \n" // R 8 bytes -> 4 shorts, accumulated onto row 0 sums.
-
- "vrshr.u16 q4, q4, #1 \n" // 2x average
- "vrshr.u16 q5, q5, #1 \n"
- "vrshr.u16 q6, q6, #1 \n"
-
- "subs %4, %4, #16 \n" // 16 processed per loop.
- "vmul.s16 q8, q4, q10 \n" // B
- "vmls.s16 q8, q5, q11 \n" // G
- "vmls.s16 q8, q6, q12 \n" // R
- "vadd.u16 q8, q8, q15 \n" // +128 -> unsigned
- "vmul.s16 q9, q6, q10 \n" // R
- "vmls.s16 q9, q5, q14 \n" // G
- "vmls.s16 q9, q4, q13 \n" // B
- "vadd.u16 q9, q9, q15 \n" // +128 -> unsigned
- "vqshrn.u16 d0, q8, #8 \n" // 16 bit to 8 bit U
- "vqshrn.u16 d1, q9, #8 \n" // 16 bit to 8 bit V
- MEMACCESS(2)
- "vst1.8 {d0}, [%2]! \n" // store 8 pixels U.
- MEMACCESS(3)
- "vst1.8 {d1}, [%3]! \n" // store 8 pixels V.
- "bgt 1b \n"  // repeat while pix > 0; body runs at least once
- : "+r"(src_argb1555), // %0
- "+r"(src_stride_argb1555), // %1 (int operand reused as the second-row address)
- "+r"(dst_u), // %2
- "+r"(dst_v), // %3
- "+r"(pix) // %4
- :
- : "cc", "memory", "q0", "q1", "q2", "q3", "q4", "q5", "q6", "q7",
- "q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15"
- );
-}
-
-// 16x2 pixels -> 8x1. pix is number of ARGB4444 pixels. e.g. 16.
-void ARGB4444ToUVRow_NEON(const uint8* src_argb4444, int src_stride_argb4444,  // Each 2x2 block of ARGB4444 pixels (two rows) yields one U and one V byte.
- uint8* dst_u, uint8* dst_v, int pix) {  // pix drops by 16 per pass; 8 U and 8 V bytes are written per pass.
- asm volatile (
- "add %1, %0, %1 \n" // src_stride + src_argb4444 (second row address)
- "vmov.s16 q10, #112 / 2 \n" // UB / VR 0.875 coefficient
- "vmov.s16 q11, #74 / 2 \n" // UG -0.5781 coefficient
- "vmov.s16 q12, #38 / 2 \n" // UR -0.2969 coefficient
- "vmov.s16 q13, #18 / 2 \n" // VB -0.1406 coefficient
- "vmov.s16 q14, #94 / 2 \n" // VG -0.7344 coefficient
- "vmov.u16 q15, #0x8080 \n" // 128.5
- ".p2align 2 \n"
- "1: \n"
- MEMACCESS(0)
- "vld1.8 {q0}, [%0]! \n" // load 8 ARGB4444 pixels.
- ARGB4444TOARGB  // macro defined elsewhere; presumably unpacks q0 into B/G/R bytes in d0/d1/d2 (matches the uses below)
- "vpaddl.u8 d8, d0 \n" // B 8 bytes -> 4 shorts.
- "vpaddl.u8 d10, d1 \n" // G 8 bytes -> 4 shorts.
- "vpaddl.u8 d12, d2 \n" // R 8 bytes -> 4 shorts.
- MEMACCESS(0)
- "vld1.8 {q0}, [%0]! \n" // next 8 ARGB4444 pixels.
- ARGB4444TOARGB
- "vpaddl.u8 d9, d0 \n" // B 8 bytes -> 4 shorts.
- "vpaddl.u8 d11, d1 \n" // G 8 bytes -> 4 shorts.
- "vpaddl.u8 d13, d2 \n" // R 8 bytes -> 4 shorts.
-
- MEMACCESS(1)
- "vld1.8 {q0}, [%1]! \n" // load 8 ARGB4444 pixels (second row).
- ARGB4444TOARGB
- "vpadal.u8 d8, d0 \n" // B 8 bytes -> 4 shorts, accumulated onto row 0 sums.
- "vpadal.u8 d10, d1 \n" // G 8 bytes -> 4 shorts, accumulated onto row 0 sums.
- "vpadal.u8 d12, d2 \n" // R 8 bytes -> 4 shorts, accumulated onto row 0 sums.
- MEMACCESS(1)
- "vld1.8 {q0}, [%1]! \n" // next 8 ARGB4444 pixels (second row).
- ARGB4444TOARGB
- "vpadal.u8 d9, d0 \n" // B 8 bytes -> 4 shorts, accumulated onto row 0 sums.
- "vpadal.u8 d11, d1 \n" // G 8 bytes -> 4 shorts, accumulated onto row 0 sums.
- "vpadal.u8 d13, d2 \n" // R 8 bytes -> 4 shorts, accumulated onto row 0 sums.
-
- "vrshr.u16 q4, q4, #1 \n" // 2x average
- "vrshr.u16 q5, q5, #1 \n"
- "vrshr.u16 q6, q6, #1 \n"
-
- "subs %4, %4, #16 \n" // 16 processed per loop.
- "vmul.s16 q8, q4, q10 \n" // B
- "vmls.s16 q8, q5, q11 \n" // G
- "vmls.s16 q8, q6, q12 \n" // R
- "vadd.u16 q8, q8, q15 \n" // +128 -> unsigned
- "vmul.s16 q9, q6, q10 \n" // R
- "vmls.s16 q9, q5, q14 \n" // G
- "vmls.s16 q9, q4, q13 \n" // B
- "vadd.u16 q9, q9, q15 \n" // +128 -> unsigned
- "vqshrn.u16 d0, q8, #8 \n" // 16 bit to 8 bit U
- "vqshrn.u16 d1, q9, #8 \n" // 16 bit to 8 bit V
- MEMACCESS(2)
- "vst1.8 {d0}, [%2]! \n" // store 8 pixels U.
- MEMACCESS(3)
- "vst1.8 {d1}, [%3]! \n" // store 8 pixels V.
- "bgt 1b \n"  // repeat while pix > 0; body runs at least once
- : "+r"(src_argb4444), // %0
- "+r"(src_stride_argb4444), // %1 (int operand reused as the second-row address)
- "+r"(dst_u), // %2
- "+r"(dst_v), // %3
- "+r"(pix) // %4
- :
- : "cc", "memory", "q0", "q1", "q2", "q3", "q4", "q5", "q6", "q7",
- "q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15"
- );
-}
-
-void RGB565ToYRow_NEON(const uint8* src_rgb565, uint8* dst_y, int pix) {  // RGB565 -> Y using 13/65/33 weights, >> 7 with rounding, then +16; 8 pixels per pass.
- asm volatile (
- "vmov.u8 d24, #13 \n" // B * 0.1016 coefficient
- "vmov.u8 d25, #65 \n" // G * 0.5078 coefficient
- "vmov.u8 d26, #33 \n" // R * 0.2578 coefficient
- "vmov.u8 d27, #16 \n" // Add 16 constant
- ".p2align 2 \n"  // 4-byte align the loop label
- "1: \n"
- MEMACCESS(0)
- "vld1.8 {q0}, [%0]! \n" // load 8 RGB565 pixels.
- "subs %2, %2, #8 \n" // 8 processed per loop.
- RGB565TOARGB  // macro defined elsewhere; presumably unpacks q0 into B/G/R bytes in d0/d1/d2 (matches the uses below)
- "vmull.u8 q2, d0, d24 \n" // B
- "vmlal.u8 q2, d1, d25 \n" // G
- "vmlal.u8 q2, d2, d26 \n" // R
- "vqrshrun.s16 d0, q2, #7 \n" // 16 bit to 8 bit Y
- "vqadd.u8 d0, d27 \n"  // saturating add of the 16 offset
- MEMACCESS(1)
- "vst1.8 {d0}, [%1]! \n" // store 8 pixels Y.
- "bgt 1b \n"  // repeat while pix > 0; body runs at least once
- : "+r"(src_rgb565), // %0
- "+r"(dst_y), // %1
- "+r"(pix) // %2
- :
- : "cc", "memory", "q0", "q1", "q2", "q3", "q12", "q13"
- );
-}
-
-void ARGB1555ToYRow_NEON(const uint8* src_argb1555, uint8* dst_y, int pix) {  // ARGB1555 -> Y using 13/65/33 weights, >> 7 with rounding, then +16; 8 pixels per pass.
- asm volatile (
- "vmov.u8 d24, #13 \n" // B * 0.1016 coefficient
- "vmov.u8 d25, #65 \n" // G * 0.5078 coefficient
- "vmov.u8 d26, #33 \n" // R * 0.2578 coefficient
- "vmov.u8 d27, #16 \n" // Add 16 constant
- ".p2align 2 \n"  // 4-byte align the loop label
- "1: \n"
- MEMACCESS(0)
- "vld1.8 {q0}, [%0]! \n" // load 8 ARGB1555 pixels.
- "subs %2, %2, #8 \n" // 8 processed per loop.
- ARGB1555TOARGB  // macro defined elsewhere; presumably unpacks q0 into B/G/R bytes in d0/d1/d2 (matches the uses below)
- "vmull.u8 q2, d0, d24 \n" // B
- "vmlal.u8 q2, d1, d25 \n" // G
- "vmlal.u8 q2, d2, d26 \n" // R
- "vqrshrun.s16 d0, q2, #7 \n" // 16 bit to 8 bit Y
- "vqadd.u8 d0, d27 \n"  // saturating add of the 16 offset
- MEMACCESS(1)
- "vst1.8 {d0}, [%1]! \n" // store 8 pixels Y.
- "bgt 1b \n"  // repeat while pix > 0; body runs at least once
- : "+r"(src_argb1555), // %0
- "+r"(dst_y), // %1
- "+r"(pix) // %2
- :
- : "cc", "memory", "q0", "q1", "q2", "q3", "q12", "q13"
- );
-}
-
-void ARGB4444ToYRow_NEON(const uint8* src_argb4444, uint8* dst_y, int pix) {
- asm volatile (
- "vmov.u8 d24, #13 \n" // B * 0.1016 coefficient
- "vmov.u8 d25, #65 \n" // G * 0.5078 coefficient
- "vmov.u8 d26, #33 \n" // R * 0.2578 coefficient
- "vmov.u8 d27, #16 \n" // Add 16 constant
- ".p2align 2 \n"
- "1: \n"
- MEMACCESS(0)
- "vld1.8 {q0}, [%0]! \n" // load 8 ARGB4444 pixels.
- "subs %2, %2, #8 \n" // 8 processed per loop.
- ARGB4444TOARGB
- "vmull.u8 q2, d0, d24 \n" // B
- "vmlal.u8 q2, d1, d25 \n" // G
- "vmlal.u8 q2, d2, d26 \n" // R
- "vqrshrun.s16 d0, q2, #7 \n" // 16 bit to 8 bit Y
- "vqadd.u8 d0, d27 \n"
- MEMACCESS(1)
- "vst1.8 {d0}, [%1]! \n" // store 8 pixels Y.
- "bgt 1b \n"
- : "+r"(src_argb4444), // %0
- "+r"(dst_y), // %1
- "+r"(pix) // %2
- :
- : "cc", "memory", "q0", "q1", "q2", "q3", "q12", "q13"
- );
-}
-
-void BGRAToYRow_NEON(const uint8* src_bgra, uint8* dst_y, int pix) {
- asm volatile (
- "vmov.u8 d4, #33 \n" // R * 0.2578 coefficient
- "vmov.u8 d5, #65 \n" // G * 0.5078 coefficient
- "vmov.u8 d6, #13 \n" // B * 0.1016 coefficient
- "vmov.u8 d7, #16 \n" // Add 16 constant
- ".p2align 2 \n"
- "1: \n"
- MEMACCESS(0)
- "vld4.8 {d0, d1, d2, d3}, [%0]! \n" // load 8 pixels of BGRA.
- "subs %2, %2, #8 \n" // 8 processed per loop.
- "vmull.u8 q8, d1, d4 \n" // R
- "vmlal.u8 q8, d2, d5 \n" // G
- "vmlal.u8 q8, d3, d6 \n" // B
- "vqrshrun.s16 d0, q8, #7 \n" // 16 bit to 8 bit Y
- "vqadd.u8 d0, d7 \n"
- MEMACCESS(1)
- "vst1.8 {d0}, [%1]! \n" // store 8 pixels Y.
- "bgt 1b \n"
- : "+r"(src_bgra), // %0
- "+r"(dst_y), // %1
- "+r"(pix) // %2
- :
- : "cc", "memory", "d0", "d1", "d2", "d3", "d4", "d5", "d6", "d7", "q8"
- );
-}
-
-void ABGRToYRow_NEON(const uint8* src_abgr, uint8* dst_y, int pix) {
- asm volatile (
- "vmov.u8 d4, #33 \n" // R * 0.2578 coefficient
- "vmov.u8 d5, #65 \n" // G * 0.5078 coefficient
- "vmov.u8 d6, #13 \n" // B * 0.1016 coefficient
- "vmov.u8 d7, #16 \n" // Add 16 constant
- ".p2align 2 \n"
- "1: \n"
- MEMACCESS(0)
- "vld4.8 {d0, d1, d2, d3}, [%0]! \n" // load 8 pixels of ABGR.
- "subs %2, %2, #8 \n" // 8 processed per loop.
- "vmull.u8 q8, d0, d4 \n" // R
- "vmlal.u8 q8, d1, d5 \n" // G
- "vmlal.u8 q8, d2, d6 \n" // B
- "vqrshrun.s16 d0, q8, #7 \n" // 16 bit to 8 bit Y
- "vqadd.u8 d0, d7 \n"
- MEMACCESS(1)
- "vst1.8 {d0}, [%1]! \n" // store 8 pixels Y.
- "bgt 1b \n"
- : "+r"(src_abgr), // %0
- "+r"(dst_y), // %1
- "+r"(pix) // %2
- :
- : "cc", "memory", "d0", "d1", "d2", "d3", "d4", "d5", "d6", "d7", "q8"
- );
-}
-
-void RGBAToYRow_NEON(const uint8* src_rgba, uint8* dst_y, int pix) {
- asm volatile (
- "vmov.u8 d4, #13 \n" // B * 0.1016 coefficient
- "vmov.u8 d5, #65 \n" // G * 0.5078 coefficient
- "vmov.u8 d6, #33 \n" // R * 0.2578 coefficient
- "vmov.u8 d7, #16 \n" // Add 16 constant
- ".p2align 2 \n"
- "1: \n"
- MEMACCESS(0)
- "vld4.8 {d0, d1, d2, d3}, [%0]! \n" // load 8 pixels of RGBA.
- "subs %2, %2, #8 \n" // 8 processed per loop.
- "vmull.u8 q8, d1, d4 \n" // B
- "vmlal.u8 q8, d2, d5 \n" // G
- "vmlal.u8 q8, d3, d6 \n" // R
- "vqrshrun.s16 d0, q8, #7 \n" // 16 bit to 8 bit Y
- "vqadd.u8 d0, d7 \n"
- MEMACCESS(1)
- "vst1.8 {d0}, [%1]! \n" // store 8 pixels Y.
- "bgt 1b \n"
- : "+r"(src_rgba), // %0
- "+r"(dst_y), // %1
- "+r"(pix) // %2
- :
- : "cc", "memory", "d0", "d1", "d2", "d3", "d4", "d5", "d6", "d7", "q8"
- );
-}
-
-void RGB24ToYRow_NEON(const uint8* src_rgb24, uint8* dst_y, int pix) {
- asm volatile (
- "vmov.u8 d4, #13 \n" // B * 0.1016 coefficient
- "vmov.u8 d5, #65 \n" // G * 0.5078 coefficient
- "vmov.u8 d6, #33 \n" // R * 0.2578 coefficient
- "vmov.u8 d7, #16 \n" // Add 16 constant
- ".p2align 2 \n"
- "1: \n"
- MEMACCESS(0)
- "vld3.8 {d0, d1, d2}, [%0]! \n" // load 8 pixels of RGB24.
- "subs %2, %2, #8 \n" // 8 processed per loop.
- "vmull.u8 q8, d0, d4 \n" // B
- "vmlal.u8 q8, d1, d5 \n" // G
- "vmlal.u8 q8, d2, d6 \n" // R
- "vqrshrun.s16 d0, q8, #7 \n" // 16 bit to 8 bit Y
- "vqadd.u8 d0, d7 \n"
- MEMACCESS(1)
- "vst1.8 {d0}, [%1]! \n" // store 8 pixels Y.
- "bgt 1b \n"
- : "+r"(src_rgb24), // %0
- "+r"(dst_y), // %1
- "+r"(pix) // %2
- :
- : "cc", "memory", "d0", "d1", "d2", "d3", "d4", "d5", "d6", "d7", "q8"
- );
-}
-
-void RAWToYRow_NEON(const uint8* src_raw, uint8* dst_y, int pix) {
- asm volatile (
- "vmov.u8 d4, #33 \n" // R * 0.2578 coefficient
- "vmov.u8 d5, #65 \n" // G * 0.5078 coefficient
- "vmov.u8 d6, #13 \n" // B * 0.1016 coefficient
- "vmov.u8 d7, #16 \n" // Add 16 constant
- ".p2align 2 \n"
- "1: \n"
- MEMACCESS(0)
- "vld3.8 {d0, d1, d2}, [%0]! \n" // load 8 pixels of RAW.
- "subs %2, %2, #8 \n" // 8 processed per loop.
- "vmull.u8 q8, d0, d4 \n" // B
- "vmlal.u8 q8, d1, d5 \n" // G
- "vmlal.u8 q8, d2, d6 \n" // R
- "vqrshrun.s16 d0, q8, #7 \n" // 16 bit to 8 bit Y
- "vqadd.u8 d0, d7 \n"
- MEMACCESS(1)
- "vst1.8 {d0}, [%1]! \n" // store 8 pixels Y.
- "bgt 1b \n"
- : "+r"(src_raw), // %0
- "+r"(dst_y), // %1
- "+r"(pix) // %2
- :
- : "cc", "memory", "d0", "d1", "d2", "d3", "d4", "d5", "d6", "d7", "q8"
- );
-}
-
-// Bilinear filter 16x2 -> 16x1
-void InterpolateRow_NEON(uint8* dst_ptr,
- const uint8* src_ptr, ptrdiff_t src_stride,
- int dst_width, int source_y_fraction) {
- asm volatile (
- "cmp %4, #0 \n"
- "beq 100f \n"
- "add %2, %1 \n"
- "cmp %4, #64 \n"
- "beq 75f \n"
- "cmp %4, #128 \n"
- "beq 50f \n"
- "cmp %4, #192 \n"
- "beq 25f \n"
-
- "vdup.8 d5, %4 \n"
- "rsb %4, #256 \n"
- "vdup.8 d4, %4 \n"
- // General purpose row blend.
- "1: \n"
- MEMACCESS(1)
- "vld1.8 {q0}, [%1]! \n"
- MEMACCESS(2)
- "vld1.8 {q1}, [%2]! \n"
- "subs %3, %3, #16 \n"
- "vmull.u8 q13, d0, d4 \n"
- "vmull.u8 q14, d1, d4 \n"
- "vmlal.u8 q13, d2, d5 \n"
- "vmlal.u8 q14, d3, d5 \n"
- "vrshrn.u16 d0, q13, #8 \n"
- "vrshrn.u16 d1, q14, #8 \n"
- MEMACCESS(0)
- "vst1.8 {q0}, [%0]! \n"
- "bgt 1b \n"
- "b 99f \n"
-
- // Blend 25 / 75.
- "25: \n"
- MEMACCESS(1)
- "vld1.8 {q0}, [%1]! \n"
- MEMACCESS(2)
- "vld1.8 {q1}, [%2]! \n"
- "subs %3, %3, #16 \n"
- "vrhadd.u8 q0, q1 \n"
- "vrhadd.u8 q0, q1 \n"
- MEMACCESS(0)
- "vst1.8 {q0}, [%0]! \n"
- "bgt 25b \n"
- "b 99f \n"
-
- // Blend 50 / 50.
- "50: \n"
- MEMACCESS(1)
- "vld1.8 {q0}, [%1]! \n"
- MEMACCESS(2)
- "vld1.8 {q1}, [%2]! \n"
- "subs %3, %3, #16 \n"
- "vrhadd.u8 q0, q1 \n"
- MEMACCESS(0)
- "vst1.8 {q0}, [%0]! \n"
- "bgt 50b \n"
- "b 99f \n"
-
- // Blend 75 / 25.
- "75: \n"
- MEMACCESS(1)
- "vld1.8 {q1}, [%1]! \n"
- MEMACCESS(2)
- "vld1.8 {q0}, [%2]! \n"
- "subs %3, %3, #16 \n"
- "vrhadd.u8 q0, q1 \n"
- "vrhadd.u8 q0, q1 \n"
- MEMACCESS(0)
- "vst1.8 {q0}, [%0]! \n"
- "bgt 75b \n"
- "b 99f \n"
-
- // Blend 100 / 0 - Copy row unchanged.
- "100: \n"
- MEMACCESS(1)
- "vld1.8 {q0}, [%1]! \n"
- "subs %3, %3, #16 \n"
- MEMACCESS(0)
- "vst1.8 {q0}, [%0]! \n"
- "bgt 100b \n"
-
- "99: \n"
- : "+r"(dst_ptr), // %0
- "+r"(src_ptr), // %1
- "+r"(src_stride), // %2
- "+r"(dst_width), // %3
- "+r"(source_y_fraction) // %4
- :
- : "cc", "memory", "q0", "q1", "d4", "d5", "q13", "q14"
- );
-}
-
-// dr * (256 - sa) / 256 + sr = dr - dr * sa / 256 + sr
-void ARGBBlendRow_NEON(const uint8* src_argb0, const uint8* src_argb1,
- uint8* dst_argb, int width) {
- asm volatile (
- "subs %3, #8 \n"
- "blt 89f \n"
- // Blend 8 pixels.
- "8: \n"
- MEMACCESS(0)
- "vld4.8 {d0, d1, d2, d3}, [%0]! \n" // load 8 pixels of ARGB0.
- MEMACCESS(1)
- "vld4.8 {d4, d5, d6, d7}, [%1]! \n" // load 8 pixels of ARGB1.
- "subs %3, %3, #8 \n" // 8 processed per loop.
- "vmull.u8 q10, d4, d3 \n" // db * a
- "vmull.u8 q11, d5, d3 \n" // dg * a
- "vmull.u8 q12, d6, d3 \n" // dr * a
- "vqrshrn.u16 d20, q10, #8 \n" // db >>= 8
- "vqrshrn.u16 d21, q11, #8 \n" // dg >>= 8
- "vqrshrn.u16 d22, q12, #8 \n" // dr >>= 8
- "vqsub.u8 q2, q2, q10 \n" // dbg - dbg * a / 256
- "vqsub.u8 d6, d6, d22 \n" // dr - dr * a / 256
- "vqadd.u8 q0, q0, q2 \n" // + sbg
- "vqadd.u8 d2, d2, d6 \n" // + sr
- "vmov.u8 d3, #255 \n" // a = 255
- MEMACCESS(2)
- "vst4.8 {d0, d1, d2, d3}, [%2]! \n" // store 8 pixels of ARGB.
- "bge 8b \n"
-
- "89: \n"
- "adds %3, #8-1 \n"
- "blt 99f \n"
-
- // Blend 1 pixels.
- "1: \n"
- MEMACCESS(0)
- "vld4.8 {d0[0],d1[0],d2[0],d3[0]}, [%0]! \n" // load 1 pixel ARGB0.
- MEMACCESS(1)
- "vld4.8 {d4[0],d5[0],d6[0],d7[0]}, [%1]! \n" // load 1 pixel ARGB1.
- "subs %3, %3, #1 \n" // 1 processed per loop.
- "vmull.u8 q10, d4, d3 \n" // db * a
- "vmull.u8 q11, d5, d3 \n" // dg * a
- "vmull.u8 q12, d6, d3 \n" // dr * a
- "vqrshrn.u16 d20, q10, #8 \n" // db >>= 8
- "vqrshrn.u16 d21, q11, #8 \n" // dg >>= 8
- "vqrshrn.u16 d22, q12, #8 \n" // dr >>= 8
- "vqsub.u8 q2, q2, q10 \n" // dbg - dbg * a / 256
- "vqsub.u8 d6, d6, d22 \n" // dr - dr * a / 256
- "vqadd.u8 q0, q0, q2 \n" // + sbg
- "vqadd.u8 d2, d2, d6 \n" // + sr
- "vmov.u8 d3, #255 \n" // a = 255
- MEMACCESS(2)
- "vst4.8 {d0[0],d1[0],d2[0],d3[0]}, [%2]! \n" // store 1 pixel.
- "bge 1b \n"
-
- "99: \n"
-
- : "+r"(src_argb0), // %0
- "+r"(src_argb1), // %1
- "+r"(dst_argb), // %2
- "+r"(width) // %3
- :
- : "cc", "memory", "q0", "q1", "q2", "q3", "q10", "q11", "q12"
- );
-}
-
-// Attenuate 8 pixels at a time.
-void ARGBAttenuateRow_NEON(const uint8* src_argb, uint8* dst_argb, int width) {
- asm volatile (
- // Attenuate 8 pixels.
- "1: \n"
- MEMACCESS(0)
- "vld4.8 {d0, d1, d2, d3}, [%0]! \n" // load 8 pixels of ARGB.
- "subs %2, %2, #8 \n" // 8 processed per loop.
- "vmull.u8 q10, d0, d3 \n" // b * a
- "vmull.u8 q11, d1, d3 \n" // g * a
- "vmull.u8 q12, d2, d3 \n" // r * a
- "vqrshrn.u16 d0, q10, #8 \n" // b >>= 8
- "vqrshrn.u16 d1, q11, #8 \n" // g >>= 8
- "vqrshrn.u16 d2, q12, #8 \n" // r >>= 8
- MEMACCESS(1)
- "vst4.8 {d0, d1, d2, d3}, [%1]! \n" // store 8 pixels of ARGB.
- "bgt 1b \n"
- : "+r"(src_argb), // %0
- "+r"(dst_argb), // %1
- "+r"(width) // %2
- :
- : "cc", "memory", "q0", "q1", "q10", "q11", "q12"
- );
-}
-
-// Quantize 8 ARGB pixels (32 bytes).
-// dst = (dst * scale >> 16) * interval_size + interval_offset;
-void ARGBQuantizeRow_NEON(uint8* dst_argb, int scale, int interval_size,
- int interval_offset, int width) {
- asm volatile (
- "vdup.u16 q8, %2 \n"
- "vshr.u16 q8, q8, #1 \n" // scale >>= 1
- "vdup.u16 q9, %3 \n" // interval multiply.
- "vdup.u16 q10, %4 \n" // interval add
-
- // 8 pixel loop.
- ".p2align 2 \n"
- "1: \n"
- MEMACCESS(0)
- "vld4.8 {d0, d2, d4, d6}, [%0] \n" // load 8 pixels of ARGB.
- "subs %1, %1, #8 \n" // 8 processed per loop.
- "vmovl.u8 q0, d0 \n" // b (0 .. 255)
- "vmovl.u8 q1, d2 \n"
- "vmovl.u8 q2, d4 \n"
- "vqdmulh.s16 q0, q0, q8 \n" // b * scale
- "vqdmulh.s16 q1, q1, q8 \n" // g
- "vqdmulh.s16 q2, q2, q8 \n" // r
- "vmul.u16 q0, q0, q9 \n" // b * interval_size
- "vmul.u16 q1, q1, q9 \n" // g
- "vmul.u16 q2, q2, q9 \n" // r
- "vadd.u16 q0, q0, q10 \n" // b + interval_offset
- "vadd.u16 q1, q1, q10 \n" // g
- "vadd.u16 q2, q2, q10 \n" // r
- "vqmovn.u16 d0, q0 \n"
- "vqmovn.u16 d2, q1 \n"
- "vqmovn.u16 d4, q2 \n"
- MEMACCESS(0)
- "vst4.8 {d0, d2, d4, d6}, [%0]! \n" // store 8 pixels of ARGB.
- "bgt 1b \n"
- : "+r"(dst_argb), // %0
- "+r"(width) // %1
- : "r"(scale), // %2
- "r"(interval_size), // %3
- "r"(interval_offset) // %4
- : "cc", "memory", "q0", "q1", "q2", "q3", "q8", "q9", "q10"
- );
-}
-
-// Shade 8 pixels at a time by specified value.
-// NOTE vqrdmulh.s16 q10, q10, d0[0] must use a scaler register from 0 to 8.
-// Rounding in vqrdmulh does +1 to high if high bit of low s16 is set.
-void ARGBShadeRow_NEON(const uint8* src_argb, uint8* dst_argb, int width,
- uint32 value) {
- asm volatile (
- "vdup.u32 q0, %3 \n" // duplicate scale value.
- "vzip.u8 d0, d1 \n" // d0 aarrggbb.
- "vshr.u16 q0, q0, #1 \n" // scale / 2.
-
- // 8 pixel loop.
- ".p2align 2 \n"
- "1: \n"
- MEMACCESS(0)
- "vld4.8 {d20, d22, d24, d26}, [%0]! \n" // load 8 pixels of ARGB.
- "subs %2, %2, #8 \n" // 8 processed per loop.
- "vmovl.u8 q10, d20 \n" // b (0 .. 255)
- "vmovl.u8 q11, d22 \n"
- "vmovl.u8 q12, d24 \n"
- "vmovl.u8 q13, d26 \n"
- "vqrdmulh.s16 q10, q10, d0[0] \n" // b * scale * 2
- "vqrdmulh.s16 q11, q11, d0[1] \n" // g
- "vqrdmulh.s16 q12, q12, d0[2] \n" // r
- "vqrdmulh.s16 q13, q13, d0[3] \n" // a
- "vqmovn.u16 d20, q10 \n"
- "vqmovn.u16 d22, q11 \n"
- "vqmovn.u16 d24, q12 \n"
- "vqmovn.u16 d26, q13 \n"
- MEMACCESS(1)
- "vst4.8 {d20, d22, d24, d26}, [%1]! \n" // store 8 pixels of ARGB.
- "bgt 1b \n"
- : "+r"(src_argb), // %0
- "+r"(dst_argb), // %1
- "+r"(width) // %2
- : "r"(value) // %3
- : "cc", "memory", "q0", "q10", "q11", "q12", "q13"
- );
-}
-
-// Convert 8 ARGB pixels (64 bytes) to 8 Gray ARGB pixels
-// Similar to ARGBToYJ but stores ARGB.
-// C code is (15 * b + 75 * g + 38 * r + 64) >> 7;
-void ARGBGrayRow_NEON(const uint8* src_argb, uint8* dst_argb, int width) {
- asm volatile (
- "vmov.u8 d24, #15 \n" // B * 0.11400 coefficient
- "vmov.u8 d25, #75 \n" // G * 0.58700 coefficient
- "vmov.u8 d26, #38 \n" // R * 0.29900 coefficient
- ".p2align 2 \n"
- "1: \n"
- MEMACCESS(0)
- "vld4.8 {d0, d1, d2, d3}, [%0]! \n" // load 8 ARGB pixels.
- "subs %2, %2, #8 \n" // 8 processed per loop.
- "vmull.u8 q2, d0, d24 \n" // B
- "vmlal.u8 q2, d1, d25 \n" // G
- "vmlal.u8 q2, d2, d26 \n" // R
- "vqrshrun.s16 d0, q2, #7 \n" // 15 bit to 8 bit B
- "vmov d1, d0 \n" // G
- "vmov d2, d0 \n" // R
- MEMACCESS(1)
- "vst4.8 {d0, d1, d2, d3}, [%1]! \n" // store 8 ARGB pixels.
- "bgt 1b \n"
- : "+r"(src_argb), // %0
- "+r"(dst_argb), // %1
- "+r"(width) // %2
- :
- : "cc", "memory", "q0", "q1", "q2", "q12", "q13"
- );
-}
-
-// Convert 8 ARGB pixels (32 bytes) to 8 Sepia ARGB pixels.
-// b = (r * 35 + g * 68 + b * 17) >> 7
-// g = (r * 45 + g * 88 + b * 22) >> 7
-// r = (r * 50 + g * 98 + b * 24) >> 7
-void ARGBSepiaRow_NEON(uint8* dst_argb, int width) {
- asm volatile (
- "vmov.u8 d20, #17 \n" // BB coefficient
- "vmov.u8 d21, #68 \n" // BG coefficient
- "vmov.u8 d22, #35 \n" // BR coefficient
- "vmov.u8 d24, #22 \n" // GB coefficient
- "vmov.u8 d25, #88 \n" // GG coefficient
- "vmov.u8 d26, #45 \n" // GR coefficient
- "vmov.u8 d28, #24 \n" // BB coefficient
- "vmov.u8 d29, #98 \n" // BG coefficient
- "vmov.u8 d30, #50 \n" // BR coefficient
- ".p2align 2 \n"
- "1: \n"
- MEMACCESS(0)
- "vld4.8 {d0, d1, d2, d3}, [%0] \n" // load 8 ARGB pixels.
- "subs %1, %1, #8 \n" // 8 processed per loop.
- "vmull.u8 q2, d0, d20 \n" // B to Sepia B
- "vmlal.u8 q2, d1, d21 \n" // G
- "vmlal.u8 q2, d2, d22 \n" // R
- "vmull.u8 q3, d0, d24 \n" // B to Sepia G
- "vmlal.u8 q3, d1, d25 \n" // G
- "vmlal.u8 q3, d2, d26 \n" // R
- "vmull.u8 q8, d0, d28 \n" // B to Sepia R
- "vmlal.u8 q8, d1, d29 \n" // G
- "vmlal.u8 q8, d2, d30 \n" // R
- "vqshrn.u16 d0, q2, #7 \n" // 16 bit to 8 bit B
- "vqshrn.u16 d1, q3, #7 \n" // 16 bit to 8 bit G
- "vqshrn.u16 d2, q8, #7 \n" // 16 bit to 8 bit R
- MEMACCESS(0)
- "vst4.8 {d0, d1, d2, d3}, [%0]! \n" // store 8 ARGB pixels.
- "bgt 1b \n"
- : "+r"(dst_argb), // %0
- "+r"(width) // %1
- :
- : "cc", "memory", "q0", "q1", "q2", "q3",
- "q10", "q11", "q12", "q13", "q14", "q15"
- );
-}
-
-// Tranform 8 ARGB pixels (32 bytes) with color matrix.
-// TODO(fbarchard): Was same as Sepia except matrix is provided. This function
-// needs to saturate. Consider doing a non-saturating version.
-void ARGBColorMatrixRow_NEON(const uint8* src_argb, uint8* dst_argb,
- const int8* matrix_argb, int width) {
- asm volatile (
- MEMACCESS(3)
- "vld1.8 {q2}, [%3] \n" // load 3 ARGB vectors.
- "vmovl.s8 q0, d4 \n" // B,G coefficients s16.
- "vmovl.s8 q1, d5 \n" // R,A coefficients s16.
-
- ".p2align 2 \n"
- "1: \n"
- MEMACCESS(0)
- "vld4.8 {d16, d18, d20, d22}, [%0]! \n" // load 8 ARGB pixels.
- "subs %2, %2, #8 \n" // 8 processed per loop.
- "vmovl.u8 q8, d16 \n" // b (0 .. 255) 16 bit
- "vmovl.u8 q9, d18 \n" // g
- "vmovl.u8 q10, d20 \n" // r
- "vmovl.u8 q11, d22 \n" // a
- "vmul.s16 q12, q8, d0[0] \n" // B = B * Matrix B
- "vmul.s16 q13, q8, d1[0] \n" // G = B * Matrix G
- "vmul.s16 q14, q8, d2[0] \n" // R = B * Matrix R
- "vmul.s16 q15, q8, d3[0] \n" // A = B * Matrix A
- "vmul.s16 q4, q9, d0[1] \n" // B += G * Matrix B
- "vmul.s16 q5, q9, d1[1] \n" // G += G * Matrix G
- "vmul.s16 q6, q9, d2[1] \n" // R += G * Matrix R
- "vmul.s16 q7, q9, d3[1] \n" // A += G * Matrix A
- "vqadd.s16 q12, q12, q4 \n" // Accumulate B
- "vqadd.s16 q13, q13, q5 \n" // Accumulate G
- "vqadd.s16 q14, q14, q6 \n" // Accumulate R
- "vqadd.s16 q15, q15, q7 \n" // Accumulate A
- "vmul.s16 q4, q10, d0[2] \n" // B += R * Matrix B
- "vmul.s16 q5, q10, d1[2] \n" // G += R * Matrix G
- "vmul.s16 q6, q10, d2[2] \n" // R += R * Matrix R
- "vmul.s16 q7, q10, d3[2] \n" // A += R * Matrix A
- "vqadd.s16 q12, q12, q4 \n" // Accumulate B
- "vqadd.s16 q13, q13, q5 \n" // Accumulate G
- "vqadd.s16 q14, q14, q6 \n" // Accumulate R
- "vqadd.s16 q15, q15, q7 \n" // Accumulate A
- "vmul.s16 q4, q11, d0[3] \n" // B += A * Matrix B
- "vmul.s16 q5, q11, d1[3] \n" // G += A * Matrix G
- "vmul.s16 q6, q11, d2[3] \n" // R += A * Matrix R
- "vmul.s16 q7, q11, d3[3] \n" // A += A * Matrix A
- "vqadd.s16 q12, q12, q4 \n" // Accumulate B
- "vqadd.s16 q13, q13, q5 \n" // Accumulate G
- "vqadd.s16 q14, q14, q6 \n" // Accumulate R
- "vqadd.s16 q15, q15, q7 \n" // Accumulate A
- "vqshrun.s16 d16, q12, #6 \n" // 16 bit to 8 bit B
- "vqshrun.s16 d18, q13, #6 \n" // 16 bit to 8 bit G
- "vqshrun.s16 d20, q14, #6 \n" // 16 bit to 8 bit R
- "vqshrun.s16 d22, q15, #6 \n" // 16 bit to 8 bit A
- MEMACCESS(1)
- "vst4.8 {d16, d18, d20, d22}, [%1]! \n" // store 8 ARGB pixels.
- "bgt 1b \n"
- : "+r"(src_argb), // %0
- "+r"(dst_argb), // %1
- "+r"(width) // %2
- : "r"(matrix_argb) // %3
- : "cc", "memory", "q0", "q1", "q2", "q4", "q5", "q6", "q7", "q8", "q9",
- "q10", "q11", "q12", "q13", "q14", "q15"
- );
-}
-
-// TODO(fbarchard): fix vqshrun in ARGBMultiplyRow_NEON and reenable.
-#ifdef HAS_ARGBMULTIPLYROW_NEON
-// Multiply 2 rows of ARGB pixels together, 8 pixels at a time.
-void ARGBMultiplyRow_NEON(const uint8* src_argb0, const uint8* src_argb1,
- uint8* dst_argb, int width) {
- asm volatile (
- // 8 pixel loop.
- ".p2align 2 \n"
- "1: \n"
- MEMACCESS(0)
- "vld4.8 {d0, d2, d4, d6}, [%0]! \n" // load 8 ARGB pixels.
- MEMACCESS(1)
- "vld4.8 {d1, d3, d5, d7}, [%1]! \n" // load 8 more ARGB pixels.
- "subs %3, %3, #8 \n" // 8 processed per loop.
- "vmull.u8 q0, d0, d1 \n" // multiply B
- "vmull.u8 q1, d2, d3 \n" // multiply G
- "vmull.u8 q2, d4, d5 \n" // multiply R
- "vmull.u8 q3, d6, d7 \n" // multiply A
- "vrshrn.u16 d0, q0, #8 \n" // 16 bit to 8 bit B
- "vrshrn.u16 d1, q1, #8 \n" // 16 bit to 8 bit G
- "vrshrn.u16 d2, q2, #8 \n" // 16 bit to 8 bit R
- "vrshrn.u16 d3, q3, #8 \n" // 16 bit to 8 bit A
- MEMACCESS(2)
- "vst4.8 {d0, d1, d2, d3}, [%2]! \n" // store 8 ARGB pixels.
- "bgt 1b \n"
-
- : "+r"(src_argb0), // %0
- "+r"(src_argb1), // %1
- "+r"(dst_argb), // %2
- "+r"(width) // %3
- :
- : "cc", "memory", "q0", "q1", "q2", "q3"
- );
-}
-#endif // HAS_ARGBMULTIPLYROW_NEON
-
-// Add 2 rows of ARGB pixels together, 8 pixels at a time.
-void ARGBAddRow_NEON(const uint8* src_argb0, const uint8* src_argb1,
- uint8* dst_argb, int width) {
- asm volatile (
- // 8 pixel loop.
- ".p2align 2 \n"
- "1: \n"
- MEMACCESS(0)
- "vld4.8 {d0, d1, d2, d3}, [%0]! \n" // load 8 ARGB pixels.
- MEMACCESS(1)
- "vld4.8 {d4, d5, d6, d7}, [%1]! \n" // load 8 more ARGB pixels.
- "subs %3, %3, #8 \n" // 8 processed per loop.
- "vqadd.u8 q0, q0, q2 \n" // add B, G
- "vqadd.u8 q1, q1, q3 \n" // add R, A
- MEMACCESS(2)
- "vst4.8 {d0, d1, d2, d3}, [%2]! \n" // store 8 ARGB pixels.
- "bgt 1b \n"
-
- : "+r"(src_argb0), // %0
- "+r"(src_argb1), // %1
- "+r"(dst_argb), // %2
- "+r"(width) // %3
- :
- : "cc", "memory", "q0", "q1", "q2", "q3"
- );
-}
-
-// Subtract 2 rows of ARGB pixels, 8 pixels at a time.
-void ARGBSubtractRow_NEON(const uint8* src_argb0, const uint8* src_argb1,
- uint8* dst_argb, int width) {
- asm volatile (
- // 8 pixel loop.
- ".p2align 2 \n"
- "1: \n"
- MEMACCESS(0)
- "vld4.8 {d0, d1, d2, d3}, [%0]! \n" // load 8 ARGB pixels.
- MEMACCESS(1)
- "vld4.8 {d4, d5, d6, d7}, [%1]! \n" // load 8 more ARGB pixels.
- "subs %3, %3, #8 \n" // 8 processed per loop.
- "vqsub.u8 q0, q0, q2 \n" // subtract B, G
- "vqsub.u8 q1, q1, q3 \n" // subtract R, A
- MEMACCESS(2)
- "vst4.8 {d0, d1, d2, d3}, [%2]! \n" // store 8 ARGB pixels.
- "bgt 1b \n"
-
- : "+r"(src_argb0), // %0
- "+r"(src_argb1), // %1
- "+r"(dst_argb), // %2
- "+r"(width) // %3
- :
- : "cc", "memory", "q0", "q1", "q2", "q3"
- );
-}
-
-// Adds Sobel X and Sobel Y and stores Sobel into ARGB.
-// A = 255
-// R = Sobel
-// G = Sobel
-// B = Sobel
-void SobelRow_NEON(const uint8* src_sobelx, const uint8* src_sobely,
- uint8* dst_argb, int width) {
- asm volatile (
- "vmov.u8 d3, #255 \n" // alpha
- // 8 pixel loop.
- ".p2align 2 \n"
- "1: \n"
- MEMACCESS(0)
- "vld1.8 {d0}, [%0]! \n" // load 8 sobelx.
- MEMACCESS(1)
- "vld1.8 {d1}, [%1]! \n" // load 8 sobely.
- "subs %3, %3, #8 \n" // 8 processed per loop.
- "vqadd.u8 d0, d0, d1 \n" // add
- "vmov.u8 d1, d0 \n"
- "vmov.u8 d2, d0 \n"
- MEMACCESS(2)
- "vst4.8 {d0, d1, d2, d3}, [%2]! \n" // store 8 ARGB pixels.
- "bgt 1b \n"
- : "+r"(src_sobelx), // %0
- "+r"(src_sobely), // %1
- "+r"(dst_argb), // %2
- "+r"(width) // %3
- :
- : "cc", "memory", "q0", "q1"
- );
-}
-
-// Adds Sobel X and Sobel Y and stores Sobel into plane.
-void SobelToPlaneRow_NEON(const uint8* src_sobelx, const uint8* src_sobely,
- uint8* dst_y, int width) {
- asm volatile (
- // 16 pixel loop.
- ".p2align 2 \n"
- "1: \n"
- MEMACCESS(0)
- "vld1.8 {q0}, [%0]! \n" // load 16 sobelx.
- MEMACCESS(1)
- "vld1.8 {q1}, [%1]! \n" // load 16 sobely.
- "subs %3, %3, #16 \n" // 16 processed per loop.
- "vqadd.u8 q0, q0, q1 \n" // add
- MEMACCESS(2)
- "vst1.8 {q0}, [%2]! \n" // store 16 pixels.
- "bgt 1b \n"
- : "+r"(src_sobelx), // %0
- "+r"(src_sobely), // %1
- "+r"(dst_y), // %2
- "+r"(width) // %3
- :
- : "cc", "memory", "q0", "q1"
- );
-}
-
-// Mixes Sobel X, Sobel Y and Sobel into ARGB.
-// A = 255
-// R = Sobel X
-// G = Sobel
-// B = Sobel Y
-void SobelXYRow_NEON(const uint8* src_sobelx, const uint8* src_sobely,
- uint8* dst_argb, int width) {
- asm volatile (
- "vmov.u8 d3, #255 \n" // alpha
- // 8 pixel loop.
- ".p2align 2 \n"
- "1: \n"
- MEMACCESS(0)
- "vld1.8 {d2}, [%0]! \n" // load 8 sobelx.
- MEMACCESS(1)
- "vld1.8 {d0}, [%1]! \n" // load 8 sobely.
- "subs %3, %3, #8 \n" // 8 processed per loop.
- "vqadd.u8 d1, d0, d2 \n" // add
- MEMACCESS(2)
- "vst4.8 {d0, d1, d2, d3}, [%2]! \n" // store 8 ARGB pixels.
- "bgt 1b \n"
- : "+r"(src_sobelx), // %0
- "+r"(src_sobely), // %1
- "+r"(dst_argb), // %2
- "+r"(width) // %3
- :
- : "cc", "memory", "q0", "q1"
- );
-}
-
-// SobelX as a matrix is
-// -1 0 1
-// -2 0 2
-// -1 0 1
-void SobelXRow_NEON(const uint8* src_y0, const uint8* src_y1,
- const uint8* src_y2, uint8* dst_sobelx, int width) {
- asm volatile (
- ".p2align 2 \n"
- "1: \n"
- MEMACCESS(0)
- "vld1.8 {d0}, [%0],%5 \n" // top
- MEMACCESS(0)
- "vld1.8 {d1}, [%0],%6 \n"
- "vsubl.u8 q0, d0, d1 \n"
- MEMACCESS(1)
- "vld1.8 {d2}, [%1],%5 \n" // center * 2
- MEMACCESS(1)
- "vld1.8 {d3}, [%1],%6 \n"
- "vsubl.u8 q1, d2, d3 \n"
- "vadd.s16 q0, q0, q1 \n"
- "vadd.s16 q0, q0, q1 \n"
- MEMACCESS(2)
- "vld1.8 {d2}, [%2],%5 \n" // bottom
- MEMACCESS(2)
- "vld1.8 {d3}, [%2],%6 \n"
- "subs %4, %4, #8 \n" // 8 pixels
- "vsubl.u8 q1, d2, d3 \n"
- "vadd.s16 q0, q0, q1 \n"
- "vabs.s16 q0, q0 \n"
- "vqmovn.u16 d0, q0 \n"
- MEMACCESS(3)
- "vst1.8 {d0}, [%3]! \n" // store 8 sobelx
- "bgt 1b \n"
- : "+r"(src_y0), // %0
- "+r"(src_y1), // %1
- "+r"(src_y2), // %2
- "+r"(dst_sobelx), // %3
- "+r"(width) // %4
- : "r"(2), // %5
- "r"(6) // %6
- : "cc", "memory", "q0", "q1" // Clobber List
- );
-}
-
-// SobelY as a matrix is
-// -1 -2 -1
-// 0 0 0
-// 1 2 1
-void SobelYRow_NEON(const uint8* src_y0, const uint8* src_y1,
- uint8* dst_sobely, int width) {
- asm volatile (
- ".p2align 2 \n"
- "1: \n"
- MEMACCESS(0)
- "vld1.8 {d0}, [%0],%4 \n" // left
- MEMACCESS(1)
- "vld1.8 {d1}, [%1],%4 \n"
- "vsubl.u8 q0, d0, d1 \n"
- MEMACCESS(0)
- "vld1.8 {d2}, [%0],%4 \n" // center * 2
- MEMACCESS(1)
- "vld1.8 {d3}, [%1],%4 \n"
- "vsubl.u8 q1, d2, d3 \n"
- "vadd.s16 q0, q0, q1 \n"
- "vadd.s16 q0, q0, q1 \n"
- MEMACCESS(0)
- "vld1.8 {d2}, [%0],%5 \n" // right
- MEMACCESS(1)
- "vld1.8 {d3}, [%1],%5 \n"
- "subs %3, %3, #8 \n" // 8 pixels
- "vsubl.u8 q1, d2, d3 \n"
- "vadd.s16 q0, q0, q1 \n"
- "vabs.s16 q0, q0 \n"
- "vqmovn.u16 d0, q0 \n"
- MEMACCESS(2)
- "vst1.8 {d0}, [%2]! \n" // store 8 sobely
- "bgt 1b \n"
- : "+r"(src_y0), // %0
- "+r"(src_y1), // %1
- "+r"(dst_sobely), // %2
- "+r"(width) // %3
- : "r"(1), // %4
- "r"(6) // %5
- : "cc", "memory", "q0", "q1" // Clobber List
- );
-}
-#endif // defined(__ARM_NEON__) && !defined(__aarch64__)
-
-#ifdef __cplusplus
-} // extern "C"
-} // namespace libyuv
-#endif
diff --git a/third_party/aom/third_party/libyuv/source/row_neon64.cc b/third_party/aom/third_party/libyuv/source/row_neon64.cc
deleted file mode 100644
index 5d015454b..000000000
--- a/third_party/aom/third_party/libyuv/source/row_neon64.cc
+++ /dev/null
@@ -1,3087 +0,0 @@
-/*
- * Copyright 2014 The LibYuv Project Authors. All rights reserved.
- *
- * Use of this source code is governed by a BSD-style license
- * that can be found in the LICENSE file in the root of the source
- * tree. An additional intellectual property rights grant can be found
- * in the file PATENTS. All contributing project authors may
- * be found in the AUTHORS file in the root of the source tree.
- */
-
-#include "libyuv/row.h"
-
-#ifdef __cplusplus
-namespace libyuv {
-extern "C" {
-#endif
-
-// This module is for GCC Neon armv8 64 bit.
-#if !defined(LIBYUV_DISABLE_NEON) && defined(__aarch64__)
-
-// Read 8 Y, 4 U and 4 V from 422
-#define READYUV422 \
- MEMACCESS(0) \
- "ld1 {v0.8b}, [%0], #8 \n" \
- MEMACCESS(1) \
- "ld1 {v1.s}[0], [%1], #4 \n" \
- MEMACCESS(2) \
- "ld1 {v1.s}[1], [%2], #4 \n"
-
-// Read 8 Y, 2 U and 2 V from 422
-#define READYUV411 \
- MEMACCESS(0) \
- "ld1 {v0.8b}, [%0], #8 \n" \
- MEMACCESS(1) \
- "ld1 {v2.h}[0], [%1], #2 \n" \
- MEMACCESS(2) \
- "ld1 {v2.h}[1], [%2], #2 \n" \
- "zip1 v1.8b, v2.8b, v2.8b \n"
-
-// Read 8 Y, 8 U and 8 V from 444
-#define READYUV444 \
- MEMACCESS(0) \
- "ld1 {v0.8b}, [%0], #8 \n" \
- MEMACCESS(1) \
- "ld1 {v1.d}[0], [%1], #8 \n" \
- MEMACCESS(2) \
- "ld1 {v1.d}[1], [%2], #8 \n" \
- "uaddlp v1.8h, v1.16b \n" \
- "rshrn v1.8b, v1.8h, #1 \n"
-
-// Read 8 Y, and set 4 U and 4 V to 128
-#define READYUV400 \
- MEMACCESS(0) \
- "ld1 {v0.8b}, [%0], #8 \n" \
- "movi v1.8b , #128 \n"
-
-// Read 8 Y and 4 UV from NV12
-#define READNV12 \
- MEMACCESS(0) \
- "ld1 {v0.8b}, [%0], #8 \n" \
- MEMACCESS(1) \
- "ld1 {v2.8b}, [%1], #8 \n" \
- "uzp1 v1.8b, v2.8b, v2.8b \n" \
- "uzp2 v3.8b, v2.8b, v2.8b \n" \
- "ins v1.s[1], v3.s[0] \n"
-
-// Read 8 Y and 4 VU from NV21
-#define READNV21 \
- MEMACCESS(0) \
- "ld1 {v0.8b}, [%0], #8 \n" \
- MEMACCESS(1) \
- "ld1 {v2.8b}, [%1], #8 \n" \
- "uzp1 v3.8b, v2.8b, v2.8b \n" \
- "uzp2 v1.8b, v2.8b, v2.8b \n" \
- "ins v1.s[1], v3.s[0] \n"
-
-// Read 8 YUY2
-#define READYUY2 \
- MEMACCESS(0) \
- "ld2 {v0.8b, v1.8b}, [%0], #16 \n" \
- "uzp2 v3.8b, v1.8b, v1.8b \n" \
- "uzp1 v1.8b, v1.8b, v1.8b \n" \
- "ins v1.s[1], v3.s[0] \n"
-
-// Read 8 UYVY
-#define READUYVY \
- MEMACCESS(0) \
- "ld2 {v2.8b, v3.8b}, [%0], #16 \n" \
- "orr v0.8b, v3.8b, v3.8b \n" \
- "uzp1 v1.8b, v2.8b, v2.8b \n" \
- "uzp2 v3.8b, v2.8b, v2.8b \n" \
- "ins v1.s[1], v3.s[0] \n"
-
-#define YUV422TORGB_SETUP_REG \
- "ld1r {v24.8h}, [%[kUVBiasBGR]], #2 \n" \
- "ld1r {v25.8h}, [%[kUVBiasBGR]], #2 \n" \
- "ld1r {v26.8h}, [%[kUVBiasBGR]] \n" \
- "ld1r {v31.4s}, [%[kYToRgb]] \n" \
- "movi v27.8h, #128 \n" \
- "movi v28.8h, #102 \n" \
- "movi v29.8h, #25 \n" \
- "movi v30.8h, #52 \n"
-
-#define YUV422TORGB(vR, vG, vB) \
- "uxtl v0.8h, v0.8b \n" /* Extract Y */ \
- "shll v2.8h, v1.8b, #8 \n" /* Replicate UV */ \
- "ushll2 v3.4s, v0.8h, #0 \n" /* Y */ \
- "ushll v0.4s, v0.4h, #0 \n" \
- "mul v3.4s, v3.4s, v31.4s \n" \
- "mul v0.4s, v0.4s, v31.4s \n" \
- "sqshrun v0.4h, v0.4s, #16 \n" \
- "sqshrun2 v0.8h, v3.4s, #16 \n" /* Y */ \
- "uaddw v1.8h, v2.8h, v1.8b \n" /* Replicate UV */ \
- "mov v2.d[0], v1.d[1] \n" /* Extract V */ \
- "uxtl v2.8h, v2.8b \n" \
- "uxtl v1.8h, v1.8b \n" /* Extract U */ \
- "mul v3.8h, v1.8h, v27.8h \n" \
- "mul v5.8h, v1.8h, v29.8h \n" \
- "mul v6.8h, v2.8h, v30.8h \n" \
- "mul v7.8h, v2.8h, v28.8h \n" \
- "sqadd v6.8h, v6.8h, v5.8h \n" \
- "sqadd " #vB ".8h, v24.8h, v0.8h \n" /* B */ \
- "sqadd " #vG ".8h, v25.8h, v0.8h \n" /* G */ \
- "sqadd " #vR ".8h, v26.8h, v0.8h \n" /* R */ \
- "sqadd " #vB ".8h, " #vB ".8h, v3.8h \n" /* B */ \
- "sqsub " #vG ".8h, " #vG ".8h, v6.8h \n" /* G */ \
- "sqadd " #vR ".8h, " #vR ".8h, v7.8h \n" /* R */ \
- "sqshrun " #vB ".8b, " #vB ".8h, #6 \n" /* B */ \
- "sqshrun " #vG ".8b, " #vG ".8h, #6 \n" /* G */ \
- "sqshrun " #vR ".8b, " #vR ".8h, #6 \n" /* R */ \
-
-// YUV to RGB conversion constants.
-// Y contribution to R,G,B. Scale and bias.
-#define YG 18997 /* round(1.164 * 64 * 256 * 256 / 257) */
-#define YGB 1160 /* 1.164 * 64 * 16 - adjusted for even error distribution */
-
-// U and V contributions to R,G,B.
-#define UB -128 /* -min(128, round(2.018 * 64)) */
-#define UG 25 /* -round(-0.391 * 64) */
-#define VG 52 /* -round(-0.813 * 64) */
-#define VR -102 /* -round(1.596 * 64) */
-
-// Bias values to subtract 16 from Y and 128 from U and V.
-#define BB (UB * 128 - YGB)
-#define BG (UG * 128 + VG * 128 - YGB)
-#define BR (VR * 128 - YGB)
-
-static vec16 kUVBiasBGR = { BB, BG, BR, 0, 0, 0, 0, 0 };
-static vec32 kYToRgb = { 0x0101 * YG, 0, 0, 0 };
-
-#undef YG
-#undef YGB
-#undef UB
-#undef UG
-#undef VG
-#undef VR
-#undef BB
-#undef BG
-#undef BR
-
-#define RGBTOUV_SETUP_REG \
- "movi v20.8h, #56, lsl #0 \n" /* UB/VR coefficient (0.875) / 2 */ \
- "movi v21.8h, #37, lsl #0 \n" /* UG coefficient (-0.5781) / 2 */ \
- "movi v22.8h, #19, lsl #0 \n" /* UR coefficient (-0.2969) / 2 */ \
- "movi v23.8h, #9, lsl #0 \n" /* VB coefficient (-0.1406) / 2 */ \
- "movi v24.8h, #47, lsl #0 \n" /* VG coefficient (-0.7344) / 2 */ \
- "movi v25.16b, #0x80 \n" /* 128.5 (0x8080 in 16-bit) */
-
-
-#ifdef HAS_I444TOARGBROW_NEON
-void I444ToARGBRow_NEON(const uint8* src_y,
- const uint8* src_u,
- const uint8* src_v,
- uint8* dst_argb,
- int width) {
- asm volatile (
- YUV422TORGB_SETUP_REG
- "1: \n"
- READYUV444
- YUV422TORGB(v22, v21, v20)
- "subs %w4, %w4, #8 \n"
- "movi v23.8b, #255 \n" /* A */
- MEMACCESS(3)
- "st4 {v20.8b,v21.8b,v22.8b,v23.8b}, [%3], #32 \n"
- "b.gt 1b \n"
- : "+r"(src_y), // %0
- "+r"(src_u), // %1
- "+r"(src_v), // %2
- "+r"(dst_argb), // %3
- "+r"(width) // %4
- : [kUVBiasBGR]"r"(&kUVBiasBGR),
- [kYToRgb]"r"(&kYToRgb)
- : "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v20",
- "v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29", "v30"
- );
-}
-#endif // HAS_I444TOARGBROW_NEON
-
-#ifdef HAS_I422TOARGBROW_NEON
-void I422ToARGBRow_NEON(const uint8* src_y,
- const uint8* src_u,
- const uint8* src_v,
- uint8* dst_argb,
- int width) {
- asm volatile (
- YUV422TORGB_SETUP_REG
- "1: \n"
- READYUV422
- YUV422TORGB(v22, v21, v20)
- "subs %w4, %w4, #8 \n"
- "movi v23.8b, #255 \n" /* A */
- MEMACCESS(3)
- "st4 {v20.8b,v21.8b,v22.8b,v23.8b}, [%3], #32 \n"
- "b.gt 1b \n"
- : "+r"(src_y), // %0
- "+r"(src_u), // %1
- "+r"(src_v), // %2
- "+r"(dst_argb), // %3
- "+r"(width) // %4
- : [kUVBiasBGR]"r"(&kUVBiasBGR),
- [kYToRgb]"r"(&kYToRgb)
- : "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v20",
- "v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29", "v30"
- );
-}
-#endif // HAS_I422TOARGBROW_NEON
-
-#ifdef HAS_I411TOARGBROW_NEON
-void I411ToARGBRow_NEON(const uint8* src_y, // each loop pass writes 32 bytes (8 four-byte pixels) to dst_argb
- const uint8* src_u,
- const uint8* src_v,
- uint8* dst_argb,
- int width) {
- asm volatile (
- YUV422TORGB_SETUP_REG // macro defined outside this chunk
- "1: \n"
- READYUV411 // macro defined outside this chunk
- YUV422TORGB(v22, v21, v20) // macro defined outside this chunk
- "subs %w4, %w4, #8 \n" // width -= 8; flags are consumed by b.gt below
- "movi v23.8b, #255 \n" /* A */
- MEMACCESS(3)
- "st4 {v20.8b,v21.8b,v22.8b,v23.8b}, [%3], #32 \n" // interleaved store of v20..v23; dst_argb += 32
- "b.gt 1b \n" // repeat while width > 0
- : "+r"(src_y), // %0
- "+r"(src_u), // %1
- "+r"(src_v), // %2
- "+r"(dst_argb), // %3
- "+r"(width) // %4
- : [kUVBiasBGR]"r"(&kUVBiasBGR), // named inputs; not referenced by the asm text visible here
- [kYToRgb]"r"(&kYToRgb)
- : "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v20",
- "v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29", "v30"
- );
-}
-#endif // HAS_I411TOARGBROW_NEON
-
-#ifdef HAS_I422TOBGRAROW_NEON
-void I422ToBGRARow_NEON(const uint8* src_y, // each loop pass writes 32 bytes (8 four-byte pixels) to dst_bgra
- const uint8* src_u,
- const uint8* src_v,
- uint8* dst_bgra,
- int width) {
- asm volatile (
- YUV422TORGB_SETUP_REG // macro defined outside this chunk
- "1: \n"
- READYUV422 // macro defined outside this chunk
- YUV422TORGB(v21, v22, v23) // macro defined outside this chunk; colour goes to v21..v23 so v20 is free for alpha
- "subs %w4, %w4, #8 \n" // width -= 8; flags are consumed by b.gt below
- "movi v20.8b, #255 \n" /* A */
- MEMACCESS(3)
- "st4 {v20.8b,v21.8b,v22.8b,v23.8b}, [%3], #32 \n" // alpha (v20) is the first byte of each stored pixel
- "b.gt 1b \n" // repeat while width > 0
- : "+r"(src_y), // %0
- "+r"(src_u), // %1
- "+r"(src_v), // %2
- "+r"(dst_bgra), // %3
- "+r"(width) // %4
- : [kUVBiasBGR]"r"(&kUVBiasBGR), // named inputs; not referenced by the asm text visible here
- [kYToRgb]"r"(&kYToRgb)
- : "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v20",
- "v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29", "v30"
- );
-}
-#endif // HAS_I422TOBGRAROW_NEON
-
-#ifdef HAS_I422TOABGRROW_NEON
-void I422ToABGRRow_NEON(const uint8* src_y, // each loop pass writes 32 bytes (8 four-byte pixels) to dst_abgr
- const uint8* src_u,
- const uint8* src_v,
- uint8* dst_abgr,
- int width) {
- asm volatile (
- YUV422TORGB_SETUP_REG // macro defined outside this chunk
- "1: \n"
- READYUV422 // macro defined outside this chunk
- YUV422TORGB(v20, v21, v22) // macro defined outside this chunk; register order is reversed relative to I422ToARGBRow_NEON
- "subs %w4, %w4, #8 \n" // width -= 8; flags are consumed by b.gt below
- "movi v23.8b, #255 \n" /* A */
- MEMACCESS(3)
- "st4 {v20.8b,v21.8b,v22.8b,v23.8b}, [%3], #32 \n" // interleaved store of v20..v23; dst_abgr += 32
- "b.gt 1b \n" // repeat while width > 0
- : "+r"(src_y), // %0
- "+r"(src_u), // %1
- "+r"(src_v), // %2
- "+r"(dst_abgr), // %3
- "+r"(width) // %4
- : [kUVBiasBGR]"r"(&kUVBiasBGR), // named inputs; not referenced by the asm text visible here
- [kYToRgb]"r"(&kYToRgb)
- : "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v20",
- "v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29", "v30"
- );
-}
-#endif // HAS_I422TOABGRROW_NEON
-
-#ifdef HAS_I422TORGBAROW_NEON
-void I422ToRGBARow_NEON(const uint8* src_y, // each loop pass writes 32 bytes (8 four-byte pixels) to dst_rgba
- const uint8* src_u,
- const uint8* src_v,
- uint8* dst_rgba,
- int width) {
- asm volatile (
- YUV422TORGB_SETUP_REG // macro defined outside this chunk
- "1: \n"
- READYUV422 // macro defined outside this chunk
- YUV422TORGB(v23, v22, v21) // macro defined outside this chunk; colour goes to v21..v23 so v20 is free for alpha
- "subs %w4, %w4, #8 \n" // width -= 8; flags are consumed by b.gt below
- "movi v20.8b, #255 \n" /* A */
- MEMACCESS(3)
- "st4 {v20.8b,v21.8b,v22.8b,v23.8b}, [%3], #32 \n" // alpha (v20) is the first byte of each stored pixel
- "b.gt 1b \n" // repeat while width > 0
- : "+r"(src_y), // %0
- "+r"(src_u), // %1
- "+r"(src_v), // %2
- "+r"(dst_rgba), // %3
- "+r"(width) // %4
- : [kUVBiasBGR]"r"(&kUVBiasBGR), // named inputs; not referenced by the asm text visible here
- [kYToRgb]"r"(&kYToRgb)
- : "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v20",
- "v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29", "v30"
- );
-}
-#endif // HAS_I422TORGBAROW_NEON
-
-#ifdef HAS_I422TORGB24ROW_NEON
-void I422ToRGB24Row_NEON(const uint8* src_y, // each loop pass writes 24 bytes (8 three-byte pixels) to dst_rgb24
- const uint8* src_u,
- const uint8* src_v,
- uint8* dst_rgb24,
- int width) {
- asm volatile (
- YUV422TORGB_SETUP_REG // macro defined outside this chunk
- "1: \n"
- READYUV422 // macro defined outside this chunk
- YUV422TORGB(v22, v21, v20) // macro defined outside this chunk
- "subs %w4, %w4, #8 \n" // width -= 8; flags are consumed by b.gt below
- MEMACCESS(3)
- "st3 {v20.8b,v21.8b,v22.8b}, [%3], #24 \n" // interleaved store of v20..v22 (no alpha); dst_rgb24 += 24
- "b.gt 1b \n" // repeat while width > 0
- : "+r"(src_y), // %0
- "+r"(src_u), // %1
- "+r"(src_v), // %2
- "+r"(dst_rgb24), // %3
- "+r"(width) // %4
- : [kUVBiasBGR]"r"(&kUVBiasBGR), // named inputs; not referenced by the asm text visible here
- [kYToRgb]"r"(&kYToRgb)
- : "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v20",
- "v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29", "v30"
- );
-}
-#endif // HAS_I422TORGB24ROW_NEON
-
-#ifdef HAS_I422TORAWROW_NEON
-void I422ToRAWRow_NEON(const uint8* src_y, // each loop pass writes 24 bytes (8 three-byte pixels) to dst_raw
- const uint8* src_u,
- const uint8* src_v,
- uint8* dst_raw,
- int width) {
- asm volatile (
- YUV422TORGB_SETUP_REG // macro defined outside this chunk
- "1: \n"
- READYUV422 // macro defined outside this chunk
- YUV422TORGB(v20, v21, v22) // macro defined outside this chunk; register order is reversed relative to I422ToRGB24Row_NEON
- "subs %w4, %w4, #8 \n" // width -= 8; flags are consumed by b.gt below
- MEMACCESS(3)
- "st3 {v20.8b,v21.8b,v22.8b}, [%3], #24 \n" // interleaved store of v20..v22 (no alpha); dst_raw += 24
- "b.gt 1b \n" // repeat while width > 0
- : "+r"(src_y), // %0
- "+r"(src_u), // %1
- "+r"(src_v), // %2
- "+r"(dst_raw), // %3
- "+r"(width) // %4
- : [kUVBiasBGR]"r"(&kUVBiasBGR), // named inputs; not referenced by the asm text visible here
- [kYToRgb]"r"(&kYToRgb)
- : "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v20",
- "v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29", "v30"
- );
-}
-#endif // HAS_I422TORAWROW_NEON
-
-#define ARGBTORGB565 /* asm fragment. In: v20.8b=B, v21.8b=G, v22.8b=R. Out: v0.8h packed 5:6:5. Overwrites v20 and v21. */ \
- "shll v0.8h, v22.8b, #8 \n" /* R in the top byte of each 16-bit lane */ \
- "shll v20.8h, v20.8b, #8 \n" /* B widened the same way */ \
- "shll v21.8h, v21.8b, #8 \n" /* G widened the same way */ \
- "sri v0.8h, v21.8h, #5 \n" /* RG: insert G below the top 5 bits */ \
- "sri v0.8h, v20.8h, #11 \n" /* RGB: insert B below the top 11 bits */
-
-#ifdef HAS_I422TORGB565ROW_NEON
-void I422ToRGB565Row_NEON(const uint8* src_y, // each loop pass writes 16 bytes (8 two-byte pixels) to dst_rgb565
- const uint8* src_u,
- const uint8* src_v,
- uint8* dst_rgb565,
- int width) {
- asm volatile (
- YUV422TORGB_SETUP_REG // macro defined outside this chunk
- "1: \n"
- READYUV422 // macro defined outside this chunk
- YUV422TORGB(v22, v21, v20) // macro defined outside this chunk
- "subs %w4, %w4, #8 \n" // width -= 8; flags are consumed by b.gt below
- ARGBTORGB565 // packs v20/v21/v22 into v0.8h
- MEMACCESS(3)
- "st1 {v0.8h}, [%3], #16 \n" // store 8 pixels RGB565.
- "b.gt 1b \n" // repeat while width > 0
- : "+r"(src_y), // %0
- "+r"(src_u), // %1
- "+r"(src_v), // %2
- "+r"(dst_rgb565), // %3
- "+r"(width) // %4
- : [kUVBiasBGR]"r"(&kUVBiasBGR), // named inputs; not referenced by the asm text visible here
- [kYToRgb]"r"(&kYToRgb)
- : "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v20",
- "v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29", "v30"
- );
-}
-#endif // HAS_I422TORGB565ROW_NEON
-
-#define ARGBTOARGB1555 /* asm fragment. In: v20.8b=B, v21.8b=G, v22.8b=R, v23.8b=A. Out: v0.8h packed 1:5:5:5. Overwrites v20, v21 and v22. */ \
- "shll v0.8h, v23.8b, #8 \n" /* A in the top byte of each 16-bit lane */ \
- "shll v22.8h, v22.8b, #8 \n" /* R widened the same way */ \
- "shll v20.8h, v20.8b, #8 \n" /* B widened the same way */ \
- "shll v21.8h, v21.8b, #8 \n" /* G widened the same way */ \
- "sri v0.8h, v22.8h, #1 \n" /* AR: insert R below the top bit */ \
- "sri v0.8h, v21.8h, #6 \n" /* ARG: insert G below the top 6 bits */ \
- "sri v0.8h, v20.8h, #11 \n" /* ARGB: insert B below the top 11 bits */
-
-#ifdef HAS_I422TOARGB1555ROW_NEON
-void I422ToARGB1555Row_NEON(const uint8* src_y, // each loop pass writes 16 bytes (8 two-byte pixels) to dst_argb1555
- const uint8* src_u,
- const uint8* src_v,
- uint8* dst_argb1555,
- int width) {
- asm volatile (
- YUV422TORGB_SETUP_REG // macro defined outside this chunk
- "1: \n"
- READYUV422 // macro defined outside this chunk
- YUV422TORGB(v22, v21, v20) // macro defined outside this chunk
- "subs %w4, %w4, #8 \n" // width -= 8; flags are consumed by b.gt below
- "movi v23.8b, #255 \n" // alpha input for ARGBTOARGB1555
- ARGBTOARGB1555 // packs v20..v23 into v0.8h
- MEMACCESS(3)
- "st1 {v0.8h}, [%3], #16 \n" // store 8 pixels ARGB1555.
- "b.gt 1b \n" // repeat while width > 0
- : "+r"(src_y), // %0
- "+r"(src_u), // %1
- "+r"(src_v), // %2
- "+r"(dst_argb1555), // %3
- "+r"(width) // %4
- : [kUVBiasBGR]"r"(&kUVBiasBGR), // named inputs; not referenced by the asm text visible here
- [kYToRgb]"r"(&kYToRgb)
- : "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v20",
- "v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29", "v30"
- );
-}
-#endif // HAS_I422TOARGB1555ROW_NEON
-
-#define ARGBTOARGB4444 /* asm fragment. Out: v0.16b (8 two-byte pixels). Overwrites v1 and v20-v23. */ \
- /* Input v20.8b<=B, v21.8b<=G, v22.8b<=R, v23.8b<=A, v4.8b<=0x0f */ \
- "ushr v20.8b, v20.8b, #4 \n" /* B: high nibble moved to low nibble */ \
- "bic v21.8b, v21.8b, v4.8b \n" /* G: low nibble cleared */ \
- "ushr v22.8b, v22.8b, #4 \n" /* R: high nibble moved to low nibble */ \
- "bic v23.8b, v23.8b, v4.8b \n" /* A: low nibble cleared */ \
- "orr v0.8b, v20.8b, v21.8b \n" /* BG */ \
- "orr v1.8b, v22.8b, v23.8b \n" /* RA */ \
- "zip1 v0.16b, v0.16b, v1.16b \n" /* BGRA: interleave BG bytes with RA bytes */
-
-#ifdef HAS_I422TOARGB4444ROW_NEON
-void I422ToARGB4444Row_NEON(const uint8* src_y, // each loop pass writes 16 bytes (8 two-byte pixels) to dst_argb4444
- const uint8* src_u,
- const uint8* src_v,
- uint8* dst_argb4444,
- int width) {
- asm volatile (
- YUV422TORGB_SETUP_REG // macro defined outside this chunk
- "movi v4.16b, #0x0f \n" // low-nibble mask used by the bic instructions in ARGBTOARGB4444.
- "1: \n"
- READYUV422 // macro defined outside this chunk
- YUV422TORGB(v22, v21, v20) // macro defined outside this chunk
- "subs %w4, %w4, #8 \n" // width -= 8; flags are consumed by b.gt below
- "movi v23.8b, #255 \n" // alpha input for ARGBTOARGB4444
- ARGBTOARGB4444 // packs v20..v23 into v0.16b
- MEMACCESS(3)
- "st1 {v0.8h}, [%3], #16 \n" // store 8 pixels ARGB4444.
- "b.gt 1b \n" // repeat while width > 0
- : "+r"(src_y), // %0
- "+r"(src_u), // %1
- "+r"(src_v), // %2
- "+r"(dst_argb4444), // %3
- "+r"(width) // %4
- : [kUVBiasBGR]"r"(&kUVBiasBGR), // named inputs; not referenced by the asm text visible here
- [kYToRgb]"r"(&kYToRgb)
- : "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v20",
- "v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29", "v30"
- );
-}
-#endif // HAS_I422TOARGB4444ROW_NEON
-
-#ifdef HAS_I400TOARGBROW_NEON
-void I400ToARGBRow_NEON(const uint8* src_y, // each loop pass writes 32 bytes (8 four-byte pixels) to dst_argb
- uint8* dst_argb,
- int width) {
- int64 width64 = (int64)(width); // 64-bit copy of width; the visible asm only touches its low 32 bits via %w2
- asm volatile (
- YUV422TORGB_SETUP_REG // macro defined outside this chunk
- "1: \n"
- READYUV400 // macro defined outside this chunk
- YUV422TORGB(v22, v21, v20) // macro defined outside this chunk
- "subs %w2, %w2, #8 \n" // count -= 8; flags are consumed by b.gt below
- "movi v23.8b, #255 \n" // fourth stored byte of every pixel is 255
- MEMACCESS(1)
- "st4 {v20.8b,v21.8b,v22.8b,v23.8b}, [%1], #32 \n" // interleaved store of v20..v23; dst_argb += 32
- "b.gt 1b \n" // repeat while count > 0
- : "+r"(src_y), // %0
- "+r"(dst_argb), // %1
- "+r"(width64) // %2
- : [kUVBiasBGR]"r"(&kUVBiasBGR), // named inputs; not referenced by the asm text visible here
- [kYToRgb]"r"(&kYToRgb)
- : "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v20",
- "v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29", "v30"
- );
-}
-#endif // HAS_I400TOARGBROW_NEON
-
-#ifdef HAS_J400TOARGBROW_NEON
-void J400ToARGBRow_NEON(const uint8* src_y, // replicates each source byte into three channels and appends 255
- uint8* dst_argb,
- int width) {
- asm volatile (
- "movi v23.8b, #255 \n" // constant fourth channel, set once before the loop
- "1: \n"
- MEMACCESS(0)
- "ld1 {v20.8b}, [%0], #8 \n" // load 8 source bytes; src_y += 8
- "orr v21.8b, v20.8b, v20.8b \n" // v21 = copy of v20
- "orr v22.8b, v20.8b, v20.8b \n" // v22 = copy of v20
- "subs %w2, %w2, #8 \n" // width -= 8; flags are consumed by b.gt below
- MEMACCESS(1)
- "st4 {v20.8b,v21.8b,v22.8b,v23.8b}, [%1], #32 \n" // interleaved store; dst_argb += 32
- "b.gt 1b \n" // repeat while width > 0
- : "+r"(src_y), // %0
- "+r"(dst_argb), // %1
- "+r"(width) // %2
- :
- : "cc", "memory", "v20", "v21", "v22", "v23"
- );
-}
-#endif // HAS_J400TOARGBROW_NEON
-
-#ifdef HAS_NV12TOARGBROW_NEON
-void NV12ToARGBRow_NEON(const uint8* src_y, // each loop pass writes 32 bytes (8 four-byte pixels) to dst_argb
- const uint8* src_uv,
- uint8* dst_argb,
- int width) {
- asm volatile (
- YUV422TORGB_SETUP_REG // macro defined outside this chunk
- "1: \n"
- READNV12 // macro defined outside this chunk
- YUV422TORGB(v22, v21, v20) // macro defined outside this chunk
- "subs %w3, %w3, #8 \n" // width -= 8; flags are consumed by b.gt below
- "movi v23.8b, #255 \n" // fourth stored byte of every pixel is 255
- MEMACCESS(2)
- "st4 {v20.8b,v21.8b,v22.8b,v23.8b}, [%2], #32 \n" // interleaved store of v20..v23; dst_argb += 32
- "b.gt 1b \n" // repeat while width > 0
- : "+r"(src_y), // %0
- "+r"(src_uv), // %1
- "+r"(dst_argb), // %2
- "+r"(width) // %3
- : [kUVBiasBGR]"r"(&kUVBiasBGR), // named inputs; not referenced by the asm text visible here
- [kYToRgb]"r"(&kYToRgb)
- : "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v20",
- "v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29", "v30"
- );
-}
-#endif // HAS_NV12TOARGBROW_NEON
-
-#ifdef HAS_NV21TOARGBROW_NEON
-void NV21ToARGBRow_NEON(const uint8* src_y, // each loop pass writes 32 bytes (8 four-byte pixels) to dst_argb
- const uint8* src_uv,
- uint8* dst_argb,
- int width) {
- asm volatile (
- YUV422TORGB_SETUP_REG // macro defined outside this chunk
- "1: \n"
- READNV21 // macro defined outside this chunk
- YUV422TORGB(v22, v21, v20) // macro defined outside this chunk
- "subs %w3, %w3, #8 \n" // width -= 8; flags are consumed by b.gt below
- "movi v23.8b, #255 \n" // fourth stored byte of every pixel is 255
- MEMACCESS(2)
- "st4 {v20.8b,v21.8b,v22.8b,v23.8b}, [%2], #32 \n" // interleaved store of v20..v23; dst_argb += 32
- "b.gt 1b \n" // repeat while width > 0
- : "+r"(src_y), // %0
- "+r"(src_uv), // %1
- "+r"(dst_argb), // %2
- "+r"(width) // %3
- : [kUVBiasBGR]"r"(&kUVBiasBGR), // named inputs; not referenced by the asm text visible here
- [kYToRgb]"r"(&kYToRgb)
- : "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v20",
- "v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29", "v30"
- );
-}
-#endif // HAS_NV21TOARGBROW_NEON
-
-#ifdef HAS_NV12TORGB565ROW_NEON
-void NV12ToRGB565Row_NEON(const uint8* src_y, // each loop pass writes 16 bytes (8 two-byte pixels) to dst_rgb565
- const uint8* src_uv,
- uint8* dst_rgb565,
- int width) {
- asm volatile (
- YUV422TORGB_SETUP_REG // macro defined outside this chunk
- "1: \n"
- READNV12 // macro defined outside this chunk
- YUV422TORGB(v22, v21, v20) // macro defined outside this chunk
- "subs %w3, %w3, #8 \n" // width -= 8; flags are consumed by b.gt below
- ARGBTORGB565 // packs v20/v21/v22 into v0.8h
- MEMACCESS(2)
- "st1 {v0.8h}, [%2], 16 \n" // store 8 pixels RGB565.
- "b.gt 1b \n" // repeat while width > 0
- : "+r"(src_y), // %0
- "+r"(src_uv), // %1
- "+r"(dst_rgb565), // %2
- "+r"(width) // %3
- : [kUVBiasBGR]"r"(&kUVBiasBGR), // named inputs; not referenced by the asm text visible here
- [kYToRgb]"r"(&kYToRgb)
- : "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v20",
- "v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29", "v30"
- );
-}
-#endif // HAS_NV12TORGB565ROW_NEON
-
-#ifdef HAS_NV21TORGB565ROW_NEON
-void NV21ToRGB565Row_NEON(const uint8* src_y, // each loop pass writes 16 bytes (8 two-byte pixels) to dst_rgb565
- const uint8* src_uv,
- uint8* dst_rgb565,
- int width) {
- asm volatile (
- YUV422TORGB_SETUP_REG // macro defined outside this chunk
- "1: \n"
- READNV21 // macro defined outside this chunk
- YUV422TORGB(v22, v21, v20) // macro defined outside this chunk
- "subs %w3, %w3, #8 \n" // width -= 8; flags are consumed by b.gt below
- ARGBTORGB565 // packs v20/v21/v22 into v0.8h
- MEMACCESS(2)
- "st1 {v0.8h}, [%2], 16 \n" // store 8 pixels RGB565.
- "b.gt 1b \n" // repeat while width > 0
- : "+r"(src_y), // %0
- "+r"(src_uv), // %1
- "+r"(dst_rgb565), // %2
- "+r"(width) // %3
- : [kUVBiasBGR]"r"(&kUVBiasBGR), // named inputs; not referenced by the asm text visible here
- [kYToRgb]"r"(&kYToRgb)
- : "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v20",
- "v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29", "v30"
- );
-}
-#endif // HAS_NV21TORGB565ROW_NEON
-
-#ifdef HAS_YUY2TOARGBROW_NEON
-void YUY2ToARGBRow_NEON(const uint8* src_yuy2, // each loop pass writes 32 bytes (8 four-byte pixels) to dst_argb
- uint8* dst_argb,
- int width) {
- int64 width64 = (int64)(width); // 64-bit copy of width; the visible asm only touches its low 32 bits via %w2
- asm volatile (
- YUV422TORGB_SETUP_REG // macro defined outside this chunk
- "1: \n"
- READYUY2 // macro defined outside this chunk
- YUV422TORGB(v22, v21, v20) // macro defined outside this chunk
- "subs %w2, %w2, #8 \n" // count -= 8; flags are consumed by b.gt below
- "movi v23.8b, #255 \n" // fourth stored byte of every pixel is 255
- MEMACCESS(1)
- "st4 {v20.8b,v21.8b,v22.8b,v23.8b}, [%1], #32 \n" // interleaved store of v20..v23; dst_argb += 32
- "b.gt 1b \n" // repeat while count > 0
- : "+r"(src_yuy2), // %0
- "+r"(dst_argb), // %1
- "+r"(width64) // %2
- : [kUVBiasBGR]"r"(&kUVBiasBGR), // named inputs; not referenced by the asm text visible here
- [kYToRgb]"r"(&kYToRgb)
- : "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v20",
- "v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29", "v30"
- );
-}
-#endif // HAS_YUY2TOARGBROW_NEON
-
-#ifdef HAS_UYVYTOARGBROW_NEON
-void UYVYToARGBRow_NEON(const uint8* src_uyvy, // each loop pass writes 32 bytes (8 four-byte pixels) to dst_argb
- uint8* dst_argb,
- int width) {
- int64 width64 = (int64)(width); // 64-bit copy of width; the visible asm only touches its low 32 bits via %w2
- asm volatile (
- YUV422TORGB_SETUP_REG // macro defined outside this chunk
- "1: \n"
- READUYVY // macro defined outside this chunk
- YUV422TORGB(v22, v21, v20) // macro defined outside this chunk
- "subs %w2, %w2, #8 \n" // count -= 8; flags are consumed by b.gt below
- "movi v23.8b, #255 \n" // fourth stored byte of every pixel is 255
- MEMACCESS(1)
- "st4 {v20.8b,v21.8b,v22.8b,v23.8b}, [%1], 32 \n" // interleaved store of v20..v23; dst_argb += 32
- "b.gt 1b \n" // repeat while count > 0
- : "+r"(src_uyvy), // %0
- "+r"(dst_argb), // %1
- "+r"(width64) // %2
- : [kUVBiasBGR]"r"(&kUVBiasBGR), // named inputs; not referenced by the asm text visible here
- [kYToRgb]"r"(&kYToRgb)
- : "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v20",
- "v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29", "v30"
- );
-}
-#endif // HAS_UYVYTOARGBROW_NEON
-
-// Reads 16 pairs of UV per loop pass and writes the even bytes to dst_u and the odd bytes to dst_v.
-#ifdef HAS_SPLITUVROW_NEON
-void SplitUVRow_NEON(const uint8* src_uv, uint8* dst_u, uint8* dst_v,
- int width) {
- asm volatile (
- "1: \n"
- MEMACCESS(0)
- "ld2 {v0.16b,v1.16b}, [%0], #32 \n" // load 16 pairs of UV, de-interleaved into v0 (U) and v1 (V)
- "subs %w3, %w3, #16 \n" // 16 processed per loop
- MEMACCESS(1)
- "st1 {v0.16b}, [%1], #16 \n" // store U
- MEMACCESS(2)
- "st1 {v1.16b}, [%2], #16 \n" // store V
- "b.gt 1b \n" // repeat while width > 0
- : "+r"(src_uv), // %0
- "+r"(dst_u), // %1
- "+r"(dst_v), // %2
- "+r"(width) // %3 // Output registers
- : // Input registers
- : "cc", "memory", "v0", "v1" // Clobber List
- );
-}
-#endif // HAS_SPLITUVROW_NEON
-
-// Reads 16 U's and 16 V's per loop pass and writes out 16 interleaved UV pairs.
-#ifdef HAS_MERGEUVROW_NEON
-void MergeUVRow_NEON(const uint8* src_u, const uint8* src_v, uint8* dst_uv,
- int width) {
- asm volatile (
- "1: \n"
- MEMACCESS(0)
- "ld1 {v0.16b}, [%0], #16 \n" // load U
- MEMACCESS(1)
- "ld1 {v1.16b}, [%1], #16 \n" // load V
- "subs %w3, %w3, #16 \n" // 16 processed per loop
- MEMACCESS(2)
- "st2 {v0.16b,v1.16b}, [%2], #32 \n" // store 16 pairs of UV
- "b.gt 1b \n" // repeat while width > 0
- :
- "+r"(src_u), // %0
- "+r"(src_v), // %1
- "+r"(dst_uv), // %2
- "+r"(width) // %3 // Output registers
- : // Input registers
- : "cc", "memory", "v0", "v1" // Clobber List
- );
-}
-#endif // HAS_MERGEUVROW_NEON
-
-// Copies 32 bytes per loop pass using ld1/st1 on four 8-byte registers; the loop runs until count <= 0.
-#ifdef HAS_COPYROW_NEON
-void CopyRow_NEON(const uint8* src, uint8* dst, int count) {
- asm volatile (
- "1: \n"
- MEMACCESS(0)
- "ld1 {v0.8b,v1.8b,v2.8b,v3.8b}, [%0], #32 \n" // load 32
- "subs %w2, %w2, #32 \n" // 32 processed per loop
- MEMACCESS(1)
- "st1 {v0.8b,v1.8b,v2.8b,v3.8b}, [%1], #32 \n" // store 32
- "b.gt 1b \n" // repeat while count > 0
- : "+r"(src), // %0
- "+r"(dst), // %1
- "+r"(count) // %2 // Output registers
- : // Input registers
- : "cc", "memory", "v0", "v1", "v2", "v3" // Clobber List
- );
-}
-#endif // HAS_COPYROW_NEON
-
-// SetRow fills dst with the 8 bit value v8, storing 16 bytes per loop pass until count <= 0.
-void SetRow_NEON(uint8* dst, uint8 v8, int count) {
- asm volatile (
- "dup v0.16b, %w2 \n" // duplicate 16 bytes
- "1: \n"
- "subs %w1, %w1, #16 \n" // 16 bytes per loop
- MEMACCESS(0)
- "st1 {v0.16b}, [%0], #16 \n" // store
- "b.gt 1b \n" // repeat while count > 0
- : "+r"(dst), // %0
- "+r"(count) // %1
- : "r"(v8) // %2
- : "cc", "memory", "v0"
- );
-}
-
-void ARGBSetRow_NEON(uint8* dst, uint32 v32, int count) { // fills dst with the 32 bit value v32; count is in 32-bit units
- asm volatile (
- "dup v0.4s, %w2 \n" // duplicate 4 ints
- "1: \n"
- "subs %w1, %w1, #4 \n" // 4 ints per loop
- MEMACCESS(0)
- "st1 {v0.16b}, [%0], #16 \n" // store
- "b.gt 1b \n" // repeat while count > 0
- : "+r"(dst), // %0
- "+r"(count) // %1
- : "r"(v32) // %2
- : "cc", "memory", "v0"
- );
-}
-
-#ifdef HAS_MIRRORROW_NEON
-void MirrorRow_NEON(const uint8* src, uint8* dst, int width) { // writes the bytes of src to dst in reverse order, 16 per loop pass
- int64 width64 = (int64) width; // widened because it is added to the 64-bit src pointer below
- asm volatile (
- // Start at end of source row.
- "add %0, %0, %2 \n" // src += width
- "sub %0, %0, #16 \n" // back up to the last 16-byte group
-
- "1: \n"
- MEMACCESS(0)
- "ld1 {v0.16b}, [%0], %3 \n" // src -= 16
- "subs %2, %2, #16 \n" // 16 pixels per loop.
- "rev64 v0.16b, v0.16b \n" // reverse the bytes inside each 64-bit half
- MEMACCESS(1)
- "st1 {v0.D}[1], [%1], #8 \n" // dst += 16
- MEMACCESS(1)
- "st1 {v0.D}[0], [%1], #8 \n" // high half was stored first, completing the 16-byte reversal
- "b.gt 1b \n" // repeat while width > 0
- : "+r"(src), // %0
- "+r"(dst), // %1
- "+r"(width64) // %2
- : "r"((ptrdiff_t)-16) // %3
- : "cc", "memory", "v0"
- );
-}
-#endif // HAS_MIRRORROW_NEON
-
-#ifdef HAS_MIRRORUVROW_NEON
-void MirrorUVRow_NEON(const uint8* src_uv, uint8* dst_u, uint8* dst_v, // de-interleaves byte pairs and writes each plane reversed
- int width) {
- int64 width64 = (int64) width; // widened because it is added to the 64-bit src pointer below
- asm volatile (
- // Start at end of source row.
- "add %0, %0, %3, lsl #1 \n" // src_uv += width * 2
- "sub %0, %0, #16 \n" // back up to the last 16-byte group
-
- "1: \n"
- MEMACCESS(0)
- "ld2 {v0.8b, v1.8b}, [%0], %4 \n" // src -= 16
- "subs %3, %3, #8 \n" // 8 pixels per loop.
- "rev64 v0.8b, v0.8b \n" // reverse the 8 even-position bytes
- "rev64 v1.8b, v1.8b \n" // reverse the 8 odd-position bytes
- MEMACCESS(1)
- "st1 {v0.8b}, [%1], #8 \n" // dst += 8
- MEMACCESS(2)
- "st1 {v1.8b}, [%2], #8 \n" // dst_v += 8
- "b.gt 1b \n" // repeat while width > 0
- : "+r"(src_uv), // %0
- "+r"(dst_u), // %1
- "+r"(dst_v), // %2
- "+r"(width64) // %3
- : "r"((ptrdiff_t)-16) // %4
- : "cc", "memory", "v0", "v1"
- );
-}
-#endif // HAS_MIRRORUVROW_NEON
-
-#ifdef HAS_ARGBMIRRORROW_NEON
-void ARGBMirrorRow_NEON(const uint8* src, uint8* dst, int width) { // reverses the order of 4-byte pixels, 4 per loop pass
- int64 width64 = (int64) width; // widened because it is added to the 64-bit src pointer below
- asm volatile (
- // Start at end of source row.
- "add %0, %0, %2, lsl #2 \n" // src += width * 4
- "sub %0, %0, #16 \n" // back up to the last 16-byte group
-
- "1: \n"
- MEMACCESS(0)
- "ld1 {v0.16b}, [%0], %3 \n" // src -= 16
- "subs %2, %2, #4 \n" // 4 pixels per loop.
- "rev64 v0.4s, v0.4s \n" // swap the two 32-bit words inside each 64-bit half
- MEMACCESS(1)
- "st1 {v0.D}[1], [%1], #8 \n" // dst += 16
- MEMACCESS(1)
- "st1 {v0.D}[0], [%1], #8 \n" // high half was stored first, completing the 4-pixel reversal
- "b.gt 1b \n" // repeat while width > 0
- : "+r"(src), // %0
- "+r"(dst), // %1
- "+r"(width64) // %2
- : "r"((ptrdiff_t)-16) // %3
- : "cc", "memory", "v0"
- );
-}
-#endif // HAS_ARGBMIRRORROW_NEON
-
-#ifdef HAS_RGB24TOARGBROW_NEON
-void RGB24ToARGBRow_NEON(const uint8* src_rgb24, uint8* dst_argb, int pix) { // appends a 255 byte to every 3-byte pixel
- asm volatile (
- "movi v4.8b, #255 \n" // Alpha
- "1: \n"
- MEMACCESS(0)
- "ld3 {v1.8b,v2.8b,v3.8b}, [%0], #24 \n" // load 8 pixels of RGB24.
- "subs %w2, %w2, #8 \n" // 8 processed per loop.
- MEMACCESS(1)
- "st4 {v1.8b,v2.8b,v3.8b,v4.8b}, [%1], #32 \n" // store 8 ARGB pixels
- "b.gt 1b \n" // repeat while pix > 0
- : "+r"(src_rgb24), // %0
- "+r"(dst_argb), // %1
- "+r"(pix) // %2
- :
- : "cc", "memory", "v1", "v2", "v3", "v4" // Clobber List
- );
-}
-#endif // HAS_RGB24TOARGBROW_NEON
-
-#ifdef HAS_RAWTOARGBROW_NEON
-void RAWToARGBRow_NEON(const uint8* src_raw, uint8* dst_argb, int pix) { // swaps the first and third channel and appends 255
- asm volatile (
- "movi v5.8b, #255 \n" // Alpha
- "1: \n"
- MEMACCESS(0)
- "ld3 {v0.8b,v1.8b,v2.8b}, [%0], #24 \n" // read r g b
- "subs %w2, %w2, #8 \n" // 8 processed per loop.
- "orr v3.8b, v1.8b, v1.8b \n" // move g
- "orr v4.8b, v0.8b, v0.8b \n" // move r
- MEMACCESS(1)
- "st4 {v2.8b,v3.8b,v4.8b,v5.8b}, [%1], #32 \n" // store b g r a
- "b.gt 1b \n" // repeat while pix > 0
- : "+r"(src_raw), // %0
- "+r"(dst_argb), // %1
- "+r"(pix) // %2
- :
- : "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5" // Clobber List
- );
-}
-#endif // HAS_RAWTOARGBROW_NEON
-
-#define RGB565TOARGB /* asm fragment. In: v0.8h = 8 RGB565 pixels. Out: v0.8b=B, v1.8b=G, v2.8b=R. Uses v4 and v6 as scratch. */ \
- "shrn v6.8b, v0.8h, #5 \n" /* G xxGGGGGG */ \
- "shl v6.8b, v6.8b, #2 \n" /* G GGGGGG00 upper 6 */ \
- "ushr v4.8b, v6.8b, #6 \n" /* G 000000GG lower 2 */ \
- "orr v1.8b, v4.8b, v6.8b \n" /* G */ \
- "xtn v2.8b, v0.8h \n" /* B xxxBBBBB */ \
- "ushr v0.8h, v0.8h, #11 \n" /* R 000RRRRR */ \
- "xtn2 v2.16b,v0.8h \n" /* R in upper part */ \
- "shl v2.16b, v2.16b, #3 \n" /* R,B BBBBB000 upper 5 */ \
- "ushr v0.16b, v2.16b, #5 \n" /* R,B 00000BBB lower 3 */ \
- "orr v0.16b, v0.16b, v2.16b \n" /* R,B */ \
- "dup v2.2D, v0.D[1] \n" /* R: copy the upper half of v0 into v2 */
-
-#ifdef HAS_RGB565TOARGBROW_NEON
-void RGB565ToARGBRow_NEON(const uint8* src_rgb565, uint8* dst_argb, int pix) { // expands 2-byte pixels to 4-byte pixels
- asm volatile (
- "movi v3.8b, #255 \n" // Alpha
- "1: \n"
- MEMACCESS(0)
- "ld1 {v0.16b}, [%0], #16 \n" // load 8 RGB565 pixels.
- "subs %w2, %w2, #8 \n" // 8 processed per loop.
- RGB565TOARGB // leaves B in v0, G in v1, R in v2; v3 is untouched
- MEMACCESS(1)
- "st4 {v0.8b,v1.8b,v2.8b,v3.8b}, [%1], #32 \n" // store 8 ARGB pixels
- "b.gt 1b \n" // repeat while pix > 0
- : "+r"(src_rgb565), // %0
- "+r"(dst_argb), // %1
- "+r"(pix) // %2
- :
- : "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v6" // Clobber List
- );
-}
-#endif // HAS_RGB565TOARGBROW_NEON
-
-#define ARGB1555TOARGB /* asm fragment. In: v0.8h = 8 ARGB1555 pixels. Out: v0.8b=B, v1.8b=G, v2.8b=R, v3.8b=A. */ \
- "ushr v2.8h, v0.8h, #10 \n" /* R xxxRRRRR */ \
- "shl v2.8h, v2.8h, #3 \n" /* R RRRRR000 upper 5 */ \
- "xtn v3.8b, v2.8h \n" /* RRRRR000 in the low half of v3; v3 is written here without being read */ \
- \
- "sshr v2.8h, v0.8h, #15 \n" /* A AAAAAAAA: arithmetic shift replicates the top bit */ \
- "xtn2 v3.16b, v2.8h \n" /* A in the high half of v3 */ \
- \
- "xtn v2.8b, v0.8h \n" /* B xxxBBBBB */ \
- "shrn2 v2.16b,v0.8h, #5 \n" /* G xxxGGGGG */ \
- \
- "ushr v1.16b, v3.16b, #5 \n" /* R,A 00000RRR lower 3 */ \
- "shl v0.16b, v2.16b, #3 \n" /* B,G BBBBB000 upper 5 */ \
- "ushr v2.16b, v0.16b, #5 \n" /* B,G 00000BBB lower 3 */ \
- \
- "orr v0.16b, v0.16b, v2.16b \n" /* B,G */ \
- "orr v2.16b, v1.16b, v3.16b \n" /* R,A */ \
- "dup v1.2D, v0.D[1] \n" /* G: upper half of v0 */ \
- "dup v3.2D, v2.D[1] \n" /* A: upper half of v2 */
-
-// RGB555TOARGB is the same as ARGB1555TOARGB but ignores alpha.
-// In: v0.8h = 8 pixels. Out: v0.8b=B, v1.8b=G, v2.8b=R. Uses v3 as scratch.
-// The last line deliberately has no trailing backslash, so the macro ends
-// there and does not depend on a blank line following it.
-#define RGB555TOARGB \
- "ushr v2.8h, v0.8h, #10 \n" /* R xxxRRRRR */ \
- "shl v2.8h, v2.8h, #3 \n" /* R RRRRR000 upper 5 */ \
- "xtn v3.8b, v2.8h \n" /* RRRRR000 */ \
- \
- "xtn v2.8b, v0.8h \n" /* B xxxBBBBB */ \
- "shrn2 v2.16b,v0.8h, #5 \n" /* G xxxGGGGG */ \
- \
- "ushr v1.16b, v3.16b, #5 \n" /* R 00000RRR lower 3 */ \
- "shl v0.16b, v2.16b, #3 \n" /* B,G BBBBB000 upper 5 */ \
- "ushr v2.16b, v0.16b, #5 \n" /* B,G 00000BBB lower 3 */ \
- \
- "orr v0.16b, v0.16b, v2.16b \n" /* B,G */ \
- "orr v2.16b, v1.16b, v3.16b \n" /* R */ \
- "dup v1.2D, v0.D[1] \n" /* G */
-
-#ifdef HAS_ARGB1555TOARGBROW_NEON
-// Expands 8 ARGB1555 pixels (16 bytes) to 8 four-byte ARGB pixels (32 bytes)
-// per loop pass. The loop runs until pix <= 0.
-// No alpha constant is preloaded: ARGB1555TOARGB derives alpha from the top
-// bit of each pixel and overwrites v3 before it is ever read.
-void ARGB1555ToARGBRow_NEON(const uint8* src_argb1555, uint8* dst_argb,
- int pix) {
- asm volatile (
- "1: \n"
- MEMACCESS(0)
- "ld1 {v0.16b}, [%0], #16 \n" // load 8 ARGB1555 pixels.
- "subs %w2, %w2, #8 \n" // 8 processed per loop.
- ARGB1555TOARGB // leaves B in v0, G in v1, R in v2, A in v3
- MEMACCESS(1)
- "st4 {v0.8b,v1.8b,v2.8b,v3.8b}, [%1], #32 \n" // store 8 ARGB pixels
- "b.gt 1b \n" // repeat while pix > 0
- : "+r"(src_argb1555), // %0
- "+r"(dst_argb), // %1
- "+r"(pix) // %2
- :
- : "cc", "memory", "v0", "v1", "v2", "v3" // Clobber List
- );
-}
-#endif // HAS_ARGB1555TOARGBROW_NEON
-
-#define ARGB4444TOARGB /* asm fragment. In: v0.8h = 8 ARGB4444 pixels. Out: v0.8b..v3.8b, one byte plane each. */ \
- "shrn v1.8b, v0.8h, #8 \n" /* v1(l) AR */ \
- "xtn2 v1.16b, v0.8h \n" /* v1(h) GB */ \
- "shl v2.16b, v1.16b, #4 \n" /* B,R BBBB0000 */ \
- "ushr v3.16b, v1.16b, #4 \n" /* G,A 0000GGGG */ \
- "ushr v0.16b, v2.16b, #4 \n" /* B,R 0000BBBB */ \
- "shl v1.16b, v3.16b, #4 \n" /* G,A GGGG0000 */ \
- "orr v2.16b, v0.16b, v2.16b \n" /* B,R BBBBBBBB */ \
- "orr v3.16b, v1.16b, v3.16b \n" /* G,A GGGGGGGG */ \
- "dup v0.2D, v2.D[1] \n" /* v0 = upper half of v2 */ \
- "dup v1.2D, v3.D[1] \n" /* v1 = upper half of v3 */
-
-#ifdef HAS_ARGB4444TOARGBROW_NEON
-void ARGB4444ToARGBRow_NEON(const uint8* src_argb4444, uint8* dst_argb, // expands 2-byte pixels to 4-byte pixels
- int pix) {
- asm volatile (
- "1: \n"
- MEMACCESS(0)
- "ld1 {v0.16b}, [%0], #16 \n" // load 8 ARGB4444 pixels.
- "subs %w2, %w2, #8 \n" // 8 processed per loop.
- ARGB4444TOARGB // rewrites v0..v3 as four byte planes
- MEMACCESS(1)
- "st4 {v0.8b,v1.8b,v2.8b,v3.8b}, [%1], #32 \n" // store 8 ARGB pixels
- "b.gt 1b \n" // repeat while pix > 0
- : "+r"(src_argb4444), // %0
- "+r"(dst_argb), // %1
- "+r"(pix) // %2
- :
- : "cc", "memory", "v0", "v1", "v2", "v3", "v4" // Clobber List; v4 is listed although the visible code does not write it
- );
-}
-#endif // HAS_ARGB4444TOARGBROW_NEON
-
-#ifdef HAS_ARGBTORGB24ROW_NEON
-void ARGBToRGB24Row_NEON(const uint8* src_argb, uint8* dst_rgb24, int pix) { // drops the fourth byte of every pixel
- asm volatile (
- "1: \n"
- MEMACCESS(0)
- "ld4 {v1.8b,v2.8b,v3.8b,v4.8b}, [%0], #32 \n" // load 8 ARGB pixels
- "subs %w2, %w2, #8 \n" // 8 processed per loop.
- MEMACCESS(1)
- "st3 {v1.8b,v2.8b,v3.8b}, [%1], #24 \n" // store 8 pixels of RGB24.
- "b.gt 1b \n" // repeat while pix > 0
- : "+r"(src_argb), // %0
- "+r"(dst_rgb24), // %1
- "+r"(pix) // %2
- :
- : "cc", "memory", "v1", "v2", "v3", "v4" // Clobber List
- );
-}
-#endif // HAS_ARGBTORGB24ROW_NEON
-
-#ifdef HAS_ARGBTORAWROW_NEON
-void ARGBToRAWRow_NEON(const uint8* src_argb, uint8* dst_raw, int pix) { // drops alpha and reverses the channel order
- asm volatile (
- "1: \n"
- MEMACCESS(0)
- "ld4 {v1.8b,v2.8b,v3.8b,v4.8b}, [%0], #32 \n" // load b g r a
- "subs %w2, %w2, #8 \n" // 8 processed per loop.
- "orr v4.8b, v2.8b, v2.8b \n" // mov g (overwrites the loaded alpha in v4)
- "orr v5.8b, v1.8b, v1.8b \n" // mov b
- MEMACCESS(1)
- "st3 {v3.8b,v4.8b,v5.8b}, [%1], #24 \n" // store r g b
- "b.gt 1b \n" // repeat while pix > 0
- : "+r"(src_argb), // %0
- "+r"(dst_raw), // %1
- "+r"(pix) // %2
- :
- : "cc", "memory", "v1", "v2", "v3", "v4", "v5" // Clobber List
- );
-}
-#endif // HAS_ARGBTORAWROW_NEON
-
-#ifdef HAS_YUY2TOYROW_NEON
-void YUY2ToYRow_NEON(const uint8* src_yuy2, uint8* dst_y, int pix) { // keeps the even-position bytes of the source
- asm volatile (
- "1: \n"
- MEMACCESS(0)
- "ld2 {v0.16b,v1.16b}, [%0], #32 \n" // load 16 pixels of YUY2.
- "subs %w2, %w2, #16 \n" // 16 processed per loop.
- MEMACCESS(1)
- "st1 {v0.16b}, [%1], #16 \n" // store 16 pixels of Y.
- "b.gt 1b \n" // repeat while pix > 0
- : "+r"(src_yuy2), // %0
- "+r"(dst_y), // %1
- "+r"(pix) // %2
- :
- : "cc", "memory", "v0", "v1" // Clobber List
- );
-}
-#endif // HAS_YUY2TOYROW_NEON
-
-#ifdef HAS_UYVYTOYROW_NEON
-void UYVYToYRow_NEON(const uint8* src_uyvy, uint8* dst_y, int pix) { // keeps the odd-position bytes of the source
- asm volatile (
- "1: \n"
- MEMACCESS(0)
- "ld2 {v0.16b,v1.16b}, [%0], #32 \n" // load 16 pixels of UYVY.
- "subs %w2, %w2, #16 \n" // 16 processed per loop.
- MEMACCESS(1)
- "st1 {v1.16b}, [%1], #16 \n" // store 16 pixels of Y.
- "b.gt 1b \n" // repeat while pix > 0
- : "+r"(src_uyvy), // %0
- "+r"(dst_y), // %1
- "+r"(pix) // %2
- :
- : "cc", "memory", "v0", "v1" // Clobber List
- );
-}
-#endif // HAS_UYVYTOYROW_NEON
-
-#ifdef HAS_YUY2TOUV422ROW_NEON
-void YUY2ToUV422Row_NEON(const uint8* src_yuy2, uint8* dst_u, uint8* dst_v, // extracts bytes 1 and 3 of every 4-byte group
- int pix) {
- asm volatile (
- "1: \n"
- MEMACCESS(0)
- "ld4 {v0.8b,v1.8b,v2.8b,v3.8b}, [%0], #32 \n" // load 16 YUY2 pixels
- "subs %w3, %w3, #16 \n" // 16 pixels = 8 UVs.
- MEMACCESS(1)
- "st1 {v1.8b}, [%1], #8 \n" // store 8 U.
- MEMACCESS(2)
- "st1 {v3.8b}, [%2], #8 \n" // store 8 V.
- "b.gt 1b \n" // repeat while pix > 0
- : "+r"(src_yuy2), // %0
- "+r"(dst_u), // %1
- "+r"(dst_v), // %2
- "+r"(pix) // %3
- :
- : "cc", "memory", "v0", "v1", "v2", "v3" // Clobber List
- );
-}
-#endif // HAS_YUY2TOUV422ROW_NEON
-
-#ifdef HAS_UYVYTOUV422ROW_NEON
-void UYVYToUV422Row_NEON(const uint8* src_uyvy, uint8* dst_u, uint8* dst_v, // extracts bytes 0 and 2 of every 4-byte group
- int pix) {
- asm volatile (
- "1: \n"
- MEMACCESS(0)
- "ld4 {v0.8b,v1.8b,v2.8b,v3.8b}, [%0], #32 \n" // load 16 UYVY pixels
- "subs %w3, %w3, #16 \n" // 16 pixels = 8 UVs.
- MEMACCESS(1)
- "st1 {v0.8b}, [%1], #8 \n" // store 8 U.
- MEMACCESS(2)
- "st1 {v2.8b}, [%2], #8 \n" // store 8 V.
- "b.gt 1b \n" // repeat while pix > 0
- : "+r"(src_uyvy), // %0
- "+r"(dst_u), // %1
- "+r"(dst_v), // %2
- "+r"(pix) // %3
- :
- : "cc", "memory", "v0", "v1", "v2", "v3" // Clobber List
- );
-}
-#endif // HAS_UYVYTOUV422ROW_NEON
-
-#ifdef HAS_YUY2TOUVROW_NEON
-void YUY2ToUVRow_NEON(const uint8* src_yuy2, int stride_yuy2, // averages U and V from two source rows
- uint8* dst_u, uint8* dst_v, int pix) {
- const uint8* src_yuy2b = src_yuy2 + stride_yuy2; // second source row, stride_yuy2 bytes after the first
- asm volatile (
- "1: \n"
- MEMACCESS(0)
- "ld4 {v0.8b,v1.8b,v2.8b,v3.8b}, [%0], #32 \n" // load 16 pixels
- "subs %w4, %w4, #16 \n" // 16 pixels = 8 UVs.
- MEMACCESS(1)
- "ld4 {v4.8b,v5.8b,v6.8b,v7.8b}, [%1], #32 \n" // load next row
- "urhadd v1.8b, v1.8b, v5.8b \n" // average rows of U
- "urhadd v3.8b, v3.8b, v7.8b \n" // average rows of V
- MEMACCESS(2)
- "st1 {v1.8b}, [%2], #8 \n" // store 8 U.
- MEMACCESS(3)
- "st1 {v3.8b}, [%3], #8 \n" // store 8 V.
- "b.gt 1b \n" // repeat while pix > 0
- : "+r"(src_yuy2), // %0
- "+r"(src_yuy2b), // %1
- "+r"(dst_u), // %2
- "+r"(dst_v), // %3
- "+r"(pix) // %4
- :
- : "cc", "memory", "v0", "v1", "v2", "v3", "v4",
- "v5", "v6", "v7" // Clobber List
- );
-}
-#endif // HAS_YUY2TOUVROW_NEON
-
-#ifdef HAS_UYVYTOUVROW_NEON
-void UYVYToUVRow_NEON(const uint8* src_uyvy, int stride_uyvy, // averages U and V from two source rows
- uint8* dst_u, uint8* dst_v, int pix) {
- const uint8* src_uyvyb = src_uyvy + stride_uyvy; // second source row, stride_uyvy bytes after the first
- asm volatile (
- "1: \n"
- MEMACCESS(0)
- "ld4 {v0.8b,v1.8b,v2.8b,v3.8b}, [%0], #32 \n" // load 16 pixels
- "subs %w4, %w4, #16 \n" // 16 pixels = 8 UVs.
- MEMACCESS(1)
- "ld4 {v4.8b,v5.8b,v6.8b,v7.8b}, [%1], #32 \n" // load next row
- "urhadd v0.8b, v0.8b, v4.8b \n" // average rows of U
- "urhadd v2.8b, v2.8b, v6.8b \n" // average rows of V
- MEMACCESS(2)
- "st1 {v0.8b}, [%2], #8 \n" // store 8 U.
- MEMACCESS(3)
- "st1 {v2.8b}, [%3], #8 \n" // store 8 V.
- "b.gt 1b \n" // repeat while pix > 0
- : "+r"(src_uyvy), // %0
- "+r"(src_uyvyb), // %1
- "+r"(dst_u), // %2
- "+r"(dst_v), // %3
- "+r"(pix) // %4
- :
- : "cc", "memory", "v0", "v1", "v2", "v3", "v4",
- "v5", "v6", "v7" // Clobber List
- );
-}
-#endif // HAS_UYVYTOUVROW_NEON
-
-// For BGRAToARGB, ABGRToARGB, RGBAToARGB, and ARGBToRGBA. Permutes the bytes of each 16-byte group using the 16-byte table at shuffler.
-#ifdef HAS_ARGBSHUFFLEROW_NEON
-void ARGBShuffleRow_NEON(const uint8* src_argb, uint8* dst_argb,
- const uint8* shuffler, int pix) {
- asm volatile (
- MEMACCESS(3)
- "ld1 {v2.16b}, [%3] \n" // shuffler
- "1: \n"
- MEMACCESS(0)
- "ld1 {v0.16b}, [%0], #16 \n" // load 4 pixels.
- "subs %w2, %w2, #4 \n" // 4 processed per loop
- "tbl v1.16b, {v0.16b}, v2.16b \n" // look up 4 pixels
- MEMACCESS(1)
- "st1 {v1.16b}, [%1], #16 \n" // store 4.
- "b.gt 1b \n" // repeat while pix > 0
- : "+r"(src_argb), // %0
- "+r"(dst_argb), // %1
- "+r"(pix) // %2
- : "r"(shuffler) // %3
- : "cc", "memory", "v0", "v1", "v2" // Clobber List
- );
-}
-#endif // HAS_ARGBSHUFFLEROW_NEON
-
-#ifdef HAS_I422TOYUY2ROW_NEON
-void I422ToYUY2Row_NEON(const uint8* src_y, // interleaves Y, U, Y, V bytes; 32 bytes out per loop pass
- const uint8* src_u,
- const uint8* src_v,
- uint8* dst_yuy2, int width) {
- asm volatile (
- "1: \n"
- MEMACCESS(0)
- "ld2 {v0.8b, v1.8b}, [%0], #16 \n" // load 16 Ys
- "orr v2.8b, v1.8b, v1.8b \n" // move the odd Ys to v2 so v1 can hold U
- MEMACCESS(1)
- "ld1 {v1.8b}, [%1], #8 \n" // load 8 Us
- MEMACCESS(2)
- "ld1 {v3.8b}, [%2], #8 \n" // load 8 Vs
- "subs %w4, %w4, #16 \n" // 16 pixels
- MEMACCESS(3)
- "st4 {v0.8b,v1.8b,v2.8b,v3.8b}, [%3], #32 \n" // Store 16 pixels.
- "b.gt 1b \n" // repeat while width > 0
- : "+r"(src_y), // %0
- "+r"(src_u), // %1
- "+r"(src_v), // %2
- "+r"(dst_yuy2), // %3
- "+r"(width) // %4
- :
- : "cc", "memory", "v0", "v1", "v2", "v3"
- );
-}
-#endif // HAS_I422TOYUY2ROW_NEON
-
-#ifdef HAS_I422TOUYVYROW_NEON
-void I422ToUYVYRow_NEON(const uint8* src_y, // interleaves U, Y, V, Y bytes; 32 bytes out per loop pass
- const uint8* src_u,
- const uint8* src_v,
- uint8* dst_uyvy, int width) {
- asm volatile (
- "1: \n"
- MEMACCESS(0)
- "ld2 {v1.8b,v2.8b}, [%0], #16 \n" // load 16 Ys
- "orr v3.8b, v2.8b, v2.8b \n" // move the odd Ys to v3 so v2 can hold V
- MEMACCESS(1)
- "ld1 {v0.8b}, [%1], #8 \n" // load 8 Us
- MEMACCESS(2)
- "ld1 {v2.8b}, [%2], #8 \n" // load 8 Vs
- "subs %w4, %w4, #16 \n" // 16 pixels
- MEMACCESS(3)
- "st4 {v0.8b,v1.8b,v2.8b,v3.8b}, [%3], #32 \n" // Store 16 pixels.
- "b.gt 1b \n" // repeat while width > 0
- : "+r"(src_y), // %0
- "+r"(src_u), // %1
- "+r"(src_v), // %2
- "+r"(dst_uyvy), // %3
- "+r"(width) // %4
- :
- : "cc", "memory", "v0", "v1", "v2", "v3"
- );
-}
-#endif // HAS_I422TOUYVYROW_NEON
-
-#ifdef HAS_ARGBTORGB565ROW_NEON
-void ARGBToRGB565Row_NEON(const uint8* src_argb, uint8* dst_rgb565, int pix) { // packs 4-byte pixels into 2-byte pixels
- asm volatile (
- "1: \n"
- MEMACCESS(0)
- "ld4 {v20.8b,v21.8b,v22.8b,v23.8b}, [%0], #32 \n" // load 8 pixels
- "subs %w2, %w2, #8 \n" // 8 processed per loop.
- ARGBTORGB565 // packs v20/v21/v22 into v0.8h; v23 is not used
- MEMACCESS(1)
- "st1 {v0.16b}, [%1], #16 \n" // store 8 pixels RGB565.
- "b.gt 1b \n" // repeat while pix > 0
- : "+r"(src_argb), // %0
- "+r"(dst_rgb565), // %1
- "+r"(pix) // %2
- :
- : "cc", "memory", "v0", "v20", "v21", "v22", "v23"
- );
-}
-#endif // HAS_ARGBTORGB565ROW_NEON
-
-#ifdef HAS_ARGBTORGB565DITHERROW_NEON
-// Converts 8 ARGB pixels per loop pass to RGB565. Before packing, the bytes
-// of dither4 are added with unsigned saturation to the B, G and R planes.
-// The loop runs until width <= 0.
-//
-// src_argb and width are read-write ("+r") operands because the asm
-// post-increments the source pointer and decrements the count; declaring
-// them as input-only operands would let the compiler assume they are
-// unchanged after the asm statement.
-void ARGBToRGB565DitherRow_NEON(const uint8* src_argb, uint8* dst_rgb,
- const uint32 dither4, int width) {
- asm volatile (
- "dup v1.4s, %w3 \n" // dither4
- "1: \n"
- MEMACCESS(0)
- "ld4 {v20.8b,v21.8b,v22.8b,v23.8b}, [%0], #32 \n" // load 8 pixels
- "subs %w2, %w2, #8 \n" // 8 processed per loop.
- "uqadd v20.8b, v20.8b, v1.8b \n" // B += dither, saturating
- "uqadd v21.8b, v21.8b, v1.8b \n" // G += dither, saturating
- "uqadd v22.8b, v22.8b, v1.8b \n" // R += dither, saturating
- ARGBTORGB565
- MEMACCESS(1)
- "st1 {v0.16b}, [%1], #16 \n" // store 8 pixels RGB565.
- "b.gt 1b \n"
- : "+r"(src_argb), // %0
- "+r"(dst_rgb), // %1
- "+r"(width) // %2
- : "r"(dither4) // %3
- : "cc", "memory", "v0", "v1", "v20", "v21", "v22", "v23"
- );
-}
-#endif // HAS_ARGBTORGB565DITHERROW_NEON
-
-#ifdef HAS_ARGBTOARGB1555ROW_NEON
-void ARGBToARGB1555Row_NEON(const uint8* src_argb, uint8* dst_argb1555, // packs 4-byte pixels into 2-byte pixels
- int pix) {
- asm volatile (
- "1: \n"
- MEMACCESS(0)
- "ld4 {v20.8b,v21.8b,v22.8b,v23.8b}, [%0], #32 \n" // load 8 pixels
- "subs %w2, %w2, #8 \n" // 8 processed per loop.
- ARGBTOARGB1555 // packs v20..v23 into v0.8h
- MEMACCESS(1)
- "st1 {v0.16b}, [%1], #16 \n" // store 8 pixels ARGB1555.
- "b.gt 1b \n" // repeat while pix > 0
- : "+r"(src_argb), // %0
- "+r"(dst_argb1555), // %1
- "+r"(pix) // %2
- :
- : "cc", "memory", "v0", "v20", "v21", "v22", "v23"
- );
-}
-#endif // HAS_ARGBTOARGB1555ROW_NEON
-
-#ifdef HAS_ARGBTOARGB4444ROW_NEON
-void ARGBToARGB4444Row_NEON(const uint8* src_argb, uint8* dst_argb4444, // Converts a row of ARGB (4 bytes/pixel) to ARGB4444 (2 bytes/pixel).
- int pix) { // pixel count; consumed 8 at a time
- asm volatile ( // The body runs once before the count is tested, so pix is effectively rounded up to a multiple of 8.
- "movi v4.16b, #0x0f \n" // low-nibble mask (bits to clear with bic); presumably consumed by ARGBTOARGB4444 -- confirm
- "1: \n"
- MEMACCESS(0)
- "ld4 {v20.8b,v21.8b,v22.8b,v23.8b}, [%0], #32 \n" // load 8 pixels, de-interleaved one channel per register
- "subs %w2, %w2, #8 \n" // 8 processed per loop.
- ARGBTOARGB4444 // packing macro defined outside this block; presumably leaves the packed pixels in v0 -- confirm
- MEMACCESS(1)
- "st1 {v0.16b}, [%1], #16 \n" // store 8 pixels ARGB4444.
- "b.gt 1b \n" // loop while the count left by subs is > 0
- : "+r"(src_argb), // %0 (advanced by the post-indexed load)
- "+r"(dst_argb4444), // %1 (advanced by the post-indexed store)
- "+r"(pix) // %2 (decremented in place)
- :
- : "cc", "memory", "v0", "v1", "v4", "v20", "v21", "v22", "v23"
- );
-}
-#endif // HAS_ARGBTOARGB4444ROW_NEON
-
-#ifdef HAS_ARGBTOYROW_NEON
-void ARGBToYRow_NEON(const uint8* src_argb, uint8* dst_y, int pix) { // Computes one luma byte per ARGB pixel: Y = sat8(round((13*B + 65*G + 33*R) / 128)) + 16, with a saturating add.
- asm volatile ( // The body runs once before the count is tested, so pix is effectively rounded up to a multiple of 8.
- "movi v4.8b, #13 \n" // B * 0.1016 coefficient
- "movi v5.8b, #65 \n" // G * 0.5078 coefficient
- "movi v6.8b, #33 \n" // R * 0.2578 coefficient
- "movi v7.8b, #16 \n" // Add 16 constant
- "1: \n"
- MEMACCESS(0)
- "ld4 {v0.8b,v1.8b,v2.8b,v3.8b}, [%0], #32 \n" // load 8 ARGB pixels; v0/v1/v2 are used as B/G/R, v3 is reused as the accumulator.
- "subs %w2, %w2, #8 \n" // 8 processed per loop.
- "umull v3.8h, v0.8b, v4.8b \n" // B
- "umlal v3.8h, v1.8b, v5.8b \n" // G
- "umlal v3.8h, v2.8b, v6.8b \n" // R
- "sqrshrun v0.8b, v3.8h, #7 \n" // 16 bit to 8 bit Y (rounding shift right by 7, saturating narrow)
- "uqadd v0.8b, v0.8b, v7.8b \n" // + 16, saturating
- MEMACCESS(1)
- "st1 {v0.8b}, [%1], #8 \n" // store 8 pixels Y.
- "b.gt 1b \n" // loop while the count left by subs is > 0
- : "+r"(src_argb), // %0 (advanced by the post-indexed load)
- "+r"(dst_y), // %1 (advanced by the post-indexed store)
- "+r"(pix) // %2 (decremented in place)
- :
- : "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7"
- );
-}
-#endif // HAS_ARGBTOYROW_NEON
-
-#ifdef HAS_ARGBTOYJROW_NEON
-void ARGBToYJRow_NEON(const uint8* src_argb, uint8* dst_y, int pix) { // Computes one luma byte per ARGB pixel: Y = sat8(round((15*B + 75*G + 38*R) / 128)); no offset is added.
- asm volatile ( // The body runs once before the count is tested, so pix is effectively rounded up to a multiple of 8.
- "movi v4.8b, #15 \n" // B * 0.11400 coefficient
- "movi v5.8b, #75 \n" // G * 0.58700 coefficient
- "movi v6.8b, #38 \n" // R * 0.29900 coefficient
- "1: \n"
- MEMACCESS(0)
- "ld4 {v0.8b,v1.8b,v2.8b,v3.8b}, [%0], #32 \n" // load 8 ARGB pixels; v0/v1/v2 are used as B/G/R, v3 is reused as the accumulator.
- "subs %w2, %w2, #8 \n" // 8 processed per loop.
- "umull v3.8h, v0.8b, v4.8b \n" // B
- "umlal v3.8h, v1.8b, v5.8b \n" // G
- "umlal v3.8h, v2.8b, v6.8b \n" // R
- "sqrshrun v0.8b, v3.8h, #7 \n" // 15 bit to 8 bit Y (rounding shift right by 7, saturating narrow)
- MEMACCESS(1)
- "st1 {v0.8b}, [%1], #8 \n" // store 8 pixels Y.
- "b.gt 1b \n" // loop while the count left by subs is > 0
- : "+r"(src_argb), // %0 (advanced by the post-indexed load)
- "+r"(dst_y), // %1 (advanced by the post-indexed store)
- "+r"(pix) // %2 (decremented in place)
- :
- : "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6"
- );
-}
-#endif // HAS_ARGBTOYJROW_NEON
-
-// 8x1 pixels. One U byte and one V byte are produced per ARGB pixel (no subsampling).
-#ifdef HAS_ARGBTOUV444ROW_NEON
-void ARGBToUV444Row_NEON(const uint8* src_argb, uint8* dst_u, uint8* dst_v, // U = (112*B - 74*G - 38*R + 0x8080) >> 8, V = (112*R - 94*G - 18*B + 0x8080) >> 8
- int pix) { // pixel count; consumed 8 at a time
- asm volatile ( // The body runs once before the count is tested, so pix is effectively rounded up to a multiple of 8.
- "movi v24.8b, #112 \n" // UB / VR 0.875 coefficient
- "movi v25.8b, #74 \n" // UG -0.5781 coefficient
- "movi v26.8b, #38 \n" // UR -0.2969 coefficient
- "movi v27.8b, #18 \n" // VB -0.1406 coefficient
- "movi v28.8b, #94 \n" // VG -0.7344 coefficient
- "movi v29.16b,#0x80 \n" // 128.5 (each 16-bit lane reads as 0x8080)
- "1: \n"
- MEMACCESS(0)
- "ld4 {v0.8b,v1.8b,v2.8b,v3.8b}, [%0], #32 \n" // load 8 ARGB pixels; v0/v1/v2 are used as B/G/R.
- "subs %w3, %w3, #8 \n" // 8 processed per loop.
- "umull v4.8h, v0.8b, v24.8b \n" // B
- "umlsl v4.8h, v1.8b, v25.8b \n" // G
- "umlsl v4.8h, v2.8b, v26.8b \n" // R
- "add v4.8h, v4.8h, v29.8h \n" // +128 -> unsigned
-
- "umull v3.8h, v2.8b, v24.8b \n" // R (v3 is overwritten here and used as the V accumulator)
- "umlsl v3.8h, v1.8b, v28.8b \n" // G
- "umlsl v3.8h, v0.8b, v27.8b \n" // B
- "add v3.8h, v3.8h, v29.8h \n" // +128 -> unsigned
-
- "uqshrn v0.8b, v4.8h, #8 \n" // 16 bit to 8 bit U
- "uqshrn v1.8b, v3.8h, #8 \n" // 16 bit to 8 bit V
-
- MEMACCESS(1)
- "st1 {v0.8b}, [%1], #8 \n" // store 8 pixels U.
- MEMACCESS(2)
- "st1 {v1.8b}, [%2], #8 \n" // store 8 pixels V.
- "b.gt 1b \n" // loop while the count left by subs is > 0
- : "+r"(src_argb), // %0 (advanced by the post-indexed load)
- "+r"(dst_u), // %1 (advanced by the post-indexed store)
- "+r"(dst_v), // %2 (advanced by the post-indexed store)
- "+r"(pix) // %3 (decremented in place)
- :
- : "cc", "memory", "v0", "v1", "v2", "v3", "v4",
- "v24", "v25", "v26", "v27", "v28", "v29"
- );
-}
-#endif // HAS_ARGBTOUV444ROW_NEON
-
-// 16x1 pixels -> 8x1. pix is number of argb pixels. e.g. 16. Each U/V output covers 2 horizontally adjacent pixels.
-#ifdef HAS_ARGBTOUV422ROW_NEON
-void ARGBToUV422Row_NEON(const uint8* src_argb, uint8* dst_u, uint8* dst_v, // U = (B*v20 - G*v21 - R*v22 + v25) >> 8, V = (R*v20 - G*v24 - B*v23 + v25) >> 8, on pair sums.
- int pix) { // pixel count; consumed 16 at a time
- asm volatile ( // The body runs once before the count is tested, so pix is effectively rounded up to a multiple of 16.
- RGBTOUV_SETUP_REG // macro defined outside this block; presumably loads the coefficients/bias read below from v20-v25 -- confirm
- "1: \n"
- MEMACCESS(0)
- "ld4 {v0.16b,v1.16b,v2.16b,v3.16b}, [%0], #64 \n" // load 16 pixels.
-
- "uaddlp v0.8h, v0.16b \n" // B 16 bytes -> 8 shorts (sum of each adjacent pair).
- "uaddlp v1.8h, v1.16b \n" // G 16 bytes -> 8 shorts.
- "uaddlp v2.8h, v2.16b \n" // R 16 bytes -> 8 shorts.
-
- "subs %w3, %w3, #16 \n" // 16 processed per loop.
- "mul v3.8h, v0.8h, v20.8h \n" // B
- "mls v3.8h, v1.8h, v21.8h \n" // G
- "mls v3.8h, v2.8h, v22.8h \n" // R
- "add v3.8h, v3.8h, v25.8h \n" // +128 -> unsigned
-
- "mul v4.8h, v2.8h, v20.8h \n" // R
- "mls v4.8h, v1.8h, v24.8h \n" // G
- "mls v4.8h, v0.8h, v23.8h \n" // B
- "add v4.8h, v4.8h, v25.8h \n" // +128 -> unsigned
-
- "uqshrn v0.8b, v3.8h, #8 \n" // 16 bit to 8 bit U
- "uqshrn v1.8b, v4.8h, #8 \n" // 16 bit to 8 bit V
-
- MEMACCESS(1)
- "st1 {v0.8b}, [%1], #8 \n" // store 8 pixels U.
- MEMACCESS(2)
- "st1 {v1.8b}, [%2], #8 \n" // store 8 pixels V.
- "b.gt 1b \n" // loop while the count left by subs is > 0
- : "+r"(src_argb), // %0 (advanced by the post-indexed load)
- "+r"(dst_u), // %1 (advanced by the post-indexed store)
- "+r"(dst_v), // %2 (advanced by the post-indexed store)
- "+r"(pix) // %3 (decremented in place)
- :
- : "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7",
- "v20", "v21", "v22", "v23", "v24", "v25"
- );
-}
-#endif // HAS_ARGBTOUV422ROW_NEON
-
-// 32x1 pixels -> 8x1. pix is number of argb pixels. e.g. 32. Each U/V output covers 4 horizontally adjacent pixels.
-#ifdef HAS_ARGBTOUV411ROW_NEON
-void ARGBToUV411Row_NEON(const uint8* src_argb, uint8* dst_u, uint8* dst_v, // U = (B*v20 - G*v21 - R*v22 + v25) >> 8, V = (R*v20 - G*v24 - B*v23 + v25) >> 8, on halved 4-pixel sums.
- int pix) { // pixel count; consumed 32 at a time
- asm volatile ( // The body runs once before the count is tested, so pix is effectively rounded up to a multiple of 32.
- RGBTOUV_SETUP_REG // macro defined outside this block; presumably loads the coefficients/bias read below from v20-v25 -- confirm
- "1: \n"
- MEMACCESS(0)
- "ld4 {v0.16b,v1.16b,v2.16b,v3.16b}, [%0], #64 \n" // load 16 pixels.
- "uaddlp v0.8h, v0.16b \n" // B 16 bytes -> 8 shorts (sum of each adjacent pair).
- "uaddlp v1.8h, v1.16b \n" // G 16 bytes -> 8 shorts.
- "uaddlp v2.8h, v2.16b \n" // R 16 bytes -> 8 shorts.
- MEMACCESS(0)
- "ld4 {v4.16b,v5.16b,v6.16b,v7.16b}, [%0], #64 \n" // load next 16.
- "uaddlp v4.8h, v4.16b \n" // B 16 bytes -> 8 shorts.
- "uaddlp v5.8h, v5.16b \n" // G 16 bytes -> 8 shorts.
- "uaddlp v6.8h, v6.16b \n" // R 16 bytes -> 8 shorts.
-
- "addp v0.8h, v0.8h, v4.8h \n" // B 16 shorts -> 8 shorts (each lane now sums 4 pixels).
- "addp v1.8h, v1.8h, v5.8h \n" // G 16 shorts -> 8 shorts.
- "addp v2.8h, v2.8h, v6.8h \n" // R 16 shorts -> 8 shorts.
-
- "urshr v0.8h, v0.8h, #1 \n" // 2x average (rounded halving of the 4-pixel sum)
- "urshr v1.8h, v1.8h, #1 \n"
- "urshr v2.8h, v2.8h, #1 \n"
-
- "subs %w3, %w3, #32 \n" // 32 processed per loop.
- "mul v3.8h, v0.8h, v20.8h \n" // B
- "mls v3.8h, v1.8h, v21.8h \n" // G
- "mls v3.8h, v2.8h, v22.8h \n" // R
- "add v3.8h, v3.8h, v25.8h \n" // +128 -> unsigned
- "mul v4.8h, v2.8h, v20.8h \n" // R
- "mls v4.8h, v1.8h, v24.8h \n" // G
- "mls v4.8h, v0.8h, v23.8h \n" // B
- "add v4.8h, v4.8h, v25.8h \n" // +128 -> unsigned
- "uqshrn v0.8b, v3.8h, #8 \n" // 16 bit to 8 bit U
- "uqshrn v1.8b, v4.8h, #8 \n" // 16 bit to 8 bit V
- MEMACCESS(1)
- "st1 {v0.8b}, [%1], #8 \n" // store 8 pixels U.
- MEMACCESS(2)
- "st1 {v1.8b}, [%2], #8 \n" // store 8 pixels V.
- "b.gt 1b \n" // loop while the count left by subs is > 0
- : "+r"(src_argb), // %0 (advanced by both post-indexed loads)
- "+r"(dst_u), // %1 (advanced by the post-indexed store)
- "+r"(dst_v), // %2 (advanced by the post-indexed store)
- "+r"(pix) // %3 (decremented in place)
- :
- : "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7",
- "v20", "v21", "v22", "v23", "v24", "v25"
- );
-}
-#endif // HAS_ARGBTOUV411ROW_NEON
-
-// 16x2 pixels -> 8x1. pix is number of argb pixels. e.g. 16. RGBTOUV(QB, QG, QR) is an asm fragment: from 16-bit B/G/R lanes it leaves 8 U bytes in v0 and 8 V bytes in v1, using v3/v4 as scratch and reading coefficients v20-v24 and the bias v25.
-#define RGBTOUV(QB, QG, QR) \
- "mul v3.8h, " #QB ",v20.8h \n" /* U  = B * v20 */ \
- "mul v4.8h, " #QR ",v20.8h \n" /* V  = R * v20 */ \
- "mls v3.8h, " #QG ",v21.8h \n" /* U -= G * v21 */ \
- "mls v4.8h, " #QG ",v24.8h \n" /* V -= G * v24 */ \
- "mls v3.8h, " #QR ",v22.8h \n" /* U -= R * v22 */ \
- "mls v4.8h, " #QB ",v23.8h \n" /* V -= B * v23 */ \
- "add v3.8h, v3.8h, v25.8h \n" /* +128 -> unsigned */ \
- "add v4.8h, v4.8h, v25.8h \n" /* +128 -> unsigned */ \
- "uqshrn v0.8b, v3.8h, #8 \n" /* 16 bit to 8 bit U */ \
- "uqshrn v1.8b, v4.8h, #8 \n" /* 16 bit to 8 bit V */
-
-// TODO(fbarchard): Consider vhadd vertical, then vpaddl horizontal, avoid shr.
-// TODO(fbarchard): consider ptrdiff_t for all strides.
-
-#ifdef HAS_ARGBTOUVROW_NEON
-void ARGBToUVRow_NEON(const uint8* src_argb, int src_stride_argb, // Produces one U and one V byte per 2x2 block of ARGB pixels taken from two rows.
- uint8* dst_u, uint8* dst_v, int pix) { // pix counts columns; 16 columns are consumed per iteration
- const uint8* src_argb_1 = src_argb + src_stride_argb; // second source row
- asm volatile ( // The body runs once before the count is tested, so pix is effectively rounded up to a multiple of 16.
- RGBTOUV_SETUP_REG // macro defined outside this block; presumably loads the coefficients/bias that RGBTOUV reads from v20-v25 -- confirm
- "1: \n"
- MEMACCESS(0)
- "ld4 {v0.16b,v1.16b,v2.16b,v3.16b}, [%0], #64 \n" // load 16 pixels.
- "uaddlp v0.8h, v0.16b \n" // B 16 bytes -> 8 shorts (sum of each adjacent pair).
- "uaddlp v1.8h, v1.16b \n" // G 16 bytes -> 8 shorts.
- "uaddlp v2.8h, v2.16b \n" // R 16 bytes -> 8 shorts.
-
- MEMACCESS(1)
- "ld4 {v4.16b,v5.16b,v6.16b,v7.16b}, [%1], #64 \n" // load 16 pixels of the second row
- "uadalp v0.8h, v4.16b \n" // B: add second-row pair sums (lane = 2x2 sum).
- "uadalp v1.8h, v5.16b \n" // G: add second-row pair sums.
- "uadalp v2.8h, v6.16b \n" // R: add second-row pair sums.
-
- "urshr v0.8h, v0.8h, #1 \n" // 2x average (rounded halving of the 2x2 sum)
- "urshr v1.8h, v1.8h, #1 \n"
- "urshr v2.8h, v2.8h, #1 \n"
-
- "subs %w4, %w4, #16 \n" // 16 columns (x2 rows = 32 pixels) processed per loop.
- RGBTOUV(v0.8h, v1.8h, v2.8h)
- MEMACCESS(2)
- "st1 {v0.8b}, [%2], #8 \n" // store 8 pixels U.
- MEMACCESS(3)
- "st1 {v1.8b}, [%3], #8 \n" // store 8 pixels V.
- "b.gt 1b \n" // loop while the count left by subs is > 0
- : "+r"(src_argb), // %0 (advanced by the post-indexed load)
- "+r"(src_argb_1), // %1 (advanced by the post-indexed load)
- "+r"(dst_u), // %2 (advanced by the post-indexed store)
- "+r"(dst_v), // %3 (advanced by the post-indexed store)
- "+r"(pix) // %4 (decremented in place)
- :
- : "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7",
- "v20", "v21", "v22", "v23", "v24", "v25"
- );
-}
-#endif // HAS_ARGBTOUVROW_NEON
-
-// TODO(fbarchard): Subsample match C code.
-#ifdef HAS_ARGBTOUVJROW_NEON
-void ARGBToUVJRow_NEON(const uint8* src_argb, int src_stride_argb, // Like the other 2-row UV converters here, but with its own coefficient set loaded below instead of RGBTOUV_SETUP_REG.
- uint8* dst_u, uint8* dst_v, int pix) { // pix counts columns; 16 columns are consumed per iteration
- const uint8* src_argb_1 = src_argb + src_stride_argb; // second source row
- asm volatile ( // The body runs once before the count is tested, so pix is effectively rounded up to a multiple of 16.
- "movi v20.8h, #63, lsl #0 \n" // UB/VR coeff (0.500) / 2
- "movi v21.8h, #42, lsl #0 \n" // UG coeff (-0.33126) / 2
- "movi v22.8h, #21, lsl #0 \n" // UR coeff (-0.16874) / 2
- "movi v23.8h, #10, lsl #0 \n" // VB coeff (-0.08131) / 2
- "movi v24.8h, #53, lsl #0 \n" // VG coeff (-0.41869) / 2
- "movi v25.16b, #0x80 \n" // 128.5 (0x8080 in 16-bit)
- "1: \n"
- MEMACCESS(0)
- "ld4 {v0.16b,v1.16b,v2.16b,v3.16b}, [%0], #64 \n" // load 16 pixels.
- "uaddlp v0.8h, v0.16b \n" // B 16 bytes -> 8 shorts (sum of each adjacent pair).
- "uaddlp v1.8h, v1.16b \n" // G 16 bytes -> 8 shorts.
- "uaddlp v2.8h, v2.16b \n" // R 16 bytes -> 8 shorts.
- MEMACCESS(1)
- "ld4 {v4.16b,v5.16b,v6.16b,v7.16b}, [%1], #64 \n" // load 16 pixels of the second row
- "uadalp v0.8h, v4.16b \n" // B: add second-row pair sums (lane = 2x2 sum).
- "uadalp v1.8h, v5.16b \n" // G: add second-row pair sums.
- "uadalp v2.8h, v6.16b \n" // R: add second-row pair sums.
-
- "urshr v0.8h, v0.8h, #1 \n" // 2x average (rounded halving of the 2x2 sum)
- "urshr v1.8h, v1.8h, #1 \n"
- "urshr v2.8h, v2.8h, #1 \n"
-
- "subs %w4, %w4, #16 \n" // 16 columns (x2 rows = 32 pixels) processed per loop.
- RGBTOUV(v0.8h, v1.8h, v2.8h)
- MEMACCESS(2)
- "st1 {v0.8b}, [%2], #8 \n" // store 8 pixels U.
- MEMACCESS(3)
- "st1 {v1.8b}, [%3], #8 \n" // store 8 pixels V.
- "b.gt 1b \n" // loop while the count left by subs is > 0
- : "+r"(src_argb), // %0 (advanced by the post-indexed load)
- "+r"(src_argb_1), // %1 (advanced by the post-indexed load)
- "+r"(dst_u), // %2 (advanced by the post-indexed store)
- "+r"(dst_v), // %3 (advanced by the post-indexed store)
- "+r"(pix) // %4 (decremented in place)
- :
- : "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7",
- "v20", "v21", "v22", "v23", "v24", "v25"
- );
-}
-#endif // HAS_ARGBTOUVJROW_NEON
-
-#ifdef HAS_BGRATOUVROW_NEON
-void BGRAToUVRow_NEON(const uint8* src_bgra, int src_stride_bgra, // Produces one U and one V byte per 2x2 block of BGRA pixels taken from two rows.
- uint8* dst_u, uint8* dst_v, int pix) { // pix counts columns; 16 columns are consumed per iteration
- const uint8* src_bgra_1 = src_bgra + src_stride_bgra; // second source row
- asm volatile ( // The body runs once before the count is tested, so pix is effectively rounded up to a multiple of 16.
- RGBTOUV_SETUP_REG // macro defined outside this block; presumably loads the coefficients/bias that RGBTOUV reads from v20-v25 -- confirm
- "1: \n"
- MEMACCESS(0)
- "ld4 {v0.16b,v1.16b,v2.16b,v3.16b}, [%0], #64 \n" // load 16 pixels; B/G/R are taken from v3/v2/v1.
- "uaddlp v0.8h, v3.16b \n" // B 16 bytes -> 8 shorts.
- "uaddlp v3.8h, v2.16b \n" // G 16 bytes -> 8 shorts (parked in v3 because v1 still holds R bytes).
- "uaddlp v2.8h, v1.16b \n" // R 16 bytes -> 8 shorts.
- MEMACCESS(1)
- "ld4 {v4.16b,v5.16b,v6.16b,v7.16b}, [%1], #64 \n" // load 16 pixels of the second row
- "uadalp v0.8h, v7.16b \n" // B: add second-row pair sums.
- "uadalp v3.8h, v6.16b \n" // G: add second-row pair sums.
- "uadalp v2.8h, v5.16b \n" // R: add second-row pair sums.
-
- "urshr v0.8h, v0.8h, #1 \n" // 2x average
- "urshr v1.8h, v3.8h, #1 \n" // G moves from v3 to v1 here
- "urshr v2.8h, v2.8h, #1 \n"
-
- "subs %w4, %w4, #16 \n" // 16 columns (x2 rows = 32 pixels) processed per loop.
- RGBTOUV(v0.8h, v1.8h, v2.8h)
- MEMACCESS(2)
- "st1 {v0.8b}, [%2], #8 \n" // store 8 pixels U.
- MEMACCESS(3)
- "st1 {v1.8b}, [%3], #8 \n" // store 8 pixels V.
- "b.gt 1b \n" // loop while the count left by subs is > 0
- : "+r"(src_bgra), // %0 (advanced by the post-indexed load)
- "+r"(src_bgra_1), // %1 (advanced by the post-indexed load)
- "+r"(dst_u), // %2 (advanced by the post-indexed store)
- "+r"(dst_v), // %3 (advanced by the post-indexed store)
- "+r"(pix) // %4 (decremented in place)
- :
- : "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7",
- "v20", "v21", "v22", "v23", "v24", "v25"
- );
-}
-#endif // HAS_BGRATOUVROW_NEON
-
-#ifdef HAS_ABGRTOUVROW_NEON
-void ABGRToUVRow_NEON(const uint8* src_abgr, int src_stride_abgr, // Produces one U and one V byte per 2x2 block of ABGR pixels taken from two rows.
- uint8* dst_u, uint8* dst_v, int pix) { // pix counts columns; 16 columns are consumed per iteration
- const uint8* src_abgr_1 = src_abgr + src_stride_abgr; // second source row
- asm volatile ( // The body runs once before the count is tested, so pix is effectively rounded up to a multiple of 16.
- RGBTOUV_SETUP_REG // macro defined outside this block; presumably loads the coefficients/bias that RGBTOUV reads from v20-v25 -- confirm
- "1: \n"
- MEMACCESS(0)
- "ld4 {v0.16b,v1.16b,v2.16b,v3.16b}, [%0], #64 \n" // load 16 pixels; B/G/R are taken from v2/v1/v0.
- "uaddlp v3.8h, v2.16b \n" // B 16 bytes -> 8 shorts.
- "uaddlp v2.8h, v1.16b \n" // G 16 bytes -> 8 shorts.
- "uaddlp v1.8h, v0.16b \n" // R 16 bytes -> 8 shorts.
- MEMACCESS(1)
- "ld4 {v4.16b,v5.16b,v6.16b,v7.16b}, [%1], #64 \n" // load 16 pixels of the second row
- "uadalp v3.8h, v6.16b \n" // B: add second-row pair sums.
- "uadalp v2.8h, v5.16b \n" // G: add second-row pair sums.
- "uadalp v1.8h, v4.16b \n" // R: add second-row pair sums.
-
- "urshr v0.8h, v3.8h, #1 \n" // 2x average; B moves from v3 to v0, freeing v3 as RGBTOUV scratch
- "urshr v2.8h, v2.8h, #1 \n"
- "urshr v1.8h, v1.8h, #1 \n"
-
- "subs %w4, %w4, #16 \n" // 16 columns (x2 rows = 32 pixels) processed per loop.
- RGBTOUV(v0.8h, v2.8h, v1.8h)
- MEMACCESS(2)
- "st1 {v0.8b}, [%2], #8 \n" // store 8 pixels U.
- MEMACCESS(3)
- "st1 {v1.8b}, [%3], #8 \n" // store 8 pixels V.
- "b.gt 1b \n" // loop while the count left by subs is > 0
- : "+r"(src_abgr), // %0 (advanced by the post-indexed load)
- "+r"(src_abgr_1), // %1 (advanced by the post-indexed load)
- "+r"(dst_u), // %2 (advanced by the post-indexed store)
- "+r"(dst_v), // %3 (advanced by the post-indexed store)
- "+r"(pix) // %4 (decremented in place)
- :
- : "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7",
- "v20", "v21", "v22", "v23", "v24", "v25"
- );
-}
-#endif // HAS_ABGRTOUVROW_NEON
-
-#ifdef HAS_RGBATOUVROW_NEON
-void RGBAToUVRow_NEON(const uint8* src_rgba, int src_stride_rgba, // Produces one U and one V byte per 2x2 block of RGBA pixels taken from two rows.
- uint8* dst_u, uint8* dst_v, int pix) { // pix counts columns; 16 columns are consumed per iteration
- const uint8* src_rgba_1 = src_rgba + src_stride_rgba; // second source row
- asm volatile ( // The body runs once before the count is tested, so pix is effectively rounded up to a multiple of 16.
- RGBTOUV_SETUP_REG // macro defined outside this block; presumably loads the coefficients/bias that RGBTOUV reads from v20-v25 -- confirm
- "1: \n"
- MEMACCESS(0)
- "ld4 {v0.16b,v1.16b,v2.16b,v3.16b}, [%0], #64 \n" // load 16 pixels; B/G/R are taken from v1/v2/v3.
- "uaddlp v0.8h, v1.16b \n" // B 16 bytes -> 8 shorts.
- "uaddlp v1.8h, v2.16b \n" // G 16 bytes -> 8 shorts.
- "uaddlp v2.8h, v3.16b \n" // R 16 bytes -> 8 shorts.
- MEMACCESS(1)
- "ld4 {v4.16b,v5.16b,v6.16b,v7.16b}, [%1], #64 \n" // load 16 pixels of the second row
- "uadalp v0.8h, v5.16b \n" // B: add second-row pair sums.
- "uadalp v1.8h, v6.16b \n" // G: add second-row pair sums.
- "uadalp v2.8h, v7.16b \n" // R: add second-row pair sums.
-
- "urshr v0.8h, v0.8h, #1 \n" // 2x average
- "urshr v1.8h, v1.8h, #1 \n"
- "urshr v2.8h, v2.8h, #1 \n"
-
- "subs %w4, %w4, #16 \n" // 16 columns (x2 rows = 32 pixels) processed per loop.
- RGBTOUV(v0.8h, v1.8h, v2.8h)
- MEMACCESS(2)
- "st1 {v0.8b}, [%2], #8 \n" // store 8 pixels U.
- MEMACCESS(3)
- "st1 {v1.8b}, [%3], #8 \n" // store 8 pixels V.
- "b.gt 1b \n" // loop while the count left by subs is > 0
- : "+r"(src_rgba), // %0 (advanced by the post-indexed load)
- "+r"(src_rgba_1), // %1 (advanced by the post-indexed load)
- "+r"(dst_u), // %2 (advanced by the post-indexed store)
- "+r"(dst_v), // %3 (advanced by the post-indexed store)
- "+r"(pix) // %4 (decremented in place)
- :
- : "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7",
- "v20", "v21", "v22", "v23", "v24", "v25"
- );
-}
-#endif // HAS_RGBATOUVROW_NEON
-
-#ifdef HAS_RGB24TOUVROW_NEON
-void RGB24ToUVRow_NEON(const uint8* src_rgb24, int src_stride_rgb24, // Produces one U and one V byte per 2x2 block of 3-byte RGB24 pixels taken from two rows.
- uint8* dst_u, uint8* dst_v, int pix) { // pix counts columns; 16 columns are consumed per iteration
- const uint8* src_rgb24_1 = src_rgb24 + src_stride_rgb24; // second source row
- asm volatile ( // The body runs once before the count is tested, so pix is effectively rounded up to a multiple of 16.
- RGBTOUV_SETUP_REG // macro defined outside this block; presumably loads the coefficients/bias that RGBTOUV reads from v20-v25 -- confirm
- "1: \n"
- MEMACCESS(0)
- "ld3 {v0.16b,v1.16b,v2.16b}, [%0], #48 \n" // load 16 pixels (48 bytes, 3 channels).
- "uaddlp v0.8h, v0.16b \n" // B 16 bytes -> 8 shorts.
- "uaddlp v1.8h, v1.16b \n" // G 16 bytes -> 8 shorts.
- "uaddlp v2.8h, v2.16b \n" // R 16 bytes -> 8 shorts.
- MEMACCESS(1)
- "ld3 {v4.16b,v5.16b,v6.16b}, [%1], #48 \n" // load 16 pixels of the second row.
- "uadalp v0.8h, v4.16b \n" // B: add second-row pair sums.
- "uadalp v1.8h, v5.16b \n" // G: add second-row pair sums.
- "uadalp v2.8h, v6.16b \n" // R: add second-row pair sums.
-
- "urshr v0.8h, v0.8h, #1 \n" // 2x average
- "urshr v1.8h, v1.8h, #1 \n"
- "urshr v2.8h, v2.8h, #1 \n"
-
- "subs %w4, %w4, #16 \n" // 16 columns (x2 rows = 32 pixels) processed per loop.
- RGBTOUV(v0.8h, v1.8h, v2.8h)
- MEMACCESS(2)
- "st1 {v0.8b}, [%2], #8 \n" // store 8 pixels U.
- MEMACCESS(3)
- "st1 {v1.8b}, [%3], #8 \n" // store 8 pixels V.
- "b.gt 1b \n" // loop while the count left by subs is > 0
- : "+r"(src_rgb24), // %0 (advanced by the post-indexed load)
- "+r"(src_rgb24_1), // %1 (advanced by the post-indexed load)
- "+r"(dst_u), // %2 (advanced by the post-indexed store)
- "+r"(dst_v), // %3 (advanced by the post-indexed store)
- "+r"(pix) // %4 (decremented in place)
- :
- : "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7",
- "v20", "v21", "v22", "v23", "v24", "v25"
- );
-}
-#endif // HAS_RGB24TOUVROW_NEON
-
-#ifdef HAS_RAWTOUVROW_NEON
-void RAWToUVRow_NEON(const uint8* src_raw, int src_stride_raw, // Produces one U and one V byte per 2x2 block of 3-byte RAW pixels taken from two rows.
- uint8* dst_u, uint8* dst_v, int pix) { // pix counts columns; 16 columns are consumed per iteration
- const uint8* src_raw_1 = src_raw + src_stride_raw; // second source row
- asm volatile ( // The body runs once before the count is tested, so pix is effectively rounded up to a multiple of 16.
- RGBTOUV_SETUP_REG // macro defined outside this block; presumably loads the coefficients/bias that RGBTOUV reads from v20-v25 -- confirm
- "1: \n"
- MEMACCESS(0)
- "ld3 {v0.16b,v1.16b,v2.16b}, [%0], #48 \n" // load 16 RAW pixels (48 bytes); v0/v1/v2 are used as R/G/B.
- "uaddlp v2.8h, v2.16b \n" // B 16 bytes -> 8 shorts.
- "uaddlp v1.8h, v1.16b \n" // G 16 bytes -> 8 shorts.
- "uaddlp v0.8h, v0.16b \n" // R 16 bytes -> 8 shorts.
- MEMACCESS(1)
- "ld3 {v4.16b,v5.16b,v6.16b}, [%1], #48 \n" // load 16 RAW pixels of the second row
- "uadalp v2.8h, v6.16b \n" // B: add second-row pair sums.
- "uadalp v1.8h, v5.16b \n" // G: add second-row pair sums.
- "uadalp v0.8h, v4.16b \n" // R: add second-row pair sums.
-
- "urshr v2.8h, v2.8h, #1 \n" // 2x average
- "urshr v1.8h, v1.8h, #1 \n"
- "urshr v0.8h, v0.8h, #1 \n"
-
- "subs %w4, %w4, #16 \n" // 16 columns (x2 rows = 32 pixels) processed per loop.
- RGBTOUV(v2.8h, v1.8h, v0.8h)
- MEMACCESS(2)
- "st1 {v0.8b}, [%2], #8 \n" // store 8 pixels U.
- MEMACCESS(3)
- "st1 {v1.8b}, [%3], #8 \n" // store 8 pixels V.
- "b.gt 1b \n" // loop while the count left by subs is > 0
- : "+r"(src_raw), // %0 (advanced by the post-indexed load)
- "+r"(src_raw_1), // %1 (advanced by the post-indexed load)
- "+r"(dst_u), // %2 (advanced by the post-indexed store)
- "+r"(dst_v), // %3 (advanced by the post-indexed store)
- "+r"(pix) // %4 (decremented in place)
- :
- : "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7",
- "v20", "v21", "v22", "v23", "v24", "v25"
- );
-}
-#endif // HAS_RAWTOUVROW_NEON
-
-// 16x2 pixels -> 8x1. pix is the number of source pixels per row, e.g. 16.
-#ifdef HAS_RGB565TOUVROW_NEON
-void RGB565ToUVRow_NEON(const uint8* src_rgb565, int src_stride_rgb565, // Produces one U and one V byte per 2x2 block of RGB565 pixels taken from two rows.
- uint8* dst_u, uint8* dst_v, int pix) { // pix counts columns; 16 columns are consumed per iteration
- const uint8* src_rgb565_1 = src_rgb565 + src_stride_rgb565; // second source row
- asm volatile ( // The body runs once before the count is tested, so pix is effectively rounded up to a multiple of 16.
- "movi v22.8h, #56, lsl #0 \n" // UB / VR coeff (0.875) / 2
- "movi v23.8h, #37, lsl #0 \n" // UG coeff (-0.5781) / 2
- "movi v24.8h, #19, lsl #0 \n" // UR coeff (-0.2969) / 2
- "movi v25.8h, #9 , lsl #0 \n" // VB coeff (-0.1406) / 2
- "movi v26.8h, #47, lsl #0 \n" // VG coeff (-0.7344) / 2
- "movi v27.16b, #0x80 \n" // 128.5 (0x8080 in 16-bit)
- "1: \n"
- MEMACCESS(0)
- "ld1 {v0.16b}, [%0], #16 \n" // load 8 RGB565 pixels.
- RGB565TOARGB // unpack macro defined outside this block; the code below reads B/G/R from v0/v1/v2
- "uaddlp v16.4h, v0.8b \n" // B 8 bytes -> 4 shorts.
- "uaddlp v18.4h, v1.8b \n" // G 8 bytes -> 4 shorts.
- "uaddlp v20.4h, v2.8b \n" // R 8 bytes -> 4 shorts.
- MEMACCESS(0)
- "ld1 {v0.16b}, [%0], #16 \n" // next 8 RGB565 pixels.
- RGB565TOARGB
- "uaddlp v17.4h, v0.8b \n" // B 8 bytes -> 4 shorts.
- "uaddlp v19.4h, v1.8b \n" // G 8 bytes -> 4 shorts.
- "uaddlp v21.4h, v2.8b \n" // R 8 bytes -> 4 shorts.
-
- MEMACCESS(1)
- "ld1 {v0.16b}, [%1], #16 \n" // load 8 RGB565 pixels of the second row.
- RGB565TOARGB
- "uadalp v16.4h, v0.8b \n" // B: add second-row pair sums.
- "uadalp v18.4h, v1.8b \n" // G: add second-row pair sums.
- "uadalp v20.4h, v2.8b \n" // R: add second-row pair sums.
- MEMACCESS(1)
- "ld1 {v0.16b}, [%1], #16 \n" // next 8 RGB565 pixels of the second row.
- RGB565TOARGB
- "uadalp v17.4h, v0.8b \n" // B: add second-row pair sums.
- "uadalp v19.4h, v1.8b \n" // G: add second-row pair sums.
- "uadalp v21.4h, v2.8b \n" // R: add second-row pair sums.
-
- "ins v16.D[1], v17.D[0] \n" // merge the two 4-lane halves into one 8-lane B vector
- "ins v18.D[1], v19.D[0] \n" // same for G
- "ins v20.D[1], v21.D[0] \n" // same for R
-
- "urshr v4.8h, v16.8h, #1 \n" // 2x average
- "urshr v5.8h, v18.8h, #1 \n"
- "urshr v6.8h, v20.8h, #1 \n"
-
- "subs %w4, %w4, #16 \n" // 16 processed per loop.
- "mul v16.8h, v4.8h, v22.8h \n" // B
- "mls v16.8h, v5.8h, v23.8h \n" // G
- "mls v16.8h, v6.8h, v24.8h \n" // R
- "add v16.8h, v16.8h, v27.8h \n" // +128 -> unsigned
- "mul v17.8h, v6.8h, v22.8h \n" // R
- "mls v17.8h, v5.8h, v26.8h \n" // G
- "mls v17.8h, v4.8h, v25.8h \n" // B
- "add v17.8h, v17.8h, v27.8h \n" // +128 -> unsigned
- "uqshrn v0.8b, v16.8h, #8 \n" // 16 bit to 8 bit U
- "uqshrn v1.8b, v17.8h, #8 \n" // 16 bit to 8 bit V
- MEMACCESS(2)
- "st1 {v0.8b}, [%2], #8 \n" // store 8 pixels U.
- MEMACCESS(3)
- "st1 {v1.8b}, [%3], #8 \n" // store 8 pixels V.
- "b.gt 1b \n" // loop while the count left by subs is > 0
- : "+r"(src_rgb565), // %0 (advanced by the post-indexed loads)
- "+r"(src_rgb565_1), // %1 (advanced by the post-indexed loads)
- "+r"(dst_u), // %2 (advanced by the post-indexed store)
- "+r"(dst_v), // %3 (advanced by the post-indexed store)
- "+r"(pix) // %4 (decremented in place)
- :
- : "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7",
- "v16", "v17", "v18", "v19", "v20", "v21", "v22", "v23", "v24",
- "v25", "v26", "v27"
- );
-}
-#endif // HAS_RGB565TOUVROW_NEON
-
-// 16x2 pixels -> 8x1. pix is the number of source pixels per row, e.g. 16.
-#ifdef HAS_ARGB1555TOUVROW_NEON
-void ARGB1555ToUVRow_NEON(const uint8* src_argb1555, int src_stride_argb1555, // Produces one U and one V byte per 2x2 block of ARGB1555 pixels taken from two rows.
- uint8* dst_u, uint8* dst_v, int pix) { // pix counts columns; 16 columns are consumed per iteration
- const uint8* src_argb1555_1 = src_argb1555 + src_stride_argb1555; // second source row
- asm volatile ( // The body runs once before the count is tested, so pix is effectively rounded up to a multiple of 16.
- RGBTOUV_SETUP_REG // macro defined outside this block; presumably loads the coefficients/bias read below from v20-v25 -- confirm
- "1: \n"
- MEMACCESS(0)
- "ld1 {v0.16b}, [%0], #16 \n" // load 8 ARGB1555 pixels.
- RGB555TOARGB // unpack macro defined outside this block; the code below reads B/G/R from v0/v1/v2
- "uaddlp v16.4h, v0.8b \n" // B 8 bytes -> 4 shorts.
- "uaddlp v17.4h, v1.8b \n" // G 8 bytes -> 4 shorts.
- "uaddlp v18.4h, v2.8b \n" // R 8 bytes -> 4 shorts.
- MEMACCESS(0)
- "ld1 {v0.16b}, [%0], #16 \n" // next 8 ARGB1555 pixels.
- RGB555TOARGB
- "uaddlp v26.4h, v0.8b \n" // B 8 bytes -> 4 shorts.
- "uaddlp v27.4h, v1.8b \n" // G 8 bytes -> 4 shorts.
- "uaddlp v28.4h, v2.8b \n" // R 8 bytes -> 4 shorts.
-
- MEMACCESS(1)
- "ld1 {v0.16b}, [%1], #16 \n" // load 8 ARGB1555 pixels of the second row.
- RGB555TOARGB
- "uadalp v16.4h, v0.8b \n" // B: add second-row pair sums.
- "uadalp v17.4h, v1.8b \n" // G: add second-row pair sums.
- "uadalp v18.4h, v2.8b \n" // R: add second-row pair sums.
- MEMACCESS(1)
- "ld1 {v0.16b}, [%1], #16 \n" // next 8 ARGB1555 pixels of the second row.
- RGB555TOARGB
- "uadalp v26.4h, v0.8b \n" // B: add second-row pair sums.
- "uadalp v27.4h, v1.8b \n" // G: add second-row pair sums.
- "uadalp v28.4h, v2.8b \n" // R: add second-row pair sums.
-
- "ins v16.D[1], v26.D[0] \n" // merge the two 4-lane halves into one 8-lane B vector
- "ins v17.D[1], v27.D[0] \n" // same for G
- "ins v18.D[1], v28.D[0] \n" // same for R
-
- "urshr v4.8h, v16.8h, #1 \n" // 2x average
- "urshr v5.8h, v17.8h, #1 \n"
- "urshr v6.8h, v18.8h, #1 \n"
-
- "subs %w4, %w4, #16 \n" // 16 processed per loop.
- "mul v2.8h, v4.8h, v20.8h \n" // B
- "mls v2.8h, v5.8h, v21.8h \n" // G
- "mls v2.8h, v6.8h, v22.8h \n" // R
- "add v2.8h, v2.8h, v25.8h \n" // +128 -> unsigned
- "mul v3.8h, v6.8h, v20.8h \n" // R
- "mls v3.8h, v5.8h, v24.8h \n" // G
- "mls v3.8h, v4.8h, v23.8h \n" // B
- "add v3.8h, v3.8h, v25.8h \n" // +128 -> unsigned
- "uqshrn v0.8b, v2.8h, #8 \n" // 16 bit to 8 bit U
- "uqshrn v1.8b, v3.8h, #8 \n" // 16 bit to 8 bit V
- MEMACCESS(2)
- "st1 {v0.8b}, [%2], #8 \n" // store 8 pixels U.
- MEMACCESS(3)
- "st1 {v1.8b}, [%3], #8 \n" // store 8 pixels V.
- "b.gt 1b \n" // loop while the count left by subs is > 0
- : "+r"(src_argb1555), // %0 (advanced by the post-indexed loads)
- "+r"(src_argb1555_1), // %1 (advanced by the post-indexed loads)
- "+r"(dst_u), // %2 (advanced by the post-indexed store)
- "+r"(dst_v), // %3 (advanced by the post-indexed store)
- "+r"(pix) // %4 (decremented in place)
- :
- : "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6",
- "v16", "v17", "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25",
- "v26", "v27", "v28"
- );
-}
-#endif // HAS_ARGB1555TOUVROW_NEON
-
-// 16x2 pixels -> 8x1. pix is the number of source pixels per row, e.g. 16.
-#ifdef HAS_ARGB4444TOUVROW_NEON
-void ARGB4444ToUVRow_NEON(const uint8* src_argb4444, int src_stride_argb4444, // Produces one U and one V byte per 2x2 block of ARGB4444 pixels taken from two rows.
- uint8* dst_u, uint8* dst_v, int pix) { // pix counts columns; 16 columns are consumed per iteration
- const uint8* src_argb4444_1 = src_argb4444 + src_stride_argb4444; // second source row
- asm volatile ( // The body runs once before the count is tested, so pix is effectively rounded up to a multiple of 16.
- RGBTOUV_SETUP_REG // macro defined outside this block; presumably loads the coefficients/bias read below from v20-v25 -- confirm
- "1: \n"
- MEMACCESS(0)
- "ld1 {v0.16b}, [%0], #16 \n" // load 8 ARGB4444 pixels.
- ARGB4444TOARGB // unpack macro defined outside this block; the code below reads B/G/R from v0/v1/v2
- "uaddlp v16.4h, v0.8b \n" // B 8 bytes -> 4 shorts.
- "uaddlp v17.4h, v1.8b \n" // G 8 bytes -> 4 shorts.
- "uaddlp v18.4h, v2.8b \n" // R 8 bytes -> 4 shorts.
- MEMACCESS(0)
- "ld1 {v0.16b}, [%0], #16 \n" // next 8 ARGB4444 pixels.
- ARGB4444TOARGB
- "uaddlp v26.4h, v0.8b \n" // B 8 bytes -> 4 shorts.
- "uaddlp v27.4h, v1.8b \n" // G 8 bytes -> 4 shorts.
- "uaddlp v28.4h, v2.8b \n" // R 8 bytes -> 4 shorts.
-
- MEMACCESS(1)
- "ld1 {v0.16b}, [%1], #16 \n" // load 8 ARGB4444 pixels of the second row.
- ARGB4444TOARGB
- "uadalp v16.4h, v0.8b \n" // B: add second-row pair sums.
- "uadalp v17.4h, v1.8b \n" // G: add second-row pair sums.
- "uadalp v18.4h, v2.8b \n" // R: add second-row pair sums.
- MEMACCESS(1)
- "ld1 {v0.16b}, [%1], #16 \n" // next 8 ARGB4444 pixels of the second row.
- ARGB4444TOARGB
- "uadalp v26.4h, v0.8b \n" // B: add second-row pair sums.
- "uadalp v27.4h, v1.8b \n" // G: add second-row pair sums.
- "uadalp v28.4h, v2.8b \n" // R: add second-row pair sums.
-
- "ins v16.D[1], v26.D[0] \n" // merge the two 4-lane halves into one 8-lane B vector
- "ins v17.D[1], v27.D[0] \n" // same for G
- "ins v18.D[1], v28.D[0] \n" // same for R
-
- "urshr v4.8h, v16.8h, #1 \n" // 2x average
- "urshr v5.8h, v17.8h, #1 \n"
- "urshr v6.8h, v18.8h, #1 \n"
-
- "subs %w4, %w4, #16 \n" // 16 processed per loop.
- "mul v2.8h, v4.8h, v20.8h \n" // B
- "mls v2.8h, v5.8h, v21.8h \n" // G
- "mls v2.8h, v6.8h, v22.8h \n" // R
- "add v2.8h, v2.8h, v25.8h \n" // +128 -> unsigned
- "mul v3.8h, v6.8h, v20.8h \n" // R
- "mls v3.8h, v5.8h, v24.8h \n" // G
- "mls v3.8h, v4.8h, v23.8h \n" // B
- "add v3.8h, v3.8h, v25.8h \n" // +128 -> unsigned
- "uqshrn v0.8b, v2.8h, #8 \n" // 16 bit to 8 bit U
- "uqshrn v1.8b, v3.8h, #8 \n" // 16 bit to 8 bit V
- MEMACCESS(2)
- "st1 {v0.8b}, [%2], #8 \n" // store 8 pixels U.
- MEMACCESS(3)
- "st1 {v1.8b}, [%3], #8 \n" // store 8 pixels V.
- "b.gt 1b \n" // loop while the count left by subs is > 0
- : "+r"(src_argb4444), // %0 (advanced by the post-indexed loads)
- "+r"(src_argb4444_1), // %1 (advanced by the post-indexed loads)
- "+r"(dst_u), // %2 (advanced by the post-indexed store)
- "+r"(dst_v), // %3 (advanced by the post-indexed store)
- "+r"(pix) // %4 (decremented in place)
- :
- : "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6",
- "v16", "v17", "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25",
- "v26", "v27", "v28"
-
- );
-}
-#endif // HAS_ARGB4444TOUVROW_NEON
-
-#ifdef HAS_RGB565TOYROW_NEON
-void RGB565ToYRow_NEON(const uint8* src_rgb565, uint8* dst_y, int pix) { // Computes one luma byte per RGB565 pixel: Y = sat8(round((13*B + 65*G + 33*R) / 128)) + 16, with a saturating add.
- asm volatile ( // The body runs once before the count is tested, so pix is effectively rounded up to a multiple of 8.
- "movi v24.8b, #13 \n" // B * 0.1016 coefficient
- "movi v25.8b, #65 \n" // G * 0.5078 coefficient
- "movi v26.8b, #33 \n" // R * 0.2578 coefficient
- "movi v27.8b, #16 \n" // Add 16 constant
- "1: \n"
- MEMACCESS(0)
- "ld1 {v0.16b}, [%0], #16 \n" // load 8 RGB565 pixels.
- "subs %w2, %w2, #8 \n" // 8 processed per loop.
- RGB565TOARGB // unpack macro defined outside this block; the code below reads B/G/R from v0/v1/v2
- "umull v3.8h, v0.8b, v24.8b \n" // B
- "umlal v3.8h, v1.8b, v25.8b \n" // G
- "umlal v3.8h, v2.8b, v26.8b \n" // R
- "sqrshrun v0.8b, v3.8h, #7 \n" // 16 bit to 8 bit Y (rounding shift right by 7, saturating narrow)
- "uqadd v0.8b, v0.8b, v27.8b \n" // + 16, saturating
- MEMACCESS(1)
- "st1 {v0.8b}, [%1], #8 \n" // store 8 pixels Y.
- "b.gt 1b \n" // loop while the count left by subs is > 0
- : "+r"(src_rgb565), // %0 (advanced by the post-indexed load)
- "+r"(dst_y), // %1 (advanced by the post-indexed store)
- "+r"(pix) // %2 (decremented in place)
- :
- : "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v6", // v4/v6 are not named in this block; presumably scratch of RGB565TOARGB -- confirm
- "v24", "v25", "v26", "v27"
- );
-}
-#endif // HAS_RGB565TOYROW_NEON
-
-#ifdef HAS_ARGB1555TOYROW_NEON
-void ARGB1555ToYRow_NEON(const uint8* src_argb1555, uint8* dst_y, int pix) { // Computes one luma byte per ARGB1555 pixel: Y = sat8(round((13*B + 65*G + 33*R) / 128)) + 16, with a saturating add.
- asm volatile ( // The body runs once before the count is tested, so pix is effectively rounded up to a multiple of 8.
- "movi v4.8b, #13 \n" // B * 0.1016 coefficient
- "movi v5.8b, #65 \n" // G * 0.5078 coefficient
- "movi v6.8b, #33 \n" // R * 0.2578 coefficient
- "movi v7.8b, #16 \n" // Add 16 constant
- "1: \n"
- MEMACCESS(0)
- "ld1 {v0.16b}, [%0], #16 \n" // load 8 ARGB1555 pixels.
- "subs %w2, %w2, #8 \n" // 8 processed per loop.
- ARGB1555TOARGB // NOTE(review): v4-v7 hold loop-invariant constants across this macro (defined outside this block); confirm it does not write them
- "umull v3.8h, v0.8b, v4.8b \n" // B
- "umlal v3.8h, v1.8b, v5.8b \n" // G
- "umlal v3.8h, v2.8b, v6.8b \n" // R
- "sqrshrun v0.8b, v3.8h, #7 \n" // 16 bit to 8 bit Y (rounding shift right by 7, saturating narrow)
- "uqadd v0.8b, v0.8b, v7.8b \n" // + 16, saturating
- MEMACCESS(1)
- "st1 {v0.8b}, [%1], #8 \n" // store 8 pixels Y.
- "b.gt 1b \n" // loop while the count left by subs is > 0
- : "+r"(src_argb1555), // %0 (advanced by the post-indexed load)
- "+r"(dst_y), // %1 (advanced by the post-indexed store)
- "+r"(pix) // %2 (decremented in place)
- :
- : "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7"
- );
-}
-#endif // HAS_ARGB1555TOYROW_NEON
-
-#ifdef HAS_ARGB4444TOYROW_NEON
-void ARGB4444ToYRow_NEON(const uint8* src_argb4444, uint8* dst_y, int pix) { // Computes one luma byte per ARGB4444 pixel: Y = sat8(round((13*B + 65*G + 33*R) / 128)) + 16, with a saturating add.
- asm volatile ( // The body runs once before the count is tested, so pix is effectively rounded up to a multiple of 8.
- "movi v24.8b, #13 \n" // B * 0.1016 coefficient
- "movi v25.8b, #65 \n" // G * 0.5078 coefficient
- "movi v26.8b, #33 \n" // R * 0.2578 coefficient
- "movi v27.8b, #16 \n" // Add 16 constant
- "1: \n"
- MEMACCESS(0)
- "ld1 {v0.16b}, [%0], #16 \n" // load 8 ARGB4444 pixels.
- "subs %w2, %w2, #8 \n" // 8 processed per loop.
- ARGB4444TOARGB // unpack macro defined outside this block; the code below reads B/G/R from v0/v1/v2
- "umull v3.8h, v0.8b, v24.8b \n" // B
- "umlal v3.8h, v1.8b, v25.8b \n" // G
- "umlal v3.8h, v2.8b, v26.8b \n" // R
- "sqrshrun v0.8b, v3.8h, #7 \n" // 16 bit to 8 bit Y (rounding shift right by 7, saturating narrow)
- "uqadd v0.8b, v0.8b, v27.8b \n" // + 16, saturating
- MEMACCESS(1)
- "st1 {v0.8b}, [%1], #8 \n" // store 8 pixels Y.
- "b.gt 1b \n" // loop while the count left by subs is > 0
- : "+r"(src_argb4444), // %0 (advanced by the post-indexed load)
- "+r"(dst_y), // %1 (advanced by the post-indexed store)
- "+r"(pix) // %2 (decremented in place)
- :
- : "cc", "memory", "v0", "v1", "v2", "v3", "v24", "v25", "v26", "v27"
- );
-}
-#endif // HAS_ARGB4444TOYROW_NEON
-
-#ifdef HAS_BGRATOYROW_NEON
-void BGRAToYRow_NEON(const uint8* src_bgra, uint8* dst_y, int pix) { // Computes one luma byte per BGRA pixel: Y = sat8(round((33*R + 65*G + 13*B) / 128)) + 16, with a saturating add.
- asm volatile ( // The body runs once before the count is tested, so pix is effectively rounded up to a multiple of 8.
- "movi v4.8b, #33 \n" // R * 0.2578 coefficient
- "movi v5.8b, #65 \n" // G * 0.5078 coefficient
- "movi v6.8b, #13 \n" // B * 0.1016 coefficient
- "movi v7.8b, #16 \n" // Add 16 constant
- "1: \n"
- MEMACCESS(0)
- "ld4 {v0.8b,v1.8b,v2.8b,v3.8b}, [%0], #32 \n" // load 8 pixels; v1/v2/v3 are used as R/G/B, v0 is not read.
- "subs %w2, %w2, #8 \n" // 8 processed per loop.
- "umull v16.8h, v1.8b, v4.8b \n" // R
- "umlal v16.8h, v2.8b, v5.8b \n" // G
- "umlal v16.8h, v3.8b, v6.8b \n" // B
- "sqrshrun v0.8b, v16.8h, #7 \n" // 16 bit to 8 bit Y (rounding shift right by 7, saturating narrow)
- "uqadd v0.8b, v0.8b, v7.8b \n" // + 16, saturating
- MEMACCESS(1)
- "st1 {v0.8b}, [%1], #8 \n" // store 8 pixels Y.
- "b.gt 1b \n" // loop while the count left by subs is > 0
- : "+r"(src_bgra), // %0 (advanced by the post-indexed load)
- "+r"(dst_y), // %1 (advanced by the post-indexed store)
- "+r"(pix) // %2 (decremented in place)
- :
- : "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v16"
- );
-}
-#endif // HAS_BGRATOYROW_NEON
-
-#ifdef HAS_ABGRTOYROW_NEON
-void ABGRToYRow_NEON(const uint8* src_abgr, uint8* dst_y, int pix) { // Computes one luma byte per ABGR pixel: Y = sat8(round((33*R + 65*G + 13*B) / 128)) + 16, with a saturating add.
- asm volatile ( // The body runs once before the count is tested, so pix is effectively rounded up to a multiple of 8.
- "movi v4.8b, #33 \n" // R * 0.2578 coefficient
- "movi v5.8b, #65 \n" // G * 0.5078 coefficient
- "movi v6.8b, #13 \n" // B * 0.1016 coefficient
- "movi v7.8b, #16 \n" // Add 16 constant
- "1: \n"
- MEMACCESS(0)
- "ld4 {v0.8b,v1.8b,v2.8b,v3.8b}, [%0], #32 \n" // load 8 pixels; v0/v1/v2 are used as R/G/B, v3 is not read.
- "subs %w2, %w2, #8 \n" // 8 processed per loop.
- "umull v16.8h, v0.8b, v4.8b \n" // R
- "umlal v16.8h, v1.8b, v5.8b \n" // G
- "umlal v16.8h, v2.8b, v6.8b \n" // B
- "sqrshrun v0.8b, v16.8h, #7 \n" // 16 bit to 8 bit Y (rounding shift right by 7, saturating narrow)
- "uqadd v0.8b, v0.8b, v7.8b \n" // + 16, saturating
- MEMACCESS(1)
- "st1 {v0.8b}, [%1], #8 \n" // store 8 pixels Y.
- "b.gt 1b \n" // loop while the count left by subs is > 0
- : "+r"(src_abgr), // %0 (advanced by the post-indexed load)
- "+r"(dst_y), // %1 (advanced by the post-indexed store)
- "+r"(pix) // %2 (decremented in place)
- :
- : "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v16"
- );
-}
-#endif // HAS_ABGRTOYROW_NEON
-
-#ifdef HAS_RGBATOYROW_NEON
-void RGBAToYRow_NEON(const uint8* src_rgba, uint8* dst_y, int pix) { // Computes one luma byte per RGBA pixel: Y = sat8(round((13*B + 65*G + 33*R) / 128)) + 16, with a saturating add.
- asm volatile ( // The body runs once before the count is tested, so pix is effectively rounded up to a multiple of 8.
- "movi v4.8b, #13 \n" // B * 0.1016 coefficient
- "movi v5.8b, #65 \n" // G * 0.5078 coefficient
- "movi v6.8b, #33 \n" // R * 0.2578 coefficient
- "movi v7.8b, #16 \n" // Add 16 constant
- "1: \n"
- MEMACCESS(0)
- "ld4 {v0.8b,v1.8b,v2.8b,v3.8b}, [%0], #32 \n" // load 8 pixels; v1/v2/v3 are used as B/G/R, v0 is not read.
- "subs %w2, %w2, #8 \n" // 8 processed per loop.
- "umull v16.8h, v1.8b, v4.8b \n" // B
- "umlal v16.8h, v2.8b, v5.8b \n" // G
- "umlal v16.8h, v3.8b, v6.8b \n" // R
- "sqrshrun v0.8b, v16.8h, #7 \n" // 16 bit to 8 bit Y (rounding shift right by 7, saturating narrow)
- "uqadd v0.8b, v0.8b, v7.8b \n" // + 16, saturating
- MEMACCESS(1)
- "st1 {v0.8b}, [%1], #8 \n" // store 8 pixels Y.
- "b.gt 1b \n" // loop while the count left by subs is > 0
- : "+r"(src_rgba), // %0 (advanced by the post-indexed load)
- "+r"(dst_y), // %1 (advanced by the post-indexed store)
- "+r"(pix) // %2 (decremented in place)
- :
- : "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v16"
- );
-}
-#endif // HAS_RGBATOYROW_NEON
-
-#ifdef HAS_RGB24TOYROW_NEON
-void RGB24ToYRow_NEON(const uint8* src_rgb24, uint8* dst_y, int pix) { // Computes one luma byte per 3-byte RGB24 pixel: Y = sat8(round((13*B + 65*G + 33*R) / 128)) + 16, with a saturating add.
- asm volatile ( // The body runs once before the count is tested, so pix is effectively rounded up to a multiple of 8.
- "movi v4.8b, #13 \n" // B * 0.1016 coefficient
- "movi v5.8b, #65 \n" // G * 0.5078 coefficient
- "movi v6.8b, #33 \n" // R * 0.2578 coefficient
- "movi v7.8b, #16 \n" // Add 16 constant
- "1: \n"
- MEMACCESS(0)
- "ld3 {v0.8b,v1.8b,v2.8b}, [%0], #24 \n" // load 8 pixels (24 bytes); v0/v1/v2 are used as B/G/R.
- "subs %w2, %w2, #8 \n" // 8 processed per loop.
- "umull v16.8h, v0.8b, v4.8b \n" // B
- "umlal v16.8h, v1.8b, v5.8b \n" // G
- "umlal v16.8h, v2.8b, v6.8b \n" // R
- "sqrshrun v0.8b, v16.8h, #7 \n" // 16 bit to 8 bit Y (rounding shift right by 7, saturating narrow)
- "uqadd v0.8b, v0.8b, v7.8b \n" // + 16, saturating
- MEMACCESS(1)
- "st1 {v0.8b}, [%1], #8 \n" // store 8 pixels Y.
- "b.gt 1b \n" // loop while the count left by subs is > 0
- : "+r"(src_rgb24), // %0 (advanced by the post-indexed load)
- "+r"(dst_y), // %1 (advanced by the post-indexed store)
- "+r"(pix) // %2 (decremented in place)
- :
- : "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v16"
- );
-}
-#endif // HAS_RGB24TOYROW_NEON
-
-#ifdef HAS_RAWTOYROW_NEON
-void RAWToYRow_NEON(const uint8* src_raw, uint8* dst_y, int pix) {
- asm volatile (
- "movi v4.8b, #33 \n" // R * 0.2578 coefficient
- "movi v5.8b, #65 \n" // G * 0.5078 coefficient
- "movi v6.8b, #13 \n" // B * 0.1016 coefficient
- "movi v7.8b, #16 \n" // Add 16 constant
- "1: \n"
- MEMACCESS(0)
- "ld3 {v0.8b,v1.8b,v2.8b}, [%0], #24 \n" // load 8 pixels.
- "subs %w2, %w2, #8 \n" // 8 processed per loop.
- "umull v16.8h, v0.8b, v4.8b \n" // R (v0 scaled by the R coefficient)
- "umlal v16.8h, v1.8b, v5.8b \n" // G
- "umlal v16.8h, v2.8b, v6.8b \n" // B (v2 scaled by the B coefficient)
- "sqrshrun v0.8b, v16.8h, #7 \n" // 16 bit to 8 bit Y
- "uqadd v0.8b, v0.8b, v7.8b \n"
- MEMACCESS(1)
- "st1 {v0.8b}, [%1], #8 \n" // store 8 pixels Y.
- "b.gt 1b \n"
- : "+r"(src_raw), // %0
- "+r"(dst_y), // %1
- "+r"(pix) // %2
- :
- : "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v16"
- );
-}
-#endif // HAS_RAWTOYROW_NEON
-
-// Bilinear filter 16x2 -> 16x1
-#ifdef HAS_INTERPOLATEROW_NEON
-void InterpolateRow_NEON(uint8* dst_ptr,
- const uint8* src_ptr, ptrdiff_t src_stride,
- int dst_width, int source_y_fraction) {
- int y1_fraction = source_y_fraction; // weight applied to the src_ptr1 row
- int y0_fraction = 256 - y1_fraction; // weight applied to the src_ptr row
- const uint8* src_ptr1 = src_ptr + src_stride; // second source row
- asm volatile (
- "cmp %w4, #0 \n" // fraction 0: copy the first row
- "b.eq 100f \n"
- "cmp %w4, #64 \n" // fraction 64: 75 / 25 blend
- "b.eq 75f \n"
- "cmp %w4, #128 \n" // fraction 128: 50 / 50 blend
- "b.eq 50f \n"
- "cmp %w4, #192 \n" // fraction 192: 25 / 75 blend
- "b.eq 25f \n"
-
- "dup v5.16b, %w4 \n" // v5 = y1_fraction in every byte
- "dup v4.16b, %w5 \n" // v4 = y0_fraction in every byte
- // General purpose row blend.
- "1: \n"
- MEMACCESS(1)
- "ld1 {v0.16b}, [%1], #16 \n"
- MEMACCESS(2)
- "ld1 {v1.16b}, [%2], #16 \n"
- "subs %w3, %w3, #16 \n"
- "umull v2.8h, v0.8b, v4.8b \n"
- "umull2 v3.8h, v0.16b, v4.16b \n"
- "umlal v2.8h, v1.8b, v5.8b \n"
- "umlal2 v3.8h, v1.16b, v5.16b \n"
- "rshrn v0.8b, v2.8h, #8 \n"
- "rshrn2 v0.16b, v3.8h, #8 \n"
- MEMACCESS(0)
- "st1 {v0.16b}, [%0], #16 \n"
- "b.gt 1b \n"
- "b 99f \n"
-
- // Blend 25 / 75.
- "25: \n"
- MEMACCESS(1)
- "ld1 {v0.16b}, [%1], #16 \n"
- MEMACCESS(2)
- "ld1 {v1.16b}, [%2], #16 \n"
- "subs %w3, %w3, #16 \n"
- "urhadd v0.16b, v0.16b, v1.16b \n"
- "urhadd v0.16b, v0.16b, v1.16b \n"
- MEMACCESS(0)
- "st1 {v0.16b}, [%0], #16 \n"
- "b.gt 25b \n"
- "b 99f \n"
-
- // Blend 50 / 50.
- "50: \n"
- MEMACCESS(1)
- "ld1 {v0.16b}, [%1], #16 \n"
- MEMACCESS(2)
- "ld1 {v1.16b}, [%2], #16 \n"
- "subs %w3, %w3, #16 \n"
- "urhadd v0.16b, v0.16b, v1.16b \n"
- MEMACCESS(0)
- "st1 {v0.16b}, [%0], #16 \n"
- "b.gt 50b \n"
- "b 99f \n"
-
- // Blend 75 / 25.
- "75: \n"
- MEMACCESS(1)
- "ld1 {v1.16b}, [%1], #16 \n"
- MEMACCESS(2)
- "ld1 {v0.16b}, [%2], #16 \n"
- "subs %w3, %w3, #16 \n"
- "urhadd v0.16b, v0.16b, v1.16b \n"
- "urhadd v0.16b, v0.16b, v1.16b \n"
- MEMACCESS(0)
- "st1 {v0.16b}, [%0], #16 \n"
- "b.gt 75b \n"
- "b 99f \n"
-
- // Blend 100 / 0 - Copy row unchanged.
- "100: \n"
- MEMACCESS(1)
- "ld1 {v0.16b}, [%1], #16 \n"
- "subs %w3, %w3, #16 \n"
- MEMACCESS(0)
- "st1 {v0.16b}, [%0], #16 \n"
- "b.gt 100b \n"
-
- "99: \n"
- : "+r"(dst_ptr), // %0
- "+r"(src_ptr), // %1
- "+r"(src_ptr1), // %2
- "+r"(dst_width), // %3
- "+r"(y1_fraction), // %4
- "+r"(y0_fraction) // %5
- :
- : "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5"
- );
-}
-#endif // HAS_INTERPOLATEROW_NEON
-
-// dr * (256 - sa) / 256 + sr = dr - dr * sa / 256 + sr
-#ifdef HAS_ARGBBLENDROW_NEON
-void ARGBBlendRow_NEON(const uint8* src_argb0, const uint8* src_argb1,
- uint8* dst_argb, int width) {
- asm volatile (
- "subs %w3, %w3, #8 \n"
- "b.lt 89f \n"
- // Blend 8 pixels.
- "8: \n"
- MEMACCESS(0)
- "ld4 {v0.8b,v1.8b,v2.8b,v3.8b}, [%0], #32 \n" // load 8 ARGB0 pixels
- MEMACCESS(1)
- "ld4 {v4.8b,v5.8b,v6.8b,v7.8b}, [%1], #32 \n" // load 8 ARGB1 pixels
- "subs %w3, %w3, #8 \n" // 8 processed per loop.
- "umull v16.8h, v4.8b, v3.8b \n" // db * a
- "umull v17.8h, v5.8b, v3.8b \n" // dg * a
- "umull v18.8h, v6.8b, v3.8b \n" // dr * a
- "uqrshrn v16.8b, v16.8h, #8 \n" // db >>= 8
- "uqrshrn v17.8b, v17.8h, #8 \n" // dg >>= 8
- "uqrshrn v18.8b, v18.8h, #8 \n" // dr >>= 8
- "uqsub v4.8b, v4.8b, v16.8b \n" // db - (db * a / 256)
- "uqsub v5.8b, v5.8b, v17.8b \n" // dg - (dg * a / 256)
- "uqsub v6.8b, v6.8b, v18.8b \n" // dr - (dr * a / 256)
- "uqadd v0.8b, v0.8b, v4.8b \n" // + sb
- "uqadd v1.8b, v1.8b, v5.8b \n" // + sg
- "uqadd v2.8b, v2.8b, v6.8b \n" // + sr
- "movi v3.8b, #255 \n" // a = 255
- MEMACCESS(2)
- "st4 {v0.8b,v1.8b,v2.8b,v3.8b}, [%2], #32 \n" // store 8 ARGB pixels
- "b.ge 8b \n"
-
- "89: \n"
- "adds %w3, %w3, #8-1 \n"
- "b.lt 99f \n"
-
- // Blend 1 pixels.
- "1: \n"
- MEMACCESS(0)
- "ld4 {v0.b,v1.b,v2.b,v3.b}[0], [%0], #4 \n" // load 1 pixel ARGB0.
- MEMACCESS(1)
- "ld4 {v4.b,v5.b,v6.b,v7.b}[0], [%1], #4 \n" // load 1 pixel ARGB1.
- "subs %w3, %w3, #1 \n" // 1 processed per loop.
- "umull v16.8h, v4.8b, v3.8b \n" // db * a
- "umull v17.8h, v5.8b, v3.8b \n" // dg * a
- "umull v18.8h, v6.8b, v3.8b \n" // dr * a
- "uqrshrn v16.8b, v16.8h, #8 \n" // db >>= 8
- "uqrshrn v17.8b, v17.8h, #8 \n" // dg >>= 8
- "uqrshrn v18.8b, v18.8h, #8 \n" // dr >>= 8
- "uqsub v4.8b, v4.8b, v16.8b \n" // db - (db * a / 256)
- "uqsub v5.8b, v5.8b, v17.8b \n" // dg - (dg * a / 256)
- "uqsub v6.8b, v6.8b, v18.8b \n" // dr - (dr * a / 256)
- "uqadd v0.8b, v0.8b, v4.8b \n" // + sb
- "uqadd v1.8b, v1.8b, v5.8b \n" // + sg
- "uqadd v2.8b, v2.8b, v6.8b \n" // + sr
- "movi v3.8b, #255 \n" // a = 255
- MEMACCESS(2)
- "st4 {v0.b,v1.b,v2.b,v3.b}[0], [%2], #4 \n" // store 1 pixel.
- "b.ge 1b \n"
-
- "99: \n"
-
- : "+r"(src_argb0), // %0
- "+r"(src_argb1), // %1
- "+r"(dst_argb), // %2
- "+r"(width) // %3
- :
- : "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7",
- "v16", "v17", "v18"
- );
-}
-#endif // HAS_ARGBBLENDROW_NEON
-
-// Attenuate 8 pixels at a time.
-#ifdef HAS_ARGBATTENUATEROW_NEON
-void ARGBAttenuateRow_NEON(const uint8* src_argb, uint8* dst_argb, int width) {
- asm volatile (
- // Attenuate 8 pixels.
- "1: \n"
- MEMACCESS(0)
- "ld4 {v0.8b,v1.8b,v2.8b,v3.8b}, [%0], #32 \n" // load 8 ARGB pixels
- "subs %w2, %w2, #8 \n" // 8 processed per loop.
- "umull v4.8h, v0.8b, v3.8b \n" // b * a
- "umull v5.8h, v1.8b, v3.8b \n" // g * a
- "umull v6.8h, v2.8b, v3.8b \n" // r * a
- "uqrshrn v0.8b, v4.8h, #8 \n" // b >>= 8
- "uqrshrn v1.8b, v5.8h, #8 \n" // g >>= 8
- "uqrshrn v2.8b, v6.8h, #8 \n" // r >>= 8
- MEMACCESS(1)
- "st4 {v0.8b,v1.8b,v2.8b,v3.8b}, [%1], #32 \n" // store 8 ARGB pixels
- "b.gt 1b \n"
- : "+r"(src_argb), // %0
- "+r"(dst_argb), // %1
- "+r"(width) // %2
- :
- : "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6"
- );
-}
-#endif // HAS_ARGBATTENUATEROW_NEON
-
-// Quantize 8 ARGB pixels (32 bytes).
-// dst = (dst * scale >> 16) * interval_size + interval_offset;
-#ifdef HAS_ARGBQUANTIZEROW_NEON
-void ARGBQuantizeRow_NEON(uint8* dst_argb, int scale, int interval_size,
- int interval_offset, int width) {
- asm volatile (
- "dup v4.8h, %w2 \n"
- "ushr v4.8h, v4.8h, #1 \n" // scale >>= 1
- "dup v5.8h, %w3 \n" // interval multiply.
- "dup v6.8h, %w4 \n" // interval add
-
- // 8 pixel loop.
- "1: \n"
- MEMACCESS(0)
- "ld4 {v0.8b,v1.8b,v2.8b,v3.8b}, [%0] \n" // load 8 pixels of ARGB.
- "subs %w1, %w1, #8 \n" // 8 processed per loop.
- "uxtl v0.8h, v0.8b \n" // b (0 .. 255)
- "uxtl v1.8h, v1.8b \n"
- "uxtl v2.8h, v2.8b \n"
- "sqdmulh v0.8h, v0.8h, v4.8h \n" // b * scale
- "sqdmulh v1.8h, v1.8h, v4.8h \n" // g
- "sqdmulh v2.8h, v2.8h, v4.8h \n" // r
- "mul v0.8h, v0.8h, v5.8h \n" // b * interval_size
- "mul v1.8h, v1.8h, v5.8h \n" // g
- "mul v2.8h, v2.8h, v5.8h \n" // r
- "add v0.8h, v0.8h, v6.8h \n" // b + interval_offset
- "add v1.8h, v1.8h, v6.8h \n" // g
- "add v2.8h, v2.8h, v6.8h \n" // r
- "uqxtn v0.8b, v0.8h \n"
- "uqxtn v1.8b, v1.8h \n"
- "uqxtn v2.8b, v2.8h \n"
- MEMACCESS(0)
- "st4 {v0.8b,v1.8b,v2.8b,v3.8b}, [%0], #32 \n" // store 8 ARGB pixels
- "b.gt 1b \n"
- : "+r"(dst_argb), // %0
- "+r"(width) // %1
- : "r"(scale), // %2
- "r"(interval_size), // %3
- "r"(interval_offset) // %4
- : "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6"
- );
-}
-#endif // HAS_ARGBQUANTIZEROW_NEON
-
-// Shade 8 pixels at a time by specified value.
-// NOTE vqrdmulh.s16 q10, q10, d0[0] must use a scalar register from 0 to 8.
-// Rounding in vqrdmulh does +1 to high if high bit of low s16 is set.
-#ifdef HAS_ARGBSHADEROW_NEON
-void ARGBShadeRow_NEON(const uint8* src_argb, uint8* dst_argb, int width,
- uint32 value) {
- asm volatile (
- "dup v0.4s, %w3 \n" // duplicate scale value.
- "zip1 v0.8b, v0.8b, v0.8b \n" // v0.8b aarrggbb.
- "ushr v0.8h, v0.8h, #1 \n" // scale / 2.
-
- // 8 pixel loop.
- "1: \n"
- MEMACCESS(0)
- "ld4 {v4.8b,v5.8b,v6.8b,v7.8b}, [%0], #32 \n" // load 8 ARGB pixels.
- "subs %w2, %w2, #8 \n" // 8 processed per loop.
- "uxtl v4.8h, v4.8b \n" // b (0 .. 255)
- "uxtl v5.8h, v5.8b \n"
- "uxtl v6.8h, v6.8b \n"
- "uxtl v7.8h, v7.8b \n"
- "sqrdmulh v4.8h, v4.8h, v0.h[0] \n" // b * scale * 2
- "sqrdmulh v5.8h, v5.8h, v0.h[1] \n" // g
- "sqrdmulh v6.8h, v6.8h, v0.h[2] \n" // r
- "sqrdmulh v7.8h, v7.8h, v0.h[3] \n" // a
- "uqxtn v4.8b, v4.8h \n"
- "uqxtn v5.8b, v5.8h \n"
- "uqxtn v6.8b, v6.8h \n"
- "uqxtn v7.8b, v7.8h \n"
- MEMACCESS(1)
- "st4 {v4.8b,v5.8b,v6.8b,v7.8b}, [%1], #32 \n" // store 8 ARGB pixels
- "b.gt 1b \n"
- : "+r"(src_argb), // %0
- "+r"(dst_argb), // %1
- "+r"(width) // %2
- : "r"(value) // %3
- : "cc", "memory", "v0", "v4", "v5", "v6", "v7"
- );
-}
-#endif // HAS_ARGBSHADEROW_NEON
-
-// Convert 8 ARGB pixels (32 bytes) to 8 Gray ARGB pixels
-// Similar to ARGBToYJ but stores ARGB.
-// C code is (15 * b + 75 * g + 38 * r + 64) >> 7;
-#ifdef HAS_ARGBGRAYROW_NEON
-void ARGBGrayRow_NEON(const uint8* src_argb, uint8* dst_argb, int width) {
- asm volatile (
- "movi v24.8b, #15 \n" // B * 0.11400 coefficient
- "movi v25.8b, #75 \n" // G * 0.58700 coefficient
- "movi v26.8b, #38 \n" // R * 0.29900 coefficient
- "1: \n"
- MEMACCESS(0)
- "ld4 {v0.8b,v1.8b,v2.8b,v3.8b}, [%0], #32 \n" // load 8 ARGB pixels.
- "subs %w2, %w2, #8 \n" // 8 processed per loop.
- "umull v4.8h, v0.8b, v24.8b \n" // B
- "umlal v4.8h, v1.8b, v25.8b \n" // G
- "umlal v4.8h, v2.8b, v26.8b \n" // R
- "sqrshrun v0.8b, v4.8h, #7 \n" // 15 bit to 8 bit B
- "orr v1.8b, v0.8b, v0.8b \n" // G
- "orr v2.8b, v0.8b, v0.8b \n" // R
- MEMACCESS(1)
- "st4 {v0.8b,v1.8b,v2.8b,v3.8b}, [%1], #32 \n" // store 8 pixels.
- "b.gt 1b \n"
- : "+r"(src_argb), // %0
- "+r"(dst_argb), // %1
- "+r"(width) // %2
- :
- : "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v24", "v25", "v26"
- );
-}
-#endif // HAS_ARGBGRAYROW_NEON
-
-// Convert 8 ARGB pixels (32 bytes) to 8 Sepia ARGB pixels.
-// b = (r * 35 + g * 68 + b * 17) >> 7
-// g = (r * 45 + g * 88 + b * 22) >> 7
-// r = (r * 50 + g * 98 + b * 24) >> 7
-
-#ifdef HAS_ARGBSEPIAROW_NEON
-void ARGBSepiaRow_NEON(uint8* dst_argb, int width) {
- asm volatile (
- "movi v20.8b, #17 \n" // BB coefficient
- "movi v21.8b, #68 \n" // BG coefficient
- "movi v22.8b, #35 \n" // BR coefficient
- "movi v24.8b, #22 \n" // GB coefficient
- "movi v25.8b, #88 \n" // GG coefficient
- "movi v26.8b, #45 \n" // GR coefficient
- "movi v28.8b, #24 \n" // RB coefficient
- "movi v29.8b, #98 \n" // RG coefficient
- "movi v30.8b, #50 \n" // RR coefficient
- "1: \n"
- MEMACCESS(0)
- "ld4 {v0.8b,v1.8b,v2.8b,v3.8b}, [%0] \n" // load 8 ARGB pixels.
- "subs %w1, %w1, #8 \n" // 8 processed per loop.
- "umull v4.8h, v0.8b, v20.8b \n" // B to Sepia B
- "umlal v4.8h, v1.8b, v21.8b \n" // G
- "umlal v4.8h, v2.8b, v22.8b \n" // R
- "umull v5.8h, v0.8b, v24.8b \n" // B to Sepia G
- "umlal v5.8h, v1.8b, v25.8b \n" // G
- "umlal v5.8h, v2.8b, v26.8b \n" // R
- "umull v6.8h, v0.8b, v28.8b \n" // B to Sepia R
- "umlal v6.8h, v1.8b, v29.8b \n" // G
- "umlal v6.8h, v2.8b, v30.8b \n" // R
- "uqshrn v0.8b, v4.8h, #7 \n" // 16 bit to 8 bit B
- "uqshrn v1.8b, v5.8h, #7 \n" // 16 bit to 8 bit G
- "uqshrn v2.8b, v6.8h, #7 \n" // 16 bit to 8 bit R
- MEMACCESS(0)
- "st4 {v0.8b,v1.8b,v2.8b,v3.8b}, [%0], #32 \n" // store 8 pixels.
- "b.gt 1b \n"
- : "+r"(dst_argb), // %0
- "+r"(width) // %1
- :
- : "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7",
- "v20", "v21", "v22", "v24", "v25", "v26", "v28", "v29", "v30"
- );
-}
-#endif // HAS_ARGBSEPIAROW_NEON
-
-// Transform 8 ARGB pixels (32 bytes) with color matrix.
-// TODO(fbarchard): Was same as Sepia except matrix is provided. This function
-// needs to saturate. Consider doing a non-saturating version.
-#ifdef HAS_ARGBCOLORMATRIXROW_NEON
-void ARGBColorMatrixRow_NEON(const uint8* src_argb, uint8* dst_argb,
- const int8* matrix_argb, int width) {
- asm volatile (
- MEMACCESS(3)
- "ld1 {v2.16b}, [%3] \n" // load 16 int8 matrix coefficients.
- "sxtl v0.8h, v2.8b \n" // B,G coefficients s16.
- "sxtl2 v1.8h, v2.16b \n" // R,A coefficients s16.
-
- "1: \n"
- MEMACCESS(0)
- "ld4 {v16.8b,v17.8b,v18.8b,v19.8b}, [%0], #32 \n" // load 8 pixels.
- "subs %w2, %w2, #8 \n" // 8 processed per loop.
- "uxtl v16.8h, v16.8b \n" // b (0 .. 255) 16 bit
- "uxtl v17.8h, v17.8b \n" // g
- "uxtl v18.8h, v18.8b \n" // r
- "uxtl v19.8h, v19.8b \n" // a
- "mul v22.8h, v16.8h, v0.h[0] \n" // B = B * Matrix B
- "mul v23.8h, v16.8h, v0.h[4] \n" // G = B * Matrix G
- "mul v24.8h, v16.8h, v1.h[0] \n" // R = B * Matrix R
- "mul v25.8h, v16.8h, v1.h[4] \n" // A = B * Matrix A
- "mul v4.8h, v17.8h, v0.h[1] \n" // B += G * Matrix B
- "mul v5.8h, v17.8h, v0.h[5] \n" // G += G * Matrix G
- "mul v6.8h, v17.8h, v1.h[1] \n" // R += G * Matrix R
- "mul v7.8h, v17.8h, v1.h[5] \n" // A += G * Matrix A
- "sqadd v22.8h, v22.8h, v4.8h \n" // Accumulate B
- "sqadd v23.8h, v23.8h, v5.8h \n" // Accumulate G
- "sqadd v24.8h, v24.8h, v6.8h \n" // Accumulate R
- "sqadd v25.8h, v25.8h, v7.8h \n" // Accumulate A
- "mul v4.8h, v18.8h, v0.h[2] \n" // B += R * Matrix B
- "mul v5.8h, v18.8h, v0.h[6] \n" // G += R * Matrix G
- "mul v6.8h, v18.8h, v1.h[2] \n" // R += R * Matrix R
- "mul v7.8h, v18.8h, v1.h[6] \n" // A += R * Matrix A
- "sqadd v22.8h, v22.8h, v4.8h \n" // Accumulate B
- "sqadd v23.8h, v23.8h, v5.8h \n" // Accumulate G
- "sqadd v24.8h, v24.8h, v6.8h \n" // Accumulate R
- "sqadd v25.8h, v25.8h, v7.8h \n" // Accumulate A
- "mul v4.8h, v19.8h, v0.h[3] \n" // B += A * Matrix B
- "mul v5.8h, v19.8h, v0.h[7] \n" // G += A * Matrix G
- "mul v6.8h, v19.8h, v1.h[3] \n" // R += A * Matrix R
- "mul v7.8h, v19.8h, v1.h[7] \n" // A += A * Matrix A
- "sqadd v22.8h, v22.8h, v4.8h \n" // Accumulate B
- "sqadd v23.8h, v23.8h, v5.8h \n" // Accumulate G
- "sqadd v24.8h, v24.8h, v6.8h \n" // Accumulate R
- "sqadd v25.8h, v25.8h, v7.8h \n" // Accumulate A
- "sqshrun v16.8b, v22.8h, #6 \n" // 16 bit to 8 bit B
- "sqshrun v17.8b, v23.8h, #6 \n" // 16 bit to 8 bit G
- "sqshrun v18.8b, v24.8h, #6 \n" // 16 bit to 8 bit R
- "sqshrun v19.8b, v25.8h, #6 \n" // 16 bit to 8 bit A
- MEMACCESS(1)
- "st4 {v16.8b,v17.8b,v18.8b,v19.8b}, [%1], #32 \n" // store 8 pixels.
- "b.gt 1b \n"
- : "+r"(src_argb), // %0
- "+r"(dst_argb), // %1
- "+r"(width) // %2
- : "r"(matrix_argb) // %3
- : "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v16", "v17",
- "v18", "v19", "v22", "v23", "v24", "v25"
- );
-}
-#endif // HAS_ARGBCOLORMATRIXROW_NEON
-
-// TODO(fbarchard): fix vqshrun in ARGBMultiplyRow_NEON and reenable.
-// Multiply 2 rows of ARGB pixels together, 8 pixels at a time.
-#ifdef HAS_ARGBMULTIPLYROW_NEON
-void ARGBMultiplyRow_NEON(const uint8* src_argb0, const uint8* src_argb1,
- uint8* dst_argb, int width) {
- asm volatile (
- // 8 pixel loop.
- "1: \n"
- MEMACCESS(0)
- "ld4 {v0.8b,v1.8b,v2.8b,v3.8b}, [%0], #32 \n" // load 8 ARGB pixels.
- MEMACCESS(1)
- "ld4 {v4.8b,v5.8b,v6.8b,v7.8b}, [%1], #32 \n" // load 8 more pixels.
- "subs %w3, %w3, #8 \n" // 8 processed per loop.
- "umull v0.8h, v0.8b, v4.8b \n" // multiply B
- "umull v1.8h, v1.8b, v5.8b \n" // multiply G
- "umull v2.8h, v2.8b, v6.8b \n" // multiply R
- "umull v3.8h, v3.8b, v7.8b \n" // multiply A
- "rshrn v0.8b, v0.8h, #8 \n" // 16 bit to 8 bit B
- "rshrn v1.8b, v1.8h, #8 \n" // 16 bit to 8 bit G
- "rshrn v2.8b, v2.8h, #8 \n" // 16 bit to 8 bit R
- "rshrn v3.8b, v3.8h, #8 \n" // 16 bit to 8 bit A
- MEMACCESS(2)
- "st4 {v0.8b,v1.8b,v2.8b,v3.8b}, [%2], #32 \n" // store 8 ARGB pixels
- "b.gt 1b \n"
-
- : "+r"(src_argb0), // %0
- "+r"(src_argb1), // %1
- "+r"(dst_argb), // %2
- "+r"(width) // %3
- :
- : "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7"
- );
-}
-#endif // HAS_ARGBMULTIPLYROW_NEON
-
-// Add 2 rows of ARGB pixels together, 8 pixels at a time.
-#ifdef HAS_ARGBADDROW_NEON
-void ARGBAddRow_NEON(const uint8* src_argb0, const uint8* src_argb1,
- uint8* dst_argb, int width) {
- asm volatile (
- // 8 pixel loop.
- "1: \n"
- MEMACCESS(0)
- "ld4 {v0.8b,v1.8b,v2.8b,v3.8b}, [%0], #32 \n" // load 8 ARGB pixels.
- MEMACCESS(1)
- "ld4 {v4.8b,v5.8b,v6.8b,v7.8b}, [%1], #32 \n" // load 8 more pixels.
- "subs %w3, %w3, #8 \n" // 8 processed per loop.
- "uqadd v0.8b, v0.8b, v4.8b \n"
- "uqadd v1.8b, v1.8b, v5.8b \n"
- "uqadd v2.8b, v2.8b, v6.8b \n"
- "uqadd v3.8b, v3.8b, v7.8b \n"
- MEMACCESS(2)
- "st4 {v0.8b,v1.8b,v2.8b,v3.8b}, [%2], #32 \n" // store 8 ARGB pixels
- "b.gt 1b \n"
-
- : "+r"(src_argb0), // %0
- "+r"(src_argb1), // %1
- "+r"(dst_argb), // %2
- "+r"(width) // %3
- :
- : "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7"
- );
-}
-#endif // HAS_ARGBADDROW_NEON
-
-// Subtract 2 rows of ARGB pixels, 8 pixels at a time.
-#ifdef HAS_ARGBSUBTRACTROW_NEON
-void ARGBSubtractRow_NEON(const uint8* src_argb0, const uint8* src_argb1,
- uint8* dst_argb, int width) {
- asm volatile (
- // 8 pixel loop.
- "1: \n"
- MEMACCESS(0)
- "ld4 {v0.8b,v1.8b,v2.8b,v3.8b}, [%0], #32 \n" // load 8 ARGB pixels.
- MEMACCESS(1)
- "ld4 {v4.8b,v5.8b,v6.8b,v7.8b}, [%1], #32 \n" // load 8 more pixels.
- "subs %w3, %w3, #8 \n" // 8 processed per loop.
- "uqsub v0.8b, v0.8b, v4.8b \n"
- "uqsub v1.8b, v1.8b, v5.8b \n"
- "uqsub v2.8b, v2.8b, v6.8b \n"
- "uqsub v3.8b, v3.8b, v7.8b \n"
- MEMACCESS(2)
- "st4 {v0.8b,v1.8b,v2.8b,v3.8b}, [%2], #32 \n" // store 8 ARGB pixels
- "b.gt 1b \n"
-
- : "+r"(src_argb0), // %0
- "+r"(src_argb1), // %1
- "+r"(dst_argb), // %2
- "+r"(width) // %3
- :
- : "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7"
- );
-}
-#endif // HAS_ARGBSUBTRACTROW_NEON
-
-// Adds Sobel X and Sobel Y and stores Sobel into ARGB.
-// A = 255
-// R = Sobel
-// G = Sobel
-// B = Sobel
-#ifdef HAS_SOBELROW_NEON
-void SobelRow_NEON(const uint8* src_sobelx, const uint8* src_sobely,
- uint8* dst_argb, int width) {
- asm volatile (
- "movi v3.8b, #255 \n" // alpha
- // 8 pixel loop.
- "1: \n"
- MEMACCESS(0)
- "ld1 {v0.8b}, [%0], #8 \n" // load 8 sobelx.
- MEMACCESS(1)
- "ld1 {v1.8b}, [%1], #8 \n" // load 8 sobely.
- "subs %w3, %w3, #8 \n" // 8 processed per loop.
- "uqadd v0.8b, v0.8b, v1.8b \n" // add
- "orr v1.8b, v0.8b, v0.8b \n"
- "orr v2.8b, v0.8b, v0.8b \n"
- MEMACCESS(2)
- "st4 {v0.8b,v1.8b,v2.8b,v3.8b}, [%2], #32 \n" // store 8 ARGB pixels
- "b.gt 1b \n"
- : "+r"(src_sobelx), // %0
- "+r"(src_sobely), // %1
- "+r"(dst_argb), // %2
- "+r"(width) // %3
- :
- : "cc", "memory", "v0", "v1", "v2", "v3"
- );
-}
-#endif // HAS_SOBELROW_NEON
-
-// Adds Sobel X and Sobel Y and stores Sobel into plane.
-#ifdef HAS_SOBELTOPLANEROW_NEON
-void SobelToPlaneRow_NEON(const uint8* src_sobelx, const uint8* src_sobely,
- uint8* dst_y, int width) {
- asm volatile (
- // 16 pixel loop.
- "1: \n"
- MEMACCESS(0)
- "ld1 {v0.16b}, [%0], #16 \n" // load 16 sobelx.
- MEMACCESS(1)
- "ld1 {v1.16b}, [%1], #16 \n" // load 16 sobely.
- "subs %w3, %w3, #16 \n" // 16 processed per loop.
- "uqadd v0.16b, v0.16b, v1.16b \n" // add
- MEMACCESS(2)
- "st1 {v0.16b}, [%2], #16 \n" // store 16 pixels.
- "b.gt 1b \n"
- : "+r"(src_sobelx), // %0
- "+r"(src_sobely), // %1
- "+r"(dst_y), // %2
- "+r"(width) // %3
- :
- : "cc", "memory", "v0", "v1"
- );
-}
-#endif // HAS_SOBELTOPLANEROW_NEON
-
-// Mixes Sobel X, Sobel Y and Sobel into ARGB.
-// A = 255
-// R = Sobel X
-// G = Sobel
-// B = Sobel Y
-#ifdef HAS_SOBELXYROW_NEON
-void SobelXYRow_NEON(const uint8* src_sobelx, const uint8* src_sobely,
- uint8* dst_argb, int width) {
- asm volatile (
- "movi v3.8b, #255 \n" // alpha
- // 8 pixel loop.
- "1: \n"
- MEMACCESS(0)
- "ld1 {v2.8b}, [%0], #8 \n" // load 8 sobelx.
- MEMACCESS(1)
- "ld1 {v0.8b}, [%1], #8 \n" // load 8 sobely.
- "subs %w3, %w3, #8 \n" // 8 processed per loop.
- "uqadd v1.8b, v0.8b, v2.8b \n" // add
- MEMACCESS(2)
- "st4 {v0.8b,v1.8b,v2.8b,v3.8b}, [%2], #32 \n" // store 8 ARGB pixels
- "b.gt 1b \n"
- : "+r"(src_sobelx), // %0
- "+r"(src_sobely), // %1
- "+r"(dst_argb), // %2
- "+r"(width) // %3
- :
- : "cc", "memory", "v0", "v1", "v2", "v3"
- );
-}
-#endif // HAS_SOBELXYROW_NEON
-
-// SobelX as a matrix is
-// -1 0 1
-// -2 0 2
-// -1 0 1
-#ifdef HAS_SOBELXROW_NEON
-void SobelXRow_NEON(const uint8* src_y0, const uint8* src_y1,
- const uint8* src_y2, uint8* dst_sobelx, int width) {
- asm volatile (
- "1: \n"
- MEMACCESS(0)
- "ld1 {v0.8b}, [%0],%5 \n" // top
- MEMACCESS(0)
- "ld1 {v1.8b}, [%0],%6 \n"
- "usubl v0.8h, v0.8b, v1.8b \n"
- MEMACCESS(1)
- "ld1 {v2.8b}, [%1],%5 \n" // center * 2
- MEMACCESS(1)
- "ld1 {v3.8b}, [%1],%6 \n"
- "usubl v1.8h, v2.8b, v3.8b \n"
- "add v0.8h, v0.8h, v1.8h \n"
- "add v0.8h, v0.8h, v1.8h \n"
- MEMACCESS(2)
- "ld1 {v2.8b}, [%2],%5 \n" // bottom
- MEMACCESS(2)
- "ld1 {v3.8b}, [%2],%6 \n"
- "subs %w4, %w4, #8 \n" // 8 pixels
- "usubl v1.8h, v2.8b, v3.8b \n"
- "add v0.8h, v0.8h, v1.8h \n"
- "abs v0.8h, v0.8h \n"
- "uqxtn v0.8b, v0.8h \n"
- MEMACCESS(3)
- "st1 {v0.8b}, [%3], #8 \n" // store 8 sobelx
- "b.gt 1b \n"
- : "+r"(src_y0), // %0
- "+r"(src_y1), // %1
- "+r"(src_y2), // %2
- "+r"(dst_sobelx), // %3
- "+r"(width) // %4
- : "r"(2LL), // %5
- "r"(6LL) // %6
- : "cc", "memory", "v0", "v1", "v2", "v3" // Clobber List
- );
-}
-#endif // HAS_SOBELXROW_NEON
-
-// SobelY as a matrix is
-// -1 -2 -1
-// 0 0 0
-// 1 2 1
-#ifdef HAS_SOBELYROW_NEON
-void SobelYRow_NEON(const uint8* src_y0, const uint8* src_y1,
- uint8* dst_sobely, int width) {
- asm volatile (
- "1: \n"
- MEMACCESS(0)
- "ld1 {v0.8b}, [%0],%4 \n" // left
- MEMACCESS(1)
- "ld1 {v1.8b}, [%1],%4 \n"
- "usubl v0.8h, v0.8b, v1.8b \n"
- MEMACCESS(0)
- "ld1 {v2.8b}, [%0],%4 \n" // center * 2
- MEMACCESS(1)
- "ld1 {v3.8b}, [%1],%4 \n"
- "usubl v1.8h, v2.8b, v3.8b \n"
- "add v0.8h, v0.8h, v1.8h \n"
- "add v0.8h, v0.8h, v1.8h \n"
- MEMACCESS(0)
- "ld1 {v2.8b}, [%0],%5 \n" // right
- MEMACCESS(1)
- "ld1 {v3.8b}, [%1],%5 \n"
- "subs %w3, %w3, #8 \n" // 8 pixels
- "usubl v1.8h, v2.8b, v3.8b \n"
- "add v0.8h, v0.8h, v1.8h \n"
- "abs v0.8h, v0.8h \n"
- "uqxtn v0.8b, v0.8h \n"
- MEMACCESS(2)
- "st1 {v0.8b}, [%2], #8 \n" // store 8 sobely
- "b.gt 1b \n"
- : "+r"(src_y0), // %0
- "+r"(src_y1), // %1
- "+r"(dst_sobely), // %2
- "+r"(width) // %3
- : "r"(1LL), // %4
- "r"(6LL) // %5
- : "cc", "memory", "v0", "v1", "v2", "v3" // Clobber List
- );
-}
-#endif // HAS_SOBELYROW_NEON
-#endif // !defined(LIBYUV_DISABLE_NEON) && defined(__aarch64__)
-
-#ifdef __cplusplus
-} // extern "C"
-} // namespace libyuv
-#endif
diff --git a/third_party/aom/third_party/libyuv/source/row_win.cc b/third_party/aom/third_party/libyuv/source/row_win.cc
deleted file mode 100644
index 71be268b4..000000000
--- a/third_party/aom/third_party/libyuv/source/row_win.cc
+++ /dev/null
@@ -1,6331 +0,0 @@
-/*
- * Copyright 2011 The LibYuv Project Authors. All rights reserved.
- *
- * Use of this source code is governed by a BSD-style license
- * that can be found in the LICENSE file in the root of the source
- * tree. An additional intellectual property rights grant can be found
- * in the file PATENTS. All contributing project authors may
- * be found in the AUTHORS file in the root of the source tree.
- */
-
-#include "libyuv/row.h"
-
-#if !defined(LIBYUV_DISABLE_X86) && defined(_M_X64) && \
- defined(_MSC_VER) && !defined(__clang__)
-#include <emmintrin.h>
-#include <tmmintrin.h> // For _mm_maddubs_epi16
-#endif
-
-#ifdef __cplusplus
-namespace libyuv {
-extern "C" {
-#endif
-
-// This module is for Visual C.
-#if !defined(LIBYUV_DISABLE_X86) && (defined(_M_IX86) || defined(_M_X64)) && \
- defined(_MSC_VER) && !defined(__clang__)
-
-struct YuvConstants {
- lvec8 kUVToB; // 0
- lvec8 kUVToG; // 32
- lvec8 kUVToR; // 64
- lvec16 kUVBiasB; // 96
- lvec16 kUVBiasG; // 128
- lvec16 kUVBiasR; // 160
- lvec16 kYToRgb; // 192
-};
-
-// BT.601 YUV to RGB reference
-// R = (Y - 16) * 1.164 - V * -1.596
-// G = (Y - 16) * 1.164 - U * 0.391 - V * 0.813
-// B = (Y - 16) * 1.164 - U * -2.018
-
-// Y contribution to R,G,B. Scale and bias.
-// TODO(fbarchard): Consider moving constants into a common header.
-#define YG 18997 /* round(1.164 * 64 * 256 * 256 / 257) */
-#define YGB -1160 /* 1.164 * 64 * -16 + 64 / 2 */
-
-// U and V contributions to R,G,B.
-#define UB -128 /* max(-128, round(-2.018 * 64)) */
-#define UG 25 /* round(0.391 * 64) */
-#define VG 52 /* round(0.813 * 64) */
-#define VR -102 /* round(-1.596 * 64) */
-
-// Bias values to subtract 16 from Y and 128 from U and V.
-#define BB (UB * 128 + YGB)
-#define BG (UG * 128 + VG * 128 + YGB)
-#define BR (VR * 128 + YGB)
-
-// BT601 constants for YUV to RGB.
-static YuvConstants SIMD_ALIGNED(kYuvConstants) = {
- { UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0,
- UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0 },
- { UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG,
- UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG },
- { 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR,
- 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR },
- { BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB },
- { BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG },
- { BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR },
- { YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG }
-};
-
-// BT601 constants for NV21 where chroma plane is VU instead of UV.
-static YuvConstants SIMD_ALIGNED(kYvuConstants) = {
- { 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB,
- 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB },
- { VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG,
- VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG },
- { VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0,
- VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0 },
- { BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB },
- { BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG },
- { BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR },
- { YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG }
-};
-
-#undef YG
-#undef YGB
-#undef UB
-#undef UG
-#undef VG
-#undef VR
-#undef BB
-#undef BG
-#undef BR
-
-// JPEG YUV to RGB reference
-// * R = Y - V * -1.40200
-// * G = Y - U * 0.34414 - V * 0.71414
-// * B = Y - U * -1.77200
-
-// Y contribution to R,G,B. Scale and bias.
-// TODO(fbarchard): Consider moving constants into a common header.
-#define YGJ 16320 /* round(1.000 * 64 * 256 * 256 / 257) */
-#define YGBJ 32 /* 64 / 2 */
-
-// U and V contributions to R,G,B.
-#define UBJ -113 /* round(-1.77200 * 64) */
-#define UGJ 22 /* round(0.34414 * 64) */
-#define VGJ 46 /* round(0.71414 * 64) */
-#define VRJ -90 /* round(-1.40200 * 64) */
-
-// Bias values to round Y (64 / 2) and subtract 128 from U and V.
-#define BBJ (UBJ * 128 + YGBJ)
-#define BGJ (UGJ * 128 + VGJ * 128 + YGBJ)
-#define BRJ (VRJ * 128 + YGBJ)
-
-// JPEG constants for YUV to RGB.
-static YuvConstants SIMD_ALIGNED(kYuvJConstants) = {
- { UBJ, 0, UBJ, 0, UBJ, 0, UBJ, 0, UBJ, 0, UBJ, 0, UBJ, 0, UBJ, 0,
- UBJ, 0, UBJ, 0, UBJ, 0, UBJ, 0, UBJ, 0, UBJ, 0, UBJ, 0, UBJ, 0 },
- { UGJ, VGJ, UGJ, VGJ, UGJ, VGJ, UGJ, VGJ,
- UGJ, VGJ, UGJ, VGJ, UGJ, VGJ, UGJ, VGJ,
- UGJ, VGJ, UGJ, VGJ, UGJ, VGJ, UGJ, VGJ,
- UGJ, VGJ, UGJ, VGJ, UGJ, VGJ, UGJ, VGJ },
- { 0, VRJ, 0, VRJ, 0, VRJ, 0, VRJ, 0, VRJ, 0, VRJ, 0, VRJ, 0, VRJ,
- 0, VRJ, 0, VRJ, 0, VRJ, 0, VRJ, 0, VRJ, 0, VRJ, 0, VRJ, 0, VRJ },
- { BBJ, BBJ, BBJ, BBJ, BBJ, BBJ, BBJ, BBJ,
- BBJ, BBJ, BBJ, BBJ, BBJ, BBJ, BBJ, BBJ },
- { BGJ, BGJ, BGJ, BGJ, BGJ, BGJ, BGJ, BGJ,
- BGJ, BGJ, BGJ, BGJ, BGJ, BGJ, BGJ, BGJ },
- { BRJ, BRJ, BRJ, BRJ, BRJ, BRJ, BRJ, BRJ,
- BRJ, BRJ, BRJ, BRJ, BRJ, BRJ, BRJ, BRJ },
- { YGJ, YGJ, YGJ, YGJ, YGJ, YGJ, YGJ, YGJ,
- YGJ, YGJ, YGJ, YGJ, YGJ, YGJ, YGJ, YGJ }
-};
-
-#undef YGJ
-#undef YGBJ
-#undef UBJ
-#undef UGJ
-#undef VGJ
-#undef VRJ
-#undef BBJ
-#undef BGJ
-#undef BRJ
-
-// 64 bit
-#if defined(_M_X64)
-#if defined(HAS_I422TOARGBROW_SSSE3)
-void I422ToARGBRow_SSSE3(const uint8* y_buf,
- const uint8* u_buf,
- const uint8* v_buf,
- uint8* dst_argb,
- int width) {
- __m128i xmm0, xmm1, xmm2, xmm3;
- const __m128i xmm5 = _mm_set1_epi8(-1);
- const ptrdiff_t offset = (uint8*)v_buf - (uint8*)u_buf;
-
- while (width > 0) {
- xmm0 = _mm_cvtsi32_si128(*(uint32*)u_buf);
- xmm1 = _mm_cvtsi32_si128(*(uint32*)(u_buf + offset));
- xmm0 = _mm_unpacklo_epi8(xmm0, xmm1);
- xmm0 = _mm_unpacklo_epi16(xmm0, xmm0);
- xmm1 = _mm_loadu_si128(&xmm0);
- xmm2 = _mm_loadu_si128(&xmm0);
- xmm0 = _mm_maddubs_epi16(xmm0, *(__m128i*)kYuvConstants.kUVToB);
- xmm1 = _mm_maddubs_epi16(xmm1, *(__m128i*)kYuvConstants.kUVToG);
- xmm2 = _mm_maddubs_epi16(xmm2, *(__m128i*)kYuvConstants.kUVToR);
- xmm0 = _mm_sub_epi16(*(__m128i*)kYuvConstants.kUVBiasB, xmm0);
- xmm1 = _mm_sub_epi16(*(__m128i*)kYuvConstants.kUVBiasG, xmm1);
- xmm2 = _mm_sub_epi16(*(__m128i*)kYuvConstants.kUVBiasR, xmm2);
- xmm3 = _mm_loadl_epi64((__m128i*)y_buf);
- xmm3 = _mm_unpacklo_epi8(xmm3, xmm3);
- xmm3 = _mm_mulhi_epu16(xmm3, *(__m128i*)kYuvConstants.kYToRgb);
- xmm0 = _mm_adds_epi16(xmm0, xmm3);
- xmm1 = _mm_adds_epi16(xmm1, xmm3);
- xmm2 = _mm_adds_epi16(xmm2, xmm3);
- xmm0 = _mm_srai_epi16(xmm0, 6);
- xmm1 = _mm_srai_epi16(xmm1, 6);
- xmm2 = _mm_srai_epi16(xmm2, 6);
- xmm0 = _mm_packus_epi16(xmm0, xmm0);
- xmm1 = _mm_packus_epi16(xmm1, xmm1);
- xmm2 = _mm_packus_epi16(xmm2, xmm2);
- xmm0 = _mm_unpacklo_epi8(xmm0, xmm1);
- xmm2 = _mm_unpacklo_epi8(xmm2, xmm5);
- xmm1 = _mm_loadu_si128(&xmm0);
- xmm0 = _mm_unpacklo_epi16(xmm0, xmm2);
- xmm1 = _mm_unpackhi_epi16(xmm1, xmm2);
-
- _mm_storeu_si128((__m128i *)dst_argb, xmm0);
- _mm_storeu_si128((__m128i *)(dst_argb + 16), xmm1);
-
- y_buf += 8;
- u_buf += 4;
- dst_argb += 32;
- width -= 8;
- }
-}
-#endif
-// 32 bit
-#else // defined(_M_X64)
-#ifdef HAS_ARGBTOYROW_SSSE3
-
-// Constants for ARGB.
-static const vec8 kARGBToY = {
- 13, 65, 33, 0, 13, 65, 33, 0, 13, 65, 33, 0, 13, 65, 33, 0
-};
-
-// JPeg full range.
-static const vec8 kARGBToYJ = {
- 15, 75, 38, 0, 15, 75, 38, 0, 15, 75, 38, 0, 15, 75, 38, 0
-};
-
-static const vec8 kARGBToU = {
- 112, -74, -38, 0, 112, -74, -38, 0, 112, -74, -38, 0, 112, -74, -38, 0
-};
-
-static const vec8 kARGBToUJ = {
- 127, -84, -43, 0, 127, -84, -43, 0, 127, -84, -43, 0, 127, -84, -43, 0
-};
-
-static const vec8 kARGBToV = {
- -18, -94, 112, 0, -18, -94, 112, 0, -18, -94, 112, 0, -18, -94, 112, 0,
-};
-
-static const vec8 kARGBToVJ = {
- -20, -107, 127, 0, -20, -107, 127, 0, -20, -107, 127, 0, -20, -107, 127, 0
-};
-
-// vpshufb for vphaddw + vpackuswb packed to shorts.
-static const lvec8 kShufARGBToUV_AVX = {
- 0, 1, 8, 9, 2, 3, 10, 11, 4, 5, 12, 13, 6, 7, 14, 15,
- 0, 1, 8, 9, 2, 3, 10, 11, 4, 5, 12, 13, 6, 7, 14, 15
-};
-
-// Constants for BGRA.
-static const vec8 kBGRAToY = {
- 0, 33, 65, 13, 0, 33, 65, 13, 0, 33, 65, 13, 0, 33, 65, 13
-};
-
-static const vec8 kBGRAToU = {
- 0, -38, -74, 112, 0, -38, -74, 112, 0, -38, -74, 112, 0, -38, -74, 112
-};
-
-static const vec8 kBGRAToV = {
- 0, 112, -94, -18, 0, 112, -94, -18, 0, 112, -94, -18, 0, 112, -94, -18
-};
-
-// Constants for ABGR.
-static const vec8 kABGRToY = {
- 33, 65, 13, 0, 33, 65, 13, 0, 33, 65, 13, 0, 33, 65, 13, 0
-};
-
-static const vec8 kABGRToU = {
- -38, -74, 112, 0, -38, -74, 112, 0, -38, -74, 112, 0, -38, -74, 112, 0
-};
-
-static const vec8 kABGRToV = {
- 112, -94, -18, 0, 112, -94, -18, 0, 112, -94, -18, 0, 112, -94, -18, 0
-};
-
-// Constants for RGBA.
-static const vec8 kRGBAToY = {
- 0, 13, 65, 33, 0, 13, 65, 33, 0, 13, 65, 33, 0, 13, 65, 33
-};
-
-static const vec8 kRGBAToU = {
- 0, 112, -74, -38, 0, 112, -74, -38, 0, 112, -74, -38, 0, 112, -74, -38
-};
-
-static const vec8 kRGBAToV = {
- 0, -18, -94, 112, 0, -18, -94, 112, 0, -18, -94, 112, 0, -18, -94, 112
-};
-
-static const uvec8 kAddY16 = {
- 16u, 16u, 16u, 16u, 16u, 16u, 16u, 16u, 16u, 16u, 16u, 16u, 16u, 16u, 16u, 16u
-};
-
-// 7 bit fixed point 0.5.
-static const vec16 kAddYJ64 = {
- 64, 64, 64, 64, 64, 64, 64, 64
-};
-
-static const uvec8 kAddUV128 = {
- 128u, 128u, 128u, 128u, 128u, 128u, 128u, 128u,
- 128u, 128u, 128u, 128u, 128u, 128u, 128u, 128u
-};
-
-static const uvec16 kAddUVJ128 = {
- 0x8080u, 0x8080u, 0x8080u, 0x8080u, 0x8080u, 0x8080u, 0x8080u, 0x8080u
-};
-
-// Shuffle table for converting RGB24 to ARGB.
-static const uvec8 kShuffleMaskRGB24ToARGB = {
- 0u, 1u, 2u, 12u, 3u, 4u, 5u, 13u, 6u, 7u, 8u, 14u, 9u, 10u, 11u, 15u
-};
-
-// Shuffle table for converting RAW to ARGB.
-static const uvec8 kShuffleMaskRAWToARGB = {
- 2u, 1u, 0u, 12u, 5u, 4u, 3u, 13u, 8u, 7u, 6u, 14u, 11u, 10u, 9u, 15u
-};
-
-// Shuffle table for converting ARGB to RGB24.
-static const uvec8 kShuffleMaskARGBToRGB24 = {
- 0u, 1u, 2u, 4u, 5u, 6u, 8u, 9u, 10u, 12u, 13u, 14u, 128u, 128u, 128u, 128u
-};
-
-// Shuffle table for converting ARGB to RAW.
-static const uvec8 kShuffleMaskARGBToRAW = {
- 2u, 1u, 0u, 6u, 5u, 4u, 10u, 9u, 8u, 14u, 13u, 12u, 128u, 128u, 128u, 128u
-};
-
-// Shuffle table for converting ARGBToRGB24 for I422ToRGB24. First 8 + next 4
-static const uvec8 kShuffleMaskARGBToRGB24_0 = {
- 0u, 1u, 2u, 4u, 5u, 6u, 8u, 9u, 128u, 128u, 128u, 128u, 10u, 12u, 13u, 14u
-};
-
-// Shuffle table for converting ARGB to RAW.
-static const uvec8 kShuffleMaskARGBToRAW_0 = {
- 2u, 1u, 0u, 6u, 5u, 4u, 10u, 9u, 128u, 128u, 128u, 128u, 8u, 14u, 13u, 12u
-};
-
-// Duplicates gray value 3 times and fills in alpha opaque.
-__declspec(naked)
-void J400ToARGBRow_SSE2(const uint8* src_y, uint8* dst_argb, int pix) {
- __asm {
- mov eax, [esp + 4] // src_y
- mov edx, [esp + 8] // dst_argb
- mov ecx, [esp + 12] // pix
- pcmpeqb xmm5, xmm5 // generate mask 0xff000000
- pslld xmm5, 24
-
- convertloop:
- movq xmm0, qword ptr [eax]
- lea eax, [eax + 8]
- punpcklbw xmm0, xmm0
- movdqa xmm1, xmm0
- punpcklwd xmm0, xmm0
- punpckhwd xmm1, xmm1
- por xmm0, xmm5
- por xmm1, xmm5
- movdqu [edx], xmm0
- movdqu [edx + 16], xmm1
- lea edx, [edx + 32]
- sub ecx, 8
- jg convertloop
- ret
- }
-}
-
-#ifdef HAS_J400TOARGBROW_AVX2
-// Duplicates gray value 3 times and fills in alpha opaque.
-__declspec(naked)
-void J400ToARGBRow_AVX2(const uint8* src_y, uint8* dst_argb, int pix) {
- __asm {
- mov eax, [esp + 4] // src_y
- mov edx, [esp + 8] // dst_argb
- mov ecx, [esp + 12] // pix
- vpcmpeqb ymm5, ymm5, ymm5 // generate mask 0xff000000
- vpslld ymm5, ymm5, 24
-
- convertloop:
- vmovdqu xmm0, [eax]
- lea eax, [eax + 16]
- vpermq ymm0, ymm0, 0xd8
- vpunpcklbw ymm0, ymm0, ymm0
- vpermq ymm0, ymm0, 0xd8
- vpunpckhwd ymm1, ymm0, ymm0
- vpunpcklwd ymm0, ymm0, ymm0
- vpor ymm0, ymm0, ymm5
- vpor ymm1, ymm1, ymm5
- vmovdqu [edx], ymm0
- vmovdqu [edx + 32], ymm1
- lea edx, [edx + 64]
- sub ecx, 16
- jg convertloop
- vzeroupper
- ret
- }
-}
-#endif // HAS_J400TOARGBROW_AVX2
-
-__declspec(naked)
-void RGB24ToARGBRow_SSSE3(const uint8* src_rgb24, uint8* dst_argb, int pix) {
- __asm {
- mov eax, [esp + 4] // src_rgb24
- mov edx, [esp + 8] // dst_argb
- mov ecx, [esp + 12] // pix
- pcmpeqb xmm5, xmm5 // generate mask 0xff000000
- pslld xmm5, 24
- movdqa xmm4, kShuffleMaskRGB24ToARGB
-
- convertloop:
- movdqu xmm0, [eax]
- movdqu xmm1, [eax + 16]
- movdqu xmm3, [eax + 32]
- lea eax, [eax + 48]
- movdqa xmm2, xmm3
- palignr xmm2, xmm1, 8 // xmm2 = { xmm3[0:3] xmm1[8:15]}
- pshufb xmm2, xmm4
- por xmm2, xmm5
- palignr xmm1, xmm0, 12 // xmm1 = { xmm3[0:7] xmm0[12:15]}
- pshufb xmm0, xmm4
- movdqu [edx + 32], xmm2
- por xmm0, xmm5
- pshufb xmm1, xmm4
- movdqu [edx], xmm0
- por xmm1, xmm5
- palignr xmm3, xmm3, 4 // xmm3 = { xmm3[4:15]}
- pshufb xmm3, xmm4
- movdqu [edx + 16], xmm1
- por xmm3, xmm5
- movdqu [edx + 48], xmm3
- lea edx, [edx + 64]
- sub ecx, 16
- jg convertloop
- ret
- }
-}
-
-__declspec(naked)
-void RAWToARGBRow_SSSE3(const uint8* src_raw, uint8* dst_argb,
- int pix) {
- __asm {
- mov eax, [esp + 4] // src_raw
- mov edx, [esp + 8] // dst_argb
- mov ecx, [esp + 12] // pix
- pcmpeqb xmm5, xmm5 // generate mask 0xff000000
- pslld xmm5, 24
- movdqa xmm4, kShuffleMaskRAWToARGB
-
- convertloop:
- movdqu xmm0, [eax]
- movdqu xmm1, [eax + 16]
- movdqu xmm3, [eax + 32]
- lea eax, [eax + 48]
- movdqa xmm2, xmm3
- palignr xmm2, xmm1, 8 // xmm2 = { xmm3[0:3] xmm1[8:15]}
- pshufb xmm2, xmm4
- por xmm2, xmm5
- palignr xmm1, xmm0, 12 // xmm1 = { xmm3[0:7] xmm0[12:15]}
- pshufb xmm0, xmm4
- movdqu [edx + 32], xmm2
- por xmm0, xmm5
- pshufb xmm1, xmm4
- movdqu [edx], xmm0
- por xmm1, xmm5
- palignr xmm3, xmm3, 4 // xmm3 = { xmm3[4:15]}
- pshufb xmm3, xmm4
- movdqu [edx + 16], xmm1
- por xmm3, xmm5
- movdqu [edx + 48], xmm3
- lea edx, [edx + 64]
- sub ecx, 16
- jg convertloop
- ret
- }
-}
-
-// pmul method to replicate bits.
-// Math to replicate bits:
-// (v << 8) | (v << 3)
-// v * 256 + v * 8
-// v * (256 + 8)
-// G shift of 5 is incorporated, so shift is 5 + 8 and 5 + 3
-// 20 instructions.
-__declspec(naked)
-void RGB565ToARGBRow_SSE2(const uint8* src_rgb565, uint8* dst_argb,
- int pix) {
- __asm {
- mov eax, 0x01080108 // generate multiplier to repeat 5 bits
- movd xmm5, eax
- pshufd xmm5, xmm5, 0
- mov eax, 0x20802080 // multiplier shift by 5 and then repeat 6 bits
- movd xmm6, eax
- pshufd xmm6, xmm6, 0
- pcmpeqb xmm3, xmm3 // generate mask 0xf800f800 for Red
- psllw xmm3, 11
- pcmpeqb xmm4, xmm4 // generate mask 0x07e007e0 for Green
- psllw xmm4, 10
- psrlw xmm4, 5
- pcmpeqb xmm7, xmm7 // generate mask 0xff00ff00 for Alpha
- psllw xmm7, 8
-
- mov eax, [esp + 4] // src_rgb565
- mov edx, [esp + 8] // dst_argb
- mov ecx, [esp + 12] // pix
- sub edx, eax
- sub edx, eax
-
- convertloop:
- movdqu xmm0, [eax] // fetch 8 pixels of bgr565
- movdqa xmm1, xmm0
- movdqa xmm2, xmm0
- pand xmm1, xmm3 // R in upper 5 bits
- psllw xmm2, 11 // B in upper 5 bits
- pmulhuw xmm1, xmm5 // * (256 + 8)
- pmulhuw xmm2, xmm5 // * (256 + 8)
- psllw xmm1, 8
- por xmm1, xmm2 // RB
- pand xmm0, xmm4 // G in middle 6 bits
- pmulhuw xmm0, xmm6 // << 5 * (256 + 4)
- por xmm0, xmm7 // AG
- movdqa xmm2, xmm1
- punpcklbw xmm1, xmm0
- punpckhbw xmm2, xmm0
- movdqu [eax * 2 + edx], xmm1 // store 4 pixels of ARGB
- movdqu [eax * 2 + edx + 16], xmm2 // store next 4 pixels of ARGB
- lea eax, [eax + 16]
- sub ecx, 8
- jg convertloop
- ret
- }
-}
-
-#ifdef HAS_RGB565TOARGBROW_AVX2
-// pmul method to replicate bits.
-// Math to replicate bits:
-// (v << 8) | (v << 3)
-// v * 256 + v * 8
-// v * (256 + 8)
-// G shift of 5 is incorporated, so shift is 5 + 8 and 5 + 3
-__declspec(naked)
-void RGB565ToARGBRow_AVX2(const uint8* src_rgb565, uint8* dst_argb,
- int pix) {
- __asm {
- mov eax, 0x01080108 // generate multiplier to repeat 5 bits
- vmovd xmm5, eax
- vbroadcastss ymm5, xmm5
- mov eax, 0x20802080 // multiplier shift by 5 and then repeat 6 bits
- movd xmm6, eax
- vbroadcastss ymm6, xmm6
- vpcmpeqb ymm3, ymm3, ymm3 // generate mask 0xf800f800 for Red
- vpsllw ymm3, ymm3, 11
- vpcmpeqb ymm4, ymm4, ymm4 // generate mask 0x07e007e0 for Green
- vpsllw ymm4, ymm4, 10
- vpsrlw ymm4, ymm4, 5
- vpcmpeqb ymm7, ymm7, ymm7 // generate mask 0xff00ff00 for Alpha
- vpsllw ymm7, ymm7, 8
-
- mov eax, [esp + 4] // src_rgb565
- mov edx, [esp + 8] // dst_argb
- mov ecx, [esp + 12] // pix
- sub edx, eax
- sub edx, eax
-
- convertloop:
- vmovdqu ymm0, [eax] // fetch 16 pixels of bgr565
- vpand ymm1, ymm0, ymm3 // R in upper 5 bits
- vpsllw ymm2, ymm0, 11 // B in upper 5 bits
- vpmulhuw ymm1, ymm1, ymm5 // * (256 + 8)
- vpmulhuw ymm2, ymm2, ymm5 // * (256 + 8)
- vpsllw ymm1, ymm1, 8
- vpor ymm1, ymm1, ymm2 // RB
- vpand ymm0, ymm0, ymm4 // G in middle 6 bits
- vpmulhuw ymm0, ymm0, ymm6 // << 5 * (256 + 4)
- vpor ymm0, ymm0, ymm7 // AG
- vpermq ymm0, ymm0, 0xd8 // mutate for unpack
- vpermq ymm1, ymm1, 0xd8
- vpunpckhbw ymm2, ymm1, ymm0
- vpunpcklbw ymm1, ymm1, ymm0
- vmovdqu [eax * 2 + edx], ymm1 // store 4 pixels of ARGB
- vmovdqu [eax * 2 + edx + 32], ymm2 // store next 4 pixels of ARGB
- lea eax, [eax + 32]
- sub ecx, 16
- jg convertloop
- vzeroupper
- ret
- }
-}
-#endif // HAS_RGB565TOARGBROW_AVX2
-
-#ifdef HAS_ARGB1555TOARGBROW_AVX2
-__declspec(naked)
-void ARGB1555ToARGBRow_AVX2(const uint8* src_argb1555, uint8* dst_argb,
- int pix) {
- __asm {
- mov eax, 0x01080108 // generate multiplier to repeat 5 bits
- vmovd xmm5, eax
- vbroadcastss ymm5, xmm5
- mov eax, 0x42004200 // multiplier shift by 6 and then repeat 5 bits
- movd xmm6, eax
- vbroadcastss ymm6, xmm6
- vpcmpeqb ymm3, ymm3, ymm3 // generate mask 0xf800f800 for Red
- vpsllw ymm3, ymm3, 11
- vpsrlw ymm4, ymm3, 6 // generate mask 0x03e003e0 for Green
- vpcmpeqb ymm7, ymm7, ymm7 // generate mask 0xff00ff00 for Alpha
- vpsllw ymm7, ymm7, 8
-
- mov eax, [esp + 4] // src_argb1555
- mov edx, [esp + 8] // dst_argb
- mov ecx, [esp + 12] // pix
- sub edx, eax
- sub edx, eax
-
- convertloop:
- vmovdqu ymm0, [eax] // fetch 16 pixels of 1555
- vpsllw ymm1, ymm0, 1 // R in upper 5 bits
- vpsllw ymm2, ymm0, 11 // B in upper 5 bits
- vpand ymm1, ymm1, ymm3
- vpmulhuw ymm2, ymm2, ymm5 // * (256 + 8)
- vpmulhuw ymm1, ymm1, ymm5 // * (256 + 8)
- vpsllw ymm1, ymm1, 8
- vpor ymm1, ymm1, ymm2 // RB
- vpsraw ymm2, ymm0, 8 // A
- vpand ymm0, ymm0, ymm4 // G in middle 5 bits
- vpmulhuw ymm0, ymm0, ymm6 // << 6 * (256 + 8)
- vpand ymm2, ymm2, ymm7
- vpor ymm0, ymm0, ymm2 // AG
- vpermq ymm0, ymm0, 0xd8 // mutate for unpack
- vpermq ymm1, ymm1, 0xd8
- vpunpckhbw ymm2, ymm1, ymm0
- vpunpcklbw ymm1, ymm1, ymm0
- vmovdqu [eax * 2 + edx], ymm1 // store 8 pixels of ARGB
- vmovdqu [eax * 2 + edx + 32], ymm2 // store next 8 pixels of ARGB
- lea eax, [eax + 32]
- sub ecx, 16
- jg convertloop
- vzeroupper
- ret
- }
-}
-#endif // HAS_ARGB1555TOARGBROW_AVX2
-
-#ifdef HAS_ARGB4444TOARGBROW_AVX2
-__declspec(naked)
-void ARGB4444ToARGBRow_AVX2(const uint8* src_argb4444, uint8* dst_argb,
- int pix) {
- __asm {
- mov eax, 0x0f0f0f0f // generate mask 0x0f0f0f0f
- vmovd xmm4, eax
- vbroadcastss ymm4, xmm4
- vpslld ymm5, ymm4, 4 // 0xf0f0f0f0 for high nibbles
- mov eax, [esp + 4] // src_argb4444
- mov edx, [esp + 8] // dst_argb
- mov ecx, [esp + 12] // pix
- sub edx, eax
- sub edx, eax
-
- convertloop:
- vmovdqu ymm0, [eax] // fetch 16 pixels of bgra4444
- vpand ymm2, ymm0, ymm5 // mask high nibbles
- vpand ymm0, ymm0, ymm4 // mask low nibbles
- vpsrlw ymm3, ymm2, 4
- vpsllw ymm1, ymm0, 4
- vpor ymm2, ymm2, ymm3
- vpor ymm0, ymm0, ymm1
- vpermq ymm0, ymm0, 0xd8 // mutate for unpack
- vpermq ymm2, ymm2, 0xd8
- vpunpckhbw ymm1, ymm0, ymm2
- vpunpcklbw ymm0, ymm0, ymm2
- vmovdqu [eax * 2 + edx], ymm0 // store 8 pixels of ARGB
- vmovdqu [eax * 2 + edx + 32], ymm1 // store next 8 pixels of ARGB
- lea eax, [eax + 32]
- sub ecx, 16
- jg convertloop
- vzeroupper
- ret
- }
-}
-#endif // HAS_ARGB4444TOARGBROW_AVX2
-
-// 24 instructions
-__declspec(naked)
-void ARGB1555ToARGBRow_SSE2(const uint8* src_argb1555, uint8* dst_argb,
- int pix) {
- __asm {
- mov eax, 0x01080108 // generate multiplier to repeat 5 bits
- movd xmm5, eax
- pshufd xmm5, xmm5, 0
- mov eax, 0x42004200 // multiplier shift by 6 and then repeat 5 bits
- movd xmm6, eax
- pshufd xmm6, xmm6, 0
- pcmpeqb xmm3, xmm3 // generate mask 0xf800f800 for Red
- psllw xmm3, 11
- movdqa xmm4, xmm3 // generate mask 0x03e003e0 for Green
- psrlw xmm4, 6
- pcmpeqb xmm7, xmm7 // generate mask 0xff00ff00 for Alpha
- psllw xmm7, 8
-
- mov eax, [esp + 4] // src_argb1555
- mov edx, [esp + 8] // dst_argb
- mov ecx, [esp + 12] // pix
- sub edx, eax
- sub edx, eax
-
- convertloop:
- movdqu xmm0, [eax] // fetch 8 pixels of 1555
- movdqa xmm1, xmm0
- movdqa xmm2, xmm0
- psllw xmm1, 1 // R in upper 5 bits
- psllw xmm2, 11 // B in upper 5 bits
- pand xmm1, xmm3
- pmulhuw xmm2, xmm5 // * (256 + 8)
- pmulhuw xmm1, xmm5 // * (256 + 8)
- psllw xmm1, 8
- por xmm1, xmm2 // RB
- movdqa xmm2, xmm0
- pand xmm0, xmm4 // G in middle 5 bits
- psraw xmm2, 8 // A
- pmulhuw xmm0, xmm6 // << 6 * (256 + 8)
- pand xmm2, xmm7
- por xmm0, xmm2 // AG
- movdqa xmm2, xmm1
- punpcklbw xmm1, xmm0
- punpckhbw xmm2, xmm0
- movdqu [eax * 2 + edx], xmm1 // store 4 pixels of ARGB
- movdqu [eax * 2 + edx + 16], xmm2 // store next 4 pixels of ARGB
- lea eax, [eax + 16]
- sub ecx, 8
- jg convertloop
- ret
- }
-}
-
-// 18 instructions.
-__declspec(naked)
-void ARGB4444ToARGBRow_SSE2(const uint8* src_argb4444, uint8* dst_argb,
- int pix) {
- __asm {
- mov eax, 0x0f0f0f0f // generate mask 0x0f0f0f0f
- movd xmm4, eax
- pshufd xmm4, xmm4, 0
- movdqa xmm5, xmm4 // 0xf0f0f0f0 for high nibbles
- pslld xmm5, 4
- mov eax, [esp + 4] // src_argb4444
- mov edx, [esp + 8] // dst_argb
- mov ecx, [esp + 12] // pix
- sub edx, eax
- sub edx, eax
-
- convertloop:
- movdqu xmm0, [eax] // fetch 8 pixels of bgra4444
- movdqa xmm2, xmm0
- pand xmm0, xmm4 // mask low nibbles
- pand xmm2, xmm5 // mask high nibbles
- movdqa xmm1, xmm0
- movdqa xmm3, xmm2
- psllw xmm1, 4
- psrlw xmm3, 4
- por xmm0, xmm1
- por xmm2, xmm3
- movdqa xmm1, xmm0
- punpcklbw xmm0, xmm2
- punpckhbw xmm1, xmm2
- movdqu [eax * 2 + edx], xmm0 // store 4 pixels of ARGB
- movdqu [eax * 2 + edx + 16], xmm1 // store next 4 pixels of ARGB
- lea eax, [eax + 16]
- sub ecx, 8
- jg convertloop
- ret
- }
-}
-
-__declspec(naked)
-void ARGBToRGB24Row_SSSE3(const uint8* src_argb, uint8* dst_rgb, int pix) {
- __asm {
- mov eax, [esp + 4] // src_argb
- mov edx, [esp + 8] // dst_rgb
- mov ecx, [esp + 12] // pix
- movdqa xmm6, kShuffleMaskARGBToRGB24
-
- convertloop:
- movdqu xmm0, [eax] // fetch 16 pixels of argb
- movdqu xmm1, [eax + 16]
- movdqu xmm2, [eax + 32]
- movdqu xmm3, [eax + 48]
- lea eax, [eax + 64]
- pshufb xmm0, xmm6 // pack 16 bytes of ARGB to 12 bytes of RGB
- pshufb xmm1, xmm6
- pshufb xmm2, xmm6
- pshufb xmm3, xmm6
- movdqa xmm4, xmm1 // 4 bytes from 1 for 0
- psrldq xmm1, 4 // 8 bytes from 1
- pslldq xmm4, 12 // 4 bytes from 1 for 0
- movdqa xmm5, xmm2 // 8 bytes from 2 for 1
- por xmm0, xmm4 // 4 bytes from 1 for 0
- pslldq xmm5, 8 // 8 bytes from 2 for 1
- movdqu [edx], xmm0 // store 0
- por xmm1, xmm5 // 8 bytes from 2 for 1
- psrldq xmm2, 8 // 4 bytes from 2
- pslldq xmm3, 4 // 12 bytes from 3 for 2
- por xmm2, xmm3 // 12 bytes from 3 for 2
- movdqu [edx + 16], xmm1 // store 1
- movdqu [edx + 32], xmm2 // store 2
- lea edx, [edx + 48]
- sub ecx, 16
- jg convertloop
- ret
- }
-}
-
-__declspec(naked)
-void ARGBToRAWRow_SSSE3(const uint8* src_argb, uint8* dst_rgb, int pix) {
- __asm {
- mov eax, [esp + 4] // src_argb
- mov edx, [esp + 8] // dst_rgb
- mov ecx, [esp + 12] // pix
- movdqa xmm6, kShuffleMaskARGBToRAW
-
- convertloop:
- movdqu xmm0, [eax] // fetch 16 pixels of argb
- movdqu xmm1, [eax + 16]
- movdqu xmm2, [eax + 32]
- movdqu xmm3, [eax + 48]
- lea eax, [eax + 64]
- pshufb xmm0, xmm6 // pack 16 bytes of ARGB to 12 bytes of RGB
- pshufb xmm1, xmm6
- pshufb xmm2, xmm6
- pshufb xmm3, xmm6
- movdqa xmm4, xmm1 // 4 bytes from 1 for 0
- psrldq xmm1, 4 // 8 bytes from 1
- pslldq xmm4, 12 // 4 bytes from 1 for 0
- movdqa xmm5, xmm2 // 8 bytes from 2 for 1
- por xmm0, xmm4 // 4 bytes from 1 for 0
- pslldq xmm5, 8 // 8 bytes from 2 for 1
- movdqu [edx], xmm0 // store 0
- por xmm1, xmm5 // 8 bytes from 2 for 1
- psrldq xmm2, 8 // 4 bytes from 2
- pslldq xmm3, 4 // 12 bytes from 3 for 2
- por xmm2, xmm3 // 12 bytes from 3 for 2
- movdqu [edx + 16], xmm1 // store 1
- movdqu [edx + 32], xmm2 // store 2
- lea edx, [edx + 48]
- sub ecx, 16
- jg convertloop
- ret
- }
-}
-
-// 4 pixels
-__declspec(naked)
-void ARGBToRGB565Row_SSE2(const uint8* src_argb, uint8* dst_rgb, int pix) {
- __asm {
- mov eax, [esp + 4] // src_argb
- mov edx, [esp + 8] // dst_rgb
- mov ecx, [esp + 12] // pix
- pcmpeqb xmm3, xmm3 // generate mask 0x0000001f
- psrld xmm3, 27
- pcmpeqb xmm4, xmm4 // generate mask 0x000007e0
- psrld xmm4, 26
- pslld xmm4, 5
- pcmpeqb xmm5, xmm5 // generate mask 0xfffff800
- pslld xmm5, 11
-
- convertloop:
- movdqu xmm0, [eax] // fetch 4 pixels of argb
- movdqa xmm1, xmm0 // B
- movdqa xmm2, xmm0 // G
- pslld xmm0, 8 // R
- psrld xmm1, 3 // B
- psrld xmm2, 5 // G
- psrad xmm0, 16 // R
- pand xmm1, xmm3 // B
- pand xmm2, xmm4 // G
- pand xmm0, xmm5 // R
- por xmm1, xmm2 // BG
- por xmm0, xmm1 // BGR
- packssdw xmm0, xmm0
- lea eax, [eax + 16]
- movq qword ptr [edx], xmm0 // store 4 pixels of RGB565
- lea edx, [edx + 8]
- sub ecx, 4
- jg convertloop
- ret
- }
-}
-
-// 8 pixels
-__declspec(naked)
-void ARGBToRGB565DitherRow_SSE2(const uint8* src_argb, uint8* dst_rgb,
- const uint32 dither4, int pix) {
- __asm {
-
- mov eax, [esp + 4] // src_argb
- mov edx, [esp + 8] // dst_rgb
- movd xmm6, [esp + 12] // dither4
- mov ecx, [esp + 16] // pix
- punpcklbw xmm6, xmm6 // make dither 16 bytes
- movdqa xmm7, xmm6
- punpcklwd xmm6, xmm6
- punpckhwd xmm7, xmm7
- pcmpeqb xmm3, xmm3 // generate mask 0x0000001f
- psrld xmm3, 27
- pcmpeqb xmm4, xmm4 // generate mask 0x000007e0
- psrld xmm4, 26
- pslld xmm4, 5
- pcmpeqb xmm5, xmm5 // generate mask 0xfffff800
- pslld xmm5, 11
-
- convertloop:
- movdqu xmm0, [eax] // fetch 4 pixels of argb
- paddusb xmm0, xmm6 // add dither
- movdqa xmm1, xmm0 // B
- movdqa xmm2, xmm0 // G
- pslld xmm0, 8 // R
- psrld xmm1, 3 // B
- psrld xmm2, 5 // G
- psrad xmm0, 16 // R
- pand xmm1, xmm3 // B
- pand xmm2, xmm4 // G
- pand xmm0, xmm5 // R
- por xmm1, xmm2 // BG
- por xmm0, xmm1 // BGR
- packssdw xmm0, xmm0
- lea eax, [eax + 16]
- movq qword ptr [edx], xmm0 // store 4 pixels of RGB565
- lea edx, [edx + 8]
- sub ecx, 4
- jg convertloop
- ret
- }
-}
-
-#ifdef HAS_ARGBTORGB565DITHERROW_AVX2
-__declspec(naked)
-void ARGBToRGB565DitherRow_AVX2(const uint8* src_argb, uint8* dst_rgb,
- const uint32 dither4, int pix) {
- __asm {
- mov eax, [esp + 4] // src_argb
- mov edx, [esp + 8] // dst_rgb
- vbroadcastss xmm6, [esp + 12] // dither4
- mov ecx, [esp + 16] // pix
- vpunpcklbw xmm6, xmm6, xmm6 // make dither 32 bytes
- vpermq ymm6, ymm6, 0xd8
- vpunpcklwd ymm6, ymm6, ymm6
- vpcmpeqb ymm3, ymm3, ymm3 // generate mask 0x0000001f
- vpsrld ymm3, ymm3, 27
- vpcmpeqb ymm4, ymm4, ymm4 // generate mask 0x000007e0
- vpsrld ymm4, ymm4, 26
- vpslld ymm4, ymm4, 5
- vpslld ymm5, ymm3, 11 // generate mask 0x0000f800
-
- convertloop:
- vmovdqu ymm0, [eax] // fetch 8 pixels of argb
- vpaddusb ymm0, ymm0, ymm6 // add dither
- vpsrld ymm2, ymm0, 5 // G
- vpsrld ymm1, ymm0, 3 // B
- vpsrld ymm0, ymm0, 8 // R
- vpand ymm2, ymm2, ymm4 // G
- vpand ymm1, ymm1, ymm3 // B
- vpand ymm0, ymm0, ymm5 // R
- vpor ymm1, ymm1, ymm2 // BG
- vpor ymm0, ymm0, ymm1 // BGR
- vpackusdw ymm0, ymm0, ymm0
- vpermq ymm0, ymm0, 0xd8
- lea eax, [eax + 32]
- vmovdqu [edx], xmm0 // store 8 pixels of RGB565
- lea edx, [edx + 16]
- sub ecx, 8
- jg convertloop
- vzeroupper
- ret
- }
-}
-#endif // HAS_ARGBTORGB565DITHERROW_AVX2
-
-// TODO(fbarchard): Improve sign extension/packing.
-__declspec(naked)
-void ARGBToARGB1555Row_SSE2(const uint8* src_argb, uint8* dst_rgb, int pix) {
- __asm {
- mov eax, [esp + 4] // src_argb
- mov edx, [esp + 8] // dst_rgb
- mov ecx, [esp + 12] // pix
- pcmpeqb xmm4, xmm4 // generate mask 0x0000001f
- psrld xmm4, 27
- movdqa xmm5, xmm4 // generate mask 0x000003e0
- pslld xmm5, 5
- movdqa xmm6, xmm4 // generate mask 0x00007c00
- pslld xmm6, 10
- pcmpeqb xmm7, xmm7 // generate mask 0xffff8000
- pslld xmm7, 15
-
- convertloop:
- movdqu xmm0, [eax] // fetch 4 pixels of argb
- movdqa xmm1, xmm0 // B
- movdqa xmm2, xmm0 // G
- movdqa xmm3, xmm0 // R
- psrad xmm0, 16 // A
- psrld xmm1, 3 // B
- psrld xmm2, 6 // G
- psrld xmm3, 9 // R
- pand xmm0, xmm7 // A
- pand xmm1, xmm4 // B
- pand xmm2, xmm5 // G
- pand xmm3, xmm6 // R
- por xmm0, xmm1 // BA
- por xmm2, xmm3 // GR
- por xmm0, xmm2 // BGRA
- packssdw xmm0, xmm0
- lea eax, [eax + 16]
- movq qword ptr [edx], xmm0 // store 4 pixels of ARGB1555
- lea edx, [edx + 8]
- sub ecx, 4
- jg convertloop
- ret
- }
-}
-
-__declspec(naked)
-void ARGBToARGB4444Row_SSE2(const uint8* src_argb, uint8* dst_rgb, int pix) {
- __asm {
- mov eax, [esp + 4] // src_argb
- mov edx, [esp + 8] // dst_rgb
- mov ecx, [esp + 12] // pix
- pcmpeqb xmm4, xmm4 // generate mask 0xf000f000
- psllw xmm4, 12
- movdqa xmm3, xmm4 // generate mask 0x00f000f0
- psrlw xmm3, 8
-
- convertloop:
- movdqu xmm0, [eax] // fetch 4 pixels of argb
- movdqa xmm1, xmm0
- pand xmm0, xmm3 // low nibble
- pand xmm1, xmm4 // high nibble
- psrld xmm0, 4
- psrld xmm1, 8
- por xmm0, xmm1
- packuswb xmm0, xmm0
- lea eax, [eax + 16]
- movq qword ptr [edx], xmm0 // store 4 pixels of ARGB4444
- lea edx, [edx + 8]
- sub ecx, 4
- jg convertloop
- ret
- }
-}
-
-#ifdef HAS_ARGBTORGB565ROW_AVX2
-__declspec(naked)
-void ARGBToRGB565Row_AVX2(const uint8* src_argb, uint8* dst_rgb, int pix) {
- __asm {
- mov eax, [esp + 4] // src_argb
- mov edx, [esp + 8] // dst_rgb
- mov ecx, [esp + 12] // pix
- vpcmpeqb ymm3, ymm3, ymm3 // generate mask 0x0000001f
- vpsrld ymm3, ymm3, 27
- vpcmpeqb ymm4, ymm4, ymm4 // generate mask 0x000007e0
- vpsrld ymm4, ymm4, 26
- vpslld ymm4, ymm4, 5
- vpslld ymm5, ymm3, 11 // generate mask 0x0000f800
-
- convertloop:
- vmovdqu ymm0, [eax] // fetch 8 pixels of argb
- vpsrld ymm2, ymm0, 5 // G
- vpsrld ymm1, ymm0, 3 // B
- vpsrld ymm0, ymm0, 8 // R
- vpand ymm2, ymm2, ymm4 // G
- vpand ymm1, ymm1, ymm3 // B
- vpand ymm0, ymm0, ymm5 // R
- vpor ymm1, ymm1, ymm2 // BG
- vpor ymm0, ymm0, ymm1 // BGR
- vpackusdw ymm0, ymm0, ymm0
- vpermq ymm0, ymm0, 0xd8
- lea eax, [eax + 32]
- vmovdqu [edx], xmm0 // store 8 pixels of RGB565
- lea edx, [edx + 16]
- sub ecx, 8
- jg convertloop
- vzeroupper
- ret
- }
-}
-#endif // HAS_ARGBTORGB565ROW_AVX2
-
-#ifdef HAS_ARGBTOARGB1555ROW_AVX2
-__declspec(naked)
-void ARGBToARGB1555Row_AVX2(const uint8* src_argb, uint8* dst_rgb, int pix) {
- __asm {
- mov eax, [esp + 4] // src_argb
- mov edx, [esp + 8] // dst_rgb
- mov ecx, [esp + 12] // pix
- vpcmpeqb ymm4, ymm4, ymm4
- vpsrld ymm4, ymm4, 27 // generate mask 0x0000001f
- vpslld ymm5, ymm4, 5 // generate mask 0x000003e0
- vpslld ymm6, ymm4, 10 // generate mask 0x00007c00
- vpcmpeqb ymm7, ymm7, ymm7 // generate mask 0xffff8000
- vpslld ymm7, ymm7, 15
-
- convertloop:
- vmovdqu ymm0, [eax] // fetch 8 pixels of argb
- vpsrld ymm3, ymm0, 9 // R
- vpsrld ymm2, ymm0, 6 // G
- vpsrld ymm1, ymm0, 3 // B
- vpsrad ymm0, ymm0, 16 // A
- vpand ymm3, ymm3, ymm6 // R
- vpand ymm2, ymm2, ymm5 // G
- vpand ymm1, ymm1, ymm4 // B
- vpand ymm0, ymm0, ymm7 // A
- vpor ymm0, ymm0, ymm1 // BA
- vpor ymm2, ymm2, ymm3 // GR
- vpor ymm0, ymm0, ymm2 // BGRA
- vpackssdw ymm0, ymm0, ymm0
- vpermq ymm0, ymm0, 0xd8
- lea eax, [eax + 32]
- vmovdqu [edx], xmm0 // store 8 pixels of ARGB1555
- lea edx, [edx + 16]
- sub ecx, 8
- jg convertloop
- vzeroupper
- ret
- }
-}
-#endif // HAS_ARGBTOARGB1555ROW_AVX2
-
-#ifdef HAS_ARGBTOARGB4444ROW_AVX2
-__declspec(naked)
-void ARGBToARGB4444Row_AVX2(const uint8* src_argb, uint8* dst_rgb, int pix) {
- __asm {
- mov eax, [esp + 4] // src_argb
- mov edx, [esp + 8] // dst_rgb
- mov ecx, [esp + 12] // pix
- vpcmpeqb ymm4, ymm4, ymm4 // generate mask 0xf000f000
- vpsllw ymm4, ymm4, 12
- vpsrlw ymm3, ymm4, 8 // generate mask 0x00f000f0
-
- convertloop:
- vmovdqu ymm0, [eax] // fetch 8 pixels of argb
- vpand ymm1, ymm0, ymm4 // high nibble
- vpand ymm0, ymm0, ymm3 // low nibble
- vpsrld ymm1, ymm1, 8
- vpsrld ymm0, ymm0, 4
- vpor ymm0, ymm0, ymm1
- vpackuswb ymm0, ymm0, ymm0
- vpermq ymm0, ymm0, 0xd8
- lea eax, [eax + 32]
- vmovdqu [edx], xmm0 // store 8 pixels of ARGB4444
- lea edx, [edx + 16]
- sub ecx, 8
- jg convertloop
- vzeroupper
- ret
- }
-}
-#endif // HAS_ARGBTOARGB4444ROW_AVX2
-
-// Convert 16 ARGB pixels (64 bytes) to 16 Y values.
-__declspec(naked)
-void ARGBToYRow_SSSE3(const uint8* src_argb, uint8* dst_y, int pix) {
- __asm {
- mov eax, [esp + 4] /* src_argb */
- mov edx, [esp + 8] /* dst_y */
- mov ecx, [esp + 12] /* pix */
- movdqa xmm4, kARGBToY
- movdqa xmm5, kAddY16
-
- convertloop:
- movdqu xmm0, [eax]
- movdqu xmm1, [eax + 16]
- movdqu xmm2, [eax + 32]
- movdqu xmm3, [eax + 48]
- pmaddubsw xmm0, xmm4
- pmaddubsw xmm1, xmm4
- pmaddubsw xmm2, xmm4
- pmaddubsw xmm3, xmm4
- lea eax, [eax + 64]
- phaddw xmm0, xmm1
- phaddw xmm2, xmm3
- psrlw xmm0, 7
- psrlw xmm2, 7
- packuswb xmm0, xmm2
- paddb xmm0, xmm5
- movdqu [edx], xmm0
- lea edx, [edx + 16]
- sub ecx, 16
- jg convertloop
- ret
- }
-}
-
-// Convert 16 ARGB pixels (64 bytes) to 16 YJ values.
-// Same as ARGBToYRow but different coefficients, no add 16, but do rounding.
-__declspec(naked)
-void ARGBToYJRow_SSSE3(const uint8* src_argb, uint8* dst_y, int pix) {
- __asm {
- mov eax, [esp + 4] /* src_argb */
- mov edx, [esp + 8] /* dst_y */
- mov ecx, [esp + 12] /* pix */
- movdqa xmm4, kARGBToYJ
- movdqa xmm5, kAddYJ64
-
- convertloop:
- movdqu xmm0, [eax]
- movdqu xmm1, [eax + 16]
- movdqu xmm2, [eax + 32]
- movdqu xmm3, [eax + 48]
- pmaddubsw xmm0, xmm4
- pmaddubsw xmm1, xmm4
- pmaddubsw xmm2, xmm4
- pmaddubsw xmm3, xmm4
- lea eax, [eax + 64]
- phaddw xmm0, xmm1
- phaddw xmm2, xmm3
- paddw xmm0, xmm5 // Add .5 for rounding.
- paddw xmm2, xmm5
- psrlw xmm0, 7
- psrlw xmm2, 7
- packuswb xmm0, xmm2
- movdqu [edx], xmm0
- lea edx, [edx + 16]
- sub ecx, 16
- jg convertloop
- ret
- }
-}
-
-#ifdef HAS_ARGBTOYROW_AVX2
-// vpermd for vphaddw + vpackuswb vpermd.
-static const lvec32 kPermdARGBToY_AVX = {
- 0, 4, 1, 5, 2, 6, 3, 7
-};
-
-// Convert 32 ARGB pixels (128 bytes) to 32 Y values.
-__declspec(naked)
-void ARGBToYRow_AVX2(const uint8* src_argb, uint8* dst_y, int pix) {
- __asm {
- mov eax, [esp + 4] /* src_argb */
- mov edx, [esp + 8] /* dst_y */
- mov ecx, [esp + 12] /* pix */
- vbroadcastf128 ymm4, kARGBToY
- vbroadcastf128 ymm5, kAddY16
- vmovdqu ymm6, kPermdARGBToY_AVX
-
- convertloop:
- vmovdqu ymm0, [eax]
- vmovdqu ymm1, [eax + 32]
- vmovdqu ymm2, [eax + 64]
- vmovdqu ymm3, [eax + 96]
- vpmaddubsw ymm0, ymm0, ymm4
- vpmaddubsw ymm1, ymm1, ymm4
- vpmaddubsw ymm2, ymm2, ymm4
- vpmaddubsw ymm3, ymm3, ymm4
- lea eax, [eax + 128]
- vphaddw ymm0, ymm0, ymm1 // mutates.
- vphaddw ymm2, ymm2, ymm3
- vpsrlw ymm0, ymm0, 7
- vpsrlw ymm2, ymm2, 7
- vpackuswb ymm0, ymm0, ymm2 // mutates.
- vpermd ymm0, ymm6, ymm0 // For vphaddw + vpackuswb mutation.
- vpaddb ymm0, ymm0, ymm5 // add 16 for Y
- vmovdqu [edx], ymm0
- lea edx, [edx + 32]
- sub ecx, 32
- jg convertloop
- vzeroupper
- ret
- }
-}
-#endif // HAS_ARGBTOYROW_AVX2
-
-#ifdef HAS_ARGBTOYJROW_AVX2
-// Convert 32 ARGB pixels (128 bytes) to 32 Y values.
-__declspec(naked)
-void ARGBToYJRow_AVX2(const uint8* src_argb, uint8* dst_y, int pix) {
- __asm {
- mov eax, [esp + 4] /* src_argb */
- mov edx, [esp + 8] /* dst_y */
- mov ecx, [esp + 12] /* pix */
- vbroadcastf128 ymm4, kARGBToYJ
- vbroadcastf128 ymm5, kAddYJ64
- vmovdqu ymm6, kPermdARGBToY_AVX
-
- convertloop:
- vmovdqu ymm0, [eax]
- vmovdqu ymm1, [eax + 32]
- vmovdqu ymm2, [eax + 64]
- vmovdqu ymm3, [eax + 96]
- vpmaddubsw ymm0, ymm0, ymm4
- vpmaddubsw ymm1, ymm1, ymm4
- vpmaddubsw ymm2, ymm2, ymm4
- vpmaddubsw ymm3, ymm3, ymm4
- lea eax, [eax + 128]
- vphaddw ymm0, ymm0, ymm1 // mutates.
- vphaddw ymm2, ymm2, ymm3
- vpaddw ymm0, ymm0, ymm5 // Add .5 for rounding.
- vpaddw ymm2, ymm2, ymm5
- vpsrlw ymm0, ymm0, 7
- vpsrlw ymm2, ymm2, 7
- vpackuswb ymm0, ymm0, ymm2 // mutates.
- vpermd ymm0, ymm6, ymm0 // For vphaddw + vpackuswb mutation.
- vmovdqu [edx], ymm0
- lea edx, [edx + 32]
- sub ecx, 32
- jg convertloop
-
- vzeroupper
- ret
- }
-}
-#endif // HAS_ARGBTOYJROW_AVX2
-
-__declspec(naked)
-void BGRAToYRow_SSSE3(const uint8* src_argb, uint8* dst_y, int pix) {
- __asm {
- mov eax, [esp + 4] /* src_argb */
- mov edx, [esp + 8] /* dst_y */
- mov ecx, [esp + 12] /* pix */
- movdqa xmm4, kBGRAToY
- movdqa xmm5, kAddY16
-
- convertloop:
- movdqu xmm0, [eax]
- movdqu xmm1, [eax + 16]
- movdqu xmm2, [eax + 32]
- movdqu xmm3, [eax + 48]
- pmaddubsw xmm0, xmm4
- pmaddubsw xmm1, xmm4
- pmaddubsw xmm2, xmm4
- pmaddubsw xmm3, xmm4
- lea eax, [eax + 64]
- phaddw xmm0, xmm1
- phaddw xmm2, xmm3
- psrlw xmm0, 7
- psrlw xmm2, 7
- packuswb xmm0, xmm2
- paddb xmm0, xmm5
- movdqu [edx], xmm0
- lea edx, [edx + 16]
- sub ecx, 16
- jg convertloop
- ret
- }
-}
-
-__declspec(naked)
-void ABGRToYRow_SSSE3(const uint8* src_argb, uint8* dst_y, int pix) {
- __asm {
- mov eax, [esp + 4] /* src_argb */
- mov edx, [esp + 8] /* dst_y */
- mov ecx, [esp + 12] /* pix */
- movdqa xmm4, kABGRToY
- movdqa xmm5, kAddY16
-
- convertloop:
- movdqu xmm0, [eax]
- movdqu xmm1, [eax + 16]
- movdqu xmm2, [eax + 32]
- movdqu xmm3, [eax + 48]
- pmaddubsw xmm0, xmm4
- pmaddubsw xmm1, xmm4
- pmaddubsw xmm2, xmm4
- pmaddubsw xmm3, xmm4
- lea eax, [eax + 64]
- phaddw xmm0, xmm1
- phaddw xmm2, xmm3
- psrlw xmm0, 7
- psrlw xmm2, 7
- packuswb xmm0, xmm2
- paddb xmm0, xmm5
- movdqu [edx], xmm0
- lea edx, [edx + 16]
- sub ecx, 16
- jg convertloop
- ret
- }
-}
-
-__declspec(naked)
-void RGBAToYRow_SSSE3(const uint8* src_argb, uint8* dst_y, int pix) {
- __asm {
- mov eax, [esp + 4] /* src_argb */
- mov edx, [esp + 8] /* dst_y */
- mov ecx, [esp + 12] /* pix */
- movdqa xmm4, kRGBAToY
- movdqa xmm5, kAddY16
-
- convertloop:
- movdqu xmm0, [eax]
- movdqu xmm1, [eax + 16]
- movdqu xmm2, [eax + 32]
- movdqu xmm3, [eax + 48]
- pmaddubsw xmm0, xmm4
- pmaddubsw xmm1, xmm4
- pmaddubsw xmm2, xmm4
- pmaddubsw xmm3, xmm4
- lea eax, [eax + 64]
- phaddw xmm0, xmm1
- phaddw xmm2, xmm3
- psrlw xmm0, 7
- psrlw xmm2, 7
- packuswb xmm0, xmm2
- paddb xmm0, xmm5
- movdqu [edx], xmm0
- lea edx, [edx + 16]
- sub ecx, 16
- jg convertloop
- ret
- }
-}
-
-__declspec(naked)
-void ARGBToUVRow_SSSE3(const uint8* src_argb0, int src_stride_argb,
- uint8* dst_u, uint8* dst_v, int width) {
- __asm {
- push esi
- push edi
- mov eax, [esp + 8 + 4] // src_argb
- mov esi, [esp + 8 + 8] // src_stride_argb
- mov edx, [esp + 8 + 12] // dst_u
- mov edi, [esp + 8 + 16] // dst_v
- mov ecx, [esp + 8 + 20] // pix
- movdqa xmm5, kAddUV128
- movdqa xmm6, kARGBToV
- movdqa xmm7, kARGBToU
- sub edi, edx // stride from u to v
-
- convertloop:
- /* step 1 - subsample 16x2 argb pixels to 8x1 */
- movdqu xmm0, [eax]
- movdqu xmm4, [eax + esi]
- pavgb xmm0, xmm4
- movdqu xmm1, [eax + 16]
- movdqu xmm4, [eax + esi + 16]
- pavgb xmm1, xmm4
- movdqu xmm2, [eax + 32]
- movdqu xmm4, [eax + esi + 32]
- pavgb xmm2, xmm4
- movdqu xmm3, [eax + 48]
- movdqu xmm4, [eax + esi + 48]
- pavgb xmm3, xmm4
-
- lea eax, [eax + 64]
- movdqa xmm4, xmm0
- shufps xmm0, xmm1, 0x88
- shufps xmm4, xmm1, 0xdd
- pavgb xmm0, xmm4
- movdqa xmm4, xmm2
- shufps xmm2, xmm3, 0x88
- shufps xmm4, xmm3, 0xdd
- pavgb xmm2, xmm4
-
- // step 2 - convert to U and V
- // from here down is very similar to Y code except
- // instead of 16 different pixels, its 8 pixels of U and 8 of V
- movdqa xmm1, xmm0
- movdqa xmm3, xmm2
- pmaddubsw xmm0, xmm7 // U
- pmaddubsw xmm2, xmm7
- pmaddubsw xmm1, xmm6 // V
- pmaddubsw xmm3, xmm6
- phaddw xmm0, xmm2
- phaddw xmm1, xmm3
- psraw xmm0, 8
- psraw xmm1, 8
- packsswb xmm0, xmm1
- paddb xmm0, xmm5 // -> unsigned
-
- // step 3 - store 8 U and 8 V values
- movlps qword ptr [edx], xmm0 // U
- movhps qword ptr [edx + edi], xmm0 // V
- lea edx, [edx + 8]
- sub ecx, 16
- jg convertloop
-
- pop edi
- pop esi
- ret
- }
-}
-
-__declspec(naked)  // naked: the compiler emits no prologue/epilogue, so the asm saves registers and returns itself
-void ARGBToUVJRow_SSSE3(const uint8* src_argb0, int src_stride_argb,  // 2x2-averages ARGB from two rows into U and V bytes
- uint8* dst_u, uint8* dst_v, int width) {  // using kARGBToUJ/kARGBToVJ; 16 source pixels -> 8 U + 8 V per loop pass
- __asm {
- push esi
- push edi  // after the two pushes the arguments start at esp + 8 + 4
- mov eax, [esp + 8 + 4] // src_argb0: first source row
- mov esi, [esp + 8 + 8] // src_stride_argb: byte offset from the first row to the second
- mov edx, [esp + 8 + 12] // dst_u
- mov edi, [esp + 8 + 16] // dst_v
- mov ecx, [esp + 8 + 20] // width; each loop pass consumes 64 source bytes per row and subtracts 16
- movdqa xmm5, kAddUVJ128  // rounding/bias constant, defined outside this block
- movdqa xmm6, kARGBToVJ  // V coefficients
- movdqa xmm7, kARGBToUJ  // U coefficients
- sub edi, edx // edi = dst_v - dst_u, so [edx + edi] addresses V while only edx advances
-
- convertloop:
- /* step 1 - subsample 16x2 argb pixels to 8x1 */
- movdqu xmm0, [eax]
- movdqu xmm4, [eax + esi]
- pavgb xmm0, xmm4  // vertical average of row 0 and row 1 (rounded byte average)
- movdqu xmm1, [eax + 16]
- movdqu xmm4, [eax + esi + 16]
- pavgb xmm1, xmm4
- movdqu xmm2, [eax + 32]
- movdqu xmm4, [eax + esi + 32]
- pavgb xmm2, xmm4
- movdqu xmm3, [eax + 48]
- movdqu xmm4, [eax + esi + 48]
- pavgb xmm3, xmm4
-
- lea eax, [eax + 64]  // advance the source by 64 bytes
- movdqa xmm4, xmm0
- shufps xmm0, xmm1, 0x88  // even-indexed pixels (dwords 0 and 2 of each register)
- shufps xmm4, xmm1, 0xdd  // odd-indexed pixels (dwords 1 and 3 of each register)
- pavgb xmm0, xmm4  // horizontal average of neighbouring pixels
- movdqa xmm4, xmm2
- shufps xmm2, xmm3, 0x88
- shufps xmm4, xmm3, 0xdd
- pavgb xmm2, xmm4
-
- // step 2 - convert to U and V
- // from here down is very similar to Y code except
- // instead of 16 different pixels, it's 8 pixels of U and 8 of V
- movdqa xmm1, xmm0
- movdqa xmm3, xmm2
- pmaddubsw xmm0, xmm7 // U: unsigned pixel bytes * signed coefficients, adjacent products summed into words
- pmaddubsw xmm2, xmm7
- pmaddubsw xmm1, xmm6 // V
- pmaddubsw xmm3, xmm6
- phaddw xmm0, xmm2  // add the two partial sums of each pixel -> 8 U words
- phaddw xmm1, xmm3  // -> 8 V words
- paddw xmm0, xmm5 // +.5 rounding -> unsigned (added before the shift, unlike the paddb-after-pack variants)
- paddw xmm1, xmm5
- psraw xmm0, 8
- psraw xmm1, 8
- packsswb xmm0, xmm1  // low 8 bytes = U, high 8 bytes = V
-
- // step 3 - store 8 U and 8 V values
- movlps qword ptr [edx], xmm0 // U: low 8 bytes
- movhps qword ptr [edx + edi], xmm0 // V: high 8 bytes
- lea edx, [edx + 8]
- sub ecx, 16
- jg convertloop  // repeat while the count is positive; a trailing partial group still runs a full pass
-
- pop edi
- pop esi
- ret
- }
-}
-
-#ifdef HAS_ARGBTOUVROW_AVX2
-__declspec(naked)  // naked: the compiler emits no prologue/epilogue, so the asm saves registers and returns itself
-void ARGBToUVRow_AVX2(const uint8* src_argb0, int src_stride_argb,  // 2x2-averages ARGB from two rows into U and V bytes
- uint8* dst_u, uint8* dst_v, int width) {  // 32 source pixels -> 16 U + 16 V per loop pass
- __asm {
- push esi
- push edi  // after the two pushes the arguments start at esp + 8 + 4
- mov eax, [esp + 8 + 4] // src_argb0: first source row
- mov esi, [esp + 8 + 8] // src_stride_argb: byte offset from the first row to the second
- mov edx, [esp + 8 + 12] // dst_u
- mov edi, [esp + 8 + 16] // dst_v
- mov ecx, [esp + 8 + 20] // width; each loop pass consumes 128 source bytes per row and subtracts 32
- vbroadcastf128 ymm5, kAddUV128  // 128-bit constant replicated into both lanes
- vbroadcastf128 ymm6, kARGBToV  // V coefficients
- vbroadcastf128 ymm7, kARGBToU  // U coefficients
- sub edi, edx // edi = dst_v - dst_u, so [edx + edi] addresses V while only edx advances
-
- convertloop:
- /* step 1 - subsample 32x2 argb pixels to 16x1 */
- vmovdqu ymm0, [eax]
- vmovdqu ymm1, [eax + 32]
- vmovdqu ymm2, [eax + 64]
- vmovdqu ymm3, [eax + 96]
- vpavgb ymm0, ymm0, [eax + esi]  // vertical average with the second row (rounded byte average)
- vpavgb ymm1, ymm1, [eax + esi + 32]
- vpavgb ymm2, ymm2, [eax + esi + 64]
- vpavgb ymm3, ymm3, [eax + esi + 96]
- lea eax, [eax + 128]  // advance the source by 128 bytes
- vshufps ymm4, ymm0, ymm1, 0x88  // even-indexed pixels, selected within each 128-bit lane
- vshufps ymm0, ymm0, ymm1, 0xdd  // odd-indexed pixels, selected within each 128-bit lane
- vpavgb ymm0, ymm0, ymm4 // mutated by vshufps (horizontal average; element order is repaired by vpshufb below)
- vshufps ymm4, ymm2, ymm3, 0x88
- vshufps ymm2, ymm2, ymm3, 0xdd
- vpavgb ymm2, ymm2, ymm4 // mutated by vshufps
-
- // step 2 - convert to U and V
- // from here down is very similar to Y code except
- // instead of 32 different pixels, it's 16 pixels of U and 16 of V
- vpmaddubsw ymm1, ymm0, ymm7 // U: unsigned pixel bytes * signed coefficients, adjacent products summed into words
- vpmaddubsw ymm3, ymm2, ymm7
- vpmaddubsw ymm0, ymm0, ymm6 // V
- vpmaddubsw ymm2, ymm2, ymm6
- vphaddw ymm1, ymm1, ymm3 // mutates (per-lane horizontal add -> U words)
- vphaddw ymm0, ymm0, ymm2  // -> V words
- vpsraw ymm1, ymm1, 8
- vpsraw ymm0, ymm0, 8
- vpacksswb ymm0, ymm1, ymm0 // mutates (each lane now holds 8 U bytes followed by 8 V bytes)
- vpermq ymm0, ymm0, 0xd8 // For vpacksswb: qwords 0,2,1,3 -> U in the low 128 bits, V in the high 128 bits
- vpshufb ymm0, ymm0, kShufARGBToUV_AVX // For vshufps + vphaddw
- vpaddb ymm0, ymm0, ymm5 // -> unsigned
-
- // step 3 - store 16 U and 16 V values
- vextractf128 [edx], ymm0, 0 // U: low 128 bits
- vextractf128 [edx + edi], ymm0, 1 // V: high 128 bits
- lea edx, [edx + 16]
- sub ecx, 32
- jg convertloop  // repeat while the count is positive; a trailing partial group still runs a full pass
-
- pop edi
- pop esi
- vzeroupper  // clear the upper ymm halves before returning to non-AVX code
- ret
- }
-}
-#endif // HAS_ARGBTOUVROW_AVX2
-
-__declspec(naked)  // naked: the compiler emits no prologue/epilogue, so the asm saves registers and returns itself
-void ARGBToUV444Row_SSSE3(const uint8* src_argb0,  // Converts one ARGB row to U and V with no subsampling:
- uint8* dst_u, uint8* dst_v, int width) {  // 16 source pixels -> 16 U + 16 V per loop pass
- __asm {
- push edi  // after the push the arguments start at esp + 4 + 4
- mov eax, [esp + 4 + 4] // src_argb0
- mov edx, [esp + 4 + 8] // dst_u
- mov edi, [esp + 4 + 12] // dst_v
- mov ecx, [esp + 4 + 16] // width; each loop pass consumes 64 source bytes and subtracts 16
- movdqa xmm5, kAddUV128  // bias constant, defined outside this block
- movdqa xmm6, kARGBToV  // V coefficients
- movdqa xmm7, kARGBToU  // U coefficients
- sub edi, edx // edi = dst_v - dst_u, so [edx + edi] addresses V while only edx advances
-
- convertloop:
- /* convert to U and V */
- movdqu xmm0, [eax] // U pass: load 16 pixels
- movdqu xmm1, [eax + 16]
- movdqu xmm2, [eax + 32]
- movdqu xmm3, [eax + 48]
- pmaddubsw xmm0, xmm7  // unsigned pixel bytes * signed coefficients, adjacent products summed into words
- pmaddubsw xmm1, xmm7
- pmaddubsw xmm2, xmm7
- pmaddubsw xmm3, xmm7
- phaddw xmm0, xmm1  // add the two partial sums of each pixel -> one word per pixel
- phaddw xmm2, xmm3
- psraw xmm0, 8
- psraw xmm2, 8
- packsswb xmm0, xmm2  // 16 signed bytes
- paddb xmm0, xmm5  // add the bias constant
- movdqu [edx], xmm0  // store 16 U bytes
-
- movdqu xmm0, [eax] // V pass: reload the same 16 pixels
- movdqu xmm1, [eax + 16]
- movdqu xmm2, [eax + 32]
- movdqu xmm3, [eax + 48]
- pmaddubsw xmm0, xmm6
- pmaddubsw xmm1, xmm6
- pmaddubsw xmm2, xmm6
- pmaddubsw xmm3, xmm6
- phaddw xmm0, xmm1
- phaddw xmm2, xmm3
- psraw xmm0, 8
- psraw xmm2, 8
- packsswb xmm0, xmm2
- paddb xmm0, xmm5
- lea eax, [eax + 64]  // advance the source only after both passes have read it
- movdqu [edx + edi], xmm0  // store 16 V bytes
- lea edx, [edx + 16]
- sub ecx, 16
- jg convertloop  // repeat while the count is positive; a trailing partial group still runs a full pass
-
- pop edi
- ret
- }
-}
-
-__declspec(naked)  // naked: the compiler emits no prologue/epilogue, so the asm saves registers and returns itself
-void ARGBToUV422Row_SSSE3(const uint8* src_argb0,  // Converts one ARGB row to U and V, averaging horizontal pixel pairs only:
- uint8* dst_u, uint8* dst_v, int width) {  // 16 source pixels -> 8 U + 8 V per loop pass
- __asm {
- push edi  // after the push the arguments start at esp + 4 + 4
- mov eax, [esp + 4 + 4] // src_argb0
- mov edx, [esp + 4 + 8] // dst_u
- mov edi, [esp + 4 + 12] // dst_v
- mov ecx, [esp + 4 + 16] // width; each loop pass consumes 64 source bytes and subtracts 16
- movdqa xmm5, kAddUV128  // bias constant, defined outside this block
- movdqa xmm6, kARGBToV  // V coefficients
- movdqa xmm7, kARGBToU  // U coefficients
- sub edi, edx // edi = dst_v - dst_u, so [edx + edi] addresses V while only edx advances
-
- convertloop:
- /* step 1 - subsample 16x1 argb pixels to 8x1 (single row: no vertical averaging here) */
- movdqu xmm0, [eax]
- movdqu xmm1, [eax + 16]
- movdqu xmm2, [eax + 32]
- movdqu xmm3, [eax + 48]
- lea eax, [eax + 64]  // advance the source by 64 bytes
- movdqa xmm4, xmm0
- shufps xmm0, xmm1, 0x88  // even-indexed pixels (dwords 0 and 2 of each register)
- shufps xmm4, xmm1, 0xdd  // odd-indexed pixels (dwords 1 and 3 of each register)
- pavgb xmm0, xmm4  // horizontal average of neighbouring pixels
- movdqa xmm4, xmm2
- shufps xmm2, xmm3, 0x88
- shufps xmm4, xmm3, 0xdd
- pavgb xmm2, xmm4
-
- // step 2 - convert to U and V
- // from here down is very similar to Y code except
- // instead of 16 different pixels, it's 8 pixels of U and 8 of V
- movdqa xmm1, xmm0
- movdqa xmm3, xmm2
- pmaddubsw xmm0, xmm7 // U: unsigned pixel bytes * signed coefficients, adjacent products summed into words
- pmaddubsw xmm2, xmm7
- pmaddubsw xmm1, xmm6 // V
- pmaddubsw xmm3, xmm6
- phaddw xmm0, xmm2  // add the two partial sums of each pixel -> 8 U words
- phaddw xmm1, xmm3  // -> 8 V words
- psraw xmm0, 8
- psraw xmm1, 8
- packsswb xmm0, xmm1  // low 8 bytes = U, high 8 bytes = V
- paddb xmm0, xmm5 // -> unsigned
-
- // step 3 - store 8 U and 8 V values
- movlps qword ptr [edx], xmm0 // U: low 8 bytes
- movhps qword ptr [edx + edi], xmm0 // V: high 8 bytes
- lea edx, [edx + 8]
- sub ecx, 16
- jg convertloop  // repeat while the count is positive; a trailing partial group still runs a full pass
-
- pop edi
- ret
- }
-}
-
-__declspec(naked)  // naked: the compiler emits no prologue/epilogue, so the asm saves registers and returns itself
-void BGRAToUVRow_SSSE3(const uint8* src_argb0, int src_stride_argb,  // 2x2-averages BGRA from two rows into U and V bytes
- uint8* dst_u, uint8* dst_v, int width) {  // using kBGRAToU/kBGRAToV; 16 source pixels -> 8 U + 8 V per loop pass
- __asm {
- push esi
- push edi  // after the two pushes the arguments start at esp + 8 + 4
- mov eax, [esp + 8 + 4] // src_argb0: first source row
- mov esi, [esp + 8 + 8] // src_stride_argb: byte offset from the first row to the second
- mov edx, [esp + 8 + 12] // dst_u
- mov edi, [esp + 8 + 16] // dst_v
- mov ecx, [esp + 8 + 20] // width; each loop pass consumes 64 source bytes per row and subtracts 16
- movdqa xmm5, kAddUV128  // bias constant, defined outside this block
- movdqa xmm6, kBGRAToV  // V coefficients
- movdqa xmm7, kBGRAToU  // U coefficients
- sub edi, edx // edi = dst_v - dst_u, so [edx + edi] addresses V while only edx advances
-
- convertloop:
- /* step 1 - subsample 16x2 source pixels to 8x1 */
- movdqu xmm0, [eax]
- movdqu xmm4, [eax + esi]
- pavgb xmm0, xmm4  // vertical average of row 0 and row 1 (rounded byte average)
- movdqu xmm1, [eax + 16]
- movdqu xmm4, [eax + esi + 16]
- pavgb xmm1, xmm4
- movdqu xmm2, [eax + 32]
- movdqu xmm4, [eax + esi + 32]
- pavgb xmm2, xmm4
- movdqu xmm3, [eax + 48]
- movdqu xmm4, [eax + esi + 48]
- pavgb xmm3, xmm4
-
- lea eax, [eax + 64]  // advance the source by 64 bytes
- movdqa xmm4, xmm0
- shufps xmm0, xmm1, 0x88  // even-indexed pixels (dwords 0 and 2 of each register)
- shufps xmm4, xmm1, 0xdd  // odd-indexed pixels (dwords 1 and 3 of each register)
- pavgb xmm0, xmm4  // horizontal average of neighbouring pixels
- movdqa xmm4, xmm2
- shufps xmm2, xmm3, 0x88
- shufps xmm4, xmm3, 0xdd
- pavgb xmm2, xmm4
-
- // step 2 - convert to U and V
- // from here down is very similar to Y code except
- // instead of 16 different pixels, it's 8 pixels of U and 8 of V
- movdqa xmm1, xmm0
- movdqa xmm3, xmm2
- pmaddubsw xmm0, xmm7 // U: unsigned pixel bytes * signed coefficients, adjacent products summed into words
- pmaddubsw xmm2, xmm7
- pmaddubsw xmm1, xmm6 // V
- pmaddubsw xmm3, xmm6
- phaddw xmm0, xmm2  // add the two partial sums of each pixel -> 8 U words
- phaddw xmm1, xmm3  // -> 8 V words
- psraw xmm0, 8
- psraw xmm1, 8
- packsswb xmm0, xmm1  // low 8 bytes = U, high 8 bytes = V
- paddb xmm0, xmm5 // -> unsigned
-
- // step 3 - store 8 U and 8 V values
- movlps qword ptr [edx], xmm0 // U: low 8 bytes
- movhps qword ptr [edx + edi], xmm0 // V: high 8 bytes
- lea edx, [edx + 8]
- sub ecx, 16
- jg convertloop  // repeat while the count is positive; a trailing partial group still runs a full pass
-
- pop edi
- pop esi
- ret
- }
-}
-
-__declspec(naked)  // naked: the compiler emits no prologue/epilogue, so the asm saves registers and returns itself
-void ABGRToUVRow_SSSE3(const uint8* src_argb0, int src_stride_argb,  // 2x2-averages ABGR from two rows into U and V bytes
- uint8* dst_u, uint8* dst_v, int width) {  // using kABGRToU/kABGRToV; 16 source pixels -> 8 U + 8 V per loop pass
- __asm {
- push esi
- push edi  // after the two pushes the arguments start at esp + 8 + 4
- mov eax, [esp + 8 + 4] // src_argb0: first source row
- mov esi, [esp + 8 + 8] // src_stride_argb: byte offset from the first row to the second
- mov edx, [esp + 8 + 12] // dst_u
- mov edi, [esp + 8 + 16] // dst_v
- mov ecx, [esp + 8 + 20] // width; each loop pass consumes 64 source bytes per row and subtracts 16
- movdqa xmm5, kAddUV128  // bias constant, defined outside this block
- movdqa xmm6, kABGRToV  // V coefficients
- movdqa xmm7, kABGRToU  // U coefficients
- sub edi, edx // edi = dst_v - dst_u, so [edx + edi] addresses V while only edx advances
-
- convertloop:
- /* step 1 - subsample 16x2 source pixels to 8x1 */
- movdqu xmm0, [eax]
- movdqu xmm4, [eax + esi]
- pavgb xmm0, xmm4  // vertical average of row 0 and row 1 (rounded byte average)
- movdqu xmm1, [eax + 16]
- movdqu xmm4, [eax + esi + 16]
- pavgb xmm1, xmm4
- movdqu xmm2, [eax + 32]
- movdqu xmm4, [eax + esi + 32]
- pavgb xmm2, xmm4
- movdqu xmm3, [eax + 48]
- movdqu xmm4, [eax + esi + 48]
- pavgb xmm3, xmm4
-
- lea eax, [eax + 64]  // advance the source by 64 bytes
- movdqa xmm4, xmm0
- shufps xmm0, xmm1, 0x88  // even-indexed pixels (dwords 0 and 2 of each register)
- shufps xmm4, xmm1, 0xdd  // odd-indexed pixels (dwords 1 and 3 of each register)
- pavgb xmm0, xmm4  // horizontal average of neighbouring pixels
- movdqa xmm4, xmm2
- shufps xmm2, xmm3, 0x88
- shufps xmm4, xmm3, 0xdd
- pavgb xmm2, xmm4
-
- // step 2 - convert to U and V
- // from here down is very similar to Y code except
- // instead of 16 different pixels, it's 8 pixels of U and 8 of V
- movdqa xmm1, xmm0
- movdqa xmm3, xmm2
- pmaddubsw xmm0, xmm7 // U: unsigned pixel bytes * signed coefficients, adjacent products summed into words
- pmaddubsw xmm2, xmm7
- pmaddubsw xmm1, xmm6 // V
- pmaddubsw xmm3, xmm6
- phaddw xmm0, xmm2  // add the two partial sums of each pixel -> 8 U words
- phaddw xmm1, xmm3  // -> 8 V words
- psraw xmm0, 8
- psraw xmm1, 8
- packsswb xmm0, xmm1  // low 8 bytes = U, high 8 bytes = V
- paddb xmm0, xmm5 // -> unsigned
-
- // step 3 - store 8 U and 8 V values
- movlps qword ptr [edx], xmm0 // U: low 8 bytes
- movhps qword ptr [edx + edi], xmm0 // V: high 8 bytes
- lea edx, [edx + 8]
- sub ecx, 16
- jg convertloop  // repeat while the count is positive; a trailing partial group still runs a full pass
-
- pop edi
- pop esi
- ret
- }
-}
-
-__declspec(naked)  // naked: the compiler emits no prologue/epilogue, so the asm saves registers and returns itself
-void RGBAToUVRow_SSSE3(const uint8* src_argb0, int src_stride_argb,  // 2x2-averages RGBA from two rows into U and V bytes
- uint8* dst_u, uint8* dst_v, int width) {  // using kRGBAToU/kRGBAToV; 16 source pixels -> 8 U + 8 V per loop pass
- __asm {
- push esi
- push edi  // after the two pushes the arguments start at esp + 8 + 4
- mov eax, [esp + 8 + 4] // src_argb0: first source row
- mov esi, [esp + 8 + 8] // src_stride_argb: byte offset from the first row to the second
- mov edx, [esp + 8 + 12] // dst_u
- mov edi, [esp + 8 + 16] // dst_v
- mov ecx, [esp + 8 + 20] // width; each loop pass consumes 64 source bytes per row and subtracts 16
- movdqa xmm5, kAddUV128  // bias constant, defined outside this block
- movdqa xmm6, kRGBAToV  // V coefficients
- movdqa xmm7, kRGBAToU  // U coefficients
- sub edi, edx // edi = dst_v - dst_u, so [edx + edi] addresses V while only edx advances
-
- convertloop:
- /* step 1 - subsample 16x2 source pixels to 8x1 */
- movdqu xmm0, [eax]
- movdqu xmm4, [eax + esi]
- pavgb xmm0, xmm4  // vertical average of row 0 and row 1 (rounded byte average)
- movdqu xmm1, [eax + 16]
- movdqu xmm4, [eax + esi + 16]
- pavgb xmm1, xmm4
- movdqu xmm2, [eax + 32]
- movdqu xmm4, [eax + esi + 32]
- pavgb xmm2, xmm4
- movdqu xmm3, [eax + 48]
- movdqu xmm4, [eax + esi + 48]
- pavgb xmm3, xmm4
-
- lea eax, [eax + 64]  // advance the source by 64 bytes
- movdqa xmm4, xmm0
- shufps xmm0, xmm1, 0x88  // even-indexed pixels (dwords 0 and 2 of each register)
- shufps xmm4, xmm1, 0xdd  // odd-indexed pixels (dwords 1 and 3 of each register)
- pavgb xmm0, xmm4  // horizontal average of neighbouring pixels
- movdqa xmm4, xmm2
- shufps xmm2, xmm3, 0x88
- shufps xmm4, xmm3, 0xdd
- pavgb xmm2, xmm4
-
- // step 2 - convert to U and V
- // from here down is very similar to Y code except
- // instead of 16 different pixels, it's 8 pixels of U and 8 of V
- movdqa xmm1, xmm0
- movdqa xmm3, xmm2
- pmaddubsw xmm0, xmm7 // U: unsigned pixel bytes * signed coefficients, adjacent products summed into words
- pmaddubsw xmm2, xmm7
- pmaddubsw xmm1, xmm6 // V
- pmaddubsw xmm3, xmm6
- phaddw xmm0, xmm2  // add the two partial sums of each pixel -> 8 U words
- phaddw xmm1, xmm3  // -> 8 V words
- psraw xmm0, 8
- psraw xmm1, 8
- packsswb xmm0, xmm1  // low 8 bytes = U, high 8 bytes = V
- paddb xmm0, xmm5 // -> unsigned
-
- // step 3 - store 8 U and 8 V values
- movlps qword ptr [edx], xmm0 // U: low 8 bytes
- movhps qword ptr [edx + edi], xmm0 // V: high 8 bytes
- lea edx, [edx + 8]
- sub ecx, 16
- jg convertloop  // repeat while the count is positive; a trailing partial group still runs a full pass
-
- pop edi
- pop esi
- ret
- }
-}
-#endif // HAS_ARGBTOYROW_SSSE3
-
-// Read 16 UV from 444: loads 16 U bytes at [esi] and 16 V bytes at [esi + edi], advances esi by 16, leaves interleaved UV in ymm0 (overwrites ymm1).
-#define READYUV444_AVX2 __asm { \
- __asm vmovdqu xmm0, [esi] /* U */ /* NOLINT */ \
- __asm vmovdqu xmm1, [esi + edi] /* V */ /* NOLINT */ \
- __asm lea esi, [esi + 16] \
- __asm vpermq ymm0, ymm0, 0xd8 /* qword order 0,2,1,3: puts one loaded qword in the low half of each 128-bit lane */ \
- __asm vpermq ymm1, ymm1, 0xd8 \
- __asm vpunpcklbw ymm0, ymm0, ymm1 /* UV: per-lane interleave of the low qwords */ \
- }
-
-// Read 8 UV from 422, upsample to 16 UV: loads 8 U bytes at [esi] and 8 V bytes at [esi + edi], advances esi by 8, result in ymm0 (overwrites ymm1).
-#define READYUV422_AVX2 __asm { \
- __asm vmovq xmm0, qword ptr [esi] /* U */ /* NOLINT */ \
- __asm vmovq xmm1, qword ptr [esi + edi] /* V */ /* NOLINT */ \
- __asm lea esi, [esi + 8] \
- __asm vpunpcklbw ymm0, ymm0, ymm1 /* UV */ \
- __asm vpermq ymm0, ymm0, 0xd8 /* qword order 0,2,1,3: one qword of UV pairs into the low half of each lane */ \
- __asm vpunpcklwd ymm0, ymm0, ymm0 /* UVUV (upsample): each UV pair duplicated */ \
- }
-
-// Read 4 UV from 411, upsample to 16 UV: loads 4 U bytes at [esi] and 4 V bytes at [esi + edi], advances esi by 4, result in ymm0 (overwrites ymm1).
-#define READYUV411_AVX2 __asm { \
- __asm vmovd xmm0, dword ptr [esi] /* U */ /* NOLINT */ \
- __asm vmovd xmm1, dword ptr [esi + edi] /* V */ /* NOLINT */ \
- __asm lea esi, [esi + 4] \
- __asm vpunpcklbw ymm0, ymm0, ymm1 /* UV */ \
- __asm vpunpcklwd ymm0, ymm0, ymm0 /* UVUV (upsample) */ \
- __asm vpermq ymm0, ymm0, 0xd8 /* qword order 0,2,1,3: one qword into the low half of each lane */ \
- __asm vpunpckldq ymm0, ymm0, ymm0 /* UVUVUVUV (upsample): each UV pair now appears four times */ \
- }
-
-// Read 8 UV from NV12, upsample to 16 UV: loads 16 bytes of already-interleaved UV at [esi], advances esi by 16, result in ymm0.
-#define READNV12_AVX2 __asm { \
- __asm vmovdqu xmm0, [esi] /* UV */ \
- __asm lea esi, [esi + 16] \
- __asm vpermq ymm0, ymm0, 0xd8 /* qword order 0,2,1,3: one qword of UV pairs into the low half of each lane */ \
- __asm vpunpcklwd ymm0, ymm0, ymm0 /* UVUV (upsample): each UV pair duplicated */ \
- }
-
-// Convert 16 pixels: 16 UV and 16 Y. In: ymm0 = UV pairs, eax = Y pointer. Out: ymm0 = B, ymm1 = G, ymm2 = R as packed bytes; eax advanced by 16; ymm3 overwritten.
-#define YUVTORGB_AVX2(YuvConstants) __asm { \
- /* Step 1: Find 8 UV contributions to 16 R,G,B values */ \
- __asm vpmaddubsw ymm2, ymm0, YuvConstants.kUVToR /* scale R UV */ \
- __asm vpmaddubsw ymm1, ymm0, YuvConstants.kUVToG /* scale G UV */ \
- __asm vpmaddubsw ymm0, ymm0, YuvConstants.kUVToB /* scale B UV */ \
- __asm vmovdqu ymm3, YuvConstants.kUVBiasR \
- __asm vpsubw ymm2, ymm3, ymm2 /* R = bias - scaled UV */ \
- __asm vmovdqu ymm3, YuvConstants.kUVBiasG \
- __asm vpsubw ymm1, ymm3, ymm1 /* G = bias - scaled UV */ \
- __asm vmovdqu ymm3, YuvConstants.kUVBiasB \
- __asm vpsubw ymm0, ymm3, ymm0 /* B = bias - scaled UV */ \
- /* Step 2: Find Y contribution to 16 R,G,B values */ \
- __asm vmovdqu xmm3, [eax] /* NOLINT */ \
- __asm lea eax, [eax + 16] \
- __asm vpermq ymm3, ymm3, 0xd8 /* qword order 0,2,1,3: 8 Y bytes into the low half of each lane */ \
- __asm vpunpcklbw ymm3, ymm3, ymm3 /* each Y byte duplicated into both bytes of a word */ \
- __asm vpmulhuw ymm3, ymm3, YuvConstants.kYToRgb /* keep the high 16 bits of the unsigned product */ \
- __asm vpaddsw ymm0, ymm0, ymm3 /* B += Y */ \
- __asm vpaddsw ymm1, ymm1, ymm3 /* G += Y */ \
- __asm vpaddsw ymm2, ymm2, ymm3 /* R += Y */ \
- __asm vpsraw ymm0, ymm0, 6 \
- __asm vpsraw ymm1, ymm1, 6 \
- __asm vpsraw ymm2, ymm2, 6 \
- __asm vpackuswb ymm0, ymm0, ymm0 /* B: words to bytes with unsigned saturation */ \
- __asm vpackuswb ymm1, ymm1, ymm1 /* G */ \
- __asm vpackuswb ymm2, ymm2, ymm2 /* R */ \
- }
-
-// Store 16 ARGB values (64 bytes at edx). In: ymm0 = B, ymm1 = G, ymm2 = R, ymm5 = alpha bytes. Advances edx by 64; overwrites ymm0-ymm2.
-#define STOREARGB_AVX2 __asm { \
- /* Step 3: Weave into ARGB */ \
- __asm vpunpcklbw ymm0, ymm0, ymm1 /* BG */ \
- __asm vpermq ymm0, ymm0, 0xd8 /* qword order 0,2,1,3 so the per-lane word unpacks below emit pixels in order */ \
- __asm vpunpcklbw ymm2, ymm2, ymm5 /* RA */ \
- __asm vpermq ymm2, ymm2, 0xd8 \
- __asm vpunpcklwd ymm1, ymm0, ymm2 /* BGRA first 8 pixels */ \
- __asm vpunpckhwd ymm0, ymm0, ymm2 /* BGRA next 8 pixels */ \
- __asm vmovdqu 0[edx], ymm1 \
- __asm vmovdqu 32[edx], ymm0 \
- __asm lea edx, [edx + 64] \
- }
-
-#ifdef HAS_I422TOARGBROW_AVX2
-// 16 pixels per loop pass: separate Y, U and V rows converted to ARGB with kYuvConstants.
-// 8 UV values upsampled to 16 UV, mixed with 16 Y producing 16 ARGB (64 bytes).
-__declspec(naked)  // naked: the compiler emits no prologue/epilogue, so the asm saves registers and returns itself
-void I422ToARGBRow_AVX2(const uint8* y_buf,
- const uint8* u_buf,
- const uint8* v_buf,
- uint8* dst_argb,
- int width) {
- __asm {
- push esi
- push edi  // after the two pushes the arguments start at esp + 8 + 4
- mov eax, [esp + 8 + 4] // Y
- mov esi, [esp + 8 + 8] // U
- mov edi, [esp + 8 + 12] // V
- mov edx, [esp + 8 + 16] // argb
- mov ecx, [esp + 8 + 20] // width
- sub edi, esi  // edi = v_buf - u_buf, so [esi + edi] addresses V while only esi advances
- vpcmpeqb ymm5, ymm5, ymm5 // generate 0xffffffffffffffff for alpha
-
- convertloop:
- READYUV422_AVX2  // ymm0 = 16 UV pairs
- YUVTORGB_AVX2(kYuvConstants)  // ymm0/ymm1/ymm2 = B/G/R
- STOREARGB_AVX2  // writes 64 bytes at edx and advances it
-
- sub ecx, 16
- jg convertloop  // repeat while the count is positive; a trailing partial group still runs a full pass
-
- pop edi
- pop esi
- vzeroupper  // clear the upper ymm halves before returning to non-AVX code
- ret
- }
-}
-#endif // HAS_I422TOARGBROW_AVX2
-
-#ifdef HAS_J422TOARGBROW_AVX2
-// 16 pixels per loop pass: same flow as the I422 AVX2 conversion but with kYuvJConstants.
-// 8 UV values upsampled to 16 UV, mixed with 16 Y producing 16 ARGB (64 bytes).
-__declspec(naked)  // naked: the compiler emits no prologue/epilogue, so the asm saves registers and returns itself
-void J422ToARGBRow_AVX2(const uint8* y_buf,
- const uint8* u_buf,
- const uint8* v_buf,
- uint8* dst_argb,
- int width) {
- __asm {
- push esi
- push edi  // after the two pushes the arguments start at esp + 8 + 4
- mov eax, [esp + 8 + 4] // Y
- mov esi, [esp + 8 + 8] // U
- mov edi, [esp + 8 + 12] // V
- mov edx, [esp + 8 + 16] // argb
- mov ecx, [esp + 8 + 20] // width
- sub edi, esi  // edi = v_buf - u_buf, so [esi + edi] addresses V while only esi advances
- vpcmpeqb ymm5, ymm5, ymm5 // generate 0xffffffffffffffff for alpha
-
- convertloop:
- READYUV422_AVX2  // ymm0 = 16 UV pairs
- YUVTORGB_AVX2(kYuvJConstants)  // ymm0/ymm1/ymm2 = B/G/R
- STOREARGB_AVX2  // writes 64 bytes at edx and advances it
-
- sub ecx, 16
- jg convertloop  // repeat while the count is positive; a trailing partial group still runs a full pass
-
- pop edi
- pop esi
- vzeroupper  // clear the upper ymm halves before returning to non-AVX code
- ret
- }
-}
-#endif // HAS_J422TOARGBROW_AVX2
-
-#ifdef HAS_I444TOARGBROW_AVX2
-// 16 pixels per loop pass: separate Y, U and V rows with one UV pair per pixel, converted to ARGB.
-// 16 UV values with 16 Y producing 16 ARGB (64 bytes).
-__declspec(naked)  // naked: the compiler emits no prologue/epilogue, so the asm saves registers and returns itself
-void I444ToARGBRow_AVX2(const uint8* y_buf,
- const uint8* u_buf,
- const uint8* v_buf,
- uint8* dst_argb,
- int width) {
- __asm {
- push esi
- push edi  // after the two pushes the arguments start at esp + 8 + 4
- mov eax, [esp + 8 + 4] // Y
- mov esi, [esp + 8 + 8] // U
- mov edi, [esp + 8 + 12] // V
- mov edx, [esp + 8 + 16] // argb
- mov ecx, [esp + 8 + 20] // width
- sub edi, esi  // edi = v_buf - u_buf, so [esi + edi] addresses V while only esi advances
- vpcmpeqb ymm5, ymm5, ymm5 // generate 0xffffffffffffffff for alpha
-
- convertloop:
- READYUV444_AVX2  // ymm0 = 16 UV pairs, no upsampling
- YUVTORGB_AVX2(kYuvConstants)  // ymm0/ymm1/ymm2 = B/G/R
- STOREARGB_AVX2  // writes 64 bytes at edx and advances it
-
- sub ecx, 16
- jg convertloop  // repeat while the count is positive; a trailing partial group still runs a full pass
-
- pop edi
- pop esi
- vzeroupper  // clear the upper ymm halves before returning to non-AVX code
- ret
- }
-}
-#endif // HAS_I444TOARGBROW_AVX2
-
-#ifdef HAS_I411TOARGBROW_AVX2
-// 16 pixels per loop pass: separate Y, U and V rows with one UV pair per four pixels, converted to ARGB.
-// 4 UV values upsampled to 16 UV, mixed with 16 Y producing 16 ARGB (64 bytes).
-__declspec(naked)  // naked: the compiler emits no prologue/epilogue, so the asm saves registers and returns itself
-void I411ToARGBRow_AVX2(const uint8* y_buf,
- const uint8* u_buf,
- const uint8* v_buf,
- uint8* dst_argb,
- int width) {
- __asm {
- push esi
- push edi  // after the two pushes the arguments start at esp + 8 + 4
- mov eax, [esp + 8 + 4] // Y
- mov esi, [esp + 8 + 8] // U
- mov edi, [esp + 8 + 12] // V
- mov edx, [esp + 8 + 16] // argb
- mov ecx, [esp + 8 + 20] // width
- sub edi, esi  // edi = v_buf - u_buf, so [esi + edi] addresses V while only esi advances
- vpcmpeqb ymm5, ymm5, ymm5 // generate 0xffffffffffffffff for alpha
-
- convertloop:
- READYUV411_AVX2  // ymm0 = 16 UV pairs, each source pair repeated four times
- YUVTORGB_AVX2(kYuvConstants)  // ymm0/ymm1/ymm2 = B/G/R
- STOREARGB_AVX2  // writes 64 bytes at edx and advances it
-
- sub ecx, 16
- jg convertloop  // repeat while the count is positive; a trailing partial group still runs a full pass
-
- pop edi
- pop esi
- vzeroupper  // clear the upper ymm halves before returning to non-AVX code
- ret
- }
-}
-#endif // HAS_I411TOARGBROW_AVX2
-
-#ifdef HAS_NV12TOARGBROW_AVX2
-// 16 pixels per loop pass: a Y row plus one interleaved UV row converted to ARGB.
-// 8 UV values upsampled to 16 UV, mixed with 16 Y producing 16 ARGB (64 bytes).
-__declspec(naked)  // naked: the compiler emits no prologue/epilogue, so the asm saves registers and returns itself
-void NV12ToARGBRow_AVX2(const uint8* y_buf,
- const uint8* uv_buf,
- uint8* dst_argb,
- int width) {
- __asm {
- push esi  // after the push the arguments start at esp + 4 + 4
- mov eax, [esp + 4 + 4] // Y
- mov esi, [esp + 4 + 8] // UV
- mov edx, [esp + 4 + 12] // argb
- mov ecx, [esp + 4 + 16] // width
- vpcmpeqb ymm5, ymm5, ymm5 // generate 0xffffffffffffffff for alpha
-
- convertloop:
- READNV12_AVX2  // ymm0 = 16 UV pairs
- YUVTORGB_AVX2(kYuvConstants)  // ymm0/ymm1/ymm2 = B/G/R
- STOREARGB_AVX2  // writes 64 bytes at edx and advances it
-
- sub ecx, 16
- jg convertloop  // repeat while the count is positive; a trailing partial group still runs a full pass
-
- pop esi
- vzeroupper  // clear the upper ymm halves before returning to non-AVX code
- ret
- }
-}
-#endif // HAS_NV12TOARGBROW_AVX2
-
-#ifdef HAS_NV21TOARGBROW_AVX2
-// 16 pixels per loop pass: same loader as the NV12 version; only the constant table (kYvuConstants) differs.
-// 8 VU values upsampled to 16 VU, mixed with 16 Y producing 16 ARGB (64 bytes).
-__declspec(naked)  // naked: the compiler emits no prologue/epilogue, so the asm saves registers and returns itself
-void NV21ToARGBRow_AVX2(const uint8* y_buf,
- const uint8* uv_buf,
- uint8* dst_argb,
- int width) {
- __asm {
- push esi  // after the push the arguments start at esp + 4 + 4
- mov eax, [esp + 4 + 4] // Y
- mov esi, [esp + 4 + 8] // UV (interleaved chroma row; VU order per the header comment)
- mov edx, [esp + 4 + 12] // argb
- mov ecx, [esp + 4 + 16] // width
- vpcmpeqb ymm5, ymm5, ymm5 // generate 0xffffffffffffffff for alpha
-
- convertloop:
- READNV12_AVX2  // ymm0 = 16 chroma pairs
- YUVTORGB_AVX2(kYvuConstants)  // NOTE(review): presumably coefficient tables with U and V swapped - defined elsewhere, confirm
- STOREARGB_AVX2  // writes 64 bytes at edx and advances it
-
- sub ecx, 16
- jg convertloop  // repeat while the count is positive; a trailing partial group still runs a full pass
-
- pop esi
- vzeroupper  // clear the upper ymm halves before returning to non-AVX code
- ret
- }
-}
-#endif // HAS_NV21TOARGBROW_AVX2
-
-#ifdef HAS_I422TOBGRAROW_AVX2
-// 16 pixels per loop pass: separate Y, U and V rows converted to BGRA (bytes stored in A,R,G,B order).
-// 8 UV values upsampled to 16 UV, mixed with 16 Y producing 16 BGRA (64 bytes).
-// TODO(fbarchard): Use macros to reduce duplicate code. See SSSE3.
-__declspec(naked)  // naked: the compiler emits no prologue/epilogue, so the asm saves registers and returns itself
-void I422ToBGRARow_AVX2(const uint8* y_buf,
- const uint8* u_buf,
- const uint8* v_buf,
- uint8* dst_argb,
- int width) {
- __asm {
- push esi
- push edi  // after the two pushes the arguments start at esp + 8 + 4
- mov eax, [esp + 8 + 4] // Y
- mov esi, [esp + 8 + 8] // U
- mov edi, [esp + 8 + 12] // V
- mov edx, [esp + 8 + 16] // dst_argb (receives BGRA output)
- mov ecx, [esp + 8 + 20] // width
- sub edi, esi  // edi = v_buf - u_buf, so [esi + edi] addresses V while only esi advances
- vpcmpeqb ymm5, ymm5, ymm5 // generate 0xffffffffffffffff for alpha
-
- convertloop:
- READYUV422_AVX2  // ymm0 = 16 UV pairs
- YUVTORGB_AVX2(kYuvConstants)  // ymm0/ymm1/ymm2 = B/G/R
-
- // Step 3: Weave into BGRA
- vpunpcklbw ymm1, ymm1, ymm0 // GB
- vpermq ymm1, ymm1, 0xd8  // qword order 0,2,1,3 so the per-lane word unpacks below emit pixels in order
- vpunpcklbw ymm2, ymm5, ymm2 // AR
- vpermq ymm2, ymm2, 0xd8
- vpunpcklwd ymm0, ymm2, ymm1 // ARGB first 8 pixels
- vpunpckhwd ymm2, ymm2, ymm1 // ARGB next 8 pixels
- vmovdqu [edx], ymm0
- vmovdqu [edx + 32], ymm2
- lea edx, [edx + 64]
- sub ecx, 16
- jg convertloop  // repeat while the count is positive; a trailing partial group still runs a full pass
-
- pop edi
- pop esi
- vzeroupper  // clear the upper ymm halves before returning to non-AVX code
- ret
- }
-}
-#endif // HAS_I422TOBGRAROW_AVX2
-
-#ifdef HAS_I422TORGBAROW_AVX2
-// 16 pixels per loop pass: separate Y, U and V rows converted to RGBA (bytes stored in A,B,G,R order).
-// 8 UV values upsampled to 16 UV, mixed with 16 Y producing 16 RGBA (64 bytes).
-// TODO(fbarchard): Use macros to reduce duplicate code. See SSSE3.
-__declspec(naked)  // naked: the compiler emits no prologue/epilogue, so the asm saves registers and returns itself
-void I422ToRGBARow_AVX2(const uint8* y_buf,
- const uint8* u_buf,
- const uint8* v_buf,
- uint8* dst_argb,
- int width) {
- __asm {
- push esi
- push edi  // after the two pushes the arguments start at esp + 8 + 4
- mov eax, [esp + 8 + 4] // Y
- mov esi, [esp + 8 + 8] // U
- mov edi, [esp + 8 + 12] // V
- mov edx, [esp + 8 + 16] // dst_argb (receives RGBA output)
- mov ecx, [esp + 8 + 20] // width
- sub edi, esi  // edi = v_buf - u_buf, so [esi + edi] addresses V while only esi advances
- vpcmpeqb ymm5, ymm5, ymm5 // generate 0xffffffffffffffff for alpha
-
- convertloop:
- READYUV422_AVX2  // ymm0 = 16 UV pairs
- YUVTORGB_AVX2(kYuvConstants)  // ymm0/ymm1/ymm2 = B/G/R
-
- // Step 3: Weave into RGBA
- vpunpcklbw ymm1, ymm1, ymm2 // GR
- vpermq ymm1, ymm1, 0xd8  // qword order 0,2,1,3 so the per-lane word unpacks below emit pixels in order
- vpunpcklbw ymm2, ymm5, ymm0 // AB
- vpermq ymm2, ymm2, 0xd8
- vpunpcklwd ymm0, ymm2, ymm1 // ABGR first 8 pixels
- vpunpckhwd ymm1, ymm2, ymm1 // ABGR next 8 pixels
- vmovdqu [edx], ymm0
- vmovdqu [edx + 32], ymm1
- lea edx, [edx + 64]
- sub ecx, 16
- jg convertloop  // repeat while the count is positive; a trailing partial group still runs a full pass
-
- pop edi
- pop esi
- vzeroupper  // clear the upper ymm halves before returning to non-AVX code
- ret
- }
-}
-#endif // HAS_I422TORGBAROW_AVX2
-
-#ifdef HAS_I422TOABGRROW_AVX2
-// 16 pixels per loop pass: separate Y, U and V rows converted to ABGR (bytes stored in R,G,B,A order).
-// 8 UV values upsampled to 16 UV, mixed with 16 Y producing 16 ABGR (64 bytes).
-// TODO(fbarchard): Use macros to reduce duplicate code. See SSSE3.
-__declspec(naked)  // naked: the compiler emits no prologue/epilogue, so the asm saves registers and returns itself
-void I422ToABGRRow_AVX2(const uint8* y_buf,
- const uint8* u_buf,
- const uint8* v_buf,
- uint8* dst_argb,
- int width) {
- __asm {
- push esi
- push edi  // after the two pushes the arguments start at esp + 8 + 4
- mov eax, [esp + 8 + 4] // Y
- mov esi, [esp + 8 + 8] // U
- mov edi, [esp + 8 + 12] // V
- mov edx, [esp + 8 + 16] // dst_argb (receives ABGR output)
- mov ecx, [esp + 8 + 20] // width
- sub edi, esi  // edi = v_buf - u_buf, so [esi + edi] addresses V while only esi advances
- vpcmpeqb ymm5, ymm5, ymm5 // generate 0xffffffffffffffff for alpha
-
- convertloop:
- READYUV422_AVX2  // ymm0 = 16 UV pairs
- YUVTORGB_AVX2(kYuvConstants)  // ymm0/ymm1/ymm2 = B/G/R
-
- // Step 3: Weave into ABGR
- vpunpcklbw ymm1, ymm2, ymm1 // RG
- vpermq ymm1, ymm1, 0xd8  // qword order 0,2,1,3 so the per-lane word unpacks below emit pixels in order
- vpunpcklbw ymm2, ymm0, ymm5 // BA
- vpermq ymm2, ymm2, 0xd8
- vpunpcklwd ymm0, ymm1, ymm2 // RGBA first 8 pixels
- vpunpckhwd ymm1, ymm1, ymm2 // RGBA next 8 pixels
- vmovdqu [edx], ymm0
- vmovdqu [edx + 32], ymm1
- lea edx, [edx + 64]
- sub ecx, 16
- jg convertloop  // repeat while the count is positive; a trailing partial group still runs a full pass
-
- pop edi
- pop esi
- vzeroupper  // clear the upper ymm halves before returning to non-AVX code
- ret
- }
-}
-#endif // HAS_I422TOABGRROW_AVX2
-
-#if defined(HAS_I422TOARGBROW_SSSE3)
-// TODO(fbarchard): Read that does half size on Y and treats 420 as 444.
-
-// Read 8 UV from 444: loads 8 U bytes at [esi] and 8 V bytes at [esi + edi], advances esi by 8, leaves interleaved UV in xmm0 (overwrites xmm1).
-#define READYUV444 __asm { \
- __asm movq xmm0, qword ptr [esi] /* U */ /* NOLINT */ \
- __asm movq xmm1, qword ptr [esi + edi] /* V */ /* NOLINT */ \
- __asm lea esi, [esi + 8] \
- __asm punpcklbw xmm0, xmm1 /* UV */ \
- }
-
-// Read 4 UV from 422, upsample to 8 UV: loads 4 U bytes at [esi] and 4 V bytes at [esi + edi], advances esi by 4, result in xmm0 (overwrites xmm1).
-#define READYUV422 __asm { \
- __asm movd xmm0, [esi] /* U */ \
- __asm movd xmm1, [esi + edi] /* V */ \
- __asm lea esi, [esi + 4] \
- __asm punpcklbw xmm0, xmm1 /* UV */ \
- __asm punpcklwd xmm0, xmm0 /* UVUV (upsample): each UV pair duplicated */ \
- }
-
-// Read 2 UV from 411, upsample to 8 UV: loads 2 U bytes at [esi] and 2 V bytes at [esi + edi], advances esi by 2, result in xmm0. Uses ebx as scratch and overwrites xmm1.
-#define READYUV411 __asm { \
- __asm movzx ebx, word ptr [esi] /* U */ /* NOLINT */ \
- __asm movd xmm0, ebx \
- __asm movzx ebx, word ptr [esi + edi] /* V */ /* NOLINT */ \
- __asm movd xmm1, ebx \
- __asm lea esi, [esi + 2] \
- __asm punpcklbw xmm0, xmm1 /* UV */ \
- __asm punpcklwd xmm0, xmm0 /* UVUV (upsample) */ \
- __asm punpckldq xmm0, xmm0 /* UVUVUVUV (upsample): each UV pair now appears four times */ \
- }
-
-// Read 4 UV from NV12, upsample to 8 UV: loads 8 bytes of already-interleaved UV at [esi], advances esi by 8, result in xmm0.
-#define READNV12 __asm { \
- __asm movq xmm0, qword ptr [esi] /* UV */ /* NOLINT */ \
- __asm lea esi, [esi + 8] \
- __asm punpcklwd xmm0, xmm0 /* UVUV (upsample): each UV pair duplicated */ \
- }
-
-// Convert 8 pixels: 8 UV and 8 Y. In: xmm0 = UV pairs, eax = Y pointer. Out: xmm0 = B, xmm1 = G, xmm2 = R as packed bytes; eax advanced by 8; xmm3 overwritten.
-#define YUVTORGB(YuvConstants) __asm { \
- /* Step 1: Find 4 UV contributions to 8 R,G,B values */ \
- __asm movdqa xmm1, xmm0 \
- __asm movdqa xmm2, xmm0 \
- __asm movdqa xmm3, xmm0 \
- __asm movdqa xmm0, YuvConstants.kUVBiasB /* unbias back to signed */ \
- __asm pmaddubsw xmm1, YuvConstants.kUVToB /* scale B UV */ \
- __asm psubw xmm0, xmm1 /* B = bias - scaled UV */ \
- __asm movdqa xmm1, YuvConstants.kUVBiasG \
- __asm pmaddubsw xmm2, YuvConstants.kUVToG /* scale G UV */ \
- __asm psubw xmm1, xmm2 /* G = bias - scaled UV */ \
- __asm movdqa xmm2, YuvConstants.kUVBiasR \
- __asm pmaddubsw xmm3, YuvConstants.kUVToR /* scale R UV */ \
- __asm psubw xmm2, xmm3 /* R = bias - scaled UV */ \
- /* Step 2: Find Y contribution to 8 R,G,B values */ \
- __asm movq xmm3, qword ptr [eax] /* NOLINT */ \
- __asm lea eax, [eax + 8] \
- __asm punpcklbw xmm3, xmm3 /* each Y byte duplicated into both bytes of a word */ \
- __asm pmulhuw xmm3, YuvConstants.kYToRgb /* keep the high 16 bits of the unsigned product */ \
- __asm paddsw xmm0, xmm3 /* B += Y */ \
- __asm paddsw xmm1, xmm3 /* G += Y */ \
- __asm paddsw xmm2, xmm3 /* R += Y */ \
- __asm psraw xmm0, 6 \
- __asm psraw xmm1, 6 \
- __asm psraw xmm2, 6 \
- __asm packuswb xmm0, xmm0 /* B: words to bytes with unsigned saturation */ \
- __asm packuswb xmm1, xmm1 /* G */ \
- __asm packuswb xmm2, xmm2 /* R */ \
- }
-
-// Store 8 ARGB values (32 bytes at edx, bytes in B,G,R,A order). In: xmm0 = B, xmm1 = G, xmm2 = R, xmm5 = alpha bytes. Advances edx by 32.
-#define STOREARGB __asm { \
- /* Step 3: Weave into ARGB */ \
- __asm punpcklbw xmm0, xmm1 /* BG */ \
- __asm punpcklbw xmm2, xmm5 /* RA */ \
- __asm movdqa xmm1, xmm0 \
- __asm punpcklwd xmm0, xmm2 /* BGRA first 4 pixels */ \
- __asm punpckhwd xmm1, xmm2 /* BGRA next 4 pixels */ \
- __asm movdqu 0[edx], xmm0 \
- __asm movdqu 16[edx], xmm1 \
- __asm lea edx, [edx + 32] \
- }
-
-// Store 8 BGRA values (32 bytes at edx, bytes in A,R,G,B order). In: xmm0 = B, xmm1 = G, xmm2 = R. Regenerates alpha in xmm5 on every use. Advances edx by 32.
-#define STOREBGRA __asm { \
- /* Step 3: Weave into BGRA */ \
- __asm pcmpeqb xmm5, xmm5 /* generate 0xffffffff for alpha */ \
- __asm punpcklbw xmm1, xmm0 /* GB */ \
- __asm punpcklbw xmm5, xmm2 /* AR */ \
- __asm movdqa xmm0, xmm5 \
- __asm punpcklwd xmm5, xmm1 /* ARGB byte order, first 4 pixels */ \
- __asm punpckhwd xmm0, xmm1 /* ARGB byte order, next 4 pixels */ \
- __asm movdqu 0[edx], xmm5 \
- __asm movdqu 16[edx], xmm0 \
- __asm lea edx, [edx + 32] \
- }
-
-// Store 8 ABGR values (32 bytes at edx, bytes in R,G,B,A order). In: xmm0 = B, xmm1 = G, xmm2 = R, xmm5 = alpha bytes. Advances edx by 32.
-#define STOREABGR __asm { \
- /* Step 3: Weave into ABGR */ \
- __asm punpcklbw xmm2, xmm1 /* RG */ \
- __asm punpcklbw xmm0, xmm5 /* BA */ \
- __asm movdqa xmm1, xmm2 \
- __asm punpcklwd xmm2, xmm0 /* RGBA first 4 pixels */ \
- __asm punpckhwd xmm1, xmm0 /* RGBA next 4 pixels */ \
- __asm movdqu 0[edx], xmm2 \
- __asm movdqu 16[edx], xmm1 \
- __asm lea edx, [edx + 32] \
- }
-
-// Store 8 RGBA values (32 bytes at edx, bytes in A,B,G,R order). In: xmm0 = B, xmm1 = G, xmm2 = R. Regenerates alpha in xmm5 on every use. Advances edx by 32.
-#define STORERGBA __asm { \
- /* Step 3: Weave into RGBA */ \
- __asm pcmpeqb xmm5, xmm5 /* generate 0xffffffff for alpha */ \
- __asm punpcklbw xmm1, xmm2 /* GR */ \
- __asm punpcklbw xmm5, xmm0 /* AB */ \
- __asm movdqa xmm0, xmm5 \
- __asm punpcklwd xmm5, xmm1 /* ABGR byte order, first 4 pixels */ \
- __asm punpckhwd xmm0, xmm1 /* ABGR byte order, next 4 pixels */ \
- __asm movdqu 0[edx], xmm5 \
- __asm movdqu 16[edx], xmm0 \
- __asm lea edx, [edx + 32] \
- }
-
-// Store 8 RGB24 values (24 bytes at edx). In: xmm0 = B, xmm1 = G, xmm2 = R, xmm5/xmm6 = pshufb masks loaded by the caller. Advances edx by 24.
-#define STORERGB24 __asm { \
- /* Step 3: Weave into RRGB */ \
- __asm punpcklbw xmm0, xmm1 /* BG */ \
- __asm punpcklbw xmm2, xmm2 /* RR */ \
- __asm movdqa xmm1, xmm0 \
- __asm punpcklwd xmm0, xmm2 /* BGRR first 4 pixels */ \
- __asm punpckhwd xmm1, xmm2 /* BGRR next 4 pixels */ \
- /* Step 4: RRGB -> RGB24 */ \
- __asm pshufb xmm0, xmm5 /* Pack first 8 and last 4 bytes. */ \
- __asm pshufb xmm1, xmm6 /* Pack first 12 bytes. */ \
- __asm palignr xmm1, xmm0, 12 /* last 4 bytes of xmm0 + 12 xmm1 */ \
- __asm movq qword ptr 0[edx], xmm0 /* First 8 bytes */ \
- __asm movdqu 8[edx], xmm1 /* Last 16 bytes */ \
- __asm lea edx, [edx + 24] \
- }
-
-// Store 8 RAW values (24 bytes at edx). Same instruction sequence as the RGB24 store; the output byte order comes from the pshufb masks the caller loads into xmm5/xmm6.
-#define STORERAW __asm { \
- /* Step 3: Weave into RRGB */ \
- __asm punpcklbw xmm0, xmm1 /* BG */ \
- __asm punpcklbw xmm2, xmm2 /* RR */ \
- __asm movdqa xmm1, xmm0 \
- __asm punpcklwd xmm0, xmm2 /* BGRR first 4 pixels */ \
- __asm punpckhwd xmm1, xmm2 /* BGRR next 4 pixels */ \
- /* Step 4: RRGB -> RAW */ \
- __asm pshufb xmm0, xmm5 /* Pack first 8 and last 4 bytes. */ \
- __asm pshufb xmm1, xmm6 /* Pack first 12 bytes. */ \
- __asm palignr xmm1, xmm0, 12 /* last 4 bytes of xmm0 + 12 xmm1 */ \
- __asm movq qword ptr 0[edx], xmm0 /* First 8 bytes */ \
- __asm movdqu 8[edx], xmm1 /* Last 16 bytes */ \
- __asm lea edx, [edx + 24] \
- }
-
-// Store 8 RGB565 values (16 bytes at edx). In: xmm0 = B, xmm1 = G, xmm2 = R, xmm5/xmm6/xmm7 = B/G/R field masks set up by the caller. Advances edx by 16; overwrites xmm3.
-#define STORERGB565 __asm { \
- /* Step 3: Weave into RRGB */ \
- __asm punpcklbw xmm0, xmm1 /* BG */ \
- __asm punpcklbw xmm2, xmm2 /* RR */ \
- __asm movdqa xmm1, xmm0 \
- __asm punpcklwd xmm0, xmm2 /* BGRR first 4 pixels */ \
- __asm punpckhwd xmm1, xmm2 /* BGRR next 4 pixels */ \
- /* Step 4: RRGB -> RGB565 */ \
- __asm movdqa xmm3, xmm0 /* B first 4 pixels of argb */ \
- __asm movdqa xmm2, xmm0 /* G */ \
- __asm pslld xmm0, 8 /* R */ \
- __asm psrld xmm3, 3 /* B: top 5 bits moved down to bits 0-4 */ \
- __asm psrld xmm2, 5 /* G: top 6 bits moved down to bits 5-10 */ \
- __asm psrad xmm0, 16 /* R: arithmetic shift keeps each dword a sign-extended 16-bit value */ \
- __asm pand xmm3, xmm5 /* B */ \
- __asm pand xmm2, xmm6 /* G */ \
- __asm pand xmm0, xmm7 /* R */ \
- __asm por xmm3, xmm2 /* BG */ \
- __asm por xmm0, xmm3 /* BGR */ \
- __asm movdqa xmm3, xmm1 /* B next 4 pixels of argb */ \
- __asm movdqa xmm2, xmm1 /* G */ \
- __asm pslld xmm1, 8 /* R */ \
- __asm psrld xmm3, 3 /* B */ \
- __asm psrld xmm2, 5 /* G */ \
- __asm psrad xmm1, 16 /* R */ \
- __asm pand xmm3, xmm5 /* B */ \
- __asm pand xmm2, xmm6 /* G */ \
- __asm pand xmm1, xmm7 /* R */ \
- __asm por xmm3, xmm2 /* BG */ \
- __asm por xmm1, xmm3 /* BGR */ \
- __asm packssdw xmm0, xmm1 /* dwords to words; inputs are sign-extended 16-bit values, so the signed pack does not saturate */ \
- __asm movdqu 0[edx], xmm0 /* store 8 pixels of RGB565 */ \
- __asm lea edx, [edx + 16] \
- }
-
-// 8 pixels per loop pass: separate Y, U and V rows with one UV pair per pixel, converted to ARGB.
-// 8 UV values, mixed with 8 Y producing 8 ARGB (32 bytes).
-__declspec(naked)  // naked: the compiler emits no prologue/epilogue, so the asm saves registers and returns itself
-void I444ToARGBRow_SSSE3(const uint8* y_buf,
- const uint8* u_buf,
- const uint8* v_buf,
- uint8* dst_argb,
- int width) {
- __asm {
- push esi
- push edi  // after the two pushes the arguments start at esp + 8 + 4
- mov eax, [esp + 8 + 4] // Y
- mov esi, [esp + 8 + 8] // U
- mov edi, [esp + 8 + 12] // V
- mov edx, [esp + 8 + 16] // argb
- mov ecx, [esp + 8 + 20] // width
- sub edi, esi  // edi = v_buf - u_buf, so [esi + edi] addresses V while only esi advances
- pcmpeqb xmm5, xmm5 // generate 0xffffffff for alpha
-
- convertloop:
- READYUV444  // xmm0 = 8 UV pairs, no upsampling
- YUVTORGB(kYuvConstants)  // xmm0/xmm1/xmm2 = B/G/R
- STOREARGB  // writes 32 bytes at edx and advances it
-
- sub ecx, 8
- jg convertloop  // repeat while the count is positive; a trailing partial group still runs a full pass
-
- pop edi
- pop esi
- ret
- }
-}
-
-// 8 pixels per loop pass: separate Y, U and V rows converted to 3-byte RGB24 pixels.
-// 4 UV values upsampled to 8 UV, mixed with 8 Y producing 8 RGB24 (24 bytes).
-__declspec(naked)  // naked: the compiler emits no prologue/epilogue, so the asm saves registers and returns itself
-void I422ToRGB24Row_SSSE3(const uint8* y_buf,
- const uint8* u_buf,
- const uint8* v_buf,
- uint8* dst_rgb24,
- int width) {
- __asm {
- push esi
- push edi  // after the two pushes the arguments start at esp + 8 + 4
- mov eax, [esp + 8 + 4] // Y
- mov esi, [esp + 8 + 8] // U
- mov edi, [esp + 8 + 12] // V
- mov edx, [esp + 8 + 16] // rgb24
- mov ecx, [esp + 8 + 20] // width
- sub edi, esi  // edi = v_buf - u_buf, so [esi + edi] addresses V while only esi advances
- movdqa xmm5, kShuffleMaskARGBToRGB24_0  // pshufb masks consumed by STORERGB24
- movdqa xmm6, kShuffleMaskARGBToRGB24
-
- convertloop:
- READYUV422  // xmm0 = 8 UV pairs
- YUVTORGB(kYuvConstants)  // xmm0/xmm1/xmm2 = B/G/R
- STORERGB24  // writes 24 bytes at edx and advances it
-
- sub ecx, 8
- jg convertloop  // repeat while the count is positive; a trailing partial group still runs a full pass
-
- pop edi
- pop esi
- ret
- }
-}
-
-// 8 pixels per loop pass: same flow as the RGB24 conversion, using the ARGBToRAW shuffle masks instead.
-// 4 UV values upsampled to 8 UV, mixed with 8 Y producing 8 RAW (24 bytes).
-__declspec(naked)  // naked: the compiler emits no prologue/epilogue, so the asm saves registers and returns itself
-void I422ToRAWRow_SSSE3(const uint8* y_buf,
- const uint8* u_buf,
- const uint8* v_buf,
- uint8* dst_raw,
- int width) {
- __asm {
- push esi
- push edi  // after the two pushes the arguments start at esp + 8 + 4
- mov eax, [esp + 8 + 4] // Y
- mov esi, [esp + 8 + 8] // U
- mov edi, [esp + 8 + 12] // V
- mov edx, [esp + 8 + 16] // raw
- mov ecx, [esp + 8 + 20] // width
- sub edi, esi  // edi = v_buf - u_buf, so [esi + edi] addresses V while only esi advances
- movdqa xmm5, kShuffleMaskARGBToRAW_0  // pshufb masks consumed by STORERAW
- movdqa xmm6, kShuffleMaskARGBToRAW
-
- convertloop:
- READYUV422  // xmm0 = 8 UV pairs
- YUVTORGB(kYuvConstants)  // xmm0/xmm1/xmm2 = B/G/R
- STORERAW  // writes 24 bytes at edx and advances it
-
- sub ecx, 8
- jg convertloop  // repeat while the count is positive; a trailing partial group still runs a full pass
-
- pop edi
- pop esi
- ret
- }
-}
-
-// 8 pixels per loop pass: separate Y, U and V rows converted to 16-bit RGB565 pixels.
-// 4 UV values upsampled to 8 UV, mixed with 8 Y producing 8 RGB565 (16 bytes).
-__declspec(naked)  // naked: the compiler emits no prologue/epilogue, so the asm saves registers and returns itself
-void I422ToRGB565Row_SSSE3(const uint8* y_buf,
- const uint8* u_buf,
- const uint8* v_buf,
- uint8* rgb565_buf,
- int width) {
- __asm {
- push esi
- push edi  // after the two pushes the arguments start at esp + 8 + 4
- mov eax, [esp + 8 + 4] // Y
- mov esi, [esp + 8 + 8] // U
- mov edi, [esp + 8 + 12] // V
- mov edx, [esp + 8 + 16] // rgb565
- mov ecx, [esp + 8 + 20] // width
- sub edi, esi  // edi = v_buf - u_buf, so [esi + edi] addresses V while only esi advances
- pcmpeqb xmm5, xmm5 // generate mask 0x0000001f (blue field, bits 0-4)
- psrld xmm5, 27
- pcmpeqb xmm6, xmm6 // generate mask 0x000007e0 (green field, bits 5-10)
- psrld xmm6, 26
- pslld xmm6, 5
- pcmpeqb xmm7, xmm7 // generate mask 0xfffff800 (red field, bits 11 and up)
- pslld xmm7, 11
-
- convertloop:
- READYUV422  // xmm0 = 8 UV pairs
- YUVTORGB(kYuvConstants)  // xmm0/xmm1/xmm2 = B/G/R
- STORERGB565  // writes 16 bytes at edx and advances it; uses the masks in xmm5-xmm7
-
- sub ecx, 8
- jg convertloop  // repeat while the count is positive; a trailing partial group still runs a full pass
-
- pop edi
- pop esi
- ret
- }
-}
-
-// 8 pixels per loop pass: separate Y, U and V rows converted to ARGB with kYuvConstants.
-// 4 UV values upsampled to 8 UV, mixed with 8 Y producing 8 ARGB (32 bytes).
-__declspec(naked)  // naked: the compiler emits no prologue/epilogue, so the asm saves registers and returns itself
-void I422ToARGBRow_SSSE3(const uint8* y_buf,
- const uint8* u_buf,
- const uint8* v_buf,
- uint8* dst_argb,
- int width) {
- __asm {
- push esi
- push edi  // after the two pushes the arguments start at esp + 8 + 4
- mov eax, [esp + 8 + 4] // Y
- mov esi, [esp + 8 + 8] // U
- mov edi, [esp + 8 + 12] // V
- mov edx, [esp + 8 + 16] // argb
- mov ecx, [esp + 8 + 20] // width
- sub edi, esi  // edi = v_buf - u_buf, so [esi + edi] addresses V while only esi advances
- pcmpeqb xmm5, xmm5 // generate 0xffffffff for alpha
-
- convertloop:
- READYUV422  // xmm0 = 8 UV pairs
- YUVTORGB(kYuvConstants)  // xmm0/xmm1/xmm2 = B/G/R
- STOREARGB  // writes 32 bytes at edx and advances it
-
- sub ecx, 8
- jg convertloop  // repeat while the count is positive; a trailing partial group still runs a full pass
-
- pop edi
- pop esi
- ret
- }
-}
-
-// 8 pixels per loop pass.
-// JPEG color space version of I422ToARGB: same flow, but kYuvJConstants is passed to the conversion.
-// 4 UV values upsampled to 8 UV, mixed with 8 Y producing 8 ARGB (32 bytes).
-__declspec(naked)  // naked: the compiler emits no prologue/epilogue, so the asm saves registers and returns itself
-void J422ToARGBRow_SSSE3(const uint8* y_buf,
- const uint8* u_buf,
- const uint8* v_buf,
- uint8* dst_argb,
- int width) {
- __asm {
- push esi
- push edi  // after the two pushes the arguments start at esp + 8 + 4
- mov eax, [esp + 8 + 4] // Y
- mov esi, [esp + 8 + 8] // U
- mov edi, [esp + 8 + 12] // V
- mov edx, [esp + 8 + 16] // argb
- mov ecx, [esp + 8 + 20] // width
- sub edi, esi  // edi = v_buf - u_buf, so [esi + edi] addresses V while only esi advances
- pcmpeqb xmm5, xmm5 // generate 0xffffffff for alpha
-
- convertloop:
- READYUV422  // xmm0 = 8 UV pairs
- YUVTORGB(kYuvJConstants)  // xmm0/xmm1/xmm2 = B/G/R
- STOREARGB  // writes 32 bytes at edx and advances it
-
- sub ecx, 8
- jg convertloop  // repeat while the count is positive; a trailing partial group still runs a full pass
-
- pop edi
- pop esi
- ret
- }
-}
-
-// 8 pixels per loop pass: separate Y, U and V rows with one UV pair per four pixels, converted to ARGB.
-// 2 UV values upsampled to 8 UV, mixed with 8 Y producing 8 ARGB (32 bytes).
-// Similar to I420 but duplicate UV once more.
-__declspec(naked)  // naked: the compiler emits no prologue/epilogue, so the asm saves registers and returns itself
-void I411ToARGBRow_SSSE3(const uint8* y_buf,
- const uint8* u_buf,
- const uint8* v_buf,
- uint8* dst_argb,
- int width) {
- __asm {
- push ebx  // saved because READYUV411 uses ebx as scratch
- push esi
- push edi  // after the three pushes the arguments start at esp + 12 + 4
- mov eax, [esp + 12 + 4] // Y
- mov esi, [esp + 12 + 8] // U
- mov edi, [esp + 12 + 12] // V
- mov edx, [esp + 12 + 16] // argb
- mov ecx, [esp + 12 + 20] // width
- sub edi, esi  // edi = v_buf - u_buf, so [esi + edi] addresses V while only esi advances
- pcmpeqb xmm5, xmm5 // generate 0xffffffff for alpha
-
- convertloop:
- READYUV411 // modifies EBX
- YUVTORGB(kYuvConstants)  // xmm0/xmm1/xmm2 = B/G/R
- STOREARGB  // writes 32 bytes at edx and advances it
-
- sub ecx, 8
- jg convertloop  // repeat while the count is positive; a trailing partial group still runs a full pass
-
- pop edi
- pop esi
- pop ebx
- ret
- }
-}
-
-// 8 pixels.
-// 4 UV values upsampled to 8 UV, mixed with 8 Y producing 8 ARGB (32 bytes).
-// Takes a single interleaved UV plane instead of separate U and V planes,
-// so only esi needs to be saved. Arguments are read from the stack (naked).
-__declspec(naked)
-void NV12ToARGBRow_SSSE3(const uint8* y_buf,
- const uint8* uv_buf,
- uint8* dst_argb,
- int width) {
- __asm {
- push esi
- // +4 skips the register pushed above.
- mov eax, [esp + 4 + 4] // Y
- mov esi, [esp + 4 + 8] // UV
- mov edx, [esp + 4 + 12] // argb
- mov ecx, [esp + 4 + 16] // width
- pcmpeqb xmm5, xmm5 // generate 0xffffffff for alpha
-
- convertloop:
- READNV12
- YUVTORGB(kYuvConstants)
- STOREARGB
-
- sub ecx, 8 // 8 pixels consumed per iteration
- jg convertloop
-
- pop esi
- ret
- }
-}
-
-// 8 pixels.
-// 4 VU values upsampled to 8 VU, mixed with 8 Y producing 8 ARGB (32 bytes).
-// Uses the same READNV12 loader as the NV12 variant; the only difference is
-// the kYvuConstants table passed to YUVTORGB.
-// NOTE(review): presumably kYvuConstants accounts for the swapped V/U byte
-// order - confirm against the table definition.
-__declspec(naked)
-void NV21ToARGBRow_SSSE3(const uint8* y_buf,
- const uint8* uv_buf,
- uint8* dst_argb,
- int width) {
- __asm {
- push esi
- mov eax, [esp + 4 + 4] // Y
- mov esi, [esp + 4 + 8] // VU (interleaved chroma plane)
- mov edx, [esp + 4 + 12] // argb
- mov ecx, [esp + 4 + 16] // width
- pcmpeqb xmm5, xmm5 // generate 0xffffffff for alpha
-
- convertloop:
- READNV12
- YUVTORGB(kYvuConstants)
- STOREARGB
-
- sub ecx, 8 // 8 pixels consumed per iteration
- jg convertloop
-
- pop esi
- ret
- }
-}
-
-// Converts 8 I422 pixels per iteration to BGRA using the READYUV422,
-// YUVTORGB and STOREBGRA macros (defined outside this view).
-// Naked function: arguments are read from the stack; esi/edi are saved and
-// restored by hand.
-// NOTE(review): unlike the ARGB/ABGR variants, no all-ones alpha register is
-// prepared before the loop; presumably STOREBGRA produces alpha itself -
-// confirm against the macro.
-__declspec(naked)
-void I422ToBGRARow_SSSE3(const uint8* y_buf,
- const uint8* u_buf,
- const uint8* v_buf,
- uint8* dst_bgra,
- int width) {
- __asm {
- push esi
- push edi
- mov eax, [esp + 8 + 4] // Y
- mov esi, [esp + 8 + 8] // U
- mov edi, [esp + 8 + 12] // V
- mov edx, [esp + 8 + 16] // bgra
- mov ecx, [esp + 8 + 20] // width
- sub edi, esi // edi = v_buf - u_buf (V kept as an offset from U)
-
- convertloop:
- READYUV422
- YUVTORGB(kYuvConstants)
- STOREBGRA
-
- sub ecx, 8 // 8 pixels consumed per iteration
- jg convertloop
-
- pop edi
- pop esi
- ret
- }
-}
-
-// Converts 8 I422 pixels per iteration to ABGR using the READYUV422,
-// YUVTORGB and STOREABGR macros (defined outside this view).
-// Naked function: arguments are read from the stack; esi/edi are saved and
-// restored by hand.
-__declspec(naked)
-void I422ToABGRRow_SSSE3(const uint8* y_buf,
- const uint8* u_buf,
- const uint8* v_buf,
- uint8* dst_abgr,
- int width) {
- __asm {
- push esi
- push edi
- mov eax, [esp + 8 + 4] // Y
- mov esi, [esp + 8 + 8] // U
- mov edi, [esp + 8 + 12] // V
- mov edx, [esp + 8 + 16] // abgr
- mov ecx, [esp + 8 + 20] // width
- sub edi, esi // edi = v_buf - u_buf (V kept as an offset from U)
- pcmpeqb xmm5, xmm5 // generate 0xffffffff for alpha
-
- convertloop:
- READYUV422
- YUVTORGB(kYuvConstants)
- STOREABGR
-
- sub ecx, 8 // 8 pixels consumed per iteration
- jg convertloop
-
- pop edi
- pop esi
- ret
- }
-}
-
-// Converts 8 I422 pixels per iteration to RGBA using the READYUV422,
-// YUVTORGB and STORERGBA macros (defined outside this view).
-// Naked function: arguments are read from the stack; esi/edi are saved and
-// restored by hand.
-// NOTE(review): no alpha register is prepared before the loop; presumably
-// STORERGBA produces alpha itself - confirm against the macro.
-__declspec(naked)
-void I422ToRGBARow_SSSE3(const uint8* y_buf,
- const uint8* u_buf,
- const uint8* v_buf,
- uint8* dst_rgba,
- int width) {
- __asm {
- push esi
- push edi
- mov eax, [esp + 8 + 4] // Y
- mov esi, [esp + 8 + 8] // U
- mov edi, [esp + 8 + 12] // V
- mov edx, [esp + 8 + 16] // rgba
- mov ecx, [esp + 8 + 20] // width
- sub edi, esi // edi = v_buf - u_buf (V kept as an offset from U)
-
- convertloop:
- READYUV422
- YUVTORGB(kYuvConstants)
- STORERGBA
-
- sub ecx, 8 // 8 pixels consumed per iteration
- jg convertloop
-
- pop edi
- pop esi
- ret
- }
-}
-
-#endif // HAS_I422TOARGBROW_SSSE3
-
-#ifdef HAS_I400TOARGBROW_SSE2
-// 8 pixels of Y converted to 8 pixels of ARGB (32 bytes).
-// Each Y is scaled and offset into one grey value, which is replicated into
-// the B, G and R bytes; the alpha byte is forced to 0xff.
-// Naked function with no pushes, so arguments sit at [esp + 4/8/12].
-// The loop body runs before width is tested, so at least 8 pixels are written.
-// NOTE(review): 1.164 * 64 * 256 is about 19071 and 1.164 * 64 * 16 is about
-// 1192, so the formulas quoted beside the two constants do not reproduce
-// 18997 / 1160 exactly - confirm the intended derivation.
-__declspec(naked)
-void I400ToARGBRow_SSE2(const uint8* y_buf,
- uint8* rgb_buf,
- int width) {
- __asm {
- mov eax, 0x4a354a35 // 4a35 = 18997 = round(1.164 * 64 * 256)
- movd xmm2, eax
- pshufd xmm2, xmm2,0 // broadcast the multiplier to all 8 words
- mov eax, 0x04880488 // 0488 = 1160 = round(1.164 * 64 * 16)
- movd xmm3, eax
- pshufd xmm3, xmm3, 0 // broadcast the bias to all 8 words
- pcmpeqb xmm4, xmm4 // generate mask 0xff000000
- pslld xmm4, 24
-
- mov eax, [esp + 4] // Y
- mov edx, [esp + 8] // rgb
- mov ecx, [esp + 12] // width
-
- convertloop:
- // Step 1: Scale Y contribution to 8 G values. G = (y - 16) * 1.164
- movq xmm0, qword ptr [eax]
- lea eax, [eax + 8]
- punpcklbw xmm0, xmm0 // Y.Y
- pmulhuw xmm0, xmm2
- psubusw xmm0, xmm3 // unsigned saturating subtract: clamps at 0
- psrlw xmm0, 6
- packuswb xmm0, xmm0 // G
-
- // Step 2: Weave into ARGB
- punpcklbw xmm0, xmm0 // GG
- movdqa xmm1, xmm0
- punpcklwd xmm0, xmm0 // BGRA first 4 pixels
- punpckhwd xmm1, xmm1 // BGRA next 4 pixels
- por xmm0, xmm4 // set alpha to 0xff
- por xmm1, xmm4
- movdqu [edx], xmm0
- movdqu [edx + 16], xmm1
- lea edx, [edx + 32]
- sub ecx, 8
- jg convertloop
- ret
- }
-}
-#endif // HAS_I400TOARGBROW_SSE2
-
-#ifdef HAS_I400TOARGBROW_AVX2
-// 16 pixels of Y converted to 16 pixels of ARGB (64 bytes).
-// note: vpunpcklbw mutates and vpackuswb unmutates.
-// ("mutates" here refers to the 256-bit unpack/pack instructions operating on
-// each 128-bit lane separately, which the vpermq 0xd8 steps compensate for.)
-// Each Y becomes one grey value replicated into B, G and R, with alpha forced
-// to 0xff. Naked function with no pushes; arguments sit at [esp + 4/8/12].
-// NOTE(review): 1.164 * 64 * 256 is about 19071 and 1.164 * 64 * 16 is about
-// 1192, so the formulas quoted beside the two constants do not reproduce
-// 18997 / 1160 exactly - confirm the intended derivation.
-__declspec(naked)
-void I400ToARGBRow_AVX2(const uint8* y_buf,
- uint8* rgb_buf,
- int width) {
- __asm {
- mov eax, 0x4a354a35 // 4a35 = 18997 = round(1.164 * 64 * 256)
- vmovd xmm2, eax
- vbroadcastss ymm2, xmm2
- mov eax, 0x04880488 // 0488 = 1160 = round(1.164 * 64 * 16)
- vmovd xmm3, eax
- vbroadcastss ymm3, xmm3
- vpcmpeqb ymm4, ymm4, ymm4 // generate mask 0xff000000
- vpslld ymm4, ymm4, 24
-
- mov eax, [esp + 4] // Y
- mov edx, [esp + 8] // rgb
- mov ecx, [esp + 12] // width
-
- convertloop:
- // Step 1: Scale Y contribution to 16 G values. G = (y - 16) * 1.164
- vmovdqu xmm0, [eax]
- lea eax, [eax + 16]
- vpermq ymm0, ymm0, 0xd8 // vpunpcklbw mutates
- vpunpcklbw ymm0, ymm0, ymm0 // Y.Y
- vpmulhuw ymm0, ymm0, ymm2
- vpsubusw ymm0, ymm0, ymm3
- vpsrlw ymm0, ymm0, 6
- vpackuswb ymm0, ymm0, ymm0 // G. still mutated: 3120
-
- // TODO(fbarchard): Weave alpha with unpack.
- // Step 2: Weave into ARGB
- vpunpcklbw ymm1, ymm0, ymm0 // GG - mutates
- vpermq ymm1, ymm1, 0xd8
- vpunpcklwd ymm0, ymm1, ymm1 // GGGG first 8 pixels
- vpunpckhwd ymm1, ymm1, ymm1 // GGGG next 8 pixels
- vpor ymm0, ymm0, ymm4 // set alpha to 0xff
- vpor ymm1, ymm1, ymm4
- vmovdqu [edx], ymm0
- vmovdqu [edx + 32], ymm1
- lea edx, [edx + 64]
- sub ecx, 16
- jg convertloop
- vzeroupper
- ret
- }
-}
-#endif // HAS_I400TOARGBROW_AVX2
-
-#ifdef HAS_MIRRORROW_SSSE3
-// Shuffle table for reversing the bytes.
-// pshufb control: output byte i takes input byte 15 - i.
-static const uvec8 kShuffleMirror = {
- 15u, 14u, 13u, 12u, 11u, 10u, 9u, 8u, 7u, 6u, 5u, 4u, 3u, 2u, 1u, 0u
-};
-
-// Reverses a row of bytes, 16 at a time: reads 16-byte groups starting at the
-// end of src and moving backwards, reverses each group with pshufb, and
-// writes them to dst in forward order.
-// The loop body runs before width is tested, so at least 16 bytes are moved.
-// TODO(fbarchard): Replace lea with -16 offset.
-__declspec(naked)
-void MirrorRow_SSSE3(const uint8* src, uint8* dst, int width) {
- __asm {
- mov eax, [esp + 4] // src
- mov edx, [esp + 8] // dst
- mov ecx, [esp + 12] // width
- movdqa xmm5, kShuffleMirror
-
- convertloop:
- movdqu xmm0, [eax - 16 + ecx] // last 16 not-yet-mirrored source bytes
- pshufb xmm0, xmm5
- movdqu [edx], xmm0
- lea edx, [edx + 16]
- sub ecx, 16 // ecx doubles as the remaining count and the source offset
- jg convertloop
- ret
- }
-}
-#endif // HAS_MIRRORROW_SSSE3
-
-#ifdef HAS_MIRRORROW_AVX2
-// Reverses a row of bytes, 32 at a time. kShuffleMirror is broadcast to both
-// 128-bit lanes so vpshufb reverses the bytes inside each lane; vpermq then
-// swaps the two lanes to complete the 32-byte reversal.
-// The loop body runs before width is tested, so at least 32 bytes are moved.
-__declspec(naked)
-void MirrorRow_AVX2(const uint8* src, uint8* dst, int width) {
- __asm {
- mov eax, [esp + 4] // src
- mov edx, [esp + 8] // dst
- mov ecx, [esp + 12] // width
- vbroadcastf128 ymm5, kShuffleMirror
-
- convertloop:
- vmovdqu ymm0, [eax - 32 + ecx] // last 32 not-yet-mirrored source bytes
- vpshufb ymm0, ymm0, ymm5
- vpermq ymm0, ymm0, 0x4e // swap high and low halves
- vmovdqu [edx], ymm0
- lea edx, [edx + 32]
- sub ecx, 32 // ecx doubles as the remaining count and the source offset
- jg convertloop
- vzeroupper
- ret
- }
-}
-#endif // HAS_MIRRORROW_AVX2
-
-#ifdef HAS_MIRRORROW_SSE2
-// Reverses a row of bytes, 16 at a time, without pshufb: bytes are swapped
-// inside each word with shifts, the words are reversed inside each 64-bit
-// half, and finally the two halves are swapped.
-// Source groups are read from the end of src backwards; dst is written
-// forwards. The loop body runs before width is tested.
-__declspec(naked)
-void MirrorRow_SSE2(const uint8* src, uint8* dst, int width) {
- __asm {
- mov eax, [esp + 4] // src
- mov edx, [esp + 8] // dst
- mov ecx, [esp + 12] // width
-
- convertloop:
- movdqu xmm0, [eax - 16 + ecx] // last 16 not-yet-mirrored source bytes
- movdqa xmm1, xmm0 // swap bytes
- psllw xmm0, 8
- psrlw xmm1, 8
- por xmm0, xmm1
- pshuflw xmm0, xmm0, 0x1b // swap words
- pshufhw xmm0, xmm0, 0x1b
- pshufd xmm0, xmm0, 0x4e // swap qwords
- movdqu [edx], xmm0
- lea edx, [edx + 16]
- sub ecx, 16
- jg convertloop
- ret
- }
-}
-#endif // HAS_MIRRORROW_SSE2
-
-#ifdef HAS_MIRRORROW_UV_SSSE3
-// Shuffle table for reversing the bytes of UV channels.
-// The even source bytes are gathered in reverse order into the low 8 bytes
-// and the odd source bytes in reverse order into the high 8 bytes.
-static const uvec8 kShuffleMirrorUV = {
- 14u, 12u, 10u, 8u, 6u, 4u, 2u, 0u, 15u, 13u, 11u, 9u, 7u, 5u, 3u, 1u
-};
-
-// Mirrors an interleaved UV row and splits it into separate planes: each
-// iteration reads 16 source bytes (8 UV pairs) walking backwards from the end
-// of src, and writes 8 mirrored bytes to dst_u (from the even source bytes)
-// and 8 to dst_v (from the odd source bytes).
-// width is counted in UV pairs (the source end is src + width * 2).
-__declspec(naked)
-void MirrorUVRow_SSSE3(const uint8* src, uint8* dst_u, uint8* dst_v,
- int width) {
- __asm {
- push edi
- mov eax, [esp + 4 + 4] // src
- mov edx, [esp + 4 + 8] // dst_u
- mov edi, [esp + 4 + 12] // dst_v
- mov ecx, [esp + 4 + 16] // width
- movdqa xmm1, kShuffleMirrorUV
- lea eax, [eax + ecx * 2 - 16] // point at the last 16 source bytes
- sub edi, edx // edi = dst_v - dst_u, so [edx + edi] addresses dst_v
-
- convertloop:
- movdqu xmm0, [eax]
- lea eax, [eax - 16]
- pshufb xmm0, xmm1
- movlpd qword ptr [edx], xmm0 // low 8 bytes -> dst_u
- movhpd qword ptr [edx + edi], xmm0 // high 8 bytes -> dst_v
- lea edx, [edx + 8]
- sub ecx, 8
- jg convertloop
-
- pop edi
- ret
- }
-}
-#endif // HAS_MIRRORROW_UV_SSSE3
-
-#ifdef HAS_ARGBMIRRORROW_SSE2
-// Reverses the order of 4-byte pixels in a row, 4 pixels (16 bytes) per
-// iteration. The bytes inside each pixel keep their order; pshufd 0x1b only
-// reverses the four dwords. Source is walked backwards from its last 4
-// pixels, dst is written forwards. width is in pixels.
-__declspec(naked)
-void ARGBMirrorRow_SSE2(const uint8* src, uint8* dst, int width) {
- __asm {
- mov eax, [esp + 4] // src
- mov edx, [esp + 8] // dst
- mov ecx, [esp + 12] // width
- lea eax, [eax - 16 + ecx * 4] // last 4 pixels.
-
- convertloop:
- movdqu xmm0, [eax]
- lea eax, [eax - 16]
- pshufd xmm0, xmm0, 0x1b // reverse the 4 dwords
- movdqu [edx], xmm0
- lea edx, [edx + 16]
- sub ecx, 4
- jg convertloop
- ret
- }
-}
-#endif // HAS_ARGBMIRRORROW_SSE2
-
-#ifdef HAS_ARGBMIRRORROW_AVX2
-// Dword index table for vpermd: reverses the order of 8 dwords
-// (8 four-byte pixels). The bytes inside each dword are not reordered.
-static const ulvec32 kARGBShuffleMirror_AVX2 = {
- 7u, 6u, 5u, 4u, 3u, 2u, 1u, 0u
-};
-
-// Reverses the order of 4-byte pixels in a row, 8 pixels (32 bytes) per
-// iteration. The source is read from its end backwards (ecx is both the
-// remaining pixel count and the source index); dst is written forwards.
-__declspec(naked)
-void ARGBMirrorRow_AVX2(const uint8* src, uint8* dst, int width) {
- __asm {
- mov eax, [esp + 4] // src
- mov edx, [esp + 8] // dst
- mov ecx, [esp + 12] // width
- vmovdqu ymm5, kARGBShuffleMirror_AVX2
-
- convertloop:
- vpermd ymm0, ymm5, [eax - 32 + ecx * 4] // permute dword order
- vmovdqu [edx], ymm0
- lea edx, [edx + 32]
- sub ecx, 8
- jg convertloop
- vzeroupper
- ret
- }
-}
-#endif // HAS_ARGBMIRRORROW_AVX2
-
-#ifdef HAS_SPLITUVROW_SSE2
-// De-interleaves a UV row: per iteration reads 32 bytes (16 UV pairs) and
-// writes the 16 even bytes to dst_u and the 16 odd bytes to dst_v.
-// pix is consumed 16 per iteration; the loop body runs before the test.
-__declspec(naked)
-void SplitUVRow_SSE2(const uint8* src_uv, uint8* dst_u, uint8* dst_v, int pix) {
- __asm {
- push edi
- mov eax, [esp + 4 + 4] // src_uv
- mov edx, [esp + 4 + 8] // dst_u
- mov edi, [esp + 4 + 12] // dst_v
- mov ecx, [esp + 4 + 16] // pix
- pcmpeqb xmm5, xmm5 // generate mask 0x00ff00ff
- psrlw xmm5, 8
- sub edi, edx // edi = dst_v - dst_u, so [edx + edi] addresses dst_v
-
- convertloop:
- movdqu xmm0, [eax]
- movdqu xmm1, [eax + 16]
- lea eax, [eax + 32]
- movdqa xmm2, xmm0
- movdqa xmm3, xmm1
- pand xmm0, xmm5 // even bytes
- pand xmm1, xmm5
- packuswb xmm0, xmm1
- psrlw xmm2, 8 // odd bytes
- psrlw xmm3, 8
- packuswb xmm2, xmm3
- movdqu [edx], xmm0 // 16 U
- movdqu [edx + edi], xmm2 // 16 V
- lea edx, [edx + 16]
- sub ecx, 16
- jg convertloop
-
- pop edi
- ret
- }
-}
-
-#endif // HAS_SPLITUVROW_SSE2
-
-#ifdef HAS_SPLITUVROW_AVX2
-// De-interleaves a UV row: per iteration reads 64 bytes (32 UV pairs) and
-// writes the 32 even bytes to dst_u and the 32 odd bytes to dst_v.
-// vpackuswb packs each 128-bit lane separately, so vpermq 0xd8 is applied
-// afterwards to put the four qwords back into linear order.
-__declspec(naked)
-void SplitUVRow_AVX2(const uint8* src_uv, uint8* dst_u, uint8* dst_v, int pix) {
- __asm {
- push edi
- mov eax, [esp + 4 + 4] // src_uv
- mov edx, [esp + 4 + 8] // dst_u
- mov edi, [esp + 4 + 12] // dst_v
- mov ecx, [esp + 4 + 16] // pix
- vpcmpeqb ymm5, ymm5, ymm5 // generate mask 0x00ff00ff
- vpsrlw ymm5, ymm5, 8
- sub edi, edx // edi = dst_v - dst_u, so [edx + edi] addresses dst_v
-
- convertloop:
- vmovdqu ymm0, [eax]
- vmovdqu ymm1, [eax + 32]
- lea eax, [eax + 64]
- vpsrlw ymm2, ymm0, 8 // odd bytes
- vpsrlw ymm3, ymm1, 8
- vpand ymm0, ymm0, ymm5 // even bytes
- vpand ymm1, ymm1, ymm5
- vpackuswb ymm0, ymm0, ymm1
- vpackuswb ymm2, ymm2, ymm3
- vpermq ymm0, ymm0, 0xd8 // undo the per-lane ordering of the pack
- vpermq ymm2, ymm2, 0xd8
- vmovdqu [edx], ymm0 // 32 U
- vmovdqu [edx + edi], ymm2 // 32 V
- lea edx, [edx + 32]
- sub ecx, 32
- jg convertloop
-
- pop edi
- vzeroupper
- ret
- }
-}
-#endif // HAS_SPLITUVROW_AVX2
-
-#ifdef HAS_MERGEUVROW_SSE2
-// Interleaves separate U and V rows into one UV row: per iteration reads 16
-// U bytes and 16 V bytes and writes 32 bytes of U,V pairs to dst_uv.
-// width is consumed 16 per iteration; the loop body runs before the test.
-__declspec(naked)
-void MergeUVRow_SSE2(const uint8* src_u, const uint8* src_v, uint8* dst_uv,
- int width) {
- __asm {
- push edi
- mov eax, [esp + 4 + 4] // src_u
- mov edx, [esp + 4 + 8] // src_v
- mov edi, [esp + 4 + 12] // dst_uv
- mov ecx, [esp + 4 + 16] // width
- sub edx, eax // edx = src_v - src_u, so [eax + edx] addresses src_v
-
- convertloop:
- movdqu xmm0, [eax] // read 16 U's
- movdqu xmm1, [eax + edx] // and 16 V's
- lea eax, [eax + 16]
- movdqa xmm2, xmm0
- punpcklbw xmm0, xmm1 // first 8 UV pairs
- punpckhbw xmm2, xmm1 // next 8 UV pairs
- movdqu [edi], xmm0
- movdqu [edi + 16], xmm2
- lea edi, [edi + 32]
- sub ecx, 16
- jg convertloop
-
- pop edi
- ret
- }
-}
-#endif // HAS_MERGEUVROW_SSE2
-
-#ifdef HAS_MERGEUVROW_AVX2
-// Interleaves separate U and V rows into one UV row: per iteration reads 32
-// U bytes and 32 V bytes and writes 64 bytes of U,V pairs to dst_uv.
-// The 256-bit unpacks work on each 128-bit lane separately, so the four
-// 16-byte results are stored individually in the order that restores the
-// linear sequence.
-__declspec(naked)
-void MergeUVRow_AVX2(const uint8* src_u, const uint8* src_v, uint8* dst_uv,
- int width) {
- __asm {
- push edi
- mov eax, [esp + 4 + 4] // src_u
- mov edx, [esp + 4 + 8] // src_v
- mov edi, [esp + 4 + 12] // dst_uv
- mov ecx, [esp + 4 + 16] // width
- sub edx, eax // edx = src_v - src_u, so [eax + edx] addresses src_v
-
- convertloop:
- vmovdqu ymm0, [eax] // read 32 U's
- vmovdqu ymm1, [eax + edx] // and 32 V's
- lea eax, [eax + 32]
- vpunpcklbw ymm2, ymm0, ymm1 // low 16 UV pairs. mutated qqword 0,2
- vpunpckhbw ymm0, ymm0, ymm1 // high 16 UV pairs. mutated qqword 1,3
- vextractf128 [edi], ymm2, 0 // bytes 0..15
- vextractf128 [edi + 16], ymm0, 0 // bytes 16..31
- vextractf128 [edi + 32], ymm2, 1 // bytes 32..47
- vextractf128 [edi + 48], ymm0, 1 // bytes 48..63
- lea edi, [edi + 64]
- sub ecx, 32
- jg convertloop
-
- pop edi
- vzeroupper
- ret
- }
-}
-#endif // HAS_MERGEUVROW_AVX2
-
-#ifdef HAS_COPYROW_SSE2
-// CopyRow copies 'count' bytes using a 16 byte load/store, 32 bytes at a time.
-// Unaligned loads and stores are used, so src and dst need no alignment.
-// The loop body runs before count is tested, so at least 32 bytes are copied
-// and count is effectively rounded up to a multiple of 32.
-__declspec(naked)
-void CopyRow_SSE2(const uint8* src, uint8* dst, int count) {
- __asm {
- mov eax, [esp + 4] // src
- mov edx, [esp + 8] // dst
- mov ecx, [esp + 12] // count
-
- convertloop:
- movdqu xmm0, [eax]
- movdqu xmm1, [eax + 16]
- lea eax, [eax + 32]
- movdqu [edx], xmm0
- movdqu [edx + 16], xmm1
- lea edx, [edx + 32]
- sub ecx, 32
- jg convertloop
- ret
- }
-}
-#endif // HAS_COPYROW_SSE2
-
-#ifdef HAS_COPYROW_AVX
-// CopyRow copies 'count' bytes using a 32 byte load/store, 64 bytes at a time.
-// Unaligned loads and stores are used, so src and dst need no alignment.
-// The loop body runs before count is tested, so at least 64 bytes are copied
-// and count is effectively rounded up to a multiple of 64.
-__declspec(naked)
-void CopyRow_AVX(const uint8* src, uint8* dst, int count) {
- __asm {
- mov eax, [esp + 4] // src
- mov edx, [esp + 8] // dst
- mov ecx, [esp + 12] // count
-
- convertloop:
- vmovdqu ymm0, [eax]
- vmovdqu ymm1, [eax + 32]
- lea eax, [eax + 64]
- vmovdqu [edx], ymm0
- vmovdqu [edx + 32], ymm1
- lea edx, [edx + 64]
- sub ecx, 64
- jg convertloop
-
- vzeroupper
- ret
- }
-}
-#endif // HAS_COPYROW_AVX
-
-// Multiple of 1.
-// Copies exactly 'count' bytes with rep movsb, so any byte count works.
-// rep movsb needs esi/edi/ecx; the caller's esi and edi are parked in eax and
-// edx (instead of on the stack) and restored before ret, which keeps the
-// argument offsets at [esp + 4/8/12].
-__declspec(naked)
-void CopyRow_ERMS(const uint8* src, uint8* dst, int count) {
- __asm {
- mov eax, esi // save esi
- mov edx, edi // save edi
- mov esi, [esp + 4] // src
- mov edi, [esp + 8] // dst
- mov ecx, [esp + 12] // count
- rep movsb
- mov edi, edx // restore edi
- mov esi, eax // restore esi
- ret
- }
-}
-
-#ifdef HAS_ARGBCOPYALPHAROW_SSE2
-// width in pixels
-// Copies only the alpha byte (top byte of each 32-bit pixel) from src into
-// dst; the other three bytes of every dst pixel are preserved.
-// 8 pixels (32 bytes) per iteration; the loop body runs before the test.
-__declspec(naked)
-void ARGBCopyAlphaRow_SSE2(const uint8* src, uint8* dst, int width) {
- __asm {
- mov eax, [esp + 4] // src
- mov edx, [esp + 8] // dst
- mov ecx, [esp + 12] // width
- pcmpeqb xmm0, xmm0 // generate mask 0xff000000
- pslld xmm0, 24
- pcmpeqb xmm1, xmm1 // generate mask 0x00ffffff
- psrld xmm1, 8
-
- convertloop:
- movdqu xmm2, [eax]
- movdqu xmm3, [eax + 16]
- lea eax, [eax + 32]
- movdqu xmm4, [edx]
- movdqu xmm5, [edx + 16]
- pand xmm2, xmm0 // keep src alpha
- pand xmm3, xmm0
- pand xmm4, xmm1 // keep dst color bytes
- pand xmm5, xmm1
- por xmm2, xmm4
- por xmm3, xmm5
- movdqu [edx], xmm2
- movdqu [edx + 16], xmm3
- lea edx, [edx + 32]
- sub ecx, 8
- jg convertloop
-
- ret
- }
-}
-#endif // HAS_ARGBCOPYALPHAROW_SSE2
-
-#ifdef HAS_ARGBCOPYALPHAROW_AVX2
-// width in pixels
-// Copies only the alpha byte (top byte of each 32-bit pixel) from src into
-// dst; the other three bytes of every dst pixel are preserved.
-// 16 pixels (64 bytes) per iteration; the loop body runs before the test.
-__declspec(naked)
-void ARGBCopyAlphaRow_AVX2(const uint8* src, uint8* dst, int width) {
- __asm {
- mov eax, [esp + 4] // src
- mov edx, [esp + 8] // dst
- mov ecx, [esp + 12] // width
- vpcmpeqb ymm0, ymm0, ymm0
- vpsrld ymm0, ymm0, 8 // generate mask 0x00ffffff
-
- convertloop:
- vmovdqu ymm1, [eax]
- vmovdqu ymm2, [eax + 32]
- lea eax, [eax + 64]
- // Where the mask byte is 0xff take the dst byte, otherwise keep the src
- // byte: color bytes come from dst, alpha comes from src.
- vpblendvb ymm1, ymm1, [edx], ymm0
- vpblendvb ymm2, ymm2, [edx + 32], ymm0
- vmovdqu [edx], ymm1
- vmovdqu [edx + 32], ymm2
- lea edx, [edx + 64]
- sub ecx, 16
- jg convertloop
-
- vzeroupper
- ret
- }
-}
-#endif // HAS_ARGBCOPYALPHAROW_AVX2
-
-#ifdef HAS_ARGBCOPYYTOALPHAROW_SSE2
-// width in pixels
-// Writes one source byte per pixel into the alpha byte (top byte of each
-// 32-bit pixel) of dst; the other three bytes of every dst pixel are kept.
-// 8 source bytes in, 8 pixels (32 bytes) updated per iteration.
-__declspec(naked)
-void ARGBCopyYToAlphaRow_SSE2(const uint8* src, uint8* dst, int width) {
- __asm {
- mov eax, [esp + 4] // src
- mov edx, [esp + 8] // dst
- mov ecx, [esp + 12] // width
- pcmpeqb xmm0, xmm0 // generate mask 0xff000000
- pslld xmm0, 24
- pcmpeqb xmm1, xmm1 // generate mask 0x00ffffff
- psrld xmm1, 8
-
- convertloop:
- movq xmm2, qword ptr [eax] // 8 Y's
- lea eax, [eax + 8]
- punpcklbw xmm2, xmm2 // each word = Y,Y
- // xmm3's previous contents are interleaved into the LOW word of each
- // dword here; that is harmless because the 0xff000000 mask below keeps
- // only the top byte, which comes from xmm2.
- punpckhwd xmm3, xmm2
- punpcklwd xmm2, xmm2
- movdqu xmm4, [edx]
- movdqu xmm5, [edx + 16]
- pand xmm2, xmm0 // Y in the alpha position, pixels 0..3
- pand xmm3, xmm0 // Y in the alpha position, pixels 4..7
- pand xmm4, xmm1 // keep dst color bytes
- pand xmm5, xmm1
- por xmm2, xmm4
- por xmm3, xmm5
- movdqu [edx], xmm2
- movdqu [edx + 16], xmm3
- lea edx, [edx + 32]
- sub ecx, 8
- jg convertloop
-
- ret
- }
-}
-#endif // HAS_ARGBCOPYYTOALPHAROW_SSE2
-
-#ifdef HAS_ARGBCOPYYTOALPHAROW_AVX2
-// width in pixels
-// Writes one source byte per pixel into the alpha byte (top byte of each
-// 32-bit pixel) of dst; the other three bytes of every dst pixel are kept.
-// 16 source bytes in, 16 pixels (64 bytes) updated per iteration.
-__declspec(naked)
-void ARGBCopyYToAlphaRow_AVX2(const uint8* src, uint8* dst, int width) {
- __asm {
- mov eax, [esp + 4] // src
- mov edx, [esp + 8] // dst
- mov ecx, [esp + 12] // width
- vpcmpeqb ymm0, ymm0, ymm0
- vpsrld ymm0, ymm0, 8 // generate mask 0x00ffffff
-
- convertloop:
- vpmovzxbd ymm1, qword ptr [eax] // 8 bytes -> 8 zero-extended dwords
- vpmovzxbd ymm2, qword ptr [eax + 8]
- lea eax, [eax + 16]
- vpslld ymm1, ymm1, 24 // move each byte into the alpha position
- vpslld ymm2, ymm2, 24
- // Where the mask byte is 0xff take the dst byte, otherwise keep the new
- // alpha: color bytes come from dst, alpha from the shifted source.
- vpblendvb ymm1, ymm1, [edx], ymm0
- vpblendvb ymm2, ymm2, [edx + 32], ymm0
- vmovdqu [edx], ymm1
- vmovdqu [edx + 32], ymm2
- lea edx, [edx + 64]
- sub ecx, 16
- jg convertloop
-
- vzeroupper
- ret
- }
-}
-#endif // HAS_ARGBCOPYYTOALPHAROW_AVX2
-
-#ifdef HAS_SETROW_X86
-// Write 'count' bytes using an 8 bit value repeated.
-// Count should be multiple of 4.
-// The byte is replicated into all four bytes of eax and stored with
-// rep stosd, count / 4 dwords; any remainder of count modulo 4 is not written.
-// edi is parked in edx and restored before ret.
-__declspec(naked)
-void SetRow_X86(uint8* dst, uint8 v8, int count) {
- __asm {
- movzx eax, byte ptr [esp + 8] // v8
- mov edx, 0x01010101 // Duplicate byte to all bytes.
- mul edx // overwrites edx with upper part of result.
- mov edx, edi // save edi
- mov edi, [esp + 4] // dst
- mov ecx, [esp + 12] // count
- shr ecx, 2 // bytes -> dwords
- rep stosd
- mov edi, edx // restore edi
- ret
- }
-}
-
-// Write 'count' bytes using an 8 bit value repeated.
-// Uses rep stosb, so any byte count works. A full dword is loaded from the
-// v8 argument slot, but rep stosb stores only al.
-// edi is parked in edx and restored before ret.
-__declspec(naked)
-void SetRow_ERMS(uint8* dst, uint8 v8, int count) {
- __asm {
- mov edx, edi // save edi
- mov edi, [esp + 4] // dst
- mov eax, [esp + 8] // v8
- mov ecx, [esp + 12] // count
- rep stosb
- mov edi, edx // restore edi
- ret
- }
-}
-
-// Write 'count' 32 bit values.
-// Stores v32 to dst_argb 'count' times with rep stosd.
-// edi is parked in edx and restored before ret.
-__declspec(naked)
-void ARGBSetRow_X86(uint8* dst_argb, uint32 v32, int count) {
- __asm {
- mov edx, edi // save edi
- mov edi, [esp + 4] // dst
- mov eax, [esp + 8] // v32
- mov ecx, [esp + 12] // count
- rep stosd
- mov edi, edx // restore edi
- ret
- }
-}
-#endif // HAS_SETROW_X86
-
-#ifdef HAS_YUY2TOYROW_AVX2
-// Extracts the Y bytes from a YUY2 row: per iteration reads 64 source bytes
-// and writes the 32 even bytes (the Y samples) to dst_y.
-// pix is consumed 32 per iteration; the loop body runs before the test.
-__declspec(naked)
-void YUY2ToYRow_AVX2(const uint8* src_yuy2,
- uint8* dst_y, int pix) {
- __asm {
- mov eax, [esp + 4] // src_yuy2
- mov edx, [esp + 8] // dst_y
- mov ecx, [esp + 12] // pix
- vpcmpeqb ymm5, ymm5, ymm5 // generate mask 0x00ff00ff
- vpsrlw ymm5, ymm5, 8
-
- convertloop:
- vmovdqu ymm0, [eax]
- vmovdqu ymm1, [eax + 32]
- lea eax, [eax + 64]
- vpand ymm0, ymm0, ymm5 // even bytes are Y
- vpand ymm1, ymm1, ymm5
- vpackuswb ymm0, ymm0, ymm1 // mutates.
- vpermq ymm0, ymm0, 0xd8 // restore linear order after the per-lane pack
- vmovdqu [edx], ymm0
- lea edx, [edx + 32]
- sub ecx, 32
- jg convertloop
- vzeroupper
- ret
- }
-}
-
-// Extracts U and V from two YUY2 rows: the row at src_yuy2 and the row
-// stride_yuy2 bytes after it are averaged with vpavgb, then the chroma bytes
-// are separated. Per iteration: 64 bytes read from each row, 16 bytes written
-// to dst_u and 16 to dst_v; pix is consumed 32 per iteration.
-__declspec(naked)
-void YUY2ToUVRow_AVX2(const uint8* src_yuy2, int stride_yuy2,
- uint8* dst_u, uint8* dst_v, int pix) {
- __asm {
- push esi
- push edi
- mov eax, [esp + 8 + 4] // src_yuy2
- mov esi, [esp + 8 + 8] // stride_yuy2
- mov edx, [esp + 8 + 12] // dst_u
- mov edi, [esp + 8 + 16] // dst_v
- mov ecx, [esp + 8 + 20] // pix
- vpcmpeqb ymm5, ymm5, ymm5 // generate mask 0x00ff00ff
- vpsrlw ymm5, ymm5, 8
- sub edi, edx // edi = dst_v - dst_u, so [edx + edi] addresses dst_v
-
- convertloop:
- vmovdqu ymm0, [eax]
- vmovdqu ymm1, [eax + 32]
- vpavgb ymm0, ymm0, [eax + esi] // average with the second row
- vpavgb ymm1, ymm1, [eax + esi + 32]
- lea eax, [eax + 64]
- vpsrlw ymm0, ymm0, 8 // YUYV -> UVUV
- vpsrlw ymm1, ymm1, 8
- vpackuswb ymm0, ymm0, ymm1 // mutates.
- vpermq ymm0, ymm0, 0xd8
- vpand ymm1, ymm0, ymm5 // U
- vpsrlw ymm0, ymm0, 8 // V
- vpackuswb ymm1, ymm1, ymm1 // mutates.
- vpackuswb ymm0, ymm0, ymm0 // mutates.
- vpermq ymm1, ymm1, 0xd8
- vpermq ymm0, ymm0, 0xd8
- vextractf128 [edx], ymm1, 0 // U
- vextractf128 [edx + edi], ymm0, 0 // V
- lea edx, [edx + 16]
- sub ecx, 32
- jg convertloop
-
- pop edi
- pop esi
- vzeroupper
- ret
- }
-}
-
-// Extracts U and V from a single YUY2 row (no averaging with a second row).
-// Per iteration: 64 source bytes read, 16 bytes written to dst_u and 16 to
-// dst_v; pix is consumed 32 per iteration.
-__declspec(naked)
-void YUY2ToUV422Row_AVX2(const uint8* src_yuy2,
- uint8* dst_u, uint8* dst_v, int pix) {
- __asm {
- push edi
- mov eax, [esp + 4 + 4] // src_yuy2
- mov edx, [esp + 4 + 8] // dst_u
- mov edi, [esp + 4 + 12] // dst_v
- mov ecx, [esp + 4 + 16] // pix
- vpcmpeqb ymm5, ymm5, ymm5 // generate mask 0x00ff00ff
- vpsrlw ymm5, ymm5, 8
- sub edi, edx // edi = dst_v - dst_u, so [edx + edi] addresses dst_v
-
- convertloop:
- vmovdqu ymm0, [eax]
- vmovdqu ymm1, [eax + 32]
- lea eax, [eax + 64]
- vpsrlw ymm0, ymm0, 8 // YUYV -> UVUV
- vpsrlw ymm1, ymm1, 8
- vpackuswb ymm0, ymm0, ymm1 // mutates.
- vpermq ymm0, ymm0, 0xd8
- vpand ymm1, ymm0, ymm5 // U
- vpsrlw ymm0, ymm0, 8 // V
- vpackuswb ymm1, ymm1, ymm1 // mutates.
- vpackuswb ymm0, ymm0, ymm0 // mutates.
- vpermq ymm1, ymm1, 0xd8
- vpermq ymm0, ymm0, 0xd8
- vextractf128 [edx], ymm1, 0 // U
- vextractf128 [edx + edi], ymm0, 0 // V
- lea edx, [edx + 16]
- sub ecx, 32
- jg convertloop
-
- pop edi
- vzeroupper
- ret
- }
-}
-
-// Extracts the Y bytes from a UYVY row: per iteration reads 64 source bytes
-// and writes the 32 odd bytes (the Y samples) to dst_y.
-// pix is consumed 32 per iteration; the loop body runs before the test.
-__declspec(naked)
-void UYVYToYRow_AVX2(const uint8* src_uyvy,
- uint8* dst_y, int pix) {
- __asm {
- mov eax, [esp + 4] // src_uyvy
- mov edx, [esp + 8] // dst_y
- mov ecx, [esp + 12] // pix
-
- convertloop:
- vmovdqu ymm0, [eax]
- vmovdqu ymm1, [eax + 32]
- lea eax, [eax + 64]
- vpsrlw ymm0, ymm0, 8 // odd bytes are Y
- vpsrlw ymm1, ymm1, 8
- vpackuswb ymm0, ymm0, ymm1 // mutates.
- vpermq ymm0, ymm0, 0xd8 // restore linear order after the per-lane pack
- vmovdqu [edx], ymm0
- lea edx, [edx + 32]
- sub ecx, 32
- jg convertloop
- vzeroupper
- ret
- }
-}
-
-// Extracts U and V from two UYVY rows: the row at src_uyvy and the row
-// stride_uyvy bytes after it are averaged with vpavgb, then the chroma bytes
-// (the even bytes in UYVY) are separated. Per iteration: 64 bytes read from
-// each row, 16 bytes written to dst_u and 16 to dst_v; pix is consumed 32
-// per iteration.
-__declspec(naked)
-void UYVYToUVRow_AVX2(const uint8* src_uyvy, int stride_uyvy,
- uint8* dst_u, uint8* dst_v, int pix) {
- __asm {
- push esi
- push edi
- mov eax, [esp + 8 + 4] // src_uyvy
- mov esi, [esp + 8 + 8] // stride_uyvy
- mov edx, [esp + 8 + 12] // dst_u
- mov edi, [esp + 8 + 16] // dst_v
- mov ecx, [esp + 8 + 20] // pix
- vpcmpeqb ymm5, ymm5, ymm5 // generate mask 0x00ff00ff
- vpsrlw ymm5, ymm5, 8
- sub edi, edx // edi = dst_v - dst_u, so [edx + edi] addresses dst_v
-
- convertloop:
- vmovdqu ymm0, [eax]
- vmovdqu ymm1, [eax + 32]
- vpavgb ymm0, ymm0, [eax + esi] // average with the second row
- vpavgb ymm1, ymm1, [eax + esi + 32]
- lea eax, [eax + 64]
- vpand ymm0, ymm0, ymm5 // UYVY -> UVUV
- vpand ymm1, ymm1, ymm5
- vpackuswb ymm0, ymm0, ymm1 // mutates.
- vpermq ymm0, ymm0, 0xd8
- vpand ymm1, ymm0, ymm5 // U
- vpsrlw ymm0, ymm0, 8 // V
- vpackuswb ymm1, ymm1, ymm1 // mutates.
- vpackuswb ymm0, ymm0, ymm0 // mutates.
- vpermq ymm1, ymm1, 0xd8
- vpermq ymm0, ymm0, 0xd8
- vextractf128 [edx], ymm1, 0 // U
- vextractf128 [edx + edi], ymm0, 0 // V
- lea edx, [edx + 16]
- sub ecx, 32
- jg convertloop
-
- pop edi
- pop esi
- vzeroupper
- ret
- }
-}
-
-// Extracts U and V from a single UYVY row (no averaging with a second row).
-// Per iteration: 64 source bytes read, 16 bytes written to dst_u and 16 to
-// dst_v; pix is consumed 32 per iteration.
-__declspec(naked)
-void UYVYToUV422Row_AVX2(const uint8* src_uyvy,
- uint8* dst_u, uint8* dst_v, int pix) {
- __asm {
- push edi
- mov eax, [esp + 4 + 4] // src_uyvy
- mov edx, [esp + 4 + 8] // dst_u
- mov edi, [esp + 4 + 12] // dst_v
- mov ecx, [esp + 4 + 16] // pix
- vpcmpeqb ymm5, ymm5, ymm5 // generate mask 0x00ff00ff
- vpsrlw ymm5, ymm5, 8
- sub edi, edx // edi = dst_v - dst_u, so [edx + edi] addresses dst_v
-
- convertloop:
- vmovdqu ymm0, [eax]
- vmovdqu ymm1, [eax + 32]
- lea eax, [eax + 64]
- vpand ymm0, ymm0, ymm5 // UYVY -> UVUV
- vpand ymm1, ymm1, ymm5
- vpackuswb ymm0, ymm0, ymm1 // mutates.
- vpermq ymm0, ymm0, 0xd8
- vpand ymm1, ymm0, ymm5 // U
- vpsrlw ymm0, ymm0, 8 // V
- vpackuswb ymm1, ymm1, ymm1 // mutates.
- vpackuswb ymm0, ymm0, ymm0 // mutates.
- vpermq ymm1, ymm1, 0xd8
- vpermq ymm0, ymm0, 0xd8
- vextractf128 [edx], ymm1, 0 // U
- vextractf128 [edx + edi], ymm0, 0 // V
- lea edx, [edx + 16]
- sub ecx, 32
- jg convertloop
-
- pop edi
- vzeroupper
- ret
- }
-}
-#endif // HAS_YUY2TOYROW_AVX2
-
-#ifdef HAS_YUY2TOYROW_SSE2
-// Extracts the Y bytes from a YUY2 row: per iteration reads 32 source bytes
-// and writes the 16 even bytes (the Y samples) to dst_y.
-// pix is consumed 16 per iteration; the loop body runs before the test.
-__declspec(naked)
-void YUY2ToYRow_SSE2(const uint8* src_yuy2,
- uint8* dst_y, int pix) {
- __asm {
- mov eax, [esp + 4] // src_yuy2
- mov edx, [esp + 8] // dst_y
- mov ecx, [esp + 12] // pix
- pcmpeqb xmm5, xmm5 // generate mask 0x00ff00ff
- psrlw xmm5, 8
-
- convertloop:
- movdqu xmm0, [eax]
- movdqu xmm1, [eax + 16]
- lea eax, [eax + 32]
- pand xmm0, xmm5 // even bytes are Y
- pand xmm1, xmm5
- packuswb xmm0, xmm1
- movdqu [edx], xmm0
- lea edx, [edx + 16]
- sub ecx, 16
- jg convertloop
- ret
- }
-}
-
-// Extracts U and V from two YUY2 rows: the row at src_yuy2 and the row
-// stride_yuy2 bytes after it are averaged with pavgb, then the chroma bytes
-// are separated. Per iteration: 32 bytes read from each row, 8 bytes written
-// to dst_u and 8 to dst_v; pix is consumed 16 per iteration.
-__declspec(naked)
-void YUY2ToUVRow_SSE2(const uint8* src_yuy2, int stride_yuy2,
- uint8* dst_u, uint8* dst_v, int pix) {
- __asm {
- push esi
- push edi
- mov eax, [esp + 8 + 4] // src_yuy2
- mov esi, [esp + 8 + 8] // stride_yuy2
- mov edx, [esp + 8 + 12] // dst_u
- mov edi, [esp + 8 + 16] // dst_v
- mov ecx, [esp + 8 + 20] // pix
- pcmpeqb xmm5, xmm5 // generate mask 0x00ff00ff
- psrlw xmm5, 8
- sub edi, edx // edi = dst_v - dst_u, so [edx + edi] addresses dst_v
-
- convertloop:
- movdqu xmm0, [eax]
- movdqu xmm1, [eax + 16]
- movdqu xmm2, [eax + esi] // second row
- movdqu xmm3, [eax + esi + 16]
- lea eax, [eax + 32]
- pavgb xmm0, xmm2
- pavgb xmm1, xmm3
- psrlw xmm0, 8 // YUYV -> UVUV
- psrlw xmm1, 8
- packuswb xmm0, xmm1
- movdqa xmm1, xmm0
- pand xmm0, xmm5 // U
- packuswb xmm0, xmm0
- psrlw xmm1, 8 // V
- packuswb xmm1, xmm1
- movq qword ptr [edx], xmm0
- movq qword ptr [edx + edi], xmm1
- lea edx, [edx + 8]
- sub ecx, 16
- jg convertloop
-
- pop edi
- pop esi
- ret
- }
-}
-
-// Extracts U and V from a single YUY2 row (no averaging with a second row).
-// Per iteration: 32 source bytes read, 8 bytes written to dst_u and 8 to
-// dst_v; pix is consumed 16 per iteration.
-__declspec(naked)
-void YUY2ToUV422Row_SSE2(const uint8* src_yuy2,
- uint8* dst_u, uint8* dst_v, int pix) {
- __asm {
- push edi
- mov eax, [esp + 4 + 4] // src_yuy2
- mov edx, [esp + 4 + 8] // dst_u
- mov edi, [esp + 4 + 12] // dst_v
- mov ecx, [esp + 4 + 16] // pix
- pcmpeqb xmm5, xmm5 // generate mask 0x00ff00ff
- psrlw xmm5, 8
- sub edi, edx // edi = dst_v - dst_u, so [edx + edi] addresses dst_v
-
- convertloop:
- movdqu xmm0, [eax]
- movdqu xmm1, [eax + 16]
- lea eax, [eax + 32]
- psrlw xmm0, 8 // YUYV -> UVUV
- psrlw xmm1, 8
- packuswb xmm0, xmm1
- movdqa xmm1, xmm0
- pand xmm0, xmm5 // U
- packuswb xmm0, xmm0
- psrlw xmm1, 8 // V
- packuswb xmm1, xmm1
- movq qword ptr [edx], xmm0
- movq qword ptr [edx + edi], xmm1
- lea edx, [edx + 8]
- sub ecx, 16
- jg convertloop
-
- pop edi
- ret
- }
-}
-
-// Extracts the Y bytes from a UYVY row: per iteration reads 32 source bytes
-// and writes the 16 odd bytes (the Y samples) to dst_y.
-// pix is consumed 16 per iteration; the loop body runs before the test.
-__declspec(naked)
-void UYVYToYRow_SSE2(const uint8* src_uyvy,
- uint8* dst_y, int pix) {
- __asm {
- mov eax, [esp + 4] // src_uyvy
- mov edx, [esp + 8] // dst_y
- mov ecx, [esp + 12] // pix
-
- convertloop:
- movdqu xmm0, [eax]
- movdqu xmm1, [eax + 16]
- lea eax, [eax + 32]
- psrlw xmm0, 8 // odd bytes are Y
- psrlw xmm1, 8
- packuswb xmm0, xmm1
- movdqu [edx], xmm0
- lea edx, [edx + 16]
- sub ecx, 16
- jg convertloop
- ret
- }
-}
-
-// Extracts U and V from two UYVY rows: the row at src_uyvy and the row
-// stride_uyvy bytes after it are averaged with pavgb, then the chroma bytes
-// (the even bytes in UYVY) are separated. Per iteration: 32 bytes read from
-// each row, 8 bytes written to dst_u and 8 to dst_v; pix is consumed 16 per
-// iteration.
-__declspec(naked)
-void UYVYToUVRow_SSE2(const uint8* src_uyvy, int stride_uyvy,
- uint8* dst_u, uint8* dst_v, int pix) {
- __asm {
- push esi
- push edi
- mov eax, [esp + 8 + 4] // src_uyvy
- mov esi, [esp + 8 + 8] // stride_uyvy
- mov edx, [esp + 8 + 12] // dst_u
- mov edi, [esp + 8 + 16] // dst_v
- mov ecx, [esp + 8 + 20] // pix
- pcmpeqb xmm5, xmm5 // generate mask 0x00ff00ff
- psrlw xmm5, 8
- sub edi, edx // edi = dst_v - dst_u, so [edx + edi] addresses dst_v
-
- convertloop:
- movdqu xmm0, [eax]
- movdqu xmm1, [eax + 16]
- movdqu xmm2, [eax + esi] // second row
- movdqu xmm3, [eax + esi + 16]
- lea eax, [eax + 32]
- pavgb xmm0, xmm2
- pavgb xmm1, xmm3
- pand xmm0, xmm5 // UYVY -> UVUV
- pand xmm1, xmm5
- packuswb xmm0, xmm1
- movdqa xmm1, xmm0
- pand xmm0, xmm5 // U
- packuswb xmm0, xmm0
- psrlw xmm1, 8 // V
- packuswb xmm1, xmm1
- movq qword ptr [edx], xmm0
- movq qword ptr [edx + edi], xmm1
- lea edx, [edx + 8]
- sub ecx, 16
- jg convertloop
-
- pop edi
- pop esi
- ret
- }
-}
-
-// Extracts U and V from a single UYVY row (no averaging with a second row).
-// Per iteration: 32 source bytes read, 8 bytes written to dst_u and 8 to
-// dst_v; pix is consumed 16 per iteration.
-__declspec(naked)
-void UYVYToUV422Row_SSE2(const uint8* src_uyvy,
- uint8* dst_u, uint8* dst_v, int pix) {
- __asm {
- push edi
- mov eax, [esp + 4 + 4] // src_uyvy
- mov edx, [esp + 4 + 8] // dst_u
- mov edi, [esp + 4 + 12] // dst_v
- mov ecx, [esp + 4 + 16] // pix
- pcmpeqb xmm5, xmm5 // generate mask 0x00ff00ff
- psrlw xmm5, 8
- sub edi, edx // edi = dst_v - dst_u, so [edx + edi] addresses dst_v
-
- convertloop:
- movdqu xmm0, [eax]
- movdqu xmm1, [eax + 16]
- lea eax, [eax + 32]
- pand xmm0, xmm5 // UYVY -> UVUV
- pand xmm1, xmm5
- packuswb xmm0, xmm1
- movdqa xmm1, xmm0
- pand xmm0, xmm5 // U
- packuswb xmm0, xmm0
- psrlw xmm1, 8 // V
- packuswb xmm1, xmm1
- movq qword ptr [edx], xmm0
- movq qword ptr [edx + edi], xmm1
- lea edx, [edx + 8]
- sub ecx, 16
- jg convertloop
-
- pop edi
- ret
- }
-}
-#endif // HAS_YUY2TOYROW_SSE2
-
-#ifdef HAS_ARGBBLENDROW_SSE2
-// Blend 4 pixels at a time, then any remaining pixels one at a time.
-// For each pixel: dst = src_argb0 + src_argb1 * (256 - alpha0) / 256 per
-// color channel (saturating add), with the result alpha forced to 255.
-// Unlike the fixed-step row functions, width does not have to be a multiple
-// of the vector size: the 4-pixel loop is skipped when fewer than 4 pixels
-// remain and the 1-pixel loop handles the tail.
-__declspec(naked)
-void ARGBBlendRow_SSE2(const uint8* src_argb0, const uint8* src_argb1,
- uint8* dst_argb, int width) {
- __asm {
- push esi
- mov eax, [esp + 4 + 4] // src_argb0
- mov esi, [esp + 4 + 8] // src_argb1
- mov edx, [esp + 4 + 12] // dst_argb
- mov ecx, [esp + 4 + 16] // width
- pcmpeqb xmm7, xmm7 // generate constant 1
- psrlw xmm7, 15
- pcmpeqb xmm6, xmm6 // generate mask 0x00ff00ff
- psrlw xmm6, 8
- pcmpeqb xmm5, xmm5 // generate mask 0xff00ff00
- psllw xmm5, 8
- pcmpeqb xmm4, xmm4 // generate mask 0xff000000
- pslld xmm4, 24
- sub ecx, 4 // pre-decrement so the sign tells whether 4 pixels remain
- jl convertloop4b // less than 4 pixels?
-
- // 4 pixel loop.
- convertloop4:
- movdqu xmm3, [eax] // src argb
- lea eax, [eax + 16]
- movdqa xmm0, xmm3 // src argb
- pxor xmm3, xmm4 // ~alpha
- movdqu xmm2, [esi] // _r_b
- psrlw xmm3, 8 // alpha
- pshufhw xmm3, xmm3, 0F5h // 8 alpha words
- pshuflw xmm3, xmm3, 0F5h
- pand xmm2, xmm6 // _r_b
- paddw xmm3, xmm7 // 256 - alpha
- pmullw xmm2, xmm3 // _r_b * alpha
- movdqu xmm1, [esi] // _a_g
- lea esi, [esi + 16]
- psrlw xmm1, 8 // _a_g
- por xmm0, xmm4 // set alpha to 255
- pmullw xmm1, xmm3 // _a_g * alpha
- psrlw xmm2, 8 // _r_b convert to 8 bits again
- paddusb xmm0, xmm2 // + src argb
- pand xmm1, xmm5 // a_g_ convert to 8 bits again
- paddusb xmm0, xmm1 // + src argb
- movdqu [edx], xmm0
- lea edx, [edx + 16]
- sub ecx, 4
- jge convertloop4
-
- convertloop4b:
- add ecx, 4 - 1 // undo the bias; ecx = remaining pixels - 1
- jl convertloop1b
-
- // 1 pixel loop.
- convertloop1:
- movd xmm3, [eax] // src argb
- lea eax, [eax + 4]
- movdqa xmm0, xmm3 // src argb
- pxor xmm3, xmm4 // ~alpha
- movd xmm2, [esi] // _r_b
- psrlw xmm3, 8 // alpha
- pshufhw xmm3, xmm3, 0F5h // 8 alpha words
- pshuflw xmm3, xmm3, 0F5h
- pand xmm2, xmm6 // _r_b
- paddw xmm3, xmm7 // 256 - alpha
- pmullw xmm2, xmm3 // _r_b * alpha
- movd xmm1, [esi] // _a_g
- lea esi, [esi + 4]
- psrlw xmm1, 8 // _a_g
- por xmm0, xmm4 // set alpha to 255
- pmullw xmm1, xmm3 // _a_g * alpha
- psrlw xmm2, 8 // _r_b convert to 8 bits again
- paddusb xmm0, xmm2 // + src argb
- pand xmm1, xmm5 // a_g_ convert to 8 bits again
- paddusb xmm0, xmm1 // + src argb
- movd [edx], xmm0
- lea edx, [edx + 4]
- sub ecx, 1
- jge convertloop1
-
- convertloop1b:
- pop esi
- ret
- }
-}
-#endif // HAS_ARGBBLENDROW_SSE2
-
-#ifdef HAS_ARGBBLENDROW_SSSE3
-// Shuffle table for isolating alpha.
-// pshufb control: the alpha byte of each pixel (bytes 3, 7, 11, 15) is copied
-// into the low byte of both words of that pixel; 0x80 entries produce zero.
-static const uvec8 kShuffleAlpha = {
- 3u, 0x80, 3u, 0x80, 7u, 0x80, 7u, 0x80,
- 11u, 0x80, 11u, 0x80, 15u, 0x80, 15u, 0x80
-};
-// Same as SSE2, but replaces:
-// psrlw xmm3, 8 // alpha
-// pshufhw xmm3, xmm3, 0F5h // 8 alpha words
-// pshuflw xmm3, xmm3, 0F5h
-// with..
-// pshufb xmm3, kShuffleAlpha // alpha
-// Blend 4 pixels at a time, then any remaining pixels one at a time.
-// The result alpha is forced to 255.
-
-__declspec(naked)
-void ARGBBlendRow_SSSE3(const uint8* src_argb0, const uint8* src_argb1,
- uint8* dst_argb, int width) {
- __asm {
- push esi
- mov eax, [esp + 4 + 4] // src_argb0
- mov esi, [esp + 4 + 8] // src_argb1
- mov edx, [esp + 4 + 12] // dst_argb
- mov ecx, [esp + 4 + 16] // width
- pcmpeqb xmm7, xmm7 // generate constant 0x0001
- psrlw xmm7, 15
- pcmpeqb xmm6, xmm6 // generate mask 0x00ff00ff
- psrlw xmm6, 8
- pcmpeqb xmm5, xmm5 // generate mask 0xff00ff00
- psllw xmm5, 8
- pcmpeqb xmm4, xmm4 // generate mask 0xff000000
- pslld xmm4, 24
- sub ecx, 4 // pre-decrement so the sign tells whether 4 pixels remain
- jl convertloop4b // less than 4 pixels?
-
- // 4 pixel loop.
- convertloop4:
- movdqu xmm3, [eax] // src argb
- lea eax, [eax + 16]
- movdqa xmm0, xmm3 // src argb
- pxor xmm3, xmm4 // ~alpha
- movdqu xmm2, [esi] // _r_b
- pshufb xmm3, kShuffleAlpha // alpha
- pand xmm2, xmm6 // _r_b
- paddw xmm3, xmm7 // 256 - alpha
- pmullw xmm2, xmm3 // _r_b * alpha
- movdqu xmm1, [esi] // _a_g
- lea esi, [esi + 16]
- psrlw xmm1, 8 // _a_g
- por xmm0, xmm4 // set alpha to 255
- pmullw xmm1, xmm3 // _a_g * alpha
- psrlw xmm2, 8 // _r_b convert to 8 bits again
- paddusb xmm0, xmm2 // + src argb
- pand xmm1, xmm5 // a_g_ convert to 8 bits again
- paddusb xmm0, xmm1 // + src argb
- movdqu [edx], xmm0
- lea edx, [edx + 16]
- sub ecx, 4
- jge convertloop4
-
- convertloop4b:
- add ecx, 4 - 1 // undo the bias; ecx = remaining pixels - 1
- jl convertloop1b
-
- // 1 pixel loop.
- convertloop1:
- movd xmm3, [eax] // src argb
- lea eax, [eax + 4]
- movdqa xmm0, xmm3 // src argb
- pxor xmm3, xmm4 // ~alpha
- movd xmm2, [esi] // _r_b
- pshufb xmm3, kShuffleAlpha // alpha
- pand xmm2, xmm6 // _r_b
- paddw xmm3, xmm7 // 256 - alpha
- pmullw xmm2, xmm3 // _r_b * alpha
- movd xmm1, [esi] // _a_g
- lea esi, [esi + 4]
- psrlw xmm1, 8 // _a_g
- por xmm0, xmm4 // set alpha to 255
- pmullw xmm1, xmm3 // _a_g * alpha
- psrlw xmm2, 8 // _r_b convert to 8 bits again
- paddusb xmm0, xmm2 // + src argb
- pand xmm1, xmm5 // a_g_ convert to 8 bits again
- paddusb xmm0, xmm1 // + src argb
- movd [edx], xmm0
- lea edx, [edx + 4]
- sub ecx, 1
- jge convertloop1
-
- convertloop1b:
- pop esi
- ret
- }
-}
-#endif // HAS_ARGBBLENDROW_SSSE3
-
-#ifdef HAS_ARGBATTENUATEROW_SSE2
-// Attenuate 4 pixels at a time: scales each pixel's B, G and R by that pixel's alpha and copies the alpha byte through unchanged. There is no remainder loop.
-__declspec(naked)
-void ARGBAttenuateRow_SSE2(const uint8* src_argb, uint8* dst_argb, int width) {
- __asm {
- mov eax, [esp + 4] // src_argb
- mov edx, [esp + 8] // dst_argb
- mov ecx, [esp + 12] // width
- pcmpeqb xmm4, xmm4 // generate mask 0xff000000
- pslld xmm4, 24
- pcmpeqb xmm5, xmm5 // generate mask 0x00ffffff
- psrld xmm5, 8
-
- convertloop:
- movdqu xmm0, [eax] // read 4 pixels
- punpcklbw xmm0, xmm0 // first 2 pixels, each byte duplicated into a word
- pshufhw xmm2, xmm0, 0FFh // 8 alpha words
- pshuflw xmm2, xmm2, 0FFh
- pmulhuw xmm0, xmm2 // rgb * a
- movdqu xmm1, [eax] // read 4 pixels
- punpckhbw xmm1, xmm1 // next 2 pixels
- pshufhw xmm2, xmm1, 0FFh // 8 alpha words
- pshuflw xmm2, xmm2, 0FFh
- pmulhuw xmm1, xmm2 // rgb * a
- movdqu xmm2, [eax] // alphas
- lea eax, [eax + 16]
- psrlw xmm0, 8
- pand xmm2, xmm4 // keep only the original alpha bytes
- psrlw xmm1, 8
- packuswb xmm0, xmm1
- pand xmm0, xmm5 // clear the computed alpha byte
- por xmm0, xmm2 // insert the original alphas
- movdqu [edx], xmm0
- lea edx, [edx + 16]
- sub ecx, 4
- jg convertloop // loop while pixels remain; each pass handles 4
-
- ret
- }
-}
-#endif // HAS_ARGBATTENUATEROW_SSE2
-
-#ifdef HAS_ARGBATTENUATEROW_SSSE3
-// Shuffle tables duplicating alpha: each pixel's alpha byte fills both bytes of 3 words; the 4th word (alpha slot) is zeroed by the 128 entries.
-static const uvec8 kShuffleAlpha0 = {
- 3u, 3u, 3u, 3u, 3u, 3u, 128u, 128u, 7u, 7u, 7u, 7u, 7u, 7u, 128u, 128u,
-};
-static const uvec8 kShuffleAlpha1 = {
- 11u, 11u, 11u, 11u, 11u, 11u, 128u, 128u,
- 15u, 15u, 15u, 15u, 15u, 15u, 128u, 128u,
-};
-__declspec(naked) // Attenuates 4 ARGB pixels per pass (B, G, R scaled by alpha; alpha copied through); no remainder loop.
-void ARGBAttenuateRow_SSSE3(const uint8* src_argb, uint8* dst_argb, int width) {
- __asm {
- mov eax, [esp + 4] // src_argb
- mov edx, [esp + 8] // dst_argb
- mov ecx, [esp + 12] // width
- pcmpeqb xmm3, xmm3 // generate mask 0xff000000
- pslld xmm3, 24
- movdqa xmm4, kShuffleAlpha0
- movdqa xmm5, kShuffleAlpha1
-
- convertloop:
- movdqu xmm0, [eax] // read 4 pixels
- pshufb xmm0, xmm4 // isolate first 2 alphas
- movdqu xmm1, [eax] // read 4 pixels
- punpcklbw xmm1, xmm1 // first 2 pixel rgbs
- pmulhuw xmm0, xmm1 // rgb * a
- movdqu xmm1, [eax] // read 4 pixels
- pshufb xmm1, xmm5 // isolate next 2 alphas
- movdqu xmm2, [eax] // read 4 pixels
- punpckhbw xmm2, xmm2 // next 2 pixel rgbs
- pmulhuw xmm1, xmm2 // rgb * a
- movdqu xmm2, [eax] // mask original alpha
- lea eax, [eax + 16]
- pand xmm2, xmm3
- psrlw xmm0, 8
- psrlw xmm1, 8
- packuswb xmm0, xmm1 // alpha bytes are zero here because the shuffle zeroed the alpha multiplier
- por xmm0, xmm2 // copy original alpha
- movdqu [edx], xmm0
- lea edx, [edx + 16]
- sub ecx, 4
- jg convertloop // loop while pixels remain; each pass handles 4
-
- ret
- }
-}
-#endif // HAS_ARGBATTENUATEROW_SSSE3
-
-#ifdef HAS_ARGBATTENUATEROW_AVX2
-// Shuffle table duplicating alpha: copies word 3 (bytes 6-7 / 14-15) of each unpacked pixel into words 0-2 and zeroes word 3.
-static const uvec8 kShuffleAlpha_AVX2 = {
- 6u, 7u, 6u, 7u, 6u, 7u, 128u, 128u, 14u, 15u, 14u, 15u, 14u, 15u, 128u, 128u
-};
-__declspec(naked) // Attenuates 8 ARGB pixels per pass (B, G, R scaled by alpha; alpha copied through); no remainder loop.
-void ARGBAttenuateRow_AVX2(const uint8* src_argb, uint8* dst_argb, int width) {
- __asm {
- mov eax, [esp + 4] // src_argb
- mov edx, [esp + 8] // dst_argb
- mov ecx, [esp + 12] // width
- sub edx, eax // edx = dst - src, so [eax + edx] addresses dst while only eax advances
- vbroadcastf128 ymm4,kShuffleAlpha_AVX2
- vpcmpeqb ymm5, ymm5, ymm5 // generate mask 0xff000000
- vpslld ymm5, ymm5, 24
-
- convertloop:
- vmovdqu ymm6, [eax] // read 8 pixels.
- vpunpcklbw ymm0, ymm6, ymm6 // low 4 pixels. mutated.
- vpunpckhbw ymm1, ymm6, ymm6 // high 4 pixels. mutated.
- vpshufb ymm2, ymm0, ymm4 // low 4 alphas
- vpshufb ymm3, ymm1, ymm4 // high 4 alphas
- vpmulhuw ymm0, ymm0, ymm2 // rgb * a
- vpmulhuw ymm1, ymm1, ymm3 // rgb * a
- vpand ymm6, ymm6, ymm5 // isolate alpha
- vpsrlw ymm0, ymm0, 8
- vpsrlw ymm1, ymm1, 8
- vpackuswb ymm0, ymm0, ymm1 // unmutated.
- vpor ymm0, ymm0, ymm6 // copy original alpha
- vmovdqu [eax + edx], ymm0
- lea eax, [eax + 32]
- sub ecx, 8
- jg convertloop // loop while pixels remain; each pass handles 8
-
- vzeroupper
- ret
- }
-}
-#endif // HAS_ARGBATTENUATEROW_AVX2
-
-#ifdef HAS_ARGBUNATTENUATEROW_SSE2
-// Unattenuate 4 pixels at a time: multiplies each pixel by the fixed_invtbl8 entry indexed by its alpha (table defined elsewhere; presumably fixed-point 1/alpha - confirm).
-__declspec(naked)
-void ARGBUnattenuateRow_SSE2(const uint8* src_argb, uint8* dst_argb,
- int width) {
- __asm {
- push esi // esi/edi hold alpha table indices; restored before ret
- push edi
- mov eax, [esp + 8 + 4] // src_argb (+8 skips the two pushed registers)
- mov edx, [esp + 8 + 8] // dst_argb
- mov ecx, [esp + 8 + 12] // width
-
- convertloop:
- movdqu xmm0, [eax] // read 4 pixels
- movzx esi, byte ptr [eax + 3] // first alpha
- movzx edi, byte ptr [eax + 7] // second alpha
- punpcklbw xmm0, xmm0 // first 2
- movd xmm2, dword ptr fixed_invtbl8[esi * 4]
- movd xmm3, dword ptr fixed_invtbl8[edi * 4]
- pshuflw xmm2, xmm2, 040h // first 4 inv_alpha words. 1, a, a, a
- pshuflw xmm3, xmm3, 040h // next 4 inv_alpha words
- movlhps xmm2, xmm3
- pmulhuw xmm0, xmm2 // rgb * inv_alpha
-
- movdqu xmm1, [eax] // read 4 pixels
- movzx esi, byte ptr [eax + 11] // third alpha
- movzx edi, byte ptr [eax + 15] // fourth alpha
- punpckhbw xmm1, xmm1 // next 2
- movd xmm2, dword ptr fixed_invtbl8[esi * 4]
- movd xmm3, dword ptr fixed_invtbl8[edi * 4]
- pshuflw xmm2, xmm2, 040h // first 4 inv_alpha words
- pshuflw xmm3, xmm3, 040h // next 4 inv_alpha words
- movlhps xmm2, xmm3
- pmulhuw xmm1, xmm2 // rgb * inv_alpha
- lea eax, [eax + 16]
-
- packuswb xmm0, xmm1 // saturate the word results back to bytes
- movdqu [edx], xmm0
- lea edx, [edx + 16]
- sub ecx, 4
- jg convertloop // loop while pixels remain; each pass handles 4
- pop edi
- pop esi
- ret
- }
-}
-#endif // HAS_ARGBUNATTENUATEROW_SSE2
-
-#ifdef HAS_ARGBUNATTENUATEROW_AVX2
-// Shuffle table duplicating alpha: replicates word 0 of each 4-word group into words 0-2 and keeps word 3.
-static const uvec8 kUnattenShuffleAlpha_AVX2 = {
- 0u, 1u, 0u, 1u, 0u, 1u, 6u, 7u, 8u, 9u, 8u, 9u, 8u, 9u, 14u, 15u
-};
-// TODO(fbarchard): Enable USE_GATHER for future hardware if faster.
-// USE_GATHER is not on by default, due to being a slow instruction.
-#ifdef USE_GATHER
-__declspec(naked) // Gather variant: unattenuates 8 pixels per pass using vpgatherdd to fetch the fixed_invtbl8 entries.
-void ARGBUnattenuateRow_AVX2(const uint8* src_argb, uint8* dst_argb,
- int width) {
- __asm {
- mov eax, [esp + 4] // src_argb
- mov edx, [esp + 8] // dst_argb
- mov ecx, [esp + 12] // width
- sub edx, eax // edx = dst - src, so [eax + edx] addresses dst
- vbroadcastf128 ymm4, kUnattenShuffleAlpha_AVX2
-
- convertloop:
- vmovdqu ymm6, [eax] // read 8 pixels.
- vpcmpeqb ymm5, ymm5, ymm5 // generate mask 0xffffffff for gather.
- vpsrld ymm2, ymm6, 24 // alpha in low 8 bits.
- vpunpcklbw ymm0, ymm6, ymm6 // low 4 pixels. mutated.
- vpunpckhbw ymm1, ymm6, ymm6 // high 4 pixels. mutated.
- vpgatherdd ymm3, [ymm2 * 4 + fixed_invtbl8], ymm5 // ymm5 cleared. 1, a
- vpunpcklwd ymm2, ymm3, ymm3 // low 4 inverted alphas. mutated. 1, 1, a, a
- vpunpckhwd ymm3, ymm3, ymm3 // high 4 inverted alphas. mutated.
- vpshufb ymm2, ymm2, ymm4 // replicate low 4 alphas. 1, a, a, a
- vpshufb ymm3, ymm3, ymm4 // replicate high 4 alphas
- vpmulhuw ymm0, ymm0, ymm2 // rgb * ia
- vpmulhuw ymm1, ymm1, ymm3 // rgb * ia
- vpackuswb ymm0, ymm0, ymm1 // unmutated.
- vmovdqu [eax + edx], ymm0
- lea eax, [eax + 32]
- sub ecx, 8
- jg convertloop // loop while pixels remain; each pass handles 8
-
- vzeroupper
- ret
- }
-}
-#else // USE_GATHER
-__declspec(naked) // Non-gather variant: same result, but loads the 8 fixed_invtbl8 entries with scalar movzx/vmovd.
-void ARGBUnattenuateRow_AVX2(const uint8* src_argb, uint8* dst_argb,
- int width) {
- __asm {
-
- mov eax, [esp + 4] // src_argb
- mov edx, [esp + 8] // dst_argb
- mov ecx, [esp + 12] // width
- sub edx, eax // edx = dst - src, so [eax + edx] addresses dst
- vbroadcastf128 ymm5, kUnattenShuffleAlpha_AVX2
-
- push esi // pushed after the arguments are read, so the esp offsets above need no adjustment
- push edi
-
- convertloop:
- // replace VPGATHER
- movzx esi, byte ptr [eax + 3] // alpha0
- movzx edi, byte ptr [eax + 7] // alpha1
- vmovd xmm0, dword ptr fixed_invtbl8[esi * 4] // [1,a0]
- vmovd xmm1, dword ptr fixed_invtbl8[edi * 4] // [1,a1]
- movzx esi, byte ptr [eax + 11] // alpha2
- movzx edi, byte ptr [eax + 15] // alpha3
- vpunpckldq xmm6, xmm0, xmm1 // [1,a1,1,a0]
- vmovd xmm2, dword ptr fixed_invtbl8[esi * 4] // [1,a2]
- vmovd xmm3, dword ptr fixed_invtbl8[edi * 4] // [1,a3]
- movzx esi, byte ptr [eax + 19] // alpha4
- movzx edi, byte ptr [eax + 23] // alpha5
- vpunpckldq xmm7, xmm2, xmm3 // [1,a3,1,a2]
- vmovd xmm0, dword ptr fixed_invtbl8[esi * 4] // [1,a4]
- vmovd xmm1, dword ptr fixed_invtbl8[edi * 4] // [1,a5]
- movzx esi, byte ptr [eax + 27] // alpha6
- movzx edi, byte ptr [eax + 31] // alpha7
- vpunpckldq xmm0, xmm0, xmm1 // [1,a5,1,a4]
- vmovd xmm2, dword ptr fixed_invtbl8[esi * 4] // [1,a6]
- vmovd xmm3, dword ptr fixed_invtbl8[edi * 4] // [1,a7]
- vpunpckldq xmm2, xmm2, xmm3 // [1,a7,1,a6]
- vpunpcklqdq xmm3, xmm6, xmm7 // [1,a3,1,a2,1,a1,1,a0]
- vpunpcklqdq xmm0, xmm0, xmm2 // [1,a7,1,a6,1,a5,1,a4]
- vinserti128 ymm3, ymm3, xmm0, 1 // [1,a7,1,a6,1,a5,1,a4,1,a3,1,a2,1,a1,1,a0]
- // end of VPGATHER
-
- vmovdqu ymm6, [eax] // read 8 pixels.
- vpunpcklbw ymm0, ymm6, ymm6 // low 4 pixels. mutated.
- vpunpckhbw ymm1, ymm6, ymm6 // high 4 pixels. mutated.
- vpunpcklwd ymm2, ymm3, ymm3 // low 4 inverted alphas. mutated. 1, 1, a, a
- vpunpckhwd ymm3, ymm3, ymm3 // high 4 inverted alphas. mutated.
- vpshufb ymm2, ymm2, ymm5 // replicate low 4 alphas. 1, a, a, a
- vpshufb ymm3, ymm3, ymm5 // replicate high 4 alphas
- vpmulhuw ymm0, ymm0, ymm2 // rgb * ia
- vpmulhuw ymm1, ymm1, ymm3 // rgb * ia
- vpackuswb ymm0, ymm0, ymm1 // unmutated.
- vmovdqu [eax + edx], ymm0
- lea eax, [eax + 32]
- sub ecx, 8
- jg convertloop // loop while pixels remain; each pass handles 8
-
- pop edi
- pop esi
- vzeroupper
- ret
- }
-}
-#endif // USE_GATHER
-#endif // HAS_ARGBUNATTENUATEROW_AVX2
-
-#ifdef HAS_ARGBGRAYROW_SSSE3
-// Convert 8 ARGB pixels (32 bytes) to 8 Gray ARGB pixels: B, G and R are all set to the weighted gray value; alpha is copied from the source.
-__declspec(naked)
-void ARGBGrayRow_SSSE3(const uint8* src_argb, uint8* dst_argb, int width) {
- __asm {
- mov eax, [esp + 4] /* src_argb */
- mov edx, [esp + 8] /* dst_argb */
- mov ecx, [esp + 12] /* width */
- movdqa xmm4, kARGBToYJ // per-channel weights (constant defined elsewhere in the file)
- movdqa xmm5, kAddYJ64 // rounding term (constant defined elsewhere in the file)
-
- convertloop:
- movdqu xmm0, [eax] // G
- movdqu xmm1, [eax + 16]
- pmaddubsw xmm0, xmm4
- pmaddubsw xmm1, xmm4
- phaddw xmm0, xmm1 // one weighted sum word per pixel
- paddw xmm0, xmm5 // Add .5 for rounding.
- psrlw xmm0, 7
- packuswb xmm0, xmm0 // 8 G bytes
- movdqu xmm2, [eax] // A
- movdqu xmm3, [eax + 16]
- lea eax, [eax + 32]
- psrld xmm2, 24 // alpha moved to the low byte of each dword
- psrld xmm3, 24
- packuswb xmm2, xmm3
- packuswb xmm2, xmm2 // 8 A bytes
- movdqa xmm3, xmm0 // Weave into GG, GA, then GGGA
- punpcklbw xmm0, xmm0 // 8 GG words
- punpcklbw xmm3, xmm2 // 8 GA words
- movdqa xmm1, xmm0
- punpcklwd xmm0, xmm3 // GGGA first 4
- punpckhwd xmm1, xmm3 // GGGA next 4
- movdqu [edx], xmm0
- movdqu [edx + 16], xmm1
- lea edx, [edx + 32]
- sub ecx, 8
- jg convertloop // loop while pixels remain; each pass handles 8
- ret
- }
-}
-#endif // HAS_ARGBGRAYROW_SSSE3
-
-#ifdef HAS_ARGBSEPIAROW_SSSE3
-// b = (r * 35 + g * 68 + b * 17) >> 7
-// g = (r * 45 + g * 88 + b * 22) >> 7
-// r = (r * 50 + g * 98 + b * 24) >> 7
-// Constants for ARGB color to sepia tone; each table holds the B, G, R, A weights (in memory order) repeated for 4 pixels.
-static const vec8 kARGBToSepiaB = {
- 17, 68, 35, 0, 17, 68, 35, 0, 17, 68, 35, 0, 17, 68, 35, 0
-};
-
-static const vec8 kARGBToSepiaG = {
- 22, 88, 45, 0, 22, 88, 45, 0, 22, 88, 45, 0, 22, 88, 45, 0
-};
-
-static const vec8 kARGBToSepiaR = {
- 24, 98, 50, 0, 24, 98, 50, 0, 24, 98, 50, 0, 24, 98, 50, 0
-};
-
-// Convert 8 ARGB pixels (32 bytes) to 8 Sepia ARGB pixels, in place (dst_argb is both read and written); alpha is preserved.
-__declspec(naked)
-void ARGBSepiaRow_SSSE3(uint8* dst_argb, int width) {
- __asm {
- mov eax, [esp + 4] /* dst_argb */
- mov ecx, [esp + 8] /* width */
- movdqa xmm2, kARGBToSepiaB
- movdqa xmm3, kARGBToSepiaG
- movdqa xmm4, kARGBToSepiaR
-
- convertloop:
- movdqu xmm0, [eax] // B
- movdqu xmm6, [eax + 16]
- pmaddubsw xmm0, xmm2
- pmaddubsw xmm6, xmm2
- phaddw xmm0, xmm6
- psrlw xmm0, 7
- packuswb xmm0, xmm0 // 8 B values
- movdqu xmm5, [eax] // G
- movdqu xmm1, [eax + 16]
- pmaddubsw xmm5, xmm3
- pmaddubsw xmm1, xmm3
- phaddw xmm5, xmm1
- psrlw xmm5, 7
- packuswb xmm5, xmm5 // 8 G values
- punpcklbw xmm0, xmm5 // 8 BG values
- movdqu xmm5, [eax] // R
- movdqu xmm1, [eax + 16]
- pmaddubsw xmm5, xmm4
- pmaddubsw xmm1, xmm4
- phaddw xmm5, xmm1
- psrlw xmm5, 7
- packuswb xmm5, xmm5 // 8 R values
- movdqu xmm6, [eax] // A
- movdqu xmm1, [eax + 16]
- psrld xmm6, 24 // alpha moved to the low byte of each dword
- psrld xmm1, 24
- packuswb xmm6, xmm1
- packuswb xmm6, xmm6 // 8 A values
- punpcklbw xmm5, xmm6 // 8 RA values
- movdqa xmm1, xmm0 // Weave BG, RA together
- punpcklwd xmm0, xmm5 // BGRA first 4
- punpckhwd xmm1, xmm5 // BGRA next 4
- movdqu [eax], xmm0 // overwrite the source pixels
- movdqu [eax + 16], xmm1
- lea eax, [eax + 32]
- sub ecx, 8
- jg convertloop // loop while pixels remain; each pass handles 8
- ret
- }
-}
-#endif // HAS_ARGBSEPIAROW_SSSE3
-
-#ifdef HAS_ARGBCOLORMATRIXROW_SSSE3
-// Transform 8 ARGB pixels (32 bytes) with color matrix.
-// Same as Sepia except matrix is provided; the first 16 bytes of matrix_argb are read as four 4-byte coefficient rows (for B, G, R, A output) and sums are shifted right by 6.
-// TODO(fbarchard): packuswbs only use half of the reg. To make RGBA, combine R
-// and B into a high and low, then G/A, unpackl/hbw and then unpckl/hwd.
-__declspec(naked)
-void ARGBColorMatrixRow_SSSE3(const uint8* src_argb, uint8* dst_argb,
- const int8* matrix_argb, int width) {
- __asm {
- mov eax, [esp + 4] /* src_argb */
- mov edx, [esp + 8] /* dst_argb */
- mov ecx, [esp + 12] /* matrix_argb */
- movdqu xmm5, [ecx]
- pshufd xmm2, xmm5, 0x00 // broadcast matrix dword 0: coefficients for output B
- pshufd xmm3, xmm5, 0x55 // broadcast matrix dword 1: coefficients for output G
- pshufd xmm4, xmm5, 0xaa // broadcast matrix dword 2: coefficients for output R
- pshufd xmm5, xmm5, 0xff // broadcast matrix dword 3: coefficients for output A
- mov ecx, [esp + 16] /* width */
-
- convertloop:
- movdqu xmm0, [eax] // B
- movdqu xmm7, [eax + 16]
- pmaddubsw xmm0, xmm2
- pmaddubsw xmm7, xmm2
- movdqu xmm6, [eax] // G
- movdqu xmm1, [eax + 16]
- pmaddubsw xmm6, xmm3
- pmaddubsw xmm1, xmm3
- phaddsw xmm0, xmm7 // B
- phaddsw xmm6, xmm1 // G
- psraw xmm0, 6 // B
- psraw xmm6, 6 // G
- packuswb xmm0, xmm0 // 8 B values
- packuswb xmm6, xmm6 // 8 G values
- punpcklbw xmm0, xmm6 // 8 BG values
- movdqu xmm1, [eax] // R
- movdqu xmm7, [eax + 16]
- pmaddubsw xmm1, xmm4
- pmaddubsw xmm7, xmm4
- phaddsw xmm1, xmm7 // R
- movdqu xmm6, [eax] // A
- movdqu xmm7, [eax + 16]
- pmaddubsw xmm6, xmm5
- pmaddubsw xmm7, xmm5
- phaddsw xmm6, xmm7 // A
- psraw xmm1, 6 // R
- psraw xmm6, 6 // A
- packuswb xmm1, xmm1 // 8 R values
- packuswb xmm6, xmm6 // 8 A values
- punpcklbw xmm1, xmm6 // 8 RA values
- movdqa xmm6, xmm0 // Weave BG, RA together
- punpcklwd xmm0, xmm1 // BGRA first 4
- punpckhwd xmm6, xmm1 // BGRA next 4
- movdqu [edx], xmm0
- movdqu [edx + 16], xmm6
- lea eax, [eax + 32]
- lea edx, [edx + 32]
- sub ecx, 8
- jg convertloop // loop while pixels remain; each pass handles 8
- ret
- }
-}
-#endif // HAS_ARGBCOLORMATRIXROW_SSSE3
-
-#ifdef HAS_ARGBQUANTIZEROW_SSE2
-// Quantize 4 ARGB pixels (16 bytes) in place: color = ((color * scale) >> 16) * interval_size + interval_offset; alpha is preserved.
-__declspec(naked)
-void ARGBQuantizeRow_SSE2(uint8* dst_argb, int scale, int interval_size,
- int interval_offset, int width) {
- __asm {
- mov eax, [esp + 4] /* dst_argb */
- movd xmm2, [esp + 8] /* scale */
- movd xmm3, [esp + 12] /* interval_size */
- movd xmm4, [esp + 16] /* interval_offset */
- mov ecx, [esp + 20] /* width */
- pshuflw xmm2, xmm2, 040h // low word of scale copied into words 0-2; word 3 gets the high word
- pshufd xmm2, xmm2, 044h // duplicate the low 4 words into the high half
- pshuflw xmm3, xmm3, 040h
- pshufd xmm3, xmm3, 044h
- pshuflw xmm4, xmm4, 040h
- pshufd xmm4, xmm4, 044h
- pxor xmm5, xmm5 // constant 0
- pcmpeqb xmm6, xmm6 // generate mask 0xff000000
- pslld xmm6, 24
-
- convertloop:
- movdqu xmm0, [eax] // read 4 pixels
- punpcklbw xmm0, xmm5 // first 2 pixels
- pmulhuw xmm0, xmm2 // pixel * scale >> 16
- movdqu xmm1, [eax] // read 4 pixels
- punpckhbw xmm1, xmm5 // next 2 pixels
- pmulhuw xmm1, xmm2
- pmullw xmm0, xmm3 // * interval_size
- movdqu xmm7, [eax] // read 4 pixels
- pmullw xmm1, xmm3
- pand xmm7, xmm6 // mask alpha
- paddw xmm0, xmm4 // + interval_offset
- paddw xmm1, xmm4
- packuswb xmm0, xmm1
- por xmm0, xmm7 // OR in the original alpha (NOTE(review): relies on the computed alpha lane being 0, i.e. high words of the int arguments are 0 - confirm)
- movdqu [eax], xmm0
- lea eax, [eax + 16]
- sub ecx, 4
- jg convertloop // loop while pixels remain; each pass handles 4
- ret
- }
-}
-#endif // HAS_ARGBQUANTIZEROW_SSE2
-
-#ifdef HAS_ARGBSHADEROW_SSE2
-// Shade 4 pixels at a time by specified value: every channel (including alpha) is multiplied by the matching byte of value.
-__declspec(naked)
-void ARGBShadeRow_SSE2(const uint8* src_argb, uint8* dst_argb, int width,
- uint32 value) {
- __asm {
- mov eax, [esp + 4] // src_argb
- mov edx, [esp + 8] // dst_argb
- mov ecx, [esp + 12] // width
- movd xmm2, [esp + 16] // value
- punpcklbw xmm2, xmm2 // each value byte duplicated into a word
- punpcklqdq xmm2, xmm2 // same 4 words repeated for the second pixel
-
- convertloop:
- movdqu xmm0, [eax] // read 4 pixels
- lea eax, [eax + 16]
- movdqa xmm1, xmm0
- punpcklbw xmm0, xmm0 // first 2
- punpckhbw xmm1, xmm1 // next 2
- pmulhuw xmm0, xmm2 // argb * value
- pmulhuw xmm1, xmm2 // argb * value
- psrlw xmm0, 8
- psrlw xmm1, 8
- packuswb xmm0, xmm1
- movdqu [edx], xmm0
- lea edx, [edx + 16]
- sub ecx, 4
- jg convertloop // loop while pixels remain; each pass handles 4
-
- ret
- }
-}
-#endif // HAS_ARGBSHADEROW_SSE2
-
-#ifdef HAS_ARGBMULTIPLYROW_SSE2
-// Multiply 2 rows of ARGB pixels together, 4 pixels at a time. No remainder loop.
-__declspec(naked)
-void ARGBMultiplyRow_SSE2(const uint8* src_argb0, const uint8* src_argb1,
- uint8* dst_argb, int width) {
- __asm {
- push esi // esi holds src_argb1; restored before ret
- mov eax, [esp + 4 + 4] // src_argb0
- mov esi, [esp + 4 + 8] // src_argb1
- mov edx, [esp + 4 + 12] // dst_argb
- mov ecx, [esp + 4 + 16] // width
- pxor xmm5, xmm5 // constant 0
-
- convertloop:
- movdqu xmm0, [eax] // read 4 pixels from src_argb0
- movdqu xmm2, [esi] // read 4 pixels from src_argb1
- movdqu xmm1, xmm0 // register copy of the src_argb0 pixels
- movdqu xmm3, xmm2 // register copy of the src_argb1 pixels
- punpcklbw xmm0, xmm0 // first 2 (bytes duplicated into words)
- punpckhbw xmm1, xmm1 // next 2
- punpcklbw xmm2, xmm5 // first 2 (bytes zero-extended to words)
- punpckhbw xmm3, xmm5 // next 2
- pmulhuw xmm0, xmm2 // src_argb0 * src_argb1 first 2
- pmulhuw xmm1, xmm3 // src_argb0 * src_argb1 next 2
- lea eax, [eax + 16]
- lea esi, [esi + 16]
- packuswb xmm0, xmm1
- movdqu [edx], xmm0
- lea edx, [edx + 16]
- sub ecx, 4
- jg convertloop // loop while pixels remain; each pass handles 4
-
- pop esi
- ret
- }
-}
-#endif // HAS_ARGBMULTIPLYROW_SSE2
-
-#ifdef HAS_ARGBADDROW_SSE2
-// Add 2 rows of ARGB pixels together with unsigned byte saturation, 4 pixels at a time, then 1 pixel at a time for the remainder.
-// TODO(fbarchard): Port this to posix, neon and other math functions.
-__declspec(naked)
-void ARGBAddRow_SSE2(const uint8* src_argb0, const uint8* src_argb1,
- uint8* dst_argb, int width) {
- __asm {
- push esi // esi holds src_argb1; restored before ret
- mov eax, [esp + 4 + 4] // src_argb0
- mov esi, [esp + 4 + 8] // src_argb1
- mov edx, [esp + 4 + 12] // dst_argb
- mov ecx, [esp + 4 + 16] // width
-
- sub ecx, 4 // ecx = width - 4; negative means fewer than 4 pixels
- jl convertloop49 // skip the 4 pixel loop
-
- convertloop4:
- movdqu xmm0, [eax] // read 4 pixels from src_argb0
- lea eax, [eax + 16]
- movdqu xmm1, [esi] // read 4 pixels from src_argb1
- lea esi, [esi + 16]
- paddusb xmm0, xmm1 // src_argb0 + src_argb1
- movdqu [edx], xmm0
- lea edx, [edx + 16]
- sub ecx, 4
- jge convertloop4 // another full group of 4 pixels remains
-
- convertloop49:
- add ecx, 4 - 1 // ecx = remaining pixels - 1
- jl convertloop19 // no remaining pixels
-
- convertloop1:
- movd xmm0, [eax] // read 1 pixels from src_argb0
- lea eax, [eax + 4]
- movd xmm1, [esi] // read 1 pixels from src_argb1
- lea esi, [esi + 4]
- paddusb xmm0, xmm1 // src_argb0 + src_argb1
- movd [edx], xmm0
- lea edx, [edx + 4]
- sub ecx, 1
- jge convertloop1
-
- convertloop19:
- pop esi
- ret
- }
-}
-#endif // HAS_ARGBADDROW_SSE2
-
-#ifdef HAS_ARGBSUBTRACTROW_SSE2
-// Subtract src_argb1 from src_argb0 with unsigned byte saturation, 4 pixels at a time. No remainder loop.
-__declspec(naked)
-void ARGBSubtractRow_SSE2(const uint8* src_argb0, const uint8* src_argb1,
- uint8* dst_argb, int width) {
- __asm {
- push esi // esi holds src_argb1; restored before ret
- mov eax, [esp + 4 + 4] // src_argb0
- mov esi, [esp + 4 + 8] // src_argb1
- mov edx, [esp + 4 + 12] // dst_argb
- mov ecx, [esp + 4 + 16] // width
-
- convertloop:
- movdqu xmm0, [eax] // read 4 pixels from src_argb0
- lea eax, [eax + 16]
- movdqu xmm1, [esi] // read 4 pixels from src_argb1
- lea esi, [esi + 16]
- psubusb xmm0, xmm1 // src_argb0 - src_argb1
- movdqu [edx], xmm0
- lea edx, [edx + 16]
- sub ecx, 4
- jg convertloop // loop while pixels remain; each pass handles 4
-
- pop esi
- ret
- }
-}
-#endif // HAS_ARGBSUBTRACTROW_SSE2
-
-#ifdef HAS_ARGBMULTIPLYROW_AVX2
-// Multiply 2 rows of ARGB pixels together, 8 pixels at a time. No remainder loop.
-__declspec(naked)
-void ARGBMultiplyRow_AVX2(const uint8* src_argb0, const uint8* src_argb1,
- uint8* dst_argb, int width) {
- __asm {
- push esi // esi holds src_argb1; restored before ret
- mov eax, [esp + 4 + 4] // src_argb0
- mov esi, [esp + 4 + 8] // src_argb1
- mov edx, [esp + 4 + 12] // dst_argb
- mov ecx, [esp + 4 + 16] // width
- vpxor ymm5, ymm5, ymm5 // constant 0
-
- convertloop:
- vmovdqu ymm1, [eax] // read 8 pixels from src_argb0
- lea eax, [eax + 32]
- vmovdqu ymm3, [esi] // read 8 pixels from src_argb1
- lea esi, [esi + 32]
- vpunpcklbw ymm0, ymm1, ymm1 // low 4 (bytes duplicated into words)
- vpunpckhbw ymm1, ymm1, ymm1 // high 4
- vpunpcklbw ymm2, ymm3, ymm5 // low 4 (bytes zero-extended to words)
- vpunpckhbw ymm3, ymm3, ymm5 // high 4
- vpmulhuw ymm0, ymm0, ymm2 // src_argb0 * src_argb1 low 4
- vpmulhuw ymm1, ymm1, ymm3 // src_argb0 * src_argb1 high 4
- vpackuswb ymm0, ymm0, ymm1
- vmovdqu [edx], ymm0
- lea edx, [edx + 32]
- sub ecx, 8
- jg convertloop // loop while pixels remain; each pass handles 8
-
- pop esi
- vzeroupper
- ret
- }
-}
-#endif // HAS_ARGBMULTIPLYROW_AVX2
-
-#ifdef HAS_ARGBADDROW_AVX2
-// Add 2 rows of ARGB pixels together with unsigned byte saturation, 8 pixels at a time. No remainder loop.
-__declspec(naked)
-void ARGBAddRow_AVX2(const uint8* src_argb0, const uint8* src_argb1,
- uint8* dst_argb, int width) {
- __asm {
- push esi // esi holds src_argb1; restored before ret
- mov eax, [esp + 4 + 4] // src_argb0
- mov esi, [esp + 4 + 8] // src_argb1
- mov edx, [esp + 4 + 12] // dst_argb
- mov ecx, [esp + 4 + 16] // width
-
- convertloop:
- vmovdqu ymm0, [eax] // read 8 pixels from src_argb0
- lea eax, [eax + 32]
- vpaddusb ymm0, ymm0, [esi] // add 8 pixels from src_argb1
- lea esi, [esi + 32]
- vmovdqu [edx], ymm0
- lea edx, [edx + 32]
- sub ecx, 8
- jg convertloop // loop while pixels remain; each pass handles 8
-
- pop esi
- vzeroupper
- ret
- }
-}
-#endif // HAS_ARGBADDROW_AVX2
-
-#ifdef HAS_ARGBSUBTRACTROW_AVX2
-// Subtract src_argb1 from src_argb0 with unsigned byte saturation, 8 pixels at a time. No remainder loop.
-__declspec(naked)
-void ARGBSubtractRow_AVX2(const uint8* src_argb0, const uint8* src_argb1,
- uint8* dst_argb, int width) {
- __asm {
- push esi // esi holds src_argb1; restored before ret
- mov eax, [esp + 4 + 4] // src_argb0
- mov esi, [esp + 4 + 8] // src_argb1
- mov edx, [esp + 4 + 12] // dst_argb
- mov ecx, [esp + 4 + 16] // width
-
- convertloop:
- vmovdqu ymm0, [eax] // read 8 pixels from src_argb0
- lea eax, [eax + 32]
- vpsubusb ymm0, ymm0, [esi] // src_argb0 - src_argb1
- lea esi, [esi + 32]
- vmovdqu [edx], ymm0
- lea edx, [edx + 32]
- sub ecx, 8
- jg convertloop // loop while pixels remain; each pass handles 8
-
- pop esi
- vzeroupper
- ret
- }
-}
-#endif // HAS_ARGBSUBTRACTROW_AVX2
-
-#ifdef HAS_SOBELXROW_SSE2
-// SobelX as a matrix is
-// -1 0 1
-// -2 0 2
-// -1 0 1
-__declspec(naked) // Per pixel: |(y0[0] - y0[2]) + 2 * (y1[0] - y1[2]) + (y2[0] - y2[2])| saturated to 8 bits; 8 pixels per pass.
-void SobelXRow_SSE2(const uint8* src_y0, const uint8* src_y1,
- const uint8* src_y2, uint8* dst_sobelx, int width) {
- __asm {
- push esi // esi/edi hold row offsets; restored before ret
- push edi
- mov eax, [esp + 8 + 4] // src_y0
- mov esi, [esp + 8 + 8] // src_y1
- mov edi, [esp + 8 + 12] // src_y2
- mov edx, [esp + 8 + 16] // dst_sobelx
- mov ecx, [esp + 8 + 20] // width
- sub esi, eax // make src_y1, src_y2 and dst offsets from src_y0 so only eax advances
- sub edi, eax
- sub edx, eax
- pxor xmm5, xmm5 // constant 0
-
- convertloop:
- movq xmm0, qword ptr [eax] // read 8 pixels from src_y0[0]
- movq xmm1, qword ptr [eax + 2] // read 8 pixels from src_y0[2]
- punpcklbw xmm0, xmm5
- punpcklbw xmm1, xmm5
- psubw xmm0, xmm1 // y0[0] - y0[2]
- movq xmm1, qword ptr [eax + esi] // read 8 pixels from src_y1[0]
- movq xmm2, qword ptr [eax + esi + 2] // read 8 pixels from src_y1[2]
- punpcklbw xmm1, xmm5
- punpcklbw xmm2, xmm5
- psubw xmm1, xmm2 // y1[0] - y1[2]
- movq xmm2, qword ptr [eax + edi] // read 8 pixels from src_y2[0]
- movq xmm3, qword ptr [eax + edi + 2] // read 8 pixels from src_y2[2]
- punpcklbw xmm2, xmm5
- punpcklbw xmm3, xmm5
- psubw xmm2, xmm3 // y2[0] - y2[2]
- paddw xmm0, xmm2
- paddw xmm0, xmm1 // middle row is added twice (weight 2)
- paddw xmm0, xmm1
- pxor xmm1, xmm1 // abs = max(xmm0, -xmm0). SSSE3 could use pabsw
- psubw xmm1, xmm0
- pmaxsw xmm0, xmm1
- packuswb xmm0, xmm0
- movq qword ptr [eax + edx], xmm0
- lea eax, [eax + 8]
- sub ecx, 8
- jg convertloop // loop while pixels remain; each pass handles 8
-
- pop edi
- pop esi
- ret
- }
-}
-#endif // HAS_SOBELXROW_SSE2
-
-#ifdef HAS_SOBELYROW_SSE2
-// SobelY as a matrix is
-// -1 -2 -1
-// 0 0 0
-// 1 2 1
-__declspec(naked) // Per pixel: |(y0[0] - y1[0]) + 2 * (y0[1] - y1[1]) + (y0[2] - y1[2])| saturated to 8 bits; 8 pixels per pass.
-void SobelYRow_SSE2(const uint8* src_y0, const uint8* src_y1,
- uint8* dst_sobely, int width) {
- __asm {
- push esi // esi holds the src_y1 offset; restored before ret
- mov eax, [esp + 4 + 4] // src_y0
- mov esi, [esp + 4 + 8] // src_y1
- mov edx, [esp + 4 + 12] // dst_sobely
- mov ecx, [esp + 4 + 16] // width
- sub esi, eax // make src_y1 and dst offsets from src_y0 so only eax advances
- sub edx, eax
- pxor xmm5, xmm5 // constant 0
-
- convertloop:
- movq xmm0, qword ptr [eax] // read 8 pixels from src_y0[0]
- movq xmm1, qword ptr [eax + esi] // read 8 pixels from src_y1[0]
- punpcklbw xmm0, xmm5
- punpcklbw xmm1, xmm5
- psubw xmm0, xmm1 // y0[0] - y1[0]
- movq xmm1, qword ptr [eax + 1] // read 8 pixels from src_y0[1]
- movq xmm2, qword ptr [eax + esi + 1] // read 8 pixels from src_y1[1]
- punpcklbw xmm1, xmm5
- punpcklbw xmm2, xmm5
- psubw xmm1, xmm2 // y0[1] - y1[1]
- movq xmm2, qword ptr [eax + 2] // read 8 pixels from src_y0[2]
- movq xmm3, qword ptr [eax + esi + 2] // read 8 pixels from src_y1[2]
- punpcklbw xmm2, xmm5
- punpcklbw xmm3, xmm5
- psubw xmm2, xmm3 // y0[2] - y1[2]
- paddw xmm0, xmm2
- paddw xmm0, xmm1 // middle column is added twice (weight 2)
- paddw xmm0, xmm1
- pxor xmm1, xmm1 // abs = max(xmm0, -xmm0). SSSE3 could use pabsw
- psubw xmm1, xmm0
- pmaxsw xmm0, xmm1
- packuswb xmm0, xmm0
- movq qword ptr [eax + edx], xmm0
- lea eax, [eax + 8]
- sub ecx, 8
- jg convertloop // loop while pixels remain; each pass handles 8
-
- pop esi
- ret
- }
-}
-#endif // HAS_SOBELYROW_SSE2
-
-#ifdef HAS_SOBELROW_SSE2
-// Adds Sobel X and Sobel Y (saturating) and stores Sobel into ARGB, 16 pixels per pass.
-// A = 255
-// R = Sobel
-// G = Sobel
-// B = Sobel
-__declspec(naked)
-void SobelRow_SSE2(const uint8* src_sobelx, const uint8* src_sobely,
- uint8* dst_argb, int width) {
- __asm {
- push esi // esi holds the src_sobely offset; restored before ret
- mov eax, [esp + 4 + 4] // src_sobelx
- mov esi, [esp + 4 + 8] // src_sobely
- mov edx, [esp + 4 + 12] // dst_argb
- mov ecx, [esp + 4 + 16] // width
- sub esi, eax // src_sobely as an offset from src_sobelx
- pcmpeqb xmm5, xmm5 // alpha 255
- pslld xmm5, 24 // 0xff000000
-
- convertloop:
- movdqu xmm0, [eax] // read 16 pixels src_sobelx
- movdqu xmm1, [eax + esi] // read 16 pixels src_sobely
- lea eax, [eax + 16]
- paddusb xmm0, xmm1 // sobel = sobelx + sobely
- movdqa xmm2, xmm0 // GG
- punpcklbw xmm2, xmm0 // First 8
- punpckhbw xmm0, xmm0 // Next 8
- movdqa xmm1, xmm2 // GGGG
- punpcklwd xmm1, xmm2 // First 4
- punpckhwd xmm2, xmm2 // Next 4
- por xmm1, xmm5 // GGGA
- por xmm2, xmm5
- movdqa xmm3, xmm0 // GGGG
- punpcklwd xmm3, xmm0 // Next 4
- punpckhwd xmm0, xmm0 // Last 4
- por xmm3, xmm5 // GGGA
- por xmm0, xmm5
- movdqu [edx], xmm1 // 16 ARGB pixels = 64 bytes written per pass
- movdqu [edx + 16], xmm2
- movdqu [edx + 32], xmm3
- movdqu [edx + 48], xmm0
- lea edx, [edx + 64]
- sub ecx, 16
- jg convertloop // loop while pixels remain; each pass handles 16
-
- pop esi
- ret
- }
-}
-#endif // HAS_SOBELROW_SSE2
-
-#ifdef HAS_SOBELTOPLANEROW_SSE2
-// Adds Sobel X and Sobel Y (saturating) and stores Sobel into a plane, 16 pixels per pass.
-__declspec(naked)
-void SobelToPlaneRow_SSE2(const uint8* src_sobelx, const uint8* src_sobely,
- uint8* dst_y, int width) {
- __asm {
- push esi // esi holds the src_sobely offset; restored before ret
- mov eax, [esp + 4 + 4] // src_sobelx
- mov esi, [esp + 4 + 8] // src_sobely
- mov edx, [esp + 4 + 12] // dst_y
- mov ecx, [esp + 4 + 16] // width
- sub esi, eax // src_sobely as an offset from src_sobelx
-
- convertloop:
- movdqu xmm0, [eax] // read 16 pixels src_sobelx
- movdqu xmm1, [eax + esi] // read 16 pixels src_sobely
- lea eax, [eax + 16]
- paddusb xmm0, xmm1 // sobel = sobelx + sobely
- movdqu [edx], xmm0
- lea edx, [edx + 16]
- sub ecx, 16
- jg convertloop // loop while pixels remain; each pass handles 16
-
- pop esi
- ret
- }
-}
-#endif // HAS_SOBELTOPLANEROW_SSE2
-
-#ifdef HAS_SOBELXYROW_SSE2
-// Mixes Sobel X, Sobel Y and Sobel into ARGB, 16 pixels per pass.
-// A = 255
-// R = Sobel X
-// G = Sobel
-// B = Sobel Y
-__declspec(naked)
-void SobelXYRow_SSE2(const uint8* src_sobelx, const uint8* src_sobely,
- uint8* dst_argb, int width) {
- __asm {
- push esi // esi holds the src_sobely offset; restored before ret
- mov eax, [esp + 4 + 4] // src_sobelx
- mov esi, [esp + 4 + 8] // src_sobely
- mov edx, [esp + 4 + 12] // dst_argb
- mov ecx, [esp + 4 + 16] // width
- sub esi, eax // src_sobely as an offset from src_sobelx
- pcmpeqb xmm5, xmm5 // alpha 255
-
- convertloop:
- movdqu xmm0, [eax] // read 16 pixels src_sobelx
- movdqu xmm1, [eax + esi] // read 16 pixels src_sobely
- lea eax, [eax + 16]
- movdqa xmm2, xmm0
- paddusb xmm2, xmm1 // sobel = sobelx + sobely
- movdqa xmm3, xmm0 // XA
- punpcklbw xmm3, xmm5 // interleave X with 255: first 8 XA words
- punpckhbw xmm0, xmm5 // next 8 XA words
- movdqa xmm4, xmm1 // YS
- punpcklbw xmm4, xmm2 // interleave Y with sobel: first 8 YS words
- punpckhbw xmm1, xmm2 // next 8 YS words
- movdqa xmm6, xmm4 // YSXA
- punpcklwd xmm6, xmm3 // First 4
- punpckhwd xmm4, xmm3 // Next 4
- movdqa xmm7, xmm1 // YSXA
- punpcklwd xmm7, xmm0 // Next 4
- punpckhwd xmm1, xmm0 // Last 4
- movdqu [edx], xmm6 // 16 ARGB pixels = 64 bytes written per pass
- movdqu [edx + 16], xmm4
- movdqu [edx + 32], xmm7
- movdqu [edx + 48], xmm1
- lea edx, [edx + 64]
- sub ecx, 16
- jg convertloop // loop while pixels remain; each pass handles 16
-
- pop esi
- ret
- }
-}
-#endif // HAS_SOBELXYROW_SSE2
-
-#ifdef HAS_CUMULATIVESUMTOAVERAGEROW_SSE2
-// Consider float CumulativeSum.
-// Consider calling CumulativeSum one row at a time as needed.
-// Consider circular CumulativeSum buffer of radius * 2 + 1 height.
-// Convert cumulative sum for an area to an average for 1 pixel.
-// topleft is pointer to top left of CumulativeSum buffer for area.
-// botleft is pointer to bottom left of CumulativeSum buffer.
-// width is offset from left to right of area in CumulativeSum buffer measured
-// in number of ints.
-// area is the number of pixels in the area being averaged.
-// dst points to pixel to store result to.
-// count is number of averaged pixels to produce.
-// Does 4 pixels at a time, then 1 pixel at a time for the remainder. Not a naked function: arguments are referenced by name.
-void CumulativeSumToAverageRow_SSE2(const int32* topleft, const int32* botleft,
- int width, int area, uint8* dst,
- int count) {
- __asm {
- mov eax, topleft // eax topleft
- mov esi, botleft // esi botleft
- mov edx, width
- movd xmm5, area
- mov edi, dst
- mov ecx, count
- cvtdq2ps xmm5, xmm5 // area as float
- rcpss xmm4, xmm5 // 1.0f / area
- pshufd xmm4, xmm4, 0 // broadcast 1 / area to all 4 lanes
- sub ecx, 4 // ecx = count - 4; negative means fewer than 4 pixels
- jl l4b
-
- cmp area, 128 // 128 pixels will not overflow 15 bits.
- ja l4 // larger areas use the float path
-
- pshufd xmm5, xmm5, 0 // area
- pcmpeqb xmm6, xmm6 // constant of 65536.0 - 1 = 65535.0
- psrld xmm6, 16
- cvtdq2ps xmm6, xmm6
- addps xmm5, xmm6 // (65536.0 + area - 1)
- mulps xmm5, xmm4 // (65536.0 + area - 1) * 1 / area
- cvtps2dq xmm5, xmm5 // 0.16 fixed point
- packssdw xmm5, xmm5 // 16 bit shorts
-
- // 4 pixel loop small blocks.
- s4:
- // top left
- movdqu xmm0, [eax]
- movdqu xmm1, [eax + 16]
- movdqu xmm2, [eax + 32]
- movdqu xmm3, [eax + 48]
-
- // - top right
- psubd xmm0, [eax + edx * 4]
- psubd xmm1, [eax + edx * 4 + 16]
- psubd xmm2, [eax + edx * 4 + 32]
- psubd xmm3, [eax + edx * 4 + 48]
- lea eax, [eax + 64]
-
- // - bottom left
- psubd xmm0, [esi]
- psubd xmm1, [esi + 16]
- psubd xmm2, [esi + 32]
- psubd xmm3, [esi + 48]
-
- // + bottom right
- paddd xmm0, [esi + edx * 4]
- paddd xmm1, [esi + edx * 4 + 16]
- paddd xmm2, [esi + edx * 4 + 32]
- paddd xmm3, [esi + edx * 4 + 48]
- lea esi, [esi + 64]
-
- packssdw xmm0, xmm1 // pack 4 pixels into 2 registers
- packssdw xmm2, xmm3
-
- pmulhuw xmm0, xmm5 // sum * fixed point reciprocal, keeping the high 16 bits
- pmulhuw xmm2, xmm5
-
- packuswb xmm0, xmm2
- movdqu [edi], xmm0
- lea edi, [edi + 16]
- sub ecx, 4
- jge s4 // another full group of 4 pixels remains
-
- jmp l4b // remainder is handled by the 1 pixel loop
-
- // 4 pixel loop
- l4:
- // top left
- movdqu xmm0, [eax]
- movdqu xmm1, [eax + 16]
- movdqu xmm2, [eax + 32]
- movdqu xmm3, [eax + 48]
-
- // - top right
- psubd xmm0, [eax + edx * 4]
- psubd xmm1, [eax + edx * 4 + 16]
- psubd xmm2, [eax + edx * 4 + 32]
- psubd xmm3, [eax + edx * 4 + 48]
- lea eax, [eax + 64]
-
- // - bottom left
- psubd xmm0, [esi]
- psubd xmm1, [esi + 16]
- psubd xmm2, [esi + 32]
- psubd xmm3, [esi + 48]
-
- // + bottom right
- paddd xmm0, [esi + edx * 4]
- paddd xmm1, [esi + edx * 4 + 16]
- paddd xmm2, [esi + edx * 4 + 32]
- paddd xmm3, [esi + edx * 4 + 48]
- lea esi, [esi + 64]
-
- cvtdq2ps xmm0, xmm0 // Average = Sum * 1 / Area
- cvtdq2ps xmm1, xmm1
- mulps xmm0, xmm4
- mulps xmm1, xmm4
- cvtdq2ps xmm2, xmm2
- cvtdq2ps xmm3, xmm3
- mulps xmm2, xmm4
- mulps xmm3, xmm4
- cvtps2dq xmm0, xmm0
- cvtps2dq xmm1, xmm1
- cvtps2dq xmm2, xmm2
- cvtps2dq xmm3, xmm3
- packssdw xmm0, xmm1
- packssdw xmm2, xmm3
- packuswb xmm0, xmm2
- movdqu [edi], xmm0
- lea edi, [edi + 16]
- sub ecx, 4
- jge l4 // another full group of 4 pixels remains
-
- l4b:
- add ecx, 4 - 1 // ecx = remaining pixels - 1
- jl l1b // no remaining pixels
-
- // 1 pixel loop
- l1:
- movdqu xmm0, [eax] // top left
- psubd xmm0, [eax + edx * 4] // - top right
- lea eax, [eax + 16]
- psubd xmm0, [esi] // - bottom left
- paddd xmm0, [esi + edx * 4] // + bottom right
- lea esi, [esi + 16]
- cvtdq2ps xmm0, xmm0
- mulps xmm0, xmm4 // Average = Sum * 1 / Area
- cvtps2dq xmm0, xmm0
- packssdw xmm0, xmm0
- packuswb xmm0, xmm0
- movd dword ptr [edi], xmm0
- lea edi, [edi + 4]
- sub ecx, 1
- jge l1
- l1b:
- }
-}
-#endif // HAS_CUMULATIVESUMTOAVERAGEROW_SSE2
-
-#ifdef HAS_COMPUTECUMULATIVESUMROW_SSE2
-// Creates a table of cumulative sums where each value is a sum of all values
-// above and to the left of the value. Each pixel yields 4 int32 sums (one per byte channel): running row sum plus previous_cumsum.
-void ComputeCumulativeSumRow_SSE2(const uint8* row, int32* cumsum,
- const int32* previous_cumsum, int width) {
- __asm {
- mov eax, row
- mov edx, cumsum
- mov esi, previous_cumsum
- mov ecx, width
- pxor xmm0, xmm0 // running per-channel sum for this row
- pxor xmm1, xmm1 // constant 0 for zero-extending bytes
-
- sub ecx, 4 // ecx = width - 4; negative means fewer than 4 pixels
- jl l4b
- test edx, 15 // NOTE(review): stores below use movdqu, so this alignment check looks unnecessary - confirm
- jne l4b // unaligned cumsum: all pixels go through the 1 pixel loop
-
- // 4 pixel loop
- l4:
- movdqu xmm2, [eax] // 4 argb pixels 16 bytes.
- lea eax, [eax + 16]
- movdqa xmm4, xmm2
-
- punpcklbw xmm2, xmm1
- movdqa xmm3, xmm2
- punpcklwd xmm2, xmm1 // pixel 0 as 4 dwords
- punpckhwd xmm3, xmm1 // pixel 1 as 4 dwords
-
- punpckhbw xmm4, xmm1
- movdqa xmm5, xmm4
- punpcklwd xmm4, xmm1 // pixel 2 as 4 dwords
- punpckhwd xmm5, xmm1 // pixel 3 as 4 dwords
-
- paddd xmm0, xmm2 // running sum including pixel 0
- movdqu xmm2, [esi] // previous row above.
- paddd xmm2, xmm0
-
- paddd xmm0, xmm3
- movdqu xmm3, [esi + 16]
- paddd xmm3, xmm0
-
- paddd xmm0, xmm4
- movdqu xmm4, [esi + 32]
- paddd xmm4, xmm0
-
- paddd xmm0, xmm5
- movdqu xmm5, [esi + 48]
- lea esi, [esi + 64]
- paddd xmm5, xmm0
-
- movdqu [edx], xmm2
- movdqu [edx + 16], xmm3
- movdqu [edx + 32], xmm4
- movdqu [edx + 48], xmm5
-
- lea edx, [edx + 64]
- sub ecx, 4
- jge l4 // another full group of 4 pixels remains
-
- l4b:
- add ecx, 4 - 1 // ecx = remaining pixels - 1
- jl l1b // no remaining pixels
-
- // 1 pixel loop
- l1:
- movd xmm2, dword ptr [eax] // 1 argb pixel 4 bytes.
- lea eax, [eax + 4]
- punpcklbw xmm2, xmm1
- punpcklwd xmm2, xmm1 // pixel as 4 dwords
- paddd xmm0, xmm2 // update running sum
- movdqu xmm2, [esi] // previous row above.
- lea esi, [esi + 16]
- paddd xmm2, xmm0
- movdqu [edx], xmm2
- lea edx, [edx + 16]
- sub ecx, 1
- jge l1
-
- l1b:
- }
-}
-#endif // HAS_COMPUTECUMULATIVESUMROW_SSE2
-
-#ifdef HAS_ARGBAFFINEROW_SSE2
-// Copy ARGB pixels from source image with slope to a row of destination.
-__declspec(naked)
-LIBYUV_API
-void ARGBAffineRow_SSE2(const uint8* src_argb, int src_argb_stride,
- uint8* dst_argb, const float* uv_dudv, int width) {
- __asm {
- push esi
- push edi
- mov eax, [esp + 12] // src_argb
- mov esi, [esp + 16] // stride
- mov edx, [esp + 20] // dst_argb
- mov ecx, [esp + 24] // pointer to uv_dudv
- movq xmm2, qword ptr [ecx] // uv
- movq xmm7, qword ptr [ecx + 8] // dudv
- mov ecx, [esp + 28] // width
- shl esi, 16 // 4, stride
- add esi, 4
- movd xmm5, esi
- sub ecx, 4
- jl l4b
-
- // setup for 4 pixel loop
- pshufd xmm7, xmm7, 0x44 // dup dudv
- pshufd xmm5, xmm5, 0 // dup 4, stride
- movdqa xmm0, xmm2 // x0, y0, x1, y1
- addps xmm0, xmm7
- movlhps xmm2, xmm0
- movdqa xmm4, xmm7
- addps xmm4, xmm4 // dudv *= 2
- movdqa xmm3, xmm2 // x2, y2, x3, y3
- addps xmm3, xmm4
- addps xmm4, xmm4 // dudv *= 4
-
- // 4 pixel loop
- l4:
- cvttps2dq xmm0, xmm2 // x, y float to int first 2
- cvttps2dq xmm1, xmm3 // x, y float to int next 2
- packssdw xmm0, xmm1 // x, y as 8 shorts
- pmaddwd xmm0, xmm5 // offsets = x * 4 + y * stride.
- movd esi, xmm0
- pshufd xmm0, xmm0, 0x39 // shift right
- movd edi, xmm0
- pshufd xmm0, xmm0, 0x39 // shift right
- movd xmm1, [eax + esi] // read pixel 0
- movd xmm6, [eax + edi] // read pixel 1
- punpckldq xmm1, xmm6 // combine pixel 0 and 1
- addps xmm2, xmm4 // x, y += dx, dy first 2
- movq qword ptr [edx], xmm1
- movd esi, xmm0
- pshufd xmm0, xmm0, 0x39 // shift right
- movd edi, xmm0
- movd xmm6, [eax + esi] // read pixel 2
- movd xmm0, [eax + edi] // read pixel 3
- punpckldq xmm6, xmm0 // combine pixel 2 and 3
- addps xmm3, xmm4 // x, y += dx, dy next 2
- movq qword ptr 8[edx], xmm6
- lea edx, [edx + 16]
- sub ecx, 4
- jge l4
-
- l4b:
- add ecx, 4 - 1
- jl l1b
-
- // 1 pixel loop
- l1:
- cvttps2dq xmm0, xmm2 // x, y float to int
- packssdw xmm0, xmm0 // x, y as shorts
- pmaddwd xmm0, xmm5 // offset = x * 4 + y * stride
- addps xmm2, xmm7 // x, y += dx, dy
- movd esi, xmm0
- movd xmm0, [eax + esi] // copy a pixel
- movd [edx], xmm0
- lea edx, [edx + 4]
- sub ecx, 1
- jge l1
- l1b:
- pop edi
- pop esi
- ret
- }
-}
-#endif // HAS_ARGBAFFINEROW_SSE2
-
-#ifdef HAS_INTERPOLATEROW_AVX2
-// Bilinear filter 32x2 -> 32x1
-__declspec(naked)
-void InterpolateRow_AVX2(uint8* dst_ptr, const uint8* src_ptr,
- ptrdiff_t src_stride, int dst_width,
- int source_y_fraction) {
- __asm {
- push esi
- push edi
- mov edi, [esp + 8 + 4] // dst_ptr
- mov esi, [esp + 8 + 8] // src_ptr
- mov edx, [esp + 8 + 12] // src_stride
- mov ecx, [esp + 8 + 16] // dst_width
- mov eax, [esp + 8 + 20] // source_y_fraction (0..255)
- shr eax, 1
- // Dispatch to specialized filters if applicable.
- cmp eax, 0
- je xloop100 // 0 / 128. Blend 100 / 0.
- sub edi, esi
- cmp eax, 32
- je xloop75 // 32 / 128 is 0.25. Blend 75 / 25.
- cmp eax, 64
- je xloop50 // 64 / 128 is 0.50. Blend 50 / 50.
- cmp eax, 96
- je xloop25 // 96 / 128 is 0.75. Blend 25 / 75.
-
- vmovd xmm0, eax // high fraction 0..127
- neg eax
- add eax, 128
- vmovd xmm5, eax // low fraction 128..1
- vpunpcklbw xmm5, xmm5, xmm0
- vpunpcklwd xmm5, xmm5, xmm5
- vpxor ymm0, ymm0, ymm0
- vpermd ymm5, ymm0, ymm5
-
- xloop:
- vmovdqu ymm0, [esi]
- vmovdqu ymm2, [esi + edx]
- vpunpckhbw ymm1, ymm0, ymm2 // mutates
- vpunpcklbw ymm0, ymm0, ymm2 // mutates
- vpmaddubsw ymm0, ymm0, ymm5
- vpmaddubsw ymm1, ymm1, ymm5
- vpsrlw ymm0, ymm0, 7
- vpsrlw ymm1, ymm1, 7
- vpackuswb ymm0, ymm0, ymm1 // unmutates
- vmovdqu [esi + edi], ymm0
- lea esi, [esi + 32]
- sub ecx, 32
- jg xloop
- jmp xloop99
-
- // Blend 25 / 75.
- xloop25:
- vmovdqu ymm0, [esi]
- vmovdqu ymm1, [esi + edx]
- vpavgb ymm0, ymm0, ymm1
- vpavgb ymm0, ymm0, ymm1
- vmovdqu [esi + edi], ymm0
- lea esi, [esi + 32]
- sub ecx, 32
- jg xloop25
- jmp xloop99
-
- // Blend 50 / 50.
- xloop50:
- vmovdqu ymm0, [esi]
- vpavgb ymm0, ymm0, [esi + edx]
- vmovdqu [esi + edi], ymm0
- lea esi, [esi + 32]
- sub ecx, 32
- jg xloop50
- jmp xloop99
-
- // Blend 75 / 25.
- xloop75:
- vmovdqu ymm1, [esi]
- vmovdqu ymm0, [esi + edx]
- vpavgb ymm0, ymm0, ymm1
- vpavgb ymm0, ymm0, ymm1
- vmovdqu [esi + edi], ymm0
- lea esi, [esi + 32]
- sub ecx, 32
- jg xloop75
- jmp xloop99
-
- // Blend 100 / 0 - Copy row unchanged.
- xloop100:
- rep movsb
-
- xloop99:
- pop edi
- pop esi
- vzeroupper
- ret
- }
-}
-#endif // HAS_INTERPOLATEROW_AVX2
-
-// Bilinear filter 16x2 -> 16x1
-__declspec(naked)
-void InterpolateRow_SSSE3(uint8* dst_ptr, const uint8* src_ptr,
- ptrdiff_t src_stride, int dst_width,
- int source_y_fraction) {
- __asm {
- push esi
- push edi
- mov edi, [esp + 8 + 4] // dst_ptr
- mov esi, [esp + 8 + 8] // src_ptr
- mov edx, [esp + 8 + 12] // src_stride
- mov ecx, [esp + 8 + 16] // dst_width
- mov eax, [esp + 8 + 20] // source_y_fraction (0..255)
- sub edi, esi
- shr eax, 1
- // Dispatch to specialized filters if applicable.
- cmp eax, 0
- je xloop100 // 0 / 128. Blend 100 / 0.
- cmp eax, 32
- je xloop75 // 32 / 128 is 0.25. Blend 75 / 25.
- cmp eax, 64
- je xloop50 // 64 / 128 is 0.50. Blend 50 / 50.
- cmp eax, 96
- je xloop25 // 96 / 128 is 0.75. Blend 25 / 75.
-
- movd xmm0, eax // high fraction 0..127
- neg eax
- add eax, 128
- movd xmm5, eax // low fraction 128..1
- punpcklbw xmm5, xmm0
- punpcklwd xmm5, xmm5
- pshufd xmm5, xmm5, 0
-
- xloop:
- movdqu xmm0, [esi]
- movdqu xmm2, [esi + edx]
- movdqu xmm1, xmm0
- punpcklbw xmm0, xmm2
- punpckhbw xmm1, xmm2
- pmaddubsw xmm0, xmm5
- pmaddubsw xmm1, xmm5
- psrlw xmm0, 7
- psrlw xmm1, 7
- packuswb xmm0, xmm1
- movdqu [esi + edi], xmm0
- lea esi, [esi + 16]
- sub ecx, 16
- jg xloop
- jmp xloop99
-
- // Blend 25 / 75.
- xloop25:
- movdqu xmm0, [esi]
- movdqu xmm1, [esi + edx]
- pavgb xmm0, xmm1
- pavgb xmm0, xmm1
- movdqu [esi + edi], xmm0
- lea esi, [esi + 16]
- sub ecx, 16
- jg xloop25
- jmp xloop99
-
- // Blend 50 / 50.
- xloop50:
- movdqu xmm0, [esi]
- movdqu xmm1, [esi + edx]
- pavgb xmm0, xmm1
- movdqu [esi + edi], xmm0
- lea esi, [esi + 16]
- sub ecx, 16
- jg xloop50
- jmp xloop99
-
- // Blend 75 / 25.
- xloop75:
- movdqu xmm1, [esi]
- movdqu xmm0, [esi + edx]
- pavgb xmm0, xmm1
- pavgb xmm0, xmm1
- movdqu [esi + edi], xmm0
- lea esi, [esi + 16]
- sub ecx, 16
- jg xloop75
- jmp xloop99
-
- // Blend 100 / 0 - Copy row unchanged.
- xloop100:
- movdqu xmm0, [esi]
- movdqu [esi + edi], xmm0
- lea esi, [esi + 16]
- sub ecx, 16
- jg xloop100
-
- xloop99:
- pop edi
- pop esi
- ret
- }
-}
-
-#ifdef HAS_INTERPOLATEROW_SSE2
-// Bilinear filter 16x2 -> 16x1
-__declspec(naked)
-void InterpolateRow_SSE2(uint8* dst_ptr, const uint8* src_ptr,
- ptrdiff_t src_stride, int dst_width,
- int source_y_fraction) {
- __asm {
- push esi
- push edi
- mov edi, [esp + 8 + 4] // dst_ptr
- mov esi, [esp + 8 + 8] // src_ptr
- mov edx, [esp + 8 + 12] // src_stride
- mov ecx, [esp + 8 + 16] // dst_width
- mov eax, [esp + 8 + 20] // source_y_fraction (0..255)
- sub edi, esi
- // Dispatch to specialized filters if applicable.
- cmp eax, 0
- je xloop100 // 0 / 256. Blend 100 / 0.
- cmp eax, 64
- je xloop75 // 64 / 256 is 0.25. Blend 75 / 25.
- cmp eax, 128
- je xloop50 // 128 / 256 is 0.50. Blend 50 / 50.
- cmp eax, 192
- je xloop25 // 192 / 256 is 0.75. Blend 25 / 75.
-
- movd xmm5, eax // xmm5 = y fraction
- punpcklbw xmm5, xmm5
- psrlw xmm5, 1
- punpcklwd xmm5, xmm5
- punpckldq xmm5, xmm5
- punpcklqdq xmm5, xmm5
- pxor xmm4, xmm4
-
- xloop:
- movdqu xmm0, [esi] // row0
- movdqu xmm2, [esi + edx] // row1
- movdqu xmm1, xmm0
- movdqu xmm3, xmm2
- punpcklbw xmm2, xmm4
- punpckhbw xmm3, xmm4
- punpcklbw xmm0, xmm4
- punpckhbw xmm1, xmm4
- psubw xmm2, xmm0 // row1 - row0
- psubw xmm3, xmm1
- paddw xmm2, xmm2 // 9 bits * 15 bits = 8.16
- paddw xmm3, xmm3
- pmulhw xmm2, xmm5 // scale diff
- pmulhw xmm3, xmm5
- paddw xmm0, xmm2 // sum rows
- paddw xmm1, xmm3
- packuswb xmm0, xmm1
- movdqu [esi + edi], xmm0
- lea esi, [esi + 16]
- sub ecx, 16
- jg xloop
- jmp xloop99
-
- // Blend 25 / 75.
- xloop25:
- movdqu xmm0, [esi]
- movdqu xmm1, [esi + edx]
- pavgb xmm0, xmm1
- pavgb xmm0, xmm1
- movdqu [esi + edi], xmm0
- lea esi, [esi + 16]
- sub ecx, 16
- jg xloop25
- jmp xloop99
-
- // Blend 50 / 50.
- xloop50:
- movdqu xmm0, [esi]
- movdqu xmm1, [esi + edx]
- pavgb xmm0, xmm1
- movdqu [esi + edi], xmm0
- lea esi, [esi + 16]
- sub ecx, 16
- jg xloop50
- jmp xloop99
-
- // Blend 75 / 25.
- xloop75:
- movdqu xmm1, [esi]
- movdqu xmm0, [esi + edx]
- pavgb xmm0, xmm1
- pavgb xmm0, xmm1
- movdqu [esi + edi], xmm0
- lea esi, [esi + 16]
- sub ecx, 16
- jg xloop75
- jmp xloop99
-
- // Blend 100 / 0 - Copy row unchanged.
- xloop100:
- movdqu xmm0, [esi]
- movdqu [esi + edi], xmm0
- lea esi, [esi + 16]
- sub ecx, 16
- jg xloop100
-
- xloop99:
- pop edi
- pop esi
- ret
- }
-}
-#endif // HAS_INTERPOLATEROW_SSE2
-
-// For BGRAToARGB, ABGRToARGB, RGBAToARGB, and ARGBToRGBA.
-__declspec(naked)
-void ARGBShuffleRow_SSSE3(const uint8* src_argb, uint8* dst_argb,
- const uint8* shuffler, int pix) {
- __asm {
- mov eax, [esp + 4] // src_argb
- mov edx, [esp + 8] // dst_argb
- mov ecx, [esp + 12] // shuffler
- movdqu xmm5, [ecx]
- mov ecx, [esp + 16] // pix
-
- wloop:
- movdqu xmm0, [eax]
- movdqu xmm1, [eax + 16]
- lea eax, [eax + 32]
- pshufb xmm0, xmm5
- pshufb xmm1, xmm5
- movdqu [edx], xmm0
- movdqu [edx + 16], xmm1
- lea edx, [edx + 32]
- sub ecx, 8
- jg wloop
- ret
- }
-}
-
-#ifdef HAS_ARGBSHUFFLEROW_AVX2
-__declspec(naked)
-void ARGBShuffleRow_AVX2(const uint8* src_argb, uint8* dst_argb,
- const uint8* shuffler, int pix) {
- __asm {
- mov eax, [esp + 4] // src_argb
- mov edx, [esp + 8] // dst_argb
- mov ecx, [esp + 12] // shuffler
- vbroadcastf128 ymm5, [ecx] // same shuffle in high as low.
- mov ecx, [esp + 16] // pix
-
- wloop:
- vmovdqu ymm0, [eax]
- vmovdqu ymm1, [eax + 32]
- lea eax, [eax + 64]
- vpshufb ymm0, ymm0, ymm5
- vpshufb ymm1, ymm1, ymm5
- vmovdqu [edx], ymm0
- vmovdqu [edx + 32], ymm1
- lea edx, [edx + 64]
- sub ecx, 16
- jg wloop
-
- vzeroupper
- ret
- }
-}
-#endif // HAS_ARGBSHUFFLEROW_AVX2
-
-__declspec(naked)
-void ARGBShuffleRow_SSE2(const uint8* src_argb, uint8* dst_argb,
- const uint8* shuffler, int pix) {
- __asm {
- push ebx
- push esi
- mov eax, [esp + 8 + 4] // src_argb
- mov edx, [esp + 8 + 8] // dst_argb
- mov esi, [esp + 8 + 12] // shuffler
- mov ecx, [esp + 8 + 16] // pix
- pxor xmm5, xmm5
-
- mov ebx, [esi] // shuffler
- cmp ebx, 0x03000102
- je shuf_3012
- cmp ebx, 0x00010203
- je shuf_0123
- cmp ebx, 0x00030201
- je shuf_0321
- cmp ebx, 0x02010003
- je shuf_2103
-
- // TODO(fbarchard): Use one source pointer and 3 offsets.
- shuf_any1:
- movzx ebx, byte ptr [esi]
- movzx ebx, byte ptr [eax + ebx]
- mov [edx], bl
- movzx ebx, byte ptr [esi + 1]
- movzx ebx, byte ptr [eax + ebx]
- mov [edx + 1], bl
- movzx ebx, byte ptr [esi + 2]
- movzx ebx, byte ptr [eax + ebx]
- mov [edx + 2], bl
- movzx ebx, byte ptr [esi + 3]
- movzx ebx, byte ptr [eax + ebx]
- mov [edx + 3], bl
- lea eax, [eax + 4]
- lea edx, [edx + 4]
- sub ecx, 1
- jg shuf_any1
- jmp shuf99
-
- shuf_0123:
- movdqu xmm0, [eax]
- lea eax, [eax + 16]
- movdqa xmm1, xmm0
- punpcklbw xmm0, xmm5
- punpckhbw xmm1, xmm5
- pshufhw xmm0, xmm0, 01Bh // 1B = 00011011 = 0x0123 = BGRAToARGB
- pshuflw xmm0, xmm0, 01Bh
- pshufhw xmm1, xmm1, 01Bh
- pshuflw xmm1, xmm1, 01Bh
- packuswb xmm0, xmm1
- movdqu [edx], xmm0
- lea edx, [edx + 16]
- sub ecx, 4
- jg shuf_0123
- jmp shuf99
-
- shuf_0321:
- movdqu xmm0, [eax]
- lea eax, [eax + 16]
- movdqa xmm1, xmm0
- punpcklbw xmm0, xmm5
- punpckhbw xmm1, xmm5
- pshufhw xmm0, xmm0, 039h // 39 = 00111001 = 0x0321 = RGBAToARGB
- pshuflw xmm0, xmm0, 039h
- pshufhw xmm1, xmm1, 039h
- pshuflw xmm1, xmm1, 039h
- packuswb xmm0, xmm1
- movdqu [edx], xmm0
- lea edx, [edx + 16]
- sub ecx, 4
- jg shuf_0321
- jmp shuf99
-
- shuf_2103:
- movdqu xmm0, [eax]
- lea eax, [eax + 16]
- movdqa xmm1, xmm0
- punpcklbw xmm0, xmm5
- punpckhbw xmm1, xmm5
- pshufhw xmm0, xmm0, 093h // 93 = 10010011 = 0x2103 = ARGBToRGBA
- pshuflw xmm0, xmm0, 093h
- pshufhw xmm1, xmm1, 093h
- pshuflw xmm1, xmm1, 093h
- packuswb xmm0, xmm1
- movdqu [edx], xmm0
- lea edx, [edx + 16]
- sub ecx, 4
- jg shuf_2103
- jmp shuf99
-
- shuf_3012:
- movdqu xmm0, [eax]
- lea eax, [eax + 16]
- movdqa xmm1, xmm0
- punpcklbw xmm0, xmm5
- punpckhbw xmm1, xmm5
- pshufhw xmm0, xmm0, 0C6h // C6 = 11000110 = 0x3012 = ABGRToARGB
- pshuflw xmm0, xmm0, 0C6h
- pshufhw xmm1, xmm1, 0C6h
- pshuflw xmm1, xmm1, 0C6h
- packuswb xmm0, xmm1
- movdqu [edx], xmm0
- lea edx, [edx + 16]
- sub ecx, 4
- jg shuf_3012
-
- shuf99:
- pop esi
- pop ebx
- ret
- }
-}
-
-// YUY2 - Macro-pixel = 2 image pixels
-// Y0U0Y1V0....Y2U2Y3V2...Y4U4Y5V4....
-
-// UYVY - Macro-pixel = 2 image pixels
-// U0Y0V0Y1
-
-__declspec(naked)
-void I422ToYUY2Row_SSE2(const uint8* src_y,
- const uint8* src_u,
- const uint8* src_v,
- uint8* dst_frame, int width) {
- __asm {
- push esi
- push edi
- mov eax, [esp + 8 + 4] // src_y
- mov esi, [esp + 8 + 8] // src_u
- mov edx, [esp + 8 + 12] // src_v
- mov edi, [esp + 8 + 16] // dst_frame
- mov ecx, [esp + 8 + 20] // width
- sub edx, esi
-
- convertloop:
- movq xmm2, qword ptr [esi] // U
- movq xmm3, qword ptr [esi + edx] // V
- lea esi, [esi + 8]
- punpcklbw xmm2, xmm3 // UV
- movdqu xmm0, [eax] // Y
- lea eax, [eax + 16]
- movdqa xmm1, xmm0
- punpcklbw xmm0, xmm2 // YUYV
- punpckhbw xmm1, xmm2
- movdqu [edi], xmm0
- movdqu [edi + 16], xmm1
- lea edi, [edi + 32]
- sub ecx, 16
- jg convertloop
-
- pop edi
- pop esi
- ret
- }
-}
-
-__declspec(naked)
-void I422ToUYVYRow_SSE2(const uint8* src_y,
- const uint8* src_u,
- const uint8* src_v,
- uint8* dst_frame, int width) {
- __asm {
- push esi
- push edi
- mov eax, [esp + 8 + 4] // src_y
- mov esi, [esp + 8 + 8] // src_u
- mov edx, [esp + 8 + 12] // src_v
- mov edi, [esp + 8 + 16] // dst_frame
- mov ecx, [esp + 8 + 20] // width
- sub edx, esi
-
- convertloop:
- movq xmm2, qword ptr [esi] // U
- movq xmm3, qword ptr [esi + edx] // V
- lea esi, [esi + 8]
- punpcklbw xmm2, xmm3 // UV
- movdqu xmm0, [eax] // Y
- movdqa xmm1, xmm2
- lea eax, [eax + 16]
- punpcklbw xmm1, xmm0 // UYVY
- punpckhbw xmm2, xmm0
- movdqu [edi], xmm1
- movdqu [edi + 16], xmm2
- lea edi, [edi + 32]
- sub ecx, 16
- jg convertloop
-
- pop edi
- pop esi
- ret
- }
-}
-
-#ifdef HAS_ARGBPOLYNOMIALROW_SSE2
-__declspec(naked)
-void ARGBPolynomialRow_SSE2(const uint8* src_argb,
- uint8* dst_argb, const float* poly,
- int width) {
- __asm {
- push esi
- mov eax, [esp + 4 + 4] /* src_argb */
- mov edx, [esp + 4 + 8] /* dst_argb */
- mov esi, [esp + 4 + 12] /* poly */
- mov ecx, [esp + 4 + 16] /* width */
- pxor xmm3, xmm3 // 0 constant for zero extending bytes to ints.
-
- // 2 pixel loop.
- convertloop:
-// pmovzxbd xmm0, dword ptr [eax] // BGRA pixel
-// pmovzxbd xmm4, dword ptr [eax + 4] // BGRA pixel
- movq xmm0, qword ptr [eax] // BGRABGRA
- lea eax, [eax + 8]
- punpcklbw xmm0, xmm3
- movdqa xmm4, xmm0
- punpcklwd xmm0, xmm3 // pixel 0
- punpckhwd xmm4, xmm3 // pixel 1
- cvtdq2ps xmm0, xmm0 // 4 floats
- cvtdq2ps xmm4, xmm4
- movdqa xmm1, xmm0 // X
- movdqa xmm5, xmm4
- mulps xmm0, [esi + 16] // C1 * X
- mulps xmm4, [esi + 16]
- addps xmm0, [esi] // result = C0 + C1 * X
- addps xmm4, [esi]
- movdqa xmm2, xmm1
- movdqa xmm6, xmm5
- mulps xmm2, xmm1 // X * X
- mulps xmm6, xmm5
- mulps xmm1, xmm2 // X * X * X
- mulps xmm5, xmm6
- mulps xmm2, [esi + 32] // C2 * X * X
- mulps xmm6, [esi + 32]
- mulps xmm1, [esi + 48] // C3 * X * X * X
- mulps xmm5, [esi + 48]
- addps xmm0, xmm2 // result += C2 * X * X
- addps xmm4, xmm6
- addps xmm0, xmm1 // result += C3 * X * X * X
- addps xmm4, xmm5
- cvttps2dq xmm0, xmm0
- cvttps2dq xmm4, xmm4
- packuswb xmm0, xmm4
- packuswb xmm0, xmm0
- movq qword ptr [edx], xmm0
- lea edx, [edx + 8]
- sub ecx, 2
- jg convertloop
- pop esi
- ret
- }
-}
-#endif // HAS_ARGBPOLYNOMIALROW_SSE2
-
-#ifdef HAS_ARGBPOLYNOMIALROW_AVX2
-__declspec(naked)
-void ARGBPolynomialRow_AVX2(const uint8* src_argb,
- uint8* dst_argb, const float* poly,
- int width) {
- __asm {
- mov eax, [esp + 4] /* src_argb */
- mov edx, [esp + 8] /* dst_argb */
- mov ecx, [esp + 12] /* poly */
- vbroadcastf128 ymm4, [ecx] // C0
- vbroadcastf128 ymm5, [ecx + 16] // C1
- vbroadcastf128 ymm6, [ecx + 32] // C2
- vbroadcastf128 ymm7, [ecx + 48] // C3
- mov ecx, [esp + 16] /* width */
-
- // 2 pixel loop.
- convertloop:
- vpmovzxbd ymm0, qword ptr [eax] // 2 BGRA pixels
- lea eax, [eax + 8]
- vcvtdq2ps ymm0, ymm0 // X 8 floats
- vmulps ymm2, ymm0, ymm0 // X * X
- vmulps ymm3, ymm0, ymm7 // C3 * X
- vfmadd132ps ymm0, ymm4, ymm5 // result = C0 + C1 * X
- vfmadd231ps ymm0, ymm2, ymm6 // result += C2 * X * X
- vfmadd231ps ymm0, ymm2, ymm3 // result += C3 * X * X * X
- vcvttps2dq ymm0, ymm0
- vpackusdw ymm0, ymm0, ymm0 // b0g0r0a0_00000000_b0g0r0a0_00000000
- vpermq ymm0, ymm0, 0xd8 // b0g0r0a0_b0g0r0a0_00000000_00000000
- vpackuswb xmm0, xmm0, xmm0 // bgrabgra_00000000_00000000_00000000
- vmovq qword ptr [edx], xmm0
- lea edx, [edx + 8]
- sub ecx, 2
- jg convertloop
- vzeroupper
- ret
- }
-}
-#endif // HAS_ARGBPOLYNOMIALROW_AVX2
-
-#ifdef HAS_ARGBCOLORTABLEROW_X86
-// Tranform ARGB pixels with color table.
-__declspec(naked)
-void ARGBColorTableRow_X86(uint8* dst_argb, const uint8* table_argb,
- int width) {
- __asm {
- push esi
- mov eax, [esp + 4 + 4] /* dst_argb */
- mov esi, [esp + 4 + 8] /* table_argb */
- mov ecx, [esp + 4 + 12] /* width */
-
- // 1 pixel loop.
- convertloop:
- movzx edx, byte ptr [eax]
- lea eax, [eax + 4]
- movzx edx, byte ptr [esi + edx * 4]
- mov byte ptr [eax - 4], dl
- movzx edx, byte ptr [eax - 4 + 1]
- movzx edx, byte ptr [esi + edx * 4 + 1]
- mov byte ptr [eax - 4 + 1], dl
- movzx edx, byte ptr [eax - 4 + 2]
- movzx edx, byte ptr [esi + edx * 4 + 2]
- mov byte ptr [eax - 4 + 2], dl
- movzx edx, byte ptr [eax - 4 + 3]
- movzx edx, byte ptr [esi + edx * 4 + 3]
- mov byte ptr [eax - 4 + 3], dl
- dec ecx
- jg convertloop
- pop esi
- ret
- }
-}
-#endif // HAS_ARGBCOLORTABLEROW_X86
-
-#ifdef HAS_RGBCOLORTABLEROW_X86
-// Tranform RGB pixels with color table.
-__declspec(naked)
-void RGBColorTableRow_X86(uint8* dst_argb, const uint8* table_argb, int width) {
- __asm {
- push esi
- mov eax, [esp + 4 + 4] /* dst_argb */
- mov esi, [esp + 4 + 8] /* table_argb */
- mov ecx, [esp + 4 + 12] /* width */
-
- // 1 pixel loop.
- convertloop:
- movzx edx, byte ptr [eax]
- lea eax, [eax + 4]
- movzx edx, byte ptr [esi + edx * 4]
- mov byte ptr [eax - 4], dl
- movzx edx, byte ptr [eax - 4 + 1]
- movzx edx, byte ptr [esi + edx * 4 + 1]
- mov byte ptr [eax - 4 + 1], dl
- movzx edx, byte ptr [eax - 4 + 2]
- movzx edx, byte ptr [esi + edx * 4 + 2]
- mov byte ptr [eax - 4 + 2], dl
- dec ecx
- jg convertloop
-
- pop esi
- ret
- }
-}
-#endif // HAS_RGBCOLORTABLEROW_X86
-
-#ifdef HAS_ARGBLUMACOLORTABLEROW_SSSE3
-// Tranform RGB pixels with luma table.
-__declspec(naked)
-void ARGBLumaColorTableRow_SSSE3(const uint8* src_argb, uint8* dst_argb,
- int width,
- const uint8* luma, uint32 lumacoeff) {
- __asm {
- push esi
- push edi
- mov eax, [esp + 8 + 4] /* src_argb */
- mov edi, [esp + 8 + 8] /* dst_argb */
- mov ecx, [esp + 8 + 12] /* width */
- movd xmm2, dword ptr [esp + 8 + 16] // luma table
- movd xmm3, dword ptr [esp + 8 + 20] // lumacoeff
- pshufd xmm2, xmm2, 0
- pshufd xmm3, xmm3, 0
- pcmpeqb xmm4, xmm4 // generate mask 0xff00ff00
- psllw xmm4, 8
- pxor xmm5, xmm5
-
- // 4 pixel loop.
- convertloop:
- movdqu xmm0, qword ptr [eax] // generate luma ptr
- pmaddubsw xmm0, xmm3
- phaddw xmm0, xmm0
- pand xmm0, xmm4 // mask out low bits
- punpcklwd xmm0, xmm5
- paddd xmm0, xmm2 // add table base
- movd esi, xmm0
- pshufd xmm0, xmm0, 0x39 // 00111001 to rotate right 32
-
- movzx edx, byte ptr [eax]
- movzx edx, byte ptr [esi + edx]
- mov byte ptr [edi], dl
- movzx edx, byte ptr [eax + 1]
- movzx edx, byte ptr [esi + edx]
- mov byte ptr [edi + 1], dl
- movzx edx, byte ptr [eax + 2]
- movzx edx, byte ptr [esi + edx]
- mov byte ptr [edi + 2], dl
- movzx edx, byte ptr [eax + 3] // copy alpha.
- mov byte ptr [edi + 3], dl
-
- movd esi, xmm0
- pshufd xmm0, xmm0, 0x39 // 00111001 to rotate right 32
-
- movzx edx, byte ptr [eax + 4]
- movzx edx, byte ptr [esi + edx]
- mov byte ptr [edi + 4], dl
- movzx edx, byte ptr [eax + 5]
- movzx edx, byte ptr [esi + edx]
- mov byte ptr [edi + 5], dl
- movzx edx, byte ptr [eax + 6]
- movzx edx, byte ptr [esi + edx]
- mov byte ptr [edi + 6], dl
- movzx edx, byte ptr [eax + 7] // copy alpha.
- mov byte ptr [edi + 7], dl
-
- movd esi, xmm0
- pshufd xmm0, xmm0, 0x39 // 00111001 to rotate right 32
-
- movzx edx, byte ptr [eax + 8]
- movzx edx, byte ptr [esi + edx]
- mov byte ptr [edi + 8], dl
- movzx edx, byte ptr [eax + 9]
- movzx edx, byte ptr [esi + edx]
- mov byte ptr [edi + 9], dl
- movzx edx, byte ptr [eax + 10]
- movzx edx, byte ptr [esi + edx]
- mov byte ptr [edi + 10], dl
- movzx edx, byte ptr [eax + 11] // copy alpha.
- mov byte ptr [edi + 11], dl
-
- movd esi, xmm0
-
- movzx edx, byte ptr [eax + 12]
- movzx edx, byte ptr [esi + edx]
- mov byte ptr [edi + 12], dl
- movzx edx, byte ptr [eax + 13]
- movzx edx, byte ptr [esi + edx]
- mov byte ptr [edi + 13], dl
- movzx edx, byte ptr [eax + 14]
- movzx edx, byte ptr [esi + edx]
- mov byte ptr [edi + 14], dl
- movzx edx, byte ptr [eax + 15] // copy alpha.
- mov byte ptr [edi + 15], dl
-
- lea eax, [eax + 16]
- lea edi, [edi + 16]
- sub ecx, 4
- jg convertloop
-
- pop edi
- pop esi
- ret
- }
-}
-#endif // HAS_ARGBLUMACOLORTABLEROW_SSSE3
-
-#endif // defined(_M_X64)
-#endif // !defined(LIBYUV_DISABLE_X86) && (defined(_M_IX86) || defined(_M_X64))
-
-#ifdef __cplusplus
-} // extern "C"
-} // namespace libyuv
-#endif
diff --git a/third_party/aom/third_party/libyuv/source/row_x86.asm b/third_party/aom/third_party/libyuv/source/row_x86.asm
deleted file mode 100644
index 0cb326f8e..000000000
--- a/third_party/aom/third_party/libyuv/source/row_x86.asm
+++ /dev/null
@@ -1,146 +0,0 @@
-;
-; Copyright 2012 The LibYuv Project Authors. All rights reserved.
-;
-; Use of this source code is governed by a BSD-style license
-; that can be found in the LICENSE file in the root of the source
-; tree. An additional intellectual property rights grant can be found
-; in the file PATENTS. All contributing project authors may
-; be found in the AUTHORS file in the root of the source tree.
-;
-
-%ifdef __YASM_VERSION_ID__
-%if __YASM_VERSION_ID__ < 01020000h
-%error AVX2 is supported only by yasm 1.2.0 or later.
-%endif
-%endif
-%include "x86inc.asm"
-
-SECTION .text
-
-; cglobal numeric constants are parameters, gpr regs, mm regs
-
-; void YUY2ToYRow_SSE2(const uint8* src_yuy2, uint8* dst_y, int pix)
-
-%macro YUY2TOYROW 2-3
-cglobal %1ToYRow%3, 3, 3, 3, src_yuy2, dst_y, pix
-%ifidn %1,YUY2
- pcmpeqb m2, m2, m2 ; generate mask 0x00ff00ff
- psrlw m2, m2, 8
-%endif
-
- ALIGN 4
-.convertloop:
- mov%2 m0, [src_yuy2q]
- mov%2 m1, [src_yuy2q + mmsize]
- lea src_yuy2q, [src_yuy2q + mmsize * 2]
-%ifidn %1,YUY2
- pand m0, m0, m2 ; YUY2 even bytes are Y
- pand m1, m1, m2
-%else
- psrlw m0, m0, 8 ; UYVY odd bytes are Y
- psrlw m1, m1, 8
-%endif
- packuswb m0, m0, m1
-%if cpuflag(AVX2)
- vpermq m0, m0, 0xd8
-%endif
- sub pixd, mmsize
- mov%2 [dst_yq], m0
- lea dst_yq, [dst_yq + mmsize]
- jg .convertloop
- REP_RET
-%endmacro
-
-; TODO(fbarchard): Remove MMX. Add SSSE3 pshufb version.
-INIT_MMX MMX
-YUY2TOYROW YUY2,a,
-YUY2TOYROW YUY2,u,_Unaligned
-YUY2TOYROW UYVY,a,
-YUY2TOYROW UYVY,u,_Unaligned
-INIT_XMM SSE2
-YUY2TOYROW YUY2,a,
-YUY2TOYROW YUY2,u,_Unaligned
-YUY2TOYROW UYVY,a,
-YUY2TOYROW UYVY,u,_Unaligned
-INIT_YMM AVX2
-YUY2TOYROW YUY2,a,
-YUY2TOYROW UYVY,a,
-
-; void SplitUVRow_SSE2(const uint8* src_uv, uint8* dst_u, uint8* dst_v, int pix)
-
-%macro SplitUVRow 1-2
-cglobal SplitUVRow%2, 4, 4, 5, src_uv, dst_u, dst_v, pix
- pcmpeqb m4, m4, m4 ; generate mask 0x00ff00ff
- psrlw m4, m4, 8
- sub dst_vq, dst_uq
-
- ALIGN 4
-.convertloop:
- mov%1 m0, [src_uvq]
- mov%1 m1, [src_uvq + mmsize]
- lea src_uvq, [src_uvq + mmsize * 2]
- psrlw m2, m0, 8 ; odd bytes
- psrlw m3, m1, 8
- pand m0, m0, m4 ; even bytes
- pand m1, m1, m4
- packuswb m0, m0, m1
- packuswb m2, m2, m3
-%if cpuflag(AVX2)
- vpermq m0, m0, 0xd8
- vpermq m2, m2, 0xd8
-%endif
- mov%1 [dst_uq], m0
- mov%1 [dst_uq + dst_vq], m2
- lea dst_uq, [dst_uq + mmsize]
- sub pixd, mmsize
- jg .convertloop
- REP_RET
-%endmacro
-
-INIT_MMX MMX
-SplitUVRow a,
-SplitUVRow u,_Unaligned
-INIT_XMM SSE2
-SplitUVRow a,
-SplitUVRow u,_Unaligned
-INIT_YMM AVX2
-SplitUVRow a,
-
-; void MergeUVRow_SSE2(const uint8* src_u, const uint8* src_v, uint8* dst_uv,
-; int width);
-
-%macro MergeUVRow_ 1-2
-cglobal MergeUVRow_%2, 4, 4, 3, src_u, src_v, dst_uv, pix
- sub src_vq, src_uq
-
- ALIGN 4
-.convertloop:
- mov%1 m0, [src_uq]
- mov%1 m1, [src_vq]
- lea src_uq, [src_uq + mmsize]
- punpcklbw m2, m0, m1 // first 8 UV pairs
- punpckhbw m0, m0, m1 // next 8 UV pairs
-%if cpuflag(AVX2)
- vperm2i128 m1, m2, m0, 0x20 // low 128 of ymm2 and low 128 of ymm0
- vperm2i128 m2, m2, m0, 0x31 // high 128 of ymm2 and high 128 of ymm0
- mov%1 [dst_uvq], m1
- mov%1 [dst_uvq + mmsize], m2
-%else
- mov%1 [dst_uvq], m2
- mov%1 [dst_uvq + mmsize], m0
-%endif
- lea dst_uvq, [dst_uvq + mmsize * 2]
- sub pixd, mmsize
- jg .convertloop
- REP_RET
-%endmacro
-
-INIT_MMX MMX
-MergeUVRow_ a,
-MergeUVRow_ u,_Unaligned
-INIT_XMM SSE2
-MergeUVRow_ a,
-MergeUVRow_ u,_Unaligned
-INIT_YMM AVX2
-MergeUVRow_ a,
-
diff --git a/third_party/aom/third_party/libyuv/source/scale.cc b/third_party/aom/third_party/libyuv/source/scale.cc
deleted file mode 100644
index 0a01304c4..000000000
--- a/third_party/aom/third_party/libyuv/source/scale.cc
+++ /dev/null
@@ -1,1689 +0,0 @@
-/*
- * Copyright 2011 The LibYuv Project Authors. All rights reserved.
- *
- * Use of this source code is governed by a BSD-style license
- * that can be found in the LICENSE file in the root of the source
- * tree. An additional intellectual property rights grant can be found
- * in the file PATENTS. All contributing project authors may
- * be found in the AUTHORS file in the root of the source tree.
- */
-
-#include "libyuv/scale.h"
-
-#include <assert.h>
-#include <string.h>
-
-#include "libyuv/cpu_id.h"
-#include "libyuv/planar_functions.h" // For CopyPlane
-#include "libyuv/row.h"
-#include "libyuv/scale_row.h"
-
-#ifdef __cplusplus
-namespace libyuv {
-extern "C" {
-#endif
-
-// Returns the magnitude of v.
-static __inline int Abs(int v) {
-  if (v < 0) {
-    return -v;
-  }
-  return v;
-}
-
-// Adds a to the magnitude of v, shifts right by s and restores the sign of v.
-// Every argument and the whole expansion are parenthesized so the macro can
-// be used inside larger expressions and with compound arguments.
-#define SUBSAMPLE(v, a, s) \
-  (((v) < 0) ? (-((-(v) + (a)) >> (s))) : (((v) + (a)) >> (s)))
-
-// Scale plane, 1/2
-// This is an optimized version for scaling down a plane to 1/2 of
-// its original size.
-
-// Runs one ScaleRowDown2 row kernel per destination row, stepping the source
-// by two rows each time. |filtering| chooses between the point-sampling,
-// linear and box kernels. src_width and src_height are not read here.
-static void ScalePlaneDown2(int src_width, int src_height,
- int dst_width, int dst_height,
- int src_stride, int dst_stride,
- const uint8* src_ptr, uint8* dst_ptr,
- enum FilterMode filtering) {
- int y;
- // Start with the portable C kernel. Each #if section below reassigns the
- // pointer when TestCpuFlag reports the feature, so a later section takes
- // precedence over an earlier one.
- void (*ScaleRowDown2)(const uint8* src_ptr, ptrdiff_t src_stride,
- uint8* dst_ptr, int dst_width) =
- filtering == kFilterNone ? ScaleRowDown2_C :
- (filtering == kFilterLinear ? ScaleRowDown2Linear_C : ScaleRowDown2Box_C);
- int row_stride = src_stride << 1; // Source advance per destination row.
- if (!filtering) {
- src_ptr += src_stride; // Point to odd rows.
- src_stride = 0;
- }
-
-#if defined(HAS_SCALEROWDOWN2_NEON)
- if (TestCpuFlag(kCpuHasNEON)) {
- ScaleRowDown2 = filtering == kFilterNone ? ScaleRowDown2_Any_NEON :
- (filtering == kFilterLinear ? ScaleRowDown2Linear_Any_NEON :
- ScaleRowDown2Box_Any_NEON);
- // The non-Any kernels are selected only when dst_width is a multiple of 16.
- if (IS_ALIGNED(dst_width, 16)) {
- ScaleRowDown2 = filtering == kFilterNone ? ScaleRowDown2_NEON :
- (filtering == kFilterLinear ? ScaleRowDown2Linear_NEON :
- ScaleRowDown2Box_NEON);
- }
- }
-#endif
-#if defined(HAS_SCALEROWDOWN2_SSE2)
- if (TestCpuFlag(kCpuHasSSE2)) {
- ScaleRowDown2 = filtering == kFilterNone ? ScaleRowDown2_Any_SSE2 :
- (filtering == kFilterLinear ? ScaleRowDown2Linear_Any_SSE2 :
- ScaleRowDown2Box_Any_SSE2);
- if (IS_ALIGNED(dst_width, 16)) {
- ScaleRowDown2 = filtering == kFilterNone ? ScaleRowDown2_SSE2 :
- (filtering == kFilterLinear ? ScaleRowDown2Linear_SSE2 :
- ScaleRowDown2Box_SSE2);
- }
- }
-#endif
-#if defined(HAS_SCALEROWDOWN2_AVX2)
- if (TestCpuFlag(kCpuHasAVX2)) {
- ScaleRowDown2 = filtering == kFilterNone ? ScaleRowDown2_Any_AVX2 :
- (filtering == kFilterLinear ? ScaleRowDown2Linear_Any_AVX2 :
- ScaleRowDown2Box_Any_AVX2);
- if (IS_ALIGNED(dst_width, 32)) {
- ScaleRowDown2 = filtering == kFilterNone ? ScaleRowDown2_AVX2 :
- (filtering == kFilterLinear ? ScaleRowDown2Linear_AVX2 :
- ScaleRowDown2Box_AVX2);
- }
- }
-#endif
-#if defined(HAS_SCALEROWDOWN2_MIPS_DSPR2)
- // This path has no linear-specific kernel: any filtering mode uses the box
- // kernel, and it is chosen only when pointers and strides are 4-aligned.
- if (TestCpuFlag(kCpuHasMIPS_DSPR2) && IS_ALIGNED(src_ptr, 4) &&
- IS_ALIGNED(src_stride, 4) && IS_ALIGNED(row_stride, 4) &&
- IS_ALIGNED(dst_ptr, 4) && IS_ALIGNED(dst_stride, 4)) {
- ScaleRowDown2 = filtering ?
- ScaleRowDown2Box_MIPS_DSPR2 : ScaleRowDown2_MIPS_DSPR2;
- }
-#endif
-
- // Linear mode hands the kernel a zero stride.
- if (filtering == kFilterLinear) {
- src_stride = 0;
- }
- // TODO(fbarchard): Loop through source height to allow odd height.
- for (y = 0; y < dst_height; ++y) {
- ScaleRowDown2(src_ptr, src_stride, dst_ptr, dst_width);
- src_ptr += row_stride;
- dst_ptr += dst_stride;
- }
-}
-
-// uint16 counterpart of the 1/2 plane scaler: runs one ScaleRowDown2 kernel
-// per destination row, stepping the source by two rows each time.
-// src_width and src_height are not read here.
-static void ScalePlaneDown2_16(int src_width, int src_height,
- int dst_width, int dst_height,
- int src_stride, int dst_stride,
- const uint16* src_ptr, uint16* dst_ptr,
- enum FilterMode filtering) {
- int y;
- // Start with the portable C kernel; the #if sections below reassign the
- // pointer, so a later section takes precedence over an earlier one.
- void (*ScaleRowDown2)(const uint16* src_ptr, ptrdiff_t src_stride,
- uint16* dst_ptr, int dst_width) =
- filtering == kFilterNone ? ScaleRowDown2_16_C :
- (filtering == kFilterLinear ? ScaleRowDown2Linear_16_C :
- ScaleRowDown2Box_16_C);
- int row_stride = src_stride << 1; // Source advance per destination row.
- if (!filtering) {
- src_ptr += src_stride; // Point to odd rows.
- src_stride = 0;
- }
-
-#if defined(HAS_SCALEROWDOWN2_16_NEON)
- // Only taken when dst_width is a multiple of 16; any filtering mode maps to
- // the box kernel on this path.
- if (TestCpuFlag(kCpuHasNEON) && IS_ALIGNED(dst_width, 16)) {
- ScaleRowDown2 = filtering ? ScaleRowDown2Box_16_NEON :
- ScaleRowDown2_16_NEON;
- }
-#endif
-#if defined(HAS_SCALEROWDOWN2_16_SSE2)
- if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(dst_width, 16)) {
- ScaleRowDown2 = filtering == kFilterNone ? ScaleRowDown2_16_SSE2 :
- (filtering == kFilterLinear ? ScaleRowDown2Linear_16_SSE2 :
- ScaleRowDown2Box_16_SSE2);
- }
-#endif
-#if defined(HAS_SCALEROWDOWN2_16_MIPS_DSPR2)
- if (TestCpuFlag(kCpuHasMIPS_DSPR2) && IS_ALIGNED(src_ptr, 4) &&
- IS_ALIGNED(src_stride, 4) && IS_ALIGNED(row_stride, 4) &&
- IS_ALIGNED(dst_ptr, 4) && IS_ALIGNED(dst_stride, 4)) {
- ScaleRowDown2 = filtering ?
- ScaleRowDown2Box_16_MIPS_DSPR2 : ScaleRowDown2_16_MIPS_DSPR2;
- }
-#endif
-
- // Linear mode hands the kernel a zero stride.
- if (filtering == kFilterLinear) {
- src_stride = 0;
- }
- // TODO(fbarchard): Loop through source height to allow odd height.
- for (y = 0; y < dst_height; ++y) {
- ScaleRowDown2(src_ptr, src_stride, dst_ptr, dst_width);
- src_ptr += row_stride;
- dst_ptr += dst_stride;
- }
-}
-
-// Scale plane, 1/4
-// This is an optimized version for scaling down a plane to 1/4 of
-// its original size.
-
-// Runs one ScaleRowDown4 row kernel per destination row, stepping the source
-// by four rows each time. Any non-zero |filtering| selects the box kernel.
-// src_width and src_height are not read here.
-static void ScalePlaneDown4(int src_width, int src_height,
- int dst_width, int dst_height,
- int src_stride, int dst_stride,
- const uint8* src_ptr, uint8* dst_ptr,
- enum FilterMode filtering) {
- int y;
- // Start with the portable C kernel; the #if sections below reassign the
- // pointer, so a later section takes precedence over an earlier one.
- void (*ScaleRowDown4)(const uint8* src_ptr, ptrdiff_t src_stride,
- uint8* dst_ptr, int dst_width) =
- filtering ? ScaleRowDown4Box_C : ScaleRowDown4_C;
- int row_stride = src_stride << 2; // Source advance per destination row.
- if (!filtering) {
- src_ptr += src_stride * 2; // Point to row 2.
- src_stride = 0;
- }
-#if defined(HAS_SCALEROWDOWN4_NEON)
- if (TestCpuFlag(kCpuHasNEON)) {
- ScaleRowDown4 = filtering ?
- ScaleRowDown4Box_Any_NEON : ScaleRowDown4_Any_NEON;
- // The non-Any kernels are selected only when dst_width is a multiple of 8.
- if (IS_ALIGNED(dst_width, 8)) {
- ScaleRowDown4 = filtering ? ScaleRowDown4Box_NEON : ScaleRowDown4_NEON;
- }
- }
-#endif
-#if defined(HAS_SCALEROWDOWN4_SSE2)
- if (TestCpuFlag(kCpuHasSSE2)) {
- ScaleRowDown4 = filtering ?
- ScaleRowDown4Box_Any_SSE2 : ScaleRowDown4_Any_SSE2;
- if (IS_ALIGNED(dst_width, 8)) {
- ScaleRowDown4 = filtering ? ScaleRowDown4Box_SSE2 : ScaleRowDown4_SSE2;
- }
- }
-#endif
-#if defined(HAS_SCALEROWDOWN4_AVX2)
- if (TestCpuFlag(kCpuHasAVX2)) {
- ScaleRowDown4 = filtering ?
- ScaleRowDown4Box_Any_AVX2 : ScaleRowDown4_Any_AVX2;
- if (IS_ALIGNED(dst_width, 16)) {
- ScaleRowDown4 = filtering ? ScaleRowDown4Box_AVX2 : ScaleRowDown4_AVX2;
- }
- }
-#endif
-#if defined(HAS_SCALEROWDOWN4_MIPS_DSPR2)
- if (TestCpuFlag(kCpuHasMIPS_DSPR2) && IS_ALIGNED(row_stride, 4) &&
- IS_ALIGNED(src_ptr, 4) && IS_ALIGNED(src_stride, 4) &&
- IS_ALIGNED(dst_ptr, 4) && IS_ALIGNED(dst_stride, 4)) {
- ScaleRowDown4 = filtering ?
- ScaleRowDown4Box_MIPS_DSPR2 : ScaleRowDown4_MIPS_DSPR2;
- }
-#endif
-
- // Linear mode keeps the box kernel but hands it a zero stride.
- if (filtering == kFilterLinear) {
- src_stride = 0;
- }
- for (y = 0; y < dst_height; ++y) {
- ScaleRowDown4(src_ptr, src_stride, dst_ptr, dst_width);
- src_ptr += row_stride;
- dst_ptr += dst_stride;
- }
-}
-
-// uint16 counterpart of the 1/4 plane scaler: runs one ScaleRowDown4 kernel
-// per destination row, stepping the source by four rows each time.
-// src_width and src_height are not read here.
-static void ScalePlaneDown4_16(int src_width, int src_height,
- int dst_width, int dst_height,
- int src_stride, int dst_stride,
- const uint16* src_ptr, uint16* dst_ptr,
- enum FilterMode filtering) {
- int y;
- // Start with the portable C kernel; the #if sections below reassign the
- // pointer, so a later section takes precedence over an earlier one.
- void (*ScaleRowDown4)(const uint16* src_ptr, ptrdiff_t src_stride,
- uint16* dst_ptr, int dst_width) =
- filtering ? ScaleRowDown4Box_16_C : ScaleRowDown4_16_C;
- int row_stride = src_stride << 2; // Source advance per destination row.
- if (!filtering) {
- src_ptr += src_stride * 2; // Point to row 2.
- src_stride = 0;
- }
-#if defined(HAS_SCALEROWDOWN4_16_NEON)
- // The SIMD kernels here are used only when dst_width is a multiple of 8.
- if (TestCpuFlag(kCpuHasNEON) && IS_ALIGNED(dst_width, 8)) {
- ScaleRowDown4 = filtering ? ScaleRowDown4Box_16_NEON :
- ScaleRowDown4_16_NEON;
- }
-#endif
-#if defined(HAS_SCALEROWDOWN4_16_SSE2)
- if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(dst_width, 8)) {
- ScaleRowDown4 = filtering ? ScaleRowDown4Box_16_SSE2 :
- ScaleRowDown4_16_SSE2;
- }
-#endif
-#if defined(HAS_SCALEROWDOWN4_16_MIPS_DSPR2)
- if (TestCpuFlag(kCpuHasMIPS_DSPR2) && IS_ALIGNED(row_stride, 4) &&
- IS_ALIGNED(src_ptr, 4) && IS_ALIGNED(src_stride, 4) &&
- IS_ALIGNED(dst_ptr, 4) && IS_ALIGNED(dst_stride, 4)) {
- ScaleRowDown4 = filtering ?
- ScaleRowDown4Box_16_MIPS_DSPR2 : ScaleRowDown4_16_MIPS_DSPR2;
- }
-#endif
-
- // Linear mode keeps the box kernel but hands it a zero stride.
- if (filtering == kFilterLinear) {
- src_stride = 0;
- }
- for (y = 0; y < dst_height; ++y) {
- ScaleRowDown4(src_ptr, src_stride, dst_ptr, dst_width);
- src_ptr += row_stride;
- dst_ptr += dst_stride;
- }
-}
-
-// Scale plane down, 3/4
-
-// Produces destination rows in groups of three using two row kernels
-// (ScaleRowDown34_0 and ScaleRowDown34_1); each group advances the source by
-// four rows. dst_width must be a multiple of 3 (asserted).
-// src_width and src_height are not read here.
-static void ScalePlaneDown34(int src_width, int src_height,
- int dst_width, int dst_height,
- int src_stride, int dst_stride,
- const uint8* src_ptr, uint8* dst_ptr,
- enum FilterMode filtering) {
- int y;
- void (*ScaleRowDown34_0)(const uint8* src_ptr, ptrdiff_t src_stride,
- uint8* dst_ptr, int dst_width);
- void (*ScaleRowDown34_1)(const uint8* src_ptr, ptrdiff_t src_stride,
- uint8* dst_ptr, int dst_width);
- // Stride handed to the row kernels: zero for linear mode, otherwise the
- // real source stride.
- const int filter_stride = (filtering == kFilterLinear) ? 0 : src_stride;
- assert(dst_width % 3 == 0);
- // Portable C kernels first; the #if sections below reassign both pointers,
- // so a later section takes precedence over an earlier one.
- if (!filtering) {
- ScaleRowDown34_0 = ScaleRowDown34_C;
- ScaleRowDown34_1 = ScaleRowDown34_C;
- } else {
- ScaleRowDown34_0 = ScaleRowDown34_0_Box_C;
- ScaleRowDown34_1 = ScaleRowDown34_1_Box_C;
- }
-#if defined(HAS_SCALEROWDOWN34_NEON)
- if (TestCpuFlag(kCpuHasNEON)) {
- if (!filtering) {
- ScaleRowDown34_0 = ScaleRowDown34_Any_NEON;
- ScaleRowDown34_1 = ScaleRowDown34_Any_NEON;
- } else {
- ScaleRowDown34_0 = ScaleRowDown34_0_Box_Any_NEON;
- ScaleRowDown34_1 = ScaleRowDown34_1_Box_Any_NEON;
- }
- // The non-Any kernels are selected only when dst_width is a multiple of 24.
- if (dst_width % 24 == 0) {
- if (!filtering) {
- ScaleRowDown34_0 = ScaleRowDown34_NEON;
- ScaleRowDown34_1 = ScaleRowDown34_NEON;
- } else {
- ScaleRowDown34_0 = ScaleRowDown34_0_Box_NEON;
- ScaleRowDown34_1 = ScaleRowDown34_1_Box_NEON;
- }
- }
- }
-#endif
-#if defined(HAS_SCALEROWDOWN34_SSSE3)
- if (TestCpuFlag(kCpuHasSSSE3)) {
- if (!filtering) {
- ScaleRowDown34_0 = ScaleRowDown34_Any_SSSE3;
- ScaleRowDown34_1 = ScaleRowDown34_Any_SSSE3;
- } else {
- ScaleRowDown34_0 = ScaleRowDown34_0_Box_Any_SSSE3;
- ScaleRowDown34_1 = ScaleRowDown34_1_Box_Any_SSSE3;
- }
- if (dst_width % 24 == 0) {
- if (!filtering) {
- ScaleRowDown34_0 = ScaleRowDown34_SSSE3;
- ScaleRowDown34_1 = ScaleRowDown34_SSSE3;
- } else {
- ScaleRowDown34_0 = ScaleRowDown34_0_Box_SSSE3;
- ScaleRowDown34_1 = ScaleRowDown34_1_Box_SSSE3;
- }
- }
- }
-#endif
-#if defined(HAS_SCALEROWDOWN34_MIPS_DSPR2)
- if (TestCpuFlag(kCpuHasMIPS_DSPR2) && (dst_width % 24 == 0) &&
- IS_ALIGNED(src_ptr, 4) && IS_ALIGNED(src_stride, 4) &&
- IS_ALIGNED(dst_ptr, 4) && IS_ALIGNED(dst_stride, 4)) {
- if (!filtering) {
- ScaleRowDown34_0 = ScaleRowDown34_MIPS_DSPR2;
- ScaleRowDown34_1 = ScaleRowDown34_MIPS_DSPR2;
- } else {
- ScaleRowDown34_0 = ScaleRowDown34_0_Box_MIPS_DSPR2;
- ScaleRowDown34_1 = ScaleRowDown34_1_Box_MIPS_DSPR2;
- }
- }
-#endif
-
- // Three destination rows per pass; the source advances 1 + 1 + 2 rows.
- for (y = 0; y < dst_height - 2; y += 3) {
- ScaleRowDown34_0(src_ptr, filter_stride, dst_ptr, dst_width);
- src_ptr += src_stride;
- dst_ptr += dst_stride;
- ScaleRowDown34_1(src_ptr, filter_stride, dst_ptr, dst_width);
- src_ptr += src_stride;
- dst_ptr += dst_stride;
- // Third row: the kernel starts one row further down and receives a
- // negated stride.
- ScaleRowDown34_0(src_ptr + src_stride, -filter_stride,
- dst_ptr, dst_width);
- src_ptr += src_stride * 2;
- dst_ptr += dst_stride;
- }
-
- // Remainder 1 or 2 rows with last row vertically unfiltered
- if ((dst_height % 3) == 2) {
- ScaleRowDown34_0(src_ptr, filter_stride, dst_ptr, dst_width);
- src_ptr += src_stride;
- dst_ptr += dst_stride;
- ScaleRowDown34_1(src_ptr, 0, dst_ptr, dst_width);
- } else if ((dst_height % 3) == 1) {
- ScaleRowDown34_0(src_ptr, 0, dst_ptr, dst_width);
- }
-}
-
-// uint16 counterpart of the 3/4 plane scaler: produces destination rows in
-// groups of three, each group advancing the source by four rows.
-// dst_width must be a multiple of 3 (asserted).
-// src_width and src_height are not read here.
-static void ScalePlaneDown34_16(int src_width, int src_height,
- int dst_width, int dst_height,
- int src_stride, int dst_stride,
- const uint16* src_ptr, uint16* dst_ptr,
- enum FilterMode filtering) {
- int y;
- void (*ScaleRowDown34_0)(const uint16* src_ptr, ptrdiff_t src_stride,
- uint16* dst_ptr, int dst_width);
- void (*ScaleRowDown34_1)(const uint16* src_ptr, ptrdiff_t src_stride,
- uint16* dst_ptr, int dst_width);
- // Stride handed to the row kernels: zero for linear mode, otherwise the
- // real source stride.
- const int filter_stride = (filtering == kFilterLinear) ? 0 : src_stride;
- assert(dst_width % 3 == 0);
- // Portable C kernels first; the #if sections below reassign both pointers,
- // so a later section takes precedence over an earlier one.
- if (!filtering) {
- ScaleRowDown34_0 = ScaleRowDown34_16_C;
- ScaleRowDown34_1 = ScaleRowDown34_16_C;
- } else {
- ScaleRowDown34_0 = ScaleRowDown34_0_Box_16_C;
- ScaleRowDown34_1 = ScaleRowDown34_1_Box_16_C;
- }
-#if defined(HAS_SCALEROWDOWN34_16_NEON)
- // The SIMD kernels here are used only when dst_width is a multiple of 24.
- if (TestCpuFlag(kCpuHasNEON) && (dst_width % 24 == 0)) {
- if (!filtering) {
- ScaleRowDown34_0 = ScaleRowDown34_16_NEON;
- ScaleRowDown34_1 = ScaleRowDown34_16_NEON;
- } else {
- ScaleRowDown34_0 = ScaleRowDown34_0_Box_16_NEON;
- ScaleRowDown34_1 = ScaleRowDown34_1_Box_16_NEON;
- }
- }
-#endif
-#if defined(HAS_SCALEROWDOWN34_16_SSSE3)
- if (TestCpuFlag(kCpuHasSSSE3) && (dst_width % 24 == 0)) {
- if (!filtering) {
- ScaleRowDown34_0 = ScaleRowDown34_16_SSSE3;
- ScaleRowDown34_1 = ScaleRowDown34_16_SSSE3;
- } else {
- ScaleRowDown34_0 = ScaleRowDown34_0_Box_16_SSSE3;
- ScaleRowDown34_1 = ScaleRowDown34_1_Box_16_SSSE3;
- }
- }
-#endif
-#if defined(HAS_SCALEROWDOWN34_16_MIPS_DSPR2)
- if (TestCpuFlag(kCpuHasMIPS_DSPR2) && (dst_width % 24 == 0) &&
- IS_ALIGNED(src_ptr, 4) && IS_ALIGNED(src_stride, 4) &&
- IS_ALIGNED(dst_ptr, 4) && IS_ALIGNED(dst_stride, 4)) {
- if (!filtering) {
- ScaleRowDown34_0 = ScaleRowDown34_16_MIPS_DSPR2;
- ScaleRowDown34_1 = ScaleRowDown34_16_MIPS_DSPR2;
- } else {
- ScaleRowDown34_0 = ScaleRowDown34_0_Box_16_MIPS_DSPR2;
- ScaleRowDown34_1 = ScaleRowDown34_1_Box_16_MIPS_DSPR2;
- }
- }
-#endif
-
- // Three destination rows per pass; the source advances 1 + 1 + 2 rows.
- for (y = 0; y < dst_height - 2; y += 3) {
- ScaleRowDown34_0(src_ptr, filter_stride, dst_ptr, dst_width);
- src_ptr += src_stride;
- dst_ptr += dst_stride;
- ScaleRowDown34_1(src_ptr, filter_stride, dst_ptr, dst_width);
- src_ptr += src_stride;
- dst_ptr += dst_stride;
- // Third row: the kernel starts one row further down and receives a
- // negated stride.
- ScaleRowDown34_0(src_ptr + src_stride, -filter_stride,
- dst_ptr, dst_width);
- src_ptr += src_stride * 2;
- dst_ptr += dst_stride;
- }
-
- // Remainder 1 or 2 rows with last row vertically unfiltered
- if ((dst_height % 3) == 2) {
- ScaleRowDown34_0(src_ptr, filter_stride, dst_ptr, dst_width);
- src_ptr += src_stride;
- dst_ptr += dst_stride;
- ScaleRowDown34_1(src_ptr, 0, dst_ptr, dst_width);
- } else if ((dst_height % 3) == 1) {
- ScaleRowDown34_0(src_ptr, 0, dst_ptr, dst_width);
- }
-}
-
-
-// Scale plane, 3/8
-// This is an optimized version for scaling down a plane to 3/8
-// of its original size.
-//
-// Uses box filters arranged like this
-// aaabbbcc -> abc
-// aaabbbcc def
-// aaabbbcc ghi
-// dddeeeff
-// dddeeeff
-// dddeeeff
-// ggghhhii
-// ggghhhii
-// Boxes are 3x3, 2x3, 3x2 and 2x2
-
-// Produces destination rows in groups of three using two row kernels
-// (ScaleRowDown38_3 and ScaleRowDown38_2); each group advances the source by
-// 3 + 3 + 2 rows. dst_width must be a multiple of 3 (asserted).
-// src_width and src_height are not read here.
-static void ScalePlaneDown38(int src_width, int src_height,
- int dst_width, int dst_height,
- int src_stride, int dst_stride,
- const uint8* src_ptr, uint8* dst_ptr,
- enum FilterMode filtering) {
- int y;
- void (*ScaleRowDown38_3)(const uint8* src_ptr, ptrdiff_t src_stride,
- uint8* dst_ptr, int dst_width);
- void (*ScaleRowDown38_2)(const uint8* src_ptr, ptrdiff_t src_stride,
- uint8* dst_ptr, int dst_width);
- // Stride handed to the row kernels: zero for linear mode, otherwise the
- // real source stride.
- const int filter_stride = (filtering == kFilterLinear) ? 0 : src_stride;
- assert(dst_width % 3 == 0);
- // Portable C kernels first; the #if sections below reassign both pointers,
- // so a later section takes precedence over an earlier one.
- if (!filtering) {
- ScaleRowDown38_3 = ScaleRowDown38_C;
- ScaleRowDown38_2 = ScaleRowDown38_C;
- } else {
- ScaleRowDown38_3 = ScaleRowDown38_3_Box_C;
- ScaleRowDown38_2 = ScaleRowDown38_2_Box_C;
- }
-
-#if defined(HAS_SCALEROWDOWN38_NEON)
- if (TestCpuFlag(kCpuHasNEON)) {
- if (!filtering) {
- ScaleRowDown38_3 = ScaleRowDown38_Any_NEON;
- ScaleRowDown38_2 = ScaleRowDown38_Any_NEON;
- } else {
- ScaleRowDown38_3 = ScaleRowDown38_3_Box_Any_NEON;
- ScaleRowDown38_2 = ScaleRowDown38_2_Box_Any_NEON;
- }
- // The non-Any kernels are selected only when dst_width is a multiple of 12.
- if (dst_width % 12 == 0) {
- if (!filtering) {
- ScaleRowDown38_3 = ScaleRowDown38_NEON;
- ScaleRowDown38_2 = ScaleRowDown38_NEON;
- } else {
- ScaleRowDown38_3 = ScaleRowDown38_3_Box_NEON;
- ScaleRowDown38_2 = ScaleRowDown38_2_Box_NEON;
- }
- }
- }
-#endif
-#if defined(HAS_SCALEROWDOWN38_SSSE3)
- if (TestCpuFlag(kCpuHasSSSE3)) {
- if (!filtering) {
- ScaleRowDown38_3 = ScaleRowDown38_Any_SSSE3;
- ScaleRowDown38_2 = ScaleRowDown38_Any_SSSE3;
- } else {
- ScaleRowDown38_3 = ScaleRowDown38_3_Box_Any_SSSE3;
- ScaleRowDown38_2 = ScaleRowDown38_2_Box_Any_SSSE3;
- }
- // The width requirement differs by mode here: a multiple of 12 for the
- // unfiltered kernel, a multiple of 6 for the box kernels.
- if (dst_width % 12 == 0 && !filtering) {
- ScaleRowDown38_3 = ScaleRowDown38_SSSE3;
- ScaleRowDown38_2 = ScaleRowDown38_SSSE3;
- }
- if (dst_width % 6 == 0 && filtering) {
- ScaleRowDown38_3 = ScaleRowDown38_3_Box_SSSE3;
- ScaleRowDown38_2 = ScaleRowDown38_2_Box_SSSE3;
- }
- }
-#endif
-#if defined(HAS_SCALEROWDOWN38_MIPS_DSPR2)
- if (TestCpuFlag(kCpuHasMIPS_DSPR2) && (dst_width % 12 == 0) &&
- IS_ALIGNED(src_ptr, 4) && IS_ALIGNED(src_stride, 4) &&
- IS_ALIGNED(dst_ptr, 4) && IS_ALIGNED(dst_stride, 4)) {
- if (!filtering) {
- ScaleRowDown38_3 = ScaleRowDown38_MIPS_DSPR2;
- ScaleRowDown38_2 = ScaleRowDown38_MIPS_DSPR2;
- } else {
- ScaleRowDown38_3 = ScaleRowDown38_3_Box_MIPS_DSPR2;
- ScaleRowDown38_2 = ScaleRowDown38_2_Box_MIPS_DSPR2;
- }
- }
-#endif
-
- // Three destination rows per pass; the source advances 3 + 3 + 2 rows.
- for (y = 0; y < dst_height - 2; y += 3) {
- ScaleRowDown38_3(src_ptr, filter_stride, dst_ptr, dst_width);
- src_ptr += src_stride * 3;
- dst_ptr += dst_stride;
- ScaleRowDown38_3(src_ptr, filter_stride, dst_ptr, dst_width);
- src_ptr += src_stride * 3;
- dst_ptr += dst_stride;
- ScaleRowDown38_2(src_ptr, filter_stride, dst_ptr, dst_width);
- src_ptr += src_stride * 2;
- dst_ptr += dst_stride;
- }
-
- // Remainder 1 or 2 rows with last row vertically unfiltered
- if ((dst_height % 3) == 2) {
- ScaleRowDown38_3(src_ptr, filter_stride, dst_ptr, dst_width);
- src_ptr += src_stride * 3;
- dst_ptr += dst_stride;
- ScaleRowDown38_3(src_ptr, 0, dst_ptr, dst_width);
- } else if ((dst_height % 3) == 1) {
- ScaleRowDown38_3(src_ptr, 0, dst_ptr, dst_width);
- }
-}
-
-// uint16 counterpart of the 3/8 plane scaler: produces destination rows in
-// groups of three, each group advancing the source by 3 + 3 + 2 rows.
-// dst_width must be a multiple of 3 (asserted).
-// src_width and src_height are not read here.
-static void ScalePlaneDown38_16(int src_width, int src_height,
- int dst_width, int dst_height,
- int src_stride, int dst_stride,
- const uint16* src_ptr, uint16* dst_ptr,
- enum FilterMode filtering) {
- int y;
- void (*ScaleRowDown38_3)(const uint16* src_ptr, ptrdiff_t src_stride,
- uint16* dst_ptr, int dst_width);
- void (*ScaleRowDown38_2)(const uint16* src_ptr, ptrdiff_t src_stride,
- uint16* dst_ptr, int dst_width);
- // Stride handed to the row kernels: zero for linear mode, otherwise the
- // real source stride.
- const int filter_stride = (filtering == kFilterLinear) ? 0 : src_stride;
- assert(dst_width % 3 == 0);
- // Portable C kernels first; the #if sections below reassign both pointers,
- // so a later section takes precedence over an earlier one.
- if (!filtering) {
- ScaleRowDown38_3 = ScaleRowDown38_16_C;
- ScaleRowDown38_2 = ScaleRowDown38_16_C;
- } else {
- ScaleRowDown38_3 = ScaleRowDown38_3_Box_16_C;
- ScaleRowDown38_2 = ScaleRowDown38_2_Box_16_C;
- }
-#if defined(HAS_SCALEROWDOWN38_16_NEON)
- if (TestCpuFlag(kCpuHasNEON) && (dst_width % 12 == 0)) {
- if (!filtering) {
- ScaleRowDown38_3 = ScaleRowDown38_16_NEON;
- ScaleRowDown38_2 = ScaleRowDown38_16_NEON;
- } else {
- ScaleRowDown38_3 = ScaleRowDown38_3_Box_16_NEON;
- ScaleRowDown38_2 = ScaleRowDown38_2_Box_16_NEON;
- }
- }
-#endif
-#if defined(HAS_SCALEROWDOWN38_16_SSSE3)
- // NOTE(review): this path requires a multiple of 24 while the NEON and MIPS
- // paths in this function require a multiple of 12 - confirm this is intended.
- if (TestCpuFlag(kCpuHasSSSE3) && (dst_width % 24 == 0)) {
- if (!filtering) {
- ScaleRowDown38_3 = ScaleRowDown38_16_SSSE3;
- ScaleRowDown38_2 = ScaleRowDown38_16_SSSE3;
- } else {
- ScaleRowDown38_3 = ScaleRowDown38_3_Box_16_SSSE3;
- ScaleRowDown38_2 = ScaleRowDown38_2_Box_16_SSSE3;
- }
- }
-#endif
-#if defined(HAS_SCALEROWDOWN38_16_MIPS_DSPR2)
- if (TestCpuFlag(kCpuHasMIPS_DSPR2) && (dst_width % 12 == 0) &&
- IS_ALIGNED(src_ptr, 4) && IS_ALIGNED(src_stride, 4) &&
- IS_ALIGNED(dst_ptr, 4) && IS_ALIGNED(dst_stride, 4)) {
- if (!filtering) {
- ScaleRowDown38_3 = ScaleRowDown38_16_MIPS_DSPR2;
- ScaleRowDown38_2 = ScaleRowDown38_16_MIPS_DSPR2;
- } else {
- ScaleRowDown38_3 = ScaleRowDown38_3_Box_16_MIPS_DSPR2;
- ScaleRowDown38_2 = ScaleRowDown38_2_Box_16_MIPS_DSPR2;
- }
- }
-#endif
-
- // Three destination rows per pass; the source advances 3 + 3 + 2 rows.
- for (y = 0; y < dst_height - 2; y += 3) {
- ScaleRowDown38_3(src_ptr, filter_stride, dst_ptr, dst_width);
- src_ptr += src_stride * 3;
- dst_ptr += dst_stride;
- ScaleRowDown38_3(src_ptr, filter_stride, dst_ptr, dst_width);
- src_ptr += src_stride * 3;
- dst_ptr += dst_stride;
- ScaleRowDown38_2(src_ptr, filter_stride, dst_ptr, dst_width);
- src_ptr += src_stride * 2;
- dst_ptr += dst_stride;
- }
-
- // Remainder 1 or 2 rows with last row vertically unfiltered
- if ((dst_height % 3) == 2) {
- ScaleRowDown38_3(src_ptr, filter_stride, dst_ptr, dst_width);
- src_ptr += src_stride * 3;
- dst_ptr += dst_stride;
- ScaleRowDown38_3(src_ptr, 0, dst_ptr, dst_width);
- } else if ((dst_height % 3) == 1) {
- ScaleRowDown38_3(src_ptr, 0, dst_ptr, dst_width);
- }
-}
-
-// Clamps x from below so the result is never smaller than 1.
-#define MIN1(x) ((x) >= 1 ? (x) : 1)
-
-// Returns the sum of the first iboxwidth entries of src_ptr.
-// iboxwidth must be positive (asserted).
-static __inline uint32 SumPixels(int iboxwidth, const uint16* src_ptr) {
-  uint32 total = 0u;
-  int remaining = iboxwidth;
-  assert(iboxwidth > 0);
-  while (remaining-- > 0) {
-    total += *src_ptr++;
-  }
-  return total;
-}
-
-// Returns the sum of the first iboxwidth entries of src_ptr (uint32 input).
-// iboxwidth must be positive (asserted).
-static __inline uint32 SumPixels_16(int iboxwidth, const uint32* src_ptr) {
-  uint32 total = 0u;
-  int remaining = iboxwidth;
-  assert(iboxwidth > 0);
-  while (remaining-- > 0) {
-    total += *src_ptr++;
-  }
-  return total;
-}
-
-// Averages boxes of summed columns into dst_ptr when the horizontal step dx
-// (16.16 fixed point) has a fractional part, so the box width varies from one
-// output pixel to the next.
-//   dst_width  number of output pixels to write
-//   boxheight  number of source rows already accumulated into src_ptr
-//   x, dx      16.16 fixed-point start position and step
-//   src_ptr    row of column sums
-//   dst_ptr    output row
-static void ScaleAddCols2_C(int dst_width, int boxheight, int x, int dx,
-                            const uint16* src_ptr, uint8* dst_ptr) {
-  int i;
-  // Reciprocal scale factors for the two box widths handled here:
-  // entry 0 for minboxwidth, entry 1 for minboxwidth + 1.
-  int scaletbl[2];
-  int minboxwidth = dx >> 16;
-  int boxwidth;
-  scaletbl[0] = 65536 / (MIN1(minboxwidth) * boxheight);
-  scaletbl[1] = 65536 / (MIN1(minboxwidth + 1) * boxheight);
-  for (i = 0; i < dst_width; ++i) {
-    int ix = x >> 16;
-    x += dx;
-    boxwidth = MIN1((x >> 16) - ix);
-    // Index the table relative to minboxwidth instead of biasing a pointer
-    // to before the start of the array, which is undefined behaviour.
-    *dst_ptr++ = SumPixels(boxwidth, src_ptr + ix) *
-        scaletbl[boxwidth - minboxwidth] >> 16;
-  }
-}
-
-// uint32-sum / uint16-output variant of the variable-width column averager:
-// used when the horizontal step dx (16.16 fixed point) has a fractional part,
-// so the box width varies from one output pixel to the next.
-//   dst_width  number of output pixels to write
-//   boxheight  number of source rows already accumulated into src_ptr
-//   x, dx      16.16 fixed-point start position and step
-//   src_ptr    row of column sums
-//   dst_ptr    output row
-static void ScaleAddCols2_16_C(int dst_width, int boxheight, int x, int dx,
-                               const uint32* src_ptr, uint16* dst_ptr) {
-  int i;
-  // Reciprocal scale factors for the two box widths handled here:
-  // entry 0 for minboxwidth, entry 1 for minboxwidth + 1.
-  int scaletbl[2];
-  int minboxwidth = dx >> 16;
-  int boxwidth;
-  scaletbl[0] = 65536 / (MIN1(minboxwidth) * boxheight);
-  scaletbl[1] = 65536 / (MIN1(minboxwidth + 1) * boxheight);
-  for (i = 0; i < dst_width; ++i) {
-    int ix = x >> 16;
-    x += dx;
-    boxwidth = MIN1((x >> 16) - ix);
-    // Index the table relative to minboxwidth instead of biasing a pointer
-    // to before the start of the array, which is undefined behaviour.
-    *dst_ptr++ = SumPixels_16(boxwidth, src_ptr + ix) *
-        scaletbl[boxwidth - minboxwidth] >> 16;
-  }
-}
-
-// Column pass for a horizontal box width of one: each output pixel is a
-// single column sum scaled by 65536 / boxheight. The unnamed int parameter
-// (the step) is ignored; x is a 16.16 fixed-point start position.
-static void ScaleAddCols0_C(int dst_width, int boxheight, int x, int,
-                            const uint16* src_ptr, uint8* dst_ptr) {
-  const int scaleval = 65536 / boxheight;
-  const uint16* const column = src_ptr + (x >> 16);
-  int i = 0;
-  while (i < dst_width) {
-    dst_ptr[i] = column[i] * scaleval >> 16;
-    ++i;
-  }
-}
-
-// Column pass for a constant horizontal box width (the integer part of dx,
-// at least 1): each output pixel is the sum of one box of column sums scaled
-// by 65536 / (boxwidth * boxheight). x and dx are 16.16 fixed point.
-static void ScaleAddCols1_C(int dst_width, int boxheight, int x, int dx,
-                            const uint16* src_ptr, uint8* dst_ptr) {
-  const int boxwidth = MIN1(dx >> 16);
-  const int scaleval = 65536 / (boxwidth * boxheight);
-  int ix = x >> 16;  // Integer column of the first box.
-  int i;
-  for (i = 0; i < dst_width; ++i, ix += boxwidth) {
-    dst_ptr[i] = SumPixels(boxwidth, src_ptr + ix) * scaleval >> 16;
-  }
-}
-
-// uint32-sum / uint16-output column pass for a constant horizontal box width
-// (the integer part of dx, at least 1): each output pixel is the sum of one
-// box of column sums scaled by 65536 / (boxwidth * boxheight).
-//   dst_width  number of output pixels to write
-//   boxheight  number of source rows already accumulated into src_ptr
-//   x, dx      16.16 fixed-point start position and step
-static void ScaleAddCols1_16_C(int dst_width, int boxheight, int x, int dx,
-                               const uint32* src_ptr, uint16* dst_ptr) {
-  int boxwidth = MIN1(dx >> 16);
-  int scaleval = 65536 / (boxwidth * boxheight);
-  int i;
-  // x arrives in 16.16 fixed point; convert it to an element index before it
-  // is used to address src_ptr, as the 8-bit ScaleAddCols1_C does.
-  x >>= 16;
-  for (i = 0; i < dst_width; ++i) {
-    *dst_ptr++ = SumPixels_16(boxwidth, src_ptr + x) * scaleval >> 16;
-    x += boxwidth;
-  }
-}
-
-// Box-filter downscale of a plane to arbitrary dimensions.
-//
-// The source is walked in 16.16 fixed point. For every destination row the
-// covered source rows are summed into a uint16 row buffer (ScaleAddRow), then
-// the column pass (ScaleAddCols) sums each horizontal box and scales it back
-// to 8 bits.
-static void ScalePlaneBox(int src_width, int src_height,
-                          int dst_width, int dst_height,
-                          int src_stride, int dst_stride,
-                          const uint8* src_ptr, uint8* dst_ptr) {
-  typedef void (*AddColsFn)(int dst_width, int boxheight, int x, int dx,
-                            const uint16* src_ptr, uint8* dst_ptr);
-  typedef void (*AddRowFn)(const uint8* src_ptr, uint16* dst_ptr,
-                           int src_width);
-  // Source position and step, 16.16 fixed point; filled in by ScaleSlope.
-  int x = 0, y = 0, dx = 0, dy = 0;
-  const int max_y = (src_height << 16);
-  int out_row;
-  ScaleSlope(src_width, src_height, dst_width, dst_height, kFilterBox,
-             &x, &y, &dx, &dy);
-  src_width = Abs(src_width);
-  {
-    // Row buffer holding one uint16 sum per source column.
-    align_buffer_64(row16, src_width * 2);
-    AddColsFn ScaleAddCols = ScaleAddCols0_C;
-    AddRowFn ScaleAddRow = ScaleAddRow_C;
-    // Fractional step -> variable box width; whole step other than exactly
-    // 1.0 -> fixed box width; exactly 1.0 keeps the single-column pass.
-    if (dx & 0xffff) {
-      ScaleAddCols = ScaleAddCols2_C;
-    } else if (dx != 0x10000) {
-      ScaleAddCols = ScaleAddCols1_C;
-    }
-#if defined(HAS_SCALEADDROW_SSE2)
-    if (TestCpuFlag(kCpuHasSSE2)) {
-      ScaleAddRow = IS_ALIGNED(src_width, 16) ? ScaleAddRow_SSE2
-                                              : ScaleAddRow_Any_SSE2;
-    }
-#endif
-#if defined(HAS_SCALEADDROW_AVX2)
-    if (TestCpuFlag(kCpuHasAVX2)) {
-      ScaleAddRow = IS_ALIGNED(src_width, 32) ? ScaleAddRow_AVX2
-                                              : ScaleAddRow_Any_AVX2;
-    }
-#endif
-#if defined(HAS_SCALEADDROW_NEON)
-    if (TestCpuFlag(kCpuHasNEON)) {
-      ScaleAddRow = IS_ALIGNED(src_width, 16) ? ScaleAddRow_NEON
-                                              : ScaleAddRow_Any_NEON;
-    }
-#endif
-
-    for (out_row = 0; out_row < dst_height; ++out_row) {
-      const int iy = y >> 16;
-      const uint8* src = src_ptr + iy * src_stride;
-      int boxheight;
-      int remaining;
-      y += dy;
-      if (y > max_y) {
-        y = max_y;
-      }
-      boxheight = MIN1((y >> 16) - iy);
-      memset(row16, 0, src_width * 2);
-      // Accumulate every source row covered by this destination row.
-      for (remaining = boxheight; remaining > 0; --remaining) {
-        ScaleAddRow(src, (uint16 *)(row16), src_width);
-        src += src_stride;
-      }
-      ScaleAddCols(dst_width, boxheight, x, dx, (uint16*)(row16), dst_ptr);
-      dst_ptr += dst_stride;
-    }
-    free_aligned_buffer_64(row16);
-  }
-}
-
-// Box-filter downscale of a uint16 plane to arbitrary dimensions.
-//
-// The source is walked in 16.16 fixed point. For every destination row the
-// covered source rows are summed into a uint32 row buffer (ScaleAddRow), then
-// the column pass (ScaleAddCols) sums each horizontal box and scales it back
-// to 16 bits.
-static void ScalePlaneBox_16(int src_width, int src_height,
-                             int dst_width, int dst_height,
-                             int src_stride, int dst_stride,
-                             const uint16* src_ptr, uint16* dst_ptr) {
-  typedef void (*AddColsFn)(int dst_width, int boxheight, int x, int dx,
-                            const uint32* src_ptr, uint16* dst_ptr);
-  typedef void (*AddRowFn)(const uint16* src_ptr, uint32* dst_ptr,
-                           int src_width);
-  // Source position and step, 16.16 fixed point; filled in by ScaleSlope.
-  int x = 0, y = 0, dx = 0, dy = 0;
-  const int max_y = (src_height << 16);
-  int out_row;
-  ScaleSlope(src_width, src_height, dst_width, dst_height, kFilterBox,
-             &x, &y, &dx, &dy);
-  src_width = Abs(src_width);
-  {
-    // Row buffer holding one uint32 sum per source column.
-    align_buffer_64(row32, src_width * 4);
-    AddColsFn ScaleAddCols = ScaleAddCols1_16_C;
-    AddRowFn ScaleAddRow = ScaleAddRow_16_C;
-    // A fractional step needs the variable-box-width column pass.
-    if (dx & 0xffff) {
-      ScaleAddCols = ScaleAddCols2_16_C;
-    }
-
-#if defined(HAS_SCALEADDROW_16_SSE2)
-    if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(src_width, 16)) {
-      ScaleAddRow = ScaleAddRow_16_SSE2;
-    }
-#endif
-
-    for (out_row = 0; out_row < dst_height; ++out_row) {
-      const int iy = y >> 16;
-      const uint16* src = src_ptr + iy * src_stride;
-      int boxheight;
-      int remaining;
-      y += dy;
-      if (y > max_y) {
-        y = max_y;
-      }
-      boxheight = MIN1((y >> 16) - iy);
-      memset(row32, 0, src_width * 4);
-      // Accumulate every source row covered by this destination row.
-      for (remaining = boxheight; remaining > 0; --remaining) {
-        ScaleAddRow(src, (uint32 *)(row32), src_width);
-        src += src_stride;
-      }
-      ScaleAddCols(dst_width, boxheight, x, dx, (uint32*)(row32), dst_ptr);
-      dst_ptr += dst_stride;
-    }
-    free_aligned_buffer_64(row32);
-  }
-}
-
-// Scale plane down with bilinear interpolation.
-//
-// For each destination row the source row at the integer part of y is either
-// column-filtered directly (kFilterLinear) or first blended with the next
-// source row by InterpolateRow using the fractional part of y, and then
-// column-filtered by ScaleFilterCols.
-//   src_width/src_height  source dimensions; src_width is normalized with
-//                         Abs() after ScaleSlope has consumed its sign
-//   dst_width/dst_height  destination dimensions
-//   src_stride/dst_stride row strides of the two planes
-//   filtering             filter mode forwarded to ScaleSlope
-void ScalePlaneBilinearDown(int src_width, int src_height,
-                            int dst_width, int dst_height,
-                            int src_stride, int dst_stride,
-                            const uint8* src_ptr, uint8* dst_ptr,
-                            enum FilterMode filtering) {
-  // Initial source x/y coordinate and step values as 16.16 fixed point.
-  int x = 0;
-  int y = 0;
-  int dx = 0;
-  int dy = 0;
-  // TODO(fbarchard): Consider not allocating row buffer for kFilterLinear.
-  // Allocate a row buffer. src_width still carries its sign at this point,
-  // so size the buffer from its magnitude.
-  align_buffer_64(row, Abs(src_width));
-
-  const int max_y = (src_height - 1) << 16;
-  int j;
-  // The 64-bit column filter is chosen for wide sources; test the magnitude
-  // so this agrees with the "src_width < 32768" checks made after Abs() below.
-  void (*ScaleFilterCols)(uint8* dst_ptr, const uint8* src_ptr,
-      int dst_width, int x, int dx) =
-      (Abs(src_width) >= 32768) ? ScaleFilterCols64_C : ScaleFilterCols_C;
-  void (*InterpolateRow)(uint8* dst_ptr, const uint8* src_ptr,
-      ptrdiff_t src_stride, int dst_width, int source_y_fraction) =
-      InterpolateRow_C;
-  ScaleSlope(src_width, src_height, dst_width, dst_height, filtering,
-             &x, &y, &dx, &dy);
-  src_width = Abs(src_width);
-
-  // Each #if section below reassigns the kernel pointer when TestCpuFlag
-  // reports the feature, so a later section wins over an earlier one.
-#if defined(HAS_INTERPOLATEROW_SSE2)
-  if (TestCpuFlag(kCpuHasSSE2)) {
-    InterpolateRow = InterpolateRow_Any_SSE2;
-    if (IS_ALIGNED(src_width, 16)) {
-      InterpolateRow = InterpolateRow_SSE2;
-    }
-  }
-#endif
-#if defined(HAS_INTERPOLATEROW_SSSE3)
-  if (TestCpuFlag(kCpuHasSSSE3)) {
-    InterpolateRow = InterpolateRow_Any_SSSE3;
-    if (IS_ALIGNED(src_width, 16)) {
-      InterpolateRow = InterpolateRow_SSSE3;
-    }
-  }
-#endif
-#if defined(HAS_INTERPOLATEROW_AVX2)
-  if (TestCpuFlag(kCpuHasAVX2)) {
-    InterpolateRow = InterpolateRow_Any_AVX2;
-    if (IS_ALIGNED(src_width, 32)) {
-      InterpolateRow = InterpolateRow_AVX2;
-    }
-  }
-#endif
-#if defined(HAS_INTERPOLATEROW_NEON)
-  if (TestCpuFlag(kCpuHasNEON)) {
-    InterpolateRow = InterpolateRow_Any_NEON;
-    if (IS_ALIGNED(src_width, 16)) {
-      InterpolateRow = InterpolateRow_NEON;
-    }
-  }
-#endif
-#if defined(HAS_INTERPOLATEROW_MIPS_DSPR2)
-  if (TestCpuFlag(kCpuHasMIPS_DSPR2)) {
-    InterpolateRow = InterpolateRow_Any_MIPS_DSPR2;
-    if (IS_ALIGNED(src_width, 4)) {
-      InterpolateRow = InterpolateRow_MIPS_DSPR2;
-    }
-  }
-#endif
-
-
-#if defined(HAS_SCALEFILTERCOLS_SSSE3)
-  if (TestCpuFlag(kCpuHasSSSE3) && src_width < 32768) {
-    ScaleFilterCols = ScaleFilterCols_SSSE3;
-  }
-#endif
-#if defined(HAS_SCALEFILTERCOLS_NEON)
-  if (TestCpuFlag(kCpuHasNEON) && src_width < 32768) {
-    ScaleFilterCols = ScaleFilterCols_Any_NEON;
-    if (IS_ALIGNED(dst_width, 8)) {
-      ScaleFilterCols = ScaleFilterCols_NEON;
-    }
-  }
-#endif
-  // Clamp y to the last source row before the first use and after each step.
-  if (y > max_y) {
-    y = max_y;
-  }
-
-  for (j = 0; j < dst_height; ++j) {
-    int yi = y >> 16;
-    const uint8* src = src_ptr + yi * src_stride;
-    if (filtering == kFilterLinear) {
-      ScaleFilterCols(dst_ptr, src, dst_width, x, dx);
-    } else {
-      int yf = (y >> 8) & 255;  // Top 8 bits of the 16-bit fraction of y.
-      InterpolateRow(row, src, src_stride, src_width, yf);
-      ScaleFilterCols(dst_ptr, row, dst_width, x, dx);
-    }
-    dst_ptr += dst_stride;
-    y += dy;
-    if (y > max_y) {
-      y = max_y;
-    }
-  }
-  free_aligned_buffer_64(row);
-}
-
-// Scale a uint16 plane down with bilinear interpolation.
-//
-// For each destination row the source row at the integer part of y is either
-// column-filtered directly (kFilterLinear) or first blended with the next
-// source row by InterpolateRow using the fractional part of y, and then
-// column-filtered by ScaleFilterCols.
-//   src_width/src_height  source dimensions; src_width is normalized with
-//                         Abs() after ScaleSlope has consumed its sign
-//   dst_width/dst_height  destination dimensions
-//   src_stride/dst_stride row strides of the two planes
-//   filtering             filter mode forwarded to ScaleSlope
-void ScalePlaneBilinearDown_16(int src_width, int src_height,
-                               int dst_width, int dst_height,
-                               int src_stride, int dst_stride,
-                               const uint16* src_ptr, uint16* dst_ptr,
-                               enum FilterMode filtering) {
-  // Initial source x/y coordinate and step values as 16.16 fixed point.
-  int x = 0;
-  int y = 0;
-  int dx = 0;
-  int dy = 0;
-  // TODO(fbarchard): Consider not allocating row buffer for kFilterLinear.
-  // Allocate a row buffer (two bytes per source pixel). src_width still
-  // carries its sign at this point, so size the buffer from its magnitude.
-  align_buffer_64(row, Abs(src_width) * 2);
-
-  const int max_y = (src_height - 1) << 16;
-  int j;
-  // The 64-bit column filter is chosen for wide sources; test the magnitude
-  // so this agrees with the "src_width < 32768" check made after Abs() below.
-  void (*ScaleFilterCols)(uint16* dst_ptr, const uint16* src_ptr,
-      int dst_width, int x, int dx) =
-      (Abs(src_width) >= 32768) ? ScaleFilterCols64_16_C : ScaleFilterCols_16_C;
-  void (*InterpolateRow)(uint16* dst_ptr, const uint16* src_ptr,
-      ptrdiff_t src_stride, int dst_width, int source_y_fraction) =
-      InterpolateRow_16_C;
-  ScaleSlope(src_width, src_height, dst_width, dst_height, filtering,
-             &x, &y, &dx, &dy);
-  src_width = Abs(src_width);
-
-  // Each #if section below reassigns the kernel pointer when TestCpuFlag
-  // reports the feature, so a later section wins over an earlier one.
-#if defined(HAS_INTERPOLATEROW_16_SSE2)
-  if (TestCpuFlag(kCpuHasSSE2)) {
-    InterpolateRow = InterpolateRow_Any_16_SSE2;
-    if (IS_ALIGNED(src_width, 16)) {
-      InterpolateRow = InterpolateRow_16_SSE2;
-    }
-  }
-#endif
-#if defined(HAS_INTERPOLATEROW_16_SSSE3)
-  if (TestCpuFlag(kCpuHasSSSE3)) {
-    InterpolateRow = InterpolateRow_Any_16_SSSE3;
-    if (IS_ALIGNED(src_width, 16)) {
-      InterpolateRow = InterpolateRow_16_SSSE3;
-    }
-  }
-#endif
-#if defined(HAS_INTERPOLATEROW_16_AVX2)
-  if (TestCpuFlag(kCpuHasAVX2)) {
-    InterpolateRow = InterpolateRow_Any_16_AVX2;
-    if (IS_ALIGNED(src_width, 32)) {
-      InterpolateRow = InterpolateRow_16_AVX2;
-    }
-  }
-#endif
-#if defined(HAS_INTERPOLATEROW_16_NEON)
-  if (TestCpuFlag(kCpuHasNEON)) {
-    InterpolateRow = InterpolateRow_Any_16_NEON;
-    if (IS_ALIGNED(src_width, 16)) {
-      InterpolateRow = InterpolateRow_16_NEON;
-    }
-  }
-#endif
-#if defined(HAS_INTERPOLATEROW_16_MIPS_DSPR2)
-  if (TestCpuFlag(kCpuHasMIPS_DSPR2)) {
-    InterpolateRow = InterpolateRow_Any_16_MIPS_DSPR2;
-    if (IS_ALIGNED(src_width, 4)) {
-      InterpolateRow = InterpolateRow_16_MIPS_DSPR2;
-    }
-  }
-#endif
-
-
-#if defined(HAS_SCALEFILTERCOLS_16_SSSE3)
-  if (TestCpuFlag(kCpuHasSSSE3) && src_width < 32768) {
-    ScaleFilterCols = ScaleFilterCols_16_SSSE3;
-  }
-#endif
-  // Clamp y to the last source row before the first use and after each step.
-  if (y > max_y) {
-    y = max_y;
-  }
-
-  for (j = 0; j < dst_height; ++j) {
-    int yi = y >> 16;
-    const uint16* src = src_ptr + yi * src_stride;
-    if (filtering == kFilterLinear) {
-      ScaleFilterCols(dst_ptr, src, dst_width, x, dx);
-    } else {
-      int yf = (y >> 8) & 255;  // Top 8 bits of the 16-bit fraction of y.
-      InterpolateRow((uint16*)row, src, src_stride, src_width, yf);
-      ScaleFilterCols(dst_ptr, (uint16*)row, dst_width, x, dx);
-    }
-    dst_ptr += dst_stride;
-    y += dy;
-    if (y > max_y) {
-      y = max_y;
-    }
-  }
-  free_aligned_buffer_64(row);
-}
-
-// Scale up down with bilinear interpolation.
-void ScalePlaneBilinearUp(int src_width, int src_height,
- int dst_width, int dst_height,
- int src_stride, int dst_stride,
- const uint8* src_ptr, uint8* dst_ptr,
- enum FilterMode filtering) {
- int j;
- // Initial source x/y coordinate and step values as 16.16 fixed point.
- int x = 0;
- int y = 0;
- int dx = 0;
- int dy = 0;
- const int max_y = (src_height - 1) << 16;
- void (*InterpolateRow)(uint8* dst_ptr, const uint8* src_ptr,
- ptrdiff_t src_stride, int dst_width, int source_y_fraction) =
- InterpolateRow_C;
- void (*ScaleFilterCols)(uint8* dst_ptr, const uint8* src_ptr,
- int dst_width, int x, int dx) =
- filtering ? ScaleFilterCols_C : ScaleCols_C;
- ScaleSlope(src_width, src_height, dst_width, dst_height, filtering,
- &x, &y, &dx, &dy);
- src_width = Abs(src_width);
-
-#if defined(HAS_INTERPOLATEROW_SSE2)
- if (TestCpuFlag(kCpuHasSSE2)) {
- InterpolateRow = InterpolateRow_Any_SSE2;
- if (IS_ALIGNED(dst_width, 16)) {
- InterpolateRow = InterpolateRow_SSE2;
- }
- }
-#endif
-#if defined(HAS_INTERPOLATEROW_SSSE3)
- if (TestCpuFlag(kCpuHasSSSE3)) {
- InterpolateRow = InterpolateRow_Any_SSSE3;
- if (IS_ALIGNED(dst_width, 16)) {
- InterpolateRow = InterpolateRow_SSSE3;
- }
- }
-#endif
-#if defined(HAS_INTERPOLATEROW_AVX2)
- if (TestCpuFlag(kCpuHasAVX2)) {
- InterpolateRow = InterpolateRow_Any_AVX2;
- if (IS_ALIGNED(dst_width, 32)) {
- InterpolateRow = InterpolateRow_AVX2;
- }
- }
-#endif
-#if defined(HAS_INTERPOLATEROW_NEON)
- if (TestCpuFlag(kCpuHasNEON)) {
- InterpolateRow = InterpolateRow_Any_NEON;
- if (IS_ALIGNED(dst_width, 16)) {
- InterpolateRow = InterpolateRow_NEON;
- }
- }
-#endif
-#if defined(HAS_INTERPOLATEROW_MIPS_DSPR2)
- if (TestCpuFlag(kCpuHasMIPS_DSPR2)) {
- InterpolateRow = InterpolateRow_Any_MIPS_DSPR2;
- if (IS_ALIGNED(dst_width, 4)) {
- InterpolateRow = InterpolateRow_MIPS_DSPR2;
- }
- }
-#endif
-
- if (filtering && src_width >= 32768) {
- ScaleFilterCols = ScaleFilterCols64_C;
- }
-#if defined(HAS_SCALEFILTERCOLS_SSSE3)
- if (filtering && TestCpuFlag(kCpuHasSSSE3) && src_width < 32768) {
- ScaleFilterCols = ScaleFilterCols_SSSE3;
- }
-#endif
-#if defined(HAS_SCALEFILTERCOLS_NEON)
- if (filtering && TestCpuFlag(kCpuHasNEON) && src_width < 32768) {
- ScaleFilterCols = ScaleFilterCols_Any_NEON;
- if (IS_ALIGNED(dst_width, 8)) {
- ScaleFilterCols = ScaleFilterCols_NEON;
- }
- }
-#endif
- if (!filtering && src_width * 2 == dst_width && x < 0x8000) {
- ScaleFilterCols = ScaleColsUp2_C;
-#if defined(HAS_SCALECOLS_SSE2)
- if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(dst_width, 8)) {
- ScaleFilterCols = ScaleColsUp2_SSE2;
- }
-#endif
- }
-
- if (y > max_y) {
- y = max_y;
- }
- {
- int yi = y >> 16;
- const uint8* src = src_ptr + yi * src_stride;
-
- // Allocate 2 row buffers.
- const int kRowSize = (dst_width + 31) & ~31;
- align_buffer_64(row, kRowSize * 2);
-
- uint8* rowptr = row;
- int rowstride = kRowSize;
- int lasty = yi;
-
- ScaleFilterCols(rowptr, src, dst_width, x, dx);
- if (src_height > 1) {
- src += src_stride;
- }
- ScaleFilterCols(rowptr + rowstride, src, dst_width, x, dx);
- src += src_stride;
-
- for (j = 0; j < dst_height; ++j) {
- yi = y >> 16;
- if (yi != lasty) {
- if (y > max_y) {
- y = max_y;
- yi = y >> 16;
- src = src_ptr + yi * src_stride;
- }
- if (yi != lasty) {
- ScaleFilterCols(rowptr, src, dst_width, x, dx);
- rowptr += rowstride;
- rowstride = -rowstride;
- lasty = yi;
- src += src_stride;
- }
- }
- if (filtering == kFilterLinear) {
- InterpolateRow(dst_ptr, rowptr, 0, dst_width, 0);
- } else {
- int yf = (y >> 8) & 255;
- InterpolateRow(dst_ptr, rowptr, rowstride, dst_width, yf);
- }
- dst_ptr += dst_stride;
- y += dy;
- }
- free_aligned_buffer_64(row);
- }
-}
-
-void ScalePlaneBilinearUp_16(int src_width, int src_height,
- int dst_width, int dst_height,
- int src_stride, int dst_stride,
- const uint16* src_ptr, uint16* dst_ptr,
- enum FilterMode filtering) {
- int j;
- // Initial source x/y coordinate and step values as 16.16 fixed point.
- int x = 0;
- int y = 0;
- int dx = 0;
- int dy = 0;
- const int max_y = (src_height - 1) << 16;
- void (*InterpolateRow)(uint16* dst_ptr, const uint16* src_ptr,
- ptrdiff_t src_stride, int dst_width, int source_y_fraction) =
- InterpolateRow_16_C;
- void (*ScaleFilterCols)(uint16* dst_ptr, const uint16* src_ptr,
- int dst_width, int x, int dx) =
- filtering ? ScaleFilterCols_16_C : ScaleCols_16_C;
- ScaleSlope(src_width, src_height, dst_width, dst_height, filtering,
- &x, &y, &dx, &dy);
- src_width = Abs(src_width);
-
-#if defined(HAS_INTERPOLATEROW_16_SSE2)
- if (TestCpuFlag(kCpuHasSSE2)) {
- InterpolateRow = InterpolateRow_Any_16_SSE2;
- if (IS_ALIGNED(dst_width, 16)) {
- InterpolateRow = InterpolateRow_16_SSE2;
- }
- }
-#endif
-#if defined(HAS_INTERPOLATEROW_16_SSSE3)
- if (TestCpuFlag(kCpuHasSSSE3)) {
- InterpolateRow = InterpolateRow_Any_16_SSSE3;
- if (IS_ALIGNED(dst_width, 16)) {
- InterpolateRow = InterpolateRow_16_SSSE3;
- }
- }
-#endif
-#if defined(HAS_INTERPOLATEROW_16_AVX2)
- if (TestCpuFlag(kCpuHasAVX2)) {
- InterpolateRow = InterpolateRow_Any_16_AVX2;
- if (IS_ALIGNED(dst_width, 32)) {
- InterpolateRow = InterpolateRow_16_AVX2;
- }
- }
-#endif
-#if defined(HAS_INTERPOLATEROW_16_NEON)
- if (TestCpuFlag(kCpuHasNEON)) {
- InterpolateRow = InterpolateRow_Any_16_NEON;
- if (IS_ALIGNED(dst_width, 16)) {
- InterpolateRow = InterpolateRow_16_NEON;
- }
- }
-#endif
-#if defined(HAS_INTERPOLATEROW_16_MIPS_DSPR2)
- if (TestCpuFlag(kCpuHasMIPS_DSPR2)) {
- InterpolateRow = InterpolateRow_Any_16_MIPS_DSPR2;
- if (IS_ALIGNED(dst_width, 4)) {
- InterpolateRow = InterpolateRow_16_MIPS_DSPR2;
- }
- }
-#endif
-
- if (filtering && src_width >= 32768) {
- ScaleFilterCols = ScaleFilterCols64_16_C;
- }
-#if defined(HAS_SCALEFILTERCOLS_16_SSSE3)
- if (filtering && TestCpuFlag(kCpuHasSSSE3) && src_width < 32768) {
- ScaleFilterCols = ScaleFilterCols_16_SSSE3;
- }
-#endif
- if (!filtering && src_width * 2 == dst_width && x < 0x8000) {
- ScaleFilterCols = ScaleColsUp2_16_C;
-#if defined(HAS_SCALECOLS_16_SSE2)
- if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(dst_width, 8)) {
- ScaleFilterCols = ScaleColsUp2_16_SSE2;
- }
-#endif
- }
-
- if (y > max_y) {
- y = max_y;
- }
- {
- int yi = y >> 16;
- const uint16* src = src_ptr + yi * src_stride;
-
- // Allocate 2 row buffers.
- const int kRowSize = (dst_width + 31) & ~31;
- align_buffer_64(row, kRowSize * 4);
-
- uint16* rowptr = (uint16*)row;
- int rowstride = kRowSize;
- int lasty = yi;
-
- ScaleFilterCols(rowptr, src, dst_width, x, dx);
- if (src_height > 1) {
- src += src_stride;
- }
- ScaleFilterCols(rowptr + rowstride, src, dst_width, x, dx);
- src += src_stride;
-
- for (j = 0; j < dst_height; ++j) {
- yi = y >> 16;
- if (yi != lasty) {
- if (y > max_y) {
- y = max_y;
- yi = y >> 16;
- src = src_ptr + yi * src_stride;
- }
- if (yi != lasty) {
- ScaleFilterCols(rowptr, src, dst_width, x, dx);
- rowptr += rowstride;
- rowstride = -rowstride;
- lasty = yi;
- src += src_stride;
- }
- }
- if (filtering == kFilterLinear) {
- InterpolateRow(dst_ptr, rowptr, 0, dst_width, 0);
- } else {
- int yf = (y >> 8) & 255;
- InterpolateRow(dst_ptr, rowptr, rowstride, dst_width, yf);
- }
- dst_ptr += dst_stride;
- y += dy;
- }
- free_aligned_buffer_64(row);
- }
-}
-
-// Scale Plane to/from any dimensions, without interpolation.
-// Fixed point math is used for performance: The upper 16 bits
-// of x and dx is the integer part of the source position and
-// the lower 16 bits are the fixed decimal part.
-
-static void ScalePlaneSimple(int src_width, int src_height,
- int dst_width, int dst_height,
- int src_stride, int dst_stride,
- const uint8* src_ptr, uint8* dst_ptr) {
- int i;
- void (*ScaleCols)(uint8* dst_ptr, const uint8* src_ptr,
- int dst_width, int x, int dx) = ScaleCols_C;
- // Initial source x/y coordinate and step values as 16.16 fixed point.
- int x = 0;
- int y = 0;
- int dx = 0;
- int dy = 0;
- ScaleSlope(src_width, src_height, dst_width, dst_height, kFilterNone,
- &x, &y, &dx, &dy);
- src_width = Abs(src_width);
-
- if (src_width * 2 == dst_width && x < 0x8000) {
- ScaleCols = ScaleColsUp2_C;
-#if defined(HAS_SCALECOLS_SSE2)
- if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(dst_width, 8)) {
- ScaleCols = ScaleColsUp2_SSE2;
- }
-#endif
- }
-
- for (i = 0; i < dst_height; ++i) {
- ScaleCols(dst_ptr, src_ptr + (y >> 16) * src_stride, dst_width, x, dx);
- dst_ptr += dst_stride;
- y += dy;
- }
-}
-
-static void ScalePlaneSimple_16(int src_width, int src_height,
- int dst_width, int dst_height,
- int src_stride, int dst_stride,
- const uint16* src_ptr, uint16* dst_ptr) {
- int i;
- void (*ScaleCols)(uint16* dst_ptr, const uint16* src_ptr,
- int dst_width, int x, int dx) = ScaleCols_16_C;
- // Initial source x/y coordinate and step values as 16.16 fixed point.
- int x = 0;
- int y = 0;
- int dx = 0;
- int dy = 0;
- ScaleSlope(src_width, src_height, dst_width, dst_height, kFilterNone,
- &x, &y, &dx, &dy);
- src_width = Abs(src_width);
-
- if (src_width * 2 == dst_width && x < 0x8000) {
- ScaleCols = ScaleColsUp2_16_C;
-#if defined(HAS_SCALECOLS_16_SSE2)
- if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(dst_width, 8)) {
- ScaleCols = ScaleColsUp2_16_SSE2;
- }
-#endif
- }
-
- for (i = 0; i < dst_height; ++i) {
- ScaleCols(dst_ptr, src_ptr + (y >> 16) * src_stride,
- dst_width, x, dx);
- dst_ptr += dst_stride;
- y += dy;
- }
-}
-
-// Scale a plane.
-// This function dispatches to a specialized scaler based on scale factor.
-
-LIBYUV_API
-void ScalePlane(const uint8* src, int src_stride,
- int src_width, int src_height,
- uint8* dst, int dst_stride,
- int dst_width, int dst_height,
- enum FilterMode filtering) {
- // Simplify filtering when possible.
- filtering = ScaleFilterReduce(src_width, src_height,
- dst_width, dst_height, filtering);
-
- // Negative height means invert the image.
- if (src_height < 0) {
- src_height = -src_height;
- src = src + (src_height - 1) * src_stride;
- src_stride = -src_stride;
- }
-
- // Use specialized scales to improve performance for common resolutions.
- // For example, all the 1/2 scalings will use ScalePlaneDown2()
- if (dst_width == src_width && dst_height == src_height) {
- // Straight copy.
- CopyPlane(src, src_stride, dst, dst_stride, dst_width, dst_height);
- return;
- }
- if (dst_width == src_width && filtering != kFilterBox) {
- int dy = FixedDiv(src_height, dst_height);
- // Arbitrary scale vertically, but unscaled horizontally.
- ScalePlaneVertical(src_height,
- dst_width, dst_height,
- src_stride, dst_stride, src, dst,
- 0, 0, dy, 1, filtering);
- return;
- }
- if (dst_width <= Abs(src_width) && dst_height <= src_height) {
- // Scale down.
- if (4 * dst_width == 3 * src_width &&
- 4 * dst_height == 3 * src_height) {
- // optimized, 3/4
- ScalePlaneDown34(src_width, src_height, dst_width, dst_height,
- src_stride, dst_stride, src, dst, filtering);
- return;
- }
- if (2 * dst_width == src_width && 2 * dst_height == src_height) {
- // optimized, 1/2
- ScalePlaneDown2(src_width, src_height, dst_width, dst_height,
- src_stride, dst_stride, src, dst, filtering);
- return;
- }
- // 3/8 rounded up for odd sized chroma height.
- if (8 * dst_width == 3 * src_width &&
- dst_height == ((src_height * 3 + 7) / 8)) {
- // optimized, 3/8
- ScalePlaneDown38(src_width, src_height, dst_width, dst_height,
- src_stride, dst_stride, src, dst, filtering);
- return;
- }
- if (4 * dst_width == src_width && 4 * dst_height == src_height &&
- (filtering == kFilterBox || filtering == kFilterNone)) {
- // optimized, 1/4
- ScalePlaneDown4(src_width, src_height, dst_width, dst_height,
- src_stride, dst_stride, src, dst, filtering);
- return;
- }
- }
- if (filtering == kFilterBox && dst_height * 2 < src_height) {
- ScalePlaneBox(src_width, src_height, dst_width, dst_height,
- src_stride, dst_stride, src, dst);
- return;
- }
- if (filtering && dst_height > src_height) {
- ScalePlaneBilinearUp(src_width, src_height, dst_width, dst_height,
- src_stride, dst_stride, src, dst, filtering);
- return;
- }
- if (filtering) {
- ScalePlaneBilinearDown(src_width, src_height, dst_width, dst_height,
- src_stride, dst_stride, src, dst, filtering);
- return;
- }
- ScalePlaneSimple(src_width, src_height, dst_width, dst_height,
- src_stride, dst_stride, src, dst);
-}
-
-LIBYUV_API
-void ScalePlane_16(const uint16* src, int src_stride,
- int src_width, int src_height,
- uint16* dst, int dst_stride,
- int dst_width, int dst_height,
- enum FilterMode filtering) {
- // Simplify filtering when possible.
- filtering = ScaleFilterReduce(src_width, src_height,
- dst_width, dst_height, filtering);
-
- // Negative height means invert the image.
- if (src_height < 0) {
- src_height = -src_height;
- src = src + (src_height - 1) * src_stride;
- src_stride = -src_stride;
- }
-
- // Use specialized scales to improve performance for common resolutions.
- // For example, all the 1/2 scalings will use ScalePlaneDown2()
- if (dst_width == src_width && dst_height == src_height) {
- // Straight copy.
- CopyPlane_16(src, src_stride, dst, dst_stride, dst_width, dst_height);
- return;
- }
- if (dst_width == src_width) {
- int dy = FixedDiv(src_height, dst_height);
- // Arbitrary scale vertically, but unscaled vertically.
- ScalePlaneVertical_16(src_height,
- dst_width, dst_height,
- src_stride, dst_stride, src, dst,
- 0, 0, dy, 1, filtering);
- return;
- }
- if (dst_width <= Abs(src_width) && dst_height <= src_height) {
- // Scale down.
- if (4 * dst_width == 3 * src_width &&
- 4 * dst_height == 3 * src_height) {
- // optimized, 3/4
- ScalePlaneDown34_16(src_width, src_height, dst_width, dst_height,
- src_stride, dst_stride, src, dst, filtering);
- return;
- }
- if (2 * dst_width == src_width && 2 * dst_height == src_height) {
- // optimized, 1/2
- ScalePlaneDown2_16(src_width, src_height, dst_width, dst_height,
- src_stride, dst_stride, src, dst, filtering);
- return;
- }
- // 3/8 rounded up for odd sized chroma height.
- if (8 * dst_width == 3 * src_width &&
- dst_height == ((src_height * 3 + 7) / 8)) {
- // optimized, 3/8
- ScalePlaneDown38_16(src_width, src_height, dst_width, dst_height,
- src_stride, dst_stride, src, dst, filtering);
- return;
- }
- if (4 * dst_width == src_width && 4 * dst_height == src_height &&
- filtering != kFilterBilinear) {
- // optimized, 1/4
- ScalePlaneDown4_16(src_width, src_height, dst_width, dst_height,
- src_stride, dst_stride, src, dst, filtering);
- return;
- }
- }
- if (filtering == kFilterBox && dst_height * 2 < src_height) {
- ScalePlaneBox_16(src_width, src_height, dst_width, dst_height,
- src_stride, dst_stride, src, dst);
- return;
- }
- if (filtering && dst_height > src_height) {
- ScalePlaneBilinearUp_16(src_width, src_height, dst_width, dst_height,
- src_stride, dst_stride, src, dst, filtering);
- return;
- }
- if (filtering) {
- ScalePlaneBilinearDown_16(src_width, src_height, dst_width, dst_height,
- src_stride, dst_stride, src, dst, filtering);
- return;
- }
- ScalePlaneSimple_16(src_width, src_height, dst_width, dst_height,
- src_stride, dst_stride, src, dst);
-}
-
-// Scale an I420 image.
-// This function in turn calls a scaling function for each plane.
-
-LIBYUV_API
-int I420Scale(const uint8* src_y, int src_stride_y,
- const uint8* src_u, int src_stride_u,
- const uint8* src_v, int src_stride_v,
- int src_width, int src_height,
- uint8* dst_y, int dst_stride_y,
- uint8* dst_u, int dst_stride_u,
- uint8* dst_v, int dst_stride_v,
- int dst_width, int dst_height,
- enum FilterMode filtering) {
- int src_halfwidth = SUBSAMPLE(src_width, 1, 1);
- int src_halfheight = SUBSAMPLE(src_height, 1, 1);
- int dst_halfwidth = SUBSAMPLE(dst_width, 1, 1);
- int dst_halfheight = SUBSAMPLE(dst_height, 1, 1);
- if (!src_y || !src_u || !src_v || src_width == 0 || src_height == 0 ||
- src_width > 32768 || src_height > 32768 ||
- !dst_y || !dst_u || !dst_v || dst_width <= 0 || dst_height <= 0) {
- return -1;
- }
-
- ScalePlane(src_y, src_stride_y, src_width, src_height,
- dst_y, dst_stride_y, dst_width, dst_height,
- filtering);
- ScalePlane(src_u, src_stride_u, src_halfwidth, src_halfheight,
- dst_u, dst_stride_u, dst_halfwidth, dst_halfheight,
- filtering);
- ScalePlane(src_v, src_stride_v, src_halfwidth, src_halfheight,
- dst_v, dst_stride_v, dst_halfwidth, dst_halfheight,
- filtering);
- return 0;
-}
-
-LIBYUV_API
-int I420Scale_16(const uint16* src_y, int src_stride_y,
- const uint16* src_u, int src_stride_u,
- const uint16* src_v, int src_stride_v,
- int src_width, int src_height,
- uint16* dst_y, int dst_stride_y,
- uint16* dst_u, int dst_stride_u,
- uint16* dst_v, int dst_stride_v,
- int dst_width, int dst_height,
- enum FilterMode filtering) {
- int src_halfwidth = SUBSAMPLE(src_width, 1, 1);
- int src_halfheight = SUBSAMPLE(src_height, 1, 1);
- int dst_halfwidth = SUBSAMPLE(dst_width, 1, 1);
- int dst_halfheight = SUBSAMPLE(dst_height, 1, 1);
- if (!src_y || !src_u || !src_v || src_width == 0 || src_height == 0 ||
- src_width > 32768 || src_height > 32768 ||
- !dst_y || !dst_u || !dst_v || dst_width <= 0 || dst_height <= 0) {
- return -1;
- }
-
- ScalePlane_16(src_y, src_stride_y, src_width, src_height,
- dst_y, dst_stride_y, dst_width, dst_height,
- filtering);
- ScalePlane_16(src_u, src_stride_u, src_halfwidth, src_halfheight,
- dst_u, dst_stride_u, dst_halfwidth, dst_halfheight,
- filtering);
- ScalePlane_16(src_v, src_stride_v, src_halfwidth, src_halfheight,
- dst_v, dst_stride_v, dst_halfwidth, dst_halfheight,
- filtering);
- return 0;
-}
-
-// Deprecated api
-LIBYUV_API
-int Scale(const uint8* src_y, const uint8* src_u, const uint8* src_v,
- int src_stride_y, int src_stride_u, int src_stride_v,
- int src_width, int src_height,
- uint8* dst_y, uint8* dst_u, uint8* dst_v,
- int dst_stride_y, int dst_stride_u, int dst_stride_v,
- int dst_width, int dst_height,
- LIBYUV_BOOL interpolate) {
- return I420Scale(src_y, src_stride_y,
- src_u, src_stride_u,
- src_v, src_stride_v,
- src_width, src_height,
- dst_y, dst_stride_y,
- dst_u, dst_stride_u,
- dst_v, dst_stride_v,
- dst_width, dst_height,
- interpolate ? kFilterBox : kFilterNone);
-}
-
-// Deprecated api
-LIBYUV_API
-int ScaleOffset(const uint8* src, int src_width, int src_height,
- uint8* dst, int dst_width, int dst_height, int dst_yoffset,
- LIBYUV_BOOL interpolate) {
- // Chroma requires offset to multiple of 2.
- int dst_yoffset_even = dst_yoffset & ~1;
- int src_halfwidth = SUBSAMPLE(src_width, 1, 1);
- int src_halfheight = SUBSAMPLE(src_height, 1, 1);
- int dst_halfwidth = SUBSAMPLE(dst_width, 1, 1);
- int dst_halfheight = SUBSAMPLE(dst_height, 1, 1);
- int aheight = dst_height - dst_yoffset_even * 2; // actual output height
- const uint8* src_y = src;
- const uint8* src_u = src + src_width * src_height;
- const uint8* src_v = src + src_width * src_height +
- src_halfwidth * src_halfheight;
- uint8* dst_y = dst + dst_yoffset_even * dst_width;
- uint8* dst_u = dst + dst_width * dst_height +
- (dst_yoffset_even >> 1) * dst_halfwidth;
- uint8* dst_v = dst + dst_width * dst_height + dst_halfwidth * dst_halfheight +
- (dst_yoffset_even >> 1) * dst_halfwidth;
- if (!src || src_width <= 0 || src_height <= 0 ||
- !dst || dst_width <= 0 || dst_height <= 0 || dst_yoffset_even < 0 ||
- dst_yoffset_even >= dst_height) {
- return -1;
- }
- return I420Scale(src_y, src_width,
- src_u, src_halfwidth,
- src_v, src_halfwidth,
- src_width, src_height,
- dst_y, dst_width,
- dst_u, dst_halfwidth,
- dst_v, dst_halfwidth,
- dst_width, aheight,
- interpolate ? kFilterBox : kFilterNone);
-}
-
-#ifdef __cplusplus
-} // extern "C"
-} // namespace libyuv
-#endif
diff --git a/third_party/aom/third_party/libyuv/source/scale_any.cc b/third_party/aom/third_party/libyuv/source/scale_any.cc
deleted file mode 100644
index 2f6a2c8ba..000000000
--- a/third_party/aom/third_party/libyuv/source/scale_any.cc
+++ /dev/null
@@ -1,200 +0,0 @@
-/*
- * Copyright 2015 The LibYuv Project Authors. All rights reserved.
- *
- * Use of this source code is governed by a BSD-style license
- * that can be found in the LICENSE file in the root of the source
- * tree. An additional intellectual property rights grant can be found
- * in the file PATENTS. All contributing project authors may
- * be found in the AUTHORS file in the root of the source tree.
- */
-
-#include "libyuv/scale.h"
-#include "libyuv/scale_row.h"
-
-#include "libyuv/basic_types.h"
-
-#ifdef __cplusplus
-namespace libyuv {
-extern "C" {
-#endif
-
-// Definition for ScaleFilterCols, ScaleARGBCols and ScaleARGBFilterCols
-#define CANY(NAMEANY, TERP_SIMD, TERP_C, BPP, MASK) \
- void NAMEANY(uint8* dst_ptr, const uint8* src_ptr, \
- int dst_width, int x, int dx) { \
- int n = dst_width & ~MASK; \
- if (n > 0) { \
- TERP_SIMD(dst_ptr, src_ptr, n, x, dx); \
- } \
- TERP_C(dst_ptr + n * BPP, src_ptr, \
- dst_width & MASK, x + n * dx, dx); \
- }
-
-#ifdef HAS_SCALEFILTERCOLS_NEON
-CANY(ScaleFilterCols_Any_NEON, ScaleFilterCols_NEON, ScaleFilterCols_C, 1, 7)
-#endif
-#ifdef HAS_SCALEARGBCOLS_NEON
-CANY(ScaleARGBCols_Any_NEON, ScaleARGBCols_NEON, ScaleARGBCols_C, 4, 7)
-#endif
-#ifdef HAS_SCALEARGBFILTERCOLS_NEON
-CANY(ScaleARGBFilterCols_Any_NEON, ScaleARGBFilterCols_NEON,
- ScaleARGBFilterCols_C, 4, 3)
-#endif
-#undef CANY
-
-// Fixed scale down.
-#define SDANY(NAMEANY, SCALEROWDOWN_SIMD, SCALEROWDOWN_C, FACTOR, BPP, MASK) \
- void NAMEANY(const uint8* src_ptr, ptrdiff_t src_stride, \
- uint8* dst_ptr, int dst_width) { \
- int r = (int)((unsigned int)dst_width % (MASK + 1)); \
- int n = dst_width - r; \
- if (n > 0) { \
- SCALEROWDOWN_SIMD(src_ptr, src_stride, dst_ptr, n); \
- } \
- SCALEROWDOWN_C(src_ptr + (n * FACTOR) * BPP, src_stride, \
- dst_ptr + n * BPP, r); \
- }
-
-#ifdef HAS_SCALEROWDOWN2_SSE2
-SDANY(ScaleRowDown2_Any_SSE2, ScaleRowDown2_SSE2, ScaleRowDown2_C, 2, 1, 15)
-SDANY(ScaleRowDown2Linear_Any_SSE2, ScaleRowDown2Linear_SSE2,
- ScaleRowDown2Linear_C, 2, 1, 15)
-SDANY(ScaleRowDown2Box_Any_SSE2, ScaleRowDown2Box_SSE2, ScaleRowDown2Box_C,
- 2, 1, 15)
-#endif
-#ifdef HAS_SCALEROWDOWN2_AVX2
-SDANY(ScaleRowDown2_Any_AVX2, ScaleRowDown2_AVX2, ScaleRowDown2_C, 2, 1, 31)
-SDANY(ScaleRowDown2Linear_Any_AVX2, ScaleRowDown2Linear_AVX2,
- ScaleRowDown2Linear_C, 2, 1, 31)
-SDANY(ScaleRowDown2Box_Any_AVX2, ScaleRowDown2Box_AVX2, ScaleRowDown2Box_C,
- 2, 1, 31)
-#endif
-#ifdef HAS_SCALEROWDOWN2_NEON
-SDANY(ScaleRowDown2_Any_NEON, ScaleRowDown2_NEON, ScaleRowDown2_C, 2, 1, 15)
-SDANY(ScaleRowDown2Linear_Any_NEON, ScaleRowDown2Linear_NEON,
- ScaleRowDown2Linear_C, 2, 1, 15)
-SDANY(ScaleRowDown2Box_Any_NEON, ScaleRowDown2Box_NEON,
- ScaleRowDown2Box_C, 2, 1, 15)
-#endif
-#ifdef HAS_SCALEROWDOWN4_SSE2
-SDANY(ScaleRowDown4_Any_SSE2, ScaleRowDown4_SSE2, ScaleRowDown4_C, 4, 1, 7)
-SDANY(ScaleRowDown4Box_Any_SSE2, ScaleRowDown4Box_SSE2, ScaleRowDown4Box_C,
- 4, 1, 7)
-#endif
-#ifdef HAS_SCALEROWDOWN4_AVX2
-SDANY(ScaleRowDown4_Any_AVX2, ScaleRowDown4_AVX2, ScaleRowDown4_C, 4, 1, 15)
-SDANY(ScaleRowDown4Box_Any_AVX2, ScaleRowDown4Box_AVX2, ScaleRowDown4Box_C,
- 4, 1, 15)
-#endif
-#ifdef HAS_SCALEROWDOWN4_NEON
-SDANY(ScaleRowDown4_Any_NEON, ScaleRowDown4_NEON, ScaleRowDown4_C, 4, 1, 7)
-SDANY(ScaleRowDown4Box_Any_NEON, ScaleRowDown4Box_NEON, ScaleRowDown4Box_C,
- 4, 1, 7)
-#endif
-#ifdef HAS_SCALEROWDOWN34_SSSE3
-SDANY(ScaleRowDown34_Any_SSSE3, ScaleRowDown34_SSSE3,
- ScaleRowDown34_C, 4 / 3, 1, 23)
-SDANY(ScaleRowDown34_0_Box_Any_SSSE3, ScaleRowDown34_0_Box_SSSE3,
- ScaleRowDown34_0_Box_C, 4 / 3, 1, 23)
-SDANY(ScaleRowDown34_1_Box_Any_SSSE3, ScaleRowDown34_1_Box_SSSE3,
- ScaleRowDown34_1_Box_C, 4 / 3, 1, 23)
-#endif
-#ifdef HAS_SCALEROWDOWN34_NEON
-SDANY(ScaleRowDown34_Any_NEON, ScaleRowDown34_NEON,
- ScaleRowDown34_C, 4 / 3, 1, 23)
-SDANY(ScaleRowDown34_0_Box_Any_NEON, ScaleRowDown34_0_Box_NEON,
- ScaleRowDown34_0_Box_C, 4 / 3, 1, 23)
-SDANY(ScaleRowDown34_1_Box_Any_NEON, ScaleRowDown34_1_Box_NEON,
- ScaleRowDown34_1_Box_C, 4 / 3, 1, 23)
-#endif
-#ifdef HAS_SCALEROWDOWN38_SSSE3
-SDANY(ScaleRowDown38_Any_SSSE3, ScaleRowDown38_SSSE3,
- ScaleRowDown38_C, 8 / 3, 1, 11)
-SDANY(ScaleRowDown38_3_Box_Any_SSSE3, ScaleRowDown38_3_Box_SSSE3,
- ScaleRowDown38_3_Box_C, 8 / 3, 1, 5)
-SDANY(ScaleRowDown38_2_Box_Any_SSSE3, ScaleRowDown38_2_Box_SSSE3,
- ScaleRowDown38_2_Box_C, 8 / 3, 1, 5)
-#endif
-#ifdef HAS_SCALEROWDOWN38_NEON
-SDANY(ScaleRowDown38_Any_NEON, ScaleRowDown38_NEON,
- ScaleRowDown38_C, 8 / 3, 1, 11)
-SDANY(ScaleRowDown38_3_Box_Any_NEON, ScaleRowDown38_3_Box_NEON,
- ScaleRowDown38_3_Box_C, 8 / 3, 1, 11)
-SDANY(ScaleRowDown38_2_Box_Any_NEON, ScaleRowDown38_2_Box_NEON,
- ScaleRowDown38_2_Box_C, 8 / 3, 1, 11)
-#endif
-
-#ifdef HAS_SCALEARGBROWDOWN2_SSE2
-SDANY(ScaleARGBRowDown2_Any_SSE2, ScaleARGBRowDown2_SSE2,
- ScaleARGBRowDown2_C, 2, 4, 3)
-SDANY(ScaleARGBRowDown2Linear_Any_SSE2, ScaleARGBRowDown2Linear_SSE2,
- ScaleARGBRowDown2Linear_C, 2, 4, 3)
-SDANY(ScaleARGBRowDown2Box_Any_SSE2, ScaleARGBRowDown2Box_SSE2,
- ScaleARGBRowDown2Box_C, 2, 4, 3)
-#endif
-#ifdef HAS_SCALEARGBROWDOWN2_NEON
-SDANY(ScaleARGBRowDown2_Any_NEON, ScaleARGBRowDown2_NEON,
- ScaleARGBRowDown2_C, 2, 4, 7)
-SDANY(ScaleARGBRowDown2Linear_Any_NEON, ScaleARGBRowDown2Linear_NEON,
- ScaleARGBRowDown2Linear_C, 2, 4, 7)
-SDANY(ScaleARGBRowDown2Box_Any_NEON, ScaleARGBRowDown2Box_NEON,
- ScaleARGBRowDown2Box_C, 2, 4, 7)
-#endif
-#undef SDANY
-
-// Scale down by even scale factor.
-#define SDAANY(NAMEANY, SCALEROWDOWN_SIMD, SCALEROWDOWN_C, BPP, MASK) \
- void NAMEANY(const uint8* src_ptr, ptrdiff_t src_stride, int src_stepx, \
- uint8* dst_ptr, int dst_width) { \
- int r = (int)((unsigned int)dst_width % (MASK + 1)); \
- int n = dst_width - r; \
- if (n > 0) { \
- SCALEROWDOWN_SIMD(src_ptr, src_stride, src_stepx, dst_ptr, n); \
- } \
- SCALEROWDOWN_C(src_ptr + (n * src_stepx) * BPP, src_stride, \
- src_stepx, dst_ptr + n * BPP, r); \
- }
-
-#ifdef HAS_SCALEARGBROWDOWNEVEN_SSE2
-SDAANY(ScaleARGBRowDownEven_Any_SSE2, ScaleARGBRowDownEven_SSE2,
- ScaleARGBRowDownEven_C, 4, 3)
-SDAANY(ScaleARGBRowDownEvenBox_Any_SSE2, ScaleARGBRowDownEvenBox_SSE2,
- ScaleARGBRowDownEvenBox_C, 4, 3)
-#endif
-#ifdef HAS_SCALEARGBROWDOWNEVEN_NEON
-SDAANY(ScaleARGBRowDownEven_Any_NEON, ScaleARGBRowDownEven_NEON,
- ScaleARGBRowDownEven_C, 4, 3)
-SDAANY(ScaleARGBRowDownEvenBox_Any_NEON, ScaleARGBRowDownEvenBox_NEON,
- ScaleARGBRowDownEvenBox_C, 4, 3)
-#endif
-
-// Add rows box filter scale down.
-#define SAANY(NAMEANY, SCALEADDROW_SIMD, SCALEADDROW_C, MASK) \
- void NAMEANY(const uint8* src_ptr, uint16* dst_ptr, int src_width) { \
- int n = src_width & ~MASK; \
- if (n > 0) { \
- SCALEADDROW_SIMD(src_ptr, dst_ptr, n); \
- } \
- SCALEADDROW_C(src_ptr + n, dst_ptr + n, src_width & MASK); \
- }
-
-#ifdef HAS_SCALEADDROW_SSE2
-SAANY(ScaleAddRow_Any_SSE2, ScaleAddRow_SSE2, ScaleAddRow_C, 15)
-#endif
-#ifdef HAS_SCALEADDROW_AVX2
-SAANY(ScaleAddRow_Any_AVX2, ScaleAddRow_AVX2, ScaleAddRow_C, 31)
-#endif
-#ifdef HAS_SCALEADDROW_NEON
-SAANY(ScaleAddRow_Any_NEON, ScaleAddRow_NEON, ScaleAddRow_C, 15)
-#endif
-#undef SAANY
-
-#ifdef __cplusplus
-} // extern "C"
-} // namespace libyuv
-#endif
-
-
-
-
-
diff --git a/third_party/aom/third_party/libyuv/source/scale_argb.cc b/third_party/aom/third_party/libyuv/source/scale_argb.cc
deleted file mode 100644
index 40a2d1ab2..000000000
--- a/third_party/aom/third_party/libyuv/source/scale_argb.cc
+++ /dev/null
@@ -1,853 +0,0 @@
-/*
- * Copyright 2011 The LibYuv Project Authors. All rights reserved.
- *
- * Use of this source code is governed by a BSD-style license
- * that can be found in the LICENSE file in the root of the source
- * tree. An additional intellectual property rights grant can be found
- * in the file PATENTS. All contributing project authors may
- * be found in the AUTHORS file in the root of the source tree.
- */
-
-#include "libyuv/scale.h"
-
-#include <assert.h>
-#include <string.h>
-
-#include "libyuv/cpu_id.h"
-#include "libyuv/planar_functions.h" // For CopyARGB
-#include "libyuv/row.h"
-#include "libyuv/scale_row.h"
-
-#ifdef __cplusplus
-namespace libyuv {
-extern "C" {
-#endif
-
-static __inline int Abs(int v) {
- return v >= 0 ? v : -v;
-}
-
-// ScaleARGB ARGB, 1/2
-// This is an optimized version for scaling down a ARGB to 1/2 of
-// its original size.
-static void ScaleARGBDown2(int src_width, int src_height,
- int dst_width, int dst_height,
- int src_stride, int dst_stride,
- const uint8* src_argb, uint8* dst_argb,
- int x, int dx, int y, int dy,
- enum FilterMode filtering) {
- int j;
- int row_stride = src_stride * (dy >> 16);
- void (*ScaleARGBRowDown2)(const uint8* src_argb, ptrdiff_t src_stride,
- uint8* dst_argb, int dst_width) =
- filtering == kFilterNone ? ScaleARGBRowDown2_C :
- (filtering == kFilterLinear ? ScaleARGBRowDown2Linear_C :
- ScaleARGBRowDown2Box_C);
- assert(dx == 65536 * 2); // Test scale factor of 2.
- assert((dy & 0x1ffff) == 0); // Test vertical scale is multiple of 2.
- // Advance to odd row, even column.
- if (filtering == kFilterBilinear) {
- src_argb += (y >> 16) * src_stride + (x >> 16) * 4;
- } else {
- src_argb += (y >> 16) * src_stride + ((x >> 16) - 1) * 4;
- }
-
-#if defined(HAS_SCALEARGBROWDOWN2_SSE2)
- if (TestCpuFlag(kCpuHasSSE2)) {
- ScaleARGBRowDown2 = filtering == kFilterNone ? ScaleARGBRowDown2_Any_SSE2 :
- (filtering == kFilterLinear ? ScaleARGBRowDown2Linear_Any_SSE2 :
- ScaleARGBRowDown2Box_Any_SSE2);
- if (IS_ALIGNED(dst_width, 4)) {
- ScaleARGBRowDown2 = filtering == kFilterNone ? ScaleARGBRowDown2_SSE2 :
- (filtering == kFilterLinear ? ScaleARGBRowDown2Linear_SSE2 :
- ScaleARGBRowDown2Box_SSE2);
- }
- }
-#endif
-#if defined(HAS_SCALEARGBROWDOWN2_NEON)
- if (TestCpuFlag(kCpuHasNEON)) {
- ScaleARGBRowDown2 = filtering == kFilterNone ? ScaleARGBRowDown2_Any_NEON :
- (filtering == kFilterLinear ? ScaleARGBRowDown2Linear_Any_NEON :
- ScaleARGBRowDown2Box_Any_NEON);
- if (IS_ALIGNED(dst_width, 8)) {
- ScaleARGBRowDown2 = filtering == kFilterNone ? ScaleARGBRowDown2_NEON :
- (filtering == kFilterLinear ? ScaleARGBRowDown2Linear_NEON :
- ScaleARGBRowDown2Box_NEON);
- }
- }
-#endif
-
- if (filtering == kFilterLinear) {
- src_stride = 0;
- }
- for (j = 0; j < dst_height; ++j) {
- ScaleARGBRowDown2(src_argb, src_stride, dst_argb, dst_width);
- src_argb += row_stride;
- dst_argb += dst_stride;
- }
-}
-
-// ScaleARGB ARGB, 1/4
-// This is an optimized version for scaling down a ARGB to 1/4 of
-// its original size.
-static void ScaleARGBDown4Box(int src_width, int src_height,
- int dst_width, int dst_height,
- int src_stride, int dst_stride,
- const uint8* src_argb, uint8* dst_argb,
- int x, int dx, int y, int dy) {
- int j;
- // Allocate 2 rows of ARGB.
- const int kRowSize = (dst_width * 2 * 4 + 31) & ~31;
- align_buffer_64(row, kRowSize * 2);
- int row_stride = src_stride * (dy >> 16);
- void (*ScaleARGBRowDown2)(const uint8* src_argb, ptrdiff_t src_stride,
- uint8* dst_argb, int dst_width) = ScaleARGBRowDown2Box_C;
- // Advance to odd row, even column.
- src_argb += (y >> 16) * src_stride + (x >> 16) * 4;
- assert(dx == 65536 * 4); // Test scale factor of 4.
- assert((dy & 0x3ffff) == 0); // Test vertical scale is multiple of 4.
-#if defined(HAS_SCALEARGBROWDOWN2_SSE2)
- if (TestCpuFlag(kCpuHasSSE2)) {
- ScaleARGBRowDown2 = ScaleARGBRowDown2Box_Any_SSE2;
- if (IS_ALIGNED(dst_width, 4)) {
- ScaleARGBRowDown2 = ScaleARGBRowDown2Box_SSE2;
- }
- }
-#endif
-#if defined(HAS_SCALEARGBROWDOWN2_NEON)
- if (TestCpuFlag(kCpuHasNEON)) {
- ScaleARGBRowDown2 = ScaleARGBRowDown2Box_Any_NEON;
- if (IS_ALIGNED(dst_width, 8)) {
- ScaleARGBRowDown2 = ScaleARGBRowDown2Box_NEON;
- }
- }
-#endif
-
- for (j = 0; j < dst_height; ++j) {
- ScaleARGBRowDown2(src_argb, src_stride, row, dst_width * 2);
- ScaleARGBRowDown2(src_argb + src_stride * 2, src_stride,
- row + kRowSize, dst_width * 2);
- ScaleARGBRowDown2(row, kRowSize, dst_argb, dst_width);
- src_argb += row_stride;
- dst_argb += dst_stride;
- }
- free_aligned_buffer_64(row);
-}
-
-// ScaleARGB ARGB Even
-// This is an optimized version for scaling down a ARGB to even
-// multiple of its original size.
-static void ScaleARGBDownEven(int src_width, int src_height,
- int dst_width, int dst_height,
- int src_stride, int dst_stride,
- const uint8* src_argb, uint8* dst_argb,
- int x, int dx, int y, int dy,
- enum FilterMode filtering) {
- int j;
- int col_step = dx >> 16;
- int row_stride = (dy >> 16) * src_stride;
- void (*ScaleARGBRowDownEven)(const uint8* src_argb, ptrdiff_t src_stride,
- int src_step, uint8* dst_argb, int dst_width) =
- filtering ? ScaleARGBRowDownEvenBox_C : ScaleARGBRowDownEven_C;
- assert(IS_ALIGNED(src_width, 2));
- assert(IS_ALIGNED(src_height, 2));
- src_argb += (y >> 16) * src_stride + (x >> 16) * 4;
-#if defined(HAS_SCALEARGBROWDOWNEVEN_SSE2)
- if (TestCpuFlag(kCpuHasSSE2)) {
- ScaleARGBRowDownEven = filtering ? ScaleARGBRowDownEvenBox_Any_SSE2 :
- ScaleARGBRowDownEven_Any_SSE2;
- if (IS_ALIGNED(dst_width, 4)) {
- ScaleARGBRowDownEven = filtering ? ScaleARGBRowDownEvenBox_SSE2 :
- ScaleARGBRowDownEven_SSE2;
- }
- }
-#endif
-#if defined(HAS_SCALEARGBROWDOWNEVEN_NEON)
- if (TestCpuFlag(kCpuHasNEON)) {
- ScaleARGBRowDownEven = filtering ? ScaleARGBRowDownEvenBox_Any_NEON :
- ScaleARGBRowDownEven_Any_NEON;
- if (IS_ALIGNED(dst_width, 4)) {
- ScaleARGBRowDownEven = filtering ? ScaleARGBRowDownEvenBox_NEON :
- ScaleARGBRowDownEven_NEON;
- }
- }
-#endif
-
- if (filtering == kFilterLinear) {
- src_stride = 0;
- }
- for (j = 0; j < dst_height; ++j) {
- ScaleARGBRowDownEven(src_argb, src_stride, col_step, dst_argb, dst_width);
- src_argb += row_stride;
- dst_argb += dst_stride;
- }
-}
-
-// Scale ARGB down with bilinear interpolation.
-static void ScaleARGBBilinearDown(int src_width, int src_height,
- int dst_width, int dst_height,
- int src_stride, int dst_stride,
- const uint8* src_argb, uint8* dst_argb,
- int x, int dx, int y, int dy,
- enum FilterMode filtering) {
- int j;
- void (*InterpolateRow)(uint8* dst_argb, const uint8* src_argb,
- ptrdiff_t src_stride, int dst_width, int source_y_fraction) =
- InterpolateRow_C;
- void (*ScaleARGBFilterCols)(uint8* dst_argb, const uint8* src_argb,
- int dst_width, int x, int dx) =
- (src_width >= 32768) ? ScaleARGBFilterCols64_C : ScaleARGBFilterCols_C;
- int64 xlast = x + (int64)(dst_width - 1) * dx;
- int64 xl = (dx >= 0) ? x : xlast;
- int64 xr = (dx >= 0) ? xlast : x;
- int clip_src_width;
- xl = (xl >> 16) & ~3; // Left edge aligned.
- xr = (xr >> 16) + 1; // Right most pixel used. Bilinear uses 2 pixels.
- xr = (xr + 1 + 3) & ~3; // 1 beyond 4 pixel aligned right most pixel.
- if (xr > src_width) {
- xr = src_width;
- }
- clip_src_width = (int)(xr - xl) * 4; // Width aligned to 4.
- src_argb += xl * 4;
- x -= (int)(xl << 16);
-#if defined(HAS_INTERPOLATEROW_SSE2)
- if (TestCpuFlag(kCpuHasSSE2)) {
- InterpolateRow = InterpolateRow_Any_SSE2;
- if (IS_ALIGNED(clip_src_width, 16)) {
- InterpolateRow = InterpolateRow_SSE2;
- }
- }
-#endif
-#if defined(HAS_INTERPOLATEROW_SSSE3)
- if (TestCpuFlag(kCpuHasSSSE3)) {
- InterpolateRow = InterpolateRow_Any_SSSE3;
- if (IS_ALIGNED(clip_src_width, 16)) {
- InterpolateRow = InterpolateRow_SSSE3;
- }
- }
-#endif
-#if defined(HAS_INTERPOLATEROW_AVX2)
- if (TestCpuFlag(kCpuHasAVX2)) {
- InterpolateRow = InterpolateRow_Any_AVX2;
- if (IS_ALIGNED(clip_src_width, 32)) {
- InterpolateRow = InterpolateRow_AVX2;
- }
- }
-#endif
-#if defined(HAS_INTERPOLATEROW_NEON)
- if (TestCpuFlag(kCpuHasNEON)) {
- InterpolateRow = InterpolateRow_Any_NEON;
- if (IS_ALIGNED(clip_src_width, 16)) {
- InterpolateRow = InterpolateRow_NEON;
- }
- }
-#endif
-#if defined(HAS_INTERPOLATEROW_MIPS_DSPR2)
- if (TestCpuFlag(kCpuHasMIPS_DSPR2) &&
- IS_ALIGNED(src_argb, 4) && IS_ALIGNED(src_stride, 4)) {
- InterpolateRow = InterpolateRow_Any_MIPS_DSPR2;
- if (IS_ALIGNED(clip_src_width, 4)) {
- InterpolateRow = InterpolateRow_MIPS_DSPR2;
- }
- }
-#endif
-#if defined(HAS_SCALEARGBFILTERCOLS_SSSE3)
- if (TestCpuFlag(kCpuHasSSSE3) && src_width < 32768) {
- ScaleARGBFilterCols = ScaleARGBFilterCols_SSSE3;
- }
-#endif
-#if defined(HAS_SCALEARGBFILTERCOLS_NEON)
- if (TestCpuFlag(kCpuHasNEON)) {
- ScaleARGBFilterCols = ScaleARGBFilterCols_Any_NEON;
- if (IS_ALIGNED(dst_width, 4)) {
- ScaleARGBFilterCols = ScaleARGBFilterCols_NEON;
- }
- }
-#endif
- // TODO(fbarchard): Consider not allocating row buffer for kFilterLinear.
- // Allocate a row of ARGB.
- {
- align_buffer_64(row, clip_src_width * 4);
-
- const int max_y = (src_height - 1) << 16;
- if (y > max_y) {
- y = max_y;
- }
- for (j = 0; j < dst_height; ++j) {
- int yi = y >> 16;
- const uint8* src = src_argb + yi * src_stride;
- if (filtering == kFilterLinear) {
- ScaleARGBFilterCols(dst_argb, src, dst_width, x, dx);
- } else {
- int yf = (y >> 8) & 255;
- InterpolateRow(row, src, src_stride, clip_src_width, yf);
- ScaleARGBFilterCols(dst_argb, row, dst_width, x, dx);
- }
- dst_argb += dst_stride;
- y += dy;
- if (y > max_y) {
- y = max_y;
- }
- }
- free_aligned_buffer_64(row);
- }
-}
-
-// Scale ARGB up with bilinear interpolation.
-static void ScaleARGBBilinearUp(int src_width, int src_height,
- int dst_width, int dst_height,
- int src_stride, int dst_stride,
- const uint8* src_argb, uint8* dst_argb,
- int x, int dx, int y, int dy,
- enum FilterMode filtering) {
- int j;
- void (*InterpolateRow)(uint8* dst_argb, const uint8* src_argb,
- ptrdiff_t src_stride, int dst_width, int source_y_fraction) =
- InterpolateRow_C;
- void (*ScaleARGBFilterCols)(uint8* dst_argb, const uint8* src_argb,
- int dst_width, int x, int dx) =
- filtering ? ScaleARGBFilterCols_C : ScaleARGBCols_C;
- const int max_y = (src_height - 1) << 16;
-#if defined(HAS_INTERPOLATEROW_SSE2)
- if (TestCpuFlag(kCpuHasSSE2)) {
- InterpolateRow = InterpolateRow_Any_SSE2;
- if (IS_ALIGNED(dst_width, 4)) {
- InterpolateRow = InterpolateRow_SSE2;
- }
- }
-#endif
-#if defined(HAS_INTERPOLATEROW_SSSE3)
- if (TestCpuFlag(kCpuHasSSSE3)) {
- InterpolateRow = InterpolateRow_Any_SSSE3;
- if (IS_ALIGNED(dst_width, 4)) {
- InterpolateRow = InterpolateRow_SSSE3;
- }
- }
-#endif
-#if defined(HAS_INTERPOLATEROW_AVX2)
- if (TestCpuFlag(kCpuHasAVX2)) {
- InterpolateRow = InterpolateRow_Any_AVX2;
- if (IS_ALIGNED(dst_width, 8)) {
- InterpolateRow = InterpolateRow_AVX2;
- }
- }
-#endif
-#if defined(HAS_INTERPOLATEROW_NEON)
- if (TestCpuFlag(kCpuHasNEON)) {
- InterpolateRow = InterpolateRow_Any_NEON;
- if (IS_ALIGNED(dst_width, 4)) {
- InterpolateRow = InterpolateRow_NEON;
- }
- }
-#endif
-#if defined(HAS_INTERPOLATEROW_MIPS_DSPR2)
- if (TestCpuFlag(kCpuHasMIPS_DSPR2) &&
- IS_ALIGNED(dst_argb, 4) && IS_ALIGNED(dst_stride, 4)) {
- InterpolateRow = InterpolateRow_MIPS_DSPR2;
- }
-#endif
- if (src_width >= 32768) {
- ScaleARGBFilterCols = filtering ?
- ScaleARGBFilterCols64_C : ScaleARGBCols64_C;
- }
-#if defined(HAS_SCALEARGBFILTERCOLS_SSSE3)
- if (filtering && TestCpuFlag(kCpuHasSSSE3) && src_width < 32768) {
- ScaleARGBFilterCols = ScaleARGBFilterCols_SSSE3;
- }
-#endif
-#if defined(HAS_SCALEARGBFILTERCOLS_NEON)
- if (filtering && TestCpuFlag(kCpuHasNEON)) {
- ScaleARGBFilterCols = ScaleARGBFilterCols_Any_NEON;
- if (IS_ALIGNED(dst_width, 4)) {
- ScaleARGBFilterCols = ScaleARGBFilterCols_NEON;
- }
- }
-#endif
-#if defined(HAS_SCALEARGBCOLS_SSE2)
- if (!filtering && TestCpuFlag(kCpuHasSSE2) && src_width < 32768) {
- ScaleARGBFilterCols = ScaleARGBCols_SSE2;
- }
-#endif
-#if defined(HAS_SCALEARGBCOLS_NEON)
- if (!filtering && TestCpuFlag(kCpuHasNEON)) {
- ScaleARGBFilterCols = ScaleARGBCols_Any_NEON;
- if (IS_ALIGNED(dst_width, 8)) {
- ScaleARGBFilterCols = ScaleARGBCols_NEON;
- }
- }
-#endif
- if (!filtering && src_width * 2 == dst_width && x < 0x8000) {
- ScaleARGBFilterCols = ScaleARGBColsUp2_C;
-#if defined(HAS_SCALEARGBCOLSUP2_SSE2)
- if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(dst_width, 8)) {
- ScaleARGBFilterCols = ScaleARGBColsUp2_SSE2;
- }
-#endif
- }
-
- if (y > max_y) {
- y = max_y;
- }
-
- {
- int yi = y >> 16;
- const uint8* src = src_argb + yi * src_stride;
-
- // Allocate 2 rows of ARGB.
- const int kRowSize = (dst_width * 4 + 31) & ~31;
- align_buffer_64(row, kRowSize * 2);
-
- uint8* rowptr = row;
- int rowstride = kRowSize;
- int lasty = yi;
-
- ScaleARGBFilterCols(rowptr, src, dst_width, x, dx);
- if (src_height > 1) {
- src += src_stride;
- }
- ScaleARGBFilterCols(rowptr + rowstride, src, dst_width, x, dx);
- src += src_stride;
-
- for (j = 0; j < dst_height; ++j) {
- yi = y >> 16;
- if (yi != lasty) {
- if (y > max_y) {
- y = max_y;
- yi = y >> 16;
- src = src_argb + yi * src_stride;
- }
- if (yi != lasty) {
- ScaleARGBFilterCols(rowptr, src, dst_width, x, dx);
- rowptr += rowstride;
- rowstride = -rowstride;
- lasty = yi;
- src += src_stride;
- }
- }
- if (filtering == kFilterLinear) {
- InterpolateRow(dst_argb, rowptr, 0, dst_width * 4, 0);
- } else {
- int yf = (y >> 8) & 255;
- InterpolateRow(dst_argb, rowptr, rowstride, dst_width * 4, yf);
- }
- dst_argb += dst_stride;
- y += dy;
- }
- free_aligned_buffer_64(row);
- }
-}
-
-#ifdef YUVSCALEUP
-// Scale YUV to ARGB up with bilinear interpolation.
-static void ScaleYUVToARGBBilinearUp(int src_width, int src_height,
- int dst_width, int dst_height,
- int src_stride_y,
- int src_stride_u,
- int src_stride_v,
- int dst_stride_argb,
- const uint8* src_y,
- const uint8* src_u,
- const uint8* src_v,
- uint8* dst_argb,
- int x, int dx, int y, int dy,
- enum FilterMode filtering) {
- int j;
- void (*I422ToARGBRow)(const uint8* y_buf,
- const uint8* u_buf,
- const uint8* v_buf,
- uint8* rgb_buf,
- int width) = I422ToARGBRow_C;
-#if defined(HAS_I422TOARGBROW_SSSE3)
- if (TestCpuFlag(kCpuHasSSSE3)) {
- I422ToARGBRow = I422ToARGBRow_Any_SSSE3;
- if (IS_ALIGNED(src_width, 8)) {
- I422ToARGBRow = I422ToARGBRow_SSSE3;
- }
- }
-#endif
-#if defined(HAS_I422TOARGBROW_AVX2)
- if (TestCpuFlag(kCpuHasAVX2)) {
- I422ToARGBRow = I422ToARGBRow_Any_AVX2;
- if (IS_ALIGNED(src_width, 16)) {
- I422ToARGBRow = I422ToARGBRow_AVX2;
- }
- }
-#endif
-#if defined(HAS_I422TOARGBROW_NEON)
- if (TestCpuFlag(kCpuHasNEON)) {
- I422ToARGBRow = I422ToARGBRow_Any_NEON;
- if (IS_ALIGNED(src_width, 8)) {
- I422ToARGBRow = I422ToARGBRow_NEON;
- }
- }
-#endif
-#if defined(HAS_I422TOARGBROW_MIPS_DSPR2)
- if (TestCpuFlag(kCpuHasMIPS_DSPR2) && IS_ALIGNED(src_width, 4) &&
- IS_ALIGNED(src_y, 4) && IS_ALIGNED(src_stride_y, 4) &&
- IS_ALIGNED(src_u, 2) && IS_ALIGNED(src_stride_u, 2) &&
- IS_ALIGNED(src_v, 2) && IS_ALIGNED(src_stride_v, 2) &&
- IS_ALIGNED(dst_argb, 4) && IS_ALIGNED(dst_stride_argb, 4)) {
- I422ToARGBRow = I422ToARGBRow_MIPS_DSPR2;
- }
-#endif
-
- void (*InterpolateRow)(uint8* dst_argb, const uint8* src_argb,
- ptrdiff_t src_stride, int dst_width, int source_y_fraction) =
- InterpolateRow_C;
-#if defined(HAS_INTERPOLATEROW_SSE2)
- if (TestCpuFlag(kCpuHasSSE2)) {
- InterpolateRow = InterpolateRow_Any_SSE2;
- if (IS_ALIGNED(dst_width, 4)) {
- InterpolateRow = InterpolateRow_SSE2;
- }
- }
-#endif
-#if defined(HAS_INTERPOLATEROW_SSSE3)
- if (TestCpuFlag(kCpuHasSSSE3)) {
- InterpolateRow = InterpolateRow_Any_SSSE3;
- if (IS_ALIGNED(dst_width, 4)) {
- InterpolateRow = InterpolateRow_SSSE3;
- }
- }
-#endif
-#if defined(HAS_INTERPOLATEROW_AVX2)
- if (TestCpuFlag(kCpuHasAVX2)) {
- InterpolateRow = InterpolateRow_Any_AVX2;
- if (IS_ALIGNED(dst_width, 8)) {
- InterpolateRow = InterpolateRow_AVX2;
- }
- }
-#endif
-#if defined(HAS_INTERPOLATEROW_NEON)
- if (TestCpuFlag(kCpuHasNEON)) {
- InterpolateRow = InterpolateRow_Any_NEON;
- if (IS_ALIGNED(dst_width, 4)) {
- InterpolateRow = InterpolateRow_NEON;
- }
- }
-#endif
-#if defined(HAS_INTERPOLATEROW_MIPS_DSPR2)
- if (TestCpuFlag(kCpuHasMIPS_DSPR2) &&
- IS_ALIGNED(dst_argb, 4) && IS_ALIGNED(dst_stride_argb, 4)) {
- InterpolateRow = InterpolateRow_MIPS_DSPR2;
- }
-#endif
-
- void (*ScaleARGBFilterCols)(uint8* dst_argb, const uint8* src_argb,
- int dst_width, int x, int dx) =
- filtering ? ScaleARGBFilterCols_C : ScaleARGBCols_C;
- if (src_width >= 32768) {
- ScaleARGBFilterCols = filtering ?
- ScaleARGBFilterCols64_C : ScaleARGBCols64_C;
- }
-#if defined(HAS_SCALEARGBFILTERCOLS_SSSE3)
- if (filtering && TestCpuFlag(kCpuHasSSSE3) && src_width < 32768) {
- ScaleARGBFilterCols = ScaleARGBFilterCols_SSSE3;
- }
-#endif
-#if defined(HAS_SCALEARGBFILTERCOLS_NEON)
- if (filtering && TestCpuFlag(kCpuHasNEON)) {
- ScaleARGBFilterCols = ScaleARGBFilterCols_Any_NEON;
- if (IS_ALIGNED(dst_width, 4)) {
- ScaleARGBFilterCols = ScaleARGBFilterCols_NEON;
- }
- }
-#endif
-#if defined(HAS_SCALEARGBCOLS_SSE2)
- if (!filtering && TestCpuFlag(kCpuHasSSE2) && src_width < 32768) {
- ScaleARGBFilterCols = ScaleARGBCols_SSE2;
- }
-#endif
-#if defined(HAS_SCALEARGBCOLS_NEON)
- if (!filtering && TestCpuFlag(kCpuHasNEON)) {
- ScaleARGBFilterCols = ScaleARGBCols_Any_NEON;
- if (IS_ALIGNED(dst_width, 8)) {
- ScaleARGBFilterCols = ScaleARGBCols_NEON;
- }
- }
-#endif
- if (!filtering && src_width * 2 == dst_width && x < 0x8000) {
- ScaleARGBFilterCols = ScaleARGBColsUp2_C;
-#if defined(HAS_SCALEARGBCOLSUP2_SSE2)
- if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(dst_width, 8)) {
- ScaleARGBFilterCols = ScaleARGBColsUp2_SSE2;
- }
-#endif
- }
-
- const int max_y = (src_height - 1) << 16;
- if (y > max_y) {
- y = max_y;
- }
- const int kYShift = 1; // Shift Y by 1 to convert Y plane to UV coordinate.
- int yi = y >> 16;
- int uv_yi = yi >> kYShift;
- const uint8* src_row_y = src_y + yi * src_stride_y;
- const uint8* src_row_u = src_u + uv_yi * src_stride_u;
- const uint8* src_row_v = src_v + uv_yi * src_stride_v;
-
- // Allocate 2 rows of ARGB.
- const int kRowSize = (dst_width * 4 + 31) & ~31;
- align_buffer_64(row, kRowSize * 2);
-
- // Allocate 1 row of ARGB for source conversion.
- align_buffer_64(argb_row, src_width * 4);
-
- uint8* rowptr = row;
- int rowstride = kRowSize;
- int lasty = yi;
-
- // TODO(fbarchard): Convert first 2 rows of YUV to ARGB.
- ScaleARGBFilterCols(rowptr, src_row_y, dst_width, x, dx);
- if (src_height > 1) {
- src_row_y += src_stride_y;
- if (yi & 1) {
- src_row_u += src_stride_u;
- src_row_v += src_stride_v;
- }
- }
- ScaleARGBFilterCols(rowptr + rowstride, src_row_y, dst_width, x, dx);
- if (src_height > 2) {
- src_row_y += src_stride_y;
- if (!(yi & 1)) {
- src_row_u += src_stride_u;
- src_row_v += src_stride_v;
- }
- }
-
- for (j = 0; j < dst_height; ++j) {
- yi = y >> 16;
- if (yi != lasty) {
- if (y > max_y) {
- y = max_y;
- yi = y >> 16;
- uv_yi = yi >> kYShift;
- src_row_y = src_y + yi * src_stride_y;
- src_row_u = src_u + uv_yi * src_stride_u;
- src_row_v = src_v + uv_yi * src_stride_v;
- }
- if (yi != lasty) {
- // TODO(fbarchard): Convert the clipped region of row.
- I422ToARGBRow(src_row_y, src_row_u, src_row_v, argb_row, src_width);
- ScaleARGBFilterCols(rowptr, argb_row, dst_width, x, dx);
- rowptr += rowstride;
- rowstride = -rowstride;
- lasty = yi;
- src_row_y += src_stride_y;
- if (yi & 1) {
- src_row_u += src_stride_u;
- src_row_v += src_stride_v;
- }
- }
- }
- if (filtering == kFilterLinear) {
- InterpolateRow(dst_argb, rowptr, 0, dst_width * 4, 0);
- } else {
- int yf = (y >> 8) & 255;
- InterpolateRow(dst_argb, rowptr, rowstride, dst_width * 4, yf);
- }
- dst_argb += dst_stride_argb;
- y += dy;
- }
- free_aligned_buffer_64(row);
- free_aligned_buffer_64(row_argb);
-}
-#endif
-
-// Scale ARGB to/from any dimensions, without interpolation.
-// Fixed point math is used for performance: The upper 16 bits
-// of x and dx is the integer part of the source position and
-// the lower 16 bits are the fixed decimal part.
-
-static void ScaleARGBSimple(int src_width, int src_height,
- int dst_width, int dst_height,
- int src_stride, int dst_stride,
- const uint8* src_argb, uint8* dst_argb,
- int x, int dx, int y, int dy) {
- int j;
- void (*ScaleARGBCols)(uint8* dst_argb, const uint8* src_argb,
- int dst_width, int x, int dx) =
- (src_width >= 32768) ? ScaleARGBCols64_C : ScaleARGBCols_C;
-#if defined(HAS_SCALEARGBCOLS_SSE2)
- if (TestCpuFlag(kCpuHasSSE2) && src_width < 32768) {
- ScaleARGBCols = ScaleARGBCols_SSE2;
- }
-#endif
-#if defined(HAS_SCALEARGBCOLS_NEON)
- if (TestCpuFlag(kCpuHasNEON)) {
- ScaleARGBCols = ScaleARGBCols_Any_NEON;
- if (IS_ALIGNED(dst_width, 8)) {
- ScaleARGBCols = ScaleARGBCols_NEON;
- }
- }
-#endif
- if (src_width * 2 == dst_width && x < 0x8000) {
- ScaleARGBCols = ScaleARGBColsUp2_C;
-#if defined(HAS_SCALEARGBCOLSUP2_SSE2)
- if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(dst_width, 8)) {
- ScaleARGBCols = ScaleARGBColsUp2_SSE2;
- }
-#endif
- }
-
- for (j = 0; j < dst_height; ++j) {
- ScaleARGBCols(dst_argb, src_argb + (y >> 16) * src_stride,
- dst_width, x, dx);
- dst_argb += dst_stride;
- y += dy;
- }
-}
-
-// ScaleARGB a ARGB.
-// This function in turn calls a scaling function
-// suitable for handling the desired resolutions.
-static void ScaleARGB(const uint8* src, int src_stride,
- int src_width, int src_height,
- uint8* dst, int dst_stride,
- int dst_width, int dst_height,
- int clip_x, int clip_y, int clip_width, int clip_height,
- enum FilterMode filtering) {
- // Initial source x/y coordinate and step values as 16.16 fixed point.
- int x = 0;
- int y = 0;
- int dx = 0;
- int dy = 0;
- // ARGB does not support box filter yet, but allow the user to pass it.
- // Simplify filtering when possible.
- filtering = ScaleFilterReduce(src_width, src_height,
- dst_width, dst_height,
- filtering);
-
- // Negative src_height means invert the image.
- if (src_height < 0) {
- src_height = -src_height;
- src = src + (src_height - 1) * src_stride;
- src_stride = -src_stride;
- }
- ScaleSlope(src_width, src_height, dst_width, dst_height, filtering,
- &x, &y, &dx, &dy);
- src_width = Abs(src_width);
- if (clip_x) {
- int64 clipf = (int64)(clip_x) * dx;
- x += (clipf & 0xffff);
- src += (clipf >> 16) * 4;
- dst += clip_x * 4;
- }
- if (clip_y) {
- int64 clipf = (int64)(clip_y) * dy;
- y += (clipf & 0xffff);
- src += (clipf >> 16) * src_stride;
- dst += clip_y * dst_stride;
- }
-
- // Special case for integer step values.
- if (((dx | dy) & 0xffff) == 0) {
- if (!dx || !dy) { // 1 pixel wide and/or tall.
- filtering = kFilterNone;
- } else {
- // Optimized even scale down. ie 2, 4, 6, 8, 10x.
- if (!(dx & 0x10000) && !(dy & 0x10000)) {
- if (dx == 0x20000) {
- // Optimized 1/2 downsample.
- ScaleARGBDown2(src_width, src_height,
- clip_width, clip_height,
- src_stride, dst_stride, src, dst,
- x, dx, y, dy, filtering);
- return;
- }
- if (dx == 0x40000 && filtering == kFilterBox) {
- // Optimized 1/4 box downsample.
- ScaleARGBDown4Box(src_width, src_height,
- clip_width, clip_height,
- src_stride, dst_stride, src, dst,
- x, dx, y, dy);
- return;
- }
- ScaleARGBDownEven(src_width, src_height,
- clip_width, clip_height,
- src_stride, dst_stride, src, dst,
- x, dx, y, dy, filtering);
- return;
- }
- // Optimized odd scale down. ie 3, 5, 7, 9x.
- if ((dx & 0x10000) && (dy & 0x10000)) {
- filtering = kFilterNone;
- if (dx == 0x10000 && dy == 0x10000) {
- // Straight copy.
- ARGBCopy(src + (y >> 16) * src_stride + (x >> 16) * 4, src_stride,
- dst, dst_stride, clip_width, clip_height);
- return;
- }
- }
- }
- }
- if (dx == 0x10000 && (x & 0xffff) == 0) {
- // Arbitrary scale vertically, but unscaled vertically.
- ScalePlaneVertical(src_height,
- clip_width, clip_height,
- src_stride, dst_stride, src, dst,
- x, y, dy, 4, filtering);
- return;
- }
- if (filtering && dy < 65536) {
- ScaleARGBBilinearUp(src_width, src_height,
- clip_width, clip_height,
- src_stride, dst_stride, src, dst,
- x, dx, y, dy, filtering);
- return;
- }
- if (filtering) {
- ScaleARGBBilinearDown(src_width, src_height,
- clip_width, clip_height,
- src_stride, dst_stride, src, dst,
- x, dx, y, dy, filtering);
- return;
- }
- ScaleARGBSimple(src_width, src_height, clip_width, clip_height,
- src_stride, dst_stride, src, dst,
- x, dx, y, dy);
-}
-
-LIBYUV_API
-int ARGBScaleClip(const uint8* src_argb, int src_stride_argb,
- int src_width, int src_height,
- uint8* dst_argb, int dst_stride_argb,
- int dst_width, int dst_height,
- int clip_x, int clip_y, int clip_width, int clip_height,
- enum FilterMode filtering) {
- if (!src_argb || src_width == 0 || src_height == 0 ||
- !dst_argb || dst_width <= 0 || dst_height <= 0 ||
- clip_x < 0 || clip_y < 0 ||
- clip_width > 32768 || clip_height > 32768 ||
- (clip_x + clip_width) > dst_width ||
- (clip_y + clip_height) > dst_height) {
- return -1;
- }
- ScaleARGB(src_argb, src_stride_argb, src_width, src_height,
- dst_argb, dst_stride_argb, dst_width, dst_height,
- clip_x, clip_y, clip_width, clip_height, filtering);
- return 0;
-}
-
-// Scale an ARGB image.
-LIBYUV_API
-int ARGBScale(const uint8* src_argb, int src_stride_argb,
- int src_width, int src_height,
- uint8* dst_argb, int dst_stride_argb,
- int dst_width, int dst_height,
- enum FilterMode filtering) {
- if (!src_argb || src_width == 0 || src_height == 0 ||
- src_width > 32768 || src_height > 32768 ||
- !dst_argb || dst_width <= 0 || dst_height <= 0) {
- return -1;
- }
- ScaleARGB(src_argb, src_stride_argb, src_width, src_height,
- dst_argb, dst_stride_argb, dst_width, dst_height,
- 0, 0, dst_width, dst_height, filtering);
- return 0;
-}
-
-#ifdef __cplusplus
-} // extern "C"
-} // namespace libyuv
-#endif
diff --git a/third_party/aom/third_party/libyuv/source/scale_common.cc b/third_party/aom/third_party/libyuv/source/scale_common.cc
deleted file mode 100644
index 1711f3d54..000000000
--- a/third_party/aom/third_party/libyuv/source/scale_common.cc
+++ /dev/null
@@ -1,1137 +0,0 @@
-/*
- * Copyright 2013 The LibYuv Project Authors. All rights reserved.
- *
- * Use of this source code is governed by a BSD-style license
- * that can be found in the LICENSE file in the root of the source
- * tree. An additional intellectual property rights grant can be found
- * in the file PATENTS. All contributing project authors may
- * be found in the AUTHORS file in the root of the source tree.
- */
-
-#include "libyuv/scale.h"
-
-#include <assert.h>
-#include <string.h>
-
-#include "libyuv/cpu_id.h"
-#include "libyuv/planar_functions.h" // For CopyARGB
-#include "libyuv/row.h"
-#include "libyuv/scale_row.h"
-
-#ifdef __cplusplus
-namespace libyuv {
-extern "C" {
-#endif
-
-static __inline int Abs(int v) {
- return v >= 0 ? v : -v;
-}
-
-// CPU agnostic row functions
-void ScaleRowDown2_C(const uint8* src_ptr, ptrdiff_t src_stride,
- uint8* dst, int dst_width) {
- int x;
- for (x = 0; x < dst_width - 1; x += 2) {
- dst[0] = src_ptr[1];
- dst[1] = src_ptr[3];
- dst += 2;
- src_ptr += 4;
- }
- if (dst_width & 1) {
- dst[0] = src_ptr[1];
- }
-}
-
-void ScaleRowDown2_16_C(const uint16* src_ptr, ptrdiff_t src_stride,
- uint16* dst, int dst_width) {
- int x;
- for (x = 0; x < dst_width - 1; x += 2) {
- dst[0] = src_ptr[1];
- dst[1] = src_ptr[3];
- dst += 2;
- src_ptr += 4;
- }
- if (dst_width & 1) {
- dst[0] = src_ptr[1];
- }
-}
-
-void ScaleRowDown2Linear_C(const uint8* src_ptr, ptrdiff_t src_stride,
- uint8* dst, int dst_width) {
- const uint8* s = src_ptr;
- int x;
- for (x = 0; x < dst_width - 1; x += 2) {
- dst[0] = (s[0] + s[1] + 1) >> 1;
- dst[1] = (s[2] + s[3] + 1) >> 1;
- dst += 2;
- s += 4;
- }
- if (dst_width & 1) {
- dst[0] = (s[0] + s[1] + 1) >> 1;
- }
-}
-
-void ScaleRowDown2Linear_16_C(const uint16* src_ptr, ptrdiff_t src_stride,
- uint16* dst, int dst_width) {
- const uint16* s = src_ptr;
- int x;
- for (x = 0; x < dst_width - 1; x += 2) {
- dst[0] = (s[0] + s[1] + 1) >> 1;
- dst[1] = (s[2] + s[3] + 1) >> 1;
- dst += 2;
- s += 4;
- }
- if (dst_width & 1) {
- dst[0] = (s[0] + s[1] + 1) >> 1;
- }
-}
-
-void ScaleRowDown2Box_C(const uint8* src_ptr, ptrdiff_t src_stride,
- uint8* dst, int dst_width) {
- const uint8* s = src_ptr;
- const uint8* t = src_ptr + src_stride;
- int x;
- for (x = 0; x < dst_width - 1; x += 2) {
- dst[0] = (s[0] + s[1] + t[0] + t[1] + 2) >> 2;
- dst[1] = (s[2] + s[3] + t[2] + t[3] + 2) >> 2;
- dst += 2;
- s += 4;
- t += 4;
- }
- if (dst_width & 1) {
- dst[0] = (s[0] + s[1] + t[0] + t[1] + 2) >> 2;
- }
-}
-
-void ScaleRowDown2Box_16_C(const uint16* src_ptr, ptrdiff_t src_stride,
- uint16* dst, int dst_width) {
- const uint16* s = src_ptr;
- const uint16* t = src_ptr + src_stride;
- int x;
- for (x = 0; x < dst_width - 1; x += 2) {
- dst[0] = (s[0] + s[1] + t[0] + t[1] + 2) >> 2;
- dst[1] = (s[2] + s[3] + t[2] + t[3] + 2) >> 2;
- dst += 2;
- s += 4;
- t += 4;
- }
- if (dst_width & 1) {
- dst[0] = (s[0] + s[1] + t[0] + t[1] + 2) >> 2;
- }
-}
-
-void ScaleRowDown4_C(const uint8* src_ptr, ptrdiff_t src_stride,
- uint8* dst, int dst_width) {
- int x;
- for (x = 0; x < dst_width - 1; x += 2) {
- dst[0] = src_ptr[2];
- dst[1] = src_ptr[6];
- dst += 2;
- src_ptr += 8;
- }
- if (dst_width & 1) {
- dst[0] = src_ptr[2];
- }
-}
-
-void ScaleRowDown4_16_C(const uint16* src_ptr, ptrdiff_t src_stride,
- uint16* dst, int dst_width) {
- int x;
- for (x = 0; x < dst_width - 1; x += 2) {
- dst[0] = src_ptr[2];
- dst[1] = src_ptr[6];
- dst += 2;
- src_ptr += 8;
- }
- if (dst_width & 1) {
- dst[0] = src_ptr[2];
- }
-}
-
-void ScaleRowDown4Box_C(const uint8* src_ptr, ptrdiff_t src_stride,
- uint8* dst, int dst_width) {
- intptr_t stride = src_stride;
- int x;
- for (x = 0; x < dst_width - 1; x += 2) {
- dst[0] = (src_ptr[0] + src_ptr[1] + src_ptr[2] + src_ptr[3] +
- src_ptr[stride + 0] + src_ptr[stride + 1] +
- src_ptr[stride + 2] + src_ptr[stride + 3] +
- src_ptr[stride * 2 + 0] + src_ptr[stride * 2 + 1] +
- src_ptr[stride * 2 + 2] + src_ptr[stride * 2 + 3] +
- src_ptr[stride * 3 + 0] + src_ptr[stride * 3 + 1] +
- src_ptr[stride * 3 + 2] + src_ptr[stride * 3 + 3] +
- 8) >> 4;
- dst[1] = (src_ptr[4] + src_ptr[5] + src_ptr[6] + src_ptr[7] +
- src_ptr[stride + 4] + src_ptr[stride + 5] +
- src_ptr[stride + 6] + src_ptr[stride + 7] +
- src_ptr[stride * 2 + 4] + src_ptr[stride * 2 + 5] +
- src_ptr[stride * 2 + 6] + src_ptr[stride * 2 + 7] +
- src_ptr[stride * 3 + 4] + src_ptr[stride * 3 + 5] +
- src_ptr[stride * 3 + 6] + src_ptr[stride * 3 + 7] +
- 8) >> 4;
- dst += 2;
- src_ptr += 8;
- }
- if (dst_width & 1) {
- dst[0] = (src_ptr[0] + src_ptr[1] + src_ptr[2] + src_ptr[3] +
- src_ptr[stride + 0] + src_ptr[stride + 1] +
- src_ptr[stride + 2] + src_ptr[stride + 3] +
- src_ptr[stride * 2 + 0] + src_ptr[stride * 2 + 1] +
- src_ptr[stride * 2 + 2] + src_ptr[stride * 2 + 3] +
- src_ptr[stride * 3 + 0] + src_ptr[stride * 3 + 1] +
- src_ptr[stride * 3 + 2] + src_ptr[stride * 3 + 3] +
- 8) >> 4;
- }
-}
-
-void ScaleRowDown4Box_16_C(const uint16* src_ptr, ptrdiff_t src_stride,
- uint16* dst, int dst_width) {
- intptr_t stride = src_stride;
- int x;
- for (x = 0; x < dst_width - 1; x += 2) {
- dst[0] = (src_ptr[0] + src_ptr[1] + src_ptr[2] + src_ptr[3] +
- src_ptr[stride + 0] + src_ptr[stride + 1] +
- src_ptr[stride + 2] + src_ptr[stride + 3] +
- src_ptr[stride * 2 + 0] + src_ptr[stride * 2 + 1] +
- src_ptr[stride * 2 + 2] + src_ptr[stride * 2 + 3] +
- src_ptr[stride * 3 + 0] + src_ptr[stride * 3 + 1] +
- src_ptr[stride * 3 + 2] + src_ptr[stride * 3 + 3] +
- 8) >> 4;
- dst[1] = (src_ptr[4] + src_ptr[5] + src_ptr[6] + src_ptr[7] +
- src_ptr[stride + 4] + src_ptr[stride + 5] +
- src_ptr[stride + 6] + src_ptr[stride + 7] +
- src_ptr[stride * 2 + 4] + src_ptr[stride * 2 + 5] +
- src_ptr[stride * 2 + 6] + src_ptr[stride * 2 + 7] +
- src_ptr[stride * 3 + 4] + src_ptr[stride * 3 + 5] +
- src_ptr[stride * 3 + 6] + src_ptr[stride * 3 + 7] +
- 8) >> 4;
- dst += 2;
- src_ptr += 8;
- }
- if (dst_width & 1) {
- dst[0] = (src_ptr[0] + src_ptr[1] + src_ptr[2] + src_ptr[3] +
- src_ptr[stride + 0] + src_ptr[stride + 1] +
- src_ptr[stride + 2] + src_ptr[stride + 3] +
- src_ptr[stride * 2 + 0] + src_ptr[stride * 2 + 1] +
- src_ptr[stride * 2 + 2] + src_ptr[stride * 2 + 3] +
- src_ptr[stride * 3 + 0] + src_ptr[stride * 3 + 1] +
- src_ptr[stride * 3 + 2] + src_ptr[stride * 3 + 3] +
- 8) >> 4;
- }
-}
-
-void ScaleRowDown34_C(const uint8* src_ptr, ptrdiff_t src_stride,
- uint8* dst, int dst_width) {
- int x;
- assert((dst_width % 3 == 0) && (dst_width > 0));
- for (x = 0; x < dst_width; x += 3) {
- dst[0] = src_ptr[0];
- dst[1] = src_ptr[1];
- dst[2] = src_ptr[3];
- dst += 3;
- src_ptr += 4;
- }
-}
-
-void ScaleRowDown34_16_C(const uint16* src_ptr, ptrdiff_t src_stride,
- uint16* dst, int dst_width) {
- int x;
- assert((dst_width % 3 == 0) && (dst_width > 0));
- for (x = 0; x < dst_width; x += 3) {
- dst[0] = src_ptr[0];
- dst[1] = src_ptr[1];
- dst[2] = src_ptr[3];
- dst += 3;
- src_ptr += 4;
- }
-}
-
-// Filter rows 0 and 1 together, 3 : 1
-void ScaleRowDown34_0_Box_C(const uint8* src_ptr, ptrdiff_t src_stride,
- uint8* d, int dst_width) {
- const uint8* s = src_ptr;
- const uint8* t = src_ptr + src_stride;
- int x;
- assert((dst_width % 3 == 0) && (dst_width > 0));
- for (x = 0; x < dst_width; x += 3) {
- uint8 a0 = (s[0] * 3 + s[1] * 1 + 2) >> 2;
- uint8 a1 = (s[1] * 1 + s[2] * 1 + 1) >> 1;
- uint8 a2 = (s[2] * 1 + s[3] * 3 + 2) >> 2;
- uint8 b0 = (t[0] * 3 + t[1] * 1 + 2) >> 2;
- uint8 b1 = (t[1] * 1 + t[2] * 1 + 1) >> 1;
- uint8 b2 = (t[2] * 1 + t[3] * 3 + 2) >> 2;
- d[0] = (a0 * 3 + b0 + 2) >> 2;
- d[1] = (a1 * 3 + b1 + 2) >> 2;
- d[2] = (a2 * 3 + b2 + 2) >> 2;
- d += 3;
- s += 4;
- t += 4;
- }
-}
-
-void ScaleRowDown34_0_Box_16_C(const uint16* src_ptr, ptrdiff_t src_stride,
- uint16* d, int dst_width) {
- const uint16* s = src_ptr;
- const uint16* t = src_ptr + src_stride;
- int x;
- assert((dst_width % 3 == 0) && (dst_width > 0));
- for (x = 0; x < dst_width; x += 3) {
- uint16 a0 = (s[0] * 3 + s[1] * 1 + 2) >> 2;
- uint16 a1 = (s[1] * 1 + s[2] * 1 + 1) >> 1;
- uint16 a2 = (s[2] * 1 + s[3] * 3 + 2) >> 2;
- uint16 b0 = (t[0] * 3 + t[1] * 1 + 2) >> 2;
- uint16 b1 = (t[1] * 1 + t[2] * 1 + 1) >> 1;
- uint16 b2 = (t[2] * 1 + t[3] * 3 + 2) >> 2;
- d[0] = (a0 * 3 + b0 + 2) >> 2;
- d[1] = (a1 * 3 + b1 + 2) >> 2;
- d[2] = (a2 * 3 + b2 + 2) >> 2;
- d += 3;
- s += 4;
- t += 4;
- }
-}
-
-// Filter rows 1 and 2 together, 1 : 1
-void ScaleRowDown34_1_Box_C(const uint8* src_ptr, ptrdiff_t src_stride,
- uint8* d, int dst_width) {
- const uint8* s = src_ptr;
- const uint8* t = src_ptr + src_stride;
- int x;
- assert((dst_width % 3 == 0) && (dst_width > 0));
- for (x = 0; x < dst_width; x += 3) {
- uint8 a0 = (s[0] * 3 + s[1] * 1 + 2) >> 2;
- uint8 a1 = (s[1] * 1 + s[2] * 1 + 1) >> 1;
- uint8 a2 = (s[2] * 1 + s[3] * 3 + 2) >> 2;
- uint8 b0 = (t[0] * 3 + t[1] * 1 + 2) >> 2;
- uint8 b1 = (t[1] * 1 + t[2] * 1 + 1) >> 1;
- uint8 b2 = (t[2] * 1 + t[3] * 3 + 2) >> 2;
- d[0] = (a0 + b0 + 1) >> 1;
- d[1] = (a1 + b1 + 1) >> 1;
- d[2] = (a2 + b2 + 1) >> 1;
- d += 3;
- s += 4;
- t += 4;
- }
-}
-
-void ScaleRowDown34_1_Box_16_C(const uint16* src_ptr, ptrdiff_t src_stride,
- uint16* d, int dst_width) {
- const uint16* s = src_ptr;
- const uint16* t = src_ptr + src_stride;
- int x;
- assert((dst_width % 3 == 0) && (dst_width > 0));
- for (x = 0; x < dst_width; x += 3) {
- uint16 a0 = (s[0] * 3 + s[1] * 1 + 2) >> 2;
- uint16 a1 = (s[1] * 1 + s[2] * 1 + 1) >> 1;
- uint16 a2 = (s[2] * 1 + s[3] * 3 + 2) >> 2;
- uint16 b0 = (t[0] * 3 + t[1] * 1 + 2) >> 2;
- uint16 b1 = (t[1] * 1 + t[2] * 1 + 1) >> 1;
- uint16 b2 = (t[2] * 1 + t[3] * 3 + 2) >> 2;
- d[0] = (a0 + b0 + 1) >> 1;
- d[1] = (a1 + b1 + 1) >> 1;
- d[2] = (a2 + b2 + 1) >> 1;
- d += 3;
- s += 4;
- t += 4;
- }
-}
-
-// Scales a single row of pixels using point sampling.
-void ScaleCols_C(uint8* dst_ptr, const uint8* src_ptr,
- int dst_width, int x, int dx) {
- int j;
- for (j = 0; j < dst_width - 1; j += 2) {
- dst_ptr[0] = src_ptr[x >> 16];
- x += dx;
- dst_ptr[1] = src_ptr[x >> 16];
- x += dx;
- dst_ptr += 2;
- }
- if (dst_width & 1) {
- dst_ptr[0] = src_ptr[x >> 16];
- }
-}
-
-void ScaleCols_16_C(uint16* dst_ptr, const uint16* src_ptr,
- int dst_width, int x, int dx) {
- int j;
- for (j = 0; j < dst_width - 1; j += 2) {
- dst_ptr[0] = src_ptr[x >> 16];
- x += dx;
- dst_ptr[1] = src_ptr[x >> 16];
- x += dx;
- dst_ptr += 2;
- }
- if (dst_width & 1) {
- dst_ptr[0] = src_ptr[x >> 16];
- }
-}
-
-// Scales a single row of pixels up by 2x using point sampling.
-void ScaleColsUp2_C(uint8* dst_ptr, const uint8* src_ptr,
- int dst_width, int x, int dx) {
- int j;
- for (j = 0; j < dst_width - 1; j += 2) {
- dst_ptr[1] = dst_ptr[0] = src_ptr[0];
- src_ptr += 1;
- dst_ptr += 2;
- }
- if (dst_width & 1) {
- dst_ptr[0] = src_ptr[0];
- }
-}
-
-void ScaleColsUp2_16_C(uint16* dst_ptr, const uint16* src_ptr,
- int dst_width, int x, int dx) {
- int j;
- for (j = 0; j < dst_width - 1; j += 2) {
- dst_ptr[1] = dst_ptr[0] = src_ptr[0];
- src_ptr += 1;
- dst_ptr += 2;
- }
- if (dst_width & 1) {
- dst_ptr[0] = src_ptr[0];
- }
-}
-
-// (1-f)a + fb can be replaced with a + f(b-a)
-#define BLENDER(a, b, f) (uint8)((int)(a) + \
- ((int)(f) * ((int)(b) - (int)(a)) >> 16))
-
-void ScaleFilterCols_C(uint8* dst_ptr, const uint8* src_ptr,
- int dst_width, int x, int dx) {
- int j;
- for (j = 0; j < dst_width - 1; j += 2) {
- int xi = x >> 16;
- int a = src_ptr[xi];
- int b = src_ptr[xi + 1];
- dst_ptr[0] = BLENDER(a, b, x & 0xffff);
- x += dx;
- xi = x >> 16;
- a = src_ptr[xi];
- b = src_ptr[xi + 1];
- dst_ptr[1] = BLENDER(a, b, x & 0xffff);
- x += dx;
- dst_ptr += 2;
- }
- if (dst_width & 1) {
- int xi = x >> 16;
- int a = src_ptr[xi];
- int b = src_ptr[xi + 1];
- dst_ptr[0] = BLENDER(a, b, x & 0xffff);
- }
-}
-
-void ScaleFilterCols64_C(uint8* dst_ptr, const uint8* src_ptr,
- int dst_width, int x32, int dx) {
- int64 x = (int64)(x32);
- int j;
- for (j = 0; j < dst_width - 1; j += 2) {
- int64 xi = x >> 16;
- int a = src_ptr[xi];
- int b = src_ptr[xi + 1];
- dst_ptr[0] = BLENDER(a, b, x & 0xffff);
- x += dx;
- xi = x >> 16;
- a = src_ptr[xi];
- b = src_ptr[xi + 1];
- dst_ptr[1] = BLENDER(a, b, x & 0xffff);
- x += dx;
- dst_ptr += 2;
- }
- if (dst_width & 1) {
- int64 xi = x >> 16;
- int a = src_ptr[xi];
- int b = src_ptr[xi + 1];
- dst_ptr[0] = BLENDER(a, b, x & 0xffff);
- }
-}
-#undef BLENDER
-
-#define BLENDER(a, b, f) (uint16)((int)(a) + \
- ((int)(f) * ((int)(b) - (int)(a)) >> 16))
-
-void ScaleFilterCols_16_C(uint16* dst_ptr, const uint16* src_ptr,
- int dst_width, int x, int dx) {
- int j;
- for (j = 0; j < dst_width - 1; j += 2) {
- int xi = x >> 16;
- int a = src_ptr[xi];
- int b = src_ptr[xi + 1];
- dst_ptr[0] = BLENDER(a, b, x & 0xffff);
- x += dx;
- xi = x >> 16;
- a = src_ptr[xi];
- b = src_ptr[xi + 1];
- dst_ptr[1] = BLENDER(a, b, x & 0xffff);
- x += dx;
- dst_ptr += 2;
- }
- if (dst_width & 1) {
- int xi = x >> 16;
- int a = src_ptr[xi];
- int b = src_ptr[xi + 1];
- dst_ptr[0] = BLENDER(a, b, x & 0xffff);
- }
-}
-
-void ScaleFilterCols64_16_C(uint16* dst_ptr, const uint16* src_ptr,
- int dst_width, int x32, int dx) {
- int64 x = (int64)(x32);
- int j;
- for (j = 0; j < dst_width - 1; j += 2) {
- int64 xi = x >> 16;
- int a = src_ptr[xi];
- int b = src_ptr[xi + 1];
- dst_ptr[0] = BLENDER(a, b, x & 0xffff);
- x += dx;
- xi = x >> 16;
- a = src_ptr[xi];
- b = src_ptr[xi + 1];
- dst_ptr[1] = BLENDER(a, b, x & 0xffff);
- x += dx;
- dst_ptr += 2;
- }
- if (dst_width & 1) {
- int64 xi = x >> 16;
- int a = src_ptr[xi];
- int b = src_ptr[xi + 1];
- dst_ptr[0] = BLENDER(a, b, x & 0xffff);
- }
-}
-#undef BLENDER
-
-void ScaleRowDown38_C(const uint8* src_ptr, ptrdiff_t src_stride,
- uint8* dst, int dst_width) {
- int x;
- assert(dst_width % 3 == 0);
- for (x = 0; x < dst_width; x += 3) {
- dst[0] = src_ptr[0];
- dst[1] = src_ptr[3];
- dst[2] = src_ptr[6];
- dst += 3;
- src_ptr += 8;
- }
-}
-
-void ScaleRowDown38_16_C(const uint16* src_ptr, ptrdiff_t src_stride,
- uint16* dst, int dst_width) {
- int x;
- assert(dst_width % 3 == 0);
- for (x = 0; x < dst_width; x += 3) {
- dst[0] = src_ptr[0];
- dst[1] = src_ptr[3];
- dst[2] = src_ptr[6];
- dst += 3;
- src_ptr += 8;
- }
-}
-
-// 8x3 -> 3x1
-void ScaleRowDown38_3_Box_C(const uint8* src_ptr,
- ptrdiff_t src_stride,
- uint8* dst_ptr, int dst_width) {
- intptr_t stride = src_stride;
- int i;
- assert((dst_width % 3 == 0) && (dst_width > 0));
- for (i = 0; i < dst_width; i += 3) {
- dst_ptr[0] = (src_ptr[0] + src_ptr[1] + src_ptr[2] +
- src_ptr[stride + 0] + src_ptr[stride + 1] +
- src_ptr[stride + 2] + src_ptr[stride * 2 + 0] +
- src_ptr[stride * 2 + 1] + src_ptr[stride * 2 + 2]) *
- (65536 / 9) >> 16;
- dst_ptr[1] = (src_ptr[3] + src_ptr[4] + src_ptr[5] +
- src_ptr[stride + 3] + src_ptr[stride + 4] +
- src_ptr[stride + 5] + src_ptr[stride * 2 + 3] +
- src_ptr[stride * 2 + 4] + src_ptr[stride * 2 + 5]) *
- (65536 / 9) >> 16;
- dst_ptr[2] = (src_ptr[6] + src_ptr[7] +
- src_ptr[stride + 6] + src_ptr[stride + 7] +
- src_ptr[stride * 2 + 6] + src_ptr[stride * 2 + 7]) *
- (65536 / 6) >> 16;
- src_ptr += 8;
- dst_ptr += 3;
- }
-}
-
-void ScaleRowDown38_3_Box_16_C(const uint16* src_ptr,
- ptrdiff_t src_stride,
- uint16* dst_ptr, int dst_width) {
- intptr_t stride = src_stride;
- int i;
- assert((dst_width % 3 == 0) && (dst_width > 0));
- for (i = 0; i < dst_width; i += 3) {
- dst_ptr[0] = (src_ptr[0] + src_ptr[1] + src_ptr[2] +
- src_ptr[stride + 0] + src_ptr[stride + 1] +
- src_ptr[stride + 2] + src_ptr[stride * 2 + 0] +
- src_ptr[stride * 2 + 1] + src_ptr[stride * 2 + 2]) *
- (65536 / 9) >> 16;
- dst_ptr[1] = (src_ptr[3] + src_ptr[4] + src_ptr[5] +
- src_ptr[stride + 3] + src_ptr[stride + 4] +
- src_ptr[stride + 5] + src_ptr[stride * 2 + 3] +
- src_ptr[stride * 2 + 4] + src_ptr[stride * 2 + 5]) *
- (65536 / 9) >> 16;
- dst_ptr[2] = (src_ptr[6] + src_ptr[7] +
- src_ptr[stride + 6] + src_ptr[stride + 7] +
- src_ptr[stride * 2 + 6] + src_ptr[stride * 2 + 7]) *
- (65536 / 6) >> 16;
- src_ptr += 8;
- dst_ptr += 3;
- }
-}
-
-// 8x2 -> 3x1
-void ScaleRowDown38_2_Box_C(const uint8* src_ptr, ptrdiff_t src_stride,
- uint8* dst_ptr, int dst_width) {
- intptr_t stride = src_stride;
- int i;
- assert((dst_width % 3 == 0) && (dst_width > 0));
- for (i = 0; i < dst_width; i += 3) {
- dst_ptr[0] = (src_ptr[0] + src_ptr[1] + src_ptr[2] +
- src_ptr[stride + 0] + src_ptr[stride + 1] +
- src_ptr[stride + 2]) * (65536 / 6) >> 16;
- dst_ptr[1] = (src_ptr[3] + src_ptr[4] + src_ptr[5] +
- src_ptr[stride + 3] + src_ptr[stride + 4] +
- src_ptr[stride + 5]) * (65536 / 6) >> 16;
- dst_ptr[2] = (src_ptr[6] + src_ptr[7] +
- src_ptr[stride + 6] + src_ptr[stride + 7]) *
- (65536 / 4) >> 16;
- src_ptr += 8;
- dst_ptr += 3;
- }
-}
-
-void ScaleRowDown38_2_Box_16_C(const uint16* src_ptr, ptrdiff_t src_stride,
- uint16* dst_ptr, int dst_width) {
- intptr_t stride = src_stride;
- int i;
- assert((dst_width % 3 == 0) && (dst_width > 0));
- for (i = 0; i < dst_width; i += 3) {
- dst_ptr[0] = (src_ptr[0] + src_ptr[1] + src_ptr[2] +
- src_ptr[stride + 0] + src_ptr[stride + 1] +
- src_ptr[stride + 2]) * (65536 / 6) >> 16;
- dst_ptr[1] = (src_ptr[3] + src_ptr[4] + src_ptr[5] +
- src_ptr[stride + 3] + src_ptr[stride + 4] +
- src_ptr[stride + 5]) * (65536 / 6) >> 16;
- dst_ptr[2] = (src_ptr[6] + src_ptr[7] +
- src_ptr[stride + 6] + src_ptr[stride + 7]) *
- (65536 / 4) >> 16;
- src_ptr += 8;
- dst_ptr += 3;
- }
-}
-
-void ScaleAddRow_C(const uint8* src_ptr, uint16* dst_ptr, int src_width) {
- int x;
- assert(src_width > 0);
- for (x = 0; x < src_width - 1; x += 2) {
- dst_ptr[0] += src_ptr[0];
- dst_ptr[1] += src_ptr[1];
- src_ptr += 2;
- dst_ptr += 2;
- }
- if (src_width & 1) {
- dst_ptr[0] += src_ptr[0];
- }
-}
-
-void ScaleAddRow_16_C(const uint16* src_ptr, uint32* dst_ptr, int src_width) {
- int x;
- assert(src_width > 0);
- for (x = 0; x < src_width - 1; x += 2) {
- dst_ptr[0] += src_ptr[0];
- dst_ptr[1] += src_ptr[1];
- src_ptr += 2;
- dst_ptr += 2;
- }
- if (src_width & 1) {
- dst_ptr[0] += src_ptr[0];
- }
-}
-
-void ScaleARGBRowDown2_C(const uint8* src_argb,
- ptrdiff_t src_stride,
- uint8* dst_argb, int dst_width) {
- const uint32* src = (const uint32*)(src_argb);
- uint32* dst = (uint32*)(dst_argb);
-
- int x;
- for (x = 0; x < dst_width - 1; x += 2) {
- dst[0] = src[1];
- dst[1] = src[3];
- src += 4;
- dst += 2;
- }
- if (dst_width & 1) {
- dst[0] = src[1];
- }
-}
-
-void ScaleARGBRowDown2Linear_C(const uint8* src_argb,
- ptrdiff_t src_stride,
- uint8* dst_argb, int dst_width) {
- int x;
- for (x = 0; x < dst_width; ++x) {
- dst_argb[0] = (src_argb[0] + src_argb[4] + 1) >> 1;
- dst_argb[1] = (src_argb[1] + src_argb[5] + 1) >> 1;
- dst_argb[2] = (src_argb[2] + src_argb[6] + 1) >> 1;
- dst_argb[3] = (src_argb[3] + src_argb[7] + 1) >> 1;
- src_argb += 8;
- dst_argb += 4;
- }
-}
-
-void ScaleARGBRowDown2Box_C(const uint8* src_argb, ptrdiff_t src_stride,
- uint8* dst_argb, int dst_width) {
- int x;
- for (x = 0; x < dst_width; ++x) {
- dst_argb[0] = (src_argb[0] + src_argb[4] +
- src_argb[src_stride] + src_argb[src_stride + 4] + 2) >> 2;
- dst_argb[1] = (src_argb[1] + src_argb[5] +
- src_argb[src_stride + 1] + src_argb[src_stride + 5] + 2) >> 2;
- dst_argb[2] = (src_argb[2] + src_argb[6] +
- src_argb[src_stride + 2] + src_argb[src_stride + 6] + 2) >> 2;
- dst_argb[3] = (src_argb[3] + src_argb[7] +
- src_argb[src_stride + 3] + src_argb[src_stride + 7] + 2) >> 2;
- src_argb += 8;
- dst_argb += 4;
- }
-}
-
-void ScaleARGBRowDownEven_C(const uint8* src_argb, ptrdiff_t src_stride,
- int src_stepx,
- uint8* dst_argb, int dst_width) {
- const uint32* src = (const uint32*)(src_argb);
- uint32* dst = (uint32*)(dst_argb);
-
- int x;
- for (x = 0; x < dst_width - 1; x += 2) {
- dst[0] = src[0];
- dst[1] = src[src_stepx];
- src += src_stepx * 2;
- dst += 2;
- }
- if (dst_width & 1) {
- dst[0] = src[0];
- }
-}
-
-void ScaleARGBRowDownEvenBox_C(const uint8* src_argb,
- ptrdiff_t src_stride,
- int src_stepx,
- uint8* dst_argb, int dst_width) {
- int x;
- for (x = 0; x < dst_width; ++x) {
- dst_argb[0] = (src_argb[0] + src_argb[4] +
- src_argb[src_stride] + src_argb[src_stride + 4] + 2) >> 2;
- dst_argb[1] = (src_argb[1] + src_argb[5] +
- src_argb[src_stride + 1] + src_argb[src_stride + 5] + 2) >> 2;
- dst_argb[2] = (src_argb[2] + src_argb[6] +
- src_argb[src_stride + 2] + src_argb[src_stride + 6] + 2) >> 2;
- dst_argb[3] = (src_argb[3] + src_argb[7] +
- src_argb[src_stride + 3] + src_argb[src_stride + 7] + 2) >> 2;
- src_argb += src_stepx * 4;
- dst_argb += 4;
- }
-}
-
-// Scales a single row of pixels using point sampling.
-void ScaleARGBCols_C(uint8* dst_argb, const uint8* src_argb,
- int dst_width, int x, int dx) {
- const uint32* src = (const uint32*)(src_argb);
- uint32* dst = (uint32*)(dst_argb);
- int j;
- for (j = 0; j < dst_width - 1; j += 2) {
- dst[0] = src[x >> 16];
- x += dx;
- dst[1] = src[x >> 16];
- x += dx;
- dst += 2;
- }
- if (dst_width & 1) {
- dst[0] = src[x >> 16];
- }
-}
-
-void ScaleARGBCols64_C(uint8* dst_argb, const uint8* src_argb,
- int dst_width, int x32, int dx) {
- int64 x = (int64)(x32);
- const uint32* src = (const uint32*)(src_argb);
- uint32* dst = (uint32*)(dst_argb);
- int j;
- for (j = 0; j < dst_width - 1; j += 2) {
- dst[0] = src[x >> 16];
- x += dx;
- dst[1] = src[x >> 16];
- x += dx;
- dst += 2;
- }
- if (dst_width & 1) {
- dst[0] = src[x >> 16];
- }
-}
-
-// Scales a single row of pixels up by 2x using point sampling.
-void ScaleARGBColsUp2_C(uint8* dst_argb, const uint8* src_argb,
- int dst_width, int x, int dx) {
- const uint32* src = (const uint32*)(src_argb);
- uint32* dst = (uint32*)(dst_argb);
- int j;
- for (j = 0; j < dst_width - 1; j += 2) {
- dst[1] = dst[0] = src[0];
- src += 1;
- dst += 2;
- }
- if (dst_width & 1) {
- dst[0] = src[0];
- }
-}
-
-// Mimics SSSE3 blender
-#define BLENDER1(a, b, f) ((a) * (0x7f ^ f) + (b) * f) >> 7
-#define BLENDERC(a, b, f, s) (uint32)( \
- BLENDER1(((a) >> s) & 255, ((b) >> s) & 255, f) << s)
-#define BLENDER(a, b, f) \
- BLENDERC(a, b, f, 24) | BLENDERC(a, b, f, 16) | \
- BLENDERC(a, b, f, 8) | BLENDERC(a, b, f, 0)
-
-void ScaleARGBFilterCols_C(uint8* dst_argb, const uint8* src_argb,
- int dst_width, int x, int dx) {
- const uint32* src = (const uint32*)(src_argb);
- uint32* dst = (uint32*)(dst_argb);
- int j;
- for (j = 0; j < dst_width - 1; j += 2) {
- int xi = x >> 16;
- int xf = (x >> 9) & 0x7f;
- uint32 a = src[xi];
- uint32 b = src[xi + 1];
- dst[0] = BLENDER(a, b, xf);
- x += dx;
- xi = x >> 16;
- xf = (x >> 9) & 0x7f;
- a = src[xi];
- b = src[xi + 1];
- dst[1] = BLENDER(a, b, xf);
- x += dx;
- dst += 2;
- }
- if (dst_width & 1) {
- int xi = x >> 16;
- int xf = (x >> 9) & 0x7f;
- uint32 a = src[xi];
- uint32 b = src[xi + 1];
- dst[0] = BLENDER(a, b, xf);
- }
-}
-
-void ScaleARGBFilterCols64_C(uint8* dst_argb, const uint8* src_argb,
- int dst_width, int x32, int dx) {
- int64 x = (int64)(x32);
- const uint32* src = (const uint32*)(src_argb);
- uint32* dst = (uint32*)(dst_argb);
- int j;
- for (j = 0; j < dst_width - 1; j += 2) {
- int64 xi = x >> 16;
- int xf = (x >> 9) & 0x7f;
- uint32 a = src[xi];
- uint32 b = src[xi + 1];
- dst[0] = BLENDER(a, b, xf);
- x += dx;
- xi = x >> 16;
- xf = (x >> 9) & 0x7f;
- a = src[xi];
- b = src[xi + 1];
- dst[1] = BLENDER(a, b, xf);
- x += dx;
- dst += 2;
- }
- if (dst_width & 1) {
- int64 xi = x >> 16;
- int xf = (x >> 9) & 0x7f;
- uint32 a = src[xi];
- uint32 b = src[xi + 1];
- dst[0] = BLENDER(a, b, xf);
- }
-}
-#undef BLENDER1
-#undef BLENDERC
-#undef BLENDER
-
-// Scale plane vertically with bilinear interpolation.
-void ScalePlaneVertical(int src_height,
- int dst_width, int dst_height,
- int src_stride, int dst_stride,
- const uint8* src_argb, uint8* dst_argb,
- int x, int y, int dy,
- int bpp, enum FilterMode filtering) {
- // TODO(fbarchard): Allow higher bpp.
- int dst_width_bytes = dst_width * bpp;
- void (*InterpolateRow)(uint8* dst_argb, const uint8* src_argb,
- ptrdiff_t src_stride, int dst_width, int source_y_fraction) =
- InterpolateRow_C;
- const int max_y = (src_height > 1) ? ((src_height - 1) << 16) - 1 : 0;
- int j;
- assert(bpp >= 1 && bpp <= 4);
- assert(src_height != 0);
- assert(dst_width > 0);
- assert(dst_height > 0);
- src_argb += (x >> 16) * bpp;
-#if defined(HAS_INTERPOLATEROW_SSE2)
- if (TestCpuFlag(kCpuHasSSE2)) {
- InterpolateRow = InterpolateRow_Any_SSE2;
- if (IS_ALIGNED(dst_width_bytes, 16)) {
- InterpolateRow = InterpolateRow_SSE2;
- }
- }
-#endif
-#if defined(HAS_INTERPOLATEROW_SSSE3)
- if (TestCpuFlag(kCpuHasSSSE3)) {
- InterpolateRow = InterpolateRow_Any_SSSE3;
- if (IS_ALIGNED(dst_width_bytes, 16)) {
- InterpolateRow = InterpolateRow_SSSE3;
- }
- }
-#endif
-#if defined(HAS_INTERPOLATEROW_AVX2)
- if (TestCpuFlag(kCpuHasAVX2)) {
- InterpolateRow = InterpolateRow_Any_AVX2;
- if (IS_ALIGNED(dst_width_bytes, 32)) {
- InterpolateRow = InterpolateRow_AVX2;
- }
- }
-#endif
-#if defined(HAS_INTERPOLATEROW_NEON)
- if (TestCpuFlag(kCpuHasNEON)) {
- InterpolateRow = InterpolateRow_Any_NEON;
- if (IS_ALIGNED(dst_width_bytes, 16)) {
- InterpolateRow = InterpolateRow_NEON;
- }
- }
-#endif
-#if defined(HAS_INTERPOLATEROW_MIPS_DSPR2)
- if (TestCpuFlag(kCpuHasMIPS_DSPR2) &&
- IS_ALIGNED(src_argb, 4) && IS_ALIGNED(src_stride, 4) &&
- IS_ALIGNED(dst_argb, 4) && IS_ALIGNED(dst_stride, 4)) {
- InterpolateRow = InterpolateRow_Any_MIPS_DSPR2;
- if (IS_ALIGNED(dst_width_bytes, 4)) {
- InterpolateRow = InterpolateRow_MIPS_DSPR2;
- }
- }
-#endif
- for (j = 0; j < dst_height; ++j) {
- int yi;
- int yf;
- if (y > max_y) {
- y = max_y;
- }
- yi = y >> 16;
- yf = filtering ? ((y >> 8) & 255) : 0;
- InterpolateRow(dst_argb, src_argb + yi * src_stride,
- src_stride, dst_width_bytes, yf);
- dst_argb += dst_stride;
- y += dy;
- }
-}
-void ScalePlaneVertical_16(int src_height,
- int dst_width, int dst_height,
- int src_stride, int dst_stride,
- const uint16* src_argb, uint16* dst_argb,
- int x, int y, int dy,
- int wpp, enum FilterMode filtering) {
- // TODO(fbarchard): Allow higher wpp.
- int dst_width_words = dst_width * wpp;
- void (*InterpolateRow)(uint16* dst_argb, const uint16* src_argb,
- ptrdiff_t src_stride, int dst_width, int source_y_fraction) =
- InterpolateRow_16_C;
- const int max_y = (src_height > 1) ? ((src_height - 1) << 16) - 1 : 0;
- int j;
- assert(wpp >= 1 && wpp <= 2);
- assert(src_height != 0);
- assert(dst_width > 0);
- assert(dst_height > 0);
- src_argb += (x >> 16) * wpp;
-#if defined(HAS_INTERPOLATEROW_16_SSE2)
- if (TestCpuFlag(kCpuHasSSE2)) {
- InterpolateRow = InterpolateRow_Any_16_SSE2;
- if (IS_ALIGNED(dst_width_bytes, 16)) {
- InterpolateRow = InterpolateRow_16_SSE2;
- }
- }
-#endif
-#if defined(HAS_INTERPOLATEROW_16_SSSE3)
- if (TestCpuFlag(kCpuHasSSSE3)) {
- InterpolateRow = InterpolateRow_Any_16_SSSE3;
- if (IS_ALIGNED(dst_width_bytes, 16)) {
- InterpolateRow = InterpolateRow_16_SSSE3;
- }
- }
-#endif
-#if defined(HAS_INTERPOLATEROW_16_AVX2)
- if (TestCpuFlag(kCpuHasAVX2)) {
- InterpolateRow = InterpolateRow_Any_16_AVX2;
- if (IS_ALIGNED(dst_width_bytes, 32)) {
- InterpolateRow = InterpolateRow_16_AVX2;
- }
- }
-#endif
-#if defined(HAS_INTERPOLATEROW_16_NEON)
- if (TestCpuFlag(kCpuHasNEON)) {
- InterpolateRow = InterpolateRow_Any_16_NEON;
- if (IS_ALIGNED(dst_width_bytes, 16)) {
- InterpolateRow = InterpolateRow_16_NEON;
- }
- }
-#endif
-#if defined(HAS_INTERPOLATEROW_16_MIPS_DSPR2)
- if (TestCpuFlag(kCpuHasMIPS_DSPR2) &&
- IS_ALIGNED(src_argb, 4) && IS_ALIGNED(src_stride, 4) &&
- IS_ALIGNED(dst_argb, 4) && IS_ALIGNED(dst_stride, 4)) {
- InterpolateRow = InterpolateRow_Any_16_MIPS_DSPR2;
- if (IS_ALIGNED(dst_width_bytes, 4)) {
- InterpolateRow = InterpolateRow_16_MIPS_DSPR2;
- }
- }
-#endif
- for (j = 0; j < dst_height; ++j) {
- int yi;
- int yf;
- if (y > max_y) {
- y = max_y;
- }
- yi = y >> 16;
- yf = filtering ? ((y >> 8) & 255) : 0;
- InterpolateRow(dst_argb, src_argb + yi * src_stride,
- src_stride, dst_width_words, yf);
- dst_argb += dst_stride;
- y += dy;
- }
-}
-
-// Simplify the filtering based on scale factors.
-enum FilterMode ScaleFilterReduce(int src_width, int src_height,
- int dst_width, int dst_height,
- enum FilterMode filtering) {
- if (src_width < 0) {
- src_width = -src_width;
- }
- if (src_height < 0) {
- src_height = -src_height;
- }
- if (filtering == kFilterBox) {
- // If scaling both axis to 0.5 or larger, switch from Box to Bilinear.
- if (dst_width * 2 >= src_width && dst_height * 2 >= src_height) {
- filtering = kFilterBilinear;
- }
- }
- if (filtering == kFilterBilinear) {
- if (src_height == 1) {
- filtering = kFilterLinear;
- }
- // TODO(fbarchard): Detect any odd scale factor and reduce to Linear.
- if (dst_height == src_height || dst_height * 3 == src_height) {
- filtering = kFilterLinear;
- }
- // TODO(fbarchard): Remove 1 pixel wide filter restriction, which is to
- // avoid reading 2 pixels horizontally that causes memory exception.
- if (src_width == 1) {
- filtering = kFilterNone;
- }
- }
- if (filtering == kFilterLinear) {
- if (src_width == 1) {
- filtering = kFilterNone;
- }
- // TODO(fbarchard): Detect any odd scale factor and reduce to None.
- if (dst_width == src_width || dst_width * 3 == src_width) {
- filtering = kFilterNone;
- }
- }
- return filtering;
-}
-
-// Divide num by div and return as 16.16 fixed point result.
-int FixedDiv_C(int num, int div) {
- return (int)(((int64)(num) << 16) / div);
-}
-
-// Divide num by div and return as 16.16 fixed point result.
-int FixedDiv1_C(int num, int div) {
- return (int)((((int64)(num) << 16) - 0x00010001) /
- (div - 1));
-}
-
-#define CENTERSTART(dx, s) (dx < 0) ? -((-dx >> 1) + s) : ((dx >> 1) + s)
-
-// Compute slope values for stepping.
-void ScaleSlope(int src_width, int src_height,
- int dst_width, int dst_height,
- enum FilterMode filtering,
- int* x, int* y, int* dx, int* dy) {
- assert(x != NULL);
- assert(y != NULL);
- assert(dx != NULL);
- assert(dy != NULL);
- assert(src_width != 0);
- assert(src_height != 0);
- assert(dst_width > 0);
- assert(dst_height > 0);
- // Check for 1 pixel and avoid FixedDiv overflow.
- if (dst_width == 1 && src_width >= 32768) {
- dst_width = src_width;
- }
- if (dst_height == 1 && src_height >= 32768) {
- dst_height = src_height;
- }
- if (filtering == kFilterBox) {
- // Scale step for point sampling duplicates all pixels equally.
- *dx = FixedDiv(Abs(src_width), dst_width);
- *dy = FixedDiv(src_height, dst_height);
- *x = 0;
- *y = 0;
- } else if (filtering == kFilterBilinear) {
- // Scale step for bilinear sampling renders last pixel once for upsample.
- if (dst_width <= Abs(src_width)) {
- *dx = FixedDiv(Abs(src_width), dst_width);
- *x = CENTERSTART(*dx, -32768); // Subtract 0.5 (32768) to center filter.
- } else if (dst_width > 1) {
- *dx = FixedDiv1(Abs(src_width), dst_width);
- *x = 0;
- }
- if (dst_height <= src_height) {
- *dy = FixedDiv(src_height, dst_height);
- *y = CENTERSTART(*dy, -32768); // Subtract 0.5 (32768) to center filter.
- } else if (dst_height > 1) {
- *dy = FixedDiv1(src_height, dst_height);
- *y = 0;
- }
- } else if (filtering == kFilterLinear) {
- // Scale step for bilinear sampling renders last pixel once for upsample.
- if (dst_width <= Abs(src_width)) {
- *dx = FixedDiv(Abs(src_width), dst_width);
- *x = CENTERSTART(*dx, -32768); // Subtract 0.5 (32768) to center filter.
- } else if (dst_width > 1) {
- *dx = FixedDiv1(Abs(src_width), dst_width);
- *x = 0;
- }
- *dy = FixedDiv(src_height, dst_height);
- *y = *dy >> 1;
- } else {
- // Scale step for point sampling duplicates all pixels equally.
- *dx = FixedDiv(Abs(src_width), dst_width);
- *dy = FixedDiv(src_height, dst_height);
- *x = CENTERSTART(*dx, 0);
- *y = CENTERSTART(*dy, 0);
- }
- // Negative src_width means horizontally mirror.
- if (src_width < 0) {
- *x += (dst_width - 1) * *dx;
- *dx = -*dx;
- // src_width = -src_width; // Caller must do this.
- }
-}
-#undef CENTERSTART
-
-#ifdef __cplusplus
-} // extern "C"
-} // namespace libyuv
-#endif
diff --git a/third_party/aom/third_party/libyuv/source/scale_gcc.cc b/third_party/aom/third_party/libyuv/source/scale_gcc.cc
deleted file mode 100644
index 8a6ac5459..000000000
--- a/third_party/aom/third_party/libyuv/source/scale_gcc.cc
+++ /dev/null
@@ -1,1089 +0,0 @@
-/*
- * Copyright 2013 The LibYuv Project Authors. All rights reserved.
- *
- * Use of this source code is governed by a BSD-style license
- * that can be found in the LICENSE file in the root of the source
- * tree. An additional intellectual property rights grant can be found
- * in the file PATENTS. All contributing project authors may
- * be found in the AUTHORS file in the root of the source tree.
- */
-
-#include "libyuv/row.h"
-
-#ifdef __cplusplus
-namespace libyuv {
-extern "C" {
-#endif
-
-// This module is for GCC x86 and x64.
-#if !defined(LIBYUV_DISABLE_X86) && (defined(__x86_64__) || defined(__i386__))
-
-// Offsets for source bytes 0 to 9
-static uvec8 kShuf0 =
- { 0, 1, 3, 4, 5, 7, 8, 9, 128, 128, 128, 128, 128, 128, 128, 128 };
-
-// Offsets for source bytes 11 to 20 with 8 subtracted = 3 to 12.
-static uvec8 kShuf1 =
- { 3, 4, 5, 7, 8, 9, 11, 12, 128, 128, 128, 128, 128, 128, 128, 128 };
-
-// Offsets for source bytes 21 to 31 with 16 subtracted = 5 to 31.
-static uvec8 kShuf2 =
- { 5, 7, 8, 9, 11, 12, 13, 15, 128, 128, 128, 128, 128, 128, 128, 128 };
-
-// Offsets for source bytes 0 to 10
-static uvec8 kShuf01 =
- { 0, 1, 1, 2, 2, 3, 4, 5, 5, 6, 6, 7, 8, 9, 9, 10 };
-
-// Offsets for source bytes 10 to 21 with 8 subtracted = 3 to 13.
-static uvec8 kShuf11 =
- { 2, 3, 4, 5, 5, 6, 6, 7, 8, 9, 9, 10, 10, 11, 12, 13 };
-
-// Offsets for source bytes 21 to 31 with 16 subtracted = 5 to 31.
-static uvec8 kShuf21 =
- { 5, 6, 6, 7, 8, 9, 9, 10, 10, 11, 12, 13, 13, 14, 14, 15 };
-
-// Coefficients for source bytes 0 to 10
-static uvec8 kMadd01 =
- { 3, 1, 2, 2, 1, 3, 3, 1, 2, 2, 1, 3, 3, 1, 2, 2 };
-
-// Coefficients for source bytes 10 to 21
-static uvec8 kMadd11 =
- { 1, 3, 3, 1, 2, 2, 1, 3, 3, 1, 2, 2, 1, 3, 3, 1 };
-
-// Coefficients for source bytes 21 to 31
-static uvec8 kMadd21 =
- { 2, 2, 1, 3, 3, 1, 2, 2, 1, 3, 3, 1, 2, 2, 1, 3 };
-
-// Coefficients for source bytes 21 to 31
-static vec16 kRound34 =
- { 2, 2, 2, 2, 2, 2, 2, 2 };
-
-static uvec8 kShuf38a =
- { 0, 3, 6, 8, 11, 14, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 };
-
-static uvec8 kShuf38b =
- { 128, 128, 128, 128, 128, 128, 0, 3, 6, 8, 11, 14, 128, 128, 128, 128 };
-
-// Arrange words 0,3,6 into 0,1,2
-static uvec8 kShufAc =
- { 0, 1, 6, 7, 12, 13, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 };
-
-// Arrange words 0,3,6 into 3,4,5
-static uvec8 kShufAc3 =
- { 128, 128, 128, 128, 128, 128, 0, 1, 6, 7, 12, 13, 128, 128, 128, 128 };
-
-// Scaling values for boxes of 3x3 and 2x3
-static uvec16 kScaleAc33 =
- { 65536 / 9, 65536 / 9, 65536 / 6, 65536 / 9, 65536 / 9, 65536 / 6, 0, 0 };
-
-// Arrange first value for pixels 0,1,2,3,4,5
-static uvec8 kShufAb0 =
- { 0, 128, 3, 128, 6, 128, 8, 128, 11, 128, 14, 128, 128, 128, 128, 128 };
-
-// Arrange second value for pixels 0,1,2,3,4,5
-static uvec8 kShufAb1 =
- { 1, 128, 4, 128, 7, 128, 9, 128, 12, 128, 15, 128, 128, 128, 128, 128 };
-
-// Arrange third value for pixels 0,1,2,3,4,5
-static uvec8 kShufAb2 =
- { 2, 128, 5, 128, 128, 128, 10, 128, 13, 128, 128, 128, 128, 128, 128, 128 };
-
-// Scaling values for boxes of 3x2 and 2x2
-static uvec16 kScaleAb2 =
- { 65536 / 3, 65536 / 3, 65536 / 2, 65536 / 3, 65536 / 3, 65536 / 2, 0, 0 };
-
-// GCC versions of row functions are verbatim conversions from Visual C.
-// Generated using gcc disassembly on Visual C object file:
-// objdump -D yuvscaler.obj >yuvscaler.txt
-
-void ScaleRowDown2_SSE2(const uint8* src_ptr, ptrdiff_t src_stride,
- uint8* dst_ptr, int dst_width) {
- asm volatile (
- LABELALIGN
- "1: \n"
- "movdqu " MEMACCESS(0) ",%%xmm0 \n"
- "movdqu " MEMACCESS2(0x10,0) ",%%xmm1 \n"
- "lea " MEMLEA(0x20,0) ",%0 \n"
- "psrlw $0x8,%%xmm0 \n"
- "psrlw $0x8,%%xmm1 \n"
- "packuswb %%xmm1,%%xmm0 \n"
- "movdqu %%xmm0," MEMACCESS(1) " \n"
- "lea " MEMLEA(0x10,1) ",%1 \n"
- "sub $0x10,%2 \n"
- "jg 1b \n"
- : "+r"(src_ptr), // %0
- "+r"(dst_ptr), // %1
- "+r"(dst_width) // %2
- :: "memory", "cc", "xmm0", "xmm1"
- );
-}
-
-void ScaleRowDown2Linear_SSE2(const uint8* src_ptr, ptrdiff_t src_stride,
- uint8* dst_ptr, int dst_width) {
- asm volatile (
- "pcmpeqb %%xmm5,%%xmm5 \n"
- "psrlw $0x8,%%xmm5 \n"
-
- LABELALIGN
- "1: \n"
- "movdqu " MEMACCESS(0) ",%%xmm0 \n"
- "movdqu " MEMACCESS2(0x10, 0) ",%%xmm1 \n"
- "lea " MEMLEA(0x20,0) ",%0 \n"
- "movdqa %%xmm0,%%xmm2 \n"
- "psrlw $0x8,%%xmm0 \n"
- "movdqa %%xmm1,%%xmm3 \n"
- "psrlw $0x8,%%xmm1 \n"
- "pand %%xmm5,%%xmm2 \n"
- "pand %%xmm5,%%xmm3 \n"
- "pavgw %%xmm2,%%xmm0 \n"
- "pavgw %%xmm3,%%xmm1 \n"
- "packuswb %%xmm1,%%xmm0 \n"
- "movdqu %%xmm0," MEMACCESS(1) " \n"
- "lea " MEMLEA(0x10,1) ",%1 \n"
- "sub $0x10,%2 \n"
- "jg 1b \n"
- : "+r"(src_ptr), // %0
- "+r"(dst_ptr), // %1
- "+r"(dst_width) // %2
- :: "memory", "cc", "xmm0", "xmm1", "xmm5"
- );
-}
-
-void ScaleRowDown2Box_SSE2(const uint8* src_ptr, ptrdiff_t src_stride,
- uint8* dst_ptr, int dst_width) {
- asm volatile (
- "pcmpeqb %%xmm5,%%xmm5 \n"
- "psrlw $0x8,%%xmm5 \n"
-
- LABELALIGN
- "1: \n"
- "movdqu " MEMACCESS(0) ",%%xmm0 \n"
- "movdqu " MEMACCESS2(0x10,0) ",%%xmm1 \n"
- MEMOPREG(movdqu,0x00,0,3,1,xmm2) // movdqu (%0,%3,1),%%xmm2
- MEMOPREG(movdqu,0x10,0,3,1,xmm3) // movdqu 0x10(%0,%3,1),%%xmm3
- "lea " MEMLEA(0x20,0) ",%0 \n"
- "pavgb %%xmm2,%%xmm0 \n"
- "pavgb %%xmm3,%%xmm1 \n"
- "movdqa %%xmm0,%%xmm2 \n"
- "psrlw $0x8,%%xmm0 \n"
- "movdqa %%xmm1,%%xmm3 \n"
- "psrlw $0x8,%%xmm1 \n"
- "pand %%xmm5,%%xmm2 \n"
- "pand %%xmm5,%%xmm3 \n"
- "pavgw %%xmm2,%%xmm0 \n"
- "pavgw %%xmm3,%%xmm1 \n"
- "packuswb %%xmm1,%%xmm0 \n"
- "movdqu %%xmm0," MEMACCESS(1) " \n"
- "lea " MEMLEA(0x10,1) ",%1 \n"
- "sub $0x10,%2 \n"
- "jg 1b \n"
- : "+r"(src_ptr), // %0
- "+r"(dst_ptr), // %1
- "+r"(dst_width) // %2
- : "r"((intptr_t)(src_stride)) // %3
- : "memory", "cc", NACL_R14
- "xmm0", "xmm1", "xmm2", "xmm3", "xmm5"
- );
-}
-
-void ScaleRowDown4_SSE2(const uint8* src_ptr, ptrdiff_t src_stride,
- uint8* dst_ptr, int dst_width) {
- asm volatile (
- "pcmpeqb %%xmm5,%%xmm5 \n"
- "psrld $0x18,%%xmm5 \n"
- "pslld $0x10,%%xmm5 \n"
-
- LABELALIGN
- "1: \n"
- "movdqu " MEMACCESS(0) ",%%xmm0 \n"
- "movdqu " MEMACCESS2(0x10,0) ",%%xmm1 \n"
- "lea " MEMLEA(0x20,0) ",%0 \n"
- "pand %%xmm5,%%xmm0 \n"
- "pand %%xmm5,%%xmm1 \n"
- "packuswb %%xmm1,%%xmm0 \n"
- "psrlw $0x8,%%xmm0 \n"
- "packuswb %%xmm0,%%xmm0 \n"
- "movq %%xmm0," MEMACCESS(1) " \n"
- "lea " MEMLEA(0x8,1) ",%1 \n"
- "sub $0x8,%2 \n"
- "jg 1b \n"
- : "+r"(src_ptr), // %0
- "+r"(dst_ptr), // %1
- "+r"(dst_width) // %2
- :: "memory", "cc", "xmm0", "xmm1", "xmm5"
- );
-}
-
-void ScaleRowDown4Box_SSE2(const uint8* src_ptr, ptrdiff_t src_stride,
- uint8* dst_ptr, int dst_width) {
- intptr_t stridex3 = 0;
- asm volatile (
- "pcmpeqb %%xmm7,%%xmm7 \n"
- "psrlw $0x8,%%xmm7 \n"
- "lea " MEMLEA4(0x00,4,4,2) ",%3 \n"
-
- LABELALIGN
- "1: \n"
- "movdqu " MEMACCESS(0) ",%%xmm0 \n"
- "movdqu " MEMACCESS2(0x10,0) ",%%xmm1 \n"
- MEMOPREG(movdqu,0x00,0,4,1,xmm2) // movdqu (%0,%4,1),%%xmm2
- MEMOPREG(movdqu,0x10,0,4,1,xmm3) // movdqu 0x10(%0,%4,1),%%xmm3
- "pavgb %%xmm2,%%xmm0 \n"
- "pavgb %%xmm3,%%xmm1 \n"
- MEMOPREG(movdqu,0x00,0,4,2,xmm2) // movdqu (%0,%4,2),%%xmm2
- MEMOPREG(movdqu,0x10,0,4,2,xmm3) // movdqu 0x10(%0,%4,2),%%xmm3
- MEMOPREG(movdqu,0x00,0,3,1,xmm4) // movdqu (%0,%3,1),%%xmm4
- MEMOPREG(movdqu,0x10,0,3,1,xmm5) // movdqu 0x10(%0,%3,1),%%xmm5
- "lea " MEMLEA(0x20,0) ",%0 \n"
- "pavgb %%xmm4,%%xmm2 \n"
- "pavgb %%xmm2,%%xmm0 \n"
- "pavgb %%xmm5,%%xmm3 \n"
- "pavgb %%xmm3,%%xmm1 \n"
- "movdqa %%xmm0,%%xmm2 \n"
- "psrlw $0x8,%%xmm0 \n"
- "movdqa %%xmm1,%%xmm3 \n"
- "psrlw $0x8,%%xmm1 \n"
- "pand %%xmm7,%%xmm2 \n"
- "pand %%xmm7,%%xmm3 \n"
- "pavgw %%xmm2,%%xmm0 \n"
- "pavgw %%xmm3,%%xmm1 \n"
- "packuswb %%xmm1,%%xmm0 \n"
- "movdqa %%xmm0,%%xmm2 \n"
- "psrlw $0x8,%%xmm0 \n"
- "pand %%xmm7,%%xmm2 \n"
- "pavgw %%xmm2,%%xmm0 \n"
- "packuswb %%xmm0,%%xmm0 \n"
- "movq %%xmm0," MEMACCESS(1) " \n"
- "lea " MEMLEA(0x8,1) ",%1 \n"
- "sub $0x8,%2 \n"
- "jg 1b \n"
- : "+r"(src_ptr), // %0
- "+r"(dst_ptr), // %1
- "+r"(dst_width), // %2
- "+r"(stridex3) // %3
- : "r"((intptr_t)(src_stride)) // %4
- : "memory", "cc", NACL_R14
- "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm7"
- );
-}
-
-void ScaleRowDown34_SSSE3(const uint8* src_ptr, ptrdiff_t src_stride,
- uint8* dst_ptr, int dst_width) {
- asm volatile (
- "movdqa %0,%%xmm3 \n"
- "movdqa %1,%%xmm4 \n"
- "movdqa %2,%%xmm5 \n"
- :
- : "m"(kShuf0), // %0
- "m"(kShuf1), // %1
- "m"(kShuf2) // %2
- );
- asm volatile (
- LABELALIGN
- "1: \n"
- "movdqu " MEMACCESS(0) ",%%xmm0 \n"
- "movdqu " MEMACCESS2(0x10,0) ",%%xmm2 \n"
- "lea " MEMLEA(0x20,0) ",%0 \n"
- "movdqa %%xmm2,%%xmm1 \n"
- "palignr $0x8,%%xmm0,%%xmm1 \n"
- "pshufb %%xmm3,%%xmm0 \n"
- "pshufb %%xmm4,%%xmm1 \n"
- "pshufb %%xmm5,%%xmm2 \n"
- "movq %%xmm0," MEMACCESS(1) " \n"
- "movq %%xmm1," MEMACCESS2(0x8,1) " \n"
- "movq %%xmm2," MEMACCESS2(0x10,1) " \n"
- "lea " MEMLEA(0x18,1) ",%1 \n"
- "sub $0x18,%2 \n"
- "jg 1b \n"
- : "+r"(src_ptr), // %0
- "+r"(dst_ptr), // %1
- "+r"(dst_width) // %2
- :: "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5"
- );
-}
-
-void ScaleRowDown34_1_Box_SSSE3(const uint8* src_ptr,
- ptrdiff_t src_stride,
- uint8* dst_ptr, int dst_width) {
- asm volatile (
- "movdqa %0,%%xmm2 \n" // kShuf01
- "movdqa %1,%%xmm3 \n" // kShuf11
- "movdqa %2,%%xmm4 \n" // kShuf21
- :
- : "m"(kShuf01), // %0
- "m"(kShuf11), // %1
- "m"(kShuf21) // %2
- );
- asm volatile (
- "movdqa %0,%%xmm5 \n" // kMadd01
- "movdqa %1,%%xmm0 \n" // kMadd11
- "movdqa %2,%%xmm1 \n" // kRound34
- :
- : "m"(kMadd01), // %0
- "m"(kMadd11), // %1
- "m"(kRound34) // %2
- );
- asm volatile (
- LABELALIGN
- "1: \n"
- "movdqu " MEMACCESS(0) ",%%xmm6 \n"
- MEMOPREG(movdqu,0x00,0,3,1,xmm7) // movdqu (%0,%3),%%xmm7
- "pavgb %%xmm7,%%xmm6 \n"
- "pshufb %%xmm2,%%xmm6 \n"
- "pmaddubsw %%xmm5,%%xmm6 \n"
- "paddsw %%xmm1,%%xmm6 \n"
- "psrlw $0x2,%%xmm6 \n"
- "packuswb %%xmm6,%%xmm6 \n"
- "movq %%xmm6," MEMACCESS(1) " \n"
- "movdqu " MEMACCESS2(0x8,0) ",%%xmm6 \n"
- MEMOPREG(movdqu,0x8,0,3,1,xmm7) // movdqu 0x8(%0,%3),%%xmm7
- "pavgb %%xmm7,%%xmm6 \n"
- "pshufb %%xmm3,%%xmm6 \n"
- "pmaddubsw %%xmm0,%%xmm6 \n"
- "paddsw %%xmm1,%%xmm6 \n"
- "psrlw $0x2,%%xmm6 \n"
- "packuswb %%xmm6,%%xmm6 \n"
- "movq %%xmm6," MEMACCESS2(0x8,1) " \n"
- "movdqu " MEMACCESS2(0x10,0) ",%%xmm6 \n"
- MEMOPREG(movdqu,0x10,0,3,1,xmm7) // movdqu 0x10(%0,%3),%%xmm7
- "lea " MEMLEA(0x20,0) ",%0 \n"
- "pavgb %%xmm7,%%xmm6 \n"
- "pshufb %%xmm4,%%xmm6 \n"
- "pmaddubsw %4,%%xmm6 \n"
- "paddsw %%xmm1,%%xmm6 \n"
- "psrlw $0x2,%%xmm6 \n"
- "packuswb %%xmm6,%%xmm6 \n"
- "movq %%xmm6," MEMACCESS2(0x10,1) " \n"
- "lea " MEMLEA(0x18,1) ",%1 \n"
- "sub $0x18,%2 \n"
- "jg 1b \n"
- : "+r"(src_ptr), // %0
- "+r"(dst_ptr), // %1
- "+r"(dst_width) // %2
- : "r"((intptr_t)(src_stride)), // %3
- "m"(kMadd21) // %4
- : "memory", "cc", NACL_R14
- "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", "xmm7"
- );
-}
-
-void ScaleRowDown34_0_Box_SSSE3(const uint8* src_ptr,
- ptrdiff_t src_stride,
- uint8* dst_ptr, int dst_width) {
- asm volatile (
- "movdqa %0,%%xmm2 \n" // kShuf01
- "movdqa %1,%%xmm3 \n" // kShuf11
- "movdqa %2,%%xmm4 \n" // kShuf21
- :
- : "m"(kShuf01), // %0
- "m"(kShuf11), // %1
- "m"(kShuf21) // %2
- );
- asm volatile (
- "movdqa %0,%%xmm5 \n" // kMadd01
- "movdqa %1,%%xmm0 \n" // kMadd11
- "movdqa %2,%%xmm1 \n" // kRound34
- :
- : "m"(kMadd01), // %0
- "m"(kMadd11), // %1
- "m"(kRound34) // %2
- );
-
- asm volatile (
- LABELALIGN
- "1: \n"
- "movdqu " MEMACCESS(0) ",%%xmm6 \n"
- MEMOPREG(movdqu,0x00,0,3,1,xmm7) // movdqu (%0,%3,1),%%xmm7
- "pavgb %%xmm6,%%xmm7 \n"
- "pavgb %%xmm7,%%xmm6 \n"
- "pshufb %%xmm2,%%xmm6 \n"
- "pmaddubsw %%xmm5,%%xmm6 \n"
- "paddsw %%xmm1,%%xmm6 \n"
- "psrlw $0x2,%%xmm6 \n"
- "packuswb %%xmm6,%%xmm6 \n"
- "movq %%xmm6," MEMACCESS(1) " \n"
- "movdqu " MEMACCESS2(0x8,0) ",%%xmm6 \n"
- MEMOPREG(movdqu,0x8,0,3,1,xmm7) // movdqu 0x8(%0,%3,1),%%xmm7
- "pavgb %%xmm6,%%xmm7 \n"
- "pavgb %%xmm7,%%xmm6 \n"
- "pshufb %%xmm3,%%xmm6 \n"
- "pmaddubsw %%xmm0,%%xmm6 \n"
- "paddsw %%xmm1,%%xmm6 \n"
- "psrlw $0x2,%%xmm6 \n"
- "packuswb %%xmm6,%%xmm6 \n"
- "movq %%xmm6," MEMACCESS2(0x8,1) " \n"
- "movdqu " MEMACCESS2(0x10,0) ",%%xmm6 \n"
- MEMOPREG(movdqu,0x10,0,3,1,xmm7) // movdqu 0x10(%0,%3,1),%%xmm7
- "lea " MEMLEA(0x20,0) ",%0 \n"
- "pavgb %%xmm6,%%xmm7 \n"
- "pavgb %%xmm7,%%xmm6 \n"
- "pshufb %%xmm4,%%xmm6 \n"
- "pmaddubsw %4,%%xmm6 \n"
- "paddsw %%xmm1,%%xmm6 \n"
- "psrlw $0x2,%%xmm6 \n"
- "packuswb %%xmm6,%%xmm6 \n"
- "movq %%xmm6," MEMACCESS2(0x10,1) " \n"
- "lea " MEMLEA(0x18,1) ",%1 \n"
- "sub $0x18,%2 \n"
- "jg 1b \n"
- : "+r"(src_ptr), // %0
- "+r"(dst_ptr), // %1
- "+r"(dst_width) // %2
- : "r"((intptr_t)(src_stride)), // %3
- "m"(kMadd21) // %4
- : "memory", "cc", NACL_R14
- "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", "xmm7"
- );
-}
-
-void ScaleRowDown38_SSSE3(const uint8* src_ptr, ptrdiff_t src_stride,
- uint8* dst_ptr, int dst_width) {
- asm volatile (
- "movdqa %3,%%xmm4 \n"
- "movdqa %4,%%xmm5 \n"
-
- LABELALIGN
- "1: \n"
- "movdqu " MEMACCESS(0) ",%%xmm0 \n"
- "movdqu " MEMACCESS2(0x10,0) ",%%xmm1 \n"
- "lea " MEMLEA(0x20,0) ",%0 \n"
- "pshufb %%xmm4,%%xmm0 \n"
- "pshufb %%xmm5,%%xmm1 \n"
- "paddusb %%xmm1,%%xmm0 \n"
- "movq %%xmm0," MEMACCESS(1) " \n"
- "movhlps %%xmm0,%%xmm1 \n"
- "movd %%xmm1," MEMACCESS2(0x8,1) " \n"
- "lea " MEMLEA(0xc,1) ",%1 \n"
- "sub $0xc,%2 \n"
- "jg 1b \n"
- : "+r"(src_ptr), // %0
- "+r"(dst_ptr), // %1
- "+r"(dst_width) // %2
- : "m"(kShuf38a), // %3
- "m"(kShuf38b) // %4
- : "memory", "cc", "xmm0", "xmm1", "xmm4", "xmm5"
- );
-}
-
-void ScaleRowDown38_2_Box_SSSE3(const uint8* src_ptr,
- ptrdiff_t src_stride,
- uint8* dst_ptr, int dst_width) {
- asm volatile (
- "movdqa %0,%%xmm2 \n"
- "movdqa %1,%%xmm3 \n"
- "movdqa %2,%%xmm4 \n"
- "movdqa %3,%%xmm5 \n"
- :
- : "m"(kShufAb0), // %0
- "m"(kShufAb1), // %1
- "m"(kShufAb2), // %2
- "m"(kScaleAb2) // %3
- );
- asm volatile (
- LABELALIGN
- "1: \n"
- "movdqu " MEMACCESS(0) ",%%xmm0 \n"
- MEMOPREG(movdqu,0x00,0,3,1,xmm1) // movdqu (%0,%3,1),%%xmm1
- "lea " MEMLEA(0x10,0) ",%0 \n"
- "pavgb %%xmm1,%%xmm0 \n"
- "movdqa %%xmm0,%%xmm1 \n"
- "pshufb %%xmm2,%%xmm1 \n"
- "movdqa %%xmm0,%%xmm6 \n"
- "pshufb %%xmm3,%%xmm6 \n"
- "paddusw %%xmm6,%%xmm1 \n"
- "pshufb %%xmm4,%%xmm0 \n"
- "paddusw %%xmm0,%%xmm1 \n"
- "pmulhuw %%xmm5,%%xmm1 \n"
- "packuswb %%xmm1,%%xmm1 \n"
- "movd %%xmm1," MEMACCESS(1) " \n"
- "psrlq $0x10,%%xmm1 \n"
- "movd %%xmm1," MEMACCESS2(0x2,1) " \n"
- "lea " MEMLEA(0x6,1) ",%1 \n"
- "sub $0x6,%2 \n"
- "jg 1b \n"
- : "+r"(src_ptr), // %0
- "+r"(dst_ptr), // %1
- "+r"(dst_width) // %2
- : "r"((intptr_t)(src_stride)) // %3
- : "memory", "cc", NACL_R14
- "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6"
- );
-}
-
-void ScaleRowDown38_3_Box_SSSE3(const uint8* src_ptr,
- ptrdiff_t src_stride,
- uint8* dst_ptr, int dst_width) {
- asm volatile (
- "movdqa %0,%%xmm2 \n"
- "movdqa %1,%%xmm3 \n"
- "movdqa %2,%%xmm4 \n"
- "pxor %%xmm5,%%xmm5 \n"
- :
- : "m"(kShufAc), // %0
- "m"(kShufAc3), // %1
- "m"(kScaleAc33) // %2
- );
- asm volatile (
- LABELALIGN
- "1: \n"
- "movdqu " MEMACCESS(0) ",%%xmm0 \n"
- MEMOPREG(movdqu,0x00,0,3,1,xmm6) // movdqu (%0,%3,1),%%xmm6
- "movhlps %%xmm0,%%xmm1 \n"
- "movhlps %%xmm6,%%xmm7 \n"
- "punpcklbw %%xmm5,%%xmm0 \n"
- "punpcklbw %%xmm5,%%xmm1 \n"
- "punpcklbw %%xmm5,%%xmm6 \n"
- "punpcklbw %%xmm5,%%xmm7 \n"
- "paddusw %%xmm6,%%xmm0 \n"
- "paddusw %%xmm7,%%xmm1 \n"
- MEMOPREG(movdqu,0x00,0,3,2,xmm6) // movdqu (%0,%3,2),%%xmm6
- "lea " MEMLEA(0x10,0) ",%0 \n"
- "movhlps %%xmm6,%%xmm7 \n"
- "punpcklbw %%xmm5,%%xmm6 \n"
- "punpcklbw %%xmm5,%%xmm7 \n"
- "paddusw %%xmm6,%%xmm0 \n"
- "paddusw %%xmm7,%%xmm1 \n"
- "movdqa %%xmm0,%%xmm6 \n"
- "psrldq $0x2,%%xmm0 \n"
- "paddusw %%xmm0,%%xmm6 \n"
- "psrldq $0x2,%%xmm0 \n"
- "paddusw %%xmm0,%%xmm6 \n"
- "pshufb %%xmm2,%%xmm6 \n"
- "movdqa %%xmm1,%%xmm7 \n"
- "psrldq $0x2,%%xmm1 \n"
- "paddusw %%xmm1,%%xmm7 \n"
- "psrldq $0x2,%%xmm1 \n"
- "paddusw %%xmm1,%%xmm7 \n"
- "pshufb %%xmm3,%%xmm7 \n"
- "paddusw %%xmm7,%%xmm6 \n"
- "pmulhuw %%xmm4,%%xmm6 \n"
- "packuswb %%xmm6,%%xmm6 \n"
- "movd %%xmm6," MEMACCESS(1) " \n"
- "psrlq $0x10,%%xmm6 \n"
- "movd %%xmm6," MEMACCESS2(0x2,1) " \n"
- "lea " MEMLEA(0x6,1) ",%1 \n"
- "sub $0x6,%2 \n"
- "jg 1b \n"
- : "+r"(src_ptr), // %0
- "+r"(dst_ptr), // %1
- "+r"(dst_width) // %2
- : "r"((intptr_t)(src_stride)) // %3
- : "memory", "cc", NACL_R14
- "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", "xmm7"
- );
-}
-
-// Reads 16xN bytes and produces 16 shorts at a time.
-void ScaleAddRows_SSE2(const uint8* src_ptr, ptrdiff_t src_stride,
- uint16* dst_ptr, int src_width, int src_height) {
- int tmp_height = 0;
- intptr_t tmp_src = 0;
- asm volatile (
- "mov %0,%3 \n" // row pointer
- "mov %5,%2 \n" // height
- "pxor %%xmm0,%%xmm0 \n" // clear accumulators
- "pxor %%xmm1,%%xmm1 \n"
- "pxor %%xmm4,%%xmm4 \n"
-
- LABELALIGN
- "1: \n"
- "movdqu " MEMACCESS(3) ",%%xmm2 \n"
- "add %6,%3 \n"
- "movdqa %%xmm2,%%xmm3 \n"
- "punpcklbw %%xmm4,%%xmm2 \n"
- "punpckhbw %%xmm4,%%xmm3 \n"
- "paddusw %%xmm2,%%xmm0 \n"
- "paddusw %%xmm3,%%xmm1 \n"
- "sub $0x1,%2 \n"
- "jg 1b \n"
-
- "movdqu %%xmm0," MEMACCESS(1) " \n"
- "movdqu %%xmm1," MEMACCESS2(0x10,1) " \n"
- "lea " MEMLEA(0x20,1) ",%1 \n"
- "lea " MEMLEA(0x10,0) ",%0 \n" // src_ptr += 16
- "mov %0,%3 \n" // row pointer
- "mov %5,%2 \n" // height
- "pxor %%xmm0,%%xmm0 \n" // clear accumulators
- "pxor %%xmm1,%%xmm1 \n"
- "sub $0x10,%4 \n"
- "jg 1b \n"
- : "+r"(src_ptr), // %0
- "+r"(dst_ptr), // %1
- "+r"(tmp_height), // %2
- "+r"(tmp_src), // %3
- "+r"(src_width), // %4
- "+rm"(src_height) // %5
- : "rm"((intptr_t)(src_stride)) // %6
- : "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4"
- );
-}
-
-// Bilinear column filtering. SSSE3 version.
-void ScaleFilterCols_SSSE3(uint8* dst_ptr, const uint8* src_ptr,
- int dst_width, int x, int dx) {
- intptr_t x0 = 0, x1 = 0, temp_pixel = 0;
- asm volatile (
- "movd %6,%%xmm2 \n"
- "movd %7,%%xmm3 \n"
- "movl $0x04040000,%k2 \n"
- "movd %k2,%%xmm5 \n"
- "pcmpeqb %%xmm6,%%xmm6 \n"
- "psrlw $0x9,%%xmm6 \n"
- "pextrw $0x1,%%xmm2,%k3 \n"
- "subl $0x2,%5 \n"
- "jl 29f \n"
- "movdqa %%xmm2,%%xmm0 \n"
- "paddd %%xmm3,%%xmm0 \n"
- "punpckldq %%xmm0,%%xmm2 \n"
- "punpckldq %%xmm3,%%xmm3 \n"
- "paddd %%xmm3,%%xmm3 \n"
- "pextrw $0x3,%%xmm2,%k4 \n"
-
- LABELALIGN
- "2: \n"
- "movdqa %%xmm2,%%xmm1 \n"
- "paddd %%xmm3,%%xmm2 \n"
- MEMOPARG(movzwl,0x00,1,3,1,k2) // movzwl (%1,%3,1),%k2
- "movd %k2,%%xmm0 \n"
- "psrlw $0x9,%%xmm1 \n"
- MEMOPARG(movzwl,0x00,1,4,1,k2) // movzwl (%1,%4,1),%k2
- "movd %k2,%%xmm4 \n"
- "pshufb %%xmm5,%%xmm1 \n"
- "punpcklwd %%xmm4,%%xmm0 \n"
- "pxor %%xmm6,%%xmm1 \n"
- "pmaddubsw %%xmm1,%%xmm0 \n"
- "pextrw $0x1,%%xmm2,%k3 \n"
- "pextrw $0x3,%%xmm2,%k4 \n"
- "psrlw $0x7,%%xmm0 \n"
- "packuswb %%xmm0,%%xmm0 \n"
- "movd %%xmm0,%k2 \n"
- "mov %w2," MEMACCESS(0) " \n"
- "lea " MEMLEA(0x2,0) ",%0 \n"
- "sub $0x2,%5 \n"
- "jge 2b \n"
-
- LABELALIGN
- "29: \n"
- "addl $0x1,%5 \n"
- "jl 99f \n"
- MEMOPARG(movzwl,0x00,1,3,1,k2) // movzwl (%1,%3,1),%k2
- "movd %k2,%%xmm0 \n"
- "psrlw $0x9,%%xmm2 \n"
- "pshufb %%xmm5,%%xmm2 \n"
- "pxor %%xmm6,%%xmm2 \n"
- "pmaddubsw %%xmm2,%%xmm0 \n"
- "psrlw $0x7,%%xmm0 \n"
- "packuswb %%xmm0,%%xmm0 \n"
- "movd %%xmm0,%k2 \n"
- "mov %b2," MEMACCESS(0) " \n"
- "99: \n"
- : "+r"(dst_ptr), // %0
- "+r"(src_ptr), // %1
- "+a"(temp_pixel), // %2
- "+r"(x0), // %3
- "+r"(x1), // %4
- "+rm"(dst_width) // %5
- : "rm"(x), // %6
- "rm"(dx) // %7
- : "memory", "cc", NACL_R14
- "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6"
- );
-}
-
-// Reads 4 pixels, duplicates them and writes 8 pixels.
-// Alignment requirement: src_argb 16 byte aligned, dst_argb 16 byte aligned.
-void ScaleColsUp2_SSE2(uint8* dst_ptr, const uint8* src_ptr,
- int dst_width, int x, int dx) {
- asm volatile (
- LABELALIGN
- "1: \n"
- "movdqu " MEMACCESS(1) ",%%xmm0 \n"
- "lea " MEMLEA(0x10,1) ",%1 \n"
- "movdqa %%xmm0,%%xmm1 \n"
- "punpcklbw %%xmm0,%%xmm0 \n"
- "punpckhbw %%xmm1,%%xmm1 \n"
- "movdqu %%xmm0," MEMACCESS(0) " \n"
- "movdqu %%xmm1," MEMACCESS2(0x10,0) " \n"
- "lea " MEMLEA(0x20,0) ",%0 \n"
- "sub $0x20,%2 \n"
- "jg 1b \n"
-
- : "+r"(dst_ptr), // %0
- "+r"(src_ptr), // %1
- "+r"(dst_width) // %2
- :: "memory", "cc", "xmm0", "xmm1"
- );
-}
-
-void ScaleARGBRowDown2_SSE2(const uint8* src_argb,
- ptrdiff_t src_stride,
- uint8* dst_argb, int dst_width) {
- asm volatile (
- LABELALIGN
- "1: \n"
- "movdqu " MEMACCESS(0) ",%%xmm0 \n"
- "movdqu " MEMACCESS2(0x10,0) ",%%xmm1 \n"
- "lea " MEMLEA(0x20,0) ",%0 \n"
- "shufps $0xdd,%%xmm1,%%xmm0 \n"
- "movdqu %%xmm0," MEMACCESS(1) " \n"
- "lea " MEMLEA(0x10,1) ",%1 \n"
- "sub $0x4,%2 \n"
- "jg 1b \n"
- : "+r"(src_argb), // %0
- "+r"(dst_argb), // %1
- "+r"(dst_width) // %2
- :: "memory", "cc", "xmm0", "xmm1"
- );
-}
-
-void ScaleARGBRowDown2Linear_SSE2(const uint8* src_argb,
- ptrdiff_t src_stride,
- uint8* dst_argb, int dst_width) {
- asm volatile (
- LABELALIGN
- "1: \n"
- "movdqu " MEMACCESS(0) ",%%xmm0 \n"
- "movdqu " MEMACCESS2(0x10,0) ",%%xmm1 \n"
- "lea " MEMLEA(0x20,0) ",%0 \n"
- "movdqa %%xmm0,%%xmm2 \n"
- "shufps $0x88,%%xmm1,%%xmm0 \n"
- "shufps $0xdd,%%xmm1,%%xmm2 \n"
- "pavgb %%xmm2,%%xmm0 \n"
- "movdqu %%xmm0," MEMACCESS(1) " \n"
- "lea " MEMLEA(0x10,1) ",%1 \n"
- "sub $0x4,%2 \n"
- "jg 1b \n"
- : "+r"(src_argb), // %0
- "+r"(dst_argb), // %1
- "+r"(dst_width) // %2
- :: "memory", "cc", "xmm0", "xmm1"
- );
-}
-
-void ScaleARGBRowDown2Box_SSE2(const uint8* src_argb,
- ptrdiff_t src_stride,
- uint8* dst_argb, int dst_width) {
- asm volatile (
- LABELALIGN
- "1: \n"
- "movdqu " MEMACCESS(0) ",%%xmm0 \n"
- "movdqu " MEMACCESS2(0x10,0) ",%%xmm1 \n"
- MEMOPREG(movdqu,0x00,0,3,1,xmm2) // movdqu (%0,%3,1),%%xmm2
- MEMOPREG(movdqu,0x10,0,3,1,xmm3) // movdqu 0x10(%0,%3,1),%%xmm3
- "lea " MEMLEA(0x20,0) ",%0 \n"
- "pavgb %%xmm2,%%xmm0 \n"
- "pavgb %%xmm3,%%xmm1 \n"
- "movdqa %%xmm0,%%xmm2 \n"
- "shufps $0x88,%%xmm1,%%xmm0 \n"
- "shufps $0xdd,%%xmm1,%%xmm2 \n"
- "pavgb %%xmm2,%%xmm0 \n"
- "movdqu %%xmm0," MEMACCESS(1) " \n"
- "lea " MEMLEA(0x10,1) ",%1 \n"
- "sub $0x4,%2 \n"
- "jg 1b \n"
- : "+r"(src_argb), // %0
- "+r"(dst_argb), // %1
- "+r"(dst_width) // %2
- : "r"((intptr_t)(src_stride)) // %3
- : "memory", "cc", NACL_R14
- "xmm0", "xmm1", "xmm2", "xmm3"
- );
-}
-
-// Reads 4 pixels at a time.
-// Alignment requirement: dst_argb 16 byte aligned.
-void ScaleARGBRowDownEven_SSE2(const uint8* src_argb, ptrdiff_t src_stride,
- int src_stepx, uint8* dst_argb, int dst_width) {
- intptr_t src_stepx_x4 = (intptr_t)(src_stepx);
- intptr_t src_stepx_x12 = 0;
- asm volatile (
- "lea " MEMLEA3(0x00,1,4) ",%1 \n"
- "lea " MEMLEA4(0x00,1,1,2) ",%4 \n"
- LABELALIGN
- "1: \n"
- "movd " MEMACCESS(0) ",%%xmm0 \n"
- MEMOPREG(movd,0x00,0,1,1,xmm1) // movd (%0,%1,1),%%xmm1
- "punpckldq %%xmm1,%%xmm0 \n"
- MEMOPREG(movd,0x00,0,1,2,xmm2) // movd (%0,%1,2),%%xmm2
- MEMOPREG(movd,0x00,0,4,1,xmm3) // movd (%0,%4,1),%%xmm3
- "lea " MEMLEA4(0x00,0,1,4) ",%0 \n"
- "punpckldq %%xmm3,%%xmm2 \n"
- "punpcklqdq %%xmm2,%%xmm0 \n"
- "movdqu %%xmm0," MEMACCESS(2) " \n"
- "lea " MEMLEA(0x10,2) ",%2 \n"
- "sub $0x4,%3 \n"
- "jg 1b \n"
- : "+r"(src_argb), // %0
- "+r"(src_stepx_x4), // %1
- "+r"(dst_argb), // %2
- "+r"(dst_width), // %3
- "+r"(src_stepx_x12) // %4
- :: "memory", "cc", NACL_R14
- "xmm0", "xmm1", "xmm2", "xmm3"
- );
-}
-
-// Blends four 2x2 to 4x1.
-// Alignment requirement: dst_argb 16 byte aligned.
-void ScaleARGBRowDownEvenBox_SSE2(const uint8* src_argb,
- ptrdiff_t src_stride, int src_stepx,
- uint8* dst_argb, int dst_width) {
- intptr_t src_stepx_x4 = (intptr_t)(src_stepx);
- intptr_t src_stepx_x12 = 0;
- intptr_t row1 = (intptr_t)(src_stride);
- asm volatile (
- "lea " MEMLEA3(0x00,1,4) ",%1 \n"
- "lea " MEMLEA4(0x00,1,1,2) ",%4 \n"
- "lea " MEMLEA4(0x00,0,5,1) ",%5 \n"
-
- LABELALIGN
- "1: \n"
- "movq " MEMACCESS(0) ",%%xmm0 \n"
- MEMOPREG(movhps,0x00,0,1,1,xmm0) // movhps (%0,%1,1),%%xmm0
- MEMOPREG(movq,0x00,0,1,2,xmm1) // movq (%0,%1,2),%%xmm1
- MEMOPREG(movhps,0x00,0,4,1,xmm1) // movhps (%0,%4,1),%%xmm1
- "lea " MEMLEA4(0x00,0,1,4) ",%0 \n"
- "movq " MEMACCESS(5) ",%%xmm2 \n"
- MEMOPREG(movhps,0x00,5,1,1,xmm2) // movhps (%5,%1,1),%%xmm2
- MEMOPREG(movq,0x00,5,1,2,xmm3) // movq (%5,%1,2),%%xmm3
- MEMOPREG(movhps,0x00,5,4,1,xmm3) // movhps (%5,%4,1),%%xmm3
- "lea " MEMLEA4(0x00,5,1,4) ",%5 \n"
- "pavgb %%xmm2,%%xmm0 \n"
- "pavgb %%xmm3,%%xmm1 \n"
- "movdqa %%xmm0,%%xmm2 \n"
- "shufps $0x88,%%xmm1,%%xmm0 \n"
- "shufps $0xdd,%%xmm1,%%xmm2 \n"
- "pavgb %%xmm2,%%xmm0 \n"
- "movdqu %%xmm0," MEMACCESS(2) " \n"
- "lea " MEMLEA(0x10,2) ",%2 \n"
- "sub $0x4,%3 \n"
- "jg 1b \n"
- : "+r"(src_argb), // %0
- "+r"(src_stepx_x4), // %1
- "+r"(dst_argb), // %2
- "+rm"(dst_width), // %3
- "+r"(src_stepx_x12), // %4
- "+r"(row1) // %5
- :: "memory", "cc", NACL_R14
- "xmm0", "xmm1", "xmm2", "xmm3"
- );
-}
-
-void ScaleARGBCols_SSE2(uint8* dst_argb, const uint8* src_argb,
- int dst_width, int x, int dx) {
- intptr_t x0 = 0, x1 = 0;
- asm volatile (
- "movd %5,%%xmm2 \n"
- "movd %6,%%xmm3 \n"
- "pshufd $0x0,%%xmm2,%%xmm2 \n"
- "pshufd $0x11,%%xmm3,%%xmm0 \n"
- "paddd %%xmm0,%%xmm2 \n"
- "paddd %%xmm3,%%xmm3 \n"
- "pshufd $0x5,%%xmm3,%%xmm0 \n"
- "paddd %%xmm0,%%xmm2 \n"
- "paddd %%xmm3,%%xmm3 \n"
- "pshufd $0x0,%%xmm3,%%xmm3 \n"
- "pextrw $0x1,%%xmm2,%k0 \n"
- "pextrw $0x3,%%xmm2,%k1 \n"
- "cmp $0x0,%4 \n"
- "jl 99f \n"
- "sub $0x4,%4 \n"
- "jl 49f \n"
-
- LABELALIGN
- "40: \n"
- MEMOPREG(movd,0x00,3,0,4,xmm0) // movd (%3,%0,4),%%xmm0
- MEMOPREG(movd,0x00,3,1,4,xmm1) // movd (%3,%1,4),%%xmm1
- "pextrw $0x5,%%xmm2,%k0 \n"
- "pextrw $0x7,%%xmm2,%k1 \n"
- "paddd %%xmm3,%%xmm2 \n"
- "punpckldq %%xmm1,%%xmm0 \n"
- MEMOPREG(movd,0x00,3,0,4,xmm1) // movd (%3,%0,4),%%xmm1
- MEMOPREG(movd,0x00,3,1,4,xmm4) // movd (%3,%1,4),%%xmm4
- "pextrw $0x1,%%xmm2,%k0 \n"
- "pextrw $0x3,%%xmm2,%k1 \n"
- "punpckldq %%xmm4,%%xmm1 \n"
- "punpcklqdq %%xmm1,%%xmm0 \n"
- "movdqu %%xmm0," MEMACCESS(2) " \n"
- "lea " MEMLEA(0x10,2) ",%2 \n"
- "sub $0x4,%4 \n"
- "jge 40b \n"
-
- "49: \n"
- "test $0x2,%4 \n"
- "je 29f \n"
- MEMOPREG(movd,0x00,3,0,4,xmm0) // movd (%3,%0,4),%%xmm0
- MEMOPREG(movd,0x00,3,1,4,xmm1) // movd (%3,%1,4),%%xmm1
- "pextrw $0x5,%%xmm2,%k0 \n"
- "punpckldq %%xmm1,%%xmm0 \n"
- "movq %%xmm0," MEMACCESS(2) " \n"
- "lea " MEMLEA(0x8,2) ",%2 \n"
- "29: \n"
- "test $0x1,%4 \n"
- "je 99f \n"
- MEMOPREG(movd,0x00,3,0,4,xmm0) // movd (%3,%0,4),%%xmm0
- "movd %%xmm0," MEMACCESS(2) " \n"
- "99: \n"
- : "+a"(x0), // %0
- "+d"(x1), // %1
- "+r"(dst_argb), // %2
- "+r"(src_argb), // %3
- "+r"(dst_width) // %4
- : "rm"(x), // %5
- "rm"(dx) // %6
- : "memory", "cc", NACL_R14
- "xmm0", "xmm1", "xmm2", "xmm3", "xmm4"
- );
-}
-
-// Reads 4 pixels, duplicates them and writes 8 pixels.
-// Alignment requirement: src_argb 16 byte aligned, dst_argb 16 byte aligned.
-void ScaleARGBColsUp2_SSE2(uint8* dst_argb, const uint8* src_argb,
- int dst_width, int x, int dx) {
- asm volatile (
- LABELALIGN
- "1: \n"
- "movdqu " MEMACCESS(1) ",%%xmm0 \n"
- "lea " MEMLEA(0x10,1) ",%1 \n"
- "movdqa %%xmm0,%%xmm1 \n"
- "punpckldq %%xmm0,%%xmm0 \n"
- "punpckhdq %%xmm1,%%xmm1 \n"
- "movdqu %%xmm0," MEMACCESS(0) " \n"
- "movdqu %%xmm1," MEMACCESS2(0x10,0) " \n"
- "lea " MEMLEA(0x20,0) ",%0 \n"
- "sub $0x8,%2 \n"
- "jg 1b \n"
-
- : "+r"(dst_argb), // %0
- "+r"(src_argb), // %1
- "+r"(dst_width) // %2
- :: "memory", "cc", NACL_R14
- "xmm0", "xmm1"
- );
-}
-
-// Shuffle table for arranging 2 pixels into pairs for pmaddubsw
-static uvec8 kShuffleColARGB = {
- 0u, 4u, 1u, 5u, 2u, 6u, 3u, 7u, // bbggrraa 1st pixel
- 8u, 12u, 9u, 13u, 10u, 14u, 11u, 15u // bbggrraa 2nd pixel
-};
-
-// Shuffle table for duplicating 2 fractions into 8 bytes each
-static uvec8 kShuffleFractions = {
- 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 4u, 4u, 4u, 4u, 4u, 4u, 4u, 4u,
-};
-
-// Bilinear row filtering combines 4x2 -> 4x1. SSSE3 version
-void ScaleARGBFilterCols_SSSE3(uint8* dst_argb, const uint8* src_argb,
- int dst_width, int x, int dx) {
- intptr_t x0 = 0, x1 = 0;
- asm volatile (
- "movdqa %0,%%xmm4 \n"
- "movdqa %1,%%xmm5 \n"
- :
- : "m"(kShuffleColARGB), // %0
- "m"(kShuffleFractions) // %1
- );
-
- asm volatile (
- "movd %5,%%xmm2 \n"
- "movd %6,%%xmm3 \n"
- "pcmpeqb %%xmm6,%%xmm6 \n"
- "psrlw $0x9,%%xmm6 \n"
- "pextrw $0x1,%%xmm2,%k3 \n"
- "sub $0x2,%2 \n"
- "jl 29f \n"
- "movdqa %%xmm2,%%xmm0 \n"
- "paddd %%xmm3,%%xmm0 \n"
- "punpckldq %%xmm0,%%xmm2 \n"
- "punpckldq %%xmm3,%%xmm3 \n"
- "paddd %%xmm3,%%xmm3 \n"
- "pextrw $0x3,%%xmm2,%k4 \n"
-
- LABELALIGN
- "2: \n"
- "movdqa %%xmm2,%%xmm1 \n"
- "paddd %%xmm3,%%xmm2 \n"
- MEMOPREG(movq,0x00,1,3,4,xmm0) // movq (%1,%3,4),%%xmm0
- "psrlw $0x9,%%xmm1 \n"
- MEMOPREG(movhps,0x00,1,4,4,xmm0) // movhps (%1,%4,4),%%xmm0
- "pshufb %%xmm5,%%xmm1 \n"
- "pshufb %%xmm4,%%xmm0 \n"
- "pxor %%xmm6,%%xmm1 \n"
- "pmaddubsw %%xmm1,%%xmm0 \n"
- "psrlw $0x7,%%xmm0 \n"
- "pextrw $0x1,%%xmm2,%k3 \n"
- "pextrw $0x3,%%xmm2,%k4 \n"
- "packuswb %%xmm0,%%xmm0 \n"
- "movq %%xmm0," MEMACCESS(0) " \n"
- "lea " MEMLEA(0x8,0) ",%0 \n"
- "sub $0x2,%2 \n"
- "jge 2b \n"
-
- LABELALIGN
- "29: \n"
- "add $0x1,%2 \n"
- "jl 99f \n"
- "psrlw $0x9,%%xmm2 \n"
- MEMOPREG(movq,0x00,1,3,4,xmm0) // movq (%1,%3,4),%%xmm0
- "pshufb %%xmm5,%%xmm2 \n"
- "pshufb %%xmm4,%%xmm0 \n"
- "pxor %%xmm6,%%xmm2 \n"
- "pmaddubsw %%xmm2,%%xmm0 \n"
- "psrlw $0x7,%%xmm0 \n"
- "packuswb %%xmm0,%%xmm0 \n"
- "movd %%xmm0," MEMACCESS(0) " \n"
-
- LABELALIGN
- "99: \n"
- : "+r"(dst_argb), // %0
- "+r"(src_argb), // %1
- "+rm"(dst_width), // %2
- "+r"(x0), // %3
- "+r"(x1) // %4
- : "rm"(x), // %5
- "rm"(dx) // %6
- : "memory", "cc", NACL_R14
- "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6"
- );
-}
-
-// Divide num by div and return as 16.16 fixed point result.
-int FixedDiv_X86(int num, int div) {
- asm volatile (
- "cdq \n"
- "shld $0x10,%%eax,%%edx \n"
- "shl $0x10,%%eax \n"
- "idiv %1 \n"
- "mov %0, %%eax \n"
- : "+a"(num) // %0
- : "c"(div) // %1
- : "memory", "cc", "edx"
- );
- return num;
-}
-
-// Divide num - 1 by div - 1 and return as 16.16 fixed point result.
-int FixedDiv1_X86(int num, int div) {
- asm volatile (
- "cdq \n"
- "shld $0x10,%%eax,%%edx \n"
- "shl $0x10,%%eax \n"
- "sub $0x10001,%%eax \n"
- "sbb $0x0,%%edx \n"
- "sub $0x1,%1 \n"
- "idiv %1 \n"
- "mov %0, %%eax \n"
- : "+a"(num) // %0
- : "c"(div) // %1
- : "memory", "cc", "edx"
- );
- return num;
-}
-
-#endif // defined(__x86_64__) || defined(__i386__)
-
-#ifdef __cplusplus
-} // extern "C"
-} // namespace libyuv
-#endif
diff --git a/third_party/aom/third_party/libyuv/source/scale_mips.cc b/third_party/aom/third_party/libyuv/source/scale_mips.cc
deleted file mode 100644
index 3eb4f27c4..000000000
--- a/third_party/aom/third_party/libyuv/source/scale_mips.cc
+++ /dev/null
@@ -1,654 +0,0 @@
-/*
- * Copyright 2012 The LibYuv Project Authors. All rights reserved.
- *
- * Use of this source code is governed by a BSD-style license
- * that can be found in the LICENSE file in the root of the source
- * tree. An additional intellectual property rights grant can be found
- * in the file PATENTS. All contributing project authors may
- * be found in the AUTHORS file in the root of the source tree.
- */
-
-#include "libyuv/basic_types.h"
-#include "libyuv/row.h"
-
-#ifdef __cplusplus
-namespace libyuv {
-extern "C" {
-#endif
-
-// This module is for GCC MIPS DSPR2
-#if !defined(LIBYUV_DISABLE_MIPS) && \
- defined(__mips_dsp) && (__mips_dsp_rev >= 2) && \
- (_MIPS_SIM == _MIPS_SIM_ABI32)
-
-void ScaleRowDown2_MIPS_DSPR2(const uint8* src_ptr, ptrdiff_t src_stride,
- uint8* dst, int dst_width) {
- __asm__ __volatile__(
- ".set push \n"
- ".set noreorder \n"
-
- "srl $t9, %[dst_width], 4 \n" // iterations -> by 16
- "beqz $t9, 2f \n"
- " nop \n"
-
- ".p2align 2 \n"
- "1: \n"
- "lw $t0, 0(%[src_ptr]) \n" // |3|2|1|0|
- "lw $t1, 4(%[src_ptr]) \n" // |7|6|5|4|
- "lw $t2, 8(%[src_ptr]) \n" // |11|10|9|8|
- "lw $t3, 12(%[src_ptr]) \n" // |15|14|13|12|
- "lw $t4, 16(%[src_ptr]) \n" // |19|18|17|16|
- "lw $t5, 20(%[src_ptr]) \n" // |23|22|21|20|
- "lw $t6, 24(%[src_ptr]) \n" // |27|26|25|24|
- "lw $t7, 28(%[src_ptr]) \n" // |31|30|29|28|
- // TODO(fbarchard): Use odd pixels instead of even.
- "precr.qb.ph $t8, $t1, $t0 \n" // |6|4|2|0|
- "precr.qb.ph $t0, $t3, $t2 \n" // |14|12|10|8|
- "precr.qb.ph $t1, $t5, $t4 \n" // |22|20|18|16|
- "precr.qb.ph $t2, $t7, $t6 \n" // |30|28|26|24|
- "addiu %[src_ptr], %[src_ptr], 32 \n"
- "addiu $t9, $t9, -1 \n"
- "sw $t8, 0(%[dst]) \n"
- "sw $t0, 4(%[dst]) \n"
- "sw $t1, 8(%[dst]) \n"
- "sw $t2, 12(%[dst]) \n"
- "bgtz $t9, 1b \n"
- " addiu %[dst], %[dst], 16 \n"
-
- "2: \n"
- "andi $t9, %[dst_width], 0xf \n" // residue
- "beqz $t9, 3f \n"
- " nop \n"
-
- "21: \n"
- "lbu $t0, 0(%[src_ptr]) \n"
- "addiu %[src_ptr], %[src_ptr], 2 \n"
- "addiu $t9, $t9, -1 \n"
- "sb $t0, 0(%[dst]) \n"
- "bgtz $t9, 21b \n"
- " addiu %[dst], %[dst], 1 \n"
-
- "3: \n"
- ".set pop \n"
- : [src_ptr] "+r" (src_ptr),
- [dst] "+r" (dst)
- : [dst_width] "r" (dst_width)
- : "t0", "t1", "t2", "t3", "t4", "t5",
- "t6", "t7", "t8", "t9"
- );
-}
-
-void ScaleRowDown2Box_MIPS_DSPR2(const uint8* src_ptr, ptrdiff_t src_stride,
- uint8* dst, int dst_width) {
- const uint8* t = src_ptr + src_stride;
-
- __asm__ __volatile__ (
- ".set push \n"
- ".set noreorder \n"
-
- "srl $t9, %[dst_width], 3 \n" // iterations -> step 8
- "bltz $t9, 2f \n"
- " nop \n"
-
- ".p2align 2 \n"
- "1: \n"
- "lw $t0, 0(%[src_ptr]) \n" // |3|2|1|0|
- "lw $t1, 4(%[src_ptr]) \n" // |7|6|5|4|
- "lw $t2, 8(%[src_ptr]) \n" // |11|10|9|8|
- "lw $t3, 12(%[src_ptr]) \n" // |15|14|13|12|
- "lw $t4, 0(%[t]) \n" // |19|18|17|16|
- "lw $t5, 4(%[t]) \n" // |23|22|21|20|
- "lw $t6, 8(%[t]) \n" // |27|26|25|24|
- "lw $t7, 12(%[t]) \n" // |31|30|29|28|
- "addiu $t9, $t9, -1 \n"
- "srl $t8, $t0, 16 \n" // |X|X|3|2|
- "ins $t0, $t4, 16, 16 \n" // |17|16|1|0|
- "ins $t4, $t8, 0, 16 \n" // |19|18|3|2|
- "raddu.w.qb $t0, $t0 \n" // |17+16+1+0|
- "raddu.w.qb $t4, $t4 \n" // |19+18+3+2|
- "shra_r.w $t0, $t0, 2 \n" // |t0+2|>>2
- "shra_r.w $t4, $t4, 2 \n" // |t4+2|>>2
- "srl $t8, $t1, 16 \n" // |X|X|7|6|
- "ins $t1, $t5, 16, 16 \n" // |21|20|5|4|
- "ins $t5, $t8, 0, 16 \n" // |22|23|7|6|
- "raddu.w.qb $t1, $t1 \n" // |21+20+5+4|
- "raddu.w.qb $t5, $t5 \n" // |23+22+7+6|
- "shra_r.w $t1, $t1, 2 \n" // |t1+2|>>2
- "shra_r.w $t5, $t5, 2 \n" // |t5+2|>>2
- "srl $t8, $t2, 16 \n" // |X|X|11|10|
- "ins $t2, $t6, 16, 16 \n" // |25|24|9|8|
- "ins $t6, $t8, 0, 16 \n" // |27|26|11|10|
- "raddu.w.qb $t2, $t2 \n" // |25+24+9+8|
- "raddu.w.qb $t6, $t6 \n" // |27+26+11+10|
- "shra_r.w $t2, $t2, 2 \n" // |t2+2|>>2
- "shra_r.w $t6, $t6, 2 \n" // |t5+2|>>2
- "srl $t8, $t3, 16 \n" // |X|X|15|14|
- "ins $t3, $t7, 16, 16 \n" // |29|28|13|12|
- "ins $t7, $t8, 0, 16 \n" // |31|30|15|14|
- "raddu.w.qb $t3, $t3 \n" // |29+28+13+12|
- "raddu.w.qb $t7, $t7 \n" // |31+30+15+14|
- "shra_r.w $t3, $t3, 2 \n" // |t3+2|>>2
- "shra_r.w $t7, $t7, 2 \n" // |t7+2|>>2
- "addiu %[src_ptr], %[src_ptr], 16 \n"
- "addiu %[t], %[t], 16 \n"
- "sb $t0, 0(%[dst]) \n"
- "sb $t4, 1(%[dst]) \n"
- "sb $t1, 2(%[dst]) \n"
- "sb $t5, 3(%[dst]) \n"
- "sb $t2, 4(%[dst]) \n"
- "sb $t6, 5(%[dst]) \n"
- "sb $t3, 6(%[dst]) \n"
- "sb $t7, 7(%[dst]) \n"
- "bgtz $t9, 1b \n"
- " addiu %[dst], %[dst], 8 \n"
-
- "2: \n"
- "andi $t9, %[dst_width], 0x7 \n" // x = residue
- "beqz $t9, 3f \n"
- " nop \n"
-
- "21: \n"
- "lwr $t1, 0(%[src_ptr]) \n"
- "lwl $t1, 3(%[src_ptr]) \n"
- "lwr $t2, 0(%[t]) \n"
- "lwl $t2, 3(%[t]) \n"
- "srl $t8, $t1, 16 \n"
- "ins $t1, $t2, 16, 16 \n"
- "ins $t2, $t8, 0, 16 \n"
- "raddu.w.qb $t1, $t1 \n"
- "raddu.w.qb $t2, $t2 \n"
- "shra_r.w $t1, $t1, 2 \n"
- "shra_r.w $t2, $t2, 2 \n"
- "sb $t1, 0(%[dst]) \n"
- "sb $t2, 1(%[dst]) \n"
- "addiu %[src_ptr], %[src_ptr], 4 \n"
- "addiu $t9, $t9, -2 \n"
- "addiu %[t], %[t], 4 \n"
- "bgtz $t9, 21b \n"
- " addiu %[dst], %[dst], 2 \n"
-
- "3: \n"
- ".set pop \n"
-
- : [src_ptr] "+r" (src_ptr),
- [dst] "+r" (dst), [t] "+r" (t)
- : [dst_width] "r" (dst_width)
- : "t0", "t1", "t2", "t3", "t4", "t5",
- "t6", "t7", "t8", "t9"
- );
-}
-
-void ScaleRowDown4_MIPS_DSPR2(const uint8* src_ptr, ptrdiff_t src_stride,
- uint8* dst, int dst_width) {
- __asm__ __volatile__ (
- ".set push \n"
- ".set noreorder \n"
-
- "srl $t9, %[dst_width], 3 \n"
- "beqz $t9, 2f \n"
- " nop \n"
-
- ".p2align 2 \n"
- "1: \n"
- "lw $t1, 0(%[src_ptr]) \n" // |3|2|1|0|
- "lw $t2, 4(%[src_ptr]) \n" // |7|6|5|4|
- "lw $t3, 8(%[src_ptr]) \n" // |11|10|9|8|
- "lw $t4, 12(%[src_ptr]) \n" // |15|14|13|12|
- "lw $t5, 16(%[src_ptr]) \n" // |19|18|17|16|
- "lw $t6, 20(%[src_ptr]) \n" // |23|22|21|20|
- "lw $t7, 24(%[src_ptr]) \n" // |27|26|25|24|
- "lw $t8, 28(%[src_ptr]) \n" // |31|30|29|28|
- "precr.qb.ph $t1, $t2, $t1 \n" // |6|4|2|0|
- "precr.qb.ph $t2, $t4, $t3 \n" // |14|12|10|8|
- "precr.qb.ph $t5, $t6, $t5 \n" // |22|20|18|16|
- "precr.qb.ph $t6, $t8, $t7 \n" // |30|28|26|24|
- "precr.qb.ph $t1, $t2, $t1 \n" // |12|8|4|0|
- "precr.qb.ph $t5, $t6, $t5 \n" // |28|24|20|16|
- "addiu %[src_ptr], %[src_ptr], 32 \n"
- "addiu $t9, $t9, -1 \n"
- "sw $t1, 0(%[dst]) \n"
- "sw $t5, 4(%[dst]) \n"
- "bgtz $t9, 1b \n"
- " addiu %[dst], %[dst], 8 \n"
-
- "2: \n"
- "andi $t9, %[dst_width], 7 \n" // residue
- "beqz $t9, 3f \n"
- " nop \n"
-
- "21: \n"
- "lbu $t1, 0(%[src_ptr]) \n"
- "addiu %[src_ptr], %[src_ptr], 4 \n"
- "addiu $t9, $t9, -1 \n"
- "sb $t1, 0(%[dst]) \n"
- "bgtz $t9, 21b \n"
- " addiu %[dst], %[dst], 1 \n"
-
- "3: \n"
- ".set pop \n"
- : [src_ptr] "+r" (src_ptr),
- [dst] "+r" (dst)
- : [dst_width] "r" (dst_width)
- : "t1", "t2", "t3", "t4", "t5",
- "t6", "t7", "t8", "t9"
- );
-}
-
-void ScaleRowDown4Box_MIPS_DSPR2(const uint8* src_ptr, ptrdiff_t src_stride,
- uint8* dst, int dst_width) {
- intptr_t stride = src_stride;
- const uint8* s1 = src_ptr + stride;
- const uint8* s2 = s1 + stride;
- const uint8* s3 = s2 + stride;
-
- __asm__ __volatile__ (
- ".set push \n"
- ".set noreorder \n"
-
- "srl $t9, %[dst_width], 1 \n"
- "andi $t8, %[dst_width], 1 \n"
-
- ".p2align 2 \n"
- "1: \n"
- "lw $t0, 0(%[src_ptr]) \n" // |3|2|1|0|
- "lw $t1, 0(%[s1]) \n" // |7|6|5|4|
- "lw $t2, 0(%[s2]) \n" // |11|10|9|8|
- "lw $t3, 0(%[s3]) \n" // |15|14|13|12|
- "lw $t4, 4(%[src_ptr]) \n" // |19|18|17|16|
- "lw $t5, 4(%[s1]) \n" // |23|22|21|20|
- "lw $t6, 4(%[s2]) \n" // |27|26|25|24|
- "lw $t7, 4(%[s3]) \n" // |31|30|29|28|
- "raddu.w.qb $t0, $t0 \n" // |3 + 2 + 1 + 0|
- "raddu.w.qb $t1, $t1 \n" // |7 + 6 + 5 + 4|
- "raddu.w.qb $t2, $t2 \n" // |11 + 10 + 9 + 8|
- "raddu.w.qb $t3, $t3 \n" // |15 + 14 + 13 + 12|
- "raddu.w.qb $t4, $t4 \n" // |19 + 18 + 17 + 16|
- "raddu.w.qb $t5, $t5 \n" // |23 + 22 + 21 + 20|
- "raddu.w.qb $t6, $t6 \n" // |27 + 26 + 25 + 24|
- "raddu.w.qb $t7, $t7 \n" // |31 + 30 + 29 + 28|
- "add $t0, $t0, $t1 \n"
- "add $t1, $t2, $t3 \n"
- "add $t0, $t0, $t1 \n"
- "add $t4, $t4, $t5 \n"
- "add $t6, $t6, $t7 \n"
- "add $t4, $t4, $t6 \n"
- "shra_r.w $t0, $t0, 4 \n"
- "shra_r.w $t4, $t4, 4 \n"
- "sb $t0, 0(%[dst]) \n"
- "sb $t4, 1(%[dst]) \n"
- "addiu %[src_ptr], %[src_ptr], 8 \n"
- "addiu %[s1], %[s1], 8 \n"
- "addiu %[s2], %[s2], 8 \n"
- "addiu %[s3], %[s3], 8 \n"
- "addiu $t9, $t9, -1 \n"
- "bgtz $t9, 1b \n"
- " addiu %[dst], %[dst], 2 \n"
- "beqz $t8, 2f \n"
- " nop \n"
-
- "lw $t0, 0(%[src_ptr]) \n" // |3|2|1|0|
- "lw $t1, 0(%[s1]) \n" // |7|6|5|4|
- "lw $t2, 0(%[s2]) \n" // |11|10|9|8|
- "lw $t3, 0(%[s3]) \n" // |15|14|13|12|
- "raddu.w.qb $t0, $t0 \n" // |3 + 2 + 1 + 0|
- "raddu.w.qb $t1, $t1 \n" // |7 + 6 + 5 + 4|
- "raddu.w.qb $t2, $t2 \n" // |11 + 10 + 9 + 8|
- "raddu.w.qb $t3, $t3 \n" // |15 + 14 + 13 + 12|
- "add $t0, $t0, $t1 \n"
- "add $t1, $t2, $t3 \n"
- "add $t0, $t0, $t1 \n"
- "shra_r.w $t0, $t0, 4 \n"
- "sb $t0, 0(%[dst]) \n"
-
- "2: \n"
- ".set pop \n"
-
- : [src_ptr] "+r" (src_ptr),
- [dst] "+r" (dst),
- [s1] "+r" (s1),
- [s2] "+r" (s2),
- [s3] "+r" (s3)
- : [dst_width] "r" (dst_width)
- : "t0", "t1", "t2", "t3", "t4", "t5",
- "t6","t7", "t8", "t9"
- );
-}
-
-void ScaleRowDown34_MIPS_DSPR2(const uint8* src_ptr, ptrdiff_t src_stride,
- uint8* dst, int dst_width) {
- __asm__ __volatile__ (
- ".set push \n"
- ".set noreorder \n"
- ".p2align 2 \n"
- "1: \n"
- "lw $t1, 0(%[src_ptr]) \n" // |3|2|1|0|
- "lw $t2, 4(%[src_ptr]) \n" // |7|6|5|4|
- "lw $t3, 8(%[src_ptr]) \n" // |11|10|9|8|
- "lw $t4, 12(%[src_ptr]) \n" // |15|14|13|12|
- "lw $t5, 16(%[src_ptr]) \n" // |19|18|17|16|
- "lw $t6, 20(%[src_ptr]) \n" // |23|22|21|20|
- "lw $t7, 24(%[src_ptr]) \n" // |27|26|25|24|
- "lw $t8, 28(%[src_ptr]) \n" // |31|30|29|28|
- "precrq.qb.ph $t0, $t2, $t4 \n" // |7|5|15|13|
- "precrq.qb.ph $t9, $t6, $t8 \n" // |23|21|31|30|
- "addiu %[dst_width], %[dst_width], -24 \n"
- "ins $t1, $t1, 8, 16 \n" // |3|1|0|X|
- "ins $t4, $t0, 8, 16 \n" // |X|15|13|12|
- "ins $t5, $t5, 8, 16 \n" // |19|17|16|X|
- "ins $t8, $t9, 8, 16 \n" // |X|31|29|28|
- "addiu %[src_ptr], %[src_ptr], 32 \n"
- "packrl.ph $t0, $t3, $t0 \n" // |9|8|7|5|
- "packrl.ph $t9, $t7, $t9 \n" // |25|24|23|21|
- "prepend $t1, $t2, 8 \n" // |4|3|1|0|
- "prepend $t3, $t4, 24 \n" // |15|13|12|11|
- "prepend $t5, $t6, 8 \n" // |20|19|17|16|
- "prepend $t7, $t8, 24 \n" // |31|29|28|27|
- "sw $t1, 0(%[dst]) \n"
- "sw $t0, 4(%[dst]) \n"
- "sw $t3, 8(%[dst]) \n"
- "sw $t5, 12(%[dst]) \n"
- "sw $t9, 16(%[dst]) \n"
- "sw $t7, 20(%[dst]) \n"
- "bnez %[dst_width], 1b \n"
- " addiu %[dst], %[dst], 24 \n"
- ".set pop \n"
- : [src_ptr] "+r" (src_ptr),
- [dst] "+r" (dst),
- [dst_width] "+r" (dst_width)
- :
- : "t0", "t1", "t2", "t3", "t4", "t5",
- "t6","t7", "t8", "t9"
- );
-}
-
-void ScaleRowDown34_0_Box_MIPS_DSPR2(const uint8* src_ptr, ptrdiff_t src_stride,
- uint8* d, int dst_width) {
- __asm__ __volatile__ (
- ".set push \n"
- ".set noreorder \n"
- "repl.ph $t3, 3 \n" // 0x00030003
-
- ".p2align 2 \n"
- "1: \n"
- "lw $t0, 0(%[src_ptr]) \n" // |S3|S2|S1|S0|
- "lwx $t1, %[src_stride](%[src_ptr]) \n" // |T3|T2|T1|T0|
- "rotr $t2, $t0, 8 \n" // |S0|S3|S2|S1|
- "rotr $t6, $t1, 8 \n" // |T0|T3|T2|T1|
- "muleu_s.ph.qbl $t4, $t2, $t3 \n" // |S0*3|S3*3|
- "muleu_s.ph.qbl $t5, $t6, $t3 \n" // |T0*3|T3*3|
- "andi $t0, $t2, 0xFFFF \n" // |0|0|S2|S1|
- "andi $t1, $t6, 0xFFFF \n" // |0|0|T2|T1|
- "raddu.w.qb $t0, $t0 \n"
- "raddu.w.qb $t1, $t1 \n"
- "shra_r.w $t0, $t0, 1 \n"
- "shra_r.w $t1, $t1, 1 \n"
- "preceu.ph.qbr $t2, $t2 \n" // |0|S2|0|S1|
- "preceu.ph.qbr $t6, $t6 \n" // |0|T2|0|T1|
- "rotr $t2, $t2, 16 \n" // |0|S1|0|S2|
- "rotr $t6, $t6, 16 \n" // |0|T1|0|T2|
- "addu.ph $t2, $t2, $t4 \n"
- "addu.ph $t6, $t6, $t5 \n"
- "sll $t5, $t0, 1 \n"
- "add $t0, $t5, $t0 \n"
- "shra_r.ph $t2, $t2, 2 \n"
- "shra_r.ph $t6, $t6, 2 \n"
- "shll.ph $t4, $t2, 1 \n"
- "addq.ph $t4, $t4, $t2 \n"
- "addu $t0, $t0, $t1 \n"
- "addiu %[src_ptr], %[src_ptr], 4 \n"
- "shra_r.w $t0, $t0, 2 \n"
- "addu.ph $t6, $t6, $t4 \n"
- "shra_r.ph $t6, $t6, 2 \n"
- "srl $t1, $t6, 16 \n"
- "addiu %[dst_width], %[dst_width], -3 \n"
- "sb $t1, 0(%[d]) \n"
- "sb $t0, 1(%[d]) \n"
- "sb $t6, 2(%[d]) \n"
- "bgtz %[dst_width], 1b \n"
- " addiu %[d], %[d], 3 \n"
- "3: \n"
- ".set pop \n"
- : [src_ptr] "+r" (src_ptr),
- [src_stride] "+r" (src_stride),
- [d] "+r" (d),
- [dst_width] "+r" (dst_width)
- :
- : "t0", "t1", "t2", "t3",
- "t4", "t5", "t6"
- );
-}
-
-void ScaleRowDown34_1_Box_MIPS_DSPR2(const uint8* src_ptr, ptrdiff_t src_stride,
- uint8* d, int dst_width) {
- __asm__ __volatile__ (
- ".set push \n"
- ".set noreorder \n"
- "repl.ph $t2, 3 \n" // 0x00030003
-
- ".p2align 2 \n"
- "1: \n"
- "lw $t0, 0(%[src_ptr]) \n" // |S3|S2|S1|S0|
- "lwx $t1, %[src_stride](%[src_ptr]) \n" // |T3|T2|T1|T0|
- "rotr $t4, $t0, 8 \n" // |S0|S3|S2|S1|
- "rotr $t6, $t1, 8 \n" // |T0|T3|T2|T1|
- "muleu_s.ph.qbl $t3, $t4, $t2 \n" // |S0*3|S3*3|
- "muleu_s.ph.qbl $t5, $t6, $t2 \n" // |T0*3|T3*3|
- "andi $t0, $t4, 0xFFFF \n" // |0|0|S2|S1|
- "andi $t1, $t6, 0xFFFF \n" // |0|0|T2|T1|
- "raddu.w.qb $t0, $t0 \n"
- "raddu.w.qb $t1, $t1 \n"
- "shra_r.w $t0, $t0, 1 \n"
- "shra_r.w $t1, $t1, 1 \n"
- "preceu.ph.qbr $t4, $t4 \n" // |0|S2|0|S1|
- "preceu.ph.qbr $t6, $t6 \n" // |0|T2|0|T1|
- "rotr $t4, $t4, 16 \n" // |0|S1|0|S2|
- "rotr $t6, $t6, 16 \n" // |0|T1|0|T2|
- "addu.ph $t4, $t4, $t3 \n"
- "addu.ph $t6, $t6, $t5 \n"
- "shra_r.ph $t6, $t6, 2 \n"
- "shra_r.ph $t4, $t4, 2 \n"
- "addu.ph $t6, $t6, $t4 \n"
- "addiu %[src_ptr], %[src_ptr], 4 \n"
- "shra_r.ph $t6, $t6, 1 \n"
- "addu $t0, $t0, $t1 \n"
- "addiu %[dst_width], %[dst_width], -3 \n"
- "shra_r.w $t0, $t0, 1 \n"
- "srl $t1, $t6, 16 \n"
- "sb $t1, 0(%[d]) \n"
- "sb $t0, 1(%[d]) \n"
- "sb $t6, 2(%[d]) \n"
- "bgtz %[dst_width], 1b \n"
- " addiu %[d], %[d], 3 \n"
- "3: \n"
- ".set pop \n"
- : [src_ptr] "+r" (src_ptr),
- [src_stride] "+r" (src_stride),
- [d] "+r" (d),
- [dst_width] "+r" (dst_width)
- :
- : "t0", "t1", "t2", "t3",
- "t4", "t5", "t6"
- );
-}
-
-void ScaleRowDown38_MIPS_DSPR2(const uint8* src_ptr, ptrdiff_t src_stride,
- uint8* dst, int dst_width) {
- __asm__ __volatile__ (
- ".set push \n"
- ".set noreorder \n"
-
- ".p2align 2 \n"
- "1: \n"
- "lw $t0, 0(%[src_ptr]) \n" // |3|2|1|0|
- "lw $t1, 4(%[src_ptr]) \n" // |7|6|5|4|
- "lw $t2, 8(%[src_ptr]) \n" // |11|10|9|8|
- "lw $t3, 12(%[src_ptr]) \n" // |15|14|13|12|
- "lw $t4, 16(%[src_ptr]) \n" // |19|18|17|16|
- "lw $t5, 20(%[src_ptr]) \n" // |23|22|21|20|
- "lw $t6, 24(%[src_ptr]) \n" // |27|26|25|24|
- "lw $t7, 28(%[src_ptr]) \n" // |31|30|29|28|
- "wsbh $t0, $t0 \n" // |2|3|0|1|
- "wsbh $t6, $t6 \n" // |26|27|24|25|
- "srl $t0, $t0, 8 \n" // |X|2|3|0|
- "srl $t3, $t3, 16 \n" // |X|X|15|14|
- "srl $t5, $t5, 16 \n" // |X|X|23|22|
- "srl $t7, $t7, 16 \n" // |X|X|31|30|
- "ins $t1, $t2, 24, 8 \n" // |8|6|5|4|
- "ins $t6, $t5, 0, 8 \n" // |26|27|24|22|
- "ins $t1, $t0, 0, 16 \n" // |8|6|3|0|
- "ins $t6, $t7, 24, 8 \n" // |30|27|24|22|
- "prepend $t2, $t3, 24 \n" // |X|15|14|11|
- "ins $t4, $t4, 16, 8 \n" // |19|16|17|X|
- "ins $t4, $t2, 0, 16 \n" // |19|16|14|11|
- "addiu %[src_ptr], %[src_ptr], 32 \n"
- "addiu %[dst_width], %[dst_width], -12 \n"
- "addiu $t8,%[dst_width], -12 \n"
- "sw $t1, 0(%[dst]) \n"
- "sw $t4, 4(%[dst]) \n"
- "sw $t6, 8(%[dst]) \n"
- "bgez $t8, 1b \n"
- " addiu %[dst], %[dst], 12 \n"
- ".set pop \n"
- : [src_ptr] "+r" (src_ptr),
- [dst] "+r" (dst),
- [dst_width] "+r" (dst_width)
- :
- : "t0", "t1", "t2", "t3", "t4",
- "t5", "t6", "t7", "t8"
- );
-}
-
-void ScaleRowDown38_2_Box_MIPS_DSPR2(const uint8* src_ptr, ptrdiff_t src_stride,
- uint8* dst_ptr, int dst_width) {
- intptr_t stride = src_stride;
- const uint8* t = src_ptr + stride;
- const int c = 0x2AAA;
-
- __asm__ __volatile__ (
- ".set push \n"
- ".set noreorder \n"
-
- ".p2align 2 \n"
- "1: \n"
- "lw $t0, 0(%[src_ptr]) \n" // |S3|S2|S1|S0|
- "lw $t1, 4(%[src_ptr]) \n" // |S7|S6|S5|S4|
- "lw $t2, 0(%[t]) \n" // |T3|T2|T1|T0|
- "lw $t3, 4(%[t]) \n" // |T7|T6|T5|T4|
- "rotr $t1, $t1, 16 \n" // |S5|S4|S7|S6|
- "packrl.ph $t4, $t1, $t3 \n" // |S7|S6|T7|T6|
- "packrl.ph $t5, $t3, $t1 \n" // |T5|T4|S5|S4|
- "raddu.w.qb $t4, $t4 \n" // S7+S6+T7+T6
- "raddu.w.qb $t5, $t5 \n" // T5+T4+S5+S4
- "precrq.qb.ph $t6, $t0, $t2 \n" // |S3|S1|T3|T1|
- "precrq.qb.ph $t6, $t6, $t6 \n" // |S3|T3|S3|T3|
- "srl $t4, $t4, 2 \n" // t4 / 4
- "srl $t6, $t6, 16 \n" // |0|0|S3|T3|
- "raddu.w.qb $t6, $t6 \n" // 0+0+S3+T3
- "addu $t6, $t5, $t6 \n"
- "mul $t6, $t6, %[c] \n" // t6 * 0x2AAA
- "sll $t0, $t0, 8 \n" // |S2|S1|S0|0|
- "sll $t2, $t2, 8 \n" // |T2|T1|T0|0|
- "raddu.w.qb $t0, $t0 \n" // S2+S1+S0+0
- "raddu.w.qb $t2, $t2 \n" // T2+T1+T0+0
- "addu $t0, $t0, $t2 \n"
- "mul $t0, $t0, %[c] \n" // t0 * 0x2AAA
- "addiu %[src_ptr], %[src_ptr], 8 \n"
- "addiu %[t], %[t], 8 \n"
- "addiu %[dst_width], %[dst_width], -3 \n"
- "addiu %[dst_ptr], %[dst_ptr], 3 \n"
- "srl $t6, $t6, 16 \n"
- "srl $t0, $t0, 16 \n"
- "sb $t4, -1(%[dst_ptr]) \n"
- "sb $t6, -2(%[dst_ptr]) \n"
- "bgtz %[dst_width], 1b \n"
- " sb $t0, -3(%[dst_ptr]) \n"
- ".set pop \n"
- : [src_ptr] "+r" (src_ptr),
- [dst_ptr] "+r" (dst_ptr),
- [t] "+r" (t),
- [dst_width] "+r" (dst_width)
- : [c] "r" (c)
- : "t0", "t1", "t2", "t3", "t4", "t5", "t6"
- );
-}
-
-void ScaleRowDown38_3_Box_MIPS_DSPR2(const uint8* src_ptr,
- ptrdiff_t src_stride,
- uint8* dst_ptr, int dst_width) {
- intptr_t stride = src_stride;
- const uint8* s1 = src_ptr + stride;
- stride += stride;
- const uint8* s2 = src_ptr + stride;
- const int c1 = 0x1C71;
- const int c2 = 0x2AAA;
-
- __asm__ __volatile__ (
- ".set push \n"
- ".set noreorder \n"
-
- ".p2align 2 \n"
- "1: \n"
- "lw $t0, 0(%[src_ptr]) \n" // |S3|S2|S1|S0|
- "lw $t1, 4(%[src_ptr]) \n" // |S7|S6|S5|S4|
- "lw $t2, 0(%[s1]) \n" // |T3|T2|T1|T0|
- "lw $t3, 4(%[s1]) \n" // |T7|T6|T5|T4|
- "lw $t4, 0(%[s2]) \n" // |R3|R2|R1|R0|
- "lw $t5, 4(%[s2]) \n" // |R7|R6|R5|R4|
- "rotr $t1, $t1, 16 \n" // |S5|S4|S7|S6|
- "packrl.ph $t6, $t1, $t3 \n" // |S7|S6|T7|T6|
- "raddu.w.qb $t6, $t6 \n" // S7+S6+T7+T6
- "packrl.ph $t7, $t3, $t1 \n" // |T5|T4|S5|S4|
- "raddu.w.qb $t7, $t7 \n" // T5+T4+S5+S4
- "sll $t8, $t5, 16 \n" // |R5|R4|0|0|
- "raddu.w.qb $t8, $t8 \n" // R5+R4
- "addu $t7, $t7, $t8 \n"
- "srl $t8, $t5, 16 \n" // |0|0|R7|R6|
- "raddu.w.qb $t8, $t8 \n" // R7 + R6
- "addu $t6, $t6, $t8 \n"
- "mul $t6, $t6, %[c2] \n" // t6 * 0x2AAA
- "precrq.qb.ph $t8, $t0, $t2 \n" // |S3|S1|T3|T1|
- "precrq.qb.ph $t8, $t8, $t4 \n" // |S3|T3|R3|R1|
- "srl $t8, $t8, 8 \n" // |0|S3|T3|R3|
- "raddu.w.qb $t8, $t8 \n" // S3 + T3 + R3
- "addu $t7, $t7, $t8 \n"
- "mul $t7, $t7, %[c1] \n" // t7 * 0x1C71
- "sll $t0, $t0, 8 \n" // |S2|S1|S0|0|
- "sll $t2, $t2, 8 \n" // |T2|T1|T0|0|
- "sll $t4, $t4, 8 \n" // |R2|R1|R0|0|
- "raddu.w.qb $t0, $t0 \n"
- "raddu.w.qb $t2, $t2 \n"
- "raddu.w.qb $t4, $t4 \n"
- "addu $t0, $t0, $t2 \n"
- "addu $t0, $t0, $t4 \n"
- "mul $t0, $t0, %[c1] \n" // t0 * 0x1C71
- "addiu %[src_ptr], %[src_ptr], 8 \n"
- "addiu %[s1], %[s1], 8 \n"
- "addiu %[s2], %[s2], 8 \n"
- "addiu %[dst_width], %[dst_width], -3 \n"
- "addiu %[dst_ptr], %[dst_ptr], 3 \n"
- "srl $t6, $t6, 16 \n"
- "srl $t7, $t7, 16 \n"
- "srl $t0, $t0, 16 \n"
- "sb $t6, -1(%[dst_ptr]) \n"
- "sb $t7, -2(%[dst_ptr]) \n"
- "bgtz %[dst_width], 1b \n"
- " sb $t0, -3(%[dst_ptr]) \n"
- ".set pop \n"
- : [src_ptr] "+r" (src_ptr),
- [dst_ptr] "+r" (dst_ptr),
- [s1] "+r" (s1),
- [s2] "+r" (s2),
- [dst_width] "+r" (dst_width)
- : [c1] "r" (c1), [c2] "r" (c2)
- : "t0", "t1", "t2", "t3", "t4",
- "t5", "t6", "t7", "t8"
- );
-}
-
-#endif // defined(__mips_dsp) && (__mips_dsp_rev >= 2)
-
-#ifdef __cplusplus
-} // extern "C"
-} // namespace libyuv
-#endif
-
diff --git a/third_party/aom/third_party/libyuv/source/scale_neon.cc b/third_party/aom/third_party/libyuv/source/scale_neon.cc
deleted file mode 100644
index 7825878e9..000000000
--- a/third_party/aom/third_party/libyuv/source/scale_neon.cc
+++ /dev/null
@@ -1,1037 +0,0 @@
-/*
- * Copyright 2011 The LibYuv Project Authors. All rights reserved.
- *
- * Use of this source code is governed by a BSD-style license
- * that can be found in the LICENSE file in the root of the source
- * tree. An additional intellectual property rights grant can be found
- * in the file PATENTS. All contributing project authors may
- * be found in the AUTHORS file in the root of the source tree.
- */
-
-#include "libyuv/row.h"
-
-#ifdef __cplusplus
-namespace libyuv {
-extern "C" {
-#endif
-
-// This module is for GCC Neon.
-#if !defined(LIBYUV_DISABLE_NEON) && defined(__ARM_NEON__) && \
- !defined(__aarch64__)
-
-// NEON downscalers with interpolation.
-// Provided by Fritz Koenig
-
-// Read 32x1 throw away even pixels, and write 16x1.
-void ScaleRowDown2_NEON(const uint8* src_ptr, ptrdiff_t src_stride,
- uint8* dst, int dst_width) {
- asm volatile (
- ".p2align 2 \n"
- "1: \n"
- // load even pixels into q0, odd into q1
- MEMACCESS(0)
- "vld2.8 {q0, q1}, [%0]! \n"
- "subs %2, %2, #16 \n" // 16 processed per loop
- MEMACCESS(1)
- "vst1.8 {q1}, [%1]! \n" // store odd pixels
- "bgt 1b \n"
- : "+r"(src_ptr), // %0
- "+r"(dst), // %1
- "+r"(dst_width) // %2
- :
- : "q0", "q1" // Clobber List
- );
-}
-
-// Read 32x1 average down and write 16x1.
-void ScaleRowDown2Linear_NEON(const uint8* src_ptr, ptrdiff_t src_stride,
- uint8* dst, int dst_width) {
- asm volatile (
- ".p2align 2 \n"
- "1: \n"
- MEMACCESS(0)
- "vld1.8 {q0, q1}, [%0]! \n" // load pixels and post inc
- "subs %2, %2, #16 \n" // 16 processed per loop
- "vpaddl.u8 q0, q0 \n" // add adjacent
- "vpaddl.u8 q1, q1 \n"
- "vrshrn.u16 d0, q0, #1 \n" // downshift, round and pack
- "vrshrn.u16 d1, q1, #1 \n"
- MEMACCESS(1)
- "vst1.8 {q0}, [%1]! \n"
- "bgt 1b \n"
- : "+r"(src_ptr), // %0
- "+r"(dst), // %1
- "+r"(dst_width) // %2
- :
- : "q0", "q1" // Clobber List
- );
-}
-
-// Read 32x2 average down and write 16x1.
-void ScaleRowDown2Box_NEON(const uint8* src_ptr, ptrdiff_t src_stride,
- uint8* dst, int dst_width) {
- asm volatile (
- // change the stride to row 2 pointer
- "add %1, %0 \n"
- ".p2align 2 \n"
- "1: \n"
- MEMACCESS(0)
- "vld1.8 {q0, q1}, [%0]! \n" // load row 1 and post inc
- MEMACCESS(1)
- "vld1.8 {q2, q3}, [%1]! \n" // load row 2 and post inc
- "subs %3, %3, #16 \n" // 16 processed per loop
- "vpaddl.u8 q0, q0 \n" // row 1 add adjacent
- "vpaddl.u8 q1, q1 \n"
- "vpadal.u8 q0, q2 \n" // row 2 add adjacent + row1
- "vpadal.u8 q1, q3 \n"
- "vrshrn.u16 d0, q0, #2 \n" // downshift, round and pack
- "vrshrn.u16 d1, q1, #2 \n"
- MEMACCESS(2)
- "vst1.8 {q0}, [%2]! \n"
- "bgt 1b \n"
- : "+r"(src_ptr), // %0
- "+r"(src_stride), // %1
- "+r"(dst), // %2
- "+r"(dst_width) // %3
- :
- : "q0", "q1", "q2", "q3" // Clobber List
- );
-}
-
-void ScaleRowDown4_NEON(const uint8* src_ptr, ptrdiff_t src_stride,
- uint8* dst_ptr, int dst_width) {
- asm volatile (
- ".p2align 2 \n"
- "1: \n"
- MEMACCESS(0)
- "vld4.8 {d0, d1, d2, d3}, [%0]! \n" // src line 0
- "subs %2, %2, #8 \n" // 8 processed per loop
- MEMACCESS(1)
- "vst1.8 {d2}, [%1]! \n"
- "bgt 1b \n"
- : "+r"(src_ptr), // %0
- "+r"(dst_ptr), // %1
- "+r"(dst_width) // %2
- :
- : "q0", "q1", "memory", "cc"
- );
-}
-
-void ScaleRowDown4Box_NEON(const uint8* src_ptr, ptrdiff_t src_stride,
- uint8* dst_ptr, int dst_width) {
- const uint8* src_ptr1 = src_ptr + src_stride;
- const uint8* src_ptr2 = src_ptr + src_stride * 2;
- const uint8* src_ptr3 = src_ptr + src_stride * 3;
-asm volatile (
- ".p2align 2 \n"
- "1: \n"
- MEMACCESS(0)
- "vld1.8 {q0}, [%0]! \n" // load up 16x4
- MEMACCESS(3)
- "vld1.8 {q1}, [%3]! \n"
- MEMACCESS(4)
- "vld1.8 {q2}, [%4]! \n"
- MEMACCESS(5)
- "vld1.8 {q3}, [%5]! \n"
- "subs %2, %2, #4 \n"
- "vpaddl.u8 q0, q0 \n"
- "vpadal.u8 q0, q1 \n"
- "vpadal.u8 q0, q2 \n"
- "vpadal.u8 q0, q3 \n"
- "vpaddl.u16 q0, q0 \n"
- "vrshrn.u32 d0, q0, #4 \n" // divide by 16 w/rounding
- "vmovn.u16 d0, q0 \n"
- MEMACCESS(1)
- "vst1.32 {d0[0]}, [%1]! \n"
- "bgt 1b \n"
- : "+r"(src_ptr), // %0
- "+r"(dst_ptr), // %1
- "+r"(dst_width), // %2
- "+r"(src_ptr1), // %3
- "+r"(src_ptr2), // %4
- "+r"(src_ptr3) // %5
- :
- : "q0", "q1", "q2", "q3", "memory", "cc"
- );
-}
-
-// Down scale from 4 to 3 pixels. Use the neon multilane read/write
-// to load up the every 4th pixel into a 4 different registers.
-// Point samples 32 pixels to 24 pixels.
-void ScaleRowDown34_NEON(const uint8* src_ptr,
- ptrdiff_t src_stride,
- uint8* dst_ptr, int dst_width) {
- asm volatile (
- ".p2align 2 \n"
- "1: \n"
- MEMACCESS(0)
- "vld4.8 {d0, d1, d2, d3}, [%0]! \n" // src line 0
- "subs %2, %2, #24 \n"
- "vmov d2, d3 \n" // order d0, d1, d2
- MEMACCESS(1)
- "vst3.8 {d0, d1, d2}, [%1]! \n"
- "bgt 1b \n"
- : "+r"(src_ptr), // %0
- "+r"(dst_ptr), // %1
- "+r"(dst_width) // %2
- :
- : "d0", "d1", "d2", "d3", "memory", "cc"
- );
-}
-
-void ScaleRowDown34_0_Box_NEON(const uint8* src_ptr,
- ptrdiff_t src_stride,
- uint8* dst_ptr, int dst_width) {
- asm volatile (
- "vmov.u8 d24, #3 \n"
- "add %3, %0 \n"
- ".p2align 2 \n"
- "1: \n"
- MEMACCESS(0)
- "vld4.8 {d0, d1, d2, d3}, [%0]! \n" // src line 0
- MEMACCESS(3)
- "vld4.8 {d4, d5, d6, d7}, [%3]! \n" // src line 1
- "subs %2, %2, #24 \n"
-
- // filter src line 0 with src line 1
- // expand chars to shorts to allow for room
- // when adding lines together
- "vmovl.u8 q8, d4 \n"
- "vmovl.u8 q9, d5 \n"
- "vmovl.u8 q10, d6 \n"
- "vmovl.u8 q11, d7 \n"
-
- // 3 * line_0 + line_1
- "vmlal.u8 q8, d0, d24 \n"
- "vmlal.u8 q9, d1, d24 \n"
- "vmlal.u8 q10, d2, d24 \n"
- "vmlal.u8 q11, d3, d24 \n"
-
- // (3 * line_0 + line_1) >> 2
- "vqrshrn.u16 d0, q8, #2 \n"
- "vqrshrn.u16 d1, q9, #2 \n"
- "vqrshrn.u16 d2, q10, #2 \n"
- "vqrshrn.u16 d3, q11, #2 \n"
-
- // a0 = (src[0] * 3 + s[1] * 1) >> 2
- "vmovl.u8 q8, d1 \n"
- "vmlal.u8 q8, d0, d24 \n"
- "vqrshrn.u16 d0, q8, #2 \n"
-
- // a1 = (src[1] * 1 + s[2] * 1) >> 1
- "vrhadd.u8 d1, d1, d2 \n"
-
- // a2 = (src[2] * 1 + s[3] * 3) >> 2
- "vmovl.u8 q8, d2 \n"
- "vmlal.u8 q8, d3, d24 \n"
- "vqrshrn.u16 d2, q8, #2 \n"
-
- MEMACCESS(1)
- "vst3.8 {d0, d1, d2}, [%1]! \n"
-
- "bgt 1b \n"
- : "+r"(src_ptr), // %0
- "+r"(dst_ptr), // %1
- "+r"(dst_width), // %2
- "+r"(src_stride) // %3
- :
- : "q0", "q1", "q2", "q3", "q8", "q9", "q10", "q11", "d24", "memory", "cc"
- );
-}
-
-void ScaleRowDown34_1_Box_NEON(const uint8* src_ptr,
- ptrdiff_t src_stride,
- uint8* dst_ptr, int dst_width) {
- asm volatile (
- "vmov.u8 d24, #3 \n"
- "add %3, %0 \n"
- ".p2align 2 \n"
- "1: \n"
- MEMACCESS(0)
- "vld4.8 {d0, d1, d2, d3}, [%0]! \n" // src line 0
- MEMACCESS(3)
- "vld4.8 {d4, d5, d6, d7}, [%3]! \n" // src line 1
- "subs %2, %2, #24 \n"
- // average src line 0 with src line 1
- "vrhadd.u8 q0, q0, q2 \n"
- "vrhadd.u8 q1, q1, q3 \n"
-
- // a0 = (src[0] * 3 + s[1] * 1) >> 2
- "vmovl.u8 q3, d1 \n"
- "vmlal.u8 q3, d0, d24 \n"
- "vqrshrn.u16 d0, q3, #2 \n"
-
- // a1 = (src[1] * 1 + s[2] * 1) >> 1
- "vrhadd.u8 d1, d1, d2 \n"
-
- // a2 = (src[2] * 1 + s[3] * 3) >> 2
- "vmovl.u8 q3, d2 \n"
- "vmlal.u8 q3, d3, d24 \n"
- "vqrshrn.u16 d2, q3, #2 \n"
-
- MEMACCESS(1)
- "vst3.8 {d0, d1, d2}, [%1]! \n"
- "bgt 1b \n"
- : "+r"(src_ptr), // %0
- "+r"(dst_ptr), // %1
- "+r"(dst_width), // %2
- "+r"(src_stride) // %3
- :
- : "r4", "q0", "q1", "q2", "q3", "d24", "memory", "cc"
- );
-}
-
-#define HAS_SCALEROWDOWN38_NEON
-static uvec8 kShuf38 =
- { 0, 3, 6, 8, 11, 14, 16, 19, 22, 24, 27, 30, 0, 0, 0, 0 };
-static uvec8 kShuf38_2 =
- { 0, 8, 16, 2, 10, 17, 4, 12, 18, 6, 14, 19, 0, 0, 0, 0 };
-static vec16 kMult38_Div6 =
- { 65536 / 12, 65536 / 12, 65536 / 12, 65536 / 12,
- 65536 / 12, 65536 / 12, 65536 / 12, 65536 / 12 };
-static vec16 kMult38_Div9 =
- { 65536 / 18, 65536 / 18, 65536 / 18, 65536 / 18,
- 65536 / 18, 65536 / 18, 65536 / 18, 65536 / 18 };
-
-// 32 -> 12
-void ScaleRowDown38_NEON(const uint8* src_ptr,
- ptrdiff_t src_stride,
- uint8* dst_ptr, int dst_width) {
- asm volatile (
- MEMACCESS(3)
- "vld1.8 {q3}, [%3] \n"
- ".p2align 2 \n"
- "1: \n"
- MEMACCESS(0)
- "vld1.8 {d0, d1, d2, d3}, [%0]! \n"
- "subs %2, %2, #12 \n"
- "vtbl.u8 d4, {d0, d1, d2, d3}, d6 \n"
- "vtbl.u8 d5, {d0, d1, d2, d3}, d7 \n"
- MEMACCESS(1)
- "vst1.8 {d4}, [%1]! \n"
- MEMACCESS(1)
- "vst1.32 {d5[0]}, [%1]! \n"
- "bgt 1b \n"
- : "+r"(src_ptr), // %0
- "+r"(dst_ptr), // %1
- "+r"(dst_width) // %2
- : "r"(&kShuf38) // %3
- : "d0", "d1", "d2", "d3", "d4", "d5", "memory", "cc"
- );
-}
-
-// 32x3 -> 12x1
-void OMITFP ScaleRowDown38_3_Box_NEON(const uint8* src_ptr,
- ptrdiff_t src_stride,
- uint8* dst_ptr, int dst_width) {
- const uint8* src_ptr1 = src_ptr + src_stride * 2;
-
- asm volatile (
- MEMACCESS(5)
- "vld1.16 {q13}, [%5] \n"
- MEMACCESS(6)
- "vld1.8 {q14}, [%6] \n"
- MEMACCESS(7)
- "vld1.8 {q15}, [%7] \n"
- "add %3, %0 \n"
- ".p2align 2 \n"
- "1: \n"
-
- // d0 = 00 40 01 41 02 42 03 43
- // d1 = 10 50 11 51 12 52 13 53
- // d2 = 20 60 21 61 22 62 23 63
- // d3 = 30 70 31 71 32 72 33 73
- MEMACCESS(0)
- "vld4.8 {d0, d1, d2, d3}, [%0]! \n"
- MEMACCESS(3)
- "vld4.8 {d4, d5, d6, d7}, [%3]! \n"
- MEMACCESS(4)
- "vld4.8 {d16, d17, d18, d19}, [%4]! \n"
- "subs %2, %2, #12 \n"
-
- // Shuffle the input data around to get align the data
- // so adjacent data can be added. 0,1 - 2,3 - 4,5 - 6,7
- // d0 = 00 10 01 11 02 12 03 13
- // d1 = 40 50 41 51 42 52 43 53
- "vtrn.u8 d0, d1 \n"
- "vtrn.u8 d4, d5 \n"
- "vtrn.u8 d16, d17 \n"
-
- // d2 = 20 30 21 31 22 32 23 33
- // d3 = 60 70 61 71 62 72 63 73
- "vtrn.u8 d2, d3 \n"
- "vtrn.u8 d6, d7 \n"
- "vtrn.u8 d18, d19 \n"
-
- // d0 = 00+10 01+11 02+12 03+13
- // d2 = 40+50 41+51 42+52 43+53
- "vpaddl.u8 q0, q0 \n"
- "vpaddl.u8 q2, q2 \n"
- "vpaddl.u8 q8, q8 \n"
-
- // d3 = 60+70 61+71 62+72 63+73
- "vpaddl.u8 d3, d3 \n"
- "vpaddl.u8 d7, d7 \n"
- "vpaddl.u8 d19, d19 \n"
-
- // combine source lines
- "vadd.u16 q0, q2 \n"
- "vadd.u16 q0, q8 \n"
- "vadd.u16 d4, d3, d7 \n"
- "vadd.u16 d4, d19 \n"
-
- // dst_ptr[3] = (s[6 + st * 0] + s[7 + st * 0]
- // + s[6 + st * 1] + s[7 + st * 1]
- // + s[6 + st * 2] + s[7 + st * 2]) / 6
- "vqrdmulh.s16 q2, q2, q13 \n"
- "vmovn.u16 d4, q2 \n"
-
- // Shuffle 2,3 reg around so that 2 can be added to the
- // 0,1 reg and 3 can be added to the 4,5 reg. This
- // requires expanding from u8 to u16 as the 0,1 and 4,5
- // registers are already expanded. Then do transposes
- // to get aligned.
- // q2 = xx 20 xx 30 xx 21 xx 31 xx 22 xx 32 xx 23 xx 33
- "vmovl.u8 q1, d2 \n"
- "vmovl.u8 q3, d6 \n"
- "vmovl.u8 q9, d18 \n"
-
- // combine source lines
- "vadd.u16 q1, q3 \n"
- "vadd.u16 q1, q9 \n"
-
- // d4 = xx 20 xx 30 xx 22 xx 32
- // d5 = xx 21 xx 31 xx 23 xx 33
- "vtrn.u32 d2, d3 \n"
-
- // d4 = xx 20 xx 21 xx 22 xx 23
- // d5 = xx 30 xx 31 xx 32 xx 33
- "vtrn.u16 d2, d3 \n"
-
- // 0+1+2, 3+4+5
- "vadd.u16 q0, q1 \n"
-
- // Need to divide, but can't downshift as the the value
- // isn't a power of 2. So multiply by 65536 / n
- // and take the upper 16 bits.
- "vqrdmulh.s16 q0, q0, q15 \n"
-
- // Align for table lookup, vtbl requires registers to
- // be adjacent
- "vmov.u8 d2, d4 \n"
-
- "vtbl.u8 d3, {d0, d1, d2}, d28 \n"
- "vtbl.u8 d4, {d0, d1, d2}, d29 \n"
-
- MEMACCESS(1)
- "vst1.8 {d3}, [%1]! \n"
- MEMACCESS(1)
- "vst1.32 {d4[0]}, [%1]! \n"
- "bgt 1b \n"
- : "+r"(src_ptr), // %0
- "+r"(dst_ptr), // %1
- "+r"(dst_width), // %2
- "+r"(src_stride), // %3
- "+r"(src_ptr1) // %4
- : "r"(&kMult38_Div6), // %5
- "r"(&kShuf38_2), // %6
- "r"(&kMult38_Div9) // %7
- : "q0", "q1", "q2", "q3", "q8", "q9", "q13", "q14", "q15", "memory", "cc"
- );
-}
-
-// 32x2 -> 12x1
-void ScaleRowDown38_2_Box_NEON(const uint8* src_ptr,
- ptrdiff_t src_stride,
- uint8* dst_ptr, int dst_width) {
- asm volatile (
- MEMACCESS(4)
- "vld1.16 {q13}, [%4] \n"
- MEMACCESS(5)
- "vld1.8 {q14}, [%5] \n"
- "add %3, %0 \n"
- ".p2align 2 \n"
- "1: \n"
-
- // d0 = 00 40 01 41 02 42 03 43
- // d1 = 10 50 11 51 12 52 13 53
- // d2 = 20 60 21 61 22 62 23 63
- // d3 = 30 70 31 71 32 72 33 73
- MEMACCESS(0)
- "vld4.8 {d0, d1, d2, d3}, [%0]! \n"
- MEMACCESS(3)
- "vld4.8 {d4, d5, d6, d7}, [%3]! \n"
- "subs %2, %2, #12 \n"
-
- // Shuffle the input data around to get align the data
- // so adjacent data can be added. 0,1 - 2,3 - 4,5 - 6,7
- // d0 = 00 10 01 11 02 12 03 13
- // d1 = 40 50 41 51 42 52 43 53
- "vtrn.u8 d0, d1 \n"
- "vtrn.u8 d4, d5 \n"
-
- // d2 = 20 30 21 31 22 32 23 33
- // d3 = 60 70 61 71 62 72 63 73
- "vtrn.u8 d2, d3 \n"
- "vtrn.u8 d6, d7 \n"
-
- // d0 = 00+10 01+11 02+12 03+13
- // d2 = 40+50 41+51 42+52 43+53
- "vpaddl.u8 q0, q0 \n"
- "vpaddl.u8 q2, q2 \n"
-
- // d3 = 60+70 61+71 62+72 63+73
- "vpaddl.u8 d3, d3 \n"
- "vpaddl.u8 d7, d7 \n"
-
- // combine source lines
- "vadd.u16 q0, q2 \n"
- "vadd.u16 d4, d3, d7 \n"
-
- // dst_ptr[3] = (s[6] + s[7] + s[6+st] + s[7+st]) / 4
- "vqrshrn.u16 d4, q2, #2 \n"
-
- // Shuffle 2,3 reg around so that 2 can be added to the
- // 0,1 reg and 3 can be added to the 4,5 reg. This
- // requires expanding from u8 to u16 as the 0,1 and 4,5
- // registers are already expanded. Then do transposes
- // to get aligned.
- // q2 = xx 20 xx 30 xx 21 xx 31 xx 22 xx 32 xx 23 xx 33
- "vmovl.u8 q1, d2 \n"
- "vmovl.u8 q3, d6 \n"
-
- // combine source lines
- "vadd.u16 q1, q3 \n"
-
- // d4 = xx 20 xx 30 xx 22 xx 32
- // d5 = xx 21 xx 31 xx 23 xx 33
- "vtrn.u32 d2, d3 \n"
-
- // d4 = xx 20 xx 21 xx 22 xx 23
- // d5 = xx 30 xx 31 xx 32 xx 33
- "vtrn.u16 d2, d3 \n"
-
- // 0+1+2, 3+4+5
- "vadd.u16 q0, q1 \n"
-
- // Need to divide, but can't downshift as the the value
- // isn't a power of 2. So multiply by 65536 / n
- // and take the upper 16 bits.
- "vqrdmulh.s16 q0, q0, q13 \n"
-
- // Align for table lookup, vtbl requires registers to
- // be adjacent
- "vmov.u8 d2, d4 \n"
-
- "vtbl.u8 d3, {d0, d1, d2}, d28 \n"
- "vtbl.u8 d4, {d0, d1, d2}, d29 \n"
-
- MEMACCESS(1)
- "vst1.8 {d3}, [%1]! \n"
- MEMACCESS(1)
- "vst1.32 {d4[0]}, [%1]! \n"
- "bgt 1b \n"
- : "+r"(src_ptr), // %0
- "+r"(dst_ptr), // %1
- "+r"(dst_width), // %2
- "+r"(src_stride) // %3
- : "r"(&kMult38_Div6), // %4
- "r"(&kShuf38_2) // %5
- : "q0", "q1", "q2", "q3", "q13", "q14", "memory", "cc"
- );
-}
-
-void ScaleAddRows_NEON(const uint8* src_ptr, ptrdiff_t src_stride,
- uint16* dst_ptr, int src_width, int src_height) {
- const uint8* src_tmp = NULL;
- asm volatile (
- ".p2align 2 \n"
- "1: \n"
- "mov %0, %1 \n"
- "mov r12, %5 \n"
- "veor q2, q2, q2 \n"
- "veor q3, q3, q3 \n"
- "2: \n"
- // load 16 pixels into q0
- MEMACCESS(0)
- "vld1.8 {q0}, [%0], %3 \n"
- "vaddw.u8 q3, q3, d1 \n"
- "vaddw.u8 q2, q2, d0 \n"
- "subs r12, r12, #1 \n"
- "bgt 2b \n"
- MEMACCESS(2)
- "vst1.16 {q2, q3}, [%2]! \n" // store pixels
- "add %1, %1, #16 \n"
- "subs %4, %4, #16 \n" // 16 processed per loop
- "bgt 1b \n"
- : "+r"(src_tmp), // %0
- "+r"(src_ptr), // %1
- "+r"(dst_ptr), // %2
- "+r"(src_stride), // %3
- "+r"(src_width), // %4
- "+r"(src_height) // %5
- :
- : "memory", "cc", "r12", "q0", "q1", "q2", "q3" // Clobber List
- );
-}
-
-// TODO(Yang Zhang): Investigate less load instructions for
-// the x/dx stepping
-#define LOAD2_DATA8_LANE(n) \
- "lsr %5, %3, #16 \n" \
- "add %6, %1, %5 \n" \
- "add %3, %3, %4 \n" \
- MEMACCESS(6) \
- "vld2.8 {d6["#n"], d7["#n"]}, [%6] \n"
-
-void ScaleFilterCols_NEON(uint8* dst_ptr, const uint8* src_ptr,
- int dst_width, int x, int dx) {
- int dx_offset[4] = {0, 1, 2, 3};
- int* tmp = dx_offset;
- const uint8* src_tmp = src_ptr;
- asm volatile (
- ".p2align 2 \n"
- "vdup.32 q0, %3 \n" // x
- "vdup.32 q1, %4 \n" // dx
- "vld1.32 {q2}, [%5] \n" // 0 1 2 3
- "vshl.i32 q3, q1, #2 \n" // 4 * dx
- "vmul.s32 q1, q1, q2 \n"
- // x , x + 1 * dx, x + 2 * dx, x + 3 * dx
- "vadd.s32 q1, q1, q0 \n"
- // x + 4 * dx, x + 5 * dx, x + 6 * dx, x + 7 * dx
- "vadd.s32 q2, q1, q3 \n"
- "vshl.i32 q0, q3, #1 \n" // 8 * dx
- "1: \n"
- LOAD2_DATA8_LANE(0)
- LOAD2_DATA8_LANE(1)
- LOAD2_DATA8_LANE(2)
- LOAD2_DATA8_LANE(3)
- LOAD2_DATA8_LANE(4)
- LOAD2_DATA8_LANE(5)
- LOAD2_DATA8_LANE(6)
- LOAD2_DATA8_LANE(7)
- "vmov q10, q1 \n"
- "vmov q11, q2 \n"
- "vuzp.16 q10, q11 \n"
- "vmovl.u8 q8, d6 \n"
- "vmovl.u8 q9, d7 \n"
- "vsubl.s16 q11, d18, d16 \n"
- "vsubl.s16 q12, d19, d17 \n"
- "vmovl.u16 q13, d20 \n"
- "vmovl.u16 q10, d21 \n"
- "vmul.s32 q11, q11, q13 \n"
- "vmul.s32 q12, q12, q10 \n"
- "vshrn.s32 d18, q11, #16 \n"
- "vshrn.s32 d19, q12, #16 \n"
- "vadd.s16 q8, q8, q9 \n"
- "vmovn.s16 d6, q8 \n"
-
- MEMACCESS(0)
- "vst1.8 {d6}, [%0]! \n" // store pixels
- "vadd.s32 q1, q1, q0 \n"
- "vadd.s32 q2, q2, q0 \n"
- "subs %2, %2, #8 \n" // 8 processed per loop
- "bgt 1b \n"
- : "+r"(dst_ptr), // %0
- "+r"(src_ptr), // %1
- "+r"(dst_width), // %2
- "+r"(x), // %3
- "+r"(dx), // %4
- "+r"(tmp), // %5
- "+r"(src_tmp) // %6
- :
- : "memory", "cc", "q0", "q1", "q2", "q3",
- "q8", "q9", "q10", "q11", "q12", "q13"
- );
-}
-
-#undef LOAD2_DATA8_LANE
-
-// 16x2 -> 16x1
-void ScaleFilterRows_NEON(uint8* dst_ptr,
- const uint8* src_ptr, ptrdiff_t src_stride,
- int dst_width, int source_y_fraction) {
- asm volatile (
- "cmp %4, #0 \n"
- "beq 100f \n"
- "add %2, %1 \n"
- "cmp %4, #64 \n"
- "beq 75f \n"
- "cmp %4, #128 \n"
- "beq 50f \n"
- "cmp %4, #192 \n"
- "beq 25f \n"
-
- "vdup.8 d5, %4 \n"
- "rsb %4, #256 \n"
- "vdup.8 d4, %4 \n"
- // General purpose row blend.
- "1: \n"
- MEMACCESS(1)
- "vld1.8 {q0}, [%1]! \n"
- MEMACCESS(2)
- "vld1.8 {q1}, [%2]! \n"
- "subs %3, %3, #16 \n"
- "vmull.u8 q13, d0, d4 \n"
- "vmull.u8 q14, d1, d4 \n"
- "vmlal.u8 q13, d2, d5 \n"
- "vmlal.u8 q14, d3, d5 \n"
- "vrshrn.u16 d0, q13, #8 \n"
- "vrshrn.u16 d1, q14, #8 \n"
- MEMACCESS(0)
- "vst1.8 {q0}, [%0]! \n"
- "bgt 1b \n"
- "b 99f \n"
-
- // Blend 25 / 75.
- "25: \n"
- MEMACCESS(1)
- "vld1.8 {q0}, [%1]! \n"
- MEMACCESS(2)
- "vld1.8 {q1}, [%2]! \n"
- "subs %3, %3, #16 \n"
- "vrhadd.u8 q0, q1 \n"
- "vrhadd.u8 q0, q1 \n"
- MEMACCESS(0)
- "vst1.8 {q0}, [%0]! \n"
- "bgt 25b \n"
- "b 99f \n"
-
- // Blend 50 / 50.
- "50: \n"
- MEMACCESS(1)
- "vld1.8 {q0}, [%1]! \n"
- MEMACCESS(2)
- "vld1.8 {q1}, [%2]! \n"
- "subs %3, %3, #16 \n"
- "vrhadd.u8 q0, q1 \n"
- MEMACCESS(0)
- "vst1.8 {q0}, [%0]! \n"
- "bgt 50b \n"
- "b 99f \n"
-
- // Blend 75 / 25.
- "75: \n"
- MEMACCESS(1)
- "vld1.8 {q1}, [%1]! \n"
- MEMACCESS(2)
- "vld1.8 {q0}, [%2]! \n"
- "subs %3, %3, #16 \n"
- "vrhadd.u8 q0, q1 \n"
- "vrhadd.u8 q0, q1 \n"
- MEMACCESS(0)
- "vst1.8 {q0}, [%0]! \n"
- "bgt 75b \n"
- "b 99f \n"
-
- // Blend 100 / 0 - Copy row unchanged.
- "100: \n"
- MEMACCESS(1)
- "vld1.8 {q0}, [%1]! \n"
- "subs %3, %3, #16 \n"
- MEMACCESS(0)
- "vst1.8 {q0}, [%0]! \n"
- "bgt 100b \n"
-
- "99: \n"
- MEMACCESS(0)
- "vst1.8 {d1[7]}, [%0] \n"
- : "+r"(dst_ptr), // %0
- "+r"(src_ptr), // %1
- "+r"(src_stride), // %2
- "+r"(dst_width), // %3
- "+r"(source_y_fraction) // %4
- :
- : "q0", "q1", "d4", "d5", "q13", "q14", "memory", "cc"
- );
-}
-
-void ScaleARGBRowDown2_NEON(const uint8* src_ptr, ptrdiff_t src_stride,
- uint8* dst, int dst_width) {
- asm volatile (
- ".p2align 2 \n"
- "1: \n"
- // load even pixels into q0, odd into q1
- MEMACCESS(0)
- "vld2.32 {q0, q1}, [%0]! \n"
- MEMACCESS(0)
- "vld2.32 {q2, q3}, [%0]! \n"
- "subs %2, %2, #8 \n" // 8 processed per loop
- MEMACCESS(1)
- "vst1.8 {q1}, [%1]! \n" // store odd pixels
- MEMACCESS(1)
- "vst1.8 {q3}, [%1]! \n"
- "bgt 1b \n"
- : "+r"(src_ptr), // %0
- "+r"(dst), // %1
- "+r"(dst_width) // %2
- :
- : "memory", "cc", "q0", "q1", "q2", "q3" // Clobber List
- );
-}
-
-void ScaleARGBRowDown2Linear_NEON(const uint8* src_argb, ptrdiff_t src_stride, // 2:1 horizontal average per channel; src_stride is not referenced by the asm.
- uint8* dst_argb, int dst_width) {
- asm volatile (
- ".p2align 2 \n"
- "1: \n"
- MEMACCESS(0)
- "vld4.8 {d0, d2, d4, d6}, [%0]! \n" // load 8 ARGB pixels.
- MEMACCESS(0)
- "vld4.8 {d1, d3, d5, d7}, [%0]! \n" // load next 8 ARGB pixels.
- "subs %2, %2, #8 \n" // 8 processed per loop
- "vpaddl.u8 q0, q0 \n" // B 16 bytes -> 8 shorts.
- "vpaddl.u8 q1, q1 \n" // G 16 bytes -> 8 shorts.
- "vpaddl.u8 q2, q2 \n" // R 16 bytes -> 8 shorts.
- "vpaddl.u8 q3, q3 \n" // A 16 bytes -> 8 shorts.
- "vrshrn.u16 d0, q0, #1 \n" // downshift, round and pack
- "vrshrn.u16 d1, q1, #1 \n"
- "vrshrn.u16 d2, q2, #1 \n"
- "vrshrn.u16 d3, q3, #1 \n"
- MEMACCESS(1)
- "vst4.8 {d0, d1, d2, d3}, [%1]! \n" // re-interleave the 4 channels and store 8 pixels
- "bgt 1b \n" // repeat while the remaining count is still positive
- : "+r"(src_argb), // %0
- "+r"(dst_argb), // %1
- "+r"(dst_width) // %2
- :
- : "memory", "cc", "q0", "q1", "q2", "q3" // Clobber List
- );
-}
-
-void ScaleARGBRowDown2Box_NEON(const uint8* src_ptr, ptrdiff_t src_stride, // 2x2 box average per channel over two source rows.
- uint8* dst, int dst_width) {
- asm volatile (
- // change the stride to row 2 pointer (%1 = src_stride + src_ptr)
- "add %1, %1, %0 \n"
- ".p2align 2 \n"
- "1: \n"
- MEMACCESS(0)
- "vld4.8 {d0, d2, d4, d6}, [%0]! \n" // load 8 ARGB pixels.
- MEMACCESS(0)
- "vld4.8 {d1, d3, d5, d7}, [%0]! \n" // load next 8 ARGB pixels.
- "subs %3, %3, #8 \n" // 8 processed per loop.
- "vpaddl.u8 q0, q0 \n" // B 16 bytes -> 8 shorts.
- "vpaddl.u8 q1, q1 \n" // G 16 bytes -> 8 shorts.
- "vpaddl.u8 q2, q2 \n" // R 16 bytes -> 8 shorts.
- "vpaddl.u8 q3, q3 \n" // A 16 bytes -> 8 shorts.
- MEMACCESS(1)
- "vld4.8 {d16, d18, d20, d22}, [%1]! \n" // load 8 more ARGB pixels.
- MEMACCESS(1)
- "vld4.8 {d17, d19, d21, d23}, [%1]! \n" // load last 8 ARGB pixels.
- "vpadal.u8 q0, q8 \n" // B: add row 2 pair sums onto the row 1 sums.
- "vpadal.u8 q1, q9 \n" // G: add row 2 pair sums onto the row 1 sums.
- "vpadal.u8 q2, q10 \n" // R: add row 2 pair sums onto the row 1 sums.
- "vpadal.u8 q3, q11 \n" // A: add row 2 pair sums onto the row 1 sums.
- "vrshrn.u16 d0, q0, #2 \n" // downshift, round and pack
- "vrshrn.u16 d1, q1, #2 \n"
- "vrshrn.u16 d2, q2, #2 \n"
- "vrshrn.u16 d3, q3, #2 \n"
- MEMACCESS(2)
- "vst4.8 {d0, d1, d2, d3}, [%2]! \n" // re-interleave the 4 channels and store 8 pixels
- "bgt 1b \n" // repeat while the remaining count is still positive
- : "+r"(src_ptr), // %0
- "+r"(src_stride), // %1
- "+r"(dst), // %2
- "+r"(dst_width) // %3
- :
- : "memory", "cc", "q0", "q1", "q2", "q3", "q8", "q9", "q10", "q11"
- );
-}
-
-// Reads 4 pixels at a time, each src_stepx pixels apart, and writes them contiguously.
-// Alignment requirement: src_argb 4 byte aligned.
-void ScaleARGBRowDownEven_NEON(const uint8* src_argb, ptrdiff_t src_stride, // src_stride is not referenced by the asm.
- int src_stepx, uint8* dst_argb, int dst_width) {
- asm volatile (
- "mov r12, %3, lsl #2 \n" // r12 = src_stepx * 4, the byte step between sampled pixels
- ".p2align 2 \n"
- "1: \n"
- MEMACCESS(0)
- "vld1.32 {d0[0]}, [%0], r12 \n" // one 32-bit pixel per lane, post-incrementing by r12
- MEMACCESS(0)
- "vld1.32 {d0[1]}, [%0], r12 \n"
- MEMACCESS(0)
- "vld1.32 {d1[0]}, [%0], r12 \n"
- MEMACCESS(0)
- "vld1.32 {d1[1]}, [%0], r12 \n"
- "subs %2, %2, #4 \n" // 4 pixels per loop.
- MEMACCESS(1)
- "vst1.8 {q0}, [%1]! \n" // store the 4 gathered pixels
- "bgt 1b \n" // repeat while the remaining count is still positive
- : "+r"(src_argb), // %0
- "+r"(dst_argb), // %1
- "+r"(dst_width) // %2
- : "r"(src_stepx) // %3
- : "memory", "cc", "r12", "q0"
- );
-}
-
-// Produces 4 pixels per loop, each the rounded average of a 2x2 source block taken every src_stepx pixels.
-// Alignment requirement: src_argb 4 byte aligned.
-void ScaleARGBRowDownEvenBox_NEON(const uint8* src_argb, ptrdiff_t src_stride,
- int src_stepx,
- uint8* dst_argb, int dst_width) {
- asm volatile (
- "mov r12, %4, lsl #2 \n" // r12 = src_stepx * 4, the byte step between sampled blocks
- "add %1, %1, %0 \n" // %1 = src_stride + src_argb, i.e. the row 2 pointer
- ".p2align 2 \n"
- "1: \n"
- MEMACCESS(0)
- "vld1.8 {d0}, [%0], r12 \n" // Read 4 2x2 blocks -> 2x1
- MEMACCESS(1)
- "vld1.8 {d1}, [%1], r12 \n"
- MEMACCESS(0)
- "vld1.8 {d2}, [%0], r12 \n"
- MEMACCESS(1)
- "vld1.8 {d3}, [%1], r12 \n"
- MEMACCESS(0)
- "vld1.8 {d4}, [%0], r12 \n"
- MEMACCESS(1)
- "vld1.8 {d5}, [%1], r12 \n"
- MEMACCESS(0)
- "vld1.8 {d6}, [%0], r12 \n"
- MEMACCESS(1)
- "vld1.8 {d7}, [%1], r12 \n"
- "vaddl.u8 q0, d0, d1 \n" // widen to 16 bits and add row 1 + row 2 of each block
- "vaddl.u8 q1, d2, d3 \n"
- "vaddl.u8 q2, d4, d5 \n"
- "vaddl.u8 q3, d6, d7 \n"
- "vswp.8 d1, d2 \n" // ab_cd -> ac_bd
- "vswp.8 d5, d6 \n" // ef_gh -> eg_fh
- "vadd.u16 q0, q0, q1 \n" // (a+b)_(c+d)
- "vadd.u16 q2, q2, q3 \n" // (e+f)_(g+h)
- "vrshrn.u16 d0, q0, #2 \n" // first 2 pixels.
- "vrshrn.u16 d1, q2, #2 \n" // next 2 pixels.
- "subs %3, %3, #4 \n" // 4 pixels per loop.
- MEMACCESS(2)
- "vst1.8 {q0}, [%2]! \n" // store the 4 averaged pixels
- "bgt 1b \n" // repeat while the remaining count is still positive
- : "+r"(src_argb), // %0
- "+r"(src_stride), // %1
- "+r"(dst_argb), // %2
- "+r"(dst_width) // %3
- : "r"(src_stepx) // %4
- : "memory", "cc", "r12", "q0", "q1", "q2", "q3"
- );
-}
-
-// TODO(Yang Zhang): Investigate less load instructions for
-// the x/dx stepping. The macro loads the 32-bit pixel at index (x >> 16) into lane n of dn, then advances x by dx.
-#define LOAD1_DATA32_LANE(dn, n) \
- "lsr %5, %3, #16 \n" \
- "add %6, %1, %5, lsl #2 \n" \
- "add %3, %3, %4 \n" \
- MEMACCESS(6) \
- "vld1.32 {"#dn"["#n"]}, [%6] \n"
-
-void ScaleARGBCols_NEON(uint8* dst_argb, const uint8* src_argb, // point-samples 32-bit pixels at positions x, x + dx, x + 2 * dx, ...
- int dst_width, int x, int dx) {
- int tmp = 0; // scratch register for the asm: receives x >> 16
- const uint8* src_tmp = src_argb; // scratch register for the asm: receives the address of the sampled pixel
- asm volatile (
- ".p2align 2 \n"
- "1: \n"
- LOAD1_DATA32_LANE(d0, 0)
- LOAD1_DATA32_LANE(d0, 1)
- LOAD1_DATA32_LANE(d1, 0)
- LOAD1_DATA32_LANE(d1, 1)
- LOAD1_DATA32_LANE(d2, 0)
- LOAD1_DATA32_LANE(d2, 1)
- LOAD1_DATA32_LANE(d3, 0)
- LOAD1_DATA32_LANE(d3, 1)
-
- MEMACCESS(0)
- "vst1.32 {q0, q1}, [%0]! \n" // store pixels
- "subs %2, %2, #8 \n" // 8 processed per loop
- "bgt 1b \n" // repeat while the remaining count is still positive
- : "+r"(dst_argb), // %0
- "+r"(src_argb), // %1
- "+r"(dst_width), // %2
- "+r"(x), // %3
- "+r"(dx), // %4
- "+r"(tmp), // %5
- "+r"(src_tmp) // %6
- :
- : "memory", "cc", "q0", "q1"
- );
-}
-
-#undef LOAD1_DATA32_LANE
-
-// TODO(Yang Zhang): Investigate less load instructions for
-// the x/dx stepping. The macro loads the two adjacent 32-bit pixels at index (x >> 16) into lane n of dn1 and dn2, then advances x by dx.
-#define LOAD2_DATA32_LANE(dn1, dn2, n) \
- "lsr %5, %3, #16 \n" \
- "add %6, %1, %5, lsl #2 \n" \
- "add %3, %3, %4 \n" \
- MEMACCESS(6) \
- "vld2.32 {"#dn1"["#n"], "#dn2"["#n"]}, [%6] \n"
-
-void ScaleARGBFilterCols_NEON(uint8* dst_argb, const uint8* src_argb, // blends each sampled pixel with its right neighbour using a 7-bit fraction of x
- int dst_width, int x, int dx) {
- int dx_offset[4] = {0, 1, 2, 3}; // per-lane multipliers for dx
- int* tmp = dx_offset; // read once by vld1.32 below, then reused as scratch by LOAD2_DATA32_LANE
- const uint8* src_tmp = src_argb; // scratch register for the asm: receives the address of the sampled pixel pair
- asm volatile (
- ".p2align 2 \n"
- "vdup.32 q0, %3 \n" // x
- "vdup.32 q1, %4 \n" // dx
- "vld1.32 {q2}, [%5] \n" // 0 1 2 3
- "vshl.i32 q9, q1, #2 \n" // 4 * dx
- "vmul.s32 q1, q1, q2 \n" // 0, dx, 2 * dx, 3 * dx
- "vmov.i8 q3, #0x7f \n" // 0x7F
- "vmov.i16 q15, #0x7f \n" // 0x7F
- // x , x + 1 * dx, x + 2 * dx, x + 3 * dx
- "vadd.s32 q8, q1, q0 \n"
- "1: \n"
- // d0, d1: a
- // d2, d3: b
- LOAD2_DATA32_LANE(d0, d2, 0)
- LOAD2_DATA32_LANE(d0, d2, 1)
- LOAD2_DATA32_LANE(d1, d3, 0)
- LOAD2_DATA32_LANE(d1, d3, 1)
- "vshrn.i32 d22, q8, #9 \n" // drop the low 9 bits of each x
- "vand.16 d22, d22, d30 \n" // keep 7 bits: f = (x >> 9) & 0x7f
- "vdup.8 d24, d22[0] \n" // broadcast each pixel's f across a d register
- "vdup.8 d25, d22[2] \n"
- "vdup.8 d26, d22[4] \n"
- "vdup.8 d27, d22[6] \n"
- "vext.8 d4, d24, d25, #4 \n" // 4 bytes of f per pixel, 2 pixels per d register
- "vext.8 d5, d26, d27, #4 \n" // f
- "veor.8 q10, q2, q3 \n" // 0x7f ^ f
- "vmull.u8 q11, d0, d20 \n" // a * (0x7f ^ f)
- "vmull.u8 q12, d1, d21 \n"
- "vmull.u8 q13, d2, d4 \n" // b * f
- "vmull.u8 q14, d3, d5 \n"
- "vadd.i16 q11, q11, q13 \n"
- "vadd.i16 q12, q12, q14 \n"
- "vshrn.i16 d0, q11, #7 \n" // (a * (0x7f ^ f) + b * f) >> 7
- "vshrn.i16 d1, q12, #7 \n"
-
- MEMACCESS(0)
- "vst1.32 {d0, d1}, [%0]! \n" // store pixels
- "vadd.s32 q8, q8, q9 \n" // advance the 4 x positions by 4 * dx
- "subs %2, %2, #4 \n" // 4 processed per loop
- "bgt 1b \n"
- : "+r"(dst_argb), // %0
- "+r"(src_argb), // %1
- "+r"(dst_width), // %2
- "+r"(x), // %3
- "+r"(dx), // %4
- "+r"(tmp), // %5
- "+r"(src_tmp) // %6
- :
- : "memory", "cc", "q0", "q1", "q2", "q3", "q8", "q9",
- "q10", "q11", "q12", "q13", "q14", "q15"
- );
-}
-
-#undef LOAD2_DATA32_LANE
-
-#endif // defined(__ARM_NEON__) && !defined(__aarch64__)
-
-#ifdef __cplusplus
-} // extern "C"
-} // namespace libyuv
-#endif
diff --git a/third_party/aom/third_party/libyuv/source/scale_neon64.cc b/third_party/aom/third_party/libyuv/source/scale_neon64.cc
deleted file mode 100644
index 1d5519357..000000000
--- a/third_party/aom/third_party/libyuv/source/scale_neon64.cc
+++ /dev/null
@@ -1,1042 +0,0 @@
-/*
- * Copyright 2014 The LibYuv Project Authors. All rights reserved.
- *
- * Use of this source code is governed by a BSD-style license
- * that can be found in the LICENSE file in the root of the source
- * tree. An additional intellectual property rights grant can be found
- * in the file PATENTS. All contributing project authors may
- * be found in the AUTHORS file in the root of the source tree.
- */
-
-#include "libyuv/scale.h"
-#include "libyuv/row.h"
-#include "libyuv/scale_row.h"
-
-#ifdef __cplusplus
-namespace libyuv {
-extern "C" {
-#endif
-
-// This module is for GCC Neon armv8 64 bit.
-#if !defined(LIBYUV_DISABLE_NEON) && defined(__aarch64__)
-
-// Read 32x1 throw away even pixels, and write 16x1. src_stride is not referenced by the asm.
-void ScaleRowDown2_NEON(const uint8* src_ptr, ptrdiff_t src_stride,
- uint8* dst, int dst_width) {
- asm volatile (
- "1: \n"
- // load even pixels into v0, odd into v1
- MEMACCESS(0)
- "ld2 {v0.16b,v1.16b}, [%0], #32 \n"
- "subs %w2, %w2, #16 \n" // 16 processed per loop
- MEMACCESS(1)
- "st1 {v1.16b}, [%1], #16 \n" // store odd pixels
- "b.gt 1b \n" // repeat while the remaining count is still positive
- : "+r"(src_ptr), // %0
- "+r"(dst), // %1
- "+r"(dst_width) // %2
- :
- : "memory", "cc", "v0", "v1" // Clobber List: st1 writes memory, subs sets flags
- );
-}
-
-// Read 32x1 average down and write 16x1. src_stride is not referenced by the asm.
-void ScaleRowDown2Linear_NEON(const uint8* src_ptr, ptrdiff_t src_stride,
- uint8* dst, int dst_width) {
- asm volatile (
- "1: \n"
- MEMACCESS(0)
- "ld1 {v0.16b,v1.16b}, [%0], #32 \n" // load pixels and post inc
- "subs %w2, %w2, #16 \n" // 16 processed per loop
- "uaddlp v0.8h, v0.16b \n" // add adjacent
- "uaddlp v1.8h, v1.16b \n"
- "rshrn v0.8b, v0.8h, #1 \n" // downshift, round and pack
- "rshrn2 v0.16b, v1.8h, #1 \n" // second half goes into the upper 8 bytes of v0
- MEMACCESS(1)
- "st1 {v0.16b}, [%1], #16 \n"
- "b.gt 1b \n" // repeat while the remaining count is still positive
- : "+r"(src_ptr), // %0
- "+r"(dst), // %1
- "+r"(dst_width) // %2
- :
- : "memory", "cc", "v0", "v1" // Clobber List: st1 writes memory, subs sets flags
- );
-}
-
-// Read 32x2 average down and write 16x1.
-void ScaleRowDown2Box_NEON(const uint8* src_ptr, ptrdiff_t src_stride,
- uint8* dst, int dst_width) {
- asm volatile (
- // change the stride to row 2 pointer (%1 = src_stride + src_ptr)
- "add %1, %1, %0 \n"
- "1: \n"
- MEMACCESS(0)
- "ld1 {v0.16b,v1.16b}, [%0], #32 \n" // load row 1 and post inc
- MEMACCESS(1)
- "ld1 {v2.16b, v3.16b}, [%1], #32 \n" // load row 2 and post inc
- "subs %w3, %w3, #16 \n" // 16 processed per loop
- "uaddlp v0.8h, v0.16b \n" // row 1 add adjacent
- "uaddlp v1.8h, v1.16b \n"
- "uadalp v0.8h, v2.16b \n" // row 2 add adjacent + row1
- "uadalp v1.8h, v3.16b \n"
- "rshrn v0.8b, v0.8h, #2 \n" // downshift, round and pack
- "rshrn2 v0.16b, v1.8h, #2 \n"
- MEMACCESS(2)
- "st1 {v0.16b}, [%2], #16 \n"
- "b.gt 1b \n" // repeat while the remaining count is still positive
- : "+r"(src_ptr), // %0
- "+r"(src_stride), // %1
- "+r"(dst), // %2
- "+r"(dst_width) // %3
- :
- : "memory", "cc", "v0", "v1", "v2", "v3" // Clobber List: st1 writes memory, subs sets flags
- );
-}
-
-void ScaleRowDown4_NEON(const uint8* src_ptr, ptrdiff_t src_stride, // keeps 1 byte of every 4; src_stride is not referenced by the asm.
- uint8* dst_ptr, int dst_width) {
- asm volatile (
- "1: \n"
- MEMACCESS(0)
- "ld4 {v0.8b,v1.8b,v2.8b,v3.8b}, [%0], #32 \n" // src line 0
- "subs %w2, %w2, #8 \n" // 8 processed per loop
- MEMACCESS(1)
- "st1 {v2.8b}, [%1], #8 \n" // v2 holds the byte at offset 2 of each group of 4
- "b.gt 1b \n" // repeat while the remaining count is still positive
- : "+r"(src_ptr), // %0
- "+r"(dst_ptr), // %1
- "+r"(dst_width) // %2
- :
- : "v0", "v1", "v2", "v3", "memory", "cc"
- );
-}
-
-void ScaleRowDown4Box_NEON(const uint8* src_ptr, ptrdiff_t src_stride, // each output byte is the rounded average of a 4x4 source block
- uint8* dst_ptr, int dst_width) {
- const uint8* src_ptr1 = src_ptr + src_stride; // row 2
- const uint8* src_ptr2 = src_ptr + src_stride * 2; // row 3
- const uint8* src_ptr3 = src_ptr + src_stride * 3; // row 4
-asm volatile (
- "1: \n"
- MEMACCESS(0)
- "ld1 {v0.16b}, [%0], #16 \n" // load up 16x4
- MEMACCESS(2)
- "ld1 {v1.16b}, [%2], #16 \n"
- MEMACCESS(3)
- "ld1 {v2.16b}, [%3], #16 \n"
- MEMACCESS(4)
- "ld1 {v3.16b}, [%4], #16 \n"
- "subs %w5, %w5, #4 \n" // 4 output bytes per loop
- "uaddlp v0.8h, v0.16b \n" // row 1 add adjacent
- "uadalp v0.8h, v1.16b \n" // accumulate adjacent pairs of rows 2..4
- "uadalp v0.8h, v2.16b \n"
- "uadalp v0.8h, v3.16b \n"
- "addp v0.8h, v0.8h, v0.8h \n" // add adjacent pair sums -> 4x4 block sums
- "rshrn v0.8b, v0.8h, #4 \n" // divide by 16 w/rounding
- MEMACCESS(1)
- "st1 {v0.s}[0], [%1], #4 \n" // store the 4 result bytes
- "b.gt 1b \n" // repeat while the remaining count is still positive
- : "+r"(src_ptr), // %0
- "+r"(dst_ptr), // %1
- "+r"(src_ptr1), // %2
- "+r"(src_ptr2), // %3
- "+r"(src_ptr3), // %4
- "+r"(dst_width) // %5
- :
- : "v0", "v1", "v2", "v3", "memory", "cc"
- );
-}
-
-// Down scale from 4 to 3 pixels. Use the neon multilane read/write
-// to load up every 4th pixel into 4 different registers.
-// Point samples 32 pixels to 24 pixels.
-void ScaleRowDown34_NEON(const uint8* src_ptr,
- ptrdiff_t src_stride, // not referenced by the asm
- uint8* dst_ptr, int dst_width) {
- asm volatile (
- "1: \n"
- MEMACCESS(0)
- "ld4 {v0.8b,v1.8b,v2.8b,v3.8b}, [%0], #32 \n" // src line 0
- "subs %w2, %w2, #24 \n" // 24 output pixels per loop
- "orr v2.16b, v3.16b, v3.16b \n" // order v0, v1, v2
- MEMACCESS(1)
- "st3 {v0.8b,v1.8b,v2.8b}, [%1], #24 \n" // interleave and store pixels 0, 1 and 3 of each group of 4
- "b.gt 1b \n"
- : "+r"(src_ptr), // %0
- "+r"(dst_ptr), // %1
- "+r"(dst_width) // %2
- :
- : "v0", "v1", "v2", "v3", "memory", "cc"
- );
-}
-
-void ScaleRowDown34_0_Box_NEON(const uint8* src_ptr, // 4 -> 3 filtered downscale; rows are blended 3:1 (line 0 : line 1)
- ptrdiff_t src_stride,
- uint8* dst_ptr, int dst_width) {
- asm volatile (
- "movi v20.8b, #3 \n" // constant weight 3
- "add %3, %3, %0 \n" // %3 = src_stride + src_ptr, i.e. the line 1 pointer
- "1: \n"
- MEMACCESS(0)
- "ld4 {v0.8b,v1.8b,v2.8b,v3.8b}, [%0], #32 \n" // src line 0
- MEMACCESS(3)
- "ld4 {v4.8b,v5.8b,v6.8b,v7.8b}, [%3], #32 \n" // src line 1
- "subs %w2, %w2, #24 \n" // 24 output pixels per loop
-
- // filter src line 0 with src line 1
- // expand chars to shorts to allow for room
- // when adding lines together
- "ushll v16.8h, v4.8b, #0 \n"
- "ushll v17.8h, v5.8b, #0 \n"
- "ushll v18.8h, v6.8b, #0 \n"
- "ushll v19.8h, v7.8b, #0 \n"
-
- // 3 * line_0 + line_1
- "umlal v16.8h, v0.8b, v20.8b \n"
- "umlal v17.8h, v1.8b, v20.8b \n"
- "umlal v18.8h, v2.8b, v20.8b \n"
- "umlal v19.8h, v3.8b, v20.8b \n"
-
- // (3 * line_0 + line_1) >> 2, with rounding and saturation
- "uqrshrn v0.8b, v16.8h, #2 \n"
- "uqrshrn v1.8b, v17.8h, #2 \n"
- "uqrshrn v2.8b, v18.8h, #2 \n"
- "uqrshrn v3.8b, v19.8h, #2 \n"
-
- // a0 = (src[0] * 3 + s[1] * 1) >> 2
- "ushll v16.8h, v1.8b, #0 \n"
- "umlal v16.8h, v0.8b, v20.8b \n"
- "uqrshrn v0.8b, v16.8h, #2 \n"
-
- // a1 = (src[1] * 1 + s[2] * 1) >> 1
- "urhadd v1.8b, v1.8b, v2.8b \n"
-
- // a2 = (src[2] * 1 + s[3] * 3) >> 2
- "ushll v16.8h, v2.8b, #0 \n"
- "umlal v16.8h, v3.8b, v20.8b \n"
- "uqrshrn v2.8b, v16.8h, #2 \n"
-
- MEMACCESS(1)
- "st3 {v0.8b,v1.8b,v2.8b}, [%1], #24 \n" // interleave a0, a1, a2 and store 24 pixels
-
- "b.gt 1b \n"
- : "+r"(src_ptr), // %0
- "+r"(dst_ptr), // %1
- "+r"(dst_width), // %2
- "+r"(src_stride) // %3
- :
- : "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v16", "v17", "v18", "v19",
- "v20", "memory", "cc"
- );
-}
-
-void ScaleRowDown34_1_Box_NEON(const uint8* src_ptr, // 4 -> 3 filtered downscale; rows are averaged 1:1
- ptrdiff_t src_stride,
- uint8* dst_ptr, int dst_width) {
- asm volatile (
- "movi v20.8b, #3 \n" // constant weight 3
- "add %3, %3, %0 \n" // %3 = src_stride + src_ptr, i.e. the line 1 pointer
- "1: \n"
- MEMACCESS(0)
- "ld4 {v0.8b,v1.8b,v2.8b,v3.8b}, [%0], #32 \n" // src line 0
- MEMACCESS(3)
- "ld4 {v4.8b,v5.8b,v6.8b,v7.8b}, [%3], #32 \n" // src line 1
- "subs %w2, %w2, #24 \n" // 24 output pixels per loop
- // average src line 0 with src line 1
- "urhadd v0.8b, v0.8b, v4.8b \n"
- "urhadd v1.8b, v1.8b, v5.8b \n"
- "urhadd v2.8b, v2.8b, v6.8b \n"
- "urhadd v3.8b, v3.8b, v7.8b \n"
-
- // a0 = (src[0] * 3 + s[1] * 1) >> 2
- "ushll v4.8h, v1.8b, #0 \n"
- "umlal v4.8h, v0.8b, v20.8b \n"
- "uqrshrn v0.8b, v4.8h, #2 \n"
-
- // a1 = (src[1] * 1 + s[2] * 1) >> 1
- "urhadd v1.8b, v1.8b, v2.8b \n"
-
- // a2 = (src[2] * 1 + s[3] * 3) >> 2
- "ushll v4.8h, v2.8b, #0 \n"
- "umlal v4.8h, v3.8b, v20.8b \n"
- "uqrshrn v2.8b, v4.8h, #2 \n"
-
- MEMACCESS(1)
- "st3 {v0.8b,v1.8b,v2.8b}, [%1], #24 \n" // interleave a0, a1, a2 and store 24 pixels
- "b.gt 1b \n"
- : "+r"(src_ptr), // %0
- "+r"(dst_ptr), // %1
- "+r"(dst_width), // %2
- "+r"(src_stride) // %3
- :
- : "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v20", "memory", "cc"
- );
-}
-
-static uvec8 kShuf38 = // tbl byte indices used by ScaleRowDown38_NEON to pick 12 of 32 source bytes
- { 0, 3, 6, 8, 11, 14, 16, 19, 22, 24, 27, 30, 0, 0, 0, 0 };
-static uvec8 kShuf38_2 = // tbl byte indices used by the 38 box filters to gather 12 result bytes from 3 registers
- { 0, 16, 32, 2, 18, 33, 4, 20, 34, 6, 22, 35, 0, 0, 0, 0 };
-static vec16 kMult38_Div6 = // used with sqrdmulh, which doubles the product, so 65536 / 12 gives a divide by 6
- { 65536 / 12, 65536 / 12, 65536 / 12, 65536 / 12,
- 65536 / 12, 65536 / 12, 65536 / 12, 65536 / 12 };
-static vec16 kMult38_Div9 = // used with sqrdmulh, which doubles the product, so 65536 / 18 gives a divide by 9
- { 65536 / 18, 65536 / 18, 65536 / 18, 65536 / 18,
- 65536 / 18, 65536 / 18, 65536 / 18, 65536 / 18 };
-
-// 32 -> 12: point samples 12 bytes out of every 32 using the kShuf38 table.
-void ScaleRowDown38_NEON(const uint8* src_ptr,
- ptrdiff_t src_stride, // not referenced by the asm
- uint8* dst_ptr, int dst_width) {
- asm volatile (
- MEMACCESS(3)
- "ld1 {v3.16b}, [%3] \n" // v3 = kShuf38 indices, loaded once
- "1: \n"
- MEMACCESS(0)
- "ld1 {v0.16b,v1.16b}, [%0], #32 \n"
- "subs %w2, %w2, #12 \n" // 12 output pixels per loop
- "tbl v2.16b, {v0.16b,v1.16b}, v3.16b \n" // gather the 12 selected bytes
- MEMACCESS(1)
- "st1 {v2.8b}, [%1], #8 \n" // store bytes 0..7
- MEMACCESS(1)
- "st1 {v2.s}[2], [%1], #4 \n" // store bytes 8..11
- "b.gt 1b \n"
- : "+r"(src_ptr), // %0
- "+r"(dst_ptr), // %1
- "+r"(dst_width) // %2
- : "r"(&kShuf38) // %3
- : "v0", "v1", "v2", "v3", "memory", "cc"
- );
-}
-
-// 32x3 -> 12x1
-void OMITFP ScaleRowDown38_3_Box_NEON(const uint8* src_ptr,
- ptrdiff_t src_stride,
- uint8* dst_ptr, int dst_width) {
- const uint8* src_ptr1 = src_ptr + src_stride * 2; // third source row
- ptrdiff_t tmp_src_stride = src_stride; // turned into the second-row pointer by the asm
-
- asm volatile (
- MEMACCESS(5)
- "ld1 {v29.8h}, [%5] \n" // kMult38_Div6
- MEMACCESS(6)
- "ld1 {v30.16b}, [%6] \n" // kShuf38_2
- MEMACCESS(7)
- "ld1 {v31.8h}, [%7] \n" // kMult38_Div9
- "add %2, %2, %0 \n" // %2 = src_stride + src_ptr
- "1: \n"
-
- // 00 40 01 41 02 42 03 43
- // 10 50 11 51 12 52 13 53
- // 20 60 21 61 22 62 23 63
- // 30 70 31 71 32 72 33 73
- MEMACCESS(0)
- "ld4 {v0.8b,v1.8b,v2.8b,v3.8b}, [%0], #32 \n"
- MEMACCESS(2)
- "ld4 {v4.8b,v5.8b,v6.8b,v7.8b}, [%2], #32 \n"
- MEMACCESS(3)
- "ld4 {v16.8b,v17.8b,v18.8b,v19.8b}, [%3], #32 \n"
- "subs %w4, %w4, #12 \n" // 12 output pixels per loop
-
- // Shuffle the input data around to align the data
- // so adjacent data can be added. 0,1 - 2,3 - 4,5 - 6,7
- // 00 10 01 11 02 12 03 13
- // 40 50 41 51 42 52 43 53
- "trn1 v20.8b, v0.8b, v1.8b \n"
- "trn2 v21.8b, v0.8b, v1.8b \n"
- "trn1 v22.8b, v4.8b, v5.8b \n"
- "trn2 v23.8b, v4.8b, v5.8b \n"
- "trn1 v24.8b, v16.8b, v17.8b \n"
- "trn2 v25.8b, v16.8b, v17.8b \n"
-
- // 20 30 21 31 22 32 23 33
- // 60 70 61 71 62 72 63 73
- "trn1 v0.8b, v2.8b, v3.8b \n"
- "trn2 v1.8b, v2.8b, v3.8b \n"
- "trn1 v4.8b, v6.8b, v7.8b \n"
- "trn2 v5.8b, v6.8b, v7.8b \n"
- "trn1 v16.8b, v18.8b, v19.8b \n"
- "trn2 v17.8b, v18.8b, v19.8b \n"
-
- // 00+10 01+11 02+12 03+13
- // 40+50 41+51 42+52 43+53
- "uaddlp v20.4h, v20.8b \n"
- "uaddlp v21.4h, v21.8b \n"
- "uaddlp v22.4h, v22.8b \n"
- "uaddlp v23.4h, v23.8b \n"
- "uaddlp v24.4h, v24.8b \n"
- "uaddlp v25.4h, v25.8b \n"
-
- // 60+70 61+71 62+72 63+73
- "uaddlp v1.4h, v1.8b \n"
- "uaddlp v5.4h, v5.8b \n"
- "uaddlp v17.4h, v17.8b \n"
-
- // combine source lines
- "add v20.4h, v20.4h, v22.4h \n"
- "add v21.4h, v21.4h, v23.4h \n"
- "add v20.4h, v20.4h, v24.4h \n"
- "add v21.4h, v21.4h, v25.4h \n"
- "add v2.4h, v1.4h, v5.4h \n"
- "add v2.4h, v2.4h, v17.4h \n"
-
- // dst_ptr[3] = (s[6 + st * 0] + s[7 + st * 0]
- // + s[6 + st * 1] + s[7 + st * 1]
- // + s[6 + st * 2] + s[7 + st * 2]) / 6
- "sqrdmulh v2.8h, v2.8h, v29.8h \n"
- "xtn v2.8b, v2.8h \n"
-
- // Shuffle 2,3 reg around so that 2 can be added to the
- // 0,1 reg and 3 can be added to the 4,5 reg. This
- // requires expanding from u8 to u16 as the 0,1 and 4,5
- // registers are already expanded. Then do transposes
- // to get aligned.
- // xx 20 xx 30 xx 21 xx 31 xx 22 xx 32 xx 23 xx 33
- "ushll v16.8h, v16.8b, #0 \n"
- "uaddl v0.8h, v0.8b, v4.8b \n"
-
- // combine source lines
- "add v0.8h, v0.8h, v16.8h \n"
-
- // xx 20 xx 21 xx 22 xx 23
- // xx 30 xx 31 xx 32 xx 33
- "trn1 v1.8h, v0.8h, v0.8h \n"
- "trn2 v4.8h, v0.8h, v0.8h \n"
- "xtn v0.4h, v1.4s \n"
- "xtn v4.4h, v4.4s \n"
-
- // 0+1+2, 3+4+5
- "add v20.8h, v20.8h, v0.8h \n"
- "add v21.8h, v21.8h, v4.8h \n"
-
- // Need to divide, but can't downshift as the value
- // isn't a power of 2. So multiply by 65536 / n
- // and take the upper 16 bits.
- "sqrdmulh v0.8h, v20.8h, v31.8h \n"
- "sqrdmulh v1.8h, v21.8h, v31.8h \n"
-
- // Align for table lookup, tbl requires registers to
- // be adjacent
- "tbl v3.16b, {v0.16b, v1.16b, v2.16b}, v30.16b \n"
-
- MEMACCESS(1)
- "st1 {v3.8b}, [%1], #8 \n" // store bytes 0..7
- MEMACCESS(1)
- "st1 {v3.s}[2], [%1], #4 \n" // store bytes 8..11
- "b.gt 1b \n"
- : "+r"(src_ptr), // %0
- "+r"(dst_ptr), // %1
- "+r"(tmp_src_stride), // %2
- "+r"(src_ptr1), // %3
- "+r"(dst_width) // %4
- : "r"(&kMult38_Div6), // %5
- "r"(&kShuf38_2), // %6
- "r"(&kMult38_Div9) // %7
- : "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v16", "v17",
- "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25", "v29",
- "v30", "v31", "memory", "cc"
- );
-}
-
-// 32x2 -> 12x1
-void ScaleRowDown38_2_Box_NEON(const uint8* src_ptr,
- ptrdiff_t src_stride,
- uint8* dst_ptr, int dst_width) {
- // TODO(fbarchard): use src_stride directly for clang 3.5+.
- ptrdiff_t tmp_src_stride = src_stride; // turned into the second-row pointer by the asm
- asm volatile (
- MEMACCESS(4)
- "ld1 {v30.8h}, [%4] \n" // kMult38_Div6
- MEMACCESS(5)
- "ld1 {v31.16b}, [%5] \n" // kShuf38_2
- "add %2, %2, %0 \n" // %2 = src_stride + src_ptr
- "1: \n"
-
- // 00 40 01 41 02 42 03 43
- // 10 50 11 51 12 52 13 53
- // 20 60 21 61 22 62 23 63
- // 30 70 31 71 32 72 33 73
- MEMACCESS(0)
- "ld4 {v0.8b,v1.8b,v2.8b,v3.8b}, [%0], #32 \n"
- MEMACCESS(2)
- "ld4 {v4.8b,v5.8b,v6.8b,v7.8b}, [%2], #32 \n"
- "subs %w3, %w3, #12 \n" // 12 output pixels per loop
-
- // Shuffle the input data around to align the data
- // so adjacent data can be added. 0,1 - 2,3 - 4,5 - 6,7
- // 00 10 01 11 02 12 03 13
- // 40 50 41 51 42 52 43 53
- "trn1 v16.8b, v0.8b, v1.8b \n"
- "trn2 v17.8b, v0.8b, v1.8b \n"
- "trn1 v18.8b, v4.8b, v5.8b \n"
- "trn2 v19.8b, v4.8b, v5.8b \n"
-
- // 20 30 21 31 22 32 23 33
- // 60 70 61 71 62 72 63 73
- "trn1 v0.8b, v2.8b, v3.8b \n"
- "trn2 v1.8b, v2.8b, v3.8b \n"
- "trn1 v4.8b, v6.8b, v7.8b \n"
- "trn2 v5.8b, v6.8b, v7.8b \n"
-
- // 00+10 01+11 02+12 03+13
- // 40+50 41+51 42+52 43+53
- "uaddlp v16.4h, v16.8b \n"
- "uaddlp v17.4h, v17.8b \n"
- "uaddlp v18.4h, v18.8b \n"
- "uaddlp v19.4h, v19.8b \n"
-
- // 60+70 61+71 62+72 63+73
- "uaddlp v1.4h, v1.8b \n"
- "uaddlp v5.4h, v5.8b \n"
-
- // combine source lines
- "add v16.4h, v16.4h, v18.4h \n"
- "add v17.4h, v17.4h, v19.4h \n"
- "add v2.4h, v1.4h, v5.4h \n"
-
- // dst_ptr[3] = (s[6] + s[7] + s[6+st] + s[7+st]) / 4
- "uqrshrn v2.8b, v2.8h, #2 \n"
-
- // Shuffle 2,3 reg around so that 2 can be added to the
- // 0,1 reg and 3 can be added to the 4,5 reg. This
- // requires expanding from u8 to u16 as the 0,1 and 4,5
- // registers are already expanded. Then do transposes
- // to get aligned.
- // xx 20 xx 30 xx 21 xx 31 xx 22 xx 32 xx 23 xx 33
-
- // combine source lines
- "uaddl v0.8h, v0.8b, v4.8b \n"
-
- // xx 20 xx 21 xx 22 xx 23
- // xx 30 xx 31 xx 32 xx 33
- "trn1 v1.8h, v0.8h, v0.8h \n"
- "trn2 v4.8h, v0.8h, v0.8h \n"
- "xtn v0.4h, v1.4s \n"
- "xtn v4.4h, v4.4s \n"
-
- // 0+1+2, 3+4+5
- "add v16.8h, v16.8h, v0.8h \n"
- "add v17.8h, v17.8h, v4.8h \n"
-
- // Need to divide, but can't downshift as the value
- // isn't a power of 2. So multiply by 65536 / n
- // and take the upper 16 bits.
- "sqrdmulh v0.8h, v16.8h, v30.8h \n"
- "sqrdmulh v1.8h, v17.8h, v30.8h \n"
-
- // Align for table lookup, tbl requires registers to
- // be adjacent
-
- "tbl v3.16b, {v0.16b, v1.16b, v2.16b}, v31.16b \n"
-
- MEMACCESS(1)
- "st1 {v3.8b}, [%1], #8 \n" // store bytes 0..7
- MEMACCESS(1)
- "st1 {v3.s}[2], [%1], #4 \n" // store bytes 8..11
- "b.gt 1b \n"
- : "+r"(src_ptr), // %0
- "+r"(dst_ptr), // %1
- "+r"(tmp_src_stride), // %2
- "+r"(dst_width) // %3
- : "r"(&kMult38_Div6), // %4
- "r"(&kShuf38_2) // %5
- : "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v16", "v17",
- "v18", "v19", "v30", "v31", "memory", "cc"
- );
-}
-
-void ScaleAddRows_NEON(const uint8* src_ptr, ptrdiff_t src_stride, // sums src_height rows column-wise into 16-bit totals
- uint16* dst_ptr, int src_width, int src_height) {
- const uint8* src_tmp = NULL; // scratch register for the asm: walks down the rows of one 16-column strip
- asm volatile (
- "1: \n"
- "mov %0, %1 \n" // restart at the top row of the current strip
- "mov w12, %w5 \n" // w12 = rows left in this strip
- "eor v2.16b, v2.16b, v2.16b \n" // clear the accumulators
- "eor v3.16b, v3.16b, v3.16b \n"
- "2: \n"
- // load 16 pixels into v0 and step down one row
- MEMACCESS(0)
- "ld1 {v0.16b}, [%0], %3 \n"
- "uaddw2 v3.8h, v3.8h, v0.16b \n" // accumulate columns 8..15
- "uaddw v2.8h, v2.8h, v0.8b \n" // accumulate columns 0..7
- "subs w12, w12, #1 \n"
- "b.gt 2b \n"
- MEMACCESS(2)
- "st1 {v2.8h, v3.8h}, [%2], #32 \n" // store pixels
- "add %1, %1, #16 \n" // move to the next 16-column strip
- "subs %w4, %w4, #16 \n" // 16 processed per loop
- "b.gt 1b \n"
- : "+r"(src_tmp), // %0
- "+r"(src_ptr), // %1
- "+r"(dst_ptr), // %2
- "+r"(src_stride), // %3
- "+r"(src_width), // %4
- "+r"(src_height) // %5
- :
- : "memory", "cc", "w12", "v0", "v1", "v2", "v3" // Clobber List
- );
-}
-
-// TODO(Yang Zhang): Investigate less load instructions for
-// the x/dx stepping. The macro loads the two adjacent bytes at index (x >> 16) into lane n of v4 and v5, then advances x by dx.
-#define LOAD2_DATA8_LANE(n) \
- "lsr %5, %3, #16 \n" \
- "add %6, %1, %5 \n" \
- "add %3, %3, %4 \n" \
- MEMACCESS(6) \
- "ld2 {v4.b, v5.b}["#n"], [%6] \n"
-
-void ScaleFilterCols_NEON(uint8* dst_ptr, const uint8* src_ptr, // blends each sampled byte with its right neighbour using the low 16 bits of x
- int dst_width, int x, int dx) {
- int dx_offset[4] = {0, 1, 2, 3}; // per-lane multipliers for dx
- int* tmp = dx_offset; // read once by ld1 below, then reused as scratch by LOAD2_DATA8_LANE
- const uint8* src_tmp = src_ptr; // scratch register for the asm: receives the address of the sampled byte pair
- int64 dst_width64 = (int64) dst_width; // Work around ios 64 bit warning.
- int64 x64 = (int64) x;
- int64 dx64 = (int64) dx;
- asm volatile (
- "dup v0.4s, %w3 \n" // x
- "dup v1.4s, %w4 \n" // dx
- "ld1 {v2.4s}, [%5] \n" // 0 1 2 3
- "shl v3.4s, v1.4s, #2 \n" // 4 * dx
- "mul v1.4s, v1.4s, v2.4s \n" // 0, dx, 2 * dx, 3 * dx
- // x , x + 1 * dx, x + 2 * dx, x + 3 * dx
- "add v1.4s, v1.4s, v0.4s \n"
- // x + 4 * dx, x + 5 * dx, x + 6 * dx, x + 7 * dx
- "add v2.4s, v1.4s, v3.4s \n"
- "shl v0.4s, v3.4s, #1 \n" // 8 * dx
- "1: \n"
- LOAD2_DATA8_LANE(0)
- LOAD2_DATA8_LANE(1)
- LOAD2_DATA8_LANE(2)
- LOAD2_DATA8_LANE(3)
- LOAD2_DATA8_LANE(4)
- LOAD2_DATA8_LANE(5)
- LOAD2_DATA8_LANE(6)
- LOAD2_DATA8_LANE(7)
- "mov v6.16b, v1.16b \n"
- "mov v7.16b, v2.16b \n"
- "uzp1 v6.8h, v6.8h, v7.8h \n" // f = low 16 bits of each of the 8 x positions
- "ushll v4.8h, v4.8b, #0 \n" // a, widened to 16 bits
- "ushll v5.8h, v5.8b, #0 \n" // b, widened to 16 bits
- "ssubl v16.4s, v5.4h, v4.4h \n" // b - a
- "ssubl2 v17.4s, v5.8h, v4.8h \n"
- "ushll v7.4s, v6.4h, #0 \n" // f, widened to 32 bits
- "ushll2 v6.4s, v6.8h, #0 \n"
- "mul v16.4s, v16.4s, v7.4s \n" // (b - a) * f
- "mul v17.4s, v17.4s, v6.4s \n"
- "shrn v6.4h, v16.4s, #16 \n" // ((b - a) * f) >> 16
- "shrn2 v6.8h, v17.4s, #16 \n"
- "add v4.8h, v4.8h, v6.8h \n" // a + (((b - a) * f) >> 16)
- "xtn v4.8b, v4.8h \n"
-
- MEMACCESS(0)
- "st1 {v4.8b}, [%0], #8 \n" // store pixels
- "add v1.4s, v1.4s, v0.4s \n" // advance all 8 x positions by 8 * dx
- "add v2.4s, v2.4s, v0.4s \n"
- "subs %w2, %w2, #8 \n" // 8 processed per loop
- "b.gt 1b \n"
- : "+r"(dst_ptr), // %0
- "+r"(src_ptr), // %1
- "+r"(dst_width64), // %2
- "+r"(x64), // %3
- "+r"(dx64), // %4
- "+r"(tmp), // %5
- "+r"(src_tmp) // %6
- :
- : "memory", "cc", "v0", "v1", "v2", "v3",
- "v4", "v5", "v6", "v7", "v16", "v17"
- );
-}
-
-#undef LOAD2_DATA8_LANE
-
-// 16x2 -> 16x1: blends two source rows, weighting row 2 by source_y_fraction / 256.
-void ScaleFilterRows_NEON(uint8* dst_ptr,
- const uint8* src_ptr, ptrdiff_t src_stride,
- int dst_width, int source_y_fraction) {
- int y_fraction = 256 - source_y_fraction; // weight of row 1
- asm volatile (
- "cmp %w4, #0 \n" // dispatch on the fraction: 0, 64, 128 and 192 have fast paths
- "b.eq 100f \n"
- "add %2, %2, %1 \n" // %2 = src_stride + src_ptr, i.e. the row 2 pointer
- "cmp %w4, #64 \n"
- "b.eq 75f \n"
- "cmp %w4, #128 \n"
- "b.eq 50f \n"
- "cmp %w4, #192 \n"
- "b.eq 25f \n"
-
- "dup v5.16b, %w4 \n" // all 16 lanes: umlal2 below reads the upper half
- "dup v4.16b, %w5 \n" // all 16 lanes: umull2 below reads the upper half
- // General purpose row blend.
- "1: \n"
- MEMACCESS(1)
- "ld1 {v0.16b}, [%1], #16 \n"
- MEMACCESS(2)
- "ld1 {v1.16b}, [%2], #16 \n"
- "subs %w3, %w3, #16 \n"
- "umull v6.8h, v0.8b, v4.8b \n" // row1 * y_fraction, pixels 0..7
- "umull2 v7.8h, v0.16b, v4.16b \n" // row1 * y_fraction, pixels 8..15
- "umlal v6.8h, v1.8b, v5.8b \n" // += row2 * source_y_fraction
- "umlal2 v7.8h, v1.16b, v5.16b \n"
- "rshrn v0.8b, v6.8h, #8 \n" // divide by 256 w/rounding
- "rshrn2 v0.16b, v7.8h, #8 \n"
- MEMACCESS(0)
- "st1 {v0.16b}, [%0], #16 \n"
- "b.gt 1b \n"
- "b 99f \n"
-
- // Blend 25 / 75.
- "25: \n"
- MEMACCESS(1)
- "ld1 {v0.16b}, [%1], #16 \n"
- MEMACCESS(2)
- "ld1 {v1.16b}, [%2], #16 \n"
- "subs %w3, %w3, #16 \n"
- "urhadd v0.16b, v0.16b, v1.16b \n" // two rounding averages against row 2 give 25 / 75
- "urhadd v0.16b, v0.16b, v1.16b \n"
- MEMACCESS(0)
- "st1 {v0.16b}, [%0], #16 \n"
- "b.gt 25b \n"
- "b 99f \n"
-
- // Blend 50 / 50.
- "50: \n"
- MEMACCESS(1)
- "ld1 {v0.16b}, [%1], #16 \n"
- MEMACCESS(2)
- "ld1 {v1.16b}, [%2], #16 \n"
- "subs %w3, %w3, #16 \n"
- "urhadd v0.16b, v0.16b, v1.16b \n"
- MEMACCESS(0)
- "st1 {v0.16b}, [%0], #16 \n"
- "b.gt 50b \n"
- "b 99f \n"
-
- // Blend 75 / 25.
- "75: \n"
- MEMACCESS(1)
- "ld1 {v1.16b}, [%1], #16 \n" // rows swapped relative to the 25 / 75 path
- MEMACCESS(2)
- "ld1 {v0.16b}, [%2], #16 \n"
- "subs %w3, %w3, #16 \n"
- "urhadd v0.16b, v0.16b, v1.16b \n"
- "urhadd v0.16b, v0.16b, v1.16b \n"
- MEMACCESS(0)
- "st1 {v0.16b}, [%0], #16 \n"
- "b.gt 75b \n"
- "b 99f \n"
-
- // Blend 100 / 0 - Copy row unchanged.
- "100: \n"
- MEMACCESS(1)
- "ld1 {v0.16b}, [%1], #16 \n"
- "subs %w3, %w3, #16 \n"
- MEMACCESS(0)
- "st1 {v0.16b}, [%0], #16 \n"
- "b.gt 100b \n"
-
- "99: \n"
- MEMACCESS(0)
- "st1 {v0.b}[15], [%0] \n" // repeat the last written byte one position past the row
- : "+r"(dst_ptr), // %0
- "+r"(src_ptr), // %1
- "+r"(src_stride), // %2
- "+r"(dst_width), // %3
- "+r"(source_y_fraction),// %4
- "+r"(y_fraction) // %5
- :
- : "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "memory", "cc"
- );
-}
-
-void ScaleARGBRowDown2_NEON(const uint8* src_ptr, ptrdiff_t src_stride, // 2:1 point sample of 32-bit pixels; src_stride is not referenced by the asm.
- uint8* dst, int dst_width) {
- asm volatile (
- "1: \n"
- // ld2 de-interleaves 32-bit pixels: even pixels land in v0/v2, odd pixels in v1/v3
- MEMACCESS (0)
- "ld2 {v0.4s, v1.4s}, [%0], #32 \n"
- MEMACCESS (0)
- "ld2 {v2.4s, v3.4s}, [%0], #32 \n"
- "subs %w2, %w2, #8 \n" // 8 processed per loop
- MEMACCESS (1)
- "st1 {v1.16b}, [%1], #16 \n" // store odd pixels
- MEMACCESS (1)
- "st1 {v3.16b}, [%1], #16 \n" // store the next 4 odd pixels
- "b.gt 1b \n" // repeat while the remaining count is still positive
- : "+r" (src_ptr), // %0
- "+r" (dst), // %1
- "+r" (dst_width) // %2
- :
- : "memory", "cc", "v0", "v1", "v2", "v3" // Clobber List
- );
-}
-
-void ScaleARGBRowDown2Linear_NEON(const uint8* src_argb, ptrdiff_t src_stride, // 2:1 horizontal average per channel; src_stride is not referenced by the asm.
- uint8* dst_argb, int dst_width) {
- asm volatile (
- "1: \n"
- MEMACCESS (0)
- // load 16 ARGB pixels (64 bytes), de-interleaved by channel.
- "ld4 {v0.16b,v1.16b,v2.16b,v3.16b}, [%0], #64 \n"
- "subs %w2, %w2, #8 \n" // 8 processed per loop.
- "uaddlp v0.8h, v0.16b \n" // B 16 bytes -> 8 shorts.
- "uaddlp v1.8h, v1.16b \n" // G 16 bytes -> 8 shorts.
- "uaddlp v2.8h, v2.16b \n" // R 16 bytes -> 8 shorts.
- "uaddlp v3.8h, v3.16b \n" // A 16 bytes -> 8 shorts.
- "rshrn v0.8b, v0.8h, #1 \n" // downshift, round and pack
- "rshrn v1.8b, v1.8h, #1 \n"
- "rshrn v2.8b, v2.8h, #1 \n"
- "rshrn v3.8b, v3.8h, #1 \n"
- MEMACCESS (1)
- "st4 {v0.8b,v1.8b,v2.8b,v3.8b}, [%1], #32 \n" // re-interleave the 4 channels and store 8 pixels
- "b.gt 1b \n" // repeat while the remaining count is still positive
- : "+r"(src_argb), // %0
- "+r"(dst_argb), // %1
- "+r"(dst_width) // %2
- :
- : "memory", "cc", "v0", "v1", "v2", "v3" // Clobber List
- );
-}
-
-void ScaleARGBRowDown2Box_NEON(const uint8* src_ptr, ptrdiff_t src_stride, // 2x2 box average per channel over two source rows.
- uint8* dst, int dst_width) {
- asm volatile (
- // change the stride to row 2 pointer (%1 = src_stride + src_ptr)
- "add %1, %1, %0 \n"
- "1: \n"
- MEMACCESS (0)
- "ld4 {v0.16b,v1.16b,v2.16b,v3.16b}, [%0], #64 \n" // load 16 ARGB pixels from row 1.
- "subs %w3, %w3, #8 \n" // 8 processed per loop.
- "uaddlp v0.8h, v0.16b \n" // B 16 bytes -> 8 shorts.
- "uaddlp v1.8h, v1.16b \n" // G 16 bytes -> 8 shorts.
- "uaddlp v2.8h, v2.16b \n" // R 16 bytes -> 8 shorts.
- "uaddlp v3.8h, v3.16b \n" // A 16 bytes -> 8 shorts.
- MEMACCESS (1)
- "ld4 {v16.16b,v17.16b,v18.16b,v19.16b}, [%1], #64 \n" // load 16 ARGB pixels from row 2.
- "uadalp v0.8h, v16.16b \n" // B: add row 2 pair sums onto the row 1 sums.
- "uadalp v1.8h, v17.16b \n" // G: add row 2 pair sums onto the row 1 sums.
- "uadalp v2.8h, v18.16b \n" // R: add row 2 pair sums onto the row 1 sums.
- "uadalp v3.8h, v19.16b \n" // A: add row 2 pair sums onto the row 1 sums.
- "rshrn v0.8b, v0.8h, #2 \n" // downshift, round and pack
- "rshrn v1.8b, v1.8h, #2 \n"
- "rshrn v2.8b, v2.8h, #2 \n"
- "rshrn v3.8b, v3.8h, #2 \n"
- MEMACCESS (2)
- "st4 {v0.8b,v1.8b,v2.8b,v3.8b}, [%2], #32 \n" // re-interleave the 4 channels and store 8 pixels
- "b.gt 1b \n" // repeat while the remaining count is still positive
- : "+r" (src_ptr), // %0
- "+r" (src_stride), // %1
- "+r" (dst), // %2
- "+r" (dst_width) // %3
- :
- : "memory", "cc", "v0", "v1", "v2", "v3", "v16", "v17", "v18", "v19"
- );
-}
-
-// Reads 4 pixels at a time, each src_stepx pixels apart, and writes them contiguously.
-// Alignment requirement: src_argb 4 byte aligned.
-void ScaleARGBRowDownEven_NEON(const uint8* src_argb, ptrdiff_t src_stride, // src_stride is not referenced by the asm.
- int src_stepx, uint8* dst_argb, int dst_width) {
- asm volatile (
- "1: \n"
- MEMACCESS(0)
- "ld1 {v0.s}[0], [%0], %3 \n" // one 32-bit pixel per lane, post-incrementing by %3 bytes
- MEMACCESS(0)
- "ld1 {v0.s}[1], [%0], %3 \n"
- MEMACCESS(0)
- "ld1 {v0.s}[2], [%0], %3 \n"
- MEMACCESS(0)
- "ld1 {v0.s}[3], [%0], %3 \n"
- "subs %w2, %w2, #4 \n" // 4 pixels per loop.
- MEMACCESS(1)
- "st1 {v0.16b}, [%1], #16 \n" // store the 4 gathered pixels
- "b.gt 1b \n" // repeat while the remaining count is still positive
- : "+r"(src_argb), // %0
- "+r"(dst_argb), // %1
- "+r"(dst_width) // %2
- : "r"((int64)(src_stepx * 4)) // %3: byte step between sampled pixels
- : "memory", "cc", "v0"
- );
-}
-
-// Produces 4 pixels per loop, each the rounded average of a 2x2 source block taken every src_stepx pixels.
-// Alignment requirement: src_argb 4 byte aligned.
-// TODO(Yang Zhang): Might be worth another optimization pass in future.
-// It could be upgraded to 8 pixels at a time to start with.
-void ScaleARGBRowDownEvenBox_NEON(const uint8* src_argb, ptrdiff_t src_stride,
- int src_stepx,
- uint8* dst_argb, int dst_width) {
- asm volatile (
- "add %1, %1, %0 \n" // %1 = src_stride + src_argb, i.e. the row 2 pointer
- "1: \n"
- MEMACCESS(0)
- "ld1 {v0.8b}, [%0], %4 \n" // Read 4 2x2 blocks -> 2x1
- MEMACCESS(1)
- "ld1 {v1.8b}, [%1], %4 \n"
- MEMACCESS(0)
- "ld1 {v2.8b}, [%0], %4 \n"
- MEMACCESS(1)
- "ld1 {v3.8b}, [%1], %4 \n"
- MEMACCESS(0)
- "ld1 {v4.8b}, [%0], %4 \n"
- MEMACCESS(1)
- "ld1 {v5.8b}, [%1], %4 \n"
- MEMACCESS(0)
- "ld1 {v6.8b}, [%0], %4 \n"
- MEMACCESS(1)
- "ld1 {v7.8b}, [%1], %4 \n"
- "uaddl v0.8h, v0.8b, v1.8b \n" // widen to 16 bits and add row 1 + row 2 of each block
- "uaddl v2.8h, v2.8b, v3.8b \n"
- "uaddl v4.8h, v4.8b, v5.8b \n"
- "uaddl v6.8h, v6.8b, v7.8b \n"
- "mov v16.d[1], v0.d[1] \n" // ab_cd -> ac_bd
- "mov v0.d[1], v2.d[0] \n"
- "mov v2.d[0], v16.d[1] \n"
- "mov v16.d[1], v4.d[1] \n" // ef_gh -> eg_fh
- "mov v4.d[1], v6.d[0] \n"
- "mov v6.d[0], v16.d[1] \n"
- "add v0.8h, v0.8h, v2.8h \n" // (a+b)_(c+d)
- "add v4.8h, v4.8h, v6.8h \n" // (e+f)_(g+h)
- "rshrn v0.8b, v0.8h, #2 \n" // first 2 pixels.
- "rshrn2 v0.16b, v4.8h, #2 \n" // next 2 pixels.
- "subs %w3, %w3, #4 \n" // 4 pixels per loop.
- MEMACCESS(2)
- "st1 {v0.16b}, [%2], #16 \n" // store the 4 averaged pixels
- "b.gt 1b \n" // repeat while the remaining count is still positive
- : "+r"(src_argb), // %0
- "+r"(src_stride), // %1
- "+r"(dst_argb), // %2
- "+r"(dst_width) // %3
- : "r"((int64)(src_stepx * 4)) // %4: byte step between sampled blocks
- : "memory", "cc", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v16"
- );
-}
-
-// TODO(Yang Zhang): Investigate less load instructions for
-// the x/dx stepping
-#define LOAD1_DATA32_LANE(vn, n) \
- "lsr %5, %3, #16 \n" \
- "add %6, %1, %5, lsl #2 \n" \
- "add %3, %3, %4 \n" \
- MEMACCESS(6) \
- "ld1 {"#vn".s}["#n"], [%6] \n"
-
-void ScaleARGBCols_NEON(uint8* dst_argb, const uint8* src_argb,
- int dst_width, int x, int dx) {
- const uint8* src_tmp = src_argb;
- int64 dst_width64 = (int64) dst_width; // Work around ios 64 bit warning.
- int64 x64 = (int64) x;
- int64 dx64 = (int64) dx;
- int64 tmp64 = 0;
- asm volatile (
- "1: \n"
- LOAD1_DATA32_LANE(v0, 0)
- LOAD1_DATA32_LANE(v0, 1)
- LOAD1_DATA32_LANE(v0, 2)
- LOAD1_DATA32_LANE(v0, 3)
- LOAD1_DATA32_LANE(v1, 0)
- LOAD1_DATA32_LANE(v1, 1)
- LOAD1_DATA32_LANE(v1, 2)
- LOAD1_DATA32_LANE(v1, 3)
-
- MEMACCESS(0)
- "st1 {v0.4s, v1.4s}, [%0], #32 \n" // store pixels
- "subs %w2, %w2, #8 \n" // 8 processed per loop
- "b.gt 1b \n"
- : "+r"(dst_argb), // %0
- "+r"(src_argb), // %1
- "+r"(dst_width64), // %2
- "+r"(x64), // %3
- "+r"(dx64), // %4
- "+r"(tmp64), // %5
- "+r"(src_tmp) // %6
- :
- : "memory", "cc", "v0", "v1"
- );
-}
-
-#undef LOAD1_DATA32_LANE
-
-// TODO(Yang Zhang): Investigate less load instructions for
-// the x/dx stepping
-#define LOAD2_DATA32_LANE(vn1, vn2, n) \
- "lsr %5, %3, #16 \n" \
- "add %6, %1, %5, lsl #2 \n" \
- "add %3, %3, %4 \n" \
- MEMACCESS(6) \
- "ld2 {"#vn1".s, "#vn2".s}["#n"], [%6] \n"
-
-void ScaleARGBFilterCols_NEON(uint8* dst_argb, const uint8* src_argb,
- int dst_width, int x, int dx) {
- int dx_offset[4] = {0, 1, 2, 3};
- int* tmp = dx_offset;
- const uint8* src_tmp = src_argb;
- int64 dst_width64 = (int64) dst_width; // Work around ios 64 bit warning.
- int64 x64 = (int64) x;
- int64 dx64 = (int64) dx;
- asm volatile (
- "dup v0.4s, %w3 \n" // x
- "dup v1.4s, %w4 \n" // dx
- "ld1 {v2.4s}, [%5] \n" // 0 1 2 3
- "shl v6.4s, v1.4s, #2 \n" // 4 * dx
- "mul v1.4s, v1.4s, v2.4s \n"
- "movi v3.16b, #0x7f \n" // 0x7F
- "movi v4.8h, #0x7f \n" // 0x7F
- // x , x + 1 * dx, x + 2 * dx, x + 3 * dx
- "add v5.4s, v1.4s, v0.4s \n"
- "1: \n"
- // d0, d1: a
- // d2, d3: b
- LOAD2_DATA32_LANE(v0, v1, 0)
- LOAD2_DATA32_LANE(v0, v1, 1)
- LOAD2_DATA32_LANE(v0, v1, 2)
- LOAD2_DATA32_LANE(v0, v1, 3)
- "shrn v2.4h, v5.4s, #9 \n"
- "and v2.8b, v2.8b, v4.8b \n"
- "dup v16.8b, v2.b[0] \n"
- "dup v17.8b, v2.b[2] \n"
- "dup v18.8b, v2.b[4] \n"
- "dup v19.8b, v2.b[6] \n"
- "ext v2.8b, v16.8b, v17.8b, #4 \n"
- "ext v17.8b, v18.8b, v19.8b, #4 \n"
- "ins v2.d[1], v17.d[0] \n" // f
- "eor v7.16b, v2.16b, v3.16b \n" // 0x7f ^ f
- "umull v16.8h, v0.8b, v7.8b \n"
- "umull2 v17.8h, v0.16b, v7.16b \n"
- "umull v18.8h, v1.8b, v2.8b \n"
- "umull2 v19.8h, v1.16b, v2.16b \n"
- "add v16.8h, v16.8h, v18.8h \n"
- "add v17.8h, v17.8h, v19.8h \n"
- "shrn v0.8b, v16.8h, #7 \n"
- "shrn2 v0.16b, v17.8h, #7 \n"
-
- MEMACCESS(0)
- "st1 {v0.4s}, [%0], #16 \n" // store pixels
- "add v5.4s, v5.4s, v6.4s \n"
- "subs %w2, %w2, #4 \n" // 4 processed per loop
- "b.gt 1b \n"
- : "+r"(dst_argb), // %0
- "+r"(src_argb), // %1
- "+r"(dst_width64), // %2
- "+r"(x64), // %3
- "+r"(dx64), // %4
- "+r"(tmp), // %5
- "+r"(src_tmp) // %6
- :
- : "memory", "cc", "v0", "v1", "v2", "v3", "v4", "v5",
- "v6", "v7", "v16", "v17", "v18", "v19"
- );
-}
-
-#undef LOAD2_DATA32_LANE
-
-#endif // !defined(LIBYUV_DISABLE_NEON) && defined(__aarch64__)
-
-#ifdef __cplusplus
-} // extern "C"
-} // namespace libyuv
-#endif
diff --git a/third_party/aom/third_party/libyuv/source/scale_win.cc b/third_party/aom/third_party/libyuv/source/scale_win.cc
deleted file mode 100644
index c3896ebad..000000000
--- a/third_party/aom/third_party/libyuv/source/scale_win.cc
+++ /dev/null
@@ -1,1354 +0,0 @@
-/*
- * Copyright 2013 The LibYuv Project Authors. All rights reserved.
- *
- * Use of this source code is governed by a BSD-style license
- * that can be found in the LICENSE file in the root of the source
- * tree. An additional intellectual property rights grant can be found
- * in the file PATENTS. All contributing project authors may
- * be found in the AUTHORS file in the root of the source tree.
- */
-
-#include "libyuv/row.h"
-#include "libyuv/scale_row.h"
-
-#ifdef __cplusplus
-namespace libyuv {
-extern "C" {
-#endif
-
-// This module is for Visual C x86.
-#if !defined(LIBYUV_DISABLE_X86) && defined(_M_IX86) && \
- defined(_MSC_VER) && !defined(__clang__)
-
-// Offsets for source bytes 0 to 9
-static uvec8 kShuf0 =
- { 0, 1, 3, 4, 5, 7, 8, 9, 128, 128, 128, 128, 128, 128, 128, 128 };
-
-// Offsets for source bytes 11 to 20 with 8 subtracted = 3 to 12.
-static uvec8 kShuf1 =
- { 3, 4, 5, 7, 8, 9, 11, 12, 128, 128, 128, 128, 128, 128, 128, 128 };
-
-// Offsets for source bytes 21 to 31 with 16 subtracted = 5 to 31.
-static uvec8 kShuf2 =
- { 5, 7, 8, 9, 11, 12, 13, 15, 128, 128, 128, 128, 128, 128, 128, 128 };
-
-// Offsets for source bytes 0 to 10
-static uvec8 kShuf01 =
- { 0, 1, 1, 2, 2, 3, 4, 5, 5, 6, 6, 7, 8, 9, 9, 10 };
-
-// Offsets for source bytes 10 to 21 with 8 subtracted = 3 to 13.
-static uvec8 kShuf11 =
- { 2, 3, 4, 5, 5, 6, 6, 7, 8, 9, 9, 10, 10, 11, 12, 13 };
-
-// Offsets for source bytes 21 to 31 with 16 subtracted = 5 to 31.
-static uvec8 kShuf21 =
- { 5, 6, 6, 7, 8, 9, 9, 10, 10, 11, 12, 13, 13, 14, 14, 15 };
-
-// Coefficients for source bytes 0 to 10
-static uvec8 kMadd01 =
- { 3, 1, 2, 2, 1, 3, 3, 1, 2, 2, 1, 3, 3, 1, 2, 2 };
-
-// Coefficients for source bytes 10 to 21
-static uvec8 kMadd11 =
- { 1, 3, 3, 1, 2, 2, 1, 3, 3, 1, 2, 2, 1, 3, 3, 1 };
-
-// Coefficients for source bytes 21 to 31
-static uvec8 kMadd21 =
- { 2, 2, 1, 3, 3, 1, 2, 2, 1, 3, 3, 1, 2, 2, 1, 3 };
-
-// Coefficients for source bytes 21 to 31
-static vec16 kRound34 =
- { 2, 2, 2, 2, 2, 2, 2, 2 };
-
-static uvec8 kShuf38a =
- { 0, 3, 6, 8, 11, 14, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 };
-
-static uvec8 kShuf38b =
- { 128, 128, 128, 128, 128, 128, 0, 3, 6, 8, 11, 14, 128, 128, 128, 128 };
-
-// Arrange words 0,3,6 into 0,1,2
-static uvec8 kShufAc =
- { 0, 1, 6, 7, 12, 13, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 };
-
-// Arrange words 0,3,6 into 3,4,5
-static uvec8 kShufAc3 =
- { 128, 128, 128, 128, 128, 128, 0, 1, 6, 7, 12, 13, 128, 128, 128, 128 };
-
-// Scaling values for boxes of 3x3 and 2x3
-static uvec16 kScaleAc33 =
- { 65536 / 9, 65536 / 9, 65536 / 6, 65536 / 9, 65536 / 9, 65536 / 6, 0, 0 };
-
-// Arrange first value for pixels 0,1,2,3,4,5
-static uvec8 kShufAb0 =
- { 0, 128, 3, 128, 6, 128, 8, 128, 11, 128, 14, 128, 128, 128, 128, 128 };
-
-// Arrange second value for pixels 0,1,2,3,4,5
-static uvec8 kShufAb1 =
- { 1, 128, 4, 128, 7, 128, 9, 128, 12, 128, 15, 128, 128, 128, 128, 128 };
-
-// Arrange third value for pixels 0,1,2,3,4,5
-static uvec8 kShufAb2 =
- { 2, 128, 5, 128, 128, 128, 10, 128, 13, 128, 128, 128, 128, 128, 128, 128 };
-
-// Scaling values for boxes of 3x2 and 2x2
-static uvec16 kScaleAb2 =
- { 65536 / 3, 65536 / 3, 65536 / 2, 65536 / 3, 65536 / 3, 65536 / 2, 0, 0 };
-
-// Reads 32 pixels, throws half away and writes 16 pixels.
-__declspec(naked)
-void ScaleRowDown2_SSE2(const uint8* src_ptr, ptrdiff_t src_stride,
- uint8* dst_ptr, int dst_width) {
- __asm {
- mov eax, [esp + 4] // src_ptr
- // src_stride ignored
- mov edx, [esp + 12] // dst_ptr
- mov ecx, [esp + 16] // dst_width
-
- wloop:
- movdqu xmm0, [eax]
- movdqu xmm1, [eax + 16]
- lea eax, [eax + 32]
- psrlw xmm0, 8 // isolate odd pixels.
- psrlw xmm1, 8
- packuswb xmm0, xmm1
- movdqu [edx], xmm0
- lea edx, [edx + 16]
- sub ecx, 16
- jg wloop
-
- ret
- }
-}
-
-// Blends 32x1 rectangle to 16x1.
-__declspec(naked)
-void ScaleRowDown2Linear_SSE2(const uint8* src_ptr, ptrdiff_t src_stride,
- uint8* dst_ptr, int dst_width) {
- __asm {
- mov eax, [esp + 4] // src_ptr
- // src_stride
- mov edx, [esp + 12] // dst_ptr
- mov ecx, [esp + 16] // dst_width
- pcmpeqb xmm5, xmm5 // generate mask 0x00ff00ff
- psrlw xmm5, 8
-
- wloop:
- movdqu xmm0, [eax]
- movdqu xmm1, [eax + 16]
- lea eax, [eax + 32]
-
- movdqa xmm2, xmm0 // average columns (32 to 16 pixels)
- psrlw xmm0, 8
- movdqa xmm3, xmm1
- psrlw xmm1, 8
- pand xmm2, xmm5
- pand xmm3, xmm5
- pavgw xmm0, xmm2
- pavgw xmm1, xmm3
- packuswb xmm0, xmm1
-
- movdqu [edx], xmm0
- lea edx, [edx + 16]
- sub ecx, 16
- jg wloop
-
- ret
- }
-}
-
-// Blends 32x2 rectangle to 16x1.
-__declspec(naked)
-void ScaleRowDown2Box_SSE2(const uint8* src_ptr, ptrdiff_t src_stride,
- uint8* dst_ptr, int dst_width) {
- __asm {
- push esi
- mov eax, [esp + 4 + 4] // src_ptr
- mov esi, [esp + 4 + 8] // src_stride
- mov edx, [esp + 4 + 12] // dst_ptr
- mov ecx, [esp + 4 + 16] // dst_width
- pcmpeqb xmm5, xmm5 // generate mask 0x00ff00ff
- psrlw xmm5, 8
-
- wloop:
- movdqu xmm0, [eax]
- movdqu xmm1, [eax + 16]
- movdqu xmm2, [eax + esi]
- movdqu xmm3, [eax + esi + 16]
- lea eax, [eax + 32]
- pavgb xmm0, xmm2 // average rows
- pavgb xmm1, xmm3
-
- movdqa xmm2, xmm0 // average columns (32 to 16 pixels)
- psrlw xmm0, 8
- movdqa xmm3, xmm1
- psrlw xmm1, 8
- pand xmm2, xmm5
- pand xmm3, xmm5
- pavgw xmm0, xmm2
- pavgw xmm1, xmm3
- packuswb xmm0, xmm1
-
- movdqu [edx], xmm0
- lea edx, [edx + 16]
- sub ecx, 16
- jg wloop
-
- pop esi
- ret
- }
-}
-
-#ifdef HAS_SCALEROWDOWN2_AVX2
-// Reads 64 pixels, throws half away and writes 32 pixels.
-__declspec(naked)
-void ScaleRowDown2_AVX2(const uint8* src_ptr, ptrdiff_t src_stride,
- uint8* dst_ptr, int dst_width) {
- __asm {
- mov eax, [esp + 4] // src_ptr
- // src_stride ignored
- mov edx, [esp + 12] // dst_ptr
- mov ecx, [esp + 16] // dst_width
-
- wloop:
- vmovdqu ymm0, [eax]
- vmovdqu ymm1, [eax + 32]
- lea eax, [eax + 64]
- vpsrlw ymm0, ymm0, 8 // isolate odd pixels.
- vpsrlw ymm1, ymm1, 8
- vpackuswb ymm0, ymm0, ymm1
- vpermq ymm0, ymm0, 0xd8 // unmutate vpackuswb
- vmovdqu [edx], ymm0
- lea edx, [edx + 32]
- sub ecx, 32
- jg wloop
-
- vzeroupper
- ret
- }
-}
-
-// Blends 64x1 rectangle to 32x1.
-__declspec(naked)
-void ScaleRowDown2Linear_AVX2(const uint8* src_ptr, ptrdiff_t src_stride,
- uint8* dst_ptr, int dst_width) {
- __asm {
- mov eax, [esp + 4] // src_ptr
- // src_stride
- mov edx, [esp + 12] // dst_ptr
- mov ecx, [esp + 16] // dst_width
-
- vpcmpeqb ymm4, ymm4, ymm4 // '1' constant, 8b
- vpsrlw ymm4, ymm4, 15
- vpackuswb ymm4, ymm4, ymm4
- vpxor ymm5, ymm5, ymm5 // constant 0
-
- wloop:
- vmovdqu ymm0, [eax]
- vmovdqu ymm1, [eax + 32]
- lea eax, [eax + 64]
-
- vpmaddubsw ymm0, ymm0, ymm4 // average horizontally
- vpmaddubsw ymm1, ymm1, ymm4
- vpavgw ymm0, ymm0, ymm5 // (x + 1) / 2
- vpavgw ymm1, ymm1, ymm5
- vpackuswb ymm0, ymm0, ymm1
- vpermq ymm0, ymm0, 0xd8 // unmutate vpackuswb
-
- vmovdqu [edx], ymm0
- lea edx, [edx + 32]
- sub ecx, 32
- jg wloop
-
- vzeroupper
- ret
- }
-}
-
-// Blends 64x2 rectangle to 32x1.
-__declspec(naked)
-void ScaleRowDown2Box_AVX2(const uint8* src_ptr, ptrdiff_t src_stride,
- uint8* dst_ptr, int dst_width) {
- __asm {
- push esi
- mov eax, [esp + 4 + 4] // src_ptr
- mov esi, [esp + 4 + 8] // src_stride
- mov edx, [esp + 4 + 12] // dst_ptr
- mov ecx, [esp + 4 + 16] // dst_width
-
- vpcmpeqb ymm4, ymm4, ymm4 // '1' constant, 8b
- vpsrlw ymm4, ymm4, 15
- vpackuswb ymm4, ymm4, ymm4
- vpxor ymm5, ymm5, ymm5 // constant 0
-
- wloop:
- vmovdqu ymm0, [eax] // average rows
- vmovdqu ymm1, [eax + 32]
- vpavgb ymm0, ymm0, [eax + esi]
- vpavgb ymm1, ymm1, [eax + esi + 32]
- lea eax, [eax + 64]
-
- vpmaddubsw ymm0, ymm0, ymm4 // average horizontally
- vpmaddubsw ymm1, ymm1, ymm4
- vpavgw ymm0, ymm0, ymm5 // (x + 1) / 2
- vpavgw ymm1, ymm1, ymm5
- vpackuswb ymm0, ymm0, ymm1
- vpermq ymm0, ymm0, 0xd8 // unmutate vpackuswb
-
- vmovdqu [edx], ymm0
- lea edx, [edx + 32]
- sub ecx, 32
- jg wloop
-
- pop esi
- vzeroupper
- ret
- }
-}
-#endif // HAS_SCALEROWDOWN2_AVX2
-
-// Point samples 32 pixels to 8 pixels.
-__declspec(naked)
-void ScaleRowDown4_SSE2(const uint8* src_ptr, ptrdiff_t src_stride,
- uint8* dst_ptr, int dst_width) {
- __asm {
- mov eax, [esp + 4] // src_ptr
- // src_stride ignored
- mov edx, [esp + 12] // dst_ptr
- mov ecx, [esp + 16] // dst_width
- pcmpeqb xmm5, xmm5 // generate mask 0x00ff0000
- psrld xmm5, 24
- pslld xmm5, 16
-
- wloop:
- movdqu xmm0, [eax]
- movdqu xmm1, [eax + 16]
- lea eax, [eax + 32]
- pand xmm0, xmm5
- pand xmm1, xmm5
- packuswb xmm0, xmm1
- psrlw xmm0, 8
- packuswb xmm0, xmm0
- movq qword ptr [edx], xmm0
- lea edx, [edx + 8]
- sub ecx, 8
- jg wloop
-
- ret
- }
-}
-
-// Blends 32x4 rectangle to 8x1.
-__declspec(naked)
-void ScaleRowDown4Box_SSE2(const uint8* src_ptr, ptrdiff_t src_stride,
- uint8* dst_ptr, int dst_width) {
- __asm {
- push esi
- push edi
- mov eax, [esp + 8 + 4] // src_ptr
- mov esi, [esp + 8 + 8] // src_stride
- mov edx, [esp + 8 + 12] // dst_ptr
- mov ecx, [esp + 8 + 16] // dst_width
- lea edi, [esi + esi * 2] // src_stride * 3
- pcmpeqb xmm7, xmm7 // generate mask 0x00ff00ff
- psrlw xmm7, 8
-
- wloop:
- movdqu xmm0, [eax] // average rows
- movdqu xmm1, [eax + 16]
- movdqu xmm2, [eax + esi]
- movdqu xmm3, [eax + esi + 16]
- pavgb xmm0, xmm2
- pavgb xmm1, xmm3
- movdqu xmm2, [eax + esi * 2]
- movdqu xmm3, [eax + esi * 2 + 16]
- movdqu xmm4, [eax + edi]
- movdqu xmm5, [eax + edi + 16]
- lea eax, [eax + 32]
- pavgb xmm2, xmm4
- pavgb xmm3, xmm5
- pavgb xmm0, xmm2
- pavgb xmm1, xmm3
-
- movdqa xmm2, xmm0 // average columns (32 to 16 pixels)
- psrlw xmm0, 8
- movdqa xmm3, xmm1
- psrlw xmm1, 8
- pand xmm2, xmm7
- pand xmm3, xmm7
- pavgw xmm0, xmm2
- pavgw xmm1, xmm3
- packuswb xmm0, xmm1
-
- movdqa xmm2, xmm0 // average columns (16 to 8 pixels)
- psrlw xmm0, 8
- pand xmm2, xmm7
- pavgw xmm0, xmm2
- packuswb xmm0, xmm0
-
- movq qword ptr [edx], xmm0
- lea edx, [edx + 8]
- sub ecx, 8
- jg wloop
-
- pop edi
- pop esi
- ret
- }
-}
-
-#ifdef HAS_SCALEROWDOWN4_AVX2
-// Point samples 64 pixels to 16 pixels.
-__declspec(naked)
-void ScaleRowDown4_AVX2(const uint8* src_ptr, ptrdiff_t src_stride,
- uint8* dst_ptr, int dst_width) {
- __asm {
- mov eax, [esp + 4] // src_ptr
- // src_stride ignored
- mov edx, [esp + 12] // dst_ptr
- mov ecx, [esp + 16] // dst_width
- vpcmpeqb ymm5, ymm5, ymm5 // generate mask 0x00ff0000
- vpsrld ymm5, ymm5, 24
- vpslld ymm5, ymm5, 16
-
- wloop:
- vmovdqu ymm0, [eax]
- vmovdqu ymm1, [eax + 32]
- lea eax, [eax + 64]
- vpand ymm0, ymm0, ymm5
- vpand ymm1, ymm1, ymm5
- vpackuswb ymm0, ymm0, ymm1
- vpermq ymm0, ymm0, 0xd8 // unmutate vpackuswb
- vpsrlw ymm0, ymm0, 8
- vpackuswb ymm0, ymm0, ymm0
- vpermq ymm0, ymm0, 0xd8 // unmutate vpackuswb
- vmovdqu [edx], xmm0
- lea edx, [edx + 16]
- sub ecx, 16
- jg wloop
-
- vzeroupper
- ret
- }
-}
-
-// Blends 64x4 rectangle to 16x1.
-__declspec(naked)
-void ScaleRowDown4Box_AVX2(const uint8* src_ptr, ptrdiff_t src_stride,
- uint8* dst_ptr, int dst_width) {
- __asm {
- push esi
- push edi
- mov eax, [esp + 8 + 4] // src_ptr
- mov esi, [esp + 8 + 8] // src_stride
- mov edx, [esp + 8 + 12] // dst_ptr
- mov ecx, [esp + 8 + 16] // dst_width
- lea edi, [esi + esi * 2] // src_stride * 3
- vpcmpeqb ymm7, ymm7, ymm7 // generate mask 0x00ff00ff
- vpsrlw ymm7, ymm7, 8
-
- wloop:
- vmovdqu ymm0, [eax] // average rows
- vmovdqu ymm1, [eax + 32]
- vpavgb ymm0, ymm0, [eax + esi]
- vpavgb ymm1, ymm1, [eax + esi + 32]
- vmovdqu ymm2, [eax + esi * 2]
- vmovdqu ymm3, [eax + esi * 2 + 32]
- vpavgb ymm2, ymm2, [eax + edi]
- vpavgb ymm3, ymm3, [eax + edi + 32]
- lea eax, [eax + 64]
- vpavgb ymm0, ymm0, ymm2
- vpavgb ymm1, ymm1, ymm3
-
- vpand ymm2, ymm0, ymm7 // average columns (64 to 32 pixels)
- vpand ymm3, ymm1, ymm7
- vpsrlw ymm0, ymm0, 8
- vpsrlw ymm1, ymm1, 8
- vpavgw ymm0, ymm0, ymm2
- vpavgw ymm1, ymm1, ymm3
- vpackuswb ymm0, ymm0, ymm1
- vpermq ymm0, ymm0, 0xd8 // unmutate vpackuswb
-
- vpand ymm2, ymm0, ymm7 // average columns (32 to 16 pixels)
- vpsrlw ymm0, ymm0, 8
- vpavgw ymm0, ymm0, ymm2
- vpackuswb ymm0, ymm0, ymm0
- vpermq ymm0, ymm0, 0xd8 // unmutate vpackuswb
-
- vmovdqu [edx], xmm0
- lea edx, [edx + 16]
- sub ecx, 16
- jg wloop
-
- pop edi
- pop esi
- vzeroupper
- ret
- }
-}
-#endif // HAS_SCALEROWDOWN4_AVX2
-
-// Point samples 32 pixels to 24 pixels.
-// Produces three 8 byte values. For each 8 bytes, 16 bytes are read.
-// Then shuffled to do the scaling.
-
-__declspec(naked)
-void ScaleRowDown34_SSSE3(const uint8* src_ptr, ptrdiff_t src_stride,
- uint8* dst_ptr, int dst_width) {
- __asm {
- mov eax, [esp + 4] // src_ptr
- // src_stride ignored
- mov edx, [esp + 12] // dst_ptr
- mov ecx, [esp + 16] // dst_width
- movdqa xmm3, kShuf0
- movdqa xmm4, kShuf1
- movdqa xmm5, kShuf2
-
- wloop:
- movdqu xmm0, [eax]
- movdqu xmm1, [eax + 16]
- lea eax, [eax + 32]
- movdqa xmm2, xmm1
- palignr xmm1, xmm0, 8
- pshufb xmm0, xmm3
- pshufb xmm1, xmm4
- pshufb xmm2, xmm5
- movq qword ptr [edx], xmm0
- movq qword ptr [edx + 8], xmm1
- movq qword ptr [edx + 16], xmm2
- lea edx, [edx + 24]
- sub ecx, 24
- jg wloop
-
- ret
- }
-}
-
-// Blends 32x2 rectangle to 24x1
-// Produces three 8 byte values. For each 8 bytes, 16 bytes are read.
-// Then shuffled to do the scaling.
-
-// Register usage:
-// xmm0 src_row 0
-// xmm1 src_row 1
-// xmm2 shuf 0
-// xmm3 shuf 1
-// xmm4 shuf 2
-// xmm5 madd 0
-// xmm6 madd 1
-// xmm7 kRound34
-
-// Note that movdqa+palign may be better than movdqu.
-__declspec(naked)
-void ScaleRowDown34_1_Box_SSSE3(const uint8* src_ptr,
- ptrdiff_t src_stride,
- uint8* dst_ptr, int dst_width) {
- __asm {
- push esi
- mov eax, [esp + 4 + 4] // src_ptr
- mov esi, [esp + 4 + 8] // src_stride
- mov edx, [esp + 4 + 12] // dst_ptr
- mov ecx, [esp + 4 + 16] // dst_width
- movdqa xmm2, kShuf01
- movdqa xmm3, kShuf11
- movdqa xmm4, kShuf21
- movdqa xmm5, kMadd01
- movdqa xmm6, kMadd11
- movdqa xmm7, kRound34
-
- wloop:
- movdqu xmm0, [eax] // pixels 0..7
- movdqu xmm1, [eax + esi]
- pavgb xmm0, xmm1
- pshufb xmm0, xmm2
- pmaddubsw xmm0, xmm5
- paddsw xmm0, xmm7
- psrlw xmm0, 2
- packuswb xmm0, xmm0
- movq qword ptr [edx], xmm0
- movdqu xmm0, [eax + 8] // pixels 8..15
- movdqu xmm1, [eax + esi + 8]
- pavgb xmm0, xmm1
- pshufb xmm0, xmm3
- pmaddubsw xmm0, xmm6
- paddsw xmm0, xmm7
- psrlw xmm0, 2
- packuswb xmm0, xmm0
- movq qword ptr [edx + 8], xmm0
- movdqu xmm0, [eax + 16] // pixels 16..23
- movdqu xmm1, [eax + esi + 16]
- lea eax, [eax + 32]
- pavgb xmm0, xmm1
- pshufb xmm0, xmm4
- movdqa xmm1, kMadd21
- pmaddubsw xmm0, xmm1
- paddsw xmm0, xmm7
- psrlw xmm0, 2
- packuswb xmm0, xmm0
- movq qword ptr [edx + 16], xmm0
- lea edx, [edx + 24]
- sub ecx, 24
- jg wloop
-
- pop esi
- ret
- }
-}
-
-// Note that movdqa+palign may be better than movdqu.
-__declspec(naked)
-void ScaleRowDown34_0_Box_SSSE3(const uint8* src_ptr,
- ptrdiff_t src_stride,
- uint8* dst_ptr, int dst_width) {
- __asm {
- push esi
- mov eax, [esp + 4 + 4] // src_ptr
- mov esi, [esp + 4 + 8] // src_stride
- mov edx, [esp + 4 + 12] // dst_ptr
- mov ecx, [esp + 4 + 16] // dst_width
- movdqa xmm2, kShuf01
- movdqa xmm3, kShuf11
- movdqa xmm4, kShuf21
- movdqa xmm5, kMadd01
- movdqa xmm6, kMadd11
- movdqa xmm7, kRound34
-
- wloop:
- movdqu xmm0, [eax] // pixels 0..7
- movdqu xmm1, [eax + esi]
- pavgb xmm1, xmm0
- pavgb xmm0, xmm1
- pshufb xmm0, xmm2
- pmaddubsw xmm0, xmm5
- paddsw xmm0, xmm7
- psrlw xmm0, 2
- packuswb xmm0, xmm0
- movq qword ptr [edx], xmm0
- movdqu xmm0, [eax + 8] // pixels 8..15
- movdqu xmm1, [eax + esi + 8]
- pavgb xmm1, xmm0
- pavgb xmm0, xmm1
- pshufb xmm0, xmm3
- pmaddubsw xmm0, xmm6
- paddsw xmm0, xmm7
- psrlw xmm0, 2
- packuswb xmm0, xmm0
- movq qword ptr [edx + 8], xmm0
- movdqu xmm0, [eax + 16] // pixels 16..23
- movdqu xmm1, [eax + esi + 16]
- lea eax, [eax + 32]
- pavgb xmm1, xmm0
- pavgb xmm0, xmm1
- pshufb xmm0, xmm4
- movdqa xmm1, kMadd21
- pmaddubsw xmm0, xmm1
- paddsw xmm0, xmm7
- psrlw xmm0, 2
- packuswb xmm0, xmm0
- movq qword ptr [edx + 16], xmm0
- lea edx, [edx+24]
- sub ecx, 24
- jg wloop
-
- pop esi
- ret
- }
-}
-
-// 3/8 point sampler
-
-// Scale 32 pixels to 12
-__declspec(naked)
-void ScaleRowDown38_SSSE3(const uint8* src_ptr, ptrdiff_t src_stride,
- uint8* dst_ptr, int dst_width) {
- __asm {
- mov eax, [esp + 4] // src_ptr
- // src_stride ignored
- mov edx, [esp + 12] // dst_ptr
- mov ecx, [esp + 16] // dst_width
- movdqa xmm4, kShuf38a
- movdqa xmm5, kShuf38b
-
- xloop:
- movdqu xmm0, [eax] // 16 pixels -> 0,1,2,3,4,5
- movdqu xmm1, [eax + 16] // 16 pixels -> 6,7,8,9,10,11
- lea eax, [eax + 32]
- pshufb xmm0, xmm4
- pshufb xmm1, xmm5
- paddusb xmm0, xmm1
-
- movq qword ptr [edx], xmm0 // write 12 pixels
- movhlps xmm1, xmm0
- movd [edx + 8], xmm1
- lea edx, [edx + 12]
- sub ecx, 12
- jg xloop
-
- ret
- }
-}
-
-// Scale 16x3 pixels to 6x1 with interpolation
-__declspec(naked)
-void ScaleRowDown38_3_Box_SSSE3(const uint8* src_ptr,
- ptrdiff_t src_stride,
- uint8* dst_ptr, int dst_width) {
- __asm {
- push esi
- mov eax, [esp + 4 + 4] // src_ptr
- mov esi, [esp + 4 + 8] // src_stride
- mov edx, [esp + 4 + 12] // dst_ptr
- mov ecx, [esp + 4 + 16] // dst_width
- movdqa xmm2, kShufAc
- movdqa xmm3, kShufAc3
- movdqa xmm4, kScaleAc33
- pxor xmm5, xmm5
-
- xloop:
- movdqu xmm0, [eax] // sum up 3 rows into xmm0/1
- movdqu xmm6, [eax + esi]
- movhlps xmm1, xmm0
- movhlps xmm7, xmm6
- punpcklbw xmm0, xmm5
- punpcklbw xmm1, xmm5
- punpcklbw xmm6, xmm5
- punpcklbw xmm7, xmm5
- paddusw xmm0, xmm6
- paddusw xmm1, xmm7
- movdqu xmm6, [eax + esi * 2]
- lea eax, [eax + 16]
- movhlps xmm7, xmm6
- punpcklbw xmm6, xmm5
- punpcklbw xmm7, xmm5
- paddusw xmm0, xmm6
- paddusw xmm1, xmm7
-
- movdqa xmm6, xmm0 // 8 pixels -> 0,1,2 of xmm6
- psrldq xmm0, 2
- paddusw xmm6, xmm0
- psrldq xmm0, 2
- paddusw xmm6, xmm0
- pshufb xmm6, xmm2
-
- movdqa xmm7, xmm1 // 8 pixels -> 3,4,5 of xmm6
- psrldq xmm1, 2
- paddusw xmm7, xmm1
- psrldq xmm1, 2
- paddusw xmm7, xmm1
- pshufb xmm7, xmm3
- paddusw xmm6, xmm7
-
- pmulhuw xmm6, xmm4 // divide by 9,9,6, 9,9,6
- packuswb xmm6, xmm6
-
- movd [edx], xmm6 // write 6 pixels
- psrlq xmm6, 16
- movd [edx + 2], xmm6
- lea edx, [edx + 6]
- sub ecx, 6
- jg xloop
-
- pop esi
- ret
- }
-}
-
-// Scale 16x2 pixels to 6x1 with interpolation
-__declspec(naked)
-void ScaleRowDown38_2_Box_SSSE3(const uint8* src_ptr,
- ptrdiff_t src_stride,
- uint8* dst_ptr, int dst_width) {
- __asm {
- push esi
- mov eax, [esp + 4 + 4] // src_ptr
- mov esi, [esp + 4 + 8] // src_stride
- mov edx, [esp + 4 + 12] // dst_ptr
- mov ecx, [esp + 4 + 16] // dst_width
- movdqa xmm2, kShufAb0
- movdqa xmm3, kShufAb1
- movdqa xmm4, kShufAb2
- movdqa xmm5, kScaleAb2
-
- xloop:
- movdqu xmm0, [eax] // average 2 rows into xmm0
- movdqu xmm1, [eax + esi]
- lea eax, [eax + 16]
- pavgb xmm0, xmm1
-
- movdqa xmm1, xmm0 // 16 pixels -> 0,1,2,3,4,5 of xmm1
- pshufb xmm1, xmm2
- movdqa xmm6, xmm0
- pshufb xmm6, xmm3
- paddusw xmm1, xmm6
- pshufb xmm0, xmm4
- paddusw xmm1, xmm0
-
- pmulhuw xmm1, xmm5 // divide by 3,3,2, 3,3,2
- packuswb xmm1, xmm1
-
- movd [edx], xmm1 // write 6 pixels
- psrlq xmm1, 16
- movd [edx + 2], xmm1
- lea edx, [edx + 6]
- sub ecx, 6
- jg xloop
-
- pop esi
- ret
- }
-}
-
-// Reads 16 bytes and accumulates to 16 shorts at a time.
-__declspec(naked)
-void ScaleAddRow_SSE2(const uint8* src_ptr, uint16* dst_ptr, int src_width) {
- __asm {
- mov eax, [esp + 4] // src_ptr
- mov edx, [esp + 8] // dst_ptr
- mov ecx, [esp + 12] // src_width
- pxor xmm5, xmm5
-
- // sum rows
- xloop:
- movdqu xmm3, [eax] // read 16 bytes
- lea eax, [eax + 16]
- movdqu xmm0, [edx] // read 16 words from destination
- movdqu xmm1, [edx + 16]
- movdqa xmm2, xmm3
- punpcklbw xmm2, xmm5
- punpckhbw xmm3, xmm5
- paddusw xmm0, xmm2 // sum 16 words
- paddusw xmm1, xmm3
- movdqu [edx], xmm0 // write 16 words to destination
- movdqu [edx + 16], xmm1
- lea edx, [edx + 32]
- sub ecx, 16
- jg xloop
- ret
- }
-}
-
-#ifdef HAS_SCALEADDROW_AVX2
-// Reads 32 bytes and accumulates to 32 shorts at a time.
-__declspec(naked)
-void ScaleAddRow_AVX2(const uint8* src_ptr, uint16* dst_ptr, int src_width) {
- __asm {
- mov eax, [esp + 4] // src_ptr
- mov edx, [esp + 8] // dst_ptr
- mov ecx, [esp + 12] // src_width
- vpxor ymm5, ymm5, ymm5
-
- // sum rows
- xloop:
- vmovdqu ymm3, [eax] // read 32 bytes
- lea eax, [eax + 32]
- vpermq ymm3, ymm3, 0xd8 // unmutate for vpunpck
- vpunpcklbw ymm2, ymm3, ymm5
- vpunpckhbw ymm3, ymm3, ymm5
- vpaddusw ymm0, ymm2, [edx] // sum 16 words
- vpaddusw ymm1, ymm3, [edx + 32]
- vmovdqu [edx], ymm0 // write 32 words to destination
- vmovdqu [edx + 32], ymm1
- lea edx, [edx + 64]
- sub ecx, 32
- jg xloop
-
- vzeroupper
- ret
- }
-}
-#endif // HAS_SCALEADDROW_AVX2
-
-// Bilinear column filtering. SSSE3 version.
-__declspec(naked)
-void ScaleFilterCols_SSSE3(uint8* dst_ptr, const uint8* src_ptr,
- int dst_width, int x, int dx) {
- __asm {
- push ebx
- push esi
- push edi
- mov edi, [esp + 12 + 4] // dst_ptr
- mov esi, [esp + 12 + 8] // src_ptr
- mov ecx, [esp + 12 + 12] // dst_width
- movd xmm2, [esp + 12 + 16] // x
- movd xmm3, [esp + 12 + 20] // dx
- mov eax, 0x04040000 // shuffle to line up fractions with pixel.
- movd xmm5, eax
- pcmpeqb xmm6, xmm6 // generate 0x007f for inverting fraction.
- psrlw xmm6, 9
- pextrw eax, xmm2, 1 // get x0 integer. preroll
- sub ecx, 2
- jl xloop29
-
- movdqa xmm0, xmm2 // x1 = x0 + dx
- paddd xmm0, xmm3
- punpckldq xmm2, xmm0 // x0 x1
- punpckldq xmm3, xmm3 // dx dx
- paddd xmm3, xmm3 // dx * 2, dx * 2
- pextrw edx, xmm2, 3 // get x1 integer. preroll
-
- // 2 Pixel loop.
- xloop2:
- movdqa xmm1, xmm2 // x0, x1 fractions.
- paddd xmm2, xmm3 // x += dx
- movzx ebx, word ptr [esi + eax] // 2 source x0 pixels
- movd xmm0, ebx
- psrlw xmm1, 9 // 7 bit fractions.
- movzx ebx, word ptr [esi + edx] // 2 source x1 pixels
- movd xmm4, ebx
- pshufb xmm1, xmm5 // 0011
- punpcklwd xmm0, xmm4
- pxor xmm1, xmm6 // 0..7f and 7f..0
- pmaddubsw xmm0, xmm1 // 16 bit, 2 pixels.
- pextrw eax, xmm2, 1 // get x0 integer. next iteration.
- pextrw edx, xmm2, 3 // get x1 integer. next iteration.
- psrlw xmm0, 7 // 8.7 fixed point to low 8 bits.
- packuswb xmm0, xmm0 // 8 bits, 2 pixels.
- movd ebx, xmm0
- mov [edi], bx
- lea edi, [edi + 2]
- sub ecx, 2 // 2 pixels
- jge xloop2
-
- xloop29:
-
- add ecx, 2 - 1
- jl xloop99
-
- // 1 pixel remainder
- movzx ebx, word ptr [esi + eax] // 2 source x0 pixels
- movd xmm0, ebx
- psrlw xmm2, 9 // 7 bit fractions.
- pshufb xmm2, xmm5 // 0011
- pxor xmm2, xmm6 // 0..7f and 7f..0
- pmaddubsw xmm0, xmm2 // 16 bit
- psrlw xmm0, 7 // 8.7 fixed point to low 8 bits.
- packuswb xmm0, xmm0 // 8 bits
- movd ebx, xmm0
- mov [edi], bl
-
- xloop99:
-
- pop edi
- pop esi
- pop ebx
- ret
- }
-}
-
-// Reads 16 pixels, duplicates them and writes 32 pixels.
-__declspec(naked)
-void ScaleColsUp2_SSE2(uint8* dst_ptr, const uint8* src_ptr,
- int dst_width, int x, int dx) {
- __asm {
- mov edx, [esp + 4] // dst_ptr
- mov eax, [esp + 8] // src_ptr
- mov ecx, [esp + 12] // dst_width
-
- wloop:
- movdqu xmm0, [eax]
- lea eax, [eax + 16]
- movdqa xmm1, xmm0
- punpcklbw xmm0, xmm0
- punpckhbw xmm1, xmm1
- movdqu [edx], xmm0
- movdqu [edx + 16], xmm1
- lea edx, [edx + 32]
- sub ecx, 32
- jg wloop
-
- ret
- }
-}
-
-// Reads 8 pixels, throws half away and writes 4 even pixels (0, 2, 4, 6)
-__declspec(naked)
-void ScaleARGBRowDown2_SSE2(const uint8* src_argb,
- ptrdiff_t src_stride,
- uint8* dst_argb, int dst_width) {
- __asm {
- mov eax, [esp + 4] // src_argb
- // src_stride ignored
- mov edx, [esp + 12] // dst_argb
- mov ecx, [esp + 16] // dst_width
-
- wloop:
- movdqu xmm0, [eax]
- movdqu xmm1, [eax + 16]
- lea eax, [eax + 32]
- shufps xmm0, xmm1, 0xdd
- movdqu [edx], xmm0
- lea edx, [edx + 16]
- sub ecx, 4
- jg wloop
-
- ret
- }
-}
-
-// Blends 8x1 rectangle to 4x1.
-__declspec(naked)
-void ScaleARGBRowDown2Linear_SSE2(const uint8* src_argb,
- ptrdiff_t src_stride,
- uint8* dst_argb, int dst_width) {
- __asm {
- mov eax, [esp + 4] // src_argb
- // src_stride ignored
- mov edx, [esp + 12] // dst_argb
- mov ecx, [esp + 16] // dst_width
-
- wloop:
- movdqu xmm0, [eax]
- movdqu xmm1, [eax + 16]
- lea eax, [eax + 32]
- movdqa xmm2, xmm0
- shufps xmm0, xmm1, 0x88 // even pixels
- shufps xmm2, xmm1, 0xdd // odd pixels
- pavgb xmm0, xmm2
- movdqu [edx], xmm0
- lea edx, [edx + 16]
- sub ecx, 4
- jg wloop
-
- ret
- }
-}
-
-// Blends 8x2 rectangle to 4x1.
-__declspec(naked)
-void ScaleARGBRowDown2Box_SSE2(const uint8* src_argb,
- ptrdiff_t src_stride,
- uint8* dst_argb, int dst_width) {
- __asm {
- push esi
- mov eax, [esp + 4 + 4] // src_argb
- mov esi, [esp + 4 + 8] // src_stride
- mov edx, [esp + 4 + 12] // dst_argb
- mov ecx, [esp + 4 + 16] // dst_width
-
- wloop:
- movdqu xmm0, [eax]
- movdqu xmm1, [eax + 16]
- movdqu xmm2, [eax + esi]
- movdqu xmm3, [eax + esi + 16]
- lea eax, [eax + 32]
- pavgb xmm0, xmm2 // average rows
- pavgb xmm1, xmm3
- movdqa xmm2, xmm0 // average columns (8 to 4 pixels)
- shufps xmm0, xmm1, 0x88 // even pixels
- shufps xmm2, xmm1, 0xdd // odd pixels
- pavgb xmm0, xmm2
- movdqu [edx], xmm0
- lea edx, [edx + 16]
- sub ecx, 4
- jg wloop
-
- pop esi
- ret
- }
-}
-
-// Reads 4 pixels at a time.
-__declspec(naked)
-void ScaleARGBRowDownEven_SSE2(const uint8* src_argb, ptrdiff_t src_stride,
- int src_stepx,
- uint8* dst_argb, int dst_width) {
- __asm {
- push ebx
- push edi
- mov eax, [esp + 8 + 4] // src_argb
- // src_stride ignored
- mov ebx, [esp + 8 + 12] // src_stepx
- mov edx, [esp + 8 + 16] // dst_argb
- mov ecx, [esp + 8 + 20] // dst_width
- lea ebx, [ebx * 4]
- lea edi, [ebx + ebx * 2]
-
- wloop:
- movd xmm0, [eax]
- movd xmm1, [eax + ebx]
- punpckldq xmm0, xmm1
- movd xmm2, [eax + ebx * 2]
- movd xmm3, [eax + edi]
- lea eax, [eax + ebx * 4]
- punpckldq xmm2, xmm3
- punpcklqdq xmm0, xmm2
- movdqu [edx], xmm0
- lea edx, [edx + 16]
- sub ecx, 4
- jg wloop
-
- pop edi
- pop ebx
- ret
- }
-}
-
-// Blends four 2x2 to 4x1.
-__declspec(naked)
-void ScaleARGBRowDownEvenBox_SSE2(const uint8* src_argb,
- ptrdiff_t src_stride,
- int src_stepx,
- uint8* dst_argb, int dst_width) {
- __asm {
- push ebx
- push esi
- push edi
- mov eax, [esp + 12 + 4] // src_argb
- mov esi, [esp + 12 + 8] // src_stride
- mov ebx, [esp + 12 + 12] // src_stepx
- mov edx, [esp + 12 + 16] // dst_argb
- mov ecx, [esp + 12 + 20] // dst_width
- lea esi, [eax + esi] // row1 pointer
- lea ebx, [ebx * 4]
- lea edi, [ebx + ebx * 2]
-
- wloop:
- movq xmm0, qword ptr [eax] // row0 4 pairs
- movhps xmm0, qword ptr [eax + ebx]
- movq xmm1, qword ptr [eax + ebx * 2]
- movhps xmm1, qword ptr [eax + edi]
- lea eax, [eax + ebx * 4]
- movq xmm2, qword ptr [esi] // row1 4 pairs
- movhps xmm2, qword ptr [esi + ebx]
- movq xmm3, qword ptr [esi + ebx * 2]
- movhps xmm3, qword ptr [esi + edi]
- lea esi, [esi + ebx * 4]
- pavgb xmm0, xmm2 // average rows
- pavgb xmm1, xmm3
- movdqa xmm2, xmm0 // average columns (8 to 4 pixels)
- shufps xmm0, xmm1, 0x88 // even pixels
- shufps xmm2, xmm1, 0xdd // odd pixels
- pavgb xmm0, xmm2
- movdqu [edx], xmm0
- lea edx, [edx + 16]
- sub ecx, 4
- jg wloop
-
- pop edi
- pop esi
- pop ebx
- ret
- }
-}
-
-// Column scaling unfiltered. SSE2 version.
-__declspec(naked)
-void ScaleARGBCols_SSE2(uint8* dst_argb, const uint8* src_argb,
- int dst_width, int x, int dx) {
- __asm {
- push edi
- push esi
- mov edi, [esp + 8 + 4] // dst_argb
- mov esi, [esp + 8 + 8] // src_argb
- mov ecx, [esp + 8 + 12] // dst_width
- movd xmm2, [esp + 8 + 16] // x
- movd xmm3, [esp + 8 + 20] // dx
-
- pshufd xmm2, xmm2, 0 // x0 x0 x0 x0
- pshufd xmm0, xmm3, 0x11 // dx 0 dx 0
- paddd xmm2, xmm0
- paddd xmm3, xmm3 // 0, 0, 0, dx * 2
- pshufd xmm0, xmm3, 0x05 // dx * 2, dx * 2, 0, 0
- paddd xmm2, xmm0 // x3 x2 x1 x0
- paddd xmm3, xmm3 // 0, 0, 0, dx * 4
- pshufd xmm3, xmm3, 0 // dx * 4, dx * 4, dx * 4, dx * 4
-
- pextrw eax, xmm2, 1 // get x0 integer.
- pextrw edx, xmm2, 3 // get x1 integer.
-
- cmp ecx, 0
- jle xloop99
- sub ecx, 4
- jl xloop49
-
- // 4 Pixel loop.
- xloop4:
- movd xmm0, [esi + eax * 4] // 1 source x0 pixels
- movd xmm1, [esi + edx * 4] // 1 source x1 pixels
- pextrw eax, xmm2, 5 // get x2 integer.
- pextrw edx, xmm2, 7 // get x3 integer.
- paddd xmm2, xmm3 // x += dx
- punpckldq xmm0, xmm1 // x0 x1
-
- movd xmm1, [esi + eax * 4] // 1 source x2 pixels
- movd xmm4, [esi + edx * 4] // 1 source x3 pixels
- pextrw eax, xmm2, 1 // get x0 integer. next iteration.
- pextrw edx, xmm2, 3 // get x1 integer. next iteration.
- punpckldq xmm1, xmm4 // x2 x3
- punpcklqdq xmm0, xmm1 // x0 x1 x2 x3
- movdqu [edi], xmm0
- lea edi, [edi + 16]
- sub ecx, 4 // 4 pixels
- jge xloop4
-
- xloop49:
- test ecx, 2
- je xloop29
-
- // 2 Pixels.
- movd xmm0, [esi + eax * 4] // 1 source x0 pixels
- movd xmm1, [esi + edx * 4] // 1 source x1 pixels
- pextrw eax, xmm2, 5 // get x2 integer.
- punpckldq xmm0, xmm1 // x0 x1
-
- movq qword ptr [edi], xmm0
- lea edi, [edi + 8]
-
- xloop29:
- test ecx, 1
- je xloop99
-
- // 1 Pixels.
- movd xmm0, [esi + eax * 4] // 1 source x2 pixels
- movd dword ptr [edi], xmm0
- xloop99:
-
- pop esi
- pop edi
- ret
- }
-}
-
-// Bilinear row filtering combines 2x1 -> 1x1. SSSE3 version.
-// TODO(fbarchard): Port to Neon
-
-// Shuffle table for arranging 2 pixels into pairs for pmaddubsw
-static uvec8 kShuffleColARGB = {
- 0u, 4u, 1u, 5u, 2u, 6u, 3u, 7u, // bbggrraa 1st pixel
- 8u, 12u, 9u, 13u, 10u, 14u, 11u, 15u // bbggrraa 2nd pixel
-};
-
-// Shuffle table for duplicating 2 fractions into 8 bytes each
-static uvec8 kShuffleFractions = {
- 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 4u, 4u, 4u, 4u, 4u, 4u, 4u, 4u,
-};
-
-__declspec(naked)
-void ScaleARGBFilterCols_SSSE3(uint8* dst_argb, const uint8* src_argb,
- int dst_width, int x, int dx) {
- __asm {
- push esi
- push edi
- mov edi, [esp + 8 + 4] // dst_argb
- mov esi, [esp + 8 + 8] // src_argb
- mov ecx, [esp + 8 + 12] // dst_width
- movd xmm2, [esp + 8 + 16] // x
- movd xmm3, [esp + 8 + 20] // dx
- movdqa xmm4, kShuffleColARGB
- movdqa xmm5, kShuffleFractions
- pcmpeqb xmm6, xmm6 // generate 0x007f for inverting fraction.
- psrlw xmm6, 9
- pextrw eax, xmm2, 1 // get x0 integer. preroll
- sub ecx, 2
- jl xloop29
-
- movdqa xmm0, xmm2 // x1 = x0 + dx
- paddd xmm0, xmm3
- punpckldq xmm2, xmm0 // x0 x1
- punpckldq xmm3, xmm3 // dx dx
- paddd xmm3, xmm3 // dx * 2, dx * 2
- pextrw edx, xmm2, 3 // get x1 integer. preroll
-
- // 2 Pixel loop.
- xloop2:
- movdqa xmm1, xmm2 // x0, x1 fractions.
- paddd xmm2, xmm3 // x += dx
- movq xmm0, qword ptr [esi + eax * 4] // 2 source x0 pixels
- psrlw xmm1, 9 // 7 bit fractions.
- movhps xmm0, qword ptr [esi + edx * 4] // 2 source x1 pixels
- pshufb xmm1, xmm5 // 0000000011111111
- pshufb xmm0, xmm4 // arrange pixels into pairs
- pxor xmm1, xmm6 // 0..7f and 7f..0
- pmaddubsw xmm0, xmm1 // argb_argb 16 bit, 2 pixels.
- pextrw eax, xmm2, 1 // get x0 integer. next iteration.
- pextrw edx, xmm2, 3 // get x1 integer. next iteration.
- psrlw xmm0, 7 // argb 8.7 fixed point to low 8 bits.
- packuswb xmm0, xmm0 // argb_argb 8 bits, 2 pixels.
- movq qword ptr [edi], xmm0
- lea edi, [edi + 8]
- sub ecx, 2 // 2 pixels
- jge xloop2
-
- xloop29:
-
- add ecx, 2 - 1
- jl xloop99
-
- // 1 pixel remainder
- psrlw xmm2, 9 // 7 bit fractions.
- movq xmm0, qword ptr [esi + eax * 4] // 2 source x0 pixels
- pshufb xmm2, xmm5 // 00000000
- pshufb xmm0, xmm4 // arrange pixels into pairs
- pxor xmm2, xmm6 // 0..7f and 7f..0
- pmaddubsw xmm0, xmm2 // argb 16 bit, 1 pixel.
- psrlw xmm0, 7
- packuswb xmm0, xmm0 // argb 8 bits, 1 pixel.
- movd [edi], xmm0
-
- xloop99:
-
- pop edi
- pop esi
- ret
- }
-}
-
-// Reads 4 pixels, duplicates them and writes 8 pixels.
-__declspec(naked)
-void ScaleARGBColsUp2_SSE2(uint8* dst_argb, const uint8* src_argb,
- int dst_width, int x, int dx) {
- __asm {
- mov edx, [esp + 4] // dst_argb
- mov eax, [esp + 8] // src_argb
- mov ecx, [esp + 12] // dst_width
-
- wloop:
- movdqu xmm0, [eax]
- lea eax, [eax + 16]
- movdqa xmm1, xmm0
- punpckldq xmm0, xmm0
- punpckhdq xmm1, xmm1
- movdqu [edx], xmm0
- movdqu [edx + 16], xmm1
- lea edx, [edx + 32]
- sub ecx, 8
- jg wloop
-
- ret
- }
-}
-
-// Divide num by div and return as 16.16 fixed point result.
-__declspec(naked)
-int FixedDiv_X86(int num, int div) {
- __asm {
- mov eax, [esp + 4] // num
- cdq // extend num to 64 bits
- shld edx, eax, 16 // 32.16
- shl eax, 16
- idiv dword ptr [esp + 8]
- ret
- }
-}
-
-// Divide num by div and return as 16.16 fixed point result.
-__declspec(naked)
-int FixedDiv1_X86(int num, int div) {
- __asm {
- mov eax, [esp + 4] // num
- mov ecx, [esp + 8] // denom
- cdq // extend num to 64 bits
- shld edx, eax, 16 // 32.16
- shl eax, 16
- sub eax, 0x00010001
- sbb edx, 0
- sub ecx, 1
- idiv ecx
- ret
- }
-}
-#endif // !defined(LIBYUV_DISABLE_X86) && defined(_M_IX86)
-
-#ifdef __cplusplus
-} // extern "C"
-} // namespace libyuv
-#endif
diff --git a/third_party/aom/third_party/libyuv/source/video_common.cc b/third_party/aom/third_party/libyuv/source/video_common.cc
deleted file mode 100644
index 379a0669a..000000000
--- a/third_party/aom/third_party/libyuv/source/video_common.cc
+++ /dev/null
@@ -1,64 +0,0 @@
-/*
- * Copyright 2011 The LibYuv Project Authors. All rights reserved.
- *
- * Use of this source code is governed by a BSD-style license
- * that can be found in the LICENSE file in the root of the source
- * tree. An additional intellectual property rights grant can be found
- * in the file PATENTS. All contributing project authors may
- * be found in the AUTHORS file in the root of the source tree.
- */
-
-
-#include "libyuv/video_common.h"
-
-#ifdef __cplusplus
-namespace libyuv {
-extern "C" {
-#endif
-
-#define ARRAY_SIZE(x) (int)(sizeof(x) / sizeof(x[0]))
-
-struct FourCCAliasEntry {
- uint32 alias;
- uint32 canonical;
-};
-
-static const struct FourCCAliasEntry kFourCCAliases[] = {
- {FOURCC_IYUV, FOURCC_I420},
- {FOURCC_YU16, FOURCC_I422},
- {FOURCC_YU24, FOURCC_I444},
- {FOURCC_YUYV, FOURCC_YUY2},
- {FOURCC_YUVS, FOURCC_YUY2}, // kCMPixelFormat_422YpCbCr8_yuvs
- {FOURCC_HDYC, FOURCC_UYVY},
- {FOURCC_2VUY, FOURCC_UYVY}, // kCMPixelFormat_422YpCbCr8
- {FOURCC_JPEG, FOURCC_MJPG}, // Note: JPEG has DHT while MJPG does not.
- {FOURCC_DMB1, FOURCC_MJPG},
- {FOURCC_BA81, FOURCC_BGGR}, // deprecated.
- {FOURCC_RGB3, FOURCC_RAW },
- {FOURCC_BGR3, FOURCC_24BG},
- {FOURCC_CM32, FOURCC_BGRA}, // kCMPixelFormat_32ARGB
- {FOURCC_CM24, FOURCC_RAW }, // kCMPixelFormat_24RGB
- {FOURCC_L555, FOURCC_RGBO}, // kCMPixelFormat_16LE555
- {FOURCC_L565, FOURCC_RGBP}, // kCMPixelFormat_16LE565
- {FOURCC_5551, FOURCC_RGBO}, // kCMPixelFormat_16LE5551
-};
-// TODO(fbarchard): Consider mapping kCMPixelFormat_32BGRA to FOURCC_ARGB.
-// {FOURCC_BGRA, FOURCC_ARGB}, // kCMPixelFormat_32BGRA
-
-LIBYUV_API
-uint32 CanonicalFourCC(uint32 fourcc) {
- int i;
- for (i = 0; i < ARRAY_SIZE(kFourCCAliases); ++i) {
- if (kFourCCAliases[i].alias == fourcc) {
- return kFourCCAliases[i].canonical;
- }
- }
- // Not an alias, so return it as-is.
- return fourcc;
-}
-
-#ifdef __cplusplus
-} // extern "C"
-} // namespace libyuv
-#endif
-
diff --git a/third_party/aom/third_party/libyuv/source/x86inc.asm b/third_party/aom/third_party/libyuv/source/x86inc.asm
deleted file mode 100644
index cb5c32df3..000000000
--- a/third_party/aom/third_party/libyuv/source/x86inc.asm
+++ /dev/null
@@ -1,1136 +0,0 @@
-;*****************************************************************************
-;* x86inc.asm: x264asm abstraction layer
-;*****************************************************************************
-;* Copyright (C) 2005-2012 x264 project
-;*
-;* Authors: Loren Merritt <lorenm@u.washington.edu>
-;* Anton Mitrofanov <BugMaster@narod.ru>
-;* Jason Garrett-Glaser <darkshikari@gmail.com>
-;* Henrik Gramner <hengar-6@student.ltu.se>
-;*
-;* Permission to use, copy, modify, and/or distribute this software for any
-;* purpose with or without fee is hereby granted, provided that the above
-;* copyright notice and this permission notice appear in all copies.
-;*
-;* THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
-;* WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
-;* MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
-;* ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
-;* WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
-;* ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
-;* OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
-;*****************************************************************************
-
-; This is a header file for the x264ASM assembly language, which uses
-; NASM/YASM syntax combined with a large number of macros to provide easy
-; abstraction between different calling conventions (x86_32, win64, linux64).
-; It also has various other useful features to simplify writing the kind of
-; DSP functions that are most often used in x264.
-
-; Unlike the rest of x264, this file is available under an ISC license, as it
-; has significant usefulness outside of x264 and we want it to be available
-; to the largest audience possible. Of course, if you modify it for your own
-; purposes to add a new feature, we strongly encourage contributing a patch
-; as this feature might be useful for others as well. Send patches or ideas
-; to x264-devel@videolan.org .
-
-; Local changes for libyuv:
-; remove %define program_name and references in labels
-; rename cpus to uppercase
-
-%define WIN64 0
-%define UNIX64 0
-%if ARCH_X86_64
- %ifidn __OUTPUT_FORMAT__,win32
- %define WIN64 1
- %elifidn __OUTPUT_FORMAT__,win64
- %define WIN64 1
- %else
- %define UNIX64 1
- %endif
-%endif
-
-%ifdef PREFIX
- %define mangle(x) _ %+ x
-%else
- %define mangle(x) x
-%endif
-
-; Name of the .rodata section.
-; Kludge: Something on OS X fails to align .rodata even given an align attribute,
-; so use a different read-only section.
-%macro SECTION_RODATA 0-1 16
- %ifidn __OUTPUT_FORMAT__,macho64
- SECTION .text align=%1
- %elifidn __OUTPUT_FORMAT__,macho
- SECTION .text align=%1
- fakegot:
- %elifidn __OUTPUT_FORMAT__,aout
- section .text
- %else
- SECTION .rodata align=%1
- %endif
-%endmacro
-
-; aout does not support align=
-%macro SECTION_TEXT 0-1 16
- %ifidn __OUTPUT_FORMAT__,aout
- SECTION .text
- %else
- SECTION .text align=%1
- %endif
-%endmacro
-
-%if WIN64
- %define PIC
-%elif ARCH_X86_64 == 0
-; x86_32 doesn't require PIC.
-; Some distros prefer shared objects to be PIC, but nothing breaks if
-; the code contains a few textrels, so we'll skip that complexity.
- %undef PIC
-%endif
-%ifdef PIC
- default rel
-%endif
-
-; Always use long nops (reduces 0x90 spam in disassembly on x86_32)
-CPU amdnop
-
-; Macros to eliminate most code duplication between x86_32 and x86_64:
-; Currently this works only for leaf functions which load all their arguments
-; into registers at the start, and make no other use of the stack. Luckily that
-; covers most of x264's asm.
-
-; PROLOGUE:
-; %1 = number of arguments. loads them from stack if needed.
-; %2 = number of registers used. pushes callee-saved regs if needed.
-; %3 = number of xmm registers used. pushes callee-saved xmm regs if needed.
-; %4 = list of names to define to registers
-; PROLOGUE can also be invoked by adding the same options to cglobal
-
-; e.g.
-; cglobal foo, 2,3,0, dst, src, tmp
-; declares a function (foo), taking two args (dst and src) and one local variable (tmp)
-
-; TODO Some functions can use some args directly from the stack. If they're the
-; last args then you can just not declare them, but if they're in the middle
-; we need more flexible macro.
-
-; RET:
-; Pops anything that was pushed by PROLOGUE, and returns.
-
-; REP_RET:
-; Same, but if it doesn't pop anything it becomes a 2-byte ret, for athlons
-; which are slow when a normal ret follows a branch.
-
-; registers:
-; rN and rNq are the native-size register holding function argument N
-; rNd, rNw, rNb are dword, word, and byte size
-; rNh is the high 8 bits of the word size
-; rNm is the original location of arg N (a register or on the stack), dword
-; rNmp is native size
-
-%macro DECLARE_REG 2-3
- %define r%1q %2
- %define r%1d %2d
- %define r%1w %2w
- %define r%1b %2b
- %define r%1h %2h
- %if %0 == 2
- %define r%1m %2d
- %define r%1mp %2
- %elif ARCH_X86_64 ; memory
- %define r%1m [rsp + stack_offset + %3]
- %define r%1mp qword r %+ %1m
- %else
- %define r%1m [esp + stack_offset + %3]
- %define r%1mp dword r %+ %1m
- %endif
- %define r%1 %2
-%endmacro
-
-%macro DECLARE_REG_SIZE 3
- %define r%1q r%1
- %define e%1q r%1
- %define r%1d e%1
- %define e%1d e%1
- %define r%1w %1
- %define e%1w %1
- %define r%1h %3
- %define e%1h %3
- %define r%1b %2
- %define e%1b %2
-%if ARCH_X86_64 == 0
- %define r%1 e%1
-%endif
-%endmacro
-
-DECLARE_REG_SIZE ax, al, ah
-DECLARE_REG_SIZE bx, bl, bh
-DECLARE_REG_SIZE cx, cl, ch
-DECLARE_REG_SIZE dx, dl, dh
-DECLARE_REG_SIZE si, sil, null
-DECLARE_REG_SIZE di, dil, null
-DECLARE_REG_SIZE bp, bpl, null
-
-; t# defines for when per-arch register allocation is more complex than just function arguments
-
-%macro DECLARE_REG_TMP 1-*
- %assign %%i 0
- %rep %0
- CAT_XDEFINE t, %%i, r%1
- %assign %%i %%i+1
- %rotate 1
- %endrep
-%endmacro
-
-%macro DECLARE_REG_TMP_SIZE 0-*
- %rep %0
- %define t%1q t%1 %+ q
- %define t%1d t%1 %+ d
- %define t%1w t%1 %+ w
- %define t%1h t%1 %+ h
- %define t%1b t%1 %+ b
- %rotate 1
- %endrep
-%endmacro
-
-DECLARE_REG_TMP_SIZE 0,1,2,3,4,5,6,7,8,9,10,11,12,13,14
-
-%if ARCH_X86_64
- %define gprsize 8
-%else
- %define gprsize 4
-%endif
-
-%macro PUSH 1
- push %1
- %assign stack_offset stack_offset+gprsize
-%endmacro
-
-%macro POP 1
- pop %1
- %assign stack_offset stack_offset-gprsize
-%endmacro
-
-%macro PUSH_IF_USED 1-*
- %rep %0
- %if %1 < regs_used
- PUSH r%1
- %endif
- %rotate 1
- %endrep
-%endmacro
-
-%macro POP_IF_USED 1-*
- %rep %0
- %if %1 < regs_used
- pop r%1
- %endif
- %rotate 1
- %endrep
-%endmacro
-
-%macro LOAD_IF_USED 1-*
- %rep %0
- %if %1 < num_args
- mov r%1, r %+ %1 %+ mp
- %endif
- %rotate 1
- %endrep
-%endmacro
-
-%macro SUB 2
- sub %1, %2
- %ifidn %1, rsp
- %assign stack_offset stack_offset+(%2)
- %endif
-%endmacro
-
-%macro ADD 2
- add %1, %2
- %ifidn %1, rsp
- %assign stack_offset stack_offset-(%2)
- %endif
-%endmacro
-
-%macro movifnidn 2
- %ifnidn %1, %2
- mov %1, %2
- %endif
-%endmacro
-
-%macro movsxdifnidn 2
- %ifnidn %1, %2
- movsxd %1, %2
- %endif
-%endmacro
-
-%macro ASSERT 1
- %if (%1) == 0
- %error assert failed
- %endif
-%endmacro
-
-%macro DEFINE_ARGS 0-*
- %ifdef n_arg_names
- %assign %%i 0
- %rep n_arg_names
- CAT_UNDEF arg_name %+ %%i, q
- CAT_UNDEF arg_name %+ %%i, d
- CAT_UNDEF arg_name %+ %%i, w
- CAT_UNDEF arg_name %+ %%i, h
- CAT_UNDEF arg_name %+ %%i, b
- CAT_UNDEF arg_name %+ %%i, m
- CAT_UNDEF arg_name %+ %%i, mp
- CAT_UNDEF arg_name, %%i
- %assign %%i %%i+1
- %endrep
- %endif
-
- %xdefine %%stack_offset stack_offset
- %undef stack_offset ; so that the current value of stack_offset doesn't get baked in by xdefine
- %assign %%i 0
- %rep %0
- %xdefine %1q r %+ %%i %+ q
- %xdefine %1d r %+ %%i %+ d
- %xdefine %1w r %+ %%i %+ w
- %xdefine %1h r %+ %%i %+ h
- %xdefine %1b r %+ %%i %+ b
- %xdefine %1m r %+ %%i %+ m
- %xdefine %1mp r %+ %%i %+ mp
- CAT_XDEFINE arg_name, %%i, %1
- %assign %%i %%i+1
- %rotate 1
- %endrep
- %xdefine stack_offset %%stack_offset
- %assign n_arg_names %0
-%endmacro
-
-%if WIN64 ; Windows x64 ;=================================================
-
-DECLARE_REG 0, rcx
-DECLARE_REG 1, rdx
-DECLARE_REG 2, R8
-DECLARE_REG 3, R9
-DECLARE_REG 4, R10, 40
-DECLARE_REG 5, R11, 48
-DECLARE_REG 6, rax, 56
-DECLARE_REG 7, rdi, 64
-DECLARE_REG 8, rsi, 72
-DECLARE_REG 9, rbx, 80
-DECLARE_REG 10, rbp, 88
-DECLARE_REG 11, R12, 96
-DECLARE_REG 12, R13, 104
-DECLARE_REG 13, R14, 112
-DECLARE_REG 14, R15, 120
-
-%macro PROLOGUE 2-4+ 0 ; #args, #regs, #xmm_regs, arg_names...
- %assign num_args %1
- %assign regs_used %2
- ASSERT regs_used >= num_args
- ASSERT regs_used <= 15
- PUSH_IF_USED 7, 8, 9, 10, 11, 12, 13, 14
- %if mmsize == 8
- %assign xmm_regs_used 0
- %else
- WIN64_SPILL_XMM %3
- %endif
- LOAD_IF_USED 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14
- DEFINE_ARGS %4
-%endmacro
-
-%macro WIN64_SPILL_XMM 1
- %assign xmm_regs_used %1
- ASSERT xmm_regs_used <= 16
- %if xmm_regs_used > 6
- SUB rsp, (xmm_regs_used-6)*16+16
- %assign %%i xmm_regs_used
- %rep (xmm_regs_used-6)
- %assign %%i %%i-1
- movdqa [rsp + (%%i-6)*16+(~stack_offset&8)], xmm %+ %%i
- %endrep
- %endif
-%endmacro
-
-%macro WIN64_RESTORE_XMM_INTERNAL 1
- %if xmm_regs_used > 6
- %assign %%i xmm_regs_used
- %rep (xmm_regs_used-6)
- %assign %%i %%i-1
- movdqa xmm %+ %%i, [%1 + (%%i-6)*16+(~stack_offset&8)]
- %endrep
- add %1, (xmm_regs_used-6)*16+16
- %endif
-%endmacro
-
-%macro WIN64_RESTORE_XMM 1
- WIN64_RESTORE_XMM_INTERNAL %1
- %assign stack_offset stack_offset-(xmm_regs_used-6)*16+16
- %assign xmm_regs_used 0
-%endmacro
-
-%define has_epilogue regs_used > 7 || xmm_regs_used > 6 || mmsize == 32
-
-%macro RET 0
- WIN64_RESTORE_XMM_INTERNAL rsp
- POP_IF_USED 14, 13, 12, 11, 10, 9, 8, 7
-%if mmsize == 32
- vzeroupper
-%endif
- ret
-%endmacro
-
-%elif ARCH_X86_64 ; *nix x64 ;=============================================
-
-DECLARE_REG 0, rdi
-DECLARE_REG 1, rsi
-DECLARE_REG 2, rdx
-DECLARE_REG 3, rcx
-DECLARE_REG 4, R8
-DECLARE_REG 5, R9
-DECLARE_REG 6, rax, 8
-DECLARE_REG 7, R10, 16
-DECLARE_REG 8, R11, 24
-DECLARE_REG 9, rbx, 32
-DECLARE_REG 10, rbp, 40
-DECLARE_REG 11, R12, 48
-DECLARE_REG 12, R13, 56
-DECLARE_REG 13, R14, 64
-DECLARE_REG 14, R15, 72
-
-%macro PROLOGUE 2-4+ ; #args, #regs, #xmm_regs, arg_names...
- %assign num_args %1
- %assign regs_used %2
- ASSERT regs_used >= num_args
- ASSERT regs_used <= 15
- PUSH_IF_USED 9, 10, 11, 12, 13, 14
- LOAD_IF_USED 6, 7, 8, 9, 10, 11, 12, 13, 14
- DEFINE_ARGS %4
-%endmacro
-
-%define has_epilogue regs_used > 9 || mmsize == 32
-
-%macro RET 0
- POP_IF_USED 14, 13, 12, 11, 10, 9
-%if mmsize == 32
- vzeroupper
-%endif
- ret
-%endmacro
-
-%else ; X86_32 ;==============================================================
-
-DECLARE_REG 0, eax, 4
-DECLARE_REG 1, ecx, 8
-DECLARE_REG 2, edx, 12
-DECLARE_REG 3, ebx, 16
-DECLARE_REG 4, esi, 20
-DECLARE_REG 5, edi, 24
-DECLARE_REG 6, ebp, 28
-%define rsp esp
-
-%macro DECLARE_ARG 1-*
- %rep %0
- %define r%1m [esp + stack_offset + 4*%1 + 4]
- %define r%1mp dword r%1m
- %rotate 1
- %endrep
-%endmacro
-
-DECLARE_ARG 7, 8, 9, 10, 11, 12, 13, 14
-
-%macro PROLOGUE 2-4+ ; #args, #regs, #xmm_regs, arg_names...
- %assign num_args %1
- %assign regs_used %2
- %if regs_used > 7
- %assign regs_used 7
- %endif
- ASSERT regs_used >= num_args
- PUSH_IF_USED 3, 4, 5, 6
- LOAD_IF_USED 0, 1, 2, 3, 4, 5, 6
- DEFINE_ARGS %4
-%endmacro
-
-%define has_epilogue regs_used > 3 || mmsize == 32
-
-%macro RET 0
- POP_IF_USED 6, 5, 4, 3
-%if mmsize == 32
- vzeroupper
-%endif
- ret
-%endmacro
-
-%endif ;======================================================================
-
-%if WIN64 == 0
-%macro WIN64_SPILL_XMM 1
-%endmacro
-%macro WIN64_RESTORE_XMM 1
-%endmacro
-%endif
-
-%macro REP_RET 0
- %if has_epilogue
- RET
- %else
- rep ret
- %endif
-%endmacro
-
-%macro TAIL_CALL 2 ; callee, is_nonadjacent
- %if has_epilogue
- call %1
- RET
- %elif %2
- jmp %1
- %endif
-%endmacro
-
-;=============================================================================
-; arch-independent part
-;=============================================================================
-
-%assign function_align 16
-
-; Begin a function.
-; Applies any symbol mangling needed for C linkage, and sets up a define such that
-; subsequent uses of the function name automatically refer to the mangled version.
-; Appends cpuflags to the function name if cpuflags has been specified.
-%macro cglobal 1-2+ ; name, [PROLOGUE args]
-%if %0 == 1
- cglobal_internal %1 %+ SUFFIX
-%else
- cglobal_internal %1 %+ SUFFIX, %2
-%endif
-%endmacro
-%macro cglobal_internal 1-2+
- %ifndef cglobaled_%1
- %xdefine %1 mangle(%1)
- %xdefine %1.skip_prologue %1 %+ .skip_prologue
- CAT_XDEFINE cglobaled_, %1, 1
- %endif
- %xdefine current_function %1
- %ifidn __OUTPUT_FORMAT__,elf
- global %1:function hidden
- %else
- global %1
- %endif
- align function_align
- %1:
- RESET_MM_PERMUTATION ; not really needed, but makes disassembly somewhat nicer
- %assign stack_offset 0
- %if %0 > 1
- PROLOGUE %2
- %endif
-%endmacro
-
-%macro cextern 1
- %xdefine %1 mangle(%1)
- CAT_XDEFINE cglobaled_, %1, 1
- extern %1
-%endmacro
-
-; like cextern, but without the prefix
-%macro cextern_naked 1
- %xdefine %1 mangle(%1)
- CAT_XDEFINE cglobaled_, %1, 1
- extern %1
-%endmacro
-
-%macro const 2+
- %xdefine %1 mangle(%1)
- global %1
- %1: %2
-%endmacro
-
-; This is needed for ELF, otherwise the GNU linker assumes the stack is
-; executable by default.
-%ifidn __OUTPUT_FORMAT__,elf
-SECTION .note.GNU-stack noalloc noexec nowrite progbits
-%endif
-%ifidn __OUTPUT_FORMAT__,elf32
-section .note.GNU-stack noalloc noexec nowrite progbits
-%endif
-%ifidn __OUTPUT_FORMAT__,elf64
-section .note.GNU-stack noalloc noexec nowrite progbits
-%endif
-
-; cpuflags
-
-%assign cpuflags_MMX (1<<0)
-%assign cpuflags_MMX2 (1<<1) | cpuflags_MMX
-%assign cpuflags_3dnow (1<<2) | cpuflags_MMX
-%assign cpuflags_3dnow2 (1<<3) | cpuflags_3dnow
-%assign cpuflags_SSE (1<<4) | cpuflags_MMX2
-%assign cpuflags_SSE2 (1<<5) | cpuflags_SSE
-%assign cpuflags_SSE2slow (1<<6) | cpuflags_SSE2
-%assign cpuflags_SSE3 (1<<7) | cpuflags_SSE2
-%assign cpuflags_SSSE3 (1<<8) | cpuflags_SSE3
-%assign cpuflags_SSE4 (1<<9) | cpuflags_SSSE3
-%assign cpuflags_SSE42 (1<<10)| cpuflags_SSE4
-%assign cpuflags_AVX (1<<11)| cpuflags_SSE42
-%assign cpuflags_xop (1<<12)| cpuflags_AVX
-%assign cpuflags_fma4 (1<<13)| cpuflags_AVX
-%assign cpuflags_AVX2 (1<<14)| cpuflags_AVX
-%assign cpuflags_fma3 (1<<15)| cpuflags_AVX
-
-%assign cpuflags_cache32 (1<<16)
-%assign cpuflags_cache64 (1<<17)
-%assign cpuflags_slowctz (1<<18)
-%assign cpuflags_lzcnt (1<<19)
-%assign cpuflags_misalign (1<<20)
-%assign cpuflags_aligned (1<<21) ; not a cpu feature, but a function variant
-%assign cpuflags_atom (1<<22)
-%assign cpuflags_bmi1 (1<<23)
-%assign cpuflags_bmi2 (1<<24)|cpuflags_bmi1
-%assign cpuflags_tbm (1<<25)|cpuflags_bmi1
-
-%define cpuflag(x) ((cpuflags & (cpuflags_ %+ x)) == (cpuflags_ %+ x))
-%define notcpuflag(x) ((cpuflags & (cpuflags_ %+ x)) != (cpuflags_ %+ x))
-
-; Takes up to 2 cpuflags from the above list.
-; All subsequent functions (up to the next INIT_CPUFLAGS) is built for the specified cpu.
-; You shouldn't need to invoke this macro directly, it's a subroutine for INIT_MMX &co.
-%macro INIT_CPUFLAGS 0-2
- %if %0 >= 1
- %xdefine cpuname %1
- %assign cpuflags cpuflags_%1
- %if %0 >= 2
- %xdefine cpuname %1_%2
- %assign cpuflags cpuflags | cpuflags_%2
- %endif
- %xdefine SUFFIX _ %+ cpuname
- %if cpuflag(AVX)
- %assign AVX_enabled 1
- %endif
- %if mmsize == 16 && notcpuflag(SSE2)
- %define mova movaps
- %define movu movups
- %define movnta movntps
- %endif
- %if cpuflag(aligned)
- %define movu mova
- %elifidn %1, SSE3
- %define movu lddqu
- %endif
- %else
- %xdefine SUFFIX
- %undef cpuname
- %undef cpuflags
- %endif
-%endmacro
-
-; merge MMX and SSE*
-
-%macro CAT_XDEFINE 3
- %xdefine %1%2 %3
-%endmacro
-
-%macro CAT_UNDEF 2
- %undef %1%2
-%endmacro
-
-%macro INIT_MMX 0-1+
- %assign AVX_enabled 0
- %define RESET_MM_PERMUTATION INIT_MMX %1
- %define mmsize 8
- %define num_mmregs 8
- %define mova movq
- %define movu movq
- %define movh movd
- %define movnta movntq
- %assign %%i 0
- %rep 8
- CAT_XDEFINE m, %%i, mm %+ %%i
- CAT_XDEFINE nmm, %%i, %%i
- %assign %%i %%i+1
- %endrep
- %rep 8
- CAT_UNDEF m, %%i
- CAT_UNDEF nmm, %%i
- %assign %%i %%i+1
- %endrep
- INIT_CPUFLAGS %1
-%endmacro
-
-%macro INIT_XMM 0-1+
- %assign AVX_enabled 0
- %define RESET_MM_PERMUTATION INIT_XMM %1
- %define mmsize 16
- %define num_mmregs 8
- %if ARCH_X86_64
- %define num_mmregs 16
- %endif
- %define mova movdqa
- %define movu movdqu
- %define movh movq
- %define movnta movntdq
- %assign %%i 0
- %rep num_mmregs
- CAT_XDEFINE m, %%i, xmm %+ %%i
- CAT_XDEFINE nxmm, %%i, %%i
- %assign %%i %%i+1
- %endrep
- INIT_CPUFLAGS %1
-%endmacro
-
-%macro INIT_YMM 0-1+
- %assign AVX_enabled 1
- %define RESET_MM_PERMUTATION INIT_YMM %1
- %define mmsize 32
- %define num_mmregs 8
- %if ARCH_X86_64
- %define num_mmregs 16
- %endif
- %define mova vmovaps
- %define movu vmovups
- %undef movh
- %define movnta vmovntps
- %assign %%i 0
- %rep num_mmregs
- CAT_XDEFINE m, %%i, ymm %+ %%i
- CAT_XDEFINE nymm, %%i, %%i
- %assign %%i %%i+1
- %endrep
- INIT_CPUFLAGS %1
-%endmacro
-
-INIT_XMM
-
-; I often want to use macros that permute their arguments. e.g. there's no
-; efficient way to implement butterfly or transpose or dct without swapping some
-; arguments.
-;
-; I would like to not have to manually keep track of the permutations:
-; If I insert a permutation in the middle of a function, it should automatically
-; change everything that follows. For more complex macros I may also have multiple
-; implementations, e.g. the SSE2 and SSSE3 versions may have different permutations.
-;
-; Hence these macros. Insert a PERMUTE or some SWAPs at the end of a macro that
-; permutes its arguments. It's equivalent to exchanging the contents of the
-; registers, except that this way you exchange the register names instead, so it
-; doesn't cost any cycles.
-
-%macro PERMUTE 2-* ; takes a list of pairs to swap
-%rep %0/2
- %xdefine tmp%2 m%2
- %xdefine ntmp%2 nm%2
- %rotate 2
-%endrep
-%rep %0/2
- %xdefine m%1 tmp%2
- %xdefine nm%1 ntmp%2
- %undef tmp%2
- %undef ntmp%2
- %rotate 2
-%endrep
-%endmacro
-
-%macro SWAP 2-* ; swaps a single chain (sometimes more concise than pairs)
-%rep %0-1
-%ifdef m%1
- %xdefine tmp m%1
- %xdefine m%1 m%2
- %xdefine m%2 tmp
- CAT_XDEFINE n, m%1, %1
- CAT_XDEFINE n, m%2, %2
-%else
- ; If we were called as "SWAP m0,m1" rather than "SWAP 0,1" infer the original numbers here.
- ; Be careful using this mode in nested macros though, as in some cases there may be
- ; other copies of m# that have already been dereferenced and don't get updated correctly.
- %xdefine %%n1 n %+ %1
- %xdefine %%n2 n %+ %2
- %xdefine tmp m %+ %%n1
- CAT_XDEFINE m, %%n1, m %+ %%n2
- CAT_XDEFINE m, %%n2, tmp
- CAT_XDEFINE n, m %+ %%n1, %%n1
- CAT_XDEFINE n, m %+ %%n2, %%n2
-%endif
- %undef tmp
- %rotate 1
-%endrep
-%endmacro
-
-; If SAVE_MM_PERMUTATION is placed at the end of a function, then any later
-; calls to that function will automatically load the permutation, so values can
-; be returned in mmregs.
-%macro SAVE_MM_PERMUTATION 0-1
- %if %0
- %xdefine %%f %1_m
- %else
- %xdefine %%f current_function %+ _m
- %endif
- %assign %%i 0
- %rep num_mmregs
- CAT_XDEFINE %%f, %%i, m %+ %%i
- %assign %%i %%i+1
- %endrep
-%endmacro
-
-%macro LOAD_MM_PERMUTATION 1 ; name to load from
- %ifdef %1_m0
- %assign %%i 0
- %rep num_mmregs
- CAT_XDEFINE m, %%i, %1_m %+ %%i
- CAT_XDEFINE n, m %+ %%i, %%i
- %assign %%i %%i+1
- %endrep
- %endif
-%endmacro
-
-; Append cpuflags to the callee's name iff the appended name is known and the plain name isn't
-%macro call 1
- call_internal %1, %1 %+ SUFFIX
-%endmacro
-%macro call_internal 2
- %xdefine %%i %1
- %ifndef cglobaled_%1
- %ifdef cglobaled_%2
- %xdefine %%i %2
- %endif
- %endif
- call %%i
- LOAD_MM_PERMUTATION %%i
-%endmacro
-
-; Substitutions that reduce instruction size but are functionally equivalent
-%macro add 2
- %ifnum %2
- %if %2==128
- sub %1, -128
- %else
- add %1, %2
- %endif
- %else
- add %1, %2
- %endif
-%endmacro
-
-%macro sub 2
- %ifnum %2
- %if %2==128
- add %1, -128
- %else
- sub %1, %2
- %endif
- %else
- sub %1, %2
- %endif
-%endmacro
-
-;=============================================================================
-; AVX abstraction layer
-;=============================================================================
-
-%assign i 0
-%rep 16
- %if i < 8
- CAT_XDEFINE sizeofmm, i, 8
- %endif
- CAT_XDEFINE sizeofxmm, i, 16
- CAT_XDEFINE sizeofymm, i, 32
-%assign i i+1
-%endrep
-%undef i
-
-%macro CHECK_AVX_INSTR_EMU 3-*
- %xdefine %%opcode %1
- %xdefine %%dst %2
- %rep %0-2
- %ifidn %%dst, %3
- %error non-AVX emulation of ``%%opcode'' is not supported
- %endif
- %rotate 1
- %endrep
-%endmacro
-
-;%1 == instruction
-;%2 == 1 if float, 0 if int
-;%3 == 1 if 4-operand (xmm, xmm, xmm, imm), 0 if 2- or 3-operand (xmm, xmm, xmm)
-;%4 == number of operands given
-;%5+: operands
-%macro RUN_AVX_INSTR 6-7+
- %ifid %6
- %define %%sizeofreg sizeof%6
- %elifid %5
- %define %%sizeofreg sizeof%5
- %else
- %define %%sizeofreg mmsize
- %endif
- %if %%sizeofreg==32
- %if %4>=3
- v%1 %5, %6, %7
- %else
- v%1 %5, %6
- %endif
- %else
- %if %%sizeofreg==8
- %define %%regmov movq
- %elif %2
- %define %%regmov movaps
- %else
- %define %%regmov movdqa
- %endif
-
- %if %4>=3+%3
- %ifnidn %5, %6
- %if AVX_enabled && %%sizeofreg==16
- v%1 %5, %6, %7
- %else
- CHECK_AVX_INSTR_EMU {%1 %5, %6, %7}, %5, %7
- %%regmov %5, %6
- %1 %5, %7
- %endif
- %else
- %1 %5, %7
- %endif
- %elif %4>=3
- %1 %5, %6, %7
- %else
- %1 %5, %6
- %endif
- %endif
-%endmacro
-
-; 3arg AVX ops with a memory arg can only have it in src2,
-; whereas SSE emulation of 3arg prefers to have it in src1 (i.e. the mov).
-; So, if the op is symmetric and the wrong one is memory, swap them.
-%macro RUN_AVX_INSTR1 8
- %assign %%swap 0
- %if AVX_enabled
- %ifnid %6
- %assign %%swap 1
- %endif
- %elifnidn %5, %6
- %ifnid %7
- %assign %%swap 1
- %endif
- %endif
- %if %%swap && %3 == 0 && %8 == 1
- RUN_AVX_INSTR %1, %2, %3, %4, %5, %7, %6
- %else
- RUN_AVX_INSTR %1, %2, %3, %4, %5, %6, %7
- %endif
-%endmacro
-
-;%1 == instruction
-;%2 == 1 if float, 0 if int
-;%3 == 1 if 4-operand (xmm, xmm, xmm, imm), 0 if 2- or 3-operand (xmm, xmm, xmm)
-;%4 == 1 if symmetric (i.e. doesn't matter which src arg is which), 0 if not
-%macro AVX_INSTR 4
- %macro %1 2-9 fnord, fnord, fnord, %1, %2, %3, %4
- %ifidn %3, fnord
- RUN_AVX_INSTR %6, %7, %8, 2, %1, %2
- %elifidn %4, fnord
- RUN_AVX_INSTR1 %6, %7, %8, 3, %1, %2, %3, %9
- %elifidn %5, fnord
- RUN_AVX_INSTR %6, %7, %8, 4, %1, %2, %3, %4
- %else
- RUN_AVX_INSTR %6, %7, %8, 5, %1, %2, %3, %4, %5
- %endif
- %endmacro
-%endmacro
-
-AVX_INSTR addpd, 1, 0, 1
-AVX_INSTR addps, 1, 0, 1
-AVX_INSTR addsd, 1, 0, 1
-AVX_INSTR addss, 1, 0, 1
-AVX_INSTR addsubpd, 1, 0, 0
-AVX_INSTR addsubps, 1, 0, 0
-AVX_INSTR andpd, 1, 0, 1
-AVX_INSTR andps, 1, 0, 1
-AVX_INSTR andnpd, 1, 0, 0
-AVX_INSTR andnps, 1, 0, 0
-AVX_INSTR blendpd, 1, 0, 0
-AVX_INSTR blendps, 1, 0, 0
-AVX_INSTR blendvpd, 1, 0, 0
-AVX_INSTR blendvps, 1, 0, 0
-AVX_INSTR cmppd, 1, 0, 0
-AVX_INSTR cmpps, 1, 0, 0
-AVX_INSTR cmpsd, 1, 0, 0
-AVX_INSTR cmpss, 1, 0, 0
-AVX_INSTR cvtdq2ps, 1, 0, 0
-AVX_INSTR cvtps2dq, 1, 0, 0
-AVX_INSTR divpd, 1, 0, 0
-AVX_INSTR divps, 1, 0, 0
-AVX_INSTR divsd, 1, 0, 0
-AVX_INSTR divss, 1, 0, 0
-AVX_INSTR dppd, 1, 1, 0
-AVX_INSTR dpps, 1, 1, 0
-AVX_INSTR haddpd, 1, 0, 0
-AVX_INSTR haddps, 1, 0, 0
-AVX_INSTR hsubpd, 1, 0, 0
-AVX_INSTR hsubps, 1, 0, 0
-AVX_INSTR maxpd, 1, 0, 1
-AVX_INSTR maxps, 1, 0, 1
-AVX_INSTR maxsd, 1, 0, 1
-AVX_INSTR maxss, 1, 0, 1
-AVX_INSTR minpd, 1, 0, 1
-AVX_INSTR minps, 1, 0, 1
-AVX_INSTR minsd, 1, 0, 1
-AVX_INSTR minss, 1, 0, 1
-AVX_INSTR movhlps, 1, 0, 0
-AVX_INSTR movlhps, 1, 0, 0
-AVX_INSTR movsd, 1, 0, 0
-AVX_INSTR movss, 1, 0, 0
-AVX_INSTR mpsadbw, 0, 1, 0
-AVX_INSTR mulpd, 1, 0, 1
-AVX_INSTR mulps, 1, 0, 1
-AVX_INSTR mulsd, 1, 0, 1
-AVX_INSTR mulss, 1, 0, 1
-AVX_INSTR orpd, 1, 0, 1
-AVX_INSTR orps, 1, 0, 1
-AVX_INSTR pabsb, 0, 0, 0
-AVX_INSTR pabsw, 0, 0, 0
-AVX_INSTR pabsd, 0, 0, 0
-AVX_INSTR packsswb, 0, 0, 0
-AVX_INSTR packssdw, 0, 0, 0
-AVX_INSTR packuswb, 0, 0, 0
-AVX_INSTR packusdw, 0, 0, 0
-AVX_INSTR paddb, 0, 0, 1
-AVX_INSTR paddw, 0, 0, 1
-AVX_INSTR paddd, 0, 0, 1
-AVX_INSTR paddq, 0, 0, 1
-AVX_INSTR paddsb, 0, 0, 1
-AVX_INSTR paddsw, 0, 0, 1
-AVX_INSTR paddusb, 0, 0, 1
-AVX_INSTR paddusw, 0, 0, 1
-AVX_INSTR palignr, 0, 1, 0
-AVX_INSTR pand, 0, 0, 1
-AVX_INSTR pandn, 0, 0, 0
-AVX_INSTR pavgb, 0, 0, 1
-AVX_INSTR pavgw, 0, 0, 1
-AVX_INSTR pblendvb, 0, 0, 0
-AVX_INSTR pblendw, 0, 1, 0
-AVX_INSTR pcmpestri, 0, 0, 0
-AVX_INSTR pcmpestrm, 0, 0, 0
-AVX_INSTR pcmpistri, 0, 0, 0
-AVX_INSTR pcmpistrm, 0, 0, 0
-AVX_INSTR pcmpeqb, 0, 0, 1
-AVX_INSTR pcmpeqw, 0, 0, 1
-AVX_INSTR pcmpeqd, 0, 0, 1
-AVX_INSTR pcmpeqq, 0, 0, 1
-AVX_INSTR pcmpgtb, 0, 0, 0
-AVX_INSTR pcmpgtw, 0, 0, 0
-AVX_INSTR pcmpgtd, 0, 0, 0
-AVX_INSTR pcmpgtq, 0, 0, 0
-AVX_INSTR phaddw, 0, 0, 0
-AVX_INSTR phaddd, 0, 0, 0
-AVX_INSTR phaddsw, 0, 0, 0
-AVX_INSTR phsubw, 0, 0, 0
-AVX_INSTR phsubd, 0, 0, 0
-AVX_INSTR phsubsw, 0, 0, 0
-AVX_INSTR pmaddwd, 0, 0, 1
-AVX_INSTR pmaddubsw, 0, 0, 0
-AVX_INSTR pmaxsb, 0, 0, 1
-AVX_INSTR pmaxsw, 0, 0, 1
-AVX_INSTR pmaxsd, 0, 0, 1
-AVX_INSTR pmaxub, 0, 0, 1
-AVX_INSTR pmaxuw, 0, 0, 1
-AVX_INSTR pmaxud, 0, 0, 1
-AVX_INSTR pminsb, 0, 0, 1
-AVX_INSTR pminsw, 0, 0, 1
-AVX_INSTR pminsd, 0, 0, 1
-AVX_INSTR pminub, 0, 0, 1
-AVX_INSTR pminuw, 0, 0, 1
-AVX_INSTR pminud, 0, 0, 1
-AVX_INSTR pmovmskb, 0, 0, 0
-AVX_INSTR pmulhuw, 0, 0, 1
-AVX_INSTR pmulhrsw, 0, 0, 1
-AVX_INSTR pmulhw, 0, 0, 1
-AVX_INSTR pmullw, 0, 0, 1
-AVX_INSTR pmulld, 0, 0, 1
-AVX_INSTR pmuludq, 0, 0, 1
-AVX_INSTR pmuldq, 0, 0, 1
-AVX_INSTR por, 0, 0, 1
-AVX_INSTR psadbw, 0, 0, 1
-AVX_INSTR pshufb, 0, 0, 0
-AVX_INSTR pshufd, 0, 1, 0
-AVX_INSTR pshufhw, 0, 1, 0
-AVX_INSTR pshuflw, 0, 1, 0
-AVX_INSTR psignb, 0, 0, 0
-AVX_INSTR psignw, 0, 0, 0
-AVX_INSTR psignd, 0, 0, 0
-AVX_INSTR psllw, 0, 0, 0
-AVX_INSTR pslld, 0, 0, 0
-AVX_INSTR psllq, 0, 0, 0
-AVX_INSTR pslldq, 0, 0, 0
-AVX_INSTR psraw, 0, 0, 0
-AVX_INSTR psrad, 0, 0, 0
-AVX_INSTR psrlw, 0, 0, 0
-AVX_INSTR psrld, 0, 0, 0
-AVX_INSTR psrlq, 0, 0, 0
-AVX_INSTR psrldq, 0, 0, 0
-AVX_INSTR psubb, 0, 0, 0
-AVX_INSTR psubw, 0, 0, 0
-AVX_INSTR psubd, 0, 0, 0
-AVX_INSTR psubq, 0, 0, 0
-AVX_INSTR psubsb, 0, 0, 0
-AVX_INSTR psubsw, 0, 0, 0
-AVX_INSTR psubusb, 0, 0, 0
-AVX_INSTR psubusw, 0, 0, 0
-AVX_INSTR ptest, 0, 0, 0
-AVX_INSTR punpckhbw, 0, 0, 0
-AVX_INSTR punpckhwd, 0, 0, 0
-AVX_INSTR punpckhdq, 0, 0, 0
-AVX_INSTR punpckhqdq, 0, 0, 0
-AVX_INSTR punpcklbw, 0, 0, 0
-AVX_INSTR punpcklwd, 0, 0, 0
-AVX_INSTR punpckldq, 0, 0, 0
-AVX_INSTR punpcklqdq, 0, 0, 0
-AVX_INSTR pxor, 0, 0, 1
-AVX_INSTR shufps, 1, 1, 0
-AVX_INSTR subpd, 1, 0, 0
-AVX_INSTR subps, 1, 0, 0
-AVX_INSTR subsd, 1, 0, 0
-AVX_INSTR subss, 1, 0, 0
-AVX_INSTR unpckhpd, 1, 0, 0
-AVX_INSTR unpckhps, 1, 0, 0
-AVX_INSTR unpcklpd, 1, 0, 0
-AVX_INSTR unpcklps, 1, 0, 0
-AVX_INSTR xorpd, 1, 0, 1
-AVX_INSTR xorps, 1, 0, 1
-
-; 3DNow instructions, for sharing code between AVX, SSE and 3DN
-AVX_INSTR pfadd, 1, 0, 1
-AVX_INSTR pfsub, 1, 0, 0
-AVX_INSTR pfmul, 1, 0, 1
-
-; base-4 constants for shuffles
-%assign i 0
-%rep 256
- %assign j ((i>>6)&3)*1000 + ((i>>4)&3)*100 + ((i>>2)&3)*10 + (i&3)
- %if j < 10
- CAT_XDEFINE q000, j, i
- %elif j < 100
- CAT_XDEFINE q00, j, i
- %elif j < 1000
- CAT_XDEFINE q0, j, i
- %else
- CAT_XDEFINE q, j, i
- %endif
-%assign i i+1
-%endrep
-%undef i
-%undef j
-
-%macro FMA_INSTR 3
- %macro %1 4-7 %1, %2, %3
- %if cpuflag(xop)
- v%5 %1, %2, %3, %4
- %else
- %6 %1, %2, %3
- %7 %1, %4
- %endif
- %endmacro
-%endmacro
-
-FMA_INSTR pmacsdd, pmulld, paddd
-FMA_INSTR pmacsww, pmullw, paddw
-FMA_INSTR pmadcswd, pmaddwd, paddd
-
-; tzcnt is equivalent to "rep bsf" and is backwards-compatible with bsf.
-; This lets us use tzcnt without bumping the yasm version requirement yet.
-%define tzcnt rep bsf
diff --git a/third_party/aom/third_party/vector/LICENSE b/third_party/aom/third_party/vector/LICENSE
deleted file mode 100644
index afcb9f00a..000000000
--- a/third_party/aom/third_party/vector/LICENSE
+++ /dev/null
@@ -1,19 +0,0 @@
-The MIT License (MIT)
-Copyright (c) 2016 Peter Goldsborough
-
-Permission is hereby granted, free of charge, to any person obtaining a copy of
-this software and associated documentation files (the "Software"), to deal in
-the Software without restriction, including without limitation the rights to
-use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
-the Software, and to permit persons to whom the Software is furnished to do so,
-subject to the following conditions:
-
-The above copyright notice and this permission notice shall be included in all
-copies or substantial portions of the Software.
-
-THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
-FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
-COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
-IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
-CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
diff --git a/third_party/aom/third_party/vector/README.libaom b/third_party/aom/third_party/vector/README.libaom
deleted file mode 100644
index 2bb8b2d5d..000000000
--- a/third_party/aom/third_party/vector/README.libaom
+++ /dev/null
@@ -1,14 +0,0 @@
-Name: vector
-URL: https://github.com/goldsborough/vector
-Version: commit-id: 40efe82
-License: MIT
-License File: LICENSE
-
-Description:
-A feature-complete, generic and customizable resizable
-array implementation in pure C that supports almost
-the entire C++ std::vector API, including iterators.
-
-Local Modifications:
-Renamed some functions to fit in with the AOMedia
-naming convention.
diff --git a/third_party/aom/third_party/vector/vector.c b/third_party/aom/third_party/vector/vector.c
deleted file mode 100644
index fe46246a1..000000000
--- a/third_party/aom/third_party/vector/vector.c
+++ /dev/null
@@ -1,543 +0,0 @@
-/*
-The MIT License(MIT)
-Copyright(c) 2016 Peter Goldsborough
-
-Permission is hereby granted, free of charge, to any person obtaining a copy of
-this software and associated documentation files(the "Software"), to deal in
-the Software without restriction, including without limitation the rights to
-use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
-the Software, and to permit persons to whom the Software is furnished to do so,
-subject to the following conditions :
-
-The above copyright notice and this permission notice shall be included in all
-copies or substantial portions of the Software.
-
-THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
-FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.IN NO EVENT SHALL THE AUTHORS OR
-COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
-IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
-CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
-*/
-
-#define __STDC_WANT_LIB_EXT1__ 1
-
-#include <assert.h>
-#include <stdlib.h>
-#include <string.h>
-
-#include "third_party/vector/vector.h"
-
-int aom_vector_setup(Vector *vector, size_t capacity, size_t element_size) {
- assert(vector != NULL);
-
- if (vector == NULL) return VECTOR_ERROR;
-
- vector->size = 0;
- vector->capacity = MAX(VECTOR_MINIMUM_CAPACITY, capacity);
- vector->element_size = element_size;
- vector->data = malloc(vector->capacity * element_size);
-
- return vector->data == NULL ? VECTOR_ERROR : VECTOR_SUCCESS;
-}
-
-int aom_vector_copy(Vector *destination, Vector *source) {
- assert(destination != NULL);
- assert(source != NULL);
- assert(aom_vector_is_initialized(source));
- assert(!aom_vector_is_initialized(destination));
-
- if (destination == NULL) return VECTOR_ERROR;
- if (source == NULL) return VECTOR_ERROR;
- if (aom_vector_is_initialized(destination)) return VECTOR_ERROR;
- if (!aom_vector_is_initialized(source)) return VECTOR_ERROR;
-
- /* Copy ALL the data */
- destination->size = source->size;
- destination->capacity = source->size * 2;
- destination->element_size = source->element_size;
-
- /* Note that we are not necessarily allocating the same capacity */
- destination->data = malloc(destination->capacity * source->element_size);
- if (destination->data == NULL) return VECTOR_ERROR;
-
- memcpy(destination->data, source->data, aom_vector_byte_size(source));
-
- return VECTOR_SUCCESS;
-}
-
-int aom_vector_copy_assign(Vector *destination, Vector *source) {
- assert(destination != NULL);
- assert(source != NULL);
- assert(aom_vector_is_initialized(source));
- assert(aom_vector_is_initialized(destination));
-
- if (destination == NULL) return VECTOR_ERROR;
- if (source == NULL) return VECTOR_ERROR;
- if (!aom_vector_is_initialized(destination)) return VECTOR_ERROR;
- if (!aom_vector_is_initialized(source)) return VECTOR_ERROR;
-
- aom_vector_destroy(destination);
-
- return aom_vector_copy(destination, source);
-}
-
-int aom_vector_move(Vector *destination, Vector *source) {
- assert(destination != NULL);
- assert(source != NULL);
-
- if (destination == NULL) return VECTOR_ERROR;
- if (source == NULL) return VECTOR_ERROR;
-
- *destination = *source;
- source->data = NULL;
-
- return VECTOR_SUCCESS;
-}
-
-int aom_vector_move_assign(Vector *destination, Vector *source) {
- aom_vector_swap(destination, source);
- return aom_vector_destroy(source);
-}
-
-int aom_vector_swap(Vector *destination, Vector *source) {
- void *temp;
-
- assert(destination != NULL);
- assert(source != NULL);
- assert(aom_vector_is_initialized(source));
- assert(aom_vector_is_initialized(destination));
-
- if (destination == NULL) return VECTOR_ERROR;
- if (source == NULL) return VECTOR_ERROR;
- if (!aom_vector_is_initialized(destination)) return VECTOR_ERROR;
- if (!aom_vector_is_initialized(source)) return VECTOR_ERROR;
-
- _vector_swap(&destination->size, &source->size);
- _vector_swap(&destination->capacity, &source->capacity);
- _vector_swap(&destination->element_size, &source->element_size);
-
- temp = destination->data;
- destination->data = source->data;
- source->data = temp;
-
- return VECTOR_SUCCESS;
-}
-
-int aom_vector_destroy(Vector *vector) {
- assert(vector != NULL);
-
- if (vector == NULL) return VECTOR_ERROR;
-
- free(vector->data);
- vector->data = NULL;
-
- return VECTOR_SUCCESS;
-}
-
-/* Insertion */
-int aom_vector_push_back(Vector *vector, void *element) {
- assert(vector != NULL);
- assert(element != NULL);
-
- if (_vector_should_grow(vector)) {
- if (_vector_adjust_capacity(vector) == VECTOR_ERROR) {
- return VECTOR_ERROR;
- }
- }
-
- _vector_assign(vector, vector->size, element);
-
- ++vector->size;
-
- return VECTOR_SUCCESS;
-}
-
-int aom_vector_push_front(Vector *vector, void *element) {
- return aom_vector_insert(vector, 0, element);
-}
-
-int aom_vector_insert(Vector *vector, size_t index, void *element) {
- void *offset;
-
- assert(vector != NULL);
- assert(element != NULL);
- assert(index <= vector->size);
-
- if (vector == NULL) return VECTOR_ERROR;
- if (element == NULL) return VECTOR_ERROR;
- if (vector->element_size == 0) return VECTOR_ERROR;
- if (index > vector->size) return VECTOR_ERROR;
-
- if (_vector_should_grow(vector)) {
- if (_vector_adjust_capacity(vector) == VECTOR_ERROR) {
- return VECTOR_ERROR;
- }
- }
-
- /* Move other elements to the right */
- if (_vector_move_right(vector, index) == VECTOR_ERROR) {
- return VECTOR_ERROR;
- }
-
- /* Insert the element */
- offset = _vector_offset(vector, index);
- memcpy(offset, element, vector->element_size);
- ++vector->size;
-
- return VECTOR_SUCCESS;
-}
-
-int aom_vector_assign(Vector *vector, size_t index, void *element) {
- assert(vector != NULL);
- assert(element != NULL);
- assert(index < vector->size);
-
- if (vector == NULL) return VECTOR_ERROR;
- if (element == NULL) return VECTOR_ERROR;
- if (vector->element_size == 0) return VECTOR_ERROR;
- if (index >= vector->size) return VECTOR_ERROR;
-
- _vector_assign(vector, index, element);
-
- return VECTOR_SUCCESS;
-}
-
-/* Deletion */
-int aom_vector_pop_back(Vector *vector) {
- assert(vector != NULL);
- assert(vector->size > 0);
-
- if (vector == NULL) return VECTOR_ERROR;
- if (vector->element_size == 0) return VECTOR_ERROR;
-
- --vector->size;
-
-#ifndef VECTOR_NO_SHRINK
- if (_vector_should_shrink(vector)) {
- _vector_adjust_capacity(vector);
- }
-#endif
-
- return VECTOR_SUCCESS;
-}
-
-int aom_vector_pop_front(Vector *vector) { return aom_vector_erase(vector, 0); }
-
-int aom_vector_erase(Vector *vector, size_t index) {
- assert(vector != NULL);
- assert(index < vector->size);
-
- if (vector == NULL) return VECTOR_ERROR;
- if (vector->element_size == 0) return VECTOR_ERROR;
- if (index >= vector->size) return VECTOR_ERROR;
-
- /* Just overwrite */
- _vector_move_left(vector, index);
-
-#ifndef VECTOR_NO_SHRINK
- if (--vector->size == vector->capacity / 4) {
- _vector_adjust_capacity(vector);
- }
-#endif
-
- return VECTOR_SUCCESS;
-}
-
-int aom_vector_clear(Vector *vector) { return aom_vector_resize(vector, 0); }
-
-/* Lookup */
-void *aom_vector_get(Vector *vector, size_t index) {
- assert(vector != NULL);
- assert(index < vector->size);
-
- if (vector == NULL) return NULL;
- if (vector->element_size == 0) return NULL;
- if (index >= vector->size) return NULL;
-
- return _vector_offset(vector, index);
-}
-
-const void *aom_vector_const_get(const Vector *vector, size_t index) {
- assert(vector != NULL);
- assert(index < vector->size);
-
- if (vector == NULL) return NULL;
- if (vector->element_size == 0) return NULL;
- if (index >= vector->size) return NULL;
-
- return _vector_const_offset(vector, index);
-}
-
-void *aom_vector_front(Vector *vector) { return aom_vector_get(vector, 0); }
-
-void *aom_vector_back(Vector *vector) {
- return aom_vector_get(vector, vector->size - 1);
-}
-
-/* Information */
-
-bool aom_vector_is_initialized(const Vector *vector) {
- return vector->data != NULL;
-}
-
-size_t aom_vector_byte_size(const Vector *vector) {
- return vector->size * vector->element_size;
-}
-
-size_t aom_vector_free_space(const Vector *vector) {
- return vector->capacity - vector->size;
-}
-
-bool aom_vector_is_empty(const Vector *vector) { return vector->size == 0; }
-
-/* Memory management */
-int aom_vector_resize(Vector *vector, size_t new_size) {
- if (new_size <= vector->capacity * VECTOR_SHRINK_THRESHOLD) {
- vector->size = new_size;
- if (_vector_reallocate(vector, new_size * VECTOR_GROWTH_FACTOR) == -1) {
- return VECTOR_ERROR;
- }
- } else if (new_size > vector->capacity) {
- if (_vector_reallocate(vector, new_size * VECTOR_GROWTH_FACTOR) == -1) {
- return VECTOR_ERROR;
- }
- }
-
- vector->size = new_size;
-
- return VECTOR_SUCCESS;
-}
-
-int aom_vector_reserve(Vector *vector, size_t minimum_capacity) {
- if (minimum_capacity > vector->capacity) {
- if (_vector_reallocate(vector, minimum_capacity) == VECTOR_ERROR) {
- return VECTOR_ERROR;
- }
- }
-
- return VECTOR_SUCCESS;
-}
-
-int aom_vector_shrink_to_fit(Vector *vector) {
- return _vector_reallocate(vector, vector->size);
-}
-
-/* Iterators */
-Iterator aom_vector_begin(Vector *vector) { return aom_vector_iterator(vector, 0); }
-
-Iterator aom_vector_end(Vector *vector) {
- return aom_vector_iterator(vector, vector->size);
-}
-
-Iterator aom_vector_iterator(Vector *vector, size_t index) {
- Iterator iterator = { NULL, 0 };
-
- assert(vector != NULL);
- assert(index <= vector->size);
-
- if (vector == NULL) return iterator;
- if (index > vector->size) return iterator;
- if (vector->element_size == 0) return iterator;
-
- iterator.pointer = _vector_offset(vector, index);
- iterator.element_size = vector->element_size;
-
- return iterator;
-}
-
-void *iterator_get(Iterator *iterator) { return iterator->pointer; }
-
-int iterator_erase(Vector *vector, Iterator *iterator) {
- size_t index = iterator_index(vector, iterator);
-
- if (aom_vector_erase(vector, index) == VECTOR_ERROR) {
- return VECTOR_ERROR;
- }
-
- *iterator = aom_vector_iterator(vector, index);
-
- return VECTOR_SUCCESS;
-}
-
-void iterator_increment(Iterator *iterator) {
- assert(iterator != NULL);
- // iterator->pointer += iterator->element_size;
- iterator->pointer =
- (unsigned char *)iterator->pointer + iterator->element_size;
-}
-
-void iterator_decrement(Iterator *iterator) {
- assert(iterator != NULL);
- // iterator->pointer -= iterator->element_size;
- iterator->pointer =
- (unsigned char *)iterator->pointer - iterator->element_size;
-}
-
-void *iterator_next(Iterator *iterator) {
- void *current = iterator->pointer;
- iterator_increment(iterator);
-
- return current;
-}
-
-void *iterator_previous(Iterator *iterator) {
- void *current = iterator->pointer;
- iterator_decrement(iterator);
-
- return current;
-}
-
-bool iterator_equals(Iterator *first, Iterator *second) {
- assert(first->element_size == second->element_size);
- return first->pointer == second->pointer;
-}
-
-bool iterator_is_before(Iterator *first, Iterator *second) {
- assert(first->element_size == second->element_size);
- return first->pointer < second->pointer;
-}
-
-bool iterator_is_after(Iterator *first, Iterator *second) {
- assert(first->element_size == second->element_size);
- return first->pointer > second->pointer;
-}
-
-size_t iterator_index(Vector *vector, Iterator *iterator) {
- assert(vector != NULL);
- assert(iterator != NULL);
- // return (iterator->pointer - vector->data) / vector->element_size;
- return ((unsigned char *)iterator->pointer - (unsigned char *)vector->data) /
- vector->element_size;
-}
-
-/***** PRIVATE *****/
-
-bool _vector_should_grow(Vector *vector) {
- assert(vector->size <= vector->capacity);
- return vector->size == vector->capacity;
-}
-
-bool _vector_should_shrink(Vector *vector) {
- assert(vector->size <= vector->capacity);
- return vector->size == vector->capacity * VECTOR_SHRINK_THRESHOLD;
-}
-
-size_t _vector_free_bytes(const Vector *vector) {
- return aom_vector_free_space(vector) * vector->element_size;
-}
-
-void *_vector_offset(Vector *vector, size_t index) {
- // return vector->data + (index * vector->element_size);
- return (unsigned char *)vector->data + (index * vector->element_size);
-}
-
-const void *_vector_const_offset(const Vector *vector, size_t index) {
- // return vector->data + (index * vector->element_size);
- return (unsigned char *)vector->data + (index * vector->element_size);
-}
-
-void _vector_assign(Vector *vector, size_t index, void *element) {
- /* Insert the element */
- void *offset = _vector_offset(vector, index);
- memcpy(offset, element, vector->element_size);
-}
-
-int _vector_move_right(Vector *vector, size_t index) {
- assert(vector->size < vector->capacity);
-
- /* The location where to start to move from. */
- void *offset = _vector_offset(vector, index);
-
- /* How many to move to the right. */
- size_t elements_in_bytes = (vector->size - index) * vector->element_size;
-
-#ifdef __STDC_LIB_EXT1__
- size_t right_capacity_in_bytes =
- (vector->capacity - (index + 1)) * vector->element_size;
-
- /* clang-format off */
- int return_code = memmove_s(
- offset + vector->element_size,
- right_capacity_in_bytes,
- offset,
- elements_in_bytes);
-
- /* clang-format on */
-
- return return_code == 0 ? VECTOR_SUCCESS : VECTOR_ERROR;
-
-#else
- // memmove(offset + vector->element_size, offset, elements_in_bytes);
- memmove((unsigned char *)offset + vector->element_size, offset,
- elements_in_bytes);
- return VECTOR_SUCCESS;
-#endif
-}
-
-void _vector_move_left(Vector *vector, size_t index) {
- size_t right_elements_in_bytes;
- void *offset;
-
- /* The offset into the memory */
- offset = _vector_offset(vector, index);
-
- /* How many to move to the left */
- right_elements_in_bytes = (vector->size - index - 1) * vector->element_size;
-
- // memmove(offset, offset + vector->element_size, right_elements_in_bytes);
- memmove(offset, (unsigned char *)offset + vector->element_size,
- right_elements_in_bytes);
-}
-
-int _vector_adjust_capacity(Vector *vector) {
- return _vector_reallocate(vector,
- MAX(1, vector->size * VECTOR_GROWTH_FACTOR));
-}
-
-int _vector_reallocate(Vector *vector, size_t new_capacity) {
- size_t new_capacity_in_bytes;
- void *old;
- assert(vector != NULL);
-
- if (new_capacity < VECTOR_MINIMUM_CAPACITY) {
- if (vector->capacity > VECTOR_MINIMUM_CAPACITY) {
- new_capacity = VECTOR_MINIMUM_CAPACITY;
- } else {
- /* NO-OP */
- return VECTOR_SUCCESS;
- }
- }
-
- new_capacity_in_bytes = new_capacity * vector->element_size;
- old = vector->data;
-
- if ((vector->data = malloc(new_capacity_in_bytes)) == NULL) {
- return VECTOR_ERROR;
- }
-
-#ifdef __STDC_LIB_EXT1__
- /* clang-format off */
- if (memcpy_s(vector->data,
- new_capacity_in_bytes,
- old,
- aom_vector_byte_size(vector)) != 0) {
- return VECTOR_ERROR;
- }
-/* clang-format on */
-#else
- memcpy(vector->data, old, aom_vector_byte_size(vector));
-#endif
-
- vector->capacity = new_capacity;
-
- free(old);
-
- return VECTOR_SUCCESS;
-}
-
-void _vector_swap(size_t *first, size_t *second) {
- size_t temp = *first;
- *first = *second;
- *second = temp;
-}
diff --git a/third_party/aom/third_party/vector/vector.h b/third_party/aom/third_party/vector/vector.h
deleted file mode 100644
index 02743f5f1..000000000
--- a/third_party/aom/third_party/vector/vector.h
+++ /dev/null
@@ -1,159 +0,0 @@
-/*
-The MIT License(MIT)
-Copyright(c) 2016 Peter Goldsborough
-
-Permission is hereby granted, free of charge, to any person obtaining a copy of
-this software and associated documentation files(the "Software"), to deal in
-the Software without restriction, including without limitation the rights to
-use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
-the Software, and to permit persons to whom the Software is furnished to do so,
-subject to the following conditions :
-
-The above copyright notice and this permission notice shall be included in all
-copies or substantial portions of the Software.
-
-THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
-FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.IN NO EVENT SHALL THE AUTHORS OR
-COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
-IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
-CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
-*/
-
-#ifndef VECTOR_H
-#define VECTOR_H
-
-#include <stdbool.h>
-#include <stddef.h>
-
-/***** DEFINITIONS *****/
-
-#define VECTOR_MINIMUM_CAPACITY 2
-#define VECTOR_GROWTH_FACTOR 2
-#define VECTOR_SHRINK_THRESHOLD (1 / 4)
-
-#define VECTOR_ERROR -1
-#define VECTOR_SUCCESS 0
-
-#define VECTOR_UNINITIALIZED NULL
-#define VECTOR_INITIALIZER \
- { 0, 0, 0, VECTOR_UNINITIALIZED }
-
-/***** STRUCTURES *****/
-
-typedef struct Vector {
- size_t size;
- size_t capacity;
- size_t element_size;
-
- void *data;
-} Vector;
-
-typedef struct Iterator {
- void *pointer;
- size_t element_size;
-} Iterator;
-
-/***** METHODS *****/
-
-/* Constructor */
-int aom_vector_setup(Vector *vector, size_t capacity, size_t element_size);
-
-/* Copy Constructor */
-int aom_vector_copy(Vector *destination, Vector *source);
-
-/* Copy Assignment */
-int aom_vector_copy_assign(Vector *destination, Vector *source);
-
-/* Move Constructor */
-int aom_vector_move(Vector *destination, Vector *source);
-
-/* Move Assignment */
-int aom_vector_move_assign(Vector *destination, Vector *source);
-
-int aom_vector_swap(Vector *destination, Vector *source);
-
-/* Destructor */
-int aom_vector_destroy(Vector *vector);
-
-/* Insertion */
-int aom_vector_push_back(Vector *vector, void *element);
-int aom_vector_push_front(Vector *vector, void *element);
-int aom_vector_insert(Vector *vector, size_t index, void *element);
-int aom_vector_assign(Vector *vector, size_t index, void *element);
-
-/* Deletion */
-int aom_vector_pop_back(Vector *vector);
-int aom_vector_pop_front(Vector *vector);
-int aom_vector_erase(Vector *vector, size_t index);
-int aom_vector_clear(Vector *vector);
-
-/* Lookup */
-void *aom_vector_get(Vector *vector, size_t index);
-const void *aom_vector_const_get(const Vector *vector, size_t index);
-void *aom_vector_front(Vector *vector);
-void *aom_vector_back(Vector *vector);
-#define VECTOR_GET_AS(type, aom_vector_pointer, index) \
- *((type *)aom_vector_get((aom_vector_pointer), (index)))
-
-/* Information */
-bool aom_vector_is_initialized(const Vector *vector);
-size_t aom_vector_byte_size(const Vector *vector);
-size_t aom_vector_free_space(const Vector *vector);
-bool aom_vector_is_empty(const Vector *vector);
-
-/* Memory management */
-int aom_vector_resize(Vector *vector, size_t new_size);
-int aom_vector_reserve(Vector *vector, size_t minimum_capacity);
-int aom_vector_shrink_to_fit(Vector *vector);
-
-/* Iterators */
-Iterator aom_vector_begin(Vector *vector);
-Iterator aom_vector_end(Vector *vector);
-Iterator aom_vector_iterator(Vector *vector, size_t index);
-
-void *iterator_get(Iterator *iterator);
-#define ITERATOR_GET_AS(type, iterator) *((type *)iterator_get((iterator)))
-
-int iterator_erase(Vector *vector, Iterator *iterator);
-
-void iterator_increment(Iterator *iterator);
-void iterator_decrement(Iterator *iterator);
-
-void *iterator_next(Iterator *iterator);
-void *iterator_previous(Iterator *iterator);
-
-bool iterator_equals(Iterator *first, Iterator *second);
-bool iterator_is_before(Iterator *first, Iterator *second);
-bool iterator_is_after(Iterator *first, Iterator *second);
-
-size_t iterator_index(Vector *vector, Iterator *iterator);
-
-#define VECTOR_FOR_EACH(aom_vector_pointer, iterator_name) \
- for (Iterator(iterator_name) = aom_vector_begin((aom_vector_pointer)), \
- end = aom_vector_end((aom_vector_pointer)); \
- !iterator_equals(&(iterator_name), &end); \
- iterator_increment(&(iterator_name)))
-
-/***** PRIVATE *****/
-
-#define MAX(a, b) ((a) > (b) ? (a) : (b))
-
-bool _vector_should_grow(Vector *vector);
-bool _vector_should_shrink(Vector *vector);
-
-size_t _vector_free_bytes(const Vector *vector);
-void *_vector_offset(Vector *vector, size_t index);
-const void *_vector_const_offset(const Vector *vector, size_t index);
-
-void _vector_assign(Vector *vector, size_t index, void *element);
-
-int _vector_move_right(Vector *vector, size_t index);
-void _vector_move_left(Vector *vector, size_t index);
-
-int _vector_adjust_capacity(Vector *vector);
-int _vector_reallocate(Vector *vector, size_t new_capacity);
-
-void _vector_swap(size_t *first, size_t *second);
-
-#endif /* VECTOR_H */
diff --git a/third_party/aom/third_party/x86inc/LICENSE b/third_party/aom/third_party/x86inc/LICENSE
deleted file mode 100644
index 7d07645a1..000000000
--- a/third_party/aom/third_party/x86inc/LICENSE
+++ /dev/null
@@ -1,18 +0,0 @@
-Copyright (C) 2005-2012 x264 project
-
-Authors: Loren Merritt <lorenm@u.washington.edu>
- Anton Mitrofanov <BugMaster@narod.ru>
- Jason Garrett-Glaser <darkshikari@gmail.com>
- Henrik Gramner <hengar-6@student.ltu.se>
-
-Permission to use, copy, modify, and/or distribute this software for any
-purpose with or without fee is hereby granted, provided that the above
-copyright notice and this permission notice appear in all copies.
-
-THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
-WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
-MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
-ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
-WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
-ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
-OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
diff --git a/third_party/aom/third_party/x86inc/README.libaom b/third_party/aom/third_party/x86inc/README.libaom
deleted file mode 100644
index 07c4dad20..000000000
--- a/third_party/aom/third_party/x86inc/README.libaom
+++ /dev/null
@@ -1,20 +0,0 @@
-URL: https://git.videolan.org/git/x264.git
-Version: d23d18655249944c1ca894b451e2c82c7a584c62
-License: ISC
-License File: LICENSE
-
-Description:
-x264/libav's framework for x86 assembly. Contains a variety of macros and
-defines that help automatically allow assembly to work cross-platform.
-
-Local Modifications:
-Get configuration from aom_config.asm.
-Prefix functions with aom by default.
-Manage name mangling (prefixing with '_') manually because 'PREFIX' does not
- exist in libaom.
-Expand PIC default to macho64 and respect CONFIG_PIC from libaom
-Set 'private_extern' visibility for macho targets.
-Copy PIC 'GLOBAL' macros from x86_abi_support.asm
-Use .text instead of .rodata on macho to avoid broken tables in PIC mode.
-Use .text with no alignment for aout
-Only use 'hidden' visibility with Chromium
diff --git a/third_party/aom/third_party/x86inc/x86inc.asm b/third_party/aom/third_party/x86inc/x86inc.asm
deleted file mode 100644
index adaf2d99e..000000000
--- a/third_party/aom/third_party/x86inc/x86inc.asm
+++ /dev/null
@@ -1,1649 +0,0 @@
-;*****************************************************************************
-;* x86inc.asm: x264asm abstraction layer
-;*****************************************************************************
-;* Copyright (C) 2005-2016 x264 project
-;*
-;* Authors: Loren Merritt <lorenm@u.washington.edu>
-;* Anton Mitrofanov <BugMaster@narod.ru>
-;* Fiona Glaser <fiona@x264.com>
-;* Henrik Gramner <henrik@gramner.com>
-;*
-;* Permission to use, copy, modify, and/or distribute this software for any
-;* purpose with or without fee is hereby granted, provided that the above
-;* copyright notice and this permission notice appear in all copies.
-;*
-;* THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
-;* WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
-;* MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
-;* ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
-;* WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
-;* ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
-;* OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
-;*****************************************************************************
-
-; This is a header file for the x264ASM assembly language, which uses
-; NASM/YASM syntax combined with a large number of macros to provide easy
-; abstraction between different calling conventions (x86_32, win64, linux64).
-; It also has various other useful features to simplify writing the kind of
-; DSP functions that are most often used in x264.
-
-; Unlike the rest of x264, this file is available under an ISC license, as it
-; has significant usefulness outside of x264 and we want it to be available
-; to the largest audience possible. Of course, if you modify it for your own
-; purposes to add a new feature, we strongly encourage contributing a patch
-; as this feature might be useful for others as well. Send patches or ideas
-; to x264-devel@videolan.org .
-
-%include "config/aom_config.asm"
-
-%ifndef private_prefix
- %define private_prefix aom
-%endif
-
-%ifndef public_prefix
- %define public_prefix private_prefix
-%endif
-
-%ifndef STACK_ALIGNMENT
- %if ARCH_X86_64
- %define STACK_ALIGNMENT 16
- %else
- %define STACK_ALIGNMENT 4
- %endif
-%endif
-
-%define WIN64 0
-%define UNIX64 0
-%if ARCH_X86_64
- %ifidn __OUTPUT_FORMAT__,win32
- %define WIN64 1
- %elifidn __OUTPUT_FORMAT__,win64
- %define WIN64 1
- %elifidn __OUTPUT_FORMAT__,x64
- %define WIN64 1
- %else
- %define UNIX64 1
- %endif
-%endif
-
-%define FORMAT_ELF 0
-%ifidn __OUTPUT_FORMAT__,elf
- %define FORMAT_ELF 1
-%elifidn __OUTPUT_FORMAT__,elf32
- %define FORMAT_ELF 1
-%elifidn __OUTPUT_FORMAT__,elf64
- %define FORMAT_ELF 1
-%endif
-
-%define FORMAT_MACHO 0
-%ifidn __OUTPUT_FORMAT__,macho32
- %define FORMAT_MACHO 1
-%elifidn __OUTPUT_FORMAT__,macho64
- %define FORMAT_MACHO 1
-%endif
-
-; Set PREFIX for libaom builds.
-%if FORMAT_ELF
- %undef PREFIX
-%elif WIN64
- %undef PREFIX
-%else
- %define PREFIX
-%endif
-
-%ifdef PREFIX
- %define mangle(x) _ %+ x
-%else
- %define mangle(x) x
-%endif
-
-; In some instances macho32 tables get misaligned when using .rodata.
-; When looking at the disassembly it appears that the offset is either
-; correct or consistently off by 90. Placing them in the .text section
-; works around the issue. It appears to be specific to the way libaom
-; handles the tables.
-%macro SECTION_RODATA 0-1 16
- %ifidn __OUTPUT_FORMAT__,macho32
- SECTION .text align=%1
- fakegot:
- %elifidn __OUTPUT_FORMAT__,aout
- SECTION .text
- %else
- SECTION .rodata align=%1
- %endif
-%endmacro
-
-; PIC macros are copied from aom_ports/x86_abi_support.asm. The "define PIC"
-; from original code is added in for 64bit.
-%ifidn __OUTPUT_FORMAT__,elf32
-%define ABI_IS_32BIT 1
-%elifidn __OUTPUT_FORMAT__,macho32
-%define ABI_IS_32BIT 1
-%elifidn __OUTPUT_FORMAT__,win32
-%define ABI_IS_32BIT 1
-%elifidn __OUTPUT_FORMAT__,aout
-%define ABI_IS_32BIT 1
-%else
-%define ABI_IS_32BIT 0
-%endif
-
-%if ABI_IS_32BIT
- %if CONFIG_PIC=1
- %ifidn __OUTPUT_FORMAT__,elf32
- %define GET_GOT_DEFINED 1
- %define WRT_PLT wrt ..plt
- %macro GET_GOT 1
- extern _GLOBAL_OFFSET_TABLE_
- push %1
- call %%get_got
- %%sub_offset:
- jmp %%exitGG
- %%get_got:
- mov %1, [esp]
- add %1, _GLOBAL_OFFSET_TABLE_ + $$ - %%sub_offset wrt ..gotpc
- ret
- %%exitGG:
- %undef GLOBAL
- %define GLOBAL(x) x + %1 wrt ..gotoff
- %undef RESTORE_GOT
- %define RESTORE_GOT pop %1
- %endmacro
- %elifidn __OUTPUT_FORMAT__,macho32
- %define GET_GOT_DEFINED 1
- %macro GET_GOT 1
- push %1
- call %%get_got
- %%get_got:
- pop %1
- %undef GLOBAL
- %define GLOBAL(x) x + %1 - %%get_got
- %undef RESTORE_GOT
- %define RESTORE_GOT pop %1
- %endmacro
- %else
- %define GET_GOT_DEFINED 0
- %endif
- %endif
-
- %if ARCH_X86_64 == 0
- %undef PIC
- %endif
-
-%else
- %macro GET_GOT 1
- %endmacro
- %define GLOBAL(x) rel x
- %define WRT_PLT wrt ..plt
-
- %if WIN64
- %define PIC
- %elifidn __OUTPUT_FORMAT__,macho64
- %define PIC
- %elif CONFIG_PIC
- %define PIC
- %endif
-%endif
-
-%ifnmacro GET_GOT
- %macro GET_GOT 1
- %endmacro
- %define GLOBAL(x) x
-%endif
-%ifndef RESTORE_GOT
- %define RESTORE_GOT
-%endif
-%ifndef WRT_PLT
- %define WRT_PLT
-%endif
-
-%ifdef PIC
- default rel
-%endif
-
-%ifndef GET_GOT_DEFINED
- %define GET_GOT_DEFINED 0
-%endif
-; Done with PIC macros
-
-%ifdef __NASM_VER__
- %use smartalign
-%endif
-
-; Macros to eliminate most code duplication between x86_32 and x86_64:
-; Currently this works only for leaf functions which load all their arguments
-; into registers at the start, and make no other use of the stack. Luckily that
-; covers most of x264's asm.
-
-; PROLOGUE:
-; %1 = number of arguments. loads them from stack if needed.
-; %2 = number of registers used. pushes callee-saved regs if needed.
-; %3 = number of xmm registers used. pushes callee-saved xmm regs if needed.
-; %4 = (optional) stack size to be allocated. The stack will be aligned before
-; allocating the specified stack size. If the required stack alignment is
-; larger than the known stack alignment the stack will be manually aligned
-; and an extra register will be allocated to hold the original stack
-; pointer (to not invalidate r0m etc.). To prevent the use of an extra
-; register as stack pointer, request a negative stack size.
-; %4+/%5+ = list of names to define to registers
-; PROLOGUE can also be invoked by adding the same options to cglobal
-
-; e.g.
-; cglobal foo, 2,3,7,0x40, dst, src, tmp
-; declares a function (foo) that automatically loads two arguments (dst and
-; src) into registers, uses one additional register (tmp) plus 7 vector
-; registers (m0-m6) and allocates 0x40 bytes of stack space.
-
-; TODO Some functions can use some args directly from the stack. If they're the
-; last args then you can just not declare them, but if they're in the middle
-; we need a more flexible macro.
-
-; RET:
-; Pops anything that was pushed by PROLOGUE, and returns.
-
-; REP_RET:
-; Use this instead of RET if it's a branch target.
-
-; registers:
-; rN and rNq are the native-size register holding function argument N
-; rNd, rNw, rNb are dword, word, and byte size
-; rNh is the high 8 bits of the word size
-; rNm is the original location of arg N (a register or on the stack), dword
-; rNmp is native size
-
-%macro DECLARE_REG 2-3 ; argument index N, register name, [offset of the argument's stack slot]
- %define r%1q %2
- %define r%1d %2d
- %define r%1w %2w
- %define r%1b %2b
- %define r%1h %2h
- %define %2q %2 ; the register's own name with a q suffix resolves to the register
- %if %0 == 2 ; no stack offset supplied: rNm and rNmp alias the register itself
- %define r%1m %2d
- %define r%1mp %2
- %elif ARCH_X86_64 ; memory
- %define r%1m [rstk + stack_offset + %3]
- %define r%1mp qword r %+ %1 %+ m
- %else
- %define r%1m [rstk + stack_offset + %3]
- %define r%1mp dword r %+ %1 %+ m
- %endif
- %define r%1 %2 ; unsuffixed rN names the register as well
-%endmacro
-
-%macro DECLARE_REG_SIZE 3 ; 16-bit register name, low-byte name, high-byte name (e.g. ax, al, ah)
- %define r%1q r%1 ; q/d/w/h/b suffixes are defined for both the r- and e-prefixed spellings
- %define e%1q r%1
- %define r%1d e%1
- %define e%1d e%1
- %define r%1w %1
- %define e%1w %1
- %define r%1h %3
- %define e%1h %3
- %define r%1b %2
- %define e%1b %2
- %if ARCH_X86_64 == 0 ; non-64-bit build: the r-prefixed name maps to the e-prefixed register
- %define r%1 e%1
- %endif
-%endmacro
-
-DECLARE_REG_SIZE ax, al, ah
-DECLARE_REG_SIZE bx, bl, bh
-DECLARE_REG_SIZE cx, cl, ch
-DECLARE_REG_SIZE dx, dl, dh
-DECLARE_REG_SIZE si, sil, null
-DECLARE_REG_SIZE di, dil, null
-DECLARE_REG_SIZE bp, bpl, null
-
-; t# defines for when per-arch register allocation is more complex than just function arguments
-
-%macro DECLARE_REG_TMP 1-* ; list of register numbers; defines t0, t1, ... in list order
- %assign %%i 0
- %rep %0 ; one iteration per list entry
- CAT_XDEFINE t, %%i, r%1
- %assign %%i %%i+1
- %rotate 1 ; bring the next list entry into the first position
- %endrep
-%endmacro
-
-%macro DECLARE_REG_TMP_SIZE 0-* ; list of t-register indices to create size-suffixed aliases for
- %rep %0
- %define t%1q t%1 %+ q ; plain define: tN is pasted with the suffix each time the alias is used
- %define t%1d t%1 %+ d
- %define t%1w t%1 %+ w
- %define t%1h t%1 %+ h
- %define t%1b t%1 %+ b
- %rotate 1
- %endrep
-%endmacro
-
-DECLARE_REG_TMP_SIZE 0,1,2,3,4,5,6,7,8,9,10,11,12,13,14
-
-%if ARCH_X86_64
- %define gprsize 8
-%else
- %define gprsize 4
-%endif
-
-%macro PUSH 1 ; push that also keeps stack_offset up to date
- push %1
- %ifidn rstk, rsp ; only adjust the bookkeeping while rstk still names rsp
- %assign stack_offset stack_offset+gprsize
- %endif
-%endmacro
-
-%macro POP 1 ; pop that also keeps stack_offset up to date
- pop %1
- %ifidn rstk, rsp ; only adjust the bookkeeping while rstk still names rsp
- %assign stack_offset stack_offset-gprsize
- %endif
-%endmacro
-
-%macro PUSH_IF_USED 1-* ; list of register numbers; saves each one that is below regs_used
- %rep %0
- %if %1 < regs_used
- PUSH r%1
- %endif
- %rotate 1
- %endrep
-%endmacro
-
-%macro POP_IF_USED 1-* ; list of register numbers; restores each one that is below regs_used
- %rep %0
- %if %1 < regs_used
- pop r%1 ; lowercase pop, not the POP macro, so stack_offset is not adjusted here
- %endif
- %rotate 1
- %endrep
-%endmacro
-
-%macro LOAD_IF_USED 1-* ; list of argument numbers; loads each one that is below num_args
- %rep %0
- %if %1 < num_args
- mov r%1, r %+ %1 %+ mp ; rNmp is the argument's original location at native size
- %endif
- %rotate 1
- %endrep
-%endmacro
-
-%macro SUB 2 ; sub that tracks stack_offset when the destination is rstk
- sub %1, %2
- %ifidn %1, rstk
- %assign stack_offset stack_offset+(%2)
- %endif
-%endmacro
-
-%macro ADD 2 ; add that tracks stack_offset when the destination is rstk
- add %1, %2
- %ifidn %1, rstk
- %assign stack_offset stack_offset-(%2)
- %endif
-%endmacro
-
-%macro movifnidn 2 ; mov dst, src that is omitted when both operands are textually identical
- %ifnidn %1, %2
- mov %1, %2
- %endif
-%endmacro
-
-%macro movsxdifnidn 2 ; movsxd dst, src that is omitted when both operands are textually identical
- %ifnidn %1, %2
- movsxd %1, %2
- %endif
-%endmacro
-
-%macro ASSERT 1 ; assemble-time check: raises an error when the expression evaluates to 0
- %if (%1) == 0
- %error assertion ``%1'' failed
- %endif
-%endmacro
-
-%macro DEFINE_ARGS 0-* ; list of names; name number i becomes an alias for register r<i>
- %ifdef n_arg_names ; a previous invocation left names behind: undefine all of their variants first
- %assign %%i 0
- %rep n_arg_names
- CAT_UNDEF arg_name %+ %%i, q
- CAT_UNDEF arg_name %+ %%i, d
- CAT_UNDEF arg_name %+ %%i, w
- CAT_UNDEF arg_name %+ %%i, h
- CAT_UNDEF arg_name %+ %%i, b
- CAT_UNDEF arg_name %+ %%i, m
- CAT_UNDEF arg_name %+ %%i, mp
- CAT_UNDEF arg_name, %%i
- %assign %%i %%i+1
- %endrep
- %endif
-
- %xdefine %%stack_offset stack_offset ; saved here, restored after the loop below
- %undef stack_offset ; so that the current value of stack_offset doesn't get baked in by xdefine
- %assign %%i 0
- %rep %0
- %xdefine %1q r %+ %%i %+ q
- %xdefine %1d r %+ %%i %+ d
- %xdefine %1w r %+ %%i %+ w
- %xdefine %1h r %+ %%i %+ h
- %xdefine %1b r %+ %%i %+ b
- %xdefine %1m r %+ %%i %+ m
- %xdefine %1mp r %+ %%i %+ mp
- CAT_XDEFINE arg_name, %%i, %1
- %assign %%i %%i+1
- %rotate 1
- %endrep
- %xdefine stack_offset %%stack_offset
- %assign n_arg_names %0 ; number of names the next invocation has to undefine
-%endmacro
-
-%define required_stack_alignment ((mmsize + 15) & ~15)
-
-%macro ALLOC_STACK 1-2 0 ; stack_size, n_xmm_regs (for win64 only)
- %ifnum %1 ; nothing is emitted unless the first argument is a nonzero number
- %if %1 != 0
- %assign %%pad 0
- %assign stack_size %1
- %if stack_size < 0 ; a negative request only selects the mode below; the size itself is its magnitude
- %assign stack_size -stack_size
- %endif
- %if WIN64
- %assign %%pad %%pad + 32 ; shadow space
- %if mmsize != 8
- %assign xmm_regs_used %2
- %if xmm_regs_used > 8
- %assign %%pad %%pad + (xmm_regs_used-8)*16 ; callee-saved xmm registers
- %endif
- %endif
- %endif
- %if required_stack_alignment <= STACK_ALIGNMENT
- ; maintain the current stack alignment
- %assign stack_size_padded stack_size + %%pad + ((-%%pad-stack_offset-gprsize) & (STACK_ALIGNMENT-1))
- SUB rsp, stack_size_padded
- %else
- %assign %%reg_num (regs_used - 1) ; the highest-numbered register in use becomes rstk
- %xdefine rstk r %+ %%reg_num
- ; align stack, and save original stack location directly above
- ; it, i.e. in [rsp+stack_size_padded], so we can restore the
- ; stack in a single instruction (i.e. mov rsp, rstk or mov
- ; rsp, [rsp+stack_size_padded])
- %if %1 < 0 ; need to store rsp on stack
- %xdefine rstkm [rsp + stack_size + %%pad]
- %assign %%pad %%pad + gprsize
- %else ; can keep rsp in rstk during whole function
- %xdefine rstkm rstk
- %endif
- %assign stack_size_padded stack_size + ((%%pad + required_stack_alignment-1) & ~(required_stack_alignment-1))
- mov rstk, rsp
- and rsp, ~(required_stack_alignment-1)
- sub rsp, stack_size_padded
- movifnidn rstkm, rstk ; emits nothing when rstkm is rstk itself
- %endif
- WIN64_PUSH_XMM
- %endif
- %endif
-%endmacro
-
-%macro SETUP_STACK_POINTER 1 ; stack_size as passed to PROLOGUE; may raise regs_used
- %ifnum %1
- %if %1 != 0 && required_stack_alignment > STACK_ALIGNMENT ; only when the stack must be realigned manually
- %if %1 > 0 ; positive size: one more register is counted, ALLOC_STACK uses the last one as rstk
- %assign regs_used (regs_used + 1)
- %endif
- %if ARCH_X86_64 && regs_used < 5 + UNIX64 * 3
- ; Ensure that we don't clobber any registers containing arguments
- %assign regs_used 5 + UNIX64 * 3
- %endif
- %endif
- %endif
-%endmacro
-
-%macro DEFINE_ARGS_INTERNAL 3+ ; PROLOGUE's parameter count, its 4th parameter, its remaining parameters
- %ifnum %2 ; 4th parameter is a number (the stack size): the names are what follows it
- DEFINE_ARGS %3
- %elif %1 == 4 ; 4th parameter is the only name
- DEFINE_ARGS %2
- %elif %1 > 4 ; 4th parameter is the first of several names
- DEFINE_ARGS %2, %3
- %endif
-%endmacro
-
-%if WIN64 ; Windows x64 ;=================================================
-
-DECLARE_REG 0, rcx
-DECLARE_REG 1, rdx
-DECLARE_REG 2, R8
-DECLARE_REG 3, R9
-DECLARE_REG 4, R10, 40
-DECLARE_REG 5, R11, 48
-DECLARE_REG 6, rax, 56
-DECLARE_REG 7, rdi, 64
-DECLARE_REG 8, rsi, 72
-DECLARE_REG 9, rbx, 80
-DECLARE_REG 10, rbp, 88
-DECLARE_REG 11, R12, 96
-DECLARE_REG 12, R13, 104
-DECLARE_REG 13, R14, 112
-DECLARE_REG 14, R15, 120
-
-%macro PROLOGUE 2-5+ 0 ; #args, #regs, #xmm_regs, [stack_size,] arg_names...
- %assign num_args %1
- %assign regs_used %2
- ASSERT regs_used >= num_args
- SETUP_STACK_POINTER %4 ; may raise regs_used
- ASSERT regs_used <= 15
- PUSH_IF_USED 7, 8, 9, 10, 11, 12, 13, 14
- ALLOC_STACK %4, %3
- %if mmsize != 8 && stack_size == 0 ; stack_size is still 0, so ALLOC_STACK did not reach its WIN64_PUSH_XMM
- WIN64_SPILL_XMM %3
- %endif
- LOAD_IF_USED 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14 ; these are the arguments declared with a stack offset
- DEFINE_ARGS_INTERNAL %0, %4, %5
-%endmacro
-
-%macro WIN64_PUSH_XMM 0 ; stores xmm6 and up, as far as xmm_regs_used requires
- ; Use the shadow space to store XMM6 and XMM7, the rest needs stack space allocated.
- %if xmm_regs_used > 6
- movaps [rstk + stack_offset + 8], xmm6
- %endif
- %if xmm_regs_used > 7
- movaps [rstk + stack_offset + 24], xmm7
- %endif
- %if xmm_regs_used > 8
- %assign %%i 8
- %rep xmm_regs_used-8 ; xmm8 and above go at 16-byte steps starting at rsp + stack_size + 32
- movaps [rsp + (%%i-8)*16 + stack_size + 32], xmm %+ %%i
- %assign %%i %%i+1
- %endrep
- %endif
-%endmacro
-
-%macro WIN64_SPILL_XMM 1 ; number of xmm registers used
- %assign xmm_regs_used %1
- ASSERT xmm_regs_used <= 16
- %if xmm_regs_used > 8 ; stack is only allocated when more than xmm6/xmm7 have to be stored
- ; Allocate stack space for callee-saved xmm registers plus shadow space and align the stack.
- %assign %%pad (xmm_regs_used-8)*16 + 32
- %assign stack_size_padded %%pad + ((-%%pad-stack_offset-gprsize) & (STACK_ALIGNMENT-1))
- SUB rsp, stack_size_padded
- %endif
- WIN64_PUSH_XMM
-%endmacro
-
-%macro WIN64_RESTORE_XMM_INTERNAL 1 ; base register for addressing the saved registers (RET passes rsp)
- %assign %%pad_size 0
- %if xmm_regs_used > 8
- %assign %%i xmm_regs_used
- %rep xmm_regs_used-8 ; reload from the highest used register down to xmm8
- %assign %%i %%i-1
- movaps xmm %+ %%i, [%1 + (%%i-8)*16 + stack_size + 32]
- %endrep
- %endif
- %if stack_size_padded > 0 ; release the allocated stack area
- %if stack_size > 0 && required_stack_alignment > STACK_ALIGNMENT
- mov rsp, rstkm
- %else
- add %1, stack_size_padded
- %assign %%pad_size stack_size_padded ; base register moved: compensated in the two loads below
- %endif
- %endif
- %if xmm_regs_used > 7
- movaps xmm7, [%1 + stack_offset - %%pad_size + 24]
- %endif
- %if xmm_regs_used > 6
- movaps xmm6, [%1 + stack_offset - %%pad_size + 8]
- %endif
-%endmacro
-
-%macro WIN64_RESTORE_XMM 1 ; base register; restores the xmm registers and resets the bookkeeping
- WIN64_RESTORE_XMM_INTERNAL %1
- %assign stack_offset (stack_offset-stack_size_padded)
- %assign xmm_regs_used 0
-%endmacro
-
-%define has_epilogue regs_used > 7 || xmm_regs_used > 6 || mmsize == 32 || stack_size > 0
-
-%macro RET 0 ; WIN64 epilogue: restore xmm registers and stack, pop saved registers, return
- WIN64_RESTORE_XMM_INTERNAL rsp
- POP_IF_USED 14, 13, 12, 11, 10, 9, 8, 7 ; reverse of the PUSH_IF_USED order in PROLOGUE
- %if mmsize == 32
- vzeroupper
- %endif
- AUTO_REP_RET
-%endmacro
-
-%elif ARCH_X86_64 ; *nix x64 ;=============================================
-
-DECLARE_REG 0, rdi
-DECLARE_REG 1, rsi
-DECLARE_REG 2, rdx
-DECLARE_REG 3, rcx
-DECLARE_REG 4, R8
-DECLARE_REG 5, R9
-DECLARE_REG 6, rax, 8
-DECLARE_REG 7, R10, 16
-DECLARE_REG 8, R11, 24
-DECLARE_REG 9, rbx, 32
-DECLARE_REG 10, rbp, 40
-DECLARE_REG 11, R12, 48
-DECLARE_REG 12, R13, 56
-DECLARE_REG 13, R14, 64
-DECLARE_REG 14, R15, 72
-
-%macro PROLOGUE 2-5+ ; #args, #regs, #xmm_regs, [stack_size,] arg_names...
- %assign num_args %1
- %assign regs_used %2
- ASSERT regs_used >= num_args
- SETUP_STACK_POINTER %4 ; may raise regs_used
- ASSERT regs_used <= 15
- PUSH_IF_USED 9, 10, 11, 12, 13, 14
- ALLOC_STACK %4
- LOAD_IF_USED 6, 7, 8, 9, 10, 11, 12, 13, 14 ; these are the arguments declared with a stack offset
- DEFINE_ARGS_INTERNAL %0, %4, %5
-%endmacro
-
-%define has_epilogue regs_used > 9 || mmsize == 32 || stack_size > 0
-
-%macro RET 0 ; x86-64 (non-Windows) epilogue: release stack, pop saved registers, return
- %if stack_size_padded > 0
- %if required_stack_alignment > STACK_ALIGNMENT
- mov rsp, rstkm ; rstkm holds the stack pointer saved by ALLOC_STACK
- %else
- add rsp, stack_size_padded
- %endif
- %endif
- POP_IF_USED 14, 13, 12, 11, 10, 9 ; reverse of the PUSH_IF_USED order in PROLOGUE
- %if mmsize == 32
- vzeroupper
- %endif
- AUTO_REP_RET
-%endmacro
-
-%else ; X86_32 ;==============================================================
-
-DECLARE_REG 0, eax, 4
-DECLARE_REG 1, ecx, 8
-DECLARE_REG 2, edx, 12
-DECLARE_REG 3, ebx, 16
-DECLARE_REG 4, esi, 20
-DECLARE_REG 5, edi, 24
-DECLARE_REG 6, ebp, 28
-%define rsp esp
-
-%macro DECLARE_ARG 1-* ; list of argument numbers that get stack-slot names only, no register
- %rep %0
- %define r%1m [rstk + stack_offset + 4*%1 + 4]
- %define r%1mp dword r%1m
- %rotate 1
- %endrep
-%endmacro
-
-DECLARE_ARG 7, 8, 9, 10, 11, 12, 13, 14
-
-%macro PROLOGUE 2-5+ ; #args, #regs, #xmm_regs, [stack_size,] arg_names...
- %assign num_args %1
- %assign regs_used %2
- ASSERT regs_used >= num_args
- %if num_args > 7 ; both counts are clamped to the 7 registers declared for x86_32
- %assign num_args 7
- %endif
- %if regs_used > 7
- %assign regs_used 7
- %endif
- SETUP_STACK_POINTER %4 ; may raise regs_used
- ASSERT regs_used <= 7
- PUSH_IF_USED 3, 4, 5, 6
- ALLOC_STACK %4
- LOAD_IF_USED 0, 1, 2, 3, 4, 5, 6 ; every argument is declared with a stack offset here
- DEFINE_ARGS_INTERNAL %0, %4, %5
-%endmacro
-
-%define has_epilogue regs_used > 3 || mmsize == 32 || stack_size > 0
-
-%macro RET 0 ; x86_32 epilogue: release stack, pop saved registers, return
- %if stack_size_padded > 0
- %if required_stack_alignment > STACK_ALIGNMENT
- mov rsp, rstkm ; rstkm holds the stack pointer saved by ALLOC_STACK
- %else
- add rsp, stack_size_padded
- %endif
- %endif
- POP_IF_USED 6, 5, 4, 3 ; reverse of the PUSH_IF_USED order in PROLOGUE
- %if mmsize == 32
- vzeroupper
- %endif
- AUTO_REP_RET
-%endmacro
-
-%endif ;======================================================================
-
-%if WIN64 == 0 ; outside WIN64 the xmm save/restore macros are defined empty
- %macro WIN64_SPILL_XMM 1
- %endmacro
- %macro WIN64_RESTORE_XMM 1
- %endmacro
- %macro WIN64_PUSH_XMM 0
- %endmacro
-%endif
-
-; On AMD cpus <=K10, an ordinary ret is slow if it immediately follows either
-; a branch or a branch target. So switch to a 2-byte form of ret in that case.
-; We can automatically detect "follows a branch", but not a branch target.
-; (SSSE3 is a sufficient condition to know that your cpu doesn't have this problem.)
-%macro REP_RET 0 ; return that uses the 2-byte "rep ret" form when there is no epilogue
- %if has_epilogue ; has_epilogue is defined per ABI next to that ABI's RET
- RET
- %else
- rep ret
- %endif
- annotate_function_size
-%endmacro
-
-%define last_branch_adr $$ ; initial value; redefined by the macros that BRANCH_INSTR creates
-%macro AUTO_REP_RET 0 ; ret, preceded by a rep prefix when it directly follows a tracked branch
- %if notcpuflag(ssse3)
- times ((last_branch_adr-$)>>31)+1 rep ; times 1 iff $ == last_branch_adr.
- %endif
- ret
- annotate_function_size
-%endmacro
-
-%macro BRANCH_INSTR 0-* ; list of branch mnemonics; defines a same-named wrapper macro for each
- %rep %0
- %macro %1 1-2 %1 ; wrapper: branch target, plus the mnemonic itself as the default 2nd parameter
- %2 %1
- %if notcpuflag(ssse3)
- %%branch_instr equ $ ; address right after the emitted branch
- %xdefine last_branch_adr %%branch_instr
- %endif
- %endmacro
- %rotate 1
- %endrep
-%endmacro
-
-BRANCH_INSTR jz, je, jnz, jne, jl, jle, jnl, jnle, jg, jge, jng, jnge, ja, jae, jna, jnae, jb, jbe, jnb, jnbe, jc, jnc, js, jns, jo, jno, jp, jnp
-
-%macro TAIL_CALL 2 ; callee, is_nonadjacent
- %if has_epilogue ; an epilogue has to run after the callee: regular call followed by RET
- call %1
- RET
- %elif %2 ; no epilogue: jump to the callee; when is_nonadjacent is 0 nothing is emitted
- jmp %1
- %endif
- annotate_function_size
-%endmacro
-
-;=============================================================================
-; arch-independent part
-;=============================================================================
-
-%assign function_align 16
-
-; Begin a function.
-; Applies any symbol mangling needed for C linkage, and sets up a define such that
-; subsequent uses of the function name automatically refer to the mangled version.
-; Appends cpuflags to the function name if cpuflags has been specified.
-; The "" empty default parameter is a workaround for nasm, which fails if SUFFIX
-; is empty and we call cglobal_internal with just %1 %+ SUFFIX (without %2).
-%macro cglobal 1-2+ "" ; name, [PROLOGUE args]
- cglobal_internal 1, %1 %+ SUFFIX, %2 ; leading 1 makes cglobal_internal use private_prefix
-%endmacro
-%macro cvisible 1-2+ "" ; name, [PROLOGUE args]
- cglobal_internal 0, %1 %+ SUFFIX, %2 ; leading 0 makes cglobal_internal use public_prefix
-%endmacro
-%macro cglobal_internal 2-3+ ; private-prefix flag, name including SUFFIX, [PROLOGUE args]
- annotate_function_size ; applies to the previously declared function, if any
- %if %1
- %xdefine %%FUNCTION_PREFIX private_prefix
- ; libaom explicitly sets visibility in shared object builds. Avoid
- ; setting visibility to hidden as it may break builds that split
- ; sources on e.g., directory boundaries.
- %ifdef CHROMIUM
- %xdefine %%VISIBILITY hidden
- %else
- %xdefine %%VISIBILITY
- %endif
- %else
- %xdefine %%FUNCTION_PREFIX public_prefix
- %xdefine %%VISIBILITY
- %endif
- %ifndef cglobaled_%2 ; first declaration of this name: set up the mangled alias once
- %xdefine %2 mangle(%%FUNCTION_PREFIX %+ _ %+ %2)
- %xdefine %2.skip_prologue %2 %+ .skip_prologue
- CAT_XDEFINE cglobaled_, %2, 1
- %endif
- %xdefine current_function %2
- %xdefine current_function_section __SECT__
- %if FORMAT_ELF
- global %2:function %%VISIBILITY
- %elif FORMAT_MACHO
- %ifdef __NASM_VER__
- global %2
- %else
- global %2:private_extern
- %endif
- %else
- global %2
- %endif
- align function_align
- %2:
- RESET_MM_PERMUTATION ; needed for x86-64, also makes disassembly somewhat nicer
- %xdefine rstk rsp ; copy of the original stack pointer, used when greater alignment than the known stack alignment is required
- %assign stack_offset 0 ; stack pointer offset relative to the return address
- %assign stack_size 0 ; amount of stack space that can be freely used inside a function
- %assign stack_size_padded 0 ; total amount of allocated stack space, including space for callee-saved xmm registers on WIN64 and alignment padding
- %assign xmm_regs_used 0 ; number of XMM registers requested, used for dealing with callee-saved registers on WIN64
- %ifnidn %3, "" ; PROLOGUE is only invoked when arguments for it were supplied
- PROLOGUE %3
- %endif
-%endmacro
-
-%macro cextern 1 ; name; declares an external symbol under its private_prefix-mangled name
- %xdefine %1 mangle(private_prefix %+ _ %+ %1)
- CAT_XDEFINE cglobaled_, %1, 1 ; same marker that cglobal_internal sets and call_internal tests
- extern %1
-%endmacro
-
-; like cextern, but without the prefix
-%macro cextern_naked 1 ; name; declares an external symbol without adding private_prefix
- %ifdef PREFIX ; the name is only passed through mangle() when PREFIX is defined
- %xdefine %1 mangle(%1)
- %endif
- CAT_XDEFINE cglobaled_, %1, 1
- extern %1
-%endmacro
-
-%macro const 1-2+ ; name, [text emitted after the label]
- %xdefine %1 mangle(private_prefix %+ _ %+ %1)
- %if FORMAT_ELF
- global %1:data hidden
- %else
- global %1
- %endif
- %1: %2
-%endmacro
-
-; This is needed for ELF, otherwise the GNU linker assumes the stack is executable by default.
-%if FORMAT_ELF
- [SECTION .note.GNU-stack noalloc noexec nowrite progbits]
-%endif
-
-; Tell debuggers how large the function was.
-; This may be invoked multiple times per function; we rely on later instances overriding earlier ones.
-; This is invoked by RET and similar macros, and also cglobal does it for the previous function,
-; but if the last function in a source file doesn't use any of the standard macros for its epilogue,
-; then its size might be unspecified.
-%macro annotate_function_size 0 ; emits a size directive for current_function; yasm + ELF only
- %ifdef __YASM_VER__
- %ifdef current_function
- %if FORMAT_ELF
- current_function_section ; expands to the section text captured by cglobal_internal
- %%ecf equ $
- size current_function %%ecf - current_function
- __SECT__ ; switch back to the section that was active on entry
- %endif
- %endif
- %endif
-%endmacro
-
-; cpuflags
-
-%assign cpuflags_mmx (1<<0)
-%assign cpuflags_mmx2 (1<<1) | cpuflags_mmx
-%assign cpuflags_3dnow (1<<2) | cpuflags_mmx
-%assign cpuflags_3dnowext (1<<3) | cpuflags_3dnow
-%assign cpuflags_sse (1<<4) | cpuflags_mmx2
-%assign cpuflags_sse2 (1<<5) | cpuflags_sse
-%assign cpuflags_sse2slow (1<<6) | cpuflags_sse2
-%assign cpuflags_sse3 (1<<7) | cpuflags_sse2
-%assign cpuflags_ssse3 (1<<8) | cpuflags_sse3
-%assign cpuflags_sse4 (1<<9) | cpuflags_ssse3
-%assign cpuflags_sse42 (1<<10)| cpuflags_sse4
-%assign cpuflags_avx (1<<11)| cpuflags_sse42
-%assign cpuflags_xop (1<<12)| cpuflags_avx
-%assign cpuflags_fma4 (1<<13)| cpuflags_avx
-%assign cpuflags_fma3 (1<<14)| cpuflags_avx
-%assign cpuflags_avx2 (1<<15)| cpuflags_fma3
-
-%assign cpuflags_cache32 (1<<16)
-%assign cpuflags_cache64 (1<<17)
-%assign cpuflags_slowctz (1<<18)
-%assign cpuflags_lzcnt (1<<19)
-%assign cpuflags_aligned (1<<20) ; not a cpu feature, but a function variant
-%assign cpuflags_atom (1<<21)
-%assign cpuflags_bmi1 (1<<22)|cpuflags_lzcnt
-%assign cpuflags_bmi2 (1<<23)|cpuflags_bmi1
-
-; Returns a boolean value expressing whether or not the specified cpuflag is enabled.
-%define cpuflag(x) (((((cpuflags & (cpuflags_ %+ x)) ^ (cpuflags_ %+ x)) - 1) >> 31) & 1)
-%define notcpuflag(x) (cpuflag(x) ^ 1)
-
-; Takes an arbitrary number of cpuflags from the above list.
-; All subsequent functions (up to the next INIT_CPUFLAGS) are built for the specified cpu.
-; You shouldn't need to invoke this macro directly, it's a subroutine for INIT_MMX &co.
-%macro INIT_CPUFLAGS 0-* ; list of cpuflag names; an empty list clears SUFFIX, cpuname and cpuflags
- %xdefine SUFFIX
- %undef cpuname
- %assign cpuflags 0
-
- %if %0 >= 1
- %rep %0 ; cpuname becomes the flag names joined by underscores, cpuflags their OR-ed bit masks
- %ifdef cpuname
- %xdefine cpuname cpuname %+ _%1
- %else
- %xdefine cpuname %1
- %endif
- %assign cpuflags cpuflags | cpuflags_%1
- %rotate 1
- %endrep
- %xdefine SUFFIX _ %+ cpuname
-
- %if cpuflag(avx)
- %assign avx_enabled 1
- %endif
- %if (mmsize == 16 && notcpuflag(sse2)) || (mmsize == 32 && notcpuflag(avx2))
- %define mova movaps
- %define movu movups
- %define movnta movntps
- %endif
- %if cpuflag(aligned)
- %define movu mova
- %elif cpuflag(sse3) && notcpuflag(ssse3)
- %define movu lddqu
- %endif
- %endif
-
- %if ARCH_X86_64 || cpuflag(sse2) ; each branch below issues the directive matching the assembler in use
- %ifdef __NASM_VER__
- ALIGNMODE k8
- %else
- CPU amdnop
- %endif
- %else
- %ifdef __NASM_VER__
- ALIGNMODE nop
- %else
- CPU basicnop
- %endif
- %endif
-%endmacro
-
-; Merge mmx and sse*
-; m# is a simd register of the currently selected size
-; xm# is the corresponding xmm register if mmsize >= 16, otherwise the same as m#
-; ym# is the corresponding ymm register if mmsize >= 32, otherwise the same as m#
-; (All 3 remain in sync through SWAP.)
-
-%macro CAT_XDEFINE 3 ; name part, name part, value: xdefines the concatenated name to the value
- %xdefine %1%2 %3
-%endmacro
-
-%macro CAT_UNDEF 2 ; name part, name part: undefines the concatenated name
- %undef %1%2
-%endmacro
-
-%macro INIT_MMX 0-1+ ; [cpuflags]; selects 8-byte mm registers as m0-m7
- %assign avx_enabled 0
- %define RESET_MM_PERMUTATION INIT_MMX %1 ; re-running this macro restores the identity mapping
- %define mmsize 8
- %define num_mmregs 8
- %define mova movq
- %define movu movq
- %define movh movd
- %define movnta movntq
- %assign %%i 0
- %rep 8
- CAT_XDEFINE m, %%i, mm %+ %%i
- CAT_XDEFINE nnmm, %%i, %%i
- %assign %%i %%i+1
- %endrep
- %rep 8 ; the counter continues from 8, so this removes m8-m15 and nnmm8-nnmm15
- CAT_UNDEF m, %%i
- CAT_UNDEF nnmm, %%i
- %assign %%i %%i+1
- %endrep
- INIT_CPUFLAGS %1
-%endmacro
-
-%macro INIT_XMM 0-1+ ; [cpuflags]; selects 16-byte xmm registers as m0..m(num_mmregs-1)
- %assign avx_enabled 0
- %define RESET_MM_PERMUTATION INIT_XMM %1 ; re-running this macro restores the identity mapping
- %define mmsize 16
- %define num_mmregs 8
- %if ARCH_X86_64 ; 16 registers instead of 8 on x86-64
- %define num_mmregs 16
- %endif
- %define mova movdqa
- %define movu movdqu
- %define movh movq
- %define movnta movntdq
- %assign %%i 0
- %rep num_mmregs
- CAT_XDEFINE m, %%i, xmm %+ %%i
- CAT_XDEFINE nnxmm, %%i, %%i
- %assign %%i %%i+1
- %endrep
- INIT_CPUFLAGS %1
-%endmacro
-
-%macro INIT_YMM 0-1+ ; [cpuflags]; selects 32-byte ymm registers as m0..m(num_mmregs-1)
- %assign avx_enabled 1
- %define RESET_MM_PERMUTATION INIT_YMM %1 ; re-running this macro restores the identity mapping
- %define mmsize 32
- %define num_mmregs 8
- %if ARCH_X86_64 ; 16 registers instead of 8 on x86-64
- %define num_mmregs 16
- %endif
- %define mova movdqa
- %define movu movdqu
- %undef movh ; unlike INIT_MMX and INIT_XMM, no movh alias is provided
- %define movnta movntdq
- %assign %%i 0
- %rep num_mmregs
- CAT_XDEFINE m, %%i, ymm %+ %%i
- CAT_XDEFINE nnymm, %%i, %%i
- %assign %%i %%i+1
- %endrep
- INIT_CPUFLAGS %1
-%endmacro
-
-INIT_XMM
-
-%macro DECLARE_MMCAST 1 ; register number N; defines the xmN/ymN names and their lookup table
- %define mmmm%1 mm%1 ; table entries are named <target prefix><current register name>
- %define mmxmm%1 mm%1
- %define mmymm%1 mm%1
- %define xmmmm%1 mm%1
- %define xmmxmm%1 xmm%1
- %define xmmymm%1 xmm%1
- %define ymmmm%1 mm%1
- %define ymmxmm%1 xmm%1
- %define ymmymm%1 ymm%1
- %define xm%1 xmm %+ m%1 ; pastes xmm onto whatever mN currently expands to, selecting a table entry
- %define ym%1 ymm %+ m%1
-%endmacro
-
-%assign i 0
-%rep 16
- DECLARE_MMCAST i
- %assign i i+1
-%endrep
-
-; I often want to use macros that permute their arguments. e.g. there's no
-; efficient way to implement butterfly or transpose or dct without swapping some
-; arguments.
-;
-; I would like to not have to manually keep track of the permutations:
-; If I insert a permutation in the middle of a function, it should automatically
-; change everything that follows. For more complex macros I may also have multiple
-; implementations, e.g. the SSE2 and SSSE3 versions may have different permutations.
-;
-; Hence these macros. Insert a PERMUTE or some SWAPs at the end of a macro that
-; permutes its arguments. It's equivalent to exchanging the contents of the
-; registers, except that this way you exchange the register names instead, so it
-; doesn't cost any cycles.
-
-%macro PERMUTE 2-* ; takes a list of pairs to swap
- %rep %0/2 ; first pass: snapshot the current mapping of each pair's second register
- %xdefine %%tmp%2 m%2
- %rotate 2
- %endrep
- %rep %0/2 ; second pass: assign the snapshots, so no pair sees another pair's update
- %xdefine m%1 %%tmp%2
- CAT_XDEFINE nn, m%1, %1
- %rotate 2
- %endrep
-%endmacro
-
-%macro SWAP 2+ ; swaps a single chain (sometimes more concise than pairs)
- %ifnum %1 ; SWAP 0, 1, ...
- SWAP_INTERNAL_NUM %1, %2
- %else ; SWAP m0, m1, ...
- SWAP_INTERNAL_NAME %1, %2
- %endif
-%endmacro
-
-%macro SWAP_INTERNAL_NUM 2-* ; list of register numbers; swaps each adjacent pair in turn
- %rep %0-1
- %xdefine %%tmp m%1
- %xdefine m%1 m%2
- %xdefine m%2 %%tmp
- CAT_XDEFINE nn, m%1, %1 ; keep the register-name-to-number lookup in step with the new mapping
- CAT_XDEFINE nn, m%2, %2
- %rotate 1
- %endrep
-%endmacro
-
-%macro SWAP_INTERNAL_NAME 2-* ; list of register names; converts them to numbers and swaps those
- %xdefine %%args nn %+ %1 ; nn pasted onto the expanded name looks up the register's number
- %rep %0-1
- %xdefine %%args %%args, nn %+ %2
- %rotate 1
- %endrep
- SWAP_INTERNAL_NUM %%args
-%endmacro
-
-; If SAVE_MM_PERMUTATION is placed at the end of a function, then any later
-; calls to that function will automatically load the permutation, so values can
-; be returned in mmregs.
-%macro SAVE_MM_PERMUTATION 0-1 ; [name to save under]; defaults to current_function
- %if %0
- %xdefine %%f %1_m
- %else
- %xdefine %%f current_function %+ _m
- %endif
- %assign %%i 0
- %rep num_mmregs ; record the current mapping of every register as <name>_m<i>
- CAT_XDEFINE %%f, %%i, m %+ %%i
- %assign %%i %%i+1
- %endrep
-%endmacro
-
-%macro LOAD_MM_PERMUTATION 1 ; name to load from
- %ifdef %1_m0 ; does nothing unless a permutation was saved under this name
- %assign %%i 0
- %rep num_mmregs
- CAT_XDEFINE m, %%i, %1_m %+ %%i
- CAT_XDEFINE nn, m %+ %%i, %%i
- %assign %%i %%i+1
- %endrep
- %endif
-%endmacro
-
-; Append cpuflags to the callee's name iff the appended name is known and the plain name isn't
-%macro call 1 ; callee; overrides the plain instruction for the one-operand form
- call_internal %1 %+ SUFFIX, %1 ; passes the suffixed name first, the plain name second
-%endmacro
-%macro call_internal 2 ; callee name with SUFFIX, plain callee name
- %xdefine %%i %2 ; default to the plain name
- %ifndef cglobaled_%2
- %ifdef cglobaled_%1 ; only the suffixed name is known: call that one instead
- %xdefine %%i %1
- %endif
- %endif
- call %%i
- LOAD_MM_PERMUTATION %%i ; picks up a permutation saved under the callee's name, if any
-%endmacro
-
-; Substitutions that reduce instruction size but are functionally equivalent
-%macro add 2 ; destination, source; rewrites an add of the literal 128 as a sub of -128
- %ifnum %2 ; the value comparison is only attempted on numeric operands
- %if %2==128
- sub %1, -128
- %else
- add %1, %2
- %endif
- %else
- add %1, %2
- %endif
-%endmacro
-
-%macro sub 2 ; destination, source; rewrites a sub of the literal 128 as an add of -128
- %ifnum %2 ; the value comparison is only attempted on numeric operands
- %if %2==128
- add %1, -128
- %else
- sub %1, %2
- %endif
- %else
- sub %1, %2
- %endif
-%endmacro
-
-;=============================================================================
-; AVX abstraction layer
-;=============================================================================
-
-%assign i 0
-%rep 16
- %if i < 8
- CAT_XDEFINE sizeofmm, i, 8
- %endif
- CAT_XDEFINE sizeofxmm, i, 16
- CAT_XDEFINE sizeofymm, i, 32
- %assign i i+1
-%endrep
-%undef i
-
-%macro CHECK_AVX_INSTR_EMU 3-* ; instruction text for the message, destination, operands to compare
- %xdefine %%opcode %1
- %xdefine %%dst %2
- %rep %0-2 ; error out if any of the listed operands is textually identical to the destination
- %ifidn %%dst, %3
- %error non-avx emulation of ``%%opcode'' is not supported
- %endif
- %rotate 1
- %endrep
-%endmacro
-
-;%1 == instruction
-;%2 == minimal instruction set
-;%3 == 1 if float, 0 if int
-;%4 == 1 if non-destructive or 4-operand (xmm, xmm, xmm, imm), 0 otherwise
-;%5 == 1 if commutative (i.e. doesn't matter which src arg is which), 0 if not
-;%6+: operands
-%macro RUN_AVX_INSTR 6-9+ ; parameter meanings are listed in the comment block above this macro
- %ifnum sizeof%7 ; operand size: taken from the second operand, else the first, else mmsize
- %assign __sizeofreg sizeof%7
- %elifnum sizeof%6
- %assign __sizeofreg sizeof%6
- %else
- %assign __sizeofreg mmsize
- %endif
- %assign __emulate_avx 0
- %if avx_enabled && __sizeofreg >= 16 ; use the v-prefixed mnemonic
- %xdefine __instr v%1
- %else
- %xdefine __instr %1
- %if %0 >= 8+%4 ; more operands than the plain form accepts: emulate with a move plus the instruction
- %assign __emulate_avx 1
- %endif
- %endif
- %ifnidn %2, fnord ; fnord means no minimal instruction set was declared, so nothing is checked
- %ifdef cpuname
- %if notcpuflag(%2)
- %error use of ``%1'' %2 instruction in cpuname function: current_function
- %elif cpuflags_%2 < cpuflags_sse && notcpuflag(sse2) && __sizeofreg > 8
- %error use of ``%1'' sse2 instruction in cpuname function: current_function
- %endif
- %endif
- %endif
-
- %if __emulate_avx
- %xdefine __src1 %7
- %xdefine __src2 %8
- %ifnidn %6, %7 ; destination differs from the first source: a move is needed
- %if %0 >= 9
- CHECK_AVX_INSTR_EMU {%1 %6, %7, %8, %9}, %6, %8, %9
- %else
- CHECK_AVX_INSTR_EMU {%1 %6, %7, %8}, %6, %8
- %endif
- %if %5 && %4 == 0
- %ifnid %8
- ; 3-operand AVX instructions with a memory arg can only have it in src2,
- ; whereas SSE emulation prefers to have it in src1 (i.e. the mov).
- ; So, if the instruction is commutative with a memory arg, swap them.
- %xdefine __src1 %8
- %xdefine __src2 %7
- %endif
- %endif
- %if __sizeofreg == 8 ; pick the move that copies src1 into the destination
- MOVQ %6, __src1
- %elif %3
- MOVAPS %6, __src1
- %else
- MOVDQA %6, __src1
- %endif
- %endif
- %if %0 >= 9
- %1 %6, __src2, %9
- %else
- %1 %6, __src2
- %endif
- %elif %0 >= 9 ; no emulation: pass the operands through unchanged
- __instr %6, %7, %8, %9
- %elif %0 == 8
- __instr %6, %7, %8
- %elif %0 == 7
- __instr %6, %7
- %else
- __instr %6
- %endif
-%endmacro
-
-;%1 == instruction
-;%2 == minimal instruction set
-;%3 == 1 if float, 0 if int
-;%4 == 1 if non-destructive or 4-operand (xmm, xmm, xmm, imm), 0 otherwise
-;%5 == 1 if commutative (i.e. doesn't matter which src arg is which), 0 if not
-%macro AVX_INSTR 1-5 fnord, 0, 1, 0 ; defines a wrapper macro named after the instruction
- %macro %1 1-10 fnord, fnord, fnord, fnord, %1, %2, %3, %4, %5 ; 1-5 operands, then the properties as defaults
- %ifidn %2, fnord ; the first operand still equal to fnord tells how many operands were supplied
- RUN_AVX_INSTR %6, %7, %8, %9, %10, %1
- %elifidn %3, fnord
- RUN_AVX_INSTR %6, %7, %8, %9, %10, %1, %2
- %elifidn %4, fnord
- RUN_AVX_INSTR %6, %7, %8, %9, %10, %1, %2, %3
- %elifidn %5, fnord
- RUN_AVX_INSTR %6, %7, %8, %9, %10, %1, %2, %3, %4
- %else
- RUN_AVX_INSTR %6, %7, %8, %9, %10, %1, %2, %3, %4, %5
- %endif
- %endmacro
-%endmacro
-
-; Instructions with both VEX and non-VEX encodings
-; Non-destructive instructions are written without parameters
-AVX_INSTR addpd, sse2, 1, 0, 1
-AVX_INSTR addps, sse, 1, 0, 1
-AVX_INSTR addsd, sse2, 1, 0, 1
-AVX_INSTR addss, sse, 1, 0, 1
-AVX_INSTR addsubpd, sse3, 1, 0, 0
-AVX_INSTR addsubps, sse3, 1, 0, 0
-AVX_INSTR aesdec, fnord, 0, 0, 0
-AVX_INSTR aesdeclast, fnord, 0, 0, 0
-AVX_INSTR aesenc, fnord, 0, 0, 0
-AVX_INSTR aesenclast, fnord, 0, 0, 0
-AVX_INSTR aesimc
-AVX_INSTR aeskeygenassist
-AVX_INSTR andnpd, sse2, 1, 0, 0
-AVX_INSTR andnps, sse, 1, 0, 0
-AVX_INSTR andpd, sse2, 1, 0, 1
-AVX_INSTR andps, sse, 1, 0, 1
-AVX_INSTR blendpd, sse4, 1, 0, 0
-AVX_INSTR blendps, sse4, 1, 0, 0
-AVX_INSTR blendvpd, sse4, 1, 0, 0
-AVX_INSTR blendvps, sse4, 1, 0, 0
-AVX_INSTR cmppd, sse2, 1, 1, 0
-AVX_INSTR cmpps, sse, 1, 1, 0
-AVX_INSTR cmpsd, sse2, 1, 1, 0
-AVX_INSTR cmpss, sse, 1, 1, 0
-AVX_INSTR comisd, sse2
-AVX_INSTR comiss, sse
-AVX_INSTR cvtdq2pd, sse2
-AVX_INSTR cvtdq2ps, sse2
-AVX_INSTR cvtpd2dq, sse2
-AVX_INSTR cvtpd2ps, sse2
-AVX_INSTR cvtps2dq, sse2
-AVX_INSTR cvtps2pd, sse2
-AVX_INSTR cvtsd2si, sse2
-AVX_INSTR cvtsd2ss, sse2
-AVX_INSTR cvtsi2sd, sse2
-AVX_INSTR cvtsi2ss, sse
-AVX_INSTR cvtss2sd, sse2
-AVX_INSTR cvtss2si, sse
-AVX_INSTR cvttpd2dq, sse2
-AVX_INSTR cvttps2dq, sse2
-AVX_INSTR cvttsd2si, sse2
-AVX_INSTR cvttss2si, sse
-AVX_INSTR divpd, sse2, 1, 0, 0
-AVX_INSTR divps, sse, 1, 0, 0
-AVX_INSTR divsd, sse2, 1, 0, 0
-AVX_INSTR divss, sse, 1, 0, 0
-AVX_INSTR dppd, sse4, 1, 1, 0
-AVX_INSTR dpps, sse4, 1, 1, 0
-AVX_INSTR extractps, sse4
-AVX_INSTR haddpd, sse3, 1, 0, 0
-AVX_INSTR haddps, sse3, 1, 0, 0
-AVX_INSTR hsubpd, sse3, 1, 0, 0
-AVX_INSTR hsubps, sse3, 1, 0, 0
-AVX_INSTR insertps, sse4, 1, 1, 0
-AVX_INSTR lddqu, sse3
-AVX_INSTR ldmxcsr, sse
-AVX_INSTR maskmovdqu, sse2
-AVX_INSTR maxpd, sse2, 1, 0, 1
-AVX_INSTR maxps, sse, 1, 0, 1
-AVX_INSTR maxsd, sse2, 1, 0, 1
-AVX_INSTR maxss, sse, 1, 0, 1
-AVX_INSTR minpd, sse2, 1, 0, 1
-AVX_INSTR minps, sse, 1, 0, 1
-AVX_INSTR minsd, sse2, 1, 0, 1
-AVX_INSTR minss, sse, 1, 0, 1
-AVX_INSTR movapd, sse2
-AVX_INSTR movaps, sse
-AVX_INSTR movd, mmx
-AVX_INSTR movddup, sse3
-AVX_INSTR movdqa, sse2
-AVX_INSTR movdqu, sse2
-AVX_INSTR movhlps, sse, 1, 0, 0
-AVX_INSTR movhpd, sse2, 1, 0, 0
-AVX_INSTR movhps, sse, 1, 0, 0
-AVX_INSTR movlhps, sse, 1, 0, 0
-AVX_INSTR movlpd, sse2, 1, 0, 0
-AVX_INSTR movlps, sse, 1, 0, 0
-AVX_INSTR movmskpd, sse2
-AVX_INSTR movmskps, sse
-AVX_INSTR movntdq, sse2
-AVX_INSTR movntdqa, sse4
-AVX_INSTR movntpd, sse2
-AVX_INSTR movntps, sse
-AVX_INSTR movq, mmx
-AVX_INSTR movsd, sse2, 1, 0, 0
-AVX_INSTR movshdup, sse3
-AVX_INSTR movsldup, sse3
-AVX_INSTR movss, sse, 1, 0, 0
-AVX_INSTR movupd, sse2
-AVX_INSTR movups, sse
-AVX_INSTR mpsadbw, sse4
-AVX_INSTR mulpd, sse2, 1, 0, 1
-AVX_INSTR mulps, sse, 1, 0, 1
-AVX_INSTR mulsd, sse2, 1, 0, 1
-AVX_INSTR mulss, sse, 1, 0, 1
-AVX_INSTR orpd, sse2, 1, 0, 1
-AVX_INSTR orps, sse, 1, 0, 1
-AVX_INSTR pabsb, ssse3
-AVX_INSTR pabsd, ssse3
-AVX_INSTR pabsw, ssse3
-AVX_INSTR packsswb, mmx, 0, 0, 0
-AVX_INSTR packssdw, mmx, 0, 0, 0
-AVX_INSTR packuswb, mmx, 0, 0, 0
-AVX_INSTR packusdw, sse4, 0, 0, 0
-AVX_INSTR paddb, mmx, 0, 0, 1
-AVX_INSTR paddw, mmx, 0, 0, 1
-AVX_INSTR paddd, mmx, 0, 0, 1
-AVX_INSTR paddq, sse2, 0, 0, 1
-AVX_INSTR paddsb, mmx, 0, 0, 1
-AVX_INSTR paddsw, mmx, 0, 0, 1
-AVX_INSTR paddusb, mmx, 0, 0, 1
-AVX_INSTR paddusw, mmx, 0, 0, 1
-AVX_INSTR palignr, ssse3
-AVX_INSTR pand, mmx, 0, 0, 1
-AVX_INSTR pandn, mmx, 0, 0, 0
-AVX_INSTR pavgb, mmx2, 0, 0, 1
-AVX_INSTR pavgw, mmx2, 0, 0, 1
-AVX_INSTR pblendvb, sse4, 0, 0, 0
-AVX_INSTR pblendw, sse4
-AVX_INSTR pclmulqdq
-AVX_INSTR pcmpestri, sse42
-AVX_INSTR pcmpestrm, sse42
-AVX_INSTR pcmpistri, sse42
-AVX_INSTR pcmpistrm, sse42
-AVX_INSTR pcmpeqb, mmx, 0, 0, 1
-AVX_INSTR pcmpeqw, mmx, 0, 0, 1
-AVX_INSTR pcmpeqd, mmx, 0, 0, 1
-AVX_INSTR pcmpeqq, sse4, 0, 0, 1
-AVX_INSTR pcmpgtb, mmx, 0, 0, 0
-AVX_INSTR pcmpgtw, mmx, 0, 0, 0
-AVX_INSTR pcmpgtd, mmx, 0, 0, 0
-AVX_INSTR pcmpgtq, sse42, 0, 0, 0
-AVX_INSTR pextrb, sse4
-AVX_INSTR pextrd, sse4
-AVX_INSTR pextrq, sse4
-AVX_INSTR pextrw, mmx2
-AVX_INSTR phaddw, ssse3, 0, 0, 0
-AVX_INSTR phaddd, ssse3, 0, 0, 0
-AVX_INSTR phaddsw, ssse3, 0, 0, 0
-AVX_INSTR phminposuw, sse4
-AVX_INSTR phsubw, ssse3, 0, 0, 0
-AVX_INSTR phsubd, ssse3, 0, 0, 0
-AVX_INSTR phsubsw, ssse3, 0, 0, 0
-AVX_INSTR pinsrb, sse4
-AVX_INSTR pinsrd, sse4
-AVX_INSTR pinsrq, sse4
-AVX_INSTR pinsrw, mmx2
-AVX_INSTR pmaddwd, mmx, 0, 0, 1
-AVX_INSTR pmaddubsw, ssse3, 0, 0, 0
-AVX_INSTR pmaxsb, sse4, 0, 0, 1
-AVX_INSTR pmaxsw, mmx2, 0, 0, 1
-AVX_INSTR pmaxsd, sse4, 0, 0, 1
-AVX_INSTR pmaxub, mmx2, 0, 0, 1
-AVX_INSTR pmaxuw, sse4, 0, 0, 1
-AVX_INSTR pmaxud, sse4, 0, 0, 1
-AVX_INSTR pminsb, sse4, 0, 0, 1
-AVX_INSTR pminsw, mmx2, 0, 0, 1
-AVX_INSTR pminsd, sse4, 0, 0, 1
-AVX_INSTR pminub, mmx2, 0, 0, 1
-AVX_INSTR pminuw, sse4, 0, 0, 1
-AVX_INSTR pminud, sse4, 0, 0, 1
-AVX_INSTR pmovmskb, mmx2
-AVX_INSTR pmovsxbw, sse4
-AVX_INSTR pmovsxbd, sse4
-AVX_INSTR pmovsxbq, sse4
-AVX_INSTR pmovsxwd, sse4
-AVX_INSTR pmovsxwq, sse4
-AVX_INSTR pmovsxdq, sse4
-AVX_INSTR pmovzxbw, sse4
-AVX_INSTR pmovzxbd, sse4
-AVX_INSTR pmovzxbq, sse4
-AVX_INSTR pmovzxwd, sse4
-AVX_INSTR pmovzxwq, sse4
-AVX_INSTR pmovzxdq, sse4
-AVX_INSTR pmuldq, sse4, 0, 0, 1
-AVX_INSTR pmulhrsw, ssse3, 0, 0, 1
-AVX_INSTR pmulhuw, mmx2, 0, 0, 1
-AVX_INSTR pmulhw, mmx, 0, 0, 1
-AVX_INSTR pmullw, mmx, 0, 0, 1
-AVX_INSTR pmulld, sse4, 0, 0, 1
-AVX_INSTR pmuludq, sse2, 0, 0, 1
-AVX_INSTR por, mmx, 0, 0, 1
-AVX_INSTR psadbw, mmx2, 0, 0, 1
-AVX_INSTR pshufb, ssse3, 0, 0, 0
-AVX_INSTR pshufd, sse2
-AVX_INSTR pshufhw, sse2
-AVX_INSTR pshuflw, sse2
-AVX_INSTR psignb, ssse3, 0, 0, 0
-AVX_INSTR psignw, ssse3, 0, 0, 0
-AVX_INSTR psignd, ssse3, 0, 0, 0
-AVX_INSTR psllw, mmx, 0, 0, 0
-AVX_INSTR pslld, mmx, 0, 0, 0
-AVX_INSTR psllq, mmx, 0, 0, 0
-AVX_INSTR pslldq, sse2, 0, 0, 0
-AVX_INSTR psraw, mmx, 0, 0, 0
-AVX_INSTR psrad, mmx, 0, 0, 0
-AVX_INSTR psrlw, mmx, 0, 0, 0
-AVX_INSTR psrld, mmx, 0, 0, 0
-AVX_INSTR psrlq, mmx, 0, 0, 0
-AVX_INSTR psrldq, sse2, 0, 0, 0
-AVX_INSTR psubb, mmx, 0, 0, 0
-AVX_INSTR psubw, mmx, 0, 0, 0
-AVX_INSTR psubd, mmx, 0, 0, 0
-AVX_INSTR psubq, sse2, 0, 0, 0
-AVX_INSTR psubsb, mmx, 0, 0, 0
-AVX_INSTR psubsw, mmx, 0, 0, 0
-AVX_INSTR psubusb, mmx, 0, 0, 0
-AVX_INSTR psubusw, mmx, 0, 0, 0
-AVX_INSTR ptest, sse4
-AVX_INSTR punpckhbw, mmx, 0, 0, 0
-AVX_INSTR punpckhwd, mmx, 0, 0, 0
-AVX_INSTR punpckhdq, mmx, 0, 0, 0
-AVX_INSTR punpckhqdq, sse2, 0, 0, 0
-AVX_INSTR punpcklbw, mmx, 0, 0, 0
-AVX_INSTR punpcklwd, mmx, 0, 0, 0
-AVX_INSTR punpckldq, mmx, 0, 0, 0
-AVX_INSTR punpcklqdq, sse2, 0, 0, 0
-AVX_INSTR pxor, mmx, 0, 0, 1
-AVX_INSTR rcpps, sse, 1, 0, 0
-AVX_INSTR rcpss, sse, 1, 0, 0
-AVX_INSTR roundpd, sse4
-AVX_INSTR roundps, sse4
-AVX_INSTR roundsd, sse4
-AVX_INSTR roundss, sse4
-AVX_INSTR rsqrtps, sse, 1, 0, 0
-AVX_INSTR rsqrtss, sse, 1, 0, 0
-AVX_INSTR shufpd, sse2, 1, 1, 0
-AVX_INSTR shufps, sse, 1, 1, 0
-AVX_INSTR sqrtpd, sse2, 1, 0, 0
-AVX_INSTR sqrtps, sse, 1, 0, 0
-AVX_INSTR sqrtsd, sse2, 1, 0, 0
-AVX_INSTR sqrtss, sse, 1, 0, 0
-AVX_INSTR stmxcsr, sse
-AVX_INSTR subpd, sse2, 1, 0, 0
-AVX_INSTR subps, sse, 1, 0, 0
-AVX_INSTR subsd, sse2, 1, 0, 0
-AVX_INSTR subss, sse, 1, 0, 0
-AVX_INSTR ucomisd, sse2
-AVX_INSTR ucomiss, sse
-AVX_INSTR unpckhpd, sse2, 1, 0, 0
-AVX_INSTR unpckhps, sse, 1, 0, 0
-AVX_INSTR unpcklpd, sse2, 1, 0, 0
-AVX_INSTR unpcklps, sse, 1, 0, 0
-AVX_INSTR xorpd, sse2, 1, 0, 1
-AVX_INSTR xorps, sse, 1, 0, 1
-
-; 3DNow instructions, for sharing code between AVX, SSE and 3DN
-AVX_INSTR pfadd, 3dnow, 1, 0, 1
-AVX_INSTR pfsub, 3dnow, 1, 0, 0
-AVX_INSTR pfmul, 3dnow, 1, 0, 1
-
-; base-4 constants for shuffles
-%assign i 0
-%rep 256
- %assign j ((i>>6)&3)*1000 + ((i>>4)&3)*100 + ((i>>2)&3)*10 + (i&3)
- %if j < 10
- CAT_XDEFINE q000, j, i
- %elif j < 100
- CAT_XDEFINE q00, j, i
- %elif j < 1000
- CAT_XDEFINE q0, j, i
- %else
- CAT_XDEFINE q, j, i
- %endif
- %assign i i+1
-%endrep
-%undef i
-%undef j
-
-%macro FMA_INSTR 3
- %macro %1 4-7 %1, %2, %3
- %if cpuflag(xop)
- v%5 %1, %2, %3, %4
- %elifnidn %1, %4
- %6 %1, %2, %3
- %7 %1, %4
- %else
- %error non-xop emulation of ``%5 %1, %2, %3, %4'' is not supported
- %endif
- %endmacro
-%endmacro
-
-FMA_INSTR pmacsww, pmullw, paddw
-FMA_INSTR pmacsdd, pmulld, paddd ; sse4 emulation
-FMA_INSTR pmacsdql, pmuldq, paddq ; sse4 emulation
-FMA_INSTR pmadcswd, pmaddwd, paddd
-
-; Macros for consolidating FMA3 and FMA4 using 4-operand (dst, src1, src2, src3) syntax.
-; FMA3 is only possible if dst is the same as one of the src registers.
-; Either src2 or src3 can be a memory operand.
-%macro FMA4_INSTR 2-*
- %push fma4_instr
- %xdefine %$prefix %1
- %rep %0 - 1
- %macro %$prefix%2 4-6 %$prefix, %2
- %if notcpuflag(fma3) && notcpuflag(fma4)
- %error use of ``%5%6'' fma instruction in cpuname function: current_function
- %elif cpuflag(fma4)
- v%5%6 %1, %2, %3, %4
- %elifidn %1, %2
- ; If %3 or %4 is a memory operand it needs to be encoded as the last operand.
- %ifid %3
- v%{5}213%6 %2, %3, %4
- %else
- v%{5}132%6 %2, %4, %3
- %endif
- %elifidn %1, %3
- v%{5}213%6 %3, %2, %4
- %elifidn %1, %4
- v%{5}231%6 %4, %2, %3
- %else
- %error fma3 emulation of ``%5%6 %1, %2, %3, %4'' is not supported
- %endif
- %endmacro
- %rotate 1
- %endrep
- %pop
-%endmacro
-
-FMA4_INSTR fmadd, pd, ps, sd, ss
-FMA4_INSTR fmaddsub, pd, ps
-FMA4_INSTR fmsub, pd, ps, sd, ss
-FMA4_INSTR fmsubadd, pd, ps
-FMA4_INSTR fnmadd, pd, ps, sd, ss
-FMA4_INSTR fnmsub, pd, ps, sd, ss
-
-; workaround: vpbroadcastq is broken in x86_32 due to a yasm bug (fixed in 1.3.0)
-%ifdef __YASM_VER__
- %if __YASM_VERSION_ID__ < 0x01030000 && ARCH_X86_64 == 0
- %macro vpbroadcastq 2
- %if sizeof%1 == 16
- movddup %1, %2
- %else
- vbroadcastsd %1, %2
- %endif
- %endmacro
- %endif
-%endif
diff --git a/third_party/aom/tools/aggregate_entropy_stats.py b/third_party/aom/tools/aggregate_entropy_stats.py
deleted file mode 100644
index 7cb4d18e1..000000000
--- a/third_party/aom/tools/aggregate_entropy_stats.py
+++ /dev/null
@@ -1,39 +0,0 @@
-#!/usr/bin/env python
-## Copyright (c) 2017, Alliance for Open Media. All rights reserved
-##
-## This source code is subject to the terms of the BSD 2 Clause License and
-## the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
-## was not distributed with this source code in the LICENSE file, you can
-## obtain it at www.aomedia.org/license/software. If the Alliance for Open
-## Media Patent License 1.0 was not distributed with this source code in the
-## PATENTS file, you can obtain it at www.aomedia.org/license/patent.
-##
-"""Aggregate multiple entropy stats output which is written in 32-bit int.
-
-python ./aggregate_entropy_stats.py [dir of stats files] [keyword of filenames]
- [filename of final stats]
-"""
-
-__author__ = "yuec@google.com"
-
-import os
-import sys
-import numpy as np
-
-def main():
- dir = sys.argv[1]
- sum = []
- for fn in os.listdir(dir):
- if sys.argv[2] in fn:
- stats = np.fromfile(dir + fn, dtype=np.int32)
- if len(sum) == 0:
- sum = stats
- else:
- sum = np.add(sum, stats)
- if len(sum) == 0:
- print("No stats file is found. Double-check directory and keyword?")
- else:
- sum.tofile(dir+sys.argv[3])
-
-if __name__ == '__main__':
- main()
diff --git a/third_party/aom/tools/aom_entropy_optimizer.c b/third_party/aom/tools/aom_entropy_optimizer.c
deleted file mode 100644
index 551adf4f2..000000000
--- a/third_party/aom/tools/aom_entropy_optimizer.c
+++ /dev/null
@@ -1,758 +0,0 @@
-/*
- * Copyright (c) 2017, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-// This tool is a gadget for offline probability training.
-// A binary executable aom_entropy_optimizer will be generated in tools/. It
-// parses a binary file consisting of counts written in the format of
-// FRAME_COUNTS in entropymode.h, and computes optimized probability tables
-// and CDF tables, which will be written to a new c file optimized_probs.c
-// according to format in the codebase.
-//
-// Command line: ./aom_entropy_optimizer [directory of the count file]
-//
-// The input file can either be generated by encoding a single clip by
-// turning on entropy_stats experiment, or be collected at a larger scale at
-// which a python script which will be provided soon can be used to aggregate
-// multiple stats output.
-
-#include <assert.h>
-#include <stdio.h>
-
-#include "config/aom_config.h"
-
-#include "av1/encoder/encoder.h"
-
-#define SPACES_PER_TAB 2
-#define CDF_MAX_SIZE 16
-
-typedef unsigned int aom_count_type;
-// A log file recording parsed counts
-static FILE *logfile; // TODO(yuec): make it a command line option
-
-static void counts_to_cdf(const aom_count_type *counts, aom_cdf_prob *cdf,
- int modes) {
- int64_t csum[CDF_MAX_SIZE];
- assert(modes <= CDF_MAX_SIZE);
-
- csum[0] = counts[0] + 1;
- for (int i = 1; i < modes; ++i) csum[i] = counts[i] + 1 + csum[i - 1];
-
- for (int i = 0; i < modes; ++i) fprintf(logfile, "%d ", counts[i]);
- fprintf(logfile, "\n");
-
- int64_t sum = csum[modes - 1];
- const int64_t round_shift = sum >> 1;
- for (int i = 0; i < modes; ++i) {
- cdf[i] = (csum[i] * CDF_PROB_TOP + round_shift) / sum;
- cdf[i] = AOMMIN(cdf[i], CDF_PROB_TOP - (modes - 1 + i) * 4);
- cdf[i] = (i == 0) ? AOMMAX(cdf[i], 4) : AOMMAX(cdf[i], cdf[i - 1] + 4);
- }
-}
-
-static int parse_counts_for_cdf_opt(aom_count_type **ct_ptr,
- FILE *const probsfile, int tabs,
- int dim_of_cts, int *cts_each_dim) {
- if (dim_of_cts < 1) {
- fprintf(stderr, "The dimension of a counts vector should be at least 1!\n");
- return 1;
- }
- const int total_modes = cts_each_dim[0];
- if (dim_of_cts == 1) {
- assert(total_modes <= CDF_MAX_SIZE);
- aom_cdf_prob cdfs[CDF_MAX_SIZE];
- aom_count_type *counts1d = *ct_ptr;
-
- counts_to_cdf(counts1d, cdfs, total_modes);
- (*ct_ptr) += total_modes;
-
- if (tabs > 0) fprintf(probsfile, "%*c", tabs * SPACES_PER_TAB, ' ');
- fprintf(probsfile, "AOM_CDF%d(", total_modes);
- for (int k = 0; k < total_modes - 1; ++k) {
- fprintf(probsfile, "%d", cdfs[k]);
- if (k < total_modes - 2) fprintf(probsfile, ", ");
- }
- fprintf(probsfile, ")");
- } else {
- for (int k = 0; k < total_modes; ++k) {
- int tabs_next_level;
-
- if (dim_of_cts == 2)
- fprintf(probsfile, "%*c{ ", tabs * SPACES_PER_TAB, ' ');
- else
- fprintf(probsfile, "%*c{\n", tabs * SPACES_PER_TAB, ' ');
- tabs_next_level = dim_of_cts == 2 ? 0 : tabs + 1;
-
- if (parse_counts_for_cdf_opt(ct_ptr, probsfile, tabs_next_level,
- dim_of_cts - 1, cts_each_dim + 1)) {
- return 1;
- }
-
- if (dim_of_cts == 2) {
- if (k == total_modes - 1)
- fprintf(probsfile, " }\n");
- else
- fprintf(probsfile, " },\n");
- } else {
- if (k == total_modes - 1)
- fprintf(probsfile, "%*c}\n", tabs * SPACES_PER_TAB, ' ');
- else
- fprintf(probsfile, "%*c},\n", tabs * SPACES_PER_TAB, ' ');
- }
- }
- }
- return 0;
-}
-
-static void optimize_cdf_table(aom_count_type *counts, FILE *const probsfile,
- int dim_of_cts, int *cts_each_dim,
- char *prefix) {
- aom_count_type *ct_ptr = counts;
-
- fprintf(probsfile, "%s = {\n", prefix);
- fprintf(logfile, "%s\n", prefix);
- if (parse_counts_for_cdf_opt(&ct_ptr, probsfile, 1, dim_of_cts,
- cts_each_dim)) {
- fprintf(probsfile, "Optimizer failed!\n");
- }
- fprintf(probsfile, "};\n\n");
- fprintf(logfile, "============================\n");
-}
-
-static void optimize_uv_mode(aom_count_type *counts, FILE *const probsfile,
- int dim_of_cts, int *cts_each_dim, char *prefix) {
- aom_count_type *ct_ptr = counts;
-
- fprintf(probsfile, "%s = {\n", prefix);
- fprintf(probsfile, "%*c{\n", SPACES_PER_TAB, ' ');
- fprintf(logfile, "%s\n", prefix);
- cts_each_dim[2] = UV_INTRA_MODES - 1;
- for (int k = 0; k < cts_each_dim[1]; ++k) {
- fprintf(probsfile, "%*c{ ", 2 * SPACES_PER_TAB, ' ');
- parse_counts_for_cdf_opt(&ct_ptr, probsfile, 0, dim_of_cts - 2,
- cts_each_dim + 2);
- if (k + 1 == cts_each_dim[1]) {
- fprintf(probsfile, " }\n");
- } else {
- fprintf(probsfile, " },\n");
- }
- ++ct_ptr;
- }
- fprintf(probsfile, "%*c},\n", SPACES_PER_TAB, ' ');
- fprintf(probsfile, "%*c{\n", SPACES_PER_TAB, ' ');
- cts_each_dim[2] = UV_INTRA_MODES;
- parse_counts_for_cdf_opt(&ct_ptr, probsfile, 2, dim_of_cts - 1,
- cts_each_dim + 1);
- fprintf(probsfile, "%*c}\n", SPACES_PER_TAB, ' ');
- fprintf(probsfile, "};\n\n");
- fprintf(logfile, "============================\n");
-}
-
-static void optimize_cdf_table_var_modes_2d(aom_count_type *counts,
- FILE *const probsfile,
- int dim_of_cts, int *cts_each_dim,
- int *modes_each_ctx, char *prefix) {
- aom_count_type *ct_ptr = counts;
-
- assert(dim_of_cts == 2);
- (void)dim_of_cts;
-
- fprintf(probsfile, "%s = {\n", prefix);
- fprintf(logfile, "%s\n", prefix);
-
- for (int d0_idx = 0; d0_idx < cts_each_dim[0]; ++d0_idx) {
- int num_of_modes = modes_each_ctx[d0_idx];
-
- if (num_of_modes > 0) {
- fprintf(probsfile, "%*c{ ", SPACES_PER_TAB, ' ');
- parse_counts_for_cdf_opt(&ct_ptr, probsfile, 0, 1, &num_of_modes);
- ct_ptr += cts_each_dim[1] - num_of_modes;
- fprintf(probsfile, " },\n");
- } else {
- fprintf(probsfile, "%*c{ 0 },\n", SPACES_PER_TAB, ' ');
- fprintf(logfile, "dummy cdf, no need to optimize\n");
- ct_ptr += cts_each_dim[1];
- }
- }
- fprintf(probsfile, "};\n\n");
- fprintf(logfile, "============================\n");
-}
-
-static void optimize_cdf_table_var_modes_3d(aom_count_type *counts,
- FILE *const probsfile,
- int dim_of_cts, int *cts_each_dim,
- int *modes_each_ctx, char *prefix) {
- aom_count_type *ct_ptr = counts;
-
- assert(dim_of_cts == 3);
- (void)dim_of_cts;
-
- fprintf(probsfile, "%s = {\n", prefix);
- fprintf(logfile, "%s\n", prefix);
-
- for (int d0_idx = 0; d0_idx < cts_each_dim[0]; ++d0_idx) {
- fprintf(probsfile, "%*c{\n", SPACES_PER_TAB, ' ');
- for (int d1_idx = 0; d1_idx < cts_each_dim[1]; ++d1_idx) {
- int num_of_modes = modes_each_ctx[d0_idx];
-
- if (num_of_modes > 0) {
- fprintf(probsfile, "%*c{ ", 2 * SPACES_PER_TAB, ' ');
- parse_counts_for_cdf_opt(&ct_ptr, probsfile, 0, 1, &num_of_modes);
- ct_ptr += cts_each_dim[2] - num_of_modes;
- fprintf(probsfile, " },\n");
- } else {
- fprintf(probsfile, "%*c{ 0 },\n", 2 * SPACES_PER_TAB, ' ');
- fprintf(logfile, "dummy cdf, no need to optimize\n");
- ct_ptr += cts_each_dim[2];
- }
- }
- fprintf(probsfile, "%*c},\n", SPACES_PER_TAB, ' ');
- }
- fprintf(probsfile, "};\n\n");
- fprintf(logfile, "============================\n");
-}
-
-static void optimize_cdf_table_var_modes_4d(aom_count_type *counts,
- FILE *const probsfile,
- int dim_of_cts, int *cts_each_dim,
- int *modes_each_ctx, char *prefix) {
- aom_count_type *ct_ptr = counts;
-
- assert(dim_of_cts == 4);
- (void)dim_of_cts;
-
- fprintf(probsfile, "%s = {\n", prefix);
- fprintf(logfile, "%s\n", prefix);
-
- for (int d0_idx = 0; d0_idx < cts_each_dim[0]; ++d0_idx) {
- fprintf(probsfile, "%*c{\n", SPACES_PER_TAB, ' ');
- for (int d1_idx = 0; d1_idx < cts_each_dim[1]; ++d1_idx) {
- fprintf(probsfile, "%*c{\n", 2 * SPACES_PER_TAB, ' ');
- for (int d2_idx = 0; d2_idx < cts_each_dim[2]; ++d2_idx) {
- int num_of_modes = modes_each_ctx[d0_idx];
-
- if (num_of_modes > 0) {
- fprintf(probsfile, "%*c{ ", 3 * SPACES_PER_TAB, ' ');
- parse_counts_for_cdf_opt(&ct_ptr, probsfile, 0, 1, &num_of_modes);
- ct_ptr += cts_each_dim[3] - num_of_modes;
- fprintf(probsfile, " },\n");
- } else {
- fprintf(probsfile, "%*c{ 0 },\n", 3 * SPACES_PER_TAB, ' ');
- fprintf(logfile, "dummy cdf, no need to optimize\n");
- ct_ptr += cts_each_dim[3];
- }
- }
- fprintf(probsfile, "%*c},\n", 2 * SPACES_PER_TAB, ' ');
- }
- fprintf(probsfile, "%*c},\n", SPACES_PER_TAB, ' ');
- }
- fprintf(probsfile, "};\n\n");
- fprintf(logfile, "============================\n");
-}
-
-int main(int argc, const char **argv) {
- if (argc < 2) {
- fprintf(stderr, "Please specify the input stats file!\n");
- exit(EXIT_FAILURE);
- }
-
- FILE *const statsfile = fopen(argv[1], "rb");
- if (statsfile == NULL) {
- fprintf(stderr, "Failed to open input file!\n");
- exit(EXIT_FAILURE);
- }
-
- FRAME_COUNTS fc;
- const size_t bytes = fread(&fc, sizeof(FRAME_COUNTS), 1, statsfile);
- if (!bytes) return 1;
-
- FILE *const probsfile = fopen("optimized_probs.c", "w");
- if (probsfile == NULL) {
- fprintf(stderr,
- "Failed to create output file for optimized entropy tables!\n");
- exit(EXIT_FAILURE);
- }
-
- logfile = fopen("aom_entropy_optimizer_parsed_counts.log", "w");
- if (logfile == NULL) {
- fprintf(stderr, "Failed to create log file for parsed counts!\n");
- exit(EXIT_FAILURE);
- }
-
- int cts_each_dim[10];
-
- /* Intra mode (keyframe luma) */
- cts_each_dim[0] = KF_MODE_CONTEXTS;
- cts_each_dim[1] = KF_MODE_CONTEXTS;
- cts_each_dim[2] = INTRA_MODES;
- optimize_cdf_table(&fc.kf_y_mode[0][0][0], probsfile, 3, cts_each_dim,
- "const aom_cdf_prob\n"
- "default_kf_y_mode_cdf[KF_MODE_CONTEXTS][KF_MODE_CONTEXTS]"
- "[CDF_SIZE(INTRA_MODES)]");
-
- cts_each_dim[0] = DIRECTIONAL_MODES;
- cts_each_dim[1] = 2 * MAX_ANGLE_DELTA + 1;
- optimize_cdf_table(&fc.angle_delta[0][0], probsfile, 2, cts_each_dim,
- "static const aom_cdf_prob default_angle_delta_cdf"
- "[DIRECTIONAL_MODES][CDF_SIZE(2 * MAX_ANGLE_DELTA + 1)]");
-
- /* Intra mode (non-keyframe luma) */
- cts_each_dim[0] = BLOCK_SIZE_GROUPS;
- cts_each_dim[1] = INTRA_MODES;
- optimize_cdf_table(
- &fc.y_mode[0][0], probsfile, 2, cts_each_dim,
- "static const aom_cdf_prob\n"
- "default_if_y_mode_cdf[BLOCK_SIZE_GROUPS][CDF_SIZE(INTRA_MODES)]");
-
- /* Intra mode (chroma) */
- cts_each_dim[0] = CFL_ALLOWED_TYPES;
- cts_each_dim[1] = INTRA_MODES;
- cts_each_dim[2] = UV_INTRA_MODES;
- optimize_uv_mode(&fc.uv_mode[0][0][0], probsfile, 3, cts_each_dim,
- "static const aom_cdf_prob\n"
- "default_uv_mode_cdf[CFL_ALLOWED_TYPES][INTRA_MODES]"
- "[CDF_SIZE(UV_INTRA_MODES)]");
-
- /* block partition */
- cts_each_dim[0] = PARTITION_CONTEXTS;
- cts_each_dim[1] = EXT_PARTITION_TYPES;
- int part_types_each_ctx[PARTITION_CONTEXTS] = {
- 4, 4, 4, 4, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 8, 8, 8, 8
- };
- optimize_cdf_table_var_modes_2d(
- &fc.partition[0][0], probsfile, 2, cts_each_dim, part_types_each_ctx,
- "static const aom_cdf_prob default_partition_cdf[PARTITION_CONTEXTS]"
- "[CDF_SIZE(EXT_PARTITION_TYPES)]");
-
- /* tx type */
- cts_each_dim[0] = EXT_TX_SETS_INTRA;
- cts_each_dim[1] = EXT_TX_SIZES;
- cts_each_dim[2] = INTRA_MODES;
- cts_each_dim[3] = TX_TYPES;
- int intra_ext_tx_types_each_ctx[EXT_TX_SETS_INTRA] = { 0, 7, 5 };
- optimize_cdf_table_var_modes_4d(
- &fc.intra_ext_tx[0][0][0][0], probsfile, 4, cts_each_dim,
- intra_ext_tx_types_each_ctx,
- "static const aom_cdf_prob default_intra_ext_tx_cdf[EXT_TX_SETS_INTRA]"
- "[EXT_TX_SIZES][INTRA_MODES][CDF_SIZE(TX_TYPES)]");
-
- cts_each_dim[0] = EXT_TX_SETS_INTER;
- cts_each_dim[1] = EXT_TX_SIZES;
- cts_each_dim[2] = TX_TYPES;
- int inter_ext_tx_types_each_ctx[EXT_TX_SETS_INTER] = { 0, 16, 12, 2 };
- optimize_cdf_table_var_modes_3d(
- &fc.inter_ext_tx[0][0][0], probsfile, 3, cts_each_dim,
- inter_ext_tx_types_each_ctx,
- "static const aom_cdf_prob default_inter_ext_tx_cdf[EXT_TX_SETS_INTER]"
- "[EXT_TX_SIZES][CDF_SIZE(TX_TYPES)]");
-
- /* Chroma from Luma */
- cts_each_dim[0] = CFL_JOINT_SIGNS;
- optimize_cdf_table(&fc.cfl_sign[0], probsfile, 1, cts_each_dim,
- "static const aom_cdf_prob\n"
- "default_cfl_sign_cdf[CDF_SIZE(CFL_JOINT_SIGNS)]");
- cts_each_dim[0] = CFL_ALPHA_CONTEXTS;
- cts_each_dim[1] = CFL_ALPHABET_SIZE;
- optimize_cdf_table(&fc.cfl_alpha[0][0], probsfile, 2, cts_each_dim,
- "static const aom_cdf_prob\n"
- "default_cfl_alpha_cdf[CFL_ALPHA_CONTEXTS]"
- "[CDF_SIZE(CFL_ALPHABET_SIZE)]");
-
- /* Interpolation filter */
- cts_each_dim[0] = SWITCHABLE_FILTER_CONTEXTS;
- cts_each_dim[1] = SWITCHABLE_FILTERS;
- optimize_cdf_table(&fc.switchable_interp[0][0], probsfile, 2, cts_each_dim,
- "static const aom_cdf_prob\n"
- "default_switchable_interp_cdf[SWITCHABLE_FILTER_CONTEXTS]"
- "[CDF_SIZE(SWITCHABLE_FILTERS)]");
-
- /* Motion vector referencing */
- cts_each_dim[0] = NEWMV_MODE_CONTEXTS;
- cts_each_dim[1] = 2;
- optimize_cdf_table(&fc.newmv_mode[0][0], probsfile, 2, cts_each_dim,
- "static const aom_cdf_prob "
- "default_newmv_cdf[NEWMV_MODE_CONTEXTS][CDF_SIZE(2)]");
-
- cts_each_dim[0] = GLOBALMV_MODE_CONTEXTS;
- cts_each_dim[1] = 2;
- optimize_cdf_table(&fc.zeromv_mode[0][0], probsfile, 2, cts_each_dim,
- "static const aom_cdf_prob "
- "default_zeromv_cdf[GLOBALMV_MODE_CONTEXTS][CDF_SIZE(2)]");
-
- cts_each_dim[0] = REFMV_MODE_CONTEXTS;
- cts_each_dim[1] = 2;
- optimize_cdf_table(&fc.refmv_mode[0][0], probsfile, 2, cts_each_dim,
- "static const aom_cdf_prob "
- "default_refmv_cdf[REFMV_MODE_CONTEXTS][CDF_SIZE(2)]");
-
- cts_each_dim[0] = DRL_MODE_CONTEXTS;
- cts_each_dim[1] = 2;
- optimize_cdf_table(&fc.drl_mode[0][0], probsfile, 2, cts_each_dim,
- "static const aom_cdf_prob "
- "default_drl_cdf[DRL_MODE_CONTEXTS][CDF_SIZE(2)]");
-
- /* ext_inter experiment */
- /* New compound mode */
- cts_each_dim[0] = INTER_MODE_CONTEXTS;
- cts_each_dim[1] = INTER_COMPOUND_MODES;
- optimize_cdf_table(&fc.inter_compound_mode[0][0], probsfile, 2, cts_each_dim,
- "static const aom_cdf_prob\n"
- "default_inter_compound_mode_cdf[INTER_MODE_CONTEXTS][CDF_"
- "SIZE(INTER_COMPOUND_MODES)]");
-
- /* Interintra */
- cts_each_dim[0] = BLOCK_SIZE_GROUPS;
- cts_each_dim[1] = 2;
- optimize_cdf_table(&fc.interintra[0][0], probsfile, 2, cts_each_dim,
- "static const aom_cdf_prob "
- "default_interintra_cdf[BLOCK_SIZE_GROUPS][CDF_SIZE(2)]");
-
- cts_each_dim[0] = BLOCK_SIZE_GROUPS;
- cts_each_dim[1] = INTERINTRA_MODES;
- optimize_cdf_table(&fc.interintra_mode[0][0], probsfile, 2, cts_each_dim,
- "static const aom_cdf_prob\n"
- "default_interintra_mode_cdf[BLOCK_SIZE_GROUPS][CDF_SIZE("
- "INTERINTRA_MODES)]");
-
- cts_each_dim[0] = BLOCK_SIZES_ALL;
- cts_each_dim[1] = 2;
- optimize_cdf_table(
- &fc.wedge_interintra[0][0], probsfile, 2, cts_each_dim,
- "static const aom_cdf_prob\n"
- "default_wedge_interintra_cdf[BLOCK_SIZES_ALL][CDF_SIZE(2)]");
-
- /* Compound type */
- cts_each_dim[0] = BLOCK_SIZES_ALL;
- cts_each_dim[1] = COMPOUND_TYPES - 1;
- optimize_cdf_table(&fc.compound_type[0][0], probsfile, 2, cts_each_dim,
- "static const aom_cdf_prob default_compound_type_cdf"
- "[BLOCK_SIZES_ALL][CDF_SIZE(COMPOUND_TYPES - 1)]");
-
- cts_each_dim[0] = BLOCK_SIZES_ALL;
- cts_each_dim[1] = 16;
- optimize_cdf_table(&fc.wedge_idx[0][0], probsfile, 2, cts_each_dim,
- "static const aom_cdf_prob "
- "default_wedge_idx_cdf[BLOCK_SIZES_ALL][CDF_SIZE(16)]");
-
- /* motion_var and warped_motion experiments */
- cts_each_dim[0] = BLOCK_SIZES_ALL;
- cts_each_dim[1] = MOTION_MODES;
- optimize_cdf_table(
- &fc.motion_mode[0][0], probsfile, 2, cts_each_dim,
- "static const aom_cdf_prob\n"
- "default_motion_mode_cdf[BLOCK_SIZES_ALL][CDF_SIZE(MOTION_MODES)]");
- cts_each_dim[0] = BLOCK_SIZES_ALL;
- cts_each_dim[1] = 2;
- optimize_cdf_table(&fc.obmc[0][0], probsfile, 2, cts_each_dim,
- "static const aom_cdf_prob "
- "default_obmc_cdf[BLOCK_SIZES_ALL][CDF_SIZE(2)]");
-
- /* Intra/inter flag */
- cts_each_dim[0] = INTRA_INTER_CONTEXTS;
- cts_each_dim[1] = 2;
- optimize_cdf_table(
- &fc.intra_inter[0][0], probsfile, 2, cts_each_dim,
- "static const aom_cdf_prob\n"
- "default_intra_inter_cdf[INTRA_INTER_CONTEXTS][CDF_SIZE(2)]");
-
- /* Single/comp ref flag */
- cts_each_dim[0] = COMP_INTER_CONTEXTS;
- cts_each_dim[1] = 2;
- optimize_cdf_table(
- &fc.comp_inter[0][0], probsfile, 2, cts_each_dim,
- "static const aom_cdf_prob\n"
- "default_comp_inter_cdf[COMP_INTER_CONTEXTS][CDF_SIZE(2)]");
-
- /* ext_comp_refs experiment */
- cts_each_dim[0] = COMP_REF_TYPE_CONTEXTS;
- cts_each_dim[1] = 2;
- optimize_cdf_table(
- &fc.comp_ref_type[0][0], probsfile, 2, cts_each_dim,
- "static const aom_cdf_prob\n"
- "default_comp_ref_type_cdf[COMP_REF_TYPE_CONTEXTS][CDF_SIZE(2)]");
-
- cts_each_dim[0] = UNI_COMP_REF_CONTEXTS;
- cts_each_dim[1] = UNIDIR_COMP_REFS - 1;
- cts_each_dim[2] = 2;
- optimize_cdf_table(&fc.uni_comp_ref[0][0][0], probsfile, 3, cts_each_dim,
- "static const aom_cdf_prob\n"
- "default_uni_comp_ref_cdf[UNI_COMP_REF_CONTEXTS][UNIDIR_"
- "COMP_REFS - 1][CDF_SIZE(2)]");
-
- /* Reference frame (single ref) */
- cts_each_dim[0] = REF_CONTEXTS;
- cts_each_dim[1] = SINGLE_REFS - 1;
- cts_each_dim[2] = 2;
- optimize_cdf_table(
- &fc.single_ref[0][0][0], probsfile, 3, cts_each_dim,
- "static const aom_cdf_prob\n"
- "default_single_ref_cdf[REF_CONTEXTS][SINGLE_REFS - 1][CDF_SIZE(2)]");
-
- /* ext_refs experiment */
- cts_each_dim[0] = REF_CONTEXTS;
- cts_each_dim[1] = FWD_REFS - 1;
- cts_each_dim[2] = 2;
- optimize_cdf_table(
- &fc.comp_ref[0][0][0], probsfile, 3, cts_each_dim,
- "static const aom_cdf_prob\n"
- "default_comp_ref_cdf[REF_CONTEXTS][FWD_REFS - 1][CDF_SIZE(2)]");
-
- cts_each_dim[0] = REF_CONTEXTS;
- cts_each_dim[1] = BWD_REFS - 1;
- cts_each_dim[2] = 2;
- optimize_cdf_table(
- &fc.comp_bwdref[0][0][0], probsfile, 3, cts_each_dim,
- "static const aom_cdf_prob\n"
- "default_comp_bwdref_cdf[REF_CONTEXTS][BWD_REFS - 1][CDF_SIZE(2)]");
-
- /* palette */
- cts_each_dim[0] = PALATTE_BSIZE_CTXS;
- cts_each_dim[1] = PALETTE_SIZES;
- optimize_cdf_table(&fc.palette_y_size[0][0], probsfile, 2, cts_each_dim,
- "const aom_cdf_prob default_palette_y_size_cdf"
- "[PALATTE_BSIZE_CTXS][CDF_SIZE(PALETTE_SIZES)]");
-
- cts_each_dim[0] = PALATTE_BSIZE_CTXS;
- cts_each_dim[1] = PALETTE_SIZES;
- optimize_cdf_table(&fc.palette_uv_size[0][0], probsfile, 2, cts_each_dim,
- "const aom_cdf_prob default_palette_uv_size_cdf"
- "[PALATTE_BSIZE_CTXS][CDF_SIZE(PALETTE_SIZES)]");
-
- cts_each_dim[0] = PALATTE_BSIZE_CTXS;
- cts_each_dim[1] = PALETTE_Y_MODE_CONTEXTS;
- cts_each_dim[2] = 2;
- optimize_cdf_table(&fc.palette_y_mode[0][0][0], probsfile, 3, cts_each_dim,
- "const aom_cdf_prob default_palette_y_mode_cdf"
- "[PALATTE_BSIZE_CTXS][PALETTE_Y_MODE_CONTEXTS]"
- "[CDF_SIZE(2)]");
-
- cts_each_dim[0] = PALETTE_UV_MODE_CONTEXTS;
- cts_each_dim[1] = 2;
- optimize_cdf_table(&fc.palette_uv_mode[0][0], probsfile, 2, cts_each_dim,
- "const aom_cdf_prob default_palette_uv_mode_cdf"
- "[PALETTE_UV_MODE_CONTEXTS][CDF_SIZE(2)]");
-
- cts_each_dim[0] = PALETTE_SIZES;
- cts_each_dim[1] = PALETTE_COLOR_INDEX_CONTEXTS;
- cts_each_dim[2] = PALETTE_COLORS;
- int palette_color_indexes_each_ctx[PALETTE_SIZES] = { 2, 3, 4, 5, 6, 7, 8 };
- optimize_cdf_table_var_modes_3d(
- &fc.palette_y_color_index[0][0][0], probsfile, 3, cts_each_dim,
- palette_color_indexes_each_ctx,
- "const aom_cdf_prob default_palette_y_color_index_cdf[PALETTE_SIZES]"
- "[PALETTE_COLOR_INDEX_CONTEXTS][CDF_SIZE(PALETTE_COLORS)]");
-
- cts_each_dim[0] = PALETTE_SIZES;
- cts_each_dim[1] = PALETTE_COLOR_INDEX_CONTEXTS;
- cts_each_dim[2] = PALETTE_COLORS;
- optimize_cdf_table_var_modes_3d(
- &fc.palette_uv_color_index[0][0][0], probsfile, 3, cts_each_dim,
- palette_color_indexes_each_ctx,
- "const aom_cdf_prob default_palette_uv_color_index_cdf[PALETTE_SIZES]"
- "[PALETTE_COLOR_INDEX_CONTEXTS][CDF_SIZE(PALETTE_COLORS)]");
-
- /* Transform size */
- cts_each_dim[0] = TXFM_PARTITION_CONTEXTS;
- cts_each_dim[1] = 2;
- optimize_cdf_table(
- &fc.txfm_partition[0][0], probsfile, 2, cts_each_dim,
- "static const aom_cdf_prob\n"
- "default_txfm_partition_cdf[TXFM_PARTITION_CONTEXTS][CDF_SIZE(2)]");
-
- /* Skip flag */
- cts_each_dim[0] = SKIP_CONTEXTS;
- cts_each_dim[1] = 2;
- optimize_cdf_table(&fc.skip[0][0], probsfile, 2, cts_each_dim,
- "static const aom_cdf_prob "
- "default_skip_cdfs[SKIP_CONTEXTS][CDF_SIZE(2)]");
-
- /* Skip mode flag */
- cts_each_dim[0] = SKIP_MODE_CONTEXTS;
- cts_each_dim[1] = 2;
- optimize_cdf_table(&fc.skip_mode[0][0], probsfile, 2, cts_each_dim,
- "static const aom_cdf_prob "
- "default_skip_mode_cdfs[SKIP_MODE_CONTEXTS][CDF_SIZE(2)]");
-
- /* joint compound flag */
- cts_each_dim[0] = COMP_INDEX_CONTEXTS;
- cts_each_dim[1] = 2;
- optimize_cdf_table(&fc.compound_index[0][0], probsfile, 2, cts_each_dim,
- "static const aom_cdf_prob default_compound_idx_cdfs"
- "[COMP_INDEX_CONTEXTS][CDF_SIZE(2)]");
-
- cts_each_dim[0] = COMP_GROUP_IDX_CONTEXTS;
- cts_each_dim[1] = 2;
- optimize_cdf_table(&fc.comp_group_idx[0][0], probsfile, 2, cts_each_dim,
- "static const aom_cdf_prob default_comp_group_idx_cdfs"
- "[COMP_GROUP_IDX_CONTEXTS][CDF_SIZE(2)]");
-
- /* intrabc */
- cts_each_dim[0] = 2;
- optimize_cdf_table(
- &fc.intrabc[0], probsfile, 1, cts_each_dim,
- "static const aom_cdf_prob default_intrabc_cdf[CDF_SIZE(2)]");
-
- /* filter_intra experiment */
- cts_each_dim[0] = FILTER_INTRA_MODES;
- optimize_cdf_table(
- &fc.filter_intra_mode[0], probsfile, 1, cts_each_dim,
- "static const aom_cdf_prob "
- "default_filter_intra_mode_cdf[CDF_SIZE(FILTER_INTRA_MODES)]");
-
- cts_each_dim[0] = BLOCK_SIZES_ALL;
- cts_each_dim[1] = 2;
- optimize_cdf_table(&fc.filter_intra[0][0], probsfile, 2, cts_each_dim,
- "static const aom_cdf_prob "
- "default_filter_intra_cdfs[BLOCK_SIZES_ALL][CDF_SIZE(2)]");
-
- /* restoration type */
- cts_each_dim[0] = RESTORE_SWITCHABLE_TYPES;
- optimize_cdf_table(&fc.switchable_restore[0], probsfile, 1, cts_each_dim,
- "static const aom_cdf_prob default_switchable_restore_cdf"
- "[CDF_SIZE(RESTORE_SWITCHABLE_TYPES)]");
-
- cts_each_dim[0] = 2;
- optimize_cdf_table(&fc.wiener_restore[0], probsfile, 1, cts_each_dim,
- "static const aom_cdf_prob default_wiener_restore_cdf"
- "[CDF_SIZE(2)]");
-
- cts_each_dim[0] = 2;
- optimize_cdf_table(&fc.sgrproj_restore[0], probsfile, 1, cts_each_dim,
- "static const aom_cdf_prob default_sgrproj_restore_cdf"
- "[CDF_SIZE(2)]");
-
- /* intra tx size */
- cts_each_dim[0] = MAX_TX_CATS;
- cts_each_dim[1] = TX_SIZE_CONTEXTS;
- cts_each_dim[2] = MAX_TX_DEPTH + 1;
- int intra_tx_sizes_each_ctx[MAX_TX_CATS] = { 2, 3, 3, 3 };
- optimize_cdf_table_var_modes_3d(
- &fc.intra_tx_size[0][0][0], probsfile, 3, cts_each_dim,
- intra_tx_sizes_each_ctx,
- "static const aom_cdf_prob default_tx_size_cdf"
- "[MAX_TX_CATS][TX_SIZE_CONTEXTS][CDF_SIZE(MAX_TX_DEPTH + 1)]");
-
- /* transform coding */
- cts_each_dim[0] = TOKEN_CDF_Q_CTXS;
- cts_each_dim[1] = TX_SIZES;
- cts_each_dim[2] = TXB_SKIP_CONTEXTS;
- cts_each_dim[3] = 2;
- optimize_cdf_table(&fc.txb_skip[0][0][0][0], probsfile, 4, cts_each_dim,
- "static const aom_cdf_prob "
- "av1_default_txb_skip_cdfs[TOKEN_CDF_Q_CTXS][TX_SIZES]"
- "[TXB_SKIP_CONTEXTS][CDF_SIZE(2)]");
-
- cts_each_dim[0] = TOKEN_CDF_Q_CTXS;
- cts_each_dim[1] = TX_SIZES;
- cts_each_dim[2] = PLANE_TYPES;
- cts_each_dim[3] = EOB_COEF_CONTEXTS;
- cts_each_dim[4] = 2;
- optimize_cdf_table(
- &fc.eob_extra[0][0][0][0][0], probsfile, 5, cts_each_dim,
- "static const aom_cdf_prob av1_default_eob_extra_cdfs "
- "[TOKEN_CDF_Q_CTXS][TX_SIZES][PLANE_TYPES][EOB_COEF_CONTEXTS]"
- "[CDF_SIZE(2)]");
-
- cts_each_dim[0] = TOKEN_CDF_Q_CTXS;
- cts_each_dim[1] = PLANE_TYPES;
- cts_each_dim[2] = 2;
- cts_each_dim[3] = 5;
- optimize_cdf_table(&fc.eob_multi16[0][0][0][0], probsfile, 4, cts_each_dim,
- "static const aom_cdf_prob av1_default_eob_multi16_cdfs"
- "[TOKEN_CDF_Q_CTXS][PLANE_TYPES][2][CDF_SIZE(5)]");
-
- cts_each_dim[0] = TOKEN_CDF_Q_CTXS;
- cts_each_dim[1] = PLANE_TYPES;
- cts_each_dim[2] = 2;
- cts_each_dim[3] = 6;
- optimize_cdf_table(&fc.eob_multi32[0][0][0][0], probsfile, 4, cts_each_dim,
- "static const aom_cdf_prob av1_default_eob_multi32_cdfs"
- "[TOKEN_CDF_Q_CTXS][PLANE_TYPES][2][CDF_SIZE(6)]");
-
- cts_each_dim[0] = TOKEN_CDF_Q_CTXS;
- cts_each_dim[1] = PLANE_TYPES;
- cts_each_dim[2] = 2;
- cts_each_dim[3] = 7;
- optimize_cdf_table(&fc.eob_multi64[0][0][0][0], probsfile, 4, cts_each_dim,
- "static const aom_cdf_prob av1_default_eob_multi64_cdfs"
- "[TOKEN_CDF_Q_CTXS][PLANE_TYPES][2][CDF_SIZE(7)]");
-
- cts_each_dim[0] = TOKEN_CDF_Q_CTXS;
- cts_each_dim[1] = PLANE_TYPES;
- cts_each_dim[2] = 2;
- cts_each_dim[3] = 8;
- optimize_cdf_table(&fc.eob_multi128[0][0][0][0], probsfile, 4, cts_each_dim,
- "static const aom_cdf_prob av1_default_eob_multi128_cdfs"
- "[TOKEN_CDF_Q_CTXS][PLANE_TYPES][2][CDF_SIZE(8)]");
-
- cts_each_dim[0] = TOKEN_CDF_Q_CTXS;
- cts_each_dim[1] = PLANE_TYPES;
- cts_each_dim[2] = 2;
- cts_each_dim[3] = 9;
- optimize_cdf_table(&fc.eob_multi256[0][0][0][0], probsfile, 4, cts_each_dim,
- "static const aom_cdf_prob av1_default_eob_multi256_cdfs"
- "[TOKEN_CDF_Q_CTXS][PLANE_TYPES][2][CDF_SIZE(9)]");
-
- cts_each_dim[0] = TOKEN_CDF_Q_CTXS;
- cts_each_dim[1] = PLANE_TYPES;
- cts_each_dim[2] = 2;
- cts_each_dim[3] = 10;
- optimize_cdf_table(&fc.eob_multi512[0][0][0][0], probsfile, 4, cts_each_dim,
- "static const aom_cdf_prob av1_default_eob_multi512_cdfs"
- "[TOKEN_CDF_Q_CTXS][PLANE_TYPES][2][CDF_SIZE(10)]");
-
- cts_each_dim[0] = TOKEN_CDF_Q_CTXS;
- cts_each_dim[1] = PLANE_TYPES;
- cts_each_dim[2] = 2;
- cts_each_dim[3] = 11;
- optimize_cdf_table(&fc.eob_multi1024[0][0][0][0], probsfile, 4, cts_each_dim,
- "static const aom_cdf_prob av1_default_eob_multi1024_cdfs"
- "[TOKEN_CDF_Q_CTXS][PLANE_TYPES][2][CDF_SIZE(11)]");
-
- cts_each_dim[0] = TOKEN_CDF_Q_CTXS;
- cts_each_dim[1] = TX_SIZES;
- cts_each_dim[2] = PLANE_TYPES;
- cts_each_dim[3] = LEVEL_CONTEXTS;
- cts_each_dim[4] = BR_CDF_SIZE;
- optimize_cdf_table(&fc.coeff_lps_multi[0][0][0][0][0], probsfile, 5,
- cts_each_dim,
- "static const aom_cdf_prob "
- "av1_default_coeff_lps_multi_cdfs[TOKEN_CDF_Q_CTXS]"
- "[TX_SIZES][PLANE_TYPES][LEVEL_CONTEXTS]"
- "[CDF_SIZE(BR_CDF_SIZE)]");
-
- cts_each_dim[0] = TOKEN_CDF_Q_CTXS;
- cts_each_dim[1] = TX_SIZES;
- cts_each_dim[2] = PLANE_TYPES;
- cts_each_dim[3] = SIG_COEF_CONTEXTS;
- cts_each_dim[4] = NUM_BASE_LEVELS + 2;
- optimize_cdf_table(
- &fc.coeff_base_multi[0][0][0][0][0], probsfile, 5, cts_each_dim,
- "static const aom_cdf_prob av1_default_coeff_base_multi_cdfs"
- "[TOKEN_CDF_Q_CTXS][TX_SIZES][PLANE_TYPES][SIG_COEF_CONTEXTS]"
- "[CDF_SIZE(NUM_BASE_LEVELS + 2)]");
-
- cts_each_dim[0] = TOKEN_CDF_Q_CTXS;
- cts_each_dim[1] = TX_SIZES;
- cts_each_dim[2] = PLANE_TYPES;
- cts_each_dim[3] = SIG_COEF_CONTEXTS_EOB;
- cts_each_dim[4] = NUM_BASE_LEVELS + 1;
- optimize_cdf_table(
- &fc.coeff_base_eob_multi[0][0][0][0][0], probsfile, 5, cts_each_dim,
- "static const aom_cdf_prob av1_default_coeff_base_eob_multi_cdfs"
- "[TOKEN_CDF_Q_CTXS][TX_SIZES][PLANE_TYPES][SIG_COEF_CONTEXTS_EOB]"
- "[CDF_SIZE(NUM_BASE_LEVELS + 1)]");
-
- fclose(statsfile);
- fclose(logfile);
- fclose(probsfile);
-
- return 0;
-}
diff --git a/third_party/aom/tools/cpplint.py b/third_party/aom/tools/cpplint.py
deleted file mode 100755
index 25fbef73d..000000000
--- a/third_party/aom/tools/cpplint.py
+++ /dev/null
@@ -1,4756 +0,0 @@
-#!/usr/bin/python
-#
-# Copyright (c) 2009 Google Inc. All rights reserved.
-#
-# Redistribution and use in source and binary forms, with or without
-# modification, are permitted provided that the following conditions are
-# met:
-#
-# * Redistributions of source code must retain the above copyright
-# notice, this list of conditions and the following disclaimer.
-# * Redistributions in binary form must reproduce the above
-# copyright notice, this list of conditions and the following disclaimer
-# in the documentation and/or other materials provided with the
-# distribution.
-# * Neither the name of Google Inc. nor the names of its
-# contributors may be used to endorse or promote products derived from
-# this software without specific prior written permission.
-#
-# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
-# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
-# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
-# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
-# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
-# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
-# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
-# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
-# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-
-"""Does google-lint on c++ files.
-
-The goal of this script is to identify places in the code that *may*
-be in non-compliance with google style. It does not attempt to fix
-up these problems -- the point is to educate. It does also not
-attempt to find all problems, or to ensure that everything it does
-find is legitimately a problem.
-
-In particular, we can get very confused by /* and // inside strings!
-We do a small hack, which is to ignore //'s with "'s after them on the
-same line, but it is far from perfect (in either direction).
-"""
-
-import codecs
-import copy
-import getopt
-import math # for log
-import os
-import re
-import sre_compile
-import string
-import sys
-import unicodedata
-
-
-_USAGE = """
-Syntax: cpplint.py [--verbose=#] [--output=vs7] [--filter=-x,+y,...]
- [--counting=total|toplevel|detailed] [--root=subdir]
- [--linelength=digits]
- <file> [file] ...
-
- The style guidelines this tries to follow are those in
- http://google-styleguide.googlecode.com/svn/trunk/cppguide.xml
-
- Every problem is given a confidence score from 1-5, with 5 meaning we are
- certain of the problem, and 1 meaning it could be a legitimate construct.
- This will miss some errors, and is not a substitute for a code review.
-
- To suppress false-positive errors of a certain category, add a
- 'NOLINT(category)' comment to the line. NOLINT or NOLINT(*)
- suppresses errors of all categories on that line.
-
- The files passed in will be linted; at least one file must be provided.
- Default linted extensions are .cc, .cpp, .cu, .cuh and .h. Change the
- extensions with the --extensions flag.
-
- Flags:
-
- output=vs7
- By default, the output is formatted to ease emacs parsing. Visual Studio
- compatible output (vs7) may also be used. Other formats are unsupported.
-
- verbose=#
- Specify a number 0-5 to restrict errors to certain verbosity levels.
-
- filter=-x,+y,...
- Specify a comma-separated list of category-filters to apply: only
- error messages whose category names pass the filters will be printed.
- (Category names are printed with the message and look like
- "[whitespace/indent]".) Filters are evaluated left to right.
- "-FOO" and "FOO" means "do not print categories that start with FOO".
- "+FOO" means "do print categories that start with FOO".
-
- Examples: --filter=-whitespace,+whitespace/braces
- --filter=whitespace,runtime/printf,+runtime/printf_format
- --filter=-,+build/include_what_you_use
-
- To see a list of all the categories used in cpplint, pass no arg:
- --filter=
-
- counting=total|toplevel|detailed
- The total number of errors found is always printed. If
- 'toplevel' is provided, then the count of errors in each of
- the top-level categories like 'build' and 'whitespace' will
- also be printed. If 'detailed' is provided, then a count
- is provided for each category like 'build/class'.
-
- root=subdir
- The root directory used for deriving header guard CPP variable.
- By default, the header guard CPP variable is calculated as the relative
- path to the directory that contains .git, .hg, or .svn. When this flag
- is specified, the relative path is calculated from the specified
- directory. If the specified directory does not exist, this flag is
- ignored.
-
- Examples:
- Assuing that src/.git exists, the header guard CPP variables for
- src/chrome/browser/ui/browser.h are:
-
- No flag => CHROME_BROWSER_UI_BROWSER_H_
- --root=chrome => BROWSER_UI_BROWSER_H_
- --root=chrome/browser => UI_BROWSER_H_
-
- linelength=digits
- This is the allowed line length for the project. The default value is
- 80 characters.
-
- Examples:
- --linelength=120
-
- extensions=extension,extension,...
- The allowed file extensions that cpplint will check
-
- Examples:
- --extensions=hpp,cpp
-"""
-
-# We categorize each error message we print. Here are the categories.
-# We want an explicit list so we can list them all in cpplint --filter=.
-# If you add a new error message with a new category, add it to the list
-# here! cpplint_unittest.py should tell you if you forget to do this.
-_ERROR_CATEGORIES = [
- 'build/class',
- 'build/deprecated',
- 'build/endif_comment',
- 'build/explicit_make_pair',
- 'build/forward_decl',
- 'build/header_guard',
- 'build/include',
- 'build/include_alpha',
- 'build/include_order',
- 'build/include_what_you_use',
- 'build/namespaces',
- 'build/printf_format',
- 'build/storage_class',
- 'legal/copyright',
- 'readability/alt_tokens',
- 'readability/braces',
- 'readability/casting',
- 'readability/check',
- 'readability/constructors',
- 'readability/fn_size',
- 'readability/function',
- 'readability/multiline_comment',
- 'readability/multiline_string',
- 'readability/namespace',
- 'readability/nolint',
- 'readability/nul',
- 'readability/streams',
- 'readability/todo',
- 'readability/utf8',
- 'runtime/arrays',
- 'runtime/casting',
- 'runtime/explicit',
- 'runtime/int',
- 'runtime/init',
- 'runtime/invalid_increment',
- 'runtime/member_string_references',
- 'runtime/memset',
- 'runtime/operator',
- 'runtime/printf',
- 'runtime/printf_format',
- 'runtime/references',
- 'runtime/sizeof',
- 'runtime/string',
- 'runtime/threadsafe_fn',
- 'runtime/vlog',
- 'whitespace/blank_line',
- 'whitespace/braces',
- 'whitespace/comma',
- 'whitespace/comments',
- 'whitespace/empty_conditional_body',
- 'whitespace/empty_loop_body',
- 'whitespace/end_of_line',
- 'whitespace/ending_newline',
- 'whitespace/forcolon',
- 'whitespace/indent',
- 'whitespace/line_length',
- 'whitespace/newline',
- 'whitespace/operators',
- 'whitespace/parens',
- 'whitespace/semicolon',
- 'whitespace/tab',
- 'whitespace/todo'
- ]
-
-# The default state of the category filter. This is overrided by the --filter=
-# flag. By default all errors are on, so only add here categories that should be
-# off by default (i.e., categories that must be enabled by the --filter= flags).
-# All entries here should start with a '-' or '+', as in the --filter= flag.
-_DEFAULT_FILTERS = ['-build/include_alpha']
-
-# We used to check for high-bit characters, but after much discussion we
-# decided those were OK, as long as they were in UTF-8 and didn't represent
-# hard-coded international strings, which belong in a separate i18n file.
-
-
-# C++ headers
-_CPP_HEADERS = frozenset([
- # Legacy
- 'algobase.h',
- 'algo.h',
- 'alloc.h',
- 'builtinbuf.h',
- 'bvector.h',
- 'complex.h',
- 'defalloc.h',
- 'deque.h',
- 'editbuf.h',
- 'fstream.h',
- 'function.h',
- 'hash_map',
- 'hash_map.h',
- 'hash_set',
- 'hash_set.h',
- 'hashtable.h',
- 'heap.h',
- 'indstream.h',
- 'iomanip.h',
- 'iostream.h',
- 'istream.h',
- 'iterator.h',
- 'list.h',
- 'map.h',
- 'multimap.h',
- 'multiset.h',
- 'ostream.h',
- 'pair.h',
- 'parsestream.h',
- 'pfstream.h',
- 'procbuf.h',
- 'pthread_alloc',
- 'pthread_alloc.h',
- 'rope',
- 'rope.h',
- 'ropeimpl.h',
- 'set.h',
- 'slist',
- 'slist.h',
- 'stack.h',
- 'stdiostream.h',
- 'stl_alloc.h',
- 'stl_relops.h',
- 'streambuf.h',
- 'stream.h',
- 'strfile.h',
- 'strstream.h',
- 'tempbuf.h',
- 'tree.h',
- 'type_traits.h',
- 'vector.h',
- # 17.6.1.2 C++ library headers
- 'algorithm',
- 'array',
- 'atomic',
- 'bitset',
- 'chrono',
- 'codecvt',
- 'complex',
- 'condition_variable',
- 'deque',
- 'exception',
- 'forward_list',
- 'fstream',
- 'functional',
- 'future',
- 'initializer_list',
- 'iomanip',
- 'ios',
- 'iosfwd',
- 'iostream',
- 'istream',
- 'iterator',
- 'limits',
- 'list',
- 'locale',
- 'map',
- 'memory',
- 'mutex',
- 'new',
- 'numeric',
- 'ostream',
- 'queue',
- 'random',
- 'ratio',
- 'regex',
- 'set',
- 'sstream',
- 'stack',
- 'stdexcept',
- 'streambuf',
- 'string',
- 'strstream',
- 'system_error',
- 'thread',
- 'tuple',
- 'typeindex',
- 'typeinfo',
- 'type_traits',
- 'unordered_map',
- 'unordered_set',
- 'utility',
- 'valarray',
- 'vector',
- # 17.6.1.2 C++ headers for C library facilities
- 'cassert',
- 'ccomplex',
- 'cctype',
- 'cerrno',
- 'cfenv',
- 'cfloat',
- 'cinttypes',
- 'ciso646',
- 'climits',
- 'clocale',
- 'cmath',
- 'csetjmp',
- 'csignal',
- 'cstdalign',
- 'cstdarg',
- 'cstdbool',
- 'cstddef',
- 'cstdint',
- 'cstdio',
- 'cstdlib',
- 'cstring',
- 'ctgmath',
- 'ctime',
- 'cuchar',
- 'cwchar',
- 'cwctype',
- ])
-
-# Assertion macros. These are defined in base/logging.h and
-# testing/base/gunit.h. Note that the _M versions need to come first
-# for substring matching to work.
-_CHECK_MACROS = [
- 'DCHECK', 'CHECK',
- 'EXPECT_TRUE_M', 'EXPECT_TRUE',
- 'ASSERT_TRUE_M', 'ASSERT_TRUE',
- 'EXPECT_FALSE_M', 'EXPECT_FALSE',
- 'ASSERT_FALSE_M', 'ASSERT_FALSE',
- ]
-
-# Replacement macros for CHECK/DCHECK/EXPECT_TRUE/EXPECT_FALSE
-_CHECK_REPLACEMENT = dict([(m, {}) for m in _CHECK_MACROS])
-
-for op, replacement in [('==', 'EQ'), ('!=', 'NE'),
- ('>=', 'GE'), ('>', 'GT'),
- ('<=', 'LE'), ('<', 'LT')]:
- _CHECK_REPLACEMENT['DCHECK'][op] = 'DCHECK_%s' % replacement
- _CHECK_REPLACEMENT['CHECK'][op] = 'CHECK_%s' % replacement
- _CHECK_REPLACEMENT['EXPECT_TRUE'][op] = 'EXPECT_%s' % replacement
- _CHECK_REPLACEMENT['ASSERT_TRUE'][op] = 'ASSERT_%s' % replacement
- _CHECK_REPLACEMENT['EXPECT_TRUE_M'][op] = 'EXPECT_%s_M' % replacement
- _CHECK_REPLACEMENT['ASSERT_TRUE_M'][op] = 'ASSERT_%s_M' % replacement
-
-for op, inv_replacement in [('==', 'NE'), ('!=', 'EQ'),
- ('>=', 'LT'), ('>', 'LE'),
- ('<=', 'GT'), ('<', 'GE')]:
- _CHECK_REPLACEMENT['EXPECT_FALSE'][op] = 'EXPECT_%s' % inv_replacement
- _CHECK_REPLACEMENT['ASSERT_FALSE'][op] = 'ASSERT_%s' % inv_replacement
- _CHECK_REPLACEMENT['EXPECT_FALSE_M'][op] = 'EXPECT_%s_M' % inv_replacement
- _CHECK_REPLACEMENT['ASSERT_FALSE_M'][op] = 'ASSERT_%s_M' % inv_replacement
-
-# Alternative tokens and their replacements. For full list, see section 2.5
-# Alternative tokens [lex.digraph] in the C++ standard.
-#
-# Digraphs (such as '%:') are not included here since it's a mess to
-# match those on a word boundary.
-_ALT_TOKEN_REPLACEMENT = {
- 'and': '&&',
- 'bitor': '|',
- 'or': '||',
- 'xor': '^',
- 'compl': '~',
- 'bitand': '&',
- 'and_eq': '&=',
- 'or_eq': '|=',
- 'xor_eq': '^=',
- 'not': '!',
- 'not_eq': '!='
- }
-
-# Compile regular expression that matches all the above keywords. The "[ =()]"
-# bit is meant to avoid matching these keywords outside of boolean expressions.
-#
-# False positives include C-style multi-line comments and multi-line strings
-# but those have always been troublesome for cpplint.
-_ALT_TOKEN_REPLACEMENT_PATTERN = re.compile(
- r'[ =()](' + ('|'.join(_ALT_TOKEN_REPLACEMENT.keys())) + r')(?=[ (]|$)')
-
-
-# These constants define types of headers for use with
-# _IncludeState.CheckNextIncludeOrder().
-_C_SYS_HEADER = 1
-_CPP_SYS_HEADER = 2
-_LIKELY_MY_HEADER = 3
-_POSSIBLE_MY_HEADER = 4
-_OTHER_HEADER = 5
-
-# These constants define the current inline assembly state
-_NO_ASM = 0 # Outside of inline assembly block
-_INSIDE_ASM = 1 # Inside inline assembly block
-_END_ASM = 2 # Last line of inline assembly block
-_BLOCK_ASM = 3 # The whole block is an inline assembly block
-
-# Match start of assembly blocks
-_MATCH_ASM = re.compile(r'^\s*(?:asm|_asm|__asm|__asm__)'
- r'(?:\s+(volatile|__volatile__))?'
- r'\s*[{(]')
-
-
-_regexp_compile_cache = {}
-
-# Finds occurrences of NOLINT or NOLINT(...).
-_RE_SUPPRESSION = re.compile(r'\bNOLINT\b(\([^)]*\))?')
-
-# {str, set(int)}: a map from error categories to sets of linenumbers
-# on which those errors are expected and should be suppressed.
-_error_suppressions = {}
-
-# The root directory used for deriving header guard CPP variable.
-# This is set by --root flag.
-_root = None
-
-# The allowed line length of files.
-# This is set by --linelength flag.
-_line_length = 80
-
-# The allowed extensions for file names
-# This is set by --extensions flag.
-_valid_extensions = set(['cc', 'h', 'cpp', 'cu', 'cuh'])
-
-def ParseNolintSuppressions(filename, raw_line, linenum, error):
- """Updates the global list of error-suppressions.
-
- Parses any NOLINT comments on the current line, updating the global
- error_suppressions store. Reports an error if the NOLINT comment
- was malformed.
-
- Args:
- filename: str, the name of the input file.
- raw_line: str, the line of input text, with comments.
- linenum: int, the number of the current line.
- error: function, an error handler.
- """
- # FIXME(adonovan): "NOLINT(" is misparsed as NOLINT(*).
- matched = _RE_SUPPRESSION.search(raw_line)
- if matched:
- category = matched.group(1)
- if category in (None, '(*)'): # => "suppress all"
- _error_suppressions.setdefault(None, set()).add(linenum)
- else:
- if category.startswith('(') and category.endswith(')'):
- category = category[1:-1]
- if category in _ERROR_CATEGORIES:
- _error_suppressions.setdefault(category, set()).add(linenum)
- else:
- error(filename, linenum, 'readability/nolint', 5,
- 'Unknown NOLINT error category: %s' % category)
-
-
-def ResetNolintSuppressions():
- "Resets the set of NOLINT suppressions to empty."
- _error_suppressions.clear()
-
-
-def IsErrorSuppressedByNolint(category, linenum):
- """Returns true if the specified error category is suppressed on this line.
-
- Consults the global error_suppressions map populated by
- ParseNolintSuppressions/ResetNolintSuppressions.
-
- Args:
- category: str, the category of the error.
- linenum: int, the current line number.
- Returns:
- bool, True iff the error should be suppressed due to a NOLINT comment.
- """
- return (linenum in _error_suppressions.get(category, set()) or
- linenum in _error_suppressions.get(None, set()))
-
-def Match(pattern, s):
- """Matches the string with the pattern, caching the compiled regexp."""
- # The regexp compilation caching is inlined in both Match and Search for
- # performance reasons; factoring it out into a separate function turns out
- # to be noticeably expensive.
- if pattern not in _regexp_compile_cache:
- _regexp_compile_cache[pattern] = sre_compile.compile(pattern)
- return _regexp_compile_cache[pattern].match(s)
-
-
-def ReplaceAll(pattern, rep, s):
- """Replaces instances of pattern in a string with a replacement.
-
- The compiled regex is kept in a cache shared by Match and Search.
-
- Args:
- pattern: regex pattern
- rep: replacement text
- s: search string
-
- Returns:
- string with replacements made (or original string if no replacements)
- """
- if pattern not in _regexp_compile_cache:
- _regexp_compile_cache[pattern] = sre_compile.compile(pattern)
- return _regexp_compile_cache[pattern].sub(rep, s)
-
-
-def Search(pattern, s):
- """Searches the string for the pattern, caching the compiled regexp."""
- if pattern not in _regexp_compile_cache:
- _regexp_compile_cache[pattern] = sre_compile.compile(pattern)
- return _regexp_compile_cache[pattern].search(s)
-
-
-class _IncludeState(dict):
- """Tracks line numbers for includes, and the order in which includes appear.
-
- As a dict, an _IncludeState object serves as a mapping between include
- filename and line number on which that file was included.
-
- Call CheckNextIncludeOrder() once for each header in the file, passing
- in the type constants defined above. Calls in an illegal order will
- raise an _IncludeError with an appropriate error message.
-
- """
- # self._section will move monotonically through this set. If it ever
- # needs to move backwards, CheckNextIncludeOrder will raise an error.
- _INITIAL_SECTION = 0
- _MY_H_SECTION = 1
- _C_SECTION = 2
- _CPP_SECTION = 3
- _OTHER_H_SECTION = 4
-
- _TYPE_NAMES = {
- _C_SYS_HEADER: 'C system header',
- _CPP_SYS_HEADER: 'C++ system header',
- _LIKELY_MY_HEADER: 'header this file implements',
- _POSSIBLE_MY_HEADER: 'header this file may implement',
- _OTHER_HEADER: 'other header',
- }
- _SECTION_NAMES = {
- _INITIAL_SECTION: "... nothing. (This can't be an error.)",
- _MY_H_SECTION: 'a header this file implements',
- _C_SECTION: 'C system header',
- _CPP_SECTION: 'C++ system header',
- _OTHER_H_SECTION: 'other header',
- }
-
- def __init__(self):
- dict.__init__(self)
- self.ResetSection()
-
- def ResetSection(self):
- # The name of the current section.
- self._section = self._INITIAL_SECTION
- # The path of last found header.
- self._last_header = ''
-
- def SetLastHeader(self, header_path):
- self._last_header = header_path
-
- def CanonicalizeAlphabeticalOrder(self, header_path):
- """Returns a path canonicalized for alphabetical comparison.
-
- - replaces "-" with "_" so they both cmp the same.
- - removes '-inl' since we don't require them to be after the main header.
- - lowercase everything, just in case.
-
- Args:
- header_path: Path to be canonicalized.
-
- Returns:
- Canonicalized path.
- """
- return header_path.replace('-inl.h', '.h').replace('-', '_').lower()
-
- def IsInAlphabeticalOrder(self, clean_lines, linenum, header_path):
- """Check if a header is in alphabetical order with the previous header.
-
- Args:
- clean_lines: A CleansedLines instance containing the file.
- linenum: The number of the line to check.
- header_path: Canonicalized header to be checked.
-
- Returns:
- Returns true if the header is in alphabetical order.
- """
- # If previous section is different from current section, _last_header will
- # be reset to empty string, so it's always less than current header.
- #
- # If previous line was a blank line, assume that the headers are
- # intentionally sorted the way they are.
- if (self._last_header > header_path and
- not Match(r'^\s*$', clean_lines.elided[linenum - 1])):
- return False
- return True
-
- def CheckNextIncludeOrder(self, header_type):
- """Returns a non-empty error message if the next header is out of order.
-
- This function also updates the internal state to be ready to check
- the next include.
-
- Args:
- header_type: One of the _XXX_HEADER constants defined above.
-
- Returns:
- The empty string if the header is in the right order, or an
- error message describing what's wrong.
-
- """
- error_message = ('Found %s after %s' %
- (self._TYPE_NAMES[header_type],
- self._SECTION_NAMES[self._section]))
-
- last_section = self._section
-
- if header_type == _C_SYS_HEADER:
- if self._section <= self._C_SECTION:
- self._section = self._C_SECTION
- else:
- self._last_header = ''
- return error_message
- elif header_type == _CPP_SYS_HEADER:
- if self._section <= self._CPP_SECTION:
- self._section = self._CPP_SECTION
- else:
- self._last_header = ''
- return error_message
- elif header_type == _LIKELY_MY_HEADER:
- if self._section <= self._MY_H_SECTION:
- self._section = self._MY_H_SECTION
- else:
- self._section = self._OTHER_H_SECTION
- elif header_type == _POSSIBLE_MY_HEADER:
- if self._section <= self._MY_H_SECTION:
- self._section = self._MY_H_SECTION
- else:
- # This will always be the fallback because we're not sure
- # enough that the header is associated with this file.
- self._section = self._OTHER_H_SECTION
- else:
- assert header_type == _OTHER_HEADER
- self._section = self._OTHER_H_SECTION
-
- if last_section != self._section:
- self._last_header = ''
-
- return ''
-
-
-class _CppLintState(object):
- """Maintains module-wide state.."""
-
- def __init__(self):
- self.verbose_level = 1 # global setting.
- self.error_count = 0 # global count of reported errors
- # filters to apply when emitting error messages
- self.filters = _DEFAULT_FILTERS[:]
- self.counting = 'total' # In what way are we counting errors?
- self.errors_by_category = {} # string to int dict storing error counts
-
- # output format:
- # "emacs" - format that emacs can parse (default)
- # "vs7" - format that Microsoft Visual Studio 7 can parse
- self.output_format = 'emacs'
-
- def SetOutputFormat(self, output_format):
- """Sets the output format for errors."""
- self.output_format = output_format
-
- def SetVerboseLevel(self, level):
- """Sets the module's verbosity, and returns the previous setting."""
- last_verbose_level = self.verbose_level
- self.verbose_level = level
- return last_verbose_level
-
- def SetCountingStyle(self, counting_style):
- """Sets the module's counting options."""
- self.counting = counting_style
-
- def SetFilters(self, filters):
- """Sets the error-message filters.
-
- These filters are applied when deciding whether to emit a given
- error message.
-
- Args:
- filters: A string of comma-separated filters (eg "+whitespace/indent").
- Each filter should start with + or -; else we die.
-
- Raises:
- ValueError: The comma-separated filters did not all start with '+' or '-'.
- E.g. "-,+whitespace,-whitespace/indent,whitespace/badfilter"
- """
- # Default filters always have less priority than the flag ones.
- self.filters = _DEFAULT_FILTERS[:]
- for filt in filters.split(','):
- clean_filt = filt.strip()
- if clean_filt:
- self.filters.append(clean_filt)
- for filt in self.filters:
- if not (filt.startswith('+') or filt.startswith('-')):
- raise ValueError('Every filter in --filters must start with + or -'
- ' (%s does not)' % filt)
-
- def ResetErrorCounts(self):
- """Sets the module's error statistic back to zero."""
- self.error_count = 0
- self.errors_by_category = {}
-
- def IncrementErrorCount(self, category):
- """Bumps the module's error statistic."""
- self.error_count += 1
- if self.counting in ('toplevel', 'detailed'):
- if self.counting != 'detailed':
- category = category.split('/')[0]
- if category not in self.errors_by_category:
- self.errors_by_category[category] = 0
- self.errors_by_category[category] += 1
-
- def PrintErrorCounts(self):
- """Print a summary of errors by category, and the total."""
- for category, count in self.errors_by_category.iteritems():
- sys.stderr.write('Category \'%s\' errors found: %d\n' %
- (category, count))
- sys.stderr.write('Total errors found: %d\n' % self.error_count)
-
-_cpplint_state = _CppLintState()
-
-
-def _OutputFormat():
- """Gets the module's output format."""
- return _cpplint_state.output_format
-
-
-def _SetOutputFormat(output_format):
- """Sets the module's output format."""
- _cpplint_state.SetOutputFormat(output_format)
-
-
-def _VerboseLevel():
- """Returns the module's verbosity setting."""
- return _cpplint_state.verbose_level
-
-
-def _SetVerboseLevel(level):
- """Sets the module's verbosity, and returns the previous setting."""
- return _cpplint_state.SetVerboseLevel(level)
-
-
-def _SetCountingStyle(level):
- """Sets the module's counting options."""
- _cpplint_state.SetCountingStyle(level)
-
-
-def _Filters():
- """Returns the module's list of output filters, as a list."""
- return _cpplint_state.filters
-
-
-def _SetFilters(filters):
- """Sets the module's error-message filters.
-
- These filters are applied when deciding whether to emit a given
- error message.
-
- Args:
- filters: A string of comma-separated filters (eg "whitespace/indent").
- Each filter should start with + or -; else we die.
- """
- _cpplint_state.SetFilters(filters)
-
-
-class _FunctionState(object):
- """Tracks current function name and the number of lines in its body."""
-
- _NORMAL_TRIGGER = 250 # for --v=0, 500 for --v=1, etc.
- _TEST_TRIGGER = 400 # about 50% more than _NORMAL_TRIGGER.
-
- def __init__(self):
- self.in_a_function = False
- self.lines_in_function = 0
- self.current_function = ''
-
- def Begin(self, function_name):
- """Start analyzing function body.
-
- Args:
- function_name: The name of the function being tracked.
- """
- self.in_a_function = True
- self.lines_in_function = 0
- self.current_function = function_name
-
- def Count(self):
- """Count line in current function body."""
- if self.in_a_function:
- self.lines_in_function += 1
-
- def Check(self, error, filename, linenum):
- """Report if too many lines in function body.
-
- Args:
- error: The function to call with any errors found.
- filename: The name of the current file.
- linenum: The number of the line to check.
- """
- if Match(r'T(EST|est)', self.current_function):
- base_trigger = self._TEST_TRIGGER
- else:
- base_trigger = self._NORMAL_TRIGGER
- trigger = base_trigger * 2**_VerboseLevel()
-
- if self.lines_in_function > trigger:
- error_level = int(math.log(self.lines_in_function / base_trigger, 2))
- # 50 => 0, 100 => 1, 200 => 2, 400 => 3, 800 => 4, 1600 => 5, ...
- if error_level > 5:
- error_level = 5
- error(filename, linenum, 'readability/fn_size', error_level,
- 'Small and focused functions are preferred:'
- ' %s has %d non-comment lines'
- ' (error triggered by exceeding %d lines).' % (
- self.current_function, self.lines_in_function, trigger))
-
- def End(self):
- """Stop analyzing function body."""
- self.in_a_function = False
-
-
-class _IncludeError(Exception):
- """Indicates a problem with the include order in a file."""
- pass
-
-
-class FileInfo:
- """Provides utility functions for filenames.
-
- FileInfo provides easy access to the components of a file's path
- relative to the project root.
- """
-
- def __init__(self, filename):
- self._filename = filename
-
- def FullName(self):
- """Make Windows paths like Unix."""
- return os.path.abspath(self._filename).replace('\\', '/')
-
- def RepositoryName(self):
- """FullName after removing the local path to the repository.
-
- If we have a real absolute path name here we can try to do something smart:
- detecting the root of the checkout and truncating /path/to/checkout from
- the name so that we get header guards that don't include things like
- "C:\Documents and Settings\..." or "/home/username/..." in them and thus
- people on different computers who have checked the source out to different
- locations won't see bogus errors.
- """
- fullname = self.FullName()
-
- if os.path.exists(fullname):
- project_dir = os.path.dirname(fullname)
-
- if os.path.exists(os.path.join(project_dir, ".svn")):
- # If there's a .svn file in the current directory, we recursively look
- # up the directory tree for the top of the SVN checkout
- root_dir = project_dir
- one_up_dir = os.path.dirname(root_dir)
- while os.path.exists(os.path.join(one_up_dir, ".svn")):
- root_dir = os.path.dirname(root_dir)
- one_up_dir = os.path.dirname(one_up_dir)
-
- prefix = os.path.commonprefix([root_dir, project_dir])
- return fullname[len(prefix) + 1:]
-
- # Not SVN <= 1.6? Try to find a git, hg, or svn top level directory by
- # searching up from the current path.
- root_dir = os.path.dirname(fullname)
- while (root_dir != os.path.dirname(root_dir) and
- not os.path.exists(os.path.join(root_dir, ".git")) and
- not os.path.exists(os.path.join(root_dir, ".hg")) and
- not os.path.exists(os.path.join(root_dir, ".svn"))):
- root_dir = os.path.dirname(root_dir)
-
- if (os.path.exists(os.path.join(root_dir, ".git")) or
- os.path.exists(os.path.join(root_dir, ".hg")) or
- os.path.exists(os.path.join(root_dir, ".svn"))):
- prefix = os.path.commonprefix([root_dir, project_dir])
- return fullname[len(prefix) + 1:]
-
- # Don't know what to do; header guard warnings may be wrong...
- return fullname
-
- def Split(self):
- """Splits the file into the directory, basename, and extension.
-
- For 'chrome/browser/browser.cc', Split() would
- return ('chrome/browser', 'browser', '.cc')
-
- Returns:
- A tuple of (directory, basename, extension).
- """
-
- googlename = self.RepositoryName()
- project, rest = os.path.split(googlename)
- return (project,) + os.path.splitext(rest)
-
- def BaseName(self):
- """File base name - text after the final slash, before the final period."""
- return self.Split()[1]
-
- def Extension(self):
- """File extension - text following the final period."""
- return self.Split()[2]
-
- def NoExtension(self):
- """File has no source file extension."""
- return '/'.join(self.Split()[0:2])
-
- def IsSource(self):
- """File has a source file extension."""
- return self.Extension()[1:] in ('c', 'cc', 'cpp', 'cxx')
-
-
-def _ShouldPrintError(category, confidence, linenum):
- """If confidence >= verbose, category passes filter and is not suppressed."""
-
- # There are three ways we might decide not to print an error message:
- # a "NOLINT(category)" comment appears in the source,
- # the verbosity level isn't high enough, or the filters filter it out.
- if IsErrorSuppressedByNolint(category, linenum):
- return False
- if confidence < _cpplint_state.verbose_level:
- return False
-
- is_filtered = False
- for one_filter in _Filters():
- if one_filter.startswith('-'):
- if category.startswith(one_filter[1:]):
- is_filtered = True
- elif one_filter.startswith('+'):
- if category.startswith(one_filter[1:]):
- is_filtered = False
- else:
- assert False # should have been checked for in SetFilter.
- if is_filtered:
- return False
-
- return True
-
-
-def Error(filename, linenum, category, confidence, message):
- """Logs the fact we've found a lint error.
-
- We log where the error was found, and also our confidence in the error,
- that is, how certain we are this is a legitimate style regression, and
- not a misidentification or a use that's sometimes justified.
-
- False positives can be suppressed by the use of
- "cpplint(category)" comments on the offending line. These are
- parsed into _error_suppressions.
-
- Args:
- filename: The name of the file containing the error.
- linenum: The number of the line containing the error.
- category: A string used to describe the "category" this bug
- falls under: "whitespace", say, or "runtime". Categories
- may have a hierarchy separated by slashes: "whitespace/indent".
- confidence: A number from 1-5 representing a confidence score for
- the error, with 5 meaning that we are certain of the problem,
- and 1 meaning that it could be a legitimate construct.
- message: The error message.
- """
- if _ShouldPrintError(category, confidence, linenum):
- _cpplint_state.IncrementErrorCount(category)
- if _cpplint_state.output_format == 'vs7':
- sys.stderr.write('%s(%s): %s [%s] [%d]\n' % (
- filename, linenum, message, category, confidence))
- elif _cpplint_state.output_format == 'eclipse':
- sys.stderr.write('%s:%s: warning: %s [%s] [%d]\n' % (
- filename, linenum, message, category, confidence))
- else:
- sys.stderr.write('%s:%s: %s [%s] [%d]\n' % (
- filename, linenum, message, category, confidence))
-
-
-# Matches standard C++ escape sequences per 2.13.2.3 of the C++ standard.
-_RE_PATTERN_CLEANSE_LINE_ESCAPES = re.compile(
- r'\\([abfnrtv?"\\\']|\d+|x[0-9a-fA-F]+)')
-# Matches strings. Escape codes should already be removed by ESCAPES.
-_RE_PATTERN_CLEANSE_LINE_DOUBLE_QUOTES = re.compile(r'"[^"]*"')
-# Matches characters. Escape codes should already be removed by ESCAPES.
-_RE_PATTERN_CLEANSE_LINE_SINGLE_QUOTES = re.compile(r"'.'")
-# Matches multi-line C++ comments.
-# This RE is a little bit more complicated than one might expect, because we
-# have to take care of space removals tools so we can handle comments inside
-# statements better.
-# The current rule is: We only clear spaces from both sides when we're at the
-# end of the line. Otherwise, we try to remove spaces from the right side,
-# if this doesn't work we try on left side but only if there's a non-character
-# on the right.
-_RE_PATTERN_CLEANSE_LINE_C_COMMENTS = re.compile(
- r"""(\s*/\*.*\*/\s*$|
- /\*.*\*/\s+|
- \s+/\*.*\*/(?=\W)|
- /\*.*\*/)""", re.VERBOSE)
-
-
-def IsCppString(line):
- """Does line terminate so, that the next symbol is in string constant.
-
- This function does not consider single-line nor multi-line comments.
-
- Args:
- line: is a partial line of code starting from the 0..n.
-
- Returns:
- True, if next character appended to 'line' is inside a
- string constant.
- """
-
- line = line.replace(r'\\', 'XX') # after this, \\" does not match to \"
- return ((line.count('"') - line.count(r'\"') - line.count("'\"'")) & 1) == 1
-
-
-def CleanseRawStrings(raw_lines):
- """Removes C++11 raw strings from lines.
-
- Before:
- static const char kData[] = R"(
- multi-line string
- )";
-
- After:
- static const char kData[] = ""
- (replaced by blank line)
- "";
-
- Args:
- raw_lines: list of raw lines.
-
- Returns:
- list of lines with C++11 raw strings replaced by empty strings.
- """
-
- delimiter = None
- lines_without_raw_strings = []
- for line in raw_lines:
- if delimiter:
- # Inside a raw string, look for the end
- end = line.find(delimiter)
- if end >= 0:
- # Found the end of the string, match leading space for this
- # line and resume copying the original lines, and also insert
- # a "" on the last line.
- leading_space = Match(r'^(\s*)\S', line)
- line = leading_space.group(1) + '""' + line[end + len(delimiter):]
- delimiter = None
- else:
- # Haven't found the end yet, append a blank line.
- line = ''
-
- else:
- # Look for beginning of a raw string.
- # See 2.14.15 [lex.string] for syntax.
- matched = Match(r'^(.*)\b(?:R|u8R|uR|UR|LR)"([^\s\\()]*)\((.*)$', line)
- if matched:
- delimiter = ')' + matched.group(2) + '"'
-
- end = matched.group(3).find(delimiter)
- if end >= 0:
- # Raw string ended on same line
- line = (matched.group(1) + '""' +
- matched.group(3)[end + len(delimiter):])
- delimiter = None
- else:
- # Start of a multi-line raw string
- line = matched.group(1) + '""'
-
- lines_without_raw_strings.append(line)
-
- # TODO(unknown): if delimiter is not None here, we might want to
- # emit a warning for unterminated string.
- return lines_without_raw_strings
-
-
-def FindNextMultiLineCommentStart(lines, lineix):
- """Find the beginning marker for a multiline comment."""
- while lineix < len(lines):
- if lines[lineix].strip().startswith('/*'):
- # Only return this marker if the comment goes beyond this line
- if lines[lineix].strip().find('*/', 2) < 0:
- return lineix
- lineix += 1
- return len(lines)
-
-
-def FindNextMultiLineCommentEnd(lines, lineix):
- """We are inside a comment, find the end marker."""
- while lineix < len(lines):
- if lines[lineix].strip().endswith('*/'):
- return lineix
- lineix += 1
- return len(lines)
-
-
-def RemoveMultiLineCommentsFromRange(lines, begin, end):
- """Clears a range of lines for multi-line comments."""
- # Having // dummy comments makes the lines non-empty, so we will not get
- # unnecessary blank line warnings later in the code.
- for i in range(begin, end):
- lines[i] = '// dummy'
-
-
-def RemoveMultiLineComments(filename, lines, error):
- """Removes multiline (c-style) comments from lines."""
- lineix = 0
- while lineix < len(lines):
- lineix_begin = FindNextMultiLineCommentStart(lines, lineix)
- if lineix_begin >= len(lines):
- return
- lineix_end = FindNextMultiLineCommentEnd(lines, lineix_begin)
- if lineix_end >= len(lines):
- error(filename, lineix_begin + 1, 'readability/multiline_comment', 5,
- 'Could not find end of multi-line comment')
- return
- RemoveMultiLineCommentsFromRange(lines, lineix_begin, lineix_end + 1)
- lineix = lineix_end + 1
-
-
-def CleanseComments(line):
- """Removes //-comments and single-line C-style /* */ comments.
-
- Args:
- line: A line of C++ source.
-
- Returns:
- The line with single-line comments removed.
- """
- commentpos = line.find('//')
- if commentpos != -1 and not IsCppString(line[:commentpos]):
- line = line[:commentpos].rstrip()
- # get rid of /* ... */
- return _RE_PATTERN_CLEANSE_LINE_C_COMMENTS.sub('', line)
-
-
-class CleansedLines(object):
- """Holds 3 copies of all lines with different preprocessing applied to them.
-
- 1) elided member contains lines without strings and comments,
- 2) lines member contains lines without comments, and
- 3) raw_lines member contains all the lines without processing.
- All these three members are of <type 'list'>, and of the same length.
- """
-
- def __init__(self, lines):
- self.elided = []
- self.lines = []
- self.raw_lines = lines
- self.num_lines = len(lines)
- self.lines_without_raw_strings = CleanseRawStrings(lines)
- for linenum in range(len(self.lines_without_raw_strings)):
- self.lines.append(CleanseComments(
- self.lines_without_raw_strings[linenum]))
- elided = self._CollapseStrings(self.lines_without_raw_strings[linenum])
- self.elided.append(CleanseComments(elided))
-
- def NumLines(self):
- """Returns the number of lines represented."""
- return self.num_lines
-
- @staticmethod
- def _CollapseStrings(elided):
- """Collapses strings and chars on a line to simple "" or '' blocks.
-
- We nix strings first so we're not fooled by text like '"http://"'
-
- Args:
- elided: The line being processed.
-
- Returns:
- The line with collapsed strings.
- """
- if not _RE_PATTERN_INCLUDE.match(elided):
- # Remove escaped characters first to make quote/single quote collapsing
- # basic. Things that look like escaped characters shouldn't occur
- # outside of strings and chars.
- elided = _RE_PATTERN_CLEANSE_LINE_ESCAPES.sub('', elided)
- elided = _RE_PATTERN_CLEANSE_LINE_SINGLE_QUOTES.sub("''", elided)
- elided = _RE_PATTERN_CLEANSE_LINE_DOUBLE_QUOTES.sub('""', elided)
- return elided
-
-
-def FindEndOfExpressionInLine(line, startpos, depth, startchar, endchar):
- """Find the position just after the matching endchar.
-
- Args:
- line: a CleansedLines line.
- startpos: start searching at this position.
- depth: nesting level at startpos.
- startchar: expression opening character.
- endchar: expression closing character.
-
- Returns:
- On finding matching endchar: (index just after matching endchar, 0)
- Otherwise: (-1, new depth at end of this line)
- """
- for i in xrange(startpos, len(line)):
- if line[i] == startchar:
- depth += 1
- elif line[i] == endchar:
- depth -= 1
- if depth == 0:
- return (i + 1, 0)
- return (-1, depth)
-
-
-def CloseExpression(clean_lines, linenum, pos):
- """If input points to ( or { or [ or <, finds the position that closes it.
-
- If lines[linenum][pos] points to a '(' or '{' or '[' or '<', finds the
- linenum/pos that correspond to the closing of the expression.
-
- Args:
- clean_lines: A CleansedLines instance containing the file.
- linenum: The number of the line to check.
- pos: A position on the line.
-
- Returns:
- A tuple (line, linenum, pos) pointer *past* the closing brace, or
- (line, len(lines), -1) if we never find a close. Note we ignore
- strings and comments when matching; and the line we return is the
- 'cleansed' line at linenum.
- """
-
- line = clean_lines.elided[linenum]
- startchar = line[pos]
- if startchar not in '({[<':
- return (line, clean_lines.NumLines(), -1)
- if startchar == '(': endchar = ')'
- if startchar == '[': endchar = ']'
- if startchar == '{': endchar = '}'
- if startchar == '<': endchar = '>'
-
- # Check first line
- (end_pos, num_open) = FindEndOfExpressionInLine(
- line, pos, 0, startchar, endchar)
- if end_pos > -1:
- return (line, linenum, end_pos)
-
- # Continue scanning forward
- while linenum < clean_lines.NumLines() - 1:
- linenum += 1
- line = clean_lines.elided[linenum]
- (end_pos, num_open) = FindEndOfExpressionInLine(
- line, 0, num_open, startchar, endchar)
- if end_pos > -1:
- return (line, linenum, end_pos)
-
- # Did not find endchar before end of file, give up
- return (line, clean_lines.NumLines(), -1)
-
-
-def FindStartOfExpressionInLine(line, endpos, depth, startchar, endchar):
- """Find position at the matching startchar.
-
- This is almost the reverse of FindEndOfExpressionInLine, but note
- that the input position and returned position differs by 1.
-
- Args:
- line: a CleansedLines line.
- endpos: start searching at this position.
- depth: nesting level at endpos.
- startchar: expression opening character.
- endchar: expression closing character.
-
- Returns:
- On finding matching startchar: (index at matching startchar, 0)
- Otherwise: (-1, new depth at beginning of this line)
- """
- for i in xrange(endpos, -1, -1):
- if line[i] == endchar:
- depth += 1
- elif line[i] == startchar:
- depth -= 1
- if depth == 0:
- return (i, 0)
- return (-1, depth)
-
-
-def ReverseCloseExpression(clean_lines, linenum, pos):
- """If input points to ) or } or ] or >, finds the position that opens it.
-
- If lines[linenum][pos] points to a ')' or '}' or ']' or '>', finds the
- linenum/pos that correspond to the opening of the expression.
-
- Args:
- clean_lines: A CleansedLines instance containing the file.
- linenum: The number of the line to check.
- pos: A position on the line.
-
- Returns:
- A tuple (line, linenum, pos) pointer *at* the opening brace, or
- (line, 0, -1) if we never find the matching opening brace. Note
- we ignore strings and comments when matching; and the line we
- return is the 'cleansed' line at linenum.
- """
- line = clean_lines.elided[linenum]
- endchar = line[pos]
- if endchar not in ')}]>':
- return (line, 0, -1)
- if endchar == ')': startchar = '('
- if endchar == ']': startchar = '['
- if endchar == '}': startchar = '{'
- if endchar == '>': startchar = '<'
-
- # Check last line
- (start_pos, num_open) = FindStartOfExpressionInLine(
- line, pos, 0, startchar, endchar)
- if start_pos > -1:
- return (line, linenum, start_pos)
-
- # Continue scanning backward
- while linenum > 0:
- linenum -= 1
- line = clean_lines.elided[linenum]
- (start_pos, num_open) = FindStartOfExpressionInLine(
- line, len(line) - 1, num_open, startchar, endchar)
- if start_pos > -1:
- return (line, linenum, start_pos)
-
- # Did not find startchar before beginning of file, give up
- return (line, 0, -1)
-
-
-def CheckForCopyright(filename, lines, error):
- """Logs an error if no Copyright message appears at the top of the file."""
-
- # We'll say it should occur by line 10. Don't forget there's a
- # dummy line at the front.
- for line in xrange(1, min(len(lines), 11)):
- if re.search(r'Copyright', lines[line], re.I): break
- else: # means no copyright line was found
- error(filename, 0, 'legal/copyright', 5,
- 'No copyright message found. '
- 'You should have a line: "Copyright [year] <Copyright Owner>"')
-
-
-def GetHeaderGuardCPPVariable(filename):
- """Returns the CPP variable that should be used as a header guard.
-
- Args:
- filename: The name of a C++ header file.
-
- Returns:
- The CPP variable that should be used as a header guard in the
- named file.
-
- """
-
- # Restores original filename in case that cpplint is invoked from Emacs's
- # flymake.
- filename = re.sub(r'_flymake\.h$', '.h', filename)
- filename = re.sub(r'/\.flymake/([^/]*)$', r'/\1', filename)
-
- fileinfo = FileInfo(filename)
- file_path_from_root = fileinfo.RepositoryName()
- if _root:
- file_path_from_root = re.sub('^' + _root + os.sep, '', file_path_from_root)
- return re.sub(r'[-./\s]', '_', file_path_from_root).upper() + '_'
-
-
-def CheckForHeaderGuard(filename, lines, error):
- """Checks that the file contains a header guard.
-
- Logs an error if no #ifndef header guard is present. For other
- headers, checks that the full pathname is used.
-
- Args:
- filename: The name of the C++ header file.
- lines: An array of strings, each representing a line of the file.
- error: The function to call with any errors found.
- """
-
- cppvar = GetHeaderGuardCPPVariable(filename)
-
- ifndef = None
- ifndef_linenum = 0
- define = None
- endif = None
- endif_linenum = 0
- for linenum, line in enumerate(lines):
- linesplit = line.split()
- if len(linesplit) >= 2:
- # find the first occurrence of #ifndef and #define, save arg
- if not ifndef and linesplit[0] == '#ifndef':
- # set ifndef to the header guard presented on the #ifndef line.
- ifndef = linesplit[1]
- ifndef_linenum = linenum
- if not define and linesplit[0] == '#define':
- define = linesplit[1]
- # find the last occurrence of #endif, save entire line
- if line.startswith('#endif'):
- endif = line
- endif_linenum = linenum
-
- if not ifndef:
- error(filename, 0, 'build/header_guard', 5,
- 'No #ifndef header guard found, suggested CPP variable is: %s' %
- cppvar)
- return
-
- if not define:
- error(filename, 0, 'build/header_guard', 5,
- 'No #define header guard found, suggested CPP variable is: %s' %
- cppvar)
- return
-
- # The guard should be PATH_FILE_H_, but we also allow PATH_FILE_H__
- # for backward compatibility.
- if ifndef != cppvar:
- error_level = 0
- if ifndef != cppvar + '_':
- error_level = 5
-
- ParseNolintSuppressions(filename, lines[ifndef_linenum], ifndef_linenum,
- error)
- error(filename, ifndef_linenum, 'build/header_guard', error_level,
- '#ifndef header guard has wrong style, please use: %s' % cppvar)
-
- if define != ifndef:
- error(filename, 0, 'build/header_guard', 5,
- '#ifndef and #define don\'t match, suggested CPP variable is: %s' %
- cppvar)
- return
-
- if endif != ('#endif // %s' % cppvar):
- error_level = 0
- if endif != ('#endif // %s' % (cppvar + '_')):
- error_level = 5
-
- ParseNolintSuppressions(filename, lines[endif_linenum], endif_linenum,
- error)
- error(filename, endif_linenum, 'build/header_guard', error_level,
- '#endif line should be "#endif // %s"' % cppvar)
-
-
-def CheckForBadCharacters(filename, lines, error):
- """Logs an error for each line containing bad characters.
-
- Two kinds of bad characters:
-
- 1. Unicode replacement characters: These indicate that either the file
- contained invalid UTF-8 (likely) or Unicode replacement characters (which
- it shouldn't). Note that it's possible for this to throw off line
- numbering if the invalid UTF-8 occurred adjacent to a newline.
-
- 2. NUL bytes. These are problematic for some tools.
-
- Args:
- filename: The name of the current file.
- lines: An array of strings, each representing a line of the file.
- error: The function to call with any errors found.
- """
- for linenum, line in enumerate(lines):
- if u'\ufffd' in line:
- error(filename, linenum, 'readability/utf8', 5,
- 'Line contains invalid UTF-8 (or Unicode replacement character).')
- if '\0' in line:
- error(filename, linenum, 'readability/nul', 5, 'Line contains NUL byte.')
-
-
-def CheckForNewlineAtEOF(filename, lines, error):
- """Logs an error if there is no newline char at the end of the file.
-
- Args:
- filename: The name of the current file.
- lines: An array of strings, each representing a line of the file.
- error: The function to call with any errors found.
- """
-
- # The array lines() was created by adding two newlines to the
- # original file (go figure), then splitting on \n.
- # To verify that the file ends in \n, we just have to make sure the
- # last-but-two element of lines() exists and is empty.
- if len(lines) < 3 or lines[-2]:
- error(filename, len(lines) - 2, 'whitespace/ending_newline', 5,
- 'Could not find a newline character at the end of the file.')
-
-
-def CheckForMultilineCommentsAndStrings(filename, clean_lines, linenum, error):
- """Logs an error if we see /* ... */ or "..." that extend past one line.
-
- /* ... */ comments are legit inside macros, for one line.
- Otherwise, we prefer // comments, so it's ok to warn about the
- other. Likewise, it's ok for strings to extend across multiple
- lines, as long as a line continuation character (backslash)
- terminates each line. Although not currently prohibited by the C++
- style guide, it's ugly and unnecessary. We don't do well with either
- in this lint program, so we warn about both.
-
- Args:
- filename: The name of the current file.
- clean_lines: A CleansedLines instance containing the file.
- linenum: The number of the line to check.
- error: The function to call with any errors found.
- """
- line = clean_lines.elided[linenum]
-
- # Remove all \\ (escaped backslashes) from the line. They are OK, and the
- # second (escaped) slash may trigger later \" detection erroneously.
- line = line.replace('\\\\', '')
-
- if line.count('/*') > line.count('*/'):
- error(filename, linenum, 'readability/multiline_comment', 5,
- 'Complex multi-line /*...*/-style comment found. '
- 'Lint may give bogus warnings. '
- 'Consider replacing these with //-style comments, '
- 'with #if 0...#endif, '
- 'or with more clearly structured multi-line comments.')
-
- if (line.count('"') - line.count('\\"')) % 2:
- error(filename, linenum, 'readability/multiline_string', 5,
- 'Multi-line string ("...") found. This lint script doesn\'t '
- 'do well with such strings, and may give bogus warnings. '
- 'Use C++11 raw strings or concatenation instead.')
-
-
-threading_list = (
- ('asctime(', 'asctime_r('),
- ('ctime(', 'ctime_r('),
- ('getgrgid(', 'getgrgid_r('),
- ('getgrnam(', 'getgrnam_r('),
- ('getlogin(', 'getlogin_r('),
- ('getpwnam(', 'getpwnam_r('),
- ('getpwuid(', 'getpwuid_r('),
- ('gmtime(', 'gmtime_r('),
- ('localtime(', 'localtime_r('),
- ('rand(', 'rand_r('),
- ('strtok(', 'strtok_r('),
- ('ttyname(', 'ttyname_r('),
- )
-
-
-def CheckPosixThreading(filename, clean_lines, linenum, error):
- """Checks for calls to thread-unsafe functions.
-
- Much code has been originally written without consideration of
- multi-threading. Also, engineers are relying on their old experience;
- they have learned posix before threading extensions were added. These
- tests guide the engineers to use thread-safe functions (when using
- posix directly).
-
- Args:
- filename: The name of the current file.
- clean_lines: A CleansedLines instance containing the file.
- linenum: The number of the line to check.
- error: The function to call with any errors found.
- """
- line = clean_lines.elided[linenum]
- for single_thread_function, multithread_safe_function in threading_list:
- ix = line.find(single_thread_function)
- # Comparisons made explicit for clarity -- pylint: disable=g-explicit-bool-comparison
- if ix >= 0 and (ix == 0 or (not line[ix - 1].isalnum() and
- line[ix - 1] not in ('_', '.', '>'))):
- error(filename, linenum, 'runtime/threadsafe_fn', 2,
- 'Consider using ' + multithread_safe_function +
- '...) instead of ' + single_thread_function +
- '...) for improved thread safety.')
-
-
-def CheckVlogArguments(filename, clean_lines, linenum, error):
- """Checks that VLOG() is only used for defining a logging level.
-
- For example, VLOG(2) is correct. VLOG(INFO), VLOG(WARNING), VLOG(ERROR), and
- VLOG(FATAL) are not.
-
- Args:
- filename: The name of the current file.
- clean_lines: A CleansedLines instance containing the file.
- linenum: The number of the line to check.
- error: The function to call with any errors found.
- """
- line = clean_lines.elided[linenum]
- if Search(r'\bVLOG\((INFO|ERROR|WARNING|DFATAL|FATAL)\)', line):
- error(filename, linenum, 'runtime/vlog', 5,
- 'VLOG() should be used with numeric verbosity level. '
- 'Use LOG() if you want symbolic severity levels.')
-
-
-# Matches invalid increment: *count++, which moves pointer instead of
-# incrementing a value.
-_RE_PATTERN_INVALID_INCREMENT = re.compile(
- r'^\s*\*\w+(\+\+|--);')
-
-
-def CheckInvalidIncrement(filename, clean_lines, linenum, error):
- """Checks for invalid increment *count++.
-
- For example following function:
- void increment_counter(int* count) {
- *count++;
- }
- is invalid, because it effectively does count++, moving pointer, and should
- be replaced with ++*count, (*count)++ or *count += 1.
-
- Args:
- filename: The name of the current file.
- clean_lines: A CleansedLines instance containing the file.
- linenum: The number of the line to check.
- error: The function to call with any errors found.
- """
- line = clean_lines.elided[linenum]
- if _RE_PATTERN_INVALID_INCREMENT.match(line):
- error(filename, linenum, 'runtime/invalid_increment', 5,
- 'Changing pointer instead of value (or unused value of operator*).')
-
-
-class _BlockInfo(object):
- """Stores information about a generic block of code."""
-
- def __init__(self, seen_open_brace):
- self.seen_open_brace = seen_open_brace
- self.open_parentheses = 0
- self.inline_asm = _NO_ASM
-
- def CheckBegin(self, filename, clean_lines, linenum, error):
- """Run checks that applies to text up to the opening brace.
-
- This is mostly for checking the text after the class identifier
- and the "{", usually where the base class is specified. For other
- blocks, there isn't much to check, so we always pass.
-
- Args:
- filename: The name of the current file.
- clean_lines: A CleansedLines instance containing the file.
- linenum: The number of the line to check.
- error: The function to call with any errors found.
- """
- pass
-
- def CheckEnd(self, filename, clean_lines, linenum, error):
- """Run checks that applies to text after the closing brace.
-
- This is mostly used for checking end of namespace comments.
-
- Args:
- filename: The name of the current file.
- clean_lines: A CleansedLines instance containing the file.
- linenum: The number of the line to check.
- error: The function to call with any errors found.
- """
- pass
-
-
-class _ClassInfo(_BlockInfo):
- """Stores information about a class."""
-
- def __init__(self, name, class_or_struct, clean_lines, linenum):
- _BlockInfo.__init__(self, False)
- self.name = name
- self.starting_linenum = linenum
- self.is_derived = False
- if class_or_struct == 'struct':
- self.access = 'public'
- self.is_struct = True
- else:
- self.access = 'private'
- self.is_struct = False
-
- # Remember initial indentation level for this class. Using raw_lines here
- # instead of elided to account for leading comments.
- initial_indent = Match(r'^( *)\S', clean_lines.raw_lines[linenum])
- if initial_indent:
- self.class_indent = len(initial_indent.group(1))
- else:
- self.class_indent = 0
-
- # Try to find the end of the class. This will be confused by things like:
- # class A {
- # } *x = { ...
- #
- # But it's still good enough for CheckSectionSpacing.
- self.last_line = 0
- depth = 0
- for i in range(linenum, clean_lines.NumLines()):
- line = clean_lines.elided[i]
- depth += line.count('{') - line.count('}')
- if not depth:
- self.last_line = i
- break
-
- def CheckBegin(self, filename, clean_lines, linenum, error):
- # Look for a bare ':'
- if Search('(^|[^:]):($|[^:])', clean_lines.elided[linenum]):
- self.is_derived = True
-
- def CheckEnd(self, filename, clean_lines, linenum, error):
- # Check that closing brace is aligned with beginning of the class.
- # Only do this if the closing brace is indented by only whitespaces.
- # This means we will not check single-line class definitions.
- indent = Match(r'^( *)\}', clean_lines.elided[linenum])
- if indent and len(indent.group(1)) != self.class_indent:
- if self.is_struct:
- parent = 'struct ' + self.name
- else:
- parent = 'class ' + self.name
- error(filename, linenum, 'whitespace/indent', 3,
- 'Closing brace should be aligned with beginning of %s' % parent)
-
-
-class _NamespaceInfo(_BlockInfo):
- """Stores information about a namespace."""
-
- def __init__(self, name, linenum):
- _BlockInfo.__init__(self, False)
- self.name = name or ''
- self.starting_linenum = linenum
-
- def CheckEnd(self, filename, clean_lines, linenum, error):
- """Check end of namespace comments."""
- line = clean_lines.raw_lines[linenum]
-
- # Check how many lines is enclosed in this namespace. Don't issue
- # warning for missing namespace comments if there aren't enough
- # lines. However, do apply checks if there is already an end of
- # namespace comment and it's incorrect.
- #
- # TODO(unknown): We always want to check end of namespace comments
- # if a namespace is large, but sometimes we also want to apply the
- # check if a short namespace contained nontrivial things (something
- # other than forward declarations). There is currently no logic on
- # deciding what these nontrivial things are, so this check is
- # triggered by namespace size only, which works most of the time.
- if (linenum - self.starting_linenum < 10
- and not Match(r'};*\s*(//|/\*).*\bnamespace\b', line)):
- return
-
- # Look for matching comment at end of namespace.
- #
- # Note that we accept C style "/* */" comments for terminating
- # namespaces, so that code that terminate namespaces inside
- # preprocessor macros can be cpplint clean.
- #
- # We also accept stuff like "// end of namespace <name>." with the
- # period at the end.
- #
- # Besides these, we don't accept anything else, otherwise we might
- # get false negatives when existing comment is a substring of the
- # expected namespace.
- if self.name:
- # Named namespace
- if not Match((r'};*\s*(//|/\*).*\bnamespace\s+' + re.escape(self.name) +
- r'[\*/\.\\\s]*$'),
- line):
- error(filename, linenum, 'readability/namespace', 5,
- 'Namespace should be terminated with "// namespace %s"' %
- self.name)
- else:
- # Anonymous namespace
- if not Match(r'};*\s*(//|/\*).*\bnamespace[\*/\.\\\s]*$', line):
- error(filename, linenum, 'readability/namespace', 5,
- 'Namespace should be terminated with "// namespace"')
-
-
-class _PreprocessorInfo(object):
- """Stores checkpoints of nesting stacks when #if/#else is seen."""
-
- def __init__(self, stack_before_if):
- # The entire nesting stack before #if
- self.stack_before_if = stack_before_if
-
- # The entire nesting stack up to #else
- self.stack_before_else = []
-
- # Whether we have already seen #else or #elif
- self.seen_else = False
-
-
-class _NestingState(object):
- """Holds states related to parsing braces."""
-
- def __init__(self):
- # Stack for tracking all braces. An object is pushed whenever we
- # see a "{", and popped when we see a "}". Only 3 types of
- # objects are possible:
- # - _ClassInfo: a class or struct.
- # - _NamespaceInfo: a namespace.
- # - _BlockInfo: some other type of block.
- self.stack = []
-
- # Stack of _PreprocessorInfo objects.
- self.pp_stack = []
-
- def SeenOpenBrace(self):
- """Check if we have seen the opening brace for the innermost block.
-
- Returns:
- True if we have seen the opening brace, False if the innermost
- block is still expecting an opening brace.
- """
- return (not self.stack) or self.stack[-1].seen_open_brace
-
- def InNamespaceBody(self):
- """Check if we are currently one level inside a namespace body.
-
- Returns:
- True if top of the stack is a namespace block, False otherwise.
- """
- return self.stack and isinstance(self.stack[-1], _NamespaceInfo)
-
- def UpdatePreprocessor(self, line):
- """Update preprocessor stack.
-
- We need to handle preprocessors due to classes like this:
- #ifdef SWIG
- struct ResultDetailsPageElementExtensionPoint {
- #else
- struct ResultDetailsPageElementExtensionPoint : public Extension {
- #endif
-
- We make the following assumptions (good enough for most files):
- - Preprocessor condition evaluates to true from #if up to first
- #else/#elif/#endif.
-
- - Preprocessor condition evaluates to false from #else/#elif up
- to #endif. We still perform lint checks on these lines, but
- these do not affect nesting stack.
-
- Args:
- line: current line to check.
- """
- if Match(r'^\s*#\s*(if|ifdef|ifndef)\b', line):
- # Beginning of #if block, save the nesting stack here. The saved
- # stack will allow us to restore the parsing state in the #else case.
- self.pp_stack.append(_PreprocessorInfo(copy.deepcopy(self.stack)))
- elif Match(r'^\s*#\s*(else|elif)\b', line):
- # Beginning of #else block
- if self.pp_stack:
- if not self.pp_stack[-1].seen_else:
- # This is the first #else or #elif block. Remember the
- # whole nesting stack up to this point. This is what we
- # keep after the #endif.
- self.pp_stack[-1].seen_else = True
- self.pp_stack[-1].stack_before_else = copy.deepcopy(self.stack)
-
- # Restore the stack to how it was before the #if
- self.stack = copy.deepcopy(self.pp_stack[-1].stack_before_if)
- else:
- # TODO(unknown): unexpected #else, issue warning?
- pass
- elif Match(r'^\s*#\s*endif\b', line):
- # End of #if or #else blocks.
- if self.pp_stack:
- # If we saw an #else, we will need to restore the nesting
- # stack to its former state before the #else, otherwise we
- # will just continue from where we left off.
- if self.pp_stack[-1].seen_else:
- # Here we can just use a shallow copy since we are the last
- # reference to it.
- self.stack = self.pp_stack[-1].stack_before_else
- # Drop the corresponding #if
- self.pp_stack.pop()
- else:
- # TODO(unknown): unexpected #endif, issue warning?
- pass
-
- def Update(self, filename, clean_lines, linenum, error):
- """Update nesting state with current line.
-
- Args:
- filename: The name of the current file.
- clean_lines: A CleansedLines instance containing the file.
- linenum: The number of the line to check.
- error: The function to call with any errors found.
- """
- line = clean_lines.elided[linenum]
-
- # Update pp_stack first
- self.UpdatePreprocessor(line)
-
- # Count parentheses. This is to avoid adding struct arguments to
- # the nesting stack.
- if self.stack:
- inner_block = self.stack[-1]
- depth_change = line.count('(') - line.count(')')
- inner_block.open_parentheses += depth_change
-
- # Also check if we are starting or ending an inline assembly block.
- if inner_block.inline_asm in (_NO_ASM, _END_ASM):
- if (depth_change != 0 and
- inner_block.open_parentheses == 1 and
- _MATCH_ASM.match(line)):
- # Enter assembly block
- inner_block.inline_asm = _INSIDE_ASM
- else:
- # Not entering assembly block. If previous line was _END_ASM,
- # we will now shift to _NO_ASM state.
- inner_block.inline_asm = _NO_ASM
- elif (inner_block.inline_asm == _INSIDE_ASM and
- inner_block.open_parentheses == 0):
- # Exit assembly block
- inner_block.inline_asm = _END_ASM
-
- # Consume namespace declaration at the beginning of the line. Do
- # this in a loop so that we catch same line declarations like this:
- # namespace proto2 { namespace bridge { class MessageSet; } }
- while True:
- # Match start of namespace. The "\b\s*" below catches namespace
- # declarations even if it weren't followed by a whitespace, this
- # is so that we don't confuse our namespace checker. The
- # missing spaces will be flagged by CheckSpacing.
- namespace_decl_match = Match(r'^\s*namespace\b\s*([:\w]+)?(.*)$', line)
- if not namespace_decl_match:
- break
-
- new_namespace = _NamespaceInfo(namespace_decl_match.group(1), linenum)
- self.stack.append(new_namespace)
-
- line = namespace_decl_match.group(2)
- if line.find('{') != -1:
- new_namespace.seen_open_brace = True
- line = line[line.find('{') + 1:]
-
- # Look for a class declaration in whatever is left of the line
- # after parsing namespaces. The regexp accounts for decorated classes
- # such as in:
- # class LOCKABLE API Object {
- # };
- #
- # Templates with class arguments may confuse the parser, for example:
- # template <class T
- # class Comparator = less<T>,
- # class Vector = vector<T> >
- # class HeapQueue {
- #
- # Because this parser has no nesting state about templates, by the
- # time it saw "class Comparator", it may think that it's a new class.
- # Nested templates have a similar problem:
- # template <
- # typename ExportedType,
- # typename TupleType,
- # template <typename, typename> class ImplTemplate>
- #
- # To avoid these cases, we ignore classes that are followed by '=' or '>'
- class_decl_match = Match(
- r'\s*(template\s*<[\w\s<>,:]*>\s*)?'
- r'(class|struct)\s+([A-Z_]+\s+)*(\w+(?:::\w+)*)'
- r'(([^=>]|<[^<>]*>|<[^<>]*<[^<>]*>\s*>)*)$', line)
- if (class_decl_match and
- (not self.stack or self.stack[-1].open_parentheses == 0)):
- self.stack.append(_ClassInfo(
- class_decl_match.group(4), class_decl_match.group(2),
- clean_lines, linenum))
- line = class_decl_match.group(5)
-
- # If we have not yet seen the opening brace for the innermost block,
- # run checks here.
- if not self.SeenOpenBrace():
- self.stack[-1].CheckBegin(filename, clean_lines, linenum, error)
-
- # Update access control if we are inside a class/struct
- if self.stack and isinstance(self.stack[-1], _ClassInfo):
- classinfo = self.stack[-1]
- access_match = Match(
- r'^(.*)\b(public|private|protected|signals)(\s+(?:slots\s*)?)?'
- r':(?:[^:]|$)',
- line)
- if access_match:
- classinfo.access = access_match.group(2)
-
- # Check that access keywords are indented +1 space. Skip this
- # check if the keywords are not preceded by whitespaces.
- indent = access_match.group(1)
- if (len(indent) != classinfo.class_indent + 1 and
- Match(r'^\s*$', indent)):
- if classinfo.is_struct:
- parent = 'struct ' + classinfo.name
- else:
- parent = 'class ' + classinfo.name
- slots = ''
- if access_match.group(3):
- slots = access_match.group(3)
- error(filename, linenum, 'whitespace/indent', 3,
- '%s%s: should be indented +1 space inside %s' % (
- access_match.group(2), slots, parent))
-
- # Consume braces or semicolons from what's left of the line
- while True:
- # Match first brace, semicolon, or closed parenthesis.
- matched = Match(r'^[^{;)}]*([{;)}])(.*)$', line)
- if not matched:
- break
-
- token = matched.group(1)
- if token == '{':
- # If namespace or class hasn't seen a opening brace yet, mark
- # namespace/class head as complete. Push a new block onto the
- # stack otherwise.
- if not self.SeenOpenBrace():
- self.stack[-1].seen_open_brace = True
- else:
- self.stack.append(_BlockInfo(True))
- if _MATCH_ASM.match(line):
- self.stack[-1].inline_asm = _BLOCK_ASM
- elif token == ';' or token == ')':
- # If we haven't seen an opening brace yet, but we already saw
- # a semicolon, this is probably a forward declaration. Pop
- # the stack for these.
- #
- # Similarly, if we haven't seen an opening brace yet, but we
- # already saw a closing parenthesis, then these are probably
- # function arguments with extra "class" or "struct" keywords.
- # Also pop these stack for these.
- if not self.SeenOpenBrace():
- self.stack.pop()
- else: # token == '}'
- # Perform end of block checks and pop the stack.
- if self.stack:
- self.stack[-1].CheckEnd(filename, clean_lines, linenum, error)
- self.stack.pop()
- line = matched.group(2)
-
- def InnermostClass(self):
- """Get class info on the top of the stack.
-
- Returns:
- A _ClassInfo object if we are inside a class, or None otherwise.
- """
- for i in range(len(self.stack), 0, -1):
- classinfo = self.stack[i - 1]
- if isinstance(classinfo, _ClassInfo):
- return classinfo
- return None
-
- def CheckCompletedBlocks(self, filename, error):
- """Checks that all classes and namespaces have been completely parsed.
-
- Call this when all lines in a file have been processed.
- Args:
- filename: The name of the current file.
- error: The function to call with any errors found.
- """
- # Note: This test can result in false positives if #ifdef constructs
- # get in the way of brace matching. See the testBuildClass test in
- # cpplint_unittest.py for an example of this.
- for obj in self.stack:
- if isinstance(obj, _ClassInfo):
- error(filename, obj.starting_linenum, 'build/class', 5,
- 'Failed to find complete declaration of class %s' %
- obj.name)
- elif isinstance(obj, _NamespaceInfo):
- error(filename, obj.starting_linenum, 'build/namespaces', 5,
- 'Failed to find complete declaration of namespace %s' %
- obj.name)
-
-
-def CheckForNonStandardConstructs(filename, clean_lines, linenum,
- nesting_state, error):
- r"""Logs an error if we see certain non-ANSI constructs ignored by gcc-2.
-
- Complain about several constructs which gcc-2 accepts, but which are
- not standard C++. Warning about these in lint is one way to ease the
- transition to new compilers.
- - put storage class first (e.g. "static const" instead of "const static").
- - "%lld" instead of %qd" in printf-type functions.
- - "%1$d" is non-standard in printf-type functions.
- - "\%" is an undefined character escape sequence.
- - text after #endif is not allowed.
- - invalid inner-style forward declaration.
- - >? and <? operators, and their >?= and <?= cousins.
-
- Additionally, check for constructor/destructor style violations and reference
- members, as it is very convenient to do so while checking for
- gcc-2 compliance.
-
- Args:
- filename: The name of the current file.
- clean_lines: A CleansedLines instance containing the file.
- linenum: The number of the line to check.
- nesting_state: A _NestingState instance which maintains information about
- the current stack of nested blocks being parsed.
- error: A callable to which errors are reported, which takes 4 arguments:
- filename, line number, error level, and message
- """
-
- # Remove comments from the line, but leave in strings for now.
- line = clean_lines.lines[linenum]
-
- if Search(r'printf\s*\(.*".*%[-+ ]?\d*q', line):
- error(filename, linenum, 'runtime/printf_format', 3,
- '%q in format strings is deprecated. Use %ll instead.')
-
- if Search(r'printf\s*\(.*".*%\d+\$', line):
- error(filename, linenum, 'runtime/printf_format', 2,
- '%N$ formats are unconventional. Try rewriting to avoid them.')
-
- # Remove escaped backslashes before looking for undefined escapes.
- line = line.replace('\\\\', '')
-
- if Search(r'("|\').*\\(%|\[|\(|{)', line):
- error(filename, linenum, 'build/printf_format', 3,
- '%, [, (, and { are undefined character escapes. Unescape them.')
-
- # For the rest, work with both comments and strings removed.
- line = clean_lines.elided[linenum]
-
- if Search(r'\b(const|volatile|void|char|short|int|long'
- r'|float|double|signed|unsigned'
- r'|schar|u?int8|u?int16|u?int32|u?int64)'
- r'\s+(register|static|extern|typedef)\b',
- line):
- error(filename, linenum, 'build/storage_class', 5,
- 'Storage class (static, extern, typedef, etc) should be first.')
-
- if Match(r'\s*#\s*endif\s*[^/\s]+', line):
- error(filename, linenum, 'build/endif_comment', 5,
- 'Uncommented text after #endif is non-standard. Use a comment.')
-
- if Match(r'\s*class\s+(\w+\s*::\s*)+\w+\s*;', line):
- error(filename, linenum, 'build/forward_decl', 5,
- 'Inner-style forward declarations are invalid. Remove this line.')
-
- if Search(r'(\w+|[+-]?\d+(\.\d*)?)\s*(<|>)\?=?\s*(\w+|[+-]?\d+)(\.\d*)?',
- line):
- error(filename, linenum, 'build/deprecated', 3,
- '>? and <? (max and min) operators are non-standard and deprecated.')
-
- if Search(r'^\s*const\s*string\s*&\s*\w+\s*;', line):
- # TODO(unknown): Could it be expanded safely to arbitrary references,
- # without triggering too many false positives? The first
- # attempt triggered 5 warnings for mostly benign code in the regtest, hence
- # the restriction.
- # Here's the original regexp, for the reference:
- # type_name = r'\w+((\s*::\s*\w+)|(\s*<\s*\w+?\s*>))?'
- # r'\s*const\s*' + type_name + '\s*&\s*\w+\s*;'
- error(filename, linenum, 'runtime/member_string_references', 2,
- 'const string& members are dangerous. It is much better to use '
- 'alternatives, such as pointers or simple constants.')
-
- # Everything else in this function operates on class declarations.
- # Return early if the top of the nesting stack is not a class, or if
- # the class head is not completed yet.
- classinfo = nesting_state.InnermostClass()
- if not classinfo or not classinfo.seen_open_brace:
- return
-
- # The class may have been declared with namespace or classname qualifiers.
- # The constructor and destructor will not have those qualifiers.
- base_classname = classinfo.name.split('::')[-1]
-
- # Look for single-argument constructors that aren't marked explicit.
- # Technically a valid construct, but against style.
- args = Match(r'\s+(?:inline\s+)?%s\s*\(([^,()]+)\)'
- % re.escape(base_classname),
- line)
- if (args and
- args.group(1) != 'void' and
- not Match(r'(const\s+)?%s(\s+const)?\s*(?:<\w+>\s*)?&'
- % re.escape(base_classname), args.group(1).strip())):
- error(filename, linenum, 'runtime/explicit', 5,
- 'Single-argument constructors should be marked explicit.')
-
-
-def CheckSpacingForFunctionCall(filename, line, linenum, error):
- """Checks for the correctness of various spacing around function calls.
-
- Args:
- filename: The name of the current file.
- line: The text of the line to check.
- linenum: The number of the line to check.
- error: The function to call with any errors found.
- """
-
- # Since function calls often occur inside if/for/while/switch
- # expressions - which have their own, more liberal conventions - we
- # first see if we should be looking inside such an expression for a
- # function call, to which we can apply more strict standards.
- fncall = line # if there's no control flow construct, look at whole line
- for pattern in (r'\bif\s*\((.*)\)\s*{',
- r'\bfor\s*\((.*)\)\s*{',
- r'\bwhile\s*\((.*)\)\s*[{;]',
- r'\bswitch\s*\((.*)\)\s*{'):
- match = Search(pattern, line)
- if match:
- fncall = match.group(1) # look inside the parens for function calls
- break
-
- # Except in if/for/while/switch, there should never be space
- # immediately inside parens (eg "f( 3, 4 )"). We make an exception
- # for nested parens ( (a+b) + c ). Likewise, there should never be
- # a space before a ( when it's a function argument. I assume it's a
- # function argument when the char before the whitespace is legal in
- # a function name (alnum + _) and we're not starting a macro. Also ignore
- # pointers and references to arrays and functions coz they're too tricky:
- # we use a very simple way to recognize these:
- # " (something)(maybe-something)" or
- # " (something)(maybe-something," or
- # " (something)[something]"
- # Note that we assume the contents of [] to be short enough that
- # they'll never need to wrap.
- if ( # Ignore control structures.
- not Search(r'\b(if|for|while|switch|return|new|delete|catch|sizeof)\b',
- fncall) and
- # Ignore pointers/references to functions.
- not Search(r' \([^)]+\)\([^)]*(\)|,$)', fncall) and
- # Ignore pointers/references to arrays.
- not Search(r' \([^)]+\)\[[^\]]+\]', fncall)):
- if Search(r'\w\s*\(\s(?!\s*\\$)', fncall): # a ( used for a fn call
- error(filename, linenum, 'whitespace/parens', 4,
- 'Extra space after ( in function call')
- elif Search(r'\(\s+(?!(\s*\\)|\()', fncall):
- error(filename, linenum, 'whitespace/parens', 2,
- 'Extra space after (')
- if (Search(r'\w\s+\(', fncall) and
- not Search(r'#\s*define|typedef', fncall) and
- not Search(r'\w\s+\((\w+::)*\*\w+\)\(', fncall)):
- error(filename, linenum, 'whitespace/parens', 4,
- 'Extra space before ( in function call')
- # If the ) is followed only by a newline or a { + newline, assume it's
- # part of a control statement (if/while/etc), and don't complain
- if Search(r'[^)]\s+\)\s*[^{\s]', fncall):
- # If the closing parenthesis is preceded by only whitespaces,
- # try to give a more descriptive error message.
- if Search(r'^\s+\)', fncall):
- error(filename, linenum, 'whitespace/parens', 2,
- 'Closing ) should be moved to the previous line')
- else:
- error(filename, linenum, 'whitespace/parens', 2,
- 'Extra space before )')
-
-
-def IsBlankLine(line):
- """Returns true if the given line is blank.
-
- We consider a line to be blank if the line is empty or consists of
- only white spaces.
-
- Args:
- line: A line of a string.
-
- Returns:
- True, if the given line is blank.
- """
- return not line or line.isspace()
-
-
-def CheckForFunctionLengths(filename, clean_lines, linenum,
- function_state, error):
- """Reports for long function bodies.
-
- For an overview why this is done, see:
- http://google-styleguide.googlecode.com/svn/trunk/cppguide.xml#Write_Short_Functions
-
- Uses a simplistic algorithm assuming other style guidelines
- (especially spacing) are followed.
- Only checks unindented functions, so class members are unchecked.
- Trivial bodies are unchecked, so constructors with huge initializer lists
- may be missed.
- Blank/comment lines are not counted so as to avoid encouraging the removal
- of vertical space and comments just to get through a lint check.
- NOLINT *on the last line of a function* disables this check.
-
- Args:
- filename: The name of the current file.
- clean_lines: A CleansedLines instance containing the file.
- linenum: The number of the line to check.
- function_state: Current function name and lines in body so far.
- error: The function to call with any errors found.
- """
- lines = clean_lines.lines
- line = lines[linenum]
- raw = clean_lines.raw_lines
- raw_line = raw[linenum]
- joined_line = ''
-
- starting_func = False
- regexp = r'(\w(\w|::|\*|\&|\s)*)\(' # decls * & space::name( ...
- match_result = Match(regexp, line)
- if match_result:
- # If the name is all caps and underscores, figure it's a macro and
- # ignore it, unless it's TEST or TEST_F.
- function_name = match_result.group(1).split()[-1]
- if function_name == 'TEST' or function_name == 'TEST_F' or (
- not Match(r'[A-Z_]+$', function_name)):
- starting_func = True
-
- if starting_func:
- body_found = False
- for start_linenum in xrange(linenum, clean_lines.NumLines()):
- start_line = lines[start_linenum]
- joined_line += ' ' + start_line.lstrip()
- if Search(r'(;|})', start_line): # Declarations and trivial functions
- body_found = True
- break # ... ignore
- elif Search(r'{', start_line):
- body_found = True
- function = Search(r'((\w|:)*)\(', line).group(1)
- if Match(r'TEST', function): # Handle TEST... macros
- parameter_regexp = Search(r'(\(.*\))', joined_line)
- if parameter_regexp: # Ignore bad syntax
- function += parameter_regexp.group(1)
- else:
- function += '()'
- function_state.Begin(function)
- break
- if not body_found:
- # No body for the function (or evidence of a non-function) was found.
- error(filename, linenum, 'readability/fn_size', 5,
- 'Lint failed to find start of function body.')
- elif Match(r'^\}\s*$', line): # function end
- function_state.Check(error, filename, linenum)
- function_state.End()
- elif not Match(r'^\s*$', line):
- function_state.Count() # Count non-blank/non-comment lines.
-
-
-_RE_PATTERN_TODO = re.compile(r'^//(\s*)TODO(\(.+?\))?:?(\s|$)?')
-
-
-def CheckComment(comment, filename, linenum, error):
- """Checks for common mistakes in TODO comments.
-
- Args:
- comment: The text of the comment from the line in question.
- filename: The name of the current file.
- linenum: The number of the line to check.
- error: The function to call with any errors found.
- """
- match = _RE_PATTERN_TODO.match(comment)
- if match:
- # One whitespace is correct; zero whitespace is handled elsewhere.
- leading_whitespace = match.group(1)
- if len(leading_whitespace) > 1:
- error(filename, linenum, 'whitespace/todo', 2,
- 'Too many spaces before TODO')
-
- username = match.group(2)
- if not username:
- error(filename, linenum, 'readability/todo', 2,
- 'Missing username in TODO; it should look like '
- '"// TODO(my_username): Stuff."')
-
- middle_whitespace = match.group(3)
- # Comparisons made explicit for correctness -- pylint: disable=g-explicit-bool-comparison
- if middle_whitespace != ' ' and middle_whitespace != '':
- error(filename, linenum, 'whitespace/todo', 2,
- 'TODO(my_username) should be followed by a space')
-
-def CheckAccess(filename, clean_lines, linenum, nesting_state, error):
- """Checks for improper use of DISALLOW* macros.
-
- Args:
- filename: The name of the current file.
- clean_lines: A CleansedLines instance containing the file.
- linenum: The number of the line to check.
- nesting_state: A _NestingState instance which maintains information about
- the current stack of nested blocks being parsed.
- error: The function to call with any errors found.
- """
- line = clean_lines.elided[linenum] # get rid of comments and strings
-
- matched = Match((r'\s*(DISALLOW_COPY_AND_ASSIGN|'
- r'DISALLOW_EVIL_CONSTRUCTORS|'
- r'DISALLOW_IMPLICIT_CONSTRUCTORS)'), line)
- if not matched:
- return
- if nesting_state.stack and isinstance(nesting_state.stack[-1], _ClassInfo):
- if nesting_state.stack[-1].access != 'private':
- error(filename, linenum, 'readability/constructors', 3,
- '%s must be in the private: section' % matched.group(1))
-
- else:
- # Found DISALLOW* macro outside a class declaration, or perhaps it
- # was used inside a function when it should have been part of the
- # class declaration. We could issue a warning here, but it
- # probably resulted in a compiler error already.
- pass
-
-
-def FindNextMatchingAngleBracket(clean_lines, linenum, init_suffix):
- """Find the corresponding > to close a template.
-
- Args:
- clean_lines: A CleansedLines instance containing the file.
- linenum: Current line number.
- init_suffix: Remainder of the current line after the initial <.
-
- Returns:
- True if a matching bracket exists.
- """
- line = init_suffix
- nesting_stack = ['<']
- while True:
- # Find the next operator that can tell us whether < is used as an
- # opening bracket or as a less-than operator. We only want to
- # warn on the latter case.
- #
- # We could also check all other operators and terminate the search
- # early, e.g. if we got something like this "a<b+c", the "<" is
- # most likely a less-than operator, but then we will get false
- # positives for default arguments and other template expressions.
- match = Search(r'^[^<>(),;\[\]]*([<>(),;\[\]])(.*)$', line)
- if match:
- # Found an operator, update nesting stack
- operator = match.group(1)
- line = match.group(2)
-
- if nesting_stack[-1] == '<':
- # Expecting closing angle bracket
- if operator in ('<', '(', '['):
- nesting_stack.append(operator)
- elif operator == '>':
- nesting_stack.pop()
- if not nesting_stack:
- # Found matching angle bracket
- return True
- elif operator == ',':
- # Got a comma after a bracket, this is most likely a template
- # argument. We have not seen a closing angle bracket yet, but
- # it's probably a few lines later if we look for it, so just
- # return early here.
- return True
- else:
- # Got some other operator.
- return False
-
- else:
- # Expecting closing parenthesis or closing bracket
- if operator in ('<', '(', '['):
- nesting_stack.append(operator)
- elif operator in (')', ']'):
- # We don't bother checking for matching () or []. If we got
- # something like (] or [), it would have been a syntax error.
- nesting_stack.pop()
-
- else:
- # Scan the next line
- linenum += 1
- if linenum >= len(clean_lines.elided):
- break
- line = clean_lines.elided[linenum]
-
- # Exhausted all remaining lines and still no matching angle bracket.
- # Most likely the input was incomplete, otherwise we should have
- # seen a semicolon and returned early.
- return True
-
-
-def FindPreviousMatchingAngleBracket(clean_lines, linenum, init_prefix):
- """Find the corresponding < that started a template.
-
- Args:
- clean_lines: A CleansedLines instance containing the file.
- linenum: Current line number.
- init_prefix: Part of the current line before the initial >.
-
- Returns:
- True if a matching bracket exists.
- """
- line = init_prefix
- nesting_stack = ['>']
- while True:
- # Find the previous operator
- match = Search(r'^(.*)([<>(),;\[\]])[^<>(),;\[\]]*$', line)
- if match:
- # Found an operator, update nesting stack
- operator = match.group(2)
- line = match.group(1)
-
- if nesting_stack[-1] == '>':
- # Expecting opening angle bracket
- if operator in ('>', ')', ']'):
- nesting_stack.append(operator)
- elif operator == '<':
- nesting_stack.pop()
- if not nesting_stack:
- # Found matching angle bracket
- return True
- elif operator == ',':
- # Got a comma before a bracket, this is most likely a
- # template argument. The opening angle bracket is probably
- # there if we look for it, so just return early here.
- return True
- else:
- # Got some other operator.
- return False
-
- else:
- # Expecting opening parenthesis or opening bracket
- if operator in ('>', ')', ']'):
- nesting_stack.append(operator)
- elif operator in ('(', '['):
- nesting_stack.pop()
-
- else:
- # Scan the previous line
- linenum -= 1
- if linenum < 0:
- break
- line = clean_lines.elided[linenum]
-
- # Exhausted all earlier lines and still no matching angle bracket.
- return False
-
-
-def CheckSpacing(filename, clean_lines, linenum, nesting_state, error):
- """Checks for the correctness of various spacing issues in the code.
-
- Things we check for: spaces around operators, spaces after
- if/for/while/switch, no spaces around parens in function calls, two
- spaces between code and comment, don't start a block with a blank
- line, don't end a function with a blank line, don't add a blank line
- after public/protected/private, don't have too many blank lines in a row.
-
- Args:
- filename: The name of the current file.
- clean_lines: A CleansedLines instance containing the file.
- linenum: The number of the line to check.
- nesting_state: A _NestingState instance which maintains information about
- the current stack of nested blocks being parsed.
- error: The function to call with any errors found.
- """
-
- # Don't use "elided" lines here, otherwise we can't check commented lines.
- # Don't want to use "raw" either, because we don't want to check inside C++11
- # raw strings,
- raw = clean_lines.lines_without_raw_strings
- line = raw[linenum]
-
- # Before nixing comments, check if the line is blank for no good
- # reason. This includes the first line after a block is opened, and
- # blank lines at the end of a function (ie, right before a line like '}'
- #
- # Skip all the blank line checks if we are immediately inside a
- # namespace body. In other words, don't issue blank line warnings
- # for this block:
- # namespace {
- #
- # }
- #
- # A warning about missing end of namespace comments will be issued instead.
- if IsBlankLine(line) and not nesting_state.InNamespaceBody():
- elided = clean_lines.elided
- prev_line = elided[linenum - 1]
- prevbrace = prev_line.rfind('{')
- # TODO(unknown): Don't complain if line before blank line, and line after,
- # both start with alnums and are indented the same amount.
- # This ignores whitespace at the start of a namespace block
- # because those are not usually indented.
- if prevbrace != -1 and prev_line[prevbrace:].find('}') == -1:
- # OK, we have a blank line at the start of a code block. Before we
- # complain, we check if it is an exception to the rule: The previous
- # non-empty line has the parameters of a function header that are indented
- # 4 spaces (because they did not fit in a 80 column line when placed on
- # the same line as the function name). We also check for the case where
- # the previous line is indented 6 spaces, which may happen when the
- # initializers of a constructor do not fit into a 80 column line.
- exception = False
- if Match(r' {6}\w', prev_line): # Initializer list?
- # We are looking for the opening column of initializer list, which
- # should be indented 4 spaces to cause 6 space indentation afterwards.
- search_position = linenum-2
- while (search_position >= 0
- and Match(r' {6}\w', elided[search_position])):
- search_position -= 1
- exception = (search_position >= 0
- and elided[search_position][:5] == ' :')
- else:
- # Search for the function arguments or an initializer list. We use a
- # simple heuristic here: If the line is indented 4 spaces; and we have a
- # closing paren, without the opening paren, followed by an opening brace
- # or colon (for initializer lists) we assume that it is the last line of
- # a function header. If we have a colon indented 4 spaces, it is an
- # initializer list.
- exception = (Match(r' {4}\w[^\(]*\)\s*(const\s*)?(\{\s*$|:)',
- prev_line)
- or Match(r' {4}:', prev_line))
-
- if not exception:
- error(filename, linenum, 'whitespace/blank_line', 2,
- 'Redundant blank line at the start of a code block '
- 'should be deleted.')
- # Ignore blank lines at the end of a block in a long if-else
- # chain, like this:
- # if (condition1) {
- # // Something followed by a blank line
- #
- # } else if (condition2) {
- # // Something else
- # }
- if linenum + 1 < clean_lines.NumLines():
- next_line = raw[linenum + 1]
- if (next_line
- and Match(r'\s*}', next_line)
- and next_line.find('} else ') == -1):
- error(filename, linenum, 'whitespace/blank_line', 3,
- 'Redundant blank line at the end of a code block '
- 'should be deleted.')
-
- matched = Match(r'\s*(public|protected|private):', prev_line)
- if matched:
- error(filename, linenum, 'whitespace/blank_line', 3,
- 'Do not leave a blank line after "%s:"' % matched.group(1))
-
- # Next, we complain if there's a comment too near the text
- commentpos = line.find('//')
- if commentpos != -1:
- # Check if the // may be in quotes. If so, ignore it
- # Comparisons made explicit for clarity -- pylint: disable=g-explicit-bool-comparison
- if (line.count('"', 0, commentpos) -
- line.count('\\"', 0, commentpos)) % 2 == 0: # not in quotes
- # Allow one space for new scopes, two spaces otherwise:
- if (not Match(r'^\s*{ //', line) and
- ((commentpos >= 1 and
- line[commentpos-1] not in string.whitespace) or
- (commentpos >= 2 and
- line[commentpos-2] not in string.whitespace))):
- error(filename, linenum, 'whitespace/comments', 2,
- 'At least two spaces is best between code and comments')
- # There should always be a space between the // and the comment
- commentend = commentpos + 2
- if commentend < len(line) and not line[commentend] == ' ':
- # but some lines are exceptions -- e.g. if they're big
- # comment delimiters like:
- # //----------------------------------------------------------
- # or are an empty C++ style Doxygen comment, like:
- # ///
- # or C++ style Doxygen comments placed after the variable:
- # ///< Header comment
- # //!< Header comment
- # or they begin with multiple slashes followed by a space:
- # //////// Header comment
- match = (Search(r'[=/-]{4,}\s*$', line[commentend:]) or
- Search(r'^/$', line[commentend:]) or
- Search(r'^!< ', line[commentend:]) or
- Search(r'^/< ', line[commentend:]) or
- Search(r'^/+ ', line[commentend:]))
- if not match:
- error(filename, linenum, 'whitespace/comments', 4,
- 'Should have a space between // and comment')
- CheckComment(line[commentpos:], filename, linenum, error)
-
- line = clean_lines.elided[linenum] # get rid of comments and strings
-
- # Don't try to do spacing checks for operator methods
- line = re.sub(r'operator(==|!=|<|<<|<=|>=|>>|>)\(', 'operator\(', line)
-
- # We allow no-spaces around = within an if: "if ( (a=Foo()) == 0 )".
- # Otherwise not. Note we only check for non-spaces on *both* sides;
- # sometimes people put non-spaces on one side when aligning ='s among
- # many lines (not that this is behavior that I approve of...)
- if Search(r'[\w.]=[\w.]', line) and not Search(r'\b(if|while) ', line):
- error(filename, linenum, 'whitespace/operators', 4,
- 'Missing spaces around =')
-
- # It's ok not to have spaces around binary operators like + - * /, but if
- # there's too little whitespace, we get concerned. It's hard to tell,
- # though, so we punt on this one for now. TODO.
-
- # You should always have whitespace around binary operators.
- #
- # Check <= and >= first to avoid false positives with < and >, then
- # check non-include lines for spacing around < and >.
- match = Search(r'[^<>=!\s](==|!=|<=|>=)[^<>=!\s]', line)
- if match:
- error(filename, linenum, 'whitespace/operators', 3,
- 'Missing spaces around %s' % match.group(1))
- # We allow no-spaces around << when used like this: 10<<20, but
- # not otherwise (particularly, not when used as streams)
- # Also ignore using ns::operator<<;
- match = Search(r'(operator|\S)(?:L|UL|ULL|l|ul|ull)?<<(\S)', line)
- if (match and
- not (match.group(1).isdigit() and match.group(2).isdigit()) and
- not (match.group(1) == 'operator' and match.group(2) == ';')):
- error(filename, linenum, 'whitespace/operators', 3,
- 'Missing spaces around <<')
- elif not Match(r'#.*include', line):
- # Avoid false positives on ->
- reduced_line = line.replace('->', '')
-
- # Look for < that is not surrounded by spaces. This is only
- # triggered if both sides are missing spaces, even though
- # technically should should flag if at least one side is missing a
- # space. This is done to avoid some false positives with shifts.
- match = Search(r'[^\s<]<([^\s=<].*)', reduced_line)
- if (match and
- not FindNextMatchingAngleBracket(clean_lines, linenum, match.group(1))):
- error(filename, linenum, 'whitespace/operators', 3,
- 'Missing spaces around <')
-
- # Look for > that is not surrounded by spaces. Similar to the
- # above, we only trigger if both sides are missing spaces to avoid
- # false positives with shifts.
- match = Search(r'^(.*[^\s>])>[^\s=>]', reduced_line)
- if (match and
- not FindPreviousMatchingAngleBracket(clean_lines, linenum,
- match.group(1))):
- error(filename, linenum, 'whitespace/operators', 3,
- 'Missing spaces around >')
-
- # We allow no-spaces around >> for almost anything. This is because
- # C++11 allows ">>" to close nested templates, which accounts for
- # most cases when ">>" is not followed by a space.
- #
- # We still warn on ">>" followed by alpha character, because that is
- # likely due to ">>" being used for right shifts, e.g.:
- # value >> alpha
- #
- # When ">>" is used to close templates, the alphanumeric letter that
- # follows would be part of an identifier, and there should still be
- # a space separating the template type and the identifier.
- # type<type<type>> alpha
- match = Search(r'>>[a-zA-Z_]', line)
- if match:
- error(filename, linenum, 'whitespace/operators', 3,
- 'Missing spaces around >>')
-
- # There shouldn't be space around unary operators
- match = Search(r'(!\s|~\s|[\s]--[\s;]|[\s]\+\+[\s;])', line)
- if match:
- error(filename, linenum, 'whitespace/operators', 4,
- 'Extra space for operator %s' % match.group(1))
-
- # A pet peeve of mine: no spaces after an if, while, switch, or for
- match = Search(r' (if\(|for\(|while\(|switch\()', line)
- if match:
- error(filename, linenum, 'whitespace/parens', 5,
- 'Missing space before ( in %s' % match.group(1))
-
- # For if/for/while/switch, the left and right parens should be
- # consistent about how many spaces are inside the parens, and
- # there should either be zero or one spaces inside the parens.
- # We don't want: "if ( foo)" or "if ( foo )".
- # Exception: "for ( ; foo; bar)" and "for (foo; bar; )" are allowed.
- match = Search(r'\b(if|for|while|switch)\s*'
- r'\(([ ]*)(.).*[^ ]+([ ]*)\)\s*{\s*$',
- line)
- if match:
- if len(match.group(2)) != len(match.group(4)):
- if not (match.group(3) == ';' and
- len(match.group(2)) == 1 + len(match.group(4)) or
- not match.group(2) and Search(r'\bfor\s*\(.*; \)', line)):
- error(filename, linenum, 'whitespace/parens', 5,
- 'Mismatching spaces inside () in %s' % match.group(1))
- if len(match.group(2)) not in [0, 1]:
- error(filename, linenum, 'whitespace/parens', 5,
- 'Should have zero or one spaces inside ( and ) in %s' %
- match.group(1))
-
- # You should always have a space after a comma (either as fn arg or operator)
- #
- # This does not apply when the non-space character following the
- # comma is another comma, since the only time when that happens is
- # for empty macro arguments.
- #
- # We run this check in two passes: first pass on elided lines to
- # verify that lines contain missing whitespaces, second pass on raw
- # lines to confirm that those missing whitespaces are not due to
- # elided comments.
- if Search(r',[^,\s]', line) and Search(r',[^,\s]', raw[linenum]):
- error(filename, linenum, 'whitespace/comma', 3,
- 'Missing space after ,')
-
- # You should always have a space after a semicolon
- # except for few corner cases
- # TODO(unknown): clarify if 'if (1) { return 1;}' is requires one more
- # space after ;
- if Search(r';[^\s};\\)/]', line):
- error(filename, linenum, 'whitespace/semicolon', 3,
- 'Missing space after ;')
-
- # Next we will look for issues with function calls.
- CheckSpacingForFunctionCall(filename, line, linenum, error)
-
- # Except after an opening paren, or after another opening brace (in case of
- # an initializer list, for instance), you should have spaces before your
- # braces. And since you should never have braces at the beginning of a line,
- # this is an easy test.
- match = Match(r'^(.*[^ ({]){', line)
- if match:
- # Try a bit harder to check for brace initialization. This
- # happens in one of the following forms:
- # Constructor() : initializer_list_{} { ... }
- # Constructor{}.MemberFunction()
- # Type variable{};
- # FunctionCall(type{}, ...);
- # LastArgument(..., type{});
- # LOG(INFO) << type{} << " ...";
- # map_of_type[{...}] = ...;
- #
- # We check for the character following the closing brace, and
- # silence the warning if it's one of those listed above, i.e.
- # "{.;,)<]".
- #
- # To account for nested initializer list, we allow any number of
- # closing braces up to "{;,)<". We can't simply silence the
- # warning on first sight of closing brace, because that would
- # cause false negatives for things that are not initializer lists.
- # Silence this: But not this:
- # Outer{ if (...) {
- # Inner{...} if (...){ // Missing space before {
- # }; }
- #
- # There is a false negative with this approach if people inserted
- # spurious semicolons, e.g. "if (cond){};", but we will catch the
- # spurious semicolon with a separate check.
- (endline, endlinenum, endpos) = CloseExpression(
- clean_lines, linenum, len(match.group(1)))
- trailing_text = ''
- if endpos > -1:
- trailing_text = endline[endpos:]
- for offset in xrange(endlinenum + 1,
- min(endlinenum + 3, clean_lines.NumLines() - 1)):
- trailing_text += clean_lines.elided[offset]
- if not Match(r'^[\s}]*[{.;,)<\]]', trailing_text):
- error(filename, linenum, 'whitespace/braces', 5,
- 'Missing space before {')
-
- # Make sure '} else {' has spaces.
- if Search(r'}else', line):
- error(filename, linenum, 'whitespace/braces', 5,
- 'Missing space before else')
-
- # You shouldn't have spaces before your brackets, except maybe after
- # 'delete []' or 'new char * []'.
- if Search(r'\w\s+\[', line) and not Search(r'delete\s+\[', line):
- error(filename, linenum, 'whitespace/braces', 5,
- 'Extra space before [')
-
- # You shouldn't have a space before a semicolon at the end of the line.
- # There's a special case for "for" since the style guide allows space before
- # the semicolon there.
- if Search(r':\s*;\s*$', line):
- error(filename, linenum, 'whitespace/semicolon', 5,
- 'Semicolon defining empty statement. Use {} instead.')
- elif Search(r'^\s*;\s*$', line):
- error(filename, linenum, 'whitespace/semicolon', 5,
- 'Line contains only semicolon. If this should be an empty statement, '
- 'use {} instead.')
- elif (Search(r'\s+;\s*$', line) and
- not Search(r'\bfor\b', line)):
- error(filename, linenum, 'whitespace/semicolon', 5,
- 'Extra space before last semicolon. If this should be an empty '
- 'statement, use {} instead.')
-
- # In range-based for, we wanted spaces before and after the colon, but
- # not around "::" tokens that might appear.
- if (Search('for *\(.*[^:]:[^: ]', line) or
- Search('for *\(.*[^: ]:[^:]', line)):
- error(filename, linenum, 'whitespace/forcolon', 2,
- 'Missing space around colon in range-based for loop')
-
-
-def CheckSectionSpacing(filename, clean_lines, class_info, linenum, error):
- """Checks for additional blank line issues related to sections.
-
- Currently the only thing checked here is blank line before protected/private.
-
- Args:
- filename: The name of the current file.
- clean_lines: A CleansedLines instance containing the file.
- class_info: A _ClassInfo objects.
- linenum: The number of the line to check.
- error: The function to call with any errors found.
- """
- # Skip checks if the class is small, where small means 25 lines or less.
- # 25 lines seems like a good cutoff since that's the usual height of
- # terminals, and any class that can't fit in one screen can't really
- # be considered "small".
- #
- # Also skip checks if we are on the first line. This accounts for
- # classes that look like
- # class Foo { public: ... };
- #
- # If we didn't find the end of the class, last_line would be zero,
- # and the check will be skipped by the first condition.
- if (class_info.last_line - class_info.starting_linenum <= 24 or
- linenum <= class_info.starting_linenum):
- return
-
- matched = Match(r'\s*(public|protected|private):', clean_lines.lines[linenum])
- if matched:
- # Issue warning if the line before public/protected/private was
- # not a blank line, but don't do this if the previous line contains
- # "class" or "struct". This can happen two ways:
- # - We are at the beginning of the class.
- # - We are forward-declaring an inner class that is semantically
- # private, but needed to be public for implementation reasons.
- # Also ignores cases where the previous line ends with a backslash as can be
- # common when defining classes in C macros.
- prev_line = clean_lines.lines[linenum - 1]
- if (not IsBlankLine(prev_line) and
- not Search(r'\b(class|struct)\b', prev_line) and
- not Search(r'\\$', prev_line)):
- # Try a bit harder to find the beginning of the class. This is to
- # account for multi-line base-specifier lists, e.g.:
- # class Derived
- # : public Base {
- end_class_head = class_info.starting_linenum
- for i in range(class_info.starting_linenum, linenum):
- if Search(r'\{\s*$', clean_lines.lines[i]):
- end_class_head = i
- break
- if end_class_head < linenum - 1:
- error(filename, linenum, 'whitespace/blank_line', 3,
- '"%s:" should be preceded by a blank line' % matched.group(1))
-
-
-def GetPreviousNonBlankLine(clean_lines, linenum):
- """Return the most recent non-blank line and its line number.
-
- Args:
- clean_lines: A CleansedLines instance containing the file contents.
- linenum: The number of the line to check.
-
- Returns:
- A tuple with two elements. The first element is the contents of the last
- non-blank line before the current line, or the empty string if this is the
- first non-blank line. The second is the line number of that line, or -1
- if this is the first non-blank line.
- """
-
- prevlinenum = linenum - 1
- while prevlinenum >= 0:
- prevline = clean_lines.elided[prevlinenum]
- if not IsBlankLine(prevline): # if not a blank line...
- return (prevline, prevlinenum)
- prevlinenum -= 1
- return ('', -1)
-
-
-def CheckBraces(filename, clean_lines, linenum, error):
- """Looks for misplaced braces (e.g. at the end of line).
-
- Args:
- filename: The name of the current file.
- clean_lines: A CleansedLines instance containing the file.
- linenum: The number of the line to check.
- error: The function to call with any errors found.
- """
-
- line = clean_lines.elided[linenum] # get rid of comments and strings
-
- if Match(r'\s*{\s*$', line):
- # We allow an open brace to start a line in the case where someone is using
- # braces in a block to explicitly create a new scope, which is commonly used
- # to control the lifetime of stack-allocated variables. Braces are also
- # used for brace initializers inside function calls. We don't detect this
- # perfectly: we just don't complain if the last non-whitespace character on
- # the previous non-blank line is ',', ';', ':', '(', '{', or '}', or if the
- # previous line starts a preprocessor block.
- prevline = GetPreviousNonBlankLine(clean_lines, linenum)[0]
- if (not Search(r'[,;:}{(]\s*$', prevline) and
- not Match(r'\s*#', prevline)):
- error(filename, linenum, 'whitespace/braces', 4,
- '{ should almost always be at the end of the previous line')
-
- # An else clause should be on the same line as the preceding closing brace.
- if Match(r'\s*else\s*', line):
- prevline = GetPreviousNonBlankLine(clean_lines, linenum)[0]
- if Match(r'\s*}\s*$', prevline):
- error(filename, linenum, 'whitespace/newline', 4,
- 'An else should appear on the same line as the preceding }')
-
- # If braces come on one side of an else, they should be on both.
- # However, we have to worry about "else if" that spans multiple lines!
- if Search(r'}\s*else[^{]*$', line) or Match(r'[^}]*else\s*{', line):
- if Search(r'}\s*else if([^{]*)$', line): # could be multi-line if
- # find the ( after the if
- pos = line.find('else if')
- pos = line.find('(', pos)
- if pos > 0:
- (endline, _, endpos) = CloseExpression(clean_lines, linenum, pos)
- if endline[endpos:].find('{') == -1: # must be brace after if
- error(filename, linenum, 'readability/braces', 5,
- 'If an else has a brace on one side, it should have it on both')
- else: # common case: else not followed by a multi-line if
- error(filename, linenum, 'readability/braces', 5,
- 'If an else has a brace on one side, it should have it on both')
-
- # Likewise, an else should never have the else clause on the same line
- if Search(r'\belse [^\s{]', line) and not Search(r'\belse if\b', line):
- error(filename, linenum, 'whitespace/newline', 4,
- 'Else clause should never be on same line as else (use 2 lines)')
-
- # In the same way, a do/while should never be on one line
- if Match(r'\s*do [^\s{]', line):
- error(filename, linenum, 'whitespace/newline', 4,
- 'do/while clauses should not be on a single line')
-
- # Block bodies should not be followed by a semicolon. Due to C++11
- # brace initialization, there are more places where semicolons are
- # required than not, so we use a whitelist approach to check these
- # rather than a blacklist. These are the places where "};" should
- # be replaced by just "}":
- # 1. Some flavor of block following closing parenthesis:
- # for (;;) {};
- # while (...) {};
- # switch (...) {};
- # Function(...) {};
- # if (...) {};
- # if (...) else if (...) {};
- #
- # 2. else block:
- # if (...) else {};
- #
- # 3. const member function:
- # Function(...) const {};
- #
- # 4. Block following some statement:
- # x = 42;
- # {};
- #
- # 5. Block at the beginning of a function:
- # Function(...) {
- # {};
- # }
- #
- # Note that naively checking for the preceding "{" will also match
- # braces inside multi-dimensional arrays, but this is fine since
- # that expression will not contain semicolons.
- #
- # 6. Block following another block:
- # while (true) {}
- # {};
- #
- # 7. End of namespaces:
- # namespace {};
- #
- # These semicolons seems far more common than other kinds of
- # redundant semicolons, possibly due to people converting classes
- # to namespaces. For now we do not warn for this case.
- #
- # Try matching case 1 first.
- match = Match(r'^(.*\)\s*)\{', line)
- if match:
- # Matched closing parenthesis (case 1). Check the token before the
- # matching opening parenthesis, and don't warn if it looks like a
- # macro. This avoids these false positives:
- # - macro that defines a base class
- # - multi-line macro that defines a base class
- # - macro that defines the whole class-head
- #
- # But we still issue warnings for macros that we know are safe to
- # warn, specifically:
- # - TEST, TEST_F, TEST_P, MATCHER, MATCHER_P
- # - TYPED_TEST
- # - INTERFACE_DEF
- # - EXCLUSIVE_LOCKS_REQUIRED, SHARED_LOCKS_REQUIRED, LOCKS_EXCLUDED:
- #
- # We implement a whitelist of safe macros instead of a blacklist of
- # unsafe macros, even though the latter appears less frequently in
- # google code and would have been easier to implement. This is because
- # the downside for getting the whitelist wrong means some extra
- # semicolons, while the downside for getting the blacklist wrong
- # would result in compile errors.
- #
- # In addition to macros, we also don't want to warn on compound
- # literals.
- closing_brace_pos = match.group(1).rfind(')')
- opening_parenthesis = ReverseCloseExpression(
- clean_lines, linenum, closing_brace_pos)
- if opening_parenthesis[2] > -1:
- line_prefix = opening_parenthesis[0][0:opening_parenthesis[2]]
- macro = Search(r'\b([A-Z_]+)\s*$', line_prefix)
- if ((macro and
- macro.group(1) not in (
- 'TEST', 'TEST_F', 'MATCHER', 'MATCHER_P', 'TYPED_TEST',
- 'EXCLUSIVE_LOCKS_REQUIRED', 'SHARED_LOCKS_REQUIRED',
- 'LOCKS_EXCLUDED', 'INTERFACE_DEF')) or
- Search(r'\s+=\s*$', line_prefix)):
- match = None
-
- else:
- # Try matching cases 2-3.
- match = Match(r'^(.*(?:else|\)\s*const)\s*)\{', line)
- if not match:
- # Try matching cases 4-6. These are always matched on separate lines.
- #
- # Note that we can't simply concatenate the previous line to the
- # current line and do a single match, otherwise we may output
- # duplicate warnings for the blank line case:
- # if (cond) {
- # // blank line
- # }
- prevline = GetPreviousNonBlankLine(clean_lines, linenum)[0]
- if prevline and Search(r'[;{}]\s*$', prevline):
- match = Match(r'^(\s*)\{', line)
-
- # Check matching closing brace
- if match:
- (endline, endlinenum, endpos) = CloseExpression(
- clean_lines, linenum, len(match.group(1)))
- if endpos > -1 and Match(r'^\s*;', endline[endpos:]):
- # Current {} pair is eligible for semicolon check, and we have found
- # the redundant semicolon, output warning here.
- #
- # Note: because we are scanning forward for opening braces, and
- # outputting warnings for the matching closing brace, if there are
- # nested blocks with trailing semicolons, we will get the error
- # messages in reversed order.
- error(filename, endlinenum, 'readability/braces', 4,
- "You don't need a ; after a }")
-
-
-def CheckEmptyBlockBody(filename, clean_lines, linenum, error):
- """Look for empty loop/conditional body with only a single semicolon.
-
- Args:
- filename: The name of the current file.
- clean_lines: A CleansedLines instance containing the file.
- linenum: The number of the line to check.
- error: The function to call with any errors found.
- """
-
- # Search for loop keywords at the beginning of the line. Because only
- # whitespaces are allowed before the keywords, this will also ignore most
- # do-while-loops, since those lines should start with closing brace.
- #
- # We also check "if" blocks here, since an empty conditional block
- # is likely an error.
- line = clean_lines.elided[linenum]
- matched = Match(r'\s*(for|while|if)\s*\(', line)
- if matched:
- # Find the end of the conditional expression
- (end_line, end_linenum, end_pos) = CloseExpression(
- clean_lines, linenum, line.find('('))
-
- # Output warning if what follows the condition expression is a semicolon.
- # No warning for all other cases, including whitespace or newline, since we
- # have a separate check for semicolons preceded by whitespace.
- if end_pos >= 0 and Match(r';', end_line[end_pos:]):
- if matched.group(1) == 'if':
- error(filename, end_linenum, 'whitespace/empty_conditional_body', 5,
- 'Empty conditional bodies should use {}')
- else:
- error(filename, end_linenum, 'whitespace/empty_loop_body', 5,
- 'Empty loop bodies should use {} or continue')
-
-
-def CheckCheck(filename, clean_lines, linenum, error):
- """Checks the use of CHECK and EXPECT macros.
-
- Args:
- filename: The name of the current file.
- clean_lines: A CleansedLines instance containing the file.
- linenum: The number of the line to check.
- error: The function to call with any errors found.
- """
-
- # Decide the set of replacement macros that should be suggested
- lines = clean_lines.elided
- check_macro = None
- start_pos = -1
- for macro in _CHECK_MACROS:
- i = lines[linenum].find(macro)
- if i >= 0:
- check_macro = macro
-
- # Find opening parenthesis. Do a regular expression match here
- # to make sure that we are matching the expected CHECK macro, as
- # opposed to some other macro that happens to contain the CHECK
- # substring.
- matched = Match(r'^(.*\b' + check_macro + r'\s*)\(', lines[linenum])
- if not matched:
- continue
- start_pos = len(matched.group(1))
- break
- if not check_macro or start_pos < 0:
- # Don't waste time here if line doesn't contain 'CHECK' or 'EXPECT'
- return
-
- # Find end of the boolean expression by matching parentheses
- (last_line, end_line, end_pos) = CloseExpression(
- clean_lines, linenum, start_pos)
- if end_pos < 0:
- return
- if linenum == end_line:
- expression = lines[linenum][start_pos + 1:end_pos - 1]
- else:
- expression = lines[linenum][start_pos + 1:]
- for i in xrange(linenum + 1, end_line):
- expression += lines[i]
- expression += last_line[0:end_pos - 1]
-
- # Parse expression so that we can take parentheses into account.
- # This avoids false positives for inputs like "CHECK((a < 4) == b)",
- # which is not replaceable by CHECK_LE.
- lhs = ''
- rhs = ''
- operator = None
- while expression:
- matched = Match(r'^\s*(<<|<<=|>>|>>=|->\*|->|&&|\|\||'
- r'==|!=|>=|>|<=|<|\()(.*)$', expression)
- if matched:
- token = matched.group(1)
- if token == '(':
- # Parenthesized operand
- expression = matched.group(2)
- (end, _) = FindEndOfExpressionInLine(expression, 0, 1, '(', ')')
- if end < 0:
- return # Unmatched parenthesis
- lhs += '(' + expression[0:end]
- expression = expression[end:]
- elif token in ('&&', '||'):
- # Logical and/or operators. This means the expression
- # contains more than one term, for example:
- # CHECK(42 < a && a < b);
- #
- # These are not replaceable with CHECK_LE, so bail out early.
- return
- elif token in ('<<', '<<=', '>>', '>>=', '->*', '->'):
- # Non-relational operator
- lhs += token
- expression = matched.group(2)
- else:
- # Relational operator
- operator = token
- rhs = matched.group(2)
- break
- else:
- # Unparenthesized operand. Instead of appending to lhs one character
- # at a time, we do another regular expression match to consume several
- # characters at once if possible. Trivial benchmark shows that this
- # is more efficient when the operands are longer than a single
- # character, which is generally the case.
- matched = Match(r'^([^-=!<>()&|]+)(.*)$', expression)
- if not matched:
- matched = Match(r'^(\s*\S)(.*)$', expression)
- if not matched:
- break
- lhs += matched.group(1)
- expression = matched.group(2)
-
- # Only apply checks if we got all parts of the boolean expression
- if not (lhs and operator and rhs):
- return
-
- # Check that rhs do not contain logical operators. We already know
- # that lhs is fine since the loop above parses out && and ||.
- if rhs.find('&&') > -1 or rhs.find('||') > -1:
- return
-
- # At least one of the operands must be a constant literal. This is
- # to avoid suggesting replacements for unprintable things like
- # CHECK(variable != iterator)
- #
- # The following pattern matches decimal, hex integers, strings, and
- # characters (in that order).
- lhs = lhs.strip()
- rhs = rhs.strip()
- match_constant = r'^([-+]?(\d+|0[xX][0-9a-fA-F]+)[lLuU]{0,3}|".*"|\'.*\')$'
- if Match(match_constant, lhs) or Match(match_constant, rhs):
- # Note: since we know both lhs and rhs, we can provide a more
- # descriptive error message like:
- # Consider using CHECK_EQ(x, 42) instead of CHECK(x == 42)
- # Instead of:
- # Consider using CHECK_EQ instead of CHECK(a == b)
- #
- # We are still keeping the less descriptive message because if lhs
- # or rhs gets long, the error message might become unreadable.
- error(filename, linenum, 'readability/check', 2,
- 'Consider using %s instead of %s(a %s b)' % (
- _CHECK_REPLACEMENT[check_macro][operator],
- check_macro, operator))
-
-
-def CheckAltTokens(filename, clean_lines, linenum, error):
- """Check alternative keywords being used in boolean expressions.
-
- Args:
- filename: The name of the current file.
- clean_lines: A CleansedLines instance containing the file.
- linenum: The number of the line to check.
- error: The function to call with any errors found.
- """
- line = clean_lines.elided[linenum]
-
- # Avoid preprocessor lines
- if Match(r'^\s*#', line):
- return
-
- # Last ditch effort to avoid multi-line comments. This will not help
- # if the comment started before the current line or ended after the
- # current line, but it catches most of the false positives. At least,
- # it provides a way to workaround this warning for people who use
- # multi-line comments in preprocessor macros.
- #
- # TODO(unknown): remove this once cpplint has better support for
- # multi-line comments.
- if line.find('/*') >= 0 or line.find('*/') >= 0:
- return
-
- for match in _ALT_TOKEN_REPLACEMENT_PATTERN.finditer(line):
- error(filename, linenum, 'readability/alt_tokens', 2,
- 'Use operator %s instead of %s' % (
- _ALT_TOKEN_REPLACEMENT[match.group(1)], match.group(1)))
-
-
-def GetLineWidth(line):
- """Determines the width of the line in column positions.
-
- Args:
- line: A string, which may be a Unicode string.
-
- Returns:
- The width of the line in column positions, accounting for Unicode
- combining characters and wide characters.
- """
- if isinstance(line, unicode):
- width = 0
- for uc in unicodedata.normalize('NFC', line):
- if unicodedata.east_asian_width(uc) in ('W', 'F'):
- width += 2
- elif not unicodedata.combining(uc):
- width += 1
- return width
- else:
- return len(line)
-
-
-def CheckStyle(filename, clean_lines, linenum, file_extension, nesting_state,
- error):
- """Checks rules from the 'C++ style rules' section of cppguide.html.
-
- Most of these rules are hard to test (naming, comment style), but we
- do what we can. In particular we check for 2-space indents, line lengths,
- tab usage, spaces inside code, etc.
-
- Args:
- filename: The name of the current file.
- clean_lines: A CleansedLines instance containing the file.
- linenum: The number of the line to check.
- file_extension: The extension (without the dot) of the filename.
- nesting_state: A _NestingState instance which maintains information about
- the current stack of nested blocks being parsed.
- error: The function to call with any errors found.
- """
-
- # Don't use "elided" lines here, otherwise we can't check commented lines.
- # Don't want to use "raw" either, because we don't want to check inside C++11
- # raw strings,
- raw_lines = clean_lines.lines_without_raw_strings
- line = raw_lines[linenum]
-
- if line.find('\t') != -1:
- error(filename, linenum, 'whitespace/tab', 1,
- 'Tab found; better to use spaces')
-
- # One or three blank spaces at the beginning of the line is weird; it's
- # hard to reconcile that with 2-space indents.
- # NOTE: here are the conditions rob pike used for his tests. Mine aren't
- # as sophisticated, but it may be worth becoming so: RLENGTH==initial_spaces
- # if(RLENGTH > 20) complain = 0;
- # if(match($0, " +(error|private|public|protected):")) complain = 0;
- # if(match(prev, "&& *$")) complain = 0;
- # if(match(prev, "\\|\\| *$")) complain = 0;
- # if(match(prev, "[\",=><] *$")) complain = 0;
- # if(match($0, " <<")) complain = 0;
- # if(match(prev, " +for \\(")) complain = 0;
- # if(prevodd && match(prevprev, " +for \\(")) complain = 0;
- initial_spaces = 0
- cleansed_line = clean_lines.elided[linenum]
- while initial_spaces < len(line) and line[initial_spaces] == ' ':
- initial_spaces += 1
- if line and line[-1].isspace():
- error(filename, linenum, 'whitespace/end_of_line', 4,
- 'Line ends in whitespace. Consider deleting these extra spaces.')
- # There are certain situations we allow one space, notably for section labels
- elif ((initial_spaces == 1 or initial_spaces == 3) and
- not Match(r'\s*\w+\s*:\s*$', cleansed_line)):
- error(filename, linenum, 'whitespace/indent', 3,
- 'Weird number of spaces at line-start. '
- 'Are you using a 2-space indent?')
-
- # Check if the line is a header guard.
- is_header_guard = False
- if file_extension == 'h':
- cppvar = GetHeaderGuardCPPVariable(filename)
- if (line.startswith('#ifndef %s' % cppvar) or
- line.startswith('#define %s' % cppvar) or
- line.startswith('#endif // %s' % cppvar)):
- is_header_guard = True
- # #include lines and header guards can be long, since there's no clean way to
- # split them.
- #
- # URLs can be long too. It's possible to split these, but it makes them
- # harder to cut&paste.
- #
- # The "$Id:...$" comment may also get very long without it being the
- # developers fault.
- if (not line.startswith('#include') and not is_header_guard and
- not Match(r'^\s*//.*http(s?)://\S*$', line) and
- not Match(r'^// \$Id:.*#[0-9]+ \$$', line)):
- line_width = GetLineWidth(line)
- extended_length = int((_line_length * 1.25))
- if line_width > extended_length:
- error(filename, linenum, 'whitespace/line_length', 4,
- 'Lines should very rarely be longer than %i characters' %
- extended_length)
- elif line_width > _line_length:
- error(filename, linenum, 'whitespace/line_length', 2,
- 'Lines should be <= %i characters long' % _line_length)
-
- if (cleansed_line.count(';') > 1 and
- # for loops are allowed two ;'s (and may run over two lines).
- cleansed_line.find('for') == -1 and
- (GetPreviousNonBlankLine(clean_lines, linenum)[0].find('for') == -1 or
- GetPreviousNonBlankLine(clean_lines, linenum)[0].find(';') != -1) and
- # It's ok to have many commands in a switch case that fits in 1 line
- not ((cleansed_line.find('case ') != -1 or
- cleansed_line.find('default:') != -1) and
- cleansed_line.find('break;') != -1)):
- error(filename, linenum, 'whitespace/newline', 0,
- 'More than one command on the same line')
-
- # Some more style checks
- CheckBraces(filename, clean_lines, linenum, error)
- CheckEmptyBlockBody(filename, clean_lines, linenum, error)
- CheckAccess(filename, clean_lines, linenum, nesting_state, error)
- CheckSpacing(filename, clean_lines, linenum, nesting_state, error)
- CheckCheck(filename, clean_lines, linenum, error)
- CheckAltTokens(filename, clean_lines, linenum, error)
- classinfo = nesting_state.InnermostClass()
- if classinfo:
- CheckSectionSpacing(filename, clean_lines, classinfo, linenum, error)
-
-
-_RE_PATTERN_INCLUDE_NEW_STYLE = re.compile(r'#include +"[^/]+\.h"')
-_RE_PATTERN_INCLUDE = re.compile(r'^\s*#\s*include\s*([<"])([^>"]*)[>"].*$')
-# Matches the first component of a filename delimited by -s and _s. That is:
-# _RE_FIRST_COMPONENT.match('foo').group(0) == 'foo'
-# _RE_FIRST_COMPONENT.match('foo.cc').group(0) == 'foo'
-# _RE_FIRST_COMPONENT.match('foo-bar_baz.cc').group(0) == 'foo'
-# _RE_FIRST_COMPONENT.match('foo_bar-baz.cc').group(0) == 'foo'
-_RE_FIRST_COMPONENT = re.compile(r'^[^-_.]+')
-
-
-def _DropCommonSuffixes(filename):
- """Drops common suffixes like _test.cc or -inl.h from filename.
-
- For example:
- >>> _DropCommonSuffixes('foo/foo-inl.h')
- 'foo/foo'
- >>> _DropCommonSuffixes('foo/bar/foo.cc')
- 'foo/bar/foo'
- >>> _DropCommonSuffixes('foo/foo_internal.h')
- 'foo/foo'
- >>> _DropCommonSuffixes('foo/foo_unusualinternal.h')
- 'foo/foo_unusualinternal'
-
- Args:
- filename: The input filename.
-
- Returns:
- The filename with the common suffix removed.
- """
- for suffix in ('test.cc', 'regtest.cc', 'unittest.cc',
- 'inl.h', 'impl.h', 'internal.h'):
- if (filename.endswith(suffix) and len(filename) > len(suffix) and
- filename[-len(suffix) - 1] in ('-', '_')):
- return filename[:-len(suffix) - 1]
- return os.path.splitext(filename)[0]
-
-
-def _IsTestFilename(filename):
- """Determines if the given filename has a suffix that identifies it as a test.
-
- Args:
- filename: The input filename.
-
- Returns:
- True if 'filename' looks like a test, False otherwise.
- """
- if (filename.endswith('_test.cc') or
- filename.endswith('_unittest.cc') or
- filename.endswith('_regtest.cc')):
- return True
- else:
- return False
-
-
-def _ClassifyInclude(fileinfo, include, is_system):
- """Figures out what kind of header 'include' is.
-
- Args:
- fileinfo: The current file cpplint is running over. A FileInfo instance.
- include: The path to a #included file.
- is_system: True if the #include used <> rather than "".
-
- Returns:
- One of the _XXX_HEADER constants.
-
- For example:
- >>> _ClassifyInclude(FileInfo('foo/foo.cc'), 'stdio.h', True)
- _C_SYS_HEADER
- >>> _ClassifyInclude(FileInfo('foo/foo.cc'), 'string', True)
- _CPP_SYS_HEADER
- >>> _ClassifyInclude(FileInfo('foo/foo.cc'), 'foo/foo.h', False)
- _LIKELY_MY_HEADER
- >>> _ClassifyInclude(FileInfo('foo/foo_unknown_extension.cc'),
- ... 'bar/foo_other_ext.h', False)
- _POSSIBLE_MY_HEADER
- >>> _ClassifyInclude(FileInfo('foo/foo.cc'), 'foo/bar.h', False)
- _OTHER_HEADER
- """
- # This is a list of all standard c++ header files, except
- # those already checked for above.
- is_cpp_h = include in _CPP_HEADERS
-
- if is_system:
- if is_cpp_h:
- return _CPP_SYS_HEADER
- else:
- return _C_SYS_HEADER
-
- # If the target file and the include we're checking share a
- # basename when we drop common extensions, and the include
- # lives in . , then it's likely to be owned by the target file.
- target_dir, target_base = (
- os.path.split(_DropCommonSuffixes(fileinfo.RepositoryName())))
- include_dir, include_base = os.path.split(_DropCommonSuffixes(include))
- if target_base == include_base and (
- include_dir == target_dir or
- include_dir == os.path.normpath(target_dir + '/../public')):
- return _LIKELY_MY_HEADER
-
- # If the target and include share some initial basename
- # component, it's possible the target is implementing the
- # include, so it's allowed to be first, but we'll never
- # complain if it's not there.
- target_first_component = _RE_FIRST_COMPONENT.match(target_base)
- include_first_component = _RE_FIRST_COMPONENT.match(include_base)
- if (target_first_component and include_first_component and
- target_first_component.group(0) ==
- include_first_component.group(0)):
- return _POSSIBLE_MY_HEADER
-
- return _OTHER_HEADER
-
-
-
-def CheckIncludeLine(filename, clean_lines, linenum, include_state, error):
- """Check rules that are applicable to #include lines.
-
- Strings on #include lines are NOT removed from elided line, to make
- certain tasks easier. However, to prevent false positives, checks
- applicable to #include lines in CheckLanguage must be put here.
-
- Args:
- filename: The name of the current file.
- clean_lines: A CleansedLines instance containing the file.
- linenum: The number of the line to check.
- include_state: An _IncludeState instance in which the headers are inserted.
- error: The function to call with any errors found.
- """
- fileinfo = FileInfo(filename)
-
- line = clean_lines.lines[linenum]
-
- # "include" should use the new style "foo/bar.h" instead of just "bar.h"
- if _RE_PATTERN_INCLUDE_NEW_STYLE.search(line):
- error(filename, linenum, 'build/include', 4,
- 'Include the directory when naming .h files')
-
- # we shouldn't include a file more than once. actually, there are a
- # handful of instances where doing so is okay, but in general it's
- # not.
- match = _RE_PATTERN_INCLUDE.search(line)
- if match:
- include = match.group(2)
- is_system = (match.group(1) == '<')
- if include in include_state:
- error(filename, linenum, 'build/include', 4,
- '"%s" already included at %s:%s' %
- (include, filename, include_state[include]))
- else:
- include_state[include] = linenum
-
- # We want to ensure that headers appear in the right order:
- # 1) for foo.cc, foo.h (preferred location)
- # 2) c system files
- # 3) cpp system files
- # 4) for foo.cc, foo.h (deprecated location)
- # 5) other google headers
- #
- # We classify each include statement as one of those 5 types
- # using a number of techniques. The include_state object keeps
- # track of the highest type seen, and complains if we see a
- # lower type after that.
- error_message = include_state.CheckNextIncludeOrder(
- _ClassifyInclude(fileinfo, include, is_system))
- if error_message:
- error(filename, linenum, 'build/include_order', 4,
- '%s. Should be: %s.h, c system, c++ system, other.' %
- (error_message, fileinfo.BaseName()))
- canonical_include = include_state.CanonicalizeAlphabeticalOrder(include)
- if not include_state.IsInAlphabeticalOrder(
- clean_lines, linenum, canonical_include):
- error(filename, linenum, 'build/include_alpha', 4,
- 'Include "%s" not in alphabetical order' % include)
- include_state.SetLastHeader(canonical_include)
-
- # Look for any of the stream classes that are part of standard C++.
- match = _RE_PATTERN_INCLUDE.match(line)
- if match:
- include = match.group(2)
- if Match(r'(f|ind|io|i|o|parse|pf|stdio|str|)?stream$', include):
- # Many unit tests use cout, so we exempt them.
- if not _IsTestFilename(filename):
- error(filename, linenum, 'readability/streams', 3,
- 'Streams are highly discouraged.')
-
-
-def _GetTextInside(text, start_pattern):
- r"""Retrieves all the text between matching open and close parentheses.
-
- Given a string of lines and a regular expression string, retrieve all the text
- following the expression and between opening punctuation symbols like
- (, [, or {, and the matching close-punctuation symbol. This properly nested
- occurrences of the punctuations, so for the text like
- printf(a(), b(c()));
- a call to _GetTextInside(text, r'printf\(') will return 'a(), b(c())'.
- start_pattern must match string having an open punctuation symbol at the end.
-
- Args:
- text: The lines to extract text. Its comments and strings must be elided.
- It can be single line and can span multiple lines.
- start_pattern: The regexp string indicating where to start extracting
- the text.
- Returns:
- The extracted text.
- None if either the opening string or ending punctuation could not be found.
- """
- # TODO(sugawarayu): Audit cpplint.py to see what places could be profitably
- # rewritten to use _GetTextInside (and use inferior regexp matching today).
-
- # Give opening punctuations to get the matching close-punctuations.
- matching_punctuation = {'(': ')', '{': '}', '[': ']'}
- closing_punctuation = set(matching_punctuation.itervalues())
-
- # Find the position to start extracting text.
- match = re.search(start_pattern, text, re.M)
- if not match: # start_pattern not found in text.
- return None
- start_position = match.end(0)
-
- assert start_position > 0, (
- 'start_pattern must ends with an opening punctuation.')
- assert text[start_position - 1] in matching_punctuation, (
- 'start_pattern must ends with an opening punctuation.')
- # Stack of closing punctuations we expect to have in text after position.
- punctuation_stack = [matching_punctuation[text[start_position - 1]]]
- position = start_position
- while punctuation_stack and position < len(text):
- if text[position] == punctuation_stack[-1]:
- punctuation_stack.pop()
- elif text[position] in closing_punctuation:
- # A closing punctuation without matching opening punctuations.
- return None
- elif text[position] in matching_punctuation:
- punctuation_stack.append(matching_punctuation[text[position]])
- position += 1
- if punctuation_stack:
- # Opening punctuations left without matching close-punctuations.
- return None
- # punctuations match.
- return text[start_position:position - 1]
-
-
-# Patterns for matching call-by-reference parameters.
-#
-# Supports nested templates up to 2 levels deep using this messy pattern:
-# < (?: < (?: < [^<>]*
-# >
-# | [^<>] )*
-# >
-# | [^<>] )*
-# >
-_RE_PATTERN_IDENT = r'[_a-zA-Z]\w*' # =~ [[:alpha:]][[:alnum:]]*
-_RE_PATTERN_TYPE = (
- r'(?:const\s+)?(?:typename\s+|class\s+|struct\s+|union\s+|enum\s+)?'
- r'(?:\w|'
- r'\s*<(?:<(?:<[^<>]*>|[^<>])*>|[^<>])*>|'
- r'::)+')
-# A call-by-reference parameter ends with '& identifier'.
-_RE_PATTERN_REF_PARAM = re.compile(
- r'(' + _RE_PATTERN_TYPE + r'(?:\s*(?:\bconst\b|[*]))*\s*'
- r'&\s*' + _RE_PATTERN_IDENT + r')\s*(?:=[^,()]+)?[,)]')
-# A call-by-const-reference parameter either ends with 'const& identifier'
-# or looks like 'const type& identifier' when 'type' is atomic.
-_RE_PATTERN_CONST_REF_PARAM = (
- r'(?:.*\s*\bconst\s*&\s*' + _RE_PATTERN_IDENT +
- r'|const\s+' + _RE_PATTERN_TYPE + r'\s*&\s*' + _RE_PATTERN_IDENT + r')')
-
-
-def CheckLanguage(filename, clean_lines, linenum, file_extension,
- include_state, nesting_state, error):
- """Checks rules from the 'C++ language rules' section of cppguide.html.
-
- Some of these rules are hard to test (function overloading, using
- uint32 inappropriately), but we do the best we can.
-
- Args:
- filename: The name of the current file.
- clean_lines: A CleansedLines instance containing the file.
- linenum: The number of the line to check.
- file_extension: The extension (without the dot) of the filename.
- include_state: An _IncludeState instance in which the headers are inserted.
- nesting_state: A _NestingState instance which maintains information about
- the current stack of nested blocks being parsed.
- error: The function to call with any errors found.
- """
- # If the line is empty or consists of entirely a comment, no need to
- # check it.
- line = clean_lines.elided[linenum]
- if not line:
- return
-
- match = _RE_PATTERN_INCLUDE.search(line)
- if match:
- CheckIncludeLine(filename, clean_lines, linenum, include_state, error)
- return
-
- # Reset include state across preprocessor directives. This is meant
- # to silence warnings for conditional includes.
- if Match(r'^\s*#\s*(?:ifdef|elif|else|endif)\b', line):
- include_state.ResetSection()
-
- # Make Windows paths like Unix.
- fullname = os.path.abspath(filename).replace('\\', '/')
-
- # TODO(unknown): figure out if they're using default arguments in fn proto.
-
- # Check to see if they're using an conversion function cast.
- # I just try to capture the most common basic types, though there are more.
- # Parameterless conversion functions, such as bool(), are allowed as they are
- # probably a member operator declaration or default constructor.
- match = Search(
- r'(\bnew\s+)?\b' # Grab 'new' operator, if it's there
- r'(int|float|double|bool|char|int32|uint32|int64|uint64)'
- r'(\([^)].*)', line)
- if match:
- matched_new = match.group(1)
- matched_type = match.group(2)
- matched_funcptr = match.group(3)
-
- # gMock methods are defined using some variant of MOCK_METHODx(name, type)
- # where type may be float(), int(string), etc. Without context they are
- # virtually indistinguishable from int(x) casts. Likewise, gMock's
- # MockCallback takes a template parameter of the form return_type(arg_type),
- # which looks much like the cast we're trying to detect.
- #
- # std::function<> wrapper has a similar problem.
- #
- # Return types for function pointers also look like casts if they
- # don't have an extra space.
- if (matched_new is None and # If new operator, then this isn't a cast
- not (Match(r'^\s*MOCK_(CONST_)?METHOD\d+(_T)?\(', line) or
- Search(r'\bMockCallback<.*>', line) or
- Search(r'\bstd::function<.*>', line)) and
- not (matched_funcptr and
- Match(r'\((?:[^() ]+::\s*\*\s*)?[^() ]+\)\s*\(',
- matched_funcptr))):
- # Try a bit harder to catch gmock lines: the only place where
- # something looks like an old-style cast is where we declare the
- # return type of the mocked method, and the only time when we
- # are missing context is if MOCK_METHOD was split across
- # multiple lines. The missing MOCK_METHOD is usually one or two
- # lines back, so scan back one or two lines.
- #
- # It's not possible for gmock macros to appear in the first 2
- # lines, since the class head + section name takes up 2 lines.
- if (linenum < 2 or
- not (Match(r'^\s*MOCK_(?:CONST_)?METHOD\d+(?:_T)?\((?:\S+,)?\s*$',
- clean_lines.elided[linenum - 1]) or
- Match(r'^\s*MOCK_(?:CONST_)?METHOD\d+(?:_T)?\(\s*$',
- clean_lines.elided[linenum - 2]))):
- error(filename, linenum, 'readability/casting', 4,
- 'Using deprecated casting style. '
- 'Use static_cast<%s>(...) instead' %
- matched_type)
-
- CheckCStyleCast(filename, linenum, line, clean_lines.raw_lines[linenum],
- 'static_cast',
- r'\((int|float|double|bool|char|u?int(16|32|64))\)', error)
-
- # This doesn't catch all cases. Consider (const char * const)"hello".
- #
- # (char *) "foo" should always be a const_cast (reinterpret_cast won't
- # compile).
- if CheckCStyleCast(filename, linenum, line, clean_lines.raw_lines[linenum],
- 'const_cast', r'\((char\s?\*+\s?)\)\s*"', error):
- pass
- else:
- # Check pointer casts for other than string constants
- CheckCStyleCast(filename, linenum, line, clean_lines.raw_lines[linenum],
- 'reinterpret_cast', r'\((\w+\s?\*+\s?)\)', error)
-
- # In addition, we look for people taking the address of a cast. This
- # is dangerous -- casts can assign to temporaries, so the pointer doesn't
- # point where you think.
- match = Search(
- r'(?:&\(([^)]+)\)[\w(])|'
- r'(?:&(static|dynamic|down|reinterpret)_cast\b)', line)
- if match and match.group(1) != '*':
- error(filename, linenum, 'runtime/casting', 4,
- ('Are you taking an address of a cast? '
- 'This is dangerous: could be a temp var. '
- 'Take the address before doing the cast, rather than after'))
-
- # Create an extended_line, which is the concatenation of the current and
- # next lines, for more effective checking of code that may span more than one
- # line.
- if linenum + 1 < clean_lines.NumLines():
- extended_line = line + clean_lines.elided[linenum + 1]
- else:
- extended_line = line
-
- # Check for people declaring static/global STL strings at the top level.
- # This is dangerous because the C++ language does not guarantee that
- # globals with constructors are initialized before the first access.
- match = Match(
- r'((?:|static +)(?:|const +))string +([a-zA-Z0-9_:]+)\b(.*)',
- line)
- # Make sure it's not a function.
- # Function template specialization looks like: "string foo<Type>(...".
- # Class template definitions look like: "string Foo<Type>::Method(...".
- #
- # Also ignore things that look like operators. These are matched separately
- # because operator names cross non-word boundaries. If we change the pattern
- # above, we would decrease the accuracy of matching identifiers.
- if (match and
- not Search(r'\boperator\W', line) and
- not Match(r'\s*(<.*>)?(::[a-zA-Z0-9_]+)?\s*\(([^"]|$)', match.group(3))):
- error(filename, linenum, 'runtime/string', 4,
- 'For a static/global string constant, use a C style string instead: '
- '"%schar %s[]".' %
- (match.group(1), match.group(2)))
-
- if Search(r'\b([A-Za-z0-9_]*_)\(\1\)', line):
- error(filename, linenum, 'runtime/init', 4,
- 'You seem to be initializing a member variable with itself.')
-
- if file_extension == 'h':
- # TODO(unknown): check that 1-arg constructors are explicit.
- # How to tell it's a constructor?
- # (handled in CheckForNonStandardConstructs for now)
- # TODO(unknown): check that classes have DISALLOW_EVIL_CONSTRUCTORS
- # (level 1 error)
- pass
-
- # Check if people are using the verboten C basic types. The only exception
- # we regularly allow is "unsigned short port" for port.
- if Search(r'\bshort port\b', line):
- if not Search(r'\bunsigned short port\b', line):
- error(filename, linenum, 'runtime/int', 4,
- 'Use "unsigned short" for ports, not "short"')
- else:
- match = Search(r'\b(short|long(?! +double)|long long)\b', line)
- if match:
- error(filename, linenum, 'runtime/int', 4,
- 'Use int16/int64/etc, rather than the C type %s' % match.group(1))
-
- # When snprintf is used, the second argument shouldn't be a literal.
- match = Search(r'snprintf\s*\(([^,]*),\s*([0-9]*)\s*,', line)
- if match and match.group(2) != '0':
- # If 2nd arg is zero, snprintf is used to calculate size.
- error(filename, linenum, 'runtime/printf', 3,
- 'If you can, use sizeof(%s) instead of %s as the 2nd arg '
- 'to snprintf.' % (match.group(1), match.group(2)))
-
- # Check if some verboten C functions are being used.
- if Search(r'\bsprintf\b', line):
- error(filename, linenum, 'runtime/printf', 5,
- 'Never use sprintf. Use snprintf instead.')
- match = Search(r'\b(strcpy|strcat)\b', line)
- if match:
- error(filename, linenum, 'runtime/printf', 4,
- 'Almost always, snprintf is better than %s' % match.group(1))
-
- # Check if some verboten operator overloading is going on
- # TODO(unknown): catch out-of-line unary operator&:
- # class X {};
- # int operator&(const X& x) { return 42; } // unary operator&
- # The trick is it's hard to tell apart from binary operator&:
- # class Y { int operator&(const Y& x) { return 23; } }; // binary operator&
- if Search(r'\boperator\s*&\s*\(\s*\)', line):
- error(filename, linenum, 'runtime/operator', 4,
- 'Unary operator& is dangerous. Do not use it.')
-
- # Check for suspicious usage of "if" like
- # } if (a == b) {
- if Search(r'\}\s*if\s*\(', line):
- error(filename, linenum, 'readability/braces', 4,
- 'Did you mean "else if"? If not, start a new line for "if".')
-
- # Check for potential format string bugs like printf(foo).
- # We constrain the pattern not to pick things like DocidForPrintf(foo).
- # Not perfect but it can catch printf(foo.c_str()) and printf(foo->c_str())
- # TODO(sugawarayu): Catch the following case. Need to change the calling
- # convention of the whole function to process multiple line to handle it.
- # printf(
- # boy_this_is_a_really_long_variable_that_cannot_fit_on_the_prev_line);
- printf_args = _GetTextInside(line, r'(?i)\b(string)?printf\s*\(')
- if printf_args:
- match = Match(r'([\w.\->()]+)$', printf_args)
- if match and match.group(1) != '__VA_ARGS__':
- function_name = re.search(r'\b((?:string)?printf)\s*\(',
- line, re.I).group(1)
- error(filename, linenum, 'runtime/printf', 4,
- 'Potential format string bug. Do %s("%%s", %s) instead.'
- % (function_name, match.group(1)))
-
- # Check for potential memset bugs like memset(buf, sizeof(buf), 0).
- match = Search(r'memset\s*\(([^,]*),\s*([^,]*),\s*0\s*\)', line)
- if match and not Match(r"^''|-?[0-9]+|0x[0-9A-Fa-f]$", match.group(2)):
- error(filename, linenum, 'runtime/memset', 4,
- 'Did you mean "memset(%s, 0, %s)"?'
- % (match.group(1), match.group(2)))
-
- if Search(r'\busing namespace\b', line):
- error(filename, linenum, 'build/namespaces', 5,
- 'Do not use namespace using-directives. '
- 'Use using-declarations instead.')
-
- # Detect variable-length arrays.
- match = Match(r'\s*(.+::)?(\w+) [a-z]\w*\[(.+)];', line)
- if (match and match.group(2) != 'return' and match.group(2) != 'delete' and
- match.group(3).find(']') == -1):
- # Split the size using space and arithmetic operators as delimiters.
- # If any of the resulting tokens are not compile time constants then
- # report the error.
- tokens = re.split(r'\s|\+|\-|\*|\/|<<|>>]', match.group(3))
- is_const = True
- skip_next = False
- for tok in tokens:
- if skip_next:
- skip_next = False
- continue
-
- if Search(r'sizeof\(.+\)', tok): continue
- if Search(r'arraysize\(\w+\)', tok): continue
-
- tok = tok.lstrip('(')
- tok = tok.rstrip(')')
- if not tok: continue
- if Match(r'\d+', tok): continue
- if Match(r'0[xX][0-9a-fA-F]+', tok): continue
- if Match(r'k[A-Z0-9]\w*', tok): continue
- if Match(r'(.+::)?k[A-Z0-9]\w*', tok): continue
- if Match(r'(.+::)?[A-Z][A-Z0-9_]*', tok): continue
- # A catch all for tricky sizeof cases, including 'sizeof expression',
- # 'sizeof(*type)', 'sizeof(const type)', 'sizeof(struct StructName)'
- # requires skipping the next token because we split on ' ' and '*'.
- if tok.startswith('sizeof'):
- skip_next = True
- continue
- is_const = False
- break
- if not is_const:
- error(filename, linenum, 'runtime/arrays', 1,
- 'Do not use variable-length arrays. Use an appropriately named '
- "('k' followed by CamelCase) compile-time constant for the size.")
-
- # If DISALLOW_EVIL_CONSTRUCTORS, DISALLOW_COPY_AND_ASSIGN, or
- # DISALLOW_IMPLICIT_CONSTRUCTORS is present, then it should be the last thing
- # in the class declaration.
- match = Match(
- (r'\s*'
- r'(DISALLOW_(EVIL_CONSTRUCTORS|COPY_AND_ASSIGN|IMPLICIT_CONSTRUCTORS))'
- r'\(.*\);$'),
- line)
- if match and linenum + 1 < clean_lines.NumLines():
- next_line = clean_lines.elided[linenum + 1]
- # We allow some, but not all, declarations of variables to be present
- # in the statement that defines the class. The [\w\*,\s]* fragment of
- # the regular expression below allows users to declare instances of
- # the class or pointers to instances, but not less common types such
- # as function pointers or arrays. It's a tradeoff between allowing
- # reasonable code and avoiding trying to parse more C++ using regexps.
- if not Search(r'^\s*}[\w\*,\s]*;', next_line):
- error(filename, linenum, 'readability/constructors', 3,
- match.group(1) + ' should be the last thing in the class')
-
- # Check for use of unnamed namespaces in header files. Registration
- # macros are typically OK, so we allow use of "namespace {" on lines
- # that end with backslashes.
- if (file_extension == 'h'
- and Search(r'\bnamespace\s*{', line)
- and line[-1] != '\\'):
- error(filename, linenum, 'build/namespaces', 4,
- 'Do not use unnamed namespaces in header files. See '
- 'http://google-styleguide.googlecode.com/svn/trunk/cppguide.xml#Namespaces'
- ' for more information.')
-
-def CheckForNonConstReference(filename, clean_lines, linenum,
- nesting_state, error):
- """Check for non-const references.
-
- Separate from CheckLanguage since it scans backwards from current
- line, instead of scanning forward.
-
- Args:
- filename: The name of the current file.
- clean_lines: A CleansedLines instance containing the file.
- linenum: The number of the line to check.
- nesting_state: A _NestingState instance which maintains information about
- the current stack of nested blocks being parsed.
- error: The function to call with any errors found.
- """
- # Do nothing if there is no '&' on current line.
- line = clean_lines.elided[linenum]
- if '&' not in line:
- return
-
- # Long type names may be broken across multiple lines, usually in one
- # of these forms:
- # LongType
- # ::LongTypeContinued &identifier
- # LongType::
- # LongTypeContinued &identifier
- # LongType<
- # ...>::LongTypeContinued &identifier
- #
- # If we detected a type split across two lines, join the previous
- # line to current line so that we can match const references
- # accordingly.
- #
- # Note that this only scans back one line, since scanning back
- # arbitrary number of lines would be expensive. If you have a type
- # that spans more than 2 lines, please use a typedef.
- if linenum > 1:
- previous = None
- if Match(r'\s*::(?:[\w<>]|::)+\s*&\s*\S', line):
- # previous_line\n + ::current_line
- previous = Search(r'\b((?:const\s*)?(?:[\w<>]|::)+[\w<>])\s*$',
- clean_lines.elided[linenum - 1])
- elif Match(r'\s*[a-zA-Z_]([\w<>]|::)+\s*&\s*\S', line):
- # previous_line::\n + current_line
- previous = Search(r'\b((?:const\s*)?(?:[\w<>]|::)+::)\s*$',
- clean_lines.elided[linenum - 1])
- if previous:
- line = previous.group(1) + line.lstrip()
- else:
- # Check for templated parameter that is split across multiple lines
- endpos = line.rfind('>')
- if endpos > -1:
- (_, startline, startpos) = ReverseCloseExpression(
- clean_lines, linenum, endpos)
- if startpos > -1 and startline < linenum:
- # Found the matching < on an earlier line, collect all
- # pieces up to current line.
- line = ''
- for i in xrange(startline, linenum + 1):
- line += clean_lines.elided[i].strip()
-
- # Check for non-const references in function parameters. A single '&' may
- # found in the following places:
- # inside expression: binary & for bitwise AND
- # inside expression: unary & for taking the address of something
- # inside declarators: reference parameter
- # We will exclude the first two cases by checking that we are not inside a
- # function body, including one that was just introduced by a trailing '{'.
- # TODO(unknwon): Doesn't account for preprocessor directives.
- # TODO(unknown): Doesn't account for 'catch(Exception& e)' [rare].
- check_params = False
- if not nesting_state.stack:
- check_params = True # top level
- elif (isinstance(nesting_state.stack[-1], _ClassInfo) or
- isinstance(nesting_state.stack[-1], _NamespaceInfo)):
- check_params = True # within class or namespace
- elif Match(r'.*{\s*$', line):
- if (len(nesting_state.stack) == 1 or
- isinstance(nesting_state.stack[-2], _ClassInfo) or
- isinstance(nesting_state.stack[-2], _NamespaceInfo)):
- check_params = True # just opened global/class/namespace block
- # We allow non-const references in a few standard places, like functions
- # called "swap()" or iostream operators like "<<" or ">>". Do not check
- # those function parameters.
- #
- # We also accept & in static_assert, which looks like a function but
- # it's actually a declaration expression.
- whitelisted_functions = (r'(?:[sS]wap(?:<\w:+>)?|'
- r'operator\s*[<>][<>]|'
- r'static_assert|COMPILE_ASSERT'
- r')\s*\(')
- if Search(whitelisted_functions, line):
- check_params = False
- elif not Search(r'\S+\([^)]*$', line):
- # Don't see a whitelisted function on this line. Actually we
- # didn't see any function name on this line, so this is likely a
- # multi-line parameter list. Try a bit harder to catch this case.
- for i in xrange(2):
- if (linenum > i and
- Search(whitelisted_functions, clean_lines.elided[linenum - i - 1])):
- check_params = False
- break
-
- if check_params:
- decls = ReplaceAll(r'{[^}]*}', ' ', line) # exclude function body
- for parameter in re.findall(_RE_PATTERN_REF_PARAM, decls):
- if not Match(_RE_PATTERN_CONST_REF_PARAM, parameter):
- error(filename, linenum, 'runtime/references', 2,
- 'Is this a non-const reference? '
- 'If so, make const or use a pointer: ' +
- ReplaceAll(' *<', '<', parameter))
-
-
-def CheckCStyleCast(filename, linenum, line, raw_line, cast_type, pattern,
- error):
- """Checks for a C-style cast by looking for the pattern.
-
- Args:
- filename: The name of the current file.
- linenum: The number of the line to check.
- line: The line of code to check.
- raw_line: The raw line of code to check, with comments.
- cast_type: The string for the C++ cast to recommend. This is either
- reinterpret_cast, static_cast, or const_cast, depending.
- pattern: The regular expression used to find C-style casts.
- error: The function to call with any errors found.
-
- Returns:
- True if an error was emitted.
- False otherwise.
- """
- match = Search(pattern, line)
- if not match:
- return False
-
- # e.g., sizeof(int)
- sizeof_match = Match(r'.*sizeof\s*$', line[0:match.start(1) - 1])
- if sizeof_match:
- error(filename, linenum, 'runtime/sizeof', 1,
- 'Using sizeof(type). Use sizeof(varname) instead if possible')
- return True
-
- # operator++(int) and operator--(int)
- if (line[0:match.start(1) - 1].endswith(' operator++') or
- line[0:match.start(1) - 1].endswith(' operator--')):
- return False
-
- # A single unnamed argument for a function tends to look like old
- # style cast. If we see those, don't issue warnings for deprecated
- # casts, instead issue warnings for unnamed arguments where
- # appropriate.
- #
- # These are things that we want warnings for, since the style guide
- # explicitly require all parameters to be named:
- # Function(int);
- # Function(int) {
- # ConstMember(int) const;
- # ConstMember(int) const {
- # ExceptionMember(int) throw (...);
- # ExceptionMember(int) throw (...) {
- # PureVirtual(int) = 0;
- #
- # These are functions of some sort, where the compiler would be fine
- # if they had named parameters, but people often omit those
- # identifiers to reduce clutter:
- # (FunctionPointer)(int);
- # (FunctionPointer)(int) = value;
- # Function((function_pointer_arg)(int))
- # <TemplateArgument(int)>;
- # <(FunctionPointerTemplateArgument)(int)>;
- remainder = line[match.end(0):]
- if Match(r'^\s*(?:;|const\b|throw\b|=|>|\{|\))', remainder):
- # Looks like an unnamed parameter.
-
- # Don't warn on any kind of template arguments.
- if Match(r'^\s*>', remainder):
- return False
-
- # Don't warn on assignments to function pointers, but keep warnings for
- # unnamed parameters to pure virtual functions. Note that this pattern
- # will also pass on assignments of "0" to function pointers, but the
- # preferred values for those would be "nullptr" or "NULL".
- matched_zero = Match(r'^\s=\s*(\S+)\s*;', remainder)
- if matched_zero and matched_zero.group(1) != '0':
- return False
-
- # Don't warn on function pointer declarations. For this we need
- # to check what came before the "(type)" string.
- if Match(r'.*\)\s*$', line[0:match.start(0)]):
- return False
-
- # Don't warn if the parameter is named with block comments, e.g.:
- # Function(int /*unused_param*/);
- if '/*' in raw_line:
- return False
-
- # Passed all filters, issue warning here.
- error(filename, linenum, 'readability/function', 3,
- 'All parameters should be named in a function')
- return True
-
- # At this point, all that should be left is actual casts.
- error(filename, linenum, 'readability/casting', 4,
- 'Using C-style cast. Use %s<%s>(...) instead' %
- (cast_type, match.group(1)))
-
- return True
-
-
-_HEADERS_CONTAINING_TEMPLATES = (
- ('<deque>', ('deque',)),
- ('<functional>', ('unary_function', 'binary_function',
- 'plus', 'minus', 'multiplies', 'divides', 'modulus',
- 'negate',
- 'equal_to', 'not_equal_to', 'greater', 'less',
- 'greater_equal', 'less_equal',
- 'logical_and', 'logical_or', 'logical_not',
- 'unary_negate', 'not1', 'binary_negate', 'not2',
- 'bind1st', 'bind2nd',
- 'pointer_to_unary_function',
- 'pointer_to_binary_function',
- 'ptr_fun',
- 'mem_fun_t', 'mem_fun', 'mem_fun1_t', 'mem_fun1_ref_t',
- 'mem_fun_ref_t',
- 'const_mem_fun_t', 'const_mem_fun1_t',
- 'const_mem_fun_ref_t', 'const_mem_fun1_ref_t',
- 'mem_fun_ref',
- )),
- ('<limits>', ('numeric_limits',)),
- ('<list>', ('list',)),
- ('<map>', ('map', 'multimap',)),
- ('<memory>', ('allocator',)),
- ('<queue>', ('queue', 'priority_queue',)),
- ('<set>', ('set', 'multiset',)),
- ('<stack>', ('stack',)),
- ('<string>', ('char_traits', 'basic_string',)),
- ('<utility>', ('pair',)),
- ('<vector>', ('vector',)),
-
- # gcc extensions.
- # Note: std::hash is their hash, ::hash is our hash
- ('<hash_map>', ('hash_map', 'hash_multimap',)),
- ('<hash_set>', ('hash_set', 'hash_multiset',)),
- ('<slist>', ('slist',)),
- )
-
-_RE_PATTERN_STRING = re.compile(r'\bstring\b')
-
-_re_pattern_algorithm_header = []
-for _template in ('copy', 'max', 'min', 'min_element', 'sort', 'swap',
- 'transform'):
- # Match max<type>(..., ...), max(..., ...), but not foo->max, foo.max or
- # type::max().
- _re_pattern_algorithm_header.append(
- (re.compile(r'[^>.]\b' + _template + r'(<.*?>)?\([^\)]'),
- _template,
- '<algorithm>'))
-
-_re_pattern_templates = []
-for _header, _templates in _HEADERS_CONTAINING_TEMPLATES:
- for _template in _templates:
- _re_pattern_templates.append(
- (re.compile(r'(\<|\b)' + _template + r'\s*\<'),
- _template + '<>',
- _header))
-
-
-def FilesBelongToSameModule(filename_cc, filename_h):
- """Check if these two filenames belong to the same module.
-
- The concept of a 'module' here is a as follows:
- foo.h, foo-inl.h, foo.cc, foo_test.cc and foo_unittest.cc belong to the
- same 'module' if they are in the same directory.
- some/path/public/xyzzy and some/path/internal/xyzzy are also considered
- to belong to the same module here.
-
- If the filename_cc contains a longer path than the filename_h, for example,
- '/absolute/path/to/base/sysinfo.cc', and this file would include
- 'base/sysinfo.h', this function also produces the prefix needed to open the
- header. This is used by the caller of this function to more robustly open the
- header file. We don't have access to the real include paths in this context,
- so we need this guesswork here.
-
- Known bugs: tools/base/bar.cc and base/bar.h belong to the same module
- according to this implementation. Because of this, this function gives
- some false positives. This should be sufficiently rare in practice.
-
- Args:
- filename_cc: is the path for the .cc file
- filename_h: is the path for the header path
-
- Returns:
- Tuple with a bool and a string:
- bool: True if filename_cc and filename_h belong to the same module.
- string: the additional prefix needed to open the header file.
- """
-
- if not filename_cc.endswith('.cc'):
- return (False, '')
- filename_cc = filename_cc[:-len('.cc')]
- if filename_cc.endswith('_unittest'):
- filename_cc = filename_cc[:-len('_unittest')]
- elif filename_cc.endswith('_test'):
- filename_cc = filename_cc[:-len('_test')]
- filename_cc = filename_cc.replace('/public/', '/')
- filename_cc = filename_cc.replace('/internal/', '/')
-
- if not filename_h.endswith('.h'):
- return (False, '')
- filename_h = filename_h[:-len('.h')]
- if filename_h.endswith('-inl'):
- filename_h = filename_h[:-len('-inl')]
- filename_h = filename_h.replace('/public/', '/')
- filename_h = filename_h.replace('/internal/', '/')
-
- files_belong_to_same_module = filename_cc.endswith(filename_h)
- common_path = ''
- if files_belong_to_same_module:
- common_path = filename_cc[:-len(filename_h)]
- return files_belong_to_same_module, common_path
-
-
-def UpdateIncludeState(filename, include_state, io=codecs):
- """Fill up the include_state with new includes found from the file.
-
- Args:
- filename: the name of the header to read.
- include_state: an _IncludeState instance in which the headers are inserted.
- io: The io factory to use to read the file. Provided for testability.
-
- Returns:
- True if a header was succesfully added. False otherwise.
- """
- headerfile = None
- try:
- headerfile = io.open(filename, 'r', 'utf8', 'replace')
- except IOError:
- return False
- linenum = 0
- for line in headerfile:
- linenum += 1
- clean_line = CleanseComments(line)
- match = _RE_PATTERN_INCLUDE.search(clean_line)
- if match:
- include = match.group(2)
- # The value formatting is cute, but not really used right now.
- # What matters here is that the key is in include_state.
- include_state.setdefault(include, '%s:%d' % (filename, linenum))
- return True
-
-
-def CheckForIncludeWhatYouUse(filename, clean_lines, include_state, error,
- io=codecs):
- """Reports for missing stl includes.
-
- This function will output warnings to make sure you are including the headers
- necessary for the stl containers and functions that you use. We only give one
- reason to include a header. For example, if you use both equal_to<> and
- less<> in a .h file, only one (the latter in the file) of these will be
- reported as a reason to include the <functional>.
-
- Args:
- filename: The name of the current file.
- clean_lines: A CleansedLines instance containing the file.
- include_state: An _IncludeState instance.
- error: The function to call with any errors found.
- io: The IO factory to use to read the header file. Provided for unittest
- injection.
- """
- required = {} # A map of header name to linenumber and the template entity.
- # Example of required: { '<functional>': (1219, 'less<>') }
-
- for linenum in xrange(clean_lines.NumLines()):
- line = clean_lines.elided[linenum]
- if not line or line[0] == '#':
- continue
-
- # String is special -- it is a non-templatized type in STL.
- matched = _RE_PATTERN_STRING.search(line)
- if matched:
- # Don't warn about strings in non-STL namespaces:
- # (We check only the first match per line; good enough.)
- prefix = line[:matched.start()]
- if prefix.endswith('std::') or not prefix.endswith('::'):
- required['<string>'] = (linenum, 'string')
-
- for pattern, template, header in _re_pattern_algorithm_header:
- if pattern.search(line):
- required[header] = (linenum, template)
-
- # The following function is just a speed up, no semantics are changed.
- if not '<' in line: # Reduces the cpu time usage by skipping lines.
- continue
-
- for pattern, template, header in _re_pattern_templates:
- if pattern.search(line):
- required[header] = (linenum, template)
-
- # The policy is that if you #include something in foo.h you don't need to
- # include it again in foo.cc. Here, we will look at possible includes.
- # Let's copy the include_state so it is only messed up within this function.
- include_state = include_state.copy()
-
- # Did we find the header for this file (if any) and succesfully load it?
- header_found = False
-
- # Use the absolute path so that matching works properly.
- abs_filename = FileInfo(filename).FullName()
-
- # For Emacs's flymake.
- # If cpplint is invoked from Emacs's flymake, a temporary file is generated
- # by flymake and that file name might end with '_flymake.cc'. In that case,
- # restore original file name here so that the corresponding header file can be
- # found.
- # e.g. If the file name is 'foo_flymake.cc', we should search for 'foo.h'
- # instead of 'foo_flymake.h'
- abs_filename = re.sub(r'_flymake\.cc$', '.cc', abs_filename)
-
- # include_state is modified during iteration, so we iterate over a copy of
- # the keys.
- header_keys = include_state.keys()
- for header in header_keys:
- (same_module, common_path) = FilesBelongToSameModule(abs_filename, header)
- fullpath = common_path + header
- if same_module and UpdateIncludeState(fullpath, include_state, io):
- header_found = True
-
- # If we can't find the header file for a .cc, assume it's because we don't
- # know where to look. In that case we'll give up as we're not sure they
- # didn't include it in the .h file.
- # TODO(unknown): Do a better job of finding .h files so we are confident that
- # not having the .h file means there isn't one.
- if filename.endswith('.cc') and not header_found:
- return
-
- # All the lines have been processed, report the errors found.
- for required_header_unstripped in required:
- template = required[required_header_unstripped][1]
- if required_header_unstripped.strip('<>"') not in include_state:
- error(filename, required[required_header_unstripped][0],
- 'build/include_what_you_use', 4,
- 'Add #include ' + required_header_unstripped + ' for ' + template)
-
-
-_RE_PATTERN_EXPLICIT_MAKEPAIR = re.compile(r'\bmake_pair\s*<')
-
-
-def CheckMakePairUsesDeduction(filename, clean_lines, linenum, error):
- """Check that make_pair's template arguments are deduced.
-
- G++ 4.6 in C++0x mode fails badly if make_pair's template arguments are
- specified explicitly, and such use isn't intended in any case.
-
- Args:
- filename: The name of the current file.
- clean_lines: A CleansedLines instance containing the file.
- linenum: The number of the line to check.
- error: The function to call with any errors found.
- """
- line = clean_lines.elided[linenum]
- match = _RE_PATTERN_EXPLICIT_MAKEPAIR.search(line)
- if match:
- error(filename, linenum, 'build/explicit_make_pair',
- 4, # 4 = high confidence
- 'For C++11-compatibility, omit template arguments from make_pair'
- ' OR use pair directly OR if appropriate, construct a pair directly')
-
-
-def ProcessLine(filename, file_extension, clean_lines, line,
- include_state, function_state, nesting_state, error,
- extra_check_functions=[]):
- """Processes a single line in the file.
-
- Args:
- filename: Filename of the file that is being processed.
- file_extension: The extension (dot not included) of the file.
- clean_lines: An array of strings, each representing a line of the file,
- with comments stripped.
- line: Number of line being processed.
- include_state: An _IncludeState instance in which the headers are inserted.
- function_state: A _FunctionState instance which counts function lines, etc.
- nesting_state: A _NestingState instance which maintains information about
- the current stack of nested blocks being parsed.
- error: A callable to which errors are reported, which takes 4 arguments:
- filename, line number, error level, and message
- extra_check_functions: An array of additional check functions that will be
- run on each source line. Each function takes 4
- arguments: filename, clean_lines, line, error
- """
- raw_lines = clean_lines.raw_lines
- ParseNolintSuppressions(filename, raw_lines[line], line, error)
- nesting_state.Update(filename, clean_lines, line, error)
- if nesting_state.stack and nesting_state.stack[-1].inline_asm != _NO_ASM:
- return
- CheckForFunctionLengths(filename, clean_lines, line, function_state, error)
- CheckForMultilineCommentsAndStrings(filename, clean_lines, line, error)
- CheckStyle(filename, clean_lines, line, file_extension, nesting_state, error)
- CheckLanguage(filename, clean_lines, line, file_extension, include_state,
- nesting_state, error)
- CheckForNonConstReference(filename, clean_lines, line, nesting_state, error)
- CheckForNonStandardConstructs(filename, clean_lines, line,
- nesting_state, error)
- CheckVlogArguments(filename, clean_lines, line, error)
- CheckPosixThreading(filename, clean_lines, line, error)
- CheckInvalidIncrement(filename, clean_lines, line, error)
- CheckMakePairUsesDeduction(filename, clean_lines, line, error)
- for check_fn in extra_check_functions:
- check_fn(filename, clean_lines, line, error)
-
-def ProcessFileData(filename, file_extension, lines, error,
- extra_check_functions=[]):
- """Performs lint checks and reports any errors to the given error function.
-
- Args:
- filename: Filename of the file that is being processed.
- file_extension: The extension (dot not included) of the file.
- lines: An array of strings, each representing a line of the file, with the
- last element being empty if the file is terminated with a newline.
- error: A callable to which errors are reported, which takes 4 arguments:
- filename, line number, error level, and message
- extra_check_functions: An array of additional check functions that will be
- run on each source line. Each function takes 4
- arguments: filename, clean_lines, line, error
- """
- lines = (['// marker so line numbers and indices both start at 1'] + lines +
- ['// marker so line numbers end in a known way'])
-
- include_state = _IncludeState()
- function_state = _FunctionState()
- nesting_state = _NestingState()
-
- ResetNolintSuppressions()
-
- CheckForCopyright(filename, lines, error)
-
- if file_extension == 'h':
- CheckForHeaderGuard(filename, lines, error)
-
- RemoveMultiLineComments(filename, lines, error)
- clean_lines = CleansedLines(lines)
- for line in xrange(clean_lines.NumLines()):
- ProcessLine(filename, file_extension, clean_lines, line,
- include_state, function_state, nesting_state, error,
- extra_check_functions)
- nesting_state.CheckCompletedBlocks(filename, error)
-
- CheckForIncludeWhatYouUse(filename, clean_lines, include_state, error)
-
- # We check here rather than inside ProcessLine so that we see raw
- # lines rather than "cleaned" lines.
- CheckForBadCharacters(filename, lines, error)
-
- CheckForNewlineAtEOF(filename, lines, error)
-
-def ProcessFile(filename, vlevel, extra_check_functions=[]):
- """Does google-lint on a single file.
-
- Args:
- filename: The name of the file to parse.
-
- vlevel: The level of errors to report. Every error of confidence
- >= verbose_level will be reported. 0 is a good default.
-
- extra_check_functions: An array of additional check functions that will be
- run on each source line. Each function takes 4
- arguments: filename, clean_lines, line, error
- """
-
- _SetVerboseLevel(vlevel)
-
- try:
- # Support the UNIX convention of using "-" for stdin. Note that
- # we are not opening the file with universal newline support
- # (which codecs doesn't support anyway), so the resulting lines do
- # contain trailing '\r' characters if we are reading a file that
- # has CRLF endings.
- # If after the split a trailing '\r' is present, it is removed
- # below. If it is not expected to be present (i.e. os.linesep !=
- # '\r\n' as in Windows), a warning is issued below if this file
- # is processed.
-
- if filename == '-':
- lines = codecs.StreamReaderWriter(sys.stdin,
- codecs.getreader('utf8'),
- codecs.getwriter('utf8'),
- 'replace').read().split('\n')
- else:
- lines = codecs.open(filename, 'r', 'utf8', 'replace').read().split('\n')
-
- carriage_return_found = False
- # Remove trailing '\r'.
- for linenum in range(len(lines)):
- if lines[linenum].endswith('\r'):
- lines[linenum] = lines[linenum].rstrip('\r')
- carriage_return_found = True
-
- except IOError:
- sys.stderr.write(
- "Skipping input '%s': Can't open for reading\n" % filename)
- return
-
- # Note, if no dot is found, this will give the entire filename as the ext.
- file_extension = filename[filename.rfind('.') + 1:]
-
- # When reading from stdin, the extension is unknown, so no cpplint tests
- # should rely on the extension.
- if filename != '-' and file_extension not in _valid_extensions:
- sys.stderr.write('Ignoring %s; not a valid file name '
- '(%s)\n' % (filename, ', '.join(_valid_extensions)))
- else:
- ProcessFileData(filename, file_extension, lines, Error,
- extra_check_functions)
- if carriage_return_found and os.linesep != '\r\n':
- # Use 0 for linenum since outputting only one error for potentially
- # several lines.
- Error(filename, 0, 'whitespace/newline', 1,
- 'One or more unexpected \\r (^M) found;'
- 'better to use only a \\n')
-
- sys.stderr.write('Done processing %s\n' % filename)
-
-
-def PrintUsage(message):
- """Prints a brief usage string and exits, optionally with an error message.
-
- Args:
- message: The optional error message.
- """
- sys.stderr.write(_USAGE)
- if message:
- sys.exit('\nFATAL ERROR: ' + message)
- else:
- sys.exit(1)
-
-
-def PrintCategories():
- """Prints a list of all the error-categories used by error messages.
-
- These are the categories used to filter messages via --filter.
- """
- sys.stderr.write(''.join(' %s\n' % cat for cat in _ERROR_CATEGORIES))
- sys.exit(0)
-
-
-def ParseArguments(args):
- """Parses the command line arguments.
-
- This may set the output format and verbosity level as side-effects.
-
- Args:
- args: The command line arguments:
-
- Returns:
- The list of filenames to lint.
- """
- try:
- (opts, filenames) = getopt.getopt(args, '', ['help', 'output=', 'verbose=',
- 'counting=',
- 'filter=',
- 'root=',
- 'linelength=',
- 'extensions='])
- except getopt.GetoptError:
- PrintUsage('Invalid arguments.')
-
- verbosity = _VerboseLevel()
- output_format = _OutputFormat()
- filters = ''
- counting_style = ''
-
- for (opt, val) in opts:
- if opt == '--help':
- PrintUsage(None)
- elif opt == '--output':
- if val not in ('emacs', 'vs7', 'eclipse'):
- PrintUsage('The only allowed output formats are emacs, vs7 and eclipse.')
- output_format = val
- elif opt == '--verbose':
- verbosity = int(val)
- elif opt == '--filter':
- filters = val
- if not filters:
- PrintCategories()
- elif opt == '--counting':
- if val not in ('total', 'toplevel', 'detailed'):
- PrintUsage('Valid counting options are total, toplevel, and detailed')
- counting_style = val
- elif opt == '--root':
- global _root
- _root = val
- elif opt == '--linelength':
- global _line_length
- try:
- _line_length = int(val)
- except ValueError:
- PrintUsage('Line length must be digits.')
- elif opt == '--extensions':
- global _valid_extensions
- try:
- _valid_extensions = set(val.split(','))
- except ValueError:
- PrintUsage('Extensions must be comma seperated list.')
-
- if not filenames:
- PrintUsage('No files were specified.')
-
- _SetOutputFormat(output_format)
- _SetVerboseLevel(verbosity)
- _SetFilters(filters)
- _SetCountingStyle(counting_style)
-
- return filenames
-
-
-def main():
- filenames = ParseArguments(sys.argv[1:])
-
- # Change stderr to write with replacement characters so we don't die
- # if we try to print something containing non-ASCII characters.
- sys.stderr = codecs.StreamReaderWriter(sys.stderr,
- codecs.getreader('utf8'),
- codecs.getwriter('utf8'),
- 'replace')
-
- _cpplint_state.ResetErrorCounts()
- for filename in filenames:
- ProcessFile(filename, _cpplint_state.verbose_level)
- _cpplint_state.PrintErrorCounts()
-
- sys.exit(_cpplint_state.error_count > 0)
-
-
-if __name__ == '__main__':
- main()
diff --git a/third_party/aom/tools/diff.py b/third_party/aom/tools/diff.py
deleted file mode 100644
index bac6aabdc..000000000
--- a/third_party/aom/tools/diff.py
+++ /dev/null
@@ -1,132 +0,0 @@
-#!/usr/bin/env python
-##
-## Copyright (c) 2016, Alliance for Open Media. All rights reserved
-##
-## This source code is subject to the terms of the BSD 2 Clause License and
-## the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
-## was not distributed with this source code in the LICENSE file, you can
-## obtain it at www.aomedia.org/license/software. If the Alliance for Open
-## Media Patent License 1.0 was not distributed with this source code in the
-## PATENTS file, you can obtain it at www.aomedia.org/license/patent.
-##
-"""Classes for representing diff pieces."""
-
-__author__ = "jkoleszar@google.com"
-
-import re
-
-
-class DiffLines(object):
- """A container for one half of a diff."""
-
- def __init__(self, filename, offset, length):
- self.filename = filename
- self.offset = offset
- self.length = length
- self.lines = []
- self.delta_line_nums = []
-
- def Append(self, line):
- l = len(self.lines)
- if line[0] != " ":
- self.delta_line_nums.append(self.offset + l)
- self.lines.append(line[1:])
- assert l+1 <= self.length
-
- def Complete(self):
- return len(self.lines) == self.length
-
- def __contains__(self, item):
- return item >= self.offset and item <= self.offset + self.length - 1
-
-
-class DiffHunk(object):
- """A container for one diff hunk, consisting of two DiffLines."""
-
- def __init__(self, header, file_a, file_b, start_a, len_a, start_b, len_b):
- self.header = header
- self.left = DiffLines(file_a, start_a, len_a)
- self.right = DiffLines(file_b, start_b, len_b)
- self.lines = []
-
- def Append(self, line):
- """Adds a line to the DiffHunk and its DiffLines children."""
- if line[0] == "-":
- self.left.Append(line)
- elif line[0] == "+":
- self.right.Append(line)
- elif line[0] == " ":
- self.left.Append(line)
- self.right.Append(line)
- elif line[0] == "\\":
- # Ignore newline messages from git diff.
- pass
- else:
- assert False, ("Unrecognized character at start of diff line "
- "%r" % line[0])
- self.lines.append(line)
-
- def Complete(self):
- return self.left.Complete() and self.right.Complete()
-
- def __repr__(self):
- return "DiffHunk(%s, %s, len %d)" % (
- self.left.filename, self.right.filename,
- max(self.left.length, self.right.length))
-
-
-def ParseDiffHunks(stream):
- """Walk a file-like object, yielding DiffHunks as they're parsed."""
-
- file_regex = re.compile(r"(\+\+\+|---) (\S+)")
- range_regex = re.compile(r"@@ -(\d+)(,(\d+))? \+(\d+)(,(\d+))?")
- hunk = None
- while True:
- line = stream.readline()
- if not line:
- break
-
- if hunk is None:
- # Parse file names
- diff_file = file_regex.match(line)
- if diff_file:
- if line.startswith("---"):
- a_line = line
- a = diff_file.group(2)
- continue
- if line.startswith("+++"):
- b_line = line
- b = diff_file.group(2)
- continue
-
- # Parse offset/lengths
- diffrange = range_regex.match(line)
- if diffrange:
- if diffrange.group(2):
- start_a = int(diffrange.group(1))
- len_a = int(diffrange.group(3))
- else:
- start_a = 1
- len_a = int(diffrange.group(1))
-
- if diffrange.group(5):
- start_b = int(diffrange.group(4))
- len_b = int(diffrange.group(6))
- else:
- start_b = 1
- len_b = int(diffrange.group(4))
-
- header = [a_line, b_line, line]
- hunk = DiffHunk(header, a, b, start_a, len_a, start_b, len_b)
- else:
- # Add the current line to the hunk
- hunk.Append(line)
-
- # See if the whole hunk has been parsed. If so, yield it and prepare
- # for the next hunk.
- if hunk.Complete():
- yield hunk
- hunk = None
-
- # Partial hunks are a parse error
- assert hunk is None
diff --git a/third_party/aom/tools/dump_obu.cc b/third_party/aom/tools/dump_obu.cc
deleted file mode 100644
index 30ee5e7a1..000000000
--- a/third_party/aom/tools/dump_obu.cc
+++ /dev/null
@@ -1,164 +0,0 @@
-/*
- * Copyright (c) 2017, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#include <stdlib.h>
-#include <string.h>
-
-#include <memory>
-#include <string>
-
-#include "config/aom_config.h"
-
-#include "common/ivfdec.h"
-#include "common/obudec.h"
-#include "common/tools_common.h"
-#include "common/webmdec.h"
-#include "tools/obu_parser.h"
-
-namespace {
-
-const size_t kInitialBufferSize = 100 * 1024;
-
-struct InputContext {
- InputContext() = default;
- ~InputContext() { free(unit_buffer); }
-
- void Init() {
- memset(avx_ctx, 0, sizeof(*avx_ctx));
- memset(obu_ctx, 0, sizeof(*obu_ctx));
- obu_ctx->avx_ctx = avx_ctx;
-#if CONFIG_WEBM_IO
- memset(webm_ctx, 0, sizeof(*webm_ctx));
-#endif
- }
-
- AvxInputContext *avx_ctx = nullptr;
- ObuDecInputContext *obu_ctx = nullptr;
-#if CONFIG_WEBM_IO
- WebmInputContext *webm_ctx = nullptr;
-#endif
- uint8_t *unit_buffer = nullptr;
- size_t unit_buffer_size = 0;
-};
-
-void PrintUsage() {
- printf("Libaom OBU dump.\nUsage: dump_obu <input_file>\n");
-}
-
-VideoFileType GetFileType(InputContext *ctx) {
- if (file_is_ivf(ctx->avx_ctx)) return FILE_TYPE_IVF;
- if (file_is_obu(ctx->obu_ctx)) return FILE_TYPE_OBU;
-#if CONFIG_WEBM_IO
- if (file_is_webm(ctx->webm_ctx, ctx->avx_ctx)) return FILE_TYPE_WEBM;
-#endif
- return FILE_TYPE_RAW;
-}
-
-bool ReadTemporalUnit(InputContext *ctx, size_t *unit_size) {
- const VideoFileType file_type = ctx->avx_ctx->file_type;
- switch (file_type) {
- case FILE_TYPE_IVF: {
- if (ivf_read_frame(ctx->avx_ctx->file, &ctx->unit_buffer, unit_size,
- &ctx->unit_buffer_size, NULL)) {
- return false;
- }
- break;
- }
- case FILE_TYPE_OBU: {
- if (obudec_read_temporal_unit(ctx->obu_ctx, &ctx->unit_buffer, unit_size,
- &ctx->unit_buffer_size)) {
- return false;
- }
- break;
- }
-#if CONFIG_WEBM_IO
- case FILE_TYPE_WEBM: {
- if (webm_read_frame(ctx->webm_ctx, &ctx->unit_buffer, unit_size,
- &ctx->unit_buffer_size)) {
- return false;
- }
- break;
- }
-#endif
- default:
- // TODO(tomfinegan): Abuse FILE_TYPE_RAW for AV1/OBU elementary streams?
- fprintf(stderr, "Error: Unsupported file type.\n");
- return false;
- }
-
- return true;
-}
-
-} // namespace
-
-int main(int argc, const char *argv[]) {
- // TODO(tomfinegan): Could do with some params for verbosity.
- if (argc < 2) {
- PrintUsage();
- return EXIT_SUCCESS;
- }
-
- const std::string filename = argv[1];
-
- using FilePtr = std::unique_ptr<FILE, decltype(&fclose)>;
- FilePtr input_file(fopen(filename.c_str(), "rb"), &fclose);
- if (input_file.get() == nullptr) {
- input_file.release();
- fprintf(stderr, "Error: Cannot open input file.\n");
- return EXIT_FAILURE;
- }
-
- AvxInputContext avx_ctx;
- InputContext input_ctx;
- input_ctx.avx_ctx = &avx_ctx;
- ObuDecInputContext obu_ctx;
- input_ctx.obu_ctx = &obu_ctx;
-#if CONFIG_WEBM_IO
- WebmInputContext webm_ctx;
- input_ctx.webm_ctx = &webm_ctx;
-#endif
-
- input_ctx.Init();
- avx_ctx.file = input_file.get();
- avx_ctx.file_type = GetFileType(&input_ctx);
-
- // Note: the reader utilities will realloc the buffer using realloc() etc.
- // Can't have nice things like unique_ptr wrappers with that type of
- // behavior underneath the function calls.
- input_ctx.unit_buffer =
- reinterpret_cast<uint8_t *>(calloc(kInitialBufferSize, 1));
- if (!input_ctx.unit_buffer) {
- fprintf(stderr, "Error: No memory, can't alloc input buffer.\n");
- return EXIT_FAILURE;
- }
- input_ctx.unit_buffer_size = kInitialBufferSize;
-
- size_t unit_size = 0;
- int unit_number = 0;
- int64_t obu_overhead_bytes_total = 0;
- while (ReadTemporalUnit(&input_ctx, &unit_size)) {
- printf("Temporal unit %d\n", unit_number);
-
- int obu_overhead_current_unit = 0;
- if (!aom_tools::DumpObu(input_ctx.unit_buffer, static_cast<int>(unit_size),
- &obu_overhead_current_unit)) {
- fprintf(stderr, "Error: Temporal Unit parse failed on unit number %d.\n",
- unit_number);
- return EXIT_FAILURE;
- }
- printf(" OBU overhead: %d\n", obu_overhead_current_unit);
- ++unit_number;
- obu_overhead_bytes_total += obu_overhead_current_unit;
- }
-
- printf("File total OBU overhead: %" PRId64 "\n", obu_overhead_bytes_total);
- return EXIT_SUCCESS;
-}
diff --git a/third_party/aom/tools/gen_authors.sh b/third_party/aom/tools/gen_authors.sh
deleted file mode 100755
index 5def8bc89..000000000
--- a/third_party/aom/tools/gen_authors.sh
+++ /dev/null
@@ -1,10 +0,0 @@
-#!/bin/bash
-
-# Add organization names manually.
-
-cat <<EOF
-# This file is automatically generated from the git commit history
-# by tools/gen_authors.sh.
-
-$(git log --pretty=format:"%aN <%aE>" | sort | uniq | grep -v "corp.google\|clang-format")
-EOF
diff --git a/third_party/aom/tools/gen_constrained_tokenset.py b/third_party/aom/tools/gen_constrained_tokenset.py
deleted file mode 100755
index 5d12ee1ef..000000000
--- a/third_party/aom/tools/gen_constrained_tokenset.py
+++ /dev/null
@@ -1,120 +0,0 @@
-#!/usr/bin/python
-##
-## Copyright (c) 2016, Alliance for Open Media. All rights reserved
-##
-## This source code is subject to the terms of the BSD 2 Clause License and
-## the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
-## was not distributed with this source code in the LICENSE file, you can
-## obtain it at www.aomedia.org/license/software. If the Alliance for Open
-## Media Patent License 1.0 was not distributed with this source code in the
-## PATENTS file, you can obtain it at www.aomedia.org/license/patent.
-##
-"""Generate the probability model for the constrained token set.
-
-Model obtained from a 2-sided zero-centered distribution derived
-from a Pareto distribution. The cdf of the distribution is:
-cdf(x) = 0.5 + 0.5 * sgn(x) * [1 - {alpha/(alpha + |x|)} ^ beta]
-
-For a given beta and a given probability of the 1-node, the alpha
-is first solved, and then the {alpha, beta} pair is used to generate
-the probabilities for the rest of the nodes.
-"""
-
-import heapq
-import sys
-import numpy as np
-import scipy.optimize
-import scipy.stats
-
-
-def cdf_spareto(x, xm, beta):
- p = 1 - (xm / (np.abs(x) + xm))**beta
- p = 0.5 + 0.5 * np.sign(x) * p
- return p
-
-
-def get_spareto(p, beta):
- cdf = cdf_spareto
-
- def func(x):
- return ((cdf(1.5, x, beta) - cdf(0.5, x, beta)) /
- (1 - cdf(0.5, x, beta)) - p)**2
-
- alpha = scipy.optimize.fminbound(func, 1e-12, 10000, xtol=1e-12)
- parray = np.zeros(11)
- parray[0] = 2 * (cdf(0.5, alpha, beta) - 0.5)
- parray[1] = (2 * (cdf(1.5, alpha, beta) - cdf(0.5, alpha, beta)))
- parray[2] = (2 * (cdf(2.5, alpha, beta) - cdf(1.5, alpha, beta)))
- parray[3] = (2 * (cdf(3.5, alpha, beta) - cdf(2.5, alpha, beta)))
- parray[4] = (2 * (cdf(4.5, alpha, beta) - cdf(3.5, alpha, beta)))
- parray[5] = (2 * (cdf(6.5, alpha, beta) - cdf(4.5, alpha, beta)))
- parray[6] = (2 * (cdf(10.5, alpha, beta) - cdf(6.5, alpha, beta)))
- parray[7] = (2 * (cdf(18.5, alpha, beta) - cdf(10.5, alpha, beta)))
- parray[8] = (2 * (cdf(34.5, alpha, beta) - cdf(18.5, alpha, beta)))
- parray[9] = (2 * (cdf(66.5, alpha, beta) - cdf(34.5, alpha, beta)))
- parray[10] = 2 * (1. - cdf(66.5, alpha, beta))
- return parray
-
-
-def quantize_probs(p, save_first_bin, bits):
- """Quantize probability precisely.
-
- Quantize probabilities minimizing dH (Kullback-Leibler divergence)
- approximated by: sum (p_i-q_i)^2/p_i.
- References:
- https://en.wikipedia.org/wiki/Kullback%E2%80%93Leibler_divergence
- https://github.com/JarekDuda/AsymmetricNumeralSystemsToolkit
- """
- num_sym = p.size
- p = np.clip(p, 1e-16, 1)
- L = 2**bits
- pL = p * L
- ip = 1. / p # inverse probability
- q = np.clip(np.round(pL), 1, L + 1 - num_sym)
- quant_err = (pL - q)**2 * ip
- sgn = np.sign(L - q.sum()) # direction of correction
- if sgn != 0: # correction is needed
- v = [] # heap of adjustment results (adjustment err, index) of each symbol
- for i in range(1 if save_first_bin else 0, num_sym):
- q_adj = q[i] + sgn
- if q_adj > 0 and q_adj < L:
- adj_err = (pL[i] - q_adj)**2 * ip[i] - quant_err[i]
- heapq.heappush(v, (adj_err, i))
- while q.sum() != L:
- # apply lowest error adjustment
- (adj_err, i) = heapq.heappop(v)
- quant_err[i] += adj_err
- q[i] += sgn
- # calculate the cost of adjusting this symbol again
- q_adj = q[i] + sgn
- if q_adj > 0 and q_adj < L:
- adj_err = (pL[i] - q_adj)**2 * ip[i] - quant_err[i]
- heapq.heappush(v, (adj_err, i))
- return q
-
-
-def get_quantized_spareto(p, beta, bits, first_token):
- parray = get_spareto(p, beta)
- parray = parray[1:] / (1 - parray[0])
- # CONFIG_NEW_TOKENSET
- if first_token > 1:
- parray = parray[1:] / (1 - parray[0])
- qarray = quantize_probs(parray, first_token == 1, bits)
- return qarray.astype(np.int)
-
-
-def main(bits=15, first_token=1):
- beta = 8
- for q in range(1, 256):
- parray = get_quantized_spareto(q / 256., beta, bits, first_token)
- assert parray.sum() == 2**bits
- print '{', ', '.join('%d' % i for i in parray), '},'
-
-
-if __name__ == '__main__':
- if len(sys.argv) > 2:
- main(int(sys.argv[1]), int(sys.argv[2]))
- elif len(sys.argv) > 1:
- main(int(sys.argv[1]))
- else:
- main()
diff --git a/third_party/aom/tools/inspect-cli.js b/third_party/aom/tools/inspect-cli.js
deleted file mode 100644
index a14c08111..000000000
--- a/third_party/aom/tools/inspect-cli.js
+++ /dev/null
@@ -1,39 +0,0 @@
-/**
- * This tool lets you test if the compiled Javascript decoder is functioning properly. You'll
- * need to download a SpiderMonkey js-shell to run this script.
- * https://archive.mozilla.org/pub/firefox/nightly/latest-mozilla-central/
- *
- * Example:
- * js-shell inspect-cli.js video.ivf
- */
-load("inspect.js");
-var buffer = read(scriptArgs[0], "binary");
-var Module = {
- noExitRuntime: true,
- noInitialRun: true,
- preInit: [],
- preRun: [],
- postRun: [function () {
- printErr(`Loaded Javascript Decoder OK`);
- }],
- memoryInitializerPrefixURL: "bin/",
- arguments: ['input.ivf', 'output.raw'],
- on_frame_decoded_json: function (jsonString) {
- let json = JSON.parse("[" + Module.UTF8ToString(jsonString) + "null]");
- json.forEach(frame => {
- if (frame) {
- print(frame.frame);
- }
- });
- }
-};
-DecoderModule(Module);
-Module.FS.writeFile("/tmp/input.ivf", buffer, { encoding: "binary" });
-Module._open_file();
-Module._set_layers(0xFFFFFFFF); // Set this to zero if you want to benchmark decoding.
-while(true) {
- printErr("Decoding Frame ...");
- if (Module._read_frame()) {
- break;
- }
-}
diff --git a/third_party/aom/tools/inspect-post.js b/third_party/aom/tools/inspect-post.js
deleted file mode 100644
index 31c40bb82..000000000
--- a/third_party/aom/tools/inspect-post.js
+++ /dev/null
@@ -1 +0,0 @@
-Module["FS"] = FS;
diff --git a/third_party/aom/tools/intersect-diffs.py b/third_party/aom/tools/intersect-diffs.py
deleted file mode 100755
index df13c4ef7..000000000
--- a/third_party/aom/tools/intersect-diffs.py
+++ /dev/null
@@ -1,78 +0,0 @@
-#!/usr/bin/env python
-##
-## Copyright (c) 2016, Alliance for Open Media. All rights reserved
-##
-## This source code is subject to the terms of the BSD 2 Clause License and
-## the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
-## was not distributed with this source code in the LICENSE file, you can
-## obtain it at www.aomedia.org/license/software. If the Alliance for Open
-## Media Patent License 1.0 was not distributed with this source code in the
-## PATENTS file, you can obtain it at www.aomedia.org/license/patent.
-##
-"""Calculates the "intersection" of two unified diffs.
-
-Given two diffs, A and B, it finds all hunks in B that had non-context lines
-in A and prints them to stdout. This is useful to determine the hunks in B that
-are relevant to A. The resulting file can be applied with patch(1) on top of A.
-"""
-
-__author__ = "jkoleszar@google.com"
-
-import sys
-
-import diff
-
-
-def FormatDiffHunks(hunks):
- """Re-serialize a list of DiffHunks."""
- r = []
- last_header = None
- for hunk in hunks:
- this_header = hunk.header[0:2]
- if last_header != this_header:
- r.extend(hunk.header)
- last_header = this_header
- else:
- r.extend(hunk.header[2])
- r.extend(hunk.lines)
- r.append("\n")
- return "".join(r)
-
-
-def ZipHunks(rhs_hunks, lhs_hunks):
- """Join two hunk lists on filename."""
- for rhs_hunk in rhs_hunks:
- rhs_file = rhs_hunk.right.filename.split("/")[1:]
-
- for lhs_hunk in lhs_hunks:
- lhs_file = lhs_hunk.left.filename.split("/")[1:]
- if lhs_file != rhs_file:
- continue
- yield (rhs_hunk, lhs_hunk)
-
-
-def main():
- old_hunks = [x for x in diff.ParseDiffHunks(open(sys.argv[1], "r"))]
- new_hunks = [x for x in diff.ParseDiffHunks(open(sys.argv[2], "r"))]
- out_hunks = []
-
- # Join the right hand side of the older diff with the left hand side of the
- # newer diff.
- for old_hunk, new_hunk in ZipHunks(old_hunks, new_hunks):
- if new_hunk in out_hunks:
- continue
- old_lines = old_hunk.right
- new_lines = new_hunk.left
-
- # Determine if this hunk overlaps any non-context line from the other
- for i in old_lines.delta_line_nums:
- if i in new_lines:
- out_hunks.append(new_hunk)
- break
-
- if out_hunks:
- print FormatDiffHunks(out_hunks)
- sys.exit(1)
-
-if __name__ == "__main__":
- main()
diff --git a/third_party/aom/tools/lint-hunks.py b/third_party/aom/tools/lint-hunks.py
deleted file mode 100755
index d02bee16c..000000000
--- a/third_party/aom/tools/lint-hunks.py
+++ /dev/null
@@ -1,146 +0,0 @@
-#!/usr/bin/python
-##
-## Copyright (c) 2016, Alliance for Open Media. All rights reserved
-##
-## This source code is subject to the terms of the BSD 2 Clause License and
-## the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
-## was not distributed with this source code in the LICENSE file, you can
-## obtain it at www.aomedia.org/license/software. If the Alliance for Open
-## Media Patent License 1.0 was not distributed with this source code in the
-## PATENTS file, you can obtain it at www.aomedia.org/license/patent.
-##
-"""Performs style checking on each diff hunk."""
-import getopt
-import os
-import StringIO
-import subprocess
-import sys
-
-import diff
-
-
-SHORT_OPTIONS = "h"
-LONG_OPTIONS = ["help"]
-
-TOPLEVEL_CMD = ["git", "rev-parse", "--show-toplevel"]
-DIFF_CMD = ["git", "diff"]
-DIFF_INDEX_CMD = ["git", "diff-index", "-u", "HEAD", "--"]
-SHOW_CMD = ["git", "show"]
-CPPLINT_FILTERS = ["-readability/casting"]
-
-
-class Usage(Exception):
- pass
-
-
-class SubprocessException(Exception):
- def __init__(self, args):
- msg = "Failed to execute '%s'"%(" ".join(args))
- super(SubprocessException, self).__init__(msg)
-
-
-class Subprocess(subprocess.Popen):
- """Adds the notion of an expected returncode to Popen."""
-
- def __init__(self, args, expected_returncode=0, **kwargs):
- self._args = args
- self._expected_returncode = expected_returncode
- super(Subprocess, self).__init__(args, **kwargs)
-
- def communicate(self, *args, **kwargs):
- result = super(Subprocess, self).communicate(*args, **kwargs)
- if self._expected_returncode is not None:
- try:
- ok = self.returncode in self._expected_returncode
- except TypeError:
- ok = self.returncode == self._expected_returncode
- if not ok:
- raise SubprocessException(self._args)
- return result
-
-
-def main(argv=None):
- if argv is None:
- argv = sys.argv
- try:
- try:
- opts, args = getopt.getopt(argv[1:], SHORT_OPTIONS, LONG_OPTIONS)
- except getopt.error, msg:
- raise Usage(msg)
-
- # process options
- for o, _ in opts:
- if o in ("-h", "--help"):
- print __doc__
- sys.exit(0)
-
- if args and len(args) > 1:
- print __doc__
- sys.exit(0)
-
- # Find the fully qualified path to the root of the tree
- tl = Subprocess(TOPLEVEL_CMD, stdout=subprocess.PIPE)
- tl = tl.communicate()[0].strip()
-
- # See if we're working on the index or not.
- if args:
- diff_cmd = DIFF_CMD + [args[0] + "^!"]
- else:
- diff_cmd = DIFF_INDEX_CMD
-
- # Build the command line to execute cpplint
- cpplint_cmd = [os.path.join(tl, "tools", "cpplint.py"),
- "--filter=" + ",".join(CPPLINT_FILTERS),
- "-"]
-
- # Get a list of all affected lines
- file_affected_line_map = {}
- p = Subprocess(diff_cmd, stdout=subprocess.PIPE)
- stdout = p.communicate()[0]
- for hunk in diff.ParseDiffHunks(StringIO.StringIO(stdout)):
- filename = hunk.right.filename[2:]
- if filename not in file_affected_line_map:
- file_affected_line_map[filename] = set()
- file_affected_line_map[filename].update(hunk.right.delta_line_nums)
-
- # Run each affected file through cpplint
- lint_failed = False
- for filename, affected_lines in file_affected_line_map.iteritems():
- if filename.split(".")[-1] not in ("c", "h", "cc"):
- continue
-
- if args:
- # File contents come from git
- show_cmd = SHOW_CMD + [args[0] + ":" + filename]
- show = Subprocess(show_cmd, stdout=subprocess.PIPE)
- lint = Subprocess(cpplint_cmd, expected_returncode=(0, 1),
- stdin=show.stdout, stderr=subprocess.PIPE)
- lint_out = lint.communicate()[1]
- else:
- # File contents come from the working tree
- lint = Subprocess(cpplint_cmd, expected_returncode=(0, 1),
- stdin=subprocess.PIPE, stderr=subprocess.PIPE)
- stdin = open(os.path.join(tl, filename)).read()
- lint_out = lint.communicate(stdin)[1]
-
- for line in lint_out.split("\n"):
- fields = line.split(":")
- if fields[0] != "-":
- continue
- warning_line_num = int(fields[1])
- if warning_line_num in affected_lines:
- print "%s:%d:%s"%(filename, warning_line_num,
- ":".join(fields[2:]))
- lint_failed = True
-
- # Set exit code if any relevant lint errors seen
- if lint_failed:
- return 1
-
- except Usage, err:
- print >>sys.stderr, err
- print >>sys.stderr, "for help use --help"
- return 2
-
-if __name__ == "__main__":
- sys.exit(main())
diff --git a/third_party/aom/tools/obu_parser.cc b/third_party/aom/tools/obu_parser.cc
deleted file mode 100644
index 7d71386ce..000000000
--- a/third_party/aom/tools/obu_parser.cc
+++ /dev/null
@@ -1,190 +0,0 @@
-/*
- * Copyright (c) 2017, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-#include <string.h>
-
-#include <cstdio>
-#include <string>
-
-#include "aom/aom_codec.h"
-#include "aom/aom_integer.h"
-#include "aom_ports/mem_ops.h"
-#include "av1/common/obu_util.h"
-#include "tools/obu_parser.h"
-
-namespace aom_tools {
-
-// Basic OBU syntax
-// 8 bits: Header
-// bit 7: forbidden bit
-// bits 6,5,4,3: type bits
-// bit 2: extension flag bit
-// bit 1: has size field bit
-// bit 0: reserved bit
-//
-// Every field is described by a (mask, shift) pair and is read by the parsers
-// in this file as (byte >> k...Shift) & k...Mask.
-const uint32_t kObuForbiddenBitMask = 0x1;
-const uint32_t kObuForbiddenBitShift = 7;
-const uint32_t kObuTypeBitsMask = 0xF;
-const uint32_t kObuTypeBitsShift = 3;
-const uint32_t kObuExtensionFlagBitMask = 0x1;
-const uint32_t kObuExtensionFlagBitShift = 2;
-const uint32_t kObuHasSizeFieldBitMask = 0x1;
-const uint32_t kObuHasSizeFieldBitShift = 1;
-
-// When extension flag bit is set:
-// 8 bits: extension header
-// bits 7,6,5: temporal ID
-// bits 4,3: spatial ID
-// bits 2,1,0: reserved bits
-const uint32_t kObuExtTemporalIdBitsMask = 0x7;
-const uint32_t kObuExtTemporalIdBitsShift = 5;
-const uint32_t kObuExtSpatialIdBitsMask = 0x3;
-const uint32_t kObuExtSpatialIdBitsShift = 3;
-
-// Returns true when obu_type is one of the OBU types listed below, and false
-// for every other value.
-bool ValidObuType(int obu_type) {
-  bool is_known_type = false;
-  switch (obu_type) {
-    case OBU_SEQUENCE_HEADER:
-    case OBU_TEMPORAL_DELIMITER:
-    case OBU_FRAME_HEADER:
-    case OBU_TILE_GROUP:
-    case OBU_METADATA:
-    case OBU_FRAME:
-    case OBU_REDUNDANT_FRAME_HEADER:
-    case OBU_TILE_LIST:
-    case OBU_PADDING: is_known_type = true; break;
-    default: break;
-  }
-  return is_known_type;
-}
-
-// Decodes the first byte of an OBU into *obu_header (type, has_extension and
-// has_size_field). Returns false, after writing a diagnostic to stderr, when
-// the forbidden bit is set or the type is rejected by ValidObuType().
-bool ParseObuHeader(uint8_t obu_header_byte, ObuHeader *obu_header) {
-  const uint32_t header_bits = obu_header_byte;
-
-  if ((header_bits >> kObuForbiddenBitShift) & kObuForbiddenBitMask) {
-    fprintf(stderr, "Invalid OBU, forbidden bit set.\n");
-    return false;
-  }
-
-  const uint32_t type_bits =
-      (header_bits >> kObuTypeBitsShift) & kObuTypeBitsMask;
-  obu_header->type = static_cast<OBU_TYPE>(type_bits);
-  if (!ValidObuType(obu_header->type)) {
-    fprintf(stderr, "Invalid OBU type: %d.\n", obu_header->type);
-    return false;
-  }
-
-  const uint32_t extension_flag =
-      (header_bits >> kObuExtensionFlagBitShift) & kObuExtensionFlagBitMask;
-  const uint32_t size_field_flag =
-      (header_bits >> kObuHasSizeFieldBitShift) & kObuHasSizeFieldBitMask;
-  obu_header->has_extension = extension_flag;
-  obu_header->has_size_field = size_field_flag;
-  return true;
-}
-
-// Decodes the OBU extension byte into the temporal and spatial layer IDs of
-// *obu_header. Always returns true.
-bool ParseObuExtensionHeader(uint8_t ext_header_byte, ObuHeader *obu_header) {
-  const uint32_t ext_bits = ext_header_byte;
-  const uint32_t temporal_id =
-      (ext_bits >> kObuExtTemporalIdBitsShift) & kObuExtTemporalIdBitsMask;
-  const uint32_t spatial_id =
-      (ext_bits >> kObuExtSpatialIdBitsShift) & kObuExtSpatialIdBitsMask;
-
-  obu_header->temporal_layer_id = temporal_id;
-  obu_header->spatial_layer_id = spatial_id;
-  return true;
-}
-
-// Prints the parsed header fields to stdout: the OBU type name and whether an
-// extension byte is present. When it is, the temporal and spatial layer IDs
-// are printed as well.
-void PrintObuHeader(const ObuHeader *header) {
-  printf(
-      " OBU type: %s\n"
-      " extension: %s\n",
-      aom_obu_type_to_string(static_cast<OBU_TYPE>(header->type)),
-      header->has_extension ? "yes" : "no");
-  if (header->has_extension) {
-    // Each line reports its own field: spatial_id comes from
-    // spatial_layer_id, which ParseObuExtensionHeader() fills in separately
-    // from temporal_layer_id.
-    printf(
-        " temporal_id: %d\n"
-        " spatial_id: %d\n",
-        header->temporal_layer_id, header->spatial_layer_id);
-  }
-}
-
-// Walks the OBUs stored back to back in data[0, length), printing each header
-// and the total size of each OBU to stdout. Diagnostics go to stderr.
-//
-// Returns true when every byte was consumed by a well-formed OBU, false on the
-// first parse error. On success, and only when obu_overhead_bytes is non-null,
-// *obu_overhead_bytes receives the number of header and extension-header bytes
-// seen (the size field itself is not counted).
-bool DumpObu(const uint8_t *data, int length, int *obu_overhead_bytes) {
-  const int kObuHeaderSizeBytes = 1;
-  const int kMinimumBytesRequired = 1 + kObuHeaderSizeBytes;
-  int consumed = 0;
-  int obu_overhead = 0;
-  ObuHeader obu_header;
-  while (consumed < length) {
-    const int remaining = length - consumed;
-    if (remaining < kMinimumBytesRequired) {
-      fprintf(stderr,
-              "OBU parse error. Did not consume all data, %d bytes remain.\n",
-              remaining);
-      return false;
-    }
-
-    int obu_header_size = 0;
-
-    memset(&obu_header, 0, sizeof(obu_header));
-    const uint8_t obu_header_byte = *(data + consumed);
-    if (!ParseObuHeader(obu_header_byte, &obu_header)) {
-      fprintf(stderr, "OBU parsing failed at offset %d.\n", consumed);
-      return false;
-    }
-
-    ++obu_overhead;
-    ++obu_header_size;
-
-    if (obu_header.has_extension) {
-      // Safe to read: at least kMinimumBytesRequired (2) bytes remain here.
-      const uint8_t obu_ext_header_byte =
-          *(data + consumed + kObuHeaderSizeBytes);
-      if (!ParseObuExtensionHeader(obu_ext_header_byte, &obu_header)) {
-        fprintf(stderr, "OBU extension parsing failed at offset %d.\n",
-                consumed + kObuHeaderSizeBytes);
-        return false;
-      }
-
-      ++obu_overhead;
-      ++obu_header_size;
-    }
-
-    PrintObuHeader(&obu_header);
-
-    uint64_t obu_size = 0;
-    size_t length_field_size = 0;
-    if (aom_uleb_decode(data + consumed + obu_header_size,
-                        remaining - obu_header_size, &obu_size,
-                        &length_field_size) != 0) {
-      fprintf(stderr, "OBU size parsing failed at offset %d.\n",
-              consumed + obu_header_size);
-      return false;
-    }
-
-    // Bounds-check in 64 bits BEFORE narrowing to int. obu_size comes from
-    // the bitstream and may exceed INT_MAX; narrowing it first could wrap to
-    // a small or negative value that passes the check and then moves
-    // `consumed` outside the buffer (or backwards).
-    const uint64_t bytes_remaining = static_cast<uint64_t>(remaining);
-    const uint64_t bytes_before_payload =
-        static_cast<uint64_t>(obu_header_size) + length_field_size;
-    if (bytes_before_payload > bytes_remaining ||
-        obu_size > bytes_remaining - bytes_before_payload) {
-      fprintf(stderr, "OBU parsing failed: not enough OBU data.\n");
-      return false;
-    }
-
-    // Fits in int: the sum is bounded by `remaining`, which is an int.
-    const int obu_total_size =
-        static_cast<int>(bytes_before_payload + obu_size);
-    consumed += obu_total_size;
-    printf(" length: %d\n", obu_total_size);
-  }
-
-  if (obu_overhead_bytes != nullptr) *obu_overhead_bytes = obu_overhead;
-  printf(" TU size: %d\n", consumed);
-
-  return true;
-}
-
-} // namespace aom_tools
diff --git a/third_party/aom/tools/obu_parser.h b/third_party/aom/tools/obu_parser.h
deleted file mode 100644
index 1d7d2d794..000000000
--- a/third_party/aom/tools/obu_parser.h
+++ /dev/null
@@ -1,27 +0,0 @@
-/*
- * Copyright (c) 2017, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#ifndef AOM_TOOLS_OBU_PARSER_H_
-#define AOM_TOOLS_OBU_PARSER_H_
-
-#include <cstdint>
-
-namespace aom_tools {
-
-// Print information obtained from OBU(s) in data until data is exhausted or an
-// error occurs. length is the number of bytes readable at data. Returns true
-// when all data is consumed successfully, and optionally reports OBU storage
-// overhead (header and extension-header bytes) via obu_overhead_bytes when the
-// pointer is non-null. obu_overhead_bytes is written only on success.
-bool DumpObu(const uint8_t *data, int length, int *obu_overhead_bytes);
-
-} // namespace aom_tools
-
-#endif // AOM_TOOLS_OBU_PARSER_H_
diff --git a/third_party/aom/tools/txfm_analyzer/txfm_gen_code.cc b/third_party/aom/tools/txfm_analyzer/txfm_gen_code.cc
deleted file mode 100644
index 7c5400b91..000000000
--- a/third_party/aom/tools/txfm_analyzer/txfm_gen_code.cc
+++ /dev/null
@@ -1,580 +0,0 @@
-/*
- * Copyright (c) 2018, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#include <stdio.h>
-#include <stdlib.h>
-#include <math.h>
-#include <float.h>
-#include <string.h>
-
-#include "tools/txfm_analyzer/txfm_graph.h"
-
-// Selects which generator gen_hybrid_code() dispatches to.
-typedef enum CODE_TYPE {
- CODE_TYPE_C, // gen_code_c()
- CODE_TYPE_SSE2, // gen_code_sse2()
- CODE_TYPE_SSE4_1 // gen_code_sse4_1()
-} CODE_TYPE;
-
-// Maps a weight to a table index: acos(|value|) expressed as a fraction of PI,
-// scaled by mod and rounded to the nearest integer.
-int get_cos_idx(double value, int mod) {
-  const double angle = acos(fabs(value));
-  return round(angle / PI * mod);
-}
-
-// Formats value as a signed "cospi[idx]" expression into text (at most size
-// bytes) and returns text. Also prints a diagnostic line to stdout whenever
-// the computed index is 0.
-char *cos_text_arr(double value, int mod, char *text, int size) {
-  const int cos_idx = get_cos_idx(value, mod);
-  const bool is_negative = value < 0;
-
-  if (!is_negative) {
-    snprintf(text, size, " cospi[%2d]", cos_idx);
-  } else {
-    snprintf(text, size, "-cospi[%2d]", cos_idx);
-  }
-
-  if (cos_idx == 0) {
-    printf("v: %f -> %d/%d v==-1 is %d\n", value, cos_idx, mod, value == -1);
-  }
-
-  return text;
-}
-
-// Builds the identifier "cospi_<s0><idx0>_<s1><idx1>" for a weight pair, where
-// each sign letter is "m" for a negative weight and "p" otherwise. Writes at
-// most size bytes into text and returns text.
-char *cos_text_sse2(double w0, double w1, int mod, char *text, int size) {
-  const char *const first_sign = (w0 < 0) ? "m" : "p";
-  const char *const second_sign = (w1 < 0) ? "m" : "p";
-  const int first_idx = get_cos_idx(w0, mod);
-  const int second_idx = get_cos_idx(w1, mod);
-
-  snprintf(text, size, "cospi_%s%02d_%s%02d", first_sign, first_idx,
-           second_sign, second_idx);
-  return text;
-}
-
-// Builds the identifier "cospi_<s><idx>" for a single weight, where the sign
-// letter is "m" for a negative weight and "p" otherwise. Writes at most size
-// bytes into text and returns text.
-char *cos_text_sse4_1(double w, int mod, char *text, int size) {
-  const char *const sign_letter = (w < 0) ? "m" : "p";
-  const int cos_idx = get_cos_idx(w, mod);
-
-  snprintf(text, size, "cospi_%s%02d", sign_letter, cos_idx);
-  return text;
-}
-
-// Prints one C statement assigning buf1[node->nodeIdx] from the node's two
-// inputs read out of buf0.
-// - Both weights in {0, +1, -1}: a plain sum/difference/copy is printed,
-// wrapped in apply_value(..., stage_range[stage]) when both are +/-1.
-// - Otherwise: a half_btf(...) call with cospi[] weights is printed.
-void node_to_code_c(Node *node, const char *buf0, const char *buf1) {
- // cnt: number of inputs whose weight magnitude is exactly 0 or 1.
- int cnt = 0;
- for (int i = 0; i < 2; i++) {
- if (fabs(node->inWeight[i]) == 1 || fabs(node->inWeight[i]) == 0) cnt++;
- }
- if (cnt == 2) {
- // cnt2: number of inputs whose weight magnitude is exactly 1.
- int cnt2 = 0;
- printf(" %s[%d] =", buf1, node->nodeIdx);
- for (int i = 0; i < 2; i++) {
- if (fabs(node->inWeight[i]) == 1) {
- cnt2++;
- }
- }
- if (cnt2 == 2) {
- printf(" apply_value(");
- }
- // cnt1: number of terms printed so far; decides whether a term is printed
- // with a leading binary operator or as the first operand.
- int cnt1 = 0;
- for (int i = 0; i < 2; i++) {
- if (node->inWeight[i] == 1) {
- if (cnt1 > 0)
- printf(" + %s[%d]", buf0, node->inNodeIdx[i]);
- else
- printf(" %s[%d]", buf0, node->inNodeIdx[i]);
- cnt1++;
- } else if (node->inWeight[i] == -1) {
- if (cnt1 > 0)
- printf(" - %s[%d]", buf0, node->inNodeIdx[i]);
- else
- printf("-%s[%d]", buf0, node->inNodeIdx[i]);
- cnt1++;
- }
- }
- if (cnt2 == 2) {
- printf(", stage_range[stage])");
- }
- printf(";\n");
- } else {
- // Scratch buffers for the two formatted cospi[] weight expressions.
- char w0[100];
- char w1[100];
- printf(
- " %s[%d] = half_btf(%s, %s[%d], %s, %s[%d], "
- "cos_bit);\n",
- buf1, node->nodeIdx, cos_text_arr(node->inWeight[0], COS_MOD, w0, 100),
- buf0, node->inNodeIdx[0],
- cos_text_arr(node->inWeight[1], COS_MOD, w1, 100), buf0,
- node->inNodeIdx[1]);
- }
-}
-
-// Prints to stdout the C source of a function "av1_<name>" implementing the
-// transform described by the graph `node` (stage_num stages of node_num nodes
-// each). The function name comes from get_fun_name(type, node_num).
-//
-// The generated code ping-pongs between `output` and a local `step` buffer,
-// chosen per stage so that the last stage writes into `output`.
-void gen_code_c(Node *node, int stage_num, int node_num, TYPE_TXFM type) {
-  // Stack storage: nothing to release on return. Pre-cleared because
-  // get_fun_name() leaves the buffer untouched for unrecognized types.
-  const int kFunNameSize = 100;
-  char fun_name[kFunNameSize] = "";
-  get_fun_name(fun_name, kFunNameSize, type, node_num);
-
-  printf("\n");
-  printf(
-      "void av1_%s(const int32_t *input, int32_t *output, int8_t cos_bit, "
-      "const int8_t* stage_range) "
-      "{\n",
-      fun_name);
-  printf(" assert(output != input);\n");
-  printf(" const int32_t size = %d;\n", node_num);
-  printf(" const int32_t *cospi = cospi_arr(cos_bit);\n");
-  printf("\n");
-
-  printf(" int32_t stage = 0;\n");
-  printf(" int32_t *bf0, *bf1;\n");
-  printf(" int32_t step[%d];\n", node_num);
-
-  const char *buf0 = "bf0";
-  const char *buf1 = "bf1";
-  const char *input = "input";
-
-  // A stage writes to `output` when its parity matches the final stage's.
-  const int last_stage_parity = (stage_num - 1) % 2;
-
-  // Stage 0: range-limit the input in place.
-  printf("\n");
-  printf(" // stage %d;\n", 0);
-  printf(" apply_range(stage, input, %s, size, stage_range[stage]);\n", input);
-
-  // Stage 1: reads directly from `input`, so only bf1 is assigned.
-  const int first_stage = 1;
-  printf("\n");
-  printf(" // stage %d;\n", first_stage);
-  printf(" stage++;\n");
-  if (first_stage % 2 == last_stage_parity) {
-    printf(" %s = output;\n", buf1);
-  } else {
-    printf(" %s = step;\n", buf1);
-  }
-
-  for (int ni = 0; ni < node_num; ni++) {
-    int idx = get_idx(first_stage, ni, node_num);
-    node_to_code_c(node + idx, input, buf1);
-  }
-
-  printf(" range_check_buf(stage, input, bf1, size, stage_range[stage]);\n");
-
-  // Remaining stages: swap the roles of `step` and `output` each stage.
-  for (int stage_idx = 2; stage_idx < stage_num; stage_idx++) {
-    printf("\n");
-    printf(" // stage %d\n", stage_idx);
-    printf(" stage++;\n");
-    if (stage_idx % 2 == last_stage_parity) {
-      printf(" %s = step;\n", buf0);
-      printf(" %s = output;\n", buf1);
-    } else {
-      printf(" %s = output;\n", buf0);
-      printf(" %s = step;\n", buf1);
-    }
-
-    // computation code
-    for (int ni = 0; ni < node_num; ni++) {
-      int idx = get_idx(stage_idx, ni, node_num);
-      node_to_code_c(node + idx, buf0, buf1);
-    }
-
-    if (stage_idx != stage_num - 1) {
-      printf(
-          " range_check_buf(stage, input, bf1, size, stage_range[stage]);\n");
-    }
-  }
-  printf(" apply_range(stage, input, output, size, stage_range[stage]);\n");
-  printf("}\n");
-}
-
-// Prints one SSE2 statement assigning buf1[node->nodeIdx] for a node whose
-// weights are both in {0, +1, -1}: a 16-bit saturating add/sub, a copy, or a
-// negation via subtraction from __zero. Weight pairs not listed below print
-// an empty right-hand side.
-void single_node_to_code_sse2(Node *node, const char *buf0, const char *buf1) {
-  const double w0 = node->inWeight[0];
-  const double w1 = node->inWeight[1];
-  const int in0 = node->inNodeIdx[0];
-  const int in1 = node->inNodeIdx[1];
-
-  printf(" %s[%2d] =", buf1, node->nodeIdx);
-  if (w0 == 1) {
-    if (w1 == 1) {
-      printf(" _mm_adds_epi16(%s[%d], %s[%d])", buf0, in0, buf0, in1);
-    } else if (w1 == -1) {
-      printf(" _mm_subs_epi16(%s[%d], %s[%d])", buf0, in0, buf0, in1);
-    } else if (w1 == 0) {
-      printf(" %s[%d]", buf0, in0);
-    }
-  } else if (w0 == -1) {
-    if (w1 == 1) {
-      printf(" _mm_subs_epi16(%s[%d], %s[%d])", buf0, in1, buf0, in0);
-    } else if (w1 == 0) {
-      printf(" _mm_subs_epi16(__zero, %s[%d])", buf0, in0);
-    }
-  } else if (w0 == 0) {
-    if (w1 == 1) {
-      printf(" %s[%d]", buf0, in1);
-    } else if (w1 == -1) {
-      printf(" _mm_subs_epi16(__zero, %s[%d])", buf0, in1);
-    }
-  }
-  printf(";\n");
-}
-
-// Prints a single btf_16_sse2(...) call that produces both `node` and its
-// partner from the same two inputs in buf0, writing to buf1.
-void pair_node_to_code_sse2(Node *node, Node *partnerNode, const char *buf0,
- const char *buf1) {
- // Scratch buffers for the two formatted weight-pair identifiers.
- char temp0[100];
- char temp1[100];
- // Emitted call shape: btf_16_sse2(w0, w1, in0, in1, out0, out1)
- // When the partner lists its inputs in the opposite order, its two weights
- // are swapped so that they line up with node's input order.
- if (node->inNodeIdx[0] != partnerNode->inNodeIdx[0])
- printf(" btf_16_sse2(%s, %s, %s[%d], %s[%d], %s[%d], %s[%d]);\n",
- cos_text_sse2(node->inWeight[0], node->inWeight[1], COS_MOD, temp0,
- 100),
- cos_text_sse2(partnerNode->inWeight[1], partnerNode->inWeight[0],
- COS_MOD, temp1, 100),
- buf0, node->inNodeIdx[0], buf0, node->inNodeIdx[1], buf1,
- node->nodeIdx, buf1, partnerNode->nodeIdx);
- else
- printf(" btf_16_sse2(%s, %s, %s[%d], %s[%d], %s[%d], %s[%d]);\n",
- cos_text_sse2(node->inWeight[0], node->inWeight[1], COS_MOD, temp0,
- 100),
- cos_text_sse2(partnerNode->inWeight[0], partnerNode->inWeight[1],
- COS_MOD, temp1, 100),
- buf0, node->inNodeIdx[0], buf0, node->inNodeIdx[1], buf1,
- node->nodeIdx, buf1, partnerNode->nodeIdx);
-}
-
-// Returns the node in the same stage whose index equals the index of this
-// node's second input (located by pointer offset from `node`).
-Node *get_partner_node(Node *node) {
-  const int offset = node->inNode[1]->nodeIdx - node->nodeIdx;
-  return &node[offset];
-}
-
-// Emits the SSE2 statement(s) for `node` unless it was already emitted as
-// somebody's partner. Nodes with a non-trivial weight are emitted together
-// with their partner as one butterfly; nodes with two +/-1 weights are
-// emitted back to back with their partner; all others are emitted alone.
-void node_to_code_sse2(Node *node, const char *buf0, const char *buf1) {
-  if (node->visited != 0) return;
-  node->visited = 1;
-
-  int trivial_weights = 0;  // magnitude is exactly 0 or 1
-  int unit_weights = 0;     // magnitude is exactly 1
-  for (int i = 0; i < 2; i++) {
-    const double magnitude = fabs(node->inWeight[i]);
-    if (magnitude == 1) {
-      trivial_weights++;
-      unit_weights++;
-    } else if (magnitude == 0) {
-      trivial_weights++;
-    }
-  }
-
-  if (trivial_weights != 2) {
-    Node *partner = get_partner_node(node);
-    partner->visited = 1;
-    pair_node_to_code_sse2(node, partner, buf0, buf1);
-    return;
-  }
-
-  if (unit_weights != 2) {
-    single_node_to_code_sse2(node, buf0, buf1);
-    return;
-  }
-
-  // has a partner
-  Node *partner = get_partner_node(node);
-  partner->visited = 1;
-  single_node_to_code_sse2(node, buf0, buf1);
-  single_node_to_code_sse2(partner, buf0, buf1);
-}
-
-// Prints one "__m128i cospi_.. = pair_set_epi16(.., ..);" declaration for each
-// distinct weight pair used by the butterfly nodes of the graph. Marks nodes
-// (and their partners) as visited while walking, so callers reset the visited
-// flags before and after (see gen_code_sse2()).
-void gen_cospi_list_sse2(Node *node, int stage_num, int node_num) {
- // visited[idx0][idx1][sgn0][sgn1] != 0 once the constant for that
- // (index, index, sign, sign) combination has been printed.
- int visited[65][65][2][2];
- memset(visited, 0, sizeof(visited));
- char text[100];
- char text1[100];
- char text2[100];
- int size = 100;
- printf("\n");
- for (int si = 1; si < stage_num; si++) {
- for (int ni = 0; ni < node_num; ni++) {
- int idx = get_idx(si, ni, node_num);
- int cnt = 0;
- Node *node0 = node + idx;
- if (node0->visited == 0) {
- node0->visited = 1;
- // cnt: number of inputs whose weight magnitude is exactly 0 or 1.
- for (int i = 0; i < 2; i++) {
- if (fabs(node0->inWeight[i]) == 1 || fabs(node0->inWeight[i]) == 0)
- cnt++;
- }
- // Only nodes with a non-trivial weight need a cospi constant.
- if (cnt != 2) {
- {
- // Constant for node0's own (w0, w1) pair.
- double w0 = node0->inWeight[0];
- double w1 = node0->inWeight[1];
- int idx0 = get_cos_idx(w0, COS_MOD);
- int idx1 = get_cos_idx(w1, COS_MOD);
- int sgn0 = w0 < 0 ? 1 : 0;
- int sgn1 = w1 < 0 ? 1 : 0;
-
- if (!visited[idx0][idx1][sgn0][sgn1]) {
- visited[idx0][idx1][sgn0][sgn1] = 1;
- printf(" __m128i %s = pair_set_epi16(%s, %s);\n",
- cos_text_sse2(w0, w1, COS_MOD, text, size),
- cos_text_arr(w0, COS_MOD, text1, size),
- cos_text_arr(w1, COS_MOD, text2, size));
- }
- }
- Node *node1 = get_partner_node(node0);
- node1->visited = 1;
- if (node1->inNode[0]->nodeIdx != node0->inNode[0]->nodeIdx) {
- // Partner lists its inputs in the opposite order: emit its constant
- // with the two weights swapped.
- double w0 = node1->inWeight[0];
- double w1 = node1->inWeight[1];
- int idx0 = get_cos_idx(w0, COS_MOD);
- int idx1 = get_cos_idx(w1, COS_MOD);
- int sgn0 = w0 < 0 ? 1 : 0;
- int sgn1 = w1 < 0 ? 1 : 0;
-
- if (!visited[idx1][idx0][sgn1][sgn0]) {
- visited[idx1][idx0][sgn1][sgn0] = 1;
- printf(" __m128i %s = pair_set_epi16(%s, %s);\n",
- cos_text_sse2(w1, w0, COS_MOD, text, size),
- cos_text_arr(w1, COS_MOD, text1, size),
- cos_text_arr(w0, COS_MOD, text2, size));
- }
- } else {
- // Partner uses the same input order: emit its constant as is.
- double w0 = node1->inWeight[0];
- double w1 = node1->inWeight[1];
- int idx0 = get_cos_idx(w0, COS_MOD);
- int idx1 = get_cos_idx(w1, COS_MOD);
- int sgn0 = w0 < 0 ? 1 : 0;
- int sgn1 = w1 < 0 ? 1 : 0;
-
- if (!visited[idx0][idx1][sgn0][sgn1]) {
- visited[idx0][idx1][sgn0][sgn1] = 1;
- printf(" __m128i %s = pair_set_epi16(%s, %s);\n",
- cos_text_sse2(w0, w1, COS_MOD, text, size),
- cos_text_arr(w0, COS_MOD, text1, size),
- cos_text_arr(w1, COS_MOD, text2, size));
- }
- }
- }
- }
- }
- }
-}
-
-// Prints to stdout the source of a function "<name>_sse2" implementing the
-// transform described by the graph `node` with SSE2 intrinsics. The cospi
-// constant declarations are printed first, then one block per stage.
-void gen_code_sse2(Node *node, int stage_num, int node_num, TYPE_TXFM type) {
-  // Stack storage: nothing to release on return. Pre-cleared because
-  // get_fun_name() leaves the buffer untouched for unrecognized types.
-  const int kFunNameSize = 100;
-  char fun_name[kFunNameSize] = "";
-  get_fun_name(fun_name, kFunNameSize, type, node_num);
-
-  printf("\n");
-  printf(
-      "void %s_sse2(const __m128i *input, __m128i *output, int8_t cos_bit) "
-      "{\n",
-      fun_name);
-
-  printf(" const int32_t* cospi = cospi_arr(cos_bit);\n");
-  printf(" const __m128i __zero = _mm_setzero_si128();\n");
-  printf(" const __m128i __rounding = _mm_set1_epi32(1 << (cos_bit - 1));\n");
-
-  // Both passes below use the nodes' visited flags, so clear them before each.
-  graph_reset_visited(node, stage_num, node_num);
-  gen_cospi_list_sse2(node, stage_num, node_num);
-  graph_reset_visited(node, stage_num, node_num);
-  for (int si = 1; si < stage_num; si++) {
-    char in[100];
-    char out[100];
-    printf("\n");
-    printf(" // stage %d\n", si);
-    // Stage 1 reads `input`; later stages read the previous stage's x<N>.
-    if (si == 1)
-      snprintf(in, 100, "%s", "input");
-    else
-      snprintf(in, 100, "x%d", si - 1);
-    // The last stage writes `output`; earlier ones declare a fresh x<N>.
-    if (si == stage_num - 1) {
-      snprintf(out, 100, "%s", "output");
-    } else {
-      snprintf(out, 100, "x%d", si);
-      printf(" __m128i %s[%d];\n", out, node_num);
-    }
-    // computation code
-    for (int ni = 0; ni < node_num; ni++) {
-      int idx = get_idx(si, ni, node_num);
-      node_to_code_sse2(node + idx, in, out);
-    }
-  }
-
-  printf("}\n");
-}
-// Prints one "__m128i cospi_.. = _mm_set1_epi32(..);" declaration for each
-// distinct non-trivial weight used by the graph. Marks nodes (and the
-// partners of butterfly nodes) as visited while walking.
-void gen_cospi_list_sse4_1(Node *node, int stage_num, int node_num) {
-  // emitted[cos index][sign] != 0 once that constant has been printed.
-  int emitted[65][2];
-  memset(emitted, 0, sizeof(emitted));
-  char name_text[100];
-  char value_text[100];
-  const int text_size = 100;
-
-  printf("\n");
-  for (int si = 1; si < stage_num; si++) {
-    for (int ni = 0; ni < node_num; ni++) {
-      Node *cur = node + get_idx(si, ni, node_num);
-      if (cur->visited != 0) continue;
-      cur->visited = 1;
-
-      int trivial_weights = 0;
-      for (int i = 0; i < 2; i++) {
-        const double magnitude = fabs(cur->inWeight[i]);
-        if (magnitude == 1 || magnitude == 0) trivial_weights++;
-      }
-      if (trivial_weights == 2) continue;
-
-      for (int i = 0; i < 2; i++) {
-        const double magnitude = fabs(cur->inWeight[i]);
-        const bool is_trivial = (magnitude == 1 || magnitude == 0);
-        if (is_trivial) continue;
-
-        const double w = cur->inWeight[i];
-        const int cos_idx = get_cos_idx(w, COS_MOD);
-        const int sign = w < 0 ? 1 : 0;
-        if (emitted[cos_idx][sign]) continue;
-
-        emitted[cos_idx][sign] = 1;
-        printf(" __m128i %s = _mm_set1_epi32(%s);\n",
-               cos_text_sse4_1(w, COS_MOD, name_text, text_size),
-               cos_text_arr(w, COS_MOD, value_text, text_size));
-      }
-
-      Node *partner = get_partner_node(cur);
-      partner->visited = 1;
-    }
-  }
-}
-
-// Prints one SSE4.1 statement assigning buf1[node->nodeIdx] for a node whose
-// weights are both in {0, +1, -1}: a 32-bit add/sub, a copy, or a negation
-// via subtraction from __zero. Weight pairs not listed below print an empty
-// right-hand side.
-void single_node_to_code_sse4_1(Node *node, const char *buf0,
-                                const char *buf1) {
-  const double w0 = node->inWeight[0];
-  const double w1 = node->inWeight[1];
-  const int in0 = node->inNodeIdx[0];
-  const int in1 = node->inNodeIdx[1];
-
-  printf(" %s[%2d] =", buf1, node->nodeIdx);
-  if (w0 == 1) {
-    if (w1 == 1) {
-      printf(" _mm_add_epi32(%s[%d], %s[%d])", buf0, in0, buf0, in1);
-    } else if (w1 == -1) {
-      printf(" _mm_sub_epi32(%s[%d], %s[%d])", buf0, in0, buf0, in1);
-    } else if (w1 == 0) {
-      printf(" %s[%d]", buf0, in0);
-    }
-  } else if (w0 == -1) {
-    if (w1 == 1) {
-      printf(" _mm_sub_epi32(%s[%d], %s[%d])", buf0, in1, buf0, in0);
-    } else if (w1 == 0) {
-      printf(" _mm_sub_epi32(__zero, %s[%d])", buf0, in0);
-    }
-  } else if (w0 == 0) {
-    if (w1 == 1) {
-      printf(" %s[%d]", buf0, in1);
-    } else if (w1 == -1) {
-      printf(" _mm_sub_epi32(__zero, %s[%d])", buf0, in1);
-    }
-  }
-  printf(";\n");
-}
-
-// Prints a single butterfly call that produces both `node` and its partner
-// from the same two inputs in buf0, writing to buf1. The "type0" helper is
-// chosen when the first weights of node and partner have opposite signs,
-// the "type1" helper otherwise; both receive node's two weights.
-void pair_node_to_code_sse4_1(Node *node, Node *partnerNode, const char *buf0,
- const char *buf1) {
- // Scratch buffers for the two formatted weight identifiers.
- char temp0[100];
- char temp1[100];
- if (node->inWeight[0] * partnerNode->inWeight[0] < 0) {
- /* type0
- * cos sin
- * sin -cos
- */
- // Emitted call shape:
- // btf_32_type0_sse4_1_new(w0, w1, in0, in1, out0, out1, __rounding, cos_bit)
- // Original author's note on the helper (not verifiable from this file):
- // out0 = w0*in0 + w1*in1
- // out1 = -w0*in1 + w1*in0
- printf(
- " btf_32_type0_sse4_1_new(%s, %s, %s[%d], %s[%d], %s[%d], %s[%d], "
- "__rounding, cos_bit);\n",
- cos_text_sse4_1(node->inWeight[0], COS_MOD, temp0, 100),
- cos_text_sse4_1(node->inWeight[1], COS_MOD, temp1, 100), buf0,
- node->inNodeIdx[0], buf0, node->inNodeIdx[1], buf1, node->nodeIdx, buf1,
- partnerNode->nodeIdx);
- } else {
- /* type1
- * cos sin
- * -sin cos
- */
- // Emitted call shape:
- // btf_32_type1_sse4_1_new(w0, w1, in0, in1, out0, out1, __rounding, cos_bit)
- // Original author's note on the helper (not verifiable from this file):
- // out0 = w0*in0 + w1*in1
- // out1 = w0*in1 - w1*in0
- printf(
- " btf_32_type1_sse4_1_new(%s, %s, %s[%d], %s[%d], %s[%d], %s[%d], "
- "__rounding, cos_bit);\n",
- cos_text_sse4_1(node->inWeight[0], COS_MOD, temp0, 100),
- cos_text_sse4_1(node->inWeight[1], COS_MOD, temp1, 100), buf0,
- node->inNodeIdx[0], buf0, node->inNodeIdx[1], buf1, node->nodeIdx, buf1,
- partnerNode->nodeIdx);
- }
-}
-
-// Emits the SSE4.1 statement(s) for `node` unless it was already emitted as
-// somebody's partner.
-// - A non-trivial weight: node and partner are emitted as one butterfly.
-// - Two +/-1 weights: node and partner are emitted back to back.
-// - Otherwise (a copy or negation of one input): node is emitted alone.
-void node_to_code_sse4_1(Node *node, const char *buf0, const char *buf1) {
-  int cnt = 0;   // inputs whose weight magnitude is exactly 0 or 1
-  int cnt1 = 0;  // inputs whose weight magnitude is exactly 1
-  if (node->visited == 0) {
-    node->visited = 1;
-    for (int i = 0; i < 2; i++) {
-      if (fabs(node->inWeight[i]) == 1 || fabs(node->inWeight[i]) == 0) cnt++;
-      if (fabs(node->inWeight[i]) == 1) cnt1++;
-    }
-    if (cnt == 2) {
-      if (cnt1 == 2) {
-        // has a partner
-        Node *partnerNode = get_partner_node(node);
-        partnerNode->visited = 1;
-        single_node_to_code_sse4_1(node, buf0, buf1);
-        single_node_to_code_sse4_1(partnerNode, buf0, buf1);
-      } else {
-        // Must use the 32-bit SSE4.1 emitter here as well: the SSE2 emitter
-        // prints 16-bit saturating intrinsics (_mm_subs_epi16), which do not
-        // match the 32-bit lanes used by the rest of the generated function.
-        single_node_to_code_sse4_1(node, buf0, buf1);
-      }
-    } else {
-      Node *partnerNode = get_partner_node(node);
-      partnerNode->visited = 1;
-      pair_node_to_code_sse4_1(node, partnerNode, buf0, buf1);
-    }
-  }
-}
-
-// Prints to stdout the source of a function "<name>_sse4_1" implementing the
-// transform described by the graph `node` with SSE4.1 intrinsics. The cospi
-// constant declarations are printed first, then one block per stage.
-void gen_code_sse4_1(Node *node, int stage_num, int node_num, TYPE_TXFM type) {
-  // Stack storage: nothing to release on return. Pre-cleared because
-  // get_fun_name() leaves the buffer untouched for unrecognized types.
-  const int kFunNameSize = 100;
-  char fun_name[kFunNameSize] = "";
-  get_fun_name(fun_name, kFunNameSize, type, node_num);
-
-  printf("\n");
-  printf(
-      "void %s_sse4_1(const __m128i *input, __m128i *output, int8_t cos_bit) "
-      "{\n",
-      fun_name);
-
-  printf(" const int32_t* cospi = cospi_arr(cos_bit);\n");
-  printf(" const __m128i __zero = _mm_setzero_si128();\n");
-  printf(" const __m128i __rounding = _mm_set1_epi32(1 << (cos_bit - 1));\n");
-
-  // Both passes below use the nodes' visited flags, so clear them before each.
-  graph_reset_visited(node, stage_num, node_num);
-  gen_cospi_list_sse4_1(node, stage_num, node_num);
-  graph_reset_visited(node, stage_num, node_num);
-  for (int si = 1; si < stage_num; si++) {
-    char in[100];
-    char out[100];
-    printf("\n");
-    printf(" // stage %d\n", si);
-    // Stage 1 reads `input`; later stages read the previous stage's x<N>.
-    if (si == 1)
-      snprintf(in, 100, "%s", "input");
-    else
-      snprintf(in, 100, "x%d", si - 1);
-    // The last stage writes `output`; earlier ones declare a fresh x<N>.
-    if (si == stage_num - 1) {
-      snprintf(out, 100, "%s", "output");
-    } else {
-      snprintf(out, 100, "x%d", si);
-      printf(" __m128i %s[%d];\n", out, node_num);
-    }
-    // computation code
-    for (int ni = 0; ni < node_num; ni++) {
-      int idx = get_idx(si, ni, node_num);
-      node_to_code_sse4_1(node + idx, in, out);
-    }
-  }
-
-  printf("}\n");
-}
-
-// Builds the 1-D transform graph for (txfm_type, node_num) and prints the
-// generated source for it using the generator selected by code_type.
-void gen_hybrid_code(CODE_TYPE code_type, TYPE_TXFM txfm_type, int node_num) {
-  const int stage_num = get_hybrid_stage_num(txfm_type, node_num);
-
-  Node *graph = new Node[node_num * stage_num];
-  init_graph(graph, stage_num, node_num);
-  gen_hybrid_graph_1d(graph, stage_num, node_num, 0, 0, node_num, txfm_type);
-
-  if (code_type == CODE_TYPE_C) {
-    gen_code_c(graph, stage_num, node_num, txfm_type);
-  } else if (code_type == CODE_TYPE_SSE2) {
-    gen_code_sse2(graph, stage_num, node_num, txfm_type);
-  } else if (code_type == CODE_TYPE_SSE4_1) {
-    gen_code_sse4_1(graph, stage_num, node_num, txfm_type);
-  }
-
-  delete[] graph;
-}
-
-// Generates SSE4.1 code for every transform type in [TYPE_DCT, TYPE_LAST) at
-// sizes 4, 8, 16, 32 and 64. Command-line arguments are ignored.
-int main(int argc, char **argv) {
-  const CODE_TYPE code_type = CODE_TYPE_SSE4_1;
-  const int kMinLog2Size = 2;  // 1 << 2 == 4
-  const int kMaxLog2Size = 6;  // 1 << 6 == 64
-
-  for (int txfm_type = TYPE_DCT; txfm_type < TYPE_LAST; txfm_type++) {
-    for (int log2_size = kMinLog2Size; log2_size <= kMaxLog2Size;
-         log2_size++) {
-      const int node_num = 1 << log2_size;
-      gen_hybrid_code(code_type, (TYPE_TXFM)txfm_type, node_num);
-    }
-  }
-  return 0;
-}
diff --git a/third_party/aom/tools/txfm_analyzer/txfm_graph.cc b/third_party/aom/tools/txfm_analyzer/txfm_graph.cc
deleted file mode 100644
index a24906100..000000000
--- a/third_party/aom/tools/txfm_analyzer/txfm_graph.cc
+++ /dev/null
@@ -1,943 +0,0 @@
-/*
- * Copyright (c) 2018, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#include "tools/txfm_analyzer/txfm_graph.h"
-
-#include <stdio.h>
-#include <stdlib.h>
-#include <math.h>
-
-typedef struct Node Node;
-
-// Writes the generated function's base name for (type, txfm_size) into
-// str_fun_name, e.g. "fdct8_new". The buffer is left untouched for any other
-// type value.
-void get_fun_name(char *str_fun_name, int str_buf_size, const TYPE_TXFM type,
-                  const int txfm_size) {
-  switch (type) {
-    case TYPE_DCT:
-      snprintf(str_fun_name, str_buf_size, "fdct%d_new", txfm_size);
-      break;
-    case TYPE_ADST:
-      snprintf(str_fun_name, str_buf_size, "fadst%d_new", txfm_size);
-      break;
-    case TYPE_IDCT:
-      snprintf(str_fun_name, str_buf_size, "idct%d_new", txfm_size);
-      break;
-    case TYPE_IADST:
-      snprintf(str_fun_name, str_buf_size, "iadst%d_new", txfm_size);
-      break;
-    default: break;
-  }
-}
-
-// Writes "TXFM_TYPE_DCT<size>" or "TXFM_TYPE_ADST<size>" into str_fun_name.
-// Forward and inverse variants of a transform share the same name. The
-// buffer is left untouched for any other type value.
-void get_txfm_type_name(char *str_fun_name, int str_buf_size,
-                        const TYPE_TXFM type, const int txfm_size) {
-  switch (type) {
-    case TYPE_DCT:
-    case TYPE_IDCT:
-      snprintf(str_fun_name, str_buf_size, "TXFM_TYPE_DCT%d", txfm_size);
-      break;
-    case TYPE_ADST:
-    case TYPE_IADST:
-      snprintf(str_fun_name, str_buf_size, "TXFM_TYPE_ADST%d", txfm_size);
-      break;
-    default: break;
-  }
-}
-
-// Writes "_<type0>_<type1>_<size1>x<size0>" into buf for the four forward
-// DCT/ADST combinations. The buffer is left untouched for anything else.
-void get_hybrid_2d_type_name(char *buf, int buf_size, const TYPE_TXFM type0,
-                             const TYPE_TXFM type1, const int txfm_size0,
-                             const int txfm_size1) {
-  if (type0 == TYPE_DCT) {
-    if (type1 == TYPE_DCT) {
-      snprintf(buf, buf_size, "_dct_dct_%dx%d", txfm_size1, txfm_size0);
-    } else if (type1 == TYPE_ADST) {
-      snprintf(buf, buf_size, "_dct_adst_%dx%d", txfm_size1, txfm_size0);
-    }
-  } else if (type0 == TYPE_ADST) {
-    if (type1 == TYPE_ADST) {
-      snprintf(buf, buf_size, "_adst_adst_%dx%d", txfm_size1, txfm_size0);
-    } else if (type1 == TYPE_DCT) {
-      snprintf(buf, buf_size, "_adst_dct_%dx%d", txfm_size1, txfm_size0);
-    }
-  }
-}
-
-// Maps a forward transform type to its inverse and vice versa. Returns
-// TYPE_LAST for any other value.
-TYPE_TXFM get_inv_type(TYPE_TXFM type) {
-  switch (type) {
-    case TYPE_DCT: return TYPE_IDCT;
-    case TYPE_ADST: return TYPE_IADST;
-    case TYPE_IDCT: return TYPE_DCT;
-    case TYPE_IADST: return TYPE_ADST;
-    default: break;
-  }
-  return TYPE_LAST;
-}
-
-// Floating-point reference 1-D DCT: out[k] is the sum over n of
-// in[n] * cos(PI * (2n + 1) * k / (2 * size)), with out[0] additionally
-// scaled by 1/sqrt(2). Results are accumulated directly in out.
-void reference_dct_1d(double *in, double *out, int size) {
-  const double kInvSqrt2 = 0.707106781186547524400844362104;
-  for (int k = 0; k < size; k++) {
-    double *const acc = &out[k];
-    *acc = 0;  // initialize out[k]
-    int n = 0;
-    while (n < size) {
-      *acc += in[n] * cos(PI * (2 * n + 1) * k / (2 * size));
-      n++;
-    }
-    if (k == 0) *acc = *acc * kInvSqrt2;
-  }
-}
-
-// Floating-point reference 2-D DCT on a size x size block: 1-D DCT on every
-// row, transpose, then 1-D DCT on every row again. The result is written to
-// out.
-void reference_dct_2d(double *in, double *out, int size) {
-  double *transposed = new double[size * size];
-
-  // Pass 1: rows of in -> rows of out.
-  for (int row = 0; row < size; row++) {
-    const int offset = row * size;
-    reference_dct_1d(in + offset, out + offset, size);
-  }
-
-  // Transpose out into the scratch buffer.
-  for (int row = 0; row < size; row++) {
-    for (int col = 0; col < size; col++) {
-      transposed[row * size + col] = out[col * size + row];
-    }
-  }
-
-  // Pass 2: rows of the transposed data -> rows of out.
-  for (int row = 0; row < size; row++) {
-    const int offset = row * size;
-    reference_dct_1d(transposed + offset, out + offset, size);
-  }
-
-  delete[] transposed;
-}
-
-// Floating-point reference 1-D ADST: out[k] is the sum over n of
-// in[n] * sin(PI * (2n + 1) * (2k + 1) / (4 * size)). Results are
-// accumulated directly in out.
-void reference_adst_1d(double *in, double *out, int size) {
-  for (int k = 0; k < size; k++) {
-    double *const acc = &out[k];
-    *acc = 0;  // initialize out[k]
-    int n = 0;
-    while (n < size) {
-      *acc += in[n] * sin(PI * (2 * n + 1) * (2 * k + 1) / (4 * size));
-      n++;
-    }
-  }
-}
-
-// Floating-point reference 2-D hybrid transform on a size x size block. The
-// first pass applies type0 to every row, the block is transposed, and the
-// second pass applies type1 to every row. TYPE_DCT selects the DCT; any other
-// value selects the ADST.
-void reference_hybrid_2d(double *in, double *out, int size, int type0,
-                         int type1) {
-  typedef void (*Txfm1d)(double *, double *, int);
-  const Txfm1d first_pass =
-      (type0 == TYPE_DCT) ? reference_dct_1d : reference_adst_1d;
-  const Txfm1d second_pass =
-      (type1 == TYPE_DCT) ? reference_dct_1d : reference_adst_1d;
-
-  double *transposed = new double[size * size];
-
-  // Pass 1: rows of in -> rows of out.
-  for (int row = 0; row < size; row++) {
-    first_pass(in + row * size, out + row * size, size);
-  }
-
-  // Transpose out into the scratch buffer.
-  for (int row = 0; row < size; row++) {
-    for (int col = 0; col < size; col++) {
-      transposed[row * size + col] = out[col * size + row];
-    }
-  }
-
-  // Pass 2: rows of the transposed data -> rows of out.
-  for (int row = 0; row < size; row++) {
-    second_pass(transposed + row * size, out + row * size, size);
-  }
-
-  delete[] transposed;
-}
-
-// Rectangular variant of the reference 2-D hybrid transform. The input holds
-// size1 rows of size0 samples. The first pass applies type0 to each of those
-// rows, the data is transposed into size0 rows of size1 samples, and the
-// second pass applies type1 to each of them. TYPE_DCT selects the DCT; any
-// other value selects the ADST.
-void reference_hybrid_2d_new(double *in, double *out, int size0, int size1,
-                             int type0, int type1) {
-  typedef void (*Txfm1d)(double *, double *, int);
-  const Txfm1d first_pass =
-      (type0 == TYPE_DCT) ? reference_dct_1d : reference_adst_1d;
-  const Txfm1d second_pass =
-      (type1 == TYPE_DCT) ? reference_dct_1d : reference_adst_1d;
-
-  double *transposed = new double[size0 * size1];
-
-  // Pass 1: size1 rows of length size0, in -> out.
-  for (int row = 0; row < size1; row++) {
-    first_pass(in + row * size0, out + row * size0, size0);
-  }
-
-  // Transpose out (size1 x size0) into the scratch buffer (size0 x size1).
-  for (int row = 0; row < size1; row++) {
-    for (int col = 0; col < size0; col++) {
-      transposed[col * size1 + row] = out[row * size0 + col];
-    }
-  }
-
-  // Pass 2: size0 rows of length size1, scratch -> out.
-  for (int row = 0; row < size0; row++) {
-    second_pass(transposed + row * size1, out + row * size1, size1);
-  }
-
-  delete[] transposed;
-}
-
-// Returns the position of the highest set bit of x (0 for x == 1). For
-// x == 0 the internal counter stays at -1, which is returned converted to
-// unsigned int.
-unsigned int get_max_bit(unsigned int x) {
-  int highest = -1;
-  for (; x != 0; x >>= 1) {
-    ++highest;
-  }
-  return highest;
-}
-
-// Reverses the low (max_bit + 1) bits of x: the 32-bit pattern is reversed by
-// swapping halves of decreasing width (16, 8, 4, 2, 1 bits), then shifted
-// right so that only the reversed low field remains.
-unsigned int bitwise_reverse(unsigned int x, int max_bit) {
-  static const unsigned int kSwapMasks[5] = { 0x0000ffff, 0x00ff00ff,
-                                              0x0f0f0f0f, 0x33333333,
-                                              0x55555555 };
-  int width = 16;
-  for (int step = 0; step < 5; step++) {
-    const unsigned int mask = kSwapMasks[step];
-    x = ((x >> width) & mask) | ((x & mask) << width);
-    width >>= 1;
-  }
-  return x >> (31 - max_bit);
-}
-
-// Flattens (row ri, column ci) into an index into a row-major array whose
-// rows hold cSize elements.
-int get_idx(int ri, int ci, int cSize) {
-  const int row_start = ri * cSize;
-  return row_start + ci;
-}
-
-// Appends input `in` of the previous stage, with weight w, to node
-// (stage_idx, node_idx). A node holds at most two inputs; when both slots
-// are taken an error message is printed and the node is left unchanged.
-void add_node(Node *node, int stage_num, int node_num, int stage_idx,
-              int node_idx, int in, double w) {
-  const int dst_idx = get_idx(stage_idx, node_idx, node_num);
-  const int src_idx = get_idx(stage_idx - 1, in, node_num);
-  const int slot = node[dst_idx].inNodeNum;
-
-  if (slot >= 2) {
-    printf("Error: inNode is full");
-    return;
-  }
-
-  Node *dst = &node[dst_idx];
-  dst->inNode[slot] = &node[src_idx];
-  dst->inNodeIdx[slot] = in;
-  dst->inWeight[slot] = w;
-  dst->inNodeNum = slot + 1;
-}
-
-// Overwrites both input slots of node (stage_idx, node_idx): slot 0 becomes
-// input in0 of the previous stage with weight w0, slot 1 becomes input in1
-// with weight w1. Zero weights are stored too; the input count is always set
-// to 2.
-void connect_node(Node *node, int stage_num, int node_num, int stage_idx,
-                  int node_idx, int in0, double w0, int in1, double w1) {
-  Node *dst = &node[get_idx(stage_idx, node_idx, node_num)];
-  Node *first_src = &node[get_idx(stage_idx - 1, in0, node_num)];
-  Node *second_src = &node[get_idx(stage_idx - 1, in1, node_num)];
-
-  dst->inNode[0] = first_src;
-  dst->inNodeIdx[0] = in0;
-  dst->inWeight[0] = w0;
-
-  dst->inNode[1] = second_src;
-  dst->inNodeIdx[1] = in1;
-  dst->inWeight[1] = w1;
-
-  dst->inNodeNum = 2;
-}
-
-// Recomputes the value of every node in stage stage_idx as the weighted sum
-// of the values of its inputs.
-void propagate(Node *node, int stage_num, int node_num, int stage_idx) {
-  for (int ni = 0; ni < node_num; ni++) {
-    Node *dst = node + get_idx(stage_idx, ni, node_num);
-    dst->value = 0;
-    for (int k = 0; k < dst->inNodeNum; k++) {
-      dst->value += dst->inNode[k]->value * dst->inWeight[k];
-    }
-  }
-}
-
-// Scales value by 2^(-bit).
-// - bit > 0: divides by 2^bit, rounding to nearest with halves rounded away
-// from zero (negative values mirror the positive case).
-// - bit <= 0: multiplies by 2^(-bit).
-int64_t round_shift(int64_t value, int bit) {
-  if (bit > 0) {
-    if (value < 0) {
-      return -round_shift(-value, bit);
-    }
-    // Build the rounding term in 64 bits: a plain `1 << (bit - 1)` is an int
-    // shift and overflows once bit reaches 32.
-    const int64_t half = static_cast<int64_t>(1) << (bit - 1);
-    return (value + half) >> bit;
-  }
-  // Multiply instead of left-shifting so that negative values are well
-  // defined; the result matches `value << -bit` wherever that was defined.
-  return value * (static_cast<int64_t>(1) << (-bit));
-}
-
-void round_shift_array(int32_t *arr, int size, int bit) {
- if (bit == 0) {
- return;
- } else {
- for (int i = 0; i < size; i++) {
- arr[i] = round_shift(arr[i], bit);
- }
- }
-}
-
-void graph_reset_visited(Node *node, int stage_num, int node_num) {
- for (int si = 0; si < stage_num; si++) {
- for (int ni = 0; ni < node_num; ni++) {
- int idx = get_idx(si, ni, node_num);
- node[idx].visited = 0;
- }
- }
-}
-
-void estimate_value(Node *node, int stage_num, int node_num, int stage_idx,
- int node_idx, int estimate_bit) {
- if (stage_idx > 0) {
- int outIdx = get_idx(stage_idx, node_idx, node_num);
- int64_t out = 0;
- node[outIdx].value = 0;
- for (int k = 0; k < node[outIdx].inNodeNum; k++) {
- int64_t w = round(node[outIdx].inWeight[k] * (1 << estimate_bit));
- int64_t v = round(node[outIdx].inNode[k]->value);
- out += v * w;
- }
- node[outIdx].value = round_shift(out, estimate_bit);
- }
-}
-
-void amplify_value(Node *node, int stage_num, int node_num, int stage_idx,
- int node_idx, int amplify_bit) {
- int outIdx = get_idx(stage_idx, node_idx, node_num);
- node[outIdx].value = round_shift(round(node[outIdx].value), -amplify_bit);
-}
-
-void propagate_estimate_amlify(Node *node, int stage_num, int node_num,
- int stage_idx, int amplify_bit,
- int estimate_bit) {
- for (int ni = 0; ni < node_num; ni++) {
- estimate_value(node, stage_num, node_num, stage_idx, ni, estimate_bit);
- amplify_value(node, stage_num, node_num, stage_idx, ni, amplify_bit);
- }
-}
-
-void init_graph(Node *node, int stage_num, int node_num) {
- for (int si = 0; si < stage_num; si++) {
- for (int ni = 0; ni < node_num; ni++) {
- int outIdx = get_idx(si, ni, node_num);
- node[outIdx].stageIdx = si;
- node[outIdx].nodeIdx = ni;
- node[outIdx].value = 0;
- node[outIdx].inNodeNum = 0;
- if (si >= 1) {
- connect_node(node, stage_num, node_num, si, ni, ni, 1, ni, 0);
- }
- }
- }
-}
-
-void gen_B_graph(Node *node, int stage_num, int node_num, int stage_idx,
- int node_idx, int N, int star) {
- for (int i = 0; i < N / 2; i++) {
- int out = node_idx + i;
- int in1 = node_idx + N - 1 - i;
- if (star == 1) {
- connect_node(node, stage_num, node_num, stage_idx + 1, out, out, -1, in1,
- 1);
- } else {
- connect_node(node, stage_num, node_num, stage_idx + 1, out, out, 1, in1,
- 1);
- }
- }
- for (int i = N / 2; i < N; i++) {
- int out = node_idx + i;
- int in1 = node_idx + N - 1 - i;
- if (star == 1) {
- connect_node(node, stage_num, node_num, stage_idx + 1, out, out, 1, in1,
- 1);
- } else {
- connect_node(node, stage_num, node_num, stage_idx + 1, out, out, -1, in1,
- 1);
- }
- }
-}
-
-void gen_P_graph(Node *node, int stage_num, int node_num, int stage_idx,
- int node_idx, int N) {
- int max_bit = get_max_bit(N - 1);
- for (int i = 0; i < N; i++) {
- int out = node_idx + bitwise_reverse(i, max_bit);
- int in = node_idx + i;
- connect_node(node, stage_num, node_num, stage_idx + 1, out, in, 1, in, 0);
- }
-}
-
-void gen_type1_graph(Node *node, int stage_num, int node_num, int stage_idx,
- int node_idx, int N) {
- int max_bit = get_max_bit(N);
- for (int ni = 0; ni < N / 2; ni++) {
- int ai = bitwise_reverse(N + ni, max_bit);
- int out = node_idx + ni;
- int in1 = node_idx + N - ni - 1;
- connect_node(node, stage_num, node_num, stage_idx + 1, out, out,
- sin(PI * ai / (2 * 2 * N)), in1, cos(PI * ai / (2 * 2 * N)));
- }
- for (int ni = N / 2; ni < N; ni++) {
- int ai = bitwise_reverse(N + ni, max_bit);
- int out = node_idx + ni;
- int in1 = node_idx + N - ni - 1;
- connect_node(node, stage_num, node_num, stage_idx + 1, out, out,
- cos(PI * ai / (2 * 2 * N)), in1, -sin(PI * ai / (2 * 2 * N)));
- }
-}
-
-void gen_type2_graph(Node *node, int stage_num, int node_num, int stage_idx,
- int node_idx, int N) {
- for (int ni = 0; ni < N / 4; ni++) {
- int out = node_idx + ni;
- connect_node(node, stage_num, node_num, stage_idx + 1, out, out, 1, out, 0);
- }
-
- for (int ni = N / 4; ni < N / 2; ni++) {
- int out = node_idx + ni;
- int in1 = node_idx + N - ni - 1;
- connect_node(node, stage_num, node_num, stage_idx + 1, out, out,
- -cos(PI / 4), in1, cos(-PI / 4));
- }
-
- for (int ni = N / 2; ni < N * 3 / 4; ni++) {
- int out = node_idx + ni;
- int in1 = node_idx + N - ni - 1;
- connect_node(node, stage_num, node_num, stage_idx + 1, out, out,
- cos(-PI / 4), in1, cos(PI / 4));
- }
-
- for (int ni = N * 3 / 4; ni < N; ni++) {
- int out = node_idx + ni;
- connect_node(node, stage_num, node_num, stage_idx + 1, out, out, 1, out, 0);
- }
-}
-
-void gen_type3_graph(Node *node, int stage_num, int node_num, int stage_idx,
- int node_idx, int idx, int N) {
- // TODO(angiebird): Simplify and clarify this function
-
- int i = 2 * N / (1 << (idx / 2));
- int max_bit =
- get_max_bit(i / 2) - 1; // the max_bit counts on i/2 instead of N here
- int N_over_i = 2 << (idx / 2);
-
- for (int nj = 0; nj < N / 2; nj += N_over_i) {
- int j = nj / (N_over_i);
- int kj = bitwise_reverse(i / 4 + j, max_bit);
- // printf("kj = %d\n", kj);
-
- // I_N/2i --- 0
- int offset = nj;
- for (int ni = 0; ni < N_over_i / 4; ni++) {
- int out = node_idx + offset + ni;
- int in = out;
- connect_node(node, stage_num, node_num, stage_idx + 1, out, in, 1, in, 0);
- }
-
- // -C_Kj/i --- S_Kj/i
- offset += N_over_i / 4;
- for (int ni = 0; ni < N_over_i / 4; ni++) {
- int out = node_idx + offset + ni;
- int in0 = out;
- double w0 = -cos(kj * PI / i);
- int in1 = N - (offset + ni) - 1 + node_idx;
- double w1 = sin(kj * PI / i);
- connect_node(node, stage_num, node_num, stage_idx + 1, out, in0, w0, in1,
- w1);
- }
-
- // S_kj/i --- -C_Kj/i
- offset += N_over_i / 4;
- for (int ni = 0; ni < N_over_i / 4; ni++) {
- int out = node_idx + offset + ni;
- int in0 = out;
- double w0 = -sin(kj * PI / i);
- int in1 = N - (offset + ni) - 1 + node_idx;
- double w1 = -cos(kj * PI / i);
- connect_node(node, stage_num, node_num, stage_idx + 1, out, in0, w0, in1,
- w1);
- }
-
- // I_N/2i --- 0
- offset += N_over_i / 4;
- for (int ni = 0; ni < N_over_i / 4; ni++) {
- int out = node_idx + offset + ni;
- int in = out;
- connect_node(node, stage_num, node_num, stage_idx + 1, out, in, 1, in, 0);
- }
- }
-
- for (int nj = N / 2; nj < N; nj += N_over_i) {
- int j = nj / N_over_i;
- int kj = bitwise_reverse(i / 4 + j, max_bit);
-
- // I_N/2i --- 0
- int offset = nj;
- for (int ni = 0; ni < N_over_i / 4; ni++) {
- int out = node_idx + offset + ni;
- int in = out;
- connect_node(node, stage_num, node_num, stage_idx + 1, out, in, 1, in, 0);
- }
-
- // C_kj/i --- -S_Kj/i
- offset += N_over_i / 4;
- for (int ni = 0; ni < N_over_i / 4; ni++) {
- int out = node_idx + offset + ni;
- int in0 = out;
- double w0 = cos(kj * PI / i);
- int in1 = N - (offset + ni) - 1 + node_idx;
- double w1 = -sin(kj * PI / i);
- connect_node(node, stage_num, node_num, stage_idx + 1, out, in0, w0, in1,
- w1);
- }
-
- // S_kj/i --- C_Kj/i
- offset += N_over_i / 4;
- for (int ni = 0; ni < N_over_i / 4; ni++) {
- int out = node_idx + offset + ni;
- int in0 = out;
- double w0 = sin(kj * PI / i);
- int in1 = N - (offset + ni) - 1 + node_idx;
- double w1 = cos(kj * PI / i);
- connect_node(node, stage_num, node_num, stage_idx + 1, out, in0, w0, in1,
- w1);
- }
-
- // I_N/2i --- 0
- offset += N_over_i / 4;
- for (int ni = 0; ni < N_over_i / 4; ni++) {
- int out = node_idx + offset + ni;
- int in = out;
- connect_node(node, stage_num, node_num, stage_idx + 1, out, in, 1, in, 0);
- }
- }
-}
-
-void gen_type4_graph(Node *node, int stage_num, int node_num, int stage_idx,
- int node_idx, int idx, int N) {
- int B_size = 1 << ((idx + 1) / 2);
- for (int ni = 0; ni < N; ni += B_size) {
- gen_B_graph(node, stage_num, node_num, stage_idx, node_idx + ni, B_size,
- (ni / B_size) % 2);
- }
-}
-
-void gen_R_graph(Node *node, int stage_num, int node_num, int stage_idx,
- int node_idx, int N) {
- int max_idx = 2 * (get_max_bit(N) + 1) - 3;
- for (int idx = 0; idx < max_idx; idx++) {
- int s = stage_idx + max_idx - idx - 1;
- if (idx == 0) {
- // type 1
- gen_type1_graph(node, stage_num, node_num, s, node_idx, N);
- } else if (idx == max_idx - 1) {
- // type 2
- gen_type2_graph(node, stage_num, node_num, s, node_idx, N);
- } else if ((idx + 1) % 2 == 0) {
- // type 4
- gen_type4_graph(node, stage_num, node_num, s, node_idx, idx, N);
- } else if ((idx + 1) % 2 == 1) {
- // type 3
- gen_type3_graph(node, stage_num, node_num, s, node_idx, idx, N);
- } else {
- printf("check gen_R_graph()\n");
- }
- }
-}
-
-void gen_DCT_graph(Node *node, int stage_num, int node_num, int stage_idx,
- int node_idx, int N) {
- if (N > 2) {
- gen_B_graph(node, stage_num, node_num, stage_idx, node_idx, N, 0);
- gen_DCT_graph(node, stage_num, node_num, stage_idx + 1, node_idx, N / 2);
- gen_R_graph(node, stage_num, node_num, stage_idx + 1, node_idx + N / 2,
- N / 2);
- } else {
- // generate dct_2
- connect_node(node, stage_num, node_num, stage_idx + 1, node_idx, node_idx,
- cos(PI / 4), node_idx + 1, cos(PI / 4));
- connect_node(node, stage_num, node_num, stage_idx + 1, node_idx + 1,
- node_idx + 1, -cos(PI / 4), node_idx, cos(PI / 4));
- }
-}
-
-int get_dct_stage_num(int size) { return 2 * get_max_bit(size); }
-
-void gen_DCT_graph_1d(Node *node, int stage_num, int node_num, int stage_idx,
- int node_idx, int dct_node_num) {
- gen_DCT_graph(node, stage_num, node_num, stage_idx, node_idx, dct_node_num);
- int dct_stage_num = get_dct_stage_num(dct_node_num);
- gen_P_graph(node, stage_num, node_num, stage_idx + dct_stage_num - 2,
- node_idx, dct_node_num);
-}
-
-void gen_adst_B_graph(Node *node, int stage_num, int node_num, int stage_idx,
- int node_idx, int adst_idx) {
- int size = 1 << (adst_idx + 1);
- for (int ni = 0; ni < size / 2; ni++) {
- int nOut = node_idx + ni;
- int nIn = nOut + size / 2;
- connect_node(node, stage_num, node_num, stage_idx + 1, nOut, nOut, 1, nIn,
- 1);
- // printf("nOut: %d nIn: %d\n", nOut, nIn);
- }
- for (int ni = size / 2; ni < size; ni++) {
- int nOut = node_idx + ni;
- int nIn = nOut - size / 2;
- connect_node(node, stage_num, node_num, stage_idx + 1, nOut, nOut, -1, nIn,
- 1);
- // printf("ndctOut: %d nIn: %d\n", nOut, nIn);
- }
-}
-
-void gen_adst_U_graph(Node *node, int stage_num, int node_num, int stage_idx,
- int node_idx, int adst_idx, int adst_node_num) {
- int size = 1 << (adst_idx + 1);
- for (int ni = 0; ni < adst_node_num; ni += size) {
- gen_adst_B_graph(node, stage_num, node_num, stage_idx, node_idx + ni,
- adst_idx);
- }
-}
-
-void gen_adst_T_graph(Node *node, int stage_num, int node_num, int stage_idx,
- int node_idx, double freq) {
- connect_node(node, stage_num, node_num, stage_idx + 1, node_idx, node_idx,
- cos(freq * PI), node_idx + 1, sin(freq * PI));
- connect_node(node, stage_num, node_num, stage_idx + 1, node_idx + 1,
- node_idx + 1, -cos(freq * PI), node_idx, sin(freq * PI));
-}
-
-void gen_adst_E_graph(Node *node, int stage_num, int node_num, int stage_idx,
- int node_idx, int adst_idx) {
- int size = 1 << (adst_idx);
- for (int i = 0; i < size / 2; i++) {
- int ni = i * 2;
- double fi = (1 + 4 * i) * 1.0 / (1 << (adst_idx + 1));
- gen_adst_T_graph(node, stage_num, node_num, stage_idx, node_idx + ni, fi);
- }
-}
-
-void gen_adst_V_graph(Node *node, int stage_num, int node_num, int stage_idx,
- int node_idx, int adst_idx, int adst_node_num) {
- int size = 1 << (adst_idx);
- for (int i = 0; i < adst_node_num / size; i++) {
- if (i % 2 == 1) {
- int ni = i * size;
- gen_adst_E_graph(node, stage_num, node_num, stage_idx, node_idx + ni,
- adst_idx);
- }
- }
-}
-void gen_adst_VJ_graph(Node *node, int stage_num, int node_num, int stage_idx,
- int node_idx, int adst_node_num) {
- for (int i = 0; i < adst_node_num / 2; i++) {
- int ni = i * 2;
- double fi = (1 + 4 * i) * 1.0 / (4 * adst_node_num);
- gen_adst_T_graph(node, stage_num, node_num, stage_idx, node_idx + ni, fi);
- }
-}
-void gen_adst_Q_graph(Node *node, int stage_num, int node_num, int stage_idx,
- int node_idx, int adst_node_num) {
- // reverse order when idx is 1, 3, 5, 7 ...
- // example of adst_node_num = 8:
- // 0 1 2 3 4 5 6 7
- // --> 0 7 2 5 4 3 6 1
- for (int ni = 0; ni < adst_node_num; ni++) {
- if (ni % 2 == 0) {
- int out = node_idx + ni;
- connect_node(node, stage_num, node_num, stage_idx + 1, out, out, 1, out,
- 0);
- } else {
- int out = node_idx + ni;
- int in = node_idx + adst_node_num - ni;
- connect_node(node, stage_num, node_num, stage_idx + 1, out, in, 1, in, 0);
- }
- }
-}
-void gen_adst_Ibar_graph(Node *node, int stage_num, int node_num, int stage_idx,
- int node_idx, int adst_node_num) {
- // reverse order
- // 0 1 2 3 --> 3 2 1 0
- for (int ni = 0; ni < adst_node_num; ni++) {
- int out = node_idx + ni;
- int in = node_idx + adst_node_num - ni - 1;
- connect_node(node, stage_num, node_num, stage_idx + 1, out, in, 1, in, 0);
- }
-}
-
-int get_Q_out2in(int adst_node_num, int out) {
- int in;
- if (out % 2 == 0) {
- in = out;
- } else {
- in = adst_node_num - out;
- }
- return in;
-}
-
-int get_Ibar_out2in(int adst_node_num, int out) {
- return adst_node_num - out - 1;
-}
-
-void gen_adst_IbarQ_graph(Node *node, int stage_num, int node_num,
- int stage_idx, int node_idx, int adst_node_num) {
- // in -> Ibar -> Q -> out
- for (int ni = 0; ni < adst_node_num; ni++) {
- int out = node_idx + ni;
- int in = node_idx +
- get_Ibar_out2in(adst_node_num, get_Q_out2in(adst_node_num, ni));
- connect_node(node, stage_num, node_num, stage_idx + 1, out, in, 1, in, 0);
- }
-}
-
-void gen_adst_D_graph(Node *node, int stage_num, int node_num, int stage_idx,
- int node_idx, int adst_node_num) {
- // reverse order
- for (int ni = 0; ni < adst_node_num; ni++) {
- int out = node_idx + ni;
- int in = out;
- if (ni % 2 == 0) {
- connect_node(node, stage_num, node_num, stage_idx + 1, out, in, 1, in, 0);
- } else {
- connect_node(node, stage_num, node_num, stage_idx + 1, out, in, -1, in,
- 0);
- }
- }
-}
-
-int get_hadamard_idx(int x, int adst_node_num) {
- int max_bit = get_max_bit(adst_node_num - 1);
- x = bitwise_reverse(x, max_bit);
-
- // gray code
- int c = x & 1;
- int p = x & 1;
- int y = c;
-
- for (int i = 1; i <= max_bit; i++) {
- p = c;
- c = (x >> i) & 1;
- y += (c ^ p) << i;
- }
- return y;
-}
-
-void gen_adst_Ht_graph(Node *node, int stage_num, int node_num, int stage_idx,
- int node_idx, int adst_node_num) {
- for (int ni = 0; ni < adst_node_num; ni++) {
- int out = node_idx + ni;
- int in = node_idx + get_hadamard_idx(ni, adst_node_num);
- connect_node(node, stage_num, node_num, stage_idx + 1, out, in, 1, in, 0);
- }
-}
-
-void gen_adst_HtD_graph(Node *node, int stage_num, int node_num, int stage_idx,
- int node_idx, int adst_node_num) {
- for (int ni = 0; ni < adst_node_num; ni++) {
- int out = node_idx + ni;
- int in = node_idx + get_hadamard_idx(ni, adst_node_num);
- double inW;
- if (ni % 2 == 0)
- inW = 1;
- else
- inW = -1;
- connect_node(node, stage_num, node_num, stage_idx + 1, out, in, inW, in, 0);
- }
-}
-
-int get_adst_stage_num(int adst_node_num) {
- return 2 * get_max_bit(adst_node_num) + 2;
-}
-
-int gen_iadst_graph(Node *node, int stage_num, int node_num, int stage_idx,
- int node_idx, int adst_node_num) {
- int max_bit = get_max_bit(adst_node_num);
- int si = 0;
- gen_adst_IbarQ_graph(node, stage_num, node_num, stage_idx + si, node_idx,
- adst_node_num);
- si++;
- gen_adst_VJ_graph(node, stage_num, node_num, stage_idx + si, node_idx,
- adst_node_num);
- si++;
- for (int adst_idx = max_bit - 1; adst_idx >= 1; adst_idx--) {
- gen_adst_U_graph(node, stage_num, node_num, stage_idx + si, node_idx,
- adst_idx, adst_node_num);
- si++;
- gen_adst_V_graph(node, stage_num, node_num, stage_idx + si, node_idx,
- adst_idx, adst_node_num);
- si++;
- }
- gen_adst_HtD_graph(node, stage_num, node_num, stage_idx + si, node_idx,
- adst_node_num);
- si++;
- return si + 1;
-}
-
-int gen_adst_graph(Node *node, int stage_num, int node_num, int stage_idx,
- int node_idx, int adst_node_num) {
- int hybrid_stage_num = get_hybrid_stage_num(TYPE_ADST, adst_node_num);
- // generate a adst tempNode
- Node *tempNode = new Node[hybrid_stage_num * adst_node_num];
- init_graph(tempNode, hybrid_stage_num, adst_node_num);
- int si = gen_iadst_graph(tempNode, hybrid_stage_num, adst_node_num, 0, 0,
- adst_node_num);
-
- // tempNode's inverse graph to node[stage_idx][node_idx]
- gen_inv_graph(tempNode, hybrid_stage_num, adst_node_num, node, stage_num,
- node_num, stage_idx, node_idx);
- delete[] tempNode;
- return si;
-}
-
-void connect_layer_2d(Node *node, int stage_num, int node_num, int stage_idx,
- int node_idx, int dct_node_num) {
- for (int first = 0; first < dct_node_num; first++) {
- for (int second = 0; second < dct_node_num; second++) {
- // int sIn = stage_idx;
- int sOut = stage_idx + 1;
- int nIn = node_idx + first * dct_node_num + second;
- int nOut = node_idx + second * dct_node_num + first;
-
- // printf("sIn: %d nIn: %d sOut: %d nOut: %d\n", sIn, nIn, sOut, nOut);
-
- connect_node(node, stage_num, node_num, sOut, nOut, nIn, 1, nIn, 0);
- }
- }
-}
-
-void connect_layer_2d_new(Node *node, int stage_num, int node_num,
- int stage_idx, int node_idx, int dct_node_num0,
- int dct_node_num1) {
- for (int i = 0; i < dct_node_num1; i++) {
- for (int j = 0; j < dct_node_num0; j++) {
- // int sIn = stage_idx;
- int sOut = stage_idx + 1;
- int nIn = node_idx + i * dct_node_num0 + j;
- int nOut = node_idx + j * dct_node_num1 + i;
-
- // printf("sIn: %d nIn: %d sOut: %d nOut: %d\n", sIn, nIn, sOut, nOut);
-
- connect_node(node, stage_num, node_num, sOut, nOut, nIn, 1, nIn, 0);
- }
- }
-}
-
-void gen_DCT_graph_2d(Node *node, int stage_num, int node_num, int stage_idx,
- int node_idx, int dct_node_num) {
- int dct_stage_num = get_dct_stage_num(dct_node_num);
- // put 2 layers of dct_node_num DCTs on the graph
- for (int ni = 0; ni < dct_node_num; ni++) {
- gen_DCT_graph_1d(node, stage_num, node_num, stage_idx,
- node_idx + ni * dct_node_num, dct_node_num);
- gen_DCT_graph_1d(node, stage_num, node_num, stage_idx + dct_stage_num,
- node_idx + ni * dct_node_num, dct_node_num);
- }
- // connect first layer and second layer
- connect_layer_2d(node, stage_num, node_num, stage_idx + dct_stage_num - 1,
- node_idx, dct_node_num);
-}
-
-int get_hybrid_stage_num(int type, int hybrid_node_num) {
- if (type == TYPE_DCT || type == TYPE_IDCT) {
- return get_dct_stage_num(hybrid_node_num);
- } else if (type == TYPE_ADST || type == TYPE_IADST) {
- return get_adst_stage_num(hybrid_node_num);
- }
- return 0;
-}
-
-int get_hybrid_2d_stage_num(int type0, int type1, int hybrid_node_num) {
- int stage_num = 0;
- stage_num += get_hybrid_stage_num(type0, hybrid_node_num);
- stage_num += get_hybrid_stage_num(type1, hybrid_node_num);
- return stage_num;
-}
-
-int get_hybrid_2d_stage_num_new(int type0, int type1, int hybrid_node_num0,
- int hybrid_node_num1) {
- int stage_num = 0;
- stage_num += get_hybrid_stage_num(type0, hybrid_node_num0);
- stage_num += get_hybrid_stage_num(type1, hybrid_node_num1);
- return stage_num;
-}
-
-int get_hybrid_amplify_factor(int type, int hybrid_node_num) {
- return get_max_bit(hybrid_node_num) - 1;
-}
-
-void gen_hybrid_graph_1d(Node *node, int stage_num, int node_num, int stage_idx,
- int node_idx, int hybrid_node_num, int type) {
- if (type == TYPE_DCT) {
- gen_DCT_graph_1d(node, stage_num, node_num, stage_idx, node_idx,
- hybrid_node_num);
- } else if (type == TYPE_ADST) {
- gen_adst_graph(node, stage_num, node_num, stage_idx, node_idx,
- hybrid_node_num);
- } else if (type == TYPE_IDCT) {
- int hybrid_stage_num = get_hybrid_stage_num(type, hybrid_node_num);
- // generate a dct tempNode
- Node *tempNode = new Node[hybrid_stage_num * hybrid_node_num];
- init_graph(tempNode, hybrid_stage_num, hybrid_node_num);
- gen_DCT_graph_1d(tempNode, hybrid_stage_num, hybrid_node_num, 0, 0,
- hybrid_node_num);
-
- // tempNode's inverse graph to node[stage_idx][node_idx]
- gen_inv_graph(tempNode, hybrid_stage_num, hybrid_node_num, node, stage_num,
- node_num, stage_idx, node_idx);
- delete[] tempNode;
- } else if (type == TYPE_IADST) {
- int hybrid_stage_num = get_hybrid_stage_num(type, hybrid_node_num);
- // generate a adst tempNode
- Node *tempNode = new Node[hybrid_stage_num * hybrid_node_num];
- init_graph(tempNode, hybrid_stage_num, hybrid_node_num);
- gen_adst_graph(tempNode, hybrid_stage_num, hybrid_node_num, 0, 0,
- hybrid_node_num);
-
- // tempNode's inverse graph to node[stage_idx][node_idx]
- gen_inv_graph(tempNode, hybrid_stage_num, hybrid_node_num, node, stage_num,
- node_num, stage_idx, node_idx);
- delete[] tempNode;
- }
-}
-
-void gen_hybrid_graph_2d(Node *node, int stage_num, int node_num, int stage_idx,
- int node_idx, int hybrid_node_num, int type0,
- int type1) {
- int hybrid_stage_num = get_hybrid_stage_num(type0, hybrid_node_num);
-
- for (int ni = 0; ni < hybrid_node_num; ni++) {
- gen_hybrid_graph_1d(node, stage_num, node_num, stage_idx,
- node_idx + ni * hybrid_node_num, hybrid_node_num,
- type0);
- gen_hybrid_graph_1d(node, stage_num, node_num, stage_idx + hybrid_stage_num,
- node_idx + ni * hybrid_node_num, hybrid_node_num,
- type1);
- }
-
- // connect first layer and second layer
- connect_layer_2d(node, stage_num, node_num, stage_idx + hybrid_stage_num - 1,
- node_idx, hybrid_node_num);
-}
-
-void gen_hybrid_graph_2d_new(Node *node, int stage_num, int node_num,
- int stage_idx, int node_idx, int hybrid_node_num0,
- int hybrid_node_num1, int type0, int type1) {
- int hybrid_stage_num0 = get_hybrid_stage_num(type0, hybrid_node_num0);
-
- for (int ni = 0; ni < hybrid_node_num1; ni++) {
- gen_hybrid_graph_1d(node, stage_num, node_num, stage_idx,
- node_idx + ni * hybrid_node_num0, hybrid_node_num0,
- type0);
- }
- for (int ni = 0; ni < hybrid_node_num0; ni++) {
- gen_hybrid_graph_1d(
- node, stage_num, node_num, stage_idx + hybrid_stage_num0,
- node_idx + ni * hybrid_node_num1, hybrid_node_num1, type1);
- }
-
- // connect first layer and second layer
- connect_layer_2d_new(node, stage_num, node_num,
- stage_idx + hybrid_stage_num0 - 1, node_idx,
- hybrid_node_num0, hybrid_node_num1);
-}
-
-void gen_inv_graph(Node *node, int stage_num, int node_num, Node *invNode,
- int inv_stage_num, int inv_node_num, int inv_stage_idx,
- int inv_node_idx) {
- // clean up inNodeNum in invNode because of add_node
- for (int si = 1 + inv_stage_idx; si < inv_stage_idx + stage_num; si++) {
- for (int ni = inv_node_idx; ni < inv_node_idx + node_num; ni++) {
- int idx = get_idx(si, ni, inv_node_num);
- invNode[idx].inNodeNum = 0;
- }
- }
- // generate inverse graph of node on invNode
- for (int si = 1; si < stage_num; si++) {
- for (int ni = 0; ni < node_num; ni++) {
- int invSi = stage_num - si;
- int idx = get_idx(si, ni, node_num);
- for (int k = 0; k < node[idx].inNodeNum; k++) {
- int invNi = node[idx].inNodeIdx[k];
- add_node(invNode, inv_stage_num, inv_node_num, invSi + inv_stage_idx,
- invNi + inv_node_idx, ni + inv_node_idx,
- node[idx].inWeight[k]);
- }
- }
- }
-}
diff --git a/third_party/aom/tools/txfm_analyzer/txfm_graph.h b/third_party/aom/tools/txfm_analyzer/txfm_graph.h
deleted file mode 100644
index 2e3c9551e..000000000
--- a/third_party/aom/tools/txfm_analyzer/txfm_graph.h
+++ /dev/null
@@ -1,161 +0,0 @@
-/*
- * Copyright (c) 2018, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#ifndef AOM_TOOLS_TXFM_ANALYZER_TXFM_GRAPH_H_
-#define AOM_TOOLS_TXFM_ANALYZER_TXFM_GRAPH_H_
-
-struct Node {
- Node *inNode[2];
- int inNodeNum;
- int inNodeIdx[2];
- double inWeight[2];
- double value;
- int nodeIdx;
- int stageIdx;
- int visited;
-};
-
-#define PI (3.141592653589793238462643383279502884)
-#define STAGENUM (10)
-#define NODENUM (32)
-#define COS_MOD (128)
-
-typedef enum {
- TYPE_DCT = 0,
- TYPE_ADST,
- TYPE_IDCT,
- TYPE_IADST,
- TYPE_LAST
-} TYPE_TXFM;
-
-TYPE_TXFM get_inv_type(TYPE_TXFM type);
-void get_fun_name(char *str_fun_name, int str_buf_size, const TYPE_TXFM type,
- const int txfm_size);
-
-void get_txfm_type_name(char *str_fun_name, int str_buf_size,
- const TYPE_TXFM type, const int txfm_size);
-void get_hybrid_2d_type_name(char *buf, int buf_size, const TYPE_TXFM type0,
- const TYPE_TXFM type1, const int txfm_size0,
- const int txfm_size1);
-unsigned int get_max_bit(unsigned int x);
-unsigned int bitwise_reverse(unsigned int x, int max_bit);
-int get_idx(int ri, int ci, int cSize);
-
-int get_dct_stage_num(int size);
-void reference_dct_1d(double *in, double *out, int size);
-void reference_dct_2d(double *in, double *out, int size);
-void connect_node(Node *node, int stage_num, int node_num, int stage_idx,
- int node_idx, int in0, double w0, int in1, double w1);
-void propagate(Node *node, int stage_num, int node_num, int stage);
-void init_graph(Node *node, int stage_num, int node_num);
-void graph_reset_visited(Node *node, int stage_num, int node_num);
-void gen_B_graph(Node *node, int stage_num, int node_num, int stage_idx,
- int node_idx, int N, int star);
-void gen_P_graph(Node *node, int stage_num, int node_num, int stage_idx,
- int node_idx, int N);
-
-void gen_type1_graph(Node *node, int stage_num, int node_num, int stage_idx,
- int node_idx, int N);
-void gen_type2_graph(Node *node, int stage_num, int node_num, int stage_idx,
- int node_idx, int N);
-void gen_type3_graph(Node *node, int stage_num, int node_num, int stage_idx,
- int node_idx, int idx, int N);
-void gen_type4_graph(Node *node, int stage_num, int node_num, int stage_idx,
- int node_idx, int idx, int N);
-
-void gen_R_graph(Node *node, int stage_num, int node_num, int stage_idx,
- int node_idx, int N);
-
-void gen_DCT_graph(Node *node, int stage_num, int node_num, int stage_idx,
- int node_idx, int N);
-
-void gen_DCT_graph_1d(Node *node, int stage_num, int node_num, int stage_idx,
- int node_idx, int dct_node_num);
-void connect_layer_2d(Node *node, int stage_num, int node_num, int stage_idx,
- int node_idx, int dct_node_num);
-
-void gen_DCT_graph_2d(Node *node, int stage_num, int node_num, int stage_idx,
- int node_idx, int dct_node_num);
-
-void gen_adst_B_graph(Node *node, int stage_num, int node_num, int stage_idx,
- int node_idx, int adst_idx);
-
-void gen_adst_U_graph(Node *node, int stage_num, int node_num, int stage_idx,
- int node_idx, int adst_idx, int adst_node_num);
-void gen_adst_T_graph(Node *node, int stage_num, int node_num, int stage_idx,
- int node_idx, double freq);
-
-void gen_adst_E_graph(Node *node, int stage_num, int node_num, int stage_idx,
- int node_idx, int adst_idx);
-
-void gen_adst_V_graph(Node *node, int stage_num, int node_num, int stage_idx,
- int node_idx, int adst_idx, int adst_node_num);
-
-void gen_adst_VJ_graph(Node *node, int stage_num, int node_num, int stage_idx,
- int node_idx, int adst_node_num);
-void gen_adst_Q_graph(Node *node, int stage_num, int node_num, int stage_idx,
- int node_idx, int adst_node_num);
-void gen_adst_Ibar_graph(Node *node, int stage_num, int node_num, int stage_idx,
- int node_idx, int adst_node_num);
-
-void gen_adst_D_graph(Node *node, int stage_num, int node_num, int stage_idx,
- int node_idx, int adst_node_num);
-
-int get_hadamard_idx(int x, int adst_node_num);
-void gen_adst_Ht_graph(Node *node, int stage_num, int node_num, int stage_idx,
- int node_idx, int adst_node_num);
-
-int gen_adst_graph(Node *node, int stage_num, int node_num, int stage_idx,
- int node_idx, int adst_node_num);
-int gen_iadst_graph(Node *node, int stage_num, int node_num, int stage_idx,
- int node_idx, int adst_node_num);
-void reference_adst_1d(double *in, double *out, int size);
-
-int get_adst_stage_num(int adst_node_num);
-int get_hybrid_stage_num(int type, int hybrid_node_num);
-int get_hybrid_2d_stage_num(int type0, int type1, int hybrid_node_num);
-int get_hybrid_2d_stage_num_new(int type0, int type1, int hybrid_node_num0,
- int hybrid_node_num1);
-int get_hybrid_amplify_factor(int type, int hybrid_node_num);
-void gen_hybrid_graph_1d(Node *node, int stage_num, int node_num, int stage_idx,
- int node_idx, int hybrid_node_num, int type);
-void gen_hybrid_graph_2d(Node *node, int stage_num, int node_num, int stage_idx,
- int node_idx, int hybrid_node_num, int type0,
- int type1);
-void gen_hybrid_graph_2d_new(Node *node, int stage_num, int node_num,
- int stage_idx, int node_idx, int hybrid_node_num0,
- int hybrid_node_num1, int type0, int type1);
-
-void reference_hybrid_2d(double *in, double *out, int size, int type0,
- int type1);
-
-void reference_hybrid_2d_new(double *in, double *out, int size0, int size1,
- int type0, int type1);
-void reference_adst_dct_2d(double *in, double *out, int size);
-
-void gen_code(Node *node, int stage_num, int node_num, TYPE_TXFM type);
-
-void gen_inv_graph(Node *node, int stage_num, int node_num, Node *invNode,
- int inv_stage_num, int inv_node_num, int inv_stage_idx,
- int inv_node_idx);
-
-TYPE_TXFM hybrid_char_to_int(char ctype);
-
-int64_t round_shift(int64_t value, int bit);
-void round_shift_array(int32_t *arr, int size, int bit);
-void estimate_value(Node *node, int stage_num, int node_num, int stage_idx,
- int node_idx, int estimate_bit);
-void amplify_value(Node *node, int stage_num, int node_num, int stage_idx,
- int node_idx, int estimate_bit);
-void propagate_estimate_amlify(Node *node, int stage_num, int node_num,
- int stage_idx, int amplify_bit,
- int estimate_bit);
-#endif // AOM_TOOLS_TXFM_ANALYZER_TXFM_GRAPH_H_
diff --git a/third_party/aom/tools/wrap-commit-msg.py b/third_party/aom/tools/wrap-commit-msg.py
deleted file mode 100755
index 1c7882443..000000000
--- a/third_party/aom/tools/wrap-commit-msg.py
+++ /dev/null
@@ -1,72 +0,0 @@
-#!/usr/bin/env python
-##
-## Copyright (c) 2016, Alliance for Open Media. All rights reserved
-##
-## This source code is subject to the terms of the BSD 2 Clause License and
-## the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
-## was not distributed with this source code in the LICENSE file, you can
-## obtain it at www.aomedia.org/license/software. If the Alliance for Open
-## Media Patent License 1.0 was not distributed with this source code in the
-## PATENTS file, you can obtain it at www.aomedia.org/license/patent.
-##
-"""Wraps paragraphs of text, preserving manual formatting
-
-This is like fold(1), but has the special convention of not modifying lines
-that start with whitespace. This allows you to intersperse blocks with
-special formatting, like code blocks, with written prose. The prose will
-be wordwrapped, and the manual formatting will be preserved.
-
- * This won't handle the case of a bulleted (or ordered) list specially, so
- manual wrapping must be done.
-
-Occasionally it's useful to put something with explicit formatting that
-doesn't look at all like a block of text inline.
-
- indicator = has_leading_whitespace(line);
- if (indicator)
- preserve_formatting(line);
-
-The intent is that this docstring would make it through the transform
-and still be legible and presented as it is in the source. If additional
-cases are handled, update this doc to describe the effect.
-"""
-
-__author__ = "jkoleszar@google.com"
-import textwrap
-import sys
-
-def wrap(text):
- if text:
- return textwrap.fill(text, break_long_words=False) + '\n'
- return ""
-
-
-def main(fileobj):
- text = ""
- output = ""
- while True:
- line = fileobj.readline()
- if not line:
- break
-
- if line.lstrip() == line:
- text += line
- else:
- output += wrap(text)
- text=""
- output += line
- output += wrap(text)
-
- # Replace the file or write to stdout.
- if fileobj == sys.stdin:
- fileobj = sys.stdout
- else:
- fileobj.seek(0)
- fileobj.truncate(0)
- fileobj.write(output)
-
-if __name__ == "__main__":
- if len(sys.argv) > 1:
- main(open(sys.argv[1], "r+"))
- else:
- main(sys.stdin)
diff --git a/third_party/aom/usage.dox b/third_party/aom/usage.dox
deleted file mode 100644
index 062d35a83..000000000
--- a/third_party/aom/usage.dox
+++ /dev/null
@@ -1,111 +0,0 @@
-/*!\page usage Usage
-
- The aom multi-format codec SDK provides a unified interface amongst its
- supported codecs. This abstraction allows applications using this SDK to
- easily support multiple video formats with minimal code duplication or
- "special casing." This section describes the interface common to all codecs.
- For codec-specific details, see the \ref codecs page.
-
- The following sections are common to all codecs:
- - \ref usage_types
- - \ref usage_features
- - \ref usage_init
- - \ref usage_errors
-
- For more information on decoder and encoder specific usage, see the
- following pages:
- \if decoder
- \li \subpage usage_decode
- \endif
- \if encoder
- \li \subpage usage_encode
- \endif
-
- \section usage_types Important Data Types
- There are two important data structures to consider in this interface.
-
- \subsection usage_ctxs Contexts
- A context is a storage area allocated by the calling application that the
- codec may write into to store details about a single instance of that codec.
- Most of the context is implementation specific, and thus opaque to the
- application. The context structure as seen by the application is of fixed
- size, and thus can be allocated with automatic storage or dynamically
- on the heap.
-
- Most operations require an initialized codec context. Codec context
- instances are codec specific. That is, the codec to be used for the encoded
- video must be known at initialization time. See #aom_codec_ctx_t for further
- information.
-
- \subsection usage_ifaces Interfaces
- A codec interface is an opaque structure that controls how function calls
- into the generic interface are dispatched to their codec-specific
- implementations. Applications \ref MUSTNOT attempt to examine or override
- this storage, as it contains internal implementation details likely to
- change from release to release.
-
- Each supported codec will expose an interface structure to the application
- as an <code>extern</code> reference to a structure of the incomplete type
- #aom_codec_iface_t.
-
- \section usage_features Features
- Several "features" are defined that are optionally implemented by codec
- algorithms. Indeed, the same algorithm may support different features on
- different platforms. The purpose of defining these features is that when
- they are implemented, they conform to a common interface. The features, or
- capabilities, of an algorithm can be queried from its interface by using
- the aom_codec_get_caps() method. Attempts to invoke features not supported
- by an algorithm will generally result in #AOM_CODEC_INCAPABLE.
-
- \if decoder
- Currently defined decoder features include:
- - \ref usage_cb
- \endif
-
- \section usage_init Initialization
- To initialize a codec instance, the address of the codec context
- and interface structures are passed to an initialization function. Depending
- on the \ref usage_features that the codec supports, the codec could be
- initialized in different modes.
-
- To prevent cases of confusion where the ABI of the library changes,
- the ABI is versioned. The ABI version number must be passed at
- initialization time to ensure the application is using a header file that
- matches the library. The current ABI version number is stored in the
- preprocessor macros #AOM_CODEC_ABI_VERSION, #AOM_ENCODER_ABI_VERSION, and
- #AOM_DECODER_ABI_VERSION. For convenience, each initialization function has
- a wrapper macro that inserts the correct version number. These macros are
- named like the initialization methods, but without the _ver suffix.
-
-
- The available initialization methods are:
- \if encoder
- \li #aom_codec_enc_init (calls aom_codec_enc_init_ver())
- \li #aom_codec_enc_init_multi (calls aom_codec_enc_init_multi_ver())
- \endif
- \if decoder
- \li #aom_codec_dec_init (calls aom_codec_dec_init_ver())
- \endif
-
-
- \section usage_errors Error Handling
- Almost all codec functions return an error status of type #aom_codec_err_t.
- The semantics of how each error condition should be processed is clearly
- defined in the definitions of each enumerated value. Error values can be
- converted into ASCII strings with the aom_codec_error() and
- aom_codec_err_to_string() methods. The difference between these two methods is
- that aom_codec_error() returns the error state from an initialized context,
- whereas aom_codec_err_to_string() can be used in cases where an error occurs
- outside any context. The enumerated value returned from the last call can be
- retrieved from the <code>err</code> member of the decoder context as well.
- Finally, more detailed error information may be available through
- the aom_codec_error_detail() method. Not all errors produce detailed error
- information.
-
- In addition to error information, the codec library's build configuration
- is available at runtime on some platforms. This information can be returned
- by calling aom_codec_build_config(), and is formatted as a base64 coded string
- (comprised of characters in the set [a-zA-Z0-9+/]). This information is not
- useful to an application at runtime, but may be of use to aom for support.
-
-*/
diff --git a/third_party/aom/usage_cx.dox b/third_party/aom/usage_cx.dox
deleted file mode 100644
index 51b4e8e3e..000000000
--- a/third_party/aom/usage_cx.dox
+++ /dev/null
@@ -1,9 +0,0 @@
-/*! \page usage_encode Encoding
-
- The aom_codec_encode() function is at the core of the encode loop. It
- processes raw images passed by the application, producing packets of
- compressed data.
-
- \ref samples
-
-*/
diff --git a/third_party/aom/usage_dx.dox b/third_party/aom/usage_dx.dox
deleted file mode 100644
index eef78376f..000000000
--- a/third_party/aom/usage_dx.dox
+++ /dev/null
@@ -1,57 +0,0 @@
-/*! \page usage_decode Decoding
-
- The aom_codec_decode() function is at the core of the decode loop. It
- processes packets of compressed data passed by the application, producing
- decoded images. The decoder expects packets to comprise exactly one image
- frame of data. Packets \ref MUST be passed in decode order. If the
- application wishes to associate some data with the frame, the
- <code>user_priv</code> member may be set.
-
- \ref samples
-
-
- \section usage_cb Callback Based Decoding
- There are two methods for the application to access decoded frame data. Some
- codecs support asynchronous (callback-based) decoding \ref usage_features
- that allow the application to register a callback to be invoked by the
- decoder when decoded data becomes available. Decoders are not required to
- support this feature, however. Like all \ref usage_features, support can be
- determined by calling aom_codec_get_caps(). Callbacks are available in both
- frame-based and slice-based variants. Frame based callbacks conform to the
- signature of #aom_codec_put_frame_cb_fn_t and are invoked once the entire
- frame has been decoded. Slice based callbacks conform to the signature of
- #aom_codec_put_slice_cb_fn_t and are invoked after a subsection of the frame
- is decoded. For example, a slice callback could be issued for each
- macroblock row. However, the number and size of slices to return is
- implementation specific. Also, the image data passed in a slice callback is
- not necessarily in the same memory segment as the data will be when it is
- assembled into a full frame. For this reason, the application \ref MUST
- examine the rectangles that describe what data is valid to access and what
- data has been updated in this call. For all their additional complexity,
- slice based decoding callbacks provide substantial speed gains to the
- overall application in some cases, due to improved cache behavior.
-
-
- \section usage_frame_iter Frame Iterator Based Decoding
- If the codec does not support callback based decoding, or the application
- chooses not to make use of that feature, decoded frames are made available
- through the aom_codec_get_frame() iterator. The application initializes the
- iterator storage (of type #aom_codec_iter_t) to NULL, then calls
- aom_codec_get_frame() repeatedly until it returns NULL, indicating that all
- images have been returned. This process may result in zero, one, or many
- frames that are ready for display, depending on the codec.
-
-
- \section usage_postproc Postprocessing
- Postprocessing is a process that is applied after a frame is decoded to
- enhance the image's appearance by removing artifacts introduced in the
- compression process. It is not required to properly decode the frame, and
- is generally done only when there is enough spare CPU time to execute
- the required filters. Codecs may support a number of different
- postprocessing filters, and the available filters may differ from platform
- to platform. Embedded devices often do not have enough CPU to implement
- postprocessing in software. The filter selection is generally handled
- automatically by the codec.
-
-
-*/